diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 35e8e3c7b..6059533e2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -9,6 +9,9 @@ on: jobs: build: runs-on: windows-latest + strategy: + matrix: + platform: ["x64", "ARM64"] steps: - uses: actions/checkout@v4 @@ -24,10 +27,10 @@ jobs: uses: actions/cache@v4 with: path: ~/.conan2/p - key: ${{ runner.os }}-conan-${{ hashFiles('src/**/conanfile.txt') }} + key: Conan-${{ hashFiles('src/**/conanfile.txt') }}-${{ matrix.platform }} - name: Build - run: python publish.py + run: python publish.py ${{ matrix.platform }} - name: Save hash id: hash @@ -36,5 +39,5 @@ jobs: - name: Store build uses: actions/upload-artifact@v4 with: - name: Magpie-dev-${{ steps.hash.outputs.sha_short }} - path: ./publish + name: Magpie-dev-${{ steps.hash.outputs.sha_short }}-${{ matrix.platform }} + path: ./publish/${{ matrix.platform }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 842b9e1e4..98f5ef198 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -24,9 +24,13 @@ on: required: true type: boolean jobs: - release: + build: runs-on: windows-latest - + outputs: + tag: ${{ steps.tag.outputs.tag }} + strategy: + matrix: + platform: ["x64", "ARM64"] steps: - uses: actions/checkout@v4 @@ -42,13 +46,50 @@ jobs: with: path: ~/.conan2/p key: ${{ runner.os }}-conan-${{ hashFiles('src/**/conanfile.txt') }} + + - name: Generate tag + id: tag + run: | + $tag = "${{ inputs.tag }}" -eq "" ? 
"v${{ inputs.major }}.${{ inputs.minor }}.${{ inputs.patch }}" : "${{ inputs.tag }}" + echo "tag=$tag" >> $env:GITHUB_OUTPUT - - name: Publish release - run: python publish.py + - name: Build + run: python publish.py ${{ matrix.platform }} env: MAJOR: ${{ inputs.major }} MINOR: ${{ inputs.minor }} PATCH: ${{ inputs.patch }} - TAG: ${{ inputs.tag }} - PRERELEASE: ${{ inputs.prerelease }} - ACCESS_TOKEN: ${{ secrets.CONTENTS_ACCESS_TOKEN }} + TAG: ${{ steps.tag.outputs.tag }} + + - name: Store artifacts + uses: actions/upload-artifact@v4 + with: + name: Magpie-${{ steps.tag.outputs.tag }}-${{ matrix.platform }} + path: publish/${{ matrix.platform }} + release: + runs-on: windows-latest + needs: build + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Setup Requests + run: pip install requests + + - name: Restore artifacts + uses: actions/download-artifact@v4 + with: + path: publish + + - name: Publish release + run: python ci/release.py + env: + MAJOR: ${{ inputs.major }} + MINOR: ${{ inputs.minor }} + PATCH: ${{ inputs.patch }} + TAG: ${{ needs.build.outputs.tag }} + PRERELEASE: ${{ inputs.prerelease }} + ACCESS_TOKEN: ${{ secrets.CONTENTS_ACCESS_TOKEN }} diff --git a/Magpie.sln b/Magpie.sln index c311fc39e..b93c7277e 100644 --- a/Magpie.sln +++ b/Magpie.sln @@ -24,7 +24,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution src\Common.Pre.props = src\Common.Pre.props Directory.Build.props = Directory.Build.props src\extract_winui_runtime.py = src\extract_winui_runtime.py - src\fix_resfiles.py = src\fix_resfiles.py src\HybridCRT.props = src\HybridCRT.props src\WinUI.props = src\WinUI.props EndProjectSection diff --git a/ci/release.py b/ci/release.py new file mode 100644 index 000000000..b872383d0 --- /dev/null +++ b/ci/release.py @@ -0,0 +1,158 @@ +import sys +import os +import subprocess +import shutil +import requests +import hashlib +import json + +try: + # 
https://docs.github.com/en/actions/learn-github-actions/variables + if os.environ["GITHUB_ACTIONS"].lower() == "true": + # 不知为何在 Github Actions 中运行时默认编码为 ANSI,并且 print 需刷新流才能正常显示 + for stream in [sys.stdout, sys.stderr]: + stream.reconfigure(encoding="utf-8") +except: + pass + +majorVersion = os.environ["MAJOR"] +minorVersion = os.environ["MINOR"] +patchVersion = os.environ["PATCH"] +tag = os.environ["TAG"] +isPrerelease = os.environ["PRERELEASE"].lower() == "true" +githubAccessToken = os.environ["ACCESS_TOKEN"] +repo = os.environ["GITHUB_REPOSITORY"] +actor = os.environ["GITHUB_ACTOR"] + +subprocess.run("git config user.name " + actor) +subprocess.run(f"git config user.email {actor}@users.noreply.github.com") + +subprocess.run( + f"git remote set-url origin https://{githubAccessToken}@github.com/{repo}.git" +) + +# 打标签 +if subprocess.run(f"git tag -a {tag} -m {tag}").returncode != 0: + raise Exception("打标签失败") + +if subprocess.run("git push origin " + tag).returncode != 0: + raise Exception("推送标签失败") + +print("已创建标签 " + tag, flush=True) + +headers = { + "Accept": "application/vnd.github+json", + "Authorization": "Bearer " + githubAccessToken, + "X-GitHub-Api-Version": "2022-11-28", +} + +# 获取前一个发布版本来生成默认发行说明 +prevReleaseTag = None +try: + if isPrerelease: + # 发布预发行版与最新的版本(无论是正式版还是预发行版)对比 + response = requests.get( + f"https://api.github.com/repos/{repo}/releases", + json={"per_page": 1}, + headers=headers, + ) + if response.ok: + prevReleaseTag = response.json()[0]["tag_name"] + else: + # 发布正式版则与最新的正式版对比 + # 由于可以自己选择最新版本,此接口可能不会返回时间上最新发布的版本,不是大问题 + response = requests.get( + f"https://api.github.com/repos/{repo}/releases/latest", headers=headers + ) + if response.ok: + prevReleaseTag = response.json()["tag_name"] +except: + # 忽略错误 + pass + +# 发布 release +if prevReleaseTag == None: + body = "" +else: + # 默认发行说明为比较两个 tag + body = f"https://github.com/{repo}/compare/{prevReleaseTag}...{tag}" + +response = requests.post( + 
f"https://api.github.com/repos/{repo}/releases", + json={ + "tag_name": tag, + "name": tag, + "prerelease": isPrerelease, + "body": body, + "discussion_category_name": "Announcements", + }, + headers=headers, +) +if not response.ok: + raise Exception("发布失败") + +uploadUrl = response.json()["upload_url"] +uploadUrl = uploadUrl[: uploadUrl.find("{")] + "?name=" + +os.chdir(os.path.dirname(__file__) + "\\..\\publish") + +pkgInfos = {} +for platform in ["x64", "ARM64"]: + # 打包成 zip + pkgName = "Magpie-" + tag + "-" + platform + shutil.make_archive(pkgName, "zip", pkgName) + pkgName += ".zip" + + # 上传资产 + with open(pkgName, "rb") as f: + # 流式上传 + # https://requests.readthedocs.io/en/latest/user/advanced/#streaming-uploads + response = requests.post( + uploadUrl + pkgName, + data=f, + headers={**headers, "Content-Type": "application/zip"}, + ) + + if not response.ok: + raise Exception("上传失败") + + # 计算哈希 + f.seek(0, os.SEEK_SET) + md5 = hashlib.file_digest(f, hashlib.md5).hexdigest() + + pkgInfos[platform] = (pkgName, md5) + +print("已发布 " + tag, flush=True) + +# 更新 version.json +# 此步应在发布版本之后,因为程序使用 version.json 检查更新 +os.chdir("..") +with open("version.json", "w", encoding="utf-8") as f: + json.dump( + { + "version": f"{majorVersion}.{minorVersion}.{patchVersion}", + "tag": tag, + "binary": { + "x64": { + "url": f"https://github.com/{repo}/releases/download/{tag}/{pkgInfos['x64'][0]}", + "hash": pkgInfos["x64"][1], + }, + "ARM64": { + "url": f"https://github.com/{repo}/releases/download/{tag}/{pkgInfos['ARM64'][0]}", + "hash": pkgInfos["ARM64"][1], + }, + }, + }, + f, + indent=4, + ) + +# 提交对 version.json 的更改 +if subprocess.run("git add version.json").returncode != 0: + raise Exception("git add 失败") + +if subprocess.run('git commit -m "Update version.json"').returncode != 0: + raise Exception("git commit 失败") + +if subprocess.run("git push").returncode != 0: + raise Exception("git push 失败") diff --git a/docs/Built-in effects.md b/docs/Built-in effects.md index 
501957025..471cd69ef 100644 --- a/docs/Built-in effects.md +++ b/docs/Built-in effects.md @@ -18,7 +18,7 @@ Magpie ships with a handful of effects that can be used in combinations. Most of * Parameter: * Strength: Denoise magnitude -* Anime4K_Restore_S, Anime4K_Restore_M, Anime4K_Restore_L, Anime4K_Restore_VL, Anime4K_Restore_UL, Anime4K_Restore_Soft_S, Anime4K_Restore_Soft_M, Anime4K_Restore_Soft_L, Anime4K_Restore_Soft_VL, Anime4K_Restore_Soft_UL: Algorithms to restore the lines in animations. In increasing order of demand for computing power. The Soft variants are more conservative in sharpening. +* Anime4K_Restore family: Algorithms to restore the lines in animations. In increasing order of demand for computing power. The Soft variants are more conservative in sharpening. * Output size: the same as the input * Anime4K_Thin_HQ: Algorithm to clarify lines in animations provided by Anime4K. @@ -27,7 +27,7 @@ Magpie ships with a handful of effects that can be used in combinations. Most of * Strength: The strength in each iteration. * Iterations: The number of iterations. Decreasing strength and increasing iterations improves the quality of the images, but will lower the processing speed. -* Anime4K_Upscale_S, Anime4K_Upscale_L, Anime4K_Upscale_Denoise_S, Anime4K_Upscale_Denoise_L, and Anime4K_Upscale_GAN_x2_S: Anime-style scaling algorithms provided by Anime4K. The denoise variant includes denoise functionality. The GAN variant, which keeps more details, is still under experiment. +* Anime4K_Upscale family: Anime-style scaling algorithms provided by Anime4K. The denoise variant includes denoise functionality. The GAN variant, which keeps more details, is still under experiment. * Output size: twice that of the input * Bicubic: Interpolation algorithms. The lite variant is fast, but at the cost of quality degradation, Suitable for users will weak graphics cards. @@ -124,6 +124,9 @@ Magpie ships with a handful of effects that can be used in combinations. 
Most of * Bloom Amount * Filter Kernel Shape +* CuNNy family: Suitable for visual novel-style images. The DS variants offer a subtle denoise effect. Provided by [CuNNy](https://github.com/cunnyplapper/CuNNy) + * Output size: twice that of the input + * Deband * Output size: the same as the input * Parameters @@ -221,7 +224,7 @@ Magpie ships with a handful of effects that can be used in combinations. Most of * Sharpness * Note: Only supports upscaling. -* NNEDI3_nns16_win8x4 and NNEDI3_nns64_win8x6:These shaders originally designed for deinterlacing and are also high-quality interpolation algorithms. NNEDI3_nns64_win8x6 produces higher quality results, but slower. +* NNEDI3 family: These shaders were originally designed for deinterlacing and are also high-quality interpolation algorithms. NNEDI3_nns64_win8x6 produces higher quality results, but is slower. * Output size: twice that of the input * NVSharpen: Port of NVSharpen that was published along with NIS. @@ -232,10 +235,10 @@ Magpie ships with a handful of effects that can be used in combinations. Most of * Pixellate: Scale with the Pixellate algorithm. Suitable for upscaling pixel arts. * Output size: determined by scale configuration -* RAVU_Lite_R3: Port of ravu-lite-r3 +* RAVU family: Ported from https://github.com/bjin/mpv-prescalers * Output size: twice that of the input -* RAVU_Zoom_R3: Port of ravu-zoom-r3 +* RAVU_Zoom family: Ported from https://github.com/bjin/mpv-prescalers * Output size: determined by scale configuration * Note: Only supports upscaling. diff --git a/docs/Comparison of capture methods.md b/docs/Comparison of capture methods.md index 52ed2c56a..496df19b7 100644 --- a/docs/Comparison of capture methods.md +++ b/docs/Comparison of capture methods.md @@ -1,4 +1,4 @@ -Magpie provides several capture methods. They have their pros and cons in different scenarios. +Magpie provides several capture methods. They have their pros and cons in different scenarios. 
For general purposes, it's recommended to use Graphics Capture, as it provides the best compatibility and smoothness. | | Graphics Capture | Desktop Duplication | GDI | DwmSharedSurface | | :---: | :---: | :---: | :---: |:---: | @@ -6,11 +6,9 @@ Magpie provides several capture methods. They have their pros and cons in differ | Supports recording/streaming | No under extreme conditions[1] | No | Yes | Yes | | Support the source window to span multiple screens | No under extreme conditions[1] | No | Yes | Yes | | Ignores DPI virtualization[2] | No | No | Yes| Yes | -| Notes | The most recommended capture method | Requires Win10 v2004, suitable for games with more static frames[3], could capture pop-ups | | Low VRAM usage | +| Notes | The most recommended capture method | Requires Win10 v2004 | | Low VRAM usage | [1]: (1) The source window does not support regular window capture. (2) The operating system is Windows 11. [2]: The system will perform bicubic interpolation upscaling to windows that do not support DPI scaling. The capture methods supporting this options captures the images before such scaling. - -[3]: The Desktop Duplication mode effectively reduces the power consumption if there are many static frames. diff --git a/docs/MagpieFX (EN).md b/docs/MagpieFX (EN).md index d0f404e89..624d447e7 100644 --- a/docs/MagpieFX (EN).md +++ b/docs/MagpieFX (EN).md @@ -2,23 +2,12 @@ MagpieFX is based on DirectX 11 compute shader ``` hlsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 -// Specify "USE_DYNAMIC" to use GetFrameCount or GetCursorPos. +//!VERSION 4 +// Specify "USE_DYNAMIC" to use GetFrameCount. //!USE_DYNAMIC -// Specifying "GENERIC_DOWNSCALER" indicates that this effect can be used as the "default downscaling effect". -//!GENERIC_DOWNSCALER // Use "SORT_NAME" to specify the name used for sorting, otherwise the files will be sorted by their file names. 
//!SORT_NAME test1 -// Not specifying "OUTPUT_WIDTH" and "OUTPUT_HEIGHT" indicates that this effect supports outputting to any size. -// You can use some pre-defined constants when calculating texture size. -// INPUT_WIDTH -// INPUT_HEIGHT -// OUTPUT_WIDTH -// OUTPUT_HEIGHT - // Definition of parameters //!PARAMETER @@ -33,13 +22,25 @@ float sharpness; // Definition of textures -// "INPUT" is a special keyword. -// "INPUT" cannot be used as the output of a pass. -// Defining INPUT is optional, but it is recommended to define it explicitly for the sake of semantic completeness. +// "INPUT" and "OUTPUT" are special keywords. +// "INPUT" cannot be used as the output of a pass; "OUTPUT" cannot be used as the input of a pass. +// Defining INPUT/OUTPUT is optional, but it is recommended to define them explicitly for the sake of semantic completeness. +// The size of the OUTPUT represents the output size of this effect. Not specifying it indicates support for output of any size. //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +// You can use some pre-defined constants to calculate texture size. +// INPUT_WIDTH +// INPUT_HEIGHT +// OUTPUT_WIDTH +// OUTPUT_HEIGHT + // Supported texture formats: // R32G32B32A32_FLOAT // R16G16B16A16_FLOAT @@ -110,11 +111,10 @@ float4 Pass1(float2 pos) { return float4(1, 1, 1, 1); } -// The last pass does not support "OUT". -// If you are using the CS style, you must use "WriteToOutput" to output the result. - //!PASS 2 //!IN INPUT, tex1 +// The output of the last pass must be "OUTPUT". +//!OUT OUTPUT // "BLOACK_SIZE" specifies how large an area is processed in one dispatch. // "BLOACK_SIZE" can have only one dimension, meaning that length and height are specified at the same time. //!BLOCK_SIZE 16, 16 @@ -123,18 +123,13 @@ float4 Pass1(float2 pos) { //!NUM_THREADS 64, 1, 1 void Pass2(uint2 blockStart, uint3 threadId) { - // Render the cursor and then output. 
- // Available only in the last pass. - WriteToOutput(blockStart, float3(1,1,1)); + // Write to OUTPUT + OUTPUT[blockStart] = float4(1,1,1,1); } ``` ### Predefined functions -**void WriteToOutput(uint2 pos, float3 color)**: Only available in the last pass and is used to write results to the output texture. - -**bool CheckViewport(uint2 pos)**: Only available in the last pass and is used to check whether the output coordinates are inside the viewport. - **uint2 GetInputSize()**: Retrieves the size of the input texture. **float2 GetInputPt()**: Retrieves the size of pixel in the input texture. @@ -147,8 +142,6 @@ void Pass2(uint2 blockStart, uint3 threadId) { **uint GetFrameCount()**: Retrieves the total number of frames rendered so far. When using this function, you must specify USE_DYNAMIC. -**uint2 GetCursorPos()**: Retrieves the current cursor position. When using this function, you must specify USE_DYNAMIC. - **uint2 Rmp8x8(uint id)**: Maps the values of 0 to 63 to coordinates in an 8x8 square in swizzle order, which can improve texture cache hit rate. @@ -164,10 +157,6 @@ void Pass2(uint2 blockStart, uint3 threadId) { **MP_DEBUG**: Whether the shader is being compiled in debug mode (when compiling shaders in debug mode, they are not optimized and contain debug information). -**MP_LAST_PASS**: Whether the current pass is the last pass of the effect. - -**MP_LAST_EFFECT**: Whether the effect is the last effect for the current scaling mode (the last effect needs to handle viewport and cursor rendering). - **MP_FP16**: Whether to use half-precision floating-point numbers (specifed by user). **MF、MF1、MF2、...、MF4x4**: Floating-point data types that conform to MP_FP16. When half-precision is not specified, they are aliases for float..., otherwise they are aliases for min16float... 
diff --git a/docs/MagpieFX.md b/docs/MagpieFX.md index b447d65fa..d14327332 100644 --- a/docs/MagpieFX.md +++ b/docs/MagpieFX.md @@ -2,23 +2,12 @@ MagpieFX 基于 DirectX 11 计算着色器 ``` hlsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 -// 若要使用 GetFrameCount 或 GetCursorPos 需指定 USE_DYNAMIC +//!VERSION 4 +// 若要使用 GetFrameCount 需指定 USE_DYNAMIC //!USE_DYNAMIC -// GENERIC_DOWNSCALER 表示此效果可以作为“默认降采样效果” -//!GENERIC_DOWNSCALER // 使用 SORT_NAME 指定排序时使用的名字,否则按照文件名排序 //!SORT_NAME test1 -// 不指定 OUTPUT_WIDTH 和 OUTPUT_HEIGHT 表示此效果支持输出任意尺寸 -// 计算纹理尺寸时可以使用一些预定义常量 -// INPUT_WIDTH -// INPUT_HEIGHT -// OUTPUT_WIDTH -// OUTPUT_HEIGHT - // 参数定义 //!PARAMETER @@ -33,13 +22,25 @@ float sharpness; // 纹理定义 -// INPUT 是特殊关键字 -// INPUT 不能作为通道的输出 -// 定义 INPUT 是可选的,但为了保持语义的完整性,建议显式定义 +// INPUT、OUTPUT 是特殊关键字 +// INPUT 不能作为通道的输出,OUTPUT 不能作为通道的输入 +// 定义 INPUT 和 OUTPUT 是可选的,但为了保持语义的完整性,建议显式定义 +// OUTPUT 的尺寸即为此效果的输出尺寸,不指定则表示支持任意尺寸的输出 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +// 计算纹理尺寸时可以使用一些预定义常量 +// INPUT_WIDTH +// INPUT_HEIGHT +// OUTPUT_WIDTH +// OUTPUT_HEIGHT + // 支持的纹理格式: // R32G32B32A32_FLOAT // R16G16B16A16_FLOAT @@ -109,11 +110,10 @@ float4 Pass1(float2 pos) { return float4(1, 1, 1, 1); } -// 最后一个通道不能指定 OUT -// 如果是 CS 风格必须使用 WriteToOutput 输出结果 - //!PASS 2 //!IN INPUT, tex1 +// 最后一个通道的输出只能是 OUTPUT +//!OUT OUTPUT // BLOCK_SIZE 指定一次 dispatch 处理多大的区域 // 可以只有一维,即同时指定长和高 //!BLOCK_SIZE 16, 16 @@ -122,18 +122,13 @@ float4 Pass1(float2 pos) { //!NUM_THREADS 64, 1, 1 void Pass2(uint2 blockStart, uint3 threadId) { - // 渲染光标并写入 OUPUT - // 只在最后一个通道中可用 - WriteToOutput(blockStart, float3(1,1,1)); + // 写入 OUTPUT + OUTPUT[blockStart] = float4(1,1,1,1); } ``` ### 预定义函数 -**void WriteToOutput(uint2 pos, float3 color)**:只在最后一个通道(Pass)中可用,用于将结果写入到输出纹理。 - -**bool CheckViewport(uint2 pos)**:只在最后一个通道中可用,检查输出坐标是否位于视口内。 - **uint2 GetInputSize()**:获取输入纹理尺寸。 **float2 GetInputPt()**:获取输入纹理每个像素的尺寸。 @@ 
-146,8 +141,6 @@ void Pass2(uint2 blockStart, uint3 threadId) { **uint GetFrameCount()**:获取当前总计帧数。使用此函数时必须指定 "USE_DYNAMIC"。 -**uint2 GetCursorPos()**:获取当前光标位置。使用此函数时必须指定 "USE_DYNAMIC"。 - **uint2 Rmp8x8(uint id)**:将 0~63 的值以 swizzle 顺序映射到 8x8 的正方形内的坐标,用以提高纹理缓存的命中率。 @@ -163,10 +156,6 @@ void Pass2(uint2 blockStart, uint3 threadId) { **MP_DEBUG**:当前是否为调试模式(调试模式下编译的着色器不进行优化且含有调试信息) -**MP_LAST_PASS**:当前通道是否是当前效果的最后一个通道 - -**MP_LAST_EFFECT**:当前效果是否是当前缩放模式的最后一个效果(最后一个效果要处理视口和光标渲染) - **MP_FP16**:当前是否使用半精度浮点数(由用户指定) **MF、MF1、MF2、...、MF4x4**:遵守 fp16 参数的浮点数类型。当未指定 fp16,它们为 float... 的别名,否则为 min16float... 的别名 diff --git a/docs/Performance optimization.md b/docs/Performance optimization.md index 86bd1bf2c..ad261116d 100644 --- a/docs/Performance optimization.md +++ b/docs/Performance optimization.md @@ -8,8 +8,6 @@ If you cannot run some effects with high computing power requirements (e.g. Anim 1. Change to the variants with lower requirements. For example, Anime4K_Upscale_S is much faster than Anime4K_Upscale_L. CAS is much faster than AdaptiveSharpen. They can effectively improve the smoothness of the effects at the cost of some quality degradation. 2. Change the capture mode. We recommend you to try each of them. -3. Set the frame rate to "unlimited." This will turn off Vsync. It usually increases the frame rate substantially, but may causes the screen to tear. -4. Turn on "allow additional latency to improve performance" when Vsync is on. This will not lead to screen tearing and it also raises the frame rate. However, it will cause an extra 1-frame latency. ## Intermittent lagging @@ -25,6 +23,5 @@ If your graphics card is powerful enough, but you are still experiencing lagging When you need to save electricity or reduce the heat generated, try the following: -1. Change the capture more. The Desktop Duplication capture mode effectively reduces the power consumption if there are a lot of static frames in the game. -2. 
Change the effects to their variants with lower requirements. -3. Limit the frame rate, which may cause screen tearing. +1. Limit the frame rate. +2. Opt for effects that require lower performance. diff --git "a/docs/\345\206\205\347\275\256\346\225\210\346\236\234\344\273\213\347\273\215.md" "b/docs/\345\206\205\347\275\256\346\225\210\346\236\234\344\273\213\347\273\215.md" index 7724a5a19..dc4894efe 100644 --- "a/docs/\345\206\205\347\275\256\346\225\210\346\236\234\344\273\213\347\273\215.md" +++ "b/docs/\345\206\205\347\275\256\346\225\210\346\236\234\344\273\213\347\273\215.md" @@ -18,7 +18,7 @@ Magpie 内置了大量效果供组合使用,大部分提供了参数选项以 * 参数 * Strength:降噪强度 -* Anime4K_Restore_S、Anime4K_Restore_M、Anime4K_Restore_L、Anime4K_Restore_VL、Anime4K_Restore_UL、Anime4K_Restore_Soft_S、Anime4K_Restore_Soft_M、Anime4K_Restore_Soft_L、Anime4K_Restore_Soft_VL 和 Anime4K_Restore_Soft_UL:Anime4K 提供的用于还原动漫画面线条的算法,S->M->L->VL->UL 对性能的需求依次提高,Soft 变体效果稍弱 +* Anime4K_Restore 族:Anime4K 提供的用于还原动漫画面线条的算法,S->M->L->VL->UL 对性能的需求依次提高,Soft 变体效果稍弱 * 输出尺寸:和输入相同 * Anime4K_Thin_HQ:Anime4K 提供的用于细化动漫画面线条的算法 @@ -27,7 +27,7 @@ Magpie 内置了大量效果供组合使用,大部分提供了参数选项以 * Strength:每次迭代的强度 * Iterations:迭代次数。降低 Strength 并提高 Iterations 可以提高画面质量,但会降低速度。 -* Anime4K_Upscale_S、Anime4K_Upscale_L、Anime4K_Upscale_VL、Anime4K_Upscale_UL、Anime4K_Upscale_Denoise_S、Anime4K_Upscale_Denoise_L、Anime4K_Upscale_Denoise_VL、Anime4K_Upscale_Denoise_UL 和 Anime4K_Upscale_GAN_x2_S:Anime4K 提供的动画风格图像缩放算法。Denoise 变体包含降噪效果,GAN 变体处于实验阶段,可以保留更多细节。S、L、VL、UL 对性能的要求依次提高 +* Anime4K_Upscale 族:Anime4K 提供的动画风格图像缩放算法。Denoise 变体包含降噪效果,GAN 变体处于实验阶段,可以保留更多细节。S、L、VL、UL 对性能的要求依次提高 * 输出尺寸:输入的两倍 * Bicubic:双立方(双三次)插值算法 @@ -124,6 +124,9 @@ Magpie 内置了大量效果供组合使用,大部分提供了参数选项以 * Bloom Amount * Filter Kernel Shape +* CuNNy 族:适合视觉小说风格图像的缩放,由 [CuNNy](https://github.com/cunnyplapper/CuNNy) 提供。DS 变体有轻微降噪效果 + * 输出尺寸:输入的两倍 + * Deband:去除色带 * 输出尺寸:和输入相同 * 参数 @@ -221,7 +224,7 @@ Magpie 内置了大量效果供组合使用,大部分提供了参数选项以 * Sharpness:锐化强度 * 备注:只支持放大 -* NNEDI3_nns16_win8x4 和 
NNEDI3_nns64_win8x6:原本用于去隔行,也是高质量的插值算法。NNEDI3_nns64_win8x6 质量更高,速度更慢 +* NNEDI3 族:原本用于去隔行,也是高质量的插值算法。移植自 https://github.com/bjin/mpv-prescalers * 输出尺寸:输入的两倍 * NVSharpen:随 NIS 发布的 NVSharpen 的移植 @@ -232,10 +235,10 @@ Magpie 内置了大量效果供组合使用,大部分提供了参数选项以 * Pixellate:使用 Pixellate 算法缩放输入。适合放大像素画 * 输出尺寸:取决于缩放选项 -* RAVU_Lite_R3:ravu-lite-r3的移植 +* RAVU 族:移植自 https://github.com/bjin/mpv-prescalers * 输出尺寸:输入的两倍 -* RAVU_Zoom_R3:ravu-zoom-r3的移植 +* RAVU-Zoom 族:移植自 https://github.com/bjin/mpv-prescalers * 输出尺寸:取决于缩放选项 * 备注:只支持放大 diff --git "a/docs/\346\200\247\350\203\275\344\274\230\345\214\226\345\273\272\350\256\256.md" "b/docs/\346\200\247\350\203\275\344\274\230\345\214\226\345\273\272\350\256\256.md" index 1874b1a11..72143ac17 100644 --- "a/docs/\346\200\247\350\203\275\344\274\230\345\214\226\345\273\272\350\256\256.md" +++ "b/docs/\346\200\247\350\203\275\344\274\230\345\214\226\345\273\272\350\256\256.md" @@ -8,8 +8,6 @@ 1. 更换为性能需求更低的效果。如 Anime4K_Upscale_S 比 Anime4K_Upscale_L 快的多,CAS 比 AdaptiveSharpen 快的多,它们可以有效提高流畅度,代价是一定程度的画面质量损失。 2. 尝试更换捕获模式。建议你每种模式都尝试一下。 -3. 关闭垂直同步。这通常可以大幅提高帧率,但可能造成画面撕裂。 -4. 开启“垂直同步”并“允许额外的延迟以提高性能”。这个配置不会造成画面撕裂,同时也可以有效提高帧率。缺点是会引入一帧的延迟。 ## 间歇性卡顿 @@ -25,5 +23,5 @@ 在需要节省电量或降低发热时,请尝试下面的操作: -1. 更换捕获模式。如果游戏的静止画面较多,Desktop Duplication 捕获模式可以有效降低功耗。 +1. 限制帧率。 2. 
更换为性能需求更低的效果。 diff --git "a/docs/\346\215\225\350\216\267\346\226\271\345\274\217\345\257\271\346\257\224.md" "b/docs/\346\215\225\350\216\267\346\226\271\345\274\217\345\257\271\346\257\224.md" index 6ac25d9c2..fe0be05cd 100644 --- "a/docs/\346\215\225\350\216\267\346\226\271\345\274\217\345\257\271\346\257\224.md" +++ "b/docs/\346\215\225\350\216\267\346\226\271\345\274\217\345\257\271\346\257\224.md" @@ -1,4 +1,4 @@ -Magpie 提供数种捕获方式,根据使用场景,它们各有优劣。 +Magpie 提供数种捕获方式,根据使用场景,它们各有优劣。无特殊需求应使用 Graphics Capture,它提供最好的兼容性和流畅度。 | | Graphics Capture | Desktop Duplication | GDI | DwmSharedSurface | | :---: | :---: | :---: | :---: |:---: | @@ -6,11 +6,9 @@ Magpie 提供数种捕获方式,根据使用场景,它们各有优劣。 | 支持录制/串流 | 特殊情况下不支持[1] | 否 | 是 | 是 | | 支持源窗口跨越多个屏幕 | 特殊情况下不支持[1] | 否 | 是 | 是 | | 无视 DPI 虚拟化[2] | 否 | 否 | 是| 是 | -| 备注 | 首选捕获方式 | 要求 Win10 v2004;适合静止帧较多的游戏[3];可以捕获到弹窗 | | 占用的显存较少 | +| 备注 | 首选捕获方式 | 要求 Win10 v2004 | | 占用的显存较少 | [1]: (1) 源窗口不支持常规的窗口捕获 (2) 操作系统为 Windows 11 [2]: 系统会对不支持 DPI 缩放的窗口进行双三次插值放大,支持此项的捕获方式可以捕获到放大前的图像 - -[3]: 如果窗口的静止帧较多,使用 Desktop Duplication 可以有效降低功耗 diff --git a/publish.py b/publish.py index 9831aeb6b..64ad05526 100644 --- a/publish.py +++ b/publish.py @@ -18,31 +18,18 @@ except: pass +platform = "x64" +if len(sys.argv) == 2: + platform = sys.argv[1] + if not platform in ["x64", "ARM64"]: + raise Exception("非法参数") + if majorVersion != None: import re - import hashlib - import json - - # 使用第三方库 requests 发送 HTTP 请求,它是 Conan 的依赖项,无需单独安装 - import requests minorVersion = os.environ["MINOR"] patchVersion = os.environ["PATCH"] - - tag = "" - try: - tag = os.environ["TAG"] - except: - pass - - if tag == "": - tag = f"v{majorVersion}.{minorVersion}.{patchVersion}" - - isPrerelease = os.environ["PRERELEASE"].lower() == "true" - - githubAccessToken = os.environ["ACCESS_TOKEN"] - repo = os.environ["GITHUB_REPOSITORY"] - actor = os.environ["GITHUB_ACTOR"] + tag = os.environ["TAG"] ##################################################################### # @@ -111,7 +98,7 @@ 
version_props = "" p = subprocess.run( - f'"{msbuildPath}" -restore -p:RestorePackagesConfig=true;Configuration=Release;Platform=x64;OutDir={os.getcwd()}\\publish\\;CommitId={commit_id}{version_props} Magpie.sln' + f'"{msbuildPath}" -restore -p:RestorePackagesConfig=true;Configuration=Release;Platform={platform};OutDir={os.getcwd()}\\publish\\{platform}\\;CommitId={commit_id}{version_props} Magpie.sln' ) if p.returncode != 0: raise Exception("编译失败") @@ -122,7 +109,7 @@ # ##################################################################### -os.chdir("publish") +os.chdir("publish\\" + platform) # 删除文件,忽略错误 @@ -133,10 +120,9 @@ def remove_file(file): pass -for folder in ["Microsoft.UI.Xaml", "Magpie.App"]: - shutil.rmtree(folder, ignore_errors=True) +shutil.rmtree("Microsoft.UI.Xaml", ignore_errors=True) -for pattern in ["*.pdb", "*.lib", "*.exp", "*.winmd", "*.xml", "*.xbf", "dummy.*"]: +for pattern in ["*.pdb", "*.lib", "*.exp", "*.winmd", "*.xml", "*.xbf"]: for file in glob.glob(pattern): remove_file(file) @@ -225,138 +211,3 @@ def remove_file(file): os.remove("priconfig.xml") print("已修剪 resources.pri", flush=True) - -##################################################################### -# -# 发布 -# -##################################################################### - -if majorVersion != None: - os.chdir("..") - - subprocess.run("git config user.name " + actor) - subprocess.run(f"git config user.email {actor}@users.noreply.github.com") - - subprocess.run( - f"git remote set-url origin https://{githubAccessToken}@github.com/{repo}.git" - ) - - # 打标签 - if subprocess.run(f"git tag -a {tag} -m {tag}").returncode != 0: - raise Exception("打标签失败") - - if subprocess.run("git push origin " + tag).returncode != 0: - raise Exception("推送标签失败") - - print("已创建标签 " + tag, flush=True) - - # 打包成 zip - pkgName = "Magpie-" + tag + "-x64" - shutil.make_archive(pkgName, "zip", "publish") - pkgName += ".zip" - - headers = { - "Accept": "application/vnd.github+json", - 
"Authorization": "Bearer " + githubAccessToken, - "X-GitHub-Api-Version": "2022-11-28", - } - - # 获取前一个发布版本来生成默认发行说明 - prevReleaseTag = None - try: - if isPrerelease: - # 发布预发行版与最新的版本(无论是正式版还是预发行版)对比 - response = requests.get( - f"https://api.github.com/repos/{repo}/releases", - json={ - "per_page": 1 - }, - headers=headers - ) - if response.ok: - prevReleaseTag = response.json()[0]["tag_name"] - else: - # 发布正式版则与最新的正式版对比 - # 由于可以自己选择最新版本,此接口可能不会返回时间上最新发布的版本,不是大问题 - response = requests.get(f"https://api.github.com/repos/{repo}/releases/latest", headers=headers) - if response.ok: - prevReleaseTag = response.json()["tag_name"] - except: - # 忽略错误 - pass - - # 发布 release - if prevReleaseTag == None: - body = "" - else: - # 默认发行说明为比较两个 tag - body = f"https://github.com/{repo}/compare/{prevReleaseTag}...{tag}" - - response = requests.post( - f"https://api.github.com/repos/{repo}/releases", - json={ - "tag_name": tag, - "name": tag, - "prerelease": isPrerelease, - "body": body, - "discussion_category_name": "Announcements", - }, - headers=headers, - ) - if not response.ok: - raise Exception("发布失败") - - upload_url = response.json()["upload_url"] - upload_url = upload_url[: upload_url.find("{")] + "?name=" + pkgName - - # 上传资产 - with open(pkgName, "rb") as f: - # 流式上传 - # https://requests.readthedocs.io/en/latest/user/advanced/#streaming-uploads - response = requests.post( - upload_url, - data=f, - headers={**headers, "Content-Type": "application/zip"}, - ) - - if not response.ok: - raise Exception("上传失败") - - # 计算哈希 - f.seek(0, os.SEEK_SET) - md5 = hashlib.file_digest(f, hashlib.md5).hexdigest() - - print("已发布 " + tag, flush=True) - - # 丢弃当前修改并更新到最新,防止编译时有新的提交 - subprocess.run("git checkout -f") - subprocess.run("git pull") - - # 更新 version.json - # 此步应在发布版本之后,因为程序使用 version.json 检查更新 - with open("version.json", "w", encoding="utf-8") as f: - json.dump( - { - "version": f"{majorVersion}.{minorVersion}.{patchVersion}", - "tag": tag, - "binary": { - "x64": { - "url": 
f"https://github.com/{repo}/releases/download/{tag}/{pkgName}", - "hash": md5, - } - }, - }, - f, - indent=4, - ) - - # 提交对 version.json 的更改 - if subprocess.run("git add version.json").returncode != 0: - raise Exception("git add 失败") - - if subprocess.run('git commit -m "Update version.json"').returncode != 0: - raise Exception("git commit 失败") - - if subprocess.run("git push").returncode != 0: - raise Exception("git push 失败") diff --git a/src/Common.Post.props b/src/Common.Post.props index aa5ae3350..2acd03b7d 100644 --- a/src/Common.Post.props +++ b/src/Common.Post.props @@ -2,6 +2,7 @@ en-US + true true true low diff --git a/src/Effects/ACNet.hlsl b/src/Effects/ACNet.hlsl index 5de3abefc..a80823aca 100644 --- a/src/Effects/ACNet.hlsl +++ b/src/Effects/ACNet.hlsl @@ -3,14 +3,17 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -3741,6 +3744,7 @@ void Pass8(uint2 blockStart, uint3 threadId) { //!PASS 9 //!DESC L9, L10 //!IN INPUT, tex3, tex4 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -3978,8 +3982,9 @@ const static float3x3 yuv2rgb = { void Pass9(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -4277,12 +4282,6 @@ void Pass9(uint2 blockStart, uint3 threadId) { for (uint j = 0; j <= 1; ++j) { uint2 destPos = gxy + uint2(i, j); - if (i != 0 || j != 0) { - if (!CheckViewport(destPos)) { - continue; - } - } - uint index = j * 2 + i; float luma = clamp( target1.x * kernelsL10[0 + index] + @@ -4295,7 +4294,7 @@ void Pass9(uint2 blockStart, uint3 threadId) { target2.w * kernelsL10[28 + index], 0.0f, 1.0f); float2 originUV = mul(rgb2uv, 
INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb); - WriteToOutput(destPos, mul(yuv2rgb, float3(luma, originUV))); + OUTPUT[destPos] = float4(mul(yuv2rgb, float3(luma, originUV)), 1); } } } diff --git a/src/Effects/Anime4K/Anime4K_3D_AA_Upscale_US.hlsl b/src/Effects/Anime4K/Anime4K_3D_AA_Upscale_US.hlsl index ab3e80dbb..e4dff9cf4 100644 --- a/src/Effects/Anime4K/Anime4K_3D_AA_Upscale_US.hlsl +++ b/src/Effects/Anime4K/Anime4K_3D_AA_Upscale_US.hlsl @@ -2,15 +2,18 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Upscale/Anime4K_3DGraphics_AA_Upscale_x2_US.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!SORT_NAME Anime4K_3D_Upscale_1 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -175,13 +178,15 @@ void Pass2(uint2 blockStart, uint3 threadId) { //!PASS 3 //!DESC Conv-4x3x3x4, Depth-to-Space //!IN INPUT, tex2 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 void Pass3(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -221,24 +226,19 @@ void Pass3(uint2 blockStart, uint3 threadId) { result += float4(-3.1127936e-05, 3.3726166e-05, 4.8580805e-05, -9.541029e-06); pos -= 0.5f * outputPt; - WriteToOutput(gxy, result.x + INPUT.SampleLevel(sam1, pos, 0).rgb); + OUTPUT[gxy] = float4(result.x + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); ++gxy.x; pos.x += outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, result.y + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(result.y + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); + ++gxy.y; pos.y += outputPt.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, result.w + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = 
float4(result.w + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); --gxy.x; pos.x -= outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, result.z + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(result.z + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); } diff --git a/src/Effects/Anime4K/Anime4K_3D_Upscale_US.hlsl b/src/Effects/Anime4K/Anime4K_3D_Upscale_US.hlsl index 783052c3f..03bed730a 100644 --- a/src/Effects/Anime4K/Anime4K_3D_Upscale_US.hlsl +++ b/src/Effects/Anime4K/Anime4K_3D_Upscale_US.hlsl @@ -2,15 +2,18 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Upscale/Anime4K_3DGraphics_Upscale_x2_US.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!SORT_NAME Anime4K_3D_Upscale_0 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -176,13 +179,15 @@ void Pass2(uint2 blockStart, uint3 threadId) { //!PASS 3 //!DESC Conv-4x3x3x4, Depth-to-Space //!IN INPUT, tex2 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 void Pass3(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -222,23 +227,18 @@ void Pass3(uint2 blockStart, uint3 threadId) { result += float4(-0.00016697648, -0.00015957489, 0.00017437353, -0.00019393339); pos -= 0.5f * outputPt; - WriteToOutput(gxy, result.x + INPUT.SampleLevel(sam1, pos, 0).rgb); + OUTPUT[gxy] = float4(result.x + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); ++gxy.x; pos.x += outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, result.y + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(result.y + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); + ++gxy.y; pos.y += outputPt.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, result.w + 
INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(result.w + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); --gxy.x; pos.x -= outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, result.z + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(result.z + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Mean.hlsl b/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Mean.hlsl index de0cba4a5..e6a70f689 100644 --- a/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Mean.hlsl +++ b/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Mean.hlsl @@ -3,9 +3,7 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!PARAMETER @@ -19,6 +17,11 @@ float intensitySigma; //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -26,6 +29,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -52,7 +56,9 @@ float gaussian(float x, float rcpS, float m) { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -93,12 +99,6 @@ void Pass1(uint2 blockStart, uint3 threadId) { for (j = 0; j <= 1; ++j) { uint2 destPos = gxy + uint2(i, j); - if (i != 0 || j != 0) { - if (!CheckViewport(gxy)) { - continue; - } - } - float3 sum = 0; float3 n = 0; @@ -118,7 +118,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { } } - WriteToOutput(destPos, sum / n); + OUTPUT[destPos] = float4(sum / n, 1); } } } diff --git a/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Median.hlsl b/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Median.hlsl index 57930a710..2cd812b4c 100644 --- a/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Median.hlsl +++ 
b/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Median.hlsl @@ -2,9 +2,7 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!PARAMETER @@ -18,14 +16,19 @@ float intensitySigma; //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; - //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 @@ -77,7 +80,9 @@ float3 getMedian(float3 v[KERNELLEN], float w[KERNELLEN], float n) { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -126,9 +131,9 @@ void Pass1(uint2 blockStart, uint3 threadId) { n += histogram_wn[i]; } - WriteToOutput(gxy, getMedian(histogram_v, histogram_wn, n)); + OUTPUT[gxy] = float4(getMedian(histogram_v, histogram_wn, n), 1); return; } - WriteToOutput(gxy, getMedian(histogram_v, histogram_w, n)); + OUTPUT[gxy] = float4(getMedian(histogram_v, histogram_w, n), 1); } diff --git a/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Mode.hlsl b/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Mode.hlsl index 1f2ae5a37..579e3b295 100644 --- a/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Mode.hlsl +++ b/src/Effects/Anime4K/Anime4K_Denoise_Bilateral_Mode.hlsl @@ -3,14 +3,9 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 -//!TEXTURE -Texture2D INPUT; - //!PARAMETER //!LABEL Strength //!DEFAULT 0.1 @@ -19,6 +14,14 @@ Texture2D INPUT; //!STEP 0.01 float intensitySigma; +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -26,6 +29,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -52,7 +56,9 @@ float 
gaussian(float x, float s, float m) { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -84,12 +90,6 @@ void Pass1(uint2 blockStart, uint3 threadId) { for (j = 0; j <= 1; ++j) { const uint2 destPos = gxy + uint2(i, j); - if (i != 0 || j != 0) { - if (!CheckViewport(gxy)) { - continue; - } - } - float3 histogram_v[KERNELLEN]; float histogram_l[KERNELLEN]; float histogram_w[KERNELLEN]; @@ -132,7 +132,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { } } - WriteToOutput(destPos, maxv); + OUTPUT[destPos] = float4(maxv, 1); } } } diff --git a/src/Effects/Anime4K/Anime4K_Restore_L.hlsl b/src/Effects/Anime4K/Anime4K_Restore_L.hlsl index 1d0dab8f3..a8b4da719 100644 --- a/src/Effects/Anime4K/Anime4K_Restore_L.hlsl +++ b/src/Effects/Anime4K/Anime4K_Restore_L.hlsl @@ -1,18 +1,17 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Restore/Anime4K_Restore_CNN_L.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME Anime4K_Restore_2 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -38,6 +37,10 @@ Texture2D tex3; //!FORMAT R16G16B16A16_FLOAT Texture2D tex4; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + //!PASS 1 //!DESC Conv-4x3x3x3 @@ -602,13 +605,15 @@ void Pass4(uint2 blockStart, uint3 threadId) { //!PASS 5 //!DESC Conv-3x3x3x16 //!IN INPUT, tex3, tex4 +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 void Pass5(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - uint2 inputSize = GetInputSize(); - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ 
-638,45 +643,45 @@ void Pass5(uint2 blockStart, uint3 threadId) { float4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); float4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); - float4 result = mul(max(a1, 0), float4x4(0.012102164, 0.01385959, 0.018815203, 0.0, -0.017435113, -0.04530735, -0.051318135, 0.0, 0.01267727, 0.01400136, 0.017735276, 0.0, 0.012681183, 0.035241637, 0.03990959, 0.0)); - result += mul(max(b1, 0), float4x4(0.16069227, 0.098007366, 0.076831706, 0.0, 0.081593364, 0.017831434, 0.010174303, 0.0, 0.014732323, 0.02229113, 0.029828338, 0.0, 0.0048171813, 0.051809076, 0.055740006, 0.0)); - result += mul(max(c1, 0), float4x4(0.0347963, -0.014327445, -0.024176419, 0.0, 0.003463003, -0.050532356, -0.06565927, 0.0, 0.082851514, 0.10950989, 0.12022889, 0.0, -0.038950548, -0.015094648, -0.0119305095, 0.0)); - result += mul(max(d1, 0), float4x4(-0.11845135, -0.08067485, -0.06981454, 0.0, 0.00058037776, 0.01160575, 0.014900963, 0.0, -0.0374349, -0.052966926, -0.044557698, 0.0, 0.017439643, 0.005496974, -0.0024181441, 0.0)); - result += mul(max(e1, 0), float4x4(-0.1084345, -0.18271221, -0.18795776, 0.0, 0.110637866, 0.08913364, 0.09161146, 0.0, -0.19889367, -0.17172937, -0.1600661, 0.0, -0.03789556, -0.028977778, -0.029903485, 0.0)); - result += mul(max(f1, 0), float4x4(0.017774954, -0.048732057, -0.061161697, 0.0, 0.022389695, -0.013317256, -0.019972157, 0.0, 0.051979035, 0.08774837, 0.09633588, 0.0, -0.047462203, -0.033091765, -0.028352588, 0.0)); - result += mul(max(g1, 0), float4x4(0.022178177, 0.05031684, 0.05802219, 0.0, -0.027539665, -0.020904189, -0.01800042, 0.0, 0.0019531948, 0.00019749763, -0.0013961957, 0.0, 0.024253767, -0.00058503833, 0.0006474611, 0.0)); - result += mul(max(h1, 0), float4x4(0.06707921, 0.0817431, 0.07561426, 0.0, -0.04157211, -0.006174012, -0.003754037, 0.0, 0.0031168605, 0.02320992, 0.026471246, 0.0, 0.0029530525, -0.004939263, -0.0070194793, 0.0)); - result += mul(max(i1, 0), float4x4(0.03383418, 
0.042321067, 0.04266926, 0.0, -0.043634403, -0.0182769, -0.011314871, 0.0, -0.050008457, -0.003527757, 0.0035165092, 0.0, -0.00016610099, 0.019936454, 0.022199173, 0.0)); - result += mul(max(a2, 0), float4x4(-0.055203374, -0.03910439, -0.03778927, 0.0, 0.027640847, 0.019469904, 0.0277834, 0.0, -0.026225597, 0.04481541, 0.047454204, 0.0, 0.031545334, 0.019874612, 0.011878432, 0.0)); - result += mul(max(b2, 0), float4x4(0.016088601, -0.045959134, -0.048793618, 0.0, -0.009834776, 0.0077799167, 0.00873151, 0.0, 0.031265914, 0.09698676, 0.10005417, 0.0, 0.039120086, 0.0005542848, -0.0049420255, 0.0)); - result += mul(max(c2, 0), float4x4(0.028432969, -0.014792921, -0.026881924, 0.0, -0.00586326, 0.013427183, 0.018215714, 0.0, -0.013559131, 0.017704675, 0.024854776, 0.0, -0.09087544, -0.104627624, -0.0921747, 0.0)); - result += mul(max(d2, 0), float4x4(-0.022899037, 0.026374351, 0.03145993, 0.0, -0.008008749, -0.0013132087, -0.003957525, 0.0, -0.02490554, 0.0020362549, 0.006453752, 0.0, 0.031494617, 0.049864545, 0.04702567, 0.0)); - result += mul(max(e2, 0), float4x4(-0.12318068, -0.121377476, -0.11615006, 0.0, -0.1321696, -0.078085914, -0.07868927, 0.0, -0.072339885, 0.0012095685, 0.010923645, 0.0, 0.10844834, 0.10038668, 0.09919817, 0.0)); - result += mul(max(f2, 0), float4x4(0.058991943, 0.018824834, 0.01659209, 0.0, -0.041878223, 0.013176531, 0.023566704, 0.0, -0.010507848, 0.02042605, 0.028884022, 0.0, -0.1193022, -0.10676289, -0.096668206, 0.0)); - result += mul(max(g2, 0), float4x4(0.023510003, 0.06057355, 0.052194174, 0.0, 0.02304783, 0.031745855, 0.025863871, 0.0, -0.01060811, -0.043136407, -0.03569961, 0.0, -0.022243036, 0.014206766, 0.0032128936, 0.0)); - result += mul(max(h2, 0), float4x4(0.025120225, 0.07386707, 0.07916389, 0.0, -0.020202598, 0.010854587, 0.009825397, 0.0, -0.043466344, -0.049230598, -0.038344223, 0.0, 0.006438127, 0.041072655, 0.036958262, 0.0)); - result += mul(max(i2, 0), float4x4(0.027640026, 0.04239058, 0.055017423, 0.0, -0.002110394, 
0.040088017, 0.045239322, 0.0, -0.020238828, -0.01711292, -0.014726791, 0.0, -0.029621653, -0.007380026, -0.002073584, 0.0)); - result += mul(max(-a1, 0), float4x4(0.008071638, 0.0034274645, -0.0016181463, 0.0, 0.044838928, 0.06936641, 0.072150804, 0.0, 0.0006324625, -0.02223834, -0.021122342, 0.0, 0.043963037, 0.047561962, 0.026419055, 0.0)); - result += mul(max(-b1, 0), float4x4(-0.06605246, -0.011649812, -0.0022502556, 0.0, -0.09256232, -0.06281528, -0.055003755, 0.0, 0.032296494, -0.011113339, -0.015790787, 0.0, 0.05214882, 0.022887057, 0.013746634, 0.0)); - result += mul(max(-c1, 0), float4x4(-0.03587372, 0.018986767, 0.03229596, 0.0, 0.008917248, 0.050303612, 0.06147115, 0.0, 0.01872278, -0.011048741, -0.017369485, 0.0, 0.030770298, 0.0063107815, 0.003187433, 0.0)); - result += mul(max(-d1, 0), float4x4(0.087662674, 0.048391398, 0.042332277, 0.0, 0.0043635606, 0.02438183, 0.020213395, 0.0, -0.023863237, -0.0051179314, -0.0060627074, 0.0, 0.06292237, 0.05821987, 0.051667042, 0.0)); - result += mul(max(-e1, 0), float4x4(-0.048478693, 0.008368922, 0.016874269, 0.0, -0.19261299, -0.1848583, -0.18258469, 0.0, 0.112302095, 0.061518673, 0.058282077, 0.0, 0.024626324, 0.0058449907, 0.006936535, 0.0)); - result += mul(max(-f1, 0), float4x4(-0.04468695, 0.0099176075, 0.025094027, 0.0, 0.05447911, 0.08220857, 0.08161316, 0.0, -0.0007933787, -0.03090106, -0.040217776, 0.0, -0.028044306, -0.050590593, -0.05027328, 0.0)); - result += mul(max(-g1, 0), float4x4(0.029733973, -0.0129855955, -0.019776886, 0.0, 0.01860655, 0.017793713, 0.020113358, 0.0, -0.023667783, -0.0013290358, -0.004159268, 0.0, -0.01960303, -0.012806444, -0.016549494, 0.0)); - result += mul(max(-h1, 0), float4x4(-0.00952229, -0.007181503, -0.0061082463, 0.0, 0.04292393, 0.01510459, 0.0062862537, 0.0, -0.016540393, -0.023619318, -0.02633423, 0.0, -0.06652295, -0.06933143, -0.063913494, 0.0)); - result += mul(max(-i1, 0), float4x4(-0.015281855, -0.012470513, -0.008184894, 0.0, 0.045862548, 0.023707546, 
0.014719574, 0.0, 0.032412887, -0.0038218168, -0.0065955487, 0.0, -0.027728679, -0.04009727, -0.018856067, 0.0)); - result += mul(max(-a2, 0), float4x4(0.042844415, 0.00673587, 0.0038338478, 0.0, -0.031152235, -0.06649269, -0.065986395, 0.0, 0.005666899, -0.015819343, -0.012795757, 0.0, -0.0007617308, 0.021531299, 0.026071105, 0.0)); - result += mul(max(-b2, 0), float4x4(-0.118266046, -0.07211513, -0.058381762, 0.0, 0.02361942, 0.012819485, 0.010511434, 0.0, 0.077196896, 0.003424893, 0.001927401, 0.0, -0.03160996, -0.0034473129, -0.00444674, 0.0)); - result += mul(max(-c2, 0), float4x4(-0.06548674, -0.018152835, 0.0034779215, 0.0, -0.006173449, 0.008357867, -0.0033986098, 0.0, 0.021622533, -0.03722321, -0.045832597, 0.0, -0.011835129, 0.0109178, 0.010480887, 0.0)); - result += mul(max(-d2, 0), float4x4(0.041682176, -0.008985459, -0.018538723, 0.0, -0.054624356, -0.09495616, -0.090484254, 0.0, -0.0060466817, -0.017551763, -0.014151624, 0.0, -0.015683241, -0.012590141, -0.014278323, 0.0)); - result += mul(max(-e2, 0), float4x4(0.073194094, 0.055347454, 0.060976587, 0.0, 0.18175459, 0.13776664, 0.13139476, 0.0, 0.14047755, 0.061971992, 0.056503728, 0.0, 0.0068531767, -0.011873265, -0.016871026, 0.0)); - result += mul(max(-f2, 0), float4x4(-0.041848205, -0.009582, -0.0076929387, 0.0, 0.044274334, 0.04011985, 0.03085897, 0.0, 0.009403278, -0.03346772, -0.04463548, 0.0, 0.04548978, 0.014613167, 0.0055232802, 0.0)); - result += mul(max(-g2, 0), float4x4(0.019901669, -0.0011372451, -0.007423424, 0.0, -0.053240675, -0.07105105, -0.07122227, 0.0, -0.01892976, -0.019795185, -0.019204788, 0.0, 0.01228504, -0.005040437, -0.0010069044, 0.0)); - result += mul(max(-h2, 0), float4x4(0.032843515, 0.014947385, 0.007550199, 0.0, -0.0006476342, -0.020907652, -0.030297596, 0.0, -0.015617971, -0.029182931, -0.038677275, 0.0, 0.037908908, -0.018132487, -0.020226713, 0.0)); - result += mul(max(-i2, 0), float4x4(0.03232915, 0.02915194, 0.014929652, 0.0, 0.016676396, 0.004807404, 
-0.0008906752, 0.0, 0.0076904814, 0.00541351, -0.0048240838, 0.0, 0.03459369, -0.012969539, -0.024712864, 0.0)); - result += float4(-0.0096404655, 0.0022038757, 0.0035988842, 0.0); - - result += INPUT.SampleLevel(sam, pos, 0); - - WriteToOutput(gxy, result.rgb); + float3 result = mul(max(a1, 0), float4x3(0.012102164, 0.01385959, 0.018815203, -0.017435113, -0.04530735, -0.051318135, 0.01267727, 0.01400136, 0.017735276, 0.012681183, 0.035241637, 0.03990959)); + result += mul(max(b1, 0), float4x3(0.16069227, 0.098007366, 0.076831706, 0.081593364, 0.017831434, 0.010174303, 0.014732323, 0.02229113, 0.029828338, 0.0048171813, 0.051809076, 0.055740006)); + result += mul(max(c1, 0), float4x3(0.0347963, -0.014327445, -0.024176419, 0.003463003, -0.050532356, -0.06565927, 0.082851514, 0.10950989, 0.12022889, -0.038950548, -0.015094648, -0.0119305095)); + result += mul(max(d1, 0), float4x3(-0.11845135, -0.08067485, -0.06981454, 0.00058037776, 0.01160575, 0.014900963, -0.0374349, -0.052966926, -0.044557698, 0.017439643, 0.005496974, -0.0024181441)); + result += mul(max(e1, 0), float4x3(-0.1084345, -0.18271221, -0.18795776, 0.110637866, 0.08913364, 0.09161146, -0.19889367, -0.17172937, -0.1600661, -0.03789556, -0.028977778, -0.029903485)); + result += mul(max(f1, 0), float4x3(0.017774954, -0.048732057, -0.061161697, 0.022389695, -0.013317256, -0.019972157, 0.051979035, 0.08774837, 0.09633588, -0.047462203, -0.033091765, -0.028352588)); + result += mul(max(g1, 0), float4x3(0.022178177, 0.05031684, 0.05802219, -0.027539665, -0.020904189, -0.01800042, 0.0019531948, 0.00019749763, -0.0013961957, 0.024253767, -0.00058503833, 0.0006474611)); + result += mul(max(h1, 0), float4x3(0.06707921, 0.0817431, 0.07561426, -0.04157211, -0.006174012, -0.003754037, 0.0031168605, 0.02320992, 0.026471246, 0.0029530525, -0.004939263, -0.0070194793)); + result += mul(max(i1, 0), float4x3(0.03383418, 0.042321067, 0.04266926, -0.043634403, -0.0182769, -0.011314871, -0.050008457, -0.003527757, 
0.0035165092, -0.00016610099, 0.019936454, 0.022199173)); + result += mul(max(a2, 0), float4x3(-0.055203374, -0.03910439, -0.03778927, 0.027640847, 0.019469904, 0.0277834, -0.026225597, 0.04481541, 0.047454204, 0.031545334, 0.019874612, 0.011878432)); + result += mul(max(b2, 0), float4x3(0.016088601, -0.045959134, -0.048793618, -0.009834776, 0.0077799167, 0.00873151, 0.031265914, 0.09698676, 0.10005417, 0.039120086, 0.0005542848, -0.0049420255)); + result += mul(max(c2, 0), float4x3(0.028432969, -0.014792921, -0.026881924, -0.00586326, 0.013427183, 0.018215714, -0.013559131, 0.017704675, 0.024854776, -0.09087544, -0.104627624, -0.0921747)); + result += mul(max(d2, 0), float4x3(-0.022899037, 0.026374351, 0.03145993, -0.008008749, -0.0013132087, -0.003957525, -0.02490554, 0.0020362549, 0.006453752, 0.031494617, 0.049864545, 0.04702567)); + result += mul(max(e2, 0), float4x3(-0.12318068, -0.121377476, -0.11615006, -0.1321696, -0.078085914, -0.07868927, -0.072339885, 0.0012095685, 0.010923645, 0.10844834, 0.10038668, 0.09919817)); + result += mul(max(f2, 0), float4x3(0.058991943, 0.018824834, 0.01659209, -0.041878223, 0.013176531, 0.023566704, -0.010507848, 0.02042605, 0.028884022, -0.1193022, -0.10676289, -0.096668206)); + result += mul(max(g2, 0), float4x3(0.023510003, 0.06057355, 0.052194174, 0.02304783, 0.031745855, 0.025863871, -0.01060811, -0.043136407, -0.03569961, -0.022243036, 0.014206766, 0.0032128936)); + result += mul(max(h2, 0), float4x3(0.025120225, 0.07386707, 0.07916389, -0.020202598, 0.010854587, 0.009825397, -0.043466344, -0.049230598, -0.038344223, 0.006438127, 0.041072655, 0.036958262)); + result += mul(max(i2, 0), float4x3(0.027640026, 0.04239058, 0.055017423, -0.002110394, 0.040088017, 0.045239322, -0.020238828, -0.01711292, -0.014726791, -0.029621653, -0.007380026, -0.002073584)); + result += mul(max(-a1, 0), float4x3(0.008071638, 0.0034274645, -0.0016181463, 0.044838928, 0.06936641, 0.072150804, 0.0006324625, -0.02223834, -0.021122342, 
0.043963037, 0.047561962, 0.026419055)); + result += mul(max(-b1, 0), float4x3(-0.06605246, -0.011649812, -0.0022502556, -0.09256232, -0.06281528, -0.055003755, 0.032296494, -0.011113339, -0.015790787, 0.05214882, 0.022887057, 0.013746634)); + result += mul(max(-c1, 0), float4x3(-0.03587372, 0.018986767, 0.03229596, 0.008917248, 0.050303612, 0.06147115, 0.01872278, -0.011048741, -0.017369485, 0.030770298, 0.0063107815, 0.003187433)); + result += mul(max(-d1, 0), float4x3(0.087662674, 0.048391398, 0.042332277, 0.0043635606, 0.02438183, 0.020213395, -0.023863237, -0.0051179314, -0.0060627074, 0.06292237, 0.05821987, 0.051667042)); + result += mul(max(-e1, 0), float4x3(-0.048478693, 0.008368922, 0.016874269, -0.19261299, -0.1848583, -0.18258469, 0.112302095, 0.061518673, 0.058282077, 0.024626324, 0.0058449907, 0.006936535)); + result += mul(max(-f1, 0), float4x3(-0.04468695, 0.0099176075, 0.025094027, 0.05447911, 0.08220857, 0.08161316, -0.0007933787, -0.03090106, -0.040217776, -0.028044306, -0.050590593, -0.05027328)); + result += mul(max(-g1, 0), float4x3(0.029733973, -0.0129855955, -0.019776886, 0.01860655, 0.017793713, 0.020113358, -0.023667783, -0.0013290358, -0.004159268, -0.01960303, -0.012806444, -0.016549494)); + result += mul(max(-h1, 0), float4x3(-0.00952229, -0.007181503, -0.0061082463, 0.04292393, 0.01510459, 0.0062862537, -0.016540393, -0.023619318, -0.02633423, -0.06652295, -0.06933143, -0.063913494)); + result += mul(max(-i1, 0), float4x3(-0.015281855, -0.012470513, -0.008184894, 0.045862548, 0.023707546, 0.014719574, 0.032412887, -0.0038218168, -0.0065955487, -0.027728679, -0.04009727, -0.018856067)); + result += mul(max(-a2, 0), float4x3(0.042844415, 0.00673587, 0.0038338478, -0.031152235, -0.06649269, -0.065986395, 0.005666899, -0.015819343, -0.012795757, -0.0007617308, 0.021531299, 0.026071105)); + result += mul(max(-b2, 0), float4x3(-0.118266046, -0.07211513, -0.058381762, 0.02361942, 0.012819485, 0.010511434, 0.077196896, 0.003424893, 
0.001927401, -0.03160996, -0.0034473129, -0.00444674)); + result += mul(max(-c2, 0), float4x3(-0.06548674, -0.018152835, 0.0034779215, -0.006173449, 0.008357867, -0.0033986098, 0.021622533, -0.03722321, -0.045832597, -0.011835129, 0.0109178, 0.010480887)); + result += mul(max(-d2, 0), float4x3(0.041682176, -0.008985459, -0.018538723, -0.054624356, -0.09495616, -0.090484254, -0.0060466817, -0.017551763, -0.014151624, -0.015683241, -0.012590141, -0.014278323)); + result += mul(max(-e2, 0), float4x3(0.073194094, 0.055347454, 0.060976587, 0.18175459, 0.13776664, 0.13139476, 0.14047755, 0.061971992, 0.056503728, 0.0068531767, -0.011873265, -0.016871026)); + result += mul(max(-f2, 0), float4x3(-0.041848205, -0.009582, -0.0076929387, 0.044274334, 0.04011985, 0.03085897, 0.009403278, -0.03346772, -0.04463548, 0.04548978, 0.014613167, 0.0055232802)); + result += mul(max(-g2, 0), float4x3(0.019901669, -0.0011372451, -0.007423424, -0.053240675, -0.07105105, -0.07122227, -0.01892976, -0.019795185, -0.019204788, 0.01228504, -0.005040437, -0.0010069044)); + result += mul(max(-h2, 0), float4x3(0.032843515, 0.014947385, 0.007550199, -0.0006476342, -0.020907652, -0.030297596, -0.015617971, -0.029182931, -0.038677275, 0.037908908, -0.018132487, -0.020226713)); + result += mul(max(-i2, 0), float4x3(0.03232915, 0.02915194, 0.014929652, 0.016676396, 0.004807404, -0.0008906752, 0.0076904814, 0.00541351, -0.0048240838, 0.03459369, -0.012969539, -0.024712864)); + result += float3(-0.0096404655, 0.0022038757, 0.0035988842); + + result += INPUT.SampleLevel(sam, pos, 0).rgb; + + OUTPUT[gxy] = float4(result, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Restore_M.hlsl b/src/Effects/Anime4K/Anime4K_Restore_M.hlsl index 3b6b15760..d131af1d3 100644 --- a/src/Effects/Anime4K/Anime4K_Restore_M.hlsl +++ b/src/Effects/Anime4K/Anime4K_Restore_M.hlsl @@ -2,18 +2,17 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Restore/Anime4K_Restore_CNN_M.glsl //!MAGPIE EFFECT -//!VERSION 3 
-//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME Anime4K_Restore_1 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -51,6 +50,10 @@ Texture2D tex5; //!FORMAT R16G16B16A16_FLOAT Texture2D tex6; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + //!PASS 1 //!DESC Conv-4x3x3x3 @@ -495,15 +498,18 @@ void Pass6(uint2 blockStart, uint3 threadId) { //!PASS 7 //!DESC Conv-4x3x3x8, Conv-3x1x1x56 //!IN INPUT, tex1, tex2, tex3, tex4, tex5, tex6 +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 void Pass7(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - uint2 inputSize = GetInputSize(); - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } + float2 inputPt = GetInputPt(); float2 pos = (gxy + 0.5f) * inputPt; @@ -564,5 +570,5 @@ void Pass7(uint2 blockStart, uint3 threadId) { result += mul(max(-src7, 0), float4x3(0.10676299, 0.118409514, 0.10618478, -0.05880252, -0.06488367, -0.06432695, 0.019221924, 0.017602798, 0.017413978, -0.07512528, -0.080483615, -0.066218294)); result += float3(-0.010478934, -0.008364784, -0.010246552); - WriteToOutput(gxy, result + origin); + OUTPUT[gxy] = float4(result + origin, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Restore_S.hlsl b/src/Effects/Anime4K/Anime4K_Restore_S.hlsl index 6ab79dcdf..6d4eb7b71 100644 --- a/src/Effects/Anime4K/Anime4K_Restore_S.hlsl +++ b/src/Effects/Anime4K/Anime4K_Restore_S.hlsl @@ -2,15 +2,18 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Restore/Anime4K_Restore_CNN_S.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME Anime4K_Restore_0 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + 
//!SAMPLER //!FILTER POINT SamplerState sam; @@ -246,13 +249,15 @@ void Pass3(uint2 blockStart, uint3 threadId) { //!PASS 4 //!DESC Conv-3x3x3x8 //!IN INPUT, tex1 +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 void Pass4(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - uint2 inputSize = GetInputSize(); - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -294,5 +299,5 @@ void Pass4(uint2 blockStart, uint3 threadId) { result += INPUT.SampleLevel(sam, pos, 0).rgb; - WriteToOutput(gxy, result); + OUTPUT[gxy] = float4(result, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Restore_Soft_L.hlsl b/src/Effects/Anime4K/Anime4K_Restore_Soft_L.hlsl index 5fcd04eff..1e117b8cb 100644 --- a/src/Effects/Anime4K/Anime4K_Restore_Soft_L.hlsl +++ b/src/Effects/Anime4K/Anime4K_Restore_Soft_L.hlsl @@ -1,18 +1,17 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Restore/Anime4K_Restore_CNN_Soft_L.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME Anime4K_Restore_Soft_2 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -38,6 +37,10 @@ Texture2D tex3; //!FORMAT R16G16B16A16_FLOAT Texture2D tex4; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + //!PASS 1 //!DESC Conv-4x3x3x3 @@ -602,13 +605,15 @@ void Pass4(uint2 blockStart, uint3 threadId) { //!PASS 5 //!DESC Conv-3x3x3x16 //!IN INPUT, tex3, tex4 +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 void Pass5(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - uint2 inputSize = GetInputSize(); - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -638,45 +643,45 @@ void Pass5(uint2 
blockStart, uint3 threadId) { float4 h2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); float4 i2 = tex4.SampleLevel(sam, pos + float2(inputPt.x, inputPt.y), 0); - float4 result = mul(max(a1, 0), float4x4(-0.01858372, 0.017144108, 0.02794388, 0.0, 0.0129101565, -0.0073674284, -0.011766938, 0.0, 0.01970984, 0.01209068, 0.009530311, 0.0, -0.009190449, -0.006996753, -0.0038750458, 0.0)); - result += mul(max(b1, 0), float4x4(0.15856947, 0.10162126, 0.08489005, 0.0, 0.038381726, -0.017771017, -0.03226132, 0.0, -0.011787879, -0.0152445, -0.007564454, 0.0, 0.055921376, 0.08389841, 0.08452836, 0.0)); - result += mul(max(c1, 0), float4x4(0.026705442, -0.0070655374, -0.018199183, 0.0, 0.016254421, -0.025398912, -0.03461042, 0.0, 0.03950644, 0.06586101, 0.0707467, 0.0, -0.03793455, -0.04957139, -0.04777402, 0.0)); - result += mul(max(d1, 0), float4x4(-0.115341224, -0.04463122, -0.016549354, 0.0, -0.059433736, -0.04303295, -0.042805545, 0.0, 0.010830498, -0.011057443, -0.0141014, 0.0, 0.067396216, 0.06553637, 0.06705378, 0.0)); - result += mul(max(e1, 0), float4x4(-0.12767975, -0.19935511, -0.20109995, 0.0, 0.11554901, 0.11426503, 0.11161185, 0.0, -0.22092125, -0.22041021, -0.2142712, 0.0, -0.06326996, -0.061314825, -0.059039716, 0.0)); - result += mul(max(f1, 0), float4x4(0.007717391, -0.046238754, -0.056983955, 0.0, 0.021419598, 0.0036924274, -0.00033630748, 0.0, 0.053556852, 0.0824714, 0.08295022, 0.0, -0.09881205, -0.043157153, -0.040801782, 0.0)); - result += mul(max(g1, 0), float4x4(0.0052828738, 0.049702674, 0.056108, 0.0, 0.009478552, 0.010345037, 0.0094180945, 0.0, -0.010412882, 0.0006965096, 0.0021917222, 0.0, -0.010701383, -0.023212843, -0.024252625, 0.0)); - result += mul(max(h1, 0), float4x4(0.07542127, 0.0739301, 0.06642962, 0.0, -0.08054489, -0.037553925, -0.026762033, 0.0, 0.09727509, 0.102272816, 0.097533874, 0.0, 0.01325714, -0.004582272, -0.006647532, 0.0)); - result += mul(max(i1, 0), float4x4(0.03005975, 0.017012767, 0.007840201, 0.0, -0.028650383, 
-0.0019064787, 0.01083078, 0.0, -0.071352504, -0.019919744, -0.008299795, 0.0, 0.023253804, 0.042413715, 0.04681489, 0.0)); - result += mul(max(a2, 0), float4x4(-0.052201163, -0.021727808, -0.020888992, 0.0, 0.008365179, -0.016546093, -0.0111018475, 0.0, -0.06236095, -0.019278256, -0.021443967, 0.0, 0.0029381379, -0.0033039588, -0.006425339, 0.0)); - result += mul(max(b2, 0), float4x4(0.02397296, -0.041659098, -0.050882675, 0.0, -0.013487, 0.0067506596, 0.005435185, 0.0, 0.066447854, 0.13331215, 0.13754861, 0.0, 0.028300207, -0.0048033795, -0.010058485, 0.0)); - result += mul(max(c2, 0), float4x4(0.08140248, 0.018564016, 0.0036607496, 0.0, -0.0112075955, 0.0022339798, 0.0045722146, 0.0, -0.045716517, -0.0076076477, -0.0016939791, 0.0, -0.030486025, -0.07539711, -0.07185734, 0.0)); - result += mul(max(d2, 0), float4x4(-0.0155724995, 0.048904862, 0.059412133, 0.0, -0.013894624, -0.0061430936, -0.011662488, 0.0, -0.0052947477, -0.0176474, -0.018611705, 0.0, 0.022075793, 0.031703226, 0.026735537, 0.0)); - result += mul(max(e2, 0), float4x4(-0.18287502, -0.18703277, -0.18331653, 0.0, -0.08616293, -0.011741755, -0.009296464, 0.0, -0.054274965, 0.016794622, 0.022522328, 0.0, 0.06965258, 0.08260611, 0.08285337, 0.0)); - result += mul(max(f2, 0), float4x4(0.08107809, 0.0336241, 0.025449684, 0.0, -0.031931, 0.01179566, 0.019694995, 0.0, 0.025930194, 0.042288166, 0.04673656, 0.0, -0.14357394, -0.11003491, -0.094090074, 0.0)); - result += mul(max(g2, 0), float4x4(0.007188181, 0.050626095, 0.050705966, 0.0, -0.008030409, -0.018670242, -0.019766346, 0.0, 0.014874803, -0.03657919, -0.034044486, 0.0, -0.011178416, -0.004358302, -0.013611815, 0.0)); - result += mul(max(h2, 0), float4x4(0.07987872, 0.11399873, 0.12089382, 0.0, -0.01514355, 0.0068139364, 0.010206274, 0.0, -0.0005701044, -0.011158322, 0.006484812, 0.0, 0.002018227, 0.043359682, 0.042987905, 0.0)); - result += mul(max(i2, 0), float4x4(0.0017806455, -0.0015697709, -0.0018252691, 0.0, 0.0058658062, 0.021681193, 
0.028615465, 0.0, -0.054827355, -0.04541651, -0.027485048, 0.0, -0.017649114, 0.017717479, 0.027309911, 0.0)); - result += mul(max(-a1, 0), float4x4(0.02555098, -0.0028983613, -0.005134733, 0.0, -0.0029332284, 0.015552135, 0.022189403, 0.0, -0.019786593, -0.0031676649, -0.0014604586, 0.0, 0.06648065, 0.0672302, 0.04586375, 0.0)); - result += mul(max(-b1, 0), float4x4(-0.06674696, 0.002328631, 0.014039355, 0.0, -0.03636718, 0.014560653, 0.028076636, 0.0, 0.042305287, 0.015249338, 0.0136925895, 0.0, 0.033586804, 0.00701501, -0.011588751, 0.0)); - result += mul(max(-c1, 0), float4x4(-0.039022632, 0.015240631, 0.02699061, 0.0, -0.02614261, 0.0051843156, 0.012590042, 0.0, 0.015304643, -0.022641543, -0.030434309, 0.0, 0.016862666, 0.020819275, 0.022333218, 0.0)); - result += mul(max(-d1, 0), float4x4(0.08056982, 0.026592938, 0.009744146, 0.0, 0.08762212, 0.10150359, 0.09662005, 0.0, -0.044551965, -0.016349116, -0.014629014, 0.0, -0.014341297, -0.030914815, -0.038747486, 0.0)); - result += mul(max(-e1, 0), float4x4(-0.048734166, 0.019775594, 0.03124684, 0.0, -0.2345022, -0.23639877, -0.22958128, 0.0, 0.12412277, 0.10245112, 0.10389806, 0.0, -0.0030797734, -0.01989389, -0.02020691, 0.0)); - result += mul(max(-f1, 0), float4x4(-0.0133485105, 0.029644802, 0.041630358, 0.0, 0.041081797, 0.059993293, 0.060033485, 0.0, -0.02155099, -0.035306025, -0.03838472, 0.0, 0.017466968, -0.01866363, -0.004764589, 0.0)); - result += mul(max(-g1, 0), float4x4(0.0030783121, -0.04064586, -0.04504904, 0.0, -0.023528632, -0.029308239, -0.022441925, 0.0, 0.020095564, 0.018979732, 0.015117934, 0.0, 0.008429918, 0.021180628, 0.020137152, 0.0)); - result += mul(max(-h1, 0), float4x4(0.0012200709, 0.013313984, 0.014122978, 0.0, 0.08750284, 0.038747437, 0.027102578, 0.0, -0.09627132, -0.09706183, -0.09405641, 0.0, -0.05180081, -0.03555434, -0.021694236, 0.0)); - result += mul(max(-i1, 0), float4x4(-0.022396728, -0.018316073, -0.01250564, 0.0, 0.045423746, 0.025315331, 0.010639915, 0.0, 0.05618814, 
0.022210265, 0.014195103, 0.0, -0.014828652, -0.010245087, 0.0020570823, 0.0)); - result += mul(max(-a2, 0), float4x4(0.046651457, 0.001333767, -0.003572458, 0.0, -0.0077845114, -0.012861641, -0.015116351, 0.0, 0.01338984, 0.029198132, 0.026183384, 0.0, 0.0014878022, 0.020025207, 0.024829973, 0.0)); - result += mul(max(-b2, 0), float4x4(-0.09506711, -0.06541528, -0.051106647, 0.0, 0.02552611, 0.01181497, 0.0020236392, 0.0, 0.03234602, -0.03153924, -0.035502207, 0.0, -0.034516744, 0.00018784113, 0.0085376045, 0.0)); - result += mul(max(-c2, 0), float4x4(-0.05945615, -0.0046793907, 0.011128929, 0.0, -0.0061961384, -0.0040663416, -0.010319631, 0.0, 0.044197917, -0.033448357, -0.04109943, 0.0, -0.04109929, 0.006773195, 0.016976412, 0.0)); - result += mul(max(-d2, 0), float4x4(0.02855516, -0.033051047, -0.04864978, 0.0, -0.06393814, -0.082921155, -0.0730681, 0.0, -0.058905125, -0.038639963, -0.027698845, 0.0, -0.013616608, -0.007876684, -0.006182652, 0.0)); - result += mul(max(-e2, 0), float4x4(0.15423118, 0.14667909, 0.14534634, 0.0, 0.1485341, 0.096721016, 0.0820024, 0.0, 0.1263968, 0.088775866, 0.083860956, 0.0, 0.04213644, 0.020989005, 0.010447147, 0.0)); - result += mul(max(-f2, 0), float4x4(-0.068275765, -0.018390667, -0.011452603, 0.0, 0.03738383, 0.019398715, 0.005998161, 0.0, -0.0011161854, -0.039955888, -0.04444185, 0.0, 0.052985556, 0.017621813, 0.009551621, 0.0)); - result += mul(max(-g2, 0), float4x4(0.01387326, -0.0033411914, -0.009420935, 0.0, -0.034494568, -0.019219222, -0.009562797, 0.0, 0.0074023325, 0.022065453, 0.027121471, 0.0, 0.00019609048, -0.0042242454, 2.0403608e-05, 0.0)); - result += mul(max(-h2, 0), float4x4(-0.015793918, -0.024342488, -0.037188973, 0.0, 0.004534637, -0.025236975, -0.028567247, 0.0, -0.055682972, -0.054670315, -0.06584981, 0.0, 0.043045517, -0.0075941198, -0.014196169, 0.0)); - result += mul(max(-i2, 0), float4x4(0.0132598495, 0.01775289, 0.017206183, 0.0, 0.010604703, -0.007352816, -0.017301153, 0.0, 0.030967329, 
0.027615465, 0.0145311365, 0.0, 0.008636854, -0.033379406, -0.042725433, 0.0)); - result += float4(-0.0056639817, -0.0017339308, -0.0011913306, 0.0); - - result += INPUT.SampleLevel(sam, pos, 0); - - WriteToOutput(gxy, result.rgb); + float3 result = mul(max(a1, 0), float4x3(-0.01858372, 0.017144108, 0.02794388, 0.0129101565, -0.0073674284, -0.011766938, 0.01970984, 0.01209068, 0.009530311, -0.009190449, -0.006996753, -0.0038750458)); + result += mul(max(b1, 0), float4x3(0.15856947, 0.10162126, 0.08489005, 0.038381726, -0.017771017, -0.03226132, -0.011787879, -0.0152445, -0.007564454, 0.055921376, 0.08389841, 0.08452836)); + result += mul(max(c1, 0), float4x3(0.026705442, -0.0070655374, -0.018199183, 0.016254421, -0.025398912, -0.03461042, 0.03950644, 0.06586101, 0.0707467, -0.03793455, -0.04957139, -0.04777402)); + result += mul(max(d1, 0), float4x3(-0.115341224, -0.04463122, -0.016549354, -0.059433736, -0.04303295, -0.042805545, 0.010830498, -0.011057443, -0.0141014, 0.067396216, 0.06553637, 0.06705378)); + result += mul(max(e1, 0), float4x3(-0.12767975, -0.19935511, -0.20109995, 0.11554901, 0.11426503, 0.11161185, -0.22092125, -0.22041021, -0.2142712, -0.06326996, -0.061314825, -0.059039716)); + result += mul(max(f1, 0), float4x3(0.007717391, -0.046238754, -0.056983955, 0.021419598, 0.0036924274, -0.00033630748, 0.053556852, 0.0824714, 0.08295022, -0.09881205, -0.043157153, -0.040801782)); + result += mul(max(g1, 0), float4x3(0.0052828738, 0.049702674, 0.056108, 0.009478552, 0.010345037, 0.0094180945, -0.010412882, 0.0006965096, 0.0021917222, -0.010701383, -0.023212843, -0.024252625)); + result += mul(max(h1, 0), float4x3(0.07542127, 0.0739301, 0.06642962, -0.08054489, -0.037553925, -0.026762033, 0.09727509, 0.102272816, 0.097533874, 0.01325714, -0.004582272, -0.006647532)); + result += mul(max(i1, 0), float4x3(0.03005975, 0.017012767, 0.007840201, -0.028650383, -0.0019064787, 0.01083078, -0.071352504, -0.019919744, -0.008299795, 0.023253804, 0.042413715, 
0.04681489)); + result += mul(max(a2, 0), float4x3(-0.052201163, -0.021727808, -0.020888992, 0.008365179, -0.016546093, -0.0111018475, -0.06236095, -0.019278256, -0.021443967, 0.0029381379, -0.0033039588, -0.006425339)); + result += mul(max(b2, 0), float4x3(0.02397296, -0.041659098, -0.050882675, -0.013487, 0.0067506596, 0.005435185, 0.066447854, 0.13331215, 0.13754861, 0.028300207, -0.0048033795, -0.010058485)); + result += mul(max(c2, 0), float4x3(0.08140248, 0.018564016, 0.0036607496, -0.0112075955, 0.0022339798, 0.0045722146, -0.045716517, -0.0076076477, -0.0016939791, -0.030486025, -0.07539711, -0.07185734)); + result += mul(max(d2, 0), float4x3(-0.0155724995, 0.048904862, 0.059412133, -0.013894624, -0.0061430936, -0.011662488, -0.0052947477, -0.0176474, -0.018611705, 0.022075793, 0.031703226, 0.026735537)); + result += mul(max(e2, 0), float4x3(-0.18287502, -0.18703277, -0.18331653, -0.08616293, -0.011741755, -0.009296464, -0.054274965, 0.016794622, 0.022522328, 0.06965258, 0.08260611, 0.08285337)); + result += mul(max(f2, 0), float4x3(0.08107809, 0.0336241, 0.025449684, -0.031931, 0.01179566, 0.019694995, 0.025930194, 0.042288166, 0.04673656, -0.14357394, -0.11003491, -0.094090074)); + result += mul(max(g2, 0), float4x3(0.007188181, 0.050626095, 0.050705966, -0.008030409, -0.018670242, -0.019766346, 0.014874803, -0.03657919, -0.034044486, -0.011178416, -0.004358302, -0.013611815)); + result += mul(max(h2, 0), float4x3(0.07987872, 0.11399873, 0.12089382, -0.01514355, 0.0068139364, 0.010206274, -0.0005701044, -0.011158322, 0.006484812, 0.002018227, 0.043359682, 0.042987905)); + result += mul(max(i2, 0), float4x3(0.0017806455, -0.0015697709, -0.0018252691, 0.0058658062, 0.021681193, 0.028615465, -0.054827355, -0.04541651, -0.027485048, -0.017649114, 0.017717479, 0.027309911)); + result += mul(max(-a1, 0), float4x3(0.02555098, -0.0028983613, -0.005134733, -0.0029332284, 0.015552135, 0.022189403, -0.019786593, -0.0031676649, -0.0014604586, 0.06648065, 0.0672302, 
0.04586375)); + result += mul(max(-b1, 0), float4x3(-0.06674696, 0.002328631, 0.014039355, -0.03636718, 0.014560653, 0.028076636, 0.042305287, 0.015249338, 0.0136925895, 0.033586804, 0.00701501, -0.011588751)); + result += mul(max(-c1, 0), float4x3(-0.039022632, 0.015240631, 0.02699061, -0.02614261, 0.0051843156, 0.012590042, 0.015304643, -0.022641543, -0.030434309, 0.016862666, 0.020819275, 0.022333218)); + result += mul(max(-d1, 0), float4x3(0.08056982, 0.026592938, 0.009744146, 0.08762212, 0.10150359, 0.09662005, -0.044551965, -0.016349116, -0.014629014, -0.014341297, -0.030914815, -0.038747486)); + result += mul(max(-e1, 0), float4x3(-0.048734166, 0.019775594, 0.03124684, -0.2345022, -0.23639877, -0.22958128, 0.12412277, 0.10245112, 0.10389806, -0.0030797734, -0.01989389, -0.02020691)); + result += mul(max(-f1, 0), float4x3(-0.0133485105, 0.029644802, 0.041630358, 0.041081797, 0.059993293, 0.060033485, -0.02155099, -0.035306025, -0.03838472, 0.017466968, -0.01866363, -0.004764589)); + result += mul(max(-g1, 0), float4x3(0.0030783121, -0.04064586, -0.04504904, -0.023528632, -0.029308239, -0.022441925, 0.020095564, 0.018979732, 0.015117934, 0.008429918, 0.021180628, 0.020137152)); + result += mul(max(-h1, 0), float4x3(0.0012200709, 0.013313984, 0.014122978, 0.08750284, 0.038747437, 0.027102578, -0.09627132, -0.09706183, -0.09405641, -0.05180081, -0.03555434, -0.021694236)); + result += mul(max(-i1, 0), float4x3(-0.022396728, -0.018316073, -0.01250564, 0.045423746, 0.025315331, 0.010639915, 0.05618814, 0.022210265, 0.014195103, -0.014828652, -0.010245087, 0.0020570823)); + result += mul(max(-a2, 0), float4x3(0.046651457, 0.001333767, -0.003572458, -0.0077845114, -0.012861641, -0.015116351, 0.01338984, 0.029198132, 0.026183384, 0.0014878022, 0.020025207, 0.024829973)); + result += mul(max(-b2, 0), float4x3(-0.09506711, -0.06541528, -0.051106647, 0.02552611, 0.01181497, 0.0020236392, 0.03234602, -0.03153924, -0.035502207, -0.034516744, 0.00018784113, 0.0085376045)); 
+ result += mul(max(-c2, 0), float4x3(-0.05945615, -0.0046793907, 0.011128929, -0.0061961384, -0.0040663416, -0.010319631, 0.044197917, -0.033448357, -0.04109943, -0.04109929, 0.006773195, 0.016976412)); + result += mul(max(-d2, 0), float4x3(0.02855516, -0.033051047, -0.04864978, -0.06393814, -0.082921155, -0.0730681, -0.058905125, -0.038639963, -0.027698845, -0.013616608, -0.007876684, -0.006182652)); + result += mul(max(-e2, 0), float4x3(0.15423118, 0.14667909, 0.14534634, 0.1485341, 0.096721016, 0.0820024, 0.1263968, 0.088775866, 0.083860956, 0.04213644, 0.020989005, 0.010447147)); + result += mul(max(-f2, 0), float4x3(-0.068275765, -0.018390667, -0.011452603, 0.03738383, 0.019398715, 0.005998161, -0.0011161854, -0.039955888, -0.04444185, 0.052985556, 0.017621813, 0.009551621)); + result += mul(max(-g2, 0), float4x3(0.01387326, -0.0033411914, -0.009420935, -0.034494568, -0.019219222, -0.009562797, 0.0074023325, 0.022065453, 0.027121471, 0.00019609048, -0.0042242454, 2.0403608e-05)); + result += mul(max(-h2, 0), float4x3(-0.015793918, -0.024342488, -0.037188973, 0.004534637, -0.025236975, -0.028567247, -0.055682972, -0.054670315, -0.06584981, 0.043045517, -0.0075941198, -0.014196169)); + result += mul(max(-i2, 0), float4x3(0.0132598495, 0.01775289, 0.017206183, 0.010604703, -0.007352816, -0.017301153, 0.030967329, 0.027615465, 0.0145311365, 0.008636854, -0.033379406, -0.042725433)); + result += float3(-0.0056639817, -0.0017339308, -0.0011913306); + + result += INPUT.SampleLevel(sam, pos, 0).rgb; + + OUTPUT[gxy] = float4(result, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Restore_Soft_M.hlsl b/src/Effects/Anime4K/Anime4K_Restore_Soft_M.hlsl index 051da91df..f525d68e4 100644 --- a/src/Effects/Anime4K/Anime4K_Restore_Soft_M.hlsl +++ b/src/Effects/Anime4K/Anime4K_Restore_Soft_M.hlsl @@ -2,18 +2,17 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Restore/Anime4K_Restore_CNN_Soft_M.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH 
-//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME Anime4K_Restore_Soft_1 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -51,6 +50,10 @@ Texture2D tex5; //!FORMAT R16G16B16A16_FLOAT Texture2D tex6; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + //!PASS 1 //!DESC Conv-4x3x3x3 @@ -495,15 +498,18 @@ void Pass6(uint2 blockStart, uint3 threadId) { //!PASS 7 //!DESC Conv-4x3x3x8, Conv-3x1x1x56 //!IN INPUT, tex1, tex2, tex3, tex4, tex5, tex6 +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 void Pass7(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - uint2 inputSize = GetInputSize(); - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } + float2 inputPt = GetInputPt(); float2 pos = (gxy + 0.5f) * inputPt; @@ -564,5 +570,5 @@ void Pass7(uint2 blockStart, uint3 threadId) { result += mul(max(-src7, 0), float4x3(0.09681486, 0.113604136, 0.10416855, -0.08199983, -0.09013433, -0.08562243, 0.041304465, 0.048315883, 0.042945288, -0.09863276, -0.117853515, -0.09870226)); result += float3(-0.0039074384, -0.0085585555, -0.0132283475); - WriteToOutput(gxy, result + origin); + OUTPUT[gxy] = float4(result + origin, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Restore_Soft_S.hlsl b/src/Effects/Anime4K/Anime4K_Restore_Soft_S.hlsl index d15dd696a..c16325488 100644 --- a/src/Effects/Anime4K/Anime4K_Restore_Soft_S.hlsl +++ b/src/Effects/Anime4K/Anime4K_Restore_Soft_S.hlsl @@ -2,15 +2,18 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Restore/Anime4K_Restore_CNN_Soft_S.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME Anime4K_Restore_Soft_0 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D 
OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -246,13 +249,15 @@ void Pass3(uint2 blockStart, uint3 threadId) { //!PASS 4 //!DESC Conv-3x3x3x8 //!IN INPUT, tex1 +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 void Pass4(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - uint2 inputSize = GetInputSize(); - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -294,5 +299,5 @@ void Pass4(uint2 blockStart, uint3 threadId) { result += INPUT.SampleLevel(sam, pos, 0).rgb; - WriteToOutput(gxy, result); + OUTPUT[gxy] = float4(result, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Restore_Soft_UL.hlsl b/src/Effects/Anime4K/Anime4K_Restore_Soft_UL.hlsl index 0e9cb52b2..e7c7fda0d 100644 --- a/src/Effects/Anime4K/Anime4K_Restore_Soft_UL.hlsl +++ b/src/Effects/Anime4K/Anime4K_Restore_Soft_UL.hlsl @@ -2,18 +2,17 @@ // Ported from https://github.com/bloc97/Anime4K/blob/4ba94b179a144200cb6b3052e690fe2ca5c6914c/glsl/Restore/Anime4K_Restore_CNN_Soft_UL.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME Anime4K_Restore_Soft_4 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -63,6 +62,11 @@ Texture2D tex7; //!FORMAT R16G16B16A16_FLOAT Texture2D tex8; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + + //!PASS 1 //!DESC Conv-4x3x3x3 //!IN INPUT @@ -1879,13 +1883,15 @@ void Pass7(uint2 blockStart, uint3 threadId) { //!PASS 8 //!DESC Conv-4x3x3x24, Conv-3x1x1x120 //!IN INPUT, tex1, tex2, tex3, tex7 +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 void Pass8(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - uint2 inputSize = GetInputSize(); - if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + + const uint2 
outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -2169,5 +2175,5 @@ void Pass8(uint2 blockStart, uint3 threadId) { result += float3(-0.0036656514, 0.006677459, 0.007698717); result += INPUT.SampleLevel(sam, pos, 0).rgb; - WriteToOutput(gxy, result.rgb); + OUTPUT[gxy] = float4(result, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Restore_Soft_VL.hlsl b/src/Effects/Anime4K/Anime4K_Restore_Soft_VL.hlsl index c74bf7a64..9a3133d6b 100644 --- a/src/Effects/Anime4K/Anime4K_Restore_Soft_VL.hlsl +++ b/src/Effects/Anime4K/Anime4K_Restore_Soft_VL.hlsl @@ -2,18 +2,17 @@ // Ported from https://github.com/bloc97/Anime4K/blob/4ba94b179a144200cb6b3052e690fe2ca5c6914c/glsl/Restore/Anime4K_Restore_CNN_Soft_VL.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME Anime4K_Restore_Soft_3 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -51,6 +50,10 @@ Texture2D tex5; //!FORMAT R16G16B16A16_FLOAT Texture2D tex6; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + //!PASS 1 //!DESC Conv-4x3x3x3 //!IN INPUT @@ -1125,13 +1128,15 @@ void Pass7(uint2 blockStart, uint3 threadId) { //!PASS 8 //!DESC Conv-4x3x3x16, Conv-3x1x1x112 //!IN INPUT, tex1, tex2, tex5 +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 void Pass8(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - uint2 inputSize = GetInputSize(); - if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -1289,5 +1294,5 @@ void Pass8(uint2 blockStart, uint3 threadId) { result += float3(0.018580848, -0.022256816, -0.0266178); result += INPUT.SampleLevel(sam, pos, 0).rgb; - WriteToOutput(gxy, result); + OUTPUT[gxy] = float4(result, 1); } diff --git 
a/src/Effects/Anime4K/Anime4K_Restore_UL.hlsl b/src/Effects/Anime4K/Anime4K_Restore_UL.hlsl index 56649a0e5..e2b0e844e 100644 --- a/src/Effects/Anime4K/Anime4K_Restore_UL.hlsl +++ b/src/Effects/Anime4K/Anime4K_Restore_UL.hlsl @@ -2,18 +2,17 @@ // Ported from https://github.com/bloc97/Anime4K/blob/4ba94b179a144200cb6b3052e690fe2ca5c6914c/glsl/Restore/Anime4K_Restore_CNN_UL.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME Anime4K_Restore_4 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -63,6 +62,11 @@ Texture2D tex7; //!FORMAT R16G16B16A16_FLOAT Texture2D tex8; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + + //!PASS 1 //!DESC Conv-4x3x3x3 //!IN INPUT @@ -1879,13 +1883,15 @@ void Pass7(uint2 blockStart, uint3 threadId) { //!PASS 8 //!DESC Conv-4x3x3x24, Conv-3x1x1x120 //!IN INPUT, tex1, tex2, tex3, tex7 +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 void Pass8(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - uint2 inputSize = GetInputSize(); - if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -2169,5 +2175,5 @@ void Pass8(uint2 blockStart, uint3 threadId) { result += float3(-0.0071146404, 0.005606682, 0.010180816); result += INPUT.SampleLevel(sam, pos, 0).rgb; - WriteToOutput(gxy, result.rgb); + OUTPUT[gxy] = float4(result, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Restore_VL.hlsl b/src/Effects/Anime4K/Anime4K_Restore_VL.hlsl index 13c76d1c5..437754a36 100644 --- a/src/Effects/Anime4K/Anime4K_Restore_VL.hlsl +++ b/src/Effects/Anime4K/Anime4K_Restore_VL.hlsl @@ -2,18 +2,17 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Restore/Anime4K_Restore_CNN_VL.glsl //!MAGPIE EFFECT -//!VERSION 3 
-//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME Anime4K_Restore_3 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -51,6 +50,10 @@ Texture2D tex5; //!FORMAT R16G16B16A16_FLOAT Texture2D tex6; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + //!PASS 1 //!DESC Conv-4x3x3x3 @@ -1132,13 +1135,15 @@ void Pass7(uint2 blockStart, uint3 threadId) { //!PASS 8 //!DESC Conv-4x3x3x16, Conv-3x1x1x112 //!IN INPUT, tex1, tex2, tex5 +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 void Pass8(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - uint2 inputSize = GetInputSize(); - if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -1296,5 +1301,5 @@ void Pass8(uint2 blockStart, uint3 threadId) { result += float3(0.047567394, -0.02504617, -0.028163986); result += INPUT.SampleLevel(sam, pos, 0).rgb; - WriteToOutput(gxy, result); + OUTPUT[gxy] = float4(result, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Thin_HQ.hlsl b/src/Effects/Anime4K/Anime4K_Thin_HQ.hlsl index 270224f8d..bf03e9ed5 100644 --- a/src/Effects/Anime4K/Anime4K_Thin_HQ.hlsl +++ b/src/Effects/Anime4K/Anime4K_Thin_HQ.hlsl @@ -2,9 +2,7 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Experimental-Effects/Anime4K_Thin_HQ.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!PARAMETER @@ -30,6 +28,11 @@ int iterations; //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -280,13 +283,15 @@ void Pass4(uint2 blockStart, uint3 threadId) { //!PASS 5 //!DESC Warp //!IN tex1, INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 
void Pass5(uint2 blockStart, uint3 threadId) { const uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - const uint2 inputSize = GetInputSize(); - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -299,12 +304,6 @@ void Pass5(uint2 blockStart, uint3 threadId) { for (uint j = 0; j <= 1; ++j) { const uint2 destPos = gxy + uint2(i, j); - if (i != 0 || j != 0) { - if (!CheckViewport(destPos)) { - continue; - } - } - float2 pos = (destPos + 0.5f) * inputPt; for (int i = 0; i < iterations; ++i) { @@ -313,7 +312,7 @@ void Pass5(uint2 blockStart, uint3 threadId) { pos -= dd; } - WriteToOutput(destPos, INPUT.SampleLevel(sam1, pos, 0).rgb); + OUTPUT[destPos] = INPUT.SampleLevel(sam1, pos, 0); } } } diff --git a/src/Effects/Anime4K/Anime4K_Upscale_Denoise_L.hlsl b/src/Effects/Anime4K/Anime4K_Upscale_Denoise_L.hlsl index c6128c8f4..3f81a0da3 100644 --- a/src/Effects/Anime4K/Anime4K_Upscale_Denoise_L.hlsl +++ b/src/Effects/Anime4K/Anime4K_Upscale_Denoise_L.hlsl @@ -2,22 +2,17 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Upscale%2BDenoise/Anime4K_Upscale_Denoise_CNN_x2_L.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!SORT_NAME Anime4K_Upscale_Denoise_1 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; - -//!SAMPLER -//!FILTER LINEAR -SamplerState sam1; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -43,6 +38,14 @@ Texture2D tex3; //!FORMAT R16G16B16A16_FLOAT Texture2D tex4; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam1; + //!PASS 1 //!DESC Conv-4x3x3x3 @@ -446,12 +449,15 @@ void Pass3(uint2 blockStart, uint3 threadId) { //!PASS 4 //!DESC Conv-4x3x3x16, Depth-to-Space //!IN INPUT, tex1, tex2 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 void 
Pass4(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -638,23 +644,17 @@ void Pass4(uint2 blockStart, uint3 threadId) { float2 outputPt = GetOutputPt(); pos -= 0.5f * outputPt; - WriteToOutput(gxy, float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb); + OUTPUT[gxy] = float4(float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x += 1u; + ++gxy.x; pos.x += outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } - - gxy.y += 1u; + OUTPUT[gxy] = float4(float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); + + ++gxy.y; pos.y += outputPt.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x -= 1u; + --gxy.x; pos.x -= outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Upscale_Denoise_S.hlsl b/src/Effects/Anime4K/Anime4K_Upscale_Denoise_S.hlsl index 93b420991..82cd19430 100644 --- a/src/Effects/Anime4K/Anime4K_Upscale_Denoise_S.hlsl +++ b/src/Effects/Anime4K/Anime4K_Upscale_Denoise_S.hlsl @@ -2,15 +2,18 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Upscale%2BDenoise/Anime4K_Upscale_Denoise_CNN_x2_S.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!SORT_NAME Anime4K_Upscale_Denoise_0 //!TEXTURE 
Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -238,6 +241,7 @@ void Pass3(uint2 blockStart, uint3 threadId) { //!PASS 4 //!DESC Conv-4x3x3x8, Depth-to-Space //!IN INPUT, tex1 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -282,7 +286,8 @@ float4 A4KS4(float2 pos) { void Pass4(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -291,25 +296,19 @@ void Pass4(uint2 blockStart, uint3 threadId) { float2 pos = ((gxy >> 1) + 0.5f) * inputPt; float4 c = A4KS4(pos); - + pos -= 0.5f * outputPt; - WriteToOutput(gxy, c.x + INPUT.SampleLevel(sam1, pos, 0).rgb); + OUTPUT[gxy] = float4(c.x + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x += 1u; + ++gxy.x; pos.x += outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, c.y + INPUT.SampleLevel(sam1, pos, 0).rgb); - } - - gxy.y += 1u; + OUTPUT[gxy] = float4(c.y + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); + + ++gxy.y; pos.y += outputPt.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, c.w + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(c.w + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x -= 1u; + --gxy.x; pos.x -= outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, c.z + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(c.z + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Upscale_Denoise_UL.hlsl b/src/Effects/Anime4K/Anime4K_Upscale_Denoise_UL.hlsl index b475b20c4..3298de5b5 100644 --- a/src/Effects/Anime4K/Anime4K_Upscale_Denoise_UL.hlsl +++ b/src/Effects/Anime4K/Anime4K_Upscale_Denoise_UL.hlsl @@ -2,22 +2,17 @@ // 移植自 
https://github.com/bloc97/Anime4K/blob/78e4f78f65b772e94bae6e7db5c49af1e889f784/glsl/Upscale%2BDenoise/Anime4K_Upscale_Denoise_CNN_x2_UL.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!SORT_NAME Anime4K_Upscale_Denoise_3 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; - -//!SAMPLER -//!FILTER LINEAR -SamplerState sam1; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -145,6 +140,14 @@ Texture2D conv2d_6_tf1; //!FORMAT R16G16B16A16_FLOAT Texture2D conv2d_6_tf2; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam1; + //!PASS 1 //!DESC Conv-4x3x3x3 //!IN INPUT @@ -1929,12 +1932,15 @@ void Pass7(uint2 blockStart, uint3 threadId) { //!PASS 8 //!DESC Conv-4x1x1x120, Depth-to-Space //!IN INPUT, conv2d_2_tf, conv2d_2_tf1, conv2d_2_tf2, conv2d_3_tf, conv2d_3_tf1, conv2d_3_tf2, conv2d_4_tf, conv2d_4_tf1, conv2d_4_tf2, conv2d_5_tf, conv2d_5_tf1, conv2d_5_tf2, conv2d_6_tf, conv2d_6_tf1, conv2d_6_tf2 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 void Pass8(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -2086,25 +2092,19 @@ void Pass8(uint2 blockStart, uint3 threadId) { target3 += float4(0.00428531, -0.011541925, 0.00898425, -0.01374321); float2 outputPt = GetOutputPt(); - + pos -= 0.5f * outputPt; - WriteToOutput(gxy, float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb); + OUTPUT[gxy] = float4(float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x += 1u; + ++gxy.x; pos.x += outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } - 
- gxy.y += 1u; + OUTPUT[gxy] = float4(float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); + + ++gxy.y; pos.y += outputPt.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x -= 1u; + --gxy.x; pos.x -= outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Upscale_Denoise_VL.hlsl b/src/Effects/Anime4K/Anime4K_Upscale_Denoise_VL.hlsl index 8b6852ecf..202939f90 100644 --- a/src/Effects/Anime4K/Anime4K_Upscale_Denoise_VL.hlsl +++ b/src/Effects/Anime4K/Anime4K_Upscale_Denoise_VL.hlsl @@ -2,22 +2,17 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/78e4f78f65b772e94bae6e7db5c49af1e889f784/glsl/Upscale%2BDenoise/Anime4K_Upscale_Denoise_CNN_x2_VL.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!SORT_NAME Anime4K_Upscale_Denoise_2 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; - -//!SAMPLER -//!FILTER LINEAR -SamplerState sam1; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -103,6 +98,15 @@ Texture2D conv2d_6_tf; //!FORMAT R16G16B16A16_FLOAT Texture2D conv2d_6_tf1; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam1; + + //!PASS 1 //!DESC Conv-4x3x3x3 //!IN INPUT @@ -1143,12 +1147,15 @@ void Pass7(uint2 blockStart, uint3 threadId) { //!PASS 8 //!DESC Conv-4x1x1x112, Depth-to-Space //!IN INPUT, conv2d_tf, conv2d_tf1, conv2d_1_tf, conv2d_1_tf1, conv2d_2_tf, conv2d_2_tf1, conv2d_3_tf, conv2d_3_tf1, 
conv2d_4_tf, conv2d_4_tf1, conv2d_5_tf, conv2d_5_tf1, conv2d_6_tf, conv2d_6_tf1 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 void Pass8(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -1293,23 +1300,17 @@ void Pass8(uint2 blockStart, uint3 threadId) { float2 outputPt = GetOutputPt(); pos -= 0.5f * outputPt; - WriteToOutput(gxy, float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb); + OUTPUT[gxy] = float4(float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x += 1u; + ++gxy.x; pos.x += outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } - - gxy.y += 1u; + OUTPUT[gxy] = float4(float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); + + ++gxy.y; pos.y += outputPt.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x -= 1u; + --gxy.x; pos.x -= outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Upscale_GAN_x2_M.hlsl b/src/Effects/Anime4K/Anime4K_Upscale_GAN_x2_M.hlsl new file mode 100644 index 000000000..6ab5ce8bf --- /dev/null +++ b/src/Effects/Anime4K/Anime4K_Upscale_GAN_x2_M.hlsl @@ -0,0 +1,1324 @@ +// Anime4K_Upscale_GAN_x2_M +// 移植自 https://github.com/bloc97/Anime4K/blob/8e39551ce96ed172605c89b7dd8be855b5502cc9/glsl/Upscale/Anime4K_Upscale_GAN_x2_M.glsl + +//!MAGPIE 
EFFECT +//!VERSION 4 +//!SORT_NAME Anime4K_Upscale_GAN_x2_2 + +// 圆括号内的输入只被采样一次 +// INPUT -> tf, tf1 +// tf, tf1 -> 1_tf, 3_tf, 3_tf1 +// 3_tf, 3_tf1, (1_tf) -> 4_tf, 6_tf, 6_tf1 +// 6_tf, 6_tf1, (1_tf), (4_tf) -> 7_tf, 9_tf, 9_tf1 +// 9_tf, 9_tf1, (1_tf), (4_tf), 7_tf -> 11_tf, 10_tf, 12_tf, 12_tf1 +// 12_tf, 12_tf1, 11_tf, (1_tf), (4_tf), (7_tf), (10_tf) -> 0ups, 0ups1, 0ups2 +// (INPUT), 0ups, 0ups1, 0ups2 -> OUTPUT + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex3; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex4; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex5; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex6; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex7; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex8; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex9; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex10; + + +//!PASS 1 +//!DESC Conv-4x3x3x3 +//!IN INPUT +//!OUT tex1, tex2 +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 + +void Pass1(uint2 blockStart, uint3 threadId) { + uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= 
inputSize.y) { + return; + } + float2 inputPt = GetInputPt(); + + uint i, j; + + min16float3 src[4][4]; + [unroll] + for (i = 0; i <= 2; i += 2) { + [unroll] + for (j = 0; j <= 2; j += 2) { + float2 tpos = (gxy + uint2(i, j)) * inputPt; + const min16float4 sr = INPUT.GatherRed(sam, tpos); + const min16float4 sg = INPUT.GatherGreen(sam, tpos); + const min16float4 sb = INPUT.GatherBlue(sam, tpos); + + // w z + // x y + src[i][j] = min16float3(sr.w, sg.w, sb.w); + src[i][j + 1] = min16float3(sr.x, sg.x, sb.x); + src[i + 1][j] = min16float3(sr.z, sg.z, sb.z); + src[i + 1][j + 1] = min16float3(sr.y, sg.y, sb.y); + } + } + + [unroll] + for (i = 1; i <= 2; ++i) { + [unroll] + for (j = 1; j <= 2; ++j) { + uint2 destPos = gxy + uint2(i - 1, j - 1); + + if (i != 1 || j != 1) { + if (destPos.x >= inputSize.x || destPos.y >= inputSize.y) { + continue; + } + } + + min16float4 result = mul(src[i - 1][j - 1], min16float3x4(-0.17498326, -0.14677401, -0.43065637, 0.10841958, 0.24096319, -0.008683959, -0.29844064, 0.3567803, 0.43360776, 0.11304715, -0.0802437, 0.190904)); + result += mul(src[i - 1][j], min16float3x4(0.24688073, 0.086462855, 0.05716678, -0.1739644, 0.3236298, 0.23382919, 0.20481811, -0.022618154, -0.336325, -0.21624258, -0.18736486, -0.14936537)); + result += mul(src[i - 1][j + 1], min16float3x4(0.38230455, 0.410552, 0.34809712, 0.2510045, 0.30689523, 0.09889703, -0.26991332, 0.1108426, 0.5083409, 0.2854462, -0.1912902, 0.40354714)); + result += mul(src[i][j - 1], min16float3x4(0.46870667, -0.03530456, 0.13705169, -0.11884997, -0.0772201, 0.17073877, 0.03287621, -0.14975251, -0.18155691, 0.14545092, -0.1584816, 0.051269397)); + result += mul(src[i][j], min16float3x4(-0.5830986, -0.009166566, 0.54358304, -0.4545001, -0.27541155, 0.6697277, -0.29205534, -0.61038095, -0.64781004, 0.32052672, 0.14704794, -0.6479083)); + result += mul(src[i][j + 1], min16float3x4(-0.04402336, 0.05461938, -0.18035333, 0.5464947, 0.21475682, -0.6899343, 0.49390903, 0.62440956, 0.75365967, 
-0.26500008, 0.59187347, 0.10037025)); + result += mul(src[i + 1][j - 1], min16float3x4(-0.25319895, -0.1764162, -0.22574338, 0.03075524, -0.29618785, -0.491323, 0.008427114, -0.363144, -0.17214127, -0.11891048, -0.19321653, -0.13424487)); + result += mul(src[i + 1][j], min16float3x4(0.17425235, 0.07049646, -0.1759216, 0.05697634, -0.39496303, 0.35450256, -0.09984144, 0.15470548, -0.03375828, 0.06442114, 0.14598753, 0.46114844)); + result += mul(src[i + 1][j + 1], min16float3x4(-0.19262458, -0.17141157, -0.11393742, -0.07778959, -0.006366565, -0.16713034, 0.2135569, 0.23494779, -0.37996295, -0.2767951, -0.1515432, -0.110363424)); + result += min16float4(0.010385515, 0.011541315, -0.002942497, -0.00020902864); + tex1[destPos] = result; + + result = mul(src[i - 1][j - 1], min16float3x4(0.8031736, -0.1500194, -0.23398483, -0.060760673, 0.5049785, -0.099199474, -0.035531044, 0.0310586, -0.0310334, 0.15932913, 0.08973915, 0.08766925)); + result += mul(src[i - 1][j], min16float3x4(-0.2187303, 0.20974335, 0.016500302, 0.15386087, 0.2381243, -0.176845, -0.003643712, 0.08195259, 0.18417378, -0.18228108, 0.19170114, -0.3758241)); + result += mul(src[i - 1][j + 1], min16float3x4(0.4429508, -0.025832538, -0.021855514, 0.11322045, -0.08459551, -0.17815724, -0.19924322, -0.03736318, -0.22390507, -0.50430673, -0.13770194, 0.03014482)); + result += mul(src[i][j - 1], min16float3x4(-0.15976174, 0.31052437, 0.2498092, -0.29137832, -0.10121105, 0.35164458, 0.4901633, -0.35297948, -0.2569739, -0.14258477, 0.12585007, -0.2552164)); + result += mul(src[i][j], min16float3x4(-0.5260107, -0.8547037, 0.92173797, 0.37817466, -0.4162576, 0.10989847, 0.26875922, 0.8614761, 0.069195434, 0.045593478, 0.03790176, 0.7332446)); + result += mul(src[i][j + 1], min16float3x4(0.14287843, -0.283008, -0.28487602, -0.13313514, -0.019538656, -0.02361782, 0.28037757, -0.10543745, 0.1586713, 0.12037641, 0.24249536, 0.2524637)); + result += mul(src[i + 1][j - 1], min16float3x4(-0.037178896, 0.23858358, 
-0.18704462, -0.13747689, 0.07629898, 0.2710832, -0.71619016, -0.09074896, 0.30446374, -0.0052702115, -0.27990812, -0.1392364)); + result += mul(src[i + 1][j], min16float3x4(-0.086045384, 0.695562, -0.23519892, -0.23438415, 0.16208446, 0.2172693, -0.16647956, -0.3718635, 0.024940055, 0.5650778, 0.20409326, -0.13530363)); + result += mul(src[i + 1][j + 1], min16float3x4(-0.19389555, -0.028506106, -0.35060602, 0.22244014, 0.055054635, -0.17651209, -0.19871834, -0.02667603, -0.1402023, -0.02455308, -0.57856905, -0.2174221)); + result += min16float4(0.02648044, -0.0017647704, -0.016136197, 0.0011179475); + tex2[destPos] = result; + } + } +} + + +//!PASS 2 +//!DESC Conv-4x3x3x16, Conv-4x1x1x32 +//!IN tex1, tex2 +//!OUT tex3, tex4, tex5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass2(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = tex1.SampleLevel(sam, pos - inputPt, 0); + min16float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e1 = tex1.SampleLevel(sam, pos, 0); + min16float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i1 = tex1.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); 
+ min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex2.SampleLevel(sam, pos - inputPt, 0); + min16float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e2 = tex2.SampleLevel(sam, pos, 0); + min16float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i2 = tex2.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float4 conv2d_2_tf = mul(a1, min16float4x4(-0.14656883, -0.044076134, -0.40314636, -0.08023388, 0.12564746, -0.21633625, -0.0210282, -0.19231434, -0.019945038, 0.020343186, -0.007134301, 0.013607319, 0.07334655, -0.050848506, 0.0011201366, 0.26975143)); + conv2d_2_tf += mul(b1, min16float4x4(-0.043205153, -0.13764456, -0.5368405, -0.04096279, 0.009450832, 0.23953767, -0.022408254, -0.124040656, 0.53450584, 0.02690831, 0.39857075, 0.42423433, 0.014167992, 0.055189077, -0.038074926, 0.12800713)); + conv2d_2_tf += mul(c1, min16float4x4(-0.05354771, -0.06626498, 0.0092389295, 0.100637995, 0.05051714, -0.0033487207, -0.0076860636, 0.013058279, 0.10727092, -0.31131467, 
0.058990292, 0.46365786, 0.08736531, 0.038865663, -0.008022449, -0.067517675)); + conv2d_2_tf += mul(d1, min16float4x4(-0.25327486, -0.0041089035, 0.04877498, -0.36375836, 0.0003920389, -0.09273049, 0.016388323, 0.11530572, -0.14216854, 0.07370458, -0.27584067, -0.34536567, 0.0848517, -0.1954229, -0.22656868, -0.13531597)); + conv2d_2_tf += mul(e1, min16float4x4(0.7035245, 0.1131446, 0.1833189, 0.63726306, -0.20649737, 0.14149575, -0.084267326, 0.020898562, -0.026810758, -0.17932594, -0.08032681, 0.07790513, -0.01148237, -0.19930641, 0.33902612, -0.013703277)); + conv2d_2_tf += mul(f1, min16float4x4(-0.2862842, 0.01491211, -0.30474076, 0.19604082, 0.21565811, 0.29193363, 0.024934597, -0.17113955, 0.26136434, -0.12819171, 0.3874644, -0.30533502, 0.004006889, -0.07340657, -0.04817435, -0.013651047)); + conv2d_2_tf += mul(g1, min16float4x4(-0.14331155, -0.09605764, -0.06941299, -0.09740676, 0.0059936745, -0.27215815, -0.31393203, 0.17594862, 0.045626156, 0.060231503, 0.10607796, -0.030635068, 0.15021041, -0.19662435, -0.14648037, 0.028361326)); + conv2d_2_tf += mul(h1, min16float4x4(0.25090003, -0.2845429, -0.30109838, -0.070956856, -0.08051349, -0.07526823, 0.13524723, 0.14151429, -0.1232367, 0.08824123, 0.28804728, 0.31701297, 0.014839836, -0.09193038, 0.30188346, -0.07903937)); + conv2d_2_tf += mul(i1, min16float4x4(0.21150468, 0.13863595, -0.2624825, 0.1652623, -0.026336774, -0.45599234, 0.015901498, 0.23009071, 0.19700526, -0.11013044, 0.19850798, -0.19702299, 0.060643747, -0.03162518, -0.18272553, 0.24863112)); + conv2d_2_tf += mul(a2, min16float4x4(0.16361383, 0.0028921412, 0.18107067, 0.0720563, 0.06378758, -0.09442821, -0.3054202, 0.06843394, 0.20913927, -0.17700543, 0.14682317, 0.21683829, 0.02948067, -0.34866366, -0.04474257, -0.011365872)); + conv2d_2_tf += mul(b2, min16float4x4(0.008512374, 0.19717449, 0.4456541, -0.15356806, -0.24209222, 0.12543896, -0.18232138, 0.012759448, 0.052473016, 0.17268041, 0.25826934, -0.16848944, 0.10150518, -0.30244592, 
0.38495708, -0.2090818)); + conv2d_2_tf += mul(c2, min16float4x4(-0.07227807, -0.10066125, -0.1090768, 0.13579647, 0.023154313, 0.079166815, -0.20014893, -0.21884407, 0.09634875, -0.22551452, 0.20771019, 0.16381831, -0.23455033, 0.12578821, -0.43342614, -0.23609087)); + conv2d_2_tf += mul(d2, min16float4x4(-0.11084086, -0.03875876, -0.17912252, -0.24158017, 0.070904315, 0.21862641, 0.02659038, -0.36572614, 0.06265698, 0.32029516, 0.12044166, 0.18424052, 0.050192874, 0.15095103, 0.13794746, -0.111053675)); + conv2d_2_tf += mul(e2, min16float4x4(-0.11362966, 0.5249116, 0.27814335, -0.023295242, 0.022581467, 0.3195408, -0.06865207, -0.13818301, 0.18826036, 0.21182717, -0.30241874, 0.02916674, -0.19999875, 0.8222055, -0.2981789, -0.31122693)); + conv2d_2_tf += mul(f2, min16float4x4(0.058648925, -0.39456168, -0.36158726, 0.4050607, 0.0609484, 0.01624418, -0.2699451, 0.25976416, 0.31131884, 0.18382475, 0.12856431, 0.3285595, 0.4798488, -0.26074353, 0.78901637, -0.071622506)); + conv2d_2_tf += mul(g2, min16float4x4(-0.038631868, -0.20723929, 0.045573164, 0.10398485, 0.20236868, 0.14958549, 0.18842755, -0.23352885, 0.18624173, 0.2800279, 0.23280786, -0.12909916, -0.037398554, 0.1557195, -0.04866289, -0.13633357)); + conv2d_2_tf += mul(h2, min16float4x4(-0.15441336, 0.0968205, -0.32649723, -0.021546176, -0.10667603, 0.18065608, 0.017242601, 0.027690934, -0.23079967, 0.093206555, -0.11170116, 0.19002458, -0.352287, 0.008375842, 0.2459501, -0.09389683)); + conv2d_2_tf += mul(i2, min16float4x4(0.2130623, -0.4781421, -0.53600657, 0.44947717, -0.018234696, -0.17257519, -0.063182175, 0.22729957, -0.037309792, 0.13939567, -0.013829814, -0.20586358, 0.052985236, -0.04452726, 0.1880475, 0.096934296)); + conv2d_2_tf += mul(na1, min16float4x4(0.026266143, -0.03171053, 0.2277772, 0.01093641, -0.007701242, 0.115488306, 0.029304042, 0.33619022, 0.14467055, 0.075788446, -0.076583475, -0.051929206, 0.02211152, 0.031270072, -0.075583085, -0.20198274)); + conv2d_2_tf += mul(nb1, 
min16float4x4(-0.010648877, 0.21413183, 0.24339998, -0.22960022, -0.16156821, -0.45364898, -0.105244495, -0.07713787, -0.31945667, -0.097204186, -0.2457385, 0.04241939, -0.16228637, 0.13461526, 0.009693403, -0.13537757)); + conv2d_2_tf += mul(nc1, min16float4x4(0.058250688, 0.007912516, -0.071061306, 0.01889538, -0.14592043, -0.10374761, 0.07840785, 0.008756123, -0.045008816, 0.05261628, -0.2615482, -0.01929421, -0.23048545, 0.010220507, -0.16385053, 0.031251106)); + conv2d_2_tf += mul(nd1, min16float4x4(-0.03350765, 0.0737811, -0.09780837, -0.031780828, -0.1919008, 0.36382285, 0.19377235, -0.2797014, -0.12267188, 0.023496462, 0.38848102, -0.010005188, -0.09733866, 0.51535326, 0.47232744, 0.0073942994)); + conv2d_2_tf += mul(ne1, min16float4x4(-0.27284998, 0.14916854, -0.25612846, -0.029941292, 0.18539569, -0.43832946, -0.119871736, 0.044226155, -0.106426276, 0.05740293, -0.046056107, -0.17616963, -0.52316684, 0.33400205, -0.08133327, 0.0948221)); + conv2d_2_tf += mul(nf1, min16float4x4(0.32683802, -0.26026967, 0.19948171, -0.011760837, -0.30256173, -0.45944482, 0.051236197, 0.84710604, -0.08078167, 0.2675028, -0.27241448, 0.27764642, 0.13335843, 0.068502, -0.033614077, 0.19930291)); + conv2d_2_tf += mul(ng1, min16float4x4(0.07075588, 0.029963106, 0.055358, 0.042518128, -0.1441339, 0.42236832, 0.1387107, -0.40421516, 0.02318193, -0.36765453, -0.21558793, 0.21393713, 0.31122518, -0.3358225, -0.4967671, 0.46344024)); + conv2d_2_tf += mul(nh1, min16float4x4(-0.28364134, 0.19475235, 0.42310834, 0.060645495, -0.14013693, -0.049322303, -0.09870014, 0.23229486, -0.033104394, -0.37716264, -0.18488638, 0.17441164, -0.24427529, -0.26787207, -0.16721556, -0.10374529)); + conv2d_2_tf += mul(ni1, min16float4x4(-0.3376618, -0.09682554, 0.3423445, 0.047880173, 0.3354013, -0.21854481, -0.40352795, 0.1841921, 0.008460585, -0.03459756, -0.22880521, 0.35112804, -0.01764322, -0.16448145, 0.107058726, -0.28482538)); + conv2d_2_tf += mul(na2, min16float4x4(-0.032480888, 0.0034003556, 
-0.032999255, 0.16414961, 0.098690405, 0.0887987, 0.32215804, -0.002440519, -0.16814353, 0.0029867117, -0.28380692, 0.060728613, 0.15944195, 0.16642234, 0.110365815, 0.22413619)); + conv2d_2_tf += mul(nb2, min16float4x4(-0.088509634, 0.047311794, -0.30038288, -0.27227867, 0.41235012, 0.23889793, 0.7280631, 0.13555974, -0.08230139, 0.09955461, -0.13654864, 0.0314745, -0.275061, -0.10253638, -0.34706068, 0.03781376)); + conv2d_2_tf += mul(nc2, min16float4x4(0.09819424, -0.017704371, -0.031446967, 0.061441656, -0.110502265, -0.19236599, 0.2783733, 0.12798637, -0.047672354, -0.018956421, -0.17555775, -0.018790504, 0.43967727, -0.62039405, 0.08790998, 0.4353703)); + conv2d_2_tf += mul(nd2, min16float4x4(-0.019217307, 0.14623284, 0.015177701, 0.15983194, -0.106374666, -0.0131188845, 0.033161264, 0.41326195, 0.052029386, -0.11639186, -0.026856689, -0.020853983, -0.024652582, -0.12368135, -0.39344305, 0.17345576)); + conv2d_2_tf += mul(ne2, min16float4x4(-0.047131967, -0.28568837, 0.4201909, -0.28901812, -0.13973507, 0.03312194, -0.16265458, -0.10710893, 0.21189946, -0.32837728, 0.12424836, -0.30587387, 0.036961686, -0.8623908, 0.3661179, -0.1692949)); + conv2d_2_tf += mul(nf2, min16float4x4(0.1143412, 0.07707313, 0.3981437, -0.17059685, -0.094056316, -0.27234176, 0.12281097, -0.16966031, -0.1512859, -0.0524175, 0.1654043, 0.13700214, -0.3156236, -0.27636334, -0.52670264, 0.9250529)); + conv2d_2_tf += mul(ng2, min16float4x4(0.16162306, -0.15842794, -0.06699449, 0.059618954, 0.06798694, -0.060685594, -0.14878511, 0.17194197, -0.05110082, -0.12152871, -0.2020905, 0.09337634, 0.0602552, -0.07327089, 0.07043988, 0.15926042)); + conv2d_2_tf += mul(nh2, min16float4x4(-0.10312201, -0.13890414, -0.07694594, -0.29262447, 0.0597966, -0.228, -0.00046558332, 0.09373052, 0.2520174, -0.2992283, -0.01796473, -0.052195024, 0.09554047, -0.25678295, -0.38657847, 0.16130428)); + conv2d_2_tf += mul(ni2, min16float4x4(0.21114396, -0.64854, -0.52819866, -0.67061704, 0.05760163, -0.121914886, 
0.05448798, -0.1352843, 0.007051261, 0.065677196, -0.09763541, 0.032613076, -0.17908493, -0.7194699, -0.6342276, 0.031814635)); + conv2d_2_tf += min16float4(0.051319666, 0.019196881, 0.0759832, 0.050857317); + min16float4 nconv2d_2_tf = max(-conv2d_2_tf, 0); + conv2d_2_tf = max(conv2d_2_tf, 0); + + min16float4 conv2d_1_tf = mul(a1, min16float4x4(0.10187621, 0.11053595, 0.14810364, -0.18582201, 0.16617906, -0.011798966, 0.09280227, 0.13307849, -0.044728525, 0.10914104, 0.075626835, -0.10416733, -0.094498746, -0.06870642, -0.07571491, 0.04897303)); + conv2d_1_tf += mul(b1, min16float4x4(0.33485547, 0.03678466, -0.29866266, -0.048795477, -0.010474432, -0.10252797, 0.036609326, -0.013254512, -0.14475596, 0.011886287, 0.11828754, -0.13557065, -0.28870094, -0.17330378, 0.044048756, -0.019826433)); + conv2d_1_tf += mul(c1, min16float4x4(0.105582856, -0.039765045, 0.0818729, 0.09955303, 0.023201315, 0.09243788, 0.07389467, -0.012808492, 0.0492865, 0.19755632, -0.06548781, 0.08533675, -0.013952, 0.017339202, -0.20518751, -0.054678205)); + conv2d_1_tf += mul(d1, min16float4x4(-0.26653445, 0.04810761, -0.23108084, -0.19818014, 0.23671885, 0.016349426, 0.0045669116, 0.077428445, -0.140711, 0.11972277, 0.101062275, -0.18716832, -0.190941, -0.34035257, -0.09143259, 0.04359683)); + conv2d_1_tf += mul(e1, min16float4x4(-0.14573975, 0.23356283, -0.3772715, -0.22460096, -0.053278442, 0.069576025, 0.05169695, 0.17249753, 0.028048603, -0.25471392, -0.09931249, 0.2095619, 0.22173007, 0.38787642, -0.30738685, 0.01936576)); + conv2d_1_tf += mul(f1, min16float4x4(0.081078954, -0.16813248, 0.1542311, 0.17158946, -0.15383756, 0.025605323, 0.2360881, -0.14753577, -0.016537111, 0.048651446, -0.35849985, 0.01651406, 0.17044473, 0.13180882, 0.324054, -0.18812656)); + conv2d_1_tf += mul(g1, min16float4x4(-0.15537027, -0.08164218, 0.049979087, -0.31885874, -0.15126401, -0.14352658, 0.18948728, 0.020951044, 0.054829888, -0.18936221, -0.22699763, 0.14384085, 0.055476833, -0.011031805, -0.23653851, 
0.02768591)); + conv2d_1_tf += mul(h1, min16float4x4(-0.34108123, -0.28492066, 0.50347435, 0.0034134283, 0.041766707, 0.12375689, -0.08600751, 0.22726676, 0.10521852, 0.16621545, 0.038216297, 0.029870255, 0.07065742, -0.03542451, 0.38924676, -0.117774665)); + conv2d_1_tf += mul(i1, min16float4x4(-0.19437145, -0.01827461, 0.15408134, -0.14991991, 0.13832837, 0.0668659, 0.092678316, 0.05341174, 0.21633142, 0.09575402, -0.111060366, -0.00874764, -0.21256353, -0.052944425, 0.16459747, 0.07091838)); + conv2d_1_tf += mul(a2, min16float4x4(0.022236984, 0.19067548, 0.049743406, 0.05148808, 0.23003219, 0.08688227, 0.030773275, -0.059972208, -0.039038613, 0.21701579, -0.11092254, -0.10850967, -0.17777155, -0.20399293, -0.006843039, 0.24139926)); + conv2d_1_tf += mul(b2, min16float4x4(-0.07928885, -0.011657496, -0.03982505, -0.031084592, -0.09403157, -0.13860224, 0.15166754, 0.1279725, -0.084909394, 0.18945958, 0.055481352, -0.24365151, -0.04130202, 0.105171725, -0.47306657, -0.2218246)); + conv2d_1_tf += mul(c2, min16float4x4(-0.06171395, 0.0029490888, 0.055825688, -0.01362009, 0.045571987, -0.04197536, -0.024671398, -0.11600467, 0.02611751, -0.06675449, 0.38841903, 0.109969236, 0.1846224, -0.22673915, -0.11488994, -0.18271959)); + conv2d_1_tf += mul(d2, min16float4x4(-0.08073766, -0.1512685, 0.09596278, 0.061552938, -0.23016383, 0.044725727, -0.1058148, -0.09081257, 0.25391936, 0.13075152, 0.1153331, 0.035533328, 0.14628118, 0.053434838, -0.061957166, -0.11092296)); + conv2d_1_tf += mul(e2, min16float4x4(0.004972408, 0.26720062, -0.0014180156, -0.15569477, 0.08964792, 0.39218047, -0.113748655, -0.20653862, -0.0182982, -0.009456181, 0.096566215, 0.19871894, -0.45192167, -0.19494532, 0.5282211, -0.033234302)); + conv2d_1_tf += mul(f2, min16float4x4(0.11633487, 0.055492207, -0.09550419, 0.019721292, 0.05191187, 0.110391244, 0.13541168, 0.108687185, -0.3231262, -0.071254596, 0.12103068, -0.063508354, 0.16086432, 0.22202429, -0.2793211, -0.059888415)); + conv2d_1_tf += mul(g2, 
min16float4x4(0.09845572, -0.11364447, -0.06817361, 0.20479278, 0.008171668, -0.10222864, -0.12512983, 0.11285637, 0.2092848, 0.12593135, -0.054839488, 0.1560058, 0.109415986, -0.04229047, -0.21525817, 0.10153635)); + conv2d_1_tf += mul(h2, min16float4x4(-0.26443723, 0.18267378, 0.2874903, -0.15007962, 0.23901714, -0.039331976, -0.4055973, 0.18869716, 0.060133275, -0.030050457, -0.16689767, -0.024223989, 0.43243858, -0.004281818, -0.5925553, 0.08473984)); + conv2d_1_tf += mul(i2, min16float4x4(-0.11769163, -0.6005158, -0.0700652, 0.0062212353, -0.022391787, 0.08070833, 0.10332995, 0.100591965, 0.1680161, 0.1209537, -0.11606606, -0.0032385625, -0.30508906, -0.11541758, 0.27825746, 0.18774803)); + conv2d_1_tf += mul(na1, min16float4x4(-0.06629365, -0.14032914, -0.2580204, 0.18303558, -0.1916567, 0.029803488, -0.12213443, -0.07165115, 0.012936617, -0.11358297, -0.19138688, 0.10422416, 0.18062063, 0.14369549, 0.10535131, -0.036331207)); + conv2d_1_tf += mul(nb1, min16float4x4(-0.23739359, -0.14102252, 0.16535138, -0.055494435, 0.11510639, -0.02530117, 0.13571805, -0.11962709, 0.14311576, -0.11346015, -0.053082045, 0.23039193, 0.2412315, 0.34595123, -0.057626486, 0.1273758)); + conv2d_1_tf += mul(nc1, min16float4x4(-0.031894613, 0.04056866, -0.14806709, -0.061261263, -0.05113628, -0.150074, -0.05885426, 0.025318084, -0.028839143, -0.14976048, -0.061418023, -0.10849576, 0.10669465, 0.025044547, 0.13002798, 0.033596892)); + conv2d_1_tf += mul(nd1, min16float4x4(0.31830126, -0.109857574, 0.022382054, 0.19084917, -0.21992075, -0.06509279, 0.04586319, -0.10979886, 0.07565896, 0.008375114, -0.025531407, 0.112079956, 0.32532254, 0.39258766, 0.15983114, -0.047324624)); + conv2d_1_tf += mul(ne1, min16float4x4(0.06333816, -0.43997836, 0.28480944, -0.037927028, -0.16247569, 0.14209846, -0.5309942, -0.23058164, -0.18387268, 0.3324917, 0.010288075, -0.2516956, -0.42476243, -0.19866063, 0.32058033, 0.052254338)); + conv2d_1_tf += mul(nf1, min16float4x4(-0.019851776, 0.17185202, 
-0.14713249, -0.1373522, 0.23155597, -0.009191596, -0.15395427, 0.24423079, -0.11106813, -0.034888845, 0.17169674, -0.08786573, -0.08697707, -0.28842747, -0.25445274, 0.13578549)); + conv2d_1_tf += mul(ng1, min16float4x4(0.2099323, 0.09262897, -0.08977398, 0.30791095, 0.12376861, 0.24654338, -0.097672515, 0.008614657, 0.006388779, 0.076170854, 0.25119394, -0.12392118, 0.3138793, -0.015998395, 0.15131904, -0.3009305)); + conv2d_1_tf += mul(nh1, min16float4x4(0.33982292, 0.26557416, -0.3754559, -0.110353656, 0.08402225, -0.053171434, 0.051136248, -0.2696132, -0.14568366, -0.048726343, 0.06216166, 0.018804165, -0.084439, 0.15103953, -0.020082679, 0.15082058)); + conv2d_1_tf += mul(ni1, min16float4x4(0.14522389, -0.0462971, -0.10824406, 0.14163211, -0.08392773, -0.22920173, -0.23795773, -0.2580316, -0.22207144, -0.15956368, 0.12665017, -0.08286834, 0.09581649, 0.12603259, -0.15513468, -0.010735423)); + conv2d_1_tf += mul(na2, min16float4x4(0.00818024, -0.15539199, -0.011369519, 0.05717366, -0.25330603, -0.018393422, 0.027386196, 0.121692196, 0.059138533, -0.1631142, 0.10282322, 0.08011751, 0.10027271, 0.255391, 0.010682224, -0.3095357)); + conv2d_1_tf += mul(nb2, min16float4x4(0.117767766, 0.120644994, 0.09232613, -0.018057318, -0.038398392, 0.14537762, -0.016560853, -0.08958423, 0.06743331, -0.23562634, -0.123906426, 0.028323429, -0.09386831, -0.16833909, 0.019829117, -0.08108203)); + conv2d_1_tf += mul(nc2, min16float4x4(0.05462869, -0.031615634, -0.121678494, 0.05315917, -0.012636353, -0.13374922, 0.18577711, 0.0005971412, -0.099537544, -0.060773082, -0.28754288, -0.20077203, -0.15873533, -0.11387871, -0.17841183, -0.120239034)); + conv2d_1_tf += mul(nd2, min16float4x4(0.13845754, 0.223389, -0.20315485, -0.03479761, 0.1806296, 0.057029717, 0.010771242, 0.15245064, -0.0040082, 0.015283898, -0.34807077, 0.078581005, 0.026417086, -0.058825746, 0.07728649, 0.066044815)); + conv2d_1_tf += mul(ne2, min16float4x4(-0.13820273, -0.050027788, 0.061389934, 0.11189863, 
0.008062022, -0.17326912, 0.18159898, 0.08510656, 0.22065656, 0.3918094, -0.05124615, -0.22959533, 0.85480285, 0.5621734, -0.817405, 0.065126896)); + conv2d_1_tf += mul(nf2, min16float4x4(-0.15309735, 0.1396192, 0.16662036, -0.10952867, -0.03473452, -0.08712044, -0.2422528, -0.19236326, 0.49887487, 0.2615184, -0.076631024, 0.16010238, -0.09836315, -0.27126545, 0.17968613, -0.21053861)); + conv2d_1_tf += mul(ng2, min16float4x4(-0.18809205, 0.050410215, 0.1418759, -0.2876976, -0.13414268, 0.07458343, 0.096421175, -0.060676426, -0.17345451, -0.13678914, -0.06512698, -0.102106765, -0.12989639, 0.09089589, 0.07377932, -0.07263102)); + conv2d_1_tf += mul(nh2, min16float4x4(0.45035192, 0.2393797, -0.045452517, -0.04553052, -0.26037264, -0.021321824, 0.24618645, -0.108074926, -0.030116243, 0.04612789, 0.2273845, -0.07468269, -0.48789972, 0.12628402, 1.0130231, -0.14672706)); + conv2d_1_tf += mul(ni2, min16float4x4(0.5591947, -0.0326075, 0.12768768, -0.7916967, 0.023168698, -0.042015456, -0.12410894, -0.033611402, -0.14815444, -0.124497496, 0.08198418, -0.014488041, 0.4252749, -0.20253694, 0.042329047, -0.50953263)); + conv2d_1_tf += min16float4(-0.048558664, 0.11006767, -0.074099846, -0.016021004); + tex3[gxy] = conv2d_1_tf; + min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0); + conv2d_1_tf = max(conv2d_1_tf, 0); + + min16float4 target = mul(e1, min16float4x4(-0.26519376, -0.45442572, -0.24128473, 0.56122154, 0.45048368, 0.32492852, -0.14123245, -0.027976234, -0.11764467, -0.47563952, -0.09401533, 0.024141679, -0.19278349, -0.5169275, -0.26203018, 0.04326379)); + target += mul(e2, min16float4x4(-0.14198317, 0.18704857, -0.20165806, 0.3868074, 0.26532957, 0.13556235, -0.5872983, 0.13357028, 0.48151335, -0.3750496, 0.020972235, -0.32213062, -0.46967435, 0.10506199, 0.24039303, -0.3906582)); + target += mul(ne1, min16float4x4(0.10981934, -0.0040414287, -0.0025180888, -0.23061854, -0.6781062, -0.27331296, -0.1538456, 0.31020573, -0.05341261, 0.45214307, 0.23456645, 0.3261386, 
-0.020520406, 0.46579385, 0.57791334, 0.441774)); + target += mul(ne2, min16float4x4(0.11475315, 0.18062253, 0.21255025, -0.1963313, -0.22190428, -0.19369084, 0.5878038, -0.051808596, -0.39728877, -0.044071846, 0.0066692936, -0.0066007506, 0.03501876, 0.27602142, 0.11396466, 0.81461775)); + target += mul(conv2d_2_tf, min16float4x4(-0.44411597, -0.11377309, 0.16160126, 0.47119814, 0.22932883, -0.43011594, 0.01986201, 0.01446102, -0.2783236, -0.07647468, -0.5016725, 0.4227215, 0.31808656, 0.23829709, -0.12855907, -0.15950239)); + target += mul(nconv2d_2_tf, min16float4x4(-0.4784548, -0.042179376, -0.4882858, -0.046462137, -0.21421364, -0.35029694, -0.15496174, 0.11386904, 0.22592051, 0.1590684, 0.49690887, -0.37077406, -0.48519966, -0.14407466, 0.24836525, 0.38462397)); + target += mul(conv2d_1_tf, min16float4x4(-0.043213595, -0.004892144, 0.29046863, 0.57064444, 0.37136674, -0.5603234, -0.30733815, 0.26740906, 0.016959883, -0.26567596, 0.101653986, 0.34387913, -0.13222592, -0.34239995, 0.32046688, 0.023962379)); + target += mul(nconv2d_1_tf, min16float4x4(-0.2955613, 0.44671535, 0.056253802, -0.6011664, -0.30715483, 0.16890973, 0.041257784, -0.1544008, 0.4653661, -0.22183, -0.23155628, -0.063779, 0.10350268, 0.02045104, -0.22509801, 0.14633855)); + target += min16float4(-0.00089101185, -0.038285345, 0.023986168, -0.122330956); + tex4[gxy] = target; + + target = mul(e1, min16float4x4(-0.6336626, -0.23328744, 0.054100014, -0.6572063, 0.22899812, 0.47125596, 0.087406546, 0.5788615, -0.24324284, -0.17465535, 0.23223022, -0.4417298, -0.1195797, -0.14119461, -0.2301777, -0.1748931)); + target += mul(e2, min16float4x4(0.2554768, -0.0835268, 0.13054265, 0.033940453, -0.22754695, 0.053536188, -0.10300488, -0.10146903, 0.3104604, -0.5024146, 0.089460805, -0.20216464, 0.6033507, 0.12908716, -0.29953086, 0.292064)); + target += mul(ne1, min16float4x4(0.09586759, -0.037499018, -0.23253569, 0.63889295, 0.18920106, -0.6646685, 0.07218118, -0.61459464, -0.16397415, 0.3131906, 
-0.39399612, 0.36777702, 0.39545253, 0.030677503, 0.29420745, -0.02527333)); + target += mul(ne2, min16float4x4(-0.2464485, -0.117239855, -0.13390337, 0.43170166, 0.10044111, -0.13811369, -0.007668335, 0.06387773, -0.11786689, 0.23223364, 0.12805769, 0.06410502, -0.2818576, 0.21286973, 0.17026524, -0.22247931)); + target += mul(conv2d_2_tf, min16float4x4(0.12590794, 0.25101408, -0.014941272, -0.06091461, -0.106272854, -0.23196393, 0.64016813, 0.0025616125, 0.16706267, 0.008579063, 0.04476896, -0.5403641, -0.011274305, -0.014704461, -0.068788156, 0.47190762)); + target += mul(nconv2d_2_tf, min16float4x4(0.10427173, -0.11386145, -0.6048206, -0.20245847, -0.011730377, -0.0119483, 0.06255473, -0.5017671, -0.07181296, -0.08626898, -0.035322662, 0.42718327, 0.041101683, 0.017210655, -0.07089471, -0.6541289)); + target += mul(conv2d_1_tf, min16float4x4(-0.43911383, -0.099413894, -0.22120018, -0.3121928, -0.32394376, 0.1159015, 0.04434728, 0.014404674, 0.040322874, 0.06727233, -0.046662346, -0.066591434, -0.004613069, -0.6566657, -0.13442427, -0.081967555)); + target += mul(nconv2d_1_tf, min16float4x4(0.7393613, 0.059159152, 0.21900342, 0.26184326, 0.15656939, -0.05151207, -0.02730003, -0.055701576, -0.50296444, 0.09566756, -0.10248052, -0.39747316, 0.5877897, 0.83397114, -0.07968032, -0.3097048)); + target += min16float4(-0.010642331, -0.050244823, -0.009665539, 0.26457447); + tex5[gxy] = target; +} + + +//!PASS 3 +//!DESC Conv-4x3x3x16, Conv-4x1x1x40 +//!IN tex4, tex5, tex3 +//!OUT tex6, tex1, tex2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass3(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = tex4.SampleLevel(sam, pos - inputPt, 0); + min16float4 b1 = tex4.SampleLevel(sam, pos + 
float2(-inputPt.x, 0), 0); + min16float4 c1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d1 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e1 = tex4.SampleLevel(sam, pos, 0); + min16float4 f1 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i1 = tex4.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex5.SampleLevel(sam, pos - inputPt, 0); + min16float4 b2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d2 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e2 = tex5.SampleLevel(sam, pos, 0); + min16float4 f2 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i2 = tex5.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); 
+ e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float4 conv2d_1_tf = tex3.SampleLevel(sam, pos, 0); + min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0); + conv2d_1_tf = max(conv2d_1_tf, 0); + + min16float4 conv2d_5_tf = mul(a1, min16float4x4(-0.11527973, 0.18487021, 0.0010509634, -0.3687562, -0.012861112, -0.37319645, -0.31061935, -0.051598914, 0.061436053, -0.2643697, -0.032551475, 0.59398615, 0.17265628, 0.1634019, 0.026527049, -0.0040123775)); + conv2d_5_tf += mul(b1, min16float4x4(-0.19826698, -0.29437867, 0.15727736, 0.44590214, 0.27655315, 0.28220633, 0.12990361, -0.09000104, -0.26396993, -0.53520125, 0.40639028, 0.7958488, 0.043264065, -0.08110669, -0.28618547, 0.12722827)); + conv2d_5_tf += mul(c1, min16float4x4(0.26455724, -0.36315665, -0.22116943, 0.049996275, 0.28526706, -0.0045478707, -0.20538875, 0.03192557, 0.04443011, -0.48084733, -0.32755423, 0.0075373487, 0.34481105, 0.04272154, -0.11092845, -0.07401724)); + conv2d_5_tf += mul(d1, min16float4x4(0.28374255, 0.13204694, 0.041846596, -0.57726663, 1.0038753, 0.42640173, -0.045806255, 0.3795911, 0.52897507, -0.2522673, 0.37759414, 0.158503, 0.111165345, -0.033814687, -0.37906894, 0.14007671)); + conv2d_5_tf += mul(e1, min16float4x4(0.30553007, -0.032092307, 0.6779135, -0.32720757, 0.29837027, 0.13522549, 0.21653146, 0.4553826, -0.22200927, -0.20921928, 0.36475468, 0.27989116, 0.6222863, -0.37027213, 0.06746388, 0.16675332)); + conv2d_5_tf += mul(f1, min16float4x4(0.31677073, -0.37482786, 0.4029838, 0.43627468, 0.32849845, -0.3442297, 0.1752726, 0.37502408, -0.1561963, -0.17489041, 0.7141825, -0.13179696, 0.17682795, 0.052273672, 0.07300372, 0.20322469)); + conv2d_5_tf += mul(g1, min16float4x4(0.07722791, 0.51997215, 0.2052519, -0.6162976, 0.07318059, -0.16653596, -0.0609372, -0.13199529, -0.011298448, -0.066250905, 0.11658636, -0.07317175, -0.068134755, 0.032443475, -0.27242857, 0.26479205)); + conv2d_5_tf += mul(h1, min16float4x4(-0.46400046, 
0.34256476, -0.074927844, -0.082626544, 0.38616362, 0.10320202, 0.7306549, -0.41960227, -0.33295953, -0.35537082, 0.040369444, 0.18173583, 0.36835003, -0.078561984, -0.13071333, -0.06847678)); + conv2d_5_tf += mul(i1, min16float4x4(0.0951899, -0.21144655, 0.12174552, 0.09496668, -0.17025085, -0.36465582, 0.20724316, 0.07027979, 0.17988989, -0.16671456, -0.15068638, 0.26715076, 0.022114933, 0.14284599, -0.06316286, 0.017598677)); + conv2d_5_tf += mul(a2, min16float4x4(0.22179046, -0.19104601, 0.10500515, 0.22017653, -0.065115064, -0.027006533, -0.21086605, 0.00932852, -0.6196575, 0.04396425, 0.52487534, 0.61164427, 0.15172893, 0.219877, 0.103516005, -0.103571504)); + conv2d_5_tf += mul(b2, min16float4x4(0.122733794, 0.19491453, 0.22410785, -0.17341182, -0.18816754, 0.22092234, -0.055087283, -0.14617631, 0.4338981, -0.45366564, -1.4062341, 0.19594707, 0.2178627, 0.016837195, -0.2226328, 0.079190396)); + conv2d_5_tf += mul(c2, min16float4x4(0.16418308, 0.14917587, 0.35162288, 0.04064204, -0.037038237, 0.06579139, -0.08464511, -0.2156906, 0.22791082, -1.1695892, 0.53665465, -0.77753544, 0.0065266103, 0.15857838, 0.010236925, 0.14953533)); + conv2d_5_tf += mul(d2, min16float4x4(0.64548135, -0.02291521, -0.14370848, 0.049308565, 0.13637903, 0.14568083, -0.1488358, -0.0038734428, 0.0809154, -0.15466721, -0.06614126, -0.047732286, 0.311668, 0.22075401, 0.26094854, -0.27763176)); + conv2d_5_tf += mul(e2, min16float4x4(0.12075334, -0.23920162, 0.19115442, -0.33920774, 0.15199614, 0.27974042, -0.05022236, -0.15280685, 0.37271795, -0.76389724, -0.56503266, 1.4975219, 0.24002175, -0.12661129, 0.045953903, 0.2102559)); + conv2d_5_tf += mul(f2, min16float4x4(-0.02855315, -0.16729961, -0.27380818, -0.08810453, 0.061245166, 0.27268958, 0.2282609, 0.072155826, -0.65736717, -0.46307757, -0.5473049, 0.50772667, -0.1581774, 0.28763455, -0.1870661, -0.16523343)); + conv2d_5_tf += mul(g2, min16float4x4(0.23464368, 0.25850806, -0.054024473, -0.13788947, -0.24835043, -0.028147692, 
-0.23022775, 0.11494646, 0.31069988, -0.21450949, 0.40749013, -0.073832974, -0.16241223, 0.15673774, 0.23648019, -0.34203738)); + conv2d_5_tf += mul(h2, min16float4x4(-0.10198349, -0.052500926, 0.02638934, 0.19718044, -0.09078705, 0.07717591, 0.44648582, -0.30146563, -0.10124157, 0.12145466, -0.2133955, 0.16855773, -0.12310728, 0.35327804, -0.44273457, 0.20639896)); + conv2d_5_tf += mul(i2, min16float4x4(0.08033835, 0.0977811, 0.007069267, -0.110171854, -0.008568571, -0.10922981, 0.12048108, -0.0835261, 0.019930357, -0.12652875, 0.02870121, 0.12214532, -0.024486745, 0.3588685, -0.16501926, 0.11914434)); + conv2d_5_tf += mul(na1, min16float4x4(0.24003507, -0.040643565, -0.4267142, 0.34356147, -0.2618635, -0.1550601, -0.18566506, 0.33267352, -0.17584917, -0.24971883, 0.167064, -0.20808934, 0.3197215, 0.19626021, -0.16993162, -0.16976681)); + conv2d_5_tf += mul(nb1, min16float4x4(0.159248, -0.33713767, -0.37823528, 0.25286102, -0.6171255, 0.01159639, 0.08387377, -0.0796005, -0.18405017, -0.11881008, -0.03026552, 0.030733835, 0.17692643, 0.17118043, 0.23938146, -0.40504465)); + conv2d_5_tf += mul(nc1, min16float4x4(0.11274836, -0.023647472, 0.083114825, 0.5222033, -0.07415273, -0.3251913, -0.034298245, -0.07125199, 0.09593269, -0.23062208, -0.3168607, -0.13040248, -0.41249517, 0.39030293, 0.47400078, -0.109306306)); + conv2d_5_tf += mul(nd1, min16float4x4(-0.49999082, 0.012254524, -0.035179958, 0.212335, -0.10354367, -0.19730526, 0.092015326, -0.07317916, -0.21900047, -0.13948579, -0.3228226, -0.22363624, -0.06421761, 0.16125691, 0.38075948, -0.31371582)); + conv2d_5_tf += mul(ne1, min16float4x4(-1.0006356, -0.13763155, -0.8414047, -0.051852856, -0.44105098, 0.526086, 0.23091859, -0.6621191, -0.015348964, 0.37972412, -0.24986422, 0.13964157, -0.03184678, 0.25394693, -0.051659737, -0.34171197)); + conv2d_5_tf += mul(nf1, min16float4x4(0.14520285, 0.1346628, 0.047271203, 0.64346415, -0.25639483, 0.052174076, 0.28681588, -0.32156095, 0.014350296, 0.028580237, 0.33776954, 
0.06681965, -0.27312553, 0.44097883, -0.16519593, -0.7293824)); + conv2d_5_tf += mul(ng1, min16float4x4(-0.65626615, -0.20801732, -0.18783297, 0.27998376, -0.51550066, -0.23272751, -0.3744558, 0.11267917, -0.1879591, 0.043539204, -0.17665562, 0.28546363, -0.20627682, 0.33176526, 0.34412766, -0.4310386)); + conv2d_5_tf += mul(nh1, min16float4x4(0.51410156, -0.08615402, -0.2396778, -0.027256064, 0.11491742, -0.20842157, 0.3855824, -0.19823207, 0.0062098945, -0.2629099, 0.13158852, -0.08746773, -0.46980307, 0.57169086, -0.13392213, 0.13375558)); + conv2d_5_tf += mul(ni1, min16float4x4(0.09988252, 0.19396676, -0.011215926, 0.2714918, 0.07985461, 0.30587563, 0.21915142, -0.14004244, -0.336268, 0.023702772, 0.15740578, -0.06307948, 0.06453276, 0.26978606, 0.45891464, -0.35511568)); + conv2d_5_tf += mul(na2, min16float4x4(-0.33263445, -0.13086738, -0.30128893, 0.03720744, 0.46366304, -0.13430476, -0.26493385, 0.14521147, -0.025578065, -0.043376725, 0.055235144, -0.08467402, 0.12879072, 0.2621278, -0.030150373, -0.079033755)); + conv2d_5_tf += mul(nb2, min16float4x4(-0.15686864, 0.06962337, -0.24032803, 0.05093969, 0.12118379, 0.2144539, 0.21314697, -0.15564163, -0.15193312, -0.15797225, 0.061610706, 0.06689548, 0.42354256, 0.24339569, 0.14413804, -0.08890708)); + conv2d_5_tf += mul(nc2, min16float4x4(0.021830576, -0.0682399, -0.25052184, 0.035374403, -0.0022370394, 0.23796171, 0.40747103, -0.14309348, -0.22325014, 0.12337428, -0.0727028, 0.12374459, -0.24148722, 0.34091887, 0.5052561, -0.13712624)); + conv2d_5_tf += mul(nd2, min16float4x4(-0.583754, -0.10253819, -0.26736188, -0.084894784, 0.7130811, 0.5888696, 0.24837445, 0.20670207, 0.08242887, -0.03090308, 0.24002716, -0.04146999, 0.33550006, -0.006085788, -0.2078999, 0.016955601)); + conv2d_5_tf += mul(ne2, min16float4x4(-0.23921615, 1.0534316, -0.29723012, -0.06626253, 0.022887046, -0.6139072, 0.22857629, 0.4203786, -0.02951169, 0.0501039, -0.054740574, -0.15496075, 0.9533812, 0.21038955, 0.33969748, 0.18853404)); + 
conv2d_5_tf += mul(nf2, min16float4x4(-0.13571729, -0.045776337, 0.23663524, 0.1457326, -0.23159564, -0.44608104, -0.35497522, -0.14684997, 0.042379193, 0.16966693, 0.2560789, -0.07091574, 0.010749883, -0.26966086, -0.16322245, 0.095426805)); + conv2d_5_tf += mul(ng2, min16float4x4(-0.027934154, -0.25037688, 0.19623838, 0.16128206, 0.21479255, 0.4066385, -0.06756232, -0.19681008, 0.09168842, 0.46935177, -0.059632402, -0.3419115, 0.2789002, 0.012714867, 0.15322958, 0.05255599)); + conv2d_5_tf += mul(nh2, min16float4x4(0.2074098, -0.19564646, 0.21713807, -0.29207307, -0.08546043, 0.122562535, -0.5150736, 0.5190804, -0.116998374, 0.17080544, -0.29132518, 0.47585255, -0.14625762, -0.026589578, -0.13111407, 0.03473621)); + conv2d_5_tf += mul(ni2, min16float4x4(-0.3399405, 0.063775875, -0.0121724615, 0.13809827, -0.1575877, 0.13529225, -0.28708464, -0.063552216, 0.08623843, 0.034867074, 0.25082812, -0.038863987, 0.08048017, -0.43998414, -0.05038377, -0.20123458)); + conv2d_5_tf += min16float4(0.19016464, 0.19431238, -0.073604904, 0.101166695); + min16float4 nconv2d_5_tf = max(-conv2d_5_tf, 0); + conv2d_5_tf = max(conv2d_5_tf, 0); + + min16float4 conv2d_4_tf = mul(a1, min16float4x4(0.259803, 0.14121838, -0.3216694, 0.16912009, -0.24997918, -0.024859427, 0.07951931, -0.17898253, 0.14770418, -0.38608834, 0.7155576, -0.008749993, 0.106385805, -0.08190305, 0.06277034, 0.05247095)); + conv2d_4_tf += mul(b1, min16float4x4(-0.10331291, 0.29847905, -0.20864278, -0.34607938, -0.0629403, 0.24202278, 0.15617771, 0.09471163, 0.29827452, -0.5237911, 0.8446165, -0.038001515, 0.085504964, -0.012998129, -0.12903701, -0.068084855)); + conv2d_4_tf += mul(c1, min16float4x4(-0.028803846, 0.117718086, 0.11924323, -0.23554896, -0.31169716, 0.2164557, 0.054745417, -0.2886858, 0.34304592, -0.15872054, 0.21533915, 0.23624876, -0.02507208, 0.16001348, -0.14645866, -0.013143789)); + conv2d_4_tf += mul(d1, min16float4x4(0.12311184, 0.16843726, -0.5478087, 0.036556758, -0.0024939126, -0.12264501, 
0.090127975, -0.14638199, -0.33366996, 0.1817309, 0.018728942, -0.025097579, -0.00233696, 0.15182042, -0.072947, -0.15065937)); + conv2d_4_tf += mul(e1, min16float4x4(0.3238381, 0.19316678, 0.23307748, -0.10455285, -0.35405514, -0.06559013, 0.4206979, 0.08059919, -0.26130152, -0.23416454, -0.21285532, 0.07799376, 0.12372864, -0.3774056, 0.022239799, 0.22356819)); + conv2d_4_tf += mul(f1, min16float4x4(0.066345, 0.20370135, -0.01601085, 0.014701113, 0.27098605, 0.25511372, -0.048403386, -0.014162313, 0.11301996, -0.09638182, 0.12047054, -0.010323633, 0.21627729, 0.18377618, -0.12752205, -0.0668105)); + conv2d_4_tf += mul(g1, min16float4x4(0.18890683, -0.21100806, -0.38314816, 0.12188494, -0.09069559, 0.1785706, -0.19502263, -0.22853898, -0.096488185, 0.18105212, -0.0045291157, -0.018952737, 0.14934972, -0.17416078, 0.05363704, -0.17642738)); + conv2d_4_tf += mul(h1, min16float4x4(-0.15392087, 0.13997103, -0.12765433, -0.054465868, 0.0061383434, 0.03424787, -0.08585949, -0.10249745, -0.055375032, -0.047258787, -0.10105776, 0.09468892, 0.32030013, -0.14938186, 0.18287018, 0.007592655)); + conv2d_4_tf += mul(i1, min16float4x4(0.109669484, 0.02212132, 0.038995523, -0.0041161263, -0.12115841, -0.048061926, 0.06674463, -0.33846095, 0.04251217, -0.05917749, 0.17834029, 0.010219928, 0.2690458, 0.09282476, 0.077470005, -0.07310091)); + conv2d_4_tf += mul(a2, min16float4x4(0.4314233, 0.035379685, 0.27331847, 0.19597715, -0.09619968, -0.055907905, 0.07898602, 0.031254813, -0.09366987, -0.37436283, 0.061305135, -0.32644534, -0.16999187, 0.06906536, -0.1228417, -0.09826574)); + conv2d_4_tf += mul(b2, min16float4x4(0.6059936, -0.10060162, -0.18080838, 0.26205355, 0.033052504, -0.10625297, -0.0038814575, 0.026052764, 0.19484659, -0.24242568, 0.8054419, -0.3437365, -0.010305425, -0.079504244, 0.11879563, -0.14375582)); + conv2d_4_tf += mul(c2, min16float4x4(0.23313539, -0.026485069, 0.13332158, 0.28462213, -0.19786534, 0.048259735, 0.024113638, 0.23403068, -1.0330093, 0.0059400625, 
0.23721488, -1.379481, 0.12166913, -0.07133997, 0.060898513, 0.092720084)); + conv2d_4_tf += mul(d2, min16float4x4(0.16513251, 0.013819962, -0.009859532, -0.037474833, 0.25651336, -0.131653, 0.03145131, -0.27886832, 0.27808505, -0.099978246, -0.11189488, 0.053313572, 0.11455811, 0.10826371, 0.0017301271, -0.041959)); + conv2d_4_tf += mul(e2, min16float4x4(-0.037442397, 0.061722398, 0.099159, -0.18970016, -0.13042277, 0.16767356, -0.028342545, 0.18715699, 0.22246139, 0.3154743, -0.39717823, 0.26053482, -0.012097491, 0.1746896, 0.3899962, -0.13013846)); + conv2d_4_tf += mul(f2, min16float4x4(-0.14552362, -0.26800197, 0.09035887, 0.24266347, -0.14494316, 0.033814326, -0.06647855, -0.16609156, 0.30540654, 0.037082594, 0.14951941, 0.12753695, -0.045153987, -0.28476146, 0.37640104, -0.04667195)); + conv2d_4_tf += mul(g2, min16float4x4(0.2071077, -0.09297775, -0.04906301, -0.24280597, 0.15925987, -0.05631783, 0.08169953, -0.20124075, 0.23060048, -0.05786468, 0.23959383, 0.1620485, 0.14333409, -0.12757483, -0.1424963, 0.13118197)); + conv2d_4_tf += mul(h2, min16float4x4(-0.101942524, -0.02240319, 0.11718157, -0.13591368, 0.11223302, -0.042933583, -0.07766777, 0.01667011, 0.07462998, 0.020704709, -0.04329035, -0.01358702, 0.13569939, 0.015980164, -0.08001042, 0.13890027)); + conv2d_4_tf += mul(i2, min16float4x4(0.01755685, -0.047599614, 0.06456479, -0.08004052, 0.08108282, 0.06789228, -0.14048836, -0.020240005, 0.039701223, 0.023405846, 0.06305444, -0.046804685, 0.040620867, 0.013529182, -0.094961315, 0.02959053)); + conv2d_4_tf += mul(na1, min16float4x4(-0.053775985, -0.0060494044, 0.14724614, 0.07248909, -0.056616947, 0.0004714896, -0.18737504, -0.15240799, -0.030883765, -0.007487297, -0.0044565946, 0.15024893, -0.16870505, 0.09338804, -0.21873595, -0.14493267)); + conv2d_4_tf += mul(nb1, min16float4x4(-0.045113027, -0.2153715, 0.04520989, 0.26561612, -0.12634845, -0.10975088, -0.3677834, -0.4343602, -0.34146985, 0.29135808, 0.026339425, -0.0995021, 0.012693227, 
0.07312179, 0.21671581, 0.11961088)); + conv2d_4_tf += mul(nc1, min16float4x4(0.19766524, -0.31538734, 0.35708517, 0.33092737, 0.027086282, 0.024219114, -0.15289012, -0.18128034, -0.16041638, 0.057314564, 0.079830885, -0.08828221, 0.11828446, -0.13336371, -0.078453206, 0.21232514)); + conv2d_4_tf += mul(nd1, min16float4x4(-0.13100033, -0.24849984, 0.3087074, 0.017271562, -0.17455627, -0.014364008, 0.077686995, -0.015820628, 0.18584616, -0.16705278, -0.3169503, 0.09107534, -0.04958684, -0.008202742, 0.024148908, -0.04654239)); + conv2d_4_tf += mul(ne1, min16float4x4(-0.16020702, -0.18623418, -0.29434547, 0.5008317, 0.23796988, -0.11154579, -0.5167728, -0.14195764, 0.15495163, -0.028505204, -0.2105556, 0.22491512, -0.11658545, 0.31665426, 0.35085753, -0.40148884)); + conv2d_4_tf += mul(nf1, min16float4x4(0.24866697, -0.3752738, 0.8472619, 0.16663249, -0.25808626, -0.037561346, -0.1440471, -0.107407264, 0.016663626, 0.1599037, -0.31926402, 0.15272903, -0.14700623, -0.05275371, 0.061130624, 0.084672675)); + conv2d_4_tf += mul(ng1, min16float4x4(-0.24184473, -0.016008917, 0.040023588, 0.1517675, -0.1339458, 0.009985992, 0.15634708, -0.07649679, 0.0021696684, -0.07027257, -0.07509208, -0.27060902, -0.21299353, 0.12154156, -0.3159698, 0.2511261)); + conv2d_4_tf += mul(nh1, min16float4x4(0.19845779, 0.023986215, -0.073409855, 0.0812208, 0.013382121, -0.049414996, -0.12990347, 0.052681953, -0.12787153, -0.100129806, -0.036296804, -0.13915883, -0.24022135, 0.167096, -0.15128131, 0.17779276)); + conv2d_4_tf += mul(ni1, min16float4x4(-0.05787442, -0.19698323, 0.13090582, 0.1501304, -0.09954089, -0.008470983, -0.095334776, 0.114635326, -0.16330223, -0.046815667, -0.086304545, -0.15729928, -0.1982723, 0.10607274, -0.25540838, 0.09633669)); + conv2d_4_tf += mul(na2, min16float4x4(-0.25680968, -0.18444876, 0.053333476, 0.10470261, 0.17798793, -0.108659215, 0.1787569, -0.027407814, 0.12637395, -0.038193744, -0.16185284, 0.14068736, 0.092281684, 0.022276353, 0.013779975, 
0.026369803)); + conv2d_4_tf += mul(nb2, min16float4x4(-0.17329752, 0.21632285, -0.036964342, 0.30856085, 0.015225849, 0.04158692, -0.010607313, 0.16295516, 0.18873654, 0.24728407, 0.09787, -0.14381099, -0.091119304, 0.12914585, -0.039659716, -0.10700463)); + conv2d_4_tf += mul(nc2, min16float4x4(-0.037163302, 0.05201725, -0.149489, -0.05682234, -0.022634465, -0.074764505, -0.010783339, 0.028970495, -0.045976285, -0.1923207, -0.037494432, -0.13024884, -0.1957353, 0.013454359, -0.30236122, -0.078870796)); + conv2d_4_tf += mul(nd2, min16float4x4(-0.17753989, -0.1549664, 0.08087595, 0.046868976, -0.09354348, 0.22648604, 0.002651186, 0.11890617, -0.0073132347, 0.05030891, -0.08128038, 0.14395374, -0.001108739, -0.030957213, -0.03568773, 0.055131156)); + conv2d_4_tf += mul(ne2, min16float4x4(-0.029484594, -0.013036961, -0.31721568, 0.11611545, -0.24111903, -0.33007705, 0.5950326, -0.070911475, -0.04757172, -0.037676062, -0.14590797, 0.076822214, -0.1672743, -0.41848892, 0.39202756, -0.30958134)); + conv2d_4_tf += mul(nf2, min16float4x4(0.17605461, 0.12216047, -0.02412872, -0.14132546, -0.052373543, 0.08169531, 0.18497281, 0.074685514, -0.055427983, 0.14018987, -0.11671619, 0.108945735, -0.032986425, 0.11385016, 0.05801377, -0.1457665)); + conv2d_4_tf += mul(ng2, min16float4x4(-0.27222672, -0.0074164676, 0.35768685, 0.0074552484, 0.16729778, 0.14860032, -0.3657366, 0.24510175, -0.0621289, -0.0137252435, -0.26145887, 0.0556681, -0.07332952, 0.13122542, -0.020396946, 0.113705456)); + conv2d_4_tf += mul(nh2, min16float4x4(0.08118381, -0.06442098, 0.00044297878, 0.13279027, -0.20708169, 0.11252618, -0.033728387, -0.0105973175, -0.2138218, 0.34612998, -0.15597765, 0.18179017, -0.007853463, -0.045547944, 0.22064093, 0.0548327)); + conv2d_4_tf += mul(ni2, min16float4x4(-0.10656318, -0.014200068, 0.062040597, -0.037210476, -0.07271065, -0.027337732, -0.14988437, -0.14711551, -0.028843492, -0.0046596485, -0.15023676, 0.08530336, -0.016875269, -0.024734195, 0.055177588, 
0.010381644)); + conv2d_4_tf += min16float4(-0.021330277, -0.09496422, -0.1339419, 0.012216251); + tex6[gxy] = conv2d_4_tf; + min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0); + conv2d_4_tf = max(conv2d_4_tf, 0); + + min16float4 target = mul(e1, min16float4x4(-0.4756803, -0.16041027, 0.30747655, 0.27719444, 0.33626345, -0.093426555, -0.08751585, -0.025898175, 0.12469858, 0.162526, 0.071950376, 0.36727026, -0.26165214, 0.17652564, -0.081568465, 0.17669047)); + target += mul(e2, min16float4x4(0.10045615, -0.47277164, 0.13970673, -0.036603283, 0.10723418, -0.0733819, 0.07046736, 0.04479655, -0.5100679, 0.4051206, -0.3043826, 0.07709692, 0.25090587, -0.5827475, 0.27195984, 0.42297873)); + target += mul(ne1, min16float4x4(-0.34415862, -0.056642354, -0.32332316, 0.049897127, 0.08399151, 0.683046, -0.16349371, -0.4878456, -0.097749546, 0.7214421, -0.2821467, -0.16691755, 0.3712332, -0.71557045, 0.40365914, 0.37325174)); + target += mul(ne2, min16float4x4(-0.333854, 0.11971563, -0.26533902, -0.033346854, 0.09896302, -0.19311592, -0.006087015, -0.104003794, 0.05347405, -0.16057043, 0.15876219, 0.1538847, -0.07954591, 0.24062383, -0.025401022, -0.33599105)); + target += mul(conv2d_5_tf, min16float4x4(0.11794056, -0.0031797416, 0.08360105, 0.12222232, -0.16638078, 0.26014742, -0.047267277, -0.27900735, 0.17616066, -0.12788172, 0.22856903, -0.39034957, -0.36313176, 0.12272574, 0.2235959, -0.31102005)); + target += mul(nconv2d_5_tf, min16float4x4(0.03297161, 0.19597028, -0.068131894, -0.059938233, 0.18935929, -0.12004069, 0.08705267, 0.26411813, -0.021374375, 0.24630849, -0.08980925, 0.15982057, 0.3533297, -0.15414584, -0.19008748, 0.11310849)); + target += mul(conv2d_1_tf, min16float4x4(-0.4622819, 0.31923467, -0.38989246, 0.5539857, -0.035433546, -0.12729715, -0.0669769, -0.048216928, -0.32078394, 0.26958883, 0.08897814, -0.31043166, 0.26743132, 0.38835636, -0.30535862, -0.22241123)); + target += mul(nconv2d_1_tf, min16float4x4(0.47431698, -0.755935, -0.075302646, 0.27771655, 
0.052087527, -0.17221431, 0.0008429987, 0.15527548, -0.04587466, -0.11802989, 0.39905685, -0.07758683, -0.11415051, 0.004637339, -0.19803126, 0.19956517)); + target += mul(conv2d_4_tf, min16float4x4(0.36277947, -0.13364364, 0.18459712, -0.1705512, -0.46083033, 0.43629453, 0.112646095, -0.18511245, 0.037818372, 0.1220617, -0.22268273, -0.11983507, -0.5432721, -0.2102279, -0.014456884, 0.16428374)); + target += mul(nconv2d_4_tf, min16float4x4(0.22811654, 0.16262956, 0.18411161, 0.49102694, -0.15078211, -0.6144134, -0.11632199, 0.2740543, -0.11322067, -0.16751853, 0.18453367, 0.14305107, 0.36418238, -0.34248996, -0.055178564, 0.37168074)); + target += min16float4(0.07878663, -0.045328207, -0.07142425, -0.006036755); + tex1[gxy] = target; + + target = mul(e1, min16float4x4(-0.35645446, -0.01804877, -0.53608185, 0.32968932, 0.13975728, -0.1716116, 0.09503091, -0.12088551, 0.30239868, 0.9217966, 0.016221086, -0.26894137, -0.0047026747, 0.54764843, -0.2826915, 0.0016894634)); + target += mul(e2, min16float4x4(-0.15123259, 0.2014175, 0.05961645, -0.32386652, -0.25275725, 0.3658508, -0.104193784, -0.02756655, 0.2696138, 0.17608197, 0.17685752, 0.6808081, -0.40293297, 0.48387393, 0.25278264, 0.28291366)); + target += mul(ne1, min16float4x4(-0.18928573, -0.18908137, 0.47045723, 0.5454373, 0.31339395, -0.0064702537, -0.37307036, -0.37479213, 0.2235379, -0.370863, 0.02827034, 0.024350066, -0.32538193, -0.33686417, 0.8949382, 0.3324315)); + target += mul(ne2, min16float4x4(-0.17215039, -0.14995, -0.4451278, 0.30758965, 0.21607, 0.08995007, 0.09553425, -0.21233945, -0.14442022, 0.09295349, -0.29228872, -0.3875935, 0.11704046, -0.4206096, 0.35226774, -0.08189522)); + target += mul(conv2d_5_tf, min16float4x4(-0.12517966, 0.060051568, -0.38888076, 0.08354471, 0.17010468, -0.34286287, -0.06961373, 0.032387406, -0.025718998, -0.1661844, -0.075671494, 0.10289619, -0.28309906, -0.14461538, 0.22726184, 0.4752376)); + target += mul(nconv2d_5_tf, min16float4x4(0.15411675, 0.17533994, 
0.3406641, -0.0597274, -0.21072194, 0.1517182, 0.032032263, 0.18653658, 0.20970167, -0.10793765, -0.05335404, -0.095203936, 0.2917104, -0.1170929, -0.11652503, -0.46912733)); + target += mul(conv2d_1_tf, min16float4x4(-0.272871, 0.07467413, 0.16981912, 0.57318956, 0.35038894, -0.06679483, 0.3777534, -0.01522816, 0.2588504, -0.008976239, 0.31769443, 0.07070477, 0.059302222, 0.28855336, -0.14700443, -0.08605704)); + target += mul(nconv2d_1_tf, min16float4x4(-0.27067363, -0.2191635, -0.2377148, -1.0028448, -0.25673935, 0.10997322, -0.39032057, 0.06524818, 0.5248202, 0.40049195, 0.6711809, 0.2878331, 0.19606547, -0.092196286, 0.27838528, 0.03120515)); + target += mul(conv2d_4_tf, min16float4x4(0.3029178, -0.027027214, 0.13855064, -0.16550988, 0.2354576, -0.1715326, 0.12981784, 0.5013446, 0.24411377, -0.13030572, -0.08595908, -0.104394995, 0.16794646, -0.044388745, 0.2807999, 0.39108425)); + target += mul(nconv2d_4_tf, min16float4x4(-0.05535261, -0.15662162, 0.14935054, 0.10706811, 0.026958441, -0.15323113, -0.19261432, -0.24361719, -0.2607876, 0.038486157, -0.04509224, 0.18722118, -0.14478058, 0.03614682, -0.12608361, -0.5203596)); + target += min16float4(-0.17363991, 0.071162574, -0.09289675, 0.013446863); + tex2[gxy] = target; +} + + +//!PASS 4 +//!DESC Conv-4x3x3x16, Conv-4x1x1x48 +//!IN tex1, tex2, tex3, tex6 +//!OUT tex7, tex4, tex5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass4(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = tex1.SampleLevel(sam, pos - inputPt, 0); + min16float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d1 = tex1.SampleLevel(sam, pos + float2(0, 
-inputPt.y), 0); + min16float4 e1 = tex1.SampleLevel(sam, pos, 0); + min16float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i1 = tex1.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex2.SampleLevel(sam, pos - inputPt, 0); + min16float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e2 = tex2.SampleLevel(sam, pos, 0); + min16float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i2 = tex2.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float4 conv2d_1_tf = tex3.SampleLevel(sam, pos, 0); + 
min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0); + conv2d_1_tf = max(conv2d_1_tf, 0); + + min16float4 conv2d_4_tf = tex6.SampleLevel(sam, pos, 0); + min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0); + conv2d_4_tf = max(conv2d_4_tf, 0); + + min16float4 conv2d_8_tf = mul(a1, min16float4x4(-0.162897, -0.21250516, -0.11219427, 0.30969706, 0.078927204, -0.14922144, 0.5486932, 0.2884913, 0.07018745, 0.45946357, -0.23759702, -0.18914284, 0.19762751, 0.56881535, -0.2141465, 0.27216902)); + conv2d_8_tf += mul(b1, min16float4x4(-0.17507325, -0.577772, -0.46351492, 0.09431303, 0.023881523, -0.068162896, -0.0029204858, -0.076631226, -0.07645065, 0.28997856, -0.0349899, 0.087704636, 0.29194608, 0.7767595, 0.17478088, -0.295144)); + conv2d_8_tf += mul(c1, min16float4x4(0.23039296, -0.000184939, -0.032427344, 0.0926983, -0.4264918, -0.44138262, 0.39098918, -0.0102598835, 0.066287994, 0.15478721, -0.062338993, 0.15079321, 0.120016515, 0.7005824, -0.12260436, 0.090042405)); + conv2d_8_tf += mul(d1, min16float4x4(0.014269367, 0.21645544, -0.4203915, 0.0077638677, -0.18618487, 0.30165052, 0.6985895, -0.014695781, -0.071353786, -0.49996287, -0.79902583, -0.06343025, 0.291085, 0.28801495, 0.46547806, 0.1311194)); + conv2d_8_tf += mul(e1, min16float4x4(0.17163453, 0.21760814, -0.67896426, 0.1487859, 0.05881719, -0.08391752, 0.44536906, 0.24853623, -0.7685656, 0.45705163, -1.0204223, 0.1884743, -0.3251896, -0.49221343, 0.38116506, -0.09428967)); + conv2d_8_tf += mul(f1, min16float4x4(0.2759429, 0.22141403, -0.13269989, 0.06833041, -0.29562923, -0.26589182, -0.34452415, 0.36388424, -0.3184807, 0.08254464, -0.15477169, 0.03237491, -0.34190834, -0.2777646, -0.15713428, -0.22231084)); + conv2d_8_tf += mul(g1, min16float4x4(-0.36887082, -0.34607458, 0.5719879, 0.09200919, 0.12724651, 0.20281908, 0.39280674, 0.09216231, 0.3126475, -0.0075341803, -0.046779484, 0.10883631, 0.20136468, 0.70330596, -0.024237871, -0.061087623)); + conv2d_8_tf += mul(h1, min16float4x4(-0.08114617, -0.02456657, 
0.1287709, 0.5486885, -0.10143632, -0.39116892, 0.24008204, 5.8133483e-05, -0.36640543, -0.52113515, 0.3836287, 0.09541327, 0.01115865, -0.2044513, 0.07734024, -0.29509112)); + conv2d_8_tf += mul(i1, min16float4x4(0.25962162, -0.05327207, -0.28945914, 0.320823, 0.049143463, 0.011241379, 0.004193257, 0.3872085, -0.47137228, -0.44385332, -0.2591483, -0.20189615, 0.08729277, -0.14813553, -0.29911307, 0.0067013856)); + conv2d_8_tf += mul(a2, min16float4x4(0.13526323, 0.6637005, 0.09491454, -0.038491655, -0.5078187, -0.5782128, -1.0748478, 0.18678135, 0.16064858, 0.0795754, 0.116070546, 0.06408978, 0.085641995, -0.39126787, -0.16669247, -0.024058852)); + conv2d_8_tf += mul(b2, min16float4x4(-0.30658495, -0.08933112, 0.38358685, -0.048228927, 0.050148476, -0.08750905, -0.5015779, 0.4012965, -0.068299964, 0.08993712, 0.03617703, -0.030809006, 0.05144756, -0.7659615, -0.33359128, 0.0054376507)); + conv2d_8_tf += mul(c2, min16float4x4(-0.24894494, 0.08617524, -0.095747314, 0.14710969, -0.09528072, 0.19016005, 0.060339417, -0.059556015, 0.01127292, -0.021082405, 0.5204205, 0.23831797, -0.035384487, 0.001653611, -0.28902745, -0.0060615037)); + conv2d_8_tf += mul(d2, min16float4x4(0.2978602, 0.2580722, 0.11472323, -0.06937241, -0.45087403, -0.35747236, -0.38835877, 0.22520676, 0.09162963, 0.50932664, -0.41183934, -0.08526183, -0.043625794, -0.27782285, -0.4119391, -0.339948)); + conv2d_8_tf += mul(e2, min16float4x4(-0.005034612, 0.010024151, 0.55194247, -0.16040643, 0.0072234212, -0.047230296, 0.011222393, -0.017184192, 0.2156304, 0.02429907, 0.41669923, -0.06159069, -0.39241523, 0.009254305, 0.35784644, -0.45379582)); + conv2d_8_tf += mul(f2, min16float4x4(-0.18008694, -0.35366225, 0.12110043, -0.102665015, 0.2763678, -0.61502653, 0.3051717, -0.23991431, 0.6076138, -1.142571, 1.2579885, 0.15013893, -0.13282573, -0.16185799, -0.26278257, 0.044563264)); + conv2d_8_tf += mul(g2, min16float4x4(-0.043284204, -0.1374118, -0.6523209, -0.1682561, -0.002918912, 0.2768846, 0.045174655, 
-0.046218265, 0.10686049, -0.004872297, 0.04118156, -0.07015327, -0.3329307, 0.19972506, -0.38307762, 0.11627049)); + conv2d_8_tf += mul(h2, min16float4x4(0.09306764, -0.5036807, -0.25358048, -0.033543527, 0.07199686, -0.28982875, -0.022885432, -0.078454, -0.0836088, -0.08261633, 0.38759607, 0.021209864, 0.09516953, -0.1896164, -0.12284774, 0.16532375)); + conv2d_8_tf += mul(i2, min16float4x4(0.27196047, -0.6199637, 0.12209493, -0.0055379267, -0.08997175, -0.0025996822, -0.20710677, 0.15223576, -0.07073166, -0.20732503, -0.044538528, 0.35751408, 0.33849528, -0.14603287, 0.19472563, 0.20992133)); + conv2d_8_tf += mul(na1, min16float4x4(0.018979501, 0.030001618, 0.09530055, -0.22441792, -0.11513775, -0.05383842, 0.042144198, -0.2824055, 0.20338169, 0.9622458, -0.20780474, 0.5217952, 0.11518432, 0.24126045, -0.046675194, -0.07326568)); + conv2d_8_tf += mul(nb1, min16float4x4(-0.13768856, 0.17157272, -0.32123035, -0.08968111, 0.011915078, -0.08129057, -0.71480066, 0.24239756, 0.5093838, 0.29058817, -0.07181868, -0.22533971, 0.24244072, -0.2716092, 0.03331018, 0.008624937)); + conv2d_8_tf += mul(nc1, min16float4x4(0.21304299, 0.5180637, 0.40324917, -0.078679435, 0.17033757, -0.4813804, -0.47702515, -0.017285354, -0.054009005, -0.5853617, -0.5427995, 0.13533083, 0.12440328, -0.6455633, 0.0012186684, 0.031838413)); + conv2d_8_tf += mul(nd1, min16float4x4(0.04057183, -0.27768528, -0.07563423, 0.13400203, -0.03429928, -0.32794374, -0.085426375, -0.3724642, -0.19195397, 0.1349262, -0.2909766, -0.43096116, 0.056601644, 0.5106557, -0.267059, -0.046354882)); + conv2d_8_tf += mul(ne1, min16float4x4(0.14343774, -0.29267886, -0.2406526, -0.30307195, -0.10270894, 0.008828463, -1.5378821, 0.017785087, 0.48302534, -0.310974, 1.5381073, 0.08598342, 0.82111055, -0.0049781636, 0.4820726, 0.301231)); + conv2d_8_tf += mul(nf1, min16float4x4(0.012052944, -0.090234, 0.52199095, -0.3329521, 0.110252894, 0.2897882, -0.37447298, 0.17326026, 0.18148576, -0.23976558, 0.1848407, 0.5042414, 
0.33321953, 0.2712571, 0.18124644, 0.20849751)); + conv2d_8_tf += mul(ng1, min16float4x4(0.066107936, 0.035174694, -0.1587501, -0.22672103, 0.012212267, -0.05451626, -0.6004301, 0.013387352, -0.04113352, 0.53583735, -0.15342614, -0.0018758774, 0.09947345, -0.18213694, 0.02965846, -0.044368513)); + conv2d_8_tf += mul(nh1, min16float4x4(0.099831305, 0.2666737, -0.12301129, -0.113591194, 0.018106552, 0.290373, 0.1480011, 0.032558106, 0.0024403003, 0.11745559, 0.7669008, -0.18195944, 0.21291047, 0.49549788, -0.04361018, 0.6138144)); + conv2d_8_tf += mul(ni1, min16float4x4(-0.24335642, -0.023037815, -0.22853605, -0.49450716, 0.04834612, 0.040727314, 0.36239302, -0.076259434, -0.08173315, 0.14689375, 0.3357786, 0.34003472, -0.11701219, -0.35594055, 0.55640507, 0.3573448)); + conv2d_8_tf += mul(na2, min16float4x4(0.039600838, -0.08580259, -0.25375724, -0.41294497, 0.052295998, 0.34286344, 0.23627926, 0.08080187, 0.0015981429, 0.37459275, -0.11763548, 0.027264152, 0.11372706, 0.34742436, 0.30963847, -0.2995273)); + conv2d_8_tf += mul(nb2, min16float4x4(-0.035936117, -0.42153218, -0.40176156, 0.20363232, 0.22382015, 0.48679677, 0.07365761, -0.20890754, 0.22791456, -0.28418672, -0.17189962, 0.0968373, -0.70834696, -0.41918173, -0.13482817, 0.037949625)); + conv2d_8_tf += mul(nc2, min16float4x4(0.11910686, 0.0473921, 0.37869528, 0.17928337, 0.17311068, 0.21572089, 0.34996882, -0.26002827, -0.014036688, -0.6574892, -0.14409806, -0.06467717, -0.33688435, -0.18185017, 0.04036214, 0.06086553)); + conv2d_8_tf += mul(nd2, min16float4x4(-0.15657301, -0.05661294, -0.36640826, -0.13215317, 0.060342815, 0.19098124, 0.18715985, -0.40765548, 0.090474375, -0.07720432, -0.016231487, 0.0885778, 0.0272616, 0.22065723, 0.1691866, -0.19491237)); + conv2d_8_tf += mul(ne2, min16float4x4(-0.13054666, 0.3278881, -1.3170725, -0.4575742, -0.061401486, 0.15868792, 0.2789515, 0.13829961, 0.09607008, -0.7175924, 0.01804374, 0.41284522, 0.044577077, 0.04847126, -0.25607756, -0.02249741)); + conv2d_8_tf 
+= mul(nf2, min16float4x4(0.22145797, 0.8114419, -0.17527157, 0.09274125, -0.25224185, 0.2955128, -0.37553602, -0.17377761, -0.9684024, 0.42457148, -0.64265996, 0.10394252, -0.11231096, 0.064703405, 0.42858216, -0.21214609)); + conv2d_8_tf += mul(ng2, min16float4x4(0.1910386, -0.0065560606, 0.18119961, -0.026436953, 0.07887997, 0.15127628, -0.11523928, -0.0679343, 0.031198656, 0.16947536, 0.05943052, 0.060350783, 0.32215032, -0.1347014, 0.017390233, -0.06527528)); + conv2d_8_tf += mul(nh2, min16float4x4(-0.19811153, -0.033103824, 0.0053317053, 0.008003428, -0.020805335, 0.17872533, -0.3161484, -0.11559199, -0.24902378, -0.2596549, 0.034520704, -0.006125487, 0.13173361, -0.10967251, -0.7860965, -0.035326626)); + conv2d_8_tf += mul(ni2, min16float4x4(-0.124631934, 0.21335506, 0.375809, -0.13598146, 0.047685858, 0.14553228, -0.068173625, -0.117949426, 0.07296198, 0.08935096, -0.26368606, 0.29653412, -0.27378097, 0.060699224, -0.09753418, -0.08484599)); + conv2d_8_tf += min16float4(-0.009278051, 0.62221414, 0.22868732, 0.14880095); + min16float4 nconv2d_8_tf = max(-conv2d_8_tf, 0); + conv2d_8_tf = max(conv2d_8_tf, 0); + + min16float4 conv2d_7_tf = mul(a1, min16float4x4(0.018128054, -0.14104486, -0.027475944, 0.22669935, -2.7264505e-05, 0.14775783, 0.13441783, 0.11450963, -0.09942102, 0.29735768, 0.04839269, -0.14066552, -0.024448555, 0.3104163, -0.03636913, 0.002947356)); + conv2d_7_tf += mul(b1, min16float4x4(-0.20438337, 0.35419708, 0.037506625, 0.100693576, -0.074241616, -0.15304284, 0.0054191337, -0.12816934, 0.028913809, -0.098240785, 0.5653599, -0.38662913, 0.018716848, 0.0021957273, 0.061397206, -0.111899704)); + conv2d_7_tf += mul(c1, min16float4x4(-0.18681246, -0.23609419, 0.21475013, 0.051762715, 0.04889926, -0.033886652, 0.26262638, -0.27322114, 0.049140245, 0.3380464, -0.13617653, -0.05796957, 0.080669545, 0.21348572, -0.10067047, -0.0016244814)); + conv2d_7_tf += mul(d1, min16float4x4(0.025566151, -0.027286734, -0.10856872, 0.108885765, -0.07635088, 
0.13037659, 0.2892404, -0.2160093, -0.30649704, 0.34650138, -0.021391464, 0.08717436, -0.02000013, 0.027722841, 0.43060175, -0.04844848)); + conv2d_7_tf += mul(e1, min16float4x4(0.09925131, -0.11167345, -0.14262813, -0.21267861, -0.15972298, -0.1823657, -0.073309824, 0.15542479, 0.005081145, -0.40594074, 0.24862696, 0.19943975, -0.36283687, -0.38990027, 0.4759463, 0.45561194)); + conv2d_7_tf += mul(f1, min16float4x4(-0.13126811, 0.24284562, 0.06109369, -0.15402594, 0.016967572, -0.08234942, -0.053873185, 0.026438333, 0.13412815, -0.10839792, -0.345438, 0.0720746, 0.21260333, -0.15989558, -0.012461376, 0.20363508)); + conv2d_7_tf += mul(g1, min16float4x4(0.09231617, 0.17787862, 0.22783166, 0.09095521, -0.0935426, -0.22921127, 0.2591894, -0.19451278, -0.0046325484, -0.60839254, 0.061737422, -0.024267042, -0.04048761, 0.2450175, 0.14390652, 0.07999217)); + conv2d_7_tf += mul(h1, min16float4x4(-0.09204067, -0.05434134, 0.32136026, -0.053413626, 0.044170942, 0.10284346, 0.10827547, -0.03207593, -0.036979157, -0.37019014, -0.07072617, 0.07745549, 0.026007036, 0.13402742, 0.22873925, -0.09879518)); + conv2d_7_tf += mul(i1, min16float4x4(-0.039409183, -0.15304323, 0.110744946, 0.04479048, 0.073402554, -0.31955537, 0.13518381, 0.09020946, 0.21437532, -0.08866372, 0.062359575, -0.08147204, -0.012339588, 0.038986444, -0.059496317, 0.04353628)); + conv2d_7_tf += mul(a2, min16float4x4(-0.029447578, 0.18052183, 0.026130654, -0.18024941, -0.2357611, 0.92272073, -0.40873498, 0.3829195, -0.049990416, -0.2626007, 0.07313907, -0.20231684, 0.23846717, 0.06304234, -0.072538964, 0.34895507)); + conv2d_7_tf += mul(b2, min16float4x4(-0.21427542, 0.33398184, 0.19135003, -0.079177245, -0.047564022, 0.25006044, 0.19287021, -0.07119212, -0.0064072064, 0.14020945, -0.15136649, -0.04587045, -0.113710366, 0.05126853, -0.084781885, 0.1418395)); + conv2d_7_tf += mul(c2, min16float4x4(0.04655672, -0.010115347, 0.18253572, 0.017085062, -0.04543099, 0.08404545, 0.07929449, 0.17069206, -0.045596916, 
0.12133366, 0.12615037, -0.11942128, -0.07431312, -0.0975234, 0.17188828, -0.021951154)); + conv2d_7_tf += mul(d2, min16float4x4(0.013333504, -0.22424631, -0.25461286, -0.09366057, -0.24168679, -0.1413706, -0.084172204, 0.1557298, 0.023721283, 0.18159337, -0.029377997, -0.12690134, -0.07779016, 0.49728185, 0.060146395, 0.17318316)); + conv2d_7_tf += mul(e2, min16float4x4(0.08302447, 0.86936367, -0.17584775, -0.2508983, 0.16770333, 0.106514744, 0.056097895, -0.1516464, -0.04237734, 0.3350473, 0.08797126, 0.053822745, 0.36157215, -0.04365805, -0.20060433, -0.23983552)); + conv2d_7_tf += mul(f2, min16float4x4(0.09215062, 0.0729301, 0.2564446, -0.09456067, -0.04279617, 0.009632537, -0.067693666, 0.07115211, -0.58410543, 0.7954688, -0.6856004, -0.0039867237, 0.05259691, -0.19899113, 0.34015554, -0.1301164)); + conv2d_7_tf += mul(g2, min16float4x4(-0.08229732, 0.22852908, -0.17944984, -0.053203765, 0.01401186, -0.01731911, -0.017196467, 0.017660033, -0.06473575, 0.11841842, -0.09651762, 0.08812678, 0.15789783, 0.41068667, -0.17433365, 0.112683386)); + conv2d_7_tf += mul(h2, min16float4x4(0.19192256, -0.048173536, -0.27452058, -0.086614236, 0.03459962, -0.076093, -0.13129567, 0.10529364, -0.003243667, -0.11558274, 0.15014142, -0.11415493, -0.058378108, -0.23308878, 0.016655494, -0.06092205)); + conv2d_7_tf += mul(i2, min16float4x4(0.053656723, -0.2520498, -0.06450468, 0.14063323, -0.07785553, 0.06996582, 0.043691944, -0.09447727, -0.19854756, 0.08710172, 0.103271045, -0.20072943, -0.10393605, -0.19852036, -0.01656043, 0.19936512)); + conv2d_7_tf += mul(na1, min16float4x4(-0.043692272, -0.15573448, -0.07609012, -0.25906095, 0.042468645, 0.06499704, 0.021691361, -0.14418614, 0.007778065, -0.04098781, 0.16854198, 0.1880123, -0.0024735837, -0.38171276, 0.29813913, -0.13975172)); + conv2d_7_tf += mul(nb1, min16float4x4(0.0786739, -0.13743922, -0.16762766, 0.0551441, -0.16237186, 0.47069517, -0.16434868, 0.38760075, 0.29262593, 0.21078295, 0.1564407, -0.19921672, -0.07819381, 
0.045407712, 0.25388238, 0.12049804)); + conv2d_7_tf += mul(nc1, min16float4x4(0.13686253, 0.15139718, -0.14193471, -0.037212268, 0.017021572, -0.13029522, -0.07875422, 0.22883393, -0.117323294, -0.11999564, 0.074406326, 0.029792523, 0.071242705, 0.04940517, 0.27540857, 0.094216466)); + conv2d_7_tf += mul(nd1, min16float4x4(0.05651692, -0.09319446, -0.15223487, -0.16004439, 0.09602424, 0.114855476, 0.13851804, 0.11632249, -0.15697844, -0.03465572, -0.6334014, 0.0043645306, -0.13810518, -0.24692737, -0.13962403, -0.17288178)); + conv2d_7_tf += mul(ne1, min16float4x4(-0.1125169, 0.2582768, 0.14571975, 0.3412717, 0.046649273, 0.053606547, -0.5402628, -0.14801335, -0.12299524, 0.79026186, -0.3587726, -0.040698707, 0.18239951, 0.18461016, -0.13213885, -0.6929199)); + conv2d_7_tf += mul(nf1, min16float4x4(-0.009360833, 0.22758053, -0.334423, 0.35250792, 0.05025162, -0.1640276, 0.21909785, -0.12123492, -0.33830088, -0.26451996, 0.09280175, -0.18673559, -0.20446195, 0.13918248, 0.09164517, -0.20213476)); + conv2d_7_tf += mul(ng1, min16float4x4(-0.03443797, -0.25032473, -0.0018426777, -0.065064386, 0.03455914, 0.022166712, -0.2954429, 0.012212829, -0.0223488, 0.1161553, -0.106024936, 0.028343895, 0.15230536, -0.5538007, -0.24089493, 0.06740007)); + conv2d_7_tf += mul(nh1, min16float4x4(0.09501347, -0.0845406, -0.13952151, 0.031915456, 0.05118853, -0.25089842, -0.113984115, 0.08745874, 0.14493734, 0.17449388, 0.037183553, 0.060414817, 0.045083977, -0.50209135, -0.25451177, 0.23309624)); + conv2d_7_tf += mul(ni1, min16float4x4(0.08991499, 0.14019197, -0.12056033, -0.05024532, -0.07585356, 0.073596515, 0.017992107, -0.0009288775, -0.17292187, 0.07525249, 0.14620323, -0.058494095, 0.09669742, -0.28342497, 0.10102461, 0.0075472025)); + conv2d_7_tf += mul(na2, min16float4x4(-0.059322756, 0.07296391, -0.22688308, 0.17183779, 0.0921908, -0.18311407, -0.10553935, -0.2998603, -0.05373476, -0.08882287, 0.009316159, -0.09303765, 0.08415284, -0.044707574, 0.07481887, 0.06931905)); + 
conv2d_7_tf += mul(nb2, min16float4x4(-0.26374707, 0.17429374, -0.54841083, 0.23039351, 0.1550329, -0.0991982, -0.07031106, -0.23306605, -0.076208115, 0.058818877, 0.48602778, -0.116065495, 0.13632986, 0.5399192, -0.088733315, -0.04031161)); + conv2d_7_tf += mul(nc2, min16float4x4(-0.118198454, -0.04607605, -0.10619185, 0.034395956, 0.0023600461, 0.1470174, -0.21100855, -0.024570175, -0.0016899678, 0.1612513, -0.03985272, 0.01355469, 0.30949214, -0.056687307, 0.1295898, 0.031099077)); + conv2d_7_tf += mul(nd2, min16float4x4(-0.37869355, 0.06961967, 0.2779311, 0.3090361, 0.23564096, -0.014765556, -0.097406775, -0.08233581, -0.05444356, -0.056364074, -0.13940345, -0.1710778, 0.053456437, -0.5668305, -0.21371025, -0.11354647)); + conv2d_7_tf += mul(ne2, min16float4x4(-0.2009931, -0.46823156, 0.04674297, -0.33720648, -0.48212242, -0.022402052, 0.4083246, 0.3498801, -0.12801081, 0.080993176, 0.12559398, 0.30281347, -0.36876208, -0.19425368, 0.040795308, 0.4358033)); + conv2d_7_tf += mul(nf2, min16float4x4(-0.008429336, -0.007929484, -0.21348138, 0.19799937, -0.0032136212, -0.037011284, 0.060586747, -0.012355498, 0.37488303, -0.626778, 0.45391387, -0.030982537, 0.26613617, -0.027296683, -0.094556324, 0.03054091)); + conv2d_7_tf += mul(ng2, min16float4x4(-0.0032568173, -0.3056237, 0.0007252052, 0.052250773, -0.05099108, 0.23182255, -0.044636346, 0.08786388, -0.12470104, -0.16238213, 0.16018245, -0.11313074, -0.044513255, -0.2792024, 0.13793966, -0.20955163)); + conv2d_7_tf += mul(nh2, min16float4x4(-0.14750522, -0.022307748, -0.15649515, 0.15537989, -0.061475005, 0.19822353, 0.0671258, -0.06628393, -0.04068137, 0.22010179, 0.12955783, -0.0517817, 0.02655539, 0.17269138, -0.1296634, 0.030146338)); + conv2d_7_tf += mul(ni2, min16float4x4(0.061146796, 0.31339607, 0.034430694, 0.10376425, 0.03029668, -0.0401898, -0.1825413, 0.06257798, 0.08390942, -0.31551626, 0.010347497, -0.0031549276, 0.21435012, -0.13221692, -0.021980911, -0.1482502)); + conv2d_7_tf += 
min16float4(0.039428633, 0.032666046, 0.16482623, -0.016402772); + tex7[gxy] = conv2d_7_tf; + min16float4 nconv2d_7_tf = max(-conv2d_7_tf, 0); + conv2d_7_tf = max(conv2d_7_tf, 0); + + min16float4 target = mul(e1, min16float4x4(0.13591515, 0.21395922, 0.040862843, 0.3054825, -0.088837944, -0.6928339, -0.15643471, 0.13081591, 0.07604966, 0.37446347, -0.34723157, -0.17870799, -0.2037286, -0.106576756, 0.25523958, -0.13762575)); + target += mul(e2, min16float4x4(0.21503459, 0.0373132, -0.008046219, -0.18440363, -0.09729587, 0.043958187, 0.23459528, -0.044009138, 0.1686642, -0.1615934, -0.13173419, -0.079085656, -0.07647595, -0.37286422, -0.06148421, 0.015342882)); + target += mul(ne1, min16float4x4(-0.14785692, -0.2707874, -0.017647093, -0.2908642, 0.5612585, 0.4271698, -0.48191005, 0.11905855, -0.21741737, -0.2821245, 0.29278705, -0.20538986, 0.03150152, 0.03138199, 0.10423793, -0.045527548)); + target += mul(ne2, min16float4x4(0.31277063, 0.07915742, -0.34087706, 0.39680582, -0.022496004, -0.33672526, -0.111507386, 0.025953399, -0.15757395, 0.11465282, 0.28329894, 0.12420795, -0.36261007, 0.46334505, 0.30303243, -0.03249052)); + target += mul(conv2d_8_tf, min16float4x4(0.57927984, 0.06878386, -0.24236098, 0.31338137, 0.10464923, -0.07153124, 0.13588428, -0.02373762, -0.19124955, -0.1138502, 0.17388438, 0.01707623, -0.24228282, 0.04736911, 0.6398566, -0.32334659)); + target += mul(nconv2d_8_tf, min16float4x4(-0.54402775, -0.24674532, 0.11212342, -0.09593871, -0.17339998, 0.1323692, -0.1680261, 0.025882099, -0.19121705, 0.1832492, -0.08548955, -0.14068407, 0.13255714, 0.10409962, -0.01394588, 0.22216345)); + target += mul(conv2d_1_tf, min16float4x4(0.2702694, -0.56255573, -0.5357781, 0.05541389, 0.070275396, -0.08012564, -0.13473864, -0.113696516, 0.06642909, 0.23810093, 0.0728827, -0.17656006, 0.48172018, -0.25749484, -0.1752313, 0.33768335)); + target += mul(nconv2d_1_tf, min16float4x4(0.46950498, 0.059317388, -0.09860531, -0.006304164, -0.4128484, -0.049649406, 
0.2954393, -0.190237, -0.20938443, 0.034176145, 0.063109055, 0.07802573, -0.20652357, -0.23180202, -0.11936575, 0.2589604)); + target += mul(conv2d_4_tf, min16float4x4(0.3843954, -0.08686217, 0.18839231, 0.01876761, -0.03335079, -0.12043262, -0.42323095, -0.02321388, -0.22252762, -0.049455926, 0.2268798, 0.082169, 0.2473631, 0.23347862, 0.002254042, 0.2757807)); + target += mul(nconv2d_4_tf, min16float4x4(0.1020188, -0.037612554, -0.33062017, 0.1570476, 0.19851524, 0.35976177, -0.016449552, 0.22057539, 0.20401593, 0.07004227, -0.062413715, -0.10547836, 0.14671406, -0.3905135, -0.038352408, -0.28926837)); + target += mul(conv2d_7_tf, min16float4x4(0.4110517, 0.06280497, 0.16709873, -0.49500167, -0.10045096, -0.2238529, 0.012172345, 0.19666891, -0.16135901, 0.017100533, 0.35809904, 0.35188627, 0.20347194, -0.14602524, 0.71737736, 0.14195462)); + target += mul(nconv2d_7_tf, min16float4x4(-0.5236819, 0.4352016, -0.4066126, -0.04252335, 0.1086945, 0.145471, 0.21984594, -0.24670586, -0.07109616, -0.2711473, -0.89353126, -0.3953869, 0.17096898, 0.12978637, -0.42527854, -0.019720567)); + target += min16float4(-0.027689768, -0.16386859, -0.009289161, 0.09287236); + tex4[gxy] = target; + + target = mul(e1, min16float4x4(0.19380243, 0.020101497, 0.021015864, 0.40521726, 0.038862754, -0.3473658, 0.22289194, -0.2075226, -0.15960178, 0.20686232, -0.19066268, -0.24524036, -0.19289994, -0.6356018, 0.040245753, -0.22887161)); + target += mul(e2, min16float4x4(-0.06837712, -0.59243137, 0.08107887, -0.18099897, 0.08890105, -0.20113088, 0.0076543097, -0.28404838, -0.39403212, 0.124420464, 0.07661543, -0.16511264, 0.440653, 0.17841326, -0.40957427, -0.055862557)); + target += mul(ne1, min16float4x4(-0.052128255, -0.17906874, -0.0063690864, -0.3027001, -0.12118662, 0.5986499, -0.35075194, 0.11334461, -0.13089949, 0.48732534, 0.31238684, 0.0636065, 0.21470545, -0.12680373, 0.20702313, -0.14277203)); + target += mul(ne2, min16float4x4(-0.13521394, 0.5266374, -0.4765612, 0.32102558, 
-0.07704129, -0.26604977, 0.36475307, 0.27245706, 0.16729634, -0.04975267, 0.18763311, 0.07594951, -0.20137721, 0.07614109, -0.056586545, 0.35838535)); + target += mul(conv2d_8_tf, min16float4x4(0.22150421, -0.023909386, -0.30742592, 0.54860467, 0.038963366, -0.47929683, 0.001491465, -0.2016597, 0.14891255, -0.12298715, 0.12770613, 0.16882578, 0.52988553, -0.34417477, -0.11196754, 0.038432673)); + target += mul(nconv2d_8_tf, min16float4x4(0.10892675, 0.15687913, 0.4061297, -0.2549851, -0.12231971, 0.7066191, -0.038577385, 0.1871752, -0.23520122, 0.6384404, -0.04857454, -0.23879313, -0.26810166, -0.08090798, 0.3287431, 0.15214305)); + target += mul(conv2d_1_tf, min16float4x4(0.16076286, 0.08942198, 0.79264593, -0.5107746, -0.10051664, -0.18325275, 0.31161344, 0.023725776, 0.09911152, 0.1552438, -0.22447744, -0.2995641, 0.27984253, -1.107023, 0.010454479, 0.6606262)); + target += mul(nconv2d_1_tf, min16float4x4(0.041668475, 0.16935597, -0.11855577, 0.2013473, 0.2991738, -0.38238418, 0.17906274, -0.27559698, -0.4381387, 0.39814267, -0.40905684, 0.57992136, 0.2830281, 0.12482517, -0.30402762, 0.47808015)); + target += mul(conv2d_4_tf, min16float4x4(0.05201121, 0.3396993, -0.04965309, -0.25744373, -0.13495848, -0.120026626, 0.15645088, -0.20658544, 0.414069, -0.03110071, 0.070210315, 0.028046172, -0.17324251, 0.14329922, -0.14353131, 0.028436944)); + target += mul(nconv2d_4_tf, min16float4x4(-0.15607943, 0.98266315, -0.15506491, 0.34884667, -0.16584046, 0.07532187, 0.0062847883, 0.8719761, -0.30521882, -0.34961814, -0.055313803, 0.041199762, 0.2634066, 0.31106153, 0.029962108, -0.017541675)); + target += mul(conv2d_7_tf, min16float4x4(0.1285044, 0.41011113, 0.16163284, -0.40202442, 0.33554438, -0.2626098, 0.18437132, 0.06627138, 0.26390168, -0.23918642, -0.17191365, -0.16348109, 0.30074367, -0.99079835, 0.60264456, 0.050881945)); + target += mul(nconv2d_7_tf, min16float4x4(0.3971443, -0.034655187, 0.11870823, 0.39984652, -0.45068088, -0.054210827, -0.27554438, 
-0.16074227, -0.14983663, 0.35434055, 0.42479035, 0.07799301, -0.4260275, 0.66214204, -0.095251344, 0.09080398)); + target += min16float4(-0.012729538, -0.13335368, 0.14840336, 0.025965473); + tex5[gxy] = target; +} + + +//!PASS 5 +//!DESC Conv-4x3x3x16, Conv-4x1x1x56 +//!IN tex4, tex5, tex3, tex6, tex7 +//!OUT tex8, tex9, tex1, tex2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass5(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = tex4.SampleLevel(sam, pos - inputPt, 0); + min16float4 b1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c1 = tex4.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d1 = tex4.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e1 = tex4.SampleLevel(sam, pos, 0); + min16float4 f1 = tex4.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h1 = tex4.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i1 = tex4.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex5.SampleLevel(sam, pos - inputPt, 0); + min16float4 b2 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c2 = tex5.SampleLevel(sam, pos + 
float2(-inputPt.x, inputPt.y), 0); + min16float4 d2 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e2 = tex5.SampleLevel(sam, pos, 0); + min16float4 f2 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h2 = tex5.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i2 = tex5.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float4 conv2d_1_tf = tex3.SampleLevel(sam, pos, 0); + min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0); + conv2d_1_tf = max(conv2d_1_tf, 0); + + min16float4 conv2d_4_tf = tex6.SampleLevel(sam, pos, 0); + min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0); + conv2d_4_tf = max(conv2d_4_tf, 0); + + min16float4 conv2d_7_tf = tex7.SampleLevel(sam, pos, 0); + min16float4 nconv2d_7_tf = max(-conv2d_7_tf, 0); + conv2d_7_tf = max(conv2d_7_tf, 0); + + min16float4 conv2d_11_tf = mul(a1, min16float4x4(-0.22341304, 0.26908797, 0.04134543, 0.06961319, 0.32176727, 0.07702703, 0.03751845, -0.13761088, -0.09979559, 0.06891045, -0.01716057, -0.031486046, -0.016294012, 0.0262252, 0.012725462, -0.054174248)); + conv2d_11_tf += mul(b1, min16float4x4(0.0758998, 0.044578414, -0.058127478, -0.04941571, 0.1685694, 0.9547572, 0.3217995, 0.04913146, 0.08628588, -0.49687696, 0.05530926, -0.19010891, 0.0077229803, 0.3938303, 0.18076055, -0.048131783)); + conv2d_11_tf += mul(c1, min16float4x4(0.03656385, 0.23112705, 0.13059878, 0.16223684, -0.2766845, 0.053392846, 0.06446786, 0.19696166, 
-0.14884388, -0.23103243, -0.07006061, -0.021727445, 0.026394684, -0.31138313, -0.0976933, -0.062459927)); + conv2d_11_tf += mul(d1, min16float4x4(-0.36985022, -0.3396681, 0.035750575, 0.019713784, 0.10074354, -0.34114882, -0.01150834, -0.1436701, -0.36870074, -0.3272402, -0.03879516, -0.094077155, 0.016875539, 0.23895474, -0.14396004, -0.06785279)); + conv2d_11_tf += mul(e1, min16float4x4(0.057131216, -0.5966212, -0.13011967, -0.3684052, 0.6414469, 0.45823926, 0.043126952, -0.12702179, 0.029217511, 0.43957123, 0.06747733, 0.35508418, -0.13576074, 0.28117993, 0.1785782, 0.20060769)); + conv2d_11_tf += mul(f1, min16float4x4(0.112133466, 0.2773932, -0.047416527, -0.06561597, 0.093935706, 0.032524325, 0.02208551, 0.10400939, -0.0062363064, 0.20578235, 0.124429, 0.045867924, 0.024913216, -0.07508951, -0.1506746, -0.07368737)); + conv2d_11_tf += mul(g1, min16float4x4(0.029188056, 0.13675697, -0.10047892, -0.15162368, 0.11152231, 0.17758776, 0.04638467, -0.15375991, -0.08195171, -0.00092798605, -0.11137887, -0.20476487, -0.06701632, -0.38742077, 0.10833869, 0.07575963)); + conv2d_11_tf += mul(h1, min16float4x4(0.12579612, -0.13082299, 0.022704111, -0.049295194, 0.02813974, 0.06766161, 0.021488592, -0.22899324, -0.13967377, -0.42789128, -0.15561862, -0.13880157, -0.31957027, -0.051553562, -0.15501565, -0.17607704)); + conv2d_11_tf += mul(i1, min16float4x4(-0.014785312, -0.3358245, 0.09859993, 0.17852743, 0.06758491, 0.040827237, -0.014897847, -0.027630018, -0.041637477, -0.10967412, -0.10507281, 0.058183335, -0.01929858, 0.09047934, -0.19679205, -0.16896065)); + conv2d_11_tf += mul(a2, min16float4x4(-0.19059956, 0.059083544, -0.07367043, 0.10374235, -0.12928921, 0.16821185, 0.03542259, 0.07853399, -0.029948441, 0.045060057, 0.10522493, 0.15548709, 0.13417992, 0.12784965, 0.068737574, 0.024369959)); + conv2d_11_tf += mul(b2, min16float4x4(-0.2539489, -0.15361321, -0.024794202, 0.23387837, -0.021986792, 0.035640705, -0.053465687, 0.041275553, -0.12349385, 0.11599216, 
-0.12158652, -0.0016647653, 0.03552641, 0.15126309, 0.10521408, 0.022221778)); + conv2d_11_tf += mul(c2, min16float4x4(-0.09391041, 0.21640098, 0.06468435, 0.021124857, -0.017427467, 0.14731239, 0.0888631, 0.06669842, 0.16802992, -0.042000934, -0.007442969, -0.17762569, -0.106376246, -0.007006815, 0.048836768, 0.07634349)); + conv2d_11_tf += mul(d2, min16float4x4(-0.08242374, -0.35055616, 0.11752318, 0.06287576, -0.08078838, 0.015269983, 0.07802465, 0.036515962, -0.047435157, -0.23535018, 0.10882656, 0.00760307, 0.20816213, 0.16291322, -0.17480974, -0.09656055)); + conv2d_11_tf += mul(e2, min16float4x4(0.3776239, 0.48836887, 0.046571143, -0.0005301381, 0.111404456, -0.2056147, 0.0976322, -0.07087254, -0.23208277, 0.64508325, 0.029519977, -0.32163903, 0.12203931, 1.2488136, 0.0713469, -0.12589021)); + conv2d_11_tf += mul(f2, min16float4x4(-0.1458724, -0.2927259, -0.11825573, 0.050236594, 0.005908592, 0.009147886, 0.014676971, -0.09960781, -0.031219782, 0.0008116867, -0.16999915, -0.08393424, -0.017762119, 0.15271363, 0.17894958, 0.104973435)); + conv2d_11_tf += mul(g2, min16float4x4(0.15102111, -0.017580042, -0.009878415, 0.09603493, -0.14158034, 0.01766169, 0.026301328, 0.14016923, 0.07513633, 0.12250821, 0.14139763, 0.119470306, 0.056335848, 0.011718554, -0.051952817, -0.1087701)); + conv2d_11_tf += mul(h2, min16float4x4(0.12267096, 0.22258927, -0.23374331, -0.336529, -0.03149633, -0.26095635, 0.00365308, 0.048830956, 0.035902984, -0.04686918, -0.08079191, -0.17013429, 0.0254567, -0.05592242, 0.0968047, 0.07426071)); + conv2d_11_tf += mul(i2, min16float4x4(-0.16953564, 0.074455656, 0.0029755495, 0.20576377, -0.050961535, 0.060958825, 0.014226229, 0.104992926, 0.06942283, 0.29077423, 0.040234245, 0.12337425, -0.012045997, -0.11109262, 0.020255094, 0.08945579)); + conv2d_11_tf += mul(na1, min16float4x4(0.2978639, -0.24613461, -0.083074145, -0.2367985, -0.13995647, -0.21201506, -0.16809967, -0.08163256, 0.22451796, -0.21319884, 0.097241744, 0.17276905, 0.059754357, 
-0.21800114, 0.016986718, 0.059852242)); + conv2d_11_tf += mul(nb1, min16float4x4(0.10399378, 0.016165858, 0.006949626, -0.00957426, -0.07206657, 0.85400176, -0.069736175, 0.11563255, -0.15550873, 0.21035826, -0.09730208, 0.21803263, -0.029731166, 0.07174115, -0.075019605, 0.06605764)); + conv2d_11_tf += mul(nc1, min16float4x4(0.008660154, -0.1689362, -0.13275097, -0.14157207, -0.06571528, 0.2641335, 0.17738026, 0.016201235, -0.058384545, -0.089386165, -0.10691102, 0.03380599, 0.07696467, 0.010921241, -0.05858657, 0.044599395)); + conv2d_11_tf += mul(nd1, min16float4x4(0.29438433, 0.39757052, -0.12448894, -0.14726874, 0.054101802, 0.19893955, 0.0081761405, -0.030686913, -0.09465847, -0.09517581, 0.0046200817, 0.2743172, 0.18768987, 0.2577441, 0.3185588, -0.0043636197)); + conv2d_11_tf += mul(ne1, min16float4x4(0.30364004, 0.45719072, -0.002478791, -0.25550374, 0.044718135, 0.9974692, 0.27661783, 0.38724384, 0.20643012, -0.36335453, 0.04044719, -0.15773767, 0.019318745, -0.015368104, -0.13033883, -0.21446472)); + conv2d_11_tf += mul(nf1, min16float4x4(0.17225221, -0.2870429, -0.11031537, -0.20985241, -0.1813215, 0.47034717, 0.19177493, 0.1565604, -0.22090979, -0.1778559, -0.15998572, 0.20591277, -0.27751637, -0.17734572, -0.22385214, 0.2001247)); + conv2d_11_tf += mul(ng1, min16float4x4(0.09103924, 0.012440279, -0.11811386, -0.28955194, -0.024203198, -0.014690502, -0.041423846, 0.0062359073, 0.06732812, -0.040848043, -0.0807372, -0.06598595, -0.020464217, 0.35617942, 0.054869782, -0.06990699)); + conv2d_11_tf += mul(nh1, min16float4x4(-0.22022852, -0.30250633, -0.008539953, -0.17535509, 0.048545327, -0.06961757, 0.1520779, 0.15551318, 0.145789, 0.41386685, 0.19608185, 0.02285933, 0.19650589, 0.1140758, 0.058065582, 0.06438903)); + conv2d_11_tf += mul(ni1, min16float4x4(0.17500387, 0.009752107, -0.08735754, -0.40322778, -0.04718948, -0.1520063, 0.015334469, 0.055586398, -0.06315823, 0.01381341, 0.06333497, 0.20780154, -0.14789844, 0.008873181, 0.20424104, 
0.18570045)); + conv2d_11_tf += mul(na2, min16float4x4(0.17809622, -0.054737452, 0.045792647, -0.05761767, 0.1530876, -0.058534857, -0.008100565, 0.036446143, 0.27693272, 0.3004126, -0.1283306, -0.50103384, -0.3350802, 0.09919993, -0.10481551, 0.059236333)); + conv2d_11_tf += mul(nb2, min16float4x4(0.08178473, 0.01796507, 0.045470674, -0.1395204, -0.07053285, -0.15308544, -0.016434597, 0.09957456, 0.07303232, 0.5558379, 0.1058254, -0.12340164, -0.37540868, 0.20688659, 0.11254531, 0.08988308)); + conv2d_11_tf += mul(nc2, min16float4x4(-0.115479395, -0.04145597, -0.02444945, -0.0012505532, -0.016777854, -0.21254961, -0.11969028, -0.10986302, 0.34061527, 0.35168666, 0.19457188, -0.25304377, 0.089430355, -0.13593785, -0.03715568, -0.07161111)); + conv2d_11_tf += mul(nd2, min16float4x4(0.135465, 0.16024914, -0.16819438, -0.076060556, 0.14722055, -0.12402309, -0.091675736, -0.11345004, 0.3370019, 0.21161243, 0.08165217, 0.26650387, 0.11799823, 1.1248134, 0.031586587, 0.40626523)); + conv2d_11_tf += mul(ne2, min16float4x4(-0.3881156, 0.075572714, -0.2955678, -0.04820779, -0.14431494, 0.17108414, -0.031334974, 0.14272547, 0.10431918, -0.92185026, -0.550305, -0.09849551, -0.19279402, 0.47034186, 0.38574138, 0.5469418)); + conv2d_11_tf += mul(nf2, min16float4x4(0.07301299, -0.1655295, 0.0851716, 0.0349889, 0.037978686, -0.34476924, -0.09894407, -0.09279173, -0.017504893, 0.16626996, 0.23299451, -0.29538614, -0.035250418, 0.102075204, 0.014679606, 0.05283856)); + conv2d_11_tf += mul(ng2, min16float4x4(0.082496785, -0.047353677, -0.1036778, -0.014507561, 0.091381975, -0.07229443, -0.03069601, -0.07463806, 0.2173226, 0.061551273, 0.01672064, 0.065622196, 0.1645865, 0.08651663, 0.18979368, 0.2012662)); + conv2d_11_tf += mul(nh2, min16float4x4(-0.2116467, -0.26988897, -0.049475558, 0.18609211, -0.08837133, -0.219245, 0.05900789, -0.007832284, -0.028579885, 0.20587349, -0.07297767, -0.19551088, 0.052455146, -0.24630548, 0.12438646, -0.017073039)); + conv2d_11_tf += mul(ni2, 
min16float4x4(0.15815273, -0.13286865, -0.036927793, -0.118895106, 0.06876401, -0.08193885, -0.073907554, -0.17851423, 0.025570622, -0.05206693, 0.0054880823, -0.14550385, 0.031355973, -0.0617539, -0.09522895, 0.007602468)); + conv2d_11_tf += min16float4(0.10656278, 0.12657918, 0.16990805, -0.12699938); + tex8[gxy] = conv2d_11_tf; + min16float4 nconv2d_11_tf = max(-conv2d_11_tf, 0); + conv2d_11_tf = max(conv2d_11_tf, 0); + + min16float4 conv2d_10_tf = mul(a1, min16float4x4(-0.07384766, -0.027958225, 0.37361667, -0.082532816, 0.14156812, 0.02939518, 0.22737388, 0.19935979, -0.090212055, 0.04403584, 0.18456662, -0.026585983, 0.22868252, 0.09938934, -0.08726494, -0.115827106)); + conv2d_10_tf += mul(b1, min16float4x4(-0.09788985, -0.3116416, 0.35298944, -0.08990593, 0.16181462, -0.22193117, -0.5422943, 0.23932208, 0.15739329, -0.06103239, 0.7953177, -0.047183976, 0.21341586, 0.19858226, 0.0016054768, 0.054749873)); + conv2d_10_tf += mul(c1, min16float4x4(-0.026696216, 0.061291914, -0.35742328, 0.00082715444, 0.10632543, -0.09428293, -0.12645036, -0.043706786, 0.09915236, 0.13788143, 0.15950204, -0.089837976, 0.04461279, -0.054954246, 0.04740199, 0.07014664)); + conv2d_10_tf += mul(d1, min16float4x4(-0.12016896, 0.16669498, 0.26552972, -0.35876223, 0.045097463, -0.15016092, -0.0988156, -0.416339, -0.0101760905, 0.26459762, 0.31927487, -0.16307381, 0.12096833, -0.06770049, -0.017283063, 0.013299284)); + conv2d_10_tf += mul(e1, min16float4x4(0.15951112, 0.14506923, 0.6747884, -0.24716964, -0.3413045, -0.2017185, -0.9612693, 0.5421329, -0.16023788, 0.32216108, 0.062496744, 0.21633703, 0.004581572, 0.2359334, -0.35295007, 0.09726352)); + conv2d_10_tf += mul(f1, min16float4x4(0.13874753, -0.0063067, -0.14469895, 0.11554976, -0.019183924, -0.04544159, -0.29430693, -0.10431769, 0.15769906, 0.00601582, -0.454376, -0.11790236, 0.16000259, 0.29670846, -0.9759625, 0.31053123)); + conv2d_10_tf += mul(g1, min16float4x4(0.014491841, 0.0074491766, -0.09696308, -0.09127842, 
-0.03579932, -0.20163259, -0.21284793, -0.261139, 0.24359487, 0.14113441, 0.23983651, -0.16634561, -0.09547295, 0.10859189, 0.13468629, 0.33521304)); + conv2d_10_tf += mul(h1, min16float4x4(0.008276171, 0.12959969, 0.5093179, 0.002464717, 0.016199486, -0.03156574, -0.4428472, -0.10885838, -0.049632378, 0.2476587, 0.07033375, -0.20044556, 0.04982328, 0.19631135, -0.33776414, -0.6421577)); + conv2d_10_tf += mul(i1, min16float4x4(-0.04192616, 0.06393284, 0.07120974, 0.076716706, -0.09867013, -0.13239172, 0.012114291, -0.038557116, 0.029985918, 0.022090917, 0.07777519, 0.008410333, 0.0034299784, 0.062100925, -0.38884223, -0.01593217)); + conv2d_10_tf += mul(a2, min16float4x4(-0.013629574, -0.06545711, 0.14423661, -0.03981215, -0.052800525, -0.058425374, -0.05814048, -0.11337634, 0.05479856, -0.010584571, -0.22650285, 0.056241333, -0.1396656, -0.0010838923, -0.30166936, 0.040658727)); + conv2d_10_tf += mul(b2, min16float4x4(0.045267094, -0.086306006, -0.05226326, 0.1539859, -0.02723665, -0.13326567, 0.22143897, -0.018399606, 0.12181383, 0.1452545, -0.3973738, -0.10285705, -0.15147118, -0.28072536, 0.4379245, -0.06340889)); + conv2d_10_tf += mul(c2, min16float4x4(0.14590915, 0.034363795, -0.02217679, 0.15465777, -0.020056443, 0.06256286, 0.00068213895, -0.004845135, 0.10313473, 0.13895464, -0.0957288, 0.10452721, -0.06313026, -0.06739777, 0.16052145, -0.115432285)); + conv2d_10_tf += mul(d2, min16float4x4(-0.083468825, 0.15143521, 0.19880214, -0.0054416056, -0.1074472, 0.027439727, -0.16624895, -0.026701076, -0.046576414, -0.061388403, 0.34304553, -0.08921803, 0.09399348, -0.043658186, -1.3050584, -0.07285428)); + conv2d_10_tf += mul(e2, min16float4x4(-0.2544287, -0.38059148, 0.7181705, -0.44567156, 0.10387618, 0.06472145, 0.08178852, -0.016514499, -0.1630076, -0.16066378, -0.19193888, -0.24423774, -0.14821364, -0.28755048, -0.1322022, 0.25716448)); + conv2d_10_tf += mul(f2, min16float4x4(0.13228743, 0.24624044, 0.10462062, 0.26341802, 0.035913363, 0.09206641, 
0.044785645, 0.010443224, 0.05206244, 0.008345797, -0.32408288, -0.2484674, -0.027154556, 0.0006338974, 0.09008037, 0.027416239)); + conv2d_10_tf += mul(g2, min16float4x4(-0.061936356, -0.07008738, -0.22344092, 0.20339371, 0.03216865, 0.103117235, 0.10232644, 0.10809929, 0.08320763, 0.058004253, -0.06520991, 0.038012277, -0.12916973, -0.1150849, -0.03713365, -0.0886423)); + conv2d_10_tf += mul(h2, min16float4x4(0.3213531, 0.1826207, 0.022152286, 0.025484305, -0.054090437, 0.08160166, 0.13491987, -0.06896833, 0.10781034, 0.08944192, -0.34036443, -0.018937334, -0.18917687, -0.13239872, 0.11581373, -0.038915917)); + conv2d_10_tf += mul(i2, min16float4x4(-0.20916902, 0.08310064, 0.19347866, 0.29880634, -0.007023385, 0.005319598, -0.06649972, 0.03248317, -0.04066817, -0.06176127, -0.41747397, 0.14132817, -0.021392342, -0.021360394, 0.101215124, -0.05375729)); + conv2d_10_tf += mul(na1, min16float4x4(-0.008702178, -0.03840238, 0.13321695, 0.065163925, -0.062342774, -0.030948557, 0.0069512874, -0.2634128, -0.09415655, 0.02985776, 0.021763485, 0.27137864, -0.21608604, -0.19126832, -0.37335086, -0.16941321)); + conv2d_10_tf += mul(nb1, min16float4x4(0.04631249, 0.33492458, -0.6266605, 0.20180638, 0.039800193, -0.14341171, -0.8203481, 0.04878081, 0.008235832, 0.15065777, -0.32971388, 0.1828355, -0.1510293, -0.17637968, 0.125366, -0.06719769)); + conv2d_10_tf += mul(nc1, min16float4x4(-0.014685718, -0.04156494, 0.2728874, -0.106735535, -0.1312142, -0.05991217, 0.15173748, -0.09276527, 0.027946949, 0.12980466, 0.017537035, 0.058945708, -0.11254791, -0.06708247, -0.28308856, -0.058375884)); + conv2d_10_tf += mul(nd1, min16float4x4(0.2220684, -0.19030218, -0.1259754, 0.09647918, -0.20530927, -0.16737363, -0.055208467, -0.067288965, 0.1428622, 0.08903465, 0.494294, 0.28669015, -0.17464463, -0.2190753, 0.13515279, 0.24887499)); + conv2d_10_tf += mul(ne1, min16float4x4(-0.24211104, -0.11129136, 0.03340221, 0.49835417, -0.11755811, -0.732711, -0.3876752, 0.6178176, 0.1437329, 
-0.05131951, -0.16705558, -0.3823752, -0.23198022, -0.27967533, 0.7223488, -0.5565778)); + conv2d_10_tf += mul(nf1, min16float4x4(-0.04738433, -0.14606567, 0.22317784, 0.0055712103, -0.064653076, -0.16446865, -0.10802961, -0.10179589, 0.060855757, 0.22762765, -0.037358448, 0.24772792, -0.15458576, -0.0770241, 0.43480682, 0.008342627)); + conv2d_10_tf += mul(ng1, min16float4x4(0.117756896, -0.06760757, 0.12629354, -0.13241243, -0.05329636, 0.031004142, 0.19809054, 0.1504123, -0.024029436, -0.011011192, -0.014698134, 0.12855798, 0.027526522, -0.102618076, -0.2597635, -0.23887417)); + conv2d_10_tf += mul(nh1, min16float4x4(-0.012681944, 0.088339254, 0.58977854, 0.020116867, -0.30643263, -0.11593101, 0.2829653, -0.060883448, 0.027514484, -0.19997032, -0.12530403, 0.3302542, -0.10344085, -0.0644199, -0.11374762, 0.38778695)); + conv2d_10_tf += mul(ni1, min16float4x4(0.073869206, -0.059440095, -0.016326021, -0.08571949, -0.04171866, 0.042949438, 0.13984677, -0.15829174, -0.025245706, 0.0059198164, -0.0432442, 0.20765327, -0.058762096, 0.11539401, 0.036120266, 0.24331446)); + conv2d_10_tf += mul(na2, min16float4x4(0.012567978, 0.07251118, -0.12190053, 0.10283353, 0.088345066, 0.0017397653, -0.2381744, 0.101314925, 0.022791719, -0.043069735, -0.15024713, -0.072577685, 0.19976862, -0.059844784, 0.38824072, 0.0020866133)); + conv2d_10_tf += mul(nb2, min16float4x4(0.27314463, 0.0739519, 0.08960633, 0.03709254, 0.032681584, 0.22859, -0.41635752, -0.07382896, 0.13144481, -0.24017848, 0.07981319, 0.15370876, 0.059314378, 0.29214182, -0.39464346, -0.13867916)); + conv2d_10_tf += mul(nc2, min16float4x4(-0.005685388, -0.039528795, -0.055917054, -0.06578973, 0.020702876, -0.00709528, 0.08486715, -0.0075865295, 0.05714374, -0.27417144, 0.4555885, 0.013780273, 0.05096835, 0.159233, -0.05228782, 0.15794256)); + conv2d_10_tf += mul(nd2, min16float4x4(-0.0010807351, -0.022064442, 0.13078515, 0.11357431, 0.11269685, 0.029679844, 0.14385091, 0.10241993, 0.030162932, -0.016101424, 
0.20761637, 0.4683215, 0.03091817, -0.58406824, -0.3438075, 0.3653469)); + conv2d_10_tf += mul(ne2, min16float4x4(-0.016927537, 0.13944507, -0.38772225, -0.11645372, -0.1683389, -0.081295304, 0.271328, 0.14980802, 0.47266555, 0.04091753, 0.006903156, -0.00832747, -0.056511678, 0.06924621, -1.0780094, 0.1268596)); + conv2d_10_tf += mul(nf2, min16float4x4(-0.21017683, -0.077091806, 0.28906518, 0.022843512, -0.062092084, -0.017447937, 0.25115407, -0.1367289, 0.0021664056, 0.0034106125, 0.5305142, -0.029012429, -0.014483031, 0.05575314, -0.35784876, -0.09252365)); + conv2d_10_tf += mul(ng2, min16float4x4(0.008859689, 0.06481962, 0.09483335, 0.18473764, 0.0015982646, -0.06144117, 0.054042596, -0.19934553, -0.20250106, 0.096015476, 0.21697922, 0.6265738, -0.16049659, -0.33120447, 0.27775142, 0.14459921)); + conv2d_10_tf += mul(nh2, min16float4x4(-0.11195867, 0.21663944, 0.5021048, 0.04712746, 0.08637696, 0.07792573, 0.23626573, -0.075164914, 0.06574307, -0.16795279, 0.06829719, -0.027584063, -0.015064924, -0.057976205, 0.14589287, -0.15683101)); + conv2d_10_tf += mul(ni2, min16float4x4(0.07626267, -0.03523683, 0.106941625, -0.15825523, 0.032598946, 0.038718563, -0.016688785, -0.054390162, 0.05544311, 0.13933052, 0.078817375, -0.10183935, 0.041770034, 0.032732744, 0.062236354, 0.0068387473)); + conv2d_10_tf += min16float4(-0.11589812, -0.123082116, -0.003926807, -0.15363532); + tex9[gxy] = conv2d_10_tf; + min16float4 nconv2d_10_tf = max(-conv2d_10_tf, 0); + conv2d_10_tf = max(conv2d_10_tf, 0); + + min16float4 target = mul(e1, min16float4x4(-0.25229862, 0.22394362, 0.0050771693, -0.07544911, -0.11078993, -0.14940143, 0.009394699, 0.0110528935, 0.044721916, 0.26324025, -0.046336185, 0.38099283, 0.053437576, -0.07238376, -0.090147175, 0.5568665)); + target += mul(e2, min16float4x4(0.036739275, -0.2334262, 0.032853063, 0.24364692, -0.122930475, 0.1975849, -0.01315444, -0.13528247, -0.014283123, 0.057573725, 0.058717266, 0.16260214, 0.03097313, -0.11750414, -0.18610783, 
-0.23006414)); + target += mul(ne1, min16float4x4(0.37318927, -0.26915783, 0.035015646, 0.2676218, 0.1748369, 0.094052985, -0.11020892, -0.14514406, 0.004877109, -0.26225975, 0.13958913, -0.16787122, 0.06908459, -0.10446216, -0.028498875, -0.28281447)); + target += mul(ne2, min16float4x4(0.1980342, 0.021963626, -0.03271427, 0.28889674, 0.043385092, -0.16916741, -0.008713317, 0.00013464666, 0.0819348, 0.0152427135, -0.14862345, -0.15659885, -0.050634, 0.04153691, 0.042288564, 0.00585241)); + target += mul(conv2d_11_tf, min16float4x4(-0.17560056, 0.3521319, 0.20137301, -0.25535235, 0.030570813, 0.2411823, 0.053508975, -0.34454364, 0.22279017, -0.41471666, -0.15029109, 0.22158626, -0.08751699, -0.09357398, 0.20704596, -0.20073438)); + target += mul(nconv2d_11_tf, min16float4x4(0.15419295, 0.31318265, 0.004593545, 0.78029615, -0.16751337, -0.32214537, -0.44051525, 0.22405408, -0.0064655836, 0.36599794, -0.26032063, 0.1850997, 0.13661511, -0.49070612, -0.34533858, 0.16373816)); + target += mul(conv2d_1_tf, min16float4x4(0.09806042, 0.36764845, 0.11531638, 0.073847674, -0.16854957, -0.19408809, -0.16800502, -0.12827317, -0.5168489, 0.030958507, -0.03509507, 0.086487584, 0.01842899, -0.10123225, -0.17940263, -0.028054722)); + target += mul(nconv2d_1_tf, min16float4x4(0.21619087, -0.05322262, -0.31423846, 0.37783054, 0.20402598, 0.53124064, -0.012658878, 0.20003271, -0.17958061, -0.37326333, -0.24583863, 0.057008818, -0.13031931, -0.031875104, -0.2130229, 0.44612458)); + target += mul(conv2d_4_tf, min16float4x4(0.25865164, -0.28258085, 0.09512834, 0.054259088, 0.25939894, 0.38799945, -0.33007956, 0.6692063, -0.22719514, 0.16910313, 0.056874167, 0.016987909, -0.19956954, -0.20683451, -0.19937307, -0.41771019)); + target += mul(nconv2d_4_tf, min16float4x4(0.23592101, -0.15792374, -0.06965535, 0.30855724, -0.22757038, 0.12033792, 0.3199687, 0.2674324, 0.112318985, -0.14153072, -0.13629095, 0.13337436, 0.09185144, 0.24124412, 0.028630963, 0.22709718)); + target += 
mul(conv2d_7_tf, min16float4x4(0.44043523, 0.32490492, -0.117098905, 0.38431495, 0.07962198, 0.1517891, 0.22628377, 0.13990402, 0.38505656, -0.014830039, 0.20684186, 0.065970615, -0.054330014, -0.046108313, 0.49422976, 0.13082288)); + target += mul(nconv2d_7_tf, min16float4x4(-0.08174229, -0.013488396, -0.09494761, 0.31210786, -0.14530393, -0.22510533, -0.30971226, -0.17040919, -0.64233893, -0.07164386, -0.20537859, -0.17981663, -0.0060102916, -0.10167985, -0.24380594, 0.36305648)); + target += mul(conv2d_10_tf, min16float4x4(-0.23301682, -0.19649999, -0.0016176507, 0.7897105, -0.68460715, -0.06446943, -0.5841334, -0.17928797, 0.021772655, 0.46175778, 0.36450028, 0.27175686, -0.03546283, -0.19889158, -0.24603742, -0.090037055)); + target += mul(nconv2d_10_tf, min16float4x4(0.1085313, 0.04249687, 0.13247591, 0.09551512, -0.37197208, 0.3261908, -0.13848339, -0.13538006, 0.13875476, -0.3748712, -0.21430004, 0.09772982, -0.35635203, 0.13196826, -0.09840773, -0.21841893)); + target += min16float4(0.062238827, 0.069814906, -0.107347876, 0.64385885); + tex1[gxy] = target; + + target = mul(e1, min16float4x4(0.22607668, 0.021170171, -0.06774968, -0.019062893, -0.029051676, 0.029224426, 0.097410545, 0.07505055, 0.17470665, -0.025774082, -0.041022647, 0.07615996, 0.031361237, -0.18075092, -0.01981288, 0.30251572)); + target += mul(e2, min16float4x4(-0.2228827, -0.18372375, 0.17952546, 0.031262513, 0.10978829, 0.095414534, -0.11202218, -0.017824037, 0.13419671, -0.056704585, 0.086960495, 0.089463, 0.0436869, 0.1987542, -0.24825421, -0.14668585)); + target += mul(ne1, min16float4x4(-0.2848745, -0.09242928, 0.24002336, -0.06059541, -0.0066300016, 0.050746392, -0.26092768, -0.060129635, -0.2699064, -0.13927452, 0.3134039, -0.21668927, 0.0028670141, 0.044556674, 0.040246494, -0.26040232)); + target += mul(ne2, min16float4x4(0.08408219, -0.038882803, -0.08522774, 0.1714629, -0.03067602, -0.10863579, 0.072058044, -0.012343554, -0.0076697394, 0.17840211, -0.2823912, 0.11976201, 
-0.05657313, 0.092938855, -0.060931504, 0.06991858)); + target += mul(conv2d_11_tf, min16float4x4(0.09868284, 0.054261737, 0.13327791, -0.14897001, -0.06348394, 0.11385057, 0.09684055, -0.084950894, -0.3038146, -0.08645148, 0.035114545, -0.07148952, -0.15862693, 0.26620075, -0.018059343, 0.35772058)); + target += mul(nconv2d_11_tf, min16float4x4(-0.4964452, -0.32340884, 0.5129584, -0.090460144, 0.28658384, -0.117274396, 0.25311428, 0.119918026, 0.27442876, -0.19332558, -0.40261742, -0.0627285, -0.36318043, -0.07865861, -0.11114984, -0.1290027)); + target += mul(conv2d_1_tf, min16float4x4(0.42158237, -0.032889403, 0.034080755, 0.25719455, -0.18799819, 0.0981468, 0.22785765, -0.07262642, 0.22532979, -0.09519116, -0.1005627, 0.1767603, -0.100850165, -0.06818755, 0.0059797456, -0.0718568)); + target += mul(nconv2d_1_tf, min16float4x4(0.12787001, -0.20670003, 0.0034799385, -0.024907416, 0.04423561, -0.13276835, -0.102332935, 0.14673741, 0.08700579, 0.08124997, -0.009865786, 0.041748982, -0.076119795, 0.09744985, 0.13542135, 0.12240728)); + target += mul(conv2d_4_tf, min16float4x4(-0.1702021, 0.18497302, 0.06786661, -0.09040049, 0.15212716, 0.055503774, 0.020584844, 0.24927403, 0.23556694, -0.1571619, -0.02012801, 0.08423509, -0.114376806, -0.04171382, 0.040876187, -0.116261706)); + target += mul(nconv2d_4_tf, min16float4x4(-0.0854133, -0.023111762, 0.3320211, -0.21760856, -0.169973, 0.22671382, 0.4513697, 0.35962802, -0.1499719, 0.24696982, -0.29979527, 0.006662296, 0.20241787, -0.2276791, 0.059445832, 0.18853071)); + target += mul(conv2d_7_tf, min16float4x4(-0.026398154, 0.124663144, 0.20381314, 0.2053697, 0.010302614, -0.050437275, 0.033807695, 0.014369258, -0.20720173, 0.05919782, 0.008449617, -0.31949872, 0.011598942, -0.0432789, 0.12732887, 0.049919438)); + target += mul(nconv2d_7_tf, min16float4x4(-0.06617085, 0.023928246, 0.1698239, 0.19584818, 0.022199618, -0.0040151025, -0.14364237, -0.06734091, 0.49634683, 0.40206975, -0.023004102, 0.16953272, 0.13243976, 
-0.47359994, 0.18358715, -0.15007599)); + target += mul(conv2d_10_tf, min16float4x4(0.03754883, -0.84370553, -0.0057923268, -0.06449944, 0.09488198, -0.09577232, 0.31362334, -0.09768442, 0.15369056, -0.16346063, 0.41194627, 0.10364933, -0.2073915, -0.15944852, -0.57649344, 0.1580545)); + target += mul(nconv2d_10_tf, min16float4x4(-0.3224099, -0.17332473, 0.12429976, -0.12284861, 0.32270268, 0.2888736, -0.20192772, 0.15415959, -0.10240418, 0.09524166, -0.14117688, -0.1239787, 0.0015336396, 0.10390812, 0.20461708, -0.12672688)); + target += min16float4(0.01866206, -0.01430976, -0.04231479, 0.06331023); + tex2[gxy] = target; +} + +//!PASS 6 +//!DESC Conv-4x3x3x16, Conv-4x1x1x64 +//!IN tex1, tex2, tex8, tex3, tex6, tex7, tex9 +//!OUT tex4, tex5, tex10 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass6(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = tex1.SampleLevel(sam, pos - inputPt, 0); + min16float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e1 = tex1.SampleLevel(sam, pos, 0); + min16float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i1 = tex1.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 
nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex2.SampleLevel(sam, pos - inputPt, 0); + min16float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e2 = tex2.SampleLevel(sam, pos, 0); + min16float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i2 = tex2.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float4 conv2d_11_tf = tex8.SampleLevel(sam, pos, 0); + min16float4 nconv2d_11_tf = max(-conv2d_11_tf, 0); + conv2d_11_tf = max(conv2d_11_tf, 0); + + min16float4 conv2d_1_tf = tex3.SampleLevel(sam, pos, 0); + min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0); + conv2d_1_tf = max(conv2d_1_tf, 0); + + min16float4 conv2d_4_tf = tex6.SampleLevel(sam, pos, 0); + min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0); + conv2d_4_tf = max(conv2d_4_tf, 0); + + min16float4 conv2d_7_tf = tex7.SampleLevel(sam, pos, 0); + min16float4 nconv2d_7_tf = max(-conv2d_7_tf, 0); + conv2d_7_tf = max(conv2d_7_tf, 0); + + min16float4 conv2d_10_tf = tex9.SampleLevel(sam, pos, 0); + 
min16float4 nconv2d_10_tf = max(-conv2d_10_tf, 0); + conv2d_10_tf = max(conv2d_10_tf, 0); + + min16float4 conv2d_13_tf = mul(a1, min16float4x4(0.09638616, 0.041973136, 0.032690834, 0.0017506832, 0.035889357, 0.046528358, 0.06497702, 0.06353481, -0.07129311, -0.027845494, 0.003971696, 0.015161773, -0.016153565, -0.02228567, -0.011083082, 0.037676543)); + conv2d_13_tf += mul(b1, min16float4x4(0.2134379, 0.26289365, 0.1335757, 0.13036838, -0.08787389, -0.106764, -0.048054244, 0.17788094, -0.15528837, -0.11408854, -0.06642222, -0.07838564, -0.09646518, -0.116988175, -0.22729287, -0.11145718)); + conv2d_13_tf += mul(c1, min16float4x4(0.09568265, 0.006643416, 0.11656759, -0.049414653, 0.14153476, -0.04269765, 0.09150523, 0.26861703, 0.16641477, -0.1080059, 0.22390138, -0.08730618, -0.01928994, -0.06351, -0.0022028533, 0.04301657)); + conv2d_13_tf += mul(d1, min16float4x4(-0.11731019, -0.040432923, -0.1977298, -0.17696093, -0.09182833, 0.071209684, -0.120773874, 0.021507429, -0.016429326, 0.04448132, 0.0681032, 0.044070866, -0.14647268, 0.008662263, -0.06507026, -0.075289875)); + conv2d_13_tf += mul(e1, min16float4x4(0.5694518, -0.6138523, 0.28939885, -0.06047394, 0.11681902, -0.7026379, 0.20342608, 0.07128985, 0.06697409, 0.2678358, 1.1430641, 0.20436136, -1.6117494, 0.2799715, -0.01652429, -0.16711035)); + conv2d_13_tf += mul(f1, min16float4x4(0.15830286, 0.16772346, -0.03232187, 0.029600514, -0.18494213, -0.25623813, 0.15487063, 0.06255487, -0.058094956, 0.19903323, 0.4756497, 0.6381142, -0.036022857, -0.09470495, 0.046093524, 0.031300675)); + conv2d_13_tf += mul(g1, min16float4x4(-0.38466138, 0.16052443, -0.13819315, -0.059899956, 0.14069949, -0.1297194, 0.105595976, 0.13371274, 0.06298681, -0.038837492, 0.08675327, 0.1501906, 0.031129224, 0.029751344, -0.06775066, -0.047534525)); + conv2d_13_tf += mul(h1, min16float4x4(0.097809926, -0.14269543, -0.14661346, -0.1819761, -0.023082452, 0.19019675, -0.15678905, -0.07669464, -0.07322769, -0.30472377, 0.33603573, 
0.22620338, 0.05328552, 0.030486144, -0.037603505, -0.081246674)); + conv2d_13_tf += mul(i1, min16float4x4(-0.15090303, -0.0650902, 0.11741429, -0.003369476, 0.043803368, 0.13717425, -0.038966697, -0.05230889, -0.0042353314, -0.017051768, 0.102879845, 0.044044945, -0.012893164, 0.0152335, 0.015073082, 0.08049258)); + conv2d_13_tf += mul(a2, min16float4x4(-0.07802851, -0.07544602, -0.0039040581, -0.03915584, 0.115673676, -0.024907975, -0.011459969, 0.026098263, 0.043594692, 0.10627707, 0.027093858, 0.051561285, 0.071452856, -0.1758179, 0.28485832, 0.28952092)); + conv2d_13_tf += mul(b2, min16float4x4(-0.052147392, 0.18546684, 0.19015399, -0.053752594, -0.29468048, 0.010600442, -0.09287294, -0.09246605, 0.17687573, -0.04858957, 0.06478161, -0.0035372626, 0.5927226, 0.38359696, 0.33155236, 0.13010578)); + conv2d_13_tf += mul(c2, min16float4x4(0.04136322, 0.11806175, 0.19966072, 0.07308716, -0.09563447, -0.064514905, -0.0077517326, 0.11964638, -0.1460613, 0.02240298, 0.014256963, -0.0123070385, 0.1897282, -0.0058207656, 0.040057864, -0.49406672)); + conv2d_13_tf += mul(d2, min16float4x4(-0.43775788, -0.25118434, -0.3468631, -0.30180287, -0.27033472, -0.0023914252, 0.053275872, -0.021835659, 0.02879347, 0.036559265, 0.044093054, 0.12771723, 0.2702892, -0.2581491, -0.059361164, -0.046974897)); + conv2d_13_tf += mul(e2, min16float4x4(-0.03310008, -0.5622936, 0.5419483, -0.3599514, 0.2634039, 0.3500813, 0.4152074, 0.24876466, -0.2629078, -0.18554081, -0.76194984, -0.54471385, 0.72921526, 0.3316481, -0.20936906, -0.16736485)); + conv2d_13_tf += mul(f2, min16float4x4(0.07884802, 0.16494922, 0.2734585, -0.09396988, -0.14178166, -0.105561115, 0.006780099, 0.063054875, 0.12384575, -0.163967, -0.19682601, -0.1647527, 0.59927565, 0.24755491, -0.29760644, -0.074884824)); + conv2d_13_tf += mul(g2, min16float4x4(-0.186745, 0.21136905, 0.027726538, 0.08498169, 0.009122279, 0.01566938, -0.051473126, 0.014151464, 0.04580383, 0.02071651, 0.14929157, 0.17253524, -0.034080226, 0.07048439, 
-0.11602547, -0.12655921)); + conv2d_13_tf += mul(h2, min16float4x4(-0.2831727, -0.21816732, -0.37266397, -0.26041594, -0.18912914, -0.13482115, -0.10902061, -0.110694066, -0.20758803, -0.07158453, 0.14401175, 0.1590672, 0.27700564, -0.3202948, -0.23177631, 0.060082316)); + conv2d_13_tf += mul(i2, min16float4x4(0.16861005, -0.13237478, -0.12109852, -0.16306286, 0.032467425, 0.009778175, -0.05084063, 0.02528882, -0.028993038, -0.06119019, 0.0124081755, -0.0819979, -0.2308113, -0.23910572, 0.3170529, 0.22742116)); + conv2d_13_tf += mul(na1, min16float4x4(-0.19654512, 0.037653327, -0.015190324, 0.038381096, 0.034783594, -0.16242851, 0.07052334, 0.0019672879, 0.08069976, 0.090035714, 0.12597767, -0.00065050717, -0.10528094, 0.015088367, -0.045706235, -0.14849594)); + conv2d_13_tf += mul(nb1, min16float4x4(-0.0981129, -0.0044483114, 0.00918156, 0.28903985, 0.23872024, 0.11113565, 0.23359483, 0.21115206, 0.2144387, 0.106830046, 0.03875094, -0.14864162, 0.19366172, 0.21310017, 0.06280982, -0.0581721)); + conv2d_13_tf += mul(nc1, min16float4x4(-0.22814496, -0.08812413, -0.25392863, -0.02752917, 0.05930787, 0.08304853, -0.04027662, -0.010756739, 0.034590207, 0.070662424, 0.15285444, 0.058270697, -0.022838322, 0.024096202, 0.01309858, -0.10489201)); + conv2d_13_tf += mul(nd1, min16float4x4(0.17219496, -0.0066256993, 0.1442649, -0.07291206, 0.34312358, -0.24952441, 0.040031537, 0.18302973, 0.0015231773, 0.24825755, -0.01807878, -0.037405558, 0.21687117, 0.02481246, -0.08312088, -0.14397743)); + conv2d_13_tf += mul(ne1, min16float4x4(0.2859165, 0.6145777, 0.060804237, 0.22117847, -0.25534254, 0.3753605, 0.4193899, 0.06387241, -0.13308842, 0.0012660836, -0.055252563, -0.2552111, 0.8831952, -0.16249466, 0.76958305, 0.3658401)); + conv2d_13_tf += mul(nf1, min16float4x4(-0.14865848, -0.13086087, 0.17719927, 0.2801542, 0.3776111, 0.20903045, 0.1710449, 0.25524843, 0.11910105, 0.034738105, -0.12101939, -0.22116004, 0.11605619, 0.16838482, -0.07223086, -0.15225673)); + conv2d_13_tf 
+= mul(ng1, min16float4x4(0.101802975, -0.12683764, -0.21380596, -0.19243564, 0.017763488, 0.0076850834, -0.0107422285, 0.058099743, 0.03071978, 0.02958345, 0.09209252, -0.012379192, -0.058930825, -0.07321041, -0.09178575, -0.09764888)); + conv2d_13_tf += mul(nh1, min16float4x4(0.2205578, -0.053928245, -0.14290524, -0.18790527, 0.002521159, -0.23389481, 0.11274272, 0.17174199, 0.2128134, 0.14586388, 0.08666812, 0.052028902, 0.024853414, -0.027658377, 0.033780072, -0.0045349374)); + conv2d_13_tf += mul(ni1, min16float4x4(-0.053073518, 0.12716359, 0.008456044, 0.014315154, 0.01918925, -0.13495505, 0.08007481, 0.08627198, 0.024612406, 0.0021514448, 0.04478567, -0.034171678, 0.0027070146, 0.0149149615, -0.15999815, -0.1866448)); + conv2d_13_tf += mul(na2, min16float4x4(0.040357295, -0.12759757, 0.03543834, -0.029329961, -0.078925595, 0.07807751, 0.08971355, -0.05469623, -0.08630596, -0.11219292, -0.08082983, -0.020131797, -0.04191703, 0.22003745, -0.28878415, -0.132956)); + conv2d_13_tf += mul(nb2, min16float4x4(0.021098461, 0.048261415, -0.121181525, -0.24724431, 0.32716268, 0.03046708, -0.28138334, -0.22871564, -0.15983087, 0.10721642, -0.14833531, -0.115366876, -0.393837, -0.62930757, -0.29534766, 0.02588463)); + conv2d_13_tf += mul(nc2, min16float4x4(-0.03972534, -0.051577512, -0.04452277, -0.12650263, 0.15491997, -0.026459083, 0.009715449, -0.20551588, -0.042652152, 0.119186826, -0.13313279, -0.13183416, -0.20730016, 0.003008999, -0.19962612, 0.1760052)); + conv2d_13_tf += mul(nd2, min16float4x4(0.1724579, -0.3179752, 0.18908302, 0.40730157, 0.44569418, -0.038390577, -0.13144472, -0.18369946, -0.1654486, -0.2106428, -0.084723935, 0.10262653, -0.26097777, 0.15257284, -0.36599034, -0.30871773)); + conv2d_13_tf += mul(ne2, min16float4x4(-0.21338613, 0.680362, 0.079820015, 0.6081361, -0.9754953, -0.33735132, -1.2323227, -0.17950675, -0.31327835, 0.4732144, 0.22757599, 0.23051551, -0.8099572, -0.49106973, 0.96547806, 0.30975753)); + conv2d_13_tf += mul(nf2, 
min16float4x4(0.16933723, 0.17994887, -0.38310486, -0.4208871, 0.373761, 0.20749316, -0.080664486, -0.26229286, -0.04797456, 0.28605196, -0.040223103, -0.034632236, -0.5650002, -0.38834664, 0.14565933, 0.1488285)); + conv2d_13_tf += mul(ng2, min16float4x4(0.32558438, -0.18572666, 0.049500592, 0.2319145, -0.23547912, 0.2740939, 0.027905073, -0.022077003, 0.10860379, -0.15617043, -0.097419575, -0.11391895, -0.4266203, 0.060962453, -0.12154808, -0.19734453)); + conv2d_13_tf += mul(nh2, min16float4x4(-0.07880791, -0.2247225, 0.445858, 0.3889803, 0.14111102, 0.378859, 0.040187526, -0.021096235, 0.04169405, -0.075737596, 0.046068836, 0.11624106, 0.08169536, 0.3022304, -0.24427707, -0.34422734)); + conv2d_13_tf += mul(ni2, min16float4x4(0.13501012, -0.07389663, -0.010668981, -0.069029465, 0.06960202, -0.067375034, 0.08431378, 0.04207825, -0.121635035, -0.051126126, -0.1546829, 0.00073073455, -0.20674464, 0.27346626, -0.15771666, -0.024096)); + conv2d_13_tf += min16float4(-0.17614856, -0.14261112, 0.14600825, 0.20389698); + min16float4 nconv2d_13_tf = max(-conv2d_13_tf, 0); + conv2d_13_tf = max(conv2d_13_tf, 0); + + min16float4 target = mul(e1, min16float4x4(-0.3378193, 0.013861057, 0.19208853, -0.05050854, 0.08691835, 0.16724123, 0.10351982, -0.40157926, -0.055889476, -0.040115904, -0.13351472, -0.7937818, 0.18700145, 0.109559685, -0.119053595, -0.12651901)); + target += mul(e2, min16float4x4(0.05863214, -0.011048432, 0.22007701, -0.21624403, -0.06139813, -0.06766812, 0.022506371, 0.17585056, -0.37994936, -0.018394569, 0.5127985, -0.19700864, -0.07880973, 0.15687309, -0.12574019, -0.19570859)); + target += mul(ne1, min16float4x4(0.5059051, -0.010676642, -0.47922808, -0.017590942, -0.20583269, -0.10777252, -0.33185184, -0.0025075034, -0.1518394, 0.14268444, 0.005011664, 0.09016961, -0.46011007, -0.09428751, 0.34915137, 0.13334215)); + target += mul(ne2, min16float4x4(-0.15615676, 0.09427065, 0.006016912, -0.0003997069, 0.16170138, 0.09666374, 0.14158808, -0.23772424, 
0.39373854, 0.004074768, -0.28073287, 0.0032489141, 0.23473479, -0.12678933, -0.24589436, -0.21988034)); + target += mul(conv2d_11_tf, min16float4x4(-0.12682347, 0.033012364, 0.18928578, 0.12523666, 0.12809147, 0.008567846, -0.10653368, -0.03712133, 0.075765386, -0.042196997, 0.039182812, 0.17273012, 0.21258987, 0.039698593, -0.0018848967, -0.07930902)); + target += mul(nconv2d_11_tf, min16float4x4(0.013454855, -0.18023406, -0.49323913, -0.032017395, 0.11903338, -0.043025218, -0.46579728, 0.21894619, -0.21387324, -0.13455649, 0.30638975, 0.3472243, 0.09305909, -0.015791988, 0.071368046, -0.038680866)); + target += mul(conv2d_1_tf, min16float4x4(0.012506262, 0.09754124, -0.092920735, 0.23061672, 0.08051618, -0.38472125, 0.17626029, 0.009075537, -0.18316247, -0.1338181, 0.2650675, 0.0516641, 0.080453254, 0.22033659, -0.13004474, -0.07781194)); + target += mul(nconv2d_1_tf, min16float4x4(-0.12412428, -0.11978811, 0.06780084, -0.1710261, -0.09355731, 0.31283846, -0.022725523, -0.16437142, -0.11865966, 0.10907317, 0.22463441, 0.017325362, 0.02512185, -0.49577957, 0.2016018, 0.14196795)); + target += mul(conv2d_4_tf, min16float4x4(0.02570746, 0.22231244, -0.10168496, -0.21518417, -0.0054759895, -0.32655567, -0.34048972, 0.11826245, -0.002854444, -0.11257602, -0.09318273, -0.10332744, 0.078923725, -0.11612356, -0.030546617, -0.12474622)); + target += mul(nconv2d_4_tf, min16float4x4(-0.11420135, -0.24489257, 0.15446539, 0.12646616, -0.07092042, 0.110105604, 0.054362826, 0.07867222, -0.15557991, 0.071640015, 0.21894808, 0.24164975, 0.0062167975, 0.10681122, -0.32373384, 0.06931269)); + target += mul(conv2d_7_tf, min16float4x4(0.0769479, -0.09528171, -0.38724712, 0.010703831, -0.016925508, -0.018486671, 0.035855293, -0.17932071, -0.078450575, -0.036463127, 0.20942347, 0.060895607, -0.16549253, -0.008952913, 0.20420915, -0.009001661)); + target += mul(nconv2d_7_tf, min16float4x4(0.074243605, 0.015648128, -0.05003613, 0.10121142, -0.0218682, 0.006933849, 0.101385176, 
0.16132122, 0.0013466089, 0.14042993, -0.25816667, -0.040413387, -0.19570185, -0.08637437, 0.17934911, 0.24961887)); + target += mul(conv2d_10_tf, min16float4x4(-0.40401492, -0.16131033, 0.454142, 0.56882274, -0.013024656, -0.04423676, -0.023137214, 0.36117804, -0.0901519, -0.03237353, 0.010538879, -0.033432953, 0.105834074, -0.0549062, 0.05576519, -0.092626475)); + target += mul(nconv2d_10_tf, min16float4x4(-0.0017419134, -0.022569131, 0.027351622, -0.1289159, -0.0823291, -0.020735232, -0.28244564, -0.21001048, -0.048950948, 0.022033915, 0.14678808, -0.010097721, -0.06839686, 0.031720705, 0.11333891, 0.05049834)); + target += mul(conv2d_13_tf, min16float4x4(-0.2191025, -0.005935159, 0.24627906, 0.058490098, -0.011270337, -0.019233467, -0.17698613, -0.0052346545, 0.2288101, -2.5289672e-05, 0.267102, -0.026019678, -0.17386179, -0.017672652, -0.35420522, 0.2836498)); + target += mul(nconv2d_13_tf, min16float4x4(0.19294678, 0.011570707, -0.34666267, -0.09040537, 0.18127288, 0.10182209, 0.08549184, -0.48737645, -0.040560674, 0.20645715, -0.68665904, -1.3146902, 0.18629448, 0.09806124, 0.09953519, -0.5450951)); + target += min16float4(-0.24792486, -0.09899526, 0.3761066, 0.022595163); + tex4[gxy] = target; + + target = mul(e1, min16float4x4(0.15938057, -0.23559119, -0.28445953, 0.05912659, 0.5229142, -0.02843545, -0.004113748, -0.056947608, 0.1367782, -0.026573306, -0.0056468234, 0.2564603, 0.25593445, 0.08957574, 0.26139608, -0.053708326)); + target += mul(e2, min16float4x4(0.1382045, -0.103480555, 0.05831098, 0.000735441, 0.20176832, -0.087079, -0.07839967, -0.0750771, -0.31373122, -0.27509713, -0.23071732, -0.2560584, 0.110963896, -0.052200988, 0.0015331429, -0.30707568)); + target += mul(ne1, min16float4x4(-0.056460302, 0.2147989, 0.40628514, -0.058157466, -0.17940372, -0.033689886, -0.022241283, -0.0018471872, 0.26578268, -0.098452985, -0.01501511, -0.35676336, -0.07152056, -0.07245194, -0.32194778, 0.03888747)); + target += mul(ne2, min16float4x4(0.09541087, 
0.24680884, -0.045627397, -0.08557985, 0.08790337, 0.10179883, 0.3007415, 0.044102084, 0.1064372, 0.2994135, 0.15280741, 0.2683849, 0.24750276, -0.021364288, -0.004039902, 0.28266376)); + target += mul(conv2d_11_tf, min16float4x4(-0.26525706, -0.08389754, -0.10918147, -0.06878537, -0.080960914, 0.03737948, 0.107663736, -0.0025957434, -0.10748625, 0.03004828, 0.03505711, 0.075969726, 0.06360464, -0.02740913, 0.025467616, 0.017698402)); + target += mul(nconv2d_11_tf, min16float4x4(-0.2370006, -0.07687027, 0.015225365, 0.17986605, 0.37507248, 0.2088343, 0.17946883, 0.2379337, -0.25194344, 0.035336476, -0.15362923, -0.008527836, 0.045963865, 0.025127884, 0.06973296, 0.063168526)); + target += mul(conv2d_1_tf, min16float4x4(0.09583503, 0.15350054, -0.15248272, 0.045916792, -0.18339546, -0.29747355, 0.027330166, -0.39461568, 0.095963046, -0.1775004, -0.19221638, -0.15368307, 0.056089737, 0.18232727, 0.03182419, 0.30851522)); + target += mul(nconv2d_1_tf, min16float4x4(-0.053062204, -0.0018095247, -0.04514637, 0.05689337, 0.07561519, 0.17035827, -0.0048587993, 0.38348997, -0.063476466, 0.09454219, 0.03969728, 0.11693653, -0.0012066896, -0.25955358, -0.14428577, -0.19967856)); + target += mul(conv2d_4_tf, min16float4x4(0.034378257, 0.16030714, 0.05160261, 0.21927983, -0.14469208, 0.041181874, 0.034202367, 0.07983977, 0.22149332, -0.08595994, -0.102985874, -0.07265774, -0.123233125, -0.12819915, 0.08662329, -0.12866889)); + target += mul(nconv2d_4_tf, min16float4x4(-0.1511104, -0.056531575, -0.023363205, -0.1909304, -0.15387732, 0.0671428, -0.15435332, 0.32735124, -0.3293996, 0.055349957, -0.043602336, 0.08102016, 0.200238, 0.13393362, 0.0044564987, 0.16932343)); + target += mul(conv2d_7_tf, min16float4x4(-0.09768015, 0.09503259, 0.12768175, 0.109941825, 0.006567291, -0.102840215, -0.05611706, -0.06865725, -0.2605998, 0.00585688, -0.035119556, -0.06810342, -0.090756536, -0.079376444, -0.22370447, -0.05727839)); + target += mul(nconv2d_7_tf, min16float4x4(-0.101120085, 
0.028628688, 0.07296149, 0.15868604, 0.047761433, 0.07732842, -0.016735386, 0.049528413, 0.45619023, 0.062347047, -0.026208224, 0.046785966, -0.05715451, 0.04459997, -0.13676195, 0.07778552)); + target += mul(conv2d_10_tf, min16float4x4(-0.051393595, -0.12524572, -0.36763692, 0.039426118, 0.0349489, 0.07154008, -0.12969223, 0.30249006, -0.15237582, -0.06685149, -0.042049125, -0.0065471376, 0.017375907, -0.07143284, -0.018227521, -0.02778629)); + target += mul(nconv2d_10_tf, min16float4x4(-0.048270147, -0.07275859, 0.05502608, -0.034233145, 0.12822276, -0.02580663, -0.035358194, 0.05195595, 0.044340245, 0.04435722, 0.017985033, 0.007126749, -0.052825354, -0.059360538, -0.09412195, 0.060212586)); + target += mul(conv2d_13_tf, min16float4x4(-0.18645881, -0.04506676, -0.035483524, 0.0063163475, -0.13747677, -0.046985928, 0.0015511635, 0.019160518, -0.4315584, -0.06979354, -0.001936674, 0.0034739177, 0.3490474, 0.15375568, -0.0085117165, 0.017511753)); + target += mul(nconv2d_13_tf, min16float4x4(0.20412005, 0.017221482, 0.08719384, -0.016668927, 0.10308073, -0.1013255, 0.087567665, -0.1004404, 0.9800944, -0.25387812, 0.36526182, -0.21970014, 0.36388537, -0.111629054, 0.21855496, -0.10375334)); + target += min16float4(-0.14657217, -0.04252579, -0.24773599, 0.13271233); + tex5[gxy] = target; + + target = mul(e1, min16float4x4(-0.22553514, -0.086349756, -0.07735866, 0.48776403, -0.33010843, 0.28214008, -0.2242988, -0.11439686, -0.14720698, 0.2391116, 0.017813087, 0.4352493, -0.16412133, -0.12791261, -0.019643517, 0.19420698)); + target += mul(e2, min16float4x4(-0.9178235, -0.6335296, 0.11146894, -0.0759723, -0.4519685, -0.3007054, 0.014501872, 0.49081457, 0.10673664, 0.035011876, 0.10259641, 0.106546804, 0.5186602, 0.44900152, 0.20597687, -0.39562696)); + target += mul(ne1, min16float4x4(-0.11399027, -0.19542706, 0.087422565, -0.70140034, -0.41029623, -0.049330976, 0.19682989, 0.22516033, -0.22858454, -0.12200487, -0.14852463, -0.40852943, -0.035900578, 0.1886829, 
0.019452838, -0.16703403)); + target += mul(ne2, min16float4x4(0.077843145, 0.7323388, -0.022324003, 0.09445821, 0.026166735, -0.1790519, 0.086004496, -0.40011314, 0.01210975, -0.053515363, -0.2501869, 0.06671936, -0.71530163, -0.57196116, -0.38604704, 0.5024949)); + target += mul(conv2d_11_tf, min16float4x4(0.30748057, 0.12223383, 0.059069566, 0.18568543, 0.008148904, 0.009438993, 0.053996127, -0.19665428, 0.38345802, 0.20945628, 0.01368962, -0.2834185, -0.15974379, -0.4628119, -0.18307796, 0.22361058)); + target += mul(nconv2d_11_tf, min16float4x4(0.00833237, -0.10446639, -0.028896136, -0.18917766, -0.24016596, -0.034934085, -0.013062447, 0.079293504, -0.16635038, -0.11056953, 0.2618598, 0.07227063, 0.057050053, 0.013885738, 0.09385356, -0.27068567)); + target += mul(conv2d_1_tf, min16float4x4(-0.5675842, 0.13328329, -0.0252242, 0.34746942, 0.34712863, 0.13635597, 0.02356317, -0.1617803, -0.16861948, -0.018621348, 0.02680753, 0.30408886, -0.034069773, 0.08948961, -0.057724215, 0.111602895)); + target += mul(nconv2d_1_tf, min16float4x4(-0.03835732, -0.11742271, 0.025922403, 0.24378933, -0.36450952, -0.15091905, 0.1214089, 0.21004228, 0.28717628, 0.17053549, 0.10836553, -0.08449643, 0.17507422, -0.03195037, -0.03947606, 0.050725944)); + target += mul(conv2d_4_tf, min16float4x4(-0.21257977, -0.0043600267, -0.12929972, -0.233982, -0.26728988, -0.21511734, 0.07835361, -0.24275993, -0.359975, -0.23956355, -0.07852281, 0.40282407, 0.17184453, 0.11672362, 0.0433819, -0.032416925)); + target += mul(nconv2d_4_tf, min16float4x4(0.20235331, 0.16114245, 0.015931258, -0.17612378, 0.2449233, 0.0031623375, -0.2784109, 0.3347522, 0.46005112, 0.20291579, 0.13030154, -0.23390344, -0.39526668, -0.09738018, 0.013237711, 0.15512206)); + target += mul(conv2d_7_tf, min16float4x4(-0.1434995, -0.12447443, 0.095140964, -0.08841888, -0.05424789, -0.11747197, -0.097216785, 0.12958516, 0.34194428, 0.111434594, -0.02794559, -0.22843723, -0.043816507, -0.16116165, -0.29044297, 0.33768278)); + 
target += mul(nconv2d_7_tf, min16float4x4(0.39615574, 0.05410518, -0.07885892, -0.22024721, 0.011598219, 0.1446308, 0.11650995, -0.020602686, -0.51892537, 0.14221898, -0.01697185, 0.05188913, 0.07683384, 0.122416414, 0.02296055, 0.2932525)); + target += mul(conv2d_10_tf, min16float4x4(-0.058334768, -0.12389275, -0.02024463, 0.46323973, 0.17553197, 0.35435143, 0.19796194, 0.06836581, 0.15947883, -0.056819815, -0.091066726, 0.22499265, -0.21629064, -0.22203816, 0.053594038, 0.09816408)); + target += mul(nconv2d_10_tf, min16float4x4(-0.016514458, -0.14323495, 0.017527288, -0.19750872, -0.47891942, -0.073656894, -0.086305656, 0.38173944, 0.1016976, 0.15224999, 0.048396923, -0.19529565, 0.13985658, 0.07292602, 0.06549534, 0.210662)); + target += mul(conv2d_13_tf, min16float4x4(0.3459035, 0.0071707424, -0.019186711, 0.2527976, 0.29675815, 0.35949966, -0.06114439, -0.02610484, 0.5475115, -0.13828747, 0.019238133, 0.101953685, -0.52718824, 0.017254699, 0.08887026, -0.19507161)); + target += mul(nconv2d_13_tf, min16float4x4(-0.3064509, -0.031613164, 0.040971015, -0.24252266, -0.21725285, -0.35069898, 0.0951283, -0.065222666, -0.98867434, 0.08824426, 0.06094605, -0.21000125, -0.72066385, -0.34141323, 0.049487203, 0.0690126)); + target += min16float4(0.25545248, -0.112931795, -0.073284395, 0.29349956); + tex10[gxy] = target; +} + +//!PASS 7 +//!DESC Conv-3x3x3x24 +//!IN INPUT, tex4, tex5, tex10 +//!OUT OUTPUT +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass7(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { + return; + } + + const float2 outputPt = GetOutputPt(); + const float2 pos = (gxy + 0.5f) * outputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = tex4.SampleLevel(sam1, pos - outputPt, 0); + min16float4 b1 = tex4.SampleLevel(sam1, pos + float2(-outputPt.x, 0), 0); + min16float4 c1 = tex4.SampleLevel(sam1, pos + 
float2(-outputPt.x, outputPt.y), 0); + min16float4 d1 = tex4.SampleLevel(sam1, pos + float2(0, -outputPt.y), 0); + min16float4 e1 = tex4.SampleLevel(sam1, pos, 0); + min16float4 f1 = tex4.SampleLevel(sam1, pos + float2(0, outputPt.y), 0); + min16float4 g1 = tex4.SampleLevel(sam1, pos + float2(outputPt.x, -outputPt.y), 0); + min16float4 h1 = tex4.SampleLevel(sam1, pos + float2(outputPt.x, 0), 0); + min16float4 i1 = tex4.SampleLevel(sam1, pos + outputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex5.SampleLevel(sam1, pos - outputPt, 0); + min16float4 b2 = tex5.SampleLevel(sam1, pos + float2(-outputPt.x, 0), 0); + min16float4 c2 = tex5.SampleLevel(sam1, pos + float2(-outputPt.x, outputPt.y), 0); + min16float4 d2 = tex5.SampleLevel(sam1, pos + float2(0, -outputPt.y), 0); + min16float4 e2 = tex5.SampleLevel(sam1, pos, 0); + min16float4 f2 = tex5.SampleLevel(sam1, pos + float2(0, outputPt.y), 0); + min16float4 g2 = tex5.SampleLevel(sam1, pos + float2(outputPt.x, -outputPt.y), 0); + min16float4 h2 = tex5.SampleLevel(sam1, pos + float2(outputPt.x, 0), 0); + min16float4 i2 = tex5.SampleLevel(sam1, pos + outputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + 
g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float4 a3 = tex10.SampleLevel(sam1, pos - outputPt, 0); + min16float4 b3 = tex10.SampleLevel(sam1, pos + float2(-outputPt.x, 0), 0); + min16float4 c3 = tex10.SampleLevel(sam1, pos + float2(-outputPt.x, outputPt.y), 0); + min16float4 d3 = tex10.SampleLevel(sam1, pos + float2(0, -outputPt.y), 0); + min16float4 e3 = tex10.SampleLevel(sam1, pos, 0); + min16float4 f3 = tex10.SampleLevel(sam1, pos + float2(0, outputPt.y), 0); + min16float4 g3 = tex10.SampleLevel(sam1, pos + float2(outputPt.x, -outputPt.y), 0); + min16float4 h3 = tex10.SampleLevel(sam1, pos + float2(outputPt.x, 0), 0); + min16float4 i3 = tex10.SampleLevel(sam1, pos + outputPt, 0); + + min16float4 na3 = max(-a3, 0); + min16float4 nb3 = max(-b3, 0); + min16float4 nc3 = max(-c3, 0); + min16float4 nd3 = max(-d3, 0); + min16float4 ne3 = max(-e3, 0); + min16float4 nf3 = max(-f3, 0); + min16float4 ng3 = max(-g3, 0); + min16float4 nh3 = max(-h3, 0); + min16float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + min16float3 target = mul(a1, min16float4x3(0.009331738, 0.018572107, 0.022010602, 0.0039357482, -0.016444422, -0.02944063, -0.03631314, -0.056094132, -0.050672945, 0.0077923858, -0.023002634, 0.021950275)); + target += mul(b1, min16float4x3(-0.015352033, -0.018134398, -0.031076321, 0.09254242, 0.07433854, 0.094745025, 0.09154548, 0.10833595, 0.084574744, -0.06755486, 0.022037052, -0.09424632)); + target += mul(c1, min16float4x3(0.019884977, 0.021337362, 0.026944455, 0.11712925, 0.021360623, -0.017487818, -0.14924358, -0.1149652, -0.12671575, 0.012104617, -0.039750118, -0.002691512)); + target += mul(d1, min16float4x3(0.00344861, -0.0071971808, -0.011530234, 0.039175995, 0.12297611, 0.15838134, 0.033669177, 0.018021118, -0.010552058, -0.048705686, 0.03920792, -0.00043378037)); + target += mul(e1, 
min16float4x3(-0.09026871, -0.09393277, -0.0849584, -0.16249315, -0.15300421, -0.1383744, -0.10384136, -0.04767781, 0.022754611, 0.14949107, 0.06619118, 0.016498014)); + target += mul(f1, min16float4x3(0.0138111375, 0.0033931104, 0.010171692, -0.037168514, -0.029690385, -0.045251988, 0.074186325, 0.056937214, 0.06968052, -0.057218343, -0.060974367, -0.030270662)); + target += mul(g1, min16float4x3(-0.0028436058, 0.010812401, 0.012844112, 0.050941236, -0.035253663, -0.061899442, -0.01614737, 0.01752726, -0.023620276, -0.04961744, -0.06673698, 0.039417736)); + target += mul(h1, min16float4x3(0.042587858, 0.03904053, 0.028782798, -0.09807107, -0.01929461, -0.034585416, 0.029584344, -0.053522006, 0.0068953806, -0.04451219, 0.018451538, -0.029895604)); + target += mul(i1, min16float4x3(-0.0041629653, 0.0070575047, 0.004515914, 0.043267716, 0.0020689464, 0.010954458, -0.0013374113, 0.009222025, -0.0272451, 0.00645634, -0.037133303, -0.03209227)); + target += mul(a2, min16float4x3(-0.010575585, -0.00065620174, -0.009598815, -0.068592854, -0.06461729, -0.05058234, 0.03790364, 0.044340994, 0.049410254, -0.009466368, 0.081484325, 0.07265021)); + target += mul(b2, min16float4x3(-0.01598744, -0.025267042, -0.010857686, 0.0771284, 0.081469566, 0.07138724, -0.00555409, -0.006099002, -0.02123016, -0.29761449, -0.10614364, -0.1027762)); + target += mul(c2, min16float4x3(0.02664693, 0.027294884, 0.019080907, 0.010511018, 0.01179118, 0.02403106, 0.05436632, 0.07234358, 0.08310484, 0.03146414, -0.02122628, -0.021377526)); + target += mul(d2, min16float4x3(0.027889153, 0.018621879, 0.025370836, -0.14017807, -0.14772555, -0.14436993, -0.017539013, -0.028932836, -0.06139342, 0.0007456944, -0.00086823467, -0.05282406)); + target += mul(e2, min16float4x3(-0.0017060362, 0.00777287, 0.003343087, 0.20926197, 0.21706305, 0.23307496, -0.16601992, -0.183019, -0.139133, 0.13933188, -0.013340946, -0.021960167)); + target += mul(f2, min16float4x3(-0.018459205, -0.023415336, -0.0173199, 0.08558963, 
0.10207333, 0.06444232, -2.5721886e-06, -0.015806457, -0.036833573, -0.20488425, -0.009690944, 0.020323949)); + target += mul(g2, min16float4x3(0.010601256, 0.007344732, 0.0056538777, 0.021578439, 0.017345639, 0.0032158173, 0.031785835, 0.04436094, 0.05920955, 0.23948166, -0.06085234, -0.14597872)); + target += mul(h2, min16float4x3(0.00777581, 0.012557825, 0.0123206265, -0.0691877, -0.0861206, -0.077578135, -0.018104369, -0.024902673, -0.036656447, 0.10611258, 0.09515675, 0.118361965)); + target += mul(i2, min16float4x3(0.0021278602, 0.003906813, 0.0016891633, -0.06379228, -0.060215514, -0.051921096, 0.039505195, 0.052035928, 0.05059492, -0.047328927, -0.0066980706, 0.09447027)); + target += mul(a3, min16float4x3(0.18920127, -0.045531996, -0.044905778, 0.013732142, 0.019208554, 0.011500921, -0.0040531917, -0.02001873, -0.0023935249, -0.033091005, -0.017751431, -0.009764133)); + target += mul(b3, min16float4x3(0.15241088, -0.13676398, -0.01825122, -0.003517022, -0.004041717, 0.003177141, 0.011362495, 0.03685609, 0.008397426, -0.08597375, -0.111830845, -0.110682696)); + target += mul(c3, min16float4x3(-0.046171717, 0.23827009, -0.119844295, 0.005446854, 0.00826863, 0.002206898, -0.11165099, -0.14702465, -0.1203897, 0.12169146, 0.11585612, 0.10473949)); + target += mul(d3, min16float4x3(-0.18456058, 0.13293917, 0.06901046, 0.010084839, -0.0006403412, -0.011852079, -0.062180433, -0.06781299, -0.08111614, -0.02218764, -0.015271581, -0.019768957)); + target += mul(e3, min16float4x3(0.034135204, -0.20479187, 0.27587336, -0.058966126, -0.065613195, -0.056132246, 0.07697151, 0.0706985, 0.098771244, 0.06747748, 0.10971204, 0.13186967)); + target += mul(f3, min16float4x3(0.017322296, -0.06730298, 0.07034802, 0.013449086, 0.007968637, 0.012679429, 0.0902275, 0.11269024, 0.08805874, -0.06179092, -0.06705483, -0.13040404)); + target += mul(g3, min16float4x3(-0.052505482, -0.018989135, 0.03388015, -0.068704374, -0.05350174, -0.057223134, 0.011537428, 0.017847707, 0.0270268, 
-0.008713432, -0.02698126, -0.017463546)); + target += mul(h3, min16float4x3(0.15220639, -0.05387876, -0.08352881, 0.026893694, 0.027608246, 0.025959803, 0.035518423, 0.035180617, 0.01858579, -0.021064412, -0.014214504, -0.0051168953)); + target += mul(i3, min16float4x3(-0.11906418, 0.13103563, -0.06997703, 0.005664134, 0.0075536724, 0.009519002, -0.025366528, -0.013528652, -0.015087253, 0.0071858848, -0.027586544, 0.016723866)); + target += mul(na1, min16float4x3(0.015307254, 0.02070064, 0.012568325, 0.06845904, -0.033312738, -0.0058661965, -0.016281582, -0.01631146, -0.021667928, -0.012522515, -0.020992521, -0.015833912)); + target += mul(nb1, min16float4x3(0.04937768, 0.0405066, 0.041023023, 0.05503905, -0.13230717, -0.14439866, 0.01618014, 0.0122084245, 0.016226485, 0.0014116488, 0.011495032, 0.002382562)); + target += mul(nc1, min16float4x3(-0.04847043, -0.050508745, -0.041216835, -0.067119725, -0.0448592, -0.011477939, -0.035635237, -0.037191708, -0.034170575, -0.016549444, -0.027191242, -0.017883684)); + target += mul(nd1, min16float4x3(0.034498286, 0.026938718, 0.052970096, -0.10511612, -0.13200648, -0.09493861, -0.0018118658, -0.0072637545, 0.0043198126, -0.038338073, -0.031448375, -0.035546694)); + target += mul(ne1, min16float4x3(0.048043568, 0.057704087, 0.06386534, 0.04542113, 0.20604704, 0.2598609, 0.049180254, 0.064697154, 0.05789202, 0.08370016, 0.08105142, 0.08807082)); + target += mul(nf1, min16float4x3(-0.018156562, 0.008306473, -0.014604633, 0.18912326, 0.024388695, -0.08006485, 0.009333483, 0.011596536, 0.0056475243, 0.027749287, 0.039271932, 0.02655462)); + target += mul(ng1, min16float4x3(-0.030157864, -0.035259083, -0.05771176, -0.22293729, 0.0768592, 0.14670776, -0.013287718, -0.011300663, -0.01670879, -0.009928094, -0.016364388, -0.013879692)); + target += mul(nh1, min16float4x3(-0.013415757, -0.013257486, -0.01940959, 0.014077903, 0.05088362, 0.04006286, -0.0033998038, -0.0062313867, -0.00833104, 0.015246904, 0.017004015, 0.01802002)); + 
target += mul(ni1, min16float4x3(-0.0016801689, -0.022088053, 0.0031654288, 0.027371893, -0.007083684, -0.10904292, -0.015408179, -0.01793058, -0.010933266, -0.023707654, -0.026440954, -0.025527867)); + target += mul(na2, min16float4x3(0.009003153, 0.0078040734, 0.037757806, 0.054483943, 0.058831017, 0.060899608, -0.011133613, -0.01601666, -0.007977876, -0.07686641, -0.049250316, -0.045481566)); + target += mul(nb2, min16float4x3(0.04344093, 0.07054628, 0.037604738, -0.0914579, -0.105631486, -0.108511426, 0.04426105, 0.0492282, 0.048829302, 0.14961997, 0.16839094, 0.16053638)); + target += mul(nc2, min16float4x3(-0.0032967671, -0.019857304, -0.014145445, -0.013525817, 0.001614058, -0.009782301, -0.044629153, -0.07325184, -0.07655591, -0.08667146, 0.024955297, 0.04591592)); + target += mul(nd2, min16float4x3(0.04816059, 0.030722216, 0.032487474, 0.09684092, 0.10024655, 0.101904154, 0.08137448, 0.092595905, 0.1118598, 0.0796932, 0.009548236, 0.0013610915)); + target += mul(ne2, min16float4x3(-0.17208904, -0.19137467, -0.17717223, -0.10827683, -0.11960323, -0.1204814, -0.030430049, -0.019306151, -0.05230355, -0.021787236, -0.015395303, -0.093210146)); + target += mul(nf2, min16float4x3(0.04527227, 0.057978027, 0.10569097, -0.1015645, -0.12595437, -0.097537845, 0.060087565, 0.09157804, 0.060251515, 0.05170573, 0.042533275, 0.08233745)); + target += mul(ng2, min16float4x3(-0.01908824, 0.0039797956, -0.015060464, 0.008187719, 0.013936167, 0.008152853, -0.02618239, -0.056918032, -0.0504624, -0.083657, 0.02122987, 0.022906482)); + target += mul(nh2, min16float4x3(0.058020473, 0.08750743, 0.032107625, 0.021999976, 0.030119067, 0.03513493, 0.06583862, 0.08137626, 0.09867312, -0.0021064964, -0.1227668, -0.0912879)); + target += mul(ni2, min16float4x3(0.022279112, -0.012710205, -0.0011416139, 0.05606448, 0.066590145, 0.061043978, -0.008292685, -0.019583363, -0.006212003, -0.053282585, -0.029954918, -0.021437356)); + target += mul(na3, min16float4x3(0.019198919, 0.020138288, 
0.02048463, -0.012281223, -0.01964347, -0.010557296, 0.00830553, 0.02714052, 0.016606145, -0.0047117253, -0.0060619717, 0.0015284229)); + target += mul(nb3, min16float4x3(-0.01620369, -0.018634152, -0.018486649, -0.0037721654, -0.005256878, -0.0032221128, 0.048627518, 0.033200823, 0.05459796, 0.0064762663, 0.005607537, 0.0014544157)); + target += mul(nc3, min16float4x3(-0.0049319286, -0.003757374, -0.008033526, -0.009529666, -0.01023788, -0.011724289, 0.08779079, 0.11368912, 0.10699827, 0.014564745, 0.017019482, 0.018130492)); + target += mul(nd3, min16float4x3(-0.018128838, -0.020529313, -0.021291668, 0.022232227, 0.032956265, 0.030233478, 0.057042982, 0.052126013, 0.039634123, 0.04395578, 0.042147905, 0.047779605)); + target += mul(ne3, min16float4x3(-0.008916549, -0.011398656, -0.006473247, 0.07594334, 0.07910866, 0.0726948, -0.1670962, -0.17030263, -0.18856722, 0.0067814733, 0.01550948, 0.002108076)); + target += mul(nf3, min16float4x3(-0.0020052418, -0.0015789939, 0.0024248413, -0.018381692, -0.012541983, -0.016114611, -0.054943718, -0.08546223, -0.045788202, -0.02116913, -0.02479526, -0.02281286)); + target += mul(ng3, min16float4x3(0.004089441, 0.004577225, 0.009165186, -0.023352642, -0.03344756, -0.03359231, 0.051127084, 0.055484984, 0.06788994, -0.009284511, -0.0026670755, -0.011205212)); + target += mul(nh3, min16float4x3(-0.008048874, -0.003658728, -0.011127851, 0.0034879802, 0.014905489, 0.016252292, -0.07353042, -0.0754597, -0.09509333, 0.009990113, -0.0003871956, 0.0049740863)); + target += mul(ni3, min16float4x3(0.009073377, 0.006138898, 0.006741848, -0.009877169, -0.019738095, -0.015525384, 0.057441086, 0.06538757, 0.053950094, -0.0011834118, 0.0010558038, 0.004649949)); + target += min16float3(-0.008654677, -0.008960475, -0.009207461); + + OUTPUT[gxy] = float4(target + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); +} diff --git a/src/Effects/Anime4K/Anime4K_Upscale_GAN_x2_S.hlsl b/src/Effects/Anime4K/Anime4K_Upscale_GAN_x2_S.hlsl index 
1331fb58b..0ee08c7aa 100644 --- a/src/Effects/Anime4K/Anime4K_Upscale_GAN_x2_S.hlsl +++ b/src/Effects/Anime4K/Anime4K_Upscale_GAN_x2_S.hlsl @@ -1,15 +1,18 @@ // Anime4K_Upscale_GAN_x2_S -// 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Upscale/Anime4K_Upscale_GAN_x2_S.glsl +// 移植自 https://github.com/bloc97/Anime4K/blob/8e39551ce96ed172605c89b7dd8be855b5502cc9/glsl/Upscale/Anime4K_Upscale_GAN_x2_S.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 - +//!VERSION 4 +//!SORT_NAME Anime4K_Upscale_GAN_x2_1 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -696,12 +699,14 @@ void Pass6(uint2 blockStart, uint3 threadId) { //!PASS 7 //!DESC Conv-3x3x3x16 //!IN tex6, tex8, INPUT +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 void Pass7(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - uint2 outputSize = GetOutputSize(); + + const uint2 outputSize = GetOutputSize(); if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -810,5 +815,5 @@ void Pass7(uint2 blockStart, uint3 threadId) { result += mul(ni2, float4x3(0.068098865, 0.07742245, 0.04117883, -0.07239023, -0.0048315763, -0.0029638975, -0.053049978, 0.121163346, 0.048760712, -0.033619802, -0.010043663, -0.012648383)); result += float3(0.00016753975, -0.00019302216, -0.0001663917); - WriteToOutput(gxy, result + INPUT.SampleLevel(sam1, pos, 0).rgb); + OUTPUT[gxy] = float4(result + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Upscale_GAN_x3_L.hlsl b/src/Effects/Anime4K/Anime4K_Upscale_GAN_x3_L.hlsl new file mode 100644 index 000000000..1ea2119d1 --- /dev/null +++ b/src/Effects/Anime4K/Anime4K_Upscale_GAN_x3_L.hlsl @@ -0,0 +1,1932 @@ +// Anime4K_Upscale_GAN_x3_L +// 移植自 
https://github.com/bloc97/Anime4K/blob/8e39551ce96ed172605c89b7dd8be855b5502cc9/glsl/Upscale/Anime4K_Upscale_GAN_x3_L.glsl + +//!MAGPIE EFFECT +//!VERSION 4 + +// 圆括号内的输入只被采样一次 +// INPUT -> tf, tf1, tf2 +// tf, tf1, tf2 -> 1_tf, 3_tf, 3_tf1, 3_tf2 +// 3_tf, 3_tf1, 3_tf2, (1_tf) -> 4_tf, 6_tf, 6_tf1, 6_tf2 +// 6_tf, 6_tf1, 6_tf2, (1_tf), (4_tf) -> 7_tf, 9_tf, 9_tf1, 9_tf2 +// 9_tf, 9_tf1, 9_tf2, (1_tf), (4_tf), (7_tf) -> 11_tf, 10_tf, 12_tf, 12_tf1, 12_tf2 +// 12_tf, 12_tf1, 12_tf2, 11_tf, (1_tf), (4_tf), (7_tf), (10_tf) -> 0ups, 0ups1, 0ups2 +// 0ups, 0ups1, 0ups2 -> 1ups, 1ups1 +// (INPUT), 1ups, 1ups1 -> OUTPUT + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 3 +//!HEIGHT INPUT_HEIGHT * 3 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex3; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex4; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex5; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex6; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex7; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex8; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex9; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R16G16B16A16_FLOAT +Texture2D tex10; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT 
R16G16B16A16_FLOAT +Texture2D tex11; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 3 +//!HEIGHT INPUT_HEIGHT * 3 +//!FORMAT R16G16B16A16_FLOAT +Texture2D conv1ups; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 3 +//!HEIGHT INPUT_HEIGHT * 3 +//!FORMAT R16G16B16A16_FLOAT +Texture2D conv1ups1; + + +//!PASS 1 +//!DESC Conv-4x3x3x3 +//!IN INPUT +//!OUT tex1, tex2, tex3 +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 + +void Pass1(uint2 blockStart, uint3 threadId) { + /* First pass: each thread handles the 2x2 pixel quad whose top-left pixel is gxy, reading RGB from INPUT and writing 4-channel results to tex1, tex2 and tex3. */ + uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + float2 inputPt = GetInputPt(); + + uint i, j; + + /* src is the 4x4 RGB neighbourhood needed by the quad, filled with four gathers per colour channel; going by the w/z/x/y layout noted below, the first index runs horizontally. */ + min16float3 src[4][4]; + [unroll] + for (i = 0; i <= 2; i += 2) { + [unroll] + for (j = 0; j <= 2; j += 2) { + float2 tpos = (gxy + uint2(i, j)) * inputPt; + const min16float4 sr = INPUT.GatherRed(sam, tpos); + const min16float4 sg = INPUT.GatherGreen(sam, tpos); + const min16float4 sb = INPUT.GatherBlue(sam, tpos); + + // w z + // x y + src[i][j] = min16float3(sr.w, sg.w, sb.w); + src[i][j + 1] = min16float3(sr.x, sg.x, sb.x); + src[i + 1][j] = min16float3(sr.z, sg.z, sb.z); + src[i + 1][j + 1] = min16float3(sr.y, sg.y, sb.y); + } + } + + [unroll] + for (i = 1; i <= 2; ++i) { + [unroll] + for (j = 1; j <= 2; ++j) { + uint2 destPos = gxy + uint2(i - 1, j - 1); + + /* gxy itself (i == 1, j == 1) was bounds-checked at the top, so only the other three pixels of the quad are tested here. */ + if (i != 1 || j != 1) { + if (destPos.x >= inputSize.x || destPos.y >= inputSize.y) { + continue; + } + } + + /* 3x3 window centred on src[i][j]: nine 3x4 matrix products plus a bias give the four values stored in tex1. */ + min16float4 result = mul(src[i - 1][j - 1], min16float3x4(-0.26345107, 0.18636681, 0.068192646, 0.06335259, -0.5026903, -0.39884016, -0.14562744, -0.24653248, -0.44533378, 0.52169526, -0.35453957, 0.25303423)); + result += mul(src[i - 1][j], min16float3x4(-0.22396083, 0.1324318, 0.47152156, -0.3061965, -0.06026671, -0.26795772, 0.0081171375, -0.32897332, -0.16401465, -0.1018444, 0.48241594, -0.09054633)); + result += mul(src[i - 1][j + 1], min16float3x4(0.25090155, -0.15917313, 0.028407926, -0.24014995, 0.4114972, -0.45535553, 0.08742311, 0.16796699, 0.0995656, 
-0.4009339, 0.21471445, 0.2708967)); + result += mul(src[i][j - 1], min16float3x4(-0.16269766, 0.16389379, -0.12857921, -0.1602467, -0.16460834, 0.15754342, 0.46217716, 0.20442651, 0.0548621, -0.018400457, 0.38643107, -0.29171357)); + result += mul(src[i][j], min16float3x4(-0.24035631, -0.33344224, -0.3904698, -0.4168555, -0.42237657, 0.36649242, 0.41396108, -0.38945103, -0.5806718, 0.035621256, 0.09171773, -0.54301006)); + result += mul(src[i][j + 1], min16float3x4(0.15957133, -0.035278857, 0.1318051, 0.6896821, 0.18556473, 0.16378926, 0.32670698, 0.2675555, 0.08802092, 0.41140598, 0.05322177, 0.5030955)); + result += mul(src[i + 1][j - 1], min16float3x4(-0.082798496, 0.24381381, -0.30908522, 0.04553323, 0.25664318, 0.4123797, -0.29377607, 0.15920162, 0.13717672, 0.027625162, 0.25476956, 0.21843456)); + result += mul(src[i + 1][j], min16float3x4(0.14534818, -0.239681, 0.22961527, 0.3814783, 0.1233398, 0.2449555, 0.015051085, 0.1661234, -0.27740797, -0.29109767, -0.19438179, -0.027439274)); + result += mul(src[i + 1][j + 1], min16float3x4(0.0011904882, -0.01287622, -0.1573707, -0.13167281, -0.12803882, -0.079415865, -0.04034391, -0.09625339, 0.23190106, -0.26743674, -0.48981485, -0.2063946)); + result += min16float4(0.034235504, 0.039522275, -0.032817896, -0.0031068379); + tex1[destPos] = result; + + result = mul(src[i - 1][j - 1], min16float3x4(-0.17155029, -0.084075995, 0.2281505, 0.38326037, 0.18672232, -0.2562305, 0.30811027, 0.30188802, -0.24588907, 0.088734694, 0.14092724, -0.18793459)); + result += mul(src[i - 1][j], min16float3x4(-0.47514066, 0.51882815, 0.1561294, -0.043147214, -0.19554369, 0.19514531, -0.14636773, 0.11425865, -0.2772368, 0.5388449, 0.54875004, -0.4526634)); + result += mul(src[i - 1][j + 1], min16float3x4(0.11270131, 0.44642356, -0.066219814, 0.15781905, 0.056682296, 0.026522577, 0.05600635, -0.13799536, 0.15637676, -0.15661198, 0.53794587, 0.09693692)); + result += mul(src[i][j - 1], min16float3x4(-0.23679815, 0.16397353, 0.37343305, 
0.07477207, -0.36061585, 0.24027273, 0.3222875, 0.05577238, -0.17547923, 0.11737104, 0.10193468, -0.056727592)); + result += mul(src[i][j], min16float3x4(0.2335428, -0.5571976, 0.13586389, -0.3443148, 0.4537042, -0.59349614, -0.24114902, 0.08669349, 0.2881981, -0.29106617, -0.47775048, 0.22723311)); + result += mul(src[i][j + 1], min16float3x4(0.006350133, -0.28196353, 0.22710627, 0.30080464, -0.3500525, 0.09254133, -0.48047104, -0.30452347, -0.077637784, -0.11856046, 0.07377078, 0.44280833)); + result += mul(src[i + 1][j - 1], min16float3x4(0.2200762, 0.3665277, 0.043291833, 0.21484855, 0.15553318, -0.035003938, 0.14891839, -0.29007155, 0.23154758, -0.2348225, 0.48130423, 0.00733271)); + result += mul(src[i + 1][j], min16float3x4(0.28228128, 0.054867495, 0.08010268, -0.2980908, 0.15146615, -0.058449056, -0.43990552, -0.5963296, 0.09321943, 0.20146254, -0.08043876, 0.017381484)); + result += mul(src[i + 1][j + 1], min16float3x4(0.076894, 0.16354772, 0.25471574, 0.24382424, -0.15274979, -0.19706573, -0.30667382, 0.523845, 0.023073493, 0.34462887, -0.3384359, 0.18867111)); + result += min16float4(0.014904483, -0.009271063, 0.04884906, 0.0106121525); + tex2[destPos] = result; + + result = mul(src[i - 1][j - 1], min16float3x4(-0.34360278, -0.28731042, -0.017787619, 0.36802426, 0.33655256, -0.24784079, 0.29148427, 0.28857, -0.3111454, 0.0030706236, -0.25914, 0.5528963)); + result += mul(src[i - 1][j], min16float3x4(0.12459981, -0.17094392, -0.18776429, 0.37819883, 0.1320519, 0.21927781, -0.16188109, 0.050895408, -0.06871313, 0.16754176, 0.29934305, 0.052247107)); + result += mul(src[i - 1][j + 1], min16float3x4(-0.016753385, -0.0935026, -0.3025131, 0.029084548, -0.17713268, 0.23525053, 0.015773006, 0.5464473, 0.49457568, 0.03073306, 0.18685353, 0.28700578)); + result += mul(src[i][j - 1], min16float3x4(0.135332, 0.07585244, 0.05262212, -0.15484884, -0.13468477, 0.5161883, 0.10347934, -0.37127933, 0.12426171, 0.48973167, 0.19040361, -0.24403319)); + result += 
mul(src[i][j], min16float3x4(-0.54557467, 0.07250278, 0.37912187, 0.0044768555, -0.47080016, -0.4050018, 0.64416456, -0.58235925, -0.28048036, -0.32962233, -0.28131053, 0.022653949)); + result += mul(src[i][j + 1], min16float3x4(0.17059836, 0.016603703, 0.34638256, 0.028987328, 0.43271738, -0.15030707, 0.072848, 0.1422675, -0.23391044, -0.12179815, 0.37569857, -0.056668952)); + result += mul(src[i + 1][j - 1], min16float3x4(-0.0428437, 0.15237094, -0.26750615, 0.053740855, -0.04772152, -0.13561963, -0.20043467, -0.018060924, 0.29031327, -0.17592178, -0.5016104, -0.36639994)); + result += mul(src[i + 1][j], min16float3x4(0.39091983, -0.257284, -0.39293087, -0.1182859, -0.46328986, -0.1585645, -0.32158652, 0.41519204, 0.21179573, -0.3613411, -0.032484483, -0.03755994)); + result += mul(src[i + 1][j + 1], min16float3x4(0.42772895, 0.11436431, -0.115817815, -0.29173127, 0.57807744, -0.21997264, -0.49362126, 0.021626333, 0.1258072, -0.062251803, -0.16541855, 0.061321106)); + result += min16float4(-0.017981518, -0.012223751, -0.0033700857, 0.013441364); + tex3[destPos] = result; + } + } +} + + +//!PASS 2 +//!DESC Conv-4x3x3x24, Conv-4x1x1x40 +//!IN tex1, tex2, tex3 +//!OUT tex4, tex5, tex6, tex7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass2(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = tex1.SampleLevel(sam, pos - inputPt, 0); + min16float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e1 = tex1.SampleLevel(sam, pos, 0); + min16float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 
g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i1 = tex1.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex2.SampleLevel(sam, pos - inputPt, 0); + min16float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e2 = tex2.SampleLevel(sam, pos, 0); + min16float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i2 = tex2.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float4 a3 = tex3.SampleLevel(sam, pos - inputPt, 0); + min16float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + 
min16float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e3 = tex3.SampleLevel(sam, pos, 0); + min16float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i3 = tex3.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na3 = max(-a3, 0); + min16float4 nb3 = max(-b3, 0); + min16float4 nc3 = max(-c3, 0); + min16float4 nd3 = max(-d3, 0); + min16float4 ne3 = max(-e3, 0); + min16float4 nf3 = max(-f3, 0); + min16float4 ng3 = max(-g3, 0); + min16float4 nh3 = max(-h3, 0); + min16float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + min16float4 conv2d_2_tf = mul(a1, min16float4x4(0.1881249, -0.14544061, -0.022969106, 0.088232316, 0.0058642747, -0.049336948, -0.039974928, 0.06410949, -0.09211665, -0.034005307, -0.1095955, 0.10930763, -0.26397142, 0.1384094, 0.017342392, -0.21376696)); + conv2d_2_tf += mul(b1, min16float4x4(-0.14526816, -0.0639951, 0.11742242, -0.006982521, -0.05208895, 0.089485295, -0.19564004, -0.08121572, -0.11621622, 0.15375662, -0.15378582, 0.0596373, 0.14132364, -0.06385903, 0.05449634, -0.047440365)); + conv2d_2_tf += mul(c1, min16float4x4(-0.082622305, -0.23856479, 0.06367865, -0.019509695, 0.094882965, -0.04511791, 0.16706854, 0.20536391, 0.1409632, -0.10635743, 0.038904104, -0.0039008786, 0.16198882, -0.17409256, 0.13213669, 0.08329318)); + conv2d_2_tf += mul(d1, min16float4x4(-0.1186756, 0.2009846, -0.086417995, 0.20491274, -0.13763973, -0.0800847, 0.16069777, 0.10931271, 0.14152408, 0.047218926, 0.041168302, -0.054257084, -0.08315953, -0.1573787, 0.20787828, 0.118524544)); + conv2d_2_tf += mul(e1, min16float4x4(-0.19046788, 0.10213364, -0.112078644, -0.16287695, 0.011410189, -0.016858546, 
-0.09383451, -0.063516155, -0.17561492, -0.15199865, -0.112707786, -0.18099716, 0.19017689, -0.20048961, -0.5382596, -0.24332014)); + conv2d_2_tf += mul(f1, min16float4x4(-0.1465597, 0.044423096, 0.04632811, -0.015121401, -0.051081203, -0.09574356, -0.10018257, -0.019390205, -0.1562855, 0.041693382, -0.012624074, 0.057703923, 0.09825134, 0.1544577, 0.1683734, 0.018580355)); + conv2d_2_tf += mul(g1, min16float4x4(-0.22240564, -0.051839057, 0.12950379, 0.0048653902, 0.0144696245, -0.10698864, -0.08654499, -0.131132, 0.15429983, 0.025204081, -0.09136411, -0.053068906, -0.005858075, -0.02560129, 0.0469077, 0.018962694)); + conv2d_2_tf += mul(h1, min16float4x4(-0.4698737, -0.053354982, 0.27541625, -0.020424731, 0.06935965, 0.008045162, -0.11538889, -0.038876567, -0.049084928, 0.1629101, -0.012742019, 0.12038333, -0.0705842, 0.12735052, 0.17640172, 0.050716672)); + conv2d_2_tf += mul(i1, min16float4x4(-0.10064598, 0.016594354, -0.14633141, -0.09175336, -0.12889755, -0.1671076, 0.22031903, 0.0759859, 0.102435045, -0.072596334, -0.17714, 0.03531571, -0.022843607, 0.047148425, 0.105391234, 0.05260699)); + conv2d_2_tf += mul(a2, min16float4x4(-0.1057386, 0.020954097, -0.022159133, 0.064248964, -0.031145383, -0.008180922, -0.023611609, 0.05197287, -0.017418958, 0.02461813, 0.0584847, -0.10087345, -0.16315617, 0.15651843, 0.10478647, 0.08347392)); + conv2d_2_tf += mul(b2, min16float4x4(-0.01323452, 0.044956483, -0.007983463, 0.10643116, -0.030048033, -0.11766427, -0.101889476, 0.015120098, 0.031690307, 0.014470776, -0.10197176, -0.10045749, -0.065616645, -0.15230782, -0.26183948, -0.071056716)); + conv2d_2_tf += mul(c2, min16float4x4(0.026220636, -0.044518135, 0.17167594, 0.3016424, 0.12054841, 0.042369425, 0.04208856, 0.14938886, -0.07018442, -0.008244587, 0.14260693, -0.094418734, -0.032693435, 0.042990524, -0.053002246, -0.003936231)); + conv2d_2_tf += mul(d2, min16float4x4(-0.10141095, 0.17178011, -0.10951717, -0.28119737, 0.008288983, 0.14197187, 0.10903869, 0.017220182, 
0.041842293, -0.03106527, -0.05892881, 0.02668739, 0.072505936, -0.060759444, 0.00032896115, -0.03440771)); + conv2d_2_tf += mul(e2, min16float4x4(0.13831837, -0.13864368, 0.15232176, 0.31198958, 0.033965178, 0.053397447, -0.30352455, -0.17199865, -0.06429645, 0.013913047, 0.10764071, -0.12238359, -0.04544379, 0.17074125, 0.024108075, -0.14521888)); + conv2d_2_tf += mul(f2, min16float4x4(-0.11011318, -0.0102100335, -0.37701187, -0.36465186, 0.16052358, -0.06683314, 0.16916892, 0.23348652, -0.17332914, 0.007918098, -0.055450343, 0.12134491, 0.002598775, 0.050541576, 0.16586582, -0.08609246)); + conv2d_2_tf += mul(g2, min16float4x4(0.055008903, -0.038048673, 0.12065314, -0.034293417, 0.026340824, 0.0635937, 0.0072025824, 0.1099919, -0.022833373, 0.06988719, 0.098066956, 0.09838032, 0.027212605, -0.10769643, 0.025036965, 0.07822364)); + conv2d_2_tf += mul(h2, min16float4x4(0.12550583, -0.015590264, -0.20009072, -0.2595937, -0.040573828, -0.07032441, 0.13428123, 0.0024277875, -0.103335135, -0.08461066, 0.05634581, -0.113169014, 0.05591198, -0.16420694, -0.06915715, -0.19497992)); + conv2d_2_tf += mul(i2, min16float4x4(0.088232726, -0.05118527, 0.119473234, 0.23262945, 0.06746001, 0.14686997, -0.25685823, 0.08349066, 0.120035954, 0.11132579, 0.1024914, 0.008478224, -0.054700002, -0.029634893, 0.050064556, -0.08939752)); + conv2d_2_tf += mul(a3, min16float4x4(0.022236016, -0.14602192, -0.011037687, 0.09127931, 0.14263593, 0.2303995, -0.07378528, 0.07426219, -0.25500375, 0.18845809, -0.065374866, 0.016772734, 0.02813176, 0.15021992, -0.032982655, 0.0046127643)); + conv2d_2_tf += mul(b3, min16float4x4(0.110158965, 0.02073459, 0.1380525, 0.017634321, -0.3546499, -0.099760525, -0.1195462, 0.057210118, -0.53130746, 0.23352407, -0.18252264, -0.19651698, -0.10013627, -0.006907238, -0.022171183, 0.023419948)); + conv2d_2_tf += mul(c3, min16float4x4(-0.009217382, 0.00943576, 0.005295363, 0.010542551, -0.21079898, -0.14469005, -0.19105618, 0.2098414, 0.18261504, 0.19765937, 
0.044775106, -0.25939676, 0.072466746, -0.08828442, 0.066161856, 0.05692894)); + conv2d_2_tf += mul(d3, min16float4x4(-0.051633067, 0.019243274, 0.28932014, -0.029704608, -0.06255436, -0.4573925, -0.10963281, 0.121834375, 0.10874706, -0.093909726, 0.06983889, 0.048236616, -0.15379356, -0.06354611, -0.10668147, -0.02901699)); + conv2d_2_tf += mul(e3, min16float4x4(-0.044167574, 0.022249546, -0.3618917, -0.054136246, -0.105739385, -0.22325896, -0.070169605, -0.19650152, 0.07689512, -0.17047665, -0.07742679, 0.031095566, -0.01903123, -0.033752028, -0.2286711, 0.044381924)); + conv2d_2_tf += mul(f3, min16float4x4(-0.05709193, 0.15251294, -0.16776492, 0.09025173, 0.18235344, 0.3685535, -0.053927444, 0.10351524, -0.0938133, -0.26824594, -0.036424845, -0.106756285, -0.13051414, -0.07613318, -0.10721611, -0.13445549)); + conv2d_2_tf += mul(g3, min16float4x4(-0.0268394, 0.017245602, 0.1185864, 0.031915247, -0.037321728, 0.037805032, 0.13701047, 0.025731707, 0.03791209, -0.16549957, 0.08953334, -0.13901101, -0.1287722, 0.072961085, 0.06859001, 0.18934746)); + conv2d_2_tf += mul(h3, min16float4x4(-0.11152981, 0.13712928, -0.05714947, 0.05542204, -0.32208005, -0.015176284, 0.10014709, -0.030125491, -0.04422843, 0.12897238, 0.108573034, -0.025267191, 0.02247499, -0.058167085, -0.15205052, 0.043249656)); + conv2d_2_tf += mul(i3, min16float4x4(-0.12951276, -0.14417744, 0.012708804, -0.0040302873, 0.09192804, -0.092346616, -0.09659876, -0.13512622, -0.0737095, 0.002481852, 0.048459593, 0.05455724, -0.14035852, 0.07777282, 0.07471883, 0.107781895)); + conv2d_2_tf += mul(na1, min16float4x4(0.028793033, -0.039604917, -0.0045903274, -0.05023892, 0.04976248, -0.026074547, 0.1733191, -0.06694405, -0.12434122, 0.12477937, -0.105804294, 0.06170465, 0.33725888, -0.15944988, 0.09790923, 0.030690596)); + conv2d_2_tf += mul(nb1, min16float4x4(0.005191585, 0.08373177, -0.018288689, 0.020527333, -0.055718876, -0.12754384, 0.17755422, 0.1597085, 0.17601304, -0.0258804, 0.16454586, 0.106551126, 
-0.20891763, -0.05360957, -0.24229631, -0.15886526)); + conv2d_2_tf += mul(nc1, min16float4x4(0.03740399, -0.0043318006, -0.010840595, -0.01674406, -0.17876416, 0.09188681, -0.12203759, -0.09808559, 0.1243873, -0.184597, 0.07484877, 0.14448164, -0.15161137, 0.033237204, -0.054772068, -0.085399576)); + conv2d_2_tf += mul(nd1, min16float4x4(0.071139924, 0.025827989, 0.021663137, -0.12484576, -0.07799051, 0.20053016, 0.014714873, -0.041652568, 0.046397317, -0.07650734, 0.06753141, 0.080667324, 0.4557549, -0.029605106, -0.25674006, -0.27842438)); + conv2d_2_tf += mul(ne1, min16float4x4(0.16805562, -0.03722638, 0.021958483, -0.04969856, -0.15340807, -0.22158863, -0.25280216, -0.024268134, 0.085401855, 0.22427009, -0.04698029, -0.071075134, -0.10739174, 0.030285811, 0.31068414, 0.2882289)); + conv2d_2_tf += mul(nf1, min16float4x4(-0.010069354, -0.045132317, -0.08054911, 0.19212297, -0.11246117, 0.203382, 0.10145021, 0.1476792, -0.022835081, 0.16916804, -0.018178321, 0.076025024, -0.29570428, -0.007177177, -0.1047155, -0.0178633)); + conv2d_2_tf += mul(ng1, min16float4x4(0.076137505, 0.117270656, -0.077183075, -0.052782975, -0.08236995, 0.053947527, 0.13501388, 0.17139077, -0.2424162, -0.15007298, 0.123724684, 0.09327283, 0.19777925, 0.07314544, -0.18668725, -0.010371631)); + conv2d_2_tf += mul(nh1, min16float4x4(0.15866037, 0.053233996, -0.026709981, -0.1574147, -0.012303242, 0.06893102, 0.031804018, 0.10116885, -0.016902728, -0.082480945, 0.05133729, -0.20160739, -0.012635841, 0.032104325, 0.00968726, -0.018941477)); + conv2d_2_tf += mul(ni1, min16float4x4(-0.02683365, 0.14024723, 0.0020279875, 0.035137076, -0.019948762, 0.3120297, -0.018649966, -0.17814124, -0.14863688, -0.12977526, -0.09194036, 0.19637106, 0.12040974, 0.09383599, 0.10559805, -0.0319509)); + conv2d_2_tf += mul(na2, min16float4x4(-0.07015076, -0.07818044, 0.12413185, -0.0018199648, -0.015275738, -0.21548629, 0.046161238, -0.10475311, 0.082367115, 0.0053079966, 0.09559984, 0.039583992, -0.1681236, 
-0.23862287, -0.09229484, -0.12317666)); + conv2d_2_tf += mul(nb2, min16float4x4(-0.17587087, -0.097817905, 0.08857801, 0.14012139, -0.20023742, 0.029083535, 0.056073546, -0.06810832, 0.08625035, 0.023427716, 0.1797412, 0.048568305, -0.09278378, -0.09250215, -0.12440772, 0.2587798)); + conv2d_2_tf += mul(nc2, min16float4x4(-0.24181388, -0.016290328, -0.026988767, -0.005399553, -0.061761368, -0.0013004051, -0.1990831, -0.07799404, 0.03282008, 0.079514205, -0.07474829, -0.36701006, 0.078521594, -0.156468, 0.09041213, 0.1292482)); + conv2d_2_tf += mul(nd2, min16float4x4(-0.21960634, 0.041841425, 0.122728646, 0.06800145, 0.07355482, 0.26123464, -0.13518283, -0.05085496, -0.099832244, 0.04960356, 0.066544525, 0.09741243, -0.10965899, -0.16163626, 0.09816793, -0.014595947)); + conv2d_2_tf += mul(ne2, min16float4x4(0.07614604, -0.062298786, -0.07941662, -0.22525579, -0.29955792, 0.11145522, 0.123146005, 0.13863817, 0.15309983, 0.025902487, -0.08610474, -0.07598799, -0.26134565, -0.2818921, 0.0046356185, 0.007307074)); + conv2d_2_tf += mul(nf2, min16float4x4(-0.15936229, -0.10145381, 0.058567517, 0.21258314, -0.18010478, -0.22477242, -0.039975245, -0.34447697, -0.21647838, 0.31467855, -0.0674453, -0.5146147, 0.05382176, -0.026282668, -0.24090777, 0.10222359)); + conv2d_2_tf += mul(ng2, min16float4x4(-0.1045028, -0.027515164, 0.013251722, 0.108239084, 0.03163253, -0.030052185, 0.10836872, 0.15349132, 0.09593661, 0.0062710177, -0.19837233, -0.098303355, -0.23947543, -0.04082913, 0.16908304, -0.031784274)); + conv2d_2_tf += mul(nh2, min16float4x4(-0.07773699, 0.30408737, 0.10054892, 0.36721498, 0.51369953, -0.11931886, -0.17019019, -0.3288588, 0.11095048, -0.29225063, -0.075574756, -0.18392691, -0.10289336, 0.06882282, 0.20403436, 0.12073833)); + conv2d_2_tf += mul(ni2, min16float4x4(0.024539007, 0.053005982, -0.099204265, -0.084534295, -0.2587164, -0.31929657, 0.07193254, 0.18271501, -0.043669797, 0.062497724, -0.055462, 0.057130013, -0.015285072, -0.030743862, -0.07051513, 
-0.13783172)); + conv2d_2_tf += mul(na3, min16float4x4(-0.4343681, 0.35928357, -0.004770178, -0.079942055, 0.014088603, -0.20866469, -0.1378781, -0.06831558, 0.21436058, -0.08427488, 0.2455502, -0.065596916, -0.06559933, -0.027101375, 0.023555819, -0.20939256)); + conv2d_2_tf += mul(nb3, min16float4x4(-0.37720296, -0.111260146, -0.25392932, -0.33377793, -0.17806955, -0.008747484, 0.17404033, 0.058826912, 0.0039355545, -0.18436235, 0.15803719, 0.15143508, 0.11155828, 0.09333553, -0.17960371, -0.036842924)); + conv2d_2_tf += mul(nc3, min16float4x4(-0.087490946, 0.0959697, -0.08301798, -0.19364063, -0.00996324, 0.014655412, 0.021732382, 0.07269497, 0.012744119, 0.01542146, 0.109438084, 0.18674947, -0.05728511, 0.017406877, 0.036412247, -0.044986803)); + conv2d_2_tf += mul(nd3, min16float4x4(0.30902067, 0.25019556, -0.079495244, -0.26099077, 0.08450634, -0.08346094, 0.004498276, -0.119334444, -0.08587327, -0.019446453, -0.1811446, -0.16136086, 0.006683898, 0.0005228834, -0.11937812, -0.2045503)); + conv2d_2_tf += mul(ne3, min16float4x4(0.19326456, -0.052496854, 0.12926556, 0.10167019, 0.090374604, 0.07595169, -0.0048561483, 0.12414255, 0.19320521, -0.027459998, 0.08993327, -0.035830285, 0.006461366, 0.023297347, 0.0691706, -0.00831113)); + conv2d_2_tf += mul(nf3, min16float4x4(0.13971736, 0.0788502, 0.12267767, 0.004433991, -0.053574555, -0.08087108, -0.26019198, -0.04175351, -0.13934188, 0.04144695, -0.070562504, -0.068388134, -0.1347503, -0.02173245, -0.1099242, -0.020897312)); + conv2d_2_tf += mul(ng3, min16float4x4(0.07843604, 0.04441641, -0.016214373, -0.15351163, -0.021339556, 0.023823377, -0.01442564, -0.09113205, -0.02552644, 0.14885889, -0.16178642, 0.14472331, 0.14082494, 0.05760455, -0.11503234, -0.16907685)); + conv2d_2_tf += mul(nh3, min16float4x4(-0.042953692, -0.3268466, 0.13181087, -0.06399399, 0.17543526, 0.111214496, 0.07369484, -0.003378238, 0.040965978, -0.0073295045, 0.07711077, -0.033094298, -0.08758825, -0.01715938, 0.056862406, -0.010732023)); + 
conv2d_2_tf += mul(ni3, min16float4x4(-0.039256442, -0.07153648, 0.10314899, -0.1192048, -0.033410206, 0.13077301, 0.19343375, -0.07479033, 0.10759806, -0.037313893, 0.06156247, 0.021744521, -0.18148352, -0.15683053, 0.017884498, -0.11338723)); + conv2d_2_tf += min16float4(-0.077597156, 0.024995416, 0.0048880246, -0.06210122); + min16float4 nconv2d_2_tf = max(-conv2d_2_tf, 0); + conv2d_2_tf = max(conv2d_2_tf, 0); + + min16float4 conv2d_1_tf = mul(a1, min16float4x4(0.10368956, 0.09174666, 0.07265347, 0.009965846, 0.04307676, 0.018726716, 0.064217605, -0.024381645, 0.013237381, 0.039251406, 0.13164084, -0.05265028, -0.08619517, 0.015469731, 0.10171868, -0.11194108)); + conv2d_1_tf += mul(b1, min16float4x4(-0.055484463, 0.1386706, -0.22939423, -0.2222723, 0.04815343, 0.05425625, 0.08234074, 0.12962975, 0.030559294, -0.07823733, 0.12347866, -0.13917705, -0.031347297, 0.010592373, -0.38942683, -0.302033)); + conv2d_1_tf += mul(c1, min16float4x4(0.06968848, -0.03574659, 0.11817242, 0.044270225, 0.0481696, 0.045347195, -0.14479072, 0.06971279, 0.012434736, 0.03927546, 0.13076504, 0.032268204, 0.040274065, 0.053418823, -0.05195065, 0.1341056)); + conv2d_1_tf += mul(d1, min16float4x4(0.1314648, 0.08953099, -0.058160458, -0.098807305, -0.08652445, -0.19136623, -0.012327089, 0.14297265, 0.11436408, 0.031837817, -0.0038611747, 0.08295747, 0.19534546, -0.033664998, -0.51042134, -0.21606028)); + conv2d_1_tf += mul(e1, min16float4x4(-0.332711, -0.2260786, 0.35732532, 0.026584813, 0.16421017, 0.21153966, -0.112725854, -0.048803244, 0.059562314, -0.010458478, 0.0063304375, -0.007279937, -0.41918445, 0.10137393, -0.0989079, -0.17768846)); + conv2d_1_tf += mul(f1, min16float4x4(-0.22947264, 0.008074958, -0.03876367, 0.28019628, -0.18640186, 0.072562195, -0.001338717, 0.17349707, 0.13131878, 0.05085823, -0.11547487, -0.084437385, -0.18131672, 0.026830718, 0.0960529, -0.014084568)); + conv2d_1_tf += mul(g1, min16float4x4(0.13153158, 0.079937235, -0.14291838, -0.062477887, -0.0690248, 
0.15090927, 0.060723048, -0.044703092, 0.005483621, -0.113471694, 0.048640195, -0.024538955, -0.01751092, 0.19206041, -0.1859277, -0.22007878)); + conv2d_1_tf += mul(h1, min16float4x4(-0.04971548, -0.38541326, -0.080354154, -0.1132633, -0.13348146, 0.11406493, 0.05543971, 0.022810424, -0.09030199, -0.053045455, -0.084034644, 0.0014670533, 0.0007018557, -0.24078067, 0.047226585, 0.08619653)); + conv2d_1_tf += mul(i1, min16float4x4(-0.08993396, -0.09246378, 0.11467184, 0.060891952, -0.022887891, -0.008537377, 0.13542707, 0.08030356, -0.06174077, -0.07314582, -0.111782126, -0.08939319, -0.09756803, -0.15771574, 0.073002145, 0.035939205)); + conv2d_1_tf += mul(a2, min16float4x4(-0.09398606, -0.118093155, 0.024832802, 0.049131367, 0.06665196, -0.039545495, -0.107865654, -0.043897964, -0.03278348, -0.111089505, 0.12056342, -0.10977613, -0.05880801, -0.08684503, -0.15480064, -0.09669209)); + conv2d_1_tf += mul(b2, min16float4x4(-0.12028866, -0.0130571015, 0.010480521, 0.28919983, 0.050575808, -0.07968808, -0.15499628, -0.13613448, 0.030993043, 0.13226634, -0.12666325, -0.010337325, -0.025353834, 0.017561335, -0.08171704, -0.17280379)); + conv2d_1_tf += mul(c2, min16float4x4(-0.0008190666, 0.017923795, -0.13926646, -0.00083633314, -0.14120303, 0.109396234, 0.026602108, 0.2108425, 0.15093753, -0.0016773659, 0.028220268, 0.09914804, -0.045055833, 0.040082425, 0.007756443, -0.04522211)); + conv2d_1_tf += mul(d2, min16float4x4(0.059589684, 0.04780217, 0.30785602, 0.25626636, 0.08686253, 0.11348654, 0.042249523, -0.2264382, -0.058502045, 0.05044742, 0.0031711252, -0.021721566, -0.011926813, 0.042892855, -0.08586602, -0.029168598)); + conv2d_1_tf += mul(e2, min16float4x4(0.09367661, -0.019030625, -0.34638473, -0.10968469, -0.16300671, 0.21311292, 0.11657136, -0.044009518, 0.10225506, -0.044505168, 0.20920436, -0.018161744, -0.018144146, 0.026626088, -0.056913715, 0.15370414)); + conv2d_1_tf += mul(f2, min16float4x4(-0.28757727, 0.14743091, -0.021321807, -0.048045393, -0.109708, 
-0.14760888, 0.15246773, -0.028329216, 0.009206364, -0.06396112, 0.12593451, 0.052947026, 0.066429235, -0.08044728, 0.0070432564, -0.057647638)); + conv2d_1_tf += mul(g2, min16float4x4(-0.023919886, -0.20876022, 0.05590491, 0.12671952, -0.07277091, 0.024939056, 0.03633482, -0.10239475, -0.12012349, -0.17192347, 0.014865882, 0.1858935, -0.013352806, -0.04451544, 0.0032296637, 0.09310079)); + conv2d_1_tf += mul(h2, min16float4x4(-0.29340369, 0.1377685, -0.018134177, -0.0819466, 0.2541578, -0.1270915, -0.12300359, 0.114513785, 0.21511158, -0.060876742, 0.07682154, 0.09775888, -0.09133818, 0.04477866, 0.058042303, -0.027626123)); + conv2d_1_tf += mul(i2, min16float4x4(-0.098641984, -0.09568759, 0.27307647, 0.044102278, -0.03640084, -0.10440432, -0.011212675, -0.22568303, -0.008232321, 0.14870772, -0.17107275, -0.023316732, 0.03395947, 0.14223643, -0.08063479, 0.14301774)); + conv2d_1_tf += mul(a3, min16float4x4(-0.08714423, -0.12230681, -0.22175795, -0.10298021, 0.0009175108, 0.19820437, 0.04215484, 0.2772454, 0.046766162, 0.023245906, 0.36313313, -0.29657102, 0.0010776661, 0.047935788, 0.113361314, -0.05614472)); + conv2d_1_tf += mul(b3, min16float4x4(0.15069975, 0.06458973, 0.08984772, -0.08219822, -0.37328726, -0.03008995, 0.31162828, 0.07075847, -0.13914284, -0.10216768, 0.22251949, -0.30631062, 0.17172062, 0.058428258, -0.11345689, 0.08461611)); + conv2d_1_tf += mul(c3, min16float4x4(0.007734305, 0.042484675, -0.15685312, -0.048171967, 0.10970874, 0.061090663, -0.08464978, 0.08347133, -0.17933917, 0.2308347, -0.053314723, 0.09323812, -0.04228206, 0.055042125, -0.046495847, -0.032692812)); + conv2d_1_tf += mul(d3, min16float4x4(-0.09439761, 0.03567186, -0.17220385, -0.103939146, -0.064900115, -0.16004047, 0.004621011, -0.014501001, -0.14071538, -0.05238438, -0.04519603, 0.21972013, -0.007383857, -0.07692677, -0.14034486, 0.08030412)); + conv2d_1_tf += mul(e3, min16float4x4(-0.22748968, 0.12067121, -0.05225513, 0.04308743, -0.081648685, 0.28658885, 0.37694585, 
-0.018508147, -0.019247225, 0.095557846, 0.015747357, 0.12365868, -0.076417744, -0.03912286, 0.18391648, -0.09244896)); + conv2d_1_tf += mul(f3, min16float4x4(-0.00221828, -0.0894836, 0.038467363, -0.019945016, 0.13546647, 0.17713489, -0.17275713, 0.08575425, -0.019129591, 0.16340882, -0.16357088, -0.0033604207, -0.06446814, -0.15712759, 0.18558913, -0.115558594)); + conv2d_1_tf += mul(g3, min16float4x4(-0.09995351, 0.18885328, -0.057601925, 0.01172547, -0.031203317, -0.1181948, 0.006120215, 0.25098777, -0.06316651, 0.047607217, -0.056073133, -0.029685916, 0.12195799, -0.056664392, -0.054523658, 0.03753435)); + conv2d_1_tf += mul(h3, min16float4x4(0.007936505, -0.021070726, 0.040594626, 0.061293513, -0.074233375, 0.10112329, -0.19424592, -0.14433385, -0.04661142, -0.09192385, 0.034151867, -0.11941847, 0.046759605, -0.15323174, 0.09908571, 0.18290807)); + conv2d_1_tf += mul(i3, min16float4x4(-0.012291647, 0.114136524, 0.10576901, -0.012061901, 0.2356885, 0.048024837, 0.18102467, -0.034004245, -0.06746709, 0.09405117, 0.12362687, 0.0254422, 0.22654915, 0.04224264, -0.049588405, 0.11478716)); + conv2d_1_tf += mul(na1, min16float4x4(-0.021690933, 0.13663062, -0.161411, 0.06806553, -0.1773275, -0.0940566, -0.18002738, 0.047475196, 0.0072157113, -0.008688586, -0.15493456, 0.022294179, 0.041401867, -0.10311516, -0.006603416, 0.059536614)); + conv2d_1_tf += mul(nb1, min16float4x4(-0.13541889, 0.047185, -0.027699882, 0.060225613, -0.035152074, 0.05752177, -0.026204573, 0.11251955, -0.0049166707, 0.17533402, -0.15755837, 0.16124752, 0.04805776, -0.10309488, 0.15945134, 0.025226792)); + conv2d_1_tf += mul(nc1, min16float4x4(-0.015074193, -0.094979845, 0.027753184, -0.071142055, -0.17082961, -0.06833402, 0.13620014, -0.24564765, 0.036582932, 0.13075556, 0.036705326, 0.03863992, -0.018921472, -0.0016482361, 0.13597268, -0.038188133)); + conv2d_1_tf += mul(nd1, min16float4x4(-0.14212462, -0.1483275, 0.05649678, 0.05684924, -0.11407954, 0.13978885, 0.070467845, -0.07458527, 
-0.19702937, 0.23950967, -0.15242746, -0.26435548, -0.14437793, 0.21487178, 0.4991241, 0.18331984)); + conv2d_1_tf += mul(ne1, min16float4x4(0.20045248, 0.066468574, -0.015601024, 0.012849705, -0.14952832, -0.06828453, 0.16009094, -0.09515789, -0.1071139, -0.021629127, -0.012993768, -0.022518635, 0.19255438, -0.09875012, 0.07555782, 0.0780372)); + conv2d_1_tf += mul(nf1, min16float4x4(-0.028311213, -0.025465565, 0.020059558, -0.116105095, -0.042490575, 0.020179577, 0.010893176, -0.11184776, -0.1702318, -0.025035636, 0.008381181, 0.0586714, 0.03539251, -0.0448198, -0.056921933, -0.029987138)); + conv2d_1_tf += mul(ng1, min16float4x4(0.049813945, 0.08434948, 0.09337763, 0.06701621, -0.061224304, -0.24754077, -0.017353527, -0.042758185, 0.013161995, -0.22947139, 0.019135898, 0.11039477, 0.16954716, -0.25619635, 0.18368678, 0.03542052)); + conv2d_1_tf += mul(nh1, min16float4x4(-0.15430786, 0.07348774, 0.15545642, 0.20969617, 0.1067826, 0.15255202, 0.020220853, 0.09658389, -0.088782035, -0.19119574, 0.13885954, 0.15108526, -0.07552868, -0.11574438, -0.034102093, -0.031383175)); + conv2d_1_tf += mul(ni1, min16float4x4(0.061409608, -0.00082869077, -0.08336049, -0.01866603, 0.07322213, -0.1152386, -0.004205211, -0.18793713, 0.091782115, 0.05387527, 0.069104694, 0.25387684, -0.101916246, 0.065856785, -0.020407397, 0.035098225)); + conv2d_1_tf += mul(na2, min16float4x4(0.06225989, -0.039721318, 0.19908188, 0.08382035, -0.024357362, 0.014932128, -0.060558856, -0.049815435, -0.03166011, 0.0339055, -0.12810327, 0.008812703, 0.06120202, 0.085533425, 0.21571258, -0.20605975)); + conv2d_1_tf += mul(nb2, min16float4x4(-0.045329664, 0.02261115, -0.0335033, -0.058562186, -0.0099387, 0.0046313554, 0.21475597, 0.04558062, 0.17891279, 0.005057579, 0.22518916, 0.1998231, 0.09627137, -0.2318303, -0.08868813, -0.27863982)); + conv2d_1_tf += mul(nc2, min16float4x4(-0.15865076, 0.077262044, 0.036153752, 0.07885703, 0.13166751, -0.12820594, -0.05823962, -0.2583444, -0.2245552, -0.04434666, 
-0.13453422, -0.27865237, 0.014107271, 0.045582164, 0.0064884513, -0.019007552)); + conv2d_1_tf += mul(nd2, min16float4x4(0.0643133, 0.06440001, -0.14517003, -0.101694606, 0.058990445, 0.11955667, 0.45094532, 0.20261864, 0.07944409, -0.061399437, 0.022036074, 0.046660237, -0.17064287, -0.076766625, 0.25972953, 0.29821205)); + conv2d_1_tf += mul(ne2, min16float4x4(-0.11031386, -0.05850727, 0.055557184, 0.11549242, 0.12120408, -0.33330265, 0.095613986, 0.09242419, -0.011835885, -0.19384164, -0.01893125, 0.27290896, -0.18104021, 0.044360142, 0.06759539, -0.0027218745)); + conv2d_1_tf += mul(nf2, min16float4x4(0.19390257, -0.13378039, 0.07428329, 0.016053686, -0.18574655, 0.055462763, -0.2527128, -0.47279125, -0.17490762, 0.21626428, -0.1473371, -0.35594228, 0.054865763, -0.04086486, -0.061911695, 0.051812805)); + conv2d_1_tf += mul(ng2, min16float4x4(-0.029701848, 0.24927482, 0.00581731, -0.10748679, -0.07500632, 0.033424605, 0.14734372, -0.18966366, 0.031880617, 0.17622112, -0.031867832, -0.10119831, -0.15391265, -0.14308685, 0.093484215, 0.18867014)); + conv2d_1_tf += mul(nh2, min16float4x4(0.19035357, -0.19525306, -0.025621792, 0.09154427, -0.07798503, -0.22271548, 0.11034287, -0.04197031, -0.24772005, 0.43681505, -0.19703668, -0.2614237, 0.05807699, -0.2631317, -0.020604266, -0.048005704)); + conv2d_1_tf += mul(ni2, min16float4x4(-0.08587588, 0.13374045, -0.09263761, -0.13216262, -0.11242246, -0.12541875, -0.09835177, 0.1586739, -0.21013282, 0.087373346, 0.107112356, 0.47657737, 0.0459955, -0.07181196, 0.07818155, -0.10435423)); + conv2d_1_tf += mul(na3, min16float4x4(-0.091803394, -0.32280564, 0.28972253, 0.12908047, 0.06683764, -0.039376236, 0.024078066, 0.18940936, -0.055246543, 0.12222864, -0.0177199, 0.09346665, 0.07164098, 0.065791056, -0.08516637, -0.10187257)); + conv2d_1_tf += mul(nb3, min16float4x4(-0.12561126, -0.28730518, 0.190799, -0.17922764, 0.04376582, -0.08152354, -0.0690038, -0.10861494, -0.03100546, 0.10962334, -0.20492296, 0.12868984, 
0.06536495, 0.08559974, 0.033028, -0.07235402)); + conv2d_1_tf += mul(nc3, min16float4x4(-0.012734173, -0.12211726, 0.057524282, 0.015053666, -0.052275516, 0.11774483, 0.08221696, -0.024205929, 0.122006595, 0.054565493, -0.049608365, 0.02801238, 0.07593017, 0.074450806, 0.097137615, -0.008985974)); + conv2d_1_tf += mul(nd3, min16float4x4(-0.32826158, -0.022971062, 0.37642807, 0.38614145, -0.06932448, 0.0641898, -0.09011684, -0.019884817, -0.004897904, 0.07661578, -0.050405186, -0.24849766, 0.04642452, 0.09120379, 0.26060387, -0.2533109)); + conv2d_1_tf += mul(ne3, min16float4x4(0.09669597, -0.045555357, -0.24132517, -0.28401875, 0.11226361, 0.08378312, -0.07415474, -0.036874313, -0.001286788, 0.14013582, 0.14750466, -0.048925027, 0.13374946, 0.10844033, 0.123459235, -0.10933974)); + conv2d_1_tf += mul(nf3, min16float4x4(-0.03275827, 0.27429518, -0.0983686, -0.010947437, -0.18409865, 0.12616666, -0.05766888, 0.07149005, -0.13777009, 0.022123039, 0.084938325, 0.015972659, 0.20145003, -0.09534558, -0.0082679195, -0.1515079)); + conv2d_1_tf += mul(ng3, min16float4x4(0.13148536, -0.3421452, 0.08851102, 0.012056574, -0.1525749, 0.09364548, -0.02235517, -0.1775178, 0.18052714, -0.14639667, 0.07453223, 0.03912742, -0.284782, 0.023833552, 0.09671063, -0.168578)); + conv2d_1_tf += mul(nh3, min16float4x4(-0.24303597, -0.05585747, -0.21645154, -0.084838174, -0.15413773, -0.15403214, -0.021544017, 0.15751824, -0.027032627, -0.18457665, -0.02174098, -0.0070916233, -0.1609649, -0.32226282, -0.18423033, -0.29629233)); + conv2d_1_tf += mul(ni3, min16float4x4(0.1602529, 0.026087781, 0.01551678, 0.07093837, -0.007075046, -0.0061597642, -0.0057887356, -0.08935906, 0.0028665168, -0.1038671, -0.093715765, -0.035213456, -0.041290607, -0.15825188, 0.11327359, -0.20286629)); + conv2d_1_tf += min16float4(-0.062293675, 0.09216847, 0.010529031, 0.03100192); + tex4[gxy] = conv2d_1_tf; + min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0); + conv2d_1_tf = max(conv2d_1_tf, 0); + + min16float4 target 
= mul(e1, min16float4x4(-0.02357968, 0.13800439, 0.054744735, -0.32328397, -0.2263118, -0.3222542, -0.15286992, -0.3053175, -0.20046607, 0.025345843, 0.032755207, 0.40165102, 0.03166696, 0.29110438, 0.28861988, 0.05585125)); + target += mul(e2, min16float4x4(0.11055126, -0.33034575, 0.039494887, -0.17843343, 0.35742196, 0.00032650787, 0.21049741, 0.18823248, -0.1741954, 0.27586365, -0.043366615, 0.02092058, -0.082515135, -0.15504313, 0.13261497, 0.14650741)); + target += mul(e3, min16float4x4(0.39276633, -0.031067554, -0.08830738, -0.23975314, -0.20294978, 0.030291535, 0.4623106, 0.06494191, 0.042467684, -0.28105733, -0.053258326, -0.17269841, 0.09479501, 0.11930515, 0.1258843, 0.11058792)); + target += mul(ne1, min16float4x4(-0.18343425, -0.4381688, -0.08248827, -0.42846557, -0.08277779, 0.45192116, 0.21961756, 0.23076119, -0.2093829, -0.29050866, 0.26212537, -0.25469857, -0.4832557, -0.45126852, -0.35072148, -0.18368497)); + target += mul(ne2, min16float4x4(0.10529696, 0.5964488, 0.13258573, -0.07494986, -0.3341919, 0.19418421, -0.18307082, 0.34982273, -0.0430461, 0.21097268, 0.03212202, -0.015623122, 0.43791813, 0.16207397, 0.123477034, -0.087993294)); + target += mul(ne3, min16float4x4(-0.01878982, 0.007308694, 0.25769314, 0.18407181, 0.00095180905, -0.2600526, -0.31043288, -0.24622385, 0.07832029, 0.05502411, 0.37793204, -0.07329948, -0.28405467, -0.15038961, 0.19259417, 0.105486296)); + target += mul(conv2d_2_tf, min16float4x4(0.047820415, 0.3303589, 0.035807017, -0.41168606, -0.2118325, -0.045765184, -0.15234827, 0.28021428, -0.2084036, -0.40200952, -0.3261011, -0.13480914, -0.06876906, -0.19167677, -0.20444186, -0.44851676)); + target += mul(nconv2d_2_tf, min16float4x4(-0.24726203, -0.0097923195, -0.23193192, 0.31947026, 0.4274281, -0.36929542, 0.10095328, -0.19663717, 0.3244895, 0.49458218, 0.24745567, 0.15722558, 0.43052208, 0.377559, 0.22543637, 0.13009055)); + target += mul(conv2d_1_tf, min16float4x4(0.01817998, 0.111477636, -0.12727399, 0.27395004, 
0.19770023, -0.1636959, 0.25407487, -0.24871433, -0.08552937, 0.3223687, 0.30668882, 0.40221208, -0.20192504, 0.14656074, 0.5100356, -0.0948956)); + target += mul(nconv2d_1_tf, min16float4x4(0.40383592, -0.043663148, 0.4813348, 0.10317451, -0.049076255, -0.022925228, 0.0872564, 0.21741754, 0.23656987, -0.22309794, -0.2260013, 0.20823886, -0.055542476, 0.016604664, -0.1964831, 0.11962174)); + target += min16float4(-0.049604952, -0.039514415, -0.06137416, -0.0015509313); + tex5[gxy] = target; + + target = mul(e1, min16float4x4(0.029635962, 0.08045753, 0.03622311, 0.06677362, 0.14780864, -0.087087184, 0.22309896, -0.1772139, -0.08716722, 0.1075154, 0.044472143, 0.021324798, 0.10346262, -0.24718447, -0.2489118, 0.4517737)); + target += mul(e2, min16float4x4(0.20637918, -0.11695054, 0.27656725, 0.009858572, -0.62555677, 0.12796827, -0.057749186, -0.02636826, 0.11764726, -0.034879886, -0.062285252, -0.048256125, 0.37146622, -0.17392562, 0.24782267, 0.3184173)); + target += mul(e3, min16float4x4(0.2624149, 0.007052751, 0.1595428, 0.26269603, -0.33775207, -0.66331345, 0.18036188, -0.25012106, -0.15003558, 0.12337829, -0.3230818, 0.06187628, 0.096601635, 0.24300486, -0.13784438, 0.27110842)); + target += mul(ne1, min16float4x4(-0.180413, 0.039972585, 0.48966697, -0.4130023, -0.03654654, -0.27514896, -0.025462124, 0.06652415, 0.28900522, 0.035381883, 0.20655172, 0.0073647103, -0.5028713, -0.0061578755, -0.09185675, -0.52771837)); + target += mul(ne2, min16float4x4(-0.3205473, -0.23172325, -0.20749244, 0.058195353, 0.20280065, -0.106998004, 0.08968707, 0.10981961, -0.13291806, 0.0028465164, 0.11793527, 0.11942547, 0.100123264, -0.14852245, -0.032194547, -0.118260525)); + target += mul(ne3, min16float4x4(0.004620961, -0.13271236, 0.110130526, -0.075169735, 0.35998157, -0.046072174, 0.02044828, -0.1019322, -0.038753018, -0.12328749, -0.28227237, 0.18373057, -0.23704045, 0.20384738, 0.097455874, -0.23102747)); + target += mul(conv2d_2_tf, min16float4x4(0.30397, -0.007688397, 
-0.2519374, -0.14401323, -0.031671453, 0.10171321, -0.18295656, -0.029794114, 0.19171898, 0.23662621, 0.09319509, -0.3479054, 0.036986895, 0.13572362, 0.1142681, -0.17851138)); + target += mul(nconv2d_2_tf, min16float4x4(-0.19525734, 0.36855492, 0.05751295, -0.12524441, 0.06309533, 0.20228319, -0.07533531, 0.26733333, -0.21407285, -0.2900094, -0.28743416, 0.18039729, -0.27968687, -0.23786859, -0.21049118, -0.006130187)); + target += mul(conv2d_1_tf, min16float4x4(0.34406897, -0.14967814, 0.56049985, -0.18166065, -0.061995413, 0.117799215, 0.3054206, 0.4034068, -0.2116504, -0.6017806, 0.004660423, 0.051566444, 0.4380975, -0.3172436, -0.09930328, -0.16182126)); + target += mul(nconv2d_1_tf, min16float4x4(-0.09316841, 0.036305115, -0.30209473, 0.098138526, -0.012532953, -0.050068337, -0.22571203, -0.30636647, -0.124337815, 0.07323685, -0.15504828, 0.19263308, -0.017216058, 0.34484297, -0.1460544, -0.24951003)); + target += min16float4(0.10388342, 0.00828351, 0.14884935, 0.034392886); + tex6[gxy] = target; + + target = mul(e1, min16float4x4(-0.15275823, 0.31693572, 0.03429309, -0.06982273, 0.08535909, 0.019838037, -0.03189405, 0.3190016, 0.16633914, 0.48730284, -0.27923077, 0.31791112, 0.43154097, 0.005003616, -0.26277873, -0.009333685)); + target += mul(e2, min16float4x4(0.23504019, -0.12419379, 0.07217815, -0.090434305, -0.0380588, -0.14686479, -0.33812302, -0.20242776, -0.20776805, 0.24741934, -0.16489775, 0.07052134, -0.08030772, 0.23784883, -0.28709608, -0.17689173)); + target += mul(e3, min16float4x4(-0.05109775, -0.40860242, -0.003464472, -0.19893257, 0.23186824, -0.12760048, -0.22718583, 0.02299852, 0.27083093, 0.073904194, -0.056870755, -0.35324985, -0.023004858, -0.29591596, -0.020298446, -0.05753052)); + target += mul(ne1, min16float4x4(0.0035456547, -0.37682405, 0.047876693, 0.1168026, 0.015805494, -0.04388269, 0.12970346, 0.2497829, -0.009891778, 0.116980106, 0.13058232, 0.22570355, 0.13866597, 0.036246244, 0.10916998, -0.040503114)); + target += mul(ne2, 
min16float4x4(-0.25300103, -0.065156855, 0.063345924, 0.11406543, -0.1902478, 0.16440767, 0.043949526, 0.43318078, -0.03932035, -0.08510957, 0.19621156, -0.045045726, -0.08339006, -0.04335483, 0.37129655, -0.22328225)); + target += mul(ne3, min16float4x4(0.16169593, 0.2758587, 0.38249364, 0.12606645, 0.4582731, 0.09374545, -0.10988087, -0.21678255, -0.004099455, -0.09436347, 0.33964127, 0.20880581, -0.06742301, -0.025149476, 0.12146305, 0.5012377)); + target += mul(conv2d_2_tf, min16float4x4(0.11523535, 0.31662583, -0.0709322, -0.066175185, 0.08868106, -0.042457394, 0.32469732, -0.1987238, 0.41399983, 0.015568244, 0.14037918, 0.2879998, -0.32157704, 0.22491854, -0.07769691, 0.2052648)); + target += mul(nconv2d_2_tf, min16float4x4(-0.299831, -0.247278, -0.2011737, -0.3759366, -0.14935663, -0.095033385, 0.06259881, -0.23891686, -0.4340098, 0.07340212, -0.0012697511, -0.16527005, 0.0814454, -0.43962866, -0.3040046, 0.06242604)); + target += mul(conv2d_1_tf, min16float4x4(0.11802704, 0.2323739, 0.13466287, -0.25053164, -0.08020803, 0.1628004, -0.030645542, -0.40872335, -0.24624921, 0.15931502, 0.40752286, -0.07906199, 0.4286516, -0.1651973, -0.07021073, 0.0867332)); + target += mul(nconv2d_1_tf, min16float4x4(-0.23617363, 0.053548977, -0.14130518, -0.37744048, -0.11805406, -0.13757266, -0.026939899, 0.028020354, 0.24626125, -0.06998214, -0.02793638, 0.10509643, 0.06577935, -0.17211749, -0.12747282, -0.16999653)); + target += min16float4(-0.022106458, -0.012578552, 0.016203664, 0.026009269); + tex7[gxy] = target; +} + + +//!PASS 3 +//!DESC Conv-4x3x3x24, Conv-4x1x1x48 +//!IN tex5, tex6, tex7, tex4 +//!OUT tex8, tex1, tex2, tex3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass3(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + 
// [ c, f, i ] + min16float4 a1 = tex5.SampleLevel(sam, pos - inputPt, 0); + min16float4 b1 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c1 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d1 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e1 = tex5.SampleLevel(sam, pos, 0); + min16float4 f1 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g1 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h1 = tex5.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i1 = tex5.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex6.SampleLevel(sam, pos - inputPt, 0); + min16float4 b2 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c2 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d2 = tex6.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e2 = tex6.SampleLevel(sam, pos, 0); + min16float4 f2 = tex6.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g2 = tex6.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h2 = tex6.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i2 = tex6.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = 
max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float4 a3 = tex7.SampleLevel(sam, pos - inputPt, 0); + min16float4 b3 = tex7.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c3 = tex7.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d3 = tex7.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e3 = tex7.SampleLevel(sam, pos, 0); + min16float4 f3 = tex7.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g3 = tex7.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h3 = tex7.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i3 = tex7.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na3 = max(-a3, 0); + min16float4 nb3 = max(-b3, 0); + min16float4 nc3 = max(-c3, 0); + min16float4 nd3 = max(-d3, 0); + min16float4 ne3 = max(-e3, 0); + min16float4 nf3 = max(-f3, 0); + min16float4 ng3 = max(-g3, 0); + min16float4 nh3 = max(-h3, 0); + min16float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + min16float4 conv2d_1_tf = tex4.SampleLevel(sam, pos, 0); + min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0); + conv2d_1_tf = max(conv2d_1_tf, 0); + + min16float4 conv2d_5_tf = mul(a1, min16float4x4(0.001049049, 0.017747996, -0.067229465, -0.020442853, -0.04868684, 0.09733606, -0.07313501, 0.02070675, 0.01012683, -0.034293324, -0.026002094, 0.008298949, -0.045532364, -0.069049254, 0.109774776, -0.092840426)); + conv2d_5_tf += mul(b1, min16float4x4(0.3071666, 0.108723, -0.018787129, 0.17321438, -0.07934712, 0.11855833, -0.032467257, -0.048425578, -0.091413595, -0.08235019, -0.050003942, -0.007800964, -0.07821158, 0.120108165, -0.15341766, -0.04518874)); + conv2d_5_tf 
+= mul(c1, min16float4x4(-0.0038995466, 0.059817232, -0.13333397, 0.022390908, -0.054531172, -0.12521502, 0.061349645, 0.08832908, 0.015541151, -0.005833245, -0.103023596, -0.031728514, -0.1393958, 0.12932369, -0.024058655, -0.02949061)); + conv2d_5_tf += mul(d1, min16float4x4(0.10676212, -0.0919305, -0.045313094, 0.036725752, -0.2360789, 0.08090541, 0.08044168, -0.088691026, 0.05462964, -0.047420587, 0.011766264, -0.044065233, -0.09330811, -0.04302891, -0.09276843, 0.01615573)); + conv2d_5_tf += mul(e1, min16float4x4(0.14728056, 0.014297587, 0.20523176, -0.016391741, -0.25267518, -0.09126818, 0.14681858, 0.0720258, -0.034471154, -0.103409246, 0.029827712, 0.09607032, -0.12944661, -0.09812552, 0.19399726, 0.18891408)); + conv2d_5_tf += mul(f1, min16float4x4(0.0793041, -0.18886381, -0.08229493, -0.13476922, -0.034637094, -0.06667868, 0.09988945, -0.08209682, -0.07416632, 0.10529841, -0.14161663, -0.088301264, 0.0029876695, 0.11381751, 0.083498895, 0.15414985)); + conv2d_5_tf += mul(g1, min16float4x4(0.20285544, -0.16456522, 0.06494461, -0.013555718, -0.07797077, -0.13418226, -0.0014035929, 0.056061633, -0.024789125, -0.053674392, 0.048963223, 0.121051155, 0.064334966, -0.0482476, 0.068401285, -0.07039275)); + conv2d_5_tf += mul(h1, min16float4x4(0.098433256, -0.3636959, 0.2678772, -0.046356395, -0.1771877, -0.017444499, -0.06527938, 0.073921666, -0.1880833, 0.1873346, 0.10331725, -0.05711381, 0.049431477, -0.047258172, 0.13095368, -0.35352108)); + conv2d_5_tf += mul(i1, min16float4x4(0.10444254, -0.16424808, -0.00615067, 0.1023235, -0.122729294, -0.2563471, 0.00030699265, 0.09230543, 0.07732433, -0.03397466, -0.03141724, 0.2431111, 0.009742008, -0.07286298, -0.015188814, 0.025636861)); + conv2d_5_tf += mul(a2, min16float4x4(-0.06326144, -0.045018848, -0.130233, -0.015639791, -0.015171213, -0.009451374, 0.06830251, 0.07718799, 0.009820809, -0.10778585, 0.011396909, -0.067577444, 0.16482629, 0.099055305, 0.0517957, 0.008594935)); + conv2d_5_tf += mul(b2, 
min16float4x4(-0.037354734, 0.09272911, -0.11168438, 0.1708543, -0.12653585, -0.042765, 0.008014873, 0.22469266, 0.019282004, 0.0041092015, -0.029787902, 0.025127187, -0.05086034, 0.0077483514, 0.010261478, 0.07023893)); + conv2d_5_tf += mul(c2, min16float4x4(0.055195954, 0.004654069, -0.02118881, -0.05352797, -0.021830624, -0.010750989, -0.032053873, 0.18029462, -0.0703946, 0.06940036, 0.011578795, 0.049051903, 0.12236165, 0.1469314, -0.04752202, -0.02873477)); + conv2d_5_tf += mul(d2, min16float4x4(0.11799468, -0.022473548, 0.0045530205, 0.0870364, 0.1895775, -0.041058388, 0.079169616, -0.08769193, -0.012526104, 0.03904729, 0.016011083, -0.010498281, 0.08499936, -0.050380737, 0.14939919, 0.009984251)); + conv2d_5_tf += mul(e2, min16float4x4(0.10477428, 0.16810521, -0.1314053, 0.084377944, 0.17922944, -0.304226, 0.25293878, -0.15422472, 0.20214307, 0.10322054, -0.13431601, 0.04898287, 0.09717359, -0.07664543, 0.14711176, 0.15777126)); + conv2d_5_tf += mul(f2, min16float4x4(-0.027849296, -0.107415505, -0.048003152, -0.14503942, 0.16935585, -0.11120448, 0.19879252, 0.25992575, 0.10300595, 0.044460453, 0.095423825, -0.0006854256, 0.04321415, -0.042708825, 0.02633511, -0.06220348)); + conv2d_5_tf += mul(g2, min16float4x4(-0.004024937, 0.05021026, -0.00765448, 0.18315202, -0.078596614, -0.022813313, 0.09930163, 0.08525698, -0.0024254897, -0.06150155, 0.12159309, 0.056743085, -0.19437842, 0.02563038, -0.14668292, -0.0805431)); + conv2d_5_tf += mul(h2, min16float4x4(0.037370156, -0.13586049, -0.11521326, -0.07453397, -0.025900846, -0.0823091, -0.14436729, 0.14114335, 0.055820756, 0.05531836, -0.1474026, 0.10203739, 0.053665128, 0.00896543, 0.13431323, -0.12663968)); + conv2d_5_tf += mul(i2, min16float4x4(-0.20144333, 0.05849729, 0.06303023, -0.17678042, 0.03238696, -0.19829398, 0.12956308, -0.20013878, -0.1353999, -0.001031907, 0.10556917, -0.14760506, 0.03315909, -0.10838441, 0.16175537, -0.001477876)); + conv2d_5_tf += mul(a3, min16float4x4(0.087629505, -0.05908092, 
0.16011593, -0.11285914, -0.4358247, 0.18938082, -0.31105244, -0.3638732, -0.0069619874, 0.029419519, -0.2156866, -0.13693112, -0.113110565, -0.09906378, -0.11164287, -0.084068194)); + conv2d_5_tf += mul(b3, min16float4x4(0.072181284, 0.035425037, 0.028820323, 0.12786204, 0.37121117, -0.076626934, 0.058864776, -0.20865935, -0.0014984896, 0.05978116, 0.117927864, 0.013273026, 0.088378325, 0.13492325, 0.018144222, 0.22580223)); + conv2d_5_tf += mul(c3, min16float4x4(0.045780275, 0.13346507, 0.056960598, -0.0019664192, -0.24231891, -0.13189796, 0.11114239, -0.07587297, 0.03099761, 0.10284658, 0.094186746, 0.04669001, -0.20374449, -0.12047404, -0.10640337, -0.03541381)); + conv2d_5_tf += mul(d3, min16float4x4(0.14384045, 0.12343541, -0.029074568, 0.13204664, 0.18878254, 0.115503244, -0.20217639, 0.16410889, -0.79949176, 0.5460196, -0.09889672, 0.27109572, 0.10628155, 0.13510233, -0.20859608, -0.07706875)); + conv2d_5_tf += mul(e3, min16float4x4(-0.11215904, 0.08981538, -0.10094039, -0.054024383, 0.2652237, -0.2002571, -0.15960355, 0.032049023, 0.007806114, 0.10592316, -0.3487021, 0.048408728, 0.10263737, -0.026020324, 0.072276175, -0.1190967)); + conv2d_5_tf += mul(f3, min16float4x4(-0.03184955, -0.00798831, -0.028087616, -0.010780139, -0.05444991, 0.09402867, 0.30834422, 0.14518146, -0.010965188, 0.14643683, -0.02568113, 0.068982124, 0.044459574, -0.05092265, -0.0028792082, 0.17158687)); + conv2d_5_tf += mul(g3, min16float4x4(0.0869746, 0.15908171, -0.0033584125, 0.049515188, -0.15995023, 0.20953654, -0.16041277, -0.08435643, 0.42034048, 0.096904315, -0.1927207, -0.0792477, 0.078221194, -0.10053459, -0.17969237, 0.08374661)); + conv2d_5_tf += mul(h3, min16float4x4(0.10612468, -0.23303585, -0.08996894, 0.10191982, 0.10724305, 0.1258089, -0.08111434, 0.103680536, 0.00824538, 0.2173516, -0.601468, -0.17365147, -0.09311857, -0.045947216, 0.20118287, 0.00016345571)); + conv2d_5_tf += mul(i3, min16float4x4(-0.07453406, 0.02476293, -0.089717, -0.14455949, -0.1427004, 
-0.21921235, 0.1878364, -0.023677701, -0.29442346, 0.13739492, -0.10435927, -0.35067815, 0.00956389, 0.049088918, -0.055482347, 0.1527778)); + conv2d_5_tf += mul(na1, min16float4x4(-0.20727113, 0.23718962, 0.17435564, -0.017858913, -0.042935595, 0.1996666, -0.059547734, 0.09735509, 0.019539079, -0.012399102, 0.057370137, 0.027493393, -0.10042333, -0.07915818, 0.07218426, 0.1309558)); + conv2d_5_tf += mul(nb1, min16float4x4(-0.032295313, 0.07833535, 0.22808518, 0.012292011, 0.09856554, -0.01996994, -0.028461069, 0.029348027, -0.25023523, 0.21794361, 0.14906348, 0.039845698, -0.004544177, -0.031246802, 0.019103816, 0.07738693)); + conv2d_5_tf += mul(nc1, min16float4x4(-0.15647748, -0.048666175, -0.03838509, 0.22003315, 0.048363995, -0.077338494, 0.109276325, -0.000109877525, -0.10441263, 0.18494262, -0.08754767, 0.12850273, 0.03408794, 0.15086798, -0.19896401, 0.048397515)); + conv2d_5_tf += mul(nd1, min16float4x4(0.011858143, -0.121841036, 0.0048841173, -0.062427614, 0.14153655, 0.011297287, 0.12778129, 0.004588582, 0.021572713, 0.15850346, 0.06464319, 0.06260356, 0.0838926, 0.04272777, 0.0733926, -0.08732838)); + conv2d_5_tf += mul(ne1, min16float4x4(0.20364462, 0.15701732, 0.053049877, -0.46085536, -0.037331745, -0.05813282, 0.036300424, 0.05660442, 0.14007641, 0.12849629, 0.08266283, -0.07872285, 0.07497584, -0.102409676, -0.12487048, -0.06305082)); + conv2d_5_tf += mul(nf1, min16float4x4(0.26158065, -0.090300985, 0.3522249, 0.18087223, -0.06095069, -0.10725335, 0.285748, 0.15195337, -0.19382374, -0.11163994, -0.10937165, -0.05908017, 0.0042464877, -0.14594594, -0.16316739, -0.17099144)); + conv2d_5_tf += mul(ng1, min16float4x4(-0.10028552, -0.18077525, 0.29705408, 0.12354066, 0.0198171, -0.08987044, 0.26377577, 0.075702764, 0.06952089, 0.0049671913, -0.3116211, 0.017268507, 0.37579817, -0.037516277, -0.09738986, 0.0917646)); + conv2d_5_tf += mul(nh1, min16float4x4(0.17661515, -0.17850937, -0.0018308868, 0.18318558, -0.0013081668, -0.113424055, -0.22193146, 
0.15262845, -0.13412614, -0.13704826, -0.22099695, 0.24989522, 0.0740908, -0.3789193, -0.05141985, 0.14818457)); + conv2d_5_tf += mul(ni1, min16float4x4(0.31471825, 0.16524819, 0.03326876, -0.14611365, -0.1191457, -0.06510173, -0.13893965, -0.33106923, 0.13048746, -0.527816, 0.01877066, 0.26005507, -0.06294366, -0.24761125, -0.102864824, 0.094261676)); + conv2d_5_tf += mul(na2, min16float4x4(0.023637002, -0.07186282, 0.0946568, 0.13016573, 0.27244806, -0.08329611, 0.049762517, 0.14729369, 0.15868294, 0.07715838, -0.039478883, -0.06753388, 0.13460182, -0.092146814, -0.11814287, 0.12007007)); + conv2d_5_tf += mul(nb2, min16float4x4(0.06190745, -0.023566067, 0.239366, -0.0068376404, -0.15343493, 0.043685004, -0.047154866, 0.06527902, 0.11998191, -0.2565534, -0.091910206, -0.24104144, -0.12814765, 0.18195467, 0.11766466, 0.06181653)); + conv2d_5_tf += mul(nc2, min16float4x4(-0.06866098, 0.11969287, 0.00997188, 0.09261804, -0.14177154, -0.0052282973, 0.008734555, -0.20822202, 0.0068409014, -0.00470473, 0.031823143, -0.0601048, 0.05632819, 0.01690721, 0.01305342, -0.05824624)); + conv2d_5_tf += mul(nd2, min16float4x4(0.20557542, -0.10924632, 0.012821291, -0.11472336, -0.012862975, -0.09720539, 0.016499901, 0.053605244, 0.2183789, -0.014083709, -0.052786104, -0.075659566, -0.15531872, -0.1454758, 0.032142643, 0.28776056)); + conv2d_5_tf += mul(ne2, min16float4x4(-0.09832725, 0.3388722, -0.092447765, -0.16408351, -0.2557467, 0.031259898, 0.12057204, -0.018744074, -0.46363798, 0.042668946, 0.06506717, -0.25751963, 0.043604825, 0.11740889, 0.07365291, -0.027296776)); + conv2d_5_tf += mul(nf2, min16float4x4(-0.060943104, -0.00371101, 0.13572243, 0.013030143, 0.01196217, -0.14187267, -0.016784329, -0.048273906, 0.2050283, -0.02000498, -0.069050424, -0.09851947, 0.028769497, 0.1289265, -0.0022706073, -0.00296877)); + conv2d_5_tf += mul(ng2, min16float4x4(-0.015049836, 0.01153945, -0.006021933, -0.022156725, -0.030286482, 0.24230544, 0.040056467, -0.021735856, 0.20740065, 
-0.08999259, 0.006861033, -0.104062624, 0.26829463, 0.051726963, -0.12235904, 0.19572715)); + conv2d_5_tf += mul(nh2, min16float4x4(0.12676726, 0.17367609, -0.03689342, -0.034580305, -0.006836569, -0.06386566, 0.30929026, 0.09361281, -0.06405332, 0.26401913, -0.33314535, -0.06335476, -0.10960964, 0.13062708, 0.058030583, -0.1269144)); + conv2d_5_tf += mul(ni2, min16float4x4(0.03625719, 0.07449099, 0.021113826, 0.008309737, -0.09200202, -0.13108951, -0.0054502958, 0.19819209, -0.24836262, 0.22340319, -0.06844758, -0.22940424, -0.03410828, 0.03854127, -0.050844472, 0.019776637)); + conv2d_5_tf += mul(na3, min16float4x4(0.014228765, -0.013087027, -0.18055649, 0.001141047, 0.14329694, -0.008534367, 0.006927009, -0.058499523, -0.030727612, -0.07256724, 0.0025644915, 0.007111054, 0.036673337, -0.026148604, 0.120233335, 0.110904366)); + conv2d_5_tf += mul(nb3, min16float4x4(-0.008129229, 0.047908727, -0.1769762, 0.013220415, 0.066762984, 0.06523022, -0.016525066, -0.014394631, -0.008272182, -0.029847749, -0.10351308, 0.036801845, 0.11523106, -0.055156656, 0.11873017, -0.128935)); + conv2d_5_tf += mul(nc3, min16float4x4(0.21848068, -0.002019241, -0.06304477, 0.026670042, 0.039536465, -0.14145948, -0.06304873, 0.023532849, -0.122648045, 0.036414735, -0.037745856, -7.688992e-06, 0.059370764, -0.015019475, -0.029084614, 0.015826277)); + conv2d_5_tf += mul(nd3, min16float4x4(-0.09427522, -0.001972529, -0.09509679, -0.104867265, 0.05705236, 0.00031401246, 0.096889675, 0.15868911, -0.033721585, 0.08299121, -0.095194876, -0.1062834, -0.029866459, -0.041780088, -0.023895228, -0.0026728562)); + conv2d_5_tf += mul(ne3, min16float4x4(-0.27093527, -0.026471421, 0.09702481, 0.036061123, -0.1268649, 0.099340335, 0.15685195, -0.070615016, -0.13991052, -0.04212775, 0.096722156, 0.056507673, 0.02626438, 0.030435594, -0.00033173471, -0.024930432)); + conv2d_5_tf += mul(nf3, min16float4x4(-0.21608484, 0.038410295, -0.10975598, 0.12944944, -0.034110125, 0.03908566, -0.030190451, 0.031670973, 
-0.018954927, 0.0726848, 0.023156218, 0.017966276, -0.09825987, 0.023912448, 0.07257811, -0.008502145)); + conv2d_5_tf += mul(ng3, min16float4x4(0.044695053, -0.046481512, -0.098602146, -0.13273694, -0.09406325, -0.0062411693, 0.10242225, 0.025881069, 0.061662897, 0.019632077, -0.069696225, -0.14693011, 0.034227923, 0.037439592, -0.17188378, -0.19963826)); + conv2d_5_tf += mul(nh3, min16float4x4(-0.25531536, -0.050288115, 0.11258405, -0.24783169, -0.034263797, 0.054084245, 0.119918555, -0.027509615, 0.10056127, -0.09610037, 0.16208062, 0.005269051, 0.08660796, 0.11050934, -0.012584769, -0.0040703616)); + conv2d_5_tf += mul(ni3, min16float4x4(0.07649277, 0.13011539, -0.052341804, 0.07836859, 0.18562089, 0.07701519, -0.15669914, 0.007145429, 0.018427812, -0.12513049, -0.03395353, 0.14632194, -0.108091615, -0.01585824, 0.0602756, -0.11572579)); + conv2d_5_tf += min16float4(0.028852103, -0.003142654, 0.019121574, 0.026819304); + min16float4 nconv2d_5_tf = max(-conv2d_5_tf, 0); + conv2d_5_tf = max(conv2d_5_tf, 0); + + min16float4 conv2d_4_tf = mul(a1, min16float4x4(-0.032557677, 0.15826401, -0.11499422, -0.08640765, -0.09198991, -0.007192731, 0.010252954, 0.023780089, 0.15372203, -0.0009684923, 0.051660325, 0.011104123, 0.023871671, 0.005045307, 0.10722681, 0.065446004)); + conv2d_4_tf += mul(b1, min16float4x4(0.04723326, 0.21495502, -0.4453857, -0.020825233, 0.021379868, 0.04798187, 0.11383445, 0.08597329, 0.05730255, -0.046370696, -0.105095126, -0.03220056, -0.10122536, 0.06955123, -0.023051325, -0.04296927)); + conv2d_4_tf += mul(c1, min16float4x4(-0.02551809, 0.16179861, -0.15771814, -0.00045056897, 0.05842655, 0.11279471, 0.08018674, 0.05973765, 0.044070918, 0.08054599, -0.070336945, 0.05499731, -0.039118823, 0.003635353, -0.019759493, -0.040480837)); + conv2d_4_tf += mul(d1, min16float4x4(-0.04707628, 0.040738698, -0.013698143, -0.047391538, 0.031729057, -0.01837267, -0.10985463, -0.0028168112, -0.03167109, 0.0007989082, 0.011234699, 0.06895626, -0.12226361, 
0.016290974, -0.055669673, -0.17432979)); + conv2d_4_tf += mul(e1, min16float4x4(-0.05069543, 0.15450205, 0.06981913, -0.377529, -0.14111535, 0.124757245, 0.021858096, 0.044034548, -0.16741593, 0.09746289, -0.045757677, -0.11644043, -0.09906484, 0.19128124, 0.061969943, -0.14589702)); + conv2d_4_tf += mul(f1, min16float4x4(0.12177423, 0.077437244, 0.059054222, 0.14925033, 0.016682645, -0.004765056, -0.2194741, 0.11314126, 0.2384071, -0.12049565, 0.12753354, 0.19679058, 0.03558123, 0.018636368, -0.11018761, -0.027520377)); + conv2d_4_tf += mul(g1, min16float4x4(-0.03618456, -0.030103968, 0.02968891, -0.00393875, -0.07128213, 0.022181263, -0.08430743, -0.027601235, -0.09228556, 0.04661313, 0.054729965, 0.052708175, 0.050483003, -0.022951633, 0.099321984, -0.043519083)); + conv2d_4_tf += mul(h1, min16float4x4(0.034695346, 0.10380181, -0.043013666, 0.037639238, 0.118943654, 0.027931944, 0.07628075, -0.12427217, 0.14970858, -0.065848, 0.0030750742, 0.011039123, 0.27721024, -0.055808693, 0.25105593, -0.1825985)); + conv2d_4_tf += mul(i1, min16float4x4(0.03627934, -0.17293514, 0.09188732, 0.11569783, -0.035355445, -0.10536353, -0.0068529076, -0.0929389, 0.09053234, 0.05907859, 0.049182277, 0.15194432, -0.09835422, 0.00061943196, 0.066343345, -0.06307589)); + conv2d_4_tf += mul(a2, min16float4x4(0.10120336, -0.10855617, 0.13412404, -0.018874792, 0.037988223, 0.0957435, 0.015402347, -0.08589699, -0.07694196, -0.03258571, 0.064437136, -0.0495422, 0.24836332, -0.0041739377, 0.093993485, -0.0076778256)); + conv2d_4_tf += mul(b2, min16float4x4(-0.20205948, 0.035698004, 0.0120531265, 0.03971649, 0.07550046, 0.047750015, -0.049045984, 0.04001014, -0.030263485, -0.0030697742, 0.05283423, -0.00014085052, -0.062447365, -0.0503476, -0.085151225, -0.04436882)); + conv2d_4_tf += mul(c2, min16float4x4(0.1516312, -0.073820546, -0.01047401, 0.0002717457, -0.17057727, 0.20856272, -0.09357496, -0.17346743, -0.068092465, -0.023344085, -0.03279074, -0.077289, -0.09844614, -0.035491887, 
0.048796505, -0.03633584)); + conv2d_4_tf += mul(d2, min16float4x4(0.0073127835, 0.041834716, 0.015633723, -0.042742077, 0.08359733, -0.13898548, 0.1343008, 0.04692816, 0.051663343, -0.1277769, 0.029269615, 0.021745533, 0.09920264, 0.032076713, -0.05319438, 0.040574815)); + conv2d_4_tf += mul(e2, min16float4x4(0.052737534, -0.02136074, -0.18437223, 0.030766862, 0.23291707, -0.010449272, 0.032748792, 0.1304141, 0.27302903, 0.008562884, 0.13475919, 0.044446316, -0.17819557, 0.08270108, 0.06075267, -0.112788476)); + conv2d_4_tf += mul(f2, min16float4x4(-0.093748294, -0.004655885, -0.044859763, -0.11719146, -0.4701752, 0.09076277, -0.2283514, -0.34524822, -0.11999304, -0.010338027, 0.026785752, 0.029790966, -0.0635327, -0.024085084, -0.12074973, 0.080456585)); + conv2d_4_tf += mul(g2, min16float4x4(-0.023425102, -0.105786875, 0.1220016, 0.017974272, -0.12736784, -0.050550908, -0.1985566, 0.09139255, -0.18943925, -0.0067088404, -0.15007311, -0.015332959, 0.16430685, 0.006736225, -0.009263825, -0.08230126)); + conv2d_4_tf += mul(h2, min16float4x4(-0.15165123, 0.057155497, -0.09756418, 0.0475568, -0.14430566, 0.05169595, -0.24240975, 0.061147846, 0.0017831615, 0.028189357, -0.12519005, 0.03604646, -0.0460214, 0.05936097, -0.0213775, -0.28192145)); + conv2d_4_tf += mul(i2, min16float4x4(-0.019390648, 0.005514995, -0.0024649797, 0.056670878, -0.10385216, -0.05531206, 0.23233996, -0.16394126, 0.1718211, -0.08723329, 0.08580946, -0.028214762, -0.060853615, 0.0458013, 0.106201656, 0.031685878)); + conv2d_4_tf += mul(a3, min16float4x4(-0.105268896, 0.0106684705, -0.10355101, -0.07401398, 0.12425712, -0.21308881, 0.05200582, -0.024954682, -0.1120292, 0.07799603, -0.031506516, 0.0031533986, -0.05264893, -0.11141642, 0.107277475, 0.049987797)); + conv2d_4_tf += mul(b3, min16float4x4(0.08439962, -0.14181082, -0.20358182, 0.09080642, -0.061622817, 0.24017061, -0.12030436, 0.17224449, -0.0220505, 0.20025904, 0.1032571, 0.032335218, -0.09232964, -0.06172056, -0.1011141, -0.07322099)); 
+ conv2d_4_tf += mul(c3, min16float4x4(-0.10896482, 0.06107763, -0.100641444, -0.018832406, 0.020139545, -0.0037260412, -0.10512619, -0.24599148, 0.014342631, 0.056689363, -0.06662091, 0.03999069, 0.00824376, 0.030449467, 0.027041748, -0.056902107)); + conv2d_4_tf += mul(d3, min16float4x4(-0.18174766, 0.040627997, 0.1140224, -0.20088135, 0.07404639, 0.01215843, -0.050341435, -0.0011868333, -0.5206288, 0.53214884, -0.60289955, 0.25364086, -0.05814184, 0.21600877, 0.07475344, 0.0624221)); + conv2d_4_tf += mul(e3, min16float4x4(-0.07710521, 0.030054979, -0.28164682, -0.13994755, 0.028757188, 0.04356096, -0.14357159, 0.2761477, -0.5300268, 0.44994202, -0.15364286, -0.18580483, 0.084563375, -0.13093601, 0.08291044, 0.017790407)); + conv2d_4_tf += mul(f3, min16float4x4(0.013963807, 0.0032885068, 0.0069646467, 0.03777879, -0.30103573, -0.047965538, 0.057550967, -0.3402889, 0.0026557294, 0.2289777, 0.01937088, 0.18484715, 0.083694465, -0.056240357, -0.0023172104, -0.13328342)); + conv2d_4_tf += mul(g3, min16float4x4(-0.05847699, 0.06990862, -0.0076244893, 0.03992696, 0.088809974, -0.059422277, -0.10557949, 0.058280375, -0.37764055, -0.19777957, -0.86350954, -0.21546844, 0.21863134, -0.074350335, 0.039010234, -0.021216504)); + conv2d_4_tf += mul(h3, min16float4x4(-0.18698102, -0.024641648, -0.16558538, -0.06499548, 0.10435924, 0.0030438402, -0.021636335, 0.046050593, -0.22217542, -0.14033853, -0.21516539, -0.4834089, 0.061894827, -0.024107188, 0.045805957, 0.20019397)); + conv2d_4_tf += mul(i3, min16float4x4(-0.0657418, 0.074276686, -0.07074239, -0.0101531055, -0.17146541, -0.016556345, -0.16196094, -0.13551502, -0.017605018, 0.065230414, 0.10717515, 0.41153327, 0.07095331, -0.05611257, -0.09297768, -0.054604497)); + conv2d_4_tf += mul(na1, min16float4x4(-0.051999312, 0.28559515, -0.09147715, 0.04536181, 0.077552326, 0.052161235, 0.006652824, 0.12593806, -0.07654755, 0.056134425, 0.029163264, -0.05461885, 0.04772557, 0.14073811, 0.07795857, -0.0397234)); + conv2d_4_tf += 
mul(nb1, min16float4x4(-0.0698435, 0.17774913, -0.07301677, -0.14336437, -0.104051985, 0.14831689, 0.045199208, -0.1867252, 0.07530157, 0.12153924, 0.1397731, -0.026905237, 0.056165505, 0.21213025, 0.073159344, 0.03143804)); + conv2d_4_tf += mul(nc1, min16float4x4(0.029820994, -0.079599164, 0.12901585, 0.014192698, -0.0816397, 0.02425821, 0.10938256, 0.0077257096, -0.009784561, 0.20602871, -0.07226973, -0.16234052, 0.0064664064, -0.023469927, 0.0037447219, 0.015258041)); + conv2d_4_tf += mul(nd1, min16float4x4(-0.028296372, 0.23841251, 0.04076168, 0.061052933, -0.082375534, 0.11200519, 0.025308013, 0.1736187, 0.23024227, -0.004161287, 0.16408522, -0.0141539015, 0.01496407, -0.037708607, 0.15057993, 0.14573294)); + conv2d_4_tf += mul(ne1, min16float4x4(0.22485349, -0.2217838, -0.011602474, 0.22668324, 0.2172098, -0.21826234, -0.09506227, -0.06592076, 0.14401191, 0.014868243, 0.41509256, 0.2799861, 0.04998898, -0.121938676, -0.29612163, 0.16926381)); + conv2d_4_tf += mul(nf1, min16float4x4(0.009154201, -0.14300221, 0.0121250935, -0.049595118, -0.3256411, -0.07036471, -0.066481166, -0.32643607, 0.13287841, -0.096211806, -0.24969384, -0.36735064, -0.14625767, 0.07217462, 0.06205977, 0.13962744)); + conv2d_4_tf += mul(ng1, min16float4x4(0.10122661, -0.042678952, 0.08920629, -0.022906423, -0.048781462, 0.008094098, 0.16410494, 0.01511925, 0.009355741, -0.034123767, 0.06522056, -0.04114966, 0.025140515, -0.046565775, 0.18292467, 0.009392873)); + conv2d_4_tf += mul(nh1, min16float4x4(-0.06604219, -0.10034091, 0.10934946, 0.18707348, -0.19358878, 0.11417287, -0.024397675, 0.04772407, -0.10278711, -0.03847901, -0.025120566, 0.047323767, -0.26464674, 0.15394583, -0.042590924, -0.09511779)); + conv2d_4_tf += mul(ni1, min16float4x4(-0.13339657, 0.13506593, 0.011463314, 0.077461444, -0.022262955, 0.06132727, -0.113292165, -0.1987806, 0.0027555283, -0.016475892, 0.14219329, -0.211625, 0.11405046, -0.12044097, -0.088240534, 0.17436995)); + conv2d_4_tf += mul(na2, 
min16float4x4(-0.08783496, 0.06564822, -0.10796846, -0.13460107, 0.10140343, 0.08105866, 0.0040176474, -0.045305755, -0.09299188, -0.18928377, -0.099694185, 0.11314726, -0.018881949, 0.04591721, 0.117965475, -0.00035760578)); + conv2d_4_tf += mul(nb2, min16float4x4(0.043456256, 0.10901491, 0.010485461, -0.061420415, -0.04018357, 0.1689085, 0.015425885, 0.061508525, 0.069377325, -0.18156749, 0.19194232, -0.25884745, -0.036184482, -0.0069973134, 0.021037813, -0.08046543)); + conv2d_4_tf += mul(nc2, min16float4x4(-0.044377886, 0.18098527, -0.07314578, -0.00287104, 0.038114406, -0.044841792, -0.063126855, 0.19896339, -0.09739791, -0.24212237, 0.19623765, -0.06326722, 0.062247403, 0.054567214, 0.10500492, 0.04231698)); + conv2d_4_tf += mul(nd2, min16float4x4(0.12399143, -0.09728722, 0.06730315, -0.011540306, -0.116925925, 0.0074092527, 0.21276267, 0.068349704, -0.05713399, 0.17656437, -0.10295556, -0.12709019, 0.102335855, 0.2679535, -0.06597912, -0.022839248)); + conv2d_4_tf += mul(ne2, min16float4x4(0.1265364, 0.16177331, -0.075765, -0.06347739, -0.056721687, 0.18794554, 0.006572088, -0.00011200755, 0.05219661, 0.21530084, -0.101604566, 0.04750483, -0.09394214, -0.11256657, 0.11389309, -0.011598962)); + conv2d_4_tf += mul(nf2, min16float4x4(0.015922887, -0.046698473, 0.0130271325, -0.052948795, 0.16426764, 0.09934194, -0.07745314, 0.038738497, -0.040967297, 0.06423774, 0.034312535, -0.013723525, -0.0030767843, 0.041221425, 0.041528914, 0.027097305)); + conv2d_4_tf += mul(ng2, min16float4x4(-0.13077654, 0.046842843, 0.034140635, 0.10109363, 0.20840693, -0.012975956, -0.041564208, 0.009877259, -0.033334266, -0.106034294, 0.2507187, -0.01512933, -0.008589095, 0.1849223, -0.06436464, 0.087347835)); + conv2d_4_tf += mul(nh2, min16float4x4(0.13326278, -0.035467118, 0.12698379, -0.034838732, 0.023856519, 0.05274121, -0.09120117, 0.070493534, -0.14804247, 0.08772896, -0.1343374, -0.058013596, -0.1194792, -0.07288297, 0.074856065, 0.021033823)); + conv2d_4_tf += mul(ni2, 
min16float4x4(0.023594514, -0.018284807, -0.037060708, -0.06051526, 0.13681069, 0.09436225, -0.044987947, 0.21031074, -0.14567234, 0.04987286, -0.24576813, -0.091558464, 0.0040201824, -0.045261826, 0.050834723, 0.04080285)); + conv2d_4_tf += mul(na3, min16float4x4(-0.12843935, 0.11059404, 0.035774253, 0.016019672, 0.13419932, -0.082884714, 0.086934, -0.027470622, -0.0055711996, 0.14726739, 0.00025540774, -0.082832016, 0.015134819, -0.1869738, -0.15580305, 0.118347436)); + conv2d_4_tf += mul(nb3, min16float4x4(-0.03210018, -0.07439424, 0.09171389, 0.0061248797, -0.122092225, -0.0055175424, 0.060848907, 0.05447007, -0.1005626, -0.13843839, -0.11508479, 0.034595586, 0.16528612, 0.07630222, 0.10175574, -0.034656286)); + conv2d_4_tf += mul(nc3, min16float4x4(0.05687666, -0.1130296, -0.038044114, 0.1376985, 0.02434624, -0.21984427, -0.0038558878, -0.10872551, 0.00807944, 0.019718373, 0.07016335, 0.001672884, -0.051990695, -0.04958167, -0.036594924, -0.0008506928)); + conv2d_4_tf += mul(nd3, min16float4x4(-0.07842389, -0.0907049, 0.10945533, -0.14496571, 0.03524454, -0.12881151, -0.13281278, -0.023060825, -0.037150636, -0.0001619192, 0.07462792, 0.19251943, -0.048907887, -0.09152158, 0.077018015, -0.0076050037)); + conv2d_4_tf += mul(ne3, min16float4x4(-0.06379491, 0.22390717, -0.044009656, -0.19816853, -0.14713046, 0.114638254, -0.008227305, -0.014490413, 0.04359834, 0.10032826, -0.17928778, -0.13981889, -0.07729277, 0.11685862, 0.21970165, -0.09117455)); + conv2d_4_tf += mul(nf3, min16float4x4(0.21068226, 0.030921075, 0.109845765, 0.058498275, 0.015876649, -0.0067828237, -0.10064077, 0.13756661, 0.017506564, 0.041748323, 0.17195722, 0.012285508, -0.023290245, 0.07060226, 0.069730066, -0.018874977)); + conv2d_4_tf += mul(ng3, min16float4x4(0.19153018, -0.07691863, -0.03687873, -0.069982305, -0.097453654, 0.060358603, -0.030159682, -0.048520114, 0.12498585, -0.07376571, -0.01039302, -0.099845245, 0.00042995642, 0.035783857, -0.12854497, -0.024975097)); + conv2d_4_tf += 
mul(nh3, min16float4x4(0.11177764, -0.02895167, 0.09053559, -0.24130683, -0.09276382, 0.04739869, -0.005453787, 0.031923447, 0.089385964, -0.048109047, 0.061177306, 0.117845595, 0.014615613, 0.1153759, -0.0007218852, -0.10042441)); + conv2d_4_tf += mul(ni3, min16float4x4(0.041179586, 0.00042151578, 0.07818137, 0.06354339, 0.0049364083, -0.055836283, -0.0073542926, 0.047470722, -0.15328479, 0.03497268, -0.17375292, 0.0006636334, -0.043640774, -0.007737031, 0.10040319, -0.09145891)); + conv2d_4_tf += min16float4(-0.0542914, -0.045369092, 0.029350873, -0.018128533); + tex8[gxy] = conv2d_4_tf; + min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0); + conv2d_4_tf = max(conv2d_4_tf, 0); + + min16float4 target = mul(e1, min16float4x4(0.15610647, -0.15150696, -0.076018915, 0.030773202, -0.13935511, 0.17644633, 0.028819937, 0.30125114, 0.38625193, 0.35517895, 0.0975343, 0.114022225, 0.25494647, -0.23291643, 0.29096943, 0.15063812)); + target += mul(e2, min16float4x4(-0.22949804, -0.1368772, -0.07729264, 0.08470473, -0.06426131, -0.0064847367, 0.08241476, -0.1476949, -0.13712044, -0.36110023, -0.081719294, 0.19409889, 0.05562042, 0.26609465, 0.020447321, 0.2567414)); + target += mul(e3, min16float4x4(0.03337578, 0.2905731, 0.21772428, -0.074480034, 0.071880735, 0.27764675, -0.17273173, -0.0037474795, -0.1842544, 0.21896398, -0.30134472, 0.1711769, 0.23913746, -0.0435854, -0.12745531, -0.050227556)); + target += mul(ne1, min16float4x4(0.34923258, -0.5455803, -0.2904644, -0.5446842, -0.040965725, -0.055288248, -0.50672686, -0.10309429, 0.045286313, -0.04284262, -0.19785875, -0.16594213, -0.10000842, 0.47245356, -0.32767087, 0.32854807)); + target += mul(ne2, min16float4x4(0.05952625, -0.062991776, 0.3438396, -0.08141334, -0.2488028, -0.04746144, 0.06563561, 0.45020792, -0.19996788, 0.015523991, -0.19214569, -0.24849077, -0.022107737, 0.28190804, 0.13384444, -0.12800638)); + target += mul(ne3, min16float4x4(-0.37812218, 0.09970516, 0.015231938, 0.07226164, -0.33720142, -0.05899804, 
-0.0025790115, -0.17770731, 0.111127384, 0.008749534, -0.09077738, -0.060420215, -0.10196339, 0.09641038, 0.25222716, 0.12781976)); + target += mul(conv2d_5_tf, min16float4x4(0.24168618, 0.18625724, -0.012904225, -0.011732107, 0.085045695, -0.4754185, 0.10896487, 0.09179793, -0.31662637, -0.117563, 0.5133052, -0.09457646, -0.15872721, -0.09779008, 0.56810176, 0.3339073)); + target += mul(nconv2d_5_tf, min16float4x4(-0.09105348, -0.17617023, -0.21897802, -0.14157395, 0.16165406, -0.46579927, 0.24905841, 0.11579037, 0.09073764, 0.36771873, -0.29340085, -0.04271419, -0.11684365, -0.17138094, 0.12188604, -0.14749436)); + target += mul(conv2d_1_tf, min16float4x4(0.10943254, -0.17193961, -0.07027378, -0.26047203, 0.04288517, 0.21311204, 0.03997142, -0.17006959, 0.16181368, 0.28361118, 0.26655135, -0.097007245, -0.15998597, -0.09568138, -0.27558687, -0.11706871)); + target += mul(nconv2d_1_tf, min16float4x4(0.365517, 0.5422966, -0.0013869518, 0.3447622, -0.25885904, -0.098901175, -0.048043057, 0.15867509, -0.12303401, -0.15362008, 0.270228, -0.2756776, -0.44207478, -0.0419657, 0.09387863, -0.07240854)); + target += mul(conv2d_4_tf, min16float4x4(0.15073416, -0.032387026, -0.039117433, -0.50999755, 0.073477276, -0.14495571, 0.15120687, -0.3443857, -0.29039595, -0.16189122, 0.14190345, -0.10934344, -0.21965231, -0.45768484, 0.11907852, 0.5091087)); + target += mul(nconv2d_4_tf, min16float4x4(0.23260471, 0.16441877, 0.16760987, 0.10740154, -0.21663232, -0.10124566, -0.20843595, 0.066555224, 0.24608357, 0.16345865, -0.11965141, 0.18451719, 0.41683537, -0.044497896, 0.39102596, -0.11944608)); + target += min16float4(-0.02423156, 0.015124756, -0.02608139, 0.030428935); + tex1[gxy] = target; + + target = mul(e1, min16float4x4(-0.12407633, -0.027812717, 0.23094666, 0.060302667, -0.16624144, -0.0007371851, -0.28186718, 0.22369424, 0.022404855, 0.09096415, 0.0017822908, 0.336001, -0.09130467, 0.034111694, 0.19113103, -0.14513424)); + target += mul(e2, min16float4x4(-0.014768806, 
-0.31290373, 0.015769936, -0.13507901, -0.010203078, 0.4945444, -0.01088852, -0.1582938, -0.14903755, -0.1840089, -0.009966903, -0.19425109, -0.21303283, 0.26285252, -0.046254523, -0.15465552)); + target += mul(e3, min16float4x4(0.07533467, 0.26080438, 0.024856985, 0.34277654, -0.3129344, 0.30575162, 0.06931557, -0.044698272, 0.18042412, 0.45999247, -0.5192437, 0.022618707, -0.020097036, -0.27706465, -0.0050434433, -0.12770803)); + target += mul(ne1, min16float4x4(0.098648146, -0.21701503, 0.10266521, -0.085537605, 0.02402345, -0.28643832, 0.19378376, -0.12658586, 0.115897186, 0.01580828, 0.11827048, 0.29019687, -0.19341177, 0.09564265, 0.03476779, 0.11699004)); + target += mul(ne2, min16float4x4(0.058346223, 0.25530934, -0.026972264, 0.3190419, 0.12263199, 0.124316074, 0.04734691, 0.011293402, -0.17419139, -0.15893947, 0.093723476, 0.23282392, 0.19400646, -0.0533148, 0.026266033, 0.19663234)); + target += mul(ne3, min16float4x4(-0.06663804, 0.20435949, 0.044924624, -0.24982749, 0.20327586, 0.12442739, -0.3155765, -0.18541007, 0.18991531, -0.19276267, 0.21697456, 0.03178544, -0.3381796, -0.15325621, -0.25820518, -0.07297032)); + target += mul(conv2d_5_tf, min16float4x4(0.098007046, -0.17018083, 0.3390076, -0.2280134, 0.12989196, -0.044336785, -0.10702673, -0.37464848, 0.028437488, 0.24224928, -0.107826136, 0.0031239046, -0.34256136, -0.17936559, 0.091159485, -0.054418396)); + target += mul(nconv2d_5_tf, min16float4x4(0.053965975, -0.17428857, -0.43524495, -0.15119378, -0.25487635, 0.16371927, 0.1467712, -0.08216164, -0.5624722, -0.11886804, -0.058240388, 0.17669299, -0.15173754, 0.13094892, 0.39045286, -0.017048221)); + target += mul(conv2d_1_tf, min16float4x4(-0.15798661, -0.36355045, 0.1957264, -0.05392931, 0.098283805, 0.14677107, 0.16887192, -0.11125151, -0.113571666, 0.15960959, -0.09331763, -0.032195523, 0.17286941, 0.33965907, 0.09051416, -0.25542957)); + target += mul(nconv2d_1_tf, min16float4x4(0.16866244, 0.05636189, -0.100324616, 0.20495924, 
-0.102705345, -0.08387417, -0.09328024, 0.21541446, 0.1430065, 0.0308464, -0.0793588, -0.029477509, -0.28854427, -0.29555637, 0.33754608, -0.18144317)); + target += mul(conv2d_4_tf, min16float4x4(-0.11338383, 0.019528843, -0.24414338, -0.36290777, 0.54908705, -0.083018646, 0.007534378, -0.1406417, 0.37853354, 0.09911941, -0.047861155, -0.3186758, 0.2125856, -0.114667036, -0.07411896, 0.050717812)); + target += mul(nconv2d_4_tf, min16float4x4(0.2961511, 0.28937215, -0.36593223, -0.16141813, -0.087650776, -0.47516292, 0.0052091824, 0.033959586, -0.06072628, -0.0012637508, -0.037578013, -0.35235298, 0.11726439, 0.6064031, 0.34058803, 0.45300734)); + target += min16float4(-0.0038817346, -0.052502215, 0.008882693, -0.017785465); + tex2[gxy] = target; + + target = mul(e1, min16float4x4(-0.21563801, -0.12204513, 0.31932783, 0.28290224, -0.17011476, -0.06448831, 0.004365267, -0.07169507, 0.21165244, -0.07712424, 0.14979824, 0.2240992, 0.48357385, -0.015724417, -0.3836641, 0.07599027)); + target += mul(e2, min16float4x4(-0.20743755, -0.119118474, 0.1009234, -0.2842955, -0.24531132, 0.062108602, 0.11733637, 0.06687575, -0.065953426, 0.15715389, 0.21475503, -0.1019138, 0.08085453, -0.24522887, -0.108375534, 0.29179853)); + target += mul(e3, min16float4x4(0.16713834, 0.030504826, -0.2423963, -0.41885766, -0.20249867, -0.061683156, -0.14999944, 0.54505223, 0.16486095, -0.023248592, -0.17566164, 0.089543514, -0.1884646, 0.15263423, 0.14438081, -0.21730141)); + target += mul(ne1, min16float4x4(0.37399703, 0.2731133, 0.11279373, 0.004775496, -0.19443156, -0.071899086, 0.17512012, -0.11265631, 0.01926881, -0.31321192, -0.32160205, -0.23714963, 0.097321026, 0.13937393, -0.28038052, -0.046872586)); + target += mul(ne2, min16float4x4(0.124041334, 0.083966166, 0.13945055, 0.087915726, 0.11154068, -0.09223973, -0.012948238, 0.16114026, 0.13717382, 0.11968761, 0.076536775, -0.15866219, -0.19017774, -0.11172013, 0.024816172, 0.096302085)); + target += mul(ne3, min16float4x4(0.081017025, 
-0.1537902, 0.193927, 0.22226687, 0.441012, 0.18478638, 0.30040395, 0.032401927, -0.13839063, 0.017778423, -0.42750338, -0.19760555, -0.21953818, -0.2148397, -0.084683254, 0.20916465)); + target += mul(conv2d_5_tf, min16float4x4(-0.3921892, 0.2123992, 0.14027761, 0.10175143, -0.11134986, -0.16432697, -0.1097465, -0.21807413, -0.09732297, -0.11108596, -0.39636138, -0.06654249, 0.18766358, -0.0061503067, 0.1286225, 0.2418667)); + target += mul(nconv2d_5_tf, min16float4x4(-0.0039234986, 0.17088562, 0.12906016, -0.13476452, -0.09124947, 0.3098052, 0.09895542, 0.18631962, -0.06776231, 0.19485205, 0.14722902, 0.32147923, -0.1811334, 0.15313488, 0.0796922, 0.0012897709)); + target += mul(conv2d_1_tf, min16float4x4(0.032229863, 0.025498863, 0.06695979, 0.019412167, -0.16543043, -0.12314033, 0.112201385, 0.16554663, 0.13644108, 0.3098045, 0.081390016, -0.006008416, -0.016406069, 0.22883923, 0.22282913, -0.13947442)); + target += mul(nconv2d_1_tf, min16float4x4(0.010251363, 0.08210024, -0.33465254, -0.012109372, 0.027115503, 0.1481351, -0.081793204, -0.20716506, 0.0056828605, -0.30995828, 0.11498873, 0.15678942, -0.061227474, -0.14681229, 0.1498136, 0.11219651)); + target += mul(conv2d_4_tf, min16float4x4(0.21796124, -0.12195326, 0.44734144, -0.124715045, -0.05986958, -0.25252253, -0.13802508, 0.16756216, 0.28327593, 0.38355786, -0.27178785, -0.19969118, -0.26010805, -0.074593216, 0.10679648, 0.15610766)); + target += mul(nconv2d_4_tf, min16float4x4(-0.07648412, -0.18866923, -0.2592641, 0.32486007, -0.6200149, 0.09312683, 0.42827863, -0.2703639, 0.08144911, -0.054994784, -0.24911343, 0.41974616, 0.036914464, -0.32325324, 0.012920313, -0.48379797)); + target += min16float4(-0.013587518, 0.049618572, -0.065549955, -0.007242324); + tex3[gxy] = target; +} + + +//!PASS 4 +//!DESC Conv-4x3x3x24, Conv-4x1x1x56 +//!IN tex1, tex2, tex3, tex4, tex8 +//!OUT tex9, tex5, tex6, tex7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass4(uint2 blockStart, uint3 threadId) { + uint2 gxy = 
Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = tex1.SampleLevel(sam, pos - inputPt, 0); + min16float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e1 = tex1.SampleLevel(sam, pos, 0); + min16float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i1 = tex1.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex2.SampleLevel(sam, pos - inputPt, 0); + min16float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e2 = tex2.SampleLevel(sam, pos, 0); + min16float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i2 = tex2.SampleLevel(sam, pos + inputPt, 0); + + 
min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float4 a3 = tex3.SampleLevel(sam, pos - inputPt, 0); + min16float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e3 = tex3.SampleLevel(sam, pos, 0); + min16float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i3 = tex3.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na3 = max(-a3, 0); + min16float4 nb3 = max(-b3, 0); + min16float4 nc3 = max(-c3, 0); + min16float4 nd3 = max(-d3, 0); + min16float4 ne3 = max(-e3, 0); + min16float4 nf3 = max(-f3, 0); + min16float4 ng3 = max(-g3, 0); + min16float4 nh3 = max(-h3, 0); + min16float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + min16float4 conv2d_1_tf = tex4.SampleLevel(sam, pos, 0); + min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0); + conv2d_1_tf = max(conv2d_1_tf, 0); + + min16float4 conv2d_4_tf = tex8.SampleLevel(sam, pos, 0); + min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0); + conv2d_4_tf = max(conv2d_4_tf, 0); + + min16float4 conv2d_8_tf = mul(a1, min16float4x4(-0.11553467, -0.14921814, -0.085377395, 0.10231987, 0.08155549, 0.07075523, 
0.012124212, 0.013545821, 0.103393115, -0.007523045, 0.060739517, -0.05890024, 0.21902815, 0.020522034, -0.1402768, 0.1280077)); + conv2d_8_tf += mul(b1, min16float4x4(0.051253397, -0.040904667, 0.1898603, -0.11879134, 0.069219105, -0.04280286, -0.022459755, 0.14305754, -0.063906856, 0.21501009, 0.0023572869, 0.09840124, -0.072510734, 0.057598237, 0.06159614, -0.13078417)); + conv2d_8_tf += mul(c1, min16float4x4(0.09612547, -0.11506342, -0.0017697238, 0.082210004, 0.14244868, -0.018724512, 0.12827, -0.011984352, 0.08484893, 0.0534688, 0.06480922, 0.14134778, 0.0876346, -0.010892883, 0.15595037, -0.040623467)); + conv2d_8_tf += mul(d1, min16float4x4(-0.15201004, 0.0093916925, 0.25506935, 0.003084567, -0.06869725, -0.27494308, 0.12937209, -0.12043822, -0.1918611, -0.09398222, 0.045312967, -0.1111442, -0.1376949, 0.0053297062, -0.28389412, -0.1396928)); + conv2d_8_tf += mul(e1, min16float4x4(-0.12742004, -0.23415208, 0.12804613, -0.1406368, 0.09349193, -0.12212758, -0.05245734, -0.39274624, 0.23036338, 0.04170077, -0.12391477, -0.00871988, 0.012228075, 0.31633002, -0.17377669, -0.124939)); + conv2d_8_tf += mul(f1, min16float4x4(-0.01582657, 0.018224325, -0.01147676, -0.09984998, -0.026615107, -0.21468964, 0.21078119, 0.19190042, -0.096901044, -0.041285027, -0.014912263, 0.17798825, 0.06570931, 0.09232608, -0.1068993, 0.089612365)); + conv2d_8_tf += mul(g1, min16float4x4(0.051225413, -0.07643113, 0.058832865, 0.083958775, 0.11160564, -0.14167392, -0.021870648, 0.10238029, 0.047018003, 0.11449065, 0.08001371, -0.06804109, 0.033969186, 0.10051381, -0.0008517809, -0.07459736)); + conv2d_8_tf += mul(h1, min16float4x4(0.09263853, -0.09833199, 0.042132426, -0.13103375, -0.03731804, -0.039324153, 0.10190401, 0.024146391, 0.110644914, -0.12685625, -0.12852249, 0.021824492, 0.0784485, 0.11471671, -0.09116125, 0.010305502)); + conv2d_8_tf += mul(i1, min16float4x4(0.005317984, -0.055282168, 0.09082919, -0.10774655, 0.21394931, 0.0045357225, -0.06699662, 0.2507622, 0.15671767, 
0.11952803, -0.06123182, 0.13399701, 0.046645127, 0.0026899239, 0.022635492, 0.07161002)); + conv2d_8_tf += mul(a2, min16float4x4(-0.017425103, 0.12552156, -0.093341894, -0.071356304, 0.15947455, 0.24979044, -0.03843421, 0.14001197, 0.15455416, -0.05550835, -0.011375887, -0.07661705, -0.12418336, 0.056913756, 0.16633298, 0.11513766)); + conv2d_8_tf += mul(b2, min16float4x4(-0.08778774, 0.057353538, -0.092138395, 0.002837398, 0.22633068, 0.120333284, 0.09834124, 0.05738123, 0.059130516, 0.22035405, -0.024255643, 0.02477418, 0.04645929, 0.39426094, 0.276884, -0.01479481)); + conv2d_8_tf += mul(c2, min16float4x4(0.18796739, 0.083501674, 0.07283311, 0.06415875, -0.024382524, 0.04679669, -0.15093789, -0.22831221, 0.058881074, 0.16446854, -0.028955745, 0.1956661, 0.0516941, 0.16135721, 0.11951658, 0.10451706)); + conv2d_8_tf += mul(d2, min16float4x4(0.008279574, 0.23456147, -0.12539841, -0.17107405, 0.12736088, -0.028486755, -0.18606788, -0.15545112, -0.025036227, 0.028735701, 0.17332946, -0.1413287, 0.050435208, -0.07583189, 0.14276801, 0.08007638)); + conv2d_8_tf += mul(e2, min16float4x4(0.23048489, -0.045157567, -0.014840823, 0.041994587, -0.0002087858, -0.01711496, -0.08994919, -0.05393212, -0.048546836, 0.18694918, -0.014523763, -0.14133967, 0.02896907, 0.08478857, 0.020594146, -0.0013243662)); + conv2d_8_tf += mul(f2, min16float4x4(-0.1141037, -0.11394802, 0.11164606, 0.12330282, -0.044497687, -0.06207866, 0.08016056, 0.16055691, -0.062488995, 0.051081542, 0.086364634, 0.10802774, 0.16742289, -0.08850773, 0.26072827, -0.34441397)); + conv2d_8_tf += mul(g2, min16float4x4(0.06775539, -0.19385163, 0.12488108, 0.11025669, 0.028568348, 0.051090416, -0.15175076, -0.17447716, -0.14535129, -0.15599817, -0.10742375, 0.23767987, -0.071634814, -0.19241351, -0.052424364, 0.105806515)); + conv2d_8_tf += mul(h2, min16float4x4(-0.044398658, 0.0027700714, -0.22429284, 0.11238373, -0.081747256, -0.016608216, 0.012278578, 0.036800906, 0.015081323, 0.12504977, -0.05880422, 
-0.05670147, -0.051358018, 0.03139849, -0.0058919964, -0.029613987)); + conv2d_8_tf += mul(i2, min16float4x4(-0.05326926, -0.06667389, -0.15082167, 0.011100974, -0.17428419, 0.06436674, 0.12850241, 0.07432186, 0.08191501, 0.24600182, -0.085727975, -0.22370532, 0.15681425, -0.112885654, 0.10803866, 0.09235784)); + conv2d_8_tf += mul(a3, min16float4x4(-0.15705872, -0.1011224, 0.11024848, 0.100342564, -0.112648144, -0.18259776, -0.0134320175, -0.19909476, 0.09715426, 0.015931793, -0.13415024, -0.1476672, -0.07625902, 0.11680044, -0.02269237, 0.013758246)); + conv2d_8_tf += mul(b3, min16float4x4(-0.24389952, 0.1949585, -0.08155146, -0.14432955, 0.061777957, 0.0053770593, 0.11755161, -0.053200334, -0.18301581, -0.015372121, -0.10212801, 0.27215135, 0.089837484, 0.011281987, -0.1765269, 0.060139008)); + conv2d_8_tf += mul(c3, min16float4x4(0.1613523, -0.051561244, -0.08003759, -0.15677674, -0.010480271, -0.05442542, 0.03414788, -0.054194316, -0.087549254, 0.22978279, -0.0047125067, 0.16779551, 0.0654713, 0.055772237, -0.009877759, 0.04076752)); + conv2d_8_tf += mul(d3, min16float4x4(-0.018052207, -0.07168355, -0.1447087, 0.2920458, 0.1345294, -0.0847823, 0.0014948811, -0.10205125, -0.044011697, -0.16249846, -0.052916005, -0.0181699, -0.08360677, -0.06418388, -0.036664434, -0.15985154)); + conv2d_8_tf += mul(e3, min16float4x4(-0.0043584667, 0.1973149, 0.07195116, 0.07608803, -0.10798404, 0.11076036, 0.23318382, -0.23839737, -0.29880977, -0.03647466, -0.13977784, -0.27129006, 0.14539374, 0.003516734, -0.17389128, -0.14548092)); + conv2d_8_tf += mul(f3, min16float4x4(-0.039712217, -0.14402422, 0.115726, 0.026172435, 0.088555016, 0.07606563, 0.047167692, -0.048009936, -0.19357018, 0.01590195, -0.08144182, 0.11633417, 0.044445038, -0.038849603, 0.02644488, 0.12953997)); + conv2d_8_tf += mul(g3, min16float4x4(-0.2535649, -0.09789916, -0.059466388, -0.17749946, -0.024909042, 0.07494422, -0.0817595, 0.20722246, 0.049061295, -0.26182574, 0.11551785, -0.11284367, -0.19183765, 
-0.075118415, 0.023913708, -0.13905819)); + conv2d_8_tf += mul(h3, min16float4x4(-0.009345336, 0.06655174, -0.002273717, -0.06538255, -0.015212964, 0.039716627, -0.08802585, -0.112940565, 0.018324325, 0.24168438, -0.2545027, 0.025853468, -0.11133557, -0.028638441, 0.026320668, -0.09357033)); + conv2d_8_tf += mul(i3, min16float4x4(-0.23745783, -0.032814, 0.2784286, -0.04626241, -0.02654139, -0.018567635, -0.0013748549, -0.064650096, 0.08974625, 0.04735343, -0.027304498, 0.14134395, 0.009515457, -0.0011779714, -0.001755572, 0.008599811)); + conv2d_8_tf += mul(na1, min16float4x4(-0.053202473, -0.17543721, 0.03065013, -0.11342283, 0.13609491, 0.15735649, 0.040357295, -0.062337715, 0.060803644, -0.0032487542, -0.13659185, -0.09013045, -0.058906827, -0.116660595, 0.03664988, 0.059270184)); + conv2d_8_tf += mul(nb1, min16float4x4(0.21752366, -0.06447607, -0.083456226, -0.06617954, -0.013684511, -0.1191609, -0.2506009, -0.08164425, 0.1306491, 0.19933657, 0.13410534, 0.09191758, -0.039843913, -0.06834293, 0.08471115, -0.09353382)); + conv2d_8_tf += mul(nc1, min16float4x4(-0.027393917, -0.08497713, 0.26017472, 0.2136785, -0.1488196, -0.07492567, 0.14468898, 0.16119008, 0.0121641755, 0.22242029, -0.06302512, 0.062499605, 0.06213177, -0.09802615, -0.30932772, 0.011748043)); + conv2d_8_tf += mul(nd1, min16float4x4(0.1187535, 0.04582557, -0.12194581, -0.039476555, 0.20283094, -0.10453671, 0.09578921, -0.22217935, 0.2739068, 0.09089512, -0.3268319, 0.17347647, -0.08915248, -0.13531092, 0.14857613, -0.07792796)); + conv2d_8_tf += mul(ne1, min16float4x4(-0.082583435, 0.16037074, 0.034193352, -0.07133332, -0.0669728, -0.24518156, 0.11620159, -0.10171298, -0.03303509, -0.0028717325, 0.0760564, -0.07741538, 0.046745025, -0.25254723, -0.01662034, 0.055250034)); + conv2d_8_tf += mul(nf1, min16float4x4(0.12526712, -0.0023898773, -0.3012884, -0.047304068, -0.09815741, 0.013686822, -0.050375015, 0.14987841, -0.038195454, 0.040165856, 0.014663741, 0.16414583, -0.15489048, 0.0926139, 
-0.21309514, -0.1200608)); + conv2d_8_tf += mul(ng1, min16float4x4(-0.09133431, -0.16783749, -0.062135316, 0.018470682, 0.022288319, -0.02211177, 0.13391319, -0.18012549, 0.49915206, 0.13974468, -0.08988157, 0.12178317, 0.0401673, 0.053748768, 0.019889776, 0.03453906)); + conv2d_8_tf += mul(nh1, min16float4x4(0.14379664, 0.08435809, 0.036211815, 0.07440852, -0.06631962, -0.12839338, 0.14946012, -0.21335278, 0.34956563, 0.5433695, -0.2727362, -0.086059555, 0.15091617, -0.1394221, 0.19740397, 0.14155756)); + conv2d_8_tf += mul(ni1, min16float4x4(-0.020419724, 0.07860248, -0.25041556, 0.043661647, -0.018286234, -0.059268583, -0.018467212, 0.04894847, -0.06933085, 0.31178948, -0.11954371, -0.0636989, 0.07150373, -0.04530066, -0.0018285213, 0.019425247)); + conv2d_8_tf += mul(na2, min16float4x4(0.09962638, -0.17088315, -0.06602017, -0.06087763, -0.1418266, -0.13101861, -0.13441323, -0.246784, -0.11813881, -0.28987116, 0.0533919, 0.058272794, -0.005445841, 0.015091582, 0.20249642, -0.105762914)); + conv2d_8_tf += mul(nb2, min16float4x4(-0.21612363, -0.1450863, -0.23284402, 0.006895393, -0.017744822, -0.20156701, 0.012746878, 0.018686332, 0.07711055, -0.10632525, -0.12213612, 0.051344417, -0.0141962785, -0.08607468, -0.05173791, -0.012742015)); + conv2d_8_tf += mul(nc2, min16float4x4(-0.35659614, 0.06504701, 0.0072779786, 0.3384698, -0.14741105, -0.107767306, -0.14098823, 0.22308472, -0.08386747, 0.09358457, 0.052461777, 0.16237038, -0.0059022917, -0.088671595, 0.14027567, -0.04549793)); + conv2d_8_tf += mul(nd2, min16float4x4(-0.23274305, 0.087585405, -0.006931044, -0.23876844, 0.08388762, -0.3022666, -0.16896221, 0.06452799, 0.2715658, -0.10732195, -0.057401773, 0.11985068, -0.06397641, -0.04235397, -0.026778454, 0.21212392)); + conv2d_8_tf += mul(ne2, min16float4x4(0.0082654, 0.28741485, -0.14546123, 0.20393674, -0.02755474, -0.120006405, 0.3581759, 0.12956442, 0.009266114, 0.012998164, 0.032407217, 0.06048391, 0.041528724, -0.13716324, 0.10482829, 0.084386185)); + 
conv2d_8_tf += mul(nf2, min16float4x4(-0.11990044, 0.092382684, -0.27219963, 0.15899557, -0.001977273, 0.120091155, 0.046375066, -0.21674563, 0.055842437, 0.07407933, 0.123498544, -0.08587901, 0.06925744, -0.07803027, -0.18120557, -0.0013798468)); + conv2d_8_tf += mul(ng2, min16float4x4(-0.025172636, 0.0014970741, -0.12216828, -0.07777998, -0.11570999, -0.2672482, -0.04927161, 0.047932815, 0.017598571, 0.06150582, -0.006943665, 0.06608355, 0.09816235, -0.02132959, 0.022629065, -0.11914383)); + conv2d_8_tf += mul(nh2, min16float4x4(-0.03462315, 0.0662906, 0.043817297, -0.09336832, -0.02393236, 0.12857129, -0.08293834, -0.079446144, 0.07298153, -0.22665861, 0.19360217, -0.027094053, 0.067512356, 0.054872043, 0.07353051, -0.019753326)); + conv2d_8_tf += mul(ni2, min16float4x4(0.052837294, 0.122079946, 0.10026166, -0.16611442, -0.20202795, 0.10773466, 0.016957153, -0.06257964, 0.065463126, -0.0070094382, 0.0057103466, 0.0263681, -0.083057486, 0.011921135, 0.18715331, -0.009138652)); + conv2d_8_tf += mul(na3, min16float4x4(-0.039395697, 0.047360536, 0.08876623, -0.051131938, 0.079491556, -0.062068135, -0.11143306, -0.1600982, 0.1182525, 0.0990501, 0.032290936, 0.16515383, 0.048210137, 0.27581617, 0.2143776, -0.26727012)); + conv2d_8_tf += mul(nb3, min16float4x4(0.009885355, -0.10188308, 0.014354376, -0.07466153, -0.09686006, 0.03712243, -0.07547052, -0.2513815, -0.1224751, 0.28383356, -0.11245158, -0.0022227417, 0.10997654, -0.12797359, -0.026750803, -0.15781246)); + conv2d_8_tf += mul(nc3, min16float4x4(-0.03825075, 0.0119200265, 0.13641061, 0.08023444, -0.05399191, -0.029703232, 0.11449091, 0.104263976, 0.13190906, 0.03559845, 0.00035285854, -0.24578363, -0.030404888, 0.03632663, 0.2665158, 0.287037)); + conv2d_8_tf += mul(nd3, min16float4x4(0.19444078, 0.04411847, 0.10453107, 0.16204067, -0.10203096, -0.1057438, -0.10478279, -0.10320498, 0.0060342676, 0.20314808, -0.080608025, -0.13728383, 0.23798111, 0.03982377, 0.0018392511, -0.17587116)); + conv2d_8_tf += mul(ne3, 
min16float4x4(0.093861975, -0.037806403, -0.023811158, 0.08989214, 0.16903597, -0.11738837, 0.057141513, 0.03039443, 0.07186046, -0.16815007, 0.041725967, 0.023349155, -0.21743254, -0.054814734, 0.21988024, -0.19913116)); + conv2d_8_tf += mul(nf3, min16float4x4(-0.098907694, 0.12669978, -0.022410035, -0.09411821, -0.037412155, 0.04395231, -0.15797623, -0.14484851, -0.036790654, -0.038002916, 0.16846262, 0.21878582, -0.053109415, -0.03769754, -0.24775061, -0.010048842)); + conv2d_8_tf += mul(ng3, min16float4x4(-0.12894969, 0.0033566963, 0.030691003, 0.033040218, -0.08500356, -0.043196633, 0.06903723, -0.17297482, -0.102706455, 0.13380836, 0.20812829, -0.054975122, -0.058504406, -0.08924625, 0.0967954, -0.12462231)); + conv2d_8_tf += mul(nh3, min16float4x4(-0.020506827, 0.040906876, 0.15277289, -0.11496513, 0.19803853, 0.011656168, 0.0041951393, 0.16394733, -0.052599292, -0.2028797, -0.012671829, 0.12447954, -0.042609632, 0.18015629, -0.047704864, -0.20819715)); + conv2d_8_tf += mul(ni3, min16float4x4(-0.04611932, -0.04080319, 0.1732811, -0.16310379, -0.0759677, -0.012633483, -0.12658887, -0.10228954, 0.11699648, 0.020952728, -0.1922721, 0.079663426, -0.017287953, 0.050658427, -0.061943304, -0.26140955)); + conv2d_8_tf += min16float4(-0.020329567, 0.07771538, 0.06740593, -0.00038238944); + min16float4 nconv2d_8_tf = max(-conv2d_8_tf, 0); + conv2d_8_tf = max(conv2d_8_tf, 0); + + min16float4 conv2d_7_tf = mul(a1, min16float4x4(0.09670644, -0.04566203, -0.10664036, -0.11654977, 0.10353238, -0.026668113, -0.06772906, -0.058057647, -0.04721855, -0.019877478, -0.16225834, -0.18661498, -0.1137224, 0.01452415, 0.09002202, -0.07991262)); + conv2d_7_tf += mul(b1, min16float4x4(0.12247382, 0.10237518, 0.04044118, -0.04867563, 0.106729075, 0.19503647, -0.01294371, 0.12316606, 0.08497549, -0.01606401, 0.031219587, 0.1474753, -0.14370713, -0.24351072, -0.17444824, 0.12567697)); + conv2d_7_tf += mul(c1, min16float4x4(-0.05373204, -0.11406721, -0.04307548, -0.0011615923, 0.09172633, 
-0.034839034, 0.12179155, -0.032049768, -0.036665026, 0.02375685, 0.01977139, -0.115673535, -0.065757565, 0.12521514, 0.03739438, -0.012122441)); + conv2d_7_tf += mul(d1, min16float4x4(0.0037090098, -0.09165263, -0.22216173, -0.09436383, -0.018459387, 0.15764487, 0.106846556, -0.15703869, -0.1056327, 0.100443825, 0.15728104, -0.07118126, -0.071113996, 0.07175751, 0.1066827, 0.015554562)); + conv2d_7_tf += mul(e1, min16float4x4(-0.08138076, -0.005017353, 0.0024575114, -0.0280491, -0.1689416, -0.24320668, -0.07413122, -0.026848925, -0.17659375, 0.095876895, 0.1875987, -0.0052445224, 0.0041429237, -0.13173698, -0.21236134, 0.14331093)); + conv2d_7_tf += mul(f1, min16float4x4(-0.023982342, -0.028810123, -0.1591679, -0.02026218, -0.16651444, 0.050990265, -0.1640659, -0.109770395, -0.06517823, 0.06647583, 0.09519326, -0.14313333, 0.061294477, 0.066543005, 0.12260083, -0.1436599)); + conv2d_7_tf += mul(g1, min16float4x4(0.07363797, -0.07069135, -0.01332299, -0.1166729, -0.17299873, 0.10319499, 0.17256232, -0.15059224, 0.12490272, 0.03816397, -0.07081764, -0.0005555199, 0.009463498, -0.080442056, 0.05372971, -0.01984048)); + conv2d_7_tf += mul(h1, min16float4x4(0.07747191, 0.038767997, -0.042611655, -0.025650622, -0.20976418, 0.11478602, 0.05521954, 0.03552756, 0.012396808, 0.10836491, 0.01147957, 0.17223893, -0.09354668, -0.061399113, 0.03731426, -0.095968515)); + conv2d_7_tf += mul(i1, min16float4x4(0.0029518164, -0.07522048, -0.30731654, 0.14996396, -0.09563301, -0.1635997, 0.16482228, -0.33490175, 0.034455117, -0.124511935, 0.003454064, -0.011791387, -0.08124914, -0.020552732, 0.14202276, -0.053646516)); + conv2d_7_tf += mul(a2, min16float4x4(0.029005067, -0.019747132, 0.041804817, 0.10725602, 0.09535564, 0.17670439, 0.18999198, 0.06499296, 0.09519827, -0.09794806, 0.10868586, -0.038871128, -0.092565574, -0.018548176, 0.028203959, -0.050549477)); + conv2d_7_tf += mul(b2, min16float4x4(0.10629401, -0.01204608, -0.0766338, 0.112705976, -0.103695825, 0.10200874, 
0.008448839, 0.017780313, -0.024469525, 0.1860687, 0.14225325, 0.15677285, -0.14190355, -0.22543404, 0.024092557, -0.2790124)); + conv2d_7_tf += mul(c2, min16float4x4(0.08649951, 0.040031336, -0.010628009, -0.04257323, 0.052871518, 0.06654039, -0.07866483, 0.09136843, -0.10960993, -0.029104995, 0.18752916, 0.022354944, -0.15167497, -0.04915799, -0.03720373, 0.18194139)); + conv2d_7_tf += mul(d2, min16float4x4(-0.029030664, 0.063362755, 0.010331715, 0.034228537, -0.010749333, 0.026652085, -0.06266523, -0.047827587, 0.19567958, -0.07156196, 0.080418445, 0.040099807, 0.06901692, -0.10262759, 0.10190994, 0.1662688)); + conv2d_7_tf += mul(e2, min16float4x4(-0.04938947, 0.20808902, -0.012551209, 0.13833791, -0.08467056, -0.06768094, -0.0035055066, 0.2141383, 0.011813273, -0.094283104, -0.11627318, 0.0035407832, -0.16360888, -0.04307167, 0.18481791, 0.07308102)); + conv2d_7_tf += mul(f2, min16float4x4(0.058353335, 0.09541393, 0.013101275, -0.081891365, 0.08742119, -0.005137093, 0.025961146, -0.037318625, -0.14933549, 0.06090928, 0.12738119, -0.10817076, -0.13165309, 0.16108744, -0.13503371, 0.15482368)); + conv2d_7_tf += mul(g2, min16float4x4(-0.034848627, -0.0430948, -0.048124265, -0.04486795, -0.035008915, 0.08321689, -0.04977505, 0.048597503, 0.020555262, -0.07508485, 0.20037362, 0.06753769, 0.058704067, -0.009009662, -0.05421176, 0.20524938)); + conv2d_7_tf += mul(h2, min16float4x4(-0.12115005, 0.045643892, 0.112293474, 0.022908293, 0.073470674, -0.067966096, -0.017103313, -0.13648018, -0.07021163, 0.031020392, -0.048876107, 0.10397969, -0.005251243, -0.2611716, -0.07903786, 0.3444416)); + conv2d_7_tf += mul(i2, min16float4x4(0.10680049, -0.09858707, -0.0010306702, 0.10842332, -0.09013634, 0.02091661, 0.22192872, -0.15876925, 0.035971455, -0.04786045, 0.009500665, 0.09247623, 0.013221849, 0.1912487, -0.12753724, -0.061068386)); + conv2d_7_tf += mul(a3, min16float4x4(-0.03980972, -0.1474463, 0.22852057, -0.030534718, 0.103116564, -0.024893943, 0.023735823, -0.19768827, 
-0.088497065, -0.20338957, -0.022078201, -0.058560856, 0.16291575, 0.014483492, -0.093514696, 0.14760342)); + conv2d_7_tf += mul(b3, min16float4x4(-0.09319041, 0.08757541, 0.024344994, -0.004351115, 0.0023287807, 0.036806494, -0.02552934, -0.06227957, -0.1354203, 0.0283256, 0.2185213, -0.087060206, -0.022696337, -0.16076073, -0.20330715, 0.036380492)); + conv2d_7_tf += mul(c3, min16float4x4(-0.041115735, -0.023528732, -0.10124798, 0.21328308, -0.009342506, 0.07328608, 0.009285847, -0.23402044, 0.13117228, 0.1009154, 0.18027642, 0.074597865, 0.09881346, -0.00081656995, -0.002189424, -0.105243)); + conv2d_7_tf += mul(d3, min16float4x4(0.11213601, -0.23114498, 0.10217712, -0.083360896, 0.07913656, -0.039601568, 0.11367716, -0.034739245, -0.14472133, -0.035573903, -0.35375246, 0.040547356, -0.1504422, -0.15183373, -0.08146184, -0.015926573)); + conv2d_7_tf += mul(e3, min16float4x4(0.007678496, 0.045396518, 0.067442104, 0.357935, 0.1795549, -0.028398065, 0.26147032, -0.22306849, -0.028738718, -0.10074325, -0.08521542, -0.020190565, -0.175108, -0.26179528, -0.1149573, 0.05406529)); + conv2d_7_tf += mul(f3, min16float4x4(0.030697253, 0.06005289, 0.024412693, -0.013535843, 0.030500244, 0.14023077, -0.047582973, 0.07610684, 0.0571624, 0.19386198, 0.021660715, 0.03154867, -0.03788935, -0.08817162, 0.0053847465, -0.015165054)); + conv2d_7_tf += mul(g3, min16float4x4(-0.26646808, -0.2275448, -0.0619738, 0.104571655, 0.024079306, 0.033514917, 0.016844772, -0.14415953, -0.01694689, -0.0072623887, -0.12263149, 0.030444223, -0.03220662, 0.022894913, 0.03112325, -0.036533017)); + conv2d_7_tf += mul(h3, min16float4x4(-0.15611476, -0.19298914, -0.17546865, -0.080604054, 0.07597506, 0.097353615, 0.029924694, -0.078176685, -0.12268953, -0.05687716, -0.05294087, -0.18172315, -0.0773961, 0.084935166, -0.009803619, 0.040560953)); + conv2d_7_tf += mul(i3, min16float4x4(-0.10773278, -0.0012994999, 0.004722267, -0.057820093, -0.10506255, 0.029771779, 0.015667265, 0.14186347, -0.108355746, 
-0.11185942, 0.022062123, -0.123649485, -0.0666645, -0.0107138315, -0.0130763, -0.046252076)); + conv2d_7_tf += mul(na1, min16float4x4(-0.031815648, -0.0084208995, -0.072824255, -0.1508182, -0.064399414, 0.021369422, -0.18965991, 0.03649226, 0.15370539, -0.117377125, 0.15578026, 0.15059558, 0.1423233, 0.013444947, -0.16911474, -0.21899599)); + conv2d_7_tf += mul(nb1, min16float4x4(-0.050074972, 0.06591971, -0.20185336, -0.19894198, -0.045794237, -0.09582899, 0.019117232, 0.054774716, 0.00469303, 0.08466791, -0.10310348, 0.03430011, -0.05189703, 0.08612288, -0.09612641, 0.15337339)); + conv2d_7_tf += mul(nc1, min16float4x4(-0.058103696, -0.13447452, -0.06501768, -0.08269111, -0.043869898, 0.0398948, 0.033771295, -0.021524182, 0.0027115596, -0.030671224, 0.045388903, 0.04590158, -0.26087472, -0.16301683, 0.03324832, 0.024285218)); + conv2d_7_tf += mul(nd1, min16float4x4(-0.051421262, 0.15028518, 0.06384462, -0.08590671, 0.101886876, -0.012882116, -0.051741008, 0.11888618, -0.15590154, -0.38625813, 0.042900138, 0.22492291, -0.09111901, -0.005388837, 0.051056426, 0.043860577)); + conv2d_7_tf += mul(ne1, min16float4x4(-0.079883516, 0.05735032, 0.10719803, 0.16519663, -0.11724404, 0.25990528, 0.012375103, -0.010302452, 0.49185735, 0.1696493, 0.060474537, 0.3722603, 0.014323083, -0.16412182, -0.059749532, -0.24289557)); + conv2d_7_tf += mul(nf1, min16float4x4(-0.034733526, -0.084441185, -0.04596736, -0.0042962483, -0.0392975, -0.11149175, 0.14051792, 0.0702665, 0.117540844, -0.102869704, 0.27858627, 0.069043316, 0.04871729, -0.24745311, -0.058776632, -0.0017110928)); + conv2d_7_tf += mul(ng1, min16float4x4(-0.06277427, 0.16004023, -0.11507597, 0.15097888, 0.027060283, 0.1953599, -0.0031669976, -0.0005737168, -0.19876455, -0.23691651, 0.17741823, -0.12453466, -0.040428206, -0.0018632353, 0.023173677, -0.076046385)); + conv2d_7_tf += mul(nh1, min16float4x4(0.13513252, 0.0295901, -0.006554118, 0.06786791, 0.15473233, 0.012762339, 0.1927368, -0.06255987, -0.30587965, 
-0.44131213, -0.086936355, 0.011615333, 0.097696826, 0.02502633, 0.08837973, -0.07914361)); + conv2d_7_tf += mul(ni1, min16float4x4(-0.013541286, -0.034861088, 0.052821327, 0.037984103, 0.04338181, -0.0133451065, 0.041617934, -0.034278907, -0.053211715, -0.16200064, 0.11068738, -0.0867221, 0.04498939, 0.045188803, -0.05908562, 0.081477076)); + conv2d_7_tf += mul(na2, min16float4x4(-0.15266198, 0.22576767, 0.030019565, -0.045541495, 0.04881405, 0.0142783765, -0.1529103, 0.18320109, -0.00480197, 0.094124764, -0.010995377, 0.01641767, -0.010706163, 0.100903675, 0.19038767, -0.18477328)); + conv2d_7_tf += mul(nb2, min16float4x4(0.008087569, 0.13434748, -0.32156894, 0.07736676, 0.10494717, -0.11782738, -0.0029439328, -0.09557844, 0.015514035, -0.089648925, -0.17554814, -0.14883392, -0.04063905, 0.050346915, -0.08932905, -0.010719376)); + conv2d_7_tf += mul(nc2, min16float4x4(-0.11777635, -0.33014166, 0.34624732, 0.11740032, 0.1543961, -0.019076902, -0.12216481, -0.017081184, -0.00078788324, 0.031078909, -0.028584918, -0.026835786, 0.091864, 0.05272115, -0.12571204, 0.008416047)); + conv2d_7_tf += mul(nd2, min16float4x4(-0.043549653, 0.1039711, -0.20336658, -0.010299696, -0.27827185, 0.019381372, -0.1632188, 0.077465065, 0.20229691, -0.069236994, 0.014810417, 0.22877559, 0.02143673, 0.17381601, 0.09082899, -0.053508762)); + conv2d_7_tf += mul(ne2, min16float4x4(0.2391153, -0.19723871, -0.25610062, 0.07108974, -0.03182384, 0.2192639, -0.09241812, 0.048452295, -0.021405702, -0.2554734, -0.1965786, 0.20361422, -0.14465299, 0.058985952, -0.025833346, -0.10550291)); + conv2d_7_tf += mul(nf2, min16float4x4(-0.39593056, -0.4537898, 0.023792682, 0.37393433, 0.041772638, -0.020854915, 0.050651625, 0.0766088, 0.23962118, -0.06411897, -0.106468715, 0.17854762, 0.03402648, 0.0236968, -0.033498786, -0.12094796)); + conv2d_7_tf += mul(ng2, min16float4x4(-0.2517486, -0.011749091, -0.08157814, 0.1392019, 0.042420883, -0.23219018, 0.05053571, 0.13250825, -0.050171047, 0.15462638, 
-0.043420136, -0.014093825, 0.16176236, -0.14638837, -0.0071619265, -0.055462677)); + conv2d_7_tf += mul(nh2, min16float4x4(-0.3264325, -0.30403548, -0.15088049, -0.010203428, -0.018360123, -0.060466267, -0.090672255, -0.13885537, -0.038393795, 0.20886149, -0.10593147, 0.017991208, 0.08373391, 0.20925963, 0.028997745, 0.06881825)); + conv2d_7_tf += mul(ni2, min16float4x4(0.19107129, -0.16896184, -0.12929466, 0.07562441, 0.064231046, 0.0864716, -0.03966105, 0.09153016, -0.0628452, -0.015886426, -0.07048391, -0.24076262, 0.011216516, 0.07708032, -0.03814493, 0.13395755)); + conv2d_7_tf += mul(na3, min16float4x4(-0.05879415, -0.019550052, -0.023919582, -0.11289196, -0.0064408537, 0.07402445, 0.058795378, 0.15885338, -0.043667927, 0.10769252, 0.030309072, 0.048533317, -0.2524471, 0.059829284, 0.0797783, -0.019442867)); + conv2d_7_tf += mul(nb3, min16float4x4(-0.0038486274, -0.04580634, 0.07400007, -0.031162377, 0.10273923, 0.008071164, 0.11991736, 0.026728682, 0.026876984, -0.07799812, 0.1297364, 0.14695424, -0.06859438, -0.10330936, -0.07446633, 0.02616857)); + conv2d_7_tf += mul(nc3, min16float4x4(-0.16036308, 0.04957999, 0.01030331, -0.1962486, 0.103015296, -0.007340536, -0.049429756, 0.07165493, 0.008103339, 0.083655335, 0.098038, -0.1358248, -0.25885662, 0.029940864, -0.008321852, 0.2294651)); + conv2d_7_tf += mul(nd3, min16float4x4(-0.06087098, 0.00019651231, 0.03534409, 0.03318348, -0.0879954, 0.034764756, -0.30367124, -0.09713905, -0.026543869, -0.089636214, 0.12096616, -0.034594636, 0.054902434, -0.09290082, -0.07779638, -0.0821119)); + conv2d_7_tf += mul(ne3, min16float4x4(0.13779263, 0.18896884, -0.076830864, -0.09442952, -0.23735744, -0.014474691, 0.009051341, 0.10342686, 0.041046456, -0.10701024, -0.18442988, 0.02789949, -0.00074035715, -0.025513707, -0.040514592, 0.036068246)); + conv2d_7_tf += mul(nf3, min16float4x4(-0.048401676, 0.20745294, 0.0070508514, -0.0705337, -0.022934115, -0.043547787, 0.04628692, -0.07658743, -0.10154497, -0.13417569, 
-0.0013773212, 0.14263885, -0.07437275, -0.13121726, 0.12632057, 0.034687687)); + conv2d_7_tf += mul(ng3, min16float4x4(-0.027830327, -0.030560987, 0.12718935, -0.102934904, -0.02562363, 0.008175067, -0.0028858446, -0.015783066, 0.15272577, 0.10772941, 0.043485314, 0.014232708, 0.08577555, -0.16121073, 0.026591625, -0.055126593)); + conv2d_7_tf += mul(nh3, min16float4x4(-0.06485661, -0.11781964, -0.1421969, -0.16376711, 0.18121801, 0.123108625, -0.15428194, -0.06915854, 0.05089843, 0.08377868, 0.09607435, -0.02494757, -0.076740764, -0.19782536, -0.3470603, 0.037040427)); + conv2d_7_tf += mul(ni3, min16float4x4(0.10614744, 0.09086957, -0.02948694, 0.017862784, 0.027194018, 0.069870904, -0.021802098, 0.21401364, 0.11846571, -0.056183722, -0.071595654, 0.029162262, -0.124404505, -0.072095454, 0.040073395, -0.02816261)); + conv2d_7_tf += min16float4(-0.034254678, 0.047492404, -0.00038721046, -0.00072104816); + tex9[gxy] = conv2d_7_tf; + min16float4 nconv2d_7_tf = max(-conv2d_7_tf, 0); + conv2d_7_tf = max(conv2d_7_tf, 0); + + min16float4 target = mul(e1, min16float4x4(-0.20878315, 0.073090814, 0.34913197, 0.04554434, -0.3036766, 0.04255219, 0.060676616, 0.24025755, -0.019680336, -0.15252031, -0.03416314, -0.072506554, 0.013241457, -0.10496547, 0.050562985, -0.033250205)); + target += mul(e2, min16float4x4(-0.18049034, 0.09664636, 0.41482204, 0.23575203, -0.05704124, -0.044852983, 0.1783455, -0.017561441, -0.06852369, 0.014129533, -0.21115111, -0.22699773, 0.38242704, 0.01165174, 0.04190493, -0.2141891)); + target += mul(e3, min16float4x4(-0.011946614, -0.16289592, 0.041371312, 0.40975794, 0.0041022287, -0.23657559, 0.10817027, -0.26924378, -0.12006245, 0.26678962, 0.072988346, -0.2085322, 0.0048250603, 0.12894252, 0.07966851, 0.24471562)); + target += mul(ne1, min16float4x4(0.18590502, 0.0845459, -0.12875262, 0.26096, 0.029233042, 0.36381075, 0.117661506, 0.006412487, 0.20946807, 0.07426911, 0.029169528, 0.0654646, 0.16450708, 0.12593012, -0.109644994, 0.14572893)); + 
target += mul(ne2, min16float4x4(0.1973355, -0.2275125, -0.28223652, 0.31719315, 0.3813502, 0.2693579, -0.037815563, -0.16148391, 0.12829015, -0.0030689894, 0.022164742, 0.035949815, -0.3378249, -0.13235879, 0.15883659, -0.17731927)); + target += mul(ne3, min16float4x4(-0.2885664, 0.14904943, -0.19845994, 0.23251331, -0.30293494, 0.02003626, 0.20378608, 0.27291408, -0.16427508, -0.1587996, -0.22501752, -0.04937006, -0.115756296, 0.09290222, -0.26140857, -0.014537909)); + target += mul(conv2d_8_tf, min16float4x4(-0.1513065, -0.31879196, -0.2727547, -0.4583672, 0.3103975, -0.09158548, 0.009788355, -0.09834531, 0.011489709, 0.042706747, 0.37254226, 0.15954055, 0.2172001, 0.09373807, 0.29088458, -0.35286763)); + target += mul(nconv2d_8_tf, min16float4x4(0.23374696, 0.33407655, 0.23616461, -0.09521148, -0.14927168, 0.11939751, 0.42869845, -0.16612507, -0.2706815, 0.16172597, -0.5814591, -0.11577833, 0.065650895, -0.3334003, -0.41168052, 0.32357255)); + target += mul(conv2d_1_tf, min16float4x4(0.3248823, -0.27207342, -0.048840526, -0.217887, -0.018053366, -0.24292938, 0.1603505, 0.06505262, -0.010766065, 0.07076721, 0.22251016, -0.041497335, -0.09878612, 0.2061045, 0.080330074, -0.029014835)); + target += mul(nconv2d_1_tf, min16float4x4(-0.26376098, -0.04971863, -0.03045489, 0.009807002, 0.11108562, 0.0693266, 0.15279642, -0.1372833, 0.18326105, -0.059612468, -0.005589879, 0.021735538, -0.027800532, -0.14984077, -0.116767704, -0.06531209)); + target += mul(conv2d_4_tf, min16float4x4(0.19206688, 0.21824414, 0.03791829, 0.22117318, 0.01257811, -0.044042267, 0.25616458, 0.082941554, -0.1181948, -0.17940602, -0.20808466, -0.06987383, 0.0019713745, -0.1609917, 0.153718, -0.32214788)); + target += mul(nconv2d_4_tf, min16float4x4(-0.19472712, -0.007020553, -0.36049378, -0.24589752, -0.011828978, 0.38882232, -0.3257698, 0.08382738, -0.09556564, -0.20949766, -0.32732338, 0.08303877, -0.107999764, 0.2836336, -0.0661124, 0.24043255)); + target += mul(conv2d_7_tf, 
min16float4x4(-0.1972939, 0.12734106, -0.09953153, -0.45152718, -0.15855458, 0.08746372, 0.11452114, 0.030538268, 0.11946308, 0.17044471, -0.24375156, -0.10093911, 0.19120134, -0.14312318, -0.14860255, -0.1223525)); + target += mul(nconv2d_7_tf, min16float4x4(0.14979935, -0.3136038, -0.25878516, 0.12995318, -0.075706124, -0.104598634, 0.1455947, -0.6167443, 0.06843719, -0.16347055, 0.04413483, 0.08870554, -0.29839858, 0.07214889, 0.049274225, -0.15555117)); + target += min16float4(-0.004266169, -0.020547107, -0.0031655694, 0.0643683); + tex5[gxy] = target; + + target = mul(e1, min16float4x4(0.06760422, 0.16268754, -0.14517367, -0.023386402, -0.23272006, 0.48739922, 0.06399116, -0.032946702, -0.17306012, 0.334446, 0.17779559, -0.2660973, -0.3468709, 0.51220256, -0.010311926, -0.040047005)); + target += mul(e2, min16float4x4(-0.0538168, -0.048309397, 0.064760834, 0.09675621, 0.20269404, -0.2615111, -0.27282992, -0.12584937, 0.10904846, -0.15973651, -0.076846495, -0.09462694, 0.12722874, 0.21629119, -0.35314724, -0.086036965)); + target += mul(e3, min16float4x4(-0.049174394, -0.05765949, 0.21250841, 0.17151582, 0.15764381, 0.040890984, 0.05118504, -0.14658877, 0.05469671, 0.13701054, 0.20377803, -0.39008877, -0.0016028697, 0.13317284, -0.11653242, 0.12591232)); + target += mul(ne1, min16float4x4(0.21234287, -0.3048995, -0.12653783, -0.109162085, -0.050768167, -0.17156011, 0.05592974, 0.27197394, -0.19419932, -0.046344608, -0.05445905, -0.13253787, 0.05778321, 0.16979085, -0.04466505, -0.06867837)); + target += mul(ne2, min16float4x4(-0.18974759, 0.22814974, -0.007522141, -0.10096491, -0.26759568, 0.32048568, 0.2660603, 0.112091035, 0.41875598, -0.1051111, 0.06525224, 0.27191457, 0.017352497, -0.31743342, 0.29108858, 0.26573792)); + target += mul(ne3, min16float4x4(0.031855166, -0.122523904, -0.28207538, 0.12833035, -0.025733596, 0.008542537, -0.1891138, 0.16361842, 0.058317598, -0.007289248, 0.03349703, -0.038986582, 0.18147361, -0.3912238, 0.024964351, 0.14339498)); 
+ target += mul(conv2d_8_tf, min16float4x4(0.37369347, -0.012460246, -0.037854888, 0.067713045, -0.06288331, 0.26436228, -0.058873445, 0.04463945, -0.04286497, -0.04824939, 0.17835206, -0.036378298, 0.33058742, -0.14685723, 0.1025378, 0.051385757)); + target += mul(nconv2d_8_tf, min16float4x4(-0.131484, -0.040644694, -0.14443769, 0.1950223, 0.09507341, 0.48859578, -0.26267928, 0.24538381, -0.063596986, -0.18749404, -0.031884808, -0.07132067, -0.04606875, 0.03708701, -0.26145473, 0.2371378)); + target += mul(conv2d_1_tf, min16float4x4(0.094301306, -0.08795415, -0.035933804, 0.21765485, -0.29858732, 0.11440603, 0.14095801, 0.18262209, -0.08135902, -0.45404965, 0.20399955, -0.06393024, 0.023793167, 0.16001467, -0.11817577, -0.16322103)); + target += mul(nconv2d_1_tf, min16float4x4(0.07168084, 0.0879652, -0.083207026, -0.045181375, 0.07845201, -0.15828066, 0.05710845, 0.05699917, -0.061211787, 0.039662443, 0.036026876, 0.14224064, -0.23701179, 0.01259322, -0.091701694, 0.42408752)); + target += mul(conv2d_4_tf, min16float4x4(0.017442457, -0.1311232, -0.22520894, -0.049517628, -0.20945188, -0.035541452, -0.13055338, -0.04001523, -0.09402065, -0.19641486, -0.10066238, 0.115912616, -0.10684873, 0.02787531, 0.28450257, 0.02690632)); + target += mul(nconv2d_4_tf, min16float4x4(-0.2659566, 0.43625832, -0.0695883, -0.2624756, -0.2827253, -0.22893822, 0.26025924, 0.24121284, 0.2272709, 0.2178127, -0.15199527, 0.32607552, 0.005909836, 0.056527212, 0.19446251, -0.010751997)); + target += mul(conv2d_7_tf, min16float4x4(0.1273358, -0.28996274, -0.19322409, 0.018734567, 0.48555133, -0.17389202, 0.13595583, 0.46163267, -0.08973322, -0.30239192, 0.49897516, 0.021815563, -0.2589829, 0.0039008032, 0.056682784, 0.048075546)); + target += mul(nconv2d_7_tf, min16float4x4(0.415353, 0.112207405, 0.20997275, 0.033321556, -0.1327579, 0.12338585, 0.61820966, -0.3411527, 0.018252999, 0.05708125, -0.24571265, 0.11019793, 0.24145919, 0.20340635, -0.0693869, 0.16271423)); + target += 
min16float4(-0.07107039, 0.0061239223, 0.0013546069, 0.02994767); + tex6[gxy] = target; + + target = mul(e1, min16float4x4(0.0014731521, -0.15165007, 0.04889816, -0.23228844, 0.11362322, 0.07071926, -0.23770805, -0.04347728, -0.16787082, -0.008313435, -0.42370048, 0.08681679, 0.10611205, -0.012660734, 0.10022364, 0.027629996)); + target += mul(e2, min16float4x4(-0.35393402, 0.018436229, 0.10629333, 0.029471794, -0.21129252, -0.301571, 0.0045201713, -0.15636055, 0.298371, 0.11426107, 0.018450111, -0.13657977, 0.22216578, 0.009629214, 0.5373198, 0.30699998)); + target += mul(e3, min16float4x4(-0.1504586, -0.16447587, -0.2739809, -0.14074785, 0.39510623, -0.08384201, 0.14561974, -0.43195033, -0.055713434, 0.12800978, 0.2829296, -0.23494978, 0.14326042, -0.09509476, -0.3169162, 0.124649614)); + target += mul(ne1, min16float4x4(-0.23705968, 0.15959233, 0.11467344, 0.15141489, -0.096755706, 0.023953263, 0.13856179, 0.024189185, 0.13272291, 0.46271062, 0.55494446, -0.14286532, 0.1501738, 0.28827608, 0.058801714, 0.029045105)); + target += mul(ne2, min16float4x4(-0.002308931, 0.07281086, -0.5197955, 0.079986535, 0.38919175, 0.3164044, 0.35857818, 0.09364757, 0.17373051, -0.1447216, -0.05244769, 0.15533692, 0.046295535, -0.19459103, -0.33215967, -0.15369573)); + target += mul(ne3, min16float4x4(0.11478203, -0.29375935, -0.19501545, -0.081721894, -0.103483915, 0.041965716, 0.056954723, 0.19596405, -0.13819647, 0.010641367, -0.11124998, -0.08675409, 0.036859434, 0.23720297, 0.14129876, -0.044769786)); + target += mul(conv2d_8_tf, min16float4x4(0.08397742, -0.12651941, 0.17676216, -0.084249385, 0.36716628, 0.039452277, -0.27606088, -0.36796048, 0.31680533, 0.14186403, 0.4466997, 0.13315229, 0.011085958, -0.17513317, 0.13940759, 0.27495402)); + target += mul(nconv2d_8_tf, min16float4x4(-0.1870658, 0.18817395, 0.010469263, -0.39973256, -0.57167524, -0.38714117, -0.26255277, 0.14361858, 0.018649995, 0.15935089, -0.21745402, -0.0056655053, -0.15408997, -0.03154883, -0.29631105, 
0.27472818)); + target += mul(conv2d_1_tf, min16float4x4(-0.07735958, 0.042861674, 0.36729267, -0.2362879, -0.15516327, -0.009109079, 0.063800156, -0.253287, 0.4471074, 0.0944695, -0.26948866, -0.07759066, 0.045151226, -0.13749917, 0.14566323, -0.13593693)); + target += mul(nconv2d_1_tf, min16float4x4(0.28955856, 0.09293573, 0.07423561, 0.1616493, 0.22285056, 0.01639275, 0.026332684, -0.14958683, -0.32087958, -0.3138252, -0.17335242, -0.38171476, -0.25562596, -0.022701526, 0.17425084, -0.042576227)); + target += mul(conv2d_4_tf, min16float4x4(0.24964347, -0.07078707, 0.18416835, -0.054758202, -0.061644293, -0.0964391, 0.14583856, -0.34874785, -0.3402768, 0.14743538, 0.36047265, 0.04471611, 0.015971184, 0.25227246, -0.011749087, -0.18359871)); + target += mul(nconv2d_4_tf, min16float4x4(-0.059328917, -0.07904788, -0.23883855, -0.06956805, -0.040810965, 0.09536262, 0.0018617791, -0.1898438, 0.1794419, 0.11382087, -0.16192305, 0.22020166, 0.03995484, -0.19086155, -0.2970539, 0.14597812)); + target += mul(conv2d_7_tf, min16float4x4(-0.034995254, 0.060782332, -0.0519364, 0.41303346, -0.06989344, 0.21384521, 0.31474474, 0.12592849, 0.17633408, -0.2764535, 0.36884397, -0.015302021, 0.02951528, 0.094452016, 0.13392285, 0.14435606)); + target += mul(nconv2d_7_tf, min16float4x4(0.13522784, 0.101011604, 0.04657966, -0.043399148, 0.008192044, 0.0027336285, 0.011269824, 0.09976881, -0.026473437, -0.124423906, -0.19602631, -0.09871594, -0.10603456, 0.057509303, -0.09007557, -0.14438893)); + target += min16float4(-0.07283617, -0.09245546, -0.006695486, -0.013076421); + tex7[gxy] = target; +} + + +//!PASS 5 +//!DESC Conv-4x3x3x24, Conv-4x1x1x64 +//!IN tex5, tex6, tex7, tex4, tex8, tex9 +//!OUT tex10, tex11, tex1, tex2, tex3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass5(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = 
GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = tex5.SampleLevel(sam, pos - inputPt, 0); + min16float4 b1 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c1 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d1 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e1 = tex5.SampleLevel(sam, pos, 0); + min16float4 f1 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g1 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h1 = tex5.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i1 = tex5.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex6.SampleLevel(sam, pos - inputPt, 0); + min16float4 b2 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c2 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d2 = tex6.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e2 = tex6.SampleLevel(sam, pos, 0); + min16float4 f2 = tex6.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g2 = tex6.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h2 = tex6.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i2 = tex6.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 
0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float4 a3 = tex7.SampleLevel(sam, pos - inputPt, 0); + min16float4 b3 = tex7.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c3 = tex7.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d3 = tex7.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e3 = tex7.SampleLevel(sam, pos, 0); + min16float4 f3 = tex7.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g3 = tex7.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h3 = tex7.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i3 = tex7.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na3 = max(-a3, 0); + min16float4 nb3 = max(-b3, 0); + min16float4 nc3 = max(-c3, 0); + min16float4 nd3 = max(-d3, 0); + min16float4 ne3 = max(-e3, 0); + min16float4 nf3 = max(-f3, 0); + min16float4 ng3 = max(-g3, 0); + min16float4 nh3 = max(-h3, 0); + min16float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + min16float4 conv2d_1_tf = tex4.SampleLevel(sam, pos, 0); + min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0); + conv2d_1_tf = max(conv2d_1_tf, 0); + + min16float4 conv2d_4_tf = tex8.SampleLevel(sam, pos, 0); + min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0); + conv2d_4_tf = max(conv2d_4_tf, 0); + + min16float4 conv2d_7_tf = tex9.SampleLevel(sam, pos, 0); + min16float4 nconv2d_7_tf = max(-conv2d_7_tf, 0); + conv2d_7_tf = max(conv2d_7_tf, 0); + + min16float4 conv2d_11_tf = mul(a1, min16float4x4(-0.47819614, -0.0145807015, -0.14235033, -0.06459091, 0.051679384, -0.24727756, 
0.16531977, 0.23668537, -0.044610042, -0.03163047, -0.024059737, 0.21251118, -0.02900184, -0.11218355, 0.085020125, -0.08413842)); + conv2d_11_tf += mul(b1, min16float4x4(-0.083133794, 0.08406656, 0.20940667, 0.21155417, -0.12855776, -0.061865382, 0.2486309, 0.13191856, -0.028019775, 0.11366226, 0.13459402, 0.18391807, -0.09688631, 0.011591497, -0.2466206, -0.02237942)); + conv2d_11_tf += mul(c1, min16float4x4(0.0037495645, 0.019915475, 0.07625364, 0.07119373, 0.18423386, 0.07686032, -0.013689673, -0.11513128, -0.12845139, 0.273121, 0.077030145, 0.13114497, 0.04543684, 0.09308563, 0.19357756, 0.24509594)); + conv2d_11_tf += mul(d1, min16float4x4(0.03458686, -0.42040396, -0.104271114, 0.1918791, -0.25708342, 0.03583752, 0.2589993, -0.050576445, 0.0043004244, 0.19324894, 0.080590524, 0.14723596, 0.041485116, 0.13033897, 0.28028202, -0.058933)); + conv2d_11_tf += mul(e1, min16float4x4(-0.09480703, -0.13742156, -0.30406207, -0.03582789, 0.12367775, 0.064455606, -0.061555192, 0.06453598, 0.0917327, 0.04270991, 0.21958654, 0.13570474, -0.12048236, -0.024039079, 0.11226094, 0.050419748)); + conv2d_11_tf += mul(f1, min16float4x4(0.02062305, -0.10862912, 0.12883052, 0.18951532, -0.03850205, 0.11498875, 0.16137509, 0.009759631, -0.09211893, 0.0708826, 0.15651149, 0.19246778, 0.056577608, -0.0871854, 0.090261444, -0.019027064)); + conv2d_11_tf += mul(g1, min16float4x4(0.2780629, -0.054287303, -0.13351089, 0.021154758, -0.12753387, 0.031183334, 0.29430825, -0.06750467, -0.044209514, -0.042159047, -0.12532234, 0.006239919, -0.1961551, 0.099502094, 0.11470277, 0.10832906)); + conv2d_11_tf += mul(h1, min16float4x4(-0.2063426, -0.24898255, -0.28594568, 0.15958025, 0.03609107, 0.06394462, 0.022269696, -0.058725126, -0.104284525, 0.16744058, -0.14197277, -0.0051877275, -0.14164501, 0.021519974, -0.12835859, -0.12090698)); + conv2d_11_tf += mul(i1, min16float4x4(-0.45966595, 0.015630098, -0.3182287, 0.10282032, 0.14680836, -0.23460387, 0.15356645, 0.017346757, 0.05120857, 
-0.035891768, -0.092325106, 0.005394217, -0.09328155, -0.012819384, 0.14028293, 0.012717323)); + conv2d_11_tf += mul(a2, min16float4x4(0.06736054, -0.0044496846, -0.061849196, -0.04067691, -0.06897966, 0.12449442, -0.2508966, -0.090206414, -0.0938398, 0.013633642, 0.1409954, -0.08719504, -0.06788997, 0.098299906, 0.06095718, -0.071988545)); + conv2d_11_tf += mul(b2, min16float4x4(-0.036788728, 0.0037640312, 0.0037646547, -0.021026969, 0.09899778, -0.054118365, 0.08308994, 0.10520542, -0.2592658, 0.113168575, -0.15985844, -0.15588784, -0.114530176, 0.0118468655, -0.08904175, -0.106764145)); + conv2d_11_tf += mul(c2, min16float4x4(-0.027948795, 0.004584627, -0.03517112, -0.0007581547, -0.025537577, -0.035759352, 0.0973176, 0.03644148, -0.16327894, -0.12705119, -0.028998915, 0.123633325, -0.19453679, 0.113648765, 0.012692621, 0.057508085)); + conv2d_11_tf += mul(d2, min16float4x4(-0.010877041, -0.03980561, 0.013339347, 0.061969575, 0.2810196, 0.058558464, -0.1776418, 0.16630451, 0.05817873, 0.07262613, -0.03700459, -0.04399585, -0.16587572, 0.08260915, -0.009857085, 0.04391152)); + conv2d_11_tf += mul(e2, min16float4x4(0.13224548, -0.074267186, 0.03054752, -0.07024831, 0.074185595, -0.02313642, 0.065747924, -0.004960654, 0.14500527, 0.04731656, -0.117002204, -0.08217113, -0.07336124, -0.084052, 0.12326484, 0.09011222)); + conv2d_11_tf += mul(f2, min16float4x4(-0.16047195, 0.076803066, -0.2786948, 0.1176941, -0.0131406775, 0.009936233, 0.1374073, -0.09565009, 0.10070214, 0.11528786, -0.0730813, -0.13431457, -0.21621323, -0.013119195, -0.18385343, -0.058667593)); + conv2d_11_tf += mul(g2, min16float4x4(0.0040132185, 0.134705, -0.011815555, -0.094924495, -0.05727005, 0.0950522, 0.04084158, 0.016365912, -0.02917897, -0.03862751, 0.012003192, 0.03835569, -0.096041076, 0.004406702, -0.10389978, -0.0009610953)); + conv2d_11_tf += mul(h2, min16float4x4(-0.10157398, -0.10319637, -0.012073916, -0.19039184, -0.02369365, -0.021698838, 0.015538155, -0.051231697, 0.041044284, 
-0.02691978, -0.1713024, -0.12904704, -0.03471921, 0.037977315, 0.064845525, 0.1264632)); + conv2d_11_tf += mul(i2, min16float4x4(-0.08406344, 0.044064984, 0.056877784, -0.13283873, -0.0058603142, 0.075482026, -0.09246969, -0.065718174, -0.025745329, 0.015633717, -0.06059284, 0.08149079, -0.022848418, 0.061127402, 0.047879003, 0.04544503)); + conv2d_11_tf += mul(a3, min16float4x4(-0.21875143, 0.053516608, 0.04243476, -0.08509983, 0.406294, -0.060116358, -0.13793904, -0.1559247, -0.017128536, 0.021633752, 0.08865264, -0.032922007, 0.08250139, -0.17353764, -0.16137601, 0.12943612)); + conv2d_11_tf += mul(b3, min16float4x4(-0.27127337, -0.057137657, 0.005031509, 0.10027777, 0.20500132, 0.0073007634, -0.09760265, -0.2428409, -0.16160156, 0.32289484, -0.096351616, -0.15562637, -0.24892123, 0.13368145, 0.31498823, 0.09549184)); + conv2d_11_tf += mul(c3, min16float4x4(0.01444343, 0.07115736, -0.17920075, -0.024846312, 0.057884447, 0.14358939, 0.043788955, -0.013016863, 0.087220736, -0.0060180086, 0.19609165, -0.025888423, -0.06294847, 0.03406598, -0.04250465, -0.04808649)); + conv2d_11_tf += mul(d3, min16float4x4(-0.02662509, 0.24295834, -0.07612864, -0.20615683, 0.16377121, -0.05186765, -0.1750536, -0.04726876, 0.29443663, 0.0983683, -0.18610948, -0.1949004, -0.1446201, 0.11045659, 0.013536009, 0.18919495)); + conv2d_11_tf += mul(e3, min16float4x4(-0.080005094, -0.29404542, 0.14548069, 0.013500291, -0.011395713, -0.048017073, -0.053436857, -0.13627477, 0.041908856, -0.30820572, 0.17044339, 0.22999896, -0.32405153, 0.12114645, -0.080108374, -0.06520369)); + conv2d_11_tf += mul(f3, min16float4x4(0.062350888, 0.170049, 0.04211445, 0.12288375, 0.010835714, -0.17722476, -0.18930283, 0.11607083, -0.054421842, -0.004191082, 0.14655825, -0.1229237, -0.058039404, 0.09008831, -0.017603457, 0.027497675)); + conv2d_11_tf += mul(g3, min16float4x4(0.24208143, 0.04073837, -0.014191606, -0.069054805, 0.33024073, 0.25458166, -0.062864356, 0.028975246, 0.17692459, 0.22093695, -0.17666881, 
-0.03709188, -0.2001521, -0.06491504, 0.199202, 0.08666711)); + conv2d_11_tf += mul(h3, min16float4x4(0.053933676, -0.058177974, 0.006558046, -0.018798346, -0.05610966, 0.21288905, -0.06513558, -0.012686734, 0.11635233, -0.039428618, 0.21562201, -0.07206132, 0.065123, -0.056875434, 0.08877115, -0.10216625)); + conv2d_11_tf += mul(i3, min16float4x4(-0.03294463, 0.011720216, 0.056083966, -0.05530083, -0.16838011, -0.0026962461, -0.17402422, -0.009680605, -0.0064969915, 0.14410603, 0.090527765, 0.048180934, -0.06482277, -0.17573984, 0.36281663, 0.14240478)); + conv2d_11_tf += mul(na1, min16float4x4(0.026252843, 0.01621395, -0.03018171, 0.20843759, -0.05987382, -0.13891932, 0.008612968, -0.03674587, 0.055803657, -0.020272622, -0.12338887, -0.21429133, -0.026188683, -0.08283737, -0.07952566, 0.11333926)); + conv2d_11_tf += mul(nb1, min16float4x4(-0.03251504, -0.04554576, 0.012727539, 0.06115098, -0.23113467, -0.21784578, 0.10390341, -0.028863542, 0.1405748, -0.092941806, 0.04094931, 0.26037696, 0.014778488, -0.0012763811, 0.120576814, 0.017626097)); + conv2d_11_tf += mul(nc1, min16float4x4(-0.18005073, 0.08914073, -0.19792715, 0.07666369, -0.040389247, 0.06043132, -0.068735644, 0.006061951, -0.09742132, -0.015570641, -0.05810036, -0.06305046, 0.06286483, -0.1669205, -0.15426171, 0.046022687)); + conv2d_11_tf += mul(nd1, min16float4x4(-0.045976873, 0.028456753, 0.037186757, 0.05231241, -0.12909305, -0.16277504, -0.0035813665, -0.06294949, -0.04205357, -0.15816367, -0.021810539, -0.108161986, -0.08399507, -0.12965044, -0.00611913, -0.029711436)); + conv2d_11_tf += mul(ne1, min16float4x4(0.2537032, -0.018604688, 0.16584206, -0.20883793, -0.10245589, -0.06570063, -0.16321684, 0.02899805, -0.1427425, 0.20915249, -0.1761724, -0.09594, -0.10995607, -0.11155546, 0.037878104, 0.028106442)); + conv2d_11_tf += mul(nf1, min16float4x4(-0.1628865, -0.17466225, -0.14372015, 0.05667306, 0.10472602, -0.018716356, 0.087850116, -0.056246866, 0.083403885, -0.082255535, -0.10299376, 
-0.1840543, -0.35220358, -0.059505656, -0.21391232, 0.16591822)); + conv2d_11_tf += mul(ng1, min16float4x4(0.040541083, -0.1146205, -0.021495365, -0.033008795, 0.007970957, 0.007984478, 0.02606323, 0.012668774, 0.12771203, -0.09947922, -0.14149466, -0.1890857, -0.14682727, 0.033072542, -0.11833484, -0.038956877)); + conv2d_11_tf += mul(nh1, min16float4x4(-0.14274059, 0.08827524, 0.011712704, 0.10902492, 0.060481314, 0.003578728, 0.029129535, 0.08889746, -0.09685511, -0.095264345, -0.13920794, -0.11014531, -0.05436568, 0.060371455, 0.07251505, 0.20626338)); + conv2d_11_tf += mul(ni1, min16float4x4(-0.07604635, -0.035359483, 0.010230144, 0.030468917, -0.008423673, 0.0273416, -0.10538517, 0.10806335, 0.03605524, -0.082360476, -0.06390322, -0.19094782, -0.10980772, 0.13070256, -0.009116851, 0.094997086)); + conv2d_11_tf += mul(na2, min16float4x4(0.06696349, 0.02884076, -0.21400648, 0.10645195, -0.15960447, 0.07844191, 0.09057932, -0.022310507, -0.20641366, -0.20897295, 0.05159085, -0.042257026, 0.16398512, -0.22846761, -0.033591952, 0.3359712)); + conv2d_11_tf += mul(nb2, min16float4x4(-0.024236226, -0.13937415, 0.29392216, 0.075087205, 0.07763272, 0.27571923, -0.28625518, -0.37574485, -0.0041614594, 0.051519327, -0.1727601, -0.002199689, -0.32436445, 0.059740037, 0.006543187, 0.11488307)); + conv2d_11_tf += mul(nc2, min16float4x4(-0.025740145, 0.10688955, 0.3432225, 0.04467087, 0.033870216, 0.16714002, 0.20819634, -0.11762629, 0.19059974, 0.0661928, 0.022394795, -0.14459209, -0.16684553, 0.08020461, -0.37147745, 0.04065124)); + conv2d_11_tf += mul(nd2, min16float4x4(-0.006134667, -0.0031798254, -0.101459935, 0.15463492, 0.039860703, 0.077067874, 0.17671694, -0.06597644, -0.12203232, -0.058787927, 0.008942991, 0.0570718, -0.043793175, -0.06388724, 0.0247615, -0.09814649)); + conv2d_11_tf += mul(ne2, min16float4x4(0.009333359, -0.10666345, 0.19417302, -0.08021104, 0.071850464, 0.18651992, 0.1487532, 0.03132098, -0.21202543, 0.02972519, 0.028346745, 0.17178747, 
-0.24139602, -0.18386513, -0.03009887, -0.17363264)); + conv2d_11_tf += mul(nf2, min16float4x4(0.006349671, -0.0199598, 0.14889078, -0.14921328, -0.08713048, 0.14722322, 0.041971955, -0.019181551, 0.07069949, -0.12362262, 0.08554868, 0.16224997, -0.11218193, 0.3132043, -0.18114331, -0.104602315)); + conv2d_11_tf += mul(ng2, min16float4x4(0.047690846, -0.26872492, 0.2183612, 0.19340567, -0.06084255, 0.04798949, 0.19492827, 0.14699973, -0.07016259, 0.14654481, -0.06714773, 0.07936776, 0.073397264, -0.10646918, -0.13238135, 0.07208961)); + conv2d_11_tf += mul(nh2, min16float4x4(0.07382223, -0.044347115, -0.032497067, -0.02002406, 0.18200569, -0.09839878, -0.0027670355, -0.032592446, -0.05297432, 0.11200702, -0.019955616, 0.112369545, -0.2748285, -0.139697, -0.26332188, -0.303972)); + conv2d_11_tf += mul(ni2, min16float4x4(-0.23713836, -0.003925555, 0.16436225, 0.15221255, 0.1077621, -0.027760457, 0.0059113647, -0.11066059, -0.0980858, 0.011830199, 0.040253483, 0.06447465, -0.0827841, 0.04048125, 0.04551489, -0.12471252)); + conv2d_11_tf += mul(na3, min16float4x4(0.010833946, -0.058524415, -0.19618602, -0.11400699, -0.088038966, -0.08249501, 0.025192872, -0.04508469, -0.017629553, 0.10654934, 0.007814974, 0.041299284, 0.054442752, 0.14059617, 0.09760092, -0.060198124)); + conv2d_11_tf += mul(nb3, min16float4x4(-0.16173755, 0.14454803, -0.036523324, 0.016083395, -0.04597214, 0.019925527, 0.10551423, 0.07915449, -0.09191786, 0.040694106, 0.079085656, 0.04860138, -0.00920608, 0.015785221, 0.08149557, -0.070038155)); + conv2d_11_tf += mul(nc3, min16float4x4(0.09396598, -0.27780503, 0.057351794, 0.17856738, 0.06403465, -0.019479418, 0.13132542, 0.09766009, -0.13038878, 0.106342256, 0.19923963, -0.107940085, -0.11207263, 0.07427199, 0.122141175, -0.17083314)); + conv2d_11_tf += mul(nd3, min16float4x4(-0.0129763335, 0.029884486, -0.1591489, 0.05743726, -0.10154112, -0.05951815, 0.038755298, 0.31987077, 0.041023176, 0.15760195, 0.020455543, 0.117823385, 0.008611401, 
0.10392111, -0.029049959, -0.00561999)); + conv2d_11_tf += mul(ne3, min16float4x4(0.11115114, 0.13910228, -0.15370879, 0.14353245, -0.106912665, 0.16457058, -0.0007093892, -0.16065751, 0.12172275, -0.0071658283, -0.13790236, -0.05790294, 0.0258849, 0.047155324, 0.028826248, 0.077854194)); + conv2d_11_tf += mul(nf3, min16float4x4(0.04222945, 0.016645031, -0.22052032, -0.108474314, -0.037527397, 0.1508435, 0.13960642, 0.051745985, 0.17182018, -0.0071819094, 0.13896792, 0.12522686, 0.1307583, 0.09315921, 0.031736225, -0.24318463)); + conv2d_11_tf += mul(ng3, min16float4x4(0.12233872, 0.16193391, -0.045825243, -0.021991767, -0.06857775, 0.019997157, 0.26207915, 0.017674582, 0.14816906, -0.011254348, 0.11932189, -0.06385669, -0.08113471, 0.13287768, -0.008416972, -0.039866585)); + conv2d_11_tf += mul(nh3, min16float4x4(0.15459004, -0.029546147, -0.20761466, -0.12011381, -0.09814943, -0.12983616, 0.0019625768, 0.086729765, 0.22380745, 0.112912305, -0.073421806, -0.061414655, -0.00015528004, -0.10514693, 0.0449276, 0.1197672)); + conv2d_11_tf += mul(ni3, min16float4x4(0.031599533, -0.0699447, 0.10802751, -0.011152619, 0.08078543, 0.10828058, 0.10941837, -0.07911565, 0.16324246, -0.034676578, 0.04017893, 0.01809475, -0.0054880627, 0.027349245, -0.041267768, 0.041391887)); + conv2d_11_tf += min16float4(-0.022754392, 0.009821446, 0.06426939, -0.052443504); + tex10[gxy] = conv2d_11_tf; + min16float4 nconv2d_11_tf = max(-conv2d_11_tf, 0); + conv2d_11_tf = max(conv2d_11_tf, 0); + + min16float4 conv2d_10_tf = mul(a1, min16float4x4(0.31697825, -0.38101152, 0.26027805, 0.19195847, -0.15098146, 0.17915927, 0.263392, -0.108211316, 0.004631585, -0.06989657, 0.057514362, 0.013759571, -0.06416892, 0.033370133, -0.04808954, -0.1563251)); + conv2d_10_tf += mul(b1, min16float4x4(0.15827416, -0.17950794, 0.16834997, 0.13073751, 0.030396005, 0.040662624, 0.16062944, 0.041357074, -0.13926722, -0.06929913, 0.10808029, -0.06798461, 0.10745701, -0.102971874, -0.06641405, 0.0885879)); + 
conv2d_10_tf += mul(c1, min16float4x4(0.017569518, -0.074986644, -0.0381504, -0.108356364, -0.028105393, 0.107422166, 0.010693419, -0.03790183, -0.056355134, -0.17228265, 0.19153535, 0.014339309, -0.072250925, 0.25570604, 0.06766601, 0.10274542)); + conv2d_10_tf += mul(d1, min16float4x4(0.11808023, 0.03209569, -0.047605, 0.10232121, -0.089450955, 0.22296266, -0.031239472, 0.12547736, -0.13355453, 0.09658202, 0.14639929, 0.1722445, -0.16578807, -0.01587181, -0.06775275, 0.106690586)); + conv2d_10_tf += mul(e1, min16float4x4(-0.08015724, -0.09917064, 0.17005561, -0.11093009, -0.033904083, -0.18723048, -0.42410555, -0.34870258, -0.024956835, -0.057636626, -0.17249386, 0.3452565, -0.0781917, 0.048283495, -0.1849922, 0.10712763)); + conv2d_10_tf += mul(f1, min16float4x4(-0.19845031, 0.018594265, 0.11669769, 0.04427017, -0.13347605, 0.14735079, -0.20751207, -0.08490434, -0.077883884, -0.17200643, 0.03127422, 0.11106135, -0.04682848, -0.04392586, 0.11629085, -0.03191463)); + conv2d_10_tf += mul(g1, min16float4x4(-0.035416074, 0.032688126, -0.034218192, -0.35819814, -0.07167647, -0.032766674, -0.09849224, 0.27033108, -0.040135793, 0.11793038, 0.024326177, 0.056732934, 0.0072507905, -0.15076852, -0.007368895, -0.07758195)); + conv2d_10_tf += mul(h1, min16float4x4(0.03677586, 0.088763975, 0.04954433, -0.047844727, -0.07487822, -0.06698103, 0.12568145, -0.22909173, 0.1671084, -0.17893419, 0.09722236, 0.20345661, 0.057767022, 0.044742733, 0.06905004, -0.010992711)); + conv2d_10_tf += mul(i1, min16float4x4(0.0028451576, -0.27325574, 0.14329389, -0.07025869, -0.09781529, 0.0151023185, 0.08696752, -0.056844577, -0.19665222, 0.09358589, 0.16416575, 0.06988374, 0.16515698, 0.09760437, 0.023626767, 0.16473217)); + conv2d_10_tf += mul(a2, min16float4x4(-0.01080354, 0.014449004, 0.11467091, -0.07119837, 0.18900962, -0.06401898, -0.025841001, 0.13663737, -0.04860565, 0.15505394, 0.11083383, -0.06831929, -0.12395706, 0.04564376, -0.132784, 0.095948376)); + conv2d_10_tf += mul(b2, 
min16float4x4(-0.009644828, 0.05351468, -0.086626254, -0.07883177, 0.12082235, 0.16186416, 0.20026602, -0.12537873, -0.02765183, -0.19664048, -0.14943156, 0.17649364, -0.15099925, -0.16448402, 0.04770359, 0.08525748)); + conv2d_10_tf += mul(c2, min16float4x4(-0.07529481, 0.057762332, 0.02256763, 0.0037007954, 0.052606575, 0.008619477, 0.035252705, -0.060551647, 0.03680644, 0.1457205, 0.0970469, 0.00867666, -0.0931654, -0.046189044, -0.118787736, 0.059376143)); + conv2d_10_tf += mul(d2, min16float4x4(0.024567254, -0.07128407, -0.02618071, -0.16522972, 0.02537496, 0.09393943, -0.018046979, -0.12497053, 0.041589152, 0.028847594, 0.072174646, -0.12484334, -0.096903354, 0.07245438, -0.03219862, 0.037360255)); + conv2d_10_tf += mul(e2, min16float4x4(0.05599119, -0.0027604182, -0.004961665, -0.1297362, 0.10879746, 0.14088875, -0.031004267, -0.016735828, 0.07093551, 0.024946349, 0.16840066, -0.10094298, -0.04150052, 0.09933387, 0.09332617, -0.121228844)); + conv2d_10_tf += mul(f2, min16float4x4(0.099246845, -0.17000747, -0.17089754, 0.0021521626, 0.046584304, -0.037944607, 0.1009471, 0.110904016, 0.17920195, -0.00022254961, 0.07443117, 0.07490046, 0.1700909, -0.18371364, -0.15320961, -0.0344897)); + conv2d_10_tf += mul(g2, min16float4x4(0.10543544, 0.04469465, 0.14627467, -0.07649682, -0.082381524, 0.12919065, 0.090079635, -0.07820535, -0.06769879, -0.12625079, -0.06946243, -0.19333136, 0.02998107, 0.01594043, 0.12332583, 0.015775004)); + conv2d_10_tf += mul(h2, min16float4x4(0.025815854, 0.015107419, -0.045278236, 0.13242702, -0.059958965, 0.031560495, 0.047686167, 0.064922616, 0.09818797, -0.07938157, -0.08586279, 0.079509474, -0.031728156, 0.052335043, 0.046583798, 0.17072229)); + conv2d_10_tf += mul(i2, min16float4x4(-0.07827454, -0.033509843, 0.054832056, -0.011652403, -0.029872715, -0.13623856, 0.013034195, -0.009600983, -0.08374398, 0.0022505643, 0.042340405, 0.050227124, -0.072084844, -0.044353593, 0.06991293, -0.024949703)); + conv2d_10_tf += mul(a3, 
min16float4x4(0.08938938, -0.092218116, -0.016011834, 0.038319822, 0.12462916, 0.30430344, -0.2225195, 0.23016618, 0.16917962, -0.10025298, 0.03197825, -0.0028935818, -0.20949106, 0.16084236, 0.02389285, -0.07628905)); + conv2d_10_tf += mul(b3, min16float4x4(0.008811933, -0.07407284, 0.06164061, -0.08511243, 0.23705618, -0.04852394, -0.09615244, -0.14999956, 0.14771207, -0.31061637, 0.053693004, 0.12648372, 0.13281338, -0.052495755, -0.10527891, 0.055210527)); + conv2d_10_tf += mul(c3, min16float4x4(-0.002706158, -0.08600029, 0.067195736, 0.11638961, 0.22492133, 0.21856707, -0.07640264, -0.06916772, 0.06080084, 0.11333604, 0.06812178, -0.033994764, 0.18698989, -0.0062931813, -0.07839693, -0.19759217)); + conv2d_10_tf += mul(d3, min16float4x4(0.016470285, -0.08823432, 0.22680223, 0.09997554, 0.23114151, 0.19813643, -0.35361916, 0.2194339, 0.11047473, 0.068083756, 0.067214124, 0.43412095, -0.012517998, 0.15817562, 0.041793827, -0.12873247)); + conv2d_10_tf += mul(e3, min16float4x4(0.072530076, 0.13730067, 0.2244758, -0.07199118, -0.052385315, 0.10464238, 0.26556495, -0.2717685, -0.11540168, -0.018752037, 0.025696546, -0.12900795, -0.010386023, -0.020768933, 0.24903738, -0.14111607)); + conv2d_10_tf += mul(f3, min16float4x4(-0.24632111, -0.015176092, -0.02656606, 0.009465184, -0.0051622107, 0.14365524, 0.110313326, 0.075529456, -0.041912608, -0.012926297, 0.099115536, -0.043660834, 0.14709431, 0.069978856, 0.19860862, 0.30215213)); + conv2d_10_tf += mul(g3, min16float4x4(0.003388868, 0.000683922, 0.025133248, 0.004995937, -0.06642034, 0.028584523, -0.14691937, -0.2014579, 0.15427552, -0.027058927, 0.04456965, 0.084938034, -0.24065961, -0.014348999, -0.093859546, -0.032467082)); + conv2d_10_tf += mul(h3, min16float4x4(-0.067999065, -0.061825316, -0.056987073, 0.0009880592, -0.014163033, -0.30605268, 0.22628185, 0.01192761, -0.08495571, 0.17559315, -0.17546391, -0.0027795131, -0.289151, -0.41655365, 0.11138813, -0.18327911)); + conv2d_10_tf += mul(i3, 
min16float4x4(-0.032702215, 0.072819114, -0.06573772, -0.023648093, -0.28138083, 0.0492584, 0.17402509, -0.04257587, 0.109756455, 0.086533375, -0.017961387, 0.02175586, -0.12014975, 0.0101643065, 0.34295502, -0.04737776)); + conv2d_10_tf += mul(na1, min16float4x4(-0.043654937, 0.030818325, 0.009349365, 0.0058960635, 0.075968295, 0.10992966, -0.056467474, -0.053309787, -0.020969287, 0.13869311, 0.118167736, 0.20124547, -0.071703844, 0.16065824, 0.0333816, 0.16069882)); + conv2d_10_tf += mul(nb1, min16float4x4(-0.00913058, 0.11581215, -0.08088577, 0.048499383, -0.002100561, 0.14013395, -0.021854091, 0.022357881, -0.007194664, 0.2258521, 0.28041685, 0.035750967, -0.17555529, -0.06302401, 0.006144002, 0.073763065)); + conv2d_10_tf += mul(nc1, min16float4x4(0.13105561, 0.033134516, -0.123544686, 0.036164157, 0.081316054, -0.09048299, -0.034898795, -0.04975392, -0.118228555, 0.0013148085, -0.024866905, -0.07593515, -0.058713235, 0.081549294, 0.09502267, -0.06489622)); + conv2d_10_tf += mul(nd1, min16float4x4(-0.013302538, 0.14520672, -0.041146558, 0.08169293, 0.1506187, 0.062507726, 0.19582897, 0.05240332, 0.015582799, 0.08783006, 0.016972601, -0.23824452, -0.056192238, -0.087197326, 0.0045260703, -0.012997719)); + conv2d_10_tf += mul(ne1, min16float4x4(-0.074937195, -0.018988643, -0.07370074, 0.048774365, 0.07236563, 0.0904083, -0.10467449, 0.10507359, 0.12723474, -0.1263123, -0.17705469, -0.15779553, -0.23850663, -0.119912334, 0.21794695, 0.19370297)); + conv2d_10_tf += mul(nf1, min16float4x4(-0.04097957, -0.0038975494, 0.11273524, -0.049562607, -0.041399803, 0.013795214, -0.07912852, 0.06913985, -0.039762158, 0.031136844, -0.22443683, -0.07978295, 0.15926225, -0.021239735, 0.02987538, 0.0073201153)); + conv2d_10_tf += mul(ng1, min16float4x4(-0.00022499492, 0.07021377, 0.10080298, -0.049646243, 0.08742822, -0.05083212, 0.11067444, 0.0028296155, -0.06948983, -0.032108277, -0.17148562, 0.031176677, 0.028853005, 0.06482861, 0.0068417406, 0.20317557)); + conv2d_10_tf += 
mul(nh1, min16float4x4(0.11648821, -0.17146581, 0.067954056, 0.08905258, -0.08075704, 0.019719714, -0.11522013, 0.07268729, 0.0639498, 0.19816676, 0.014075983, -0.032495353, -0.017302783, 0.001971279, -0.03852454, 0.13213885)); + conv2d_10_tf += mul(ni1, min16float4x4(-0.043073803, 0.013491542, -0.0071037943, 0.104073495, 0.02311169, 0.058454588, -0.036697295, -0.048574958, -0.02161516, 0.10554709, 0.07252144, 0.013570617, -0.08058747, -0.050845098, 0.11659161, 0.12994757)); + conv2d_10_tf += mul(na2, min16float4x4(-0.065163076, 0.19974495, -0.4120684, 0.07145881, 0.113002166, 0.23591681, 0.09600776, -0.12980238, -0.032298863, -0.09617708, -0.09807077, -0.019956803, -0.0144692, -0.11556348, -0.080140986, -0.088292986)); + conv2d_10_tf += mul(nb2, min16float4x4(-0.012835261, -0.04646276, 0.072318554, -0.08490823, 0.1648558, -0.15578964, 0.07145768, 0.12143512, 0.007787767, 0.07922046, -0.10203864, -0.15637778, 0.17195338, -0.16184372, -0.01940918, -0.0037627215)); + conv2d_10_tf += mul(nc2, min16float4x4(-0.118128635, -0.06761304, 0.20045926, -0.11828058, 0.022446023, -0.09117082, 0.11077834, 0.12605691, -0.094919816, -0.016070768, -0.025274863, 0.13070245, 0.14234897, -0.080053166, -0.14352201, 0.24688406)); + conv2d_10_tf += mul(nd2, min16float4x4(-0.038446598, 0.06076558, 0.011793446, -0.027539631, 0.12532312, 0.12770405, 0.05115926, 0.07202868, 0.00048553053, -0.20094085, 0.14294891, 0.27486032, 0.09690127, -0.19488129, -0.010087613, -0.32277402)); + conv2d_10_tf += mul(ne2, min16float4x4(-0.03640304, -0.03347442, -0.14699876, 0.084367014, -0.0931957, 0.0046109143, -0.10012045, -0.21788213, -0.22289619, -0.15080798, 0.053079627, 0.058909237, 0.0033036254, -0.266638, 0.15794982, 0.15606833)); + conv2d_10_tf += mul(nf2, min16float4x4(-0.16570765, -0.19292961, -0.040884703, 0.0350054, 0.044223823, -0.05094823, -0.10369617, -0.026184212, -0.07026344, 0.08071905, -0.05532503, -0.105882615, 0.11906692, -0.12926123, 0.18500324, 0.09285109)); + conv2d_10_tf += mul(ng2, 
min16float4x4(-0.30376035, -0.015966324, -0.080935225, -0.054857124, 0.008181847, -0.051866602, 0.086870745, -0.205586, -0.13184556, -0.03217006, 0.029946566, -0.10589564, 0.045322973, -0.1656244, -0.08579307, -0.121582575)); + conv2d_10_tf += mul(nh2, min16float4x4(-0.06772616, -0.14879958, -0.17823575, 0.020676576, -0.04157187, -0.019993478, -0.026832247, -0.22187601, -0.12282354, -0.101527624, 0.10540906, -0.09816911, 0.01171376, -0.35307917, -0.21599512, -0.12673624)); + conv2d_10_tf += mul(ni2, min16float4x4(0.13506149, -0.12476234, -0.23067783, 0.0016245812, 0.27068454, 0.085986294, 0.08674341, 0.07736311, 0.04183122, 0.09630597, 0.005955931, -0.033355173, -0.19212, -0.2707448, -0.18517534, -0.035879433)); + conv2d_10_tf += mul(na3, min16float4x4(-0.0151614295, 0.047397353, 0.0923022, 0.08485078, 0.15618569, -0.11042138, 0.12418296, -0.07967247, 0.053651772, 0.015027734, 0.048835948, 0.07711154, 0.020557769, 0.023958597, 0.04587901, -0.0014006038)); + conv2d_10_tf += mul(nb3, min16float4x4(0.038551513, -0.10045045, 0.06231501, 0.043190606, 0.011727592, 0.10791629, 0.022111481, -0.053163722, 0.11845128, -0.102105886, 0.08789077, -0.0027942352, -0.08893058, 0.008466707, 0.011015023, -0.047280762)); + conv2d_10_tf += mul(nc3, min16float4x4(0.013820725, 0.1256963, 0.041195784, -0.057415746, -0.07633132, -0.025274424, 0.029755162, -0.046797376, -0.037444938, -0.09385259, 0.14993298, 0.040402364, 0.057619866, 0.0044342144, 0.044209216, 0.13005155)); + conv2d_10_tf += mul(nd3, min16float4x4(0.07646884, 0.18639803, -0.021711063, 0.021434348, 0.11517055, 0.010340496, -0.0018932755, -0.3739696, 0.1309672, 0.08240308, 0.08870368, 0.09622062, -0.07567563, -0.08575518, 0.12712875, 0.16571298)); + conv2d_10_tf += mul(ne3, min16float4x4(-0.028878238, -0.06821328, -0.048233025, 0.010556409, 0.08252249, 0.12659778, 0.10306397, 0.041443437, -0.008534995, -0.08196783, -0.13689299, 0.048229158, 0.12889823, 0.12517701, -0.06344265, 0.11288182)); + conv2d_10_tf += mul(nf3, 
min16float4x4(0.20085302, 0.024324976, 0.012985146, 0.045487225, -0.14292689, 0.091915675, 0.030304266, -0.007919423, -0.09057523, -0.13942213, 0.22375956, -0.15821122, 0.13392857, 0.06950518, -0.009899817, -0.19455001)); + conv2d_10_tf += mul(ng3, min16float4x4(-0.18937646, 0.13056205, -0.09389302, -0.06861626, 0.030355467, -0.07237441, 0.079272114, -0.018099891, -0.057733692, 0.14460595, -0.068894215, 0.073404275, -0.005731954, -0.16851021, 0.029365558, 0.04029561)); + conv2d_10_tf += mul(nh3, min16float4x4(-0.11247864, -0.026352342, -0.26439467, 0.021711655, -0.17112786, 0.09201832, 0.058435153, -0.18282679, -0.058647767, -0.0882594, -0.09513095, 0.046603747, 0.118426494, -0.06860188, 0.14646193, -0.10118678)); + conv2d_10_tf += mul(ni3, min16float4x4(-0.08203177, 0.049650684, 0.11541628, 0.07473622, -0.06572682, -0.018375592, -0.0739239, -0.08190655, -0.012673694, 0.0003337712, 0.041397918, -0.047579113, -0.13510825, 0.025625594, -0.035801806, -0.045355853)); + conv2d_10_tf += min16float4(0.03802586, 0.06033134, 0.0405485, 0.00039835402); + tex11[gxy] = conv2d_10_tf; + min16float4 nconv2d_10_tf = max(-conv2d_10_tf, 0); + conv2d_10_tf = max(conv2d_10_tf, 0); + + min16float4 target = mul(e1, min16float4x4(0.2216899, -0.006199309, -0.14865121, 0.06256912, 0.082141966, 0.069441915, -0.064958416, -0.014999604, -0.017270254, 0.054063573, -0.30066323, 0.09460075, 0.17069338, -0.26000282, 0.026078973, -0.0024098607)); + target += mul(e2, min16float4x4(0.22918217, 0.2753827, -0.2260137, 0.0074888375, 0.007864308, 0.01738929, 0.036404576, 0.15125586, 0.12692557, -0.1064573, -0.105954304, 0.17095445, -0.295937, 0.2284073, -0.28089303, 0.17836742)); + target += mul(e3, min16float4x4(-0.23949356, -0.20830329, 0.043005105, 0.11848222, 0.26292896, 0.13052817, 0.14105777, -0.14028162, 0.033770017, -0.12098709, -0.19063175, -0.020637099, 0.032703582, -0.31454226, 0.07559202, 0.067997165)); + target += mul(ne1, min16float4x4(-0.26934767, 0.25418487, 0.2089665, -0.15689164, 
0.068669625, -0.19087234, 0.034052055, -0.038685646, 0.037284948, 0.14673525, -0.001882231, 0.07179596, -0.054052413, 0.2954734, 0.108455196, 0.21742904)); + target += mul(ne2, min16float4x4(0.24180835, 0.012385412, -0.017178789, 0.032714315, -0.26524556, 0.024244266, -0.226589, -0.0358992, -0.2241718, 0.08004254, -0.017615836, -0.2492002, 0.09387765, 0.18154638, -0.034240507, 0.3605678)); + target += mul(ne3, min16float4x4(0.24151021, -0.014141217, -0.1259467, -0.19366209, -0.07166293, 0.08856931, -0.08999051, 0.31848234, -0.07388433, -0.16038652, 0.28902727, 0.2382835, -0.15296587, -0.12924191, 0.16233487, 0.05408346)); + target += mul(conv2d_11_tf, min16float4x4(-0.18532315, 0.116318375, -0.043276392, -0.20643523, -0.1317004, -0.025412546, -0.32449946, 0.08039049, -0.18457016, -0.015615943, -0.01645252, 0.21732457, 0.082662076, 0.1900878, -0.11705433, 0.14767131)); + target += mul(nconv2d_11_tf, min16float4x4(0.052993804, -0.11595191, 0.32436988, -0.003765943, 0.2296748, 0.119828835, -0.019125028, -0.3126433, -0.039699726, -0.24760635, 0.08949547, -0.012501165, 0.33296522, -0.349697, -0.081094205, 0.061596226)); + target += mul(conv2d_1_tf, min16float4x4(-0.033869196, 0.12660468, 0.12152309, -0.18401411, 0.1442463, 0.18430543, 0.22487932, 0.29795903, 0.17951487, -0.24413475, -0.13472381, 0.3147198, -0.22021247, -0.15316834, 0.013162168, -0.20238425)); + target += mul(nconv2d_1_tf, min16float4x4(-0.0015613904, -0.09523476, 0.024224702, -0.17930624, -0.061623972, 0.06495367, 0.3776854, -0.17299566, -0.36212873, 0.13202415, 0.07052771, -0.1219512, 0.29942214, -0.011110212, 0.36104754, 0.0010065075)); + target += mul(conv2d_4_tf, min16float4x4(0.16467105, 0.29388088, 0.13385788, 0.118168965, 0.15695275, -0.2269201, 0.097460486, -0.04286567, 0.020316202, -0.07753041, -0.18018067, -0.111885116, -0.17371373, 0.04722513, 0.2188871, 0.1295067)); + target += mul(nconv2d_4_tf, min16float4x4(0.2567296, 0.0027146419, -0.18108767, -0.10636566, -0.04075492, 0.08977396, 
0.27601838, 0.041642547, -0.29131287, -0.0026349663, 0.16847563, 0.29684088, 0.23944439, -0.12667872, -0.31902757, -0.023768846)); + target += mul(conv2d_7_tf, min16float4x4(-0.12111429, 0.046077378, 0.07920395, -0.3619861, 0.0030046673, -0.21324079, -0.14134064, 0.07692796, 0.2308601, 0.050601542, -0.20067136, 0.1312576, 0.078878105, -0.07905382, 0.04887801, 0.11589316)); + target += mul(nconv2d_7_tf, min16float4x4(0.18035689, 0.022012187, -0.05441432, -0.13895841, 0.1792498, 0.06579118, -0.3518265, 0.19284686, -0.36724597, -0.19384578, 0.052024953, 0.069351286, -0.17106277, 0.01428955, -0.022695465, -0.03882866)); + target += mul(conv2d_10_tf, min16float4x4(0.12341931, 0.21374431, 0.14095145, 0.11081035, -0.1377048, 0.2957615, 0.2647214, -0.21324296, 0.18657272, -0.16867872, 0.13558641, -0.14022234, -0.00384067, -0.19601567, -0.20603377, 0.006892211)); + target += mul(nconv2d_10_tf, min16float4x4(0.05891213, 0.17766091, -0.11099863, -0.10597074, 0.4759035, -0.20892517, -0.35479382, -0.057822235, -0.10161365, -0.11828349, -0.021581944, 0.057930104, -0.46801752, -0.25330284, 0.30126703, -0.31744412)); + target += min16float4(0.011156243, 0.004168819, 0.082229175, 0.043994825); + tex1[gxy] = target; + + target = mul(e1, min16float4x4(0.137003, -0.06089221, -0.108805895, 0.27130327, -0.3015222, -0.26373127, 0.019133324, 0.035202216, 0.040255867, 0.09030984, -0.46218738, -0.3097094, -0.057662863, 0.123317555, 0.037645355, 0.010423522)); + target += mul(e2, min16float4x4(0.29102653, -0.17060617, 0.31592718, -0.15487169, -0.09719322, 0.08212171, -0.24112037, -0.5323616, 0.050776903, 0.26745227, -0.0123307025, -0.0076298076, -0.044822518, -0.15961778, 0.26758936, 0.019300641)); + target += mul(e3, min16float4x4(0.19517086, -0.2878986, 0.12765801, -0.12057966, 0.27521843, 0.028182628, 0.32267106, 0.035355434, -0.065272234, -0.015919037, 0.38220987, 0.14314096, 0.052418232, 0.07207548, -0.41493666, -0.03195114)); + target += mul(ne1, min16float4x4(0.18309553, -0.11183888, 
-0.052814357, -0.08971906, -0.14353213, -0.20144752, -0.20325397, -0.16143575, 0.028960846, -0.16557908, 0.266044, -0.2373641, 0.12750591, -0.11190832, 0.35028338, 0.17638433)); + target += mul(ne2, min16float4x4(0.058721025, 0.21000905, -0.2719825, -0.16923684, 0.2887994, 0.08877727, -0.1274528, 0.12557751, -0.09804875, -0.37839252, -0.1465434, -0.1059692, 0.07212408, -0.101579584, -0.16375211, -0.09519384)); + target += mul(ne3, min16float4x4(-0.145749, -0.15073515, -0.2661711, -0.21265043, -0.3345085, -0.16820145, 0.07732321, 0.13837157, 0.014605319, -0.14113256, -0.3269443, -0.100293055, 0.114504874, -0.4271041, -0.17389913, 0.0033216716)); + target += mul(conv2d_11_tf, min16float4x4(0.022264633, -0.19477129, 0.050657783, -0.08318149, -0.5125155, 0.030831251, 0.110084355, -0.25779435, 0.08368584, 0.48425493, -0.28335044, 0.23433922, 0.31263804, -0.12789254, -0.14072786, 0.10106589)); + target += mul(nconv2d_11_tf, min16float4x4(0.007650675, -0.082783565, -0.1599306, 0.22329025, -0.01190027, 0.09498623, -0.06526687, -0.074669816, 0.13880949, -0.0060707824, -0.044009406, 0.15161307, -0.121638715, 0.012903123, 0.047266923, -0.41495043)); + target += mul(conv2d_1_tf, min16float4x4(0.1315474, 0.2878135, -0.03521026, 0.31479505, 0.4425801, 0.22921802, -0.19864602, -0.0049938424, -0.39346734, 0.09232505, 0.20387846, 0.08173493, -0.2582244, -0.23351125, 0.04481434, -0.105453715)); + target += mul(nconv2d_1_tf, min16float4x4(-0.10668876, -0.026544912, 0.19446668, 0.0045490777, -0.024656052, -0.11874863, 0.21377616, 0.16957945, 0.36561254, -0.19234993, -0.16987774, 0.05442733, -0.13925838, -0.09912278, -0.06849117, 0.2862709)); + target += mul(conv2d_4_tf, min16float4x4(0.33045495, -0.13048914, -0.023560356, -0.21611182, 0.031752963, 0.14722162, -0.18900181, -0.214494, -0.014231522, 0.23605579, 0.04047805, 0.4060913, -0.13969432, -0.20286381, -0.29891747, -0.043839972)); + target += mul(nconv2d_4_tf, min16float4x4(0.12433207, 0.20156589, -0.16986352, 0.07386095, 
-0.08681933, -0.055620465, -0.043641977, 0.25392216, -0.19010517, -0.018021587, -0.040169913, 0.3845108, -0.18094495, -0.07285529, 0.1848976, -0.24628341)); + target += mul(conv2d_7_tf, min16float4x4(-0.038218584, 0.1562106, -0.14935517, 0.14979756, -0.24085392, -0.32680586, -0.015209841, 0.31288582, 0.15819284, -0.084411524, -0.18117775, 0.16964395, 0.29338664, -0.020204993, 0.011733066, -0.03798886)); + target += mul(nconv2d_7_tf, min16float4x4(-0.020065956, -0.043856975, 0.016091857, 0.19466555, 0.16528654, 0.049655683, -0.3676622, -0.14080617, -0.094320625, 0.27908608, -0.084430434, -0.07656003, 0.19461128, 0.11947404, -0.05046522, -0.12625407)); + target += mul(conv2d_10_tf, min16float4x4(-0.013265381, -0.015804514, -0.12068759, -0.06364535, -0.040848896, -0.07602193, -0.04744431, 0.29088646, 0.1358165, 0.010972456, -0.04270195, -0.091147564, -0.2690454, 0.23030208, -0.39135924, -0.22463588)); + target += mul(nconv2d_10_tf, min16float4x4(0.20590256, 0.098045684, 0.3285928, 0.04094028, 0.12415101, 0.244203, 0.048238404, 0.17298737, 0.22513592, 0.048016686, -0.11171281, 0.12644528, -0.40468216, -0.02186692, -0.09637657, -0.20869099)); + target += min16float4(-0.01212462, -0.018702446, -0.0063916473, -0.015887083); + tex2[gxy] = target; + + target = mul(e1, min16float4x4(0.06816948, 0.34817252, -0.046539452, 0.0051957658, -0.1393289, -0.123660676, -0.28295487, -0.09683893, -0.3166085, 0.112649016, 0.016630042, 0.12213537, 0.048850413, 0.10865108, 0.36645818, -0.1570077)); + target += mul(e2, min16float4x4(0.16992034, 0.15695556, 0.23111318, -0.07952356, 0.008467285, -0.11592582, -0.18852152, 0.11257074, 0.24210866, 0.1062648, -0.101493195, 0.04611632, -0.13289067, -0.07632904, 0.012860103, -0.08678244)); + target += mul(e3, min16float4x4(0.19332299, -0.06392618, -0.18013911, 0.23211008, -0.0025107847, 0.4468814, -0.15807462, -0.27148855, 0.24238719, 0.16024797, -0.22240195, 0.2425211, 0.008685379, -0.43995225, 0.28782377, -0.04508348)); + target += mul(ne1, 
min16float4x4(-0.038411126, -0.0034189979, -0.10616163, -0.22397435, 0.005768774, 0.13181472, 0.091235116, 0.07068676, 0.08932033, 0.025967117, -0.053367026, -0.22340903, -0.13413511, 0.24192514, -0.011392121, -0.09885669)); + target += mul(ne2, min16float4x4(-0.13691483, 0.058308467, 0.14866434, 0.005773672, -0.16254735, -0.03150588, 0.16304344, 0.31798756, -0.22399272, 0.033883456, -0.09658691, -0.12437203, -0.117079385, 0.21686973, -0.037619635, -0.085622996)); + target += mul(ne3, min16float4x4(-0.24666454, -0.06097481, -0.08042751, -0.09151835, -0.09213628, 0.06706758, -0.12596707, 0.05328458, 0.25016794, -0.21868211, 0.22890028, -0.16557315, 0.036212686, 0.13603954, -0.20226133, -0.22868301)); + target += mul(conv2d_11_tf, min16float4x4(0.022882584, -0.023618432, 0.08065757, 0.33173925, 0.07162631, -0.010860303, 0.15222527, -0.21064946, 0.023574507, 0.06347729, -0.2955436, 0.31633475, -0.3643237, -0.087610714, -0.089636534, 0.13809934)); + target += mul(nconv2d_11_tf, min16float4x4(-0.22458415, -0.01961852, -0.014363966, -0.2820657, -0.20567393, 0.106780864, -0.43547606, 0.3259588, 0.42431846, -0.30789465, -0.053756483, 0.18392731, -0.43784657, 0.23359884, 0.25319567, -0.1464313)); + target += mul(conv2d_1_tf, min16float4x4(0.06667747, 0.011182004, 0.26176485, -0.15575507, -0.017922953, 0.0014675539, -0.13763407, -0.086996995, -0.00082739035, 0.03939667, -0.09286956, 0.29952076, 0.014103506, 0.10058367, 0.16165632, 0.23478027)); + target += mul(nconv2d_1_tf, min16float4x4(-0.1966405, 0.11404606, -0.12005759, -0.22895505, -0.0848272, 0.021871557, 0.044186037, -0.111861885, -0.16986093, -0.24633476, 0.07282808, -0.26975635, 0.34241816, 0.030470898, -0.09903839, -0.22579415)); + target += mul(conv2d_4_tf, min16float4x4(0.10059369, 0.010142443, 0.061046213, 0.6807189, 0.005402132, -0.21700516, 0.16900781, -0.09973772, -0.025505878, 0.14216411, 0.14366129, -0.02743741, 0.09240224, 0.055595424, -0.22342968, 0.32391673)); + target += mul(nconv2d_4_tf, 
min16float4x4(-0.24940865, -0.042881966, -0.19815244, -0.05011009, 0.32227826, 0.07563262, -0.22649106, 0.10700333, -0.14117172, 0.1359497, -0.14451554, 0.34859756, 0.060239617, 0.09917812, 0.13169186, 0.077682465)); + target += mul(conv2d_7_tf, min16float4x4(-0.0714192, 0.12607583, -0.3341241, 0.18375745, -0.18943295, 0.11634349, 0.06633747, -0.13485552, 0.045528308, 0.2432545, 0.26417813, 0.0074096527, 0.004411052, -0.5647283, 0.021793056, -0.1910634)); + target += mul(nconv2d_7_tf, min16float4x4(0.04678379, 0.15781826, -0.14137928, -0.065010436, 0.1379615, -0.07252597, -0.05457498, 0.049137864, 0.054244712, -0.24069838, -0.11444052, 0.27642834, 0.19889133, 0.31845504, -0.102143094, 0.088378325)); + target += mul(conv2d_10_tf, min16float4x4(-0.1163185, 0.19226453, -0.1896929, -0.30681732, -0.013604632, -0.12468549, 0.018667353, 0.09807849, 0.030277459, 0.18578297, 0.14520812, 0.43598676, 0.24981564, 0.22188906, -0.12707953, 0.35956743)); + target += mul(nconv2d_10_tf, min16float4x4(-0.1817424, 0.27081814, -0.16284765, 0.033412658, -0.29831278, -0.1345311, 0.27491164, 0.14552177, -0.054520354, -0.2996891, -0.1279112, -0.64904505, 0.049450837, -0.021562194, -0.6366078, 0.15545636)); + target += min16float4(0.019361967, -0.009793055, 0.03647491, -0.010136049); + tex3[gxy] = target; +} + +//!PASS 6 +//!DESC Conv-4x3x3x24, Conv-4x1x1x72 +//!IN tex1, tex2, tex3, tex10, tex4, tex8, tex9, tex11 +//!OUT tex5, tex6, tex7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass6(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + uint2 inputSize = GetInputSize(); + if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + float2 pos = (gxy + 0.5f) * inputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = tex1.SampleLevel(sam, pos - inputPt, 0); + min16float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c1 = tex1.SampleLevel(sam, pos + 
float2(-inputPt.x, inputPt.y), 0); + min16float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e1 = tex1.SampleLevel(sam, pos, 0); + min16float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i1 = tex1.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex2.SampleLevel(sam, pos - inputPt, 0); + min16float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e2 = tex2.SampleLevel(sam, pos, 0); + min16float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i2 = tex2.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 
0); + i2 = max(i2, 0); + + min16float4 a3 = tex3.SampleLevel(sam, pos - inputPt, 0); + min16float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0); + min16float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0); + min16float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0); + min16float4 e3 = tex3.SampleLevel(sam, pos, 0); + min16float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0); + min16float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0); + min16float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0); + min16float4 i3 = tex3.SampleLevel(sam, pos + inputPt, 0); + + min16float4 na3 = max(-a3, 0); + min16float4 nb3 = max(-b3, 0); + min16float4 nc3 = max(-c3, 0); + min16float4 nd3 = max(-d3, 0); + min16float4 ne3 = max(-e3, 0); + min16float4 nf3 = max(-f3, 0); + min16float4 ng3 = max(-g3, 0); + min16float4 nh3 = max(-h3, 0); + min16float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + min16float4 conv2d_11_tf = tex10.SampleLevel(sam, pos, 0); + min16float4 nconv2d_11_tf = max(-conv2d_11_tf, 0); + conv2d_11_tf = max(conv2d_11_tf, 0); + + min16float4 conv2d_1_tf = tex4.SampleLevel(sam, pos, 0); + min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0); + conv2d_1_tf = max(conv2d_1_tf, 0); + + min16float4 conv2d_4_tf = tex8.SampleLevel(sam, pos, 0); + min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0); + conv2d_4_tf = max(conv2d_4_tf, 0); + + min16float4 conv2d_7_tf = tex9.SampleLevel(sam, pos, 0); + min16float4 nconv2d_7_tf = max(-conv2d_7_tf, 0); + conv2d_7_tf = max(conv2d_7_tf, 0); + + min16float4 conv2d_10_tf = tex11.SampleLevel(sam, pos, 0); + min16float4 nconv2d_10_tf = max(-conv2d_10_tf, 0); + conv2d_10_tf = max(conv2d_10_tf, 0); + + min16float4 conv2d_13_tf = mul(a1, min16float4x4(0.18203236, 0.08024887, -0.036568414, 0.13316368, -0.04578262, 
-0.06611782, -0.006320991, 0.19218548, 0.21009454, -0.08195536, 0.02459481, 0.037244156, -0.09028578, 0.025431598, 0.118399516, -0.11004066)); + conv2d_13_tf += mul(b1, min16float4x4(-0.057694096, 0.016725041, 0.09517554, -0.063389495, -0.15092854, -0.17499524, -0.023841592, -0.0021040211, -0.15481879, 0.058758404, -0.11097904, -0.026721174, 0.0025346193, 0.05679149, 0.0072498247, -0.13605994)); + conv2d_13_tf += mul(c1, min16float4x4(-0.029078262, 0.009836967, 0.07132015, 0.06620542, -0.21533649, 0.13504961, -0.026253965, 0.15687846, -0.041120164, -0.11824143, -0.03505001, -0.069189556, -0.020444538, -0.040636882, 0.104192354, 0.02525567)); + conv2d_13_tf += mul(d1, min16float4x4(-0.0033649271, 0.11870954, -0.29426005, 0.06678275, -0.21843383, -0.06478074, 0.055388454, 0.03360907, 0.15619075, 0.03552764, -0.004492958, -0.11098848, 0.0945473, -0.12231036, 0.060498584, -0.017200515)); + conv2d_13_tf += mul(e1, min16float4x4(0.11771511, 0.19074214, 0.2556847, -0.00011035888, -0.06266651, -0.18949944, -0.08458407, -0.4617736, 0.049839392, 0.0028800126, 0.112179466, -0.23129073, 0.07304365, 0.08169297, 0.010470617, -0.10990468)); + conv2d_13_tf += mul(f1, min16float4x4(0.2365061, -0.011560716, 0.040669534, 0.05682574, -0.108832434, 0.108204864, -0.016406072, -0.35809964, 0.19385669, 0.011194286, 0.022534747, -0.18770002, 0.040628985, -0.106064685, -0.12965748, -0.11940811)); + conv2d_13_tf += mul(g1, min16float4x4(-0.07578536, 0.055741407, -0.07985701, -0.08520933, -0.119309366, -0.001639899, -0.045735355, 0.060324576, -0.04602573, -0.067629695, -0.024497611, 0.11022731, 0.0866483, 0.023369456, 0.092412636, -0.15647933)); + conv2d_13_tf += mul(h1, min16float4x4(0.1151201, -0.06150153, -0.007215896, -0.027420595, -0.27713504, -0.30414173, -0.25002155, 0.04579516, 0.07746921, -0.039378557, -0.0007037489, 0.05171079, 0.04729991, -0.24362347, -0.03316277, -0.029248973)); + conv2d_13_tf += mul(i1, min16float4x4(0.08882578, -0.089215234, -0.027231896, 0.09565371, 
-0.040585488, -0.0666667, -0.10971792, -0.18664278, 0.08676577, 0.011609924, -0.11474831, -0.0032087977, -0.14733344, -0.013885521, -0.0600112, -0.028661741)); + conv2d_13_tf += mul(a2, min16float4x4(-0.16421804, 0.13640842, -0.053869005, -0.105430946, 0.33498198, -0.19186987, -0.044760693, 0.12338264, 0.04087762, 0.35624924, 0.16211961, -0.16837841, -0.21358813, 0.07136877, 0.09507147, 0.15890902)); + conv2d_13_tf += mul(b2, min16float4x4(-0.3021354, -0.3319794, 0.070228204, 0.1157857, -0.23864768, -0.124694765, -0.035166927, -0.2196196, 0.11144565, 0.15449396, 0.31777796, 0.23201036, 0.36269313, 0.0791044, -0.14027423, -0.10298774)); + conv2d_13_tf += mul(c2, min16float4x4(0.0045441133, 0.14908041, -0.04037237, 0.012396483, -0.41045487, -0.049013153, -0.25163352, -0.18674599, -0.020136787, -0.04309944, 0.16324212, 0.20724443, 0.0013537789, 0.10984782, -0.050586786, 0.07564281)); + conv2d_13_tf += mul(d2, min16float4x4(-0.35524195, -0.08884062, -0.061092835, 0.0016606712, -0.18841584, -0.28330895, 0.110710636, -0.20210983, 0.01599891, -0.019640112, -0.06881855, -0.2822387, 0.16723692, 0.42387784, 0.17316435, 0.014779502)); + conv2d_13_tf += mul(e2, min16float4x4(-0.14352255, 0.2557878, 0.14022757, -0.09769558, -0.08192019, 0.4160667, 0.20182422, -0.29740554, -0.16924635, 0.044684824, -0.21592674, -0.04393559, -0.44846448, -0.2268265, -0.15033214, -0.1552571)); + conv2d_13_tf += mul(f2, min16float4x4(-0.46804324, -0.05876729, 0.0023225946, -0.1399195, -0.12917824, 0.12800436, 0.5672086, 0.42298177, -0.25502345, -0.15043756, -0.010454711, -0.16799574, 0.1695203, 0.31919575, 0.090758204, -0.03608345)); + conv2d_13_tf += mul(g2, min16float4x4(-0.20859653, -0.025662629, 0.11013811, 0.021071844, -0.21565554, -0.08014497, -0.041803278, 0.15999684, -0.09659372, -0.2930284, 0.22263159, -0.058361106, -0.06474458, -0.18948506, -0.22297342, 0.002085207)); + conv2d_13_tf += mul(h2, min16float4x4(-0.25466987, 0.07562997, -0.046997566, 0.01815494, 0.015587753, 0.19885786, 
0.17028151, -0.20973559, -0.13089986, 0.056037027, -0.16056974, -0.09570157, 0.36515233, 0.2177508, -0.19389395, 0.042368103)); + conv2d_13_tf += mul(i2, min16float4x4(-0.09177028, 0.029719152, 0.035980605, -0.111842036, -0.14203559, -0.0016779151, -0.23984708, -0.24259119, -0.32218066, -0.16303101, -0.042665064, 0.018674236, 0.132396, -0.07117317, -0.11266681, -0.25140917)); + conv2d_13_tf += mul(a3, min16float4x4(-0.2254921, -0.089444794, -0.03250626, -0.002422312, -0.07599525, 0.06057337, 0.09297158, -0.13625564, -0.05718329, 0.1393445, -0.14238319, -0.035561938, 0.10357985, 0.14509755, -0.05125032, 0.071264446)); + conv2d_13_tf += mul(b3, min16float4x4(0.14345558, -0.13649228, 0.07141237, -0.31665677, -0.106410414, -0.024022767, 0.022847228, -0.066274576, 0.27855787, 0.27377915, 0.100737795, 0.2585287, 0.065262236, 0.3338305, 0.013705893, 0.15107758)); + conv2d_13_tf += mul(c3, min16float4x4(0.24887003, 0.27924842, 0.011750549, 0.02100809, -0.060978264, 0.09022114, 0.10060977, -0.117189266, -0.064989, 0.050291102, 0.02154075, 0.07428455, 0.0128874695, -0.0824151, -0.0955003, 0.1240542)); + conv2d_13_tf += mul(d3, min16float4x4(0.09787086, -0.038460266, -0.012216873, 0.020269781, -0.14274825, -0.10365878, 0.107120685, 0.005830931, 0.18160833, -0.039512586, 0.054537058, -0.10175313, 0.2583083, 0.12110453, 0.11164319, -0.097267024)); + conv2d_13_tf += mul(e3, min16float4x4(0.04403219, -0.06616097, -0.1881836, -0.17728293, -0.30001318, 0.14179994, 0.077847786, 0.009201645, 0.2055038, 0.10847946, 0.034566265, 0.0823046, 0.016860636, -0.029249087, -0.16692844, 0.15714505)); + conv2d_13_tf += mul(f3, min16float4x4(0.01654197, -0.005030059, 0.15659711, 0.029457249, -0.10084003, -0.17541635, 0.20056525, 0.11890777, 0.041007854, -0.021843065, -0.047474306, 0.02461869, 0.09578964, -0.054728534, -0.022951778, 0.1384323)); + conv2d_13_tf += mul(g3, min16float4x4(-0.17401876, 0.0050307186, -0.14960738, -0.06744025, -0.026341015, -0.015185451, 0.097973764, 0.01230041, 
0.043848213, -0.022325305, -0.01173514, -0.12744233, -0.1192904, -0.019170178, 0.16593695, -0.11961721)); + conv2d_13_tf += mul(h3, min16float4x4(0.16905174, -0.05465901, -0.10304148, 0.06422409, -0.06595216, 0.032311443, 0.06506821, 0.06866468, 0.12749052, 0.2812222, 0.10223055, -0.009964554, 0.10145132, 0.05452548, 0.21845295, 0.060436632)); + conv2d_13_tf += mul(i3, min16float4x4(0.009076048, -0.075771615, -0.010236168, -0.049228482, -0.009111011, 0.0032872239, 0.030809326, -0.021389242, -0.13207865, -0.20758687, 0.06795314, 0.16716966, 0.022448925, 0.005127875, 0.14822717, 0.1543517)); + conv2d_13_tf += mul(na1, min16float4x4(0.15177163, -0.059483033, -0.061815593, -0.048359588, 0.08666249, 0.01303385, -0.0797276, 0.00045918894, 0.044986565, -0.16032507, 0.001627205, -0.11240742, 0.36031052, 0.3453977, 0.20082399, -0.0872419)); + conv2d_13_tf += mul(nb1, min16float4x4(0.20732729, 0.29925603, -0.027490204, -0.07813189, -0.16492629, 0.13322815, 0.0031292376, 0.041497275, -0.08320837, 0.10200068, -0.17992872, -0.06903506, 0.12075557, 0.10240156, 0.17685287, -0.07302424)); + conv2d_13_tf += mul(nc1, min16float4x4(0.09832397, 0.072229534, -0.038651302, 0.23663157, -0.049382553, -0.11816951, -0.095177956, -0.0063895187, -0.22133054, 0.027618079, -0.010867105, 0.20221426, -0.055768233, 0.123813964, -0.04770652, 0.031318672)); + conv2d_13_tf += mul(nd1, min16float4x4(0.19019139, -0.0055707553, 0.26110023, 0.053353935, -0.09224678, 0.103274055, 0.054068115, -0.028470352, -0.050636273, 0.044128064, -0.1790452, -0.01937518, -0.22987902, 0.13224003, 0.06837358, -0.21524249)); + conv2d_13_tf += mul(ne1, min16float4x4(-0.06441057, -0.14875272, 0.1966193, -0.19311902, 0.116319604, -0.015221862, 0.22331011, -0.12665007, 0.1492529, -0.060963593, -0.13987945, -0.00267954, 0.17791282, -0.017524656, 0.009128157, -0.19969128)); + conv2d_13_tf += mul(nf1, min16float4x4(-0.15118724, -0.02174076, 0.18955654, 0.004134554, -0.074481554, -0.022116778, -0.23729491, -0.21471047, 
-0.17819612, 0.13824348, -0.0189012, 0.2410327, 0.122907236, 0.115833536, 0.07078602, 0.1497625)); + conv2d_13_tf += mul(ng1, min16float4x4(-0.0030512493, -0.004724951, 0.13259876, 0.009409425, -0.09696517, -0.12920079, -0.13467522, -0.05229473, -0.03711706, -0.038291495, -0.1493357, 0.09193146, -0.11654958, -0.1384159, -0.0809269, 0.12138653)); + conv2d_13_tf += mul(nh1, min16float4x4(-0.111716144, -0.033208963, 0.19639781, -0.28904846, 0.043729085, 0.016957026, -0.078926295, -0.19079417, 0.06363828, -0.019629745, 0.058766138, -0.120303996, -0.15203112, -0.16788657, -0.15019903, -0.20598294)); + conv2d_13_tf += mul(ni1, min16float4x4(0.09737031, 0.19906493, 0.31577814, 0.09887659, -0.10737645, 0.03927124, 0.008865094, 0.030515334, -0.03767332, 0.19419806, 0.052343797, -0.12595782, 0.018560758, -0.004252203, 0.12685028, -0.19064935)); + conv2d_13_tf += mul(na2, min16float4x4(-0.107926846, 0.05654491, 0.039178263, -0.022938857, -0.055884767, 0.01403891, 0.040060706, -0.0876108, -0.08530536, 0.035486717, -0.1397322, -0.111439094, 0.3098693, 0.031957068, -0.1323169, 0.036736827)); + conv2d_13_tf += mul(nb2, min16float4x4(-0.042637993, -0.13947937, -0.06313642, -0.013281999, -0.07746704, -0.0033614477, 0.062081654, -0.028974544, -0.09252038, 0.23787987, -0.03051402, 0.08857487, -0.10345242, 0.08111023, 0.012858327, 0.025468932)); + conv2d_13_tf += mul(nc2, min16float4x4(-0.057991188, 0.06572571, -0.17195612, -0.18226011, 0.13167764, -0.029910656, 0.07416073, 0.011874738, 0.020921603, 0.1790944, -0.02713754, -0.04678265, 0.0025504003, -0.07831189, 0.0022889362, 0.17452945)); + conv2d_13_tf += mul(nd2, min16float4x4(-0.08273035, -0.06628758, 0.09288723, 0.17525311, -0.015099176, -0.02920585, 0.01664239, 0.16360165, -0.058821842, 0.023668878, 0.13803177, 0.05805197, -0.033553623, -0.020296576, -0.2126249, 0.054712847)); + conv2d_13_tf += mul(ne2, min16float4x4(0.11607657, 0.09721635, 0.076664194, 0.107737765, -0.18090104, -0.09323497, 0.1018825, 0.025112988, -0.037965916, 
0.07314205, 0.16523585, -0.16451308, 0.011332593, 0.05381852, 0.053742763, -0.051402804)); + conv2d_13_tf += mul(nf2, min16float4x4(0.08998201, -0.09690652, -0.090980336, 0.21645999, -0.1421605, 0.017344419, -0.080088496, -0.1686495, 0.13406368, 0.004237983, 0.028970357, -0.015848784, -0.07229926, -0.08199748, 0.14972275, 0.11688227)); + conv2d_13_tf += mul(ng2, min16float4x4(-0.10923993, -0.006186229, -0.0059918985, -0.056261536, 0.12305135, 0.07601222, 0.015556293, 0.039497726, 0.004694121, 0.03006972, -0.11686323, -0.1083031, -0.053210545, 0.06765771, 0.1847543, 0.12722884)); + conv2d_13_tf += mul(nh2, min16float4x4(-0.15110816, -0.114151604, 0.06755774, 0.1535812, -0.0055134855, 0.124444366, 0.116650686, 0.015837835, -0.13255565, -0.023659749, 0.012672263, -0.014328633, -0.25721112, 0.03517644, 0.07895924, 0.017762167)); + conv2d_13_tf += mul(ni2, min16float4x4(-0.0048434106, -0.15848884, 0.07007013, -0.0040173456, 0.12461628, -0.006840197, 0.054776177, 0.030113375, 0.011075732, -0.12137928, 0.039907288, 0.041261338, -0.03539033, -0.010571816, 0.17591824, 0.07626049)); + conv2d_13_tf += mul(na3, min16float4x4(-0.09215494, -0.047397707, 0.020372266, -0.03961589, -0.2969749, -0.23441714, 0.041512486, -0.23838238, 0.15105574, 0.030688843, 0.10364508, -0.037372112, 0.24514282, 0.11799978, -0.25672802, -0.05064504)); + conv2d_13_tf += mul(nb3, min16float4x4(-0.22321941, -0.22637981, 0.12784286, -0.15949993, -0.1747607, 0.019964136, -0.101212226, -0.14332725, -0.0040852833, 0.13991846, -0.121760346, -0.074741244, -0.14598946, 0.017030315, -0.21471639, 0.023562988)); + conv2d_13_tf += mul(nc3, min16float4x4(-0.025941253, -0.085331805, 0.006736805, 0.080889955, -0.06974209, -0.20366986, -0.2243817, -0.18153073, -0.0024152526, 0.047323234, 0.03407195, 0.016644841, -0.0060426793, -0.1146607, 0.11816627, -0.09477427)); + conv2d_13_tf += mul(nd3, min16float4x4(-0.11221949, -0.016993113, -0.028873868, 0.30510077, -0.10090775, -0.56358117, -0.2178131, -0.3253011, 0.05903533, 
0.23069671, -0.040006876, -0.2242038, -0.10916342, -0.038909998, -0.081489064, 0.06539624)); + conv2d_13_tf += mul(ne3, min16float4x4(-0.059550002, -0.07048971, 0.08075795, 0.07341893, 0.08720143, -0.08745607, -0.28628471, 0.004085622, -0.059510656, -0.07080941, -0.17805275, 0.010445313, 0.08262345, 0.14971328, 0.086313516, 0.4270992)); + conv2d_13_tf += mul(nf3, min16float4x4(-0.25829327, -0.25821465, -0.025910528, -0.1256417, -0.32173184, -0.012251011, -0.31182033, -0.17723739, 0.05439974, -0.0018167618, 0.06974409, -0.024687098, 0.05163715, 0.011181801, 0.060559656, 0.18320788)); + conv2d_13_tf += mul(ng3, min16float4x4(0.048055783, 0.030901788, 0.00014199098, -0.015663194, -0.27395675, -0.1374474, 0.055429243, 0.09942114, -0.037852254, -0.033255827, 0.022523645, 0.04666904, 0.16599222, -0.02004086, 0.21397619, -0.11373404)); + conv2d_13_tf += mul(nh3, min16float4x4(-0.23445702, -0.06371413, -0.08418856, 0.06907252, 0.20780656, -0.13808912, 0.018577656, -0.0046262434, 0.09724245, -0.114031695, 0.022883652, 0.107561804, -0.010228, 0.0033352477, 0.12142382, -0.035946723)); + conv2d_13_tf += mul(ni3, min16float4x4(0.058773417, -0.06617424, -0.13876313, -0.007238876, -0.17449926, 0.14130935, -0.17021981, 0.09241347, 0.018518088, 0.085447155, -0.14430992, 0.035074715, -0.02784563, 0.15934117, -0.00036379634, -0.040411446)); + conv2d_13_tf += min16float4(-0.0258258, -0.014007201, -0.0051976936, 0.023554644); + min16float4 nconv2d_13_tf = max(-conv2d_13_tf, 0); + conv2d_13_tf = max(conv2d_13_tf, 0); + + min16float4 target = mul(e1, min16float4x4(0.13381699, 0.17966591, -0.0866034, -0.15282217, -0.2567282, -0.38080183, 0.10091161, 0.32172382, -0.064547606, -0.08161712, -0.033353675, -0.0019234467, 0.027740227, 0.2277078, 0.06759129, -0.22699283)); + target += mul(e2, min16float4x4(-0.122093834, 0.20621717, -0.08142724, 0.16477586, 0.4863212, -0.24032472, 0.00055996195, 0.50562304, 0.028121283, 0.56215876, 0.014577866, 0.06960302, -0.15964645, 0.14526807, -0.026474794, 
-0.02554081)); + target += mul(e3, min16float4x4(-0.101622745, 0.022395104, -0.14208415, 0.09508211, 0.20496333, 0.11371943, -0.024784304, 0.09519364, 0.09233463, 0.03117482, -0.15262024, -0.16956648, -0.2432608, -0.12877996, -0.13148616, 0.043081667)); + target += mul(ne1, min16float4x4(-0.28086182, -0.15846887, -0.058738094, -0.181707, -0.018847898, 0.05197007, 0.09753647, -0.19714034, -0.062462445, -0.17604835, 0.1268098, 0.15334699, 0.05568127, 0.16867611, -0.1686486, 0.28579247)); + target += mul(ne2, min16float4x4(0.20252296, -0.27393097, 0.06578763, -0.12628423, -0.10547165, 0.030740904, -0.19412865, -0.034658667, -0.09081653, -0.19958268, 0.16915733, 0.056093715, 0.10596871, -0.1742866, 0.004890009, 0.19515324)); + target += mul(ne3, min16float4x4(0.32077652, -0.004434404, -0.12717858, -0.13544025, -0.450333, 0.04072708, 0.04316467, -0.2578049, -0.011932833, 0.18828999, 0.12326536, -0.016795376, -0.0054118615, 0.061453808, 0.28015187, 0.13463841)); + target += mul(conv2d_11_tf, min16float4x4(0.08942177, -0.0021343376, 0.23693596, -0.15413974, -0.32839566, -0.010874302, 0.033822935, 0.038676813, 0.18920816, 0.019961799, -0.055697896, -0.042120066, 0.10387084, 0.047366753, 0.17899887, -0.071130194)); + target += mul(nconv2d_11_tf, min16float4x4(0.0010777018, -0.071475126, -0.16156957, -0.08781234, -0.08701292, 0.29084647, -0.34587428, 0.06969663, 0.036580127, 0.106745, -0.1534462, 0.106189206, -0.22758242, 0.20691736, -0.018554503, -0.056773946)); + target += mul(conv2d_1_tf, min16float4x4(0.14826776, -0.03700497, 0.066144, 0.023859248, -0.16708666, -0.23908418, 0.062023632, -0.16278005, 0.06265635, -0.039846748, -0.13978398, -0.027952245, 0.099891245, 0.18235108, 0.00991435, 0.0423486)); + target += mul(nconv2d_1_tf, min16float4x4(-0.17948383, -0.082759954, 0.10543674, -0.18660031, 0.0664088, -0.06837087, 0.04300318, 0.011699623, -0.017162412, -0.030628186, 0.07547453, 0.20060332, -0.19182351, 0.04914753, 0.040280227, -0.12417484)); + target += 
mul(conv2d_4_tf, min16float4x4(0.04074336, -0.041421015, -0.0372822, 0.1647266, -0.13993263, 0.0029407872, -0.39398977, -0.1778468, 0.21322449, 0.19134948, -0.02818874, 0.226251, 0.06352273, 0.12620094, 0.24221466, 0.20657893)); + target += mul(nconv2d_4_tf, min16float4x4(-0.094572894, -0.046852108, 0.21210444, -0.14082888, -0.050984625, -0.13443558, 0.24309658, 0.1573335, 0.21941295, 0.11642813, 0.09684106, -0.08597462, 0.15502413, -0.018070435, 0.1292023, -0.1557655)); + target += mul(conv2d_7_tf, min16float4x4(0.025215387, 0.16676718, -0.068287216, 0.017648363, 0.2779579, 0.059142746, -0.096408874, 0.22609432, 0.20962398, 0.24879578, 0.023621194, -0.29692242, 0.02272032, -0.33367038, 0.15799981, -0.1699598)); + target += mul(nconv2d_7_tf, min16float4x4(0.08816878, 0.076234445, -0.06670541, 0.024926793, -0.12045598, 0.07443171, 0.22081238, -0.044906516, -0.02448027, -0.22067828, -0.016471038, 0.21801811, 0.16276583, 0.34590468, -0.18487914, 0.0554853)); + target += mul(conv2d_10_tf, min16float4x4(-0.085593045, -0.002904318, 0.049969394, -0.06931361, -0.10722648, -0.08499641, -0.25997344, 0.22650665, 0.069008924, -0.23179024, 0.20058884, -0.20237185, -0.1606995, 0.0758858, -0.09946377, -0.21032207)); + target += mul(nconv2d_10_tf, min16float4x4(0.11210572, 0.055658836, 0.041539114, 0.078087114, -0.060435783, 0.08331363, 0.07356019, 0.0842336, -0.38098484, 0.020591227, -0.45916042, 0.06386686, -0.19348675, 0.041925576, -0.23489946, -0.06711732)); + target += mul(conv2d_13_tf, min16float4x4(-0.13721304, 0.15404533, 0.102312036, -0.090253755, 0.08690545, 0.034154307, 0.07618604, -0.15844443, -0.10604342, 0.2646684, -0.08719668, 0.19331944, 0.10569642, -0.058054388, -0.0110980645, -0.08710107)); + target += mul(nconv2d_13_tf, min16float4x4(0.15567884, -0.11589786, 0.031855986, 0.005064268, 0.37850487, 0.30044487, -0.2604449, 0.061879188, -0.015081224, -0.30759993, -0.07571204, -0.0077929585, -0.08748009, 0.22546281, -0.06377379, 0.435342)); + target += 
min16float4(0.0053140894, -0.030208405, 0.04287835, -0.059097543); + tex5[gxy] = target; + + target = mul(e1, min16float4x4(0.0029025443, 0.021165721, 0.0070854356, 0.065646365, 0.024636142, 0.20825955, -0.0917655, -0.1706138, -0.1827491, 0.13347003, 0.12910214, 0.06828513, -0.026193604, -0.11451178, 0.0356333, -0.08071165)); + target += mul(e2, min16float4x4(-0.027241195, 0.032633994, -0.17490302, -0.5352789, -0.15734912, 0.24714436, 0.029301014, 0.212763, -0.051665317, -0.06783505, -0.040298667, 0.041179724, 0.49683514, -0.35600296, -0.2518442, -0.22965558)); + target += mul(e3, min16float4x4(-0.061614696, -0.10463926, 0.1594845, 0.036565617, 0.09095015, -0.15100475, -0.09242749, 0.08335822, -0.027257469, 0.4156707, 0.03322028, 0.19685929, 0.07034635, 0.10204465, 0.03657313, 0.30920812)); + target += mul(ne1, min16float4x4(-0.20980133, -0.054115582, 0.031674277, -0.040077273, -0.21693806, 0.016596884, -0.029177245, -0.16924128, 0.121823296, -0.0004884774, 0.10644538, 0.068388954, 0.16517027, -0.12152921, -0.18299894, -0.17595083)); + target += mul(ne2, min16float4x4(-0.0006413291, -0.09444853, 0.15260176, 0.23014128, 0.09366626, 0.06947763, 0.04956597, -0.07001088, -0.075523324, 0.16111156, -0.11700089, 0.14528704, -0.096407495, 0.027310526, -0.03946532, 0.15302157)); + target += mul(ne3, min16float4x4(0.086061105, -0.0070365844, -0.25230658, 0.18741103, -0.36380208, -0.058444727, 0.25284684, -0.26617825, -0.08817363, -0.12209333, 0.011920746, -0.031505488, -0.21880315, 0.16762236, 0.14518112, 0.13803998)); + target += mul(conv2d_11_tf, min16float4x4(-0.17088315, -0.06812898, -0.085912764, 0.25550255, -0.26439053, 0.23305506, 0.18186118, -0.06186191, 0.0075220955, 0.10316868, 0.04271979, -0.008083033, -0.19474187, -0.06700431, 0.15485007, -0.11886802)); + target += mul(nconv2d_11_tf, min16float4x4(0.06597312, -0.31435877, -0.08179224, -0.2568261, 0.29904976, 0.21664406, -0.15343861, -0.11589945, 0.12654455, -0.042093027, -0.17231914, -0.26832506, -0.12008876, 
0.11483079, 0.10222754, 0.12562539)); + target += mul(conv2d_1_tf, min16float4x4(-0.09949413, 0.01479024, -0.16933955, 0.025359191, -0.2210058, -0.19663176, 0.19453603, -0.111461386, -0.12529027, 0.14243664, 0.122677036, -0.101476125, 0.011010597, -0.014422488, -0.048979994, 0.03657997)); + target += mul(nconv2d_1_tf, min16float4x4(-0.06923051, -0.1223873, 0.021781938, 0.1323696, -0.11582021, -0.018292433, 0.07495496, 0.043008957, 0.0070410958, -0.14431225, -0.06380941, -0.17411429, 0.052226365, 0.021460915, 0.097367965, 0.37138346)); + target += mul(conv2d_4_tf, min16float4x4(0.16420697, 0.008790036, 0.17185563, -0.025144322, -0.108827055, -0.13030754, -0.14254087, 0.05208047, 0.03751449, 0.06774824, -0.07746288, 0.2250457, 0.039049506, 0.101244815, -0.18138403, -0.12212992)); + target += mul(nconv2d_4_tf, min16float4x4(-0.05138809, 0.19150224, 0.05698308, 0.015970863, 0.23931703, -0.085039265, -0.18294281, 0.03647365, -0.041568805, -0.2920049, 0.013272974, -0.41181135, -0.08101046, 0.028989056, 0.2952233, 0.16312017)); + target += mul(conv2d_7_tf, min16float4x4(0.093839854, -0.038790308, -0.086285874, -0.17890124, -0.2598202, 0.069419555, -0.0065180454, 0.01453452, -0.090191156, 0.012278203, -0.13148692, -0.025104592, 0.09296121, -0.1833281, 0.074660525, -0.031280298)); + target += mul(nconv2d_7_tf, min16float4x4(-0.05336347, 0.08608969, -0.074649446, 0.014608438, 0.22511393, 0.18610351, -0.0029040743, 0.096127085, -0.20254624, 0.14036441, -0.005226189, 0.055212848, 0.20482111, 0.06645607, -0.12018032, 0.062814355)); + target += mul(conv2d_10_tf, min16float4x4(0.13722958, -0.077169575, 0.07269382, 0.20902501, -0.103985704, -0.21184038, -0.12424109, -0.3059887, -0.185413, -0.1964241, -0.14370187, 0.07646031, -0.057924826, 0.28884047, -0.06701312, -0.14548934)); + target += mul(nconv2d_10_tf, min16float4x4(0.14129579, 0.12990993, -0.08791828, 0.07986884, -0.006362554, 0.005971629, 0.016816271, 0.075642705, -0.060138028, 0.13658188, 0.0020529197, -0.38745758, 
-0.16191563, 0.20532359, 0.34441018, 0.0071060034)); + target += mul(conv2d_13_tf, min16float4x4(-0.03236983, -0.08242242, 0.065607354, -0.072457135, 0.024461512, 0.15522943, 0.120296456, 0.052112654, 0.21442589, 0.19565494, 0.06760742, 0.37604833, 0.097620994, -0.002347599, 0.09269131, -0.34238556)); + target += mul(nconv2d_13_tf, min16float4x4(0.3276042, -0.17974046, -0.095954694, -0.123248585, 0.08306674, -0.3486506, -0.4620704, -0.40518835, -0.17438394, 0.24350463, 0.05616052, -0.14715664, 0.2078043, -0.007834002, -0.21199054, 0.026597755)); + target += min16float4(-0.015380624, 0.018387195, 0.052286647, 0.055403516); + tex6[gxy] = target; + + target = mul(e1, min16float4x4(0.029018598, -0.09923186, -0.1346201, -0.084818475, 0.013764684, 0.054601744, -0.023713779, -0.16826102, 0.038605224, -0.17664196, -0.16562279, 0.14602208, -0.046339583, 0.08062112, 0.20166601, -0.15399997)); + target += mul(e2, min16float4x4(-0.022488657, 0.28881705, 0.22283012, -0.1935156, 0.22948948, -0.26604095, 0.12130448, 0.35176682, -0.044228308, -0.14734231, 0.07643742, -0.008511517, 0.04313213, -0.03179344, 0.048205808, -0.046295088)); + target += mul(e3, min16float4x4(-0.2531207, 0.10446124, 0.12730333, -0.13316457, 0.2988587, 0.025091104, -0.00482534, 0.037484948, -0.04006528, 0.14588606, -0.2078635, -0.18636562, 0.112230495, 0.15386717, -0.11122423, 0.1115416)); + target += mul(ne1, min16float4x4(0.058421213, 0.086035125, -0.042249937, -0.22377387, -0.055913106, 0.020280339, 0.10572877, 0.124147646, -0.16199678, 0.25662583, 0.051422223, -0.11681551, 0.3789257, -0.21530285, -0.18586366, -0.2222266)); + target += mul(ne2, min16float4x4(-0.11123776, 0.056422785, -0.20566264, -0.07211227, -0.011873865, 0.30742383, 0.1306618, 0.06808572, 0.068643585, -0.045474447, -0.11596973, 0.0069175013, 0.0331586, -0.013221628, -0.089815594, -0.17750767)); + target += mul(ne3, min16float4x4(0.45630908, 0.11607409, -0.05464286, 0.013246808, -0.28643015, 0.025237702, -0.1445959, 0.05237954, 
-0.07100623, -0.34417382, 0.13903524, 0.21305767, -0.17371523, -0.13203263, -0.09479281, 0.018392125)); + target += mul(conv2d_11_tf, min16float4x4(-0.018931253, -0.14936836, -0.06770882, 0.10720343, -0.10476732, 0.1157603, -0.2245781, 0.23242487, -0.21631289, 0.12723672, 0.4190526, 0.38829032, -0.192142, 0.034754496, -0.1103798, -0.17207326)); + target += mul(nconv2d_11_tf, min16float4x4(0.10311498, 0.08424212, -0.048713315, -0.2784966, 0.034522116, -0.13184515, -0.22852737, 0.003882436, 0.36972147, -0.21263883, -0.3308556, 0.10331102, 0.2462766, -0.12618823, -0.040451203, 0.03362719)); + target += mul(conv2d_1_tf, min16float4x4(-0.0150432745, 0.11757923, 0.23359092, -0.19003578, -0.22206408, 0.15738077, -0.14019541, -0.14201044, 0.19273758, -0.003298494, -0.16530107, 0.17979017, 0.24293105, -0.049160067, -0.14296743, -0.12812854)); + target += mul(nconv2d_1_tf, min16float4x4(-0.0020534277, 0.016410163, -0.012038507, -0.0028629426, 0.016464395, 0.0755886, 0.20384903, -0.029324949, -0.13087441, 0.2138074, 0.03701677, -0.1671415, -0.10499825, -0.042930905, -0.007613907, -0.05984843)); + target += mul(conv2d_4_tf, min16float4x4(-0.07029106, 0.05386552, 0.101365924, -0.008048512, -0.090149835, 0.024272785, -0.16436198, 0.2721913, 0.17460534, 0.0034964401, -0.023265982, -0.0120567605, -0.10151709, 0.059922412, -0.13204409, -0.36116782)); + target += mul(nconv2d_4_tf, min16float4x4(-0.12569033, 0.08523279, -0.047763485, -0.0025170774, -0.108375974, -0.032045245, 0.232404, -0.24801816, -0.09875204, -0.14990453, -0.10958757, -0.23116525, 0.015989894, -0.09210713, 0.19653663, 0.14138049)); + target += mul(conv2d_7_tf, min16float4x4(0.17831743, 0.04722249, 0.22804007, -0.29099363, 0.29851902, 0.2542661, 0.0067702304, 0.17606215, 0.25847578, -0.3118978, 0.122089565, -0.07010249, 0.014281751, 0.16585219, -0.1659864, -0.30643156)); + target += mul(nconv2d_7_tf, min16float4x4(0.19042191, -0.028259574, -0.009187334, 0.21004388, -0.08070036, -0.07838277, -0.023598602, 0.13891627, 
-0.10481482, 0.05874796, -0.256131, 0.19640857, 0.19515458, -0.07920633, 0.020810237, 0.11040215)); + target += mul(conv2d_10_tf, min16float4x4(-0.093089096, -0.09344762, 0.24232084, 0.21563776, -0.23910145, 0.09092736, 0.12202717, 0.27240792, -0.008079913, 0.07417433, -0.11870247, -0.35385913, 0.107840456, 0.033915944, 0.16016287, 0.023731219)); + target += mul(nconv2d_10_tf, min16float4x4(0.21967673, 0.09896617, 0.04236673, -0.20100762, 0.02077549, -0.075936705, 0.008608214, -0.09693712, 0.44249, -0.31763947, -0.027664369, 0.6166134, -0.43993565, -0.025720617, -0.3275949, 0.041507874)); + target += mul(conv2d_13_tf, min16float4x4(0.20305479, -0.06975863, -0.18130508, -0.11641104, 0.119906515, -0.27588886, -0.15420493, -0.1399163, 0.075970694, -0.16776691, 0.05045285, 0.44775927, -0.036058784, -0.28161573, 0.1877619, 0.10209392)); + target += mul(nconv2d_13_tf, min16float4x4(-0.4250348, -0.007887921, 0.307136, -0.18842702, 0.30411714, 0.05816079, 0.26664746, -0.007951849, -0.18454021, 0.30914694, -0.34967366, -0.18838291, 0.06042888, 0.1902336, -0.062413342, 0.015706044)); + target += min16float4(-0.0011628491, -0.0046341973, 0.0007886035, -0.04435556); + tex7[gxy] = target; +} + +//!PASS 7 +//!DESC Conv-4x3x3x24 +//!IN tex5, tex6, tex7 +//!OUT conv1ups, conv1ups1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass7(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { + return; + } + + float2 outputPt = GetOutputPt(); + const float2 pos = (gxy + 0.5f) * outputPt; + + outputPt *= 2; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = tex5.SampleLevel(sam1, pos - outputPt, 0); + min16float4 b1 = tex5.SampleLevel(sam1, pos + float2(-outputPt.x, 0), 0); + min16float4 c1 = tex5.SampleLevel(sam1, pos + float2(-outputPt.x, outputPt.y), 0); + min16float4 d1 = tex5.SampleLevel(sam1, pos + float2(0, -outputPt.y), 0); + 
min16float4 e1 = tex5.SampleLevel(sam1, pos, 0); + min16float4 f1 = tex5.SampleLevel(sam1, pos + float2(0, outputPt.y), 0); + min16float4 g1 = tex5.SampleLevel(sam1, pos + float2(outputPt.x, -outputPt.y), 0); + min16float4 h1 = tex5.SampleLevel(sam1, pos + float2(outputPt.x, 0), 0); + min16float4 i1 = tex5.SampleLevel(sam1, pos + outputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = tex6.SampleLevel(sam1, pos - outputPt, 0); + min16float4 b2 = tex6.SampleLevel(sam1, pos + float2(-outputPt.x, 0), 0); + min16float4 c2 = tex6.SampleLevel(sam1, pos + float2(-outputPt.x, outputPt.y), 0); + min16float4 d2 = tex6.SampleLevel(sam1, pos + float2(0, -outputPt.y), 0); + min16float4 e2 = tex6.SampleLevel(sam1, pos, 0); + min16float4 f2 = tex6.SampleLevel(sam1, pos + float2(0, outputPt.y), 0); + min16float4 g2 = tex6.SampleLevel(sam1, pos + float2(outputPt.x, -outputPt.y), 0); + min16float4 h2 = tex6.SampleLevel(sam1, pos + float2(outputPt.x, 0), 0); + min16float4 i2 = tex6.SampleLevel(sam1, pos + outputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float4 a3 = tex7.SampleLevel(sam1, pos - outputPt, 
0); + min16float4 b3 = tex7.SampleLevel(sam1, pos + float2(-outputPt.x, 0), 0); + min16float4 c3 = tex7.SampleLevel(sam1, pos + float2(-outputPt.x, outputPt.y), 0); + min16float4 d3 = tex7.SampleLevel(sam1, pos + float2(0, -outputPt.y), 0); + min16float4 e3 = tex7.SampleLevel(sam1, pos, 0); + min16float4 f3 = tex7.SampleLevel(sam1, pos + float2(0, outputPt.y), 0); + min16float4 g3 = tex7.SampleLevel(sam1, pos + float2(outputPt.x, -outputPt.y), 0); + min16float4 h3 = tex7.SampleLevel(sam1, pos + float2(outputPt.x, 0), 0); + min16float4 i3 = tex7.SampleLevel(sam1, pos + outputPt, 0); + + min16float4 na3 = max(-a3, 0); + min16float4 nb3 = max(-b3, 0); + min16float4 nc3 = max(-c3, 0); + min16float4 nd3 = max(-d3, 0); + min16float4 ne3 = max(-e3, 0); + min16float4 nf3 = max(-f3, 0); + min16float4 ng3 = max(-g3, 0); + min16float4 nh3 = max(-h3, 0); + min16float4 ni3 = max(-i3, 0); + + a3 = max(a3, 0); + b3 = max(b3, 0); + c3 = max(c3, 0); + d3 = max(d3, 0); + e3 = max(e3, 0); + f3 = max(f3, 0); + g3 = max(g3, 0); + h3 = max(h3, 0); + i3 = max(i3, 0); + + min16float4 target = mul(a1, min16float4x4(-0.04461327, 0.026094772, -0.04326873, -0.022564206, 0.041758694, -0.13209347, -0.022546854, 0.004888482, 0.041229382, 0.078778535, -0.09928822, 0.045134705, 0.07555903, 0.095968306, 0.017260674, -0.16633268)); + target += mul(b1, min16float4x4(0.074613005, -0.024822153, 0.006285665, 0.064223155, 0.08983999, -0.04401517, 0.0021585347, -0.05762909, -0.04529031, -0.081778474, -0.006732511, -0.11184791, 0.10299652, -0.23328288, 0.15988354, 0.100146465)); + target += mul(c1, min16float4x4(0.035105877, -0.0018613822, -0.10513717, -0.033936206, -0.015839642, 0.036846053, 0.057443213, 0.0151035935, 0.073372714, -0.032272663, -0.10095864, 0.11976275, 0.019719468, -0.03309878, -0.09841568, 0.02204194)); + target += mul(d1, min16float4x4(0.030945469, -0.17030734, -0.012849732, 0.015892556, 0.056250833, 0.24895169, -0.13764419, 0.16325791, -0.01160465, 0.006647464, -0.026491588, 
-0.17801395, -0.02435574, -0.2039599, -0.02686966, -0.026576484)); + target += mul(e1, min16float4x4(-0.037470777, 0.019415256, 0.09230313, 0.018368619, 0.12947397, 0.055918667, 0.03108532, -0.112716034, -0.18622373, 0.13083778, 0.11290179, 0.02457941, -0.055062827, 0.2621282, -0.47111708, 0.14229195)); + target += mul(f1, min16float4x4(-0.028525796, -0.044668507, 0.0581049, -0.05924212, -0.16126277, 0.02257456, -0.08723546, 0.0291216, 0.08648604, 0.1816661, -0.10166446, 0.054426763, -0.049978323, -0.014283805, 0.08187003, -0.33347195)); + target += mul(g1, min16float4x4(0.0660737, 0.07962152, -0.08272859, 0.06791631, -0.055610694, -0.04899803, 0.001302826, 0.034116816, 0.055754438, -0.090739936, -0.058503445, -0.21402411, 0.08279316, -0.017558504, -0.06069706, -0.009158945)); + target += mul(h1, min16float4x4(0.004801658, 0.19979613, -0.12919085, -0.08680655, -0.07869315, 0.13493058, 0.09466464, -0.06683993, 0.45278597, -0.031217117, -0.36346734, 0.007986247, -0.034918886, -0.06899428, -0.17898467, 0.048572816)); + target += mul(i1, min16float4x4(-0.058319356, -0.11041357, -0.038064227, 0.008961388, 0.059284043, -0.006377162, -0.08503998, 0.08246113, -0.042524133, -0.009021081, -0.06406861, -0.036977306, 0.015088326, 0.007376721, 0.045255665, -0.048585415)); + target += mul(a2, min16float4x4(-0.04103631, -0.041285936, 0.032812588, 0.0030869239, 0.04834749, -0.0023517366, 0.01230978, 0.09776701, 0.08415344, 0.20653047, -0.19338459, -0.04812796, -0.084704414, 0.038988277, 0.075450994, -0.08053876)); + target += mul(b2, min16float4x4(0.13506958, -0.2392332, 0.07425533, -0.05262753, -0.06849319, -0.0686977, 0.09134643, 0.032770213, 0.0725978, -0.12106999, 0.068602145, 0.0030026592, -0.0808173, 0.06421806, -0.08257931, 0.21460927)); + target += mul(c2, min16float4x4(-0.008367152, 0.0035576785, -0.012087096, -0.08389121, -0.01598755, 0.12065467, 0.099018045, -0.14851409, 0.030730573, 0.028257858, -0.08153201, -0.08644078, -0.114632666, -0.03989634, 0.005787138, 
-0.080551155)); + target += mul(d2, min16float4x4(0.063049294, -0.13418451, -0.020768259, -0.12566003, -0.038050238, 0.024393935, 0.040856704, -0.10639481, -0.0021406382, 0.12272091, 0.039621927, 0.009142157, -0.12273027, 0.06595554, 0.03680899, -0.045653462)); + target += mul(e2, min16float4x4(0.14783141, 0.062921695, -0.2287169, 0.17810576, 0.12781417, -0.23455006, 0.08652726, -0.05671725, -0.0154688135, -0.0757278, 0.028468473, -0.055354204, 0.3387407, 0.06741395, -0.21965146, 0.28021505)); + target += mul(f2, min16float4x4(0.12927511, -0.083112024, -0.026347974, 0.11680802, -0.046030812, 0.04145888, 0.029390097, 0.07615963, 0.21023202, 0.015840504, -0.03812723, -0.03267151, -0.03871269, -0.009839764, 0.09856007, -0.07423972)); + target += mul(g2, min16float4x4(0.017651597, 0.020432748, 0.1884304, -0.004845205, 0.009974344, -0.022273665, 0.03930962, -0.035542846, 0.036834106, 0.14699532, -0.099249355, 0.10607033, -0.027745333, -0.0970868, 0.114169724, -0.023726419)); + target += mul(h2, min16float4x4(-0.028299367, -0.15123722, -0.00423565, 0.06813279, 0.00024022427, -0.025944803, 0.022504266, -0.08420193, -0.20596851, -0.1337249, 0.1062062, -0.01428787, 0.014752737, -0.012875446, 0.030165028, 0.035561644)); + target += mul(i2, min16float4x4(-0.09437882, 0.088986255, -0.019357264, -0.07609514, -0.11045937, -0.09335526, 0.0051609105, 0.046330493, -0.102482855, 0.16320266, -0.07661479, 0.033833966, -0.06805305, 0.051780142, -0.015298791, 0.010972507)); + target += mul(a3, min16float4x4(0.0022961323, 0.10782266, -0.06649802, -0.006361161, -0.13554603, 0.032311134, 0.01145253, -0.018523335, -0.051428523, -0.0073554716, -0.11821805, -0.0227195, -0.06375, 0.029970335, -0.038386237, -0.046592798)); + target += mul(b3, min16float4x4(-0.0839258, -0.0200528, 0.004925492, -0.035113, 0.08860089, 0.052822098, -0.16518101, -0.052028593, 0.042811155, 0.13656183, 0.06579406, -0.26585788, -0.00531827, -0.12001242, -0.07681884, -0.021055153)); + target += mul(c3, 
min16float4x4(0.0678669, 0.038901877, -0.096601896, -0.081621505, 0.0028282998, -0.04645044, 0.04284913, 0.015117329, 0.104568556, 0.006391826, -0.021010842, -0.036205173, 0.06698969, 0.08495347, 0.065073915, 0.07002784)); + target += mul(d3, min16float4x4(-0.041274223, -0.065267585, 0.0070607257, -0.067357324, 0.056948107, 0.04808867, 0.07966329, -0.017361488, 0.030913807, -0.119355716, -0.004582609, 0.050158955, 0.03867934, -0.13543603, -0.0011923639, -0.06866172)); + target += mul(e3, min16float4x4(0.11586327, -0.047302328, 0.062475067, 0.018575871, 0.12420718, -0.03602303, 0.021922488, 0.16011192, -0.16549775, 0.123044305, 0.065160766, -0.30708137, 0.07341779, -0.12929793, 0.08692529, 0.0007729847)); + target += mul(f3, min16float4x4(-0.013340411, 0.058056828, -0.028747091, -0.0020311237, -0.1070798, 0.13726988, 0.017587787, -0.06898856, 0.03802266, 0.13165978, -0.035371024, 0.098588474, -0.036178526, -0.1068027, -0.03172579, 0.0816444)); + target += mul(g3, min16float4x4(0.025470722, -0.010980958, -0.08286821, -0.031260632, -0.0134636145, 0.041295316, -0.09980376, 0.07899825, 0.046056226, 0.17291167, -0.066611394, 0.03685817, -0.020917175, 0.11551815, -0.016370535, -0.003991822)); + target += mul(h3, min16float4x4(-0.039056864, 0.011015572, 0.014014594, -0.08614736, -0.08130745, 0.045282196, -0.04879853, -0.07139807, 0.09670427, -0.07834781, -0.022022815, 0.053423326, -0.055300128, 0.23542596, -0.11442394, -0.05190056)); + target += mul(i3, min16float4x4(0.12978806, -0.020104066, -0.032463916, -0.04754379, 0.05811374, 0.029061198, -0.013163837, 0.051058855, 0.04294865, -0.12551701, 0.17822845, -0.16549106, 0.12024249, -0.0790749, 0.035424378, 0.0062358896)); + target += mul(na1, min16float4x4(0.030824278, 0.06636776, -0.047206167, 0.02480193, 0.071935624, -0.18845995, -0.028480597, -0.10213147, -0.03973547, 0.025171004, 0.016600806, -0.10615915, -0.07395773, -0.050147526, -0.011541545, -0.027081985)); + target += mul(nb1, min16float4x4(-0.035749037, 
-0.052818663, -0.020621216, -0.023525307, -0.02461827, 0.3019646, -0.024478583, -0.1398278, 0.17499511, 0.22476715, -0.13090259, -0.05484457, -0.023759075, 0.002843161, 0.014099166, -0.011660793)); + target += mul(nc1, min16float4x4(-0.008461302, 0.14787683, 0.07476249, -0.035538696, 0.007945418, 0.04992842, -0.2388183, 0.0061813896, 0.016805701, 0.019992555, 0.034271393, -0.040170603, -0.039961495, 0.009210595, 0.07606321, 0.05323195)); + target += mul(nd1, min16float4x4(-0.017007355, -0.01304119, -0.011782462, 0.043480955, 0.041575707, 0.20513225, -0.16858323, 0.019438695, -0.02795952, -0.032667078, 0.08400571, 0.012488913, -0.025382128, 0.06756553, 0.14349163, -0.012960532)); + target += mul(ne1, min16float4x4(-0.015847925, 0.035881996, 0.09946923, -0.2583748, -0.11036338, 0.02174868, 0.023047017, -0.023119839, 0.0014623358, -0.05400468, 0.1088209, 0.056070726, 0.09849772, 0.106276534, -0.2869582, 0.122843154)); + target += mul(nf1, min16float4x4(0.120457835, 0.0030220735, 0.011593652, 0.04870485, 0.051817082, -0.12444271, -0.0030080245, 0.03186695, -0.119991936, -0.03661239, 0.0462927, 0.047734156, 0.035473768, -0.050326344, 0.048162602, 0.0044394233)); + target += mul(ng1, min16float4x4(0.004526382, -0.040592365, 0.038592715, 0.06312635, -0.012543924, -0.03860053, 0.013131243, -0.11894808, -0.05983815, -0.09653036, 0.14409515, -0.022803063, 0.02864931, 0.014170389, 0.091406494, 0.08613508)); + target += mul(nh1, min16float4x4(0.12344745, -0.034350697, 0.10549495, -0.11843059, -0.041916244, -0.035728436, -0.052881684, -0.07620879, 0.06760638, -0.039527662, -0.006650022, -0.05049626, 0.12109734, -0.005554175, 0.17754045, -0.098896034)); + target += mul(ni1, min16float4x4(0.017840233, -0.0118570635, -0.080244206, -0.14309776, -0.03778345, 0.12812364, -0.011180574, -0.03749929, -0.013458457, 0.028993722, 0.03479446, -0.11635739, -0.01636896, -0.010422004, -0.022923285, 0.013722603)); + target += mul(na2, min16float4x4(0.0022784397, -0.026745517, 0.07457438, 
-0.023941608, -0.056146793, -0.012885049, 0.010106243, -0.13570426, -0.055139925, -0.0553148, 0.037558038, -0.015558114, 0.055840485, -0.08124391, -0.013017814, 0.18931141)); + target += mul(nb2, min16float4x4(0.10672792, 0.129464, 0.1233261, -0.062469885, -0.08835128, 0.17588028, -0.02560139, -0.07349341, -0.08052734, 0.03086464, 0.12930822, 0.107045054, 0.03136081, -0.11335949, 0.09541032, -0.015009924)); + target += mul(nc2, min16float4x4(0.023294786, -0.17904189, -0.036457974, -0.060965557, 0.088545635, 0.001061151, -0.016771115, 0.082081355, -0.0030623788, -0.05096391, 0.022067994, -0.078540295, -0.12912196, -0.045786213, 0.05568379, -0.16344398)); + target += mul(nd2, min16float4x4(0.043200932, 0.006267473, -0.081682056, 0.044593308, 0.03179784, 0.20806344, -0.038468197, 0.06644582, 0.01704569, -0.029287282, -0.0036700617, 0.018897371, -0.075105995, 0.09612947, -0.06442493, 0.012179776)); + target += mul(ne2, min16float4x4(-0.21926114, 0.18097721, -0.037700515, 0.016763914, -0.057943042, -0.06129067, 0.04456528, -0.2304425, 0.013301696, 0.11028081, -0.18095498, 0.14712757, 0.2271199, -0.3185643, -0.19932592, -0.08554962)); + target += mul(nf2, min16float4x4(0.0117652705, -0.041661818, 0.029219367, -0.046232816, 0.047820047, 0.068789035, -0.113418594, 0.1141295, -0.027060978, 0.07267708, 0.093252845, -0.049717877, -0.087836266, 0.14460698, 0.10277318, -0.04977497)); + target += mul(ng2, min16float4x4(0.022564596, -0.037228584, -0.065915406, -0.011077084, 0.030235467, -0.04677627, -0.06419004, -0.018991074, 0.034164365, -0.019168181, 0.022525655, -0.029373096, -0.079060145, 0.13279332, -0.08545939, -0.045388315)); + target += mul(nh2, min16float4x4(-0.14000517, -0.08309406, 0.13520917, -0.10369978, -0.016325317, 0.00970006, -0.048059512, 0.1412818, 0.040955327, 0.030759163, -0.108052924, 0.005294165, -0.10046129, 0.16592641, -0.035368618, -0.29051507)); + target += mul(ni2, min16float4x4(-0.09455044, 0.0005962807, 0.0006215668, -0.038142636, -0.03929331, 
-0.01591621, 0.0056410446, -0.036902174, -0.056509133, -0.10841171, 0.07702632, -0.08160013, 0.040747657, -0.08348532, 0.019081287, 0.020851197)); + target += mul(na3, min16float4x4(-0.03399592, 0.10141488, -0.0077629937, -0.17129703, -0.025233645, 0.052428465, -0.019579021, -0.072962284, 0.022322712, -0.18443614, -0.00848578, 0.0376278, 0.055581484, 0.06439001, -0.026564457, 0.015072123)); + target += mul(nb3, min16float4x4(0.11295866, -0.1541795, 0.11074539, -0.12757398, -0.11353885, 0.12023232, -0.07913168, 0.25957996, -0.0064171744, 0.08077023, 0.09673833, 0.008732368, 0.03630595, 0.059769, 0.028521406, 0.029331883)); + target += mul(nc3, min16float4x4(-0.081345834, -0.06722959, -0.13713932, 0.03613845, -0.084334835, 0.046838246, -0.004890033, -0.08524675, 0.15460378, -0.09410546, -0.058240023, 0.11844812, 0.00092362246, 0.028734036, 0.0028451593, 0.03558664)); + target += mul(nd3, min16float4x4(0.067000724, 0.08689177, 0.003695697, 0.08341895, -0.08124141, -0.20499983, 0.09505712, -0.07436812, -0.028131844, 0.050506454, -0.107579716, 0.058785282, 0.031196257, 0.021408495, -0.100359544, 0.07999305)); + target += mul(ne3, min16float4x4(-0.16514844, 0.117525734, 0.24123909, 0.09518423, 0.17757961, -0.28094006, 0.081966326, 0.0802129, 0.0011662474, 0.06366135, 0.07578068, -0.08616794, 0.19857462, -0.10196374, -0.13831666, -0.18653043)); + target += mul(nf3, min16float4x4(-0.06649859, 0.0935902, -0.19097336, 0.16118656, 0.2938468, -0.10315292, 0.08256489, -0.06169784, -0.05889727, -0.018046174, -0.17596339, 0.20343648, -0.08962845, -0.027532624, 0.059598826, -0.14278376)); + target += mul(ng3, min16float4x4(-0.0070921015, -0.07634683, -0.066166356, -0.06432544, 0.050059035, 0.20213397, -0.071587585, 0.031234715, 0.10629024, 0.044645656, -0.023101477, -0.022136679, 0.009119783, -0.10172394, 0.024746796, -0.1161207)); + target += mul(nh3, min16float4x4(-0.046572298, -0.06981039, 0.08314394, 0.043344617, 0.1914716, 0.0046652057, -0.0683364, 0.086023554, 0.06213587, 
-0.0077511827, -0.03336288, 0.1474879, -0.032717533, 0.078666836, -0.001740435, 0.048321523)); + target += mul(ni3, min16float4x4(0.18346673, -0.20763724, 0.05431475, -0.08291483, -0.0073792376, -0.053458065, 0.08561732, -0.103502, -0.06856406, 0.05193988, -0.009717332, 0.06446446, 0.050632656, 0.013681985, -0.02556495, 0.05056843)); + target += min16float4(-0.01824226, 0.05140684, 0.010533643, 0.017739987); + conv1ups[gxy] = target; + + target = mul(a1, min16float4x4(0.070670135, -0.026429666, 0.09446684, -0.04920855, -0.08720965, -0.022478819, -0.15962029, 0.29240617, -0.10499224, 0.10415364, 0.11922523, -0.08293139, 0.07846739, -0.15612845, -0.19753109, -0.033664245)); + target += mul(b1, min16float4x4(-0.10003188, 0.088794544, -0.028137686, -0.1375475, 0.079632774, -0.012540568, -0.092962824, 0.10438857, -0.12865996, -0.040098958, -0.030862473, 0.009116932, -0.14513193, 0.13843827, -0.14862274, 0.27156416)); + target += mul(c1, min16float4x4(0.03148634, -0.13966283, -0.022684515, 0.080294125, -0.013548243, -0.01112399, 0.021930493, -0.24562296, -0.029252343, -0.0053704586, 0.088651545, -0.10468119, 0.0077052945, 0.027455118, -0.008439029, -0.08633876)); + target += mul(d1, min16float4x4(-0.073491044, 0.11097277, -0.02937573, 0.045977436, -0.015563786, 0.04763272, -0.17349051, 0.02479734, 0.12201058, -0.09606755, -0.064500526, -0.068423286, -0.10828311, 0.0025430934, 0.060595006, 0.10702606)); + target += mul(e1, min16float4x4(-0.10012673, -0.026187293, 0.039673958, 0.25377232, 0.16539277, 0.015475691, -0.017826023, -0.037547242, 0.27426562, 0.039105, -0.29495236, -0.20741108, 0.3893781, -0.00018520994, 0.18736628, 0.016120607)); + target += mul(f1, min16float4x4(-0.0902328, -0.035078812, 0.0423949, 0.10428684, -0.012309703, -0.0022217801, 0.12843162, 0.008824024, 0.10457806, -0.13958204, 0.042961385, -0.17798209, 0.13051195, -0.2078117, 0.014258071, 0.27743495)); + target += mul(g1, min16float4x4(0.0037268966, -0.002057136, -0.086700045, -0.04034686, 
-0.039582066, -0.05536445, -0.013854305, 0.13898304, 0.08383669, -0.1389377, 0.09724791, 0.27256468, 0.0012985421, 0.026786802, -0.09553305, -0.08505046)); + target += mul(h1, min16float4x4(0.047094945, -0.15165734, -0.16622189, 0.27696493, 0.04804586, 0.017589863, -0.048407666, -0.1423487, -0.18051605, -0.037678123, -0.083375834, 0.21356659, 0.056051373, 0.058305956, 0.020808164, 0.20114677)); + target += mul(i1, min16float4x4(-0.06873173, 0.056631878, -0.09389161, -0.026553899, -0.005246827, 0.011163956, 0.0807366, 0.018891184, 0.037806395, -0.08414753, -0.29572666, 0.12225136, 0.028108165, -0.12746434, -0.1242189, 0.06427617)); + target += mul(a2, min16float4x4(-0.054436807, 0.0463667, -0.3160585, -0.26496625, -0.0016307884, 0.0027304688, 0.13524249, 0.14023106, 0.15203272, -0.0055950717, -0.047067486, -0.1299749, -0.023347244, -0.011924935, 0.04708069, 0.14064)); + target += mul(b2, min16float4x4(-0.15567084, -0.03462954, 0.014766895, 0.28104082, -0.015955932, 0.048590813, 0.14149605, 0.016979203, 0.15654798, -0.124170296, -0.000571697, 0.18732761, -0.15969957, 0.036891263, -0.08222836, 0.007162299)); + target += mul(c2, min16float4x4(-0.027358167, -0.05515796, -0.21783291, -0.061588667, 0.14288566, 0.034540724, -0.0779948, -0.004935965, 0.087642424, -0.03457867, 0.26657468, -0.08798545, 0.06278833, 0.01650169, -0.15035287, 0.043133624)); + target += mul(d2, min16float4x4(0.05577383, 0.058146708, 0.0057744626, -0.043521628, 0.14279243, -0.22507532, 0.0896487, -0.03373711, -0.29882178, 0.12674153, 0.21856095, -0.03654502, 0.09770278, 0.011492664, 0.01397184, 0.11037485)); + target += mul(e2, min16float4x4(0.14057921, -0.18916433, -0.10062621, -0.19464967, -0.19286343, -0.08279728, 0.0062218676, -0.15246014, 0.0960211, -0.3964747, -0.016336296, 0.028859172, -0.047788087, 0.032031618, 0.054299697, -0.11431765)); + target += mul(f2, min16float4x4(-0.15350376, 0.1362609, -0.011803502, 0.2660655, -0.037387744, 0.18536955, -0.0015025261, -0.011900626, -0.023042146, 
-0.15995252, 0.060023192, 0.08954088, 0.07074839, 0.059100557, -0.08593189, -0.045180846)); + target += mul(g2, min16float4x4(-0.031948235, 0.07176401, -0.007034352, -0.12552954, 0.049458012, -0.07971771, 0.0093457, -0.10731874, 0.07024961, 0.27386668, 0.07679444, -0.28798524, -0.06428793, -0.0057761013, 0.014161652, -0.0065095956)); + target += mul(h2, min16float4x4(-0.1427731, 0.0833077, 0.13927783, 0.016691789, -0.16832228, 0.10298729, 0.1446675, -0.2656778, 0.0788247, 0.13420862, 0.050337754, -0.08008961, 0.07605825, 0.04659439, -0.054331373, 0.074493684)); + target += mul(i2, min16float4x4(0.07614274, -0.050090652, -0.066727035, 0.055715825, -0.07636078, 0.08155946, -0.061731443, -0.022193443, 0.057011697, -0.009381379, 0.176684, -0.05981099, -0.04690691, 0.051825907, -0.019666756, 0.0017494732)); + target += mul(a3, min16float4x4(0.12878093, -0.091072194, 0.03426444, -0.0014821129, 0.04648442, -0.056241687, 0.12965083, -0.2177644, 0.03271057, 0.013664906, -0.27382636, 0.009116637, -0.020398485, 0.026515692, 0.0059792865, -0.10869647)); + target += mul(b3, min16float4x4(0.017064014, 0.012380988, 0.015886486, 0.041969348, -0.056818817, 0.057386417, -0.19103225, 0.02042478, 0.022307403, -0.16955635, -0.25923833, -0.19144051, 0.044084065, 0.09931404, 0.08665806, -0.17140177)); + target += mul(c3, min16float4x4(-0.034919903, -0.00735085, -0.0040107057, 0.013110185, 0.008756165, -0.11104751, -0.03863784, 0.20081028, 0.008359515, 0.056265604, 0.0035791632, 0.14127707, 0.008306366, -0.061028276, -0.01180833, 0.11239347)); + target += mul(d3, min16float4x4(-0.055210557, -0.0047766017, -0.040911432, 0.04214669, 0.015301695, 0.035733294, -0.09534393, 0.3189227, -0.043539703, 0.10847848, 0.052175194, 0.25319937, -0.075755194, 0.07450996, -0.2392008, 0.17029741)); + target += mul(e3, min16float4x4(0.008697264, -0.062783785, 0.23503996, 0.06680282, -0.10700762, -0.05921618, 0.12575574, 0.12539467, 0.21779932, -0.27365687, -0.08419621, -0.23255387, -0.097952545, 
-0.33015022, -0.27839977, 0.54275817)); + target += mul(f3, min16float4x4(0.043178167, -0.07644931, -0.002126049, -0.0041748723, 0.12747553, 0.05624526, 0.08894693, 0.1273868, 0.13564228, -0.029284991, -0.1010155, 0.0144336475, -0.067769796, 0.12993337, 0.23458317, -0.1404509)); + target += mul(g3, min16float4x4(0.037086505, 0.04712714, 0.00080463936, 0.026554452, -0.032055024, -0.0346718, 0.14792679, 0.025423491, 0.045839246, 0.040022433, -0.010968567, -0.03638554, 0.03469138, -0.048995998, -0.080627054, -0.15703341)); + target += mul(h3, min16float4x4(0.0022719046, -0.11156194, -0.1660571, 0.07095863, 0.06325309, 0.03638195, 0.011129683, -0.16795434, 0.05859281, -0.050576515, 0.025492875, 0.14741158, 0.16042823, -0.021238782, -0.10693587, 0.062508605)); + target += mul(i3, min16float4x4(0.04699144, -0.06268154, -0.032550193, 0.1368816, -0.046266492, -0.09626834, 0.035877157, -0.017621659, -0.025884021, 0.016501589, -0.033517126, -0.16266182, 0.0063534426, -0.034565207, 0.107733876, -0.19080792)); + target += mul(na1, min16float4x4(-0.01089889, -0.046437796, -0.2864276, -0.059123863, 0.010273228, 0.035363402, -0.18365921, 0.002496715, 0.010531512, -0.044639286, -0.14159343, -0.04712995, 0.031355694, 0.041651487, 0.04172989, -0.072659165)); + target += mul(nb1, min16float4x4(-0.29903612, 0.016968794, 0.2026591, 0.14354537, 0.210121, -0.1271222, 0.11928214, 0.075612746, 0.07222206, -0.113600664, -0.031380497, -0.04970697, -0.040690526, -0.024844045, -0.14514743, 0.10170265)); + target += mul(nc1, min16float4x4(0.00901007, -0.0077540767, -0.16780637, -0.0772044, -0.08349278, 0.035623573, -0.0036132522, -0.1559422, 0.079474956, -0.024358552, 0.05147624, -0.095216155, -0.001963766, 0.026185913, 0.041633602, -0.068779185)); + target += mul(nd1, min16float4x4(0.11536367, 0.06698426, -0.019352471, -0.027348887, 0.12543406, -0.017715944, -0.22333942, -0.07524913, -0.023550004, 0.09020137, 0.15082505, -0.019156344, 0.014714152, -0.100751296, -0.10988814, 0.013269792)); + 
target += mul(ne1, min16float4x4(0.23938964, -0.015321653, -0.085038215, -0.21858668, -0.15793826, -0.1725926, 0.16878416, -0.15579711, -0.21086636, -0.023652412, -0.10312092, 0.047774162, 0.11063097, 0.02804365, -0.049057744, -0.20330532)); + target += mul(nf1, min16float4x4(0.058630574, 0.10365072, -0.112122595, -0.10462442, -0.04204145, 0.0060419035, -0.038622607, -0.22971797, -0.081746876, 0.110261, -0.03279762, 0.10083948, -0.07525642, 0.096350044, -0.15403591, 0.01831559)); + target += mul(ng1, min16float4x4(-0.013126955, 0.11560779, 0.06401061, -0.014257845, -0.078378044, 0.07452937, 0.030035159, 0.07133207, -0.072352365, -0.049404953, -0.2006817, -0.04745451, -0.0645119, 0.0849615, 0.053003483, 0.07766129)); + target += mul(nh1, min16float4x4(0.07683494, -0.47826648, 0.05708172, 0.12041683, 0.18084203, -0.08476069, 0.093064874, 0.016264802, 0.06801874, -0.01283242, -0.13347803, -0.035351828, -0.0011718989, -0.12699558, -0.0240836, -0.08060763)); + target += mul(ni1, min16float4x4(0.0521042, -0.062541164, 0.05483789, 0.14211908, 0.08606814, 0.06433033, -0.23270494, 0.05307593, 0.09299324, 0.04586578, -0.1193637, 0.12056507, -0.06442679, 0.06762315, -0.010547303, 0.031680685)); + target += mul(na2, min16float4x4(-0.09215318, -0.115724616, -0.061507307, 0.08273653, 0.0265886, -0.092683844, -0.22037667, -0.023114366, 0.028223295, -0.029118685, -0.088996224, 0.1023557, -0.089898214, 0.15436162, 0.16985597, 0.1431367)); + target += mul(nb2, min16float4x4(0.10560199, 0.13460231, 0.024534458, 0.1370791, 0.16920403, 0.013769043, -0.004941373, -0.22188903, -0.1193022, 0.07823969, -0.097713776, 0.044269208, 0.036816355, -0.11568587, -0.07947363, 0.022213666)); + target += mul(nc2, min16float4x4(0.002128253, 0.014331295, 0.09004623, -0.12958615, 0.0048723617, -0.072075516, 0.024190098, 0.011900665, 0.038696863, 0.07110043, -0.10347002, 0.082676366, 0.017796163, 0.004747536, 0.11188511, -0.21652836)); + target += mul(nd2, min16float4x4(-0.051317807, 0.13453357, 
0.05310306, -0.033790052, -0.06231268, 0.11130248, -0.075370945, 0.2774124, 0.04305133, -0.045057327, -0.04373203, -0.10055409, 0.042824138, -0.021799369, -0.08762204, -0.16729161)); + target += mul(ne2, min16float4x4(0.08727262, -0.074025065, -0.113067836, -0.07882044, 0.04476854, -0.14519121, -0.0434838, -0.010525559, -0.0425304, 0.106957085, -0.28644025, -0.105096966, 0.12650728, -0.15108573, 0.013723224, 0.5163331)); + target += mul(nf2, min16float4x4(-0.021519013, -0.05317946, 0.0036545463, 0.0003156711, 0.12984163, -0.11362556, 0.061670557, -0.030158816, 0.04674806, 0.16352096, -0.23135264, 0.074876174, 0.0047455966, -0.120593436, 0.032926966, -0.20865184)); + target += mul(ng2, min16float4x4(-0.029197322, -0.09204084, -0.13026133, -0.020570219, 0.043402288, -0.016610064, 0.08961119, -0.09460752, -0.057213686, -0.14044005, 0.080606215, 0.12573113, 0.094055034, 0.06523493, -0.16264567, -0.0716556)); + target += mul(nh2, min16float4x4(-0.020557933, 0.077145614, 0.04620034, 0.22271551, 0.114781894, 0.11590448, -0.03233266, 0.13224865, -0.054499403, -0.01435028, -0.09684464, 0.022300925, 0.16768926, -0.019053463, 0.08804071, -0.14398381)); + target += mul(ni2, min16float4x4(0.0025323853, -0.016476262, 0.12608051, 0.016324151, -0.0035798363, 0.020308342, 0.06474364, -0.042083416, -0.08742628, 0.016960703, -0.120870225, 0.07373239, -0.06463355, -0.018745359, -0.02229239, -0.1039809)); + target += mul(na3, min16float4x4(-0.008440462, -0.15268475, -0.09420959, -0.07718843, 0.35601637, -0.0010803771, 0.050411247, -0.09859693, -0.008227993, 0.06407621, -0.19121973, -0.15547852, -0.033705134, 0.023920614, -0.12611681, 0.021967601)); + target += mul(nb3, min16float4x4(-0.24474435, 0.07716706, -0.24876165, -0.18184067, -0.020811914, 0.07414089, -0.21809489, 0.015727887, 0.12278457, -0.08471355, -0.06071567, -0.07017344, -0.064291485, -0.07627711, 0.076017715, 0.2072293)); + target += mul(nc3, min16float4x4(0.013676314, -0.04966636, 0.06895822, 0.15210962, 0.07330876, 
-0.034188077, -0.0173066, 0.11160374, -0.12326202, -0.002551885, 0.0015338673, 0.1079974, 0.03733164, 0.077835836, -0.07733004, -0.0058571417)); + target += mul(nd3, min16float4x4(-0.1854433, 0.02924247, -0.14843488, 0.18941449, -0.17652206, -0.13730201, -0.29041716, -0.12161381, -0.04599312, 0.16662349, 0.045855995, -0.005569671, -0.050993398, 0.019462017, -0.10552683, -0.19930908)); + target += mul(ne3, min16float4x4(0.08246259, 0.2602547, 0.16599776, -0.12149122, -0.048151806, 0.12042248, -0.16163243, 0.00087805535, 0.0536958, 0.05350576, 0.08406917, -0.060227945, 0.19056156, -0.2276745, -0.13755281, 0.39423308)); + target += mul(nf3, min16float4x4(-0.0775391, 0.105803244, 0.08474868, -0.019011196, 0.026801828, -0.036453005, -0.018443616, -0.03005072, -0.10748735, 0.080679856, 0.07718584, 0.07871323, 0.030023575, 0.022230582, -0.090973295, -0.1363233)); + target += mul(ng3, min16float4x4(-0.14770739, -0.09530047, 0.10400556, -0.115337685, 0.14459239, 0.1432794, -0.070606485, -0.053847175, 0.09378594, -0.09445331, 0.088633865, 0.071158156, 0.04437499, -0.04694172, -0.059354205, -0.00041449978)); + target += mul(nh3, min16float4x4(0.016041227, -0.2313572, -0.011389983, 0.030348316, 0.07260269, 0.009828401, -0.06116872, 0.026138552, -0.15607156, 0.042709354, 0.079162516, -0.16348995, -0.019872159, 0.13251646, 0.020712351, -0.16324571)); + target += mul(ni3, min16float4x4(-0.08813695, 0.093021385, 0.019460218, 0.096429825, -0.010391231, 0.0216966, -0.1490125, -0.04100963, -0.024641959, 0.044109546, 0.08043847, -0.03676336, -0.026315603, 0.025947884, -0.10771212, 0.0010732685)); + target += min16float4(0.003290131, -0.0154397, 0.04528908, -0.04218369); + conv1ups1[gxy] = target; +} + +//!PASS 8 +//!DESC Conv-3x3x3x16 +//!IN INPUT, conv1ups, conv1ups1 +//!OUT OUTPUT +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 + +void Pass8(uint2 blockStart, uint3 threadId) { + uint2 gxy = Rmp8x8(threadId.x) + blockStart; + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= 
outputSize.x || gxy.y >= outputSize.y) { + return; + } + + const float2 outputPt = GetOutputPt(); + const float2 pos = (gxy + 0.5f) * outputPt; + + // [ a, d, g ] + // [ b, e, h ] + // [ c, f, i ] + min16float4 a1 = conv1ups.SampleLevel(sam, pos - outputPt, 0); + min16float4 b1 = conv1ups.SampleLevel(sam, pos + float2(-outputPt.x, 0), 0); + min16float4 c1 = conv1ups.SampleLevel(sam, pos + float2(-outputPt.x, outputPt.y), 0); + min16float4 d1 = conv1ups.SampleLevel(sam, pos + float2(0, -outputPt.y), 0); + min16float4 e1 = conv1ups.SampleLevel(sam, pos, 0); + min16float4 f1 = conv1ups.SampleLevel(sam, pos + float2(0, outputPt.y), 0); + min16float4 g1 = conv1ups.SampleLevel(sam, pos + float2(outputPt.x, -outputPt.y), 0); + min16float4 h1 = conv1ups.SampleLevel(sam, pos + float2(outputPt.x, 0), 0); + min16float4 i1 = conv1ups.SampleLevel(sam, pos + outputPt, 0); + + min16float4 na1 = max(-a1, 0); + min16float4 nb1 = max(-b1, 0); + min16float4 nc1 = max(-c1, 0); + min16float4 nd1 = max(-d1, 0); + min16float4 ne1 = max(-e1, 0); + min16float4 nf1 = max(-f1, 0); + min16float4 ng1 = max(-g1, 0); + min16float4 nh1 = max(-h1, 0); + min16float4 ni1 = max(-i1, 0); + + a1 = max(a1, 0); + b1 = max(b1, 0); + c1 = max(c1, 0); + d1 = max(d1, 0); + e1 = max(e1, 0); + f1 = max(f1, 0); + g1 = max(g1, 0); + h1 = max(h1, 0); + i1 = max(i1, 0); + + min16float4 a2 = conv1ups1.SampleLevel(sam, pos - outputPt, 0); + min16float4 b2 = conv1ups1.SampleLevel(sam, pos + float2(-outputPt.x, 0), 0); + min16float4 c2 = conv1ups1.SampleLevel(sam, pos + float2(-outputPt.x, outputPt.y), 0); + min16float4 d2 = conv1ups1.SampleLevel(sam, pos + float2(0, -outputPt.y), 0); + min16float4 e2 = conv1ups1.SampleLevel(sam, pos, 0); + min16float4 f2 = conv1ups1.SampleLevel(sam, pos + float2(0, outputPt.y), 0); + min16float4 g2 = conv1ups1.SampleLevel(sam, pos + float2(outputPt.x, -outputPt.y), 0); + min16float4 h2 = conv1ups1.SampleLevel(sam, pos + float2(outputPt.x, 0), 0); + min16float4 i2 = 
conv1ups1.SampleLevel(sam, pos + outputPt, 0); + + min16float4 na2 = max(-a2, 0); + min16float4 nb2 = max(-b2, 0); + min16float4 nc2 = max(-c2, 0); + min16float4 nd2 = max(-d2, 0); + min16float4 ne2 = max(-e2, 0); + min16float4 nf2 = max(-f2, 0); + min16float4 ng2 = max(-g2, 0); + min16float4 nh2 = max(-h2, 0); + min16float4 ni2 = max(-i2, 0); + + a2 = max(a2, 0); + b2 = max(b2, 0); + c2 = max(c2, 0); + d2 = max(d2, 0); + e2 = max(e2, 0); + f2 = max(f2, 0); + g2 = max(g2, 0); + h2 = max(h2, 0); + i2 = max(i2, 0); + + min16float3 target = mul(a1, min16float4x3(-0.009692998, -0.008524317, 0.0010432196, 0.00057165127, -0.011818117, 0.0014487396, 0.0049518407, -0.001888361, -0.013262905, 0.05004511, 0.023134997, -0.016969386)); + target += mul(b1, min16float4x3(0.008501838, -0.001176035, -0.0035942376, 0.009015378, 0.011752493, 0.0061198603, -0.056669727, -0.035067406, -0.040517025, -0.039194923, 0.007251104, -0.0124227265)); + target += mul(c1, min16float4x3(0.010942934, 0.0100984, 0.0133265015, -0.019482462, -0.014820488, -0.021098822, -0.02860967, -0.10633767, -0.03296336, -0.011277147, -0.007915212, 0.008589044)); + target += mul(d1, min16float4x3(-0.004447993, -0.0019008318, 0.0054705385, -0.008042658, -0.0007432871, -0.0091506895, 0.010537624, 0.047716837, 0.01504048, -0.108882375, -0.06776622, -0.04354868)); + target += mul(e1, min16float4x3(-0.0030183722, 0.007729766, -0.007144855, 0.029383881, 0.024865916, 0.028182652, 0.16122057, 0.16675095, 0.18204775, 0.12284804, 0.031072017, 0.042543165)); + target += mul(f1, min16float4x3(0.0012941018, -0.00043673834, 0.009252594, 0.009156994, 0.0138289975, 0.015774839, -0.051840767, -0.07687406, -0.069361895, 0.017338578, 0.022834148, -0.0025963243)); + target += mul(g1, min16float4x3(0.01646397, 0.0028061832, 0.007990534, -0.0073729097, -0.011168949, -0.0024975399, -0.0066431006, -0.014508122, -0.005740217, -0.06746655, -0.02083968, -0.05371696)); + target += mul(h1, min16float4x3(-0.013606154, 0.0062064505, 
0.008410423, 0.0038487792, 0.012054022, 0.007878108, 0.034913104, -0.008084116, 0.014990575, -0.005912989, 0.021872269, 0.055241022)); + target += mul(i1, min16float4x3(0.014251287, 0.0016604483, -0.006772879, 0.0028646574, 0.0015996173, -0.002210879, -0.0323296, 0.015729006, -0.017242312, -0.03718726, -0.03889927, -0.041001298)); + target += mul(a2, min16float4x3(0.007536155, 0.009848646, 0.007846354, 0.019176869, 0.019928271, 0.031777207, 0.026086887, 0.01971131, -0.017595863, 0.012899679, 0.0026994154, 0.008934449)); + target += mul(b2, min16float4x3(0.017639438, 0.01536491, 0.011161806, 0.034244597, 0.025257796, 0.031185368, -0.18240982, 0.038758054, 0.13050976, -0.0075258785, -0.0034674285, 0.008525112)); + target += mul(c2, min16float4x3(0.01788933, 0.017623115, 0.020215526, 0.0045994874, -0.0031487814, 0.003752946, -0.06494309, -0.07747321, 0.06544584, -0.004555707, -0.001776991, -0.017493976)); + target += mul(d2, min16float4x3(0.01359033, 0.02045422, 0.008234278, -0.008073938, -0.036093507, -0.0027978886, -0.37033105, 0.009709281, 0.28951523, 0.003258166, 0.0044517294, -0.003740991)); + target += mul(e2, min16float4x3(-0.036449786, -0.03035285, -0.025356997, 0.097153045, 0.10745537, 0.08421458, 0.043944303, -0.004867672, -0.15142196, 0.007044417, -0.00785739, 0.007504869)); + target += mul(f2, min16float4x3(-0.007951127, -0.008863303, -0.012213915, 0.007273406, 0.00944796, -0.002621692, 0.2919848, 0.06830943, -0.16119143, -0.0033908382, 0.007383878, 0.007847461)); + target += mul(g2, min16float4x3(0.011670784, 0.00805604, 0.013980011, -0.032067183, -0.045659855, -0.03957935, 0.14678614, 0.014678316, -0.11203954, -0.002894618, 0.008089503, 0.0056759617)); + target += mul(h2, min16float4x3(0.008941132, -0.008732514, -0.004122878, -0.01872218, 0.0058594598, -0.014218105, 0.15922345, -0.00061763515, -0.10605325, 0.0059564817, 0.0062196897, -0.0031137357)); + target += mul(i2, min16float4x3(-0.027044835, -0.0113663385, -0.018061407, -0.01064461, 0.0004394501, 
0.0068360637, 0.12218274, -0.025980305, 0.060082816, 0.002298275, -0.005121948, -0.0018933173)); + target += mul(na1, min16float4x3(-0.014044151, -0.0055593867, -0.0091519095, 0.018282808, -0.054974634, -0.02104256, 0.004737865, 0.009833153, 0.0050819647, 0.009256364, 0.004517343, -0.0012567915)); + target += mul(nb1, min16float4x3(0.035084303, 0.019331766, -0.006399992, -0.08042094, -0.14020248, -0.13438301, -0.0014871466, -0.0071605383, -0.0070841024, 0.001705956, -0.010914731, -0.0022737188)); + target += mul(nc1, min16float4x3(-0.024562238, -0.025555398, 0.00043982622, 0.04687896, 0.062265635, 0.06194832, 0.016357735, 0.0056735775, 0.01868422, 0.0035063815, 0.0050708377, 0.009102912)); + target += mul(nd1, min16float4x3(0.024276884, 0.031309772, 0.053946678, 0.027081756, 0.023922514, 0.051302873, -0.005081098, -0.013981954, -0.007141123, -0.017242068, -0.00036468913, 0.0071311933)); + target += mul(ne1, min16float4x3(0.096000426, 0.12978247, 0.089689955, 0.03013154, 0.09065384, 0.010782777, -0.009774296, -0.010487119, -0.018002238, 0.027585275, 0.018800229, 0.007482455)); + target += mul(nf1, min16float4x3(-0.031725004, -0.05638542, -0.06471826, -0.038512804, -0.036520924, -0.026658544, 0.0019714478, 0.004168433, 0.0036675548, 0.009312959, -0.009726487, 0.003937418)); + target += mul(ng1, min16float4x3(0.008056586, -0.03609238, -0.0035044104, -0.0052967947, 0.010446542, 0.010737699, -0.00941154, -0.005599727, -0.0071648047, 0.0028106347, 0.0063315486, 0.0005620387)); + target += mul(nh1, min16float4x3(-0.10104362, -0.06228799, -0.057575073, -0.0008651546, -0.010849562, -0.0066441186, -0.016244762, -0.0053532585, -0.012414173, -0.012507298, 0.005470365, 0.0032063425)); + target += mul(ni1, min16float4x3(-0.019126823, -0.022827078, -0.01918732, -0.0049576303, -0.010899637, -0.01990915, 0.019013962, 0.007385637, 0.015615745, 0.025586424, 0.02317941, 0.019631773)); + target += mul(na2, min16float4x3(-0.011578009, -0.0037521352, -0.0044622095, -0.0022668878, 
0.0022691146, -0.00570573, 0.0052153515, 0.005547525, 0.0033032992, 0.009927488, -0.0061824876, -0.016856432)); + target += mul(nb2, min16float4x3(-0.07627339, -0.0595728, -0.08247348, -0.016201988, -0.019643232, -0.021891698, -0.0033560628, 0.0056153075, 0.005510208, -0.0061155884, 0.004726241, 0.03613314)); + target += mul(nc2, min16float4x3(-0.026918657, -0.017315133, -0.021586075, -0.021625597, -0.008547036, -0.011233614, -0.0047514364, -0.0029167454, -0.00583421, 0.012949899, 0.0035817428, -0.0045735473)); + target += mul(nd2, min16float4x3(-0.08581085, -0.07063111, -0.06381294, -0.0040735947, -0.012934923, -0.0057904166, -0.0077691195, -0.00034605907, 0.0023017807, -0.00029635165, -0.042357627, -0.057994146)); + target += mul(ne2, min16float4x3(0.05193261, 0.047533646, 0.071092665, -0.015042884, -0.023481138, -0.020945435, 0.008216166, 0.004034294, 0.0030410702, 0.10532969, 0.13052966, 0.11042539)); + target += mul(nf2, min16float4x3(0.052652936, 0.045103617, 0.036393207, 0.0018712351, -0.009865708, -0.00591473, -0.0008652197, 7.966737e-05, -0.004292879, -0.013765752, -0.0603564, 0.032057546)); + target += mul(ng2, min16float4x3(0.0020095943, -0.014555452, -0.008721001, 0.00085926603, -0.0012287357, 0.007974135, 0.004697991, -1.4738258e-05, -0.0048043244, 0.047545042, 0.099660076, 0.09649951)); + target += mul(nh2, min16float4x3(0.024352267, 0.03303334, 0.02903438, 0.0062978864, 0.014672455, 0.0043003284, -0.0017531263, -0.0032476797, 0.001345206, -0.20736417, -0.1745426, -0.32957983)); + target += mul(ni2, min16float4x3(0.027512033, 0.029760962, 0.033007182, 9.0356014e-05, 0.0061743665, 0.0036443318, -0.016802983, -0.019364875, -0.014311061, 0.021530075, 0.059616566, 0.07120056)); + target += min16float3(-0.0007544955, -0.0007692414, 0.00032997545); + + OUTPUT[gxy] = float4(target + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); +} diff --git a/src/Effects/Anime4K/Anime4K_Upscale_L.hlsl b/src/Effects/Anime4K/Anime4K_Upscale_L.hlsl index 55f6c9ed7..6147b1063 100644 
--- a/src/Effects/Anime4K/Anime4K_Upscale_L.hlsl +++ b/src/Effects/Anime4K/Anime4K_Upscale_L.hlsl @@ -2,22 +2,17 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Upscale/Anime4K_Upscale_CNN_x2_L.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!SORT_NAME Anime4K_Upscale_1 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; - -//!SAMPLER -//!FILTER LINEAR -SamplerState sam1; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -43,6 +38,14 @@ Texture2D tex3; //!FORMAT R16G16B16A16_FLOAT Texture2D tex4; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam1; + //!PASS 1 //!DESC Conv-4x3x3x3 @@ -446,12 +449,15 @@ void Pass3(uint2 blockStart, uint3 threadId) { //!PASS 4 //!DESC Conv-4x3x3x16, Depth-to-Space //!IN INPUT, tex1, tex2 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 void Pass4(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -638,23 +644,17 @@ void Pass4(uint2 blockStart, uint3 threadId) { float2 outputPt = GetOutputPt(); pos -= 0.5f * outputPt; - WriteToOutput(gxy, float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb); + OUTPUT[gxy] = float4(float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x += 1u; + ++gxy.x; pos.x += outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.y += 1u; + ++gxy.y; pos.y += outputPt.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.w, target2.w, target3.w) 
+ INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x -= 1u; + --gxy.x; pos.x -= outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Upscale_S.hlsl b/src/Effects/Anime4K/Anime4K_Upscale_S.hlsl index 4c9cb9b83..4030ed2f1 100644 --- a/src/Effects/Anime4K/Anime4K_Upscale_S.hlsl +++ b/src/Effects/Anime4K/Anime4K_Upscale_S.hlsl @@ -2,15 +2,18 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Upscale/Anime4K_Upscale_CNN_x2_S.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!SORT_NAME Anime4K_Upscale_0 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -238,6 +241,7 @@ void Pass3(uint2 blockStart, uint3 threadId) { //!PASS 4 //!DESC Conv-4x3x3x8, Depth-to-Space //!IN INPUT, tex1 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -282,7 +286,8 @@ float4 A4KS4(float2 pos) { void Pass4(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -293,23 +298,17 @@ void Pass4(uint2 blockStart, uint3 threadId) { float4 c = A4KS4(pos); pos -= 0.5f * outputPt; - WriteToOutput(gxy, c.x + INPUT.SampleLevel(sam1, pos, 0).rgb); - - gxy.x += 1u; + OUTPUT[gxy] = float4(c.x + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); + + ++gxy.x; pos.x += outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, c.y + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(c.y + 
INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.y += 1u; + ++gxy.y; pos.y += outputPt.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, c.w + INPUT.SampleLevel(sam1, pos, 0).rgb); - } - - gxy.x -= 1u; + OUTPUT[gxy] = float4(c.w + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); + + --gxy.x; pos.x -= outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, c.z + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(c.z + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Upscale_UL.hlsl b/src/Effects/Anime4K/Anime4K_Upscale_UL.hlsl index f03ee4d64..c65991785 100644 --- a/src/Effects/Anime4K/Anime4K_Upscale_UL.hlsl +++ b/src/Effects/Anime4K/Anime4K_Upscale_UL.hlsl @@ -2,22 +2,17 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/78e4f78f65b772e94bae6e7db5c49af1e889f784/glsl/Upscale/Anime4K_Upscale_CNN_x2_UL.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!SORT_NAME Anime4K_Upscale_3 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; - -//!SAMPLER -//!FILTER LINEAR -SamplerState sam1; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -145,6 +140,15 @@ Texture2D conv2d_6_tf1; //!FORMAT R16G16B16A16_FLOAT Texture2D conv2d_6_tf2; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam1; + + //!PASS 1 //!DESC Conv-4x3x3x3 //!IN INPUT @@ -1929,12 +1933,15 @@ void Pass7(uint2 blockStart, uint3 threadId) { //!PASS 8 //!DESC Conv-4x1x1x120, Depth-to-Space //!IN INPUT, conv2d_2_tf, conv2d_2_tf1, conv2d_2_tf2, conv2d_3_tf, conv2d_3_tf1, conv2d_3_tf2, conv2d_4_tf, conv2d_4_tf1, conv2d_4_tf2, conv2d_5_tf, conv2d_5_tf1, conv2d_5_tf2, conv2d_6_tf, conv2d_6_tf1, conv2d_6_tf2 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 void Pass8(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if 
(!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -2088,23 +2095,17 @@ void Pass8(uint2 blockStart, uint3 threadId) { float2 outputPt = GetOutputPt(); pos -= 0.5f * outputPt; - WriteToOutput(gxy, float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb); + OUTPUT[gxy] = float4(float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x += 1u; + ++gxy.x; pos.x += outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } - - gxy.y += 1u; + OUTPUT[gxy] = float4(float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); + + ++gxy.y; pos.y += outputPt.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x -= 1u; + --gxy.x; pos.x -= outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); } diff --git a/src/Effects/Anime4K/Anime4K_Upscale_VL.hlsl b/src/Effects/Anime4K/Anime4K_Upscale_VL.hlsl index a0337884d..bb8f1ca78 100644 --- a/src/Effects/Anime4K/Anime4K_Upscale_VL.hlsl +++ b/src/Effects/Anime4K/Anime4K_Upscale_VL.hlsl @@ -2,22 +2,17 @@ // 移植自 https://github.com/bloc97/Anime4K/blob/78e4f78f65b772e94bae6e7db5c49af1e889f784/glsl/Upscale/Anime4K_Upscale_CNN_x2_VL.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!SORT_NAME Anime4K_Upscale_2 //!TEXTURE Texture2D INPUT; -//!SAMPLER -//!FILTER POINT -SamplerState sam; - -//!SAMPLER -//!FILTER LINEAR -SamplerState sam1; 
+//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; //!TEXTURE //!WIDTH INPUT_WIDTH @@ -103,6 +98,15 @@ Texture2D conv2d_6_tf; //!FORMAT R16G16B16A16_FLOAT Texture2D conv2d_6_tf1; +//!SAMPLER +//!FILTER POINT +SamplerState sam; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam1; + + //!PASS 1 //!DESC Conv-4x3x3x3 //!IN INPUT @@ -1143,12 +1147,15 @@ void Pass7(uint2 blockStart, uint3 threadId) { //!PASS 8 //!DESC Conv-4x1x1x112, Depth-to-Space //!IN INPUT, conv2d_tf, conv2d_tf1, conv2d_1_tf, conv2d_1_tf1, conv2d_2_tf, conv2d_2_tf1, conv2d_3_tf, conv2d_3_tf1, conv2d_4_tf, conv2d_4_tf1, conv2d_5_tf, conv2d_5_tf1, conv2d_6_tf, conv2d_6_tf1 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 void Pass8(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -1293,23 +1300,17 @@ void Pass8(uint2 blockStart, uint3 threadId) { float2 outputPt = GetOutputPt(); pos -= 0.5f * outputPt; - WriteToOutput(gxy, float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb); + OUTPUT[gxy] = float4(float3(target1.x, target2.x, target3.x) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x += 1u; + ++gxy.x; pos.x += outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } - - gxy.y += 1u; + OUTPUT[gxy] = float4(float3(target1.y, target2.y, target3.y) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); + + ++gxy.y; pos.y += outputPt.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.w, target2.w, target3.w) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); - gxy.x -= 1u; + --gxy.x; pos.x -= outputPt.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(target1.z, 
target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb); - } + OUTPUT[gxy] = float4(float3(target1.z, target2.z, target3.z) + INPUT.SampleLevel(sam1, pos, 0).rgb, 1); } diff --git a/src/Effects/Bicubic.hlsl b/src/Effects/Bicubic.hlsl index 14450415f..b4f2ce710 100644 --- a/src/Effects/Bicubic.hlsl +++ b/src/Effects/Bicubic.hlsl @@ -2,8 +2,7 @@ // 移植自 https://github.com/ActualMandM/cemu_graphic_packs/blob/468d165cf27dae13a06e8bdc3d588d0af775ad91/Filters/Bicubic/output.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!GENERIC_DOWNSCALER +//!VERSION 4 //!PARAMETER @@ -27,6 +26,9 @@ float paramC; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER LINEAR SamplerState sam; @@ -35,7 +37,7 @@ SamplerState sam; //!PASS 1 //!STYLE PS //!IN INPUT - +//!OUT OUTPUT float weight(float x) { const float B = paramB; @@ -93,20 +95,20 @@ float4 Pass1(float2 pos) { int2 coord_top_left = int2(max(uv0 * inputSize, 0.5)); int2 coord_bottom_right = int2(min(uv3 * inputSize, inputSize - 0.5)); - float4 top = INPUT.Load(int3(coord_top_left, 0)) * rowtaps.x; - top += INPUT.SampleLevel(sam, float2(u_middle, uv0.y), 0) * u_weight_sum; - top += INPUT.Load(int3(coord_bottom_right.x, coord_top_left.y, 0)) * rowtaps.w; - float4 total = top * coltaps.x; + float3 top = INPUT.Load(int3(coord_top_left, 0)).rgb * rowtaps.x; + top += INPUT.SampleLevel(sam, float2(u_middle, uv0.y), 0).rgb * u_weight_sum; + top += INPUT.Load(int3(coord_bottom_right.x, coord_top_left.y, 0)).rgb * rowtaps.w; + float3 total = top * coltaps.x; - float4 middle = INPUT.SampleLevel(sam, float2(uv0.x, v_middle), 0) * rowtaps.x; - middle += INPUT.SampleLevel(sam, float2(u_middle, v_middle), 0) * u_weight_sum; - middle += INPUT.SampleLevel(sam, float2(uv3.x, v_middle), 0) * rowtaps.w; + float3 middle = INPUT.SampleLevel(sam, float2(uv0.x, v_middle), 0).rgb * rowtaps.x; + middle += INPUT.SampleLevel(sam, float2(u_middle, v_middle), 0).rgb * u_weight_sum; + middle += INPUT.SampleLevel(sam, float2(uv3.x, 
v_middle), 0).rgb * rowtaps.w; total += middle * v_weight_sum; - float4 bottom = INPUT.Load(int3(coord_top_left.x, coord_bottom_right.y, 0)) * rowtaps.x; - bottom += INPUT.SampleLevel(sam, float2(u_middle, uv3.y), 0) * u_weight_sum; - bottom += INPUT.Load(int3(coord_bottom_right, 0)) * rowtaps.w; + float3 bottom = INPUT.Load(int3(coord_top_left.x, coord_bottom_right.y, 0)).rgb * rowtaps.x; + bottom += INPUT.SampleLevel(sam, float2(u_middle, uv3.y), 0).rgb * u_weight_sum; + bottom += INPUT.Load(int3(coord_bottom_right, 0)).rgb * rowtaps.w; total += bottom * coltaps.w; - return total; + return float4(total, 1); } diff --git a/src/Effects/Bilinear.hlsl b/src/Effects/Bilinear.hlsl index c7b7f44da..d8a3dd0db 100644 --- a/src/Effects/Bilinear.hlsl +++ b/src/Effects/Bilinear.hlsl @@ -1,20 +1,20 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!GENERIC_DOWNSCALER - +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER LINEAR SamplerState sam; - //!PASS 1 //!STYLE PS //!IN INPUT - +//!OUT OUTPUT float4 Pass1(float2 pos) { return INPUT.SampleLevel(sam, pos, 0); } diff --git a/src/Effects/CAS/CAS.hlsl b/src/Effects/CAS/CAS.hlsl index 0f09c154e..fc38bebb3 100644 --- a/src/Effects/CAS/CAS.hlsl +++ b/src/Effects/CAS/CAS.hlsl @@ -1,9 +1,8 @@ // 移植自 https://github.com/GPUOpen-Effects/FidelityFX-CAS/blob/master/ffx-cas/ffx_cas.h //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 + //!PARAMETER //!LABEL Sharpness @@ -16,6 +15,11 @@ float sharpness; //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -23,6 +27,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -35,254 +40,244 @@ SamplerState sam; #ifdef MP_FP16 void CasFilterH( - MF3 src[4][4], - uint pos, - MF peak, - // Output values are for 2 8x8 tiles in a 16x8 region. 
- // pix.x = right 8x8 tile - // pix.y = left 8x8 tile - // This enables later processing to easily be packed as well. - out MF2 pixR, - out MF2 pixG, - out MF2 pixB + MF3 src[4][4], + uint pos, + MF peak, + // Output values are for 2 8x8 tiles in a 16x8 region. + // pix.x = right 8x8 tile + // pix.y = left 8x8 tile + // This enables later processing to easily be packed as well. + out MF2 pixR, + out MF2 pixG, + out MF2 pixB ) { - // AOS to SOA conversion. - MF2 aR = MF2(src[0][pos + 0].r, src[1][pos + 0].r); - MF2 aG = MF2(src[0][pos + 0].g, src[1][pos + 0].g); - MF2 aB = MF2(src[0][pos + 0].b, src[1][pos + 0].b); - MF2 bR = MF2(src[1][pos + 0].r, src[2][pos + 0].r); - MF2 bG = MF2(src[1][pos + 0].g, src[2][pos + 0].g); - MF2 bB = MF2(src[1][pos + 0].b, src[2][pos + 0].b); - MF2 cR = MF2(src[2][pos + 0].r, src[3][pos + 0].r); - MF2 cG = MF2(src[2][pos + 0].g, src[3][pos + 0].g); - MF2 cB = MF2(src[2][pos + 0].b, src[3][pos + 0].b); - MF2 dR = MF2(src[0][pos + 1].r, src[1][pos + 1].r); - MF2 dG = MF2(src[0][pos + 1].g, src[1][pos + 1].g); - MF2 dB = MF2(src[0][pos + 1].b, src[1][pos + 1].b); - MF2 eR = MF2(src[1][pos + 1].r, src[2][pos + 1].r); - MF2 eG = MF2(src[1][pos + 1].g, src[2][pos + 1].g); - MF2 eB = MF2(src[1][pos + 1].b, src[2][pos + 1].b); - MF2 fR = MF2(src[2][pos + 1].r, src[3][pos + 1].r); - MF2 fG = MF2(src[2][pos + 1].g, src[3][pos + 1].g); - MF2 fB = MF2(src[2][pos + 1].b, src[3][pos + 1].b); - MF2 gR = MF2(src[0][pos + 2].r, src[1][pos + 2].r); - MF2 gG = MF2(src[0][pos + 2].g, src[1][pos + 2].g); - MF2 gB = MF2(src[0][pos + 2].b, src[1][pos + 2].b); - MF2 hR = MF2(src[1][pos + 2].r, src[2][pos + 2].r); - MF2 hG = MF2(src[1][pos + 2].g, src[2][pos + 2].g); - MF2 hB = MF2(src[1][pos + 2].b, src[2][pos + 2].b); - MF2 iR = MF2(src[2][pos + 2].r, src[3][pos + 2].r); - MF2 iG = MF2(src[2][pos + 2].g, src[3][pos + 2].g); - MF2 iB = MF2(src[2][pos + 2].b, src[3][pos + 2].b); - - // Soft min and max. 
- MF2 mnR = min(min(fR, hR), min(min(bR, dR), eR)); - MF2 mnG = min(min(fG, hG), min(min(bG, dG), eG)); - MF2 mnB = min(min(fB, hB), min(min(bB, dB), eB)); + // AOS to SOA conversion. + MF2 aR = MF2(src[0][pos + 0].r, src[1][pos + 0].r); + MF2 aG = MF2(src[0][pos + 0].g, src[1][pos + 0].g); + MF2 aB = MF2(src[0][pos + 0].b, src[1][pos + 0].b); + MF2 bR = MF2(src[1][pos + 0].r, src[2][pos + 0].r); + MF2 bG = MF2(src[1][pos + 0].g, src[2][pos + 0].g); + MF2 bB = MF2(src[1][pos + 0].b, src[2][pos + 0].b); + MF2 cR = MF2(src[2][pos + 0].r, src[3][pos + 0].r); + MF2 cG = MF2(src[2][pos + 0].g, src[3][pos + 0].g); + MF2 cB = MF2(src[2][pos + 0].b, src[3][pos + 0].b); + MF2 dR = MF2(src[0][pos + 1].r, src[1][pos + 1].r); + MF2 dG = MF2(src[0][pos + 1].g, src[1][pos + 1].g); + MF2 dB = MF2(src[0][pos + 1].b, src[1][pos + 1].b); + MF2 eR = MF2(src[1][pos + 1].r, src[2][pos + 1].r); + MF2 eG = MF2(src[1][pos + 1].g, src[2][pos + 1].g); + MF2 eB = MF2(src[1][pos + 1].b, src[2][pos + 1].b); + MF2 fR = MF2(src[2][pos + 1].r, src[3][pos + 1].r); + MF2 fG = MF2(src[2][pos + 1].g, src[3][pos + 1].g); + MF2 fB = MF2(src[2][pos + 1].b, src[3][pos + 1].b); + MF2 gR = MF2(src[0][pos + 2].r, src[1][pos + 2].r); + MF2 gG = MF2(src[0][pos + 2].g, src[1][pos + 2].g); + MF2 gB = MF2(src[0][pos + 2].b, src[1][pos + 2].b); + MF2 hR = MF2(src[1][pos + 2].r, src[2][pos + 2].r); + MF2 hG = MF2(src[1][pos + 2].g, src[2][pos + 2].g); + MF2 hB = MF2(src[1][pos + 2].b, src[2][pos + 2].b); + MF2 iR = MF2(src[2][pos + 2].r, src[3][pos + 2].r); + MF2 iG = MF2(src[2][pos + 2].g, src[3][pos + 2].g); + MF2 iB = MF2(src[2][pos + 2].b, src[3][pos + 2].b); + + // Soft min and max. 
+ MF2 mnR = min(min(fR, hR), min(min(bR, dR), eR)); + MF2 mnG = min(min(fG, hG), min(min(bG, dG), eG)); + MF2 mnB = min(min(fB, hB), min(min(bB, dB), eB)); #ifdef CAS_BETTER_DIAGONALS - MF2 mnR2 = min(min(gR, iR), min(min(aR, cR), mnR)); - MF2 mnG2 = min(min(gG, iG), min(min(aG, cG), mnG)); - MF2 mnB2 = min(min(gB, iB), min(min(aB, cB), mnB)); - mnR = mnR + mnR2; - mnG = mnG + mnG2; - mnB = mnB + mnB2; + MF2 mnR2 = min(min(gR, iR), min(min(aR, cR), mnR)); + MF2 mnG2 = min(min(gG, iG), min(min(aG, cG), mnG)); + MF2 mnB2 = min(min(gB, iB), min(min(aB, cB), mnB)); + mnR = mnR + mnR2; + mnG = mnG + mnG2; + mnB = mnB + mnB2; #endif - MF2 mxR = max(max(fR, hR), max(max(bR, dR), eR)); - MF2 mxG = max(max(fG, hG), max(max(bG, dG), eG)); - MF2 mxB = max(max(fB, hB), max(max(bB, dB), eB)); + MF2 mxR = max(max(fR, hR), max(max(bR, dR), eR)); + MF2 mxG = max(max(fG, hG), max(max(bG, dG), eG)); + MF2 mxB = max(max(fB, hB), max(max(bB, dB), eB)); #ifdef CAS_BETTER_DIAGONALS - MF2 mxR2 = max(max(gR, iR), max(max(aR, cR), mxR)); - MF2 mxG2 = max(max(gG, iG), max(max(aG, cG), mxG)); - MF2 mxB2 = max(max(gB, iB), max(max(aB, cB), mxB)); - mxR = mxR + mxR2; - mxG = mxG + mxG2; - mxB = mxB + mxB2; + MF2 mxR2 = max(max(gR, iR), max(max(aR, cR), mxR)); + MF2 mxG2 = max(max(gG, iG), max(max(aG, cG), mxG)); + MF2 mxB2 = max(max(gB, iB), max(max(aB, cB), mxB)); + mxR = mxR + mxR2; + mxG = mxG + mxG2; + mxB = mxB + mxB2; #endif - // Smooth minimum distance to signal limit divided by smooth max. - MF2 rcpMR = rcp(mxR); - MF2 rcpMG = rcp(mxG); - MF2 rcpMB = rcp(mxB); + // Smooth minimum distance to signal limit divided by smooth max. 
+ MF2 rcpMR = rcp(mxR); + MF2 rcpMG = rcp(mxG); + MF2 rcpMB = rcp(mxB); #ifdef CAS_BETTER_DIAGONALS - MF2 ampR = saturate(min(mnR, 2.0 - mxR) * rcpMR); - MF2 ampG = saturate(min(mnG, 2.0 - mxG) * rcpMG); - MF2 ampB = saturate(min(mnB, 2.0 - mxB) * rcpMB); + MF2 ampR = saturate(min(mnR, 2.0 - mxR) * rcpMR); + MF2 ampG = saturate(min(mnG, 2.0 - mxG) * rcpMG); + MF2 ampB = saturate(min(mnB, 2.0 - mxB) * rcpMB); #else - MF2 ampR = saturate(min(mnR, 1.0 - mxR) * rcpMR); - MF2 ampG = saturate(min(mnG, 1.0 - mxG) * rcpMG); - MF2 ampB = saturate(min(mnB, 1.0 - mxB) * rcpMB); + MF2 ampR = saturate(min(mnR, 1.0 - mxR) * rcpMR); + MF2 ampG = saturate(min(mnG, 1.0 - mxG) * rcpMG); + MF2 ampB = saturate(min(mnB, 1.0 - mxB) * rcpMB); #endif - // Shaping amount of sharpening. + // Shaping amount of sharpening. - ampR = sqrt(ampR); - ampG = sqrt(ampG); - ampB = sqrt(ampB); + ampR = sqrt(ampR); + ampG = sqrt(ampG); + ampB = sqrt(ampB); - // Filter shape. - MF2 wR = ampR * peak; - MF2 wG = ampG * peak; - MF2 wB = ampB * peak; - // Filter. + // Filter shape. + MF2 wR = ampR * peak; + MF2 wG = ampG * peak; + MF2 wB = ampB * peak; + // Filter. 
- MF2 rcpWeight = rcp(1.0 + 4.0 * wG); + MF2 rcpWeight = rcp(1.0 + 4.0 * wG); - pixR = saturate((bR * wG + dR * wG + fR * wG + hR * wG + eR) * rcpWeight); - pixG = saturate((bG * wG + dG * wG + fG * wG + hG * wG + eG) * rcpWeight); - pixB = saturate((bB * wG + dB * wG + fB * wG + hB * wG + eB) * rcpWeight); + pixR = saturate((bR * wG + dR * wG + fR * wG + hR * wG + eR) * rcpWeight); + pixG = saturate((bG * wG + dG * wG + fG * wG + hG * wG + eG) * rcpWeight); + pixB = saturate((bB * wG + dB * wG + fB * wG + hB * wG + eB) * rcpWeight); } #else MF3 CasFilter(MF3 src[4][4], uint2 pos, MF peak) { - // a b c - // d e f - // g h i - MF3 a = src[pos.x - 1][pos.y - 1]; - MF3 b = src[pos.x][pos.y - 1]; - MF3 c = src[pos.x + 1][pos.y - 1]; - MF3 d = src[pos.x - 1][pos.y]; - MF3 e = src[pos.x][pos.y]; - MF3 f = src[pos.x + 1][pos.y]; - MF3 g = src[pos.x - 1][pos.y + 1]; - MF3 h = src[pos.x][pos.y + 1]; - MF3 i = src[pos.x + 1][pos.y + 1]; - - // Soft min and max. - // a b c b - // d e f * 0.5 + d e f * 0.5 - // g h i h - // These are 2.0x bigger (factored out the extra multiply). - MF mnR = min3(min3(d.r, e.r, f.r), b.r, h.r); - MF mnG = min3(min3(d.g, e.g, f.g), b.g, h.g); - MF mnB = min3(min3(d.b, e.b, f.b), b.b, h.b); + // a b c + // d e f + // g h i + MF3 a = src[pos.x - 1][pos.y - 1]; + MF3 b = src[pos.x][pos.y - 1]; + MF3 c = src[pos.x + 1][pos.y - 1]; + MF3 d = src[pos.x - 1][pos.y]; + MF3 e = src[pos.x][pos.y]; + MF3 f = src[pos.x + 1][pos.y]; + MF3 g = src[pos.x - 1][pos.y + 1]; + MF3 h = src[pos.x][pos.y + 1]; + MF3 i = src[pos.x + 1][pos.y + 1]; + + // Soft min and max. + // a b c b + // d e f * 0.5 + d e f * 0.5 + // g h i h + // These are 2.0x bigger (factored out the extra multiply). 
+ MF mnR = min3(min3(d.r, e.r, f.r), b.r, h.r); + MF mnG = min3(min3(d.g, e.g, f.g), b.g, h.g); + MF mnB = min3(min3(d.b, e.b, f.b), b.b, h.b); #ifdef CAS_BETTER_DIAGONALS - MF mnR2 = min3(min3(mnR, a.r, c.r), g.r, i.r); - MF mnG2 = min3(min3(mnG, a.g, c.g), g.g, i.g); - MF mnB2 = min3(min3(mnB, a.b, c.b), g.b, i.b); - mnR = mnR + mnR2; - mnG = mnG + mnG2; - mnB = mnB + mnB2; + MF mnR2 = min3(min3(mnR, a.r, c.r), g.r, i.r); + MF mnG2 = min3(min3(mnG, a.g, c.g), g.g, i.g); + MF mnB2 = min3(min3(mnB, a.b, c.b), g.b, i.b); + mnR = mnR + mnR2; + mnG = mnG + mnG2; + mnB = mnB + mnB2; #endif - MF mxR = max3(max3(d.r, e.r, f.r), b.r, h.r); - MF mxG = max3(max3(d.g, e.g, f.g), b.g, h.g); - MF mxB = max3(max3(d.b, e.b, f.b), b.b, h.b); + MF mxR = max3(max3(d.r, e.r, f.r), b.r, h.r); + MF mxG = max3(max3(d.g, e.g, f.g), b.g, h.g); + MF mxB = max3(max3(d.b, e.b, f.b), b.b, h.b); #ifdef CAS_BETTER_DIAGONALS - MF mxR2 = max3(max3(mxR, a.r, c.r), g.r, i.r); - MF mxG2 = max3(max3(mxG, a.g, c.g), g.g, i.g); - MF mxB2 = max3(max3(mxB, a.b, c.b), g.b, i.b); - mxR = mxR + mxR2; - mxG = mxG + mxG2; - mxB = mxB + mxB2; + MF mxR2 = max3(max3(mxR, a.r, c.r), g.r, i.r); + MF mxG2 = max3(max3(mxG, a.g, c.g), g.g, i.g); + MF mxB2 = max3(max3(mxB, a.b, c.b), g.b, i.b); + mxR = mxR + mxR2; + mxG = mxG + mxG2; + mxB = mxB + mxB2; #endif - // Smooth minimum distance to signal limit divided by smooth max. + // Smooth minimum distance to signal limit divided by smooth max. 
- MF rcpMR = rcp(mxR); - MF rcpMG = rcp(mxG); - MF rcpMB = rcp(mxB); + MF rcpMR = rcp(mxR); + MF rcpMG = rcp(mxG); + MF rcpMB = rcp(mxB); #ifdef CAS_BETTER_DIAGONALS - MF ampR = saturate(min(mnR, 2.0 - mxR) * rcpMR); - MF ampG = saturate(min(mnG, 2.0 - mxG) * rcpMG); - MF ampB = saturate(min(mnB, 2.0 - mxB) * rcpMB); + MF ampR = saturate(min(mnR, 2.0 - mxR) * rcpMR); + MF ampG = saturate(min(mnG, 2.0 - mxG) * rcpMG); + MF ampB = saturate(min(mnB, 2.0 - mxB) * rcpMB); #else - MF ampR = saturate(min(mnR, 1.0 - mxR) * rcpMR); - MF ampG = saturate(min(mnG, 1.0 - mxG) * rcpMG); - MF ampB = saturate(min(mnB, 1.0 - mxB) * rcpMB); + MF ampR = saturate(min(mnR, 1.0 - mxR) * rcpMR); + MF ampG = saturate(min(mnG, 1.0 - mxG) * rcpMG); + MF ampB = saturate(min(mnB, 1.0 - mxB) * rcpMB); #endif - // Shaping amount of sharpening. - ampR = sqrt(ampR); - ampG = sqrt(ampG); - ampB = sqrt(ampB); - - // Filter shape. - // 0 w 0 - // w 1 w - // 0 w 0 - MF wR = ampR * peak; - MF wG = ampG * peak; - MF wB = ampB * peak; - // Filter. - // Using green coef only, depending on dead code removal to strip out the extra overhead. - MF rcpWeight = rcp(1.0 + 4.0 * wG); - - return MF3( - saturate((b.r * wG + d.r * wG + f.r * wG + h.r * wG + e.r) * rcpWeight), - saturate((b.g * wG + d.g * wG + f.g * wG + h.g * wG + e.g) * rcpWeight), - saturate((b.b * wG + d.b * wG + f.b * wG + h.b * wG + e.b) * rcpWeight) - ); + // Shaping amount of sharpening. + ampR = sqrt(ampR); + ampG = sqrt(ampG); + ampB = sqrt(ampB); + + // Filter shape. + // 0 w 0 + // w 1 w + // 0 w 0 + MF wR = ampR * peak; + MF wG = ampG * peak; + MF wB = ampB * peak; + // Filter. + // Using green coef only, depending on dead code removal to strip out the extra overhead. 
+ MF rcpWeight = rcp(1.0 + 4.0 * wG); + + return MF3( + saturate((b.r * wG + d.r * wG + f.r * wG + h.r * wG + e.r) * rcpWeight), + saturate((b.g * wG + d.g * wG + f.g * wG + h.g * wG + e.g) * rcpWeight), + saturate((b.b * wG + d.b * wG + f.b * wG + h.b * wG + e.b) * rcpWeight) + ); } #endif void Pass1(uint2 blockStart, uint3 threadId) { - uint2 gxy = blockStart + (Rmp8x8(threadId.x) << 1); - if (!CheckViewport(gxy)) { - return; - } - - float2 inputPt = GetInputPt(); - uint i, j; - - MF3 src[4][4]; - [unroll] - for (i = 0; i < 3; i += 2) { - [unroll] - for (j = 0; j < 3; j += 2) { - float2 tpos = (gxy + uint2(i, j)) * inputPt; - const MF4 sr = (MF4)INPUT.GatherRed(sam, tpos); - const MF4 sg = (MF4)INPUT.GatherGreen(sam, tpos); - const MF4 sb = (MF4)INPUT.GatherBlue(sam, tpos); - - // w z - // x y - src[i][j] = MF3(sr.w, sg.w, sb.w); - src[i][j + 1] = MF3(sr.x, sg.x, sb.x); - src[i + 1][j] = MF3(sr.z, sg.z, sb.z); - src[i + 1][j + 1] = MF3(sr.y, sg.y, sb.y); - } - } - - const MF peak = -rcp(lerp(8.0, 5.0, (MF)sharpness)); + uint2 gxy = blockStart + (Rmp8x8(threadId.x) << 1); + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { + return; + } + + float2 inputPt = GetInputPt(); + uint i, j; + + MF3 src[4][4]; + [unroll] + for (i = 0; i < 3; i += 2) { + [unroll] + for (j = 0; j < 3; j += 2) { + float2 tpos = (gxy + uint2(i, j)) * inputPt; + const MF4 sr = (MF4)INPUT.GatherRed(sam, tpos); + const MF4 sg = (MF4)INPUT.GatherGreen(sam, tpos); + const MF4 sb = (MF4)INPUT.GatherBlue(sam, tpos); + + // w z + // x y + src[i][j] = MF3(sr.w, sg.w, sb.w); + src[i][j + 1] = MF3(sr.x, sg.x, sb.x); + src[i + 1][j] = MF3(sr.z, sg.z, sb.z); + src[i + 1][j + 1] = MF3(sr.y, sg.y, sb.y); + } + } + + const MF peak = -rcp(lerp(8.0, 5.0, (MF)sharpness)); #ifdef MP_FP16 - MF2 pixR, pixG, pixB; - CasFilterH(src, 0, peak, pixR, pixG, pixB); + MF2 pixR, pixG, pixB; + CasFilterH(src, 0, peak, pixR, pixG, pixB); - WriteToOutput(gxy, float3(pixR.x, 
pixG.x, pixB.x)); + OUTPUT[gxy] = float4(float3(pixR.x, pixG.x, pixB.x), 1); - ++gxy.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(pixR.y, pixG.y, pixB.y)); - } + ++gxy.x; + OUTPUT[gxy] = float4(float3(pixR.y, pixG.y, pixB.y), 1); - CasFilterH(src, 1, peak, pixR, pixG, pixB); + CasFilterH(src, 1, peak, pixR, pixG, pixB); - ++gxy.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(pixR.y, pixG.y, pixB.y)); - } + ++gxy.y; + OUTPUT[gxy] = float4(float3(pixR.y, pixG.y, pixB.y), 1); - --gxy.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, float3(pixR.x, pixG.x, pixB.x)); - } + --gxy.x; + OUTPUT[gxy] = float4(float3(pixR.x, pixG.x, pixB.x), 1); #else - WriteToOutput(gxy, CasFilter(src, uint2(1, 1), peak)); - - ++gxy.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, CasFilter(src, uint2(2, 1), peak)); - } - - ++gxy.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, CasFilter(src, uint2(2, 2), peak)); - } - - --gxy.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, CasFilter(src, uint2(1, 2), peak)); - } + OUTPUT[gxy] = float4(CasFilter(src, uint2(1, 1), peak), 1); + + ++gxy.x; + OUTPUT[gxy] = float4(CasFilter(src, uint2(2, 1), peak), 1); + + ++gxy.y; + OUTPUT[gxy] = float4(CasFilter(src, uint2(2, 2), peak), 1); + + --gxy.x; + OUTPUT[gxy] = float4(CasFilter(src, uint2(1, 2), peak), 1); #endif } diff --git a/src/Effects/CAS/CAS_Scaling.hlsl b/src/Effects/CAS/CAS_Scaling.hlsl index b09503739..f296aa4ce 100644 --- a/src/Effects/CAS/CAS_Scaling.hlsl +++ b/src/Effects/CAS/CAS_Scaling.hlsl @@ -1,7 +1,7 @@ // 移植自 https://github.com/GPUOpen-Effects/FidelityFX-CAS/blob/master/ffx-cas/ffx_cas.h //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!PARAMETER //!LABEL Sharpness @@ -14,9 +14,13 @@ float sharpness; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -307,7 +311,9 @@ float3 CasFilter(uint2 ip, float4 const0, float peak) { void Pass1(uint2 blockStart, uint3 threadId) { 
uint2 gxy = blockStart + Rmp8x8(threadId.x); - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -317,20 +323,14 @@ void Pass1(uint2 blockStart, uint3 threadId) { const float peak = -rcp(lerp(8.0, 5.0, sharpness)); - WriteToOutput(gxy, CasFilter(gxy, const0, peak)); + OUTPUT[gxy] = float4(CasFilter(gxy, const0, peak), 1); gxy.x += 8u; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, CasFilter(gxy, const0, peak)); - } + OUTPUT[gxy] = float4(CasFilter(gxy, const0, peak), 1); gxy.y += 8u; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, CasFilter(gxy, const0, peak)); - } + OUTPUT[gxy] = float4(CasFilter(gxy, const0, peak), 1); gxy.x -= 8u; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, CasFilter(gxy, const0, peak)); - } + OUTPUT[gxy] = float4(CasFilter(gxy, const0, peak), 1); } diff --git a/src/Effects/CRT/CRT_Easymode.hlsl b/src/Effects/CRT/CRT_Easymode.hlsl index fc9a96020..3be2608c1 100644 --- a/src/Effects/CRT/CRT_Easymode.hlsl +++ b/src/Effects/CRT/CRT_Easymode.hlsl @@ -31,7 +31,7 @@ */ //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!PARAMETER @@ -173,6 +173,9 @@ int dilation; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -181,6 +184,7 @@ SamplerState sam; //!PASS 1 //!STYLE PS //!IN INPUT +//!OUT OUTPUT #pragma warning(disable: 3571) // X3571: pow(f, e) will not work for negative f, use abs(f) or conditionally handle negative values if you expect them diff --git a/src/Effects/CRT/CRT_Geom.hlsl b/src/Effects/CRT/CRT_Geom.hlsl index bc2010b6f..1555cb504 100644 --- a/src/Effects/CRT/CRT_Geom.hlsl +++ b/src/Effects/CRT/CRT_Geom.hlsl @@ -24,7 +24,7 @@ */ //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!USE_DYNAMIC @@ -160,6 +160,9 @@ int interlace; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -168,6 +171,7 @@ SamplerState sam; //!PASS 1 //!STYLE PS 
//!IN INPUT +//!OUT OUTPUT #pragma warning(disable: 3571) // X3571: pow(f, e) will not work for negative f, use abs(f) or conditionally handle negative values if you expect them diff --git a/src/Effects/CRT/CRT_Hyllian.hlsl b/src/Effects/CRT/CRT_Hyllian.hlsl index 9d8ff7b76..eb4f8d85e 100644 --- a/src/Effects/CRT/CRT_Hyllian.hlsl +++ b/src/Effects/CRT/CRT_Hyllian.hlsl @@ -28,7 +28,7 @@ */ //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!PARAMETER @@ -138,6 +138,9 @@ float crtAntiRinging; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -146,6 +149,7 @@ SamplerState sam; //!PASS 1 //!STYLE PS //!IN INPUT +//!OUT OUTPUT #pragma warning(disable: 3571) // X3571: pow(f, e) will not work for negative f, use abs(f) or conditionally handle negative values if you expect them diff --git a/src/Effects/CRT/CRT_Lottes.hlsl b/src/Effects/CRT/CRT_Lottes.hlsl index 52a9f8781..d3e24afe0 100644 --- a/src/Effects/CRT/CRT_Lottes.hlsl +++ b/src/Effects/CRT/CRT_Lottes.hlsl @@ -17,7 +17,7 @@ //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!PARAMETER //!LABEL Scanline Hardness @@ -119,6 +119,9 @@ float shape; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -126,6 +129,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 @@ -303,7 +307,9 @@ float3 Mask(float2 pos) { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -318,8 +324,9 @@ void Pass1(uint2 blockStart, uint3 threadId) { outColor.rgb += Bloom(pos1, inputSize) * bloomAmount; #endif - if (shadowMask) + if (shadowMask) { outColor.rgb *= Mask(gxy + 0.5f); + } - WriteToOutput(gxy, pow(outColor.rgb, 1.0f / 2.2f)); + OUTPUT[gxy] = float4(pow(outColor.rgb, 1.0f / 2.2f), 1); } diff --git 
a/src/Effects/CRT/GTU_v050.hlsl b/src/Effects/CRT/GTU_v050.hlsl index cba83c707..06230f7e1 100644 --- a/src/Effects/CRT/GTU_v050.hlsl +++ b/src/Effects/CRT/GTU_v050.hlsl @@ -9,7 +9,7 @@ //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!PARAMETER @@ -80,6 +80,9 @@ float contrast; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!TEXTURE //!WIDTH OUTPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -153,6 +156,7 @@ float4 Pass1(float2 pos) { //!PASS 2 //!STYLE PS //!IN tex1 +//!OUT OUTPUT #define pi 3.14159265358 #define normalGauss(x) ((exp(-(x)*(x)*0.5))/sqrt(2.0*pi)) diff --git a/src/Effects/CuNNy/CuNNy-16x16C-NVL-DN.hlsl b/src/Effects/CuNNy/CuNNy-16x16C-NVL-DN.hlsl new file mode 100644 index 000000000..b2e2953c1 --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-16x16C-NVL-DN.hlsl @@ -0,0 +1,7635 @@ +// CuNNy 16x16C BILINEAR RGB NVL DN - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-DN-D16N16 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t4; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t5; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t6; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t7; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1, t2, t3 + +#define l0(x, y) min16float((dot(float3(1.813e-01, 3.616e-01, 7.758e-02), O(INPUT, float2(x, y)).rgb) + -1.943e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(-7.980e-03, -3.076e-02, -4.968e-02, -5.507e-02) * s0_0; + r += V4(-1.630e-03, 9.011e-02, 1.569e-01, -1.171e-01) * s0_1; + r += V4(6.748e-03, 3.943e-02, -2.483e-02, 8.049e-02) * s0_2; + r += V4(1.366e-02, 7.292e-03, 1.892e-01, -7.788e-02) * s0_3; + r += V4(-1.200e-01, -4.487e-01, -6.842e-02, 5.161e-02) * s0_4; + r += V4(-6.519e-02, 2.036e-01, -1.251e-02, 1.274e-01) * s0_5; + r += 
V4(-3.159e-02, 2.875e-02, -1.077e-01, 1.257e-02) * s0_6; + r += V4(2.857e-01, -6.175e-02, -1.141e-01, 1.544e-01) * s0_7; + r += V4(4.700e-02, 1.641e-01, 1.432e-02, -1.952e-01) * s0_8; + r += V4(2.289e-02, -9.738e-03, -1.114e-02, -6.420e-03); + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(2.687e-01, -3.137e-02, 2.714e-02, -4.394e-02) * s0_0; + r += V4(2.045e-02, 8.505e-02, -4.982e-03, -9.058e-02) * s0_1; + r += V4(6.386e-02, -5.317e-02, -6.019e-02, -6.573e-03) * s0_2; + r += V4(-1.215e-01, -1.615e-02, 9.762e-02, 9.438e-02) * s0_3; + r += V4(-7.273e-02, -8.166e-02, -1.773e-01, 1.022e-01) * s0_4; + r += V4(-1.313e-01, -2.619e-01, 1.178e-01, 4.578e-02) * s0_5; + r += V4(1.567e-01, 7.322e-02, -6.227e-02, -2.403e-03) * s0_6; + r += V4(-2.226e-01, -9.741e-02, -2.121e-02, -7.671e-02) * s0_7; + r += V4(2.273e-02, 1.006e-01, 2.815e-02, -3.005e-02) * s0_8; + r += V4(1.315e-02, -4.822e-02, 1.570e-03, 1.289e-02); + return r; +} + +V4 f2(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(-1.161e-02, 9.096e-02, 1.460e-01, 9.043e-02) * s0_0; + r += V4(4.599e-02, 1.247e-01, 1.297e-02, 2.141e-01) * s0_1; + r += V4(-7.605e-03, -8.120e-02, 8.736e-02, 6.155e-02) * s0_2; + r += V4(9.851e-02, -1.175e-02, -4.543e-03, -1.553e-01) * s0_3; + r += V4(-1.685e-01, 1.256e-01, -2.056e-01, -6.121e-02) * s0_4; + r += V4(4.006e-03, 6.040e-02, -1.224e-02, 1.277e-01) * s0_5; + r += V4(4.245e-02, 3.785e-02, 8.967e-02, -1.193e-01) * s0_6; + r += V4(-2.506e-02, -2.161e-03, -2.012e-01, -1.872e-01) * s0_7; + r += V4(5.837e-02, -1.905e-02, -9.231e-02, 2.377e-02) * s0_8; + r += V4(8.704e-03, 3.978e-02, -2.434e-02, 9.064e-03); + return r; +} + +V4 f3(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float 
s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(2.014e-03, -5.120e-03, 1.469e-01, -2.760e-03) * s0_0; + r += V4(6.458e-03, -3.387e-02, 1.152e-01, -1.061e-01) * s0_1; + r += V4(-6.233e-03, 7.249e-02, 2.964e-03, 9.741e-02) * s0_2; + r += V4(-4.754e-03, -1.019e-01, 2.455e-01, -8.219e-02) * s0_3; + r += V4(6.881e-03, 1.177e-01, 2.419e-02, 4.032e-02) * s0_4; + r += V4(2.764e-01, -9.610e-02, -5.004e-02, 1.126e-01) * s0_5; + r += V4(1.787e-02, 1.606e-01, 1.749e-02, -1.304e-01) * s0_6; + r += V4(-2.934e-02, -9.155e-02, -5.788e-02, -2.840e-02) * s0_7; + r += V4(-2.607e-01, 5.171e-03, 2.436e-02, 1.191e-01) * s0_8; + r += V4(1.194e-02, 7.492e-03, -1.938e-01, 6.217e-03); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 
s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.232e-03, -1.263e-02, 1.604e-02, 1.024e-01, -3.414e-01, -6.902e-02, -2.683e-02, -9.299e-02, -7.921e-02, -9.931e-02, 1.531e-02, 2.957e-02, 1.157e-01, 1.279e-01, -1.983e-02, -2.226e-01)); + r += mul(s0_1, M4(4.282e-02, 4.810e-02, 2.519e-02, -2.683e-02, -2.278e-01, 1.087e-01, 2.035e-01, -2.022e-02, -4.101e-02, -2.410e-01, -1.590e-01, -1.869e-01, 2.409e-01, -7.862e-02, 5.305e-02, -2.379e-01)); + r += mul(s0_2, M4(6.824e-02, 4.298e-03, -3.620e-02, 6.571e-02, 1.014e-01, -6.163e-02, -4.906e-03, 1.129e-01, -4.051e-01, 1.088e-01, 4.718e-02, 9.828e-02, 2.015e-01, -1.438e-01, -3.376e-02, -3.588e-01)); + r += mul(s0_3, M4(-1.032e-01, -3.451e-02, 1.023e-01, 1.668e-01, -3.269e-02, 1.371e-01, 2.365e-01, 1.415e-02, 1.336e-01, 2.168e-01, 1.946e-02, -8.461e-02, -7.150e-02, -3.241e-01, -6.226e-02, -1.517e-01)); + r += mul(s0_4, M4(-3.957e-02, 1.035e-01, 2.231e-01, 1.377e-01, -1.762e-01, -8.220e-02, -6.978e-02, 2.561e-01, -9.208e-02, -1.297e-01, 3.774e-02, -2.095e-01, -1.958e-03, -3.276e-01, 1.283e-01, -1.089e-01)); + r += mul(s0_5, M4(5.492e-03, 6.888e-02, 4.573e-04, 1.222e-03, -7.004e-02, 1.235e-01, 7.911e-02, 1.326e-01, 1.109e-01, -5.032e-02, 2.981e-02, -7.277e-02, -1.899e-01, -3.998e-01, 3.372e-02, 1.580e-02)); + r += mul(s0_6, M4(9.754e-02, -2.830e-02, -5.702e-02, 5.207e-02, -3.047e-01, -2.664e-01, -1.230e-02, 8.129e-02, 1.607e-02, -1.711e-01, -2.426e-02, -1.682e-01, -1.057e-01, 5.643e-02, 8.680e-02, 7.754e-02)); + r += mul(s0_7, 
M4(-3.861e-02, 6.737e-02, -1.089e-01, -8.752e-02, -3.457e-01, -1.598e-01, -8.906e-03, 7.005e-02, -1.116e-01, 1.432e-01, -3.431e-02, -1.621e-01, 7.981e-02, 5.537e-02, -1.455e-01, 4.108e-01)); + r += mul(s0_8, M4(3.274e-02, 7.273e-03, -4.861e-02, -5.922e-02, 1.132e-01, 2.571e-02, 1.105e-01, 6.493e-04, -2.355e-01, -1.652e-01, -9.999e-02, -2.700e-01, -1.294e-01, 5.101e-02, -1.395e-01, -7.676e-02)); + r += mul(s1_0, M4(-2.194e-03, -7.369e-02, -1.714e-01, -1.710e-01, -7.343e-02, -2.065e-02, -6.641e-02, -5.477e-02, 1.710e-01, -1.409e-01, -1.086e-02, -2.295e-02, 1.003e-01, -1.355e-01, 8.370e-02, -3.968e-02)); + r += mul(s1_1, M4(1.456e-01, -2.707e-01, 5.298e-02, -4.356e-01, -4.610e-02, -1.830e-01, 8.627e-02, -7.877e-02, -1.260e-01, 3.420e-02, -4.331e-02, 6.292e-02, -1.356e-01, 3.717e-02, 9.696e-03, 1.419e-01)); + r += mul(s1_2, M4(-4.370e-02, 1.515e-01, -1.471e-01, 5.180e-02, 1.941e-01, -3.475e-02, -2.119e-02, 4.992e-02, -7.970e-02, 1.016e-01, 4.215e-02, -6.728e-02, -9.104e-02, 1.109e-01, 2.284e-02, 1.356e-01)); + r += mul(s1_3, M4(-4.121e-01, 2.762e-01, -1.719e-01, 3.415e-02, 1.492e-01, 2.398e-01, 1.802e-01, -1.359e-01, 1.048e-01, -1.091e-01, 5.598e-02, 5.408e-02, 8.922e-02, 1.237e-01, -1.087e-01, -2.341e-02)); + r += mul(s1_4, M4(-4.086e-01, 3.231e-02, -8.000e-01, 1.567e-01, -3.440e-02, 1.166e-01, -8.848e-02, 2.378e-01, -5.513e-02, -6.543e-03, -9.741e-02, -3.948e-02, 1.110e-01, -7.642e-02, 6.105e-02, 1.034e-01)); + r += mul(s1_5, M4(-2.552e-01, 4.160e-02, 1.345e-01, 2.989e-02, -8.820e-02, -3.577e-02, 1.492e-01, -4.709e-02, -4.680e-02, -1.114e-01, 3.098e-02, -5.875e-02, 5.616e-02, -6.438e-03, 4.510e-02, 1.104e-02)); + r += mul(s1_6, M4(-3.260e-01, -2.310e-02, 9.558e-04, -8.444e-02, -1.877e-01, 9.342e-02, 1.272e-01, -2.466e-01, 1.322e-01, 1.308e-02, -1.377e-02, 5.056e-02, -1.029e-01, -8.177e-02, 6.398e-02, -9.232e-02)); + r += mul(s1_7, M4(-4.628e-01, -2.434e-01, -1.955e-02, -1.592e-01, -1.218e-01, 1.512e-01, -3.348e-01, -1.380e-01, -5.974e-02, 6.669e-02, 2.932e-02, 
7.818e-02, -9.201e-02, 9.057e-02, -5.445e-02, -2.324e-02)); + r += mul(s1_8, M4(-1.298e-01, -3.600e-01, -1.398e-02, 2.785e-01, 3.239e-02, 1.024e-01, 4.002e-02, -1.654e-01, 1.207e-01, 1.935e-01, 2.551e-02, -3.661e-03, 7.205e-02, -5.778e-02, -4.316e-02, -1.053e-01)); + r += mul(s2_0, M4(-7.089e-02, -8.723e-02, 5.822e-02, -1.300e-01, -2.327e-01, -1.664e-01, 5.151e-02, 7.662e-02, -2.503e-03, 1.583e-01, -7.790e-02, -2.311e-02, -4.394e-03, -6.863e-02, -7.991e-02, 6.175e-02)); + r += mul(s2_1, M4(-1.789e-01, -5.748e-02, 6.327e-02, -4.628e-02, -1.190e-02, -5.142e-02, -1.517e-01, 3.932e-01, -1.007e-02, 3.506e-01, -3.729e-02, -2.814e-02, 8.168e-02, 4.268e-02, -1.572e-02, 6.749e-02)); + r += mul(s2_2, M4(-6.394e-02, 6.625e-02, -3.661e-02, -3.363e-02, -1.152e-01, 9.530e-02, 1.485e-01, 1.582e-01, -2.685e-01, 6.743e-02, 1.000e-01, 1.363e-01, -2.344e-02, 1.054e-01, -9.292e-03, -3.414e-02)); + r += mul(s2_3, M4(-1.325e-02, 5.769e-02, 9.497e-02, -1.102e-01, -8.531e-02, 4.940e-01, -2.478e-01, 5.556e-02, 1.253e-01, 1.196e-01, -7.608e-02, -9.433e-02, -2.328e-01, 5.913e-02, 2.705e-02, 6.825e-02)); + r += mul(s2_4, M4(-2.225e-02, 2.734e-02, -2.433e-01, -4.216e-02, 1.270e-01, 5.062e-01, 7.176e-02, 3.093e-01, 1.390e-01, 1.153e-02, 6.485e-02, 5.247e-02, -1.734e-02, -6.897e-02, -5.755e-02, -5.145e-02)); + r += mul(s2_5, M4(-4.082e-02, 1.448e-01, 9.686e-02, 3.773e-02, -1.496e-02, 3.650e-01, 6.160e-02, -2.780e-02, -1.849e-01, 4.693e-02, -1.304e-01, -1.975e-01, 1.455e-01, -1.066e-01, -2.135e-02, -4.541e-02)); + r += mul(s2_6, M4(-1.796e-02, -7.124e-02, -5.399e-02, 6.812e-02, 1.207e-01, 4.614e-01, 1.302e+00, -2.446e-01, -2.134e-01, -9.068e-02, -8.372e-02, -2.020e-02, -1.306e-01, 1.112e-01, 1.321e-02, 7.761e-02)); + r += mul(s2_7, M4(4.480e-02, 6.023e-02, -1.196e-01, 4.332e-02, 3.713e-01, 3.954e-01, 8.121e-01, -3.093e-01, 2.232e-01, 5.831e-02, -4.077e-02, -1.415e-01, 1.256e-01, -9.794e-03, 6.676e-02, 8.091e-03)); + r += mul(s2_8, M4(-6.303e-02, 1.297e-01, -2.822e-01, 1.475e-01, 1.822e-01, 
2.403e-01, 6.199e-01, 1.607e-01, -1.782e-02, 6.645e-02, -3.818e-02, 4.054e-01, 1.471e-01, -1.032e-01, 4.629e-02, 2.921e-02)); + r += mul(s3_0, M4(2.935e-01, 1.957e-01, -2.266e-02, 3.264e-02, -3.908e-02, 1.400e-01, -6.904e-02, -1.184e-01, 1.400e-01, -3.520e-02, -5.787e-02, -3.441e-02, 2.693e-02, -1.742e-01, -2.707e-02, 1.949e-02)); + r += mul(s3_1, M4(-2.875e-02, -6.801e-02, 4.792e-03, 1.226e-02, 1.318e-03, -3.648e-02, -2.690e-02, 5.585e-03, 4.851e-02, 4.054e-03, 7.015e-02, 8.548e-02, -6.633e-02, 4.076e-01, 8.485e-03, -2.852e-01)); + r += mul(s3_2, M4(2.192e-03, -1.598e-02, -5.636e-02, -1.786e-02, -8.461e-03, -7.038e-02, -4.633e-02, 7.459e-02, -4.072e-02, -2.260e-02, -2.599e-02, -1.689e-01, -2.695e-01, 3.035e-01, 1.049e-01, -5.409e-02)); + r += mul(s3_3, M4(1.142e-01, 1.198e-01, 3.545e-02, 1.778e-01, 9.564e-03, -4.826e-02, 7.830e-02, 1.123e-01, 2.424e-02, 9.883e-02, 3.808e-02, 9.802e-03, -7.263e-02, 2.442e-01, 1.354e-02, -8.291e-03)); + r += mul(s3_4, M4(-8.273e-02, -1.434e-01, -2.165e-01, 1.566e-01, -5.180e-02, 4.937e-02, 5.564e-03, 1.229e-01, -4.729e-02, -1.214e-01, 3.427e-02, 9.345e-02, -2.942e-01, -5.558e-02, -4.219e-01, -1.772e-01)); + r += mul(s3_5, M4(5.593e-02, 1.818e-02, 2.035e-01, 1.659e-01, -4.378e-02, -6.190e-02, -8.371e-02, 2.842e-02, 4.158e-02, -8.044e-02, 2.686e-02, 4.061e-02, -1.102e-01, 3.095e-01, -7.692e-02, 3.397e-03)); + r += mul(s3_6, M4(-7.776e-02, 2.238e-01, -1.441e-01, 4.501e-02, 2.254e-01, -1.272e-01, 1.171e-02, 1.119e-02, -7.533e-02, 1.687e-01, -1.100e-03, 1.080e-03, -2.423e-02, -3.446e-01, -2.180e-01, -9.774e-03)); + r += mul(s3_7, M4(5.486e-02, -1.573e-01, -9.305e-02, -6.218e-02, 5.573e-02, -7.214e-02, -1.015e-01, -7.444e-03, -4.333e-02, -2.102e-01, -7.572e-02, -9.376e-02, -3.271e-01, 1.398e-01, 7.296e-01, 6.384e-02)); + r += mul(s3_8, M4(1.001e-01, -2.694e-01, -4.672e-02, -1.425e-01, -7.395e-02, -5.039e-03, -2.449e-02, -3.480e-03, 2.517e-02, -2.365e-02, -2.376e-02, -4.630e-03, 1.519e-01, -4.867e-02, 9.130e-02, 4.954e-02)); + r += 
mul(s4_0, M4(-1.190e-02, 3.105e-02, -5.302e-02, 2.839e-02, -1.766e-01, -2.932e-02, 3.040e-02, 4.314e-02, 2.831e-01, 2.573e-01, -4.039e-02, 2.720e-01, 9.287e-02, 4.324e-02, 3.882e-03, 1.599e-01)); + r += mul(s4_1, M4(8.672e-03, -5.654e-02, 1.299e-02, -1.554e-02, 5.415e-02, -4.059e-02, 3.943e-02, 2.054e-02, 3.076e-01, 3.693e-01, -2.222e-01, 1.096e-01, -6.335e-02, -2.677e-02, 1.427e-02, -9.692e-02)); + r += mul(s4_2, M4(1.783e-01, -3.502e-02, 7.618e-03, 7.304e-02, 9.326e-02, 8.295e-03, -1.252e-02, -2.887e-03, -3.823e-01, 4.908e-02, 2.382e-02, -3.313e-02, 5.280e-02, -4.991e-02, -7.152e-03, -1.737e-01)); + r += mul(s4_3, M4(-5.825e-02, 1.393e-01, -3.566e-02, 6.874e-02, -1.361e-01, 9.102e-02, -4.185e-02, -4.120e-02, -2.030e-01, 1.278e-01, -6.667e-02, -3.361e-02, 1.890e-01, 1.746e-01, 8.857e-02, 3.206e-02)); + r += mul(s4_4, M4(-5.837e-02, -4.659e-02, -1.099e-02, 9.193e-02, 1.919e-01, 9.881e-02, 2.039e-02, -6.581e-02, -1.028e-01, 2.360e-01, 6.861e-02, 8.700e-02, 8.960e-02, 1.889e-02, -1.942e-01, 3.711e-02)); + r += mul(s4_5, M4(1.235e-01, -6.465e-02, 6.970e-03, -1.458e-01, 1.019e-02, 7.978e-02, 1.156e-01, -1.568e-01, -3.517e-01, -1.489e-01, -7.682e-02, 2.566e-01, -3.646e-03, 8.862e-02, -4.665e-02, -1.264e-01)); + r += mul(s4_6, M4(-1.168e-01, 7.567e-02, 4.442e-02, -1.496e-01, 2.950e-02, 2.805e-02, 1.348e-01, -6.303e-03, -2.002e-02, -7.925e-02, 1.072e-01, 1.361e-01, 9.353e-02, -5.266e-03, 3.989e-03, 8.648e-02)); + r += mul(s4_7, M4(6.570e-02, 3.402e-02, 1.471e-04, -4.474e-02, -4.834e-03, -5.764e-02, 8.183e-02, -9.186e-02, 5.233e-02, 2.400e-01, 3.102e-01, -3.024e-02, -7.292e-02, 1.583e-02, -4.936e-02, -2.610e-01)); + r += mul(s4_8, M4(-3.826e-03, -6.294e-02, 2.547e-02, -2.491e-02, -5.972e-02, 2.935e-02, 1.576e-02, -9.063e-02, 2.044e-02, 6.679e-03, 1.096e-03, 2.228e-01, -2.407e-01, 1.354e-01, -7.616e-02, 9.792e-02)); + r += mul(s5_0, M4(5.608e-03, 8.229e-03, -2.456e-02, -1.006e-01, -4.599e-01, -8.566e-02, -1.052e-01, 3.307e-01, -4.093e-02, -2.267e-02, 1.253e-02, -6.191e-02, 
8.924e-02, 8.230e-02, 1.199e-02, 5.318e-02)); + r += mul(s5_1, M4(-2.619e-01, -4.897e-01, 1.833e-02, -2.030e-01, -1.539e-01, -1.435e-01, -2.970e-01, -6.604e-02, -1.095e-01, -3.638e-03, -1.213e-01, 1.524e-02, 2.464e-02, -1.593e-01, -2.723e-02, 1.168e-01)); + r += mul(s5_2, M4(-1.785e-01, 2.530e-01, -2.439e-01, 2.566e-01, 1.387e-01, -1.182e-01, -3.776e-01, -4.125e-01, -3.954e-02, 1.032e-01, 7.002e-02, 2.044e-01, -4.406e-02, -8.684e-03, 1.155e-02, -2.161e-01)); + r += mul(s5_3, M4(-1.777e-01, -2.579e-01, -4.535e-02, 2.068e-01, -9.316e-02, -4.666e-01, -9.052e-02, 6.572e-02, 1.703e-01, -1.078e-01, -5.785e-02, -6.235e-02, -1.023e-01, -8.569e-02, 6.376e-02, -3.068e-01)); + r += mul(s5_4, M4(-4.940e-01, 3.483e-01, -6.921e-02, 2.380e-01, 1.427e-01, -6.275e-01, -4.544e-01, -3.053e-01, -5.621e-02, 1.289e-01, -1.396e-01, 6.402e-02, 1.020e-01, -8.552e-02, -1.554e-01, 5.649e-02)); + r += mul(s5_5, M4(9.104e-03, -3.946e-02, -3.489e-01, 3.821e-02, 1.626e-01, -5.198e-01, -5.062e-01, -6.730e-02, 8.234e-03, 8.062e-02, -2.827e-02, 7.927e-02, -5.086e-02, 2.596e-02, -1.753e-01, -3.009e-02)); + r += mul(s5_6, M4(-3.913e-01, 1.101e-01, 1.442e-01, 7.216e-02, -2.240e-01, -7.264e-01, -4.422e-01, 5.701e-01, -1.410e-01, 3.260e-02, -4.088e-02, -2.344e-02, 8.190e-03, -9.536e-02, 5.690e-02, 1.618e-02)); + r += mul(s5_7, M4(-9.386e-01, -1.086e-01, -1.548e-01, 8.514e-04, -8.171e-02, -8.063e-01, -1.166e+00, -2.847e-01, -5.213e-02, -1.733e-01, -1.278e-01, 4.072e-04, 2.080e-01, -1.636e-01, 8.938e-02, 1.357e-01)); + r += mul(s5_8, M4(-3.893e-01, 2.112e-01, 2.145e-01, 3.330e-01, -1.034e-01, -6.189e-01, -8.157e-01, -2.690e-01, 3.968e-02, -5.141e-02, 2.373e-02, 7.592e-02, -9.925e-02, -2.414e-02, -2.636e-02, 2.153e-02)); + r += mul(s6_0, M4(1.136e-01, 8.093e-02, -9.985e-02, 1.499e-02, -6.009e-02, 1.862e-02, -5.180e-02, 1.059e-02, 2.233e-01, -8.905e-01, 1.255e-01, -1.679e-01, 1.036e-01, 9.730e-02, -6.267e-02, -5.428e-02)); + r += mul(s6_1, M4(-2.109e-03, 6.298e-02, 3.134e-03, -1.532e-01, -1.769e-01, 
-1.546e-01, 3.674e-02, -8.395e-02, 3.887e-01, 3.715e-01, 2.054e-01, -7.510e-01, 1.340e-01, -1.268e-02, -5.245e-02, 7.880e-02)); + r += mul(s6_2, M4(-1.101e-01, -2.842e-02, -1.595e-04, 5.668e-02, 7.653e-02, 1.617e-02, -6.329e-02, 9.278e-02, 4.830e-01, 2.833e-02, 4.292e-01, -2.053e-01, 2.326e-02, 1.637e-01, -1.947e-02, -8.910e-02)); + r += mul(s6_3, M4(-1.725e-02, 1.329e-01, -2.409e-01, -1.431e-01, -6.088e-02, -1.209e-01, 6.302e-03, -4.155e-03, 2.497e-01, -1.025e+00, 1.091e-01, -8.530e-01, -1.597e-01, -1.489e-01, 1.854e-01, -6.847e-02)); + r += mul(s6_4, M4(8.771e-02, -1.043e-01, -6.309e-02, -3.651e-03, -9.639e-02, -1.604e-02, -8.599e-02, -2.125e-02, -1.450e-01, 2.313e-01, 1.777e-01, -5.452e-01, -2.405e-03, -1.849e-01, 4.595e-02, 2.129e-03)); + r += mul(s6_5, M4(6.450e-02, -4.157e-02, 1.194e-01, 1.012e-01, 1.620e-01, 1.771e-01, 8.360e-02, 1.448e-02, 1.998e-01, 5.925e-01, 4.185e-01, -5.502e-01, 8.328e-02, -4.896e-02, -1.595e-01, -1.282e-02)); + r += mul(s6_6, M4(-1.301e-01, -5.223e-02, 1.743e-01, 1.901e-01, -1.524e-01, 1.624e-01, 2.464e-02, -2.137e-02, -4.599e-01, 4.252e-02, 4.269e-01, -8.681e-01, -3.066e-01, 4.993e-02, 1.657e-02, 1.455e-02)); + r += mul(s6_7, M4(7.055e-02, -1.138e-02, -3.697e-02, 6.304e-02, 7.056e-02, 8.624e-02, 3.157e-02, 1.738e-03, -5.091e-02, -1.116e-01, 6.074e-01, -8.667e-01, 4.539e-02, -1.009e-02, 3.756e-02, 2.695e-02)); + r += mul(s6_8, M4(2.303e-03, -2.149e-02, 8.215e-02, 9.044e-02, 1.001e-01, -1.099e-01, 2.439e-02, -6.588e-02, 1.238e+00, 1.123e-01, 1.204e+00, -5.638e-01, 4.821e-02, 2.375e-03, -5.836e-03, 3.382e-02)); + r += mul(s7_0, M4(2.280e-01, -4.209e-01, 6.084e-03, -2.358e-01, 1.567e-01, -1.142e-01, -2.985e-02, 1.016e-01, -6.077e-02, -3.948e-02, -7.984e-03, -6.340e-02, -2.348e-01, -6.651e-02, -3.006e-02, -1.758e-01)); + r += mul(s7_1, M4(4.794e-02, -9.253e-02, -6.952e-02, 1.584e-01, -3.184e-01, 1.867e-01, 8.533e-02, 5.744e-03, 9.830e-03, 6.881e-02, 8.306e-02, 1.439e-01, -1.774e-01, -1.299e-01, 3.531e-02, 2.743e-01)); + r += mul(s7_2, 
M4(2.150e-01, -9.738e-02, -2.491e-02, 1.611e-01, 5.285e-01, -1.843e-01, -9.378e-02, 5.800e-02, -1.332e-02, 2.594e-02, 4.409e-03, -2.908e-02, -1.850e-01, 5.331e-02, 1.562e-02, 2.035e-02)); + r += mul(s7_3, M4(1.859e-02, -3.335e-03, -1.779e-01, -2.356e-02, -6.775e-02, -3.462e-01, 6.554e-03, 4.673e-01, -3.758e-02, -7.708e-02, 1.072e-01, 7.910e-02, -2.035e-01, -1.150e-01, 2.485e-01, 4.250e-02)); + r += mul(s7_4, M4(1.317e-01, 4.584e-02, -1.015e-01, 1.026e-01, 2.818e-02, -6.962e-02, -2.352e-01, 8.511e-03, -1.362e-01, 1.974e-01, -4.170e-02, 5.235e-02, -8.838e-02, 6.127e-02, -5.776e-02, 2.008e-01)); + r += mul(s7_5, M4(4.451e-02, 1.866e-01, 3.327e-02, -9.457e-02, 1.774e-01, -3.330e-01, -1.942e-02, -1.345e-01, -1.723e-02, -1.108e-01, 7.892e-02, 4.799e-02, 8.422e-02, 2.290e-02, -1.273e-01, -7.738e-02)); + r += mul(s7_6, M4(9.301e-02, -9.386e-02, -1.273e-01, -2.132e-02, 8.577e-02, -5.984e-02, -3.506e-03, 3.370e-01, 1.480e-01, -1.095e-01, 1.172e-01, -3.318e-02, -7.578e-02, -1.567e-01, 9.427e-02, 8.845e-02)); + r += mul(s7_7, M4(-3.632e-02, 1.124e-01, 8.124e-02, 1.514e-01, -2.447e-01, -7.509e-02, 1.311e-01, -5.810e-02, 5.562e-02, 1.533e-02, 1.313e-01, -3.108e-02, -9.952e-02, 7.539e-02, 2.049e-01, 3.185e-01)); + r += mul(s7_8, M4(-9.766e-02, -1.952e-03, 4.433e-02, 1.406e-01, 6.114e-02, 1.562e-01, 1.502e-02, -2.778e-01, 6.818e-02, 8.618e-02, 1.895e-01, 7.110e-02, 4.375e-02, 5.454e-02, 9.155e-03, 1.353e-01)); + r += V4(-2.169e-02, -7.251e-02, -1.396e-01, 3.650e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 
s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.775e-01, -1.666e-02, 2.113e-02, -1.239e-01, 1.264e-02, 2.318e-01, -5.209e-03, 9.788e-02, 3.047e-01, -6.490e-02, -3.152e-02, -3.440e-01, -1.708e-01, 1.047e-01, 2.269e-01, -2.688e-02)); + r += mul(s0_1, M4(7.222e-02, -1.354e-02, -9.023e-02, 7.743e-02, -1.774e-01, 1.933e-02, 1.082e-01, 1.154e-01, 7.841e-02, -1.001e-01, 2.004e-01, -3.504e-01, -2.600e-02, 6.343e-02, -2.025e-01, -1.070e-02)); + r += mul(s0_2, M4(-1.828e-02, 2.302e-02, 3.921e-02, 3.057e-02, 6.901e-02, -1.374e-01, 1.080e-01, -6.520e-02, -1.584e-01, -2.177e-02, -9.491e-02, -1.289e-01, -1.461e-01, -2.556e-02, 1.921e-01, 1.474e-01)); + r += mul(s0_3, M4(-1.343e-01, 6.336e-02, 1.862e-01, 9.577e-02, -4.924e-02, 1.149e-01, -3.575e-02, 2.123e-01, -1.659e-01, 9.363e-02, 3.628e-01, 3.677e-02, 2.087e-01, -1.171e-01, -1.263e-01, -3.249e-02)); + r += mul(s0_4, M4(3.837e-02, -6.882e-02, 2.254e-01, 8.898e-02, -8.437e-02, -1.221e-01, 3.803e-01, -1.880e-01, -2.448e-01, -1.350e-01, 2.000e-01, 5.808e-02, 6.826e-02, 9.996e-03, -2.128e-01, 7.567e-03)); + r += mul(s0_5, M4(3.019e-02, -3.115e-02, -3.425e-02, 3.495e-02, 8.757e-02, 1.741e-02, 7.830e-02, 2.187e-01, 3.421e-01, 1.835e-01, 1.304e-01, 2.987e-01, 5.425e-03, 5.812e-02, 3.939e-02, 8.306e-02)); + r += mul(s0_6, M4(-1.755e-02, 1.265e-02, -6.516e-02, -4.036e-02, -1.449e-01, 8.036e-02, -9.509e-02, -2.841e-01, 6.021e-02, 1.190e-01, 3.651e-02, -1.777e-01, -8.464e-02, 7.716e-02, -5.514e-02, -3.438e-01)); + r += mul(s0_7, M4(6.529e-02, 6.229e-02, -6.483e-02, -3.224e-02, 1.174e-02, -2.118e-01, -3.957e-01, -9.645e-02, 1.996e-01, -5.530e-02, -9.620e-03, -3.575e-02, -1.937e-01, -4.422e-02, -1.885e-01, -1.128e-01)); + r += mul(s0_8, M4(-6.677e-02, 4.985e-03, 5.960e-05, -5.510e-02, -1.532e-03, -2.686e-02, 8.343e-02, 1.080e-01, 7.810e-02, 1.712e-01, -9.339e-03, 2.619e-02, 1.932e-01, -1.233e-01, 
2.490e-02, -1.811e-01)); + r += mul(s1_0, M4(1.961e-01, 1.267e-02, -1.162e-02, -9.608e-02, 2.658e-01, -5.419e-02, -1.469e-02, -1.295e-01, 7.791e-02, 9.548e-02, 7.228e-02, 1.439e-02, 7.483e-02, -8.509e-02, 1.010e-01, -6.508e-02)); + r += mul(s1_1, M4(2.794e-01, 2.878e-01, -5.265e-01, 1.560e-01, -1.515e-01, 9.347e-02, 7.951e-02, -9.054e-02, 5.093e-02, -2.977e-02, 1.010e-01, 4.830e-03, 1.338e-02, 2.748e-02, -2.289e-01, -1.024e-01)); + r += mul(s1_2, M4(7.681e-02, -8.505e-03, 3.200e-01, -5.171e-02, 1.745e-02, -5.962e-02, -4.359e-02, 6.329e-02, 1.122e-01, -6.580e-02, 1.511e-02, -2.182e-02, 1.381e-02, 5.334e-02, 9.869e-03, -3.874e-03)); + r += mul(s1_3, M4(-5.790e-02, 4.297e-01, 2.180e-01, -3.490e-01, 5.206e-01, 1.042e-01, -2.797e-03, -8.630e-02, -5.800e-02, -1.127e-01, -1.039e-02, -6.326e-02, 4.866e-02, 4.524e-02, -1.471e-01, 5.535e-02)); + r += mul(s1_4, M4(2.486e-02, 5.458e-01, -1.845e-01, -2.061e-01, -3.408e-01, 4.788e-03, 4.484e-01, -7.937e-02, -3.388e-02, 9.579e-02, -1.823e-01, 4.135e-02, 6.400e-02, 3.829e-03, -1.028e-02, -1.141e-01)); + r += mul(s1_5, M4(-2.133e-01, -1.242e-01, -1.956e-01, 6.902e-02, 1.134e-02, 6.592e-02, 1.245e-01, 1.885e-01, 5.202e-02, 3.551e-02, -7.043e-02, -3.911e-02, -3.874e-02, -3.652e-02, 1.795e-01, 1.084e-01)); + r += mul(s1_6, M4(-7.038e-02, -1.997e-02, -7.819e-02, -6.066e-02, 2.418e-01, -1.132e-01, -2.147e-02, 1.733e-01, 4.530e-02, 6.559e-02, -2.115e-02, 1.483e-01, -7.494e-02, -1.038e-01, 3.806e-02, 1.279e-01)); + r += mul(s1_7, M4(-1.047e-01, 1.030e-01, -1.559e-01, -2.634e-01, -7.276e-02, -1.085e-01, -4.366e-01, -1.306e-01, 3.752e-02, -7.724e-02, -1.336e-01, -1.201e-01, 6.546e-02, 8.972e-02, 1.402e-01, 6.884e-02)); + r += mul(s1_8, M4(-3.296e-02, -5.481e-02, -8.518e-02, 5.481e-02, -5.662e-02, -1.010e-01, -8.579e-02, -2.996e-02, -2.077e-02, -3.781e-02, 1.152e-01, 5.694e-02, -6.651e-02, -2.632e-02, 1.062e-02, -6.098e-02)); + r += mul(s2_0, M4(6.375e-02, 5.409e-02, 4.370e-02, -3.229e-02, -9.817e-02, -1.247e-01, -1.783e-01, 2.652e-01, 
-1.650e-01, 6.811e-02, 1.056e-01, -1.367e-01, 9.807e-02, -3.406e-02, 2.187e-02, 2.229e-01)); + r += mul(s2_1, M4(-1.144e-01, 3.300e-02, 1.841e-01, 2.472e-02, -3.033e-01, -5.392e-01, 4.039e-01, -7.624e-03, 1.104e-01, -4.457e-02, 1.784e-01, 2.715e-01, 2.169e-02, 1.997e-02, 3.160e-02, 1.185e-02)); + r += mul(s2_2, M4(5.688e-02, -2.599e-02, -2.319e-01, -1.469e-01, 7.917e-02, 1.217e-01, -5.545e-02, -1.032e-01, -1.998e-01, 5.322e-02, 6.770e-03, -1.039e-01, -8.576e-02, -6.547e-02, -7.809e-02, -2.821e-01)); + r += mul(s2_3, M4(1.753e-02, 2.562e-02, 3.190e-02, 5.736e-02, -5.852e-01, 3.539e-03, 3.772e-01, 1.215e-01, -3.855e-02, 2.189e-01, -1.070e-01, 4.810e-02, -2.566e-02, -1.954e-02, -6.151e-02, 6.944e-02)); + r += mul(s2_4, M4(-6.889e-02, 3.412e-03, 6.761e-03, -2.931e-02, -6.143e-01, -3.518e-01, 5.373e-02, 7.054e-02, -1.948e-02, -1.110e-01, 1.130e-01, -8.545e-02, 2.696e-02, -3.268e-02, -9.364e-02, -4.108e-02)); + r += mul(s2_5, M4(-1.225e-01, 6.195e-02, -1.952e-02, 9.308e-02, -2.384e-01, 1.145e-01, -1.627e-01, 2.213e-01, -1.555e-03, 9.597e-02, -1.522e-01, 1.507e-01, -2.537e-02, 1.745e-02, -7.436e-02, -1.538e-02)); + r += mul(s2_6, M4(-3.516e-02, 1.166e-01, -1.581e-01, -7.152e-02, -2.311e-01, -5.554e-01, 1.196e-01, 9.133e-02, -5.553e-02, -7.650e-02, 6.208e-03, 9.953e-02, -3.923e-02, 1.282e-01, 8.115e-02, -9.125e-02)); + r += mul(s2_7, M4(-4.273e-02, -1.592e-02, -2.201e-01, 1.755e-01, 2.373e-01, -3.873e-01, -2.481e-01, -2.088e-01, -3.757e-02, -6.478e-02, -1.204e-01, -4.478e-02, -3.462e-02, 2.490e-02, 1.454e-01, -2.777e-02)); + r += mul(s2_8, M4(6.972e-03, -1.572e-01, -2.195e-01, -1.669e-01, 9.833e-02, -9.097e-02, -2.890e-01, 4.248e-01, -5.493e-02, 1.569e-01, -1.205e-01, -3.168e-02, -2.535e-02, 7.545e-02, 6.862e-02, 3.931e-02)); + r += mul(s3_0, M4(7.353e-02, -3.805e-04, 3.388e-02, 1.743e-01, -4.692e-03, -5.937e-02, 2.209e-02, -2.106e-02, 4.100e-02, -7.074e-02, 3.180e-02, -9.841e-02, 3.646e-02, -5.433e-02, -1.609e-01, 8.015e-03)); + r += mul(s3_1, M4(-1.315e-01, 1.033e-03, 
1.096e-01, -7.715e-02, -1.812e-01, 1.179e-02, 1.870e-02, 3.131e-02, -2.752e-02, 1.077e-01, -9.963e-03, 1.084e-01, -6.595e-02, -2.094e-01, 1.140e-01, -4.945e-02)); + r += mul(s3_2, M4(5.571e-02, 6.210e-02, -1.623e-02, 8.368e-03, -1.408e-01, -9.622e-03, 4.700e-02, 1.432e-02, -7.276e-03, 1.324e-02, 5.876e-02, -1.067e-01, -4.464e-02, -9.793e-02, -9.425e-02, -1.411e-01)); + r += mul(s3_3, M4(-3.458e-02, -5.870e-02, 6.770e-02, 1.029e-02, 6.527e-02, -1.635e-02, 5.908e-02, -4.672e-02, 9.661e-02, -1.748e-02, -1.440e-01, -1.449e-01, -1.096e-01, -3.897e-02, -3.221e-02, -1.150e-01)); + r += mul(s3_4, M4(1.564e-02, -8.144e-02, 5.939e-02, -4.613e-02, -1.032e-01, -7.433e-02, 1.248e-01, 1.520e-02, -4.192e-02, -1.883e-02, 1.772e-01, 5.885e-02, -1.387e-01, -1.972e-01, 1.235e-01, 7.554e-02)); + r += mul(s3_5, M4(-1.157e-01, 4.967e-02, 1.229e-01, 1.896e-03, -1.007e-01, -4.251e-02, 5.625e-02, 9.364e-02, 6.603e-02, -7.735e-03, -6.559e-02, -1.652e-01, -4.909e-02, -1.829e-02, -1.763e-01, -3.099e-02)); + r += mul(s3_6, M4(1.098e-02, 2.031e-01, 9.523e-02, 1.802e-01, -2.358e-02, -2.145e-02, -1.352e-01, -1.158e-01, 9.923e-02, 8.149e-03, 1.769e-02, 8.410e-02, -1.697e-01, 1.352e-01, 1.234e-01, 1.555e-01)); + r += mul(s3_7, M4(2.095e-01, 3.668e-02, -1.419e-01, -1.490e-01, 5.141e-02, -1.244e-01, -5.789e-02, -2.059e-02, 2.748e-02, -3.486e-02, -7.727e-02, -5.393e-02, -1.747e-01, -1.610e-01, -1.276e-02, -9.515e-02)); + r += mul(s3_8, M4(1.033e-02, -3.392e-02, -2.311e-01, -3.520e-02, 1.150e-01, -3.758e-02, 7.600e-03, 1.172e-01, -1.360e-02, 1.151e-02, -2.404e-02, 2.077e-01, 2.845e-01, -1.232e-01, -8.156e-02, -1.894e-01)); + r += mul(s4_0, M4(-9.735e-02, -3.486e-02, -6.713e-02, -2.585e-02, -6.854e-02, 6.316e-02, 8.265e-03, -6.834e-02, 7.139e-02, -5.798e-02, 6.760e-02, -1.391e-01, 7.996e-02, -2.167e-02, -1.062e-01, 2.066e-01)); + r += mul(s4_1, M4(-8.635e-02, 2.987e-02, -4.322e-02, -3.828e-02, 5.137e-03, 4.678e-02, -1.404e-01, 5.578e-02, -2.783e-01, -1.015e-01, 3.459e-01, -2.319e-01, 1.553e-01, 
2.762e-04, 1.473e-01, 1.165e-01)); + r += mul(s4_2, M4(-7.463e-02, -4.850e-02, -7.089e-02, -2.267e-02, -4.120e-02, -5.133e-02, -4.900e-02, 1.918e-02, -1.260e-01, 2.254e-03, -2.160e-01, 6.045e-02, 2.996e-02, -6.463e-03, -1.101e-02, 5.736e-02)); + r += mul(s4_3, M4(1.193e-02, -2.418e-02, 4.533e-02, 5.430e-02, -9.467e-02, 3.288e-02, -5.260e-02, 8.023e-02, 1.474e-01, -4.439e-02, -8.914e-02, -1.458e-01, -3.892e-02, 8.081e-02, -9.803e-02, 1.021e-01)); + r += mul(s4_4, M4(-2.304e-01, 1.246e-02, 1.935e-01, -1.178e-01, 1.601e-01, 4.687e-02, 4.043e-02, 8.424e-03, 7.338e-03, -1.905e-01, 1.704e-01, 1.178e-01, -3.089e-02, -1.439e-02, -1.412e-01, -1.774e-01)); + r += mul(s4_5, M4(1.828e-02, -1.877e-03, 1.229e-02, 7.833e-02, 1.167e-02, 1.347e-01, -1.139e-02, -1.327e-02, -1.475e-01, 5.962e-02, -2.596e-01, -1.411e-01, 9.417e-02, 3.386e-02, -3.430e-02, -5.016e-02)); + r += mul(s4_6, M4(1.449e-02, 2.653e-03, 2.908e-02, 6.208e-02, 6.766e-02, 1.268e-01, 1.136e-01, 1.258e-01, -1.932e-02, 3.102e-02, -1.572e-02, 3.379e-02, 5.706e-02, -4.562e-02, 7.471e-02, 5.423e-02)); + r += mul(s4_7, M4(6.315e-02, -1.922e-02, -7.600e-02, -1.088e-01, 5.920e-02, 5.026e-02, 4.642e-02, 8.855e-02, 3.478e-02, -2.381e-01, -4.820e-02, -1.841e-01, 4.603e-02, 1.440e-02, -2.844e-01, -6.164e-02)); + r += mul(s4_8, M4(5.340e-02, 3.927e-02, -3.356e-02, 9.408e-02, 6.674e-02, 1.403e-01, -1.937e-02, -3.005e-02, -1.384e-01, -7.463e-02, -2.501e-01, -1.620e-01, -9.500e-03, -1.138e-01, -1.036e-01, 2.868e-02)); + r += mul(s5_0, M4(2.800e-01, -1.215e-01, -1.408e-01, -5.378e-01, 5.833e-03, 2.368e-02, -9.039e-03, -3.018e-01, 5.485e-02, 7.920e-02, 4.624e-02, -4.291e-02, 5.511e-02, -2.629e-02, -8.483e-02, -1.310e-02)); + r += mul(s5_1, M4(2.315e-01, 2.562e-01, 5.922e-02, 5.734e-01, 1.729e-01, 2.301e-02, -1.977e-01, 1.018e-01, -1.037e-01, -6.595e-02, 6.467e-02, -1.063e-01, -2.122e-02, -1.615e-01, 5.164e-02, 5.493e-02)); + r += mul(s5_2, M4(1.994e-02, -5.266e-02, -5.133e-02, -1.698e-01, 2.320e-01, 2.233e-01, -1.157e-01, 4.154e-01, 
-2.680e-02, -9.549e-02, 7.711e-03, 4.158e-02, 6.469e-02, 1.245e-01, -3.013e-02, -7.501e-04)); + r += mul(s5_3, M4(2.144e-01, 1.362e-01, -2.548e-02, -1.641e-01, -2.607e-02, 1.424e-01, -1.871e-01, -2.545e-01, 1.510e-01, 4.586e-02, -1.526e-01, 1.189e-01, 1.640e-02, -7.506e-02, -5.653e-03, -3.108e-03)); + r += mul(s5_4, M4(-1.110e-01, -8.102e-02, 1.704e-01, 1.739e-01, 8.720e-02, -2.242e-02, -1.892e-01, -3.763e-01, -1.164e-01, -4.511e-02, 1.446e-01, -4.661e-03, -3.799e-02, -1.243e-01, -8.787e-03, -1.173e-01)); + r += mul(s5_5, M4(2.429e-01, -4.189e-03, -1.239e-01, 7.303e-02, -3.365e-04, 2.011e-01, -1.538e-01, -4.140e-02, -1.297e-01, -6.552e-02, -3.407e-04, 5.280e-02, -1.188e-01, 1.448e-02, 6.314e-02, 6.178e-02)); + r += mul(s5_6, M4(2.389e-01, 3.353e-02, 9.744e-02, -4.271e-01, -3.359e-01, 1.520e-01, 7.034e-02, 6.372e-02, 1.122e-01, -6.739e-02, -1.923e-01, 7.455e-02, -3.936e-02, -2.021e-02, 1.964e-02, 1.082e-01)); + r += mul(s5_7, M4(2.993e-01, 9.326e-02, 2.733e-02, 2.784e-01, -8.332e-02, 2.112e-01, 1.602e-01, 6.696e-02, -9.538e-03, -1.055e-01, -1.819e-01, 5.099e-02, 1.543e-01, -6.303e-02, -1.475e-01, 1.866e-01)); + r += mul(s5_8, M4(-4.296e-02, 7.301e-02, -1.854e-01, 1.012e-01, -1.827e-02, 1.379e-01, 2.076e-02, 4.075e-01, -9.882e-02, -5.355e-02, -2.266e-01, -2.860e-02, 1.387e-01, -6.984e-02, 1.198e-02, 4.958e-02)); + r += mul(s6_0, M4(-4.199e-02, -6.949e-02, 6.321e-03, 1.020e-01, 1.012e-01, 1.717e-02, -1.264e-01, -1.463e-03, 1.513e-01, 3.324e-01, 2.602e-01, 5.010e-01, -9.360e-02, 6.098e-02, 5.095e-02, 1.808e-01)); + r += mul(s6_1, M4(-2.671e-02, 3.535e-02, -3.235e-02, 1.113e-02, -5.662e-02, -2.013e-02, -3.218e-02, -1.404e-01, -2.585e-01, 3.519e-01, 3.055e-02, 1.426e+00, -5.909e-02, -8.276e-02, 1.303e-01, 4.591e-02)); + r += mul(s6_2, M4(2.104e-02, -1.997e-02, -1.282e-02, -1.288e-01, -4.834e-02, -8.819e-03, -1.001e-01, 1.190e-01, -6.097e-01, 4.538e-01, 3.375e-02, 7.287e-01, -4.996e-02, 1.007e-02, -6.780e-02, -9.774e-02)); + r += mul(s6_3, M4(8.145e-02, -1.226e-02, 
-2.695e-01, -1.840e-01, -2.791e-03, 3.953e-02, 5.544e-02, 1.626e-01, -8.656e-02, 8.049e-01, 8.527e-02, 1.007e+00, 1.289e-01, 6.034e-02, 5.650e-02, 5.077e-02)); + r += mul(s6_4, M4(-5.059e-02, -5.942e-02, -1.618e-01, -1.326e-01, 4.363e-02, -8.956e-02, -1.675e-02, 5.685e-02, -7.273e-02, 7.846e-01, -1.943e-01, 9.296e-01, -1.506e-02, -6.739e-04, -1.322e-01, -5.708e-02)); + r += mul(s6_5, M4(-5.501e-02, 7.983e-02, 8.777e-02, 6.892e-02, 2.920e-02, -7.697e-02, 1.084e-01, 8.862e-02, -2.660e-01, 6.913e-01, -4.073e-01, 9.941e-01, -1.284e-01, 5.222e-03, -3.999e-02, 8.610e-03)); + r += mul(s6_6, M4(6.583e-02, 4.271e-02, 2.569e-01, 8.534e-02, -1.704e-01, 5.589e-02, -3.546e-03, -1.523e-01, -1.311e-01, 8.730e-01, 3.699e-02, 6.776e-01, 6.118e-02, 1.445e-02, 1.026e-02, 2.652e-02)); + r += mul(s6_7, M4(1.279e-02, -1.900e-03, 7.960e-02, -1.289e-01, 8.040e-02, 2.170e-02, 7.302e-02, -1.060e-01, 1.450e-01, 7.483e-01, 9.028e-02, 1.651e-01, -2.201e-01, 9.073e-02, 2.536e-01, -1.352e-01)); + r += mul(s6_8, M4(-2.910e-02, 4.179e-03, 1.675e-02, -3.174e-02, -2.982e-02, -2.929e-02, 1.555e-03, 2.411e-03, -3.180e-01, 9.460e-01, 1.352e-01, -1.250e-01, -2.457e-02, 6.235e-02, -4.464e-02, 7.891e-03)); + r += mul(s7_0, M4(-6.169e-02, -1.517e-01, -1.996e-02, 1.295e-01, -2.849e-02, -1.885e-01, -1.792e-01, -8.735e-02, -8.643e-02, -1.027e-01, -2.212e-02, 1.958e-02, -9.077e-02, 1.214e-01, -8.204e-02, -1.169e-01)); + r += mul(s7_1, M4(-2.212e-01, -7.720e-02, 1.765e-02, -3.732e-01, 1.117e-01, -2.174e-01, 1.636e-01, -8.238e-02, 2.783e-02, 2.234e-02, -1.158e-01, -1.121e-01, 7.623e-02, -8.504e-02, -1.329e-01, 9.974e-02)); + r += mul(s7_2, M4(-6.344e-02, 2.761e-02, -1.002e-01, -8.684e-02, 2.075e-01, 1.503e-01, -1.899e-01, 8.479e-03, 1.561e-02, -6.885e-02, 1.325e-02, -4.810e-02, -5.711e-02, -1.432e-02, -6.513e-02, -7.492e-02)); + r += mul(s7_3, M4(-2.427e-01, 2.396e-01, 8.939e-02, 4.002e-01, 1.633e-01, 7.055e-02, 2.189e-01, -6.663e-02, -1.279e-02, 4.636e-02, -8.664e-03, 1.132e-02, -2.329e-01, 6.385e-02, 
-8.100e-02, 2.979e-02)); + r += mul(s7_4, M4(-2.821e-02, 5.042e-02, 1.950e-01, 2.954e-02, 2.501e-01, 8.593e-02, 1.409e-02, 2.578e-01, -1.368e-01, 1.928e-01, -6.146e-02, -7.787e-02, -8.456e-02, -1.480e-01, -2.411e-02, 3.021e-02)); + r += mul(s7_5, M4(-1.217e-01, 1.643e-01, -3.103e-02, -7.779e-02, -1.296e-01, 1.216e-01, 1.362e-01, 2.597e-01, 6.748e-02, -9.254e-04, -4.988e-02, -1.005e-01, -5.144e-02, -1.559e-01, -8.093e-02, -7.864e-02)); + r += mul(s7_6, M4(-1.545e-01, 1.825e-01, 2.664e-01, 3.557e-01, -2.919e-01, 3.683e-02, 1.001e-01, -5.162e-03, -5.217e-02, -1.259e-01, 3.705e-02, 6.868e-02, 2.186e-01, -6.010e-02, -1.280e-01, -8.277e-02)); + r += mul(s7_7, M4(1.190e-01, 2.876e-02, 1.043e-02, 4.347e-01, 5.788e-02, -8.798e-02, 1.988e-01, -1.649e-01, 1.316e-02, 3.971e-02, -6.393e-03, 1.290e-01, -2.058e-03, -1.706e-01, 1.685e-01, 2.529e-01)); + r += mul(s7_8, M4(-7.538e-02, 1.036e-01, -1.008e-01, 2.185e-01, -3.579e-02, 2.764e-01, 1.386e-03, -1.145e-01, -2.264e-02, -2.090e-02, 5.644e-02, -1.025e-01, -1.019e-01, 1.294e-01, -2.593e-02, 8.675e-02)); + r += V4(-7.017e-03, -3.288e-02, -1.861e-02, -1.819e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-7.581e-03, 5.764e-02, 6.289e-02, 1.514e-02, 1.221e-01, 4.928e-01, -5.057e-02, -1.483e-02, -1.792e-01, 8.489e-03, -1.431e-01, 5.868e-02, -3.492e-02, -7.226e-02, -2.105e-01, 
-1.764e-01)); + r += mul(s0_1, M4(-6.464e-02, 1.506e-01, 1.133e-01, -3.755e-02, -2.756e-01, 5.449e-02, -7.036e-02, -1.405e-01, -1.942e-01, -2.099e-01, 2.038e-02, -1.083e-01, -1.265e-01, 5.352e-04, -8.953e-02, 1.424e-01)); + r += mul(s0_2, M4(1.567e-01, 6.428e-03, -5.115e-02, -1.072e-01, -2.739e-01, 1.045e-01, 8.730e-02, -8.456e-02, -4.602e-02, -2.847e-01, 1.083e-01, -1.294e-01, -1.015e-01, -1.025e-02, 1.041e-02, -1.304e-01)); + r += mul(s0_3, M4(-4.572e-02, -1.455e-02, 5.614e-02, -2.425e-02, 2.413e-01, -1.611e-01, 1.586e-02, -8.876e-02, -1.026e-01, -1.917e-01, -2.480e-03, 2.035e-01, 6.331e-03, -3.646e-01, -1.175e-01, 2.484e-01)); + r += mul(s0_4, M4(1.387e-01, 4.597e-02, -1.948e-01, -1.308e-01, 1.082e-01, 3.149e-01, -1.774e-01, 6.195e-02, -2.746e-01, -7.085e-02, -3.781e-01, -5.056e-02, -2.648e-01, -3.117e-01, 4.133e-02, -8.854e-02)); + r += mul(s0_5, M4(-1.232e-01, 7.906e-02, -1.870e-01, 2.458e-02, -5.047e-02, -1.065e-01, 3.048e-02, -4.759e-02, -2.323e-01, -2.952e-02, -2.927e-01, -1.625e-01, -3.878e-02, -1.678e-02, -1.532e-01, 1.148e-01)); + r += mul(s0_6, M4(1.387e-02, -2.661e-02, -2.866e-04, 5.459e-02, -3.236e-01, 1.805e-02, -1.612e-01, -1.243e-01, -1.931e-01, -8.690e-02, 1.062e-01, -2.763e-01, -1.862e-01, 1.799e-01, -7.901e-02, 1.175e-02)); + r += mul(s0_7, M4(-2.899e-02, -2.938e-02, 8.832e-02, -9.537e-04, -3.940e-02, 2.951e-03, 1.549e-01, 7.872e-02, -8.307e-02, -9.757e-02, -6.711e-02, 1.056e-02, 2.290e-01, 2.113e-01, -4.343e-02, -3.240e-01)); + r += mul(s0_8, M4(-4.447e-02, -4.737e-02, -8.591e-02, -1.686e-01, 2.065e-02, 2.123e-01, 1.514e-01, -1.460e-01, -6.031e-02, 1.517e-01, -1.982e-01, -1.426e-01, -4.185e-02, -1.391e-01, -1.208e-01, -3.198e-04)); + r += mul(s1_0, M4(-2.046e-02, 4.678e-03, 2.224e-01, 3.020e-01, -3.632e-02, -7.535e-02, 1.002e-02, -8.862e-02, -6.067e-03, -1.110e-01, 5.225e-02, 1.497e-01, -1.390e-01, -1.191e-01, -2.232e-02, -6.692e-03)); + r += mul(s1_1, M4(-1.366e-01, -5.478e-02, 2.148e-01, 2.632e-01, -2.041e-01, -2.066e-01, -2.040e-01, 
-1.025e-01, 7.493e-02, 3.911e-02, -1.173e-04, 7.517e-02, 3.558e-02, 9.945e-02, 6.047e-02, 4.600e-02)); + r += mul(s1_2, M4(4.576e-01, 1.579e-01, -3.173e-02, -2.815e-01, -1.532e-01, 2.862e-02, 5.831e-02, 1.509e-01, 3.045e-02, 9.582e-02, 1.986e-02, -6.961e-02, 1.850e-02, -1.108e-01, 4.282e-02, -3.387e-02)); + r += mul(s1_3, M4(1.732e-01, 1.267e-01, 4.647e-01, 2.656e-02, 2.940e-01, -1.993e-01, -4.426e-02, 9.008e-02, 8.253e-02, 7.108e-03, -1.494e-01, -1.288e-01, 9.136e-02, 3.714e-02, 2.129e-02, -1.684e-01)); + r += mul(s1_4, M4(-5.636e-02, -2.412e-01, 4.480e-01, -2.122e-01, 2.270e-01, 3.985e-01, -2.752e-01, 9.774e-02, -3.704e-03, 2.587e-01, 1.135e-02, 3.374e-02, -1.561e-01, 1.880e-02, 1.153e-01, -1.707e-01)); + r += mul(s1_5, M4(7.268e-02, -3.111e-01, -8.362e-02, -1.898e-01, 3.023e-02, 1.225e-01, 4.258e-02, -9.576e-03, -8.835e-02, 7.056e-02, 6.475e-02, -1.310e-01, 1.088e-02, -2.065e-02, -1.054e-01, -1.411e-01)); + r += mul(s1_6, M4(-8.884e-02, 2.483e-01, -6.000e-02, 1.005e-01, -1.314e-01, -3.988e-02, -1.323e-01, 1.256e-01, -1.535e-03, -2.209e-02, -3.661e-02, 6.688e-02, 5.027e-02, 3.482e-02, 6.547e-03, 1.188e-01)); + r += mul(s1_7, M4(6.344e-02, 1.579e-01, 1.679e-01, 2.667e-01, 1.531e-01, -2.901e-02, 1.724e-01, 2.930e-01, -1.711e-02, 8.647e-02, 1.046e-01, 1.191e-02, 1.342e-01, 8.340e-02, 8.214e-03, 2.079e-01)); + r += mul(s1_8, M4(-3.021e-01, 1.907e-02, -2.016e-01, 1.267e-01, -2.294e-02, -5.400e-02, 9.827e-02, -5.517e-02, -5.363e-02, -4.889e-02, 6.296e-02, -9.737e-03, 3.925e-02, 5.326e-02, 7.340e-03, -7.322e-04)); + r += mul(s2_0, M4(-1.444e-01, 1.028e-01, -1.690e-01, 4.507e-03, -3.979e-01, -5.055e-01, 8.414e-02, -2.992e-01, -1.153e-01, 1.809e-01, 4.605e-02, -5.255e-02, 1.754e-01, 2.028e-02, 8.436e-02, 2.540e-02)); + r += mul(s2_1, M4(-1.711e-01, -4.863e-02, -6.410e-02, -1.019e-01, -4.134e-01, 4.277e-02, -2.486e-01, -3.703e-01, -2.274e-01, -2.226e-01, 1.523e-01, 2.370e-01, -4.884e-02, -1.029e-01, -1.104e-01, 8.226e-02)); + r += mul(s2_2, M4(-8.140e-02, 1.081e-01, 
1.777e-01, 1.003e-02, -3.272e-01, 9.784e-03, 2.820e-01, -8.613e-03, 9.318e-02, -3.326e-01, -3.971e-02, 1.246e-01, 1.257e-01, -6.054e-02, -6.647e-02, 7.432e-02)); + r += mul(s2_3, M4(1.508e-01, -8.128e-02, -5.221e-02, -6.611e-02, 3.457e-01, 4.610e-01, -4.847e-01, -7.489e-01, -4.416e-02, 1.110e-02, 1.604e-02, -1.602e-01, 2.462e-02, -1.182e-01, -1.151e-02, -5.689e-03)); + r += mul(s2_4, M4(1.936e-01, 6.475e-02, 4.692e-03, 1.287e-01, 1.099e-01, -6.930e-04, -6.362e-01, -8.032e-01, 3.933e-03, 6.387e-02, -1.938e-01, -5.386e-02, -1.930e-01, -1.201e-01, -5.197e-02, 2.089e-02)); + r += mul(s2_5, M4(1.472e-01, 1.752e-02, 1.642e-01, 5.075e-02, -1.939e-01, -1.430e-02, -2.506e-01, -2.485e-01, -1.068e-01, -1.327e-01, -6.546e-02, -2.278e-01, 2.349e-02, 3.054e-02, 1.091e-01, -1.302e-01)); + r += mul(s2_6, M4(-8.224e-02, 3.867e-02, -4.681e-02, -6.327e-02, 3.796e-01, -2.604e-01, -1.010e-01, -2.640e-01, 1.778e-01, 2.419e-02, 1.026e-01, -6.207e-02, -7.240e-03, -1.114e-02, 1.119e-01, 1.810e-02)); + r += mul(s2_7, M4(-5.650e-04, 1.023e-02, -8.498e-03, -1.265e-02, 1.975e-01, -1.816e-01, -1.205e-01, -3.322e-01, -1.140e-01, 1.550e-01, 3.409e-02, 1.268e-01, -1.322e-02, 5.101e-02, -2.385e-02, -1.287e-01)); + r += mul(s2_8, M4(-1.391e-01, 8.706e-02, 2.942e-01, 1.300e-01, 4.721e-02, -3.099e-01, -2.001e-01, -5.063e-01, 4.158e-02, 4.024e-02, -1.037e-01, 2.470e-02, -2.557e-02, 1.965e-02, -2.441e-02, 9.164e-02)); + r += mul(s3_0, M4(1.647e-01, -1.805e-02, -3.406e-02, -1.294e-01, -1.415e-01, -4.049e-02, 2.216e-02, 1.185e-01, 1.929e-02, -5.613e-02, 8.207e-02, 6.108e-02, 3.670e-02, -2.089e-01, -7.717e-02, 2.253e-01)); + r += mul(s3_1, M4(-7.123e-02, -3.315e-01, -4.448e-02, -5.631e-02, -3.428e-02, -1.262e-01, -4.484e-02, -1.613e-02, -2.587e-03, -1.097e-01, 1.216e-01, -1.016e-01, 5.639e-02, -4.027e-01, 5.756e-02, -1.267e-02)); + r += mul(s3_2, M4(-7.166e-02, 2.423e-01, 3.399e-02, 6.326e-02, -1.613e-01, 5.102e-02, 3.494e-02, 7.078e-03, -8.212e-02, 5.988e-02, -3.490e-02, -8.170e-02, -2.060e-01, 3.550e-02, 
7.665e-02, -6.092e-03)); + r += mul(s3_3, M4(3.093e-02, -6.196e-02, -1.018e-01, 1.027e-02, 2.010e-03, -2.233e-02, -3.281e-02, -7.155e-02, 1.719e-01, -2.694e-02, 3.917e-03, -3.190e-02, -2.370e-01, 2.511e-01, 2.929e-02, 8.000e-02)); + r += mul(s3_4, M4(-8.777e-02, 9.786e-02, 4.368e-02, 5.115e-02, 1.074e-01, 4.259e-02, 1.114e-01, 2.402e-01, 2.019e-02, 1.234e-01, -1.880e-01, -6.301e-04, -2.513e-01, -8.639e-03, -7.225e-02, 1.418e-01)); + r += mul(s3_5, M4(-2.268e-01, 2.029e-01, -1.185e-01, -5.495e-02, -1.163e-01, 4.372e-02, 8.053e-02, -1.812e-02, -9.069e-02, -3.884e-02, 8.661e-02, 2.608e-01, 1.140e-01, -7.601e-03, 5.186e-02, 3.865e-02)); + r += mul(s3_6, M4(2.052e-02, 4.939e-03, 1.158e-01, -6.508e-02, 8.047e-02, -4.546e-02, 3.617e-02, -1.138e-01, -2.398e-02, -6.332e-02, -1.038e-01, 8.081e-02, -2.302e-01, 8.088e-02, 3.060e-01, 2.446e-01)); + r += mul(s3_7, M4(1.062e-01, 1.106e-01, 1.035e-02, 1.028e-01, 1.072e-01, -3.130e-02, 1.465e-01, -1.049e-01, -4.924e-02, 1.304e-02, -9.063e-02, 6.612e-02, 4.834e-01, -1.259e-01, 7.290e-02, 4.857e-01)); + r += mul(s3_8, M4(1.174e-02, 5.546e-02, 1.373e-01, 7.799e-02, -1.851e-02, -6.496e-03, -1.534e-02, -1.367e-02, 5.389e-02, 8.490e-02, 6.008e-02, -2.141e-02, -1.899e-01, 8.971e-02, 1.200e-01, -4.990e-01)); + r += mul(s4_0, M4(-7.002e-03, 5.508e-02, 1.741e-02, 6.846e-02, 4.935e-02, 1.226e-01, 5.007e-02, -2.383e-02, -1.428e-01, 8.378e-02, -5.937e-02, -1.099e-01, 2.281e-01, -2.436e-02, 1.209e-01, -3.198e-02)); + r += mul(s4_1, M4(-7.399e-02, -4.566e-02, -7.268e-02, -1.275e-02, -5.746e-02, 7.156e-02, 4.005e-02, 1.183e-01, 5.846e-02, 2.354e-01, -3.516e-02, -5.764e-02, 1.953e-01, 3.119e-02, 1.454e-01, -1.406e-02)); + r += mul(s4_2, M4(3.127e-02, -8.150e-02, 8.277e-02, 1.413e-01, 5.123e-02, 2.205e-02, 3.230e-02, 5.683e-02, 4.894e-03, -1.257e-01, 9.340e-02, 2.224e-02, 1.370e-01, 7.398e-02, -6.368e-03, 1.608e-02)); + r += mul(s4_3, M4(6.881e-02, -3.375e-03, 1.056e-01, -5.304e-02, -1.064e-01, -4.148e-02, 8.624e-02, -4.904e-02, -7.752e-03, 
-7.298e-02, -7.932e-04, -1.262e-01, 2.048e-01, 3.475e-02, -1.271e-01, 1.224e-02)); + r += mul(s4_4, M4(2.041e-04, -4.132e-02, -2.237e-01, -3.584e-02, 3.167e-02, -3.019e-03, 2.618e-02, -5.664e-02, 1.946e-01, 1.340e-01, -1.109e-01, -2.126e-01, 3.268e-01, 2.296e-01, -9.041e-02, 1.938e-01)); + r += mul(s4_5, M4(-3.432e-02, 1.126e-01, -1.291e-02, 1.289e-01, 3.903e-02, -1.367e-01, 6.517e-02, -5.216e-04, -4.940e-02, 1.556e-01, -2.347e-02, -1.418e-01, 2.151e-01, 2.646e-01, -6.822e-02, 4.200e-02)); + r += mul(s4_6, M4(-2.471e-02, 7.440e-02, -5.017e-02, -1.015e-01, -6.059e-02, -3.496e-02, 4.484e-02, -9.797e-03, 2.390e-01, 4.988e-02, 6.820e-02, 1.513e-01, -4.653e-02, 7.645e-02, 1.273e-01, 2.260e-02)); + r += mul(s4_7, M4(3.388e-02, -1.471e-01, 4.271e-02, -2.965e-05, -6.814e-02, -4.665e-04, -4.201e-02, -4.251e-02, 3.050e-01, 1.337e-01, -1.464e-01, 8.366e-02, 8.331e-03, 2.890e-02, -4.571e-03, 8.624e-02)); + r += mul(s4_8, M4(1.735e-02, -4.533e-02, 5.140e-02, -1.281e-01, -8.486e-03, 4.054e-03, -8.573e-03, -1.170e-02, -1.231e-01, -1.433e-01, 9.381e-02, 3.782e-02, 2.768e-02, -7.110e-02, 5.206e-02, 8.258e-02)); + r += mul(s5_0, M4(-2.181e-01, 6.178e-02, -8.720e-02, -1.409e-01, 1.476e-01, 3.411e-01, 9.144e-02, 6.769e-02, -8.762e-02, -7.521e-02, 2.938e-02, -1.200e-02, 2.207e-02, 1.687e-01, -3.703e-02, -3.066e-02)); + r += mul(s5_1, M4(9.949e-02, 3.700e-01, 1.149e-01, 2.050e-01, 2.727e-02, 9.261e-02, 9.753e-02, 3.320e-01, -6.206e-03, -8.850e-02, -1.404e-01, 2.072e-02, 7.878e-03, 1.309e-01, 8.403e-03, -1.107e-01)); + r += mul(s5_2, M4(-2.403e-01, 2.991e-02, 2.990e-01, -2.211e-01, 4.919e-02, 2.493e-01, 8.766e-02, 2.302e-01, -7.887e-02, 9.146e-02, 4.180e-02, 1.044e-01, 3.087e-02, -4.650e-02, 2.782e-03, -9.519e-02)); + r += mul(s5_3, M4(-6.979e-02, -2.377e-01, 1.708e-01, -1.276e-02, 1.923e-02, 8.070e-02, 3.884e-01, 8.077e-02, 2.241e-01, 1.601e-02, -1.076e-01, -1.517e-02, -1.445e-01, -1.506e-01, -6.133e-02, -1.256e-02)); + r += mul(s5_4, M4(4.506e-01, 7.498e-01, -3.484e-02, -4.042e-02, 
-9.386e-02, -1.795e-01, 1.740e-01, 4.823e-01, -5.567e-02, 3.168e-02, 1.078e-01, 8.777e-02, 1.062e-01, 8.114e-02, -1.237e-01, -1.974e-03)); + r += mul(s5_5, M4(-1.471e-01, -6.964e-02, -1.597e-02, 3.554e-02, 6.958e-02, 7.652e-02, 4.270e-02, 4.024e-01, -1.123e-01, 1.554e-03, 1.842e-01, -1.366e-01, 2.237e-02, 5.522e-02, -9.837e-02, -3.022e-02)); + r += mul(s5_6, M4(5.915e-02, -1.744e-01, -4.662e-04, 2.549e-01, -1.787e-01, 4.083e-01, 3.959e-01, 2.818e-01, 2.729e-01, -8.656e-02, -4.996e-02, -2.870e-04, 5.173e-03, -1.345e-01, 2.743e-02, 7.207e-02)); + r += mul(s5_7, M4(-4.325e-02, -1.319e-01, 3.855e-01, 2.217e-01, -3.173e-01, -8.804e-02, 1.035e-01, 5.375e-01, 4.128e-02, -3.985e-03, 7.725e-02, -1.015e-01, -2.278e-02, -1.185e-01, 2.272e-01, -1.947e-01)); + r += mul(s5_8, M4(-4.690e-03, -7.318e-02, 1.432e-01, 4.873e-01, 4.692e-02, 2.026e-01, 1.888e-01, 4.836e-01, 4.146e-02, 8.686e-03, 1.694e-01, 5.939e-02, -8.867e-03, -9.594e-02, -6.524e-02, -1.929e-01)); + r += mul(s6_0, M4(1.568e-01, -2.073e-02, 8.341e-02, -2.046e-01, -9.280e-03, 2.334e-02, -2.938e-02, -5.672e-02, 4.066e-01, 2.305e-01, -1.271e-02, 6.631e-01, 1.497e-01, 4.212e-02, 8.599e-02, -7.885e-02)); + r += mul(s6_1, M4(2.515e-01, -9.740e-02, 3.432e-02, -1.194e-01, -9.355e-02, 3.657e-02, -5.205e-02, 8.594e-02, 5.878e-02, 7.739e-01, -1.422e-01, 3.957e-01, 1.517e-01, 3.349e-02, 9.690e-02, -7.139e-03)); + r += mul(s6_2, M4(1.954e-02, -1.181e-01, 4.747e-02, 1.045e-01, 4.518e-02, 1.305e-01, -2.697e-02, 6.527e-02, 7.863e-02, 8.908e-01, -1.274e-01, 4.605e-01, 1.267e-02, 9.088e-02, -1.163e-01, -5.796e-03)); + r += mul(s6_3, M4(1.233e-02, -6.354e-02, 3.720e-02, 7.892e-02, -5.153e-02, 1.068e-01, -1.965e-02, 8.320e-02, 8.402e-01, 4.660e-01, -3.976e-01, 4.275e-01, -2.392e-02, -1.018e-01, -6.422e-02, 5.182e-02)); + r += mul(s6_4, M4(-2.903e-02, 3.879e-02, 2.918e-02, 8.715e-02, 4.102e-02, -1.282e-01, 1.751e-01, 8.530e-02, 4.392e-01, 1.070e+00, -6.528e-01, 9.735e-02, 2.811e-02, -6.109e-02, -1.582e-01, 1.365e-01)); + r += mul(s6_5, 
M4(-2.513e-02, 1.878e-02, -2.251e-02, -2.193e-02, 1.118e-01, 1.221e-02, -1.193e-01, -4.231e-02, 3.048e-01, 9.500e-01, -5.301e-01, 6.359e-01, 1.858e-02, -3.289e-02, -1.076e-01, 4.126e-02)); + r += mul(s6_6, M4(-4.805e-02, 1.302e-01, 9.917e-02, -1.466e-02, -7.287e-02, -2.519e-02, 3.926e-02, -9.813e-02, -9.269e-02, 4.149e-01, -4.088e-01, 9.631e-01, 9.875e-02, -9.482e-02, 5.801e-02, 4.301e-02)); + r += mul(s6_7, M4(-9.555e-02, -8.370e-02, -6.806e-02, 2.162e-02, -1.121e-02, -7.891e-02, -4.695e-02, -5.281e-03, -3.754e-01, -2.820e-01, 1.180e-02, 1.720e-01, -3.990e-02, 1.057e-01, -5.257e-02, 1.805e-02)); + r += mul(s6_8, M4(-2.579e-02, -1.194e-01, -8.538e-03, 1.739e-01, 3.480e-02, -4.258e-02, 4.300e-02, 2.388e-02, 5.042e-02, 2.960e-01, 1.991e-01, 1.114e+00, -5.936e-03, -3.309e-02, -1.775e-03, -1.074e-02)); + r += mul(s7_0, M4(-4.948e-01, -5.235e-01, -5.783e-02, 1.154e-01, 3.577e-02, 2.507e-01, -7.143e-02, -4.314e-01, -6.553e-03, -4.330e-02, 3.033e-02, -3.339e-02, -1.357e-01, 8.840e-02, -1.361e-01, -3.746e-02)); + r += mul(s7_1, M4(-3.641e-01, 5.871e-02, -8.952e-02, 4.781e-01, -1.201e-01, -2.183e-01, -1.605e-01, -4.684e-01, 1.597e-01, -1.616e-01, 1.642e-02, -2.894e-02, -1.706e-01, -4.990e-02, 2.802e-02, 6.643e-02)); + r += mul(s7_2, M4(-4.576e-02, 1.907e-01, 1.280e-01, -1.856e-02, 1.723e-01, -3.408e-01, -1.564e-02, -6.472e-02, 1.314e-01, -7.113e-02, -7.765e-02, 8.551e-02, 5.081e-02, -3.404e-01, -1.670e-01, -3.953e-02)); + r += mul(s7_3, M4(-1.001e-01, -4.048e-01, -1.702e-01, 1.433e-01, -1.750e-02, -1.612e-02, 1.528e-02, -1.222e-01, -3.094e-02, 7.821e-02, -2.875e-02, -4.723e-02, 1.174e-01, 5.041e-02, -3.062e-03, -1.971e-01)); + r += mul(s7_4, M4(-1.204e-01, -7.649e-02, -1.210e-01, 5.221e-02, 2.542e-01, 5.947e-02, 2.175e-01, 2.127e-01, -7.925e-02, 7.789e-02, 1.044e-01, 9.239e-02, -2.241e-01, 1.293e-01, 2.459e-01, -1.729e-01)); + r += mul(s7_5, M4(-1.291e-01, 1.958e-01, -1.094e-01, 1.646e-01, 1.794e-01, -1.150e-01, 1.523e-01, 1.430e-02, -3.675e-02, -5.951e-02, 1.841e-01, 
-7.548e-02, -7.641e-02, 7.561e-02, 1.775e-01, 9.342e-03)); + r += mul(s7_6, M4(-1.779e-01, 1.218e-01, -6.176e-02, 2.127e-01, -1.294e-01, -1.136e-01, 6.021e-02, 1.557e-02, 2.650e-02, 7.300e-02, 4.451e-02, 3.111e-03, 1.138e-01, 2.764e-02, -7.746e-02, 3.992e-02)); + r += mul(s7_7, M4(-1.950e-01, -2.336e-02, -1.849e-01, 6.938e-02, -1.039e-01, -3.789e-02, 3.041e-02, -2.017e-01, -1.126e-01, -6.084e-02, 1.352e-01, 1.157e-02, -6.909e-02, 5.668e-02, 2.388e-01, 7.445e-03)); + r += mul(s7_8, M4(2.975e-02, -7.086e-02, -5.371e-02, -1.105e-02, -1.465e-01, -8.496e-02, 6.396e-02, 2.739e-01, 2.182e-03, 4.118e-02, -1.874e-01, -7.968e-02, -3.285e-02, -3.565e-02, 1.240e-02, 8.849e-02)); + r += V4(-1.179e-02, -2.914e-02, 6.564e-02, 3.234e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.753e-02, 1.160e-01, -1.269e-01, 9.143e-02, 1.019e-01, 7.000e-02, -6.194e-02, -5.348e-02, 5.273e-01, 4.315e-02, -2.275e-01, 1.919e-01, -3.212e-01, -2.432e-01, -1.944e-01, 2.996e-01)); + r += mul(s0_1, M4(-4.951e-02, 1.357e-03, 1.031e-01, -4.953e-02, -1.417e-01, -1.463e-01, -6.715e-02, -1.217e-01, -6.400e-01, -5.599e-02, -2.127e-01, 3.810e-02, -8.862e-03, 2.585e-02, -3.099e-02, 1.269e-01)); + r += mul(s0_2, M4(-8.903e-02, -1.431e-01, 1.724e-01, 8.231e-03, 6.244e-02, 4.866e-02, 8.680e-02, 1.017e-01, -2.014e-01, -7.754e-02, -3.044e-03, -2.485e-01, 
-2.124e-01, 7.233e-02, -9.456e-02, 1.639e-01)); + r += mul(s0_3, M4(1.051e-02, 6.900e-02, -3.804e-02, 7.057e-02, -2.420e-01, 1.190e-01, -1.950e-02, 9.890e-02, 1.897e-02, -1.311e-01, -2.827e-01, 2.823e-01, 5.921e-02, 3.560e-01, -3.249e-01, -1.611e-01)); + r += mul(s0_4, M4(6.422e-02, -3.729e-02, -1.152e-01, 6.029e-02, 2.641e-01, -1.811e-01, -1.823e-01, 6.399e-03, -2.417e-01, -1.529e-01, -5.156e-02, 2.731e-01, 3.111e-01, 5.217e-02, 2.734e-02, -4.023e-04)); + r += mul(s0_5, M4(5.829e-02, -5.505e-02, -1.138e-01, -4.952e-02, -6.139e-02, 2.337e-01, -1.819e-01, 8.360e-02, 4.169e-02, -3.438e-02, 2.903e-02, 2.498e-01, 6.296e-02, -4.415e-02, 1.939e-02, 7.194e-02)); + r += mul(s0_6, M4(1.292e-01, 4.660e-02, -1.532e-02, -9.872e-03, 4.496e-02, 2.380e-01, 1.217e-01, -1.109e-01, -5.028e-02, 1.675e-01, -1.568e-01, 6.519e-02, -1.263e-01, -1.036e-01, 1.377e-02, -1.131e-01)); + r += mul(s0_7, M4(-1.659e-02, -7.879e-02, -2.418e-02, -3.019e-02, -1.356e-01, 4.405e-02, 2.832e-01, -3.750e-02, -1.432e-01, -8.256e-02, 1.373e-02, 3.424e-02, -1.050e-01, 1.883e-01, 4.134e-02, 7.965e-02)); + r += mul(s0_8, M4(2.675e-02, -5.278e-02, -1.958e-01, -2.066e-02, -1.344e-01, 2.913e-02, 7.606e-02, -2.964e-02, -1.313e-01, -9.613e-03, -1.319e-01, 1.505e-02, 1.136e-01, 1.523e-01, -1.058e-01, 4.408e-02)); + r += mul(s1_0, M4(-3.900e-01, 2.456e-01, -4.285e-02, 2.658e-01, -6.556e-02, -3.491e-02, 5.756e-02, 1.538e-01, 1.134e-01, -2.795e-02, 7.565e-03, -7.198e-02, 9.609e-02, -1.510e-01, -5.964e-02, -2.881e-01)); + r += mul(s1_1, M4(1.668e-01, 2.787e-01, 4.333e-02, -2.974e-01, 1.094e-01, -2.211e-01, -8.154e-02, -2.189e-01, 1.551e-01, -3.197e-02, -8.719e-02, -1.484e-01, 4.442e-02, 7.316e-02, -2.625e-02, -8.610e-02)); + r += mul(s1_2, M4(5.845e-02, -1.389e-01, -3.456e-01, -1.044e-01, 3.292e-02, 1.487e-01, -7.203e-02, 8.379e-02, 3.166e-02, 3.898e-02, -5.670e-02, 2.292e-02, 6.872e-02, -4.522e-02, -1.448e-01, 6.844e-02)); + r += mul(s1_3, M4(-3.155e-01, -2.408e-01, -5.448e-02, 4.171e-02, -2.457e-01, -9.897e-02, 
8.344e-02, -4.598e-02, 1.920e-02, 4.171e-02, -6.982e-02, 4.977e-02, -6.839e-02, 5.980e-03, 1.086e-01, -1.904e-01)); + r += mul(s1_4, M4(-2.159e-01, 2.080e-01, -9.792e-02, 3.998e-01, 2.786e-01, -1.400e-01, -2.510e-01, 3.333e-01, 2.438e-01, 1.157e-02, 4.421e-02, -2.785e-02, -1.499e-01, 1.087e-01, -1.468e-01, -6.480e-02)); + r += mul(s1_5, M4(1.131e-01, -8.773e-02, 1.201e-01, -3.160e-02, -2.395e-02, 2.306e-01, -2.283e-02, 3.290e-02, -5.039e-02, -1.410e-02, -4.766e-02, 8.466e-02, 9.510e-02, -9.014e-02, 1.913e-01, 4.092e-02)); + r += mul(s1_6, M4(-1.842e-01, -1.553e-01, 9.553e-02, 9.113e-02, -1.162e-01, 7.742e-02, -2.725e-03, -1.880e-02, -1.337e-01, 1.846e-02, 5.310e-02, -5.656e-02, -4.915e-02, 7.530e-03, 8.957e-02, 1.642e-01)); + r += mul(s1_7, M4(-4.356e-01, -1.068e-01, 1.175e-01, -1.185e-01, -1.105e-01, -7.094e-02, 2.674e-01, -5.722e-02, -1.175e-01, 3.593e-02, 5.452e-02, 5.885e-02, 8.733e-02, -2.635e-03, 5.368e-02, -6.987e-03)); + r += mul(s1_8, M4(8.769e-02, 1.854e-01, -1.942e-02, -4.662e-02, -5.551e-03, 4.066e-03, -1.519e-01, 8.958e-02, -6.454e-02, -1.071e-02, 4.913e-02, -7.078e-02, -4.383e-02, -1.590e-02, 6.319e-02, 1.095e-01)); + r += mul(s2_0, M4(-1.729e-01, -2.021e-02, -4.229e-02, 9.926e-02, -3.796e-01, -1.775e-01, -7.721e-01, 4.283e-01, -4.649e-02, -8.597e-02, 7.155e-02, -1.387e-01, -8.587e-02, 5.203e-02, 2.037e-01, 1.290e-01)); + r += mul(s2_1, M4(-9.028e-02, -1.608e-02, 4.919e-02, 4.651e-02, 7.455e-02, 3.268e-01, -1.519e-01, -3.667e-01, -9.927e-02, -2.468e-01, 2.008e-01, 2.132e-01, -1.325e-01, -8.896e-06, 1.261e-01, -4.802e-02)); + r += mul(s2_2, M4(-1.879e-02, 2.161e-01, -1.026e-01, 6.351e-02, -1.240e-01, -1.133e-01, -3.072e-01, -4.096e-01, -2.990e-02, 1.434e-01, -7.412e-02, -1.324e-01, 7.264e-02, 9.586e-02, -4.112e-02, 1.843e-02)); + r += mul(s2_3, M4(2.250e-02, 1.458e-02, -6.317e-02, 1.976e-01, 7.670e-01, -1.387e-01, 1.173e-01, 2.007e-01, -9.971e-02, -2.958e-03, 1.879e-01, -1.306e-01, 8.892e-03, -2.086e-02, 1.666e-01, 9.401e-03)); + r += mul(s2_4, 
M4(1.537e-01, 3.129e-02, 6.454e-02, -4.913e-02, 4.720e-01, -1.061e-01, -3.422e-01, 1.372e-01, 2.658e-01, -1.857e-01, -2.166e-01, -2.046e-01, 1.218e-01, -2.499e-02, -2.471e-02, -1.758e-01)); + r += mul(s2_5, M4(3.431e-01, 1.888e-01, -1.755e-01, -1.416e-01, 3.933e-01, -3.451e-01, 9.913e-03, 2.057e-01, 4.078e-02, 1.771e-01, 5.514e-02, 1.610e-01, 2.854e-02, 2.748e-02, -1.872e-01, 3.411e-02)); + r += mul(s2_6, M4(-1.003e-01, 8.879e-03, 2.002e-01, 7.435e-03, 2.879e-01, 1.927e-01, 5.846e-02, 5.309e-01, 2.174e-02, -5.902e-02, 2.934e-02, -2.275e-01, -4.843e-02, 3.327e-02, -2.839e-02, 4.788e-04)); + r += mul(s2_7, M4(-6.818e-03, 5.386e-02, 1.705e-01, 1.055e-01, -2.552e-01, 2.348e-01, -5.376e-01, 8.415e-01, 9.206e-02, -2.165e-01, -6.736e-03, 1.054e-01, -5.970e-02, -2.189e-02, 2.291e-02, 8.281e-02)); + r += mul(s2_8, M4(-9.683e-02, 7.370e-02, -1.486e-03, 5.930e-02, -1.231e-01, 5.585e-02, -2.850e-02, 3.915e-01, -1.038e-01, 2.451e-01, 9.171e-03, -8.992e-02, 1.089e-02, -5.349e-02, -7.425e-02, 2.983e-02)); + r += mul(s3_0, M4(6.806e-02, -4.765e-02, -4.098e-02, -1.450e-02, 1.719e-01, -1.118e-01, 7.612e-02, 2.340e-02, -3.503e-02, -1.497e-01, -1.946e-01, 6.818e-02, 2.024e-01, -8.164e-02, -1.750e-01, -1.483e-01)); + r += mul(s3_1, M4(-1.791e-01, -2.131e-01, -1.254e-02, 1.586e-02, 1.273e-01, 1.195e-01, -6.582e-02, 8.326e-02, 4.553e-02, -1.516e-02, 1.181e-01, -4.220e-02, -3.388e-03, 4.326e-02, -2.783e-01, 2.707e-02)); + r += mul(s3_2, M4(-8.434e-02, 1.290e-01, 1.284e-01, 5.182e-02, 1.122e-01, 5.347e-02, 1.215e-01, 1.662e-01, -9.834e-02, 8.082e-04, 1.051e-02, 5.248e-02, -9.915e-02, -2.159e-01, 5.827e-02, -1.049e-01)); + r += mul(s3_3, M4(1.346e-01, -8.483e-02, -1.476e-01, 2.748e-02, 1.791e-02, -4.909e-02, -1.327e-02, -6.284e-02, -4.048e-02, -2.921e-03, 2.202e-02, 8.188e-02, 3.256e-01, 1.427e-01, -9.825e-02, 2.977e-02)); + r += mul(s3_4, M4(-1.412e-01, -1.176e-01, 1.140e-01, 1.489e-01, -6.777e-02, -4.582e-02, 9.809e-02, 2.975e-02, 5.165e-02, -8.093e-02, -6.055e-02, 7.214e-02, 3.706e-01, 
-4.749e-01, -2.525e-02, -2.335e-01)); + r += mul(s3_5, M4(8.684e-02, 4.703e-02, -1.277e-01, 2.117e-01, -2.123e-02, 1.276e-01, -6.533e-02, 7.431e-02, -6.904e-02, 1.692e-01, 1.729e-02, 1.353e-01, -2.167e-01, 1.675e-01, 1.109e-01, 2.392e-01)); + r += mul(s3_6, M4(3.277e-02, -2.271e-02, -4.455e-02, -1.837e-02, -1.161e-01, -1.299e-01, -3.771e-02, -1.140e-01, -6.393e-02, 1.766e-02, 6.553e-03, -4.404e-02, 6.962e-02, 1.692e-01, -6.897e-02, -6.590e-05)); + r += mul(s3_7, M4(-1.314e-01, -1.961e-01, 8.045e-02, -1.115e-01, -8.862e-02, -4.149e-02, -2.506e-02, -1.571e-03, -6.118e-02, 6.175e-02, 1.056e-01, -1.248e-01, 1.067e-01, -1.729e-01, -8.977e-02, -1.716e-01)); + r += mul(s3_8, M4(-1.046e-01, 1.136e-01, -8.508e-03, 1.211e-01, -8.917e-02, 3.733e-02, 8.172e-02, -5.733e-03, 2.049e-01, 1.795e-01, -1.721e-01, -5.420e-02, 2.037e-01, 2.319e-02, 3.012e-01, -3.095e-01)); + r += mul(s4_0, M4(-6.174e-02, 1.823e-01, 6.577e-02, 7.931e-02, 5.619e-02, 6.599e-02, -2.907e-02, -1.747e-02, 2.922e-02, 4.411e-02, -2.690e-02, -2.129e-01, 2.936e-02, -2.412e-02, -1.066e-02, 3.026e-02)); + r += mul(s4_1, M4(-9.450e-02, -1.038e-01, -1.717e-02, 7.076e-02, -2.367e-01, 1.526e-02, -1.049e-02, 1.390e-02, 8.487e-02, -1.204e-02, -6.444e-02, 1.747e-01, -4.432e-02, -1.778e-01, 5.627e-02, -3.594e-02)); + r += mul(s4_2, M4(1.912e-02, -1.306e-02, -1.746e-01, -4.288e-02, -4.139e-02, 2.986e-02, -4.016e-02, -4.361e-02, 1.453e-01, 6.837e-02, -8.332e-02, -1.564e-01, -3.011e-02, 2.196e-02, -2.421e-02, -2.315e-01)); + r += mul(s4_3, M4(1.118e-01, 6.878e-02, 5.315e-02, 1.246e-01, 3.269e-02, 7.104e-02, 4.679e-02, 3.018e-03, -9.116e-02, -4.459e-02, 8.972e-02, -6.690e-02, -3.056e-02, 1.471e-01, 6.756e-02, 6.757e-02)); + r += mul(s4_4, M4(1.698e-01, -7.062e-02, 6.763e-02, -7.260e-02, -8.372e-02, 4.456e-02, 7.913e-02, -9.825e-02, 1.527e-01, -4.960e-02, -1.660e-01, 1.954e-01, 3.782e-02, -1.666e-01, 1.953e-02, 1.209e-01)); + r += mul(s4_5, M4(-9.017e-02, -1.464e-02, -1.876e-02, 1.001e-01, -1.053e-01, -7.557e-02, -1.448e-01, 
-9.475e-02, 1.274e-01, 3.928e-01, -4.408e-02, 1.633e-01, 4.701e-02, -2.134e-01, -1.312e-02, 1.216e-02)); + r += mul(s4_6, M4(4.620e-02, -1.978e-02, 6.714e-02, -6.474e-02, 1.636e-02, 9.339e-02, -4.837e-03, 1.756e-03, 8.870e-03, -5.370e-02, -5.992e-02, -1.971e-01, -9.497e-02, -2.736e-02, 6.098e-02, -1.200e-01)); + r += mul(s4_7, M4(-8.177e-02, -8.742e-03, -3.711e-02, 2.097e-02, 8.538e-02, 5.007e-02, -2.769e-02, 6.117e-02, 1.209e-01, -5.349e-02, 2.031e-01, -1.272e-01, -1.418e-01, 8.815e-02, 1.173e-01, 4.758e-02)); + r += mul(s4_8, M4(-2.341e-02, -4.843e-02, -6.954e-02, -1.073e-01, 1.958e-02, -1.200e-02, -1.276e-01, 5.947e-02, -2.526e-01, 1.087e-01, -4.189e-01, -3.295e-02, -7.157e-03, -9.873e-02, -3.551e-02, 2.754e-02)); + r += mul(s5_0, M4(2.855e-01, -9.893e-02, 4.629e-01, 3.685e-01, 1.554e-01, 2.264e-01, 8.320e-02, 3.088e-01, -7.983e-02, -1.085e-01, -1.017e-02, 9.117e-02, 1.422e-01, 1.047e-01, 5.215e-03, -1.489e-01)); + r += mul(s5_1, M4(2.404e-01, 8.492e-02, 1.039e-01, 2.752e-03, 2.740e-01, 1.184e-02, 3.245e-01, 3.345e-01, -4.420e-02, 5.843e-02, -5.644e-02, 5.527e-02, -8.163e-02, -6.649e-02, -8.762e-02, 4.387e-02)); + r += mul(s5_2, M4(-5.425e-02, 1.489e-01, -1.437e-01, -2.529e-01, -1.157e-01, -6.165e-02, 1.275e-01, 3.054e-01, 1.627e-02, 2.292e-01, -4.550e-02, 1.312e-02, -2.202e-01, -9.887e-02, 1.580e-01, -1.435e-02)); + r += mul(s5_3, M4(-1.697e-01, 1.959e-02, 6.914e-02, 4.129e-02, 1.584e-01, 8.147e-02, 1.958e-01, -4.450e-01, 6.746e-02, -1.768e-02, 6.434e-02, 1.126e-01, 3.588e-02, -3.752e-02, -2.925e-02, -2.701e-02)); + r += mul(s5_4, M4(-2.298e-01, 2.289e-01, -2.091e-01, 2.205e-01, 1.286e-01, 3.793e-02, 2.837e-01, 8.140e-02, 2.141e-01, -1.052e-01, -5.835e-02, 1.332e-01, -1.450e-01, -2.986e-02, 5.020e-02, -7.013e-02)); + r += mul(s5_5, M4(-1.075e-01, -3.479e-01, 1.246e-01, -1.686e-01, -7.951e-02, -4.929e-02, 3.006e-01, -2.037e-01, 9.014e-02, 1.676e-01, 5.972e-02, 8.490e-03, -9.535e-02, -8.521e-03, 6.319e-02, 9.563e-02)); + r += mul(s5_6, M4(-1.586e-01, -8.457e-02, 
4.014e-01, -2.831e-01, -3.806e-01, 8.747e-02, 7.486e-02, 9.824e-02, -2.501e-02, -8.783e-02, -1.230e-01, 7.386e-02, 2.113e-01, -3.155e-02, -8.221e-02, 4.656e-02)); + r += mul(s5_7, M4(1.175e-02, -9.179e-02, 1.514e-02, 2.905e-01, -3.416e-01, -7.666e-02, -2.358e-01, -5.337e-02, -1.582e-01, 6.928e-02, 1.278e-01, -1.203e-01, 5.235e-02, 6.887e-02, 3.206e-02, -4.626e-02)); + r += mul(s5_8, M4(3.329e-01, -3.605e-02, 2.378e-01, -6.314e-02, -4.085e-01, -1.875e-01, -1.258e-01, -3.102e-01, -4.072e-02, 1.975e-01, 2.059e-02, -3.668e-02, -1.472e-01, -1.499e-01, 6.695e-02, 2.047e-02)); + r += mul(s6_0, M4(-2.750e-01, 5.779e-02, 1.428e-01, -2.794e-03, 1.336e-02, 3.631e-02, 9.575e-02, 1.314e-01, -2.453e-03, 2.720e-01, 3.414e-01, 7.753e-01, 2.981e-01, 2.486e-02, 1.996e-02, 5.238e-02)); + r += mul(s6_1, M4(-2.172e-01, -3.708e-02, -5.879e-02, 9.169e-02, -3.380e-02, -6.326e-02, -2.337e-01, -3.298e-02, 1.424e-01, 5.494e-01, -1.648e-01, 5.078e-01, -1.090e-02, 3.645e-02, 2.296e-02, 1.901e-02)); + r += mul(s6_2, M4(3.577e-02, 6.006e-02, 6.164e-02, 1.085e-01, 6.524e-02, 5.457e-02, -1.931e-01, 4.378e-02, -5.000e-01, 3.868e-01, -2.808e-01, 1.434e+00, -2.766e-01, -1.873e-01, 1.011e-01, -2.703e-01)); + r += mul(s6_3, M4(-4.899e-03, -3.966e-02, 2.964e-01, -1.368e-01, 3.748e-02, 3.274e-02, 5.521e-02, 1.617e-02, 1.966e-02, 2.033e-01, -3.028e-01, 8.351e-01, -3.167e-02, -3.855e-02, -6.558e-02, 1.280e-01)); + r += mul(s6_4, M4(7.920e-02, -2.379e-02, 1.778e-02, 1.055e-03, -2.762e-02, 1.177e-01, 9.482e-02, -6.561e-02, 4.040e-01, 6.195e-01, 1.967e-01, 1.053e+00, -5.925e-02, -1.213e-01, 9.595e-02, -1.081e-01)); + r += mul(s6_5, M4(-2.577e-02, 6.871e-03, 1.317e-01, -4.759e-02, -1.998e-02, -2.636e-02, 1.609e-02, -1.392e-01, -8.404e-01, 3.874e-01, 1.802e-01, 1.588e+00, -1.716e-03, -1.695e-01, 1.894e-01, -4.657e-02)); + r += mul(s6_6, M4(-2.028e-03, 5.885e-02, 1.136e-02, 9.104e-02, 7.997e-03, 3.105e-02, 1.839e-03, 1.654e-01, -4.107e-01, 8.543e-02, 1.357e-01, 1.067e+00, -1.180e-01, -1.710e-02, 8.009e-02, 
5.065e-02)); + r += mul(s6_7, M4(7.280e-02, 4.661e-02, -9.582e-02, -1.309e-01, -2.232e-03, -9.439e-02, 3.515e-02, -1.917e-01, -7.614e-01, 6.188e-01, 3.196e-01, 9.877e-01, -9.609e-02, 1.005e-02, -1.771e-01, 1.163e-01)); + r += mul(s6_8, M4(-1.872e-02, -9.173e-02, 1.890e-02, 7.081e-03, -5.329e-02, -5.919e-02, 2.833e-02, 1.134e-01, -9.223e-01, 3.772e-01, -1.301e-01, 6.352e-01, -2.958e-02, -1.044e-01, -1.302e-01, -7.854e-02)); + r += mul(s7_0, M4(1.434e-01, -3.259e-01, -8.240e-02, -1.579e-01, 1.615e-01, -6.471e-02, -4.002e-02, -1.113e-01, -1.132e-02, 4.376e-02, -6.766e-02, -9.032e-02, 5.690e-02, 1.125e-01, -1.094e-01, -1.650e-01)); + r += mul(s7_1, M4(-2.541e-02, 6.512e-02, -9.465e-02, -2.184e-01, -2.772e-01, 7.455e-03, -3.263e-02, 2.454e-01, 4.006e-02, 3.165e-02, 1.181e-02, -5.626e-02, 1.704e-01, 1.682e-01, -8.115e-02, 2.731e-02)); + r += mul(s7_2, M4(-2.395e-02, -9.132e-02, -9.520e-02, 6.978e-03, 1.864e-03, -1.293e-01, -9.969e-02, 3.719e-01, 1.173e-01, -6.817e-03, 6.705e-02, 8.744e-02, -1.032e-02, -7.968e-02, -3.208e-02, -1.314e-01)); + r += mul(s7_3, M4(9.108e-02, 7.856e-02, 4.842e-02, 1.747e-01, 2.279e-01, 9.139e-02, -8.356e-02, 1.165e-01, -7.116e-02, 9.958e-02, -5.151e-02, -4.524e-02, 1.658e-01, 2.940e-01, 8.399e-02, -2.767e-01)); + r += mul(s7_4, M4(5.245e-01, -4.385e-01, -2.367e-01, 3.836e-02, -3.209e-01, -4.128e-02, 3.896e-03, 3.336e-01, 4.742e-02, 4.354e-02, 1.015e-01, 2.345e-02, -5.708e-02, 1.239e-01, 2.494e-02, -3.044e-02)); + r += mul(s7_5, M4(1.659e-01, -1.262e-01, -2.488e-01, 1.157e-01, -1.470e-01, -5.262e-01, -3.503e-03, -1.144e-01, 9.953e-03, -4.331e-02, 6.715e-02, -4.391e-02, 2.829e-02, -8.685e-02, -1.009e-01, 1.625e-01)); + r += mul(s7_6, M4(3.642e-01, 6.941e-03, -3.161e-01, 1.724e-01, -1.356e-01, 8.346e-02, -1.899e-02, -2.575e-01, 5.002e-03, 3.191e-02, 6.711e-02, 6.119e-03, 9.062e-02, -5.186e-02, 3.093e-01, -8.964e-02)); + r += mul(s7_7, M4(1.857e-01, -4.822e-02, 7.918e-02, -1.658e-02, 1.364e-01, -5.057e-03, -1.313e-01, 5.012e-01, -6.122e-04, 
1.199e-02, -3.115e-02, 1.149e-01, 2.614e-01, 1.622e-01, -1.727e-01, 1.666e-01)); + r += mul(s7_8, M4(-5.647e-02, -1.514e-01, -1.514e-01, 5.845e-02, -1.107e-01, -1.344e-01, -1.261e-01, 7.256e-02, -3.303e-02, -1.809e-01, 5.185e-02, 1.081e-01, 1.400e-01, 5.002e-02, 2.903e-01, -1.358e-01)); + r += V4(3.312e-02, 1.493e-02, 4.529e-02, 2.631e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 
0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, 
s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 
s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.106e-01, 7.670e-03, -1.286e-01, 3.018e-01, 4.581e-02, -1.217e-01, 1.187e-01, -1.327e-02, -3.080e-01, 4.486e-01, -2.847e-01, 5.405e-01, -1.992e-01, -2.933e-01, -1.790e-01, 2.018e-01)); + r += mul(s0_1, M4(6.861e-02, -2.420e-01, 1.315e-01, 7.517e-02, -4.806e-02, -1.086e-01, -9.308e-02, 1.835e-01, 1.853e-01, 3.102e-01, -9.225e-02, 2.640e-01, -1.481e-01, -1.542e-01, -2.495e-01, 3.864e-01)); + r += mul(s0_2, M4(-2.908e-02, 4.023e-03, 7.872e-03, 3.708e-03, -1.186e-01, -4.844e-02, 3.922e-02, -4.161e-02, -4.872e-01, -2.191e-02, 3.183e-01, 5.081e-01, -8.175e-02, 7.931e-02, 2.249e-01, -1.870e-01)); + r += mul(s0_3, M4(2.636e-02, 1.090e-01, -1.625e-01, -2.987e-01, 1.163e-01, 5.129e-02, -2.591e-01, 7.786e-03, 3.786e-01, -1.351e-02, -2.464e-01, 1.982e-01, -2.095e-01, 1.984e-01, 6.387e-02, -3.157e-02)); + r += mul(s0_4, M4(-2.258e-01, 4.148e-02, -4.479e-01, -3.390e-01, -9.786e-03, 2.094e-01, -3.566e-02, 5.195e-02, -2.070e-01, 2.975e-02, -1.083e-01, 5.773e-01, 5.147e-01, -5.604e-02, 4.930e-02, 1.321e-01)); + r += mul(s0_5, M4(3.594e-01, 1.268e-02, 1.728e-01, 7.189e-02, 1.150e-01, 5.564e-02, -9.940e-02, -1.487e-02, -1.964e-01, -3.537e-01, 2.455e-01, 2.112e-01, -3.369e-01, 1.388e-01, 8.150e-02, 1.431e-01)); + r += mul(s0_6, M4(-1.118e-01, -1.718e-03, 1.401e-01, 1.372e-01, 2.374e-02, -5.415e-02, -1.005e-01, 7.274e-02, -2.817e-01, 1.803e-01, 3.102e-02, 5.768e-01, 1.678e-02, -1.261e-01, -8.865e-02, 1.719e-01)); + r += mul(s0_7, M4(-2.923e-02, 2.963e-01, 1.036e-01, -8.971e-02, 7.788e-02, 1.097e-02, 1.250e-01, 1.545e-01, 1.096e-01, 1.449e-01, -1.111e-01, 1.741e-01, 9.997e-02, -1.627e-01, -5.352e-02, 4.582e-02)); + r += mul(s0_8, M4(1.907e-01, 7.104e-02, 
3.369e-01, 4.789e-02, 2.948e-02, 5.076e-02, 1.535e-01, 1.651e-03, -1.650e-02, -4.324e-01, 8.895e-02, 5.674e-02, -5.023e-04, 1.787e-01, -3.778e-01, -2.155e-02)); + r += mul(s1_0, M4(-5.160e-02, 3.976e-02, -9.952e-02, -4.953e-02, -9.583e-02, 7.235e-02, 8.871e-02, 5.601e-02, -1.450e-01, -7.690e-02, -9.010e-02, 5.021e-02, -1.281e-01, 1.148e-01, 3.020e-02, -2.530e-02)); + r += mul(s1_1, M4(1.300e-02, -9.556e-02, -1.150e-01, 3.666e-02, 3.252e-02, 3.251e-03, -1.276e-01, 1.543e-01, 7.399e-02, -1.355e-01, -1.185e-01, -2.143e-01, 1.719e-01, 2.554e-02, 6.463e-02, 3.932e-02)); + r += mul(s1_2, M4(-7.282e-02, -1.820e-02, 1.919e-02, 1.074e-02, -2.097e-01, 9.752e-02, -1.958e-02, -8.647e-02, 6.535e-02, -2.755e-02, 1.022e-01, -2.680e-02, -3.509e-03, -7.656e-02, -3.162e-02, -1.080e-01)); + r += mul(s1_3, M4(4.049e-02, 1.557e-01, -1.394e-02, -2.459e-02, -1.799e-01, 1.076e-02, 3.295e-02, -7.567e-03, -4.113e-02, -1.389e-01, 7.414e-03, -6.812e-02, 4.436e-02, -7.321e-02, -6.320e-02, 3.476e-02)); + r += mul(s1_4, M4(6.296e-02, 1.105e-02, 8.425e-02, -1.598e-02, -1.032e-02, 1.704e-02, -7.022e-02, 6.957e-03, 2.220e-02, 6.579e-02, 5.622e-02, 6.265e-02, -8.197e-02, 2.071e-02, 9.494e-02, 1.237e-01)); + r += mul(s1_5, M4(-9.015e-02, -1.011e-01, -4.560e-02, 7.887e-02, 5.623e-02, 2.528e-02, 5.075e-03, -1.046e-02, 3.357e-02, -3.218e-02, -2.049e-02, 1.284e-02, -3.689e-02, -1.521e-01, -7.719e-02, 1.521e-01)); + r += mul(s1_6, M4(1.903e-01, -1.253e-01, 7.906e-02, 7.562e-02, -1.305e-01, -9.010e-02, -2.324e-02, 7.094e-02, 5.681e-03, 8.103e-02, -1.556e-02, -1.164e-02, -7.473e-03, -7.018e-02, 7.782e-02, 2.081e-02)); + r += mul(s1_7, M4(-5.258e-02, 4.002e-02, -2.441e-03, 2.738e-02, 2.307e-01, 1.964e-02, 5.693e-02, 5.497e-02, -9.583e-02, -2.712e-02, 6.319e-02, 4.404e-02, -1.437e-02, -5.799e-02, 2.038e-02, -1.081e-01)); + r += mul(s1_8, M4(-1.771e-01, -3.819e-02, 4.159e-02, -7.371e-03, 1.103e-01, -3.843e-02, 1.053e-01, 9.504e-03, -6.042e-03, 1.495e-01, 6.809e-02, 1.753e-02, 1.258e-01, -2.334e-01, 3.983e-03, 
9.594e-02)); + r += mul(s2_0, M4(8.470e-02, -1.773e-01, 9.455e-03, 4.759e-02, -1.431e-01, 3.932e-03, -1.545e-01, -7.367e-02, -4.871e-02, 6.728e-02, 3.994e-02, -4.476e-02, -2.842e-01, 1.265e-01, 1.751e-01, -3.460e-02)); + r += mul(s2_1, M4(5.692e-02, -3.552e-02, 1.946e-02, -1.704e-02, 6.537e-02, -1.294e-01, -2.161e-03, 7.778e-02, 1.616e-01, 1.401e-01, 2.866e-02, -9.264e-02, 1.368e-01, 1.787e-01, -1.987e-02, -8.060e-02)); + r += mul(s2_2, M4(-1.114e-01, -1.410e-02, 4.794e-02, -2.637e-02, -2.495e-02, -4.884e-03, -7.775e-02, -6.720e-02, -5.142e-04, 1.518e-01, -1.261e-02, -2.380e-01, 8.331e-02, -1.208e-01, -7.970e-02, -6.441e-02)); + r += mul(s2_3, M4(1.778e-01, 1.209e-02, -1.997e-01, 3.192e-02, 2.430e-01, 4.024e-02, -5.513e-02, 7.519e-02, -1.167e-01, -1.249e-01, 1.249e-01, -1.446e-01, 7.126e-02, -6.795e-02, -8.406e-02, 1.305e-02)); + r += mul(s2_4, M4(-9.427e-02, -8.138e-03, -8.036e-02, -1.474e-01, 7.301e-02, -1.313e-01, 8.006e-02, 5.452e-02, 1.127e-01, 4.325e-03, 2.758e-01, 2.219e-01, 1.501e-02, 1.454e-01, 1.056e-01, 3.213e-02)); + r += mul(s2_5, M4(3.789e-02, 9.747e-02, 3.788e-02, 9.989e-02, 1.223e-01, -3.660e-02, 8.788e-02, 4.626e-02, -1.673e-01, 5.030e-02, 7.130e-02, -6.818e-02, 1.002e-01, -5.157e-02, 2.846e-02, -1.019e-01)); + r += mul(s2_6, M4(5.866e-02, -1.172e-01, 5.294e-02, 1.291e-02, -1.841e-01, 1.076e-01, -1.009e-01, -6.028e-02, 1.089e-02, 4.124e-02, 3.907e-02, 6.840e-02, 1.165e-01, -1.340e-01, 8.841e-02, -5.944e-02)); + r += mul(s2_7, M4(-8.505e-02, 3.822e-02, 1.127e-01, 5.977e-02, 2.905e-02, 1.477e-01, 3.378e-02, -3.830e-02, 7.395e-02, 7.332e-02, 2.368e-01, -1.645e-01, 7.004e-02, 5.115e-02, -4.602e-02, 9.389e-02)); + r += mul(s2_8, M4(1.146e-01, 1.376e-01, 1.859e-01, -1.260e-03, 4.046e-02, 1.407e-01, 5.402e-02, -3.509e-02, -8.465e-02, 7.918e-02, -2.051e-01, 1.289e-01, 3.716e-02, -6.743e-02, -1.306e-01, 9.368e-02)); + r += mul(s3_0, M4(-8.518e-02, 2.708e-02, -5.449e-03, -3.024e-02, 2.116e-01, -1.750e-01, -5.726e-02, 1.729e-01, -1.523e-01, 1.187e-02, 
-6.049e-02, 8.550e-02, -1.797e-01, 5.282e-02, -7.250e-02, -4.983e-02)); + r += mul(s3_1, M4(1.751e-02, 1.192e-02, 1.091e-01, -3.897e-02, -1.735e-01, -1.346e-01, 2.168e-02, -2.314e-01, 3.644e-03, 1.259e-01, 3.968e-02, 7.563e-02, -5.672e-02, 9.338e-02, 4.882e-03, -1.688e-01)); + r += mul(s3_2, M4(4.182e-02, 8.886e-02, 1.246e-01, -2.448e-01, -1.897e-01, 1.382e-02, 8.624e-02, 1.035e-01, -1.952e-02, 6.874e-02, 2.193e-01, -9.409e-02, 5.727e-02, -1.276e-01, 9.849e-03, -8.370e-02)); + r += mul(s3_3, M4(-1.089e-02, -1.891e-01, 8.993e-03, 8.074e-03, 2.076e-01, -1.243e-01, -1.613e-01, -3.196e-02, 1.255e-01, 4.043e-03, -3.341e-02, -8.549e-02, -1.131e-01, 1.215e-01, -1.069e-01, 7.707e-03)); + r += mul(s3_4, M4(2.400e-01, -1.286e-01, -1.168e-01, -1.283e-01, -3.051e-01, -1.333e-01, -1.961e-01, 1.049e-01, -6.058e-03, 2.434e-01, -5.507e-02, 2.330e-01, 5.414e-02, -4.292e-02, -6.215e-03, 6.113e-02)); + r += mul(s3_5, M4(1.987e-01, -1.559e-01, 5.049e-02, -6.321e-02, -1.371e-01, -4.967e-02, 7.531e-02, 4.293e-01, 6.067e-02, 2.865e-01, 8.815e-02, -5.601e-02, -7.575e-02, 3.643e-02, 6.242e-02, 5.949e-02)); + r += mul(s3_6, M4(1.069e-01, -4.535e-03, -4.047e-02, 2.504e-01, 7.440e-02, 2.122e-03, 7.750e-02, -3.155e-01, -6.375e-02, 6.073e-02, -2.885e-02, 1.821e-01, 1.692e-01, -2.458e-02, 6.111e-02, 7.549e-03)); + r += mul(s3_7, M4(8.820e-02, -4.547e-03, -4.999e-02, 1.704e-01, 1.116e-01, 4.329e-01, 2.396e-02, 1.519e-01, 1.402e-01, 1.259e-01, -8.280e-02, 2.102e-02, -1.875e-02, 8.996e-02, -1.889e-02, 3.921e-02)); + r += mul(s3_8, M4(1.528e-01, -2.168e-01, -4.028e-03, -1.300e-01, 1.285e-01, 4.750e-01, 1.400e-01, 2.065e-01, 2.631e-02, 1.038e-01, -4.665e-02, 6.579e-02, 1.235e-01, 7.133e-02, -1.172e-01, -5.303e-03)); + r += mul(s4_0, M4(-1.340e-02, 2.074e-01, -1.705e-02, -7.420e-02, 1.053e-02, -9.147e-02, 2.526e-02, 1.026e-02, -7.592e-02, -1.404e-01, 8.351e-03, 1.730e-02, 4.360e-02, -6.868e-02, -1.140e-01, -1.605e-02)); + r += mul(s4_1, M4(8.707e-02, 1.055e-01, -1.279e-02, -3.775e-02, -5.099e-02, 
6.661e-02, -1.315e-02, -4.065e-02, 2.320e-02, -4.357e-02, -1.313e-01, 1.870e-02, 2.267e-02, 9.643e-02, 3.252e-02, 6.173e-02)); + r += mul(s4_2, M4(-1.146e-01, 1.701e-01, -2.906e-02, 4.315e-02, -2.166e-02, -1.823e-02, -4.847e-02, 4.414e-02, 8.021e-02, -1.982e-01, -1.249e-01, 1.342e-01, -1.880e-01, 1.307e-02, 8.718e-02, -1.784e-02)); + r += mul(s4_3, M4(-1.524e-01, 1.792e-02, 1.293e-01, -1.338e-01, -9.022e-02, 2.274e-02, 9.638e-03, -2.646e-01, -1.919e-01, -1.435e-01, -2.097e-01, 1.255e-01, 1.651e-01, -3.011e-02, 8.854e-02, -8.389e-02)); + r += mul(s4_4, M4(-2.367e-01, -9.871e-02, -2.830e-03, -8.051e-02, 3.013e-01, 4.373e-02, -2.674e-02, 3.491e-02, 2.304e-01, -5.362e-02, -9.155e-02, -1.227e-01, -7.575e-03, 8.455e-02, -9.711e-02, -1.671e-01)); + r += mul(s4_5, M4(6.942e-02, 9.524e-02, -9.299e-02, 8.153e-03, 5.888e-02, -7.219e-02, -6.424e-02, 1.187e-01, -1.179e-01, -7.678e-02, 6.407e-02, 3.443e-02, 3.539e-02, -1.277e-02, 1.201e-01, -5.887e-02)); + r += mul(s4_6, M4(-4.164e-02, 5.339e-02, 3.651e-02, -3.101e-02, 9.081e-02, 1.674e-01, -1.281e-01, -6.046e-02, 1.913e-01, 3.205e-02, -9.261e-02, 5.055e-06, 2.909e-02, -8.520e-02, -1.762e-02, -4.433e-02)); + r += mul(s4_7, M4(1.197e-01, 6.880e-02, -3.625e-02, 1.798e-01, -2.881e-01, 1.125e-01, -5.999e-02, -2.046e-01, -1.051e-01, -1.701e-01, 9.698e-02, 2.261e-02, 2.416e-02, -1.367e-01, -5.911e-02, 9.895e-02)); + r += mul(s4_8, M4(4.871e-02, 1.336e-01, -2.433e-02, 1.234e-02, -1.234e-01, 1.144e-01, 1.246e-01, 8.938e-02, 7.953e-02, -9.458e-02, 6.365e-02, -1.207e-01, -7.731e-03, -1.426e-02, -1.368e-01, 9.274e-02)); + r += mul(s5_0, M4(-1.005e-01, 2.445e-03, 5.230e-02, 1.152e-01, -1.592e-01, -2.043e-01, -7.492e-02, 1.011e-01, -1.755e-01, -8.817e-02, -1.179e-02, -4.539e-02, -4.091e-02, -5.761e-02, 2.523e-01, 1.103e-01)); + r += mul(s5_1, M4(1.641e-02, -3.067e-01, 9.185e-02, 9.128e-02, 3.286e-02, 1.947e-02, -4.989e-02, 5.360e-02, -1.157e-01, 4.087e-02, 8.444e-02, 1.411e-01, 3.768e-02, 1.976e-02, 2.049e-01, 1.071e-01)); + r += mul(s5_2, 
M4(1.134e-01, -6.178e-02, 3.290e-02, 6.283e-02, 1.023e-04, 8.330e-02, 2.145e-01, -1.397e-01, -8.124e-02, 1.013e-01, -2.137e-02, -7.958e-02, 1.534e-01, -5.867e-03, -6.494e-02, -9.285e-02)); + r += mul(s5_3, M4(-2.051e-02, 6.925e-02, 6.149e-02, 3.242e-01, -6.243e-01, 1.179e-01, 2.106e-01, -3.046e-01, -5.706e-04, -1.155e-01, 8.130e-03, 3.356e-02, -2.281e-03, 1.334e-01, -1.241e-01, -6.799e-02)); + r += mul(s5_4, M4(-2.705e-02, 1.060e-01, 1.528e-01, 1.223e-01, 3.211e-01, -1.730e-01, 1.253e-01, 5.410e-01, 1.508e-01, 2.882e-03, -9.738e-02, -4.871e-03, 6.994e-02, 2.571e-02, -9.819e-02, -1.700e-01)); + r += mul(s5_5, M4(4.869e-02, 2.421e-01, -7.101e-02, 1.245e-01, 2.406e-01, -1.675e-01, -4.183e-01, -1.827e-01, -2.084e-01, -1.768e-02, 1.672e-01, 7.756e-02, 1.056e-02, 1.765e-01, -1.561e-01, -7.703e-02)); + r += mul(s5_6, M4(6.569e-02, 1.023e-01, -4.162e-02, -4.910e-02, 2.085e-01, -2.971e-01, 1.728e-01, -2.518e-01, 1.621e-01, 1.735e-02, 3.280e-02, 9.486e-02, 5.738e-02, 1.501e-01, 4.067e-02, 5.826e-02)); + r += mul(s5_7, M4(1.475e-01, -1.313e-01, -3.392e-01, -6.234e-02, 1.607e-01, -8.255e-02, 9.700e-02, 5.601e-01, -9.607e-02, -5.716e-04, 9.148e-02, -2.466e-02, -1.983e-01, 2.422e-02, 6.771e-02, 6.628e-02)); + r += mul(s5_8, M4(1.197e-01, -2.585e-02, -9.683e-02, 8.054e-02, -3.877e-01, -4.829e-02, 5.832e-02, -9.984e-02, 1.697e-01, 2.958e-02, -1.363e-01, 1.280e-01, 2.276e-01, -1.047e-01, -2.271e-01, 8.008e-02)); + r += mul(s6_0, M4(-2.387e-01, -2.552e-01, 3.395e-01, -5.265e-02, -4.866e-03, 1.072e-02, 7.850e-02, 8.140e-02, 1.753e-01, 1.958e-01, 1.330e-01, 1.653e-01, 6.228e-02, 1.308e-02, -8.889e-02, -2.958e-02)); + r += mul(s6_1, M4(1.223e-01, -2.539e-02, -2.974e-01, 3.280e-03, 1.140e-01, -2.521e-02, -1.109e-01, -1.236e-01, 3.579e-02, -4.744e-02, 3.749e-02, 1.404e-01, -2.275e-02, -1.076e-02, -1.870e-02, -1.125e-01)); + r += mul(s6_2, M4(6.481e-02, 1.559e-01, 8.548e-02, -1.091e-01, 4.069e-02, 9.493e-04, 1.436e-02, -9.013e-02, -2.222e-02, 4.476e-02, -1.482e-01, 1.574e-01, -2.415e-02, 
2.352e-01, 1.177e-01, 1.094e-01)); + r += mul(s6_3, M4(-3.810e-02, 8.932e-02, 1.747e-02, -1.814e-01, 2.592e-01, -7.350e-02, -2.803e-01, 8.401e-02, 4.885e-01, -4.088e-02, 1.891e-01, 1.402e-01, -1.697e-01, 3.032e-01, -6.832e-02, 2.520e-01)); + r += mul(s6_4, M4(5.080e-01, -1.330e-01, 4.702e-02, 3.560e-01, 1.971e-01, -5.934e-02, -2.050e-01, -1.404e-01, -2.276e-01, 9.001e-02, -1.257e-02, -1.844e-01, -3.716e-01, -5.842e-02, 1.151e-01, -1.315e-01)); + r += mul(s6_5, M4(-8.018e-02, 2.536e-01, -1.156e-01, -1.679e-01, -9.114e-02, 6.778e-03, -6.799e-02, 1.115e-02, -1.072e-02, 1.227e-01, 1.080e-01, 7.753e-02, 1.393e-01, -2.807e-01, 2.770e-02, 2.628e-01)); + r += mul(s6_6, M4(3.951e-02, -1.216e-01, 1.179e-01, -6.518e-02, -1.211e-02, 4.296e-02, -2.270e-01, 1.071e-01, 9.939e-02, 2.098e-01, -1.493e-02, -2.716e-02, 2.526e-01, -2.241e-02, 7.088e-02, 3.245e-02)); + r += mul(s6_7, M4(2.067e-01, -4.707e-01, -7.482e-02, 3.539e-01, 1.177e-01, -1.092e-01, -1.023e-01, 4.164e-02, -3.600e-02, 1.516e-01, -2.437e-01, 8.298e-02, -9.193e-02, -1.452e-01, 4.257e-02, -2.124e-01)); + r += mul(s6_8, M4(-2.915e-01, 1.080e-02, 9.993e-03, -8.805e-02, -2.745e-02, -5.484e-02, -7.195e-02, -8.099e-03, 5.414e-02, 3.017e-01, 9.490e-02, 2.250e-02, 1.981e-01, -1.306e-01, -1.162e-02, -3.500e-02)); + r += mul(s7_0, M4(-7.985e-02, 8.566e-02, -7.230e-02, -3.377e-02, 4.579e-02, 6.077e-02, -1.418e-01, 1.145e-01, 3.962e-02, -1.011e-01, -1.120e-01, -8.656e-02, 2.178e-02, 9.423e-02, 2.644e-02, 1.217e-01)); + r += mul(s7_1, M4(-3.308e-02, 1.354e-01, -1.195e-01, -9.986e-02, -1.597e-01, 7.120e-02, 9.256e-02, -9.827e-02, 1.653e-02, 4.166e-02, -4.236e-02, -7.087e-02, 2.284e-02, 6.135e-02, -7.818e-02, 5.338e-02)); + r += mul(s7_2, M4(-7.062e-02, -3.890e-02, -4.435e-02, -2.382e-02, 1.791e-02, -1.045e-01, -3.526e-02, 9.678e-02, -7.565e-02, 7.253e-02, -1.510e-01, 3.099e-03, -5.306e-02, 9.983e-02, 3.110e-02, -1.298e-01)); + r += mul(s7_3, M4(1.554e-02, -1.892e-02, 6.554e-02, -9.729e-04, -1.477e-01, 6.217e-02, 5.542e-03, 
6.975e-02, 3.759e-02, 4.232e-02, 3.292e-02, 1.204e-01, 5.064e-04, -4.057e-02, 6.802e-02, 3.772e-02)); + r += mul(s7_4, M4(3.781e-02, -9.765e-02, 7.612e-02, -4.848e-02, -2.151e-01, 7.928e-03, -4.992e-02, -6.911e-02, 9.894e-02, -4.879e-02, 9.217e-02, -1.417e-01, 5.228e-05, -6.974e-02, -1.455e-02, 5.341e-02)); + r += mul(s7_5, M4(-2.148e-02, 9.837e-02, 6.053e-02, 1.387e-01, -6.492e-02, 8.200e-02, -4.534e-02, 6.754e-02, 1.607e-02, 7.394e-02, -3.567e-02, -8.860e-02, -6.110e-02, -2.350e-03, 1.426e-02, -1.123e-01)); + r += mul(s7_6, M4(-3.090e-02, -4.675e-03, 3.797e-03, 1.764e-01, -1.302e-02, 4.235e-02, -1.232e-01, -1.248e-01, 1.384e-02, 1.040e-01, 1.289e-01, 1.555e-03, -1.215e-01, -1.021e-02, 6.856e-02, 2.160e-02)); + r += mul(s7_7, M4(1.848e-02, -1.505e-02, 4.518e-02, -2.168e-01, 1.555e-02, -1.385e-01, 1.732e-01, 5.792e-02, -8.967e-02, -1.573e-01, -6.748e-02, 6.622e-02, 1.793e-01, -1.737e-02, -8.133e-02, 3.772e-02)); + r += mul(s7_8, M4(-1.528e-01, -4.560e-02, 4.371e-03, 6.002e-02, 1.841e-01, 5.361e-02, 1.400e-02, -4.889e-02, -1.744e-01, -4.758e-04, 2.771e-02, -1.233e-01, -1.299e-02, 1.363e-01, -2.682e-01, -2.026e-01)); + r += V4(-4.037e-02, -1.675e-02, 3.704e-02, 1.400e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(5.359e-02, 9.392e-02, -1.478e-01, 8.028e-02, 2.418e-02, -1.892e-02, 6.965e-02, -9.329e-03, 2.970e-01, 
-2.939e-01, 1.871e-01, -4.107e-02, -1.957e-01, 9.889e-02, -2.289e-01, -1.716e-01)); + r += mul(s0_1, M4(7.159e-02, 7.395e-02, 1.411e-02, 1.908e-01, -1.529e-01, 5.522e-02, 6.274e-02, -4.293e-02, 7.288e-02, -4.996e-01, -1.728e-01, 7.171e-02, 3.953e-01, 3.121e-02, -1.144e-01, -2.980e-01)); + r += mul(s0_2, M4(1.281e-01, 2.113e-01, -1.793e-02, -1.434e-01, -3.459e-02, -1.502e-01, -1.265e-01, -8.224e-02, 2.602e-02, -1.770e-01, 1.371e-01, 2.355e-01, -1.329e-01, 8.931e-02, 1.303e-01, -1.144e-01)); + r += mul(s0_3, M4(7.054e-02, -3.927e-02, 2.427e-01, 8.472e-02, 4.186e-02, 3.723e-02, 8.074e-02, -9.314e-02, 5.253e-01, -1.610e-01, 1.036e-01, 3.180e-01, -7.711e-02, -2.627e-01, -1.591e-01, -1.161e-01)); + r += mul(s0_4, M4(1.364e-01, -7.588e-02, 1.185e-01, 1.617e-01, 1.008e-02, 1.126e-01, -1.458e-01, -1.110e-01, 3.619e-01, -4.962e-02, -1.534e-01, 2.996e-01, -4.501e-01, 1.993e-01, -3.799e-01, 4.546e-02)); + r += mul(s0_5, M4(-4.318e-01, 1.466e-01, -3.836e-02, -2.268e-01, 3.750e-02, -7.447e-02, 1.090e-02, -1.200e-01, 5.559e-01, -6.123e-02, -1.486e-01, -1.172e-01, -2.707e-02, -1.418e-01, -2.744e-01, -7.687e-02)); + r += mul(s0_6, M4(2.797e-01, 6.728e-02, 1.223e-01, -1.082e-04, -2.560e-03, 6.357e-02, 1.290e-01, -4.615e-02, 3.593e-01, -1.852e-01, -1.198e-01, -5.259e-02, -8.925e-02, -9.296e-02, 5.557e-02, 1.834e-01)); + r += mul(s0_7, M4(-9.005e-02, -1.121e-01, -1.477e-01, 1.781e-02, -1.100e-01, 1.190e-01, -5.723e-02, -1.282e-01, 2.722e-01, -1.359e-01, 4.613e-01, 3.558e-01, -2.149e-01, -3.125e-02, 3.621e-02, -4.010e-02)); + r += mul(s0_8, M4(-4.799e-02, 2.708e-03, -5.212e-03, -1.705e-01, -1.316e-01, -1.425e-01, 1.505e-01, -5.512e-02, 3.036e-01, -1.299e-02, -2.344e-03, -1.990e-02, 1.391e-02, -1.951e-01, 1.276e-01, -1.054e-01)); + r += mul(s1_0, M4(3.800e-02, 9.656e-02, -6.685e-02, 1.867e-01, 4.085e-02, 1.031e-02, 9.019e-02, -2.565e-02, -1.535e-02, 9.396e-02, 1.368e-01, -6.468e-03, 5.725e-02, -4.208e-02, 7.414e-02, -2.342e-02)); + r += mul(s1_1, M4(-6.740e-02, 3.539e-02, 2.409e-02, 
1.803e-02, 6.601e-02, 1.697e-02, -2.279e-02, 1.011e-01, 9.741e-02, -3.994e-02, -5.265e-02, -6.697e-03, 2.065e-03, -4.452e-02, 8.573e-02, 1.119e-01)); + r += mul(s1_2, M4(-3.638e-03, 1.285e-02, 4.366e-02, -5.360e-02, 5.436e-02, 6.215e-03, -5.649e-02, 6.265e-02, -2.411e-02, 8.766e-02, -8.103e-02, -3.148e-02, 8.132e-02, 9.113e-03, 4.695e-02, 4.233e-02)); + r += mul(s1_3, M4(7.019e-02, -2.512e-01, -5.508e-02, -4.033e-02, -2.620e-01, 3.060e-02, -5.995e-02, 2.043e-01, -4.539e-02, -6.491e-02, 1.768e-01, 7.705e-02, 4.870e-02, 1.053e-02, 4.352e-02, 1.674e-02)); + r += mul(s1_4, M4(1.164e-01, -9.858e-03, 1.457e-01, 6.112e-02, 6.947e-04, 4.087e-02, -2.676e-02, 8.315e-02, -7.122e-02, 2.029e-02, -9.414e-02, -5.512e-03, 1.954e-02, 1.362e-01, -9.850e-03, -1.051e-01)); + r += mul(s1_5, M4(6.618e-02, 5.335e-02, 5.182e-02, -1.430e-01, -1.251e-02, -8.332e-02, 1.674e-01, -3.576e-02, 1.417e-02, -4.650e-02, 2.594e-02, -6.467e-02, 2.801e-03, -4.410e-02, -1.009e-01, -2.563e-02)); + r += mul(s1_6, M4(-6.718e-02, 6.018e-02, 9.142e-02, -4.475e-02, 1.592e-02, -1.879e-02, 2.796e-02, -5.731e-02, -8.798e-02, -3.738e-02, -5.375e-02, -4.677e-02, 5.755e-02, -1.558e-01, -2.621e-04, 7.923e-02)); + r += mul(s1_7, M4(-7.345e-02, -2.241e-02, -1.339e-01, 3.816e-02, -2.457e-02, 6.471e-02, 2.474e-02, 3.074e-02, -3.547e-02, 2.760e-02, 7.020e-02, 6.069e-03, -7.161e-02, -4.419e-02, 5.787e-02, 5.445e-02)); + r += mul(s1_8, M4(-1.805e-01, -5.921e-02, 3.022e-02, -8.273e-02, -1.053e-01, -1.721e-02, -4.414e-02, -2.392e-02, -6.442e-02, 3.035e-03, -7.375e-02, 2.852e-02, -4.651e-02, 1.898e-02, 9.078e-02, -1.200e-01)); + r += mul(s2_0, M4(-1.281e-01, 5.373e-02, 2.685e-02, 1.292e-02, 8.557e-02, 5.859e-02, -1.068e-02, 1.038e-01, 5.150e-02, -1.111e-01, 6.592e-02, 3.929e-02, -1.668e-01, -1.356e-01, 1.001e-01, -3.902e-02)); + r += mul(s2_1, M4(1.962e-02, 6.647e-02, 4.700e-02, 9.000e-02, 9.737e-03, -4.224e-03, -7.276e-02, 5.652e-02, -2.292e-04, -1.822e-01, -3.527e-02, 7.265e-02, 9.374e-02, 1.317e-01, -3.114e-02, 
2.456e-02)); + r += mul(s2_2, M4(1.597e-02, -2.637e-02, 9.580e-02, -6.369e-02, 5.057e-02, 8.468e-02, 4.267e-03, 7.474e-02, 1.078e-01, -9.482e-03, -1.727e-01, 2.970e-02, 5.134e-02, 1.363e-01, -6.067e-02, 4.048e-02)); + r += mul(s2_3, M4(-2.884e-02, -8.832e-02, -6.075e-02, -3.294e-03, 3.834e-02, -2.263e-02, -5.452e-02, 6.482e-03, 1.428e-02, 5.782e-02, 3.720e-01, -4.082e-02, -1.178e-01, 9.901e-03, 1.791e-01, 4.845e-02)); + r += mul(s2_4, M4(-5.438e-02, -9.899e-02, -2.456e-01, 1.285e-01, 4.232e-02, -1.557e-02, 5.495e-02, 1.077e-01, -3.258e-02, 7.771e-02, 2.900e-01, 1.432e-01, -1.675e-02, 5.543e-03, 1.078e-02, 1.532e-01)); + r += mul(s2_5, M4(2.182e-02, 1.227e-01, -5.033e-02, -6.187e-02, 1.330e-01, -6.431e-02, -9.959e-02, 1.091e-01, 2.743e-02, 1.172e-01, -2.246e-01, -1.348e-01, -5.197e-02, -8.711e-04, -5.547e-02, 9.522e-03)); + r += mul(s2_6, M4(-1.206e-01, 7.882e-03, -1.090e-02, 2.974e-02, 3.590e-03, -1.111e-01, 8.554e-02, 1.091e-01, -1.601e-02, 1.324e-01, -1.966e-03, -3.257e-02, 5.852e-02, -2.458e-02, 1.611e-02, -3.468e-02)); + r += mul(s2_7, M4(-1.929e-01, 1.215e-02, 8.072e-03, -1.662e-01, 1.130e-01, -6.542e-03, 2.019e-02, 6.547e-02, 7.990e-02, -4.069e-02, 9.728e-02, -2.795e-01, 1.114e-01, -8.817e-03, -8.428e-02, 6.218e-02)); + r += mul(s2_8, M4(1.551e-01, -3.498e-02, -7.700e-03, 1.575e-02, 1.430e-01, -4.726e-02, -1.064e-02, -6.947e-03, 9.397e-02, 3.900e-03, 1.258e-01, -3.835e-02, 1.277e-01, 2.509e-02, 8.729e-02, -9.870e-04)); + r += mul(s3_0, M4(-2.635e-02, 1.782e-01, -1.694e-02, 5.306e-02, 1.580e-01, 6.969e-03, 3.944e-02, 6.108e-02, -4.229e-04, -1.987e-01, 3.180e-02, 7.524e-02, -4.798e-02, -1.567e-01, 7.564e-02, -1.183e-01)); + r += mul(s3_1, M4(6.275e-02, 5.701e-02, -7.258e-02, 7.556e-02, 1.801e-01, 7.088e-02, -1.084e-01, 2.409e-01, -1.257e-01, -5.239e-01, -8.246e-02, -3.461e-02, -1.147e-01, 2.545e-01, 1.385e-01, -1.835e-01)); + r += mul(s3_2, M4(5.300e-02, 4.263e-02, -6.859e-02, -1.132e-01, 1.989e-01, 2.543e-01, -1.528e-01, -4.319e-02, 3.084e-02, -9.744e-02, 
6.969e-02, 8.086e-02, -1.547e-01, -6.636e-02, 1.025e-01, -1.728e-01)); + r += mul(s3_3, M4(1.060e-01, 5.446e-02, 1.170e-01, -2.163e-02, -1.162e-01, 2.090e-02, 2.091e-01, -9.561e-02, 9.790e-02, 2.899e-02, -1.844e-01, 1.636e-01, 6.397e-02, 1.750e-01, -1.875e-02, -8.913e-02)); + r += mul(s3_4, M4(2.836e-01, 9.436e-02, 4.142e-02, 2.304e-01, 2.979e-02, -3.237e-01, -1.735e-02, 3.771e-01, -1.150e-01, 2.209e-01, 2.394e-01, 7.586e-02, -1.756e-01, -1.931e-01, 8.589e-02, -2.265e-04)); + r += mul(s3_5, M4(1.088e-01, 6.678e-02, -2.916e-02, -3.234e-02, 5.359e-02, -1.229e-01, -8.072e-02, 2.183e-01, -4.825e-02, 1.674e-03, -2.927e-02, 8.313e-02, -1.081e-01, -7.192e-02, -2.553e-01, -1.865e-01)); + r += mul(s3_6, M4(1.575e-02, 2.193e-02, 4.950e-02, 8.411e-02, -5.007e-02, -6.622e-02, 1.987e-01, 2.368e-01, -1.061e-01, 1.255e-01, -4.007e-02, -2.425e-02, 7.443e-02, 4.368e-02, 1.239e-02, -1.162e-02)); + r += mul(s3_7, M4(6.849e-03, 9.556e-03, 2.016e-01, 2.675e-01, -2.552e-01, 8.692e-02, -1.800e-02, 2.611e-01, -5.581e-02, 3.036e-02, 7.863e-03, -1.557e-01, 3.245e-02, 8.553e-02, -1.081e-01, -1.361e-01)); + r += mul(s3_8, M4(2.556e-02, 9.324e-02, 8.321e-02, -4.573e-03, 7.401e-02, 4.903e-02, -9.715e-02, -1.496e-01, -4.412e-02, 2.469e-02, 7.537e-02, 4.379e-02, -1.750e-02, 1.513e-02, 1.219e-01, -1.819e-01)); + r += mul(s4_0, M4(-3.690e-02, -3.441e-02, -3.924e-02, -6.007e-02, 5.748e-02, -9.919e-03, -1.154e-01, -1.501e-02, -3.522e-02, 6.309e-04, 4.868e-02, 9.365e-02, 6.030e-02, 3.201e-03, 2.582e-02, -1.564e-02)); + r += mul(s4_1, M4(3.904e-02, -7.788e-02, 4.307e-02, 2.538e-02, -2.324e-01, 7.691e-02, 6.854e-03, 4.209e-02, -2.631e-02, 1.592e-01, -3.820e-02, -5.688e-02, -1.343e-01, -1.334e-01, 3.546e-02, -5.380e-04)); + r += mul(s4_2, M4(9.549e-02, -7.248e-02, -1.325e-01, -1.463e-02, -1.133e-01, -1.068e-02, 3.268e-02, 8.634e-03, 7.654e-02, -1.712e-01, -2.000e-01, -8.355e-02, 2.194e-02, 5.199e-02, 6.314e-02, -2.819e-02)); + r += mul(s4_3, M4(-4.752e-02, 2.464e-02, -1.639e-02, 1.384e-02, 3.763e-02, 
7.014e-03, 7.251e-03, -3.992e-02, -2.566e-02, 1.835e-02, 2.827e-01, -1.739e-01, 1.858e-01, -4.782e-02, -1.730e-01, -8.191e-02)); + r += mul(s4_4, M4(4.778e-02, 4.010e-01, 5.081e-03, -9.438e-02, -7.711e-03, -1.183e-02, 4.648e-02, 3.270e-02, 2.426e-02, -1.146e-01, -9.082e-02, -4.269e-02, -1.494e-01, 8.305e-03, -1.883e-02, -8.324e-02)); + r += mul(s4_5, M4(-9.889e-02, 1.005e-01, 1.832e-01, 4.781e-02, -8.160e-02, -7.781e-02, 1.390e-01, 1.247e-03, -1.654e-01, -2.202e-02, 8.227e-02, 1.056e-02, -8.250e-02, 7.482e-02, -1.573e-02, 1.059e-01)); + r += mul(s4_6, M4(1.319e-01, -6.502e-03, -5.332e-02, -7.135e-02, -6.270e-03, -6.166e-02, 6.672e-02, 5.700e-02, 7.430e-02, 1.376e-02, 3.540e-02, 9.783e-02, 5.452e-02, -2.676e-02, -2.449e-02, -1.114e-01)); + r += mul(s4_7, M4(-1.140e-01, 8.221e-02, -9.009e-02, -2.045e-02, 1.120e-01, 3.351e-02, 9.168e-02, 8.984e-02, 6.165e-02, 5.846e-03, -1.170e-01, 9.888e-02, -5.968e-02, -1.768e-02, -1.493e-02, 3.218e-02)); + r += mul(s4_8, M4(-3.808e-03, 6.917e-02, -1.208e-02, -8.708e-02, 1.538e-01, -1.203e-02, -3.871e-02, -6.714e-02, 5.404e-02, -1.747e-02, -9.208e-02, 1.674e-03, 2.795e-02, -2.504e-02, 8.272e-02, -1.598e-02)); + r += mul(s5_0, M4(1.849e-02, -1.106e-01, -1.307e-01, -1.809e-02, 2.501e-01, 3.170e-02, -3.167e-02, 1.846e-02, -5.607e-02, 3.111e-02, 1.516e-01, 1.052e-01, -8.117e-02, 2.545e-03, 1.696e-01, 5.196e-02)); + r += mul(s5_1, M4(-3.450e-02, 3.294e-02, 1.150e-01, -9.631e-02, -2.406e-01, -2.215e-01, 1.461e-02, 9.308e-02, 9.157e-02, 2.553e-01, 7.148e-02, 1.042e-02, 7.841e-02, -7.270e-02, 9.864e-02, 1.496e-01)); + r += mul(s5_2, M4(5.166e-02, 4.302e-02, -7.258e-03, 5.968e-02, -1.219e-02, -4.505e-02, 2.568e-01, -9.986e-02, -2.914e-02, 1.338e-01, 4.162e-02, 5.395e-02, -1.360e-01, -2.342e-02, 1.640e-01, 1.126e-01)); + r += mul(s5_3, M4(5.679e-03, -1.634e-02, -2.614e-01, 6.877e-03, -2.579e-02, -2.627e-01, -4.804e-02, -5.215e-02, -3.290e-02, -6.036e-02, 5.714e-02, -1.647e-01, 2.247e-01, -1.278e-02, -1.708e-01, 1.154e-01)); + r += mul(s5_4, 
M4(-3.261e-02, 4.729e-02, 5.397e-02, -1.239e-01, 6.622e-02, 3.045e-01, 2.128e-01, -4.741e-01, 2.397e-01, 1.441e-02, 8.847e-03, 1.231e-01, 1.921e-01, -3.118e-02, -1.782e-02, -1.320e-01)); + r += mul(s5_5, M4(-6.610e-04, 2.221e-02, 1.163e-01, 2.642e-02, 1.497e-03, -2.122e-01, 1.083e-01, 7.362e-02, 7.281e-02, 1.397e-01, 4.682e-02, 3.093e-01, 7.155e-02, 1.126e-02, -1.913e-01, 1.475e-02)); + r += mul(s5_6, M4(-8.450e-02, -1.207e-01, -1.391e-01, -4.209e-02, 1.201e-01, -1.023e-01, 2.736e-01, 2.171e-01, -4.266e-02, 9.532e-02, 6.658e-02, 4.056e-03, -9.055e-02, 3.366e-02, 1.447e-01, 5.828e-02)); + r += mul(s5_7, M4(1.398e-01, 1.394e-01, -3.104e-01, -1.527e-02, -1.108e-01, -2.226e-01, -3.162e-01, 4.604e-01, -1.103e-01, -2.872e-02, 5.296e-02, 6.879e-02, 1.880e-01, -2.118e-03, 3.585e-02, 2.129e-01)); + r += mul(s5_8, M4(6.985e-02, -2.443e-02, -4.665e-02, -5.924e-02, -1.070e-01, 1.001e-01, 2.506e-02, -8.091e-02, 1.955e-02, 1.376e-02, 1.636e-01, -3.354e-02, -1.254e-01, -1.351e-02, 1.439e-01, 1.434e-01)); + r += mul(s6_0, M4(2.055e-01, -9.397e-02, 1.412e-01, 1.193e-01, -7.398e-02, 2.422e-02, 4.889e-02, 6.445e-02, -1.966e-01, -1.331e-01, 8.664e-02, 1.070e-01, -1.076e-01, -3.743e-03, -3.440e-02, 1.573e-01)); + r += mul(s6_1, M4(-1.183e-01, -2.183e-01, 1.809e-01, -2.173e-01, 1.747e-01, -7.500e-02, 7.648e-02, 2.389e-02, -1.144e-01, -3.135e-02, -1.397e-01, -4.562e-02, 1.011e-01, -5.550e-02, 3.267e-02, 6.254e-02)); + r += mul(s6_2, M4(-1.003e-01, 8.644e-03, -1.189e-01, 2.301e-02, -2.309e-02, 1.970e-02, -9.311e-02, 1.215e-02, -1.689e-01, 8.210e-02, -1.385e-02, 4.341e-02, 1.155e-01, -1.890e-01, -5.568e-02, -6.058e-02)); + r += mul(s6_3, M4(-9.299e-02, -2.571e-01, -2.069e-01, 3.947e-02, 4.162e-02, 1.204e-01, 2.203e-01, -9.760e-02, -4.572e-02, -1.029e-01, -4.367e-03, 9.108e-02, -2.537e-01, -6.520e-02, 3.719e-02, -1.694e-01)); + r += mul(s6_4, M4(2.415e-01, 3.740e-01, 3.292e-01, -1.215e-01, -1.114e-01, -4.160e-02, 5.061e-03, 4.549e-02, 2.401e-02, -6.357e-02, -2.312e-01, -3.438e-02, 
4.018e-02, -5.289e-02, -1.294e-01, -1.304e-01)); + r += mul(s6_5, M4(-1.445e-01, -2.017e-01, -9.538e-02, -1.315e-02, 2.716e-02, 4.953e-02, -9.832e-02, 1.230e-02, 5.241e-02, 1.703e-01, 1.064e-01, -8.697e-02, 5.406e-02, 2.517e-02, -7.605e-02, -1.602e-02)); + r += mul(s6_6, M4(1.227e-01, -1.989e-01, -1.872e-02, 2.003e-01, 2.548e-01, -6.034e-02, 1.542e-01, -4.469e-03, 8.203e-02, 4.234e-02, 1.523e-01, 2.219e-02, 1.997e-01, -1.103e-01, -3.337e-01, 7.199e-02)); + r += mul(s6_7, M4(-1.763e-01, 2.622e-02, 8.851e-02, 3.764e-01, -8.470e-02, -2.142e-02, 1.045e-01, 5.815e-02, 4.777e-02, -1.656e-03, -2.274e-02, 3.622e-03, -1.004e-01, 1.950e-02, -3.939e-01, 9.619e-03)); + r += mul(s6_8, M4(1.669e-01, 3.680e-02, -2.683e-01, -1.763e-01, 7.757e-02, 1.858e-02, 5.891e-02, 7.372e-03, -1.652e-01, -6.328e-02, 1.398e-02, 1.452e-01, -2.534e-01, 1.047e-01, 6.606e-02, 1.251e-01)); + r += mul(s7_0, M4(-4.698e-02, 4.001e-02, 1.513e-02, -6.184e-02, -1.538e-01, -5.059e-02, 1.702e-01, 5.881e-02, 2.812e-02, 8.479e-02, -1.627e-01, 7.281e-02, 1.658e-01, -3.580e-02, -1.803e-01, -4.138e-02)); + r += mul(s7_1, M4(-3.242e-02, 2.335e-02, 2.019e-02, -1.190e-01, 3.045e-03, 7.206e-02, 3.620e-02, -3.594e-02, 7.757e-02, 8.746e-02, 1.343e-01, -1.438e-02, -1.177e-01, -3.528e-02, -4.854e-03, -3.678e-02)); + r += mul(s7_2, M4(1.456e-01, 9.068e-02, -8.379e-02, -4.816e-02, -1.514e-02, 1.022e-03, -4.484e-02, 1.095e-01, 8.802e-03, 1.059e-02, -1.524e-02, 5.501e-02, 4.891e-02, -4.176e-02, 6.690e-02, 2.803e-02)); + r += mul(s7_3, M4(4.880e-02, -9.778e-02, 7.139e-02, -3.403e-02, -1.265e-01, 1.243e-01, 4.718e-03, -9.577e-02, 1.154e-01, -6.257e-02, -5.989e-02, -1.084e-01, 1.630e-02, 1.882e-02, 3.142e-02, -1.523e-01)); + r += mul(s7_4, M4(1.631e-02, 5.055e-02, 1.642e-01, 5.091e-02, 2.055e-01, -4.724e-02, -6.370e-02, 2.239e-02, -1.355e-01, -9.611e-02, 4.697e-03, -3.559e-02, 4.487e-02, 1.632e-01, 5.328e-02, 4.724e-02)); + r += mul(s7_5, M4(-2.235e-02, 3.882e-02, -3.194e-02, 9.299e-02, -1.983e-02, 7.830e-02, 6.663e-02, 
-1.378e-01, 5.669e-02, 9.130e-03, 1.249e-01, 3.432e-02, 4.857e-02, -1.639e-01, -2.301e-02, 7.515e-02)); + r += mul(s7_6, M4(-1.004e-01, 4.822e-02, 3.599e-02, -1.124e-02, 2.083e-01, 1.402e-01, -7.544e-02, -5.453e-02, -9.656e-03, -8.927e-02, -5.573e-03, -6.093e-02, 6.438e-02, 9.692e-03, -8.695e-03, 1.205e-02)); + r += mul(s7_7, M4(2.545e-02, -9.701e-02, 3.757e-02, -6.665e-02, -9.585e-02, -9.654e-02, -2.686e-01, 1.593e-02, 2.550e-02, 2.506e-02, -8.576e-02, 1.587e-01, -1.345e-01, 7.874e-03, -1.132e-02, 7.404e-02)); + r += mul(s7_8, M4(-9.947e-02, 2.029e-02, -9.657e-03, -9.297e-02, -3.285e-02, 2.794e-03, -1.467e-01, -1.324e-01, 1.945e-02, -4.082e-02, -3.194e-02, -5.236e-03, 2.245e-01, 1.092e-01, 8.064e-02, -4.513e-04)); + r += V4(-3.849e-02, -1.365e-02, -7.279e-03, 4.354e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.802e-02, -5.156e-02, 1.206e-01, -3.892e-01, 9.234e-03, -2.731e-02, 1.182e-01, -6.746e-02, -2.030e-01, 6.598e-02, 3.799e-01, 3.176e-01, 3.402e-02, 3.375e-03, -2.049e-01, 3.869e-01)); + r += mul(s0_1, M4(5.085e-02, 9.497e-03, -1.361e-01, 1.211e-01, 2.047e-02, -2.909e-02, 7.192e-02, -3.160e-02, 6.266e-01, -7.379e-01, 3.400e-01, 4.910e-01, -3.014e-02, -2.511e-01, -7.872e-02, 5.207e-03)); + r += mul(s0_2, M4(5.088e-02, 2.924e-01, 2.247e-01, 3.032e-01, -9.986e-02, 9.435e-02, -9.937e-02, 6.549e-02, 
1.634e-01, 3.651e-01, 7.896e-02, 4.499e-01, 1.107e-01, -8.795e-02, 1.298e-02, 3.925e-01)); + r += mul(s0_3, M4(-1.226e-01, -2.052e-01, 9.700e-02, 3.455e-01, -9.883e-03, 5.352e-02, -1.109e-01, 1.370e-01, -3.751e-01, 1.816e-02, -1.321e-01, 2.390e-01, -2.378e-01, -3.816e-02, 3.457e-01, 2.798e-01)); + r += mul(s0_4, M4(1.096e-01, -1.880e-02, -6.901e-02, 3.486e-01, 9.603e-02, -1.650e-01, 1.648e-02, 2.145e-01, 1.302e-01, 8.368e-02, 1.101e-01, 1.574e-01, -4.341e-01, 3.264e-01, -4.945e-02, -3.723e-02)); + r += mul(s0_5, M4(-1.711e-01, 2.180e-01, 2.788e-02, -1.105e-01, -4.460e-02, 1.254e-01, -3.483e-02, 4.794e-03, 1.091e-01, 6.501e-02, 1.169e-01, 3.037e-01, -1.726e-01, -1.524e-01, 1.148e-01, -6.807e-02)); + r += mul(s0_6, M4(-1.816e-02, 2.070e-01, 3.857e-03, -7.230e-02, 5.213e-02, -1.013e-01, -1.852e-02, 5.578e-02, -1.822e-01, 5.943e-02, -6.860e-02, 3.346e-01, 1.336e-02, 1.881e-03, 1.706e-04, -1.993e-02)); + r += mul(s0_7, M4(-1.091e-01, -1.831e-01, 7.351e-02, 5.954e-02, -1.961e-01, 3.039e-02, 3.556e-02, 1.123e-01, -1.606e-01, -8.329e-02, -3.582e-02, 1.874e-01, 2.041e-01, 1.687e-01, 1.214e-01, -2.145e-01)); + r += mul(s0_8, M4(-1.759e-01, 3.716e-01, 1.810e-01, 1.497e-01, 2.445e-01, 3.580e-02, -4.696e-02, 6.966e-02, -1.795e-01, -1.277e-01, 8.469e-02, 2.548e-01, 2.237e-02, -1.727e-01, 9.870e-02, -1.800e-01)); + r += mul(s1_0, M4(-4.170e-02, -5.407e-02, -2.206e-02, -4.477e-02, 7.677e-02, -1.467e-01, 1.297e-01, -7.096e-02, -7.251e-02, 1.257e-02, -1.232e-01, -8.921e-02, 5.899e-02, 2.376e-02, -1.022e-02, 3.457e-02)); + r += mul(s1_1, M4(-4.335e-02, 1.874e-03, -1.954e-02, -5.067e-02, -3.084e-02, -7.801e-02, -5.762e-02, -4.240e-02, -1.198e-02, 2.986e-02, 9.301e-02, 2.905e-02, 3.990e-02, -1.610e-01, 1.314e-02, 3.194e-02)); + r += mul(s1_2, M4(4.938e-02, 1.901e-03, -2.677e-02, -9.100e-02, 2.205e-03, 1.424e-01, -6.446e-02, -1.117e-01, -6.649e-02, -5.324e-03, 4.897e-02, -7.976e-02, 1.293e-03, -5.757e-02, 4.666e-02, -8.052e-03)); + r += mul(s1_3, M4(-1.181e-01, 8.895e-02, -3.002e-02, 
2.494e-03, 6.048e-02, 9.794e-02, 7.071e-02, -1.105e-01, 2.595e-03, -4.194e-02, 9.149e-02, 5.148e-02, 4.510e-02, -1.003e-01, -2.136e-01, -9.856e-02)); + r += mul(s1_4, M4(8.952e-02, 4.227e-02, -9.305e-02, -5.687e-02, 5.234e-02, -1.249e-01, 2.152e-02, 6.812e-02, 1.520e-01, -8.037e-02, -1.195e-01, 1.176e-01, 1.126e-02, 1.979e-01, -1.536e-01, 6.813e-02)); + r += mul(s1_5, M4(7.858e-03, -1.131e-01, -1.845e-02, -2.885e-02, -1.907e-01, -2.352e-02, -9.424e-02, 7.438e-02, -1.977e-03, 1.714e-02, -9.526e-04, 1.184e-01, -7.097e-02, -1.900e-01, 1.357e-01, -1.178e-02)); + r += mul(s1_6, M4(1.399e-02, 1.570e-02, 8.513e-02, 4.497e-02, -5.835e-02, -1.206e-01, 2.929e-02, -7.731e-02, -2.053e-02, -1.783e-02, 1.242e-02, -7.639e-02, 1.098e-02, 1.347e-01, -2.134e-02, -9.737e-02)); + r += mul(s1_7, M4(5.280e-02, 1.558e-02, 5.262e-02, -2.740e-02, -9.981e-02, 1.174e-01, 1.792e-01, -5.477e-02, 2.825e-03, 1.184e-02, -1.019e-01, 4.856e-02, -7.495e-02, 2.933e-02, 3.793e-02, -4.763e-02)); + r += mul(s1_8, M4(4.504e-02, -6.129e-02, -8.692e-02, 2.067e-01, 1.957e-01, 2.407e-01, -9.113e-02, -1.615e-01, -2.641e-03, -9.470e-02, -1.203e-02, 4.439e-02, 3.156e-02, -1.854e-02, 4.911e-02, -5.385e-02)); + r += mul(s2_0, M4(-2.898e-02, 1.585e-02, -6.785e-02, -3.388e-02, -5.240e-02, -7.491e-02, 5.920e-03, 8.501e-02, -2.700e-02, 2.790e-02, -6.616e-02, 9.314e-02, 2.613e-03, 4.740e-02, -1.952e-02, 3.159e-02)); + r += mul(s2_1, M4(2.105e-01, -1.568e-01, 7.612e-02, -8.321e-02, 4.020e-02, 6.669e-02, 6.664e-02, 1.237e-01, -1.096e-01, -1.486e-01, 3.096e-02, 2.008e-02, -4.263e-02, 1.113e-02, 1.185e-01, 1.281e-01)); + r += mul(s2_2, M4(-1.161e-02, 1.698e-02, -4.592e-02, 6.924e-02, 4.046e-02, 1.279e-01, -4.622e-02, 3.728e-02, 3.625e-02, 7.702e-02, -6.480e-02, 1.553e-01, -1.106e-01, 2.186e-02, 1.083e-02, -1.135e-02)); + r += mul(s2_3, M4(-1.065e-01, -9.866e-02, -5.545e-02, -9.504e-03, -4.676e-02, -4.768e-02, 3.364e-02, 1.528e-01, 2.147e-01, 4.792e-02, -2.744e-01, 1.018e-01, 3.624e-02, -5.829e-02, 5.161e-02, 1.064e-01)); 
+ r += mul(s2_4, M4(2.242e-01, -6.267e-02, 7.893e-02, -3.285e-03, 6.343e-02, -1.234e-02, -8.881e-03, 5.795e-02, -2.264e-01, 2.534e-01, -4.626e-01, 6.956e-02, 8.826e-02, -1.142e-01, -2.645e-01, -1.276e-02)); + r += mul(s2_5, M4(3.328e-02, -2.416e-02, -1.345e-01, -1.615e-02, 5.329e-02, 2.259e-02, 1.267e-02, -2.176e-02, 4.219e-02, 2.430e-03, -2.007e-01, 6.283e-02, 9.299e-02, -5.957e-02, 1.196e-01, -1.401e-01)); + r += mul(s2_6, M4(-1.372e-01, 5.980e-02, -4.472e-02, -2.230e-04, -2.846e-02, -1.046e-02, 9.732e-02, 7.188e-02, 8.358e-02, -9.484e-03, -4.320e-02, 4.657e-02, -1.000e-01, 6.414e-02, 5.864e-02, -4.058e-02)); + r += mul(s2_7, M4(2.689e-01, 5.418e-02, 1.132e-01, 5.436e-02, 2.305e-02, 8.284e-02, -8.678e-02, 7.193e-02, -1.923e-01, 6.689e-03, 4.396e-03, -1.197e-01, 3.617e-02, -5.872e-02, -1.023e-01, 1.241e-02)); + r += mul(s2_8, M4(2.724e-02, -6.824e-02, -3.476e-02, -6.227e-03, 1.405e-02, 1.160e-01, 2.937e-02, 1.500e-01, 5.183e-02, -6.483e-02, -3.462e-02, -1.028e-02, -2.252e-02, 3.933e-02, 9.059e-03, -1.151e-02)); + r += mul(s3_0, M4(-6.616e-02, 7.941e-03, -7.667e-02, 9.626e-02, -5.899e-02, 5.067e-02, 1.123e-01, -7.155e-02, -9.984e-02, 8.379e-03, 1.885e-01, -4.505e-02, -1.377e-01, -7.615e-02, -1.460e-01, 1.968e-02)); + r += mul(s3_1, M4(-6.391e-02, -1.772e-01, -5.519e-02, -1.564e-01, 2.967e-03, -1.374e-01, -2.468e-01, -3.889e-01, -2.793e-02, -1.800e-01, 1.317e-01, 3.061e-02, -2.984e-02, 7.570e-02, -9.489e-02, 2.382e-01)); + r += mul(s3_2, M4(-7.658e-02, 1.526e-02, -1.444e-02, 1.143e-01, 3.034e-03, 3.447e-02, 1.152e-01, 6.495e-02, 8.716e-02, 4.289e-02, 2.336e-02, 6.799e-02, -1.327e-01, 1.032e-01, 6.880e-03, 8.986e-02)); + r += mul(s3_3, M4(-3.334e-02, -4.563e-02, -7.153e-02, 5.970e-02, 1.871e-01, -6.696e-02, -6.439e-02, 2.520e-01, -1.090e-01, 3.417e-02, -1.971e-02, -9.568e-02, -1.305e-02, 1.863e-01, 1.025e-01, -2.313e-03)); + r += mul(s3_4, M4(-1.017e-01, -3.506e-01, -9.725e-02, -3.023e-02, 1.184e-01, 7.168e-02, -4.717e-01, 3.008e-01, -3.493e-01, -6.348e-02, 
-9.199e-02, -7.848e-03, 1.379e-01, -1.452e-01, -2.035e-01, 8.566e-02)); + r += mul(s3_5, M4(-9.457e-02, -1.684e-01, 7.912e-02, -4.504e-02, -1.344e-01, 1.922e-01, 2.359e-01, 3.596e-01, 7.699e-02, 2.859e-02, -5.206e-02, -2.060e-01, 9.835e-02, 3.742e-02, -4.881e-02, -3.242e-03)); + r += mul(s3_6, M4(-8.423e-02, -5.352e-02, -8.182e-02, 9.426e-02, -2.998e-02, 3.028e-02, 5.481e-02, -2.197e-01, 1.147e-01, 6.420e-04, 1.152e-01, -3.011e-03, 8.877e-02, 9.533e-03, -2.035e-01, 1.808e-01)); + r += mul(s3_7, M4(-5.532e-02, -3.217e-01, 1.923e-02, -1.066e-02, 6.336e-02, 3.110e-02, 7.060e-02, -6.651e-02, -1.623e-01, -1.309e-01, 3.931e-02, -1.972e-02, 7.495e-02, -2.356e-01, 5.781e-02, 3.142e-03)); + r += mul(s3_8, M4(-8.396e-02, -7.026e-02, 3.163e-02, -4.612e-02, 2.304e-01, 1.903e-01, 6.762e-03, 3.414e-01, -1.054e-03, -1.374e-02, 2.692e-02, -8.614e-02, 1.355e-02, -5.218e-03, -1.359e-02, -6.684e-03)); + r += mul(s4_0, M4(-4.822e-02, 6.151e-03, 1.156e-02, 1.258e-01, 1.186e-01, -3.688e-02, -4.881e-02, -2.237e-02, -1.432e-01, 1.391e-01, -7.915e-02, -4.125e-02, -9.872e-03, -1.622e-01, 5.435e-02, 9.903e-02)); + r += mul(s4_1, M4(2.703e-02, 1.311e-01, 6.639e-02, 5.176e-03, -5.718e-02, -3.089e-02, -8.949e-03, -8.158e-03, -1.306e-01, 5.654e-02, 8.562e-02, 3.945e-02, -1.173e-01, 8.266e-02, -1.476e-01, 1.633e-03)); + r += mul(s4_2, M4(-5.208e-02, 4.768e-02, 1.031e-02, 1.786e-01, 7.597e-03, 2.836e-01, -4.354e-02, -8.829e-02, 5.808e-02, 1.167e-01, 2.405e-03, 1.354e-01, 4.777e-02, -3.373e-02, -6.581e-02, -5.401e-02)); + r += mul(s4_3, M4(1.481e-01, 3.910e-02, 1.066e-02, 2.328e-02, 3.709e-02, -5.371e-02, 3.497e-02, -1.132e-01, 1.859e-02, -6.759e-02, 3.889e-02, -5.446e-02, 5.095e-02, -8.579e-02, 4.652e-02, 1.072e-01)); + r += mul(s4_4, M4(-9.275e-02, 1.607e-02, -8.003e-02, 3.099e-02, -7.704e-02, 7.382e-02, 8.479e-02, 5.053e-02, -9.539e-02, 8.376e-02, 1.122e-01, 7.184e-03, 5.598e-02, 1.336e-01, 4.726e-02, 5.561e-02)); + r += mul(s4_5, M4(-6.532e-02, -1.142e-02, 2.389e-02, 6.100e-02, -5.159e-03, 
-8.185e-02, -1.234e-01, 9.162e-02, 3.504e-02, 1.170e-01, -1.275e-01, -1.137e-01, 2.591e-02, -1.459e-01, -1.475e-01, -8.061e-03)); + r += mul(s4_6, M4(-5.724e-02, -7.243e-02, -1.139e-02, -1.672e-01, -4.244e-02, -1.267e-01, -2.326e-02, -9.063e-02, -5.881e-02, -4.040e-02, -7.768e-02, -2.173e-01, -1.426e-01, 9.559e-02, 6.449e-02, -7.141e-02)); + r += mul(s4_7, M4(1.598e-01, 1.262e-01, 1.558e-01, -5.444e-02, 3.989e-02, -2.002e-02, 1.427e-01, 8.954e-03, -1.607e-02, 1.143e-01, -1.499e-02, 1.431e-01, 1.431e-01, 1.025e-01, -3.980e-03, 3.351e-02)); + r += mul(s4_8, M4(-4.759e-02, -1.156e-03, -4.510e-02, -1.927e-02, 5.402e-02, 2.333e-01, 1.037e-01, 8.638e-02, 4.829e-02, 8.402e-02, 6.311e-02, -1.432e-01, 1.010e-02, -9.231e-02, -1.179e-01, -6.892e-02)); + r += mul(s5_0, M4(2.913e-02, -2.549e-03, -1.849e-02, -1.047e-01, 1.137e-01, -1.432e-01, -1.483e-02, -2.535e-01, 1.068e-01, -2.117e-02, -5.043e-02, 7.363e-03, 1.117e-01, 1.255e-02, 3.333e-02, -1.697e-01)); + r += mul(s5_1, M4(1.014e-01, 2.990e-02, -1.086e-01, -4.205e-02, 8.811e-02, 1.411e-01, 4.324e-02, 2.712e-02, -1.168e-01, -6.908e-02, 7.386e-02, -3.770e-02, -1.730e-02, 3.807e-01, -2.026e-02, -2.077e-01)); + r += mul(s5_2, M4(7.198e-03, -1.615e-02, -7.113e-02, -7.417e-02, 3.233e-02, 1.465e-01, 3.877e-02, 1.170e-01, 1.342e-01, -3.204e-02, 1.353e-01, 1.315e-01, -1.601e-02, 1.509e-03, -1.582e-01, -1.458e-01)); + r += mul(s5_3, M4(1.164e-01, 5.345e-03, 2.005e-02, -3.676e-02, 2.190e-01, 1.469e-01, -8.587e-02, -6.513e-02, -3.172e-01, -3.607e-02, 1.723e-01, 2.307e-02, 1.115e-02, -3.679e-02, 1.605e-01, 2.028e-01)); + r += mul(s5_4, M4(2.060e-01, 2.299e-02, -2.166e-02, 4.765e-02, -2.165e-01, 2.464e-01, -2.905e-01, -5.862e-02, 3.866e-02, 3.496e-02, 1.835e-01, -1.279e-02, -1.741e-01, 2.865e-01, 6.864e-02, -1.401e-01)); + r += mul(s5_5, M4(-1.086e-01, -2.790e-01, -1.446e-01, 1.313e-01, -1.805e-01, -2.065e-02, 9.124e-02, -1.430e-01, 3.229e-02, 1.767e-02, -1.588e-01, -7.295e-02, 6.204e-02, 1.200e-01, -1.326e-01, -5.273e-03)); + r += 
mul(s5_6, M4(2.269e-01, -9.886e-02, 1.361e-01, -1.473e-01, -2.404e-01, 7.051e-02, -9.116e-02, -1.986e-01, -8.099e-02, -9.891e-02, -4.235e-02, -6.047e-02, -1.654e-01, 1.165e-01, 1.611e-01, -9.142e-03)); + r += mul(s5_7, M4(2.170e-02, 2.024e-01, 2.867e-01, -8.679e-02, 3.076e-01, 3.922e-02, -8.530e-02, 5.478e-01, 5.273e-02, -2.640e-02, -1.919e-01, 1.131e-01, 1.696e-01, 5.603e-03, -3.013e-02, -1.121e-01)); + r += mul(s5_8, M4(-8.068e-03, 3.940e-02, 3.067e-01, 6.644e-03, -6.139e-02, -2.007e-01, -9.687e-03, 3.392e-01, 1.211e-01, -1.120e-01, -1.171e-02, -3.860e-02, 1.217e-01, -2.424e-02, 1.843e-01, -1.736e-01)); + r += mul(s6_0, M4(6.439e-02, -6.336e-02, -1.081e-01, 4.337e-02, -3.236e-02, 1.752e-02, 8.909e-02, -4.700e-02, -3.128e-02, -4.913e-02, 1.235e-01, -9.853e-02, -1.651e-02, 3.878e-01, -3.516e-02, -3.919e-02)); + r += mul(s6_1, M4(2.277e-01, 4.336e-02, 7.852e-02, 1.576e-01, -1.563e-02, -1.959e-01, 1.693e-01, 1.596e-01, -1.672e-01, -6.992e-02, -5.763e-02, -2.148e-02, 6.415e-02, -1.158e-01, 7.301e-02, -8.661e-02)); + r += mul(s6_2, M4(-4.668e-02, 8.039e-02, 2.044e-02, 2.014e-02, 4.289e-02, -9.399e-02, -9.409e-02, 1.101e-01, 6.680e-02, 1.579e-01, 1.659e-01, 6.766e-02, -1.477e-01, 8.359e-02, 5.109e-02, 9.460e-02)); + r += mul(s6_3, M4(3.234e-01, 1.618e-01, -1.801e-01, -1.483e-01, -2.036e-01, 2.286e-02, 3.435e-02, 9.168e-03, 1.707e-01, 1.538e-01, -1.954e-01, 6.670e-02, -1.488e-01, 3.419e-04, 1.204e-01, -1.939e-02)); + r += mul(s6_4, M4(-2.884e-01, 1.031e-01, -2.009e-01, -2.042e-01, -2.182e-02, -4.738e-02, -2.117e-03, -6.116e-02, 2.673e-02, -4.994e-01, 1.248e-01, -1.820e-01, -1.018e-01, 7.883e-02, -8.556e-02, -1.523e-01)); + r += mul(s6_5, M4(-6.129e-03, 3.436e-01, -4.294e-02, 3.408e-02, -5.108e-02, 1.636e-02, 1.194e-01, -2.425e-02, -1.486e-01, 1.052e-01, -2.564e-02, 6.141e-03, -7.817e-02, -7.600e-02, 2.309e-02, -1.317e-01)); + r += mul(s6_6, M4(-3.343e-01, 1.562e-01, 2.314e-02, -8.994e-02, -1.566e-01, -1.302e-01, -1.503e-01, -3.620e-02, 3.442e-02, -4.477e-02, 8.212e-02, 
1.376e-02, -1.104e-01, 2.407e-01, 8.167e-02, 1.373e-01)); + r += mul(s6_7, M4(1.755e-01, -2.458e-01, -8.140e-02, 5.062e-02, -7.652e-02, 1.068e-02, 1.579e-01, 7.307e-02, -5.127e-02, -1.075e-01, -1.052e-01, -1.181e-01, -1.447e-01, -1.229e-01, 7.682e-02, -3.210e-01)); + r += mul(s6_8, M4(7.308e-04, -1.060e-01, 1.500e-02, 2.670e-01, -3.298e-02, -1.744e-02, 4.964e-02, -2.915e-02, 1.542e-01, 3.643e-02, -2.617e-02, 8.074e-02, -1.216e-01, -2.563e-02, -2.689e-02, 1.399e-01)); + r += mul(s7_0, M4(9.782e-02, -5.249e-02, -1.006e-01, 1.151e-01, -1.402e-01, -3.647e-02, 6.410e-02, 9.094e-03, 4.262e-02, -3.584e-02, -1.978e-02, 5.485e-02, -2.607e-04, 7.551e-02, -6.126e-02, -1.940e-02)); + r += mul(s7_1, M4(6.846e-02, 2.305e-01, -4.094e-02, 1.400e-01, -1.560e-02, -1.438e-02, 5.824e-05, -1.448e-02, -9.337e-02, -5.603e-03, 1.820e-02, 2.381e-02, 2.165e-02, -9.807e-02, -3.444e-02, -3.379e-02)); + r += mul(s7_2, M4(-4.101e-02, -7.144e-02, 3.168e-02, -1.573e-02, -5.890e-04, 7.380e-02, -7.336e-02, -4.920e-03, 3.547e-02, 1.015e-01, 5.512e-02, 4.064e-02, -6.814e-02, -3.267e-02, -5.650e-02, 2.536e-02)); + r += mul(s7_3, M4(1.159e-01, 4.143e-02, -7.666e-02, -3.528e-02, -1.557e-01, 1.066e-01, -1.218e-01, 1.546e-04, 1.202e-01, 2.015e-01, -5.828e-02, -1.662e-02, 1.083e-01, 1.395e-01, 4.536e-02, 3.132e-02)); + r += mul(s7_4, M4(-5.859e-02, 3.619e-02, -4.449e-02, 3.329e-03, 1.145e-01, -1.256e-01, -1.127e-01, 3.612e-02, 5.740e-02, 1.154e-01, 1.674e-01, -1.153e-03, 1.617e-02, -1.630e-02, -9.418e-02, 8.744e-02)); + r += mul(s7_5, M4(3.250e-02, -7.466e-03, 1.048e-01, 2.813e-02, 3.007e-02, -6.485e-02, -2.396e-02, 1.130e-01, 1.517e-02, -1.140e-01, 3.263e-02, 4.794e-02, 9.150e-03, -1.624e-01, -7.188e-02, -2.948e-02)); + r += mul(s7_6, M4(-5.930e-02, -9.788e-02, 1.161e-01, 1.467e-02, -1.963e-02, 5.350e-02, 1.978e-02, -3.094e-02, -6.057e-02, 1.543e-01, -5.657e-02, -1.053e-01, -2.314e-02, 1.829e-02, 3.077e-02, 3.972e-02)); + r += mul(s7_7, M4(-1.266e-01, 4.303e-02, -4.994e-02, 3.032e-02, -9.509e-02, 
1.854e-01, -1.631e-02, 4.650e-02, -5.716e-02, -1.233e-02, -3.943e-02, -1.468e-01, -1.487e-02, 2.762e-02, 9.000e-02, 3.216e-02)); + r += mul(s7_8, M4(1.010e-02, -6.407e-02, -2.538e-02, -5.266e-02, 7.357e-03, -6.038e-02, -1.984e-02, 3.594e-02, -7.398e-03, -1.875e-01, -3.417e-02, -1.130e-03, -7.340e-02, -7.423e-02, -4.463e-02, 3.237e-02)); + r += V4(-3.010e-02, -1.824e-02, 2.032e-02, -3.652e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.458e-01, -1.057e-01, -3.860e-01, 3.604e-01, -5.817e-02, -8.183e-02, -6.805e-02, 8.234e-02, -5.423e-03, 5.963e-02, 2.290e-01, 3.974e-04, 2.429e-01, -1.556e-02, -9.961e-02, -1.299e-01)); + r += mul(s0_1, M4(2.341e-01, -4.703e-02, 2.257e-02, 5.068e-01, 1.539e-01, 6.969e-02, -1.353e-01, 3.199e-04, -1.972e-01, 9.778e-02, -4.482e-01, 3.351e-01, 8.723e-02, -1.328e-01, 3.968e-01, -6.975e-02)); + r += mul(s0_2, M4(-2.142e-01, 3.969e-02, -3.004e-01, -2.884e-02, -1.830e-02, 1.636e-01, 2.705e-02, -6.598e-02, 1.193e-01, -4.205e-02, -6.471e-01, 6.102e-01, 2.572e-01, -1.189e-01, -2.108e-01, -2.460e-01)); + r += mul(s0_3, M4(3.241e-02, -1.809e-01, -6.209e-02, -2.740e-01, -9.664e-02, -4.631e-02, 1.004e-02, -8.167e-03, -2.651e-01, 1.124e-01, -3.589e-01, -5.007e-01, -5.856e-02, -2.368e-01, 1.615e-01, -4.301e-01)); + r += mul(s0_4, M4(2.754e-02, 8.503e-02, 1.093e-01, -2.404e-01, -2.841e-03, 
-3.447e-02, 4.092e-02, 2.156e-01, -4.043e-01, 3.334e-01, -2.567e-01, -7.555e-01, 4.484e-01, -2.664e-01, -2.881e-02, -1.850e-01)); + r += mul(s0_5, M4(-4.196e-02, 1.179e-01, -2.343e-02, -2.367e-01, 2.046e-01, 3.918e-02, 1.311e-01, 1.751e-01, 3.000e-01, -1.206e-01, -4.298e-01, 4.896e-01, 6.354e-01, 5.186e-01, 3.271e-01, -2.650e-01)); + r += mul(s0_6, M4(-2.206e-01, -1.766e-01, 1.442e-01, 1.336e-01, 1.546e-01, 1.824e-02, 1.466e-01, -2.096e-01, -3.067e-01, 3.769e-01, -1.642e-01, 3.411e-01, -2.238e-01, 2.257e-01, -1.335e-01, -2.824e-01)); + r += mul(s0_7, M4(5.319e-02, -1.473e-01, 9.736e-02, 2.883e-02, -3.570e-01, -6.722e-02, -7.487e-02, 2.148e-01, -4.199e-01, 3.485e-02, -5.580e-02, -3.259e-01, -2.798e-02, 1.914e-01, 2.524e-01, -1.967e-01)); + r += mul(s0_8, M4(-1.292e-01, 1.673e-01, -1.227e-01, 2.136e-01, 3.612e-02, 5.502e-02, -4.655e-03, -9.540e-02, 2.594e-02, -4.818e-02, -1.172e-01, -4.448e-03, -7.485e-02, 6.047e-02, 1.883e-01, 3.448e-02)); + r += mul(s1_0, M4(2.700e-02, -5.188e-02, 5.934e-02, 1.762e-01, 5.025e-02, -4.472e-02, -7.632e-02, -1.632e-02, -3.359e-02, 5.183e-02, 5.725e-02, 1.816e-01, 2.679e-02, -1.176e-01, 1.402e-01, 4.902e-02)); + r += mul(s1_1, M4(1.645e-01, -4.533e-02, 1.421e-01, -2.275e-02, -4.301e-02, 4.713e-02, 2.986e-01, 6.367e-02, -6.663e-02, 1.499e-01, -4.491e-02, 2.421e-03, 1.310e-01, -9.423e-03, 7.218e-02, -6.969e-02)); + r += mul(s1_2, M4(-3.249e-02, 1.498e-01, 6.415e-02, 1.244e-01, -1.128e-01, -4.648e-02, -1.526e-02, 8.026e-02, -5.200e-02, 5.942e-02, 1.186e-01, -1.078e-01, -2.788e-02, -1.210e-01, -1.752e-02, -4.173e-02)); + r += mul(s1_3, M4(-6.615e-02, -3.023e-02, -2.472e-02, -9.554e-02, 5.002e-03, 4.139e-02, -1.589e-01, -1.873e-01, 1.275e-01, -7.109e-02, -9.129e-02, 7.003e-02, -2.288e-02, 2.433e-02, 9.158e-02, -2.862e-02)); + r += mul(s1_4, M4(-6.587e-02, -3.614e-02, 1.100e-03, 6.735e-02, 1.356e-01, 9.111e-02, 9.717e-02, -1.448e-01, 8.483e-02, -1.013e-01, 3.991e-02, -1.816e-02, -1.464e-02, -1.150e-02, -4.866e-02, 2.912e-03)); + r += 
mul(s1_5, M4(-4.282e-02, 1.452e-01, -9.649e-02, -1.703e-01, -3.860e-02, 5.221e-02, -2.984e-01, -2.036e-01, -1.365e-02, 8.505e-02, -9.315e-03, 2.664e-02, 6.935e-02, 5.125e-02, -1.443e-01, -1.144e-01)); + r += mul(s1_6, M4(-1.330e-01, 2.596e-02, 1.121e-01, 5.089e-02, -1.685e-02, -1.642e-01, 2.425e-01, 1.509e-01, -2.067e-02, -1.228e-01, -5.734e-02, -2.298e-02, -7.177e-02, -2.879e-02, -5.220e-02, -5.664e-02)); + r += mul(s1_7, M4(-4.861e-02, -1.351e-01, -4.328e-02, -4.571e-02, -5.402e-02, 4.793e-03, 3.349e-01, 5.714e-02, -1.283e-02, -3.564e-02, 1.486e-02, 5.641e-02, -4.300e-02, 1.057e-01, -7.635e-03, 1.724e-01)); + r += mul(s1_8, M4(7.989e-02, 3.316e-02, -8.216e-02, -6.660e-02, -1.776e-01, -6.948e-02, 2.577e-02, 2.058e-01, -2.778e-02, -3.578e-02, -1.244e-02, -2.773e-02, -2.935e-02, 1.343e-02, -8.059e-02, 1.333e-01)); + r += mul(s2_0, M4(6.232e-02, -3.149e-02, 1.008e-01, -1.106e-02, -3.848e-02, -8.277e-02, 6.132e-02, 5.282e-02, -3.018e-02, -3.332e-02, -4.491e-02, -4.111e-03, 2.646e-01, 6.702e-03, 8.115e-02, -9.399e-02)); + r += mul(s2_1, M4(-5.254e-02, 1.544e-02, -1.533e-01, 9.559e-03, 1.267e-01, 1.300e-01, -6.107e-02, 3.549e-03, 7.784e-02, -2.019e-01, 4.710e-02, 1.812e-02, -1.439e-01, -5.178e-02, -3.601e-02, -1.172e-02)); + r += mul(s2_2, M4(1.030e-02, 1.181e-01, 1.131e-02, 1.816e-02, -2.074e-02, 6.633e-02, -6.371e-02, 3.127e-02, 5.810e-02, -1.121e-01, -4.048e-02, 5.210e-02, 4.552e-02, -1.315e-01, 2.048e-02, 9.477e-02)); + r += mul(s2_3, M4(-9.004e-03, 6.638e-02, 2.992e-02, 7.252e-02, -8.015e-03, 3.287e-02, -1.135e-01, 9.058e-02, -1.739e-01, 7.196e-02, 1.993e-01, 1.446e-02, -1.300e-01, 1.280e-01, -1.393e-01, -3.597e-02)); + r += mul(s2_4, M4(-1.705e-01, 1.210e-03, 3.184e-02, 9.400e-02, 3.255e-02, 1.192e-01, -7.085e-03, -7.351e-02, -8.781e-03, -5.180e-02, 2.926e-02, -1.694e-01, 8.069e-02, 1.388e-01, 1.032e-01, 1.272e-01)); + r += mul(s2_5, M4(-1.014e-01, 1.645e-02, -2.046e-02, -4.512e-02, 6.069e-02, 1.121e-01, -6.333e-02, -5.930e-03, 1.885e-01, 5.901e-03, 1.992e-01, 
-1.039e-01, 2.526e-02, 5.944e-02, 1.636e-01, -2.309e-02)); + r += mul(s2_6, M4(3.584e-02, 2.289e-02, 4.466e-02, -1.670e-01, 3.483e-02, -1.253e-01, -4.845e-02, -1.067e-01, -3.520e-02, -4.303e-02, 7.572e-02, 7.915e-03, -6.681e-02, 5.621e-02, -9.969e-03, 7.440e-03)); + r += mul(s2_7, M4(-1.897e-01, -9.183e-02, -7.758e-02, -2.510e-03, -5.537e-02, 1.171e-03, -2.415e-02, -3.851e-02, 8.360e-02, -5.710e-02, 2.774e-02, 2.952e-02, 1.164e-01, 8.237e-02, -1.203e-01, -2.561e-02)); + r += mul(s2_8, M4(9.266e-03, -5.956e-02, 9.509e-02, 5.436e-02, -1.968e-02, -2.842e-02, -6.990e-02, -1.198e-01, -4.262e-02, -1.328e-01, 1.243e-01, -6.755e-02, 6.468e-02, -1.296e-01, -1.240e-01, -7.022e-02)); + r += mul(s3_0, M4(1.413e-02, 4.781e-02, 1.564e-01, -6.482e-02, 1.809e-01, -9.167e-02, -2.371e-02, 2.177e-01, 7.312e-02, -1.957e-01, 8.278e-02, -1.701e-02, -1.665e-01, 2.045e-02, 2.567e-01, -3.322e-02)); + r += mul(s3_1, M4(2.266e-01, 1.190e-01, -1.189e-01, -4.612e-03, -2.023e-01, 3.506e-01, 5.051e-02, -5.788e-03, 9.186e-02, -5.926e-02, -6.753e-02, 7.830e-03, -7.641e-02, 7.843e-03, 2.000e-01, -1.995e-01)); + r += mul(s3_2, M4(-3.059e-01, 1.439e-02, -2.829e-02, -9.502e-02, -1.778e-01, 5.261e-02, 1.083e-01, 2.346e-01, 1.277e-02, 2.619e-02, -1.854e-01, 1.131e-01, -4.176e-03, 5.011e-03, 6.163e-02, 8.296e-02)); + r += mul(s3_3, M4(-2.323e-02, -3.141e-02, -1.047e-01, -1.028e-01, 1.246e-01, 9.232e-02, -2.727e-01, -2.723e-01, -1.058e-01, 2.544e-02, -6.973e-02, -1.832e-01, -1.606e-01, -9.955e-02, -4.539e-02, 9.036e-03)); + r += mul(s3_4, M4(2.623e-02, -4.559e-02, -1.179e-01, 9.006e-02, -4.979e-02, -1.042e-02, -3.673e-02, -4.996e-01, 7.021e-02, -1.124e-01, -5.125e-03, -1.706e-01, 5.848e-03, -1.593e-01, 1.029e-02, -5.513e-02)); + r += mul(s3_5, M4(-7.010e-02, -3.848e-02, 1.459e-01, -1.935e-02, 3.735e-01, 1.714e-01, 8.493e-02, 1.259e-01, 4.226e-03, -7.313e-02, 4.575e-02, -6.714e-02, 5.353e-02, 1.139e-01, -4.538e-02, 4.854e-02)); + r += mul(s3_6, M4(4.551e-02, -4.272e-02, 5.300e-02, -6.099e-03, -5.680e-02, 
-3.682e-01, 4.008e-02, 3.812e-01, 6.084e-03, 4.887e-02, 3.415e-02, 5.117e-02, 1.555e-03, -5.922e-02, 8.022e-02, 2.330e-04)); + r += mul(s3_7, M4(2.511e-01, -1.463e-01, -1.421e-01, 3.722e-01, -1.537e-01, -3.415e-01, -4.897e-02, 1.385e-01, 2.084e-01, -6.924e-02, 4.013e-02, -7.006e-03, 1.067e-01, 1.622e-01, 8.471e-02, 1.176e-01)); + r += mul(s3_8, M4(4.099e-02, 1.062e-02, -8.365e-02, 2.326e-01, -7.736e-02, 2.510e-02, 4.253e-02, -5.139e-02, -7.250e-02, 2.679e-02, -5.142e-02, 1.117e-02, -1.339e-01, -3.943e-02, -9.308e-02, 6.173e-02)); + r += mul(s4_0, M4(-3.013e-02, 1.576e-01, 1.254e-02, 1.647e-02, 5.044e-03, -5.229e-02, -4.009e-03, -1.699e-02, 3.875e-02, -5.560e-02, 8.825e-02, 4.523e-02, 7.351e-02, 1.831e-02, -2.503e-02, -2.665e-02)); + r += mul(s4_1, M4(6.452e-02, -5.458e-02, -5.632e-02, 1.595e-01, -7.125e-02, 1.512e-01, -9.220e-02, 1.126e-01, -7.965e-02, -1.315e-01, 1.354e-01, 5.626e-02, -2.081e-02, 5.125e-03, 6.471e-03, 6.599e-02)); + r += mul(s4_2, M4(1.958e-02, 6.594e-02, 1.108e-01, 2.743e-02, 1.106e-01, 8.074e-02, 1.238e-02, 1.530e-01, -1.547e-03, -7.476e-02, -2.067e-02, 2.805e-02, 4.422e-02, -1.744e-02, -3.843e-02, 6.642e-02)); + r += mul(s4_3, M4(-1.299e-01, 6.686e-02, 1.589e-01, -7.593e-02, 1.923e-01, 5.234e-02, -8.208e-03, -1.401e-03, 1.549e-01, -1.942e-01, -1.506e-01, 1.181e-01, 9.531e-02, 6.049e-02, 1.813e-01, 8.355e-02)); + r += mul(s4_4, M4(-2.774e-02, -1.567e-01, -7.373e-02, 6.260e-02, 5.863e-02, -1.018e-02, -1.111e-01, -3.510e-02, -1.002e-01, 6.370e-02, -1.535e-03, 1.797e-01, 7.641e-02, -4.574e-02, -1.022e-01, -9.351e-02)); + r += mul(s4_5, M4(-1.200e-01, -1.160e-02, -6.534e-02, -1.067e-01, 5.484e-02, 1.177e-01, 1.267e-01, -1.225e-01, -2.964e-02, 2.300e-02, -1.816e-01, -2.683e-01, -8.582e-02, 1.671e-02, -6.333e-02, 4.319e-02)); + r += mul(s4_6, M4(4.055e-02, 7.781e-02, -3.199e-02, -2.787e-02, -1.216e-02, -7.212e-03, -1.433e-01, 2.704e-02, 4.327e-02, -4.886e-02, -9.572e-02, -6.567e-02, 3.094e-02, -1.714e-02, 7.660e-02, 1.517e-01)); + r += mul(s4_7, 
M4(-1.141e-01, 1.709e-02, -1.614e-01, -1.094e-01, 9.059e-02, -7.837e-02, -4.849e-02, -8.270e-02, -6.884e-02, 1.992e-01, -8.652e-03, 1.141e-01, -1.778e-01, 8.916e-02, 6.338e-03, 3.520e-02)); + r += mul(s4_8, M4(5.285e-02, 1.784e-02, 9.877e-02, -4.795e-02, -1.474e-02, 5.813e-02, 1.600e-02, -2.331e-01, 6.161e-02, 1.326e-02, -2.269e-02, -9.264e-02, -7.120e-02, -6.561e-02, -1.330e-01, -1.070e-01)); + r += mul(s5_0, M4(-3.443e-02, 1.211e-02, 7.835e-02, 8.088e-02, 6.290e-02, -1.237e-01, 3.414e-02, -4.064e-02, -3.365e-02, -8.764e-02, 1.468e-01, -1.177e-01, 1.902e-01, 3.332e-03, 7.480e-02, 2.019e-01)); + r += mul(s5_1, M4(1.156e-01, 1.688e-01, -1.575e-01, 2.182e-02, 2.382e-01, -1.034e-01, 9.477e-02, 1.114e-01, -4.447e-02, 1.162e-01, -1.532e-02, -1.506e-01, -2.357e-01, -2.335e-02, -2.261e-01, 9.156e-02)); + r += mul(s5_2, M4(1.047e-01, -3.653e-02, -6.395e-02, 6.790e-03, -8.731e-02, 1.351e-02, -1.481e-01, 3.597e-02, -7.186e-02, -1.126e-01, -4.724e-02, 1.909e-02, 5.390e-02, 1.135e-01, 7.112e-02, 1.034e-01)); + r += mul(s5_3, M4(1.474e-01, 1.468e-01, -4.779e-02, 3.081e-02, 2.681e-03, 2.823e-01, 2.609e-01, 1.840e-01, 5.953e-02, -6.490e-02, 6.995e-02, 1.097e-01, 1.500e-03, 9.816e-02, -7.883e-02, -2.856e-01)); + r += mul(s5_4, M4(1.473e-01, 3.564e-01, -1.592e-01, 9.082e-02, 3.926e-01, 3.493e-02, 3.395e-02, -1.690e-01, -1.149e-01, 3.361e-01, 1.345e-01, -7.451e-02, -3.164e-01, -6.502e-02, -7.120e-02, -7.121e-02)); + r += mul(s5_5, M4(1.170e-01, 6.699e-02, -9.870e-03, -2.853e-03, 6.475e-02, 6.402e-02, 3.185e-01, -5.999e-01, -1.418e-02, -6.196e-02, -2.738e-01, 1.501e-02, 1.551e-01, -9.429e-02, 9.613e-02, 1.801e-01)); + r += mul(s5_6, M4(1.714e-01, 9.871e-03, 7.027e-02, -1.070e-01, 1.474e-01, 1.584e-01, -5.971e-02, -3.344e-01, -7.838e-02, -1.431e-01, 9.112e-02, -1.975e-02, -1.336e-01, 5.051e-02, 7.399e-02, -2.969e-01)); + r += mul(s5_7, M4(6.431e-02, 1.743e-01, 6.981e-02, 1.151e-01, 1.525e-01, 3.157e-02, 2.851e-01, 1.275e-02, -2.096e-01, 2.519e-02, 9.657e-02, 1.002e-01, -9.906e-02, 
2.752e-01, -4.120e-02, -3.626e-01)); + r += mul(s5_8, M4(9.874e-02, 1.203e-01, 5.129e-02, 1.009e-02, 3.056e-01, -6.610e-03, 5.447e-03, 7.317e-01, -1.282e-01, -3.201e-02, 4.725e-02, 9.542e-02, 1.964e-02, -2.541e-01, -1.279e-01, -1.049e-01)); + r += mul(s6_0, M4(1.578e-01, -3.237e-01, 2.350e-02, -6.793e-02, 8.381e-02, 4.005e-02, -7.837e-02, 1.254e-01, 1.684e-01, -1.441e-01, -1.255e-01, -9.504e-02, -8.860e-02, 5.765e-02, 8.474e-02, 2.351e-02)); + r += mul(s6_1, M4(-1.006e-02, 1.862e-01, 9.833e-02, 2.697e-01, -1.419e-02, 1.021e-02, 5.005e-02, -1.079e-01, 2.202e-01, 3.633e-02, 1.289e-01, 2.221e-01, -1.699e-01, -1.060e-01, -1.264e-01, 1.018e-01)); + r += mul(s6_2, M4(6.856e-02, -1.482e-01, -4.429e-02, -2.246e-02, -3.099e-02, -1.002e-01, -3.366e-03, -1.192e-01, -2.949e-02, 1.752e-01, -1.146e-01, 1.870e-01, -1.321e-01, -3.956e-02, -4.185e-02, -1.567e-01)); + r += mul(s6_3, M4(4.985e-02, 3.060e-01, 1.996e-01, -2.055e-01, 8.430e-02, -2.116e-01, -1.391e-01, 6.589e-02, -1.074e-01, 1.493e-01, -1.257e-01, -9.184e-02, 2.587e-01, -9.467e-02, -1.248e-01, 3.444e-01)); + r += mul(s6_4, M4(2.068e-01, -8.650e-02, -3.576e-01, -1.830e-01, 2.032e-01, 6.428e-02, 6.387e-02, -1.238e-01, 1.180e-01, -6.822e-02, -9.796e-02, -3.811e-02, -3.004e-02, -1.787e-01, 1.575e-01, 1.338e-01)); + r += mul(s6_5, M4(4.840e-02, 3.701e-01, -1.104e-01, 8.398e-02, 2.643e-02, -7.537e-02, 5.299e-02, 3.334e-02, -2.874e-01, -1.612e-02, 8.122e-02, 8.529e-02, 5.994e-02, -4.841e-02, 3.470e-02, -9.663e-02)); + r += mul(s6_6, M4(2.399e-01, -1.344e-02, 4.854e-02, -7.704e-02, 2.154e-01, -7.763e-02, -1.018e-01, 1.362e-01, -1.389e-02, -8.421e-02, 2.387e-02, 1.391e-01, -1.868e-01, -1.023e-02, 9.095e-02, 7.046e-04)); + r += mul(s6_7, M4(-3.735e-01, -1.332e-01, -3.959e-03, -1.446e-01, -6.002e-02, 6.089e-02, 8.194e-02, -1.282e-01, -9.532e-02, -6.605e-02, 5.474e-02, -2.218e-02, 9.658e-02, -1.103e-01, -5.073e-01, 1.831e-01)); + r += mul(s6_8, M4(-6.805e-02, -1.718e-01, -1.616e-01, 8.859e-01, -9.957e-02, -2.877e-02, 8.855e-02, 
1.533e-03, 2.438e-01, -3.730e-02, 7.614e-03, -2.106e-02, -1.311e-01, 4.054e-02, -3.424e-01, -1.383e-01)); + r += mul(s7_0, M4(6.965e-03, -5.751e-02, -8.842e-03, 1.654e-02, 9.443e-02, 3.511e-02, -8.971e-02, 2.087e-01, -1.560e-01, -1.594e-01, -4.946e-03, -1.539e-01, -5.362e-02, 3.822e-02, 1.008e-01, -8.965e-02)); + r += mul(s7_1, M4(-7.483e-02, 1.594e-01, 1.747e-02, -4.957e-02, -8.034e-02, 3.226e-03, -9.016e-02, 6.601e-02, 6.784e-02, 9.068e-02, 1.376e-02, 1.284e-01, -6.647e-02, -1.523e-01, 1.083e-01, -1.671e-01)); + r += mul(s7_2, M4(3.809e-02, -2.300e-01, 9.337e-02, -7.427e-02, 2.687e-02, 7.178e-02, -3.732e-02, 1.850e-01, 9.315e-02, -5.495e-02, -1.183e-02, -1.315e-02, -3.891e-02, -5.993e-02, 3.488e-02, 6.137e-03)); + r += mul(s7_3, M4(-3.894e-02, -1.127e-02, -3.305e-03, -1.909e-01, 7.313e-02, -2.764e-02, -1.817e-03, 6.986e-02, 1.268e-03, 7.042e-02, -4.240e-02, 8.969e-02, -4.727e-02, 7.670e-02, 4.900e-02, 4.966e-02)); + r += mul(s7_4, M4(-4.756e-04, 5.827e-02, -1.501e-02, -2.065e-02, -3.107e-02, 5.694e-02, -1.732e-01, 1.295e-01, 9.486e-02, 4.963e-03, 1.125e-01, -1.603e-02, -1.566e-02, 8.881e-03, -1.428e-01, 1.537e-02)); + r += mul(s7_5, M4(-1.593e-01, -3.419e-02, -6.503e-02, 2.058e-01, 5.016e-02, -6.579e-02, -5.439e-02, 5.561e-03, -1.958e-02, -5.906e-02, -5.460e-02, -1.838e-01, 1.928e-01, 8.075e-02, 3.020e-02, -4.424e-02)); + r += mul(s7_6, M4(9.986e-02, -1.588e-02, 4.622e-02, 7.410e-02, 1.925e-01, -6.696e-02, -4.680e-02, 4.967e-02, -1.223e-01, 2.824e-02, -2.625e-02, 3.808e-02, -8.269e-02, -1.127e-01, 2.749e-02, 9.222e-03)); + r += mul(s7_7, M4(6.322e-02, 1.169e-01, -5.795e-02, -1.079e-03, -7.010e-02, 1.980e-01, -3.895e-02, -6.577e-02, -1.422e-01, 3.416e-02, -4.720e-02, -1.617e-01, 2.709e-02, 8.447e-02, -1.373e-01, 3.151e-02)); + r += mul(s7_8, M4(-1.497e-01, -5.371e-02, 1.870e-02, 4.774e-02, 6.793e-03, -8.559e-02, 3.785e-02, -5.313e-03, 1.966e-02, 2.851e-02, -6.064e-03, 1.514e-01, 2.522e-01, 1.173e-01, 9.527e-02, 1.197e-01)); + r += V4(-2.679e-02, 2.861e-03, 
2.983e-03, 1.786e-02); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); 
+ V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, 
s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += 
mul(s0_0, M4(-3.824e-03, 1.984e-02, -1.947e-01, 9.803e-03, 2.174e-01, -3.263e-02, -1.526e-01, 1.593e-03, -1.912e-02, -1.175e-01, -1.088e-01, -8.861e-02, -8.019e-02, 3.020e-02, 4.746e-02, -3.358e-02)); + r += mul(s0_1, M4(-1.177e-01, 8.201e-02, 1.411e-01, 3.350e-03, 4.358e-02, -1.561e-01, -6.960e-02, 2.106e-01, 1.618e-01, -6.133e-02, -2.230e-03, -3.605e-03, -1.173e-02, -1.137e-01, 6.437e-03, -1.214e-02)); + r += mul(s0_2, M4(8.418e-02, -1.156e-02, -1.589e-02, 7.733e-04, 1.901e-02, -2.164e-01, 7.217e-03, -5.674e-02, 1.313e-01, -2.424e-01, 3.826e-02, 5.194e-02, 8.220e-02, -1.985e-01, 3.844e-02, 2.949e-02)); + r += mul(s0_3, M4(1.491e-01, -1.970e-01, 1.615e-02, 4.271e-02, 5.862e-02, 5.701e-02, -6.334e-02, 7.912e-02, -9.144e-02, -1.073e-01, 3.700e-02, 1.672e-02, 1.982e-03, -1.455e-03, -1.904e-02, 7.679e-02)); + r += mul(s0_4, M4(-1.377e-01, -1.900e-01, -8.234e-02, -6.595e-02, 1.306e-02, 1.309e-01, -5.066e-02, 8.366e-02, -1.082e-01, -9.153e-02, 8.381e-02, 1.153e-01, -1.096e-01, 4.144e-02, 1.572e-01, -9.698e-02)); + r += mul(s0_5, M4(6.954e-02, 1.007e-01, 5.669e-03, 6.376e-02, -8.465e-02, -1.026e-01, -9.748e-02, -6.989e-03, 5.987e-02, -9.004e-02, -1.457e-01, 1.234e-01, 1.343e-01, 6.650e-02, 4.572e-02, 1.591e-02)); + r += mul(s0_6, M4(-1.079e-02, 2.281e-01, -5.435e-03, -4.308e-03, -3.168e-02, 1.446e-02, 5.164e-02, -1.238e-01, 1.077e-01, -1.439e-01, 3.583e-02, 7.127e-03, -2.933e-02, 1.393e-01, -4.583e-02, -2.124e-02)); + r += mul(s0_7, M4(-9.039e-02, -1.761e-02, 1.885e-01, 1.782e-02, 3.468e-02, 1.284e-01, 1.500e-01, 1.329e-01, 4.006e-02, 7.862e-02, -4.609e-02, 8.688e-02, 6.303e-02, -2.599e-01, -7.164e-02, -9.295e-02)); + r += mul(s0_8, M4(-9.183e-02, 9.019e-02, -1.442e-01, 3.553e-02, -1.751e-02, -2.352e-01, -1.772e-01, 1.164e-01, 2.843e-02, -1.063e-01, -3.228e-02, 9.867e-02, -2.188e-01, 8.323e-02, 6.436e-02, 7.404e-03)); + r += mul(s1_0, M4(1.096e-01, -7.150e-02, -2.326e-01, 3.074e-02, 6.687e-02, 4.780e-02, -2.172e-02, 6.202e-02, -3.545e-02, -8.431e-02, 2.420e-02, 
9.476e-03, 2.568e-02, -5.384e-02, -2.290e-02, 5.260e-02)); + r += mul(s1_1, M4(4.047e-02, -1.417e-01, 3.742e-01, 2.130e-01, -1.026e-02, 2.140e-01, -2.203e-01, 8.287e-02, 3.059e-03, -5.168e-02, 7.797e-02, -7.983e-03, -4.622e-02, -1.208e-02, -2.153e-01, 1.757e-01)); + r += mul(s1_2, M4(3.949e-02, -3.499e-01, 2.211e-02, -4.854e-02, -3.249e-02, -8.875e-02, 1.019e-01, 9.838e-02, -2.568e-02, 6.213e-02, 1.026e-01, 8.773e-02, 3.195e-02, -2.138e-01, -9.769e-03, 1.023e-01)); + r += mul(s1_3, M4(3.489e-01, 7.578e-02, -1.384e-01, -1.000e-01, 2.159e-01, -1.537e-01, 8.139e-02, 1.081e-01, 6.928e-02, 3.479e-03, -2.065e-01, 4.685e-02, 1.345e-01, -5.114e-02, 1.397e-03, -3.190e-04)); + r += mul(s1_4, M4(-1.131e-02, -3.310e-01, -2.831e-01, -1.713e-01, -4.212e-02, 1.771e-02, 2.622e-02, 7.984e-02, -1.656e-02, 3.108e-02, 1.089e-01, -3.210e-02, -1.209e-01, -7.105e-02, -2.352e-01, -4.034e-03)); + r += mul(s1_5, M4(4.338e-01, 2.230e-01, 1.184e-01, 1.444e-01, -2.695e-01, -2.069e-01, -5.883e-02, 5.791e-02, 1.479e-01, -6.455e-02, -4.461e-02, 5.482e-02, 9.545e-02, 2.171e-01, -8.118e-02, 6.398e-02)); + r += mul(s1_6, M4(-3.037e-02, -4.636e-02, 1.223e-01, -1.255e-01, -8.541e-02, 3.001e-02, 1.393e-01, -4.605e-02, -8.531e-02, 9.275e-02, 1.362e-01, -9.073e-03, -2.249e-02, 3.716e-03, 1.433e-01, 1.285e-01)); + r += mul(s1_7, M4(3.612e-02, 3.724e-02, 2.517e-01, 3.506e-02, 6.147e-02, 1.538e-01, -2.245e-02, 3.803e-02, -1.459e-01, -4.336e-02, 3.873e-02, 2.860e-02, -5.779e-02, 2.527e-02, -8.128e-02, -1.848e-01)); + r += mul(s1_8, M4(-2.045e-01, 3.185e-01, 1.897e-01, -3.090e-01, 1.303e-01, -4.772e-02, -1.929e-02, 5.857e-03, -8.579e-02, 1.297e-01, 3.964e-02, -1.187e-01, -5.910e-02, 8.492e-02, 7.102e-02, 2.462e-02)); + r += mul(s2_0, M4(-5.622e-02, -3.406e-02, 1.625e-01, -2.395e-02, 7.981e-02, -8.697e-02, -1.340e-02, -7.849e-02, 8.512e-02, -6.636e-02, 6.628e-02, 1.257e-01, 9.518e-02, -1.967e-02, 3.263e-02, -1.632e-02)); + r += mul(s2_1, M4(-8.568e-02, -2.104e-02, -9.114e-02, 2.791e-02, 4.465e-02, 5.472e-02, 
-1.561e-01, -8.667e-02, -8.310e-02, -2.272e-01, 8.860e-02, 3.920e-02, 2.148e-02, -4.088e-04, -3.415e-02, -9.479e-02)); + r += mul(s2_2, M4(1.967e-02, -1.177e-01, -8.033e-02, -1.082e-02, 8.279e-02, -4.859e-02, 8.015e-03, -1.620e-03, -5.299e-02, -1.518e-01, 1.951e-02, -9.250e-02, 1.210e-03, 8.942e-03, 6.476e-02, 8.816e-02)); + r += mul(s2_3, M4(6.961e-02, -2.541e-02, -8.874e-02, -8.571e-02, -6.565e-02, -8.235e-02, -7.851e-02, -2.462e-02, -5.045e-02, 1.492e-02, -1.060e-01, 9.583e-02, 7.954e-02, 1.517e-01, 9.226e-02, -5.570e-03)); + r += mul(s2_4, M4(4.027e-03, 5.900e-02, -9.730e-02, 9.346e-02, 2.364e-02, -5.125e-02, -1.369e-01, -1.545e-01, -2.032e-01, -1.652e-01, -1.208e-01, -1.012e-01, 4.560e-02, -1.362e-01, 4.839e-02, 3.833e-02)); + r += mul(s2_5, M4(-1.356e-01, -9.492e-02, 6.686e-03, 1.573e-01, 1.015e-01, -4.342e-03, -6.012e-02, 5.045e-02, -1.773e-01, 2.489e-02, -6.572e-02, -8.431e-02, 1.251e-01, 1.012e-01, -7.313e-02, 1.060e-01)); + r += mul(s2_6, M4(4.918e-02, -2.224e-02, 1.051e-02, -7.640e-02, -4.369e-02, -3.976e-02, -1.676e-01, -2.006e-02, 8.876e-02, 2.020e-02, 5.471e-02, -9.058e-02, 4.734e-02, -6.211e-02, 9.784e-02, -5.968e-02)); + r += mul(s2_7, M4(-3.554e-03, 1.414e-01, -7.057e-02, 2.734e-04, -1.762e-01, 2.698e-02, -4.323e-02, 6.518e-02, -2.096e-03, 2.544e-02, 1.057e-01, -1.179e-01, -3.325e-02, -9.233e-02, -9.906e-02, 5.904e-03)); + r += mul(s2_8, M4(-5.982e-02, 1.036e-01, -7.251e-02, -5.751e-02, 2.274e-02, -5.704e-02, -2.759e-02, -3.227e-03, 3.411e-02, -2.169e-01, 4.238e-02, -5.646e-02, 6.166e-02, -8.737e-02, -5.433e-02, -2.193e-02)); + r += mul(s3_0, M4(9.979e-02, 6.667e-02, -2.051e-01, 3.280e-01, 6.811e-02, 8.896e-03, -6.806e-03, -5.281e-02, -1.662e-02, 7.599e-02, -2.618e-02, -9.118e-04, -1.773e-01, -1.096e-01, 2.660e-01, -3.410e-02)); + r += mul(s3_1, M4(7.067e-02, 1.609e-01, -4.891e-01, 1.395e-01, -2.836e-02, 4.949e-02, 5.419e-02, -1.912e-02, 1.265e-01, 9.371e-02, -2.274e-01, -2.388e-02, 1.294e-01, 1.215e-01, -6.401e-02, 2.552e-02)); + r += mul(s3_2, 
M4(-3.948e-01, 1.481e-01, 1.238e-01, 8.896e-02, 5.564e-02, -4.630e-02, 8.182e-02, -5.120e-02, 3.065e-02, 2.289e-02, -4.132e-02, 5.470e-02, -5.811e-04, 4.488e-02, 4.771e-02, 1.252e-01)); + r += mul(s3_3, M4(1.083e-01, 3.190e-01, 3.322e-02, 1.615e-02, -6.420e-03, 7.514e-02, 1.930e-02, 9.305e-02, -1.854e-02, -6.599e-02, -1.284e-01, 4.180e-02, -2.438e-01, -3.204e-01, -1.481e-01, 2.012e-01)); + r += mul(s3_4, M4(1.541e-01, -1.294e-01, 3.407e-01, -2.888e-02, 2.320e-02, -1.261e-01, 1.244e-01, -9.444e-02, 1.904e-01, 7.003e-02, -2.908e-02, -4.724e-02, -1.739e-01, -1.706e-01, 2.284e-01, 3.447e-01)); + r += mul(s3_5, M4(2.852e-01, -3.790e-02, -1.617e-01, 3.981e-01, 4.763e-02, -9.060e-02, -5.601e-02, -3.278e-02, 3.698e-02, 1.148e-01, 7.054e-02, 2.161e-02, -5.027e-02, 1.229e-01, 3.213e-01, -2.440e-02)); + r += mul(s3_6, M4(6.498e-02, -2.845e-02, 4.051e-01, -4.428e-02, 6.620e-03, -6.252e-02, -1.915e-02, -8.686e-02, -8.271e-02, 5.187e-02, -2.114e-02, 3.542e-02, 3.496e-02, 4.899e-02, -1.401e-01, -2.056e-01)); + r += mul(s3_7, M4(-8.188e-02, 8.977e-02, 3.044e-02, -3.808e-01, -1.818e-02, -8.713e-02, 1.459e-01, -1.006e-01, 6.703e-02, 7.376e-02, 4.647e-02, -8.068e-03, -8.089e-02, -4.263e-02, -1.011e-02, -5.059e-02)); + r += mul(s3_8, M4(4.991e-03, 1.335e-01, 2.895e-01, -2.575e-02, 2.970e-02, 1.426e-02, -3.004e-02, -9.444e-02, -7.585e-02, 7.416e-02, 1.773e-01, 2.740e-02, -1.133e-01, -1.625e-01, -2.058e-01, 1.230e-01)); + r += mul(s4_0, M4(-2.102e-02, 3.131e-02, -1.211e-01, -7.470e-03, -1.591e-02, -7.396e-02, 1.130e-01, 8.145e-02, 9.655e-02, -1.283e-01, -2.491e-02, -9.100e-02, -7.133e-02, 8.901e-03, -6.968e-03, 6.504e-02)); + r += mul(s4_1, M4(-3.988e-02, 3.759e-02, -2.350e-01, -3.543e-02, -1.110e-02, -1.670e-01, -2.306e-01, -6.088e-02, 4.749e-02, 4.387e-02, 4.186e-02, -8.751e-02, 5.597e-02, -1.072e-01, -3.988e-03, 1.393e-01)); + r += mul(s4_2, M4(1.889e-02, -4.900e-02, 1.151e-01, 5.871e-02, 1.303e-01, 5.656e-02, 2.618e-02, -2.952e-02, 1.727e-01, 1.147e-03, -1.948e-01, -1.243e-01, 
-4.239e-02, 7.484e-02, -9.689e-02, 9.238e-02)); + r += mul(s4_3, M4(1.297e-01, -4.217e-02, -3.800e-02, -6.907e-02, 1.070e-01, -4.291e-02, -1.046e-01, 3.485e-02, -5.988e-02, 2.235e-02, -1.191e-01, 7.352e-02, -2.498e-01, 9.790e-02, -6.694e-02, 2.014e-02)); + r += mul(s4_4, M4(-2.211e-02, 1.964e-01, -6.388e-03, 2.147e-01, -9.189e-02, 2.813e-02, 1.456e-01, -6.744e-02, -8.596e-02, 1.435e-01, -8.166e-02, -3.708e-02, 2.932e-02, -3.732e-03, -5.609e-02, 4.482e-02)); + r += mul(s4_5, M4(5.921e-02, 2.883e-02, -3.935e-02, 9.551e-03, -4.956e-02, 6.194e-02, -4.123e-02, -1.835e-03, -1.528e-01, -9.703e-02, -1.265e-01, -5.136e-02, -4.934e-03, 2.766e-02, -2.725e-02, 1.749e-01)); + r += mul(s4_6, M4(-2.997e-02, 6.308e-02, -9.442e-02, -5.405e-02, -4.856e-02, 1.301e-04, -2.347e-02, -3.285e-02, -5.298e-03, 1.065e-01, 9.091e-02, 1.112e-01, 1.438e-01, -2.032e-02, 9.683e-03, -7.034e-02)); + r += mul(s4_7, M4(-6.707e-02, -1.933e-01, 1.524e-01, 1.439e-01, -7.010e-02, 1.156e-01, 3.741e-03, -5.321e-02, 1.095e-01, 3.952e-02, 1.725e-01, 5.637e-02, 2.454e-02, -1.182e-02, 3.138e-02, 4.080e-02)); + r += mul(s4_8, M4(7.707e-02, -1.650e-01, -5.874e-02, 9.839e-02, 9.945e-03, -7.412e-02, -2.603e-02, -1.213e-02, -3.176e-02, 1.431e-01, 1.369e-01, -2.394e-02, 4.046e-02, -1.273e-01, 2.042e-03, 5.492e-02)); + r += mul(s5_0, M4(-6.479e-02, -3.666e-02, 3.631e-02, -1.134e-01, 1.970e-01, -2.427e-02, 2.536e-01, 1.759e-01, 3.687e-02, -2.974e-02, -1.511e-01, -1.112e-01, 3.452e-03, 1.038e-01, -3.214e-02, -6.203e-03)); + r += mul(s5_1, M4(-9.080e-02, -1.463e-02, 8.569e-02, -1.903e-02, -3.211e-01, -5.551e-02, -4.583e-02, 1.149e-01, 9.762e-02, 9.307e-02, 4.994e-02, -7.576e-03, 2.918e-01, 1.427e-01, -1.242e-01, -1.260e-02)); + r += mul(s5_2, M4(1.430e-02, -1.786e-01, 8.093e-02, -7.370e-02, 2.378e-01, 7.734e-02, 6.844e-02, 2.921e-01, -3.155e-02, -2.868e-02, -4.562e-02, 9.424e-02, -2.106e-01, -1.462e-01, 2.395e-01, 4.309e-02)); + r += mul(s5_3, M4(9.591e-02, -9.144e-02, -4.656e-02, 4.231e-02, 2.264e-02, 1.354e-02, 
-2.381e-01, -1.298e-01, -3.961e-02, -4.566e-02, -5.357e-02, -1.811e-01, -1.441e-01, 5.399e-02, 3.584e-01, 7.013e-02)); + r += mul(s5_4, M4(-3.230e-02, -8.598e-02, -1.194e-01, 1.281e-01, 4.380e-01, -2.147e-01, 1.278e-01, -2.084e-01, 1.027e-01, -5.091e-02, 1.104e-02, -2.743e-02, 2.803e-01, -4.974e-02, 6.860e-02, -4.789e-02)); + r += mul(s5_5, M4(-1.439e-01, -1.892e-01, -5.241e-02, -2.163e-02, -1.568e-01, 3.539e-01, 2.662e-02, -1.477e-01, -1.227e-01, -1.204e-01, -6.518e-02, 8.005e-02, 5.432e-02, 1.226e-03, -2.454e-03, 4.690e-02)); + r += mul(s5_6, M4(-3.239e-02, 1.420e-02, -3.429e-02, -3.895e-02, 6.057e-01, 4.380e-02, -6.836e-01, 1.556e-01, 6.391e-03, -7.455e-02, 2.477e-02, -5.640e-02, -3.121e-01, 4.938e-02, -9.663e-02, 1.507e-01)); + r += mul(s5_7, M4(3.230e-03, -5.809e-02, -1.020e-02, 4.602e-02, 1.941e-02, 9.688e-02, 3.482e-01, 9.593e-02, 6.448e-02, 1.075e-01, 7.606e-02, 3.724e-02, -6.785e-02, 3.216e-01, -3.050e-01, 1.372e-01)); + r += mul(s5_8, M4(2.103e-01, 1.393e-01, 1.636e-01, -9.668e-02, 4.504e-02, -1.291e-01, -8.852e-04, -1.928e-01, 7.878e-02, -3.617e-02, 6.621e-02, 4.323e-02, -1.529e-01, 8.576e-02, 5.715e-02, 1.650e-01)); + r += mul(s6_0, M4(-2.095e-01, 9.915e-02, -4.596e-02, -1.748e-01, -1.098e-01, -6.448e-02, 1.780e-01, 1.125e-01, -2.114e-01, 4.083e-01, 1.558e-03, 1.899e-02, 4.266e-02, 1.046e-01, 3.426e-02, -9.806e-03)); + r += mul(s6_1, M4(1.583e-01, -9.478e-02, 4.572e-02, -1.527e-02, 1.246e-01, -7.757e-02, -2.957e-02, -4.569e-02, 3.186e-01, 1.932e-01, -3.262e-02, 1.058e-01, 2.350e-01, -1.374e-01, 2.152e-01, 1.027e-01)); + r += mul(s6_2, M4(4.265e-02, 1.480e-01, 1.762e-01, -2.729e-01, -7.674e-02, -2.616e-01, -5.785e-02, -6.605e-02, -9.494e-02, -7.623e-03, 5.629e-02, 1.979e-02, 6.318e-02, 1.119e-01, -1.775e-01, 4.471e-02)); + r += mul(s6_3, M4(1.416e-01, 6.286e-02, 2.753e-01, 4.370e-02, 2.555e-02, 1.217e-01, 2.916e-02, -8.370e-02, -3.478e-01, 9.489e-03, -2.076e-01, 6.405e-02, 7.222e-02, 8.956e-02, 1.820e-02, -1.510e-01)); + r += mul(s6_4, M4(-4.919e-02, 
3.645e-02, -2.264e-01, -1.779e-02, 3.702e-02, -1.291e-01, -1.130e-01, 8.156e-02, 7.529e-02, -1.063e-01, 9.065e-02, -3.642e-02, -2.478e-02, -5.106e-02, 2.422e-02, 1.540e-01)); + r += mul(s6_5, M4(-2.097e-02, -9.303e-02, -5.754e-02, -1.113e-01, -2.005e-02, 2.385e-02, 2.058e-02, -8.553e-02, -2.611e-01, -2.323e-01, -2.546e-02, 9.836e-02, -3.118e-02, 1.810e-01, 4.481e-02, 6.158e-02)); + r += mul(s6_6, M4(-1.205e-01, 3.792e-02, -2.491e-01, 1.104e-02, 6.093e-02, -6.249e-03, 2.042e-02, -4.415e-02, -7.268e-02, 4.343e-01, 2.416e-01, 8.859e-02, 1.997e-01, 2.222e-01, -1.322e-01, 3.828e-02)); + r += mul(s6_7, M4(5.871e-02, -3.529e-01, 2.206e-02, 3.605e-03, -2.877e-01, 1.420e-01, -1.695e-02, -5.085e-02, 1.826e-01, -1.024e-01, -1.523e-01, -1.639e-01, 5.823e-02, -1.405e-01, -1.124e-01, -2.890e-02)); + r += mul(s6_8, M4(4.023e-02, -3.477e-02, -7.664e-02, 6.820e-02, 5.723e-02, 1.819e-01, -9.973e-02, -1.035e-01, -1.219e-01, -2.891e-01, 2.666e-01, -3.550e-02, -1.744e-01, -7.098e-02, -9.127e-02, -9.753e-02)); + r += mul(s7_0, M4(9.776e-02, 3.419e-02, 7.676e-02, -4.181e-02, -1.284e-01, 7.161e-02, 5.880e-02, -9.813e-02, 1.232e-01, -7.420e-02, -6.708e-02, -7.536e-02, -3.379e-02, -3.744e-02, -4.778e-02, 2.363e-02)); + r += mul(s7_1, M4(-1.066e-01, -1.720e-02, 1.412e-01, 3.237e-03, 7.086e-03, 9.996e-02, -1.067e-01, 3.318e-02, -7.187e-02, 1.859e-01, 1.658e-02, 5.164e-02, 1.636e-01, 5.624e-02, -5.522e-02, 2.799e-02)); + r += mul(s7_2, M4(-3.080e-02, 5.477e-02, -6.302e-02, 2.429e-02, -1.038e-01, -1.639e-03, 4.041e-02, -2.281e-02, 9.072e-02, -6.279e-02, 1.764e-02, -2.716e-02, -4.723e-02, -2.239e-02, -3.870e-02, -2.529e-02)); + r += mul(s7_3, M4(-9.795e-02, -1.404e-01, 3.984e-02, 5.945e-02, -2.916e-01, -7.429e-02, -3.369e-02, 5.798e-02, -3.809e-02, -8.103e-02, 9.385e-02, -9.303e-02, -2.433e-02, 1.103e-01, 6.621e-02, -5.597e-02)); + r += mul(s7_4, M4(6.292e-03, -4.406e-02, -3.898e-02, 1.096e-01, 1.658e-01, -3.856e-02, 2.735e-02, 1.190e-01, 1.086e-02, 6.992e-02, -6.964e-02, 1.049e-01, -8.304e-02, 
-2.879e-01, -2.934e-03, -6.641e-02)); + r += mul(s7_5, M4(9.370e-03, -1.240e-01, 3.014e-02, 5.018e-02, 2.381e-01, -6.004e-02, -3.740e-02, -4.776e-03, -2.297e-02, 1.405e-01, -5.090e-02, -2.799e-02, -1.277e-02, 6.238e-02, -7.780e-03, -8.577e-02)); + r += mul(s7_6, M4(-5.754e-02, 1.323e-01, 1.598e-01, 1.861e-03, 7.371e-02, 3.565e-02, -5.038e-02, -3.091e-02, -1.125e-02, -1.466e-02, 3.518e-02, 1.336e-02, 1.662e-01, 1.033e-02, 1.263e-02, 7.036e-03)); + r += mul(s7_7, M4(3.799e-02, 7.738e-02, -1.830e-01, 1.088e-02, 2.345e-02, -8.913e-02, 4.484e-03, -7.087e-04, 2.995e-02, 6.962e-02, 6.441e-03, -1.072e-01, -1.717e-01, -6.303e-02, -6.593e-03, -3.927e-02)); + r += mul(s7_8, M4(-5.977e-02, 3.553e-02, -5.340e-02, -1.383e-03, -1.593e-01, -5.221e-02, -6.791e-02, -4.706e-03, -3.981e-02, -8.876e-02, -5.683e-02, -4.046e-02, -4.187e-02, 6.329e-02, 1.353e-01, -4.508e-02)); + r += V4(5.171e-03, -3.519e-03, 1.162e-02, 4.614e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.748e-01, -1.615e-01, 1.196e-01, 9.843e-02, 4.946e-02, 1.034e-01, -1.539e-01, 1.314e-01, 7.507e-02, -2.103e-01, -4.577e-02, 4.777e-02, -1.197e-03, -7.288e-02, -3.140e-02, 3.566e-02)); + r += mul(s0_1, M4(-7.328e-02, 1.621e-03, -1.413e-01, -1.474e-01, 2.385e-02, 7.395e-02, 1.558e-02, -1.473e-01, -2.019e-03, -1.272e-01, -8.000e-02, 8.127e-02, -5.842e-02, 1.357e-01, 
1.823e-02, 3.446e-02)); + r += mul(s0_2, M4(7.100e-02, 2.505e-01, 5.390e-02, -2.937e-02, -7.170e-02, -7.104e-02, -1.392e-01, 5.778e-02, 1.155e-01, -2.112e-01, -1.450e-02, 7.374e-02, 1.321e-02, 8.969e-02, 1.288e-02, -5.910e-02)); + r += mul(s0_3, M4(1.684e-02, 2.163e-02, 7.136e-02, 8.144e-02, 1.947e-01, -2.859e-02, 9.785e-02, 1.356e-01, -9.415e-03, -1.112e-01, 7.570e-02, -1.091e-01, -2.500e-01, 1.681e-01, -9.164e-02, 8.519e-02)); + r += mul(s0_4, M4(-1.422e-01, -9.273e-03, -1.359e-02, -3.367e-02, 1.059e-01, -5.308e-02, 1.559e-01, -9.744e-02, 2.041e-01, 1.342e-01, -6.243e-02, 9.553e-02, -1.557e-02, 2.216e-01, 4.646e-02, 3.626e-02)); + r += mul(s0_5, M4(5.181e-02, -4.935e-02, 6.051e-02, 6.619e-02, 1.311e-01, -5.899e-02, -1.714e-02, 9.745e-02, 8.865e-02, -1.504e-01, -1.081e-01, -6.180e-02, -1.285e-01, 6.585e-02, -5.292e-02, -1.062e-01)); + r += mul(s0_6, M4(4.277e-02, 1.043e-01, -1.369e-01, 6.853e-02, 5.930e-02, 3.663e-02, -4.107e-02, -1.006e-01, -9.768e-03, -3.706e-02, 6.806e-02, -9.002e-02, 5.096e-02, -1.300e-01, -7.291e-02, 1.431e-01)); + r += mul(s0_7, M4(1.037e-01, -9.086e-02, 7.090e-03, 8.210e-02, -7.251e-02, -9.346e-02, -3.284e-02, 8.748e-02, 4.957e-02, 9.675e-02, 6.061e-02, -5.284e-02, 6.728e-03, -7.363e-02, 9.566e-02, 6.330e-02)); + r += mul(s0_8, M4(-2.623e-02, 1.047e-01, -8.739e-04, -2.738e-02, 1.967e-01, 3.078e-02, 1.638e-01, 8.194e-02, -3.152e-02, 5.328e-02, 1.483e-02, -5.355e-02, -1.456e-01, -1.638e-03, 5.093e-02, 8.867e-02)); + r += mul(s1_0, M4(-2.947e-01, -4.130e-01, 9.883e-02, -6.187e-02, -9.668e-02, -6.321e-02, 9.855e-02, 1.098e-01, 3.308e-02, -2.460e-02, 8.938e-02, 1.724e-01, 1.259e-01, 4.671e-02, -5.314e-02, 1.246e-02)); + r += mul(s1_1, M4(1.907e-01, -3.510e-02, 1.183e-01, -2.284e-02, -4.719e-02, 2.314e-01, -2.440e-01, 7.838e-02, 3.864e-02, -1.380e-01, 4.039e-02, 6.139e-02, -5.638e-02, 1.308e-02, -5.999e-02, -8.289e-02)); + r += mul(s1_2, M4(-6.946e-02, -2.491e-01, -3.473e-01, -3.905e-03, -2.543e-02, -1.474e-01, 1.076e-01, 6.376e-02, -1.941e-01, 
1.551e-01, 9.853e-02, 2.544e-02, 1.372e-01, -2.749e-02, -7.466e-02, 1.019e-01)); + r += mul(s1_3, M4(-5.817e-03, 1.422e-01, 2.063e-02, -1.789e-01, 2.091e-02, 1.212e-01, 9.807e-02, -2.877e-02, 1.791e-02, 5.875e-02, 7.261e-02, -4.916e-02, -1.170e-01, -1.439e-01, -3.418e-02, 7.885e-02)); + r += mul(s1_4, M4(-5.330e-01, -6.688e-02, -8.916e-03, 1.409e-01, 1.859e-01, 2.055e-03, 8.773e-02, 1.338e-01, 1.177e-01, -1.205e-01, 2.362e-01, 9.410e-02, 2.480e-01, 2.385e-01, 5.357e-02, -3.786e-03)); + r += mul(s1_5, M4(2.259e-01, 1.886e-01, -3.190e-01, -3.224e-01, 5.312e-02, -2.302e-02, -3.165e-02, -1.275e-01, -7.705e-02, 1.638e-01, -6.539e-02, -4.015e-02, -3.581e-03, 1.161e-02, -2.000e-02, 1.375e-03)); + r += mul(s1_6, M4(-4.769e-02, 1.558e-01, -1.516e-01, -9.477e-02, -4.890e-02, 8.982e-02, -4.474e-02, 5.636e-02, -1.303e-01, -2.098e-02, 8.743e-02, 7.577e-02, 7.475e-02, -3.978e-02, 1.887e-03, 4.019e-02)); + r += mul(s1_7, M4(4.887e-01, 1.727e-01, 1.540e-01, 1.387e-01, -6.586e-03, 1.452e-01, -1.666e-01, -3.717e-02, 1.178e-01, -7.970e-02, 1.419e-01, 1.066e-01, -4.486e-02, -1.489e-01, 1.204e-01, 9.282e-03)); + r += mul(s1_8, M4(7.865e-03, 2.430e-01, 9.405e-02, 2.463e-01, -9.942e-02, -1.766e-02, 8.818e-02, 4.626e-02, -1.734e-01, 1.733e-01, 6.431e-02, 1.807e-02, 1.419e-01, 1.384e-03, 1.402e-01, -4.538e-03)); + r += mul(s2_0, M4(-5.657e-02, 1.028e-01, 3.944e-02, -6.775e-03, -1.748e-02, 4.732e-02, -7.919e-02, 5.948e-03, -1.338e-02, -5.615e-02, -7.276e-02, 9.564e-02, 2.020e-02, -4.129e-02, -7.555e-02, -1.745e-01)); + r += mul(s2_1, M4(5.225e-02, -4.729e-02, 1.293e-02, -3.269e-02, 1.056e-01, -7.550e-02, 4.480e-02, -1.898e-02, -6.565e-02, 7.022e-02, -7.420e-02, -2.915e-02, -4.022e-02, -1.859e-01, -3.307e-02, -6.515e-02)); + r += mul(s2_2, M4(7.045e-04, 3.107e-03, 7.490e-02, -1.396e-02, -1.606e-02, -7.554e-02, 7.387e-02, 8.452e-02, 9.357e-02, -3.828e-02, -4.244e-02, 8.902e-02, 1.833e-03, -5.397e-02, -8.971e-02, 1.277e-01)); + r += mul(s2_3, M4(1.095e-02, 1.951e-01, 2.717e-02, -1.180e-02, 
1.469e-02, -6.277e-02, 4.388e-02, -2.989e-02, -6.168e-02, -1.791e-01, 5.965e-02, -2.100e-02, -3.944e-02, 4.708e-02, -5.720e-02, 9.102e-02)); + r += mul(s2_4, M4(7.482e-02, 7.287e-02, 1.403e-02, -9.716e-03, 7.961e-02, -1.097e-01, -3.964e-02, 1.354e-01, 3.683e-02, 1.352e-01, 1.112e-01, 1.081e-01, -4.575e-02, -1.005e-01, 6.648e-02, 6.886e-02)); + r += mul(s2_5, M4(-7.712e-02, -7.929e-02, 3.142e-02, 4.174e-02, -7.125e-04, 6.194e-02, -5.649e-02, 3.229e-02, -1.864e-02, 6.764e-03, 6.032e-02, 2.846e-02, 1.494e-01, -8.652e-03, -1.685e-02, -4.379e-03)); + r += mul(s2_6, M4(6.293e-02, 1.324e-01, 9.080e-02, -1.523e-01, 7.835e-02, 1.507e-01, -3.016e-02, 2.567e-02, -6.138e-02, -7.353e-02, 3.133e-02, 3.352e-02, -2.064e-02, 9.068e-02, -8.710e-02, -7.440e-02)); + r += mul(s2_7, M4(2.154e-02, 8.069e-02, -1.166e-02, 5.221e-02, 1.772e-01, -3.329e-03, 5.671e-02, -5.385e-02, 4.746e-02, 1.224e-01, 9.627e-02, -5.288e-02, 2.469e-02, 1.710e-01, -1.491e-02, 1.499e-01)); + r += mul(s2_8, M4(-6.704e-04, 1.070e-01, 2.679e-02, -5.011e-02, 1.453e-01, 7.252e-02, -1.393e-01, -1.075e-01, -4.456e-04, 2.186e-02, -4.725e-02, -1.362e-01, 1.527e-02, 2.724e-02, 2.545e-02, 8.717e-02)); + r += mul(s3_0, M4(3.017e-01, -1.501e-01, -1.961e-01, -7.417e-02, 1.654e-02, -1.148e-01, -6.167e-02, 7.964e-02, 9.864e-02, 9.110e-02, 5.213e-02, -4.144e-02, 1.049e-01, 1.637e-01, 8.912e-02, -1.214e-02)); + r += mul(s3_1, M4(-3.045e-02, -2.177e-02, -3.691e-02, 4.217e-01, 2.646e-02, -9.921e-02, -2.771e-02, -3.207e-02, 6.110e-02, 1.510e-02, -1.159e-01, -3.572e-02, -6.402e-02, -1.384e-01, -6.026e-02, -3.557e-01)); + r += mul(s3_2, M4(1.718e-02, -1.570e-01, 3.399e-01, -3.092e-01, 2.826e-02, 2.188e-02, 2.107e-02, -4.435e-02, 1.802e-01, -1.031e-01, -7.824e-03, 1.364e-02, -1.114e-02, -2.933e-01, 1.581e-01, -1.537e-01)); + r += mul(s3_3, M4(2.864e-01, 1.998e-01, -2.899e-01, -1.224e-01, 2.005e-02, -1.923e-02, -1.397e-02, 1.772e-01, -5.151e-02, -2.643e-02, 2.733e-02, -6.762e-02, -2.189e-02, 8.119e-02, -5.560e-02, -2.332e-01)); + r += 
mul(s3_4, M4(-1.826e-01, -1.490e-01, -2.736e-01, -4.559e-02, 1.390e-01, 6.365e-03, 1.035e-01, 9.957e-02, 1.353e-01, -5.694e-02, 5.557e-02, -9.781e-03, 1.258e-01, -5.048e-02, -3.724e-01, -2.814e-01)); + r += mul(s3_5, M4(1.173e-01, -9.484e-02, -8.183e-02, 1.112e-01, -8.685e-02, 2.307e-01, 4.268e-02, 7.449e-02, 5.437e-03, -2.596e-02, 6.978e-02, 9.838e-02, -6.126e-02, -2.213e-01, 2.726e-02, -1.269e-01)); + r += mul(s3_6, M4(-2.323e-02, -3.830e-02, 1.485e-01, -1.917e-01, 3.082e-02, 1.853e-02, -6.795e-02, 7.886e-02, 9.713e-03, 1.821e-01, 5.631e-02, 9.517e-02, -1.825e-01, -1.390e-01, 1.019e-01, 1.430e-01)); + r += mul(s3_7, M4(1.461e-02, -1.586e-01, -2.037e-01, -6.212e-02, -2.341e-01, 4.101e-02, -4.673e-02, -5.431e-02, -1.974e-01, -5.334e-02, 1.144e-02, 1.407e-03, -3.401e-02, -9.971e-02, 7.023e-02, 1.888e-01)); + r += mul(s3_8, M4(-9.911e-03, 5.079e-02, 1.603e-03, 1.441e-01, -1.524e-01, 2.441e-01, 3.075e-02, -7.364e-02, 6.665e-03, 5.184e-02, -3.165e-02, -1.421e-01, -1.619e-01, 6.534e-03, -1.063e-01, -2.173e-02)); + r += mul(s4_0, M4(1.057e-01, 9.868e-02, 2.256e-02, -3.395e-02, -3.246e-03, 1.111e-01, -3.631e-02, 1.443e-02, -1.252e-01, 3.224e-02, 3.818e-02, 1.087e-02, -2.653e-02, 1.160e-01, 6.845e-03, 1.214e-01)); + r += mul(s4_1, M4(3.058e-02, -6.265e-03, 1.794e-01, -8.032e-02, -2.890e-02, 8.468e-02, 1.151e-01, 1.418e-02, -4.156e-02, 5.453e-02, 1.970e-01, 5.255e-02, 1.206e-01, -4.842e-02, -1.200e-01, 5.315e-02)); + r += mul(s4_2, M4(1.562e-01, -8.529e-02, -1.301e-01, -6.048e-02, -1.460e-01, 1.099e-01, -2.493e-02, -1.052e-01, 3.668e-02, -4.395e-02, 2.758e-02, 9.260e-02, -6.341e-03, 1.488e-01, 1.854e-02, -1.228e-01)); + r += mul(s4_3, M4(7.929e-02, -9.089e-02, -1.114e-01, -9.822e-02, -1.339e-03, 4.170e-02, -3.721e-02, 1.412e-02, -1.804e-01, -2.298e-03, -8.089e-02, 5.274e-02, -6.457e-03, -2.378e-01, -1.845e-02, -1.076e-01)); + r += mul(s4_4, M4(-1.244e-02, 1.377e-01, -3.954e-02, -1.343e-01, 1.463e-03, -7.157e-03, 1.271e-02, -3.069e-02, -1.434e-01, 7.176e-02, 1.542e-01, 
-1.758e-02, -4.979e-02, -3.072e-02, -1.705e-02, -4.326e-02)); + r += mul(s4_5, M4(1.421e-01, -7.427e-02, -4.890e-02, 1.688e-02, 9.534e-02, -2.860e-02, -5.040e-02, -5.590e-02, -1.669e-01, 1.852e-02, 8.911e-02, -1.499e-03, 9.191e-02, -7.256e-02, -5.635e-02, -1.770e-01)); + r += mul(s4_6, M4(9.657e-02, 1.743e-01, -6.738e-03, 9.367e-02, 5.639e-02, -1.266e-01, -5.179e-02, -1.271e-01, 1.155e-01, 9.245e-02, 8.212e-02, -8.191e-02, 3.643e-02, -1.235e-01, -5.844e-03, -1.685e-01)); + r += mul(s4_7, M4(-6.505e-02, -1.373e-01, -7.947e-03, -4.844e-02, 1.172e-01, -5.261e-02, -3.373e-02, 2.592e-02, 1.092e-01, 5.494e-02, 5.989e-02, 1.640e-02, -1.102e-01, -1.626e-02, -2.203e-01, 2.464e-02)); + r += mul(s4_8, M4(-1.233e-01, 2.385e-02, -1.691e-03, -1.214e-01, 7.522e-02, -1.268e-01, 4.043e-02, 2.823e-02, 3.916e-02, 5.684e-02, -3.064e-02, -5.190e-02, -3.799e-02, 5.107e-02, -1.892e-01, -4.348e-02)); + r += mul(s5_0, M4(-5.303e-02, 5.702e-02, -8.550e-02, 7.232e-02, -7.413e-01, 4.604e-01, -4.028e-02, 3.394e-01, 1.833e-01, -1.569e-01, -1.436e-01, -3.062e-02, -1.673e-01, 8.320e-02, 4.586e-02, -7.652e-03)); + r += mul(s5_1, M4(-8.396e-02, 7.796e-02, -6.078e-02, 6.849e-02, -1.824e-01, 3.107e-01, 1.817e-02, -1.860e-01, 5.479e-02, -6.675e-02, 1.574e-02, 2.615e-02, -5.817e-02, -7.982e-02, 1.045e-01, -1.335e-01)); + r += mul(s5_2, M4(5.750e-02, -9.499e-02, -1.621e-01, 4.559e-02, -2.783e-01, -1.867e-01, 3.312e-02, 1.404e-01, 1.107e-01, 1.630e-01, -1.072e-01, 1.740e-01, -1.165e-01, 8.215e-02, 2.852e-01, -1.126e-01)); + r += mul(s5_3, M4(-1.659e-02, -7.775e-02, 5.727e-02, -1.463e-01, 4.364e-02, -3.235e-02, 2.315e-01, -2.474e-01, 1.484e-01, -1.107e-01, 3.104e-03, -1.521e-01, 6.964e-02, -2.303e-02, -6.717e-02, 1.543e-01)); + r += mul(s5_4, M4(-1.395e-01, -2.826e-02, -4.643e-02, -1.125e-01, 3.335e-01, 5.176e-01, 2.013e-01, 3.713e-01, 8.691e-02, -6.559e-02, -1.357e-01, -4.651e-02, 5.091e-02, -2.091e-01, -1.030e-01, -1.264e-01)); + r += mul(s5_5, M4(-1.997e-01, -1.546e-01, 1.519e-01, 7.314e-02, 4.439e-01, 
2.572e-01, 2.348e-02, -5.472e-02, 1.685e-01, -8.481e-02, 3.522e-03, 5.953e-02, -5.739e-03, -2.416e-01, -1.984e-02, -8.440e-02)); + r += mul(s5_6, M4(-1.437e-01, -1.509e-02, -1.688e-02, -4.559e-02, 1.703e-01, 8.404e-02, -1.447e-01, 2.836e-01, 6.791e-02, -4.945e-02, 1.353e-02, 9.114e-02, -2.826e-01, 1.348e-01, 1.145e-02, 1.919e-01)); + r += mul(s5_7, M4(5.095e-02, 1.377e-01, -5.523e-02, 1.399e-02, -4.539e-01, 6.555e-02, 2.816e-01, -7.605e-02, -3.078e-02, -3.283e-02, -6.590e-02, 8.787e-02, 6.161e-03, 4.405e-01, -4.672e-02, 7.676e-02)); + r += mul(s5_8, M4(-1.126e-01, 2.139e-01, 1.042e-02, 8.322e-02, -1.688e-01, -1.951e-01, -1.810e-01, -5.828e-02, 3.061e-03, 8.707e-02, -8.261e-02, 9.187e-02, -2.217e-01, -1.355e-01, -5.980e-02, 8.894e-02)); + r += mul(s6_0, M4(3.375e-02, -6.837e-03, 8.504e-03, 7.628e-02, -2.573e-02, 7.264e-02, 1.587e-01, 1.557e-02, -1.811e-01, -1.192e-01, 1.897e-01, 2.055e-01, -3.040e-02, -1.131e-03, 8.485e-02, 1.370e-01)); + r += mul(s6_1, M4(4.436e-03, 1.918e-01, -5.534e-02, 1.431e-01, -2.456e-01, 6.074e-02, 9.280e-02, -2.043e-02, 2.538e-01, -3.557e-01, -1.565e-01, 3.277e-01, -6.989e-02, -8.344e-02, 4.863e-01, 1.682e-01)); + r += mul(s6_2, M4(9.147e-02, 1.444e-01, 4.041e-02, 9.281e-02, -4.971e-02, -6.443e-02, -9.659e-02, 9.822e-02, -4.897e-01, 1.339e-01, 1.057e-01, -8.598e-02, 7.949e-02, 1.796e-01, 1.406e-01, -1.317e-01)); + r += mul(s6_3, M4(-3.108e-02, 1.187e-01, -1.405e-01, -2.096e-02, -1.240e-02, 2.296e-02, -7.784e-02, 1.341e-02, -1.308e-02, 1.270e-01, -2.542e-02, -5.869e-02, -4.655e-02, 6.312e-02, -6.959e-02, -2.146e-02)); + r += mul(s6_4, M4(-6.897e-02, -1.439e-01, 1.375e-02, -5.901e-02, 8.027e-02, -9.178e-02, -1.270e-01, -3.970e-02, 3.945e-02, -1.408e-01, -6.189e-01, -5.693e-02, -2.586e-01, -7.398e-02, 3.017e-01, 6.918e-02)); + r += mul(s6_5, M4(2.303e-02, 6.761e-02, -2.823e-02, -2.381e-01, 6.317e-02, -2.130e-01, 9.502e-02, 1.910e-01, -3.220e-01, -1.783e-01, -8.975e-02, 2.929e-01, -2.166e-01, -3.300e-04, -8.396e-02, 4.748e-02)); + r += 
mul(s6_6, M4(8.997e-02, 9.390e-02, 3.089e-02, 6.613e-02, -7.310e-02, -6.031e-02, 4.047e-03, 3.567e-02, -6.620e-02, -4.536e-02, 4.968e-02, 1.109e-01, 7.025e-02, 1.981e-02, 1.885e-01, -1.674e-01)); + r += mul(s6_7, M4(9.632e-02, -1.427e-01, 6.963e-02, 8.605e-03, -1.484e-01, 2.370e-02, -2.559e-02, -9.231e-02, 1.958e-01, 2.344e-01, -3.780e-02, 5.337e-02, -8.072e-03, -2.691e-01, 1.615e-01, 6.801e-03)); + r += mul(s6_8, M4(3.382e-02, 3.745e-02, -1.950e-01, -3.597e-02, -9.278e-03, -5.420e-02, -5.932e-02, -7.582e-02, -9.291e-02, 2.071e-02, 6.282e-02, -2.184e-01, 7.254e-02, -8.537e-02, 1.136e-01, 3.059e-02)); + r += mul(s7_0, M4(5.663e-02, -1.343e-02, 5.780e-02, -8.611e-02, 6.815e-03, -5.406e-02, 3.681e-02, 1.104e-01, -3.168e-02, -8.807e-02, 1.690e-01, 5.809e-02, -4.456e-02, -1.519e-02, 2.311e-02, 5.913e-02)); + r += mul(s7_1, M4(5.982e-02, -1.091e-01, 5.650e-02, 6.565e-02, -9.883e-02, 2.926e-02, -1.342e-01, -1.565e-01, 7.153e-02, 1.869e-01, 8.656e-02, -3.569e-02, -5.679e-02, -6.900e-02, -1.143e-02, 4.873e-02)); + r += mul(s7_2, M4(-4.890e-02, 2.329e-02, 4.757e-02, -4.350e-02, 1.223e-01, -9.936e-02, -1.299e-01, 1.133e-01, 4.006e-02, -1.584e-01, -2.698e-02, -4.945e-02, 6.293e-02, 7.017e-02, -1.704e-01, -1.799e-01)); + r += mul(s7_3, M4(2.485e-02, -5.407e-02, -8.527e-02, -1.510e-01, 1.643e-01, -5.463e-02, -6.835e-03, 5.878e-02, -5.531e-02, -2.257e-03, -5.767e-02, -6.246e-02, 1.384e-01, 8.035e-02, -6.237e-03, 5.670e-02)); + r += mul(s7_4, M4(8.087e-02, -7.150e-02, -5.149e-02, 1.542e-01, -1.815e-01, 1.715e-02, -1.012e-01, -2.905e-03, 2.317e-02, 1.235e-01, -1.332e-02, -2.943e-02, 8.026e-02, 1.686e-01, 2.589e-02, 1.300e-01)); + r += mul(s7_5, M4(-1.076e-01, 5.289e-03, 1.478e-02, -5.922e-02, 6.083e-02, 1.112e-01, 5.104e-02, -2.085e-02, -8.144e-02, 1.094e-01, 2.394e-02, -2.996e-02, -3.953e-02, 6.552e-02, -1.242e-01, 3.664e-02)); + r += mul(s7_6, M4(-2.648e-02, 3.515e-02, 1.103e-01, 7.541e-02, 9.822e-02, 1.516e-01, 6.112e-02, -9.476e-02, 5.026e-02, -4.293e-02, -7.005e-02, 
-8.219e-02, -1.015e-01, -8.383e-02, -1.067e-01, -2.989e-03)); + r += mul(s7_7, M4(-9.236e-02, -5.836e-02, -2.822e-02, 1.138e-01, 1.048e-01, 9.065e-02, -5.784e-02, -1.620e-01, -1.461e-01, 1.087e-01, 2.606e-02, 3.185e-02, 1.948e-01, -2.235e-02, -6.895e-03, -4.220e-02)); + r += mul(s7_8, M4(6.419e-02, 2.451e-02, -7.139e-02, -7.798e-02, 1.275e-01, 3.751e-02, 8.619e-02, 4.441e-02, -1.117e-01, -1.395e-02, 7.185e-02, -1.220e-02, 2.262e-02, -4.089e-02, 8.666e-02, -2.434e-02)); + r += V4(-7.995e-03, -1.224e-02, -1.444e-02, -2.582e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(5.957e-02, -3.049e-03, 2.515e-03, 7.423e-03, 9.982e-02, 1.812e-01, 1.187e-01, 2.259e-01, 1.165e-01, -1.347e-02, -3.428e-02, 8.958e-02, -1.029e-01, 2.937e-02, 2.708e-02, -1.298e-01)); + r += mul(s0_1, M4(-1.847e-01, -5.502e-02, -1.051e-01, -7.236e-02, -1.206e-01, 1.803e-01, 1.486e-01, -7.366e-02, 3.302e-02, -7.880e-02, 5.818e-02, 1.179e-01, 1.430e-01, 1.488e-01, -5.780e-02, 2.994e-02)); + r += mul(s0_2, M4(1.800e-01, -1.022e-01, -1.171e-03, -6.833e-03, -1.753e-02, -1.434e-01, 1.970e-02, -3.685e-02, 4.655e-02, 1.126e-01, -2.728e-02, 7.717e-02, -2.154e-02, 4.008e-02, 1.455e-01, 4.344e-02)); + r += mul(s0_3, M4(-3.641e-02, 5.033e-03, -1.325e-02, -1.015e-01, 8.481e-02, 2.554e-01, -1.070e-01, 1.024e-01, 1.923e-02, -1.010e-03, -4.799e-02, 1.288e-01, 
-7.996e-02, -1.427e-01, -2.689e-01, 5.776e-02)); + r += mul(s0_4, M4(1.833e-01, -4.067e-03, 7.679e-03, 5.957e-02, -8.062e-02, -2.109e-02, 8.797e-02, -7.552e-02, 1.492e-01, -1.806e-01, -4.736e-02, 1.260e-01, 9.266e-02, 5.566e-02, 7.952e-02, -1.757e-01)); + r += mul(s0_5, M4(-7.300e-02, 7.447e-02, 5.468e-02, 3.852e-02, -4.063e-02, -7.468e-03, -1.014e-01, 3.237e-02, 1.435e-01, -8.569e-02, 3.054e-02, -4.666e-02, 1.150e-01, 6.880e-03, 7.433e-02, 1.294e-01)); + r += mul(s0_6, M4(-7.807e-02, -3.489e-02, 9.954e-02, -2.053e-02, 1.920e-02, -6.982e-02, -7.709e-02, 4.164e-02, -2.460e-02, 4.299e-02, -2.332e-02, 6.819e-02, -8.945e-02, -5.789e-02, -9.769e-02, -9.254e-03)); + r += mul(s0_7, M4(-3.709e-02, 1.226e-03, -1.219e-01, 4.918e-03, -7.907e-02, -4.814e-02, 9.349e-02, -8.402e-02, -2.609e-02, -4.457e-02, 8.509e-03, 7.508e-02, 8.673e-02, -9.438e-02, -9.328e-02, 5.861e-03)); + r += mul(s0_8, M4(3.006e-02, 1.393e-02, 7.983e-02, 8.630e-02, 1.689e-02, -8.399e-02, -1.049e-01, -5.334e-02, 1.484e-01, -7.935e-02, 1.386e-01, 4.845e-02, -5.598e-02, -2.523e-02, 8.228e-02, -8.778e-02)); + r += mul(s1_0, M4(-2.164e-01, -9.404e-02, 1.043e-01, 2.007e-01, 8.723e-02, 2.391e-01, 9.450e-02, 7.558e-02, -4.033e-02, 3.938e-02, -1.114e-01, 1.119e-02, 5.581e-02, -7.281e-03, 1.876e-02, -1.019e-01)); + r += mul(s1_1, M4(-9.791e-02, -7.794e-02, 2.162e-02, -3.561e-02, -5.282e-02, -1.422e-02, 8.914e-02, -2.121e-02, -8.363e-02, 2.216e-01, 4.317e-02, 1.991e-01, 1.373e-01, 8.835e-02, -1.499e-02, 6.239e-02)); + r += mul(s1_2, M4(4.177e-01, -2.990e-01, 5.412e-03, 7.816e-02, -2.068e-01, -1.488e-01, -3.840e-02, -4.823e-03, -2.806e-02, 2.133e-01, 2.451e-03, 1.602e-02, -2.125e-01, -1.082e-01, -1.455e-02, 2.847e-03)); + r += mul(s1_3, M4(2.299e-02, -8.708e-02, -3.546e-02, 1.070e-01, 8.130e-02, 1.195e-01, -7.525e-02, 1.726e-01, -2.942e-02, 2.129e-01, -9.805e-02, 5.453e-02, -4.581e-02, 1.577e-02, -8.092e-02, 1.111e-01)); + r += mul(s1_4, M4(4.460e-03, -1.676e-02, -9.924e-02, -1.566e-02, -1.788e-01, 1.211e-02, 
5.756e-02, -9.596e-02, 2.085e-01, 5.858e-02, -5.055e-02, 2.971e-02, 2.450e-01, -1.739e-01, 2.155e-01, -1.562e-01)); + r += mul(s1_5, M4(2.001e-01, 7.301e-02, 2.093e-01, 1.053e-01, -1.331e-02, -3.578e-02, -7.708e-02, 3.420e-02, -8.389e-02, 4.010e-02, -2.524e-04, -3.785e-02, 1.020e-01, -1.071e-01, 3.095e-03, 1.411e-01)); + r += mul(s1_6, M4(7.020e-02, -1.008e-03, 1.983e-02, -6.337e-02, 5.443e-02, 2.197e-02, -1.557e-02, 2.523e-02, -2.901e-02, 9.682e-02, -7.168e-02, -4.479e-02, -5.737e-02, -7.622e-02, -1.268e-01, 5.257e-02)); + r += mul(s1_7, M4(-8.855e-02, -8.280e-03, -1.156e-01, -3.358e-02, -1.597e-01, -1.030e-01, 1.370e-02, 4.266e-02, -8.264e-02, 1.193e-01, -9.311e-02, -2.361e-02, 6.552e-03, -2.326e-02, -1.029e-01, 2.257e-02)); + r += mul(s1_8, M4(8.102e-02, 1.553e-01, -1.178e-01, 6.726e-02, 6.600e-02, -1.267e-02, -4.002e-03, -4.638e-02, 5.313e-02, 8.323e-03, 6.413e-02, 9.978e-02, 1.108e-01, -9.390e-02, -1.744e-02, -8.957e-02)); + r += mul(s2_0, M4(4.738e-02, -9.025e-02, -1.875e-02, 1.161e-01, -1.321e-02, 1.354e-01, 1.405e-02, 1.416e-01, -1.197e-01, -1.130e-02, 5.646e-03, 1.371e-02, 1.499e-01, -1.998e-01, 6.524e-02, -1.900e-01)); + r += mul(s2_1, M4(-3.611e-02, 1.329e-01, -6.656e-03, -1.775e-01, 1.026e-02, 9.408e-02, -5.093e-02, 1.961e-02, -2.624e-01, 2.131e-02, 4.280e-02, -1.226e-01, 1.730e-01, 1.343e-01, -1.671e-01, -6.461e-03)); + r += mul(s2_2, M4(2.938e-02, -1.179e-01, 7.802e-02, -1.769e-02, -5.427e-02, -3.815e-02, 2.688e-02, 3.222e-02, 1.737e-01, 5.565e-02, -8.783e-02, 5.011e-02, 2.056e-01, 3.965e-02, -7.423e-02, 7.593e-02)); + r += mul(s2_3, M4(-4.279e-02, 1.892e-02, -1.629e-01, 8.270e-02, 5.401e-02, -6.041e-02, -9.506e-02, 2.218e-01, -5.125e-02, 1.004e-01, -3.841e-02, 9.423e-02, -6.710e-02, 8.415e-02, 1.696e-02, -1.256e-01)); + r += mul(s2_4, M4(-3.843e-02, 2.114e-02, -1.109e-01, -8.447e-02, -5.302e-02, -2.032e-01, 1.450e-01, 3.852e-02, -1.481e-01, -5.735e-02, 2.225e-01, 9.101e-03, -1.746e-01, 8.228e-02, 4.341e-02, 1.278e-01)); + r += mul(s2_5, 
M4(-1.138e-02, 3.630e-02, 1.412e-01, 3.819e-02, -9.534e-02, 1.548e-01, 1.390e-01, -3.959e-02, -4.372e-02, 1.395e-01, 7.127e-02, 2.372e-02, -9.610e-03, -9.713e-02, -3.195e-02, 2.132e-02)); + r += mul(s2_6, M4(1.045e-01, 8.938e-02, 9.943e-02, 1.064e-02, -1.489e-02, -6.177e-02, -7.106e-02, -5.035e-02, 8.367e-02, -5.951e-02, 1.692e-02, -1.152e-02, -8.637e-03, 7.272e-02, -2.973e-02, -9.375e-02)); + r += mul(s2_7, M4(-1.147e-01, 7.269e-02, -1.572e-02, 1.538e-01, -1.678e-01, 1.536e-01, 7.278e-02, 6.957e-02, -2.188e-02, 9.501e-02, -3.409e-02, 1.484e-01, -7.706e-02, -5.266e-02, 1.234e-01, 1.243e-02)); + r += mul(s2_8, M4(1.371e-01, -6.741e-02, -1.627e-01, -3.880e-02, -4.867e-02, 1.805e-01, 1.315e-01, 9.806e-02, -2.459e-02, -1.065e-02, -6.366e-02, -7.442e-02, -3.105e-02, 3.967e-02, 9.181e-02, 3.679e-02)); + r += mul(s3_0, M4(-4.685e-02, 2.092e-01, -3.243e-02, 1.201e-01, -9.950e-02, 1.814e-01, 2.770e-02, -1.061e-01, 6.173e-02, -3.614e-02, -3.884e-03, -1.754e-02, -6.866e-02, 1.386e-02, 4.887e-02, -2.357e-02)); + r += mul(s3_1, M4(-2.743e-01, -9.328e-02, 3.926e-01, -2.396e-01, -5.040e-02, 5.586e-02, -3.486e-02, -7.192e-02, 1.664e-02, 2.172e-02, 3.996e-02, -1.952e-01, 1.958e-01, 1.802e-02, -1.209e-01, 2.013e-02)); + r += mul(s3_2, M4(-7.065e-02, -2.231e-01, -1.740e-01, -1.036e-01, -4.724e-02, 1.182e-02, 9.885e-02, 1.422e-02, 1.404e-01, -7.340e-02, -1.123e-04, -1.421e-01, 2.634e-01, -7.101e-02, -2.584e-02, 1.234e-01)); + r += mul(s3_3, M4(1.386e-01, -1.280e-01, -3.779e-01, -3.310e-01, 7.156e-02, -1.150e-01, 1.079e-02, 7.545e-02, -1.125e-02, -1.691e-02, -1.538e-01, -3.010e-02, -8.678e-02, 2.458e-02, 5.418e-02, -7.666e-02)); + r += mul(s3_4, M4(1.449e-01, 9.721e-02, -2.594e-01, 2.108e-01, 2.351e-01, -3.130e-01, -8.903e-02, -4.188e-02, 2.007e-01, -2.197e-01, 1.613e-01, -3.164e-01, -3.532e-01, 6.255e-02, 2.272e-01, 7.759e-03)); + r += mul(s3_5, M4(-2.253e-02, -7.612e-02, -9.930e-02, -1.385e-01, -1.933e-01, -1.089e-01, -1.863e-02, 3.789e-02, 2.229e-02, 6.735e-02, 3.127e-01, 
-1.855e-02, -1.775e-01, -2.837e-02, 1.607e-01, 7.195e-02)); + r += mul(s3_6, M4(1.558e-01, 2.682e-02, 1.504e-01, 9.806e-02, 6.693e-02, 3.296e-02, -1.118e-01, -3.649e-02, 2.780e-02, -5.672e-02, -3.103e-02, 1.411e-01, 1.649e-02, 9.769e-02, 1.311e-01, -3.010e-02)); + r += mul(s3_7, M4(2.929e-02, 1.012e-01, -5.385e-02, 3.901e-02, 3.679e-02, 8.885e-03, -2.742e-01, 5.118e-02, 2.344e-03, 1.122e-03, -6.791e-02, -5.733e-02, 3.292e-01, 6.902e-02, -5.233e-02, 8.570e-02)); + r += mul(s3_8, M4(-7.097e-02, 1.848e-02, -3.787e-02, 4.900e-03, 6.421e-02, 7.465e-02, -1.535e-01, 1.406e-02, -1.187e-01, 2.577e-03, 9.254e-02, 5.308e-02, 2.464e-02, -7.548e-02, -1.966e-01, -1.053e-01)); + r += mul(s4_0, M4(3.893e-02, -4.947e-02, -7.893e-03, 6.037e-03, 6.807e-02, -8.136e-03, 7.377e-02, 5.098e-02, -5.985e-02, -3.314e-02, -1.090e-01, -1.603e-01, -6.519e-02, 1.479e-02, -4.851e-02, -6.077e-02)); + r += mul(s4_1, M4(1.111e-01, 1.671e-01, -1.255e-01, -3.826e-03, -6.154e-03, 1.481e-01, -2.236e-02, 2.436e-04, -1.970e-02, -2.319e-01, -3.602e-02, 7.956e-02, -1.163e-01, 1.890e-01, -5.660e-02, 3.174e-02)); + r += mul(s4_2, M4(1.580e-02, 4.120e-02, 6.110e-02, -1.322e-01, 3.182e-03, 3.036e-02, 7.864e-02, -1.343e-02, -1.131e-01, -2.483e-02, 3.549e-03, 1.102e-01, 1.428e-01, 5.457e-03, 1.126e-02, 5.344e-02)); + r += mul(s4_3, M4(-1.522e-01, 1.218e-02, 3.848e-04, -1.357e-01, -7.722e-02, -6.365e-02, -1.327e-02, 1.026e-01, -1.313e-01, -8.871e-02, -4.374e-03, -2.673e-02, -2.268e-02, 6.203e-02, 2.476e-02, -7.891e-02)); + r += mul(s4_4, M4(1.277e-01, 2.146e-01, -1.622e-01, 3.286e-02, -1.704e-01, 9.845e-03, 2.795e-02, -7.210e-02, -3.702e-01, 5.903e-02, -2.025e-01, 2.572e-02, -2.745e-02, -2.299e-02, 3.136e-02, 9.026e-02)); + r += mul(s4_5, M4(5.797e-03, -3.580e-02, -4.552e-03, -1.105e-01, 8.373e-02, -4.397e-02, -5.189e-03, -6.894e-02, -2.798e-02, -5.324e-02, -5.876e-02, 9.981e-03, 1.140e-01, -7.434e-02, 7.077e-02, 2.092e-02)); + r += mul(s4_6, M4(1.196e-02, -8.666e-02, 1.137e-01, 4.510e-02, 4.648e-02, -1.031e-01, 
-7.070e-02, 1.777e-02, -5.807e-02, 6.463e-02, 3.425e-02, 5.167e-02, 1.442e-02, -4.559e-02, 7.944e-03, -9.425e-02)); + r += mul(s4_7, M4(-5.318e-02, 5.288e-02, 1.233e-01, -1.647e-02, 5.040e-02, 3.649e-03, -2.084e-02, -3.695e-02, 3.423e-02, 1.215e-01, 7.316e-02, 4.451e-02, -1.786e-01, 9.964e-02, 7.429e-02, -2.555e-02)); + r += mul(s4_8, M4(-1.145e-02, -4.109e-02, -1.333e-01, 9.021e-02, 8.219e-02, 4.919e-02, -1.678e-02, 6.022e-02, 3.826e-04, -9.322e-02, 5.648e-02, -4.974e-02, -5.503e-02, -2.237e-02, 2.452e-02, 1.236e-01)); + r += mul(s5_0, M4(-3.120e-02, -2.506e-02, 8.325e-03, 4.761e-02, -4.672e-01, -2.430e-01, 5.374e-02, -1.592e-01, 1.392e-01, 9.341e-02, -1.893e-01, 8.724e-03, -5.083e-02, -7.755e-02, 3.613e-02, -1.841e-01)); + r += mul(s5_1, M4(4.977e-02, 1.632e-01, -9.581e-02, 3.688e-01, 2.764e-01, 1.806e-01, -1.471e-01, -9.434e-02, -1.930e-01, -1.048e-02, 1.817e-02, 1.018e-01, 4.963e-02, -2.904e-02, -1.284e-01, -8.687e-02)); + r += mul(s5_2, M4(-2.475e-01, 1.064e-01, -2.106e-02, -1.520e-02, -5.434e-02, -9.044e-02, -2.736e-03, 1.030e-01, -2.364e-01, 2.136e-01, 1.391e-01, -1.069e-01, 1.714e-01, -1.077e-02, -9.362e-02, 7.189e-02)); + r += mul(s5_3, M4(-1.841e-01, -3.266e-02, 9.362e-02, -3.814e-02, -5.682e-01, 3.984e-01, 4.681e-01, 2.657e-01, -2.097e-02, -9.244e-03, 9.430e-03, -2.963e-02, -9.612e-02, 7.458e-02, 4.870e-02, -5.087e-02)); + r += mul(s5_4, M4(-1.757e-01, 1.198e-01, -1.528e-01, 3.579e-01, 6.719e-01, -5.689e-01, -1.907e-01, -2.981e-01, -3.525e-02, 8.262e-02, 2.617e-03, -7.026e-02, 3.087e-01, -2.135e-02, -1.579e-02, 8.239e-02)); + r += mul(s5_5, M4(1.460e-01, -9.163e-02, -6.629e-02, 2.753e-02, -1.082e-01, 2.592e-02, -7.397e-02, -1.469e-01, 9.262e-02, -1.249e-02, 5.569e-02, 9.788e-02, 2.121e-02, -1.666e-01, -1.258e-01, 1.772e-01)); + r += mul(s5_6, M4(6.194e-02, -3.392e-02, 5.301e-02, -7.029e-02, 1.058e-01, 2.590e-02, 3.245e-01, 2.971e-01, 8.965e-02, 1.275e-01, -5.641e-03, -4.784e-02, -7.069e-02, 2.989e-02, -1.026e-01, -1.121e-01)); + r += mul(s5_7, 
M4(-4.134e-02, 9.351e-02, -2.658e-02, 1.898e-02, -1.142e-01, -9.247e-02, -1.610e-01, -8.344e-02, -1.560e-01, -7.492e-02, 5.762e-02, -2.066e-02, -1.029e-01, 1.144e-01, 1.237e-01, 1.510e-02)); + r += mul(s5_8, M4(1.234e-01, 1.372e-02, -3.129e-02, 1.210e-02, -1.381e-01, 7.946e-02, 4.540e-02, -4.533e-02, -8.761e-03, -4.579e-02, -4.991e-02, -1.097e-01, -3.653e-01, -1.008e-01, 9.064e-02, -1.664e-02)); + r += mul(s6_0, M4(1.035e-01, -5.898e-02, 7.442e-02, 4.579e-02, 9.165e-02, -8.487e-02, -7.653e-02, -1.472e-01, 1.458e-01, -2.610e-01, 5.489e-02, -3.148e-02, -1.961e-02, -1.283e-01, -1.550e-03, 1.396e-01)); + r += mul(s6_1, M4(3.029e-02, 1.203e-01, 1.137e-01, -4.167e-02, 2.036e-01, 5.974e-02, 9.833e-03, -4.837e-03, 2.107e-02, 5.559e-02, -1.459e-01, 8.987e-02, -2.228e-01, -1.185e-01, -1.612e-01, -1.318e-01)); + r += mul(s6_2, M4(2.158e-01, 1.618e-01, 4.534e-02, 1.148e-01, 7.521e-02, 8.428e-02, -3.577e-02, 8.668e-02, -2.172e-01, 1.231e-01, -3.659e-02, -7.905e-02, -9.192e-02, -2.736e-02, -1.080e-01, -3.183e-02)); + r += mul(s6_3, M4(-5.911e-02, -1.347e-01, 3.753e-02, 5.744e-02, 1.702e-01, 9.262e-02, -7.533e-02, 1.787e-02, 2.806e-02, -6.910e-02, 7.101e-02, -3.144e-02, -6.674e-02, 1.759e-02, 8.950e-02, 1.166e-01)); + r += mul(s6_4, M4(2.470e-01, 4.738e-02, 9.160e-02, 1.635e-02, -1.139e-02, -7.980e-02, -8.763e-02, 2.369e-01, 8.297e-02, -3.180e-01, 1.388e-02, -3.104e-02, 1.348e-02, 2.618e-01, 2.261e-02, -1.885e-01)); + r += mul(s6_5, M4(-7.889e-02, 1.627e-01, 9.866e-03, -2.770e-02, 6.266e-02, -4.376e-02, -6.148e-03, 1.318e-01, 3.748e-02, -1.347e-01, 3.466e-02, -7.621e-02, -4.696e-02, 8.234e-02, 1.539e-01, 4.679e-02)); + r += mul(s6_6, M4(-9.591e-03, -9.131e-02, -2.189e-02, -3.906e-03, -8.384e-02, -1.096e-01, -4.869e-02, 1.044e-01, 6.963e-03, 3.361e-02, -4.759e-03, 7.508e-02, -3.060e-03, -1.384e-01, -3.605e-02, 1.665e-02)); + r += mul(s6_7, M4(1.625e-01, -3.431e-02, -3.472e-02, 6.639e-02, 4.569e-02, 2.451e-02, -1.030e-01, 5.134e-02, 3.258e-01, 9.507e-02, 9.125e-03, 1.037e-01, 
1.091e-01, 3.748e-02, 6.382e-02, 2.083e-02)); + r += mul(s6_8, M4(-4.303e-02, -4.050e-02, -1.151e-01, -6.843e-02, 5.202e-02, -3.790e-02, 4.180e-03, -6.684e-02, 1.225e-01, 1.198e-01, 4.950e-02, 7.972e-02, 2.018e-02, 1.512e-01, 1.024e-01, 1.460e-02)); + r += mul(s7_0, M4(-1.173e-01, 1.072e-01, 5.356e-03, 1.385e-01, -6.036e-02, -1.034e-01, -6.001e-02, 6.160e-02, -4.321e-02, 5.623e-02, -1.003e-01, 1.845e-03, -4.557e-02, 2.336e-03, -5.258e-02, 6.187e-02)); + r += mul(s7_1, M4(-1.835e-02, 8.510e-03, 9.641e-02, -6.166e-02, -6.591e-02, 1.562e-02, -1.011e-01, -5.767e-02, -1.791e-01, -4.820e-02, -4.084e-02, 3.447e-02, 7.111e-02, -8.455e-02, 9.572e-02, -2.570e-02)); + r += mul(s7_2, M4(2.063e-04, 7.999e-02, -2.165e-02, 1.236e-02, 1.408e-02, -5.522e-02, -3.566e-02, 8.257e-02, -1.279e-01, 1.305e-01, -1.413e-03, 4.585e-02, 2.789e-03, 1.053e-02, -7.205e-02, -6.562e-02)); + r += mul(s7_3, M4(4.291e-02, -4.951e-02, 8.818e-02, 3.302e-02, 1.800e-02, -7.639e-02, 2.755e-02, -7.894e-02, 5.420e-02, -3.219e-02, -1.015e-01, -5.315e-02, -4.991e-02, 7.107e-02, 1.468e-02, -2.473e-02)); + r += mul(s7_4, M4(1.823e-01, -8.644e-03, 7.152e-02, -2.014e-01, -3.008e-02, 3.164e-02, -2.384e-01, 1.322e-01, -6.781e-02, 5.100e-02, 3.457e-02, 1.042e-02, 4.205e-02, 6.503e-02, -3.424e-02, -4.911e-02)); + r += mul(s7_5, M4(1.248e-01, -2.998e-02, 2.813e-03, -5.579e-02, 8.234e-02, -6.763e-02, -7.895e-02, 2.507e-04, 5.331e-03, -9.873e-02, 1.283e-01, 4.402e-02, 2.722e-02, -6.559e-02, -8.504e-02, 1.554e-02)); + r += mul(s7_6, M4(-8.137e-02, -3.382e-02, -4.983e-03, 1.141e-01, -7.500e-02, -1.591e-01, 7.594e-02, 1.251e-01, 7.075e-02, 4.401e-02, -5.485e-02, -1.526e-02, -1.338e-01, -1.707e-01, -2.600e-02, 2.292e-02)); + r += mul(s7_7, M4(-8.672e-02, -2.362e-02, -7.777e-02, 1.356e-02, -3.393e-02, -7.631e-02, -2.710e-01, -6.589e-02, 1.881e-01, -9.581e-02, 2.370e-02, -8.766e-02, -4.940e-02, 5.028e-02, 2.167e-02, 4.738e-03)); + r += mul(s7_8, M4(4.778e-02, 7.154e-02, 5.355e-02, -2.512e-02, -8.144e-02, 1.947e-02, 
-1.547e-01, -2.908e-02, -3.783e-02, 3.476e-02, 3.998e-02, 4.067e-02, 1.007e-02, 2.896e-03, -3.280e-02, -2.884e-03)); + r += V4(2.792e-02, 6.402e-03, 2.704e-02, 8.885e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.150e-02, 2.098e-02, -1.908e-03, 1.370e-01, -1.004e-01, 3.667e-02, 1.652e-01, -1.928e-01, -1.453e-01, -8.522e-02, -1.028e-02, -9.115e-02, -1.022e-01, -1.301e-01, 1.325e-01, -9.540e-02)); + r += mul(s0_1, M4(-4.920e-03, 4.564e-02, -9.749e-02, -7.350e-02, -9.275e-02, -2.181e-01, 5.228e-03, -1.274e-02, -1.396e-01, -1.462e-02, -2.177e-01, -3.610e-01, 9.250e-02, -4.713e-02, 8.113e-02, -1.250e-01)); + r += mul(s0_2, M4(2.999e-02, 1.197e-01, 3.022e-02, 7.073e-02, -3.234e-02, 4.483e-03, 3.886e-02, -5.276e-02, 4.253e-02, -1.674e-01, -7.591e-03, -1.164e-01, -6.967e-02, 6.605e-02, -3.461e-02, 2.936e-02)); + r += mul(s0_3, M4(-3.725e-02, 3.155e-02, -7.687e-02, -2.039e-02, 4.441e-02, 1.862e-02, 1.289e-01, -8.736e-02, -2.970e-02, 2.526e-01, 1.500e-01, -2.748e-01, 1.343e-01, 5.158e-02, 3.829e-02, -1.029e-01)); + r += mul(s0_4, M4(1.480e-01, -1.204e-01, 1.481e-01, 6.791e-02, -2.369e-02, -1.166e-01, -1.001e-01, 2.066e-01, 5.810e-02, 1.231e-01, -2.750e-02, -2.348e-02, -1.851e-01, -1.561e-01, 9.651e-03, 1.222e-01)); + r += mul(s0_5, M4(-2.281e-02, 7.214e-02, 4.872e-02, -3.459e-02, 1.557e-01, -4.078e-02, -5.643e-02, 
7.592e-02, 7.703e-02, -1.563e-01, 7.854e-02, -1.026e-01, 8.463e-02, -2.650e-01, 1.179e-01, 5.095e-02)); + r += mul(s0_6, M4(-1.307e-01, 5.779e-02, -5.937e-02, -8.787e-02, -1.897e-01, 4.236e-02, -3.115e-02, 5.699e-02, 9.361e-02, 1.806e-02, -1.096e-01, -1.321e-01, 1.118e-01, -1.057e-01, -6.268e-03, -3.682e-03)); + r += mul(s0_7, M4(-3.039e-03, -3.794e-02, 2.495e-02, -5.958e-02, 3.056e-02, -1.187e-02, 4.943e-02, 1.002e-02, -1.508e-02, 8.286e-02, -1.137e-01, -3.081e-02, -9.305e-02, 2.237e-01, 1.300e-01, 1.462e-01)); + r += mul(s0_8, M4(3.845e-02, -2.379e-02, 7.917e-02, 5.293e-02, -9.989e-02, -8.364e-02, 4.484e-02, -7.819e-02, 1.247e-01, -6.405e-02, -4.323e-02, -9.018e-02, 1.771e-02, 6.845e-02, -3.819e-02, 2.506e-01)); + r += mul(s1_0, M4(1.505e-01, 4.282e-02, 2.325e-01, -1.088e-01, -1.533e-01, 6.644e-02, 1.375e-01, -7.004e-02, 9.644e-02, 9.485e-02, 6.898e-02, -5.298e-03, -4.165e-02, -1.174e-01, -8.163e-02, -4.830e-02)); + r += mul(s1_1, M4(8.243e-03, -1.574e-02, -4.337e-02, -4.904e-02, -6.238e-02, 2.752e-02, -1.450e-01, 1.297e-02, 8.632e-02, 2.109e-01, 2.266e-03, -1.468e-01, 5.894e-02, 7.963e-02, -1.032e-01, -2.958e-02)); + r += mul(s1_2, M4(-4.761e-01, 3.923e-01, -2.083e-01, 1.344e-01, -5.242e-02, -6.285e-03, 3.866e-02, -6.010e-02, 5.071e-02, 5.493e-02, 6.665e-02, 4.845e-03, -2.614e-02, 3.681e-02, -2.129e-01, -2.912e-02)); + r += mul(s1_3, M4(-9.229e-02, -2.160e-01, -3.112e-01, 7.508e-02, -2.224e-01, -1.128e-01, 1.067e-01, 6.648e-02, 6.660e-02, 6.957e-02, 1.109e-01, -5.484e-02, 8.785e-03, -1.616e-01, -9.332e-03, -8.794e-02)); + r += mul(s1_4, M4(1.016e-01, 1.375e-03, -2.052e-02, 1.088e-01, 8.379e-02, 1.776e-01, 6.166e-02, -3.273e-02, -2.966e-03, 1.742e-02, 8.485e-02, 2.937e-02, -1.109e-01, 9.783e-02, -1.222e-01, 6.124e-02)); + r += mul(s1_5, M4(-5.278e-02, -2.068e-01, -1.875e-01, -9.722e-02, 1.369e-01, 1.573e-01, 2.855e-02, 1.431e-01, 1.175e-01, 1.476e-02, 1.504e-01, -1.062e-01, -9.199e-02, 2.577e-02, 6.478e-03, 1.121e-01)); + r += mul(s1_6, M4(2.704e-01, -5.489e-02, 
1.746e-01, -1.111e-01, -1.860e-01, -1.403e-01, -5.571e-02, -1.211e-01, 4.785e-02, -7.522e-02, -9.783e-03, 1.015e-01, 4.958e-03, -1.289e-01, -5.302e-02, 5.505e-02)); + r += mul(s1_7, M4(-4.411e-02, -1.045e-01, 4.994e-02, -1.574e-01, 5.788e-02, 2.045e-01, 3.314e-02, -3.551e-02, 7.251e-02, -5.918e-02, -9.556e-03, 4.214e-02, -3.630e-02, 2.468e-01, -2.294e-02, -1.261e-02)); + r += mul(s1_8, M4(-2.821e-01, 5.553e-03, -9.473e-02, -2.821e-01, -1.144e-01, -1.501e-02, -4.085e-02, -6.069e-02, 1.062e-01, -1.199e-01, 5.926e-02, -2.123e-02, -6.757e-02, -1.281e-02, -2.638e-02, 4.982e-02)); + r += mul(s2_0, M4(4.639e-03, -8.981e-02, -3.127e-02, -1.559e-01, -1.868e-01, 5.938e-02, 7.574e-02, -7.216e-02, 1.794e-01, 1.192e-01, -1.298e-02, -4.886e-02, 4.221e-02, -8.491e-02, -1.210e-01, 1.066e-01)); + r += mul(s2_1, M4(-1.867e-01, -1.192e-01, -1.711e-01, 7.593e-02, -7.570e-02, 5.431e-02, 1.509e-01, -8.083e-02, -7.039e-02, 1.170e-01, 1.004e-01, -9.263e-02, 6.978e-02, 5.257e-02, -1.963e-02, 5.092e-02)); + r += mul(s2_2, M4(1.716e-01, 1.621e-02, -1.479e-01, -1.278e-01, 9.007e-02, -4.578e-02, 5.916e-02, -4.273e-02, -1.310e-01, 1.857e-01, -3.085e-02, 6.763e-02, 3.874e-02, 1.704e-01, -9.613e-02, 8.957e-02)); + r += mul(s2_3, M4(1.064e-02, 2.534e-02, -5.788e-03, -4.810e-02, 8.883e-02, 6.142e-02, 3.759e-02, -1.599e-02, -2.281e-01, 4.078e-02, -2.529e-02, -1.290e-03, -3.682e-02, 2.757e-02, 3.746e-02, 6.837e-02)); + r += mul(s2_4, M4(4.689e-02, -1.032e-01, 2.029e-02, -1.496e-02, 3.248e-02, 1.349e-01, -1.425e-02, -4.335e-02, -1.864e-01, 2.204e-01, -1.185e-01, -3.788e-02, 7.017e-03, -2.078e-02, 2.113e-02, 6.477e-02)); + r += mul(s2_5, M4(-6.346e-02, 1.536e-01, -5.008e-02, -7.137e-02, 5.735e-02, -1.805e-01, 1.751e-02, -1.993e-02, -8.088e-02, 1.131e-01, 2.861e-02, -5.225e-02, 7.813e-02, -1.596e-01, -5.213e-02, -2.286e-02)); + r += mul(s2_6, M4(6.213e-02, -1.340e-01, -4.045e-02, 5.417e-02, 1.412e-02, 2.320e-02, 3.999e-02, -4.847e-03, 3.844e-02, 2.233e-02, -1.047e-01, -7.587e-03, -7.185e-02, 2.859e-02, 
-2.312e-02, -5.680e-02)); + r += mul(s2_7, M4(-4.109e-02, 2.704e-02, -8.921e-02, 4.878e-02, -1.897e-01, 3.091e-02, 7.605e-02, -2.485e-01, 7.590e-02, -1.513e-02, -8.579e-03, 4.597e-02, -6.991e-02, -3.877e-02, -5.631e-02, -5.555e-02)); + r += mul(s2_8, M4(-1.464e-02, 5.491e-03, 8.155e-02, 4.692e-02, 1.756e-01, 5.157e-02, -2.228e-01, -9.451e-02, 2.240e-02, -2.119e-02, -8.800e-02, -2.836e-02, 1.321e-03, 5.610e-03, 6.110e-02, -1.010e-01)); + r += mul(s3_0, M4(-2.863e-01, 1.281e-01, 3.390e-01, 2.793e-02, 8.745e-02, -3.212e-02, -7.006e-02, -8.724e-02, 1.313e-01, -5.727e-04, -2.541e-03, 9.595e-02, 1.818e-01, 1.798e-01, -7.001e-02, 1.867e-01)); + r += mul(s3_1, M4(-7.490e-01, 2.820e-01, -8.783e-02, -8.340e-02, -5.457e-02, 2.979e-02, 6.042e-02, -5.967e-02, 4.259e-02, 3.115e-02, 1.132e-01, -1.519e-01, -1.058e-01, 1.261e-01, -1.253e-01, -1.140e-01)); + r += mul(s3_2, M4(-1.103e-01, 1.477e-01, -8.571e-02, -2.903e-03, -6.956e-02, -1.613e-03, 9.327e-02, 1.674e-02, 9.031e-02, -9.055e-03, 5.152e-02, -4.042e-02, -2.994e-02, -8.168e-02, -2.455e-01, -2.800e-02)); + r += mul(s3_3, M4(1.524e-01, 2.541e-01, -2.778e-02, -5.655e-02, -4.606e-02, 8.783e-02, 7.691e-02, 4.097e-02, -7.642e-02, 2.777e-02, 1.949e-02, -6.352e-02, 3.537e-01, 1.018e-01, -3.824e-02, -1.221e-02)); + r += mul(s3_4, M4(-3.948e-01, 1.403e-01, -3.365e-01, -1.501e-01, 1.689e-01, 5.901e-02, -9.550e-02, 1.362e-01, -2.804e-02, 3.937e-02, -1.233e-01, -3.170e-02, 6.212e-02, 1.210e-01, -3.112e-01, 5.814e-03)); + r += mul(s3_5, M4(-2.880e-01, -7.068e-02, -3.583e-02, -1.900e-02, -1.555e-01, -9.420e-02, -1.856e-03, 5.042e-02, -6.842e-02, 5.013e-02, -5.946e-02, -8.058e-02, -1.084e-01, -5.146e-03, -7.522e-02, 5.965e-03)); + r += mul(s3_6, M4(6.954e-02, 2.202e-02, 1.715e-01, -1.923e-01, -2.088e-01, -2.457e-01, -8.212e-02, 6.059e-02, -6.665e-02, 5.656e-03, -3.019e-02, 5.139e-02, 3.282e-01, 1.749e-01, 1.477e-02, -5.291e-02)); + r += mul(s3_7, M4(-5.721e-02, -1.348e-01, -1.032e-01, 2.518e-03, -1.444e-01, -1.540e-01, -1.222e-01, 
2.705e-01, -5.951e-02, -5.826e-02, 3.968e-02, 1.580e-02, 4.005e-03, -1.579e-01, -7.896e-02, -1.453e-01)); + r += mul(s3_8, M4(-6.637e-02, -2.803e-01, 6.848e-02, 6.454e-02, -7.755e-04, 6.420e-02, 4.581e-03, 6.191e-02, 2.481e-02, -5.033e-02, -3.187e-02, -9.523e-02, 4.138e-02, -4.772e-03, -1.821e-02, -1.466e-01)); + r += mul(s4_0, M4(6.172e-02, 4.860e-02, -1.050e-01, 1.161e-01, 1.948e-01, 4.551e-02, -3.890e-02, 1.641e-01, -7.216e-02, -1.356e-01, -9.914e-02, -4.317e-02, 6.625e-03, -6.305e-02, -1.298e-01, -2.164e-02)); + r += mul(s4_1, M4(1.006e-01, -1.191e-02, -1.051e-01, 2.251e-01, 9.049e-03, 1.394e-01, 8.543e-02, -4.219e-02, 1.124e-01, -4.260e-02, -1.078e-02, 1.445e-01, 7.338e-02, 1.046e-01, -9.125e-02, -5.300e-03)); + r += mul(s4_2, M4(1.374e-01, -1.830e-01, 1.657e-01, -5.735e-02, 3.288e-02, -5.684e-02, 1.186e-02, -1.383e-02, 8.108e-02, 1.465e-02, 7.691e-02, 4.414e-02, 3.654e-04, 2.120e-01, -2.360e-02, 4.260e-02)); + r += mul(s4_3, M4(-5.470e-02, -1.727e-01, -9.758e-02, 1.201e-01, -1.376e-01, 1.443e-01, -2.002e-03, -1.086e-01, -8.649e-02, -1.393e-01, -9.284e-02, 1.437e-01, 5.944e-03, -1.920e-02, -6.170e-02, 2.351e-02)); + r += mul(s4_4, M4(8.198e-02, 8.723e-03, -2.378e-01, -9.163e-02, -1.058e-02, 6.321e-02, 1.436e-01, 3.090e-03, -7.826e-03, 1.253e-01, 5.161e-02, 2.252e-01, -4.539e-02, 2.287e-02, 4.011e-03, 1.571e-02)); + r += mul(s4_5, M4(1.134e-01, -1.501e-01, 1.345e-01, 8.493e-02, 3.416e-02, -1.753e-01, -3.733e-02, -2.671e-02, -1.743e-01, 8.097e-04, -7.299e-02, 8.528e-02, 1.934e-02, -6.367e-02, -9.613e-02, -9.835e-02)); + r += mul(s4_6, M4(-1.071e-01, 1.109e-01, -6.513e-03, -1.421e-02, 7.728e-02, -2.574e-02, 2.111e-02, -4.417e-02, 2.104e-02, -1.677e-01, -8.871e-02, 1.657e-02, -1.920e-01, 4.121e-02, -9.233e-02, -7.041e-02)); + r += mul(s4_7, M4(-1.750e-01, 1.759e-01, -1.211e-01, 4.576e-02, 4.790e-03, 2.754e-02, -5.749e-02, -5.159e-02, -9.071e-02, 7.544e-02, 5.648e-02, 2.652e-02, -4.998e-02, 5.046e-03, -6.872e-02, -9.695e-02)); + r += mul(s4_8, M4(-4.520e-02, 
-1.272e-01, -4.931e-02, 1.418e-01, -1.038e-01, 1.825e-02, -5.155e-02, 4.532e-02, 8.587e-02, -1.928e-02, -1.674e-02, -6.196e-02, 9.840e-02, -2.571e-02, -3.161e-03, -1.645e-02)); + r += mul(s5_0, M4(-1.004e-02, -1.549e-02, 4.202e-02, 3.765e-02, 3.177e-01, -6.272e-02, -4.354e-01, 3.390e-02, 2.077e-02, -1.162e-01, -5.451e-02, -4.558e-02, -1.102e-01, 8.683e-02, 4.586e-02, 1.046e-01)); + r += mul(s5_1, M4(-2.387e-02, -1.223e-02, 2.966e-02, -3.690e-02, 6.212e-02, -1.667e-01, 1.442e-01, 1.095e-02, 3.150e-02, -9.111e-02, 4.348e-02, -2.276e-01, 3.239e-02, 1.461e-01, -7.576e-02, -2.275e-02)); + r += mul(s5_2, M4(7.439e-05, -6.544e-02, 5.617e-02, 6.543e-02, -1.848e-01, 1.469e-01, -1.609e-01, -1.088e-01, 1.695e-01, -4.279e-02, 1.413e-01, 3.313e-02, 1.424e-01, 3.390e-02, 3.491e-02, -4.123e-02)); + r += mul(s5_3, M4(6.728e-03, -1.499e-01, -2.789e-02, -1.596e-01, -3.184e-01, -6.526e-01, -1.408e-01, 2.954e-01, 4.372e-02, 5.506e-02, 6.864e-03, 2.858e-02, -8.610e-02, -1.349e-01, -7.156e-02, 1.250e-01)); + r += mul(s5_4, M4(1.145e-01, 2.027e-01, -6.550e-02, -1.407e-02, -2.456e-01, -5.884e-02, 2.882e-01, 1.430e-01, -2.385e-02, 1.494e-01, -5.016e-03, 5.681e-02, 3.792e-02, 2.435e-01, 1.533e-02, -9.062e-02)); + r += mul(s5_5, M4(4.646e-03, -6.645e-02, -2.206e-02, 1.096e-01, 1.602e-01, -2.943e-02, 2.762e-01, -3.057e-02, 1.723e-02, 7.497e-02, -1.430e-01, -4.040e-02, 4.436e-01, -1.915e-01, -3.968e-02, -3.058e-02)); + r += mul(s5_6, M4(2.339e-01, 6.296e-02, -1.281e-02, -3.622e-02, 8.146e-01, 5.991e-01, 2.123e-01, -5.318e-02, -1.350e-01, -1.494e-01, -8.924e-02, 2.989e-02, -4.053e-01, -2.480e-03, -6.285e-02, -1.024e-01)); + r += mul(s5_7, M4(-1.237e-01, 5.261e-02, 1.015e-02, -5.028e-02, 3.307e-01, -1.153e-01, -2.319e-02, 1.187e-02, -3.670e-02, -1.935e-02, -7.525e-02, -1.157e-01, -3.444e-01, 5.113e-02, 8.425e-02, -2.187e-01)); + r += mul(s5_8, M4(-6.028e-02, 1.487e-02, 4.743e-02, 8.088e-02, -2.379e-02, 1.973e-01, -6.041e-02, 7.984e-02, 1.320e-02, -3.280e-02, 2.315e-03, -7.257e-02, 6.421e-02, 
-1.990e-01, 9.782e-02, -1.607e-01)); + r += mul(s6_0, M4(-1.746e-01, -1.096e-01, -4.016e-02, 2.316e-02, -1.189e-01, 9.210e-02, -1.773e-02, -8.569e-02, -4.666e-02, 3.651e-01, 4.906e-03, 2.222e-01, 2.055e-01, 1.802e-01, 1.129e-01, 1.117e-01)); + r += mul(s6_1, M4(-1.525e-01, 1.392e-01, 4.853e-02, 2.790e-02, 6.696e-02, 1.395e-01, -5.621e-02, 1.454e-01, -5.803e-02, 1.297e-01, 7.659e-02, 8.137e-02, 7.510e-03, 2.824e-03, 8.717e-02, 2.927e-02)); + r += mul(s6_2, M4(-1.327e-02, 8.183e-03, 1.518e-01, -2.946e-03, 7.108e-02, 1.311e-01, -2.148e-02, 2.953e-02, -9.645e-02, 3.880e-01, 8.442e-02, 1.101e-01, 9.102e-02, -1.020e-01, 2.197e-02, -1.570e-01)); + r += mul(s6_3, M4(-1.404e-01, 1.216e-01, 1.599e-02, -1.471e-01, -1.515e-02, 6.933e-02, -1.272e-01, -9.458e-03, -2.156e-01, -2.151e-01, -1.655e-01, -4.164e-02, -1.211e-01, -9.239e-02, -3.468e-02, 1.937e-03)); + r += mul(s6_4, M4(2.207e-02, 1.661e-02, 6.881e-02, -4.938e-02, -1.981e-02, 8.082e-02, -1.806e-02, 1.308e-01, -1.566e-01, -1.687e-01, 4.704e-02, -3.613e-01, -9.430e-02, -2.023e-01, 1.244e-01, 5.668e-02)); + r += mul(s6_5, M4(-1.438e-02, 8.418e-02, 5.582e-02, 4.658e-02, 9.767e-03, 1.296e-01, -8.625e-02, 6.566e-02, -4.197e-02, -4.831e-02, 7.632e-02, -3.700e-02, 2.381e-02, -1.422e-01, 2.721e-01, -1.149e-01)); + r += mul(s6_6, M4(1.040e-01, 1.133e-02, 2.081e-02, 1.203e-02, 2.419e-01, 4.390e-02, 4.088e-02, 1.739e-01, 1.626e-01, 1.626e-01, 7.494e-02, 9.982e-02, 7.710e-02, 1.424e-01, 6.593e-03, -1.490e-01)); + r += mul(s6_7, M4(1.593e-02, -8.501e-02, -3.821e-02, 5.587e-02, 1.242e-01, -1.739e-01, 6.258e-03, 2.321e-02, 2.304e-01, 5.343e-02, 7.296e-02, 3.140e-02, -8.605e-02, -1.336e-02, -2.243e-01, -1.296e-01)); + r += mul(s6_8, M4(-7.701e-03, -2.540e-02, 6.064e-02, 4.354e-02, -5.639e-02, -1.145e-01, -2.456e-02, -6.280e-02, 2.549e-01, 8.659e-03, 2.024e-01, 2.090e-01, 1.751e-01, 1.903e-01, -2.858e-01, -4.304e-03)); + r += mul(s7_0, M4(-1.837e-02, 2.402e-02, 3.309e-03, 8.617e-03, 6.836e-02, 2.067e-01, -3.861e-03, 4.940e-02, -9.221e-02, 
2.281e-02, -1.917e-02, -8.949e-02, -3.042e-02, -6.460e-02, -4.224e-02, 3.207e-02)); + r += mul(s7_1, M4(-2.121e-01, 5.468e-02, 2.221e-02, -2.191e-03, -5.089e-02, 9.094e-03, 3.493e-02, 3.266e-02, 4.843e-02, 6.992e-02, 1.177e-01, -2.361e-02, -3.117e-02, 1.798e-01, -2.280e-01, -5.832e-02)); + r += mul(s7_2, M4(8.313e-02, -3.986e-02, 4.971e-02, 6.207e-02, 9.079e-02, 9.608e-02, 1.472e-02, 2.360e-02, 3.350e-02, -7.497e-02, 5.004e-03, 2.574e-02, -3.489e-02, 1.469e-03, 5.901e-02, -7.264e-02)); + r += mul(s7_3, M4(1.947e-01, -1.781e-01, 3.008e-03, -1.291e-01, 7.364e-02, 6.604e-02, -6.041e-02, 1.262e-01, -1.353e-01, 1.625e-02, 3.219e-02, 2.061e-02, -6.350e-02, -1.169e-01, -8.725e-02, 7.904e-02)); + r += mul(s7_4, M4(-4.648e-02, -1.010e-02, 1.896e-02, 2.226e-01, 3.922e-02, 1.277e-01, 9.701e-02, -3.059e-02, 3.853e-03, -1.012e-01, 1.153e-02, -4.876e-02, -4.124e-02, 4.852e-02, -5.050e-02, 1.209e-01)); + r += mul(s7_5, M4(-1.581e-02, 3.426e-02, -9.987e-02, 6.714e-02, 1.979e-01, -1.942e-02, -1.218e-01, -4.330e-02, -9.028e-02, -1.265e-02, 9.580e-02, -4.611e-02, -5.395e-02, -1.992e-01, 7.735e-02, 5.766e-03)); + r += mul(s7_6, M4(1.670e-01, 1.696e-01, 6.262e-02, -4.667e-02, 3.841e-02, 6.407e-02, -8.766e-02, -5.676e-02, 8.497e-02, -3.010e-02, -3.407e-02, 2.600e-02, 5.656e-02, 6.309e-03, -2.136e-02, -9.589e-02)); + r += mul(s7_7, M4(1.196e-01, 6.606e-02, -1.223e-01, -2.459e-02, -4.519e-02, 6.432e-02, 6.285e-02, 1.156e-01, -1.890e-02, 8.361e-02, -1.170e-02, -1.684e-02, 1.223e-01, 2.185e-01, 4.274e-02, -2.461e-02)); + r += mul(s7_8, M4(1.232e-01, -5.513e-02, -1.628e-02, -3.733e-02, 1.027e-02, 3.609e-02, 6.349e-02, -9.551e-02, 1.832e-01, -9.725e-03, -1.850e-02, -5.244e-03, -6.073e-02, -1.743e-01, 1.350e-02, -2.928e-02)); + r += V4(-1.309e-02, -2.129e-02, 4.474e-03, 1.972e-02); + return r; +} + +/* Pass4: handles one pixel per thread. Reads l0..l3 at the nine offsets around the pixel, splits every sample into a non-negative and a negative part, and stores the results of f0..f3 in t4..t7. */ void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; /* pixel for this thread: Rmp8x8(tid.x) offset by blockStart */ + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { /* thread lies outside the input size: nothing to write */ + 
return; + } + float2 pos = (gxy + 0.5) * pt; /* NOTE(review): pos is not named again in this function; presumably the l0..l3 macros read it, but their pass-4 definitions are outside this view - confirm */ + + V4 s0_0 = l0(-1.0, -1.0); /* s0_0..s0_8: l0 at the nine offsets with x and y in -1, 0, 1, row by row */ + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); /* s1_k keeps only the negative part of s0_k (zero where s0_k is positive); taken before s0_k is clamped below */ + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); /* s0_k is overwritten with its non-negative part */ + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); /* same gather and split for l1: s2_k ends as the non-negative part, s3_k as the negative part */ + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); /* same gather and split for l2: s4_k non-negative part, s5_k negative part */ + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 
0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); /* same gather and split for l3: s6_k non-negative part, s7_k negative part */ + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); /* each of f0..f3 receives all 72 vectors; its V4 result is stored at gxy in t4..t7 */ + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, 
s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) /* l0..l3 expand to V4(O(tN, float2(x, y))) for t4..t7, the textures named on this pass's IN line */ + +/* f0 (pass 5): starts from zero, adds mul(s, M4(...)) with a constant matrix for each of the 72 input vectors, then adds a constant V4 and returns the sum. */ V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.403e-02, -3.617e-02, 6.562e-02, 5.876e-02, -6.178e-02, 1.725e-01, 1.095e-01, 8.162e-02, -1.465e-01, 2.106e-02, 2.242e-01, -8.096e-03, 5.256e-02, 4.765e-03, 3.636e-03, -1.354e-01)); + r += mul(s0_1, M4(-5.538e-02, -1.166e-01, 
-5.697e-02, -8.648e-02, -1.756e-01, 6.405e-02, -3.340e-01, 1.930e-01, 1.808e-01, -5.507e-01, -1.670e-01, 2.021e-02, -3.780e-02, -4.192e-02, 1.718e-03, -5.116e-02)); + r += mul(s0_2, M4(-2.934e-02, -9.119e-02, -1.676e-01, -1.473e-02, -8.223e-02, -3.447e-01, 1.063e-01, 6.842e-03, -1.147e-01, 1.334e-01, 2.362e-01, -1.720e-01, -2.902e-02, 4.821e-02, 1.806e-03, -6.014e-03)); + r += mul(s0_3, M4(1.224e-01, 9.122e-02, 2.484e-02, -1.232e-01, 8.127e-02, -2.319e-01, 1.683e-01, -2.717e-03, 3.447e-01, 4.864e-01, -3.165e-01, 1.025e-01, 1.120e-01, -9.687e-02, -1.213e-01, -3.732e-02)); + r += mul(s0_4, M4(1.194e-01, -1.310e-02, 4.169e-03, 4.984e-02, 1.139e-02, 3.027e-01, 8.156e-02, -1.198e-01, 2.414e-02, -1.129e-01, 2.602e-01, 4.357e-01, 1.966e-02, 4.241e-02, -1.662e-02, -1.402e-01)); + r += mul(s0_5, M4(7.734e-02, 7.766e-03, -1.040e-01, 4.922e-02, 7.198e-02, -3.357e-02, 7.957e-02, 2.508e-02, 2.455e-02, -2.451e-01, -8.346e-02, -9.900e-02, -4.279e-02, 6.600e-02, -1.474e-01, 6.493e-02)); + r += mul(s0_6, M4(-1.662e-02, 1.719e-01, 1.430e-01, -1.996e-01, -6.115e-02, 1.890e-01, 8.272e-02, 2.100e-01, -1.199e-01, -8.902e-04, -4.773e-02, -1.766e-02, 1.617e-01, 1.993e-03, 8.756e-02, -8.822e-04)); + r += mul(s0_7, M4(-3.232e-02, -9.500e-02, -5.929e-02, 8.015e-03, -5.343e-02, 1.559e-01, 7.296e-02, 1.787e-01, -1.910e-01, -4.287e-01, -2.358e-01, 1.208e-01, -5.813e-03, 9.995e-03, 4.872e-02, -5.149e-02)); + r += mul(s0_8, M4(-7.287e-02, 8.100e-02, 8.894e-03, -6.739e-02, 4.589e-02, -5.955e-02, 3.243e-02, 1.166e-01, 7.412e-02, 4.255e-01, -1.286e-01, 3.347e-01, 9.307e-02, -2.762e-02, -4.989e-02, -5.027e-02)); + r += mul(s1_0, M4(2.221e-03, -1.009e-02, 5.894e-02, -5.153e-03, -4.216e-02, 4.741e-02, -2.521e-02, 2.051e-02, 7.111e-02, 1.454e-02, -1.271e-01, 4.734e-02, 1.520e-01, -2.913e-02, -1.744e-02, -3.606e-04)); + r += mul(s1_1, M4(-7.256e-02, -5.916e-02, -1.794e-01, -9.403e-02, -3.887e-02, -1.682e-02, -1.125e-01, 1.256e-01, -1.047e-01, 1.052e-01, 3.306e-02, -1.035e-01, 3.830e-02, 6.017e-02, 
-1.455e-01, -3.673e-02)); + r += mul(s1_2, M4(-4.656e-02, 6.576e-02, -1.206e-02, 7.373e-02, 4.382e-02, -1.212e-01, 1.372e-02, 8.064e-02, -2.083e-02, -3.965e-02, 9.661e-02, 1.333e-01, -1.871e-02, 6.751e-02, -7.939e-02, -5.082e-03)); + r += mul(s1_3, M4(-4.448e-02, 1.242e-01, 1.363e-01, 1.097e-02, 3.059e-02, 1.664e-01, 6.723e-03, -2.766e-02, 2.248e-01, 3.909e-02, -3.693e-02, -2.665e-02, -6.694e-02, -6.712e-02, -2.997e-02, -5.125e-02)); + r += mul(s1_4, M4(-8.008e-02, -8.090e-02, 2.143e-02, 1.325e-01, 2.997e-02, 4.596e-03, 4.955e-02, -1.075e-01, -1.620e-01, -1.642e-02, -7.744e-02, 4.908e-02, 1.461e-01, 4.913e-02, 1.980e-01, 1.527e-03)); + r += mul(s1_5, M4(7.835e-02, 2.489e-03, -1.135e-01, -7.925e-03, -6.510e-02, -5.075e-03, -1.535e-01, -5.507e-02, -3.445e-03, -6.483e-02, -4.045e-02, -5.231e-02, 6.616e-02, -1.497e-01, 1.292e-01, -1.680e-01)); + r += mul(s1_6, M4(7.055e-02, 4.363e-04, -8.264e-02, 1.886e-01, 1.036e-01, -3.691e-02, 6.142e-02, -8.803e-02, 3.578e-02, 4.038e-02, 2.817e-02, 3.503e-02, 1.955e-02, 6.893e-02, -9.415e-02, -1.477e-01)); + r += mul(s1_7, M4(1.175e-01, -4.995e-02, 2.677e-02, 1.575e-01, -5.070e-02, -1.014e-01, -4.180e-02, 9.582e-02, -6.029e-02, -4.345e-03, 1.650e-01, -1.279e-01, -5.815e-02, 2.293e-01, -1.717e-01, -1.789e-03)); + r += mul(s1_8, M4(6.751e-02, 2.129e-02, -1.387e-01, 1.960e-01, 7.240e-02, -9.169e-02, 2.059e-02, -2.487e-02, 3.175e-02, 8.674e-02, 1.437e-02, 1.178e-01, 1.861e-01, 9.650e-02, 9.152e-02, 1.052e-01)); + r += mul(s2_0, M4(-1.147e-02, -1.573e-01, -1.140e-01, -1.091e-01, 5.245e-02, 1.415e-01, -1.954e-02, -1.658e-02, 8.472e-02, 5.175e-02, 1.571e-02, -6.254e-03, 3.042e-01, -7.161e-02, -2.180e-01, 1.224e-01)); + r += mul(s2_1, M4(-4.226e-02, 2.147e-01, 4.965e-02, 1.262e-02, 7.037e-02, 1.135e-01, 2.018e-02, -9.901e-03, 1.053e-01, 3.991e-02, -4.108e-02, 1.931e-01, -1.279e-01, -7.678e-02, -2.745e-01, -4.086e-02)); + r += mul(s2_2, M4(1.274e-01, 2.729e-02, -3.767e-02, -9.551e-02, -8.582e-04, 6.972e-02, 5.652e-02, -1.703e-01, -1.039e-01, 
2.611e-02, 5.974e-02, 1.935e-03, 7.292e-02, 1.259e-01, -1.342e-01, 1.929e-01)); + r += mul(s2_3, M4(-6.665e-02, -7.866e-02, -8.065e-02, 1.293e-02, -8.966e-02, 2.920e-01, -1.036e-01, 1.502e-02, -5.053e-02, -8.296e-03, 5.614e-02, -3.914e-02, -4.724e-02, 1.787e-01, -1.154e-03, 1.690e-01)); + r += mul(s2_4, M4(6.822e-02, 1.516e-01, -3.617e-02, -4.529e-02, -1.694e-01, 5.198e-02, 1.686e-01, 2.174e-02, -2.447e-02, 6.691e-02, -5.546e-03, -2.035e-01, 6.614e-02, -5.093e-03, -4.064e-02, -1.165e-01)); + r += mul(s2_5, M4(4.233e-03, -1.801e-02, 4.971e-02, 1.083e-01, -1.115e-01, 2.789e-01, 2.396e-02, 9.253e-02, 1.171e-01, -2.170e-01, -5.590e-02, 1.451e-01, 1.054e-01, 4.272e-01, 7.070e-02, -1.768e-01)); + r += mul(s2_6, M4(-8.236e-02, -8.419e-02, 7.004e-02, -2.031e-02, 6.163e-03, -5.510e-02, 2.173e-01, -2.632e-03, -1.647e-01, 1.409e-01, -2.231e-01, 4.268e-02, -8.566e-02, 1.053e-01, 3.142e-02, -2.915e-02)); + r += mul(s2_7, M4(-2.966e-02, 5.465e-02, 6.451e-02, -2.482e-02, 1.275e-01, -1.012e-02, 2.244e-01, -5.186e-02, -1.035e-01, -3.406e-02, -1.420e-01, 1.346e-01, -1.446e-01, 6.941e-02, 1.040e-01, -2.175e-01)); + r += mul(s2_8, M4(-3.278e-02, -1.629e-01, -7.868e-02, 1.538e-02, -6.383e-02, 1.542e-01, 1.447e-02, -4.754e-02, 1.373e-01, 6.638e-02, -9.902e-02, 1.489e-01, -6.896e-02, 2.958e-01, 1.456e-01, -4.785e-02)); + r += mul(s3_0, M4(2.547e-01, 3.212e-01, 8.020e-02, -1.309e-01, 5.922e-02, 7.839e-02, -4.911e-02, 1.490e-01, -5.825e-02, 7.189e-02, -3.384e-03, -2.041e-02, -1.284e-03, -1.997e-01, -1.863e-01, -8.275e-02)); + r += mul(s3_1, M4(-1.870e-01, 4.164e-02, -3.392e-02, 1.332e-01, 5.288e-03, 9.235e-02, 1.344e-01, 1.175e-01, 5.099e-02, 2.085e-02, 9.926e-02, -2.592e-02, 1.802e-01, 1.656e-02, -3.266e-02, -7.870e-02)); + r += mul(s3_2, M4(2.632e-01, -3.499e-02, -8.815e-02, -3.241e-02, 1.468e-01, 1.008e-01, 9.987e-02, -1.061e-01, 7.046e-02, -7.794e-02, -9.205e-03, -6.827e-02, -6.186e-02, 9.601e-02, 1.487e-01, -5.550e-02)); + r += mul(s3_3, M4(-2.477e-01, 3.069e-02, -9.052e-03, 
-4.887e-02, -1.160e-02, -1.008e-01, -1.210e-01, 8.770e-03, 2.958e-02, 1.116e-01, -6.531e-02, -1.139e-01, -1.901e-02, -1.002e-01, 5.271e-02, 7.910e-02)); + r += mul(s3_4, M4(-3.314e-02, -1.590e-02, -3.390e-01, -4.169e-02, -1.809e-02, -3.746e-02, -1.133e-01, -1.072e-02, 2.649e-03, -5.970e-02, 1.974e-03, -9.189e-02, -6.000e-02, -3.674e-02, 8.258e-02, -3.241e-02)); + r += mul(s3_5, M4(4.051e-02, -2.999e-02, 3.019e-01, 2.686e-01, -3.148e-02, -2.528e-03, -4.993e-02, 1.105e-02, 3.181e-02, 3.027e-02, -1.154e-01, -1.808e-02, -5.615e-02, -6.097e-02, 4.423e-02, 1.191e-02)); + r += mul(s3_6, M4(-5.900e-02, 1.167e-01, -1.050e-01, 6.975e-02, -5.700e-02, 7.639e-02, -1.640e-01, 5.529e-02, -2.196e-02, -5.372e-02, -3.286e-03, 1.109e-03, 7.070e-02, 2.862e-02, -2.767e-03, 3.804e-03)); + r += mul(s3_7, M4(-2.711e-03, 1.331e-01, -2.052e-02, -7.003e-02, 7.249e-02, -2.045e-01, 9.754e-02, -5.843e-03, -1.166e-01, -1.128e-01, 6.700e-03, -8.543e-03, 7.056e-02, 5.545e-02, -1.505e-03, 1.847e-02)); + r += mul(s3_8, M4(1.887e-02, -1.313e-01, 2.085e-02, -1.108e-01, -2.634e-02, -1.174e-01, 7.497e-02, -9.914e-02, 3.341e-02, 6.734e-03, -3.777e-02, -8.734e-02, -1.343e-01, 2.762e-02, 4.527e-02, 7.153e-02)); + r += mul(s4_0, M4(-1.545e-02, -3.653e-02, -5.942e-02, 1.156e-01, 6.447e-03, -5.395e-02, -9.426e-03, 1.245e-01, 1.464e-01, 1.203e-01, 1.234e-02, 6.136e-02, 5.830e-02, -2.980e-02, -5.208e-02, -6.155e-02)); + r += mul(s4_1, M4(6.423e-02, 1.237e-01, 3.567e-03, 8.611e-02, -7.281e-02, -6.306e-02, 2.345e-02, 1.440e-01, 1.770e-01, 2.031e-01, -4.515e-02, -1.793e-01, 7.675e-02, -3.717e-02, 5.172e-02, 7.787e-02)); + r += mul(s4_2, M4(-1.312e-02, 1.646e-01, 1.046e-02, 2.364e-02, 7.445e-02, 9.217e-02, -5.421e-02, 3.565e-02, -3.234e-02, -3.305e-02, 1.478e-01, -6.713e-02, 4.497e-02, 7.324e-02, 1.659e-02, 7.569e-03)); + r += mul(s4_3, M4(-8.683e-02, 1.411e-01, -6.177e-02, 6.614e-03, 1.355e-01, -1.229e-01, -2.661e-04, 4.569e-02, 1.053e-01, -4.977e-03, 5.875e-02, -3.044e-03, -9.842e-02, 1.890e-02, 1.434e-02, 
1.655e-02)); + r += mul(s4_4, M4(-9.536e-02, -1.763e-01, 7.815e-02, -1.333e-02, 7.132e-02, 5.331e-02, -7.482e-02, 1.781e-01, 7.652e-02, -1.080e-01, 1.546e-02, -5.917e-02, -2.365e-01, 7.391e-02, 1.376e-02, 5.754e-02)); + r += mul(s4_5, M4(-4.995e-03, -2.146e-02, 9.806e-02, 4.611e-02, -4.164e-02, -8.312e-02, -1.299e-01, 7.270e-02, -3.732e-02, 1.034e-01, 6.050e-02, -1.064e-02, -9.351e-02, 8.942e-02, -2.541e-01, -1.477e-01)); + r += mul(s4_6, M4(-1.657e-02, 8.071e-03, -2.339e-03, -1.180e-01, -1.259e-01, -4.578e-02, -7.240e-02, -9.815e-02, -3.630e-03, -1.096e-01, -3.503e-02, 3.489e-02, -1.277e-02, 1.032e-01, -1.663e-02, 1.283e-01)); + r += mul(s4_7, M4(1.148e-01, -1.073e-01, -5.350e-02, -6.558e-02, 1.551e-01, 5.418e-02, 5.600e-02, -3.793e-02, 2.760e-02, -9.009e-02, 1.656e-01, -1.353e-01, -7.874e-02, 9.518e-02, -2.130e-01, 2.211e-02)); + r += mul(s4_8, M4(1.467e-02, -1.723e-01, -1.542e-01, 3.568e-02, 1.502e-01, 1.887e-01, 4.048e-02, -9.308e-02, 9.895e-02, -6.491e-02, 3.288e-02, 3.030e-03, -1.132e-01, -6.092e-02, -1.132e-01, -8.899e-02)); + r += mul(s5_0, M4(1.955e-01, 8.898e-02, -1.181e-01, 2.091e-01, -2.980e-02, 9.983e-04, 6.224e-02, 1.254e-01, 1.760e-01, -7.738e-02, 1.133e-01, 3.441e-04, -7.540e-02, -3.831e-03, -9.871e-02, -1.331e-01)); + r += mul(s5_1, M4(1.465e-01, 1.291e-01, -1.122e-02, 1.248e-02, 2.605e-02, 6.992e-02, -2.890e-02, 6.967e-02, 1.604e-01, 4.109e-02, -1.681e-01, -1.727e-01, -6.260e-02, -5.841e-02, 7.631e-02, 1.554e-01)); + r += mul(s5_2, M4(-5.447e-02, -1.075e-02, 1.339e-01, 5.172e-02, -1.473e-01, -4.341e-02, -7.417e-03, -1.221e-01, 6.087e-02, -6.401e-03, -2.981e-02, 1.068e-02, 9.820e-02, -1.350e-01, 7.692e-02, -3.415e-02)); + r += mul(s5_3, M4(-8.800e-02, 2.183e-01, -1.861e-01, 8.092e-02, -1.644e-02, 5.692e-02, 5.083e-02, -3.447e-02, 7.326e-02, -4.875e-02, 4.948e-02, 9.255e-02, -8.272e-02, 1.329e-01, -2.294e-01, 5.548e-02)); + r += mul(s5_4, M4(1.760e-01, -1.564e-01, 1.384e-01, 7.514e-02, 1.817e-01, 2.840e-01, -3.442e-02, 7.496e-02, 2.412e-02, 
-1.839e-01, 2.331e-02, -6.661e-02, -1.343e-01, 1.361e-01, 5.822e-02, 1.591e-01)); + r += mul(s5_5, M4(-3.173e-02, 3.841e-02, 2.363e-01, -2.262e-02, -7.230e-02, -2.509e-01, 1.558e-01, 1.509e-02, 1.496e-02, -1.069e-01, -4.598e-02, 7.440e-02, 1.208e-01, -1.768e-01, 1.889e-03, -1.900e-03)); + r += mul(s5_6, M4(2.696e-01, 1.620e-01, -8.768e-02, -2.647e-01, 1.032e-02, -1.206e-01, 2.903e-02, -2.450e-01, 1.581e-01, 1.221e-01, -3.151e-02, -1.756e-02, 1.783e-02, -5.833e-02, -1.955e-03, -3.332e-02)); + r += mul(s5_7, M4(-1.619e-02, -1.299e-01, -3.026e-02, -1.959e-01, 2.244e-01, 4.688e-02, -3.125e-02, -1.485e-02, -1.218e-01, 1.568e-01, -3.067e-01, -1.595e-01, 1.118e-01, -1.011e-03, 5.350e-02, -2.963e-03)); + r += mul(s5_8, M4(1.541e-01, -4.289e-02, -1.134e-01, -3.869e-02, 4.947e-03, -1.265e-01, 6.246e-02, -1.231e-01, -8.599e-02, -9.106e-02, -7.921e-03, -2.395e-01, -4.238e-02, 1.077e-01, -1.513e-02, -1.894e-02)); + r += mul(s6_0, M4(2.234e-02, -4.553e-02, 1.398e-01, 1.044e-01, 6.616e-02, -6.898e-02, -6.531e-02, -9.138e-02, 6.607e-03, 1.845e-01, 1.200e-01, 8.614e-03, -5.389e-02, -7.037e-02, -6.011e-02, -7.417e-02)); + r += mul(s6_1, M4(1.756e-01, 7.443e-02, 7.561e-03, -6.561e-02, 1.822e-01, 3.157e-01, 1.129e-01, -9.867e-02, 9.461e-02, -1.430e-02, 4.898e-02, -1.162e-02, -1.654e-01, -1.253e-01, 5.611e-02, 1.337e-02)); + r += mul(s6_2, M4(-1.100e-01, -4.007e-02, -1.145e-01, -1.359e-02, 4.556e-02, -1.674e-01, -2.648e-02, 3.427e-02, -4.420e-02, 1.863e-01, -2.892e-02, 9.546e-02, 5.401e-02, 8.755e-02, -7.285e-02, 8.890e-02)); + r += mul(s6_3, M4(1.080e-02, 8.277e-02, -4.599e-03, 5.186e-02, -3.957e-02, -3.767e-02, -2.234e-02, 1.051e-01, -1.412e-01, -4.216e-02, 1.428e-01, 4.970e-02, 4.656e-02, 4.265e-02, -9.892e-02, -1.250e-01)); + r += mul(s6_4, M4(-1.043e-01, -1.390e-01, 1.606e-01, -9.595e-03, -1.813e-01, -7.386e-02, 2.139e-02, 1.030e-01, -9.393e-02, -1.933e-01, -3.866e-02, -7.151e-02, -1.906e-01, 4.585e-02, -7.522e-02, -7.019e-02)); + r += mul(s6_5, M4(3.033e-01, 2.124e-01, 
-2.764e-01, 2.947e-02, -1.150e-01, 1.872e-01, -1.574e-01, 5.572e-03, -1.556e-03, 2.202e-01, -1.994e-01, -2.116e-02, -3.251e-02, -1.068e-01, -1.175e-01, 6.481e-02)); + r += mul(s6_6, M4(-9.177e-02, 1.527e-03, 2.124e-01, -9.220e-02, 1.814e-01, -9.471e-02, -3.132e-02, 1.653e-02, 9.774e-02, -6.811e-02, 1.044e-01, 1.755e-01, 5.873e-02, 7.586e-02, -3.951e-02, 5.926e-02)); + r += mul(s6_7, M4(-1.676e-02, 2.192e-01, -5.895e-02, 8.951e-02, -3.314e-02, -1.216e-01, 2.604e-01, -6.996e-02, -6.390e-02, 1.267e-01, 1.534e-01, 1.274e-02, -4.624e-02, -1.175e-01, 1.763e-01, -1.708e-03)); + r += mul(s6_8, M4(-1.782e-01, 5.198e-02, -1.841e-01, -8.423e-02, 2.654e-02, 1.502e-02, 7.709e-02, 1.042e-01, -1.621e-01, 7.380e-02, 3.352e-01, -2.183e-01, 7.550e-02, -1.664e-01, 5.871e-02, 6.341e-02)); + r += mul(s7_0, M4(2.787e-02, -1.106e-01, 1.183e-01, 9.263e-02, -1.182e-01, -2.032e-02, 2.971e-02, -2.570e-02, 7.528e-03, 1.364e-01, -1.684e-02, 1.154e-02, 1.148e-02, -1.973e-01, 1.085e-02, 8.788e-02)); + r += mul(s7_1, M4(6.795e-02, -6.052e-02, -4.651e-02, -3.076e-02, -3.892e-02, 9.608e-02, 1.010e-02, 2.401e-02, 9.185e-02, 6.770e-02, -2.561e-02, 3.853e-02, 7.608e-02, -1.193e-01, -6.173e-02, -1.462e-02)); + r += mul(s7_2, M4(-2.358e-01, -5.858e-02, 8.549e-02, -6.835e-02, -1.159e-01, -1.814e-01, 6.819e-02, -9.286e-03, 7.542e-03, 3.463e-02, -7.895e-02, 1.015e-02, 7.713e-03, 2.471e-01, -7.800e-02, -5.346e-04)); + r += mul(s7_3, M4(3.231e-02, -1.111e-02, 1.412e-02, 2.273e-02, -1.642e-02, -5.983e-04, -8.450e-04, -1.484e-01, 1.165e-01, -7.660e-02, -2.169e-02, 1.688e-03, -7.418e-02, -1.375e-01, 6.957e-02, 1.065e-01)); + r += mul(s7_4, M4(-9.288e-03, 7.980e-02, -2.395e-02, 1.716e-02, 4.346e-02, -1.573e-02, -5.085e-02, 8.665e-02, 4.425e-02, -2.547e-02, 2.819e-02, -4.830e-02, -1.122e-01, 1.073e-01, -8.217e-02, -1.054e-01)); + r += mul(s7_5, M4(3.645e-02, -1.349e-01, -9.587e-02, 4.121e-02, 1.509e-01, 1.081e-01, -1.608e-02, -2.710e-03, -1.228e-02, -9.014e-02, -6.461e-02, -6.745e-02, -8.735e-03, 1.363e-01, 
1.944e-02, -1.469e-01)); + r += mul(s7_6, M4(4.604e-02, 6.706e-02, 4.920e-02, -6.415e-02, -2.627e-02, 2.023e-02, -7.547e-02, 9.757e-03, 6.469e-03, -3.236e-02, 1.086e-01, 1.333e-01, -4.419e-02, 1.301e-01, -9.470e-02, 1.160e-01)); + r += mul(s7_7, M4(-1.934e-02, -8.758e-02, -2.389e-02, 4.202e-02, -9.827e-02, 2.160e-03, 4.170e-02, 1.612e-01, -1.579e-01, 6.141e-04, -6.358e-02, 1.494e-01, 8.995e-03, 3.656e-03, -1.656e-02, -1.924e-02)); + r += mul(s7_8, M4(-8.968e-02, -5.243e-02, -1.008e-01, 2.100e-02, 6.757e-02, 3.647e-02, 9.779e-02, -2.094e-02, 6.224e-02, 1.040e-01, 7.031e-02, 4.442e-02, 4.028e-02, -1.658e-01, 1.233e-01, -4.432e-02)); + r += V4(1.948e-04, -1.211e-02, -1.102e-02, -4.060e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(7.666e-02, 9.032e-02, -2.201e-02, -6.146e-02, 2.565e-02, -1.249e-01, -2.453e-01, 1.563e-01, 3.906e-02, -1.881e-01, -1.185e-01, -6.974e-02, -7.445e-02, 2.590e-02, 1.370e-01, 9.587e-02)); + r += mul(s0_1, M4(1.485e-02, -5.406e-02, 1.198e-01, -3.525e-02, -2.749e-01, 7.934e-02, -8.831e-02, -1.235e-01, 2.601e-02, -3.620e-01, 4.006e-01, 6.251e-02, 1.061e-01, -4.198e-02, -1.020e-01, 7.027e-02)); + r += mul(s0_2, M4(8.552e-02, -8.702e-02, -1.050e-01, 8.812e-02, -8.084e-02, -1.278e-01, -5.957e-02, -7.390e-02, 9.906e-02, 3.698e-03, -2.657e-02, 1.046e-01, 3.750e-02, -4.238e-02, -6.149e-02, 
9.148e-02)); + r += mul(s0_3, M4(-2.718e-02, 2.005e-01, 3.921e-02, -1.183e-01, 2.641e-02, -3.971e-01, 3.685e-01, 2.269e-01, -3.809e-01, 1.650e-02, -1.960e-02, 9.178e-02, -4.059e-02, 1.761e-01, 6.480e-03, -6.381e-02)); + r += mul(s0_4, M4(-9.580e-02, 7.215e-02, -2.382e-02, -1.245e-01, 1.164e-01, 2.302e-01, -2.095e-01, 7.592e-03, -9.482e-03, 6.913e-02, -2.947e-01, 8.693e-02, -7.960e-02, -1.149e-01, -9.717e-02, 1.169e-01)); + r += mul(s0_5, M4(6.824e-02, -3.740e-02, -1.515e-02, 1.262e-02, -2.050e-01, 3.256e-02, 7.667e-02, -9.143e-03, 3.358e-01, 1.383e-01, 1.427e-02, 3.629e-02, 1.372e-01, -1.802e-01, 1.532e-02, -9.561e-02)); + r += mul(s0_6, M4(-6.141e-02, -3.787e-02, -2.865e-02, 1.904e-02, -5.788e-02, -1.007e-01, -1.655e-01, -2.909e-02, 4.200e-01, -7.197e-02, 1.794e-01, -1.511e-01, -2.651e-02, 3.723e-02, 1.975e-02, -5.730e-02)); + r += mul(s0_7, M4(-2.352e-01, 5.721e-03, -6.843e-03, -1.756e-02, -2.442e-01, 1.376e-01, 1.743e-01, -1.157e-01, 8.620e-02, -3.409e-01, 2.808e-01, 1.574e-02, -2.012e-01, -1.460e-01, 1.815e-02, -3.646e-02)); + r += mul(s0_8, M4(-7.671e-02, -7.516e-03, 7.112e-03, 1.314e-01, -1.211e-01, 2.989e-02, -1.114e-01, 8.195e-02, 1.931e-01, -3.090e-01, 2.236e-01, -1.479e-01, -1.198e-01, 1.394e-01, 6.682e-02, 2.823e-02)); + r += mul(s1_0, M4(2.086e-03, 7.605e-02, 2.401e-02, 4.246e-02, 1.589e-02, 1.838e-01, 3.515e-02, 8.184e-02, -3.673e-02, -9.430e-02, 2.514e-02, 6.057e-02, 2.841e-01, -6.646e-02, -1.239e-01, 2.357e-01)); + r += mul(s1_1, M4(8.877e-02, -9.418e-02, 3.641e-02, 1.261e-01, -6.069e-02, -8.376e-02, -1.186e-01, 1.055e-01, -1.299e-01, -9.798e-02, -3.721e-03, 1.341e-01, -5.935e-02, 1.766e-01, -1.779e-02, 2.536e-01)); + r += mul(s1_2, M4(-1.483e-02, -7.602e-02, 9.671e-02, 2.947e-02, 2.799e-02, -8.214e-02, 1.456e-02, -3.581e-02, 6.073e-02, -4.709e-02, 3.617e-02, 1.108e-01, -3.386e-02, 2.226e-01, -8.220e-02, 8.424e-02)); + r += mul(s1_3, M4(1.184e-01, -1.050e-01, 1.171e-01, -1.030e-01, -2.208e-02, 4.890e-02, 9.540e-03, 2.225e-02, -4.529e-02, -1.523e-01, 
7.643e-02, 2.764e-02, -1.046e-01, 4.842e-02, -9.415e-02, -3.510e-02)); + r += mul(s1_4, M4(1.693e-01, 1.102e-01, 3.030e-02, -8.288e-02, 1.280e-01, -6.049e-02, -6.572e-02, 5.382e-02, 7.693e-02, -4.614e-02, 5.881e-02, -5.057e-02, -7.652e-02, 6.112e-02, 4.165e-02, 1.633e-01)); + r += mul(s1_5, M4(2.528e-01, -2.931e-03, 5.261e-02, -9.514e-02, 6.023e-02, -1.958e-01, 1.570e-01, -7.941e-02, 1.139e-01, 1.269e-01, 8.440e-02, -2.689e-02, -9.776e-02, 2.662e-01, 1.035e-01, -3.723e-02)); + r += mul(s1_6, M4(-1.351e-02, -1.382e-01, -3.532e-02, -2.055e-02, -1.894e-02, 2.036e-01, 1.207e-01, 3.210e-02, 3.033e-02, 2.978e-02, -7.349e-02, -2.898e-02, 4.164e-02, -7.748e-04, -4.873e-03, -1.019e-01)); + r += mul(s1_7, M4(1.637e-01, 6.672e-02, -3.445e-02, -1.153e-01, -7.795e-02, 2.590e-02, 8.038e-03, -7.181e-03, 7.782e-02, 1.270e-01, -8.117e-02, 1.058e-01, -1.331e-01, -1.909e-02, -1.573e-01, -1.063e-01)); + r += mul(s1_8, M4(2.150e-02, 1.225e-01, -5.032e-02, 4.249e-02, -4.711e-02, -2.849e-02, 2.106e-02, 6.465e-02, -1.970e-01, -7.572e-02, -5.393e-02, -1.200e-02, 2.129e-02, 1.024e-01, 1.226e-01, -3.303e-03)); + r += mul(s2_0, M4(-5.706e-02, 1.852e-02, -1.208e-01, 3.757e-02, 2.709e-01, 9.974e-02, 3.542e-02, -3.886e-02, -1.522e-01, 1.936e-02, -7.223e-03, 4.352e-02, 1.106e-01, -1.664e-01, 6.247e-02, 5.651e-02)); + r += mul(s2_1, M4(-4.986e-02, 1.104e-01, -2.070e-02, 1.312e-02, -2.313e-02, 9.140e-02, -1.958e-02, 6.337e-02, 1.447e-01, -7.056e-02, -6.767e-02, -7.895e-03, -3.117e-01, -1.126e-01, 5.749e-02, -1.313e-01)); + r += mul(s2_2, M4(1.639e-01, 6.634e-02, -8.794e-02, -1.775e-02, 1.660e-01, 1.250e-01, -2.245e-01, -7.732e-03, 4.920e-02, 4.479e-02, 2.068e-01, -5.093e-02, 1.976e-01, -8.390e-02, 2.176e-01, -1.004e-01)); + r += mul(s2_3, M4(-3.828e-03, 1.570e-01, 1.870e-02, 5.240e-02, -4.134e-02, -1.812e-01, -7.654e-02, 4.044e-02, -1.988e-01, 7.861e-02, -1.080e-01, 2.211e-01, -1.910e-01, 1.124e-01, -1.051e-01, 9.256e-02)); + r += mul(s2_4, M4(3.312e-03, -3.115e-01, -3.936e-02, -1.400e-01, 
-1.562e-01, 7.303e-02, 3.164e-02, -2.597e-01, 4.709e-02, 8.292e-02, -2.773e-01, -7.151e-02, -1.589e-02, -7.293e-02, -1.918e-01, -3.138e-01)); + r += mul(s2_5, M4(1.104e-01, 3.507e-02, -3.472e-02, 1.219e-01, 7.315e-02, -4.458e-02, 1.653e-02, -1.176e-01, 2.262e-01, 1.725e-01, -3.199e-02, 2.294e-01, -4.454e-01, 7.487e-02, 7.350e-02, 6.679e-02)); + r += mul(s2_6, M4(-2.130e-01, 1.176e-02, 3.890e-02, -2.579e-02, 9.297e-02, 1.060e-01, 8.130e-02, 1.334e-01, -1.224e-02, -6.851e-02, 1.174e-01, 5.566e-02, 1.802e-01, -2.152e-02, -1.306e-01, 3.551e-02)); + r += mul(s2_7, M4(-1.007e-01, -1.406e-02, 7.112e-02, 7.262e-02, -6.888e-02, -6.141e-02, 6.369e-02, 1.721e-02, -2.350e-02, 1.393e-01, -4.095e-02, -8.129e-02, -1.207e-01, 1.489e-01, -3.054e-03, 5.149e-02)); + r += mul(s2_8, M4(-6.981e-02, 4.287e-02, -5.023e-02, -5.744e-02, 9.653e-02, 1.017e-01, -1.565e-01, -4.122e-02, 2.834e-02, -5.081e-02, -6.699e-02, -1.666e-01, 2.145e-01, 2.819e-01, 1.927e-01, -7.054e-02)); + r += mul(s3_0, M4(1.677e-01, 1.473e-01, 3.507e-01, -4.333e-02, -8.797e-03, -6.254e-02, -5.991e-02, -6.359e-02, 2.555e-02, -5.722e-02, -1.022e-01, 2.396e-02, -1.831e-03, -9.545e-02, 2.552e-02, 9.494e-02)); + r += mul(s3_1, M4(3.567e-01, 1.140e-01, -3.827e-02, 2.076e-02, -1.553e-02, 1.175e-02, 1.953e-02, -4.080e-02, 3.106e-02, -2.799e-02, 1.818e-02, 6.441e-02, 7.938e-02, 3.981e-02, 7.466e-02, 3.005e-02)); + r += mul(s3_2, M4(5.776e-02, 8.585e-02, 1.620e-01, 7.642e-03, -2.849e-03, -1.101e-02, -1.019e-01, 3.324e-02, -4.136e-02, -7.884e-02, -3.288e-02, 2.354e-02, -3.060e-02, -3.389e-02, 2.350e-02, 1.188e-01)); + r += mul(s3_3, M4(-1.462e-01, 7.524e-02, 1.006e-01, 1.034e-01, -1.036e-01, -1.128e-01, -1.691e-02, 1.681e-02, -2.843e-02, 1.467e-01, 2.567e-02, 6.833e-04, -9.970e-03, 9.909e-03, 2.123e-02, -1.002e-01)); + r += mul(s3_4, M4(6.614e-03, -3.403e-02, -1.475e-01, 9.263e-02, 1.236e-01, -5.917e-02, 4.470e-02, -5.797e-02, -9.955e-02, 1.586e-01, -5.972e-02, 7.191e-03, 1.374e-01, 6.408e-03, -8.691e-02, -5.325e-02)); + r += 
mul(s3_5, M4(1.359e-01, -1.171e-01, -1.458e-01, -1.435e-01, -8.667e-02, -9.621e-02, 5.608e-02, -2.996e-02, 9.120e-02, 6.453e-02, -8.623e-03, -1.160e-01, 3.232e-02, 9.819e-02, 8.440e-02, -1.131e-02)); + r += mul(s3_6, M4(1.741e-01, -1.437e-01, -1.568e-01, -8.545e-02, -7.874e-02, 4.162e-02, 5.367e-03, 4.751e-02, 1.575e-03, 6.930e-02, -1.165e-01, -4.856e-02, 2.079e-03, -2.181e-02, -4.974e-04, -1.268e-01)); + r += mul(s3_7, M4(1.503e-01, -2.117e-02, 1.944e-02, 1.516e-01, 5.005e-04, -9.844e-03, 8.157e-02, -5.608e-02, -1.814e-03, 1.025e-02, 1.097e-01, 2.607e-02, 1.773e-01, -7.290e-02, 1.030e-01, -3.631e-03)); + r += mul(s3_8, M4(3.999e-02, -4.659e-02, -5.558e-02, -7.875e-02, 2.126e-02, 7.585e-02, 1.009e-02, 1.504e-01, 6.764e-02, 3.493e-02, 1.973e-02, 1.869e-02, -6.713e-02, -1.065e-02, 3.534e-02, -1.395e-02)); + r += mul(s4_0, M4(-1.134e-01, 8.116e-02, -3.079e-02, 4.865e-02, 3.037e-02, -8.456e-03, 2.037e-02, 4.035e-02, 8.870e-02, -5.168e-02, -7.097e-02, 1.182e-02, 8.179e-02, 6.262e-02, -1.896e-02, -1.180e-02)); + r += mul(s4_1, M4(-7.539e-02, -1.209e-02, 5.066e-02, 5.975e-02, 3.251e-02, 1.350e-01, 1.070e-01, -1.179e-01, 5.979e-02, -1.042e-01, 3.319e-02, 6.937e-02, 1.298e-02, 1.025e-01, 4.439e-02, 1.572e-02)); + r += mul(s4_2, M4(1.715e-01, 1.622e-02, -1.270e-02, -2.385e-02, 9.077e-02, 3.933e-02, 3.160e-02, -3.490e-02, -8.716e-02, 2.980e-02, 1.200e-02, 1.393e-02, -1.437e-02, -1.855e-02, 6.809e-02, 5.919e-02)); + r += mul(s4_3, M4(1.477e-03, 1.297e-02, 1.144e-01, -6.711e-02, 5.587e-02, 2.710e-02, -5.822e-03, 4.206e-02, 9.215e-02, -1.196e-01, -7.597e-02, -1.183e-01, -1.901e-01, -3.141e-02, -2.777e-02, 1.525e-02)); + r += mul(s4_4, M4(-4.174e-02, -3.953e-02, 4.881e-02, 8.356e-02, 1.196e-01, 8.192e-02, 6.524e-03, 3.029e-01, -1.256e-01, -2.574e-01, -2.211e-02, -1.309e-01, 1.115e-02, 8.013e-02, -1.392e-01, 4.997e-02)); + r += mul(s4_5, M4(1.356e-01, -1.733e-02, 1.330e-02, -6.480e-02, 3.517e-02, 2.987e-02, -4.573e-03, -2.763e-02, 1.162e-01, 9.809e-03, 3.198e-02, -3.287e-02, 
1.137e-02, 7.550e-02, -1.550e-02, 3.757e-02)); + r += mul(s4_6, M4(-4.280e-02, 1.226e-01, -2.720e-02, -1.766e-01, 1.696e-01, -3.785e-02, -1.127e-01, -7.823e-02, 8.688e-02, -4.526e-02, 4.855e-02, 1.790e-02, 3.076e-02, -1.223e-01, 7.112e-02, -1.042e-02)); + r += mul(s4_7, M4(-1.149e-01, -1.131e-01, 1.137e-02, -1.700e-02, 7.438e-02, -5.237e-02, -2.700e-02, -1.385e-01, -5.454e-02, 2.996e-02, 1.073e-01, -1.845e-02, -2.428e-02, -2.637e-01, -3.791e-02, -9.931e-03)); + r += mul(s4_8, M4(1.571e-02, 4.287e-02, 1.423e-03, -4.407e-02, -3.704e-02, 1.618e-02, -2.138e-02, -1.316e-02, 3.961e-02, -8.012e-03, 3.706e-03, -4.285e-03, 9.088e-02, -5.418e-02, 3.478e-02, -4.119e-02)); + r += mul(s5_0, M4(4.162e-02, -1.331e-01, -5.190e-02, 5.715e-02, 2.217e-02, -1.053e-01, 2.508e-02, 1.156e-02, -2.425e-01, 1.921e-01, 8.250e-03, 1.186e-01, -3.488e-02, 6.156e-03, -1.557e-01, 9.502e-02)); + r += mul(s5_1, M4(3.882e-02, -2.317e-01, 6.298e-02, 8.951e-02, -6.931e-02, -3.340e-02, 8.133e-03, -6.065e-02, -7.284e-02, 2.075e-01, 1.225e-01, 1.334e-01, -7.015e-02, 1.558e-01, -1.325e-01, 2.827e-02)); + r += mul(s5_2, M4(-1.140e-01, -8.167e-03, 8.685e-02, -1.204e-01, 5.963e-02, -5.059e-02, -2.958e-02, 2.435e-02, -3.522e-02, 6.776e-02, 4.747e-02, 1.390e-02, -7.056e-02, 4.729e-02, -3.997e-02, 1.950e-02)); + r += mul(s5_3, M4(-2.847e-01, 8.763e-03, 3.029e-03, -8.931e-02, 4.601e-02, -1.008e-01, 1.617e-01, -1.121e-01, -3.437e-02, -2.914e-01, -6.525e-02, -2.407e-02, -1.468e-01, -1.163e-01, -2.765e-02, 1.279e-01)); + r += mul(s5_4, M4(-3.532e-01, -5.370e-01, -2.293e-01, -1.509e-01, -1.627e-01, -1.320e-01, 8.390e-02, 2.549e-01, 6.796e-02, -1.001e-01, 1.203e-01, -1.304e-01, 2.196e-01, -1.370e-01, -4.373e-02, 1.462e-01)); + r += mul(s5_5, M4(6.871e-02, 1.861e-01, 3.017e-02, -1.267e-01, -5.801e-02, -4.496e-02, 7.975e-03, -2.299e-03, 4.447e-02, 7.482e-02, 1.188e-01, -4.312e-02, 2.429e-02, -3.091e-02, -7.634e-02, -6.494e-03)); + r += mul(s5_6, M4(6.759e-02, 2.705e-01, 7.172e-02, -1.114e-01, 2.264e-02, -3.490e-02, 
1.427e-01, -1.086e-01, -7.622e-02, -1.221e-01, 6.731e-02, 3.163e-02, -5.137e-03, 6.783e-02, 6.506e-02, 1.396e-02)); + r += mul(s5_7, M4(-2.853e-02, -1.336e-02, -2.271e-01, -2.608e-01, 8.926e-03, -5.704e-02, -1.149e-01, -5.734e-02, 2.139e-01, 1.250e-01, -3.221e-02, 7.016e-02, 3.786e-02, -4.232e-02, 2.596e-02, -1.358e-02)); + r += mul(s5_8, M4(-1.766e-01, 9.220e-02, -1.463e-01, 9.172e-02, -3.084e-02, 4.158e-03, 3.663e-02, 1.392e-01, 2.581e-02, 4.948e-02, 7.296e-02, 6.705e-03, 9.721e-02, 5.589e-02, 9.313e-02, 5.676e-02)); + r += mul(s6_0, M4(1.389e-01, -5.061e-02, -1.088e-01, -5.382e-02, -1.983e-01, -1.810e-01, -5.466e-02, 1.263e-01, 1.119e-01, -5.409e-02, 1.548e-01, -1.145e-02, -1.095e-01, 8.266e-02, -1.538e-01, -8.467e-02)); + r += mul(s6_1, M4(-1.058e-01, 2.745e-03, -1.151e-01, 8.218e-02, 1.352e-01, 2.115e-02, 1.762e-01, -2.165e-01, -8.160e-02, 9.388e-02, -9.487e-02, -3.961e-02, -2.485e-01, 8.816e-02, 1.005e-01, -3.592e-02)); + r += mul(s6_2, M4(-3.547e-02, 1.115e-01, -9.055e-02, 1.703e-02, -4.863e-02, -8.964e-03, 8.184e-02, 1.259e-01, 1.277e-01, 7.002e-02, 8.729e-02, -5.827e-02, -1.313e-01, -1.059e-02, 5.656e-02, -6.988e-02)); + r += mul(s6_3, M4(1.333e-01, 2.085e-02, -8.655e-02, 8.451e-03, 5.484e-02, 4.948e-02, -2.670e-02, 1.193e-01, -7.518e-02, -2.204e-02, -3.060e-01, -3.716e-02, -2.628e-01, -6.542e-02, 1.778e-02, 5.259e-02)); + r += mul(s6_4, M4(-1.002e-01, 5.889e-02, -1.626e-02, 6.611e-02, 4.457e-02, -1.635e-01, 1.607e-01, -4.766e-02, 1.387e-01, 7.029e-02, -2.287e-02, -1.344e-01, -5.337e-02, 3.612e-01, -3.124e-02, 6.267e-03)); + r += mul(s6_5, M4(-3.155e-02, -1.333e-01, -2.880e-01, -8.074e-02, 1.202e-02, -1.220e-01, 7.272e-02, -8.412e-02, 3.824e-02, -2.659e-02, -1.101e-01, -1.081e-01, 9.377e-02, 7.997e-02, 7.275e-02, -3.931e-02)); + r += mul(s6_6, M4(2.572e-01, -1.077e-01, -1.644e-02, 1.457e-01, 5.132e-02, 9.947e-03, 4.456e-02, -6.646e-02, 6.961e-02, -8.575e-02, 1.134e-03, -1.151e-01, 1.080e-01, -8.681e-02, -1.298e-01, -1.321e-02)); + r += mul(s6_7, 
M4(3.594e-02, 3.047e-02, -2.546e-02, 6.505e-02, 1.297e-02, 7.161e-02, 7.782e-03, 7.071e-02, -2.103e-01, 1.381e-01, 6.068e-02, 4.778e-03, 2.234e-01, 5.741e-02, -3.634e-02, 1.066e-01)); + r += mul(s6_8, M4(1.112e-01, 6.533e-02, 9.034e-02, 6.432e-03, -6.278e-02, -5.334e-02, -4.704e-02, -8.631e-02, 7.447e-02, -2.760e-02, 3.754e-02, -6.832e-03, 1.714e-01, -6.705e-02, 1.092e-01, 4.948e-03)); + r += mul(s7_0, M4(3.159e-02, 1.240e-01, -5.362e-02, 1.381e-02, 8.764e-02, 5.201e-02, -1.034e-01, -5.419e-02, -4.280e-02, -4.836e-02, -1.431e-02, 8.510e-03, 3.429e-02, -3.161e-03, 1.769e-01, -2.308e-02)); + r += mul(s7_1, M4(1.021e-01, -1.192e-02, 1.610e-02, 6.216e-02, -4.984e-02, 7.992e-02, 3.379e-02, -1.361e-01, -4.569e-02, 2.165e-02, 1.517e-02, -9.636e-02, 1.179e-01, 3.497e-02, -4.487e-03, -9.354e-02)); + r += mul(s7_2, M4(-8.054e-02, 1.085e-01, -1.880e-01, -1.110e-02, 7.848e-02, 1.598e-01, -1.752e-02, -3.300e-02, -2.045e-02, -5.159e-04, -1.080e-01, 9.817e-03, 2.359e-01, -1.174e-01, 1.120e-02, 3.895e-02)); + r += mul(s7_3, M4(-1.928e-01, -1.955e-01, -6.519e-02, -5.186e-02, -6.512e-02, 8.111e-02, 2.676e-02, 1.932e-01, -4.831e-02, -7.808e-02, -1.497e-01, -1.236e-01, -2.210e-02, 2.161e-02, -1.781e-02, 8.890e-02)); + r += mul(s7_4, M4(-6.295e-02, -3.097e-02, 9.677e-02, -5.350e-02, -7.605e-02, 7.948e-02, 1.186e-02, 1.029e-02, 6.487e-02, 8.562e-02, -2.551e-02, -7.505e-02, 5.046e-02, -1.407e-01, 6.605e-02, 2.090e-02)); + r += mul(s7_5, M4(-7.145e-02, -2.834e-01, -2.368e-01, -6.555e-03, 1.479e-02, -6.847e-03, -7.866e-02, -6.069e-02, -2.575e-02, -3.028e-02, -1.240e-01, 5.744e-03, 7.801e-02, -2.100e-01, -5.000e-02, 1.188e-01)); + r += mul(s7_6, M4(1.041e-01, 8.486e-02, 3.132e-02, 7.366e-02, -2.167e-03, -3.460e-02, 6.584e-02, -6.718e-03, 2.570e-02, 7.664e-02, -5.134e-02, -4.992e-02, -3.093e-02, 1.045e-01, -1.632e-02, -6.519e-02)); + r += mul(s7_7, M4(2.307e-01, -9.116e-02, 6.069e-02, 1.480e-01, -3.872e-02, 1.230e-02, -1.795e-02, -4.544e-02, 2.310e-02, -1.826e-01, -4.322e-04, -5.085e-02, 
-1.604e-01, -1.015e-01, 1.450e-01, 8.899e-02)); + r += mul(s7_8, M4(-2.918e-02, -1.560e-02, 7.724e-02, 2.655e-02, 1.305e-02, -3.557e-02, 1.672e-02, -1.096e-01, -1.017e-01, -8.922e-02, -1.382e-02, -4.060e-02, -2.747e-02, 1.763e-01, -5.824e-02, 7.754e-02)); + r += V4(5.032e-02, -4.577e-02, 1.268e-02, -1.043e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(5.596e-02, 6.943e-02, -9.947e-02, 6.927e-03, -2.689e-01, 1.489e-01, 1.433e-01, -8.788e-02, -2.760e-01, -1.684e-01, 3.374e-01, -1.597e-01, 1.809e-01, -4.356e-02, -3.220e-02, 3.343e-02)); + r += mul(s0_1, M4(2.311e-03, 6.202e-02, 5.539e-02, -2.832e-02, -1.119e-01, 2.494e-03, 8.241e-02, 4.263e-02, 5.947e-02, 5.792e-01, 7.347e-03, -5.233e-01, -2.318e-02, -3.264e-02, 9.149e-02, 1.913e-01)); + r += mul(s0_2, M4(7.400e-02, 2.716e-02, 8.377e-02, -5.535e-02, -1.384e-01, -2.959e-02, -2.358e-02, 4.064e-02, -3.272e-01, -1.154e-01, -1.282e-02, 1.296e-01, 1.348e-01, 7.577e-02, 5.781e-02, -8.975e-02)); + r += mul(s0_3, M4(-1.644e-02, -3.264e-02, 1.552e-01, -2.369e-02, -1.587e-01, -6.883e-02, -2.633e-01, 1.207e-01, -3.596e-02, -4.935e-02, 1.061e-01, 1.171e-01, -9.021e-02, -1.341e-01, 9.733e-02, 4.458e-02)); + r += mul(s0_4, M4(-7.350e-02, 1.915e-02, 3.980e-02, -1.548e-01, 2.778e-02, 6.400e-02, -2.679e-02, 2.614e-01, -5.313e-02, -3.638e-01, 3.060e-03, -1.356e-01, 8.916e-02, 
-6.320e-02, 1.008e-01, -1.072e-01)); + r += mul(s0_5, M4(-1.026e-01, 3.062e-02, 1.216e-01, 2.672e-02, -7.390e-02, 5.774e-02, 3.043e-01, -3.872e-02, 9.893e-02, -1.651e-01, -4.124e-01, -5.980e-02, 1.284e-01, 9.903e-02, -1.100e-01, 5.260e-02)); + r += mul(s0_6, M4(-1.704e-02, 2.078e-02, -4.136e-02, -1.215e-01, 7.739e-02, 3.368e-02, 2.705e-01, 1.507e-01, 6.234e-02, 8.265e-02, 8.168e-02, -2.070e-01, -4.027e-02, -1.179e-01, -2.789e-02, 2.922e-02)); + r += mul(s0_7, M4(-5.078e-02, -1.269e-01, 7.594e-02, -2.399e-02, -2.337e-01, -2.242e-01, -1.177e-01, -3.980e-02, 9.262e-02, 1.368e-01, 3.213e-01, 1.154e-02, 2.914e-02, 1.221e-02, 4.595e-02, -3.833e-02)); + r += mul(s0_8, M4(8.054e-03, 2.570e-02, -2.123e-02, -5.568e-02, -1.061e-02, 4.955e-02, 3.951e-02, -4.474e-02, 2.179e-01, 1.455e-01, 2.215e-01, -4.518e-02, 2.380e-02, -5.217e-02, 8.083e-02, 1.411e-01)); + r += mul(s1_0, M4(-1.332e-02, 1.908e-01, -1.272e-01, -8.897e-02, 6.893e-02, 4.808e-02, -1.235e-02, 1.406e-01, 9.182e-02, 1.199e-01, -6.087e-02, -2.790e-02, 7.823e-02, -7.443e-02, -2.908e-02, 7.824e-02)); + r += mul(s1_1, M4(-5.931e-02, -2.106e-01, -7.093e-03, 3.143e-01, -7.278e-03, -8.448e-02, -6.706e-02, 7.873e-02, -1.372e-01, 1.503e-02, 4.273e-03, 2.550e-01, 7.066e-02, 1.156e-02, 2.972e-02, 6.368e-02)); + r += mul(s1_2, M4(-1.457e-01, -1.103e-01, -5.727e-02, 1.685e-01, 8.429e-02, -4.365e-02, 9.143e-02, 4.470e-02, 7.318e-02, -6.442e-02, 5.918e-02, 1.633e-01, -1.041e-01, -1.029e-01, 1.642e-01, 8.357e-02)); + r += mul(s1_3, M4(-2.876e-02, -6.630e-02, -6.576e-02, 9.062e-02, -1.580e-01, -5.584e-02, -8.636e-02, 9.241e-02, -3.216e-02, 1.613e-01, -4.132e-02, 2.281e-02, 2.734e-02, 2.553e-02, 1.083e-01, -5.546e-02)); + r += mul(s1_4, M4(9.239e-02, -2.471e-02, 5.862e-02, -5.237e-02, -5.395e-02, 1.035e-01, -1.003e-01, 2.782e-02, -1.126e-02, -2.389e-01, -3.320e-02, -7.841e-02, 5.396e-02, 2.841e-02, 9.477e-02, -2.390e-01)); + r += mul(s1_5, M4(-1.362e-01, 3.032e-02, -7.620e-02, 1.458e-01, -1.179e-01, 1.034e-02, 1.727e-02, -4.245e-02, 
-7.083e-02, 7.070e-02, -4.095e-02, 5.322e-02, -1.408e-02, 3.720e-02, 5.996e-02, -4.981e-02)); + r += mul(s1_6, M4(1.119e-01, 8.073e-02, 5.773e-02, 2.301e-02, 5.561e-02, 2.241e-03, -7.863e-02, 5.573e-02, -7.112e-02, -9.685e-02, -1.772e-01, -3.446e-02, -1.049e-01, 1.026e-02, 1.957e-02, -1.111e-01)); + r += mul(s1_7, M4(1.788e-01, -2.716e-02, 1.019e-01, 2.178e-02, 1.246e-01, 1.107e-01, 1.697e-01, 1.167e-01, 3.394e-02, -1.176e-01, -2.129e-02, -3.989e-02, 2.066e-01, 8.348e-02, -3.966e-02, 1.921e-02)); + r += mul(s1_8, M4(1.814e-01, 1.194e-02, -3.687e-03, -2.048e-02, -5.610e-02, -1.853e-02, 4.549e-02, 6.164e-04, -5.818e-02, 5.473e-02, -1.331e-02, -1.554e-02, 7.833e-02, 9.653e-02, -4.808e-03, -1.758e-02)); + r += mul(s2_0, M4(6.169e-02, 5.514e-02, 3.773e-02, -6.746e-02, 6.614e-02, -8.601e-03, 1.393e-01, 1.328e-01, -3.773e-02, -9.326e-02, 7.894e-02, -3.562e-02, 4.718e-02, 8.964e-02, 8.342e-02, -2.218e-02)); + r += mul(s2_1, M4(2.138e-02, 4.727e-05, -7.793e-02, -1.449e-02, -9.726e-02, 2.636e-01, -1.008e-01, -1.360e-01, 9.358e-02, -2.191e-01, -1.389e-01, 1.010e-01, 1.833e-01, -2.234e-02, 8.280e-02, 1.099e-01)); + r += mul(s2_2, M4(-1.759e-02, 2.214e-02, -9.699e-02, -7.946e-02, 3.458e-01, 2.307e-01, 1.965e-02, -5.948e-02, -1.102e-01, 1.136e-01, 6.058e-02, -2.060e-01, -1.455e-01, 2.091e-01, -1.234e-01, -2.595e-02)); + r += mul(s2_3, M4(-3.891e-02, -4.991e-02, 7.644e-02, 8.675e-03, -1.237e-01, -1.090e-01, -6.923e-02, 1.674e-01, 6.834e-02, -1.947e-03, -2.001e-01, -2.587e-01, -6.223e-02, -1.757e-02, 4.044e-02, -2.019e-01)); + r += mul(s2_4, M4(-1.240e-01, -1.421e-01, -1.120e-02, 1.355e-01, -1.043e-01, 1.205e-01, -6.896e-02, 8.419e-02, 1.083e-01, -3.586e-01, -2.787e-01, 3.128e-01, 2.695e-01, 6.336e-02, 1.496e-01, 7.207e-02)); + r += mul(s2_5, M4(-1.943e-02, 4.023e-02, -9.246e-02, -2.339e-02, -1.184e-01, 1.636e-01, -1.181e-01, -6.135e-02, 1.293e-01, 7.349e-02, -3.035e-02, 1.341e-03, 1.462e-01, -1.693e-01, -6.412e-02, 1.047e-01)); + r += mul(s2_6, M4(1.248e-01, -4.353e-02, 
-6.607e-02, 3.145e-02, -1.563e-01, -1.372e-01, 1.860e-05, -4.642e-02, 1.864e-02, 7.264e-02, -9.642e-02, 1.876e-01, -4.477e-02, 1.522e-01, 1.609e-01, 1.611e-01)); + r += mul(s2_7, M4(6.286e-02, 1.131e-01, 4.975e-02, -6.677e-02, -2.744e-01, -4.851e-03, 1.142e-01, -8.230e-02, 1.155e-01, 1.004e-03, 3.373e-02, 1.243e-01, -1.140e-02, 3.190e-01, 1.543e-01, -2.304e-01)); + r += mul(s2_8, M4(1.952e-02, -3.838e-02, -1.306e-01, -2.731e-04, 1.470e-01, 2.674e-03, -2.177e-01, 2.223e-02, -2.050e-02, 2.408e-01, 6.113e-02, -1.239e-01, 1.999e-01, -7.711e-02, -1.954e-01, -2.232e-01)); + r += mul(s3_0, M4(1.559e-01, 3.444e-01, 2.037e-01, -1.465e-01, 1.257e-03, -9.577e-02, -6.696e-02, -1.273e-02, 4.017e-02, 3.491e-02, -8.291e-04, 1.119e-01, -8.614e-02, 7.718e-02, 2.744e-02, 5.497e-02)); + r += mul(s3_1, M4(-4.794e-02, -1.063e-01, -2.436e-02, 7.873e-02, 5.152e-02, 1.919e-02, 1.182e-01, 1.987e-01, -3.528e-02, -3.674e-02, -6.329e-02, 6.244e-02, 3.650e-02, -1.095e-01, -6.021e-02, 9.880e-02)); + r += mul(s3_2, M4(8.782e-02, -2.972e-02, -5.703e-02, 1.306e-01, 8.701e-02, -8.616e-02, 4.235e-03, -4.952e-02, -2.192e-02, 2.296e-02, -4.155e-02, 4.513e-03, -8.387e-02, 9.915e-02, 6.844e-02, 4.860e-02)); + r += mul(s3_3, M4(1.609e-01, 1.368e-01, 6.485e-02, 7.794e-02, -2.713e-03, -1.406e-02, -3.511e-03, 4.021e-02, -1.304e-01, 1.322e-02, 4.628e-02, -8.888e-02, -2.384e-02, -7.348e-03, 8.772e-02, 1.494e-02)); + r += mul(s3_4, M4(-1.010e-01, -9.351e-02, -1.047e-01, 2.298e-02, 1.029e-01, 9.090e-02, 6.553e-03, -7.160e-02, -8.154e-05, 2.276e-02, 9.211e-03, 1.375e-02, 1.216e-02, -5.997e-02, -2.742e-02, 9.009e-02)); + r += mul(s3_5, M4(1.007e-01, 3.867e-01, -5.350e-02, 4.043e-02, -1.333e-01, 2.413e-02, -5.320e-02, -5.714e-02, -3.149e-02, 1.844e-03, 4.310e-02, -1.117e-01, -2.069e-02, 2.987e-03, -1.693e-02, 4.311e-02)); + r += mul(s3_6, M4(1.419e-01, 5.698e-02, 5.687e-03, 6.534e-02, 8.299e-05, 9.204e-02, 6.563e-02, -2.972e-02, 2.281e-01, -1.643e-02, 4.670e-02, 1.436e-01, -1.159e-01, -5.028e-02, 1.157e-01, 
-7.019e-02)); + r += mul(s3_7, M4(-4.644e-02, -4.332e-02, -2.600e-01, 6.519e-02, -5.878e-02, 1.854e-02, 1.260e-01, -2.437e-02, 6.990e-02, 3.990e-02, 2.418e-02, 8.748e-02, 2.327e-02, 5.167e-03, -5.180e-02, -1.586e-01)); + r += mul(s3_8, M4(-7.066e-03, 1.938e-01, 2.519e-02, -5.551e-03, 1.238e-01, -1.177e-01, -8.184e-02, -6.361e-02, 7.968e-02, 3.828e-02, -2.906e-02, -2.967e-02, 7.054e-02, -6.567e-02, -4.154e-02, -8.246e-02)); + r += mul(s4_0, M4(-5.226e-03, -5.506e-02, 1.084e-01, 6.104e-02, -7.084e-02, -1.369e-02, 5.770e-02, 9.369e-03, 7.137e-02, 3.812e-02, -1.163e-01, -1.801e-01, -2.820e-02, -4.895e-02, -3.198e-02, -1.199e-01)); + r += mul(s4_1, M4(-7.016e-03, -1.376e-01, 1.139e-01, 3.146e-02, 4.089e-02, 1.179e-01, -7.702e-02, 7.841e-02, -9.047e-03, 5.866e-02, 1.310e-02, -1.178e-01, -1.314e-03, 9.423e-02, -1.281e-01, 8.136e-02)); + r += mul(s4_2, M4(-2.164e-02, -1.157e-01, 2.769e-03, -1.675e-01, 2.046e-02, -3.547e-02, -5.441e-02, 1.125e-01, 6.445e-02, 9.884e-02, 5.804e-02, -5.950e-02, -7.545e-02, 1.221e-02, -1.022e-01, 1.132e-02)); + r += mul(s4_3, M4(-5.272e-02, 9.168e-02, 7.058e-02, -1.049e-01, -1.167e-01, 1.090e-01, 3.349e-02, 1.393e-02, -2.392e-02, -2.323e-02, -7.106e-02, 5.207e-02, 1.703e-01, -1.519e-02, 1.283e-02, 2.979e-02)); + r += mul(s4_4, M4(1.874e-01, -1.966e-03, -5.432e-02, 5.125e-02, -2.042e-01, -2.495e-02, 5.208e-02, 1.425e-02, 7.137e-02, -1.373e-01, 5.823e-02, 1.683e-01, 1.286e-01, -1.747e-01, -9.492e-02, -4.643e-02)); + r += mul(s4_5, M4(1.174e-01, 9.680e-02, -3.946e-02, -5.568e-02, 1.585e-01, -1.490e-01, 5.321e-02, -2.085e-01, -1.114e-02, 2.950e-02, -2.756e-02, -1.348e-01, -1.992e-01, -3.037e-01, -9.906e-03, 1.090e-01)); + r += mul(s4_6, M4(1.406e-02, 1.203e-01, 8.815e-02, -7.641e-02, 1.344e-01, -8.767e-02, -1.154e-01, -7.974e-02, 9.987e-03, -6.403e-02, -8.446e-02, -3.506e-02, 5.380e-02, 6.236e-02, 7.203e-04, -3.519e-02)); + r += mul(s4_7, M4(1.950e-01, 3.658e-02, 6.341e-03, 7.395e-03, -7.013e-03, -1.984e-01, 9.983e-02, 2.809e-02, 1.391e-01, 
9.638e-03, -1.054e-01, 3.263e-02, 5.608e-02, 6.084e-02, -4.368e-02, -1.041e-01)); + r += mul(s4_8, M4(-8.957e-03, 3.743e-02, -3.755e-02, -3.630e-03, -5.561e-02, -9.400e-02, 2.827e-02, 9.386e-02, -8.381e-02, -1.216e-02, 1.436e-01, 1.519e-02, -8.019e-02, -1.944e-01, -6.004e-02, -7.721e-02)); + r += mul(s5_0, M4(-3.770e-02, -3.560e-02, 1.079e-01, 1.237e-02, 5.456e-03, -3.473e-02, 7.107e-02, 1.288e-01, -1.319e-01, -2.477e-02, -2.558e-02, -2.240e-01, -2.778e-02, -5.581e-02, 2.795e-02, -1.127e-01)); + r += mul(s5_1, M4(4.309e-02, -7.101e-02, -1.136e-01, -4.001e-02, -5.329e-02, 2.163e-02, -1.911e-01, 1.211e-01, 3.649e-01, 1.931e-01, -1.604e-01, -1.174e-01, -1.129e-01, -7.409e-02, 8.196e-02, -1.679e-02)); + r += mul(s5_2, M4(1.002e-02, 4.060e-03, 2.965e-02, 6.396e-02, -2.982e-02, -2.065e-02, -5.606e-02, 6.198e-02, -2.529e-01, -8.005e-02, -2.032e-01, 1.956e-01, 2.000e-01, 8.002e-02, 1.950e-01, -8.213e-02)); + r += mul(s5_3, M4(-5.168e-02, -4.015e-02, -1.103e-01, -2.920e-01, 7.194e-02, 4.356e-02, 5.759e-02, -4.769e-04, -2.601e-01, -2.538e-02, -2.720e-02, -9.741e-02, -1.163e-02, 4.238e-02, 1.093e-02, 1.766e-01)); + r += mul(s5_4, M4(9.714e-02, -2.569e-01, -2.301e-01, 2.634e-01, 5.650e-02, 2.310e-01, 1.422e-01, -1.343e-01, 1.473e-01, 1.136e-01, -2.179e-02, 1.065e-02, -3.685e-01, -1.466e-01, 2.465e-01, -1.020e-02)); + r += mul(s5_5, M4(-2.277e-01, 3.379e-02, 1.500e-01, -1.561e-01, 8.026e-02, -1.333e-01, 1.360e-01, 1.892e-03, -2.089e-01, -1.216e-01, -2.138e-01, 3.848e-02, -2.357e-02, -5.525e-02, 1.345e-01, -6.481e-02)); + r += mul(s5_6, M4(-4.744e-01, 1.842e-01, -1.170e-01, 6.383e-02, 4.721e-02, 8.683e-02, 1.298e-02, -1.428e-02, -1.004e-01, 7.408e-02, 7.070e-02, 5.249e-02, -8.792e-02, -8.696e-02, 2.164e-02, -1.972e-02)); + r += mul(s5_7, M4(6.505e-02, 7.645e-02, 5.747e-02, -2.255e-02, 7.953e-02, 3.267e-02, -4.402e-02, 6.919e-02, 3.438e-02, 1.884e-01, -1.650e-01, 9.716e-03, -2.982e-02, -1.122e-02, 2.533e-01, -6.158e-02)); + r += mul(s5_8, M4(-3.201e-02, 4.400e-02, -3.622e-02, 
1.647e-02, 1.066e-01, 5.696e-02, -9.074e-02, 9.751e-02, -2.343e-01, 7.295e-02, -1.021e-01, -1.173e-02, -4.465e-02, -3.944e-02, -8.525e-02, -4.887e-02)); + r += mul(s6_0, M4(9.043e-02, 7.508e-03, 2.129e-02, 1.540e-02, -7.932e-02, -1.045e-01, -1.140e-01, -1.687e-01, 1.802e-01, 7.851e-02, -7.967e-02, -7.167e-03, 6.469e-02, -8.154e-02, 4.053e-02, 5.567e-02)); + r += mul(s6_1, M4(2.664e-02, 1.391e-01, 3.487e-02, -4.714e-02, -1.321e-01, 9.727e-02, -6.896e-02, -8.919e-02, -1.120e-01, -8.822e-02, 1.392e-01, -3.856e-02, 2.445e-02, 1.520e-01, 5.066e-02, -7.421e-02)); + r += mul(s6_2, M4(3.721e-02, -1.419e-01, -2.924e-02, 1.156e-01, 7.611e-03, -9.886e-02, -1.088e-01, 1.078e-01, -1.667e-01, -4.557e-03, -1.189e-03, -5.344e-02, -2.037e-01, -1.039e-01, -5.445e-02, -1.639e-02)); + r += mul(s6_3, M4(-8.285e-02, 7.651e-02, 6.793e-02, -1.327e-01, -7.476e-02, 2.957e-02, -1.402e-01, -1.597e-02, 1.087e-01, 4.267e-02, -4.866e-02, 7.123e-02, 1.043e-01, 7.398e-02, 6.662e-02, 2.380e-01)); + r += mul(s6_4, M4(2.692e-02, -1.144e-02, -1.297e-01, 4.209e-02, -2.075e-02, -2.428e-01, 1.093e-01, -4.059e-03, 8.527e-02, -3.677e-02, -2.378e-01, 2.416e-01, 3.793e-02, 5.192e-02, -1.743e-01, 1.241e-01)); + r += mul(s6_5, M4(-6.434e-02, 4.554e-03, 5.535e-02, 1.197e-01, 1.461e-01, -2.247e-01, 2.250e-02, 1.630e-01, 1.435e-01, -2.037e-01, -1.038e-01, 3.018e-01, -1.497e-01, 2.416e-02, -1.329e-01, -1.305e-01)); + r += mul(s6_6, M4(1.234e-02, 2.975e-02, -1.131e-01, -9.281e-02, -1.997e-01, 8.254e-02, -4.436e-02, 2.195e-01, 2.740e-02, -1.035e-02, 7.146e-02, 4.567e-02, 9.093e-03, 1.457e-01, 1.219e-01, 6.470e-02)); + r += mul(s6_7, M4(-1.102e-01, 1.520e-01, 2.849e-02, 4.548e-02, -8.167e-02, -2.549e-02, 1.165e-01, 1.373e-02, -5.793e-02, -1.113e-01, 1.568e-01, 9.372e-02, -1.171e-01, 1.456e-01, -9.427e-02, -9.116e-02)); + r += mul(s6_8, M4(6.940e-03, -1.677e-01, -2.775e-01, -3.968e-02, -8.314e-02, 4.127e-02, -1.911e-02, -6.754e-02, -1.979e-01, -1.001e-02, 1.990e-01, 5.028e-03, 1.673e-02, 4.765e-02, -9.003e-02, 
4.037e-02)); + r += mul(s7_0, M4(-1.145e-01, 8.301e-02, 6.208e-02, 2.215e-01, 9.340e-02, 5.217e-02, -2.014e-02, -1.154e-01, 1.706e-01, -7.559e-02, -3.903e-02, -7.485e-02, 3.039e-02, -1.909e-02, 3.608e-02, -6.160e-02)); + r += mul(s7_1, M4(1.332e-01, 1.916e-01, -3.478e-02, 9.917e-02, -4.658e-02, -7.133e-03, -2.925e-02, -1.005e-01, 8.696e-02, -1.323e-01, 6.163e-02, -1.092e-01, 5.049e-02, -9.551e-02, 2.854e-02, -2.305e-02)); + r += mul(s7_2, M4(1.011e-01, 1.017e-01, -3.755e-02, 1.079e-01, 6.972e-03, 1.535e-02, -2.212e-02, 6.117e-02, -7.020e-02, -7.368e-03, -1.125e-01, -3.364e-03, 6.300e-02, 6.487e-02, -1.301e-02, -2.883e-02)); + r += mul(s7_3, M4(-1.024e-01, -5.268e-03, 4.721e-03, -1.492e-01, 3.963e-03, 1.960e-02, -1.616e-01, 5.027e-02, -1.413e-01, -2.592e-02, 4.603e-02, -9.958e-02, 1.733e-02, -3.088e-03, -3.237e-02, 3.956e-02)); + r += mul(s7_4, M4(2.329e-02, -1.743e-01, 2.547e-02, -4.663e-02, -4.411e-02, -1.693e-01, 1.284e-01, -1.596e-02, 9.503e-03, -6.803e-02, -7.773e-02, 7.189e-02, 2.373e-01, 1.594e-03, 9.512e-03, 6.362e-02)); + r += mul(s7_5, M4(7.467e-02, -9.632e-03, 9.902e-03, 2.912e-03, 1.204e-01, 4.616e-02, 4.934e-04, 1.086e-01, -5.761e-02, -5.369e-02, 1.120e-01, 7.251e-02, 1.033e-01, 1.897e-01, -2.853e-02, -2.763e-01)); + r += mul(s7_6, M4(-6.446e-02, -1.460e-02, 2.759e-02, -5.637e-03, -1.906e-01, 1.099e-03, 5.191e-02, 7.905e-02, 3.978e-02, 1.765e-02, -2.394e-02, 3.328e-02, 6.826e-03, -1.233e-01, -1.859e-02, 3.114e-02)); + r += mul(s7_7, M4(-4.130e-02, 1.134e-01, 7.295e-02, 6.430e-02, 1.268e-01, 1.275e-01, 1.018e-01, 5.018e-02, 6.976e-04, -6.946e-02, -7.873e-02, -3.524e-02, -1.080e-02, 2.427e-02, 1.715e-01, 3.197e-02)); + r += mul(s7_8, M4(-6.846e-02, -2.906e-02, -1.030e-01, -8.272e-02, -1.521e-01, 8.494e-02, 8.006e-02, -1.111e-02, -3.839e-02, 5.480e-02, 1.092e-01, 7.521e-02, 2.335e-01, -1.159e-01, 1.500e-02, 4.809e-02)); + r += V4(-6.811e-02, 2.117e-02, 1.429e-02, 1.018e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 
s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-8.859e-02, 6.322e-02, 2.170e-02, -5.152e-02, -9.958e-02, -5.578e-02, -3.615e-02, -3.355e-02, 8.025e-02, -4.163e-01, 1.121e-01, -7.176e-02, 8.557e-02, -6.191e-02, -8.471e-03, 1.507e-01)); + r += mul(s0_1, M4(-1.701e-02, 6.147e-02, -2.650e-02, -1.173e-01, 2.083e-01, 2.300e-01, -2.547e-01, 9.541e-02, 2.725e-01, -1.345e-01, -7.233e-02, 5.820e-02, 2.792e-02, 1.222e-02, -6.244e-02, 3.862e-03)); + r += mul(s0_2, M4(2.715e-02, 3.736e-02, 2.085e-02, -7.133e-02, 1.404e-01, -6.124e-02, -9.407e-02, 9.857e-02, -9.245e-02, -3.528e-01, 2.955e-02, 1.487e-01, 5.531e-02, 6.103e-04, 1.288e-02, 7.338e-02)); + r += mul(s0_3, M4(1.545e-02, -9.382e-02, -1.221e-01, 5.337e-02, 1.660e-01, 1.326e-01, 6.217e-02, -1.993e-01, -8.731e-04, -1.082e-01, -3.228e-01, 3.264e-01, -1.631e-01, 1.135e-01, -6.737e-02, -5.544e-02)); + r += mul(s0_4, M4(-3.348e-02, -3.130e-02, -3.545e-02, -5.740e-02, -1.497e-02, -2.188e-01, 1.266e-01, 1.900e-01, -1.525e-01, -3.757e-01, -9.728e-02, 1.389e-01, -2.877e-02, -1.662e-01, -1.350e-01, 1.802e-01)); + r += mul(s0_5, M4(-1.022e-01, -5.672e-03, -6.475e-02, -9.060e-03, -1.336e-01, -2.171e-02, 2.962e-02, -2.329e-01, 6.908e-02, -4.969e-02, -2.102e-01, 8.120e-03, 1.004e-01, 2.172e-01, 1.249e-01, 5.419e-02)); + r += mul(s0_6, M4(1.206e-01, 6.503e-02, -1.735e-02, 4.117e-02, -3.794e-02, 5.942e-02, 7.644e-02, -2.022e-01, 2.359e-01, 1.387e-01, -2.564e-01, 
2.318e-01, -5.847e-03, -3.742e-02, -1.891e-01, 1.058e-01)); + r += mul(s0_7, M4(8.511e-02, 9.170e-02, -1.099e-01, 1.121e-01, -5.193e-02, 8.655e-02, 1.825e-01, 1.038e-01, 3.536e-02, 2.280e-01, 9.794e-02, 1.558e-01, -2.327e-02, -7.911e-02, 5.903e-02, -1.061e-01)); + r += mul(s0_8, M4(-1.947e-02, -3.460e-02, 1.847e-01, -5.716e-02, 8.408e-02, 2.013e-01, -1.249e-01, -8.687e-02, -4.326e-01, 2.425e-01, -2.433e-01, -1.076e-01, 1.519e-01, -1.366e-02, 6.007e-02, -6.044e-02)); + r += mul(s1_0, M4(-6.492e-02, -2.177e-02, -7.928e-03, 5.827e-02, -1.468e-02, -8.457e-02, -9.106e-02, -6.499e-02, 7.837e-02, 3.092e-03, 2.215e-02, -1.439e-01, 1.451e-01, -1.505e-01, 1.376e-02, -4.341e-02)); + r += mul(s1_1, M4(-1.178e-01, 1.866e-01, 9.888e-02, -9.040e-02, -1.063e-03, 1.315e-01, -4.355e-02, 1.449e-02, 3.112e-02, -4.436e-02, -2.143e-02, -3.597e-02, 1.361e-02, -9.821e-02, -1.438e-01, -1.918e-02)); + r += mul(s1_2, M4(1.723e-01, 9.304e-02, -5.218e-02, 1.128e-01, -2.047e-02, -3.860e-02, -1.294e-01, 5.198e-02, -2.544e-02, -5.171e-02, 5.576e-02, 2.431e-02, 2.018e-03, 1.655e-01, 1.084e-01, -7.961e-02)); + r += mul(s1_3, M4(1.431e-01, 1.264e-01, 2.663e-01, -7.861e-02, -2.184e-01, 2.635e-02, -8.562e-02, -8.998e-02, -5.756e-02, -1.606e-01, -1.756e-02, -5.559e-03, 9.406e-03, 1.182e-01, -3.550e-01, 4.964e-02)); + r += mul(s1_4, M4(1.833e-01, 4.676e-03, 3.887e-02, 1.008e-01, 3.551e-02, 7.158e-02, 9.204e-02, -1.118e-01, -1.294e-01, -6.548e-02, -8.796e-02, -2.444e-02, -2.510e-02, -2.399e-01, -2.330e-01, 1.147e-01)); + r += mul(s1_5, M4(6.889e-02, -1.060e-03, -1.414e-01, 5.237e-02, 2.064e-02, -8.606e-03, 1.036e-01, 2.518e-02, -1.206e-02, 1.614e-02, -8.587e-02, -7.272e-02, 5.863e-02, -9.660e-03, -6.705e-02, -1.294e-01)); + r += mul(s1_6, M4(7.851e-02, -8.792e-02, 1.285e-01, -1.097e-01, 6.968e-02, 3.942e-02, 8.202e-02, 1.824e-02, 6.559e-02, 2.335e-02, 1.814e-02, 6.063e-02, 6.711e-03, 1.874e-04, -1.354e-01, 1.427e-01)); + r += mul(s1_7, M4(-7.320e-02, 2.411e-01, 4.008e-02, -1.093e-01, 7.410e-03, 
-8.033e-04, 5.956e-02, -1.279e-01, 8.079e-02, -6.280e-03, -6.561e-02, 1.949e-02, 1.092e-02, 2.485e-02, 5.006e-03, 8.929e-03)); + r += mul(s1_8, M4(4.080e-02, -4.657e-02, -1.572e-01, -1.018e-01, 1.786e-02, 3.402e-02, -1.280e-01, 4.746e-02, 1.041e-01, -6.042e-02, 6.023e-02, -2.181e-02, 1.002e-01, 1.014e-01, 5.495e-02, -8.713e-02)); + r += mul(s2_0, M4(1.541e-02, -1.639e-02, 2.137e-02, -8.870e-02, -2.421e-01, 1.216e-01, 8.481e-03, -1.146e-01, -8.107e-03, -1.252e-01, 1.507e-02, -1.246e-02, 1.972e-01, -2.484e-01, 1.043e-02, 9.379e-02)); + r += mul(s2_1, M4(7.847e-02, 2.798e-02, 4.175e-02, 4.999e-02, 3.078e-02, 6.905e-02, -1.325e-01, -7.661e-02, -2.406e-01, 4.476e-02, 1.869e-02, -7.977e-02, 4.665e-02, -1.554e-01, -5.259e-02, -2.868e-02)); + r += mul(s2_2, M4(-8.034e-03, -5.185e-02, -3.441e-02, 6.072e-02, -1.140e-01, 1.360e-01, -5.936e-02, -2.627e-01, 5.693e-02, -1.242e-01, -1.696e-01, -9.322e-02, -1.642e-01, 3.444e-02, -8.812e-02, -2.324e-01)); + r += mul(s2_3, M4(-5.840e-02, 4.646e-02, -1.123e-01, 1.928e-02, -5.682e-02, 1.747e-01, -1.014e-02, -3.330e-03, -8.661e-02, 1.380e-01, 1.094e-01, 2.438e-01, 1.066e-01, 1.375e-01, -6.315e-02, 1.153e-02)); + r += mul(s2_4, M4(2.190e-01, -4.420e-02, 4.606e-02, -4.466e-02, 2.064e-01, 4.502e-02, -9.403e-03, 3.311e-02, -8.319e-02, 5.644e-01, -6.043e-02, 5.784e-02, -1.040e-01, 3.443e-02, -1.610e-02, -3.490e-01)); + r += mul(s2_5, M4(-1.043e-01, -2.458e-01, 5.993e-02, 1.761e-02, 8.658e-02, 1.438e-02, 2.610e-02, 9.675e-02, 4.919e-02, 2.783e-01, -3.483e-02, -6.773e-02, 2.021e-01, -5.194e-02, 2.805e-01, -3.834e-02)); + r += mul(s2_6, M4(-2.257e-03, -8.335e-03, -8.261e-02, 3.041e-02, -7.001e-02, 1.016e-02, -2.078e-01, -1.798e-01, -3.439e-02, 2.334e-01, 2.715e-01, 3.900e-02, 8.569e-02, -1.696e-01, -3.142e-01, 3.344e-02)); + r += mul(s2_7, M4(7.281e-02, 6.357e-02, 1.869e-03, 4.939e-02, -1.440e-01, 4.230e-02, -8.573e-02, -1.929e-01, 8.670e-02, -1.610e-01, 2.438e-01, 6.542e-02, 1.191e-01, -1.737e-01, 5.330e-02, -1.193e-01)); + r += mul(s2_8, 
M4(-8.114e-02, 2.759e-02, -6.027e-02, 8.285e-02, 1.792e-01, -8.401e-02, 2.750e-02, 7.538e-02, -4.844e-02, -4.237e-01, 1.403e-01, -6.548e-02, 1.480e-01, -1.369e-01, -1.154e-02, -1.547e-01)); + r += mul(s3_0, M4(-5.719e-02, 7.109e-02, -1.870e-01, 9.910e-02, -8.484e-03, 4.225e-02, 1.670e-01, -2.912e-02, 4.867e-02, -1.222e-01, 9.972e-02, -8.255e-02, 2.865e-02, -7.476e-02, 8.196e-02, -6.590e-02)); + r += mul(s3_1, M4(2.877e-02, 1.382e-01, 2.887e-02, -1.182e-01, 1.169e-01, -7.158e-02, -5.751e-02, -4.480e-02, 4.399e-02, -1.564e-01, -5.860e-02, -1.704e-02, 5.989e-02, -6.261e-02, -6.685e-02, 6.785e-02)); + r += mul(s3_2, M4(2.990e-01, -1.112e-01, -5.059e-02, 3.883e-02, -1.041e-01, 1.087e-01, 9.495e-02, 7.279e-02, -1.640e-02, -1.135e-01, -9.370e-02, -1.178e-01, 6.239e-02, 1.673e-03, -1.302e-01, -9.523e-02)); + r += mul(s3_3, M4(2.498e-01, 1.568e-01, 3.136e-01, -8.231e-02, 1.195e-02, 3.503e-02, -1.552e-01, -1.291e-02, -6.553e-02, 1.039e-01, 5.600e-02, 7.192e-02, -6.076e-02, 1.011e-01, 1.472e-01, -7.687e-02)); + r += mul(s3_4, M4(-1.014e-01, 6.270e-03, -5.076e-02, -3.396e-01, -7.819e-02, -1.129e-01, -3.489e-02, -1.763e-02, 3.223e-02, 1.190e-01, -4.928e-02, -1.744e-01, 4.950e-02, 1.974e-02, 8.423e-02, 6.390e-02)); + r += mul(s3_5, M4(1.391e-01, -9.050e-02, -1.123e-01, -9.988e-02, -9.684e-02, -1.058e-01, 5.984e-02, 8.850e-03, 1.291e-01, -6.571e-02, -9.918e-03, -3.292e-02, -9.868e-02, -2.736e-02, 2.082e-02, -7.922e-02)); + r += mul(s3_6, M4(-1.762e-02, -3.843e-02, 8.312e-02, 2.341e-02, 8.817e-02, 3.820e-03, -5.261e-02, 3.950e-02, -3.782e-04, 5.334e-02, 8.513e-02, 1.353e-02, 1.302e-02, -7.066e-02, 5.351e-03, 7.396e-02)); + r += mul(s3_7, M4(2.824e-02, 1.439e-01, 5.632e-02, 1.106e-01, 5.794e-02, -2.101e-02, 1.550e-01, -1.765e-02, -5.208e-02, -4.870e-02, -1.031e-01, -2.989e-02, 3.582e-02, 1.657e-02, -6.794e-02, 8.592e-02)); + r += mul(s3_8, M4(-1.181e-01, 1.001e-01, -2.164e-01, -3.062e-02, -9.708e-03, -1.127e-01, -5.188e-02, 1.136e-01, -4.283e-02, -4.564e-02, -1.081e-01, -8.180e-02, 
1.733e-01, 7.093e-03, 4.043e-02, 2.436e-02)); + r += mul(s4_0, M4(1.127e-01, 4.974e-02, -1.839e-02, 6.811e-02, -4.333e-02, -1.329e-02, 2.155e-02, -2.119e-02, -2.256e-02, -2.323e-02, -1.048e-01, -1.424e-01, -1.072e-02, -1.559e-02, -1.120e-01, -7.210e-02)); + r += mul(s4_1, M4(-4.518e-02, 1.753e-01, 1.002e-02, 1.710e-01, -2.514e-02, 1.031e-01, -1.332e-01, 1.399e-01, 9.279e-03, -2.348e-01, 1.424e-03, 9.753e-02, -1.115e-01, 2.149e-02, 5.900e-02, 6.065e-02)); + r += mul(s4_2, M4(-7.765e-02, -5.987e-02, -4.236e-02, 1.478e-02, -1.527e-01, 1.483e-02, 5.999e-02, -1.747e-01, 2.443e-02, -3.329e-02, 2.952e-02, -1.813e-02, 1.967e-01, 6.332e-02, -5.620e-02, -3.235e-02)); + r += mul(s4_3, M4(5.791e-02, 4.891e-04, 9.614e-02, -6.785e-02, -2.669e-02, 5.869e-04, 1.837e-02, -1.353e-01, -3.811e-02, 1.064e-01, 1.230e-01, -4.896e-02, -8.791e-03, -6.746e-02, -1.566e-01, 6.373e-02)); + r += mul(s4_4, M4(-7.715e-02, -3.067e-02, 1.021e-02, -7.027e-02, -9.642e-02, -1.257e-01, 1.760e-01, 8.355e-02, 8.765e-02, -2.392e-02, 1.708e-02, 4.817e-02, 2.521e-01, -1.255e-02, -2.305e-03, 1.793e-01)); + r += mul(s4_5, M4(-1.933e-02, 2.397e-03, 3.774e-02, -6.654e-02, 1.599e-01, -1.372e-02, 7.945e-02, 3.182e-03, -1.790e-01, -1.861e-02, 2.315e-02, 6.369e-02, 3.996e-01, 2.077e-01, 4.052e-02, 7.993e-02)); + r += mul(s4_6, M4(5.925e-02, 5.322e-02, -6.143e-02, -2.917e-02, -4.093e-02, 2.257e-02, -2.946e-03, -1.271e-02, -9.013e-02, 8.076e-02, 2.692e-02, -1.875e-02, -1.806e-02, -2.695e-02, -2.003e-01, -7.395e-02)); + r += mul(s4_7, M4(6.590e-02, -1.440e-01, 7.175e-02, 5.587e-02, 3.464e-02, -1.546e-02, -3.360e-01, -6.753e-02, 2.334e-02, 3.456e-02, 1.933e-01, -7.728e-02, 8.228e-02, -4.761e-02, -3.199e-01, 8.885e-02)); + r += mul(s4_8, M4(-6.131e-02, 1.422e-01, -9.556e-02, -4.104e-04, 2.658e-01, -2.449e-01, 9.356e-02, -5.919e-02, 1.237e-01, -1.111e-01, 1.264e-01, 5.252e-03, 2.174e-01, -8.301e-02, -7.269e-02, -2.009e-02)); + r += mul(s5_0, M4(1.831e-01, 7.275e-02, -1.597e-01, -1.629e-02, 1.007e-01, -1.088e-01, 
2.198e-01, 3.281e-03, 1.399e-02, -1.966e-02, -1.126e-01, -2.027e-01, -4.323e-02, 7.016e-02, -5.852e-02, 6.758e-03)); + r += mul(s5_1, M4(1.122e-02, -1.328e-01, 4.639e-02, -2.091e-01, 8.031e-02, 1.596e-01, -4.684e-02, 9.917e-02, -2.690e-01, -1.123e-02, 1.155e-01, -3.281e-02, -2.568e-02, -8.358e-03, -7.061e-03, -3.356e-02)); + r += mul(s5_2, M4(-5.978e-02, -8.479e-02, -2.281e-03, -1.070e-01, -4.493e-02, -2.604e-03, -4.411e-02, -1.266e-01, 2.704e-01, 2.154e-01, -8.255e-02, -4.801e-02, -1.554e-02, -2.474e-02, 5.383e-02, -9.288e-02)); + r += mul(s5_3, M4(-1.631e-01, 6.184e-02, -1.479e-01, 1.227e-01, 2.307e-02, -1.954e-02, 1.013e-01, 3.237e-03, 1.406e-01, -9.271e-02, -7.872e-02, -2.719e-03, -1.360e-01, -2.956e-02, 1.636e-01, -7.006e-02)); + r += mul(s5_4, M4(-1.343e-01, 2.153e-01, -2.239e-01, -4.053e-01, 6.613e-02, -1.090e-02, 8.518e-02, 2.240e-01, -2.118e-01, 3.041e-02, -1.363e-01, 4.843e-03, 8.550e-02, 3.119e-02, 2.152e-01, -1.063e-01)); + r += mul(s5_5, M4(-1.837e-02, -1.025e-01, 7.395e-02, -1.662e-01, 1.619e-01, -8.360e-02, 7.945e-02, 1.230e-01, -4.636e-02, 1.337e-01, 1.534e-01, 1.928e-01, -1.803e-02, 9.599e-03, 6.785e-02, -4.536e-02)); + r += mul(s5_6, M4(9.095e-02, 3.857e-01, 1.256e-01, 3.445e-01, 5.654e-02, 1.214e-01, 2.543e-02, 9.302e-02, 1.886e-02, 1.136e-02, -1.681e-01, -7.571e-02, -1.642e-01, 7.446e-02, 1.367e-01, -3.871e-02)); + r += mul(s5_7, M4(2.691e-02, -2.795e-01, -5.362e-02, 1.358e-01, -1.050e-02, 2.046e-01, 1.565e-01, 1.966e-02, 2.583e-02, 2.795e-01, -1.754e-01, -5.247e-02, -1.777e-01, -3.521e-02, 1.460e-01, -4.981e-02)); + r += mul(s5_8, M4(1.238e-01, 3.012e-02, -5.700e-02, 1.567e-01, -6.813e-02, 5.409e-02, 1.131e-01, -1.371e-01, 1.154e-01, 1.418e-01, -2.079e-01, -6.996e-02, 1.199e-01, -5.648e-02, 1.825e-01, 1.458e-02)); + r += mul(s6_0, M4(1.084e-01, 3.154e-02, -5.662e-02, -4.178e-02, -3.714e-02, -1.673e-02, -5.654e-03, 3.828e-02, 3.957e-02, 1.144e-01, -1.146e-01, -2.938e-02, 9.624e-02, -3.715e-02, 1.081e-02, 5.599e-02)); + r += mul(s6_1, 
M4(9.831e-02, 1.365e-01, 2.181e-01, 1.342e-01, -7.069e-02, -2.787e-02, -1.186e-02, 1.142e-01, 8.013e-02, 1.136e-01, 9.548e-02, -2.485e-02, -6.512e-02, 2.538e-02, -2.026e-01, -4.354e-03)); + r += mul(s6_2, M4(-4.875e-02, 1.382e-01, 1.491e-01, -3.889e-02, -9.066e-02, -9.944e-02, 4.987e-02, 1.776e-01, -1.305e-03, -6.886e-02, 7.841e-02, 4.010e-02, -4.577e-02, 8.094e-04, 1.313e-01, 2.731e-02)); + r += mul(s6_3, M4(-1.122e-01, 1.067e-01, -3.704e-02, 9.418e-02, -5.281e-02, -2.233e-02, -1.779e-02, 5.766e-02, 1.673e-01, 9.455e-02, -2.049e-01, -2.730e-01, 4.742e-02, -1.450e-01, 5.864e-02, -6.473e-02)); + r += mul(s6_4, M4(9.616e-03, 1.394e-01, -9.777e-02, 7.508e-02, 4.562e-02, 7.005e-02, -2.097e-02, 1.196e-01, -5.153e-02, -1.973e-01, 2.292e-03, -1.021e-01, 2.530e-02, -9.281e-03, -1.504e-02, -1.252e-02)); + r += mul(s6_5, M4(-1.160e-01, 7.719e-02, 3.648e-02, 1.606e-01, -3.811e-02, -6.909e-02, 1.368e-01, -7.477e-03, -1.987e-03, -1.014e-01, -4.389e-02, -2.695e-02, -9.125e-02, -9.361e-02, 2.056e-01, -8.178e-02)); + r += mul(s6_6, M4(4.554e-02, 9.046e-02, 1.427e-01, -4.304e-02, -5.428e-03, 3.404e-01, 7.173e-02, -1.005e-02, -2.768e-03, -3.836e-03, -2.008e-01, -1.955e-01, -9.907e-03, -2.243e-03, 1.743e-02, -3.008e-03)); + r += mul(s6_7, M4(7.102e-02, -2.657e-02, 2.590e-01, 1.251e-01, 1.525e-01, 2.843e-02, -1.870e-01, 1.200e-01, -1.873e-01, 9.268e-02, 6.729e-02, -1.300e-01, 4.985e-02, 1.899e-01, -2.683e-02, 1.719e-02)); + r += mul(s6_8, M4(-1.102e-01, -7.919e-02, 4.716e-02, 8.983e-02, -1.152e-01, -1.537e-02, 9.967e-02, 1.016e-01, -5.071e-03, 1.274e-01, -8.979e-02, -4.063e-02, -1.750e-02, 1.179e-01, 1.003e-01, 1.702e-01)); + r += mul(s7_0, M4(5.322e-02, 4.872e-02, -1.469e-01, -6.719e-02, -1.084e-01, 7.051e-03, 6.497e-02, -5.518e-02, 4.297e-02, 1.155e-01, 2.677e-02, -2.306e-02, -2.999e-03, 5.815e-02, -1.004e-03, 1.849e-04)); + r += mul(s7_1, M4(-2.266e-02, 7.573e-02, -9.788e-02, -7.704e-02, 4.220e-02, -4.042e-03, -4.925e-02, -8.605e-04, -1.845e-02, 1.041e-01, 3.969e-02, -3.472e-02, 
-3.225e-02, 3.116e-02, 1.546e-01, -4.323e-02)); + r += mul(s7_2, M4(1.660e-02, -7.495e-03, 2.067e-02, 3.520e-02, -3.139e-02, -9.697e-02, -5.558e-02, 1.428e-01, 1.365e-02, -8.805e-02, 1.264e-01, 7.365e-02, 1.769e-01, -6.739e-02, 4.272e-02, 5.651e-02)); + r += mul(s7_3, M4(-1.428e-02, -8.794e-03, -1.057e-01, -8.956e-02, -5.146e-02, -3.932e-02, 2.349e-01, -9.492e-03, 3.855e-02, -6.862e-02, -1.941e-02, -9.356e-02, 3.056e-02, -1.250e-01, -1.463e-03, -1.228e-02)); + r += mul(s7_4, M4(6.866e-02, -5.040e-02, -1.617e-01, -3.817e-02, 6.959e-02, -1.202e-03, 1.496e-01, 9.141e-03, -1.083e-02, -6.471e-02, -6.832e-03, -1.079e-01, -2.617e-02, 1.955e-02, -6.724e-03, -9.354e-02)); + r += mul(s7_5, M4(-2.314e-03, 4.890e-03, 5.785e-02, 1.236e-01, 5.738e-03, 7.076e-02, -1.323e-01, -6.343e-02, 6.766e-02, -2.212e-02, -1.208e-01, -3.177e-02, 1.398e-02, -1.363e-02, 7.405e-03, 6.040e-02)); + r += mul(s7_6, M4(8.466e-03, 1.666e-02, 2.547e-02, 1.559e-02, 3.841e-03, -5.136e-02, -1.325e-01, -1.469e-01, 3.979e-02, 1.388e-02, -7.887e-02, -5.276e-02, 1.130e-02, 3.160e-03, 1.594e-01, -2.606e-02)); + r += mul(s7_7, M4(-9.227e-02, -1.744e-01, -2.784e-03, -9.271e-02, 1.806e-01, -1.583e-01, -7.984e-02, -5.001e-02, -2.769e-02, -5.131e-02, -2.440e-02, -9.161e-02, -1.992e-01, -1.213e-01, 5.715e-02, 4.442e-02)); + r += mul(s7_8, M4(-2.856e-02, 1.083e-01, -8.283e-02, -1.027e-01, -9.016e-02, -1.557e-01, -2.356e-02, -1.704e-02, -3.342e-02, 1.072e-01, 5.325e-02, -1.008e-01, 1.323e-01, -1.333e-01, -1.458e-02, -6.686e-02)); + r += V4(-3.986e-02, 1.071e-02, 2.911e-02, -3.068e-02); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = 
l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = 
max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, 
s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 6 +//!DESC conv5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(5.352e-02, -6.003e-02, -2.211e-02, 4.433e-02, 4.953e-02, -9.998e-03, 5.663e-02, 3.074e-02, -1.461e-01, -7.925e-02, 1.596e-01, 1.770e-02, 3.897e-02, -4.709e-02, 7.008e-02, 2.094e-01)); + r += mul(s0_1, M4(1.156e-01, 6.543e-03, 1.958e-01, 5.386e-02, 3.292e-02, -5.440e-02, -6.076e-02, -3.825e-02, 6.736e-03, 1.117e-01, 4.416e-02, -1.791e-01, -4.239e-02, -2.295e-02, 4.666e-02, -1.696e-02)); + r += mul(s0_2, M4(-1.186e-01, -4.513e-02, -5.032e-02, 
1.159e-01, 6.085e-02, -9.254e-02, -2.380e-02, -1.167e-01, -1.666e-01, 1.264e-01, -5.614e-02, -1.464e-01, -4.733e-02, -4.771e-02, 3.514e-03, -6.613e-02)); + r += mul(s0_3, M4(2.116e-02, 1.091e-01, -3.674e-02, -5.242e-02, 6.910e-02, -8.886e-02, 8.346e-02, 5.647e-02, -1.209e-01, -3.142e-01, -7.856e-04, 1.316e-01, 7.465e-02, 6.473e-02, 2.104e-02, 1.197e-01)); + r += mul(s0_4, M4(1.050e-01, 4.251e-02, -2.173e-01, -1.448e-01, -5.025e-02, 2.500e-01, 4.104e-02, 2.790e-02, -1.075e-01, 7.254e-02, 2.046e-01, 1.821e-02, 2.764e-02, -1.820e-01, 2.087e-01, 3.294e-02)); + r += mul(s0_5, M4(-3.813e-03, -3.270e-02, -5.619e-02, -6.237e-02, -8.666e-02, -5.051e-02, -4.036e-02, 4.378e-02, -9.974e-02, 1.327e-01, -1.024e-01, 1.066e-01, 3.751e-02, 1.488e-01, -7.101e-02, -4.326e-02)); + r += mul(s0_6, M4(-3.769e-02, 1.476e-01, 5.328e-03, 8.061e-02, -1.262e-01, 2.233e-02, -2.814e-02, 5.004e-02, -1.220e-03, 1.606e-01, 2.252e-02, 2.342e-01, 1.199e-01, 1.138e-01, -6.507e-02, 2.135e-01)); + r += mul(s0_7, M4(-4.111e-02, -3.667e-02, -4.015e-02, 7.155e-02, 9.064e-02, -1.618e-01, 1.331e-01, 6.908e-03, 1.260e-01, -7.548e-02, -1.234e-01, -3.638e-02, -1.596e-02, 1.630e-01, -3.623e-02, 4.260e-02)); + r += mul(s0_8, M4(-1.971e-02, 3.143e-02, -1.254e-01, -8.149e-02, -3.711e-02, -2.610e-02, -9.066e-02, -7.625e-02, 4.546e-02, -3.873e-02, 7.123e-02, -1.698e-01, 5.370e-02, 1.463e-01, -8.262e-02, 5.914e-02)); + r += mul(s1_0, M4(-4.456e-02, 9.059e-02, -5.945e-03, 8.410e-03, -5.316e-02, 1.826e-01, 3.103e-02, -1.228e-01, 4.338e-02, 7.191e-03, -6.517e-02, 4.575e-02, -5.153e-02, 2.403e-02, -6.266e-02, -2.030e-02)); + r += mul(s1_1, M4(-2.190e-03, 4.591e-03, 8.481e-03, 1.023e-02, -2.173e-02, -2.044e-01, -3.023e-01, -4.659e-02, -1.410e-02, -4.269e-02, 2.231e-03, 9.745e-02, 8.884e-02, -9.949e-02, 9.654e-02, 1.859e-02)); + r += mul(s1_2, M4(-1.202e-01, -8.493e-02, -1.220e-01, -5.496e-02, 1.908e-02, 1.421e-01, -5.350e-02, 8.420e-02, -1.172e-01, 1.707e-01, 3.295e-02, 3.927e-02, -5.777e-04, -1.265e-01, -2.614e-02, 
-7.910e-02)); + r += mul(s1_3, M4(3.880e-02, -6.074e-02, -1.442e-03, -1.654e-01, 9.664e-02, 3.303e-02, 3.226e-02, -7.141e-02, -1.076e-01, -1.150e-01, -2.339e-02, 1.651e-01, -3.130e-02, -1.802e-02, 4.318e-03, 3.434e-02)); + r += mul(s1_4, M4(-5.016e-03, -5.464e-02, -1.663e-02, 8.145e-02, 4.547e-02, -1.759e-01, -9.492e-02, -1.771e-01, -9.245e-03, 5.359e-02, 7.425e-02, 1.827e-03, -4.575e-02, 1.871e-01, 1.058e-01, -1.076e-01)); + r += mul(s1_5, M4(-1.027e-02, -1.968e-02, 1.940e-02, -5.706e-03, 7.775e-02, 2.402e-02, 1.660e-01, 2.068e-01, 7.661e-02, -5.935e-02, -1.205e-01, 1.600e-02, -1.972e-02, 1.954e-02, -7.720e-02, 4.828e-02)); + r += mul(s1_6, M4(-3.072e-02, -1.125e-01, 1.832e-02, 6.532e-02, -7.836e-02, -9.945e-03, 4.052e-04, -9.759e-02, 2.556e-02, 3.925e-02, -1.759e-02, 1.740e-04, 7.344e-02, 2.705e-02, -5.123e-03, 3.745e-02)); + r += mul(s1_7, M4(-1.653e-01, 2.419e-01, 1.005e-02, 1.101e-01, 2.671e-02, -1.024e-01, 5.774e-02, 4.496e-02, 1.642e-01, -1.284e-01, 1.539e-02, -1.165e-01, -9.144e-02, -3.061e-02, 8.213e-02, 3.711e-02)); + r += mul(s1_8, M4(-7.638e-02, -1.175e-01, -1.755e-01, -5.906e-02, -8.853e-02, 2.312e-01, -4.578e-02, 1.944e-01, -1.024e-02, 4.099e-02, -1.490e-01, -1.750e-02, -1.237e-02, -6.959e-02, 6.619e-02, 4.807e-02)); + r += mul(s2_0, M4(-9.459e-02, -4.447e-02, 1.633e-02, 7.760e-02, 1.663e-02, 5.495e-02, -2.123e-01, -8.782e-02, 9.847e-02, -2.494e-01, 5.506e-02, 2.300e-02, 9.393e-02, -2.066e-03, -2.887e-02, -1.034e-02)); + r += mul(s2_1, M4(-6.060e-04, -2.843e-02, 1.193e-01, 7.511e-02, 8.971e-02, -1.729e-01, 8.784e-02, 2.401e-01, -3.400e-03, -1.440e-02, 6.544e-02, 1.016e-01, 1.385e-01, -8.282e-02, 2.683e-02, 3.792e-02)); + r += mul(s2_2, M4(-3.848e-02, -1.576e-01, 7.396e-02, -3.511e-03, -4.518e-02, 1.550e-02, -8.470e-02, -1.773e-03, -4.893e-02, 6.221e-02, 2.359e-02, 3.289e-02, -1.229e-02, -2.320e-01, 1.303e-01, 5.384e-02)); + r += mul(s2_3, M4(8.782e-02, 6.043e-02, 3.194e-02, 6.482e-02, -4.146e-02, 3.285e-02, 2.284e-01, -1.415e-01, 1.369e-01, 2.055e-01, 
5.368e-02, -1.428e-01, 7.800e-02, -1.314e-01, 3.297e-02, -9.278e-02)); + r += mul(s2_4, M4(1.024e-01, 1.363e-01, -5.119e-02, 2.565e-02, 6.343e-03, 2.352e-01, 6.082e-02, -1.322e-02, 7.261e-02, -4.301e-02, 4.693e-02, -7.804e-02, -5.509e-02, -4.039e-02, -1.404e-01, -8.930e-02)); + r += mul(s2_5, M4(-7.801e-04, 2.480e-02, -1.927e-02, -7.391e-02, 1.095e-01, 3.992e-02, -1.621e-01, 6.971e-02, 6.987e-02, 1.023e-01, 1.446e-01, 1.184e-01, -1.187e-01, -1.961e-01, 4.407e-02, 2.737e-02)); + r += mul(s2_6, M4(-8.438e-02, -3.381e-02, -2.215e-02, -1.105e-01, 3.365e-02, 4.503e-02, -1.213e-01, -4.697e-02, 3.623e-02, 3.179e-02, -2.838e-02, 1.815e-02, 7.213e-02, -5.479e-02, 2.897e-02, -3.150e-02)); + r += mul(s2_7, M4(-4.557e-02, 1.267e-02, -1.491e-02, 1.259e-01, 8.430e-02, 1.924e-01, -8.617e-02, -1.781e-01, -2.634e-02, 1.175e-02, -7.070e-02, 3.062e-03, 6.270e-02, -2.510e-01, -2.626e-01, -4.705e-02)); + r += mul(s2_8, M4(7.945e-02, -3.057e-02, -2.458e-02, -8.121e-02, -7.519e-03, 9.856e-02, -5.723e-02, -5.697e-02, -2.230e-02, -6.706e-02, -9.360e-02, -7.224e-02, 4.266e-02, -7.034e-02, -2.337e-02, -1.566e-02)); + r += mul(s3_0, M4(3.309e-03, -2.313e-01, -1.734e-01, 1.240e-01, -6.625e-03, 4.509e-02, -1.171e-01, -1.023e-02, -3.997e-02, 7.266e-02, -7.198e-02, -1.391e-01, 5.669e-02, 2.601e-02, 8.004e-02, 2.964e-03)); + r += mul(s3_1, M4(1.932e-02, -2.117e-01, -2.602e-01, 1.114e-01, 8.733e-02, 1.008e-04, 1.115e-01, -2.662e-02, -1.801e-02, -9.135e-02, 1.514e-02, -2.415e-02, 6.470e-02, -5.469e-02, 2.046e-02, -1.580e-02)); + r += mul(s3_2, M4(-3.705e-02, -6.550e-02, 7.516e-02, 7.553e-02, -3.976e-02, -7.871e-02, 4.571e-02, -7.532e-02, -1.995e-03, -6.403e-02, -4.546e-02, -8.693e-02, 9.039e-02, -1.995e-02, 1.345e-01, 2.267e-02)); + r += mul(s3_3, M4(1.233e-01, 1.825e-02, 1.725e-01, 1.778e-01, -6.270e-02, -9.425e-02, 1.606e-01, -4.391e-02, 4.171e-02, 1.050e-01, 4.089e-02, -1.759e-01, 2.781e-02, 2.277e-01, -9.549e-02, 5.448e-02)); + r += mul(s3_4, M4(-5.577e-02, 2.223e-01, -1.202e-01, -1.332e-01, 
4.160e-02, 1.016e-01, 8.875e-02, -5.305e-02, 3.856e-02, -1.608e-01, -2.582e-02, 5.141e-02, 2.402e-02, 3.814e-02, -6.229e-02, -2.160e-02)); + r += mul(s3_5, M4(-6.361e-02, -1.239e-01, -4.564e-02, -8.119e-02, -2.117e-02, -1.723e-01, 9.629e-02, 1.111e-03, 1.104e-01, -2.014e-01, 1.567e-01, -1.490e-01, -1.243e-01, 7.170e-02, 7.954e-02, -2.288e-02)); + r += mul(s3_6, M4(-2.335e-03, -3.111e-01, 1.315e-01, -1.854e-01, 7.739e-02, -8.189e-02, -3.183e-02, 1.474e-01, -3.443e-02, 1.429e-01, 2.144e-02, 1.426e-01, 4.218e-02, 9.482e-02, 3.630e-02, 5.003e-02)); + r += mul(s3_7, M4(-4.766e-02, 2.926e-01, 9.300e-02, -1.212e-01, -1.090e-01, 5.079e-03, -9.414e-02, 4.644e-02, 8.026e-02, -1.791e-01, 1.471e-01, -1.189e-01, 1.029e-01, 5.472e-04, -3.538e-02, -1.394e-02)); + r += mul(s3_8, M4(-2.357e-02, -4.969e-03, -2.550e-04, 3.760e-02, -2.231e-02, -1.263e-01, 5.122e-02, -7.788e-03, 3.626e-02, 9.300e-02, 1.542e-02, -1.115e-01, -2.267e-02, -1.465e-01, -2.458e-02, -4.479e-02)); + r += mul(s4_0, M4(-4.892e-02, 9.998e-02, -5.485e-02, 1.855e-01, -5.268e-02, -1.911e-01, 1.163e-02, 7.336e-02, -2.790e-02, -8.902e-02, 3.962e-02, 4.102e-02, -1.586e-01, 2.007e-01, -2.357e-01, -6.856e-02)); + r += mul(s4_1, M4(-2.509e-02, -3.597e-02, 1.446e-01, -1.538e-01, 2.892e-02, -3.173e-01, 7.050e-03, -7.445e-02, 3.322e-02, 2.004e-01, -2.075e-02, -2.078e-01, -1.709e-01, 4.825e-01, 1.906e-01, -3.348e-01)); + r += mul(s4_2, M4(-1.132e-01, 8.326e-02, -2.015e-01, -4.702e-02, -3.892e-02, -4.820e-03, 6.851e-02, 1.764e-01, 4.587e-02, -1.529e-01, -1.397e-01, 5.021e-02, -5.849e-02, -6.847e-02, -2.166e-01, -1.023e-01)); + r += mul(s4_3, M4(5.897e-02, 7.060e-02, 4.643e-02, -8.388e-02, -1.709e-01, 3.313e-02, 1.540e-02, 7.965e-02, -2.783e-02, -1.881e-01, -3.390e-02, -5.080e-02, 6.688e-02, 4.503e-01, 1.965e-01, -8.467e-02)); + r += mul(s4_4, M4(-4.286e-02, 1.484e-01, 1.718e-01, 5.384e-02, -6.666e-04, -4.541e-02, 1.290e-02, -1.162e-01, 1.018e-01, 5.695e-02, -1.323e-01, -8.818e-02, 4.924e-02, -5.815e-02, 2.325e-01, -3.691e-01)); 
+ r += mul(s4_5, M4(-8.393e-03, -3.255e-02, 1.356e-01, 1.442e-01, -6.673e-02, 1.993e-01, 5.101e-02, -9.806e-02, -3.885e-03, 1.357e-01, -1.321e-01, 4.698e-02, -2.377e-02, -3.456e-01, -1.460e-01, -7.247e-01)); + r += mul(s4_6, M4(1.248e-01, -2.129e-01, -5.683e-02, 2.821e-02, 1.448e-02, 6.522e-02, -6.485e-02, 9.213e-02, 4.527e-03, -5.447e-02, 1.178e-01, -9.981e-02, 1.705e-01, 5.612e-01, 3.391e-02, -1.881e-01)); + r += mul(s4_7, M4(-7.766e-03, 3.777e-02, -7.707e-02, -2.180e-01, 1.218e-01, -4.018e-02, 5.356e-02, -5.481e-02, -8.015e-02, 1.189e-01, -1.416e-02, 6.809e-02, 4.150e-01, -2.107e-01, 5.750e-01, -1.605e-01)); + r += mul(s4_8, M4(9.450e-02, 1.037e-01, -2.835e-01, 4.411e-02, 9.148e-02, 4.021e-02, 3.793e-02, 8.474e-02, -1.125e-03, 7.616e-02, -4.859e-02, 1.827e-01, -5.379e-01, 5.020e-01, -4.090e-01, -4.793e-03)); + r += mul(s5_0, M4(-1.701e-01, 2.345e-01, -1.211e-02, 4.541e-02, 2.961e-02, 5.266e-02, 1.307e-01, 3.609e-02, 4.605e-02, -3.558e-02, 9.512e-02, 5.914e-02, 2.102e-02, -4.461e-02, -1.372e-02, 1.155e-01)); + r += mul(s5_1, M4(1.162e-01, -7.298e-02, -5.473e-02, 3.611e-02, 2.875e-03, 5.088e-02, 1.664e-02, -1.070e-01, -7.892e-02, -5.052e-02, -1.098e-01, 5.957e-02, -2.351e-02, 3.243e-03, -7.514e-02, -8.618e-02)); + r += mul(s5_2, M4(4.716e-02, -1.338e-01, -3.799e-02, 7.695e-02, 1.243e-01, 2.060e-02, 8.676e-02, 2.973e-02, 1.469e-01, -1.873e-01, 5.251e-02, -6.448e-02, 6.517e-02, -1.267e-01, -3.481e-02, 8.992e-02)); + r += mul(s5_3, M4(4.285e-02, 1.942e-02, -1.713e-01, 6.129e-02, 1.438e-02, 8.497e-02, -6.502e-02, 6.613e-02, -4.295e-02, 4.772e-02, -1.120e-01, 3.155e-04, -2.833e-02, 1.322e-02, 3.454e-02, -4.402e-02)); + r += mul(s5_4, M4(-1.615e-02, -6.930e-02, -1.766e-02, 3.873e-02, 1.728e-02, -4.751e-02, -7.133e-02, -2.653e-01, 3.786e-02, 1.410e-01, -1.479e-01, -3.649e-02, -6.534e-02, 5.146e-02, -7.122e-02, 4.381e-02)); + r += mul(s5_5, M4(-7.064e-03, -4.123e-02, -2.359e-02, 8.662e-02, 1.207e-01, 2.232e-01, 2.433e-01, 2.088e-01, 8.066e-02, 3.004e-02, 6.141e-02, 
-1.586e-01, -7.547e-02, -5.052e-02, 1.366e-01, 6.911e-02)); + r += mul(s5_6, M4(6.411e-03, 1.104e-01, -1.522e-02, 1.386e-01, 9.939e-03, -5.831e-02, -8.926e-04, 1.216e-02, -2.496e-02, 5.979e-02, 8.775e-03, 1.148e-01, 8.860e-02, 5.929e-02, -4.231e-02, 4.682e-02)); + r += mul(s5_7, M4(4.344e-02, -8.809e-02, 8.222e-02, -2.465e-01, -7.923e-02, 1.042e-01, 1.952e-01, -7.851e-02, -1.864e-02, 7.215e-02, -3.670e-02, 4.777e-02, -1.010e-01, 6.572e-02, -1.777e-02, -7.108e-02)); + r += mul(s5_8, M4(-8.068e-02, 9.414e-02, 9.702e-03, 1.204e-02, 3.309e-02, 8.979e-02, 3.771e-02, 1.383e-01, 8.931e-04, -5.920e-02, 9.616e-02, 5.289e-02, 8.472e-02, -1.638e-02, 4.468e-03, -2.117e-03)); + r += mul(s6_0, M4(3.289e-02, 2.284e-02, -3.119e-02, 2.566e-02, 1.347e-02, -5.070e-02, 5.969e-02, -1.102e-01, -1.147e-01, 1.709e-02, 1.959e-02, 1.604e-01, 6.736e-02, -7.710e-02, -4.285e-02, 4.158e-02)); + r += mul(s6_1, M4(7.121e-03, -1.564e-01, -1.001e-02, 8.996e-02, -2.329e-02, -1.416e-01, 1.530e-01, 5.474e-03, 9.776e-02, -1.706e-01, 1.192e-02, 2.380e-01, 7.527e-02, -1.134e-01, 6.843e-02, 3.326e-02)); + r += mul(s6_2, M4(-6.078e-02, 5.706e-02, -4.583e-02, -3.080e-02, 2.456e-02, 6.659e-02, 1.978e-01, -7.113e-02, 1.076e-02, 2.574e-02, 6.166e-02, 6.886e-02, -3.931e-02, 1.514e-02, -2.805e-02, 1.093e-01)); + r += mul(s6_3, M4(1.128e-02, 1.021e-01, 3.399e-02, -1.714e-01, 1.169e-01, 1.522e-01, 2.364e-02, 6.626e-02, -4.942e-03, -6.701e-02, -7.163e-02, 2.261e-02, 5.026e-03, 5.555e-02, -8.452e-02, -1.618e-01)); + r += mul(s6_4, M4(8.260e-02, 1.394e-01, -3.437e-02, -2.420e-02, 6.662e-02, -1.391e-01, -5.692e-03, 1.088e-01, 1.298e-01, 2.227e-01, -2.342e-02, -1.256e-01, 9.542e-02, 1.525e-01, -7.337e-02, -1.027e-01)); + r += mul(s6_5, M4(8.994e-02, 1.354e-01, -9.008e-02, 3.849e-02, -2.093e-02, 4.631e-02, -2.421e-01, -1.823e-02, -3.046e-02, 3.960e-02, -4.829e-02, -1.686e-01, 1.191e-01, -4.892e-02, -5.579e-02, -1.901e-02)); + r += mul(s6_6, M4(-9.693e-02, -5.007e-02, 1.206e-02, -8.020e-02, 5.086e-02, 1.398e-01, 
-5.576e-02, 8.612e-03, -1.313e-01, -1.433e-02, -1.111e-02, 1.519e-01, 3.079e-02, 2.418e-02, 1.939e-02, 1.782e-02)); + r += mul(s6_7, M4(7.926e-02, -6.074e-02, -1.768e-01, -1.748e-02, -7.136e-02, -1.362e-01, -1.489e-01, -1.288e-01, -7.120e-02, -1.594e-01, 1.150e-01, -9.829e-04, 1.873e-01, 5.800e-03, 5.079e-02, -9.980e-03)); + r += mul(s6_8, M4(-1.047e-01, 1.090e-02, -4.031e-02, 4.984e-02, 1.676e-02, 8.937e-02, -5.950e-02, 9.969e-02, 5.061e-02, -8.463e-02, -1.415e-01, 6.871e-02, 6.857e-02, 9.592e-02, 1.104e-02, 4.270e-02)); + r += mul(s7_0, M4(-8.631e-02, 4.665e-02, -8.895e-02, -1.461e-01, -2.317e-01, 2.104e-01, 3.963e-02, -3.532e-01, 3.795e-02, -1.502e-01, 8.124e-02, -7.193e-02, 2.124e-01, -2.453e-01, -1.988e-01, 8.618e-02)); + r += mul(s7_1, M4(2.688e-02, 8.635e-02, 1.727e-02, -6.922e-02, 1.255e-02, 9.244e-02, -5.431e-03, 5.034e-02, -5.071e-02, 1.155e-01, 7.984e-02, 2.210e-01, -1.402e-02, -3.125e-01, 5.878e-02, 1.485e-01)); + r += mul(s7_2, M4(3.618e-02, 1.528e-02, 2.318e-02, -6.709e-03, 2.473e-02, 1.492e-01, -6.647e-02, 1.353e-01, -3.731e-02, 1.335e-01, -3.427e-02, -1.461e-01, 1.150e-01, -1.794e-01, -3.323e-02, 1.929e-01)); + r += mul(s7_3, M4(-5.277e-02, -1.363e-01, -7.280e-02, -1.981e-01, 1.313e-01, 9.950e-02, -3.511e-02, -8.209e-02, 5.781e-03, -1.295e-01, -7.449e-02, -6.425e-02, 1.034e-01, -1.895e-01, -1.182e-01, -3.808e-02)); + r += mul(s7_4, M4(2.256e-02, 3.111e-01, -8.042e-02, 2.155e-02, -6.160e-02, 4.626e-02, -1.338e-01, 1.412e-02, 3.955e-02, 4.709e-02, 2.207e-02, 2.577e-02, 3.569e-01, -2.610e-01, 1.168e-01, -3.008e-01)); + r += mul(s7_5, M4(-7.569e-02, -1.578e-01, -1.208e-01, -6.976e-02, 1.733e-01, -1.821e-01, -1.684e-01, 4.462e-02, 1.019e-01, -7.874e-03, 5.979e-02, -9.098e-02, 2.738e-01, -7.549e-02, 2.460e-02, 1.610e-01)); + r += mul(s7_6, M4(-9.334e-02, 4.098e-02, -6.013e-03, -3.997e-02, -3.501e-02, -2.160e-01, -9.129e-02, 8.863e-02, -3.472e-02, -8.100e-02, 9.985e-02, -5.826e-02, 1.061e-01, 6.911e-03, -7.054e-02, -1.854e-01)); + r += mul(s7_7, 
M4(2.929e-02, 1.824e-01, -1.064e-01, 1.334e-01, 5.439e-02, 3.309e-02, -8.525e-02, -6.769e-02, -1.394e-01, -2.316e-02, -1.789e-02, 9.320e-03, 1.287e-01, -1.315e-01, -1.557e-01, -2.243e-02)); + r += mul(s7_8, M4(-7.720e-02, 3.992e-02, -2.953e-02, -2.195e-02, -1.529e-01, -6.945e-02, 1.175e-01, 2.004e-02, 4.700e-03, 4.726e-02, 1.380e-01, 8.438e-02, -6.662e-02, 1.652e-01, -2.526e-01, 3.833e-01)); + r += V4(8.813e-03, -2.534e-02, -1.974e-02, 3.490e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.122e-02, 8.383e-02, 3.338e-02, 1.074e-01, 6.216e-02, 1.477e-01, -1.380e-01, -6.255e-02, 1.996e-01, -3.722e-02, 5.276e-02, 1.167e-01, -7.417e-02, 5.647e-02, -5.606e-02, 1.827e-02)); + r += mul(s0_1, M4(-1.417e-01, 1.031e-01, 8.936e-02, -2.534e-02, 4.818e-02, -1.266e-02, 7.397e-02, -8.276e-03, 3.212e-02, 1.235e-01, -5.494e-02, -1.396e-01, 5.486e-02, 3.028e-02, 1.206e-01, 3.576e-02)); + r += mul(s0_2, M4(7.835e-03, -2.252e-02, -6.244e-02, 5.518e-02, 1.196e-02, 6.549e-02, -9.242e-02, 6.011e-02, 7.738e-02, 5.543e-02, 4.658e-02, 1.604e-01, -1.440e-01, -2.203e-01, 2.094e-01, 5.055e-02)); + r += mul(s0_3, M4(1.827e-02, 2.703e-01, -1.737e-02, -1.322e-01, 1.175e-01, 5.652e-03, -7.876e-02, -7.422e-02, -8.563e-02, 1.274e-01, 2.360e-01, -9.820e-02, -1.245e-01, -1.965e-01, -1.857e-01, -4.621e-02)); + r += mul(s0_4, M4(3.942e-02, 
-1.100e-02, 2.788e-01, -2.336e-01, 8.881e-02, -1.333e-01, 1.003e-01, 1.164e-01, 7.883e-02, 8.033e-02, 2.043e-02, 1.624e-01, 1.383e-01, -2.519e-02, -5.318e-02, 1.520e-01)); + r += mul(s0_5, M4(-3.216e-02, 5.697e-02, -5.701e-02, 4.658e-02, 3.494e-02, 3.336e-02, -7.436e-02, 7.364e-02, 8.382e-02, -2.202e-02, -1.743e-01, -1.839e-01, 1.417e-01, -1.635e-01, -3.120e-01, -8.505e-02)); + r += mul(s0_6, M4(-7.515e-03, 4.343e-02, -5.703e-02, -7.557e-02, 3.959e-02, -5.061e-02, 3.424e-03, -7.574e-02, 5.731e-02, 1.219e-01, 2.614e-01, -1.606e-01, -1.916e-01, 1.833e-01, 2.208e-01, 2.684e-02)); + r += mul(s0_7, M4(-3.595e-02, 7.421e-02, -8.721e-02, -1.584e-01, -2.372e-01, 6.072e-03, 1.349e-01, -6.225e-02, 7.371e-02, -1.628e-02, 9.391e-03, 5.127e-02, 1.093e-01, -6.543e-02, -6.491e-02, -1.025e-01)); + r += mul(s0_8, M4(1.666e-01, -6.585e-02, -3.190e-02, -1.870e-02, -2.152e-02, 2.299e-02, -7.148e-02, -5.189e-02, 3.208e-02, 3.908e-02, 9.250e-02, -5.921e-02, -1.940e-01, 2.204e-01, 7.993e-02, -1.537e-01)); + r += mul(s1_0, M4(-2.020e-01, -8.556e-02, 1.969e-01, 4.061e-02, -2.778e-01, 2.953e-02, -8.298e-02, 2.652e-02, -1.784e-01, -2.365e-01, -9.816e-02, 4.397e-02, -4.537e-02, -2.409e-03, 7.802e-02, -3.368e-02)); + r += mul(s1_1, M4(-4.658e-02, 8.550e-02, 7.848e-03, -3.993e-02, 1.831e-01, 1.087e-01, -4.718e-02, 1.597e-01, -2.204e-02, 8.192e-03, -1.598e-01, 1.384e-03, -8.970e-02, 1.891e-02, -6.039e-02, 1.696e-01)); + r += mul(s1_2, M4(-1.113e-01, -4.528e-02, 1.062e-01, 1.203e-01, 9.039e-02, 7.922e-03, -4.449e-02, 2.844e-02, -1.424e-01, -7.929e-02, 2.053e-02, -3.185e-03, 6.237e-02, -1.203e-01, 8.945e-02, -3.631e-02)); + r += mul(s1_3, M4(-5.671e-03, -3.451e-02, 1.500e-01, -1.569e-01, 1.268e-01, -1.226e-01, 6.915e-04, -2.778e-02, -3.379e-02, 4.271e-02, 1.354e-01, -9.886e-02, -1.241e-01, 7.640e-03, 3.774e-02, -3.300e-02)); + r += mul(s1_4, M4(1.919e-01, -9.819e-02, -3.668e-02, -1.755e-01, -6.389e-02, 1.992e-01, -3.709e-02, 5.672e-02, -9.270e-02, 1.810e-02, -5.355e-02, -1.391e-01, 3.739e-02, 
1.339e-01, -1.631e-01, 1.079e-01)); + r += mul(s1_5, M4(-2.162e-02, 1.105e-01, -3.219e-02, 5.079e-02, -8.272e-02, -5.169e-02, 1.417e-01, 1.084e-01, 1.151e-01, -3.524e-03, 8.167e-02, -7.284e-02, 4.550e-02, -3.263e-02, 3.912e-03, 2.087e-02)); + r += mul(s1_6, M4(-1.941e-01, -9.595e-02, 3.628e-02, -6.315e-02, -1.131e-01, -7.642e-02, 7.751e-03, -1.500e-01, -2.586e-02, 1.070e-02, 1.509e-01, 2.150e-02, 2.203e-01, -4.958e-02, -7.005e-02, -5.625e-02)); + r += mul(s1_7, M4(-1.845e-01, 2.111e-01, -1.228e-01, -1.447e-01, -3.468e-01, 3.155e-01, 1.562e-01, -3.004e-03, 1.479e-02, -1.853e-01, 3.506e-03, 6.235e-02, -2.529e-02, 5.032e-02, 1.300e-02, 4.364e-02)); + r += mul(s1_8, M4(1.870e-01, 4.247e-02, -1.505e-01, -1.313e-01, 3.916e-01, 7.202e-03, 9.710e-02, -8.099e-02, 1.619e-01, 9.561e-04, 2.766e-02, 6.239e-03, -4.921e-03, 7.273e-02, -1.201e-02, -2.343e-02)); + r += mul(s2_0, M4(2.606e-01, -5.136e-02, -1.860e-01, 8.581e-02, 9.574e-02, 1.568e-01, -1.763e-02, -5.978e-03, 5.550e-02, 1.568e-02, -5.198e-02, -4.383e-02, -4.906e-02, 1.675e-01, 1.535e-01, 4.199e-02)); + r += mul(s2_1, M4(8.128e-02, -4.188e-02, 8.789e-02, 1.059e-02, 5.317e-02, -2.500e-01, -1.083e-02, -1.010e-02, 1.883e-02, -1.240e-03, -6.292e-02, -7.543e-02, -2.780e-01, 1.280e-02, 7.872e-02, -1.048e-01)); + r += mul(s2_2, M4(-7.516e-02, 3.264e-02, 1.078e-02, 1.077e-01, -1.360e-01, -1.113e-01, -2.177e-03, -5.041e-02, 1.569e-01, 1.391e-01, -4.030e-02, -1.241e-01, -8.740e-02, -5.520e-02, 2.185e-02, 1.608e-02)); + r += mul(s2_3, M4(2.306e-03, 1.567e-01, -7.209e-02, -4.525e-02, -9.415e-02, 6.306e-02, 1.450e-01, -1.169e-01, 5.780e-02, -1.133e-01, -5.446e-02, 3.259e-02, -4.961e-02, -1.976e-02, -2.447e-02, -2.290e-02)); + r += mul(s2_4, M4(2.386e-02, -5.238e-02, 1.207e-01, 3.735e-02, 9.597e-02, -4.174e-02, -6.034e-03, 1.518e-01, 4.247e-02, 2.257e-01, 1.078e-01, 1.793e-03, -5.901e-02, 9.326e-02, -1.338e-01, 1.885e-01)); + r += mul(s2_5, M4(7.380e-02, -1.063e-02, 3.345e-02, 1.479e-02, 3.495e-02, 4.331e-02, -8.697e-02, 7.413e-02, 
-4.597e-02, 7.436e-02, -3.669e-02, 5.752e-02, -5.799e-02, -8.522e-02, 3.640e-02, 2.072e-01)); + r += mul(s2_6, M4(-1.877e-01, 1.551e-02, -6.828e-02, -1.350e-01, -6.230e-02, -3.119e-02, -7.443e-02, -4.987e-03, -2.192e-01, -1.508e-02, 2.726e-02, -3.162e-02, 9.302e-02, -3.278e-02, -1.812e-01, -1.685e-01)); + r += mul(s2_7, M4(-8.226e-02, -9.430e-03, -2.783e-02, -1.507e-01, -1.194e-01, 4.480e-02, -3.096e-01, 1.221e-02, -1.314e-02, -3.632e-02, -8.366e-02, -9.450e-02, 8.160e-02, -4.070e-02, 1.684e-02, -1.591e-01)); + r += mul(s2_8, M4(-9.698e-02, -5.036e-02, 7.702e-03, -1.561e-02, -2.036e-01, 7.757e-04, -7.320e-03, -9.980e-02, 1.523e-03, -1.953e-02, 1.121e-01, -2.196e-02, 6.359e-02, -2.313e-02, -9.189e-02, 9.476e-02)); + r += mul(s3_0, M4(1.074e-01, 6.465e-02, 5.784e-02, -5.239e-02, 1.372e-01, 2.996e-02, 4.506e-02, -4.303e-02, 6.389e-02, -1.702e-01, 5.712e-02, -2.878e-02, 1.114e-01, -1.466e-01, 1.442e-02, -4.859e-02)); + r += mul(s3_1, M4(2.695e-01, 8.036e-02, 1.020e-02, -2.345e-02, -3.177e-02, -1.214e-01, 2.157e-02, -9.057e-02, -7.833e-02, -1.223e-01, 1.324e-01, 4.208e-02, 4.594e-02, -3.441e-02, -1.455e-01, -1.347e-01)); + r += mul(s3_2, M4(-1.776e-01, 3.311e-02, 2.599e-01, 1.538e-01, 5.780e-02, -1.014e-01, -6.213e-02, -6.103e-02, -8.818e-02, 6.784e-03, 2.126e-01, -4.447e-02, 2.162e-02, 6.845e-03, 2.213e-02, 4.315e-02)); + r += mul(s3_3, M4(-3.691e-02, 9.170e-02, -1.353e-01, -3.568e-02, -2.263e-02, -3.033e-02, 1.431e-01, 1.084e-02, -8.167e-02, -1.828e-01, -1.531e-02, 1.328e-01, -1.693e-01, -1.139e-01, -1.416e-01, 4.815e-03)); + r += mul(s3_4, M4(2.757e-01, 1.825e-01, 5.124e-02, -2.846e-02, 1.989e-02, 4.312e-02, -7.312e-02, 5.850e-02, 1.033e-01, -1.880e-02, -4.189e-02, 2.893e-02, 3.430e-02, -6.482e-03, 6.486e-02, 1.755e-01)); + r += mul(s3_5, M4(-9.942e-02, -2.805e-01, -7.769e-02, 3.849e-03, -1.070e-01, -4.338e-02, 6.514e-02, 3.316e-02, 1.383e-01, 2.742e-01, 3.685e-02, -4.829e-02, 1.068e-01, 1.266e-01, -2.506e-03, 1.291e-01)); + r += mul(s3_6, M4(-2.415e-01, -8.057e-02, 
3.857e-02, -1.124e-02, -2.277e-02, 9.862e-02, -2.556e-02, 1.701e-01, 1.303e-01, -4.520e-02, 1.029e-01, -3.540e-03, -2.677e-02, -2.015e-01, 4.437e-02, -9.656e-02)); + r += mul(s3_7, M4(-1.609e-01, 7.057e-02, 6.680e-02, -8.852e-02, -4.068e-02, 5.505e-02, -2.040e-01, 4.326e-02, 2.155e-01, -7.881e-02, 2.034e-01, -2.155e-02, 1.404e-01, -6.881e-02, 1.369e-02, -1.505e-01)); + r += mul(s3_8, M4(3.738e-02, -4.088e-02, -9.538e-02, -1.224e-02, -5.381e-02, 7.745e-02, 1.922e-03, -4.366e-02, -2.528e-01, 6.610e-02, 1.876e-01, 3.537e-02, 1.399e-01, 1.328e-02, -2.380e-02, -1.166e-02)); + r += mul(s4_0, M4(1.192e-01, -4.218e-02, 8.971e-02, -1.203e-03, 3.736e-02, 7.362e-02, 1.852e-01, -9.320e-02, -8.487e-03, -4.872e-02, 4.593e-02, 5.427e-02, -1.876e-01, 2.224e-01, 1.574e-01, -6.391e-02)); + r += mul(s4_1, M4(2.717e-02, -2.327e-01, 2.254e-02, -1.382e-01, 9.461e-02, 5.224e-02, -1.192e-02, 1.295e-01, -1.813e-01, 1.258e-01, 1.800e-01, 4.340e-02, -1.290e-01, -1.914e-01, -4.951e-02, -1.204e-01)); + r += mul(s4_2, M4(-1.256e-01, 6.234e-03, 9.556e-02, -4.207e-02, -5.784e-02, -5.792e-02, 8.548e-02, 1.648e-01, -9.047e-02, -2.218e-03, -3.778e-02, 8.343e-02, -9.056e-02, -1.292e-02, 2.514e-03, 1.063e-01)); + r += mul(s4_3, M4(-8.758e-02, -7.533e-02, -3.850e-02, -9.839e-02, -5.909e-02, 9.537e-02, -2.824e-02, -4.447e-02, -1.371e-01, 5.032e-03, -6.339e-02, -2.186e-01, 3.136e-02, 2.361e-02, -9.746e-02, -8.115e-02)); + r += mul(s4_4, M4(-2.552e-01, 9.643e-02, -6.941e-02, 1.155e-01, 3.640e-02, -4.759e-02, 1.096e-02, 1.720e-01, 6.097e-03, 1.744e-02, -5.312e-02, -2.109e-03, 1.329e-01, -3.999e-01, 2.500e-01, -3.880e-01)); + r += mul(s4_5, M4(-1.890e-01, -2.049e-02, -5.403e-02, 6.456e-02, -6.021e-02, 1.860e-02, -3.521e-02, 1.354e-01, 8.995e-02, -9.078e-02, 4.511e-02, 1.512e-01, -4.562e-01, -1.406e-01, 2.990e-01, 2.225e-01)); + r += mul(s4_6, M4(-1.407e-01, 3.263e-02, -1.526e-01, -7.049e-02, 2.062e-01, -2.468e-03, 1.486e-01, -2.106e-01, -1.939e-02, -2.468e-02, 9.569e-03, 3.444e-02, -7.420e-01, 7.872e-01, 
3.873e-01, 2.897e-02)); + r += mul(s4_7, M4(3.851e-02, 9.604e-02, -1.353e-01, 5.790e-02, 1.505e-01, 9.856e-02, 2.537e-02, -3.294e-02, -4.353e-02, -1.397e-01, -7.199e-02, -2.963e-02, 5.805e-01, -3.699e-01, 1.366e-01, 3.243e-01)); + r += mul(s4_8, M4(2.228e-01, -1.228e-01, 2.280e-01, 5.593e-02, 3.115e-02, -7.131e-02, -7.441e-02, 1.255e-01, 8.876e-02, -2.383e-02, -8.210e-02, 4.344e-02, 8.000e-02, 3.931e-02, -1.807e-01, -1.223e-02)); + r += mul(s5_0, M4(1.995e-01, 1.478e-02, 1.361e-01, 1.052e-01, -6.110e-02, -1.409e-02, 4.745e-02, -9.579e-02, -6.960e-02, 1.767e-02, 6.126e-02, -6.174e-02, 2.066e-02, 1.905e-02, 1.185e-01, 4.444e-02)); + r += mul(s5_1, M4(1.894e-02, 5.374e-02, -3.895e-02, -6.910e-02, -8.066e-02, -7.286e-03, 3.333e-02, -2.389e-02, 4.153e-02, 1.519e-01, 5.669e-02, 1.348e-01, 1.514e-01, 8.843e-02, -5.111e-02, -6.478e-02)); + r += mul(s5_2, M4(-1.113e-02, -4.382e-02, 1.277e-01, -6.272e-02, 3.374e-02, 5.874e-02, 1.987e-02, 5.267e-03, -1.998e-01, -1.951e-02, -1.162e-01, 5.619e-02, 2.949e-02, -2.699e-02, 1.024e-01, 1.130e-01)); + r += mul(s5_3, M4(1.431e-01, -9.473e-02, -2.907e-02, 1.652e-02, -8.979e-02, -5.443e-02, 8.121e-02, -1.787e-01, 7.685e-03, 2.413e-02, -5.653e-02, -7.984e-02, -8.301e-02, 5.845e-02, 7.076e-02, -7.164e-02)); + r += mul(s5_4, M4(9.803e-02, -2.012e-01, -3.996e-02, -3.059e-02, 1.333e-01, 1.097e-01, 4.228e-02, 1.154e-01, 4.830e-02, 4.718e-02, -1.158e-01, -2.837e-02, -4.066e-02, -1.155e-01, 2.084e-02, 2.952e-03)); + r += mul(s5_5, M4(-1.548e-01, 2.054e-02, 5.285e-02, 2.615e-02, -1.192e-01, -1.256e-02, -2.001e-02, 9.973e-02, 1.317e-01, -2.092e-02, 6.632e-02, -7.791e-02, 4.145e-03, 1.330e-01, -1.645e-01, 1.230e-02)); + r += mul(s5_6, M4(-3.373e-02, 1.389e-03, -5.204e-02, 1.417e-01, 2.750e-02, -2.121e-01, 3.537e-02, -1.165e-01, -9.734e-02, -1.177e-01, -1.281e-01, 1.998e-03, -2.342e-02, -2.233e-02, 2.014e-02, -3.454e-02)); + r += mul(s5_7, M4(1.243e-01, 4.646e-02, 3.739e-02, -1.566e-01, -1.649e-01, 3.730e-03, -1.185e-01, -4.354e-02, -1.778e-02, 
-1.135e-01, -1.348e-01, 5.122e-02, 2.792e-02, -7.921e-02, -1.160e-01, 8.289e-02)); + r += mul(s5_8, M4(8.194e-03, 4.714e-02, 2.052e-01, 1.422e-01, -4.842e-02, 2.618e-02, -2.984e-02, -6.504e-02, 1.182e-01, -1.162e-02, -1.997e-01, 2.425e-02, 4.452e-02, -1.259e-02, -9.730e-02, 7.280e-02)); + r += mul(s6_0, M4(-8.571e-02, 1.186e-01, 1.233e-02, 1.387e-02, 9.057e-02, 1.115e-01, 2.179e-03, 6.594e-02, 2.330e-01, 3.804e-02, 1.034e-01, 4.844e-02, -9.847e-03, -2.905e-02, 5.979e-03, 1.292e-01)); + r += mul(s6_1, M4(2.553e-02, 2.555e-02, -7.010e-02, -3.260e-02, 4.235e-02, -8.416e-03, 5.704e-03, -6.912e-02, 3.865e-02, -2.399e-01, 2.407e-02, -6.606e-02, 1.687e-02, -1.138e-01, 2.578e-02, -1.999e-02)); + r += mul(s6_2, M4(-3.888e-02, 1.122e-02, -2.795e-02, 9.662e-02, 2.591e-02, 2.528e-02, 8.205e-02, 2.072e-02, -3.163e-01, -5.685e-02, -1.805e-01, -1.097e-01, -2.322e-01, 4.193e-02, 2.081e-01, 6.192e-02)); + r += mul(s6_3, M4(-1.587e-01, -1.986e-01, -1.124e-01, -5.828e-02, 6.789e-02, 9.969e-02, -3.875e-03, 8.378e-02, -1.463e-01, 3.364e-02, 5.932e-02, 4.116e-03, 7.922e-02, -6.432e-03, 1.013e-02, -2.830e-02)); + r += mul(s6_4, M4(8.316e-02, -6.667e-02, -1.269e-01, -2.602e-02, -1.090e-01, -2.036e-02, 4.056e-02, -1.868e-02, -1.277e-01, -9.614e-02, -1.978e-03, 3.213e-02, 1.918e-01, -3.292e-03, 3.931e-02, 4.892e-02)); + r += mul(s6_5, M4(-1.863e-02, 7.901e-02, -1.337e-01, -2.784e-02, -2.582e-02, -4.822e-02, -1.474e-01, -1.068e-01, -9.478e-02, -4.451e-02, -5.566e-02, -2.354e-02, 1.543e-01, -2.465e-02, -1.499e-01, 2.466e-02)); + r += mul(s6_6, M4(1.499e-01, -4.591e-02, -8.516e-03, -6.246e-02, -1.189e-01, 3.637e-02, -1.299e-01, 1.627e-01, 1.222e-01, 8.852e-02, 3.212e-03, 7.118e-02, -1.357e-02, 2.100e-02, 4.570e-03, -1.142e-01)); + r += mul(s6_7, M4(4.662e-02, 1.060e-02, 5.149e-02, 4.118e-02, -6.327e-02, 1.159e-01, -1.288e-03, -8.900e-02, 2.912e-01, -6.238e-02, 1.235e-02, -9.573e-02, 1.437e-02, 5.830e-02, 1.094e-01, 2.949e-02)); + r += mul(s6_8, M4(-3.209e-02, 5.889e-02, -1.010e-01, -6.904e-02, 
6.960e-02, 3.590e-03, -5.111e-02, -4.199e-02, 1.622e-01, 3.661e-02, -3.815e-02, 8.151e-02, -1.278e-01, 1.048e-01, -1.009e-01, -2.499e-02)); + r += mul(s7_0, M4(4.288e-02, 2.392e-01, 2.314e-02, 8.080e-02, 2.546e-02, -1.373e-01, 1.117e-01, -1.685e-01, -1.819e-01, 1.207e-01, 2.163e-01, 8.378e-02, 3.947e-02, -3.408e-01, 8.933e-02, -4.154e-02)); + r += mul(s7_1, M4(9.584e-02, -4.602e-02, 1.720e-03, -1.151e-01, 1.934e-02, 6.536e-02, -3.617e-02, -3.821e-02, -2.219e-01, 1.750e-02, 1.317e-02, -3.834e-02, 5.001e-01, 2.384e-01, 1.160e-01, 9.402e-03)); + r += mul(s7_2, M4(1.313e-01, 3.794e-02, 7.627e-02, -4.446e-02, 1.638e-02, -5.554e-02, -4.113e-02, 7.161e-02, -2.881e-02, 6.571e-02, -5.144e-02, -2.328e-02, 1.982e-01, 4.741e-02, 7.447e-02, -1.606e-01)); + r += mul(s7_3, M4(3.180e-01, -6.699e-02, -2.630e-02, 2.820e-01, -7.095e-02, -2.209e-02, 4.669e-02, 2.435e-01, 1.191e-01, 5.831e-02, -1.845e-01, 5.569e-03, -2.660e-01, 1.263e-01, 1.461e-01, -1.263e-01)); + r += mul(s7_4, M4(-9.744e-02, -9.800e-03, 4.730e-02, 2.280e-01, -4.158e-02, 1.705e-01, -1.756e-01, -7.229e-02, 9.002e-02, 1.229e-01, -1.380e-02, 1.392e-01, -2.255e-01, 8.003e-03, 1.042e-01, 3.708e-01)); + r += mul(s7_5, M4(1.447e-01, 2.116e-02, 9.953e-02, -3.550e-02, 5.017e-02, 1.057e-01, -8.217e-02, 8.744e-03, 1.345e-01, 2.721e-02, 1.029e-01, -9.431e-02, -3.924e-01, 1.097e-01, -2.832e-01, 1.546e-01)); + r += mul(s7_6, M4(-3.546e-02, -1.809e-01, -9.086e-02, 1.546e-01, -8.928e-02, -8.232e-02, -8.062e-02, -8.184e-02, -1.697e-02, -1.375e-01, -7.805e-02, -1.554e-02, 2.654e-02, -3.685e-01, -3.469e-01, -1.440e-01)); + r += mul(s7_7, M4(-2.244e-01, -2.795e-02, -3.074e-02, 7.153e-02, 2.436e-02, -7.016e-02, -1.603e-02, 1.083e-01, -1.362e-01, -5.181e-02, 7.052e-02, 5.746e-02, 1.820e-01, 6.327e-03, -1.165e-02, 1.657e-01)); + r += mul(s7_8, M4(-1.553e-01, 2.828e-02, 5.395e-02, -4.372e-02, 3.593e-01, 2.295e-02, -4.591e-02, -4.522e-02, -5.343e-02, 6.061e-02, 3.451e-02, -3.592e-02, 4.702e-02, 2.445e-01, 5.456e-02, -4.491e-02)); + r += 
V4(1.188e-02, -5.847e-03, -6.198e-03, 1.196e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.931e-02, -1.620e-02, 3.840e-02, -1.549e-01, 1.687e-01, 1.586e-01, 2.465e-02, 7.422e-03, -8.848e-02, 1.450e-01, -2.154e-01, 6.384e-02, 6.475e-02, 1.585e-01, -6.220e-02, 8.563e-02)); + r += mul(s0_1, M4(9.150e-02, 6.232e-02, 1.791e-02, -2.287e-02, 1.222e-02, -7.374e-03, -9.660e-02, 1.058e-01, 2.371e-02, -6.607e-02, 2.011e-01, -4.123e-02, 4.755e-02, 8.962e-02, -7.784e-02, 1.115e-01)); + r += mul(s0_2, M4(1.253e-01, -1.645e-02, 2.724e-03, -7.293e-03, 1.582e-02, -7.734e-02, 9.847e-02, -6.540e-02, 5.937e-02, -1.585e-02, -2.111e-01, -6.382e-02, 1.210e-01, -9.965e-02, 1.147e-01, 2.300e-02)); + r += mul(s0_3, M4(1.140e-01, -1.274e-01, -5.101e-02, -5.984e-02, -9.800e-02, -1.215e-01, 1.383e-01, 1.945e-02, 4.608e-02, 8.946e-02, 1.683e-02, 7.257e-03, 1.265e-01, 6.532e-02, 1.383e-01, -1.018e-01)); + r += mul(s0_4, M4(2.188e-02, 1.722e-01, 1.655e-02, -3.794e-02, -3.063e-02, 1.064e-01, -6.348e-02, 2.423e-02, 1.154e-01, -2.119e-01, 6.006e-02, 4.074e-02, 6.261e-02, 2.229e-02, -1.479e-01, 1.137e-01)); + r += mul(s0_5, M4(1.043e-01, 3.299e-02, -2.709e-02, 2.662e-02, 7.709e-02, 3.069e-02, 5.687e-02, -7.690e-03, -1.651e-01, 2.707e-02, -3.127e-02, -1.623e-02, 3.458e-02, 6.050e-02, -5.945e-02, -4.582e-02)); + r += mul(s0_6, M4(8.463e-02, 
3.034e-02, -3.368e-02, -5.227e-03, 1.183e-02, 2.833e-02, 7.060e-02, -5.617e-02, -7.726e-02, -1.494e-01, 8.997e-02, 2.586e-02, -9.478e-02, -1.036e-01, -1.880e-01, 3.941e-02)); + r += mul(s0_7, M4(-5.805e-03, -5.985e-02, -2.067e-02, 6.773e-02, 8.510e-02, -4.496e-02, -9.020e-02, 4.270e-03, 9.125e-02, -7.380e-02, 1.010e-02, -8.613e-03, -1.178e-01, -1.061e-01, -2.188e-02, 1.044e-02)); + r += mul(s0_8, M4(-3.733e-02, 3.283e-02, -4.618e-02, -4.285e-02, -6.668e-02, -9.029e-02, 1.092e-01, -2.894e-03, -5.508e-02, -6.132e-02, 5.218e-02, -8.097e-02, -6.732e-02, 1.058e-01, -5.113e-02, 1.074e-01)); + r += mul(s1_0, M4(1.130e-01, 6.127e-02, -1.749e-02, -6.534e-02, -1.035e-01, 1.048e-01, -1.353e-01, 3.567e-02, -1.930e-02, -7.667e-02, -7.980e-02, -1.448e-01, -5.444e-02, 1.102e-01, -9.306e-03, 5.581e-02)); + r += mul(s1_1, M4(-7.318e-02, 2.685e-02, 2.261e-02, 7.976e-02, -1.067e-01, -2.688e-01, 4.436e-02, 7.789e-02, -6.288e-02, 1.418e-01, 3.727e-02, 1.334e-01, -1.607e-01, -1.550e-01, -2.507e-02, -2.109e-03)); + r += mul(s1_2, M4(-1.630e-01, 9.466e-02, -4.983e-02, -4.514e-02, -2.337e-01, -1.183e-01, 8.778e-03, 1.591e-02, -2.269e-02, -3.599e-02, 3.178e-02, 1.132e-02, 4.432e-02, 1.343e-01, 2.935e-02, 9.254e-02)); + r += mul(s1_3, M4(1.499e-02, -2.522e-01, -5.516e-02, -3.723e-02, 7.456e-02, -5.486e-02, 3.520e-02, 1.543e-01, -6.918e-02, 1.577e-01, 8.023e-02, -2.114e-02, -2.013e-02, -9.676e-03, -5.755e-02, 1.012e-02)); + r += mul(s1_4, M4(9.258e-02, -1.213e-01, -1.900e-01, -1.896e-02, -5.598e-02, 8.796e-02, 9.096e-03, 7.257e-02, -1.115e-01, -1.095e-01, -1.428e-01, 4.173e-02, 1.057e-02, -1.400e-01, -6.675e-02, 1.536e-01)); + r += mul(s1_5, M4(1.526e-01, 2.140e-01, 1.820e-02, -3.331e-02, 3.502e-01, 7.911e-03, -1.580e-01, -2.524e-02, 6.383e-02, 9.007e-02, 5.348e-03, -1.111e-01, -6.671e-02, 1.192e-01, -6.390e-02, 1.555e-02)); + r += mul(s1_6, M4(1.229e-02, -9.820e-04, 5.709e-03, 1.341e-02, -6.908e-02, 5.506e-02, -2.072e-01, 2.365e-02, 1.616e-01, -2.105e-02, -2.334e-02, 4.507e-02, 7.618e-02, 
-1.112e-01, 1.182e-01, -1.386e-01)); + r += mul(s1_7, M4(-2.034e-02, 7.350e-02, 1.186e-01, 4.581e-02, 2.846e-02, 5.164e-02, 1.030e-01, 5.490e-02, -4.674e-02, -1.053e-02, -4.939e-02, 8.077e-03, 1.548e-01, 1.337e-01, -3.645e-03, 1.178e-01)); + r += mul(s1_8, M4(-1.786e-01, 5.271e-02, -1.207e-02, 8.533e-02, -2.222e-01, 1.050e-01, 1.098e-01, -4.937e-02, 9.097e-02, 3.310e-02, -4.412e-03, -1.354e-02, -3.125e-02, 1.033e-01, 6.936e-02, 2.212e-02)); + r += mul(s2_0, M4(-5.982e-02, 5.552e-02, -9.409e-03, -2.199e-02, 7.282e-02, -6.714e-02, 3.140e-02, 1.189e-01, -1.357e-01, 4.945e-02, -9.183e-02, -4.823e-02, -1.664e-01, -6.090e-02, 1.250e-01, 2.663e-02)); + r += mul(s2_1, M4(2.770e-02, 4.888e-02, -1.101e-01, 1.018e-02, 1.057e-01, 3.733e-02, -3.370e-02, -9.007e-02, 5.054e-02, 1.355e-01, 5.278e-02, -4.290e-02, -4.388e-02, 1.006e-01, -8.385e-02, -5.278e-02)); + r += mul(s2_2, M4(4.752e-03, 1.637e-02, 4.455e-02, -8.651e-02, -4.853e-02, 5.247e-04, 4.343e-02, 4.057e-02, 4.907e-02, 4.106e-02, -4.624e-02, -3.159e-02, 2.605e-02, 4.703e-02, 8.334e-03, -1.679e-01)); + r += mul(s2_3, M4(5.956e-03, -9.383e-02, -3.345e-02, 1.447e-02, 1.451e-01, 1.704e-02, -2.239e-01, 4.357e-02, 7.440e-02, -1.747e-01, 3.916e-02, 5.365e-02, 8.042e-02, 7.271e-02, -2.975e-01, -3.541e-02)); + r += mul(s2_4, M4(-5.668e-02, -1.859e-01, 1.089e-02, 4.463e-02, 2.718e-02, -3.862e-02, 1.065e-01, -3.353e-02, -5.402e-02, -9.449e-02, -1.545e-01, -1.360e-01, -1.601e-01, -1.531e-01, -1.968e-01, 8.281e-02)); + r += mul(s2_5, M4(-5.144e-02, -3.722e-02, 8.142e-02, -5.951e-02, 1.182e-01, -9.840e-02, 2.036e-01, 7.334e-02, -1.637e-02, 1.501e-02, 1.250e-01, 1.854e-03, -9.333e-04, 1.256e-02, 1.218e-01, 1.536e-01)); + r += mul(s2_6, M4(-1.765e-01, 4.622e-02, 2.283e-02, 5.905e-02, -9.045e-02, 1.294e-01, -8.123e-02, -1.271e-01, -2.697e-02, 6.241e-03, -6.811e-02, 4.590e-02, -1.384e-01, 6.209e-02, 3.734e-02, 1.074e-01)); + r += mul(s2_7, M4(-1.121e-02, 2.714e-03, -1.445e-02, 8.286e-02, -1.551e-01, 1.810e-01, -1.864e-02, -7.961e-02, 
-2.730e-02, 8.485e-02, 7.550e-02, -1.792e-02, 2.392e-02, -9.604e-02, 1.290e-01, 8.964e-02)); + r += mul(s2_8, M4(6.928e-02, -4.838e-02, -3.492e-02, 5.158e-02, -7.985e-02, -5.230e-03, -1.658e-01, 1.593e-01, -4.329e-02, 6.040e-03, 8.456e-02, 4.682e-02, 6.816e-02, 4.399e-02, 5.442e-02, -1.964e-02)); + r += mul(s3_0, M4(5.881e-02, -4.879e-02, 1.846e-01, 1.472e-01, 5.419e-02, 8.747e-02, -6.970e-02, 4.205e-02, -5.599e-02, -7.376e-03, -3.877e-02, 5.823e-02, -9.323e-02, -8.103e-02, -2.349e-03, 8.759e-02)); + r += mul(s3_1, M4(1.022e-01, 2.743e-01, -2.047e-01, 7.111e-02, 7.135e-02, 1.308e-01, -1.863e-01, -1.110e-01, 1.067e-01, 6.486e-02, 3.243e-02, -6.511e-02, -2.439e-02, 1.668e-04, -1.157e-02, 2.933e-02)); + r += mul(s3_2, M4(1.244e-01, -7.002e-02, 7.723e-03, -3.796e-02, 8.125e-02, -3.934e-02, -4.218e-02, -1.444e-03, 7.520e-03, -1.074e-01, -1.940e-02, 7.347e-02, -2.173e-02, 5.112e-02, 2.157e-02, -1.640e-01)); + r += mul(s3_3, M4(6.504e-02, -2.780e-02, -7.144e-02, -3.487e-02, 1.528e-02, 6.679e-02, 2.926e-02, 5.872e-02, 1.227e-01, 6.072e-02, 1.872e-02, 7.009e-02, 3.820e-02, 1.365e-01, 3.983e-02, -2.216e-02)); + r += mul(s3_4, M4(-1.734e-02, 9.274e-02, -6.396e-02, 5.379e-02, 5.036e-02, -2.001e-01, 2.124e-03, -1.608e-02, -1.849e-01, 7.496e-02, -4.061e-02, -2.447e-03, 2.164e-02, -1.727e-01, -6.730e-02, 1.730e-01)); + r += mul(s3_5, M4(-2.699e-02, 6.652e-02, 7.248e-02, 2.157e-01, 3.969e-02, -1.370e-01, -5.664e-02, 1.411e-02, 1.697e-02, -6.388e-02, 5.160e-02, 9.844e-03, -1.168e-03, -5.753e-02, -9.196e-02, 1.159e-01)); + r += mul(s3_6, M4(1.232e-01, -1.785e-01, 1.381e-01, -1.288e-01, -1.078e-01, -2.209e-02, -3.333e-02, -1.648e-01, -5.078e-02, -7.208e-02, 1.227e-01, -9.708e-02, -4.265e-02, 1.253e-02, 2.227e-02, 1.477e-01)); + r += mul(s3_7, M4(1.667e-01, 1.519e-01, 5.866e-02, 6.226e-02, -4.685e-02, 5.933e-02, 2.744e-02, -1.257e-03, -1.805e-02, 1.363e-01, -1.859e-02, 1.137e-02, 1.244e-01, -4.332e-02, 7.954e-02, -6.113e-02)); + r += mul(s3_8, M4(-4.157e-02, -2.668e-02, 1.479e-01, 
9.121e-02, -7.333e-02, -5.809e-02, 3.353e-02, 8.973e-02, -1.920e-04, -1.736e-02, 3.458e-02, -8.027e-02, 5.179e-02, -7.594e-02, 6.121e-02, -1.449e-01)); + r += mul(s4_0, M4(2.404e-02, -1.164e-01, -1.138e-02, 5.854e-02, 9.871e-02, -1.055e-01, 2.907e-02, -1.130e-01, 1.670e-03, 3.422e-01, -1.455e-01, 3.089e-03, -8.779e-03, -4.675e-02, 1.635e-02, 5.429e-02)); + r += mul(s4_1, M4(-4.927e-02, -1.494e-01, -1.363e-01, -8.057e-02, -3.957e-02, 5.070e-03, 9.885e-02, -2.611e-01, 1.661e-02, 7.429e-02, -1.809e-01, -3.009e-02, 1.109e-02, -5.557e-02, 1.494e-01, -1.566e-02)); + r += mul(s4_2, M4(-1.329e-01, -6.097e-02, 2.384e-01, -4.837e-02, 5.236e-02, 6.857e-02, 4.869e-02, -6.618e-02, -2.154e-01, 9.844e-02, 2.918e-02, 1.900e-02, -1.062e-02, -7.214e-02, 1.180e-01, -6.203e-02)); + r += mul(s4_3, M4(1.281e-01, -1.015e-02, -8.635e-02, 6.216e-02, 5.106e-02, 4.553e-02, -9.132e-02, -2.484e-02, -7.828e-02, -2.227e-01, -1.006e-01, 1.009e-01, -1.450e-01, -1.877e-01, 1.200e-01, 3.058e-02)); + r += mul(s4_4, M4(8.169e-03, 9.841e-02, 1.590e-01, -8.570e-02, -4.525e-03, -7.477e-02, -2.096e-01, -1.915e-01, 8.201e-03, -1.837e-01, 2.929e-03, 1.767e-01, -3.800e-01, 4.100e-01, -7.337e-02, -5.512e-01)); + r += mul(s4_5, M4(-1.698e-02, 9.353e-03, -2.194e-02, 3.709e-02, 1.136e-01, 1.274e-01, -2.037e-02, -7.867e-02, -3.543e-02, -2.823e-02, -1.751e-01, -1.625e-02, -6.710e-01, 2.510e-01, -8.507e-02, 8.663e-02)); + r += mul(s4_6, M4(-5.057e-02, -1.664e-01, 4.927e-02, 1.236e-01, 6.142e-02, -7.625e-02, -1.189e-01, 4.509e-02, -4.795e-02, 4.793e-02, 2.019e-01, 3.237e-02, 3.277e-01, 9.403e-03, 4.279e-01, -4.445e-01)); + r += mul(s4_7, M4(5.685e-02, -3.024e-02, 1.198e-01, -1.815e-01, 1.008e-01, -5.949e-02, -1.880e-02, 6.315e-02, 1.006e-01, 1.555e-01, 1.411e-02, 2.371e-02, -1.432e-01, -4.057e-01, 2.001e-01, 4.903e-01)); + r += mul(s4_8, M4(-1.706e-01, -2.948e-02, 1.877e-02, 6.796e-02, 4.987e-02, 2.595e-02, -6.672e-02, -1.169e-01, 1.050e-01, -2.239e-02, -1.343e-01, 1.567e-01, 1.017e-01, 7.630e-02, 6.812e-02, 
-2.360e-01)); + r += mul(s5_0, M4(-1.026e-01, -9.333e-03, -1.186e-02, 1.726e-02, -2.644e-02, 6.857e-02, -9.080e-02, -1.468e-02, -4.155e-02, 9.636e-03, -1.099e-02, -2.825e-02, -1.214e-01, -1.480e-01, 8.756e-02, -6.853e-02)); + r += mul(s5_1, M4(4.594e-02, 1.473e-02, -3.919e-02, -9.868e-02, 5.624e-02, -9.622e-03, -1.046e-01, 7.397e-03, -4.288e-02, -9.412e-02, -7.071e-02, 6.349e-02, -3.281e-02, 7.772e-02, 1.582e-02, 1.875e-03)); + r += mul(s5_2, M4(-6.861e-02, -5.088e-04, -2.646e-02, -9.557e-03, -6.081e-03, 7.298e-02, -2.280e-02, 9.319e-04, -7.459e-02, 1.291e-01, -2.097e-01, 1.813e-02, -1.205e-01, -2.198e-02, -3.807e-02, 1.295e-02)); + r += mul(s5_3, M4(-1.126e-01, 3.457e-02, -1.185e-01, -3.790e-02, -9.439e-02, 2.656e-01, -5.974e-02, -3.799e-02, -2.252e-02, -1.287e-01, -1.324e-01, 1.224e-02, 1.002e-01, 4.122e-02, 7.938e-02, -1.297e-01)); + r += mul(s5_4, M4(1.739e-01, 2.231e-01, 7.596e-02, 9.382e-02, 1.795e-02, 1.921e-02, -1.567e-01, -1.585e-01, 1.174e-01, -4.394e-02, 2.407e-01, 1.421e-01, 1.677e-01, 1.530e-02, 9.640e-02, -9.999e-02)); + r += mul(s5_5, M4(1.078e-01, -1.276e-01, 9.488e-03, 9.875e-02, 1.593e-01, -8.027e-02, -1.457e-01, 7.180e-02, 1.792e-01, 1.438e-01, -6.062e-02, -1.219e-02, -4.171e-02, 8.089e-02, -9.086e-02, -8.803e-02)); + r += mul(s5_6, M4(9.794e-02, -3.144e-02, 4.680e-02, -1.580e-01, -4.249e-03, -1.870e-01, 3.627e-03, 7.838e-03, 1.359e-02, -1.264e-01, 6.932e-02, 7.400e-02, -1.946e-02, 1.593e-02, 4.671e-02, 7.116e-02)); + r += mul(s5_7, M4(-1.665e-01, 1.720e-02, 5.782e-02, -4.723e-02, 6.617e-02, -5.133e-02, 8.327e-02, 1.317e-01, 6.304e-02, -1.321e-02, 1.514e-01, -1.111e-03, -1.049e-01, 1.437e-01, -6.917e-02, 1.570e-01)); + r += mul(s5_8, M4(-5.411e-02, -4.024e-02, -6.895e-02, 9.743e-02, 3.381e-02, -5.184e-02, 7.640e-02, -4.415e-02, 1.827e-01, -5.557e-02, -9.430e-02, 9.215e-02, 2.314e-02, -1.233e-01, 9.551e-02, -5.879e-02)); + r += mul(s6_0, M4(1.019e-01, -1.160e-01, -1.117e-01, 7.118e-02, 2.089e-03, 5.206e-02, 4.623e-02, 3.643e-02, -3.738e-02, 
5.755e-04, -1.176e-01, -2.748e-02, 7.664e-02, 5.507e-03, 4.856e-02, -7.044e-02)); + r += mul(s6_1, M4(-3.747e-02, -3.721e-02, -1.471e-01, -9.818e-02, -1.362e-02, 9.418e-02, -8.926e-02, -1.773e-01, 1.578e-02, 1.980e-01, 1.631e-02, -1.074e-01, -2.902e-02, -5.877e-02, -1.368e-01, -1.013e-01)); + r += mul(s6_2, M4(9.635e-02, -3.415e-02, -1.096e-02, -2.239e-02, -3.471e-02, -4.742e-02, -2.250e-03, -8.195e-02, 2.635e-01, 1.230e-01, -1.409e-01, -2.675e-02, 8.787e-02, 1.284e-01, 3.220e-02, 1.199e-01)); + r += mul(s6_3, M4(-7.406e-02, 2.281e-02, -7.353e-02, -2.355e-02, -1.000e-01, -8.840e-02, 6.633e-02, 9.242e-02, -3.978e-02, -9.135e-02, -4.362e-02, 8.695e-02, 1.624e-01, 2.886e-02, 4.969e-02, -2.134e-02)); + r += mul(s6_4, M4(2.339e-01, 1.537e-01, -8.771e-02, 4.382e-02, 1.293e-01, 7.131e-03, -3.922e-02, 1.542e-01, -9.606e-02, -1.144e-01, 1.106e-01, 1.071e-01, -2.441e-02, -5.037e-02, 9.706e-03, 5.595e-03)); + r += mul(s6_5, M4(-1.365e-02, -6.977e-02, -1.378e-01, -1.183e-01, -1.450e-01, 4.283e-02, 3.894e-02, 9.293e-04, -9.859e-03, 1.991e-03, 1.413e-01, -1.364e-01, -2.911e-02, -1.346e-01, 2.264e-01, -7.699e-02)); + r += mul(s6_6, M4(1.082e-01, -3.785e-02, 1.042e-01, 7.449e-02, -4.076e-02, 1.806e-01, -4.700e-02, -3.203e-02, 3.828e-02, -1.632e-01, 6.974e-02, 8.389e-02, 2.752e-02, 7.529e-02, -2.757e-02, 9.510e-03)); + r += mul(s6_7, M4(1.642e-02, -3.839e-02, -1.549e-01, 1.151e-01, 3.545e-03, -1.838e-01, 6.595e-02, -9.679e-03, -2.515e-02, -1.950e-01, 7.938e-02, 7.677e-03, 2.181e-02, 1.664e-01, -3.621e-03, 3.611e-02)); + r += mul(s6_8, M4(2.674e-02, -2.492e-02, -5.711e-02, 9.085e-02, 1.396e-01, 2.214e-02, -6.456e-02, 4.379e-02, 2.190e-01, 1.475e-01, -9.394e-02, 8.779e-02, 6.055e-03, -1.129e-01, 3.130e-02, -1.999e-02)); + r += mul(s7_0, M4(2.205e-02, -2.363e-02, 1.441e-01, 5.374e-02, 7.935e-02, -1.424e-02, -2.127e-01, 1.159e-01, -4.861e-02, 1.047e-01, -9.611e-02, -5.270e-02, -4.079e-01, 6.181e-02, -5.217e-02, 1.953e-01)); + r += mul(s7_1, M4(-1.238e-01, 7.560e-02, 1.279e-01, 
4.693e-02, 5.662e-02, -1.319e-01, 3.728e-02, 1.490e-02, -4.409e-02, -1.999e-02, 2.201e-01, -6.893e-04, 3.261e-01, -1.014e-01, 1.694e-01, -4.550e-02)); + r += mul(s7_2, M4(1.303e-01, 1.327e-01, 2.736e-02, -4.189e-03, 8.621e-03, -1.371e-01, 2.693e-03, 2.422e-02, -3.366e-01, 2.291e-02, -7.167e-02, -6.592e-02, -1.545e-01, 2.992e-01, -1.505e-01, 2.432e-01)); + r += mul(s7_3, M4(-1.066e-01, 4.974e-02, 1.635e-02, 1.384e-01, -2.753e-01, 2.443e-02, 1.366e-01, 7.437e-02, -1.866e-02, -2.068e-01, 4.871e-02, -9.708e-05, 1.050e-01, -1.108e-01, 2.510e-01, -7.779e-02)); + r += mul(s7_4, M4(9.602e-03, 1.465e-01, 2.812e-01, 1.650e-01, 1.805e-01, 9.178e-02, -1.847e-01, 5.806e-02, 8.891e-02, -2.861e-01, -9.379e-02, -2.006e-03, 2.273e-02, -1.977e-01, -2.671e-01, -1.935e-01)); + r += mul(s7_5, M4(-9.270e-03, -5.858e-03, 8.844e-02, 1.220e-01, -1.117e-01, -3.462e-02, -1.914e-01, -9.941e-02, 4.355e-02, 3.478e-02, 2.279e-02, -8.352e-03, 9.003e-02, -1.728e-01, 1.180e-01, 4.944e-02)); + r += mul(s7_6, M4(-2.112e-02, 1.466e-01, -3.346e-02, -5.734e-03, 4.897e-02, -1.847e-01, -1.042e-01, 3.695e-02, 9.723e-02, 9.810e-03, 3.768e-02, 5.925e-02, -2.518e-02, 9.795e-02, -1.788e-01, 1.317e-01)); + r += mul(s7_7, M4(-5.512e-02, -1.030e-01, -1.277e-01, 2.627e-03, 9.803e-03, -1.003e-01, 1.075e-01, -8.339e-02, 4.143e-02, 1.451e-01, 6.543e-02, 2.479e-02, 1.328e-01, 2.868e-01, 5.144e-02, -1.236e-01)); + r += mul(s7_8, M4(-1.184e-01, -4.673e-02, 7.195e-02, -4.310e-02, 6.142e-02, 9.421e-02, 1.790e-02, -4.130e-02, -4.211e-02, 1.720e-01, -6.732e-02, 1.266e-01, -5.543e-01, -4.747e-02, -1.062e-01, -2.950e-02)); + r += V4(-1.614e-02, 2.521e-02, 3.821e-02, 2.764e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 
s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-8.326e-02, -5.179e-02, -1.014e-01, -1.485e-02, 3.131e-02, -1.124e-01, 1.421e-01, 7.704e-02, 1.518e-01, 1.619e-01, -1.141e-01, -4.209e-02, 1.536e-01, -4.357e-02, 1.472e-01, -1.684e-01)); + r += mul(s0_1, M4(1.771e-02, 4.705e-02, -2.494e-02, 1.011e-01, -8.396e-02, -1.821e-02, 3.958e-02, -3.671e-02, 2.742e-02, 2.128e-01, 5.280e-02, -9.654e-02, 1.749e-01, -3.305e-02, -8.326e-02, -2.381e-02)); + r += mul(s0_2, M4(1.627e-02, -1.545e-02, 3.017e-02, -9.665e-02, 5.771e-02, 4.235e-02, 1.274e-01, -1.735e-02, 1.687e-02, 4.105e-02, -9.621e-02, -7.352e-02, 1.323e-01, 1.548e-01, -3.420e-01, -1.192e-01)); + r += mul(s0_3, M4(-6.757e-02, -1.626e-01, -9.287e-03, 2.987e-02, 5.314e-02, 9.609e-02, 2.799e-02, 2.372e-02, 8.674e-02, -8.658e-02, 4.220e-02, 1.121e-02, 1.641e-01, 7.780e-02, -1.461e-01, 4.765e-02)); + r += mul(s0_4, M4(1.400e-02, 2.520e-02, 1.801e-01, -1.146e-02, 2.241e-02, 9.752e-03, 6.976e-02, 1.272e-02, -1.368e-03, -1.537e-01, 2.347e-01, -8.087e-02, -1.757e-02, -1.548e-01, -3.905e-01, 1.874e-01)); + r += mul(s0_5, M4(-6.312e-03, 7.002e-02, 1.704e-01, -9.698e-02, 2.582e-02, 9.538e-02, -1.104e-01, 6.735e-02, 1.160e-01, -1.162e-01, 1.503e-01, 1.101e-01, 1.290e-01, -1.252e-01, 2.056e-01, 1.420e-01)); + r += mul(s0_6, M4(-9.448e-02, -6.469e-02, 2.558e-02, -6.533e-02, -4.299e-02, -6.361e-02, -6.671e-02, 1.207e-01, -3.438e-02, -4.458e-02, 3.696e-02, -9.743e-03, 7.698e-02, 1.019e-01, 1.135e-01, -1.964e-01)); + r += mul(s0_7, M4(-1.068e-01, 1.702e-01, 1.021e-01, 5.547e-02, -2.979e-02, 1.792e-01, 6.735e-02, -9.586e-02, 3.221e-02, -2.029e-02, 2.762e-02, 1.472e-01, 1.998e-01, 1.276e-01, -3.826e-01, -1.524e-01)); + r += mul(s0_8, 
M4(7.166e-02, -1.939e-02, -4.533e-02, 5.115e-02, 3.885e-02, 3.863e-02, 5.534e-02, 6.745e-02, -1.177e-01, 5.674e-02, -1.116e-01, 1.091e-02, 7.271e-02, 1.971e-01, 5.825e-02, 2.648e-02)); + r += mul(s1_0, M4(-2.985e-02, -6.977e-03, 1.502e-02, -4.237e-02, -6.150e-02, -8.724e-02, 9.823e-02, -7.877e-02, -1.148e-01, -2.574e-02, -7.462e-02, 3.344e-03, -6.090e-02, 5.604e-02, 8.273e-03, -1.186e-02)); + r += mul(s1_1, M4(-6.173e-02, 1.370e-01, -6.726e-02, -3.736e-02, -1.585e-01, 9.732e-04, 3.423e-02, -2.137e-01, -1.070e-01, -6.263e-02, 1.619e-01, -1.274e-01, 9.716e-02, -8.335e-02, 2.113e-01, -1.214e-01)); + r += mul(s1_2, M4(-6.260e-02, -5.981e-02, -8.393e-02, 8.858e-02, -8.536e-02, 2.466e-02, -7.342e-02, -1.135e-01, -8.817e-02, -6.417e-02, 2.687e-01, -9.302e-03, 4.948e-02, -2.724e-02, 3.542e-02, -1.847e-01)); + r += mul(s1_3, M4(-1.947e-01, -5.129e-02, -5.875e-02, -1.294e-01, 7.456e-02, -3.294e-02, -1.164e-01, 3.147e-02, 1.105e-01, -7.800e-03, -1.341e-01, 5.646e-02, 2.305e-02, 6.816e-02, -1.050e-01, -1.981e-02)); + r += mul(s1_4, M4(-1.374e-01, 1.100e-01, -1.970e-01, -2.040e-01, 1.153e-01, -8.221e-03, 1.182e-02, 7.866e-02, -4.267e-02, -2.319e-02, 1.139e-01, 5.023e-02, 1.279e-02, -2.280e-01, -2.890e-02, 5.584e-02)); + r += mul(s1_5, M4(5.544e-03, -9.470e-02, 1.354e-02, 7.002e-02, -7.227e-02, 5.340e-04, 1.252e-01, -3.061e-02, -2.357e-02, -3.469e-04, -3.324e-02, 1.115e-01, 1.028e-01, 4.810e-02, -1.727e-02, -3.001e-02)); + r += mul(s1_6, M4(4.899e-02, 1.318e-01, 1.553e-01, -2.315e-02, -4.646e-03, 1.413e-01, 1.029e-01, -2.475e-02, -1.527e-02, 9.400e-02, 1.119e-03, -2.839e-02, -4.058e-02, -6.777e-02, -6.896e-02, 7.025e-04)); + r += mul(s1_7, M4(1.038e-02, 7.905e-02, 2.851e-02, -1.023e-01, -2.227e-02, 1.609e-01, -1.733e-01, -1.304e-01, 2.193e-01, 3.562e-02, -5.497e-02, -2.394e-02, -7.459e-03, -1.197e-01, -2.663e-02, -1.888e-02)); + r += mul(s1_8, M4(6.417e-02, 1.725e-01, 1.523e-01, 2.154e-04, 1.190e-01, -1.166e-01, -7.423e-02, 7.596e-02, 1.757e-02, 1.179e-01, -6.535e-02, 
-3.646e-02, -1.772e-01, 5.586e-03, -8.030e-02, -2.306e-03)); + r += mul(s2_0, M4(-9.393e-02, 1.287e-02, -1.328e-01, -6.069e-02, -7.914e-03, -6.174e-03, -2.700e-02, -2.981e-02, -4.530e-03, 1.140e-01, -1.473e-01, 5.666e-03, -3.409e-02, 2.801e-02, -1.334e-02, 2.986e-02)); + r += mul(s2_1, M4(2.549e-02, 2.743e-02, -2.851e-02, -2.756e-02, 9.961e-02, -3.039e-01, 8.524e-02, -2.354e-01, 3.423e-02, -2.416e-01, -3.891e-02, -2.150e-02, 2.046e-01, -8.982e-02, -1.130e-01, -4.199e-02)); + r += mul(s2_2, M4(-2.586e-02, -4.030e-02, 3.935e-02, -7.586e-02, 4.642e-02, 9.049e-02, 2.109e-01, 6.298e-02, 4.561e-02, 1.509e-01, -1.819e-02, 1.339e-05, 2.124e-01, -5.311e-02, 7.544e-02, -5.366e-02)); + r += mul(s2_3, M4(-1.209e-01, -1.920e-02, 9.824e-02, -6.932e-02, -1.006e-01, 7.977e-02, -1.363e-01, -1.108e-01, 1.081e-01, -9.327e-02, 1.160e-01, -1.157e-01, -1.970e-01, 4.551e-02, -1.152e-01, 8.449e-02)); + r += mul(s2_4, M4(1.356e-01, -8.713e-02, -1.004e-01, 1.772e-01, -9.327e-02, 2.204e-03, 6.566e-02, 1.488e-01, 2.008e-01, -2.937e-04, 1.013e-03, -1.063e-02, -8.310e-02, 9.119e-02, -1.981e-01, 2.115e-01)); + r += mul(s2_5, M4(-2.908e-02, 5.297e-02, 1.070e-02, -7.295e-02, 4.276e-02, 7.299e-02, -1.499e-01, -6.412e-02, 3.087e-03, -1.347e-01, -3.191e-02, 5.867e-02, -5.096e-02, 1.994e-01, -9.654e-02, -6.233e-02)); + r += mul(s2_6, M4(-7.457e-02, 1.169e-02, 6.041e-02, 2.287e-02, 3.144e-02, 9.443e-02, 8.522e-02, 2.124e-01, 1.692e-02, 8.102e-02, 5.781e-02, -7.383e-02, 7.701e-03, -1.401e-01, 6.164e-02, 4.776e-02)); + r += mul(s2_7, M4(2.287e-02, 7.160e-02, 5.061e-02, 2.345e-01, 3.142e-04, 3.323e-02, 4.118e-02, 1.516e-01, -1.404e-01, -5.534e-02, 8.570e-02, -1.280e-01, -1.049e-01, 6.664e-02, -8.468e-03, -4.457e-02)); + r += mul(s2_8, M4(3.520e-02, -6.025e-02, 8.219e-03, -1.640e-02, 3.324e-02, -8.053e-02, 4.475e-02, -5.586e-02, -7.904e-02, 1.436e-01, -1.856e-01, 1.145e-01, -1.769e-02, -4.285e-02, 1.588e-01, 1.335e-02)); + r += mul(s3_0, M4(4.944e-02, -1.403e-01, -2.631e-02, 1.373e-02, -5.896e-02, 
-2.219e-02, 2.393e-02, 1.464e-01, -1.430e-01, -1.396e-01, 9.449e-02, -2.762e-03, 2.572e-02, 1.400e-01, -2.872e-02, -6.719e-02)); + r += mul(s3_1, M4(-2.288e-01, -6.178e-02, -7.404e-03, 2.681e-01, -2.969e-02, 7.872e-02, -2.298e-01, 1.912e-02, 7.017e-02, -1.880e-01, -2.289e-02, 1.225e-01, 3.652e-03, -2.404e-02, -1.499e-02, -1.863e-02)); + r += mul(s3_2, M4(1.464e-01, 1.823e-02, 3.211e-02, -9.460e-02, 5.059e-02, 3.725e-02, 9.253e-02, 1.757e-01, -1.211e-02, -8.096e-02, 2.255e-01, -8.469e-02, -1.871e-02, 3.408e-02, -4.358e-02, 9.228e-02)); + r += mul(s3_3, M4(5.826e-02, -1.529e-01, 2.406e-01, -4.118e-02, 9.928e-03, -2.752e-02, -7.460e-02, -1.104e-01, 2.616e-02, -4.191e-03, 1.889e-01, 2.877e-02, -2.461e-02, 1.909e-01, -1.758e-02, 8.529e-02)); + r += mul(s3_4, M4(1.911e-01, -1.963e-01, 7.031e-02, -2.712e-02, -6.693e-02, -1.253e-01, -7.022e-02, 1.076e-01, 7.105e-02, -5.231e-02, -1.812e-02, -2.366e-01, -5.431e-02, -3.507e-02, 1.831e-01, 1.688e-01)); + r += mul(s3_5, M4(1.715e-01, 3.983e-02, 3.387e-02, -7.281e-02, 8.577e-02, 4.920e-02, -4.497e-02, 3.540e-02, -1.968e-01, 1.319e-02, -5.080e-03, 1.160e-01, -1.669e-02, 9.543e-02, 8.210e-02, 3.069e-02)); + r += mul(s3_6, M4(1.154e-01, 3.190e-01, -1.976e-01, -5.784e-02, 2.538e-04, -1.695e-02, 6.684e-03, -4.450e-02, -1.390e-01, -2.080e-02, 7.287e-03, 2.422e-02, -5.000e-02, 6.889e-03, 1.045e-02, -9.254e-02)); + r += mul(s3_7, M4(2.238e-01, 2.884e-01, -3.807e-02, -5.091e-02, 6.215e-02, 3.840e-02, 4.476e-02, -6.182e-02, -1.363e-01, -3.109e-02, 5.029e-02, -1.368e-01, -8.338e-02, 8.136e-02, -9.682e-02, 2.684e-02)); + r += mul(s3_8, M4(3.755e-03, 6.056e-02, 2.195e-01, -1.970e-01, -1.904e-02, 5.641e-02, 2.645e-02, -1.016e-01, -1.372e-01, 6.136e-02, -6.334e-02, 1.308e-02, 3.897e-03, 9.121e-02, -4.263e-02, -3.809e-02)); + r += mul(s4_0, M4(-3.172e-03, 9.071e-02, -7.858e-02, 3.536e-02, -2.662e-02, -3.669e-02, 1.433e-01, 1.548e-01, -1.451e-02, -5.087e-02, -9.387e-02, -2.118e-02, -8.711e-02, -1.018e-01, -5.008e-02, 4.548e-02)); + r += 
mul(s4_1, M4(-1.240e-01, -8.119e-02, -5.809e-02, 8.099e-02, 5.574e-02, 1.394e-01, -1.312e-02, 3.641e-02, -1.113e-01, 1.367e-02, 1.004e-01, 9.562e-02, 3.457e-01, 1.205e-01, -3.849e-02, -1.098e-01)); + r += mul(s4_2, M4(8.104e-02, -4.261e-03, 1.763e-02, 9.447e-02, -1.450e-01, -1.122e-01, 8.809e-03, 5.113e-02, -1.808e-02, 1.881e-02, -5.663e-02, 1.502e-01, 6.830e-03, 2.102e-01, -1.834e-01, -2.410e-01)); + r += mul(s4_3, M4(-4.601e-02, 4.841e-02, 6.165e-02, 1.204e-01, 9.738e-03, -6.767e-02, -1.472e-01, -5.010e-02, 9.939e-02, 1.177e-02, 9.922e-02, 8.187e-02, -2.389e-01, 1.037e-02, -1.685e-01, -1.365e-01)); + r += mul(s4_4, M4(-1.151e-01, 8.682e-02, 1.895e-01, -7.463e-02, 2.587e-01, -1.716e-01, -1.493e-02, 2.002e-02, -4.644e-02, 1.521e-02, -1.502e-01, 4.996e-02, 6.616e-02, 2.235e-01, 1.984e-01, -5.131e-01)); + r += mul(s4_5, M4(1.073e-01, 2.072e-02, -1.991e-02, 1.435e-02, 1.780e-02, -1.657e-01, -1.046e-01, 1.713e-01, 2.489e-02, 1.301e-01, 1.917e-01, -1.351e-02, 1.654e-01, 3.337e-01, 1.812e-01, 3.168e-01)); + r += mul(s4_6, M4(8.512e-02, -3.333e-02, 9.591e-02, -5.042e-02, -3.283e-02, -3.229e-02, -8.062e-02, 1.006e-01, 2.752e-02, 7.964e-02, 1.035e-02, -1.490e-01, -3.342e-01, 2.648e-01, 6.379e-01, -3.260e-01)); + r += mul(s4_7, M4(-9.161e-02, 1.714e-02, 1.680e-01, 4.406e-02, -1.144e-01, 8.663e-02, -9.845e-02, -2.556e-02, 9.220e-02, 1.626e-01, 1.158e-01, 4.907e-02, 6.413e-01, 1.818e-02, -1.944e-01, -1.364e-01)); + r += mul(s4_8, M4(1.471e-01, -1.823e-01, -4.626e-02, 1.105e-02, -8.338e-02, 2.499e-02, -9.495e-02, 1.042e-02, 1.812e-01, -1.420e-01, 1.377e-01, -2.406e-02, 1.597e-01, -3.467e-02, 2.917e-01, 4.666e-01)); + r += mul(s5_0, M4(4.290e-02, -1.051e-01, -1.730e-01, -4.856e-02, 7.349e-02, -1.305e-01, -1.831e-01, -7.525e-02, -1.404e-01, -1.015e-01, 4.582e-02, -5.793e-02, 4.695e-02, -2.183e-01, -4.480e-02, -1.574e-02)); + r += mul(s5_1, M4(-7.326e-02, -1.802e-02, 1.807e-01, 9.089e-02, 4.307e-02, -8.857e-02, -7.099e-03, -1.575e-01, 2.923e-03, 2.159e-02, -6.711e-02, 7.366e-02, 
5.115e-02, -1.337e-02, 5.908e-02, 2.605e-02)); + r += mul(s5_2, M4(1.969e-02, -1.552e-02, 1.921e-02, 5.537e-02, -1.210e-02, 7.316e-02, -1.328e-01, 2.061e-01, -1.096e-01, -6.762e-03, -1.443e-01, -1.107e-01, -2.937e-02, -7.886e-02, -3.149e-02, 1.044e-02)); + r += mul(s5_3, M4(-2.950e-02, 1.404e-02, -5.290e-02, 5.214e-02, -1.184e-01, 5.320e-03, 1.431e-01, -1.879e-02, 6.318e-02, 2.520e-02, 1.179e-01, 1.001e-01, -6.463e-02, 6.231e-02, 9.036e-02, 5.547e-02)); + r += mul(s5_4, M4(-5.697e-02, -5.924e-02, -8.632e-02, -1.336e-01, 8.515e-02, 5.283e-02, -4.273e-02, 3.461e-02, -2.676e-02, 3.365e-03, -1.777e-01, 2.445e-03, -2.637e-03, -7.010e-02, 1.057e-01, -3.378e-02)); + r += mul(s5_5, M4(6.635e-02, 1.693e-01, 1.918e-02, -1.284e-01, -7.682e-02, -1.273e-02, 1.417e-01, 9.456e-02, 5.123e-02, -4.772e-02, -1.610e-01, 1.006e-01, -3.030e-02, -1.897e-02, 1.088e-02, -1.585e-02)); + r += mul(s5_6, M4(-1.387e-02, 9.614e-02, 1.759e-01, -3.788e-02, 4.005e-03, -9.632e-02, -4.344e-02, 4.098e-02, -8.902e-02, 3.125e-03, 1.147e-01, -1.063e-01, 6.886e-02, 1.318e-01, -2.154e-02, -4.268e-02)); + r += mul(s5_7, M4(4.173e-02, -6.529e-02, -1.632e-01, 3.183e-03, 4.653e-02, 1.396e-01, 1.584e-01, -7.254e-02, -5.903e-02, -6.068e-02, 2.455e-01, -2.021e-02, -1.261e-02, 9.884e-02, -6.960e-02, -6.464e-02)); + r += mul(s5_8, M4(1.284e-01, -6.525e-02, -1.950e-02, -1.040e-01, -1.568e-02, 2.538e-02, 9.358e-02, 1.045e-01, -4.221e-02, 1.556e-02, 6.616e-02, 1.142e-02, 1.454e-02, 1.488e-01, -1.081e-01, -1.477e-02)); + r += mul(s6_0, M4(-6.093e-02, 2.248e-03, -8.130e-02, -1.425e-02, 8.215e-02, -5.382e-02, -1.047e-01, 3.129e-02, -1.344e-01, 6.608e-02, -1.014e-01, 7.938e-02, 1.366e-01, 8.760e-02, 3.761e-02, -1.142e-01)); + r += mul(s6_1, M4(5.009e-02, 4.462e-02, 2.240e-02, -1.194e-01, 1.243e-02, -3.372e-02, -5.153e-02, -8.741e-02, 1.277e-01, 1.411e-01, 1.695e-01, 5.858e-03, -1.322e-01, -5.106e-02, 1.162e-02, 1.106e-01)); + r += mul(s6_2, M4(5.891e-02, -5.044e-02, -1.767e-01, -1.131e-01, 2.995e-02, 7.030e-02, 8.358e-02, 
1.021e-01, -7.201e-02, -1.833e-01, 1.101e-01, 1.205e-01, 4.529e-02, -5.023e-02, 3.671e-02, -1.023e-01)); + r += mul(s6_3, M4(-7.851e-02, 8.958e-02, -9.316e-02, 8.568e-02, 8.191e-02, -4.034e-02, 4.920e-02, 1.540e-02, 8.760e-02, -1.744e-01, 3.072e-02, -3.126e-02, 1.852e-02, 1.684e-01, 1.118e-02, 7.336e-02)); + r += mul(s6_4, M4(5.848e-02, -4.185e-02, -2.660e-01, 3.162e-02, -1.262e-01, -6.983e-02, -2.314e-02, -1.236e-01, 1.154e-01, -1.043e-01, 1.932e-01, -1.070e-03, -1.083e-02, -1.873e-02, -1.542e-02, 1.558e-01)); + r += mul(s6_5, M4(9.919e-02, -5.183e-02, 1.712e-01, 7.021e-02, 6.103e-02, -5.641e-02, -1.925e-02, 5.430e-02, 2.400e-02, -3.171e-02, -1.422e-01, 9.672e-02, -1.302e-01, 8.982e-02, -5.216e-02, 6.914e-02)); + r += mul(s6_6, M4(-2.529e-02, -4.269e-02, 3.431e-02, -7.789e-02, -3.025e-02, -6.084e-02, -7.403e-03, 2.723e-02, -2.770e-03, 6.116e-03, -4.762e-02, -8.540e-02, -7.102e-02, -2.686e-01, 5.625e-02, -5.279e-02)); + r += mul(s6_7, M4(5.902e-02, 1.260e-01, 9.028e-03, 1.185e-02, 1.374e-01, -1.108e-01, 2.352e-02, 1.627e-01, 2.036e-02, 1.147e-01, 1.065e-02, -3.920e-02, -5.666e-02, -6.423e-02, -6.060e-02, -6.509e-02)); + r += mul(s6_8, M4(1.495e-01, 5.329e-02, -6.012e-02, 1.328e-01, 6.940e-03, 6.332e-02, -7.290e-02, 3.060e-02, -7.503e-02, 4.437e-03, -2.798e-02, 3.905e-02, 4.219e-02, 3.876e-02, -5.012e-02, -1.359e-02)); + r += mul(s7_0, M4(-1.164e-01, 1.792e-01, 4.010e-02, -3.066e-02, 2.112e-02, 8.116e-02, -1.087e-01, 8.802e-02, -3.026e-02, 3.427e-02, -7.300e-02, -8.114e-02, 2.046e-01, 9.216e-02, -1.241e-01, -2.265e-01)); + r += mul(s7_1, M4(-1.077e-02, 8.379e-02, 1.208e-01, 1.856e-01, 1.915e-01, 5.306e-02, -8.455e-02, -1.074e-01, 3.913e-02, 2.445e-02, 1.463e-01, -7.386e-02, -1.439e-01, -2.010e-01, 3.727e-02, 8.294e-02)); + r += mul(s7_2, M4(8.590e-02, -6.305e-02, 1.390e-01, 1.522e-01, -1.772e-01, -3.036e-03, -2.138e-01, -1.135e-02, 3.467e-03, 6.760e-02, -2.061e-01, 4.310e-02, 3.918e-02, 3.694e-03, 2.378e-02, 8.366e-02)); + r += mul(s7_3, M4(-1.140e-01, 3.584e-02, 
7.992e-02, 1.224e-01, -7.709e-02, -1.140e-01, 1.688e-01, 6.161e-02, -8.701e-02, 7.456e-02, 2.540e-02, -1.134e-01, -1.200e-01, 2.231e-01, 2.131e-01, -2.430e-01)); + r += mul(s7_4, M4(8.158e-02, -4.486e-02, -4.271e-02, -1.219e-02, -1.679e-01, -3.975e-01, -2.590e-01, -1.145e-02, 1.170e-02, 4.893e-02, 2.747e-01, -2.922e-02, -3.748e-01, -5.284e-02, -6.393e-02, -3.812e-02)); + r += mul(s7_5, M4(1.268e-02, -1.999e-01, -2.030e-01, -7.797e-02, -1.315e-01, -1.759e-01, -6.633e-02, 6.865e-02, 3.407e-02, 5.684e-02, 1.380e-01, 9.850e-02, -2.613e-01, 5.287e-02, -6.630e-02, -2.530e-01)); + r += mul(s7_6, M4(-3.978e-02, 2.952e-02, 1.307e-02, 1.344e-01, -1.538e-01, 3.273e-02, 1.265e-01, -1.160e-01, 1.004e-01, -6.073e-02, -3.581e-02, 1.277e-01, 6.928e-02, 1.397e-02, -2.060e-01, -3.559e-02)); + r += mul(s7_7, M4(2.507e-02, -8.799e-02, 1.324e-01, 1.161e-01, -1.029e-02, -1.028e-01, -1.763e-01, 7.636e-02, -5.917e-03, -8.763e-02, -3.848e-02, -3.817e-02, -1.545e-01, -9.856e-02, 1.606e-01, -2.051e-02)); + r += mul(s7_8, M4(-6.716e-02, 9.246e-02, -6.366e-02, 5.885e-02, -6.213e-03, 6.166e-02, -4.931e-03, 1.102e-02, 3.940e-02, 6.723e-02, -2.166e-02, 1.470e-02, 1.415e-01, 8.718e-02, -1.368e-01, 7.478e-03)); + r += V4(1.274e-02, -1.970e-05, 2.618e-02, -7.558e-02); + return r; +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + 
V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); 
+ V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, 
s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 7 +//!DESC conv6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.420e-02, -1.408e-02, 6.421e-02, -1.120e-01, 6.585e-02, 1.732e-01, -7.802e-02, 1.447e-01, -3.318e-02, -1.495e-01, 1.499e-01, -1.212e-01, -5.066e-03, 1.468e-01, -4.072e-02, 5.189e-02)); + r += mul(s0_1, M4(-1.060e-02, 3.925e-02, 6.566e-02, -7.563e-03, -7.597e-02, 1.650e-01, 1.341e-02, -2.018e-01, 1.312e-01, -8.820e-02, -1.806e-01, -2.112e-01, 4.851e-02, -1.760e-02, 5.317e-02, 9.209e-02)); + r += mul(s0_2, M4(1.814e-02, 1.049e-02, 9.710e-02, -3.510e-02, -6.424e-04, 1.102e-01, 6.702e-02, 7.945e-03, -1.158e-01, 1.386e-01, 1.870e-01, -4.197e-02, 1.954e-03, 7.398e-02, -6.745e-02, -2.169e-02)); + r += mul(s0_3, M4(-5.532e-02, -9.536e-03, -8.015e-02, 1.653e-01, -7.766e-02, 4.322e-02, -1.833e-02, -1.230e-01, 5.629e-02, 4.360e-02, -1.794e-01, 8.885e-02, 
-2.327e-02, 1.834e-02, 3.619e-02, 1.028e-01)); + r += mul(s0_4, M4(7.088e-02, 6.654e-04, -1.787e-01, 2.024e-02, 2.222e-01, 2.436e-01, 8.089e-02, 2.634e-02, 6.307e-02, 8.557e-02, -1.021e-01, 2.179e-01, -2.483e-02, 1.462e-01, -5.920e-02, -1.461e-01)); + r += mul(s0_5, M4(1.415e-02, -6.086e-02, 1.375e-01, -8.521e-02, -1.530e-01, -3.861e-02, 3.039e-02, -9.823e-02, 4.396e-02, 1.644e-02, -6.289e-02, -1.010e-01, -4.978e-02, -1.729e-01, 3.676e-02, 1.843e-02)); + r += mul(s0_6, M4(7.050e-02, -3.634e-02, 9.230e-03, 1.214e-01, 2.396e-01, 2.339e-02, 1.581e-02, 3.219e-02, -2.194e-01, 1.217e-01, 1.276e-01, 7.059e-02, -1.698e-02, 1.083e-02, -5.574e-02, 8.680e-03)); + r += mul(s0_7, M4(7.696e-02, 3.400e-02, -4.987e-02, -2.888e-02, -2.122e-01, -9.791e-02, -9.778e-02, -1.035e-01, -1.108e-01, 2.658e-02, 2.048e-01, 5.571e-02, 1.599e-01, -2.214e-01, 6.763e-02, 5.798e-02)); + r += mul(s0_8, M4(6.366e-02, -5.842e-03, 6.962e-02, 2.804e-02, 1.311e-01, 2.959e-01, -7.168e-02, -9.768e-03, -4.726e-02, -1.068e-01, 2.900e-02, 8.545e-02, -8.765e-03, -1.213e-01, -2.983e-02, 1.298e-02)); + r += mul(s1_0, M4(-1.797e-01, 1.253e-01, 2.317e-02, 1.295e-01, 3.940e-04, -3.863e-02, -2.826e-02, -6.212e-02, 2.816e-02, -1.279e-01, 3.808e-02, 7.194e-02, -1.989e-01, 2.267e-02, 5.064e-02, -4.622e-02)); + r += mul(s1_1, M4(-1.049e-01, -2.455e-02, 1.040e-01, -7.724e-02, 1.184e-01, 8.219e-02, -1.300e-01, 5.256e-02, 1.790e-01, 2.355e-02, -1.684e-01, -3.476e-02, 8.151e-02, -1.007e-01, 2.998e-02, 2.710e-02)); + r += mul(s1_2, M4(1.268e-01, 3.741e-01, -3.234e-02, 8.925e-03, -7.476e-03, -3.866e-02, 2.604e-02, 2.359e-02, 5.237e-02, -1.980e-02, 4.127e-02, 4.160e-03, 7.368e-02, 9.397e-02, 4.852e-02, 3.071e-02)); + r += mul(s1_3, M4(8.868e-02, 9.922e-02, -2.954e-02, 5.330e-02, -1.325e-01, -1.474e-01, -8.702e-03, -3.887e-02, 4.102e-02, 9.639e-02, 7.465e-02, -1.872e-01, 3.933e-02, 7.587e-02, -1.168e-01, 2.842e-02)); + r += mul(s1_4, M4(1.347e-01, 1.460e-01, 1.790e-01, -3.097e-02, 1.784e-01, 1.079e-01, 1.010e-01, 2.803e-02, 
1.493e-01, -1.321e-01, -5.476e-02, 1.066e-01, 1.362e-01, 8.967e-02, 1.136e-01, 7.082e-02)); + r += mul(s1_5, M4(-8.171e-02, -1.510e-01, -2.092e-02, 7.351e-02, -3.666e-02, 9.393e-02, -2.663e-02, 1.266e-01, 8.663e-04, -2.387e-02, 3.578e-02, -1.449e-01, -8.520e-02, 2.782e-01, -1.312e-01, -2.170e-01)); + r += mul(s1_6, M4(2.298e-01, 2.012e-01, -7.026e-02, 1.761e-01, 8.041e-02, -1.424e-01, 1.218e-01, -1.337e-01, -7.734e-02, 4.400e-02, 8.352e-02, -3.196e-03, -1.645e-01, 1.180e-01, 1.074e-01, -1.028e-01)); + r += mul(s1_7, M4(-2.012e-02, 1.786e-01, -3.740e-02, -1.502e-01, -2.025e-02, 8.334e-02, -1.648e-01, -4.983e-02, -6.138e-02, -1.115e-01, -4.707e-03, 2.583e-02, 7.925e-02, -5.074e-02, -9.400e-03, 9.414e-02)); + r += mul(s1_8, M4(-3.650e-02, -1.678e-01, -3.133e-02, -1.101e-01, 1.610e-02, -4.766e-02, 1.251e-02, 7.809e-02, 5.504e-03, 1.289e-01, -2.330e-02, 5.268e-02, 1.673e-02, -3.482e-01, 2.402e-02, 2.704e-01)); + r += mul(s2_0, M4(-7.827e-02, -3.079e-02, 1.018e-02, 2.897e-03, -3.808e-02, -9.789e-02, 1.090e-02, 1.135e-01, 3.019e-01, 2.101e-01, -1.533e-01, -1.324e-01, -3.141e-02, 9.272e-02, 4.798e-02, -3.225e-02)); + r += mul(s2_1, M4(-5.913e-02, 4.003e-02, -2.838e-03, -1.787e-02, -1.815e-02, -1.663e-02, -1.608e-02, -5.393e-02, 6.822e-02, 4.517e-02, -5.463e-02, 6.754e-02, -5.816e-02, 4.339e-02, 1.504e-01, 4.613e-02)); + r += mul(s2_2, M4(-5.325e-03, -1.351e-01, 4.729e-02, -3.259e-02, 4.487e-02, -1.025e-01, 4.505e-02, 1.417e-01, -1.208e-01, 9.062e-02, 9.642e-02, -4.068e-02, -6.048e-02, 4.366e-02, 2.457e-01, 4.117e-02)); + r += mul(s2_3, M4(-7.820e-02, 1.105e-04, 2.462e-03, -2.818e-02, -4.798e-02, 9.599e-02, 3.060e-02, 1.989e-02, -3.079e-02, 4.186e-02, -9.482e-02, 1.349e-02, -1.401e-01, 2.226e-02, 8.061e-02, -1.628e-02)); + r += mul(s2_4, M4(5.090e-02, 9.183e-02, 2.343e-02, -1.108e-01, 7.770e-02, -9.551e-03, -7.795e-03, 7.014e-02, 3.370e-04, 2.133e-01, -1.324e-01, 1.027e-01, 1.202e-01, -2.202e-01, 2.004e-01, -3.432e-03)); + r += mul(s2_5, M4(9.979e-02, -2.255e-01, 3.197e-02, 
2.156e-02, 3.894e-02, 5.951e-02, 3.748e-02, -5.444e-02, -2.226e-02, -3.578e-02, -7.463e-02, 6.022e-02, 3.890e-02, 2.723e-01, 1.939e-01, 3.002e-02)); + r += mul(s2_6, M4(9.141e-02, -7.810e-02, -4.206e-02, -4.264e-03, -2.025e-02, 1.348e-01, 6.310e-02, -1.474e-02, 3.425e-02, -3.399e-01, 5.481e-03, -1.614e-02, -2.261e-02, 3.572e-02, -6.834e-02, -1.787e-02)); + r += mul(s2_7, M4(-3.352e-02, 1.370e-01, -2.628e-03, -8.065e-02, -8.944e-03, -8.675e-03, -9.380e-02, 6.008e-03, 5.125e-02, 8.410e-02, -5.763e-02, -3.422e-02, -6.367e-03, -6.574e-02, -9.948e-03, 1.323e-01)); + r += mul(s2_8, M4(-5.706e-02, -1.304e-01, -6.616e-02, -1.274e-01, -1.153e-02, -1.034e-01, 1.546e-02, -3.198e-02, 7.165e-02, 4.668e-02, -5.618e-02, -4.781e-02, 8.207e-02, 8.158e-02, 4.356e-02, 1.783e-01)); + r += mul(s3_0, M4(4.395e-02, 9.069e-02, -1.479e-01, -1.507e-02, -6.110e-02, 1.443e-02, -3.081e-02, 1.250e-01, -5.002e-02, 1.119e-01, -3.709e-02, -9.127e-02, -5.768e-02, -7.338e-02, 2.705e-02, -1.133e-01)); + r += mul(s3_1, M4(-8.219e-02, 1.077e-01, 1.075e-01, 6.657e-02, 1.384e-03, 6.907e-02, -2.241e-02, -2.334e-02, -1.096e-01, 6.372e-02, 4.354e-02, 7.508e-02, 2.263e-01, 4.511e-03, -1.133e-01, -2.272e-02)); + r += mul(s3_2, M4(9.062e-02, -1.120e-03, -4.467e-02, 7.195e-02, -2.795e-01, 2.950e-01, 7.506e-02, 1.363e-01, -4.873e-02, -2.462e-02, 3.791e-02, 9.310e-02, 8.765e-02, 1.191e-01, -8.419e-02, -9.431e-02)); + r += mul(s3_3, M4(1.655e-02, 5.639e-02, -4.404e-03, -9.981e-02, 1.148e-01, -8.272e-02, -1.390e-01, 5.672e-02, 6.335e-02, 1.684e-01, -7.871e-02, -1.311e-01, 3.674e-02, -1.126e-02, 3.918e-02, 1.493e-02)); + r += mul(s3_4, M4(-6.878e-02, 4.664e-02, 1.452e-02, 7.693e-02, 3.577e-01, -8.820e-02, -8.393e-02, 4.577e-01, 9.147e-02, 2.705e-02, -9.579e-02, 2.560e-02, 3.955e-01, -3.702e-02, 1.694e-01, -1.545e-01)); + r += mul(s3_5, M4(-9.865e-03, 5.487e-02, 7.788e-02, 1.573e-01, 3.029e-01, 1.801e-01, -2.430e-02, -2.202e-01, -5.984e-02, -7.608e-02, -8.305e-02, 4.260e-02, 2.726e-01, 1.149e-01, 2.123e-01, 
-1.036e-01)); + r += mul(s3_6, M4(3.139e-02, -1.450e-02, -3.944e-02, -7.140e-02, 1.567e-01, -3.974e-02, -5.314e-02, -2.869e-01, -6.654e-03, -5.902e-02, 2.141e-02, 1.691e-02, -6.033e-02, 1.637e-03, 2.190e-02, -7.007e-02)); + r += mul(s3_7, M4(-4.549e-02, 1.072e-01, 9.260e-02, -9.793e-03, -5.349e-01, -8.617e-03, 2.488e-02, 2.192e-01, -5.906e-02, -9.705e-02, -7.527e-03, 2.429e-02, -1.062e-01, 2.541e-02, -1.134e-01, 9.786e-03)); + r += mul(s3_8, M4(-2.199e-02, -5.997e-03, 1.201e-01, -2.807e-03, -1.319e-01, 8.213e-01, 2.262e-01, 1.678e-01, 6.571e-02, 6.900e-02, 3.964e-02, 7.886e-02, 8.979e-02, -3.309e-02, 1.736e-02, -6.964e-02)); + r += mul(s4_0, M4(2.849e-02, 7.353e-02, -3.211e-02, -8.134e-02, 4.359e-02, -1.312e-01, 5.510e-02, 2.191e-02, 7.560e-02, -7.792e-02, 7.575e-03, 9.009e-02, 1.078e-02, -2.289e-01, 9.238e-02, 1.665e-01)); + r += mul(s4_1, M4(1.843e-02, -9.599e-03, -6.341e-03, 5.533e-02, -5.045e-02, -1.068e-01, -9.065e-02, -4.897e-03, 1.227e-02, -4.836e-02, -5.728e-02, 4.099e-02, 6.644e-02, -9.681e-02, -8.812e-02, -1.834e-01)); + r += mul(s4_2, M4(-7.421e-02, -5.071e-02, 4.374e-02, -4.480e-02, -1.981e-02, 1.823e-01, 5.533e-02, -2.245e-01, 1.109e-03, -4.267e-02, 2.546e-04, 1.194e-02, 6.800e-02, -4.168e-02, 1.175e-01, -5.438e-02)); + r += mul(s4_3, M4(-5.560e-02, -3.011e-02, -1.020e-04, -3.312e-02, -1.877e-02, -1.602e-01, 1.846e-02, 5.569e-02, 1.562e-03, 8.524e-02, 9.787e-02, -8.073e-02, 7.374e-02, 1.205e-01, -1.051e-01, 1.893e-01)); + r += mul(s4_4, M4(5.501e-02, -7.378e-02, -1.440e-02, 9.642e-02, 2.593e-02, 2.482e-02, -1.697e-02, 5.443e-02, -2.308e-01, 1.820e-01, 1.203e-01, 3.431e-02, -8.536e-03, 5.633e-02, -7.276e-02, 2.111e-02)); + r += mul(s4_5, M4(-5.069e-02, -3.155e-02, 3.042e-03, 1.255e-01, 1.033e-01, 4.543e-02, 4.432e-02, 1.121e-01, 5.034e-04, 1.365e-01, -1.683e-02, -2.542e-01, 1.169e-01, 2.666e-01, 8.883e-02, -6.667e-02)); + r += mul(s4_6, M4(4.482e-04, 6.168e-02, -2.344e-02, 1.217e-01, -1.227e-01, -6.663e-02, -1.012e-01, -1.716e-02, -9.351e-02, 5.405e-02, 
-4.279e-02, 2.607e-02, -5.496e-02, 1.796e-02, -5.819e-03, 7.678e-02)); + r += mul(s4_7, M4(9.358e-02, -3.402e-02, -2.491e-02, -1.184e-01, 8.778e-02, 4.548e-02, -5.376e-02, -1.011e-01, -4.591e-02, 7.198e-02, 3.395e-02, 7.768e-03, -1.642e-01, -9.311e-02, -4.887e-02, -1.391e-03)); + r += mul(s4_8, M4(-4.708e-02, 3.883e-03, 3.773e-02, -3.976e-02, -9.629e-02, 6.306e-02, 4.530e-02, 1.733e-01, 1.792e-02, 5.135e-02, 5.748e-02, -2.131e-02, -1.650e-02, 1.543e-01, -1.282e-02, 3.647e-02)); + r += mul(s5_0, M4(9.375e-02, -1.108e-02, -2.394e-02, -7.815e-02, 8.934e-02, -1.021e-01, 1.423e-02, 6.238e-03, 8.501e-02, 3.090e-02, 5.463e-02, -1.356e-01, -2.088e-02, -1.338e-01, 1.054e-01, 8.263e-02)); + r += mul(s5_1, M4(1.654e-01, 3.170e-02, -9.520e-02, 1.154e-01, -7.324e-02, 8.083e-02, -1.294e-01, 1.087e-01, 1.302e-01, 1.537e-01, -2.128e-01, 1.744e-01, -1.119e-03, -5.597e-02, -1.003e-01, -6.166e-02)); + r += mul(s5_2, M4(-1.160e-01, 1.011e-01, -2.055e-03, -2.750e-01, 1.208e-01, 1.557e-01, -2.758e-02, 7.742e-02, 2.210e-01, -1.112e-01, -8.638e-02, 1.312e-01, -4.294e-02, 5.259e-02, 4.718e-02, -1.276e-01)); + r += mul(s5_3, M4(-4.629e-02, -1.478e-01, -3.667e-02, 2.156e-01, -4.458e-02, -1.432e-01, 1.463e-02, 3.319e-02, -2.418e-02, 5.942e-03, -7.920e-02, 6.860e-02, 5.644e-02, 8.405e-02, -1.435e-01, 1.163e-02)); + r += mul(s5_4, M4(5.875e-02, 2.975e-01, 2.153e-01, -7.192e-03, -7.809e-02, 5.250e-03, 7.988e-02, 5.700e-02, -2.801e-02, 6.919e-02, 4.960e-02, 1.850e-01, 6.536e-02, 2.241e-02, 5.430e-03, 4.156e-03)); + r += mul(s5_5, M4(-1.805e-02, 3.047e-02, -1.341e-01, 1.430e-01, -5.530e-02, 4.731e-02, 6.101e-02, 1.117e-01, 2.167e-02, 1.384e-01, 1.504e-01, 1.005e-01, 5.895e-03, 1.357e-01, 5.338e-02, -1.135e-01)); + r += mul(s5_6, M4(3.260e-02, 9.813e-03, 8.360e-02, -4.573e-02, 1.927e-02, 6.459e-02, 4.829e-03, 4.062e-02, 1.382e-01, -7.578e-02, -1.764e-01, -2.434e-02, -6.793e-02, 4.379e-03, -1.479e-02, 3.745e-02)); + r += mul(s5_7, M4(6.105e-02, 9.865e-02, 1.202e-01, 1.670e-01, -3.379e-02, 
-1.383e-01, 8.565e-02, -2.551e-02, 1.332e-02, -6.082e-02, 3.290e-02, -1.452e-01, -4.283e-02, -6.428e-02, -5.956e-02, 7.832e-02)); + r += mul(s5_8, M4(-3.303e-02, 4.059e-03, 1.373e-01, -3.176e-02, -1.815e-02, 6.239e-02, -1.368e-02, 1.090e-01, 1.357e-02, -9.123e-02, -1.375e-01, -6.569e-03, 9.805e-02, 1.702e-01, 6.732e-02, -4.295e-02)); + r += mul(s6_0, M4(-1.183e-01, -5.906e-04, 1.914e-02, 9.910e-02, 6.500e-02, 5.681e-02, -2.011e-02, -5.261e-02, 1.462e-01, -5.584e-02, -3.575e-02, 1.046e-01, -7.241e-02, -1.346e-01, -2.918e-03, 1.026e-02)); + r += mul(s6_1, M4(2.892e-02, 1.631e-01, -3.074e-02, -8.735e-02, 8.155e-02, -2.551e-02, -2.036e-01, -1.591e-01, -3.379e-02, 6.374e-02, -1.008e-01, -9.758e-02, 4.372e-02, -3.677e-02, 1.248e-01, 3.071e-02)); + r += mul(s6_2, M4(8.023e-02, 2.997e-02, 9.962e-03, -4.284e-02, -8.497e-02, -7.355e-02, -1.333e-03, -1.613e-02, 7.704e-02, 1.743e-01, -1.519e-01, -2.440e-02, -2.547e-02, 5.258e-02, 3.014e-02, -5.597e-02)); + r += mul(s6_3, M4(-2.592e-02, -2.281e-03, 1.595e-02, 2.075e-01, 6.063e-02, 2.393e-02, -1.292e-02, 6.372e-02, -6.155e-03, -1.293e-01, 7.879e-03, 3.101e-01, -2.769e-03, 8.199e-02, 5.373e-02, 2.221e-02)); + r += mul(s6_4, M4(7.649e-02, 1.889e-03, 1.334e-01, -1.798e-01, -1.333e-01, 2.581e-01, -2.934e-01, -2.100e-01, -1.362e-01, -2.313e-02, -4.790e-02, -1.720e-02, 1.017e-01, 3.175e-02, -5.074e-02, -2.047e-02)); + r += mul(s6_5, M4(4.797e-02, 9.415e-02, -3.937e-02, -7.297e-02, -7.981e-02, 9.044e-02, 4.661e-02, 2.250e-01, 9.266e-02, -4.083e-02, -9.222e-02, -5.229e-02, -8.320e-02, 1.539e-01, 3.377e-02, -1.144e-02)); + r += mul(s6_6, M4(7.769e-02, 5.734e-02, 6.139e-02, 6.552e-03, -6.897e-02, 2.645e-01, -5.784e-02, -7.483e-02, 4.382e-02, 6.870e-02, 8.941e-02, 1.264e-01, -8.896e-02, -1.026e-01, 8.097e-02, -9.642e-02)); + r += mul(s6_7, M4(-2.266e-02, 2.104e-02, -3.382e-02, -3.462e-02, 9.502e-02, -1.571e-01, -1.757e-02, -1.374e-01, -3.187e-01, 7.587e-02, 2.071e-01, 1.266e-02, 4.439e-02, 9.037e-02, 8.120e-03, 1.309e-01)); + r += 
mul(s6_8, M4(8.620e-02, -4.691e-02, 2.277e-02, 1.641e-02, 1.618e-01, -2.630e-02, -1.355e-01, 6.158e-02, 5.100e-02, -1.458e-01, 1.792e-01, 5.744e-02, 4.810e-02, -1.794e-01, 1.061e-02, -5.211e-02)); + r += mul(s7_0, M4(1.782e-01, -1.285e-01, -4.400e-02, 2.687e-02, -3.637e-02, -6.006e-02, 5.586e-04, -2.999e-03, -4.826e-02, -1.120e-01, -1.835e-02, -4.651e-02, -5.114e-02, -3.110e-02, 4.545e-02, -6.177e-02)); + r += mul(s7_1, M4(1.359e-01, -6.796e-02, 9.988e-02, -1.300e-01, 2.658e-02, 1.018e-02, -2.886e-02, 3.023e-02, 1.529e-01, -2.311e-02, -1.042e-01, -1.049e-01, 5.034e-02, 1.629e-01, 1.013e-01, 1.131e-01)); + r += mul(s7_2, M4(1.665e-01, -6.278e-02, -1.281e-02, -9.744e-02, -5.604e-02, 4.553e-02, -3.810e-02, -1.705e-01, -4.072e-02, 3.091e-02, -4.414e-02, -1.192e-02, -8.198e-02, 5.345e-02, -4.512e-02, -1.265e-01)); + r += mul(s7_3, M4(-1.983e-01, -1.369e-01, 2.508e-03, 2.284e-01, 2.661e-02, 1.146e-01, -3.674e-02, -5.617e-02, -1.196e-02, 7.266e-02, 1.285e-01, 6.571e-02, -2.698e-02, -6.623e-02, -5.135e-02, -2.303e-02)); + r += mul(s7_4, M4(4.264e-02, -2.439e-02, 1.718e-01, 5.959e-02, -2.041e-02, 6.559e-02, -6.376e-02, 5.568e-02, -4.303e-02, -7.649e-02, -1.284e-01, -1.305e-02, 1.496e-01, 4.092e-02, 5.225e-02, 3.690e-02)); + r += mul(s7_5, M4(-4.911e-02, -1.207e-01, -7.077e-02, -6.792e-02, 1.525e-01, -9.885e-02, 9.327e-02, -2.998e-02, 6.104e-02, -7.417e-03, 9.437e-02, -8.500e-02, -6.068e-03, 1.033e-02, 6.057e-02, 1.605e-02)); + r += mul(s7_6, M4(3.902e-02, 1.314e-01, 9.531e-02, 1.205e-01, -3.243e-02, -6.969e-02, -1.650e-02, 1.962e-02, -1.102e-01, -6.405e-03, -1.269e-02, -6.945e-02, 6.765e-02, 3.643e-02, -6.798e-02, 5.591e-03)); + r += mul(s7_7, M4(-5.830e-02, 3.405e-02, 8.745e-02, -5.615e-02, -5.131e-02, 4.023e-03, -1.450e-01, 8.837e-02, -4.772e-02, -9.055e-02, -1.177e-01, 5.408e-02, -1.225e-01, 1.633e-01, -3.784e-02, 6.974e-02)); + r += mul(s7_8, M4(1.450e-01, 3.805e-02, -8.117e-02, 1.247e-01, -2.925e-03, 5.569e-02, -1.040e-02, 1.200e-01, -1.094e-01, 2.800e-03, 7.729e-02, 
-1.525e-02, -1.471e-02, 1.847e-02, -1.044e-01, 2.992e-02)); + r += V4(2.304e-02, 2.078e-02, -1.771e-02, -7.974e-04); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(7.999e-02, -1.620e-03, 2.616e-03, 8.012e-02, -7.071e-02, -9.453e-02, -1.397e-01, 3.745e-02, 5.401e-02, -9.012e-02, -4.641e-02, -3.268e-02, -2.616e-02, 1.138e-02, -3.360e-02, 2.415e-02)); + r += mul(s0_1, M4(-1.609e-01, -1.081e-01, 1.692e-02, 5.023e-03, -1.996e-01, 2.482e-02, -3.838e-02, 8.200e-02, -1.350e-02, -8.538e-02, -2.835e-02, -2.008e-01, 1.623e-02, 9.711e-02, 7.625e-02, -2.950e-03)); + r += mul(s0_2, M4(-7.191e-02, 1.884e-02, 1.333e-01, -2.174e-02, -1.457e-01, 3.607e-02, 2.046e-02, -2.411e-02, 1.312e-01, 1.607e-02, 5.230e-02, -9.962e-02, -1.802e-01, 7.325e-02, -1.963e-02, -1.184e-01)); + r += mul(s0_3, M4(-8.421e-02, 8.179e-02, -3.473e-02, 1.207e-01, 1.599e-01, -6.990e-02, 9.169e-02, 1.210e-01, -1.189e-01, -1.529e-01, 9.446e-02, 1.413e-01, 5.838e-02, -6.803e-02, -7.839e-02, 7.547e-02)); + r += mul(s0_4, M4(-3.628e-02, -8.204e-03, 2.381e-02, 1.897e-01, -1.094e-01, 3.305e-03, -1.440e-01, 2.822e-01, 4.703e-01, -4.863e-02, -4.803e-02, -7.620e-03, -4.566e-02, 6.281e-03, 4.042e-02, -4.703e-03)); + r += mul(s0_5, M4(-1.283e-01, -3.658e-02, -1.392e-02, -2.535e-02, 4.043e-02, -1.622e-01, -2.733e-02, -8.380e-02, 3.171e-02, -2.183e-01, -8.571e-02, 1.247e-01, 
1.086e-01, -1.587e-01, -1.130e-01, -4.680e-02)); + r += mul(s0_6, M4(-1.463e-02, 4.385e-03, 6.179e-03, -5.464e-02, 6.592e-02, 8.451e-02, -1.945e-02, 5.451e-02, -2.327e-01, -3.674e-03, 7.472e-02, 1.153e-01, -1.051e-02, 1.979e-02, 9.898e-02, 7.161e-02)); + r += mul(s0_7, M4(5.507e-02, -5.591e-02, -2.784e-02, -5.306e-02, -4.063e-02, 3.657e-01, -1.359e-01, 1.332e-01, 4.139e-02, -8.004e-02, -9.011e-03, -1.151e-01, 8.198e-02, -1.104e-01, 9.509e-03, -1.679e-02)); + r += mul(s0_8, M4(8.065e-02, 1.347e-02, 3.794e-02, -5.829e-02, 1.720e-01, 4.496e-02, 3.187e-02, 1.112e-01, -4.350e-02, 8.388e-02, 1.034e-01, -1.892e-02, -2.137e-01, -5.312e-02, 1.345e-01, -3.885e-02)); + r += mul(s1_0, M4(-8.546e-02, -7.385e-02, -1.005e-01, -9.412e-02, -6.122e-02, 3.501e-02, -2.484e-02, -6.044e-02, -1.262e-03, -4.078e-02, 8.990e-03, 3.442e-02, 1.667e-01, -5.292e-02, -8.578e-02, -1.017e-01)); + r += mul(s1_1, M4(-1.591e-01, 8.649e-02, 1.936e-01, -1.675e-01, 5.445e-02, -5.177e-02, 1.256e-01, -3.050e-02, 1.217e-02, -5.676e-02, 1.510e-01, -5.381e-02, 7.436e-03, 1.418e-02, -3.656e-03, -5.954e-02)); + r += mul(s1_2, M4(-1.587e-01, -4.549e-02, -1.032e-01, -2.612e-02, -9.453e-03, 4.694e-02, -3.459e-02, -2.469e-02, 7.204e-02, -2.343e-02, -3.167e-02, 4.009e-02, 4.523e-02, 1.167e-01, -2.459e-02, -6.980e-02)); + r += mul(s1_3, M4(5.893e-02, -1.072e-01, -2.244e-01, 9.065e-02, 8.110e-02, -2.783e-02, 1.611e-01, -1.078e-01, 9.832e-02, -1.784e-01, -3.238e-02, 3.152e-03, 2.439e-01, 3.922e-04, 2.059e-02, 1.391e-01)); + r += mul(s1_4, M4(-1.186e-01, 1.060e-01, 2.611e-01, 2.536e-02, 3.097e-02, 2.366e-02, -3.775e-02, -3.918e-02, -3.971e-02, -4.988e-02, -6.630e-02, 1.792e-01, 4.950e-02, 2.763e-01, -1.243e-01, -6.249e-02)); + r += mul(s1_5, M4(-5.325e-02, -7.006e-02, 1.882e-01, 3.596e-02, 7.997e-02, -8.836e-02, -1.889e-01, 4.237e-02, -2.434e-01, 7.031e-02, -1.146e-02, -1.163e-02, 2.505e-01, -4.368e-02, -2.249e-01, -1.309e-01)); + r += mul(s1_6, M4(8.930e-02, -3.275e-02, 9.076e-02, -9.582e-02, 3.224e-02, -1.109e-01, 
8.775e-02, -3.492e-02, -3.691e-03, -1.507e-01, 1.391e-01, 1.705e-02, 5.119e-02, 1.323e-01, -8.738e-02, 3.481e-02)); + r += mul(s1_7, M4(-7.445e-02, -9.360e-02, 1.949e-01, -2.640e-01, 7.285e-02, 6.967e-02, 1.469e-01, -1.146e-01, 7.315e-05, 6.037e-02, 3.447e-02, -2.215e-03, -9.679e-02, -1.745e-01, 3.088e-02, 1.227e-02)); + r += mul(s1_8, M4(-2.196e-02, -2.083e-01, 6.747e-02, -5.662e-02, -8.165e-02, 5.244e-02, 1.161e-01, -2.417e-02, 2.538e-02, -1.887e-02, -5.177e-02, -8.775e-02, -1.634e-01, 2.158e-01, 1.430e-01, -1.185e-01)); + r += mul(s2_0, M4(1.416e-02, -2.729e-03, -6.712e-03, 1.679e-01, 9.746e-02, -3.308e-02, 3.119e-02, -2.757e-02, 1.507e-03, -1.257e-01, -1.163e-02, 1.702e-02, 2.982e-02, 5.174e-02, -5.909e-02, -9.968e-03)); + r += mul(s2_1, M4(-6.318e-02, 6.115e-02, -1.590e-01, -1.880e-01, 2.189e-01, -5.003e-02, 5.183e-02, -7.485e-02, 7.363e-02, 1.857e-02, 1.678e-01, -1.711e-02, 1.165e-01, 3.703e-02, -2.523e-01, -2.358e-02)); + r += mul(s2_2, M4(3.419e-02, 3.932e-02, 7.664e-02, 1.920e-01, -5.878e-02, -4.309e-02, -7.888e-02, -1.210e-02, 9.921e-02, -7.974e-02, -7.892e-02, -1.368e-01, -1.106e-01, 6.524e-02, 4.530e-02, 3.731e-02)); + r += mul(s2_3, M4(1.122e-01, 4.290e-02, 6.229e-03, -5.814e-02, -8.716e-02, -3.329e-02, -3.989e-02, 5.030e-02, -8.717e-02, 7.386e-02, 1.774e-02, -1.027e-01, 1.304e-02, 6.706e-02, -3.998e-03, -1.373e-02)); + r += mul(s2_4, M4(7.571e-02, 9.108e-02, 1.515e-01, -3.229e-02, 1.363e-01, -6.226e-02, 8.916e-02, -3.928e-02, -2.106e-03, 3.530e-02, -4.458e-02, -9.075e-03, 4.753e-02, -6.515e-02, 1.183e-01, 3.796e-02)); + r += mul(s2_5, M4(-6.416e-02, -7.677e-02, 1.812e-01, 4.696e-02, -7.591e-02, 6.010e-02, 2.132e-03, -8.509e-02, -2.236e-02, 5.432e-02, 3.393e-02, 1.354e-01, 1.391e-01, 2.640e-02, -1.029e-02, 4.559e-02)); + r += mul(s2_6, M4(2.302e-02, 1.361e-01, 1.578e-01, 8.174e-03, 3.473e-02, 7.651e-02, -1.799e-02, 6.476e-02, 4.752e-03, -4.013e-02, 1.431e-01, -7.007e-02, -9.436e-02, 1.011e-01, -4.980e-02, -1.059e-02)); + r += mul(s2_7, M4(9.090e-02, 
5.888e-02, 9.000e-02, -3.742e-02, 5.067e-02, 2.931e-03, 5.460e-02, 4.517e-03, -1.107e-02, 1.579e-01, 3.834e-02, 9.015e-02, 7.654e-02, -8.582e-04, -3.566e-02, -3.666e-02)); + r += mul(s2_8, M4(-1.902e-01, -6.281e-02, 1.274e-01, 1.703e-02, -6.806e-02, 8.779e-02, -1.885e-01, 3.918e-02, 8.967e-02, -8.760e-02, 9.290e-02, -2.748e-02, 6.798e-03, 1.434e-02, -1.477e-02, -1.213e-01)); + r += mul(s3_0, M4(-4.167e-02, -6.296e-02, 1.694e-01, -1.619e-01, -5.172e-02, 3.976e-02, 1.511e-02, -6.793e-02, -4.164e-02, 7.498e-02, -3.502e-02, 1.430e-01, 2.042e-02, 1.119e-03, -5.693e-02, 4.382e-02)); + r += mul(s3_1, M4(-2.133e-02, -8.130e-02, -1.180e-01, -1.165e-01, -3.496e-01, 1.115e-01, 1.996e-01, 4.404e-02, 8.458e-02, 7.163e-02, 1.094e-02, 4.361e-02, -4.895e-03, 3.641e-02, -3.300e-02, -8.676e-03)); + r += mul(s3_2, M4(-4.757e-02, 5.155e-02, -3.629e-03, 5.710e-02, 2.900e-01, -7.436e-02, 2.527e-01, -1.666e-01, 2.816e-02, -6.282e-02, -7.414e-02, 9.729e-02, -1.450e-01, -2.380e-02, 6.081e-02, 7.674e-02)); + r += mul(s3_3, M4(-5.416e-02, 4.368e-02, -9.068e-02, -9.732e-02, -4.119e-01, 6.885e-02, -9.626e-02, -2.044e-02, 2.765e-02, -9.241e-04, -3.130e-03, 1.777e-02, -2.878e-02, -7.006e-02, 6.017e-02, -8.017e-02)); + r += mul(s3_4, M4(6.011e-02, 2.476e-02, -7.871e-03, -3.067e-02, -1.666e-01, 2.471e-01, -1.384e-01, -1.670e-01, -2.046e-02, 4.711e-04, -4.056e-02, -1.043e-02, -6.928e-02, -1.034e-01, 1.530e-01, -1.081e-01)); + r += mul(s3_5, M4(-6.437e-02, -2.059e-03, 1.335e-01, -9.360e-02, -6.898e-01, 3.446e-01, 8.080e-02, -1.326e-01, 2.047e-01, -1.710e-01, 1.809e-02, 8.229e-02, 1.048e-01, -2.134e-02, 2.202e-01, -5.295e-03)); + r += mul(s3_6, M4(-8.757e-02, 3.572e-02, -1.984e-02, 3.324e-02, 1.682e-01, -9.900e-02, -3.028e-02, -6.892e-02, -1.136e-01, 1.243e-02, -1.335e-01, 9.428e-02, -4.810e-02, -5.990e-02, 2.229e-02, -9.369e-02)); + r += mul(s3_7, M4(1.028e-03, -9.446e-02, -1.577e-01, 3.588e-02, -3.531e-01, 1.472e-01, -3.714e-01, -7.450e-03, 5.036e-02, 7.070e-02, -2.362e-03, -8.038e-02, -3.325e-03, 
1.388e-03, -1.553e-02, 9.550e-02)); + r += mul(s3_8, M4(-8.163e-02, -1.237e-01, -7.440e-02, -4.663e-02, -5.525e-01, 2.020e-01, -9.522e-02, -5.103e-02, 6.793e-02, -4.918e-02, 7.321e-02, 2.461e-02, 1.587e-01, 1.852e-02, -3.698e-02, -6.401e-03)); + r += mul(s4_0, M4(7.860e-03, 3.884e-02, 5.483e-03, 5.174e-03, -3.893e-02, -6.120e-02, 1.887e-03, 1.142e-01, 3.721e-02, -1.634e-01, 1.052e-01, -4.523e-02, 8.545e-02, -6.886e-02, 1.515e-01, -1.370e-01)); + r += mul(s4_1, M4(9.963e-02, -9.395e-02, 1.836e-02, -3.191e-02, -1.768e-01, 3.712e-02, -4.629e-02, 6.314e-02, 2.586e-02, -9.573e-02, 1.088e-01, 1.255e-01, 1.712e-01, 1.425e-01, 1.209e-01, 2.170e-02)); + r += mul(s4_2, M4(-2.128e-02, 5.724e-02, 1.005e-02, 5.762e-02, -1.540e-01, 5.492e-02, -4.018e-02, 1.316e-01, 6.741e-02, 7.806e-02, -6.408e-02, 4.491e-02, 5.640e-02, 5.481e-02, 4.540e-02, -5.886e-02)); + r += mul(s4_3, M4(-1.801e-02, -1.554e-01, 1.069e-01, 1.312e-01, -1.322e-02, -1.649e-01, -1.783e-01, -5.706e-02, 1.447e-01, 5.617e-02, -8.603e-03, 8.138e-02, -1.422e-01, 7.130e-02, 3.093e-02, -1.061e-01)); + r += mul(s4_4, M4(5.832e-02, -2.313e-02, -7.741e-03, -1.037e-01, 1.611e-01, 1.601e-01, -1.605e-01, 6.488e-02, 4.587e-02, 7.448e-02, 4.622e-05, 1.951e-01, 1.929e-01, 2.702e-01, 3.835e-02, 1.293e-01)); + r += mul(s4_5, M4(-3.686e-02, 2.329e-02, -1.811e-02, -2.372e-02, 3.541e-02, -5.643e-02, -7.094e-02, 7.574e-02, -6.198e-02, -5.071e-02, 1.720e-02, 1.528e-01, -5.980e-02, 1.051e-01, -1.504e-01, 9.834e-02)); + r += mul(s4_6, M4(-3.692e-02, 1.263e-02, 3.742e-02, -1.246e-01, -1.047e-01, 2.976e-02, 1.274e-01, 6.227e-02, 1.215e-01, 1.237e-01, -9.143e-02, 1.762e-02, -6.094e-02, -2.488e-03, 9.006e-02, -1.239e-02)); + r += mul(s4_7, M4(4.501e-02, 7.817e-02, 1.274e-02, -3.442e-02, 1.184e-01, 5.412e-02, -6.588e-03, -9.060e-02, -2.775e-02, 9.915e-02, -2.931e-02, -1.805e-02, -9.197e-02, -4.583e-02, 5.251e-02, 8.606e-02)); + r += mul(s4_8, M4(-6.336e-03, 7.080e-02, 7.193e-02, -8.116e-02, -6.069e-02, 7.960e-02, 4.466e-02, -1.234e-01, 
3.669e-02, -1.267e-01, 4.789e-02, 9.546e-02, -1.251e-01, 1.596e-01, -5.935e-03, -1.457e-02)); + r += mul(s5_0, M4(-1.131e-01, 2.031e-03, -2.170e-01, 9.359e-02, 7.448e-02, -6.712e-02, 2.227e-02, -8.335e-03, -1.201e-01, 2.041e-03, -7.554e-02, 3.226e-02, 4.219e-02, 1.368e-02, -7.555e-02, -3.879e-02)); + r += mul(s5_1, M4(-8.146e-02, -4.065e-02, -6.824e-02, 2.306e-01, 9.171e-02, -9.450e-02, -9.963e-02, -1.612e-01, -1.035e-01, -2.014e-01, -5.606e-02, -1.103e-01, 6.326e-02, 8.356e-02, -1.101e-01, -3.499e-02)); + r += mul(s5_2, M4(-6.828e-02, 4.353e-02, 4.308e-02, -5.173e-02, -7.365e-02, 1.957e-04, -9.301e-02, 3.017e-02, 9.228e-02, -1.073e-01, 9.554e-02, -6.032e-03, 4.334e-02, 8.985e-02, 1.425e-02, 3.057e-03)); + r += mul(s5_3, M4(-1.820e-01, -3.243e-01, 4.594e-01, 2.017e-01, 2.362e-02, 1.450e-01, -3.296e-02, -1.968e-01, 4.588e-02, -1.155e-01, 1.189e-01, -8.530e-02, -2.630e-02, -9.324e-02, 1.008e-01, 7.584e-02)); + r += mul(s5_4, M4(2.302e-01, -1.174e-01, 3.527e-03, 1.558e-02, 9.033e-02, 9.558e-02, 8.247e-02, -1.767e-01, -3.612e-02, 8.947e-02, -5.578e-03, -2.079e-01, -1.438e-01, 6.322e-02, 3.686e-02, 5.993e-03)); + r += mul(s5_5, M4(9.295e-02, 1.557e-01, 6.068e-02, 2.223e-02, -1.008e-02, -1.868e-02, 1.444e-01, -6.666e-03, 2.329e-02, 2.803e-02, 5.175e-02, 2.903e-02, -3.607e-02, 3.059e-02, 5.683e-02, 5.372e-02)); + r += mul(s5_6, M4(2.503e-01, 4.217e-03, 2.266e-02, -9.277e-02, -8.317e-02, 5.995e-02, 6.297e-03, 2.473e-02, -1.268e-01, 1.181e-01, 1.405e-01, -1.039e-01, -6.847e-03, -4.872e-02, -9.843e-02, 8.868e-02)); + r += mul(s5_7, M4(1.709e-01, -5.937e-02, 4.764e-01, 3.679e-02, 7.183e-02, -1.304e-01, 1.472e-03, 1.494e-02, 2.637e-02, -5.433e-02, -3.825e-03, -8.601e-02, -5.135e-02, 1.348e-01, 1.563e-01, -7.091e-02)); + r += mul(s5_8, M4(-9.578e-02, 3.730e-02, 1.338e-02, -3.171e-03, -9.686e-02, 2.701e-02, 2.475e-02, 5.352e-02, 1.130e-01, 1.037e-01, -7.294e-02, 1.155e-01, -1.874e-02, -4.895e-02, -8.928e-02, 4.230e-02)); + r += mul(s6_0, M4(4.921e-02, -2.864e-02, 9.962e-02, 
-1.616e-01, -4.359e-02, 3.743e-02, -8.323e-02, 8.412e-04, -3.199e-02, 2.923e-02, -7.784e-02, -1.324e-01, 1.150e-01, 6.624e-02, 2.112e-02, -9.146e-03)); + r += mul(s6_1, M4(2.497e-02, 8.942e-02, 4.448e-03, -4.835e-02, -1.912e-01, 1.012e-02, 1.050e-01, -1.184e-01, 2.731e-03, 1.496e-02, -1.011e-02, -8.049e-02, 3.949e-02, 1.983e-02, -6.960e-02, 3.667e-02)); + r += mul(s6_2, M4(-1.267e-01, 1.528e-01, 1.032e-01, -1.747e-02, -2.512e-01, 6.264e-02, 4.478e-02, -7.420e-02, 1.971e-02, -1.210e-02, 6.577e-02, -1.485e-01, 8.669e-02, 2.808e-02, -2.027e-01, -9.546e-03)); + r += mul(s6_3, M4(-5.131e-02, 2.670e-02, -7.644e-02, 3.217e-02, -1.250e-01, -2.789e-02, 1.098e-01, 1.061e-01, 5.801e-02, -1.449e-01, -1.785e-01, 6.115e-03, -1.588e-01, 5.088e-02, 9.708e-02, 8.950e-02)); + r += mul(s6_4, M4(6.558e-02, -8.009e-02, -1.274e-01, -9.429e-02, -1.121e-01, -1.436e-01, 1.718e-01, 1.111e-01, -9.270e-02, -9.540e-02, 7.031e-02, -1.759e-01, 5.097e-02, -1.088e-01, 4.010e-02, 4.602e-02)); + r += mul(s6_5, M4(-2.712e-02, -1.345e-01, 2.232e-02, 2.669e-02, 3.503e-01, -5.749e-03, 8.629e-02, 4.931e-02, 7.628e-03, 1.580e-01, 1.701e-01, -8.106e-02, 1.535e-02, -5.827e-02, 1.061e-01, -7.086e-02)); + r += mul(s6_6, M4(1.574e-02, 2.954e-02, -3.332e-03, -9.029e-02, -6.109e-02, -3.010e-02, -7.304e-02, 8.621e-02, 2.478e-02, -1.433e-02, -9.557e-02, 1.353e-01, 1.971e-01, -6.685e-02, 4.943e-02, 1.762e-01)); + r += mul(s6_7, M4(1.119e-01, 3.119e-02, 9.649e-02, -1.372e-01, 9.541e-02, 1.287e-01, -1.209e-01, -1.336e-01, -5.958e-02, -9.256e-02, -4.365e-02, 9.556e-02, 1.614e-02, 4.822e-02, 7.347e-02, 1.568e-01)); + r += mul(s6_8, M4(-5.852e-03, -5.064e-02, 1.870e-01, -5.772e-02, -1.432e-01, 3.698e-03, 1.276e-01, -1.390e-01, -6.877e-03, -3.936e-02, 8.278e-02, 3.605e-03, -3.207e-02, -1.130e-01, -1.071e-01, 2.186e-02)); + r += mul(s7_0, M4(1.382e-01, 1.066e-02, 1.112e-01, -2.032e-01, 1.821e-02, 6.938e-04, 5.903e-02, 1.176e-02, -1.381e-01, -7.692e-02, -1.254e-01, -6.592e-03, 1.094e-02, -2.691e-02, -3.247e-02, 
-1.120e-01)); + r += mul(s7_1, M4(3.450e-01, -9.358e-02, -7.091e-03, -1.259e-02, 1.100e-01, -2.740e-02, -6.952e-02, -4.548e-02, 7.692e-02, -4.172e-02, -3.717e-02, -8.551e-02, 3.655e-02, 5.882e-02, -9.729e-03, -3.302e-02)); + r += mul(s7_2, M4(6.189e-03, 4.251e-02, -6.474e-02, 2.334e-02, -4.998e-02, 6.903e-02, 7.593e-02, -1.005e-01, 1.668e-03, 1.395e-02, -9.261e-02, -3.769e-02, -5.664e-02, 4.900e-02, -1.949e-02, -3.177e-02)); + r += mul(s7_3, M4(4.695e-02, 8.681e-02, 7.893e-02, 4.932e-02, -4.687e-02, 8.626e-03, -2.098e-03, -1.537e-01, 5.335e-02, -7.062e-02, 1.179e-01, 9.341e-03, -3.518e-02, 1.097e-01, -1.079e-01, -9.958e-02)); + r += mul(s7_4, M4(2.912e-01, 8.360e-02, 3.996e-02, -1.659e-01, -7.513e-02, -2.769e-02, 5.027e-02, 2.613e-01, 1.307e-01, 4.392e-03, -4.296e-02, 1.132e-01, 6.710e-02, -1.828e-01, 3.217e-02, -5.482e-02)); + r += mul(s7_5, M4(1.063e-01, -6.933e-03, -1.220e-01, 2.037e-01, 1.207e-01, -2.337e-02, -2.119e-02, 4.235e-02, -9.710e-02, 7.359e-03, 1.674e-01, 1.008e-01, 1.264e-01, 6.562e-02, 2.172e-02, -2.759e-02)); + r += mul(s7_6, M4(-2.009e-01, -3.470e-02, 4.713e-02, -2.868e-01, -1.296e-02, 5.734e-02, 8.538e-02, -8.481e-03, -1.077e-01, -3.312e-02, 4.065e-02, 1.199e-01, -3.355e-02, 4.408e-02, 6.869e-02, 7.852e-02)); + r += mul(s7_7, M4(-1.488e-02, -9.504e-02, -5.499e-02, -1.036e-01, -8.776e-02, 5.539e-02, -7.250e-02, 8.325e-02, 4.185e-02, 8.300e-02, 3.218e-02, 2.410e-01, 1.812e-01, -3.772e-02, -7.048e-02, -2.958e-02)); + r += mul(s7_8, M4(5.901e-02, 5.462e-02, 1.877e-02, 1.296e-02, -3.299e-04, 4.156e-02, -1.330e-01, -1.156e-01, 9.044e-03, 1.101e-02, -1.311e-01, -1.336e-01, 1.801e-01, 7.683e-02, -1.230e-01, 2.210e-02)); + r += V4(-3.601e-02, 2.785e-02, 1.684e-04, -7.248e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 
s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.511e-01, 6.291e-02, -4.318e-02, 4.797e-03, 7.914e-02, 5.092e-02, -2.714e-02, 3.254e-03, -6.260e-02, 1.222e-01, 7.157e-02, -5.641e-02, -1.388e-01, -7.529e-02, 1.045e-01, -1.507e-03)); + r += mul(s0_1, M4(8.468e-02, 2.170e-01, 1.647e-01, 8.689e-03, 9.086e-02, 1.816e-01, 9.235e-03, 7.385e-02, -1.458e-02, -2.316e-02, 2.719e-02, 8.236e-02, -4.202e-02, 6.168e-02, -6.452e-02, 4.523e-03)); + r += mul(s0_2, M4(-9.053e-02, 5.428e-02, -7.223e-02, -1.326e-01, -1.022e-01, 1.603e-01, 1.160e-01, -1.811e-01, -1.709e-01, -1.445e-01, 2.000e-01, 9.591e-02, 1.133e-01, 6.793e-02, 5.220e-02, 3.587e-02)); + r += mul(s0_3, M4(1.357e-01, -4.035e-02, -7.542e-02, -1.238e-01, -1.316e-01, -1.207e-02, -1.653e-02, -6.519e-02, 5.752e-02, -1.875e-01, 4.793e-02, -9.157e-02, -2.477e-02, -1.841e-02, -2.652e-02, 1.056e-01)); + r += mul(s0_4, M4(1.456e-01, 1.258e-02, -7.971e-02, -4.622e-03, 2.229e-01, 9.433e-02, 1.444e-01, 1.025e-01, 9.825e-02, 2.889e-02, -4.671e-02, 2.770e-02, 7.783e-02, -9.808e-02, -6.966e-02, 8.734e-02)); + r += mul(s0_5, M4(-1.451e-01, -4.161e-02, -7.557e-02, -4.700e-02, -2.362e-01, -4.973e-02, 1.858e-03, -4.063e-02, 2.257e-01, -2.317e-02, -1.216e-01, 9.751e-02, 4.200e-02, 2.132e-02, -4.459e-02, 7.851e-03)); + r += mul(s0_6, M4(-5.414e-03, 6.019e-02, -3.878e-02, 1.580e-01, 4.609e-01, -1.148e-01, 4.239e-02, 7.252e-02, 1.666e-02, 6.721e-02, 1.354e-01, 4.201e-02, 5.184e-02, 2.784e-02, -2.395e-02, 4.399e-02)); + r += mul(s0_7, M4(9.582e-02, -1.141e-01, 1.359e-01, 1.092e-01, 2.539e-02, -1.223e-01, -8.734e-03, -2.589e-01, -2.802e-02, 2.185e-01, 2.233e-02, 
-1.094e-01, 6.573e-02, 1.053e-01, 6.570e-02, -4.853e-02)); + r += mul(s0_8, M4(-4.426e-02, -2.154e-02, 3.121e-03, -3.852e-02, -5.457e-02, 1.207e-03, 1.977e-01, -7.048e-02, 9.662e-02, 9.187e-02, -3.300e-02, -9.639e-02, -5.657e-02, -1.752e-02, 1.350e-01, -2.091e-02)); + r += mul(s1_0, M4(-1.525e-01, 1.467e-01, -1.882e-01, -9.671e-02, -8.896e-02, -4.265e-02, -3.285e-02, 1.379e-02, 7.665e-02, 5.953e-03, 6.120e-02, -6.980e-02, -7.102e-02, -2.386e-02, -2.886e-02, -1.403e-01)); + r += mul(s1_1, M4(1.566e-01, 1.743e-01, 6.076e-02, -8.198e-02, -2.121e-02, -1.783e-02, 8.034e-02, -4.218e-03, 4.352e-02, 2.568e-02, -2.166e-01, -7.963e-03, 6.286e-02, 1.651e-01, -1.422e-01, -8.033e-02)); + r += mul(s1_2, M4(-1.433e-01, 1.123e-01, 1.040e-01, -1.238e-01, -3.072e-02, -1.398e-02, 1.370e-01, -9.717e-02, 6.875e-03, -1.036e-01, 1.524e-01, 6.576e-02, 3.454e-02, 3.696e-02, -6.506e-02, 9.144e-03)); + r += mul(s1_3, M4(3.416e-02, 1.681e-01, 4.973e-03, -1.784e-02, 1.433e-01, -3.586e-02, -6.171e-02, 5.305e-02, 1.669e-01, -6.170e-02, 2.149e-01, -1.927e-02, 1.244e-02, -5.511e-02, -2.138e-01, -5.206e-02)); + r += mul(s1_4, M4(4.795e-02, 5.456e-02, -8.320e-02, 2.625e-02, -4.835e-03, 5.101e-02, 1.760e-01, -4.447e-02, 3.431e-02, 4.533e-02, -7.802e-02, -6.762e-02, 1.274e-01, -3.599e-04, -2.310e-01, 5.342e-02)); + r += mul(s1_5, M4(-9.627e-02, 1.068e-02, -1.488e-01, 8.066e-02, 5.780e-02, 8.952e-02, -1.372e-01, -1.067e-02, 2.786e-02, 6.846e-02, -9.092e-02, -1.315e-01, 1.724e-01, 1.178e-02, -1.968e-01, 6.223e-02)); + r += mul(s1_6, M4(-1.254e-01, -1.597e-02, -9.678e-02, 4.643e-03, -1.380e-02, -4.123e-02, -4.689e-02, 4.474e-02, 2.131e-02, 1.007e-01, 1.677e-02, 4.482e-02, 1.120e-01, 5.918e-02, -1.698e-01, -6.037e-02)); + r += mul(s1_7, M4(8.032e-02, 1.316e-02, 1.546e-01, 1.446e-03, -1.208e-01, -1.349e-02, -1.146e-01, 5.901e-02, 3.247e-02, 2.274e-01, 1.425e-02, -5.002e-02, -6.477e-02, 3.140e-01, 2.705e-01, 3.952e-02)); + r += mul(s1_8, M4(1.709e-02, 5.680e-02, 5.499e-02, -1.227e-02, 5.584e-02, -2.021e-02, 
-5.704e-02, 1.497e-01, -1.477e-01, -1.815e-02, 5.863e-02, 3.848e-02, -1.686e-01, -4.496e-03, -2.946e-02, 3.112e-01)); + r += mul(s2_0, M4(-5.505e-02, -8.205e-02, -9.848e-03, 4.871e-02, -6.139e-02, 7.057e-02, -9.198e-02, -4.509e-02, 1.479e-01, 7.591e-02, 3.395e-02, 7.347e-02, -4.939e-02, 5.614e-02, 8.329e-02, -1.111e-01)); + r += mul(s2_1, M4(-1.309e-02, 3.023e-02, 5.362e-02, -5.243e-04, 6.147e-03, 1.381e-01, 7.541e-02, 6.699e-03, -3.014e-02, 9.167e-02, 5.641e-02, 5.618e-02, 3.285e-02, 6.500e-02, -9.409e-02, -1.153e-01)); + r += mul(s2_2, M4(-8.556e-02, 2.180e-02, -1.062e-01, -4.832e-02, 4.095e-02, 4.088e-02, 5.495e-02, -3.893e-02, 7.819e-02, 7.236e-03, -2.708e-01, 3.541e-02, 1.015e-02, 1.035e-01, 4.725e-02, -1.274e-01)); + r += mul(s2_3, M4(-1.225e-02, -4.637e-02, -1.343e-01, 5.954e-04, -5.979e-02, -5.189e-02, -3.641e-02, -1.261e-02, 7.673e-02, 2.366e-02, -5.871e-02, 1.500e-01, 1.868e-03, 7.390e-02, 3.925e-02, -2.019e-01)); + r += mul(s2_4, M4(6.534e-02, -3.131e-02, -3.835e-02, 1.024e-01, 2.550e-02, -2.252e-01, -1.511e-01, 3.961e-02, 6.134e-02, -8.533e-02, 2.423e-01, 3.648e-03, 3.086e-02, 2.955e-01, 4.568e-02, -1.388e-01)); + r += mul(s2_5, M4(9.419e-02, 6.610e-02, -9.343e-03, 1.570e-01, 1.163e-01, -6.123e-02, 1.423e-01, -2.549e-02, -3.459e-02, -4.878e-02, 1.442e-01, 2.279e-02, 2.613e-03, 8.214e-02, -9.397e-02, -3.986e-01)); + r += mul(s2_6, M4(7.450e-02, -3.281e-02, 6.155e-02, 1.158e-01, 4.246e-02, -4.966e-03, -4.972e-02, 1.587e-01, 3.878e-02, -1.533e-02, -7.299e-02, 9.350e-02, -1.141e-01, 2.466e-02, 4.101e-02, -5.603e-02)); + r += mul(s2_7, M4(1.040e-01, -1.231e-03, 3.496e-02, 1.730e-01, -4.405e-02, -6.203e-02, -1.186e-01, -2.842e-02, 2.053e-01, -7.939e-03, 1.057e-02, 4.796e-02, -1.228e-01, 8.233e-02, 3.011e-03, 5.167e-02)); + r += mul(s2_8, M4(3.120e-03, -1.315e-01, -3.174e-02, -3.502e-03, -2.544e-02, -1.667e-01, 9.923e-02, 5.171e-02, -4.162e-02, 4.400e-02, -1.520e-03, -1.266e-01, 3.305e-02, -4.683e-02, -6.430e-02, -2.213e-01)); + r += mul(s3_0, M4(1.791e-01, 
8.767e-02, -1.035e-01, -5.746e-02, 1.932e-01, 1.365e-01, -1.041e-01, -1.190e-01, -5.021e-02, -9.372e-02, 1.518e-01, -8.073e-03, 8.354e-02, 1.443e-02, -3.073e-02, 4.436e-04)); + r += mul(s3_1, M4(1.749e-01, 1.111e-01, -9.453e-02, 6.243e-03, -8.159e-02, 1.294e-01, -2.138e-01, -2.047e-01, -6.285e-02, -1.430e-01, 1.827e-01, -1.306e-01, 1.589e-01, -1.626e-01, 9.451e-02, 1.382e-01)); + r += mul(s3_2, M4(1.246e-01, 3.725e-02, -2.836e-02, -7.300e-02, -4.093e-01, -2.419e-01, 8.773e-02, -2.995e-01, 1.032e-02, 6.422e-02, -1.224e-01, -3.108e-02, 5.925e-02, -2.313e-02, 7.758e-02, 2.530e-02)); + r += mul(s3_3, M4(2.111e-02, 1.246e-01, 9.110e-02, -5.404e-03, -2.955e-01, -1.573e-01, -3.030e-02, -1.409e-01, -1.599e-01, 3.792e-02, 1.226e-01, -4.558e-02, 3.511e-02, -4.587e-02, -1.939e-02, 2.639e-02)); + r += mul(s3_4, M4(7.720e-02, 2.084e-02, 8.917e-02, 1.445e-02, 4.673e-01, 2.153e-01, 1.753e-01, -2.931e-01, 6.767e-02, 6.694e-02, 1.008e-01, 2.111e-02, 7.684e-02, -4.892e-02, 1.308e-01, 1.391e-01)); + r += mul(s3_5, M4(-1.162e-02, -7.123e-03, -1.536e-02, -1.242e-01, -5.258e-01, 2.133e-01, 9.476e-02, -2.865e-01, -1.337e-01, 6.592e-02, 1.598e-01, 5.042e-02, 1.470e-01, -2.654e-01, -7.099e-02, 6.915e-02)); + r += mul(s3_6, M4(2.419e-02, 1.382e-02, 1.608e-01, -4.062e-02, 3.155e-01, -8.156e-02, 1.359e-01, -2.261e-02, -8.081e-02, -1.597e-02, -8.854e-02, -8.236e-02, 3.261e-02, 1.229e-02, -1.461e-02, -8.523e-03)); + r += mul(s3_7, M4(-6.260e-02, 3.053e-02, -1.282e-01, 3.057e-02, 3.895e-01, 7.897e-02, -1.159e-01, -3.442e-02, 1.594e-01, -2.652e-02, 1.039e-01, -5.180e-02, -7.941e-04, 2.955e-02, 8.395e-02, 3.421e-01)); + r += mul(s3_8, M4(-1.731e-01, -4.189e-02, 4.495e-02, -4.503e-02, -6.134e-03, 1.436e-01, -2.564e-02, -5.602e-01, 3.015e-02, 3.406e-02, -1.213e-01, -8.228e-02, 9.247e-02, -1.539e-01, -7.955e-02, 1.516e-01)); + r += mul(s4_0, M4(-5.650e-02, -1.684e-01, 4.824e-02, 1.060e-01, 3.347e-02, 2.775e-03, 1.072e-01, 4.624e-02, 2.303e-02, 3.802e-02, 1.435e-02, 2.096e-02, -2.840e-02, -6.238e-02, 
-6.434e-02, -8.860e-03)); + r += mul(s4_1, M4(4.276e-02, -2.617e-03, -1.503e-02, 8.597e-02, 2.836e-02, -3.066e-02, 1.198e-01, 1.579e-01, 7.397e-02, 3.772e-02, -1.218e-02, -4.990e-02, 2.318e-01, 1.096e-02, -2.781e-02, 8.578e-02)); + r += mul(s4_2, M4(5.226e-03, -2.910e-02, 7.799e-03, 5.320e-02, 6.157e-02, -6.031e-02, 2.537e-01, 2.064e-02, -1.604e-02, -5.053e-02, 3.938e-02, 4.349e-02, 9.145e-02, 5.544e-02, -1.214e-01, 1.123e-01)); + r += mul(s4_3, M4(-6.617e-02, -9.174e-02, -9.785e-02, -1.779e-02, -6.589e-03, -9.741e-03, 4.801e-03, -2.262e-02, 1.830e-01, 1.203e-01, -6.135e-02, 1.599e-03, 2.769e-01, -7.650e-02, -2.721e-02, -5.785e-02)); + r += mul(s4_4, M4(2.603e-02, 1.843e-02, -4.574e-02, 1.129e-01, -2.174e-02, 9.855e-02, 9.537e-03, 1.742e-01, -8.932e-02, -7.845e-02, 6.868e-02, -3.103e-02, 2.979e-02, 1.596e-01, 1.723e-01, -1.214e-01)); + r += mul(s4_5, M4(4.591e-02, 1.412e-02, 1.122e-01, 4.529e-02, 4.654e-02, -8.696e-02, -1.899e-01, -1.347e-01, 1.343e-01, -2.258e-02, -1.283e-01, 1.312e-01, 5.779e-02, -5.568e-02, -2.225e-01, -1.675e-01)); + r += mul(s4_6, M4(5.667e-02, 5.176e-02, -6.943e-02, -6.500e-02, 1.061e-01, -8.167e-02, -9.606e-02, 1.021e-01, -1.497e-02, 5.960e-02, 1.350e-02, -9.938e-02, 1.023e-01, -2.373e-02, -6.568e-02, 1.451e-01)); + r += mul(s4_7, M4(1.299e-01, -7.720e-03, 7.879e-02, 4.712e-02, 1.314e-01, 1.196e-03, -1.748e-02, -1.014e-01, 3.276e-02, -4.651e-02, -5.061e-02, -5.489e-02, 8.441e-02, 1.724e-02, -1.137e-01, -6.442e-02)); + r += mul(s4_8, M4(5.671e-02, 4.130e-02, 6.218e-02, -2.464e-02, -6.562e-02, -4.754e-02, 4.596e-02, -6.954e-02, 3.372e-02, -1.087e-01, 1.087e-01, 1.233e-01, 1.730e-01, -1.170e-01, 6.077e-02, -1.734e-01)); + r += mul(s5_0, M4(4.058e-02, -1.032e-01, 2.975e-01, -6.645e-03, -3.148e-02, 1.065e-01, -3.101e-02, -1.095e-02, -4.078e-02, 3.742e-02, 3.146e-02, 1.516e-02, -8.725e-02, 5.109e-02, 8.896e-02, -1.127e-02)); + r += mul(s5_1, M4(-1.999e-01, 2.049e-01, -7.905e-02, 1.578e-01, -3.631e-03, 3.358e-02, -6.989e-02, 2.414e-02, 1.249e-01, 
6.485e-02, -8.061e-02, -9.337e-02, -1.166e-01, 1.110e-03, -1.951e-02, 3.245e-03)); + r += mul(s5_2, M4(-2.752e-01, -5.711e-02, 5.200e-02, -1.697e-01, -1.621e-02, 1.003e-01, -5.501e-02, -9.240e-02, -3.534e-01, -9.596e-02, -3.872e-03, -1.727e-01, 4.123e-02, 1.767e-02, -5.866e-02, 7.239e-02)); + r += mul(s5_3, M4(1.877e-01, 1.430e-01, 1.501e-01, -1.773e-01, 5.531e-02, 2.192e-02, 2.932e-02, -1.146e-01, 9.320e-02, -3.421e-02, 1.994e-02, 3.734e-02, -2.613e-02, -4.764e-02, 7.802e-02, -2.090e-02)); + r += mul(s5_4, M4(-1.952e-01, 3.962e-02, -3.031e-01, -2.278e-01, -4.020e-02, 4.581e-02, 1.124e-02, 4.139e-02, 1.343e-01, 4.335e-02, 3.382e-03, 1.526e-01, -1.343e-01, -1.078e-01, 1.909e-01, 5.437e-02)); + r += mul(s5_5, M4(-1.601e-01, 1.749e-02, 8.916e-02, -8.576e-02, 4.084e-02, -8.203e-03, -5.241e-02, -3.195e-02, 1.517e-01, -9.517e-02, -1.732e-02, 1.465e-03, 1.045e-01, -4.128e-02, 3.516e-02, 5.704e-03)); + r += mul(s5_6, M4(1.836e-01, -1.892e-01, -1.645e-02, -2.708e-01, -2.982e-02, 1.039e-01, -5.709e-02, 6.855e-03, 1.518e-01, -2.389e-01, 7.761e-02, 1.398e-01, 3.486e-02, 8.888e-03, -4.878e-03, 7.297e-02)); + r += mul(s5_7, M4(-3.935e-01, -3.510e-01, -1.532e-02, -3.040e-01, 1.982e-01, -7.558e-02, 4.646e-03, -4.347e-02, -2.576e-02, -2.793e-02, -1.142e-01, 1.072e-02, -1.271e-01, 2.376e-02, -2.919e-02, 1.754e-01)); + r += mul(s5_8, M4(-1.721e-01, 9.197e-02, 4.387e-02, -4.041e-02, -4.891e-02, 1.678e-01, 9.660e-02, 2.045e-02, -8.280e-02, 1.116e-02, 5.892e-02, -1.442e-02, 4.817e-02, -5.926e-02, -4.628e-02, -1.710e-01)); + r += mul(s6_0, M4(4.076e-02, 1.745e-02, -4.922e-02, -5.246e-03, -5.877e-02, -1.241e-01, -9.602e-02, 1.698e-01, -3.920e-02, 5.714e-02, -9.600e-03, -2.555e-02, 5.121e-02, -9.031e-02, 4.058e-02, 4.738e-02)); + r += mul(s6_1, M4(6.272e-02, 3.070e-02, -1.185e-02, 7.250e-02, -2.702e-01, -3.660e-02, 2.202e-01, 4.325e-02, 2.098e-02, -6.503e-02, -1.124e-01, 2.763e-02, -8.107e-02, 9.856e-03, -1.116e-01, 5.262e-02)); + r += mul(s6_2, M4(2.319e-02, 8.325e-03, 4.507e-02, 
4.918e-02, 3.141e-02, -4.429e-02, -1.345e-01, -7.937e-02, 8.226e-02, 2.372e-02, -6.866e-02, 1.314e-02, -6.150e-02, 5.907e-02, 2.756e-03, 1.298e-01)); + r += mul(s6_3, M4(-7.615e-02, 1.892e-02, 9.781e-03, 9.630e-02, 1.171e-03, 1.199e-01, -2.645e-01, 9.165e-02, 6.175e-02, 1.452e-02, -1.812e-01, -3.817e-02, -1.544e-01, -7.561e-02, 2.979e-02, 7.044e-02)); + r += mul(s6_4, M4(-1.160e-01, -1.998e-01, 3.482e-03, -1.134e-01, 8.170e-02, -1.845e-01, 1.016e-01, -2.109e-03, 4.811e-02, 8.274e-02, 1.119e-01, 8.181e-02, 3.400e-02, 9.369e-02, 1.696e-01, -3.128e-02)); + r += mul(s6_5, M4(1.119e-01, 5.919e-02, -1.984e-01, -2.139e-01, 1.810e-01, -8.087e-02, 2.033e-01, 6.013e-02, 8.696e-02, 1.415e-01, -1.027e-01, -2.925e-02, 1.681e-01, -1.126e-01, 8.547e-02, 4.805e-02)); + r += mul(s6_6, M4(1.747e-01, 3.428e-02, 7.244e-03, -5.035e-02, 8.084e-02, 6.784e-02, -1.583e-01, 7.107e-02, -4.842e-02, 1.178e-03, -5.551e-02, 8.604e-02, -8.817e-02, 6.024e-02, -1.826e-02, 7.900e-02)); + r += mul(s6_7, M4(1.577e-02, -1.867e-01, -1.363e-01, -1.760e-01, 2.417e-01, -5.955e-02, -2.692e-02, 3.311e-02, -9.635e-03, -3.663e-02, -9.130e-02, -8.953e-02, 5.533e-02, 6.716e-02, 2.129e-01, 4.762e-02)); + r += mul(s6_8, M4(-1.495e-01, -7.207e-02, 1.694e-01, 2.312e-02, 4.239e-02, -3.068e-01, 1.220e-01, -1.715e-02, 1.505e-03, 5.579e-02, 4.435e-02, 4.457e-02, 8.412e-02, 7.905e-02, -1.441e-01, -6.379e-02)); + r += mul(s7_0, M4(1.149e-01, 4.722e-02, 2.133e-02, 1.424e-02, 1.859e-04, -5.039e-02, 8.952e-02, -5.078e-02, 5.445e-02, -6.543e-02, 1.279e-03, -1.213e-02, 3.382e-02, 7.118e-02, 1.440e-01, -8.769e-02)); + r += mul(s7_1, M4(3.510e-02, 1.894e-01, 2.843e-02, 2.383e-01, 1.534e-02, -4.750e-02, 5.790e-02, 1.143e-01, 1.421e-01, 8.004e-02, -1.165e-01, 3.773e-02, 7.492e-02, -9.116e-03, -6.398e-02, 4.644e-02)); + r += mul(s7_2, M4(-8.713e-02, -7.557e-02, 1.099e-01, 2.060e-01, 1.509e-01, 2.796e-02, 1.096e-01, 1.008e-02, -4.055e-03, -1.191e-01, 6.065e-02, -5.197e-02, -2.927e-02, 1.449e-02, 2.451e-02, 1.353e-02)); + r += 
mul(s7_3, M4(-1.485e-01, -4.476e-03, -1.168e-02, -3.132e-02, -2.069e-02, 9.355e-02, -5.367e-02, -2.892e-02, 4.750e-02, -6.783e-03, 3.523e-02, 6.289e-02, -2.410e-02, -9.802e-04, -1.093e-01, -1.362e-01)); + r += mul(s7_4, M4(-3.914e-02, 7.821e-02, -7.844e-02, -1.809e-01, -5.085e-02, -4.832e-02, -1.591e-01, 4.669e-02, -4.627e-02, -5.537e-03, 1.355e-01, -7.233e-02, 4.326e-02, 9.683e-02, -2.192e-02, -9.251e-02)); + r += mul(s7_5, M4(-1.110e-01, 6.131e-02, -1.894e-01, -3.019e-01, -2.254e-01, -6.784e-03, 1.363e-01, 6.567e-02, 8.258e-02, -1.173e-01, 1.777e-02, -1.289e-02, -4.105e-02, -1.242e-01, 3.670e-02, 2.759e-03)); + r += mul(s7_6, M4(8.101e-02, 1.016e-01, 1.437e-02, 8.071e-02, 4.204e-02, 6.510e-02, 7.350e-02, 6.318e-02, -1.183e-02, -8.410e-02, 8.633e-02, 1.911e-02, 2.520e-02, 1.194e-01, 6.253e-02, 7.282e-02)); + r += mul(s7_7, M4(-1.113e-02, -1.675e-01, -5.954e-02, 6.680e-02, 6.762e-02, 3.308e-02, -1.173e-01, 3.375e-02, 1.235e-01, -1.005e-01, 3.653e-02, 1.881e-02, -2.134e-01, -8.765e-02, -1.791e-02, 3.922e-02)); + r += mul(s7_8, M4(-1.919e-01, 1.342e-01, 1.823e-01, 9.905e-02, -5.655e-02, 1.171e-02, 2.232e-01, -2.340e-01, -1.177e-01, 3.993e-04, 7.806e-03, -1.813e-01, -4.798e-02, -2.597e-01, -6.103e-02, 1.486e-02)); + r += V4(-6.495e-04, -3.848e-02, 1.047e-02, 2.015e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, 
M4(-3.159e-02, -6.705e-02, 1.406e-02, 8.403e-02, -3.497e-03, 5.376e-02, 7.178e-02, -3.957e-02, -3.736e-02, 3.345e-02, 2.996e-02, -1.880e-01, 4.603e-02, 1.004e-01, -8.805e-02, 3.509e-02)); + r += mul(s0_1, M4(-5.347e-02, -5.708e-02, -1.278e-01, 2.970e-02, -3.279e-02, 1.100e-01, -2.325e-02, 1.411e-01, 4.305e-02, 3.041e-02, -2.008e-02, -4.983e-02, -2.828e-02, 4.511e-02, 1.111e-01, 4.899e-02)); + r += mul(s0_2, M4(5.854e-05, -5.055e-02, 1.885e-02, 4.298e-02, -1.795e-02, 8.471e-02, 2.948e-03, 7.111e-02, -1.569e-01, -6.827e-02, -4.522e-02, 2.649e-03, -5.399e-03, -2.765e-02, -1.723e-02, -1.115e-02)); + r += mul(s0_3, M4(2.824e-02, -1.127e-01, 2.280e-02, -3.797e-02, -1.024e-02, -2.479e-02, 5.625e-02, 2.136e-01, 4.185e-01, -3.488e-02, -3.187e-01, -1.726e-01, 1.141e-01, -9.213e-02, 1.387e-02, -4.887e-02)); + r += mul(s0_4, M4(3.438e-02, -6.397e-02, -5.044e-02, -1.373e-01, 3.172e-02, -1.102e-01, 2.146e-01, 9.459e-03, 9.015e-02, 4.659e-02, -1.226e-01, -5.517e-02, 5.106e-02, 5.617e-02, -1.364e-02, -1.413e-03)); + r += mul(s0_5, M4(-4.550e-02, -3.588e-02, 7.481e-02, 4.303e-02, 5.176e-02, 2.496e-01, 1.190e-01, -2.034e-02, -6.099e-02, 1.139e-01, -3.098e-02, -1.325e-02, -3.811e-02, 3.667e-03, 9.665e-02, -1.792e-01)); + r += mul(s0_6, M4(2.194e-02, -5.856e-02, 1.208e-01, 1.092e-02, -5.891e-02, 9.008e-02, -5.660e-02, 9.063e-02, -6.928e-02, -6.372e-02, 1.417e-01, 1.157e-01, -3.776e-02, 7.638e-02, 8.563e-02, -7.157e-02)); + r += mul(s0_7, M4(-7.473e-02, -1.458e-01, 2.874e-02, -4.756e-02, -1.148e-01, 1.311e-02, -1.951e-01, 6.415e-02, -1.319e-01, 7.188e-03, 1.038e-01, -1.325e-01, -1.117e-01, 3.131e-02, -8.221e-02, -1.748e-02)); + r += mul(s0_8, M4(-1.608e-02, -1.291e-01, -7.986e-02, 1.517e-01, -5.935e-02, 1.569e-01, -4.486e-02, 4.935e-02, -5.661e-02, 1.693e-01, 1.163e-01, 7.722e-02, -1.655e-02, 3.182e-02, -4.696e-02, 6.367e-02)); + r += mul(s1_0, M4(-6.963e-02, -3.487e-02, -1.631e-01, -1.077e-01, -4.652e-02, -1.246e-02, -2.988e-02, -1.017e-02, 1.289e-01, -6.770e-02, 3.828e-02, 
-5.813e-02, -3.392e-02, -8.819e-02, -1.841e-01, -1.657e-01)); + r += mul(s1_1, M4(-2.651e-01, 7.461e-03, 5.940e-02, 1.063e-01, -8.123e-02, 1.449e-01, -6.067e-02, 1.113e-01, -3.371e-03, -1.156e-01, -9.170e-02, -4.502e-02, -1.540e-02, 1.307e-01, 4.423e-02, 4.225e-02)); + r += mul(s1_2, M4(1.609e-01, 4.713e-02, -5.573e-03, -6.268e-02, -1.734e-02, -1.168e-01, 1.544e-02, -5.528e-02, -2.135e-03, 2.001e-02, 3.267e-02, -1.550e-02, -2.343e-02, 5.691e-02, 1.771e-01, 1.450e-02)); + r += mul(s1_3, M4(1.445e-02, -1.291e-01, 1.128e-01, -1.166e-01, -1.172e-01, -7.896e-02, 3.477e-04, -1.610e-02, 1.550e-01, 6.943e-02, 3.919e-02, -9.096e-02, 1.666e-02, -6.882e-02, 2.141e-01, 1.631e-01)); + r += mul(s1_4, M4(-1.641e-01, -1.595e-01, 3.082e-02, -6.409e-02, -6.885e-02, 1.860e-02, 6.810e-02, -5.758e-02, 2.216e-02, 2.606e-02, -5.397e-02, 3.736e-02, -7.740e-02, 1.974e-01, -3.292e-01, -1.246e-01)); + r += mul(s1_5, M4(-8.176e-02, -1.095e-01, -1.079e-01, -6.857e-02, 3.199e-02, 7.660e-03, -7.556e-02, -5.478e-02, -4.592e-02, 1.068e-01, -3.159e-02, 6.177e-02, 1.165e-01, -1.117e-01, 1.025e-01, -1.159e-01)); + r += mul(s1_6, M4(5.382e-02, -2.003e-01, -2.884e-01, -1.508e-01, -4.384e-03, -6.806e-03, 5.757e-02, 6.070e-03, -2.252e-02, -3.155e-02, 4.240e-02, 1.965e-02, -5.008e-02, -1.744e-01, 2.378e-02, 1.946e-01)); + r += mul(s1_7, M4(7.065e-03, -1.934e-01, -2.132e-01, 7.295e-02, 1.288e-01, -1.343e-01, -1.200e-01, -1.150e-01, 1.013e-01, 1.330e-02, -2.353e-02, -2.356e-02, 2.432e-01, -2.708e-02, 2.156e-02, -8.016e-02)); + r += mul(s1_8, M4(1.323e-01, -7.270e-02, -1.247e-02, -3.603e-02, 1.168e-01, -8.455e-02, -1.407e-01, 1.392e-01, 3.015e-02, -3.628e-02, -3.984e-02, 5.628e-02, 7.819e-02, -5.224e-02, -2.634e-02, 9.179e-02)); + r += mul(s2_0, M4(3.436e-02, -1.717e-01, 8.809e-02, 9.274e-02, 4.959e-02, -9.298e-03, -3.566e-03, -7.553e-02, 2.096e-01, 1.975e-03, -1.735e-01, -3.056e-02, 2.566e-03, 6.520e-02, -2.846e-02, 7.517e-02)); + r += mul(s2_1, M4(6.167e-02, -2.440e-02, -1.303e-02, -1.264e-02, 1.362e-01, 
4.410e-03, -5.669e-02, 5.663e-03, 1.103e-01, 2.676e-02, -2.537e-01, 1.203e-01, -1.513e-01, 5.708e-02, -1.333e-01, -1.070e-02)); + r += mul(s2_2, M4(1.883e-02, -7.058e-02, -1.755e-02, -9.315e-02, -7.877e-02, -2.632e-02, 1.701e-03, 1.646e-02, 3.673e-02, -7.855e-02, 1.919e-01, 8.576e-02, -1.538e-01, 2.239e-01, -3.572e-02, 3.906e-02)); + r += mul(s2_3, M4(-1.348e-01, 9.100e-02, 4.750e-02, -4.391e-02, 9.521e-02, -1.966e-03, -1.461e-04, -4.581e-02, 6.728e-02, 2.979e-03, -1.134e-01, 1.917e-01, 5.138e-02, 6.732e-03, -7.290e-02, 2.281e-02)); + r += mul(s2_4, M4(3.199e-02, -6.431e-02, 6.152e-02, -3.397e-03, -5.878e-02, -7.019e-02, 3.432e-03, 3.159e-02, -8.416e-02, -1.046e-02, -1.246e-01, 1.008e-01, -1.923e-01, -9.668e-02, 2.751e-01, 2.231e-02)); + r += mul(s2_5, M4(-2.464e-03, 6.058e-02, -5.415e-02, -1.797e-01, -5.133e-02, 6.884e-02, -3.154e-02, -1.150e-01, 2.659e-03, -1.659e-02, 1.029e-01, 1.120e-01, -1.944e-01, 1.481e-01, 1.409e-01, 2.801e-02)); + r += mul(s2_6, M4(4.667e-02, -1.064e-01, 2.535e-02, 7.449e-02, 6.001e-02, -1.077e-01, 7.760e-02, 7.536e-02, 8.011e-02, -1.427e-01, 2.664e-02, -9.192e-02, -8.377e-02, 2.433e-02, 2.297e-02, 7.049e-02)); + r += mul(s2_7, M4(-5.233e-02, -4.193e-02, -1.651e-02, -2.001e-01, 4.654e-02, -1.089e-02, -4.719e-02, 2.223e-02, -9.302e-03, -4.426e-02, 2.210e-02, 5.566e-02, -5.970e-02, 1.829e-01, -3.557e-02, 1.214e-01)); + r += mul(s2_8, M4(4.346e-02, 4.874e-02, -4.945e-02, 1.150e-01, -6.326e-02, -2.545e-02, -9.380e-02, 3.646e-02, 1.263e-02, 1.527e-01, 4.378e-02, -7.941e-02, -4.891e-02, 6.231e-02, 6.571e-03, 2.289e-02)); + r += mul(s3_0, M4(-7.488e-02, 9.791e-02, -1.234e-02, -5.133e-02, 1.198e-02, 1.559e-01, -8.043e-02, -1.844e-01, 3.635e-02, -3.648e-03, 1.702e-02, 1.019e-02, 1.025e-01, -1.095e-01, 4.961e-02, 3.802e-02)); + r += mul(s3_1, M4(-8.831e-02, 2.470e-02, -1.290e-02, 6.433e-02, 1.367e-01, 5.948e-02, -7.454e-03, -2.061e-01, 4.250e-03, -1.381e-02, -2.909e-03, -3.023e-02, -1.174e-02, -1.387e-02, 1.262e-01, -3.551e-02)); + r += mul(s3_2, 
M4(-1.020e-01, -2.204e-02, -1.008e-01, -5.846e-02, -3.360e-02, -5.542e-02, 9.575e-02, 4.696e-01, 6.233e-02, 1.315e-02, 1.965e-01, -1.491e-02, -1.792e-02, -5.658e-02, -1.228e-01, 1.385e-02)); + r += mul(s3_3, M4(-1.464e-01, 1.780e-02, -6.589e-03, 4.249e-02, -2.666e-01, -1.224e-01, 2.077e-01, -2.693e-01, -1.083e-01, 2.998e-03, -1.048e-01, -1.893e-02, 6.824e-02, -2.977e-02, 3.503e-02, -9.067e-02)); + r += mul(s3_4, M4(4.541e-02, 1.987e-02, 3.981e-02, 1.155e-01, 1.834e-02, -3.408e-01, 8.681e-02, 8.392e-02, -9.493e-04, -1.603e-02, -5.829e-02, -1.074e-01, 1.148e-01, -8.128e-02, 1.758e-01, 1.002e-01)); + r += mul(s3_5, M4(-8.241e-03, 1.119e-01, 1.522e-02, 6.100e-03, 2.799e-01, 6.308e-02, -2.371e-01, 5.814e-01, 1.264e-02, 1.048e-01, 1.182e-01, -1.407e-01, 4.045e-02, -8.033e-02, -7.350e-02, 1.101e-02)); + r += mul(s3_6, M4(-1.178e-01, 2.715e-02, -2.532e-02, 1.441e-01, 1.125e-01, 1.964e-02, -1.099e-01, 2.368e-01, -4.709e-02, -4.507e-02, -6.058e-02, 1.336e-01, -1.799e-02, -5.699e-02, -4.070e-02, -4.618e-02)); + r += mul(s3_7, M4(1.472e-02, 7.965e-02, 1.853e-01, 8.426e-02, -1.636e-01, 1.166e-02, -1.313e-01, -1.365e-01, -5.363e-02, -5.387e-02, -8.976e-02, -4.583e-03, 2.475e-02, 4.945e-02, 7.521e-02, -7.343e-02)); + r += mul(s3_8, M4(-4.064e-02, 2.779e-02, -1.238e-01, 2.416e-01, 1.142e-01, 1.864e-01, 3.874e-01, 2.042e-01, 1.807e-02, 4.914e-02, -3.000e-02, 1.741e-02, -2.914e-02, -3.022e-02, -6.543e-02, -2.672e-02)); + r += mul(s4_0, M4(9.304e-02, -6.264e-02, 9.383e-02, 1.634e-01, 1.372e-01, -2.039e-02, 1.033e-02, -5.786e-02, -6.631e-03, -7.443e-02, -6.661e-02, 2.310e-02, 1.424e-01, 3.788e-02, -1.264e-01, -4.219e-02)); + r += mul(s4_1, M4(1.216e-01, -2.118e-02, -1.199e-01, 4.403e-03, -2.567e-03, 2.012e-02, -2.615e-02, 4.259e-02, 3.099e-02, -1.118e-02, 1.342e-01, -2.927e-02, 1.122e-01, -6.252e-02, 1.628e-04, 5.354e-02)); + r += mul(s4_2, M4(7.041e-02, -2.867e-02, -6.862e-02, 1.254e-01, -1.607e-01, -1.539e-01, -1.603e-02, -6.348e-02, -7.181e-03, 1.102e-02, 2.390e-02, -5.249e-02, 
2.245e-02, 2.943e-02, -1.301e-01, -7.982e-02)); + r += mul(s4_3, M4(-1.387e-01, 1.504e-02, 5.752e-02, 1.460e-01, 6.862e-02, -4.562e-02, 2.216e-02, -1.442e-01, -7.839e-02, 8.544e-02, -1.911e-02, 2.450e-01, -6.672e-02, -1.830e-01, 1.179e-01, -2.594e-02)); + r += mul(s4_4, M4(1.345e-02, -9.782e-04, 3.954e-02, -7.969e-02, 8.862e-02, 1.382e-01, -1.612e-01, -2.439e-01, -5.334e-02, 1.125e-01, -1.972e-02, -4.682e-02, -2.351e-01, 7.199e-02, 1.401e-01, 3.230e-01)); + r += mul(s4_5, M4(-5.918e-02, 2.463e-02, 1.006e-01, 1.020e-01, -1.814e-01, -2.376e-02, -3.208e-02, -9.613e-02, -9.487e-02, 2.239e-02, 1.853e-01, -8.625e-03, -1.745e-02, 3.099e-01, 3.099e-02, 4.514e-02)); + r += mul(s4_6, M4(-1.100e-01, 6.927e-02, -3.307e-03, 4.325e-02, 7.185e-02, -1.418e-01, 1.960e-02, -1.491e-01, -3.582e-02, 2.388e-03, -1.615e-02, -2.427e-02, -5.100e-02, -2.715e-02, 1.057e-01, 7.667e-02)); + r += mul(s4_7, M4(-1.088e-01, 3.447e-02, -8.186e-02, -1.929e-02, -1.827e-02, 4.351e-02, -1.521e-01, -3.296e-02, 6.372e-02, 4.873e-02, 6.193e-02, -3.363e-03, 3.941e-02, 2.499e-02, 1.494e-05, -2.036e-01)); + r += mul(s4_8, M4(-6.369e-02, -9.751e-02, -5.096e-02, 3.151e-02, 2.607e-03, -3.898e-02, 4.627e-02, -4.268e-02, 6.320e-02, -3.223e-02, 4.721e-02, 4.625e-02, -3.867e-02, 2.063e-01, -2.207e-02, 9.412e-02)); + r += mul(s5_0, M4(6.974e-02, 8.564e-02, 7.933e-02, 9.495e-02, 1.332e-02, 7.017e-02, 1.227e-01, 2.723e-02, -4.159e-02, -2.808e-02, -1.196e-01, -1.683e-01, 4.032e-02, 7.238e-02, -3.882e-02, -1.556e-03)); + r += mul(s5_1, M4(-3.493e-01, -3.536e-02, -2.793e-02, -5.408e-02, -3.388e-02, 7.883e-02, 8.640e-02, 2.626e-01, 2.461e-02, 5.804e-02, -1.028e-02, -2.498e-02, -5.312e-02, -2.278e-03, 7.602e-02, -2.814e-02)); + r += mul(s5_2, M4(-5.896e-02, -7.446e-02, 4.512e-02, -1.565e-02, -7.230e-02, 6.534e-02, -4.524e-02, 6.396e-02, -2.574e-02, 1.876e-01, -4.695e-02, -7.508e-02, 4.575e-02, 3.811e-02, 2.866e-02, -1.009e-01)); + r += mul(s5_3, M4(-1.961e-01, -2.216e-01, -1.780e-01, 8.053e-02, -6.757e-02, -1.014e-01, 
8.794e-02, 1.344e-01, 3.602e-02, -2.847e-02, -9.301e-02, 1.179e-01, -4.238e-02, -1.468e-01, -6.555e-02, 1.076e-01)); + r += mul(s5_4, M4(1.393e-01, -1.116e-01, -7.653e-02, -4.183e-01, 1.063e-01, 1.543e-01, 2.485e-02, -1.206e-01, -8.073e-02, -1.115e-03, -2.209e-01, 9.112e-02, -1.151e-01, 8.174e-02, 8.418e-02, 4.526e-03)); + r += mul(s5_5, M4(1.294e-01, -9.347e-02, 4.701e-02, 6.548e-03, -1.788e-02, 2.933e-02, 4.442e-02, 1.193e-01, -1.264e-01, -6.081e-02, -6.942e-02, 4.344e-02, 8.834e-03, 3.778e-02, -4.672e-02, -5.692e-02)); + r += mul(s5_6, M4(-1.012e-01, 1.823e-01, -2.943e-01, 8.989e-02, 1.116e-01, -6.032e-02, 4.802e-02, 1.264e-01, -8.633e-02, -8.766e-02, -5.426e-02, -1.209e-01, -1.093e-01, -5.602e-02, 6.528e-02, -2.349e-03)); + r += mul(s5_7, M4(-1.343e-01, 2.034e-01, -2.338e-02, -3.787e-01, 8.539e-02, -7.340e-03, 8.520e-03, 2.321e-01, 1.384e-01, -3.985e-02, 1.855e-02, 3.639e-02, 3.894e-02, -7.443e-02, 2.595e-02, -8.674e-02)); + r += mul(s5_8, M4(1.689e-01, -2.342e-02, 2.393e-02, -1.950e-01, 6.529e-02, -6.319e-02, -7.799e-02, 1.220e-01, -6.824e-02, -1.041e-01, 2.629e-02, -8.307e-02, 1.752e-02, 6.461e-02, -3.246e-02, -6.551e-02)); + r += mul(s6_0, M4(1.200e-01, -3.222e-02, -7.397e-02, -8.474e-03, 8.909e-02, -2.369e-01, 8.842e-02, 5.556e-02, -1.596e-01, 1.205e-01, 9.819e-02, 7.308e-02, 1.404e-01, -6.079e-02, -4.573e-02, 5.620e-02)); + r += mul(s6_1, M4(1.630e-01, -1.440e-02, 8.108e-02, 1.382e-01, -7.006e-02, 8.254e-02, -1.172e-01, -1.031e-01, -1.038e-01, -5.618e-02, -9.967e-02, -5.119e-02, 8.641e-03, 1.343e-01, 2.280e-03, 5.211e-03)); + r += mul(s6_2, M4(-9.802e-03, 7.003e-02, -8.588e-02, -2.847e-02, 1.438e-01, 2.113e-01, 2.720e-01, -4.048e-02, -1.776e-02, -8.309e-02, -1.082e-01, 9.919e-02, 1.127e-01, 3.300e-02, 1.817e-02, 1.831e-02)); + r += mul(s6_3, M4(3.430e-02, 1.022e-01, -1.684e-02, -1.033e-01, 9.907e-03, 3.670e-02, 1.316e-01, -1.978e-01, 5.608e-02, 6.323e-03, 2.175e-02, 9.470e-03, 1.618e-01, -5.147e-02, 2.082e-02, -7.631e-03)); + r += mul(s6_4, M4(5.919e-02, 
6.625e-02, -1.752e-01, -1.298e-02, -7.827e-04, -3.498e-02, -4.234e-02, -7.318e-02, 1.128e-01, -6.479e-02, 7.675e-03, -9.922e-02, -1.988e-01, -1.343e-01, -1.326e-02, 7.510e-02)); + r += mul(s6_5, M4(-1.705e-01, 7.369e-02, 8.016e-02, 1.041e-01, -1.254e-01, -2.403e-02, -1.032e-01, 7.417e-02, 1.379e-01, 1.247e-02, 1.053e-01, 7.891e-02, -1.324e-01, -6.220e-02, -5.934e-02, -1.184e-02)); + r += mul(s6_6, M4(3.798e-02, 3.235e-02, -8.631e-02, -8.324e-02, -2.662e-02, -1.828e-02, 8.318e-03, 1.571e-01, 7.448e-02, 3.965e-02, 1.034e-01, 5.027e-02, -2.221e-02, -4.772e-02, 1.205e-01, 8.987e-02)); + r += mul(s6_7, M4(7.982e-02, 8.061e-03, -1.110e-02, -5.908e-03, -1.598e-01, 2.922e-03, -6.329e-04, 1.107e-01, -6.714e-02, -5.439e-02, 6.170e-02, 7.335e-02, -1.515e-01, -4.121e-02, -4.943e-02, 6.217e-02)); + r += mul(s6_8, M4(2.163e-02, 4.998e-03, -8.032e-02, -1.288e-02, -3.356e-03, 1.042e-02, 9.992e-02, 2.381e-02, -1.914e-03, 7.537e-02, -1.247e-01, 5.745e-02, 2.516e-02, -1.156e-01, -1.405e-01, -8.950e-02)); + r += mul(s7_0, M4(-3.289e-02, 1.005e-01, -1.664e-01, 3.229e-02, -3.539e-02, 3.863e-02, 6.170e-02, 4.147e-02, -1.179e-01, 3.243e-02, 6.216e-02, 3.144e-01, 8.066e-02, 6.194e-02, 2.796e-02, -1.411e-01)); + r += mul(s7_1, M4(5.339e-02, -8.321e-02, -5.431e-02, 6.086e-02, 1.075e-02, -8.840e-03, -2.585e-02, 3.593e-02, -1.144e-01, 1.016e-01, 2.744e-02, -5.795e-02, 1.170e-01, 1.227e-01, 5.666e-02, -4.038e-02)); + r += mul(s7_2, M4(3.297e-02, 8.648e-02, -7.364e-02, 8.771e-02, 1.966e-02, -7.449e-02, -1.062e-01, -2.157e-02, -4.111e-03, -1.992e-01, 8.065e-02, 1.122e-02, 7.465e-02, 1.470e-01, 7.417e-03, 2.789e-02)); + r += mul(s7_3, M4(2.386e-02, 2.310e-01, -2.804e-01, -1.344e-01, -1.257e-01, 1.236e-01, 4.911e-02, -5.946e-02, 9.337e-02, 1.174e-02, -1.178e-01, -1.304e-01, 9.177e-02, -1.628e-02, -2.609e-02, -1.484e-01)); + r += mul(s7_4, M4(1.706e-01, 9.529e-02, -1.168e-01, 3.993e-01, 6.630e-02, -2.149e-02, 1.253e-02, 1.085e-01, 1.032e-01, 2.347e-01, -7.329e-02, 2.084e-01, -1.968e-01, -1.076e-01, 
-4.499e-02, 1.235e-01)); + r += mul(s7_5, M4(6.215e-02, -1.109e-01, 4.257e-02, -3.023e-02, -1.347e-01, -1.378e-02, 1.745e-02, -6.140e-02, -6.328e-02, -3.266e-02, 1.099e-02, -2.281e-01, -9.936e-02, 2.338e-02, -3.435e-02, 6.569e-02)); + r += mul(s7_6, M4(7.853e-02, -1.601e-01, -2.296e-01, 1.106e-02, -6.369e-02, -1.100e-02, 3.984e-02, 9.032e-03, -6.465e-03, -1.149e-01, -4.831e-02, 1.183e-01, 6.930e-02, -9.638e-02, -1.169e-01, -8.368e-02)); + r += mul(s7_7, M4(-9.578e-02, 5.014e-02, 1.226e-01, 5.544e-02, -9.871e-03, -5.196e-02, -3.061e-02, 5.372e-02, 1.728e-01, -4.305e-02, 1.110e-02, -9.630e-02, -2.940e-02, -5.671e-02, 4.415e-02, 7.129e-02)); + r += mul(s7_8, M4(2.522e-02, -1.403e-01, -6.496e-02, -1.193e-02, -1.643e-02, 1.158e-01, 1.042e-01, -1.039e-01, -7.068e-02, -6.970e-03, -9.279e-02, -1.022e-01, 5.406e-02, -1.954e-02, 4.493e-02, 2.320e-02)); + r += V4(1.686e-02, -1.796e-02, -1.824e-02, -3.183e-02); + return r; +} + /* Pass7: handles one texel per invocation. gxy = Rmp8x8(tid.x) + blockStart is the target texel (Rmp8x8 is defined elsewhere; presumably it maps the linear thread index into an 8x8 tile - confirm). Invocations whose gxy lies outside GetInputSize() return without writing. The l0..l3 macros used below are defined outside this chunk; pt and pos are not referenced again in this body, so presumably those macros read them - confirm before renaming either. */ +void Pass7(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + /* For each of l0..l3: fetch the 3x3 neighbourhood (offsets -1..1 in x and y, row by row), then split every sample by sign. The odd-numbered set (s1_x, s3_x, s5_x, s7_x) receives -max(-s, 0.0), i.e. the negative part, and has to be computed first because the even-numbered set is then overwritten in place with max(s, 0.0), the positive part. */ + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 
s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + 
s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + /* Pass all 72 sign-split vectors, in the same order, to each of f0..f3 and store the four results at gxy in t0..t3. */ + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 8 +//!DESC conv7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, 
t2, t3 +//!OUT t4, t5, t6, t7 + /* Each lN(x, y) wraps O(tN, float2(x, y)) in V4; O and V4 are defined outside this chunk. */ +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + /* f0: starts r at 0.0, adds mul(s, M4(...)) for each of the 72 V4 inputs in parameter order, then adds one constant V4 and returns the sum. The numeric literals look like generated network weights (presumably exported by a training script - confirm); regenerate them rather than editing by hand. */ +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.094e-01, 6.562e-02, -6.635e-02, -1.664e-02, -1.806e-01, 2.640e-02, -1.701e-02, -5.263e-02, 7.153e-03, 3.917e-02, -1.463e-01, -2.494e-03, -7.370e-02, 5.850e-02, 3.016e-02, -2.480e-03)); + r += mul(s0_1, M4(-7.920e-02, -8.043e-03, -2.752e-01, -4.750e-02, 4.629e-03, 1.198e-02, 5.820e-02, 1.212e-01, -1.189e-01, -1.568e-02, -1.020e-01, -4.512e-02, 7.941e-02, 1.033e-01, -7.474e-02, 4.613e-02)); + r += mul(s0_2, M4(6.666e-03, -1.996e-01, 4.654e-02, 2.725e-02, 4.535e-03, 1.941e-02, -5.382e-02, -8.718e-03, -7.150e-02, 7.631e-02, 2.212e-03, -7.028e-03, 1.519e-02, -3.138e-02, -1.136e-01, 2.660e-02)); + r += mul(s0_3, M4(7.989e-02, 6.219e-02, 9.160e-02, -2.573e-04, 3.206e-02, -1.039e-01, 7.312e-02, 1.047e-01, 6.879e-02, 8.840e-02, -1.349e-01, -3.978e-03, 6.849e-03, 3.076e-02, -4.591e-02, 1.194e-01)); + r += mul(s0_4, M4(-4.859e-02, 8.254e-02, -1.963e-01, 1.308e-01, -1.704e-01, -4.744e-02, -5.579e-02, 8.055e-02, -5.632e-02, 9.476e-02, -6.365e-02, -7.092e-02, 1.192e-02, -1.031e-01, -2.293e-02, -8.946e-02)); + r += mul(s0_5, M4(-1.563e-01, -2.251e-01, -9.731e-02, 1.459e-02, -5.055e-02, 7.008e-03, 
-1.155e-01, 5.742e-02, -1.312e-01, 9.039e-03, 5.052e-02, -3.125e-02, -9.343e-02, 6.572e-03, -3.902e-02, -3.151e-03)); + r += mul(s0_6, M4(3.861e-02, -4.867e-02, -2.904e-02, 7.517e-02, -1.442e-02, -1.293e-01, 4.353e-02, 6.431e-03, -9.509e-03, -3.284e-02, -1.018e-01, -7.121e-02, 8.958e-02, -3.625e-02, 1.074e-02, -5.836e-02)); + r += mul(s0_7, M4(8.242e-02, 8.586e-02, -3.794e-03, 2.286e-01, 6.752e-02, 1.001e-01, 7.140e-02, 5.756e-02, 9.403e-03, -2.792e-02, 4.411e-02, 1.023e-01, 6.906e-02, -5.778e-02, -4.631e-02, -1.568e-01)); + r += mul(s0_8, M4(-1.261e-01, -1.466e-01, -2.753e-02, 1.525e-01, 2.462e-02, -1.988e-01, -1.876e-02, -4.975e-02, 8.446e-03, 1.146e-01, 1.661e-02, -2.526e-02, 7.944e-02, -4.065e-02, 1.104e-03, -1.452e-01)); + r += mul(s1_0, M4(-6.833e-02, -2.755e-02, 7.771e-02, -1.336e-02, 1.837e-02, 8.552e-02, 1.024e-01, -1.455e-01, -2.393e-01, 1.461e-01, 1.814e-02, -4.929e-02, -1.674e-01, -6.073e-02, 7.018e-02, -2.940e-02)); + r += mul(s1_1, M4(4.082e-03, 1.052e-01, 3.842e-03, -1.587e-01, 9.835e-02, 2.139e-02, 8.668e-02, 2.165e-02, -2.663e-02, 8.162e-02, -3.735e-02, -1.560e-02, -6.443e-02, -1.933e-01, -1.607e-01, 1.926e-01)); + r += mul(s1_2, M4(5.791e-02, -1.695e-02, 1.488e-02, -6.007e-02, -7.197e-02, -8.771e-02, -1.639e-02, 5.865e-02, -5.406e-02, 2.676e-02, -4.350e-02, -3.119e-02, 2.097e-02, 3.892e-02, 1.711e-02, 9.656e-03)); + r += mul(s1_3, M4(-2.595e-01, 5.729e-02, 9.107e-02, 1.011e-01, -2.027e-01, 9.770e-02, 8.845e-02, -5.835e-03, 9.669e-02, -7.060e-02, 2.229e-02, -1.412e-02, -5.990e-02, -1.414e-01, -7.674e-02, 2.120e-02)); + r += mul(s1_4, M4(-1.436e-02, 2.800e-01, 2.802e-02, 1.332e-01, -5.009e-02, 3.089e-02, 4.827e-02, -1.219e-01, 5.801e-02, 4.186e-01, -1.186e-02, -8.125e-02, 2.004e-01, -1.175e-02, 1.882e-01, -6.973e-02)); + r += mul(s1_5, M4(-7.152e-02, 9.121e-02, 3.823e-02, 5.854e-02, 1.238e-01, 8.679e-02, -5.175e-02, 4.602e-02, 3.259e-03, 2.433e-01, -6.799e-02, -1.315e-02, -1.607e-01, 1.049e-01, 1.580e-01, -1.395e-01)); + r += mul(s1_6, 
M4(-4.564e-02, -9.812e-02, 2.360e-02, 7.224e-02, -1.049e-01, 7.822e-02, 1.828e-01, -6.342e-02, -2.681e-01, 3.954e-02, -1.436e-01, -9.720e-03, 8.550e-02, -2.766e-06, 1.479e-02, 3.158e-03)); + r += mul(s1_7, M4(2.409e-02, 1.690e-01, -1.089e-02, 4.386e-02, -3.649e-02, -4.483e-02, 5.605e-02, -9.339e-02, 2.634e-01, 3.361e-02, -1.892e-01, 1.432e-01, 1.738e-01, -8.852e-02, -4.950e-02, 2.896e-02)); + r += mul(s1_8, M4(8.637e-02, 6.159e-02, -3.538e-02, -7.172e-03, 1.321e-01, 1.411e-01, 4.990e-03, -7.475e-02, -1.335e-02, -4.698e-02, -6.161e-02, 5.824e-02, 2.177e-01, 2.154e-01, 5.215e-02, -7.138e-02)); + r += mul(s2_0, M4(2.174e-01, 4.123e-02, -8.292e-02, -5.252e-02, 7.025e-02, -9.466e-03, -5.904e-02, -5.454e-02, 6.876e-02, -9.031e-02, 5.278e-02, 6.207e-02, 5.541e-02, 6.821e-02, -9.390e-02, 9.013e-02)); + r += mul(s2_1, M4(-9.286e-02, 9.109e-05, 1.541e-01, -1.452e-01, -5.603e-02, 1.127e-01, 1.113e-02, -1.623e-01, -1.885e-02, -5.289e-02, 1.681e-02, 4.134e-02, -7.683e-03, 2.098e-01, 1.892e-01, 7.369e-02)); + r += mul(s2_2, M4(-1.571e-01, -5.388e-02, -1.203e-02, -1.810e-01, -4.085e-02, -7.166e-02, -3.599e-02, -4.272e-02, -5.114e-02, -5.852e-03, -4.504e-02, 2.492e-02, 6.584e-02, 5.583e-02, 3.093e-02, -8.858e-02)); + r += mul(s2_3, M4(2.666e-01, -4.169e-02, 1.900e-01, 5.311e-02, -2.534e-02, 1.372e-01, -5.283e-02, -4.175e-02, -1.966e-01, 8.141e-02, 7.509e-02, 2.269e-02, 1.992e-01, 1.056e-01, 1.450e-01, -2.845e-01)); + r += mul(s2_4, M4(-1.957e-01, 6.279e-02, 1.413e-01, -4.627e-02, 5.501e-02, 4.127e-02, -2.825e-03, -7.130e-02, -4.654e-02, -6.061e-02, -1.728e-02, 1.095e-01, -8.350e-02, -8.155e-02, -2.309e-02, 1.007e-01)); + r += mul(s2_5, M4(2.874e-02, -1.179e-02, 2.625e-01, -1.289e-01, -5.714e-02, -1.313e-01, -9.355e-02, 7.594e-02, 1.294e-01, 1.140e-01, -7.714e-02, 1.368e-01, -4.774e-02, -8.704e-02, -3.935e-02, -6.379e-03)); + r += mul(s2_6, M4(2.177e-01, 4.111e-02, -2.230e-01, -2.031e-02, -1.858e-02, 2.777e-02, 8.506e-04, 5.563e-02, 1.100e-02, 1.980e-02, 3.650e-02, 6.127e-02, 
-9.054e-02, 1.709e-01, -8.951e-02, -5.801e-02)); + r += mul(s2_7, M4(1.400e-01, 4.955e-02, -9.260e-02, 7.465e-02, -1.298e-02, 6.411e-02, 9.795e-02, 9.944e-02, 5.240e-02, 2.981e-02, 3.059e-02, -9.015e-02, -2.287e-02, -2.969e-02, 1.617e-01, -4.204e-02)); + r += mul(s2_8, M4(-1.400e-01, -1.179e-01, -1.857e-02, 1.462e-01, -2.608e-02, -1.977e-02, 2.275e-02, 5.050e-04, 2.683e-02, -4.022e-02, 1.758e-03, 2.148e-02, 1.991e-01, -4.555e-02, -6.202e-03, -3.282e-02)); + r += mul(s3_0, M4(4.792e-02, 2.772e-02, -4.696e-02, -9.907e-03, 3.481e-02, -1.944e-01, -1.612e-01, 4.374e-02, -1.935e-01, 1.306e-01, -6.511e-02, 6.717e-02, 4.647e-02, 1.421e-01, -6.739e-02, 6.141e-02)); + r += mul(s3_1, M4(-1.762e-01, 3.426e-02, 2.946e-02, -2.496e-02, 7.081e-02, -5.038e-02, -6.860e-02, 1.665e-01, -4.061e-02, -9.788e-03, 7.561e-02, 1.779e-01, -4.286e-02, -8.701e-03, -1.064e-02, -1.274e-01)); + r += mul(s3_2, M4(5.357e-02, 3.849e-03, 4.175e-02, 3.995e-02, 9.148e-02, 8.457e-02, -7.468e-03, -4.689e-02, -1.287e-01, 6.958e-02, -6.353e-02, -1.477e-01, -1.527e-02, -8.207e-02, 3.981e-02, 2.494e-02)); + r += mul(s3_3, M4(-1.686e-01, -3.953e-02, -3.361e-02, 2.511e-02, -3.888e-01, -1.431e-01, -9.050e-03, 2.181e-01, -1.614e-01, -1.381e-01, 1.600e-01, 9.912e-02, 2.128e-02, 3.555e-02, 4.428e-02, -1.076e-01)); + r += mul(s3_4, M4(-6.739e-02, 4.936e-02, 1.614e-02, 3.981e-02, -1.361e-01, 1.175e-01, 2.878e-02, 6.779e-02, 6.161e-02, -1.996e-01, 1.353e-01, -2.633e-02, -6.449e-02, -9.670e-02, -1.103e-01, 2.391e-02)); + r += mul(s3_5, M4(-5.152e-02, -2.634e-02, 5.286e-02, -3.503e-02, -1.304e-01, -1.028e-01, -9.635e-02, -3.594e-02, -4.392e-02, 7.375e-02, -1.322e-01, -4.372e-02, -1.225e-01, -1.920e-01, -1.833e-01, 1.163e-01)); + r += mul(s3_6, M4(2.130e-02, -1.053e-01, -1.098e-01, -3.188e-02, 2.582e-02, 6.738e-02, -3.954e-02, 4.855e-02, 1.174e-02, -1.605e-02, 5.528e-02, -1.552e-01, 3.938e-04, 6.833e-02, -1.971e-03, -1.657e-02)); + r += mul(s3_7, M4(9.644e-02, -5.851e-02, 9.306e-02, -2.021e-02, -8.492e-02, 1.645e-01, 
6.027e-02, 1.179e-01, 1.247e-03, 7.693e-02, 1.392e-01, 2.644e-02, 5.708e-02, 8.585e-02, -1.024e-02, -9.170e-02)); + r += mul(s3_8, M4(-6.458e-02, -1.116e-01, 1.344e-02, -3.360e-03, -4.397e-02, -9.242e-02, -1.821e-01, 9.750e-02, -1.151e-01, 2.338e-02, 2.907e-02, 6.785e-02, 1.321e-01, -2.873e-03, 2.923e-02, -3.002e-02)); + r += mul(s4_0, M4(-3.549e-03, 6.322e-02, 2.612e-02, -5.727e-02, -2.213e-02, -4.023e-02, 1.537e-01, -8.374e-02, -1.378e-01, -1.213e-02, 1.032e-01, 1.619e-01, -4.502e-02, -3.248e-02, 9.939e-02, -1.793e-02)); + r += mul(s4_1, M4(-1.140e-01, -8.592e-02, -1.221e-02, 3.357e-02, -1.243e-01, -3.610e-02, 3.100e-02, 1.831e-01, 6.935e-02, 1.819e-01, -5.356e-02, 1.049e-03, 1.013e-01, -2.999e-02, 1.002e-01, 8.481e-02)); + r += mul(s4_2, M4(-5.074e-02, -3.137e-02, -3.769e-04, -3.506e-02, 1.469e-01, 1.220e-01, -2.581e-02, 4.897e-02, -1.507e-01, -1.310e-01, 1.399e-01, 2.146e-04, 3.223e-02, -1.708e-01, 1.749e-02, 8.381e-02)); + r += mul(s4_3, M4(-5.579e-02, -7.501e-02, -7.396e-02, -4.601e-02, -2.077e-01, -1.652e-01, 4.609e-02, -6.161e-02, 1.811e-01, -1.606e-01, 8.042e-02, 1.432e-02, 2.420e-02, 1.674e-01, -2.385e-01, 3.584e-02)); + r += mul(s4_4, M4(-6.357e-02, -5.652e-02, -1.085e-01, -3.533e-02, -1.473e-01, -6.429e-02, 1.168e-01, 4.730e-02, 1.236e-01, 1.275e-02, 1.529e-01, 1.793e-01, 9.834e-02, 9.068e-02, 1.846e-02, 1.012e-01)); + r += mul(s4_5, M4(-3.437e-02, -2.088e-02, 1.614e-02, 4.094e-02, -3.318e-02, 6.311e-02, 3.066e-02, 1.211e-01, -4.561e-01, 2.340e-01, 2.849e-01, -2.053e-02, 9.824e-03, 1.057e-01, 5.524e-02, 1.544e-01)); + r += mul(s4_6, M4(5.406e-02, -5.447e-02, 6.346e-02, -9.657e-02, -7.456e-02, 1.372e-01, 9.634e-02, 1.778e-02, 7.337e-02, 1.822e-01, -2.336e-01, -1.964e-01, 8.473e-03, 5.990e-02, -7.229e-02, 3.078e-02)); + r += mul(s4_7, M4(-7.364e-02, 9.612e-03, 1.151e-01, -4.973e-02, 6.506e-02, -5.687e-02, 1.550e-02, -8.652e-02, 2.556e-01, -1.941e-01, 9.252e-02, -1.199e-01, 1.270e-01, -9.150e-03, 5.799e-02, 4.810e-02)); + r += mul(s4_8, M4(-1.076e-02, 
8.242e-03, 3.017e-02, -3.253e-02, -1.396e-02, 6.635e-02, -2.149e-02, -3.102e-04, -4.295e-01, 1.726e-01, -1.077e-01, -8.705e-02, 1.395e-01, -4.076e-03, 3.286e-03, -3.146e-02)); + r += mul(s5_0, M4(-2.396e-01, 1.979e-01, 2.413e-01, -4.524e-02, 8.260e-03, -5.002e-02, 5.122e-02, 5.775e-02, 3.915e-04, -1.085e-01, 1.610e-01, -7.457e-02, -8.283e-02, -1.344e-01, 2.998e-02, 1.015e-01)); + r += mul(s5_1, M4(1.838e-01, -1.508e-01, 3.490e-02, -6.098e-02, -1.108e-01, 7.571e-02, -2.342e-01, 1.749e-02, -5.878e-02, -8.278e-03, -1.278e-01, -4.971e-04, -1.599e-02, -9.004e-02, -1.931e-02, -3.202e-02)); + r += mul(s5_2, M4(-5.693e-02, -3.888e-02, 6.823e-02, -8.981e-02, -4.565e-02, -4.465e-03, -8.576e-02, 7.549e-02, 4.481e-02, 8.552e-02, 1.471e-02, 2.529e-03, -7.205e-02, 3.349e-02, -7.793e-02, -3.257e-03)); + r += mul(s5_3, M4(-2.139e-01, 1.130e-01, -2.518e-03, -2.936e-02, 9.204e-02, -6.996e-02, -6.080e-02, 4.065e-02, 9.765e-03, -5.972e-02, -1.922e-02, -7.230e-02, -1.372e-01, 1.099e-01, -9.439e-03, -1.574e-01)); + r += mul(s5_4, M4(3.184e-01, -9.080e-02, -8.721e-02, 1.063e-01, 8.019e-02, 1.045e-01, 5.305e-02, 3.222e-02, -2.309e-02, -3.510e-02, 5.475e-02, 6.860e-03, -2.792e-02, -4.265e-02, 7.462e-02, -1.000e-01)); + r += mul(s5_5, M4(1.510e-01, 9.356e-02, 9.770e-02, 8.079e-02, -2.077e-02, -3.420e-03, 5.702e-02, -9.642e-02, -3.297e-02, 4.951e-02, 1.024e-02, 1.284e-02, -9.640e-02, 1.576e-01, 5.242e-02, -9.510e-02)); + r += mul(s5_6, M4(-1.131e-01, -5.611e-03, 1.260e-01, -6.498e-02, 5.024e-02, -3.976e-02, -9.643e-02, 6.266e-02, -1.811e-02, 2.216e-02, -5.924e-02, -9.393e-02, -7.965e-02, -8.372e-02, 1.139e-01, 1.632e-02)); + r += mul(s5_7, M4(1.830e-01, -1.842e-01, -8.532e-03, 1.438e-01, -2.825e-02, -8.659e-02, 1.932e-02, 2.102e-02, 1.315e-01, -5.124e-03, 1.882e-02, -5.826e-03, -8.456e-03, 8.750e-02, 8.762e-02, 2.405e-02)); + r += mul(s5_8, M4(3.706e-02, 1.210e-01, -2.112e-02, 3.086e-02, -4.797e-02, -1.435e-02, -3.086e-02, 6.154e-02, 7.946e-03, -1.777e-02, -5.891e-02, 8.237e-02, -1.340e-02, 
7.156e-02, 7.066e-02, 2.263e-02)); + r += mul(s6_0, M4(-1.527e-01, -2.308e-02, 1.031e-01, -9.168e-02, 2.770e-02, 4.815e-02, -9.231e-02, -2.969e-02, -1.034e-01, -1.003e-01, -1.571e-01, 8.946e-02, -7.229e-02, 2.558e-02, -1.862e-02, -2.815e-03)); + r += mul(s6_1, M4(3.370e-04, 1.161e-01, 7.904e-03, 5.765e-03, -1.190e-01, 4.035e-03, 1.496e-01, -1.210e-01, -1.030e-01, 2.940e-01, -2.090e-01, 1.547e-01, 3.627e-02, 1.232e-02, 8.922e-02, 5.802e-03)); + r += mul(s6_2, M4(-6.811e-02, 1.911e-02, 1.301e-01, -2.034e-02, -1.718e-02, -8.416e-02, -3.227e-02, 1.555e-02, -6.753e-02, 1.002e-01, -1.419e-01, 4.822e-02, -3.553e-02, -6.993e-03, -2.976e-02, 5.075e-02)); + r += mul(s6_3, M4(1.028e-01, -9.826e-02, -2.043e-02, -4.282e-02, -1.015e-02, 5.062e-02, -7.217e-02, -6.270e-02, 2.555e-01, 1.865e-04, 4.284e-02, 3.526e-02, 1.324e-01, 1.001e-01, 7.429e-02, -1.082e-01)); + r += mul(s6_4, M4(9.982e-02, 8.600e-02, 5.599e-02, 1.019e-01, -1.997e-01, 2.329e-02, 9.757e-02, -9.782e-02, -9.100e-02, 5.542e-03, 8.590e-02, 1.341e-01, 6.680e-02, 1.142e-01, -3.108e-02, 1.664e-02)); + r += mul(s6_5, M4(5.982e-02, -6.894e-02, 1.043e-02, 7.815e-02, -2.898e-02, -1.015e-01, 8.456e-02, -2.549e-02, -1.887e-01, 3.432e-02, -2.346e-01, -1.983e-01, 2.210e-02, -5.568e-02, -1.398e-02, -6.344e-03)); + r += mul(s6_6, M4(1.528e-01, -2.713e-02, 1.025e-01, 2.460e-02, -1.941e-01, -8.926e-02, -6.091e-02, 3.426e-02, 1.105e-01, 1.129e-03, 1.461e-02, 7.950e-03, -1.082e-01, -5.553e-02, -7.231e-02, 3.168e-02)); + r += mul(s6_7, M4(1.582e-01, 5.038e-02, 1.620e-01, 4.924e-02, 1.917e-01, 1.978e-01, 1.543e-01, 4.565e-02, 7.157e-02, -2.466e-02, 2.363e-01, 1.268e-01, 3.259e-02, -1.481e-02, 5.469e-03, -1.927e-02)); + r += mul(s6_8, M4(-3.274e-02, 9.584e-02, 7.582e-02, 3.171e-02, 1.340e-02, 3.428e-02, 1.861e-02, 8.205e-04, -1.479e-01, -7.486e-02, 6.135e-02, -2.173e-01, 8.072e-02, 1.614e-02, 7.643e-02, 7.684e-02)); + r += mul(s7_0, M4(-3.495e-03, -7.745e-02, 1.208e-01, -5.674e-02, -2.190e-02, -5.537e-03, -7.982e-02, -5.191e-02, 
-1.259e-01, 2.270e-02, -2.604e-02, 4.138e-02, -2.793e-02, 3.706e-02, -7.213e-03, -1.064e-01)); + r += mul(s7_1, M4(1.135e-01, -1.115e-02, 5.197e-02, -6.151e-02, -5.704e-02, 2.382e-02, -1.556e-03, -7.628e-02, 4.024e-02, 1.051e-01, -1.004e-02, 3.111e-02, -9.817e-02, 5.772e-02, 7.609e-02, -6.089e-02)); + r += mul(s7_2, M4(-4.865e-03, 1.303e-01, -1.522e-02, 7.056e-02, 1.393e-01, 4.428e-02, -8.566e-02, 7.840e-02, -8.433e-02, -1.209e-02, -4.745e-02, -1.311e-01, 9.661e-02, 3.457e-02, 2.445e-02, 1.207e-01)); + r += mul(s7_3, M4(-4.056e-02, 1.168e-02, -3.411e-02, -3.930e-02, -2.268e-01, 2.466e-03, -1.287e-01, -2.696e-02, 1.901e-02, 2.281e-02, 1.648e-01, -1.948e-02, -3.647e-02, -5.890e-02, 2.151e-02, 1.851e-02)); + r += mul(s7_4, M4(1.090e-01, 4.344e-02, 9.973e-02, 5.995e-02, -9.834e-02, 4.627e-02, 1.579e-02, -4.333e-02, -4.674e-02, 6.055e-02, 1.179e-01, 1.393e-01, -2.063e-01, -7.704e-02, -5.538e-02, 9.934e-02)); + r += mul(s7_5, M4(1.671e-01, -5.808e-02, 1.099e-02, 3.928e-02, -5.584e-03, -9.890e-02, 3.165e-02, 3.469e-02, 1.424e-01, 6.162e-02, 8.217e-03, -1.903e-02, 5.581e-02, -4.533e-02, -1.257e-01, -4.698e-02)); + r += mul(s7_6, M4(-1.862e-02, -1.673e-02, 8.471e-02, 4.346e-02, -2.911e-03, -7.640e-02, -1.346e-02, 1.239e-01, 1.030e-01, -3.699e-02, 2.548e-02, -3.951e-02, -7.803e-02, -6.745e-02, 2.174e-01, 1.784e-01)); + r += mul(s7_7, M4(5.429e-02, -3.890e-02, -3.193e-02, 1.398e-01, -3.613e-02, -4.545e-03, 1.747e-01, -3.892e-02, -7.894e-03, 8.938e-02, -6.410e-02, 9.848e-02, 2.122e-04, -1.926e-02, 1.200e-01, -3.913e-02)); + r += mul(s7_8, M4(9.066e-02, 1.003e-01, 8.942e-02, 2.592e-02, 7.945e-02, 6.145e-02, -3.691e-02, -3.155e-02, 6.701e-02, 6.215e-02, 1.511e-02, 7.923e-02, -1.016e-01, -5.384e-02, 1.000e-01, -9.423e-02)); + r += V4(-1.138e-02, 2.464e-02, 4.285e-02, 3.644e-02); /* constant V4 added once, after all 72 products */ + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, 
V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.729e-03, -2.254e-02, -1.744e-01, -9.125e-02, 2.046e-02, 2.762e-02, 5.534e-02, -1.482e-03, -8.561e-02, -3.427e-02, -2.666e-02, -8.006e-02, 8.221e-02, -9.192e-02, -1.170e-02, -1.189e-02)); + r += mul(s0_1, M4(-1.153e-01, -3.245e-02, 1.439e-02, -5.300e-02, 9.817e-03, -4.178e-02, -5.941e-02, -2.114e-02, -1.534e-01, -1.211e-01, 1.250e-01, -3.088e-02, -7.634e-02, 5.608e-02, -9.212e-02, 3.649e-02)); + r += mul(s0_2, M4(7.142e-02, -7.284e-02, 1.252e-01, -6.431e-02, -2.975e-02, 1.125e-03, 5.923e-02, -3.391e-03, -1.135e-01, 7.447e-03, 2.585e-02, -8.507e-02, 2.353e-02, -3.786e-02, 2.699e-03, -5.200e-02)); + r += mul(s0_3, M4(6.371e-02, 6.001e-02, -9.614e-02, -2.696e-02, -5.555e-02, -7.743e-02, -1.671e-02, 3.010e-02, -7.935e-02, 8.815e-02, -2.964e-02, 3.666e-02, -6.476e-03, 1.233e-01, -5.623e-02, -1.599e-02)); + r += mul(s0_4, M4(-3.674e-02, -3.006e-01, 2.545e-01, 5.876e-02, -6.356e-02, 8.586e-02, -1.498e-02, -2.176e-02, -9.812e-02, -1.406e-01, 6.643e-02, -1.857e-01, -3.230e-02, 1.400e-01, 2.269e-01, 1.178e-01)); + r += mul(s0_5, M4(2.677e-02, -2.124e-01, -1.017e-01, 1.325e-01, 1.358e-02, -6.512e-02, -1.187e-01, 1.181e-02, -3.256e-02, -3.551e-02, -2.776e-02, 7.520e-02, -6.454e-02, -1.606e-01, -9.878e-02, -1.435e-02)); + r += mul(s0_6, M4(-4.028e-02, -4.131e-03, 3.061e-02, 3.977e-02, -2.998e-02, -8.966e-02, 1.969e-01, 2.158e-02, -7.177e-02, 3.715e-02, 4.213e-02, -6.517e-02, -3.402e-02, -1.920e-02, 9.819e-03, -3.927e-02)); + r += mul(s0_7, M4(6.645e-02, 2.757e-02, -4.598e-02, 
9.114e-02, -6.951e-02, -2.374e-02, 8.852e-03, -1.130e-01, -8.206e-02, 3.982e-03, 4.484e-02, -3.798e-02, 7.936e-02, -9.804e-03, 5.960e-02, -8.859e-02)); + r += mul(s0_8, M4(-5.545e-02, 8.134e-02, 5.924e-02, 9.790e-02, -6.810e-02, -1.845e-02, 7.648e-02, -5.781e-02, 2.050e-02, 6.863e-02, 1.100e-01, 1.235e-01, 3.950e-02, -3.621e-02, -7.890e-02, 1.882e-02)); + r += mul(s1_0, M4(6.511e-02, 8.445e-03, -1.497e-01, 6.653e-02, 1.452e-01, 1.195e-01, 5.411e-02, -1.902e-02, 1.324e-01, 4.688e-02, 6.516e-03, -5.909e-02, 1.871e-01, -1.824e-02, 1.369e-01, -6.272e-02)); + r += mul(s1_1, M4(8.474e-02, -6.450e-02, 9.576e-02, 1.412e-01, 3.813e-02, -4.383e-02, 7.060e-03, 1.103e-01, -3.901e-03, -1.998e-01, 1.009e-01, 8.163e-02, -1.379e-01, -1.609e-01, 4.879e-02, 7.951e-02)); + r += mul(s1_2, M4(7.465e-02, 4.864e-02, 1.110e-01, -1.301e-02, -1.781e-03, 5.838e-02, 6.237e-03, 5.422e-02, -3.923e-02, 3.430e-02, 1.450e-01, -1.343e-01, -1.255e-01, 5.629e-02, 3.434e-02, -4.666e-02)); + r += mul(s1_3, M4(-1.247e-01, -7.170e-03, -1.318e-01, -4.028e-02, 1.527e-01, -8.669e-03, -1.660e-02, -1.115e-01, 3.093e-02, -3.498e-02, -1.031e-01, 1.345e-01, -2.094e-02, 1.080e-01, 9.520e-02, 3.174e-02)); + r += mul(s1_4, M4(1.960e-02, -2.152e-01, -5.272e-02, -5.503e-03, -8.628e-02, 1.454e-01, -4.903e-02, -1.123e-02, -1.059e-01, -3.930e-01, 5.321e-02, -1.420e-01, 5.778e-02, 1.822e-01, 2.204e-01, 9.447e-02)); + r += mul(s1_5, M4(-1.345e-02, 8.841e-02, -4.786e-02, 4.486e-02, 5.109e-02, 2.087e-02, -1.878e-01, -4.448e-02, -8.064e-03, 8.815e-02, -3.612e-01, 2.870e-02, 1.136e-01, -5.213e-02, -3.119e-03, -1.653e-01)); + r += mul(s1_6, M4(-1.411e-01, -3.511e-02, -8.128e-02, 2.965e-03, -5.781e-02, -1.920e-02, 2.130e-02, -9.284e-02, 1.356e-01, -1.645e-02, -6.429e-02, -2.718e-02, 2.786e-02, 9.168e-02, -1.069e-01, 1.189e-02)); + r += mul(s1_7, M4(-1.228e-01, 3.885e-02, -9.280e-02, 2.120e-02, -6.863e-02, 5.108e-02, 6.707e-03, 1.515e-03, 6.154e-02, -5.704e-02, 1.902e-01, -5.617e-02, -6.916e-03, 4.455e-03, -1.171e-01, 
8.232e-02)); + r += mul(s1_8, M4(6.637e-02, 3.427e-02, -2.434e-03, -5.802e-03, -1.092e-01, 5.008e-02, -8.846e-02, -9.690e-02, -4.769e-02, 1.564e-01, -1.072e-02, 6.166e-02, 1.037e-03, 1.656e-01, -6.777e-02, 1.529e-01)); + r += mul(s2_0, M4(-8.657e-02, -1.683e-01, -8.438e-02, -1.262e-02, 4.213e-02, 4.478e-02, -2.364e-02, 4.354e-02, -1.705e-02, 1.555e-02, 1.303e-02, 3.047e-02, 1.144e-02, 1.014e-02, 7.722e-02, 2.513e-02)); + r += mul(s2_1, M4(-6.315e-03, 1.265e-01, -1.380e-01, 7.690e-03, 3.008e-03, -1.853e-02, 1.512e-02, 1.030e-01, -1.156e-01, -2.136e-02, 4.247e-02, 2.354e-02, -1.225e-01, 4.783e-02, -3.002e-02, -8.387e-02)); + r += mul(s2_2, M4(9.205e-03, -1.489e-01, 1.314e-01, 7.084e-02, -3.115e-02, -3.416e-02, -6.130e-02, -3.573e-03, -4.312e-02, -5.232e-03, -7.607e-02, -3.608e-02, -5.017e-02, 7.033e-02, -6.234e-02, 5.745e-02)); + r += mul(s2_3, M4(-2.089e-02, -5.410e-02, -8.417e-02, 1.406e-01, -2.626e-02, 7.849e-02, 5.396e-02, -2.270e-02, 5.093e-02, -6.051e-02, 7.279e-02, -6.783e-02, -5.295e-02, -9.793e-02, -9.941e-02, -2.618e-02)); + r += mul(s2_4, M4(4.010e-02, 9.791e-02, 1.034e-01, 4.750e-02, 2.232e-02, 1.779e-01, 8.533e-02, 4.675e-02, 3.716e-02, -1.273e-02, -6.090e-02, 2.041e-02, 6.300e-02, 1.023e-01, -5.345e-02, -2.280e-02)); + r += mul(s2_5, M4(1.838e-01, -1.282e-02, -1.857e-01, 4.965e-02, 4.660e-02, -8.981e-02, 4.691e-02, 7.740e-03, 4.574e-02, -6.827e-02, 6.828e-03, -5.449e-02, -4.020e-02, -6.164e-02, 3.373e-01, 2.602e-03)); + r += mul(s2_6, M4(1.133e-02, -7.793e-02, 8.614e-02, 2.504e-02, 1.168e-02, -5.753e-02, 9.968e-02, -1.020e-01, -8.425e-02, -1.568e-02, 1.029e-01, -2.861e-02, 1.748e-02, 1.094e-01, 1.656e-01, -1.022e-02)); + r += mul(s2_7, M4(-7.059e-02, -2.138e-02, -8.280e-02, -5.897e-02, 5.956e-02, -9.888e-02, -2.802e-02, -1.706e-02, -1.351e-01, 1.230e-01, 7.644e-03, -1.443e-02, 1.220e-01, -2.055e-01, -1.977e-02, 2.979e-02)); + r += mul(s2_8, M4(5.847e-02, -1.408e-01, 1.266e-01, -6.190e-03, 1.284e-01, -1.300e-03, 8.179e-02, -9.440e-02, -7.300e-03, 
-9.068e-02, 8.197e-02, -1.620e-01, 2.682e-01, -8.393e-02, -1.648e-01, -7.976e-02)); + r += mul(s3_0, M4(-4.442e-02, -1.199e-02, 2.013e-02, -3.906e-02, 1.005e-01, -3.356e-02, -2.241e-01, -2.535e-02, -1.345e-02, -1.797e-02, -4.521e-02, 8.524e-02, 5.123e-02, 2.131e-02, -4.011e-02, 5.759e-02)); + r += mul(s3_1, M4(-3.284e-02, 8.050e-02, -2.653e-02, 3.487e-03, 3.558e-02, -6.469e-02, -6.312e-02, 1.345e-01, -1.509e-01, 9.212e-02, 5.070e-02, -1.091e-01, 4.122e-02, -1.963e-02, -7.931e-02, 4.906e-03)); + r += mul(s3_2, M4(-1.307e-02, 3.662e-02, -3.480e-02, -5.718e-02, -4.032e-02, 2.256e-02, 2.065e-01, 6.587e-02, 4.812e-04, 9.005e-02, -1.605e-01, -1.074e-01, 1.466e-01, -2.251e-02, 2.198e-02, -2.744e-02)); + r += mul(s3_3, M4(2.438e-02, -2.521e-02, -2.428e-02, 3.919e-02, 2.668e-01, 9.647e-02, 2.685e-01, -7.129e-02, -1.831e-01, -8.313e-02, 4.906e-02, 2.703e-02, -1.305e-01, 1.368e-02, -5.768e-02, 2.027e-03)); + r += mul(s3_4, M4(5.094e-02, 1.714e-02, 1.254e-01, -5.990e-02, -8.004e-02, 1.239e-01, -1.881e-01, -2.966e-02, -7.121e-02, -1.130e-01, -6.533e-02, 4.745e-02, -1.690e-01, -5.414e-02, -3.060e-02, -2.106e-02)); + r += mul(s3_5, M4(-8.028e-02, 8.060e-02, 5.008e-02, 8.212e-02, 3.428e-02, 3.296e-02, 9.783e-02, -1.039e-01, 8.464e-02, 1.349e-01, -1.113e-01, 5.679e-02, -3.802e-03, 8.044e-03, 4.043e-03, 8.317e-02)); + r += mul(s3_6, M4(3.431e-02, 3.496e-02, 7.038e-02, -3.111e-02, 4.533e-02, -7.965e-02, -9.517e-02, 3.298e-02, -1.059e-01, 2.824e-02, 1.665e-02, -1.818e-02, -6.589e-02, 1.536e-01, -4.950e-02, -4.898e-02)); + r += mul(s3_7, M4(4.211e-02, -4.371e-02, -3.204e-03, -2.210e-02, -1.089e-01, -2.018e-01, 1.237e-01, -5.726e-02, 2.136e-01, -8.615e-02, -5.962e-02, -6.428e-02, -2.377e-04, -6.213e-02, -1.395e-01, -6.767e-02)); + r += mul(s3_8, M4(3.999e-02, -7.282e-03, 2.643e-02, -6.319e-02, 4.068e-02, 1.332e-01, 1.258e-01, -3.770e-02, 7.424e-03, 2.775e-02, 1.428e-01, -1.009e-01, 1.093e-01, 4.892e-02, -4.967e-02, -3.466e-02)); + r += mul(s4_0, M4(8.501e-02, 3.383e-02, -5.774e-02, 
5.613e-02, -1.644e-01, 6.198e-02, 1.756e-01, 2.447e-02, -3.727e-02, -1.792e-02, -5.936e-03, -1.228e-01, 4.292e-03, -7.566e-02, 1.219e-01, 8.670e-02)); + r += mul(s4_1, M4(-5.112e-02, -2.118e-02, -4.575e-02, -3.103e-02, -4.089e-02, 2.808e-02, -6.258e-02, -1.398e-03, -1.654e-01, 5.027e-02, 9.681e-03, 3.003e-02, 2.430e-01, -6.095e-02, -1.100e-01, 8.279e-02)); + r += mul(s4_2, M4(-1.179e-02, -1.193e-02, -2.892e-02, -6.276e-02, -4.313e-02, 1.351e-01, -7.226e-03, -9.222e-02, 1.969e-01, -7.974e-02, 7.191e-02, -3.026e-02, 2.339e-01, 2.023e-02, 5.213e-02, -2.488e-02)); + r += mul(s4_3, M4(1.140e-02, -8.909e-03, -7.873e-02, -1.323e-02, -3.844e-02, 5.506e-02, -3.270e-01, 8.929e-03, -2.479e-02, -5.558e-02, -1.106e-01, -7.180e-02, 9.166e-02, 6.054e-02, -1.063e-01, -6.591e-02)); + r += mul(s4_4, M4(5.246e-02, -2.151e-01, -2.901e-02, 2.235e-02, 1.954e-01, 8.036e-02, -4.048e-02, 3.235e-02, 8.113e-02, 1.880e-01, 1.339e-01, -4.269e-02, 6.020e-02, 1.229e-01, -7.152e-02, -3.236e-02)); + r += mul(s4_5, M4(-8.713e-02, 2.404e-02, -2.114e-02, -2.255e-02, 2.670e-03, -5.205e-02, -1.573e-01, 1.249e-01, 1.333e-01, -6.351e-04, -2.112e-02, -1.401e-01, 5.596e-02, -8.153e-02, 1.756e-02, -1.811e-01)); + r += mul(s4_6, M4(3.555e-02, -2.808e-03, 3.335e-02, 1.557e-02, -9.802e-02, -2.207e-02, 7.928e-02, 1.615e-02, 1.877e-01, 7.622e-02, 3.180e-01, -1.062e-01, 1.027e-01, -3.208e-02, 3.778e-02, -7.205e-02)); + r += mul(s4_7, M4(-2.431e-02, 5.161e-03, -1.301e-01, 3.674e-02, -6.399e-02, 1.295e-02, 1.965e-02, -5.844e-02, 2.093e-01, -1.116e-01, -2.932e-01, -1.137e-01, 1.008e-01, -9.623e-02, -1.487e-01, -2.974e-02)); + r += mul(s4_8, M4(2.896e-02, -4.900e-02, -7.973e-02, -3.879e-02, -6.687e-02, -3.486e-02, 6.078e-03, -6.165e-02, 4.432e-02, 2.105e-01, -3.078e-01, 3.181e-01, 7.459e-02, -6.405e-02, 2.317e-02, -1.176e-01)); + r += mul(s5_0, M4(-6.517e-02, -8.222e-02, 2.485e-03, 7.133e-02, -9.379e-02, -6.617e-02, -4.647e-02, 8.790e-03, -1.087e-02, 1.703e-02, 1.943e-02, -2.242e-02, -4.682e-03, 1.004e-02, 
-3.044e-03, -5.008e-02)); + r += mul(s5_1, M4(-1.715e-01, 8.083e-02, -2.528e-01, -2.576e-01, -5.399e-02, 4.051e-02, 1.056e-02, 4.317e-02, -1.016e-01, -3.923e-02, -6.484e-02, 4.782e-02, -6.041e-02, 3.697e-02, 2.728e-02, -2.102e-02)); + r += mul(s5_2, M4(-1.143e-01, -4.641e-02, 1.607e-01, -2.974e-02, 5.074e-02, 2.419e-02, 1.151e-01, 2.244e-03, 2.447e-03, -7.315e-03, -2.897e-02, 1.462e-02, -6.107e-02, 9.528e-02, 1.742e-02, -1.224e-01)); + r += mul(s5_3, M4(3.139e-02, -1.042e-01, 3.502e-01, 1.316e-01, 4.263e-06, 7.788e-02, -1.001e-01, 1.368e-01, -1.714e-02, 1.022e-01, -7.639e-02, 9.745e-02, -1.262e-01, 1.065e-01, 1.174e-02, 1.209e-01)); + r += mul(s5_4, M4(-2.895e-03, 2.755e-02, 2.364e-01, -5.040e-02, 7.315e-03, 8.171e-02, 6.080e-02, 9.226e-02, -4.391e-02, -1.097e-01, 9.140e-02, 1.581e-01, -4.942e-02, 1.714e-01, -7.025e-02, 7.177e-02)); + r += mul(s5_5, M4(-1.390e-01, -5.209e-02, 1.909e-01, 5.650e-02, -3.279e-02, -3.216e-02, 1.367e-02, 1.828e-02, -1.384e-02, 5.998e-02, -7.345e-03, 8.610e-02, -7.273e-02, 1.086e-01, -6.571e-02, -1.284e-01)); + r += mul(s5_6, M4(5.034e-02, -1.828e-01, 1.473e-01, -1.247e-01, 1.692e-02, -1.216e-01, -4.455e-02, -8.595e-03, 1.730e-02, 6.486e-02, -6.406e-02, 2.810e-02, -6.608e-02, -9.698e-02, 7.211e-02, -1.030e-02)); + r += mul(s5_7, M4(-9.417e-02, 5.507e-02, 1.827e-02, -9.012e-02, -6.143e-02, 1.451e-02, 5.554e-02, 1.589e-01, -9.232e-02, -2.268e-01, -5.109e-02, -2.465e-02, -9.571e-03, 1.592e-02, 7.154e-02, 3.195e-02)); + r += mul(s5_8, M4(1.122e-01, 2.526e-02, 1.345e-01, -1.378e-02, -3.588e-02, 3.086e-02, -1.854e-02, -2.739e-03, -6.601e-03, -2.751e-02, 6.659e-02, -4.153e-02, -4.977e-02, 5.489e-02, 1.320e-01, -2.698e-02)); + r += mul(s6_0, M4(2.353e-02, -1.135e-02, 4.539e-02, -3.690e-02, 1.588e-02, 1.124e-01, -6.236e-03, -5.471e-02, 3.487e-02, 9.397e-02, 1.113e-01, -5.103e-02, 9.854e-03, -1.466e-02, -4.279e-02, -4.463e-02)); + r += mul(s6_1, M4(4.381e-02, 2.766e-02, 5.584e-02, -9.172e-02, 1.119e-02, -4.151e-02, -1.586e-01, 3.715e-02, 
-1.467e-01, -3.013e-02, -2.240e-03, 4.990e-02, -1.190e-01, 1.239e-02, 4.518e-02, -6.321e-02)); + r += mul(s6_2, M4(-9.381e-02, -6.567e-02, -4.507e-02, 2.242e-02, 5.115e-02, 2.880e-02, 6.675e-02, -1.370e-02, -1.846e-01, -3.458e-02, 1.782e-01, -5.527e-02, 3.700e-02, 3.149e-02, -1.797e-02, 2.019e-03)); + r += mul(s6_3, M4(-8.134e-02, 3.108e-02, -7.165e-02, 6.033e-02, 4.819e-02, 4.188e-03, -3.899e-02, 4.798e-02, -5.876e-02, -7.930e-02, 6.983e-03, -4.751e-02, 6.847e-02, 2.736e-02, -9.448e-02, -1.418e-01)); + r += mul(s6_4, M4(-1.572e-01, 9.606e-02, -7.588e-02, -8.889e-02, -1.969e-02, 3.222e-02, -1.435e-01, -4.279e-02, 1.019e-01, -4.039e-02, -2.811e-02, 8.374e-02, 3.470e-02, -8.823e-02, 1.589e-02, 8.212e-02)); + r += mul(s6_5, M4(-1.517e-01, 2.006e-03, 1.170e-01, 1.126e-02, -9.371e-02, 2.775e-03, 9.179e-02, -7.633e-03, -9.387e-02, -1.895e-02, -3.722e-03, -6.392e-02, 6.610e-02, -3.510e-02, 1.864e-02, -8.583e-02)); + r += mul(s6_6, M4(1.429e-02, -1.857e-01, -9.834e-02, 4.205e-02, -9.407e-02, 2.892e-02, 1.544e-02, 9.583e-02, 4.782e-02, 9.223e-02, 6.596e-02, -5.788e-02, -8.740e-02, -3.463e-02, 1.284e-01, -5.233e-03)); + r += mul(s6_7, M4(3.617e-02, -2.159e-01, -1.628e-01, -1.198e-01, -4.794e-03, 2.093e-02, 1.201e-02, -8.133e-02, -2.638e-02, 5.355e-02, -1.989e-01, 1.090e-01, 9.783e-02, 1.182e-01, 1.286e-01, -1.415e-02)); + r += mul(s6_8, M4(4.042e-02, 3.907e-02, -5.044e-02, 1.136e-01, 1.351e-02, -8.938e-02, 7.441e-02, 1.342e-01, 1.967e-01, -5.279e-02, 7.863e-04, -9.997e-02, -2.084e-03, 9.957e-02, 4.909e-02, -7.433e-02)); + r += mul(s7_0, M4(-4.084e-02, -3.212e-02, -1.794e-01, -1.152e-01, -2.028e-02, 2.823e-02, 1.099e-01, -1.089e-02, -8.768e-02, 4.997e-02, -6.084e-02, -8.466e-02, 1.776e-01, 1.520e-01, -2.691e-02, 8.493e-02)); + r += mul(s7_1, M4(1.827e-01, 7.171e-02, 1.889e-01, -3.011e-02, 7.199e-02, -3.183e-02, 2.527e-02, -2.381e-03, -1.245e-01, -3.135e-03, 1.139e-01, -1.420e-02, -1.221e-01, -7.495e-02, -1.821e-01, -1.277e-02)); + r += mul(s7_2, M4(6.847e-02, -1.250e-01, 
-2.697e-02, 2.539e-02, 5.613e-02, -3.526e-02, -1.045e-01, 1.635e-02, -6.816e-02, -6.310e-02, -2.924e-02, 1.206e-02, -1.618e-02, 2.308e-01, 1.952e-02, -9.057e-02)); + r += mul(s7_3, M4(2.349e-05, -3.286e-02, -1.420e-01, -4.500e-02, 3.800e-02, 3.560e-02, 4.923e-02, -4.756e-02, 1.107e-03, -1.182e-01, -1.359e-01, 2.942e-02, -3.852e-02, 1.207e-01, -1.016e-01, -1.363e-02)); + r += mul(s7_4, M4(-1.065e-01, -3.812e-03, 2.468e-02, -5.968e-02, 8.090e-02, 9.870e-02, -1.239e-01, -1.764e-01, -4.936e-02, -4.465e-02, 1.318e-01, -1.242e-02, 1.837e-02, -9.250e-02, -6.623e-02, 3.246e-01)); + r += mul(s7_5, M4(-2.284e-02, -6.922e-02, 9.377e-02, 1.242e-01, -5.890e-02, 4.002e-02, -1.991e-02, -1.977e-02, -4.955e-02, 1.347e-02, 5.959e-02, -7.623e-03, -1.573e-02, 1.391e-01, 1.271e-01, -6.322e-02)); + r += mul(s7_6, M4(-5.371e-03, 2.781e-04, -2.736e-02, -6.368e-02, -7.276e-02, -5.966e-02, 4.382e-02, -1.299e-03, 5.660e-02, 7.911e-02, -4.893e-02, 1.085e-02, 1.875e-03, -5.381e-02, -1.343e-01, -8.671e-03)); + r += mul(s7_7, M4(-8.190e-02, -3.811e-02, 3.224e-02, -8.747e-02, -9.664e-02, -8.016e-02, -4.470e-02, -4.944e-02, 3.981e-03, 9.605e-02, -3.912e-02, -2.054e-02, -2.541e-02, -7.516e-02, -1.922e-01, -8.374e-02)); + r += mul(s7_8, M4(-2.115e-01, 1.455e-01, 7.647e-02, 1.253e-01, -5.201e-02, 2.892e-02, -1.013e-01, 1.007e-01, 1.441e-01, 6.061e-02, -9.842e-03, -1.271e-01, -3.331e-02, -3.965e-02, 8.546e-02, -1.729e-01)); + r += V4(-6.477e-03, 3.775e-02, -2.850e-02, -2.859e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 
s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-8.396e-02, 6.257e-02, 2.046e-01, 4.715e-02, 8.119e-02, 3.792e-02, -5.588e-02, -4.133e-02, -7.613e-02, 6.871e-02, 3.404e-03, -3.979e-02, -3.971e-02, 5.339e-02, 5.938e-02, -6.581e-03)); + r += mul(s0_1, M4(-8.824e-02, 2.829e-02, -1.519e-01, 1.856e-03, 3.305e-02, -6.249e-02, 6.808e-03, -5.331e-02, 1.412e-01, -3.578e-02, -3.772e-02, 5.877e-02, 3.664e-02, -1.509e-01, 6.215e-03, -1.218e-01)); + r += mul(s0_2, M4(-1.489e-02, -9.221e-02, -5.628e-02, -7.312e-02, -4.338e-02, 4.579e-02, 1.131e-01, -2.055e-02, -3.589e-02, -3.988e-02, 1.304e-02, -1.342e-01, 3.737e-02, 2.073e-01, 1.726e-02, 3.786e-02)); + r += mul(s0_3, M4(-2.719e-02, -3.733e-02, -3.726e-02, 1.658e-02, 1.124e-01, 1.442e-02, 2.669e-02, -8.319e-02, -1.084e-02, 1.207e-02, -1.302e-01, 2.021e-03, -7.741e-02, -1.868e-02, -1.055e-01, -2.045e-02)); + r += mul(s0_4, M4(4.953e-02, -1.142e-01, -1.336e-01, 5.330e-02, 6.012e-02, 7.107e-02, 1.274e-02, -1.497e-01, 1.689e-01, -1.159e-01, -1.850e-01, -9.835e-03, 6.214e-02, 2.969e-02, 2.910e-02, 1.322e-01)); + r += mul(s0_5, M4(2.856e-02, 4.993e-02, -1.014e-01, -4.757e-02, 1.783e-02, 2.150e-02, 6.456e-02, -8.850e-03, 4.848e-02, 1.232e-01, -1.940e-02, 2.387e-02, 2.107e-02, 7.759e-02, 1.312e-02, -8.911e-02)); + r += mul(s0_6, M4(1.169e-01, -3.837e-03, -1.560e-01, -6.673e-02, 2.033e-02, -7.245e-02, -2.519e-02, -6.096e-02, -7.451e-02, -4.305e-02, -1.036e-01, 2.808e-02, 1.520e-02, -1.628e-02, -3.010e-02, 2.474e-02)); + r += mul(s0_7, M4(-5.442e-03, -1.942e-02, -1.479e-02, 2.604e-02, 3.023e-02, 1.808e-02, 4.114e-02, 6.142e-02, -3.394e-02, 3.443e-02, 4.459e-03, -4.054e-02, -5.407e-02, -7.083e-02, 3.597e-02, 2.541e-02)); + r += mul(s0_8, M4(1.896e-01, -6.667e-02, -2.874e-02, -1.202e-01, -2.225e-02, -1.835e-02, 4.985e-02, -1.007e-03, -4.515e-02, -1.612e-02, -6.822e-02, 2.557e-02, -4.599e-03, 2.108e-02, 1.646e-02, 
1.822e-01)); + r += mul(s1_0, M4(8.316e-02, 4.972e-02, 5.940e-02, -3.601e-02, -2.570e-02, -3.406e-02, 1.026e-01, 1.224e-01, 3.587e-02, 6.016e-03, 1.227e-01, -9.876e-02, 1.664e-02, 9.671e-02, -1.050e-03, 8.077e-02)); + r += mul(s1_1, M4(1.094e-02, 6.376e-02, 1.504e-01, 9.308e-02, 1.487e-01, 1.374e-01, -7.933e-02, 1.163e-01, 6.827e-02, -1.194e-01, 1.051e-01, 6.315e-03, 1.051e-01, -1.439e-01, 3.877e-02, -1.417e-01)); + r += mul(s1_2, M4(-8.041e-03, 1.070e-01, -3.588e-02, 7.473e-02, 5.798e-02, -3.620e-02, -1.266e-01, -2.034e-02, -6.879e-02, -1.322e-01, 8.333e-02, -1.265e-01, 7.251e-02, -6.806e-02, -2.084e-01, 3.071e-02)); + r += mul(s1_3, M4(-1.856e-02, 3.163e-02, 7.100e-02, -9.220e-02, -1.164e-01, -1.826e-01, 1.023e-01, 9.672e-02, 1.518e-01, -6.912e-02, 2.306e-01, 1.432e-01, -9.888e-02, -5.789e-02, -6.058e-02, -1.396e-01)); + r += mul(s1_4, M4(-5.385e-02, -1.271e-01, 8.932e-02, 5.435e-02, 2.220e-03, 1.040e-01, -1.587e-01, -4.732e-02, -1.000e-01, 7.554e-02, 5.049e-02, 4.207e-02, -1.358e-01, 1.794e-02, 8.754e-02, 6.202e-02)); + r += mul(s1_5, M4(4.379e-03, 1.836e-02, 7.793e-02, -1.987e-02, -6.199e-02, 9.397e-02, -7.464e-02, 5.137e-02, 5.078e-02, -3.277e-02, -3.652e-02, 1.251e-01, -7.142e-02, -2.838e-02, 1.065e-01, 9.548e-02)); + r += mul(s1_6, M4(2.106e-02, 1.698e-02, 1.823e-01, -1.049e-02, -1.023e-01, 7.420e-02, 6.849e-02, -2.680e-02, 1.588e-02, -2.739e-01, 6.962e-02, -6.241e-02, 2.071e-02, -2.705e-02, 1.457e-01, -1.072e-01)); + r += mul(s1_7, M4(-5.482e-02, 9.531e-02, 2.216e-02, 1.134e-01, 6.476e-02, 1.798e-01, -5.868e-02, -5.474e-02, 1.957e-02, 8.487e-03, -1.855e-02, -5.016e-02, -9.990e-02, -4.489e-03, -4.724e-02, -1.959e-02)); + r += mul(s1_8, M4(-5.301e-02, -3.228e-02, -3.431e-02, -1.756e-02, -5.888e-02, -1.213e-01, -7.543e-02, -1.655e-02, -8.627e-02, -8.473e-02, -1.860e-01, -9.430e-02, -9.301e-02, -1.847e-02, -1.846e-01, -2.675e-02)); + r += mul(s2_0, M4(-1.116e-03, 1.984e-01, 8.365e-02, -1.080e-01, 3.379e-02, 1.197e-01, -6.953e-02, -1.108e-01, -8.161e-02, 
-1.120e-01, -1.096e-01, 3.466e-02, -8.288e-02, -2.875e-02, 5.789e-02, -1.645e-01)); + r += mul(s2_1, M4(-4.304e-02, -3.669e-01, -1.122e-03, 3.619e-02, 1.861e-02, -1.261e-02, 1.334e-02, 4.227e-02, -2.607e-02, -1.268e-01, -4.250e-02, 9.329e-02, 2.142e-02, 2.399e-01, -3.785e-02, -2.165e-01)); + r += mul(s2_2, M4(1.717e-01, 4.601e-02, 7.285e-02, 1.641e-01, 3.082e-02, 4.744e-02, 1.024e-01, -1.805e-02, 1.980e-02, 7.636e-02, 1.307e-01, 3.199e-03, 1.031e-01, 6.953e-02, 5.313e-02, 8.578e-02)); + r += mul(s2_3, M4(-3.959e-02, 9.776e-02, -3.648e-02, -6.317e-02, -1.881e-02, -1.470e-01, -7.160e-02, -2.812e-02, -9.151e-02, -2.812e-02, 3.204e-02, -1.560e-01, -2.561e-01, 2.194e-02, -1.352e-01, 5.714e-02)); + r += mul(s2_4, M4(1.179e-01, -9.014e-02, -1.723e-01, -3.048e-02, 2.041e-01, 1.623e-01, -5.811e-02, 2.392e-02, 2.907e-02, 7.798e-02, -1.402e-01, 5.638e-02, -1.392e-01, -6.102e-02, -1.454e-01, -8.343e-02)); + r += mul(s2_5, M4(-6.665e-02, -1.900e-01, 3.355e-02, -3.858e-02, 1.273e-01, 5.205e-02, 6.613e-02, -1.686e-01, 5.957e-02, 2.084e-01, 4.997e-02, -1.078e-01, -1.243e-01, 1.989e-01, 3.212e-02, -7.405e-02)); + r += mul(s2_6, M4(2.811e-02, 2.827e-02, -8.699e-02, 5.391e-03, -1.677e-02, 5.194e-02, -1.205e-01, -4.094e-02, 2.947e-02, -1.030e-01, -8.520e-02, -4.418e-03, -8.797e-02, -7.821e-02, -1.151e-01, -6.517e-02)); + r += mul(s2_7, M4(-1.696e-01, -1.334e-01, 3.738e-02, -2.598e-02, -5.932e-02, 1.256e-02, -3.674e-02, 4.801e-02, -2.234e-02, -5.203e-02, -2.763e-02, 4.107e-02, -2.292e-01, 1.440e-02, -7.571e-02, -8.242e-02)); + r += mul(s2_8, M4(4.696e-02, 1.477e-01, 8.007e-02, -9.830e-02, 3.139e-02, -1.273e-01, 7.095e-03, -6.454e-02, -5.214e-02, 2.861e-02, 1.585e-01, 1.357e-02, 1.475e-01, -3.230e-02, -4.577e-02, -1.059e-01)); + r += mul(s3_0, M4(-2.252e-02, 1.338e-02, -1.358e-01, -6.354e-02, 4.917e-02, 4.904e-02, -8.928e-02, -1.837e-01, 1.973e-02, 4.783e-03, 1.707e-01, -3.115e-02, 7.732e-02, -5.697e-03, 4.607e-02, 5.169e-02)); + r += mul(s3_1, M4(1.296e-02, -1.086e-01, -5.551e-02, 
-5.133e-02, 1.030e-02, -1.204e-01, 1.266e-01, -3.791e-03, -8.365e-02, 1.510e-01, 5.554e-02, -4.317e-02, 2.934e-02, -4.857e-02, -9.000e-02, -8.174e-02)); + r += mul(s3_2, M4(2.791e-02, -5.920e-02, 8.420e-03, 2.844e-02, -1.942e-02, -1.035e-01, 5.318e-02, 1.820e-02, 5.579e-02, 2.278e-02, 1.166e-01, 2.042e-01, 5.956e-02, 1.558e-03, 1.131e-01, 7.742e-02)); + r += mul(s3_3, M4(8.458e-02, -6.920e-02, 5.021e-02, -4.591e-02, 1.219e-01, 1.240e-01, 4.117e-02, -1.473e-02, 2.851e-02, -9.032e-02, 1.518e-01, 1.034e-01, -7.163e-02, -1.222e-01, 2.318e-02, 9.159e-02)); + r += mul(s3_4, M4(1.075e-01, 4.295e-02, 1.551e-02, -1.004e-02, 6.269e-02, 1.251e-01, -1.786e-01, 2.617e-01, 5.514e-02, 1.150e-01, 3.540e-02, -4.715e-02, -2.372e-02, -2.104e-02, -3.770e-02, -3.209e-02)); + r += mul(s3_5, M4(-2.779e-02, -1.541e-02, 3.910e-02, 6.499e-02, 1.786e-01, 2.785e-02, 1.080e-01, 6.422e-02, 8.128e-02, -5.753e-04, 6.851e-02, -9.486e-03, 5.335e-02, -2.182e-02, 7.441e-02, -1.735e-01)); + r += mul(s3_6, M4(-6.663e-02, 1.650e-02, -7.590e-04, 5.202e-02, 1.628e-03, -4.763e-02, -9.231e-03, -6.298e-02, -1.443e-01, -1.467e-01, -1.160e-02, -3.925e-02, -5.014e-02, -3.762e-02, 3.894e-02, 7.811e-02)); + r += mul(s3_7, M4(-1.213e-01, 9.338e-02, 7.873e-02, 1.343e-01, 1.435e-01, 2.037e-01, -2.489e-01, 1.518e-02, 9.782e-02, 6.091e-02, -7.512e-02, -9.813e-02, -1.407e-01, -3.279e-02, 2.124e-02, 8.105e-02)); + r += mul(s3_8, M4(-9.767e-02, 1.018e-01, 7.291e-02, 6.664e-02, 1.303e-01, 9.835e-02, -3.387e-02, -1.254e-01, -7.274e-02, -6.490e-02, -8.099e-02, 8.225e-02, 1.014e-01, -9.791e-02, 1.910e-02, -1.359e-02)); + r += mul(s4_0, M4(6.237e-02, -2.098e-02, -3.524e-02, -1.842e-02, -6.025e-03, -1.339e-02, -1.479e-01, -1.391e-01, -1.004e-01, 7.957e-02, 1.256e-01, 2.416e-02, 4.468e-02, -5.897e-02, 4.880e-02, 1.064e-01)); + r += mul(s4_1, M4(1.518e-01, 2.555e-02, -2.027e-02, 4.877e-02, -1.120e-01, -7.960e-02, -1.309e-01, -1.421e-02, -1.558e-01, -1.644e-01, -5.664e-02, -3.298e-02, -8.471e-02, -1.006e-01, 1.330e-01, 
-1.327e-01)); + r += mul(s4_2, M4(-4.238e-02, -1.514e-02, 2.417e-02, 3.420e-03, 4.216e-02, 9.089e-02, 8.527e-02, 8.454e-03, -7.779e-02, -7.727e-03, 2.072e-01, -1.901e-01, -1.010e-02, 3.595e-02, -1.229e-01, 4.745e-02)); + r += mul(s4_3, M4(-6.419e-02, -5.132e-02, -6.213e-02, 5.459e-02, -6.912e-02, 2.448e-02, 8.193e-02, -3.265e-02, -1.697e-01, 2.783e-01, -1.581e-01, -2.180e-01, -7.239e-03, 7.356e-02, 1.891e-01, -7.637e-02)); + r += mul(s4_4, M4(-1.999e-02, -1.105e-01, 6.978e-02, -4.610e-02, -6.091e-02, 5.815e-02, -1.244e-01, -8.634e-02, -2.659e-01, -2.601e-01, 3.830e-01, 2.879e-02, -1.113e-01, 1.182e-01, 1.204e-01, 5.876e-02)); + r += mul(s4_5, M4(-1.752e-02, 9.014e-02, 1.071e-01, -1.964e-02, 1.287e-02, -1.100e-01, -1.074e-01, -5.476e-02, 2.557e-02, -1.321e-01, 5.911e-02, -6.585e-02, 8.432e-02, -7.117e-02, 3.125e-02, -3.480e-02)); + r += mul(s4_6, M4(4.746e-03, -7.302e-02, -7.874e-02, -6.524e-02, -1.089e-01, 5.395e-02, -1.219e-01, 8.346e-02, -6.767e-02, 2.019e-01, 5.432e-02, 9.575e-02, -5.070e-04, -5.142e-02, 1.177e-01, 1.051e-01)); + r += mul(s4_7, M4(-5.428e-02, 5.602e-02, 2.857e-02, 1.466e-01, -2.225e-02, -3.678e-02, -7.365e-02, -4.064e-02, -2.390e-02, 8.908e-02, -1.751e-01, -3.483e-01, 2.630e-03, -2.349e-01, 1.084e-01, 2.868e-02)); + r += mul(s4_8, M4(-1.447e-02, 1.434e-01, 6.197e-02, 8.041e-02, 1.320e-01, -2.235e-01, -7.857e-02, 1.787e-02, -2.865e-02, 3.762e-01, -1.852e-01, 4.053e-01, 7.515e-03, -3.925e-02, 8.597e-02, 3.091e-02)); + r += mul(s5_0, M4(-2.934e-02, 2.034e-01, -7.735e-02, -3.193e-02, -1.826e-02, 8.642e-03, 2.075e-02, 3.363e-02, 4.635e-02, 7.288e-02, -1.349e-01, 4.498e-02, -5.291e-02, 3.082e-02, -9.191e-03, 1.239e-02)); + r += mul(s5_1, M4(4.098e-02, 2.291e-01, -4.124e-02, 2.460e-02, -5.447e-02, -1.430e-01, -2.160e-02, 3.745e-02, 1.351e-02, -3.519e-02, -1.025e-02, -1.228e-02, 1.061e-01, -2.792e-02, -1.762e-01, -1.218e-02)); + r += mul(s5_2, M4(-1.660e-01, -1.650e-01, -6.943e-02, 6.321e-03, 1.122e-03, -6.026e-03, -1.170e-02, -3.466e-02, 2.570e-02, 
1.026e-01, -1.145e-02, -8.252e-02, 1.478e-01, 1.972e-02, -3.516e-02, 6.122e-02)); + r += mul(s5_3, M4(-1.429e-01, -1.929e-01, 7.832e-02, 2.394e-02, -5.004e-02, 7.451e-02, -1.243e-01, -1.176e-01, 6.872e-02, -1.054e-01, -4.966e-02, -1.551e-01, 3.259e-02, 1.193e-01, -7.004e-02, -7.566e-02)); + r += mul(s5_4, M4(1.310e-02, 2.361e-01, 6.625e-02, -1.173e-01, -8.591e-02, 5.668e-02, -9.263e-02, 1.032e-02, -6.205e-02, -2.309e-01, 6.959e-02, 5.009e-02, 1.304e-02, 1.370e-01, 1.630e-02, -1.975e-02)); + r += mul(s5_5, M4(-4.677e-02, -1.617e-01, -1.872e-01, -1.676e-02, -2.004e-02, 4.053e-02, -8.716e-02, 1.984e-02, -7.915e-02, 1.496e-01, 9.948e-02, 4.470e-02, -7.834e-02, 1.324e-01, -5.536e-03, -9.670e-02)); + r += mul(s5_6, M4(-1.224e-01, 4.783e-02, 8.669e-02, -2.685e-01, 1.836e-03, 1.538e-02, 7.833e-03, -3.458e-02, 1.378e-02, 4.714e-02, 2.101e-02, -6.275e-02, 8.374e-02, -2.789e-02, -1.473e-01, -7.346e-02)); + r += mul(s5_7, M4(1.537e-01, 7.065e-03, 2.026e-01, 6.581e-02, -2.118e-02, -1.650e-01, -5.360e-02, -7.651e-02, 1.194e-01, 6.108e-02, 6.343e-02, -2.503e-02, 3.796e-02, 8.866e-03, 1.308e-02, 1.915e-02)); + r += mul(s5_8, M4(-6.067e-02, -1.150e-01, -5.869e-02, -1.623e-01, 7.534e-02, 1.269e-01, -1.742e-02, 1.379e-01, 1.134e-01, 4.200e-05, 1.298e-01, -1.327e-01, -2.172e-02, -6.698e-02, -8.754e-02, -1.267e-01)); + r += mul(s6_0, M4(-4.926e-02, -1.760e-02, 3.980e-02, 2.508e-02, -3.775e-02, 2.076e-02, -1.081e-01, -1.151e-02, 1.168e-02, -1.424e-01, -1.228e-01, 2.343e-02, -8.698e-02, 6.311e-02, 6.102e-03, 2.650e-02)); + r += mul(s6_1, M4(-5.825e-02, -4.032e-02, 1.147e-01, 2.680e-02, 1.414e-01, 5.679e-02, -9.813e-02, -3.283e-02, -1.126e-01, -7.862e-02, 1.747e-01, 1.466e-03, 1.422e-02, -6.262e-02, -1.687e-02, -2.384e-02)); + r += mul(s6_2, M4(4.493e-03, 1.772e-02, -7.808e-02, -9.538e-02, 1.030e-02, 7.319e-02, -1.175e-01, -1.446e-02, -4.204e-02, 1.384e-01, 5.252e-02, -2.037e-02, -1.241e-01, -1.529e-03, -4.452e-04, -3.906e-02)); + r += mul(s6_3, M4(4.798e-02, -2.439e-02, -3.515e-02, 
4.722e-02, 2.403e-01, -1.254e-01, 1.519e-02, 3.248e-02, 7.053e-02, -1.518e-02, -2.982e-02, -6.945e-02, -1.278e-01, 1.423e-01, 5.746e-02, -3.201e-02)); + r += mul(s6_4, M4(-3.080e-02, 1.561e-02, 2.768e-02, 2.669e-01, 1.858e-01, 3.711e-02, 7.019e-02, 1.011e-01, -5.844e-02, 9.352e-02, -3.138e-02, 1.805e-01, 1.252e-02, 5.338e-02, 2.771e-02, -2.996e-02)); + r += mul(s6_5, M4(-1.191e-01, -2.572e-02, -2.064e-02, 6.565e-02, 1.165e-01, -2.582e-02, 1.819e-01, -4.633e-02, 3.829e-02, 4.124e-02, 2.464e-01, -7.015e-02, 7.747e-02, -4.253e-02, -5.876e-02, 1.819e-02)); + r += mul(s6_6, M4(-1.295e-02, 1.090e-01, -9.345e-02, 9.165e-02, -2.594e-02, -9.639e-02, -4.320e-02, 8.351e-02, -1.298e-01, -1.167e-02, -1.856e-01, -1.277e-01, 9.414e-03, 1.105e-01, -3.130e-02, 6.221e-02)); + r += mul(s6_7, M4(3.444e-03, -2.271e-01, 3.192e-02, 1.126e-01, 4.436e-02, 9.578e-02, -1.035e-01, 1.076e-01, 5.406e-02, -7.431e-02, -4.377e-03, -5.885e-02, -6.816e-02, -2.066e-01, -2.767e-02, -5.711e-02)); + r += mul(s6_8, M4(-1.181e-01, 2.311e-01, -2.831e-02, 1.385e-01, -3.545e-02, 1.610e-01, 3.820e-02, 4.875e-02, 2.075e-01, -1.311e-01, -3.649e-02, 6.766e-02, 2.862e-02, 9.259e-03, 9.902e-02, -6.406e-02)); + r += mul(s7_0, M4(-5.053e-02, -2.136e-02, -1.745e-02, 4.108e-02, 2.853e-03, -6.905e-02, -4.749e-02, 7.346e-02, 1.699e-02, -1.325e-01, -7.159e-02, 1.764e-02, -4.939e-02, -9.260e-02, 1.443e-02, -1.301e-02)); + r += mul(s7_1, M4(1.267e-02, 2.617e-02, -1.330e-02, 1.707e-01, 2.547e-02, -6.053e-02, -5.732e-02, -4.795e-02, -8.427e-02, 3.053e-04, -2.630e-02, -6.587e-02, 3.033e-02, -9.484e-02, -9.448e-02, 8.488e-02)); + r += mul(s7_2, M4(-3.125e-02, 6.712e-02, 7.077e-03, -7.110e-02, -2.445e-02, 9.009e-02, -7.513e-02, 7.024e-02, -2.663e-02, -5.283e-03, -3.805e-02, 2.211e-03, 1.910e-02, 3.563e-02, -1.786e-02, -7.405e-02)); + r += mul(s7_3, M4(2.590e-02, -9.905e-02, -3.188e-02, -5.647e-02, 8.916e-02, -1.490e-01, 1.306e-01, -3.185e-02, -1.564e-02, 3.120e-02, 1.037e-01, 1.424e-02, 7.963e-02, 4.173e-03, 9.197e-02, 
1.390e-01)); + r += mul(s7_4, M4(1.105e-01, -2.338e-01, 2.474e-02, 5.860e-02, 2.509e-01, 1.125e-01, 1.008e-01, 9.209e-02, -5.383e-02, 1.035e-01, 4.327e-02, 3.693e-02, 1.499e-01, 3.680e-02, 6.475e-02, 1.107e-01)); + r += mul(s7_5, M4(-2.218e-02, -1.690e-01, -4.273e-02, -1.628e-02, 3.745e-02, 2.656e-02, 1.162e-01, 1.176e-01, 5.675e-03, -7.341e-02, 7.056e-02, 4.805e-02, 7.543e-02, 1.842e-02, 4.577e-02, 1.140e-02)); + r += mul(s7_6, M4(-1.473e-02, -2.266e-02, -7.544e-02, 1.373e-02, 1.795e-01, -4.898e-02, -1.475e-02, 1.673e-01, 5.746e-03, -9.764e-02, -5.334e-02, 9.037e-02, -4.687e-03, 1.181e-01, -1.183e-02, 6.647e-02)); + r += mul(s7_7, M4(1.130e-01, -1.421e-01, 4.181e-02, 6.442e-02, 1.211e-01, -2.209e-02, -3.875e-04, -5.382e-02, -1.280e-01, 1.888e-01, 1.304e-01, 1.089e-02, -6.041e-02, 2.358e-01, 4.996e-03, -1.448e-01)); + r += mul(s7_8, M4(-1.302e-01, -8.352e-02, -1.199e-01, 1.755e-01, -1.887e-02, -9.609e-03, -1.532e-02, 9.148e-02, 6.745e-02, -8.519e-02, -1.737e-01, -1.573e-01, 9.497e-02, -3.531e-02, -1.095e-01, -2.586e-01)); + r += V4(2.920e-02, 2.727e-03, 2.658e-02, 7.202e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.324e-01, 3.098e-02, -5.610e-02, -4.146e-02, 2.549e-03, -7.130e-02, -1.110e-01, 7.283e-02, 1.845e-02, -9.657e-02, 3.844e-03, 2.523e-02, -5.975e-02, -1.173e-01, 3.095e-02, 9.501e-02)); + r += 
mul(s0_1, M4(-1.426e-01, -1.228e-01, -1.281e-01, 2.389e-02, -2.692e-02, -4.660e-02, -9.459e-02, -2.750e-02, -8.558e-02, -4.009e-02, 3.787e-02, 7.417e-03, -6.477e-02, 6.138e-02, 9.170e-02, -4.381e-02)); + r += mul(s0_2, M4(-1.730e-01, -5.044e-02, -1.070e-01, 6.243e-02, -5.736e-02, -5.904e-02, 4.201e-02, 1.623e-02, -1.295e-01, 2.815e-03, 2.265e-02, 5.778e-02, -9.029e-02, 1.748e-02, 1.011e-02, -2.713e-02)); + r += mul(s0_3, M4(-6.038e-02, 8.951e-02, -7.483e-02, -5.819e-02, 9.276e-02, -1.233e-01, -8.590e-02, -1.409e-01, -3.259e-02, 4.039e-02, -4.817e-03, -1.018e-01, -1.746e-01, -6.002e-02, -7.211e-02, -8.386e-02)); + r += mul(s0_4, M4(-1.231e-01, 3.177e-02, 2.555e-03, -1.744e-01, -1.353e-02, -1.207e-01, 1.175e-01, -1.910e-01, -2.753e-02, -1.688e-01, 5.652e-02, 1.298e-01, -4.414e-02, 1.817e-01, 8.418e-02, -5.938e-02)); + r += mul(s0_5, M4(4.654e-02, -2.205e-01, -9.299e-02, 6.869e-02, -2.001e-02, -1.276e-02, 1.292e-01, -1.016e-01, 6.783e-02, -5.872e-02, -7.446e-02, 8.189e-02, 4.320e-02, 1.480e-02, 6.159e-02, -6.716e-02)); + r += mul(s0_6, M4(1.373e-01, 1.687e-02, -8.457e-02, 1.829e-03, -2.415e-02, 5.841e-03, 2.825e-02, 2.903e-02, -6.525e-02, 9.673e-02, 2.568e-02, -8.981e-02, 4.992e-02, -1.157e-01, -1.493e-02, 2.380e-02)); + r += mul(s0_7, M4(8.304e-02, 1.067e-01, 1.375e-01, -2.046e-02, -7.120e-02, 6.355e-02, -3.328e-02, -1.201e-01, -2.240e-02, 2.329e-02, 6.133e-02, -8.430e-03, 5.728e-02, 1.656e-02, 1.803e-02, -1.021e-01)); + r += mul(s0_8, M4(1.901e-03, -1.383e-01, 7.359e-02, -1.758e-02, -3.607e-02, -3.771e-02, -2.204e-02, 1.258e-01, -6.274e-03, 2.672e-02, 8.706e-02, -3.942e-02, -3.532e-02, -6.095e-02, -2.052e-02, -1.085e-01)); + r += mul(s1_0, M4(8.602e-04, 8.446e-02, 5.132e-02, 3.456e-02, -1.910e-02, 1.225e-01, -5.233e-02, 1.062e-01, 1.613e-01, -1.027e-01, -9.407e-02, -2.353e-01, 6.873e-02, 5.204e-02, 9.637e-02, -2.035e-03)); + r += mul(s1_1, M4(-3.258e-02, 3.236e-02, 1.585e-01, 1.841e-01, 6.017e-03, -8.298e-02, -9.286e-02, -7.971e-02, -3.589e-02, 9.189e-02, 8.931e-02, 
-7.546e-02, 6.572e-02, 2.627e-02, -1.412e-01, -1.148e-01)); + r += mul(s1_2, M4(-9.811e-03, 8.210e-03, -1.522e-02, -1.445e-01, -1.339e-01, -2.461e-02, -3.405e-03, 1.385e-01, -2.507e-01, -3.665e-02, -9.215e-02, -6.471e-03, -1.242e-01, 2.180e-01, 3.822e-02, 1.153e-01)); + r += mul(s1_3, M4(6.760e-02, 4.069e-02, -4.637e-02, 1.038e-01, -3.410e-02, 1.763e-01, 1.336e-01, 8.865e-03, -5.930e-02, 3.881e-01, -2.500e-01, -2.094e-02, 9.937e-02, 5.619e-02, 1.528e-01, -1.557e-01)); + r += mul(s1_4, M4(6.235e-02, 2.350e-01, -7.527e-02, -1.680e-02, -1.354e-01, -7.105e-02, -2.478e-02, -6.784e-02, -1.697e-01, -2.013e-01, 2.555e-01, 2.012e-01, -2.957e-02, -1.685e-02, 1.850e-01, 6.340e-02)); + r += mul(s1_5, M4(3.966e-02, -1.723e-01, -1.228e-01, 8.802e-02, 4.809e-02, -9.692e-02, -6.344e-03, -7.978e-02, 1.669e-01, -1.779e-01, 6.291e-03, 6.059e-02, 3.993e-03, -2.133e-01, -3.395e-02, 1.342e-02)); + r += mul(s1_6, M4(-5.133e-02, -1.657e-02, 1.592e-02, 5.386e-02, 1.022e-01, 1.524e-01, 8.132e-02, 4.798e-02, 1.245e-01, 1.184e-01, -1.525e-01, -1.911e-01, -1.381e-02, -5.021e-02, -8.596e-03, 3.106e-01)); + r += mul(s1_7, M4(-1.818e-02, 2.232e-01, -4.030e-02, 4.948e-03, -3.336e-02, 9.223e-02, -2.868e-02, -1.187e-01, 2.736e-02, 1.980e-01, -4.715e-02, -6.880e-02, -3.155e-02, 2.444e-02, 1.473e-01, 9.269e-02)); + r += mul(s1_8, M4(-5.057e-02, 2.620e-02, 2.795e-02, 8.563e-03, 1.028e-01, 4.350e-02, 7.139e-03, -1.082e-01, 1.869e-01, -1.683e-02, 3.172e-02, 8.075e-02, 8.004e-02, -1.671e-01, -4.036e-02, 3.039e-01)); + r += mul(s2_0, M4(1.180e-02, -1.184e-02, -6.574e-02, -1.048e-01, -3.686e-02, -6.367e-03, -8.990e-02, 6.084e-02, -8.842e-02, 5.817e-02, 8.876e-02, 1.218e-01, 1.387e-01, -1.046e-01, 3.739e-02, 7.670e-02)); + r += mul(s2_1, M4(7.315e-02, -5.979e-02, 1.453e-01, -1.965e-01, 2.788e-02, -2.264e-02, -6.827e-02, 5.191e-02, -1.430e-01, 3.145e-03, 4.147e-02, 1.067e-01, -3.394e-02, -2.290e-01, -2.004e-01, 8.971e-02)); + r += mul(s2_2, M4(-2.835e-01, -5.782e-02, -4.523e-02, -4.873e-02, 7.985e-02, 
1.438e-02, 6.609e-03, -8.096e-02, 1.605e-02, -3.455e-02, -4.986e-02, 7.196e-03, -6.071e-02, -5.082e-02, 4.941e-02, -7.414e-02)); + r += mul(s2_3, M4(-5.490e-02, 9.303e-02, 1.482e-01, 9.336e-02, -6.466e-02, -2.197e-02, -1.126e-01, 3.278e-02, 1.232e-01, 6.193e-02, -4.222e-02, -7.636e-02, 1.215e-02, -4.407e-02, 3.692e-02, -2.260e-01)); + r += mul(s2_4, M4(-3.508e-02, -1.683e-01, 8.935e-02, -4.702e-02, 1.243e-01, -5.206e-02, 7.074e-02, 1.417e-01, 5.942e-02, -1.391e-03, 2.317e-02, 2.775e-02, 2.099e-01, -4.575e-02, 1.754e-01, -1.390e-01)); + r += mul(s2_5, M4(-2.350e-01, 1.311e-01, -1.077e-01, 1.464e-01, 1.809e-01, -2.669e-04, -6.422e-02, -3.863e-02, 8.083e-02, 1.275e-01, -3.113e-02, 4.285e-03, -2.051e-01, 1.352e-01, -8.773e-02, 2.944e-02)); + r += mul(s2_6, M4(-3.024e-01, 6.652e-02, 8.992e-03, 1.629e-01, -7.257e-02, -3.108e-03, 2.366e-02, -1.174e-01, 1.096e-01, 1.167e-01, -6.839e-02, 2.846e-02, 4.916e-02, -6.639e-02, 2.082e-02, -5.918e-02)); + r += mul(s2_7, M4(-7.346e-02, -1.019e-01, -1.342e-02, 3.497e-02, 9.453e-03, 3.333e-03, 1.372e-01, 3.164e-02, -8.270e-02, 7.919e-02, -5.118e-02, 5.324e-02, -8.807e-02, 1.842e-01, 2.262e-01, 2.480e-02)); + r += mul(s2_8, M4(-1.017e-01, 2.074e-02, 5.007e-03, -1.334e-01, 4.727e-02, 2.447e-02, 7.362e-02, 1.111e-02, 1.149e-03, -5.188e-02, -4.095e-02, -7.496e-02, -4.799e-02, -1.475e-01, -1.799e-01, 1.101e-01)); + r += mul(s3_0, M4(-2.391e-02, -3.236e-02, -2.395e-02, -4.702e-02, -7.691e-02, 1.187e-02, -1.053e-01, 6.970e-03, 6.224e-02, -7.598e-02, 1.975e-01, -8.765e-02, 8.864e-02, 2.953e-02, -3.562e-02, 4.886e-02)); + r += mul(s3_1, M4(1.388e-01, -5.233e-02, 3.314e-03, 4.763e-02, 1.214e-01, 1.726e-01, 2.096e-02, 1.307e-01, 6.915e-02, -3.984e-02, 1.126e-01, -1.379e-01, -4.996e-03, -1.407e-02, -9.647e-02, -6.811e-02)); + r += mul(s3_2, M4(8.456e-02, -1.095e-02, -6.309e-02, -1.719e-02, 5.154e-02, 1.870e-01, 1.533e-01, -2.619e-02, 2.311e-01, -1.102e-01, -3.136e-02, -1.424e-02, -9.414e-02, 4.845e-02, 2.837e-02, -1.301e-02)); + r += mul(s3_3, 
M4(9.095e-02, -8.971e-02, -1.215e-02, -9.743e-02, -1.912e-01, -1.567e-02, -6.240e-02, 1.240e-01, -1.016e-01, -2.279e-01, -7.441e-02, 2.731e-02, 2.472e-02, -1.268e-01, 4.355e-03, 1.256e-01)); + r += mul(s3_4, M4(1.504e-01, 2.074e-01, -4.563e-02, 9.107e-02, 6.975e-02, -2.268e-02, 2.963e-01, 7.521e-02, -2.058e-01, 1.920e-01, -8.490e-02, -2.095e-01, 1.311e-01, -4.082e-02, 1.062e-01, -3.973e-02)); + r += mul(s3_5, M4(-2.132e-01, -1.528e-01, -2.973e-02, -4.818e-02, 2.202e-01, -1.653e-01, -3.830e-03, -1.751e-02, -7.094e-02, 8.933e-03, -5.851e-03, -5.554e-02, -2.070e-01, -1.704e-02, 2.700e-02, -4.136e-02)); + r += mul(s3_6, M4(6.727e-02, -8.846e-02, 6.397e-02, -1.020e-02, -1.508e-01, -9.940e-02, 7.372e-02, 2.904e-02, -3.266e-02, -1.201e-01, -3.074e-02, 3.378e-03, 3.186e-02, 2.257e-02, 5.726e-02, 4.939e-02)); + r += mul(s3_7, M4(7.298e-02, 9.083e-02, 1.039e-01, -2.973e-02, -1.102e-01, 4.364e-03, 1.732e-01, 3.399e-02, 2.200e-02, 8.622e-03, -8.569e-02, 1.179e-01, -1.467e-01, -5.801e-03, 6.517e-02, 1.460e-03)); + r += mul(s3_8, M4(7.603e-02, 1.223e-02, -3.089e-02, 7.583e-02, 1.471e-03, 3.472e-02, 3.394e-02, -1.371e-02, -4.960e-04, -1.172e-01, 1.305e-03, -1.425e-01, 2.795e-03, -2.720e-03, -5.219e-03, -6.907e-02)); + r += mul(s4_0, M4(5.505e-03, -8.819e-03, -4.014e-03, -1.244e-02, -1.126e-01, 8.135e-04, 9.623e-02, 3.676e-02, -1.720e-01, -2.153e-01, 1.957e-01, -2.243e-02, 1.522e-01, 2.124e-01, -1.470e-01, -1.231e-01)); + r += mul(s4_1, M4(3.341e-03, 4.275e-02, -1.164e-03, -1.806e-01, -5.828e-03, -2.089e-01, -1.698e-02, 2.542e-01, -2.417e-02, 6.649e-02, 4.735e-02, 1.805e-01, 1.254e-01, 2.262e-01, -1.286e-01, 1.099e-01)); + r += mul(s4_2, M4(6.665e-03, 6.571e-02, 3.929e-03, 5.843e-02, 1.745e-01, -4.256e-02, -7.602e-02, -1.089e-01, 2.779e-01, -2.963e-03, -1.710e-01, -1.459e-01, 1.136e-01, 4.401e-02, -2.303e-02, -8.939e-04)); + r += mul(s4_3, M4(5.475e-03, -4.728e-02, -1.140e-01, 2.885e-03, -4.551e-02, 1.432e-01, 2.654e-02, 6.426e-02, 7.432e-02, -1.165e-01, -3.742e-02, -7.785e-02, 
7.110e-02, -1.741e-02, -1.233e-01, 7.455e-02)); + r += mul(s4_4, M4(-4.256e-02, -1.787e-01, 1.026e-01, -1.440e-01, 1.520e-01, -4.770e-02, -1.609e-01, 1.174e-01, -1.175e-01, 2.220e-01, -1.170e-01, -2.981e-01, 8.738e-02, 2.689e-01, 1.559e-01, -7.396e-02)); + r += mul(s4_5, M4(4.258e-02, -1.854e-02, -4.987e-03, 6.224e-02, 2.917e-02, 7.084e-02, 1.824e-02, 7.055e-02, 2.346e-01, -3.433e-02, -1.227e-01, 4.578e-01, 2.186e-01, 1.179e-01, 3.552e-02, 5.816e-02)); + r += mul(s4_6, M4(-1.070e-01, -1.684e-02, -2.785e-03, 8.654e-03, -1.638e-01, -1.542e-02, 6.987e-02, -2.803e-02, 6.555e-02, 5.128e-02, 1.075e-01, -8.537e-02, -1.663e-01, -5.931e-02, -5.726e-02, -9.191e-02)); + r += mul(s4_7, M4(-6.936e-02, 1.002e-03, 7.349e-02, 6.119e-02, 1.093e-01, -1.090e-01, -6.324e-02, -1.775e-01, 8.135e-02, -2.989e-01, 1.243e-02, -4.426e-01, -1.983e-02, 6.458e-02, 5.099e-02, 2.519e-02)); + r += mul(s4_8, M4(8.977e-02, -4.270e-02, -9.077e-02, 8.007e-02, 1.254e-02, -9.442e-04, 7.493e-02, -1.994e-01, 6.976e-02, -3.175e-01, 1.506e-01, 9.311e-02, 1.037e-01, 1.043e-01, -1.493e-03, 4.551e-02)); + r += mul(s5_0, M4(-1.727e-02, 1.585e-01, 1.534e-01, -5.754e-02, -5.938e-02, -1.065e-01, 9.408e-02, -8.807e-02, 9.271e-03, 8.039e-03, 3.994e-02, -5.039e-03, -3.846e-02, -4.278e-02, -1.818e-02, -3.180e-03)); + r += mul(s5_1, M4(2.943e-01, -1.033e-01, -2.299e-02, -7.260e-02, 9.817e-02, 1.076e-01, 3.952e-03, -8.467e-02, -3.766e-03, -2.596e-02, -8.277e-02, 2.235e-02, -5.010e-02, -2.123e-01, -1.041e-01, 1.164e-01)); + r += mul(s5_2, M4(-1.255e-01, 9.527e-02, 2.084e-02, -1.064e-01, 1.871e-01, 3.352e-02, 1.445e-02, -6.823e-02, 1.345e-01, -2.076e-02, -1.071e-02, 1.201e-02, -1.752e-01, -8.163e-02, 4.540e-02, 1.464e-01)); + r += mul(s5_3, M4(-6.320e-02, 4.607e-02, -1.641e-01, 1.168e-01, 2.291e-02, -1.263e-01, 1.411e-02, 1.158e-01, -2.324e-03, 2.538e-02, -2.747e-02, 3.697e-02, -1.417e-01, -3.010e-02, -5.565e-02, 2.884e-02)); + r += mul(s5_4, M4(2.759e-01, 7.719e-02, -6.069e-02, -1.723e-01, 6.530e-02, -3.680e-02, 
-1.286e-01, 3.704e-02, -6.962e-02, 4.937e-02, -5.575e-02, -1.117e-01, -1.436e-02, -2.980e-02, 4.777e-04, 5.934e-02)); + r += mul(s5_5, M4(9.114e-02, -1.044e-01, -1.212e-04, -1.155e-02, 1.682e-01, 4.008e-02, 2.893e-03, 4.331e-02, -6.575e-02, -9.670e-02, -5.614e-02, 4.495e-02, -2.225e-02, -2.677e-02, 2.591e-03, 7.499e-02)); + r += mul(s5_6, M4(8.086e-02, -2.553e-02, -4.962e-02, -8.640e-03, -9.971e-02, -9.166e-02, 4.908e-02, 4.199e-03, -2.085e-02, -3.249e-02, 7.374e-02, 1.215e-01, -2.563e-02, 1.172e-01, 4.091e-03, 9.290e-03)); + r += mul(s5_7, M4(2.444e-01, 9.946e-02, -2.357e-01, 2.056e-01, -8.576e-02, 4.988e-02, 1.605e-01, 6.576e-02, 1.992e-02, 1.709e-01, -1.757e-02, -1.304e-02, -1.743e-01, 5.605e-02, 4.984e-02, 5.520e-02)); + r += mul(s5_8, M4(3.324e-01, -2.642e-03, 6.995e-02, -3.095e-01, 7.170e-02, -5.416e-02, -4.709e-02, -2.787e-02, -1.225e-02, 1.133e-01, 2.788e-02, 6.324e-02, 4.011e-04, -1.079e-01, -4.384e-02, 3.440e-02)); + r += mul(s6_0, M4(6.823e-02, 1.395e-02, 2.126e-01, 1.723e-02, 2.425e-02, -5.360e-02, -7.007e-02, -1.563e-02, 1.486e-01, -1.016e-01, -2.771e-02, -1.009e-02, 7.417e-03, -8.494e-02, -2.465e-02, -8.861e-02)); + r += mul(s6_1, M4(-1.253e-01, -1.780e-02, 1.781e-02, 1.649e-01, -1.070e-01, 3.239e-02, 9.397e-02, 8.197e-02, 7.706e-02, 1.910e-02, -8.823e-03, 1.915e-01, -2.050e-02, -3.583e-02, 1.441e-01, 4.264e-03)); + r += mul(s6_2, M4(1.674e-01, 1.013e-01, -3.249e-02, 1.133e-01, -5.729e-02, -1.209e-01, 1.029e-01, -1.727e-02, -1.044e-01, -8.951e-02, 4.933e-02, 5.606e-03, -6.829e-02, 2.937e-02, 6.334e-02, -1.043e-01)); + r += mul(s6_3, M4(-7.871e-02, 7.712e-02, 5.320e-03, 1.078e-01, 5.800e-02, -6.099e-02, 3.971e-02, 1.548e-01, 8.109e-03, 7.153e-03, -1.472e-02, 5.344e-02, 2.580e-02, -1.591e-01, 8.739e-02, -3.712e-02)); + r += mul(s6_4, M4(-2.111e-03, 7.265e-03, 3.405e-02, 3.830e-02, -1.882e-01, 8.751e-02, 2.707e-01, 6.005e-02, -9.426e-02, 3.892e-02, 4.236e-02, 8.881e-02, -1.930e-02, -6.025e-02, -1.088e-01, 3.448e-02)); + r += mul(s6_5, M4(1.040e-01, 
-6.059e-03, -2.065e-01, 5.620e-02, 6.967e-02, 4.260e-02, -7.522e-03, 1.348e-01, -5.463e-03, -5.115e-02, 7.386e-02, -6.398e-02, -5.838e-02, 4.261e-02, 7.914e-02, 2.047e-01)); + r += mul(s6_6, M4(1.204e-01, -9.487e-03, -7.892e-03, 3.023e-02, 2.809e-02, -1.235e-01, 8.795e-03, 5.906e-02, 4.211e-02, -1.922e-01, 1.323e-01, -2.147e-02, -6.397e-02, 1.699e-02, 2.492e-02, -5.821e-02)); + r += mul(s6_7, M4(-8.937e-02, 4.861e-02, 1.152e-01, -1.752e-03, -4.958e-02, -1.301e-02, 1.152e-01, -8.827e-02, 2.432e-03, -2.854e-02, 2.425e-01, 9.628e-02, 3.210e-02, 6.959e-02, -7.081e-02, 4.659e-02)); + r += mul(s6_8, M4(1.240e-01, 6.450e-04, 1.001e-01, -3.919e-03, 9.075e-02, -7.864e-02, 6.180e-03, 6.982e-02, -2.058e-01, 5.582e-02, 2.836e-02, 3.417e-02, -6.305e-02, -2.997e-02, -9.853e-02, -7.349e-02)); + r += mul(s7_0, M4(-3.616e-02, 2.904e-02, -4.637e-02, 6.489e-02, 1.687e-02, 3.052e-02, -7.060e-02, -1.709e-02, -5.865e-02, -1.052e-01, 7.886e-03, -5.491e-02, 1.828e-02, -1.454e-02, 1.469e-01, -2.592e-03)); + r += mul(s7_1, M4(4.408e-02, -1.408e-01, 1.018e-01, 7.013e-03, -1.115e-01, 7.468e-02, 2.113e-02, 7.162e-03, 1.654e-02, -3.238e-03, -2.668e-02, -2.272e-01, -4.302e-02, -1.023e-01, 1.773e-01, 1.297e-01)); + r += mul(s7_2, M4(-1.150e-02, 1.006e-01, 3.549e-02, -6.321e-03, 5.296e-02, -1.781e-02, 2.044e-02, 1.454e-02, 5.696e-02, 9.587e-03, 6.962e-02, -5.980e-02, -4.713e-02, -5.724e-02, 8.912e-02, 5.109e-02)); + r += mul(s7_3, M4(-8.174e-02, 3.685e-03, -3.495e-02, 1.455e-01, 6.260e-02, 2.383e-03, 4.992e-02, 6.729e-02, -3.815e-02, 1.347e-01, -1.718e-02, -5.853e-04, 3.688e-02, 2.927e-02, 1.373e-01, -3.212e-02)); + r += mul(s7_4, M4(2.866e-02, 8.243e-02, 2.121e-02, 1.013e-01, -8.082e-02, -1.408e-01, 7.371e-02, 8.894e-02, -1.777e-02, 4.296e-02, -1.108e-01, -1.136e-01, -1.227e-01, -3.547e-02, -2.402e-01, 6.650e-02)); + r += mul(s7_5, M4(5.925e-02, 3.045e-03, -1.978e-02, -1.133e-01, 1.058e-01, -1.937e-02, 4.209e-02, 6.281e-02, 7.367e-02, -2.282e-02, 7.629e-02, 7.343e-02, -1.214e-01, 2.380e-01, 
-1.013e-01, -1.038e-01)); + r += mul(s7_6, M4(-6.004e-02, -3.998e-02, -2.468e-02, 1.327e-02, 1.390e-01, 1.489e-01, -4.934e-02, 6.800e-05, -4.161e-02, -1.543e-02, 2.262e-02, -4.679e-02, 1.088e-02, -4.917e-02, 2.046e-01, 2.502e-01)); + r += mul(s7_7, M4(-1.189e-01, -1.247e-02, -1.499e-01, -3.603e-02, -1.487e-01, -3.209e-02, 2.411e-02, -8.302e-02, -9.279e-02, 7.351e-02, -1.716e-02, 5.948e-02, 4.101e-02, -1.640e-01, 2.718e-02, 1.554e-02)); + r += mul(s7_8, M4(6.335e-02, -9.028e-02, 6.124e-02, -9.490e-02, 8.380e-02, -1.819e-01, -3.584e-02, -2.386e-02, 5.599e-02, 1.539e-02, 5.710e-03, 3.523e-02, 7.735e-02, -1.713e-02, -1.353e-01, 1.159e-01)); + r += V4(-8.380e-03, -1.580e-02, 2.399e-02, 1.683e-02); + return r; +} + +void Pass8(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 
0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, 
s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 9 +//!DESC conv8 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 
f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.124e-02, -5.130e-02, -1.787e-03, 8.818e-02, 4.984e-02, -1.121e-01, 9.460e-02, 3.339e-02, 7.946e-02, -3.062e-02, 5.278e-02, 5.440e-03, 1.711e-02, 4.054e-02, -1.141e-01, 1.288e-01)); + r += mul(s0_1, M4(5.309e-02, -3.390e-02, 1.321e-01, 1.092e-01, -1.388e-01, -8.624e-03, 2.151e-02, 1.979e-02, -8.138e-02, 7.397e-03, 1.617e-01, 1.973e-01, -4.960e-02, 1.862e-02, -3.960e-03, -8.960e-02)); + r += mul(s0_2, M4(7.778e-02, 3.204e-02, 8.291e-02, -1.327e-01, 3.065e-02, -7.843e-02, -3.681e-02, 2.162e-02, 7.884e-03, -1.019e-01, -2.793e-02, 1.007e-01, -6.140e-04, -1.387e-01, 1.230e-01, 7.260e-02)); + r += mul(s0_3, M4(-6.266e-02, -1.561e-03, 9.634e-02, -9.794e-02, 6.055e-02, -5.282e-02, 9.105e-02, 7.726e-02, -5.617e-02, 8.143e-02, 4.430e-03, -7.008e-02, -1.017e-01, -8.139e-02, -9.779e-02, 4.958e-02)); + r += mul(s0_4, M4(1.650e-02, 4.615e-02, 2.963e-03, -1.076e-01, -1.435e-01, -1.646e-01, 6.921e-02, 7.088e-02, 1.342e-01, -7.077e-02, 1.517e-01, 2.435e-02, 2.455e-01, -3.089e-02, -1.496e-02, 2.904e-02)); + r += mul(s0_5, M4(1.193e-01, -1.123e-01, -2.309e-02, -5.959e-02, 1.889e-05, 1.146e-01, 1.482e-01, 4.795e-02, 2.048e-01, 2.549e-02, -3.481e-02, -3.296e-02, 4.897e-02, -8.950e-02, -4.280e-03, -1.567e-03)); + r += mul(s0_6, M4(1.450e-01, 2.803e-02, 1.840e-02, -8.276e-02, -6.706e-02, -3.088e-02, 1.742e-02, 
3.721e-02, -2.495e-02, 2.764e-02, 8.144e-03, -1.611e-01, 1.160e-01, -4.256e-03, 2.108e-02, -5.918e-02)); + r += mul(s0_7, M4(-1.328e-01, 7.529e-02, 1.391e-01, 2.563e-02, -1.192e-01, -2.107e-02, -1.355e-01, 9.936e-02, -2.000e-01, 3.467e-01, -6.629e-02, -1.384e-01, -6.821e-02, 7.839e-02, 7.809e-02, -1.136e-01)); + r += mul(s0_8, M4(-3.557e-02, -8.994e-02, 5.623e-02, -3.802e-02, 4.783e-02, 1.048e-01, -5.520e-02, -4.335e-02, -1.120e-01, -1.588e-02, -3.592e-02, -4.832e-02, 4.606e-02, 2.668e-02, 1.129e-01, -4.405e-02)); + r += mul(s1_0, M4(-8.748e-02, 1.203e-02, -5.771e-02, -3.631e-02, -5.534e-02, -7.679e-02, 2.935e-03, 1.125e-01, 4.920e-02, -1.189e-01, 4.831e-02, 6.072e-02, 2.471e-02, 1.016e-02, -2.971e-02, 5.571e-02)); + r += mul(s1_1, M4(-2.075e-02, 2.195e-01, -6.517e-02, 9.558e-02, -5.612e-02, 6.054e-02, 4.759e-02, 8.232e-02, -1.495e-01, -1.329e-02, 9.611e-02, -3.800e-02, -1.341e-01, 1.209e-02, 1.875e-01, -5.003e-03)); + r += mul(s1_2, M4(7.123e-02, 1.644e-01, -4.899e-02, 2.409e-02, 4.736e-02, -4.217e-02, -1.069e-01, 1.008e-01, -7.966e-02, -1.316e-01, -4.347e-03, 6.992e-02, -3.527e-02, -7.104e-03, -2.468e-02, -5.213e-02)); + r += mul(s1_3, M4(-9.210e-02, -3.570e-02, 3.117e-02, -1.709e-02, 6.131e-02, -8.828e-02, 4.977e-02, 9.107e-02, 1.307e-01, -9.185e-02, -7.107e-04, -1.570e-01, 3.320e-02, -4.837e-02, 9.636e-02, -1.617e-01)); + r += mul(s1_4, M4(1.071e-01, -1.831e-01, -8.204e-02, -8.219e-02, 2.204e-01, -1.851e-01, -5.428e-02, 2.497e-02, 1.762e-02, -1.438e-01, 4.264e-02, -1.996e-01, -5.739e-02, -2.194e-02, 2.351e-02, -1.555e-01)); + r += mul(s1_5, M4(1.995e-02, -7.711e-02, -3.127e-01, -1.545e-01, 1.087e-01, -6.717e-02, -3.386e-02, -3.439e-02, -6.425e-02, -1.417e-02, -9.704e-03, 3.258e-02, -1.786e-01, -7.814e-02, 7.339e-02, 1.150e-01)); + r += mul(s1_6, M4(3.818e-03, 8.071e-02, -1.410e-02, -1.407e-02, -1.744e-01, -5.763e-02, -1.927e-01, 3.831e-02, 1.584e-01, 7.533e-02, 3.399e-02, 7.658e-03, -8.963e-02, -4.506e-02, -4.843e-02, -1.250e-01)); + r += mul(s1_7, 
M4(-1.005e-01, 1.244e-01, -1.298e-01, 1.610e-01, -1.078e-01, 3.248e-02, -1.189e-01, 2.034e-02, -1.146e-01, 2.402e-01, 2.474e-02, -8.074e-02, 1.049e-01, 1.237e-01, 1.052e-01, 7.348e-02)); + r += mul(s1_8, M4(-9.359e-02, -1.032e-01, -1.450e-01, 4.403e-02, 6.272e-02, 1.148e-01, -2.433e-01, -5.160e-03, 7.080e-02, -2.387e-02, 4.122e-02, 7.547e-02, -9.222e-02, -9.204e-02, 1.121e-01, -6.432e-02)); + r += mul(s2_0, M4(5.519e-02, 6.806e-02, 9.748e-02, -2.149e-02, 2.170e-01, 5.902e-02, -9.839e-02, 1.249e-01, -1.353e-02, 2.961e-02, 1.827e-01, 6.570e-02, 1.371e-02, 5.585e-02, -4.444e-02, 1.533e-01)); + r += mul(s2_1, M4(4.773e-02, -1.665e-01, 5.589e-02, -8.909e-02, -2.246e-01, 3.200e-01, -1.041e-01, -8.414e-02, 1.580e-01, 3.261e-02, 5.637e-02, 1.576e-02, 2.131e-02, -3.541e-02, -4.463e-02, -2.095e-02)); + r += mul(s2_2, M4(1.036e-01, 2.589e-02, 2.904e-02, -7.143e-02, -1.696e-02, 1.774e-02, -7.084e-02, 3.157e-02, 6.868e-02, -2.945e-01, 5.057e-02, -1.008e-01, 8.910e-02, 8.413e-02, -6.435e-02, -4.051e-03)); + r += mul(s2_3, M4(-7.418e-02, -6.193e-02, 8.327e-02, 5.191e-02, 1.067e-01, -1.189e-01, -4.391e-02, -2.164e-01, 1.092e-02, 4.494e-02, 7.165e-02, -7.741e-02, 4.445e-02, -7.055e-02, -2.172e-01, -5.087e-02)); + r += mul(s2_4, M4(2.256e-02, 7.924e-02, -1.469e-02, 1.252e-02, 2.585e-02, 4.741e-01, 3.142e-02, -3.884e-03, 4.117e-02, -7.126e-02, -1.259e-01, -5.454e-02, 1.522e-01, -1.540e-01, 1.024e-01, 9.160e-04)); + r += mul(s2_5, M4(2.940e-02, -1.633e-01, -2.713e-02, 5.467e-02, -1.586e-01, 7.529e-03, 8.433e-02, 5.386e-02, 3.499e-02, -4.718e-02, 4.734e-02, 1.388e-03, -7.104e-02, -9.965e-02, 4.349e-02, 4.619e-03)); + r += mul(s2_6, M4(2.689e-02, 3.681e-02, 1.567e-01, -5.400e-02, 4.834e-02, 7.037e-02, -4.478e-02, 1.335e-01, -3.430e-02, -4.039e-02, -1.772e-02, 5.856e-02, 8.410e-02, -2.157e-01, 2.341e-02, 1.083e-02)); + r += mul(s2_7, M4(-2.147e-01, -1.803e-03, 1.678e-02, -6.482e-02, 2.857e-02, 1.997e-01, -4.290e-03, -9.494e-02, 8.619e-03, 2.462e-02, 2.078e-02, 6.033e-02, -3.540e-02, 
-5.141e-02, -1.394e-01, -3.610e-03)); + r += mul(s2_8, M4(1.420e-01, 1.700e-02, 7.208e-02, 6.421e-02, 8.379e-02, 4.514e-02, 7.367e-02, 6.906e-02, 2.213e-02, -7.140e-02, -7.118e-02, -4.912e-02, 1.182e-02, 1.214e-01, -1.221e-01, -5.433e-03)); + r += mul(s3_0, M4(-4.753e-02, 6.931e-02, -6.908e-02, -3.847e-02, 5.197e-03, 8.429e-03, -2.645e-02, -1.567e-02, 1.779e-01, -1.916e-02, 3.229e-02, -7.113e-02, -1.738e-02, -4.335e-02, -2.875e-02, -6.892e-02)); + r += mul(s3_1, M4(-3.704e-02, -8.374e-03, -1.469e-01, -2.289e-02, 1.303e-01, 6.911e-02, -2.123e-02, 4.849e-03, 2.334e-05, 9.183e-02, -1.740e-01, 1.321e-01, -1.002e-01, 1.355e-01, 7.750e-03, 5.359e-02)); + r += mul(s3_2, M4(-2.163e-01, -7.288e-03, -8.867e-02, -1.089e-01, -1.800e-02, 1.012e-01, 4.257e-02, -7.175e-02, -1.499e-01, 3.862e-02, -8.924e-02, 8.527e-02, -4.943e-02, 8.094e-02, 4.473e-03, -5.153e-02)); + r += mul(s3_3, M4(-1.294e-01, -9.081e-02, 1.170e-01, -6.287e-02, -2.409e-02, -7.843e-02, 1.081e-01, -1.228e-01, 6.556e-02, 3.745e-02, -1.265e-01, -1.149e-01, 7.995e-02, -1.064e-01, -4.103e-02, -1.770e-01)); + r += mul(s3_4, M4(7.123e-02, 1.010e-01, -2.841e-02, 1.940e-02, 2.383e-02, 1.075e-01, 1.142e-01, 7.052e-02, -1.260e-01, -2.067e-01, -2.080e-02, 9.393e-02, 7.256e-02, -1.270e-01, -6.239e-02, 9.681e-03)); + r += mul(s3_5, M4(-7.998e-03, -5.025e-02, 4.491e-02, 1.617e-01, 4.243e-03, 1.684e-02, 9.257e-02, 5.489e-02, -4.699e-02, -2.670e-02, 1.045e-01, -1.136e-01, 5.581e-02, -3.778e-02, 7.120e-02, 6.498e-02)); + r += mul(s3_6, M4(4.308e-03, -7.796e-03, 1.878e-01, -2.290e-01, -7.900e-02, 4.194e-02, -1.923e-02, -7.889e-02, 1.046e-01, 2.528e-02, -2.173e-01, 4.992e-02, -5.578e-02, -4.376e-02, -8.227e-02, -1.437e-01)); + r += mul(s3_7, M4(-1.714e-01, 4.549e-02, -5.604e-02, 1.178e-01, -1.652e-02, 9.276e-02, 7.400e-02, -2.862e-02, -1.776e-01, -1.282e-02, -4.909e-02, -1.218e-01, -8.023e-02, -1.684e-02, 7.598e-03, 4.162e-02)); + r += mul(s3_8, M4(9.347e-02, -8.901e-02, -1.304e-01, -6.794e-02, -4.605e-03, 5.944e-02, 1.136e-01, 
-9.450e-02, 2.883e-02, 5.992e-02, -6.510e-02, 1.774e-01, 1.138e-02, 5.769e-02, -1.313e-01, -5.079e-02)); + r += mul(s4_0, M4(5.049e-02, -3.380e-02, -1.054e-01, 6.772e-02, 4.020e-02, -1.913e-02, -3.185e-02, -2.899e-02, -6.698e-02, -2.485e-02, -6.283e-02, 1.835e-01, -1.031e-01, 5.999e-03, 1.490e-02, -1.784e-01)); + r += mul(s4_1, M4(-1.064e-01, -2.199e-01, 1.466e-01, 8.568e-03, 4.387e-02, -7.174e-02, -1.109e-02, 6.368e-02, 7.634e-02, 2.304e-02, 4.350e-02, -2.990e-02, -2.516e-02, -1.149e-02, -9.044e-03, -7.891e-03)); + r += mul(s4_2, M4(9.905e-02, 6.525e-02, 9.026e-02, 4.470e-02, 1.315e-03, -1.239e-02, -1.332e-01, 9.560e-02, -8.204e-02, 1.249e-01, 2.174e-01, -2.688e-02, 3.787e-02, -1.084e-01, -5.702e-02, 4.371e-02)); + r += mul(s4_3, M4(2.746e-02, 4.288e-02, -2.094e-01, -2.320e-02, -4.883e-02, -1.676e-02, -8.811e-02, 8.300e-02, 6.404e-02, 1.274e-01, 2.470e-02, -6.681e-02, 1.095e-01, 1.005e-01, 4.594e-02, -4.148e-02)); + r += mul(s4_4, M4(-1.802e-01, 5.122e-02, -5.737e-02, -1.075e-01, -1.205e-02, 3.844e-02, -4.666e-02, -1.431e-01, 2.786e-02, -3.764e-02, 5.673e-02, 6.577e-02, 4.471e-02, -1.484e-01, -5.142e-02, -5.054e-03)); + r += mul(s4_5, M4(2.149e-02, 6.915e-02, 1.145e-01, -1.189e-01, -5.877e-03, 2.061e-02, 5.708e-03, 6.259e-04, 3.541e-02, 3.211e-02, -2.670e-02, -1.668e-02, -9.188e-02, 1.706e-01, -4.118e-02, -2.265e-01)); + r += mul(s4_6, M4(9.618e-02, 6.636e-02, 7.511e-02, 9.216e-02, 1.049e-01, 4.239e-02, 4.988e-02, 2.455e-02, -9.820e-02, -1.717e-01, -2.323e-02, -8.221e-02, 1.424e-02, -2.146e-02, 2.142e-02, 8.807e-02)); + r += mul(s4_7, M4(-1.336e-02, 2.385e-02, -3.954e-02, -8.075e-03, 5.851e-02, -6.145e-02, 4.194e-02, 1.621e-01, -7.676e-02, -1.024e-01, 4.955e-02, 1.105e-01, -6.878e-03, -2.184e-02, -4.202e-03, 1.053e-01)); + r += mul(s4_8, M4(-1.642e-02, 1.181e-01, -2.130e-02, -8.524e-02, -1.581e-01, 1.508e-03, -1.421e-02, 6.296e-02, 7.533e-02, -2.317e-02, -1.147e-02, -6.421e-02, -2.778e-04, -2.823e-03, -5.921e-02, -2.012e-02)); + r += mul(s5_0, M4(8.415e-03, 
-6.028e-02, -1.751e-02, 1.548e-01, -3.270e-02, -4.412e-02, -9.120e-02, -8.805e-02, 4.282e-02, 5.193e-02, -1.485e-01, -2.785e-02, -2.536e-01, 1.438e-02, 2.982e-02, -2.936e-02)); + r += mul(s5_1, M4(-4.830e-02, -1.672e-01, 2.365e-01, -6.885e-02, -5.857e-02, -9.746e-02, -2.243e-02, 1.249e-01, 7.829e-02, 4.724e-03, -4.529e-03, -1.414e-01, 1.286e-01, 1.551e-02, -7.143e-02, -8.086e-02)); + r += mul(s5_2, M4(-2.138e-02, 8.716e-02, 8.449e-02, -1.476e-02, 1.615e-01, 5.934e-02, -9.844e-02, 5.758e-02, -1.017e-01, 8.195e-02, -8.195e-02, -2.562e-02, -6.803e-03, -1.538e-01, -9.863e-02, 5.102e-02)); + r += mul(s5_3, M4(-2.320e-02, 1.252e-02, 8.025e-02, 2.578e-02, 5.918e-02, -1.294e-03, -7.928e-02, 4.524e-02, -1.868e-02, 2.818e-02, -1.418e-01, 4.349e-02, 1.122e-01, 7.893e-02, 1.034e-01, 1.828e-01)); + r += mul(s5_4, M4(-1.013e-01, -7.174e-02, 4.332e-02, -1.217e-01, -6.073e-02, 1.344e-01, 2.161e-01, -3.117e-02, 3.859e-03, 2.590e-02, 4.436e-02, 4.931e-03, 1.766e-01, -5.240e-02, -9.816e-02, -7.606e-02)); + r += mul(s5_5, M4(-2.801e-02, -1.917e-02, 7.311e-02, -6.512e-02, 1.118e-01, 3.616e-02, 3.519e-02, -1.820e-01, -9.950e-02, -7.744e-03, 1.019e-01, 4.537e-02, 2.160e-02, 6.432e-02, 7.089e-02, 1.152e-02)); + r += mul(s5_6, M4(2.180e-03, 5.749e-02, 1.428e-01, 5.017e-02, 1.869e-01, 4.851e-02, 1.389e-01, 6.792e-04, 1.045e-01, -2.566e-02, -8.384e-02, -1.547e-01, 1.075e-01, 3.059e-02, -4.049e-02, 1.085e-02)); + r += mul(s5_7, M4(5.945e-02, 3.321e-02, -9.207e-02, -2.509e-02, 4.073e-02, -1.089e-01, -1.625e-02, 1.379e-02, -1.111e-03, -3.685e-02, -1.283e-01, -1.069e-01, -1.421e-01, -9.058e-02, -1.036e-01, 1.138e-01)); + r += mul(s5_8, M4(-2.612e-02, 1.336e-01, -9.740e-02, -4.554e-02, 2.979e-02, -7.052e-03, -4.852e-02, -4.545e-03, 4.801e-02, -2.438e-04, -5.739e-02, 4.333e-02, 3.390e-03, -1.577e-01, 2.403e-02, 5.650e-02)); + r += mul(s6_0, M4(3.236e-03, 5.534e-02, -4.418e-02, -4.075e-02, 3.276e-02, -3.427e-02, 2.378e-02, 6.431e-02, 7.598e-02, -7.310e-02, -5.276e-02, 3.837e-02, 6.279e-02, 
3.920e-02, 1.304e-01, -2.992e-02)); + r += mul(s6_1, M4(-6.662e-02, 3.669e-02, 1.856e-01, -1.944e-02, 1.691e-01, -1.861e-01, -5.360e-02, -6.607e-03, 3.012e-02, -1.674e-02, 1.990e-02, -9.414e-03, 8.199e-02, -3.132e-02, -8.794e-02, 5.374e-02)); + r += mul(s6_2, M4(8.092e-02, -4.533e-02, -1.180e-02, -6.647e-02, -1.927e-01, -3.415e-02, -1.194e-01, 9.213e-02, 6.425e-02, -3.130e-02, -2.920e-02, -1.045e-02, -5.180e-02, 9.980e-02, -7.554e-03, -9.558e-02)); + r += mul(s6_3, M4(-5.473e-02, 9.536e-02, 2.336e-01, 2.384e-01, -3.298e-02, -6.046e-02, -4.906e-02, -4.915e-02, -7.887e-03, -5.920e-02, -1.091e-01, 7.041e-02, -8.894e-02, -4.592e-02, 2.140e-02, 1.735e-01)); + r += mul(s6_4, M4(6.208e-02, -2.608e-02, -1.066e-01, 1.094e-03, 1.958e-01, -9.529e-03, 1.481e-01, 5.728e-02, -6.199e-02, 2.168e-02, -3.580e-02, -1.589e-01, -2.392e-01, -1.200e-01, -4.925e-02, -7.520e-02)); + r += mul(s6_5, M4(6.825e-02, 9.426e-02, -5.642e-02, 4.394e-03, 5.755e-02, -1.180e-01, 5.083e-02, -8.122e-02, 5.416e-02, -3.902e-02, -2.842e-01, -1.468e-01, -1.510e-01, -9.730e-02, 8.778e-02, -8.978e-02)); + r += mul(s6_6, M4(-8.778e-02, 6.068e-02, 3.351e-02, -2.043e-01, -7.854e-02, 7.319e-02, 1.240e-02, -1.226e-01, 1.410e-02, 6.336e-03, 6.147e-02, 4.013e-02, 8.160e-02, 1.311e-01, -2.852e-02, -3.663e-02)); + r += mul(s6_7, M4(7.029e-02, -1.694e-01, 1.453e-01, 1.021e-01, -7.482e-02, -7.034e-02, 1.067e-01, -6.123e-02, 1.143e-01, -4.063e-02, -6.213e-02, -1.135e-01, -1.179e-01, -1.617e-01, -2.237e-01, -2.470e-02)); + r += mul(s6_8, M4(-5.890e-02, 8.569e-02, 1.383e-01, -6.613e-02, -5.659e-02, 6.558e-02, 1.452e-04, -2.267e-02, -3.246e-02, -9.784e-02, -1.064e-01, 3.582e-02, 5.807e-02, 5.263e-02, 2.534e-02, 9.334e-02)); + r += mul(s7_0, M4(2.126e-01, -3.965e-02, -6.040e-03, 1.445e-02, -4.166e-02, 4.742e-02, -8.664e-02, 5.568e-02, 6.662e-02, -8.717e-02, 1.718e-01, 2.674e-02, -1.090e-01, -1.022e-02, 1.726e-02, 1.323e-01)); + r += mul(s7_1, M4(8.978e-02, 6.943e-02, 1.115e-02, 6.326e-02, 1.048e-01, 1.267e-01, 5.543e-03, 
1.492e-01, -9.506e-02, -3.199e-02, 1.146e-02, -3.752e-03, -7.690e-02, 1.866e-01, 2.825e-02, 5.971e-02)); + r += mul(s7_2, M4(-6.166e-02, 3.839e-02, -2.729e-02, 7.142e-02, 2.880e-02, 1.294e-01, -5.555e-02, -3.306e-03, 4.696e-02, -7.416e-03, 9.044e-02, -8.489e-02, 1.730e-02, 4.475e-02, -5.271e-02, 2.289e-02)); + r += mul(s7_3, M4(-3.251e-02, 6.556e-03, -3.212e-03, -1.414e-02, 1.450e-02, -8.137e-02, 9.345e-02, 5.045e-02, -2.866e-02, -3.191e-02, 6.239e-02, 1.788e-03, -2.839e-02, -5.030e-02, -1.687e-02, 2.879e-02)); + r += mul(s7_4, M4(-4.092e-02, -7.110e-02, -9.456e-02, 1.835e-02, 7.861e-02, 5.918e-02, 2.124e-01, 8.800e-02, -1.590e-01, 1.002e-01, 4.616e-02, -8.691e-02, 2.178e-02, -1.333e-02, 2.349e-02, -7.837e-02)); + r += mul(s7_5, M4(7.628e-02, 5.166e-02, -1.514e-02, -1.198e-01, -3.981e-02, -7.346e-03, 1.422e-01, 9.788e-02, 4.526e-02, 7.582e-02, 3.629e-02, 3.230e-01, -7.322e-02, -9.469e-02, -1.592e-02, -7.371e-02)); + r += mul(s7_6, M4(-5.822e-02, -1.253e-02, -8.465e-02, -3.110e-02, -1.102e-02, -3.346e-04, 2.758e-02, 9.501e-02, -1.179e-01, -1.198e-01, 1.321e-01, -6.008e-02, -8.633e-03, -1.439e-02, -2.763e-02, -5.576e-02)); + r += mul(s7_7, M4(9.527e-02, -7.521e-02, -5.449e-02, 8.966e-02, 9.526e-02, -1.341e-02, -1.805e-02, -4.273e-03, 1.667e-01, -2.610e-02, -3.437e-02, -3.887e-02, -1.084e-01, -9.102e-02, -1.352e-01, -3.787e-02)); + r += mul(s7_8, M4(-1.350e-01, -5.633e-02, -8.886e-02, 1.508e-01, 1.708e-02, 8.339e-02, 5.514e-02, 5.951e-03, 2.865e-02, -3.225e-02, 1.506e-01, 8.627e-02, 8.100e-02, 2.859e-03, 7.099e-02, -1.714e-02)); + r += V4(-1.959e-02, -1.607e-02, 2.678e-03, 3.647e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 
s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.590e-02, -3.110e-02, -5.343e-02, 1.844e-02, 7.342e-03, 3.303e-02, 1.736e-01, 3.715e-03, -4.183e-02, -3.792e-02, 3.487e-02, 2.242e-01, -9.557e-02, -2.172e-02, 8.764e-02, -3.268e-02)); + r += mul(s0_1, M4(6.388e-02, -3.818e-02, 4.609e-02, 3.996e-02, -6.768e-02, -1.970e-02, 1.285e-02, -8.580e-02, 5.195e-02, 5.021e-02, 7.898e-03, -2.762e-02, -3.387e-04, 5.472e-02, 3.084e-02, 1.434e-01)); + r += mul(s0_2, M4(3.166e-02, -5.894e-02, 5.915e-02, 3.985e-02, -4.421e-02, -3.876e-02, 7.038e-03, -6.328e-02, -4.077e-02, -8.372e-02, 5.535e-02, 3.628e-02, -1.039e-03, -5.280e-04, -6.163e-02, 9.013e-02)); + r += mul(s0_3, M4(2.717e-02, -1.076e-02, 1.405e-02, 1.800e-01, 3.261e-02, -5.774e-03, -5.891e-02, 1.254e-01, 1.157e-01, -6.435e-02, 3.639e-02, -4.932e-02, 3.157e-02, -5.578e-02, 4.085e-02, 1.272e-01)); + r += mul(s0_4, M4(4.370e-03, -1.914e-01, 2.501e-02, 6.614e-03, 4.624e-02, 9.182e-02, -1.160e-01, 1.983e-02, -1.177e-01, -2.725e-01, -2.444e-01, -5.267e-02, 1.670e-01, 3.638e-03, 1.756e-02, 1.624e-02)); + r += mul(s0_5, M4(3.460e-02, -2.745e-01, 1.139e-01, 1.029e-01, 2.006e-02, 9.844e-02, 2.325e-02, -1.451e-02, 1.353e-01, 9.225e-02, -4.067e-02, 1.927e-01, 1.685e-01, -1.072e-02, -5.754e-02, 2.313e-02)); + r += mul(s0_6, M4(-5.847e-02, 3.581e-02, -2.012e-02, -6.789e-02, 1.380e-02, -4.919e-02, -1.183e-01, -6.287e-02, 2.998e-02, 1.971e-01, 2.000e-01, -1.879e-03, -1.302e-02, -8.027e-02, -1.116e-01, -1.198e-01)); + r += mul(s0_7, M4(3.188e-02, 1.171e-02, -2.499e-01, 2.871e-02, 8.527e-02, -8.520e-02, -2.996e-02, -2.599e-02, -4.322e-02, -8.885e-02, -1.367e-01, 6.895e-02, 6.368e-02, 2.506e-02, -2.611e-02, -1.471e-01)); + r += mul(s0_8, M4(-5.684e-02, -7.597e-02, -1.881e-01, 
-5.687e-02, 3.325e-02, 1.130e-01, -6.854e-02, 2.383e-03, 1.246e-01, 1.974e-02, 8.954e-02, -6.438e-03, -1.599e-01, -2.827e-02, 4.963e-02, -3.256e-02)); + r += mul(s1_0, M4(-4.162e-02, 1.987e-01, -2.119e-02, -8.128e-02, 1.152e-02, 8.857e-02, 1.907e-01, 8.436e-02, -3.137e-02, 8.293e-02, -1.405e-02, 2.704e-02, 8.655e-02, -3.663e-02, 1.099e-01, 5.457e-02)); + r += mul(s1_1, M4(-5.241e-02, 7.595e-02, -8.220e-02, -2.789e-02, 5.280e-02, 1.105e-01, 2.167e-02, 9.167e-02, -3.544e-03, -7.002e-02, 1.089e-03, 1.149e-02, -2.671e-02, -3.081e-02, -7.905e-02, 8.840e-02)); + r += mul(s1_2, M4(-1.234e-02, 9.964e-02, 8.661e-02, -1.308e-01, -6.105e-02, 6.715e-02, -2.462e-02, -9.010e-02, 1.985e-02, 1.281e-02, -2.655e-02, -5.739e-02, -1.161e-01, -2.814e-02, 6.001e-02, 2.301e-02)); + r += mul(s1_3, M4(-5.796e-02, 1.801e-01, -9.560e-02, 1.335e-01, 1.099e-02, -6.033e-02, -5.202e-02, 1.635e-01, 1.770e-02, -1.323e-01, 4.147e-02, 8.472e-02, -1.089e-01, 2.684e-01, -6.604e-02, 9.575e-02)); + r += mul(s1_4, M4(-1.456e-01, 8.853e-02, -4.921e-03, -7.301e-02, 9.708e-02, 8.831e-02, 1.773e-03, 1.722e-01, -4.105e-02, -1.195e-01, -6.714e-02, -7.913e-02, -2.420e-02, 5.004e-02, 1.116e-01, 1.293e-01)); + r += mul(s1_5, M4(-7.842e-02, 2.580e-02, 1.357e-01, -1.304e-01, -8.766e-02, -4.131e-02, -4.978e-02, 8.374e-02, 1.924e-01, -6.467e-02, -7.540e-02, -1.794e-02, 1.040e-01, -1.228e-01, -1.131e-01, -6.994e-02)); + r += mul(s1_6, M4(-2.254e-02, -7.200e-03, -5.046e-02, 9.115e-03, -1.037e-01, -3.738e-02, -8.190e-02, -1.775e-01, 3.517e-03, 3.532e-02, 7.561e-02, 4.706e-03, 4.358e-02, -7.764e-02, 8.520e-02, -1.015e-02)); + r += mul(s1_7, M4(-1.611e-02, 2.476e-01, -2.432e-02, -1.583e-02, -4.018e-02, 1.071e-01, 8.575e-02, 3.484e-02, 1.520e-01, 4.716e-03, -2.497e-02, 1.696e-01, -1.108e-01, -4.485e-02, 4.509e-02, -4.546e-02)); + r += mul(s1_8, M4(-6.576e-03, -9.228e-02, 1.373e-01, -9.949e-03, -7.073e-03, 4.201e-02, 2.402e-02, -4.963e-02, -6.371e-03, 9.453e-02, -2.234e-02, 1.741e-02, -9.118e-02, 1.014e-01, 3.908e-03, 
-4.366e-02)); + r += mul(s2_0, M4(6.863e-02, 3.414e-02, -2.653e-02, 2.633e-02, 1.998e-01, -2.907e-02, 1.251e-01, -7.200e-02, 7.589e-02, -1.495e-03, -1.100e-02, 9.860e-02, 1.296e-02, -9.353e-02, 2.282e-01, -1.075e-01)); + r += mul(s2_1, M4(-6.242e-02, -1.099e-01, 3.064e-02, 1.066e-01, -2.413e-03, -1.544e-01, -3.160e-02, -1.192e-01, -5.256e-02, -1.040e-01, -5.898e-02, 1.146e-02, -1.371e-01, 1.769e-01, 1.877e-01, -7.063e-02)); + r += mul(s2_2, M4(6.056e-03, 1.196e-01, 6.621e-03, 5.906e-02, 1.004e-02, 7.377e-02, -4.564e-02, 1.033e-01, 4.088e-02, 6.371e-02, 4.876e-02, -9.700e-02, 6.133e-02, 3.265e-03, 9.627e-02, -4.853e-02)); + r += mul(s2_3, M4(-8.618e-02, 2.361e-02, 2.306e-02, 2.338e-02, 1.455e-01, 2.213e-02, 2.779e-01, -1.930e-01, 1.413e-01, 3.542e-02, -1.063e-01, -2.486e-02, 1.034e-01, -1.830e-01, -3.106e-02, -2.160e-01)); + r += mul(s2_4, M4(-1.389e-02, 8.223e-02, 1.201e-01, 1.706e-01, -1.186e-01, -1.825e-01, 9.056e-03, 7.458e-02, -4.595e-02, -1.092e-01, 1.343e-01, -2.154e-02, -2.586e-01, -1.892e-03, -3.889e-03, 5.850e-03)); + r += mul(s2_5, M4(-1.136e-01, 1.982e-02, 3.331e-04, -3.975e-02, 1.881e-03, -3.929e-02, 9.770e-02, 8.461e-02, -1.051e-01, 3.264e-03, -2.358e-02, 5.921e-02, 2.493e-02, 4.953e-02, 7.066e-02, -3.072e-02)); + r += mul(s2_6, M4(1.886e-02, 1.277e-02, 3.918e-02, 7.419e-02, -6.785e-02, 3.313e-02, -1.838e-02, 2.983e-02, 5.504e-02, 2.602e-02, -6.254e-02, 3.636e-02, 1.083e-01, -1.801e-01, -8.274e-02, 1.667e-02)); + r += mul(s2_7, M4(7.890e-02, -7.701e-02, -9.203e-02, 8.960e-02, -3.452e-02, 1.039e-01, -1.697e-01, 1.136e-01, -3.416e-02, 8.475e-02, -9.041e-02, 9.931e-03, 8.038e-02, -1.292e-01, -4.484e-03, 2.517e-01)); + r += mul(s2_8, M4(-1.775e-03, 1.181e-01, 2.082e-02, 5.564e-02, 1.352e-01, 1.749e-01, -2.519e-02, -9.326e-02, 1.243e-02, -3.042e-03, -3.586e-02, -1.242e-02, -1.886e-02, 7.223e-02, 4.050e-03, -1.565e-01)); + r += mul(s3_0, M4(-1.018e-01, 1.416e-02, -1.012e-01, -2.921e-01, -3.719e-03, 4.453e-02, 6.208e-02, 6.302e-02, -2.990e-02, -5.834e-02, 
2.999e-02, -5.499e-02, 1.150e-02, -1.652e-02, -4.041e-02, -4.852e-02)); + r += mul(s3_1, M4(-1.862e-01, -6.003e-02, 6.366e-03, -1.071e-01, 9.639e-02, 5.844e-03, 9.584e-02, -5.846e-03, 8.865e-02, -1.776e-01, -1.649e-01, -4.859e-03, -4.392e-02, -7.421e-02, 2.612e-03, -3.602e-02)); + r += mul(s3_2, M4(-1.813e-01, 1.274e-01, -7.660e-02, -1.141e-01, -7.542e-03, -1.065e-02, 9.674e-03, 2.061e-02, -6.439e-03, -1.230e-02, -1.123e-01, -5.417e-02, 3.718e-02, -6.594e-02, 5.417e-02, 6.099e-02)); + r += mul(s3_3, M4(-9.982e-02, 1.328e-01, -6.842e-02, 7.849e-03, 8.076e-02, 6.480e-02, 3.816e-02, -4.609e-02, -5.017e-02, 2.053e-02, -1.976e-02, -8.944e-02, 1.305e-01, -9.935e-02, 5.136e-02, -8.934e-02)); + r += mul(s3_4, M4(2.970e-02, 2.572e-01, 2.418e-01, -2.262e-02, -5.754e-02, -5.101e-02, 1.061e-01, 1.150e-02, 1.246e-01, -5.406e-02, -1.217e-01, -5.303e-02, -2.295e-03, 1.318e-01, 1.894e-01, -2.032e-01)); + r += mul(s3_5, M4(-7.984e-02, -2.415e-02, 5.613e-03, -2.421e-01, -1.214e-02, -2.134e-01, -6.162e-02, 2.134e-02, -1.086e-01, -1.858e-01, 4.495e-02, -2.550e-02, 5.153e-02, 6.359e-02, -7.375e-03, -6.049e-02)); + r += mul(s3_6, M4(-5.793e-02, 1.419e-02, -6.912e-02, 3.221e-02, -5.328e-02, 4.926e-02, 4.026e-02, 6.780e-02, -3.927e-02, 5.506e-02, 5.599e-02, -5.676e-02, 1.683e-02, -1.534e-02, -3.519e-02, -7.504e-02)); + r += mul(s3_7, M4(3.167e-02, -1.083e-01, 1.771e-01, 7.894e-02, 7.396e-03, -7.355e-02, -7.834e-02, -1.662e-01, -1.075e-01, 1.908e-01, -4.130e-02, -1.544e-01, 4.089e-02, 2.716e-02, 4.991e-02, 1.006e-01)); + r += mul(s3_8, M4(8.130e-02, -2.307e-02, 2.258e-01, -1.702e-01, -5.174e-03, 7.052e-02, -1.868e-02, -2.389e-02, 1.193e-01, -5.332e-02, 2.500e-02, 7.811e-02, 1.416e-02, 7.437e-02, 1.395e-02, -1.400e-01)); + r += mul(s4_0, M4(4.121e-02, -2.477e-02, 3.316e-02, -5.069e-02, 1.654e-02, 2.731e-02, -5.538e-02, 1.603e-02, 7.782e-02, 6.690e-02, -6.802e-02, -9.293e-02, -1.002e-02, 1.889e-02, -1.141e-01, -3.351e-02)); + r += mul(s4_1, M4(-5.217e-02, 6.161e-02, -4.193e-02, -6.295e-02, 
9.101e-04, 1.352e-01, -1.879e-02, -5.071e-02, 1.265e-01, 2.192e-02, -1.221e-03, -6.355e-02, 4.557e-02, -5.211e-04, -5.280e-02, -1.080e-03)); + r += mul(s4_2, M4(8.481e-02, 7.269e-03, 7.575e-04, 7.683e-02, 3.219e-02, 2.291e-02, -1.431e-02, 6.218e-03, 6.404e-02, -1.347e-03, -1.045e-01, -5.670e-02, -9.829e-02, -1.256e-01, -6.270e-02, -3.812e-02)); + r += mul(s4_3, M4(-2.407e-01, -4.111e-02, 1.126e-01, 4.934e-02, -3.288e-02, 8.998e-02, -7.661e-02, -4.144e-04, -3.361e-03, 1.834e-02, -1.180e-01, -3.153e-02, 4.357e-02, 5.519e-02, -4.810e-02, -1.510e-02)); + r += mul(s4_4, M4(-1.391e-01, -5.460e-02, 1.079e-01, -1.000e-01, -6.315e-02, 1.372e-01, -1.143e-01, 6.703e-02, 1.145e-01, -1.889e-02, 9.035e-02, 1.031e-01, 1.311e-01, 3.571e-02, 8.697e-02, 2.037e-03)); + r += mul(s4_5, M4(9.495e-02, 9.257e-02, 9.830e-02, 1.196e-03, 2.120e-02, -7.858e-02, 2.426e-02, 5.286e-02, 1.095e-01, -3.742e-02, -1.869e-02, 3.153e-02, -4.070e-02, -6.705e-02, 7.344e-02, -2.352e-02)); + r += mul(s4_6, M4(-1.948e-01, 1.040e-01, 1.133e-01, -5.870e-02, 4.856e-02, 5.025e-02, 3.938e-02, -3.379e-02, 4.228e-02, 1.662e-01, 5.011e-02, -4.837e-02, -2.938e-02, 2.523e-03, -2.985e-02, 2.952e-03)); + r += mul(s4_7, M4(1.069e-01, -1.289e-02, -1.304e-01, 1.352e-01, 5.513e-02, -6.971e-02, -5.266e-03, 9.616e-02, 2.094e-01, -1.744e-02, 4.187e-02, 1.510e-01, 1.264e-01, 4.890e-03, -6.329e-02, -1.145e-02)); + r += mul(s4_8, M4(-1.590e-02, 3.200e-03, 8.619e-02, -1.428e-01, 8.215e-02, -9.859e-02, -4.675e-02, 2.689e-02, -3.777e-02, 1.753e-01, -7.441e-02, -3.221e-02, -2.031e-02, -1.153e-01, 1.363e-01, 1.844e-01)); + r += mul(s5_0, M4(1.515e-01, -2.016e-02, -5.047e-03, 3.562e-02, -1.864e-01, -1.531e-01, 1.729e-02, 5.759e-02, 8.454e-03, 3.906e-02, -5.451e-02, 1.320e-02, -6.558e-03, 9.826e-02, -1.302e-01, -6.306e-02)); + r += mul(s5_1, M4(-2.160e-02, 2.547e-02, 6.341e-02, -1.780e-02, 2.864e-03, -4.790e-02, 8.105e-02, -1.995e-01, -1.708e-03, 2.987e-02, 1.015e-02, -8.499e-02, -3.817e-02, 9.095e-02, -1.822e-01, -1.101e-02)); + r += 
mul(s5_2, M4(2.311e-02, -2.671e-02, 5.648e-02, 6.421e-02, -1.514e-01, -1.763e-01, -8.416e-02, 6.549e-02, -9.514e-02, 5.539e-02, -8.377e-04, 7.457e-02, -1.727e-01, -1.828e-01, -1.163e-01, -6.494e-02)); + r += mul(s5_3, M4(-2.060e-01, -7.085e-03, 7.469e-02, 4.907e-02, -2.527e-01, -2.608e-01, 5.313e-02, 1.760e-01, -8.158e-02, -7.513e-02, -4.557e-02, -8.827e-02, 2.459e-01, 2.116e-01, 4.177e-02, 9.706e-02)); + r += mul(s5_4, M4(-8.207e-02, 6.713e-02, -1.241e-01, -4.085e-02, -4.241e-01, -2.051e-01, -2.963e-02, -1.237e-01, 1.151e-02, 7.767e-02, 1.969e-01, -1.872e-01, 7.294e-04, -1.502e-01, 2.878e-01, 2.020e-01)); + r += mul(s5_5, M4(-1.524e-02, -8.226e-02, 1.199e-01, 9.689e-02, -3.195e-02, -5.830e-02, 3.958e-02, -1.260e-01, 1.386e-01, -2.399e-02, -1.258e-01, 5.275e-02, 1.007e-01, 4.434e-02, 1.313e-01, 2.186e-02)); + r += mul(s5_6, M4(-1.757e-01, -3.246e-02, -1.415e-02, 4.278e-02, -5.272e-02, 4.532e-02, 1.979e-02, 5.368e-03, -4.621e-02, 1.805e-02, 4.910e-02, 7.873e-02, -5.996e-02, 1.241e-01, -1.192e-01, 1.480e-01)); + r += mul(s5_7, M4(6.249e-02, -3.158e-02, -2.466e-02, -5.536e-02, -2.296e-01, -1.226e-01, 6.021e-02, 1.391e-01, -3.438e-02, -1.871e-02, -5.718e-02, 9.415e-02, 6.558e-02, 1.641e-02, -1.713e-01, -2.197e-01)); + r += mul(s5_8, M4(-3.256e-03, -3.506e-02, 1.623e-01, -6.274e-02, 7.654e-03, -1.939e-01, -4.765e-03, -1.295e-01, 9.933e-02, 2.003e-02, 9.965e-03, 5.707e-02, -2.183e-02, -2.713e-02, 1.215e-01, 9.745e-02)); + r += mul(s6_0, M4(-1.090e-01, -1.152e-01, 2.460e-03, 4.796e-02, 2.755e-02, 2.983e-02, 4.337e-02, 2.985e-02, 1.026e-01, 1.752e-02, 5.032e-02, -1.199e-01, 1.055e-01, -1.054e-01, 4.206e-02, 1.051e-01)); + r += mul(s6_1, M4(2.026e-01, -4.514e-02, -1.722e-01, 3.648e-02, -4.413e-02, 1.019e-01, 2.769e-02, -8.508e-02, 4.680e-02, 7.041e-02, -2.414e-02, 1.759e-02, -2.522e-02, 8.350e-02, -2.077e-02, 6.958e-03)); + r += mul(s6_2, M4(-3.116e-02, -1.302e-01, -1.195e-02, 2.560e-02, -9.033e-02, -5.811e-02, 3.653e-02, -3.877e-02, 6.517e-02, 2.153e-01, 1.416e-01, 
1.106e-01, -1.300e-02, 1.685e-01, 2.437e-03, 1.604e-04)); + r += mul(s6_3, M4(1.487e-01, 4.270e-02, 1.526e-01, 6.293e-02, -1.350e-01, -2.366e-01, -7.172e-02, 5.710e-02, -2.211e-02, -4.022e-02, 4.349e-02, 2.249e-03, 2.711e-01, 4.068e-02, -2.228e-01, 1.218e-01)); + r += mul(s6_4, M4(-1.098e-02, -4.339e-02, 1.202e-01, -8.716e-02, -2.145e-01, -1.815e-01, -1.335e-01, -3.087e-02, -1.077e-01, -1.044e-02, -6.819e-02, -8.384e-02, 1.648e-02, 5.418e-02, 6.638e-02, -1.989e-01)); + r += mul(s6_5, M4(3.548e-02, 1.502e-01, -1.120e-02, -1.836e-02, -6.830e-02, 5.039e-02, 7.547e-02, -7.908e-02, -4.105e-02, -2.698e-01, -2.745e-02, -6.625e-02, 4.007e-02, -8.148e-02, -1.544e-01, 5.530e-02)); + r += mul(s6_6, M4(9.983e-02, -5.071e-02, 9.065e-02, -4.156e-02, -6.846e-02, -6.359e-02, 2.337e-02, -2.178e-01, 3.106e-02, -4.579e-02, 1.234e-01, -7.015e-02, 7.251e-02, -1.132e-01, 3.569e-02, 6.967e-02)); + r += mul(s6_7, M4(1.498e-01, -2.674e-02, -2.983e-02, -6.621e-02, 1.042e-02, -7.544e-02, -8.134e-03, 1.430e-01, 5.768e-02, -1.481e-02, 1.103e-02, 5.662e-02, -4.365e-03, 4.097e-02, 1.368e-01, 8.332e-02)); + r += mul(s6_8, M4(7.457e-02, -3.248e-02, -8.708e-02, -9.401e-02, 1.810e-02, -6.809e-02, 8.030e-02, -2.026e-01, 2.086e-02, 1.320e-02, -2.712e-03, 7.659e-02, 5.204e-02, 8.754e-02, 1.649e-01, 1.309e-01)); + r += mul(s7_0, M4(-3.163e-02, 3.620e-02, -3.094e-02, 1.678e-01, 5.136e-02, 9.350e-02, -1.322e-02, 6.079e-04, 1.147e-01, -1.646e-01, 5.951e-02, -6.937e-02, 4.531e-02, 4.423e-03, -1.073e-01, 2.816e-02)); + r += mul(s7_1, M4(6.097e-02, -3.721e-03, 1.297e-01, 2.043e-03, 4.694e-02, 7.640e-02, -6.045e-02, 9.431e-02, 4.080e-02, 4.768e-02, -1.154e-01, -6.744e-02, 2.081e-03, 7.172e-03, 5.627e-02, 9.120e-03)); + r += mul(s7_2, M4(-3.928e-02, -5.564e-03, 4.455e-02, 3.625e-02, -3.834e-02, -1.259e-02, -2.303e-02, -5.317e-02, 2.345e-02, -2.036e-01, -5.532e-02, -1.497e-02, -2.768e-03, 1.263e-02, -8.531e-02, -7.579e-04)); + r += mul(s7_3, M4(8.706e-03, -5.076e-02, -1.403e-01, -7.356e-02, -4.071e-02, 
1.831e-01, -3.623e-02, 9.057e-02, 5.028e-02, 4.501e-02, 2.522e-02, -9.645e-02, 1.070e-01, 1.581e-02, 2.683e-02, 1.828e-02)); + r += mul(s7_4, M4(-4.991e-02, 7.480e-02, 4.730e-02, 7.487e-02, -3.904e-02, 5.020e-02, 1.929e-01, 2.029e-01, -2.223e-02, -6.129e-02, -1.038e-01, -8.008e-02, 1.022e-01, -2.192e-02, 1.285e-01, 3.667e-02)); + r += mul(s7_5, M4(-2.875e-02, -2.168e-02, -9.253e-02, 1.754e-02, 2.110e-01, 1.509e-01, 2.660e-02, 2.250e-01, 4.431e-02, -3.156e-01, -1.295e-01, 7.250e-02, -4.291e-02, -9.290e-02, 6.327e-02, 2.415e-02)); + r += mul(s7_6, M4(-5.187e-02, -8.576e-02, 4.336e-02, 9.868e-02, 6.822e-02, 9.902e-02, 1.039e-02, 4.301e-02, -5.396e-02, 1.798e-04, 1.348e-01, -7.698e-02, 2.397e-02, -7.087e-03, -4.507e-02, -8.344e-02)); + r += mul(s7_7, M4(-2.913e-02, 6.067e-02, 4.437e-02, -7.690e-02, -1.153e-01, -1.310e-02, 6.175e-02, 1.078e-01, 8.214e-02, 1.278e-01, -2.377e-01, -1.081e-01, 8.874e-02, -7.998e-02, 2.247e-02, -1.505e-02)); + r += mul(s7_8, M4(-6.634e-02, 1.412e-01, 3.046e-02, 2.276e-02, 6.834e-02, -1.809e-02, -2.036e-02, 1.500e-02, 1.822e-02, 1.063e-01, -9.810e-02, -6.572e-03, -8.266e-02, 1.112e-02, 1.025e-01, -3.403e-02)); + r += V4(-6.306e-03, 8.006e-03, 6.571e-03, 1.262e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.412e-02, 3.051e-02, 8.225e-02, -1.024e-01, 7.559e-02, -1.081e-01, 2.765e-02, 
-9.220e-03, -8.263e-02, -1.271e-01, 2.911e-02, -4.563e-02, -1.370e-03, 1.308e-02, -5.000e-02, -8.016e-02)); + r += mul(s0_1, M4(-1.237e-01, 3.243e-02, -2.655e-03, -2.804e-02, 1.442e-01, -7.090e-02, -1.282e-01, -8.525e-02, -3.692e-02, -3.291e-02, 4.044e-02, -2.841e-02, 9.882e-02, -1.408e-01, 7.823e-02, 7.577e-02)); + r += mul(s0_2, M4(-4.266e-03, -1.042e-01, -2.781e-03, 1.189e-01, -4.019e-02, 4.284e-02, 3.647e-02, -3.917e-02, 2.519e-02, 2.713e-02, -2.684e-02, -5.395e-02, -3.944e-02, 1.009e-01, 1.614e-02, -3.454e-02)); + r += mul(s0_3, M4(5.231e-02, -1.247e-03, 4.723e-02, 3.690e-02, -3.373e-02, -8.962e-02, 6.245e-02, 7.159e-02, -9.852e-03, -1.045e-01, -3.930e-02, -7.045e-02, 3.296e-03, -2.380e-02, -6.767e-02, 1.425e-02)); + r += mul(s0_4, M4(-5.032e-02, 1.616e-01, 2.793e-02, -6.640e-04, -4.264e-02, -1.984e-03, 7.180e-02, -1.646e-01, -1.706e-03, 1.785e-01, 1.815e-01, 7.529e-02, -1.628e-02, -9.082e-02, -8.031e-04, 2.452e-02)); + r += mul(s0_5, M4(-5.703e-02, -8.024e-02, 7.129e-02, -4.083e-02, 5.158e-03, -2.297e-02, 3.771e-02, 9.569e-03, -1.159e-01, 1.244e-03, 1.017e-01, 1.623e-02, -3.491e-02, 4.120e-02, -2.884e-02, 5.029e-02)); + r += mul(s0_6, M4(-1.580e-02, -3.969e-02, 6.006e-03, 4.013e-02, -7.220e-02, -2.069e-02, 1.059e-01, -3.396e-02, 1.080e-03, -4.293e-02, -2.114e-02, 9.736e-02, -1.408e-02, -1.083e-02, -1.794e-01, 1.529e-02)); + r += mul(s0_7, M4(-8.711e-03, -6.727e-02, 1.151e-02, 8.747e-02, 1.200e-01, 4.677e-02, -2.146e-02, -5.483e-02, -1.362e-01, 7.609e-02, -6.761e-02, 2.470e-02, -1.201e-02, 2.028e-02, -5.247e-02, 2.490e-02)); + r += mul(s0_8, M4(-1.624e-01, -1.219e-02, 8.491e-03, -1.459e-02, -6.650e-02, 5.021e-02, 8.549e-02, 1.407e-01, -4.661e-02, -1.431e-02, -8.125e-02, -1.043e-01, 5.141e-02, 8.380e-02, 3.613e-02, 7.793e-02)); + r += mul(s1_0, M4(1.073e-02, 2.244e-02, 6.590e-02, -5.961e-02, -2.497e-02, 1.584e-02, 5.910e-02, -5.007e-02, 3.457e-03, -2.497e-02, 2.563e-02, -1.448e-02, 4.348e-02, -1.611e-01, 3.845e-02, -1.537e-02)); + r += mul(s1_1, M4(-2.009e-01, 
-3.046e-03, -9.702e-02, -1.795e-03, 2.785e-02, -3.820e-02, -7.666e-02, -6.119e-02, 8.082e-02, 2.343e-02, 3.437e-02, -2.124e-02, 4.315e-03, -3.716e-02, -5.562e-02, -1.339e-01)); + r += mul(s1_2, M4(-1.177e-01, -6.318e-02, 1.391e-01, 1.453e-01, -5.396e-02, 2.690e-02, -6.522e-02, -8.111e-02, 3.221e-02, 4.900e-02, -4.916e-02, -1.069e-01, -4.759e-02, -8.181e-02, -4.537e-02, 1.752e-02)); + r += mul(s1_3, M4(1.239e-01, 3.868e-03, 4.798e-02, 6.149e-02, -7.047e-02, -8.473e-02, -8.423e-02, 3.387e-02, -8.123e-02, -1.806e-02, -4.482e-03, -6.674e-02, 1.751e-01, -1.346e-01, -7.599e-02, -3.724e-02)); + r += mul(s1_4, M4(2.541e-02, 2.182e-01, -1.363e-01, 2.653e-01, -2.468e-01, -8.282e-03, -3.059e-02, -1.982e-01, 1.033e-01, -1.830e-02, 2.651e-01, 1.480e-01, 1.894e-01, 1.038e-01, -1.743e-01, -1.837e-02)); + r += mul(s1_5, M4(5.579e-02, 2.130e-02, 1.556e-01, -1.163e-02, -1.242e-01, -2.948e-02, 1.057e-01, 8.414e-02, -8.984e-02, 4.301e-02, 4.905e-02, 1.295e-01, 5.426e-02, 2.003e-01, -5.208e-03, 2.143e-02)); + r += mul(s1_6, M4(6.868e-02, 6.985e-02, 2.228e-02, 7.773e-02, -6.927e-02, 7.728e-02, 2.786e-02, -3.791e-02, -5.486e-02, -3.999e-02, -1.448e-02, 3.122e-02, 2.140e-01, -1.048e-02, -3.925e-02, -9.152e-02)); + r += mul(s1_7, M4(-7.144e-02, -1.071e-01, 3.923e-02, 1.559e-01, -2.798e-02, -2.728e-02, -4.025e-02, -3.240e-02, -6.226e-03, 3.900e-02, -3.820e-02, 6.497e-02, 1.878e-01, 1.847e-02, -7.449e-02, -1.152e-02)); + r += mul(s1_8, M4(1.180e-01, -7.984e-03, 1.227e-02, 1.984e-01, -1.984e-04, 3.139e-03, -8.780e-02, -4.568e-02, -5.782e-03, 1.962e-02, -1.174e-02, 7.476e-02, 5.276e-02, -5.013e-02, 8.049e-02, -3.299e-02)); + r += mul(s2_0, M4(6.274e-02, -3.757e-02, 4.032e-03, -2.051e-02, 1.837e-03, -1.974e-01, 7.678e-02, -3.274e-01, 5.467e-02, -1.360e-01, -9.023e-03, -2.604e-02, -2.287e-01, -2.250e-01, 1.458e-01, -9.307e-02)); + r += mul(s2_1, M4(3.460e-02, -2.941e-02, -6.582e-02, 6.535e-04, 2.547e-02, -6.687e-02, 1.538e-01, -1.264e-01, -5.819e-02, -8.358e-02, -1.514e-01, -1.045e-01, 
-8.542e-02, -2.199e-02, -4.406e-02, -3.557e-02)); + r += mul(s2_2, M4(4.431e-02, 5.701e-02, 5.174e-02, 5.303e-02, -1.493e-01, 2.189e-02, -5.680e-02, -5.502e-02, -2.864e-02, 2.450e-02, -2.843e-02, 3.108e-02, 1.450e-01, -8.313e-02, 2.644e-02, 5.104e-03)); + r += mul(s2_3, M4(1.122e-02, -2.386e-02, 1.068e-01, -1.310e-01, 1.776e-01, -2.188e-02, 8.302e-02, 4.588e-02, 1.062e-01, 3.849e-02, 7.865e-03, -3.305e-02, -1.519e-01, 2.451e-01, 1.687e-01, -2.046e-01)); + r += mul(s2_4, M4(-6.623e-02, 3.129e-02, -4.648e-02, 3.467e-02, -1.850e-01, 8.717e-02, -1.732e-01, -1.963e-02, 9.305e-02, 5.430e-02, -5.825e-02, 7.086e-02, 4.960e-02, 1.532e-01, 1.190e-01, -6.810e-02)); + r += mul(s2_5, M4(1.002e-02, 4.163e-02, 9.688e-02, -5.312e-02, -8.392e-02, -1.959e-02, -3.397e-02, -5.165e-02, 4.046e-02, 6.424e-02, -6.193e-02, 1.590e-01, -8.976e-02, -5.310e-02, 3.080e-02, 1.688e-02)); + r += mul(s2_6, M4(1.002e-01, 1.196e-01, 1.772e-02, 1.004e-01, -4.129e-02, 4.581e-02, 1.411e-01, -5.263e-02, 7.226e-02, 1.399e-03, -4.616e-02, 2.784e-02, 1.522e-01, 1.689e-01, 5.797e-02, -1.318e-01)); + r += mul(s2_7, M4(-1.265e-01, 1.050e-02, -1.032e-01, 9.369e-02, -9.171e-02, -1.025e-01, 7.834e-04, 8.201e-02, -1.705e-02, -9.497e-02, 6.001e-03, -3.667e-03, -8.255e-02, 1.664e-01, -1.120e-01, -7.231e-02)); + r += mul(s2_8, M4(-2.385e-02, -2.511e-02, -7.844e-02, 5.658e-02, -2.686e-01, -3.069e-02, -1.796e-01, -9.825e-02, 2.015e-02, -4.184e-02, -1.065e-02, 4.393e-02, -2.244e-02, -2.192e-02, 1.323e-01, 7.817e-02)); + r += mul(s3_0, M4(-4.813e-02, 1.033e-01, -1.239e-01, 6.212e-02, 1.533e-01, -2.186e-02, -2.467e-02, 3.213e-02, -1.298e-01, -1.125e-01, -9.258e-02, -1.515e-01, -7.075e-02, -2.428e-03, 7.884e-02, 6.828e-03)); + r += mul(s3_1, M4(5.317e-02, -3.419e-02, 1.167e-01, 9.786e-02, 2.063e-01, -3.340e-02, -9.822e-02, -5.292e-02, -1.874e-02, -2.924e-02, -5.514e-02, -1.358e-01, 9.802e-02, 3.840e-02, 1.669e-01, 3.956e-02)); + r += mul(s3_2, M4(4.065e-02, 2.140e-01, -8.572e-02, 1.068e-01, 7.986e-03, -3.785e-02, 
-1.737e-01, -1.523e-01, -7.119e-02, 7.340e-02, -2.244e-03, -2.892e-01, -5.133e-02, -6.415e-02, 1.915e-02, 6.207e-02)); + r += mul(s3_3, M4(5.088e-02, -1.354e-02, 1.215e-01, -1.009e-01, 1.794e-02, -2.970e-02, -4.194e-02, 1.087e-02, 8.814e-02, 3.610e-02, 1.164e-01, -1.416e-03, 7.457e-02, -1.265e-01, 6.800e-02, -5.186e-02)); + r += mul(s3_4, M4(2.519e-01, -9.643e-02, 1.317e-01, 1.881e-01, -7.010e-02, 1.428e-01, -2.471e-01, 8.716e-02, 5.371e-02, -4.401e-03, 1.313e-01, 2.013e-01, -3.015e-03, -7.544e-02, 8.619e-03, -9.775e-02)); + r += mul(s3_5, M4(-1.297e-02, 4.165e-03, -7.193e-03, 1.914e-02, -7.983e-02, -1.689e-02, -5.825e-02, 1.628e-02, -1.490e-01, -8.353e-02, -2.394e-02, 2.999e-01, 3.445e-02, -2.139e-02, -1.960e-01, -1.962e-02)); + r += mul(s3_6, M4(1.288e-01, 3.710e-02, 3.362e-02, 6.824e-02, 6.457e-02, 4.753e-02, -9.122e-03, 1.365e-01, -1.270e-01, -9.739e-02, -4.656e-02, -4.114e-02, -9.094e-02, 5.110e-02, 1.864e-02, 8.357e-02)); + r += mul(s3_7, M4(-5.630e-02, 4.449e-02, -1.477e-02, 2.891e-02, -5.712e-02, -6.406e-02, -9.827e-02, 1.897e-01, -4.576e-02, -1.096e-02, 1.091e-01, 1.543e-01, -2.284e-02, 7.189e-02, -7.863e-02, 3.536e-02)); + r += mul(s3_8, M4(1.807e-02, -2.432e-02, 1.106e-01, 1.279e-01, -7.728e-02, -3.835e-02, -3.217e-02, 4.699e-02, 3.170e-02, 1.835e-01, -8.757e-02, -1.042e-01, 2.634e-02, 6.942e-02, 8.071e-02, 9.839e-02)); + r += mul(s4_0, M4(-1.538e-01, -2.602e-02, 1.891e-02, -8.224e-03, -1.816e-01, 1.599e-01, 1.674e-01, 1.331e-02, 3.443e-02, 1.460e-01, 1.016e-01, -3.254e-02, 4.689e-02, -5.821e-02, -1.509e-01, 9.806e-02)); + r += mul(s4_1, M4(-1.019e-01, -9.918e-02, 6.636e-02, -1.472e-01, -6.238e-03, -1.363e-02, 1.981e-02, -1.289e-01, 7.159e-02, -1.360e-02, 5.624e-02, 4.866e-03, 1.200e-01, -3.857e-02, 8.216e-02, 1.323e-02)); + r += mul(s4_2, M4(-6.875e-02, -7.830e-03, -1.558e-02, 4.815e-02, 5.018e-02, 4.960e-02, 3.086e-02, 4.705e-02, -8.153e-02, -9.493e-02, 4.873e-02, 8.447e-02, -2.078e-01, -7.069e-02, -3.925e-02, -2.010e-03)); + r += mul(s4_3, 
M4(-7.266e-02, -1.724e-01, -1.151e-01, -8.214e-02, -8.020e-02, 5.829e-03, 5.332e-02, 9.192e-03, 7.477e-02, 1.010e-01, 5.775e-02, 5.201e-02, 4.840e-02, -3.975e-02, -1.657e-01, 3.806e-02)); + r += mul(s4_4, M4(-1.093e-01, -1.978e-01, 5.909e-03, -1.565e-02, 6.150e-02, -2.998e-02, -6.659e-02, 2.187e-02, -1.034e-01, 1.714e-02, -5.031e-02, 2.722e-02, 1.045e-01, -1.683e-01, 7.837e-02, -6.727e-02)); + r += mul(s4_5, M4(-8.220e-02, -7.769e-02, 7.054e-02, -3.796e-03, -4.555e-02, 6.034e-03, -1.139e-01, -1.368e-02, -5.277e-02, 1.387e-02, -7.393e-02, 1.994e-02, 1.304e-02, -1.483e-01, 2.206e-02, 7.279e-02)); + r += mul(s4_6, M4(-1.813e-02, -2.197e-02, 3.052e-02, 6.914e-02, 2.676e-02, -2.537e-02, -6.034e-02, 5.296e-02, 6.004e-02, -6.212e-02, 1.521e-01, 1.622e-01, 1.602e-03, -7.222e-02, -1.101e-02, 2.386e-02)); + r += mul(s4_7, M4(-9.607e-02, 9.544e-03, 5.164e-02, 1.002e-01, -1.144e-01, -1.255e-01, -6.142e-02, 6.983e-02, 1.422e-02, -1.975e-02, -2.548e-01, 7.490e-02, 2.403e-03, -5.085e-02, 5.349e-02, -6.616e-02)); + r += mul(s4_8, M4(-5.790e-02, -8.023e-03, 8.465e-02, 2.406e-02, 1.545e-02, 1.657e-01, -1.985e-01, 4.316e-03, 6.002e-02, -3.890e-02, 7.282e-02, 9.914e-02, 1.402e-02, -7.114e-02, 1.408e-02, -1.331e-02)); + r += mul(s5_0, M4(-3.207e-02, -4.764e-02, -1.063e-02, -5.365e-02, -2.986e-01, -7.219e-02, 1.914e-02, 2.135e-02, 1.244e-02, 1.220e-01, 3.385e-02, 8.071e-03, -4.089e-03, -6.386e-02, -6.054e-02, 9.283e-02)); + r += mul(s5_1, M4(-8.330e-02, -5.058e-02, 3.033e-02, -1.244e-01, 6.578e-03, -1.420e-01, -2.040e-01, 9.333e-02, -2.393e-03, -3.761e-02, -8.599e-02, -6.285e-02, -7.790e-02, -1.335e-01, 2.028e-02, 1.926e-02)); + r += mul(s5_2, M4(7.877e-02, -8.371e-02, -1.002e-02, -5.710e-02, -1.489e-02, 2.493e-02, -5.286e-02, -1.145e-01, -2.937e-02, 2.563e-02, -1.184e-03, -6.418e-02, -1.025e-01, -2.766e-02, 5.943e-02, -8.713e-02)); + r += mul(s5_3, M4(1.068e-01, -9.158e-02, 1.896e-03, 2.792e-02, -8.789e-02, -1.465e-01, -1.578e-01, 8.613e-02, -1.861e-01, -2.352e-03, -7.249e-02, 
2.557e-02, -1.146e-02, -1.058e-01, -1.144e-01, 3.150e-02)); + r += mul(s5_4, M4(-4.093e-02, -1.068e-01, -1.299e-01, -6.073e-02, -7.316e-02, -3.338e-02, -2.007e-01, 1.125e-01, -1.403e-02, -3.370e-02, 7.929e-02, 8.132e-03, -2.240e-02, -9.686e-02, -7.760e-02, -2.562e-01)); + r += mul(s5_5, M4(-1.838e-02, -1.478e-01, 8.911e-02, -9.288e-02, -9.564e-02, 4.152e-02, 5.487e-03, 2.782e-02, 1.532e-05, -6.229e-02, 1.078e-01, -1.351e-01, 8.619e-02, -1.287e-01, 6.642e-02, 6.440e-02)); + r += mul(s5_6, M4(1.045e-01, 5.052e-02, 6.712e-02, -2.107e-02, -9.970e-02, -5.605e-02, -8.618e-02, -1.237e-01, -1.632e-02, -3.516e-02, 9.641e-02, -3.554e-02, 2.565e-02, -1.265e-01, -3.317e-02, 1.816e-01)); + r += mul(s5_7, M4(-1.556e-01, -1.894e-01, 8.868e-02, 1.173e-01, -6.388e-02, -4.323e-02, -4.345e-02, -1.665e-01, -5.893e-02, -1.059e-01, -1.366e-01, 1.528e-01, -6.229e-02, -2.086e-02, -5.327e-02, -4.549e-02)); + r += mul(s5_8, M4(8.045e-03, 4.800e-02, 5.039e-02, 7.994e-02, -1.237e-03, 2.920e-02, -9.876e-02, -1.288e-01, -5.698e-03, -5.525e-02, -3.574e-02, 9.175e-03, 6.371e-02, -1.240e-02, 1.351e-01, 6.024e-02)); + r += mul(s6_0, M4(2.828e-02, 1.965e-02, 1.490e-01, 2.001e-02, -4.527e-02, -1.522e-02, -6.560e-02, 1.899e-02, -7.161e-04, 3.954e-02, 2.182e-02, -8.346e-02, 5.627e-02, 7.930e-02, -4.601e-02, 8.760e-02)); + r += mul(s6_1, M4(8.574e-02, -3.002e-02, -2.937e-02, 1.063e-01, 2.496e-02, -9.862e-02, -1.770e-01, -1.110e-01, -9.144e-02, 1.810e-02, -1.744e-01, -5.627e-02, 1.310e-01, 4.031e-03, -1.585e-01, -4.121e-02)); + r += mul(s6_2, M4(9.241e-02, -2.275e-02, -1.309e-02, 7.213e-02, 1.015e-01, -1.783e-02, -7.431e-03, -5.384e-03, 2.948e-02, 6.340e-03, 3.754e-02, -5.846e-02, 1.207e-01, 1.802e-02, -1.990e-02, 3.674e-02)); + r += mul(s6_3, M4(-1.832e-02, -6.422e-02, 4.143e-02, -1.453e-01, -9.265e-02, 5.180e-02, 9.589e-02, -5.447e-03, 5.748e-03, -7.328e-02, -9.024e-03, 3.179e-02, -1.236e-01, -4.200e-02, 2.265e-01, -2.848e-02)); + r += mul(s6_4, M4(-7.733e-02, -7.612e-03, 6.791e-02, 2.708e-01, 
-1.297e-01, -3.706e-02, 5.483e-03, -1.733e-01, 2.083e-02, -7.664e-02, -1.085e-02, 6.393e-02, -1.301e-01, -7.315e-02, 6.778e-02, 5.425e-02)); + r += mul(s6_5, M4(1.491e-01, -4.559e-02, -1.056e-01, 9.649e-02, -1.003e-01, -7.416e-02, -1.937e-01, -2.114e-01, -1.697e-02, 2.611e-02, -3.463e-02, 1.848e-01, -1.343e-01, -6.746e-02, 1.171e-01, 1.465e-01)); + r += mul(s6_6, M4(1.543e-02, 5.129e-02, 1.042e-01, -7.938e-02, 3.628e-03, 2.461e-02, 1.371e-01, -4.618e-02, -9.957e-02, 8.025e-02, 6.072e-02, -3.525e-02, -9.659e-03, 9.961e-02, -6.593e-02, -1.723e-01)); + r += mul(s6_7, M4(9.083e-02, -1.791e-02, -1.005e-01, -1.208e-01, -2.040e-01, 9.235e-02, -2.770e-01, -3.079e-02, 1.300e-02, 1.283e-01, 4.077e-02, -3.577e-02, -1.172e-02, -1.946e-02, -8.694e-03, -1.229e-01)); + r += mul(s6_8, M4(1.806e-01, -1.792e-01, -3.185e-02, 2.441e-03, -3.422e-03, 8.922e-02, -4.029e-02, 6.716e-02, -3.862e-02, 8.746e-03, -4.267e-02, -1.570e-02, 1.449e-01, 2.626e-02, -7.646e-02, -5.130e-02)); + r += mul(s7_0, M4(-2.875e-02, 8.786e-02, 1.334e-02, 6.942e-02, 8.895e-02, -7.631e-02, -1.100e-01, 9.594e-02, 2.409e-02, -8.514e-02, -3.642e-02, -9.446e-02, -5.823e-02, 3.858e-02, -8.311e-02, 4.733e-02)); + r += mul(s7_1, M4(-3.546e-02, -3.038e-02, -2.878e-02, 4.740e-02, -8.561e-02, -1.373e-01, 8.296e-02, -1.290e-02, -3.045e-02, 1.038e-01, -2.055e-01, -2.954e-02, 3.776e-02, -5.471e-02, 6.002e-02, 7.293e-02)); + r += mul(s7_2, M4(6.057e-02, 1.546e-02, 5.669e-02, 3.663e-02, 4.085e-02, -2.098e-02, 7.776e-02, 6.614e-02, -1.542e-02, -9.302e-02, 5.701e-02, -9.272e-02, -1.250e-01, 4.578e-02, -8.437e-03, 9.115e-02)); + r += mul(s7_3, M4(6.185e-02, -2.202e-01, 7.967e-03, -9.857e-02, -2.723e-02, 1.841e-02, 1.273e-01, 3.254e-02, 2.590e-01, 8.602e-02, 9.159e-02, -4.988e-02, 3.447e-02, 2.103e-02, 6.784e-02, 3.961e-02)); + r += mul(s7_4, M4(2.385e-02, -7.818e-02, 1.619e-01, -3.209e-03, -9.151e-02, 1.217e-01, 6.772e-02, 1.509e-02, -2.823e-02, -8.978e-03, -1.980e-01, -1.466e-01, -9.524e-02, -5.529e-02, -7.337e-02, -6.094e-02)); 
+ r += mul(s7_5, M4(2.174e-03, -5.178e-02, 6.162e-02, 4.149e-02, 7.712e-02, 5.177e-02, 5.302e-02, 3.632e-03, 2.082e-02, -7.131e-02, -1.286e-01, 8.588e-02, -7.042e-02, -7.842e-02, 8.844e-02, 9.351e-02)); + r += mul(s7_6, M4(-6.205e-02, 4.255e-02, -8.082e-02, -2.441e-02, -3.209e-02, -2.325e-02, 9.024e-02, 5.961e-02, 1.037e-03, -7.930e-02, 3.916e-02, 2.978e-02, -4.254e-02, -5.137e-03, 5.158e-02, -1.721e-01)); + r += mul(s7_7, M4(1.791e-02, 9.385e-02, -8.735e-02, -2.050e-02, -1.073e-01, -4.466e-03, 3.865e-02, 9.567e-02, -9.625e-03, -7.356e-03, -2.480e-01, 7.732e-04, -6.223e-02, -5.133e-02, -3.773e-02, -5.325e-02)); + r += mul(s7_8, M4(1.846e-01, 8.673e-02, -3.553e-02, -6.473e-02, -1.091e-02, 9.152e-02, 1.568e-01, 1.596e-01, 3.280e-03, 7.550e-02, -1.286e-01, -1.041e-01, 3.720e-02, 1.909e-02, 3.416e-02, 1.090e-01)); + r += V4(-1.363e-02, -2.900e-02, -2.210e-02, 1.111e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-7.126e-02, -7.247e-04, -3.122e-02, 2.116e-02, -3.723e-02, -9.954e-02, 2.972e-02, 4.943e-02, 8.003e-03, -2.690e-02, 7.984e-02, 6.263e-02, -8.211e-02, -6.764e-02, -4.135e-02, 6.039e-02)); + r += mul(s0_1, M4(5.830e-02, 1.285e-02, 7.099e-02, -4.560e-03, 8.228e-02, -1.186e-03, -2.813e-02, 3.480e-02, 5.724e-03, 1.300e-01, -1.130e-01, 9.393e-02, -1.396e-01, -1.024e-01, 1.050e-01, -3.689e-02)); + r += mul(s0_2, 
M4(-1.445e-01, -1.361e-02, -1.928e-03, -7.339e-02, 4.109e-02, 3.337e-02, -7.051e-02, 4.490e-02, 1.794e-02, 1.487e-01, 6.199e-02, -3.464e-02, 9.704e-03, 1.357e-01, -1.067e-01, -3.007e-03)); + r += mul(s0_3, M4(3.851e-02, 8.292e-02, 6.964e-02, 1.097e-01, -9.202e-03, -3.033e-02, 2.125e-02, -1.388e-01, 1.225e-01, 2.218e-01, -7.917e-02, 3.989e-02, 7.261e-02, 8.114e-02, 2.188e-02, 1.304e-01)); + r += mul(s0_4, M4(-7.015e-02, -1.509e-01, 3.573e-02, 9.922e-02, -1.031e-01, -1.975e-01, -1.503e-01, -1.347e-01, -1.262e-02, 9.282e-02, 1.539e-02, 9.161e-02, -1.169e-01, -9.168e-02, 1.710e-01, -3.346e-01)); + r += mul(s0_5, M4(1.647e-02, 4.881e-02, 1.414e-01, -6.869e-02, 6.553e-02, 1.547e-01, -1.020e-01, -5.219e-02, -8.433e-02, 4.309e-02, 1.166e-01, -1.088e-01, 1.023e-02, -5.849e-02, 1.620e-02, 8.377e-02)); + r += mul(s0_6, M4(-4.575e-02, 6.467e-02, 1.170e-03, -2.556e-02, 3.145e-02, 9.286e-03, 3.206e-02, -6.519e-02, -2.116e-02, -6.765e-02, 1.437e-01, -2.522e-02, 3.575e-02, 4.578e-02, 2.849e-02, -1.786e-02)); + r += mul(s0_7, M4(-4.970e-02, -1.295e-02, 6.483e-02, 1.023e-02, 3.229e-02, -1.275e-01, 1.493e-02, -1.851e-02, 2.568e-02, -1.378e-01, -1.731e-01, 7.308e-02, -1.196e-02, -1.353e-01, -5.414e-02, -5.738e-02)); + r += mul(s0_8, M4(-5.610e-02, 1.264e-02, -2.637e-02, -6.981e-03, -7.200e-02, -3.804e-02, -3.347e-02, -6.615e-02, -6.806e-02, 1.835e-02, 5.020e-03, -5.896e-02, -8.009e-03, -5.058e-02, -2.846e-02, -3.286e-03)); + r += mul(s1_0, M4(2.826e-02, -6.972e-02, 6.646e-02, -1.937e-02, -4.960e-02, 4.112e-02, -4.586e-03, 4.412e-02, -1.050e-01, 7.587e-02, -3.497e-02, 1.925e-02, -4.369e-02, -5.518e-03, -8.173e-02, 9.181e-04)); + r += mul(s1_1, M4(8.079e-02, 5.067e-02, -4.166e-02, -1.012e-01, -4.963e-02, 3.017e-01, -1.724e-02, 8.974e-03, -3.852e-02, 1.977e-01, -4.152e-02, 5.895e-03, -1.592e-01, -3.467e-02, -1.516e-01, -2.013e-02)); + r += mul(s1_2, M4(-8.313e-02, -8.997e-02, 3.377e-02, -2.411e-02, -2.017e-01, 1.865e-01, -5.694e-02, 8.816e-02, 1.376e-01, 1.049e-01, -2.350e-02, 4.271e-02, 
9.899e-02, 6.745e-02, -3.975e-02, -6.863e-02)); + r += mul(s1_3, M4(5.925e-02, 1.559e-01, 6.156e-03, 4.048e-02, 1.250e-02, 2.398e-02, 8.804e-02, -4.237e-02, -4.170e-02, 4.149e-02, 2.181e-02, 7.136e-02, 2.892e-02, 8.658e-02, 2.268e-03, -2.417e-02)); + r += mul(s1_4, M4(-4.868e-02, 7.944e-02, -1.372e-01, -2.727e-02, 2.788e-02, -1.206e-01, 8.242e-02, -3.327e-02, -2.795e-01, 1.392e-01, 1.089e-01, 2.941e-02, -1.655e-01, 8.764e-03, 1.022e-01, -1.603e-01)); + r += mul(s1_5, M4(-2.580e-02, -2.588e-02, 4.601e-02, 8.667e-02, 1.813e-02, 3.697e-02, 1.031e-01, -8.340e-02, 5.900e-02, -5.842e-02, 3.073e-02, -6.901e-02, -8.921e-02, -1.443e-01, 9.465e-02, -5.144e-02)); + r += mul(s1_6, M4(-1.368e-02, -1.491e-01, 2.026e-02, 2.348e-02, 3.990e-02, -2.543e-02, -4.058e-02, -1.896e-03, 5.931e-02, -4.988e-02, 9.193e-02, 3.546e-02, -6.484e-02, -4.291e-02, -1.792e-01, -6.140e-02)); + r += mul(s1_7, M4(2.351e-02, 6.548e-02, -1.768e-01, -9.328e-02, -1.765e-02, 2.108e-03, -2.375e-01, -3.263e-02, 1.729e-01, -2.271e-01, -5.603e-02, 4.236e-02, 8.235e-02, 8.566e-02, 2.103e-01, -1.981e-02)); + r += mul(s1_8, M4(9.506e-02, 1.263e-02, 1.854e-02, 6.688e-04, -1.127e-02, 1.481e-03, 3.282e-02, -4.943e-02, 7.309e-02, -3.832e-02, -2.619e-02, 5.873e-03, 6.340e-02, -4.196e-02, -5.152e-03, -2.681e-02)); + r += mul(s2_0, M4(-7.027e-02, -6.243e-02, -1.227e-01, 4.001e-03, -5.670e-02, 1.637e-01, 1.078e-02, 1.421e-01, -9.280e-02, 8.196e-03, 4.920e-02, -1.188e-02, -7.051e-03, -1.646e-01, 1.017e-01, 9.108e-02)); + r += mul(s2_1, M4(5.477e-02, 9.244e-02, -7.607e-02, 8.935e-02, -4.048e-02, -1.733e-01, 1.030e-01, -2.324e-01, 1.824e-02, -6.173e-02, -9.619e-02, -1.130e-02, 4.611e-01, -5.878e-02, 9.265e-02, -2.637e-02)); + r += mul(s2_2, M4(-1.460e-01, 5.308e-02, -2.754e-02, 2.505e-02, -1.231e-01, 1.615e-01, -8.279e-02, -1.995e-02, -9.777e-02, -4.318e-02, -2.021e-02, 8.769e-02, -4.796e-02, 1.218e-01, -2.690e-02, -6.418e-02)); + r += mul(s2_3, M4(1.154e-02, 1.256e-01, 8.180e-02, 2.951e-02, -3.289e-02, 2.349e-02, 1.231e-01, 
3.046e-01, 3.799e-02, 6.160e-02, -1.790e-01, 2.928e-02, 9.993e-02, -1.598e-02, 2.755e-02, 8.909e-02)); + r += mul(s2_4, M4(5.434e-02, 1.287e-02, -1.480e-01, -2.214e-02, 2.240e-01, -2.299e-02, 4.142e-02, 3.686e-01, -5.318e-02, -1.182e-02, 1.023e-01, 9.137e-02, -6.045e-02, 3.257e-01, -1.219e-01, -1.693e-01)); + r += mul(s2_5, M4(-5.925e-02, -3.942e-02, 2.518e-02, 1.538e-01, 9.416e-02, 4.207e-02, 1.825e-03, -2.104e-02, -2.411e-02, 1.415e-02, -1.914e-02, 3.616e-02, -4.726e-02, 7.355e-02, -9.432e-02, -4.546e-02)); + r += mul(s2_6, M4(-7.602e-02, 1.276e-01, 1.036e-01, 6.267e-02, -8.192e-03, 1.104e-01, 3.117e-02, 7.487e-02, -1.647e-01, 3.801e-02, -3.767e-02, 1.686e-02, 8.254e-02, 2.054e-01, 1.282e-01, 1.160e-01)); + r += mul(s2_7, M4(2.513e-02, -4.661e-02, -1.646e-01, 1.059e-01, 4.162e-02, -3.316e-02, -2.061e-02, 1.450e-01, 3.720e-02, 2.961e-02, -2.373e-02, -2.018e-02, -1.052e-02, 1.730e-02, -2.290e-01, 1.025e-01)); + r += mul(s2_8, M4(-1.045e-02, 1.154e-01, -3.244e-02, -1.231e-02, 2.361e-02, 2.220e-01, -4.822e-02, 1.373e-02, 3.730e-02, 2.129e-02, 3.138e-02, -9.593e-02, 5.947e-02, 7.943e-02, -5.219e-02, -5.749e-02)); + r += mul(s3_0, M4(-3.113e-02, -1.308e-01, -1.655e-01, -2.455e-01, 2.974e-02, 9.728e-02, -3.163e-02, 4.485e-02, 5.875e-02, -3.217e-02, 6.025e-02, 4.174e-02, 1.693e-02, 1.690e-01, 2.334e-01, -6.609e-02)); + r += mul(s3_1, M4(7.416e-02, -5.665e-03, -3.190e-02, 2.151e-01, -1.274e-01, 1.052e-01, -7.711e-02, -1.091e-01, 1.016e-01, 2.149e-02, -6.221e-02, 7.541e-02, -5.916e-02, 9.233e-02, 2.208e-02, -1.398e-01)); + r += mul(s3_2, M4(-1.293e-01, -2.505e-01, -4.961e-02, -4.325e-03, -8.909e-02, -5.243e-02, -4.196e-02, -8.554e-02, -6.182e-02, -6.022e-03, -9.220e-02, 1.184e-01, -2.595e-02, 6.671e-02, 5.215e-02, 3.472e-02)); + r += mul(s3_3, M4(6.543e-02, 2.403e-02, -6.021e-02, -1.541e-01, 6.134e-02, -1.275e-03, 1.020e-01, 5.550e-02, 1.106e-01, 1.172e-01, 2.676e-02, 3.177e-02, 1.433e-01, -2.681e-02, 2.454e-01, 9.405e-05)); + r += mul(s3_4, M4(4.769e-02, 6.531e-08, 
-1.651e-01, -6.990e-02, 1.442e-01, 9.021e-02, -2.311e-01, 1.189e-01, 1.624e-01, 2.221e-02, -7.074e-03, -2.182e-01, 5.419e-02, 5.294e-02, 4.279e-02, -1.160e-01)); + r += mul(s3_5, M4(1.730e-01, -9.544e-02, 2.981e-02, 7.448e-02, 9.235e-02, -5.488e-02, 2.427e-02, 1.188e-02, 1.206e-01, 5.847e-02, -1.284e-01, -7.989e-02, 8.960e-02, -6.168e-02, -1.400e-01, -3.726e-02)); + r += mul(s3_6, M4(-7.833e-02, -6.758e-02, -9.048e-03, -1.075e-01, 3.901e-04, -8.693e-02, 2.570e-02, -6.323e-02, -9.009e-02, -2.170e-02, -2.026e-01, -5.850e-02, -4.147e-02, -3.386e-02, 5.005e-02, -3.887e-02)); + r += mul(s3_7, M4(-1.351e-01, -1.409e-01, -2.287e-01, 7.224e-02, 2.347e-02, -5.290e-02, 7.585e-02, 3.648e-03, -1.602e-02, -4.140e-02, 5.352e-02, -9.657e-02, -2.243e-02, -1.901e-01, -1.525e-01, -2.205e-02)); + r += mul(s3_8, M4(7.836e-02, -2.075e-02, -9.366e-03, -6.423e-02, -2.740e-02, -8.143e-02, -4.192e-02, -4.417e-02, 1.902e-01, 6.497e-02, -6.885e-02, 5.171e-02, 1.274e-02, 2.152e-03, -4.668e-02, -6.472e-02)); + r += mul(s4_0, M4(2.638e-03, -1.166e-01, -5.258e-02, -2.857e-02, 3.570e-03, 5.189e-02, -5.776e-03, 5.083e-02, 7.780e-02, 4.474e-02, -1.454e-02, 6.246e-02, -1.195e-02, 7.774e-02, 5.109e-02, 3.706e-02)); + r += mul(s4_1, M4(2.811e-02, -1.047e-01, -2.778e-02, 1.325e-03, -1.898e-01, -1.619e-01, 8.374e-02, -2.495e-02, -1.249e-01, 1.881e-02, 5.845e-02, -1.045e-02, -7.614e-02, 7.772e-02, 3.462e-03, 4.337e-03)); + r += mul(s4_2, M4(-1.081e-01, -1.844e-02, -2.425e-02, -8.712e-02, 9.626e-02, -2.870e-02, 8.200e-03, 7.206e-02, -9.789e-02, -7.239e-02, 1.054e-01, -7.063e-02, 4.623e-02, -9.341e-03, -1.331e-02, -4.766e-03)); + r += mul(s4_3, M4(-1.731e-01, -7.794e-02, -1.038e-01, -3.761e-02, 1.016e-01, -4.419e-02, -3.551e-02, -1.499e-02, -6.625e-02, 6.585e-02, 9.775e-02, 5.992e-02, 3.358e-02, -9.332e-03, 3.855e-04, -3.535e-02)); + r += mul(s4_4, M4(1.253e-01, 3.168e-02, -1.671e-02, 8.456e-02, 7.014e-02, 9.437e-02, -2.283e-02, 8.996e-02, -3.857e-02, 1.580e-01, -1.474e-01, -1.849e-02, -2.709e-03, 
1.190e-01, 1.056e-01, -1.893e-02)); + r += mul(s4_5, M4(-1.756e-02, -1.646e-01, -7.868e-02, 1.745e-02, -1.479e-01, 6.384e-02, -7.011e-02, -8.907e-02, 2.050e-02, 1.111e-01, 1.083e-02, -2.870e-03, 6.436e-02, -3.034e-02, 5.931e-02, -1.074e-02)); + r += mul(s4_6, M4(1.168e-01, -3.357e-02, -1.142e-02, -2.795e-02, 2.887e-02, 3.087e-02, 2.933e-02, 8.376e-02, -1.546e-01, 3.309e-02, 4.429e-02, -1.846e-02, 5.743e-02, -3.849e-02, 1.220e-02, -5.880e-02)); + r += mul(s4_7, M4(-5.729e-02, -3.743e-02, 8.573e-04, -4.337e-03, 7.938e-02, 8.001e-02, 2.539e-02, 1.801e-02, 6.096e-02, 1.785e-01, -3.002e-01, -1.276e-01, -1.379e-02, 1.333e-01, 6.545e-02, -2.278e-02)); + r += mul(s4_8, M4(7.242e-02, -6.667e-02, 1.744e-02, -2.643e-02, 6.848e-02, 9.631e-02, 6.165e-03, -1.126e-02, -1.362e-01, -1.566e-01, 1.365e-01, -5.437e-02, -1.701e-02, 3.085e-02, 6.582e-02, 2.239e-02)); + r += mul(s5_0, M4(3.485e-02, -3.668e-02, -9.379e-02, 4.608e-02, -1.083e-01, 5.040e-02, 1.639e-01, 3.794e-02, 1.706e-01, -1.481e-01, -5.080e-02, -2.196e-02, -6.141e-02, 1.909e-02, 1.798e-01, 1.096e-01)); + r += mul(s5_1, M4(-3.518e-03, -5.432e-02, 3.152e-02, 7.982e-02, 3.017e-02, -2.535e-02, 6.478e-02, -6.047e-02, -1.980e-04, 6.107e-03, -7.009e-03, -9.743e-03, -1.016e-01, -8.004e-03, -4.981e-02, -5.635e-02)); + r += mul(s5_2, M4(-2.465e-02, 7.711e-02, 5.971e-02, -7.692e-02, -6.137e-02, -8.310e-02, -3.373e-02, -1.117e-02, 1.393e-02, -1.375e-02, 1.055e-01, -3.949e-04, 4.322e-02, -6.462e-02, -3.294e-03, -3.712e-02)); + r += mul(s5_3, M4(-2.744e-01, -4.534e-03, -8.512e-02, -6.026e-03, -2.157e-02, -2.000e-01, 9.898e-02, -1.939e-01, -6.091e-02, 5.310e-03, -7.081e-03, -6.215e-02, 9.339e-02, 1.379e-01, 7.161e-02, 9.545e-02)); + r += mul(s5_4, M4(1.003e-01, -7.804e-02, -1.457e-03, 2.427e-01, 5.836e-02, 1.779e-01, 1.408e-01, -2.114e-01, -1.353e-01, 5.005e-03, 5.650e-02, -3.772e-02, -2.707e-01, -6.220e-02, -1.619e-01, 1.186e-02)); + r += mul(s5_5, M4(7.088e-02, -1.084e-01, -2.443e-02, 2.401e-02, -1.880e-01, -2.545e-02, -1.450e-01, 
-1.789e-01, 9.716e-02, 2.647e-02, 1.603e-02, -4.438e-02, 5.517e-02, 1.466e-01, 2.205e-01, 6.970e-02)); + r += mul(s5_6, M4(1.056e-01, -9.803e-02, -6.850e-02, -1.242e-01, 9.164e-02, -4.308e-02, -1.765e-01, -3.360e-02, -1.304e-01, -1.477e-01, 1.813e-02, 3.482e-02, 4.066e-02, -5.432e-02, 7.176e-02, 2.660e-02)); + r += mul(s5_7, M4(1.009e-01, -1.469e-02, -1.816e-02, -4.866e-02, -7.614e-02, 4.750e-02, 1.168e-01, 4.196e-03, 5.586e-02, -2.694e-02, 1.265e-02, -6.180e-02, 5.872e-03, -1.858e-02, -7.922e-02, 3.537e-02)); + r += mul(s5_8, M4(9.445e-02, -8.611e-02, -3.937e-02, -1.209e-02, -7.928e-02, -3.819e-02, 1.253e-02, 3.102e-02, -5.391e-02, 5.845e-02, 5.602e-03, -3.271e-02, 7.669e-02, 2.167e-02, 6.229e-02, -1.607e-02)); + r += mul(s6_0, M4(-3.817e-02, 7.554e-02, 3.810e-02, 1.102e-02, -5.145e-02, -7.391e-02, -9.496e-02, -3.712e-02, -7.890e-02, -3.002e-02, -1.056e-01, -2.948e-02, 7.406e-02, 1.170e-02, -5.408e-02, -9.105e-02)); + r += mul(s6_1, M4(-6.936e-02, -6.525e-02, -4.428e-02, -1.449e-01, -1.186e-01, -2.092e-01, 8.401e-02, -2.094e-02, -8.598e-02, -3.757e-02, -7.045e-02, 3.440e-02, -6.230e-02, 8.633e-02, -3.325e-02, -8.327e-03)); + r += mul(s6_2, M4(6.608e-02, -5.417e-02, -2.856e-02, 4.913e-02, 1.676e-01, -7.598e-02, -1.182e-01, -9.989e-02, -9.116e-02, 3.218e-03, -6.444e-02, -6.054e-02, 1.373e-03, -1.817e-01, 7.257e-02, 4.189e-02)); + r += mul(s6_3, M4(-8.349e-02, -1.761e-01, 1.926e-02, -8.051e-03, 2.890e-02, 5.756e-02, 5.508e-02, 7.201e-02, -6.564e-02, 1.493e-01, 1.557e-01, -4.202e-02, -1.503e-01, -5.685e-02, 4.112e-02, -4.202e-02)); + r += mul(s6_4, M4(-2.522e-01, 9.170e-02, -5.085e-02, 4.871e-02, 1.325e-02, 5.299e-02, -9.726e-02, -1.509e-01, 2.206e-02, 5.833e-02, -1.520e-01, 1.705e-01, 1.815e-01, -8.368e-02, 7.542e-02, -4.656e-02)); + r += mul(s6_5, M4(5.460e-02, -1.326e-01, 9.804e-03, 4.768e-02, 1.948e-02, 1.841e-01, -3.059e-02, -4.378e-02, -5.324e-02, -1.720e-01, 1.640e-01, -1.044e-02, -1.189e-01, -2.363e-02, -3.141e-03, 3.028e-03)); + r += mul(s6_6, M4(-1.256e-02, 
-2.922e-03, -3.309e-02, 6.149e-02, 1.574e-01, 3.430e-02, -1.243e-02, 5.443e-02, 6.524e-02, -2.052e-02, -6.959e-02, 1.194e-02, 4.077e-02, -5.340e-02, -1.997e-02, 1.787e-01)); + r += mul(s6_7, M4(-2.905e-02, 2.323e-02, -7.598e-02, 3.521e-02, 1.162e-01, -8.760e-02, 8.426e-02, -4.821e-02, -1.586e-01, 3.038e-02, 1.047e-02, -4.834e-02, -5.454e-02, 8.682e-02, -5.218e-02, 4.248e-02)); + r += mul(s6_8, M4(4.779e-02, -2.518e-01, -3.873e-02, -9.263e-02, -8.541e-02, 3.432e-02, -4.013e-02, -4.646e-03, 1.992e-02, 3.899e-02, 2.601e-02, -2.114e-02, -1.431e-02, 1.219e-02, -4.987e-02, 5.297e-02)); + r += mul(s7_0, M4(-2.618e-02, 6.244e-02, 4.090e-02, -1.789e-02, -1.284e-01, 7.390e-02, -2.420e-02, -4.756e-02, 2.653e-02, 1.199e-02, 1.500e-02, -8.259e-02, -1.319e-01, 7.918e-02, 7.601e-02, 2.422e-02)); + r += mul(s7_1, M4(-1.305e-01, 4.577e-02, 1.122e-01, -2.843e-03, -3.707e-02, 5.125e-02, 1.366e-01, -1.041e-02, 5.613e-02, -9.621e-02, -8.862e-02, 7.272e-02, -3.575e-02, -5.884e-02, 7.940e-02, -9.598e-02)); + r += mul(s7_2, M4(9.856e-02, 7.648e-02, 9.986e-02, -7.698e-02, 2.003e-01, 1.748e-02, -2.276e-02, -8.972e-02, 3.264e-02, 1.762e-01, 5.533e-02, 1.765e-02, -8.475e-03, 7.884e-02, 3.955e-02, 5.335e-03)); + r += mul(s7_3, M4(-9.397e-02, -3.801e-02, 6.843e-02, 2.567e-02, 1.120e-01, 7.550e-02, 9.477e-02, 9.341e-04, 1.808e-01, 5.695e-02, -3.728e-02, 2.665e-02, 7.058e-03, -5.299e-02, 8.963e-02, -3.561e-03)); + r += mul(s7_4, M4(-3.374e-02, -1.857e-02, -3.614e-02, 8.170e-02, -1.471e-01, 2.489e-02, -1.751e-01, -7.823e-02, 6.604e-02, 1.243e-01, 2.393e-02, 3.367e-01, 1.737e-01, -1.744e-01, -5.261e-02, 6.651e-02)); + r += mul(s7_5, M4(-7.199e-02, -3.134e-02, 5.129e-02, 9.402e-02, -1.373e-01, 3.540e-02, 1.971e-02, 3.625e-02, 1.792e-01, 3.117e-02, 2.413e-02, 2.022e-02, 6.528e-03, 5.192e-02, -8.928e-02, 7.384e-02)); + r += mul(s7_6, M4(-8.301e-02, 7.934e-02, -1.039e-01, -1.798e-02, -6.817e-02, 1.008e-01, 3.415e-02, 2.928e-02, 1.649e-01, 1.101e-02, 1.681e-01, -1.356e-02, 9.431e-02, -3.286e-02, 
-9.072e-03, 2.947e-02)); + r += mul(s7_7, M4(5.793e-02, -8.944e-02, -2.206e-02, 1.027e-01, -4.778e-03, -2.849e-02, 1.601e-02, -2.093e-02, 1.085e-02, 1.385e-01, 7.147e-02, -2.726e-02, -1.256e-01, 7.112e-02, -8.096e-02, -9.431e-02)); + r += mul(s7_8, M4(-3.778e-02, -2.232e-03, -7.789e-02, 2.074e-02, -2.782e-02, -2.413e-02, -1.399e-01, -5.036e-02, 6.334e-02, 2.185e-02, -8.764e-02, 1.518e-02, 4.458e-02, 7.670e-02, -1.081e-03, -5.524e-02)); + r += V4(1.175e-02, 3.705e-03, 4.932e-03, 6.758e-03); + return r; +} + +void Pass9(uint2 blockStart, uint3 tid) { /* Pass 9 entry point: computes one coordinate gxy from tid.x and blockStart, samples l0..l3 at the nine offsets around it, and stores the results of f0..f3 into t0..t3 at gxy. */ + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; /* coordinate lies outside GetInputSize(): nothing is written */ + } + float2 pos = (gxy + 0.5) * pt; /* pos and pt are not referenced by name below; presumably they are consumed by the l0..l3 sampling macros defined outside this view - TODO confirm */ + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); /* s0_0..s0_8: l0 evaluated at offsets (x, y) with x and y in {-1, 0, 1}, listed row by row */ + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); /* s1_k = -max(-s0_k, 0): keeps s0_k where it is negative and is zero elsewhere; must be taken before s0_k is overwritten below */ + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); /* s0_k now holds only its non-negative part; the same sample-then-split sequence repeats for l1 (s2/s3), l2 (s4/s5) and l3 (s6/s7) */ + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8,
0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); /* all 72 values (s0_* .. s7_*) are passed, in the same order, to each of f0..f3 below */ + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0,
s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 10 +//!DESC conv9 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4,
V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.746e-02, -1.063e-02, -8.489e-02, -3.248e-02, 5.400e-02, 4.809e-01, -2.443e-03, 5.665e-01, -3.019e-02, -2.483e-02, 4.610e-02, 1.613e-01, -5.230e-02, 5.904e-02, 8.962e-02, 5.099e-03)); + r += mul(s0_1, M4(-2.674e-02, -7.768e-03, 6.571e-02, 8.847e-02, -3.693e-01, -3.040e-01, 1.111e-01, 3.321e-01, -3.900e-02, -5.431e-02, -7.441e-02, 2.141e-02, 3.205e-02, 1.226e-01, -8.272e-02, 8.495e-02)); + r += mul(s0_2, M4(1.091e-01, 3.933e-02, -3.238e-03, 6.477e-02, -3.042e-02, 4.195e-01, 1.768e-03, -8.476e-02, -6.201e-02, 2.166e-03, 4.313e-02, 5.970e-02, 1.758e-01, -7.663e-02, 1.494e-01, 4.458e-02)); + r += mul(s0_3, M4(-1.272e-02, 5.261e-02, 9.092e-02, -7.005e-03, 9.554e-02, 3.528e-02, -4.595e-01, -2.179e-01, 1.709e-01, 1.809e-02, -2.651e-02, 6.484e-02, -4.482e-02, 1.309e-01, -6.953e-04, -1.990e-02)); + r += mul(s0_4, M4(-9.892e-03, -1.371e-01, -1.750e-01, 2.506e-03, -1.380e-01, 1.237e-01, 3.433e-01, 1.102e-01, 8.615e-02, 3.165e-02, 1.141e-01, 6.900e-02, -5.708e-02, -2.790e-01, -1.438e-01, -4.015e-02)); + r += mul(s0_5, M4(5.655e-02, 8.936e-02, 3.225e-02, -1.050e-01, 1.298e-01, -2.953e-01, -8.823e-02, 1.979e-01, 2.702e-02, 1.291e-01, -7.544e-02, 4.281e-02, -7.200e-02, 6.900e-02, 8.878e-02, -1.220e-01)); + r += mul(s0_6, M4(9.888e-02, -1.017e-02, 2.095e-02, -3.808e-02, -8.481e-01, 3.486e-01, -3.861e-01, -6.509e-01, -7.530e-02, 5.185e-02, 4.696e-02, 3.270e-02, -8.286e-02, 7.794e-02, -2.126e-02, 9.819e-03)); + r += mul(s0_7, M4(-2.224e-02, -6.169e-02, -1.213e-01, -4.395e-02, 1.505e-01, -5.035e-02, 2.646e-01, 
3.699e-02, -1.477e-01, 4.680e-02, -9.876e-02, 1.034e-01, 4.846e-02, -3.061e-02, 3.804e-02, -1.026e-01)); + r += mul(s0_8, M4(1.750e-02, 3.698e-02, -3.884e-02, -1.252e-02, -1.126e-01, -2.019e-02, 2.261e-01, -1.821e-02, -3.083e-02, 8.816e-02, 3.508e-02, 9.136e-05, -1.009e-02, 3.874e-03, -3.811e-02, 8.129e-02)); + r += mul(s1_0, M4(-4.230e-03, -2.298e-04, 1.523e-01, -6.054e-02, -6.622e-02, -2.713e-02, -6.034e-02, -6.909e-02, -6.743e-02, -1.200e-01, -3.476e-02, -2.414e-02, -7.619e-02, -1.420e-01, -5.435e-02, 4.688e-02)); + r += mul(s1_1, M4(1.250e-01, 1.436e-01, 8.983e-02, 3.571e-01, -2.948e-02, -3.203e-02, -2.803e-02, -4.456e-02, -1.587e-01, 5.683e-02, 1.874e-01, -1.582e-01, 1.688e-03, -1.092e-02, -3.675e-02, -1.959e-02)); + r += mul(s1_2, M4(-2.990e-03, 1.935e-01, 3.609e-02, 1.016e-02, -2.889e-02, 2.713e-02, -2.222e-02, -9.948e-02, 1.432e-02, 6.372e-02, -6.483e-02, -1.950e-01, 1.130e-01, -2.562e-03, 6.376e-02, -1.279e-01)); + r += mul(s1_3, M4(-6.518e-02, 6.288e-02, 2.188e-01, 6.239e-02, 3.956e-03, -7.743e-03, 1.854e-01, -8.057e-02, 3.433e-02, -3.217e-02, 5.964e-02, -1.372e-01, 9.831e-02, -7.300e-02, 1.069e-01, -3.985e-02)); + r += mul(s1_4, M4(4.612e-02, -2.329e-01, -1.418e-01, 6.836e-02, 2.446e-02, 1.418e-01, 1.809e-02, -4.002e-02, -3.699e-02, 2.437e-01, -1.954e-01, 1.403e-01, 7.050e-02, 4.695e-02, -2.736e-02, 3.702e-02)); + r += mul(s1_5, M4(1.983e-03, -9.384e-02, -1.356e-01, -1.143e-01, 4.695e-02, -9.757e-02, -3.093e-03, -5.178e-02, 9.466e-03, -1.137e-01, -1.644e-01, -9.603e-02, 1.446e-01, 1.238e-01, -1.503e-01, -6.958e-02)); + r += mul(s1_6, M4(-6.411e-02, 5.986e-02, 3.339e-02, -3.836e-02, 8.848e-02, 6.235e-03, 5.062e-02, -6.397e-02, 5.037e-02, 5.440e-04, -5.456e-02, -9.788e-02, 2.729e-02, 5.484e-02, -8.123e-02, -1.712e-03)); + r += mul(s1_7, M4(-2.417e-03, -1.044e-01, -2.962e-02, -1.297e-01, -8.800e-02, 1.249e-01, -1.124e-01, 4.114e-02, -4.990e-02, 5.685e-02, -2.012e-01, 1.477e-01, 3.261e-02, 6.424e-02, 1.169e-01, -6.667e-02)); + r += mul(s1_8, M4(-3.504e-02, 
-1.125e-01, -9.715e-02, -6.348e-02, -7.047e-04, 1.460e-01, -2.088e-02, 3.352e-02, -1.966e-02, -7.403e-03, 7.786e-02, -8.478e-02, -2.298e-02, 1.993e-01, -8.736e-02, 7.286e-03)); + r += mul(s2_0, M4(7.822e-02, 3.222e-02, -1.139e-02, -1.078e-01, -6.741e-02, 4.379e-02, -7.163e-02, -1.814e-01, 4.628e-02, -6.410e-02, -6.759e-02, 1.772e-01, -5.401e-02, 8.797e-02, -1.319e-01, -1.376e-01)); + r += mul(s2_1, M4(1.437e-02, 1.314e-02, 1.388e-01, 1.275e-01, 5.123e-02, 1.599e-01, 2.074e-01, 8.537e-02, 3.981e-02, -1.166e-01, 1.190e-01, -4.606e-05, 2.425e-01, -3.455e-02, 7.800e-02, 3.116e-02)); + r += mul(s2_2, M4(-2.019e-02, 1.103e-01, -8.463e-02, 8.798e-02, 6.113e-02, 8.817e-02, 6.368e-02, -9.119e-02, -2.864e-02, -1.606e-01, 1.183e-02, -1.085e-01, 9.193e-02, -8.555e-02, 7.734e-02, -2.207e-02)); + r += mul(s2_3, M4(2.104e-02, 1.017e-01, 1.313e-01, 3.233e-02, 1.792e-01, 4.766e-02, -1.387e-01, -4.995e-02, -6.797e-02, 1.045e-01, -8.918e-02, 6.009e-02, -4.944e-02, 3.522e-02, 8.032e-02, -1.131e-01)); + r += mul(s2_4, M4(-1.039e-02, -8.695e-02, -1.877e-03, 8.820e-02, 6.811e-02, 4.012e-01, -7.742e-02, -1.753e-01, 1.521e-01, 1.809e-02, -1.851e-01, -1.758e-01, -2.625e-02, 8.133e-02, 8.653e-02, -2.330e-02)); + r += mul(s2_5, M4(-6.217e-02, -1.063e-01, -5.858e-02, 9.908e-02, 2.980e-01, 1.273e-01, 2.495e-01, -2.617e-01, 3.477e-02, -8.927e-02, 4.738e-02, -8.602e-03, 6.325e-03, -2.658e-02, 8.764e-02, -1.869e-03)); + r += mul(s2_6, M4(1.724e-02, -3.356e-02, -5.360e-02, -2.070e-02, -1.485e-02, 5.440e-04, 4.944e-02, -1.195e-01, -1.795e-01, -9.285e-02, 6.373e-02, 1.272e-01, 1.539e-01, -3.187e-02, -2.463e-02, -2.533e-02)); + r += mul(s2_7, M4(9.800e-02, 4.864e-02, -1.405e-01, 4.289e-02, 5.748e-02, -1.004e-01, 3.591e-01, -1.286e-01, -2.668e-02, -1.087e-01, -6.929e-03, -1.889e-01, 2.281e-02, 6.056e-02, -2.504e-02, 4.111e-02)); + r += mul(s2_8, M4(9.116e-03, -7.397e-02, -2.395e-02, -2.543e-02, 1.348e-01, 2.859e-01, 1.487e-02, 1.299e-01, -2.400e-02, -7.597e-02, 9.158e-02, -6.117e-02, 8.673e-02, 
-5.555e-02, -8.472e-02, -5.237e-02)); + r += mul(s3_0, M4(-2.097e-02, -8.450e-02, 4.401e-02, -5.289e-02, -3.726e-02, -1.816e-03, -6.092e-02, -1.044e-01, 8.542e-02, -7.704e-02, 1.390e-02, 1.826e-02, -3.318e-02, 2.143e-01, 2.861e-02, 1.425e-01)); + r += mul(s3_1, M4(8.973e-02, -1.307e-01, -1.220e-01, -6.823e-02, 1.085e-02, -3.867e-02, 6.683e-02, 9.690e-02, 1.379e-02, 6.643e-02, -9.761e-02, 2.067e-02, -2.649e-02, 1.353e-01, -1.133e-01, 9.359e-03)); + r += mul(s3_2, M4(-6.108e-02, 6.151e-02, 4.260e-02, 5.874e-02, 1.029e-02, -1.497e-01, 3.234e-02, -3.021e-02, -7.880e-02, -1.415e-01, 1.451e-02, 4.842e-02, -7.586e-02, 5.174e-02, -1.371e-01, -6.023e-02)); + r += mul(s3_3, M4(-4.003e-02, 5.891e-02, 4.206e-02, 2.595e-02, -4.624e-04, -5.921e-02, -9.890e-02, -1.363e-01, -5.014e-02, 6.857e-02, -1.145e-01, -7.750e-02, 1.879e-01, 2.044e-01, 3.940e-02, 9.905e-02)); + r += mul(s3_4, M4(-1.609e-01, -1.548e-01, -9.780e-02, -7.351e-03, -1.326e-01, 1.169e-01, -2.296e-02, 1.092e-01, 6.315e-03, 5.901e-02, -1.063e-01, 8.840e-02, 1.121e-01, -8.684e-02, -7.752e-02, -1.278e-01)); + r += mul(s3_5, M4(2.414e-02, -9.160e-02, -3.597e-02, 2.934e-02, -7.608e-04, 1.769e-01, 1.248e-01, 5.430e-02, 4.766e-02, -9.389e-03, 1.599e-01, 3.968e-02, 1.360e-01, -1.909e-01, 7.670e-02, 2.346e-01)); + r += mul(s3_6, M4(-1.540e-01, -1.227e-01, -5.861e-02, 5.111e-03, -3.895e-02, -7.678e-02, -4.987e-02, -4.852e-02, 5.709e-02, 4.096e-03, -2.075e-02, 5.795e-02, -9.926e-02, -1.391e-01, 1.977e-02, -3.418e-02)); + r += mul(s3_7, M4(7.224e-03, 1.367e-01, -1.950e-01, -1.376e-02, -8.416e-02, -7.056e-02, 3.701e-02, -5.724e-02, 1.451e-02, 1.350e-02, -6.172e-02, -1.460e-01, -6.291e-02, 3.015e-03, -9.637e-02, -4.193e-02)); + r += mul(s3_8, M4(-2.196e-02, 4.590e-02, 3.496e-02, 7.020e-03, 2.558e-02, 3.325e-02, -7.600e-02, 6.062e-02, 4.024e-02, 6.211e-02, 5.333e-02, -1.504e-02, -7.808e-03, 1.016e-02, -2.206e-01, 5.911e-02)); + r += mul(s4_0, M4(1.141e-01, 5.394e-02, -1.343e-01, 2.928e-02, 7.011e-02, 4.775e-02, -4.685e-02, 
6.133e-02, 1.138e-01, 4.619e-02, 1.735e-01, 1.146e-01, -9.044e-02, -5.406e-02, -7.782e-02, -2.691e-02)); + r += mul(s4_1, M4(-7.985e-02, -2.100e-03, -7.870e-02, -1.137e-01, -1.776e-01, 2.370e-01, -6.002e-02, -2.018e-02, -2.921e-02, 4.966e-02, -1.691e-01, -1.508e-01, -1.700e-01, -9.586e-02, -6.593e-02, -8.877e-02)); + r += mul(s4_2, M4(6.554e-02, 8.899e-02, -6.075e-02, 1.546e-01, -4.404e-02, -2.603e-02, 1.095e-01, -3.720e-02, -1.349e-01, -1.897e-02, 1.437e-01, -7.739e-02, -2.546e-02, -4.693e-02, -7.665e-02, -1.305e-01)); + r += mul(s4_3, M4(-5.318e-02, 4.581e-02, 1.697e-02, 1.420e-01, 8.522e-02, -1.481e-01, 8.667e-02, -6.982e-02, -1.240e-01, 3.817e-02, -1.287e-01, 2.916e-01, -1.577e-01, 4.871e-02, 2.518e-02, -4.820e-02)); + r += mul(s4_4, M4(-1.995e-02, -2.088e-01, -2.847e-01, 3.681e-02, -2.316e-01, 1.783e-02, 3.289e-02, -1.353e-01, -1.544e-01, -2.805e-01, -4.242e-02, -3.656e-02, 1.272e-01, -2.347e-02, -1.163e-01, -1.299e-01)); + r += mul(s4_5, M4(-6.111e-02, -2.113e-01, -1.109e-01, -2.393e-01, 1.231e-03, -8.862e-02, 1.512e-03, 4.795e-03, 8.128e-02, -4.732e-02, 9.545e-02, -1.428e-01, 7.313e-02, -1.712e-01, 8.908e-02, -5.289e-02)); + r += mul(s4_6, M4(-2.565e-02, 9.523e-02, 2.868e-02, 9.607e-02, 8.456e-02, -6.568e-02, -3.945e-02, 2.131e-02, 9.711e-02, -1.713e-01, -7.788e-02, -6.405e-02, 1.443e-01, 1.123e-01, -1.300e-02, -3.461e-02)); + r += mul(s4_7, M4(-5.005e-02, 7.592e-02, 1.669e-02, 4.349e-03, -1.532e-01, -2.379e-02, -2.528e-02, -9.192e-02, 1.091e-01, -3.325e-01, 1.515e-01, -4.767e-02, -1.799e-02, 1.858e-02, 2.747e-02, -1.220e-01)); + r += mul(s4_8, M4(8.359e-02, -6.374e-02, -2.862e-02, 1.920e-01, 9.856e-02, 5.909e-02, 4.765e-02, 9.984e-02, -9.049e-02, 2.197e-01, -1.110e-01, 8.181e-02, 8.008e-02, -2.233e-01, 5.183e-02, 3.313e-02)); + r += mul(s5_0, M4(-3.837e-02, -2.881e-02, -7.039e-02, -1.581e-02, 1.084e-01, 8.017e-02, -3.526e-02, 7.989e-02, -5.161e-02, -3.689e-02, 1.312e-02, -1.418e-01, -6.472e-02, 9.976e-02, 6.215e-02, 3.439e-02)); + r += mul(s5_1, 
M4(1.005e-01, 5.089e-02, 3.631e-02, 5.686e-02, -3.182e-02, 6.229e-02, -4.766e-02, 3.519e-02, 6.282e-02, 9.004e-02, 3.656e-02, -8.098e-02, -1.453e-01, -9.801e-02, -1.470e-01, -1.058e-01)); + r += mul(s5_2, M4(1.509e-01, 5.055e-02, 3.283e-02, -2.458e-03, 4.075e-02, -9.094e-02, 6.107e-02, -4.027e-02, -9.183e-02, 1.017e-01, 3.388e-02, -1.159e-01, -6.618e-02, -9.384e-03, 5.678e-02, 2.558e-02)); + r += mul(s5_3, M4(6.552e-02, 2.544e-03, -1.213e-01, -3.366e-02, 4.405e-02, -6.705e-03, 1.764e-01, 2.911e-02, -1.539e-01, 3.564e-02, 5.182e-02, 3.039e-02, -8.454e-03, 5.669e-02, 1.040e-01, -6.103e-02)); + r += mul(s5_4, M4(-1.056e-01, 2.622e-02, -1.184e-01, 4.384e-02, -2.213e-01, 1.339e-02, 1.164e-01, -3.597e-02, -7.377e-02, -6.467e-02, -1.230e-01, -1.001e-02, 9.561e-03, 1.961e-02, 7.407e-03, 8.048e-02)); + r += mul(s5_5, M4(-1.135e-01, 1.733e-03, -1.516e-02, -1.174e-01, -1.122e-02, -1.409e-01, -1.217e-01, 1.516e-02, 7.002e-02, -1.479e-01, -1.264e-01, 1.806e-01, -7.715e-02, 2.212e-02, 7.268e-02, 5.128e-02)); + r += mul(s5_6, M4(-5.913e-02, 8.225e-02, 1.093e-01, -1.635e-02, 8.655e-02, 5.632e-02, -8.827e-02, -3.750e-02, -5.103e-02, 2.164e-02, -8.208e-02, 4.437e-02, 9.051e-02, -3.004e-02, 7.682e-02, -1.150e-01)); + r += mul(s5_7, M4(8.483e-02, -2.123e-02, 6.602e-02, 1.265e-01, 6.961e-02, 3.508e-02, -3.658e-02, -6.129e-02, 3.346e-02, 5.090e-02, 5.109e-02, 2.092e-02, -4.876e-02, 6.051e-02, -1.371e-01, 9.408e-02)); + r += mul(s5_8, M4(9.064e-02, -7.780e-02, 4.702e-02, 9.918e-02, 4.362e-02, 3.045e-02, -3.481e-03, -2.898e-02, -2.436e-02, 5.820e-03, 2.807e-02, -1.516e-01, 6.365e-02, -5.261e-02, 2.994e-04, 6.343e-02)); + r += mul(s6_0, M4(1.103e-01, -9.613e-02, -2.650e-01, -1.155e-01, -1.295e-02, -5.461e-02, -9.266e-02, -1.543e-01, 8.375e-02, -2.737e-03, 3.942e-02, -8.917e-02, -1.190e-02, 7.522e-02, 9.202e-02, 3.157e-02)); + r += mul(s6_1, M4(-1.159e-01, 6.081e-02, -1.577e-01, 1.546e-01, 1.597e-01, 6.105e-02, 1.902e-02, 1.799e-02, -2.426e-01, -1.284e-01, 1.009e-01, -1.209e-01, -7.567e-02, 
-5.278e-02, -2.751e-01, -8.851e-02)); + r += mul(s6_2, M4(-1.282e-01, 1.184e-01, 9.726e-02, 5.932e-03, 1.127e-01, -1.175e-02, 1.653e-02, 1.528e-01, 4.978e-02, -1.152e-01, -1.831e-02, 7.319e-02, -3.583e-02, 2.718e-02, -6.719e-03, 6.104e-02)); + r += mul(s6_3, M4(-1.288e-01, 5.219e-02, -1.049e-01, -5.532e-02, 9.779e-02, 1.473e-02, 7.424e-03, -5.110e-02, 1.360e-02, -7.652e-02, 2.543e-01, -1.142e-03, -1.404e-02, 4.899e-02, -1.146e-01, -6.175e-02)); + r += mul(s6_4, M4(9.685e-02, -9.235e-02, -2.548e-01, 1.457e-01, 1.395e-01, 1.691e-01, 2.257e-02, 8.836e-02, -2.705e-01, -2.745e-01, 9.071e-02, 2.695e-03, 1.882e-01, 1.345e-01, 9.377e-02, -9.117e-02)); + r += mul(s6_5, M4(-9.752e-02, -2.447e-01, -8.646e-02, 9.079e-02, -5.149e-02, 3.259e-02, -1.244e-01, 3.561e-02, -1.179e-01, -5.298e-02, -1.130e-01, 6.661e-02, 7.199e-02, -1.177e-01, -4.176e-03, 6.025e-02)); + r += mul(s6_6, M4(-1.983e-02, -1.090e-01, 3.897e-02, -9.106e-03, 1.161e-01, -2.020e-01, -1.279e-02, 5.516e-02, 3.743e-02, -4.719e-02, -1.047e-02, -7.391e-02, -1.281e-02, -7.450e-02, -1.839e-02, 2.007e-02)); + r += mul(s6_7, M4(-1.649e-01, -1.227e-01, -7.207e-02, 1.142e-01, 1.175e-01, 1.422e-01, 8.784e-02, 1.011e-01, -1.071e-01, -1.152e-01, 1.256e-01, -1.020e-02, -9.848e-03, -6.948e-02, 4.149e-02, 4.457e-02)); + r += mul(s6_8, M4(-9.028e-03, -1.629e-01, 5.114e-02, 1.825e-02, -6.076e-04, 3.088e-02, -1.628e-02, 6.999e-02, -3.939e-02, -1.080e-01, 1.004e-01, -1.541e-02, -6.098e-03, 3.446e-03, -2.326e-02, 1.133e-01)); + r += mul(s7_0, M4(-1.475e-02, -2.118e-02, 8.626e-02, 2.885e-02, -3.604e-02, -1.372e-01, -7.127e-02, -7.823e-02, -4.009e-02, -2.106e-02, -7.218e-02, 1.072e-02, 8.193e-02, 5.086e-02, 1.940e-01, 8.729e-02)); + r += mul(s7_1, M4(3.113e-02, 1.907e-02, -2.103e-03, 4.830e-02, -3.199e-02, -2.161e-01, 1.361e-01, -9.514e-02, 7.175e-02, 8.776e-02, 9.203e-02, 9.890e-03, -2.824e-01, 9.455e-02, -2.438e-01, -1.908e-01)); + r += mul(s7_2, M4(7.323e-02, -2.772e-03, 5.055e-02, 5.675e-02, -6.364e-02, -1.402e-01, -3.153e-02, 
4.667e-02, 1.014e-01, 9.252e-02, -6.996e-02, 1.175e-01, 1.235e-01, -1.825e-01, 9.365e-02, 2.135e-01)); + r += mul(s7_3, M4(-8.998e-03, 1.080e-02, -6.999e-02, -5.092e-02, 1.320e-01, -4.903e-02, -7.523e-02, -6.216e-02, -1.324e-02, -7.563e-02, 1.143e-01, 3.717e-02, -1.831e-01, 1.333e-01, -4.844e-01, 6.768e-02)); + r += mul(s7_4, M4(9.990e-02, 1.006e-01, 1.993e-01, -3.065e-02, 8.778e-03, 1.426e-01, 3.600e-02, -5.554e-02, -1.035e-01, -5.076e-02, -1.843e-01, 1.523e-01, 7.083e-02, -2.764e-01, 6.749e-01, -1.867e-01)); + r += mul(s7_5, M4(6.027e-02, -4.173e-02, -4.620e-02, 2.347e-02, -1.925e-02, 1.530e-01, -6.159e-02, -1.304e-01, 4.629e-03, 2.058e-01, -1.747e-01, 1.216e-01, 1.216e-01, -1.857e-01, 1.719e-01, 2.433e-02)); + r += mul(s7_6, M4(-1.687e-02, -4.113e-02, 2.712e-02, -1.166e-02, 2.373e-02, -1.509e-01, -3.001e-02, 2.273e-02, 7.160e-02, 1.554e-02, -7.796e-02, 1.504e-01, -1.118e-01, -6.248e-02, -1.827e-01, -9.236e-02)); + r += mul(s7_7, M4(-5.314e-02, 5.682e-02, -1.427e-01, 8.743e-02, -2.283e-02, -1.107e-01, 5.489e-02, -1.170e-01, -2.970e-02, 9.758e-02, 2.663e-02, 1.618e-01, -1.143e-02, -2.074e-01, -9.698e-02, -2.611e-01)); + r += mul(s7_8, M4(2.741e-02, 6.521e-02, -8.427e-02, -1.180e-01, -1.419e-01, 2.104e-01, -3.639e-02, -8.045e-02, 1.144e-01, 5.270e-02, -4.358e-02, -1.047e-04, -4.723e-02, -1.555e-01, -1.430e-01, 3.500e-02)); + r += V4(1.195e-03, 2.615e-02, -6.276e-04, 5.206e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 
s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.986e-02, 6.822e-03, 5.753e-02, -1.520e-02, -3.827e-01, -1.135e-01, -1.228e-01, -2.208e-01, -8.115e-03, -1.213e-01, 3.219e-02, 1.296e-02, -2.308e-02, -1.520e-01, 5.786e-03, -7.677e-02)); + r += mul(s0_1, M4(-4.018e-02, 3.547e-02, 3.448e-02, -1.302e-02, 9.990e-02, 1.241e-01, 9.569e-02, 1.260e-01, -1.066e-01, -1.508e-01, -8.142e-02, 1.013e-01, -1.144e-01, -1.158e-01, -1.012e-01, -2.324e-01)); + r += mul(s0_2, M4(5.159e-02, 6.002e-02, -2.956e-02, -7.201e-02, -6.089e-02, 8.855e-02, -1.617e-02, -2.567e-01, 2.620e-02, -3.999e-02, 1.103e-02, -1.445e-02, -1.175e-01, -7.938e-02, -2.475e-02, 8.953e-02)); + r += mul(s0_3, M4(4.320e-02, -8.602e-02, 5.374e-02, -7.855e-03, -1.130e-01, 5.571e-01, -6.088e-01, 3.930e-01, 4.070e-02, 9.112e-02, -5.713e-02, -9.503e-02, -1.969e-02, 2.999e-02, 1.179e-01, 7.852e-02)); + r += mul(s0_4, M4(1.322e-01, 1.195e-01, -8.777e-02, 5.713e-02, -9.338e-02, 8.141e-02, 6.251e-02, -2.821e-02, -1.453e-01, -1.133e-02, -1.109e-01, 2.815e-01, 1.847e-02, 4.758e-02, -1.011e-01, 1.856e-01)); + r += mul(s0_5, M4(5.368e-02, 5.577e-02, -4.115e-02, 5.112e-02, -1.030e-02, 6.773e-02, -2.048e-02, -8.983e-02, -1.955e-02, -5.164e-02, -4.356e-02, -1.102e-01, -2.729e-02, 4.254e-02, -8.942e-02, -1.909e-01)); + r += mul(s0_6, M4(8.982e-02, 1.226e-01, 1.211e-02, -3.601e-02, 1.469e+00, -6.215e-01, 1.481e+00, -8.902e-01, -6.957e-02, -3.144e-02, -3.928e-02, -1.012e-02, -2.337e-02, -6.282e-02, -1.052e-02, -1.148e-01)); + r += mul(s0_7, M4(-6.852e-03, 1.922e-01, -1.358e-01, 1.629e-01, 5.828e-02, 1.423e-01, 4.401e-01, 1.005e-02, -7.037e-02, 7.975e-02, -1.301e-01, 3.026e-02, 6.336e-02, 6.242e-02, 7.869e-02, 1.673e-01)); + r += mul(s0_8, M4(-2.037e-02, -1.039e-01, 3.734e-02, -1.426e-02, -1.313e-01, -2.785e-01, -3.737e-02, 1.120e-01, -5.427e-02, 1.937e-01, 5.597e-02, -1.602e-01, -5.150e-02, 2.788e-02, -5.521e-02, -4.702e-03)); + r += mul(s1_0, M4(-6.905e-03, 
-1.235e-01, -2.206e-02, -1.223e-01, -3.602e-02, 1.295e-01, -2.288e-02, 4.790e-02, 3.671e-02, 2.010e-01, 1.423e-03, -5.066e-02, 6.901e-02, 2.673e-02, 1.370e-03, 8.036e-02)); + r += mul(s1_1, M4(7.908e-02, -1.020e-01, -4.662e-02, -1.245e-01, 3.420e-02, -6.349e-02, -6.105e-03, 6.072e-02, 1.535e-01, -3.444e-02, -2.064e-02, 5.312e-02, 2.549e-02, -8.848e-02, 1.074e-02, -1.463e-01)); + r += mul(s1_2, M4(-5.909e-02, -5.896e-02, -5.014e-02, -1.310e-01, -1.054e-02, 2.578e-02, 5.319e-03, -3.809e-02, -1.382e-02, 2.672e-02, -6.116e-02, 3.984e-02, 8.119e-02, 7.160e-02, -1.933e-02, 1.464e-01)); + r += mul(s1_3, M4(-6.907e-02, -1.581e-01, 7.727e-02, 5.224e-02, -2.186e-02, 1.105e-01, 1.388e-02, 8.138e-02, 5.225e-02, -7.229e-02, 7.312e-02, -2.665e-02, 2.633e-02, -1.984e-02, 3.467e-03, 1.131e-01)); + r += mul(s1_4, M4(2.284e-02, -7.500e-02, -2.100e-01, 4.631e-02, 8.797e-02, -1.436e-01, 1.444e-02, -9.599e-02, -4.338e-02, -1.270e-01, -1.653e-01, -7.205e-02, -5.843e-03, -5.963e-02, -4.226e-02, 1.703e-01)); + r += mul(s1_5, M4(1.025e-02, 1.461e-01, -1.463e-01, 2.240e-01, -8.927e-03, 1.690e-01, 3.487e-02, -2.054e-02, -1.247e-01, -5.465e-02, 7.278e-02, 5.314e-02, 3.617e-02, 6.145e-03, -6.355e-02, 1.058e-01)); + r += mul(s1_6, M4(-1.548e-01, -5.650e-02, -1.887e-02, -7.951e-02, -5.997e-02, 1.256e-01, -9.311e-03, -4.018e-02, -5.056e-02, 2.421e-02, -7.796e-02, 4.530e-02, 7.250e-03, -3.022e-02, -3.022e-04, -9.500e-02)); + r += mul(s1_7, M4(-1.235e-01, -2.393e-01, 1.156e-01, 6.419e-02, 4.952e-03, -8.780e-02, 1.671e-01, 6.066e-02, 5.458e-02, 1.372e-01, 2.770e-02, 4.037e-02, -2.154e-02, -3.429e-02, 1.164e-01, -1.703e-01)); + r += mul(s1_8, M4(-2.125e-02, -2.629e-01, -4.850e-02, -1.570e-01, -2.562e-02, -8.194e-02, 7.728e-02, -6.326e-02, 6.335e-02, 1.607e-02, 1.454e-02, -3.578e-03, 6.082e-02, -5.576e-02, -7.196e-03, 2.837e-02)); + r += mul(s2_0, M4(-1.354e-01, 6.717e-02, 9.011e-02, 3.049e-02, 7.582e-02, -3.819e-02, 5.722e-02, -6.210e-02, -3.614e-02, 1.732e-01, 2.581e-02, -1.227e-01, -9.396e-02, 
-3.042e-02, -5.997e-02, 7.691e-02)); + r += mul(s2_1, M4(1.229e-01, 1.254e-01, -1.884e-02, -1.712e-01, 1.013e-01, -5.457e-02, 1.000e-01, 8.942e-02, 1.469e-01, 1.948e-01, 1.774e-01, 2.932e-02, -7.101e-02, -9.929e-02, 8.720e-02, 6.885e-02)); + r += mul(s2_2, M4(-6.101e-02, -2.211e-02, 4.748e-02, -3.044e-02, -6.137e-02, 1.617e-01, 5.654e-02, 3.131e-01, 2.435e-02, -2.560e-02, -2.790e-02, 2.966e-01, -1.586e-01, 4.883e-02, -8.577e-02, 3.480e-02)); + r += mul(s2_3, M4(1.129e-01, -3.321e-02, -8.915e-02, 2.356e-01, -1.635e-02, -3.314e-02, 4.316e-02, -6.194e-02, 1.002e-01, -8.583e-02, 8.522e-02, -1.088e-01, 7.135e-02, -7.710e-02, -9.304e-02, -1.736e-01)); + r += mul(s2_4, M4(4.598e-02, -1.520e-01, -3.031e-02, -1.877e-01, 2.440e-01, 7.995e-02, 9.139e-02, -1.998e-01, 1.518e-01, 8.131e-03, 1.610e-02, -5.858e-02, -1.043e-01, -8.885e-02, 2.743e-02, 5.627e-02)); + r += mul(s2_5, M4(7.694e-02, -1.287e-01, 5.624e-03, 1.564e-01, -7.886e-02, -3.133e-01, -1.258e-01, -6.350e-03, 6.086e-02, 2.019e-02, 1.615e-01, -6.151e-02, 8.397e-03, 1.362e-02, 6.039e-02, -5.354e-02)); + r += mul(s2_6, M4(-1.030e-02, 1.151e-02, -1.782e-02, -7.826e-02, -1.055e-01, -1.306e-01, 5.878e-02, 1.993e-01, -4.366e-01, 5.715e-02, 7.987e-03, 7.973e-02, 7.199e-02, 1.254e-01, 2.116e-02, -1.284e-01)); + r += mul(s2_7, M4(-5.671e-02, -7.490e-02, -3.972e-02, 9.643e-02, -1.404e-01, -7.338e-02, -1.980e-01, -2.044e-01, -3.438e-01, 1.263e-01, -5.936e-02, 1.683e-01, 1.006e-01, -2.836e-02, -5.692e-03, 1.708e-01)); + r += mul(s2_8, M4(8.514e-02, 6.994e-02, -1.013e-01, 1.542e-03, -7.350e-02, -1.482e-01, 3.252e-03, -4.503e-02, -1.242e-01, 9.806e-02, -1.469e-01, -2.331e-01, -4.649e-02, -1.426e-02, -2.206e-02, 2.883e-02)); + r += mul(s3_0, M4(-3.571e-02, -4.402e-02, -2.314e-02, -6.688e-02, 9.486e-03, -3.747e-02, -1.563e-02, 3.119e-02, 4.435e-02, 9.836e-02, -6.820e-02, 2.743e-02, -9.986e-02, 1.237e-01, -7.874e-02, -1.298e-01)); + r += mul(s3_1, M4(1.066e-01, 1.047e-01, 9.566e-02, -1.392e-02, 5.801e-02, -1.048e-01, 8.749e-02, 
-3.110e-02, 4.480e-02, -1.415e-01, 3.925e-02, 5.377e-03, -5.845e-02, 3.466e-02, -3.553e-02, 1.092e-01)); + r += mul(s3_2, M4(-1.030e-03, 1.358e-01, 4.032e-02, -2.549e-01, -7.238e-02, 7.306e-03, -1.746e-02, -7.322e-02, 9.659e-02, 6.262e-02, -3.096e-02, 1.859e-01, -6.342e-02, 4.422e-02, -6.971e-03, -8.132e-02)); + r += mul(s3_3, M4(-1.036e-02, -4.418e-02, 8.162e-03, -1.495e-02, -1.440e-02, 3.705e-03, -7.110e-02, 1.073e-01, 2.864e-02, -1.342e-01, 1.404e-02, -1.110e-01, 2.607e-01, 6.219e-03, -6.216e-03, -6.741e-02)); + r += mul(s3_4, M4(-1.665e-01, -1.968e-01, 1.526e-01, 1.592e-01, -4.854e-02, 7.407e-02, 3.999e-02, -1.254e-01, -7.611e-02, -1.210e-01, 4.380e-02, -1.194e-01, 1.725e-01, 4.165e-02, 7.649e-02, -2.743e-02)); + r += mul(s3_5, M4(7.561e-02, -1.677e-01, 7.481e-02, 1.806e-01, -5.974e-02, -1.384e-01, -4.500e-02, -5.669e-02, -1.689e-01, -1.343e-01, 1.064e-01, -1.950e-01, -1.293e-02, 9.747e-02, -3.915e-02, -1.126e-01)); + r += mul(s3_6, M4(-4.416e-02, -2.337e-03, -2.228e-02, -1.596e-01, 4.264e-02, 9.522e-02, -2.105e-02, 1.255e-01, 2.971e-04, 1.379e-01, 1.845e-02, -6.566e-02, 4.997e-02, -3.263e-02, 9.562e-02, 6.351e-02)); + r += mul(s3_7, M4(-8.212e-03, 1.173e-01, 1.054e-01, 7.538e-02, -1.105e-01, 8.184e-02, -4.330e-02, 5.937e-02, -1.263e-01, 2.382e-01, -4.970e-02, 7.818e-02, 6.604e-02, -9.197e-02, 4.356e-02, 6.887e-02)); + r += mul(s3_8, M4(5.619e-02, 9.161e-02, 1.717e-01, -5.996e-02, -1.034e-02, -4.474e-02, -3.005e-02, 1.492e-01, 9.353e-02, -4.681e-02, -1.008e-01, -1.182e-01, 2.204e-04, 1.649e-02, 5.204e-02, -3.632e-02)); + r += mul(s4_0, M4(1.618e-01, 1.440e-01, 2.946e-03, 1.621e-02, 5.865e-02, -4.953e-02, -1.768e-03, -2.534e-02, 1.805e-01, -1.013e-01, 5.760e-02, -1.804e-01, -2.309e-02, -5.472e-03, -4.686e-02, 1.970e-03)); + r += mul(s4_1, M4(2.577e-01, 2.609e-01, 2.364e-03, 8.060e-02, 1.305e-03, 7.613e-02, -1.744e-01, 1.190e-01, -2.858e-02, -2.411e-02, -2.228e-03, 4.932e-02, 1.226e-02, 1.388e-01, -9.890e-02, -4.603e-03)); + r += mul(s4_2, M4(3.535e-02, 
-9.065e-02, -7.221e-02, 1.631e-02, 5.460e-02, -6.296e-02, 1.561e-01, -3.317e-02, -3.537e-02, -8.641e-02, 3.827e-02, 3.396e-02, -3.916e-02, 1.825e-02, 7.734e-02, -5.479e-02)); + r += mul(s4_3, M4(8.959e-02, -1.148e-01, 2.783e-02, -7.273e-02, 1.180e-01, -2.340e-02, -3.300e-02, -1.382e-01, -2.296e-01, 1.344e-01, -1.764e-01, 3.269e-02, 4.208e-02, -1.100e-01, 4.651e-02, -2.444e-01)); + r += mul(s4_4, M4(5.090e-02, -1.270e-01, 3.455e-02, -1.171e-01, 2.836e-01, 3.501e-03, -2.007e-01, 1.595e-03, -2.573e-03, 3.730e-02, 5.707e-02, -1.163e-01, 5.308e-03, -1.788e-02, 1.323e-01, -7.576e-03)); + r += mul(s4_5, M4(4.045e-02, 8.176e-02, -7.298e-04, -1.575e-01, 1.848e-01, -8.641e-02, -6.485e-03, -7.190e-02, 3.545e-02, -6.044e-02, -1.548e-02, -1.619e-01, -2.513e-01, -1.906e-01, 1.054e-01, 1.993e-01)); + r += mul(s4_6, M4(-2.613e-02, 5.697e-02, 5.588e-02, -1.724e-02, -3.324e-02, 6.878e-02, 4.254e-02, 1.559e-01, 1.419e-01, -1.105e-01, -8.374e-02, 1.909e-01, -5.703e-02, 2.265e-02, -1.545e-02, -1.220e-01)); + r += mul(s4_7, M4(-1.032e-01, 1.042e-01, 7.131e-02, 9.728e-03, 1.273e-01, -1.841e-01, 4.237e-02, 9.465e-03, 2.121e-02, 1.010e-01, -1.246e-01, 8.562e-02, 4.298e-02, -1.094e-01, 6.559e-05, -2.083e-02)); + r += mul(s4_8, M4(-4.223e-02, 2.901e-01, 1.016e-02, 4.068e-02, -2.434e-02, 7.889e-02, 5.558e-02, -1.286e-01, -5.586e-02, 1.887e-01, 7.568e-02, -1.709e-01, 3.264e-03, 1.424e-01, 2.017e-01, 5.328e-02)); + r += mul(s5_0, M4(2.289e-02, -1.985e-02, 1.911e-03, -1.736e-03, 4.676e-03, -6.639e-02, 4.538e-02, -8.575e-02, 4.059e-02, -6.372e-02, -2.257e-02, -3.280e-02, 2.993e-02, -3.446e-02, -2.250e-02, -3.210e-02)); + r += mul(s5_1, M4(6.721e-02, 6.856e-02, 8.555e-03, -6.471e-02, -1.120e-01, -1.849e-02, 6.424e-03, 1.815e-01, -7.078e-02, 8.081e-03, -5.353e-02, -4.131e-02, -9.650e-02, 1.586e-01, -7.327e-02, 1.632e-01)); + r += mul(s5_2, M4(-4.732e-02, -1.429e-01, -2.673e-02, 1.000e-01, 6.563e-02, 1.312e-02, -2.896e-03, 5.481e-03, 7.972e-03, 3.480e-02, 4.318e-02, 2.591e-02, -7.582e-02, 
-1.581e-02, -7.921e-04, -1.373e-01)); + r += mul(s5_3, M4(2.248e-03, -2.054e-02, 6.049e-02, 5.953e-02, 1.831e-01, 2.980e-02, 8.772e-02, -1.613e-01, 6.612e-02, -9.887e-02, 3.382e-03, 1.342e-01, -8.885e-02, 8.843e-03, -1.922e-02, -1.048e-02)); + r += mul(s5_4, M4(2.009e-01, 4.434e-02, 1.210e-02, 1.625e-02, 1.055e-01, 6.703e-02, -7.031e-02, 8.557e-02, -1.515e-02, -8.043e-02, -8.440e-02, 1.456e-01, -5.386e-02, -1.905e-02, 6.964e-02, 8.901e-05)); + r += mul(s5_5, M4(1.971e-02, 1.069e-02, -5.078e-02, -2.352e-02, 1.143e-01, -1.166e-01, -6.763e-02, 8.697e-02, -6.270e-02, 9.384e-02, -1.432e-03, -5.115e-02, -6.805e-02, -1.019e-01, -2.959e-02, 6.550e-03)); + r += mul(s5_6, M4(-2.476e-02, -3.203e-02, 1.215e-01, 1.066e-01, 3.076e-02, 1.604e-01, 1.136e-02, -1.005e-01, 1.175e-02, -9.072e-02, 3.472e-02, -1.048e-01, 3.720e-02, -4.246e-02, 2.792e-02, 5.211e-02)); + r += mul(s5_7, M4(-9.140e-02, -3.781e-02, 1.193e-01, -5.316e-02, -4.498e-02, 1.195e-02, 2.241e-02, -2.840e-02, 6.979e-02, -6.277e-02, 5.606e-02, 1.864e-02, -7.186e-02, 9.823e-02, 1.157e-01, -1.972e-02)); + r += mul(s5_8, M4(-3.606e-02, 3.154e-01, 7.019e-02, 8.492e-02, 7.642e-03, -9.079e-02, 2.235e-02, -3.428e-02, 1.301e-03, -1.673e-02, 7.810e-02, 2.383e-02, 2.591e-02, 1.089e-01, 5.137e-02, 1.102e-01)); + r += mul(s6_0, M4(4.447e-02, -6.679e-02, 1.368e-02, 2.215e-01, 3.629e-02, -7.108e-02, -7.062e-02, -1.028e-01, 1.437e-01, -1.505e-02, 1.080e-01, 7.552e-02, -1.821e-02, -8.658e-02, 6.763e-02, -6.099e-02)); + r += mul(s6_1, M4(3.100e-01, 8.374e-02, 2.073e-03, -3.276e-01, -3.915e-02, -4.229e-02, -4.466e-02, 8.721e-02, 2.250e-01, -1.480e-01, 1.668e-01, 3.566e-02, -1.490e-01, 1.378e-01, 1.024e-01, -8.984e-02)); + r += mul(s6_2, M4(1.756e-01, -7.324e-02, -7.008e-02, -1.154e-01, 4.844e-02, 9.920e-02, -9.209e-03, 2.015e-02, 6.384e-02, -2.329e-01, -1.049e-02, -1.012e-01, -3.391e-02, -7.065e-02, 1.926e-02, -2.107e-01)); + r += mul(s6_3, M4(4.980e-02, 5.197e-03, -1.297e-01, 2.779e-01, -5.990e-02, -1.192e-01, -1.931e-02, -1.196e-01, 
1.153e-02, 3.145e-02, 1.005e-01, 4.722e-02, 4.867e-02, -5.040e-02, 9.400e-02, -7.509e-03)); + r += mul(s6_4, M4(2.063e-01, 4.555e-01, -8.566e-02, 1.910e-01, 1.244e-01, 1.213e-01, 9.179e-02, 3.046e-01, 8.209e-02, 1.313e-01, -4.185e-02, -5.557e-02, -7.440e-02, -3.492e-02, 1.808e-02, 8.745e-03)); + r += mul(s6_5, M4(-6.659e-02, 2.696e-01, 6.018e-02, -1.151e-01, 1.323e-01, -1.084e-01, -4.879e-02, 1.189e-01, -1.340e-02, -1.495e-02, -6.454e-02, -9.728e-02, 1.110e-01, -8.076e-02, 1.528e-01, 6.004e-02)); + r += mul(s6_6, M4(1.959e-01, 1.357e-01, 1.386e-01, -1.108e-01, -2.578e-02, 7.557e-03, -8.948e-02, 1.025e-01, 6.418e-02, -9.741e-02, 7.775e-02, 6.005e-02, 5.412e-02, -3.505e-03, -3.937e-02, 3.171e-02)); + r += mul(s6_7, M4(1.635e-01, 5.757e-02, 9.609e-02, -4.970e-02, -8.507e-02, -1.574e-01, 2.353e-02, -3.626e-01, 3.217e-02, -3.547e-02, 3.992e-02, 1.444e-01, -1.525e-01, -9.004e-02, -5.342e-02, 8.247e-02)); + r += mul(s6_8, M4(7.167e-02, 8.103e-02, 1.406e-01, -9.421e-02, -2.194e-02, -1.048e-03, 7.390e-02, -9.496e-02, -4.531e-02, -1.193e-02, -4.896e-02, 8.149e-02, -4.743e-02, 1.126e-01, -8.515e-02, -6.805e-02)); + r += mul(s7_0, M4(-1.121e-01, -1.278e-02, -8.863e-02, 2.300e-02, -2.201e-02, -1.399e-01, -1.536e-02, -5.916e-02, -2.724e-03, 9.229e-02, 7.675e-02, 3.021e-02, -3.299e-02, 5.690e-02, -1.076e-01, 2.151e-02)); + r += mul(s7_1, M4(-2.834e-02, -3.503e-04, -1.616e-02, 1.354e-01, 6.473e-02, -2.932e-02, 8.013e-03, -6.101e-02, 4.714e-02, 2.595e-02, 1.401e-01, 6.043e-02, -3.675e-01, -5.563e-02, -1.248e-01, -1.177e-01)); + r += mul(s7_2, M4(-6.996e-04, -6.545e-02, -1.193e-02, -8.868e-03, 3.028e-02, 7.079e-02, 1.105e-02, -6.737e-02, -3.216e-04, -1.410e-01, 5.733e-02, -8.100e-02, 2.609e-02, -8.698e-02, 1.476e-03, -3.165e-03)); + r += mul(s7_3, M4(7.213e-03, 1.243e-02, -5.497e-02, -4.193e-02, 5.657e-03, -2.381e-02, -8.174e-02, -5.500e-02, 1.466e-02, 1.246e-01, 9.604e-03, 1.296e-01, -1.523e-01, 1.194e-01, -4.447e-02, -1.892e-01)); + r += mul(s7_4, M4(-4.059e-02, -2.957e-02, 
2.509e-02, 1.123e-01, 3.719e-02, 2.019e-01, 1.411e-02, 5.486e-02, 6.567e-02, 1.341e-01, -1.903e-02, -1.988e-02, -1.076e-01, -1.181e-01, -3.153e-02, 4.874e-02)); + r += mul(s7_5, M4(2.475e-04, -1.004e-01, -5.032e-02, 1.755e-01, 2.327e-02, -4.958e-02, 1.686e-02, 1.904e-01, -7.471e-02, -7.154e-02, -7.071e-02, -8.508e-02, 1.441e-01, -9.300e-02, 2.251e-01, -8.240e-02)); + r += mul(s7_6, M4(6.208e-02, -2.324e-02, 6.566e-02, -2.202e-01, 3.150e-02, -1.214e-02, -2.382e-02, 1.375e-01, 4.706e-02, 8.534e-02, 1.390e-01, -1.694e-01, 2.515e-01, -1.183e-01, -1.791e-01, -2.025e-01)); + r += mul(s7_7, M4(-1.438e-03, -1.104e-03, 7.943e-02, -6.275e-02, -2.148e-02, -1.899e-01, 1.292e-01, -1.585e-01, -1.296e-02, 6.378e-02, 4.362e-02, -4.874e-02, 1.644e-01, -5.298e-02, -1.193e-01, 1.313e-01)); + r += mul(s7_8, M4(-1.098e-01, -6.661e-02, 1.479e-01, 7.224e-02, 6.840e-02, 5.659e-02, 2.417e-01, -6.040e-02, -1.625e-01, 7.338e-02, 7.262e-02, -4.029e-03, -3.036e-02, 1.887e-01, -4.878e-02, -1.562e-01)); + r += V4(-2.014e-02, 4.866e-03, 2.592e-02, 7.493e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.008e-01, -3.916e-02, 1.010e-01, 6.845e-02, 3.399e-01, -1.606e-01, -5.091e-02, 1.703e-02, -4.789e-02, -9.451e-03, 4.035e-02, 9.350e-02, 2.814e-02, -6.340e-02, -2.785e-02, 1.318e-01)); + r += mul(s0_1, M4(3.055e-02, -4.238e-02, 7.044e-02, 
-2.653e-02, -3.053e-01, -8.548e-02, 8.775e-02, 6.196e-02, -1.515e-03, -1.879e-01, 2.451e-02, -8.076e-02, 2.415e-01, 4.137e-02, 6.065e-02, -5.155e-02)); + r += mul(s0_2, M4(-5.146e-02, 3.458e-02, -4.560e-02, 3.390e-02, -1.537e-01, 7.741e-02, -6.436e-02, -1.263e-02, -7.760e-02, 5.328e-02, -1.590e-02, 1.418e-01, 8.197e-02, -6.740e-02, 9.695e-02, 8.204e-02)); + r += mul(s0_3, M4(-6.662e-02, 1.059e-01, 8.080e-04, -5.476e-02, 1.777e-01, -3.447e-01, -4.629e-01, -1.898e-01, 1.417e-01, 1.464e-01, -3.460e-03, 6.394e-03, 1.006e-01, -1.495e-01, -3.108e-02, -1.354e-01)); + r += mul(s0_4, M4(2.424e-02, 1.511e-02, -4.347e-02, -1.348e-02, -3.829e-01, 4.027e-01, -6.761e-01, 1.932e-01, -2.802e-02, -6.379e-02, -1.569e-03, -9.520e-02, 2.448e-01, -3.034e-01, -4.726e-02, -9.743e-02)); + r += mul(s0_5, M4(9.581e-02, -1.057e-01, -2.580e-02, 2.276e-02, -1.719e-01, -5.643e-02, 5.240e-02, 1.784e-02, -8.405e-03, -1.637e-01, 1.738e-01, 3.104e-02, 6.445e-02, -1.124e-01, -3.481e-02, -6.256e-02)); + r += mul(s0_6, M4(2.497e-02, 1.180e-02, 4.137e-02, -5.304e-02, -6.517e-01, 9.059e-02, -1.180e+00, -5.478e-01, -2.872e-02, 1.207e-02, 8.378e-03, -6.560e-02, -2.827e-02, 5.121e-02, 6.275e-02, 6.468e-02)); + r += mul(s0_7, M4(2.286e-02, 9.867e-02, 2.494e-02, 8.407e-02, 6.351e-03, 2.930e-02, 2.861e-01, 1.563e-01, -6.029e-02, 9.740e-02, 5.489e-02, -7.501e-02, 5.178e-02, -1.199e-01, -1.896e-02, 1.576e-01)); + r += mul(s0_8, M4(-2.593e-02, 7.859e-02, 7.854e-02, 1.255e-01, -4.862e-02, -1.027e-01, 8.092e-02, 7.862e-02, 7.277e-03, -3.922e-02, 3.413e-02, -4.888e-02, 9.482e-03, 6.531e-02, 6.533e-02, -1.082e-02)); + r += mul(s1_0, M4(-1.092e-01, -1.309e-01, 2.092e-01, 1.873e-01, -1.052e-02, -3.344e-02, -6.844e-02, -6.866e-02, -2.449e-02, -2.638e-02, 3.731e-02, -2.757e-02, -5.234e-02, 6.412e-03, 8.183e-03, 4.165e-03)); + r += mul(s1_1, M4(-1.361e-01, -8.128e-02, 9.945e-02, 2.035e-01, -1.116e-01, -4.339e-02, 3.222e-02, -4.847e-02, 1.682e-02, 5.067e-02, 7.394e-02, 1.533e-02, 4.123e-02, 6.091e-02, -6.075e-03, 
4.583e-02)); + r += mul(s1_2, M4(-7.005e-03, -4.706e-02, -1.425e-01, 3.193e-02, 2.992e-02, 2.992e-02, -5.016e-02, -8.966e-02, -4.400e-02, -6.918e-03, -5.816e-02, -8.850e-02, 1.587e-02, -3.190e-02, 3.408e-02, -9.859e-02)); + r += mul(s1_3, M4(-1.231e-01, 4.057e-02, 1.879e-01, -6.859e-02, -1.013e-01, 9.888e-02, -3.559e-02, -7.970e-02, 6.961e-02, 5.653e-02, -7.000e-02, 6.589e-02, -1.478e-02, -1.277e-01, 2.843e-02, -5.412e-02)); + r += mul(s1_4, M4(-4.678e-02, -5.131e-02, -3.270e-03, 1.746e-01, 1.063e-01, 4.275e-03, -2.326e-02, -7.831e-02, -9.495e-02, -4.468e-02, -9.005e-02, 1.607e-01, 1.817e-01, -1.634e-01, 1.678e-03, -1.685e-01)); + r += mul(s1_5, M4(-1.658e-02, 5.195e-02, -1.156e-01, -1.038e-01, -3.957e-02, -1.587e-02, -5.761e-02, -6.746e-02, 1.075e-02, 5.486e-02, 5.067e-02, -7.913e-02, 2.812e-02, 1.681e-01, -3.576e-02, -2.748e-01)); + r += mul(s1_6, M4(-2.988e-02, -5.385e-03, 2.411e-01, -2.254e-01, -6.225e-02, -4.964e-02, -2.798e-02, 4.692e-02, -3.240e-02, -5.173e-02, 7.077e-02, -1.813e-01, -2.905e-02, 8.668e-02, 4.005e-02, -7.292e-02)); + r += mul(s1_7, M4(-2.912e-02, 9.705e-02, 1.795e-01, -2.074e-02, -1.717e-02, -1.351e-01, 1.336e-02, 6.337e-02, -5.253e-02, 8.671e-02, 2.364e-02, -7.599e-02, 1.080e-02, 7.499e-02, -6.414e-02, -4.584e-02)); + r += mul(s1_8, M4(-1.087e-01, 5.665e-02, -1.078e-01, 2.889e-02, 4.027e-02, -4.402e-02, 5.508e-02, 6.452e-02, -3.496e-02, -7.027e-02, -2.180e-02, -1.293e-01, 5.026e-02, -5.078e-04, 1.732e-02, -1.501e-01)); + r += mul(s2_0, M4(-3.671e-02, 7.856e-03, -2.443e-02, -1.531e-02, -5.352e-02, 6.252e-02, -2.774e-02, 5.152e-03, 2.753e-02, -9.750e-02, -9.400e-02, 2.185e-02, -7.625e-03, 4.389e-02, 7.790e-03, 1.192e-02)); + r += mul(s2_1, M4(-2.192e-01, 3.237e-02, -8.307e-02, 1.826e-01, 1.406e-02, -4.539e-02, 1.026e-01, 1.231e-01, 6.083e-02, 1.084e-01, 1.293e-02, -1.103e-01, 4.078e-02, -7.499e-02, 1.408e-01, -9.862e-02)); + r += mul(s2_2, M4(-1.668e-02, -8.959e-02, -2.557e-03, 4.462e-03, 1.400e-01, 1.951e-01, -7.688e-02, 1.030e-01, 3.071e-03, 
3.503e-03, 5.728e-03, -3.091e-01, -4.406e-02, 2.517e-02, -1.225e-01, -1.108e-01)); + r += mul(s2_3, M4(-5.356e-02, 1.346e-01, 1.545e-01, -4.702e-02, 4.145e-02, -1.388e-01, -5.938e-02, 1.375e-01, 1.042e-01, 1.030e-01, -2.785e-02, -3.777e-02, -1.161e-01, -1.252e-01, 7.686e-02, 1.854e-01)); + r += mul(s2_4, M4(8.108e-02, 1.545e-01, -9.479e-02, -9.649e-02, -2.991e-02, 1.379e-01, -1.532e-01, -7.997e-02, 1.364e-01, 1.927e-01, -1.008e-01, -1.324e-01, -9.811e-03, -1.180e-01, 2.127e-02, -2.427e-02)); + r += mul(s2_5, M4(1.284e-02, 6.614e-02, -2.443e-02, 1.896e-02, -3.812e-02, 1.838e-01, -4.791e-02, -3.341e-02, 5.916e-02, 1.480e-01, -5.429e-02, -3.131e-02, -1.818e-02, 3.326e-02, 3.054e-02, 9.534e-02)); + r += mul(s2_6, M4(-3.783e-02, -3.659e-02, 1.287e-02, -2.306e-02, -8.206e-02, -5.854e-02, -1.082e-01, 1.305e-01, -2.610e-02, -1.637e-01, 1.781e-01, 1.751e-01, 8.290e-02, 1.125e-01, -2.836e-02, 2.926e-02)); + r += mul(s2_7, M4(6.580e-03, 5.339e-02, -8.924e-02, 6.558e-02, -1.142e-01, 6.927e-02, -5.860e-02, 2.088e-02, 1.394e-01, 8.620e-02, 5.780e-02, -1.332e-01, -2.791e-02, 6.050e-02, -2.671e-02, 4.709e-02)); + r += mul(s2_8, M4(5.441e-03, 5.049e-02, -4.891e-02, -4.387e-02, -9.769e-03, -3.104e-02, 4.246e-02, -3.497e-01, 1.708e-01, -2.097e-01, -1.046e-02, -6.400e-04, -7.334e-02, 2.186e-02, -1.378e-02, 4.845e-02)); + r += mul(s3_0, M4(2.658e-01, 5.756e-02, 8.229e-04, 5.429e-02, 9.246e-02, -3.045e-03, -3.316e-02, -1.508e-02, -9.681e-03, -2.741e-02, -3.148e-02, 5.369e-02, -8.267e-02, -3.702e-02, 3.685e-02, 3.233e-02)); + r += mul(s3_1, M4(-2.783e-01, -6.481e-02, -1.671e-01, 6.387e-02, 1.322e-01, 6.065e-02, 1.008e-01, 7.013e-02, 1.295e-01, 9.118e-02, 6.833e-02, 2.235e-01, -6.723e-02, 1.381e-03, -6.050e-02, -7.017e-02)); + r += mul(s3_2, M4(1.354e-02, 9.146e-03, -5.852e-02, -1.990e-01, -4.583e-02, -5.566e-02, -3.195e-02, 6.211e-03, 2.113e-02, 1.178e-02, -2.842e-02, -6.357e-02, -1.509e-01, 2.841e-02, -9.607e-02, 4.578e-02)); + r += mul(s3_3, M4(1.028e-01, -3.498e-02, 1.358e-01, 
-4.864e-02, 1.267e-01, 7.651e-02, -6.803e-02, -3.683e-02, -3.046e-02, -3.911e-04, -8.320e-02, -1.267e-02, -9.810e-02, -5.653e-02, 2.510e-01, 7.365e-02)); + r += mul(s3_4, M4(9.615e-02, -6.909e-02, 2.706e-03, 8.430e-02, 6.544e-02, -8.425e-02, -1.024e-01, 4.985e-02, 4.863e-02, 6.675e-02, 1.092e-01, 5.400e-03, -1.514e-02, 2.700e-01, -1.184e-01, -1.450e-01)); + r += mul(s3_5, M4(5.401e-02, -1.141e-01, 1.341e-01, 6.636e-02, 2.054e-02, 9.014e-03, -1.037e-02, -9.457e-03, -9.401e-02, -1.564e-02, 3.566e-02, 8.868e-02, -6.657e-04, 5.736e-02, -2.355e-02, 2.052e-02)); + r += mul(s3_6, M4(-3.002e-02, -3.003e-02, -4.129e-02, 1.064e-01, 8.557e-02, 6.851e-02, -1.117e-01, 2.930e-02, -2.581e-02, 1.051e-01, -8.577e-02, -6.659e-02, -2.273e-02, -1.715e-01, -1.470e-02, -7.442e-03)); + r += mul(s3_7, M4(1.125e-01, 1.331e-02, -1.246e-02, 5.825e-02, 1.756e-02, -8.701e-02, 6.392e-02, 1.824e-02, 8.065e-02, 7.785e-02, 2.374e-02, -1.394e-01, -2.717e-02, 1.178e-01, 9.271e-02, -1.530e-01)); + r += mul(s3_8, M4(1.323e-02, -7.379e-02, 2.424e-02, -1.389e-01, -5.386e-02, -3.862e-02, -4.150e-02, 2.732e-02, 6.020e-02, -1.514e-01, 1.884e-02, -5.014e-02, -2.019e-02, 1.374e-01, -1.084e-01, 6.373e-02)); + r += mul(s4_0, M4(-8.808e-02, 3.429e-02, 1.013e-01, -3.341e-01, -7.959e-02, 1.194e-01, -5.354e-02, 5.882e-02, -5.993e-02, -2.896e-02, 8.839e-02, -1.031e-01, -4.339e-02, 7.098e-02, -2.804e-02, 2.744e-02)); + r += mul(s4_1, M4(9.648e-04, 2.408e-01, 2.017e-02, 3.314e-02, -2.142e-03, -1.039e-02, 1.387e-02, -1.590e-02, -3.084e-02, 7.650e-02, -4.970e-02, -1.405e-02, -4.142e-02, -2.851e-01, -6.241e-02, 3.391e-03)); + r += mul(s4_2, M4(-8.964e-02, 2.112e-02, -3.446e-02, 6.552e-03, 7.698e-02, -6.241e-02, 9.886e-02, 5.670e-02, 8.158e-02, -9.800e-02, 1.245e-01, 2.136e-01, 2.148e-02, -1.728e-03, -1.094e-01, 2.660e-01)); + r += mul(s4_3, M4(-3.053e-02, -2.172e-01, 1.221e-01, 9.487e-02, 2.165e-02, -8.438e-02, 1.171e-02, 1.298e-03, 7.446e-02, 1.966e-01, -2.453e-01, 1.166e-01, 1.633e-02, -2.862e-03, 6.918e-02, 
2.827e-02)); + r += mul(s4_4, M4(-1.083e-01, -2.386e-02, 1.212e-01, 1.950e-01, 1.193e-01, 1.277e-01, 3.869e-02, -1.443e-03, 5.198e-02, 7.316e-02, -1.790e-01, 8.578e-02, 5.957e-03, 1.503e-01, 2.082e-01, 2.256e-01)); + r += mul(s4_5, M4(-1.064e-01, 1.372e-01, -1.877e-02, -1.348e-02, -5.566e-02, -9.086e-02, 2.651e-02, 6.765e-02, -6.230e-02, -1.736e-01, 3.186e-02, -1.308e-02, 5.899e-02, 1.227e-01, 2.968e-02, 3.636e-02)); + r += mul(s4_6, M4(-6.961e-03, -1.043e-01, 8.080e-02, -6.653e-02, -1.519e-02, -3.350e-02, -4.550e-02, -5.541e-02, 5.834e-02, -1.444e-01, -2.354e-03, -4.099e-03, 2.516e-02, 1.928e-01, 1.690e-01, -8.711e-02)); + r += mul(s4_7, M4(-1.251e-01, 7.012e-02, 6.696e-02, -4.789e-02, 5.799e-02, 1.241e-01, 1.203e-03, 3.225e-02, -6.642e-02, 1.606e-01, 2.856e-02, 7.654e-02, 6.001e-02, -1.007e-01, 6.379e-02, 6.739e-02)); + r += mul(s4_8, M4(7.172e-02, 1.117e-01, -8.373e-02, 5.826e-02, 8.146e-03, 9.333e-02, -8.752e-02, -1.107e-01, 5.070e-02, -8.970e-02, 2.036e-02, -8.545e-02, -1.303e-02, -6.134e-02, 1.345e-01, -2.092e-02)); + r += mul(s5_0, M4(-3.420e-02, -4.286e-02, 5.820e-02, -4.666e-02, -1.387e-02, -9.200e-03, 8.678e-02, 7.305e-02, -1.391e-02, -1.391e-01, -2.701e-03, -2.183e-02, 2.512e-02, 8.134e-02, 1.446e-02, -6.738e-02)); + r += mul(s5_1, M4(2.315e-02, 1.556e-02, 9.546e-02, -8.261e-02, -4.704e-02, -1.952e-02, 9.274e-03, 8.298e-02, -2.296e-02, 1.468e-01, 9.009e-02, 9.296e-02, -4.212e-02, -3.072e-03, 2.659e-02, -9.766e-02)); + r += mul(s5_2, M4(-3.174e-02, 7.869e-02, 6.973e-02, 9.292e-02, 4.679e-02, -7.720e-02, -6.137e-02, -3.849e-02, 3.740e-02, -2.823e-02, 2.053e-02, -1.582e-02, -2.069e-02, -4.009e-02, 1.819e-02, 5.816e-02)); + r += mul(s5_3, M4(-2.408e-02, -1.402e-02, 8.030e-02, -7.538e-04, -6.870e-02, 6.374e-02, -2.680e-02, 6.959e-02, -5.629e-02, 1.634e-02, -1.036e-01, 8.784e-02, 4.241e-02, 1.180e-01, 2.967e-02, 5.800e-02)); + r += mul(s5_4, M4(8.412e-03, -1.434e-01, 2.068e-02, 1.126e-01, -2.408e-02, 5.598e-02, 6.866e-02, -6.318e-02, -7.580e-03, -1.624e-01, 
1.690e-01, 1.567e-01, 3.018e-02, 3.697e-02, 1.022e-01, 5.549e-03)); + r += mul(s5_5, M4(-2.753e-02, 6.398e-03, -1.630e-04, 8.630e-02, -7.732e-02, -1.156e-01, -3.458e-02, -5.389e-03, 2.963e-02, -4.424e-02, -6.577e-02, 1.644e-01, 5.889e-02, -1.389e-02, 5.595e-02, 3.759e-02)); + r += mul(s5_6, M4(-3.680e-02, -1.139e-01, 2.077e-01, 8.976e-03, 3.099e-02, -2.741e-03, -3.880e-02, -6.108e-02, 7.025e-03, -1.111e-01, -7.592e-02, -7.150e-02, 6.323e-02, 1.183e-01, 1.126e-01, -1.196e-01)); + r += mul(s5_7, M4(-9.568e-02, -1.353e-01, 9.138e-05, 1.103e-01, 4.564e-02, 4.804e-02, 2.993e-02, -2.955e-05, 6.861e-02, -1.118e-02, -5.157e-02, -1.125e-01, -1.353e-02, 2.401e-02, -7.281e-02, 7.283e-02)); + r += mul(s5_8, M4(3.341e-02, 9.544e-03, 3.887e-02, 1.109e-02, -1.954e-02, 1.813e-01, -3.185e-02, -1.201e-01, 6.204e-03, 4.629e-02, -1.049e-01, -1.434e-02, -2.835e-02, -2.521e-02, 1.122e-01, 2.099e-02)); + r += mul(s6_0, M4(1.845e-01, 7.140e-02, -2.296e-02, -4.522e-01, -8.474e-02, -1.015e-01, -5.319e-02, 1.101e-01, 1.284e-01, 1.388e-02, -9.949e-02, -4.648e-02, -9.801e-02, 7.390e-02, 9.625e-02, 6.383e-02)); + r += mul(s6_1, M4(7.417e-02, 2.408e-01, 5.027e-02, -1.958e-01, -2.282e-02, -2.957e-02, 2.635e-02, 2.840e-02, 1.937e-01, 3.764e-02, 2.358e-02, 2.496e-01, -1.975e-01, 6.422e-03, -1.198e-01, -2.480e-01)); + r += mul(s6_2, M4(-6.464e-03, 6.839e-02, 1.742e-02, 5.729e-02, 5.231e-02, -4.147e-02, 1.640e-02, -3.261e-03, 3.850e-02, -2.173e-02, -5.474e-02, 2.090e-02, -5.563e-02, 3.820e-02, -9.706e-04, 5.112e-02)); + r += mul(s6_3, M4(5.517e-02, -1.175e-01, -1.632e-02, -2.209e-01, -7.623e-02, 1.509e-01, 1.945e-02, 1.775e-01, -5.883e-02, -1.288e-01, 1.850e-02, 9.276e-02, 1.260e-01, 8.077e-03, -3.461e-02, -1.318e-01)); + r += mul(s6_4, M4(2.518e-01, 2.093e-01, -3.392e-01, -3.699e-01, -1.533e-01, -5.155e-03, 2.224e-01, 1.088e-01, 1.740e-02, 7.310e-02, 1.691e-02, 1.149e-01, -2.053e-01, 7.916e-02, -8.970e-03, -6.942e-02)); + r += mul(s6_5, M4(7.398e-03, -1.139e-02, 5.134e-02, 4.369e-02, 3.504e-02, 
-2.624e-02, -1.932e-02, -8.406e-02, -5.470e-02, 1.063e-01, 1.024e-01, 3.087e-02, 3.550e-02, 2.259e-02, 1.528e-01, 1.171e-01)); + r += mul(s6_6, M4(-3.539e-02, -2.213e-01, -7.176e-02, 8.298e-02, -4.917e-02, 3.198e-02, -3.727e-02, 9.915e-03, -2.613e-03, -8.586e-02, 1.400e-03, -7.126e-02, -3.262e-02, -2.155e-02, -1.023e-01, 1.680e-02)); + r += mul(s6_7, M4(4.906e-02, -2.937e-02, -2.405e-02, 8.911e-02, -2.111e-02, -7.278e-02, 6.475e-02, -2.134e-01, 1.474e-02, -6.657e-02, 1.101e-01, -1.045e-01, -6.551e-02, 9.400e-02, 7.675e-03, 1.311e-02)); + r += mul(s6_8, M4(-7.607e-03, 1.046e-01, 6.737e-02, 1.893e-01, -3.036e-02, -3.295e-02, -5.342e-02, -1.158e-02, 2.028e-02, 3.606e-03, 6.415e-02, 1.806e-02, -2.892e-03, -7.854e-02, 6.960e-04, -1.778e-02)); + r += mul(s7_0, M4(4.071e-02, -8.310e-02, 3.682e-02, 2.568e-01, 1.042e-02, 5.130e-03, -1.003e-01, 1.055e-01, -1.793e-01, -1.907e-02, -8.712e-02, 5.897e-02, 1.614e-01, -1.085e-01, 1.510e-01, -5.698e-02)); + r += mul(s7_1, M4(1.770e-02, 6.137e-02, 4.827e-02, 2.183e-02, 5.895e-02, 1.145e-02, -9.344e-02, 2.193e-01, -1.240e-01, -7.384e-02, 1.296e-01, 1.158e-02, 1.164e-03, 1.199e-01, 2.286e-02, -3.528e-02)); + r += mul(s7_2, M4(3.107e-02, -8.318e-02, -3.956e-02, 2.245e-02, 6.957e-02, 2.143e-02, -6.687e-02, 1.294e-02, 1.749e-02, -2.284e-02, 1.119e-01, 8.031e-03, -3.842e-02, -1.248e-01, 1.727e-01, 7.133e-02)); + r += mul(s7_3, M4(5.292e-02, 8.579e-02, -3.654e-02, -9.238e-02, -1.451e-02, 1.251e-01, 1.535e-03, 8.453e-02, -3.673e-02, -6.283e-02, 1.648e-02, 4.689e-02, 1.727e-01, -3.341e-03, -1.745e-01, -9.239e-02)); + r += mul(s7_4, M4(9.484e-03, 1.112e-01, -3.333e-02, -1.463e-01, -4.327e-02, 7.501e-02, 6.370e-02, -5.664e-02, -5.735e-02, -2.845e-02, -4.963e-02, 8.615e-02, 4.983e-02, 2.317e-01, 9.224e-03, 3.614e-02)); + r += mul(s7_5, M4(-4.812e-02, -1.102e-01, -3.947e-02, -7.464e-02, 4.965e-02, 1.356e-01, 7.896e-02, 5.029e-03, -3.786e-02, -5.606e-02, 2.761e-02, -2.699e-02, 7.781e-02, -2.900e-01, 9.253e-02, 1.759e-01)); + r += mul(s7_6, 
M4(2.075e-02, 9.525e-02, 3.078e-02, 7.512e-02, -3.387e-02, 1.460e-01, -7.847e-02, 3.001e-02, -3.900e-02, 2.520e-01, 3.179e-02, -1.522e-01, 1.620e-01, 5.111e-02, -6.999e-02, 1.060e-01)); + r += mul(s7_7, M4(8.715e-03, 5.712e-02, -5.084e-02, 2.632e-02, 3.210e-02, -5.303e-02, -4.228e-02, -7.933e-02, -1.078e-01, 9.788e-02, 1.451e-01, -1.347e-01, 3.977e-02, -1.189e-01, -1.289e-01, 7.335e-02)); + r += mul(s7_8, M4(-5.911e-02, 1.363e-02, 3.665e-02, 4.175e-02, 1.131e-02, -3.395e-02, -1.990e-02, -9.542e-03, -2.874e-02, -4.118e-02, 1.667e-02, -7.509e-02, 3.075e-02, -9.451e-02, 9.701e-02, 1.274e-01)); + r += V4(-2.087e-02, 8.365e-03, 3.822e-02, -1.004e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.186e-02, -5.537e-03, 2.369e-02, -5.261e-02, -1.789e-01, -5.089e-02, -7.291e-03, -1.065e-01, -2.531e-02, -3.347e-02, 5.976e-03, 8.518e-02, 1.445e-02, -3.676e-02, 1.651e-02, -1.422e-02)); + r += mul(s0_1, M4(7.365e-03, -5.514e-02, -5.771e-02, -3.514e-03, 4.978e-01, -1.261e-01, 2.827e-01, 8.266e-02, 9.005e-02, -1.923e-01, 4.595e-02, 2.282e-01, 9.758e-02, -4.172e-03, 7.787e-02, 9.638e-02)); + r += mul(s0_2, M4(5.664e-03, 6.797e-03, 4.860e-02, 7.314e-03, -2.101e-01, -8.617e-02, -4.768e-02, 5.512e-02, -3.253e-02, -1.228e-01, -3.881e-02, -5.309e-02, 6.288e-02, -7.397e-02, 1.233e-01, -6.095e-03)); + r += mul(s0_3, M4(9.081e-02, 
2.910e-02, 2.229e-02, 5.761e-02, -1.061e-01, 5.928e-01, -9.201e-01, -1.884e-02, 6.669e-02, -1.057e-02, -8.456e-03, -1.213e-01, 6.156e-02, 4.088e-03, 4.034e-02, -9.234e-03)); + r += mul(s0_4, M4(1.579e-02, 1.365e-02, 2.926e-02, 5.481e-02, 8.783e-01, 5.205e-01, -1.973e-01, 2.039e-02, -1.500e-02, 4.624e-02, -3.470e-02, 6.766e-02, 1.261e-01, -9.425e-02, 1.121e-01, -1.359e-01)); + r += mul(s0_5, M4(8.009e-03, -7.407e-02, -7.806e-02, 3.210e-02, 1.047e-01, 6.242e-02, -1.968e-01, 1.861e-02, 5.884e-02, 1.685e-01, 3.460e-02, 9.654e-02, -1.731e-02, 5.947e-02, 1.207e-02, -2.139e-02)); + r += mul(s0_6, M4(-6.461e-04, -7.738e-02, -6.154e-02, 2.020e-02, 6.114e-01, 8.338e-01, -2.539e-01, 1.945e-01, 3.715e-02, 8.291e-02, -1.281e-01, -1.962e-02, -8.719e-02, -5.726e-02, 1.984e-02, -2.301e-02)); + r += mul(s0_7, M4(8.944e-03, -1.180e-01, -3.260e-02, -8.288e-02, 5.580e-01, 1.819e-01, -1.055e-01, -4.954e-02, -2.117e-03, 6.767e-02, 1.805e-01, -2.507e-02, -7.247e-02, -2.483e-02, 6.030e-02, 7.295e-02)); + r += mul(s0_8, M4(8.174e-02, -1.312e-02, -1.040e-03, -7.740e-02, 1.428e-01, -1.759e-01, 2.594e-01, 3.677e-03, 4.413e-02, 5.259e-02, -1.474e-02, 1.009e-02, -1.727e-02, -1.805e-02, 1.198e-01, 3.637e-03)); + r += mul(s1_0, M4(-1.429e-01, -3.566e-02, -3.506e-02, -1.421e-02, -9.504e-02, -8.328e-02, -2.046e-02, -1.898e-02, 8.700e-03, 1.643e-02, 4.432e-02, 9.043e-02, -3.787e-02, -4.207e-02, 3.174e-03, -5.154e-02)); + r += mul(s1_1, M4(7.942e-02, 1.860e-02, 1.844e-01, -1.654e-02, 6.416e-02, -5.572e-02, 4.771e-02, 5.403e-02, -2.890e-02, -6.370e-02, -6.170e-02, 9.796e-03, 9.547e-03, -1.810e-02, -5.481e-02, -3.555e-02)); + r += mul(s1_2, M4(-6.011e-02, -8.863e-02, 1.471e-01, 8.182e-02, -3.416e-02, -1.819e-02, -4.760e-02, 7.041e-03, -2.055e-02, 1.766e-03, 1.199e-01, 1.697e-01, -1.382e-01, 3.750e-02, 2.883e-02, -1.150e-01)); + r += mul(s1_3, M4(-7.160e-02, 5.820e-02, 8.351e-02, 1.104e-01, -3.941e-02, 1.396e-02, -1.016e-01, 6.790e-02, 1.341e-01, 5.572e-02, -5.254e-02, -8.326e-02, 5.552e-02, 1.079e-01, 
9.147e-02, 1.187e-01)); + r += mul(s1_4, M4(1.958e-02, -2.435e-01, -7.386e-02, 2.677e-02, 2.828e-02, -1.012e-02, -3.403e-02, 4.901e-03, 4.395e-03, 3.006e-02, 5.583e-02, -7.418e-02, 1.435e-01, -5.313e-02, -5.963e-02, 6.607e-02)); + r += mul(s1_5, M4(-2.027e-01, -1.539e-02, -1.039e-01, -1.066e-01, 4.238e-03, 2.528e-02, -1.514e-02, 4.263e-02, -9.176e-02, -6.023e-02, 4.109e-02, -5.228e-03, -7.283e-02, 1.362e-01, 3.012e-02, -1.114e-01)); + r += mul(s1_6, M4(2.261e-02, 1.224e-02, 8.824e-02, 1.870e-01, 1.328e-01, 2.603e-03, -5.553e-02, 1.089e-01, 1.792e-02, -2.273e-03, -1.100e-01, 3.768e-02, -1.192e-01, -8.333e-03, -4.026e-03, 1.260e-02)); + r += mul(s1_7, M4(5.822e-02, 7.994e-02, 1.209e-01, 3.664e-02, 9.369e-02, -1.645e-01, -6.200e-02, -1.238e-01, -1.373e-01, 3.009e-02, 1.260e-02, -1.854e-01, 7.095e-02, 6.288e-02, -6.089e-02, 1.096e-01)); + r += mul(s1_8, M4(-7.676e-02, 1.116e-01, -7.151e-02, 3.894e-02, 1.127e-02, 3.901e-02, -1.526e-02, 9.455e-02, -8.459e-02, -1.803e-01, -9.597e-02, -9.799e-02, -1.211e-01, 3.434e-02, 5.533e-02, -1.471e-02)); + r += mul(s2_0, M4(-2.601e-03, 1.487e-02, 8.944e-02, -6.765e-03, 6.154e-02, 1.853e-02, -2.615e-02, 1.179e-02, -2.676e-02, 7.439e-02, 2.106e-02, -2.097e-02, 3.093e-02, -8.687e-02, 5.337e-02, 3.475e-03)); + r += mul(s2_1, M4(-1.257e-01, 1.475e-01, 1.754e-02, -5.200e-02, -2.007e-01, 7.249e-02, 5.663e-02, -1.978e-01, -1.158e-01, -6.672e-02, 1.056e-01, -1.236e-01, 8.004e-03, -8.248e-02, 9.023e-03, 6.292e-02)); + r += mul(s2_2, M4(7.154e-02, 1.291e-01, -3.540e-02, 1.070e-01, -1.601e-01, -4.243e-02, 1.840e-01, -4.435e-02, 9.572e-03, 1.984e-01, -1.119e-02, -1.818e-01, 3.998e-02, -1.749e-01, -4.146e-02, -4.526e-03)); + r += mul(s2_3, M4(4.276e-02, -8.397e-02, 2.252e-02, 4.302e-02, -9.437e-02, -3.633e-02, 1.379e-01, -1.274e-01, 5.799e-02, 2.590e-01, -2.307e-02, -2.396e-02, 4.338e-02, 1.536e-02, 2.341e-02, 1.301e-01)); + r += mul(s2_4, M4(-1.189e-01, -2.813e-02, 1.248e-01, 1.002e-01, -1.256e-01, -9.355e-02, 4.911e-02, -1.429e-01, -1.471e-02, 
8.852e-02, -1.080e-01, 8.591e-02, -4.678e-02, 6.644e-02, 2.068e-01, 9.896e-02)); + r += mul(s2_5, M4(-7.221e-02, -4.096e-02, -6.655e-03, -1.175e-02, 3.419e-02, -1.814e-01, -1.664e-02, 1.859e-02, 9.448e-02, 1.096e-01, -9.677e-02, -8.571e-02, -3.325e-02, 1.176e-01, 7.737e-02, 9.706e-02)); + r += mul(s2_6, M4(2.384e-02, 5.983e-02, -2.370e-02, 4.370e-02, -4.737e-02, 1.621e-01, 1.168e-01, 5.861e-02, 4.508e-02, -5.758e-02, 5.496e-02, 1.514e-02, -1.413e-01, -5.880e-02, -2.348e-02, -3.277e-03)); + r += mul(s2_7, M4(-9.729e-02, 2.059e-03, 3.092e-02, -3.349e-02, 1.803e-01, 7.051e-02, -6.959e-02, 4.558e-02, 1.372e-02, 1.978e-01, 9.990e-02, 9.627e-02, -4.061e-02, -7.983e-02, 5.642e-02, 1.376e-02)); + r += mul(s2_8, M4(-6.405e-02, 1.463e-03, -4.239e-02, -2.958e-03, -3.957e-02, -2.787e-02, -1.185e-01, 8.049e-02, 5.641e-02, -7.839e-02, -1.478e-02, 5.297e-02, -1.052e-01, -2.614e-02, 3.366e-02, -2.113e-02)); + r += mul(s3_0, M4(1.090e-01, 9.970e-02, 4.323e-02, -8.377e-02, 1.532e-01, -3.260e-02, -9.920e-03, -8.339e-03, 2.015e-02, -2.037e-02, -5.245e-02, 7.484e-02, 2.756e-02, 1.694e-02, 9.995e-02, -1.103e-01)); + r += mul(s3_1, M4(-1.893e-01, 7.219e-02, -2.182e-01, -1.209e-01, -5.359e-02, 2.129e-02, -1.369e-02, 4.418e-03, -6.035e-02, -5.682e-02, 4.640e-02, 6.798e-02, 6.897e-02, -9.109e-02, -2.157e-02, -3.555e-02)); + r += mul(s3_2, M4(4.793e-02, -1.105e-01, -5.833e-02, 3.987e-02, -4.859e-03, 1.350e-02, -2.157e-02, 4.861e-02, 7.932e-03, -8.539e-02, 3.680e-03, -1.255e-01, -4.018e-02, 3.915e-04, -1.081e-01, -7.704e-02)); + r += mul(s3_3, M4(-1.794e-02, -8.453e-02, 4.418e-02, 8.425e-02, 4.736e-02, -2.773e-02, 3.350e-02, 3.066e-02, -8.311e-02, -7.339e-02, -6.158e-02, 4.223e-03, 1.174e-01, -1.238e-01, -1.274e-01, -3.384e-02)); + r += mul(s3_4, M4(-7.496e-02, -2.420e-02, 1.306e-01, 1.313e-01, 3.237e-02, -2.935e-02, 2.360e-01, 1.410e-01, -8.003e-02, -5.377e-02, -7.184e-02, 5.397e-02, -1.046e-01, 1.099e-02, -1.025e-01, -3.341e-02)); + r += mul(s3_5, M4(-6.928e-03, -1.140e-01, 4.074e-03, 
-1.397e-01, 7.265e-02, 8.207e-02, 1.169e-01, 9.509e-02, 5.067e-02, 7.042e-02, -4.241e-02, -7.893e-02, -1.161e-01, 1.654e-01, -1.081e-01, 4.884e-02)); + r += mul(s3_6, M4(1.366e-01, 3.662e-02, 2.739e-02, 1.724e-02, 3.352e-02, -8.119e-02, 4.512e-02, -6.095e-02, 3.507e-02, -2.561e-03, 1.192e-02, 2.593e-02, 2.648e-02, -2.577e-04, -6.683e-02, -1.309e-01)); + r += mul(s3_7, M4(1.421e-01, 5.626e-02, 4.526e-02, -5.287e-02, 1.482e-02, -3.645e-03, 4.018e-02, -1.501e-02, -9.036e-03, 6.154e-02, 3.938e-02, 6.123e-02, 9.404e-02, -3.154e-02, 2.484e-02, -8.405e-02)); + r += mul(s3_8, M4(1.375e-01, 5.461e-02, -1.043e-01, 2.440e-02, 5.996e-02, 5.291e-02, 5.187e-03, 2.591e-02, 3.009e-02, -3.233e-02, -5.586e-02, 2.986e-02, -4.387e-02, 4.745e-02, -8.559e-02, -2.285e-02)); + r += mul(s4_0, M4(-1.211e-01, 1.178e-02, -1.310e-01, -1.610e-01, 6.647e-02, 9.235e-02, 1.913e-02, -4.300e-02, -7.907e-02, -1.415e-02, 2.711e-02, 2.482e-01, -2.717e-02, 2.984e-02, 1.237e-02, 6.918e-02)); + r += mul(s4_1, M4(-5.608e-02, 1.109e-01, 9.857e-03, -1.931e-01, 3.601e-02, -1.316e-01, 2.161e-01, 1.744e-01, -1.295e-01, -4.955e-02, -1.476e-02, 8.352e-02, 8.242e-03, -1.597e-01, -2.525e-01, 3.094e-02)); + r += mul(s4_2, M4(-5.709e-02, 4.297e-02, -6.204e-03, -1.451e-01, 1.232e-01, 4.680e-02, -7.479e-02, 1.103e-01, 4.190e-02, -4.362e-04, 1.231e-01, -7.856e-02, -4.570e-02, 1.372e-01, -8.977e-02, -6.952e-03)); + r += mul(s4_3, M4(-6.566e-02, 7.624e-02, -8.168e-02, 3.406e-02, -3.176e-02, 3.998e-02, -6.471e-02, -5.760e-02, -5.961e-02, 1.382e-01, 6.583e-03, -1.000e-01, -1.831e-01, -1.154e-01, -1.380e-01, -6.434e-02)); + r += mul(s4_4, M4(6.403e-02, -2.450e-01, 2.792e-01, 7.430e-02, 1.759e-01, -1.725e-01, -1.845e-02, 3.502e-02, -3.869e-01, -3.381e-02, -4.831e-02, 1.339e-01, -1.262e-01, 1.916e-01, -2.321e-01, 2.760e-02)); + r += mul(s4_5, M4(6.912e-02, -2.182e-01, -1.369e-02, -9.720e-02, 5.835e-02, 9.719e-02, -8.650e-03, -5.648e-02, 1.346e-01, -3.950e-02, 2.490e-02, 8.071e-02, 2.864e-01, -2.457e-01, 1.326e-01, 
-3.384e-02)); + r += mul(s4_6, M4(-7.945e-02, -5.946e-02, -2.633e-02, -3.784e-03, 7.830e-02, 3.878e-02, -6.279e-02, 6.114e-02, -1.734e-01, -2.466e-01, -7.366e-02, 7.738e-03, 2.224e-01, -4.195e-02, 1.471e-01, 2.958e-02)); + r += mul(s4_7, M4(-3.012e-02, -4.977e-02, 2.020e-02, -1.498e-02, 5.735e-03, -1.735e-02, -1.485e-01, -1.320e-01, -3.990e-03, 3.615e-02, -4.818e-02, 1.846e-01, 2.854e-02, 4.293e-02, -8.675e-02, 9.421e-02)); + r += mul(s4_8, M4(1.392e-02, 7.286e-02, 1.063e-01, 9.212e-02, 4.501e-02, 1.268e-01, -5.199e-02, 1.414e-01, 7.748e-02, 1.673e-01, 1.055e-01, 4.698e-02, 9.681e-02, 1.470e-02, -1.188e-02, 2.931e-02)); + r += mul(s5_0, M4(-2.778e-02, 7.259e-03, -1.944e-02, -8.716e-02, 7.302e-02, -1.512e-02, 1.064e-01, -1.628e-02, 1.714e-02, -1.652e-02, -6.626e-02, 9.806e-03, 7.275e-02, 2.299e-02, -1.215e-02, 5.639e-02)); + r += mul(s5_1, M4(-1.405e-02, 6.730e-02, 4.423e-03, -1.781e-02, -5.842e-02, -5.191e-02, 8.031e-02, 1.321e-01, 9.880e-02, -8.154e-02, 1.329e-01, 8.414e-02, 2.984e-02, -8.208e-02, -2.121e-01, 3.640e-02)); + r += mul(s5_2, M4(4.550e-02, -5.010e-02, 9.170e-02, -9.450e-02, 3.401e-02, -2.799e-03, -3.152e-02, 9.824e-02, 2.436e-02, -2.393e-02, -2.029e-02, 6.419e-02, 4.413e-02, -2.446e-03, -2.047e-02, 1.147e-01)); + r += mul(s5_3, M4(-2.840e-02, -7.165e-02, 7.020e-02, -8.385e-02, 5.416e-02, 3.992e-02, 1.816e-02, 7.835e-02, -1.086e-01, -1.053e-02, -1.199e-01, -4.429e-02, 5.368e-02, 9.435e-02, 5.670e-02, 3.818e-02)); + r += mul(s5_4, M4(1.626e-02, -8.816e-02, -4.112e-02, 3.632e-02, 7.316e-02, 6.900e-02, -1.175e-02, 9.253e-02, -1.146e-02, -3.119e-02, 1.160e-01, 8.238e-02, -4.943e-02, 1.098e-01, 8.908e-05, 1.880e-01)); + r += mul(s5_5, M4(5.537e-02, -1.917e-01, 7.857e-03, -3.432e-02, 1.487e-01, 4.405e-02, -1.281e-02, -5.382e-02, -4.463e-02, -7.680e-02, 4.381e-02, 9.402e-02, -5.056e-03, 1.299e-01, 1.167e-01, 3.922e-02)); + r += mul(s5_6, M4(7.272e-02, -1.016e-01, 7.230e-02, -6.521e-03, 3.164e-02, -8.305e-04, -1.127e-01, 4.924e-03, -1.151e-01, 8.617e-04, 
-2.434e-02, -4.304e-02, -5.518e-03, 1.628e-02, 8.518e-02, -2.193e-02)); + r += mul(s5_7, M4(5.172e-02, -4.530e-02, -9.674e-03, 5.678e-02, 2.628e-02, -2.106e-02, -7.339e-02, 2.674e-02, -1.353e-01, -6.893e-02, -4.627e-03, 9.704e-02, 3.844e-02, -1.789e-02, 6.056e-02, 7.471e-02)); + r += mul(s5_8, M4(1.141e-01, 2.858e-02, 1.699e-02, 5.123e-02, 5.192e-02, 7.121e-02, -7.796e-03, 3.388e-02, -3.085e-02, -6.441e-02, 4.687e-02, 8.458e-02, 4.593e-02, 5.097e-02, -4.951e-02, 1.479e-02)); + r += mul(s6_0, M4(1.509e-01, -2.423e-01, -1.645e-01, -1.774e-01, -4.401e-02, 4.057e-02, -4.384e-02, 5.572e-02, 1.538e-01, 2.158e-02, 1.057e-01, 3.614e-03, -1.312e-01, 1.864e-02, -2.287e-02, -1.765e-02)); + r += mul(s6_1, M4(-9.463e-02, -2.675e-02, -2.744e-01, 3.083e-02, 4.534e-02, 1.117e-01, 5.833e-02, 1.769e-01, -2.594e-02, 1.808e-01, 4.166e-02, -2.937e-01, 5.296e-02, -1.342e-01, -1.411e-01, -7.903e-04)); + r += mul(s6_2, M4(1.083e-02, -1.455e-01, 7.690e-02, -8.471e-02, -5.061e-02, -3.029e-02, -1.210e-01, -2.602e-02, 6.333e-03, 1.336e-01, 5.051e-02, 1.871e-01, -6.528e-02, -9.885e-03, -1.005e-01, 4.859e-02)); + r += mul(s6_3, M4(1.362e-01, 1.818e-01, -1.686e-01, -5.426e-02, 2.551e-02, -1.676e-01, 5.143e-02, 1.961e-01, 8.181e-02, 1.676e-02, 2.382e-02, 7.826e-02, -8.471e-02, 1.614e-02, -7.251e-02, -1.279e-01)); + r += mul(s6_4, M4(-1.480e-01, -9.214e-02, -2.628e-01, -1.936e-01, 4.964e-03, -1.214e-01, 3.683e-02, -2.254e-02, -6.095e-02, -6.845e-02, -9.225e-02, -1.052e-01, -5.893e-02, 1.209e-01, -1.291e-01, -2.439e-01)); + r += mul(s6_5, M4(3.835e-03, -6.351e-02, -9.311e-02, -7.334e-03, -1.209e-01, 5.373e-02, -1.767e-01, 4.527e-02, 3.871e-02, -4.796e-02, 9.536e-02, -4.771e-02, -4.192e-02, 1.118e-01, -1.233e-01, 1.245e-01)); + r += mul(s6_6, M4(1.432e-01, 3.526e-02, -8.615e-03, -5.596e-03, -8.724e-02, -1.202e-05, 1.354e-01, 1.440e-02, 1.297e-01, 3.398e-02, 2.254e-02, 8.219e-03, -1.078e-01, -8.905e-02, -7.674e-02, -9.887e-02)); + r += mul(s6_7, M4(4.810e-03, -3.622e-02, 1.863e-03, 6.525e-03, 
-9.613e-02, 5.216e-02, -1.495e-01, 4.366e-02, 2.703e-02, -6.686e-02, -1.057e-01, -5.789e-02, -5.245e-02, -4.184e-02, 4.506e-02, -8.658e-02)); + r += mul(s6_8, M4(8.041e-02, 3.344e-02, -1.428e-01, -7.399e-03, -3.671e-02, -4.430e-02, 4.760e-02, 5.942e-02, 9.507e-02, -9.050e-02, 5.261e-02, 1.832e-02, 3.325e-02, 4.292e-02, -3.349e-02, 7.272e-02)); + r += mul(s7_0, M4(4.832e-02, -4.859e-02, 9.221e-02, 6.436e-02, 1.284e-01, 7.635e-03, -4.190e-02, -3.847e-02, -6.610e-03, 7.178e-03, 4.257e-02, -3.234e-02, 1.537e-01, -7.209e-02, -2.415e-02, 1.619e-02)); + r += mul(s7_1, M4(4.043e-02, -7.037e-02, 2.596e-02, 5.261e-02, 1.106e-03, 5.223e-02, -6.416e-04, -4.329e-02, -4.023e-02, 2.219e-02, 1.111e-01, 6.614e-03, 2.460e-01, -2.125e-01, -1.595e-01, 4.183e-01)); + r += mul(s7_2, M4(-1.914e-02, -7.829e-02, -1.925e-02, 4.404e-02, 5.143e-02, 6.391e-03, 6.923e-02, -1.394e-01, -7.542e-02, -3.202e-02, -9.787e-02, -1.178e-02, 3.474e-01, 9.006e-02, 4.054e-02, 1.489e-01)); + r += mul(s7_3, M4(8.782e-02, 4.702e-02, 9.328e-03, -6.809e-02, 5.176e-02, -8.772e-02, 3.221e-02, -2.936e-02, 1.601e-02, 5.282e-02, 4.535e-02, 1.194e-01, -1.057e-01, -1.289e-01, -8.726e-02, 4.259e-02)); + r += mul(s7_4, M4(-8.521e-02, 1.477e-02, -3.481e-02, -5.570e-03, 1.721e-02, -6.721e-02, -2.596e-02, 1.141e-01, -9.061e-02, 5.655e-02, 4.107e-02, 4.639e-02, -8.228e-02, 3.297e-01, -1.414e-01, -7.709e-02)); + r += mul(s7_5, M4(-4.180e-02, 6.089e-02, -3.896e-02, 2.700e-02, 2.710e-02, 4.737e-02, 5.740e-03, -8.129e-02, -1.322e-01, 6.664e-02, 1.096e-01, 1.229e-01, -2.098e-02, -4.566e-03, -1.994e-01, 1.441e-01)); + r += mul(s7_6, M4(4.268e-02, -5.790e-02, -1.169e-02, -4.623e-02, -9.484e-02, -1.706e-02, -3.299e-02, 5.764e-02, 6.127e-02, -1.248e-01, -1.142e-01, -8.612e-02, -3.881e-02, -1.761e-01, -7.335e-04, -1.433e-01)); + r += mul(s7_7, M4(-8.534e-02, -1.768e-02, 1.435e-01, -3.567e-02, 9.773e-03, 1.343e-01, -1.104e-02, -5.990e-02, -2.977e-02, -7.796e-02, 3.464e-02, 3.066e-02, -1.320e-01, 8.156e-02, -2.225e-01, -1.418e-01)); + r 
+= mul(s7_8, M4(-7.379e-02, 1.992e-02, -8.426e-02, 2.140e-02, 9.552e-02, 2.910e-02, -4.536e-03, 2.327e-02, -4.916e-02, -2.219e-02, -8.136e-02, 5.215e-02, 2.548e-01, 3.631e-02, -7.345e-02, -4.995e-02)); + r += V4(-2.222e-02, -7.473e-03, 1.007e-02, 2.959e-02); + return r; +} + +void Pass10(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + 
V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, 
s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 11 +//!DESC conv10 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 
s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-7.443e-02, -2.491e-02, 5.251e-03, 2.278e-03, 7.358e-03, 3.232e-02, 6.993e-04, 8.365e-02, 1.406e-01, -1.463e-02, -4.391e-02, -5.550e-02, -8.043e-02, -2.190e-02, 5.767e-03, 1.002e-01)); + r += mul(s0_1, M4(-9.990e-03, 3.600e-02, 1.256e-01, 8.596e-02, -3.771e-02, 3.026e-02, -8.820e-02, -1.091e-01, -7.490e-02, -3.519e-02, -1.456e-01, 1.001e-01, -6.054e-02, 1.960e-02, 1.759e-02, -8.544e-02)); + r += mul(s0_2, M4(1.548e-02, -6.636e-02, 7.641e-02, -6.122e-02, -7.148e-02, -6.168e-02, -1.443e-01, 2.369e-02, -2.372e-03, 1.086e-01, -4.066e-02, 3.865e-02, -5.652e-02, -1.638e-02, -3.904e-02, 9.811e-03)); + r += mul(s0_3, M4(7.849e-02, -3.489e-02, -5.032e-02, 2.365e-02, -1.519e-01, 2.447e-02, 9.641e-02, -6.077e-02, 7.374e-02, 1.298e-01, 3.015e-02, 6.463e-02, -7.396e-02, -7.937e-02, -3.670e-02, 1.363e-01)); + r += mul(s0_4, M4(1.697e-01, -6.906e-02, 7.260e-02, 1.256e-01, -2.378e-01, -1.406e-01, -2.639e-01, 4.905e-02, -5.264e-02, -1.421e-01, 1.012e-02, 1.198e-01, -3.589e-02, 8.241e-02, 1.130e-02, -5.852e-02)); + r += mul(s0_5, M4(2.248e-01, -2.462e-01, -5.329e-02, 1.711e-02, -2.105e-01, -8.250e-02, -4.665e-02, 8.654e-02, 9.518e-02, 1.235e-01, 6.224e-02, -4.567e-02, -8.524e-02, -5.822e-02, 6.281e-02, 1.470e-01)); + r += mul(s0_6, M4(-7.451e-02, 1.371e-01, 1.032e-02, -5.772e-02, -1.636e-01, -1.318e-01, -8.817e-02, -8.507e-02, 6.603e-02, 1.297e-02, -4.173e-02, 1.295e-01, -6.974e-02, 4.060e-02, 5.109e-02, 1.011e-01)); + r += mul(s0_7, M4(-1.447e-01, 4.492e-02, 4.542e-02, -1.319e-01, -1.156e-01, -3.776e-02, -2.669e-02, -1.197e-01, -6.502e-02, 4.076e-02, 1.353e-02, -1.404e-01, -9.392e-02, 3.072e-02, -9.582e-02, -2.347e-01)); + r += mul(s0_8, M4(4.432e-02, -3.884e-02, 1.475e-01, -8.128e-02, 8.425e-02, -1.151e-01, 3.016e-02, 1.669e-02, 
7.371e-02, 9.169e-02, -2.322e-02, 5.721e-02, -1.104e-01, 7.700e-03, -6.599e-02, -9.597e-02)); + r += mul(s1_0, M4(-4.455e-02, -1.914e-01, 1.410e-01, 8.961e-03, -4.278e-02, -3.920e-02, 1.085e-02, 4.053e-02, 4.494e-02, -1.222e-01, 3.310e-02, -9.746e-02, -4.951e-02, 2.743e-02, -2.652e-03, -1.583e-01)); + r += mul(s1_1, M4(4.930e-02, 1.019e-01, 4.969e-02, -8.982e-02, 7.666e-02, 7.991e-03, 6.734e-02, -6.309e-02, -1.391e-01, -1.741e-01, -1.507e-01, 2.061e-02, -1.027e-01, 9.436e-02, -2.202e-02, 6.426e-02)); + r += mul(s1_2, M4(-8.979e-02, -1.902e-01, 9.320e-02, 9.425e-02, 6.001e-03, 1.530e-02, -4.575e-02, -5.501e-03, -1.559e-01, -2.938e-02, 3.153e-02, -2.076e-02, -2.337e-02, -1.082e-01, -1.046e-01, -2.440e-01)); + r += mul(s1_3, M4(-1.011e-01, -6.263e-02, -8.168e-02, 1.249e-04, 3.329e-02, 6.307e-03, 7.876e-03, -3.334e-02, -1.334e-01, 4.970e-02, 1.646e-01, 1.806e-04, 2.558e-01, 3.352e-02, -7.658e-02, -4.535e-02)); + r += mul(s1_4, M4(1.211e-02, 1.877e-01, 7.255e-02, 9.566e-02, -5.844e-02, -1.944e-01, -1.337e-01, -8.781e-03, -1.953e-01, -3.020e-01, -2.492e-02, -1.460e-01, -7.804e-02, 1.704e-01, -1.269e-01, -2.229e-01)); + r += mul(s1_5, M4(-7.728e-03, 1.592e-01, -1.386e-01, 2.610e-02, 9.133e-02, 1.079e-01, -7.939e-02, -2.377e-02, -8.809e-02, 5.482e-02, 1.655e-01, -1.374e-01, 8.858e-02, -1.962e-01, 5.542e-03, -1.841e-01)); + r += mul(s1_6, M4(8.820e-02, 1.275e-01, 2.735e-02, -1.304e-01, 1.287e-03, -3.758e-02, -5.923e-02, -1.295e-02, -1.429e-01, -1.411e-01, 1.145e-03, 6.254e-02, -5.481e-02, -9.508e-03, -1.431e-01, -5.138e-02)); + r += mul(s1_7, M4(1.947e-02, 9.372e-02, 2.449e-01, -1.107e-01, 3.266e-02, -1.639e-02, 1.476e-01, -5.366e-02, -2.258e-01, -1.325e-01, 1.323e-01, -2.162e-02, 1.078e-02, 2.901e-01, -1.970e-01, 7.560e-02)); + r += mul(s1_8, M4(-5.304e-02, 5.358e-02, 5.178e-02, -7.842e-02, 2.189e-01, 2.961e-02, 3.601e-02, -7.544e-02, -7.790e-02, -3.727e-02, 4.402e-02, 9.522e-02, 3.025e-02, -4.522e-02, -2.606e-02, -1.037e-01)); + r += mul(s2_0, M4(-1.154e-02, 1.779e-02, 
2.786e-02, 4.385e-02, -1.039e-01, -1.154e-01, 1.159e-03, -6.035e-02, -8.846e-02, 2.845e-01, -1.438e-01, -1.155e-01, -4.495e-03, 1.542e-03, -1.795e-02, -7.339e-02)); + r += mul(s2_1, M4(-5.627e-02, -8.331e-04, 7.773e-02, 6.266e-02, 8.456e-03, 3.525e-02, -8.042e-02, 1.025e-01, -1.650e-01, 1.121e-01, -5.114e-02, -8.998e-02, -9.728e-03, 1.164e-01, 2.115e-02, -1.314e-01)); + r += mul(s2_2, M4(8.318e-02, 4.123e-02, 5.981e-02, 5.632e-02, -1.790e-01, -7.988e-02, -2.205e-02, 2.235e-03, -1.223e-01, 1.745e-01, 9.004e-02, 1.518e-01, 2.596e-02, 6.662e-03, -1.266e-02, -6.990e-02)); + r += mul(s2_3, M4(-3.095e-02, -2.132e-02, -7.726e-02, -1.791e-01, -8.083e-02, 4.191e-02, -1.401e-01, -1.086e-01, 6.522e-02, -1.365e-01, 2.249e-03, -7.607e-03, 6.612e-02, -5.534e-02, -9.837e-03, 8.180e-02)); + r += mul(s2_4, M4(-1.416e-01, -1.603e-02, 1.606e-01, -8.351e-02, -5.110e-02, -7.777e-02, -1.040e-01, -4.049e-02, 2.306e-01, 1.093e-01, 2.252e-01, -2.493e-01, -8.255e-03, -3.764e-02, -1.309e-01, -2.717e-01)); + r += mul(s2_5, M4(1.661e-02, 4.872e-02, 4.107e-03, -4.751e-03, -2.718e-01, -2.163e-01, -1.021e-01, -8.131e-02, -4.775e-02, -1.105e-01, -2.879e-01, -6.566e-02, 1.796e-02, -1.296e-01, 5.435e-02, -1.363e-01)); + r += mul(s2_6, M4(-9.353e-03, -1.422e-01, 1.012e-01, 8.128e-02, -7.881e-02, 1.535e-02, 2.834e-02, -1.763e-02, -9.610e-02, -6.383e-02, 2.115e-01, -8.985e-02, -7.448e-02, -1.822e-02, -3.225e-02, -1.333e-01)); + r += mul(s2_7, M4(-2.725e-02, 6.905e-02, -3.076e-02, -4.011e-02, 1.015e-02, 3.368e-02, -2.343e-02, -8.495e-02, -3.992e-03, 1.671e-03, -1.227e-01, 8.509e-02, 2.870e-02, 1.598e-01, -8.185e-02, -3.310e-01)); + r += mul(s2_8, M4(2.471e-01, -4.800e-02, -1.871e-02, -1.288e-01, -1.883e-01, -1.221e-02, -1.073e-01, -3.823e-02, -2.284e-01, -6.102e-02, -2.213e-01, 9.720e-02, -2.910e-02, 1.480e-01, 1.344e-01, -1.623e-01)); + r += mul(s3_0, M4(-6.625e-02, -1.278e-01, 9.770e-03, 5.171e-03, 6.653e-02, -1.805e-02, 2.589e-02, -1.813e-02, 3.330e-02, 3.777e-02, -1.823e-02, 2.923e-02, -1.319e-02, 
-3.354e-02, 1.742e-02, 5.094e-02)); + r += mul(s3_1, M4(-1.434e-02, 1.097e-01, 6.345e-02, -2.408e-02, 1.177e-02, 3.874e-02, -3.680e-02, 9.753e-02, 5.189e-02, -8.722e-03, -2.205e-02, 4.765e-02, -2.195e-02, 8.370e-02, 9.772e-03, -5.094e-02)); + r += mul(s3_2, M4(-1.681e-03, -9.392e-02, 2.710e-02, -2.582e-02, -7.773e-02, 3.851e-03, -8.569e-03, -1.003e-01, 1.439e-01, 1.497e-01, 1.401e-01, 1.280e-01, 2.289e-02, 2.038e-02, 1.827e-03, 1.431e-01)); + r += mul(s3_3, M4(-7.496e-02, -7.651e-02, -3.282e-02, 5.612e-02, 7.913e-02, 9.839e-02, 1.302e-01, -1.087e-02, -5.945e-03, -3.930e-02, 7.875e-02, 1.072e-01, -9.937e-02, -6.158e-02, 1.335e-03, 8.555e-02)); + r += mul(s3_4, M4(4.512e-03, 1.190e-01, 1.447e-01, -2.056e-01, -2.066e-02, -1.238e-01, 7.966e-02, 3.101e-02, -8.977e-02, 5.626e-02, -1.133e-02, 4.758e-02, -1.055e-01, 6.230e-02, 2.705e-02, 6.279e-02)); + r += mul(s3_5, M4(1.472e-01, 1.695e-02, 1.212e-02, 5.352e-02, 4.469e-02, 1.318e-01, 6.005e-02, 4.229e-02, 9.179e-02, 3.110e-02, 8.078e-02, -1.048e-01, 3.237e-02, -9.219e-02, 5.152e-03, 4.264e-02)); + r += mul(s3_6, M4(1.300e-02, -1.061e-01, 7.375e-02, 3.379e-02, -9.305e-03, -2.850e-02, 1.233e-02, -5.745e-02, 4.290e-02, -2.740e-02, 1.314e-01, -4.919e-02, -8.486e-02, -1.148e-01, 4.299e-04, 2.414e-02)); + r += mul(s3_7, M4(8.039e-02, 1.853e-01, 4.320e-02, -7.451e-02, 3.218e-02, -3.793e-02, 2.989e-02, 1.206e-01, -6.504e-02, 4.453e-02, 6.640e-02, -4.566e-02, -8.682e-02, -6.757e-02, -4.119e-02, -5.214e-02)); + r += mul(s3_8, M4(1.294e-01, 1.236e-02, -3.015e-03, -1.850e-02, 1.132e-01, 8.811e-02, 4.899e-02, -1.517e-02, 1.160e-01, 1.911e-02, -1.769e-03, 1.473e-02, 1.039e-01, 2.801e-02, 4.685e-02, 1.242e-01)); + r += mul(s4_0, M4(7.631e-02, -4.578e-02, -3.046e-02, -4.457e-02, 5.473e-02, -7.389e-02, -1.755e-04, 4.127e-02, -1.170e-01, -2.983e-02, -7.294e-02, 5.359e-02, 2.324e-02, -1.268e-01, -1.275e-02, 5.312e-02)); + r += mul(s4_1, M4(-4.916e-02, -1.148e-01, 6.671e-02, 1.366e-01, 1.663e-02, 7.092e-02, -3.335e-02, 7.167e-02, 6.764e-02, 
-1.304e-01, 9.522e-02, 4.013e-02, 2.975e-03, -1.921e-02, -3.123e-03, 6.587e-02)); + r += mul(s4_2, M4(3.974e-02, 2.701e-02, 1.490e-01, -4.959e-02, 1.142e-02, -1.092e-02, -2.233e-02, 1.269e-02, -1.083e-01, -1.432e-01, -2.230e-02, -7.123e-03, -3.790e-02, 9.273e-02, 4.352e-02, 1.492e-01)); + r += mul(s4_3, M4(1.076e-01, 8.338e-02, 1.369e-01, 1.978e-01, -1.222e-01, -6.275e-02, -9.606e-03, -1.044e-02, 3.965e-02, -8.872e-02, -8.133e-02, 5.930e-02, 1.145e-01, -3.811e-02, 2.754e-02, -8.336e-03)); + r += mul(s4_4, M4(-9.006e-02, -4.032e-03, 2.766e-02, 3.885e-02, -1.054e-01, -8.623e-02, -4.689e-02, 1.125e-01, 4.194e-02, 1.574e-01, 5.132e-02, 2.440e-01, 6.018e-02, 1.935e-01, 2.242e-02, 3.120e-02)); + r += mul(s4_5, M4(2.035e-01, 1.040e-01, 3.855e-02, -1.105e-01, -7.236e-02, 1.349e-01, 3.039e-02, 3.597e-02, 9.036e-02, -1.388e-01, 1.510e-02, 2.250e-02, 8.773e-02, -3.393e-02, -5.885e-02, -1.410e-01)); + r += mul(s4_6, M4(-1.548e-02, 8.332e-02, 4.180e-02, -1.206e-01, -8.834e-02, -1.059e-02, -5.289e-02, -1.763e-02, -1.925e-01, -4.318e-03, -1.554e-02, -1.647e-01, 6.316e-02, 1.847e-02, 1.278e-01, -1.416e-01)); + r += mul(s4_7, M4(-1.227e-02, -1.627e-01, -4.217e-02, 7.992e-02, -6.900e-02, -6.248e-02, -9.509e-02, -8.385e-02, -4.068e-02, 2.836e-02, 3.756e-02, -7.659e-03, -4.632e-02, 7.643e-02, 1.584e-01, -1.997e-01)); + r += mul(s4_8, M4(1.493e-01, 1.310e-01, 1.165e-01, -6.345e-03, 4.877e-02, -1.047e-01, -2.436e-02, -9.326e-02, 3.606e-02, -4.826e-02, -6.154e-02, -8.805e-02, 6.623e-02, 2.476e-02, -3.313e-02, 1.438e-02)); + r += mul(s5_0, M4(4.002e-02, -4.604e-02, 1.079e-02, -2.595e-02, -1.511e-02, 1.383e-02, -6.206e-03, 1.663e-01, 2.442e-03, 7.468e-02, -1.914e-02, -5.290e-02, 6.660e-03, -4.580e-02, 4.426e-02, 9.765e-02)); + r += mul(s5_1, M4(2.268e-02, 3.311e-03, -5.642e-02, 5.526e-02, -4.185e-02, 2.070e-01, -4.005e-02, 5.109e-03, 1.991e-01, -1.220e-01, -3.855e-03, -6.648e-02, 4.016e-02, 5.523e-02, 5.913e-02, 2.709e-02)); + r += mul(s5_2, M4(5.045e-02, -1.075e-01, 5.775e-02, 2.553e-02, 
2.226e-01, -2.762e-02, 5.378e-02, 2.212e-01, -6.276e-02, -1.003e-01, -9.307e-02, 1.153e-01, 1.548e-01, -2.839e-02, 2.308e-02, -4.647e-02)); + r += mul(s5_3, M4(4.041e-02, -2.040e-02, 5.813e-02, 7.076e-02, 1.038e-01, 2.165e-02, 2.669e-02, 9.714e-02, 6.983e-02, 4.840e-02, 2.288e-02, 1.349e-01, -3.476e-03, -1.698e-01, -3.940e-02, 9.288e-02)); + r += mul(s5_4, M4(-1.457e-02, -8.850e-04, 1.924e-01, -8.157e-02, 2.653e-01, 5.694e-02, 2.479e-02, -9.264e-02, 4.335e-02, -2.322e-02, 1.174e-01, 1.562e-01, -1.132e-02, 3.073e-02, -7.017e-02, -7.108e-02)); + r += mul(s5_5, M4(4.294e-02, -9.940e-02, 6.106e-02, 7.317e-02, -2.000e-02, 3.526e-01, 5.818e-02, 9.275e-02, -4.098e-02, 5.951e-02, -5.892e-02, -8.403e-02, -3.986e-02, 1.269e-02, -6.863e-02, 9.262e-03)); + r += mul(s5_6, M4(-1.937e-02, 1.004e-01, 1.476e-01, -3.112e-02, 9.642e-02, 7.890e-02, 7.655e-02, -1.539e-02, 7.540e-02, -7.181e-02, 9.812e-02, -1.215e-01, -7.593e-02, 5.633e-02, -2.945e-02, 1.604e-02)); + r += mul(s5_7, M4(-1.254e-01, 1.085e-01, 1.342e-01, 7.889e-02, 1.345e-01, 4.788e-02, 8.275e-02, -1.645e-01, 1.020e-01, 4.708e-02, -4.581e-02, -5.674e-02, -2.400e-02, -2.977e-02, 1.032e-01, -3.530e-02)); + r += mul(s5_8, M4(6.052e-03, -1.642e-03, 8.608e-02, -1.069e-02, 3.222e-01, 1.826e-01, 1.620e-01, 1.563e-01, -7.066e-02, 3.619e-02, 6.661e-02, 2.880e-02, 2.164e-02, -3.399e-02, 4.539e-02, 6.845e-02)); + r += mul(s6_0, M4(-3.648e-02, -2.017e-01, 1.000e-01, 1.529e-01, 7.845e-02, -6.088e-02, 1.111e-01, -3.186e-02, 7.117e-02, 7.150e-02, 7.475e-02, 3.168e-02, 9.885e-02, -6.205e-02, 1.774e-02, 2.101e-02)); + r += mul(s6_1, M4(6.775e-02, 1.105e-01, -3.794e-02, -4.679e-02, -9.210e-04, 2.132e-02, 4.863e-02, 1.474e-01, -4.066e-02, 8.994e-03, 4.689e-03, 3.249e-02, 6.345e-02, 1.142e-01, 5.827e-03, 2.765e-02)); + r += mul(s6_2, M4(-2.740e-02, -3.172e-02, 2.257e-02, 1.599e-01, 1.841e-01, 8.856e-03, 9.857e-02, -8.046e-02, -5.979e-03, -7.011e-02, 8.870e-02, -6.540e-02, -3.557e-02, -5.823e-02, 3.080e-02, 2.604e-02)); + r += mul(s6_3, 
M4(2.035e-01, 1.193e-01, -2.087e-01, -2.412e-02, 6.845e-02, 8.483e-03, -4.780e-02, -1.101e-02, -1.232e-01, 3.333e-02, 1.352e-01, -8.236e-02, -9.331e-02, 7.760e-02, 8.397e-02, 8.919e-02)); + r += mul(s6_4, M4(1.049e-03, 9.564e-02, -1.644e-01, 2.448e-02, 6.858e-02, -1.599e-01, 1.023e-02, 1.491e-01, -2.848e-01, -1.326e-01, 1.483e-01, -1.295e-01, -2.292e-01, 8.333e-02, 5.031e-02, -3.056e-02)); + r += mul(s6_5, M4(-8.747e-02, 6.372e-02, 3.113e-02, -1.207e-01, -3.647e-02, 9.549e-02, 2.753e-03, -1.689e-01, 1.110e-01, -1.858e-01, 5.344e-02, 3.468e-02, -1.239e-01, 2.882e-02, -1.640e-01, -1.889e-02)); + r += mul(s6_6, M4(7.600e-02, 2.307e-02, -1.154e-01, -4.341e-02, -5.626e-02, 1.161e-01, 9.799e-02, -1.271e-02, -1.660e-01, -8.406e-02, -2.498e-02, 6.879e-02, -1.858e-01, 6.289e-02, -5.796e-03, 7.048e-02)); + r += mul(s6_7, M4(1.530e-02, -2.446e-03, -5.280e-02, -2.562e-02, -2.290e-01, -5.978e-02, 1.380e-02, 7.379e-02, -2.215e-01, 4.228e-02, -9.744e-03, 5.206e-03, 2.473e-03, -2.494e-01, -5.735e-02, -3.795e-02)); + r += mul(s6_8, M4(4.346e-02, -6.795e-02, -7.263e-02, 9.408e-03, 1.760e-03, -5.968e-02, 7.807e-02, -5.669e-03, -8.536e-02, -3.529e-02, -1.235e-01, 8.264e-02, -2.686e-02, 6.961e-02, -7.985e-02, -2.726e-02)); + r += mul(s7_0, M4(-1.965e-02, -1.024e-01, 2.780e-02, 4.523e-02, 5.808e-02, -2.898e-02, -7.969e-02, -7.242e-02, 1.474e-02, 7.838e-02, -4.207e-02, 9.601e-02, 3.767e-02, -2.420e-03, -8.921e-03, -5.666e-02)); + r += mul(s7_1, M4(6.580e-02, 8.650e-02, -4.842e-02, -1.011e-01, 7.914e-02, 8.096e-02, -1.058e-01, -2.214e-01, 1.170e-01, 1.292e-01, -1.103e-01, -9.182e-02, -4.913e-02, 4.079e-02, -1.382e-02, 1.543e-01)); + r += mul(s7_2, M4(4.480e-02, 2.242e-02, -3.295e-02, 1.505e-02, -2.991e-02, 1.106e-01, -1.135e-01, -6.180e-02, -1.640e-01, 9.148e-02, 1.115e-02, 2.166e-02, -6.263e-03, -1.288e-02, 3.800e-02, 1.038e-02)); + r += mul(s7_3, M4(-2.111e-03, 6.777e-02, 2.279e-02, 2.671e-03, 8.600e-02, -1.305e-01, -1.917e-01, 6.077e-03, -4.187e-02, -3.021e-02, 1.626e-01, 2.004e-02, 
2.541e-02, -1.497e-02, 9.814e-03, -9.956e-02)); + r += mul(s7_4, M4(2.606e-03, -1.421e-01, -2.209e-02, 3.440e-03, -1.119e-01, -8.762e-02, -2.179e-01, -5.119e-02, -5.721e-02, 5.455e-02, -2.261e-02, -1.807e-01, -1.014e-01, -8.314e-02, 7.701e-02, -2.304e-02)); + r += mul(s7_5, M4(-1.804e-01, 3.145e-02, 7.818e-02, 2.986e-02, -7.967e-02, 2.417e-01, -1.876e-01, 9.897e-02, -6.768e-02, -8.387e-02, 6.883e-02, -1.885e-02, 1.335e-01, 1.319e-02, -4.611e-03, -1.010e-01)); + r += mul(s7_6, M4(-7.514e-03, 1.403e-01, -9.496e-02, -3.883e-02, -1.375e-01, 2.459e-02, -6.966e-02, -1.159e-01, 3.025e-02, -3.703e-02, -3.195e-02, -3.322e-02, 1.218e-01, 3.971e-02, 4.001e-02, -2.509e-02)); + r += mul(s7_7, M4(8.323e-03, -8.484e-02, -1.194e-01, 6.347e-02, -1.486e-01, 2.084e-02, -1.090e-01, -6.186e-04, 1.719e-02, 8.260e-02, 7.180e-02, -3.495e-02, -1.865e-01, -1.264e-01, -2.023e-01, 1.941e-01)); + r += mul(s7_8, M4(-2.118e-02, -1.962e-02, -1.044e-02, -7.627e-02, -1.039e-01, -1.081e-01, -5.646e-03, -4.796e-02, -1.246e-01, -4.691e-02, -2.350e-02, -2.893e-02, -3.219e-02, 8.975e-02, -8.877e-02, 8.794e-02)); + r += V4(8.022e-02, 3.824e-02, -1.685e-02, 7.276e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.531e-02, 8.363e-02, -2.453e-02, -1.133e-01, -3.351e-02, 8.886e-02, -1.069e-02, -2.856e-02, 5.394e-02, 1.066e-02, 3.697e-02, -6.636e-02, 
4.380e-02, -1.576e-01, 6.195e-02, 3.535e-02)); + r += mul(s0_1, M4(-2.962e-02, -4.826e-02, 9.580e-02, 1.516e-02, 3.193e-02, 2.322e-01, -1.121e-01, -9.229e-02, 8.198e-02, -4.713e-02, 4.354e-02, -3.634e-02, 5.267e-02, 1.190e-02, 1.165e-01, 4.146e-02)); + r += mul(s0_2, M4(-6.669e-03, -1.348e-02, -3.063e-02, -3.563e-03, -1.313e-02, -1.708e-02, 1.210e-01, -1.440e-01, 4.929e-02, 3.481e-02, 2.003e-02, 3.635e-02, 1.006e-01, -1.957e-02, 1.193e-02, 5.155e-02)); + r += mul(s0_3, M4(6.497e-02, 5.019e-02, 1.077e-01, 8.697e-03, -1.177e-01, 5.571e-02, 4.494e-02, 6.833e-02, -2.233e-02, 3.133e-02, 2.125e-02, -6.909e-02, -6.918e-02, -9.107e-03, -5.253e-02, 4.534e-02)); + r += mul(s0_4, M4(8.947e-02, -6.261e-02, -5.880e-03, 4.299e-02, -1.991e-01, 2.456e-02, -1.289e-01, -1.432e-01, -9.228e-02, 7.622e-02, -1.118e-01, 7.130e-03, -8.899e-03, 9.077e-02, -1.068e-02, -3.357e-01)); + r += mul(s0_5, M4(1.132e-02, -8.700e-03, 2.290e-02, -1.156e-02, -6.521e-02, 6.597e-02, 1.345e-01, -2.025e-02, -1.252e-02, 1.239e-01, -4.732e-02, -4.772e-03, 5.255e-02, 4.451e-02, 1.591e-02, -4.018e-02)); + r += mul(s0_6, M4(-9.199e-02, 5.236e-02, 3.563e-03, -6.109e-02, -7.301e-02, 8.333e-02, 3.957e-02, 1.803e-01, 9.601e-02, -5.646e-02, 4.394e-02, -1.050e-01, -2.629e-02, -8.527e-02, 2.850e-02, 1.179e-02)); + r += mul(s0_7, M4(-7.447e-02, 7.901e-03, 6.238e-02, -5.329e-02, 2.586e-02, 2.208e-01, 2.816e-02, -4.848e-02, 7.694e-02, 4.204e-02, 5.779e-03, -6.463e-02, -1.001e-01, 1.100e-01, 1.566e-02, 3.791e-02)); + r += mul(s0_8, M4(-5.701e-02, -1.697e-02, -1.691e-03, 1.403e-01, 3.156e-01, 5.799e-02, 1.759e-02, 9.619e-02, 2.202e-02, -9.488e-02, -2.590e-02, -8.366e-02, 1.210e-01, -7.470e-02, 1.093e-02, 1.380e-01)); + r += mul(s1_0, M4(7.212e-02, 7.621e-02, 4.314e-02, -9.247e-02, -3.266e-02, 8.508e-02, -6.421e-02, -4.211e-02, 1.010e-02, -1.950e-01, 3.861e-02, 9.782e-02, -6.655e-02, 1.424e-02, -2.223e-02, -9.446e-03)); + r += mul(s1_1, M4(-4.007e-02, -7.846e-02, -7.986e-03, -7.439e-02, 4.449e-02, -4.818e-02, 1.385e-02, 
3.797e-03, -1.347e-02, -1.561e-01, 9.846e-02, 3.801e-02, 1.061e-01, -2.401e-01, 1.055e-01, -4.129e-02)); + r += mul(s1_2, M4(-9.659e-03, 6.957e-02, -4.025e-02, 3.089e-02, 2.816e-02, 2.426e-02, 6.533e-02, -7.188e-02, -1.259e-01, -4.684e-02, -4.673e-02, 1.800e-01, 7.202e-02, 6.580e-02, -8.023e-02, 5.023e-02)); + r += mul(s1_3, M4(1.242e-01, 1.553e-01, 1.128e-01, 4.895e-02, 1.656e-02, -3.309e-02, -5.443e-03, -6.188e-02, 5.006e-04, -1.107e-02, 1.815e-02, 2.253e-01, -1.705e-01, 3.067e-02, 9.370e-02, 3.231e-02)); + r += mul(s1_4, M4(3.221e-02, -5.710e-02, -1.182e-01, -1.338e-01, -1.968e-02, 7.005e-02, -9.953e-02, 1.901e-02, -2.082e-02, 6.905e-03, -1.449e-01, 1.060e-01, 7.296e-02, -1.160e-01, -3.750e-03, -9.807e-02)); + r += mul(s1_5, M4(-6.937e-03, -1.401e-01, 4.706e-02, -6.033e-02, -1.286e-01, -1.091e-03, 7.979e-02, -2.000e-01, 8.344e-02, 2.063e-01, -1.064e-01, 1.659e-01, -1.475e-02, 2.853e-02, -5.520e-02, -1.019e-02)); + r += mul(s1_6, M4(-6.763e-02, 1.883e-01, -9.662e-02, 3.104e-02, 1.231e-02, -3.119e-02, -1.991e-02, 3.078e-02, -5.155e-02, 2.583e-02, 1.259e-01, 6.456e-02, -3.722e-02, -3.557e-02, 6.927e-02, -1.265e-02)); + r += mul(s1_7, M4(-1.071e-01, 1.035e-01, -1.620e-02, -1.328e-01, -1.581e-02, 1.080e-02, 7.568e-03, 5.949e-03, -1.235e-01, 7.834e-02, 3.517e-02, 1.922e-02, -1.788e-01, -9.244e-02, 1.602e-01, 2.443e-01)); + r += mul(s1_8, M4(-3.491e-01, -3.154e-02, -2.781e-02, 1.418e-01, 8.823e-02, -3.127e-03, 1.060e-02, -3.518e-02, -9.877e-02, -1.844e-02, -1.164e-01, -5.210e-02, -5.500e-03, 6.936e-02, -4.136e-03, -5.199e-02)); + r += mul(s2_0, M4(6.302e-05, 8.151e-02, 2.809e-03, -9.597e-02, 1.061e-01, -4.590e-03, -2.120e-02, 2.740e-03, -9.974e-03, 1.571e-02, -9.565e-02, 4.850e-03, -1.734e-01, 3.947e-02, -1.271e-01, 8.010e-02)); + r += mul(s2_1, M4(1.761e-02, 5.682e-02, -6.697e-02, 1.736e-02, 1.359e-01, 1.748e-01, -2.451e-02, -1.687e-01, -1.217e-02, 1.712e-01, 8.703e-02, -7.423e-02, -1.478e-01, 8.708e-02, 2.012e-02, 1.992e-01)); + r += mul(s2_2, M4(3.907e-02, 
3.121e-02, 4.380e-02, -1.444e-01, -6.491e-02, 2.175e-02, -2.573e-02, 1.739e-01, -1.612e-01, 1.411e-01, -6.071e-02, -1.919e-01, -9.105e-03, 1.180e-02, 8.358e-02, 1.398e-01)); + r += mul(s2_3, M4(-1.036e-01, 5.616e-02, -1.672e-03, -1.342e-01, -3.416e-03, -2.802e-02, 1.312e-01, -5.189e-02, -7.886e-02, 1.854e-01, 2.333e-01, 3.057e-02, -6.496e-02, 6.900e-03, -9.251e-02, 1.079e-01)); + r += mul(s2_4, M4(7.962e-02, 4.148e-02, 1.700e-01, 1.411e-01, 2.020e-01, 4.063e-01, -9.855e-02, 2.657e-01, -2.426e-01, -8.691e-02, -9.874e-03, 2.689e-01, -3.121e-02, 1.277e-01, -7.531e-02, 2.294e-02)); + r += mul(s2_5, M4(-1.290e-02, -1.242e-01, 9.744e-02, -3.125e-02, 4.110e-02, 3.022e-02, 5.287e-02, 1.693e-01, -2.782e-01, 1.429e-02, 6.180e-02, 1.119e-01, 1.331e-01, 1.371e-01, -3.178e-02, 1.428e-01)); + r += mul(s2_6, M4(-4.450e-03, -2.106e-02, 5.610e-02, -3.013e-02, 2.708e-02, 6.517e-02, -1.312e-01, -6.858e-02, 1.219e-01, -4.922e-02, -1.524e-02, 4.892e-02, -6.322e-03, -3.311e-02, -8.963e-02, 2.326e-01)); + r += mul(s2_7, M4(-2.796e-01, 1.151e-01, -3.706e-02, -4.687e-02, 1.712e-02, -1.499e-01, -2.080e-01, 2.191e-01, -1.121e-01, -5.119e-02, 8.154e-03, -1.510e-01, 7.341e-02, 7.382e-02, -2.254e-01, 1.950e-01)); + r += mul(s2_8, M4(8.772e-02, 4.794e-04, 8.423e-02, -9.496e-02, 9.113e-02, 2.194e-02, -7.011e-02, 7.785e-02, -7.759e-02, 5.424e-03, 3.423e-02, -6.881e-02, 1.640e-01, 6.449e-02, -1.571e-01, 8.336e-02)); + r += mul(s3_0, M4(-3.536e-02, -1.423e-02, -1.029e-02, 6.093e-02, 1.408e-01, 1.073e-01, -1.959e-02, -1.156e-01, 9.141e-02, 5.451e-02, -9.794e-03, 9.786e-03, -3.202e-02, -2.081e-02, -5.315e-02, -7.173e-02)); + r += mul(s3_1, M4(9.979e-02, 3.956e-02, 6.290e-02, 2.500e-02, 8.001e-02, -2.648e-02, 1.628e-01, -6.749e-02, 2.743e-02, 2.207e-01, -1.027e-01, 2.202e-03, -4.041e-02, 1.042e-02, 1.899e-02, -1.587e-01)); + r += mul(s3_2, M4(-2.281e-02, 1.592e-01, 5.933e-02, 3.911e-02, -8.032e-02, 5.788e-02, -2.891e-02, -3.402e-02, 3.244e-02, 1.186e-01, 1.060e-02, 5.644e-02, -2.286e-02, 6.649e-03, 
1.901e-02, -1.472e-01)); + r += mul(s3_3, M4(-2.360e-02, 2.092e-02, -4.649e-02, -3.947e-02, -1.107e-01, -8.375e-02, 1.062e-01, -7.387e-02, -8.245e-02, 4.132e-02, -9.372e-02, -4.904e-02, 1.590e-02, 3.342e-02, -5.126e-02, -6.318e-02)); + r += mul(s3_4, M4(8.237e-02, 7.139e-03, 6.742e-02, -1.915e-01, -2.912e-02, 1.144e-01, -2.675e-02, -3.869e-02, 2.174e-02, 8.929e-02, -7.668e-02, 3.221e-02, 1.072e-01, -2.226e-02, 5.747e-03, -2.013e-01)); + r += mul(s3_5, M4(1.296e-01, 5.830e-02, 2.741e-02, -5.396e-02, 4.137e-02, 1.081e-01, 5.407e-02, -1.613e-02, 4.416e-02, -4.413e-02, -1.919e-02, 2.234e-02, 1.641e-02, 3.827e-02, -5.873e-02, -3.625e-02)); + r += mul(s3_6, M4(-2.643e-02, -2.107e-02, -9.774e-04, -4.033e-02, 6.831e-02, 1.818e-02, -2.285e-02, -5.126e-02, 1.056e-01, 2.481e-02, -4.970e-02, -4.118e-02, 2.507e-03, -7.200e-02, 3.521e-02, -1.958e-02)); + r += mul(s3_7, M4(-1.419e-01, -1.939e-02, -8.252e-02, -1.226e-01, -2.463e-02, -3.110e-02, 7.933e-02, 2.225e-02, -1.125e-01, 3.870e-02, -8.449e-02, 4.435e-02, 5.833e-02, 1.431e-02, 1.874e-02, -4.861e-02)); + r += mul(s3_8, M4(6.873e-02, -1.913e-02, 5.468e-02, 3.804e-02, 8.904e-03, -1.028e-02, -5.703e-02, -3.723e-02, -1.214e-01, 3.273e-04, 5.522e-02, -5.589e-02, 6.761e-02, -2.091e-03, -1.855e-02, -1.967e-03)); + r += mul(s4_0, M4(-1.337e-02, 4.971e-02, -9.402e-02, -6.570e-02, -5.679e-02, -4.823e-02, -1.141e-02, 1.180e-01, 1.361e-02, -6.839e-02, 9.484e-03, 9.146e-02, -2.355e-02, -4.336e-02, -6.835e-02, 1.216e-01)); + r += mul(s4_1, M4(-2.744e-02, 6.125e-02, -1.307e-02, 1.858e-02, -6.888e-02, -3.563e-02, -8.818e-02, 7.357e-02, 5.087e-02, -6.322e-03, -8.044e-02, 2.251e-02, 4.736e-02, -1.195e-02, 9.993e-02, 5.329e-02)); + r += mul(s4_2, M4(-6.653e-02, -3.248e-02, -6.335e-02, -3.976e-03, 4.284e-03, -1.610e-02, 8.281e-02, 4.614e-02, 5.601e-03, 1.024e-02, 1.040e-04, 7.251e-02, -1.801e-02, -1.080e-02, 8.329e-02, 1.218e-01)); + r += mul(s4_3, M4(9.951e-02, -4.194e-02, -1.135e-01, -9.185e-02, 1.388e-02, -5.043e-02, -1.166e-01, 5.228e-03, 
-9.241e-02, 3.263e-02, -5.675e-02, -1.811e-02, 5.921e-03, 4.205e-02, -1.074e-01, -8.103e-02)); + r += mul(s4_4, M4(1.362e-02, 1.894e-01, -6.613e-02, 3.271e-02, -5.054e-02, 7.819e-02, 8.748e-03, 1.025e-01, 1.009e-01, 1.459e-03, -7.589e-02, 4.555e-02, 1.301e-01, 1.124e-01, -1.082e-01, 7.705e-02)); + r += mul(s4_5, M4(3.963e-03, -2.820e-01, -9.095e-02, 2.806e-02, 2.366e-02, 2.021e-02, 7.765e-02, 8.085e-02, 4.199e-04, 7.992e-02, -5.011e-02, 3.407e-02, -1.385e-01, 1.887e-01, -7.216e-02, 7.838e-02)); + r += mul(s4_6, M4(-6.037e-02, -3.763e-03, 1.471e-02, 1.150e-01, -7.843e-02, -5.048e-02, -1.682e-02, 2.911e-02, -5.132e-02, -2.132e-03, 8.913e-03, -2.785e-02, -1.884e-01, 1.110e-01, -9.448e-02, 1.446e-01)); + r += mul(s4_7, M4(2.501e-01, -4.783e-02, 1.722e-02, -7.884e-02, -2.728e-02, 1.207e-01, 2.019e-02, -4.461e-02, -7.270e-02, -5.856e-02, -5.372e-02, 1.377e-01, 5.869e-02, 1.418e-02, 3.952e-02, 2.244e-01)); + r += mul(s4_8, M4(-7.346e-04, -1.191e-01, -5.872e-04, -4.034e-01, 5.788e-02, 8.150e-03, 5.508e-02, 1.020e-01, 1.064e-02, -4.955e-02, 1.782e-02, 7.286e-02, -1.265e-02, 1.003e-01, -2.336e-02, 1.336e-01)); + r += mul(s5_0, M4(-5.339e-02, 7.846e-02, -8.860e-02, -1.306e-02, -4.494e-02, 9.703e-02, -2.568e-02, -1.864e-02, 5.356e-02, 1.834e-01, -7.658e-02, -1.251e-01, 2.626e-02, -7.564e-03, -1.302e-01, -7.820e-02)); + r += mul(s5_1, M4(-4.511e-03, -2.958e-02, -7.545e-02, 5.787e-03, -7.416e-02, -4.669e-02, -2.174e-01, -9.766e-02, 4.142e-02, -1.570e-01, -8.905e-02, -4.624e-02, 6.675e-02, -9.388e-03, 4.803e-03, -1.196e-01)); + r += mul(s5_2, M4(-2.193e-02, 2.750e-02, -1.122e-01, 3.679e-02, 5.083e-02, -2.510e-01, 9.296e-02, -3.115e-01, -2.605e-02, -1.356e-01, 6.583e-02, 3.942e-02, -1.008e-02, -6.099e-02, -6.722e-03, -1.421e-01)); + r += mul(s5_3, M4(6.270e-02, -5.455e-02, -3.061e-02, 2.553e-02, -6.918e-02, -6.845e-02, 2.002e-03, -1.087e-01, -9.001e-03, -1.043e-01, -2.609e-02, -4.171e-02, 1.259e-02, 1.074e-02, 1.576e-02, -1.376e-01)); + r += mul(s5_4, M4(4.290e-02, 6.627e-02, 
6.619e-02, -7.427e-02, -1.362e-01, 9.203e-02, -9.883e-02, 4.936e-03, 3.701e-01, -1.101e-02, -1.109e-01, -2.545e-02, 1.058e-02, 1.841e-01, -2.051e-02, -1.984e-01)); + r += mul(s5_5, M4(-1.378e-02, -1.164e-01, -1.474e-01, -2.092e-02, 6.278e-02, -1.969e-02, -5.004e-02, -1.315e-01, 6.378e-02, -3.266e-02, 6.565e-02, -4.789e-03, -3.663e-02, 2.888e-03, 1.072e-01, -1.854e-02)); + r += mul(s5_6, M4(-1.867e-01, 3.630e-02, 1.074e-01, -1.969e-02, -4.586e-02, 8.270e-04, -1.691e-02, 7.145e-02, 3.306e-02, 1.202e-01, 9.361e-02, -1.188e-01, -1.424e-01, -9.281e-03, -1.762e-02, -1.624e-01)); + r += mul(s5_7, M4(-2.975e-02, 1.625e-01, 1.835e-01, 1.283e-01, 7.336e-02, 2.088e-02, -8.031e-03, -1.193e-01, -2.883e-02, 1.103e-01, -7.452e-02, 4.335e-02, -4.197e-02, -3.907e-02, 5.596e-02, -3.380e-02)); + r += mul(s5_8, M4(-1.787e-01, -7.071e-02, 3.320e-02, 6.293e-02, 1.479e-01, -5.684e-02, -4.662e-02, -3.199e-02, 1.532e-01, -4.096e-02, -7.867e-02, 1.348e-01, -2.767e-02, 2.444e-02, 3.138e-02, 1.569e-01)); + r += mul(s6_0, M4(-4.936e-02, -6.285e-02, 1.092e-01, 6.430e-03, -7.977e-02, -3.174e-02, -5.940e-02, -1.876e-02, -1.700e-01, -4.452e-02, -2.191e-03, 4.175e-02, 9.124e-02, -7.633e-02, -9.207e-02, 1.279e-02)); + r += mul(s6_1, M4(7.761e-02, -3.035e-01, 9.776e-02, 2.043e-02, 1.105e-01, 8.576e-02, -3.475e-02, 1.911e-02, 6.507e-02, 1.586e-01, -2.161e-02, -1.698e-01, -2.493e-02, -8.030e-02, -7.785e-02, -1.270e-02)); + r += mul(s6_2, M4(-2.316e-01, -8.060e-02, 3.453e-02, -9.948e-02, -1.113e-02, 6.366e-02, -1.612e-02, -9.916e-02, 8.003e-02, 5.027e-02, -6.328e-02, -3.902e-02, 1.103e-01, -8.845e-02, 5.553e-02, -5.790e-02)); + r += mul(s6_3, M4(-8.073e-02, 1.479e-02, 1.683e-01, -8.615e-02, -6.766e-02, 3.079e-02, -8.450e-02, -3.041e-03, 8.573e-03, 1.075e-01, 1.413e-01, 4.908e-04, 1.775e-01, -1.729e-01, -6.040e-02, -1.862e-01)); + r += mul(s6_4, M4(1.651e-01, -1.345e-01, 8.919e-02, 1.627e-01, 3.617e-02, 6.256e-02, -5.313e-02, 1.040e-01, -1.922e-01, 1.232e-01, 2.177e-01, -8.445e-02, 1.100e-01, 3.956e-02, 
-1.707e-01, -7.690e-02)); + r += mul(s6_5, M4(2.440e-01, -7.621e-02, -1.231e-01, 3.360e-02, -2.065e-01, 7.486e-02, -1.006e-01, 2.970e-02, 2.299e-02, -1.838e-02, -3.900e-02, -7.157e-02, 2.590e-01, 3.458e-02, 7.639e-02, -5.959e-02)); + r += mul(s6_6, M4(-1.726e-01, 1.210e-01, -5.207e-02, 9.992e-04, -8.525e-02, 1.903e-02, 7.126e-02, -1.506e-01, 4.732e-02, -1.352e-01, 4.222e-02, 3.378e-02, 1.458e-02, 2.951e-02, -2.284e-01, 1.385e-01)); + r += mul(s6_7, M4(2.148e-02, -1.776e-01, 5.554e-02, 1.467e-02, -1.089e-01, -4.249e-02, 1.303e-01, -2.949e-02, -1.481e-01, -1.179e-01, 5.531e-03, -1.111e-01, 1.654e-01, 3.117e-02, -2.830e-01, 7.572e-02)); + r += mul(s6_8, M4(-1.686e-01, 2.905e-02, 3.980e-02, 1.041e-01, -9.216e-02, -3.281e-02, -4.067e-02, -2.761e-02, -8.116e-02, -4.699e-02, -2.380e-02, 4.070e-02, 8.295e-02, 6.351e-02, -1.073e-01, -3.275e-02)); + r += mul(s7_0, M4(-1.322e-02, -6.995e-02, 5.721e-02, 1.080e-01, -1.460e-01, -1.359e-01, -2.668e-02, 4.627e-02, -1.111e-01, -6.099e-02, 2.290e-02, 1.181e-01, -2.615e-02, 3.432e-02, -2.858e-02, 2.675e-02)); + r += mul(s7_1, M4(-1.126e-03, -1.130e-01, 3.254e-02, 3.818e-02, 1.163e-01, -8.689e-02, 2.397e-02, 6.867e-02, 7.187e-02, 2.244e-01, -1.445e-01, -3.121e-02, -2.282e-02, 4.401e-02, -2.783e-02, -1.091e-01)); + r += mul(s7_2, M4(-1.843e-02, -5.802e-03, 1.001e-01, 4.639e-02, -2.342e-03, -1.522e-01, 1.646e-01, -3.452e-02, 9.018e-02, 2.567e-02, 7.340e-02, -4.319e-02, 3.794e-02, 5.017e-03, -5.138e-02, 5.661e-02)); + r += mul(s7_3, M4(-3.781e-02, -7.300e-02, 5.104e-02, 1.752e-01, -1.776e-01, 5.212e-02, -1.431e-02, 3.923e-02, 1.007e-01, -4.295e-02, -3.411e-02, 1.345e-01, -4.227e-02, 4.720e-02, -3.708e-02, -9.001e-02)); + r += mul(s7_4, M4(7.349e-02, -6.402e-02, -7.391e-02, 4.688e-02, -1.185e-01, 1.355e-01, -4.110e-02, -4.597e-02, -1.750e-01, -1.666e-02, -2.820e-01, -9.678e-02, 1.918e-02, 4.531e-02, -4.430e-02, 9.198e-02)); + r += mul(s7_5, M4(8.797e-02, 7.461e-02, -3.057e-02, 8.326e-02, -8.005e-02, -9.788e-02, -6.236e-02, 1.772e-01, 
2.214e-01, 5.500e-02, 8.411e-03, -1.437e-02, -8.313e-02, 1.458e-01, -8.669e-02, -1.330e-01)); + r += mul(s7_6, M4(-7.259e-02, -1.133e-02, 1.599e-02, 3.559e-02, -1.174e-01, -2.649e-02, 7.589e-02, 5.969e-02, -3.749e-02, 6.788e-03, 8.883e-02, 1.105e-01, 4.012e-02, 5.030e-02, -1.823e-01, -1.431e-01)); + r += mul(s7_7, M4(-1.153e-02, -7.054e-02, 1.015e-02, -2.166e-02, -1.574e-01, -2.663e-02, 8.243e-02, -9.755e-02, -1.577e-02, -1.518e-01, -1.140e-02, -1.944e-01, 3.587e-02, 3.820e-02, -1.761e-01, 2.044e-01)); + r += mul(s7_8, M4(-1.494e-02, 5.534e-03, 1.283e-02, 6.255e-02, 9.911e-02, 4.208e-02, -6.697e-02, 5.858e-02, 2.691e-02, 1.197e-01, -4.191e-02, 1.493e-01, -1.131e-01, 3.449e-02, -1.062e-01, -5.003e-02)); + r += V4(4.014e-02, -9.109e-03, -2.919e-02, -6.429e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.856e-02, 5.958e-02, -5.491e-02, -5.121e-04, -7.082e-03, 7.145e-02, 3.433e-02, -1.884e-02, 8.259e-02, -1.835e-02, -2.053e-02, -8.796e-03, -1.567e-02, -3.514e-02, -3.883e-02, 8.266e-03)); + r += mul(s0_1, M4(5.622e-02, -1.245e-01, -1.000e-02, -5.029e-02, 1.284e-01, 1.304e-01, -1.182e-01, -8.880e-02, 1.180e-01, -1.074e-01, -1.839e-03, -6.479e-02, -1.055e-01, -9.332e-02, 4.050e-02, 3.166e-02)); + r += mul(s0_2, M4(8.057e-02, -2.366e-02, 8.458e-03, -4.003e-02, 1.657e-01, -2.993e-02, -9.130e-02, -2.685e-02, 4.415e-02, 
-2.073e-02, 4.602e-02, 4.820e-02, -1.642e-02, -3.753e-02, -1.158e-02, -1.363e-02)); + r += mul(s0_3, M4(-3.562e-03, 3.501e-02, -5.760e-02, -8.561e-02, 1.803e-01, 9.548e-03, -4.489e-02, -1.129e-01, 1.759e-01, 2.651e-02, -9.603e-02, 4.567e-02, -2.577e-02, -7.679e-02, 1.399e-02, 2.476e-01)); + r += mul(s0_4, M4(1.174e-01, -1.482e-01, 8.969e-03, -5.009e-02, 1.937e-01, 1.298e-01, -4.125e-02, -4.130e-01, 2.279e-02, -3.595e-02, 1.802e-01, 3.422e-02, -9.759e-02, -1.371e-02, -3.081e-02, 6.392e-02)); + r += mul(s0_5, M4(5.218e-02, 4.926e-02, -3.858e-02, 4.533e-02, 1.352e-01, -2.891e-03, 1.177e-02, -2.800e-02, 4.966e-02, 5.795e-02, -3.262e-02, -4.464e-02, -9.950e-03, -5.546e-02, -7.840e-02, 2.773e-02)); + r += mul(s0_6, M4(2.823e-02, 8.325e-02, 9.167e-02, -6.963e-02, 8.086e-02, -2.964e-02, 7.290e-02, -1.416e-01, -1.841e-02, -6.262e-02, -2.303e-02, -1.569e-02, -6.437e-02, 6.167e-02, -1.038e-01, -1.276e-02)); + r += mul(s0_7, M4(-4.809e-02, 1.415e-02, 6.225e-02, 8.075e-02, 2.388e-01, 9.211e-03, 9.064e-02, -8.200e-02, 1.694e-01, -5.018e-02, -1.079e-01, -6.351e-02, -9.142e-02, -1.697e-02, -3.814e-02, -5.625e-02)); + r += mul(s0_8, M4(9.382e-02, 3.186e-02, 1.185e-02, 5.386e-02, 8.376e-02, -1.031e-01, -6.309e-02, -6.205e-02, 6.931e-02, -6.939e-02, 2.833e-02, -8.955e-03, 2.056e-02, 2.309e-02, 8.990e-02, -2.922e-02)); + r += mul(s1_0, M4(9.485e-02, 1.059e-01, -9.448e-02, 2.837e-02, -8.034e-02, 4.231e-02, 5.874e-02, 4.376e-02, -7.212e-02, -4.734e-02, -6.407e-02, -5.659e-02, -2.286e-02, 2.383e-02, -5.526e-02, 7.060e-02)); + r += mul(s1_1, M4(2.772e-02, -2.769e-02, 1.203e-01, 2.210e-02, -9.925e-02, 3.654e-02, -6.549e-02, 5.695e-02, -3.272e-02, 7.289e-02, -6.519e-02, -4.937e-02, -1.087e-02, -9.866e-02, -5.929e-02, -5.427e-03)); + r += mul(s1_2, M4(1.325e-01, 1.090e-01, -9.989e-02, -1.232e-03, -1.037e-01, -7.060e-02, -4.878e-02, -1.971e-02, -1.156e-01, 1.161e-01, 7.762e-02, -4.978e-02, 2.168e-01, 4.355e-02, -5.945e-02, -1.258e-01)); + r += mul(s1_3, M4(-4.632e-02, 5.148e-03, -1.895e-01, 
-1.120e-02, -1.101e-02, 4.767e-02, 6.131e-02, -3.472e-02, -1.414e-01, 3.136e-02, -3.091e-02, -1.837e-02, 2.242e-01, -2.307e-02, -9.898e-02, 3.374e-02)); + r += mul(s1_4, M4(5.654e-02, -2.654e-01, 2.302e-02, -1.334e-01, -3.182e-03, 9.778e-03, -4.265e-02, -9.722e-02, -2.784e-01, 3.271e-03, -2.737e-02, -4.457e-02, 2.242e-02, -1.072e-02, -2.345e-01, -5.289e-02)); + r += mul(s1_5, M4(1.214e-02, -1.508e-02, 1.561e-01, -3.749e-02, -1.257e-01, 1.359e-02, -5.384e-02, -7.271e-03, -1.746e-01, 1.859e-01, -8.594e-02, 3.682e-02, 1.924e-01, 6.397e-02, -1.499e-01, 5.283e-02)); + r += mul(s1_6, M4(6.474e-02, 4.045e-02, 7.998e-02, -1.044e-01, -1.953e-02, 2.763e-02, 3.969e-02, -1.972e-02, -1.679e-01, 3.241e-03, 8.822e-03, -2.743e-02, 3.568e-02, 3.401e-02, -2.322e-02, 6.862e-02)); + r += mul(s1_7, M4(9.398e-02, 5.614e-02, -8.502e-02, 2.310e-02, -5.239e-02, -5.140e-03, 1.602e-01, 8.341e-02, -2.886e-01, 1.867e-01, -9.800e-02, -1.470e-02, -1.087e-01, -1.298e-01, -2.362e-02, -1.400e-01)); + r += mul(s1_8, M4(-3.051e-02, -6.396e-02, 4.116e-02, 1.117e-01, -4.641e-02, 2.690e-02, -3.111e-02, -1.250e-02, -2.173e-01, 1.572e-02, -1.499e-01, 1.580e-02, 1.703e-01, 1.144e-01, 1.479e-02, -8.993e-02)); + r += mul(s2_0, M4(1.244e-01, 4.970e-02, -1.070e-03, 7.773e-02, -8.029e-02, -2.884e-02, -3.637e-02, -1.304e-01, -1.312e-01, 2.130e-01, -4.910e-02, -4.770e-03, 1.294e-01, 2.274e-02, 9.818e-02, 7.503e-02)); + r += mul(s2_1, M4(-5.884e-02, 8.183e-02, -8.185e-02, 1.065e-02, -1.435e-01, 6.958e-02, 3.902e-02, -2.534e-02, -1.494e-01, 5.753e-02, -2.062e-01, -7.096e-02, -2.066e-02, 1.252e-01, 4.390e-02, 7.641e-02)); + r += mul(s2_2, M4(8.514e-03, 6.807e-03, -6.165e-02, -3.644e-03, 2.428e-03, -4.480e-02, 5.208e-02, 6.871e-02, -1.813e-02, -4.059e-02, -6.820e-02, -3.910e-02, 1.024e-01, 3.187e-02, 5.265e-02, -2.288e-02)); + r += mul(s2_3, M4(3.685e-02, 9.477e-02, -3.593e-02, 4.419e-02, -1.481e-01, 1.218e-01, -7.954e-03, -1.694e-01, -1.576e-02, 1.485e-02, -1.178e-01, -2.675e-02, 7.700e-03, 1.516e-01, -1.317e-02, 
3.000e-02)); + r += mul(s2_4, M4(-1.040e-01, 1.802e-01, 4.063e-02, 1.970e-01, 4.039e-02, 1.686e-01, 9.372e-02, 3.956e-02, -8.978e-02, -1.824e-01, -4.485e-01, 1.647e-01, -4.233e-02, 2.078e-01, 2.783e-02, -1.687e-02)); + r += mul(s2_5, M4(-5.382e-02, 7.601e-03, 1.531e-01, -1.537e-01, 5.936e-02, 4.119e-02, 1.738e-02, -9.958e-02, -1.441e-02, -1.581e-01, 1.304e-01, 1.832e-01, 2.048e-01, 3.706e-02, -6.517e-02, -2.421e-02)); + r += mul(s2_6, M4(-1.290e-01, 5.078e-02, 3.001e-02, -1.579e-02, -3.427e-02, -3.255e-02, 1.423e-01, 2.828e-02, -6.385e-02, 6.381e-02, -1.872e-01, 1.009e-01, -2.196e-02, 5.991e-02, 1.937e-02, -7.034e-02)); + r += mul(s2_7, M4(8.300e-02, 1.168e-01, -2.442e-01, 7.992e-02, -5.251e-02, 9.426e-02, -1.091e-01, 3.191e-02, 2.351e-02, 8.493e-02, -1.876e-01, 9.190e-02, 1.258e-01, 8.952e-02, 6.551e-02, -1.797e-03)); + r += mul(s2_8, M4(-4.132e-02, 9.893e-02, -1.577e-01, 5.819e-02, -1.900e-02, -1.933e-03, 2.067e-02, -6.572e-03, 1.310e-01, -1.984e-01, -1.135e-01, -2.853e-02, 1.574e-01, 3.272e-02, 1.053e-01, 2.372e-02)); + r += mul(s3_0, M4(-1.278e-02, 1.956e-02, 1.472e-02, -1.047e-01, 2.871e-02, -7.708e-03, -2.284e-02, -5.125e-02, 6.561e-02, 5.233e-02, 6.514e-02, 4.392e-02, -6.372e-02, -5.538e-02, 2.252e-02, 9.895e-02)); + r += mul(s3_1, M4(8.760e-02, 4.733e-02, -2.399e-03, 1.374e-01, -3.357e-02, -1.288e-02, -1.132e-01, -1.851e-02, -1.350e-01, 3.614e-02, 2.063e-02, 1.047e-02, -2.650e-02, 3.096e-02, -2.657e-02, 7.451e-02)); + r += mul(s3_2, M4(-9.244e-02, 1.058e-01, -3.306e-02, -2.094e-02, 6.164e-02, -1.685e-04, 2.070e-02, 4.812e-02, 1.481e-02, 7.771e-02, -1.181e-01, 3.211e-02, -2.177e-02, -1.001e-01, 2.578e-02, -4.489e-02)); + r += mul(s3_3, M4(-9.743e-02, 1.029e-01, 5.503e-02, 1.500e-01, 1.386e-01, 2.629e-02, -1.115e-02, -7.039e-02, 7.579e-02, -2.209e-02, -2.975e-02, -3.635e-02, -1.220e-01, -3.563e-02, 2.109e-02, -5.721e-03)); + r += mul(s3_4, M4(6.016e-02, 1.031e-01, -3.935e-02, 1.274e-01, -7.301e-02, 1.266e-01, 4.915e-02, 2.390e-02, 9.129e-03, -8.408e-02, 
-8.331e-02, 2.235e-01, -1.669e-01, -8.030e-04, -1.628e-02, -9.797e-02)); + r += mul(s3_5, M4(8.097e-02, -8.802e-02, 8.909e-02, -5.042e-02, 1.494e-02, 2.353e-02, -4.345e-02, -4.842e-03, -4.746e-02, -4.925e-02, 8.130e-02, 3.014e-02, -1.431e-02, -1.187e-01, 3.601e-02, -1.568e-03)); + r += mul(s3_6, M4(-3.983e-02, 8.975e-02, -1.097e-01, -2.354e-03, 2.544e-02, -6.747e-02, -2.813e-02, 2.132e-02, 4.415e-02, 2.316e-02, -7.110e-02, -1.807e-02, -1.686e-02, 1.114e-02, -6.167e-02, -9.863e-03)); + r += mul(s3_7, M4(8.592e-02, 1.271e-02, -9.243e-02, 5.828e-02, -1.367e-01, -4.598e-02, -1.202e-02, -4.801e-02, 9.932e-02, 9.926e-03, -5.031e-02, 1.650e-01, 3.893e-02, 6.156e-03, -3.474e-02, -1.083e-01)); + r += mul(s3_8, M4(1.120e-02, -3.952e-02, -4.259e-02, 1.034e-01, -7.690e-02, -7.688e-03, 2.102e-02, 5.862e-02, -3.920e-02, 2.724e-02, -1.052e-01, 3.477e-02, -7.197e-02, -3.307e-03, 1.187e-01, 1.826e-03)); + r += mul(s4_0, M4(-1.181e-01, -2.105e-03, 6.659e-02, -9.693e-02, -5.381e-02, 4.990e-02, 1.051e-01, 6.149e-02, 2.631e-03, -3.388e-02, -5.817e-02, -8.667e-02, -1.396e-01, 2.351e-02, -1.353e-02, 2.335e-02)); + r += mul(s4_1, M4(-1.239e-02, 8.171e-02, -5.804e-03, 9.792e-02, -4.276e-02, 1.050e-01, 3.182e-02, -3.140e-02, -5.871e-02, -6.173e-02, 1.148e-02, -5.548e-02, -8.282e-02, -1.731e-03, -4.758e-02, 4.939e-03)); + r += mul(s4_2, M4(-1.961e-01, 7.310e-02, 7.496e-02, 2.064e-02, 5.587e-03, 4.173e-02, -4.753e-02, 1.277e-02, 4.601e-02, -5.450e-02, -4.348e-02, -5.249e-02, -2.668e-02, -4.040e-02, -5.659e-04, -4.179e-02)); + r += mul(s4_3, M4(4.018e-02, 1.174e-01, 2.993e-02, 1.726e-01, 7.344e-03, 3.771e-02, 4.385e-02, 7.180e-02, -3.315e-02, 1.868e-02, 2.308e-02, 2.335e-02, -7.644e-02, -1.776e-03, 1.021e-01, -5.989e-02)); + r += mul(s4_4, M4(-1.093e-01, 1.338e-01, -3.326e-03, 2.741e-01, 5.125e-03, -5.155e-02, 7.349e-02, 2.113e-02, -3.361e-02, 4.438e-02, 4.073e-02, 3.265e-02, -5.326e-02, 1.109e-01, 9.573e-02, 1.011e-01)); + r += mul(s4_5, M4(-1.444e-02, -9.394e-02, 1.325e-01, 1.078e-02, 
-9.762e-02, -2.065e-02, -1.430e-01, -3.443e-02, 1.513e-02, 1.041e-01, -8.205e-02, -5.684e-02, -1.067e-01, -5.092e-02, 3.510e-02, 4.930e-02)); + r += mul(s4_6, M4(-1.771e-01, 6.409e-02, 2.086e-03, 3.445e-02, 1.277e-01, -2.445e-02, -1.331e-01, 2.405e-02, 4.605e-02, 2.834e-02, -1.150e-01, 1.056e-01, -1.270e-01, -1.430e-02, 1.851e-01, 5.202e-02)); + r += mul(s4_7, M4(-2.581e-01, -9.130e-04, 1.984e-02, 1.706e-02, 8.904e-02, 2.445e-02, 1.978e-02, -6.087e-02, 8.202e-02, 5.114e-02, -3.343e-02, 1.372e-01, -3.990e-02, 5.799e-02, 5.534e-02, 3.146e-03)); + r += mul(s4_8, M4(-1.432e-01, 6.463e-02, -1.744e-01, 7.203e-02, -5.544e-02, 7.475e-03, -1.297e-01, -1.000e-01, -4.786e-02, -8.024e-03, -8.030e-02, 4.410e-02, 3.240e-02, -1.657e-02, -5.415e-02, -2.264e-02)); + r += mul(s5_0, M4(-1.678e-02, 6.369e-02, 6.262e-02, 3.990e-03, -2.285e-02, -2.102e-02, -8.417e-02, 3.454e-02, 5.631e-02, -5.209e-03, -5.349e-02, 7.355e-02, 5.594e-02, -2.378e-02, -1.036e-01, 1.916e-02)); + r += mul(s5_1, M4(-8.472e-02, 1.004e-01, -4.666e-03, 2.850e-02, -7.198e-02, -4.735e-02, 4.020e-02, -9.596e-02, -6.666e-02, 4.326e-02, -1.966e-02, -1.055e-01, 1.041e-01, 2.119e-02, -5.364e-02, 1.074e-02)); + r += mul(s5_2, M4(-2.121e-02, 5.410e-02, 4.190e-02, 6.076e-02, 1.067e-01, 1.614e-02, 7.708e-02, -5.968e-02, 5.507e-02, -2.250e-02, 3.645e-02, -9.764e-03, -7.478e-03, -4.927e-02, 5.171e-03, -4.723e-02)); + r += mul(s5_3, M4(1.332e-01, 4.441e-02, -1.963e-01, 1.094e-02, 6.102e-02, 5.773e-03, -7.517e-02, 3.987e-02, 8.065e-02, -1.551e-01, -7.945e-02, -2.277e-01, 2.685e-01, 8.098e-03, -1.217e-01, -3.998e-02)); + r += mul(s5_4, M4(1.234e-01, 1.988e-02, -3.497e-02, 2.864e-03, -5.985e-02, -6.099e-02, -1.039e-01, -5.193e-02, 8.858e-02, 3.707e-02, -7.891e-02, 4.948e-02, 8.762e-02, 5.560e-02, -1.161e-01, 4.470e-02)); + r += mul(s5_5, M4(4.512e-02, -8.772e-02, 3.896e-02, -5.043e-02, 2.819e-02, -4.574e-02, 1.322e-01, 5.389e-02, -9.003e-02, 9.038e-02, -3.016e-02, 6.598e-02, 4.502e-02, -6.214e-02, 6.204e-02, 2.493e-02)); + r += 
mul(s5_6, M4(-1.418e-01, -6.317e-03, 4.192e-02, 1.090e-01, 4.677e-02, 2.775e-02, -4.033e-02, 1.778e-02, 1.161e-01, 8.278e-02, 1.486e-02, 1.899e-01, 4.682e-02, -1.269e-02, -5.939e-02, -4.191e-02)); + r += mul(s5_7, M4(3.808e-02, -1.382e-01, 1.624e-01, -8.867e-03, -4.352e-02, 4.613e-02, -8.485e-02, 1.962e-02, -9.782e-02, -1.877e-02, 2.046e-01, 1.311e-01, 5.570e-02, 7.291e-02, -1.629e-01, 2.372e-02)); + r += mul(s5_8, M4(7.632e-02, 2.845e-02, 9.477e-02, 4.662e-02, 2.186e-02, -1.191e-02, 3.877e-02, -2.086e-03, -7.178e-02, 9.062e-03, -1.733e-02, -2.907e-03, -1.850e-02, 4.354e-02, 2.838e-02, 1.076e-01)); + r += mul(s6_0, M4(-1.285e-01, 8.991e-02, -8.087e-02, -3.216e-02, 1.349e-02, 3.391e-02, -1.215e-02, -7.211e-02, 1.965e-03, 4.863e-02, 3.047e-02, -3.501e-02, -6.125e-02, -2.897e-03, -8.380e-02, 2.357e-03)); + r += mul(s6_1, M4(-5.757e-02, 1.372e-01, -6.267e-03, 8.437e-02, 6.323e-02, 4.447e-02, -3.204e-02, -6.612e-02, -2.887e-02, 1.434e-02, 5.392e-02, 1.096e-01, 4.232e-02, 1.115e-01, 2.729e-02, -1.276e-02)); + r += mul(s6_2, M4(2.303e-02, 5.033e-02, -8.145e-03, -1.038e-02, -1.006e-01, 2.948e-03, 6.525e-02, 1.323e-01, -1.220e-02, -4.912e-02, -2.678e-02, 4.257e-02, 6.654e-02, -1.227e-01, 2.506e-02, 9.650e-03)); + r += mul(s6_3, M4(-6.970e-02, 3.303e-02, 6.222e-02, -2.478e-02, 4.571e-02, -7.910e-03, 1.174e-01, -1.023e-01, -2.077e-01, 2.470e-02, 3.463e-02, 5.387e-03, 3.708e-02, 6.542e-02, 1.682e-02, 1.489e-01)); + r += mul(s6_4, M4(-9.703e-02, 1.665e-01, -2.678e-02, -2.903e-01, 2.605e-02, -1.837e-01, -6.346e-02, -1.372e-01, 5.312e-02, -6.241e-02, -1.031e-01, 1.010e-01, -1.034e-01, 1.392e-01, 6.485e-02, 4.498e-02)); + r += mul(s6_5, M4(1.261e-01, -5.379e-02, 1.237e-01, -7.338e-02, 9.243e-02, 1.550e-02, -3.866e-02, 1.153e-03, -1.044e-01, -9.256e-03, 3.221e-02, 2.840e-02, 1.068e-01, 7.488e-03, 3.220e-02, 2.742e-02)); + r += mul(s6_6, M4(6.648e-02, -5.628e-02, 5.234e-02, 1.653e-02, -2.854e-02, 1.178e-01, -4.848e-02, 4.922e-02, -3.475e-02, 3.335e-03, 5.270e-03, -6.638e-02, 
1.035e-01, -1.178e-02, -1.055e-01, 1.954e-02)); + r += mul(s6_7, M4(2.042e-03, -2.349e-02, 5.391e-02, -1.043e-01, -3.899e-02, -8.002e-02, -3.419e-02, 1.665e-01, -2.302e-02, 7.047e-02, -4.170e-02, 1.918e-01, -1.703e-01, 3.008e-02, 3.182e-01, -1.188e-01)); + r += mul(s6_8, M4(-1.147e-01, 6.406e-02, 5.628e-02, 3.118e-02, -2.687e-02, 3.499e-02, 1.447e-02, 1.556e-01, 2.801e-02, -6.173e-02, 7.045e-02, 2.199e-02, -1.085e-01, -1.537e-02, 6.824e-02, -5.379e-02)); + r += mul(s7_0, M4(-9.648e-04, 4.945e-02, -1.580e-02, -1.928e-02, -4.261e-02, -2.873e-02, -1.721e-02, 1.632e-02, -1.067e-01, -5.818e-02, 5.599e-02, 2.674e-02, -2.359e-02, 1.152e-02, -2.086e-02, -3.648e-02)); + r += mul(s7_1, M4(-2.092e-02, 2.304e-01, 1.555e-02, -8.347e-02, 4.209e-02, 4.611e-02, 1.706e-02, -2.508e-01, 1.402e-01, -4.404e-02, 4.809e-02, -9.401e-02, 1.312e-02, 3.321e-02, -7.290e-04, -2.567e-02)); + r += mul(s7_2, M4(4.520e-03, 1.837e-01, -2.464e-02, 8.720e-02, 3.954e-02, -5.138e-02, 7.894e-02, -3.457e-02, 6.971e-02, 8.567e-02, -4.265e-02, 8.670e-02, 8.301e-03, -1.132e-02, 6.187e-02, 9.753e-03)); + r += mul(s7_3, M4(-7.253e-02, 4.552e-02, 5.727e-02, 7.030e-02, 5.003e-02, -5.706e-02, 6.038e-02, -9.776e-02, 3.438e-01, 2.599e-02, 1.471e-01, 8.453e-02, 1.411e-01, -3.744e-03, -1.581e-01, 6.738e-02)); + r += mul(s7_4, M4(8.551e-02, 1.264e-01, 1.721e-01, -1.666e-01, 1.878e-01, -1.705e-02, -9.005e-02, -1.883e-01, 1.931e-01, 4.764e-02, 1.080e-01, 3.652e-02, 2.942e-01, 6.859e-02, 1.697e-01, 2.116e-01)); + r += mul(s7_5, M4(4.700e-02, 4.284e-02, 5.256e-02, -6.432e-03, 1.579e-01, 9.761e-02, 9.664e-02, 4.621e-02, -1.928e-02, 2.427e-02, 7.885e-02, -1.989e-02, 9.267e-02, 1.829e-02, 2.837e-02, 2.120e-02)); + r += mul(s7_6, M4(-7.245e-02, -9.423e-03, 2.428e-02, -4.536e-03, -5.552e-02, 1.170e-01, -1.154e-02, 4.014e-03, -1.365e-01, -8.392e-02, 3.284e-02, 5.963e-02, 6.726e-02, 4.487e-02, -1.220e-01, -6.849e-03)); + r += mul(s7_7, M4(2.217e-02, 5.807e-02, 9.221e-02, -1.460e-01, -1.247e-01, 7.984e-02, -1.802e-01, 5.983e-02, 
-5.921e-03, 8.551e-02, 1.390e-01, -4.825e-03, 2.548e-01, -2.803e-02, 1.412e-01, -1.232e-01)); + r += mul(s7_8, M4(3.988e-02, 1.072e-01, -7.641e-02, -1.410e-02, 5.329e-02, 6.393e-02, -8.930e-02, 9.357e-02, -3.666e-02, -4.053e-02, -6.295e-02, 1.096e-01, -4.078e-02, 7.128e-02, 2.716e-02, -1.274e-01)); + r += V4(3.638e-02, -7.591e-03, 2.399e-02, -1.691e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.122e-03, 3.221e-02, 6.415e-02, 4.652e-02, 8.167e-02, -7.381e-03, -1.575e-01, 5.303e-02, -3.314e-02, 4.220e-02, -6.893e-02, 8.922e-02, 5.497e-02, -1.050e-02, -9.800e-03, 1.611e-02)); + r += mul(s0_1, M4(-7.614e-02, -1.554e-02, -1.860e-02, 5.877e-02, -1.895e-01, 1.341e-01, 1.389e-02, 1.928e-01, 1.424e-02, 8.202e-02, -6.702e-02, 7.446e-02, -3.006e-03, -1.742e-02, 4.742e-02, -2.605e-02)); + r += mul(s0_2, M4(2.639e-02, -2.988e-03, 2.989e-03, 2.073e-02, -1.949e-01, -6.508e-02, 2.486e-02, 3.480e-02, 8.375e-02, 7.751e-02, -4.652e-02, 1.217e-01, -5.198e-02, 9.123e-02, -5.920e-03, 3.304e-02)); + r += mul(s0_3, M4(-1.204e-01, -4.366e-02, 2.654e-03, -6.506e-02, -1.330e-01, -9.774e-02, -1.598e-01, -2.019e-04, 3.756e-02, -7.465e-02, 7.482e-03, -8.724e-03, 3.620e-02, 1.860e-01, -1.966e-02, -8.684e-04)); + r += mul(s0_4, M4(-1.747e-02, -1.114e-01, 8.636e-03, -6.313e-02, 6.846e-02, -1.909e-01, -2.234e-02, 8.443e-02, 1.353e-01, 
-9.527e-02, -1.467e-01, -1.548e-01, 2.765e-02, 4.913e-02, -4.537e-02, 4.330e-02)); + r += mul(s0_5, M4(-6.526e-02, 1.994e-02, 9.515e-03, 2.388e-01, 1.288e-02, 4.491e-02, 9.393e-02, 1.422e-01, -1.040e-01, 1.134e-01, 2.765e-02, 4.279e-02, 2.043e-02, -4.010e-02, 7.829e-03, 5.488e-02)); + r += mul(s0_6, M4(2.325e-02, -3.689e-02, 1.841e-02, -1.125e-02, -9.640e-02, 2.857e-02, 3.618e-02, 7.420e-02, 5.475e-02, -1.613e-01, -4.310e-02, 9.549e-02, -2.707e-02, -1.212e-02, 5.390e-02, -1.092e-01)); + r += mul(s0_7, M4(6.382e-02, 6.961e-02, 1.197e-01, 3.837e-02, -1.072e-01, 6.384e-02, -4.949e-02, 1.343e-01, -1.190e-01, 1.769e-01, 9.607e-02, -5.057e-02, -1.692e-02, 4.203e-02, -4.110e-02, -7.169e-02)); + r += mul(s0_8, M4(4.697e-02, 1.007e-02, -4.633e-02, 8.848e-02, -1.414e-01, 1.499e-01, 2.593e-02, 1.987e-01, 8.290e-03, -5.479e-02, -1.708e-02, -1.725e-02, 7.765e-02, -6.238e-02, 1.249e-01, -4.455e-02)); + r += mul(s1_0, M4(2.614e-01, 2.343e-02, 3.152e-02, 1.098e-01, -2.643e-02, -1.966e-04, 1.151e-01, -1.548e-01, 7.466e-02, -3.097e-02, 3.441e-02, 1.095e-01, -9.348e-03, -3.311e-02, -2.857e-02, -1.503e-03)); + r += mul(s1_1, M4(-1.104e-01, -1.027e-01, -7.884e-02, 1.515e-01, -4.450e-02, 1.904e-03, 1.575e-01, 1.348e-01, 2.675e-01, 4.051e-02, 4.181e-02, -6.693e-02, -1.196e-01, -3.777e-02, 1.405e-02, 3.628e-02)); + r += mul(s1_2, M4(4.757e-02, -3.474e-02, 4.202e-02, -1.033e-03, -3.025e-02, 3.884e-02, -5.866e-02, 7.499e-03, 1.294e-01, -7.598e-02, 6.225e-02, -4.151e-03, -1.089e-01, -5.878e-02, 2.673e-02, -1.284e-01)); + r += mul(s1_3, M4(1.354e-01, -7.675e-02, 1.490e-01, -1.352e-01, -1.379e-02, -7.271e-02, 5.067e-02, -4.590e-02, 1.333e-01, -8.517e-02, 2.664e-03, 9.784e-02, -1.174e-01, -2.460e-02, -2.588e-01, -1.138e-01)); + r += mul(s1_4, M4(1.426e-01, 4.353e-02, -8.672e-02, 2.046e-01, 1.461e-01, -5.507e-02, -5.606e-02, -2.271e-01, 3.839e-01, -8.524e-02, 1.733e-01, -1.155e-01, -1.682e-01, 4.905e-02, -3.108e-02, -4.578e-02)); + r += mul(s1_5, M4(-5.940e-02, 8.678e-02, 5.285e-02, 1.852e-01, 
1.069e-02, 6.512e-02, -6.523e-02, 1.531e-01, 1.185e-01, 9.120e-02, 9.382e-02, -4.945e-02, -1.033e-01, -3.036e-02, -3.838e-02, 4.350e-02)); + r += mul(s1_6, M4(-2.573e-03, 4.625e-02, 2.330e-01, -1.964e-01, 2.848e-02, 7.280e-03, 6.167e-03, 5.167e-02, 2.095e-01, -1.352e-01, 2.159e-02, -6.436e-02, -9.951e-02, -8.163e-02, -1.740e-03, 2.527e-02)); + r += mul(s1_7, M4(9.526e-02, -1.897e-02, -8.165e-02, 1.262e-01, 2.268e-02, -4.038e-02, -5.099e-03, -8.172e-03, 2.524e-01, -1.105e-01, 1.456e-01, 6.517e-02, -3.381e-02, -1.969e-01, 8.836e-02, -3.430e-02)); + r += mul(s1_8, M4(-9.836e-02, 2.153e-01, 3.308e-02, 4.850e-02, 7.915e-02, -4.260e-02, -4.212e-02, 6.764e-02, 1.521e-01, -1.361e-02, -1.244e-02, 5.889e-02, -2.020e-02, 1.773e-01, 3.557e-02, 1.293e-02)); + r += mul(s2_0, M4(-5.097e-02, -9.996e-02, -4.337e-02, -5.412e-02, 9.223e-03, 4.125e-02, 5.016e-02, -4.998e-02, 1.616e-01, 2.576e-02, 9.513e-02, -8.481e-02, 7.373e-02, -1.837e-02, -1.033e-02, -5.529e-02)); + r += mul(s2_1, M4(-2.437e-01, -4.166e-02, 4.871e-02, 1.655e-02, -1.347e-02, -3.214e-03, -4.125e-02, -2.885e-02, 2.194e-02, 6.872e-02, 1.936e-01, -1.485e-01, 5.305e-02, 1.856e-02, -5.319e-03, -8.416e-02)); + r += mul(s2_2, M4(3.580e-02, -5.350e-03, -2.500e-02, -8.106e-02, -8.992e-03, -1.122e-02, 2.935e-02, -6.553e-02, -9.298e-02, -6.000e-02, -8.810e-02, 8.355e-02, 3.968e-02, 4.725e-02, 5.453e-02, -2.341e-03)); + r += mul(s2_3, M4(5.013e-02, 9.650e-02, 2.147e-01, -5.275e-02, -9.588e-03, -1.680e-01, -1.210e-03, -1.026e-01, 1.637e-01, -1.267e-01, -7.794e-02, -1.377e-01, -9.077e-02, 2.929e-02, -2.320e-02, -3.885e-02)); + r += mul(s2_4, M4(3.537e-02, 2.861e-01, 1.609e-01, 6.069e-02, 1.520e-04, -9.723e-02, 1.479e-01, -1.614e-01, -2.389e-01, 7.053e-02, 4.435e-02, 2.380e-01, -2.154e-01, -2.414e-01, -5.209e-02, -2.076e-01)); + r += mul(s2_5, M4(1.066e-01, -9.957e-02, 4.507e-02, 2.493e-01, 4.581e-02, 7.302e-02, 8.105e-02, -1.858e-02, -2.431e-01, -3.741e-02, -1.559e-01, -6.571e-02, -1.112e-01, 8.648e-02, -8.348e-02, -4.152e-02)); + 
r += mul(s2_6, M4(-3.644e-02, 4.572e-02, -1.032e-03, 1.560e-02, -4.409e-02, -3.401e-02, -4.472e-02, 6.646e-02, 2.683e-01, 1.611e-01, 1.700e-01, -1.747e-01, 6.751e-02, 1.890e-01, 7.886e-02, -1.023e-01)); + r += mul(s2_7, M4(-6.757e-02, -9.052e-02, 2.832e-02, -1.465e-01, -4.922e-03, -9.794e-02, 1.553e-02, -1.279e-01, -8.987e-02, -1.094e-02, -6.688e-02, -1.655e-02, 7.677e-03, 2.611e-01, -2.482e-01, -6.622e-02)); + r += mul(s2_8, M4(2.499e-02, -1.209e-02, -1.115e-01, -9.601e-02, 9.146e-02, -5.758e-02, 4.364e-02, -1.167e-01, -1.097e-01, 1.173e-01, -1.560e-02, 3.475e-02, 2.849e-02, 1.014e-01, 8.593e-02, -1.590e-01)); + r += mul(s3_0, M4(-4.347e-02, -1.048e-01, -7.880e-02, -4.102e-02, 4.306e-02, 7.258e-02, -5.191e-02, 1.725e-01, 2.428e-02, 3.249e-02, -2.611e-02, -2.422e-02, 2.404e-02, -1.102e-01, 5.414e-02, -1.339e-01)); + r += mul(s3_1, M4(-1.674e-01, -7.777e-02, 2.180e-02, -5.083e-02, 8.096e-02, -3.833e-02, 6.135e-02, 1.318e-01, -1.086e-01, 8.571e-02, 1.685e-01, 6.385e-02, -1.179e-02, 2.198e-02, -2.858e-02, -1.056e-02)); + r += mul(s3_2, M4(-2.490e-02, -1.257e-02, -4.781e-02, -1.123e-01, -5.627e-02, -7.591e-02, -1.466e-01, 3.166e-02, -3.609e-02, -1.089e-02, 5.629e-02, 1.126e-01, 5.704e-02, -9.620e-03, 3.629e-02, 2.746e-02)); + r += mul(s3_3, M4(-6.588e-03, 1.090e-01, 1.702e-01, -7.298e-02, 2.883e-02, -1.147e-01, -7.601e-02, -1.193e-01, -8.575e-02, -3.435e-02, -3.054e-02, -1.020e-02, -4.655e-02, 7.413e-02, -3.645e-02, 1.089e-02)); + r += mul(s3_4, M4(-1.556e-01, 6.820e-02, 9.447e-02, 1.239e-01, -2.075e-02, -1.816e-01, 1.924e-02, -3.414e-02, -1.080e-03, 8.237e-02, 1.103e-01, 8.603e-03, 1.465e-02, -5.061e-02, -5.050e-02, -3.169e-02)); + r += mul(s3_5, M4(2.978e-03, -6.699e-02, -8.814e-03, 1.197e-01, -1.398e-02, 7.272e-02, -9.433e-02, -6.428e-02, -6.159e-03, -1.572e-01, -1.129e-02, 3.149e-02, 7.888e-02, 1.852e-02, 2.549e-02, 5.520e-02)); + r += mul(s3_6, M4(3.404e-02, 7.653e-02, 1.350e-04, -1.302e-02, -6.493e-03, 5.301e-02, 5.665e-02, 2.738e-03, 4.903e-02, 2.003e-01, 
-5.064e-02, 2.970e-02, -5.188e-03, 7.445e-02, 2.803e-02, -8.723e-02)); + r += mul(s3_7, M4(-3.503e-02, -9.320e-02, -7.654e-03, -2.625e-02, 4.191e-02, 4.742e-02, 1.224e-02, -8.254e-02, 3.761e-02, -9.573e-02, -9.324e-02, 2.324e-03, 4.435e-02, 4.660e-02, -2.731e-02, -5.098e-02)); + r += mul(s3_8, M4(1.075e-01, -8.816e-02, -1.708e-02, 4.243e-02, 2.831e-02, -1.839e-02, -4.368e-03, 6.529e-02, 6.338e-02, 6.434e-02, 1.710e-02, -3.947e-02, 3.058e-02, -8.413e-02, 1.092e-01, 6.166e-02)); + r += mul(s4_0, M4(9.885e-02, -2.609e-02, -6.038e-03, -2.739e-03, -3.237e-02, -1.558e-02, -5.483e-03, -9.643e-02, -7.105e-04, 5.494e-02, 5.130e-02, -1.732e-01, 5.518e-02, -3.268e-02, 1.108e-01, -2.745e-02)); + r += mul(s4_1, M4(1.702e-01, -2.706e-02, 3.278e-02, -7.609e-02, 2.094e-01, -3.217e-02, -1.866e-01, 1.230e-02, 7.174e-02, -3.407e-02, -3.769e-02, 1.175e-02, 3.143e-02, 2.576e-02, 1.159e-01, 2.635e-03)); + r += mul(s4_2, M4(1.988e-01, -8.267e-02, -5.928e-02, -1.172e-01, -8.113e-02, 1.107e-01, -3.047e-02, 1.694e-01, 6.036e-02, -1.419e-02, 4.958e-02, 1.981e-02, 2.589e-02, -2.834e-02, 9.015e-02, -4.404e-02)); + r += mul(s4_3, M4(4.116e-02, -2.129e-02, -8.551e-02, -3.631e-02, 7.849e-02, 2.772e-02, 1.066e-03, -1.486e-02, 8.836e-02, -1.642e-01, 5.603e-02, -2.434e-03, 7.719e-03, -2.007e-01, -1.681e-02, -2.288e-02)); + r += mul(s4_4, M4(-8.271e-02, 1.803e-01, -1.171e-01, -1.581e-02, 8.760e-03, -5.076e-02, -1.394e-01, 3.074e-02, -3.828e-02, 1.644e-01, 9.983e-02, 3.880e-01, -1.280e-01, -9.624e-02, 8.672e-02, -5.392e-02)); + r += mul(s4_5, M4(2.466e-01, -4.310e-02, -6.503e-02, -6.675e-02, -5.871e-02, 3.674e-02, 3.985e-02, 4.898e-02, 7.213e-02, 2.410e-02, 9.558e-02, -1.250e-02, -1.169e-02, 1.462e-01, 2.751e-02, 4.281e-02)); + r += mul(s4_6, M4(1.422e-01, 8.493e-02, -9.694e-02, -1.459e-01, -5.319e-03, 8.994e-02, -4.240e-02, -3.156e-03, -1.720e-02, 3.089e-02, -5.515e-02, 1.201e-02, 1.678e-02, 1.007e-01, 5.532e-02, -5.257e-02)); + r += mul(s4_7, M4(7.604e-02, 1.544e-01, -5.326e-03, -1.967e-04, 
-7.441e-02, -5.191e-02, -8.681e-03, -7.487e-02, -3.499e-02, -1.164e-01, -5.844e-02, -1.431e-01, 4.536e-02, 1.019e-01, -7.182e-02, 5.217e-02)); + r += mul(s4_8, M4(2.261e-01, 1.346e-01, -1.518e-01, -2.409e-01, 1.922e-02, -1.039e-01, 6.183e-02, 1.770e-02, -1.253e-02, -2.157e-01, 6.423e-02, -6.254e-02, -1.110e-02, -9.760e-02, 7.552e-02, 2.143e-01)); + r += mul(s5_0, M4(-1.293e-02, 3.530e-02, -4.793e-02, -2.026e-02, 7.835e-02, 7.064e-02, 1.519e-01, -3.133e-02, -8.069e-02, 7.062e-02, -3.027e-02, -7.878e-02, 1.247e-01, 5.229e-02, -5.594e-02, 1.688e-01)); + r += mul(s5_1, M4(-5.445e-03, 2.280e-02, -1.896e-02, 5.078e-02, 7.634e-02, 1.594e-03, 8.450e-03, -5.722e-03, 5.616e-02, 1.795e-02, 3.200e-02, 7.405e-03, -1.824e-01, 1.101e-01, -1.799e-02, 1.183e-01)); + r += mul(s5_2, M4(-4.660e-02, -2.095e-03, -3.316e-02, -3.642e-02, -2.935e-02, 6.936e-02, 4.058e-03, 2.224e-01, 1.972e-02, 9.378e-02, 5.085e-02, -1.652e-03, -2.666e-02, 2.385e-02, -7.901e-02, 4.147e-02)); + r += mul(s5_3, M4(6.111e-02, -1.291e-03, 9.123e-02, -7.596e-03, -3.088e-02, -2.414e-02, 8.107e-02, -4.347e-02, -1.154e-01, -7.597e-02, 3.140e-02, -9.213e-02, 2.709e-01, -3.836e-02, -6.321e-02, 1.294e-01)); + r += mul(s5_4, M4(-1.904e-01, -1.248e-02, 4.863e-02, 2.437e-02, 9.780e-02, -3.476e-02, 6.971e-02, -8.925e-02, -3.409e-02, -8.134e-02, 2.155e-01, 2.111e-01, 1.145e-01, 6.807e-02, -7.996e-02, 1.059e-01)); + r += mul(s5_5, M4(-2.078e-02, -5.235e-02, -3.481e-02, -1.279e-01, -4.278e-02, 2.770e-01, -1.707e-01, 1.052e-01, 1.513e-01, 7.838e-03, 1.237e-01, -1.042e-01, -1.111e-03, 2.036e-02, -1.705e-01, 4.461e-02)); + r += mul(s5_6, M4(9.067e-02, -8.189e-02, 8.152e-02, -1.201e-01, 5.614e-02, 4.607e-03, -1.237e-01, 1.687e-03, 4.723e-02, 4.826e-02, 1.634e-02, 9.371e-02, 5.985e-02, 7.587e-02, -6.067e-02, -7.720e-03)); + r += mul(s5_7, M4(2.234e-03, -4.154e-02, -3.238e-02, 1.585e-01, 1.864e-01, -4.508e-02, -5.011e-02, 4.000e-02, 1.318e-02, -1.099e-01, 8.159e-02, 4.908e-02, 5.029e-02, 1.217e-01, -3.190e-02, -1.377e-01)); + r += 
mul(s5_8, M4(-5.333e-02, 2.052e-01, -7.738e-02, -2.935e-02, 7.547e-02, -1.209e-03, -4.432e-02, -4.809e-02, -4.971e-02, 9.437e-02, 2.678e-04, 6.610e-03, 6.923e-02, -3.818e-02, -8.644e-02, -1.157e-01)); + r += mul(s6_0, M4(7.258e-02, 4.038e-02, 1.938e-01, 1.214e-01, 1.843e-02, -1.689e-02, 2.054e-02, 3.456e-02, 4.086e-02, -2.171e-03, -1.661e-02, 4.080e-02, -7.667e-03, -2.033e-02, 6.338e-02, -5.188e-02)); + r += mul(s6_1, M4(3.389e-02, 2.022e-02, 1.517e-01, -1.794e-02, 4.552e-02, -1.331e-02, -2.784e-02, 5.282e-02, -3.981e-02, 4.961e-02, -4.364e-02, -2.406e-02, 4.692e-02, 6.920e-03, -1.967e-01, 3.697e-02)); + r += mul(s6_2, M4(-8.807e-02, 5.278e-02, -1.307e-01, -5.170e-02, 6.900e-03, -2.587e-02, -2.910e-02, 1.182e-01, -1.332e-02, -8.885e-02, -3.295e-02, 6.568e-02, 7.378e-02, -1.441e-01, 1.888e-01, -6.906e-02)); + r += mul(s6_3, M4(2.135e-01, 9.815e-02, -2.317e-03, 7.926e-02, 7.139e-02, 3.568e-02, 1.005e-01, 5.436e-02, 3.522e-02, -7.201e-02, 2.137e-02, 1.105e-01, 7.763e-02, 1.051e-02, -2.502e-02, -1.303e-01)); + r += mul(s6_4, M4(1.521e-01, -4.022e-02, 1.071e-01, 2.978e-01, 2.251e-01, 1.372e-02, -8.842e-02, 8.195e-02, -1.758e-02, 1.544e-01, -7.503e-02, 2.309e-02, -1.311e-01, 6.786e-02, 6.556e-02, 1.875e-01)); + r += mul(s6_5, M4(1.487e-01, -3.226e-03, 5.028e-02, -3.164e-02, 8.212e-02, 7.144e-02, -5.512e-02, 1.067e-01, -2.526e-02, -8.824e-02, -7.579e-02, -4.321e-03, 1.298e-02, 1.498e-01, 7.777e-02, 7.105e-02)); + r += mul(s6_6, M4(-1.814e-01, -5.366e-03, 1.055e-01, 3.065e-02, -7.632e-02, 5.231e-02, -6.155e-03, -1.296e-01, 1.567e-01, -5.051e-03, -4.366e-02, 1.596e-01, -1.374e-01, 2.075e-01, -2.219e-02, -2.834e-01)); + r += mul(s6_7, M4(-1.007e-01, -1.339e-01, -1.843e-01, -2.791e-02, 1.473e-02, -3.861e-02, 9.616e-02, 4.250e-03, 4.409e-02, 1.443e-01, -3.020e-02, -3.879e-02, 1.685e-01, -1.937e-01, 5.492e-02, 1.600e-01)); + r += mul(s6_8, M4(-1.563e-01, 1.558e-01, 9.023e-02, 4.422e-02, 9.521e-02, -7.681e-03, -1.173e-01, 7.469e-03, -1.848e-02, -1.545e-01, -2.770e-02, 3.015e-02, 
5.900e-02, -2.075e-02, 4.698e-02, 1.772e-01)); + r += mul(s7_0, M4(8.200e-02, 5.756e-02, 9.661e-02, 4.352e-02, -1.281e-01, 6.157e-02, 9.155e-03, -1.418e-02, 1.909e-02, 1.285e-01, -1.552e-02, -7.421e-02, -1.242e-02, -2.537e-02, -6.863e-02, 7.735e-02)); + r += mul(s7_1, M4(9.257e-03, 5.372e-04, 2.077e-02, 2.666e-02, 2.836e-02, 6.640e-02, -8.838e-02, 1.422e-01, -7.205e-03, 6.888e-02, -1.126e-01, 4.261e-02, 5.900e-02, -3.620e-02, -1.208e-01, -8.781e-02)); + r += mul(s7_2, M4(-1.545e-03, 3.824e-02, 1.034e-02, 1.034e-02, 9.623e-02, -6.105e-02, 2.341e-02, -5.951e-02, -3.689e-02, 3.023e-02, 5.508e-02, 1.259e-01, -7.240e-03, 1.046e-02, 3.698e-02, 7.760e-02)); + r += mul(s7_3, M4(8.481e-02, -1.001e-02, -3.504e-02, -6.743e-02, 1.274e-01, 1.218e-01, 1.881e-01, 1.043e-01, 1.663e-02, 1.255e-01, 6.236e-02, 1.034e-02, -1.097e-01, 2.092e-02, -9.565e-03, 3.159e-02)); + r += mul(s7_4, M4(-1.373e-01, -8.443e-02, 1.654e-02, 8.372e-02, 1.719e-01, 1.171e-01, -6.373e-02, -1.725e-01, 1.305e-01, 1.391e-01, -7.093e-02, 2.045e-01, 9.758e-03, 7.522e-02, 1.208e-01, 1.445e-01)); + r += mul(s7_5, M4(3.136e-02, 7.428e-02, 2.377e-02, -5.426e-02, -5.774e-02, -2.218e-02, 3.419e-02, -1.563e-01, 2.884e-02, 7.586e-02, 1.002e-01, -9.772e-02, -6.488e-02, 1.270e-01, -5.999e-02, 1.439e-02)); + r += mul(s7_6, M4(-6.248e-02, 2.695e-03, 5.388e-02, -4.867e-02, -1.261e-01, 1.290e-01, 8.692e-02, -2.300e-01, 1.047e-01, 1.861e-02, 1.120e-01, -9.187e-02, -1.043e-02, 6.918e-02, -1.089e-02, -1.033e-01)); + r += mul(s7_7, M4(6.349e-03, -8.782e-02, 7.311e-02, 7.680e-02, 4.490e-02, -5.650e-02, 2.508e-02, -2.658e-02, 3.531e-02, 4.187e-02, 1.274e-01, 2.816e-02, -6.453e-03, -1.352e-01, 2.652e-03, 1.852e-01)); + r += mul(s7_8, M4(1.649e-02, 2.633e-02, -2.998e-02, -5.636e-02, 1.651e-02, 3.758e-02, -1.289e-02, -2.705e-02, 1.393e-02, -3.522e-02, 9.666e-03, -5.731e-02, 3.139e-02, -3.168e-02, -3.724e-02, 1.530e-01)); + r += V4(-4.198e-02, -1.482e-02, 6.775e-02, 1.910e-02); + return r; +} +/* Pass11: per-thread body of pass 11. Derives the target pixel gxy from tid.x (via Rmp8x8) plus blockStart and returns early when gxy lies outside GetInputSize(). It then reads a 3x3 neighbourhood through each of l0..l3, splits every sample into a non-negative and a non-positive vector, and stores f0..f3 of the resulting 72 vectors into t0..t3 at gxy. NOTE(review): the l0..l3 macros and the f0..f3 definitions used by this pass start outside this view; confirm which textures they read. `pos` is not referenced by name in the body; presumably the sampling macros use it - TODO confirm. */ +void Pass11(uint2 blockStart, uint3 
tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; +/* Taps through l0: index k = (y + 1) * 3 + (x + 1) for offsets x, y in {-1, 0, 1}. s1_k = -max(-s0_k, 0.0) keeps the part of the sample that is <= 0 (i.e. min(s0_k, 0)); s0_k is then clamped to max(s0_k, 0.0), so each tap yields one non-negative and one non-positive vector. Presumably a CReLU-style activation split - confirm against the model exporter. */ + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); +/* Same tap order and sign split for l1: s2_k ends up as the >= 0 part, s3_k as the <= 0 part. */ + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); +/* Same for l2: s4_k (>= 0 part) and s5_k (<= 0 part). */ + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = 
-max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); +/* Same for l3: s6_k (>= 0 part) and s7_k (<= 0 part). */ + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); +/* Every fN receives all 72 vectors (8 sets x 9 taps) in the same order; its result is written to tN at gxy. */ + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, 
s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 12 +//!DESC conv11 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 +/* Pass 12 input macros: lN(x, y) expands to V4(O(tN, float2(x, y))). O is defined outside this chunk; presumably it samples tN at an (x, y) texel offset from the current position - TODO confirm. */ +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) +/* f0 (pass 12): starts from r = 0.0, adds mul(sA_B, M4(...)) for each of the 72 parameters with one hard-coded 16-literal M4 per parameter, then adds a constant V4 and returns r. The literals are presumably exported network weights and bias - do not edit by hand. Parameters are named sA_B with A = 0..7 and B = 0..8; presumably A is the input set and B the 3x3 tap index as in Pass11 - confirm at the Pass12 call site. */ +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.027e-02, 3.350e-02, 1.428e-02, 6.321e-02, 4.238e-02, 
-7.210e-02, 9.887e-02, 5.361e-02, 3.456e-02, 1.275e-01, -1.207e-01, -2.408e-03, -5.999e-02, -1.340e-02, 7.271e-02, 4.116e-02)); + r += mul(s0_1, M4(-4.305e-03, 7.823e-02, 1.873e-01, 8.701e-03, -1.526e-01, -1.319e-01, 1.577e-01, -1.227e-01, -5.320e-02, 5.097e-02, 4.025e-02, -2.267e-02, 4.957e-02, 6.809e-02, -1.239e-01, 4.480e-02)); + r += mul(s0_2, M4(2.936e-02, 2.545e-01, -2.636e-02, 9.335e-02, 1.159e-03, -1.849e-01, -8.767e-02, 3.542e-02, -2.835e-02, -6.257e-02, 5.403e-02, 7.037e-02, 6.892e-02, 9.439e-02, -1.353e-02, -3.231e-02)); + r += mul(s0_3, M4(-8.178e-02, -3.406e-02, 1.900e-01, -2.007e-01, 1.111e-01, 2.181e-02, 1.365e-01, 5.202e-02, -1.072e-01, 2.550e-02, -2.048e-01, -3.998e-02, 9.361e-02, 3.129e-04, -1.810e-02, -9.585e-02)); + r += mul(s0_4, M4(2.677e-02, 8.322e-02, 4.133e-02, 1.011e-02, -3.875e-04, -5.028e-02, 8.225e-02, 1.470e-01, 1.969e-01, 1.437e-01, 9.116e-03, -7.058e-02, -5.259e-02, 1.331e-01, -3.768e-02, -1.607e-01)); + r += mul(s0_5, M4(-1.911e-01, -2.324e-01, -5.346e-02, 4.299e-02, 2.093e-03, -1.989e-02, 5.742e-02, -3.412e-02, -3.372e-03, -9.658e-03, -1.687e-01, 1.714e-01, 5.278e-02, 1.814e-02, -2.189e-02, 5.993e-02)); + r += mul(s0_6, M4(8.498e-03, -1.256e-02, 2.460e-02, -5.061e-02, -2.528e-03, -8.265e-02, 8.679e-02, -7.378e-02, 2.867e-02, 1.050e-01, -2.118e-02, 3.698e-02, -1.119e-02, -2.787e-02, 4.916e-02, 5.612e-02)); + r += mul(s0_7, M4(-6.157e-02, 1.318e-01, 2.041e-02, -2.950e-02, 1.291e-03, 9.943e-02, 3.344e-02, -1.789e-01, 9.008e-02, -1.986e-01, -1.095e-01, 1.193e-01, -5.329e-02, -8.055e-03, 1.282e-01, -3.014e-02)); + r += mul(s0_8, M4(8.404e-02, 5.007e-02, -3.086e-02, -7.501e-02, 2.014e-02, -4.887e-02, 3.471e-02, 2.891e-02, -9.537e-02, -7.650e-02, 6.983e-02, 9.330e-02, -6.610e-02, 9.508e-02, 1.635e-02, -1.666e-02)); + r += mul(s1_0, M4(-4.352e-03, 4.436e-02, -1.928e-02, 6.807e-02, -1.587e-02, -1.885e-02, -3.777e-02, 1.072e-01, 1.705e-02, -1.222e-02, 8.458e-03, 2.098e-02, -6.163e-03, -4.240e-02, 7.474e-02, 2.001e-02)); + r += mul(s1_1, 
M4(-3.622e-03, 1.108e-01, 1.199e-03, -6.871e-02, -1.247e-01, -2.878e-02, -2.393e-04, -6.960e-02, 1.740e-03, -6.211e-02, 7.958e-02, 6.123e-02, -5.215e-02, -8.874e-02, -5.463e-02, -1.427e-03)); + r += mul(s1_2, M4(7.850e-02, 5.071e-04, 1.559e-02, -4.436e-02, 1.704e-02, -5.994e-02, -2.089e-03, 6.444e-02, -3.355e-02, 7.442e-02, -2.970e-02, 1.828e-02, 7.916e-03, 7.635e-03, 6.492e-02, -3.487e-04)); + r += mul(s1_3, M4(-2.986e-02, 4.872e-02, 3.465e-02, -1.199e-01, -1.510e-01, 7.831e-02, 1.277e-01, -5.327e-03, 3.122e-04, -1.056e-02, -2.310e-02, 5.508e-02, 1.646e-01, -4.631e-02, 4.147e-02, 1.827e-02)); + r += mul(s1_4, M4(9.032e-02, 5.492e-02, -1.526e-01, -9.587e-02, 3.079e-02, -5.876e-02, -4.757e-02, 5.352e-02, 3.499e-03, 9.725e-02, 1.254e-01, 2.497e-01, -1.745e-01, -3.897e-02, 1.041e-02, -1.713e-01)); + r += mul(s1_5, M4(1.645e-02, -2.164e-03, -2.104e-02, 5.136e-03, 5.553e-02, 2.830e-03, 1.231e-01, -6.042e-02, 6.551e-02, 2.146e-02, -2.451e-02, 9.546e-02, -1.102e-01, -4.046e-02, 9.812e-03, 5.146e-02)); + r += mul(s1_6, M4(-8.014e-02, -5.806e-02, -4.910e-02, 4.263e-02, 2.588e-02, 7.238e-02, 8.806e-02, -4.957e-02, 1.304e-01, -1.696e-02, -1.334e-01, 6.098e-03, 6.232e-02, -2.102e-02, 7.446e-02, 4.797e-02)); + r += mul(s1_7, M4(-6.799e-02, 5.962e-02, 4.658e-02, -1.627e-02, 3.097e-02, 7.660e-02, 9.734e-02, -1.109e-02, 8.521e-02, 2.398e-02, 1.091e-01, -5.402e-02, -3.233e-02, 2.900e-02, 8.171e-02, 5.457e-02)); + r += mul(s1_8, M4(1.813e-02, -5.903e-02, -8.821e-02, 3.328e-02, -2.800e-02, 5.176e-02, -2.824e-02, 2.950e-02, -2.937e-02, -2.933e-02, 1.451e-02, -1.768e-02, -1.294e-01, -2.657e-02, -1.093e-01, -2.449e-02)); + r += mul(s2_0, M4(-5.576e-02, 4.547e-02, 2.213e-02, -5.612e-02, -6.076e-02, -5.283e-02, 9.674e-02, -2.427e-02, 5.090e-02, -6.185e-02, 6.215e-02, 3.471e-02, 2.439e-02, 8.819e-02, -6.768e-02, -3.541e-02)); + r += mul(s2_1, M4(9.906e-02, 1.856e-04, 7.059e-02, -1.346e-02, -2.085e-02, -3.300e-02, -2.753e-02, -1.480e-02, -3.965e-03, -5.163e-03, -9.676e-02, 2.592e-02, 
-6.613e-02, 2.231e-02, -2.559e-02, 1.414e-02)); + r += mul(s2_2, M4(7.066e-02, -9.089e-02, 4.294e-02, 6.855e-03, 1.019e-01, 1.653e-02, 4.635e-02, 1.610e-02, 3.856e-02, 2.143e-02, 1.450e-01, -1.787e-02, -8.608e-03, 8.413e-02, -4.002e-02, -1.867e-02)); + r += mul(s2_3, M4(-8.533e-02, 1.267e-01, -2.513e-02, 5.467e-02, -1.880e-01, -3.546e-02, -1.445e-01, -9.015e-03, -6.507e-02, 8.525e-02, 8.733e-02, 6.145e-02, -1.146e-01, -1.366e-02, -3.453e-02, 1.275e-01)); + r += mul(s2_4, M4(7.469e-02, -4.658e-02, -9.446e-02, -3.588e-02, 1.319e-01, -2.569e-01, 9.587e-02, 6.460e-02, 2.033e-01, 6.380e-03, 7.087e-02, 3.207e-01, 1.154e-02, 4.893e-02, -2.383e-02, -7.729e-02)); + r += mul(s2_5, M4(1.595e-03, -9.912e-03, 7.094e-02, 8.387e-03, 1.157e-02, 7.051e-03, 1.257e-01, -6.340e-02, -8.918e-02, 4.954e-02, -1.062e-01, -3.136e-02, 2.560e-02, 4.426e-02, -5.615e-02, -6.095e-02)); + r += mul(s2_6, M4(-1.794e-02, -6.748e-02, -3.150e-03, -4.137e-02, -5.120e-02, -1.737e-01, -1.672e-01, 7.418e-02, -2.338e-03, -1.807e-02, 1.264e-02, 1.958e-02, -5.312e-02, 2.362e-02, -3.098e-02, 6.951e-03)); + r += mul(s2_7, M4(-6.799e-02, 7.544e-02, 9.480e-02, 6.688e-02, -9.454e-02, -2.227e-01, 1.661e-01, 7.126e-02, -2.606e-02, -6.588e-02, -1.764e-02, -2.451e-02, 5.529e-03, 2.664e-02, -2.938e-02, -2.994e-02)); + r += mul(s2_8, M4(-6.069e-02, 4.577e-02, -1.125e-01, -2.641e-02, 6.419e-02, 3.102e-02, -6.568e-02, 6.058e-02, -4.397e-02, 1.204e-01, -1.435e-01, 6.724e-02, -2.214e-02, 3.005e-02, -8.460e-02, -1.527e-03)); + r += mul(s3_0, M4(6.388e-02, 7.439e-02, 8.771e-02, -1.071e-01, 4.175e-03, 1.651e-02, -2.110e-02, 1.469e-02, 4.442e-02, -1.501e-01, 3.256e-02, 8.358e-03, 5.984e-02, -1.449e-02, -6.451e-02, -3.232e-02)); + r += mul(s3_1, M4(9.124e-02, -8.852e-02, -2.820e-02, 1.702e-01, -4.896e-02, -1.029e-02, -1.056e-01, 4.557e-02, 1.048e-01, -4.230e-02, -2.151e-02, 3.457e-02, 2.427e-01, 1.553e-02, 1.304e-01, 1.004e-01)); + r += mul(s3_2, M4(5.056e-02, -1.253e-01, 7.898e-02, -1.561e-02, 8.035e-02, 2.565e-02, 2.106e-02, 
-3.900e-02, 1.127e-01, 2.851e-02, -1.163e-02, 1.693e-01, 4.379e-02, 1.405e-02, -6.421e-02, -1.927e-02)); + r += mul(s3_3, M4(-1.382e-01, 2.770e-01, -9.469e-02, -6.580e-02, -1.002e-01, -1.981e-02, -3.225e-02, 5.722e-02, -1.903e-01, -1.401e-01, -4.509e-02, 5.948e-02, -1.062e-01, 1.216e-01, -3.595e-02, -5.656e-02)); + r += mul(s3_4, M4(-1.419e-01, 1.411e-02, -1.486e-01, -4.730e-02, 1.046e-01, 3.938e-02, 4.987e-02, 1.105e-01, 2.553e-01, 1.622e-01, -1.038e-01, 2.154e-01, 1.589e-01, 5.712e-02, -2.280e-02, -1.275e-01)); + r += mul(s3_5, M4(-1.119e-01, -6.812e-02, 1.085e-02, -8.278e-02, -7.537e-02, -7.356e-02, 8.431e-03, -7.313e-02, -1.562e-01, 7.580e-02, -1.201e-01, 4.693e-02, 1.688e-03, 8.123e-02, -1.526e-01, -1.570e-02)); + r += mul(s3_6, M4(3.434e-02, 2.129e-02, 3.146e-02, 1.193e-01, -6.725e-02, -9.413e-02, -4.018e-02, 6.954e-02, 2.412e-02, 2.030e-02, -3.220e-04, 2.826e-02, -6.690e-02, -9.628e-02, -1.657e-01, 1.762e-01)); + r += mul(s3_7, M4(-1.295e-01, -1.515e-01, 1.126e-01, 5.264e-02, -4.873e-02, -5.810e-02, 7.918e-02, 2.426e-02, -4.044e-03, -5.359e-02, -7.819e-02, 6.580e-02, -7.700e-02, -1.432e-01, -1.003e-01, 4.793e-02)); + r += mul(s3_8, M4(5.132e-02, 1.318e-01, -5.807e-02, 1.543e-02, 3.343e-02, -2.358e-02, 5.884e-02, 8.555e-02, -9.917e-02, 2.705e-02, 1.954e-02, 7.334e-03, -2.228e-02, -1.743e-01, -1.065e-02, 6.207e-03)); + r += mul(s4_0, M4(-7.787e-02, -1.987e-02, -3.034e-02, 4.881e-02, 4.049e-02, 9.913e-02, 1.125e-01, -5.630e-03, -1.277e-01, -1.210e-01, 2.032e-02, -1.912e-02, 1.864e-01, -3.915e-02, 1.205e-01, 7.250e-02)); + r += mul(s4_1, M4(-1.865e-01, 7.176e-02, -2.071e-02, 9.617e-02, -3.999e-02, 8.436e-04, 5.868e-02, 5.035e-02, 1.638e-01, 5.488e-02, 5.034e-02, 1.074e-01, 3.680e-03, 1.552e-02, 1.061e-01, 1.514e-02)); + r += mul(s4_2, M4(-1.471e-01, -7.088e-02, -5.048e-02, -1.602e-02, 2.071e-02, 1.244e-02, -5.500e-02, -1.124e-01, 8.448e-02, 8.704e-02, 3.572e-03, 2.569e-02, 6.429e-02, 3.312e-02, 7.447e-03, -7.709e-02)); + r += mul(s4_3, M4(-3.323e-03, -7.480e-02, 
-6.359e-03, -8.133e-02, -2.709e-02, 1.274e-01, 2.387e-01, -1.789e-03, -1.244e-01, -9.319e-02, -1.313e-02, -5.737e-02, 2.097e-01, 1.328e-01, 1.389e-01, -7.027e-02)); + r += mul(s4_4, M4(-1.533e-02, 5.026e-02, -1.738e-01, -1.808e-01, 9.018e-02, 3.262e-01, 1.511e-01, 1.832e-01, 1.525e-01, 1.369e-01, -2.421e-01, -1.503e-01, -4.022e-02, 6.431e-02, 2.300e-01, 2.242e-01)); + r += mul(s4_5, M4(1.554e-01, -1.086e-01, 2.606e-02, -2.852e-02, 2.103e-02, 9.414e-02, -8.182e-02, 1.710e-01, -1.289e-03, 9.796e-02, -7.346e-02, 6.094e-03, 2.657e-02, 3.331e-02, -6.860e-02, -1.547e-02)); + r += mul(s4_6, M4(-4.565e-02, -1.252e-01, -1.260e-01, 1.447e-01, 1.822e-02, 1.472e-01, 2.370e-01, 6.200e-02, 3.505e-02, 7.348e-02, -3.147e-02, 1.064e-02, 8.525e-02, -9.476e-02, -2.267e-02, -3.494e-02)); + r += mul(s4_7, M4(1.060e-01, -8.831e-02, 4.264e-02, 2.502e-02, 1.012e-01, 2.622e-01, 4.551e-03, -5.335e-02, -1.870e-02, 2.267e-02, -8.892e-02, -1.373e-02, -4.082e-02, 2.549e-02, 1.202e-01, -1.565e-01)); + r += mul(s4_8, M4(-5.898e-02, 2.479e-02, -3.942e-02, -3.697e-03, -1.255e-01, 1.387e-01, -1.316e-01, 8.861e-02, 4.392e-02, -1.030e-01, 6.450e-03, -3.109e-02, -1.278e-02, 7.887e-02, -6.572e-02, -4.646e-02)); + r += mul(s5_0, M4(-1.887e-02, -1.250e-01, -1.006e-02, 4.404e-02, -5.668e-02, 6.629e-02, 2.281e-02, 5.303e-03, 6.988e-02, -2.749e-02, -5.214e-02, 3.167e-02, -4.101e-02, -1.666e-02, -1.159e-02, 1.014e-02)); + r += mul(s5_1, M4(4.428e-02, 5.437e-02, 1.288e-01, 9.757e-03, 2.095e-02, -4.681e-02, -3.719e-02, 9.138e-02, 1.123e-01, -3.314e-02, -8.532e-02, 1.557e-01, -5.768e-02, -2.301e-02, 6.916e-02, 1.345e-02)); + r += mul(s5_2, M4(1.333e-02, 6.317e-02, -2.239e-02, 2.617e-02, -3.199e-02, 5.707e-02, -1.811e-02, -4.650e-02, 2.383e-02, -8.511e-02, 1.560e-02, -9.861e-02, 1.960e-02, 4.809e-02, -3.470e-02, -4.688e-02)); + r += mul(s5_3, M4(-3.797e-02, -4.800e-02, -1.630e-02, 4.191e-02, -2.801e-02, -3.671e-02, 1.489e-04, -3.431e-02, 1.077e-01, 7.320e-02, 4.196e-02, 5.629e-02, -1.585e-01, -1.169e-01, 
2.734e-02, 6.072e-02)); + r += mul(s5_4, M4(5.287e-02, 1.144e-01, 2.570e-02, -2.558e-02, 1.581e-02, 4.501e-02, 1.703e-01, 5.116e-02, 1.633e-01, 9.708e-02, -1.226e-01, -1.191e-01, -1.863e-01, -5.411e-02, 1.863e-01, 2.647e-01)); + r += mul(s5_5, M4(7.694e-02, 1.329e-01, -3.723e-02, 1.601e-02, 6.980e-02, 3.843e-02, -1.607e-02, 5.129e-02, 8.915e-02, 1.707e-01, 3.626e-02, -6.211e-02, -1.157e-02, -2.449e-02, -6.461e-02, 3.962e-02)); + r += mul(s5_6, M4(1.273e-02, 4.972e-03, 8.003e-02, 3.084e-03, -2.563e-02, 1.600e-01, 1.369e-01, -6.358e-03, 3.958e-02, 7.380e-02, -6.270e-02, -2.599e-02, -2.974e-02, -5.836e-02, -9.067e-04, -1.933e-02)); + r += mul(s5_7, M4(-8.110e-03, 6.509e-02, 4.287e-02, -1.191e-02, 9.469e-02, 1.269e-01, -5.862e-03, -1.099e-01, -4.246e-02, 8.094e-05, 6.560e-02, 7.111e-02, -1.085e-01, 4.565e-04, 3.576e-02, -3.619e-02)); + r += mul(s5_8, M4(-7.703e-02, 4.137e-02, -1.278e-01, 1.509e-01, -4.019e-02, 1.154e-01, -4.917e-02, -4.517e-03, -2.795e-02, -9.270e-02, 7.441e-03, -4.642e-02, -3.896e-03, 5.552e-02, -9.551e-04, -1.710e-04)); + r += mul(s6_0, M4(3.741e-02, 2.367e-02, 4.031e-02, 5.234e-02, 5.206e-03, 3.340e-02, 1.847e-01, -1.892e-02, -1.302e-01, 1.977e-02, -1.397e-02, 1.557e-01, -4.721e-02, 5.117e-02, 5.815e-03, 5.388e-02)); + r += mul(s6_1, M4(1.490e-01, -6.906e-03, -2.045e-02, 9.492e-02, -2.670e-01, 3.501e-02, 4.475e-02, 3.518e-02, -8.545e-02, -7.837e-02, 5.113e-02, 3.450e-02, 9.494e-02, 5.137e-02, -2.409e-02, -1.630e-02)); + r += mul(s6_2, M4(1.001e-01, 7.930e-02, -2.769e-02, -8.569e-03, -2.833e-02, 7.423e-02, 5.656e-02, 1.955e-02, -2.687e-02, -1.095e-01, 5.323e-02, 1.263e-04, 9.546e-02, -3.930e-03, 1.200e-02, 7.090e-02)); + r += mul(s6_3, M4(8.177e-02, 2.099e-01, 1.219e-03, 5.645e-02, 1.903e-01, 1.848e-01, -9.204e-02, 6.842e-02, 1.345e-02, 1.227e-01, 1.105e-01, -1.011e-01, -2.807e-01, 5.359e-02, 7.534e-03, -1.919e-01)); + r += mul(s6_4, M4(-2.466e-01, -1.215e-01, -2.944e-02, -1.788e-01, -3.305e-02, -2.583e-02, 6.965e-02, 1.115e-01, -8.188e-03, 
1.026e-01, 1.654e-01, -9.167e-02, 5.933e-02, -3.517e-03, -5.736e-02, 4.267e-02)); + r += mul(s6_5, M4(-4.238e-02, 1.966e-02, -4.612e-02, 4.649e-02, 3.077e-02, 1.443e-01, -1.214e-01, 5.610e-02, 6.675e-02, 2.343e-02, -8.010e-04, 6.241e-02, -7.188e-02, 9.264e-03, -5.050e-02, 2.200e-02)); + r += mul(s6_6, M4(-3.730e-02, 1.765e-02, -2.157e-02, 4.494e-02, -8.190e-02, -3.957e-02, -1.489e-02, -2.375e-02, -6.377e-02, 5.413e-02, -4.732e-02, 1.595e-02, 1.908e-02, -8.771e-03, 7.518e-02, -6.149e-02)); + r += mul(s6_7, M4(3.306e-03, -1.145e-01, -1.196e-01, 3.965e-02, -1.753e-01, 1.538e-01, 7.509e-02, 3.034e-02, -9.531e-02, -9.182e-02, 9.663e-02, -3.387e-02, 1.423e-02, -4.138e-03, -3.193e-02, -9.204e-02)); + r += mul(s6_8, M4(8.442e-02, 1.387e-01, -5.979e-02, 2.392e-02, -8.845e-02, -2.704e-02, -1.145e-01, -2.621e-03, -1.899e-02, -2.144e-02, 1.663e-02, -3.083e-02, 1.069e-02, 3.161e-02, -3.266e-03, -4.319e-02)); + r += mul(s7_0, M4(-5.123e-02, -3.657e-02, -3.869e-02, -6.674e-02, -1.697e-02, -1.439e-01, -2.765e-02, 1.092e-02, 3.349e-03, -2.223e-02, -7.069e-02, -1.561e-03, -2.408e-01, -1.226e-01, -1.894e-02, 6.401e-02)); + r += mul(s7_1, M4(-3.214e-02, -2.262e-02, -5.277e-02, -3.741e-02, -1.213e-01, -7.707e-02, -3.552e-02, 1.140e-02, -5.600e-02, 8.291e-02, 2.238e-02, -2.037e-02, 2.020e-02, -3.163e-02, 3.580e-02, 1.581e-04)); + r += mul(s7_2, M4(4.871e-02, 7.530e-02, 1.845e-02, -8.611e-02, -4.128e-02, -1.197e-02, 3.897e-02, -3.884e-02, -8.676e-02, 1.107e-01, -5.836e-02, -3.621e-03, 3.486e-04, -3.959e-02, -3.687e-03, -3.999e-02)); + r += mul(s7_3, M4(1.186e-01, 2.908e-02, -1.749e-01, 5.311e-02, 6.358e-02, 8.960e-02, -3.391e-02, -1.091e-01, -6.161e-03, -2.794e-02, -1.079e-01, -1.413e-01, -2.787e-01, -1.223e-02, -1.932e-02, -1.993e-01)); + r += mul(s7_4, M4(-1.706e-01, -1.186e-02, -2.587e-02, -7.661e-02, -7.354e-02, -1.035e-02, -5.896e-02, -7.232e-03, 6.542e-02, 2.646e-01, -1.570e-03, 8.268e-02, 1.450e-01, 2.379e-02, -5.428e-02, -1.446e-01)); + r += mul(s7_5, M4(7.825e-02, 6.404e-02, 
1.175e-01, -1.253e-02, 5.442e-02, -1.039e-02, 7.739e-02, 2.245e-02, 7.787e-02, 2.019e-01, -3.032e-02, 1.489e-01, -1.078e-02, 1.432e-01, -1.520e-02, -4.307e-03)); + r += mul(s7_6, M4(1.518e-02, -5.765e-03, 1.038e-01, 4.877e-03, -6.176e-03, -4.776e-02, -5.405e-03, 4.432e-02, 3.103e-02, -3.598e-03, -1.491e-02, 6.369e-02, -4.103e-02, -8.058e-02, -1.370e-02, 7.380e-02)); + r += mul(s7_7, M4(-4.558e-02, 3.477e-02, 2.722e-02, 6.162e-02, -9.459e-02, 1.393e-02, -1.056e-02, 3.824e-02, -1.973e-01, -3.068e-02, -3.340e-02, 1.884e-02, 3.135e-02, -7.181e-02, 4.414e-02, -9.669e-02)); + r += mul(s7_8, M4(3.346e-02, 2.025e-02, 9.116e-02, -7.691e-03, -1.319e-02, -7.742e-02, -2.411e-02, -2.220e-02, -1.083e-02, -5.092e-02, -1.051e-01, 9.269e-02, 3.190e-02, -1.121e-01, 7.497e-02, -3.024e-02)); + r += V4(2.404e-02, 4.612e-02, -2.197e-02, 2.583e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.279e-01, -3.366e-02, -6.106e-02, 7.234e-02, 2.575e-01, 9.499e-02, -1.607e-01, 1.009e-01, -2.040e-01, 4.915e-03, 2.064e-01, -1.916e-01, -7.249e-03, -5.613e-02, 9.941e-03, 7.943e-02)); + r += mul(s0_1, M4(2.299e-02, -4.077e-02, -1.152e-01, -3.534e-02, 2.206e-01, 1.534e-01, 2.909e-02, -6.460e-02, -1.576e-01, -1.292e-01, 7.251e-02, 1.202e-01, -7.741e-02, -3.712e-02, -3.224e-02, -1.461e-01)); + r += mul(s0_2, M4(-7.144e-02, -1.591e-02, -7.688e-02, 
4.094e-02, 2.130e-01, 1.416e-01, -1.073e-02, -4.519e-03, -9.835e-02, 2.078e-02, 8.720e-04, -8.038e-02, -8.616e-02, -6.062e-02, -5.964e-02, 4.167e-02)); + r += mul(s0_3, M4(-8.807e-02, -3.156e-02, 4.208e-02, 4.239e-02, 3.079e-02, 7.417e-02, -1.900e-01, 3.545e-03, 1.907e-01, -9.657e-02, 2.109e-01, -7.153e-02, -5.825e-02, 1.168e-01, 1.213e-01, -9.887e-02)); + r += mul(s0_4, M4(3.984e-02, 4.145e-02, -3.554e-01, 4.647e-02, -1.710e-01, 2.289e-01, 5.428e-02, 1.081e-01, 1.071e-01, -7.097e-02, 7.493e-03, -4.977e-02, 1.458e-02, 1.337e-01, -1.108e-01, -5.505e-02)); + r += mul(s0_5, M4(1.629e-01, 8.552e-02, -2.103e-01, -5.147e-02, -8.944e-03, 1.279e-02, 3.232e-02, 1.568e-01, 1.322e-02, 1.217e-01, -3.924e-02, -8.033e-02, 1.411e-01, -1.682e-02, -6.152e-02, -4.818e-02)); + r += mul(s0_6, M4(-1.434e-01, -3.193e-02, -1.201e-01, 1.157e-01, 1.336e-01, -2.133e-02, -6.619e-02, -1.335e-01, 9.345e-02, -8.295e-02, 7.208e-02, 9.369e-02, 1.030e-01, -6.211e-02, 1.510e-02, -6.255e-03)); + r += mul(s0_7, M4(-1.815e-01, -1.262e-01, -1.561e-01, -4.939e-03, -9.261e-02, 2.095e-01, -1.511e-02, -1.741e-01, 1.317e-01, -1.158e-01, -4.331e-04, 8.866e-02, -7.553e-02, -2.143e-02, -5.532e-02, -5.052e-02)); + r += mul(s0_8, M4(-2.470e-02, 4.386e-02, -2.729e-02, 3.006e-02, 1.152e-01, 7.602e-02, -2.523e-02, -1.453e-01, 7.368e-02, -1.033e-01, 7.891e-02, -1.697e-01, -9.816e-02, 8.418e-02, 3.900e-02, -7.317e-02)); + r += mul(s1_0, M4(-5.105e-02, -1.298e-02, 2.407e-02, 5.246e-03, 6.278e-02, -8.287e-02, 3.093e-02, 3.455e-02, -4.439e-02, -3.267e-02, -8.388e-02, 4.421e-02, -7.185e-02, 2.124e-02, -3.238e-02, 1.844e-01)); + r += mul(s1_1, M4(3.112e-02, -5.267e-03, -1.198e-02, 4.540e-02, 6.870e-03, 1.315e-02, -2.940e-02, 2.722e-03, 1.363e-02, -3.576e-02, -4.401e-02, 6.896e-02, 3.213e-02, 9.293e-02, 9.204e-02, 4.444e-02)); + r += mul(s1_2, M4(-5.652e-02, -8.733e-02, -1.603e-02, -6.631e-02, -3.792e-02, -2.115e-02, -4.981e-02, -3.937e-02, 3.890e-02, -3.225e-02, -6.633e-03, 2.197e-03, -1.018e-01, -1.744e-01, -6.023e-02, 
7.670e-02)); + r += mul(s1_3, M4(-1.227e-01, -1.529e-02, 8.825e-02, -1.428e-01, 6.615e-03, -7.766e-02, 2.002e-02, 1.091e-01, -3.180e-02, -8.135e-02, -1.047e-01, 9.672e-02, -1.086e-01, 4.518e-02, -1.033e-01, 6.713e-03)); + r += mul(s1_4, M4(6.562e-02, 6.497e-02, -5.178e-02, -4.144e-02, -1.119e-01, -4.966e-02, 1.830e-01, 5.417e-02, -1.265e-01, -8.944e-02, -1.583e-01, -2.233e-01, 1.434e-01, 2.096e-01, 3.577e-03, 2.237e-01)); + r += mul(s1_5, M4(1.973e-02, -7.771e-02, -3.364e-02, 7.592e-02, -1.178e-01, -8.351e-02, -7.591e-02, 9.629e-02, -1.125e-01, -1.656e-02, -8.947e-03, 2.037e-01, 2.216e-01, -1.094e-01, 2.478e-02, -1.058e-01)); + r += mul(s1_6, M4(-7.549e-02, -8.087e-02, 1.030e-01, 9.166e-02, 2.592e-02, 3.428e-02, 3.391e-02, 9.272e-03, -1.093e-02, -7.403e-02, -1.135e-01, 3.686e-02, 1.685e-02, 8.050e-03, -8.718e-02, 1.131e-02)); + r += mul(s1_7, M4(4.517e-02, -1.805e-01, -1.346e-01, -8.709e-02, -2.236e-02, 6.259e-03, 6.235e-02, -6.781e-02, -2.046e-01, 8.657e-04, -9.356e-02, -1.557e-02, -8.873e-02, 6.786e-02, -5.795e-02, 2.236e-01)); + r += mul(s1_8, M4(-1.957e-02, 1.486e-02, -4.926e-02, 3.940e-02, -4.909e-02, 2.867e-02, 6.275e-02, -1.317e-01, 1.014e-02, -2.516e-02, 1.059e-01, 2.583e-02, 7.398e-02, -1.656e-01, -1.807e-02, -3.563e-02)); + r += mul(s2_0, M4(3.905e-02, 4.195e-02, 2.408e-02, -5.066e-02, 2.877e-03, -5.353e-02, 6.722e-03, 3.963e-02, 3.936e-02, 1.759e-02, -5.413e-02, -1.006e-01, -1.032e-01, 1.407e-02, -1.816e-03, -4.725e-02)); + r += mul(s2_1, M4(-8.659e-02, -6.690e-02, -2.369e-02, 8.620e-02, -9.957e-02, -4.924e-02, 4.479e-03, -7.542e-02, 7.070e-02, 6.020e-02, 6.204e-02, 8.101e-02, 5.890e-03, -6.030e-02, 7.879e-02, 4.558e-02)); + r += mul(s2_2, M4(-2.502e-02, -6.068e-02, 4.304e-02, 4.327e-02, 3.423e-02, -5.258e-02, 5.167e-02, 2.573e-02, 4.979e-02, -5.505e-02, 2.229e-02, 6.239e-02, -1.855e-02, 8.684e-02, 7.693e-02, 1.272e-02)); + r += mul(s2_3, M4(5.067e-03, -2.459e-02, 2.352e-03, 9.823e-02, 1.921e-02, -8.460e-02, 1.165e-01, -3.759e-02, 4.493e-02, 2.649e-02, 
6.970e-02, 1.018e-01, -4.113e-02, -6.144e-02, -7.839e-03, -7.800e-02)); + r += mul(s2_4, M4(3.751e-02, -2.392e-02, 1.363e-01, -1.343e-01, 1.052e-01, -2.994e-02, -2.411e-01, 6.704e-02, 1.204e-02, 2.038e-02, -7.752e-02, -1.959e-01, 1.357e-01, 7.517e-02, 3.376e-02, 2.537e-01)); + r += mul(s2_5, M4(-6.996e-02, 4.220e-02, 3.621e-02, -2.892e-02, -7.755e-02, 2.388e-02, 1.379e-02, 1.424e-01, -7.725e-02, 1.225e-01, 1.115e-01, 3.311e-02, -1.300e-01, 1.373e-01, 2.819e-02, 3.427e-02)); + r += mul(s2_6, M4(-5.453e-02, 4.552e-02, -2.180e-02, 2.561e-02, -1.318e-01, 2.918e-03, -2.844e-02, 2.129e-01, 3.826e-02, -5.138e-02, 7.055e-02, -2.352e-03, -9.334e-03, -3.794e-02, -2.176e-02, -5.253e-02)); + r += mul(s2_7, M4(1.225e-01, -1.233e-01, 9.290e-03, 1.017e-01, -1.422e-01, 2.826e-02, -4.448e-02, -7.646e-02, -4.387e-02, 1.225e-01, -5.220e-03, 1.012e-02, 4.607e-02, 5.358e-02, 5.278e-02, 8.320e-02)); + r += mul(s2_8, M4(5.630e-02, 7.660e-02, -1.488e-02, -4.350e-02, -2.135e-01, 8.763e-02, 6.009e-03, 3.679e-02, 2.555e-02, -1.223e-02, -4.974e-02, 1.520e-02, 3.425e-02, -6.627e-02, 6.280e-03, -1.601e-02)); + r += mul(s3_0, M4(1.028e-01, 1.002e-01, 6.515e-02, 1.996e-02, 1.337e-03, -7.797e-03, 1.001e-01, -1.168e-02, -2.405e-02, -5.076e-02, 1.096e-02, -1.736e-01, -1.959e-02, -7.104e-02, 9.389e-02, 2.230e-02)); + r += mul(s3_1, M4(1.421e-02, -2.660e-03, -1.353e-01, 8.574e-02, 4.807e-03, -2.868e-02, 1.437e-02, -7.176e-02, 3.202e-02, 8.032e-02, 2.433e-02, -2.427e-03, -4.091e-01, -5.981e-02, -1.661e-01, -4.668e-02)); + r += mul(s3_2, M4(-8.635e-02, -9.985e-02, 1.037e-02, -5.507e-02, 6.209e-02, 1.184e-01, 8.178e-02, -3.382e-02, -2.840e-02, 1.776e-02, 5.276e-02, -9.830e-02, -8.517e-02, 1.753e-01, -7.723e-02, -2.443e-02)); + r += mul(s3_3, M4(4.088e-02, 3.340e-02, -3.264e-03, 7.110e-02, 9.310e-02, -1.009e-01, 1.956e-02, -1.012e-01, 1.309e-01, -3.251e-03, 2.671e-02, 1.055e-01, -1.091e-01, 6.270e-02, 1.444e-01, 3.960e-02)); + r += mul(s3_4, M4(9.021e-02, 4.735e-02, 2.571e-02, 2.710e-02, 1.292e-01, 
-3.198e-02, -1.173e-01, 2.232e-02, 9.124e-02, -3.346e-02, 2.053e-02, 1.608e-02, -1.201e-01, 8.022e-02, 1.262e-01, 4.209e-02)); + r += mul(s3_5, M4(1.415e-02, -6.527e-02, 2.029e-02, 1.816e-02, 4.115e-02, -2.240e-02, 4.729e-02, 7.265e-02, -5.411e-02, -2.636e-02, 8.844e-02, -1.105e-02, -2.662e-01, 1.428e-01, 8.907e-02, 8.336e-02)); + r += mul(s3_6, M4(6.805e-02, 1.395e-01, -9.585e-02, 4.461e-02, 2.142e-02, 6.744e-04, 1.476e-02, 6.879e-03, -5.205e-02, 1.953e-02, 9.391e-02, -9.982e-02, 1.105e-01, -1.812e-02, 7.876e-02, 4.754e-03)); + r += mul(s3_7, M4(1.061e-02, -1.649e-01, 7.892e-02, 2.179e-02, -2.028e-02, 5.663e-02, 1.090e-01, -2.495e-01, -1.095e-01, 1.663e-01, 5.676e-02, 1.104e-01, 2.350e-01, -1.737e-05, 6.275e-02, -5.373e-02)); + r += mul(s3_8, M4(-3.147e-02, 2.322e-01, 1.393e-02, 6.937e-02, -6.567e-02, 2.665e-02, -1.422e-02, 5.359e-02, 1.171e-01, -3.963e-02, 2.992e-02, -6.924e-02, 1.773e-01, -3.631e-02, 1.185e-01, -1.028e-02)); + r += mul(s4_0, M4(-5.687e-02, 7.693e-03, 6.385e-02, 2.993e-02, -1.034e-01, -2.592e-02, 1.640e-03, -6.905e-02, -1.666e-02, -2.981e-02, -3.767e-02, -1.516e-02, 1.333e-01, -2.744e-02, -2.162e-01, 2.590e-01)); + r += mul(s4_1, M4(-8.843e-02, -1.823e-03, 8.369e-02, 1.177e-01, -1.970e-02, 1.472e-02, 1.363e-01, 1.451e-01, 8.558e-02, -2.608e-02, 1.457e-02, -2.496e-02, -4.943e-02, 7.773e-03, -1.686e-01, 1.476e-01)); + r += mul(s4_2, M4(-3.009e-02, -1.751e-02, 5.983e-02, -1.646e-01, -6.494e-02, -5.240e-02, 1.187e-01, -1.996e-03, -2.502e-02, 4.830e-02, -1.864e-01, 6.051e-02, -2.417e-02, -6.760e-02, -6.460e-02, 2.814e-04)); + r += mul(s4_3, M4(-8.744e-02, 5.006e-02, -2.079e-02, 9.829e-02, 1.772e-01, -1.175e-01, 9.265e-02, 9.310e-02, 3.576e-02, 6.130e-02, 4.680e-02, 1.632e-01, -1.245e-01, 8.862e-02, -2.980e-01, 2.297e-02)); + r += mul(s4_4, M4(8.308e-02, 7.544e-02, 1.629e-01, 2.374e-01, 5.315e-02, 7.817e-02, 2.026e-01, -2.530e-01, 1.486e-01, -4.941e-02, 9.442e-02, 3.796e-02, -1.124e-01, -5.497e-02, -2.431e-01, 6.729e-02)); + r += mul(s4_5, 
M4(3.043e-01, -2.249e-01, 8.679e-02, 8.587e-03, -3.091e-02, 1.281e-01, 1.058e-01, 1.238e-01, -1.237e-01, -2.999e-02, -7.889e-02, -1.038e-01, -4.301e-02, -7.012e-02, -1.021e-01, -2.089e-02)); + r += mul(s4_6, M4(2.500e-02, -2.472e-02, 8.017e-02, 4.952e-02, 3.016e-02, -1.007e-01, -5.812e-02, -2.393e-02, 1.429e-02, -1.669e-02, -5.088e-03, 1.606e-01, -5.341e-02, -4.039e-02, -1.368e-01, 8.641e-02)); + r += mul(s4_7, M4(1.258e-01, 2.161e-02, 2.055e-01, 1.858e-01, -2.128e-01, 1.039e-01, -1.339e-01, -1.722e-01, -1.349e-01, -8.110e-02, -7.639e-02, 6.999e-02, -2.331e-01, 8.396e-02, -4.020e-02, 1.369e-01)); + r += mul(s4_8, M4(-1.218e-01, -3.218e-02, 6.787e-02, 5.315e-02, 5.871e-02, 9.026e-02, -1.272e-01, -1.574e-02, 3.738e-02, -1.998e-02, -1.137e-02, -1.836e-02, 5.407e-04, -2.666e-02, -1.174e-01, 2.181e-01)); + r += mul(s5_0, M4(-4.300e-02, 1.067e-01, -1.611e-02, 1.408e-02, -1.082e-01, 6.859e-03, -3.246e-02, 1.440e-02, 3.127e-02, 4.931e-03, -2.008e-02, -1.087e-01, 1.869e-01, -3.763e-02, 9.214e-02, -6.139e-02)); + r += mul(s5_1, M4(2.163e-02, -1.327e-02, -7.326e-02, -1.634e-02, -9.571e-03, -1.920e-02, -6.807e-02, 6.684e-02, 1.094e-01, -1.085e-01, 5.202e-02, 7.035e-03, 2.908e-02, 1.155e-01, 1.365e-01, -9.210e-02)); + r += mul(s5_2, M4(-1.816e-01, 4.733e-02, -5.288e-02, 3.186e-02, -1.024e-01, -2.162e-02, -1.839e-02, 9.413e-03, 2.192e-02, 4.179e-02, 1.054e-02, 8.923e-03, -3.371e-02, 1.072e-02, 8.813e-02, -6.179e-02)); + r += mul(s5_3, M4(4.771e-02, 4.805e-02, 2.398e-02, -2.165e-03, 8.587e-02, -6.910e-03, 5.999e-02, -4.768e-03, 7.841e-04, -7.417e-02, -9.176e-02, 2.145e-03, 1.780e-01, 7.821e-02, 2.110e-01, 2.032e-02)); + r += mul(s5_4, M4(7.944e-02, -1.616e-02, 9.059e-05, -3.100e-02, 7.670e-02, 2.298e-03, 6.435e-02, -1.431e-01, 6.823e-02, -9.556e-02, 1.325e-02, -8.277e-02, 1.527e-01, 8.532e-02, 3.696e-01, -7.039e-02)); + r += mul(s5_5, M4(6.107e-02, -1.836e-02, 5.551e-02, 6.896e-02, 1.774e-01, 2.109e-02, 9.409e-02, -1.408e-02, -5.310e-02, -2.290e-01, 9.082e-02, -9.762e-03, 
5.386e-02, 1.275e-01, 1.664e-01, -9.219e-02)); + r += mul(s5_6, M4(2.762e-02, -6.232e-02, -8.595e-02, 2.939e-02, -2.988e-02, -5.534e-03, 3.508e-03, 2.618e-04, -4.541e-02, 7.138e-02, 7.792e-02, -4.452e-02, 5.544e-02, 2.857e-02, 1.264e-01, 5.603e-02)); + r += mul(s5_7, M4(-3.177e-02, 1.966e-02, -5.840e-03, -3.527e-02, -1.901e-01, 2.063e-01, -5.171e-02, -1.624e-02, -4.568e-02, -1.109e-02, -9.732e-02, -1.645e-01, 3.332e-02, 1.855e-01, 1.683e-01, -5.122e-02)); + r += mul(s5_8, M4(-6.366e-02, -6.204e-02, -4.810e-02, 1.133e-02, -4.249e-02, 6.398e-02, -6.416e-02, -4.528e-03, 7.454e-02, -7.559e-02, -6.606e-02, -6.678e-03, 7.244e-02, 2.279e-02, -1.285e-03, 1.840e-02)); + r += mul(s6_0, M4(-1.078e-01, -2.801e-02, 7.246e-02, 4.707e-02, -1.894e-01, -2.593e-02, 2.423e-02, -3.650e-02, 1.273e-01, -4.858e-03, -5.637e-03, 1.420e-01, 1.411e-02, -6.332e-02, 3.113e-02, 1.083e-01)); + r += mul(s6_1, M4(-2.501e-02, -5.155e-02, -5.300e-02, 6.727e-03, 1.291e-02, 3.799e-02, 6.704e-02, -8.818e-03, 1.438e-01, 3.135e-02, -3.689e-02, -8.483e-02, -8.690e-02, 6.428e-02, -1.973e-02, -4.476e-02)); + r += mul(s6_2, M4(-1.165e-01, 2.959e-01, 4.430e-02, 7.807e-02, -9.287e-02, 1.312e-01, -1.275e-01, 4.720e-02, 1.288e-02, -3.103e-02, -4.469e-02, -1.277e-01, -1.358e-01, 3.453e-02, 7.734e-03, -1.723e-02)); + r += mul(s6_3, M4(1.415e-02, 4.357e-02, 9.350e-02, -1.290e-02, -1.784e-01, -1.194e-01, -6.447e-02, -1.538e-01, 3.828e-02, 8.403e-04, -9.760e-02, 3.470e-02, 7.904e-02, 6.751e-02, 6.433e-02, 9.591e-03)); + r += mul(s6_4, M4(4.858e-02, 9.642e-03, -1.416e-01, -1.765e-01, 7.997e-02, -2.211e-01, 2.500e-02, 4.583e-02, -1.211e-01, 7.273e-02, -1.105e-01, -6.780e-02, 1.552e-01, -6.610e-02, 1.221e-01, 9.713e-02)); + r += mul(s6_5, M4(8.812e-02, 1.163e-01, -3.385e-02, 5.496e-02, -1.088e-01, 5.046e-02, -9.608e-02, -6.503e-02, -5.554e-02, -1.426e-01, -1.015e-01, 7.827e-02, 2.886e-02, -9.295e-02, -1.683e-02, -5.637e-02)); + r += mul(s6_6, M4(6.530e-02, 1.705e-01, 5.023e-02, 5.670e-03, -2.066e-01, -1.008e-01, 
-2.584e-02, -1.695e-01, -3.472e-02, -1.673e-02, 1.132e-02, 7.294e-02, 1.806e-02, 8.904e-02, 1.301e-02, 7.587e-02)); + r += mul(s6_7, M4(-7.182e-02, 1.284e-02, 2.450e-02, 2.996e-01, -1.210e-01, 5.583e-02, -2.610e-02, 1.577e-01, 6.162e-02, -1.601e-01, 4.786e-02, 2.652e-02, -1.700e-01, -4.436e-03, 3.861e-02, 9.871e-02)); + r += mul(s6_8, M4(-1.442e-01, -6.627e-02, 5.110e-02, 8.370e-02, 9.951e-02, -8.591e-02, -5.248e-02, -9.295e-02, 4.298e-02, -1.054e-01, -3.308e-02, -1.683e-02, -1.237e-01, 4.142e-03, -1.229e-01, -1.149e-01)); + r += mul(s7_0, M4(-2.132e-02, 4.805e-02, 6.327e-02, 1.387e-01, 1.004e-02, -1.254e-02, 5.425e-02, -4.269e-03, -1.508e-01, 4.787e-02, -5.652e-02, 2.234e-02, -3.006e-02, 5.317e-02, -5.235e-02, 1.102e-01)); + r += mul(s7_1, M4(8.749e-02, 6.059e-02, -3.881e-02, -2.855e-03, 5.492e-02, 2.269e-02, -6.710e-02, -7.440e-02, 2.012e-01, -1.893e-02, 1.096e-02, -1.808e-02, 9.809e-02, -4.306e-02, 1.054e-03, -3.551e-02)); + r += mul(s7_2, M4(-6.344e-03, 6.911e-02, -8.138e-02, 3.389e-03, -3.893e-02, -2.793e-02, -3.421e-02, 1.312e-02, -1.058e-02, 2.910e-02, -6.469e-02, -5.796e-02, 5.012e-02, 8.912e-02, 3.562e-02, 4.717e-02)); + r += mul(s7_3, M4(-5.413e-02, 7.983e-02, -4.328e-02, -7.536e-02, -6.332e-02, 1.174e-02, 1.104e-01, -4.694e-02, -9.745e-02, 6.358e-02, 2.715e-02, -2.189e-02, -2.204e-01, 3.593e-02, 5.257e-02, 1.805e-01)); + r += mul(s7_4, M4(2.007e-01, -1.128e-02, -6.092e-02, 1.107e-02, 1.147e-01, 4.907e-02, 9.380e-02, -1.132e-01, -1.031e-01, 1.390e-01, 1.988e-01, -3.743e-02, -2.523e-01, -8.979e-02, 1.662e-01, 6.444e-02)); + r += mul(s7_5, M4(1.083e-01, 1.595e-02, -1.725e-01, 5.724e-03, -1.331e-01, 3.038e-02, 8.289e-03, 4.838e-02, 2.811e-02, -1.325e-02, -1.052e-01, 9.198e-03, -2.897e-02, 7.183e-02, -7.798e-03, 2.164e-02)); + r += mul(s7_6, M4(7.601e-02, -6.164e-02, -1.161e-01, -5.035e-02, 2.184e-02, 1.047e-01, 4.872e-02, 4.256e-02, -1.560e-04, -1.473e-02, -1.539e-02, 1.090e-01, 4.757e-02, -1.741e-01, -3.318e-03, 3.241e-02)); + r += mul(s7_7, M4(-2.860e-02, 
1.666e-01, -1.005e-02, 1.621e-01, 5.307e-02, 8.958e-02, -5.114e-02, -3.572e-02, -2.053e-02, -8.701e-02, 1.172e-01, -1.191e-01, 1.827e-02, -9.646e-02, -1.384e-02, 1.939e-01)); + r += mul(s7_8, M4(-1.288e-01, 1.404e-02, -1.802e-01, -1.037e-01, 7.175e-03, -5.918e-02, 4.387e-02, -6.959e-02, -4.688e-02, -8.219e-02, -6.531e-02, 2.699e-02, 1.969e-02, -6.182e-02, -5.690e-02, -4.697e-02)); + r += V4(4.057e-02, 6.685e-02, 6.700e-02, -1.475e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.130e-02, 2.971e-03, -1.697e-02, 1.723e-01, -2.969e-02, 5.238e-02, -6.038e-02, -1.713e-01, -2.884e-02, 2.689e-02, 1.049e-01, 9.709e-02, 7.953e-03, 5.953e-02, 8.925e-02, -3.547e-02)); + r += mul(s0_1, M4(-1.699e-01, -1.627e-01, -2.591e-02, 1.405e-01, -1.440e-01, -9.739e-02, -1.221e-02, -4.837e-02, 8.099e-02, -1.887e-02, -8.460e-02, 7.115e-02, 6.461e-02, -1.117e-02, 9.771e-02, 9.994e-02)); + r += mul(s0_2, M4(-1.191e-01, -4.843e-02, -8.051e-02, -8.403e-03, -6.654e-02, 1.643e-02, -3.486e-02, -1.460e-01, 7.868e-02, -3.679e-02, 8.537e-02, 6.625e-02, 6.675e-02, -3.406e-02, -3.296e-02, 1.288e-02)); + r += mul(s0_3, M4(-1.254e-02, 1.587e-01, -1.404e-01, -9.692e-02, -1.083e-01, -3.384e-02, -5.840e-02, -1.022e-01, -5.070e-03, 1.152e-02, 1.578e-02, 8.507e-02, -1.561e-03, -1.234e-01, 8.587e-02, -8.467e-03)); + r += mul(s0_4, M4(-2.645e-01, 1.098e-01, 
-1.588e-01, -4.149e-03, -2.474e-02, -6.423e-02, -5.085e-02, -3.228e-01, -2.512e-02, -1.876e-01, 1.527e-01, 7.138e-02, 3.622e-02, -7.167e-02, 5.379e-02, 1.440e-01)); + r += mul(s0_5, M4(-8.099e-02, 7.277e-03, -1.015e-01, -2.088e-02, -2.608e-02, -9.005e-02, 5.451e-02, -3.323e-02, -5.641e-02, 6.354e-02, -1.074e-01, 3.794e-02, 2.156e-02, -1.153e-01, 6.865e-02, 1.685e-01)); + r += mul(s0_6, M4(-1.037e-01, 8.738e-03, -2.146e-01, 2.522e-02, 7.273e-02, 1.562e-01, 5.971e-02, -6.630e-02, 2.006e-02, -6.878e-03, -4.255e-02, 6.355e-02, -7.011e-02, 7.403e-03, -1.004e-03, 6.821e-02)); + r += mul(s0_7, M4(3.246e-02, -9.474e-03, 3.046e-02, 8.857e-02, -1.451e-01, 1.464e-01, -2.309e-01, -2.253e-01, 1.313e-01, -1.055e-01, 1.011e-02, 8.469e-02, 1.286e-01, -1.157e-01, 4.310e-02, 2.545e-02)); + r += mul(s0_8, M4(1.688e-02, -5.121e-02, 3.931e-02, 5.312e-03, -4.991e-02, -5.310e-02, 6.352e-02, -1.698e-01, 1.023e-02, -3.237e-02, -3.106e-02, 1.126e-02, -3.956e-02, 7.182e-02, 3.462e-02, 1.284e-01)); + r += mul(s1_0, M4(-7.200e-02, -1.373e-01, 1.461e-01, 1.011e-01, -5.594e-02, 1.675e-02, 9.083e-02, 8.184e-02, -5.444e-02, -3.439e-02, -3.755e-02, -4.670e-02, -5.214e-02, 1.053e-01, -8.947e-03, -3.363e-02)); + r += mul(s1_1, M4(-9.103e-02, 1.141e-01, 6.822e-02, 2.195e-02, -3.647e-02, 2.817e-02, -6.649e-02, 1.015e-02, -2.507e-03, 6.292e-02, -3.789e-02, -5.327e-02, 3.955e-02, -4.437e-03, -5.890e-02, 1.282e-02)); + r += mul(s1_2, M4(4.900e-02, -7.565e-03, -4.303e-02, -7.926e-02, -1.776e-02, 1.173e-01, 1.059e-01, -4.628e-03, 2.424e-02, 5.327e-02, 8.622e-02, 9.865e-03, -5.063e-02, 2.139e-02, -3.283e-02, -1.076e-01)); + r += mul(s1_3, M4(4.076e-02, -1.201e-01, -1.468e-02, -7.557e-02, -9.181e-02, -5.838e-02, -9.056e-02, 8.927e-02, -8.817e-02, 4.257e-02, 1.622e-01, -3.077e-02, -2.824e-01, -1.754e-01, -4.173e-02, -3.796e-03)); + r += mul(s1_4, M4(-1.808e-01, -9.420e-02, 2.362e-01, 5.666e-02, 1.558e-01, 6.239e-02, -1.044e-01, -6.183e-02, 7.567e-03, 1.080e-01, -1.334e-01, -5.788e-03, -3.618e-02, -2.672e-01, 
-4.522e-02, 6.275e-02)); + r += mul(s1_5, M4(-8.083e-02, -3.153e-02, 7.586e-02, -2.597e-02, -8.238e-02, -1.108e-01, -6.356e-02, 8.368e-02, 3.564e-02, 1.147e-01, -2.603e-02, 5.378e-02, -1.115e-01, -6.909e-02, -7.367e-02, 1.292e-01)); + r += mul(s1_6, M4(4.447e-03, 3.811e-02, -7.687e-02, -2.917e-02, 6.661e-02, -5.593e-02, -9.921e-03, -2.300e-02, -3.419e-02, 2.266e-02, -7.253e-03, 3.770e-02, -9.734e-03, -5.784e-03, 4.182e-02, 2.311e-02)); + r += mul(s1_7, M4(6.541e-02, 1.123e-01, -8.935e-02, 7.808e-02, -2.990e-02, -1.476e-02, 1.612e-01, 6.376e-02, 6.191e-02, -5.186e-02, 1.945e-01, 1.909e-02, 2.268e-02, -4.369e-02, 3.291e-02, -2.180e-01)); + r += mul(s1_8, M4(-1.179e-02, 5.091e-03, -2.284e-02, 6.567e-02, 7.615e-03, 6.188e-02, 2.730e-02, 1.096e-01, -6.861e-02, -3.396e-02, -1.333e-01, -3.154e-02, -8.793e-02, -1.780e-03, -5.642e-02, 6.958e-02)); + r += mul(s2_0, M4(8.236e-02, 4.958e-02, -6.726e-03, 4.602e-03, 1.930e-02, -4.994e-02, -8.625e-03, -6.828e-02, -4.857e-02, 5.621e-04, 2.747e-02, 1.256e-01, 2.196e-02, 1.233e-02, 4.565e-02, -3.883e-02)); + r += mul(s2_1, M4(-4.036e-02, 1.410e-02, 4.532e-02, -3.577e-02, -1.095e-01, 8.027e-04, 6.363e-03, -7.028e-02, -1.750e-02, -8.614e-02, 3.487e-02, 1.905e-02, 5.269e-02, 1.186e-01, 5.839e-02, 1.010e-01)); + r += mul(s2_2, M4(-6.230e-02, -7.290e-02, -2.559e-02, 8.144e-03, -6.011e-02, -5.373e-02, 1.737e-02, -3.953e-02, -8.449e-02, 5.397e-02, 1.553e-01, -8.801e-02, 5.439e-02, -4.834e-02, 2.087e-02, 3.786e-02)); + r += mul(s2_3, M4(1.312e-03, 4.385e-02, -1.052e-01, 1.187e-01, 1.796e-01, -1.156e-01, 1.312e-01, 7.027e-03, 7.043e-02, 5.154e-02, -7.434e-02, -7.112e-02, -6.333e-02, -3.389e-02, 1.071e-02, -4.098e-03)); + r += mul(s2_4, M4(-6.067e-02, 1.708e-02, 3.357e-02, -8.908e-02, 1.508e-01, 3.211e-02, -8.900e-02, -1.560e-02, -1.030e-02, -3.789e-02, 5.975e-03, -9.308e-02, -1.062e-02, -1.841e-01, 1.484e-02, 1.164e-01)); + r += mul(s2_5, M4(-8.618e-02, -4.005e-02, 6.690e-02, 5.596e-02, -2.935e-01, -4.283e-02, -1.875e-01, 1.296e-01, 
-8.753e-02, -1.694e-02, -2.331e-02, -8.463e-02, -1.410e-01, -1.085e-01, 3.978e-02, -7.041e-02)); + r += mul(s2_6, M4(5.414e-02, 3.529e-02, 5.805e-02, 3.732e-04, 3.671e-02, -7.497e-03, 1.048e-01, 8.747e-02, -2.924e-02, 3.028e-02, -7.909e-02, -4.302e-03, 4.368e-02, -2.135e-02, 9.883e-02, -3.821e-02)); + r += mul(s2_7, M4(-1.323e-01, 3.368e-02, -7.996e-02, -2.019e-01, 7.806e-02, 2.529e-02, 3.800e-02, 7.636e-02, -4.905e-02, -7.635e-02, 2.038e-01, 1.055e-01, 1.360e-02, 1.124e-01, -9.663e-03, -1.235e-02)); + r += mul(s2_8, M4(1.595e-02, 8.632e-03, -3.587e-02, -1.464e-02, -3.107e-02, 4.872e-02, 4.783e-02, 5.343e-02, -9.727e-03, 7.866e-02, -2.355e-01, 6.806e-04, 2.052e-02, 7.465e-02, -3.182e-03, -7.256e-02)); + r += mul(s3_0, M4(1.681e-02, 4.112e-02, -7.830e-02, -1.704e-02, 6.887e-02, 1.142e-02, 1.087e-02, -6.558e-03, -7.073e-02, -9.275e-02, 1.470e-01, 2.685e-02, -2.009e-02, -4.445e-02, 6.022e-02, -5.597e-02)); + r += mul(s3_1, M4(-9.797e-02, 6.285e-02, 2.181e-01, -5.413e-02, -3.854e-02, 2.665e-02, -5.676e-02, -9.111e-02, -9.869e-02, -4.973e-02, 4.636e-02, 7.394e-02, -2.655e-01, -1.570e-01, 8.760e-02, 1.714e-01)); + r += mul(s3_2, M4(-8.892e-02, -8.366e-02, -5.989e-02, 1.579e-01, 2.242e-03, -7.445e-02, 2.149e-02, -6.127e-02, 1.714e-02, 5.657e-03, 2.707e-01, -3.986e-02, 5.101e-02, -1.289e-01, -7.053e-02, -3.878e-02)); + r += mul(s3_3, M4(2.501e-01, -1.295e-01, 1.204e-01, 7.735e-02, 1.076e-01, 3.385e-02, 7.283e-02, -1.388e-03, 6.986e-02, 3.354e-02, 6.278e-02, -5.167e-02, 4.310e-02, 1.368e-01, 2.254e-02, -9.449e-02)); + r += mul(s3_4, M4(4.136e-01, 5.363e-02, -1.864e-01, -4.797e-02, 4.818e-02, 8.905e-03, -2.680e-02, 2.409e-02, 8.077e-02, 1.417e-01, -1.978e-01, -3.116e-02, -7.184e-03, 4.112e-03, 1.164e-01, 6.615e-02)); + r += mul(s3_5, M4(-1.137e-01, 9.477e-02, 4.973e-02, 1.354e-01, 8.513e-04, -7.474e-02, -1.031e-01, 1.021e-01, 8.874e-02, 6.081e-02, -4.876e-01, 1.334e-02, 5.266e-02, -3.342e-02, 2.331e-01, -6.394e-02)); + r += mul(s3_6, M4(3.396e-02, 4.118e-02, 6.863e-02, 
-1.918e-03, -3.163e-02, -5.579e-02, -5.444e-02, 1.387e-02, 7.337e-03, -8.970e-02, 1.428e-01, 4.239e-02, 5.511e-02, -1.222e-01, 1.717e-01, -1.737e-01)); + r += mul(s3_7, M4(2.141e-01, -1.514e-01, -9.737e-03, 1.630e-02, -6.909e-03, -1.175e-02, -7.800e-02, 1.107e-01, 6.107e-03, 1.227e-02, 4.571e-01, -1.420e-02, 1.533e-01, 2.142e-02, 8.917e-02, -6.197e-02)); + r += mul(s3_8, M4(1.040e-01, -2.908e-02, -8.072e-02, 1.600e-01, 1.254e-01, -8.766e-02, 5.691e-02, 3.955e-02, 4.178e-02, 4.562e-02, -2.707e-01, -5.604e-02, 1.596e-01, -1.549e-02, -4.298e-02, 4.781e-02)); + r += mul(s4_0, M4(1.448e-02, -3.126e-03, 2.671e-01, 4.931e-02, -2.637e-02, 1.546e-02, -9.846e-02, 1.047e-01, -5.858e-03, 1.219e-01, -2.739e-02, 1.359e-01, -1.015e-01, -4.477e-02, -7.104e-02, -3.550e-02)); + r += mul(s4_1, M4(7.703e-02, 5.984e-02, 1.710e-01, 6.703e-02, -1.361e-01, 1.835e-02, 1.559e-01, -3.566e-02, 5.388e-02, 1.998e-02, -3.225e-02, -3.200e-02, -4.279e-02, -2.967e-02, -7.387e-03, -1.738e-03)); + r += mul(s4_2, M4(-3.824e-02, 1.635e-01, 2.978e-01, -6.955e-02, -2.575e-02, -4.559e-03, -8.055e-02, -3.102e-02, -1.214e-01, -1.347e-01, -1.159e-01, 8.305e-02, -5.546e-02, -7.330e-02, -9.751e-02, -2.338e-01)); + r += mul(s4_3, M4(-4.372e-02, 2.133e-01, 1.651e-01, 3.397e-02, 2.440e-01, 6.838e-02, -4.116e-02, -8.257e-02, -2.768e-02, -1.725e-01, 2.180e-01, -5.268e-02, -6.686e-02, -1.075e-02, -1.183e-01, -5.245e-02)); + r += mul(s4_4, M4(1.160e-01, -1.089e-01, -5.048e-01, -1.137e-01, 4.091e-01, -3.321e-01, 1.600e-02, -4.297e-01, 2.936e-02, -3.944e-02, 3.683e-02, -7.692e-02, 2.271e-01, -5.484e-03, 1.696e-01, -9.252e-02)); + r += mul(s4_5, M4(5.579e-02, -1.714e-02, 2.592e-01, 4.721e-02, 5.725e-03, -9.406e-02, 5.586e-02, -1.122e-01, 6.267e-02, 1.959e-01, 8.754e-02, -3.318e-02, -3.323e-02, 3.019e-02, 2.118e-02, -9.360e-03)); + r += mul(s4_6, M4(-4.322e-02, -7.796e-02, 3.892e-01, 2.526e-02, -3.016e-02, 8.906e-02, -6.579e-02, 1.023e-01, -1.739e-02, 5.853e-03, -8.500e-02, -8.065e-02, 6.131e-02, 6.489e-02, 8.976e-02, 
9.891e-02)); + r += mul(s4_7, M4(8.236e-03, -1.290e-01, 1.230e-01, -4.372e-02, -1.949e-02, -3.142e-01, 1.221e-01, -3.426e-02, 4.860e-02, 2.884e-02, -2.031e-02, 2.863e-02, -1.006e-01, -1.666e-02, 7.729e-02, -4.938e-02)); + r += mul(s4_8, M4(-1.731e-01, -3.786e-02, 3.364e-02, 7.386e-02, 1.000e-01, 1.514e-01, 6.428e-02, 5.606e-02, 2.937e-03, -2.062e-02, -3.646e-02, 1.250e-02, 6.276e-02, 5.917e-02, -4.737e-02, -6.429e-02)); + r += mul(s5_0, M4(-8.829e-03, 6.971e-02, 3.541e-02, 1.809e-02, 1.065e-02, 3.218e-02, -3.046e-02, 2.121e-02, -3.480e-02, -1.838e-02, -1.096e-01, 9.808e-02, 7.617e-02, -9.112e-03, 6.525e-02, -2.143e-02)); + r += mul(s5_1, M4(-7.099e-03, 6.124e-02, 6.715e-02, 8.212e-02, 3.622e-02, 2.778e-02, 7.969e-02, -3.336e-02, 1.245e-01, 1.119e-01, 5.280e-02, -8.330e-02, 1.314e-01, 5.461e-02, 1.524e-01, 4.624e-02)); + r += mul(s5_2, M4(-8.457e-02, -3.261e-02, -5.095e-02, -1.353e-02, 6.270e-03, 5.844e-02, -7.710e-02, 2.764e-03, -2.814e-02, -6.648e-02, -5.475e-02, 3.788e-02, -2.322e-03, 7.006e-02, 4.403e-02, -3.014e-02)); + r += mul(s5_3, M4(1.144e-02, 8.144e-02, 3.215e-02, 3.551e-02, 9.567e-02, 1.714e-02, 5.139e-02, -8.414e-02, -2.402e-02, -9.189e-02, 4.432e-02, -2.306e-03, 6.175e-02, 6.143e-02, -2.534e-02, 6.956e-02)); + r += mul(s5_4, M4(1.330e-02, 5.467e-02, 2.696e-03, 1.003e-01, 1.212e-02, -1.570e-01, -1.143e-01, -1.127e-01, 1.103e-01, -9.396e-02, 3.722e-02, -1.026e-01, 2.694e-01, 9.083e-02, 6.893e-02, 1.128e-01)); + r += mul(s5_5, M4(1.806e-02, 1.141e-02, -3.325e-03, -7.681e-02, -3.333e-02, 2.338e-02, 1.130e-02, -1.251e-01, 1.135e-01, 1.119e-01, 2.687e-01, 1.495e-01, 6.523e-02, 1.089e-01, -5.855e-02, 2.052e-03)); + r += mul(s5_6, M4(-6.760e-02, 8.641e-03, -8.452e-02, -1.880e-02, -3.117e-02, -8.947e-02, -3.013e-02, 2.015e-02, -3.476e-02, 7.563e-02, -4.600e-02, 1.577e-02, 3.989e-02, -9.229e-02, 1.108e-01, -1.162e-02)); + r += mul(s5_7, M4(2.417e-02, -3.430e-02, 1.091e-01, 4.174e-02, -3.418e-02, -1.862e-01, 5.662e-02, -1.414e-02, -6.064e-02, 1.133e-02, 
-9.183e-02, 4.677e-02, -4.738e-02, -1.836e-02, 1.063e-02, -5.357e-02)); + r += mul(s5_8, M4(-6.421e-02, -4.079e-02, -1.277e-02, -7.411e-02, 3.395e-02, 4.268e-02, 2.553e-02, 1.373e-01, -2.912e-02, 1.767e-03, -1.459e-01, -3.006e-02, 3.724e-02, 3.353e-02, 3.408e-02, 4.427e-02)); + r += mul(s6_0, M4(-4.232e-02, -7.914e-02, -1.564e-03, -1.452e-02, -1.207e-02, -1.994e-02, -1.146e-01, -3.629e-02, 3.144e-02, -1.077e-02, -1.133e-02, 1.898e-03, -2.618e-02, -1.414e-02, 2.598e-02, 1.485e-01)); + r += mul(s6_1, M4(-1.344e-02, -7.809e-02, 1.238e-01, -4.426e-02, -7.205e-02, -4.869e-02, 6.605e-02, 1.397e-01, 1.556e-01, 1.359e-01, 3.377e-02, 3.126e-02, 3.122e-02, 6.361e-02, -9.625e-02, 2.292e-02)); + r += mul(s6_2, M4(2.738e-03, -6.746e-02, -2.412e-02, -1.041e-01, -4.460e-02, -2.827e-02, -2.519e-02, -3.964e-02, -2.333e-04, 7.666e-02, 1.125e-01, 1.630e-01, 2.247e-02, -7.670e-02, -2.341e-03, 9.385e-02)); + r += mul(s6_3, M4(-4.602e-02, 1.061e-01, -1.320e-01, 1.191e-01, -5.133e-02, 2.162e-01, -1.131e-02, -3.731e-02, -7.102e-02, -8.420e-02, 4.596e-02, 4.180e-02, 1.479e-01, -8.129e-02, 2.973e-02, -1.772e-01)); + r += mul(s6_4, M4(5.118e-02, -6.224e-02, -8.713e-03, -2.333e-02, 9.701e-02, 1.313e-01, 2.580e-02, -3.051e-01, 8.869e-02, 7.954e-02, -1.263e-01, 5.000e-02, 1.479e-01, 6.071e-02, 1.515e-01, 1.184e-02)); + r += mul(s6_5, M4(-3.628e-02, -1.571e-04, -2.533e-02, 1.203e-02, -3.362e-02, 1.008e-02, -2.868e-02, -7.585e-02, 2.280e-02, 1.390e-01, -7.603e-02, 9.155e-02, -4.598e-03, 1.854e-02, -3.700e-01, 1.594e-02)); + r += mul(s6_6, M4(7.287e-02, 1.326e-01, 2.261e-01, 6.618e-02, -2.454e-02, 9.443e-02, 9.664e-03, -9.044e-02, 9.658e-02, 1.071e-01, -1.143e-01, -9.359e-02, 2.635e-02, -3.715e-02, -2.057e-01, 1.043e-01)); + r += mul(s6_7, M4(7.176e-02, -1.768e-02, 1.042e-02, -1.732e-01, -1.269e-01, 4.046e-02, 6.187e-02, -2.409e-01, 1.033e-01, 2.011e-02, -1.234e-02, 1.175e-01, -1.142e-02, -2.077e-02, 2.295e-01, 8.248e-02)); + r += mul(s6_8, M4(4.521e-02, -7.314e-02, 1.829e-01, 4.214e-02, 
8.360e-02, -1.261e-02, -5.154e-02, -5.521e-03, -3.743e-02, 3.988e-02, -3.584e-02, 1.117e-02, -3.909e-02, -7.878e-03, -5.536e-02, -4.494e-02)); + r += mul(s7_0, M4(2.108e-02, 9.581e-02, -5.059e-02, -8.275e-02, 6.127e-02, 7.517e-02, -2.609e-02, 5.254e-02, 2.820e-02, -2.989e-02, 5.125e-02, -1.002e-01, -9.809e-02, 1.553e-01, 1.773e-01, -1.271e-02)); + r += mul(s7_1, M4(3.650e-02, 1.098e-01, 2.168e-02, -1.848e-01, -9.009e-02, 3.414e-02, -3.631e-02, -3.021e-02, 1.459e-01, 2.078e-01, 6.669e-02, -1.034e-01, -5.624e-02, 1.201e-01, 2.647e-01, -9.297e-02)); + r += mul(s7_2, M4(-1.393e-02, -4.577e-02, -2.512e-02, -3.470e-02, -3.612e-03, -6.107e-02, -2.231e-02, -1.145e-01, 5.848e-02, 6.392e-02, 1.092e-01, 5.121e-02, 3.987e-02, -3.849e-02, 4.369e-02, -1.615e-02)); + r += mul(s7_3, M4(3.431e-02, 1.207e-01, 2.357e-02, 7.447e-02, 1.074e-01, 3.534e-02, 5.660e-02, -5.106e-02, -2.719e-02, -1.355e-01, 1.572e-01, -9.630e-03, 1.186e-01, -1.010e-01, 9.250e-02, 1.751e-02)); + r += mul(s7_4, M4(-4.175e-03, 7.941e-02, -3.054e-02, 6.754e-02, 1.715e-01, 1.252e-01, -3.678e-02, -2.089e-01, 9.548e-02, -2.263e-01, -1.472e-01, 1.701e-01, 1.344e-01, 6.099e-02, -9.534e-02, -1.026e-01)); + r += mul(s7_5, M4(-2.515e-02, -5.992e-02, -6.514e-02, -1.457e-02, -5.400e-03, -8.003e-03, 6.131e-02, 2.058e-03, 2.917e-02, 2.659e-02, 5.068e-02, -4.705e-02, -2.029e-02, 8.965e-02, 5.807e-02, -2.764e-02)); + r += mul(s7_6, M4(1.002e-01, -8.008e-02, 1.664e-01, -3.319e-02, -3.740e-02, -5.018e-02, -7.137e-02, -2.204e-02, 5.796e-02, 5.437e-03, 1.174e-01, 6.421e-02, 1.287e-02, -9.052e-02, 1.385e-01, -9.506e-02)); + r += mul(s7_7, M4(-5.514e-02, 2.037e-02, -1.808e-01, 2.922e-02, 1.873e-02, 6.722e-02, 3.174e-02, -4.081e-02, 1.462e-01, 5.952e-02, -5.291e-02, 1.107e-01, -2.713e-02, -1.114e-01, -1.489e-01, -2.283e-02)); + r += mul(s7_8, M4(-7.015e-02, -3.209e-02, -1.040e-02, -1.341e-01, 2.126e-02, -7.423e-02, -5.012e-02, -5.814e-02, -1.321e-02, -2.905e-02, 1.278e-01, -1.232e-01, -8.702e-02, -8.247e-02, -4.964e-02, 
-7.967e-02)); + r += V4(1.486e-02, -7.566e-03, 9.998e-03, -3.041e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.941e-02, -2.434e-02, 3.064e-02, 3.302e-02, 1.836e-02, -5.816e-02, 2.547e-03, 8.317e-03, -4.260e-02, 5.378e-02, -3.433e-02, 1.682e-02, 2.882e-02, -8.481e-02, 4.265e-02, -1.639e-01)); + r += mul(s0_1, M4(-6.492e-05, -8.144e-03, 7.875e-02, 1.499e-01, -3.348e-03, 6.657e-03, -8.380e-02, 1.824e-02, 1.018e-01, 9.654e-02, 1.187e-01, -2.555e-02, -1.917e-02, -6.752e-02, -2.161e-02, 1.374e-01)); + r += mul(s0_2, M4(-8.274e-02, 1.562e-02, 1.319e-02, -9.855e-02, 3.076e-02, 3.649e-02, -7.952e-02, -3.981e-02, -1.685e-01, -1.958e-01, 1.766e-01, -1.922e-01, -3.663e-02, -8.032e-02, -8.093e-02, -2.836e-02)); + r += mul(s0_3, M4(-3.552e-02, 5.147e-02, 6.993e-02, -1.504e-01, 1.389e-01, -1.305e-01, 5.882e-02, -8.144e-02, -2.227e-01, -3.246e-02, -5.250e-02, 9.812e-02, -2.614e-02, 6.418e-02, -2.796e-02, 9.152e-03)); + r += mul(s0_4, M4(-9.613e-02, 6.954e-02, 4.513e-02, 2.010e-02, -4.087e-02, -1.491e-01, -1.704e-01, 7.098e-02, -2.632e-01, 3.257e-02, -1.977e-01, 1.738e-01, -5.193e-02, -5.701e-02, -5.153e-03, 1.801e-02)); + r += mul(s0_5, M4(-2.098e-01, 6.119e-02, 1.018e-02, -8.911e-02, 1.275e-01, -1.889e-02, -3.978e-02, -9.157e-02, -1.794e-01, 1.100e-01, -1.205e-01, 1.971e-02, -8.632e-02, -1.528e-01, -1.608e-01, 
2.589e-02)); + r += mul(s0_6, M4(7.168e-02, -6.540e-02, 1.441e-01, -1.025e-01, -5.714e-02, -4.127e-02, -1.118e-01, 1.293e-01, -8.594e-02, -3.036e-03, -9.054e-03, 8.522e-02, -8.144e-02, -7.556e-02, -6.968e-02, -4.779e-02)); + r += mul(s0_7, M4(1.375e-01, 7.936e-02, 2.420e-02, -1.316e-02, 8.160e-02, -4.091e-02, 3.408e-02, 1.206e-01, 2.924e-02, -2.734e-02, -2.835e-02, 2.180e-02, -1.312e-01, 2.011e-02, -9.516e-02, 1.499e-01)); + r += mul(s0_8, M4(6.966e-02, -5.254e-04, 4.887e-02, 2.101e-02, 2.118e-02, -3.386e-02, -5.333e-02, -1.047e-01, 9.082e-02, -8.253e-02, 1.156e-01, -6.557e-02, -2.685e-03, 5.894e-02, -1.469e-01, 6.893e-03)); + r += mul(s1_0, M4(-6.617e-03, 2.358e-03, 5.778e-03, -1.314e-01, -4.964e-02, 4.517e-02, 2.407e-02, 4.159e-02, -3.152e-02, -2.996e-02, -6.864e-02, 8.028e-02, -2.853e-04, 9.098e-03, 3.602e-02, 7.354e-02)); + r += mul(s1_1, M4(7.501e-05, -1.132e-02, -8.446e-02, 7.754e-02, 1.079e-01, 3.431e-02, -4.273e-02, -5.457e-02, -1.149e-02, 9.484e-02, 7.025e-02, 3.203e-02, 6.332e-02, 3.711e-02, -1.046e-01, -1.178e-02)); + r += mul(s1_2, M4(-5.175e-03, -8.121e-02, -4.146e-02, 7.301e-03, 4.025e-02, 1.223e-02, 2.791e-02, 6.913e-02, 1.473e-02, 8.574e-03, 6.186e-02, -6.230e-02, 1.604e-01, 5.132e-02, 4.447e-02, 1.278e-01)); + r += mul(s1_3, M4(4.298e-02, -4.138e-02, 3.455e-02, -1.189e-01, -1.010e-01, -4.456e-02, 7.414e-02, -5.421e-03, -1.060e-01, -2.736e-02, -8.738e-02, 7.068e-02, 1.642e-02, 1.671e-02, -1.072e-02, 1.842e-01)); + r += mul(s1_4, M4(-1.000e-01, -2.205e-02, 2.536e-02, -3.240e-02, -2.124e-01, -2.686e-01, -6.321e-02, -3.527e-02, -4.732e-02, 9.274e-02, -8.798e-02, -1.492e-02, 3.769e-02, -3.042e-02, -4.481e-02, -1.013e-01)); + r += mul(s1_5, M4(1.502e-02, -3.009e-02, -6.098e-02, -3.087e-02, -4.889e-02, 1.041e-01, 1.036e-02, 8.926e-03, -6.989e-02, 2.073e-02, 3.148e-02, -3.322e-02, 8.676e-02, 5.504e-02, -2.828e-02, 1.090e-01)); + r += mul(s1_6, M4(-3.217e-02, -2.251e-02, -1.199e-01, -7.757e-02, -1.901e-02, -1.472e-02, 2.263e-02, 1.015e-02, -8.386e-02, 
2.263e-02, 2.092e-02, 1.614e-01, 5.835e-02, -2.891e-02, -3.444e-02, 8.383e-02)); + r += mul(s1_7, M4(7.769e-02, 7.688e-02, 5.715e-02, 5.460e-02, -2.923e-02, -1.379e-02, -6.241e-02, -1.554e-01, -1.097e-01, 5.052e-02, 1.053e-01, -9.672e-02, -8.070e-02, -3.313e-02, 1.447e-01, 7.332e-02)); + r += mul(s1_8, M4(4.008e-02, 2.988e-02, 3.386e-03, 1.049e-01, -1.996e-02, 2.128e-02, -5.514e-02, 5.612e-02, 8.421e-02, -3.584e-02, 2.540e-02, -9.043e-02, 9.265e-02, -3.730e-02, 1.231e-01, 7.827e-02)); + r += mul(s2_0, M4(-2.922e-02, 3.207e-02, 2.187e-02, -1.270e-02, 5.509e-02, 4.332e-02, 3.532e-02, -1.507e-03, 8.765e-02, 2.985e-02, -1.912e-02, 1.365e-01, 1.321e-02, -8.696e-03, -5.703e-02, 9.480e-02)); + r += mul(s2_1, M4(2.217e-02, -1.560e-02, -4.137e-02, -4.427e-02, 2.507e-03, -4.972e-02, -9.183e-02, 6.844e-02, 1.052e-01, 3.160e-02, 7.056e-02, -1.322e-01, 2.606e-02, -7.859e-02, 6.381e-02, -7.867e-02)); + r += mul(s2_2, M4(-3.012e-02, -4.216e-02, 7.229e-02, -9.155e-02, 2.943e-02, -2.606e-03, 3.110e-02, -1.333e-01, 1.004e-01, 1.753e-01, 8.786e-02, 4.076e-02, 3.666e-02, 1.754e-02, -6.180e-03, -2.207e-02)); + r += mul(s2_3, M4(-8.370e-02, 2.884e-02, 8.631e-03, -1.328e-01, -9.496e-02, 2.105e-02, 1.071e-01, 3.868e-02, 1.920e-02, 6.074e-02, 6.057e-02, -1.481e-02, -1.764e-02, 3.089e-02, 4.285e-02, 2.929e-03)); + r += mul(s2_4, M4(5.171e-02, -2.145e-01, 1.069e-01, 1.768e-01, -3.950e-02, -4.857e-02, 1.101e-01, -8.912e-02, 2.121e-02, -1.057e-01, 1.876e-01, -3.850e-02, -6.895e-02, -1.441e-01, 7.299e-02, -1.498e-01)); + r += mul(s2_5, M4(-1.724e-02, 7.455e-02, -1.210e-01, 5.442e-02, 3.774e-02, 1.450e-01, 3.625e-02, -6.742e-02, 1.074e-01, 1.191e-01, 1.185e-01, 8.685e-02, -3.812e-02, 2.477e-02, 5.426e-02, 3.321e-02)); + r += mul(s2_6, M4(-8.299e-02, 9.370e-02, -1.897e-03, 3.656e-02, 5.494e-02, -1.649e-02, 2.368e-02, 2.402e-01, 2.794e-02, -6.437e-02, -4.304e-02, -2.421e-02, -6.197e-03, -1.259e-02, 4.286e-05, 5.580e-03)); + r += mul(s2_7, M4(9.556e-02, -4.561e-02, -5.381e-02, -7.346e-02, 
-5.055e-02, 4.323e-02, -5.527e-02, 6.885e-02, -1.879e-03, 9.991e-03, 1.194e-01, -1.043e-01, -1.687e-01, -6.391e-02, 8.169e-02, -1.353e-01)); + r += mul(s2_8, M4(8.130e-03, -4.097e-02, 6.273e-02, 1.351e-02, -1.246e-02, 5.399e-02, -2.276e-01, 1.029e-01, 4.988e-02, 5.196e-02, -6.267e-02, 8.859e-02, -3.213e-02, 1.041e-02, -3.953e-02, 7.868e-02)); + r += mul(s3_0, M4(4.746e-02, 1.205e-02, -1.137e-01, -5.023e-02, 5.604e-02, -3.252e-02, -1.373e-02, -8.422e-02, 4.960e-02, -5.724e-02, 7.550e-02, -3.787e-02, -8.279e-03, 8.360e-03, 8.743e-02, -6.190e-02)); + r += mul(s3_1, M4(-2.924e-01, -8.317e-02, -2.259e-01, 7.349e-03, -5.453e-02, -7.694e-02, -1.276e-02, 1.190e-01, 6.733e-02, -8.522e-02, 4.811e-02, -2.259e-01, 2.148e-02, -1.042e-01, 2.048e-01, 1.342e-01)); + r += mul(s3_2, M4(3.738e-02, 7.917e-02, -4.409e-02, 2.271e-02, 3.073e-02, 2.148e-02, -5.104e-02, -1.077e-01, 3.773e-03, 2.199e-01, 4.947e-02, 9.371e-02, -1.079e-01, -7.741e-02, -9.130e-02, -1.159e-01)); + r += mul(s3_3, M4(1.659e-01, 1.031e-01, 1.433e-02, 7.288e-02, 1.501e-02, -4.722e-02, 3.735e-02, -1.021e-02, 1.170e-02, 3.726e-02, -2.700e-02, 1.136e-02, -2.785e-03, -1.009e-02, 2.770e-02, -1.443e-01)); + r += mul(s3_4, M4(3.255e-02, -1.812e-01, 1.272e-01, 3.350e-01, -8.198e-02, 3.625e-02, 1.054e-01, -1.767e-02, 9.021e-02, 1.111e-01, 3.447e-02, -3.343e-02, 3.322e-02, 4.625e-03, -8.646e-02, -8.394e-02)); + r += mul(s3_5, M4(1.526e-01, 1.363e-01, -9.677e-02, -5.682e-02, -4.038e-02, -7.338e-02, 8.752e-03, -6.069e-02, 1.428e-01, 2.273e-02, 1.126e-04, 1.274e-01, 7.145e-02, -2.907e-02, -2.007e-01, 3.065e-02)); + r += mul(s3_6, M4(-2.294e-02, 1.287e-01, -1.624e-02, 2.358e-01, -2.306e-02, -4.672e-02, -4.526e-02, -1.324e-01, 5.202e-02, -1.641e-01, -1.009e-01, -3.234e-02, -4.125e-02, 2.795e-02, -1.845e-01, -4.797e-02)); + r += mul(s3_7, M4(5.458e-02, -4.348e-02, 5.484e-03, -7.000e-02, -1.933e-01, -3.845e-02, 9.439e-02, 1.540e-01, -2.157e-02, -6.742e-02, -4.266e-03, -6.691e-02, -1.698e-02, -9.325e-02, -1.731e-01, 8.950e-02)); + r 
+= mul(s3_8, M4(4.234e-02, 1.992e-02, 4.979e-02, 8.673e-02, -7.786e-02, -3.021e-02, -1.684e-01, -6.725e-02, 9.105e-02, -7.153e-02, 2.010e-02, -3.460e-02, 1.903e-02, 2.604e-02, 2.235e-03, 3.280e-02)); + r += mul(s4_0, M4(-1.972e-02, 2.761e-02, -3.976e-02, 6.334e-03, -1.171e-02, -2.757e-02, 8.276e-02, -2.144e-02, 3.288e-02, 4.816e-02, 5.422e-02, -4.026e-02, 3.477e-02, -7.479e-02, 1.246e-01, -3.651e-02)); + r += mul(s4_1, M4(-3.185e-02, 1.588e-01, -9.285e-02, -2.698e-01, 9.317e-02, -4.110e-02, 7.298e-02, -2.632e-04, -2.814e-03, 2.064e-01, -2.521e-02, -7.196e-03, -1.124e-01, -4.184e-02, -6.549e-02, -1.707e-02)); + r += mul(s4_2, M4(9.104e-02, 1.520e-01, -1.255e-01, -9.692e-03, 6.623e-02, -4.432e-02, -1.353e-01, -7.303e-02, -9.051e-02, 9.743e-02, -2.979e-04, 1.306e-01, -2.737e-02, 2.494e-02, -1.054e-01, 8.342e-03)); + r += mul(s4_3, M4(1.346e-02, 2.161e-01, 8.065e-02, 7.728e-02, 5.731e-02, -1.770e-01, -1.715e-02, 3.805e-02, 4.969e-02, 1.818e-02, -5.811e-02, 7.452e-02, 3.335e-03, 4.923e-02, 1.136e-01, -1.628e-01)); + r += mul(s4_4, M4(2.199e-01, 9.253e-02, -4.181e-02, -6.793e-02, -1.518e-01, -2.418e-01, 6.812e-02, 4.592e-01, 4.662e-02, 1.026e-01, 3.776e-03, 6.055e-02, -2.150e-01, -1.610e-01, 2.031e-02, -9.005e-02)); + r += mul(s4_5, M4(9.640e-02, -1.482e-03, 3.158e-02, 2.938e-01, -8.727e-02, -1.316e-01, -3.092e-02, -1.496e-01, -5.053e-02, 1.870e-01, -1.285e-01, -1.277e-02, -5.118e-02, -1.790e-02, -2.986e-02, 9.309e-02)); + r += mul(s4_6, M4(-7.843e-03, -3.903e-02, -6.224e-02, 1.617e-01, 7.246e-02, 6.096e-02, -1.730e-02, -1.947e-01, -6.006e-02, -5.627e-02, -5.049e-03, 7.108e-02, -1.217e-02, 3.679e-02, 3.234e-02, -1.250e-02)); + r += mul(s4_7, M4(8.958e-02, 1.249e-01, 8.882e-02, -1.415e-02, 5.857e-02, -4.284e-03, 4.301e-04, 1.450e-01, 3.354e-02, 5.289e-02, -1.237e-01, 1.296e-02, -3.919e-02, -3.139e-04, -1.332e-02, 1.314e-02)); + r += mul(s4_8, M4(-6.471e-02, -4.788e-02, -2.585e-02, -5.434e-02, -4.475e-02, -3.525e-03, -5.352e-02, -1.057e-01, 8.034e-02, 7.279e-02, 
-6.442e-03, 6.861e-02, -2.375e-02, 1.316e-02, -4.083e-02, 1.011e-01)); + r += mul(s5_0, M4(1.985e-02, 3.916e-02, 3.798e-02, -1.692e-02, 6.397e-03, -1.355e-02, -2.031e-02, -2.817e-02, -6.168e-02, -4.464e-02, -5.530e-02, -1.036e-02, -9.005e-03, -5.387e-02, 1.100e-02, -4.647e-03)); + r += mul(s5_1, M4(-2.538e-02, 3.361e-02, 4.501e-02, 8.380e-02, -2.978e-02, 4.141e-03, 2.197e-02, 5.906e-04, -1.235e-01, -4.108e-02, -2.486e-02, 9.339e-02, -2.567e-02, -4.833e-02, -1.186e-01, -6.430e-02)); + r += mul(s5_2, M4(-8.502e-03, -6.589e-03, -2.242e-02, -7.214e-02, 2.742e-02, -5.241e-04, -1.054e-01, -2.671e-02, 3.098e-03, 1.298e-01, 5.643e-02, 1.503e-02, -1.707e-02, 3.305e-02, -9.092e-03, -1.961e-02)); + r += mul(s5_3, M4(7.064e-02, 3.020e-02, -4.359e-02, 1.923e-03, 2.287e-02, -5.756e-02, -1.084e-01, 6.016e-02, 3.864e-02, -4.458e-02, 1.514e-03, -3.369e-02, 5.824e-02, 4.178e-02, 5.598e-02, -2.114e-01)); + r += mul(s5_4, M4(3.226e-02, 1.058e-01, -1.451e-01, 8.814e-02, -2.822e-02, -6.766e-02, 1.211e-01, 2.170e-01, -9.701e-02, -5.907e-02, 6.719e-02, 1.080e-01, 1.558e-01, -1.136e-01, -2.143e-02, 2.146e-02)); + r += mul(s5_5, M4(-4.396e-02, -6.496e-02, -1.304e-01, 1.161e-02, -2.445e-03, -2.884e-02, -8.961e-02, -1.887e-02, 2.963e-02, 1.861e-01, -1.455e-02, -1.838e-02, 8.519e-02, -6.405e-02, 4.370e-03, -2.149e-02)); + r += mul(s5_6, M4(-5.832e-02, 2.541e-02, -2.242e-02, 5.767e-02, 7.318e-02, 3.783e-03, -1.337e-02, -4.162e-02, -3.102e-02, -3.865e-02, -1.660e-02, -1.414e-02, 3.334e-02, 5.308e-02, -2.364e-02, -1.214e-01)); + r += mul(s5_7, M4(1.185e-01, 4.647e-02, 1.327e-02, 1.248e-02, -5.879e-03, -7.769e-02, 3.028e-02, 1.912e-01, -5.491e-02, 4.429e-02, -1.218e-01, -3.657e-02, 1.110e-01, 3.904e-02, -1.309e-01, 3.424e-02)); + r += mul(s5_8, M4(7.027e-02, -5.632e-02, 3.849e-02, -1.103e-01, 7.069e-02, 4.495e-02, 6.745e-02, -4.776e-02, -4.377e-02, -1.048e-02, 5.689e-02, -1.101e-01, -7.026e-02, -1.670e-02, -4.685e-03, 2.207e-03)); + r += mul(s6_0, M4(-7.398e-03, 7.106e-02, 7.153e-02, -1.247e-01, 
-1.109e-02, -3.335e-02, -5.528e-02, 1.849e-01, -4.156e-02, -3.664e-02, -8.374e-03, 6.961e-03, 8.130e-03, -1.577e-02, 1.137e-01, -3.156e-02)); + r += mul(s6_1, M4(1.129e-01, -1.189e-01, 4.488e-02, -7.788e-02, 8.643e-02, 1.837e-01, -1.265e-01, -5.615e-02, -6.547e-02, -1.769e-02, -9.807e-02, -8.365e-02, -5.531e-02, -6.001e-02, -6.394e-03, -1.411e-02)); + r += mul(s6_2, M4(4.292e-02, -1.760e-02, -3.858e-02, -1.170e-01, 4.675e-03, 1.659e-01, -1.608e-01, 1.794e-01, -5.545e-02, -1.435e-02, 3.114e-03, 5.590e-03, -2.390e-02, 1.322e-01, 4.663e-02, 8.771e-02)); + r += mul(s6_3, M4(1.761e-02, 1.445e-02, -6.184e-03, -2.207e-02, -1.204e-01, -4.987e-03, -1.052e-01, 1.484e-01, 1.165e-01, 9.513e-03, -1.542e-02, 7.135e-02, -8.224e-03, -9.027e-02, -2.602e-02, -6.959e-02)); + r += mul(s6_4, M4(9.770e-02, 3.180e-02, -1.041e-01, -1.179e-01, -2.815e-01, -1.230e-02, -2.789e-01, 3.166e-02, -6.205e-02, -8.654e-02, -3.476e-02, 1.611e-01, -1.815e-01, -2.143e-01, 2.580e-02, -2.844e-01)); + r += mul(s6_5, M4(-8.180e-02, -7.580e-02, -4.291e-02, -1.427e-01, 5.695e-02, 9.596e-02, -2.144e-01, 2.188e-01, -1.982e-02, 7.858e-02, 8.475e-02, 1.514e-01, -8.808e-02, 1.089e-01, -4.070e-02, 4.048e-02)); + r += mul(s6_6, M4(-5.352e-02, 5.043e-03, 6.957e-02, 1.354e-01, -1.226e-01, 7.943e-03, -3.156e-01, 1.567e-01, -3.719e-02, 9.156e-03, -1.380e-02, 1.031e-02, 1.340e-01, -5.385e-02, 1.223e-01, -4.012e-02)); + r += mul(s6_7, M4(1.955e-01, -5.829e-02, -8.694e-03, -1.059e-02, -1.216e-01, -3.718e-02, 1.009e-01, -8.280e-02, -3.044e-02, 8.333e-02, -2.832e-02, 1.731e-02, -2.077e-02, -4.440e-02, 7.065e-02, -1.658e-02)); + r += mul(s6_8, M4(-6.162e-02, 3.611e-02, 4.474e-02, -6.702e-02, 4.254e-02, 7.628e-02, -8.461e-02, 1.893e-01, 1.412e-01, -7.279e-03, -1.031e-01, 1.474e-01, 9.579e-03, 3.764e-02, 3.468e-02, 1.618e-02)); + r += mul(s7_0, M4(-6.511e-03, 5.876e-02, -3.252e-02, 1.532e-02, 1.296e-02, 1.271e-02, 3.561e-02, -2.071e-01, -1.077e-03, 4.308e-03, 4.691e-02, -3.955e-02, -1.679e-02, 4.783e-02, 4.004e-02, 
-5.206e-02)); + r += mul(s7_1, M4(-5.695e-02, -8.795e-03, -1.695e-01, 6.811e-03, 1.029e-01, 1.873e-01, 1.672e-01, -1.096e-01, 2.623e-02, 2.085e-02, -2.169e-02, -1.332e-01, -4.755e-02, 1.629e-01, -4.913e-03, 7.361e-02)); + r += mul(s7_2, M4(-4.002e-02, -1.634e-02, -5.634e-03, 2.352e-02, -1.252e-02, -1.325e-01, 6.089e-02, -1.321e-02, -3.389e-02, 6.810e-03, 3.805e-02, 6.999e-02, -2.533e-02, 2.231e-02, -1.077e-01, -2.867e-02)); + r += mul(s7_3, M4(5.882e-02, -1.398e-02, 9.710e-02, 1.346e-01, 7.443e-02, 1.350e-02, 2.343e-01, -7.184e-02, 3.826e-02, 9.868e-02, 6.762e-02, -3.686e-03, -3.982e-02, 1.061e-01, -3.359e-03, 8.183e-02)); + r += mul(s7_4, M4(-3.034e-02, 1.242e-01, 2.341e-02, 1.437e-01, -4.342e-02, -1.100e-01, 5.104e-02, 1.598e-01, -1.597e-01, -9.473e-02, 2.654e-01, 9.261e-02, 1.447e-01, 4.627e-02, -3.673e-03, 1.930e-01)); + r += mul(s7_5, M4(5.936e-03, -3.868e-02, -4.165e-02, 5.440e-02, 1.597e-02, 5.686e-02, 1.738e-02, 4.651e-02, -1.686e-01, 3.454e-02, -4.771e-02, 1.642e-01, 2.743e-02, 1.034e-01, 1.139e-01, -7.928e-02)); + r += mul(s7_6, M4(-6.469e-02, -2.850e-02, 1.345e-02, 8.227e-02, 4.457e-02, -7.836e-02, -3.138e-02, -9.617e-02, 9.191e-02, -2.904e-02, -1.003e-02, 1.651e-02, 9.393e-03, -6.901e-02, -4.156e-02, 5.873e-02)); + r += mul(s7_7, M4(-1.520e-01, 1.625e-02, -1.396e-01, -4.198e-02, -4.981e-02, -3.990e-02, -7.033e-04, -2.569e-02, 5.678e-02, 4.969e-02, 1.535e-02, -9.080e-02, 1.120e-01, -1.128e-02, -7.185e-02, 8.777e-02)); + r += mul(s7_8, M4(-5.642e-02, -5.412e-04, -9.692e-02, -1.431e-01, -2.732e-02, 2.014e-03, 4.999e-02, 7.397e-03, 1.207e-01, 3.546e-02, -5.119e-02, 1.183e-01, -4.234e-02, 6.823e-05, 7.049e-02, -1.245e-01)); + r += V4(7.978e-02, 1.297e-02, 1.591e-02, 3.718e-02); + return r; +} + +void Pass12(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, 
-1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = 
max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, 
s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 13 +//!DESC conv12 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.727e-02, 1.860e-02, -4.458e-03, -5.726e-03, -1.577e-01, -4.286e-02, 6.106e-02, -1.690e-02, 8.294e-02, 5.488e-02, -1.047e-01, 1.538e-01, 8.408e-02, -1.312e-01, -3.116e-02, -2.641e-02)); + r += mul(s0_1, M4(2.394e-02, -5.627e-02, 6.819e-03, -9.615e-02, -9.697e-02, 6.977e-02, 7.691e-02, 
1.391e-02, 2.159e-01, -6.007e-02, 4.664e-02, 1.180e-01, -4.526e-03, -4.731e-02, -4.678e-02, 1.449e-01)); + r += mul(s0_2, M4(-3.139e-02, -4.410e-04, 7.677e-03, -5.838e-02, 2.037e-02, 3.019e-03, -5.542e-02, 1.924e-02, 1.627e-01, -6.640e-02, 1.498e-02, -6.856e-03, 1.605e-02, 7.046e-03, 2.963e-02, 1.696e-02)); + r += mul(s0_3, M4(-1.782e-02, -1.719e-02, -7.225e-02, 1.626e-01, -7.848e-03, 6.311e-03, 1.660e-01, -9.640e-02, 1.901e-01, 1.536e-01, -8.131e-02, 1.568e-01, 6.787e-02, -2.616e-01, -1.657e-01, -1.217e-01)); + r += mul(s0_4, M4(2.402e-02, -5.180e-02, 3.238e-01, 5.130e-02, -1.186e-01, -3.297e-02, 3.518e-02, -8.813e-02, -1.821e-01, 2.735e-03, -2.272e-02, -1.587e-01, 6.347e-04, -4.115e-01, 8.288e-02, -1.711e-01)); + r += mul(s0_5, M4(-4.711e-02, 5.101e-02, 4.590e-02, 1.843e-02, -4.606e-02, -5.505e-02, 1.978e-02, -9.190e-02, -5.407e-02, -6.751e-02, 6.788e-02, -1.819e-01, -3.097e-02, 5.234e-02, 2.505e-02, -1.620e-02)); + r += mul(s0_6, M4(-8.327e-02, 3.394e-02, -2.762e-02, -1.267e-02, -3.118e-02, -4.203e-02, -1.915e-02, -2.694e-02, 4.255e-02, 9.391e-02, -8.181e-02, 8.793e-02, -2.567e-02, -1.241e-01, -3.868e-02, 4.641e-03)); + r += mul(s0_7, M4(-1.265e-01, -1.142e-02, 3.078e-02, -5.078e-02, -4.318e-02, 2.410e-02, -4.860e-02, -1.372e-01, 8.228e-03, 5.283e-02, -1.799e-02, -1.085e-01, -1.308e-02, -1.410e-01, 4.549e-03, -7.541e-02)); + r += mul(s0_8, M4(-1.404e-01, -1.130e-01, 1.942e-02, -8.287e-02, 2.416e-02, -1.117e-03, 3.450e-02, -1.568e-01, -2.204e-02, -3.736e-02, -2.581e-02, -6.577e-02, 2.257e-02, -4.211e-02, -9.675e-03, 7.848e-03)); + r += mul(s1_0, M4(-1.753e-02, -1.061e-01, -1.941e-02, -3.498e-02, 3.414e-03, -2.544e-03, 7.220e-02, -5.898e-02, -5.010e-02, -7.170e-02, -4.184e-02, 1.056e-01, -3.951e-02, 7.392e-02, 3.164e-02, 6.455e-03)); + r += mul(s1_1, M4(2.132e-02, -5.912e-02, 2.541e-02, -2.657e-02, -2.691e-02, 3.691e-02, 1.310e-01, -9.893e-03, 9.223e-02, -1.409e-02, -1.525e-04, 2.587e-02, -2.021e-02, 4.569e-02, -3.204e-03, -1.490e-02)); + r += mul(s1_2, 
M4(1.962e-02, 2.759e-03, -7.012e-03, -8.703e-02, 2.837e-02, 3.851e-02, -6.665e-02, 6.604e-03, -5.036e-02, -1.401e-01, 4.164e-02, -8.759e-03, 8.031e-02, -5.369e-03, 5.276e-02, 3.227e-02)); + r += mul(s1_3, M4(1.417e-01, -1.105e-01, -9.827e-03, 1.260e-01, 5.429e-02, -2.316e-02, 1.919e-01, -6.179e-02, -1.329e-02, -2.445e-02, -8.840e-02, -1.849e-02, 1.295e-02, -2.922e-03, 1.952e-02, 1.974e-02)); + r += mul(s1_4, M4(1.472e-01, 6.149e-02, 3.121e-01, 9.670e-02, -2.147e-01, 1.758e-01, 3.195e-02, -2.337e-01, -1.342e-01, -1.795e-01, -1.035e-01, -1.530e-01, -8.237e-02, -2.517e-02, 1.130e-01, 4.597e-03)); + r += mul(s1_5, M4(1.694e-01, -3.804e-02, 1.006e-02, 9.050e-03, 5.036e-03, -1.017e-01, -1.326e-02, -6.292e-02, -4.628e-02, 3.079e-02, 1.096e-01, -4.227e-02, 4.572e-02, -3.040e-03, 3.162e-03, -1.087e-01)); + r += mul(s1_6, M4(8.446e-02, -7.063e-02, -5.259e-02, -4.093e-02, -2.239e-04, -6.035e-02, 3.502e-02, -4.368e-02, 5.139e-02, -2.759e-03, -5.726e-03, -4.317e-02, -5.761e-02, -1.266e-01, 1.389e-02, -4.611e-02)); + r += mul(s1_7, M4(-3.608e-03, 4.023e-02, 1.596e-01, 4.010e-02, -1.215e-01, 1.654e-01, 2.359e-02, -2.235e-02, 1.056e-02, -1.449e-02, 5.011e-02, -4.273e-02, -4.232e-02, 4.341e-02, 1.237e-02, -3.922e-02)); + r += mul(s1_8, M4(3.516e-02, -2.109e-01, 3.301e-02, -3.649e-02, 6.930e-02, -2.073e-02, -1.740e-02, -8.705e-02, -9.455e-02, -3.697e-02, -2.815e-03, -8.541e-02, 3.530e-02, -2.403e-02, -8.748e-03, 1.030e-02)); + r += mul(s2_0, M4(2.361e-03, -3.106e-02, -2.550e-02, -6.701e-03, 4.618e-02, -1.025e-02, -4.397e-02, -3.041e-02, -4.155e-02, 1.014e-01, 1.181e-02, -8.906e-02, 1.174e-02, -3.898e-02, -4.353e-02, 3.231e-03)); + r += mul(s2_1, M4(7.291e-02, -8.222e-03, 2.432e-02, -9.432e-02, -6.975e-03, 1.698e-02, 6.933e-02, -6.820e-02, 1.979e-03, 9.689e-02, -6.392e-02, 6.149e-02, 9.482e-02, -1.486e-02, 1.717e-02, -6.117e-02)); + r += mul(s2_2, M4(1.253e-02, 9.987e-02, 1.337e-02, -2.675e-02, 8.067e-02, -1.307e-02, -7.607e-02, 3.150e-02, 5.124e-02, -9.682e-02, 3.614e-02, 6.068e-02, 
-1.912e-02, 9.636e-02, 1.016e-01, -6.173e-02)); + r += mul(s2_3, M4(3.080e-02, 8.748e-03, 4.271e-02, 1.727e-02, 1.344e-01, 3.503e-03, 2.901e-02, 5.350e-02, 1.002e-01, -6.725e-02, 5.393e-02, 8.506e-02, 6.896e-02, -4.581e-03, -2.349e-02, 5.707e-02)); + r += mul(s2_4, M4(8.581e-03, 3.780e-02, -3.451e-02, 7.881e-03, 3.311e-02, -4.575e-03, -1.255e-01, -1.687e-01, 6.803e-03, 1.502e-01, 1.383e-01, 1.094e-01, -3.518e-02, 1.349e-01, -9.125e-02, 4.415e-02)); + r += mul(s2_5, M4(-3.615e-02, -1.587e-01, 2.283e-02, 4.116e-02, 3.003e-02, -3.900e-03, 5.371e-02, 1.825e-02, 1.142e-02, 3.305e-02, -2.310e-02, 1.300e-02, -1.215e-01, -3.361e-02, 1.091e-01, -1.498e-02)); + r += mul(s2_6, M4(-5.844e-02, -2.646e-02, 1.989e-03, -1.116e-01, -6.720e-03, -3.411e-02, 6.304e-02, -3.175e-02, 7.671e-02, -8.020e-02, -3.955e-02, -3.694e-02, 3.225e-02, -7.695e-03, -4.875e-02, 3.363e-02)); + r += mul(s2_7, M4(4.558e-02, -8.029e-02, -6.802e-02, -6.150e-03, -2.910e-02, 6.603e-02, -6.762e-02, 1.363e-02, -2.614e-02, 1.409e-01, 1.719e-01, 5.272e-02, 3.533e-02, 2.433e-02, 5.292e-04, -1.978e-02)); + r += mul(s2_8, M4(-4.681e-02, -2.284e-03, 2.970e-02, 1.770e-02, -6.477e-02, -2.109e-02, -1.928e-02, 3.776e-02, -5.004e-02, -6.337e-02, 1.841e-02, -7.409e-02, 2.241e-02, -1.198e-02, -3.168e-02, 4.554e-02)); + r += mul(s3_0, M4(-1.284e-01, -1.481e-02, 2.644e-02, -4.418e-02, -3.164e-02, -1.110e-02, -1.029e-01, -8.182e-02, 7.835e-02, -8.458e-02, -5.275e-03, 5.095e-02, 1.407e-01, 1.449e-03, 2.904e-02, 1.161e-02)); + r += mul(s3_1, M4(9.676e-02, -5.340e-03, 5.016e-03, -3.486e-02, -2.752e-02, 4.908e-02, 9.737e-02, 1.105e-01, -1.245e-02, 9.282e-02, -3.309e-02, -4.545e-03, -1.213e-01, 3.467e-01, 1.786e-01, 1.421e-02)); + r += mul(s3_2, M4(-4.647e-03, -1.704e-02, 1.311e-02, 6.546e-02, -1.045e-01, -9.303e-02, 2.685e-02, -2.411e-01, 1.881e-02, -4.017e-02, -3.385e-02, 7.156e-02, -3.419e-02, 3.292e-02, -1.625e-01, -4.494e-02)); + r += mul(s3_3, M4(-7.864e-02, 2.998e-02, 2.436e-01, 4.854e-02, -1.240e-01, 1.856e-01, -2.961e-02, 
1.470e-01, 4.504e-02, 3.820e-02, -5.622e-03, 4.088e-02, -2.922e-02, -1.591e-01, 2.946e-02, 6.375e-02)); + r += mul(s3_4, M4(8.610e-03, 2.178e-01, -1.730e-02, 1.876e-01, 1.427e-01, -1.139e-01, -1.017e-01, -1.174e-01, -6.733e-02, 6.476e-02, 2.005e-01, -8.975e-02, -3.286e-01, 4.354e-02, -6.958e-02, 2.841e-01)); + r += mul(s3_5, M4(-7.768e-02, -7.174e-02, 1.144e-02, 2.104e-04, -4.001e-02, -6.212e-02, 2.774e-02, -1.382e-01, -2.531e-02, -4.546e-02, -2.461e-02, 4.117e-02, -1.225e-01, -2.037e-01, 1.509e-01, -1.360e-01)); + r += mul(s3_6, M4(-9.252e-02, -1.613e-01, 1.242e-01, -1.272e-02, 4.808e-02, 8.239e-02, 2.337e-02, 1.347e-01, 8.926e-02, 3.812e-03, -1.770e-02, 3.758e-03, -1.071e-01, -1.821e-02, -7.877e-02, 2.461e-02)); + r += mul(s3_7, M4(-1.637e-02, -2.741e-02, -5.856e-02, -8.586e-02, 1.410e-01, -1.654e-01, -1.053e-01, 1.232e-02, 5.715e-02, 4.892e-02, 4.200e-03, -8.125e-02, 5.374e-03, 1.650e-02, -7.558e-02, -2.371e-02)); + r += mul(s3_8, M4(-7.801e-03, -4.560e-02, 2.301e-02, 1.503e-02, -5.623e-02, -1.960e-02, 5.290e-03, -5.354e-02, 1.856e-03, 7.829e-02, -1.709e-02, -8.791e-02, -5.414e-02, -1.351e-01, 1.224e-02, 1.740e-01)); + r += mul(s4_0, M4(1.194e-01, -2.520e-02, -1.475e-02, 7.062e-02, -4.557e-02, 5.231e-03, 1.203e-01, -8.481e-02, -1.178e-01, -1.132e-04, 1.303e-01, 1.029e-01, 1.042e-01, 6.363e-02, -1.091e-02, -3.500e-02)); + r += mul(s4_1, M4(7.939e-02, 5.106e-02, 1.646e-02, 1.241e-02, -1.149e-01, 9.415e-03, -6.784e-02, -8.083e-02, -9.802e-02, 8.836e-02, 1.063e-01, 1.512e-03, -2.318e-02, 1.173e-01, 2.231e-02, -6.367e-02)); + r += mul(s4_2, M4(6.719e-02, -5.960e-02, -1.792e-02, -9.391e-02, -2.777e-02, 4.057e-03, 9.521e-02, -3.255e-02, -5.838e-02, 1.602e-01, 5.509e-02, -3.045e-02, -3.952e-02, -5.492e-02, 1.377e-02, -5.226e-02)); + r += mul(s4_3, M4(-1.063e-01, -1.305e-01, -1.833e-01, -1.631e-02, 6.600e-02, -3.358e-02, 1.500e-01, -7.903e-02, 4.510e-02, 8.916e-02, 1.552e-01, 1.120e-01, 2.318e-01, -5.822e-02, -6.089e-02, 9.544e-02)); + r += mul(s4_4, M4(-1.171e-01, 
-9.434e-02, 9.676e-02, -1.079e-01, 1.392e-01, -8.776e-02, 7.483e-02, 1.604e-01, -6.664e-02, 3.942e-02, 1.006e-02, -1.421e-02, 6.416e-03, -3.308e-02, -3.635e-02, 5.277e-02)); + r += mul(s4_5, M4(-7.795e-02, -1.596e-01, 5.188e-02, -2.112e-02, 2.212e-02, -2.934e-02, -4.072e-02, 3.688e-02, -8.796e-02, 4.554e-02, 4.613e-02, 2.220e-02, 3.293e-02, -2.018e-02, 6.763e-02, -4.271e-02)); + r += mul(s4_6, M4(5.885e-02, -2.973e-02, -2.401e-02, -3.537e-02, -3.940e-02, -8.817e-02, 2.230e-02, -6.296e-02, -1.419e-01, 1.132e-01, 1.068e-01, 3.630e-02, 4.039e-02, -4.258e-02, -1.289e-02, -7.648e-03)); + r += mul(s4_7, M4(-1.078e-01, -3.099e-02, 1.007e-01, 5.882e-02, -8.772e-03, -4.877e-02, 2.467e-02, -5.927e-02, -6.206e-02, 1.329e-01, 6.958e-02, -3.148e-02, 1.597e-01, 5.097e-02, 2.388e-02, 8.372e-02)); + r += mul(s4_8, M4(-7.924e-02, -8.737e-02, -7.882e-02, 5.712e-02, -6.122e-02, 7.157e-02, -2.899e-02, 8.360e-03, 9.493e-03, 1.692e-01, -1.013e-02, 2.343e-02, -5.570e-02, -8.550e-02, 2.770e-02, -9.453e-02)); + r += mul(s5_0, M4(8.819e-03, 2.501e-02, -3.208e-03, -4.834e-03, 1.048e-02, -1.934e-02, -1.325e-02, -1.168e-01, 2.640e-02, 1.421e-03, -1.342e-03, 2.189e-02, 4.175e-02, -2.015e-02, -4.324e-02, -4.105e-02)); + r += mul(s5_1, M4(-1.479e-01, -1.196e-02, 1.970e-01, -1.033e-01, -1.251e-01, -8.718e-02, -9.094e-02, 5.674e-03, 3.394e-02, -2.784e-02, -1.616e-02, 3.621e-03, -5.221e-02, -2.673e-02, 8.328e-03, 9.240e-02)); + r += mul(s5_2, M4(1.195e-02, 2.268e-02, -6.757e-02, -9.740e-02, -6.331e-02, -1.480e-01, -1.055e-03, 6.490e-02, 1.953e-02, -3.462e-03, 1.991e-02, 8.000e-02, -5.332e-02, -8.138e-02, 5.713e-02, -6.742e-02)); + r += mul(s5_3, M4(5.748e-02, 4.795e-02, -7.095e-02, 5.452e-02, 1.210e-01, 1.292e-01, -6.480e-02, -1.008e-01, 1.286e-01, -5.994e-02, -1.638e-02, 3.202e-02, -6.433e-02, 2.687e-02, -6.258e-02, -1.186e-01)); + r += mul(s5_4, M4(-1.261e-01, 1.389e-01, 1.851e-01, 9.956e-02, 2.162e-01, 5.637e-02, -8.870e-02, 1.544e-01, -5.564e-02, -3.457e-02, -9.623e-02, 3.453e-02, 4.398e-02, 
-7.065e-02, -1.803e-01, 1.055e-01)); + r += mul(s5_5, M4(4.411e-02, -5.767e-02, -2.048e-02, -6.275e-02, 3.149e-02, 6.071e-02, -7.995e-02, -3.809e-03, -4.299e-02, -2.634e-02, -1.301e-02, -2.465e-02, 4.801e-02, -5.315e-02, -7.519e-02, -1.131e-01)); + r += mul(s5_6, M4(5.184e-02, -2.393e-02, 2.978e-02, 7.869e-03, 4.903e-02, -5.779e-03, -7.840e-02, -6.297e-02, -3.251e-03, -3.650e-02, -7.022e-02, -9.058e-02, 2.345e-01, -4.798e-02, -6.965e-02, -1.047e-02)); + r += mul(s5_7, M4(-1.676e-01, 9.611e-02, 8.009e-02, 6.452e-02, -1.739e-02, 8.875e-02, -9.899e-03, 2.454e-02, -5.297e-02, -8.019e-02, -1.924e-02, -2.413e-02, 5.407e-02, -1.561e-01, -6.469e-03, -2.123e-01)); + r += mul(s5_8, M4(-1.075e-01, -1.276e-01, -4.091e-02, -2.845e-02, -5.205e-02, 6.212e-02, -4.472e-02, 5.580e-02, 1.958e-02, 6.661e-04, 3.652e-03, -3.716e-02, 4.124e-02, 7.924e-02, -1.599e-03, -1.593e-02)); + r += mul(s6_0, M4(-6.587e-02, 3.429e-02, 9.400e-02, -1.117e-01, 2.646e-02, 3.866e-02, 2.226e-02, 1.353e-01, -3.900e-02, 4.505e-02, 8.604e-02, 3.070e-02, 2.779e-02, 1.100e-02, -6.750e-02, -2.839e-02)); + r += mul(s6_1, M4(4.552e-03, 9.451e-02, -9.898e-02, -2.833e-02, -1.488e-02, 4.382e-02, -1.861e-03, 1.074e-02, 6.540e-02, -6.682e-02, 5.989e-02, -5.892e-02, 7.685e-02, 5.708e-02, -3.337e-02, 8.336e-02)); + r += mul(s6_2, M4(-2.814e-02, 2.122e-02, 4.532e-02, 5.069e-02, 3.021e-02, -7.122e-03, -8.282e-02, 1.747e-02, 9.267e-02, 3.989e-02, 1.376e-02, -3.627e-02, 1.038e-02, -1.479e-02, -2.792e-02, 2.023e-02)); + r += mul(s6_3, M4(-4.933e-02, -7.599e-02, 4.385e-02, 1.319e-02, -1.735e-02, -6.550e-02, 1.971e-01, -1.535e-02, -3.634e-02, -9.941e-03, -1.027e-01, 1.121e-01, 2.485e-02, -7.526e-02, -3.840e-02, -1.220e-01)); + r += mul(s6_4, M4(-4.438e-03, 1.176e-01, -3.435e-02, -9.323e-03, 9.122e-03, -7.171e-02, 2.342e-02, -1.868e-02, -3.795e-02, 4.210e-02, 2.311e-02, -1.366e-01, 1.071e-01, 1.121e-01, 7.830e-02, -1.276e-01)); + r += mul(s6_5, M4(4.036e-02, -1.007e-01, 9.589e-03, -4.900e-02, 1.301e-02, -6.537e-02, 9.083e-04, 
-1.008e-02, 4.379e-02, -7.682e-02, -4.417e-02, 1.213e-01, 2.464e-02, -9.770e-02, 2.136e-02, -2.157e-02)); + r += mul(s6_6, M4(1.200e-02, 2.753e-02, -1.293e-01, -4.456e-02, 7.210e-02, 8.744e-02, 1.314e-01, 5.778e-02, -1.460e-03, 6.104e-02, 5.880e-02, 5.512e-02, 1.923e-02, -5.177e-02, -9.954e-02, -1.065e-01)); + r += mul(s6_7, M4(3.210e-02, 1.409e-02, -4.716e-02, 6.395e-02, -1.541e-01, 1.459e-02, -3.552e-02, 1.028e-02, -6.029e-02, 2.254e-02, -4.864e-02, -1.185e-01, 5.626e-02, -3.494e-02, -1.985e-02, -1.366e-01)); + r += mul(s6_8, M4(-3.078e-02, -1.036e-01, -1.301e-02, 7.125e-03, -4.409e-02, 7.586e-03, -2.456e-02, -1.396e-02, 4.438e-02, -6.712e-03, 6.092e-03, 1.195e-01, 2.095e-02, 3.639e-02, 2.807e-02, 7.034e-03)); + r += mul(s7_0, M4(-1.064e-01, -2.040e-02, 5.966e-02, -1.144e-01, -2.941e-02, 2.149e-02, 1.126e-01, 6.964e-02, 8.057e-03, -7.718e-02, 1.058e-01, 9.676e-03, -1.611e-01, 9.774e-02, -3.619e-02, -2.273e-02)); + r += mul(s7_1, M4(1.222e-01, 7.370e-02, 6.350e-02, -2.778e-02, -5.785e-02, 1.356e-01, -3.859e-03, 1.072e-01, -1.608e-01, 5.626e-02, 4.248e-02, -1.326e-02, 1.310e-01, 3.131e-02, -7.499e-02, 1.239e-01)); + r += mul(s7_2, M4(-1.147e-01, 6.275e-02, 6.749e-02, 2.058e-03, 5.421e-02, 5.281e-03, -4.117e-02, -9.046e-03, 1.216e-02, -1.044e-01, -7.101e-02, 1.551e-01, -4.748e-02, -1.559e-02, 1.106e-01, -8.485e-02)); + r += mul(s7_3, M4(-1.428e-01, 2.568e-02, 4.139e-03, -1.026e-01, 1.045e-01, 9.863e-02, 2.783e-01, 1.663e-01, 2.259e-02, 1.498e-02, -1.033e-01, 2.928e-02, -1.649e-01, 9.702e-03, -2.491e-02, 1.559e-02)); + r += mul(s7_4, M4(4.773e-02, -2.796e-01, -1.417e-01, 3.005e-02, 4.236e-02, -1.432e-01, -4.318e-02, 2.015e-01, 2.713e-03, 7.427e-02, 1.661e-02, -1.379e-02, 8.511e-02, 1.124e-01, 1.307e-01, -4.609e-02)); + r += mul(s7_5, M4(-1.353e-01, -5.591e-02, 1.868e-02, 1.359e-02, -3.074e-03, -8.917e-02, 5.121e-02, -1.265e-02, 2.939e-02, -7.794e-02, 4.117e-02, -8.684e-02, 4.066e-02, 1.425e-02, 4.589e-02, 7.110e-02)); + r += mul(s7_6, M4(7.543e-02, 5.722e-02, 
-5.474e-02, 9.440e-02, 1.591e-02, 8.331e-04, 1.057e-01, 1.237e-01, 4.280e-02, 7.995e-02, 1.499e-02, 5.792e-03, -1.117e-01, 6.059e-02, 1.653e-02, 5.193e-03)); + r += mul(s7_7, M4(8.459e-02, -9.582e-02, -4.502e-02, -5.642e-02, 4.828e-02, 5.482e-02, 9.157e-02, 1.656e-01, 9.634e-03, 6.346e-02, 2.196e-02, -6.270e-02, 5.168e-02, -7.061e-03, -4.080e-03, -1.748e-01)); + r += mul(s7_8, M4(-1.326e-02, 3.705e-02, 1.875e-02, -9.729e-03, 7.288e-02, 5.390e-02, -8.273e-03, -1.523e-02, 1.830e-03, 2.142e-02, 4.581e-02, 1.018e-01, 2.352e-02, 4.956e-02, 1.290e-02, -6.372e-02)); + r += V4(-1.451e-02, 4.205e-03, -1.874e-02, 5.987e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.344e-03, -9.805e-03, -2.839e-02, -3.089e-02, 1.048e-01, 2.998e-02, 4.555e-02, 7.427e-03, -8.033e-02, 2.833e-02, 6.935e-02, -9.568e-02, -6.328e-02, 2.050e-02, -5.968e-02, 2.476e-02)); + r += mul(s0_1, M4(2.240e-02, -5.632e-02, -4.306e-03, -1.022e-01, -1.093e-01, 4.465e-02, -9.950e-03, 2.156e-02, -2.778e-02, 3.831e-02, 1.898e-02, -2.200e-02, 1.370e-02, 2.085e-01, 4.253e-02, 5.573e-02)); + r += mul(s0_2, M4(9.375e-02, -2.336e-02, 8.234e-02, 4.651e-02, 6.124e-03, -1.309e-02, 9.795e-02, 1.715e-02, -6.332e-02, -8.201e-02, 8.917e-02, -2.506e-02, -4.990e-02, 1.082e-01, 5.102e-02, -9.560e-02)); + r += mul(s0_3, M4(-7.771e-02, -2.765e-02, 1.889e-02, -3.040e-02, 
-8.316e-03, -1.270e-02, -4.994e-02, 4.334e-02, -7.231e-02, 4.457e-02, 1.622e-01, -4.778e-02, 3.031e-02, 1.168e-01, -1.184e-02, -1.127e-02)); + r += mul(s0_4, M4(2.607e-02, -5.881e-02, 1.845e-01, -2.438e-01, -7.663e-02, -7.072e-02, -1.595e-01, 4.675e-02, -1.333e-01, -1.456e-01, -7.031e-04, 1.973e-03, -1.121e-01, -3.679e-02, 1.236e-02, -1.194e-01)); + r += mul(s0_5, M4(2.403e-02, -8.932e-02, 1.899e-02, 3.188e-02, 2.199e-03, -9.581e-02, 1.759e-02, 5.322e-02, 3.552e-02, -1.322e-01, 1.539e-01, 6.338e-03, -4.610e-02, 1.254e-01, -4.811e-02, 7.377e-03)); + r += mul(s0_6, M4(1.081e-01, -1.338e-02, 5.300e-02, -1.344e-02, 3.532e-02, 6.458e-02, 5.362e-02, -4.402e-02, -9.941e-02, -3.865e-02, -5.581e-03, -3.485e-02, -7.281e-02, 7.453e-02, -5.049e-02, -1.314e-02)); + r += mul(s0_7, M4(1.238e-01, -2.739e-02, 1.134e-01, -3.758e-02, 1.081e-01, 1.110e-01, -7.975e-02, -1.989e-02, 2.296e-03, -2.101e-01, -1.088e-02, 8.849e-02, 7.111e-02, 1.474e-01, 6.939e-02, -4.090e-02)); + r += mul(s0_8, M4(3.295e-02, -9.573e-02, -4.783e-02, -6.984e-02, 8.421e-02, -1.479e-02, -4.707e-02, 4.174e-02, 1.252e-02, -1.382e-01, 1.412e-03, 1.788e-02, -5.869e-02, -1.024e-01, 3.305e-02, 2.486e-01)); + r += mul(s1_0, M4(5.386e-02, -2.332e-04, -6.319e-02, -6.575e-02, 9.518e-02, 2.111e-02, 4.273e-02, 2.367e-03, 3.327e-03, -1.247e-02, 1.171e-01, -6.264e-04, -8.330e-02, 3.086e-02, -2.737e-02, 1.056e-01)); + r += mul(s1_1, M4(5.131e-02, -9.459e-02, 1.391e-02, -8.433e-02, -2.116e-02, 3.004e-02, -4.266e-02, -4.011e-02, 1.038e-01, 5.195e-02, 2.186e-02, 4.653e-03, 6.983e-02, 1.010e-02, -1.459e-01, 1.511e-01)); + r += mul(s1_2, M4(1.060e-01, -2.618e-02, 6.360e-02, 1.022e-01, 8.176e-03, 2.611e-02, 1.113e-01, 3.624e-02, -9.619e-03, 8.484e-03, 9.125e-02, 4.374e-02, -3.693e-02, 7.485e-04, 1.215e-02, -3.590e-02)); + r += mul(s1_3, M4(-2.750e-02, -4.485e-02, 9.890e-02, -6.311e-02, 1.524e-02, -6.605e-02, -1.650e-02, 5.478e-02, 3.148e-02, -8.228e-02, 1.109e-01, -7.090e-02, -1.741e-02, -4.447e-02, 5.358e-02, 7.489e-02)); + r += 
mul(s1_4, M4(2.716e-02, 1.156e-01, 6.520e-02, -1.864e-01, -5.167e-02, -2.354e-01, -2.550e-02, 1.401e-01, -3.611e-02, 5.682e-04, -1.600e-01, -1.332e-01, -1.504e-01, 4.322e-02, -1.132e-01, -2.514e-01)); + r += mul(s1_5, M4(1.397e-03, -3.174e-02, -6.599e-02, 1.456e-01, -3.225e-02, -6.391e-02, 1.805e-01, 6.478e-03, 6.731e-03, 4.296e-02, -2.010e-03, -7.334e-02, 2.013e-02, -3.991e-02, 1.716e-01, 1.178e-01)); + r += mul(s1_6, M4(2.465e-02, -1.753e-02, -5.721e-02, -6.484e-02, 6.290e-02, -6.025e-02, 1.484e-01, -5.859e-02, 1.292e-01, 1.726e-02, -2.810e-02, 2.979e-02, -4.972e-03, -2.422e-02, 5.687e-02, 2.980e-02)); + r += mul(s1_7, M4(1.368e-01, 1.486e-01, -3.852e-02, 8.255e-02, -3.771e-02, 1.432e-01, -1.848e-01, -3.078e-02, 3.680e-02, 1.328e-02, -1.199e-01, -6.755e-04, -7.118e-02, 7.359e-02, -1.523e-02, 4.401e-02)); + r += mul(s1_8, M4(2.924e-02, -8.047e-02, -7.385e-02, -1.716e-02, 5.621e-02, -2.854e-02, 6.091e-02, 5.831e-03, 3.138e-02, -6.197e-02, -1.127e-02, 3.365e-02, -6.421e-02, -1.412e-02, -1.351e-02, 1.070e-01)); + r += mul(s2_0, M4(-1.293e-02, -5.838e-02, 6.240e-02, -1.877e-01, 5.550e-02, -9.230e-02, 1.265e-01, 2.416e-02, 1.493e-04, -2.588e-02, -1.854e-01, 2.477e-01, -7.828e-02, -4.300e-02, -1.092e-01, -6.587e-02)); + r += mul(s2_1, M4(8.245e-02, -9.892e-02, 1.759e-01, 3.533e-02, -1.654e-03, 1.972e-02, -2.587e-02, 8.446e-02, -7.160e-03, 5.482e-02, -2.511e-01, -1.212e-01, -7.406e-03, -5.994e-02, 7.520e-02, -3.798e-02)); + r += mul(s2_2, M4(7.256e-03, -1.018e-01, 4.222e-02, 2.550e-03, -1.391e-02, -1.088e-02, -1.922e-02, -6.792e-02, -4.209e-02, -1.057e-02, 1.268e-03, 7.285e-02, -6.543e-02, 3.588e-02, 2.900e-02, -2.981e-02)); + r += mul(s2_3, M4(1.095e-01, -5.158e-02, -6.584e-03, -4.608e-02, 1.458e-02, -4.648e-03, 2.637e-02, 6.995e-03, -2.911e-01, 5.837e-02, 5.528e-02, -1.085e-01, -4.009e-04, 3.748e-03, 7.829e-02, -9.249e-02)); + r += mul(s2_4, M4(1.073e-01, 8.664e-03, -1.294e-01, 1.669e-01, 1.476e-03, -2.677e-02, -9.490e-03, 3.585e-02, -3.018e-01, 2.565e-01, -1.070e-01, 
-2.619e-01, 1.232e-02, 1.379e-01, 2.695e-02, 9.244e-02)); + r += mul(s2_5, M4(5.018e-02, -7.074e-02, -8.919e-02, 3.096e-02, -1.395e-02, 3.363e-02, -1.519e-02, 4.761e-02, -4.412e-02, -9.688e-03, 1.550e-01, -1.703e-02, 3.976e-02, 5.225e-02, -8.573e-02, 1.902e-02)); + r += mul(s2_6, M4(-2.469e-02, 2.743e-02, -4.801e-02, -3.503e-02, -1.190e-01, -3.653e-03, -6.111e-02, -7.555e-02, 8.907e-02, 3.066e-02, 4.436e-02, 6.102e-02, -1.557e-02, -1.555e-02, -9.236e-04, 1.793e-02)); + r += mul(s2_7, M4(9.239e-02, 2.052e-02, -6.767e-03, -1.165e-01, -1.404e-02, 1.715e-02, -2.119e-02, -2.680e-02, 4.578e-02, 4.789e-02, -2.823e-02, 1.048e-01, 8.417e-02, 5.547e-02, -7.012e-03, -1.722e-02)); + r += mul(s2_8, M4(-4.989e-02, -1.010e-01, 4.738e-02, 1.326e-02, -1.239e-01, -4.745e-02, 1.063e-02, 8.686e-03, 1.224e-02, 1.136e-02, 1.073e-02, 1.489e-01, -2.937e-02, 5.295e-02, -4.437e-02, 2.992e-02)); + r += mul(s3_0, M4(4.208e-02, -3.703e-04, -1.569e-01, -6.008e-02, 7.651e-02, -6.044e-02, 1.212e-01, 3.376e-02, 2.591e-02, 5.298e-03, 4.234e-02, -1.362e-01, -6.371e-02, -3.483e-04, 4.701e-02, -9.569e-02)); + r += mul(s3_1, M4(-1.979e-01, -4.556e-02, 5.086e-02, -1.667e-01, 2.496e-02, 6.713e-03, 1.485e-01, 2.983e-02, 9.088e-02, -4.213e-03, 7.270e-02, 1.251e-02, -1.967e-01, 8.713e-02, 6.949e-02, 6.630e-02)); + r += mul(s3_2, M4(-1.449e-01, -6.095e-02, 5.335e-02, -5.547e-02, 1.107e-01, -1.202e-02, 1.260e-01, 4.970e-04, -1.520e-02, -1.878e-02, 1.131e-02, -2.673e-02, -3.896e-02, -3.462e-02, 1.693e-01, 8.253e-02)); + r += mul(s3_3, M4(-7.279e-02, 1.513e-01, -6.853e-02, -9.554e-02, -4.488e-01, 1.131e-01, -3.079e-01, -9.108e-02, -3.125e-02, -1.483e-01, 1.877e-01, -4.281e-02, 2.693e-01, 5.360e-02, 1.042e-01, -8.684e-04)); + r += mul(s3_4, M4(1.337e-01, 5.500e-02, -1.685e-01, -8.454e-03, -1.487e-01, 6.688e-02, 1.803e-01, 4.025e-02, -2.288e-03, 1.328e-01, -2.330e-02, 3.808e-02, 6.442e-02, 2.379e-01, 1.518e-01, 2.082e-02)); + r += mul(s3_5, M4(-8.197e-02, -4.012e-02, -5.432e-03, -2.579e-02, 5.816e-02, -4.647e-02, 
-6.499e-02, -9.443e-02, -4.104e-02, -3.033e-02, 3.759e-02, -3.847e-02, 4.894e-02, 3.089e-01, 5.030e-02, 6.165e-02)); + r += mul(s3_6, M4(3.228e-03, 3.505e-02, -8.164e-02, -2.838e-02, -8.041e-02, 2.948e-02, -2.364e-02, -1.109e-01, 1.239e-02, 1.975e-02, 1.805e-02, 7.607e-02, -8.639e-02, 6.563e-02, -8.454e-02, -3.153e-03)); + r += mul(s3_7, M4(-4.569e-02, -2.611e-02, 4.161e-02, -7.171e-03, -1.369e-02, -2.254e-01, 7.921e-02, -1.140e-01, 1.063e-01, 4.064e-02, 6.557e-02, -6.192e-02, 2.136e-02, -8.469e-03, 2.204e-01, -6.957e-02)); + r += mul(s3_8, M4(-1.402e-01, -2.294e-02, 1.230e-02, 5.118e-02, -1.238e-01, -1.030e-01, 5.372e-02, -1.356e-02, 2.047e-02, -2.680e-02, -2.303e-02, 2.664e-02, 6.519e-02, 1.141e-01, -4.940e-02, -1.001e-01)); + r += mul(s4_0, M4(1.462e-01, 1.360e-02, -1.126e-03, -6.671e-02, 1.432e-01, 4.517e-03, 4.074e-02, 4.777e-02, -4.704e-02, 1.196e-01, -3.582e-02, 6.838e-02, 4.898e-02, 1.875e-02, -9.463e-03, -3.076e-02)); + r += mul(s4_1, M4(-2.508e-02, 2.834e-02, 1.278e-01, 9.014e-02, 4.667e-02, -5.542e-02, -2.249e-02, 3.323e-02, -2.201e-02, 1.178e-01, -7.320e-02, 4.906e-02, 1.513e-02, -1.928e-02, -2.721e-02, 5.136e-02)); + r += mul(s4_2, M4(1.140e-01, 4.113e-02, -8.168e-02, -3.008e-02, -3.237e-02, 1.575e-02, -1.213e-01, 8.478e-02, -1.921e-02, 1.392e-01, 5.893e-03, 1.796e-01, 2.867e-02, -1.158e-02, 5.610e-02, 1.319e-02)); + r += mul(s4_3, M4(1.879e-02, 4.729e-03, 1.568e-02, 7.471e-02, 4.814e-02, -6.075e-03, -4.665e-03, 5.821e-02, 7.452e-02, 1.179e-01, 1.133e-02, 1.727e-01, -8.217e-02, 5.060e-02, 3.670e-02, -9.247e-02)); + r += mul(s4_4, M4(6.469e-02, 1.039e-01, -8.760e-02, -1.319e-01, -7.083e-02, 7.944e-02, -4.220e-02, -2.130e-01, 1.128e-01, 5.147e-02, -5.449e-02, 2.036e-01, -5.375e-02, 6.902e-02, 5.372e-02, 2.733e-03)); + r += mul(s4_5, M4(-1.676e-01, 9.467e-03, -5.490e-02, -1.960e-01, -1.715e-01, 1.034e-01, -9.772e-02, 8.503e-03, 9.439e-02, 1.480e-01, -5.971e-02, 1.124e-01, 2.235e-02, -3.263e-02, 5.998e-02, 8.289e-02)); + r += mul(s4_6, M4(-1.038e-01, 
-8.226e-02, -1.567e-01, 2.170e-02, 1.291e-01, 8.024e-02, 1.948e-03, 4.178e-02, 2.871e-03, 1.282e-01, 2.812e-02, 1.055e-01, 1.089e-01, 1.132e-02, 2.708e-02, -2.635e-02)); + r += mul(s4_7, M4(-8.934e-02, -4.735e-02, -5.800e-02, -5.941e-03, -4.215e-02, 1.354e-01, 4.278e-02, -6.081e-02, -2.853e-02, 2.969e-02, -8.708e-02, 1.214e-01, 5.987e-02, 8.078e-02, 2.372e-02, -5.396e-03)); + r += mul(s4_8, M4(-4.907e-02, -1.079e-02, 1.005e-01, -6.986e-02, -5.939e-02, 1.774e-01, -1.692e-01, -1.039e-02, -4.109e-02, 9.287e-02, -6.562e-02, 9.642e-02, 6.208e-02, 1.334e-02, -5.994e-02, -1.001e-02)); + r += mul(s5_0, M4(8.164e-02, 4.011e-02, 6.939e-03, 8.032e-03, 3.212e-02, -9.251e-02, 1.170e-02, -2.023e-02, -3.300e-02, -1.767e-02, -4.043e-03, -5.685e-02, 8.116e-02, -5.937e-02, -6.877e-02, 2.343e-02)); + r += mul(s5_1, M4(-1.003e-01, -5.262e-02, 9.624e-02, 8.277e-02, -8.407e-03, -1.362e-01, -2.456e-02, -2.344e-02, -4.793e-02, -4.068e-02, 1.046e-02, -1.440e-01, 6.938e-03, 5.128e-02, 1.006e-01, 9.821e-02)); + r += mul(s5_2, M4(5.356e-02, 2.052e-02, -4.819e-02, 6.780e-03, -1.889e-02, -3.426e-02, 6.200e-02, 1.969e-02, 1.825e-02, 2.155e-02, 1.092e-01, -4.080e-02, 1.203e-01, 4.247e-02, -1.231e-02, -1.020e-01)); + r += mul(s5_3, M4(-5.032e-02, 4.816e-02, 7.752e-02, -2.300e-02, -1.198e-01, -1.443e-01, 2.161e-02, 5.853e-02, 2.987e-02, -4.013e-02, -2.463e-02, -5.912e-02, -1.189e-01, 6.843e-02, -3.794e-01, 4.953e-02)); + r += mul(s5_4, M4(2.933e-02, 9.212e-02, -2.216e-01, 3.046e-02, -6.752e-02, -1.066e-01, -5.734e-02, 1.999e-02, 4.349e-02, -9.789e-02, 5.922e-02, 1.075e-02, 3.302e-02, -4.449e-03, 7.140e-02, 8.827e-02)); + r += mul(s5_5, M4(2.336e-02, 5.605e-02, -1.168e-01, -2.738e-02, -1.068e-01, -1.342e-01, 7.226e-03, 5.034e-02, 6.440e-02, -2.983e-02, -3.400e-02, -4.761e-02, 6.747e-02, -1.067e-01, 7.144e-02, 1.726e-02)); + r += mul(s5_6, M4(-1.062e-02, -6.428e-02, -7.855e-02, -3.672e-02, 3.085e-03, -4.312e-02, -1.639e-02, -6.597e-02, -4.954e-03, -6.910e-02, 7.293e-03, -7.641e-02, -7.643e-02, 
1.781e-01, -1.830e-01, 4.187e-02)); + r += mul(s5_7, M4(-7.771e-03, 6.857e-02, 1.087e-02, -8.714e-02, -1.724e-01, -5.215e-02, -8.151e-02, -8.315e-02, 1.904e-02, -3.969e-02, -2.525e-03, -9.843e-02, 9.320e-02, -6.709e-03, -1.866e-01, 1.070e-01)); + r += mul(s5_8, M4(3.333e-02, 9.300e-04, 7.545e-03, 3.959e-02, -2.973e-02, -5.905e-02, 2.414e-02, -7.486e-02, 3.673e-02, -3.521e-02, -2.630e-02, -1.747e-02, -1.480e-01, -2.285e-02, -3.720e-02, 9.578e-02)); + r += mul(s6_0, M4(-7.760e-02, 7.478e-02, -1.705e-01, -6.227e-02, -9.431e-02, 8.710e-02, 5.079e-02, -3.262e-03, 1.351e-02, 8.079e-02, -1.082e-01, 1.096e-02, -2.534e-02, -1.407e-02, -4.151e-02, 2.357e-03)); + r += mul(s6_1, M4(-6.798e-02, -6.315e-02, 5.991e-03, 8.121e-02, -8.420e-02, -2.555e-02, 5.151e-04, 1.331e-02, 4.140e-02, -4.308e-02, 1.862e-01, -7.537e-02, -1.481e-02, 1.059e-02, 1.700e-04, 5.745e-03)); + r += mul(s6_2, M4(-6.967e-02, 6.080e-02, 2.605e-02, -3.930e-02, -4.282e-02, -4.539e-02, 3.064e-02, 2.046e-02, -1.580e-02, -4.079e-02, -2.028e-02, -4.854e-02, -5.895e-02, -3.993e-02, -2.290e-03, -6.917e-02)); + r += mul(s6_3, M4(-1.466e-01, -4.031e-03, 3.942e-02, -1.060e-01, 4.836e-02, -6.690e-02, 1.602e-01, -1.370e-01, -1.037e-01, -2.140e-02, -2.851e-02, -6.891e-02, 8.217e-03, -6.676e-02, 8.412e-03, -9.480e-02)); + r += mul(s6_4, M4(9.009e-02, -4.851e-02, -2.332e-02, 1.002e-01, 6.230e-02, -1.825e-01, 3.208e-02, 1.192e-01, 6.470e-02, 3.821e-02, -2.345e-01, 4.967e-02, 2.437e-03, 1.159e-01, -1.056e-02, 2.428e-02)); + r += mul(s6_5, M4(-1.050e-01, 1.916e-02, 8.092e-02, 1.318e-02, 5.276e-02, 7.569e-03, -4.164e-02, -7.187e-02, -1.271e-01, 6.620e-02, 4.655e-02, -1.058e-01, 3.835e-03, 1.104e-02, -7.979e-02, -3.507e-02)); + r += mul(s6_6, M4(-2.637e-02, 1.293e-02, -5.877e-02, 2.606e-02, 3.808e-02, -1.190e-01, 1.087e-01, 6.010e-02, -1.083e-01, 2.846e-03, 6.642e-02, 1.077e-02, -6.618e-02, -1.059e-01, 4.072e-02, 9.681e-02)); + r += mul(s6_7, M4(-2.846e-01, -4.732e-02, -1.686e-01, 2.317e-02, -2.674e-02, -2.176e-01, 2.488e-04, 
-7.701e-02, 4.202e-02, -8.379e-03, -2.075e-01, -1.442e-01, 6.196e-02, -3.720e-02, 2.136e-02, -6.112e-02)); + r += mul(s6_8, M4(1.342e-02, 5.232e-03, -4.976e-02, 1.687e-02, -3.436e-02, -5.854e-02, 5.797e-02, 2.055e-04, -3.105e-02, 3.903e-03, 4.700e-02, 7.226e-02, 7.196e-02, 1.413e-02, 4.264e-02, 3.840e-02)); + r += mul(s7_0, M4(2.040e-02, 1.497e-02, 2.256e-02, 1.460e-02, -9.725e-02, 6.526e-02, -2.696e-02, 2.454e-02, 1.266e-01, -3.398e-02, 6.082e-02, 5.908e-03, -1.409e-01, 3.909e-03, -7.887e-02, 6.916e-02)); + r += mul(s7_1, M4(8.005e-02, 6.767e-02, -1.255e-01, 1.218e-01, -1.372e-01, 2.646e-02, 5.793e-02, 1.054e-02, -1.809e-01, 1.230e-04, -1.754e-01, -2.163e-01, 3.360e-02, -2.203e-02, 1.462e-01, -1.898e-02)); + r += mul(s7_2, M4(7.027e-02, 4.845e-02, 1.684e-01, -8.964e-02, -3.446e-02, -3.309e-02, 8.349e-02, 5.183e-02, -1.477e-01, 1.125e-01, -1.548e-01, -2.234e-03, -4.121e-02, 1.182e-02, 3.854e-02, -3.380e-02)); + r += mul(s7_3, M4(-5.837e-03, 3.345e-02, -7.857e-02, 1.266e-01, -8.752e-02, 1.598e-01, -1.238e-01, 3.737e-02, -4.614e-02, 6.373e-02, -1.753e-01, -9.201e-02, -9.938e-02, 1.970e-02, -7.991e-02, -1.997e-01)); + r += mul(s7_4, M4(-2.545e-01, 8.458e-02, 1.423e-01, 3.587e-02, 2.393e-02, 1.303e-01, 8.049e-02, 1.975e-01, 1.185e-01, 6.992e-03, 8.926e-03, 2.984e-02, -6.371e-02, 5.806e-02, 2.618e-01, -1.410e-02)); + r += mul(s7_5, M4(-3.266e-02, 1.660e-02, 5.827e-02, 1.212e-02, -7.540e-02, 5.256e-02, -2.155e-02, -6.183e-02, 2.177e-01, 6.338e-02, -5.074e-02, -9.758e-03, -5.153e-03, 2.625e-03, -2.261e-01, -1.068e-01)); + r += mul(s7_6, M4(5.063e-02, 1.280e-01, 6.280e-02, 4.254e-03, 2.437e-02, 3.588e-02, -4.090e-02, 7.597e-02, -3.171e-02, 4.712e-02, -1.201e-01, 2.630e-02, 2.330e-02, -4.809e-02, 6.096e-02, 1.435e-01)); + r += mul(s7_7, M4(-4.286e-02, -1.806e-01, 7.417e-02, 4.447e-02, 7.957e-02, 3.068e-01, -3.460e-02, 1.750e-01, -4.259e-03, 4.266e-03, -3.093e-02, -4.012e-02, 2.650e-01, 1.086e-01, 1.919e-01, -7.284e-02)); + r += mul(s7_8, M4(-2.275e-02, -1.156e-02, 
1.682e-02, 4.636e-02, 1.449e-01, 7.291e-02, -3.986e-03, 3.819e-02, 5.878e-02, 2.092e-03, 2.601e-02, 2.768e-02, -3.431e-03, 7.016e-03, -4.034e-02, 5.688e-02)); + r += V4(-9.645e-03, 2.342e-02, -1.139e-02, 1.378e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.235e-02, 1.758e-02, -1.337e-02, -1.894e-02, -1.007e-02, 3.774e-02, 3.656e-02, 4.967e-02, 3.830e-02, -5.293e-02, 5.697e-02, 1.559e-02, -3.131e-03, 1.521e-01, -7.018e-02, -2.493e-02)); + r += mul(s0_1, M4(-6.261e-02, 3.260e-02, -8.457e-03, -1.056e-01, 2.296e-02, 3.729e-02, -5.025e-02, 1.684e-02, -4.360e-02, -7.328e-03, 7.443e-03, 4.313e-02, 1.082e-02, 1.483e-01, 2.137e-02, 5.946e-02)); + r += mul(s0_2, M4(-2.812e-02, -1.622e-02, 7.932e-03, -1.053e-01, -1.674e-02, 2.378e-02, -1.198e-02, -1.223e-02, -3.115e-02, 2.124e-02, 1.056e-02, 9.962e-03, 6.215e-02, 3.674e-02, 3.996e-03, 1.621e-02)); + r += mul(s0_3, M4(4.894e-02, -4.206e-02, -1.521e-02, 3.288e-02, 9.242e-03, -3.116e-02, -1.769e-02, -1.571e-02, -5.983e-02, -1.275e-01, 1.573e-01, -5.620e-02, 1.653e-02, -6.470e-02, 8.811e-02, 2.235e-02)); + r += mul(s0_4, M4(1.422e-01, 1.464e-01, 1.462e-01, -2.127e-02, -2.044e-02, 2.511e-02, 6.318e-02, 4.210e-03, 4.909e-02, 1.642e-02, 3.219e-02, -7.011e-02, 4.381e-01, -5.414e-02, 3.604e-01, -9.337e-02)); + r += mul(s0_5, M4(-7.263e-02, 6.901e-02, -7.342e-02, 2.163e-02, 
1.265e-01, 1.449e-02, 1.719e-02, 8.816e-02, 6.955e-02, -2.453e-02, -1.210e-01, -3.800e-02, 2.995e-01, 4.550e-02, 9.555e-02, 5.017e-02)); + r += mul(s0_6, M4(-2.159e-02, -1.025e-01, -2.438e-02, -3.804e-02, 3.301e-02, 4.639e-02, 1.628e-02, -3.390e-02, -6.444e-02, 5.609e-02, -1.486e-02, -4.606e-02, 6.844e-02, 2.264e-02, 2.765e-02, 4.180e-02)); + r += mul(s0_7, M4(-1.188e-01, 1.812e-02, 9.982e-02, -5.810e-02, -2.826e-02, -7.934e-02, 1.336e-01, 2.213e-02, -6.742e-02, -6.453e-02, -3.554e-03, -6.146e-02, 1.484e-02, 9.664e-02, 7.380e-03, 1.108e-01)); + r += mul(s0_8, M4(-1.089e-01, -4.001e-02, -1.961e-02, 5.396e-02, 2.787e-02, -1.072e-01, 1.750e-02, 5.926e-02, -6.103e-02, -2.486e-02, -7.206e-02, 6.456e-02, -7.149e-03, 8.422e-02, -3.065e-02, 4.495e-02)); + r += mul(s1_0, M4(4.277e-02, -2.244e-02, 2.755e-02, -2.349e-02, 9.466e-04, -5.494e-02, -4.354e-03, 6.013e-02, 2.384e-02, 2.095e-02, 6.241e-02, 3.497e-02, -1.033e-01, -1.233e-02, -3.484e-02, -3.777e-02)); + r += mul(s1_1, M4(-9.936e-03, 3.308e-02, 2.330e-02, 4.952e-02, 8.436e-02, -6.737e-02, 6.331e-03, 1.179e-01, 5.664e-02, 1.490e-01, 1.005e-02, -5.524e-02, -3.491e-02, -2.057e-01, 6.983e-03, 2.871e-02)); + r += mul(s1_2, M4(6.342e-02, 6.945e-02, -2.345e-03, -2.712e-02, -6.100e-02, 1.269e-03, -5.115e-02, -3.713e-02, -7.851e-02, 3.446e-02, 2.732e-03, 7.302e-02, 1.977e-02, -5.482e-02, 1.593e-02, 6.747e-02)); + r += mul(s1_3, M4(9.142e-02, 5.510e-02, 1.406e-02, 3.289e-02, 1.004e-01, 2.006e-02, -1.215e-01, 1.250e-01, 4.927e-02, -8.154e-02, -3.912e-02, 1.928e-01, 2.688e-02, 5.277e-02, 2.352e-02, -2.572e-02)); + r += mul(s1_4, M4(1.341e-01, 1.110e-01, 7.819e-02, -2.257e-02, 1.074e-01, 4.469e-02, -5.829e-02, -6.687e-03, 1.695e-01, -5.354e-02, -4.332e-02, -8.639e-02, 2.138e-01, -2.395e-01, 4.136e-03, -3.769e-01)); + r += mul(s1_5, M4(6.165e-03, 5.052e-02, -6.019e-02, 8.176e-02, 1.499e-01, 6.896e-02, -1.783e-01, 9.658e-02, 5.354e-02, -6.761e-02, -1.791e-02, 5.131e-02, 1.450e-01, 4.687e-03, -1.662e-03, -1.127e-01)); + r += mul(s1_6, 
M4(9.139e-03, -1.031e-01, 2.417e-02, 8.615e-02, 2.571e-02, -6.374e-02, 2.505e-02, -3.085e-03, 5.163e-04, -9.310e-03, -9.462e-03, -8.005e-03, 3.816e-02, -6.714e-02, 1.051e-02, 2.327e-02)); + r += mul(s1_7, M4(-8.160e-02, 3.700e-02, 1.831e-01, 1.902e-01, -1.498e-01, 7.021e-02, -2.213e-02, 3.722e-02, -2.187e-02, -3.998e-02, -9.834e-02, -9.257e-02, -5.951e-02, 3.060e-02, -7.497e-02, -3.321e-02)); + r += mul(s1_8, M4(1.002e-01, 6.325e-02, -6.622e-02, 5.457e-02, -6.951e-02, -6.234e-02, -4.655e-02, 4.833e-02, -4.063e-03, 1.984e-02, -1.433e-01, 1.359e-02, -1.088e-01, 7.846e-02, -8.814e-02, -1.061e-01)); + r += mul(s2_0, M4(5.541e-02, -6.334e-02, -3.905e-02, 1.848e-02, -2.151e-02, -2.225e-02, 2.208e-02, -8.518e-02, -1.885e-01, -1.277e-01, 3.760e-02, -2.416e-02, 3.120e-02, 6.864e-02, -6.249e-02, -2.824e-02)); + r += mul(s2_1, M4(-9.894e-03, -6.898e-02, 5.062e-02, 1.003e-02, -9.902e-03, -3.368e-02, -3.137e-02, -9.003e-02, 9.006e-02, -9.906e-02, 9.512e-02, -1.653e-01, 4.891e-02, 3.604e-02, 9.485e-03, -1.121e-01)); + r += mul(s2_2, M4(2.684e-02, -1.131e-02, -9.315e-03, -2.647e-02, 5.042e-02, 6.034e-02, -5.949e-03, -2.403e-02, -4.189e-02, 3.209e-02, -1.610e-02, 4.315e-02, 8.602e-03, -9.444e-03, 5.661e-02, 7.441e-03)); + r += mul(s2_3, M4(3.760e-02, -5.971e-02, 1.612e-02, 4.358e-02, -2.775e-02, -6.953e-02, 9.300e-03, 3.969e-02, 3.650e-02, 4.474e-02, -1.545e-01, 9.842e-03, 6.761e-02, -9.414e-02, -4.700e-02, -4.308e-02)); + r += mul(s2_4, M4(4.029e-02, 2.047e-02, 2.195e-01, 2.094e-02, 1.096e-01, -8.999e-03, 4.867e-02, 7.446e-02, -2.145e-01, -1.152e-01, -2.319e-01, -1.897e-01, -7.171e-02, 7.327e-02, -9.723e-03, 7.528e-02)); + r += mul(s2_5, M4(-1.142e-02, 4.584e-02, -3.761e-02, 5.939e-02, 3.169e-02, -6.628e-03, 3.920e-02, 4.302e-02, -2.027e-03, -8.319e-02, 1.982e-02, -3.272e-02, -3.107e-02, 9.817e-02, -1.047e-01, 1.156e-02)); + r += mul(s2_6, M4(-3.706e-02, -8.973e-03, -6.978e-02, -2.422e-02, -2.611e-02, 1.547e-02, -6.433e-02, 8.861e-02, -4.678e-02, 2.963e-02, 3.487e-02, -2.119e-02, 
-9.221e-03, 6.193e-02, 5.284e-02, -2.915e-02)); + r += mul(s2_7, M4(7.955e-03, -1.196e-01, 5.242e-02, -3.935e-02, -2.996e-02, -1.235e-01, 2.109e-02, -1.208e-02, -2.038e-02, 4.355e-02, -1.435e-01, 1.085e-01, -4.006e-02, 6.133e-03, 8.911e-02, -1.343e-02)); + r += mul(s2_8, M4(-2.854e-02, 2.309e-03, -9.021e-02, 3.283e-03, 8.163e-03, 3.751e-02, -1.850e-02, 2.969e-02, -7.961e-02, -1.022e-01, -1.138e-01, -7.536e-03, -3.915e-02, -4.643e-02, 1.830e-02, -4.550e-02)); + r += mul(s3_0, M4(-4.359e-02, 8.218e-02, 4.245e-02, -6.153e-03, 2.215e-03, 8.774e-02, -1.116e-02, -1.783e-01, 9.785e-02, 3.987e-02, 3.351e-02, 9.085e-02, 1.105e-01, 7.231e-02, -2.175e-02, 1.808e-02)); + r += mul(s3_1, M4(2.311e-02, -7.074e-02, 5.332e-02, -1.278e-01, -7.900e-02, -3.699e-03, 8.018e-02, -1.846e-02, 1.852e-02, -1.070e-01, -4.489e-02, -1.393e-02, 6.422e-02, 1.015e-01, 1.476e-02, -1.419e-01)); + r += mul(s3_2, M4(-1.279e-01, 5.823e-02, 2.759e-02, 2.461e-04, 1.599e-01, 1.177e-01, 5.009e-02, 2.986e-02, 1.039e-01, 5.782e-02, -8.445e-03, 1.354e-02, 1.678e-01, 2.078e-02, 7.488e-02, 1.490e-02)); + r += mul(s3_3, M4(-1.045e-01, -2.048e-01, 1.335e-01, 2.545e-02, -1.538e-01, 7.217e-02, -1.410e-01, 2.025e-01, 3.006e-02, -1.259e-01, 7.962e-02, 1.833e-02, 1.473e-01, -1.813e-01, 1.545e-01, 3.044e-02)); + r += mul(s3_4, M4(-6.705e-02, -3.876e-01, 1.550e-01, -5.262e-02, -6.240e-03, 9.039e-02, -4.094e-02, 1.597e-01, -1.069e-02, -2.105e-02, 8.805e-02, -5.060e-02, 2.486e-01, 3.276e-01, 1.192e-01, 1.125e-01)); + r += mul(s3_5, M4(-9.089e-02, -3.676e-02, 1.768e-02, -2.449e-02, 1.264e-01, -1.428e-01, 8.995e-02, -1.325e-02, -4.953e-02, -1.252e-03, 1.282e-02, 4.847e-02, 1.030e-01, 2.459e-01, -5.811e-02, -9.666e-03)); + r += mul(s3_6, M4(-3.776e-02, -3.515e-02, -2.210e-01, -4.582e-02, -6.582e-02, 8.034e-02, -2.810e-02, 1.405e-02, 4.400e-03, 7.410e-02, 6.586e-02, -5.710e-02, -3.626e-02, 5.664e-02, -4.422e-02, 1.008e-01)); + r += mul(s3_7, M4(-6.564e-02, -1.099e-01, -1.156e-01, -2.495e-02, -2.987e-02, -3.499e-02, 4.216e-02, 
-9.705e-02, 4.734e-02, -3.642e-02, -9.714e-02, 2.750e-02, 2.755e-02, 2.139e-01, 5.166e-02, -1.298e-02)); + r += mul(s3_8, M4(-6.614e-02, 1.701e-02, -1.155e-01, -3.313e-02, -2.935e-02, 4.937e-02, -1.587e-02, 1.261e-02, 4.688e-02, -3.234e-02, 8.733e-02, 3.656e-03, 1.723e-01, 7.253e-02, -6.274e-02, 3.554e-02)); + r += mul(s4_0, M4(4.480e-02, 2.594e-02, 2.363e-02, 3.143e-02, -6.157e-03, -1.087e-02, -3.910e-02, 8.682e-03, -8.595e-02, -2.033e-02, -2.067e-02, 1.186e-02, 6.745e-02, 4.664e-02, -3.684e-02, -1.060e-02)); + r += mul(s4_1, M4(1.330e-01, 1.048e-01, -1.090e-03, -1.513e-02, -5.701e-02, -8.469e-02, 7.335e-02, -6.900e-02, -1.178e-01, -5.928e-03, 2.121e-02, -3.117e-02, 8.259e-03, 4.294e-02, -1.111e-02, 2.341e-02)); + r += mul(s4_2, M4(-3.063e-03, 2.585e-02, 2.443e-02, 9.037e-02, -2.431e-02, 1.401e-01, -2.404e-02, 1.288e-01, -2.103e-02, -7.508e-02, -4.025e-02, -6.713e-02, 2.879e-02, -4.965e-03, -1.746e-02, 2.180e-03)); + r += mul(s4_3, M4(1.529e-02, 2.120e-01, 7.481e-03, 9.703e-03, 9.358e-03, -1.427e-02, -2.549e-02, 2.064e-02, -1.091e-01, 3.638e-02, -6.525e-02, -6.546e-02, 4.348e-02, 9.187e-02, -1.321e-01, 3.164e-02)); + r += mul(s4_4, M4(7.303e-02, 1.989e-01, -1.306e-01, -1.258e-01, -1.637e-02, -2.482e-02, -7.988e-02, -1.356e-01, -3.034e-02, -9.231e-02, -4.667e-02, 2.811e-03, -8.526e-02, 8.804e-02, 2.945e-03, 1.102e-01)); + r += mul(s4_5, M4(-7.600e-02, 1.258e-01, -1.129e-01, 6.477e-02, -1.874e-02, -2.770e-02, 6.853e-02, 7.639e-03, -4.900e-02, -3.740e-02, -8.046e-02, -7.855e-02, 3.098e-03, -1.509e-02, 4.403e-03, 6.419e-02)); + r += mul(s4_6, M4(-1.058e-02, -1.679e-02, 9.233e-02, 1.145e-01, 1.764e-02, -6.489e-03, 4.585e-02, 2.846e-02, -1.908e-03, -3.274e-02, -1.032e-01, 3.809e-02, -4.379e-04, -2.255e-02, 1.266e-01, -1.212e-02)); + r += mul(s4_7, M4(-2.584e-02, 1.015e-01, -2.037e-01, -1.191e-01, -2.744e-03, 3.012e-02, -2.084e-02, -9.935e-02, -1.232e-02, 5.877e-02, -1.169e-01, -4.421e-02, 5.101e-02, 6.005e-02, 1.136e-01, -7.112e-02)); + r += mul(s4_8, M4(2.779e-02, 
2.092e-02, 3.396e-03, -6.018e-02, -5.603e-02, -6.637e-02, -6.031e-03, 8.090e-03, -7.464e-03, -6.229e-02, -1.222e-01, -5.995e-02, 1.425e-03, -7.597e-02, -8.031e-02, 3.117e-02)); + r += mul(s5_0, M4(-1.024e-03, -6.547e-02, 4.057e-02, -2.702e-02, 5.368e-02, -2.424e-02, -7.984e-02, -2.550e-02, -2.558e-02, 2.972e-03, 1.810e-02, -4.180e-02, -2.523e-03, -1.060e-01, 2.874e-02, -1.072e-01)); + r += mul(s5_1, M4(-4.545e-02, -1.463e-01, -9.852e-03, 7.608e-02, 4.909e-04, -1.093e-01, -9.807e-02, -2.105e-02, 3.853e-02, 6.383e-02, 9.544e-02, -2.734e-02, -1.198e-02, 3.637e-02, 1.506e-01, -6.324e-02)); + r += mul(s5_2, M4(-5.263e-02, 3.993e-02, 4.378e-03, -7.435e-02, 3.669e-02, 5.376e-02, -6.510e-03, -4.577e-03, 1.390e-02, 3.165e-02, 3.503e-02, -3.090e-02, -3.346e-02, -3.649e-02, -3.415e-02, 4.699e-02)); + r += mul(s5_3, M4(4.829e-02, 1.096e-01, -6.535e-02, -3.855e-02, 7.956e-02, -9.014e-03, 2.271e-02, -1.032e-01, -1.620e-02, 9.095e-03, -2.873e-02, 2.084e-02, -1.571e-02, -5.614e-02, -2.876e-02, 1.440e-02)); + r += mul(s5_4, M4(-1.019e-01, -1.715e-02, -1.982e-01, -1.791e-01, -1.267e-02, -2.079e-01, -6.014e-02, -2.296e-01, 2.631e-02, 6.505e-02, 5.515e-02, 1.870e-02, -1.078e-01, 1.743e-02, -8.485e-02, 3.567e-01)); + r += mul(s5_5, M4(4.436e-02, 8.221e-02, -2.439e-02, -7.809e-02, -6.170e-02, -9.551e-02, 1.609e-02, -7.136e-02, 4.165e-02, -4.817e-03, 4.287e-02, 2.408e-03, -4.165e-02, -1.652e-02, 2.293e-02, 8.002e-02)); + r += mul(s5_6, M4(-2.911e-02, -2.198e-04, -4.255e-02, 1.860e-02, 2.319e-02, -1.573e-02, 7.366e-02, -6.204e-02, 3.113e-02, -3.469e-02, 2.894e-02, 1.208e-02, -3.099e-02, -4.035e-02, -6.633e-02, 1.054e-01)); + r += mul(s5_7, M4(1.281e-02, -4.798e-02, -1.861e-01, -2.665e-02, -9.502e-03, 3.205e-02, -3.900e-02, -2.003e-02, 6.833e-02, 3.285e-03, 3.673e-02, -1.353e-02, 1.255e-01, 1.799e-01, 3.839e-01, -9.228e-03)); + r += mul(s5_8, M4(-4.002e-02, -4.090e-02, -5.090e-02, 2.001e-02, -1.085e-01, -5.435e-02, 8.609e-02, 4.208e-03, 5.129e-02, -3.246e-02, 1.357e-03, -1.166e-02, 
2.425e-02, -4.722e-02, -5.910e-02, -1.943e-02)); + r += mul(s6_0, M4(-4.657e-02, -4.365e-02, 1.855e-02, 1.686e-02, -5.174e-03, -2.762e-02, 3.044e-02, -4.493e-03, -4.455e-02, -6.010e-02, 5.274e-02, 4.504e-02, -5.616e-02, -8.778e-02, -1.062e-02, -2.164e-02)); + r += mul(s6_1, M4(-6.377e-03, -9.762e-02, -2.371e-02, -2.708e-02, -3.744e-02, -1.692e-01, -2.715e-02, 9.584e-02, 1.100e-01, -3.519e-02, -2.840e-02, 1.568e-02, 3.629e-02, -5.320e-02, 6.654e-02, -3.467e-02)); + r += mul(s6_2, M4(-8.152e-02, -7.546e-03, -2.494e-02, 5.560e-02, 2.547e-02, 6.245e-03, -3.433e-02, -5.634e-03, 4.568e-02, 7.593e-03, 2.321e-02, 6.044e-04, 2.275e-03, 6.351e-02, 2.180e-02, 2.776e-02)); + r += mul(s6_3, M4(3.938e-02, -4.175e-02, -1.703e-02, 6.867e-02, 1.547e-02, 4.972e-03, 1.693e-01, 3.867e-02, 1.203e-02, 1.163e-01, -1.831e-02, 4.121e-02, -1.257e-02, -3.650e-02, -5.777e-02, 3.162e-02)); + r += mul(s6_4, M4(-1.089e-01, -1.033e-02, -1.311e-01, -1.149e-02, -1.438e-03, 1.024e-01, -1.596e-01, -2.566e-01, 3.615e-02, 1.599e-01, 1.222e-01, 9.341e-02, -2.748e-02, 2.028e-02, 1.122e-02, -1.354e-01)); + r += mul(s6_5, M4(-1.415e-01, 3.870e-02, -1.051e-01, -6.938e-03, -6.274e-02, 5.740e-02, 8.951e-02, -5.256e-02, 6.098e-02, 3.744e-02, 1.375e-02, -1.474e-02, 1.328e-02, -6.412e-02, 6.594e-03, -9.309e-02)); + r += mul(s6_6, M4(-1.109e-02, -2.355e-02, 6.765e-02, 5.406e-02, -2.680e-02, 1.297e-02, 7.848e-02, -6.058e-02, 6.114e-02, 7.744e-02, 1.494e-02, -4.207e-02, 4.035e-02, 4.003e-02, -7.107e-02, -1.884e-02)); + r += mul(s6_7, M4(-8.515e-02, 2.844e-02, 7.553e-02, 1.075e-02, 1.320e-01, -2.922e-01, -1.579e-01, -6.371e-02, 1.280e-02, -1.929e-02, -1.225e-01, -1.209e-02, -4.205e-02, 3.344e-02, -5.664e-02, 4.004e-02)); + r += mul(s6_8, M4(-9.777e-02, -6.582e-02, 3.920e-02, 6.389e-03, 3.322e-02, -2.803e-02, -5.406e-02, -1.865e-02, 2.119e-02, 1.149e-01, -3.422e-02, -3.817e-02, -3.067e-02, 4.716e-02, -1.032e-01, 2.235e-02)); + r += mul(s7_0, M4(8.496e-03, 5.987e-02, 4.584e-02, 5.824e-02, -3.919e-02, -4.895e-02, 
1.100e-01, 3.522e-02, -7.386e-03, -9.289e-02, 5.341e-02, 1.582e-02, -7.591e-02, -6.651e-04, 9.645e-02, 6.396e-02)); + r += mul(s7_1, M4(3.472e-02, 3.446e-02, 1.222e-02, 7.000e-02, -2.298e-02, -6.086e-02, -5.135e-02, 7.823e-02, 5.182e-03, 3.576e-02, 1.775e-01, 6.364e-02, 6.662e-02, -9.483e-04, -5.751e-03, -9.270e-03)); + r += mul(s7_2, M4(3.358e-02, 2.218e-02, 1.207e-01, 1.815e-02, -9.561e-03, -1.385e-02, -8.877e-02, -3.040e-03, 5.780e-02, 5.741e-02, -5.731e-02, 8.420e-02, -2.273e-02, 6.520e-02, 1.004e-02, 3.590e-02)); + r += mul(s7_3, M4(5.142e-02, -8.508e-03, -1.159e-02, -2.834e-02, -4.526e-02, 6.746e-02, 1.057e-01, -1.036e-01, 5.933e-02, -1.548e-01, 4.227e-02, 8.383e-02, -5.301e-02, 8.128e-02, -1.236e-01, -3.375e-03)); + r += mul(s7_4, M4(-5.605e-02, -1.155e-01, -9.862e-02, 4.081e-02, 1.326e-02, 4.698e-02, -2.698e-02, 6.181e-02, -3.167e-02, -6.973e-02, 2.052e-01, 8.491e-02, -3.532e-02, 6.526e-02, -3.787e-02, -5.853e-02)); + r += mul(s7_5, M4(-5.731e-02, 1.031e-01, -9.473e-02, -8.531e-03, 2.696e-02, 8.500e-02, 5.398e-02, 1.124e-01, 1.754e-01, 5.313e-02, 1.291e-01, -1.637e-02, -3.436e-02, 9.291e-02, -1.553e-02, -4.011e-02)); + r += mul(s7_6, M4(-5.448e-03, -7.619e-02, 2.262e-01, -1.853e-02, 3.073e-03, 2.042e-01, -1.654e-01, 7.605e-02, 4.162e-02, -9.305e-02, 9.896e-02, -3.766e-02, 2.231e-02, 7.078e-02, -2.020e-02, -2.443e-02)); + r += mul(s7_7, M4(2.423e-02, 1.639e-01, 3.665e-02, 4.298e-02, 5.405e-02, 9.624e-02, -2.319e-01, -1.499e-01, 2.105e-02, -4.502e-02, -1.867e-01, -1.004e-01, -1.021e-01, -1.292e-01, -3.473e-02, 3.257e-02)); + r += mul(s7_8, M4(6.504e-03, 1.106e-01, 7.303e-02, 1.007e-01, 1.818e-01, 1.160e-02, -9.286e-03, -1.160e-01, -4.789e-02, 4.493e-02, -8.081e-02, -8.938e-02, 2.520e-02, 4.247e-02, 3.953e-02, 6.861e-02)); + r += V4(2.674e-02, 5.572e-03, -1.712e-02, -1.438e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 
s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.286e-02, -5.558e-02, 5.368e-03, 3.030e-02, -1.262e-02, 1.150e-02, -3.379e-02, -7.153e-02, -4.218e-03, -5.102e-02, 1.123e-01, -7.896e-02, 6.458e-03, 3.456e-02, -1.064e-01, -1.553e-01)); + r += mul(s0_1, M4(-4.861e-02, 3.643e-02, -3.731e-02, -9.669e-02, -1.237e-03, 6.117e-02, 9.559e-02, 1.336e-01, -1.348e-01, -8.136e-03, 1.444e-01, 1.200e-01, 6.525e-02, -8.019e-02, -1.538e-01, 2.949e-02)); + r += mul(s0_2, M4(-1.352e-01, 1.882e-02, -1.003e-01, 1.790e-02, -8.007e-02, -5.233e-03, -6.482e-03, 1.315e-01, 1.054e-03, 1.917e-02, 1.244e-02, 3.169e-02, 1.513e-01, 4.209e-02, 1.209e-02, 1.450e-01)); + r += mul(s0_3, M4(3.892e-02, -5.554e-02, -1.809e-02, 3.134e-02, -6.139e-02, 6.550e-02, -2.202e-02, -1.255e-01, 1.639e-02, 7.508e-02, 2.674e-01, -2.016e-02, -3.796e-02, -7.200e-02, -1.230e-01, -2.679e-02)); + r += mul(s0_4, M4(-5.442e-02, 2.733e-02, -1.801e-03, -2.134e-02, 9.374e-02, -2.165e-02, -1.593e-01, -1.614e-01, 2.401e-01, -1.075e-01, 9.997e-02, -5.962e-02, 2.714e-01, 2.665e-01, -2.994e-01, 8.561e-02)); + r += mul(s0_5, M4(-1.654e-01, -6.863e-02, -4.063e-02, 1.015e-02, -3.911e-03, -4.761e-02, 3.745e-02, -4.014e-02, -8.392e-03, 4.647e-03, 4.148e-02, -1.184e-01, 1.168e-01, -1.158e-01, -1.479e-02, -4.154e-02)); + r += mul(s0_6, M4(3.566e-02, 1.329e-01, 5.007e-02, 1.029e-01, -5.539e-02, -1.019e-02, 4.680e-02, -5.687e-03, 6.479e-03, -1.294e-02, 1.244e-01, 7.200e-02, 2.546e-02, 5.989e-02, -6.528e-02, 8.630e-02)); + r += mul(s0_7, M4(-8.646e-02, 1.731e-01, 
9.482e-02, 8.726e-02, -8.075e-02, -5.263e-02, 1.207e-01, 9.825e-02, 3.145e-02, 3.826e-02, 7.561e-02, 8.651e-02, 2.112e-01, -2.415e-02, -1.399e-01, -1.141e-01)); + r += mul(s0_8, M4(5.204e-02, 1.790e-03, 4.330e-02, 1.759e-01, 7.105e-02, 5.055e-02, 1.284e-01, 2.851e-02, 6.014e-04, -3.710e-02, 5.909e-02, 4.122e-02, 9.774e-02, -8.960e-02, 6.467e-02, -2.063e-02)); + r += mul(s1_0, M4(1.483e-04, -8.302e-02, 2.958e-02, -9.205e-02, -8.072e-02, -9.046e-03, 1.077e-02, 3.121e-02, -5.374e-02, 1.027e-02, -4.329e-02, -9.952e-02, -1.073e-02, -4.699e-02, 2.717e-02, -9.122e-02)); + r += mul(s1_1, M4(7.848e-02, 6.429e-02, -2.606e-02, 4.148e-02, 2.932e-02, -2.389e-02, 1.253e-01, 1.724e-01, -1.283e-01, 6.969e-02, -4.918e-02, 2.244e-01, 3.852e-02, -3.114e-02, 9.021e-02, 3.441e-02)); + r += mul(s1_2, M4(-1.460e-02, 6.312e-02, -4.304e-02, 5.499e-02, -1.616e-01, 1.703e-02, -1.842e-02, 1.267e-01, -6.582e-02, -1.423e-03, 3.774e-02, -6.782e-02, 1.845e-02, 2.383e-02, -7.941e-03, -3.615e-02)); + r += mul(s1_3, M4(3.129e-02, -3.667e-02, -8.854e-02, -1.084e-01, -1.353e-02, 7.837e-02, -7.423e-02, 8.332e-02, -2.522e-05, -1.771e-03, -2.036e-01, -1.220e-03, -6.209e-02, 5.654e-03, 8.280e-02, -2.826e-02)); + r += mul(s1_4, M4(1.353e-02, 1.617e-01, 1.995e-01, 3.197e-02, 1.677e-01, -5.916e-02, -4.875e-02, -1.233e-01, 2.653e-01, 8.528e-03, -2.483e-01, -1.420e-01, 1.367e-01, 1.041e-01, -4.963e-02, 2.426e-01)); + r += mul(s1_5, M4(2.180e-02, -4.062e-02, 1.189e-03, -3.950e-02, -9.090e-02, -6.138e-02, -5.669e-03, -2.549e-02, 1.697e-02, 1.645e-01, 7.228e-02, -8.677e-03, -6.223e-02, -8.186e-02, 2.890e-02, 8.978e-02)); + r += mul(s1_6, M4(5.162e-02, 4.894e-02, -7.496e-02, -1.230e-01, -4.356e-02, -3.984e-02, 5.458e-02, 4.174e-02, -1.513e-01, -1.285e-02, -5.100e-02, -3.304e-03, -3.399e-02, 1.256e-01, 1.920e-02, 1.121e-01)); + r += mul(s1_7, M4(-4.488e-02, 1.591e-01, -1.624e-01, -8.467e-02, -1.239e-01, 2.445e-02, 1.997e-02, 1.639e-02, -6.134e-02, -2.105e-02, -2.311e-02, 4.681e-02, 8.311e-02, 1.185e-02, -9.454e-02, 
-3.118e-02)); + r += mul(s1_8, M4(-3.601e-03, -5.132e-02, 2.023e-02, -1.442e-02, 9.517e-02, 1.182e-01, 1.941e-02, 1.040e-02, -3.218e-02, -2.666e-02, 2.423e-02, -4.021e-02, -9.206e-02, 6.915e-02, 6.492e-03, -7.742e-02)); + r += mul(s2_0, M4(-3.682e-02, 7.720e-03, 6.424e-02, -4.937e-02, -9.706e-02, -4.218e-02, -1.654e-02, -1.787e-01, 1.144e-02, -2.757e-02, -6.188e-03, -2.244e-01, -1.874e-02, -2.107e-02, -5.218e-02, -1.371e-01)); + r += mul(s2_1, M4(7.475e-02, -1.432e-02, 9.054e-02, -1.267e-01, 7.644e-02, 4.824e-02, 1.019e-01, -8.470e-02, -4.102e-02, 1.201e-01, -1.280e-01, -7.696e-02, -5.082e-02, -7.261e-03, 3.320e-03, -2.208e-02)); + r += mul(s2_2, M4(-4.459e-02, -3.978e-02, 6.344e-02, 8.882e-02, -5.338e-02, -8.955e-02, 4.356e-02, 4.547e-02, -5.658e-04, 8.348e-02, 1.458e-02, -4.076e-02, 8.107e-02, -2.509e-03, 7.839e-02, 1.782e-02)); + r += mul(s2_3, M4(-3.090e-02, 5.185e-02, -2.392e-03, 9.523e-02, -4.333e-02, 4.541e-02, -1.031e-02, -7.619e-02, -1.044e-01, 1.775e-01, -1.146e-01, -1.942e-02, 1.260e-01, -2.625e-02, 1.437e-01, -1.042e-01)); + r += mul(s2_4, M4(-7.213e-02, -1.645e-01, -1.091e-02, -1.120e-01, 1.002e-01, -6.332e-02, 4.277e-02, 9.251e-02, 1.158e-01, 1.273e-01, -1.918e-03, -3.486e-01, -1.529e-01, -9.992e-02, -6.274e-02, -1.043e-01)); + r += mul(s2_5, M4(1.229e-01, 4.003e-02, -2.590e-02, -1.465e-02, -5.746e-02, -3.314e-02, -1.168e-02, 5.900e-03, -6.562e-03, 9.581e-02, -4.788e-02, 2.590e-02, 8.387e-02, 6.084e-02, -3.417e-02, -1.483e-01)); + r += mul(s2_6, M4(-8.101e-02, -2.068e-03, 1.652e-02, 6.369e-03, 1.646e-02, -3.940e-02, -2.581e-02, -7.013e-02, 1.263e-02, -2.072e-02, -1.128e-03, 2.931e-03, 2.252e-02, -6.322e-02, 5.656e-02, -6.017e-02)); + r += mul(s2_7, M4(-1.199e-02, -4.779e-02, -2.128e-02, -9.445e-02, 6.734e-03, 8.381e-03, 4.290e-03, 1.321e-01, -2.633e-02, 2.928e-02, -2.738e-01, -8.694e-02, 7.871e-02, -1.597e-02, -4.260e-02, -1.368e-01)); + r += mul(s2_8, M4(3.368e-02, 1.480e-02, 8.021e-02, -2.883e-02, -2.109e-02, 4.231e-02, 5.099e-02, -3.689e-02, 
-1.832e-02, 7.323e-02, -1.495e-01, -1.800e-02, 1.340e-01, -5.175e-02, 2.019e-02, 6.081e-02)); + r += mul(s3_0, M4(-3.042e-02, 4.959e-02, -1.628e-01, -7.205e-02, -1.110e-01, 1.615e-01, 6.012e-02, -2.321e-01, 7.382e-02, -3.659e-02, 3.137e-02, -3.680e-02, -1.551e-02, -8.853e-03, 4.016e-02, 4.906e-02)); + r += mul(s3_1, M4(2.155e-02, -2.277e-02, 2.107e-02, -1.744e-01, -1.159e-01, 1.025e-02, -8.337e-02, 1.050e-01, -5.261e-02, -9.656e-04, 7.279e-02, 6.861e-02, 3.067e-02, -9.676e-02, 1.065e-01, 1.728e-01)); + r += mul(s3_2, M4(-5.673e-02, -2.262e-02, -7.513e-02, -1.410e-02, -3.755e-02, 4.702e-02, 4.710e-02, 1.799e-01, 3.802e-02, -9.390e-02, 2.261e-02, 6.757e-02, 3.401e-02, -6.094e-02, -3.018e-02, 1.129e-01)); + r += mul(s3_3, M4(-1.184e-01, 8.267e-02, 3.124e-02, 4.827e-02, 6.686e-02, -1.382e-01, -2.837e-01, -9.711e-02, 1.829e-02, 1.057e-01, -1.102e-02, -3.657e-02, 9.744e-02, 5.657e-02, 6.135e-02, 8.644e-02)); + r += mul(s3_4, M4(-1.887e-02, 1.702e-02, 3.265e-02, -1.128e-01, 5.493e-02, 6.220e-02, -4.533e-02, 1.263e-01, -1.606e-01, -9.614e-02, -8.556e-02, -7.665e-02, -2.717e-01, 1.143e-01, 5.673e-02, -1.683e-01)); + r += mul(s3_5, M4(-1.469e-01, 9.381e-02, -3.942e-02, 5.832e-03, 1.271e-02, -9.744e-02, -8.614e-02, 1.966e-01, 5.591e-02, 4.514e-02, 1.686e-02, -3.298e-02, -2.189e-01, -5.892e-02, 7.834e-02, -9.585e-02)); + r += mul(s3_6, M4(-1.253e-01, -5.154e-02, -1.194e-01, -5.413e-03, 1.384e-01, 1.661e-02, -5.402e-02, -1.933e-01, -1.163e-01, -8.053e-02, 8.213e-02, 1.045e-01, -6.139e-02, 1.008e-02, 2.682e-02, 1.001e-01)); + r += mul(s3_7, M4(4.268e-02, -3.266e-02, -1.395e-01, -6.734e-02, -5.044e-02, -6.236e-02, 7.355e-03, 4.860e-02, 8.122e-02, 3.594e-03, 3.387e-02, -1.894e-02, -2.146e-02, -6.920e-02, -9.184e-02, 6.043e-02)); + r += mul(s3_8, M4(-4.537e-02, 5.305e-02, -3.223e-02, -1.099e-01, -1.532e-03, 1.298e-02, 1.705e-03, -8.713e-02, 5.724e-02, 4.936e-02, -7.022e-03, 1.310e-01, -1.455e-01, -1.002e-01, -8.992e-02, 8.949e-02)); + r += mul(s4_0, M4(-3.697e-02, 4.361e-02, 
-2.610e-02, 1.920e-01, -3.173e-02, -2.363e-02, 4.914e-02, -7.926e-02, 5.642e-03, 2.515e-02, -1.312e-02, 8.478e-02, -4.425e-02, 2.898e-02, -4.333e-02, 6.784e-02)); + r += mul(s4_1, M4(-8.282e-02, -2.400e-02, 1.423e-02, 1.713e-01, -6.289e-02, 6.815e-02, 4.288e-02, 2.298e-02, 6.929e-03, 1.687e-01, -1.356e-01, 2.107e-01, -2.664e-02, 7.643e-02, -4.448e-02, -1.391e-01)); + r += mul(s4_2, M4(2.527e-02, 5.359e-02, 8.032e-02, 9.965e-02, -4.367e-02, 1.715e-01, -2.134e-02, 1.266e-02, 1.772e-02, 4.026e-02, -1.407e-02, 2.958e-02, -1.089e-01, 1.555e-02, -9.794e-02, 7.503e-02)); + r += mul(s4_3, M4(9.838e-02, -9.237e-02, 3.059e-02, 7.839e-02, -1.064e-01, -8.478e-02, -1.488e-01, 3.929e-02, -5.543e-02, -1.410e-01, -1.136e-01, 4.460e-02, 1.230e-01, 9.507e-02, -1.495e-02, -3.650e-02)); + r += mul(s4_4, M4(2.052e-01, 2.177e-01, 3.370e-03, -2.671e-01, 1.070e-01, 3.138e-02, -2.049e-02, 5.616e-03, 6.318e-02, 7.942e-02, 1.357e-01, 2.034e-01, 5.404e-02, 5.505e-04, -1.138e-01, 1.522e-02)); + r += mul(s4_5, M4(3.260e-03, -1.496e-02, -1.751e-02, -4.540e-02, 8.747e-02, -1.052e-01, -4.517e-02, -4.559e-02, 7.249e-02, 2.862e-02, -9.665e-02, 2.203e-02, 1.041e-02, -2.682e-02, -5.846e-02, -5.967e-03)); + r += mul(s4_6, M4(6.904e-02, -1.075e-02, -1.133e-02, 5.641e-03, -2.339e-02, 3.848e-02, 1.617e-02, 1.096e-01, -4.710e-02, 3.860e-02, -9.364e-02, 2.710e-01, -2.919e-02, 5.043e-02, 6.705e-02, 6.533e-02)); + r += mul(s4_7, M4(-7.306e-02, -4.041e-02, 5.129e-02, -2.081e-01, -2.225e-02, -7.482e-02, -1.030e-01, 1.455e-01, -1.215e-03, 1.760e-02, -5.955e-02, 1.670e-01, -2.868e-02, 2.015e-02, -1.719e-02, -7.865e-03)); + r += mul(s4_8, M4(3.469e-02, -1.440e-01, -2.074e-02, 4.942e-04, -6.512e-02, -1.207e-02, 3.899e-02, 4.409e-02, -4.425e-02, 1.354e-01, -5.201e-02, 5.339e-02, -1.733e-02, -1.068e-02, -4.068e-02, -8.847e-02)); + r += mul(s5_0, M4(-1.367e-02, -4.043e-03, -1.633e-01, 5.634e-02, -5.067e-02, 1.046e-02, 7.233e-02, -1.344e-01, -1.035e-02, 2.570e-02, 3.489e-02, -7.652e-02, -4.584e-02, 6.309e-02, 
1.590e-02, 6.861e-03)); + r += mul(s5_1, M4(5.945e-02, -2.522e-03, 4.441e-02, -9.294e-02, 5.266e-02, 3.490e-02, 1.257e-01, -1.192e-01, 6.795e-02, -3.041e-02, 1.785e-03, -2.690e-02, 7.588e-03, -5.267e-02, -2.006e-01, 5.573e-02)); + r += mul(s5_2, M4(-2.075e-02, 2.788e-02, -6.472e-02, -1.199e-03, -7.163e-02, -1.456e-02, 8.793e-02, -3.651e-02, -2.027e-02, -9.538e-02, 4.177e-02, 6.392e-02, 1.057e-01, 1.165e-01, 1.953e-01, -1.987e-02)); + r += mul(s5_3, M4(7.941e-03, 2.435e-03, 1.518e-01, 2.483e-02, -8.035e-02, -3.836e-02, 1.098e-01, 1.050e-04, -2.651e-02, -8.799e-02, -9.291e-02, -1.290e-01, 2.334e-01, 1.460e-01, 1.496e-01, -1.747e-01)); + r += mul(s5_4, M4(1.113e-01, -2.526e-03, 2.216e-02, -2.775e-02, 1.863e-01, 1.006e-01, 2.760e-01, -1.442e-02, -7.379e-02, -4.404e-02, 7.299e-02, 4.975e-02, -5.748e-02, 2.627e-01, 3.282e-02, -4.485e-03)); + r += mul(s5_5, M4(-1.942e-01, -3.099e-02, -3.280e-02, 2.924e-02, -7.790e-02, 6.897e-02, 2.725e-02, -1.199e-01, -1.954e-02, 5.724e-03, -1.389e-02, 1.754e-02, 5.152e-02, 4.100e-03, 5.325e-02, -4.843e-02)); + r += mul(s5_6, M4(4.486e-02, 3.828e-02, -2.625e-02, -1.493e-01, -3.824e-02, 2.764e-02, 1.300e-02, -4.650e-02, 3.270e-02, 4.272e-02, -2.518e-02, -4.634e-02, 1.756e-01, 2.771e-01, -1.520e-01, -8.914e-02)); + r += mul(s5_7, M4(-2.086e-02, -6.906e-02, 3.829e-02, -5.269e-02, 1.029e-01, -7.007e-03, 2.265e-02, -7.822e-02, -6.078e-02, -6.558e-02, 1.199e-02, 3.040e-02, 1.628e-01, -1.604e-03, 7.271e-02, 7.569e-02)); + r += mul(s5_8, M4(3.105e-02, -2.946e-02, -3.952e-02, 1.162e-01, -9.094e-02, 1.909e-02, 8.129e-02, -1.816e-02, -4.012e-02, -3.152e-02, -4.212e-02, 6.397e-03, -6.599e-02, -2.003e-02, 3.821e-02, -3.672e-02)); + r += mul(s6_0, M4(4.164e-02, 2.871e-02, 3.507e-02, -2.261e-01, 4.606e-02, -5.383e-02, -3.568e-02, -4.125e-02, 1.077e-01, 4.435e-02, 1.753e-02, -4.489e-02, 5.527e-02, 4.169e-02, 6.259e-02, -1.311e-01)); + r += mul(s6_1, M4(2.440e-02, 5.512e-02, 1.154e-02, 1.627e-01, 2.868e-02, -9.865e-02, 4.651e-02, -3.851e-02, -4.680e-02, 
-2.606e-02, 7.456e-02, 3.369e-02, -1.593e-02, 3.015e-02, 2.959e-02, -2.216e-02)); + r += mul(s6_2, M4(2.586e-03, 1.205e-02, 3.081e-02, 1.315e-02, 2.864e-02, -2.026e-02, -2.931e-04, -6.621e-02, -6.064e-02, 3.550e-02, -1.861e-02, -5.051e-02, 2.220e-02, 2.607e-02, 7.668e-04, -5.065e-02)); + r += mul(s6_3, M4(3.330e-03, 4.963e-02, -7.611e-03, -1.310e-01, -1.458e-01, 9.381e-02, 1.141e-02, -4.524e-03, 7.509e-02, -1.537e-01, -6.698e-02, -9.922e-02, 1.177e-02, 7.349e-02, 3.935e-02, -2.087e-01)); + r += mul(s6_4, M4(-5.103e-03, 2.970e-01, -2.611e-02, -2.675e-03, -1.750e-01, -1.254e-01, 1.185e-01, -1.050e-01, -2.447e-02, -1.954e-02, -1.246e-01, 1.211e-01, -9.887e-02, -5.790e-02, 1.165e-01, -5.261e-02)); + r += mul(s6_5, M4(-5.677e-02, 5.446e-02, -7.142e-02, -2.665e-01, -9.979e-02, -7.104e-02, 2.303e-02, 4.827e-02, 2.460e-02, -1.170e-01, -2.289e-02, -1.120e-01, -1.888e-02, 1.559e-04, 4.714e-02, -4.896e-03)); + r += mul(s6_6, M4(1.157e-01, 1.833e-01, 5.169e-02, -1.331e-01, 6.034e-02, 9.362e-02, 1.058e-01, -1.742e-01, -1.202e-01, -8.411e-02, 7.993e-02, 6.521e-02, 6.710e-04, -1.153e-01, 2.861e-02, 4.359e-02)); + r += mul(s6_7, M4(-1.565e-02, 1.589e-01, 1.325e-01, 1.399e-02, 7.785e-02, -3.601e-02, 5.482e-02, -1.596e-02, 9.978e-02, 1.804e-02, 1.243e-01, 2.677e-02, -1.812e-01, -1.244e-02, -4.210e-02, -1.077e-01)); + r += mul(s6_8, M4(-2.383e-02, 9.994e-02, -1.086e-02, -1.963e-01, 9.990e-02, 6.315e-02, 1.856e-02, 5.164e-04, 2.123e-02, -6.209e-02, 3.422e-02, 1.179e-02, 7.026e-02, 9.449e-02, -2.705e-02, -1.677e-01)); + r += mul(s7_0, M4(-2.026e-02, 1.936e-02, 9.827e-03, 9.698e-02, 1.444e-02, -9.873e-02, -5.168e-02, 3.925e-02, -2.737e-02, -1.437e-02, -7.603e-02, 1.765e-01, 1.151e-01, 1.229e-02, 5.526e-02, -7.669e-02)); + r += mul(s7_1, M4(5.080e-02, -7.782e-03, 2.317e-02, 1.043e-01, 2.755e-02, -1.340e-01, 3.490e-02, 3.466e-02, 3.210e-02, 9.935e-02, 1.024e-03, 7.715e-02, -8.228e-02, -1.998e-03, 6.091e-02, 1.252e-01)); + r += mul(s7_2, M4(-7.570e-02, -7.490e-02, -6.378e-02, 1.928e-01, 
6.937e-03, -4.652e-03, -1.323e-02, -6.992e-02, -1.105e-01, -1.010e-01, -6.574e-02, 5.709e-02, 1.372e-01, -5.210e-02, -1.230e-02, -2.030e-01)); + r += mul(s7_3, M4(-7.554e-02, 5.436e-03, 3.024e-02, 3.049e-02, -4.402e-02, 6.087e-02, 1.615e-01, -1.553e-01, -8.422e-02, -6.811e-02, -1.066e-01, 1.607e-02, 1.481e-01, 5.693e-02, -1.473e-01, -1.700e-01)); + r += mul(s7_4, M4(-4.538e-02, -7.853e-02, -1.336e-01, -1.226e-02, 5.502e-02, 3.131e-01, 1.725e-01, 6.103e-03, 2.748e-02, 6.264e-02, 4.061e-03, 3.578e-01, -7.168e-02, -1.584e-01, 7.857e-02, 6.613e-02)); + r += mul(s7_5, M4(-1.230e-01, -1.534e-01, -2.561e-03, -1.971e-02, 4.204e-02, -7.444e-03, 1.724e-03, -3.681e-02, 6.427e-02, -8.760e-03, 2.615e-02, -3.291e-02, 4.349e-02, -4.618e-02, 1.045e-04, -1.185e-01)); + r += mul(s7_6, M4(-7.975e-03, 7.494e-02, 1.482e-01, 1.397e-01, -2.783e-02, -1.124e-01, 3.515e-02, -6.958e-02, -1.207e-01, 9.840e-02, 6.669e-02, 4.788e-02, -4.741e-03, -3.077e-02, 8.715e-02, 9.574e-02)); + r += mul(s7_7, M4(1.235e-01, 1.762e-02, 8.508e-04, 5.826e-02, -2.263e-01, -1.343e-01, 1.080e-01, 3.797e-02, 4.121e-02, 4.401e-02, 6.985e-02, 4.008e-03, -2.648e-01, 1.059e-01, -2.543e-02, 2.075e-01)); + r += mul(s7_8, M4(-2.713e-02, -7.462e-02, 6.808e-02, 5.364e-03, 7.419e-02, -6.790e-02, -8.942e-03, 2.821e-02, -1.603e-02, 8.469e-03, 2.062e-03, 8.538e-03, 7.823e-02, 4.336e-02, -7.648e-02, -1.649e-01)); + r += V4(6.569e-03, 6.884e-03, -2.693e-02, 6.307e-03); + return r; +} + +void Pass13(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 
s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = 
l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = 
f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 14 +//!DESC conv13 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.293e-01, -3.164e-02, -4.305e-02, -4.153e-02, 1.083e-02, 3.443e-02, 9.194e-03, 3.305e-03, 7.443e-02, -6.503e-02, -1.639e-02, 1.467e-02, 1.861e-01, -7.023e-03, -1.710e-02, 2.163e-03)); + r += mul(s0_1, M4(-1.831e-01, -9.969e-02, -9.017e-02, 6.240e-02, 1.156e-01, 1.227e-01, -5.195e-02, -1.414e-01, 2.352e-03, -3.372e-02, -2.448e-02, -1.223e-01, 8.668e-02, 3.721e-02, 4.414e-02, -7.547e-02)); + r += mul(s0_2, M4(-3.738e-02, 5.256e-03, -5.123e-02, -1.541e-03, 4.179e-02, -8.270e-02, -4.111e-02, -5.965e-02, 7.282e-02, 1.006e-04, -1.048e-02, 4.996e-03, -1.551e-02, 2.408e-02, 4.269e-03, 
-3.604e-02)); + r += mul(s0_3, M4(1.295e-03, -4.506e-02, 5.793e-02, 4.585e-02, -7.363e-02, -1.660e-01, -5.246e-02, -1.778e-01, 1.307e-01, -5.384e-02, 2.766e-02, -2.498e-02, 1.321e-01, -1.912e-02, -2.638e-02, -6.578e-02)); + r += mul(s0_4, M4(5.700e-02, -1.475e-02, 3.855e-02, -4.138e-02, 1.441e-02, 3.087e-01, -4.294e-03, 3.341e-02, -2.105e-02, -1.786e-01, 1.411e-01, 9.892e-02, 2.347e-01, 2.646e-03, 1.039e-01, -3.054e-02)); + r += mul(s0_5, M4(3.613e-02, 3.202e-02, 6.025e-02, -2.920e-03, -2.235e-02, 2.182e-02, -2.037e-03, -5.479e-02, -7.034e-02, -2.745e-01, -1.952e-01, -1.087e-01, -2.984e-02, 1.009e-01, -4.008e-02, -8.082e-02)); + r += mul(s0_6, M4(-4.063e-02, -2.746e-02, -1.033e-02, 1.731e-02, 2.938e-01, 3.660e-02, 4.929e-02, -3.881e-02, 5.079e-02, -3.675e-02, 1.402e-02, 7.380e-02, 3.691e-02, 1.402e-02, 5.604e-02, 3.162e-02)); + r += mul(s0_7, M4(9.007e-02, 7.977e-03, 3.092e-02, 1.681e-02, 1.467e-01, 9.255e-02, 2.936e-02, -1.374e-01, 2.972e-01, 7.402e-03, -2.492e-02, -5.690e-02, -1.368e-01, 5.772e-02, 9.572e-02, 3.849e-02)); + r += mul(s0_8, M4(4.821e-02, -9.248e-02, -2.006e-02, -5.064e-02, 7.278e-02, 2.933e-02, -3.106e-03, -5.832e-02, -4.986e-02, -1.631e-01, -1.189e-01, 6.368e-02, -5.563e-02, 1.126e-01, 1.435e-02, 2.510e-02)); + r += mul(s1_0, M4(-5.301e-02, -1.038e-01, -4.458e-02, 2.034e-02, -3.081e-02, 2.101e-02, -5.794e-02, 8.038e-02, -4.012e-02, -3.779e-02, 1.219e-02, 5.236e-03, 1.165e-01, 1.454e-03, 3.993e-03, -2.968e-03)); + r += mul(s1_1, M4(-7.366e-02, -1.320e-01, -7.431e-02, 6.910e-03, -9.079e-02, -1.094e-02, 5.943e-02, -1.248e-02, -7.584e-03, 3.828e-02, 1.393e-02, -7.314e-02, -3.506e-02, -8.241e-03, 7.150e-02, -3.944e-02)); + r += mul(s1_2, M4(4.268e-02, 8.010e-02, -2.553e-01, -3.435e-03, -3.556e-02, -1.023e-01, -5.027e-02, 4.978e-03, 3.110e-02, 6.913e-02, -3.337e-03, 2.373e-03, 1.011e-01, -1.089e-03, 1.063e-02, -3.405e-02)); + r += mul(s1_3, M4(-3.829e-02, -1.711e-02, 2.150e-02, 4.636e-02, -8.814e-02, -1.242e-01, 7.370e-03, -7.349e-02, 6.136e-02, 
9.069e-03, 2.292e-02, -6.553e-03, -3.025e-02, 3.977e-02, -2.112e-02, -7.195e-02)); + r += mul(s1_4, M4(-1.946e-01, -6.032e-02, 1.162e-02, -1.805e-01, -1.251e-02, 2.560e-01, 2.557e-02, -2.600e-02, -6.459e-02, -1.665e-01, 2.803e-02, 2.584e-01, -1.652e-01, -8.344e-02, -2.681e-02, 5.222e-02)); + r += mul(s1_5, M4(5.220e-02, -9.697e-03, -5.502e-02, 3.250e-02, -5.076e-02, 1.502e-02, -2.256e-02, 2.525e-02, 1.060e-01, -3.222e-02, 1.589e-02, -8.002e-02, -9.389e-02, -5.412e-02, -6.140e-02, -1.801e-02)); + r += mul(s1_6, M4(2.848e-02, 2.843e-03, 1.340e-02, -1.990e-02, 8.199e-02, 1.340e-02, -2.848e-02, 4.275e-04, -5.901e-03, 3.337e-02, -3.732e-03, 1.351e-02, 2.862e-02, -3.562e-02, 5.545e-02, 4.850e-02)); + r += mul(s1_7, M4(6.224e-02, 5.282e-02, 8.999e-03, 7.328e-02, 1.065e-01, -1.930e-02, 6.903e-04, -4.263e-03, -9.940e-02, 3.987e-02, -6.854e-02, 1.161e-02, -2.139e-01, 5.007e-02, 1.676e-02, 5.381e-02)); + r += mul(s1_8, M4(3.104e-02, -7.188e-02, -8.811e-02, -8.890e-02, -1.515e-02, 3.806e-02, 2.338e-02, 7.566e-02, 6.664e-02, -1.111e-01, -5.906e-02, 7.995e-02, 5.745e-02, 2.909e-02, 4.472e-02, 2.643e-02)); + r += mul(s2_0, M4(1.828e-01, 7.161e-02, -5.382e-02, 8.720e-02, 2.770e-01, 3.190e-01, 1.109e-02, 1.240e-01, 8.591e-02, 3.506e-03, -1.773e-02, -1.598e-02, -3.199e-02, 7.221e-02, 2.003e-02, -2.213e-02)); + r += mul(s2_1, M4(-1.358e-01, 1.293e-01, -7.900e-02, 3.686e-02, 4.384e-02, 1.363e-01, -4.337e-03, 8.852e-02, 9.720e-02, 7.495e-02, 4.065e-02, -2.138e-01, -8.145e-02, -1.186e-02, -1.101e-01, 1.461e-02)); + r += mul(s2_2, M4(-1.170e-01, -1.441e-01, -1.500e-01, -4.904e-02, -1.960e-02, 3.165e-02, 8.069e-02, 4.081e-02, -1.704e-01, 2.608e-02, -1.044e-01, -6.156e-02, 1.214e-03, 8.534e-02, -3.623e-02, 2.545e-02)); + r += mul(s2_3, M4(2.213e-01, -1.509e-01, -1.883e-01, 1.552e-01, 2.009e-01, 1.419e-01, 8.180e-02, 2.316e-03, -7.246e-02, 2.027e-01, 6.280e-02, -3.107e-02, -7.235e-02, -1.269e-03, -7.734e-02, 1.197e-01)); + r += mul(s2_4, M4(7.125e-02, 1.309e-02, -9.378e-02, -2.830e-02, 
8.757e-02, 2.510e-01, 3.676e-02, -7.276e-02, -1.265e-01, -1.840e-01, -2.401e-01, -2.073e-01, -1.402e-01, 2.373e-01, -1.422e-01, 4.767e-02)); + r += mul(s2_5, M4(7.518e-03, 1.447e-01, 3.432e-03, 7.814e-03, 5.761e-02, 7.782e-02, 9.553e-02, 7.358e-02, -1.961e-01, 1.566e-01, 2.421e-02, -6.521e-02, -1.866e-01, 2.626e-02, -1.451e-01, 9.506e-02)); + r += mul(s2_6, M4(-1.379e-02, -2.517e-02, -8.003e-02, -3.300e-02, 4.645e-02, 8.662e-02, -1.803e-04, 1.413e-02, 9.272e-02, 1.265e-01, 8.964e-02, 2.082e-02, -6.214e-02, -4.795e-02, -6.786e-02, -6.251e-02)); + r += mul(s2_7, M4(2.703e-02, 4.670e-02, -9.434e-04, -1.845e-02, -3.249e-02, -4.444e-02, 3.395e-02, -4.206e-03, 1.843e-01, -6.523e-02, 2.182e-02, 6.783e-03, -6.421e-02, 8.621e-02, 1.154e-02, -6.480e-02)); + r += mul(s2_8, M4(-1.291e-02, -4.946e-02, -5.154e-02, -1.505e-03, -1.007e-01, 7.686e-02, 1.823e-02, 4.207e-02, -4.411e-02, 6.437e-02, 2.013e-02, -1.028e-01, -5.362e-02, 1.166e-01, 4.791e-02, 1.842e-02)); + r += mul(s3_0, M4(-1.644e-01, -9.269e-02, -3.241e-02, 4.436e-02, 4.739e-02, -1.260e-02, -1.015e-02, 4.152e-02, 3.481e-02, -6.795e-03, 6.743e-02, 3.118e-02, 1.704e-01, 2.709e-02, 7.192e-02, 1.166e-02)); + r += mul(s3_1, M4(-1.415e-01, -1.133e-01, -6.851e-02, 9.254e-03, 9.259e-02, 5.742e-02, -1.557e-01, 1.312e-01, 8.420e-02, 5.161e-02, -3.297e-02, -3.678e-02, 3.088e-02, 1.418e-01, -3.458e-02, 7.107e-02)); + r += mul(s3_2, M4(1.333e-02, -4.073e-02, -6.169e-02, -1.772e-02, -1.517e-02, 7.551e-02, -1.725e-02, -4.259e-02, 8.651e-03, -4.193e-02, -9.653e-03, 2.440e-02, 6.045e-02, 2.762e-03, 2.259e-02, -2.915e-02)); + r += mul(s3_3, M4(2.404e-03, -8.257e-02, -2.909e-03, 4.371e-02, -1.488e-02, -1.551e-02, 1.178e-02, -2.847e-02, -7.495e-02, -1.241e-02, -4.033e-02, -5.520e-02, 7.617e-02, -7.622e-02, 9.625e-03, -4.128e-02)); + r += mul(s3_4, M4(3.427e-03, 2.843e-01, -5.127e-02, -1.121e-01, -7.183e-02, -9.594e-02, -5.560e-02, -2.600e-01, -1.983e-02, -3.479e-02, -8.615e-02, -1.030e-01, 1.907e-02, 3.300e-02, -3.690e-02, -2.423e-02)); + 
r += mul(s3_5, M4(7.713e-02, -2.037e-02, 6.728e-04, -1.092e-01, -1.090e-01, 7.324e-02, 1.815e-02, 1.934e-01, 5.432e-04, 8.149e-03, 3.377e-02, 1.305e-02, -1.524e-02, -6.798e-02, 6.293e-02, 3.912e-03)); + r += mul(s3_6, M4(3.055e-02, -1.343e-02, -3.071e-02, -1.147e-02, 1.201e-01, -2.132e-02, 3.523e-02, -1.675e-01, -1.808e-02, 9.744e-02, 4.147e-02, -4.336e-02, 5.417e-02, -3.875e-02, -1.704e-02, -1.180e-01)); + r += mul(s3_7, M4(-4.933e-02, 1.147e-01, 2.018e-02, 6.987e-02, 1.260e-01, -1.100e-01, -1.945e-02, -1.754e-02, 3.246e-02, 6.750e-02, -3.140e-02, -2.553e-02, 1.028e-01, 5.734e-03, 7.628e-02, 8.382e-02)); + r += mul(s3_8, M4(-1.928e-02, 1.842e-02, 8.692e-02, 5.390e-02, 4.568e-02, -4.408e-02, 4.308e-02, -1.523e-01, -2.739e-02, -1.093e-02, 4.826e-02, -4.495e-02, 9.926e-02, 5.030e-02, 5.833e-02, -2.268e-02)); + r += mul(s4_0, M4(6.914e-02, 6.811e-02, 3.761e-02, -5.384e-02, -3.618e-03, 1.077e-02, 1.239e-02, 7.690e-02, -6.556e-02, 7.989e-02, -7.325e-03, -6.799e-02, 2.380e-02, 6.660e-02, 5.495e-02, -4.745e-02)); + r += mul(s4_1, M4(4.836e-03, 2.223e-01, 6.137e-02, 1.015e-01, 1.798e-02, -5.505e-03, -8.869e-03, -7.210e-02, -1.624e-01, -6.293e-02, -2.680e-02, 9.346e-02, 1.380e-01, -8.661e-02, 4.515e-02, 4.240e-02)); + r += mul(s4_2, M4(-6.804e-02, 1.127e-02, -2.572e-02, 3.496e-02, -8.525e-03, -6.274e-02, -3.604e-02, -4.725e-02, -5.312e-02, -8.806e-02, -3.529e-02, -3.447e-02, 4.698e-02, -4.421e-04, 3.393e-02, -8.021e-03)); + r += mul(s4_3, M4(-6.061e-02, 4.285e-02, -1.218e-01, 6.028e-02, 1.878e-02, -8.686e-03, -2.590e-02, 3.975e-02, 7.884e-02, 4.833e-02, -3.334e-02, 2.402e-02, 7.826e-02, 5.420e-02, -5.012e-02, 4.519e-02)); + r += mul(s4_4, M4(-1.290e-01, 8.998e-03, -1.931e-01, 1.198e-01, -3.256e-02, 1.311e-01, -1.032e-01, 1.870e-02, -3.907e-02, -1.459e-01, -4.729e-02, 1.109e-01, -3.523e-02, -1.051e-01, -2.727e-01, -1.157e-02)); + r += mul(s4_5, M4(-7.794e-02, 4.458e-02, 6.071e-02, 9.567e-02, 4.364e-03, -1.410e-01, -1.162e-01, 5.534e-03, 4.978e-02, 1.119e-02, 4.228e-02, 
1.397e-01, -3.146e-02, 3.212e-03, -7.790e-02, 4.042e-02)); + r += mul(s4_6, M4(2.847e-02, 1.136e-01, -1.699e-02, 2.081e-02, 1.397e-01, -9.842e-02, -8.220e-03, -3.794e-02, 5.221e-03, -3.120e-02, -4.743e-02, -2.860e-02, -8.238e-02, 2.748e-02, 2.975e-02, 4.382e-02)); + r += mul(s4_7, M4(-5.192e-02, -3.529e-02, -6.300e-03, -3.442e-02, 4.006e-03, 2.780e-02, 3.457e-02, -8.588e-02, -3.943e-02, 1.708e-02, -7.220e-02, -4.744e-02, -1.007e-01, -5.533e-02, 9.120e-02, 1.251e-01)); + r += mul(s4_8, M4(-8.423e-02, 1.019e-02, 1.364e-02, 6.687e-02, -1.504e-03, 3.593e-02, 4.618e-02, 5.315e-03, -8.243e-04, 7.704e-02, -1.088e-02, -6.900e-02, 3.929e-02, 3.804e-03, -6.372e-02, 7.549e-02)); + r += mul(s5_0, M4(6.951e-02, 1.242e-01, 1.260e-02, -6.354e-02, -1.712e-01, -1.040e-01, 6.037e-02, -3.848e-02, 1.165e-01, 4.112e-02, -4.670e-02, -1.185e-01, -2.004e-02, 3.090e-03, 2.409e-02, -1.401e-01)); + r += mul(s5_1, M4(9.496e-02, -2.072e-02, -1.813e-02, -2.687e-02, -6.891e-02, 3.486e-02, -6.161e-02, 1.796e-01, 1.867e-02, -4.973e-02, -4.871e-02, -4.193e-03, 5.222e-02, 6.034e-02, 6.994e-02, -1.049e-01)); + r += mul(s5_2, M4(1.804e-02, 9.443e-03, 7.466e-03, 5.868e-03, -2.372e-02, -4.161e-02, -2.034e-01, -7.148e-02, 1.966e-02, -9.410e-02, -4.377e-02, 1.338e-02, -3.065e-02, 2.147e-02, -8.088e-02, 3.253e-02)); + r += mul(s5_3, M4(-2.028e-01, -1.635e-02, 9.419e-03, 1.403e-01, 2.690e-02, 6.186e-03, -2.235e-04, 1.512e-01, 5.191e-03, 7.650e-02, -1.496e-02, 2.694e-02, -1.087e-01, 3.270e-02, -4.237e-02, -1.219e-01)); + r += mul(s5_4, M4(-1.851e-01, -1.129e-01, 1.542e-01, 3.088e-01, -1.642e-02, 8.798e-02, -2.252e-01, -5.032e-03, 3.164e-02, -3.783e-02, -1.338e-01, 1.552e-01, 3.572e-02, 1.806e-01, -8.001e-02, -3.658e-01)); + r += mul(s5_5, M4(-5.363e-02, 2.284e-03, 5.092e-03, 6.959e-02, 1.681e-01, -1.139e-01, -7.353e-02, 3.406e-02, 5.790e-02, -1.616e-01, -7.873e-03, 1.148e-01, -1.011e-01, 4.432e-02, -1.495e-02, 2.734e-02)); + r += mul(s5_6, M4(1.455e-03, -2.003e-02, -4.008e-02, -4.248e-02, -9.500e-04, 
2.532e-02, 2.507e-02, -4.032e-02, -1.464e-02, -5.039e-02, -9.448e-02, -7.201e-02, -2.984e-02, -7.626e-02, -3.012e-02, 8.289e-02)); + r += mul(s5_7, M4(-4.532e-02, 5.196e-03, 9.819e-02, -4.660e-02, -1.468e-01, 2.021e-01, -1.945e-02, -5.247e-02, 2.540e-02, -6.217e-03, 1.489e-02, -7.293e-02, 5.411e-04, 2.687e-03, 4.745e-02, 1.391e-01)); + r += mul(s5_8, M4(-4.290e-02, 8.605e-02, 1.447e-02, 1.077e-01, 1.379e-02, 2.592e-02, -3.428e-02, -4.561e-02, 6.433e-03, -3.262e-02, 2.071e-02, -2.374e-02, -1.034e-02, -8.245e-02, -9.518e-02, -8.323e-02)); + r += mul(s6_0, M4(8.349e-02, -9.421e-02, 5.134e-02, -1.707e-02, -8.791e-03, -5.482e-03, 3.633e-02, -4.853e-02, -1.052e-01, -3.966e-02, 1.599e-02, 2.633e-03, -4.443e-01, 1.229e-01, -1.745e-02, -1.713e-01)); + r += mul(s6_1, M4(-6.439e-03, -6.925e-02, -9.048e-02, -1.107e-01, 1.839e-02, -1.161e-01, 1.470e-02, 4.258e-02, 4.419e-02, 7.738e-02, -2.092e-02, 2.483e-02, -3.099e-01, -2.297e-01, -1.084e-02, 8.280e-02)); + r += mul(s6_2, M4(9.559e-03, -4.827e-02, -9.305e-02, -6.416e-02, -4.962e-02, -3.425e-02, 2.045e-02, 4.995e-02, -5.017e-02, 8.078e-02, 6.253e-03, -7.790e-02, -1.252e-03, -4.170e-02, -5.411e-02, 6.629e-02)); + r += mul(s6_3, M4(7.144e-02, 6.963e-02, -2.132e-02, 3.176e-02, -1.230e-01, 5.421e-02, 1.039e-02, 1.929e-02, -8.516e-02, 7.294e-02, -6.347e-02, 8.635e-03, -4.146e-01, -1.885e-02, 5.569e-03, 4.442e-01)); + r += mul(s6_4, M4(-9.567e-02, -1.112e-01, 1.220e-02, 2.581e-02, 8.993e-02, -1.958e-01, 6.408e-02, -2.905e-02, 2.708e-01, 8.480e-02, -7.526e-02, -1.466e-01, -2.280e-01, -6.046e-01, -1.809e-01, 2.190e-01)); + r += mul(s6_5, M4(-5.326e-02, 3.185e-02, -2.826e-02, 9.173e-02, -2.587e-02, -1.031e-01, -5.476e-04, -2.940e-02, -2.556e-02, -5.077e-02, 1.519e-01, 8.880e-03, -2.770e-01, 5.091e-01, 4.729e-02, 4.104e-01)); + r += mul(s6_6, M4(1.048e-01, 2.843e-02, 4.574e-03, -7.368e-02, 4.516e-02, 5.945e-02, 8.593e-03, -1.984e-02, -1.941e-01, 2.632e-03, -4.851e-02, -1.535e-02, -2.538e-01, 3.488e-01, 1.115e-01, 2.634e-01)); + r += 
mul(s6_7, M4(1.284e-02, 5.093e-02, -3.114e-02, -1.141e-02, 5.259e-02, 1.410e-01, 7.318e-02, -6.865e-03, 1.326e-02, -6.977e-02, 3.681e-02, -2.936e-02, -4.111e-01, -3.874e-02, -1.568e-01, 1.240e-01)); + r += mul(s6_8, M4(5.288e-03, -8.600e-02, 2.698e-02, -1.037e-01, 2.803e-02, 3.289e-02, 8.457e-02, -3.984e-02, -2.265e-02, -4.761e-02, -7.385e-03, -1.757e-02, -1.316e-01, 3.142e-01, -1.175e-01, 1.735e-01)); + r += mul(s7_0, M4(7.491e-02, -1.450e-01, 8.576e-02, 5.617e-02, -3.344e-02, 2.116e-02, -1.389e-03, -3.231e-02, -1.292e-02, 2.020e-02, -3.643e-02, -1.820e-02, -3.722e-02, -1.588e-02, -2.642e-02, 1.483e-02)); + r += mul(s7_1, M4(-3.321e-03, -1.353e-01, -1.682e-01, -5.332e-02, 1.267e-02, -9.879e-02, -7.646e-02, 1.006e-01, -5.750e-02, 7.676e-02, 2.614e-03, 3.882e-04, -1.424e-02, -3.769e-02, 2.924e-02, 2.567e-02)); + r += mul(s7_2, M4(-1.003e-01, -1.138e-01, 3.058e-02, -3.240e-02, -1.618e-02, -6.702e-04, 1.148e-01, 8.773e-02, -9.451e-02, -1.554e-02, -7.062e-02, 5.267e-03, 8.207e-03, 3.608e-03, 2.141e-02, -3.004e-03)); + r += mul(s7_3, M4(5.165e-02, -1.245e-01, -9.583e-02, -5.102e-02, 1.679e-04, 7.018e-02, -3.064e-02, 4.600e-02, 5.836e-02, -2.394e-03, -2.430e-02, -1.958e-02, -2.138e-02, 2.727e-02, 2.183e-02, 8.838e-02)); + r += mul(s7_4, M4(-7.226e-02, 2.005e-03, 1.071e-01, 6.017e-02, -6.424e-02, -1.035e-01, 2.689e-02, 2.654e-02, 3.096e-01, -1.818e-02, -9.951e-02, -2.597e-02, -1.332e-03, -8.858e-02, 3.340e-03, -1.278e-02)); + r += mul(s7_5, M4(-2.447e-02, -8.722e-02, -2.832e-02, 5.315e-02, 1.127e-01, -5.947e-02, 1.750e-01, 9.817e-02, -7.845e-02, -1.398e-01, -7.258e-02, 1.423e-02, 2.023e-02, -6.150e-03, -3.309e-02, 1.687e-02)); + r += mul(s7_6, M4(4.939e-02, -4.202e-02, 6.098e-03, -9.119e-02, -5.623e-02, 1.111e-01, 3.059e-02, -1.916e-01, -8.250e-02, -2.965e-02, -3.353e-02, 3.289e-02, 2.014e-02, 8.089e-03, 3.355e-02, 1.456e-02)); + r += mul(s7_7, M4(8.793e-02, 4.620e-02, 1.772e-02, -3.698e-02, -1.754e-01, 2.195e-02, -5.225e-02, -8.223e-02, 5.500e-02, -3.353e-02, 6.219e-02, 
7.981e-02, 5.135e-03, 2.492e-02, 9.620e-03, 7.272e-03)); + r += mul(s7_8, M4(4.143e-02, -1.326e-01, -2.472e-02, -2.514e-02, -4.798e-02, 5.893e-02, 2.183e-02, 1.762e-02, -1.149e-01, -1.014e-01, -1.063e-01, -8.116e-02, -1.596e-02, -1.941e-02, 3.506e-03, -1.465e-02)); + r += V4(1.946e-02, -2.094e-02, 4.776e-03, -2.408e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.395e-02, -9.589e-02, -4.729e-02, 2.096e-02, -1.275e-02, -3.719e-02, -2.352e-02, -4.158e-02, -3.476e-02, -5.034e-02, -2.781e-03, -3.350e-02, 5.401e-02, -1.746e-02, 7.324e-02, -1.488e-01)); + r += mul(s0_1, M4(-5.659e-02, -3.776e-02, 5.233e-02, 1.225e-02, 1.488e-02, -9.189e-03, -6.037e-02, -1.054e-01, 2.389e-02, 8.954e-03, 3.286e-02, -2.802e-02, 2.526e-03, -5.204e-02, -9.638e-02, -7.811e-02)); + r += mul(s0_2, M4(4.380e-02, 2.243e-02, -6.518e-02, 1.486e-02, 1.977e-02, 1.358e-02, -7.893e-03, 2.714e-02, 1.557e-01, 1.050e-02, -2.645e-02, 5.235e-04, 6.345e-02, -5.627e-02, -6.260e-02, 6.337e-03)); + r += mul(s0_3, M4(2.202e-02, 8.299e-02, -7.433e-03, -1.090e-02, 1.698e-01, -1.941e-01, -2.176e-02, 1.132e-01, -5.596e-02, -2.321e-02, -4.190e-03, 3.170e-02, 1.381e-02, 3.042e-02, 7.087e-03, -1.939e-01)); + r += mul(s0_4, M4(6.661e-02, -1.181e-01, -6.948e-02, -9.609e-02, 8.766e-02, -9.867e-02, 2.585e-02, 2.689e-02, -1.971e-01, -5.699e-03, 2.427e-01, -2.323e-01, 
-1.815e-01, -2.223e-01, -2.233e-01, 9.791e-06)); + r += mul(s0_5, M4(1.100e-02, 6.502e-02, -8.512e-02, 2.930e-03, -4.183e-02, 2.569e-02, 1.240e-01, -2.598e-03, 6.811e-02, 2.417e-02, -7.222e-02, -1.529e-02, -1.149e-01, 1.964e-02, 1.848e-02, -8.152e-03)); + r += mul(s0_6, M4(-2.243e-02, -1.350e-01, -3.474e-02, -1.115e-01, 6.877e-02, -1.045e-01, 7.582e-03, 3.097e-02, -4.251e-02, -2.396e-02, 7.987e-02, 4.079e-02, 7.451e-03, 1.029e-02, -4.421e-02, -1.016e-01)); + r += mul(s0_7, M4(-6.681e-02, -1.440e-01, 1.375e-02, -6.566e-02, -5.482e-03, -2.256e-01, -4.365e-02, -6.433e-02, -5.602e-02, 1.195e-01, 1.284e-01, -6.954e-02, -4.635e-02, 1.018e-01, 4.243e-02, 7.804e-02)); + r += mul(s0_8, M4(-2.005e-02, -9.452e-02, 1.240e-02, -2.132e-02, -4.244e-02, -5.110e-02, -2.936e-02, 1.298e-02, 1.999e-01, 9.645e-02, 1.696e-01, 7.761e-02, -6.641e-02, -7.953e-02, -5.310e-02, 2.967e-02)); + r += mul(s1_0, M4(-1.961e-02, -3.548e-02, -6.879e-03, 4.221e-02, 4.586e-02, 7.381e-02, 2.304e-02, -5.193e-02, 3.275e-02, -3.699e-02, -5.471e-03, -5.813e-02, 1.009e-01, -3.249e-02, 4.309e-02, -2.740e-02)); + r += mul(s1_1, M4(-3.215e-02, -1.087e-02, -3.967e-02, 7.976e-02, -1.784e-02, -1.108e-03, -2.064e-02, 1.705e-02, 1.204e-02, -4.359e-03, -3.299e-02, -3.450e-02, -9.809e-02, 4.950e-02, -8.091e-02, -8.869e-02)); + r += mul(s1_2, M4(5.335e-02, 8.173e-02, 7.282e-02, -8.374e-02, 7.212e-02, 4.329e-03, 4.667e-02, 2.098e-03, 2.786e-02, 3.365e-02, -1.238e-02, -4.857e-02, 4.691e-02, 8.225e-03, -8.823e-03, 5.750e-02)); + r += mul(s1_3, M4(5.744e-02, 4.194e-02, -3.313e-02, 6.049e-03, 1.886e-01, -1.162e-01, -6.582e-02, 1.155e-01, -4.434e-02, 1.850e-02, -6.507e-02, 2.807e-03, 2.068e-02, 7.649e-02, 1.290e-02, -1.866e-01)); + r += mul(s1_4, M4(-9.720e-02, 6.204e-02, -1.551e-01, 7.558e-03, 1.356e-01, -8.411e-02, 5.918e-02, 4.090e-02, -1.603e-01, 4.126e-02, 3.591e-01, -2.834e-01, -9.463e-02, -1.566e-01, -2.453e-01, 6.982e-02)); + r += mul(s1_5, M4(1.202e-01, 3.790e-02, -1.541e-01, 3.033e-02, 2.230e-02, -6.520e-02, 
7.259e-02, 1.530e-02, -1.810e-01, -3.178e-02, -5.645e-02, -9.320e-02, -4.207e-02, 8.256e-02, -4.344e-03, 3.775e-02)); + r += mul(s1_6, M4(-1.816e-02, -1.946e-01, -7.062e-02, -2.821e-02, 3.543e-02, 9.863e-03, -3.540e-02, -3.527e-02, 2.861e-02, 2.633e-03, 2.951e-02, 2.566e-02, -2.643e-02, 8.349e-02, -2.609e-02, 5.036e-05)); + r += mul(s1_7, M4(-6.358e-02, 9.779e-02, -1.806e-02, -3.364e-03, -3.616e-02, -1.403e-01, -5.088e-02, -5.711e-02, 1.034e-01, 2.291e-01, -2.781e-02, -1.797e-01, -3.687e-02, 1.431e-01, -9.658e-04, 9.394e-02)); + r += mul(s1_8, M4(-2.238e-02, -1.364e-01, 7.675e-02, -1.033e-01, -1.334e-02, 5.042e-03, 2.727e-02, -2.448e-02, 4.085e-03, 9.684e-02, 4.109e-02, 3.931e-02, -6.314e-04, -3.643e-02, -5.133e-02, 5.306e-02)); + r += mul(s2_0, M4(1.440e-01, 1.221e-02, -3.043e-02, -3.155e-02, 7.192e-03, 1.139e-01, -7.501e-02, 1.577e-02, -5.249e-02, 1.461e-02, -1.227e-01, -4.077e-02, -9.838e-02, -1.887e-03, 1.208e-02, 3.039e-02)); + r += mul(s2_1, M4(-3.265e-01, 7.092e-02, -7.443e-02, -2.228e-01, 6.019e-04, -4.634e-03, 3.208e-02, -8.908e-02, -5.714e-02, 6.949e-02, -9.750e-02, 9.873e-02, -1.033e-01, -7.925e-02, 2.384e-02, -8.670e-03)); + r += mul(s2_2, M4(1.060e-01, -5.553e-03, 8.267e-02, 7.304e-02, -1.123e-01, 1.227e-02, -2.054e-02, -6.947e-02, 9.932e-04, 9.875e-04, 2.093e-02, 8.527e-02, 1.137e-02, -4.719e-02, -3.301e-02, -7.840e-02)); + r += mul(s2_3, M4(-1.497e-01, -1.588e-01, -2.209e-02, -1.475e-01, 1.243e-01, -9.028e-02, -3.921e-02, 1.594e-03, 6.488e-03, 8.669e-02, -7.208e-02, 2.819e-01, -6.688e-02, 3.715e-02, 3.647e-02, 9.818e-03)); + r += mul(s2_4, M4(2.873e-01, -1.763e-02, 6.469e-02, 2.568e-01, -1.292e-01, 5.614e-02, -6.700e-02, -3.543e-01, -2.074e-01, 7.342e-02, -1.800e-01, -2.728e-02, -3.366e-02, -7.661e-02, -1.497e-01, 5.849e-02)); + r += mul(s2_5, M4(8.426e-02, -7.830e-02, -5.613e-02, -2.017e-02, -4.353e-02, -1.004e-02, -1.765e-02, 4.949e-02, 1.320e-02, -4.037e-02, 2.652e-02, -9.777e-02, 2.225e-02, -7.359e-02, 7.803e-03, -3.173e-02)); + r += mul(s2_6, 
M4(-3.595e-04, 2.186e-01, 4.415e-02, 9.376e-03, 1.158e-02, -2.255e-02, 4.156e-02, -4.312e-02, -1.507e-01, -7.404e-02, -1.392e-02, -5.161e-02, -4.031e-02, -5.684e-02, -7.447e-03, -3.963e-02)); + r += mul(s2_7, M4(1.258e-02, 8.734e-03, -4.634e-02, 1.838e-02, -4.625e-02, -2.428e-02, 8.228e-02, -3.899e-02, 1.047e-02, 6.135e-02, 1.386e-01, 5.174e-02, -7.872e-02, -6.136e-02, 6.400e-02, 5.132e-02)); + r += mul(s2_8, M4(1.466e-02, -2.522e-03, -4.948e-02, 8.298e-02, 6.032e-02, 2.008e-02, -1.315e-02, -9.797e-02, -2.315e-03, -6.006e-02, -6.912e-03, -7.048e-02, 3.588e-02, 1.049e-03, 5.280e-02, -8.946e-02)); + r += mul(s3_0, M4(5.880e-03, 2.187e-02, 9.695e-02, 1.129e-02, -2.751e-02, 7.634e-02, 3.715e-02, 1.509e-01, -3.497e-02, 6.772e-02, 1.171e-02, 5.376e-02, 2.103e-02, 3.223e-03, 9.906e-02, 1.251e-03)); + r += mul(s3_1, M4(1.289e-01, -8.753e-02, -1.653e-02, 1.067e-02, -4.931e-02, 1.911e-02, -4.758e-02, 8.006e-02, 1.592e-02, 1.208e-01, -1.515e-02, 2.233e-02, 5.549e-02, 7.252e-02, 2.836e-02, -1.307e-01)); + r += mul(s3_2, M4(3.939e-02, 4.301e-02, 3.538e-02, -1.421e-03, -1.255e-01, -9.881e-03, -1.600e-02, 1.885e-03, -1.582e-02, -1.391e-02, 1.638e-02, 2.772e-02, -3.851e-03, -4.136e-02, -8.607e-02, -4.196e-02)); + r += mul(s3_3, M4(-5.763e-03, -6.492e-02, -1.938e-02, -5.154e-02, 3.994e-02, -2.938e-02, -3.035e-02, 1.098e-02, 4.031e-02, -7.904e-02, 7.387e-04, -5.600e-02, 2.163e-01, 7.884e-02, 4.752e-02, -1.372e-01)); + r += mul(s3_4, M4(-4.021e-02, -1.533e-01, -1.095e-01, -1.224e-02, 5.862e-02, 2.303e-01, -8.903e-03, -5.954e-03, -2.905e-03, -2.389e-02, -1.345e-01, -1.546e-01, -8.585e-02, 5.629e-02, -1.464e-01, 9.047e-02)); + r += mul(s3_5, M4(-1.371e-01, 3.602e-02, -4.653e-02, 3.862e-03, 9.918e-02, 6.156e-03, -9.347e-02, -5.303e-02, 2.418e-02, -2.661e-02, -5.576e-02, -5.462e-02, -3.618e-02, -4.450e-02, -2.666e-02, -3.199e-02)); + r += mul(s3_6, M4(7.845e-02, 2.929e-02, -3.866e-02, -1.019e-01, 5.534e-02, -1.504e-02, -1.424e-03, 3.384e-02, 1.208e-02, -4.149e-02, 2.009e-02, -3.681e-02, 
5.807e-02, 3.756e-03, 3.506e-03, -1.057e-01)); + r += mul(s3_7, M4(1.580e-03, -9.761e-02, -1.217e-02, -5.899e-02, -1.577e-02, -4.953e-02, 4.743e-02, 1.048e-01, -5.623e-02, 4.937e-02, 5.772e-02, -1.982e-03, 1.069e-01, 3.703e-02, 3.209e-02, 1.242e-01)); + r += mul(s3_8, M4(-7.262e-02, -7.243e-02, -5.480e-02, 2.458e-02, -4.308e-02, -7.542e-04, 4.099e-03, 1.009e-02, -4.056e-02, -3.928e-02, 1.769e-02, 6.411e-03, -4.956e-03, -8.744e-03, 2.739e-02, -7.552e-02)); + r += mul(s4_0, M4(6.766e-02, 1.101e-01, 5.535e-02, -5.821e-02, -4.446e-02, 3.118e-02, -1.659e-02, -3.922e-02, 4.098e-02, 8.866e-02, -5.734e-02, 5.606e-02, -3.286e-02, -2.125e-02, -3.408e-02, 8.307e-04)); + r += mul(s4_1, M4(3.255e-02, -5.237e-02, 2.893e-02, -3.657e-02, 1.381e-02, -1.508e-02, 6.604e-03, -8.554e-02, -5.511e-02, -1.482e-02, 7.612e-03, 1.046e-01, 1.003e-01, 1.696e-03, 6.910e-02, -9.445e-03)); + r += mul(s4_2, M4(-3.187e-02, -3.224e-02, 1.335e-01, 7.862e-03, -1.605e-02, -8.148e-02, 1.674e-02, 5.493e-02, 7.531e-02, -6.184e-03, -7.809e-02, -2.515e-02, -8.351e-02, -3.192e-02, -3.464e-02, -4.521e-02)); + r += mul(s4_3, M4(2.831e-01, -1.631e-01, 1.548e-01, -1.239e-01, -5.712e-02, -7.808e-02, 2.809e-02, -6.122e-03, -3.826e-02, 3.327e-02, 2.472e-02, -1.333e-01, -5.070e-02, 1.184e-01, 4.815e-02, -1.672e-01)); + r += mul(s4_4, M4(3.058e-01, -4.137e-03, 2.509e-01, -4.459e-02, -5.592e-02, 1.314e-01, -3.724e-02, -1.911e-01, 3.084e-02, 2.114e-02, 6.667e-02, -3.520e-02, 1.591e-01, -3.472e-03, -7.565e-02, -2.143e-03)); + r += mul(s4_5, M4(7.745e-02, -6.270e-02, 2.258e-02, 3.454e-02, 9.153e-03, 3.901e-02, 1.860e-02, 1.441e-02, 1.151e-01, -7.555e-02, -6.658e-02, 2.956e-02, 1.446e-01, 7.539e-02, 5.197e-02, 4.735e-03)); + r += mul(s4_6, M4(1.182e-01, 1.898e-02, 2.580e-02, -9.786e-02, -3.308e-03, -1.419e-02, 2.067e-02, -3.452e-02, 9.884e-02, 2.306e-02, -3.982e-02, 9.304e-02, -2.952e-02, 5.388e-02, -2.612e-02, -1.135e-01)); + r += mul(s4_7, M4(6.055e-02, 8.741e-02, 7.538e-02, -7.519e-02, 6.237e-02, -1.765e-01, 
-7.180e-02, 7.595e-02, -3.554e-02, 1.320e-01, 6.164e-02, 1.003e-01, 4.490e-02, 9.272e-02, 1.068e-01, 1.002e-01)); + r += mul(s4_8, M4(4.963e-02, -2.594e-02, 2.272e-02, 5.546e-02, -3.866e-02, -1.926e-02, 2.816e-03, 6.527e-02, 4.404e-02, 1.315e-02, -2.071e-02, -7.344e-02, -2.113e-02, 3.652e-02, -1.171e-02, 2.945e-03)); + r += mul(s5_0, M4(6.229e-02, -3.058e-02, -1.340e-02, 1.391e-02, -1.281e-03, 1.176e-01, 1.312e-01, 6.465e-02, 1.029e-01, 4.557e-03, -5.970e-02, 1.116e-02, -4.103e-02, -1.465e-02, -7.680e-02, -6.180e-02)); + r += mul(s5_1, M4(5.943e-02, -1.416e-01, 3.715e-04, -1.057e-01, -2.718e-02, 8.088e-02, -3.600e-02, 1.384e-01, -9.003e-02, -5.988e-02, 7.010e-02, 7.876e-03, 4.775e-02, 3.654e-02, -2.713e-02, 2.708e-02)); + r += mul(s5_2, M4(-3.837e-02, -1.565e-02, 2.219e-02, -6.472e-02, 3.717e-02, 1.940e-02, 1.092e-01, -3.138e-02, 1.501e-02, 1.286e-02, -1.041e-01, 2.203e-02, 3.194e-02, 9.982e-02, -1.251e-02, 1.355e-02)); + r += mul(s5_3, M4(5.853e-02, 1.831e-01, 2.112e-02, -1.985e-03, -1.118e-01, -1.320e-02, 4.744e-02, -1.625e-01, 4.859e-02, 1.197e-01, 2.712e-02, -5.726e-02, 1.294e-01, -1.522e-01, 1.166e-01, -8.268e-02)); + r += mul(s5_4, M4(4.261e-02, 2.541e-03, 1.922e-01, -5.571e-02, 4.345e-02, 1.164e-01, -1.463e-01, -5.412e-02, -1.351e-02, 6.018e-02, 1.114e-01, -9.633e-02, -6.375e-02, -2.481e-01, -7.674e-02, 1.197e-02)); + r += mul(s5_5, M4(-2.830e-03, 8.239e-02, 5.292e-02, 3.019e-02, 3.622e-02, 5.292e-02, 7.374e-02, 1.421e-01, 1.218e-01, 5.201e-02, -2.951e-02, 2.387e-02, -7.925e-02, 8.075e-02, -1.673e-02, -2.172e-02)); + r += mul(s5_6, M4(3.188e-02, 6.711e-02, -8.440e-03, 3.255e-02, 3.366e-02, -3.691e-02, -2.101e-02, 1.522e-02, 6.729e-02, 8.272e-02, 2.308e-02, -4.969e-03, 3.456e-02, -1.226e-01, 5.143e-03, 5.949e-02)); + r += mul(s5_7, M4(-1.538e-02, 8.052e-02, 3.767e-02, 3.576e-02, 1.192e-02, -1.285e-01, -1.240e-01, -1.155e-01, 1.604e-02, 9.288e-02, 7.225e-02, 1.157e-01, 1.540e-02, -1.841e-01, -4.204e-02, -7.928e-02)); + r += mul(s5_8, M4(2.011e-02, 5.070e-02, 
-1.360e-02, 1.586e-02, -1.096e-01, -8.943e-03, -2.883e-02, -3.937e-02, 3.098e-02, -4.729e-04, -3.013e-02, -4.197e-02, -4.662e-02, -3.031e-02, -1.028e-04, 2.773e-02)); + r += mul(s6_0, M4(-1.481e-02, 3.680e-03, 1.813e-02, 1.702e-02, 1.819e-03, -4.377e-03, -7.187e-04, -4.898e-02, 3.108e-02, -2.606e-02, -4.699e-02, 7.789e-02, -2.826e-01, 5.431e-02, 1.970e-01, 1.209e-01)); + r += mul(s6_1, M4(6.911e-02, -1.416e-02, -9.693e-02, 4.817e-02, -4.358e-02, 3.826e-02, 4.636e-02, 2.814e-02, 8.591e-02, 1.173e-02, -7.589e-02, -2.467e-03, -1.815e-01, 7.563e-02, 3.060e-02, 3.624e-01)); + r += mul(s6_2, M4(3.458e-02, -3.127e-02, 3.727e-02, 2.375e-02, 2.581e-02, -5.655e-02, 5.606e-02, -4.198e-03, -1.001e-01, 4.610e-02, 1.557e-03, -5.766e-02, -1.539e-01, 6.067e-02, 2.534e-01, -9.922e-02)); + r += mul(s6_3, M4(-2.950e-02, 7.285e-02, 7.015e-03, -1.711e-02, 4.936e-02, 4.560e-02, -4.614e-02, -1.085e-01, -1.144e-01, -1.815e-02, 2.214e-02, 5.016e-02, -4.194e-01, -2.851e-01, 2.156e-01, -2.858e-02)); + r += mul(s6_4, M4(1.187e-01, 8.610e-02, 1.643e-01, 1.513e-01, 6.280e-02, -3.520e-02, -8.953e-02, -1.304e-01, 1.336e-01, -9.314e-03, -4.739e-02, -2.907e-01, -1.902e-01, -5.034e-01, 9.737e-02, 4.044e-01)); + r += mul(s6_5, M4(2.395e-02, 2.296e-02, 3.555e-02, -1.153e-02, 5.003e-02, 4.784e-03, -3.631e-03, 2.028e-02, -1.492e-01, 5.326e-02, -1.009e-01, 2.862e-02, 1.364e-01, -4.536e-01, -3.812e-01, 3.319e-02)); + r += mul(s6_6, M4(3.443e-02, 3.912e-02, -6.775e-03, -1.025e-01, -2.452e-02, -9.198e-02, 3.827e-02, -8.314e-02, 3.334e-02, 3.962e-02, -2.296e-02, 2.559e-02, -3.215e-01, -4.423e-02, 4.716e-02, 3.208e-01)); + r += mul(s6_7, M4(-5.346e-02, 4.934e-02, 5.720e-02, 8.442e-05, -2.611e-02, -1.257e-01, -3.897e-02, 6.115e-03, 1.121e-03, -1.353e-01, -6.367e-02, -8.777e-02, -3.356e-02, 8.508e-01, -6.710e-02, 2.437e-01)); + r += mul(s6_8, M4(-3.279e-02, 4.803e-02, 1.084e-02, -1.469e-03, -1.084e-01, -3.015e-02, -1.432e-02, -2.730e-02, 7.939e-02, 1.648e-01, 5.432e-02, 6.842e-02, -2.190e-01, 4.760e-01, 
1.189e-01, -1.566e-01)); + r += mul(s7_0, M4(3.039e-02, -4.042e-02, 3.707e-02, 9.572e-02, -9.552e-02, 5.214e-02, 2.919e-02, 7.078e-02, 6.632e-02, 5.363e-02, 2.971e-02, -1.054e-02, -1.973e-02, -3.739e-02, -8.141e-03, 3.426e-02)); + r += mul(s7_1, M4(1.056e-01, 8.106e-02, -1.423e-02, -1.197e-01, -9.589e-03, 7.807e-02, 1.234e-02, 1.724e-02, -9.390e-02, -6.459e-03, 6.137e-02, 6.781e-03, -2.780e-02, 3.426e-03, -4.210e-02, -2.111e-03)); + r += mul(s7_2, M4(5.906e-02, -6.837e-02, -1.526e-03, 5.797e-02, 6.039e-02, -1.754e-02, 1.022e-01, -1.539e-01, -1.161e-04, -2.746e-02, 1.525e-01, -1.059e-01, 5.706e-03, -1.552e-02, 1.843e-02, 2.712e-02)); + r += mul(s7_3, M4(1.317e-01, 8.557e-02, -3.533e-02, -1.620e-01, 2.243e-01, 5.795e-06, -4.773e-02, -4.165e-02, 1.745e-02, -1.104e-01, -4.065e-02, -7.965e-03, -1.062e-01, 5.844e-02, -1.964e-02, 2.352e-02)); + r += mul(s7_4, M4(3.415e-01, 2.737e-01, 1.364e-01, 3.602e-03, 5.410e-02, 1.907e-02, -6.755e-03, -2.686e-01, -4.820e-02, 1.914e-02, -9.279e-02, -8.144e-02, 3.230e-03, -1.465e-03, 8.159e-02, 6.788e-02)); + r += mul(s7_5, M4(4.674e-02, -1.400e-02, -1.856e-03, -2.987e-02, 8.853e-03, 2.427e-02, -2.821e-02, 8.432e-02, 7.288e-02, 7.845e-03, -6.109e-02, 1.150e-01, 2.873e-02, -5.371e-02, -3.119e-02, -7.531e-03)); + r += mul(s7_6, M4(1.787e-01, 4.830e-02, -1.587e-01, -1.785e-01, -1.709e-02, -1.119e-01, -4.879e-02, -6.821e-02, -1.583e-02, 8.248e-03, -2.813e-02, 4.111e-02, -2.005e-02, -5.693e-02, -9.208e-03, 4.814e-03)); + r += mul(s7_7, M4(-6.885e-02, 8.600e-02, -1.341e-01, -6.768e-02, -1.075e-01, 1.010e-01, -7.839e-02, 5.196e-02, -5.968e-02, -1.472e-01, 4.703e-02, -7.549e-02, -4.898e-02, 2.312e-02, -4.982e-02, 4.178e-02)); + r += mul(s7_8, M4(-1.335e-02, 1.696e-02, -9.544e-02, 2.342e-02, 7.584e-02, -6.788e-02, 5.264e-02, -3.575e-02, 6.591e-02, 4.831e-02, 5.604e-02, 5.523e-02, -3.696e-02, 4.328e-02, 4.806e-02, 1.609e-02)); + r += V4(1.163e-02, 1.111e-02, -2.674e-02, 2.569e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 
s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.044e-02, 3.976e-02, -2.713e-03, -2.239e-02, -8.208e-02, -6.853e-02, -2.467e-02, 5.687e-03, 1.560e-02, -2.362e-03, -3.105e-03, -3.978e-02, -2.943e-02, 7.069e-03, 5.994e-02, -4.156e-02)); + r += mul(s0_1, M4(-4.410e-02, -6.369e-04, 1.340e-02, -3.625e-02, -4.683e-03, -4.686e-02, -2.602e-02, 1.932e-02, -7.814e-02, 2.679e-03, 4.407e-02, -9.170e-03, -5.525e-02, -9.645e-02, 4.266e-02, 1.245e-01)); + r += mul(s0_2, M4(-1.824e-02, -1.584e-02, 1.381e-02, 8.685e-03, -1.348e-02, 3.479e-02, -6.780e-02, 7.429e-02, -8.274e-03, -1.247e-02, 4.504e-02, 5.583e-03, -9.213e-02, -7.559e-02, 2.572e-02, 2.736e-02)); + r += mul(s0_3, M4(2.819e-02, -5.725e-02, -1.996e-02, -8.497e-03, -6.542e-02, -7.037e-02, -3.119e-02, 1.333e-02, 3.837e-02, -3.085e-02, -6.294e-02, 2.702e-02, 3.653e-02, 1.286e-02, 2.269e-02, -4.996e-03)); + r += mul(s0_4, M4(4.286e-02, 6.463e-02, 4.030e-03, 4.109e-02, 8.496e-02, -7.991e-02, -3.770e-02, -2.812e-02, -2.714e-02, -7.114e-02, 4.348e-01, -2.067e-01, -3.885e-02, 1.662e-01, -9.803e-02, -1.303e-01)); + r += mul(s0_5, M4(-6.420e-02, 7.644e-02, -3.945e-03, 4.843e-02, -2.071e-02, -1.295e-01, 1.053e-03, 3.202e-02, -1.646e-01, 1.946e-01, 1.409e-01, -1.667e-01, -5.841e-02, -1.496e-01, -1.809e-02, 4.810e-02)); + r += mul(s0_6, M4(-2.353e-02, 3.602e-02, -1.510e-02, -1.081e-02, -3.546e-02, 2.930e-02, -6.499e-02, -2.120e-02, 3.098e-02, 
1.991e-02, -1.106e-01, 1.454e-02, -3.027e-02, -8.608e-03, -6.525e-03, -2.450e-02)); + r += mul(s0_7, M4(8.452e-02, 8.519e-02, -4.352e-02, -5.994e-02, 1.038e-01, 9.383e-02, -5.287e-02, -9.052e-02, 1.258e-01, -3.955e-02, 1.836e-01, 3.711e-02, -3.958e-02, 2.734e-02, -1.068e-01, -8.196e-02)); + r += mul(s0_8, M4(-1.004e-01, -1.266e-02, -7.782e-02, 5.638e-02, 4.760e-02, -4.956e-02, -2.831e-02, -9.750e-02, 1.263e-01, -2.467e-02, 3.367e-02, 4.778e-02, -8.891e-03, 8.083e-02, 8.543e-04, -6.149e-02)); + r += mul(s1_0, M4(7.064e-02, -3.305e-02, -6.078e-02, 4.083e-03, -3.250e-02, -6.921e-02, -4.862e-03, 5.376e-02, 6.239e-02, 1.770e-02, -2.288e-02, -6.141e-03, -5.434e-03, 2.557e-02, 4.303e-02, -7.795e-03)); + r += mul(s1_1, M4(-1.217e-02, -4.375e-03, 9.265e-02, -2.436e-02, -1.084e-03, 2.425e-02, 1.093e-01, 2.275e-02, 7.563e-03, -1.642e-02, 1.754e-02, 2.647e-02, -8.377e-02, -7.736e-02, 1.067e-01, 2.718e-02)); + r += mul(s1_2, M4(-5.520e-02, 1.269e-01, -8.435e-02, 8.448e-02, 3.444e-02, 1.022e-01, -1.862e-02, 6.598e-02, 3.329e-02, -2.967e-02, 1.146e-02, 1.238e-01, -5.812e-02, -3.343e-02, 2.878e-03, 6.665e-02)); + r += mul(s1_3, M4(2.062e-02, -1.695e-01, -7.799e-02, 1.398e-02, -1.468e-01, -2.432e-02, -3.870e-03, 1.403e-02, 5.847e-02, -2.735e-02, -6.518e-02, -1.040e-02, -5.155e-04, 1.307e-01, 1.418e-02, 9.161e-02)); + r += mul(s1_4, M4(-9.368e-02, -9.932e-02, 1.302e-01, -1.556e-01, -2.554e-02, -2.008e-01, 1.359e-02, -2.680e-02, 6.190e-02, -4.853e-02, 9.166e-02, 2.847e-02, -2.106e-01, 2.206e-01, -5.797e-02, -1.267e-01)); + r += mul(s1_5, M4(-2.516e-01, 1.810e-01, 2.429e-03, 9.165e-02, -4.417e-02, -5.359e-02, 1.532e-02, 1.811e-03, -3.734e-02, 1.192e-01, 9.250e-03, -4.431e-02, -1.382e-01, -1.226e-01, 6.487e-03, -1.302e-02)); + r += mul(s1_6, M4(-1.248e-02, 7.933e-02, -5.322e-02, 2.219e-02, 4.028e-04, 3.178e-02, 7.705e-03, 1.915e-02, 6.281e-02, 5.458e-03, -3.504e-02, -2.088e-02, -3.229e-02, -6.282e-02, -1.913e-03, -2.264e-03)); + r += mul(s1_7, M4(1.022e-01, 1.444e-01, -3.742e-03, 
-6.361e-02, 1.558e-01, 1.217e-01, -5.079e-02, -4.364e-02, 1.397e-01, 4.067e-02, 1.062e-01, 9.020e-02, -1.307e-01, -1.261e-02, -5.311e-02, -5.734e-03)); + r += mul(s1_8, M4(3.088e-03, -4.309e-02, -6.999e-02, -9.212e-02, 1.614e-01, 1.125e-02, 1.084e-02, -7.279e-02, 3.876e-03, 6.041e-03, -9.399e-02, 8.275e-02, -5.889e-02, 7.190e-02, -2.825e-03, 1.959e-02)); + r += mul(s2_0, M4(-8.445e-03, 1.907e-02, -2.446e-01, 1.165e-01, 5.851e-02, -3.507e-02, -2.351e-01, -5.242e-02, 2.138e-02, 1.019e-02, -1.577e-02, 4.669e-03, 9.699e-02, -4.522e-02, 3.881e-02, -7.113e-02)); + r += mul(s2_1, M4(-4.277e-02, -4.543e-02, 1.424e-02, 1.687e-02, -5.888e-03, 1.411e-02, -1.075e-01, 3.964e-02, -5.187e-02, -3.288e-02, 1.687e-01, -2.589e-01, -4.321e-03, 9.293e-02, -1.014e-02, -1.401e-01)); + r += mul(s2_2, M4(-1.085e-01, 3.257e-02, -4.122e-03, -1.857e-03, 9.057e-02, -8.602e-03, -1.726e-02, 8.968e-02, 4.722e-02, 4.112e-02, 7.975e-02, -7.304e-02, 8.705e-02, 4.329e-02, 4.175e-02, -1.431e-01)); + r += mul(s2_3, M4(2.931e-02, 2.431e-02, 4.895e-02, 8.375e-02, 2.388e-02, 1.423e-01, -2.586e-01, 1.456e-01, -6.493e-02, -1.082e-03, -6.684e-02, -7.866e-02, 2.237e-02, 6.424e-02, 9.272e-02, -3.164e-02)); + r += mul(s2_4, M4(1.627e-01, 2.827e-02, 3.059e-02, -9.400e-02, 1.152e-01, -7.012e-02, -1.099e-01, -1.231e-01, 6.033e-02, -1.182e-01, 2.324e-01, 8.117e-03, -1.818e-02, -4.512e-02, 2.717e-02, 1.116e-01)); + r += mul(s2_5, M4(-2.338e-02, 3.575e-02, -7.326e-02, 4.292e-02, -2.192e-02, 7.025e-03, -8.236e-02, 1.137e-01, -7.215e-02, 3.959e-02, 2.721e-02, 3.182e-02, -1.178e-02, 1.383e-01, 3.623e-02, -7.956e-02)); + r += mul(s2_6, M4(6.068e-02, -1.599e-01, 1.563e-02, 9.910e-02, 6.822e-02, -6.330e-03, -8.954e-02, -4.047e-02, -4.017e-02, 4.640e-02, 6.713e-02, -1.995e-02, -1.653e-02, -6.367e-02, -5.527e-02, 1.112e-03)); + r += mul(s2_7, M4(-6.325e-02, 3.405e-02, 4.576e-04, 3.678e-02, 7.667e-02, 6.580e-02, -3.727e-03, 9.988e-02, 7.735e-02, -1.517e-01, -1.384e-02, 1.075e-01, 3.524e-02, -3.428e-02, -7.878e-03, 
-7.362e-03)); + r += mul(s2_8, M4(2.430e-02, -2.020e-02, 6.423e-02, 1.522e-02, 6.724e-02, -6.726e-02, 2.133e-02, 1.430e-03, 6.096e-03, -2.198e-02, 2.264e-02, 3.697e-02, 1.264e-01, 1.037e-01, -1.240e-02, 5.197e-02)); + r += mul(s3_0, M4(2.169e-02, 2.504e-02, 1.037e-01, 3.411e-02, 5.118e-02, 6.041e-02, -3.748e-02, 8.401e-02, 5.622e-02, 2.256e-02, 1.919e-02, -3.586e-02, 9.630e-02, 1.443e-02, -2.763e-02, -5.427e-02)); + r += mul(s3_1, M4(8.570e-02, -8.241e-02, 4.385e-02, 4.663e-02, -2.042e-02, 2.747e-02, -1.350e-01, -5.734e-02, -2.608e-02, -2.238e-02, 3.917e-02, -6.221e-02, -5.165e-02, -4.383e-02, -8.010e-02, 1.202e-01)); + r += mul(s3_2, M4(-1.071e-01, 1.806e-02, -4.676e-02, 7.724e-02, 5.927e-03, -1.329e-02, 3.595e-02, -9.005e-02, -2.000e-02, 6.244e-02, 3.114e-02, 3.745e-02, 2.687e-02, 4.229e-02, 5.796e-02, 5.742e-03)); + r += mul(s3_3, M4(-5.645e-02, -3.711e-02, 3.449e-02, 8.653e-02, -1.124e-01, 1.150e-01, -5.544e-02, -3.011e-02, -4.542e-02, 5.861e-02, -4.486e-02, -2.321e-02, 8.433e-02, 7.359e-02, 1.777e-02, -7.155e-03)); + r += mul(s3_4, M4(9.085e-02, -2.114e-01, 1.875e-02, -8.487e-02, 8.074e-02, -8.667e-02, 5.562e-02, 6.822e-02, -2.129e-03, 8.832e-02, 9.762e-03, 1.188e-02, -2.961e-02, 3.126e-02, 2.638e-02, 1.059e-01)); + r += mul(s3_5, M4(1.846e-03, -1.158e-01, -6.764e-03, 4.821e-02, -9.654e-02, 8.251e-02, 5.516e-02, -2.189e-02, -4.252e-02, 3.444e-02, -2.140e-02, 2.518e-02, -2.090e-02, 7.050e-03, 5.051e-02, 3.882e-02)); + r += mul(s3_6, M4(1.362e-02, 7.373e-03, 2.864e-02, 6.477e-02, -3.102e-02, -9.494e-02, -9.035e-04, -1.018e-01, 7.756e-02, 2.593e-04, -5.842e-02, -4.771e-02, 2.515e-02, -5.837e-02, 5.781e-03, -3.702e-02)); + r += mul(s3_7, M4(1.259e-02, 1.449e-01, -1.064e-01, -1.181e-01, 5.822e-02, -9.368e-03, -7.623e-03, -9.724e-02, 9.437e-03, -1.174e-01, -6.045e-02, -9.132e-03, -2.510e-01, 1.724e-01, -7.339e-02, -2.843e-02)); + r += mul(s3_8, M4(-1.154e-02, -3.014e-02, 3.173e-02, -2.898e-02, -2.838e-02, -1.094e-01, 2.581e-02, -1.161e-02, 2.622e-02, 2.845e-02, 
-8.344e-03, -3.107e-02, 9.083e-02, 1.809e-02, -1.382e-02, 5.248e-02)); + r += mul(s4_0, M4(-1.701e-02, -1.681e-01, 3.634e-02, 4.505e-02, 1.752e-02, -4.231e-03, 7.473e-03, -4.035e-02, -7.602e-02, -7.688e-02, 1.905e-02, 1.685e-02, -1.871e-02, 6.926e-02, -4.040e-02, 8.013e-02)); + r += mul(s4_1, M4(-2.121e-02, -7.814e-02, -8.739e-02, 1.494e-01, -4.835e-02, 7.097e-02, -1.636e-02, 7.893e-02, -4.042e-02, -6.655e-03, 5.261e-02, 4.836e-02, -3.004e-02, 1.275e-01, 1.666e-02, 6.189e-02)); + r += mul(s4_2, M4(4.286e-02, -8.761e-03, 1.687e-02, 3.043e-02, 1.437e-02, -3.089e-02, 7.082e-03, -8.197e-02, -5.931e-02, 1.408e-01, 9.069e-02, -1.079e-01, 1.112e-02, 3.030e-02, -2.776e-02, 4.441e-02)); + r += mul(s4_3, M4(-1.980e-01, -7.147e-02, 2.292e-02, 2.596e-01, 6.282e-02, 2.365e-02, -9.268e-03, 1.502e-02, -1.860e-01, -1.795e-02, 4.955e-02, -4.942e-02, 8.161e-03, -2.767e-02, 4.257e-03, 6.855e-02)); + r += mul(s4_4, M4(-2.060e-01, 2.178e-01, 1.186e-01, 3.833e-01, 9.743e-02, -1.941e-01, 1.368e-01, 4.863e-02, -2.060e-01, 1.257e-01, 1.337e-01, 1.871e-01, 1.676e-01, -2.828e-02, 5.465e-02, 3.017e-01)); + r += mul(s4_5, M4(4.660e-02, -2.261e-02, -3.447e-02, 1.099e-01, -3.801e-02, -9.249e-02, -1.074e-04, -1.763e-02, -2.638e-02, 8.448e-02, -5.157e-02, 5.928e-03, 4.440e-02, -2.713e-02, 6.287e-03, 1.153e-01)); + r += mul(s4_6, M4(-5.474e-03, 1.672e-01, -6.861e-02, -1.988e-02, -1.599e-03, -7.052e-02, 3.060e-02, 3.427e-02, -3.871e-02, 4.070e-02, 4.571e-03, -3.487e-02, 3.488e-02, -1.459e-02, -3.410e-02, -9.642e-02)); + r += mul(s4_7, M4(3.777e-02, -1.552e-01, 5.797e-02, 3.750e-03, -8.534e-02, 1.230e-01, -6.731e-02, -8.019e-02, -6.913e-03, -1.968e-01, 6.628e-02, 8.678e-02, -9.774e-02, 3.702e-02, -5.709e-02, 8.746e-03)); + r += mul(s4_8, M4(-3.814e-02, -8.854e-03, 2.903e-02, 1.036e-01, -3.554e-03, 9.865e-02, -7.659e-03, -6.438e-02, 7.322e-02, -3.479e-02, 3.099e-02, 8.696e-02, 2.780e-02, -9.543e-02, -9.496e-03, 7.311e-02)); + r += mul(s5_0, M4(-3.094e-02, -4.254e-02, 6.911e-02, 6.352e-02, 1.081e-01, 
4.253e-02, 1.324e-01, 8.483e-02, -7.910e-02, -1.033e-02, -2.140e-02, 8.306e-03, -7.538e-03, -1.534e-02, -2.465e-02, -1.115e-01)); + r += mul(s5_1, M4(-7.244e-02, 9.141e-02, -3.903e-02, -4.721e-02, 1.216e-02, 1.243e-02, 3.216e-02, -2.703e-04, -3.561e-02, 1.223e-01, -4.937e-02, 5.409e-02, 1.824e-02, -3.436e-02, -3.153e-02, 2.583e-02)); + r += mul(s5_2, M4(1.840e-02, -5.725e-02, -9.852e-03, 1.039e-01, 4.117e-02, 1.614e-01, 3.339e-02, -6.907e-02, 8.800e-03, 8.399e-02, 6.212e-02, -1.548e-01, -4.436e-02, -1.146e-01, 5.010e-03, -3.829e-02)); + r += mul(s5_3, M4(-7.835e-02, 5.730e-02, -4.555e-02, 4.678e-02, -3.569e-02, -3.450e-02, 1.276e-02, -1.623e-02, -8.050e-02, 5.898e-02, 6.266e-02, 6.138e-02, 3.209e-02, -5.514e-02, 3.587e-02, -1.388e-02)); + r += mul(s5_4, M4(5.885e-03, 1.810e-02, -1.060e-02, -1.665e-01, -1.328e-01, -5.792e-02, 2.064e-01, 2.821e-03, -6.837e-03, 1.071e-01, 1.029e-01, 3.014e-02, 1.233e-01, 3.574e-02, 8.126e-02, 1.382e-01)); + r += mul(s5_5, M4(-4.135e-02, -5.230e-02, -1.185e-02, -9.550e-03, -5.861e-04, 5.239e-02, -9.890e-02, 1.078e-01, -1.583e-02, 4.469e-02, 8.494e-02, -7.841e-02, -7.250e-03, -6.059e-02, -2.741e-02, 5.696e-02)); + r += mul(s5_6, M4(2.748e-02, -3.039e-02, 4.701e-02, -1.181e-02, 1.432e-02, -9.720e-03, 8.727e-03, 4.597e-02, -4.833e-02, -8.645e-02, 7.821e-02, -1.231e-02, -1.624e-02, 1.332e-01, -8.124e-02, 2.425e-02)); + r += mul(s5_7, M4(6.926e-02, 9.884e-02, 2.223e-03, -5.294e-02, -5.509e-02, 6.520e-03, 3.129e-02, -5.234e-02, -9.890e-02, -9.283e-02, 5.143e-02, 1.258e-01, -3.169e-02, 1.379e-01, -2.241e-03, -1.374e-01)); + r += mul(s5_8, M4(-2.339e-02, 4.610e-02, 9.191e-03, 3.271e-02, 3.843e-02, -7.156e-02, 9.077e-03, 5.775e-02, 5.044e-02, 2.279e-02, -6.811e-02, 4.273e-02, -2.611e-02, -6.971e-02, -1.611e-02, 6.006e-02)); + r += mul(s6_0, M4(2.505e-02, 3.766e-02, 4.054e-02, -4.322e-02, 3.788e-02, 7.105e-02, 2.766e-02, 1.881e-02, 1.769e-02, 3.557e-02, 3.017e-02, 2.461e-02, -7.103e-02, 2.165e-01, -1.108e-01, 1.162e-02)); + r += mul(s6_1, 
M4(-5.639e-02, -1.563e-02, 7.611e-03, -8.263e-02, 1.390e-02, 5.897e-02, 7.291e-02, 3.333e-02, -1.985e-02, -3.859e-02, -7.172e-02, 3.897e-02, 3.314e-01, -2.184e-01, 1.485e-01, -3.358e-01)); + r += mul(s6_2, M4(-4.237e-02, 2.323e-02, 2.123e-02, -6.459e-02, 3.581e-02, 4.408e-02, -2.169e-02, -1.021e-02, -5.736e-02, -1.571e-01, 3.566e-02, -9.504e-02, -2.761e-02, -2.743e-02, 3.164e-02, -1.290e-01)); + r += mul(s6_3, M4(4.800e-02, -3.069e-02, -1.506e-01, 2.946e-02, -9.870e-03, -1.109e-01, 1.050e-01, 2.674e-02, 1.095e-01, -1.704e-02, -5.751e-02, 5.679e-02, -2.885e-01, 1.443e-01, 9.681e-02, 8.549e-02)); + r += mul(s6_4, M4(-1.101e-01, -1.287e-01, 5.712e-02, 4.843e-02, -8.649e-02, 9.301e-02, 1.086e-01, -7.359e-02, 1.531e-01, 3.441e-02, -7.657e-02, 6.034e-02, 1.580e-01, 2.032e-01, 1.741e-01, -1.615e-01)); + r += mul(s6_5, M4(-1.083e-02, 3.367e-02, -2.426e-03, -3.333e-02, 2.701e-02, -1.209e-01, -2.016e-02, 1.091e-02, -7.320e-02, -2.307e-01, 2.703e-02, -2.442e-02, 1.112e-01, 1.876e-01, -2.411e-01, 1.322e-01)); + r += mul(s6_6, M4(1.312e-02, -2.243e-02, -4.825e-02, 1.113e-03, -1.856e-02, 4.430e-03, -4.659e-02, -1.388e-02, -2.014e-02, -4.208e-03, 2.505e-02, -1.212e-03, 2.191e-01, 1.855e-01, -5.144e-01, -9.088e-02)); + r += mul(s6_7, M4(1.318e-01, -3.862e-02, 1.896e-03, 1.051e-02, -2.931e-02, -5.311e-02, -3.125e-02, -7.541e-02, -4.191e-03, 2.775e-02, 5.166e-02, -6.969e-02, 4.320e-01, -3.483e-01, -2.535e-01, 2.253e-01)); + r += mul(s6_8, M4(1.595e-02, -1.106e-02, 1.332e-03, 6.104e-02, 2.432e-03, 3.079e-02, -3.708e-02, -9.332e-02, 8.644e-03, -8.420e-02, 5.762e-02, -3.917e-02, 4.429e-01, -4.880e-01, 3.146e-01, 7.947e-02)); + r += mul(s7_0, M4(1.022e-02, 4.535e-02, -1.894e-02, -2.478e-02, 2.549e-02, -7.074e-02, 7.360e-02, -9.483e-02, -4.662e-02, -3.848e-02, -1.675e-02, 3.406e-02, 2.726e-02, 5.894e-02, -3.353e-02, -1.439e-02)); + r += mul(s7_1, M4(-1.249e-01, 1.182e-01, -1.116e-01, 1.554e-01, -1.324e-02, 3.877e-02, 8.164e-02, 1.246e-01, 1.247e-02, -8.276e-02, -3.699e-04, 6.295e-02, 
-3.020e-02, -3.135e-02, -5.228e-03, -3.271e-02)); + r += mul(s7_2, M4(-7.242e-02, 7.685e-02, 1.332e-03, -1.996e-02, 7.416e-02, 5.993e-02, 3.891e-03, 1.467e-01, 3.795e-02, -2.813e-02, 6.276e-03, -8.736e-02, 6.288e-03, -1.671e-02, 3.157e-02, 1.137e-02)); + r += mul(s7_3, M4(2.462e-02, -5.562e-02, -1.547e-01, 7.757e-02, -5.447e-02, -2.082e-02, 6.462e-02, 3.818e-02, 7.313e-02, 3.180e-02, 6.349e-02, 4.597e-02, 3.639e-02, -5.572e-02, -2.767e-02, -3.854e-03)); + r += mul(s7_4, M4(-2.440e-01, 8.688e-02, -9.817e-02, -7.993e-03, -5.542e-02, 1.178e-01, 2.146e-01, -2.587e-01, -2.360e-02, -3.656e-02, 1.491e-01, -7.644e-02, -4.282e-02, 1.621e-03, 6.814e-02, -5.522e-03)); + r += mul(s7_5, M4(-1.154e-01, -7.271e-02, -8.417e-02, 3.505e-02, 3.125e-02, -1.586e-01, -5.020e-02, 1.130e-01, -5.825e-02, -9.207e-02, 3.888e-02, -1.213e-02, -4.179e-03, 7.084e-02, -3.835e-02, -1.931e-02)); + r += mul(s7_6, M4(-5.889e-02, -2.156e-02, -2.017e-01, 1.316e-02, 1.188e-02, 5.863e-02, -7.675e-02, -1.943e-02, 7.086e-03, -4.504e-02, 5.562e-03, 2.955e-03, 2.118e-02, -1.524e-03, 2.187e-02, -4.319e-02)); + r += mul(s7_7, M4(-1.654e-01, 2.251e-01, -8.720e-02, -6.604e-02, -7.955e-02, -3.832e-02, -4.145e-02, -3.334e-03, 5.834e-02, 1.713e-03, -4.636e-02, -3.712e-02, 9.400e-03, -4.264e-02, 2.888e-02, 4.214e-02)); + r += mul(s7_8, M4(-3.719e-02, -7.805e-02, -4.505e-02, -1.529e-02, 9.692e-02, -1.162e-02, 3.538e-02, -6.834e-02, -9.472e-02, -1.191e-01, 1.172e-01, 4.890e-02, -1.099e-02, -3.897e-02, -5.892e-03, 5.192e-03)); + r += V4(-1.394e-02, -2.699e-02, 2.224e-02, 3.281e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 
s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.270e-02, 8.462e-02, 6.047e-02, -1.295e-03, -4.220e-02, 8.864e-02, 1.259e-01, 1.254e-01, -2.959e-03, 1.666e-02, -4.736e-02, 5.655e-03, 5.340e-02, -9.372e-02, -1.861e-02, 6.647e-03)); + r += mul(s0_1, M4(7.213e-02, 4.957e-02, 3.529e-02, 6.272e-02, 1.731e-03, -1.453e-01, -1.714e-01, 4.357e-02, 7.614e-02, -2.309e-02, -7.446e-02, -4.053e-03, 8.282e-02, -1.530e-01, -7.690e-02, 8.619e-02)); + r += mul(s0_2, M4(1.100e-02, 5.907e-02, -6.664e-02, -9.434e-02, -9.546e-03, -1.168e-03, -7.016e-02, -2.523e-03, 8.582e-03, -3.931e-02, 5.454e-02, -5.242e-02, 1.809e-03, 3.973e-02, 7.846e-02, 1.502e-01)); + r += mul(s0_3, M4(-4.294e-02, -8.568e-02, 4.840e-02, -5.056e-02, -1.042e-02, 1.703e-01, 2.530e-02, 1.320e-01, -9.460e-02, -1.044e-02, 2.013e-03, -3.328e-02, -6.248e-02, -1.069e-01, -9.958e-02, 3.909e-02)); + r += mul(s0_4, M4(-1.629e-02, -2.062e-01, -6.175e-02, 1.759e-01, -2.276e-02, -1.242e-01, 1.792e-01, -5.800e-02, -2.607e-01, -1.996e-01, -3.020e-01, -2.794e-01, -1.831e-01, -6.461e-03, -1.025e-01, 9.115e-02)); + r += mul(s0_5, M4(4.015e-02, 1.667e-02, 1.189e-02, 9.986e-03, -2.513e-02, -3.772e-02, -9.172e-02, 1.288e-02, 5.936e-02, -3.270e-02, -4.656e-02, -1.971e-02, -1.216e-01, 2.782e-02, 2.057e-02, -8.124e-02)); + r += mul(s0_6, M4(7.638e-03, 9.634e-02, -9.590e-03, 9.920e-02, -5.291e-02, -1.379e-01, -4.254e-02, 8.970e-02, -4.908e-02, -3.514e-02, 2.161e-02, 1.228e-01, -2.859e-02, 1.776e-02, 5.965e-02, -5.740e-02)); + r += mul(s0_7, M4(1.415e-02, 9.476e-02, 5.657e-02, -5.985e-02, -1.921e-02, -1.729e-01, 1.587e-01, -3.043e-02, -3.281e-02, -1.685e-01, 8.746e-02, -5.009e-02, -7.444e-02, 4.368e-02, -1.751e-01, 8.580e-03)); + r += mul(s0_8, M4(-9.291e-02, 8.831e-02, -4.124e-04, 1.910e-02, -1.098e-01, -3.635e-03, 
-1.324e-01, 3.700e-02, -6.666e-02, 1.568e-02, 1.271e-01, 3.878e-02, -8.765e-03, -4.062e-03, 5.793e-02, -2.290e-02)); + r += mul(s1_0, M4(-1.502e-02, 1.077e-01, 3.124e-02, 1.075e-02, -3.177e-02, -9.244e-03, 7.909e-03, 1.386e-02, -2.435e-02, -2.333e-02, -3.894e-03, 1.001e-01, 2.044e-02, -3.405e-02, 3.784e-02, 1.062e-01)); + r += mul(s1_1, M4(-1.820e-02, 1.045e-01, 3.270e-02, -1.421e-01, 2.323e-02, -1.212e-01, -5.815e-02, -3.536e-02, 7.484e-02, -5.398e-02, -7.591e-02, -3.319e-03, 2.994e-02, -1.205e-01, -1.216e-01, 6.506e-04)); + r += mul(s1_2, M4(-4.254e-02, 4.430e-02, 1.334e-01, -3.396e-02, -2.487e-02, 3.670e-03, -4.728e-03, 4.544e-02, -3.614e-03, -1.019e-04, 1.291e-01, 8.122e-02, 6.363e-02, -6.492e-02, 2.131e-02, 8.853e-02)); + r += mul(s1_3, M4(-6.085e-02, -7.155e-02, 6.387e-02, -3.618e-04, 9.316e-04, 1.234e-02, 3.535e-02, 1.134e-01, -8.018e-03, 2.279e-02, -7.397e-02, -1.708e-02, -2.241e-02, -7.837e-02, 6.655e-03, -3.115e-02)); + r += mul(s1_4, M4(1.340e-01, -1.899e-01, -2.069e-01, 1.068e-01, 1.531e-01, -5.987e-02, 1.272e-01, -1.749e-01, -1.591e-01, 1.187e-01, -2.502e-01, -4.704e-02, -1.795e-01, 1.684e-01, -2.850e-02, 1.977e-02)); + r += mul(s1_5, M4(2.706e-01, 1.037e-01, 1.069e-02, 6.212e-02, 9.405e-02, 5.078e-02, -1.521e-02, 1.053e-01, 5.587e-02, -1.079e-01, -1.956e-01, 1.002e-01, -9.972e-02, -4.369e-02, 2.127e-02, 3.292e-02)); + r += mul(s1_6, M4(3.024e-02, 3.504e-02, 9.144e-03, 1.017e-01, 2.052e-02, -7.253e-02, -8.188e-03, -2.040e-03, -1.034e-01, 5.712e-02, 5.073e-02, 5.633e-02, 2.876e-02, 6.714e-02, -5.819e-02, -4.248e-03)); + r += mul(s1_7, M4(-5.796e-02, -2.728e-03, 1.136e-03, -1.841e-01, -7.350e-02, -1.392e-02, -5.575e-02, -1.827e-02, 2.426e-01, -9.720e-02, 1.825e-01, -1.503e-01, 2.962e-02, 1.437e-02, -5.738e-02, -2.191e-02)); + r += mul(s1_8, M4(-4.604e-02, -2.008e-02, 1.302e-01, 5.240e-02, 5.437e-02, 9.805e-02, 1.975e-04, 2.174e-02, -2.043e-02, -6.386e-02, 2.431e-02, -2.866e-02, 5.969e-03, -5.000e-02, -2.949e-02, -3.303e-03)); + r += mul(s2_0, 
M4(-7.025e-02, 1.479e-02, 7.548e-02, 1.139e-01, 4.150e-02, -1.592e-02, -5.557e-02, 4.076e-02, 7.414e-02, -1.435e-02, -1.292e-01, -6.525e-03, -5.171e-02, -1.093e-02, -2.271e-02, 8.183e-02)); + r += mul(s2_1, M4(-1.431e-01, -9.856e-02, -5.329e-02, -1.580e-01, -9.302e-02, 4.383e-02, -1.289e-01, -5.199e-02, -1.177e-02, 2.312e-01, -1.031e-01, -1.042e-01, 7.490e-03, 1.560e-01, 1.859e-02, 1.362e-02)); + r += mul(s2_2, M4(-1.654e-01, 4.700e-02, 5.856e-02, 1.700e-03, -2.360e-02, -2.999e-02, -8.010e-02, -2.625e-03, -8.773e-02, 1.326e-01, -1.586e-02, -1.976e-01, -9.224e-02, 7.006e-02, -4.945e-02, 6.917e-02)); + r += mul(s2_3, M4(1.462e-01, 1.627e-01, -2.231e-01, 1.400e-01, 2.744e-02, -6.890e-02, 7.386e-02, 9.150e-02, 2.180e-02, -3.675e-02, 2.772e-01, 1.038e-02, -1.079e-01, -2.438e-02, -7.679e-02, 4.643e-02)); + r += mul(s2_4, M4(2.548e-03, 2.263e-01, -4.712e-02, 1.055e-01, 9.417e-02, 1.440e-02, -1.447e-01, 1.665e-01, -6.626e-02, -8.829e-02, -1.114e-01, 1.139e-02, -9.637e-03, 1.324e-01, 7.510e-04, 1.059e-01)); + r += mul(s2_5, M4(9.021e-04, -2.296e-02, 9.399e-02, 3.111e-02, 9.295e-02, 1.431e-02, -6.671e-02, 2.304e-02, 7.550e-02, 3.776e-02, 3.722e-02, -2.511e-01, 1.162e-01, 7.330e-02, 8.577e-02, 5.076e-02)); + r += mul(s2_6, M4(-9.806e-02, 5.990e-03, 7.983e-02, 3.230e-02, 2.660e-02, -8.077e-02, 1.827e-02, -6.324e-03, 4.518e-03, 8.430e-02, 2.824e-03, -1.820e-01, -1.280e-02, -5.062e-02, -1.508e-02, 1.335e-01)); + r += mul(s2_7, M4(-5.362e-02, -1.034e-01, 1.001e-01, -9.435e-02, 4.474e-02, 1.027e-01, 1.235e-02, -2.666e-02, -6.371e-02, 1.592e-01, -3.220e-01, 1.046e-01, -1.547e-01, 3.331e-02, 9.219e-02, 7.064e-02)); + r += mul(s2_8, M4(-6.826e-02, -9.174e-03, -3.788e-03, 5.773e-02, 3.371e-02, -5.084e-02, -4.686e-02, -3.770e-02, 3.344e-02, -3.752e-02, 8.161e-02, -9.928e-02, 1.241e-01, 1.417e-02, -2.901e-02, 6.009e-02)); + r += mul(s3_0, M4(4.130e-02, -1.358e-01, 3.513e-02, 6.201e-02, 8.036e-02, -1.246e-01, 4.951e-02, -1.407e-01, -4.433e-02, 2.066e-02, 3.889e-02, 2.549e-02, -3.015e-02, 
3.265e-02, 7.889e-03, 3.115e-02)); + r += mul(s3_1, M4(-4.670e-02, 8.732e-04, 1.647e-01, 9.340e-02, -1.168e-01, 2.860e-01, 1.757e-02, -1.000e-01, -8.647e-02, -1.377e-01, -7.349e-03, -1.694e-01, 2.130e-02, -7.362e-02, 3.712e-02, -6.965e-02)); + r += mul(s3_2, M4(8.724e-03, 8.015e-02, 1.968e-02, -1.248e-02, -8.995e-02, 1.331e-01, -1.200e-01, -2.515e-02, -6.216e-02, 3.787e-02, -6.308e-02, 1.125e-02, -5.869e-02, -3.633e-02, -6.967e-02, -1.544e-02)); + r += mul(s3_3, M4(1.343e-01, 2.496e-02, -1.764e-02, -2.927e-02, 6.824e-02, 1.185e-01, -4.921e-02, 1.657e-02, 9.625e-02, -8.617e-03, 4.970e-02, 1.233e-01, 1.569e-02, -1.895e-01, -4.864e-02, 2.237e-02)); + r += mul(s3_4, M4(7.422e-02, 8.261e-02, 1.292e-02, 3.619e-02, 5.032e-03, 6.076e-02, 4.037e-02, -5.895e-02, 1.668e-01, -2.898e-02, 1.089e-01, -4.847e-02, -5.325e-03, -6.417e-02, 1.832e-01, 6.374e-02)); + r += mul(s3_5, M4(-2.028e-01, 7.917e-02, -1.164e-01, 6.820e-02, 1.844e-01, -7.435e-02, 1.413e-01, -8.915e-02, 3.007e-02, -9.739e-03, -1.970e-02, 1.246e-02, 3.375e-02, 7.348e-02, -5.958e-03, 2.043e-02)); + r += mul(s3_6, M4(7.156e-02, 4.010e-02, -1.883e-02, -1.154e-02, -6.288e-02, -1.291e-01, -6.146e-03, -5.936e-02, -1.177e-01, 3.663e-02, 4.254e-02, 2.737e-02, -2.011e-02, -1.161e-02, 2.790e-02, -9.127e-02)); + r += mul(s3_7, M4(-5.564e-02, -1.684e-02, 6.531e-03, -1.219e-02, 1.358e-02, 7.495e-02, -7.805e-03, -1.208e-01, -2.593e-02, -3.261e-02, 4.597e-02, -2.609e-02, 1.406e-02, 5.211e-02, -2.130e-02, -3.469e-02)); + r += mul(s3_8, M4(-6.137e-03, 3.430e-02, -2.481e-02, 9.161e-02, 3.659e-02, -9.274e-03, 1.206e-02, -3.099e-03, 1.999e-02, 1.012e-01, 1.704e-02, 6.203e-02, 1.089e-01, -1.184e-01, 5.553e-02, 2.902e-02)); + r += mul(s4_0, M4(1.461e-02, -1.551e-01, -2.045e-01, -9.320e-02, 1.397e-02, -7.823e-02, -1.060e-02, 2.077e-02, 3.258e-02, 5.564e-03, -2.344e-02, -4.559e-02, 4.307e-02, 7.499e-02, -5.465e-03, -1.686e-01)); + r += mul(s4_1, M4(-2.851e-02, -3.928e-02, 4.599e-02, -2.926e-02, 2.746e-02, 1.332e-01, -9.686e-02, 3.274e-02, 
-4.287e-02, -4.209e-02, -2.688e-02, -1.460e-01, -3.641e-03, 2.614e-02, -1.196e-01, 2.033e-03)); + r += mul(s4_2, M4(-7.486e-02, 9.743e-03, 8.107e-02, 4.110e-02, 6.560e-02, -1.585e-02, 4.181e-02, 7.497e-04, -1.332e-02, -3.652e-02, -2.131e-01, 2.791e-02, 1.158e-01, 2.130e-02, 4.607e-02, -4.526e-02)); + r += mul(s4_3, M4(3.577e-02, 1.180e-01, -1.284e-01, 1.409e-01, -3.428e-02, -1.468e-02, 2.435e-02, -1.144e-01, 7.831e-03, -2.041e-03, -4.198e-02, 4.532e-02, 1.086e-01, -1.578e-01, -3.489e-01, -1.284e-01)); + r += mul(s4_4, M4(8.087e-02, -1.882e-01, -1.204e-01, 7.248e-02, 1.596e-02, -7.498e-02, -6.765e-02, -2.250e-01, 1.619e-01, -6.744e-02, 2.562e-01, 1.598e-01, 1.870e-01, 2.074e-02, 1.371e-01, -1.859e-02)); + r += mul(s4_5, M4(-6.755e-03, -9.031e-02, 7.718e-02, 2.074e-02, -1.469e-01, -6.064e-02, -7.587e-02, -2.157e-02, -2.652e-02, -6.666e-02, 8.185e-02, 8.830e-02, -6.855e-03, -2.328e-04, -8.399e-03, 6.273e-03)); + r += mul(s4_6, M4(9.535e-02, -8.699e-02, 6.513e-02, 1.189e-02, 2.226e-03, -6.388e-02, 2.262e-02, 8.386e-02, 2.201e-02, -7.890e-02, -1.358e-02, -6.582e-02, -4.518e-02, -1.899e-02, -3.881e-02, -7.528e-02)); + r += mul(s4_7, M4(8.222e-02, 1.725e-02, 5.878e-02, 8.868e-02, 6.476e-03, -5.890e-02, 4.474e-02, -7.509e-03, 5.000e-02, -9.954e-02, 1.173e-02, 1.237e-01, 1.682e-01, 1.572e-01, -1.142e-01, 4.117e-02)); + r += mul(s4_8, M4(7.252e-03, -6.196e-02, 8.715e-03, -5.785e-02, 7.157e-02, -8.942e-03, -2.105e-02, 4.087e-02, 3.523e-02, 1.719e-04, 1.897e-02, -4.083e-02, 9.964e-02, 3.833e-02, 8.276e-02, -4.575e-02)); + r += mul(s5_0, M4(9.987e-02, 1.522e-01, -3.985e-02, -1.547e-01, -4.716e-02, -1.756e-01, 7.039e-02, -8.765e-02, 7.006e-02, -2.538e-02, -3.653e-02, 6.944e-02, -3.986e-02, 3.677e-03, -5.529e-02, 4.638e-02)); + r += mul(s5_1, M4(7.532e-02, 8.089e-02, -1.424e-01, -5.129e-02, -2.033e-01, 2.939e-01, 1.290e-01, -1.441e-01, 1.903e-02, 2.039e-02, 3.890e-02, 9.281e-02, -8.341e-02, -2.442e-01, 1.805e-01, -6.189e-02)); + r += mul(s5_2, M4(9.542e-02, 9.900e-03, 5.219e-02, 
7.857e-03, -1.400e-02, 6.051e-02, -8.673e-03, -2.225e-03, 4.251e-02, 1.045e-01, -1.373e-01, -1.750e-02, -9.371e-02, -1.106e-02, 1.124e-02, -2.598e-02)); + r += mul(s5_3, M4(-1.135e-01, 4.119e-02, -1.192e-01, -6.486e-02, 1.198e-01, 1.593e-01, -1.023e-01, 2.825e-02, 1.075e-02, -4.070e-02, 2.593e-02, 4.017e-04, 1.167e-02, -1.509e-01, -2.241e-02, 6.627e-02)); + r += mul(s5_4, M4(-1.352e-01, 8.684e-02, -9.081e-02, 1.318e-02, 2.321e-01, 1.925e-02, 4.704e-02, -4.626e-02, 1.427e-01, -2.476e-02, 1.993e-01, -1.589e-01, 8.974e-02, -1.029e-01, 2.140e-01, 1.650e-01)); + r += mul(s5_5, M4(-1.082e-02, 2.109e-02, 2.120e-02, -1.738e-02, -5.482e-02, -8.325e-02, -1.201e-01, 7.260e-02, 1.420e-01, -1.273e-01, 3.075e-02, -5.140e-02, 2.211e-02, -1.099e-01, 2.451e-03, -3.116e-02)); + r += mul(s5_6, M4(-1.259e-01, -3.762e-02, -2.236e-02, 8.611e-03, 6.435e-03, -9.660e-03, 4.382e-02, -7.011e-02, 2.592e-03, -4.034e-02, 1.003e-02, -3.352e-02, -3.696e-02, -9.924e-02, -3.357e-02, -8.388e-02)); + r += mul(s5_7, M4(-8.994e-02, 4.535e-02, 1.292e-01, 2.478e-02, 7.344e-02, -6.045e-03, -2.870e-02, -3.708e-02, 3.571e-03, -7.399e-02, 5.175e-02, -6.299e-02, 1.470e-01, 2.108e-02, 1.206e-01, -7.442e-02)); + r += mul(s5_8, M4(5.749e-02, -7.397e-02, 2.704e-02, -8.600e-02, 3.798e-02, 8.833e-02, -2.785e-01, -5.148e-02, 1.037e-01, 3.420e-02, 5.157e-03, -4.084e-02, 1.466e-02, 5.439e-02, -1.119e-01, -7.561e-03)); + r += mul(s6_0, M4(2.742e-02, -6.494e-02, -4.966e-02, 1.804e-02, 2.692e-02, 2.641e-02, 9.796e-02, -1.535e-02, 7.345e-02, -4.529e-02, 3.403e-02, -6.150e-02, -5.188e-01, -1.514e-01, 2.775e-01, -2.474e-01)); + r += mul(s6_1, M4(-2.972e-02, 1.199e-01, 1.224e-01, -8.796e-02, -8.309e-04, -1.016e-01, 7.922e-02, -2.243e-02, 6.198e-02, 2.305e-01, -1.473e-02, 1.081e-03, -1.955e-01, -4.880e-01, -1.037e-01, -1.343e-01)); + r += mul(s6_2, M4(-7.005e-02, 1.528e-01, 3.989e-02, -4.230e-03, 1.467e-02, 2.213e-02, 7.629e-03, 7.286e-02, 6.699e-02, -4.735e-02, 4.664e-02, -5.761e-02, -2.448e-01, 4.807e-02, 1.443e-01, 
1.173e-01)); + r += mul(s6_3, M4(-4.551e-02, 1.228e-01, 8.698e-03, -7.878e-02, 1.082e-01, 5.290e-02, 1.432e-02, 8.177e-03, -4.298e-02, -4.812e-02, -6.579e-02, 4.557e-04, 4.737e-01, 2.855e-01, -2.542e-02, -5.632e-02)); + r += mul(s6_4, M4(4.002e-02, 1.973e-01, -9.862e-02, -5.884e-02, 1.733e-01, 6.369e-02, -7.105e-02, -8.774e-03, -1.410e-01, -1.482e-02, -1.963e-01, 9.780e-02, 3.215e-01, -1.311e-01, -2.946e-01, 7.310e-02)); + r += mul(s6_5, M4(9.056e-02, 5.977e-02, 2.272e-02, -8.342e-04, -6.023e-02, -2.184e-03, -4.654e-02, -2.816e-02, 3.157e-02, -1.729e-01, -9.960e-02, -6.080e-02, 5.023e-01, 3.177e-01, 3.157e-01, 5.138e-02)); + r += mul(s6_6, M4(2.325e-02, -5.307e-03, 5.136e-02, 6.784e-02, -2.850e-03, -5.042e-03, -6.052e-02, 9.778e-02, 1.223e-01, -7.348e-02, -2.520e-02, -6.094e-02, 5.661e-02, -4.876e-01, 1.305e-01, 1.261e-01)); + r += mul(s6_7, M4(6.601e-02, 3.159e-02, -4.271e-02, -2.147e-02, -1.041e-01, -9.329e-02, 7.523e-02, 5.455e-02, -1.355e-01, 1.018e-01, 3.088e-02, 1.566e-02, -3.210e-01, -4.737e-01, -1.933e-01, 3.030e-01)); + r += mul(s6_8, M4(-1.088e-02, -4.249e-03, -5.350e-02, 1.114e-02, -2.212e-02, 2.213e-02, -7.774e-02, -2.154e-04, 9.930e-03, 4.165e-02, 4.084e-02, -1.120e-01, -1.455e-01, 1.165e-01, 2.067e-01, 2.070e-01)); + r += mul(s7_0, M4(1.044e-02, -6.727e-02, -4.017e-02, 1.033e-01, -9.177e-02, 3.331e-02, 6.122e-02, -5.224e-02, -5.090e-03, -4.668e-02, -5.551e-02, 9.474e-02, -9.753e-03, 1.037e-02, 8.602e-03, -2.863e-02)); + r += mul(s7_1, M4(7.762e-02, 1.322e-01, 3.840e-02, -1.186e-01, -4.493e-02, -7.851e-02, 1.150e-01, -9.820e-02, 1.233e-01, 6.101e-03, -5.135e-02, -8.499e-02, -8.867e-03, 5.269e-02, 2.433e-02, 2.043e-02)); + r += mul(s7_2, M4(-2.944e-02, -4.536e-02, 1.101e-01, 5.716e-02, -2.883e-03, -1.386e-01, -3.786e-02, 1.342e-01, 1.657e-03, 8.720e-02, 4.810e-02, -4.211e-02, 2.582e-02, -2.044e-02, 4.881e-02, 1.751e-02)); + r += mul(s7_3, M4(-2.891e-02, 2.374e-02, -1.471e-01, -1.000e-01, 1.052e-01, 1.472e-01, -3.824e-02, 1.123e-01, -1.409e-02, 
1.099e-02, 3.039e-03, 2.359e-02, -3.325e-03, -1.590e-02, 5.841e-03, 6.255e-03)); + r += mul(s7_4, M4(-7.364e-02, 4.827e-02, -1.356e-01, -2.224e-01, 2.287e-01, -1.602e-01, -2.252e-01, -1.203e-02, -1.392e-01, 1.150e-01, -2.736e-01, 2.918e-02, 2.921e-02, 9.439e-02, -3.142e-02, 1.954e-02)); + r += mul(s7_5, M4(-4.423e-02, -1.939e-01, -1.404e-02, 1.489e-01, 2.856e-02, 4.562e-02, 1.123e-01, 3.050e-02, 7.547e-02, 1.469e-01, -4.395e-02, 4.094e-02, 2.145e-02, 4.439e-02, -4.958e-02, 2.650e-02)); + r += mul(s7_6, M4(2.550e-02, -1.502e-01, 6.659e-02, -3.936e-02, -8.184e-02, -5.807e-02, 2.311e-01, -5.460e-02, 2.067e-03, -1.785e-02, -8.422e-03, -1.067e-01, 1.877e-02, 3.037e-02, -8.897e-04, 1.152e-01)); + r += mul(s7_7, M4(-2.983e-02, -2.250e-01, 9.011e-02, -2.115e-01, -1.220e-01, -1.328e-01, -5.793e-02, -1.087e-01, 2.961e-03, 4.061e-02, -2.935e-02, 1.711e-02, 4.291e-02, 3.807e-02, -5.370e-02, 2.247e-02)); + r += mul(s7_8, M4(-4.709e-02, -6.844e-03, -1.067e-01, 9.606e-02, -4.139e-02, 5.400e-02, 6.785e-02, 2.060e-02, 1.034e-02, 1.440e-02, -3.056e-02, -4.364e-02, -5.640e-02, -2.057e-03, -3.250e-02, 5.321e-02)); + r += V4(3.028e-04, 1.783e-02, -7.192e-03, -2.224e-02); + return r; +} + +void Pass14(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 
= max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 
0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, 
s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 15 +//!DESC conv14 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.520e-02, 9.061e-03, 7.165e-03, -5.094e-02, -1.655e-02, 4.687e-02, -6.105e-02, 3.508e-02, -1.043e-01, 2.113e-01, 4.505e-03, 8.240e-02, -4.876e-02, -9.482e-02, -7.693e-02, 1.161e-01)); + r += mul(s0_1, M4(4.078e-02, 1.289e-02, -8.984e-04, 3.727e-04, 4.315e-02, 1.979e-02, 2.196e-02, 9.723e-03, 8.258e-02, -1.150e-01, 5.587e-02, -2.661e-02, 3.970e-02, -8.844e-02, -1.401e-01, -3.586e-02)); + r += mul(s0_2, M4(1.090e-01, 4.265e-02, 2.487e-02, 7.634e-03, 5.478e-02, 9.598e-03, 5.827e-04, 8.746e-03, -9.220e-02, 5.258e-03, -1.203e-03, 3.657e-02, -1.666e-01, 1.389e-01, -1.942e-01, 3.285e-02)); + r += mul(s0_3, M4(-1.605e-02, 1.765e-02, -2.930e-02, 5.127e-02, 1.461e-01, 5.289e-02, 9.307e-02, -3.053e-02, -1.241e-02, 5.344e-02, 1.463e-01, -1.771e-03, -7.651e-02, -2.550e-01, -6.467e-02, 1.428e-01)); + r += mul(s0_4, M4(-2.380e-02, -1.061e-01, 
-7.748e-02, 6.133e-02, -3.672e-03, 3.549e-02, 1.001e-01, 7.683e-02, 9.351e-02, 1.099e-01, 1.646e-02, -1.912e-01, -2.428e-01, 1.356e-01, -2.369e-01, -1.945e-01)); + r += mul(s0_5, M4(7.313e-02, 1.045e-01, 2.327e-02, 2.226e-02, -1.120e-01, 2.471e-03, -9.548e-02, -5.460e-02, -4.372e-02, -6.836e-03, 3.352e-02, -3.382e-02, -1.532e-01, -9.464e-02, -4.272e-02, -5.013e-02)); + r += mul(s0_6, M4(-9.837e-03, -1.886e-02, -3.552e-02, 2.822e-02, -3.780e-02, 4.716e-02, 5.778e-03, 1.144e-02, -5.002e-02, -1.351e-01, -5.085e-02, 8.309e-03, -6.870e-02, -3.705e-02, -3.342e-02, 1.377e-01)); + r += mul(s0_7, M4(-2.443e-02, 6.948e-02, -2.911e-02, 3.220e-02, 3.894e-02, 2.226e-03, -5.461e-02, -3.934e-03, 2.554e-02, 1.352e-01, 5.141e-02, 1.554e-02, -1.683e-01, -5.017e-02, -1.017e-01, 6.619e-02)); + r += mul(s0_8, M4(-3.691e-03, 2.178e-02, -3.334e-02, -1.720e-02, 2.287e-02, -3.840e-03, -1.644e-02, 7.498e-03, 2.399e-02, -4.482e-03, 7.416e-02, 9.260e-02, -1.417e-02, 4.232e-02, -2.408e-02, 2.538e-02)); + r += mul(s1_0, M4(9.408e-02, 1.775e-02, 9.153e-02, -9.538e-02, -3.261e-02, -2.224e-01, 4.754e-03, 1.990e-02, 4.197e-02, -7.009e-02, 4.511e-02, -6.852e-02, -8.372e-02, -2.756e-02, -5.055e-02, 9.655e-02)); + r += mul(s1_1, M4(1.575e-01, -3.467e-01, 1.024e-01, -1.172e-01, -1.326e-01, 1.287e-01, -1.606e-01, -7.318e-02, 1.028e-01, 8.486e-02, 1.902e-01, 3.564e-02, -3.728e-02, -5.029e-02, -1.036e-01, -3.348e-02)); + r += mul(s1_2, M4(-6.901e-02, 1.075e-01, -1.412e-01, -1.619e-01, 4.286e-02, 1.851e-02, -3.348e-02, 2.328e-02, -1.746e-02, -1.882e-01, 7.348e-02, -3.591e-02, -1.307e-01, 4.958e-02, -4.621e-02, 2.819e-02)); + r += mul(s1_3, M4(-8.378e-02, -2.983e-02, -1.650e-01, 1.058e-01, -4.490e-02, -4.505e-02, -1.475e-01, 4.760e-02, 1.118e-01, -1.117e-01, 8.818e-02, 1.330e-01, -5.398e-02, -8.459e-02, -2.726e-02, 1.897e-02)); + r += mul(s1_4, M4(-9.076e-02, -1.981e-01, -3.018e-01, 3.338e-01, 1.259e-01, 2.661e-01, -3.924e-01, 1.243e-01, -5.296e-02, 5.934e-02, 3.021e-02, 1.854e-01, -1.158e-01, -3.895e-02, 
-1.878e-01, -2.013e-02)); + r += mul(s1_5, M4(1.515e-01, 2.450e-01, 1.439e-01, -9.116e-02, 1.503e-01, -4.574e-02, 8.095e-04, 2.724e-02, 9.322e-02, -7.895e-02, 9.896e-02, 1.443e-02, -6.188e-02, 1.225e-02, -1.009e-01, 8.640e-02)); + r += mul(s1_6, M4(-5.459e-02, -2.330e-02, 6.539e-02, -9.657e-02, -3.381e-02, -8.115e-02, -1.265e-01, -9.178e-02, -2.206e-02, -2.231e-02, 5.822e-02, 9.318e-02, -2.048e-02, 2.159e-02, -2.368e-02, 1.695e-02)); + r += mul(s1_7, M4(7.189e-02, -2.730e-02, -3.870e-02, 7.726e-03, 3.076e-02, -4.018e-02, -2.288e-01, -1.779e-02, -6.300e-02, 5.290e-02, 5.645e-02, 2.506e-03, 2.425e-04, -1.690e-02, -2.466e-02, -1.409e-02)); + r += mul(s1_8, M4(-2.173e-02, 8.077e-02, 7.793e-02, 6.126e-02, 4.669e-02, -1.724e-01, 3.943e-02, 5.754e-02, -1.738e-03, 2.877e-02, -1.084e-01, 2.391e-02, 1.628e-04, 5.397e-02, 2.224e-02, -8.554e-03)); + r += mul(s2_0, M4(-4.331e-03, -5.867e-02, -3.177e-02, 6.613e-02, 9.585e-02, -7.778e-02, -2.847e-02, 7.286e-02, 1.227e-02, -2.133e-02, 2.420e-02, -6.251e-02, 6.880e-02, -3.772e-02, -3.381e-02, -4.090e-02)); + r += mul(s2_1, M4(-1.133e-02, 5.064e-02, -1.051e-01, 3.497e-02, 1.026e-01, 1.407e-02, 1.533e-01, -4.692e-02, -1.064e-01, 1.273e-01, 1.226e-01, 4.135e-02, 7.598e-03, -3.096e-01, 1.763e-01, -7.671e-02)); + r += mul(s2_2, M4(2.982e-02, -2.636e-02, -6.052e-03, 1.350e-02, -1.630e-02, 6.478e-02, 1.856e-01, -1.314e-01, 9.092e-02, 1.019e-02, 6.708e-02, 2.351e-02, -4.831e-03, -1.038e-01, 5.885e-02, 7.098e-02)); + r += mul(s2_3, M4(1.248e-01, -6.314e-02, -4.520e-02, 4.632e-02, 2.206e-02, 7.499e-03, 6.513e-02, 1.149e-02, 4.001e-02, 8.358e-02, 9.838e-02, -1.446e-01, -1.403e-01, 4.578e-02, 1.191e-01, 1.155e-02)); + r += mul(s2_4, M4(-1.180e-01, 6.489e-02, -3.881e-02, -7.020e-02, 2.950e-01, 5.144e-02, 5.632e-02, 2.020e-01, -2.347e-01, -1.468e-01, 1.400e-02, -9.934e-02, -8.827e-02, 4.938e-02, 1.991e-01, -2.667e-01)); + r += mul(s2_5, M4(-1.132e-02, -1.172e-01, 4.511e-02, -7.538e-03, 1.501e-01, 4.288e-02, 7.285e-02, 8.796e-02, -1.319e-02, 
-6.738e-03, -1.969e-02, 2.739e-02, -9.828e-02, -3.924e-01, -6.699e-02, 8.011e-02)); + r += mul(s2_6, M4(-1.414e-01, 1.814e-02, -1.186e-03, -1.490e-02, -3.390e-02, 2.791e-02, -1.435e-02, 3.310e-02, 3.829e-02, -2.508e-02, 2.153e-02, 1.156e-02, 3.513e-02, 9.952e-03, 1.705e-02, 4.052e-02)); + r += mul(s2_7, M4(-7.547e-02, 7.771e-03, 5.824e-02, -1.556e-02, -6.012e-03, 2.857e-02, -4.784e-03, -3.217e-02, -4.018e-02, 4.216e-02, -6.089e-02, 6.490e-02, 1.298e-01, -7.631e-02, 5.301e-02, -1.944e-02)); + r += mul(s2_8, M4(-6.371e-02, -1.356e-02, -1.181e-02, -1.188e-02, -5.207e-03, -1.543e-02, -1.549e-02, -1.720e-02, 3.732e-02, 8.105e-03, -6.487e-02, -1.049e-01, 5.857e-02, -7.736e-02, -1.105e-01, 9.924e-03)); + r += mul(s3_0, M4(1.502e-02, 2.684e-02, -1.408e-02, 3.120e-02, 9.221e-02, -2.707e-02, 5.912e-02, 1.144e-02, -1.262e-02, 1.442e-02, -1.448e-02, -6.457e-03, -1.292e-02, -3.652e-02, 3.969e-02, 2.617e-02)); + r += mul(s3_1, M4(-1.324e-01, 4.015e-02, 9.011e-02, -6.464e-02, 1.221e-01, 1.459e-01, 1.162e-01, -1.016e-01, 9.380e-02, 7.543e-02, 9.329e-02, 5.384e-02, 2.409e-03, -7.853e-02, -7.205e-02, 2.276e-02)); + r += mul(s3_2, M4(4.667e-02, -7.248e-02, 4.400e-02, -2.877e-02, 1.212e-01, -1.721e-01, 9.711e-02, -6.876e-02, 9.737e-02, 4.954e-03, -1.006e-02, 6.376e-03, -6.289e-02, 1.046e-01, -2.945e-02, 4.816e-03)); + r += mul(s3_3, M4(1.090e-02, 5.755e-02, 4.615e-02, -1.015e-01, -9.423e-02, -2.077e-02, -3.823e-02, 6.670e-02, -5.715e-03, 1.179e-01, 3.552e-02, -1.993e-02, 4.955e-02, -4.377e-02, -1.386e-02, -2.830e-02)); + r += mul(s3_4, M4(-3.189e-01, 1.007e-01, -1.225e-01, -2.353e-01, 1.082e-01, -1.464e-01, -1.124e-01, -5.875e-03, -3.078e-01, -1.370e-01, -4.142e-02, -1.133e-01, 6.690e-03, 3.615e-02, 1.928e-01, -7.727e-02)); + r += mul(s3_5, M4(3.230e-02, -4.933e-02, 2.392e-02, 5.883e-03, 1.015e-02, 5.707e-02, -4.644e-02, -3.431e-03, 1.505e-02, -2.291e-02, 4.979e-02, 5.956e-02, -1.830e-01, -1.241e-02, -6.895e-03, -2.365e-01)); + r += mul(s3_6, M4(-3.668e-03, -1.234e-02, -5.362e-02, 
-3.682e-02, -1.528e-02, -8.507e-03, -3.542e-02, 1.764e-02, -1.484e-02, -3.709e-02, 5.443e-02, 4.533e-02, -5.981e-03, -6.767e-04, 4.552e-02, 2.928e-02)); + r += mul(s3_7, M4(-1.871e-03, 1.066e-01, 1.668e-01, 2.620e-02, 3.353e-02, -3.347e-02, -2.437e-02, -8.058e-02, -2.368e-03, -1.985e-02, -3.304e-02, 9.884e-02, 4.154e-02, 2.005e-02, 2.131e-03, 8.056e-03)); + r += mul(s3_8, M4(-5.042e-03, -2.574e-05, 4.282e-02, -2.479e-03, -4.246e-02, 3.508e-02, 8.919e-03, -4.266e-02, -4.886e-04, -5.371e-02, -3.269e-02, -2.143e-02, 1.787e-02, -1.015e-01, -5.345e-02, -1.446e-02)); + r += mul(s4_0, M4(-9.572e-02, 2.337e-02, -7.165e-02, -4.728e-02, 6.882e-02, -1.117e-01, -3.524e-03, 9.028e-02, 3.381e-02, -5.153e-02, 3.482e-02, 7.497e-03, -7.289e-02, 3.443e-02, -2.239e-02, -7.685e-02)); + r += mul(s4_1, M4(1.020e-01, -1.108e-01, -8.442e-03, -5.390e-02, 1.077e-01, 3.672e-01, -1.139e-01, 6.457e-03, -1.226e-01, 3.169e-02, -2.123e-02, -1.163e-01, -2.036e-02, -4.896e-02, 8.310e-03, -5.898e-03)); + r += mul(s4_2, M4(-5.486e-02, -4.220e-02, 1.462e-03, 2.205e-02, 1.165e-01, -7.539e-02, 4.054e-02, 2.338e-02, -1.018e-01, -3.474e-02, 3.885e-02, -2.415e-02, -3.153e-02, -1.360e-01, 1.864e-02, -5.074e-03)); + r += mul(s4_3, M4(5.259e-02, 2.241e-02, 7.516e-02, -3.380e-02, -1.733e-01, -2.973e-03, -3.853e-02, 1.565e-01, 5.921e-02, -4.893e-02, 2.826e-02, -3.375e-02, -1.792e-02, -7.939e-02, 1.088e-01, -3.121e-03)); + r += mul(s4_4, M4(6.188e-02, -8.176e-02, -7.622e-02, 7.011e-02, 4.541e-03, 9.299e-02, -9.473e-02, 2.367e-01, -5.478e-03, 3.866e-03, 2.117e-01, -2.372e-01, -8.894e-03, 2.171e-01, 3.491e-02, 1.617e-01)); + r += mul(s4_5, M4(-4.379e-02, -1.588e-02, 5.588e-02, 6.287e-02, -5.315e-02, 3.877e-02, -7.158e-02, 4.891e-02, -5.016e-02, -1.108e-01, 5.351e-02, -5.953e-02, -7.411e-02, -6.710e-02, -2.914e-02, 3.205e-02)); + r += mul(s4_6, M4(-7.663e-03, 9.218e-02, 6.042e-02, -1.563e-03, -7.020e-02, 1.212e-02, -3.199e-02, 9.813e-03, -4.041e-02, 6.082e-02, 9.936e-02, 3.931e-02, -1.277e-03, -7.841e-02, 
1.318e-02, 7.572e-02)); + r += mul(s4_7, M4(3.668e-02, -9.931e-02, 1.053e-01, -9.885e-03, 4.724e-02, -8.108e-02, -7.853e-02, -5.578e-02, 1.704e-02, 1.017e-01, 2.760e-02, -4.296e-02, -2.657e-02, 2.308e-02, -7.335e-02, 4.273e-02)); + r += mul(s4_8, M4(5.012e-02, 1.806e-02, -4.231e-02, 1.013e-02, 7.312e-02, -4.376e-02, -4.732e-02, 3.329e-02, -4.193e-02, -7.620e-02, -1.200e-01, -3.128e-02, 6.742e-02, 3.106e-02, 8.698e-03, 1.523e-03)); + r += mul(s5_0, M4(-1.065e-01, 4.137e-02, -1.773e-01, -2.320e-03, -1.468e-02, -8.848e-02, -6.165e-02, 3.390e-02, 7.447e-02, 6.137e-02, 1.329e-02, -4.660e-02, 1.301e-01, -5.199e-02, 2.477e-02, -3.378e-02)); + r += mul(s5_1, M4(-1.872e-01, -2.124e-01, -2.401e-02, 4.330e-02, 8.121e-02, 1.337e-01, -1.342e-03, 4.743e-02, -2.453e-02, -9.993e-02, -3.550e-02, -6.161e-02, 1.301e-01, 2.105e-01, -5.685e-02, 7.897e-02)); + r += mul(s5_2, M4(2.227e-03, -4.188e-02, -1.059e-02, -2.907e-02, -1.974e-02, -1.839e-02, -5.555e-02, -2.603e-02, 6.994e-03, 1.302e-01, 4.003e-02, 1.953e-02, 7.807e-02, 3.923e-02, 9.819e-03, -1.881e-02)); + r += mul(s5_3, M4(5.839e-02, 1.733e-02, -1.987e-02, -4.801e-02, -1.187e-01, -4.304e-02, -3.163e-02, -3.404e-03, -1.042e-01, 9.301e-02, -6.772e-02, -1.653e-02, -1.270e-02, 7.968e-02, 3.777e-02, -6.998e-02)); + r += mul(s5_4, M4(1.221e-01, -9.160e-02, 1.586e-02, 1.643e-01, -1.095e-02, 5.873e-02, 8.203e-03, 2.496e-02, -3.622e-02, -9.396e-02, 3.843e-02, 1.405e-01, -4.632e-02, -2.124e-02, 5.954e-02, -4.145e-02)); + r += mul(s5_5, M4(1.068e-02, -1.200e-01, 1.693e-02, 6.453e-03, 1.618e-02, -8.935e-03, 1.501e-02, 1.059e-02, 1.518e-01, -5.564e-02, 8.039e-02, 1.901e-01, -7.955e-02, -5.667e-02, 1.806e-02, 1.921e-02)); + r += mul(s5_6, M4(-8.751e-02, -1.101e-02, 6.480e-03, 4.478e-02, -2.982e-02, -1.486e-02, 2.435e-02, 2.169e-02, -1.068e-02, -2.210e-04, 1.688e-02, -2.216e-02, 9.956e-02, 5.408e-02, 1.172e-02, -9.444e-02)); + r += mul(s5_7, M4(-1.755e-02, -3.678e-02, 8.716e-02, -2.128e-02, -1.422e-02, 4.511e-02, -4.518e-02, -9.332e-03, 
4.277e-03, 4.233e-02, -2.754e-02, 2.549e-02, 1.461e-01, -1.102e-01, -4.402e-02, -9.835e-02)); + r += mul(s5_8, M4(1.146e-02, 2.644e-02, -1.486e-02, -3.141e-02, 4.945e-02, -6.558e-02, -2.407e-02, 1.095e-02, 2.877e-02, -2.930e-02, -1.080e-01, 8.884e-02, -1.842e-02, 1.077e-01, -4.447e-02, -3.499e-02)); + r += mul(s6_0, M4(1.392e-01, -1.479e-01, 1.441e-01, -1.314e-01, 9.271e-02, 4.641e-02, -1.609e-02, -4.029e-02, 1.813e-01, -1.382e-01, 1.622e-01, -2.597e-03, 7.643e-03, 4.263e-02, 4.371e-02, -1.623e-02)); + r += mul(s6_1, M4(-8.154e-02, 1.029e-01, -8.564e-02, -2.568e-02, -2.019e-02, 5.061e-02, -4.032e-02, -9.548e-02, -2.244e-01, 2.325e-01, 6.855e-02, -1.146e-02, -4.743e-02, -1.411e-01, 6.163e-02, 8.083e-02)); + r += mul(s6_2, M4(1.761e-01, 1.044e-01, 5.591e-04, 6.322e-02, 1.806e-01, -1.985e-02, -1.248e-03, -7.764e-02, 1.630e-03, 1.104e-01, 2.046e-02, -1.451e-02, -3.078e-02, 9.157e-03, 1.067e-02, 7.708e-02)); + r += mul(s6_3, M4(-1.378e-01, -8.672e-02, -9.009e-02, 1.598e-01, -1.113e-01, 1.244e-01, 8.502e-02, -8.122e-02, -3.216e-02, -6.801e-02, -1.447e-01, 1.940e-01, -5.016e-02, -4.162e-03, 2.224e-02, 1.066e-02)); + r += mul(s6_4, M4(-8.368e-02, -2.311e-02, 7.956e-02, -1.802e-01, -2.031e-01, 1.252e-01, -2.962e-01, -2.342e-01, -2.382e-01, 2.492e-01, 1.083e-01, 6.798e-03, -3.791e-02, -3.949e-02, 1.389e-01, 8.435e-02)); + r += mul(s6_5, M4(-3.845e-02, 9.809e-02, 3.301e-02, -2.086e-02, 4.575e-02, -6.166e-02, -1.299e-02, -3.755e-02, 1.417e-01, -2.103e-01, 1.357e-01, 1.637e-01, -1.740e-02, -4.272e-02, -1.657e-02, -4.078e-02)); + r += mul(s6_6, M4(6.272e-02, 9.681e-03, 4.013e-02, 4.030e-03, 1.786e-01, -9.018e-02, 7.949e-04, -1.558e-01, 7.173e-02, -1.150e-02, -3.239e-03, -1.407e-02, -1.486e-02, 1.438e-02, 1.303e-03, 1.205e-01)); + r += mul(s6_7, M4(8.837e-02, -3.278e-05, 2.059e-02, 4.563e-02, 2.288e-01, -3.011e-02, 3.220e-02, -1.446e-01, 3.866e-02, 1.946e-01, -6.171e-02, 4.608e-02, -7.248e-03, -9.509e-02, -1.101e-01, 2.514e-02)); + r += mul(s6_8, M4(6.715e-02, -3.799e-02, 
1.233e-02, 3.845e-02, 4.095e-02, 1.035e-01, -7.094e-04, 1.179e-02, -5.727e-02, 7.091e-02, 1.233e-02, 3.150e-02, -1.288e-01, -6.140e-02, 3.551e-02, 1.475e-02)); + r += mul(s7_0, M4(1.559e-02, -4.546e-02, -2.498e-02, -8.263e-03, -2.595e-02, 8.080e-02, 1.693e-02, -8.455e-03, 3.973e-02, 4.903e-02, -2.466e-02, -3.502e-02, 2.535e-02, -1.616e-02, 5.256e-02, 1.726e-02)); + r += mul(s7_1, M4(6.141e-02, -1.139e-01, 9.047e-02, -1.018e-02, 1.799e-02, -2.033e-02, 5.764e-02, -2.952e-02, 2.253e-02, -2.111e-02, 2.504e-02, -4.482e-02, 4.218e-02, -1.337e-01, 4.234e-02, -1.179e-02)); + r += mul(s7_2, M4(3.754e-02, 4.565e-02, 2.561e-02, 4.498e-04, 2.121e-02, -3.992e-02, 3.079e-02, -1.898e-02, 2.024e-03, 1.188e-02, 4.278e-02, -1.906e-02, -5.639e-02, 8.857e-02, -1.211e-02, -6.092e-02)); + r += mul(s7_3, M4(3.959e-02, 6.645e-02, 4.770e-02, 6.584e-02, -1.431e-02, 6.619e-02, 3.308e-02, -2.684e-02, 5.084e-02, 1.619e-02, -3.862e-02, 6.822e-02, -8.166e-02, -8.542e-03, 3.875e-02, 5.607e-02)); + r += mul(s7_4, M4(3.997e-03, -4.094e-02, 1.107e-01, 4.913e-02, -4.575e-02, 5.941e-03, -5.463e-02, -2.670e-02, 9.170e-02, -5.643e-03, -3.445e-02, -7.159e-03, 6.417e-02, -6.162e-02, 9.118e-02, 1.656e-02)); + r += mul(s7_5, M4(3.627e-02, 4.509e-02, 1.639e-02, 4.892e-02, -1.727e-02, 5.673e-02, 1.879e-03, 1.862e-02, -5.158e-02, 3.773e-02, -9.557e-02, -4.059e-02, -1.096e-02, -6.294e-02, -9.984e-02, -4.299e-02)); + r += mul(s7_6, M4(4.716e-02, 1.426e-02, -6.331e-03, 6.964e-02, -4.209e-02, 4.494e-02, -8.120e-03, -3.092e-02, 1.962e-02, -6.431e-02, -1.813e-02, 1.151e-02, -2.888e-02, -7.188e-02, -1.600e-02, 4.232e-02)); + r += mul(s7_7, M4(-9.275e-03, 3.692e-02, 1.951e-02, 3.721e-02, -9.679e-03, -6.941e-02, 5.761e-02, -1.165e-01, 4.882e-03, 1.219e-01, -3.004e-02, -5.402e-03, -7.408e-03, -5.023e-02, -3.673e-02, 2.835e-02)); + r += mul(s7_8, M4(2.414e-02, -1.292e-02, 7.358e-03, -1.921e-02, -6.249e-02, 8.742e-02, 3.769e-02, -2.660e-02, 2.220e-02, 4.532e-02, 1.897e-02, -5.224e-02, -4.258e-02, -6.833e-02, 6.953e-02, 
-4.832e-02)); + r += V4(1.156e-02, -9.983e-03, -9.430e-03, 1.703e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.276e-02, 3.292e-03, 4.991e-02, 1.294e-02, -2.029e-02, 5.556e-02, -3.299e-02, 2.250e-02, -3.107e-02, 1.075e-01, -4.560e-02, -7.375e-02, 8.055e-02, -4.802e-02, 1.061e-01, -7.426e-03)); + r += mul(s0_1, M4(-1.422e-03, -5.646e-02, 9.268e-02, 4.075e-02, -3.098e-02, 2.911e-03, 9.203e-02, -2.840e-02, 2.220e-02, -5.553e-02, -1.990e-01, 4.259e-02, -4.396e-02, -4.299e-02, 6.749e-02, 6.026e-02)); + r += mul(s0_2, M4(-5.280e-02, -5.906e-02, -1.509e-01, -4.675e-02, 1.426e-03, -1.100e-02, -2.137e-02, -6.311e-02, 3.037e-03, 3.620e-02, 1.628e-01, 1.833e-03, -1.729e-02, 1.082e-01, 3.468e-02, -4.741e-02)); + r += mul(s0_3, M4(4.507e-02, -1.317e-02, -4.522e-03, -3.344e-02, -8.228e-02, -4.930e-02, 2.119e-02, 1.648e-01, -1.810e-01, -4.608e-02, -4.415e-02, 6.678e-02, -7.990e-02, -3.225e-02, 2.832e-02, 8.739e-02)); + r += mul(s0_4, M4(5.128e-02, 9.946e-02, 9.086e-03, -2.913e-02, -6.700e-02, -2.583e-02, -3.194e-01, 1.770e-01, -1.268e-02, -8.137e-03, 3.033e-01, 1.250e-01, 2.784e-01, 1.067e-01, 3.498e-02, -4.576e-02)); + r += mul(s0_5, M4(4.732e-03, -5.209e-02, -2.234e-02, 3.862e-03, 1.159e-02, 2.303e-02, 1.346e-01, -2.197e-02, -6.371e-03, -1.477e-02, 2.830e-02, -7.637e-02, -3.757e-02, -7.859e-02, 2.042e-02, -1.091e-02)); + 
r += mul(s0_6, M4(6.354e-03, -3.975e-02, 2.362e-03, 2.314e-03, 6.096e-02, -2.846e-02, 4.908e-02, 1.946e-02, -3.325e-02, -3.436e-02, -6.850e-02, 2.433e-02, 3.478e-02, 1.909e-02, 4.114e-02, 1.673e-02)); + r += mul(s0_7, M4(4.378e-02, -5.888e-03, -3.222e-02, -4.879e-02, 4.807e-02, 1.345e-02, 1.072e-01, 3.260e-02, -9.788e-02, 1.194e-01, 2.676e-03, 4.524e-02, -4.491e-02, -1.929e-02, 6.574e-02, 1.128e-03)); + r += mul(s0_8, M4(-2.666e-02, -8.200e-03, -4.130e-03, -2.068e-02, 3.399e-02, 1.087e-02, -5.854e-04, -1.516e-02, 6.953e-02, -8.813e-02, -5.320e-02, 8.908e-03, -2.878e-02, -1.133e-02, 7.375e-02, -8.952e-03)); + r += mul(s1_0, M4(-2.563e-02, -7.491e-02, -9.621e-03, -1.227e-01, -3.171e-02, -8.522e-02, 6.067e-02, 9.253e-02, -2.715e-02, -7.993e-02, 1.507e-01, 1.848e-02, 1.079e-02, -7.792e-02, 7.729e-02, -4.658e-02)); + r += mul(s1_1, M4(1.892e-01, -3.155e-01, 1.893e-01, 3.392e-01, 3.614e-02, 4.659e-01, 1.033e-01, -1.887e-02, 4.401e-02, 6.167e-03, 2.962e-02, -8.396e-02, 2.313e-02, 1.164e-01, -3.696e-02, -5.339e-03)); + r += mul(s1_2, M4(-9.636e-03, -8.526e-02, 4.554e-02, -1.826e-01, 2.461e-02, -3.144e-02, -4.324e-02, 1.182e-02, -6.682e-02, 1.107e-02, 1.519e-01, 2.678e-03, -6.869e-03, -9.399e-03, -1.718e-01, -2.958e-02)); + r += mul(s1_3, M4(-4.291e-02, 4.962e-02, -1.813e-01, -1.020e-01, 9.803e-02, -9.502e-02, 1.171e-01, 1.104e-01, -3.743e-02, -4.506e-02, -1.147e-02, 6.518e-02, 1.088e-03, -1.364e-01, -1.902e-02, 4.867e-03)); + r += mul(s1_4, M4(3.483e-01, -1.074e-01, -8.095e-02, 4.811e-01, -2.252e-01, -1.259e-02, -6.223e-02, 2.527e-02, 4.883e-02, -2.908e-02, -1.090e-01, -1.353e-01, 8.798e-02, 1.076e-01, 4.333e-02, -1.812e-01)); + r += mul(s1_5, M4(1.269e-01, -9.028e-02, -1.876e-01, 7.151e-03, 1.090e-01, -1.186e-01, 1.518e-02, 1.353e-01, 1.843e-02, -3.524e-02, 4.887e-02, 7.737e-02, 3.845e-02, -2.931e-02, -5.800e-02, -3.861e-03)); + r += mul(s1_6, M4(2.065e-02, -7.747e-02, -1.618e-01, 4.603e-02, -1.025e-01, 4.145e-02, -1.041e-02, -9.511e-03, 4.930e-02, -8.431e-02, -5.450e-02, 
3.956e-02, 1.689e-02, 1.266e-02, -5.726e-02, -1.788e-02)); + r += mul(s1_7, M4(-1.094e-01, -2.040e-01, -2.577e-01, 1.804e-01, 2.995e-01, 1.311e-01, 1.994e-01, 1.968e-01, -2.506e-02, 6.480e-02, 4.503e-02, -8.401e-04, 2.338e-02, -6.157e-02, 6.680e-03, -4.095e-02)); + r += mul(s1_8, M4(5.983e-02, 4.815e-02, -6.628e-02, -7.729e-02, -1.298e-02, 1.405e-02, -4.417e-02, 1.491e-02, 4.179e-02, 1.836e-02, 2.254e-02, 3.150e-02, 3.149e-03, -9.375e-03, 3.689e-02, 5.557e-02)); + r += mul(s2_0, M4(-8.824e-03, -9.181e-03, 1.247e-02, 3.677e-02, 5.064e-03, -2.996e-02, -7.275e-02, 2.607e-01, -4.401e-02, 2.083e-02, -8.657e-02, 1.279e-02, -7.093e-03, 1.259e-01, -8.790e-02, -2.425e-02)); + r += mul(s2_1, M4(-3.213e-02, 7.979e-02, 3.086e-02, 2.087e-02, -1.761e-02, -4.059e-02, -2.408e-01, -5.827e-02, 2.238e-02, 1.286e-01, -3.605e-02, 3.392e-02, 1.497e-01, -2.942e-03, 1.615e-02, -1.117e-01)); + r += mul(s2_2, M4(-3.316e-03, -7.754e-04, 1.906e-02, -3.301e-02, -6.677e-02, -9.814e-02, -2.373e-02, -3.117e-02, 2.255e-02, 2.341e-03, -3.144e-02, -1.664e-02, -5.039e-02, 1.212e-01, -1.224e-01, 1.795e-02)); + r += mul(s2_3, M4(2.668e-02, -2.402e-02, -2.967e-02, -1.336e-02, -8.306e-02, -4.632e-02, 1.108e-02, 3.735e-02, -3.451e-02, 5.058e-02, 9.300e-02, 3.711e-02, 3.423e-02, -4.741e-02, -1.095e-02, -3.585e-02)); + r += mul(s2_4, M4(1.100e-01, 7.056e-02, 8.282e-02, 1.530e-01, -2.870e-01, -1.375e-01, -9.497e-02, 2.027e-01, -1.328e-01, 8.856e-02, 6.696e-02, -2.361e-01, 7.305e-03, 1.722e-01, -2.136e-01, -1.850e-01)); + r += mul(s2_5, M4(8.884e-03, -5.168e-02, -2.905e-02, 5.265e-04, -5.872e-02, -1.102e-01, -6.247e-02, 1.192e-02, 1.088e-02, 1.461e-02, 3.859e-02, 1.057e-01, -4.766e-02, -2.917e-02, -1.091e-01, 2.825e-02)); + r += mul(s2_6, M4(5.104e-02, -1.285e-02, 3.879e-02, 5.841e-02, 1.865e-02, -2.791e-02, -6.057e-02, -2.473e-02, -1.034e-01, -2.275e-02, 7.575e-02, 8.749e-03, 9.533e-03, -4.123e-02, -4.295e-02, 3.354e-02)); + r += mul(s2_7, M4(3.173e-02, -4.333e-02, -1.121e-02, 4.112e-02, -1.273e-01, 
-4.428e-02, 5.146e-02, 1.548e-01, 5.952e-02, 1.433e-01, 6.134e-03, -1.373e-01, -2.142e-02, 3.747e-02, -1.377e-01, -1.155e-01)); + r += mul(s2_8, M4(-8.402e-04, 1.866e-02, -4.637e-02, 4.500e-02, 4.600e-02, 1.506e-02, 1.679e-02, 2.722e-02, -7.383e-02, -2.785e-02, -3.831e-02, -7.404e-02, -2.059e-02, 5.708e-02, -6.070e-02, 3.715e-02)); + r += mul(s3_0, M4(-5.871e-02, -2.882e-02, -8.989e-02, 1.916e-02, -1.357e-03, -6.500e-02, 6.279e-02, 1.670e-01, -1.998e-02, 3.869e-02, -5.028e-02, 2.372e-02, 8.465e-03, -9.501e-02, 9.572e-03, 3.625e-02)); + r += mul(s3_1, M4(7.759e-02, 2.860e-01, -4.943e-02, 1.303e-02, 9.473e-03, 1.482e-02, 8.304e-02, -1.259e-01, 1.394e-02, 1.905e-02, -4.888e-02, -8.349e-04, 7.359e-02, 1.769e-01, -7.259e-02, 5.760e-02)); + r += mul(s3_2, M4(3.953e-02, -2.994e-02, -9.992e-02, 5.483e-02, 3.137e-02, -9.053e-02, 3.821e-02, -3.146e-02, 4.306e-02, -1.100e-01, -5.266e-02, 2.499e-02, 6.665e-03, 5.605e-02, -3.151e-02, -9.822e-02)); + r += mul(s3_3, M4(-5.678e-02, -1.999e-02, 4.683e-03, 4.502e-02, 6.835e-02, -6.257e-03, 8.348e-02, -1.255e-01, -6.151e-02, 4.114e-02, 5.202e-02, -1.032e-01, -7.695e-03, -2.938e-02, -5.447e-02, -2.429e-02)); + r += mul(s3_4, M4(6.987e-02, 2.668e-01, -5.651e-02, 8.007e-02, -1.721e-01, 8.798e-03, -2.829e-02, 6.085e-02, -5.505e-02, 2.291e-01, 3.556e-02, -9.195e-02, 4.035e-03, 8.894e-02, -1.989e-01, 1.181e-01)); + r += mul(s3_5, M4(5.965e-02, -3.104e-02, -1.408e-01, 3.235e-02, 7.710e-02, 1.257e-01, 1.401e-01, -3.038e-02, -9.247e-04, -1.607e-01, -1.240e-01, 1.509e-01, -2.938e-02, 1.128e-01, 1.232e-01, 3.870e-03)); + r += mul(s3_6, M4(-1.191e-01, 2.971e-02, 4.848e-02, -4.148e-02, 8.461e-02, -3.591e-02, -5.379e-02, -4.579e-02, -9.728e-02, -3.381e-02, 8.742e-02, 6.678e-02, 5.242e-02, -4.325e-02, -1.544e-02, 6.422e-03)); + r += mul(s3_7, M4(-8.118e-02, -5.023e-02, -9.631e-02, 1.053e-01, -2.222e-01, -3.769e-02, -5.832e-02, -4.684e-03, -3.051e-02, 1.057e-01, -1.021e-01, -4.790e-02, 6.369e-02, 5.099e-02, -2.839e-02, -1.842e-02)); + r += mul(s3_8, 
M4(4.156e-02, 1.869e-02, -1.280e-01, 7.942e-02, 2.336e-02, 6.081e-02, 2.017e-02, -2.476e-02, -1.121e-02, -3.994e-02, -2.009e-02, -3.126e-02, -8.150e-02, 1.338e-02, -1.150e-01, -1.282e-01)); + r += mul(s4_0, M4(-6.037e-02, 1.656e-02, -7.948e-03, 1.622e-01, 5.929e-02, -3.813e-02, 9.060e-02, 1.889e-01, 4.751e-02, -2.354e-02, -6.490e-02, 3.297e-02, 9.593e-03, -3.460e-02, 4.031e-02, -7.292e-02)); + r += mul(s4_1, M4(7.467e-02, -2.272e-01, -7.408e-03, -8.043e-02, 3.338e-02, -1.559e-01, 9.580e-02, -9.651e-02, 2.954e-02, 1.834e-01, 7.362e-02, 1.125e-01, 9.323e-03, -4.286e-02, 1.238e-01, -4.365e-02)); + r += mul(s4_2, M4(1.466e-02, -1.622e-02, 5.112e-02, 5.212e-04, 1.046e-02, -5.693e-02, 1.186e-01, -4.618e-02, 2.398e-04, 2.288e-02, -1.616e-01, -6.572e-02, -5.077e-02, -5.950e-02, 4.100e-02, -5.900e-03)); + r += mul(s4_3, M4(8.164e-02, -1.050e-02, 7.152e-02, 1.763e-01, 1.477e-02, 9.761e-02, 2.061e-01, -2.763e-01, -4.070e-02, -1.053e-02, -1.024e-01, 2.789e-03, 1.721e-02, -1.149e-01, -5.316e-02, 2.251e-01)); + r += mul(s4_4, M4(-6.152e-02, -4.566e-02, 3.206e-02, -6.381e-02, 4.282e-02, 1.616e-01, -2.449e-02, 1.099e-01, 1.488e-02, 2.030e-02, -2.205e-01, -2.367e-02, -7.245e-03, 5.897e-02, -2.508e-02, -1.578e-01)); + r += mul(s4_5, M4(-5.024e-02, 1.279e-01, 7.120e-02, 3.258e-03, 5.894e-02, 9.921e-02, 2.215e-01, 5.180e-02, 7.799e-02, 9.747e-02, 7.793e-02, -9.364e-03, -1.348e-02, -3.141e-02, -3.236e-02, 5.742e-02)); + r += mul(s4_6, M4(9.676e-02, 3.061e-03, 3.606e-02, 8.734e-02, -1.257e-01, 2.836e-02, 7.202e-02, 2.260e-02, 5.660e-02, -4.800e-02, -2.797e-02, 1.071e-02, 9.543e-02, -5.729e-02, 9.324e-03, -1.327e-02)); + r += mul(s4_7, M4(7.406e-02, -3.215e-02, 5.902e-02, 1.374e-02, -1.285e-01, -1.385e-03, -3.255e-02, -1.147e-01, -1.568e-02, 3.902e-02, -1.221e-02, 1.601e-02, 5.597e-02, -2.408e-02, 9.760e-02, 4.917e-02)); + r += mul(s4_8, M4(-3.236e-02, 4.447e-02, 5.241e-02, -2.944e-02, -3.300e-02, -7.686e-02, 1.495e-01, 1.105e-02, 8.006e-02, -4.901e-03, -8.337e-02, 8.960e-02, -5.067e-02, 
-2.423e-02, 3.160e-03, -6.541e-02)); + r += mul(s5_0, M4(-9.224e-03, -3.012e-02, 1.654e-02, -1.741e-02, 6.193e-02, 1.194e-02, -1.138e-01, 5.334e-02, 4.494e-02, 3.817e-02, -6.868e-02, -1.206e-01, 3.800e-02, -6.390e-02, -2.415e-02, 1.189e-01)); + r += mul(s5_1, M4(1.780e-02, 2.528e-01, 2.053e-02, -2.688e-02, 2.794e-02, 9.420e-03, 3.903e-02, -2.132e-02, -2.308e-02, -1.706e-01, 2.881e-01, -9.967e-04, -7.117e-02, -1.004e-01, 1.655e-01, -2.542e-02)); + r += mul(s5_2, M4(-9.963e-04, 2.048e-02, 1.102e-01, -2.183e-02, 5.955e-02, -4.351e-02, 2.002e-02, 3.162e-02, -3.611e-02, -1.422e-02, -8.736e-02, 4.948e-02, 1.035e-03, -6.009e-02, -7.072e-02, -4.935e-04)); + r += mul(s5_3, M4(-1.619e-02, -4.737e-02, 3.625e-03, 2.913e-02, -5.581e-02, -6.796e-02, 1.227e-02, 7.568e-03, 2.369e-02, 8.190e-03, -2.758e-02, -2.920e-02, 8.449e-02, 6.266e-02, -1.172e-01, 3.430e-03)); + r += mul(s5_4, M4(-7.080e-02, -1.188e-01, -4.568e-02, -9.712e-02, 3.748e-02, -1.157e-02, -5.191e-02, 1.316e-02, 6.758e-02, -1.323e-01, -8.590e-03, -1.927e-01, -8.623e-02, 1.222e-01, -1.296e-01, -4.125e-02)); + r += mul(s5_5, M4(-3.687e-02, 5.063e-02, 5.207e-02, -6.672e-04, 8.033e-03, 1.983e-02, -1.978e-02, 1.045e-01, -2.183e-02, -4.931e-02, 2.080e-02, 4.839e-02, 1.567e-02, 1.159e-02, 2.069e-03, 3.931e-02)); + r += mul(s5_6, M4(5.989e-02, -5.406e-02, 8.648e-02, 3.946e-02, -2.754e-02, -8.323e-02, 2.472e-03, 1.602e-02, -2.517e-02, -2.794e-02, -8.966e-03, 3.794e-02, 3.412e-02, 9.293e-02, -1.335e-01, -9.274e-02)); + r += mul(s5_7, M4(9.180e-02, 3.718e-02, 2.621e-02, -2.298e-03, -1.593e-02, 3.107e-02, -1.137e-02, 3.554e-02, -5.966e-02, 5.259e-02, 1.049e-02, -4.814e-02, -2.135e-01, -5.053e-02, -1.286e-02, -1.475e-01)); + r += mul(s5_8, M4(-8.856e-03, -1.193e-02, 4.319e-02, -6.940e-03, -9.507e-03, -3.423e-03, -2.602e-02, 2.562e-02, -4.796e-02, 2.255e-02, 2.904e-02, 7.446e-02, -5.113e-03, 7.435e-02, 8.579e-02, 4.102e-02)); + r += mul(s6_0, M4(4.389e-02, -1.258e-01, 2.059e-02, 2.629e-01, 4.985e-03, 2.844e-02, -9.665e-02, 
5.919e-02, 3.232e-02, -1.705e-01, 1.044e-03, 2.950e-02, 3.746e-02, 3.500e-02, 7.514e-02, 4.464e-03)); + r += mul(s6_1, M4(7.756e-02, 8.989e-02, -3.914e-03, -7.133e-02, 1.374e-02, -4.715e-03, 9.700e-02, -5.065e-02, -8.952e-03, 4.056e-02, -9.037e-02, 3.634e-02, -1.191e-02, -1.558e-01, 1.932e-01, 4.759e-02)); + r += mul(s6_2, M4(6.396e-02, -2.024e-01, -5.218e-02, -1.168e-02, 3.868e-03, -1.990e-02, -3.047e-02, 4.044e-02, -2.089e-02, -5.308e-02, -9.645e-02, 5.635e-03, -5.880e-02, -4.208e-02, 1.858e-01, 1.659e-02)); + r += mul(s6_3, M4(-1.069e-01, 4.536e-02, -7.838e-02, -1.381e-01, -2.307e-02, -1.687e-02, 4.232e-02, 4.459e-02, -1.205e-01, -1.834e-01, 6.953e-02, -3.404e-01, 5.913e-02, -7.332e-02, 1.579e-01, 6.973e-02)); + r += mul(s6_4, M4(-1.341e-01, 6.876e-02, 8.401e-03, 7.779e-02, 2.819e-02, 3.532e-01, -5.884e-02, 3.457e-02, -5.755e-02, 1.192e-01, 3.875e-01, -2.875e-01, 4.455e-02, 6.725e-02, 3.007e-02, 3.482e-02)); + r += mul(s6_5, M4(1.390e-03, 1.684e-02, -3.724e-02, 5.731e-02, 1.727e-02, -3.421e-03, -3.177e-02, 5.035e-02, 8.179e-02, -1.725e-01, -2.584e-02, 7.276e-02, 1.489e-01, -1.704e-03, 8.725e-02, -6.646e-04)); + r += mul(s6_6, M4(-8.179e-02, 3.857e-02, -5.471e-02, 4.253e-02, -5.446e-02, 8.346e-02, -5.433e-02, 1.508e-01, -9.701e-02, -1.837e-02, 2.487e-02, -3.024e-02, 1.529e-01, -1.409e-02, 5.809e-02, -6.421e-02)); + r += mul(s6_7, M4(-1.585e-01, -3.170e-02, -1.190e-01, -5.164e-02, 1.605e-02, -6.011e-03, -1.041e-01, -1.551e-02, 2.374e-02, 1.426e-01, 1.592e-01, -1.714e-01, 1.089e-02, 5.988e-02, 3.814e-02, 1.214e-02)); + r += mul(s6_8, M4(-7.375e-02, 8.089e-02, 4.910e-02, 1.020e-02, 5.646e-03, -1.113e-02, -1.495e-02, 2.421e-02, 4.807e-02, -9.493e-02, 8.929e-02, -2.548e-02, 8.632e-02, 4.068e-02, -1.793e-02, -6.503e-03)); + r += mul(s7_0, M4(-5.631e-02, 1.109e-01, -1.101e-01, 4.318e-02, -7.555e-03, 3.286e-02, 4.230e-03, -9.658e-04, 2.501e-02, 5.393e-02, -1.639e-02, -4.477e-02, 1.108e-02, -3.988e-02, 7.409e-02, -8.132e-02)); + r += mul(s7_1, M4(-1.264e-02, 3.910e-02, 
-4.864e-02, 8.381e-02, 7.497e-03, 6.824e-03, 5.936e-02, 2.502e-03, -5.776e-02, -5.663e-02, -2.980e-02, -1.511e-02, 2.048e-02, -1.101e-01, 7.526e-02, 6.164e-02)); + r += mul(s7_2, M4(-7.927e-03, -5.440e-02, -4.622e-02, -1.667e-02, 1.467e-03, -4.640e-02, 3.214e-02, 6.492e-03, -3.743e-02, 1.046e-01, 6.501e-03, 2.285e-02, 2.592e-02, 3.474e-02, 1.183e-01, -1.976e-02)); + r += mul(s7_3, M4(3.091e-02, -7.652e-02, -1.263e-01, 3.743e-02, -8.994e-02, 8.575e-02, 5.046e-03, -1.179e-02, 1.578e-03, -7.156e-02, 3.312e-02, -1.918e-02, -5.160e-03, -9.209e-02, 1.189e-01, -8.571e-02)); + r += mul(s7_4, M4(-6.698e-02, -7.674e-02, -6.401e-02, 8.097e-03, 1.107e-03, 7.725e-02, 5.622e-02, 1.300e-01, -4.175e-02, 1.169e-01, -1.043e-01, -1.587e-02, -1.078e-02, 4.202e-02, -7.352e-02, -2.581e-02)); + r += mul(s7_5, M4(-3.235e-02, -4.669e-03, -9.562e-02, 4.169e-02, 3.150e-02, 3.311e-02, 7.651e-02, -2.521e-02, -4.377e-03, 1.004e-01, 6.654e-02, -6.585e-02, 7.707e-02, 5.189e-02, 1.077e-01, -3.349e-02)); + r += mul(s7_6, M4(3.353e-02, -2.998e-02, -5.213e-02, -1.200e-02, 5.783e-03, 1.861e-02, 6.618e-02, -1.388e-02, -2.233e-02, -3.001e-03, 8.674e-02, -1.458e-03, 2.875e-02, 3.304e-02, 3.299e-02, -1.312e-01)); + r += mul(s7_7, M4(4.818e-02, 2.206e-02, 4.522e-02, -2.785e-02, -9.122e-02, -1.578e-02, 8.113e-02, 1.068e-01, -1.008e-02, -6.218e-02, 8.925e-02, 2.618e-02, -3.574e-02, 2.934e-02, 5.164e-02, 7.347e-03)); + r += mul(s7_8, M4(-4.466e-02, 2.015e-02, -4.114e-02, -1.581e-02, 1.891e-02, 1.774e-02, -2.226e-02, -1.056e-02, -4.274e-02, -2.640e-02, 2.741e-02, -2.143e-02, -1.275e-02, 6.932e-03, -2.697e-03, -1.463e-02)); + r += V4(8.595e-03, -1.310e-02, -3.312e-02, -2.644e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 
s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.614e-02, -1.005e-02, -6.784e-04, 7.722e-03, -3.042e-02, -1.959e-02, -3.936e-02, 5.703e-02, 8.608e-02, 4.361e-02, 1.607e-02, -2.982e-02, 2.736e-02, 5.653e-02, -3.001e-02, 1.120e-01)); + r += mul(s0_1, M4(-7.235e-02, 6.935e-04, 1.750e-02, -2.335e-02, -5.189e-02, -2.955e-02, -1.816e-02, 5.784e-02, 4.978e-02, -6.952e-02, 7.076e-02, 5.682e-02, -7.723e-02, -6.976e-03, -5.206e-02, 3.530e-02)); + r += mul(s0_2, M4(5.367e-02, 1.343e-02, 3.025e-02, 1.073e-02, 3.161e-02, -2.087e-02, 9.620e-03, 2.695e-02, -4.723e-02, -4.379e-03, -7.545e-02, 5.364e-02, 8.785e-02, 1.200e-01, 4.003e-02, 5.509e-02)); + r += mul(s0_3, M4(-4.535e-02, 3.388e-02, 2.809e-02, 1.864e-02, -2.166e-02, -3.582e-03, -1.579e-02, 8.943e-02, -6.513e-02, 6.274e-02, -5.861e-02, 7.508e-02, 1.783e-01, 3.014e-02, 2.979e-02, -2.050e-02)); + r += mul(s0_4, M4(5.496e-02, 3.936e-02, -4.381e-02, -3.945e-03, 1.082e-01, 5.698e-02, -4.709e-02, -5.350e-02, 2.057e-01, -1.831e-01, 7.740e-03, 4.411e-02, 5.539e-02, 1.397e-01, 4.735e-02, 3.319e-02)); + r += mul(s0_5, M4(2.261e-02, 4.806e-02, 1.373e-02, 8.694e-03, 3.159e-02, 8.759e-02, -6.888e-02, -8.022e-02, 2.395e-02, 6.523e-02, -1.166e-01, 1.327e-02, -3.381e-02, 7.779e-02, -3.434e-02, 1.555e-02)); + r += mul(s0_6, M4(1.209e-02, 4.585e-03, 9.905e-03, 1.364e-02, -1.354e-02, 5.577e-03, 5.213e-02, -6.017e-02, -6.375e-02, -3.437e-02, -2.421e-02, -2.313e-02, 8.052e-02, 2.112e-04, 5.610e-02, 3.765e-03)); + r += mul(s0_7, M4(-3.386e-02, 4.527e-02, -5.707e-03, -4.910e-02, -2.961e-02, -3.020e-02, -4.442e-02, 8.489e-02, -1.019e-03, 1.266e-01, 6.886e-02, 7.071e-02, -2.010e-02, -5.389e-02, -2.552e-02, -1.134e-02)); + r += 
mul(s0_8, M4(-4.069e-03, -1.052e-02, -3.589e-02, -8.961e-02, -8.542e-03, -2.531e-03, -2.576e-02, 1.685e-02, -3.091e-02, -1.611e-02, 8.161e-02, 3.644e-02, -1.251e-02, 2.749e-02, 8.363e-02, -1.902e-02)); + r += mul(s1_0, M4(-4.832e-02, 6.763e-02, 1.693e-01, -6.850e-02, -1.692e-01, 2.277e-02, -2.988e-02, 9.687e-02, -1.567e-01, 9.008e-02, 7.988e-02, 8.103e-02, 6.072e-02, 1.966e-02, 1.069e-02, 3.073e-02)); + r += mul(s1_1, M4(1.153e-03, 7.128e-02, -1.417e-01, -7.013e-02, -9.707e-02, -4.268e-02, 1.928e-02, 9.093e-02, 1.453e-01, -8.077e-02, -1.019e-01, -7.137e-02, 6.369e-02, -1.047e-01, 5.034e-02, 2.306e-03)); + r += mul(s1_2, M4(1.078e-01, 1.220e-01, 6.449e-02, 9.159e-02, -1.270e-02, -1.106e-01, 5.049e-02, -2.057e-02, -5.039e-02, -1.887e-02, -6.338e-02, 2.725e-02, 8.502e-02, 1.561e-01, -7.513e-02, 6.572e-02)); + r += mul(s1_3, M4(5.530e-02, -1.115e-03, 2.046e-01, -4.519e-02, -3.028e-01, -7.232e-03, 4.014e-03, 5.096e-02, 1.179e-01, -9.105e-02, 8.114e-02, -1.298e-01, 7.743e-02, 4.449e-02, 1.560e-03, -4.073e-02)); + r += mul(s1_4, M4(3.588e-01, 2.170e-01, -3.909e-02, -3.370e-01, 1.009e-01, 1.970e-01, 8.040e-02, 9.738e-03, 2.206e-01, 4.629e-01, 1.215e-01, -1.200e-01, -1.037e-02, 9.784e-02, -3.198e-02, 1.268e-02)); + r += mul(s1_5, M4(1.062e-01, 1.983e-01, 1.405e-01, -9.432e-02, -5.090e-02, -7.232e-02, 1.303e-01, -3.199e-01, -3.193e-02, -3.759e-02, -1.594e-02, -3.530e-02, -2.546e-03, 5.905e-02, 7.533e-02, 1.445e-01)); + r += mul(s1_6, M4(2.811e-02, 1.152e-01, -3.190e-02, -1.156e-01, -5.659e-02, -1.193e-01, -2.246e-02, 1.797e-03, 1.593e-02, 4.996e-02, -1.089e-02, 1.032e-02, 2.749e-02, 2.593e-02, 1.733e-02, 8.334e-02)); + r += mul(s1_7, M4(5.944e-03, 5.876e-02, -2.158e-01, -5.041e-03, 9.495e-02, 1.840e-02, 6.503e-02, 1.521e-01, -1.170e-01, -1.226e-01, -3.012e-03, 1.956e-02, -4.297e-02, 5.649e-02, -7.710e-03, 1.146e-02)); + r += mul(s1_8, M4(-2.399e-03, 4.167e-02, -1.627e-01, -1.377e-01, 7.009e-03, -7.871e-02, 1.943e-02, 2.779e-02, 6.534e-02, 6.369e-02, -1.158e-02, 3.674e-02, 
-2.975e-02, 4.651e-02, 3.913e-02, 2.652e-02)); + r += mul(s2_0, M4(2.828e-02, -2.513e-02, 5.672e-03, -6.423e-02, 5.520e-02, -4.920e-02, 6.411e-02, -7.914e-02, 3.132e-02, -5.132e-02, -4.550e-02, 1.351e-03, 8.561e-02, -8.241e-02, -9.329e-03, 6.873e-02)); + r += mul(s2_1, M4(4.571e-02, 1.052e-01, 1.285e-02, 1.897e-02, 7.375e-02, -1.896e-01, 1.410e-02, -2.172e-01, -2.555e-02, 1.003e-01, -2.963e-02, 7.566e-02, -2.315e-01, -1.842e-02, 3.806e-03, -7.120e-02)); + r += mul(s2_2, M4(3.116e-03, 1.334e-02, 6.586e-02, -1.674e-02, -4.504e-02, -2.114e-02, -1.535e-01, -6.773e-02, -5.222e-02, -1.381e-01, 3.592e-02, -9.189e-03, 1.369e-02, -4.472e-03, -8.508e-02, -5.183e-02)); + r += mul(s2_3, M4(8.123e-02, -1.267e-02, -1.060e-02, -1.223e-01, -8.796e-03, 4.947e-02, 1.094e-02, -5.961e-02, 1.707e-02, -4.949e-02, 1.499e-02, 1.971e-01, -7.364e-03, 5.166e-02, 1.375e-02, -2.088e-02)); + r += mul(s2_4, M4(1.981e-01, 1.402e-02, -7.671e-02, -1.057e-01, -3.672e-02, 2.139e-01, -3.502e-02, 4.998e-02, -6.810e-02, 2.711e-01, 2.190e-03, 2.389e-01, -1.027e-01, -1.934e-01, 2.007e-01, -1.764e-01)); + r += mul(s2_5, M4(-3.131e-02, 9.458e-02, -4.505e-02, -8.858e-02, -8.754e-02, -1.370e-02, -3.300e-03, -1.630e-02, -7.052e-02, -1.271e-01, 9.199e-03, 1.163e-01, -1.271e-01, -1.099e-01, -1.599e-01, 8.651e-02)); + r += mul(s2_6, M4(8.813e-02, 5.411e-02, -1.438e-02, -1.103e-01, 2.566e-02, 4.505e-02, 7.295e-03, -1.022e-02, -1.135e-02, 1.019e-02, 7.076e-02, 1.678e-02, 1.878e-02, -1.681e-02, -7.175e-02, 5.882e-02)); + r += mul(s2_7, M4(-6.547e-02, -6.367e-03, -9.245e-03, 5.503e-02, 1.068e-02, -2.441e-02, -4.558e-02, -1.253e-01, 1.120e-01, 5.100e-02, -8.709e-03, 1.022e-02, 1.694e-02, 6.852e-02, -1.305e-01, -1.105e-02)); + r += mul(s2_8, M4(2.468e-02, 4.679e-02, -4.744e-02, -2.494e-03, -5.273e-03, -9.415e-03, 2.700e-02, 1.370e-02, 2.330e-02, -7.538e-02, -1.354e-02, -9.254e-02, 3.925e-02, -2.982e-02, -4.716e-02, -4.827e-02)); + r += mul(s3_0, M4(5.471e-02, -4.965e-02, -5.552e-02, 9.349e-02, -3.978e-02, 4.671e-02, 
1.510e-02, -1.010e-02, -5.610e-03, -6.650e-03, -4.539e-02, -3.078e-02, -2.028e-02, -1.076e-02, 4.225e-03, 6.920e-03)); + r += mul(s3_1, M4(-8.981e-02, 2.075e-02, -1.545e-01, 8.362e-02, 3.681e-02, -6.842e-02, -3.677e-03, 7.273e-02, 3.630e-02, 1.168e-01, 9.331e-02, -1.641e-02, 8.741e-02, -1.807e-02, 5.136e-02, -5.999e-03)); + r += mul(s3_2, M4(7.980e-03, 9.239e-03, -7.776e-02, 4.121e-02, -6.007e-02, -6.668e-02, 2.904e-02, -1.818e-02, -4.358e-02, -6.546e-02, 7.827e-02, -1.531e-02, 8.845e-02, 4.541e-02, -3.799e-02, 4.981e-02)); + r += mul(s3_3, M4(-7.097e-02, 3.464e-02, -3.664e-02, 9.282e-02, -1.353e-02, -4.447e-02, 6.376e-05, -1.620e-02, 4.777e-02, 6.054e-03, -2.632e-02, 5.112e-02, -1.545e-02, -5.129e-02, 3.346e-02, 2.137e-03)); + r += mul(s3_4, M4(2.378e-01, -1.779e-01, -1.635e-01, 9.949e-02, -1.099e-02, -9.495e-02, 1.901e-02, 6.277e-02, -1.515e-01, 1.612e-01, -2.442e-02, 9.298e-03, 1.148e-01, 5.676e-02, -5.988e-02, -5.545e-02)); + r += mul(s3_5, M4(-2.331e-02, 3.820e-02, -9.015e-02, -7.512e-02, -3.124e-02, -8.487e-02, -3.251e-02, 3.748e-03, -4.415e-02, 2.026e-02, 5.457e-02, -8.815e-02, 5.846e-02, 5.226e-02, -9.859e-02, -1.642e-02)); + r += mul(s3_6, M4(4.007e-02, -6.566e-02, 5.973e-02, 1.621e-02, 3.186e-02, 4.911e-02, 5.198e-03, 2.869e-02, -3.761e-02, 6.225e-02, 3.270e-02, 7.359e-02, -2.176e-02, 5.753e-03, 1.074e-02, 3.768e-03)); + r += mul(s3_7, M4(-8.123e-02, 1.709e-01, 3.004e-03, 2.824e-01, -1.991e-02, -7.908e-03, -3.733e-02, -1.250e-01, 1.101e-01, 1.949e-02, -5.755e-02, 6.645e-02, -2.663e-03, -9.784e-03, 2.276e-02, -2.752e-02)); + r += mul(s3_8, M4(4.444e-02, 5.957e-02, -4.221e-02, -5.451e-04, 9.534e-03, -1.278e-02, -6.812e-02, -1.361e-03, 2.192e-04, -2.833e-02, 7.063e-04, -1.225e-01, 3.637e-04, -5.070e-02, 1.058e-02, -6.452e-02)); + r += mul(s4_0, M4(6.307e-02, -2.681e-02, -6.274e-02, 8.770e-02, -7.101e-02, 4.855e-02, -5.002e-02, 4.840e-02, -2.804e-02, 5.773e-02, -6.244e-03, 8.325e-02, -2.861e-02, -6.126e-03, -3.282e-03, -5.171e-02)); + r += mul(s4_1, 
M4(3.098e-02, 3.295e-02, 7.547e-02, 1.486e-01, 7.005e-02, 2.076e-03, -9.338e-02, 1.963e-02, -1.268e-01, -3.594e-02, -1.551e-02, 6.473e-02, 5.155e-02, -1.451e-01, 9.073e-02, 6.502e-03)); + r += mul(s4_2, M4(-2.531e-02, 4.505e-02, -9.723e-02, 1.119e-02, -3.923e-02, -1.286e-01, 3.625e-02, -9.626e-02, 1.758e-02, -9.467e-02, -3.204e-02, -1.593e-01, -4.865e-02, 3.704e-02, -5.239e-03, -6.411e-02)); + r += mul(s4_3, M4(1.624e-02, 7.311e-02, -8.259e-02, 7.513e-02, 2.413e-02, 5.958e-02, -1.074e-01, 2.137e-01, 1.173e-03, -8.354e-02, -1.466e-02, -2.959e-02, -1.028e-02, -1.759e-02, -4.829e-02, -6.977e-02)); + r += mul(s4_4, M4(-6.667e-02, -9.253e-02, -1.928e-01, 1.053e-01, 2.976e-02, 2.397e-02, 1.972e-02, -3.608e-01, -1.562e-01, -1.104e-01, -6.679e-02, 8.189e-03, 1.051e-01, 2.159e-02, -1.574e-01, 1.478e-01)); + r += mul(s4_5, M4(1.064e-03, 4.455e-02, 2.994e-02, 6.062e-02, 2.440e-02, -8.779e-02, -7.318e-03, -3.991e-02, 2.143e-02, -1.107e-01, 3.086e-02, -2.095e-01, 3.533e-02, 2.910e-03, 3.938e-02, -9.010e-02)); + r += mul(s4_6, M4(1.370e-02, 1.084e-02, 1.671e-02, 4.518e-02, -7.201e-02, -1.957e-02, 4.449e-02, 9.856e-02, 4.019e-03, 2.327e-02, 2.099e-02, 2.433e-03, -4.614e-03, -2.463e-03, -7.228e-02, 5.710e-02)); + r += mul(s4_7, M4(-6.763e-02, -7.056e-02, 4.111e-02, 1.589e-01, -5.957e-02, -1.089e-01, -4.845e-03, 5.609e-03, -1.616e-01, -7.913e-02, -8.164e-02, -6.088e-02, 2.263e-02, 1.072e-01, 9.865e-02, -1.330e-03)); + r += mul(s4_8, M4(3.810e-02, -3.161e-03, 1.591e-02, 9.792e-02, 2.835e-02, -2.595e-02, -8.879e-04, -2.684e-02, 9.845e-03, 2.914e-02, 7.703e-03, -5.742e-02, -3.035e-02, 4.963e-03, 1.384e-01, 3.100e-03)); + r += mul(s5_0, M4(4.256e-02, -1.008e-01, -1.556e-02, 6.923e-04, -5.312e-02, 5.749e-02, -2.727e-03, 5.390e-02, 7.619e-02, 5.960e-02, -2.679e-02, 1.117e-01, -1.577e-01, 1.731e-02, -1.404e-02, -3.958e-02)); + r += mul(s5_1, M4(-1.362e-01, -1.371e-02, -5.278e-02, 1.414e-01, 3.202e-02, -3.197e-02, -6.661e-02, 8.865e-03, -2.686e-01, 1.049e-02, -6.292e-02, 7.536e-02, 
1.076e-01, -7.338e-03, -1.221e-01, -5.864e-02)); + r += mul(s5_2, M4(5.416e-02, 7.805e-03, 5.544e-02, 2.636e-02, -3.914e-02, -7.272e-02, -7.414e-02, 1.661e-02, -7.668e-02, -1.847e-02, 6.360e-02, -2.105e-02, 7.998e-02, 5.067e-02, -8.429e-03, -2.027e-02)); + r += mul(s5_3, M4(-2.233e-05, 5.755e-02, -3.404e-02, 1.183e-01, -1.258e-02, -2.209e-02, -8.305e-02, 7.034e-02, -5.047e-02, 6.752e-03, 5.717e-02, 4.605e-02, 3.183e-03, -1.312e-01, -5.560e-02, -1.314e-02)); + r += mul(s5_4, M4(5.782e-02, -6.304e-02, -2.183e-01, 1.023e-01, -8.243e-02, 5.312e-02, 6.326e-02, -1.013e-01, -1.482e-01, 8.340e-02, 5.306e-02, 9.693e-02, -7.022e-02, -1.078e-01, -9.109e-02, 1.012e-01)); + r += mul(s5_5, M4(2.845e-02, 7.967e-03, 3.953e-02, 8.160e-02, -3.188e-02, -8.098e-02, 8.965e-03, -2.233e-02, -1.713e-01, -9.800e-02, -1.032e-02, 1.329e-01, -4.141e-02, 2.611e-02, 2.263e-02, 4.147e-02)); + r += mul(s5_6, M4(4.944e-03, 1.338e-03, -1.373e-03, -4.234e-03, 1.164e-02, 4.842e-02, -5.568e-02, 3.876e-02, -7.883e-02, -4.474e-02, -1.606e-02, 4.802e-04, -7.402e-02, -8.163e-02, -6.966e-03, 6.448e-02)); + r += mul(s5_7, M4(-6.236e-02, 1.289e-02, 4.420e-02, 5.460e-02, 9.722e-02, 1.146e-02, -9.578e-02, -4.473e-02, -6.389e-02, 2.631e-03, -9.886e-02, 5.113e-02, -9.276e-02, -1.233e-01, 4.618e-02, 1.873e-02)); + r += mul(s5_8, M4(1.536e-02, -5.075e-04, 1.344e-02, 4.274e-02, 3.285e-02, -2.907e-02, -2.249e-02, -4.175e-02, 3.839e-02, -1.694e-02, -2.087e-02, 4.188e-02, 2.566e-02, -3.312e-02, -4.870e-02, -1.486e-02)); + r += mul(s6_0, M4(2.035e-02, 8.914e-02, -9.176e-02, 2.293e-03, 7.181e-02, 3.651e-02, 3.323e-03, 2.469e-02, 2.294e-02, 8.999e-02, -3.079e-02, 1.147e-02, 1.319e-01, 5.067e-02, -3.103e-02, 5.246e-02)); + r += mul(s6_1, M4(-2.348e-02, 1.186e-01, -1.171e-01, 8.115e-02, -1.357e-01, -3.212e-02, -1.983e-02, 9.504e-02, 7.592e-02, 2.967e-01, -2.224e-01, -7.604e-02, -4.854e-02, -2.008e-02, -2.428e-02, -1.026e-01)); + r += mul(s6_2, M4(1.907e-02, 4.944e-02, -9.952e-03, -2.858e-02, -2.479e-02, -5.892e-02, 
7.922e-02, -4.188e-02, 4.176e-04, -9.048e-02, -6.651e-03, 9.881e-03, -1.035e-03, 3.687e-02, 1.044e-02, -9.535e-02)); + r += mul(s6_3, M4(-6.968e-03, -3.589e-02, -1.763e-02, -1.608e-01, -1.312e-01, 1.096e-01, -3.024e-02, 7.163e-02, -1.194e-01, 8.786e-02, -6.113e-02, -5.211e-02, 1.995e-02, 5.899e-02, 8.763e-02, -5.822e-02)); + r += mul(s6_4, M4(1.126e-01, -7.308e-02, -3.569e-02, -1.747e-01, -3.388e-02, -2.617e-01, 3.035e-02, -1.474e-01, -1.557e-02, 2.141e-01, -3.721e-01, 2.118e-01, 1.199e-01, -9.028e-02, 1.809e-01, -1.871e-01)); + r += mul(s6_5, M4(4.094e-02, 2.008e-01, -1.845e-01, 1.834e-02, -2.787e-02, -4.992e-04, -1.641e-02, -1.007e-01, -8.758e-02, -1.699e-01, 9.544e-03, 8.977e-05, -5.093e-02, -6.207e-02, 8.674e-02, -1.359e-01)); + r += mul(s6_6, M4(4.932e-03, 3.054e-02, 2.115e-02, 2.666e-02, -5.442e-02, -7.033e-02, -3.701e-02, -2.965e-02, 7.123e-03, -3.562e-03, -4.148e-02, 1.772e-01, 2.837e-03, 8.905e-02, 1.370e-02, 2.342e-02)); + r += mul(s6_7, M4(-2.051e-02, -5.216e-02, -6.625e-02, 1.395e-02, -3.178e-02, -6.876e-02, 5.125e-03, 4.695e-02, 1.508e-01, 5.439e-02, 5.286e-02, -1.213e-01, 2.426e-02, -7.170e-02, -2.314e-02, -7.301e-02)); + r += mul(s6_8, M4(-2.376e-02, -2.696e-02, -8.676e-02, -5.200e-02, 4.308e-02, -6.324e-02, 3.243e-03, 2.496e-02, 6.805e-02, 5.926e-02, -1.166e-01, -9.171e-02, 3.429e-02, -2.809e-02, -2.414e-02, -3.571e-02)); + r += mul(s7_0, M4(8.384e-02, -2.706e-02, 1.013e-01, -9.113e-02, 1.056e-02, -6.677e-03, -1.244e-02, 1.388e-02, 2.674e-03, -5.477e-02, 1.808e-02, -3.460e-02, 3.550e-02, 2.373e-02, 1.164e-02, 1.138e-01)); + r += mul(s7_1, M4(-3.523e-05, -1.094e-01, 1.434e-02, -4.561e-02, -1.619e-02, -2.251e-02, 5.216e-02, 4.408e-02, -8.221e-02, -4.150e-03, -4.067e-02, -2.003e-02, -3.024e-02, 5.350e-03, -5.504e-02, 4.436e-02)); + r += mul(s7_2, M4(3.702e-02, 7.436e-02, -1.237e-01, -3.169e-02, -5.443e-03, -4.782e-02, 2.405e-02, -1.102e-02, -3.942e-02, -8.225e-02, -4.205e-02, 3.668e-02, 5.289e-02, 7.724e-03, 1.813e-02, 4.858e-02)); + r += mul(s7_3, 
M4(2.945e-03, 7.254e-02, 2.675e-02, -8.459e-02, 6.718e-02, 3.384e-02, -3.787e-02, 4.449e-02, 4.266e-02, 5.028e-02, -3.294e-02, -6.624e-03, -3.943e-02, 9.206e-02, 5.073e-02, 3.251e-02)); + r += mul(s7_4, M4(-1.042e-01, -6.166e-02, -1.833e-01, 9.123e-02, 1.202e-01, -7.532e-03, -2.004e-02, 5.601e-02, 5.176e-02, -2.966e-02, -1.028e-01, 2.305e-02, 8.507e-02, -4.308e-02, 4.660e-02, -3.810e-02)); + r += mul(s7_5, M4(1.993e-03, 3.307e-02, -3.692e-02, 6.274e-02, 8.841e-02, 6.889e-02, 3.847e-02, 3.557e-03, -1.397e-02, -6.079e-02, -1.159e-01, 5.036e-02, -7.212e-02, -8.696e-02, -1.473e-02, -8.401e-05)); + r += mul(s7_6, M4(1.307e-03, 5.465e-02, 8.376e-02, 6.102e-04, 1.640e-02, -3.675e-02, 1.247e-02, 7.030e-03, 1.581e-02, -2.842e-02, 4.472e-02, 5.942e-03, -3.620e-02, -4.640e-02, 4.822e-02, 7.610e-02)); + r += mul(s7_7, M4(5.411e-03, -4.896e-02, 7.239e-02, 2.750e-02, 3.379e-03, -6.351e-02, 1.287e-02, 9.011e-03, 2.150e-02, 2.218e-02, 1.566e-02, -8.448e-03, 2.273e-02, 4.030e-02, 9.586e-03, -1.124e-02)); + r += mul(s7_8, M4(-1.543e-03, -1.198e-02, -4.999e-02, 2.786e-02, 6.909e-02, 3.947e-02, -2.309e-03, 5.050e-03, 3.616e-02, -2.521e-02, 4.467e-02, -6.620e-02, 6.803e-02, 1.796e-02, -1.544e-02, 8.682e-03)); + r += V4(1.619e-02, 2.435e-02, 1.969e-02, 4.142e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.543e-02, -1.793e-02, 
-2.661e-02, -8.442e-03, -3.030e-02, 7.954e-02, 4.205e-03, -1.372e-02, 1.039e-01, 1.134e-01, 4.211e-02, -3.359e-02, 5.462e-02, 2.089e-02, -4.002e-02, 7.973e-02)); + r += mul(s0_1, M4(-2.038e-03, -1.586e-02, -2.208e-02, -3.928e-03, -3.969e-02, 2.543e-03, 1.281e-02, 1.219e-01, -7.411e-03, 9.921e-02, -1.890e-02, -1.119e-02, 3.633e-02, -2.426e-02, -7.610e-02, 2.071e-01)); + r += mul(s0_2, M4(1.747e-02, -1.930e-02, 2.645e-02, -3.918e-02, 5.670e-02, -3.047e-02, -2.489e-02, -2.364e-02, 1.529e-02, 3.475e-02, -1.780e-02, 3.892e-02, 7.479e-02, -6.135e-02, -2.016e-01, -2.309e-02)); + r += mul(s0_3, M4(4.840e-02, -2.967e-02, -4.442e-02, 1.850e-02, -1.576e-01, -3.390e-02, 3.628e-02, 5.351e-03, -3.437e-02, -1.698e-02, 8.110e-03, -1.047e-02, -5.011e-02, -2.863e-01, 2.716e-02, 1.364e-01)); + r += mul(s0_4, M4(1.586e-01, 3.869e-02, 4.948e-02, -1.398e-02, 1.052e-01, -1.790e-03, 1.426e-02, -1.089e-01, -2.705e-01, -4.928e-02, -1.672e-01, 8.665e-02, -3.057e-01, 1.749e-01, 6.896e-02, -2.513e-01)); + r += mul(s0_5, M4(4.361e-03, -2.893e-02, 4.167e-04, 3.450e-02, -4.229e-02, -7.990e-03, 3.401e-02, -7.631e-03, 1.931e-02, 7.449e-02, 1.669e-02, -4.852e-02, -8.031e-02, 3.757e-02, -1.409e-01, -1.088e-01)); + r += mul(s0_6, M4(1.656e-02, 7.477e-02, -2.463e-02, 2.680e-02, 6.169e-02, -1.259e-04, 6.755e-03, 2.400e-02, 4.599e-02, 2.553e-03, -1.681e-02, 5.101e-02, 1.828e-03, 6.240e-02, -2.371e-02, 5.791e-02)); + r += mul(s0_7, M4(1.073e-03, -3.600e-02, -2.445e-02, 1.764e-02, 5.542e-02, 4.947e-02, 6.553e-03, 7.913e-03, 4.885e-02, -9.868e-02, -1.274e-01, -2.245e-02, 7.077e-02, 4.840e-02, 1.096e-03, 7.830e-02)); + r += mul(s0_8, M4(1.959e-02, 2.715e-02, -7.821e-03, 2.369e-02, -8.008e-02, 4.740e-02, 7.136e-03, 1.902e-02, 9.085e-02, -4.548e-02, 9.690e-03, 7.554e-03, -2.610e-02, 8.072e-02, -1.039e-01, -9.924e-03)); + r += mul(s1_0, M4(-5.144e-02, -1.175e-01, 1.031e-01, -3.103e-01, -6.668e-02, 1.290e-01, -9.938e-02, 1.466e-01, -5.311e-02, 2.059e-02, -1.172e-01, -3.354e-02, 4.052e-02, 1.589e-02, 1.572e-02, 
5.945e-02)); + r += mul(s1_1, M4(-1.275e-01, -2.448e-02, -1.102e-03, -3.236e-01, -7.084e-02, 1.954e-01, 2.087e-02, -9.257e-03, 1.603e-02, -1.903e-03, 5.313e-02, -1.290e-02, 1.675e-01, 8.260e-02, 4.374e-02, 4.229e-02)); + r += mul(s1_2, M4(-1.615e-01, -7.282e-02, -4.499e-02, -1.533e-01, 6.813e-02, 3.466e-03, 6.690e-03, 7.790e-02, -5.786e-02, 5.432e-02, 2.534e-02, -3.596e-03, 5.707e-03, -2.349e-02, -6.428e-02, -4.131e-03)); + r += mul(s1_3, M4(1.267e-01, -6.708e-03, -9.339e-02, -4.356e-02, -5.323e-02, 6.610e-02, -7.934e-02, 1.313e-01, 2.120e-02, -1.163e-02, 6.597e-02, -4.234e-02, 7.453e-02, -7.786e-02, -1.250e-02, 1.125e-01)); + r += mul(s1_4, M4(3.665e-01, 1.590e-01, -1.122e-01, 1.540e-01, 3.349e-02, -1.161e-01, 5.773e-02, 2.363e-02, 2.738e-02, 1.204e-01, -3.128e-01, 1.291e-02, -9.935e-02, -1.122e-01, -2.228e-02, 3.867e-02)); + r += mul(s1_5, M4(1.587e-01, 1.904e-01, -3.435e-01, 1.060e-01, -1.010e-01, 5.352e-02, 3.700e-02, -5.632e-02, 8.173e-02, 9.857e-02, -1.753e-01, 4.469e-03, -4.160e-02, -7.350e-02, -8.057e-02, -4.027e-02)); + r += mul(s1_6, M4(9.872e-02, 1.109e-01, 8.054e-02, -1.404e-02, -1.189e-02, -2.346e-02, 8.571e-03, -6.556e-02, 7.537e-02, 9.442e-02, 8.507e-03, 6.746e-02, -8.489e-02, -9.849e-02, -2.410e-02, 1.358e-02)); + r += mul(s1_7, M4(2.501e-02, 3.615e-02, 5.910e-02, 1.600e-02, -5.374e-02, 7.789e-02, -2.909e-02, -5.501e-03, 1.722e-01, 1.175e-01, -1.703e-01, 5.649e-02, -1.099e-01, -6.323e-02, -1.531e-02, -1.376e-02)); + r += mul(s1_8, M4(-2.230e-02, -1.235e-01, 1.060e-01, 4.907e-02, 1.111e-01, 2.390e-02, 1.095e-01, 1.002e-01, -6.648e-03, 1.470e-02, 6.574e-02, -4.105e-02, -4.060e-02, 1.489e-01, -3.608e-02, -1.929e-02)); + r += mul(s2_0, M4(-3.375e-04, 3.077e-03, 9.368e-02, 4.429e-03, 7.119e-02, 1.765e-02, 1.129e-01, -1.313e-01, -1.649e-02, -5.255e-02, 6.884e-03, 2.996e-04, -6.845e-02, -2.114e-02, 6.361e-02, -8.461e-02)); + r += mul(s2_1, M4(-5.765e-02, -1.345e-02, -5.231e-02, -1.190e-02, 2.960e-02, 7.591e-02, 5.526e-02, -1.188e-01, -9.837e-02, 
-1.191e-02, -2.463e-02, -1.121e-01, -4.223e-02, 5.940e-02, -6.918e-02, 1.428e-01)); + r += mul(s2_2, M4(1.044e-02, -7.264e-03, -5.687e-02, -2.949e-02, -7.140e-03, -1.959e-01, 5.113e-02, -3.021e-02, 2.139e-02, 6.523e-02, 1.018e-01, 2.631e-02, -8.545e-03, 8.892e-02, 7.265e-02, -1.880e-01)); + r += mul(s2_3, M4(1.447e-02, -8.470e-02, 7.456e-02, 2.647e-02, 8.139e-02, -6.002e-02, 2.721e-03, -9.280e-02, -1.130e-01, 7.528e-02, -1.555e-02, -6.099e-03, 9.102e-02, 3.104e-02, 1.528e-02, 3.439e-02)); + r += mul(s2_4, M4(-1.070e-01, -2.137e-04, 1.489e-01, -1.613e-01, -1.706e-01, -4.034e-03, -3.536e-02, 1.709e-02, -1.556e-01, 7.692e-02, -1.919e-01, -2.885e-01, 1.885e-01, 4.700e-02, 9.758e-02, 1.347e-02)); + r += mul(s2_5, M4(-1.500e-02, -4.942e-02, 3.868e-02, -5.220e-03, 2.736e-02, 1.739e-01, 1.519e-01, -2.263e-02, -4.559e-02, 1.208e-01, -1.024e-01, -5.350e-03, -1.364e-02, -1.881e-01, -5.185e-02, 2.927e-02)); + r += mul(s2_6, M4(3.519e-02, 5.847e-02, 1.829e-02, 5.118e-02, 2.837e-02, 5.415e-02, 1.346e-02, 6.121e-04, -3.360e-02, -1.113e-01, -2.197e-03, -3.458e-02, -1.608e-02, 3.809e-02, 9.094e-03, 3.780e-02)); + r += mul(s2_7, M4(4.761e-03, -7.532e-02, -5.880e-02, 1.926e-02, -5.061e-03, -1.367e-02, 5.288e-03, -2.457e-02, 4.434e-02, 5.182e-02, 9.892e-02, -8.780e-02, -4.478e-02, -6.584e-02, -1.004e-01, 3.399e-02)); + r += mul(s2_8, M4(-2.743e-02, 4.934e-02, 7.882e-02, -4.161e-02, 1.574e-01, 1.455e-02, -5.685e-04, 1.564e-02, -4.879e-02, -1.029e-01, -3.130e-02, -8.087e-02, 1.363e-01, -2.098e-02, 5.942e-02, -8.210e-03)); + r += mul(s3_0, M4(-1.374e-02, 7.180e-02, 6.022e-02, 8.669e-02, 6.722e-03, -5.428e-02, 4.097e-02, -5.723e-02, 1.829e-02, 6.248e-03, 8.819e-03, -1.820e-02, 2.561e-02, -8.226e-02, 3.573e-04, 9.531e-03)); + r += mul(s3_1, M4(-6.223e-03, -1.018e-01, -7.917e-02, 2.553e-02, -7.636e-02, 8.827e-02, -7.935e-02, 1.071e-02, -1.663e-02, -6.168e-02, 3.786e-02, -1.260e-01, -5.847e-02, -4.430e-02, 1.259e-02, 1.584e-02)); + r += mul(s3_2, M4(-3.766e-02, -1.428e-02, 5.059e-02, 
4.313e-02, -4.671e-03, -8.941e-02, 3.086e-02, 1.166e-02, 1.931e-02, -1.941e-03, 5.253e-02, 8.497e-03, -4.258e-03, -1.427e-03, -2.630e-02, -2.615e-02)); + r += mul(s3_3, M4(1.434e-02, -7.968e-03, 3.015e-02, 1.365e-01, 3.686e-02, 2.893e-02, -1.644e-02, -6.397e-02, -1.086e-01, -3.093e-02, 3.116e-02, -2.145e-03, -2.071e-03, 7.337e-02, -8.286e-02, -3.803e-02)); + r += mul(s3_4, M4(-1.682e-01, 1.651e-01, 3.779e-01, -2.107e-01, -2.688e-01, 3.074e-02, -4.198e-02, -3.471e-02, -1.145e-01, 6.096e-02, -7.697e-02, -1.275e-01, 2.926e-02, -8.640e-02, -4.570e-02, 9.212e-03)); + r += mul(s3_5, M4(-1.003e-01, -4.532e-02, 1.185e-01, -5.018e-02, 3.630e-02, 1.052e-01, 2.156e-02, 7.973e-02, -1.824e-01, 7.134e-02, -2.839e-02, -2.972e-02, 4.604e-02, 1.248e-01, 2.946e-02, -1.004e-01)); + r += mul(s3_6, M4(-1.382e-01, -6.426e-03, 1.134e-02, -1.395e-02, 8.457e-03, 9.588e-02, 3.972e-02, 1.257e-02, -2.861e-02, -6.084e-02, 7.592e-03, -2.295e-03, 5.240e-02, 2.015e-02, 2.799e-03, 5.091e-02)); + r += mul(s3_7, M4(-9.311e-02, -6.947e-02, -1.227e-02, 3.166e-02, 1.794e-02, -5.997e-02, -3.435e-02, -8.391e-03, -2.391e-02, -3.329e-02, 7.543e-02, 3.182e-02, 1.551e-01, 9.505e-02, -5.603e-02, 5.696e-02)); + r += mul(s3_8, M4(-1.488e-02, 3.870e-02, 8.138e-02, 3.946e-02, -9.627e-03, -3.451e-02, -1.232e-02, -6.382e-03, -1.180e-02, -1.047e-01, -3.782e-02, -3.286e-02, -9.891e-02, -7.887e-02, 2.781e-03, 4.923e-03)); + r += mul(s4_0, M4(-1.543e-02, 3.364e-02, 5.388e-02, -2.789e-02, 6.275e-02, 6.273e-02, -4.797e-02, -1.786e-02, 7.741e-03, 4.798e-02, -6.949e-03, 4.940e-02, 6.214e-02, -2.850e-02, -8.802e-02, 6.091e-02)); + r += mul(s4_1, M4(-4.386e-02, -6.506e-02, -1.533e-01, -1.125e-01, -1.905e-01, 9.027e-02, -2.007e-02, 4.063e-02, -2.500e-02, -5.204e-02, -2.493e-02, 8.728e-02, 2.507e-02, -1.505e-02, 7.927e-02, 7.878e-02)); + r += mul(s4_2, M4(-4.679e-02, -6.334e-02, 2.318e-02, -3.277e-02, 2.703e-02, 9.973e-03, 4.462e-02, 5.833e-02, -3.777e-03, -6.446e-02, 3.411e-02, 4.109e-02, -4.104e-02, 3.981e-02, -2.533e-02, 
-1.180e-02)); + r += mul(s4_3, M4(1.431e-02, 1.575e-01, -1.386e-02, -5.258e-02, 1.460e-02, 9.747e-02, -3.904e-02, -4.721e-02, 8.228e-03, -5.732e-02, 5.413e-03, 2.851e-02, -8.223e-02, 9.280e-02, 2.836e-02, 8.095e-02)); + r += mul(s4_4, M4(2.747e-01, -2.144e-01, -8.618e-02, -1.085e-01, 2.071e-01, 2.453e-01, 1.405e-01, 1.423e-01, -1.439e-01, -1.044e-01, -7.646e-02, -2.316e-04, -3.095e-02, -5.742e-02, 6.407e-02, -1.216e-02)); + r += mul(s4_5, M4(1.107e-01, -1.380e-02, 3.637e-02, 2.192e-02, 1.241e-01, 1.456e-01, -9.515e-02, 6.993e-02, -2.811e-02, -2.833e-02, 1.723e-01, 6.716e-02, -2.092e-02, -9.826e-03, -3.243e-02, -5.823e-02)); + r += mul(s4_6, M4(-1.301e-02, -2.547e-03, 3.076e-02, -3.167e-02, 4.486e-02, 1.551e-01, -8.204e-02, 8.202e-03, 7.205e-02, 1.082e-01, -4.516e-02, 5.511e-02, -4.204e-02, -5.157e-02, -9.286e-02, 2.381e-02)); + r += mul(s4_7, M4(4.478e-02, 9.730e-02, -6.711e-02, -1.573e-02, 1.599e-01, 7.125e-04, 8.364e-02, -3.236e-02, 1.162e-01, 1.376e-01, -1.624e-03, 1.320e-02, -2.593e-02, -2.171e-02, -3.284e-02, 1.282e-02)); + r += mul(s4_8, M4(1.971e-02, 6.643e-02, 2.099e-02, -4.846e-02, 6.507e-02, 4.767e-02, -9.634e-02, -7.191e-02, -4.709e-02, 2.865e-02, 5.879e-02, 6.538e-03, 6.665e-02, 3.576e-02, -6.457e-02, -9.140e-03)); + r += mul(s5_0, M4(4.701e-02, 6.554e-02, 2.684e-02, -6.159e-02, -1.654e-02, 3.635e-02, -1.624e-02, -6.742e-02, -3.377e-03, 4.140e-02, -5.588e-02, -3.041e-02, -4.195e-02, 4.853e-02, -5.634e-02, -1.474e-02)); + r += mul(s5_1, M4(-1.170e-01, 1.339e-02, -1.540e-01, -2.260e-01, 1.586e-04, 1.326e-02, 6.470e-02, 8.587e-02, -1.361e-03, -2.062e-02, -4.187e-02, 2.213e-01, -1.077e-01, -9.440e-03, 9.954e-02, 7.201e-02)); + r += mul(s5_2, M4(5.207e-02, -2.738e-02, -5.539e-02, -7.207e-02, 2.145e-02, -1.095e-01, 4.503e-03, 3.237e-02, 1.250e-01, 6.253e-02, 3.567e-02, 8.087e-02, -2.536e-02, 1.982e-02, -5.218e-02, 2.405e-02)); + r += mul(s5_3, M4(2.712e-02, 3.756e-02, 1.426e-02, -5.842e-02, -2.080e-02, 2.785e-02, -4.829e-02, -5.688e-02, 4.663e-02, -4.626e-02, 
1.441e-02, 4.917e-03, 1.114e-02, 4.579e-02, 5.647e-02, 5.263e-02)); + r += mul(s5_4, M4(6.849e-02, 6.650e-02, 1.116e-01, -1.122e-02, 3.089e-02, 1.219e-01, 3.562e-02, 7.543e-02, 1.527e-02, 2.334e-01, 1.237e-01, -8.526e-02, -2.102e-02, 1.828e-02, -7.334e-02, -8.291e-02)); + r += mul(s5_5, M4(8.283e-02, 2.493e-03, -4.096e-02, -2.882e-02, -4.520e-02, 1.016e-01, -8.556e-03, -2.354e-02, -1.379e-01, -8.510e-02, 3.279e-03, 9.362e-05, 2.788e-02, 2.075e-02, 1.810e-02, 6.659e-02)); + r += mul(s5_6, M4(-2.978e-02, -3.484e-02, 4.946e-03, -6.533e-02, 5.183e-02, 4.185e-02, -8.027e-03, 2.699e-02, 4.217e-03, 8.874e-02, -1.477e-02, -7.149e-03, -3.662e-02, -1.436e-02, -3.279e-02, -1.546e-02)); + r += mul(s5_7, M4(6.874e-02, 1.554e-02, -8.766e-02, 6.074e-03, -4.165e-02, -4.333e-02, 1.862e-02, 1.334e-02, 5.034e-02, -4.829e-02, 6.277e-02, -3.653e-02, -3.265e-02, 5.262e-02, 9.616e-02, -6.061e-02)); + r += mul(s5_8, M4(3.213e-02, 4.388e-02, -8.565e-03, -3.604e-02, 7.287e-02, -4.806e-02, -4.418e-02, 3.579e-02, 3.314e-02, 2.701e-02, 1.081e-01, 6.094e-02, -6.816e-02, 9.806e-02, 1.029e-01, -7.738e-02)); + r += mul(s6_0, M4(-8.138e-02, 1.322e-02, -3.581e-02, -3.120e-02, -3.381e-02, -1.631e-02, 1.721e-02, -5.026e-02, 2.633e-02, -6.625e-02, -3.507e-02, 6.763e-02, -2.698e-02, -7.282e-02, -1.106e-02, -3.171e-02)); + r += mul(s6_1, M4(-1.629e-01, -2.432e-02, -1.139e-01, -1.502e-01, -1.430e-01, 3.546e-02, -6.622e-02, -6.755e-02, -1.659e-02, -1.034e-01, -3.824e-02, 2.842e-02, -3.059e-02, -1.435e-02, -1.019e-03, 1.160e-01)); + r += mul(s6_2, M4(5.043e-02, 8.559e-02, -6.544e-02, 2.216e-02, 8.079e-02, 7.001e-02, -2.152e-02, -2.082e-02, -1.592e-02, -5.787e-02, 7.907e-02, 8.431e-02, -2.620e-02, -1.735e-02, 7.290e-03, 7.536e-02)); + r += mul(s6_3, M4(1.025e-01, 1.100e-01, 2.124e-02, 8.235e-02, 1.642e-02, 1.116e-02, -7.398e-02, 1.650e-01, 1.213e-01, 8.291e-02, -1.093e-01, 1.562e-01, 5.294e-02, 1.630e-02, 1.326e-02, 1.377e-02)); + r += mul(s6_4, M4(-1.477e-01, 2.659e-01, 2.785e-01, -1.702e-02, 5.381e-02, 
6.180e-02, 2.255e-01, -9.250e-02, 8.226e-03, -3.286e-02, -1.113e-01, 2.175e-01, 9.517e-02, -5.973e-02, 1.623e-01, 3.531e-03)); + r += mul(s6_5, M4(-1.445e-02, 8.014e-02, 7.004e-02, -3.550e-03, -6.454e-03, -9.602e-02, -1.880e-02, -1.644e-02, -3.149e-02, 1.437e-02, -2.019e-01, 9.239e-02, 4.385e-02, -1.358e-03, 6.189e-02, 1.075e-01)); + r += mul(s6_6, M4(9.062e-02, 1.489e-01, -5.916e-03, -1.207e-02, -1.120e-01, -1.501e-01, -2.361e-02, -1.059e-01, -1.591e-02, -6.948e-02, -4.074e-02, -9.472e-02, 4.182e-02, 8.275e-02, -4.424e-02, -7.178e-03)); + r += mul(s6_7, M4(7.850e-02, -7.276e-02, 1.766e-02, -1.310e-03, -1.840e-02, -5.427e-02, 1.062e-01, -9.790e-02, -1.919e-01, -3.143e-02, -9.817e-02, -2.075e-01, -4.986e-02, -1.970e-01, 5.752e-02, 4.696e-02)); + r += mul(s6_8, M4(5.489e-02, 5.411e-02, -1.389e-02, 1.155e-02, 3.127e-02, -5.246e-02, 4.381e-02, 1.885e-02, 1.272e-01, -7.766e-03, 8.421e-02, -9.790e-02, -4.710e-02, -4.770e-02, 1.131e-02, -1.135e-02)); + r += mul(s7_0, M4(-4.978e-03, -9.304e-03, 5.564e-02, -1.056e-01, 9.026e-02, 1.009e-01, 1.372e-02, 4.889e-02, 1.704e-02, 1.378e-02, 1.201e-02, -2.870e-02, -2.174e-02, 2.770e-02, 1.059e-02, -9.119e-02)); + r += mul(s7_1, M4(4.211e-02, -8.833e-02, 3.120e-02, -7.391e-02, 2.726e-03, 3.702e-02, -1.289e-02, -5.863e-02, -1.261e-02, 1.227e-03, -5.446e-02, -1.759e-03, -3.410e-02, 5.307e-02, 7.014e-03, 7.216e-02)); + r += mul(s7_2, M4(-3.874e-02, -5.673e-02, 6.557e-02, -1.465e-02, 9.029e-03, 6.085e-03, 2.823e-02, 2.922e-02, 1.630e-02, 1.792e-02, 2.728e-02, 1.165e-02, 4.924e-02, 2.339e-02, -3.065e-03, 2.313e-02)); + r += mul(s7_3, M4(1.333e-01, 7.813e-02, 3.825e-03, 8.089e-02, -9.015e-02, -5.962e-02, 4.826e-02, 3.799e-03, 2.899e-02, -4.016e-02, -2.577e-02, 6.960e-02, 6.438e-02, -3.648e-02, -3.539e-02, -5.573e-03)); + r += mul(s7_4, M4(6.912e-02, 4.926e-02, 3.699e-02, 4.860e-02, -5.898e-02, -5.131e-02, 3.639e-02, 1.246e-02, 9.589e-02, 7.513e-02, -8.154e-02, 1.216e-01, 4.442e-02, -2.519e-02, 6.937e-02, 4.519e-02)); + r += mul(s7_5, 
M4(3.508e-02, 5.932e-02, 2.428e-02, 4.604e-02, -3.467e-02, -7.627e-02, -9.483e-02, 2.525e-02, 1.713e-02, -7.534e-02, 7.597e-03, 1.922e-02, -1.988e-02, -2.680e-02, 8.487e-02, 2.842e-02)); + r += mul(s7_6, M4(-4.320e-02, -3.851e-02, 2.136e-02, 3.206e-02, 2.904e-02, 5.274e-02, -1.922e-02, 6.804e-03, 1.996e-02, 4.963e-02, -7.505e-02, -1.301e-02, 3.653e-02, 6.219e-04, -3.436e-02, -5.197e-02)); + r += mul(s7_7, M4(-2.822e-02, 8.996e-03, 2.703e-02, -2.764e-02, -6.224e-02, 1.296e-01, -8.946e-02, 4.212e-04, 2.426e-02, -2.303e-02, -1.459e-02, -9.867e-03, -6.928e-03, 1.073e-02, 3.072e-02, 4.473e-02)); + r += mul(s7_8, M4(1.142e-02, 4.861e-03, -2.562e-02, 1.876e-02, 8.979e-02, 7.380e-02, 1.226e-02, 4.447e-02, -3.478e-02, 4.412e-02, -2.905e-03, -1.803e-02, -8.343e-02, -6.483e-02, 3.517e-02, -5.902e-02)); + r += V4(5.284e-03, -1.982e-02, -9.108e-03, -1.052e-02); + return r; +} + +void Pass15(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + 
V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + 
s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 16 +//!DESC conv15 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, 
y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.682e-02, 2.425e-02, 6.033e-03, -1.354e-02, 4.902e-02, -2.423e-02, -2.191e-02, -1.244e-02, 2.555e-02, 1.398e-02, 1.653e-03, 1.456e-02, 1.836e-02, 4.914e-02, -1.220e-02, 2.402e-02)); + r += mul(s0_1, M4(-1.415e-01, -5.750e-02, 1.052e-01, 2.448e-02, -6.458e-02, 8.314e-03, 7.921e-03, -2.826e-02, 8.116e-02, 2.587e-02, -1.653e-01, 1.925e-02, -3.094e-02, 1.172e-02, 2.412e-02, -1.644e-02)); + r += mul(s0_2, M4(2.726e-02, -2.295e-02, 3.601e-02, -3.420e-02, 2.458e-02, -3.328e-02, 2.415e-02, -3.922e-03, 1.014e-01, 3.177e-02, 8.030e-02, -3.316e-04, -1.294e-01, -3.477e-03, -2.168e-03, -4.343e-02)); + r += mul(s0_3, M4(5.999e-02, -1.569e-03, 4.338e-03, -3.949e-03, 5.016e-02, 1.272e-02, -5.577e-02, 9.612e-03, -4.428e-02, -1.158e-01, -1.747e-03, -3.783e-02, 2.071e-02, -6.513e-02, -9.574e-02, 1.217e-02)); + r += mul(s0_4, M4(-6.832e-02, 3.306e-02, 3.602e-02, -1.176e-01, -1.634e-02, 8.846e-03, -4.939e-02, 3.965e-02, -1.110e-01, 4.198e-02, -1.919e-01, 3.252e-02, -3.235e-01, -4.438e-02, 3.970e-02, -9.790e-02)); + r += mul(s0_5, M4(7.532e-04, -3.379e-02, 1.206e-01, -1.262e-02, 5.126e-03, 1.393e-02, 9.355e-02, -1.952e-02, 1.288e-02, -5.581e-02, 4.603e-02, 
6.297e-03, 7.578e-02, 6.956e-02, 1.251e-02, 8.053e-02)); + r += mul(s0_6, M4(-8.240e-03, -4.579e-02, 8.483e-03, 5.418e-02, 6.400e-02, -3.510e-02, -4.983e-02, 3.210e-02, -7.979e-03, 3.418e-02, 1.844e-02, -1.444e-02, 3.045e-02, -5.388e-02, -3.282e-02, -1.676e-02)); + r += mul(s0_7, M4(-5.499e-02, 4.137e-03, 6.030e-02, 7.094e-02, -6.567e-02, -7.267e-02, -1.662e-02, -7.336e-02, -6.070e-03, -8.680e-02, -3.998e-03, 1.843e-02, -7.485e-02, 3.307e-02, 4.805e-02, 3.993e-02)); + r += mul(s0_8, M4(1.536e-02, 2.666e-02, 3.056e-02, -1.114e-02, 2.242e-02, 6.991e-03, -2.173e-03, 3.283e-02, -5.911e-02, 2.191e-02, 3.963e-02, -2.860e-02, 5.899e-02, -3.175e-02, 5.169e-02, -2.245e-02)); + r += mul(s1_0, M4(6.282e-02, 4.438e-02, -3.383e-02, -3.169e-03, -1.302e-02, 6.962e-02, -1.602e-02, 1.114e-02, -1.264e-02, -3.295e-02, 2.876e-02, 2.982e-02, -3.565e-02, 6.198e-02, -4.253e-02, 1.322e-02)); + r += mul(s1_1, M4(-2.256e-01, 4.261e-02, 7.907e-02, -4.247e-02, -1.645e-02, -4.936e-02, 4.114e-02, -4.063e-02, -2.386e-02, 4.551e-02, -1.485e-01, -1.643e-02, -2.257e-01, -4.944e-02, -2.584e-03, -9.462e-03)); + r += mul(s1_2, M4(4.578e-02, 2.807e-02, -8.221e-03, -6.325e-02, -8.816e-02, -6.825e-03, -2.188e-02, -5.079e-02, 1.666e-02, 2.919e-03, 3.045e-02, -3.505e-02, 2.878e-02, 1.035e-03, 2.864e-02, 1.354e-02)); + r += mul(s1_3, M4(2.020e-01, -8.129e-02, 4.874e-03, -4.772e-02, 1.859e-02, 2.295e-01, -8.389e-02, 1.096e-01, 1.270e-02, 9.785e-02, 1.383e-01, -8.565e-02, 3.833e-02, 5.794e-02, 1.007e-03, -1.219e-01)); + r += mul(s1_4, M4(3.020e-01, 1.345e-01, 1.214e-01, -1.476e-01, 2.721e-02, 5.156e-02, 1.066e-01, -7.236e-02, -6.882e-03, -1.193e-01, -2.321e-01, 1.603e-01, -2.560e-01, -9.591e-02, -7.915e-02, -7.280e-02)); + r += mul(s1_5, M4(5.588e-02, -3.970e-02, 5.567e-02, 9.945e-02, 7.813e-02, 2.689e-03, -4.028e-02, 5.141e-02, -1.109e-01, -1.519e-01, -2.412e-02, 5.308e-02, 4.241e-02, 1.421e-01, 8.538e-02, -3.083e-02)); + r += mul(s1_6, M4(4.734e-02, 1.498e-03, -1.688e-02, 8.204e-02, 6.827e-02, -9.353e-02, 
-6.352e-02, -7.212e-02, -7.166e-03, -2.990e-02, -6.335e-02, 5.694e-02, 3.916e-02, -5.895e-02, -5.882e-02, 2.516e-02)); + r += mul(s1_7, M4(4.143e-02, -8.794e-02, 2.631e-02, 2.151e-01, 5.745e-02, -2.816e-02, 2.094e-02, 4.433e-02, 1.357e-01, -8.145e-02, -5.290e-02, 1.230e-01, -1.741e-02, -3.334e-02, 7.489e-02, -1.465e-02)); + r += mul(s1_8, M4(4.237e-02, 4.545e-02, -1.561e-02, 1.046e-01, 6.236e-02, -3.658e-02, 3.117e-03, -5.313e-02, -5.248e-02, -3.058e-02, 5.975e-02, -4.943e-02, -4.859e-03, 2.759e-02, -6.380e-02, 5.511e-03)); + r += mul(s2_0, M4(-1.698e-02, 2.008e-02, 4.178e-02, 5.254e-03, -1.866e-02, 6.468e-02, 3.264e-02, 4.248e-02, -1.613e-02, 1.055e-02, 4.652e-03, -2.110e-02, -1.071e-02, -2.709e-03, -9.281e-03, 1.559e-02)); + r += mul(s2_1, M4(-1.443e-01, 8.375e-02, 2.033e-02, -3.873e-03, 1.029e-01, 4.871e-02, 5.010e-02, -1.688e-02, -7.934e-02, -9.075e-02, 3.202e-03, 6.989e-03, -1.968e-02, 4.563e-02, -3.979e-02, 5.553e-02)); + r += mul(s2_2, M4(2.630e-02, 5.112e-02, 3.405e-02, -1.654e-01, 4.047e-02, 4.512e-02, 2.716e-02, -1.259e-02, 2.451e-02, -2.394e-02, 4.361e-02, -2.896e-02, 2.850e-02, 1.479e-02, -5.123e-02, 4.183e-02)); + r += mul(s2_3, M4(7.516e-02, -8.210e-03, -4.111e-03, 4.448e-02, -5.627e-02, 2.278e-03, 4.189e-02, -4.233e-02, -2.394e-02, -2.066e-02, 1.324e-03, -3.099e-02, 3.708e-03, 3.445e-02, -1.211e-02, 8.406e-03)); + r += mul(s2_4, M4(4.450e-02, 3.494e-02, 5.925e-03, 2.108e-02, -2.367e-01, -4.596e-03, -5.752e-02, -2.565e-01, 3.773e-01, -1.032e-02, -8.405e-02, -8.625e-02, -6.044e-02, -1.957e-02, 6.003e-02, -1.532e-01)); + r += mul(s2_5, M4(-1.677e-02, -8.374e-02, 9.221e-04, -7.647e-02, -1.666e-02, -1.848e-02, -7.573e-03, -1.446e-02, -4.613e-02, -2.494e-02, 4.640e-02, 2.289e-02, -5.411e-02, 8.961e-03, 1.322e-01, -1.519e-01)); + r += mul(s2_6, M4(-4.333e-02, 4.873e-03, -2.330e-02, -1.203e-02, -1.656e-02, -5.966e-02, -2.807e-03, 2.686e-02, -4.143e-02, -4.584e-03, -2.170e-03, 8.825e-03, 1.156e-02, -5.674e-03, -1.562e-02, -5.008e-03)); + r += mul(s2_7, 
M4(4.915e-02, 2.856e-02, -4.347e-02, 6.325e-02, -1.408e-02, 7.563e-02, 1.125e-01, -1.700e-01, 6.698e-03, -9.449e-02, -1.623e-03, 7.157e-02, -3.837e-02, 1.442e-02, 6.002e-02, 3.344e-02)); + r += mul(s2_8, M4(1.424e-03, -1.889e-02, -2.530e-02, -1.584e-02, 3.956e-02, 2.339e-02, 1.996e-02, 9.705e-03, -5.004e-02, -4.270e-02, 1.243e-02, 2.805e-02, -2.629e-02, 1.128e-02, -2.821e-02, 4.977e-02)); + r += mul(s3_0, M4(2.344e-03, -1.510e-02, 2.258e-02, -2.662e-02, 4.342e-03, -2.228e-02, -1.850e-02, 1.700e-02, -4.349e-02, 1.978e-02, -7.904e-03, 6.129e-02, -8.322e-03, -3.489e-02, 3.685e-02, 1.218e-02)); + r += mul(s3_1, M4(1.137e-01, -2.344e-01, -2.425e-01, -2.087e-01, -7.206e-02, 2.724e-03, 2.830e-02, -3.068e-02, -8.735e-02, -2.185e-03, 3.007e-02, 5.390e-02, 1.068e-01, -1.539e-02, -8.570e-02, 4.549e-02)); + r += mul(s3_2, M4(3.499e-02, 1.594e-02, -9.933e-02, -7.474e-02, 1.051e-02, -2.210e-02, 3.491e-02, -8.810e-03, -4.210e-02, 1.901e-02, 2.575e-02, -1.876e-02, 9.872e-03, 2.527e-02, 1.904e-02, 3.485e-02)); + r += mul(s3_3, M4(1.892e-02, 5.257e-03, 6.593e-02, -1.068e-01, -1.009e-01, -2.306e-02, -1.774e-02, -1.073e-01, -6.668e-02, -3.584e-02, -7.708e-03, -3.551e-02, -3.283e-03, 3.566e-03, 1.590e-03, -2.380e-02)); + r += mul(s3_4, M4(6.213e-04, -3.430e-02, -1.038e-01, 1.743e-01, -1.318e-01, -3.481e-02, 4.391e-02, -5.380e-02, 1.526e-01, -7.420e-03, -4.670e-02, -1.651e-02, -1.487e-01, -8.589e-02, -1.704e-01, 1.922e-02)); + r += mul(s3_5, M4(1.096e-01, -4.456e-02, 1.130e-01, -2.652e-01, -4.299e-02, -1.735e-02, 1.206e-01, -5.507e-02, -2.571e-02, 7.543e-03, -5.169e-02, 1.070e-01, -6.630e-02, -2.834e-02, 1.781e-01, 4.697e-02)); + r += mul(s3_6, M4(-1.036e-02, -2.780e-02, 7.376e-03, -2.156e-02, -2.542e-02, -4.496e-02, 1.697e-02, 2.637e-02, -2.082e-02, 2.391e-02, -2.514e-03, 7.337e-02, 7.580e-02, -4.554e-02, -2.737e-02, 3.262e-02)); + r += mul(s3_7, M4(-7.323e-03, 4.827e-02, -2.236e-02, 3.041e-02, 1.176e-02, -5.546e-02, 7.340e-02, -6.719e-03, -2.321e-02, 4.693e-02, -3.563e-02, 1.035e-02, 
-1.612e-02, -9.980e-02, -9.701e-02, -6.659e-02)); + r += mul(s3_8, M4(-1.683e-02, 1.837e-02, -1.291e-02, -6.702e-02, -8.351e-02, 1.838e-02, 4.611e-03, 1.597e-02, -1.131e-02, -5.924e-03, -2.187e-02, 7.444e-02, 1.927e-02, 1.152e-01, 1.523e-01, 2.847e-02)); + r += mul(s4_0, M4(-1.699e-02, -1.167e-02, -1.304e-03, 2.278e-02, -6.011e-02, -8.199e-03, -2.469e-02, -2.724e-02, -7.896e-02, 3.514e-02, -4.913e-02, 4.323e-02, -4.083e-02, -2.205e-02, 4.041e-03, -3.569e-02)); + r += mul(s4_1, M4(-1.868e-02, -1.643e-03, 4.291e-02, 2.588e-02, 4.562e-02, 2.170e-02, 4.389e-02, 2.023e-02, -3.285e-02, 1.572e-01, -5.427e-02, -3.453e-02, 1.614e-01, -3.315e-02, 9.291e-02, 2.523e-02)); + r += mul(s4_2, M4(-5.716e-02, 2.952e-02, 3.345e-03, 1.417e-02, 7.218e-02, 3.521e-02, 5.576e-02, -1.665e-02, 5.210e-02, -1.169e-03, -1.382e-02, 2.093e-02, 1.369e-01, 2.903e-02, 6.485e-03, 2.985e-02)); + r += mul(s4_3, M4(-5.154e-03, -7.552e-02, -4.847e-02, -1.900e-02, -4.854e-02, -1.988e-02, 2.748e-02, 1.465e-02, -4.519e-02, -1.215e-01, -8.303e-02, -5.047e-02, 5.773e-03, 8.883e-02, -3.625e-02, 3.724e-02)); + r += mul(s4_4, M4(1.215e-01, -6.698e-02, 1.472e-01, 4.760e-02, 4.249e-02, -9.318e-02, 2.074e-01, -7.291e-02, -2.521e-02, -1.567e-01, 5.401e-02, 1.068e-01, -1.088e-01, -4.689e-02, 1.450e-01, -2.702e-02)); + r += mul(s4_5, M4(-7.204e-04, 4.627e-02, 8.406e-02, -4.793e-02, -1.044e-01, -1.752e-02, -1.223e-01, -2.838e-02, -1.513e-02, 2.866e-02, -6.117e-02, 6.588e-02, -4.003e-02, 2.135e-02, -3.993e-02, 3.138e-02)); + r += mul(s4_6, M4(-1.858e-02, 1.069e-01, 1.917e-02, -5.787e-03, 2.869e-03, -1.855e-02, 2.874e-02, -6.176e-03, 5.764e-02, -4.411e-02, 2.198e-02, -9.514e-02, -6.286e-02, 4.674e-02, -6.493e-02, 1.780e-02)); + r += mul(s4_7, M4(1.470e-01, -3.257e-02, -4.421e-02, 5.879e-02, -2.092e-02, 7.217e-02, 6.232e-02, 8.477e-03, 1.634e-02, -4.973e-03, -1.537e-02, -8.101e-02, -1.271e-01, 1.621e-01, 7.392e-02, 2.400e-02)); + r += mul(s4_8, M4(-3.555e-02, -3.285e-02, 7.891e-02, -3.678e-02, -6.757e-02, 4.733e-02, 
-1.116e-02, -2.527e-02, -2.691e-02, 3.767e-02, 2.019e-02, -2.048e-02, -5.428e-02, 3.365e-02, -4.707e-02, -7.004e-02)); + r += mul(s5_0, M4(-2.108e-02, 1.779e-02, -2.833e-03, 3.292e-03, 5.820e-02, 7.601e-02, 6.101e-02, -1.330e-03, -1.138e-01, 7.445e-02, -7.096e-02, 6.305e-02, -2.494e-02, 2.407e-02, 8.140e-03, -1.834e-02)); + r += mul(s5_1, M4(-2.880e-02, 5.085e-03, 2.434e-02, 2.508e-02, 1.602e-01, 1.564e-02, 5.507e-02, -7.678e-02, 9.892e-02, 3.094e-02, -2.829e-02, -7.596e-02, -3.132e-02, -5.615e-02, 1.057e-02, -3.444e-02)); + r += mul(s5_2, M4(-1.577e-01, 8.848e-03, 2.636e-02, 5.279e-02, 1.705e-02, 1.786e-02, 2.434e-02, -7.094e-02, -7.860e-03, -5.334e-03, 1.383e-02, -5.143e-03, -2.998e-02, 2.062e-02, -4.367e-02, 5.676e-04)); + r += mul(s5_3, M4(-1.425e-02, -4.238e-02, -6.793e-02, 4.202e-02, -7.728e-02, -9.246e-02, -7.324e-02, 1.616e-01, -1.074e-01, -2.153e-01, 5.838e-02, -1.196e-01, 1.985e-02, -5.314e-03, 5.793e-02, 3.626e-02)); + r += mul(s5_4, M4(-1.126e-01, 3.180e-02, 2.878e-01, -1.492e-02, 1.474e-01, -1.059e-01, 4.287e-01, -1.011e-01, -1.807e-01, -2.479e-01, 1.394e-01, 1.323e-02, -1.739e-02, 1.892e-01, -1.471e-02, 7.751e-02)); + r += mul(s5_5, M4(-2.764e-01, -2.786e-02, -2.956e-02, 3.056e-02, 1.450e-01, 8.922e-02, -3.140e-02, 9.458e-03, -8.860e-02, -2.775e-03, 1.361e-02, -3.029e-02, 6.369e-02, -1.583e-02, 2.707e-02, 3.723e-02)); + r += mul(s5_6, M4(-2.289e-02, 4.078e-02, 3.194e-02, -9.888e-02, 2.521e-02, 2.108e-02, -9.092e-03, 2.643e-02, -9.351e-03, 3.902e-02, 3.222e-02, 4.324e-02, 2.597e-02, -2.693e-02, 3.525e-02, 3.717e-04)); + r += mul(s5_7, M4(1.820e-02, 2.229e-01, 1.687e-01, 6.165e-02, -3.438e-02, -2.363e-02, 3.922e-02, -5.587e-02, -6.921e-02, 5.308e-02, -7.363e-02, -1.111e-01, 2.304e-02, -2.049e-02, 2.426e-02, -3.094e-02)); + r += mul(s5_8, M4(-3.477e-02, 7.511e-02, -1.020e-02, 1.072e-02, 1.418e-02, -1.736e-02, 4.884e-02, -1.775e-02, -5.870e-02, 2.241e-02, 1.193e-02, -3.586e-02, -1.182e-02, 3.054e-03, 8.716e-03, -5.858e-02)); + r += mul(s6_0, M4(1.296e-02, 
4.521e-02, 5.553e-02, 7.200e-03, 2.228e-01, 1.527e-01, 8.212e-02, 3.697e-02, -1.005e-01, 2.319e-02, 6.224e-02, -3.363e-02, -2.382e-02, 4.781e-03, 2.328e-02, 2.167e-02)); + r += mul(s6_1, M4(6.700e-02, -8.083e-02, -5.504e-02, -4.573e-02, 8.391e-02, -1.270e-01, 1.212e-01, 1.590e-01, 5.235e-02, 5.567e-03, -9.952e-02, 5.426e-02, 1.405e-01, 2.182e-02, 2.855e-02, -5.070e-02)); + r += mul(s6_2, M4(1.222e-02, -1.297e-02, 3.429e-02, 5.959e-03, 3.504e-02, 2.729e-02, -1.018e-01, 2.322e-02, -2.360e-02, 5.587e-02, 2.109e-02, -2.796e-02, 1.145e-02, 1.383e-02, -4.523e-02, 4.067e-02)); + r += mul(s6_3, M4(3.378e-02, 4.414e-02, 4.556e-02, 9.837e-02, 9.577e-02, -1.596e-01, 5.565e-02, 3.433e-02, -3.580e-02, 6.830e-02, -8.804e-03, 1.014e-01, 6.641e-03, -3.026e-02, -1.278e-02, -8.684e-02)); + r += mul(s6_4, M4(1.560e-02, 1.232e-01, -8.372e-03, -1.660e-01, -3.968e-01, 2.330e-01, 2.028e-02, 2.646e-01, 6.223e-02, 2.275e-01, 2.140e-01, -2.405e-02, 4.579e-01, -4.541e-02, 9.614e-02, -7.645e-02)); + r += mul(s6_5, M4(-3.295e-02, -2.743e-02, -4.794e-02, 1.720e-02, -7.849e-02, 1.575e-01, -1.164e-02, -2.347e-01, 4.284e-02, 1.307e-02, 4.038e-02, -1.861e-02, 1.120e-01, -9.979e-02, 4.783e-02, 6.604e-02)); + r += mul(s6_6, M4(8.715e-03, 2.297e-02, 6.982e-02, 3.485e-02, 6.133e-04, -7.565e-02, 5.968e-02, 3.948e-02, 4.576e-02, -4.137e-02, 2.947e-02, -2.206e-02, 4.394e-02, 9.766e-03, 2.729e-02, 4.977e-02)); + r += mul(s6_7, M4(6.549e-02, -8.800e-02, -4.819e-03, 5.421e-02, -1.500e-01, 2.293e-01, -4.927e-02, -8.280e-02, -7.289e-02, 4.010e-02, -1.511e-03, 1.767e-02, 1.719e-01, -1.322e-01, 4.552e-02, 5.485e-02)); + r += mul(s6_8, M4(-2.826e-03, 3.567e-02, -1.578e-02, -4.386e-02, 1.998e-02, 5.044e-02, -6.815e-02, 9.634e-02, -1.888e-02, -3.089e-02, 2.895e-02, -3.653e-02, 2.965e-03, -2.441e-02, 6.077e-03, 1.507e-02)); + r += mul(s7_0, M4(1.559e-02, -2.477e-02, 1.646e-02, -2.687e-03, 3.154e-02, -3.535e-02, -4.525e-02, -4.382e-03, -1.899e-02, 4.651e-02, -2.069e-02, 4.084e-02, -9.179e-02, 7.984e-02, -2.161e-02, 
8.234e-03)); + r += mul(s7_1, M4(3.448e-01, -1.541e-01, 2.134e-02, 1.274e-01, -2.197e-02, 1.101e-01, 5.202e-02, 5.462e-04, -2.449e-02, 3.539e-02, 5.465e-03, 5.482e-04, -5.681e-02, 6.304e-02, 3.042e-02, -6.518e-02)); + r += mul(s7_2, M4(3.896e-02, -5.241e-02, -1.705e-02, 1.453e-02, -2.905e-02, 2.946e-02, -3.594e-02, 9.768e-03, 5.370e-02, 3.684e-02, 4.114e-02, -2.328e-02, -1.514e-02, 1.587e-02, -4.130e-02, 2.270e-02)); + r += mul(s7_3, M4(5.886e-02, -1.587e-02, -1.114e-01, 1.225e-01, -1.327e-02, -1.252e-02, 1.093e-03, -2.380e-02, -7.611e-02, 1.196e-01, 3.984e-02, 1.318e-02, -2.988e-02, 2.065e-02, -2.236e-02, -1.081e-02)); + r += mul(s7_4, M4(1.103e-01, 4.765e-02, -5.338e-02, -2.094e-01, -8.771e-03, 1.563e-02, 4.116e-02, 1.174e-01, 6.051e-02, 1.695e-01, -2.698e-01, 4.236e-02, 2.473e-01, 7.253e-02, 6.745e-02, -5.874e-02)); + r += mul(s7_5, M4(1.658e-02, -2.497e-02, 2.890e-02, -4.542e-02, -1.103e-02, 8.418e-03, -6.931e-02, -4.678e-02, -5.040e-02, 3.276e-02, 5.987e-02, -8.001e-03, 6.639e-03, -1.172e-02, -1.427e-02, 3.096e-02)); + r += mul(s7_6, M4(-4.578e-02, 2.777e-02, 1.938e-02, -5.613e-02, -1.625e-03, 6.662e-03, 3.588e-03, 1.572e-02, 6.106e-02, -8.091e-02, -8.481e-03, 2.078e-02, -1.129e-02, 1.477e-02, 1.200e-02, 5.556e-02)); + r += mul(s7_7, M4(-1.667e-02, -8.641e-03, -6.570e-02, -6.897e-02, 2.494e-02, -2.685e-02, 3.873e-02, 7.151e-02, 1.727e-02, -1.895e-01, 5.898e-03, 8.379e-02, 3.347e-02, -7.388e-03, -1.169e-02, 9.291e-02)); + r += mul(s7_8, M4(-4.583e-02, 4.776e-02, 3.371e-02, 9.204e-02, 5.692e-03, -2.458e-02, -2.136e-03, -6.759e-03, -1.187e-02, -4.081e-02, -3.127e-04, -5.903e-02, -2.843e-02, 2.085e-02, 7.936e-03, -3.616e-02)); + r += V4(-3.635e-03, -1.028e-02, -3.650e-04, -1.679e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, 
V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-8.601e-03, -6.582e-03, -3.411e-02, 1.939e-02, 4.268e-02, 6.203e-02, -8.224e-03, -3.973e-02, 1.102e-03, -2.386e-02, 2.198e-03, -2.039e-02, 1.227e-02, 5.214e-02, 8.288e-02, -2.762e-02)); + r += mul(s0_1, M4(-1.028e-01, -3.378e-02, 3.985e-03, -6.257e-02, -8.930e-02, 2.849e-02, 2.765e-02, 5.202e-03, -6.127e-02, 5.845e-02, 3.884e-02, 3.214e-02, 5.885e-02, 1.254e-01, -4.733e-02, 1.318e-02)); + r += mul(s0_2, M4(-1.913e-02, -2.165e-02, -4.609e-02, -4.058e-03, 5.175e-02, -4.665e-02, -2.321e-03, 3.057e-02, 1.789e-02, -5.633e-03, 5.981e-04, 2.946e-02, -7.567e-02, 7.726e-02, -1.335e-02, -2.231e-02)); + r += mul(s0_3, M4(-4.422e-04, 3.932e-03, -3.747e-02, 4.274e-02, -9.728e-02, -3.055e-02, 8.176e-02, -1.070e-02, -5.158e-03, 3.761e-02, -6.999e-02, -7.594e-02, 5.391e-02, 5.079e-02, 1.043e-02, -5.175e-02)); + r += mul(s0_4, M4(1.082e-01, 5.860e-02, 3.859e-02, -4.705e-02, 1.514e-01, 6.110e-02, -4.016e-02, 4.433e-02, -3.790e-02, 7.997e-02, 1.072e-01, 1.340e-01, -5.815e-02, 1.407e-01, -1.296e-01, -4.753e-03)); + r += mul(s0_5, M4(-9.001e-02, -7.560e-02, -1.656e-02, -2.018e-02, -3.487e-02, -4.206e-02, -1.692e-02, -9.395e-02, -5.462e-02, -5.607e-02, -5.984e-02, -4.008e-02, -6.659e-03, 8.550e-02, 1.101e-01, -5.317e-02)); + r += mul(s0_6, M4(6.021e-02, 4.988e-03, 2.403e-02, 4.262e-02, 1.789e-02, -1.716e-02, 3.987e-02, -1.076e-01, -4.748e-02, -1.764e-02, -2.308e-02, -6.401e-03, -4.589e-02, 5.688e-03, 6.622e-02, -1.210e-02)); + r += mul(s0_7, M4(-3.895e-02, -7.578e-02, -1.271e-02, 3.790e-02, 2.351e-02, -7.459e-03, -7.894e-02, 8.670e-02, 3.250e-02, 2.430e-02, 1.983e-01, 
-5.784e-02, 7.884e-03, 3.934e-02, 3.260e-02, -2.293e-03)); + r += mul(s0_8, M4(-5.708e-03, 7.520e-03, 7.843e-03, -5.089e-02, 1.425e-02, -3.691e-02, 2.940e-02, -5.235e-02, -5.730e-03, 4.717e-02, -6.712e-02, -1.439e-02, 3.397e-03, -2.686e-02, -3.211e-02, 3.169e-02)); + r += mul(s1_0, M4(1.214e-01, 3.325e-02, -3.898e-02, -2.344e-02, -9.315e-04, -2.041e-02, 5.530e-02, -2.324e-02, 4.248e-02, 2.593e-02, 3.256e-02, -7.669e-02, 1.557e-01, 4.900e-02, 3.043e-02, 7.165e-02)); + r += mul(s1_1, M4(1.686e-02, -3.408e-03, 2.398e-02, 1.666e-02, 1.708e-01, 1.248e-01, 1.701e-02, 3.409e-02, 3.102e-02, -3.924e-02, -2.257e-02, 9.244e-02, -2.682e-01, -5.456e-04, -3.090e-02, -5.209e-03)); + r += mul(s1_2, M4(-2.618e-02, -1.067e-01, -4.634e-02, 4.220e-02, 2.643e-02, 9.070e-02, -5.263e-02, 3.797e-02, -5.021e-02, -1.616e-02, 4.030e-03, -3.554e-02, 2.286e-02, -1.367e-02, 3.119e-02, 1.015e-02)); + r += mul(s1_3, M4(-2.950e-02, 1.674e-02, 2.861e-02, 1.752e-02, 4.014e-02, -2.871e-02, 8.586e-02, -8.958e-02, -7.844e-02, 1.151e-01, -2.350e-02, -7.249e-02, -9.022e-02, -1.141e-02, -2.103e-02, 4.276e-02)); + r += mul(s1_4, M4(2.100e-01, 1.922e-01, 1.508e-01, -5.379e-02, -3.848e-02, 1.576e-02, -9.808e-02, -1.428e-02, 1.951e-02, 5.196e-02, 9.626e-02, 1.713e-01, 2.680e-02, 3.359e-02, 1.538e-01, -1.678e-01)); + r += mul(s1_5, M4(-2.676e-02, -5.017e-02, 1.711e-02, -8.962e-03, 4.523e-02, -7.491e-03, 1.255e-02, 5.464e-02, -1.861e-02, 1.094e-02, -1.444e-01, 1.939e-01, -5.640e-02, -8.468e-02, 8.226e-02, -5.069e-02)); + r += mul(s1_6, M4(3.551e-02, -5.975e-03, -9.157e-02, 7.229e-02, 2.613e-02, 4.336e-02, 5.629e-02, -4.503e-02, 8.956e-02, -2.216e-02, 4.932e-02, -9.898e-02, -3.663e-02, -5.785e-03, -4.379e-03, -2.736e-02)); + r += mul(s1_7, M4(-4.389e-02, -6.559e-02, -5.097e-02, 1.689e-01, 3.005e-02, -8.020e-02, -5.782e-02, -5.943e-02, -4.287e-02, -4.531e-02, 1.082e-01, 9.099e-02, 2.794e-02, -8.429e-02, 5.045e-02, 3.386e-02)); + r += mul(s1_8, M4(-5.677e-02, 1.692e-02, 1.682e-02, -4.944e-02, -3.779e-03, 
7.763e-03, 6.527e-02, 4.990e-02, 6.281e-02, 4.041e-02, -3.078e-02, -1.150e-01, -2.607e-02, 1.357e-02, 2.250e-02, -1.061e-03)); + r += mul(s2_0, M4(1.168e-02, -3.367e-03, -1.838e-02, -4.618e-04, 7.513e-02, -1.033e-01, 2.657e-02, 4.725e-02, -3.215e-02, -1.714e-02, -2.661e-02, 7.999e-03, -4.803e-02, 4.129e-02, 2.604e-02, 7.083e-03)); + r += mul(s2_1, M4(-5.143e-02, -3.167e-02, -1.929e-02, 2.222e-02, 5.725e-02, -7.263e-02, -2.430e-02, 4.913e-02, 3.209e-02, -1.623e-02, -1.380e-02, -4.886e-03, 7.903e-02, -4.085e-02, 4.204e-02, 7.106e-02)); + r += mul(s2_2, M4(-1.937e-02, -3.364e-02, -1.153e-01, -3.498e-02, 1.881e-02, -5.985e-02, 1.939e-02, 1.030e-02, 9.003e-03, 4.089e-02, -1.083e-02, 3.718e-03, -1.791e-02, -4.284e-02, -4.109e-02, 2.672e-02)); + r += mul(s2_3, M4(5.432e-02, -5.027e-03, 2.724e-02, -6.687e-03, 5.104e-02, -1.343e-01, 9.001e-04, -9.696e-02, -2.136e-02, 1.197e-02, -6.296e-02, -2.861e-02, 2.310e-02, 7.527e-03, 2.415e-02, -4.223e-03)); + r += mul(s2_4, M4(-4.222e-03, -6.228e-02, -1.509e-01, -8.201e-02, 1.155e-01, -4.310e-01, -1.757e-01, 2.800e-01, -4.084e-02, 4.121e-02, -8.184e-02, -4.839e-02, -1.301e-02, 2.227e-02, -1.339e-01, 5.871e-02)); + r += mul(s2_5, M4(-9.458e-03, 7.936e-02, -1.990e-01, 1.793e-02, 2.483e-02, 4.884e-02, -5.488e-02, 4.765e-02, -5.672e-02, -6.385e-02, 4.981e-02, -2.746e-02, 7.430e-03, -3.283e-02, -6.200e-02, -5.240e-02)); + r += mul(s2_6, M4(1.438e-02, 9.243e-03, 3.567e-02, 4.069e-02, 2.272e-02, -7.911e-02, 8.448e-02, 5.721e-02, 7.491e-03, -1.728e-03, -9.171e-04, -3.387e-02, -1.349e-02, -5.936e-03, 4.996e-02, 1.358e-02)); + r += mul(s2_7, M4(-9.619e-03, -6.562e-02, 1.305e-01, -3.528e-02, -4.942e-02, -1.092e-01, 1.221e-01, -1.901e-01, 2.335e-02, -5.793e-03, 3.086e-02, 1.013e-02, -1.117e-02, -3.033e-02, 6.326e-03, -3.675e-02)); + r += mul(s2_8, M4(-2.431e-02, 9.791e-03, 2.136e-02, 6.850e-02, -1.999e-02, -4.306e-02, -1.675e-03, 5.616e-02, 2.023e-02, 2.193e-02, -1.861e-03, -1.003e-01, 4.180e-02, 1.627e-02, -4.788e-02, 3.224e-02)); + r += 
mul(s3_0, M4(3.267e-02, -3.499e-02, -7.962e-02, -7.279e-02, 1.523e-02, 1.956e-02, 8.232e-03, 2.147e-02, 2.011e-02, 5.484e-02, 3.180e-03, 3.681e-02, -3.856e-02, 1.450e-02, -2.829e-02, 1.920e-02)); + r += mul(s3_1, M4(-2.308e-01, -1.285e-01, -1.482e-01, 1.029e-02, -2.365e-02, -3.520e-02, -4.704e-02, 5.336e-02, 1.440e-01, -3.500e-03, -1.131e-02, 6.858e-02, -1.470e-02, 7.266e-03, -5.802e-02, -2.166e-02)); + r += mul(s3_2, M4(2.163e-02, 7.730e-02, 6.253e-02, 5.941e-03, 1.301e-02, -2.332e-02, -1.305e-02, -1.120e-02, -1.467e-03, 7.855e-03, -3.129e-03, 3.219e-02, -2.022e-02, -1.941e-01, -6.155e-02, 2.742e-02)); + r += mul(s3_3, M4(-2.398e-03, -1.927e-03, -2.212e-02, -6.206e-02, 1.487e-01, -7.284e-03, -3.888e-02, 1.234e-01, 1.429e-01, 1.726e-02, -2.675e-03, 3.603e-02, -9.363e-03, -2.200e-02, 4.138e-02, -2.461e-02)); + r += mul(s3_4, M4(5.053e-02, 5.732e-02, -1.047e-01, 3.853e-02, 1.817e-01, -5.273e-02, -1.095e-02, 6.719e-05, -5.379e-02, -2.017e-02, 6.698e-02, 1.299e-03, 8.860e-03, 6.885e-02, -2.918e-01, -2.079e-01)); + r += mul(s3_5, M4(-1.218e-02, -4.579e-02, -2.587e-01, -3.481e-02, -1.030e-01, -1.140e-01, -5.113e-02, -2.633e-02, -2.690e-02, 7.111e-02, 2.261e-02, 5.536e-02, 3.036e-03, -1.335e-01, 1.926e-01, -7.739e-03)); + r += mul(s3_6, M4(9.332e-04, -2.945e-03, -9.446e-03, -2.501e-03, -3.572e-02, -3.509e-02, -5.397e-02, 6.588e-02, -1.826e-02, 1.116e-02, -2.588e-02, 6.469e-02, -3.611e-02, -3.796e-02, -4.616e-02, 5.419e-02)); + r += mul(s3_7, M4(-3.571e-02, 5.027e-02, -3.355e-02, 1.392e-02, -4.209e-02, 1.325e-02, 9.041e-02, 6.816e-03, 5.562e-02, 5.222e-02, 7.295e-02, 3.209e-02, 9.354e-02, 6.756e-02, 2.764e-01, -6.637e-02)); + r += mul(s3_8, M4(-2.529e-02, -8.631e-03, 2.602e-02, 8.646e-03, -1.568e-02, 9.391e-02, -1.124e-01, -4.199e-02, 1.958e-02, 3.264e-02, 7.642e-02, 5.693e-02, 6.536e-02, -8.633e-02, -3.207e-01, -1.304e-01)); + r += mul(s4_0, M4(-4.815e-02, 6.615e-02, 3.727e-02, -3.287e-02, 3.298e-02, 7.714e-02, -4.852e-02, 8.404e-02, 6.533e-02, -1.181e-02, 3.470e-02, 
-5.589e-04, 2.704e-02, -9.294e-02, -5.252e-03, -4.516e-02)); + r += mul(s4_1, M4(9.419e-02, 2.509e-02, 2.955e-03, -2.840e-02, 5.822e-02, -8.378e-02, -7.562e-03, 4.876e-02, -6.550e-02, 1.759e-02, -7.945e-02, 8.628e-02, -1.377e-01, -9.751e-02, -2.122e-02, -1.384e-01)); + r += mul(s4_2, M4(-3.766e-02, 2.824e-02, 3.679e-02, -4.829e-02, 4.727e-02, -5.843e-02, -3.079e-03, -2.803e-03, -5.569e-03, -7.261e-02, 4.468e-02, 1.710e-02, 6.518e-02, 1.492e-03, 2.624e-02, 2.573e-02)); + r += mul(s4_3, M4(7.605e-02, -7.299e-02, 6.526e-03, 3.207e-02, -1.266e-02, -4.438e-03, -3.606e-03, 3.420e-02, 2.385e-02, 2.516e-02, 6.072e-02, -3.670e-02, -2.751e-02, -6.161e-02, 9.597e-02, 9.638e-02)); + r += mul(s4_4, M4(-2.112e-02, -2.241e-02, 7.786e-02, 5.784e-02, -1.561e-01, -4.273e-02, -2.194e-02, -1.705e-01, 6.815e-02, -6.799e-02, -2.013e-03, 1.239e-01, 2.514e-02, -1.079e-01, -7.378e-03, -1.233e-01)); + r += mul(s4_5, M4(-2.638e-02, -1.183e-01, -4.616e-02, -3.746e-03, -2.811e-02, 1.230e-01, -5.174e-02, 7.346e-02, -1.421e-02, -3.076e-02, -1.259e-02, 1.408e-02, -1.610e-02, 2.928e-02, 1.535e-01, 8.465e-02)); + r += mul(s4_6, M4(3.141e-04, 3.631e-02, -1.663e-02, 6.612e-02, -7.921e-03, -8.523e-03, -8.086e-02, 5.970e-02, -1.761e-02, 1.568e-02, -4.761e-02, 2.993e-02, 1.266e-02, -2.407e-02, 4.900e-02, 7.400e-02)); + r += mul(s4_7, M4(-8.669e-02, -9.038e-02, -1.487e-03, -1.967e-02, -1.130e-02, 2.207e-02, -5.322e-03, -8.261e-02, -3.107e-02, -6.808e-02, 4.068e-02, 4.049e-02, -5.777e-02, 1.814e-01, -1.391e-01, -3.705e-02)); + r += mul(s4_8, M4(8.472e-03, 2.173e-02, -4.385e-03, -1.425e-01, 2.747e-02, 1.203e-01, -2.519e-02, 7.543e-02, 1.353e-02, -1.914e-03, -6.678e-02, -4.278e-02, 3.939e-02, 1.849e-02, -1.356e-01, 2.123e-02)); + r += mul(s5_0, M4(2.253e-02, -3.000e-03, 2.766e-02, 4.011e-03, -1.012e-01, -1.245e-01, -1.756e-02, 1.333e-02, -5.544e-02, 8.260e-02, 6.904e-02, -6.567e-02, -2.223e-02, 1.762e-02, -5.783e-03, -1.178e-02)); + r += mul(s5_1, M4(4.984e-02, 5.925e-02, 2.222e-03, -1.903e-02, 6.858e-02, 
-1.444e-02, -8.342e-02, 2.042e-02, -2.418e-02, -8.804e-03, -3.907e-02, 8.371e-02, -2.057e-02, -3.821e-02, 1.112e-03, -5.150e-02)); + r += mul(s5_2, M4(-5.659e-02, 1.374e-02, 1.761e-02, -2.710e-02, 9.238e-02, 2.094e-02, -6.518e-02, -2.742e-02, 3.162e-02, 3.946e-02, 2.735e-02, -1.013e-02, 1.573e-02, 6.860e-02, 2.466e-02, 2.488e-02)); + r += mul(s5_3, M4(-5.858e-02, 3.525e-02, -2.676e-02, -7.051e-02, 1.653e-02, -1.366e-01, 6.492e-02, -1.073e-01, -4.261e-02, -1.184e-02, 4.031e-02, -7.042e-02, 2.538e-02, -6.503e-03, 1.881e-02, 6.144e-03)); + r += mul(s5_4, M4(-1.599e-01, 3.139e-01, 4.613e-02, 1.092e-01, -4.946e-02, -6.471e-02, -2.505e-01, -4.021e-01, 3.832e-02, -1.744e-01, -1.179e-01, 3.837e-02, 1.725e-02, -6.918e-03, 8.461e-02, -6.419e-03)); + r += mul(s5_5, M4(9.392e-04, 3.029e-02, -1.711e-02, -3.376e-02, -2.033e-02, -1.260e-01, 2.772e-02, 1.451e-01, 7.234e-03, 1.555e-02, 2.413e-02, -2.120e-02, -1.125e-02, -2.683e-02, -1.099e-02, -3.049e-02)); + r += mul(s5_6, M4(7.385e-03, 7.153e-02, -3.701e-03, -5.904e-03, -7.381e-03, -1.438e-02, -4.615e-02, 4.650e-04, -5.119e-02, -3.074e-02, -7.760e-02, 4.370e-03, -1.500e-02, -3.557e-02, 2.629e-02, 4.037e-02)); + r += mul(s5_7, M4(-2.458e-02, 1.187e-02, 8.390e-02, -1.328e-01, 5.794e-03, 3.009e-03, -9.953e-02, -5.775e-02, 2.016e-02, 1.212e-02, -1.308e-02, -3.056e-02, 1.866e-02, 3.148e-02, -5.295e-02, -7.660e-02)); + r += mul(s5_8, M4(2.084e-03, 1.588e-02, 2.125e-01, 3.881e-02, -1.416e-02, -3.428e-03, -8.714e-02, -1.141e-01, 1.929e-02, 1.721e-03, -6.971e-02, -2.685e-02, 2.040e-02, 2.086e-02, -6.267e-02, 1.943e-02)); + r += mul(s6_0, M4(-3.636e-02, 1.058e-02, 1.897e-02, -7.365e-03, -2.463e-01, 1.546e-01, -1.253e-02, -1.471e-01, -3.383e-02, -1.681e-01, -3.166e-02, 1.180e-02, -1.471e-02, -3.981e-02, 1.965e-02, 1.843e-02)); + r += mul(s6_1, M4(4.107e-02, 5.065e-02, 1.605e-02, -3.976e-03, 1.275e-02, 1.578e-01, 5.165e-02, -1.212e-01, 2.598e-02, -1.050e-02, 4.818e-02, -1.796e-02, -1.129e-01, 6.415e-02, -1.066e-01, 1.731e-02)); + r += 
mul(s6_2, M4(2.760e-03, -4.237e-03, -2.683e-02, 1.691e-02, 3.426e-02, 1.694e-01, 2.113e-03, 5.037e-02, -7.108e-04, 4.483e-03, -7.325e-03, -7.323e-03, -4.189e-02, -9.192e-03, 4.703e-02, -1.762e-02)); + r += mul(s6_3, M4(-1.324e-01, -1.428e-02, 1.538e-01, 8.059e-02, 7.247e-02, 2.456e-03, 1.478e-01, -1.011e-01, -4.779e-02, 1.501e-02, -4.858e-02, -5.523e-02, -1.955e-03, -1.013e-01, -6.627e-02, -1.644e-02)); + r += mul(s6_4, M4(7.496e-02, 1.511e-01, -1.250e-01, -6.264e-02, -9.391e-02, -2.261e-01, -2.047e-01, 7.962e-02, -2.133e-02, -8.040e-02, 4.176e-02, -3.172e-02, -8.418e-02, 2.017e-01, -1.799e-02, 3.392e-02)); + r += mul(s6_5, M4(2.756e-02, -7.527e-03, -1.715e-02, 3.766e-02, -3.255e-02, -1.122e-01, 2.799e-03, 1.650e-03, -1.328e-03, 4.273e-03, 7.832e-02, -1.947e-02, 9.840e-02, 2.162e-02, 6.598e-02, 1.782e-02)); + r += mul(s6_6, M4(-2.405e-02, -2.950e-02, -2.338e-02, -4.016e-02, -2.421e-02, -2.384e-02, -9.502e-02, 6.252e-02, -1.692e-02, -9.645e-03, 1.187e-02, 1.228e-02, -8.531e-03, -3.788e-02, 1.097e-01, 6.519e-02)); + r += mul(s6_7, M4(3.889e-02, 9.208e-03, -7.941e-02, 5.483e-02, -4.879e-02, 2.173e-02, 1.911e-02, -8.286e-03, 2.036e-02, 2.867e-02, -5.079e-02, -2.555e-02, -6.251e-02, 2.804e-02, -2.654e-01, -4.549e-02)); + r += mul(s6_8, M4(8.329e-04, -1.819e-02, -1.321e-02, 1.572e-02, -2.137e-02, -4.870e-03, -1.199e-02, 1.285e-01, 1.001e-02, 8.940e-03, -4.492e-02, -4.014e-02, 5.302e-04, 1.071e-02, -5.256e-02, 6.449e-02)); + r += mul(s7_0, M4(2.445e-03, -8.298e-02, 4.078e-02, 1.082e-01, -3.606e-03, 1.108e-02, 1.089e-02, 8.458e-03, -5.579e-02, 5.668e-03, -8.777e-03, 1.130e-02, 9.521e-02, -1.845e-02, 3.895e-02, -2.967e-02)); + r += mul(s7_1, M4(-4.886e-02, 5.319e-02, 2.510e-02, -7.867e-02, -2.855e-03, 3.997e-02, -5.932e-02, 3.093e-02, 9.879e-02, 1.408e-02, -2.786e-02, 5.785e-02, -9.988e-02, 3.938e-02, -6.418e-02, -6.088e-03)); + r += mul(s7_2, M4(1.294e-01, 1.028e-01, -4.343e-02, 2.712e-02, 1.849e-02, -5.441e-02, 4.991e-02, 1.062e-02, -1.364e-02, 4.261e-03, 2.003e-02, 
-3.499e-02, -3.174e-03, 3.305e-02, 5.740e-02, -3.909e-03)); + r += mul(s7_3, M4(-8.290e-02, 6.939e-02, -7.261e-04, -9.136e-03, -3.160e-03, 6.662e-02, -4.139e-02, 1.310e-04, -9.320e-03, 8.518e-02, 5.456e-03, 2.266e-02, -4.379e-02, 2.903e-02, -1.230e-02, -4.747e-02)); + r += mul(s7_4, M4(1.445e-01, -5.596e-02, -1.909e-01, 1.829e-01, -2.868e-02, 1.546e-02, 7.682e-04, -4.849e-02, 1.428e-01, 1.040e-01, 4.236e-02, 1.480e-02, 1.023e-01, 4.592e-02, -6.634e-02, -3.505e-03)); + r += mul(s7_5, M4(-6.373e-02, -8.968e-02, -1.350e-02, -6.032e-02, 8.111e-03, 6.144e-03, -4.249e-02, 3.479e-02, 1.916e-02, -2.201e-02, 4.903e-02, -3.262e-02, 2.203e-02, 1.923e-02, 2.391e-03, 6.177e-02)); + r += mul(s7_6, M4(1.166e-02, 2.687e-02, 2.417e-02, 2.158e-02, 3.096e-02, 1.273e-02, 3.190e-02, 2.190e-02, -2.712e-02, 7.696e-04, -2.388e-02, 3.130e-02, -1.967e-02, -1.655e-02, 7.839e-02, -2.687e-02)); + r += mul(s7_7, M4(-2.005e-02, -2.380e-02, 9.220e-02, -1.530e-02, -8.630e-03, -1.587e-02, 1.488e-02, -1.791e-03, 1.460e-02, -2.440e-02, 8.963e-02, 9.009e-02, -1.182e-02, 4.140e-02, -5.368e-02, 6.374e-02)); + r += mul(s7_8, M4(3.577e-02, 5.104e-02, 3.665e-02, -5.901e-02, -1.713e-02, -6.524e-02, 1.610e-05, 1.340e-02, 3.049e-02, 4.299e-02, -7.341e-02, -3.923e-02, 2.563e-02, -2.483e-02, -2.340e-02, 1.129e-02)); + r += V4(-5.671e-03, 3.110e-03, -6.021e-03, -1.366e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 
s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.193e-01, -1.048e-02, -3.243e-02, 6.826e-02, -2.322e-02, 1.210e-03, -1.252e-02, 2.977e-02, -2.499e-02, -5.046e-02, 3.024e-02, -2.444e-02, -5.177e-02, 3.895e-02, -1.225e-02, 5.118e-03)); + r += mul(s0_1, M4(5.871e-02, 6.552e-02, 9.391e-02, -5.961e-02, 8.066e-02, -6.908e-03, -5.695e-02, -1.936e-02, 2.658e-02, -7.954e-04, -1.973e-01, 9.164e-02, 5.022e-02, 2.750e-02, 1.419e-01, -1.292e-01)); + r += mul(s0_2, M4(-4.930e-03, 2.126e-02, -1.352e-02, 3.179e-03, 3.715e-02, -4.556e-03, -1.389e-03, 8.522e-03, 3.613e-02, -9.409e-03, 1.362e-01, -6.348e-02, 1.835e-01, -3.183e-02, -1.226e-01, -2.973e-02)); + r += mul(s0_3, M4(1.495e-02, 1.022e-02, 4.584e-02, 5.812e-02, 8.536e-02, -1.778e-02, -9.100e-03, -1.304e-02, -7.851e-02, 5.556e-02, 9.545e-03, -7.593e-02, 4.848e-02, -1.739e-02, -2.347e-02, -4.931e-02)); + r += mul(s0_4, M4(3.443e-02, 5.691e-02, -2.898e-02, 6.016e-03, -7.971e-03, -2.753e-02, 9.017e-02, 8.716e-02, 3.454e-02, 4.737e-02, 1.046e-01, -4.884e-02, -8.845e-02, 6.886e-02, 8.998e-03, 3.288e-02)); + r += mul(s0_5, M4(5.182e-02, -2.280e-02, -6.605e-03, -4.182e-02, -1.409e-03, -6.527e-02, -5.406e-02, 3.059e-02, -4.931e-03, -3.169e-02, -6.431e-02, -2.430e-02, -1.669e-01, 9.000e-02, 7.812e-02, 1.873e-02)); + r += mul(s0_6, M4(-2.305e-03, -3.680e-02, 1.371e-02, -1.402e-02, -1.577e-01, 8.153e-03, -2.799e-04, 3.356e-02, -3.522e-03, 3.556e-02, 2.578e-02, -2.129e-03, 2.127e-02, 7.063e-02, 2.276e-02, -6.245e-02)); + r += mul(s0_7, M4(7.349e-02, -9.005e-02, -3.976e-02, -4.591e-02, 5.945e-02, -8.111e-02, 5.657e-02, -6.026e-02, 6.361e-02, -1.248e-02, 4.749e-03, -7.141e-02, -8.539e-02, -3.081e-02, -4.733e-02, 2.692e-02)); + r += mul(s0_8, M4(-5.297e-03, 2.553e-02, 1.188e-02, -3.252e-02, -9.584e-02, -4.811e-03, -5.877e-02, 1.540e-02, 5.976e-02, -6.359e-03, 1.851e-02, -7.055e-03, 1.543e-02, -6.709e-02, -5.741e-03, -1.938e-02)); + r += mul(s1_0, M4(1.027e-01, 2.668e-02, -2.635e-02, 1.201e-01, 1.320e-01, 6.001e-02, 1.124e-03, 
3.838e-02, -2.793e-02, -1.959e-02, -1.672e-02, -2.363e-02, -7.897e-03, -1.262e-02, -7.426e-02, 4.808e-02)); + r += mul(s1_1, M4(8.846e-02, 1.041e-01, 1.462e-01, -7.651e-02, 8.646e-02, -6.220e-03, -1.981e-01, -1.132e-02, 7.620e-03, -5.404e-02, -1.321e-01, 1.981e-02, -1.342e-02, -4.427e-02, 4.065e-03, -8.502e-02)); + r += mul(s1_2, M4(1.697e-02, -5.188e-02, -5.134e-02, -4.116e-02, 2.190e-02, 4.750e-03, 7.617e-03, -1.969e-03, -2.589e-02, 3.952e-03, 5.049e-02, -2.411e-02, 6.015e-02, 1.877e-02, -1.146e-01, 2.766e-02)); + r += mul(s1_3, M4(2.173e-02, 3.303e-02, 5.705e-02, -2.306e-02, 2.437e-01, -4.184e-02, -4.673e-02, 1.375e-01, -1.449e-01, -7.996e-03, 8.527e-03, -3.652e-02, 1.508e-02, 6.097e-03, -2.012e-02, 5.567e-03)); + r += mul(s1_4, M4(-1.053e-01, 1.183e-01, 1.644e-01, 7.715e-02, -8.496e-02, -2.526e-01, -3.290e-01, 7.366e-02, -5.064e-02, 5.169e-02, -2.557e-01, 1.666e-01, -2.381e-03, 2.116e-01, 4.961e-02, 1.979e-03)); + r += mul(s1_5, M4(2.273e-02, 8.168e-02, 6.637e-02, -2.085e-02, -1.772e-02, -2.210e-03, 8.962e-03, 3.793e-02, 1.632e-01, 1.286e-03, -2.699e-04, -1.422e-01, -8.545e-02, -6.054e-02, -1.027e-01, -3.499e-02)); + r += mul(s1_6, M4(-2.285e-02, -1.042e-01, 9.875e-03, -2.335e-02, 1.999e-01, -6.262e-02, -6.712e-02, -1.885e-02, -1.402e-02, -6.218e-02, 2.995e-02, -1.346e-02, -1.627e-02, 5.953e-02, 2.362e-02, -7.447e-02)); + r += mul(s1_7, M4(-1.448e-02, -3.298e-01, 2.114e-02, -3.973e-02, 1.320e-02, 4.253e-02, 4.971e-02, 1.446e-02, 1.347e-01, 1.912e-02, 4.576e-02, -5.504e-02, -1.043e-02, -1.144e-01, 9.768e-03, 7.366e-03)); + r += mul(s1_8, M4(2.272e-02, -9.619e-03, -7.821e-02, 3.434e-02, 5.688e-02, -2.394e-02, 8.326e-03, 1.352e-02, -7.261e-02, 6.078e-03, -4.177e-04, 2.965e-02, -5.145e-02, 4.692e-02, -5.279e-02, 3.568e-02)); + r += mul(s2_0, M4(4.702e-02, 2.630e-03, 8.904e-03, -6.280e-03, -2.836e-02, -2.194e-02, -5.604e-02, 6.956e-03, 3.226e-02, -2.308e-02, 1.880e-02, 1.596e-02, -2.796e-04, 2.646e-02, 4.453e-03, -2.054e-02)); + r += mul(s2_1, M4(8.921e-02, 
1.728e-02, -1.029e-01, 1.150e-02, -3.810e-02, 3.608e-02, -3.375e-02, -3.864e-02, 5.210e-02, 1.303e-05, -1.235e-02, 3.002e-03, 4.356e-03, -1.328e-02, 8.026e-03, -3.341e-02)); + r += mul(s2_2, M4(-3.228e-02, -3.432e-03, -2.823e-02, 5.476e-03, -1.503e-02, -2.776e-02, -2.735e-03, -3.880e-02, 2.066e-03, 6.393e-03, 4.266e-02, -2.297e-02, -5.792e-02, 5.058e-03, 5.737e-02, 1.521e-02)); + r += mul(s2_3, M4(-2.400e-02, -3.390e-02, 1.933e-02, 3.088e-03, 8.121e-02, 4.240e-02, -7.497e-02, -3.773e-02, -4.244e-02, -2.049e-02, 2.547e-02, -1.778e-04, -2.168e-02, -2.191e-02, -1.730e-02, 1.369e-02)); + r += mul(s2_4, M4(-6.762e-02, 1.373e-02, -2.620e-03, -5.704e-03, -1.055e-02, -9.187e-02, -3.544e-01, 2.344e-02, 1.125e-01, 7.307e-02, -3.805e-02, -1.709e-02, 7.488e-02, -7.591e-02, 2.404e-02, -2.459e-02)); + r += mul(s2_5, M4(3.818e-02, 2.380e-02, 3.533e-02, 3.720e-03, 2.004e-02, -9.902e-03, -3.566e-02, 1.956e-02, -4.887e-02, 2.246e-02, -7.669e-03, -2.876e-02, 5.484e-02, 1.748e-02, 1.909e-02, -5.162e-02)); + r += mul(s2_6, M4(5.008e-02, -2.726e-02, -4.822e-02, -1.584e-02, 8.802e-02, -2.961e-02, -5.532e-02, -2.233e-02, -1.975e-02, -1.024e-02, 4.558e-03, 1.366e-02, -8.069e-03, 2.464e-02, 9.495e-03, -2.194e-02)); + r += mul(s2_7, M4(-1.258e-01, 2.334e-02, -6.292e-02, 3.725e-02, 2.089e-02, 2.861e-01, -8.859e-02, 1.913e-02, 1.198e-01, -7.910e-02, 6.599e-02, -2.165e-02, -8.741e-02, 3.565e-02, 9.328e-03, -7.032e-03)); + r += mul(s2_8, M4(3.636e-02, -4.528e-03, -1.155e-02, -1.413e-02, -4.810e-02, -2.686e-02, -6.808e-02, 1.915e-02, 1.687e-02, -1.481e-03, -2.301e-02, -1.282e-02, -3.844e-02, -1.030e-02, -4.729e-02, 3.916e-02)); + r += mul(s3_0, M4(1.657e-02, -3.355e-03, 2.242e-03, -2.040e-02, 1.946e-02, 2.196e-02, -3.503e-02, 1.040e-01, 4.469e-02, 4.250e-02, -4.958e-02, 8.767e-02, -3.961e-02, 2.544e-02, -5.104e-02, -6.615e-02)); + r += mul(s3_1, M4(2.691e-02, -9.850e-02, 7.498e-02, -1.756e-01, 1.345e-02, 3.622e-02, 8.593e-02, -1.123e-02, 1.538e-02, -5.349e-03, -7.513e-02, -8.709e-03, -3.121e-02, 
3.571e-02, 3.203e-02, 3.256e-02)); + r += mul(s3_2, M4(4.858e-02, -4.627e-02, 6.215e-02, -4.991e-02, 1.327e-02, 1.824e-02, 3.843e-03, -1.558e-02, -9.773e-02, -3.629e-02, -7.307e-02, 1.832e-02, 1.002e-01, -2.333e-02, 1.016e-01, -1.728e-02)); + r += mul(s3_3, M4(-4.198e-02, -2.814e-02, 3.682e-02, -2.208e-02, -7.201e-02, -7.191e-02, -1.578e-02, -5.985e-02, 3.886e-03, -3.420e-02, -9.592e-02, -4.945e-02, 5.593e-02, 1.735e-02, -6.147e-02, 3.669e-02)); + r += mul(s3_4, M4(6.282e-03, 1.437e-02, -1.631e-02, 9.143e-03, -5.601e-02, 1.139e-02, -1.762e-01, 1.844e-02, 7.251e-02, -1.309e-02, -9.165e-03, -3.403e-02, 3.223e-01, 8.518e-02, 2.687e-01, 7.502e-02)); + r += mul(s3_5, M4(-5.705e-02, 3.598e-02, 1.709e-01, -1.577e-02, 3.992e-02, -3.111e-02, -2.667e-02, -4.231e-02, -5.149e-02, 1.962e-02, -9.283e-02, 4.647e-02, -1.110e-01, -3.535e-02, -6.284e-02, 6.710e-02)); + r += mul(s3_6, M4(5.424e-02, -3.279e-02, 2.262e-02, -1.659e-02, 3.338e-02, 9.182e-03, 2.441e-02, -5.288e-02, -6.987e-02, 7.276e-03, -2.352e-02, 4.192e-02, 9.676e-02, 5.903e-02, 5.415e-02, 2.282e-03)); + r += mul(s3_7, M4(6.329e-03, 2.330e-02, 6.207e-03, -1.762e-03, 7.843e-03, -6.581e-03, -1.549e-02, -5.284e-02, -5.859e-02, 3.369e-03, -8.589e-02, 6.869e-02, 1.320e-01, -4.220e-02, -4.965e-02, -1.204e-01)); + r += mul(s3_8, M4(3.998e-02, 1.873e-02, -5.480e-03, -2.049e-02, 2.348e-02, 9.441e-03, -8.933e-02, 4.237e-02, -1.849e-02, -3.625e-02, -3.664e-02, -1.576e-02, -1.291e-01, 4.700e-02, 4.163e-02, -2.971e-02)); + r += mul(s4_0, M4(1.778e-02, 4.602e-02, -4.267e-02, 6.398e-02, -5.671e-02, -2.924e-03, -4.187e-02, 1.654e-03, -1.492e-01, -3.897e-02, 7.856e-04, -1.644e-02, -3.210e-02, -7.924e-02, -7.994e-02, -4.341e-03)); + r += mul(s4_1, M4(4.051e-03, -3.196e-03, -6.325e-02, 6.896e-02, -3.616e-02, 1.553e-02, 6.633e-02, -3.962e-02, 8.586e-02, -4.538e-02, 4.091e-02, -2.947e-02, 3.580e-03, -5.921e-02, 7.983e-02, -2.921e-02)); + r += mul(s4_2, M4(-5.366e-02, -1.155e-02, 1.324e-02, 5.973e-02, -1.614e-01, -1.876e-02, -1.515e-02, 
-6.311e-03, 3.802e-02, -1.583e-02, -5.839e-02, -2.794e-02, 1.295e-01, -2.356e-02, -1.839e-01, -7.227e-02)); + r += mul(s4_3, M4(-7.246e-02, -3.522e-02, 4.562e-02, -1.176e-01, 2.272e-02, 3.581e-02, -2.271e-02, 4.263e-03, 1.996e-02, 1.176e-02, 4.654e-02, -1.948e-02, -1.231e-01, 3.247e-02, 6.979e-02, -4.604e-02)); + r += mul(s4_4, M4(5.858e-02, 1.111e-02, 8.530e-02, -1.032e-01, 5.611e-02, -1.437e-02, -2.400e-03, -2.226e-01, 1.002e-01, -7.950e-02, 1.473e-01, -9.665e-03, -8.286e-02, 1.785e-01, 9.023e-03, 1.517e-01)); + r += mul(s4_5, M4(5.408e-02, -1.242e-02, -1.583e-02, -9.518e-02, 7.813e-02, 5.192e-02, -7.247e-03, 4.595e-02, -7.544e-02, -1.103e-02, -6.453e-02, 8.274e-02, -1.114e-01, 9.855e-03, 1.171e-01, 6.824e-02)); + r += mul(s4_6, M4(-2.487e-02, -8.571e-02, -2.031e-02, 7.946e-02, 7.761e-02, -5.634e-02, -1.243e-02, -8.920e-03, 7.505e-03, -5.841e-03, 4.098e-02, -7.230e-02, 4.304e-02, -8.549e-02, -4.022e-02, 7.888e-03)); + r += mul(s4_7, M4(5.391e-02, -2.885e-02, 2.040e-03, 7.783e-02, 1.013e-02, 1.372e-02, 5.941e-03, 1.007e-02, 7.528e-03, 8.803e-03, 7.997e-02, 1.211e-02, -1.978e-02, -4.108e-03, -6.149e-02, 7.661e-02)); + r += mul(s4_8, M4(-2.713e-02, 2.661e-02, 6.520e-02, 3.927e-02, 6.751e-02, -3.023e-02, -3.414e-02, -3.336e-02, -7.493e-03, 2.083e-02, 1.082e-03, 1.085e-02, -1.243e-02, 1.775e-02, -1.800e-02, -2.309e-02)); + r += mul(s5_0, M4(2.636e-03, 1.020e-02, -8.215e-03, -1.118e-02, -6.102e-02, 9.464e-03, 4.914e-02, -8.655e-03, -2.183e-01, 7.846e-02, 1.189e-01, 7.140e-02, -1.094e-01, -1.413e-02, 1.744e-02, 2.393e-02)); + r += mul(s5_1, M4(-1.423e-01, -4.273e-02, -8.643e-02, 1.187e-01, 5.468e-02, 3.297e-02, -7.118e-02, -7.003e-02, -5.719e-02, -1.376e-02, -1.145e-01, 7.838e-02, 1.588e-02, 5.242e-03, 1.901e-02, 9.863e-03)); + r += mul(s5_2, M4(-2.814e-02, 1.728e-03, 7.216e-03, -5.635e-03, -2.159e-02, 1.430e-03, -3.330e-02, -4.216e-02, -9.500e-02, 1.609e-02, 5.971e-02, 2.658e-02, 8.857e-02, 1.595e-02, -1.716e-02, -4.230e-03)); + r += mul(s5_3, M4(1.062e-01, 5.689e-02, 
-1.330e-02, 2.666e-02, 7.092e-03, 1.980e-02, 2.738e-02, -2.801e-02, 1.831e-01, 4.358e-02, 5.407e-02, -1.202e-01, -1.107e-01, 3.718e-02, 7.248e-03, 2.525e-02)); + r += mul(s5_4, M4(-1.238e-02, 1.395e-01, -1.252e-03, -2.829e-01, -9.818e-02, -3.294e-02, -4.447e-02, 6.977e-02, 2.017e-01, 2.108e-03, 3.438e-01, -3.522e-02, -9.348e-02, -7.006e-02, -1.200e-01, 1.091e-01)); + r += mul(s5_5, M4(8.951e-02, -2.079e-03, 1.120e-02, 9.600e-03, -7.474e-02, -1.677e-02, -3.101e-02, 7.161e-03, 4.322e-02, -1.732e-02, -1.563e-02, -1.477e-02, -4.726e-02, 2.835e-02, 1.115e-01, 5.753e-03)); + r += mul(s5_6, M4(-3.872e-02, -6.123e-02, -3.729e-02, -4.733e-02, 9.770e-02, -2.838e-02, 2.167e-02, -1.190e-02, 5.474e-02, 1.812e-02, 1.595e-02, 1.379e-03, 7.593e-02, -2.542e-02, 9.389e-03, -5.855e-03)); + r += mul(s5_7, M4(-2.313e-01, 1.062e-01, -3.426e-02, 1.271e-01, 3.036e-01, -9.294e-02, 2.472e-02, -5.963e-02, 6.419e-02, -3.441e-02, 3.463e-02, -4.723e-03, -4.168e-02, 2.209e-02, -1.402e-03, 4.069e-02)); + r += mul(s5_8, M4(2.594e-02, -4.756e-03, 1.606e-04, -4.234e-02, 4.783e-02, 1.375e-02, 4.254e-03, 5.236e-02, -3.643e-02, -2.451e-02, 3.766e-02, -2.563e-02, 7.765e-03, -2.148e-02, 2.163e-02, 2.688e-03)); + r += mul(s6_0, M4(-5.101e-02, 3.157e-02, -3.214e-02, -4.207e-02, 1.297e-01, 1.005e-01, 6.350e-02, 4.027e-02, 5.571e-03, -3.522e-02, 2.588e-02, -2.043e-02, -1.711e-01, -2.976e-02, -5.791e-02, 2.916e-02)); + r += mul(s6_1, M4(-2.868e-02, 1.625e-01, 2.508e-02, 8.476e-02, -2.056e-01, 3.901e-02, -1.984e-01, 7.514e-02, -1.486e-01, 2.581e-03, -1.576e-02, 1.384e-01, -1.174e-01, 4.931e-03, 5.469e-03, 7.396e-02)); + r += mul(s6_2, M4(-6.032e-02, 1.680e-02, 7.920e-02, 5.261e-02, -4.495e-02, -5.333e-03, 1.461e-01, 3.072e-02, 5.915e-02, 2.529e-02, 2.345e-02, 3.672e-02, 4.769e-02, 2.155e-02, 4.651e-02, -4.791e-02)); + r += mul(s6_3, M4(8.897e-02, 1.405e-01, -4.785e-02, 9.822e-02, 2.122e-01, -2.388e-03, -7.205e-02, 1.454e-02, 2.498e-01, 5.825e-02, -1.677e-02, 1.144e-01, 2.749e-02, -3.975e-02, 2.507e-03, 
-4.595e-02)); + r += mul(s6_4, M4(-3.615e-02, 1.473e-01, -1.444e-02, 3.093e-02, 1.880e-01, -1.874e-01, 3.967e-01, -3.327e-01, 6.655e-03, -5.960e-02, 1.457e-02, -8.659e-02, 3.706e-02, 1.732e-01, 5.386e-02, -2.847e-03)); + r += mul(s6_5, M4(-6.235e-02, -1.365e-03, 3.703e-02, 1.494e-02, 8.596e-03, -3.396e-02, -5.405e-02, -2.135e-01, 3.474e-02, 6.117e-02, -1.904e-03, -2.267e-02, 1.194e-01, 1.011e-02, 1.022e-01, -2.839e-02)); + r += mul(s6_6, M4(-2.210e-02, 3.452e-02, 2.616e-02, 5.119e-02, 4.545e-02, 2.557e-02, 1.251e-02, -2.704e-02, 9.683e-02, 1.015e-02, -3.893e-02, -2.042e-02, 7.684e-02, 1.147e-03, -2.620e-02, 2.036e-02)); + r += mul(s6_7, M4(-1.416e-02, -9.191e-02, 7.322e-02, -6.055e-02, -6.098e-02, 1.048e-01, -8.375e-02, 1.151e-02, 8.122e-02, -2.470e-02, 3.802e-02, 8.203e-03, -1.134e-01, 3.271e-02, 1.489e-01, 1.718e-03)); + r += mul(s6_8, M4(4.236e-02, -1.279e-02, -1.218e-02, -1.721e-03, -5.267e-02, 6.120e-02, -6.863e-03, -2.339e-02, 6.142e-03, 1.334e-02, 3.840e-02, -3.469e-03, 2.074e-02, -9.623e-02, 1.174e-01, -7.104e-03)); + r += mul(s7_0, M4(2.177e-02, -2.506e-02, -2.084e-03, -5.860e-02, 7.460e-02, 5.126e-02, 6.503e-03, -1.016e-02, -1.348e-01, 2.695e-02, 8.662e-02, -3.217e-02, -9.947e-02, 1.355e-02, -3.887e-02, 3.671e-02)); + r += mul(s7_1, M4(1.094e-01, 4.306e-02, 1.773e-01, 3.608e-03, -5.451e-02, 6.465e-02, 9.147e-02, 2.774e-03, 2.776e-03, 4.393e-02, -2.924e-02, 1.923e-02, -4.374e-02, -3.220e-02, -6.576e-02, -3.276e-02)); + r += mul(s7_2, M4(-4.620e-02, 2.048e-02, 3.234e-03, 5.520e-02, -8.348e-02, -3.598e-02, -8.111e-02, -5.441e-02, -6.498e-03, -2.542e-02, -3.379e-02, 2.636e-02, -8.450e-03, 2.900e-02, -4.491e-02, -1.654e-02)); + r += mul(s7_3, M4(8.686e-02, 1.716e-01, 4.222e-03, 9.780e-03, -4.509e-02, 3.392e-02, 1.422e-02, -6.869e-02, -1.595e-01, 1.251e-02, 2.913e-02, 1.104e-01, -1.547e-01, 1.027e-02, -3.729e-02, -4.566e-02)); + r += mul(s7_4, M4(1.953e-01, 2.315e-01, 1.299e-01, -1.347e-01, -4.060e-02, -7.278e-03, 3.684e-02, -6.666e-02, -1.027e-01, -5.207e-02, 
-2.175e-01, 1.131e-01, -6.486e-02, 8.321e-02, -2.594e-01, 1.169e-01)); + r += mul(s7_5, M4(-3.169e-02, 3.697e-03, -6.472e-02, 2.523e-02, 9.826e-03, 1.689e-02, 6.241e-02, -5.949e-04, 2.145e-02, 5.707e-02, 8.911e-02, -1.135e-02, 4.783e-02, 1.337e-02, 3.065e-02, 5.884e-02)); + r += mul(s7_6, M4(-3.587e-03, -5.693e-02, -1.929e-02, 3.601e-02, -1.485e-02, -3.759e-03, -8.937e-03, -9.948e-03, -6.001e-02, -4.211e-02, 1.906e-02, -3.279e-02, 2.514e-02, -2.525e-02, -3.893e-02, 1.116e-02)); + r += mul(s7_7, M4(8.653e-02, 7.210e-02, 1.310e-02, -4.096e-02, -4.513e-04, -1.231e-02, 3.749e-02, -3.147e-02, 6.759e-02, -3.613e-02, 7.838e-02, -5.501e-02, 9.405e-04, -3.234e-02, 1.654e-02, -6.506e-03)); + r += mul(s7_8, M4(-6.108e-02, 1.114e-02, -2.635e-03, 1.568e-02, 3.447e-02, -1.793e-02, -1.050e-02, -3.319e-02, -2.087e-02, -1.826e-02, 1.520e-02, -2.765e-02, -2.238e-02, -3.305e-02, 7.283e-03, -1.178e-02)); + r += V4(-3.774e-03, 1.083e-02, -6.759e-03, -9.530e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.119e-02, 1.358e-02, 1.799e-02, -4.454e-02, 2.054e-02, -5.854e-03, 1.624e-02, 6.525e-02, 5.159e-03, -3.186e-03, 3.916e-02, 1.704e-02, 5.611e-02, -1.617e-02, -1.256e-02, 7.283e-02)); + r += mul(s0_1, M4(1.160e-01, 3.042e-02, -1.713e-03, 9.930e-03, 9.846e-03, 1.950e-02, -2.891e-02, -6.443e-02, -9.151e-02, -7.299e-02, -6.384e-02, 
4.439e-02, 2.638e-02, 6.729e-02, 4.567e-02, -7.430e-02)); + r += mul(s0_2, M4(-1.383e-02, 5.194e-02, -2.212e-02, -3.194e-02, 8.244e-03, -7.213e-03, 2.814e-02, 1.385e-02, 3.453e-02, 1.509e-03, 1.318e-02, -1.627e-02, -1.133e-02, 4.094e-02, 2.866e-02, -3.095e-02)); + r += mul(s0_3, M4(6.626e-03, 1.096e-01, 8.185e-02, -7.438e-02, 2.120e-02, 4.990e-02, -2.814e-02, 4.095e-02, 6.020e-02, -5.083e-02, -3.477e-02, 4.169e-02, 3.500e-03, -1.066e-01, 2.729e-02, -2.682e-02)); + r += mul(s0_4, M4(5.029e-02, 5.265e-02, 1.227e-01, -5.728e-02, -8.232e-02, 4.752e-02, 1.365e-02, -7.925e-02, -1.746e-01, -7.669e-02, 2.813e-02, -2.357e-01, 5.048e-02, 8.521e-02, -3.115e-02, 1.222e-01)); + r += mul(s0_5, M4(5.230e-03, 8.784e-03, -8.968e-02, -3.714e-03, 1.858e-02, 1.223e-02, -3.019e-02, 1.194e-01, -1.540e-03, 5.142e-03, -3.950e-02, 2.637e-02, -1.422e-02, 2.503e-02, 5.833e-02, -7.592e-02)); + r += mul(s0_6, M4(2.645e-02, 4.579e-02, 5.154e-02, -6.487e-02, 2.590e-02, -4.852e-02, -2.230e-02, 2.822e-02, -8.089e-03, 4.508e-02, -1.007e-03, 1.641e-02, -8.618e-03, -6.170e-02, 5.916e-03, 1.807e-02)); + r += mul(s0_7, M4(-1.205e-02, 1.208e-02, 1.299e-02, 1.292e-02, 2.139e-02, -4.657e-03, 5.206e-02, -6.348e-02, 5.026e-02, -1.533e-03, 1.388e-02, 3.909e-02, 3.115e-02, 2.324e-02, 1.077e-02, -4.992e-02)); + r += mul(s0_8, M4(-3.625e-02, -2.060e-02, -3.559e-02, -2.658e-02, -7.602e-02, -1.888e-02, 1.993e-02, -6.798e-03, -4.016e-02, -1.825e-02, -2.749e-03, -2.123e-02, -4.565e-02, 7.279e-02, 5.850e-02, -1.219e-02)); + r += mul(s1_0, M4(-4.673e-02, 8.304e-03, 1.845e-02, 3.609e-02, 6.619e-02, -4.322e-02, 2.001e-02, 6.990e-02, 4.898e-02, -2.925e-02, 3.682e-02, 5.622e-02, 5.427e-02, -1.565e-02, 6.430e-02, 2.369e-02)); + r += mul(s1_1, M4(8.584e-02, 9.906e-03, 3.857e-02, -1.294e-01, 1.388e-02, -1.809e-02, -3.039e-02, 6.939e-02, -5.358e-02, -3.650e-03, 5.765e-02, -3.854e-02, -2.511e-03, -7.474e-02, 1.191e-02, 6.923e-03)); + r += mul(s1_2, M4(-7.259e-02, 5.779e-02, -7.958e-04, 1.217e-02, -3.072e-02, 1.986e-02, 
1.220e-01, -1.087e-01, 4.577e-02, -2.321e-02, 1.628e-03, 1.921e-02, -4.586e-02, 1.968e-02, 1.483e-03, 1.715e-02)); + r += mul(s1_3, M4(-3.292e-03, 1.909e-01, -3.450e-02, 1.198e-01, 4.543e-02, -1.122e-01, 4.538e-02, 6.129e-02, -1.421e-03, 1.433e-03, -8.913e-02, 3.648e-02, -2.991e-02, -2.841e-03, -8.902e-02, -3.131e-03)); + r += mul(s1_4, M4(9.129e-02, 1.292e-03, 3.234e-02, -1.102e-01, -1.538e-01, 6.513e-02, -1.428e-01, 2.280e-01, -1.913e-01, -1.113e-01, -1.587e-01, -6.260e-02, -2.673e-02, 2.310e-01, 1.841e-01, 2.302e-02)); + r += mul(s1_5, M4(7.514e-02, -7.496e-02, -1.185e-01, 6.727e-02, 1.283e-02, -1.325e-02, 4.783e-02, 7.133e-02, -1.067e-01, -3.173e-02, 1.006e-01, -5.277e-02, -5.772e-02, 4.918e-02, 1.965e-02, 4.883e-03)); + r += mul(s1_6, M4(1.239e-01, -2.347e-02, 3.807e-04, 2.581e-03, 3.535e-02, -1.283e-01, -3.376e-02, 7.976e-02, 8.961e-02, -5.067e-02, 2.711e-02, -7.218e-03, 2.020e-02, -3.719e-02, -6.588e-03, 6.166e-03)); + r += mul(s1_7, M4(-6.321e-03, 3.338e-02, 3.880e-03, -5.857e-02, -3.765e-02, -1.477e-02, -7.070e-02, -3.418e-02, -1.490e-01, -1.317e-01, -1.427e-02, -8.478e-02, 3.837e-02, 5.323e-02, -3.820e-02, -7.255e-02)); + r += mul(s1_8, M4(-5.847e-02, 8.692e-03, -1.138e-01, -1.477e-02, 5.279e-03, 1.000e-02, 2.666e-02, -6.471e-02, -8.655e-02, -2.361e-02, 6.719e-02, -5.628e-03, -7.243e-02, -1.572e-02, -1.048e-02, 3.315e-02)); + r += mul(s2_0, M4(-2.288e-02, 4.273e-02, -5.397e-03, -1.220e-02, -1.201e-02, -2.187e-02, -3.532e-02, -4.551e-02, 9.285e-03, 3.374e-02, 1.875e-02, -8.249e-03, -2.919e-03, -3.004e-02, -1.433e-02, 2.247e-03)); + r += mul(s2_1, M4(4.909e-03, -3.626e-03, -8.456e-02, -1.639e-02, 2.864e-02, -6.078e-02, -1.204e-02, 3.552e-02, 5.636e-02, 5.088e-02, -6.840e-02, 5.427e-02, -4.091e-02, -5.403e-02, -1.069e-02, -6.340e-02)); + r += mul(s2_2, M4(-6.592e-02, -2.740e-03, -3.038e-02, 3.380e-02, 1.451e-02, 3.328e-02, 1.135e-02, 5.250e-02, 3.590e-02, 5.332e-02, 4.767e-02, -1.844e-02, -1.593e-02, -1.534e-02, 8.423e-03, 9.414e-03)); + r += mul(s2_3, 
M4(2.674e-02, -6.853e-02, 1.209e-02, 5.819e-03, 3.120e-02, -3.808e-02, 7.106e-02, 9.283e-02, 5.854e-02, -1.697e-02, 2.859e-03, -7.568e-03, -9.800e-03, -3.899e-02, -2.672e-03, 4.075e-02)); + r += mul(s2_4, M4(-4.709e-02, -2.890e-02, 2.291e-02, 2.118e-02, -1.154e-01, -1.106e-01, -5.390e-02, 4.718e-02, -3.647e-03, 2.600e-02, 7.251e-02, 5.167e-04, -4.533e-02, 1.011e-01, 3.346e-03, -1.104e-03)); + r += mul(s2_5, M4(2.589e-03, -1.781e-03, 6.664e-02, -1.719e-02, -3.848e-02, 7.312e-02, 9.325e-02, -1.775e-02, 6.812e-02, -1.021e-02, -7.496e-02, 7.818e-02, 8.188e-02, 1.107e-01, -1.334e-02, -6.705e-03)); + r += mul(s2_6, M4(3.958e-02, -1.072e-02, 1.728e-02, 1.769e-02, 3.835e-02, -1.062e-01, 1.055e-01, -2.397e-02, 6.102e-02, 7.622e-03, 6.822e-03, -1.394e-02, 6.069e-04, -2.514e-02, 4.367e-03, 1.006e-02)); + r += mul(s2_7, M4(-4.576e-02, -3.503e-02, -2.550e-02, 5.796e-02, -1.616e-01, -1.555e-01, -4.845e-02, 1.949e-01, 4.617e-03, 4.358e-02, 7.745e-02, -4.498e-02, 4.802e-02, 6.770e-02, -1.855e-02, 9.254e-03)); + r += mul(s2_8, M4(1.620e-02, -9.293e-03, -1.498e-02, -6.009e-02, -4.502e-02, -3.743e-02, -2.937e-04, 1.492e-02, -1.969e-02, -1.425e-02, -2.911e-02, 4.641e-03, -2.503e-02, 5.544e-02, 1.834e-04, -3.613e-02)); + r += mul(s3_0, M4(3.928e-02, 2.935e-02, -2.501e-03, 1.496e-02, 4.948e-03, -8.874e-03, -9.731e-03, -1.796e-02, 2.398e-02, -3.080e-02, 9.156e-03, -2.164e-03, -5.478e-02, -1.927e-02, -3.898e-02, -9.236e-03)); + r += mul(s3_1, M4(2.248e-01, 3.951e-02, 6.371e-02, 7.861e-02, 3.972e-02, 4.355e-02, 5.376e-02, -4.597e-02, 1.764e-03, -1.735e-02, 2.192e-03, -6.358e-02, 5.442e-02, 4.629e-02, 5.818e-02, 1.219e-01)); + r += mul(s3_2, M4(5.976e-02, -3.294e-02, -8.479e-02, -7.148e-02, -1.224e-02, 2.466e-02, -2.088e-02, 1.072e-02, 4.963e-02, 3.583e-02, 5.843e-02, 1.725e-02, -2.510e-02, -9.845e-02, 4.370e-02, -1.114e-01)); + r += mul(s3_3, M4(-4.351e-02, 2.608e-02, -5.137e-02, 1.292e-02, -6.503e-02, 1.173e-01, 4.085e-02, -1.101e-01, -9.750e-03, -6.098e-03, 2.314e-02, 1.507e-02, 
-3.360e-02, -9.189e-02, -2.685e-02, -2.520e-02)); + r += mul(s3_4, M4(1.052e-01, 3.103e-02, -2.944e-02, 1.189e-01, -6.053e-02, 3.701e-02, 6.101e-02, -1.819e-02, -6.428e-02, 1.806e-02, 8.849e-02, 4.567e-02, 2.044e-01, 3.910e-01, 2.417e-01, 2.521e-01)); + r += mul(s3_5, M4(-4.004e-02, 3.818e-02, 5.069e-02, -1.849e-02, 1.759e-02, 4.760e-02, 1.503e-02, -1.385e-02, 1.787e-02, -3.867e-03, -6.038e-03, 8.384e-02, 4.322e-01, 2.483e-02, -8.805e-02, 4.945e-02)); + r += mul(s3_6, M4(-6.358e-03, 2.774e-02, 1.409e-02, -1.912e-02, -2.601e-02, 9.155e-02, -9.666e-03, -5.030e-02, 1.351e-02, -6.274e-04, -2.323e-03, -1.666e-02, -7.202e-02, 1.367e-01, -1.121e-02, -2.320e-02)); + r += mul(s3_7, M4(-3.894e-02, -4.333e-02, -2.975e-02, 1.759e-02, -7.628e-02, 5.628e-02, -2.755e-02, -1.604e-02, 1.228e-02, 5.447e-03, 1.208e-02, -1.111e-02, 1.015e-01, -2.272e-02, -1.026e-02, 2.289e-01)); + r += mul(s3_8, M4(1.033e-02, -2.939e-02, 9.221e-03, -4.853e-03, -4.509e-02, 3.057e-02, 5.076e-04, 2.493e-02, 1.218e-02, 1.955e-03, 2.909e-02, 1.197e-02, -7.722e-02, 9.091e-03, -1.093e-01, 1.004e-02)); + r += mul(s4_0, M4(2.847e-02, 1.798e-02, 2.402e-02, -1.499e-02, -4.161e-02, 2.984e-02, -1.717e-02, -1.074e-01, 8.853e-02, -2.182e-02, 2.203e-02, -1.270e-02, 5.534e-04, 6.492e-02, -1.116e-01, -4.453e-02)); + r += mul(s4_1, M4(-3.555e-03, -4.297e-02, -3.233e-02, 5.099e-02, 1.639e-02, -4.216e-03, 2.458e-02, -3.030e-02, -7.720e-02, 1.163e-02, 5.789e-02, -1.570e-01, 1.173e-01, -5.750e-02, -1.676e-01, 5.915e-02)); + r += mul(s4_2, M4(3.692e-02, -1.966e-03, 2.417e-02, 7.655e-03, -1.724e-02, -2.010e-02, -9.557e-02, 5.027e-02, -2.674e-02, -4.045e-02, -3.849e-02, 1.158e-03, -1.059e-01, -1.222e-01, -1.585e-01, 3.536e-02)); + r += mul(s4_3, M4(-4.414e-02, 4.738e-02, 1.238e-02, -3.550e-02, -6.015e-02, 6.606e-02, 3.811e-02, 1.630e-02, 7.631e-02, -9.726e-02, 3.757e-04, 5.798e-02, -1.588e-01, 8.633e-02, -2.406e-01, -1.821e-01)); + r += mul(s4_4, M4(-1.407e-01, 3.247e-03, -1.042e-01, -1.164e-01, 1.234e-01, 2.662e-03, 
3.201e-02, 1.218e-01, 1.021e-01, 3.044e-02, 1.459e-01, -9.150e-02, 3.452e-01, 2.593e-02, -4.509e-01, 1.172e-01)); + r += mul(s4_5, M4(-1.300e-02, 7.091e-03, -9.407e-03, -1.165e-01, 3.124e-02, 1.197e-02, -1.064e-02, -5.295e-02, -4.681e-02, 4.255e-02, -3.933e-02, 2.827e-02, 1.341e-01, -5.583e-02, -6.000e-02, 5.605e-02)); + r += mul(s4_6, M4(-4.452e-02, 2.418e-02, 1.215e-02, -3.909e-02, -3.841e-03, 2.902e-02, -1.198e-03, -2.443e-02, 4.346e-02, -6.597e-03, -3.284e-02, 5.183e-02, -1.531e-01, -5.310e-02, -6.984e-02, -1.613e-01)); + r += mul(s4_7, M4(-1.110e-01, 1.335e-01, -3.714e-02, 4.381e-02, -5.944e-02, -4.948e-02, -2.352e-04, -7.582e-02, 7.761e-02, 1.434e-02, 7.149e-03, -2.226e-02, -1.007e-01, -6.605e-02, -4.902e-02, 4.838e-02)); + r += mul(s4_8, M4(2.891e-02, -2.002e-02, 3.372e-02, -9.957e-03, 5.108e-02, -1.830e-02, 1.883e-02, -2.309e-04, 1.934e-02, -1.329e-02, -1.824e-02, 3.953e-02, -1.713e-03, -3.680e-02, -2.755e-02, -2.758e-02)); + r += mul(s5_0, M4(-1.945e-02, 4.566e-02, 2.691e-02, -1.237e-02, -9.059e-02, -5.569e-02, -1.480e-01, -7.409e-02, 1.380e-01, -4.678e-02, 9.957e-02, 3.396e-02, 1.657e-02, 4.533e-02, -4.204e-03, 4.965e-03)); + r += mul(s5_1, M4(-1.747e-02, -2.485e-02, -3.850e-02, 5.838e-02, -6.083e-02, -1.110e-02, -3.806e-02, 9.352e-02, -7.105e-02, 3.170e-02, -3.524e-02, -1.404e-01, 5.260e-02, 4.554e-02, 4.306e-02, -2.121e-02)); + r += mul(s5_2, M4(2.305e-02, -5.438e-03, -3.145e-03, -5.896e-02, -1.098e-01, 5.827e-03, -3.090e-02, -4.094e-02, 2.971e-02, -3.924e-03, 1.197e-02, -8.427e-03, -4.808e-02, -3.403e-02, -3.744e-03, -5.660e-02)); + r += mul(s5_3, M4(7.114e-02, -1.410e-01, 4.887e-02, 9.254e-02, -1.004e-03, -8.354e-03, 1.798e-01, 3.584e-02, 2.084e-01, 3.576e-02, -3.979e-03, 1.487e-01, -2.453e-02, 8.486e-02, -4.554e-02, -2.278e-02)); + r += mul(s5_4, M4(3.145e-02, 2.088e-01, 2.877e-01, -2.699e-02, -2.542e-02, -3.369e-01, -3.053e-01, 1.283e-01, 5.013e-02, -7.967e-04, 3.206e-01, -3.933e-01, 2.370e-02, -5.881e-02, -1.360e-01, 7.423e-02)); + r += mul(s5_5, 
M4(5.378e-02, 8.026e-03, 1.786e-01, -1.762e-01, -1.796e-02, 5.854e-02, -6.146e-02, 7.364e-02, 3.034e-02, 2.706e-03, 5.823e-02, -5.505e-02, 3.285e-02, -1.949e-02, -2.971e-02, 1.573e-02)); + r += mul(s5_6, M4(3.967e-02, -8.323e-02, -1.240e-02, 4.045e-03, -4.268e-02, -2.509e-02, 1.613e-02, 1.509e-02, 2.492e-02, -1.008e-02, -1.946e-02, 6.058e-03, -3.131e-02, -8.883e-03, -4.384e-03, -3.910e-02)); + r += mul(s5_7, M4(-5.820e-02, 1.896e-01, -3.927e-02, 1.346e-02, -5.433e-02, -1.373e-01, 6.063e-02, -8.771e-02, 9.632e-02, 4.942e-02, 3.403e-02, 4.189e-03, -1.560e-02, 4.081e-02, 1.924e-02, 3.389e-02)); + r += mul(s5_8, M4(8.582e-02, 1.073e-03, 6.109e-02, -3.919e-02, -1.008e-01, 4.365e-03, -7.030e-02, 7.613e-03, 7.648e-03, 5.992e-03, 2.352e-02, -3.063e-05, 9.644e-04, -8.910e-03, 7.871e-03, -2.945e-02)); + r += mul(s6_0, M4(-6.219e-02, 9.462e-03, -5.277e-02, -4.953e-02, 1.254e-03, 6.616e-02, -4.846e-02, 6.083e-02, 5.975e-03, -2.187e-02, -1.862e-01, -2.549e-02, 2.145e-02, 1.917e-02, -2.752e-02, 1.375e-02)); + r += mul(s6_1, M4(-1.423e-03, 3.281e-02, 3.788e-02, 1.870e-02, 1.062e-01, 5.928e-02, -2.608e-01, 4.040e-01, 6.632e-02, 2.468e-02, -4.309e-02, -3.497e-03, -2.710e-03, 7.529e-02, 7.273e-02, 3.638e-02)); + r += mul(s6_2, M4(4.697e-02, 8.655e-03, -1.530e-02, 8.707e-03, -5.490e-02, 3.734e-03, 4.887e-02, -2.584e-01, -2.511e-02, -2.146e-02, -7.291e-02, -8.669e-03, -5.014e-02, -4.938e-02, -3.849e-02, 9.120e-03)); + r += mul(s6_3, M4(-9.087e-02, 2.014e-02, -5.573e-02, -6.906e-02, 1.306e-01, 1.068e-02, -2.333e-02, 3.826e-02, 4.739e-02, -5.959e-02, 1.413e-01, -1.084e-02, 1.212e-02, 2.388e-02, 1.633e-03, -7.772e-02)); + r += mul(s6_4, M4(-5.059e-02, -5.444e-02, -1.575e-01, 6.070e-02, -1.403e-01, -8.611e-02, 3.665e-01, -5.033e-01, -7.558e-02, 1.507e-01, 2.363e-01, -2.066e-01, -2.612e-02, 9.084e-02, 1.593e-01, -4.946e-02)); + r += mul(s6_5, M4(6.337e-02, -5.715e-02, -1.101e-02, 3.131e-02, 2.204e-01, -1.842e-02, -4.902e-02, -4.250e-02, 1.490e-02, -4.777e-02, -6.084e-02, 2.862e-02, 
-4.229e-03, -2.731e-02, -1.090e-01, -9.158e-02)); + r += mul(s6_6, M4(-4.239e-02, 6.636e-02, -5.269e-03, -5.080e-02, -2.288e-02, 1.264e-01, 1.378e-02, -1.793e-02, -3.185e-02, -2.347e-02, -4.334e-02, 7.789e-03, -2.840e-02, -7.845e-02, -4.260e-02, -3.045e-02)); + r += mul(s6_7, M4(-1.550e-02, 2.201e-02, 2.482e-02, -4.016e-03, 6.373e-02, -9.692e-02, 1.513e-02, 2.924e-02, 5.650e-02, 3.793e-02, 2.864e-02, -9.912e-03, -1.011e-02, 1.524e-01, -2.573e-02, -1.628e-02)); + r += mul(s6_8, M4(8.834e-03, -3.029e-02, -4.205e-02, -1.419e-02, 3.865e-02, -2.833e-02, -2.787e-02, 4.741e-04, 1.460e-02, -2.973e-03, -6.640e-04, 4.523e-03, -4.376e-03, 3.053e-02, -1.818e-02, -1.190e-01)); + r += mul(s7_0, M4(-8.747e-02, 6.773e-02, -8.260e-02, -1.418e-01, 2.004e-03, -3.556e-02, 1.986e-02, -3.789e-02, -1.301e-02, -7.598e-02, -8.963e-02, -2.681e-02, 4.820e-02, 2.518e-02, 3.622e-02, 1.282e-02)); + r += mul(s7_1, M4(5.156e-02, -1.921e-01, -1.351e-01, 1.500e-01, 2.851e-02, -2.615e-02, -5.104e-02, -1.337e-02, -4.990e-02, 6.353e-02, 1.466e-02, -8.082e-03, -2.539e-02, 5.501e-02, 6.142e-02, -8.426e-03)); + r += mul(s7_2, M4(2.623e-02, -2.479e-02, 1.455e-02, -4.765e-02, 3.307e-02, -7.951e-03, -1.461e-02, 2.652e-02, -3.376e-02, 7.495e-05, -8.081e-02, -3.611e-02, -2.016e-02, -3.285e-02, -3.195e-02, -1.305e-02)); + r += mul(s7_3, M4(6.073e-02, -6.791e-02, 3.322e-02, 6.663e-02, 1.369e-02, 3.348e-02, 5.260e-02, -1.204e-02, -3.278e-02, -1.580e-02, -1.213e-02, -4.230e-02, 1.845e-02, -7.809e-03, 2.595e-02, 4.624e-02)); + r += mul(s7_4, M4(-1.441e-01, -2.309e-02, 1.171e-01, -2.103e-01, 5.903e-02, -5.578e-02, 5.480e-03, 2.120e-02, 1.271e-02, 3.046e-02, 9.099e-02, 7.909e-03, -1.087e-01, -1.353e-02, 1.480e-01, -9.692e-02)); + r += mul(s7_5, M4(-2.112e-02, 1.730e-02, -1.068e-01, 2.903e-02, 6.340e-03, 1.157e-02, 5.409e-02, -6.964e-02, 1.177e-01, -6.366e-02, -3.753e-02, 5.627e-02, 4.602e-03, -5.148e-03, -2.803e-02, -3.905e-02)); + r += mul(s7_6, M4(4.832e-02, 7.096e-02, 2.105e-02, 2.247e-02, 2.229e-02, -3.705e-02, 
-5.896e-03, -1.092e-02, -3.567e-02, -2.236e-02, -1.916e-02, -3.311e-02, 9.405e-03, -3.580e-02, -2.565e-02, 2.150e-02)); + r += mul(s7_7, M4(-4.603e-02, 1.223e-02, -4.120e-02, 3.812e-02, 5.082e-02, -5.348e-02, 3.159e-02, 2.287e-02, 2.515e-02, 9.725e-02, 6.245e-02, -2.659e-02, -3.666e-02, 1.681e-02, 1.776e-02, -3.267e-02)); + r += mul(s7_8, M4(4.920e-02, -2.549e-02, -4.974e-03, 4.321e-03, 2.267e-02, 2.283e-02, -5.713e-03, -3.793e-02, -1.503e-02, -2.335e-02, -1.049e-02, 3.006e-02, 1.663e-02, 2.374e-03, -1.261e-02, -8.854e-03)); + r += V4(1.240e-02, 8.097e-03, 8.971e-03, 1.472e-02); + return r; +} + +void Pass16(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 
0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, 
s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 17 +//!DESC conv16 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 
s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.029e-02, 4.879e-04, -8.758e-03, 1.475e-04, -1.389e-02, 2.532e-02, 9.795e-03, 8.629e-03, -1.347e-02, 2.776e-02, -1.624e-02, -1.667e-02, -1.381e-02, -1.287e-03, -1.868e-01, -6.917e-02)); + r += mul(s0_1, M4(-9.130e-03, 8.736e-03, 2.281e-02, -2.990e-02, 1.027e-02, -8.698e-02, -2.193e-02, -6.665e-02, 2.959e-02, -6.816e-02, -7.055e-03, -3.538e-02, -1.003e-01, 1.809e-02, 3.144e-02, 6.967e-03)); + r += mul(s0_2, M4(-5.170e-03, 1.345e-03, -2.046e-02, 1.422e-02, 7.018e-03, 5.548e-02, 3.150e-02, 8.275e-02, -3.599e-02, 6.821e-02, 1.848e-02, -3.536e-02, 3.895e-02, -1.010e-01, -3.898e-02, -8.420e-02)); + r += mul(s0_3, M4(8.822e-03, -4.687e-03, -4.076e-02, 2.655e-02, -1.233e-01, -1.322e-02, -1.004e-01, -4.416e-02, -9.892e-03, 1.772e-02, -3.910e-02, 4.194e-02, -8.128e-02, 5.313e-02, 1.397e-01, -8.644e-02)); + r += mul(s0_4, M4(-1.556e-03, 8.777e-02, 3.236e-02, -1.485e-02, 8.951e-02, 1.694e-01, -3.848e-02, 8.485e-02, -7.786e-02, 1.043e-01, 8.126e-02, -4.799e-02, -3.285e-01, -1.213e-01, -1.267e-01, 1.159e-01)); + r += mul(s0_5, M4(2.269e-02, -4.562e-02, -2.481e-03, -4.575e-02, -3.557e-02, -9.145e-03, 2.451e-02, 6.366e-02, 4.814e-02, 4.098e-02, -1.829e-03, 1.207e-01, -2.781e-02, 1.333e-02, -5.533e-02, -7.421e-02)); + r += mul(s0_6, M4(3.073e-02, 1.740e-02, -4.553e-02, -2.311e-02, 2.805e-02, 4.819e-03, -3.860e-02, -7.901e-03, 3.804e-02, 4.899e-02, 4.114e-02, -5.725e-02, -8.765e-02, -1.678e-03, -4.249e-02, 
-2.247e-02)); + r += mul(s0_7, M4(4.033e-02, -6.968e-02, -1.117e-02, 2.634e-02, 4.042e-03, 2.353e-02, -6.108e-03, -9.028e-03, 2.724e-02, -5.808e-02, 6.985e-03, 2.448e-02, -1.314e-02, -2.960e-02, 8.396e-03, 3.892e-02)); + r += mul(s0_8, M4(-6.932e-03, 1.874e-02, -5.344e-03, 1.371e-03, -1.015e-02, 2.203e-02, -2.644e-03, -4.730e-04, 1.663e-03, -5.859e-02, 1.909e-03, -1.679e-02, -8.083e-02, -2.215e-02, 5.046e-05, -8.255e-02)); + r += mul(s1_0, M4(-9.475e-03, 1.204e-02, -2.413e-02, -9.667e-03, -2.342e-02, 6.405e-03, 5.050e-02, 9.364e-03, 1.852e-02, -1.010e-02, -2.013e-02, -2.629e-02, -1.507e-02, -3.515e-02, 5.260e-02, -1.952e-02)); + r += mul(s1_1, M4(4.171e-03, 1.215e-02, -7.334e-03, -6.108e-02, -3.149e-02, 8.647e-02, -4.531e-02, 3.942e-02, 3.959e-02, -1.430e-01, 5.422e-02, 1.595e-02, 5.384e-02, -3.547e-02, 2.321e-02, -1.340e-02)); + r += mul(s1_2, M4(-4.578e-03, -8.472e-03, -1.729e-02, 1.731e-02, -2.589e-02, -6.663e-02, -9.824e-03, -5.955e-02, -3.549e-04, 5.133e-02, 2.353e-02, 2.833e-02, -2.995e-03, -1.346e-01, -9.590e-03, -1.103e-01)); + r += mul(s1_3, M4(2.615e-02, -1.288e-02, 2.305e-02, -5.901e-03, 2.133e-03, 2.662e-02, -1.271e-02, -2.611e-02, -3.497e-02, 1.143e-02, -2.019e-01, 7.622e-02, 1.902e-03, -1.654e-02, -2.624e-02, -2.593e-02)); + r += mul(s1_4, M4(-2.800e-02, -5.191e-02, 5.445e-02, -9.295e-03, 7.110e-02, -1.563e-02, -1.133e-01, 5.510e-03, -1.650e-01, 3.389e-01, -6.446e-02, -1.019e-01, -6.448e-03, 5.044e-02, -2.881e-02, -6.928e-02)); + r += mul(s1_5, M4(3.185e-02, -9.858e-02, -1.038e-02, -1.009e-01, -4.432e-02, 5.652e-03, 1.895e-03, 5.691e-02, 1.375e-03, -7.187e-02, -2.144e-03, 7.523e-02, 5.999e-03, 4.358e-03, -3.024e-02, 2.206e-03)); + r += mul(s1_6, M4(-1.016e-02, -2.193e-02, -3.216e-03, -1.971e-02, -2.304e-02, -1.332e-02, -3.761e-02, 2.391e-02, -3.940e-02, 2.974e-02, 3.480e-02, -4.996e-02, 1.474e-02, 4.306e-02, -1.169e-02, -2.536e-02)); + r += mul(s1_7, M4(-4.764e-02, -5.189e-02, 1.263e-02, 6.666e-02, -4.749e-02, -2.246e-02, 5.629e-03, 6.944e-03, 
-4.536e-02, -1.074e-01, 5.213e-02, 6.953e-02, -5.418e-02, -1.421e-02, -6.040e-03, 2.238e-03)); + r += mul(s1_8, M4(-8.634e-04, -1.264e-03, -2.779e-03, 5.727e-03, -2.094e-02, -4.706e-03, 3.547e-04, -1.252e-02, -2.854e-03, -3.222e-02, 8.115e-03, -4.713e-02, -1.804e-02, 2.007e-03, 1.290e-02, 1.889e-02)); + r += mul(s2_0, M4(1.122e-02, -3.919e-02, -1.199e-02, -3.199e-02, 7.789e-03, -2.405e-04, -4.022e-03, 3.353e-02, -1.405e-01, 2.972e-02, 1.821e-01, 3.272e-02, -1.102e-02, 2.506e-02, 1.137e-01, 3.607e-02)); + r += mul(s2_1, M4(6.040e-03, -7.862e-02, -8.569e-02, 2.748e-02, 7.167e-03, -7.995e-02, 3.430e-02, -1.828e-02, 2.266e-01, 1.570e-01, 3.770e-03, -5.764e-02, 3.176e-03, 6.758e-02, 9.676e-02, -6.685e-04)); + r += mul(s2_2, M4(4.624e-03, -1.709e-01, -2.355e-02, -1.184e-01, 2.503e-02, 5.855e-02, 4.564e-03, 4.577e-02, 2.700e-02, -3.105e-02, 7.368e-03, -9.087e-03, -1.610e-02, -6.342e-02, 1.078e-02, -1.683e-01)); + r += mul(s2_3, M4(-1.038e-01, -5.620e-02, 5.057e-02, -1.136e-01, 4.072e-03, -2.350e-02, -1.162e-01, 5.121e-02, 6.767e-02, 3.072e-02, -7.594e-02, -2.475e-02, 3.785e-02, 9.335e-02, 1.050e-01, 2.552e-02)); + r += mul(s2_4, M4(1.875e-01, -3.006e-01, 1.249e-01, 2.122e-01, -8.097e-02, 8.200e-02, 4.680e-02, 1.175e-01, -2.058e-01, -9.523e-02, 6.787e-03, 2.453e-01, -1.272e-01, -2.239e-01, 2.698e-01, -3.082e-01)); + r += mul(s2_5, M4(-5.849e-02, 1.395e-01, -1.039e-01, 2.000e-02, -3.971e-02, -5.141e-03, -1.663e-02, -2.269e-02, -1.253e-03, -7.699e-02, -6.501e-03, 7.125e-04, -2.391e-03, -7.280e-02, 3.890e-02, 7.424e-02)); + r += mul(s2_6, M4(-9.839e-02, -4.239e-02, -1.770e-01, -4.000e-02, -1.275e-02, 1.437e-02, 3.002e-02, -3.845e-02, -6.443e-03, -7.469e-03, -1.396e-02, 3.598e-02, -4.157e-03, -1.014e-02, 5.567e-03, -7.055e-02)); + r += mul(s2_7, M4(4.702e-02, -5.766e-02, -3.152e-02, -2.920e-01, 9.183e-02, -5.891e-02, 2.027e-02, 1.401e-01, 6.066e-03, -2.295e-02, -1.850e-02, -9.314e-03, -2.626e-02, 2.560e-02, 1.554e-02, -9.582e-02)); + r += mul(s2_8, M4(-5.667e-02, 8.757e-03, 
-1.063e-02, -1.721e-02, 5.953e-02, -2.878e-02, -3.201e-02, 2.957e-02, -3.086e-05, -4.542e-03, -4.631e-03, -2.061e-02, 1.549e-02, 6.634e-03, 1.177e-02, -4.624e-02)); + r += mul(s3_0, M4(-3.817e-02, -6.516e-02, 6.351e-03, -4.393e-02, 2.210e-03, -8.994e-04, -1.707e-02, -2.814e-02, 3.967e-02, 4.164e-02, -2.078e-04, 5.480e-02, -4.200e-03, 2.909e-02, 3.827e-03, 5.186e-02)); + r += mul(s3_1, M4(-1.730e-02, 5.633e-02, -6.677e-03, -2.826e-02, 1.806e-02, 5.101e-02, 4.220e-02, 9.208e-02, -1.975e-02, -3.465e-02, -4.827e-02, -2.703e-02, 3.934e-03, -5.528e-02, 1.503e-02, -1.808e-02)); + r += mul(s3_2, M4(-2.771e-03, -2.343e-02, 2.906e-03, 2.482e-02, -5.288e-03, 7.238e-03, -3.635e-03, -2.292e-02, -2.141e-03, 7.963e-02, 2.594e-02, 9.728e-02, 3.985e-03, 8.611e-03, 2.922e-02, -5.558e-02)); + r += mul(s3_3, M4(-3.361e-02, -1.143e-02, 1.257e-03, -5.252e-02, -3.814e-02, -4.063e-02, 5.435e-02, -4.041e-02, -4.214e-02, 1.422e-02, -2.499e-03, -1.482e-02, 3.867e-02, 5.719e-02, -4.521e-03, 1.558e-01)); + r += mul(s3_4, M4(3.368e-03, 2.704e-02, -4.608e-03, -2.946e-02, -4.495e-02, 1.209e-03, 5.002e-02, 1.787e-01, 8.587e-03, -7.436e-03, -1.222e-02, 1.216e-03, -8.122e-02, -1.045e-01, 6.610e-03, -2.437e-01)); + r += mul(s3_5, M4(1.244e-02, -1.966e-02, 7.155e-02, -2.583e-02, -5.333e-02, 1.328e-02, 9.831e-03, -5.143e-02, 2.557e-02, -1.996e-02, 3.396e-03, 1.462e-02, -3.415e-02, -3.247e-02, -1.521e-03, -4.948e-02)); + r += mul(s3_6, M4(3.160e-02, 1.019e-02, -4.078e-02, -6.326e-02, -4.169e-02, -5.095e-02, 7.710e-03, -5.661e-02, 1.123e-02, -1.954e-02, -1.598e-02, -2.833e-02, -2.837e-02, 4.464e-02, 2.500e-02, -1.005e-02)); + r += mul(s3_7, M4(1.256e-02, -5.163e-02, -2.687e-02, 3.633e-02, 9.786e-03, -3.010e-02, 9.320e-03, 5.050e-02, 3.911e-02, -2.263e-02, 1.790e-02, 2.694e-02, -3.798e-02, 1.774e-02, -2.614e-02, -4.926e-02)); + r += mul(s3_8, M4(-2.943e-03, 6.765e-02, 1.541e-02, 4.533e-02, 6.659e-03, -6.111e-03, 3.756e-03, 5.907e-03, 2.580e-02, 5.163e-03, -1.929e-03, 2.221e-02, -1.588e-02, -1.152e-02, 
1.484e-04, 3.090e-03)); + r += mul(s4_0, M4(2.188e-03, 1.233e-02, -6.039e-03, -3.271e-03, -4.075e-02, -1.018e-01, 5.750e-02, -4.206e-02, 2.208e-02, 4.427e-03, 4.322e-03, 2.824e-02, 2.442e-02, -1.590e-03, -5.812e-02, 2.286e-02)); + r += mul(s4_1, M4(1.203e-02, -2.723e-02, 2.792e-02, 4.277e-03, -1.371e-02, 1.807e-01, -7.397e-02, 5.375e-02, 6.859e-03, -1.108e-02, 6.324e-03, -2.214e-02, 1.751e-02, -1.223e-01, 2.908e-02, -3.164e-02)); + r += mul(s4_2, M4(-1.789e-02, 2.918e-02, -2.021e-02, 1.429e-02, -8.790e-02, -1.048e-01, 4.032e-03, -1.448e-01, 4.563e-03, 1.140e-02, 1.336e-02, -5.594e-03, -9.261e-03, 2.338e-02, 2.334e-03, 4.670e-02)); + r += mul(s4_3, M4(-3.749e-02, -3.339e-02, 7.810e-03, -2.643e-02, 1.792e-02, -2.154e-02, 1.367e-02, 2.375e-02, -2.629e-02, -1.692e-02, -4.533e-02, 8.193e-03, 9.931e-03, 2.730e-02, -2.520e-02, 2.837e-02)); + r += mul(s4_4, M4(9.309e-03, 8.926e-02, -6.059e-02, 6.958e-02, 1.646e-01, -1.409e-03, 6.206e-02, -1.003e-01, -8.349e-03, 2.249e-01, -1.141e-01, 6.166e-02, -1.402e-01, -2.770e-01, -1.820e-01, 7.931e-02)); + r += mul(s4_5, M4(2.799e-02, -6.673e-02, -8.643e-03, -1.263e-02, 4.470e-02, 3.160e-02, -4.880e-02, 4.270e-04, 2.690e-03, -4.527e-02, 2.218e-02, 2.813e-02, -1.331e-02, -4.080e-02, -8.969e-03, -1.897e-01)); + r += mul(s4_6, M4(1.957e-02, 5.533e-03, -2.370e-02, 3.365e-04, -2.176e-02, -1.949e-02, 6.947e-03, -2.445e-02, 8.425e-02, 6.763e-02, -5.106e-02, 2.826e-02, -6.571e-02, -1.743e-02, -3.430e-02, 5.619e-02)); + r += mul(s4_7, M4(-4.661e-03, -3.377e-02, -3.822e-02, 1.310e-02, -2.800e-02, -8.351e-04, -1.781e-02, 1.310e-02, 4.303e-02, -2.057e-02, -1.517e-02, -2.399e-02, -8.678e-02, 1.040e-01, 3.349e-03, -1.960e-02)); + r += mul(s4_8, M4(1.441e-03, 1.279e-02, 1.996e-03, 1.996e-02, -7.728e-03, -4.151e-03, 2.081e-04, -2.267e-02, -4.927e-03, 1.815e-02, -3.295e-02, 1.235e-02, -2.610e-02, -7.928e-02, -2.486e-02, 5.305e-02)); + r += mul(s5_0, M4(-3.050e-02, 5.988e-02, 4.850e-02, 2.003e-03, -1.797e-02, -3.545e-02, 6.963e-02, -2.332e-02, 
-1.274e-02, 5.089e-02, -5.318e-02, 3.672e-02, 1.798e-02, 1.617e-02, -1.356e-02, -2.160e-03)); + r += mul(s5_1, M4(-9.221e-03, -3.095e-02, -1.260e-01, -3.726e-02, -2.899e-02, -1.172e-01, -1.080e-02, -8.622e-02, 5.263e-03, 6.370e-04, 2.907e-02, 8.421e-02, 3.938e-02, -8.754e-04, -2.304e-02, 5.670e-02)); + r += mul(s5_2, M4(-7.475e-02, 2.010e-02, -1.580e-02, -9.556e-02, -3.775e-02, 3.063e-02, 2.983e-02, 3.424e-03, 1.927e-02, 6.518e-02, 2.239e-02, 8.391e-02, -5.363e-03, 4.095e-02, 1.163e-02, -6.192e-03)); + r += mul(s5_3, M4(5.238e-03, -1.321e-01, -3.892e-02, -1.501e-02, 2.361e-02, -3.405e-02, 4.850e-02, -4.595e-02, -7.598e-02, 6.938e-02, 1.223e-01, 2.470e-02, 3.387e-02, 3.784e-02, -7.823e-02, 8.717e-02)); + r += mul(s5_4, M4(-5.467e-03, 4.775e-02, -2.619e-01, 1.500e-02, 3.519e-01, 2.178e-02, 1.416e-02, 5.189e-02, 9.114e-02, 3.850e-01, 1.040e-01, 2.825e-01, -1.140e-01, 5.294e-02, 1.356e-02, 8.804e-02)); + r += mul(s5_5, M4(4.030e-02, -8.907e-02, 4.373e-02, -2.419e-01, 2.962e-02, 9.283e-02, -4.764e-02, -1.203e-01, -6.276e-02, -8.897e-02, 2.790e-02, 3.085e-02, 4.104e-02, -1.067e-01, -2.282e-02, 2.516e-03)); + r += mul(s5_6, M4(8.130e-02, 3.593e-02, -1.512e-02, -2.130e-02, 3.846e-04, 1.084e-02, -1.571e-02, -3.920e-02, -1.775e-02, 3.412e-03, -3.112e-03, 3.362e-02, 2.540e-02, 2.637e-02, 1.125e-02, 2.104e-02)); + r += mul(s5_7, M4(5.058e-02, -1.020e-01, -1.193e-01, 3.974e-02, -6.795e-02, 1.397e-02, 9.795e-03, -4.394e-02, -6.067e-04, -2.733e-02, 2.546e-02, 2.650e-02, 1.741e-02, -3.900e-02, -4.500e-02, -2.266e-02)); + r += mul(s5_8, M4(5.627e-02, -6.886e-02, -2.271e-02, 2.678e-02, -2.336e-02, -8.212e-03, 9.465e-03, 1.759e-02, 1.935e-02, 7.441e-02, -3.217e-02, 3.104e-02, -9.202e-03, -7.706e-03, -1.861e-02, -6.168e-02)); + r += mul(s6_0, M4(-2.576e-02, 1.129e-01, 2.133e-03, 2.901e-03, 1.055e-02, -2.322e-02, -1.345e-02, 1.667e-02, 1.038e-02, 4.466e-02, 1.365e-02, -1.159e-02, 4.424e-03, -4.547e-02, 1.407e-02, 5.112e-02)); + r += mul(s6_1, M4(-3.459e-02, -7.740e-02, 3.556e-02, 
-1.876e-02, 5.276e-02, 4.453e-02, 7.842e-03, -4.327e-02, -3.259e-02, -4.898e-02, -5.375e-02, -2.294e-02, -4.080e-02, 7.458e-02, 9.618e-03, -1.069e-02)); + r += mul(s6_2, M4(5.067e-02, 5.603e-03, -1.500e-02, 5.532e-02, -2.757e-02, -7.832e-02, -2.900e-02, 1.476e-02, -1.282e-02, 4.474e-02, -1.039e-02, 1.193e-02, 3.010e-03, -1.267e-02, 1.180e-02, -3.899e-02)); + r += mul(s6_3, M4(3.801e-02, -3.406e-03, -5.913e-02, 4.177e-03, 4.284e-02, 3.299e-02, -3.133e-02, 1.868e-02, 5.249e-02, -7.641e-02, -7.446e-02, 3.435e-02, -6.008e-03, 4.739e-02, 1.140e-01, 5.544e-02)); + r += mul(s6_4, M4(-5.225e-02, 1.877e-02, 1.099e-01, 8.499e-02, -6.215e-02, -1.398e-01, 7.367e-03, -1.408e-02, 6.409e-02, 1.008e-01, -2.400e-01, -8.376e-02, 3.692e-02, 3.405e-04, -1.053e-01, 1.195e-01)); + r += mul(s6_5, M4(2.643e-02, -1.569e-02, -1.466e-02, -2.440e-02, 3.143e-02, 8.849e-02, -3.169e-02, -3.565e-02, 1.911e-02, 4.959e-02, 9.990e-03, 7.206e-02, -6.857e-03, -3.100e-02, 1.367e-02, -2.076e-02)); + r += mul(s6_6, M4(-2.594e-03, 1.125e-02, -7.613e-02, 2.455e-02, -3.486e-02, -2.049e-02, -1.238e-02, 2.837e-02, -4.413e-02, -1.939e-02, 8.693e-02, 9.861e-03, 1.894e-02, 3.119e-02, 4.539e-02, 1.646e-02)); + r += mul(s6_7, M4(-4.224e-02, -2.552e-02, -1.879e-03, 5.658e-03, 3.094e-02, 5.435e-03, 3.240e-03, 2.593e-02, -9.090e-04, 7.022e-02, -1.395e-02, 2.027e-02, 3.540e-02, 2.128e-02, -2.508e-02, 1.386e-02)); + r += mul(s6_8, M4(-3.235e-02, -6.805e-03, 1.986e-02, -5.980e-02, -1.837e-02, 4.823e-02, -4.069e-02, 4.946e-02, -5.257e-02, -5.763e-02, 1.689e-02, -7.420e-02, -7.487e-03, 3.107e-02, 1.590e-02, 4.157e-03)); + r += mul(s7_0, M4(-4.054e-02, 8.705e-03, 1.678e-02, -2.195e-02, -3.439e-02, 2.712e-02, 2.326e-02, 8.939e-03, 2.003e-03, 2.375e-02, 2.251e-02, -7.720e-03, 2.541e-02, -3.579e-02, 2.579e-02, 3.495e-02)); + r += mul(s7_1, M4(-1.394e-03, 5.266e-02, 1.313e-02, -8.081e-02, 6.935e-02, 1.109e-01, 5.465e-02, -1.385e-02, -2.348e-02, -6.624e-02, -3.073e-03, 9.761e-03, 5.321e-03, -1.357e-02, -3.844e-02, -4.074e-02)); 
+ r += mul(s7_2, M4(-2.816e-02, -8.956e-02, -3.648e-02, -1.188e-01, 1.016e-02, -8.859e-02, 1.397e-02, 6.185e-02, -2.495e-03, -3.623e-02, 9.019e-04, -5.670e-02, -1.218e-02, 4.075e-02, -7.870e-04, -2.862e-02)); + r += mul(s7_3, M4(7.425e-02, 2.433e-02, -2.468e-02, -1.552e-02, 1.614e-02, 3.148e-02, -4.567e-02, 4.686e-03, 1.311e-02, 6.546e-03, 1.486e-02, 8.595e-03, -4.130e-03, 5.457e-02, 1.922e-02, 1.460e-02)); + r += mul(s7_4, M4(-3.507e-02, -8.057e-02, -5.361e-02, 4.949e-02, -2.027e-03, -2.628e-01, 1.921e-02, 7.200e-02, -1.868e-02, 1.318e-02, -4.976e-02, 1.480e-02, -1.817e-02, -1.079e-01, -1.634e-01, -2.196e-01)); + r += mul(s7_5, M4(9.744e-03, 7.544e-02, -6.728e-03, -9.174e-02, -2.069e-02, 3.819e-02, 5.528e-02, 1.131e-01, 2.880e-03, 1.658e-03, 1.095e-02, 6.066e-03, 1.375e-02, -4.229e-02, 5.933e-03, 1.083e-01)); + r += mul(s7_6, M4(-4.652e-02, 6.894e-03, -3.310e-02, -3.747e-02, -1.242e-02, -2.874e-02, -4.627e-02, 2.411e-02, -2.237e-03, -1.494e-03, 3.698e-02, 1.460e-03, 6.772e-03, -3.464e-03, 4.233e-02, 1.243e-02)); + r += mul(s7_7, M4(3.051e-03, -5.852e-02, -2.634e-02, 7.790e-02, 3.050e-02, 4.398e-02, 7.121e-02, -7.453e-02, 1.137e-02, 7.497e-02, -1.589e-02, 2.040e-02, -2.012e-02, 1.422e-02, -5.347e-03, -7.258e-02)); + r += mul(s7_8, M4(-1.066e-02, -4.479e-02, -6.650e-03, -8.960e-02, -1.850e-02, 1.870e-02, 1.950e-02, 5.075e-02, -4.336e-02, -3.843e-02, 1.724e-04, -2.057e-02, -6.799e-04, -2.837e-02, 6.789e-03, 5.809e-03)); + r += V4(-2.027e-03, 2.169e-03, -4.476e-03, 9.853e-04); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, 
V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.602e-02, 1.944e-02, 2.057e-02, 2.308e-02, -1.138e-03, -8.687e-02, -3.301e-02, -1.328e-02, -9.598e-03, 4.121e-02, -3.381e-02, 3.510e-02, -2.909e-02, -1.159e-01, -1.384e-02, 1.851e-02)); + r += mul(s0_1, M4(-7.722e-03, -1.367e-02, 4.892e-02, -8.374e-02, 1.866e-02, -3.208e-01, -2.756e-02, -1.617e-01, 2.663e-02, 5.331e-02, -1.181e-01, 2.712e-02, -1.752e-03, 8.796e-02, -2.881e-01, 7.150e-03)); + r += mul(s0_2, M4(6.638e-03, -2.737e-02, 7.612e-03, -3.036e-02, -3.045e-02, 4.160e-03, -4.504e-02, 1.067e-01, -1.567e-02, 7.296e-03, -2.313e-02, 4.067e-02, -3.013e-02, -6.589e-02, -3.004e-02, 1.242e-01)); + r += mul(s0_3, M4(4.358e-02, 8.719e-02, 1.000e-02, -9.082e-02, -5.349e-02, 1.008e-01, -4.839e-02, -6.958e-02, 4.046e-02, 4.825e-03, 7.882e-03, -5.125e-02, -1.245e-01, -8.734e-02, -3.235e-02, 3.103e-02)); + r += mul(s0_4, M4(-6.380e-03, 5.468e-01, 8.698e-03, 3.629e-01, -4.939e-02, 3.985e-01, 1.027e-02, 1.595e-01, 2.367e-02, -4.577e-03, 6.186e-02, 6.269e-02, 5.279e-01, -3.631e-02, -3.986e-02, 7.478e-02)); + r += mul(s0_5, M4(3.021e-02, -2.171e-02, 5.334e-03, -9.941e-02, 4.840e-02, 7.097e-02, -9.422e-02, 2.264e-02, 8.765e-02, -2.527e-02, 5.788e-02, -4.662e-02, -4.417e-02, 3.999e-02, 4.138e-02, -9.514e-02)); + r += mul(s0_6, M4(6.274e-02, 7.143e-02, -2.353e-02, -6.923e-03, 6.274e-02, 6.906e-04, 1.702e-02, 1.087e-02, 6.130e-02, 3.286e-02, 1.669e-02, 2.142e-02, -2.491e-02, 1.652e-02, 2.754e-03, 2.682e-03)); + r += mul(s0_7, M4(-3.790e-03, 7.876e-02, -2.640e-02, -6.914e-02, -3.984e-02, 4.054e-02, -7.473e-03, -6.250e-03, 1.066e-02, -5.058e-02, -1.415e-02, 1.820e-03, -1.421e-01, -3.648e-03, -7.857e-03, 5.938e-03)); + r += mul(s0_8, M4(1.763e-02, 1.438e-02, -4.884e-03, 1.350e-02, -1.027e-02, 2.633e-02, 1.813e-02, 1.996e-02, -4.629e-02, -4.395e-02, -1.242e-02, -6.677e-04, 
5.819e-02, 3.354e-02, 8.380e-03, 4.855e-03)); + r += mul(s1_0, M4(-2.772e-02, -8.495e-04, 2.062e-02, -1.064e-02, -1.575e-02, 6.163e-02, 4.156e-02, -1.324e-02, -2.506e-02, 2.186e-03, -3.599e-02, 6.467e-02, -4.567e-02, -6.696e-02, 2.431e-02, -2.636e-02)); + r += mul(s1_1, M4(8.213e-03, -1.493e-01, 1.733e-02, -9.047e-02, 2.738e-02, 5.871e-02, -1.676e-02, 7.615e-02, -2.371e-02, 6.123e-02, -1.460e-01, 2.579e-01, 7.783e-02, 2.332e-02, -8.649e-02, -1.917e-01)); + r += mul(s1_2, M4(7.924e-03, -8.634e-02, 2.742e-03, -3.775e-02, 2.520e-02, 1.735e-02, -4.828e-02, 1.161e-03, -2.588e-02, -1.229e-02, -1.090e-02, -1.852e-02, -1.063e-02, 6.307e-02, -3.989e-03, 4.773e-02)); + r += mul(s1_3, M4(-2.502e-02, -1.060e-02, 3.879e-02, -7.326e-02, 1.117e-02, -7.896e-03, -4.144e-02, -5.220e-02, -9.789e-03, 1.152e-02, -6.842e-02, 1.956e-02, -2.119e-02, 1.520e-02, -8.919e-04, -1.018e-02)); + r += mul(s1_4, M4(-1.167e-01, 2.910e-01, -1.212e-01, 4.229e-01, 4.912e-02, -3.232e-03, -3.651e-02, 2.089e-02, -1.031e-01, -4.355e-01, 1.091e-01, 4.724e-01, 8.100e-02, 1.825e-02, 5.617e-03, -2.689e-03)); + r += mul(s1_5, M4(2.900e-02, -8.883e-02, -1.598e-02, -4.842e-02, 7.936e-03, -4.950e-02, -4.480e-02, 4.077e-02, 2.631e-02, -3.377e-02, 1.693e-02, -1.655e-01, -7.887e-02, -5.963e-02, -2.036e-02, -4.150e-02)); + r += mul(s1_6, M4(3.880e-02, 4.446e-02, 5.978e-03, 7.119e-03, 3.092e-02, -9.694e-03, -6.194e-04, -3.426e-03, -9.249e-03, -5.280e-02, 3.246e-03, 4.194e-02, -2.533e-04, -2.178e-03, -4.813e-04, 1.268e-04)); + r += mul(s1_7, M4(-1.088e-01, 8.445e-03, -2.497e-03, -4.470e-02, -1.973e-02, -3.454e-02, 1.519e-02, -1.348e-02, -5.420e-02, -1.657e-01, -5.679e-02, 6.335e-02, 2.859e-03, -1.316e-02, -1.629e-02, 3.222e-03)); + r += mul(s1_8, M4(-3.045e-02, -1.485e-02, -4.530e-03, 1.115e-02, 5.975e-02, 6.526e-02, 7.257e-03, -2.211e-02, -1.775e-02, -3.347e-02, -1.223e-02, -7.104e-03, 1.594e-03, 1.353e-02, 1.825e-02, -1.211e-02)); + r += mul(s2_0, M4(8.946e-03, -6.597e-02, -2.839e-02, -3.916e-03, 3.179e-02, 
-3.892e-02, -9.478e-05, -3.378e-02, -6.274e-02, 1.027e-01, 1.648e-02, -3.358e-02, -7.541e-03, 1.094e-01, -3.723e-02, 6.050e-03)); + r += mul(s2_1, M4(-2.421e-02, -7.951e-02, 8.080e-02, 2.107e-02, 2.139e-02, 8.789e-02, -9.062e-02, 1.134e-01, 2.512e-01, 2.997e-02, 1.545e-01, -3.472e-01, 4.823e-02, 1.155e-01, -1.848e-03, -1.546e-01)); + r += mul(s2_2, M4(-4.584e-02, -6.638e-02, -1.879e-02, 9.948e-02, -3.548e-02, 4.780e-03, 5.200e-04, -6.878e-05, 8.401e-02, 2.047e-01, -6.270e-03, -2.448e-02, 1.475e-02, -4.832e-02, -8.098e-02, -1.398e-01)); + r += mul(s2_3, M4(-1.042e-01, -4.714e-02, -3.402e-02, -4.442e-02, -3.004e-02, 1.026e-01, 1.361e-02, 7.693e-02, 1.192e-01, -3.831e-02, -1.074e-02, 1.415e-02, 2.575e-02, 5.908e-02, -4.298e-02, 6.081e-03)); + r += mul(s2_4, M4(2.861e-01, 9.207e-02, -1.888e-01, 1.447e-01, -1.294e-01, -8.026e-03, 1.666e-01, 3.225e-02, -4.795e-01, -1.216e-01, -4.461e-03, -6.152e-02, -1.341e-01, 6.710e-02, -3.464e-03, 2.860e-02)); + r += mul(s2_5, M4(3.118e-02, 6.361e-02, 1.517e-01, -2.066e-01, 9.298e-02, -3.544e-02, 4.673e-02, -1.113e-01, -1.257e-02, 7.547e-03, 5.694e-03, 5.824e-02, 1.598e-02, 3.417e-02, 2.197e-02, -8.769e-02)); + r += mul(s2_6, M4(-9.891e-02, -1.657e-02, -1.123e-02, -2.551e-02, 1.037e-01, 3.091e-02, -2.608e-02, -2.351e-02, -1.279e-02, -1.557e-02, -5.095e-03, -2.388e-03, -1.545e-02, -8.277e-03, 5.444e-02, 8.015e-03)); + r += mul(s2_7, M4(1.387e-01, 6.281e-02, -1.489e-02, -1.101e-01, -4.190e-02, -5.878e-02, -1.273e-02, -9.930e-02, -1.888e-02, 2.096e-02, 7.184e-03, -2.086e-02, -3.886e-02, 2.067e-02, -1.168e-02, 3.008e-02)); + r += mul(s2_8, M4(-7.592e-02, -7.535e-02, -1.099e-02, 3.415e-02, 2.166e-02, -5.147e-02, -6.420e-03, -7.174e-03, -3.075e-02, -1.896e-02, 1.354e-03, -1.838e-03, -2.213e-02, 7.056e-03, -8.424e-03, 3.090e-02)); + r += mul(s3_0, M4(-9.595e-03, 1.169e-02, -2.990e-02, -2.690e-02, -2.824e-03, -6.516e-02, -4.582e-02, -1.122e-02, 6.029e-02, 4.170e-02, -3.837e-02, 3.339e-02, -1.077e-02, 7.446e-02, -2.923e-02, 6.549e-02)); + r += 
mul(s3_1, M4(-5.111e-03, 6.225e-02, -3.003e-02, 3.308e-02, 2.977e-02, -2.156e-02, -5.577e-02, 3.275e-02, -3.608e-02, -4.971e-02, 1.715e-02, -4.390e-02, -1.254e-02, -3.575e-02, 2.269e-02, -3.128e-02)); + r += mul(s3_2, M4(1.598e-02, -4.655e-02, 7.151e-02, -8.702e-02, 2.685e-03, 4.683e-02, -8.830e-03, -8.845e-03, -2.099e-02, 4.414e-02, 2.528e-02, 5.378e-02, 1.731e-02, 5.451e-02, -7.642e-02, 4.291e-02)); + r += mul(s3_3, M4(2.179e-02, -9.353e-03, 5.185e-03, -4.819e-02, -4.619e-02, -9.819e-03, -2.010e-02, 5.645e-03, -6.820e-02, 5.689e-02, 1.793e-02, -6.441e-03, 8.032e-02, 3.136e-02, -5.345e-02, 5.041e-02)); + r += mul(s3_4, M4(1.961e-02, -3.295e-02, -2.769e-02, 2.888e-02, -9.280e-02, 1.242e-01, 2.179e-02, 1.752e-01, -4.600e-03, -5.721e-02, 1.043e-01, 3.595e-02, -8.385e-02, -6.855e-02, -8.819e-02, -9.500e-03)); + r += mul(s3_5, M4(4.813e-03, 4.495e-02, -1.845e-02, 5.395e-02, 2.067e-02, -7.696e-03, 1.179e-02, -1.813e-02, 2.336e-02, -3.331e-03, 1.134e-02, 1.571e-03, -1.120e-02, 2.340e-02, 4.389e-02, -3.137e-02)); + r += mul(s3_6, M4(7.093e-03, -5.993e-03, 3.377e-02, -6.898e-03, -1.301e-02, -3.597e-03, -2.272e-02, -3.425e-02, 3.097e-02, -2.057e-02, 2.067e-04, -2.046e-03, -3.947e-02, -2.045e-02, 2.302e-02, 1.487e-02)); + r += mul(s3_7, M4(7.510e-03, -2.502e-02, -1.432e-02, -3.276e-03, -8.301e-02, -2.406e-02, -1.457e-02, -2.138e-02, -9.351e-02, -4.553e-02, -6.659e-03, -1.025e-02, -6.618e-03, 2.044e-03, -9.260e-03, 2.205e-02)); + r += mul(s3_8, M4(-3.799e-02, 3.744e-03, 2.620e-02, 1.641e-02, -3.945e-04, -2.924e-02, -1.826e-02, 1.369e-02, -1.016e-03, 4.311e-03, 1.674e-03, 1.285e-02, -7.228e-02, -5.898e-02, -1.358e-02, -2.097e-02)); + r += mul(s4_0, M4(1.102e-02, -1.521e-02, -6.008e-03, 7.398e-04, -7.296e-02, -1.620e-02, -2.665e-02, -5.316e-02, 3.510e-02, 1.202e-02, -4.560e-02, 1.993e-02, 2.631e-02, 2.444e-02, -2.006e-02, 7.418e-02)); + r += mul(s4_1, M4(-1.188e-02, -2.811e-02, -2.370e-02, -1.292e-04, -6.479e-03, 8.834e-02, 1.566e-01, 5.438e-02, -3.613e-03, 1.996e-02, 
-4.270e-02, -1.585e-03, -2.553e-02, -4.177e-02, -1.004e-01, 6.837e-02)); + r += mul(s4_2, M4(-7.127e-03, -5.692e-03, 1.363e-02, -1.185e-02, 2.465e-02, -7.546e-02, -3.101e-02, -4.175e-02, 5.013e-03, 1.110e-01, 2.746e-03, 1.005e-01, -6.006e-02, -1.028e-01, 7.587e-02, -4.006e-02)); + r += mul(s4_3, M4(-1.252e-02, 7.058e-03, -2.447e-02, -1.218e-02, 8.639e-02, 4.769e-02, -4.430e-02, 1.939e-02, -1.963e-02, -2.812e-02, 2.546e-02, 2.933e-02, 6.862e-02, -3.934e-03, -1.195e-02, 7.803e-02)); + r += mul(s4_4, M4(3.983e-02, 5.575e-02, 5.597e-02, -1.163e-02, 2.775e-02, 8.033e-02, -5.456e-02, 3.303e-02, 1.158e-01, -1.946e-02, 1.562e-01, -4.842e-02, 3.132e-03, 2.551e-01, -2.822e-01, -2.206e-01)); + r += mul(s4_5, M4(3.133e-02, -1.668e-03, -1.536e-02, -6.250e-03, -2.136e-02, -6.014e-02, 2.088e-02, -8.109e-02, -4.080e-02, 3.456e-02, -4.960e-03, 9.921e-02, -2.083e-03, 8.930e-02, -1.308e-02, -2.485e-02)); + r += mul(s4_6, M4(1.548e-02, -5.147e-03, 2.563e-03, 6.285e-03, -3.622e-02, 1.740e-03, -7.185e-03, 6.668e-04, 7.383e-02, -4.558e-02, 4.190e-02, 1.590e-03, -1.153e-01, 4.337e-02, 5.815e-03, 6.703e-03)); + r += mul(s4_7, M4(-4.941e-02, -3.662e-02, 8.337e-03, 1.284e-02, 1.697e-03, 2.466e-02, -2.523e-02, -6.149e-03, 2.508e-02, -4.108e-02, 1.502e-02, 1.478e-02, -3.471e-02, 1.253e-01, 3.462e-02, 7.212e-02)); + r += mul(s4_8, M4(-1.663e-02, 1.343e-02, 1.031e-02, 9.448e-03, -5.931e-02, -3.506e-02, -1.118e-04, 1.849e-02, -2.685e-02, 5.631e-03, 6.119e-03, 9.226e-03, -6.665e-04, -5.032e-02, -2.241e-02, -8.407e-03)); + r += mul(s5_0, M4(1.395e-02, 7.699e-02, -2.657e-02, -6.239e-02, -3.012e-02, 1.488e-02, -3.474e-02, -6.028e-02, 2.408e-02, 1.424e-01, -9.118e-02, 3.961e-02, -3.046e-03, -3.075e-03, -7.734e-03, 7.127e-03)); + r += mul(s5_1, M4(2.541e-02, -8.866e-02, -3.268e-02, -3.691e-02, -3.625e-02, -1.827e-01, 2.963e-01, -1.750e-01, 2.180e-02, -3.934e-02, 4.675e-02, 2.028e-01, 1.810e-02, 4.776e-02, -3.834e-02, 3.822e-02)); + r += mul(s5_2, M4(4.619e-02, -2.099e-02, 3.328e-02, 2.555e-02, 
-4.572e-02, -1.028e-01, 8.279e-02, -1.159e-01, 1.228e-02, 1.202e-02, 6.422e-03, -4.052e-03, 2.107e-02, -3.524e-02, 3.312e-02, 1.123e-02)); + r += mul(s5_3, M4(-1.942e-02, -4.602e-02, 7.330e-02, 4.523e-03, -1.554e-03, 6.221e-02, 1.401e-02, -1.852e-03, -6.867e-02, -1.528e-01, -7.001e-02, 3.589e-02, -1.640e-02, 5.321e-02, 4.126e-02, 8.950e-02)); + r += mul(s5_4, M4(-6.138e-02, -7.446e-02, 2.528e-01, -1.488e-01, 4.429e-02, 1.152e-01, -1.438e-03, 1.705e-01, 9.691e-02, 3.186e-02, 1.741e-01, -1.788e-01, -8.488e-02, -8.629e-02, 2.150e-02, -5.496e-02)); + r += mul(s5_5, M4(-1.204e-01, -7.041e-02, 1.513e-01, -1.264e-01, 1.680e-02, -6.029e-02, 2.497e-02, 3.013e-03, 4.852e-02, 1.025e-01, -3.057e-02, 1.842e-01, 4.622e-03, -1.222e-02, -2.721e-02, 1.157e-02)); + r += mul(s5_6, M4(-1.136e-02, -6.073e-02, 1.539e-02, -4.126e-02, -3.847e-02, 4.261e-02, 1.198e-02, 5.306e-03, 6.251e-02, -5.855e-02, 1.477e-02, -2.672e-02, -2.354e-02, -6.440e-03, 1.940e-02, 1.360e-02)); + r += mul(s5_7, M4(7.322e-02, -3.496e-02, -3.640e-02, 3.208e-02, -4.454e-02, 2.890e-02, 5.658e-03, 2.411e-02, 2.417e-02, -2.461e-02, 9.286e-03, -2.945e-02, 2.810e-02, 6.893e-02, -3.579e-03, 1.289e-02)); + r += mul(s5_8, M4(1.222e-02, 6.441e-02, 3.220e-03, -1.404e-01, -3.137e-02, -4.174e-02, -1.381e-03, 3.463e-02, -6.317e-02, -7.367e-03, 2.803e-03, 6.163e-02, -6.373e-02, 6.113e-03, 1.058e-03, -2.774e-02)); + r += mul(s6_0, M4(3.013e-02, 5.727e-02, 9.003e-03, 1.051e-02, -1.073e-02, -7.736e-02, 4.207e-02, -2.627e-03, 1.345e-02, 3.167e-02, 8.640e-03, -5.891e-03, -1.514e-02, 7.545e-02, -1.225e-02, 6.570e-02)); + r += mul(s6_1, M4(-2.280e-02, -1.438e-01, -3.557e-02, 4.780e-03, 6.770e-03, -2.738e-02, 1.773e-02, -2.045e-01, -2.987e-02, 1.402e-01, -1.868e-02, 5.535e-02, -2.222e-02, 9.094e-02, 5.169e-02, 1.007e-01)); + r += mul(s6_2, M4(2.146e-02, -1.793e-01, 7.339e-03, -2.134e-01, -1.581e-02, 1.097e-01, -6.332e-03, 1.482e-01, -7.301e-03, -2.931e-02, -7.941e-03, 2.457e-02, -2.584e-03, 2.997e-02, -1.088e-02, 2.071e-02)); + r += 
mul(s6_3, M4(-3.318e-03, 4.875e-02, 2.340e-02, 3.773e-02, 2.270e-02, 1.642e-03, -4.309e-02, 1.612e-02, -1.588e-02, 2.905e-03, -2.409e-02, 1.876e-02, 9.508e-02, 2.535e-03, -1.075e-01, 2.218e-02)); + r += mul(s6_4, M4(-8.570e-02, 1.575e-01, 1.420e-02, 7.448e-02, -9.357e-03, -4.404e-02, -3.117e-02, 4.668e-02, 4.308e-02, 1.477e-01, -7.957e-02, 2.017e-01, 1.419e-01, 1.729e-02, -7.883e-02, 2.105e-02)); + r += mul(s6_5, M4(7.094e-02, -9.315e-02, -5.066e-02, -8.118e-02, -8.276e-02, 1.049e-03, -3.332e-02, 5.612e-02, -4.293e-02, -1.420e-01, -1.876e-02, 1.258e-03, 2.879e-02, 1.642e-02, -4.920e-02, 7.644e-02)); + r += mul(s6_6, M4(-2.574e-02, 3.665e-02, -4.206e-03, 1.549e-04, 1.494e-02, 8.348e-03, -1.097e-02, 1.059e-03, -2.022e-02, 1.593e-02, 5.309e-03, 1.297e-02, -4.626e-03, -2.908e-02, 3.458e-02, 2.192e-02)); + r += mul(s6_7, M4(-5.627e-02, 2.994e-02, 6.623e-03, 4.265e-03, -3.984e-03, 7.208e-02, 6.464e-03, -4.584e-02, 1.076e-01, 1.804e-02, 1.932e-03, -6.164e-04, -6.080e-03, -2.187e-02, 2.705e-03, 1.286e-02)); + r += mul(s6_8, M4(-1.513e-02, -2.513e-02, 4.140e-03, 2.426e-02, 5.146e-02, 1.706e-02, 7.609e-03, 5.859e-03, 8.542e-03, 1.324e-02, -1.776e-02, -5.070e-02, -2.553e-02, 8.175e-03, -1.150e-03, 9.181e-03)); + r += mul(s7_0, M4(-3.418e-02, -6.199e-02, 2.500e-02, 1.762e-02, -4.787e-04, -1.350e-02, 2.896e-02, -2.513e-02, 1.715e-02, 9.849e-02, 9.936e-03, -1.941e-02, 1.119e-02, 5.483e-02, 1.483e-02, -9.959e-03)); + r += mul(s7_1, M4(-2.115e-02, -5.361e-03, -6.951e-02, -2.643e-02, 7.225e-02, 1.490e-01, -2.051e-02, -2.152e-01, -1.302e-02, 1.838e-01, -5.824e-03, 9.439e-02, -4.313e-02, -5.206e-03, -5.797e-03, -1.539e-02)); + r += mul(s7_2, M4(3.885e-02, 1.548e-02, -3.074e-02, -8.288e-02, 8.367e-02, -1.723e-02, 6.387e-02, -5.094e-02, -2.516e-02, -9.869e-03, -3.897e-02, 1.550e-02, 3.287e-03, 4.700e-02, 1.602e-02, 1.415e-01)); + r += mul(s7_3, M4(7.152e-02, 1.291e-02, -4.047e-02, -5.024e-03, 8.264e-03, -2.978e-02, 1.805e-02, 2.057e-02, 1.471e-02, 4.593e-02, -4.297e-03, 8.084e-03, 
-2.278e-02, -2.440e-02, -8.421e-02, -2.865e-02)); + r += mul(s7_4, M4(-1.690e-02, -6.615e-02, -4.751e-02, -5.177e-02, 7.508e-02, 1.110e-01, -5.521e-02, 1.004e-01, 3.390e-02, 4.147e-01, -4.732e-02, 2.533e-01, 2.246e-03, -1.897e-01, -4.463e-02, -1.165e-01)); + r += mul(s7_5, M4(-1.400e-02, 2.606e-02, -1.921e-02, 8.409e-02, -1.727e-01, -1.105e-01, -4.348e-02, 1.222e-01, -5.675e-03, 4.957e-02, -4.451e-03, 3.969e-02, -3.335e-02, -1.468e-01, -2.498e-02, 4.853e-02)); + r += mul(s7_6, M4(-2.972e-02, 1.554e-02, 8.900e-03, -1.799e-02, 1.196e-02, 3.115e-02, -1.041e-02, -1.689e-02, 6.761e-03, 2.227e-02, 8.203e-03, 8.347e-03, 4.691e-02, -5.568e-02, 1.686e-02, 2.405e-02)); + r += mul(s7_7, M4(9.098e-03, 4.191e-02, 1.316e-02, -1.388e-02, 5.256e-02, 1.206e-01, 2.180e-02, -7.718e-03, -1.895e-02, 5.764e-02, 1.609e-02, -5.554e-03, 3.754e-02, -7.394e-02, -7.340e-03, -6.948e-03)); + r += mul(s7_8, M4(2.982e-02, -4.833e-03, 7.510e-03, -3.615e-02, 1.915e-02, 4.432e-02, 8.503e-03, -5.412e-03, -6.377e-03, 2.350e-02, -3.145e-03, -4.699e-02, -2.206e-02, -1.076e-02, -1.340e-02, -2.481e-02)); + r += V4(-2.186e-03, -5.805e-03, 2.543e-03, 1.149e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.403e-02, -1.690e-02, 1.299e-02, 3.203e-03, 2.440e-02, -1.197e-02, -1.009e-02, -6.249e-02, 1.807e-02, 4.913e-02, -4.275e-03, -8.835e-03, 6.496e-02, 
8.654e-02, 1.910e-03, 1.207e-01)); + r += mul(s0_1, M4(5.920e-03, -1.504e-02, 1.881e-03, -9.575e-04, -5.896e-04, 1.827e-01, 9.696e-03, 7.658e-02, -1.416e-03, 7.247e-02, -6.856e-02, -5.048e-02, 5.380e-02, -1.966e-01, 4.374e-02, 1.527e-03)); + r += mul(s0_2, M4(1.538e-03, -2.058e-02, 2.313e-02, -7.205e-03, -2.522e-02, -6.646e-02, -6.501e-03, -3.213e-03, 1.374e-02, -3.847e-02, 2.466e-02, -2.767e-04, -1.093e-02, 1.194e-04, -5.333e-03, -2.761e-03)); + r += mul(s0_3, M4(1.686e-02, 5.654e-03, -1.679e-02, 3.514e-02, 6.360e-03, 3.797e-02, 1.691e-02, 5.090e-02, -3.749e-02, -1.215e-02, -2.332e-02, -4.429e-02, 3.758e-02, -1.332e-01, 4.014e-03, 1.573e-02)); + r += mul(s0_4, M4(-6.120e-02, -1.186e-02, 1.148e-02, -2.868e-03, -7.267e-02, 3.952e-01, -3.060e-01, 1.900e-02, -1.422e-02, -6.225e-02, 1.324e-01, 1.330e-01, -2.278e-01, -7.845e-02, 1.004e-01, -3.004e-03)); + r += mul(s0_5, M4(1.674e-02, 7.387e-03, -2.501e-03, 5.247e-03, -1.203e-01, 5.827e-02, -6.139e-02, 4.977e-03, -1.665e-01, -4.193e-02, 3.967e-02, 4.625e-02, -3.007e-02, 2.687e-02, -1.890e-01, 8.096e-02)); + r += mul(s0_6, M4(1.294e-02, 1.304e-02, 1.941e-02, 5.546e-03, -1.190e-02, -3.708e-02, 7.536e-03, -1.192e-02, -2.948e-03, 1.828e-02, 2.791e-02, -4.490e-02, -8.575e-03, 1.167e-03, 7.475e-03, 1.347e-02)); + r += mul(s0_7, M4(-3.207e-02, 3.181e-02, -6.068e-02, 2.886e-02, -2.113e-02, 1.308e-02, 2.800e-02, 3.551e-02, -1.551e-02, -3.550e-03, 1.875e-02, -3.930e-02, 1.351e-02, 2.954e-02, -9.125e-03, 4.094e-02)); + r += mul(s0_8, M4(-1.797e-02, -6.222e-03, 1.149e-02, -1.110e-02, 6.957e-03, -1.365e-02, 3.144e-03, 1.116e-02, -2.026e-02, 3.616e-02, -4.472e-02, -5.475e-03, 5.175e-03, -4.265e-02, -2.606e-02, -1.281e-03)); + r += mul(s1_0, M4(-2.377e-02, -1.403e-02, 3.900e-04, -8.035e-03, 6.275e-03, -3.050e-03, -1.774e-03, 3.272e-02, 1.894e-02, -3.193e-02, -1.214e-02, -3.307e-02, 1.437e-02, 4.558e-02, -5.583e-03, 3.869e-02)); + r += mul(s1_1, M4(1.842e-02, -8.569e-03, 1.108e-02, -5.062e-03, -5.962e-02, -1.085e-01, 6.689e-02, 
-5.252e-02, 4.097e-02, 2.783e-02, -2.389e-02, 1.015e-01, -2.116e-02, 7.249e-02, -4.569e-02, -8.347e-02)); + r += mul(s1_2, M4(-8.921e-03, 6.302e-03, -1.726e-02, -2.883e-02, 2.381e-02, 3.157e-02, 5.104e-03, 4.095e-02, -1.652e-02, -5.781e-03, -1.703e-02, -4.506e-02, 3.096e-02, 3.062e-02, -2.861e-02, 4.570e-02)); + r += mul(s1_3, M4(3.239e-02, -2.966e-02, -4.143e-02, -9.498e-02, 4.698e-02, 5.041e-02, 4.006e-02, -1.036e-02, -2.415e-02, 5.212e-02, 5.013e-03, 6.275e-02, -6.031e-03, 2.356e-02, -8.598e-03, 4.139e-02)); + r += mul(s1_4, M4(5.942e-02, -1.007e-01, 2.275e-02, -1.153e-01, -7.684e-02, 1.358e-01, -1.941e-01, 7.818e-02, -1.434e-01, -2.566e-02, 7.518e-02, 3.408e-01, -5.866e-02, 5.014e-02, 2.169e-02, 5.445e-02)); + r += mul(s1_5, M4(3.588e-02, 2.354e-02, 1.821e-03, -2.777e-02, -1.232e-02, 3.485e-03, 2.677e-02, -4.053e-02, 9.010e-02, 2.494e-02, -4.014e-02, 2.349e-02, 3.918e-02, -6.701e-03, -4.987e-02, -2.935e-02)); + r += mul(s1_6, M4(-2.898e-03, 1.400e-02, 2.918e-02, -5.003e-02, -1.320e-02, -4.515e-03, 3.208e-03, -2.984e-02, 4.325e-03, 1.145e-02, 1.401e-02, -4.900e-02, 1.295e-02, 1.852e-02, -1.632e-04, 5.149e-03)); + r += mul(s1_7, M4(2.847e-02, 8.097e-05, -6.482e-02, -3.287e-02, 2.657e-02, 7.181e-02, -1.687e-02, 2.339e-02, -5.849e-02, 5.947e-02, 1.580e-02, -2.197e-02, -1.386e-02, 7.761e-03, 1.187e-02, -1.775e-03)); + r += mul(s1_8, M4(-2.004e-02, 1.634e-02, -2.024e-02, -2.733e-04, -3.457e-02, -2.186e-02, -8.031e-03, -8.043e-04, 3.190e-02, -4.545e-03, -4.383e-03, -4.025e-02, -1.792e-02, -1.601e-02, -1.268e-02, 1.806e-02)); + r += mul(s2_0, M4(1.513e-02, 2.842e-02, -3.138e-02, 2.632e-02, -4.687e-02, 1.712e-04, -8.254e-03, -1.088e-01, 4.773e-02, 2.275e-01, 2.114e-02, -5.353e-03, -6.060e-03, 4.668e-02, -1.113e-02, -5.220e-02)); + r += mul(s2_1, M4(-4.822e-02, 5.140e-02, -6.251e-02, -2.174e-02, 6.402e-02, -1.157e-03, 4.772e-02, 3.744e-02, -8.134e-02, 2.443e-01, -1.516e-01, -2.051e-01, 3.214e-02, -1.325e-01, -1.257e-02, -1.309e-01)); + r += mul(s2_2, M4(2.891e-02, 
-1.136e-02, 1.066e-02, 1.125e-02, 5.477e-02, 1.702e-02, -4.648e-03, -3.087e-02, -1.364e-01, 4.794e-02, 9.983e-04, 6.055e-02, 6.746e-02, 4.293e-02, -7.007e-02, -7.499e-02)); + r += mul(s2_3, M4(1.072e-01, -6.466e-02, -1.973e-02, 1.030e-01, -1.527e-02, 4.362e-02, -4.451e-02, 1.481e-02, -1.033e-02, -5.968e-03, -8.813e-03, -3.673e-02, 8.713e-03, 4.114e-02, 1.480e-02, -2.310e-02)); + r += mul(s2_4, M4(-2.133e-01, -1.865e-02, 1.325e-01, -6.998e-03, 1.527e-02, -1.194e-01, 1.117e-02, 2.825e-02, -1.395e-01, -5.398e-02, -8.579e-02, 5.018e-02, 2.483e-01, -3.317e-04, -4.582e-02, -1.580e-01)); + r += mul(s2_5, M4(-2.050e-01, 3.415e-02, -2.992e-01, -4.628e-02, -2.045e-02, -4.760e-02, 2.570e-02, -1.789e-02, 3.450e-02, 4.739e-02, -5.818e-02, -3.416e-03, 6.491e-02, -1.692e-02, 5.092e-02, -1.269e-03)); + r += mul(s2_6, M4(5.677e-03, 2.475e-02, -2.882e-02, 4.985e-02, 4.226e-02, 2.465e-02, 1.665e-02, 1.778e-02, -1.488e-02, 8.427e-03, 7.982e-03, 4.308e-03, -2.665e-03, -2.505e-03, -2.439e-04, -8.472e-02)); + r += mul(s2_7, M4(1.160e-01, -1.843e-01, -1.577e-01, 1.610e-01, -3.755e-02, -5.904e-02, -3.916e-02, -7.304e-02, -6.454e-03, -4.502e-03, -1.938e-02, -1.544e-02, 9.156e-02, 5.040e-02, 2.782e-02, -9.789e-02)); + r += mul(s2_8, M4(-2.775e-02, 4.488e-02, -1.165e-01, -5.318e-02, 9.845e-03, -1.098e-02, 1.870e-02, -5.903e-03, 1.401e-02, -1.890e-03, -4.127e-03, -3.702e-02, 2.593e-02, 7.027e-04, 7.886e-03, -3.557e-02)); + r += mul(s3_0, M4(1.291e-02, 3.182e-02, -1.102e-02, -2.050e-02, 4.529e-02, 4.164e-02, -8.264e-03, 1.008e-02, -4.363e-03, 1.615e-02, -2.964e-02, -5.398e-03, -4.292e-02, -4.069e-02, 1.873e-02, -1.873e-02)); + r += mul(s3_1, M4(2.319e-02, 6.311e-03, -2.371e-02, 2.499e-02, -8.716e-02, 8.744e-03, 6.422e-03, -4.807e-02, 5.915e-02, -2.334e-04, 3.317e-02, 5.564e-04, 2.568e-02, 7.700e-03, -4.957e-02, -3.943e-02)); + r += mul(s3_2, M4(-6.014e-02, 2.336e-02, -3.495e-02, -4.863e-02, 1.457e-02, 4.577e-02, -2.609e-02, 4.501e-03, -3.028e-03, -2.968e-02, 1.283e-02, 1.761e-02, 3.232e-02, 
-2.981e-02, 3.126e-02, 3.211e-02)); + r += mul(s3_3, M4(5.939e-02, 3.790e-02, -2.935e-03, 1.023e-02, 3.549e-02, -3.577e-02, -3.997e-02, 7.911e-02, -2.807e-02, -2.543e-02, -1.237e-02, -1.489e-03, -7.716e-02, 5.463e-02, -1.140e-02, -1.054e-01)); + r += mul(s3_4, M4(1.049e-02, -8.659e-02, 1.828e-02, 3.782e-02, -1.330e-01, -1.978e-01, 1.279e-01, -7.684e-02, 1.022e-02, -5.645e-02, 3.045e-02, -1.374e-02, 5.891e-02, 5.058e-05, 5.187e-02, 2.808e-02)); + r += mul(s3_5, M4(-8.350e-02, -8.552e-02, 1.150e-01, 6.334e-02, 5.096e-03, -3.212e-02, 7.190e-04, 3.291e-02, -1.503e-02, 1.068e-02, 1.800e-02, -2.819e-02, -6.397e-03, 4.068e-02, -7.728e-02, 9.243e-02)); + r += mul(s3_6, M4(2.811e-02, -3.419e-02, 9.763e-03, 3.038e-02, 4.606e-02, 6.689e-02, -7.211e-03, -3.238e-03, 7.453e-03, -4.438e-03, 4.286e-03, -1.259e-02, 2.434e-02, 1.957e-02, 1.739e-02, -3.466e-02)); + r += mul(s3_7, M4(-2.912e-02, 4.566e-02, 2.812e-02, 6.141e-02, 2.646e-02, 1.708e-02, -9.021e-02, -6.151e-02, 1.318e-02, -2.526e-02, -6.394e-03, -3.100e-02, -3.370e-02, 2.925e-02, -1.284e-02, -2.997e-02)); + r += mul(s3_8, M4(-1.161e-02, -3.455e-02, -2.899e-02, 8.177e-04, -1.710e-02, 1.641e-02, 1.267e-02, 1.571e-03, 9.097e-04, -1.243e-02, -1.539e-02, 8.939e-03, 3.341e-02, 1.295e-02, -2.326e-03, -5.243e-02)); + r += mul(s4_0, M4(-4.412e-03, 1.828e-02, -1.095e-02, -2.410e-02, 2.840e-02, 3.710e-02, -3.585e-02, 4.476e-02, 2.330e-03, 1.955e-03, 8.971e-03, -1.877e-02, 9.953e-04, -1.743e-02, 3.433e-03, -2.462e-02)); + r += mul(s4_1, M4(-1.554e-02, -2.013e-02, 2.762e-04, 3.799e-02, -5.711e-02, -2.065e-01, 1.722e-02, 8.711e-04, 2.270e-02, 1.405e-02, -4.327e-02, -1.802e-02, 1.177e-02, 3.746e-02, -4.345e-02, 1.503e-01)); + r += mul(s4_2, M4(-1.062e-02, -3.258e-02, 1.399e-02, -8.539e-03, -1.272e-02, -5.984e-03, 4.600e-02, 7.976e-02, -9.065e-03, -9.190e-03, -7.083e-03, 3.493e-02, 1.741e-02, -4.484e-02, 2.416e-02, -5.574e-02)); + r += mul(s4_3, M4(1.711e-02, -1.917e-02, 1.507e-02, 1.370e-02, -3.999e-02, -1.430e-02, -2.882e-02, 
-4.323e-02, 2.306e-02, 2.904e-02, 2.624e-02, 1.199e-01, 1.480e-02, -7.574e-02, -4.492e-02, -4.352e-02)); + r += mul(s4_4, M4(-3.018e-02, -1.425e-02, -1.949e-02, 2.164e-02, 1.122e-01, -1.606e-02, 7.957e-02, 4.065e-02, -1.238e-01, -6.310e-02, 8.791e-02, 3.338e-02, -1.513e-01, 2.885e-01, 1.534e-01, 1.775e-02)); + r += mul(s4_5, M4(-4.428e-02, -1.291e-02, 5.516e-03, 3.881e-02, -2.859e-02, 3.528e-02, -6.541e-02, -5.225e-03, 5.673e-02, 2.624e-02, 2.976e-02, 5.959e-03, -1.149e-02, 1.513e-01, -2.427e-01, 1.607e-01)); + r += mul(s4_6, M4(-9.989e-03, 5.560e-03, 3.646e-03, 2.434e-02, 9.529e-03, 4.031e-02, -2.572e-02, -3.478e-02, -3.300e-02, -1.053e-01, 4.016e-02, 5.015e-02, -1.599e-02, 2.333e-02, -4.798e-02, 5.170e-02)); + r += mul(s4_7, M4(-5.325e-02, 1.108e-02, 1.797e-02, -6.959e-03, 1.219e-02, 2.704e-02, 1.240e-02, 3.468e-02, -6.517e-02, -3.558e-02, 7.436e-02, -1.016e-02, -1.981e-02, -7.618e-02, -8.978e-02, -2.096e-02)); + r += mul(s4_8, M4(-2.308e-03, -3.165e-02, -8.688e-03, -9.730e-03, -1.522e-02, 2.069e-02, -1.754e-02, -7.927e-03, 2.645e-02, -1.085e-02, -8.390e-03, -2.116e-02, -3.234e-02, 1.489e-02, 3.660e-03, 2.375e-03)); + r += mul(s5_0, M4(-3.510e-02, -6.012e-04, -3.174e-02, -1.242e-02, 1.226e-02, 1.917e-02, -3.964e-02, -2.023e-02, -2.311e-02, 6.712e-03, 1.517e-02, 9.062e-02, 8.459e-03, 1.153e-02, -8.272e-03, -5.246e-03)); + r += mul(s5_1, M4(8.618e-02, -8.955e-02, 5.913e-02, -1.343e-01, 1.315e-01, 4.682e-02, -2.247e-02, -3.938e-02, -1.179e-01, -5.145e-02, 5.443e-02, 3.886e-02, -5.119e-02, -3.541e-02, -3.904e-03, 1.687e-02)); + r += mul(s5_2, M4(2.460e-02, -8.600e-02, 4.964e-02, 1.386e-01, 2.402e-02, -2.157e-02, 2.303e-02, -3.927e-02, -5.104e-02, -7.479e-03, -4.165e-02, -2.199e-02, -2.173e-03, -6.519e-02, 7.790e-02, 4.227e-02)); + r += mul(s5_3, M4(-3.211e-02, 5.888e-02, 1.003e-02, -4.182e-03, -2.656e-02, -9.108e-02, -2.803e-02, -3.697e-02, 3.665e-02, -4.259e-03, 2.178e-02, 1.014e-01, -4.813e-02, -9.389e-03, -3.660e-02, 1.633e-02)); + r += mul(s5_4, M4(-5.209e-02, 
3.928e-01, -6.218e-02, 4.783e-02, 9.125e-02, -1.971e-01, 1.959e-01, -2.692e-02, -3.901e-01, -3.436e-01, 3.787e-01, -1.215e-01, 6.860e-02, -6.028e-03, 6.013e-02, -5.339e-02)); + r += mul(s5_5, M4(2.825e-01, 3.197e-01, -4.535e-01, 2.451e-01, -2.704e-02, 2.347e-02, -3.970e-02, 3.455e-02, -4.792e-02, -6.546e-02, 8.310e-02, 5.270e-02, 5.672e-02, -5.222e-03, -1.584e-02, 2.659e-02)); + r += mul(s5_6, M4(-2.092e-03, 3.342e-02, -5.297e-02, -4.897e-03, 1.136e-02, 1.686e-02, 1.156e-02, 1.235e-02, -5.949e-02, -7.796e-02, 3.761e-02, -7.960e-02, -1.237e-02, -6.790e-02, 3.845e-02, 9.075e-03)); + r += mul(s5_7, M4(-9.697e-02, -1.451e-02, -7.202e-05, 1.720e-02, 3.105e-02, -4.131e-02, -1.384e-02, -1.688e-02, -4.193e-02, 1.140e-02, -4.853e-02, 4.692e-02, -6.361e-02, 1.024e-01, -1.936e-02, -1.864e-02)); + r += mul(s5_8, M4(2.417e-01, -5.670e-02, -2.683e-02, 1.479e-01, 1.790e-02, 1.712e-02, -4.796e-03, 5.586e-03, 1.011e-02, 7.180e-03, 2.503e-02, 7.192e-03, 1.102e-01, 5.415e-02, -9.293e-02, -7.912e-02)); + r += mul(s6_0, M4(-1.310e-02, -1.998e-02, 2.870e-02, -8.365e-03, 1.781e-02, 8.472e-02, 1.358e-02, 1.241e-02, -2.426e-02, -4.126e-02, -1.173e-02, -1.491e-02, -4.943e-02, -3.414e-03, -3.452e-02, -2.191e-02)); + r += mul(s6_1, M4(4.033e-02, -5.505e-03, 2.442e-02, 3.121e-02, -5.878e-02, 4.726e-02, -1.153e-01, 2.513e-02, 6.441e-02, -9.327e-04, 3.285e-02, 3.715e-02, -3.968e-02, -1.051e-02, -2.114e-02, 2.643e-02)); + r += mul(s6_2, M4(7.410e-03, 5.971e-02, -1.034e-02, -7.923e-02, -1.158e-02, -5.324e-02, 2.883e-02, 1.165e-01, -4.792e-02, -2.314e-02, -3.930e-03, -4.429e-02, -1.138e-02, -5.562e-03, -2.705e-02, 6.022e-03)); + r += mul(s6_3, M4(3.052e-02, 4.170e-02, -2.362e-02, 8.131e-02, 1.667e-02, -6.426e-02, -1.831e-02, -6.213e-02, -3.947e-02, 1.147e-01, -2.621e-02, -7.330e-03, -2.931e-02, -7.846e-03, -7.073e-03, -7.834e-02)); + r += mul(s6_4, M4(-8.755e-02, 2.748e-02, -1.013e-02, 1.164e-01, 3.070e-02, -2.129e-02, 3.427e-02, 2.312e-02, 2.883e-01, 1.168e-01, -9.151e-02, 5.790e-03, -1.971e-01, 
-7.913e-02, 1.532e-01, 9.796e-02)); + r += mul(s6_5, M4(4.299e-02, -3.184e-02, 7.484e-03, -9.968e-02, 3.603e-03, 9.438e-02, -1.002e-01, 2.020e-02, 1.727e-02, 2.998e-02, 1.908e-03, -3.632e-02, -1.007e-01, -3.760e-02, -1.044e-02, 5.097e-02)); + r += mul(s6_6, M4(-4.278e-03, -3.464e-03, -1.115e-02, 3.740e-02, -6.866e-03, 1.981e-02, -4.597e-03, -7.353e-03, 7.228e-03, -2.904e-02, -1.460e-02, -6.438e-02, -2.136e-02, -2.832e-02, 6.746e-03, -7.435e-02)); + r += mul(s6_7, M4(-7.721e-03, -8.583e-03, -2.239e-02, -4.383e-02, 5.328e-02, -2.903e-02, -3.415e-02, 2.400e-02, 5.608e-02, -6.022e-03, 5.820e-02, -5.756e-02, 9.681e-04, 3.779e-02, 4.654e-02, 2.699e-02)); + r += mul(s6_8, M4(6.099e-02, 2.623e-02, -2.629e-02, -2.693e-02, -8.624e-03, 7.311e-03, 1.022e-02, 2.943e-02, 3.370e-02, 2.451e-02, -2.233e-02, -1.465e-02, -1.173e-02, 1.131e-02, 5.762e-06, -7.292e-03)); + r += mul(s7_0, M4(8.332e-02, 7.678e-02, 7.580e-03, 1.804e-03, -7.609e-03, 3.395e-02, 4.078e-02, 2.980e-02, -1.210e-02, -3.697e-02, -3.218e-03, 5.016e-03, -6.317e-02, 8.456e-03, -4.200e-04, -7.168e-02)); + r += mul(s7_1, M4(-3.934e-02, 1.361e-02, -7.507e-02, 6.378e-02, -1.096e-01, 1.727e-02, -1.115e-01, -7.854e-03, 6.183e-03, 5.731e-03, -6.347e-03, 1.489e-02, 6.840e-02, -4.007e-03, -9.997e-03, 3.514e-02)); + r += mul(s7_2, M4(4.943e-02, 3.648e-03, 7.915e-03, -1.559e-02, -2.015e-01, -4.453e-02, -2.992e-03, 1.325e-02, 4.230e-02, -3.058e-02, -8.181e-04, 5.134e-04, -4.491e-02, -3.514e-02, -3.413e-03, 1.651e-02)); + r += mul(s7_3, M4(1.072e-02, 5.109e-02, -4.012e-02, 5.480e-04, 1.733e-02, -3.440e-02, -1.915e-02, -1.607e-02, -2.231e-02, 1.018e-02, 1.327e-02, 6.020e-02, 4.541e-02, 6.761e-02, 3.457e-02, -3.275e-02)); + r += mul(s7_4, M4(-1.640e-01, 1.440e-02, 5.193e-02, 1.382e-01, 6.455e-02, 1.126e-01, -9.477e-02, 6.182e-02, 2.558e-02, 3.332e-01, -7.944e-02, -1.218e-01, 2.038e-01, -3.560e-02, -1.784e-01, 4.907e-02)); + r += mul(s7_5, M4(-1.175e-03, 6.281e-02, -4.109e-02, 1.431e-02, -1.255e-01, 8.339e-02, -1.665e-02, 
-1.578e-02, -6.914e-02, -2.823e-02, -5.028e-02, 3.387e-02, -7.128e-02, 3.301e-02, 3.751e-02, 1.426e-02)); + r += mul(s7_6, M4(2.105e-02, 1.378e-02, -5.892e-03, 4.059e-02, -1.739e-02, -1.169e-02, 1.199e-02, 2.551e-02, -9.290e-03, -2.204e-02, -1.168e-02, -2.180e-02, -2.759e-02, -4.014e-02, 1.618e-02, -8.665e-02)); + r += mul(s7_7, M4(-5.288e-02, 4.240e-02, -6.100e-02, 3.484e-02, 6.646e-02, -3.599e-02, -1.280e-02, -5.046e-02, 1.113e-02, -1.903e-02, 4.971e-02, -6.456e-03, 1.147e-01, 2.411e-02, 5.515e-02, -1.526e-02)); + r += mul(s7_8, M4(6.872e-02, 1.276e-02, -1.522e-02, 2.267e-03, -3.264e-02, -3.042e-02, 6.525e-03, 3.154e-02, 5.193e-02, 3.588e-03, -3.750e-02, -3.729e-02, -6.051e-03, 3.412e-03, -8.244e-03, -2.269e-02)); + r += V4(-3.695e-03, -2.757e-03, 3.166e-03, 9.638e-04); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.092e-02, 1.339e-02, -4.554e-03, -1.395e-02, -4.689e-03, 1.342e-02, 1.254e-02, -5.389e-03, -2.701e-03, -7.961e-03, 3.034e-02, 4.658e-02, 4.680e-02, -1.012e-02, 2.197e-02, 1.284e-01)); + r += mul(s0_1, M4(-1.607e-02, 3.157e-04, -3.379e-02, 4.748e-02, 3.269e-01, -5.248e-02, -3.816e-02, -4.362e-02, 6.960e-02, -2.778e-02, 1.179e-01, -2.154e-02, 3.771e-02, 1.121e-01, 8.252e-02, -2.881e-01)); + r += mul(s0_2, M4(-2.736e-02, 1.314e-02, 3.129e-02, 2.014e-02, 3.678e-02, 1.444e-02, 8.184e-03, -9.470e-02, 
3.379e-02, -5.792e-03, 2.617e-02, -3.427e-02, -2.049e-01, -1.673e-02, -2.069e-02, 5.914e-02)); + r += mul(s0_3, M4(5.289e-03, -2.656e-04, -1.474e-02, 1.589e-02, 8.334e-02, -7.580e-02, 8.112e-02, 4.596e-02, 8.272e-02, 3.640e-02, -4.519e-03, -2.104e-02, -9.967e-02, 9.191e-02, 1.247e-01, -5.016e-03)); + r += mul(s0_4, M4(-2.227e-01, -4.817e-02, 4.974e-02, 3.682e-03, 1.109e-01, 1.803e-01, 1.511e-01, -1.226e-01, 3.325e-03, 4.576e-02, -1.119e-01, 2.615e-02, -1.788e-01, -3.027e-01, 6.403e-02, -6.056e-02)); + r += mul(s0_5, M4(5.268e-02, 9.762e-03, -4.245e-02, 1.849e-02, -1.549e-02, -2.268e-02, 7.592e-02, 3.212e-02, -2.411e-02, 3.668e-02, -7.544e-02, -4.866e-04, 5.589e-02, -2.498e-02, 1.852e-02, -9.691e-02)); + r += mul(s0_6, M4(5.347e-02, 7.022e-03, 1.116e-02, 2.763e-02, -1.022e-02, -5.476e-03, 2.628e-03, -1.506e-03, 2.884e-02, 4.020e-02, -7.338e-03, 2.429e-02, 3.548e-02, 9.521e-03, 1.640e-02, 3.307e-03)); + r += mul(s0_7, M4(1.294e-01, 1.416e-02, 2.088e-02, 5.070e-03, 6.003e-02, 8.699e-02, -4.962e-03, -1.115e-02, -9.649e-03, 8.960e-02, -2.227e-02, -6.014e-03, 2.850e-02, 4.964e-02, -4.095e-02, 1.140e-02)); + r += mul(s0_8, M4(8.239e-03, -1.648e-02, 8.895e-03, 1.556e-02, -1.977e-03, 4.565e-03, -6.811e-03, 3.908e-04, -2.284e-02, -2.758e-02, -4.311e-03, -3.926e-03, -1.034e-02, -4.269e-02, -1.208e-02, 2.784e-03)); + r += mul(s1_0, M4(-2.469e-02, 1.714e-02, 8.543e-05, -5.085e-02, -1.303e-02, -3.196e-02, -2.564e-02, 6.273e-04, 7.077e-03, -3.943e-02, 3.150e-02, 2.382e-02, -1.410e-02, 2.598e-02, -2.507e-02, -2.179e-03)); + r += mul(s1_1, M4(3.810e-02, 4.908e-03, 1.884e-02, 1.513e-01, -1.236e-01, 2.266e-02, 1.428e-01, 3.874e-03, 4.718e-02, -8.402e-03, 1.354e-01, -1.199e-01, 1.073e-01, 3.814e-02, 1.159e-01, -7.765e-02)); + r += mul(s1_2, M4(1.599e-02, 2.881e-03, 2.173e-02, -1.978e-03, 6.860e-02, 1.676e-03, 3.226e-02, -4.287e-02, -1.956e-02, -1.512e-03, -1.690e-02, 2.575e-02, -7.158e-02, -3.975e-02, -5.693e-02, -1.984e-02)); + r += mul(s1_3, M4(-5.212e-02, -1.270e-02, -2.785e-02, 
2.345e-03, 2.414e-03, -5.106e-03, 7.129e-02, 3.647e-03, 1.405e-01, 1.130e-01, -4.176e-02, 2.924e-02, -9.127e-03, -1.925e-02, 6.571e-03, -3.509e-02)); + r += mul(s1_4, M4(-3.083e-01, -5.015e-02, 1.360e-01, 1.230e-03, 7.366e-02, -6.510e-02, 8.663e-02, 5.433e-02, 5.167e-02, 1.388e-01, -6.000e-02, 5.815e-03, 1.775e-02, -3.996e-02, -3.119e-02, -2.133e-02)); + r += mul(s1_5, M4(8.512e-03, -2.324e-02, -3.587e-02, 2.867e-02, -1.335e-02, 1.153e-02, 5.781e-02, -7.299e-02, 9.194e-02, 5.748e-03, 3.511e-02, 2.288e-02, -3.146e-02, -9.689e-03, 6.297e-02, -4.233e-02)); + r += mul(s1_6, M4(3.845e-02, 5.955e-02, -3.526e-02, 1.363e-02, -4.100e-02, 1.820e-02, -1.857e-02, 4.804e-03, 2.709e-02, 4.436e-02, 1.196e-02, 4.346e-02, -3.318e-02, -2.132e-02, 2.696e-02, -1.101e-02)); + r += mul(s1_7, M4(5.951e-02, -7.423e-02, 3.198e-02, -8.567e-03, 1.034e-01, 5.462e-02, -5.424e-02, 2.838e-02, -8.256e-02, -1.197e-01, -3.751e-02, -9.126e-03, 5.554e-03, -1.258e-02, -2.857e-02, -3.262e-02)); + r += mul(s1_8, M4(3.627e-03, -5.324e-02, 1.199e-02, -8.622e-04, 2.323e-02, 2.939e-02, -1.582e-02, -6.721e-04, -2.331e-02, -2.138e-02, 7.226e-04, -2.562e-04, 6.237e-03, -1.082e-02, -5.595e-03, -2.109e-03)); + r += mul(s2_0, M4(2.653e-02, -1.926e-02, 1.199e-02, 9.048e-02, 5.699e-02, -1.210e-02, 8.090e-02, -2.781e-02, -1.199e-01, 4.468e-02, -8.449e-02, -8.375e-02, 5.640e-02, 1.763e-02, 1.440e-02, -2.498e-02)); + r += mul(s2_1, M4(-1.279e-01, 2.318e-02, -7.002e-02, -7.665e-02, 1.274e-01, -2.412e-02, -1.367e-02, 1.274e-02, 2.671e-01, -4.063e-02, -8.018e-02, -1.074e-01, 1.408e-02, -8.455e-03, -5.504e-02, -9.651e-02)); + r += mul(s2_2, M4(-2.118e-01, -2.689e-02, 5.992e-02, -1.555e-02, 5.128e-03, 2.630e-03, 2.998e-03, -3.045e-03, -5.662e-03, 1.739e-03, 6.779e-03, 1.238e-01, 1.021e-01, -6.391e-02, 1.074e-02, -2.479e-02)); + r += mul(s2_3, M4(-1.237e-01, -1.143e-01, 1.900e-02, 1.004e-01, -8.614e-02, -6.276e-02, -1.237e-01, -7.810e-02, -1.022e-01, -3.045e-02, 5.377e-03, -3.440e-03, 1.157e-01, 4.332e-02, 1.922e-02, 
4.073e-02)); + r += mul(s2_4, M4(4.169e-01, -2.430e-02, 1.544e-01, -2.009e-02, -2.080e-01, 1.991e-03, -8.177e-02, 3.955e-02, -8.963e-02, -2.454e-01, -2.471e-03, -5.004e-02, 1.816e-01, -2.853e-01, -1.108e-01, -2.023e-02)); + r += mul(s2_5, M4(1.104e-01, -1.946e-02, -9.055e-02, -7.604e-02, 3.566e-02, 3.569e-02, 1.540e-03, 3.796e-02, 2.840e-02, 5.791e-02, -4.812e-03, 1.310e-02, -1.732e-02, -5.084e-03, 1.198e-03, 7.456e-03)); + r += mul(s2_6, M4(1.001e-01, 7.259e-02, -4.347e-02, 6.588e-03, 9.344e-02, -2.240e-02, 5.820e-02, 2.779e-02, 2.240e-02, 3.854e-02, -2.927e-02, 1.077e-02, -2.607e-02, -2.483e-02, 1.267e-02, 7.444e-03)); + r += mul(s2_7, M4(-1.756e-01, -3.373e-01, 1.279e-01, 1.143e-02, -4.817e-02, 4.807e-03, 6.882e-02, -7.765e-02, 7.232e-03, -1.424e-02, 1.421e-02, -1.518e-02, -4.488e-02, 8.130e-02, 2.393e-03, 1.352e-02)); + r += mul(s2_8, M4(-3.442e-02, -1.452e-01, 2.308e-02, -6.505e-02, -3.899e-02, -1.270e-02, -4.442e-02, 2.083e-02, -2.833e-02, 5.497e-03, 1.044e-02, -2.984e-02, -3.160e-02, 1.488e-02, 1.989e-03, 4.249e-03)); + r += mul(s3_0, M4(7.697e-03, -2.455e-02, 1.960e-02, 2.386e-02, 2.703e-02, 4.728e-04, 3.220e-02, 3.976e-02, 5.094e-02, 1.657e-02, 2.947e-02, 3.609e-02, 2.286e-03, 5.603e-03, -3.366e-03, -6.941e-02)); + r += mul(s3_1, M4(-3.139e-02, 3.118e-02, 5.715e-03, 4.829e-02, 1.522e-01, 3.641e-03, -1.124e-02, -2.385e-02, -2.348e-02, -9.260e-02, 3.022e-03, -2.601e-02, 8.581e-02, -4.490e-02, -9.472e-02, 9.227e-02)); + r += mul(s3_2, M4(1.119e-02, -4.276e-04, -7.717e-02, 2.749e-02, 3.030e-02, -2.482e-02, -8.644e-03, 2.635e-03, 2.281e-02, 3.169e-02, -5.975e-03, 2.119e-02, 3.481e-02, 1.181e-03, 4.729e-02, 8.681e-02)); + r += mul(s3_3, M4(2.554e-02, -4.979e-02, -1.684e-02, 7.986e-02, -2.542e-02, -1.278e-02, -1.498e-02, -2.338e-02, -1.193e-02, 2.727e-02, 4.043e-03, -1.298e-02, 9.618e-02, 5.320e-02, -1.820e-03, -5.392e-03)); + r += mul(s3_4, M4(-4.170e-02, 3.055e-02, 1.440e-01, -8.131e-02, -3.459e-01, -7.154e-02, 9.196e-02, -1.495e-02, -1.284e-01, 9.553e-02, 
-9.523e-02, 2.806e-02, 3.195e-02, -1.082e-01, -5.087e-02, -5.880e-03)); + r += mul(s3_5, M4(-7.607e-02, -3.552e-02, -2.089e-03, -1.679e-02, 1.089e-02, 3.870e-02, 1.625e-04, 1.265e-02, -2.999e-03, -1.181e-02, -1.651e-02, 1.686e-02, 3.755e-02, 4.108e-03, -7.984e-02, 4.348e-02)); + r += mul(s3_6, M4(-3.543e-02, -1.071e-02, -2.990e-02, 2.091e-03, 1.102e-01, -4.086e-02, 1.002e-02, 4.791e-02, 2.896e-02, -2.758e-02, 1.350e-02, 2.303e-02, 2.593e-02, 3.618e-02, -7.147e-03, 8.067e-03)); + r += mul(s3_7, M4(9.047e-02, 6.579e-02, -3.227e-02, -3.021e-03, 2.239e-02, 1.050e-02, 5.045e-02, -2.806e-02, -2.622e-02, -2.204e-02, 1.993e-02, 1.491e-02, -7.789e-02, -1.594e-02, 3.182e-02, -1.013e-02)); + r += mul(s3_8, M4(-2.664e-02, 6.552e-03, 2.155e-02, -6.864e-02, 9.120e-03, -3.208e-02, 1.887e-03, -4.354e-04, -1.106e-02, 1.752e-02, 2.864e-02, 9.538e-04, -3.001e-02, -3.979e-02, 1.779e-02, 1.888e-02)); + r += mul(s4_0, M4(-3.185e-02, -8.707e-03, 4.138e-02, 1.122e-02, -1.942e-02, -2.564e-02, -5.853e-02, 4.479e-02, 5.085e-02, 2.594e-02, 9.553e-03, -7.572e-06, -2.425e-02, -2.492e-02, 2.955e-03, 3.709e-02)); + r += mul(s4_1, M4(-1.505e-02, 2.215e-02, 2.096e-03, -7.442e-02, -2.422e-01, 8.188e-02, -5.513e-02, -3.617e-02, 1.075e-01, -1.344e-02, -7.358e-02, -1.538e-01, 3.205e-02, -5.018e-02, 1.508e-02, -7.682e-02)); + r += mul(s4_2, M4(-4.506e-02, -8.178e-03, 4.501e-03, -6.760e-03, 3.139e-03, -3.794e-03, 1.589e-03, 7.435e-04, 7.842e-03, -8.196e-03, -1.594e-02, -1.751e-02, -1.772e-01, -1.600e-02, -3.758e-02, 1.123e-01)); + r += mul(s4_3, M4(-1.714e-02, 6.968e-03, 2.990e-03, 1.276e-02, 3.480e-02, -1.006e-03, 3.230e-02, 4.203e-02, 2.386e-02, 1.318e-02, -1.422e-01, 6.164e-02, 2.804e-02, -2.677e-03, 9.084e-02, 3.863e-02)); + r += mul(s4_4, M4(-7.837e-02, -1.313e-02, 4.041e-02, -5.725e-02, 4.064e-02, -1.787e-02, -1.496e-03, -2.503e-02, -1.233e-01, 3.710e-02, 2.943e-03, -8.191e-04, -2.615e-01, 3.901e-01, 4.065e-01, -2.056e-01)); + r += mul(s4_5, M4(-2.254e-02, -8.021e-03, -2.046e-02, 8.472e-03, 
-5.713e-02, -2.529e-02, 1.694e-02, -2.429e-02, -2.456e-02, 9.997e-04, -2.681e-02, -1.772e-02, 1.567e-01, -1.521e-01, -7.455e-02, 4.456e-02)); + r += mul(s4_6, M4(1.749e-04, 7.458e-03, -2.304e-03, -2.706e-04, 1.273e-02, 1.013e-02, 2.770e-02, -9.043e-03, -8.916e-03, 9.403e-02, 1.837e-02, 1.516e-02, 1.039e-01, 9.260e-03, -6.735e-02, 2.598e-02)); + r += mul(s4_7, M4(1.304e-03, 2.834e-02, -6.175e-02, -2.986e-02, 3.633e-02, 3.645e-02, 7.959e-03, 1.110e-02, -4.079e-02, 7.844e-02, -2.673e-02, 1.087e-02, -3.180e-02, -1.883e-01, 8.895e-02, -1.684e-02)); + r += mul(s4_8, M4(-1.649e-02, 1.449e-02, -4.270e-03, -9.407e-03, -1.764e-02, -4.406e-02, -1.302e-02, 4.296e-03, -1.590e-02, 2.225e-02, 2.974e-02, -2.259e-02, 5.141e-03, -2.828e-02, 1.497e-02, -6.642e-03)); + r += mul(s5_0, M4(4.863e-03, -8.128e-03, 7.881e-02, 2.253e-02, 2.186e-02, -3.181e-02, 4.433e-02, 6.014e-02, -5.438e-02, -3.977e-02, 9.079e-02, 1.664e-01, 3.258e-02, -3.838e-03, 2.394e-02, -7.544e-02)); + r += mul(s5_1, M4(5.463e-02, -1.269e-02, 5.354e-02, -5.412e-02, 2.823e-02, -2.180e-02, -1.627e-01, -2.816e-02, 3.666e-02, -9.861e-03, -4.098e-02, -1.916e-01, -5.313e-02, 3.226e-02, 6.151e-02, -4.472e-02)); + r += mul(s5_2, M4(-1.858e-02, 2.438e-02, -8.904e-02, -8.276e-02, -4.238e-02, 1.803e-03, -2.575e-02, 3.027e-02, 6.873e-02, 2.042e-02, -3.134e-03, -4.847e-02, -7.084e-03, 2.288e-02, -2.983e-02, 3.942e-02)); + r += mul(s5_3, M4(5.432e-04, -1.867e-02, -1.480e-01, -2.152e-02, 2.918e-02, -7.173e-02, 6.171e-02, 4.939e-02, -4.211e-03, 2.114e-01, 7.824e-02, -2.539e-03, -3.472e-02, -2.033e-02, -7.886e-02, 8.806e-04)); + r += mul(s5_4, M4(1.612e-01, -1.711e-01, -2.185e-01, 3.588e-02, -2.282e-01, 1.369e-01, -7.013e-02, -9.328e-02, -1.700e-01, -5.438e-02, -1.002e-01, -1.638e-01, -3.944e-02, 4.242e-02, 9.511e-02, -2.098e-02)); + r += mul(s5_5, M4(2.925e-01, -1.649e-02, -3.357e-01, 3.610e-02, -5.713e-02, -7.204e-02, 3.271e-02, -1.081e-02, -7.453e-02, -1.894e-02, 1.793e-02, 4.783e-02, 1.899e-02, -3.242e-03, -6.474e-02, 
-1.724e-02)); + r += mul(s5_6, M4(-2.085e-02, 4.713e-04, 8.945e-02, -4.255e-02, 3.323e-02, 2.878e-03, 4.455e-03, 8.880e-03, 1.175e-02, -2.069e-02, 1.593e-02, 7.256e-03, 5.510e-02, 6.245e-02, -1.073e-03, 1.568e-02)); + r += mul(s5_7, M4(-1.434e-01, -4.155e-02, -1.088e-01, -1.304e-02, -2.387e-02, 5.490e-03, -2.673e-03, -3.037e-02, 2.007e-02, 2.010e-01, 7.576e-03, 2.663e-02, -7.244e-02, 1.493e-02, -1.855e-02, -7.471e-03)); + r += mul(s5_8, M4(1.646e-01, 1.502e-02, 1.110e-03, 1.039e-01, -1.581e-02, -6.403e-03, -1.292e-02, -8.818e-04, -2.267e-02, 5.440e-02, 1.540e-02, -3.064e-02, -5.141e-03, -6.073e-02, 1.734e-02, -9.134e-03)); + r += mul(s6_0, M4(-1.421e-02, -3.183e-02, 7.745e-02, 1.166e-01, -2.863e-02, 4.455e-02, -5.321e-02, -1.932e-02, 2.177e-02, -6.390e-04, 7.195e-02, -1.108e-01, -5.191e-02, -6.434e-03, -4.275e-02, -7.569e-02)); + r += mul(s6_1, M4(2.328e-02, -2.048e-02, 1.015e-02, -5.549e-02, 7.075e-02, -1.188e-02, -4.826e-02, -8.752e-02, -1.681e-01, -8.244e-03, 1.652e-01, -2.174e-02, -5.658e-02, 2.067e-02, -5.496e-02, -1.015e-01)); + r += mul(s6_2, M4(7.667e-02, 2.027e-02, 3.300e-02, -4.803e-03, -1.036e-01, 1.408e-02, 6.761e-03, 3.002e-02, -1.679e-02, 1.096e-03, 5.178e-02, 2.883e-02, -1.849e-02, 1.547e-03, -2.402e-02, 7.750e-03)); + r += mul(s6_3, M4(-5.791e-02, 3.708e-02, -5.342e-02, -3.697e-02, -1.007e-01, -4.903e-02, 1.126e-01, -1.490e-02, 4.938e-02, 2.699e-02, -3.827e-02, 4.307e-02, 1.603e-01, 2.214e-02, 1.549e-01, 5.679e-02)); + r += mul(s6_4, M4(3.202e-02, -4.333e-02, -7.531e-02, -3.408e-02, -4.115e-03, -5.899e-02, 3.860e-03, 6.792e-05, 1.024e-01, 3.625e-02, -7.766e-02, -3.037e-02, -1.183e-01, -1.473e-03, 9.003e-02, 9.680e-05)); + r += mul(s6_5, M4(2.412e-02, 6.634e-04, 6.732e-02, 1.420e-02, 5.810e-02, 2.585e-02, 3.066e-02, -6.959e-02, 7.319e-02, 3.423e-03, 5.851e-03, 1.705e-02, 1.814e-02, -9.285e-03, 5.824e-02, -2.749e-03)); + r += mul(s6_6, M4(-2.116e-02, -1.237e-02, 1.453e-02, 1.679e-02, 1.266e-02, 4.516e-03, 2.093e-03, -2.252e-03, 7.470e-03, -3.528e-02, 
2.554e-03, 3.067e-02, 3.109e-02, 4.026e-02, -1.157e-03, -1.176e-02)); + r += mul(s6_7, M4(-3.356e-02, -7.234e-02, -2.846e-02, -3.722e-02, 3.819e-03, 1.008e-02, 1.845e-02, -2.988e-02, 8.924e-03, 4.281e-02, 6.533e-02, 4.260e-02, 6.298e-02, 2.217e-01, -2.953e-02, 3.301e-02)); + r += mul(s6_8, M4(1.080e-02, -4.984e-02, -2.745e-02, 3.396e-02, 1.743e-02, 2.391e-02, 1.372e-02, -6.250e-03, -1.041e-02, -3.854e-02, 1.843e-02, 2.394e-02, -2.537e-02, -3.912e-02, -1.326e-02, 2.354e-04)); + r += mul(s7_0, M4(-5.068e-02, 3.950e-03, -5.902e-02, 7.300e-02, -3.290e-02, 2.441e-02, -3.235e-02, -6.914e-02, 3.399e-02, -5.761e-03, -5.664e-03, 5.919e-03, 7.203e-02, -1.170e-03, -2.160e-02, -5.705e-02)); + r += mul(s7_1, M4(5.436e-02, -5.736e-03, 4.414e-02, -3.565e-01, -8.737e-04, 7.903e-03, 3.357e-02, -6.803e-02, -7.882e-02, 1.270e-02, 1.038e-01, -8.629e-02, -1.847e-02, -1.421e-03, -1.047e-01, 2.133e-01)); + r += mul(s7_2, M4(-9.254e-03, -1.177e-02, 3.797e-02, 2.816e-02, -8.729e-03, 1.623e-02, -7.377e-02, 9.038e-02, -5.011e-02, 7.134e-03, 3.944e-03, 7.992e-03, -1.552e-02, -7.506e-03, -2.047e-03, -2.762e-02)); + r += mul(s7_3, M4(-1.106e-02, 4.900e-02, 9.696e-02, -6.278e-02, -9.456e-02, -5.866e-02, 1.324e-02, -3.110e-02, 1.114e-01, 2.215e-02, -6.409e-02, 5.640e-02, 7.093e-02, -5.043e-02, 4.602e-02, 7.310e-02)); + r += mul(s7_4, M4(-2.782e-02, -7.644e-02, 1.632e-01, -8.296e-02, 2.731e-01, -7.495e-02, -1.284e-01, 4.100e-02, 1.436e-01, -1.086e-01, -1.241e-01, 9.629e-04, 5.897e-02, -3.782e-02, 1.235e-01, 4.526e-02)); + r += mul(s7_5, M4(-1.037e-02, -6.389e-03, 3.870e-02, 2.252e-02, 6.446e-04, -8.442e-02, 6.690e-02, -1.524e-02, 2.123e-02, -2.318e-02, 5.155e-03, -3.023e-02, -1.198e-02, 1.326e-02, 1.593e-02, -1.281e-02)); + r += mul(s7_6, M4(1.916e-02, -5.670e-02, 7.568e-03, -4.093e-03, 2.333e-02, 3.350e-02, -2.574e-02, 1.999e-03, 5.158e-02, 7.096e-03, -4.866e-03, 2.298e-02, -3.357e-02, 5.315e-02, -1.359e-02, 1.251e-02)); + r += mul(s7_7, M4(5.981e-03, 8.174e-02, -4.382e-02, -7.868e-02, 6.846e-02, 
4.553e-02, 6.389e-02, -4.031e-02, 1.263e-02, 1.191e-01, 5.344e-02, 1.252e-02, 2.144e-02, 4.242e-03, 3.567e-03, 7.250e-02)); + r += mul(s7_8, M4(4.586e-02, -2.087e-03, -2.998e-02, 2.130e-02, 5.013e-03, 3.517e-02, -1.515e-02, 1.582e-02, -4.815e-03, -1.898e-02, 3.161e-02, 4.237e-03, -1.922e-02, -2.204e-02, -4.973e-03, -7.648e-03)); + r += V4(-4.054e-03, 1.792e-03, -1.629e-03, -2.239e-04); + return r; +} + +void Pass17(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); 
+ s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, 
s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 18 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0, t1, t2, t3 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 
s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.170e-02, 2.448e-02, -6.568e-02, -4.856e-03, 7.756e-03, 2.695e-02, -2.034e-02, -1.152e-02, -9.123e-03, 1.096e-02, -1.194e-02, 3.436e-03, 5.642e-02, -6.383e-02, 8.516e-03, -7.348e-03)); + r += mul(s0_1, M4(1.707e-02, -2.103e-02, 3.186e-02, 6.137e-02, -7.320e-03, -3.172e-02, 8.073e-02, 7.483e-02, -3.905e-02, 1.540e-02, 7.500e-02, -8.741e-03, -6.373e-02, 7.616e-02, -2.019e-02, -4.717e-02)); + r += mul(s0_2, M4(2.383e-02, -3.848e-02, 4.500e-02, -8.700e-03, -9.590e-04, 1.952e-02, 4.646e-03, 3.524e-02, -1.623e-01, 8.300e-02, -4.089e-02, 2.941e-02, 8.041e-04, -1.368e-02, -3.280e-03, 1.389e-02)); + r += mul(s0_3, M4(1.305e-02, 1.089e-02, 2.462e-02, -9.653e-03, 3.373e-02, -2.580e-02, -4.511e-02, 1.383e-02, -2.521e-02, -8.344e-03, -1.552e-03, 1.083e-02, -2.801e-02, 1.817e-02, 1.458e-02, -5.040e-02)); + r += mul(s0_4, M4(-2.626e-02, -3.885e-02, 3.225e-02, -8.319e-03, 2.861e-02, 9.400e-02, -1.323e-02, -9.068e-02, -5.325e-02, -3.405e-02, -1.348e-01, 3.636e-02, -1.412e-03, -8.639e-03, -2.458e-02, 8.211e-02)); + r += mul(s0_5, M4(-2.241e-02, 2.480e-02, -5.949e-02, 3.283e-02, 1.708e-02, -3.191e-02, 1.170e-02, -8.865e-02, 1.219e-02, 1.932e-01, -1.464e-01, 2.297e-01, 1.451e-02, -1.641e-02, 3.309e-02, -3.171e-02)); + r += mul(s0_6, M4(3.591e-03, -7.527e-03, -1.827e-02, -5.322e-03, -8.911e-03, 4.973e-03, -1.666e-03, -2.836e-02, 1.501e-04, 8.513e-03, -1.642e-02, 3.185e-03, -9.765e-03, -5.416e-03, 8.468e-03, 1.827e-02)); + r += mul(s0_7, M4(-6.142e-03, 1.179e-02, 9.797e-03, -7.066e-03, -4.748e-02, -4.553e-02, -6.670e-03, 6.607e-02, 3.616e-02, 1.246e-02, 7.919e-03, -2.878e-02, 2.383e-02, 1.226e-02, 1.933e-02, 
1.379e-02)); + r += mul(s0_8, M4(5.489e-03, -6.949e-03, -1.249e-02, -1.273e-02, -1.335e-02, -1.046e-02, -1.467e-02, 1.301e-02, -3.634e-04, 2.387e-02, -6.627e-03, 6.708e-02, 1.014e-02, 7.646e-03, -3.052e-03, -1.804e-02)); + r += mul(s1_0, M4(-5.993e-02, 3.872e-02, -8.385e-02, -2.055e-02, 3.127e-02, 2.489e-02, -1.823e-02, -7.680e-03, -1.837e-02, -3.850e-03, -1.939e-02, -4.987e-03, -3.555e-02, -8.473e-02, -2.959e-03, -9.691e-04)); + r += mul(s1_1, M4(1.471e-01, -1.696e-01, 7.404e-02, 1.358e-01, -2.949e-02, -7.137e-02, 7.931e-02, 7.524e-02, -1.398e-03, 1.968e-02, 6.140e-02, 1.004e-03, -1.041e-01, 2.717e-01, 2.352e-02, -9.992e-02)); + r += mul(s1_2, M4(-2.605e-02, 1.538e-01, 1.949e-02, -7.537e-02, 4.956e-03, 2.166e-02, 4.880e-03, 4.470e-02, -1.016e-01, -5.237e-02, -3.256e-02, 4.931e-02, 4.893e-03, -9.205e-02, -8.567e-03, 8.578e-03)); + r += mul(s1_3, M4(2.993e-02, 1.200e-02, 6.862e-02, -6.863e-04, -6.337e-02, -3.137e-02, -6.470e-02, 5.289e-02, -3.570e-02, -1.412e-02, 2.902e-03, 5.407e-03, 1.897e-01, -4.217e-02, 7.446e-02, -1.463e-01)); + r += mul(s1_4, M4(1.145e-01, 4.910e-02, -3.034e-01, 4.102e-03, -1.989e-01, 5.177e-01, 5.979e-02, -3.146e-01, 9.415e-03, -3.219e-02, 5.995e-02, 4.015e-02, -1.892e-01, -3.401e-02, -2.563e-01, 4.216e-01)); + r += mul(s1_5, M4(-3.214e-02, 4.432e-02, 1.106e-02, -1.413e-01, 4.720e-02, -1.067e-02, 1.894e-02, -1.091e-01, 9.069e-02, 3.072e-02, -1.896e-02, -1.035e-01, 2.268e-02, -5.219e-03, 4.358e-02, -9.705e-02)); + r += mul(s1_6, M4(1.562e-03, -9.512e-03, -2.244e-02, -1.654e-02, 5.496e-03, -5.039e-03, 4.016e-02, -3.034e-02, 4.426e-03, 8.628e-03, -2.512e-02, -9.072e-03, -1.572e-02, -2.058e-02, 4.438e-02, 8.002e-03)); + r += mul(s1_7, M4(-1.548e-02, 6.459e-03, 4.658e-02, 1.722e-02, -1.577e-02, -8.439e-02, -1.723e-02, 6.488e-02, 3.528e-02, 7.008e-03, 3.369e-05, -2.880e-02, 2.789e-02, 4.783e-03, -2.586e-02, -5.774e-02)); + r += mul(s1_8, M4(6.076e-03, -2.590e-03, -4.608e-03, 1.910e-02, -1.267e-02, -3.100e-02, -1.022e-02, 2.922e-02, -1.394e-02, 
2.198e-02, 5.267e-03, 3.279e-02, 9.616e-03, 1.532e-02, 2.373e-03, -6.028e-03)); + r += mul(s2_0, M4(-6.614e-02, -2.182e-02, -1.388e-02, -2.440e-03, -9.562e-03, 2.454e-03, 5.310e-04, 8.280e-03, -2.452e-02, -3.225e-02, 3.147e-02, 1.186e-02, -5.556e-03, 1.584e-03, 3.588e-02, 1.071e-03)); + r += mul(s2_1, M4(-1.006e-01, -1.033e-01, 6.797e-02, -1.167e-01, 5.872e-02, -6.635e-04, -1.559e-02, 4.107e-03, 5.115e-02, 1.119e-01, -6.172e-02, -3.804e-02, -3.999e-02, 2.446e-03, 3.854e-02, 3.619e-02)); + r += mul(s2_2, M4(-3.021e-02, 2.507e-02, -2.031e-02, 1.310e-01, 1.319e-02, 2.045e-02, 8.442e-03, -1.568e-02, 2.613e-03, -8.475e-03, 4.010e-02, 1.786e-02, 5.968e-03, 2.538e-02, -2.774e-03, 2.409e-02)); + r += mul(s2_3, M4(1.206e-02, -9.481e-03, -3.632e-02, -7.142e-03, 6.906e-02, 2.239e-02, 3.409e-02, 3.912e-03, 1.751e-02, 2.146e-02, 7.899e-02, -1.307e-03, -8.623e-02, 6.119e-02, -5.315e-02, 4.924e-02)); + r += mul(s2_4, M4(8.546e-02, 7.753e-02, -6.961e-02, 9.094e-02, -3.058e-01, -1.135e-01, -1.563e-01, -9.692e-02, 5.813e-02, -3.372e-02, -7.690e-02, 7.416e-04, 9.672e-02, -2.104e-01, -6.485e-02, -1.636e-01)); + r += mul(s2_5, M4(6.507e-02, -1.349e-02, 5.284e-02, -9.991e-02, -4.316e-03, -8.037e-02, 6.635e-03, -8.991e-03, -2.188e-02, -5.964e-02, -3.078e-02, 1.082e-01, 1.135e-02, 4.011e-02, 2.729e-02, 6.621e-02)); + r += mul(s2_6, M4(-3.503e-03, 7.632e-03, 9.369e-03, 7.759e-03, -7.768e-03, -3.471e-03, 2.214e-02, 1.091e-02, -6.653e-02, -1.895e-02, -2.878e-02, 3.137e-02, 2.943e-02, 1.752e-02, -5.594e-03, 4.309e-02)); + r += mul(s2_7, M4(7.778e-03, 6.771e-03, 1.779e-03, 7.087e-03, 6.034e-02, 2.083e-02, -3.374e-02, -7.667e-03, 1.442e-02, 1.245e-02, -1.346e-02, -6.898e-02, 3.395e-03, 4.285e-02, 5.205e-02, -3.860e-02)); + r += mul(s2_8, M4(1.034e-02, 1.446e-02, 8.920e-03, -1.042e-02, 2.126e-03, 1.193e-02, 5.882e-03, -2.283e-02, -4.069e-03, -7.869e-03, 2.932e-02, -6.078e-02, 7.783e-03, 1.763e-02, 6.087e-03, 1.066e-02)); + r += mul(s3_0, M4(-1.117e-02, -3.773e-02, 4.783e-02, 1.799e-02, 
-1.312e-02, -1.550e-03, 1.286e-03, 5.248e-03, -3.082e-02, -3.944e-02, 3.241e-02, 4.450e-03, -8.538e-03, -6.150e-03, 3.260e-02, -5.224e-03)); + r += mul(s3_1, M4(-4.113e-01, 8.007e-02, 1.653e-01, -2.705e-01, 4.841e-02, -2.195e-05, -2.048e-02, 4.801e-04, -4.779e-04, 7.444e-02, -4.292e-02, -6.109e-04, -5.756e-02, -1.337e-02, 3.434e-02, 3.096e-02)); + r += mul(s3_2, M4(-5.278e-02, 1.767e-01, -3.923e-02, 1.401e-01, 1.043e-02, 8.874e-03, 1.041e-02, -1.616e-02, 1.214e-02, -2.069e-02, 3.683e-02, 1.199e-02, 2.470e-03, 1.989e-02, -7.253e-03, 1.975e-02)); + r += mul(s3_3, M4(3.710e-03, -6.151e-03, -4.808e-02, -1.869e-02, 6.567e-02, 1.809e-02, 2.937e-02, 2.441e-03, 1.323e-01, -1.969e-02, 2.207e-02, -8.042e-03, -1.013e-01, 7.105e-02, -6.565e-02, 4.941e-02)); + r += mul(s3_4, M4(7.788e-02, 8.375e-02, -8.080e-02, 1.812e-01, -2.842e-01, -8.450e-02, -1.230e-01, -7.689e-02, 2.533e-01, 4.913e-01, -1.572e-01, -2.203e-01, 2.837e-02, -3.141e-01, -1.344e-01, -2.363e-01)); + r += mul(s3_5, M4(5.774e-02, -3.128e-02, 5.944e-02, -7.505e-02, -1.588e-03, -7.247e-02, -1.595e-04, -1.232e-02, -1.557e-02, -1.428e-01, -2.084e-02, 1.199e-01, 1.154e-02, 2.753e-02, 2.961e-02, 5.043e-02)); + r += mul(s3_6, M4(-6.184e-03, 7.996e-03, 3.448e-03, 5.530e-03, -8.169e-03, -2.379e-03, 1.869e-02, 7.190e-03, -6.045e-02, -2.534e-02, -3.620e-03, 3.928e-02, 2.574e-02, 1.327e-02, -1.794e-02, 4.131e-02)); + r += mul(s3_7, M4(3.847e-03, 3.405e-03, 4.610e-03, 1.175e-02, 5.643e-02, 1.664e-02, -3.929e-02, -3.633e-03, 5.540e-02, 5.807e-02, -2.154e-01, -3.165e-01, -9.234e-03, 4.162e-02, 2.476e-02, -6.623e-02)); + r += mul(s3_8, M4(1.258e-02, 1.675e-02, 1.004e-02, -3.470e-04, 7.424e-04, 7.711e-03, 4.932e-03, -2.514e-02, 2.154e-02, 2.952e-02, 3.343e-02, -4.860e-02, 6.836e-03, 1.422e-02, 4.818e-03, 4.429e-03)); + r += mul(s4_0, M4(-1.991e-02, -7.818e-02, 1.753e-03, -7.982e-03, -8.459e-03, 1.715e-02, -3.093e-04, 1.164e-02, -2.252e-02, -1.032e-02, -3.003e-02, 7.538e-04, 9.509e-03, 4.878e-04, -1.529e-02, -2.684e-04)); + r += 
mul(s4_1, M4(2.582e-02, -7.369e-02, 1.761e-02, -9.257e-03, 6.032e-02, -7.150e-02, 1.203e-01, 5.405e-02, 3.053e-02, -7.387e-02, 6.575e-02, 6.294e-02, -4.751e-02, 3.480e-02, -4.056e-02, -3.144e-02)); + r += mul(s4_2, M4(-2.343e-02, 1.343e-02, -3.803e-03, 6.644e-02, -1.866e-02, 2.669e-02, -1.160e-02, 2.351e-02, -5.340e-04, -1.773e-02, -2.394e-02, -6.879e-02, -6.449e-03, -5.503e-02, 1.298e-02, 1.473e-02)); + r += mul(s4_3, M4(2.549e-01, 2.205e-03, 1.876e-01, -7.721e-02, 3.052e-02, 2.247e-02, 3.544e-03, -9.289e-03, -3.133e-02, 2.939e-02, 6.849e-02, -2.967e-02, -3.779e-02, -1.874e-02, 1.963e-02, -1.928e-02)); + r += mul(s4_4, M4(2.826e-01, -3.722e-01, 2.660e-01, -4.212e-01, 3.193e-01, 2.409e-01, -6.020e-01, -2.057e-01, -1.676e-02, 1.411e-01, -7.284e-02, -4.007e-02, 9.450e-02, 1.185e-02, 8.403e-02, 1.109e-01)); + r += mul(s4_5, M4(-1.082e-02, -4.602e-02, -1.661e-02, -8.544e-02, 5.456e-02, 7.028e-02, 2.377e-02, -2.090e-01, 1.850e-03, -5.624e-02, 2.329e-02, 5.773e-02, -1.543e-02, -6.734e-03, -1.854e-02, -2.026e-02)); + r += mul(s4_6, M4(-2.066e-02, -2.805e-02, 5.004e-02, -5.649e-03, -1.240e-02, -1.120e-03, 1.128e-02, 1.740e-02, -8.836e-03, -1.918e-02, 1.448e-02, 3.832e-02, 1.178e-02, 1.012e-02, -1.370e-02, 6.817e-03)); + r += mul(s4_7, M4(-1.574e-02, 6.095e-03, 1.873e-02, -2.419e-02, 1.698e-02, -2.177e-03, 1.293e-02, 3.237e-02, 1.892e-02, -6.214e-03, 3.640e-03, -3.150e-02, 9.240e-03, 7.956e-03, 1.666e-02, -3.750e-02)); + r += mul(s4_8, M4(6.335e-03, 3.346e-02, -7.784e-03, 1.793e-02, 1.143e-02, 1.155e-02, 3.650e-02, -3.451e-02, -1.071e-02, 1.390e-02, 1.723e-02, -7.698e-03, -2.032e-03, 1.901e-02, -2.404e-02, -7.579e-03)); + r += mul(s5_0, M4(-4.975e-02, -5.695e-02, 2.088e-02, 4.877e-03, 1.484e-02, 1.283e-02, -1.485e-02, -1.493e-03, -8.537e-02, 1.514e-02, 2.568e-02, 2.130e-02, -2.429e-02, -2.395e-04, -1.912e-02, -6.880e-03)); + r += mul(s5_1, M4(-4.932e-02, 2.237e-02, 2.594e-02, -2.794e-02, -3.335e-02, -7.771e-02, 9.786e-02, 3.368e-02, -4.771e-03, -3.063e-01, 1.260e-01, 
1.167e-01, 2.342e-02, 2.135e-02, -5.706e-02, -2.458e-02)); + r += mul(s5_2, M4(-1.410e-02, -1.026e-02, -1.484e-02, 4.841e-02, -1.918e-02, 1.553e-02, -3.033e-02, -5.088e-03, 2.929e-03, 3.870e-02, -1.440e-02, -3.313e-02, -1.371e-02, -7.303e-02, 2.006e-02, 3.500e-02)); + r += mul(s5_3, M4(7.503e-02, 6.763e-02, -2.062e-02, -1.734e-02, -2.692e-02, 5.217e-03, 2.626e-02, 1.809e-02, -5.349e-02, 4.840e-02, 7.110e-02, -8.833e-02, -8.620e-02, -2.215e-03, -5.047e-02, 6.569e-03)); + r += mul(s5_4, M4(9.741e-03, 7.098e-04, -4.540e-02, -8.394e-03, -4.509e-02, 6.875e-02, -3.755e-02, -3.042e-02, -1.652e-02, -9.887e-02, -8.768e-02, 4.416e-01, 3.894e-01, -1.719e-01, 4.596e-01, -4.144e-02)); + r += mul(s5_5, M4(1.393e-02, -2.573e-02, 1.315e-02, -6.274e-02, 4.479e-02, -9.074e-02, 2.770e-02, -2.673e-02, -6.560e-03, -2.809e-02, 4.211e-02, -8.775e-03, -5.896e-02, -6.478e-03, -4.895e-02, -1.257e-01)); + r += mul(s5_6, M4(-1.757e-02, -1.089e-02, 4.492e-02, 2.950e-02, -9.883e-03, -1.252e-03, -3.068e-03, 1.144e-02, -8.864e-03, -2.111e-02, 1.158e-02, 4.651e-02, 7.449e-03, 8.998e-03, -3.528e-02, 6.337e-03)); + r += mul(s5_7, M4(7.524e-03, -8.958e-03, 5.724e-03, 1.279e-02, 2.790e-02, 1.130e-02, -2.957e-02, -1.026e-02, 1.624e-02, 1.139e-02, 1.602e-03, -5.824e-02, -1.166e-03, -9.779e-03, 3.063e-02, -1.145e-01)); + r += mul(s5_8, M4(2.655e-03, 2.348e-02, -6.856e-03, -4.147e-03, 1.467e-02, 2.920e-02, 2.897e-02, -4.703e-02, -1.137e-02, 5.183e-03, 2.331e-02, -9.601e-03, 1.926e-03, 4.874e-03, -3.124e-02, 3.141e-02)); + r += mul(s6_0, M4(-1.876e-02, -1.165e-02, 1.643e-02, -5.137e-03, -2.971e-02, -2.814e-02, 4.568e-02, -2.350e-03, 1.767e-02, -2.508e-02, 9.430e-03, 8.928e-03, -1.128e-02, 1.397e-02, 1.078e-03, 2.444e-03)); + r += mul(s6_1, M4(-1.603e-02, 4.883e-03, -3.543e-02, 2.508e-02, 6.303e-02, -1.529e-03, -5.630e-02, -5.166e-03, 3.772e-03, 1.235e-01, -9.009e-02, -4.480e-02, 9.319e-04, 1.023e-02, -1.260e-02, 4.646e-03)); + r += mul(s6_2, M4(2.779e-02, 4.112e-03, 1.610e-02, -2.811e-02, -5.176e-03, 
1.470e-01, -3.409e-02, -4.477e-02, 1.400e-02, -2.700e-02, 2.512e-02, -3.769e-03, 3.306e-03, 1.269e-03, -4.562e-03, -5.639e-03)); + r += mul(s6_3, M4(-3.967e-02, -2.166e-02, 4.781e-03, 3.840e-03, 4.313e-02, 8.753e-03, -6.067e-03, 8.509e-04, -3.141e-02, 2.118e-02, 7.875e-02, -1.663e-02, -6.424e-02, 3.583e-02, -4.025e-02, 2.640e-02)); + r += mul(s6_4, M4(1.583e-01, 6.976e-02, -1.724e-01, -2.359e-01, -6.695e-02, -3.676e-02, 1.117e-01, -1.440e-02, -3.013e-01, -1.920e-01, 3.894e-01, 2.940e-01, 2.106e-01, -1.568e-01, 7.986e-02, -4.248e-04)); + r += mul(s6_5, M4(9.537e-06, 7.261e-02, 4.889e-02, 1.255e-01, -8.090e-03, -5.074e-02, -2.435e-02, 4.686e-02, 2.466e-02, -1.938e-01, 1.402e-03, 2.188e-01, -3.894e-02, 1.007e-02, 4.435e-03, 2.815e-02)); + r += mul(s6_6, M4(-1.334e-02, -1.331e-03, -1.216e-02, -2.382e-03, 1.886e-03, 6.733e-04, 6.726e-03, -2.342e-03, -5.683e-03, -1.460e-02, -2.966e-02, 1.401e-02, 1.776e-03, 8.501e-03, -2.850e-02, 2.280e-02)); + r += mul(s6_7, M4(-5.622e-02, -4.385e-02, 8.323e-02, 2.082e-02, -3.004e-03, 6.389e-03, -3.528e-02, -6.396e-03, 3.363e-02, 5.651e-02, -7.056e-02, -1.323e-01, 7.776e-02, -1.758e-01, 2.205e-01, -3.272e-01)); + r += mul(s6_8, M4(-2.244e-02, -1.364e-02, -4.934e-03, 5.392e-02, -9.519e-03, -8.369e-03, 1.478e-03, 2.273e-03, -1.052e-02, 1.176e-02, -7.981e-03, -5.322e-02, -1.382e-02, 3.117e-02, -5.119e-02, 3.360e-02)); + r += mul(s7_0, M4(3.142e-03, -7.278e-03, 1.644e-02, -5.327e-03, -2.223e-02, -1.115e-02, 3.171e-02, 8.298e-03, 1.801e-02, -3.280e-02, 2.533e-02, 8.254e-03, -8.665e-03, -1.002e-02, 1.963e-02, 3.322e-03)); + r += mul(s7_1, M4(1.425e-02, 3.507e-02, -4.513e-02, 1.871e-02, 4.639e-01, -2.210e-01, -2.253e-01, 2.526e-02, 5.002e-02, 1.415e-01, -6.515e-02, -2.384e-02, -3.711e-03, 2.317e-02, -2.549e-02, 7.058e-03)); + r += mul(s7_2, M4(3.407e-02, 1.647e-02, 1.747e-02, -2.392e-02, 4.284e-02, -3.438e-02, -3.217e-02, -3.457e-02, -6.072e-04, -6.325e-03, 4.104e-02, 2.124e-02, 7.593e-03, 4.929e-03, -1.343e-03, 1.344e-03)); + r += mul(s7_3, 
M4(-4.416e-02, -2.313e-02, 9.369e-03, 1.215e-03, 4.700e-02, 9.235e-03, 4.517e-03, -1.758e-03, -4.952e-02, 1.684e-02, 3.661e-02, -1.082e-02, 2.024e-02, 4.450e-03, -1.514e-02, -3.430e-03)); + r += mul(s7_4, M4(5.847e-02, -4.423e-02, -1.759e-01, -1.619e-01, -1.243e-01, -1.296e-02, 1.304e-01, -5.431e-02, 3.184e-02, -6.372e-02, 1.027e-02, 8.180e-02, 2.514e-02, -1.069e-01, 1.008e-01, -1.097e-02)); + r += mul(s7_5, M4(1.223e-02, 6.869e-02, 5.970e-02, 1.265e-01, -9.771e-03, -8.498e-03, -8.782e-04, 8.668e-02, -2.578e-02, -6.804e-02, -3.425e-02, 1.549e-02, -1.045e-03, 1.012e-01, 1.609e-02, 5.317e-02)); + r += mul(s7_6, M4(-9.363e-04, -1.286e-03, -2.189e-02, -1.560e-02, -2.089e-03, 5.252e-04, 9.796e-03, 4.041e-04, -2.468e-03, -9.992e-03, -1.344e-02, 1.703e-02, -2.202e-02, -1.400e-02, -9.243e-03, -1.864e-02)); + r += mul(s7_7, M4(-4.014e-02, -2.973e-02, 1.028e-01, 4.114e-02, 1.664e-03, -3.040e-03, -2.677e-02, -1.250e-02, -2.429e-03, 4.016e-02, -6.270e-03, -5.730e-02, -2.520e-02, 5.750e-02, -8.956e-02, -2.129e-02)); + r += mul(s7_8, M4(-2.496e-02, -4.739e-03, -1.972e-02, 6.053e-02, -9.747e-03, -3.898e-03, -4.896e-03, -3.456e-04, -1.009e-02, -8.804e-03, -9.338e-05, -1.609e-02, 1.923e-02, -6.568e-02, 2.607e-02, -2.522e-02)); + r += V4(-9.206e-04, -7.637e-04, -7.861e-04, -5.855e-04); + return tanh(r); +} + +void Pass18(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 
s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); 
+ V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-16x16C-NVL.hlsl b/src/Effects/CuNNy/CuNNy-16x16C-NVL.hlsl new file mode 100644 
index 000000000..d61d2eab6 --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-16x16C-NVL.hlsl @@ -0,0 +1,7635 @@ +// CuNNy 16x16C BILINEAR RGB NVL - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-D16N16 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t4; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t5; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t6; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM 
+Texture2D t7; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1, t2, t3 + +#define l0(x, y) min16float((dot(float3(6.280e-01, 1.208e+00, 2.567e-01), O(INPUT, float2(x, y)).rgb) + -3.744e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(-1.822e-02, -1.642e-02, -6.093e-02, -3.689e-02) * s0_0; + r += V4(-1.147e-02, -2.455e-02, 1.336e-01, 1.564e-02) * s0_1; + r += V4(4.406e-02, -2.573e-02, -4.749e-02, 1.795e-02) * s0_2; + r += V4(4.456e-02, -1.651e-02, 3.591e-02, 3.674e-02) * s0_3; + r += V4(8.599e-04, 5.169e-02, 1.467e-02, -5.650e-02) * s0_4; + r += V4(-7.104e-02, 9.408e-02, -1.980e-02, 3.932e-03) * s0_5; + r += V4(-1.400e-02, -3.072e-02, -3.497e-02, 2.479e-02) * s0_6; + r += V4(1.641e-02, -3.119e-02, 2.443e-02, 1.844e-02) * s0_7; + r += V4(2.346e-02, 7.281e-03, -2.342e-02, -3.454e-02) * s0_8; + r += V4(7.451e-03, 5.332e-03, 9.656e-03, -1.242e-02); + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(2.452e-02, -8.793e-04, 2.884e-02, 1.094e-02) * s0_0; + r += V4(-1.529e-01, 1.290e-03, -2.407e-01, 5.000e-02) * s0_1; + r += V4(1.185e-01, -4.263e-03, -7.048e-02, -3.493e-04) * s0_2; + r += V4(5.017e-02, 6.435e-04, -7.056e-02, -9.790e-02) * s0_3; + r += V4(7.300e-02, -9.583e-03, 2.147e-01, -1.977e-01) * s0_4; + r += V4(-9.542e-02, 8.440e-03, 6.774e-02, 2.347e-01) * s0_5; + r += V4(-7.992e-02, 2.175e-01, 4.417e-02, -4.492e-02) * s0_6; + r += V4(3.449e-02, -2.192e-01, 1.774e-02, 4.249e-02) * s0_7; + r += V4(2.266e-02, 4.264e-03, 8.381e-03, 7.037e-04) * s0_8; + r += V4(-6.910e-03, 1.934e-02, -1.482e-02, -5.155e-03); + return r; +} + +V4 f2(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, 
min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(-1.788e-02, -3.939e-03, 3.515e-03, 5.372e-02) * s0_0; + r += V4(9.512e-03, -1.173e-01, 1.768e-02, -1.150e-02) * s0_1; + r += V4(2.307e-02, 1.062e-01, -1.145e-02, 1.127e-02) * s0_2; + r += V4(-3.332e-02, -1.779e-02, 7.936e-04, 1.545e-01) * s0_3; + r += V4(9.399e-02, -2.767e-02, -2.517e-03, -1.732e-01) * s0_4; + r += V4(-5.877e-02, 4.553e-02, 1.549e-04, -3.632e-02) * s0_5; + r += V4(3.380e-02, 2.040e-02, 1.815e-02, -1.465e-02) * s0_6; + r += V4(-8.618e-02, -1.800e-02, -2.228e-02, 7.153e-02) * s0_7; + r += V4(1.953e-02, 6.456e-03, 3.444e-02, -4.943e-02) * s0_8; + r += V4(-1.078e-02, 4.113e-02, -1.578e-02, 4.495e-03); + return r; +} + +V4 f3(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(3.349e-03, -4.958e-02, -5.999e-02, -2.301e-02) * s0_0; + r += V4(-4.576e-02, -7.846e-02, 1.042e-01, 1.803e-02) * s0_1; + r += V4(-2.080e-02, 1.088e-02, -3.553e-02, 2.650e-02) * s0_2; + r += V4(4.505e-02, -1.640e-01, 2.588e-02, 8.898e-02) * s0_3; + r += V4(-1.618e-02, 2.072e-02, -1.249e-01, -8.836e-02) * s0_4; + r += V4(6.673e-02, -7.729e-02, 2.106e-01, 4.868e-03) * s0_5; + r += V4(2.755e-02, 4.993e-02, 1.931e-02, -1.542e-02) * s0_6; + r += V4(-7.155e-02, 3.289e-02, -1.268e-01, 1.315e-02) * s0_7; + r += V4(-5.611e-02, -4.307e-02, -2.551e-02, -1.216e-03) * s0_8; + r += V4(-2.374e-02, -6.784e-02, -1.047e-02, 6.831e-03); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + 
min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.405e-02, 1.208e-01, 8.406e-02, 1.417e-02, 3.850e-02, -1.326e-02, 1.209e-01, -1.793e-02, 9.657e-02, 1.077e-01, 7.998e-02, -2.348e-02, 2.585e-01, -1.255e-01, -7.102e-02, -2.930e-01)); + r += mul(s0_1, M4(-4.104e-02, -1.013e-01, -5.548e-02, -5.558e-02, -7.116e-02, -7.352e-02, 9.299e-02, 1.656e-01, 3.298e-03, -9.411e-02, -1.419e-02, 6.433e-02, 9.729e-02, -5.107e-03, 8.303e-02, -7.063e-02)); + r += mul(s0_2, M4(2.247e-02, 6.092e-03, 7.178e-02, 5.407e-02, 1.271e-01, -3.168e-02, -1.879e-02, -1.943e-01, -7.868e-02, -7.007e-02, 1.446e-01, -4.654e-03, -5.827e-01, 3.794e-02, -3.417e-01, 2.123e-01)); + r += mul(s0_3, M4(-3.101e-02, -1.111e-01, 
-2.802e-02, -1.350e-02, 1.242e-01, 1.283e-03, -2.013e-02, 7.201e-02, -5.530e-02, -4.907e-02, -1.342e-01, -6.673e-02, -9.311e-02, 1.897e-01, 9.046e-02, 2.898e-01)); + r += mul(s0_4, M4(-1.784e-02, -5.918e-02, 5.845e-02, 8.362e-02, -1.333e-01, -2.641e-01, -3.031e-02, -1.676e-02, 6.354e-02, 8.276e-02, 1.069e-01, 5.125e-02, -4.843e-02, 2.944e-01, 1.044e-01, 2.449e-01)); + r += mul(s0_5, M4(1.501e-02, 7.449e-02, -8.033e-02, -1.139e-01, 1.853e-02, 1.338e-01, -8.268e-02, -1.636e-01, -6.581e-03, -1.703e-02, -9.384e-03, 7.188e-02, -4.501e-02, -1.583e-01, -2.663e-01, -8.279e-02)); + r += mul(s0_6, M4(1.077e-02, 5.902e-02, -5.638e-02, 4.160e-02, 1.254e-01, 1.971e-01, -6.279e-02, -4.505e-02, -1.191e-01, -1.344e-01, -3.873e-02, 6.566e-02, -2.099e-01, 1.337e-01, 1.642e-01, -2.878e-01)); + r += mul(s0_7, M4(-2.701e-02, 1.882e-02, -2.052e-02, 7.546e-02, 2.460e-02, 1.171e-01, 8.827e-03, -2.274e-02, 4.265e-02, 1.404e-01, 7.646e-02, -8.588e-03, 2.457e-01, -1.233e-01, -1.493e-01, 5.879e-02)); + r += mul(s0_8, M4(-1.900e-03, 5.256e-02, 2.608e-02, -9.680e-02, -5.792e-03, 8.072e-03, 7.046e-02, -1.668e-02, -1.813e-01, 7.429e-02, -1.250e-01, 3.976e-03, 7.827e-02, 7.365e-02, 3.113e-01, -4.493e-01)); + r += mul(s1_0, M4(-2.589e-01, -2.860e-02, 3.458e-02, 1.518e-01, -8.472e-02, -2.572e-02, 1.100e-02, 2.920e-01, -4.118e-02, -8.286e-02, 1.441e-02, 3.005e-01, -2.189e-02, 3.752e-03, 6.699e-02, 2.245e-02)); + r += mul(s1_1, M4(-1.753e-02, 1.462e-01, -1.202e-01, 3.685e-02, 6.990e-02, -6.744e-02, 1.121e-01, -2.440e-01, -9.690e-02, 5.161e-02, 6.374e-02, -2.208e-01, 6.330e-02, -2.542e-02, -1.029e-01, -5.961e-03)); + r += mul(s1_2, M4(-8.907e-03, 7.720e-02, 2.137e-01, -1.415e-01, -2.575e-01, -1.161e-01, -2.410e-02, 1.523e-01, -2.136e-02, -1.143e-01, 6.256e-02, -2.706e-01, 6.094e-02, 1.216e-01, 7.402e-03, 2.078e-02)); + r += mul(s1_3, M4(-1.839e-01, 2.521e-02, -6.316e-02, -4.873e-04, -1.836e-01, 1.820e-01, -3.747e-03, -2.238e-02, -2.794e-02, -1.929e-01, 9.416e-03, 1.670e-01, 7.294e-02, -4.497e-02, 
3.260e-02, -8.590e-02)); + r += mul(s1_4, M4(1.612e-01, 2.591e-01, -1.860e-01, 5.530e-01, -1.690e-02, 2.011e-02, -6.756e-02, -4.013e-02, 2.219e-01, 9.343e-02, -6.113e-02, -2.694e-01, -5.443e-02, 4.693e-02, -1.964e-03, -6.205e-02)); + r += mul(s1_5, M4(-9.952e-02, -4.111e-01, 2.484e-01, 2.142e-01, 3.349e-02, -8.974e-02, 9.233e-03, 1.371e-01, 9.616e-02, -2.994e-01, -3.412e-02, -2.624e-01, -2.777e-02, 5.845e-02, -4.256e-02, 5.163e-03)); + r += mul(s1_6, M4(-2.489e-01, 3.013e-01, -1.623e-01, 4.737e-02, -2.570e-01, -3.679e-01, 8.586e-02, -8.836e-02, 1.821e-01, -1.438e-01, -3.181e-02, 1.531e-01, -4.456e-02, 6.053e-02, -4.692e-03, 1.889e-01)); + r += mul(s1_7, M4(5.682e-02, 7.344e-02, 5.417e-02, 2.450e-01, 4.026e-03, -1.124e-01, 5.952e-02, 1.936e-01, 3.018e-01, -1.236e-02, 3.948e-02, -5.773e-02, -4.648e-02, -5.322e-02, -5.548e-03, -7.449e-02)); + r += mul(s1_8, M4(4.563e-02, 4.610e-02, 4.906e-02, -3.220e-01, 2.408e-02, 6.485e-02, -5.831e-03, -6.206e-02, 1.508e-01, 1.450e-01, -2.797e-02, -3.810e-01, -1.370e-02, -1.935e-01, 5.435e-02, 5.075e-02)); + r += mul(s2_0, M4(-3.448e-02, -6.980e-02, 5.779e-02, 6.469e-02, 3.711e-02, -3.232e-02, -7.098e-02, -3.179e-03, -8.000e-03, 2.579e-02, -2.380e-02, 3.991e-02, -1.541e-02, 9.551e-02, 1.269e-01, 1.477e-01)); + r += mul(s2_1, M4(-6.560e-02, 1.108e-02, 9.036e-02, 2.225e-02, 2.757e-02, 2.216e-01, 4.036e-02, -4.563e-02, -7.943e-02, 3.388e-02, -2.570e-02, 1.062e-02, -1.880e-01, 2.051e-01, 1.599e-02, 9.407e-02)); + r += mul(s2_2, M4(-6.447e-02, 2.987e-02, 7.575e-02, -2.814e-03, -2.009e-01, 7.443e-02, 5.020e-02, 1.241e-02, -1.464e-02, -2.127e-02, -1.959e-02, -9.106e-02, -1.876e-02, 6.003e-02, -1.087e-01, 4.157e-03)); + r += mul(s2_3, M4(1.373e-01, 8.434e-02, -7.087e-02, -2.916e-02, 9.784e-02, -1.666e-01, 4.713e-02, 2.128e-02, -7.791e-02, 1.843e-01, 1.793e-02, -6.299e-03, 5.664e-02, -1.375e-02, -5.741e-02, 5.340e-03)); + r += mul(s2_4, M4(-1.958e-01, 1.879e-01, 3.784e-02, -1.695e-01, 1.220e-01, 1.435e-02, 6.042e-02, 6.976e-02, -4.420e-02, 
-2.048e-01, 1.407e-01, 8.324e-02, 9.824e-02, -4.772e-03, 3.171e-02, -9.108e-02)); + r += mul(s2_5, M4(1.473e-01, 4.592e-04, -3.968e-01, -3.998e-02, 5.250e-02, -1.122e-02, 4.195e-03, -6.805e-02, -7.127e-02, -7.509e-02, -5.716e-02, -9.791e-02, 1.336e-01, 1.031e-01, 1.315e-01, 7.645e-02)); + r += mul(s2_6, M4(1.184e-01, 1.619e-01, 4.172e-02, -8.332e-02, -7.003e-02, 9.098e-03, -1.272e-02, 6.432e-02, -7.801e-02, -7.251e-02, 4.113e-02, 5.624e-02, 1.280e-03, -8.267e-02, -5.059e-03, -8.324e-02)); + r += mul(s2_7, M4(3.987e-02, 8.653e-02, -1.324e-01, 3.768e-02, 1.034e-02, -7.325e-02, -2.599e-02, 6.754e-02, -2.531e-02, 5.051e-02, 6.231e-02, -2.843e-02, -9.684e-02, 3.120e-02, 2.869e-02, -1.306e-01)); + r += mul(s2_8, M4(1.452e-01, 2.250e-02, -3.331e-02, -2.652e-01, -7.785e-02, 7.697e-02, 5.306e-02, -1.206e-02, 2.950e-02, -1.129e-02, -1.240e-02, 8.520e-02, -1.194e-01, -5.583e-03, 1.318e-01, 4.145e-02)); + r += mul(s3_0, M4(2.435e-01, 2.473e-02, -3.799e-03, 5.391e-02, -7.654e-02, 1.633e-02, 2.528e-03, 2.085e-03, -7.200e-02, -1.247e-01, 9.995e-02, 1.192e-01, 4.968e-02, -3.659e-02, 7.987e-02, -1.196e-01)); + r += mul(s3_1, M4(1.970e-02, -9.596e-02, 2.596e-02, 9.929e-02, -9.603e-02, 7.357e-02, 3.207e-02, -1.129e-01, 1.304e-01, 3.305e-02, -3.747e-02, -1.177e-02, -8.392e-02, -1.410e-02, -1.620e-01, 4.514e-02)); + r += mul(s3_2, M4(1.392e-02, -1.797e-01, -6.554e-02, -6.099e-02, -2.959e-01, 1.745e-01, 1.944e-01, -2.134e-01, 2.435e-02, -5.074e-02, 5.244e-02, -2.474e-02, -5.085e-02, -9.063e-02, -1.671e-01, -1.548e-02)); + r += mul(s3_3, M4(8.034e-02, 1.214e-01, 5.679e-02, -1.209e-01, 1.102e-01, -1.500e-03, 7.307e-02, -2.826e-02, 6.892e-02, 9.349e-02, -2.101e-02, 1.285e-01, 8.814e-02, -2.099e-02, -1.133e-01, 1.462e-02)); + r += mul(s3_4, M4(1.852e-02, -1.295e-01, -3.895e-03, 6.403e-02, -9.664e-02, -7.860e-02, 1.175e-01, -2.920e-01, 1.508e-01, -2.417e-01, -2.569e-01, -2.920e-02, -1.046e-01, 9.367e-02, -2.173e-01, 2.044e-01)); + r += mul(s3_5, M4(-8.709e-02, -3.534e-02, -9.278e-02, 
3.574e-02, 3.987e-02, -8.859e-02, 1.562e-02, -2.034e-01, 1.528e-01, -3.803e-02, -2.427e-01, -5.409e-02, -3.348e-03, 7.453e-02, 6.794e-02, 3.836e-02)); + r += mul(s3_6, M4(1.220e-02, 2.105e-01, 4.535e-02, 6.563e-02, 6.836e-02, 5.600e-02, 1.560e-02, -6.164e-02, -1.412e-01, -1.103e-01, -8.076e-02, -6.528e-02, 9.211e-02, 1.990e-01, -5.119e-02, -8.996e-02)); + r += mul(s3_7, M4(-1.457e-01, -1.085e-01, 1.784e-02, -2.185e-03, 4.281e-02, 5.354e-02, -1.300e-02, -1.125e-01, 7.293e-02, 3.576e-02, -1.282e-01, -5.457e-02, -1.048e-02, 7.982e-02, 6.479e-02, -6.245e-02)); + r += mul(s3_8, M4(3.417e-02, 9.233e-02, 8.488e-02, 2.901e-02, -9.098e-02, 1.505e-01, 3.521e-02, -4.778e-02, 1.625e-01, -1.207e-01, 4.013e-02, 5.975e-02, -1.180e-02, -3.180e-03, 1.255e-01, -6.819e-03)); + r += mul(s4_0, M4(1.537e-01, 2.518e-01, 1.641e-01, -4.800e-01, -8.067e-02, -4.263e-02, -1.103e-02, 5.467e-03, -1.560e-01, 1.149e-01, -1.443e-02, 2.685e-02, 4.818e-03, -6.548e-02, -2.986e-02, -9.109e-02)); + r += mul(s4_1, M4(-2.142e-01, 2.512e-02, 1.720e-01, -1.679e-01, 8.108e-02, 6.832e-02, 9.115e-02, -2.173e-03, 1.329e-01, -3.490e-02, -5.578e-02, 1.104e-01, -4.784e-02, -3.582e-02, -9.612e-02, -6.821e-02)); + r += mul(s4_2, M4(-4.783e-02, 3.973e-01, 1.161e-01, 1.626e-01, -2.491e-02, 1.686e-02, -3.443e-02, -1.059e-01, 5.977e-02, -6.490e-02, -6.506e-02, -4.471e-02, -2.849e-02, -7.122e-03, 1.789e-02, 1.267e-01)); + r += mul(s4_3, M4(2.408e-01, -8.330e-02, -8.275e-02, -6.422e-02, 9.443e-02, -1.883e-01, -2.351e-02, 2.905e-02, 1.478e-02, 6.625e-03, 1.380e-01, 1.312e-01, -9.895e-02, -1.294e-01, -1.465e-02, -2.186e-01)); + r += mul(s4_4, M4(6.419e-01, -3.723e-01, -2.565e-01, -4.735e-01, -3.051e-02, -1.246e-01, 3.242e-02, 2.361e-02, -4.670e-02, 4.870e-02, 8.938e-02, -7.246e-02, 8.033e-02, 1.626e-01, 4.382e-02, -3.981e-02)); + r += mul(s4_5, M4(4.743e-01, 1.601e-02, 1.143e-01, 1.107e-01, -8.403e-02, -2.654e-02, 3.615e-02, -3.300e-02, 1.836e-02, -6.122e-02, -3.072e-02, 1.730e-02, -2.944e-01, -8.219e-02, 1.862e-01, 
-1.063e-01)); + r += mul(s4_6, M4(-1.115e-02, 4.246e-01, -5.974e-03, -1.735e-02, 5.712e-02, 3.389e-01, 3.325e-02, 9.249e-02, -3.867e-02, -1.899e-01, -1.033e-01, -7.591e-02, -8.186e-02, 1.306e-01, -3.340e-02, -9.847e-03)); + r += mul(s4_7, M4(4.828e-01, 2.198e-01, -2.150e-01, 6.362e-03, -1.232e-01, 1.776e-01, -5.929e-02, -4.373e-03, -5.813e-03, 1.354e-02, 3.540e-03, 4.267e-02, 1.379e-03, 9.057e-02, 2.345e-02, -2.193e-02)); + r += mul(s4_8, M4(-8.730e-02, 3.307e-01, -3.251e-02, -2.900e-01, 7.154e-02, 3.098e-02, 1.980e-02, -4.968e-02, 1.170e-02, 1.528e-01, 2.639e-02, -9.142e-02, 1.829e-02, -5.244e-02, 9.641e-03, -5.492e-02)); + r += mul(s5_0, M4(6.116e-02, 8.943e-02, -5.531e-02, -2.696e-02, -1.376e-01, 1.998e-01, 5.097e-02, 6.659e-03, 5.555e-02, 6.673e-02, 8.981e-02, 8.069e-02, 1.494e-01, -3.698e-02, -1.053e-01, -8.436e-02)); + r += mul(s5_1, M4(-4.488e-02, 7.922e-02, 1.704e-02, 4.343e-02, 3.390e-02, -6.945e-02, 4.841e-02, 1.197e-01, 6.671e-02, -2.253e-02, 1.270e-02, -3.095e-01, 2.412e-01, -5.589e-02, -8.286e-02, 3.333e-02)); + r += mul(s5_2, M4(-4.554e-02, -1.439e-02, 7.602e-02, -3.426e-02, -1.577e-01, 7.685e-02, 6.846e-02, 5.846e-02, -1.920e-01, -1.635e-01, -1.462e-01, 1.753e-01, -6.704e-02, 1.433e-01, 1.133e-02, -2.406e-02)); + r += mul(s5_3, M4(-1.738e-03, -8.318e-02, 2.537e-03, -1.054e-02, -1.916e-01, -2.127e-01, 4.746e-02, 7.098e-03, 2.601e-02, -8.985e-02, -7.906e-03, 3.103e-01, 7.611e-02, -5.874e-02, 3.964e-02, 6.860e-02)); + r += mul(s5_4, M4(-1.138e-01, -1.847e-01, -1.228e-02, -1.407e-01, -1.218e-01, 1.584e-01, 3.814e-02, 2.447e-01, 4.855e-02, 8.447e-03, 9.850e-02, -3.130e-02, 1.525e-01, 2.940e-01, 7.898e-02, 1.821e-01)); + r += mul(s5_5, M4(3.111e-02, -5.260e-02, -6.819e-02, 5.936e-03, 1.700e-01, 9.746e-02, 1.690e-02, 4.625e-02, -2.485e-02, 7.180e-02, 5.899e-02, -3.583e-02, 6.990e-02, 8.228e-02, 4.081e-02, 2.349e-02)); + r += mul(s5_6, M4(3.099e-02, 8.576e-02, -3.833e-03, 2.430e-03, 1.976e-01, 2.695e-01, 9.600e-02, 2.032e-01, 2.092e-01, -8.031e-02, 
-1.265e-01, 1.841e-02, 6.259e-02, 8.119e-02, 1.225e-02, 5.224e-02)); + r += mul(s5_7, M4(-8.051e-02, -2.677e-02, -2.939e-02, 4.138e-02, -1.644e-01, -1.498e-01, 1.202e-01, 7.046e-02, -6.764e-02, -1.100e-01, 3.688e-02, -2.193e-01, 9.462e-02, -1.360e-01, 2.253e-02, -5.379e-02)); + r += mul(s5_8, M4(5.739e-03, -2.755e-02, -2.324e-02, 1.058e-01, -7.982e-02, -1.026e-01, 2.569e-01, -1.335e-01, -1.647e-01, 9.644e-02, 9.909e-02, -1.077e-01, 3.698e-02, 6.935e-03, 5.650e-03, 6.725e-02)); + r += mul(s6_0, M4(4.240e-01, -2.918e-01, -3.133e-02, 2.327e-01, -2.695e-01, 1.043e-01, -2.850e-01, 3.208e-01, 2.258e-01, -2.135e-01, -1.859e-01, 7.547e-03, 9.245e-02, -5.596e-03, 9.937e-02, 2.943e-02)); + r += mul(s6_1, M4(2.101e-01, -3.744e-01, 1.696e-01, -1.703e-02, -9.047e-02, -1.152e-01, -1.393e-01, 3.266e-01, 7.654e-02, 2.414e-01, 3.308e-02, 3.706e-03, -1.490e-01, 4.122e-02, -5.556e-02, 1.216e-01)); + r += mul(s6_2, M4(5.715e-02, -2.730e-01, -3.063e-01, 2.882e-01, -3.335e-01, -2.502e-01, -6.934e-02, 6.301e-01, 5.091e-02, -6.457e-02, 8.581e-02, -1.191e-01, 9.755e-03, -8.909e-02, 2.613e-02, -9.604e-02)); + r += mul(s6_3, M4(5.395e-01, -1.115e-01, 1.102e-01, 1.496e-01, 1.512e-01, 1.230e-01, 1.636e-01, -5.026e-01, -1.340e-01, -7.226e-02, -2.846e-01, 8.937e-02, -4.186e-02, -9.760e-02, -4.782e-02, 8.662e-02)); + r += mul(s6_4, M4(5.268e-01, -5.914e-02, 3.737e-01, 1.957e-01, -5.624e-01, -4.037e-03, 1.324e-01, -1.029e-01, 1.476e-01, -8.394e-03, -1.478e-01, 3.448e-02, 2.002e-02, 6.459e-02, 2.170e-02, 9.597e-03)); + r += mul(s6_5, M4(4.312e-01, -1.827e-01, 3.503e-01, -1.471e-02, 2.903e-01, -6.051e-02, 4.530e-02, 1.642e-01, 1.965e-01, -9.838e-02, -2.136e-01, -2.281e-02, 4.991e-02, -5.003e-02, 9.837e-03, 2.202e-02)); + r += mul(s6_6, M4(-2.240e-02, -4.757e-01, 2.564e-02, 1.577e-01, 3.382e-01, -7.741e-02, -2.645e-01, -1.375e-01, -2.008e-01, 8.069e-03, -2.638e-02, 5.265e-02, 2.141e-02, 3.570e-02, 4.337e-03, 5.465e-02)); + r += mul(s6_7, M4(1.627e-01, -2.946e-01, 3.193e-01, 2.728e-02, -7.392e-02, 
-3.848e-01, 1.789e-01, 1.881e-01, 1.077e-01, -4.805e-02, 1.793e-02, -6.635e-02, 7.963e-02, 5.029e-02, -4.372e-02, 1.278e-02)); + r += mul(s6_8, M4(3.372e-01, -1.486e-01, -9.936e-02, 2.261e-01, -5.973e-02, -8.111e-02, -8.219e-02, -1.498e-01, 1.329e-02, -2.499e-02, -1.245e-01, -3.871e-02, -1.800e-02, 6.380e-03, 1.756e-02, -1.356e-01)); + r += mul(s7_0, M4(9.764e-02, -7.088e-02, 3.507e-02, -4.570e-02, 1.675e-01, 1.448e-02, -2.842e-02, -1.203e-01, 6.582e-02, -3.174e-03, -1.471e-02, 1.304e-01, -1.061e-01, 1.586e-01, 1.244e-01, 2.246e-01)); + r += mul(s7_1, M4(1.038e-01, -1.092e-02, -9.483e-02, -1.292e-02, 5.143e-02, 2.371e-02, -1.147e-01, 8.272e-02, 2.127e-02, -1.111e-02, 1.006e-02, 4.855e-02, 2.508e-01, -2.958e-01, -1.424e-01, -3.592e-02)); + r += mul(s7_2, M4(-7.218e-04, 1.665e-01, 2.076e-02, 1.902e-02, -1.085e-02, 7.997e-04, -5.125e-02, 1.165e-01, 1.279e-02, -7.016e-03, 1.398e-01, 4.262e-02, -1.469e-01, 3.665e-01, 2.113e-01, -2.286e-01)); + r += mul(s7_3, M4(-2.811e-02, -1.652e-02, -4.187e-02, -5.403e-02, 1.064e-01, 1.429e-02, 2.725e-02, -6.861e-02, 1.013e-01, 6.601e-03, 2.723e-02, -8.197e-02, -2.954e-01, -6.184e-02, 6.907e-02, -3.243e-01)); + r += mul(s7_4, M4(-2.777e-02, 6.185e-02, -5.263e-02, 2.158e-02, -6.411e-02, 1.157e-01, 4.594e-02, -6.461e-02, -4.041e-02, 6.406e-02, 1.326e-01, -9.146e-02, 3.969e-02, 1.986e-01, 1.955e-01, 1.678e-01)); + r += mul(s7_5, M4(-1.103e-01, 3.382e-03, -3.581e-02, 4.331e-02, -8.628e-02, 5.449e-02, 1.768e-02, 1.167e-01, -7.616e-02, 1.801e-03, -1.890e-01, 1.538e-01, 1.400e-01, 2.025e-02, -9.030e-02, -1.654e-01)); + r += mul(s7_6, M4(-4.734e-02, -1.122e-02, -7.030e-02, -3.296e-02, -4.429e-02, -1.066e-02, 5.152e-02, -4.982e-02, 7.570e-02, -6.662e-02, -2.449e-02, 4.657e-02, 3.968e-02, 1.738e-01, -6.707e-02, 8.950e-02)); + r += mul(s7_7, M4(2.593e-02, -8.762e-02, 2.691e-02, 2.567e-02, -2.261e-02, -8.078e-02, 2.215e-02, -1.150e-01, 4.263e-02, 3.631e-02, 2.093e-01, -1.596e-01, -2.217e-01, -5.036e-02, 3.256e-01, -4.437e-01)); + r += mul(s7_8, 
M4(9.507e-02, -1.750e-01, 5.547e-02, 2.569e-02, -6.372e-02, -6.292e-02, 1.046e-01, 4.929e-02, -1.081e-01, -5.278e-02, -1.532e-01, 9.554e-02, 1.158e-01, -2.101e-01, 9.519e-02, 9.693e-02)); + r += V4(1.201e-02, 1.083e-03, -4.431e-02, -2.855e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.593e-02, 6.965e-02, -5.918e-03, 3.008e-02, 4.446e-02, -6.509e-02, 6.633e-02, -1.259e-01, 1.482e-01, 4.613e-02, -7.350e-03, 3.255e-02, 1.542e-01, -1.126e-01, 1.986e-01, -1.911e-01)); + r += mul(s0_1, M4(-8.134e-02, -8.285e-02, -2.539e-03, -4.290e-02, -6.715e-02, 1.421e-02, -1.284e-02, -1.869e-02, -1.153e-01, -8.007e-02, -3.951e-02, -1.923e-01, 2.231e-01, 1.209e-03, -1.015e-01, -1.954e-01)); + r += mul(s0_2, M4(-1.721e-02, 1.054e-01, -2.497e-02, -1.724e-02, -2.214e-02, -1.559e-02, -1.890e-02, -3.045e-02, 6.823e-03, -5.394e-02, -9.170e-03, -3.634e-02, 3.752e-02, 1.746e-01, 1.125e-01, 8.602e-03)); + r += mul(s0_3, M4(-5.703e-02, -9.171e-02, 1.361e-02, -3.684e-02, -4.692e-02, 3.388e-02, -1.831e-02, 1.106e-01, -1.198e-01, 1.311e-02, 3.582e-02, 3.354e-03, -2.226e-01, 6.446e-02, -1.017e-02, 2.029e-01)); + r += mul(s0_4, M4(-6.122e-02, 1.638e-02, 2.959e-02, 7.356e-02, 3.443e-02, 2.426e-02, 1.646e-02, 6.604e-02, -1.895e-02, -1.110e-02, -3.352e-02, 8.554e-02, -1.042e-01, -1.617e-01, -9.264e-02, 8.956e-02)); + r += mul(s0_5, M4(1.202e-01, 
-8.740e-02, -5.419e-02, 2.845e-02, 6.640e-02, -1.831e-02, -9.561e-02, 2.954e-02, -5.409e-02, -1.691e-02, 4.133e-02, 6.700e-02, 1.537e-01, -2.451e-02, 2.398e-01, 4.803e-02)); + r += mul(s0_6, M4(1.267e-01, -2.486e-02, 1.156e-02, -1.058e-01, 1.161e-01, 1.732e-03, 1.920e-02, -7.541e-02, 9.098e-02, 5.822e-02, -6.071e-03, -2.099e-02, -2.110e-01, 1.116e-01, 3.081e-02, -9.744e-02)); + r += mul(s0_7, M4(5.810e-02, 9.912e-02, -3.762e-02, 5.625e-02, 4.588e-02, 6.554e-02, -4.150e-02, 1.172e-01, -6.263e-02, -5.435e-02, 1.023e-01, -2.423e-02, -7.424e-02, -1.023e-01, 1.303e-01, 1.908e-01)); + r += mul(s0_8, M4(3.099e-03, 2.677e-02, 7.748e-02, 3.166e-03, -1.029e-01, 5.916e-02, 1.292e-01, -4.601e-02, 1.430e-01, -3.183e-02, -2.547e-02, 4.187e-02, -1.713e-01, -1.143e-01, -7.347e-02, -3.159e-01)); + r += mul(s1_0, M4(3.408e-01, -2.309e-01, -1.278e-02, -2.090e-01, -1.190e-01, 5.742e-02, 2.033e-03, -8.545e-02, -7.710e-02, -2.276e-01, -3.857e-02, 1.980e-01, 3.238e-02, -9.525e-02, -2.383e-02, 7.602e-02)); + r += mul(s1_1, M4(1.939e-01, -1.004e-02, 8.190e-02, 1.505e-01, 1.839e-01, 3.155e-03, -3.659e-02, 1.297e-01, 5.044e-02, -1.145e-01, -7.815e-03, 1.335e-01, -9.720e-03, 2.058e-02, -1.485e-02, 1.543e-02)); + r += mul(s1_2, M4(8.855e-02, 2.239e-03, -4.702e-02, -3.631e-01, -7.713e-02, -6.766e-03, 2.233e-03, 9.525e-03, 1.930e-02, -1.823e-01, 1.342e-01, -6.774e-03, 1.559e-02, 6.537e-02, -1.795e-02, -4.694e-02)); + r += mul(s1_3, M4(-4.576e-02, -6.144e-02, -6.138e-02, 1.686e-01, -2.395e-02, 9.951e-02, 7.405e-02, 3.068e-02, -1.074e-01, -4.246e-02, -2.766e-01, -1.405e-01, -9.167e-02, 1.779e-02, 5.818e-02, -5.015e-02)); + r += mul(s1_4, M4(-4.979e-01, -9.989e-03, 1.713e-01, -1.294e-01, -7.555e-02, -2.530e-02, -1.001e-01, 1.538e-01, -3.042e-02, 1.180e-01, 1.838e-02, 4.506e-02, 4.005e-02, -9.967e-03, -4.310e-02, -8.393e-02)); + r += mul(s1_5, M4(-2.830e-02, -3.461e-01, 1.282e-01, 3.577e-02, -1.393e-02, 1.394e-02, -1.023e-01, -1.135e-02, 1.627e-01, -3.099e-03, -2.302e-01, 1.441e-01, -3.462e-02, 
6.403e-02, 5.452e-02, 1.090e-01)); + r += mul(s1_6, M4(-1.832e-01, 7.516e-03, 1.466e-02, 3.691e-01, -1.559e-01, -4.819e-02, 7.241e-03, 9.579e-02, -2.765e-01, -1.187e-01, -2.102e-01, -5.277e-02, 1.527e-01, 8.032e-02, 4.021e-02, -6.910e-04)); + r += mul(s1_7, M4(-2.208e-01, -1.051e-01, -2.252e-01, 9.503e-02, 1.986e-01, -2.264e-01, 1.020e-01, 2.529e-02, 1.254e-02, -1.091e-02, 1.148e-01, -1.693e-01, -6.387e-02, -3.649e-02, -2.184e-02, -5.988e-02)); + r += mul(s1_8, M4(-1.843e-01, 1.650e-02, 4.734e-02, -2.337e-01, -4.757e-02, -1.540e-01, 8.401e-02, -3.806e-02, -1.397e-01, -9.808e-02, -1.770e-03, -9.557e-02, -2.257e-02, -4.093e-02, -6.275e-02, 3.909e-02)); + r += mul(s2_0, M4(2.269e-01, 4.167e-02, -2.057e-01, 1.879e-01, -1.647e-01, 4.798e-03, -4.511e-02, 3.278e-02, -3.302e-02, -1.713e-02, -4.034e-02, -8.332e-02, -1.108e-02, -4.393e-02, -1.385e-01, -1.401e-01)); + r += mul(s2_1, M4(1.445e-01, 2.818e-02, -1.505e-01, 1.646e-02, 2.380e-02, 4.553e-02, 1.564e-01, 6.671e-02, 3.429e-02, 1.754e-02, -5.880e-02, 6.240e-02, -2.534e-01, 3.632e-01, 7.507e-02, 9.621e-02)); + r += mul(s2_2, M4(-1.752e-01, -2.406e-02, -8.135e-02, -7.007e-02, 3.708e-02, -1.097e-01, -2.867e-02, 6.910e-02, -1.798e-01, -2.029e-02, 3.509e-02, -5.026e-02, -1.245e-01, -2.131e-03, -3.901e-02, 2.729e-02)); + r += mul(s2_3, M4(6.957e-02, 5.150e-02, -9.893e-02, -3.423e-02, 1.625e-02, 1.428e-02, 4.128e-03, 2.378e-02, 6.833e-02, 7.371e-02, -4.309e-02, 2.038e-01, -5.441e-02, -1.090e-01, -5.788e-02, 6.229e-02)); + r += mul(s2_4, M4(-2.013e-01, 5.460e-03, -5.024e-02, -4.798e-02, 3.047e-02, -1.250e-01, -2.914e-02, -2.341e-02, 8.051e-02, -1.409e-01, 1.108e-02, -2.132e-01, 1.834e-01, 3.419e-01, -1.062e-02, -5.360e-02)); + r += mul(s2_5, M4(1.434e-01, -1.899e-02, 2.671e-02, -7.373e-02, 7.201e-02, -1.086e-01, -3.585e-02, 1.098e-01, -2.883e-03, 6.731e-02, 1.686e-02, -4.630e-02, -6.464e-02, -1.193e-01, 7.557e-02, 1.030e-02)); + r += mul(s2_6, M4(5.960e-03, 1.333e-01, 2.685e-01, 5.152e-02, -1.077e-01, -5.213e-03, -3.739e-02, 
1.658e-02, -1.064e-01, 1.801e-01, -6.249e-02, 2.196e-02, -4.823e-02, 2.505e-03, -3.396e-02, 6.965e-02)); + r += mul(s2_7, M4(3.899e-02, 1.850e-01, -1.271e-01, -9.524e-02, -1.084e-02, -1.428e-02, 8.129e-03, -1.078e-02, -9.281e-02, 1.151e-02, -1.851e-02, 4.171e-02, 6.663e-02, 1.382e-01, 4.955e-02, -9.132e-02)); + r += mul(s2_8, M4(-3.082e-02, -8.253e-02, -3.504e-02, 8.185e-02, -1.313e-02, 2.212e-02, 2.972e-02, 5.000e-02, -1.166e-01, -5.651e-02, -5.432e-02, 4.694e-02, -1.099e-01, 2.039e-02, 2.069e-02, -4.557e-03)); + r += mul(s3_0, M4(-1.585e-02, 1.466e-02, -4.267e-02, -5.614e-02, 1.164e-01, -6.371e-02, -9.176e-02, 7.206e-02, -1.356e-02, 4.092e-02, -5.365e-02, -1.641e-03, 4.063e-02, -6.379e-02, 2.210e-02, 1.436e-03)); + r += mul(s3_1, M4(6.984e-02, -5.214e-02, -4.253e-02, -1.445e-02, 4.849e-02, 8.436e-02, 3.124e-01, -4.697e-01, 1.115e-01, -6.359e-02, -1.039e-01, 7.934e-02, -5.735e-02, -1.229e-01, 9.510e-02, 6.278e-02)); + r += mul(s3_2, M4(1.688e-02, -1.271e-01, -4.236e-03, -6.955e-03, 2.137e-01, 1.715e-01, -1.716e-01, -1.164e-01, 5.225e-02, -3.736e-02, 8.417e-02, -1.413e-02, -2.981e-02, 6.164e-03, -5.309e-02, -7.856e-03)); + r += mul(s3_3, M4(1.087e-01, -1.362e-01, 4.361e-02, -9.546e-02, 3.759e-02, -2.007e-02, 1.285e-02, 3.452e-02, 5.193e-03, 9.202e-02, 4.827e-02, 8.008e-02, 3.363e-02, 1.744e-02, 3.567e-02, -9.209e-02)); + r += mul(s3_4, M4(-1.720e-01, 2.331e-01, 3.583e-02, 1.114e-01, 1.937e-02, 1.706e-01, 9.833e-02, 9.969e-05, -1.900e-01, 8.046e-02, 9.143e-02, -1.431e-01, 1.450e-01, -1.233e-01, 7.880e-02, -3.435e-01)); + r += mul(s3_5, M4(-1.042e-01, 2.382e-02, -2.653e-02, 1.030e-01, 1.121e-01, 9.533e-02, 1.601e-01, -1.002e-01, 1.912e-01, -6.912e-02, -4.182e-02, -7.941e-03, 1.306e-01, 5.380e-02, 6.607e-02, -1.478e-01)); + r += mul(s3_6, M4(-4.080e-02, 4.066e-02, 1.245e-01, 1.445e-02, 7.831e-03, -4.103e-02, 4.853e-02, 2.722e-02, 4.773e-02, 7.711e-02, 5.442e-02, 1.356e-02, 1.513e-02, -1.077e-01, -1.314e-01, 1.031e-01)); + r += mul(s3_7, M4(8.426e-03, 9.401e-02, 
2.827e-04, -7.014e-03, -1.776e-02, 2.451e-03, -3.457e-02, -3.021e-02, 1.775e-01, 1.547e-01, -4.098e-02, 8.217e-02, -6.828e-02, -1.023e-01, -1.618e-01, 1.609e-01)); + r += mul(s3_8, M4(-3.647e-02, -7.072e-02, -2.245e-02, 3.874e-02, 1.091e-01, 5.723e-02, 8.320e-02, -5.249e-02, -1.304e-01, -9.974e-03, 5.121e-03, -2.301e-02, 4.467e-02, 1.580e-02, 1.349e-02, -3.634e-02)); + r += mul(s4_0, M4(2.720e-01, 5.417e-02, 6.718e-03, 1.218e-01, -3.485e-02, 4.959e-02, 3.606e-02, 1.036e-01, -2.731e-02, 2.062e-01, -5.221e-02, 1.929e-02, -1.417e-02, -6.537e-02, -7.646e-02, 8.719e-02)); + r += mul(s4_1, M4(-1.510e-01, 1.426e-01, -2.365e-02, 1.612e-01, 6.854e-02, 8.572e-02, -2.559e-03, -2.420e-03, -7.717e-02, -6.347e-02, -2.160e-02, -4.633e-03, -9.307e-02, 1.950e-01, -9.713e-02, -5.982e-02)); + r += mul(s4_2, M4(-6.662e-02, -2.942e-01, -1.199e-01, -1.262e-01, -6.380e-02, -6.602e-02, -6.361e-02, 7.754e-02, 2.637e-02, 2.350e-02, -1.359e-02, 7.084e-03, -1.043e-04, -6.505e-02, -2.926e-02, -6.666e-02)); + r += mul(s4_3, M4(3.742e-02, -8.908e-02, 5.752e-02, -1.911e-01, -8.525e-02, -4.037e-02, 1.399e-01, -1.822e-01, 7.731e-02, -1.682e-01, 1.426e-01, -1.243e-02, 6.935e-02, -1.187e-01, 7.949e-02, -1.088e-01)); + r += mul(s4_4, M4(-6.495e-02, -3.361e-01, -5.254e-01, -1.997e-01, 5.195e-02, 7.322e-03, -8.742e-02, 7.757e-02, -6.494e-02, -4.323e-02, -6.081e-02, 3.559e-02, 1.329e-01, 6.854e-02, 1.363e-02, 2.562e-02)); + r += mul(s4_5, M4(1.904e-01, -5.556e-02, 1.531e-01, -3.999e-01, -8.230e-02, -8.662e-03, -5.523e-03, 1.186e-01, 8.731e-02, 7.370e-02, 9.071e-03, -6.335e-02, 2.061e-02, -1.069e-01, 1.168e-01, -6.441e-02)); + r += mul(s4_6, M4(-2.114e-01, 2.655e-01, -1.276e-01, 1.694e-02, 1.000e-01, -6.448e-03, 5.444e-02, -1.185e-01, -7.768e-02, 6.957e-02, -4.817e-02, -3.406e-02, -2.532e-02, 6.830e-03, 5.735e-02, -6.098e-03)); + r += mul(s4_7, M4(-1.183e-01, 2.141e-01, 3.308e-02, 8.856e-02, 1.164e-01, 7.402e-02, 5.863e-02, 2.378e-02, 8.073e-02, 8.703e-02, 1.127e-01, -3.097e-02, 6.431e-02, 7.934e-02, 
-1.421e-01, -1.177e-01)); + r += mul(s4_8, M4(1.911e-02, 6.064e-02, -1.190e-01, -1.652e-02, 1.772e-02, -3.202e-02, 3.647e-02, -1.037e-01, -4.549e-02, -8.411e-02, -9.706e-02, 3.851e-02, 6.431e-02, -1.152e-02, -4.240e-02, -1.974e-03)); + r += mul(s5_0, M4(7.443e-02, -6.928e-02, 2.351e-02, 4.930e-02, 1.168e-01, -1.628e-01, -4.279e-02, -1.200e-01, -1.192e-01, -2.831e-02, -1.701e-01, -3.926e-02, -5.642e-03, -3.749e-02, 3.475e-02, -7.582e-03)); + r += mul(s5_1, M4(9.183e-02, 1.007e-02, -4.422e-02, -3.565e-02, -1.265e-01, 4.932e-02, 6.144e-03, -1.169e-01, 1.116e-01, 8.644e-02, 7.402e-02, -7.735e-02, 4.172e-02, 8.963e-02, 5.663e-02, -9.079e-02)); + r += mul(s5_2, M4(1.033e-01, 1.121e-01, -6.600e-02, -7.067e-02, 8.008e-02, 5.074e-03, 2.116e-02, 1.021e-01, -2.078e-01, -3.313e-02, 2.813e-02, 2.715e-01, -7.777e-02, -5.784e-03, 4.669e-02, 1.617e-01)); + r += mul(s5_3, M4(-3.387e-02, 6.943e-04, -3.529e-02, 3.672e-02, 9.172e-03, 1.474e-01, -1.151e-01, 3.213e-01, -6.365e-02, 8.501e-02, 3.243e-02, -1.321e-02, 9.236e-03, -1.537e-02, 8.765e-02, -1.485e-01)); + r += mul(s5_4, M4(-4.213e-02, 4.288e-02, -3.889e-02, -4.191e-02, 9.213e-02, 1.161e-02, 2.505e-02, 6.496e-02, 2.284e-01, 1.543e-02, 1.013e-01, 2.034e-01, -4.164e-02, 1.233e-01, 3.504e-02, 1.310e-01)); + r += mul(s5_5, M4(-5.845e-02, -3.905e-02, 1.690e-01, 9.346e-02, 1.464e-01, -3.299e-02, 7.769e-02, 1.264e-01, -1.575e-01, -5.546e-02, 1.137e-01, -3.536e-01, -1.947e-01, 6.519e-02, -9.272e-02, 2.226e-01)); + r += mul(s5_6, M4(-8.464e-02, -2.263e-02, 3.167e-02, 2.957e-02, 8.858e-02, 3.201e-02, 1.245e-01, 7.283e-02, 1.784e-01, -3.979e-01, -1.130e-01, 2.907e-01, -4.962e-02, 5.083e-02, -3.487e-02, 8.182e-02)); + r += mul(s5_7, M4(5.534e-03, -1.940e-02, 3.034e-02, 4.015e-02, -2.632e-01, -5.820e-02, 2.558e-01, 5.220e-03, 2.039e-01, 1.333e-01, 1.526e-01, -7.479e-03, -2.902e-02, -1.619e-02, -9.738e-02, 1.753e-01)); + r += mul(s5_8, M4(-4.383e-02, 3.273e-02, -6.726e-02, -1.053e-01, -3.610e-02, 5.577e-04, 4.668e-02, 2.124e-01, -1.050e-01, 
-7.297e-02, 1.163e-01, 6.734e-02, 1.420e-01, 3.221e-02, -5.040e-02, -6.009e-02)); + r += mul(s6_0, M4(-1.058e-01, 3.534e-04, -1.015e-01, -9.649e-02, -4.728e-01, 1.573e-01, -3.659e-01, -2.692e-02, -4.242e-02, 1.645e-01, -6.681e-02, 1.180e-01, 7.109e-02, -1.146e-01, 3.909e-03, -1.175e-01)); + r += mul(s6_1, M4(3.918e-01, -2.804e-01, 5.742e-02, -2.888e-01, -1.907e-01, 3.226e-01, -2.286e-01, 9.371e-02, -1.390e-01, 3.166e-02, -1.320e-01, -1.048e-01, 2.455e-02, 8.532e-02, -3.389e-02, 3.829e-02)); + r += mul(s6_2, M4(-1.743e-01, 5.073e-03, -1.500e-01, 1.373e-01, -9.960e-02, 4.622e-01, -1.019e-01, 2.484e-01, 6.440e-02, 1.303e-01, -4.173e-02, -2.134e-01, -1.767e-03, -2.657e-02, 5.925e-03, -1.008e-01)); + r += mul(s6_3, M4(1.635e-01, 7.175e-02, -4.210e-01, 1.910e-01, 7.190e-02, -1.673e-01, -6.935e-01, 2.881e-01, 5.265e-03, 1.428e-01, -1.461e-02, -2.738e-01, -8.529e-02, 4.118e-02, 3.806e-02, -9.775e-02)); + r += mul(s6_4, M4(-9.109e-02, 5.585e-02, -2.096e-01, 1.308e-01, 2.013e-02, -2.556e-01, -3.225e-01, -3.229e-02, 1.656e-01, 2.901e-02, -5.006e-02, 2.315e-01, 5.263e-02, 5.577e-02, -7.882e-02, 1.656e-01)); + r += mul(s6_5, M4(-4.867e-02, -3.351e-01, -2.222e-01, 2.360e-01, 2.046e-01, -4.787e-02, -2.503e-01, -1.280e-02, -1.055e-01, 1.078e-01, -7.193e-02, 3.649e-02, -1.851e-02, -5.966e-02, 3.070e-02, 6.660e-02)); + r += mul(s6_6, M4(1.316e-02, 2.458e-01, 5.478e-02, -2.485e-01, -1.203e-02, 3.974e-01, -7.347e-02, -2.075e-01, 6.866e-02, 4.869e-02, 4.098e-02, -3.162e-02, 9.583e-02, 1.312e-01, 9.177e-02, 6.787e-02)); + r += mul(s6_7, M4(1.409e-01, 3.337e-02, 2.823e-01, -2.116e-01, 1.947e-01, -2.228e-01, -5.020e-03, -1.945e-01, 1.279e-01, -8.825e-03, 1.868e-02, -1.351e-01, -6.698e-03, -1.001e-01, -1.419e-02, -9.151e-02)); + r += mul(s6_8, M4(-1.715e-01, -1.497e-01, 2.395e-01, 1.945e-01, -1.208e-01, 1.860e-01, -1.754e-01, -1.688e-01, -1.292e-01, 1.353e-01, -1.265e-01, 4.984e-02, -5.171e-02, 1.889e-02, -7.019e-02, 3.984e-02)); + r += mul(s7_0, M4(9.075e-02, 8.094e-02, -2.149e-02, 
-6.784e-02, 2.235e-02, -5.264e-02, -5.977e-02, 5.019e-02, 1.832e-01, -1.327e-01, -1.141e-01, 1.721e-02, -2.538e-01, -2.081e-01, -2.252e-02, 1.571e-01)); + r += mul(s7_1, M4(1.406e-01, 8.845e-02, 1.564e-01, 8.247e-02, 1.556e-02, 2.105e-02, 6.567e-02, 5.888e-02, -1.152e-01, 6.683e-02, -2.451e-04, -1.430e-02, -2.407e-01, 1.812e-02, 2.030e-01, -6.477e-02)); + r += mul(s7_2, M4(5.620e-02, -5.195e-02, -1.895e-01, 2.763e-02, -2.476e-02, -2.111e-03, -1.099e-02, 1.055e-01, -4.108e-02, -6.049e-03, 7.379e-02, -6.549e-04, 3.281e-02, -2.394e-01, 1.271e-01, -2.240e-01)); + r += mul(s7_3, M4(-9.938e-02, -7.305e-02, -4.676e-02, -6.918e-04, 6.597e-02, 3.119e-03, 8.736e-02, -1.131e-01, 5.605e-02, -5.043e-02, 7.552e-02, 1.419e-01, 2.350e-01, 1.710e-01, -3.276e-03, 5.825e-02)); + r += mul(s7_4, M4(-4.384e-02, 1.668e-01, -7.362e-02, 3.042e-03, -5.063e-02, -9.813e-02, 1.210e-01, -3.284e-02, 9.641e-02, 2.211e-01, -4.330e-03, 1.904e-01, 2.611e-01, 2.184e-01, 4.924e-02, 2.242e-01)); + r += mul(s7_5, M4(6.132e-02, 6.682e-03, 4.634e-02, 6.060e-02, -3.296e-02, 7.170e-02, 5.495e-02, 8.781e-03, -4.459e-02, 1.182e-02, -9.498e-03, -1.233e-01, -3.024e-01, 2.043e-01, -1.788e-01, -2.906e-01)); + r += mul(s7_6, M4(2.928e-02, 3.034e-02, 3.178e-02, -1.419e-03, -3.095e-02, 3.273e-02, 5.712e-02, 7.300e-03, 2.612e-02, -1.759e-02, 7.935e-02, -6.945e-02, -1.108e-01, 1.324e-01, -5.111e-02, 2.983e-01)); + r += mul(s7_7, M4(-1.131e-02, 6.068e-02, -3.666e-02, 9.163e-02, 3.126e-02, -3.859e-02, 6.360e-02, -1.437e-01, 2.207e-02, -6.323e-02, 1.945e-02, -2.169e-02, 1.127e-01, -1.509e-01, -1.471e-01, 2.640e-01)); + r += mul(s7_8, M4(-1.762e-01, -1.563e-01, 3.054e-02, -1.832e-02, 2.611e-02, -1.305e-02, 6.246e-02, 1.010e-02, 1.143e-02, -3.576e-02, -9.686e-02, 1.106e-01, -1.309e-01, -2.044e-01, -2.538e-02, -1.611e-01)); + r += V4(-2.302e-03, 5.607e-03, 2.301e-01, -2.287e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 
s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.704e-02, 4.119e-02, -1.107e-02, 1.828e-02, -4.632e-02, 1.098e-01, 4.771e-02, -5.593e-02, -3.240e-02, -7.122e-02, -3.222e-02, -9.674e-03, -6.694e-02, 2.575e-02, 1.674e-02, 1.786e-01)); + r += mul(s0_1, M4(4.567e-02, -1.971e-02, 1.176e-02, 5.100e-02, -3.136e-02, -2.753e-03, -1.019e-02, -1.290e-01, 5.878e-02, 2.891e-02, -1.587e-01, -8.956e-02, -7.898e-02, -3.058e-01, 1.099e-02, 2.315e-02)); + r += mul(s0_2, M4(-1.749e-02, -3.178e-02, -1.223e-02, 1.942e-03, 9.650e-02, 7.544e-02, -9.168e-02, -9.126e-02, -4.168e-02, -4.084e-02, 9.683e-02, -4.930e-02, 2.975e-02, 4.417e-02, 1.359e-01, -2.128e-01)); + r += mul(s0_3, M4(-5.991e-02, -3.668e-02, -1.044e-02, -7.535e-02, 1.101e-01, 1.680e-01, 2.074e-02, 7.483e-02, 1.019e-01, 7.522e-02, 6.974e-02, -1.916e-02, -9.800e-02, -1.716e-01, -1.917e-02, -4.692e-03)); + r += mul(s0_4, M4(1.008e-01, 4.064e-02, -7.615e-02, 1.377e-02, -2.184e-02, 8.471e-02, 2.393e-01, 3.580e-02, -2.858e-01, 1.165e-02, -1.603e-01, 3.959e-02, -3.520e-01, 2.286e-01, 3.095e-01, 8.209e-02)); + r += mul(s0_5, M4(5.105e-02, 9.846e-02, -4.108e-02, 4.437e-02, 1.197e-01, 1.152e-02, 2.312e-01, 9.272e-02, -6.910e-02, 1.006e-02, 6.812e-02, 8.346e-02, -8.388e-02, -9.457e-02, -1.832e-01, -5.835e-02)); + r += mul(s0_6, M4(3.764e-02, -1.213e-01, -4.007e-03, -1.155e-01, -1.117e-01, -1.373e-01, -8.250e-02, 5.700e-02, -5.925e-02, -5.457e-02, -1.250e-01, -2.913e-02, 2.378e-01, 1.483e-01, 4.152e-02, -1.239e-01)); + r += 
mul(s0_7, M4(-3.888e-02, 7.090e-02, 4.034e-02, 3.863e-02, 2.613e-02, -1.153e-02, -2.250e-01, 1.420e-01, 1.299e-01, -1.318e-01, -7.944e-02, 8.884e-02, -6.052e-02, -6.205e-02, 2.843e-02, 2.440e-01)); + r += mul(s0_8, M4(-6.049e-02, -3.063e-02, 8.144e-02, 1.838e-02, 7.390e-02, -1.663e-02, -3.010e-02, -2.802e-02, -1.919e-01, 3.979e-03, 2.597e-01, 7.772e-02, 7.641e-02, 1.254e-01, 1.572e-01, 5.541e-02)); + r += mul(s1_0, M4(-3.671e-01, 7.588e-02, 6.621e-02, 2.213e-01, 1.526e-01, -5.132e-02, -1.760e-02, -8.277e-02, 2.952e-01, -1.816e-01, 8.647e-03, -1.812e-03, 2.321e-03, 2.707e-02, -7.167e-03, -1.863e-02)); + r += mul(s1_1, M4(-2.608e-02, 1.196e-01, -3.251e-03, -5.070e-02, 8.266e-02, -1.079e-01, -9.725e-02, -1.451e-02, 4.633e-03, 1.263e-01, -9.275e-02, 9.332e-03, 8.173e-02, 9.255e-03, -4.296e-03, 2.027e-02)); + r += mul(s1_2, M4(-1.099e-01, -6.099e-02, -5.077e-02, -5.578e-02, 3.175e-02, -4.658e-02, 2.868e-02, 5.742e-02, 9.943e-02, -6.563e-02, 8.187e-02, 9.414e-03, -4.800e-02, -8.106e-03, 1.858e-02, 1.403e-02)); + r += mul(s1_3, M4(9.989e-02, -8.257e-02, -1.219e-02, -6.931e-02, -6.557e-02, -5.428e-02, -8.126e-02, -9.715e-02, 9.848e-02, -8.316e-02, 7.289e-03, -3.675e-01, 6.269e-03, 3.037e-02, 1.013e-01, -8.937e-02)); + r += mul(s1_4, M4(-2.259e-01, -1.506e-01, -1.612e-01, 6.220e-02, -1.166e-01, 7.580e-02, 4.015e-02, -5.506e-02, 2.777e-01, 1.057e-01, -9.536e-02, 6.809e-02, 9.972e-02, -1.624e-02, -5.486e-02, -6.122e-02)); + r += mul(s1_5, M4(1.593e-01, 1.424e-01, -1.001e-01, 6.205e-02, -1.405e-01, -5.176e-02, 3.637e-02, -1.473e-02, 3.244e-01, 7.078e-02, -1.401e-02, -7.216e-02, -3.488e-03, 2.172e-02, -8.795e-02, 5.266e-02)); + r += mul(s1_6, M4(1.479e-01, -1.625e-01, -3.014e-03, 3.855e-02, 1.537e-01, 4.484e-02, -4.470e-02, 1.050e-01, -1.931e-01, 1.252e-01, -2.244e-02, 1.603e-02, -7.039e-02, -1.379e-01, -7.265e-02, -4.966e-02)); + r += mul(s1_7, M4(-3.219e-01, 2.311e-01, 4.962e-02, 8.471e-02, -1.078e-01, 7.598e-02, -9.423e-02, 8.618e-03, 1.271e-01, 1.630e-01, -9.076e-02, 
-3.370e-02, 5.630e-02, -4.028e-02, 9.981e-02, 1.128e-01)); + r += mul(s1_8, M4(-3.702e-02, 7.422e-02, 1.541e-01, -1.435e-01, -1.915e-01, 1.921e-02, -6.697e-02, 2.995e-03, 1.786e-01, 1.306e-01, 8.562e-02, 1.303e-01, -1.676e-02, 1.746e-01, 2.462e-02, 2.796e-03)); + r += mul(s2_0, M4(3.366e-01, -4.757e-03, -3.828e-02, -1.766e-01, -8.184e-02, -2.720e-02, 2.907e-02, 1.040e-02, 1.321e-02, -6.691e-04, 1.528e-02, -8.315e-02, 6.216e-02, -4.957e-02, 7.242e-03, -5.866e-02)); + r += mul(s2_1, M4(-5.713e-02, 1.256e-01, -4.166e-02, 9.245e-02, 2.673e-02, 1.048e-01, 8.198e-02, -2.185e-01, 7.978e-02, 2.214e-02, 3.528e-02, -1.856e-02, 9.841e-02, -5.330e-02, 1.050e-01, 4.323e-02)); + r += mul(s2_2, M4(9.369e-02, -1.272e-01, 7.039e-02, 4.077e-02, -5.287e-02, 3.679e-02, -2.594e-01, 9.411e-02, -9.286e-02, -4.824e-02, 3.952e-02, 5.785e-02, 9.744e-02, 1.937e-02, 7.451e-02, 8.110e-02)); + r += mul(s2_3, M4(-2.094e-02, 3.822e-02, 3.571e-03, 7.506e-02, 1.505e-01, 3.495e-02, 2.335e-02, 1.998e-02, 1.719e-01, 3.476e-02, 2.892e-02, -9.099e-02, 1.641e-01, -1.109e-01, -7.744e-02, 1.493e-01)); + r += mul(s2_4, M4(-1.261e-01, 9.058e-02, -1.296e-01, 6.449e-02, 8.341e-02, 9.417e-02, 1.340e-01, -1.847e-01, -2.666e-02, -2.729e-02, 5.653e-03, 1.913e-02, 2.631e-01, -2.436e-01, 2.820e-01, 2.498e-01)); + r += mul(s2_5, M4(-3.645e-02, -9.327e-02, 1.228e-01, -1.038e-01, 9.788e-02, -4.783e-02, -2.781e-01, -1.842e-01, -8.214e-02, -5.840e-02, 1.142e-01, -7.196e-02, -2.648e-03, 7.155e-02, -1.845e-01, 1.416e-01)); + r += mul(s2_6, M4(1.153e-01, -3.824e-03, -8.120e-03, 4.746e-02, 7.938e-02, -9.856e-02, 3.290e-03, -2.859e-02, 1.614e-02, 6.969e-02, 1.000e-03, 2.190e-01, 4.881e-02, -2.336e-02, 1.215e-02, 1.337e-01)); + r += mul(s2_7, M4(-2.900e-01, -1.165e-01, 1.287e-01, -3.523e-02, -7.180e-02, -3.137e-02, -1.323e-02, -1.565e-02, -3.780e-02, 2.262e-01, 7.472e-02, 1.192e-01, -1.169e-01, 1.284e-01, 1.988e-01, 6.072e-02)); + r += mul(s2_8, M4(-2.491e-01, -1.996e-01, -9.671e-02, -1.332e-01, -5.703e-04, -1.790e-02, 
-1.025e-01, 2.045e-02, -3.150e-03, -9.094e-03, -2.392e-01, 4.622e-02, 9.624e-03, -1.643e-02, 1.445e-01, 4.723e-02)); + r += mul(s3_0, M4(-3.168e-02, -8.366e-02, -2.659e-02, 3.702e-02, -8.294e-02, 1.381e-02, 5.830e-02, 1.874e-01, 7.915e-02, -3.209e-02, 5.806e-04, -2.349e-02, 6.262e-03, 2.250e-02, -8.750e-03, 2.664e-02)); + r += mul(s3_1, M4(-4.382e-02, -4.891e-02, 2.312e-02, 8.793e-02, 1.782e-01, -1.829e-01, 1.088e-01, 9.672e-02, 5.949e-02, -1.157e-02, -2.554e-05, 6.398e-02, -6.874e-02, -9.942e-02, 1.337e-02, 6.515e-02)); + r += mul(s3_2, M4(-4.311e-02, -3.124e-02, 7.939e-02, -1.573e-02, -3.802e-04, -1.142e-01, 4.008e-02, 2.125e-01, 6.733e-02, 9.101e-03, 1.485e-02, -2.233e-02, -4.651e-02, 3.218e-02, 6.055e-02, -3.819e-02)); + r += mul(s3_3, M4(-1.060e-01, -1.063e-01, -4.364e-03, 3.000e-02, -2.110e-01, -1.171e-01, 2.608e-02, 1.687e-01, -1.311e-01, 8.075e-02, -1.713e-02, 5.390e-02, -2.806e-02, 3.210e-02, -8.352e-02, 1.346e-01)); + r += mul(s3_4, M4(1.472e-02, 8.261e-02, -1.026e-01, -3.850e-02, -7.406e-02, 2.109e-01, 1.440e-01, 2.049e-01, 6.016e-03, -4.091e-02, -2.023e-02, 4.947e-02, -8.169e-02, -1.949e-01, 3.551e-01, 1.635e-01)); + r += mul(s3_5, M4(1.951e-04, 5.917e-02, 1.018e-01, -3.967e-02, -1.626e-01, 3.372e-02, 4.666e-02, 1.537e-01, 2.234e-01, 1.110e-01, 1.100e-01, -9.203e-02, -2.231e-02, -4.048e-02, -1.615e-01, 2.928e-03)); + r += mul(s3_6, M4(1.628e-01, 1.837e-02, -7.830e-03, 1.189e-01, 4.063e-02, 5.098e-02, -2.359e-02, 1.483e-02, -1.524e-02, -4.939e-02, -1.179e-01, 1.755e-01, -6.125e-02, 5.565e-02, 6.117e-02, 1.694e-02)); + r += mul(s3_7, M4(1.042e-01, 6.966e-02, 1.168e-01, -5.148e-02, 9.320e-03, 1.017e-01, -3.727e-03, -1.017e-02, 2.702e-02, 3.255e-02, -1.465e-01, 2.200e-01, -3.505e-03, 2.000e-01, 2.568e-01, -2.003e-01)); + r += mul(s3_8, M4(1.459e-01, -2.493e-02, -1.126e-01, -1.174e-01, 3.386e-02, 1.020e-01, -1.687e-01, -2.750e-02, 4.996e-02, 1.166e-01, -2.836e-01, 6.469e-03, -2.094e-02, 6.162e-02, 1.218e-01, -3.918e-02)); + r += mul(s4_0, M4(-5.989e-02, 
-1.275e-01, -1.069e-02, -1.562e-01, 1.509e-02, -3.651e-02, -2.489e-02, 5.102e-02, -9.109e-03, 8.227e-02, 2.166e-02, 2.914e-02, 1.190e-01, 3.730e-02, 2.797e-02, -7.265e-02)); + r += mul(s4_1, M4(-1.879e-01, 1.295e-01, 1.702e-01, 9.584e-02, -1.735e-02, 7.957e-02, 5.189e-02, 6.472e-02, -8.307e-02, 6.680e-02, 3.311e-02, 4.058e-02, -1.224e-02, 4.445e-02, -7.185e-02, -8.199e-02)); + r += mul(s4_2, M4(5.254e-01, -3.634e-01, 1.662e-01, -2.114e-01, 1.411e-01, -2.478e-02, -5.932e-02, 3.418e-02, -1.432e-02, -3.958e-02, 1.435e-02, -8.904e-03, 8.548e-02, -5.572e-03, -2.835e-02, 6.525e-02)); + r += mul(s4_3, M4(-1.715e-01, 7.230e-02, 6.736e-02, 1.704e-01, 7.206e-02, 2.094e-02, -2.504e-02, -1.880e-02, -5.422e-02, -1.249e-02, 1.255e-02, 7.395e-02, -1.611e-02, -2.501e-02, -2.455e-02, -5.480e-02)); + r += mul(s4_4, M4(-1.826e-01, 3.723e-01, 1.412e-02, 5.877e-02, 4.934e-03, -3.280e-02, 5.855e-02, 6.245e-02, 1.276e-01, -9.475e-03, 3.968e-02, -1.804e-01, 4.664e-02, -1.505e-02, -1.136e-01, 6.298e-02)); + r += mul(s4_5, M4(3.330e-01, -3.862e-01, 1.600e-01, 6.900e-02, 2.724e-02, 1.359e-01, -2.742e-02, -9.879e-03, -1.471e-02, 4.607e-02, -4.352e-02, -3.443e-02, -1.949e-01, 7.837e-02, -2.380e-01, -1.305e-01)); + r += mul(s4_6, M4(5.690e-01, -7.987e-02, 7.209e-02, -1.874e-01, 9.314e-02, -5.132e-02, 3.539e-02, -7.483e-02, -5.569e-02, 5.700e-03, -3.336e-02, 2.188e-02, -2.922e-02, -3.125e-02, 1.676e-02, -1.950e-02)); + r += mul(s4_7, M4(-3.250e-01, -1.844e-01, 7.516e-02, 3.934e-02, -2.323e-01, 2.582e-02, 1.669e-01, 6.127e-02, 1.601e-02, -1.399e-01, -8.601e-02, 2.964e-02, -6.863e-02, 2.621e-02, 8.180e-02, -2.546e-02)); + r += mul(s4_8, M4(-2.926e-01, -1.685e-01, 1.073e-01, -1.693e-01, 2.371e-02, -7.114e-03, -1.195e-01, -6.144e-02, -1.576e-03, 1.150e-03, 4.983e-02, 2.978e-02, 4.129e-02, -8.080e-02, -1.987e-01, -5.699e-02)); + r += mul(s5_0, M4(-5.547e-02, 5.564e-02, -3.508e-02, 6.241e-02, 9.668e-03, 1.678e-01, 1.267e-02, 1.256e-01, 1.004e-01, -2.921e-02, 8.177e-02, 1.503e-02, -5.694e-02, 
2.443e-02, -1.275e-03, 4.726e-02)); + r += mul(s5_1, M4(1.197e-01, -5.710e-02, 4.705e-02, -5.166e-02, 1.163e-05, 5.874e-02, 3.331e-02, -6.039e-02, -4.023e-02, -4.927e-02, -1.532e-02, -4.419e-02, -1.910e-01, 6.584e-02, -1.220e-01, 1.078e-02)); + r += mul(s5_2, M4(-1.161e-01, -2.322e-02, -6.482e-02, -2.701e-02, -3.645e-02, 3.033e-03, -4.483e-02, 1.678e-02, -2.286e-01, -1.095e-02, 1.366e-02, 6.744e-02, 5.017e-02, 1.416e-02, -7.673e-03, -6.489e-02)); + r += mul(s5_3, M4(-8.640e-02, 1.609e-02, 2.547e-02, 3.356e-02, 1.516e-01, 7.014e-02, -3.872e-02, -1.409e-01, 2.643e-01, -2.685e-01, -3.616e-02, 8.870e-02, 1.947e-01, -1.644e-01, -7.013e-02, 1.115e-03)); + r += mul(s5_4, M4(-1.069e-01, -5.233e-02, -9.943e-02, -1.125e-01, -6.419e-02, -1.226e-02, 1.305e-02, 2.079e-01, -2.032e-01, 2.021e-01, 1.009e-01, -6.994e-02, 9.324e-02, 3.447e-02, -5.153e-02, 1.470e-01)); + r += mul(s5_5, M4(5.229e-02, 1.494e-02, 8.839e-02, 3.341e-02, -1.026e-01, 1.777e-02, -1.833e-02, -1.176e-03, 2.667e-01, 5.999e-02, -8.439e-02, -1.041e-01, 9.569e-02, 9.370e-02, -1.087e-01, -1.321e-03)); + r += mul(s5_6, M4(8.497e-02, -5.761e-03, 5.910e-02, -4.775e-02, -3.771e-02, 2.980e-01, -7.506e-02, -1.460e-01, -7.385e-03, 2.882e-02, -5.731e-02, 2.208e-01, 1.702e-02, 5.164e-02, -3.545e-02, 2.515e-02)); + r += mul(s5_7, M4(-2.783e-02, -8.353e-02, -2.801e-02, 2.072e-03, 8.138e-02, 2.568e-01, -3.335e-01, 1.420e-02, 8.183e-02, 1.000e-01, -9.473e-02, -1.010e-01, -1.686e-01, 6.347e-02, 1.110e-01, -7.072e-02)); + r += mul(s5_8, M4(1.209e-01, 4.431e-03, 7.417e-02, 5.090e-02, 9.886e-03, -2.089e-02, -2.449e-01, -1.106e-03, 1.540e-01, 1.048e-01, 8.591e-02, 1.617e-01, -8.105e-02, -4.884e-02, -1.619e-01, -1.208e-01)); + r += mul(s6_0, M4(1.547e-01, -1.760e-01, -4.262e-03, -7.136e-03, 1.178e-02, 4.044e-02, -5.617e-02, -2.178e-01, 1.054e-02, -8.355e-02, -5.946e-02, -3.096e-01, 5.723e-02, 4.477e-02, 6.218e-02, -7.704e-02)); + r += mul(s6_1, M4(1.116e-01, 2.293e-01, 5.531e-02, -2.665e-02, -6.177e-04, 7.252e-02, 1.280e-02, 
3.330e-01, 8.625e-02, -3.961e-02, -6.907e-03, -6.870e-02, -3.605e-02, -5.864e-02, -7.659e-02, 1.959e-02)); + r += mul(s6_2, M4(1.637e-01, 1.836e-01, 1.450e-01, -8.673e-02, -2.056e-01, -2.885e-01, -4.169e-02, -1.281e-01, -1.098e-02, -7.054e-03, 3.404e-02, -1.208e-01, 4.963e-02, -7.820e-02, 2.431e-03, -3.848e-02)); + r += mul(s6_3, M4(-1.303e-01, -2.074e-02, 1.467e-03, -6.021e-01, -3.079e-01, -6.886e-02, 1.388e-01, 4.609e-01, 7.498e-02, -1.014e-01, -9.830e-02, -9.333e-03, -8.884e-03, 1.278e-02, -1.449e-02, -3.219e-02)); + r += mul(s6_4, M4(-1.143e-01, 1.794e-02, 2.329e-01, -1.630e-02, 8.577e-02, -1.221e-01, -7.285e-02, 2.222e-01, -1.776e-01, -2.464e-01, 2.885e-01, 8.662e-02, 8.845e-02, 1.274e-01, -2.733e-02, 5.147e-02)); + r += mul(s6_5, M4(3.759e-01, 2.169e-01, -3.161e-02, 5.758e-02, 2.007e-01, -2.794e-02, -3.164e-02, 1.858e-02, -2.600e-02, -3.249e-02, -1.475e-01, 3.748e-02, 2.038e-02, 1.406e-01, -8.668e-02, 4.246e-02)); + r += mul(s6_6, M4(-1.611e-01, 1.157e-01, 2.592e-02, 1.471e-01, -2.053e-01, 2.816e-01, 4.534e-02, -6.995e-02, -4.584e-02, 1.075e-01, 5.757e-03, 1.091e-01, 2.551e-02, -9.300e-03, -2.473e-02, 4.925e-02)); + r += mul(s6_7, M4(-2.261e-01, 9.102e-02, 2.608e-02, 3.204e-01, 3.703e-01, -1.067e-01, -2.237e-02, -5.974e-02, -1.959e-02, 1.104e-01, -1.742e-01, -5.755e-02, -2.644e-02, -4.180e-02, 1.236e-01, -5.114e-02)); + r += mul(s6_8, M4(3.876e-01, 4.009e-01, 2.010e-01, -2.782e-02, -1.762e-01, 1.997e-01, 4.941e-02, -1.995e-01, -8.606e-02, -1.616e-02, 3.457e-02, 1.450e-01, -1.172e-01, 1.029e-02, 2.820e-02, -1.025e-02)); + r += mul(s7_0, M4(1.647e-02, -5.545e-02, -2.490e-02, -1.293e-02, -7.596e-02, -8.660e-03, -2.008e-02, 3.545e-02, -5.051e-02, -3.824e-02, 1.400e-02, 1.164e-02, -4.095e-02, -1.023e-01, 1.365e-01, 9.583e-02)); + r += mul(s7_1, M4(1.787e-02, -4.811e-02, -3.144e-02, -6.334e-02, -7.119e-03, 4.822e-02, -1.003e-01, 9.954e-02, 1.284e-01, -4.100e-02, 1.033e-01, -6.504e-02, 9.758e-02, -3.433e-01, -1.493e-02, 1.551e-01)); + r += mul(s7_2, M4(-6.225e-02, 
-9.759e-02, -1.172e-01, -5.663e-02, -4.019e-02, -2.128e-02, -2.677e-02, 2.618e-02, -3.027e-02, 3.373e-02, -9.503e-02, -1.677e-02, -2.570e-01, 1.408e-01, 7.404e-03, -6.859e-03)); + r += mul(s7_3, M4(1.006e-01, -5.592e-02, 6.538e-02, -6.578e-02, -2.373e-02, -1.070e-01, 6.332e-02, -4.559e-02, 1.217e-01, 9.091e-02, 1.280e-01, 1.921e-02, 1.554e-01, -1.313e-01, -5.379e-02, -1.016e-01)); + r += mul(s7_4, M4(2.410e-02, -4.736e-02, 2.086e-01, -9.212e-02, -9.352e-03, 6.391e-02, -1.045e-01, 5.797e-02, 6.874e-02, 4.194e-02, 3.504e-01, 1.034e-02, 6.150e-01, -2.923e-01, 1.252e-01, 1.000e-01)); + r += mul(s7_5, M4(-5.019e-02, 6.172e-02, 2.991e-02, -8.221e-02, 1.669e-02, -1.289e-01, -6.205e-02, -1.288e-02, 2.764e-03, 6.436e-02, -4.164e-02, -1.640e-02, 9.159e-02, -1.842e-01, 1.880e-01, 1.557e-01)); + r += mul(s7_6, M4(7.682e-03, -5.789e-02, -2.496e-02, -2.562e-02, -8.123e-02, -1.616e-04, 4.920e-02, 6.997e-02, -1.362e-01, -7.653e-02, -1.092e-01, -2.418e-02, -2.005e-01, -1.490e-01, -1.786e-02, 8.711e-02)); + r += mul(s7_7, M4(2.645e-02, -3.008e-03, -8.682e-03, -9.011e-02, 1.142e-01, 5.017e-02, 5.841e-02, -3.095e-02, 7.273e-02, 4.477e-02, -1.209e-01, -1.854e-01, -3.026e-01, -1.674e-01, 1.946e-01, 9.873e-02)); + r += mul(s7_8, M4(2.496e-03, 8.130e-02, -3.001e-02, -1.115e-02, 3.656e-03, 2.615e-01, 8.014e-02, -4.353e-02, 3.521e-02, 1.113e-02, 1.758e-01, 1.082e-01, -9.329e-02, 2.969e-01, 2.252e-02, -8.666e-02)); + r += V4(7.159e-03, -3.758e-03, 2.665e-02, -4.427e-04); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 
s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.947e-02, 8.051e-02, -2.570e-02, 1.068e-02, -1.013e-01, -1.273e-01, 4.331e-02, -9.162e-03, -7.351e-02, 4.432e-02, -9.921e-02, 1.293e-02, -3.328e-01, -1.960e-01, 1.701e-01, -6.579e-02)); + r += mul(s0_1, M4(2.520e-02, -7.575e-03, -5.535e-03, -9.261e-03, -3.295e-02, 4.939e-02, 4.853e-02, 4.567e-02, 1.335e-01, 8.591e-02, 9.288e-03, -6.721e-02, -5.151e-03, -3.211e-02, 3.165e-01, 1.095e-01)); + r += mul(s0_2, M4(-5.395e-02, -3.941e-02, 7.402e-02, -8.885e-03, 1.486e-02, 8.849e-02, -4.877e-02, -1.498e-03, -1.978e-02, -2.693e-02, 8.446e-02, -2.550e-02, 3.174e-01, -8.859e-02, -9.404e-02, -1.002e-01)); + r += mul(s0_3, M4(2.095e-02, 3.997e-02, 8.597e-02, -3.698e-02, 1.128e-01, -1.407e-01, -7.529e-02, 1.333e-01, 8.401e-03, -5.745e-02, -9.140e-02, -1.229e-02, -2.379e-01, 4.148e-02, 2.596e-01, -9.078e-02)); + r += mul(s0_4, M4(8.500e-03, -1.577e-02, 8.539e-03, 4.712e-02, -2.357e-03, -1.641e-02, 9.686e-02, -7.687e-02, 5.286e-02, 3.013e-02, 1.254e-01, 7.323e-02, -1.755e-01, -1.398e-01, 4.896e-01, 3.644e-02)); + r += mul(s0_5, M4(-4.679e-02, 1.382e-01, 3.106e-03, 7.874e-03, -3.112e-03, 6.567e-02, -9.367e-03, -7.449e-02, -1.115e-01, 8.487e-02, 1.757e-02, 1.521e-02, 1.222e-01, -4.063e-02, -1.877e-02, 1.400e-01)); + r += mul(s0_6, M4(-6.195e-03, -8.237e-02, -8.502e-02, 1.466e-02, -1.581e-01, 1.419e-01, -1.118e-01, -2.992e-03, -4.297e-02, -7.925e-02, -1.709e-01, -8.587e-02, 1.534e-01, -1.336e-01, 6.284e-02, -2.807e-02)); + r += mul(s0_7, M4(4.441e-02, 8.600e-03, 1.872e-02, -2.364e-02, 1.109e-01, -6.685e-02, 3.983e-02, -1.081e-01, 2.319e-02, 1.708e-02, 7.053e-02, 6.897e-02, 2.854e-01, -3.227e-04, -2.221e-01, 3.208e-02)); + r += mul(s0_8, M4(3.877e-02, -5.845e-02, -4.021e-02, 5.879e-03, 1.469e-02, 4.745e-02, 4.817e-02, 9.870e-02, -4.810e-02, -1.029e-02, 2.124e-01, 4.661e-02, 5.580e-02, 2.815e-01, -3.902e-02, 
7.007e-02)); + r += mul(s1_0, M4(1.851e-01, 1.982e-01, -8.419e-02, -6.920e-02, 3.217e-02, -1.721e-02, -1.412e-01, 3.260e-02, 1.819e-03, -1.164e-01, 9.689e-02, 7.218e-02, -6.876e-02, -6.621e-03, 4.684e-02, 3.613e-02)); + r += mul(s1_1, M4(1.213e-01, -5.728e-02, 6.839e-02, -6.103e-02, 5.075e-02, 3.953e-02, 1.449e-02, -1.030e-01, 3.124e-03, -8.353e-02, 1.924e-01, -4.414e-02, 8.485e-03, 6.598e-03, -8.983e-02, 1.859e-02)); + r += mul(s1_2, M4(-1.130e-01, -5.227e-03, 1.393e-01, 2.985e-02, -6.197e-02, -1.564e-01, 1.609e-02, 9.519e-02, 1.025e-01, 7.023e-02, -4.166e-02, 6.305e-02, 3.051e-02, 1.308e-02, 7.536e-03, -7.300e-02)); + r += mul(s1_3, M4(-1.041e-01, -7.686e-02, -3.057e-01, 1.432e-02, -1.866e-01, 2.454e-02, -1.028e-01, 2.281e-01, 8.083e-02, -8.418e-02, -1.414e-01, 3.425e-02, -3.559e-03, -7.205e-02, -3.959e-02, -3.331e-02)); + r += mul(s1_4, M4(7.435e-02, -2.117e-01, -1.796e-02, 1.073e-01, -1.093e-01, -1.060e-02, 9.349e-02, -1.212e-01, 9.875e-02, -1.692e-01, 1.687e-02, 1.410e-02, 2.623e-02, -1.389e-02, -5.446e-02, 2.347e-03)); + r += mul(s1_5, M4(-9.770e-02, 1.584e-01, 5.546e-02, 8.240e-02, -3.345e-03, 7.296e-03, -2.089e-02, -1.144e-01, -1.143e-01, -1.234e-01, -2.419e-01, 3.390e-02, -3.451e-02, 7.053e-02, 5.652e-02, 3.383e-03)); + r += mul(s1_6, M4(2.227e-01, -2.378e-01, -7.366e-03, 1.727e-01, 3.661e-03, 7.563e-02, 1.158e-01, 4.744e-02, 1.571e-01, -7.862e-02, 6.584e-02, 6.647e-02, 3.937e-02, 3.059e-02, 7.411e-02, -5.458e-02)); + r += mul(s1_7, M4(1.126e-01, -1.318e-01, 6.760e-02, -7.502e-02, -8.506e-02, 6.361e-02, -1.131e-01, -3.764e-02, -9.932e-02, 1.804e-02, -1.972e-01, -1.371e-01, -5.165e-03, -1.804e-02, -5.013e-02, 2.435e-02)); + r += mul(s1_8, M4(-1.332e-01, -1.041e-01, 4.485e-02, -1.181e-01, -2.204e-02, -6.541e-03, -9.146e-02, 4.346e-02, 6.873e-02, 1.732e-01, -1.697e-01, 1.764e-02, -2.447e-02, -3.191e-02, 1.106e-02, 6.543e-02)); + r += mul(s2_0, M4(-7.285e-02, -8.081e-02, -2.882e-02, 5.116e-02, -9.595e-02, -3.134e-02, -5.852e-03, 4.262e-02, 2.221e-02, 
-7.012e-02, 6.421e-02, -5.627e-02, 1.563e-02, -1.101e-01, -3.699e-02, -5.017e-03)); + r += mul(s2_1, M4(-1.895e-02, -1.349e-01, 4.072e-02, -8.592e-02, 9.225e-02, 5.737e-02, -3.620e-01, -6.333e-02, -1.187e-02, -5.571e-02, 6.266e-02, -1.954e-02, -9.780e-02, -9.125e-03, 2.080e-02, -7.937e-02)); + r += mul(s2_2, M4(-3.584e-02, -2.629e-02, -1.013e-02, -1.518e-02, -1.020e-01, 2.964e-02, -2.759e-02, -7.744e-02, 3.073e-02, 7.335e-02, 2.058e-02, 8.198e-03, 3.367e-02, -2.003e-01, 1.013e-01, 4.883e-03)); + r += mul(s2_3, M4(6.493e-02, -1.854e-01, -3.271e-02, -1.926e-03, -3.097e-02, 1.588e-02, 1.135e-01, 8.857e-02, 3.956e-02, -5.410e-02, 7.366e-02, 2.646e-01, -2.577e-01, -8.662e-02, 1.704e-01, 6.882e-03)); + r += mul(s2_4, M4(-8.632e-02, 5.495e-02, -1.114e-01, -7.373e-02, -1.587e-01, -5.582e-02, 4.193e-02, -5.838e-03, -1.714e-01, 7.996e-02, 1.193e-01, 1.880e-01, 6.001e-02, -2.543e-02, -5.180e-02, -1.304e-01)); + r += mul(s2_5, M4(-4.703e-02, -7.633e-02, -4.091e-02, 7.437e-02, -3.345e-02, -6.761e-02, -3.374e-02, -1.427e-01, 1.371e-01, 1.601e-01, 4.610e-02, 1.685e-02, 1.720e-01, -2.320e-01, -7.731e-02, 3.362e-03)); + r += mul(s2_6, M4(-4.414e-02, 2.003e-02, -1.657e-01, -1.797e-01, 3.099e-02, -7.790e-03, -2.465e-03, 7.397e-02, -4.738e-02, 3.872e-02, -4.779e-03, -1.760e-01, 5.178e-02, 1.254e-01, 4.062e-02, -1.959e-02)); + r += mul(s2_7, M4(7.194e-03, 9.460e-02, 1.942e-02, -1.386e-01, -3.507e-02, 9.574e-02, -4.142e-02, 5.273e-02, 1.645e-01, 2.898e-02, -1.497e-02, -9.664e-02, 7.539e-02, 4.883e-02, -3.846e-02, 2.114e-02)); + r += mul(s2_8, M4(-2.434e-02, 7.672e-02, -2.805e-02, 7.755e-02, -3.408e-02, -4.293e-02, 5.407e-02, 3.070e-02, -6.660e-02, -4.597e-02, 1.648e-01, -2.008e-02, -2.521e-02, -1.408e-01, -4.347e-02, -9.289e-03)); + r += mul(s3_0, M4(1.012e-01, 6.055e-03, 4.149e-02, 7.759e-02, 1.419e-01, -1.508e-01, 2.115e-03, -1.247e-01, 3.161e-02, 5.292e-02, -9.686e-02, -3.136e-02, 1.120e-01, -4.613e-02, 7.987e-02, 2.106e-02)); + r += mul(s3_1, M4(2.114e-01, 1.876e-02, -1.502e-02, 
-6.683e-02, 3.650e-02, 4.148e-02, 1.993e-01, 1.808e-01, -3.406e-02, -9.241e-02, -2.440e-02, 6.059e-02, -1.536e-01, 5.586e-02, -2.191e-02, -1.032e-01)); + r += mul(s3_2, M4(3.923e-02, -1.849e-02, 2.949e-02, 2.600e-02, -2.316e-01, 7.968e-02, -5.900e-02, -9.448e-02, -6.059e-02, 1.785e-02, 3.615e-02, -1.606e-02, -5.339e-02, 4.620e-03, -2.418e-03, 1.238e-03)); + r += mul(s3_3, M4(7.162e-02, -8.434e-03, 3.068e-02, -3.827e-02, -3.382e-02, 6.050e-02, 5.946e-02, -1.926e-02, 2.248e-02, 7.102e-02, -1.229e-01, 5.999e-02, -5.849e-02, 5.146e-02, 8.067e-02, 2.308e-01)); + r += mul(s3_4, M4(-1.226e-01, 2.114e-02, -5.832e-02, 6.470e-02, 1.614e-01, 1.545e-02, 2.034e-01, -9.283e-02, -1.988e-01, 7.386e-02, 1.004e-01, 7.819e-02, 3.907e-02, -1.181e-02, -5.301e-03, -2.760e-01)); + r += mul(s3_5, M4(9.548e-02, 1.890e-01, 4.343e-02, -5.520e-02, -8.640e-02, -2.114e-01, 1.054e-01, 2.162e-02, 4.599e-02, 9.750e-02, 4.538e-02, 1.413e-02, 3.427e-02, -1.288e-02, 1.115e-01, 2.414e-02)); + r += mul(s3_6, M4(-1.637e-01, -6.740e-02, 2.678e-02, 4.619e-02, -5.243e-02, -2.195e-02, -1.711e-01, 1.221e-01, -1.425e-02, -7.919e-02, 2.556e-03, -5.645e-02, -4.940e-02, 1.180e-01, 1.081e-01, 1.125e-01)); + r += mul(s3_7, M4(-8.975e-03, -8.873e-02, 1.425e-03, -3.873e-02, 1.571e-02, -2.780e-02, 3.422e-03, 8.513e-02, 3.560e-02, -2.184e-01, 1.488e-02, 3.050e-02, 6.037e-03, -1.835e-01, 2.810e-02, -8.370e-02)); + r += mul(s3_8, M4(1.999e-03, -5.704e-02, -1.171e-02, 2.724e-02, -9.886e-02, -1.501e-01, 3.304e-02, 8.635e-02, -5.235e-02, -1.000e-01, -1.874e-02, -2.803e-02, -6.573e-02, 9.588e-03, 1.990e-03, -1.261e-03)); + r += mul(s4_0, M4(3.205e-02, -9.399e-02, -6.611e-02, -9.457e-03, -6.284e-02, -1.119e-01, 3.832e-02, 5.318e-02, 9.238e-03, -7.695e-02, -9.238e-03, -4.017e-02, -2.402e-02, 8.154e-03, -1.508e-02, 1.066e-02)); + r += mul(s4_1, M4(-1.332e-01, -1.601e-02, 2.876e-03, -8.483e-02, -4.600e-02, 8.589e-02, 3.150e-02, 6.373e-02, -3.873e-02, -2.948e-02, 2.025e-02, 3.242e-02, -2.013e-02, -8.295e-02, -4.246e-02, 
-2.076e-02)); + r += mul(s4_2, M4(4.445e-01, 2.664e-03, 7.754e-02, 6.927e-02, -3.116e-02, 3.909e-02, -1.533e-01, -8.617e-02, 1.688e-01, 5.947e-02, 7.590e-02, 2.037e-04, 1.257e-01, -5.657e-02, 8.065e-02, -8.020e-02)); + r += mul(s4_3, M4(-1.600e-01, -2.895e-01, 6.957e-02, -2.824e-01, -3.329e-02, 3.042e-02, 1.389e-01, 1.333e-01, -8.765e-03, 7.704e-02, -3.357e-02, 7.192e-02, 1.768e-01, 1.249e-02, 7.956e-02, -4.813e-02)); + r += mul(s4_4, M4(-3.023e-01, 2.612e-01, 7.912e-02, -1.408e-01, -1.839e-01, 5.725e-03, -9.410e-03, -8.858e-02, 2.097e-02, -1.919e-02, -1.433e-02, -6.510e-02, 8.973e-02, 1.642e-02, -3.251e-01, -1.458e-01)); + r += mul(s4_5, M4(3.546e-02, -5.626e-02, 4.315e-02, 6.790e-02, -1.167e-01, 9.421e-04, -5.773e-02, -9.024e-02, -9.923e-02, -4.666e-02, 1.084e-01, -2.120e-02, 4.064e-02, -1.441e-01, -6.815e-02, 2.756e-02)); + r += mul(s4_6, M4(-9.100e-02, -3.818e-01, -4.212e-02, -1.122e-01, 4.123e-02, -8.688e-02, 8.607e-02, 1.704e-01, -4.244e-03, 4.439e-03, -8.617e-02, -2.501e-03, 7.886e-02, -8.174e-03, 1.000e-01, 6.876e-02)); + r += mul(s4_7, M4(1.803e-01, 3.233e-01, 2.117e-01, -1.421e-01, 1.628e-01, 4.659e-02, 3.521e-02, -8.703e-02, 1.315e-02, 5.076e-02, -3.939e-02, 3.778e-02, -1.133e-01, 1.274e-01, 2.083e-02, 8.438e-02)); + r += mul(s4_8, M4(2.643e-02, -1.661e-01, 1.507e-01, -1.802e-01, 1.450e-01, -1.900e-03, 8.990e-03, 5.479e-02, -6.823e-02, -3.225e-02, -4.038e-02, -1.074e-02, -5.143e-02, 4.436e-02, -8.175e-02, 9.150e-02)); + r += mul(s5_0, M4(-6.334e-02, -9.309e-02, -3.135e-02, -3.076e-02, -8.207e-02, -3.958e-02, -5.049e-02, 1.179e-01, -8.811e-02, -8.125e-03, 4.048e-02, 8.021e-02, -1.135e-01, 4.696e-03, -6.265e-02, 3.116e-02)); + r += mul(s5_1, M4(1.649e-01, 3.319e-02, 3.213e-02, 3.601e-02, 2.087e-01, 2.173e-01, -9.656e-02, 4.548e-02, -7.523e-02, -1.476e-01, -3.387e-02, -1.510e-01, -4.976e-02, 1.161e-01, 2.292e-01, -4.014e-02)); + r += mul(s5_2, M4(9.400e-02, 6.971e-02, -7.026e-02, 6.791e-02, -5.204e-02, 3.833e-03, -6.243e-02, -2.815e-02, -1.347e-01, 
8.703e-02, -2.383e-01, -2.568e-02, 3.358e-02, 2.220e-02, -5.739e-02, -1.125e-01)); + r += mul(s5_3, M4(1.075e-01, 3.235e-02, -3.910e-02, 1.644e-02, 1.422e-01, 1.588e-01, -5.108e-01, -6.150e-02, 1.071e-01, -2.141e-01, -1.436e-01, 3.852e-02, -2.299e-01, 1.624e-03, 1.762e-01, -2.059e-01)); + r += mul(s5_4, M4(-2.080e-01, 2.677e-02, 6.496e-02, -1.136e-01, -9.508e-02, -5.801e-02, 3.331e-03, -1.121e-01, -2.301e-03, -8.857e-02, -5.760e-02, 3.679e-02, 6.023e-02, -2.398e-01, 3.457e-02, 1.996e-01)); + r += mul(s5_5, M4(-1.309e-02, -4.267e-02, -6.198e-02, 7.217e-02, 1.339e-01, 1.572e-02, -8.580e-03, -4.055e-02, 3.805e-02, -2.509e-02, -2.210e-02, -2.344e-02, 1.386e-01, 1.647e-02, 7.771e-03, -2.897e-02)); + r += mul(s5_6, M4(8.146e-04, -5.321e-02, 1.214e-02, 5.411e-03, 1.198e-01, 3.410e-01, -7.843e-01, 3.745e-01, 1.624e-02, 1.452e-01, 2.391e-01, -1.338e-01, -6.920e-02, 7.982e-02, 1.024e-01, -2.698e-02)); + r += mul(s5_7, M4(2.488e-02, -9.830e-02, -7.100e-03, -5.339e-02, -1.140e-01, -2.442e-01, -1.729e-01, -8.192e-02, -4.503e-02, 2.773e-02, 1.752e-01, 9.685e-02, 9.042e-02, 9.545e-02, 1.029e-01, 1.363e-01)); + r += mul(s5_8, M4(-8.806e-03, 1.473e-01, 1.098e-01, 2.410e-02, 2.657e-02, -1.344e-01, -1.432e-01, -7.008e-02, 8.632e-02, 5.413e-02, -5.891e-02, -1.166e-01, 1.192e-01, -1.548e-02, -8.526e-02, 7.829e-02)); + r += mul(s6_0, M4(2.450e-01, 7.761e-02, 3.561e-01, -1.446e-01, 4.047e-01, 1.167e-01, 1.745e-01, 2.382e-03, 1.416e-01, -7.666e-02, -1.050e-01, -6.977e-02, 7.872e-02, 2.556e-02, -5.153e-03, 9.230e-02)); + r += mul(s6_1, M4(3.802e-01, 1.542e-02, 4.255e-01, -9.081e-02, 2.666e-01, 2.526e-01, 8.105e-02, 1.429e-01, -4.946e-02, 2.067e-02, 4.394e-02, -1.590e-01, 6.722e-02, 5.235e-02, -4.439e-02, -8.620e-02)); + r += mul(s6_2, M4(8.481e-02, -2.127e-01, 2.365e-01, 4.175e-02, 7.039e-02, -7.309e-02, -3.229e-01, -1.621e-01, 1.070e-01, -1.676e-01, -4.287e-02, 3.196e-03, -5.899e-03, 5.405e-02, 4.468e-03, -4.450e-02)); + r += mul(s6_3, M4(-1.412e-01, 3.433e-01, -2.101e-01, -3.041e-01, 
-2.616e-01, -1.542e-01, -9.384e-02, -2.081e-01, 1.167e-01, 8.468e-03, -2.153e-01, -1.362e-01, -4.372e-02, 2.660e-02, 3.163e-02, -5.996e-02)); + r += mul(s6_4, M4(-2.437e-01, 3.818e-01, -1.644e-01, 1.349e-01, -8.518e-02, 2.374e-01, 6.275e-02, -1.298e-01, -1.531e-02, 6.807e-02, -3.120e-01, 7.608e-02, 2.047e-02, 1.276e-02, -1.387e-01, 7.087e-02)); + r += mul(s6_5, M4(-1.766e-01, 4.749e-03, -1.868e-01, -1.667e-01, 2.142e-01, -1.492e-01, -1.395e-01, -1.233e-01, -8.077e-02, -8.612e-02, -1.417e-02, -6.332e-02, -6.332e-02, 5.724e-03, 5.660e-02, -1.328e-02)); + r += mul(s6_6, M4(1.225e-01, 2.327e-01, 1.340e-01, -2.051e-01, 1.648e-02, -2.087e-01, -3.521e-01, -8.182e-02, 9.954e-02, -8.236e-02, 1.116e-01, -2.865e-02, -1.693e-03, -6.582e-02, -1.012e-02, -2.251e-02)); + r += mul(s6_7, M4(1.455e-01, 1.452e-01, -1.597e-01, 6.380e-02, -1.356e-01, 9.840e-02, 1.455e-01, -3.021e-01, -5.766e-02, 1.418e-02, -2.394e-01, 1.894e-02, 6.877e-02, 1.787e-02, -2.350e-02, 9.355e-02)); + r += mul(s6_8, M4(9.602e-02, -1.250e-01, 1.102e-01, -1.857e-01, 8.558e-02, 1.271e-01, 6.047e-02, -5.316e-02, 6.796e-03, 7.151e-02, -4.392e-02, -4.725e-02, -9.142e-02, -1.618e-01, 5.364e-02, -5.614e-02)); + r += mul(s7_0, M4(1.304e-01, 1.012e-01, -1.233e-02, -1.521e-01, 3.336e-02, 1.118e-01, -4.931e-02, -1.025e-02, 8.478e-02, -2.517e-02, 3.591e-02, 4.479e-02, 3.422e-02, -2.221e-02, -7.071e-02, 4.681e-02)); + r += mul(s7_1, M4(1.072e-01, -4.905e-02, -7.925e-02, -2.376e-02, -2.374e-02, -5.839e-02, -4.969e-02, 3.719e-02, -8.523e-02, -4.878e-02, -1.247e-01, -5.047e-02, -1.749e-01, -3.122e-01, 5.099e-02, 1.078e-01)); + r += mul(s7_2, M4(-8.340e-02, -2.672e-02, -1.344e-01, 7.982e-02, 1.739e-02, -8.374e-02, -1.019e-01, -7.983e-02, 8.299e-02, -6.768e-02, 9.196e-02, -1.012e-02, -5.912e-01, 2.317e-01, 2.071e-01, -7.993e-02)); + r += mul(s7_3, M4(-1.920e-02, 8.939e-02, 4.551e-02, -6.470e-02, -5.342e-02, -8.767e-04, 5.145e-02, -1.230e-01, -8.962e-02, 7.062e-02, 2.208e-01, 1.125e-01, -3.309e-02, 9.531e-02, -2.021e-01, 
-2.237e-01)); + r += mul(s7_4, M4(-7.611e-02, -6.968e-02, 2.872e-02, 5.603e-02, -1.452e-02, -8.395e-02, 1.104e-01, -8.210e-02, 1.484e-01, 5.219e-02, -1.372e-01, -6.380e-02, 2.342e-01, -5.879e-01, 6.368e-03, 2.740e-01)); + r += mul(s7_5, M4(-7.619e-02, -6.900e-04, -1.678e-01, 4.266e-02, 7.434e-02, -1.416e-01, -1.516e-01, -1.948e-01, -8.917e-02, 5.449e-02, -8.011e-02, 1.085e-02, -5.211e-03, 2.239e-01, -7.341e-02, 2.314e-01)); + r += mul(s7_6, M4(2.660e-03, 3.388e-02, 7.300e-02, -1.387e-01, -5.790e-02, 8.450e-02, 1.907e-01, -1.624e-01, -3.738e-02, 2.133e-02, -1.700e-02, 7.501e-03, -1.577e-02, -4.350e-02, 2.273e-01, 2.076e-02)); + r += mul(s7_7, M4(3.879e-02, -4.186e-03, 7.965e-02, 5.320e-02, -4.165e-02, -4.829e-02, -1.182e-02, -2.168e-01, -4.293e-02, -9.301e-02, 3.442e-02, -6.015e-02, 1.039e-01, -8.428e-03, 2.337e-02, -7.242e-02)); + r += mul(s7_8, M4(1.261e-02, -4.033e-02, -1.000e-01, -3.256e-02, 1.559e-02, 6.993e-02, 4.303e-02, -8.646e-02, -7.752e-04, -8.626e-03, 7.203e-03, -1.830e-02, -1.236e-02, -4.381e-03, 1.953e-01, -9.189e-02)); + r += V4(8.737e-03, -8.144e-02, 2.217e-03, 4.256e-03); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 
0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 
0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, 
s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.402e-02, -1.835e-01, 6.598e-02, -1.548e-01, 5.878e-02, -5.101e-03, -2.593e-03, -7.877e-02, 4.725e-02, -9.164e-03, -1.804e-02, -3.882e-02, -3.345e-02, -7.255e-02, -3.130e-02, -5.227e-02)); + r += mul(s0_1, M4(-2.789e-02, 7.644e-02, 9.874e-02, -9.174e-02, -1.019e-01, 8.883e-02, -1.955e-02, -1.056e-01, -2.237e-02, -1.551e-01, 9.123e-02, 1.360e-04, 1.684e-01, -2.681e-03, 7.962e-03, -3.443e-02)); + r += mul(s0_2, M4(-1.995e-01, -6.410e-02, 1.272e-02, 6.584e-02, 2.568e-02, 4.247e-02, 2.778e-03, -4.476e-03, 7.411e-02, -4.275e-02, 1.673e-03, -1.613e-01, -2.389e-01, 1.794e-02, -4.776e-02, 1.287e-01)); + r += mul(s0_3, M4(2.962e-01, 2.757e-01, 1.657e-02, -1.746e-01, 3.828e-02, -1.434e-01, 4.698e-02, 3.134e-02, 1.626e-01, 1.804e-01, 2.535e-01, 9.081e-02, 6.562e-02, -5.491e-01, 2.183e-03, 2.761e-01)); + r += mul(s0_4, M4(-1.751e-01, 1.097e-01, -1.143e-01, 9.638e-02, -1.796e-01, -2.485e-01, -9.071e-03, 
2.298e-02, -1.646e-01, -5.756e-02, 5.953e-02, 4.503e-02, 2.645e-02, 2.751e-02, 7.197e-02, 4.666e-02)); + r += mul(s0_5, M4(5.779e-02, 2.535e-01, -9.481e-02, -3.661e-02, 3.967e-02, 8.468e-02, 8.229e-03, 2.280e-02, 6.538e-02, 1.966e-02, -2.516e-02, 1.073e-01, 4.655e-02, 1.398e-01, -1.964e-02, 5.247e-02)); + r += mul(s0_6, M4(-7.597e-02, -4.377e-01, 8.354e-02, -1.889e-02, 1.302e-01, -9.488e-02, -5.391e-03, 2.192e-01, 1.187e-01, -5.301e-01, 4.636e-02, 6.813e-02, -3.251e-01, 1.225e-02, -6.616e-02, -8.459e-02)); + r += mul(s0_7, M4(-1.257e-01, 1.175e-02, 1.530e-01, 2.926e-02, -2.093e-02, 1.052e-01, 4.853e-03, -8.562e-02, 4.867e-02, -8.620e-02, -3.634e-03, 9.557e-02, 7.241e-02, -3.576e-02, -1.028e-01, -1.234e-01)); + r += mul(s0_8, M4(-1.866e-01, -3.209e-02, -2.943e-02, -5.855e-02, 2.794e-04, 3.564e-02, 4.627e-03, -3.217e-02, -1.304e-01, 5.295e-02, -1.062e-01, 6.101e-02, 5.828e-04, -1.247e-01, -5.507e-02, 3.192e-02)); + r += mul(s1_0, M4(1.164e-01, -7.474e-02, -2.857e-02, -6.668e-02, 2.265e-01, -5.164e-02, -1.004e-02, 6.276e-02, -3.215e-02, 5.380e-02, -3.521e-02, -4.491e-02, 3.978e-02, -5.505e-02, -2.772e-02, -3.175e-02)); + r += mul(s1_1, M4(8.632e-02, -2.542e-02, 4.180e-02, -1.574e-03, 1.349e-01, 5.947e-02, -1.205e-01, 1.823e-01, -4.069e-02, -1.011e-01, 2.783e-02, 5.925e-02, 8.958e-03, 5.379e-03, 2.129e-02, -1.198e-02)); + r += mul(s1_2, M4(8.469e-02, -6.346e-02, -3.136e-02, 3.843e-02, -1.799e-01, 1.197e-01, 6.340e-02, 2.738e-02, -2.734e-03, -2.145e-02, -1.682e-03, -3.851e-02, -1.415e-01, 4.641e-02, -3.239e-02, 8.861e-02)); + r += mul(s1_3, M4(3.989e-03, 1.631e-01, 3.869e-03, 1.441e-01, 1.246e-01, -1.019e-01, 1.252e-01, 4.696e-02, -1.789e-02, 2.595e-01, 1.141e-01, 4.445e-03, -9.038e-02, -7.196e-03, -2.213e-03, -1.800e-02)); + r += mul(s1_4, M4(-1.560e-02, -9.329e-02, 2.696e-02, -2.862e-02, -1.369e-01, -2.104e-01, 2.051e-01, 9.208e-02, 2.653e-02, 1.976e-02, -8.770e-04, -1.043e-01, -1.875e-02, -1.249e-01, 8.375e-02, 1.273e-02)); + r += mul(s1_5, M4(-1.079e-01, 2.550e-02, 
1.182e-02, 5.653e-02, 4.882e-02, 1.534e-01, 3.794e-03, -4.484e-02, 8.980e-02, 6.711e-03, -2.761e-02, -2.076e-02, 7.742e-02, -9.804e-02, 1.470e-02, -2.654e-02)); + r += mul(s1_6, M4(-6.320e-02, 9.802e-02, -2.265e-02, 2.200e-02, -5.478e-02, -1.041e-01, -2.820e-02, 6.110e-03, -9.439e-02, -2.064e-02, -7.192e-03, -3.419e-02, 1.188e-01, 1.138e-01, -4.089e-03, 2.878e-02)); + r += mul(s1_7, M4(-1.218e-02, 3.202e-02, 2.396e-03, -8.416e-02, 1.513e-01, 2.373e-02, 3.390e-02, -8.421e-02, 4.312e-02, 1.243e-01, -3.633e-02, -1.061e-01, -6.732e-03, 3.347e-02, -4.659e-02, 7.279e-02)); + r += mul(s1_8, M4(5.111e-02, 4.784e-02, -1.719e-02, 3.597e-02, -1.890e-01, 6.043e-02, -7.685e-03, 4.533e-02, -9.896e-02, 1.394e-02, -1.045e-01, -8.497e-02, 1.063e-01, 5.015e-02, 5.828e-03, -6.269e-02)); + r += mul(s2_0, M4(2.285e-01, -3.687e-02, 6.187e-02, -1.497e-02, 9.003e-02, 3.358e-02, -2.267e-02, 1.189e-01, 3.308e-03, -3.413e-02, 1.361e-02, -5.823e-02, -1.229e-01, -1.184e-01, -3.262e-02, -7.547e-02)); + r += mul(s2_1, M4(-5.018e-02, 2.910e-02, -4.259e-03, 5.347e-02, 5.978e-02, -2.165e-02, -2.054e-03, -1.875e-02, -5.987e-02, 6.383e-02, 5.556e-02, 2.577e-02, 7.559e-02, 7.018e-02, 2.289e-02, 6.137e-03)); + r += mul(s2_2, M4(-6.007e-02, -4.349e-02, 1.945e-02, -1.882e-02, -3.415e-02, -5.427e-02, 2.093e-02, 8.056e-02, 1.078e-01, -5.950e-02, 1.008e-01, 2.744e-02, 2.153e-02, -1.352e-01, -5.578e-03, -1.774e-01)); + r += mul(s2_3, M4(2.468e-01, -9.416e-02, -1.857e-01, -8.229e-02, -4.687e-02, -3.897e-02, -3.143e-02, -2.024e-02, 2.018e-01, -1.233e-02, 1.028e-01, -8.787e-02, 5.332e-02, -6.006e-02, 5.621e-02, -5.948e-02)); + r += mul(s2_4, M4(-3.283e-01, -4.601e-03, 6.829e-05, -1.760e-02, 7.659e-02, 1.189e-02, 5.334e-02, 1.243e-02, -4.190e-02, -6.888e-02, -2.090e-02, 4.872e-02, -2.428e-01, 1.594e-01, -6.708e-02, -4.925e-02)); + r += mul(s2_5, M4(-5.396e-02, 4.187e-02, -4.452e-02, 9.492e-02, -1.380e-01, 1.535e-01, 2.170e-02, -1.384e-01, 4.016e-02, 2.773e-02, 1.567e-02, -8.716e-02, -9.485e-02, 2.299e-01, 
8.372e-02, -1.730e-01)); + r += mul(s2_6, M4(2.509e-02, 1.046e-01, 1.253e-01, -2.533e-01, -8.064e-03, -1.683e-01, -3.098e-02, 2.254e-02, 1.242e-01, -5.642e-02, 9.277e-02, -4.424e-03, -5.620e-02, -2.193e-01, 9.624e-02, -8.891e-02)); + r += mul(s2_7, M4(-2.898e-01, 1.479e-01, -2.662e-02, -2.493e-01, -3.300e-02, 1.319e-02, -3.639e-02, 4.491e-02, -1.567e-02, 7.557e-02, -7.205e-04, -3.277e-02, 4.877e-02, 3.076e-01, 6.273e-02, 6.527e-02)); + r += mul(s2_8, M4(-1.365e-01, -3.416e-02, 5.075e-03, -7.978e-02, 4.832e-02, -5.712e-02, 7.845e-02, -5.435e-02, 4.310e-02, 2.942e-03, 8.831e-02, -1.021e-01, -1.602e-01, 8.820e-02, 3.409e-02, 3.347e-02)); + r += mul(s3_0, M4(9.747e-02, 2.207e-02, 1.317e-02, -9.682e-02, 2.017e-01, -6.119e-03, 1.220e-02, 2.629e-02, -1.393e-02, 5.451e-02, 8.014e-02, -3.469e-02, -3.029e-02, -7.257e-02, -2.248e-02, 4.954e-02)); + r += mul(s3_1, M4(1.043e-01, 9.222e-02, -1.606e-02, -1.053e-01, 1.731e-01, -5.331e-02, -7.956e-03, -1.294e-01, 1.511e-01, 4.706e-02, -3.527e-04, 2.963e-01, -3.024e-02, -2.760e-02, 6.659e-02, 3.214e-02)); + r += mul(s3_2, M4(4.838e-02, -9.306e-02, -6.023e-03, 4.483e-02, -3.687e-02, -9.697e-02, -3.225e-02, -1.746e-01, 1.721e-02, -4.435e-02, 1.228e-01, 3.103e-01, 5.914e-04, -3.446e-02, -1.994e-02, 5.386e-02)); + r += mul(s3_3, M4(-1.939e-01, -1.580e-01, -4.895e-02, 1.412e-03, -4.898e-03, -1.639e-01, -4.504e-02, -2.401e-02, 1.711e-01, -1.822e-01, 1.008e-01, 1.034e-02, 3.407e-02, -4.789e-02, -9.452e-03, 7.574e-02)); + r += mul(s3_4, M4(5.688e-02, -1.474e-01, 4.510e-02, -4.189e-02, 1.122e-01, -1.644e-01, 4.926e-02, -5.456e-02, -6.581e-02, -4.086e-01, 2.528e-01, -1.017e-01, 3.020e-02, -8.277e-02, -1.366e-02, 8.426e-02)); + r += mul(s3_5, M4(-7.230e-03, 5.720e-02, 3.569e-02, -3.742e-02, 1.180e-01, 1.606e-01, -2.458e-02, -1.471e-02, -1.738e-01, 7.709e-03, 2.068e-01, -1.951e-02, 4.671e-02, 7.615e-02, 1.091e-03, 7.760e-02)); + r += mul(s3_6, M4(2.175e-03, 1.722e-01, 2.253e-02, 2.303e-01, -9.743e-02, -1.569e-01, -1.029e-01, -3.046e-02, 
-5.847e-02, 1.876e-01, 9.724e-02, -1.385e-01, -3.451e-03, 2.051e-01, -4.468e-02, -3.382e-02)); + r += mul(s3_7, M4(-2.835e-02, -4.078e-03, -4.575e-02, 4.651e-02, 4.832e-02, -1.773e-01, -3.178e-03, -1.705e-01, 8.595e-02, -3.243e-01, 2.306e-01, -3.663e-01, 6.979e-02, 7.939e-02, -6.449e-02, -8.105e-02)); + r += mul(s3_8, M4(6.073e-03, -3.105e-04, 2.874e-03, 8.638e-02, -5.350e-02, 2.503e-01, -2.975e-02, 1.869e-01, 2.408e-01, -1.582e-02, 1.506e-01, -1.568e-01, -1.046e-01, -4.320e-03, 1.539e-02, -8.431e-02)); + r += mul(s4_0, M4(3.759e-02, 5.250e-02, 2.585e-02, -1.190e-01, -2.020e-01, -1.738e-01, -1.521e-01, -2.284e-01, 1.748e-01, -6.553e-03, 3.122e-02, 1.068e-01, -4.484e-02, 6.121e-02, 5.943e-03, -2.170e-02)); + r += mul(s4_1, M4(-6.344e-02, 1.005e-01, 1.640e-02, -1.009e-01, 2.528e-01, 1.125e-01, -6.701e-02, -5.406e-02, -1.221e-01, -1.250e-01, 1.085e-01, 1.870e-01, -8.933e-03, 5.957e-02, -1.302e-02, -1.364e-01)); + r += mul(s4_2, M4(1.354e-01, -4.092e-02, 2.189e-02, -2.721e-02, -2.853e-01, 1.884e-01, 3.689e-02, -4.418e-01, 1.393e-02, -2.359e-03, 8.880e-03, -7.302e-02, 5.377e-02, -9.811e-02, 9.482e-02, -7.366e-02)); + r += mul(s4_3, M4(-4.037e-02, -6.727e-02, -1.165e-02, 5.868e-02, 1.135e-01, 1.528e-02, -9.808e-02, 3.825e-02, -9.465e-03, -3.143e-02, -2.417e-01, -1.116e-02, -3.929e-02, -7.951e-02, 3.391e-02, -9.762e-02)); + r += mul(s4_4, M4(-2.092e-02, -2.204e-01, -1.407e-02, 1.174e-01, 1.338e-01, 2.886e-01, -1.430e-01, -2.413e-01, -3.739e-02, -1.132e-01, 1.138e-01, -2.281e-02, -9.746e-02, 5.206e-02, 1.268e-01, 1.185e-01)); + r += mul(s4_5, M4(-7.319e-02, 1.822e-02, -1.961e-02, -1.779e-03, 2.727e-01, 1.450e-01, -2.256e-02, -1.149e-01, 4.839e-02, 7.495e-02, 5.834e-03, 2.721e-02, 5.098e-03, -4.490e-02, 1.423e-01, 1.510e-02)); + r += mul(s4_6, M4(1.201e-01, -1.831e-01, -3.828e-02, 3.226e-02, -3.452e-01, -1.794e-01, -1.403e-02, -9.156e-02, 1.067e-01, -4.264e-02, -2.530e-02, 1.946e-02, 2.963e-02, -2.627e-01, -6.242e-02, -5.993e-02)); + r += mul(s4_7, M4(-1.202e-01, 1.040e-01, 
2.414e-02, -6.253e-02, 8.447e-02, -2.435e-01, -2.410e-02, 9.560e-03, 2.747e-02, 5.268e-03, 2.616e-02, 1.051e-01, -6.211e-02, 7.567e-02, -8.283e-03, -1.307e-01)); + r += mul(s4_8, M4(3.009e-02, -6.296e-03, 1.735e-02, 5.828e-02, 1.022e-01, -9.718e-02, 2.726e-03, -1.039e-01, 1.170e-02, -4.387e-02, 5.666e-03, -3.432e-02, -9.777e-02, -4.244e-02, -1.039e-01, -1.724e-01)); + r += mul(s5_0, M4(-7.951e-02, -3.666e-02, 5.021e-02, 1.265e-01, -1.276e-01, 4.888e-02, -2.024e-02, 7.572e-02, -5.228e-02, 7.304e-02, 4.496e-02, 1.333e-01, -3.121e-02, 1.321e-01, -1.742e-02, 3.417e-02)); + r += mul(s5_1, M4(1.194e-01, -7.352e-02, 6.184e-02, 2.131e-01, -2.250e-02, -1.762e-01, 4.106e-03, 9.620e-02, -1.975e-01, -6.519e-02, 1.051e-01, 1.721e-01, 5.495e-02, 1.228e-01, -7.668e-02, -1.179e-01)); + r += mul(s5_2, M4(1.922e-01, -4.991e-02, -4.261e-02, 1.487e-01, 5.878e-02, -5.169e-02, -6.040e-02, -1.530e-02, 1.252e-02, -2.512e-02, -2.008e-02, 2.385e-02, 6.140e-03, 8.180e-04, 1.241e-02, 8.055e-02)); + r += mul(s5_3, M4(-1.951e-01, 1.221e-01, 4.860e-02, -1.653e-01, 6.190e-02, 2.393e-01, 6.098e-02, 1.480e-01, 1.565e-03, -1.047e-01, -2.786e-01, -3.447e-03, 7.533e-03, 1.296e-01, 1.720e-02, 6.717e-02)); + r += mul(s5_4, M4(9.892e-03, -2.393e-01, -6.465e-02, 7.031e-03, -8.029e-02, 1.763e-01, 2.851e-02, 1.690e-02, 1.832e-02, -1.088e-01, 6.066e-02, 1.175e-01, 1.053e-01, -1.132e-01, 8.121e-02, 1.441e-01)); + r += mul(s5_5, M4(-6.115e-02, 3.854e-02, 2.771e-02, -2.917e-01, 8.953e-02, 8.536e-02, -5.394e-02, 4.004e-02, -5.287e-02, 7.258e-02, 1.048e-02, 2.022e-02, 8.242e-02, -3.174e-02, -3.985e-02, 1.039e-01)); + r += mul(s5_6, M4(9.701e-02, -2.529e-01, 5.811e-02, 1.978e-01, -6.219e-02, 2.106e-01, 2.179e-02, -1.517e-01, -1.459e-01, 6.421e-02, 2.274e-02, 3.116e-03, 1.915e-02, -1.672e-01, -6.981e-02, 4.877e-02)); + r += mul(s5_7, M4(2.666e-01, -5.971e-02, 2.709e-02, 4.966e-02, 4.099e-03, 1.801e-02, -3.899e-02, -8.567e-02, 2.451e-02, -1.638e-02, 2.967e-02, 6.272e-02, -6.061e-02, 6.665e-02, 7.756e-03, 
7.877e-02)); + r += mul(s5_8, M4(-1.622e-01, 3.626e-02, -2.753e-02, 2.666e-01, 5.491e-02, -7.876e-02, 4.837e-02, -6.127e-02, -4.035e-02, 5.379e-03, 1.362e-02, -5.027e-02, -5.938e-02, -1.966e-02, -4.458e-02, 1.219e-01)); + r += mul(s6_0, M4(-1.073e-01, -2.268e-02, -5.501e-03, -1.056e-01, 9.025e-02, 5.071e-02, 8.031e-02, 5.300e-02, -1.550e-02, -3.503e-02, -7.288e-03, -7.107e-02, -9.045e-03, 5.579e-02, -7.203e-04, -7.520e-02)); + r += mul(s6_1, M4(8.352e-02, -7.482e-03, 1.065e-02, -1.990e-02, -5.408e-02, -3.289e-02, -7.212e-03, 2.428e-01, -5.297e-02, -3.798e-02, -1.253e-02, -8.234e-02, 3.211e-02, -5.380e-02, -1.238e-01, -2.021e-02)); + r += mul(s6_2, M4(9.075e-02, -4.485e-02, -2.579e-02, 2.019e-02, -2.033e-02, 1.251e-02, -2.849e-03, -6.163e-02, -1.619e-02, 9.033e-02, -5.164e-02, -1.889e-01, -2.819e-02, 1.528e-01, -7.519e-02, 8.862e-02)); + r += mul(s6_3, M4(8.999e-03, 1.290e-02, 3.210e-02, -3.479e-02, -1.223e-01, 3.118e-02, -1.110e-01, -1.087e-01, -7.495e-02, 1.912e-02, -2.434e-02, -2.479e-02, 8.889e-02, -9.007e-02, -9.235e-02, -5.254e-02)); + r += mul(s6_4, M4(-1.233e-01, 8.849e-02, 8.545e-02, 4.886e-03, 9.337e-02, 1.546e-01, -4.189e-02, 1.109e-01, 4.722e-02, 2.499e-01, 3.091e-02, 6.654e-02, -1.390e-01, -9.918e-02, -2.202e-01, -2.963e-02)); + r += mul(s6_5, M4(8.837e-02, 5.132e-02, -9.932e-02, -2.470e-02, 1.230e-01, -1.539e-02, -1.717e-03, 9.658e-02, -9.416e-02, -1.711e-01, -2.158e-02, -4.066e-02, 1.597e-01, 2.003e-01, -1.581e-01, -4.412e-02)); + r += mul(s6_6, M4(7.939e-02, 1.110e-01, -1.445e-02, 1.051e-01, 1.901e-01, -1.137e-01, 6.565e-03, 2.951e-02, -3.434e-02, 2.136e-01, 4.927e-02, -1.229e-01, -4.579e-02, -7.279e-02, 3.103e-03, 9.711e-02)); + r += mul(s6_7, M4(3.305e-02, -6.877e-02, 4.758e-02, -9.783e-03, -1.096e-01, 1.412e-01, 4.420e-04, 6.503e-02, 8.466e-02, -6.394e-02, 8.932e-02, 5.979e-02, 1.053e-01, -7.737e-02, -6.675e-02, 1.117e-02)); + r += mul(s6_8, M4(-9.356e-02, 1.015e-01, -2.942e-03, -4.394e-02, 5.934e-02, 4.966e-03, -5.267e-02, -8.942e-02, 6.933e-02, 
1.135e-02, -3.072e-02, 1.648e-01, -7.249e-03, -2.476e-02, 6.294e-02, 4.891e-02)); + r += mul(s7_0, M4(1.170e-01, -1.665e-02, -5.844e-02, 2.310e-01, -1.053e-01, -3.471e-02, 3.217e-02, 5.597e-03, -7.930e-03, -1.157e-01, -3.151e-02, 1.755e-02, -4.055e-01, 2.308e-01, 1.365e-01, -1.197e-02)); + r += mul(s7_1, M4(-1.190e-02, -2.708e-02, 2.930e-02, 1.566e-01, -8.527e-03, 2.480e-02, 3.832e-03, -7.143e-02, 1.495e-02, -3.769e-02, -6.756e-02, 2.383e-02, 2.074e-01, -7.796e-02, -2.250e-01, -1.741e-01)); + r += mul(s7_2, M4(-7.789e-02, -1.086e-02, -1.675e-02, 1.756e-01, 7.499e-02, 4.148e-02, -2.802e-02, 1.173e-02, 1.317e-01, 5.065e-02, -2.032e-02, 1.834e-01, 1.752e-01, 2.146e-01, -3.923e-02, -1.357e-01)); + r += mul(s7_3, M4(-6.958e-02, 9.580e-03, 8.851e-02, -7.930e-02, -1.558e-01, -3.778e-02, -6.097e-02, 2.811e-02, -2.334e-02, 1.998e-02, -1.205e-02, 1.894e-02, 5.476e-03, -1.280e-02, -1.022e-01, 1.428e-01)); + r += mul(s7_4, M4(6.006e-02, -1.223e-01, 1.589e-01, 6.598e-02, 1.343e-01, 1.586e-01, -6.632e-02, 5.271e-02, 9.392e-02, 1.218e-01, -5.771e-02, 9.682e-03, 5.840e-01, -3.489e-01, -3.396e-01, -1.786e-01)); + r += mul(s7_5, M4(-3.016e-02, -6.150e-02, -8.569e-02, -4.323e-02, -6.244e-02, -2.575e-02, -6.612e-02, 5.142e-02, -2.008e-01, 1.263e-02, 7.861e-03, 1.032e-01, 6.973e-01, 3.473e-04, -2.266e-01, -4.332e-02)); + r += mul(s7_6, M4(1.376e-01, 5.724e-02, 4.438e-02, -7.243e-02, -5.893e-02, -3.061e-02, 1.273e-02, -1.863e-02, 7.112e-03, 2.118e-02, -6.513e-03, 6.162e-02, 1.186e-01, 2.414e-01, 1.728e-01, -6.667e-02)); + r += mul(s7_7, M4(1.400e-01, 1.383e-02, -7.103e-02, -1.366e-01, -1.084e-01, 1.477e-01, -2.983e-02, -8.506e-02, -5.073e-02, -4.197e-02, 2.801e-02, 5.587e-02, 7.099e-01, -9.430e-02, -1.155e-01, 1.032e-01)); + r += mul(s7_8, M4(-4.641e-02, 1.204e-02, 4.037e-02, -1.185e-01, -8.472e-03, 4.617e-02, -4.495e-02, 8.632e-03, 3.586e-02, 9.629e-02, -2.544e-02, 6.781e-02, 4.206e-01, 4.385e-02, 1.276e-01, -1.737e-01)); + r += V4(-2.705e-02, 3.527e-02, 3.627e-01, -1.176e-02); + 
return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.051e-01, -1.114e-01, -1.216e-01, 1.105e-01, 8.499e-02, -6.663e-02, 3.991e-03, 6.458e-02, -7.665e-02, -8.809e-02, 1.040e-01, -2.485e-02, -1.012e-01, -9.021e-02, 8.377e-02, 2.340e-01)); + r += mul(s0_1, M4(-1.931e-01, -7.857e-02, 1.292e-01, -1.716e-01, 6.330e-02, -2.973e-02, 1.371e-02, 5.827e-02, 8.144e-02, 9.241e-02, 2.017e-03, 3.057e-03, -1.029e-01, -4.298e-02, 1.090e-01, -3.017e-01)); + r += mul(s0_2, M4(-1.690e-02, 1.600e-01, -1.662e-01, 1.337e-01, -7.108e-02, 2.756e-02, 4.058e-02, -9.522e-02, 4.137e-03, -9.349e-03, -1.089e-01, -9.164e-02, -3.924e-02, -9.906e-03, -1.061e-01, -1.744e-01)); + r += mul(s0_3, M4(-3.095e-02, 2.730e-01, 1.041e-01, 3.858e-01, 2.290e-02, 9.362e-02, 3.441e-02, 6.260e-02, 1.465e-02, -7.561e-02, 5.372e-02, 3.513e-02, 1.353e-02, -1.222e-01, -3.884e-02, -1.430e-01)); + r += mul(s0_4, M4(-2.386e-01, 3.587e-02, -1.912e-01, -3.013e-01, -6.317e-02, -1.056e-01, -1.407e-01, -1.712e-01, -2.643e-02, 7.672e-03, -2.874e-02, 1.912e-01, 7.532e-02, -8.689e-02, 3.293e-02, 1.470e-01)); + r += mul(s0_5, M4(-1.332e-01, -1.546e-01, 9.643e-02, -2.599e-02, 8.934e-03, -1.228e-01, -4.493e-02, 2.042e-01, -8.228e-02, 1.075e-02, -5.997e-02, -6.339e-02, -8.336e-02, -3.334e-03, 3.381e-02, 3.368e-01)); + r += mul(s0_6, M4(5.030e-02, -4.626e-01, -1.869e-01, 4.965e-01, 
9.817e-03, 5.342e-02, 5.986e-02, 5.861e-02, -1.183e-01, 2.203e-02, 5.520e-02, -1.429e-01, -5.020e-02, 2.073e-01, 7.864e-03, -2.852e-01)); + r += mul(s0_7, M4(2.176e-01, 2.152e-01, -1.788e-01, -5.080e-01, -1.122e-01, 8.691e-03, -9.614e-03, -3.106e-02, 4.894e-02, 6.166e-03, 7.439e-02, 1.690e-01, 1.803e-01, -9.130e-02, 2.321e-02, -1.665e-01)); + r += mul(s0_8, M4(9.277e-02, -9.217e-02, -2.009e-01, 2.199e-01, 5.131e-02, -6.504e-02, -6.013e-02, -5.409e-02, -2.226e-02, -2.786e-03, -1.661e-02, 4.217e-02, -1.431e-02, -3.201e-02, -6.179e-02, 8.740e-02)); + r += mul(s1_0, M4(5.791e-02, -1.342e-01, 1.181e-02, 1.169e-01, -2.401e-01, 1.451e-01, 5.932e-02, 1.637e-01, 3.013e-02, -4.731e-02, 2.029e-03, 1.286e-01, -5.358e-02, -1.128e-01, -1.991e-02, 1.099e-01)); + r += mul(s1_1, M4(6.223e-02, 6.101e-03, 1.201e-01, 8.313e-02, -8.887e-02, 1.035e-01, 2.002e-01, -1.245e-01, -3.890e-02, -7.083e-02, -1.125e-01, 2.253e-01, 7.171e-02, 2.084e-02, 1.537e-01, -6.827e-03)); + r += mul(s1_2, M4(2.259e-02, 1.401e-02, -8.702e-02, -5.172e-02, -8.238e-02, 3.951e-01, 4.718e-04, -2.622e-01, -7.200e-03, -3.022e-02, -1.731e-02, -1.051e-01, 6.473e-02, -5.861e-02, 6.686e-02, -1.402e-02)); + r += mul(s1_3, M4(1.818e-02, 1.564e-01, -5.097e-02, -6.576e-02, 7.355e-02, 1.180e-01, -7.400e-03, -5.576e-02, 1.021e-01, -5.665e-03, 5.615e-02, 2.363e-01, -2.641e-02, 1.016e-01, -1.045e-02, 4.678e-02)); + r += mul(s1_4, M4(2.484e-02, 1.577e-01, -1.466e-02, -1.573e-01, 2.610e-02, -3.650e-01, -1.553e-02, 1.881e-01, -3.113e-02, -1.647e-01, 9.964e-02, -7.754e-02, -6.019e-02, 8.320e-02, -8.445e-02, -1.022e-01)); + r += mul(s1_5, M4(-3.212e-02, -9.051e-02, 1.200e-02, 2.126e-01, 8.613e-02, -1.371e-01, 1.414e-01, 3.601e-03, 6.368e-02, -2.634e-02, 2.608e-02, -5.757e-02, -1.123e-02, -8.145e-02, -5.911e-02, 4.555e-02)); + r += mul(s1_6, M4(-2.184e-02, -4.911e-02, -3.778e-02, 1.812e-01, 5.428e-02, -1.398e-01, 2.522e-02, 7.816e-02, -9.922e-02, 3.210e-02, -4.167e-02, 9.021e-04, 6.767e-03, -6.228e-02, 6.149e-02, -7.836e-02)); + r += 
mul(s1_7, M4(-1.528e-03, -1.183e-01, 7.824e-03, 8.210e-02, -2.946e-02, 3.472e-01, 1.122e-01, -3.994e-04, 3.110e-02, 1.902e-02, -5.257e-02, 6.723e-02, -6.471e-02, -1.212e-02, -2.051e-02, -7.960e-02)); + r += mul(s1_8, M4(-4.947e-02, 1.218e-01, 1.486e-02, -1.141e-01, 5.986e-02, -9.915e-02, -3.834e-03, 2.886e-02, -2.646e-02, 3.957e-03, -3.546e-02, 1.315e-01, -3.214e-02, 2.432e-02, 2.367e-02, 6.176e-02)); + r += mul(s2_0, M4(5.123e-02, 5.989e-02, -6.815e-02, 9.743e-02, -3.462e-02, -8.530e-02, 6.288e-02, -2.515e-02, -6.732e-02, -1.728e-02, -4.539e-02, -2.716e-03, -6.797e-02, 8.885e-02, -9.367e-02, -9.246e-03)); + r += mul(s2_1, M4(6.126e-02, 1.034e-01, -1.436e-01, 1.068e-01, 6.247e-02, 9.696e-02, -2.424e-02, 1.688e-01, -3.992e-02, -9.006e-02, 7.081e-02, 6.454e-02, -1.187e-01, -5.075e-02, 7.586e-02, -5.934e-02)); + r += mul(s2_2, M4(-7.571e-02, 1.086e-01, -1.870e-01, -4.357e-02, -3.151e-02, 4.510e-03, 1.129e-02, 6.293e-02, 4.355e-02, 1.003e-02, -2.586e-02, 1.030e-02, -3.310e-02, 1.197e-01, 1.317e-02, -1.287e-01)); + r += mul(s2_3, M4(-1.821e-01, -5.221e-02, -6.573e-02, -1.790e-02, -3.118e-02, 5.614e-02, 5.421e-02, 1.202e-02, -6.092e-02, 2.164e-02, 2.650e-02, -1.051e-01, 2.480e-02, -1.092e-01, 2.620e-01, -1.028e-01)); + r += mul(s2_4, M4(-1.148e-01, -3.157e-01, -2.715e-01, 1.825e-01, -4.377e-02, -3.740e-02, -1.248e-01, -2.072e-04, 1.325e-02, 2.442e-02, -6.368e-02, 5.754e-02, -1.426e-01, 2.100e-02, 1.788e-02, -2.557e-01)); + r += mul(s2_5, M4(1.143e-02, 4.976e-03, -5.215e-04, 2.190e-01, 5.087e-02, -1.155e-02, 1.746e-01, 1.392e-01, -1.745e-02, -4.258e-02, 1.637e-01, -3.673e-02, -1.797e-01, 3.567e-03, 1.886e-01, 3.290e-01)); + r += mul(s2_6, M4(8.078e-02, -5.942e-02, -6.795e-02, 3.094e-01, -6.489e-02, -6.927e-02, -2.532e-02, -4.067e-02, -1.402e-02, -6.488e-03, 7.648e-02, 9.025e-02, 1.130e-01, -1.261e-01, -8.126e-02, -9.004e-02)); + r += mul(s2_7, M4(3.884e-02, 2.023e-01, 6.625e-02, 6.108e-02, 1.205e-02, 9.322e-03, 6.163e-02, -8.168e-02, 5.798e-02, 1.973e-02, 8.854e-03, 
9.005e-02, 1.195e-01, -1.639e-01, -1.471e-01, 6.780e-02)); + r += mul(s2_8, M4(-3.076e-01, 2.985e-01, -1.318e-01, 6.138e-02, 1.681e-02, -3.053e-02, 8.927e-02, 3.131e-02, 4.618e-02, 7.467e-02, -8.657e-02, -1.095e-01, 1.209e-01, -2.500e-01, -3.690e-01, 1.925e-02)); + r += mul(s3_0, M4(-3.576e-02, -5.877e-02, -4.620e-02, 1.437e-02, 7.191e-02, -1.027e-01, -1.292e-01, 2.874e-03, -2.148e-02, -2.312e-01, 1.928e-01, 1.189e-01, 4.168e-03, 9.991e-02, 2.619e-02, -9.109e-02)); + r += mul(s3_1, M4(8.459e-02, -5.825e-02, 1.192e-01, 9.633e-04, 1.364e-01, 1.481e-01, -2.684e-01, -2.158e-02, 2.941e-02, 5.594e-01, 2.594e-02, -9.532e-02, -2.989e-02, 8.519e-02, 9.496e-02, 2.129e-01)); + r += mul(s3_2, M4(-8.653e-02, 1.469e-02, 3.830e-02, -3.940e-02, 1.102e-03, -1.709e-01, -1.039e-01, -2.331e-01, -1.292e-01, 3.492e-01, -1.175e-01, -3.262e-02, 2.708e-02, 6.536e-02, 1.758e-02, -1.578e-01)); + r += mul(s3_3, M4(1.008e-01, 5.336e-03, 1.987e-02, 2.761e-02, 1.080e-01, -2.102e-01, 3.839e-02, 1.827e-01, -1.258e-01, -1.818e-01, 2.859e-02, -5.134e-02, 7.261e-02, -9.374e-02, 1.013e-01, 9.994e-03)); + r += mul(s3_4, M4(-7.610e-02, 8.429e-02, -5.786e-02, 1.165e-01, -9.639e-02, -1.452e-01, -1.060e-01, 4.033e-01, -4.765e-02, -3.415e-02, -3.791e-02, 2.257e-01, 5.831e-02, -3.548e-02, 1.256e-03, -3.516e-02)); + r += mul(s3_5, M4(1.950e-02, 1.418e-02, -6.498e-03, -6.196e-02, 1.770e-02, -5.317e-02, -1.264e-01, -4.619e-01, -1.301e-01, -1.639e-03, 1.875e-02, -2.892e-01, -6.607e-02, -6.523e-03, 5.238e-02, 1.304e-01)); + r += mul(s3_6, M4(-6.517e-02, -4.494e-02, 5.729e-02, -4.323e-03, -5.584e-02, -1.577e-02, 3.936e-02, 9.087e-02, 3.935e-02, -1.382e-01, -1.728e-02, 1.404e-01, -2.122e-02, -2.349e-02, -9.270e-02, -9.217e-02)); + r += mul(s3_7, M4(1.215e-01, -1.047e-02, 6.047e-03, -1.998e-01, -4.287e-02, 1.967e-02, -2.010e-01, 2.046e-01, -3.913e-02, -8.821e-02, -2.432e-01, 4.894e-02, 7.895e-02, 1.095e-01, 4.640e-02, -3.483e-02)); + r += mul(s3_8, M4(2.018e-03, -2.716e-03, -1.596e-02, -1.916e-01, -3.315e-02, 
-1.665e-01, -1.173e-01, -8.551e-02, -3.230e-02, 1.273e-01, -8.199e-02, -1.331e-01, -5.627e-02, 5.167e-02, -3.181e-02, -1.333e-01)); + r += mul(s4_0, M4(6.189e-02, 9.147e-03, -1.257e-02, 1.175e-01, 5.045e-02, -2.184e-01, 8.163e-02, -5.065e-02, -9.018e-02, -9.549e-02, 9.756e-02, 1.040e-01, -7.600e-02, -1.415e-01, 5.857e-02, 1.576e-01)); + r += mul(s4_1, M4(-3.889e-04, -3.811e-03, -7.045e-02, -1.290e-01, 1.675e-01, -1.672e-01, -7.413e-02, 1.036e-01, 1.580e-01, 1.675e-01, -2.016e-01, -9.586e-02, 7.723e-02, -1.377e-01, -6.372e-02, 3.977e-02)); + r += mul(s4_2, M4(-1.076e-02, 4.677e-02, -7.688e-03, -9.809e-02, 2.044e-01, 8.981e-02, -4.126e-02, 3.244e-01, -2.681e-02, 8.702e-02, 4.261e-02, 2.472e-02, -3.153e-02, -8.140e-02, -4.853e-02, 1.707e-01)); + r += mul(s4_3, M4(2.806e-02, -1.416e-01, -7.901e-02, 1.045e-02, 6.336e-03, -4.741e-01, -2.487e-01, 5.238e-02, 1.209e-02, 2.783e-01, 1.835e-01, -4.384e-02, -9.517e-02, -7.745e-02, -1.052e-01, -1.239e-01)); + r += mul(s4_4, M4(-9.428e-02, -1.833e-02, -2.601e-02, 7.356e-02, -1.486e-01, 2.948e-02, 2.502e-01, -2.314e-01, 1.104e-01, -1.343e-01, -2.981e-01, -9.674e-02, -2.600e-02, -1.356e-01, -5.424e-02, -2.577e-03)); + r += mul(s4_5, M4(8.080e-02, 3.808e-02, -5.635e-02, -1.385e-01, 1.128e-01, 2.107e-01, 4.262e-02, 5.565e-01, 1.292e-02, 5.516e-02, -5.578e-02, 1.006e-01, -1.165e-02, -1.283e-01, -1.195e-02, 3.710e-02)); + r += mul(s4_6, M4(-7.956e-02, -3.898e-02, -3.705e-02, -6.141e-02, 1.204e-01, -1.527e-01, 9.714e-02, -1.856e-01, -8.911e-02, 2.328e-02, -4.341e-02, 5.749e-02, -1.302e-01, -1.636e-01, -1.153e-01, 4.291e-02)); + r += mul(s4_7, M4(2.943e-02, 1.157e-01, 7.623e-04, -2.658e-02, -2.001e-01, -4.169e-02, 5.792e-02, 2.248e-01, 5.508e-03, 5.892e-03, -3.907e-02, 6.471e-02, -2.077e-03, -2.288e-01, -3.274e-02, -1.980e-01)); + r += mul(s4_8, M4(1.155e-02, -8.695e-03, 4.365e-02, 8.192e-02, 1.437e-03, 2.241e-01, 1.615e-01, -1.287e-03, -6.141e-02, -4.430e-02, 3.162e-02, 5.725e-02, -8.957e-02, -1.880e-02, -3.154e-02, 2.495e-01)); + r += 
mul(s5_0, M4(6.698e-03, 2.446e-01, 1.064e-01, -2.333e-01, 8.398e-02, 1.259e-01, 1.300e-01, -4.527e-03, -1.450e-01, -1.342e-01, 1.158e-01, -6.956e-02, -7.463e-02, -4.917e-02, 6.212e-02, 1.041e-01)); + r += mul(s5_1, M4(-5.109e-02, 1.202e-01, 1.301e-01, 2.447e-01, -2.661e-02, 1.459e-01, 4.081e-03, 9.255e-02, 6.270e-03, 9.833e-03, -1.827e-01, 1.978e-01, 1.042e-01, -6.184e-02, 1.129e-01, 2.724e-02)); + r += mul(s5_2, M4(-9.528e-02, 7.638e-02, -6.379e-02, -3.093e-01, 7.835e-03, 5.860e-02, -1.221e-02, -7.426e-02, -8.886e-03, -8.248e-02, -2.452e-02, 4.934e-02, 6.254e-02, -9.158e-02, -1.431e-01, -8.059e-02)); + r += mul(s5_3, M4(5.516e-02, 1.060e-01, 1.076e-01, -2.141e-01, -6.265e-02, -9.982e-02, -2.924e-02, 2.221e-02, -3.913e-02, 3.002e-01, 1.047e-01, -9.692e-02, -2.233e-02, 8.167e-02, -1.788e-02, 9.185e-02)); + r += mul(s5_4, M4(-1.033e-01, -1.085e-01, -1.545e-01, 2.828e-01, 3.280e-02, 6.244e-02, 4.462e-02, -4.086e-02, 2.783e-02, -7.351e-02, -1.294e-01, -1.053e-01, 1.550e-01, 1.063e-01, -8.768e-02, -3.760e-02)); + r += mul(s5_5, M4(-1.171e-01, -2.230e-01, -7.730e-02, -1.134e-01, -1.817e-02, -1.421e-01, -5.315e-02, -4.395e-04, -6.779e-02, 1.147e-02, -2.157e-02, 3.958e-02, 4.972e-02, -1.580e-02, 2.947e-02, -1.692e-01)); + r += mul(s5_6, M4(-9.404e-02, -7.301e-02, -3.278e-02, 1.819e-01, 9.315e-02, 1.353e-01, 4.351e-02, 8.531e-03, -4.843e-02, 9.107e-02, -4.694e-02, -1.637e-02, 4.969e-03, -1.248e-02, -1.670e-02, -2.470e-04)); + r += mul(s5_7, M4(6.462e-02, 4.478e-02, 8.927e-02, -1.264e-01, -5.183e-02, 1.347e-03, 2.785e-02, 4.317e-02, -7.223e-03, -1.428e-02, 7.643e-04, 1.053e-01, 2.188e-02, -1.182e-01, 1.179e-01, 6.074e-02)); + r += mul(s5_8, M4(1.897e-01, -8.327e-02, 7.416e-02, -1.695e-02, 4.508e-02, 1.182e-01, 6.454e-02, -1.191e-01, -1.309e-02, 4.376e-02, 1.515e-02, -3.969e-02, 5.553e-02, -2.351e-02, 9.684e-02, -1.221e-02)); + r += mul(s6_0, M4(-4.527e-02, -9.728e-02, 9.743e-03, 5.919e-02, -1.075e-01, -8.651e-02, 5.333e-02, 4.852e-03, 1.093e-02, -3.015e-02, -8.744e-02, 
-1.731e-02, -8.862e-02, 6.434e-03, 3.455e-02, -8.617e-03)); + r += mul(s6_1, M4(6.181e-02, -4.134e-02, -1.274e-01, -2.780e-02, 9.931e-02, 3.614e-02, 1.857e-01, -2.142e-01, 3.274e-02, -2.344e-01, -1.061e-01, 1.320e-01, -1.500e-01, -1.382e-01, -3.919e-03, 1.593e-03)); + r += mul(s6_2, M4(-1.193e-02, -5.966e-02, -4.917e-02, -1.712e-02, -1.225e-01, 6.671e-02, 8.546e-02, -1.695e-01, 4.812e-02, 1.271e-01, -1.225e-01, -7.102e-03, -2.274e-01, -4.425e-02, -1.270e-03, 6.183e-02)); + r += mul(s6_3, M4(1.253e-01, 1.469e-01, -5.953e-02, -1.821e-01, 1.218e-02, -6.910e-02, 8.267e-02, -2.724e-01, 6.948e-02, 6.374e-02, -1.253e-01, -1.573e-01, 3.327e-02, -2.731e-02, -5.794e-02, -8.764e-02)); + r += mul(s6_4, M4(-1.470e-01, -1.318e-02, 1.086e-01, 2.166e-02, 1.640e-01, -2.715e-02, 7.695e-02, -7.937e-02, 4.435e-02, 6.541e-02, 1.362e-01, 9.317e-02, -1.293e-01, 6.498e-02, 6.115e-02, -2.732e-02)); + r += mul(s6_5, M4(-1.062e-01, 7.520e-02, -1.024e-03, -2.449e-02, 1.120e-01, 3.211e-02, -7.203e-02, 3.927e-01, -1.297e-02, -6.928e-06, -7.026e-02, 1.230e-01, 1.202e-02, 2.639e-02, -6.227e-02, 5.431e-02)); + r += mul(s6_6, M4(-8.389e-02, 1.630e-02, 7.006e-02, 1.890e-02, -5.597e-03, -1.783e-01, -6.151e-03, -1.118e-01, -5.326e-02, 2.527e-01, 3.765e-02, -2.475e-02, -1.187e-01, 1.054e-01, 6.812e-02, -1.675e-04)); + r += mul(s6_7, M4(1.121e-01, 1.753e-03, 1.272e-02, -3.274e-02, -1.554e-02, 1.828e-02, 7.942e-02, -4.052e-02, 1.010e-01, -2.468e-02, 8.385e-02, 1.280e-01, -3.523e-02, -7.376e-02, 5.747e-02, 6.557e-02)); + r += mul(s6_8, M4(7.530e-02, 1.398e-03, -3.553e-02, 1.229e-01, 6.035e-02, -3.222e-02, 6.983e-03, 1.919e-01, 1.019e-01, -1.546e-03, -1.724e-02, 2.234e-01, 7.674e-02, 3.747e-02, -3.599e-02, -1.326e-02)); + r += mul(s7_0, M4(-1.416e-01, -2.534e-01, 6.959e-02, 4.909e-02, 5.168e-02, 1.637e-02, 4.515e-02, -4.846e-02, 5.483e-02, -3.688e-02, -3.717e-02, -6.822e-02, 7.247e-02, 3.603e-01, 1.765e-01, -2.696e-01)); + r += mul(s7_1, M4(-1.218e-02, -3.408e-01, 2.292e-02, -1.435e-01, 1.395e-01, 
1.190e-02, 7.423e-03, -3.210e-02, 1.329e-02, -1.986e-01, -9.972e-03, -3.261e-02, 1.019e-01, -6.167e-01, -2.890e-02, -3.107e-01)); + r += mul(s7_2, M4(8.059e-02, -5.207e-02, -3.553e-02, -1.822e-01, -3.392e-02, 1.620e-02, -1.094e-01, -3.267e-02, -6.855e-02, 2.069e-02, 5.989e-02, -1.125e-01, -1.526e-01, -8.064e-02, -5.341e-02, 8.461e-02)); + r += mul(s7_3, M4(2.222e-01, -1.026e-01, 1.373e-02, 1.260e-01, -5.944e-02, -7.264e-02, 2.722e-02, -2.172e-02, 7.230e-02, -3.942e-03, -1.013e-01, 7.697e-02, -3.213e-01, -1.067e-02, 1.809e-01, 2.742e-01)); + r += mul(s7_4, M4(-5.250e-02, -3.701e-02, -1.064e-01, 1.576e-01, 4.320e-02, 8.763e-02, 4.999e-02, 2.818e-02, 1.200e-01, -1.260e-01, -2.616e-02, -6.935e-02, -9.278e-02, -4.988e-02, 4.001e-01, -3.712e-01)); + r += mul(s7_5, M4(-3.058e-02, 4.468e-02, -1.586e-01, 1.079e-01, 2.566e-02, -2.422e-02, 5.597e-02, -6.913e-02, 8.615e-02, -4.592e-02, -8.361e-02, -2.085e-01, -2.418e-01, -5.534e-02, 4.826e-02, -1.660e-01)); + r += mul(s7_6, M4(-4.661e-02, -2.368e-01, -7.790e-02, -1.367e-01, 1.023e-01, 2.490e-02, -4.375e-02, 2.494e-01, 2.857e-02, 3.380e-02, 9.195e-02, -4.993e-04, 8.457e-02, 1.930e-01, -1.219e-01, -2.545e-01)); + r += mul(s7_7, M4(2.656e-02, 8.901e-03, -9.662e-02, 6.076e-02, 1.533e-02, -9.063e-02, -1.670e-01, 5.721e-02, 2.601e-02, 8.227e-02, 1.087e-01, 7.810e-03, 4.441e-02, 4.057e-01, 4.137e-02, 1.901e-01)); + r += mul(s7_8, M4(-9.190e-02, -1.211e-02, -5.945e-02, -1.734e-03, -9.425e-02, 5.072e-02, -9.240e-02, 3.546e-02, -1.003e-01, -2.467e-02, -2.442e-02, 9.600e-02, 2.682e-01, -3.375e-02, 7.697e-02, 1.923e-01)); + r += V4(3.100e-02, -1.857e-02, -6.864e-02, 8.263e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, 
V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.963e-02, -2.252e-02, -1.448e-01, 2.277e-02, -3.770e-02, -1.320e-02, -1.452e-01, -1.153e-01, -3.604e-02, -5.332e-02, -9.732e-02, -6.503e-02, 7.152e-02, -1.186e-01, 3.170e-01, -2.347e-01)); + r += mul(s0_1, M4(4.306e-02, -4.997e-04, -4.649e-01, -2.117e-01, -1.229e-01, 2.042e-02, -2.969e-02, -7.675e-02, 9.907e-02, -4.484e-02, 1.025e-02, 2.717e-02, -2.286e-01, -7.177e-02, 8.781e-02, -1.635e-01)); + r += mul(s0_2, M4(1.548e-01, -8.983e-02, -1.161e-01, -1.496e-01, -2.478e-02, -9.860e-02, 1.299e-01, 1.345e-01, 1.057e-03, -3.365e-02, 2.476e-02, 8.473e-03, -1.789e-01, 3.040e-02, 9.071e-02, -4.536e-02)); + r += mul(s0_3, M4(-2.717e-01, 2.572e-03, 2.506e-01, -9.201e-02, 1.012e-02, 6.912e-02, 1.405e-01, 8.530e-02, -7.757e-02, -6.426e-02, 2.328e-01, 5.584e-02, 3.363e-01, 5.617e-02, 1.332e-02, 1.869e-02)); + r += mul(s0_4, M4(-2.072e-02, -1.647e-01, 6.103e-02, -7.830e-02, 3.174e-01, -1.248e-01, -5.962e-02, -9.131e-02, 9.164e-02, 2.699e-03, -1.759e-01, -8.091e-03, 7.812e-02, -1.084e-01, 1.606e-01, 4.792e-02)); + r += mul(s0_5, M4(-1.059e-02, -5.786e-02, -4.665e-03, -2.343e-01, -8.026e-03, 5.300e-02, 2.951e-02, -5.558e-02, -5.785e-02, 9.929e-02, 2.625e-02, 3.589e-02, -4.308e-02, -8.178e-02, 2.463e-01, 2.174e-01)); + r += mul(s0_6, M4(-2.118e-01, -1.159e-01, 1.469e-01, 1.273e-01, 3.896e-02, 2.342e-02, -6.598e-02, -7.138e-02, 8.046e-02, -9.429e-02, -2.509e-01, 5.849e-02, -1.988e-01, -6.292e-02, 2.115e-01, -2.360e-01)); + r += mul(s0_7, M4(9.683e-02, -6.480e-02, 1.769e-01, -8.659e-02, -2.455e-02, 5.751e-02, 1.021e-01, 7.640e-02, -1.255e-01, -1.125e-01, -1.075e-01, -1.787e-01, 4.571e-03, -1.284e-01, -7.242e-02, 5.501e-02)); + r += mul(s0_8, 
M4(2.078e-02, -8.726e-04, -4.439e-02, 1.042e-01, 2.962e-02, -7.772e-02, -1.045e-01, -7.690e-03, 9.935e-02, 2.160e-02, 9.949e-02, 4.120e-02, -9.711e-02, -6.273e-02, 1.308e-01, 1.546e-02)); + r += mul(s1_0, M4(6.029e-02, 4.278e-02, 5.769e-02, 1.442e-02, -1.756e-01, 4.015e-02, -7.010e-02, 1.137e-01, -2.495e-03, -6.332e-02, 1.721e-02, 1.212e-02, 6.808e-02, -8.886e-03, -2.085e-01, -4.819e-02)); + r += mul(s1_1, M4(6.642e-02, -4.447e-02, -2.059e-02, -7.015e-02, -7.824e-02, -1.525e-02, -9.670e-02, -2.820e-02, -8.941e-02, -3.547e-02, -1.716e-02, -3.731e-02, 3.170e-02, 4.354e-02, -6.386e-02, 9.780e-02)); + r += mul(s1_2, M4(-3.524e-02, 4.504e-02, 4.981e-02, 8.960e-02, -8.128e-02, -1.493e-01, 1.161e-01, 1.018e-01, -1.724e-03, 2.717e-02, -1.312e-01, -1.677e-02, 2.003e-02, 9.196e-03, 1.623e-01, 7.206e-02)); + r += mul(s1_3, M4(-3.745e-02, 2.151e-02, -6.851e-02, -6.277e-02, 3.111e-02, 4.912e-02, 2.569e-01, 5.158e-02, 6.237e-02, -8.986e-02, 5.848e-02, -6.014e-02, 1.487e-02, 4.474e-02, -2.111e-02, 3.169e-02)); + r += mul(s1_4, M4(8.277e-02, -2.327e-02, -1.339e-01, 6.819e-03, 6.774e-02, -1.567e-01, -3.982e-02, 1.687e-01, 5.872e-02, 1.736e-01, -1.741e-02, 1.435e-01, -1.060e-01, -5.864e-02, 1.010e-01, 3.586e-02)); + r += mul(s1_5, M4(-2.038e-01, -6.883e-03, -3.415e-03, -4.187e-02, -1.266e-01, 2.903e-01, -7.235e-02, 1.061e-01, -3.151e-02, -7.008e-04, -7.758e-02, 8.958e-02, -3.681e-02, -2.896e-03, -1.110e-01, 5.328e-02)); + r += mul(s1_6, M4(-1.477e-02, -6.669e-02, 3.919e-02, -4.940e-02, -1.115e-02, 1.150e-01, -2.095e-01, 4.050e-02, 8.635e-02, 1.358e-01, 8.773e-02, -8.047e-02, -2.117e-02, -7.302e-04, 4.855e-02, 5.096e-02)); + r += mul(s1_7, M4(4.030e-02, 5.739e-02, 3.768e-02, -9.838e-02, 3.036e-02, 1.810e-01, 4.812e-02, 1.165e-02, -1.139e-01, 5.497e-02, 1.245e-01, 2.073e-02, -8.034e-03, 2.284e-02, 1.594e-02, 3.528e-02)); + r += mul(s1_8, M4(2.289e-02, -8.156e-02, 5.705e-02, 1.072e-01, 1.589e-03, 3.123e-03, 2.118e-01, 1.151e-02, -2.937e-02, 9.989e-02, -5.238e-02, 1.003e-01, 4.185e-02, 
-8.527e-02, -6.298e-03, -6.371e-02)); + r += mul(s2_0, M4(1.107e-01, 2.752e-02, 1.281e-01, 1.714e-01, 1.164e-01, -7.284e-02, -1.190e-01, 8.111e-02, -8.035e-03, -6.762e-02, 1.776e-02, -1.030e-01, 2.693e-02, 5.256e-02, 1.354e-01, -2.064e-02)); + r += mul(s2_1, M4(-4.988e-03, -6.097e-02, -4.041e-02, -3.558e-02, -1.518e-01, 1.663e-03, -2.821e-02, -1.361e-02, 2.478e-02, 8.943e-02, -1.308e-01, -6.894e-02, -1.212e-01, 1.641e-02, 2.404e-01, 1.694e-01)); + r += mul(s2_2, M4(-7.777e-03, 5.803e-02, 2.689e-01, 1.791e-01, 6.316e-04, 4.763e-02, -9.638e-02, -3.617e-02, 5.087e-02, 4.604e-02, -7.884e-02, 3.332e-02, 6.054e-02, -1.208e-01, -3.011e-01, -1.355e-01)); + r += mul(s2_3, M4(-4.400e-02, 2.170e-01, 1.681e-01, 1.320e-01, -3.089e-02, -8.828e-02, 5.103e-02, -1.408e-01, -2.118e-01, 3.802e-02, 5.672e-02, -5.139e-03, -7.158e-02, 1.651e-01, -3.334e-01, 5.201e-02)); + r += mul(s2_4, M4(-7.727e-02, -9.328e-02, -1.671e-01, -6.937e-02, -1.072e-01, 8.187e-02, -6.199e-02, 8.513e-02, 3.583e-02, -4.073e-02, 2.146e-01, 7.136e-02, 2.726e-01, 1.700e-01, -1.868e-01, -1.756e-02)); + r += mul(s2_5, M4(-4.307e-02, -2.301e-02, 1.690e-01, -2.337e-01, -9.058e-02, -1.756e-02, 8.350e-03, -4.909e-02, -9.059e-02, -6.068e-02, 1.240e-02, -1.104e-01, -1.323e-01, 4.555e-02, 6.956e-02, -5.998e-02)); + r += mul(s2_6, M4(-1.516e-01, 1.953e-01, -7.080e-02, 9.881e-03, -9.600e-02, 8.595e-02, 1.051e-01, 3.365e-02, -9.591e-03, 8.353e-02, -1.220e-01, 9.008e-04, -1.879e-01, 3.896e-02, -6.064e-02, -1.157e-01)); + r += mul(s2_7, M4(-9.644e-02, -1.368e-01, 2.848e-02, 7.543e-02, -8.532e-02, -1.060e-01, -1.384e-02, -1.303e-01, 8.781e-03, -7.783e-02, -5.889e-02, 9.188e-02, -5.361e-02, -6.329e-02, -7.719e-02, 1.732e-01)); + r += mul(s2_8, M4(-4.416e-02, -1.316e-01, -1.092e-01, -6.056e-02, 2.408e-02, -7.075e-02, -1.626e-01, -4.830e-02, 4.568e-02, -8.975e-03, -3.471e-02, 4.751e-03, -9.605e-03, 1.540e-01, -2.195e-01, -1.289e-02)); + r += mul(s3_0, M4(6.641e-02, -6.722e-02, 4.204e-02, 2.671e-02, 1.593e-01, -8.964e-02, 
-1.204e-01, 1.247e-01, -2.959e-01, 2.200e-02, 2.822e-02, -1.165e-01, -1.091e-03, 5.943e-03, -1.847e-03, 2.274e-02)); + r += mul(s3_1, M4(-7.450e-02, -2.852e-02, 2.931e-02, -3.538e-02, -4.187e-02, 8.734e-03, 2.365e-01, -5.349e-02, 8.154e-03, -9.164e-02, -8.136e-02, -6.776e-02, -1.918e-01, -4.653e-02, 6.557e-02, 5.742e-02)); + r += mul(s3_2, M4(-4.312e-02, -4.438e-03, -1.442e-01, -2.492e-02, 2.585e-01, 3.696e-02, 1.406e-01, 1.574e-01, 8.978e-02, 9.296e-02, -6.429e-02, 3.938e-02, 1.450e-01, 3.289e-02, -8.818e-02, -4.964e-02)); + r += mul(s3_3, M4(5.244e-02, -1.366e-03, 3.703e-02, 2.949e-02, -9.850e-02, 1.177e-01, 4.466e-02, 1.044e-01, -8.709e-02, 9.429e-02, 1.804e-01, -1.564e-01, 1.509e-01, -7.775e-04, 2.783e-03, 1.885e-02)); + r += mul(s3_4, M4(2.353e-02, 8.922e-03, 2.605e-02, -1.310e-01, -5.740e-03, 9.074e-02, -1.173e-01, -5.668e-02, 1.622e-01, -4.949e-02, 1.809e-01, 4.451e-01, 4.553e-02, 6.796e-02, -1.264e-01, 8.167e-02)); + r += mul(s3_5, M4(-1.271e-02, 1.309e-02, 5.765e-02, -1.710e-01, 1.858e-01, 2.290e-01, -2.852e-01, -1.578e-01, 3.224e-02, -1.279e-01, 3.270e-03, -1.556e-01, 3.685e-03, 5.137e-02, 8.328e-02, 7.098e-02)); + r += mul(s3_6, M4(-2.447e-02, -4.218e-02, -1.423e-01, -8.276e-02, 2.283e-01, 1.799e-01, -2.549e-01, 9.259e-02, 2.579e-01, 4.986e-02, -5.594e-02, 3.486e-02, -2.440e-02, 7.812e-02, 8.794e-02, 1.029e-02)); + r += mul(s3_7, M4(5.380e-02, 1.180e-01, 1.594e-01, 7.409e-02, 7.728e-02, 3.271e-02, -6.874e-02, 2.154e-01, -3.518e-02, -1.103e-01, 9.688e-02, 9.100e-02, -6.668e-02, -7.959e-02, 1.811e-02, -1.359e-01)); + r += mul(s3_8, M4(9.416e-02, 3.417e-03, -5.169e-02, 1.764e-01, 2.548e-01, 1.715e-01, 2.134e-01, 2.238e-01, 1.845e-01, 1.622e-02, 4.207e-01, -3.196e-02, -9.406e-02, -2.390e-01, 2.510e-01, -2.296e-02)); + r += mul(s4_0, M4(1.006e-02, -1.331e-02, -7.423e-02, 1.326e-01, 1.829e-01, 2.875e-02, 2.266e-01, -2.989e-01, -5.792e-02, 3.672e-03, -2.179e-02, -1.589e-01, 7.813e-02, -1.747e-02, 8.522e-02, -5.229e-02)); + r += mul(s4_1, M4(-9.481e-02, 
1.766e-02, -1.016e-01, 1.965e-02, 1.501e-01, -2.166e-01, 4.671e-01, 2.102e-01, 5.749e-02, -8.126e-02, 8.996e-02, 4.967e-02, -4.917e-02, -1.199e-01, -1.097e-01, 7.642e-02)); + r += mul(s4_2, M4(1.216e-01, -8.073e-03, 9.726e-03, 2.027e-02, -5.802e-02, 8.700e-02, 1.012e-01, 4.803e-02, 8.010e-02, -1.332e-02, 1.150e-01, 5.811e-04, -1.260e-01, 5.098e-02, 1.144e-02, 1.190e-01)); + r += mul(s4_3, M4(-3.559e-02, -1.380e-02, 4.833e-02, 1.924e-02, 2.151e-01, 3.367e-03, 2.616e-02, -1.804e-02, -1.903e-01, 2.271e-01, 7.762e-02, 2.965e-02, 3.447e-02, 5.939e-02, -1.552e-03, -5.304e-02)); + r += mul(s4_4, M4(5.806e-02, 6.617e-02, 6.822e-02, -3.996e-02, 1.884e-01, -1.212e-01, 4.396e-02, 1.790e-01, 9.638e-02, 3.977e-01, 4.136e-02, -2.153e-01, 1.812e-01, -1.566e-01, 1.537e-01, -1.567e-01)); + r += mul(s4_5, M4(-6.260e-02, -5.870e-02, 2.292e-02, -4.240e-02, 1.163e-01, -3.122e-02, -1.069e-01, 1.949e-01, -8.848e-02, 7.051e-02, 1.155e-01, 3.087e-03, -1.480e-01, -3.976e-02, 1.350e-01, -7.956e-02)); + r += mul(s4_6, M4(9.145e-02, -9.454e-02, -4.262e-02, 3.224e-02, 1.158e-01, 3.783e-02, -4.701e-02, -1.675e-01, 5.831e-02, -1.611e-01, -3.235e-02, -2.857e-02, -3.740e-02, -5.521e-02, 5.499e-03, 1.700e-01)); + r += mul(s4_7, M4(1.547e-01, -2.014e-02, 2.154e-02, -1.294e-01, -1.114e-02, 2.956e-02, 6.697e-02, -6.797e-02, 7.370e-02, -6.777e-02, 5.050e-02, -8.313e-02, -5.077e-02, -1.236e-01, 1.549e-01, 3.329e-02)); + r += mul(s4_8, M4(-7.738e-02, 7.239e-02, 6.842e-02, -2.829e-02, -9.568e-03, -8.991e-02, -8.371e-03, 1.752e-01, -1.749e-02, 8.862e-03, 5.179e-02, 1.371e-01, -8.463e-03, -2.560e-01, -6.020e-02, -9.990e-03)); + r += mul(s5_0, M4(3.632e-02, 8.914e-02, -8.888e-02, -1.491e-02, -1.101e-01, -3.298e-02, 1.195e-01, -1.208e-01, -1.501e-01, 1.288e-02, 1.467e-01, -1.018e-01, 1.483e-02, 5.684e-02, 1.103e-02, -7.856e-03)); + r += mul(s5_1, M4(-9.384e-02, 1.459e-01, -2.468e-01, 3.597e-01, 7.546e-02, -2.376e-02, 1.857e-01, 3.916e-02, 8.217e-02, -7.327e-02, -3.876e-02, 1.894e-01, 9.173e-03, -8.247e-02, 
-6.545e-03, 7.009e-02)); + r += mul(s5_2, M4(2.878e-01, -4.084e-02, 1.117e-01, 2.117e-01, 1.199e-01, 3.482e-02, 1.172e-01, -2.750e-02, 4.719e-02, 3.888e-02, -2.976e-02, -6.914e-02, -9.877e-02, -2.585e-02, 3.017e-02, -7.733e-03)); + r += mul(s5_3, M4(-1.588e-01, -3.354e-02, -3.294e-01, 6.194e-02, 8.878e-02, -8.921e-02, 2.573e-03, -4.779e-02, 4.467e-02, 2.385e-01, 7.285e-02, -7.890e-02, -1.010e-02, -3.659e-02, -7.703e-03, -2.427e-02)); + r += mul(s5_4, M4(-1.063e-01, -2.223e-01, 8.846e-02, -5.654e-02, -2.017e-01, 1.409e-02, -1.011e-01, 9.476e-02, 3.337e-02, 2.004e-01, -1.191e-01, -1.504e-01, 1.119e-01, 3.131e-02, -1.772e-01, -1.625e-01)); + r += mul(s5_5, M4(6.892e-02, 8.256e-02, -1.301e-01, -1.556e-01, 4.404e-02, 1.371e-01, -9.619e-03, 9.117e-02, -8.526e-02, 9.741e-02, -4.309e-02, 1.467e-02, -6.020e-02, -1.205e-01, 1.305e-03, 3.461e-02)); + r += mul(s5_6, M4(-9.202e-02, -8.236e-02, -5.605e-03, -1.625e-01, 3.579e-02, 7.673e-02, -8.021e-02, 9.903e-03, -4.991e-02, 2.040e-03, 1.293e-02, -7.739e-02, -4.150e-02, -3.881e-02, -6.908e-02, -8.282e-02)); + r += mul(s5_7, M4(-1.834e-01, 1.597e-02, 1.049e-01, -5.524e-03, -6.159e-02, -1.697e-02, 6.824e-02, -5.493e-02, 5.603e-02, -1.096e-01, 4.400e-02, 2.910e-02, 9.645e-02, -1.648e-02, 6.329e-02, 1.038e-01)); + r += mul(s5_8, M4(8.771e-02, 1.508e-01, 2.217e-01, 3.061e-02, -3.918e-02, 1.947e-02, 2.294e-02, 9.695e-02, 2.748e-03, -8.599e-02, 3.772e-02, 8.646e-02, 1.298e-01, 1.420e-02, 5.255e-02, -1.048e-01)); + r += mul(s6_0, M4(-4.407e-02, -2.682e-02, 4.526e-02, 2.739e-02, -1.935e-01, 7.011e-02, 9.171e-03, 4.940e-02, -1.467e-01, -3.275e-02, 6.104e-02, -6.878e-02, 1.220e-02, 9.451e-02, -1.911e-04, -5.313e-02)); + r += mul(s6_1, M4(-7.799e-02, 4.824e-02, -4.685e-02, 1.330e-01, 9.253e-02, 4.494e-02, -4.259e-02, -1.034e-02, 4.323e-02, 3.399e-02, -7.307e-02, -8.715e-02, 3.315e-02, 9.641e-02, -2.714e-02, 6.442e-02)); + r += mul(s6_2, M4(1.280e-01, -3.284e-02, -2.368e-02, -1.081e-01, -4.936e-02, 3.070e-02, -1.759e-02, -1.002e-01, 
-6.439e-02, -4.279e-02, 4.613e-02, -2.717e-01, -1.198e-01, 1.773e-02, -7.221e-03, -6.637e-02)); + r += mul(s6_3, M4(-7.689e-02, -3.825e-02, -7.242e-02, 8.280e-02, 4.194e-02, 1.121e-01, 2.622e-01, -1.725e-02, -1.986e-01, -1.177e-02, -5.650e-02, 8.646e-02, -4.196e-02, -2.977e-03, 2.090e-02, -6.451e-02)); + r += mul(s6_4, M4(1.851e-01, -1.181e-01, -2.515e-01, -9.251e-02, -6.227e-02, 6.769e-02, 2.017e-01, 1.323e-01, 5.400e-02, 6.620e-02, 1.503e-02, -7.147e-02, -1.018e-01, 4.994e-02, 8.060e-02, 8.856e-02)); + r += mul(s6_5, M4(5.242e-02, -8.571e-02, -1.166e-02, 2.723e-02, -2.491e-02, 9.272e-02, -7.715e-02, -1.098e-01, -8.076e-02, 6.484e-02, 5.065e-02, -1.996e-01, -6.703e-02, 7.041e-02, -1.129e-01, -4.052e-02)); + r += mul(s6_6, M4(1.693e-01, -5.234e-02, -2.291e-02, -1.243e-01, 6.377e-02, 7.930e-02, -4.347e-02, -3.595e-02, -1.106e-01, 6.135e-02, 4.739e-02, -1.549e-01, -3.269e-03, 6.855e-03, -1.141e-02, -5.771e-02)); + r += mul(s6_7, M4(-2.193e-02, 1.859e-01, 1.001e-01, 1.196e-01, 1.000e-01, -6.727e-02, -3.770e-02, 1.140e-01, 1.902e-01, -6.595e-03, -2.541e-01, -8.262e-02, 1.284e-01, -2.159e-01, 5.541e-03, -2.051e-02)); + r += mul(s6_8, M4(4.735e-02, 1.489e-01, 5.903e-02, 2.421e-02, -4.557e-02, -1.645e-02, 3.871e-02, 4.318e-02, 2.065e-01, 7.512e-03, -1.383e-01, -8.699e-02, 7.859e-03, -1.348e-01, 2.014e-02, -7.657e-02)); + r += mul(s7_0, M4(-3.504e-02, 4.054e-02, 1.591e-01, 1.507e-01, -6.738e-02, -6.840e-03, 1.195e-02, -1.019e-01, 2.478e-03, -3.514e-03, 2.032e-01, 9.057e-02, -1.931e-01, 1.626e-01, -1.275e-01, -2.999e-01)); + r += mul(s7_1, M4(-2.528e-01, 7.853e-02, 9.488e-02, 8.846e-02, 8.768e-02, 6.529e-02, -1.257e-02, 1.981e-02, 6.481e-02, -1.187e-02, 3.204e-02, 1.175e-02, -3.030e-01, -1.115e-01, -1.117e-01, -3.533e-01)); + r += mul(s7_2, M4(4.824e-02, 4.947e-02, 2.454e-01, -6.911e-02, 9.860e-02, 3.005e-02, 5.433e-04, -1.456e-03, 3.297e-02, 1.039e-01, 2.388e-01, 1.363e-01, 1.138e-01, 7.708e-02, -1.577e-01, -3.101e-01)); + r += mul(s7_3, M4(-5.749e-02, 4.330e-02, 
2.857e-02, 1.032e-01, 1.035e-01, 1.604e-01, 1.927e-02, -1.796e-02, 3.267e-02, 3.284e-02, 5.265e-02, 1.932e-03, -3.115e-01, 5.623e-04, -7.705e-01, -2.119e-01)); + r += mul(s7_4, M4(2.663e-02, -1.189e-01, -2.258e-02, 2.241e-01, 5.599e-02, 8.080e-02, 2.917e-02, -1.648e-01, -1.288e-02, -8.442e-02, -1.462e-01, 1.041e-01, -7.283e-01, 1.270e-01, -7.735e-01, -1.935e-01)); + r += mul(s7_5, M4(-2.610e-01, -8.824e-03, 9.837e-02, -4.825e-02, 1.038e-01, 1.235e-02, -7.313e-02, -8.378e-03, -1.153e-01, 8.859e-02, 1.082e-01, 1.711e-02, -6.367e-01, -1.746e-01, -1.139e-01, -4.174e-01)); + r += mul(s7_6, M4(-4.065e-02, -1.208e-02, 2.632e-03, 6.209e-02, -4.002e-02, -3.493e-02, 4.478e-03, -3.306e-02, -6.063e-02, 1.362e-01, 4.370e-02, 2.847e-03, 2.528e-01, -1.677e-01, -8.459e-02, -3.713e-01)); + r += mul(s7_7, M4(-2.196e-01, 7.684e-02, -1.451e-01, -4.506e-02, -7.435e-02, -1.734e-01, 3.935e-02, 2.225e-02, -2.907e-02, -1.242e-01, -2.926e-02, -5.695e-02, 2.201e-01, 6.264e-02, -1.411e-01, -3.046e-01)); + r += mul(s7_8, M4(-1.804e-01, 9.045e-02, 9.856e-02, -9.440e-02, -2.864e-02, -5.568e-02, 1.081e-01, 6.273e-02, -9.816e-02, 5.960e-02, 4.801e-03, 2.349e-02, 3.701e-01, -1.769e-01, -1.709e-01, -5.965e-02)); + r += V4(2.457e-02, -1.213e-02, 5.035e-03, 4.694e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.209e-02, 4.511e-02, -1.259e-01, 
1.801e-02, -1.489e-01, -7.744e-02, -4.519e-02, -1.305e-01, -1.101e-01, 9.679e-02, -8.833e-02, 1.070e-03, 1.064e-01, -1.194e-01, 3.602e-01, 6.349e-02)); + r += mul(s0_1, M4(-4.945e-02, -2.867e-01, 1.859e-01, 5.646e-02, -8.603e-02, 8.764e-02, 4.271e-02, -5.464e-03, 1.297e-01, -9.843e-02, 4.122e-02, -9.408e-02, 6.374e-02, 5.450e-02, 4.749e-02, 1.137e-01)); + r += mul(s0_2, M4(2.290e-03, 8.589e-02, -1.364e-01, 2.320e-02, -5.637e-02, -5.776e-02, 4.048e-02, 4.646e-02, -2.496e-02, -4.359e-02, 4.649e-02, -4.578e-02, 1.724e-01, -4.250e-02, 9.008e-02, -2.097e-02)); + r += mul(s0_3, M4(1.328e-01, -5.113e-02, 9.661e-02, 3.584e-01, -1.447e-04, 6.085e-02, -2.173e-03, -4.150e-02, -2.510e-01, -2.243e-02, 1.615e-02, 1.553e-01, 3.936e-02, 5.902e-03, -3.577e-02, -8.622e-02)); + r += mul(s0_4, M4(1.282e-01, -3.511e-01, 3.612e-01, -3.413e-02, -1.100e-01, -1.439e-01, -9.836e-03, 1.909e-03, 1.541e-01, 1.199e-02, -3.416e-02, 7.726e-02, -1.726e-01, -2.366e-01, -1.770e-02, -1.034e-01)); + r += mul(s0_5, M4(-1.842e-01, 1.090e-01, 2.646e-02, 2.030e-01, 9.180e-02, -2.091e-01, -6.783e-02, 8.176e-02, 5.056e-02, 9.988e-02, -6.093e-03, 7.029e-02, 1.497e-01, -1.331e-01, 1.205e-01, 1.406e-01)); + r += mul(s0_6, M4(1.557e-01, -8.295e-02, -7.064e-02, 6.583e-02, 5.184e-02, -4.663e-02, -3.992e-02, -8.000e-02, -2.593e-02, -6.642e-02, 2.625e-02, 1.322e-02, 1.973e-01, 1.182e-01, 1.352e-02, -6.801e-02)); + r += mul(s0_7, M4(-1.737e-01, -2.135e-01, 6.720e-02, -3.688e-01, 1.874e-01, 6.782e-02, -1.867e-02, 6.146e-02, 1.519e-01, 2.078e-01, -9.241e-02, -1.143e-01, 1.275e-01, -2.259e-04, 1.173e-01, 3.520e-02)); + r += mul(s0_8, M4(1.675e-01, -1.018e-01, 7.274e-02, 2.925e-01, -5.030e-02, 5.771e-02, 2.714e-02, -3.318e-02, 4.824e-02, -9.005e-02, 5.480e-03, -5.039e-02, 1.241e-03, 1.391e-01, 7.588e-02, 2.549e-03)); + r += mul(s1_0, M4(-1.242e-01, -2.302e-02, 2.898e-02, -1.824e-02, -2.151e-01, 1.024e-01, 1.221e-01, -1.509e-01, -2.634e-02, 2.601e-02, -1.735e-01, -5.896e-02, -6.527e-02, -5.204e-03, -4.855e-02, 
-6.708e-02)); + r += mul(s1_1, M4(7.801e-03, 3.848e-02, 1.033e-02, -1.174e-02, -1.183e-02, 1.345e-01, 4.000e-01, 1.013e-02, 1.159e-01, 4.982e-02, 7.533e-03, -1.085e-03, -5.009e-03, 6.178e-03, 3.397e-02, 3.831e-02)); + r += mul(s1_2, M4(-4.237e-02, 1.380e-01, 7.769e-02, -1.055e-01, -2.220e-01, -7.308e-02, -2.130e-01, -2.470e-02, 1.043e-01, 6.163e-02, 3.809e-02, -4.389e-02, -4.513e-02, -1.382e-01, 1.205e-01, -6.402e-02)); + r += mul(s1_3, M4(1.881e-01, -1.599e-01, -5.364e-02, -8.743e-02, -1.890e-01, 1.899e-02, -9.322e-02, 1.086e-01, -4.464e-02, -1.280e-01, 3.037e-01, -9.327e-02, -1.184e-02, 5.401e-02, -3.374e-02, 4.372e-02)); + r += mul(s1_4, M4(1.199e-01, -5.957e-02, -9.253e-03, 1.498e-01, -1.688e-01, -5.394e-02, -2.666e-02, -5.413e-02, -9.928e-02, -1.324e-02, 2.534e-01, -6.590e-02, -5.032e-02, -8.629e-02, -1.970e-03, -6.077e-02)); + r += mul(s1_5, M4(-6.083e-03, 7.846e-02, -6.000e-02, 4.341e-02, -5.467e-03, 4.813e-02, -2.310e-02, 3.208e-02, -6.983e-02, -7.852e-02, -1.998e-02, 2.221e-02, 1.777e-01, -8.882e-03, -2.707e-02, 1.037e-01)); + r += mul(s1_6, M4(-3.420e-02, 1.844e-02, 3.980e-02, -1.475e-01, 7.866e-02, -6.863e-02, 1.889e-02, -3.398e-03, 3.107e-02, -1.148e-01, -5.329e-02, -1.722e-02, 2.352e-02, 1.929e-01, 7.786e-03, -9.830e-03)); + r += mul(s1_7, M4(-2.781e-02, 6.691e-02, -9.545e-02, 4.231e-02, -9.000e-02, 1.266e-01, 4.665e-02, 2.711e-02, -6.153e-02, 9.310e-02, 4.351e-02, 9.940e-02, 1.262e-01, 3.219e-02, -6.487e-02, -5.552e-02)); + r += mul(s1_8, M4(-8.366e-02, -2.192e-02, 4.450e-02, 2.514e-02, 8.095e-02, 1.241e-01, 5.547e-02, -1.171e-01, 6.950e-02, -4.450e-02, -1.699e-03, 1.040e-01, -4.120e-02, -7.339e-02, -3.509e-02, 3.632e-02)); + r += mul(s2_0, M4(-1.192e-01, -4.334e-02, -8.963e-02, 3.132e-02, -8.887e-02, -2.458e-02, -3.446e-02, 2.002e-01, -4.401e-02, -4.960e-02, -7.173e-02, 3.750e-02, -4.877e-02, 1.376e-01, -1.326e-01, -1.893e-01)); + r += mul(s2_1, M4(-3.186e-01, -1.066e-01, -1.339e-02, 1.365e-01, -8.995e-02, 3.117e-02, 1.870e-02, 1.136e-02, 1.429e-01, 
1.345e-01, -2.291e-02, 9.036e-02, 5.507e-02, -3.593e-04, 8.528e-03, -4.938e-02)); + r += mul(s2_2, M4(4.532e-02, -9.502e-02, -3.770e-02, 5.776e-02, -1.369e-01, -1.988e-01, -3.887e-02, 1.325e-01, 6.813e-02, -4.984e-02, -1.904e-01, -2.815e-02, 1.425e-01, 7.566e-02, 3.262e-03, -2.347e-01)); + r += mul(s2_3, M4(-7.322e-02, -1.502e-01, 6.263e-02, -2.979e-01, -5.895e-02, 4.781e-02, 4.113e-02, -2.937e-02, 2.070e-02, 5.476e-02, 9.087e-02, -8.571e-02, 7.881e-02, 1.784e-01, 1.621e-01, -9.743e-02)); + r += mul(s2_4, M4(-1.010e-01, 2.538e-01, -2.909e-02, -5.773e-03, -9.496e-02, -4.057e-02, -1.766e-02, 4.822e-02, -2.439e-02, -2.124e-02, 2.763e-02, -1.495e-01, 1.182e-01, -5.139e-02, 2.486e-01, -1.643e-01)); + r += mul(s2_5, M4(9.509e-02, -1.751e-02, 1.060e-01, 8.923e-03, 4.730e-02, -5.171e-03, 5.679e-02, 4.725e-02, -5.932e-02, -1.748e-02, 9.510e-02, 2.206e-02, 1.533e-01, 6.775e-02, 1.194e-01, 1.154e-02)); + r += mul(s2_6, M4(2.332e-01, 3.231e-01, -1.061e-01, -4.654e-02, 6.794e-02, 1.202e-01, 9.481e-02, 1.935e-02, -9.529e-02, -1.021e-02, -6.697e-02, 7.863e-02, 2.771e-02, 2.880e-01, -2.041e-01, 2.709e-01)); + r += mul(s2_7, M4(1.713e-01, 2.813e-01, -2.426e-01, 2.852e-02, 8.356e-02, -7.763e-02, -1.161e-01, 8.172e-02, 5.172e-03, 6.301e-02, 8.229e-02, -1.098e-01, -2.023e-01, -2.939e-01, -1.067e-03, 1.228e-01)); + r += mul(s2_8, M4(1.953e-01, 2.606e-01, -1.670e-01, 1.798e-01, 5.119e-02, -6.810e-02, -1.451e-02, -1.260e-01, 2.329e-01, -1.182e-01, 2.804e-02, 1.311e-01, 1.462e-01, -1.083e-01, -4.603e-02, 1.439e-01)); + r += mul(s3_0, M4(-9.441e-02, -3.264e-02, 1.507e-01, 1.408e-02, 3.475e-02, -4.064e-02, -1.473e-01, -1.309e-01, -7.233e-02, 2.080e-01, -1.367e-02, -1.229e-01, 3.507e-02, -8.483e-02, 2.097e-03, -7.173e-02)); + r += mul(s3_1, M4(4.412e-02, -5.134e-02, -1.370e-02, -4.422e-03, 1.779e-01, 2.364e-01, -2.182e-01, -1.791e-01, -7.399e-02, -5.241e-01, 6.908e-02, 1.578e-01, 3.525e-03, 2.769e-02, 3.628e-02, -1.244e-01)); + r += mul(s3_2, M4(1.648e-01, 9.574e-02, -6.849e-02, -3.637e-02, 
5.263e-02, 9.854e-02, 6.326e-02, 4.128e-02, 1.046e-01, 8.906e-02, -2.206e-01, 8.683e-03, -2.032e-02, -1.181e-02, 5.512e-02, -1.051e-01)); + r += mul(s3_3, M4(-2.212e-01, 5.570e-02, -1.141e-01, -1.006e-01, -7.169e-04, 3.264e-01, -1.139e-01, -2.577e-01, -8.693e-02, 1.239e-01, 1.803e-01, 1.125e-01, 1.068e-02, -2.922e-02, 3.197e-02, 1.931e-03)); + r += mul(s3_4, M4(-4.622e-02, 3.270e-02, 1.921e-01, -2.309e-02, -1.523e-01, 1.259e-01, -3.838e-01, 1.271e-01, -2.085e-01, -4.757e-01, -1.196e-01, -1.820e-01, 4.175e-02, -2.550e-02, -5.265e-02, -1.096e-02)); + r += mul(s3_5, M4(-8.994e-03, -1.081e-01, 1.343e-02, -5.891e-02, -2.103e-01, 1.690e-01, -1.553e-01, -1.295e-01, -6.184e-02, 3.514e-01, -7.091e-02, 2.888e-01, 4.795e-02, -6.023e-04, 6.025e-03, 2.650e-02)); + r += mul(s3_6, M4(2.547e-02, 2.874e-02, -1.511e-01, -5.070e-02, 3.203e-02, -4.604e-02, 2.209e-02, -1.229e-01, -1.056e-02, -1.707e-01, -7.084e-02, 5.550e-02, 1.163e-02, -4.525e-02, -3.029e-02, 8.571e-02)); + r += mul(s3_7, M4(-2.294e-02, -1.865e-02, 2.194e-02, 1.160e-01, -2.235e-01, -5.123e-02, 5.762e-02, 2.479e-01, -1.523e-01, -1.528e-01, 5.740e-02, -1.966e-02, -2.238e-02, 1.913e-01, -5.490e-02, -2.322e-02)); + r += mul(s3_8, M4(5.871e-02, 2.765e-02, 4.142e-02, 3.517e-02, -8.669e-02, 3.216e-01, -1.062e-01, -4.092e-01, 2.025e-01, 2.163e-01, -3.333e-02, 3.378e-01, 4.835e-02, -3.763e-02, -3.008e-02, 1.014e-01)); + r += mul(s4_0, M4(-9.938e-02, 5.662e-02, 1.075e-01, 3.380e-02, 4.261e-01, -7.606e-03, -2.433e-02, 5.942e-02, 1.104e-01, 4.452e-02, 1.949e-01, -5.846e-02, -2.881e-02, -3.737e-03, -7.296e-02, 2.033e-02)); + r += mul(s4_1, M4(-5.953e-02, -1.803e-01, 3.228e-02, -7.362e-02, 1.650e-01, -1.759e-01, 7.852e-02, 7.242e-02, -1.063e-01, 8.676e-02, 2.192e-01, 2.149e-02, 7.670e-02, 6.286e-02, -1.718e-01, 3.052e-02)); + r += mul(s4_2, M4(2.066e-02, -3.229e-02, -1.272e-02, 3.003e-03, 3.458e-01, -3.572e-01, -4.341e-02, 2.942e-02, -1.202e-01, -5.292e-02, 3.636e-02, -2.075e-02, 3.415e-02, 1.072e-01, 3.300e-03, 2.872e-01)); + r += 
mul(s4_3, M4(3.300e-02, 1.055e-01, -1.042e-01, 6.901e-03, 2.957e-01, 2.021e-01, -2.375e-02, 5.408e-02, -9.200e-02, 8.006e-02, -1.759e-01, -5.489e-02, -1.236e-01, 2.300e-02, 1.019e-01, 9.595e-02)); + r += mul(s4_4, M4(-1.879e-02, -3.074e-02, 1.715e-02, 1.419e-01, 3.305e-01, -2.458e-02, -1.699e-01, -1.895e-02, -2.189e-01, 2.476e-01, -8.551e-03, 1.496e-01, 4.431e-02, -1.024e-01, -3.112e-02, -4.159e-02)); + r += mul(s4_5, M4(1.866e-01, -4.232e-02, 6.446e-02, -1.000e-01, -3.971e-03, -2.469e-01, -1.169e-01, -8.591e-02, 3.592e-02, -1.996e-02, -3.942e-02, 7.812e-02, -1.576e-01, 9.317e-02, 6.869e-02, 2.633e-01)); + r += mul(s4_6, M4(4.973e-02, 5.596e-02, -2.942e-02, 3.945e-02, 1.295e-01, 5.590e-02, -6.419e-02, 9.570e-02, -3.299e-02, 1.413e-01, 1.897e-02, 1.479e-03, 1.753e-01, -4.112e-02, 4.182e-02, -2.565e-04)); + r += mul(s4_7, M4(-1.027e-02, -3.947e-02, -2.293e-02, -1.055e-02, 4.375e-03, 2.412e-01, -1.684e-01, 1.426e-01, -4.759e-02, 4.362e-02, -2.873e-02, -1.426e-01, 1.059e-01, -7.505e-02, 5.282e-02, -4.923e-02)); + r += mul(s4_8, M4(1.793e-02, 9.091e-02, -1.470e-02, -9.641e-02, 1.120e-01, -3.178e-02, -5.480e-02, -1.602e-01, -3.819e-02, 1.581e-02, 4.480e-02, 1.712e-01, -4.538e-02, 1.732e-01, 1.257e-01, 2.426e-01)); + r += mul(s5_0, M4(1.128e-01, 5.831e-02, -4.761e-02, 2.190e-01, 1.296e-02, 4.718e-02, -7.204e-03, -7.652e-02, 5.361e-02, 1.115e-01, 2.294e-01, 6.526e-02, -9.302e-02, -6.147e-02, -1.335e-01, 4.724e-02)); + r += mul(s5_1, M4(-3.076e-02, -5.031e-03, -2.038e-02, 3.392e-02, 3.513e-02, 1.646e-02, 1.014e-01, 1.512e-03, 1.196e-01, -2.991e-03, 1.386e-01, 2.146e-01, 2.277e-02, -5.620e-02, -2.236e-01, 1.751e-01)); + r += mul(s5_2, M4(1.017e-01, 2.476e-01, -2.771e-02, 6.961e-02, -3.064e-02, -6.149e-02, 7.209e-02, -3.906e-02, -1.153e-01, 6.624e-03, 6.405e-02, 4.708e-02, 1.956e-01, -8.011e-02, -1.242e-01, 1.444e-02)); + r += mul(s5_3, M4(1.568e-01, 5.347e-02, -5.782e-02, -6.519e-02, -8.024e-02, 3.140e-02, -1.768e-01, 8.107e-02, -1.213e-01, -5.629e-02, -2.261e-01, 5.272e-02, 
-1.467e-01, -1.843e-02, 5.187e-02, -1.378e-01)); + r += mul(s5_4, M4(-1.650e-01, -3.911e-01, 4.699e-03, 2.402e-01, -4.692e-02, 6.856e-03, 2.062e-02, -2.151e-01, -1.859e-01, -1.778e-02, 3.559e-02, 1.345e-01, -6.697e-02, -2.813e-02, 6.408e-02, 1.028e-01)); + r += mul(s5_5, M4(-1.920e-02, -9.734e-02, -6.633e-02, -1.076e-01, -1.382e-01, -3.244e-02, -1.126e-01, -2.778e-03, 5.546e-02, 5.411e-02, -1.391e-02, 1.117e-01, -8.756e-02, 9.652e-02, -3.436e-02, -1.543e-01)); + r += mul(s5_6, M4(-8.966e-02, -4.120e-02, -8.011e-02, -1.364e-01, 5.269e-02, -2.837e-02, 1.709e-02, 2.249e-02, 9.687e-03, -1.441e-01, -1.440e-02, 5.554e-02, -2.181e-01, -2.442e-02, 5.119e-02, -1.257e-01)); + r += mul(s5_7, M4(2.125e-01, 6.544e-02, -3.881e-02, 2.105e-01, -2.400e-02, 3.021e-02, -2.993e-02, 1.163e-01, -9.385e-02, -2.801e-02, 1.703e-02, 7.201e-03, -3.281e-02, 1.150e-01, -3.042e-03, 7.310e-02)); + r += mul(s5_8, M4(-1.190e-01, 5.190e-03, 1.481e-01, -7.708e-02, 9.729e-02, 1.799e-01, 2.982e-02, 2.960e-02, 1.851e-02, 7.781e-02, 2.263e-02, -2.539e-02, 7.774e-03, 1.886e-02, 1.686e-02, -1.899e-01)); + r += mul(s6_0, M4(2.732e-02, -8.044e-02, 5.805e-02, 7.451e-02, 6.343e-02, -7.971e-02, 6.930e-02, 1.125e-01, -6.981e-02, 5.407e-02, 8.634e-02, 1.879e-01, -6.081e-02, -1.187e-01, 4.764e-02, 4.740e-02)); + r += mul(s6_1, M4(-5.672e-02, 2.655e-02, 2.719e-03, 9.895e-02, -7.568e-02, 5.300e-02, -5.080e-02, 1.155e-01, -1.366e-02, -1.794e-01, -9.348e-02, -6.803e-02, 1.234e-02, -2.631e-02, 1.041e-01, 1.539e-03)); + r += mul(s6_2, M4(-8.914e-02, 9.367e-02, -8.082e-03, -5.017e-02, 6.531e-03, 9.440e-02, -9.546e-02, -2.078e-01, 6.303e-02, -7.267e-02, -6.052e-02, 3.390e-02, 1.199e-02, 1.099e-01, 5.302e-02, -3.439e-02)); + r += mul(s6_3, M4(-1.370e-03, -1.555e-01, 4.913e-02, -6.440e-02, -1.153e-01, -1.462e-01, -3.976e-02, -1.655e-01, -7.278e-02, 1.135e-01, -1.266e-02, -8.936e-02, -1.615e-02, -4.130e-03, 3.085e-02, -5.194e-02)); + r += mul(s6_4, M4(-4.025e-02, 1.696e-01, 1.808e-02, -1.851e-01, 4.826e-02, -1.989e-01, 
-1.621e-02, 3.878e-02, 2.924e-02, 1.500e-01, 7.301e-02, 1.156e-01, 8.438e-02, -2.838e-02, -2.140e-02, -7.899e-02)); + r += mul(s6_5, M4(-5.516e-03, -3.767e-02, -1.080e-02, 2.163e-02, -1.908e-01, -1.105e-01, -4.079e-03, -2.976e-01, -2.688e-02, 1.270e-01, 1.558e-01, 1.177e-01, -7.772e-02, -9.525e-02, -4.954e-02, -1.848e-01)); + r += mul(s6_6, M4(1.304e-01, -5.792e-02, -8.074e-03, 1.746e-02, -1.216e-01, 5.129e-02, -6.421e-02, 7.717e-02, 2.829e-02, -5.381e-03, 3.045e-03, 2.908e-02, -4.127e-02, -1.850e-02, -6.188e-02, 4.142e-02)); + r += mul(s6_7, M4(5.040e-02, -7.565e-02, -6.751e-02, -1.051e-01, 7.771e-02, -3.772e-02, -2.842e-02, 7.751e-02, -1.013e-01, 1.450e-01, -3.471e-02, 1.379e-02, 2.554e-02, 3.765e-03, -3.954e-02, 3.068e-02)); + r += mul(s6_8, M4(3.067e-03, -1.769e-01, 6.014e-02, 1.301e-01, 3.985e-02, 2.602e-01, 3.536e-03, 2.116e-01, -8.207e-02, -6.224e-03, -5.648e-02, -9.683e-02, 2.825e-02, 2.959e-02, -8.088e-02, 6.939e-02)); + r += mul(s7_0, M4(1.006e-02, 2.047e-01, -2.866e-02, -8.099e-02, 1.373e-01, 1.434e-01, 1.495e-01, -8.041e-02, 7.476e-02, 5.637e-02, 5.705e-02, -1.046e-01, -1.410e-01, 2.264e-01, 1.041e-02, -9.869e-02)); + r += mul(s7_1, M4(5.110e-02, 5.172e-02, -5.743e-02, 8.524e-02, 1.275e-01, 1.149e-02, 5.671e-02, 4.428e-02, 2.453e-01, -2.659e-01, -2.023e-02, 7.128e-02, 2.033e-02, 1.359e-01, 1.923e-01, 2.281e-02)); + r += mul(s7_2, M4(-2.172e-01, 1.265e-01, -9.141e-02, 8.283e-02, 1.425e-01, 3.770e-02, 2.065e-02, -1.220e-01, -2.166e-02, -1.593e-01, -1.264e-01, -1.340e-01, -3.660e-01, -1.286e-02, 3.190e-02, 1.368e-01)); + r += mul(s7_3, M4(6.927e-02, -1.486e-01, 2.885e-02, 9.393e-02, -5.625e-02, -1.521e-01, -7.625e-02, 1.589e-01, -2.040e-03, -1.351e-01, -8.796e-02, 5.367e-03, 5.634e-02, -3.133e-01, 9.720e-02, 2.741e-01)); + r += mul(s7_4, M4(7.587e-02, -5.238e-02, 1.154e-01, 2.532e-02, 2.177e-02, 2.078e-01, -1.090e-01, 9.705e-03, 2.140e-01, -2.165e-01, 3.438e-02, -2.241e-02, 1.044e-01, -2.104e-01, -3.646e-01, 4.702e-01)); + r += mul(s7_5, M4(-7.825e-02, 
5.564e-03, -4.894e-02, 3.624e-01, 5.070e-02, 5.833e-02, -6.385e-02, 8.356e-02, 1.689e-01, 7.976e-03, -4.881e-02, -4.230e-02, -6.128e-01, -1.089e-01, -4.774e-01, -1.708e-01)); + r += mul(s7_6, M4(1.434e-02, 1.574e-01, -9.762e-03, 7.883e-03, -7.872e-02, -4.435e-02, 7.324e-02, 1.883e-02, -3.976e-02, -1.412e-02, -3.203e-02, 4.119e-02, -2.741e-01, -1.385e-01, 3.347e-02, -2.693e-01)); + r += mul(s7_7, M4(9.450e-02, 9.057e-02, -1.375e-01, 2.256e-01, -5.546e-02, 2.531e-02, -3.054e-02, -1.491e-02, -2.629e-03, 2.303e-03, 8.818e-03, -1.034e-01, 2.138e-01, 6.280e-01, 1.307e-02, -2.183e-01)); + r += mul(s7_8, M4(-1.732e-02, 2.528e-01, -1.357e-01, 9.682e-02, 1.232e-01, -4.775e-02, -3.421e-02, 1.176e-02, -7.255e-03, -8.638e-02, -6.195e-02, 1.069e-02, -2.368e-01, 3.942e-01, -1.728e-01, -2.484e-01)); + r += V4(-2.040e-02, 5.008e-03, 2.850e-02, 3.035e-02); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = 
l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = 
max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) 
V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(5.544e-02, -5.758e-03, 2.108e-02, 4.552e-02, -1.077e-01, -7.150e-02, 2.178e-01, -7.830e-02, -2.320e-02, 4.724e-02, -8.105e-02, -6.811e-02, 1.483e-01, -1.327e-02, 1.257e-01, -1.698e-01)); + r += mul(s0_1, M4(2.066e-02, 1.023e-01, -1.745e-02, -2.050e-02, 1.418e-01, 3.869e-02, -4.629e-02, 3.321e-01, -1.605e-02, 2.144e-02, -2.054e-02, -5.619e-02, 2.438e-01, 1.045e-01, -1.384e-01, 2.647e-01)); + r += mul(s0_2, M4(7.674e-03, 7.657e-02, -9.888e-03, -1.328e-01, -3.430e-01, -1.643e+00, 5.308e-01, 8.012e-01, 4.700e-02, -1.216e-01, 1.025e-01, 5.615e-02, -1.697e-03, 1.288e-01, -1.155e-01, 2.061e-01)); + r += mul(s0_3, M4(-1.864e-02, -7.868e-02, 1.318e-01, 2.944e-02, 1.153e-01, -2.453e-02, 3.245e-01, -8.511e-03, 8.246e-02, 9.155e-02, -2.596e-01, -4.688e-02, -5.379e-02, 1.681e-02, -4.082e-02, 2.190e-02)); + r += mul(s0_4, M4(6.601e-02, -7.595e-02, 2.497e-02, 6.535e-02, -5.133e-01, -1.201e-01, -4.400e-01, -4.921e-02, 1.192e-01, 6.046e-02, 2.065e-01, 4.049e-03, 1.598e-01, -2.106e-03, -1.876e-02, -1.145e-01)); + r += mul(s0_5, M4(1.073e-01, 1.299e-01, -3.370e-02, -8.442e-02, -1.831e-01, 6.161e-02, -2.270e-01, -3.732e-02, 9.468e-02, -1.475e-01, 4.612e-02, 
-6.901e-02, -1.292e-01, 1.520e-01, -4.510e-02, -3.038e-01)); + r += mul(s0_6, M4(-1.179e-02, 9.368e-03, -2.160e-02, 2.662e-02, -7.871e-02, -8.766e-02, -3.862e-02, -9.715e-02, -5.973e-02, 5.893e-02, -1.214e-02, -9.276e-02, -3.892e-03, 6.275e-02, -1.077e-01, 3.067e-01)); + r += mul(s0_7, M4(1.529e-02, -5.643e-02, -9.501e-02, 1.355e-01, 8.102e-02, -8.228e-02, -1.104e-01, -2.338e-01, -1.540e-02, 1.110e-02, 2.905e-02, -3.618e-02, 1.395e-01, -2.328e-01, -6.577e-02, 3.591e-02)); + r += mul(s0_8, M4(-5.025e-02, -2.154e-02, -1.012e-01, 1.160e-01, 2.285e-01, 1.782e-01, 4.228e-03, -1.791e-01, -2.313e-01, -1.870e-02, 1.116e-02, 9.506e-02, -4.598e-02, 6.144e-02, -6.470e-02, -9.153e-03)); + r += mul(s1_0, M4(3.908e-02, 6.316e-03, -1.158e-01, -1.560e-01, -5.848e-02, 8.426e-02, 6.576e-02, -5.478e-02, -8.282e-02, -1.216e-01, -9.145e-02, 5.931e-01, -5.063e-02, 9.802e-02, -6.655e-02, -1.409e-01)); + r += mul(s1_1, M4(-1.087e-01, 2.195e-01, -6.529e-03, -9.650e-02, 1.615e-01, 1.374e-03, 1.127e-02, 2.669e-02, -2.843e-02, 1.035e-01, 1.358e-01, 5.600e-01, -8.128e-02, 1.057e-01, -2.377e-03, 9.913e-02)); + r += mul(s1_2, M4(-3.318e-01, -3.460e-01, 2.275e-01, -8.662e-03, 9.161e-02, -8.165e-02, 1.890e-02, 5.559e-03, -2.743e-01, -2.447e-01, 1.763e-01, 4.261e-01, 1.459e-01, 4.110e-02, 7.020e-03, -9.591e-02)); + r += mul(s1_3, M4(-2.150e-01, 2.711e-01, 3.466e-01, -1.744e-01, -2.085e-02, 8.328e-03, 1.817e-02, -6.482e-02, -2.019e-01, 4.386e-02, -8.107e-01, 1.272e-01, -3.355e-03, 1.445e-02, -1.531e-01, 3.756e-03)); + r += mul(s1_4, M4(-3.456e-01, -1.712e-01, 1.056e-01, -4.463e-01, -9.244e-02, -2.191e-02, 1.196e-03, 8.891e-02, 4.539e-01, 3.934e-01, 7.296e-01, 7.285e-01, 8.415e-02, -3.430e-02, -5.900e-04, 5.239e-02)); + r += mul(s1_5, M4(-7.564e-02, 9.153e-03, -1.038e-01, -4.872e-01, -7.808e-02, 1.069e-03, 2.316e-02, -5.820e-03, 8.850e-02, 1.468e-01, 1.475e-02, 2.014e-01, 5.136e-02, 1.069e-01, 1.706e-02, 6.026e-03)); + r += mul(s1_6, M4(-2.049e-01, 4.374e-03, 1.735e-01, -9.108e-02, -1.021e-01, 
-1.778e-04, -2.652e-02, -3.118e-02, 2.540e-01, 1.640e-01, 8.107e-02, 7.021e-02, -1.428e-01, 3.771e-02, -1.237e-01, 1.380e-01)); + r += mul(s1_7, M4(1.263e-01, 2.799e-02, -1.731e-02, -1.820e-01, 6.205e-02, -4.359e-03, 4.270e-02, 6.013e-02, 2.310e-01, -7.026e-02, 2.328e-01, 2.288e-01, 8.425e-02, -6.989e-02, 1.053e-01, 6.417e-02)); + r += mul(s1_8, M4(-4.784e-02, 3.269e-02, 5.087e-02, 4.355e-01, -1.589e-02, 4.944e-02, -3.893e-02, 1.440e-01, -4.526e-01, 1.852e-02, -8.770e-02, 2.989e-01, -6.335e-03, 9.053e-03, 2.872e-02, 1.917e-02)); + r += mul(s2_0, M4(-1.034e-01, -1.212e-01, -5.062e-02, 2.220e-01, 1.032e-01, 1.242e-01, -9.939e-02, -5.839e-03, 4.070e-02, 1.123e-01, 6.500e-02, 3.366e-02, 6.602e-04, 4.235e-02, 7.555e-02, -3.979e-02)); + r += mul(s2_1, M4(-1.470e-01, 1.341e-01, -4.356e-02, 1.161e-01, 8.236e-02, 8.301e-02, -5.493e-02, -2.833e-02, 1.886e-01, 2.338e-02, 5.674e-02, 4.431e-02, 2.927e-02, 1.126e-01, -2.959e-02, 2.052e-02)); + r += mul(s2_2, M4(-2.107e-01, -2.339e-01, -5.360e-02, -2.191e-01, 9.702e-02, -2.443e-02, -3.053e-02, 1.584e-01, 1.841e-01, -3.244e-02, -6.325e-04, 8.851e-02, 7.295e-03, 4.954e-02, -2.203e-02, 2.285e-02)); + r += mul(s2_3, M4(-2.654e-02, 5.692e-02, -1.150e-01, 5.888e-02, -9.731e-02, -5.500e-02, -4.047e-02, 1.866e-01, -1.104e-02, 1.897e-02, -2.299e-01, 5.012e-02, -7.490e-02, 3.292e-02, -4.738e-02, -2.095e-02)); + r += mul(s2_4, M4(-8.243e-02, -1.117e-01, -1.419e-02, -8.975e-02, -7.969e-02, 8.002e-02, 3.929e-02, -2.430e-01, -6.524e-02, 2.296e-01, -8.671e-02, -7.930e-02, 1.248e-01, 1.087e-01, -7.137e-02, 4.833e-02)); + r += mul(s2_5, M4(-9.138e-02, -8.530e-02, 1.243e-01, -7.925e-02, -8.227e-02, 9.701e-02, -2.030e-05, 1.916e-01, 7.244e-02, 9.469e-02, 3.060e-02, -1.714e-01, -5.628e-03, -3.548e-02, 4.991e-02, 3.440e-02)); + r += mul(s2_6, M4(-3.592e-01, -1.287e-02, 6.441e-02, 1.201e-01, 1.077e-01, 6.000e-02, 4.156e-02, 7.145e-02, 1.351e-01, 6.877e-03, -4.731e-02, 1.923e-01, -5.772e-02, -7.441e-03, 2.667e-02, 7.189e-02)); + r += mul(s2_7, 
M4(7.838e-02, 1.265e-01, 1.347e-01, -1.894e-01, 1.915e-02, 6.450e-02, -1.783e-02, 7.198e-02, 1.192e-01, 9.598e-02, -5.289e-02, -9.756e-02, 5.461e-02, -1.719e-01, -3.899e-02, -3.137e-02)); + r += mul(s2_8, M4(-1.130e-01, 2.902e-03, 7.685e-02, -2.195e-01, -6.129e-02, 1.072e-01, -2.869e-02, 1.294e-01, 4.824e-02, -9.565e-02, 3.023e-02, -1.908e-01, -2.689e-02, -5.843e-02, -6.372e-02, -1.017e-01)); + r += mul(s3_0, M4(3.443e-02, -2.186e-02, -7.398e-02, 2.109e-02, 4.728e-02, -2.613e-02, -5.230e-02, -4.760e-02, -2.659e-02, 5.841e-02, 2.548e-02, 1.497e-01, -1.254e-02, -3.591e-02, -4.055e-02, 1.223e-01)); + r += mul(s3_1, M4(5.113e-03, 9.075e-02, -9.528e-02, 1.000e-01, -1.631e-02, -1.926e-01, -5.704e-02, 1.599e-01, 9.760e-03, -2.674e-01, 4.985e-02, 1.509e-01, 7.294e-02, -2.058e-01, 7.094e-02, 1.679e-01)); + r += mul(s3_2, M4(-3.314e-02, -5.458e-02, 4.053e-02, -6.458e-02, -3.707e-02, -7.450e-02, -5.707e-02, -1.021e-01, 3.070e-02, 4.210e-03, 4.633e-02, -5.045e-02, -1.479e-02, -3.339e-02, 8.471e-02, -8.356e-02)); + r += mul(s3_3, M4(-1.641e-02, -4.805e-02, -9.906e-02, 7.602e-02, 1.348e-01, 6.798e-03, -2.020e-02, -9.736e-02, 1.131e-01, 1.257e-01, 1.013e-01, -1.374e-01, 6.402e-02, 1.710e-01, 1.075e-01, -1.846e-01)); + r += mul(s3_4, M4(2.332e-02, 7.788e-02, -1.213e-01, -2.642e-02, -9.024e-02, 1.647e-01, 4.472e-02, -8.873e-02, -8.534e-02, -1.443e-01, -1.676e-01, 2.141e-02, -3.493e-01, 2.111e-01, 1.340e-01, 2.345e-02)); + r += mul(s3_5, M4(6.754e-02, -7.302e-02, 7.720e-02, 7.302e-02, 1.413e-02, -9.789e-02, 5.252e-02, 7.653e-02, -3.572e-02, 5.287e-02, 1.009e-01, -1.622e-02, 7.554e-02, -1.290e-01, -2.322e-03, 1.709e-01)); + r += mul(s3_6, M4(3.987e-02, 2.268e-02, -5.675e-02, -3.855e-02, 5.678e-03, 2.296e-02, -4.488e-03, 7.918e-02, -3.615e-02, 5.164e-02, 9.196e-02, -1.309e-01, -2.668e-01, -1.025e-01, 9.466e-02, 1.381e-01)); + r += mul(s3_7, M4(2.574e-02, 1.908e-01, 1.321e-01, 1.192e-01, 1.048e-02, 6.638e-02, 1.597e-01, 1.485e-01, -2.010e-01, 2.928e-02, -1.044e-01, -1.083e-01, 
-1.064e-01, -1.828e-01, 2.316e-02, -1.473e-01)); + r += mul(s3_8, M4(-6.137e-02, -5.335e-02, 6.966e-02, -6.205e-02, -1.061e-01, 3.473e-02, 1.788e-02, -9.480e-02, 5.560e-02, -4.706e-02, 5.202e-02, -1.420e-01, 6.509e-03, -1.294e-02, 3.418e-02, -5.897e-02)); + r += mul(s4_0, M4(1.175e-01, 1.390e-04, 6.281e-02, -7.248e-02, 1.378e-02, -9.862e-02, -5.127e-02, -6.545e-02, 8.631e-02, 3.223e-02, -7.405e-02, 5.335e-02, 3.574e-02, -4.053e-02, 1.716e-01, 1.277e-01)); + r += mul(s4_1, M4(-1.075e-02, 5.488e-02, -6.465e-02, -2.294e-01, 5.212e-02, -1.647e-01, -2.144e-02, 3.401e-02, 1.802e-01, -1.097e-01, -1.165e-01, 2.270e-01, 1.037e-01, -3.603e-01, 1.609e-03, 3.745e-01)); + r += mul(s4_2, M4(-2.245e-01, 2.581e-01, -1.418e-01, 4.069e-03, 1.000e-01, -8.272e-02, -6.738e-03, 5.556e-02, -4.491e-02, 3.893e-02, -6.781e-02, 2.742e-02, -3.531e-02, 7.826e-04, 7.051e-02, 1.496e-02)); + r += mul(s4_3, M4(1.454e-02, -2.678e-02, -7.264e-02, -3.251e-02, -2.511e-02, 3.208e-02, -1.461e-01, -1.288e-01, 1.411e-02, 1.038e-02, -1.715e-01, 5.874e-02, 3.128e-01, 2.134e-01, -4.003e-02, -2.441e-02)); + r += mul(s4_4, M4(7.052e-02, 5.386e-02, 1.227e-01, -1.277e-01, -2.847e-04, -2.280e-01, -3.854e-02, -1.228e-01, -2.950e-02, 9.786e-02, -9.868e-02, -3.455e-01, 1.065e-01, 1.068e-01, -5.596e-03, 2.004e-02)); + r += mul(s4_5, M4(-6.593e-02, -2.414e-01, 1.366e-01, -1.910e-01, 1.177e-03, 4.601e-03, -3.388e-02, 7.391e-02, 5.703e-02, -1.842e-01, -7.501e-02, 2.527e-02, -5.273e-02, -1.680e-01, -1.124e-01, 8.422e-02)); + r += mul(s4_6, M4(1.933e-03, -2.671e-02, 1.166e-01, 4.585e-02, -5.652e-02, -2.965e-02, -3.518e-02, 9.324e-02, 6.895e-02, -5.980e-02, 9.371e-03, -4.457e-02, -1.102e-01, 1.673e-02, 4.024e-02, 6.059e-02)); + r += mul(s4_7, M4(-1.293e-01, -1.190e-02, -4.142e-02, 2.233e-01, 2.569e-03, -1.057e-01, -7.533e-02, 1.966e-02, 1.172e-01, 4.392e-02, 4.434e-02, 2.997e-01, -1.758e-02, 9.199e-02, 1.395e-01, 2.846e-01)); + r += mul(s4_8, M4(-1.066e-01, -1.434e-01, 4.095e-02, -1.961e-01, -1.314e-02, -4.131e-02, 
1.397e-02, 8.244e-02, 1.721e-01, -1.346e-01, -1.487e-01, -4.334e-02, 4.678e-02, 1.914e-02, 2.189e-02, -2.744e-01)); + r += mul(s5_0, M4(3.505e-02, -1.893e-02, -6.174e-02, 7.140e-02, 5.862e-02, -7.290e-02, -4.338e-02, -1.928e-01, -6.280e-02, 2.682e-02, 6.347e-02, -1.203e-01, 6.547e-04, 1.097e-01, 1.354e-01, 2.270e-02)); + r += mul(s5_1, M4(1.004e-01, -6.857e-03, -1.677e-01, -5.980e-02, -3.806e-02, 1.204e-01, 1.410e-01, -1.282e-01, 1.340e-01, -1.460e-01, 2.107e-03, -2.031e-02, 8.105e-02, -2.499e-01, -1.983e-02, 1.698e-02)); + r += mul(s5_2, M4(5.069e-02, -8.407e-02, 8.328e-02, 1.169e-01, -2.470e-02, 9.539e-02, 1.750e-02, 1.075e-01, -8.137e-02, 3.938e-02, -1.778e-02, 1.271e-01, -1.941e-02, -9.652e-03, 3.720e-02, 1.866e-01)); + r += mul(s5_3, M4(-5.189e-02, 9.037e-02, -1.478e-01, 1.589e-01, -7.698e-02, 1.711e-02, 7.525e-04, 1.649e-02, 1.388e-02, 2.732e-02, -1.145e-02, -1.458e-02, -4.554e-02, -1.471e-02, 5.291e-02, 1.993e-03)); + r += mul(s5_4, M4(7.997e-03, -1.809e-02, 2.333e-01, 7.397e-02, -1.589e-01, -4.639e-02, 2.745e-02, -1.362e-01, -1.537e-02, 5.553e-03, -8.906e-02, -5.364e-02, -2.010e-02, 9.191e-02, -4.399e-02, -8.397e-02)); + r += mul(s5_5, M4(-3.699e-02, 4.243e-02, 6.462e-02, -1.352e-01, -9.268e-02, 1.362e-01, 2.535e-02, 1.417e-01, 2.323e-02, -9.547e-02, 2.809e-02, 4.400e-02, -7.593e-02, -2.419e-01, -6.286e-02, 8.863e-02)); + r += mul(s5_6, M4(8.169e-02, 1.890e-02, 7.678e-02, -3.580e-02, -1.117e-01, 1.031e-02, 5.636e-02, 1.345e-02, 3.059e-03, 8.812e-02, -2.234e-02, -1.886e-01, -2.863e-02, 4.068e-02, -3.889e-02, -3.714e-02)); + r += mul(s5_7, M4(-9.569e-02, -2.113e-02, -6.659e-02, 5.730e-02, -8.431e-02, -3.651e-03, -1.317e-03, 5.162e-02, -1.783e-02, 1.012e-01, 9.033e-02, 1.092e-01, -2.062e-02, 7.236e-02, -2.073e-03, 4.080e-02)); + r += mul(s5_8, M4(-1.102e-01, -4.933e-02, 1.970e-02, -1.550e-01, -1.312e-02, 4.186e-02, -3.089e-02, 7.010e-02, 1.350e-01, -3.695e-02, -3.183e-02, 9.297e-02, -7.847e-02, -3.134e-02, 5.704e-02, -1.429e-01)); + r += mul(s6_0, 
M4(7.000e-03, -7.842e-02, 1.152e-01, 3.083e-02, 4.837e-02, 7.915e-02, -8.436e-02, -1.769e-01, 4.239e-02, -3.625e-02, -7.933e-02, -1.820e-02, 8.132e-03, -3.315e-02, -7.444e-02, -5.075e-02)); + r += mul(s6_1, M4(3.911e-02, 7.855e-02, 3.165e-02, 1.165e-01, -6.093e-02, -1.520e-01, -1.830e-02, -2.941e-03, 1.659e-02, 1.346e-01, 2.275e-02, -7.598e-03, -1.072e-01, 6.206e-02, -1.064e-01, 1.363e-01)); + r += mul(s6_2, M4(-3.572e-02, -1.017e-01, -3.197e-02, -1.633e-01, 3.489e-02, -8.531e-02, 1.401e-01, -1.635e-02, -6.238e-02, -1.510e-01, 6.321e-02, 1.244e-01, -9.047e-02, -8.580e-02, 3.259e-02, 7.443e-02)); + r += mul(s6_3, M4(1.415e-02, 3.246e-02, 6.484e-02, 2.144e-01, -1.114e-02, 6.499e-02, -2.476e-01, -9.417e-02, 7.232e-02, -9.882e-02, -4.253e-02, -8.914e-02, 1.202e-01, -4.441e-02, -1.108e-01, -1.293e-01)); + r += mul(s6_4, M4(1.240e-01, -1.149e-02, 4.045e-02, -4.748e-02, -6.501e-02, -3.682e-02, 9.060e-02, 1.020e-01, -5.793e-02, 4.212e-02, 1.733e-01, -3.874e-02, -8.168e-02, 2.860e-01, 7.321e-02, -9.710e-02)); + r += mul(s6_5, M4(-1.567e-01, -8.936e-02, 1.170e-01, -1.707e-01, 1.136e-01, 1.074e-01, 6.826e-02, 8.583e-02, -1.006e-01, 2.438e-01, 9.477e-03, 1.312e-01, 1.575e-01, -8.688e-04, 9.217e-02, 5.701e-03)); + r += mul(s6_6, M4(-6.751e-02, -1.474e-01, 7.120e-02, -1.665e-01, 2.879e-02, -3.259e-02, -4.251e-02, 1.488e-02, 9.614e-02, -5.991e-02, 1.763e-01, 1.456e-02, 3.111e-02, 5.714e-02, -1.470e-01, -8.239e-02)); + r += mul(s6_7, M4(1.637e-01, -5.533e-02, -2.331e-01, -1.128e-01, 1.022e-01, -8.590e-02, -4.325e-02, 1.635e-01, -1.229e-01, 9.051e-02, 6.749e-02, 3.805e-02, -5.861e-02, -2.092e-02, 2.744e-02, 5.822e-02)); + r += mul(s6_8, M4(2.953e-01, 1.238e-01, -9.868e-03, 3.260e-01, 1.625e-01, 5.333e-02, 5.171e-02, -6.420e-03, -2.182e-01, 3.314e-02, -1.977e-02, 1.174e-01, 9.432e-04, -5.159e-02, -3.402e-02, -2.400e-02)); + r += mul(s7_0, M4(-1.764e-03, -4.008e-02, 2.166e-02, 1.506e-01, -1.484e-01, -3.399e-02, 2.949e-02, 5.770e-02, -8.984e-02, 5.094e-02, 5.657e-03, 5.733e-02, 
-2.079e-02, -8.383e-03, -3.833e-02, 6.736e-02)); + r += mul(s7_1, M4(-6.793e-02, -3.721e-02, 4.122e-02, -8.749e-02, 2.187e-02, 1.206e-01, -5.195e-02, 1.270e-01, -1.035e-02, 7.504e-02, 4.991e-02, -7.404e-02, 4.917e-02, 7.537e-03, -2.237e-02, 8.293e-02)); + r += mul(s7_2, M4(-9.762e-02, 3.176e-02, 3.020e-02, -2.093e-01, -6.846e-02, 6.975e-02, -5.583e-03, 1.134e-02, -1.819e-01, 2.187e-01, 6.703e-02, 9.355e-02, 2.201e-02, -1.026e-01, -1.755e-02, -4.572e-02)); + r += mul(s7_3, M4(1.023e-01, 4.612e-02, 7.730e-02, 1.702e-01, 1.045e-01, 2.049e-02, -2.142e-01, -2.667e-02, -1.555e-01, -3.456e-02, 2.766e-02, 2.139e-01, -6.208e-02, -3.642e-02, -3.100e-02, -7.824e-02)); + r += mul(s7_4, M4(-3.174e-02, 3.798e-02, 3.780e-02, -1.580e-01, 7.051e-03, -7.929e-02, -7.760e-02, -1.234e-01, -4.611e-02, 1.214e-01, -6.567e-03, 1.542e-01, 1.697e-03, -9.982e-02, 1.262e-02, 6.704e-03)); + r += mul(s7_5, M4(-5.593e-02, 7.511e-02, 1.113e-02, 6.125e-02, 1.121e-01, 2.987e-02, 1.123e-01, 8.575e-02, -1.579e-01, 1.389e-01, 1.399e-02, 1.299e-01, 1.241e-01, 1.171e-01, 8.747e-02, -8.339e-02)); + r += mul(s7_6, M4(3.187e-02, -2.891e-02, -8.751e-02, -1.304e-01, 7.755e-02, -7.256e-02, -5.659e-02, -3.308e-02, 9.382e-02, -4.571e-02, 2.681e-02, -3.575e-03, -8.575e-03, 9.085e-02, -1.202e-01, -9.952e-02)); + r += mul(s7_7, M4(-5.262e-02, -1.290e-02, -9.283e-02, -8.499e-02, 1.011e-02, -2.862e-03, 1.140e-01, 7.442e-02, 1.927e-01, -2.441e-02, 1.301e-02, -1.838e-01, 2.864e-02, 2.634e-02, 1.810e-01, 4.947e-02)); + r += mul(s7_8, M4(2.631e-02, -3.746e-02, 3.510e-02, 7.880e-02, -9.427e-02, 8.804e-02, 3.893e-02, 8.391e-02, 2.058e-01, 7.800e-02, -5.539e-02, -8.611e-02, 1.941e-03, -2.240e-02, 4.233e-02, 1.454e-01)); + r += V4(-1.409e-02, -5.309e-03, -2.197e-02, 3.849e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 
s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.910e-02, -1.346e-03, 2.454e-02, 9.515e-02, -2.567e-02, -2.216e-01, -9.989e-02, 2.110e-01, -8.455e-02, -4.117e-04, 4.897e-03, -2.244e-02, -1.606e-01, 1.286e-01, 1.370e-02, 1.414e-01)); + r += mul(s0_1, M4(1.206e-01, 4.406e-02, 7.571e-02, -7.560e-02, -3.542e-01, -2.639e-01, 4.388e-02, 3.168e-01, -5.599e-02, 1.577e-01, -1.223e-01, -8.276e-02, 1.556e-01, -2.275e-01, 1.533e-01, -9.174e-02)); + r += mul(s0_2, M4(1.023e-01, 3.794e-02, 2.122e-02, 1.274e-02, 7.343e-01, -1.151e+00, -1.053e-01, -1.710e-01, -7.460e-02, -1.285e-01, -2.051e-02, 3.872e-02, -3.458e-01, -2.282e-01, -3.145e-01, -2.288e-02)); + r += mul(s0_3, M4(-2.358e-01, -4.073e-02, -2.179e-02, 4.840e-02, 3.132e-02, 8.168e-02, 9.321e-02, -9.937e-02, -4.727e-02, 6.235e-02, -9.533e-03, 1.042e-01, 1.246e-01, -2.124e-01, -1.625e-01, 2.047e-01)); + r += mul(s0_4, M4(-6.673e-02, 3.943e-02, 1.112e-01, -4.216e-02, -3.215e-01, -1.560e-01, 5.723e-01, 6.170e-01, 2.162e-01, 9.800e-03, -1.509e-01, -1.570e-01, 1.151e-01, 2.296e-01, -1.126e-01, -1.495e-01)); + r += mul(s0_5, M4(7.771e-02, -1.602e-01, -7.557e-02, -1.400e-02, 2.698e-01, 1.202e-01, -2.564e-01, -1.989e-01, 6.265e-02, -6.266e-02, 9.504e-02, -1.056e-01, -1.345e-01, 3.163e-02, -5.555e-02, -5.943e-02)); + r += mul(s0_6, M4(5.680e-02, 9.451e-02, 4.010e-02, -3.810e-02, -1.349e-01, 1.115e-01, -9.722e-02, -3.056e-02, -5.044e-02, -9.835e-02, -4.577e-02, -1.480e-02, 3.269e-02, 2.935e-02, 7.189e-02, 2.296e-01)); + r += mul(s0_7, M4(6.537e-02, -6.818e-04, -1.670e-02, 3.719e-02, -3.406e-02, 1.657e-01, -1.210e-01, 1.763e-01, 
-2.455e-02, 7.424e-02, 2.091e-01, 3.205e-02, 1.519e-02, -1.908e-01, 1.565e-01, -1.786e-02)); + r += mul(s0_8, M4(-1.446e-01, -8.694e-03, -7.337e-02, 5.499e-02, 2.439e-01, -9.560e-02, 1.041e-01, -1.820e-02, 4.636e-02, -3.565e-03, 1.219e-02, 6.830e-04, -3.370e-01, -3.640e-01, -2.115e-01, 9.002e-02)); + r += mul(s1_0, M4(1.342e-01, -1.733e-01, -1.008e-01, 1.538e-01, 1.408e-01, -2.912e-02, -4.282e-02, 3.716e-04, -3.969e-01, 4.119e-01, 3.072e-01, -4.043e-01, 6.660e-03, 9.480e-02, 2.302e-02, -2.741e-02)); + r += mul(s1_1, M4(-2.493e-01, -1.930e-01, -1.874e-01, 9.163e-02, -5.913e-02, 1.647e-01, -6.130e-02, 1.338e-01, 1.508e-01, -2.693e-01, -6.343e-02, -5.786e-01, -8.325e-02, 1.340e-01, -7.264e-02, 6.374e-02)); + r += mul(s1_2, M4(2.620e-01, -2.452e-01, -8.229e-02, 1.755e-01, -1.724e-02, -1.251e-02, 1.613e-02, 1.246e-04, 3.248e-01, 1.872e-01, 4.909e-02, -2.383e-01, 1.286e-01, 1.288e-03, -4.551e-03, -7.112e-03)); + r += mul(s1_3, M4(-1.368e-01, -2.489e-01, 8.261e-02, 1.465e-01, -9.940e-02, 5.532e-02, -6.636e-03, 3.957e-02, 3.887e-02, -4.220e-02, -1.266e-01, -2.484e-01, 1.653e-02, -1.762e-01, -6.384e-02, 6.650e-02)); + r += mul(s1_4, M4(-1.322e-01, -2.508e-01, 9.695e-02, 2.759e-01, 1.711e-01, 1.154e-01, 8.387e-02, -9.068e-02, -9.442e-02, -3.201e-01, -4.418e-01, -8.104e-01, 5.357e-02, 1.851e-03, -1.177e-01, -1.326e-01)); + r += mul(s1_5, M4(-3.025e-01, -2.253e-01, 9.386e-03, -3.228e-01, -1.313e-03, -5.290e-02, 1.473e-02, -7.331e-02, 2.042e-01, -3.828e-01, -2.386e-01, -3.321e-01, -1.579e-02, -3.431e-02, 2.951e-02, -4.669e-02)); + r += mul(s1_6, M4(8.848e-02, 1.254e-01, 9.570e-02, 4.379e-02, -3.451e-02, -1.055e-01, 2.921e-02, -6.082e-02, -5.281e-02, -2.216e-01, 2.752e-01, -2.568e-02, 7.631e-02, 3.464e-02, -6.274e-02, 9.265e-02)); + r += mul(s1_7, M4(4.651e-03, -1.185e-01, 6.418e-02, 2.380e-01, -1.132e-01, -2.253e-02, 2.184e-02, 4.280e-02, 9.103e-02, -2.528e-01, 2.170e-01, -1.235e-01, 5.277e-02, -5.816e-02, 1.368e-02, 6.526e-03)); + r += mul(s1_8, M4(3.757e-02, 1.687e-01, 
1.237e-01, 7.899e-02, 2.324e-02, -8.211e-03, -1.375e-01, 1.172e-02, -2.546e-01, 1.848e-01, -7.504e-02, -6.435e-02, -1.833e-02, -1.265e-02, 1.700e-02, -2.657e-02)); + r += mul(s2_0, M4(-9.294e-02, 1.013e-02, -7.953e-02, -1.553e-02, -2.615e-02, 6.623e-02, 1.323e-01, 4.525e-03, 1.813e-01, -6.395e-02, 2.938e-02, -6.379e-02, 1.081e-01, 1.796e-02, 9.546e-02, -2.721e-02)); + r += mul(s2_1, M4(-1.086e-01, -1.629e-01, -2.253e-01, 8.716e-02, 1.048e-01, -4.209e-02, 5.305e-02, 2.594e-02, -1.080e-01, -7.385e-02, 1.227e-01, 1.193e-01, 2.164e-02, -5.632e-02, -3.401e-02, 2.353e-03)); + r += mul(s2_2, M4(-1.562e-01, 1.025e-02, 3.543e-03, -1.539e-01, -2.406e-01, 6.562e-02, -1.145e-01, 1.565e-01, 1.130e-01, 5.315e-02, 6.167e-02, -1.898e-02, -1.894e-01, -1.898e-02, -2.112e-02, 4.767e-02)); + r += mul(s2_3, M4(-1.481e-01, 3.875e-01, 2.640e-02, -6.552e-02, -9.991e-02, -1.803e-01, 2.441e-02, -6.851e-02, 5.002e-02, -1.789e-01, 2.154e-02, -3.099e-01, 7.189e-02, -3.120e-02, 2.074e-03, -1.275e-01)); + r += mul(s2_4, M4(-8.695e-02, 1.189e-01, -1.396e-01, 2.270e-02, -8.596e-02, -2.440e-01, 3.163e-01, 4.942e-02, -2.263e-01, 3.490e-02, 1.214e-01, 2.689e-01, -1.303e-01, -1.430e-01, -2.505e-02, 4.238e-02)); + r += mul(s2_5, M4(-4.874e-01, 4.404e-01, -2.059e-01, -2.963e-01, 4.127e-02, -2.824e-01, 6.026e-02, -1.425e-01, 1.109e-04, -1.551e-01, -1.165e-01, -2.714e-02, 1.095e-01, 7.539e-02, -4.453e-02, 4.011e-02)); + r += mul(s2_6, M4(2.985e-01, 4.725e-02, 6.356e-02, -6.901e-02, -8.920e-02, 8.782e-02, -3.832e-02, 6.666e-02, 7.696e-02, 4.822e-02, -3.021e-02, -2.950e-01, -7.946e-02, -7.889e-03, -1.475e-01, 1.356e-01)); + r += mul(s2_7, M4(2.859e-01, -1.710e-01, -8.794e-02, -2.221e-01, 1.431e-01, 1.285e-01, 3.526e-02, -6.195e-02, 1.243e-01, 1.154e-01, -4.975e-02, 2.524e-01, 6.860e-02, 1.609e-01, 1.547e-01, -3.198e-02)); + r += mul(s2_8, M4(-8.444e-02, 9.977e-03, -5.097e-02, -8.137e-02, -2.073e-02, 1.751e-01, 1.544e-02, 1.690e-02, 8.271e-02, 1.022e-01, -2.783e-03, 6.596e-02, 6.483e-02, -4.162e-02, 
1.039e-01, -6.861e-02)); + r += mul(s3_0, M4(4.422e-02, 1.581e-02, -9.880e-03, -5.861e-02, 1.328e-02, 3.390e-02, 1.026e-01, -4.730e-02, -6.963e-02, -5.586e-02, -6.261e-02, -1.890e-02, 2.215e-01, -3.017e-02, -9.074e-02, 1.421e-01)); + r += mul(s3_1, M4(-7.360e-02, -4.079e-02, -4.623e-02, -4.983e-02, -9.094e-02, -5.667e-03, -8.593e-02, -6.646e-03, -3.160e-02, 4.342e-02, 5.631e-02, 3.603e-02, 1.442e-02, 4.006e-02, 8.321e-02, -3.482e-02)); + r += mul(s3_2, M4(9.281e-03, 9.378e-03, 3.397e-03, -7.809e-02, -3.123e-03, 9.304e-02, 3.233e-02, -7.080e-02, -9.136e-02, -1.127e-02, 4.820e-02, -2.933e-02, 2.321e-01, 1.058e-01, 1.952e-02, -2.344e-02)); + r += mul(s3_3, M4(7.925e-02, 1.257e-02, 1.241e-01, -8.064e-02, 1.121e-01, -2.614e-02, -6.152e-03, 3.064e-03, 9.139e-02, -1.356e-01, -6.281e-02, -1.211e-01, -3.034e-03, -4.127e-01, 1.209e-01, -1.396e-02)); + r += mul(s3_4, M4(2.607e-02, -2.639e-02, -3.220e-02, -3.878e-02, -6.525e-02, 4.209e-02, -1.721e-02, 1.705e-01, -2.014e-01, -2.019e-01, 6.405e-02, 1.089e-01, -2.453e-02, -6.004e-02, -5.524e-02, -1.662e-01)); + r += mul(s3_5, M4(9.753e-02, 1.020e-01, 1.356e-02, -1.618e-01, 4.161e-02, 9.689e-02, -9.058e-02, -3.701e-02, -6.306e-02, 1.040e-02, -8.205e-02, 2.906e-02, -1.028e-01, -2.493e-02, -8.458e-02, -7.241e-03)); + r += mul(s3_6, M4(2.107e-01, 3.179e-02, 7.236e-02, -1.228e-02, 3.019e-02, -5.722e-02, -2.034e-02, 6.010e-02, -6.609e-02, -3.052e-02, -4.948e-02, -1.441e-01, 2.229e-02, 2.236e-02, -1.392e-01, 2.960e-02)); + r += mul(s3_7, M4(-8.226e-02, -3.166e-02, -1.714e-01, -1.936e-01, -5.084e-02, -4.090e-02, -3.542e-03, 2.023e-02, 1.543e-01, -2.059e-02, 1.047e-01, -3.326e-02, 3.940e-02, -1.899e-01, 1.399e-01, -4.406e-03)); + r += mul(s3_8, M4(1.131e-01, 6.519e-02, 4.679e-02, -2.254e-02, 2.231e-02, 8.131e-02, -6.404e-02, 2.411e-02, 5.227e-02, 2.303e-02, 8.176e-02, -4.657e-03, 1.590e-02, 3.467e-01, 1.593e-03, 2.102e-02)); + r += mul(s4_0, M4(8.652e-02, -1.284e-01, -2.884e-02, 1.138e-01, 1.322e-01, 1.236e-02, 1.831e-03, -2.478e-02, 
1.264e-01, 1.572e-01, 3.455e-02, -3.646e-03, -2.191e-01, -3.167e-01, -6.969e-02, -5.147e-02)); + r += mul(s4_1, M4(-5.470e-03, 9.407e-02, -8.282e-02, -3.270e-02, -6.543e-02, -7.489e-02, 1.091e-01, -1.572e-01, -6.172e-02, 7.606e-02, 5.424e-02, 1.236e-02, -2.037e-02, -5.677e-02, -1.353e-01, 6.602e-02)); + r += mul(s4_2, M4(-7.001e-02, -4.892e-02, 3.917e-02, -1.065e-01, -1.347e-01, -5.105e-02, 7.258e-02, 7.781e-03, 3.858e-02, -2.034e-01, -1.135e-02, -4.247e-02, 2.080e-01, -7.468e-02, -1.163e-01, 4.210e-02)); + r += mul(s4_3, M4(1.452e-01, 9.147e-02, 1.409e-01, -1.277e-02, -6.944e-02, 3.940e-03, -1.402e-01, 1.620e-01, 5.567e-02, 5.582e-02, 2.918e-02, -1.192e-01, 6.997e-02, 2.027e-01, 1.534e-01, -3.064e-02)); + r += mul(s4_4, M4(2.874e-02, 3.527e-01, -5.966e-02, -1.812e-01, -5.436e-02, -1.563e-02, -1.353e-01, 2.086e-01, 1.382e-01, 1.635e-01, 2.479e-01, -5.897e-02, 2.445e-01, -1.174e-01, 6.195e-02, -9.888e-03)); + r += mul(s4_5, M4(1.757e-01, 2.398e-01, -5.628e-02, 1.164e-01, -8.532e-02, 8.031e-02, -6.165e-02, 2.103e-02, -6.666e-02, 1.506e-01, 8.690e-02, -2.243e-01, -8.303e-02, -5.884e-02, -1.021e-01, -4.417e-02)); + r += mul(s4_6, M4(-4.968e-02, -1.657e-01, -2.294e-02, -1.225e-02, 2.877e-02, -9.153e-02, 1.038e-01, 2.791e-02, 2.448e-02, 1.086e-02, -8.549e-02, 2.828e-02, 3.220e-02, -1.614e-01, 3.436e-01, -1.104e-01)); + r += mul(s4_7, M4(-6.889e-02, 1.727e-02, -1.053e-01, 1.116e-01, 7.029e-02, -2.184e-02, 5.901e-02, -2.185e-02, 2.445e-02, 2.161e-02, 1.922e-01, 9.900e-02, -8.082e-02, -9.316e-02, 5.300e-02, -6.988e-03)); + r += mul(s4_8, M4(-6.002e-02, -1.637e-01, -1.488e-01, -9.243e-02, -4.750e-02, -3.129e-02, -1.950e-02, 1.272e-02, 4.738e-02, 1.805e-01, 2.190e-02, -1.054e-02, -1.530e-01, 2.715e-02, 2.288e-02, 1.901e-02)); + r += mul(s5_0, M4(3.872e-02, 1.505e-01, -8.235e-02, 8.083e-02, -4.113e-03, 7.945e-02, -1.057e-01, -2.004e-02, -4.455e-02, 1.053e-01, -1.596e-02, 5.646e-03, -1.078e-01, 1.295e-02, -4.217e-02, 4.942e-02)); + r += mul(s5_1, M4(5.422e-02, 3.384e-02, 
-8.833e-02, -8.370e-03, -2.118e-01, 1.877e-01, -7.199e-02, -8.065e-02, -1.102e-02, -5.483e-02, -8.391e-02, 1.016e-01, 4.257e-02, 9.185e-02, -1.008e-02, -1.069e-02)); + r += mul(s5_2, M4(7.060e-02, -6.159e-02, -1.882e-02, 5.668e-02, -2.046e-01, -1.083e-02, 5.236e-02, -3.329e-02, -1.156e-02, 3.036e-02, 7.037e-02, -1.499e-02, 7.131e-02, -4.113e-02, -2.594e-02, 2.417e-03)); + r += mul(s5_3, M4(-6.863e-05, -1.094e-02, 2.728e-02, -5.025e-02, 4.458e-02, 1.265e-01, 4.250e-01, 1.585e-01, 1.626e-01, -1.132e-02, -4.823e-03, 4.450e-02, -2.444e-01, -1.569e-03, 9.930e-02, -1.348e-01)); + r += mul(s5_4, M4(-1.646e-01, -7.413e-02, -8.431e-02, 4.586e-02, -1.124e-02, 3.074e-02, 3.820e-01, 1.120e-01, -7.599e-02, -5.630e-02, 2.247e-02, -3.743e-02, 4.557e-02, 4.874e-02, 1.513e-01, -2.207e-02)); + r += mul(s5_5, M4(-5.701e-05, -1.207e-01, 3.735e-03, 2.883e-02, 3.513e-02, 1.253e-01, 1.534e-01, -2.376e-03, -1.092e-01, -7.901e-03, -3.538e-02, -6.236e-02, -7.819e-02, -2.731e-02, -6.718e-02, -8.002e-05)); + r += mul(s5_6, M4(1.743e-02, -6.440e-02, 1.479e-01, -1.086e-02, -6.552e-02, 2.856e-02, -5.184e-03, -5.313e-02, 3.404e-02, 4.957e-02, -6.585e-02, -1.676e-03, -7.506e-02, 5.887e-02, -1.964e-02, 2.131e-02)); + r += mul(s5_7, M4(4.939e-02, 5.003e-02, 9.394e-02, -7.249e-02, 4.573e-02, -3.546e-03, 1.632e-02, 2.770e-03, -4.211e-02, -4.193e-02, -4.854e-02, 5.925e-02, -1.671e-02, 1.784e-01, 2.091e-02, -5.425e-02)); + r += mul(s5_8, M4(-1.224e-01, -9.666e-03, -1.224e-02, -3.442e-02, -3.580e-02, -1.629e-02, -2.274e-02, 2.918e-03, 4.514e-02, -3.651e-02, 3.280e-02, -4.425e-02, -3.234e-02, -3.197e-02, -3.062e-02, -3.570e-02)); + r += mul(s6_0, M4(-1.514e-01, 1.460e-01, 6.691e-02, -1.046e-01, 5.241e-02, -1.531e-01, -4.389e-02, 4.566e-02, -1.250e-01, 5.954e-02, -2.963e-02, 1.083e-02, 1.568e-01, 8.511e-02, 4.253e-02, -3.083e-02)); + r += mul(s6_1, M4(1.804e-01, -1.404e-01, 1.849e-02, -9.253e-03, 1.736e-01, -3.482e-01, 4.334e-02, -5.884e-03, -6.969e-02, -5.277e-02, 6.883e-02, -1.137e-01, 4.973e-02, 
1.512e-01, 6.824e-02, -1.195e-02)); + r += mul(s6_2, M4(-8.969e-02, 8.610e-02, 7.556e-03, -9.505e-02, 8.152e-02, 8.838e-03, 4.312e-02, 1.702e-01, -8.846e-02, 1.011e-01, 2.993e-02, -6.197e-02, -1.164e-01, -2.880e-02, 5.386e-02, 3.726e-02)); + r += mul(s6_3, M4(-3.430e-01, 1.598e-01, -1.753e-01, -3.650e-02, -9.155e-02, -4.481e-02, 3.697e-02, -6.169e-02, -4.163e-02, -3.134e-02, -1.469e-01, 1.078e-02, 2.664e-02, 5.054e-02, 8.484e-02, -9.056e-02)); + r += mul(s6_4, M4(-1.396e-01, 1.672e-01, -3.294e-02, 6.874e-02, -1.064e-01, 1.469e-01, 9.447e-02, 8.395e-03, 4.008e-02, -2.137e-02, -7.051e-02, -8.558e-02, -4.070e-02, -2.618e-02, 4.764e-02, 5.799e-02)); + r += mul(s6_5, M4(-1.929e-01, -8.364e-02, 4.608e-02, -3.928e-02, -1.528e-01, -4.131e-02, 1.455e-02, -5.769e-02, -1.404e-01, -4.487e-02, -8.932e-02, -5.865e-02, -1.973e-03, 9.755e-03, -8.237e-02, -9.573e-02)); + r += mul(s6_6, M4(-3.181e-01, -1.968e-02, -2.393e-01, 4.048e-02, -6.190e-02, 3.130e-02, 7.706e-02, -4.039e-03, -8.436e-02, 5.525e-02, 7.837e-02, 1.824e-02, 1.744e-02, -1.078e-01, -1.738e-01, 8.633e-02)); + r += mul(s6_7, M4(1.399e-01, 1.522e-01, 3.627e-02, -1.353e-01, -1.899e-01, -1.745e-01, 1.224e-02, 1.400e-03, -3.581e-02, -6.041e-02, 1.819e-01, 1.422e-01, 2.414e-02, 2.798e-02, 1.311e-01, 3.283e-02)); + r += mul(s6_8, M4(-5.285e-02, 2.779e-01, -8.815e-02, 1.794e-01, 6.063e-02, -6.624e-02, 9.881e-02, -1.517e-02, 1.415e-01, -1.082e-01, 3.647e-02, -8.643e-04, -6.027e-02, 1.962e-02, 3.728e-03, 1.870e-03)); + r += mul(s7_0, M4(1.560e-01, -6.796e-02, 1.324e-01, -6.313e-02, -5.917e-02, 2.029e-01, 2.140e-04, -1.140e-01, -1.656e-01, 8.067e-02, -5.688e-02, 2.288e-02, -3.817e-02, 1.267e-04, 9.234e-03, -6.582e-02)); + r += mul(s7_1, M4(-1.886e-01, 1.397e-02, -1.929e-01, 7.354e-02, 7.400e-02, -8.945e-02, -3.444e-02, -1.032e-02, 6.883e-03, -1.109e-01, -3.822e-02, -6.947e-02, -2.281e-02, 3.418e-02, 1.238e-01, -9.853e-02)); + r += mul(s7_2, M4(9.857e-02, -1.256e-01, 2.090e-02, -2.557e-02, -6.201e-02, 1.175e-01, -6.024e-02, 
9.497e-02, 1.153e-02, -1.424e-01, -9.995e-02, 8.095e-02, -2.387e-02, -9.371e-03, 2.388e-02, -4.448e-02)); + r += mul(s7_3, M4(-8.901e-02, 3.328e-02, 4.116e-02, 4.701e-02, 1.475e-01, 1.681e-01, -6.418e-03, 1.747e-02, 8.457e-02, 8.317e-02, 7.036e-02, -1.067e-02, 2.067e-01, -1.148e-01, 4.731e-02, -8.135e-03)); + r += mul(s7_4, M4(-1.313e-02, 1.246e-02, -4.024e-02, -1.197e-02, 4.666e-02, 7.153e-02, -1.310e-01, 5.437e-02, 2.997e-02, -3.104e-02, 1.004e-01, -1.977e-01, 8.515e-02, -9.532e-02, -4.444e-02, -6.407e-02)); + r += mul(s7_5, M4(-1.106e-01, 1.017e-01, -1.921e-02, -3.096e-02, -4.821e-02, -3.793e-03, -3.594e-02, 4.888e-02, 3.435e-02, -1.044e-01, 5.074e-02, 6.108e-03, 1.200e-02, -1.113e-01, -1.319e-01, 2.259e-02)); + r += mul(s7_6, M4(-5.923e-02, 6.856e-02, -6.103e-02, -4.680e-02, -5.272e-02, 4.327e-02, 5.696e-02, 3.384e-03, -6.412e-02, -6.521e-02, -6.295e-02, -1.029e-01, -1.510e-01, 5.642e-02, -1.521e-02, 5.165e-02)); + r += mul(s7_7, M4(-6.138e-03, -1.991e-02, 7.163e-02, 4.382e-02, -1.717e-03, -7.319e-02, 4.448e-02, -1.656e-02, 3.101e-02, -1.616e-01, -2.131e-01, 1.369e-01, -4.679e-02, -5.206e-02, -1.034e-01, 3.881e-02)); + r += mul(s7_8, M4(4.015e-02, -1.615e-01, -1.872e-02, -2.719e-02, 8.566e-02, 5.263e-02, 6.620e-02, -8.191e-02, 2.352e-02, -6.728e-03, -2.114e-01, 1.403e-01, -6.310e-02, -1.614e-02, -4.952e-02, 8.593e-03)); + r += V4(1.236e-02, 1.038e-02, 6.374e-03, 9.167e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 
s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-9.451e-02, -8.048e-02, 5.369e-02, -5.667e-02, -2.072e-01, -5.991e-02, 5.108e-02, 5.683e-02, 5.525e-02, 4.765e-02, 4.061e-02, -3.167e-02, -8.613e-02, -7.323e-03, -7.469e-02, -4.435e-01)); + r += mul(s0_1, M4(8.076e-03, -7.539e-02, -7.807e-02, -5.834e-02, -8.527e-02, -2.725e-01, -1.052e-01, 4.591e-02, 6.218e-02, 5.475e-02, 7.220e-02, 7.611e-02, 1.854e-01, -2.912e-02, 2.338e-01, -1.314e-01)); + r += mul(s0_2, M4(2.337e-02, 5.448e-02, -4.442e-02, -1.076e-02, 7.776e-02, 4.370e-01, 1.315e-01, 9.939e-01, -3.630e-02, -1.193e-01, -6.207e-02, -2.774e-02, 1.735e-01, -1.294e-01, 9.411e-02, 3.621e-01)); + r += mul(s0_3, M4(1.539e-01, -6.189e-02, 3.427e-03, -5.864e-02, 7.490e-02, -2.073e-02, 1.147e-01, 4.822e-02, 1.203e-02, 4.232e-02, -1.874e-02, -5.372e-02, -1.737e-01, 1.252e-01, -8.991e-02, 2.871e-01)); + r += mul(s0_4, M4(6.459e-02, 6.310e-02, -8.822e-02, 7.055e-02, 1.086e-01, 4.429e-01, 3.774e-01, -2.788e-01, -1.163e-01, -1.472e-01, -3.744e-02, 2.693e-02, -2.319e-02, -1.295e-01, 1.367e-01, 4.929e-01)); + r += mul(s0_5, M4(7.148e-03, -1.485e-02, 2.525e-02, -1.562e-01, 6.559e-03, -1.490e-01, -2.802e-01, -9.884e-02, -5.677e-02, 2.006e-01, 5.245e-02, 1.053e-02, -1.087e-01, -7.758e-02, -1.782e-01, 9.191e-03)); + r += mul(s0_6, M4(1.809e-05, 3.315e-02, 2.993e-02, -2.552e-02, 6.115e-02, -4.337e-02, -6.610e-02, -2.199e-02, -6.712e-02, 1.125e-01, 3.812e-02, -7.054e-02, -3.886e-02, -5.372e-03, -8.008e-02, -2.972e-01)); + r += mul(s0_7, M4(-8.793e-02, -1.775e-02, 1.255e-01, 6.161e-02, -1.181e-01, -6.883e-02, 7.986e-02, -2.173e-01, 8.186e-02, -7.056e-02, -1.130e-01, -1.919e-02, 1.098e-01, 1.044e-01, 5.733e-02, 1.101e-01)); + r += mul(s0_8, M4(6.637e-02, -6.283e-02, 1.001e-01, -6.000e-02, -5.875e-02, 5.409e-02, -1.717e-01, 4.719e-02, 2.879e-02, -8.806e-03, -1.647e-02, 5.500e-02, -4.014e-01, -5.139e-02, 7.386e-02, 1.470e-01)); + r += mul(s1_0, M4(2.307e-01, -4.144e-01, 
-2.797e-02, -4.707e-02, 1.286e-01, 1.800e-02, -4.013e-02, -7.657e-03, 3.454e-02, -4.379e-01, -2.028e-01, 2.585e-01, -4.224e-02, -1.576e-01, -4.730e-02, -6.219e-02)); + r += mul(s1_1, M4(-5.665e-02, 3.092e-01, -2.782e-01, 4.133e-01, -2.158e-02, -1.469e-01, 2.683e-03, 6.333e-02, 4.256e-01, -1.403e-01, 4.976e-01, -7.155e-02, -1.295e-01, -3.946e-02, -6.470e-02, 6.424e-02)); + r += mul(s1_2, M4(-1.032e-01, -1.789e-01, 1.003e-01, -1.901e-01, -1.455e-01, 2.151e-02, 3.974e-02, -3.572e-02, 1.815e-01, 1.426e-02, -4.524e-01, 6.970e-01, 1.297e-01, 3.475e-02, -1.025e-01, -4.876e-02)); + r += mul(s1_3, M4(-8.318e-04, -3.434e-01, 9.668e-02, 2.092e-01, -3.396e-02, 4.078e-02, 4.601e-03, -6.833e-02, 5.617e-02, -3.474e-01, -2.318e-01, -4.633e-02, 1.066e-02, -1.118e-01, -3.939e-02, 7.404e-02)); + r += mul(s1_4, M4(2.567e-02, 7.887e-01, 2.479e-01, 1.984e-01, -3.046e-02, 9.736e-03, 8.833e-02, 1.055e-01, -5.627e-01, -4.178e-02, -5.462e-01, 7.956e-01, -2.458e-02, 6.355e-02, 2.734e-02, 7.263e-02)); + r += mul(s1_5, M4(2.634e-02, -2.494e-01, 1.911e-01, -2.585e-01, -5.987e-02, 9.074e-03, -1.284e-01, 5.261e-02, -1.073e-01, -2.491e-03, -5.666e-02, 1.647e-01, 1.558e-01, 8.153e-02, 6.985e-02, -4.197e-02)); + r += mul(s1_6, M4(-1.840e-01, -5.581e-03, -3.012e-01, 1.223e-01, 5.605e-03, -1.080e-01, -1.131e-01, -4.307e-02, -1.180e-01, 5.656e-02, -1.219e-01, 4.325e-01, 4.198e-02, 5.790e-02, -1.440e-01, 6.205e-03)); + r += mul(s1_7, M4(-3.818e-01, 1.567e-01, -2.418e-01, -1.454e-01, -9.046e-02, 4.173e-02, 3.588e-02, 1.034e-01, 3.701e-02, 1.275e-01, -3.048e-01, 2.711e-01, 6.572e-02, -4.940e-03, -9.529e-02, 4.867e-02)); + r += mul(s1_8, M4(-8.676e-02, 1.389e-03, 3.468e-01, 4.816e-01, -1.848e-02, -6.333e-02, -3.837e-02, -1.234e-01, 2.136e-02, 6.841e-02, 6.743e-02, 1.107e-01, 8.400e-02, -1.297e-02, -1.264e-01, -8.508e-02)); + r += mul(s2_0, M4(-2.131e-02, -2.104e-01, 1.638e-01, -8.293e-02, -9.037e-02, 4.916e-02, -1.957e-01, -8.879e-02, 9.205e-02, 1.654e-01, -1.031e-01, -2.214e-01, -1.743e-01, 1.449e-01, 
-4.730e-02, -6.000e-02)); + r += mul(s2_1, M4(-3.365e-01, -1.978e-01, 3.707e-02, 6.115e-02, -4.289e-02, -5.301e-02, -1.001e-02, 1.202e-02, 1.495e-01, 1.076e-01, -5.975e-02, 1.433e-02, 6.840e-02, -1.202e-01, 3.306e-02, -1.025e-01)); + r += mul(s2_2, M4(3.111e-02, 1.102e-01, -4.688e-02, -1.577e-01, -2.359e-02, -1.470e-02, -4.882e-02, -3.525e-02, -2.976e-02, -1.291e-01, -4.188e-02, -4.209e-02, -8.532e-02, -8.075e-02, 5.718e-02, -6.961e-02)); + r += mul(s2_3, M4(1.919e-02, -1.528e-01, -2.901e-01, -8.766e-02, -4.717e-02, -6.101e-02, 3.063e-01, -2.145e-01, -4.494e-02, -1.846e-02, -1.263e-01, -3.688e-02, 2.173e-01, 2.255e-02, -8.102e-02, -7.545e-02)); + r += mul(s2_4, M4(7.429e-02, 1.561e-02, -2.923e-01, 2.937e-01, 2.279e-01, 6.875e-02, 4.931e-02, 1.392e-01, 1.645e-01, 2.231e-03, 9.785e-02, -3.869e-02, 1.152e-02, 4.158e-02, 4.528e-02, 7.134e-03)); + r += mul(s2_5, M4(2.593e-01, 1.893e-01, -2.803e-01, 3.636e-02, 7.413e-02, -4.588e-02, 5.632e-02, -1.740e-01, 1.797e-01, -5.844e-02, 2.958e-02, 1.210e-02, 6.115e-02, -1.162e-01, 4.693e-02, -3.177e-02)); + r += mul(s2_6, M4(-4.160e-02, 7.391e-02, -9.092e-02, -2.082e-01, 8.563e-04, 1.162e-01, -8.575e-02, -9.850e-02, -1.915e-02, 3.551e-03, 2.256e-01, 3.933e-02, -1.784e-01, -5.231e-02, 5.357e-02, 5.941e-02)); + r += mul(s2_7, M4(-1.422e-01, -1.924e-01, -5.101e-01, 2.173e-01, 1.098e-01, -8.625e-02, -1.524e-01, 1.452e-01, 9.209e-02, 4.612e-02, 8.386e-02, 1.956e-02, 1.502e-02, 1.187e-01, -5.087e-02, 5.047e-02)); + r += mul(s2_8, M4(4.191e-01, 3.144e-01, -2.598e-01, -1.059e-01, -3.728e-02, -1.192e-01, 2.195e-01, 2.741e-01, 1.262e-01, 1.995e-02, 4.819e-03, -1.496e-01, -4.040e-02, -5.422e-02, -2.942e-02, 6.987e-02)); + r += mul(s3_0, M4(2.280e-02, 1.997e-02, 2.485e-02, 7.311e-03, 4.967e-02, -2.036e-01, 1.536e-01, -1.270e-02, 3.613e-02, -1.530e-01, -7.195e-02, -8.008e-02, -1.026e-01, 1.341e-01, -1.314e-01, -9.852e-02)); + r += mul(s3_1, M4(-3.465e-02, 3.304e-02, 6.561e-02, 1.011e-03, -1.050e-01, -3.483e-02, 8.504e-03, -1.852e-01, 
1.206e-01, 3.555e-03, -9.008e-02, 6.713e-02, 1.782e-01, -2.761e-04, 1.040e-01, 1.370e-02)); + r += mul(s3_2, M4(-2.094e-02, -5.554e-02, -8.848e-02, -5.421e-03, 1.241e-01, 1.073e-01, -7.738e-02, 2.719e-02, 1.018e-01, 5.091e-02, 2.561e-02, -1.949e-01, 1.462e-01, -1.130e-01, -5.079e-02, -2.196e-01)); + r += mul(s3_3, M4(-4.744e-02, -1.533e-01, -1.130e-01, 1.053e-01, -6.196e-02, -2.036e-02, -1.566e-01, 5.937e-03, 5.398e-02, 1.856e-01, 5.320e-02, 1.518e-01, 1.420e-01, -2.743e-01, 2.931e-01, -2.954e-01)); + r += mul(s3_4, M4(1.409e-01, -3.840e-02, -8.466e-02, -2.254e-03, 4.529e-02, 1.144e-02, -4.301e-02, -2.002e-02, -6.700e-02, 7.169e-03, 6.365e-03, -1.614e-01, 1.332e-01, 1.322e-01, 1.185e-02, 2.030e-01)); + r += mul(s3_5, M4(-2.429e-02, -4.346e-03, 6.631e-02, -5.519e-02, 1.193e-01, 1.089e-01, 2.066e-02, -7.436e-02, 1.470e-01, 1.749e-02, -4.831e-02, 2.142e-02, 1.873e-01, 3.743e-02, 1.066e-01, 3.061e-02)); + r += mul(s3_6, M4(-2.675e-02, 2.948e-03, -6.466e-02, 4.417e-02, -2.908e-02, 1.421e-01, 1.190e-02, -2.533e-02, -1.258e-01, -4.587e-02, -5.339e-03, -3.764e-02, 2.061e-01, -1.013e-01, 4.204e-02, 2.417e-01)); + r += mul(s3_7, M4(6.761e-02, -3.450e-03, 6.839e-02, 5.766e-02, 3.136e-02, 4.135e-02, 3.988e-02, 2.941e-02, -2.604e-02, -7.398e-02, -8.330e-02, -6.429e-02, -2.953e-01, -3.694e-02, -9.535e-02, 4.862e-02)); + r += mul(s3_8, M4(-9.741e-02, 1.621e-02, 1.165e-01, 2.534e-02, -5.319e-02, -6.189e-03, -1.302e-01, 1.306e-01, 7.124e-02, 1.972e-02, -1.041e-01, -9.609e-02, -1.352e-01, 1.664e-01, 2.183e-01, -1.409e-01)); + r += mul(s4_0, M4(1.180e-02, 7.883e-02, -4.791e-02, -1.474e-03, 1.532e-02, 1.588e-01, 1.788e-01, -3.478e-02, 1.967e-01, 2.111e-01, 1.125e-01, 7.545e-03, 3.395e-01, -1.968e-01, 3.471e-01, -6.706e-02)); + r += mul(s4_1, M4(-6.497e-03, -5.378e-02, 1.254e-01, -9.081e-02, -4.558e-02, 2.297e-01, 5.248e-02, -6.681e-02, 5.794e-02, -1.210e-01, 3.244e-01, -2.096e-01, 9.240e-02, -1.008e-01, 2.173e-01, 1.466e-01)); + r += mul(s4_2, M4(4.711e-02, 1.109e-01, 4.282e-02, 
5.398e-03, -3.371e-03, -3.552e-02, -2.389e-02, 2.334e-02, -7.598e-03, -3.631e-02, 1.836e-01, -1.318e-01, -1.984e-02, -2.181e-02, 2.962e-01, 1.040e-02)); + r += mul(s4_3, M4(-4.528e-02, -1.030e-01, 2.697e-02, 8.268e-02, 3.725e-02, 1.390e-01, 1.037e-01, -2.451e-02, 1.553e-01, 4.668e-02, -1.755e-01, -1.063e-01, -3.500e-02, -2.009e-01, -1.195e-01, 3.388e-01)); + r += mul(s4_4, M4(-4.469e-02, -1.694e-01, -1.762e-01, 2.289e-01, 3.673e-02, -1.784e-02, 3.723e-02, -6.764e-02, 3.353e-01, 1.479e-02, -7.523e-02, 5.749e-02, 2.422e-01, 2.620e-01, -2.914e-02, 4.852e-02)); + r += mul(s4_5, M4(-2.729e-03, -1.220e-01, -1.255e-01, 8.206e-02, -1.935e-02, 8.960e-02, -1.099e-02, -9.541e-02, 1.033e-01, -2.499e-01, -3.623e-01, 1.030e-03, 1.350e-01, 4.517e-02, 1.837e-01, 1.359e-02)); + r += mul(s4_6, M4(-1.412e-02, 5.391e-02, 5.106e-02, 3.886e-02, -3.444e-03, 5.329e-02, 4.036e-02, 1.804e-01, -1.043e-01, 1.357e-01, 2.349e-01, -3.465e-02, 1.738e-01, 5.530e-02, -3.362e-01, -1.475e-01)); + r += mul(s4_7, M4(3.751e-02, -7.403e-02, -3.850e-02, -1.265e-03, -6.263e-02, 8.607e-02, -1.028e-02, -1.403e-01, 6.326e-02, -1.558e-01, 1.075e-01, 1.408e-01, -2.220e-01, -9.187e-02, -1.448e-01, 1.872e-01)); + r += mul(s4_8, M4(-1.246e-01, -2.591e-02, -1.611e-01, -9.790e-02, -1.941e-02, -6.551e-03, -5.051e-02, -5.462e-02, -2.144e-01, 1.762e-02, 2.371e-01, 4.996e-02, -3.406e-01, 1.177e-01, -2.264e-01, -8.656e-02)); + r += mul(s5_0, M4(1.343e-03, -3.725e-02, -1.066e-01, -1.740e-02, 6.081e-02, 1.362e-01, 3.578e-02, 2.253e-01, -9.466e-02, -9.187e-02, -1.175e-02, -8.719e-02, 2.989e-02, -7.776e-02, -6.628e-02, 8.013e-02)); + r += mul(s5_1, M4(1.604e-01, -1.780e-02, 1.005e-02, -5.130e-02, -7.391e-02, -1.475e-01, -1.789e-01, 9.968e-02, -4.946e-02, -1.164e-01, 1.152e-01, 1.142e-01, 1.409e-01, 3.844e-02, -2.048e-02, 6.226e-02)); + r += mul(s5_2, M4(1.160e-01, -5.686e-04, -4.562e-03, -1.512e-01, -2.057e-02, 7.273e-02, -5.500e-02, -1.783e-01, -1.018e-01, 1.135e-01, -1.292e-01, 1.754e-02, 2.182e-02, -5.689e-02, 8.034e-02, 
-4.859e-02)); + r += mul(s5_3, M4(7.867e-02, -3.518e-03, 1.173e-01, -1.195e-02, 1.984e-01, -5.090e-02, 1.352e-02, -5.604e-02, -2.063e-02, 1.019e-01, -7.606e-02, 5.175e-02, 2.911e-02, -5.707e-02, -7.261e-02, -3.138e-02)); + r += mul(s5_4, M4(-1.652e-01, -8.234e-02, -3.650e-02, 9.660e-02, 7.855e-02, -1.732e-02, 3.844e-02, 6.549e-02, 4.526e-02, -9.386e-02, -1.017e-01, 4.589e-02, -8.000e-05, 1.011e-01, 9.537e-02, -1.192e-01)); + r += mul(s5_5, M4(1.697e-02, -7.522e-03, -6.139e-02, 1.325e-01, 1.054e-02, -1.900e-02, 4.979e-02, -1.371e-02, 1.106e-01, -1.330e-03, 2.616e-03, 1.090e-01, -5.065e-02, 1.158e-02, -9.399e-02, 3.210e-02)); + r += mul(s5_6, M4(-3.785e-02, -2.064e-02, 1.184e-01, -1.578e-01, 1.431e-01, -7.638e-02, -7.632e-02, -7.275e-03, -2.357e-03, 6.810e-02, -4.248e-02, 7.840e-02, -6.778e-02, 6.966e-02, -4.366e-02, -1.338e-01)); + r += mul(s5_7, M4(4.235e-02, 5.252e-02, 5.567e-02, -5.948e-02, -3.842e-02, 1.416e-02, 6.482e-02, 5.589e-02, -4.599e-02, -2.528e-03, 4.268e-03, -5.436e-02, -5.918e-02, 1.537e-02, 1.195e-01, 2.206e-02)); + r += mul(s5_8, M4(-1.554e-01, -9.753e-03, 1.619e-01, 4.940e-02, 2.986e-02, 2.573e-02, 1.149e-02, -5.308e-02, 1.188e-02, 2.052e-02, 1.330e-02, -7.705e-02, -5.218e-02, 3.419e-02, 9.731e-02, -8.245e-03)); + r += mul(s6_0, M4(-1.272e-01, 2.994e-02, 6.812e-02, -1.304e-01, -1.582e-01, -1.328e-02, -1.314e-01, 1.909e-01, -9.384e-02, 1.006e-01, 1.616e-01, 6.443e-02, -1.253e-02, 2.781e-01, 1.011e-01, -9.925e-03)); + r += mul(s6_1, M4(1.135e-01, -9.899e-02, -1.533e-01, 8.067e-02, -2.713e-02, 1.412e-01, -2.803e-01, -6.846e-02, -2.906e-02, 3.917e-02, 3.451e-02, -1.968e-01, -1.413e-01, 2.291e-01, -1.214e-01, 6.061e-03)); + r += mul(s6_2, M4(8.897e-02, 8.721e-02, 3.431e-01, 6.751e-02, -2.026e-01, -1.703e-01, -2.079e-01, 8.543e-02, 3.969e-02, 7.591e-02, 9.177e-02, 1.330e-01, -1.698e-01, 1.654e-01, -1.421e-01, 2.989e-02)); + r += mul(s6_3, M4(-1.355e-01, -1.909e-01, 1.446e-02, -7.383e-03, -2.487e-02, 5.117e-02, -2.610e-01, -1.720e-02, -6.448e-02, 
-7.445e-02, -5.439e-02, 7.059e-02, 8.874e-03, 2.675e-01, 1.977e-02, 1.241e-02)); + r += mul(s6_4, M4(-2.034e-01, -3.518e-02, -5.094e-02, 1.420e-01, -1.284e-01, -4.634e-02, -1.887e-01, -2.672e-01, -1.703e-02, -1.749e-01, -6.780e-02, 6.431e-02, -9.908e-02, 1.532e-01, 2.047e-01, 9.582e-02)); + r += mul(s6_5, M4(-3.252e-02, 5.439e-02, -8.179e-02, -2.995e-02, 7.181e-02, -1.978e-01, -6.903e-02, 1.071e-01, -2.747e-02, -1.366e-01, 2.972e-02, 1.530e-02, 1.017e-01, -8.830e-03, 3.876e-04, -8.333e-02)); + r += mul(s6_6, M4(-1.410e-01, 2.223e-01, 1.674e-01, 2.109e-01, 8.660e-03, -1.001e-01, -6.640e-02, 8.090e-02, -3.113e-02, -7.455e-02, -4.265e-02, 5.721e-02, -4.509e-02, -2.046e-02, 1.302e-02, -1.582e-01)); + r += mul(s6_7, M4(2.310e-02, 4.775e-02, -7.455e-02, -2.721e-01, 8.084e-04, -7.578e-02, 5.419e-02, 1.823e-01, 1.182e-01, -4.500e-02, -6.240e-02, 1.592e-01, -1.012e-01, -4.963e-02, 2.983e-02, -3.703e-02)); + r += mul(s6_8, M4(4.231e-02, -6.410e-02, 2.523e-01, 1.876e-01, 2.866e-02, 1.888e-02, 7.973e-02, -1.213e-01, -1.863e-02, -4.818e-03, -3.015e-02, 4.286e-02, 1.226e-02, -6.055e-02, -7.081e-02, -3.247e-02)); + r += mul(s7_0, M4(2.255e-02, 1.087e-01, -1.311e-01, -1.097e-01, 4.896e-02, -1.972e-02, 1.006e-01, 1.179e-01, -4.401e-02, 5.579e-02, 4.111e-02, 1.389e-02, 1.450e-01, -1.107e-01, 8.167e-02, 2.141e-02)); + r += mul(s7_1, M4(8.651e-02, -4.113e-02, -5.502e-02, 8.078e-02, -6.702e-02, -8.295e-02, 8.445e-02, 3.853e-02, -7.218e-02, 9.200e-03, 7.979e-02, -5.762e-02, 5.357e-02, -3.144e-02, -4.562e-02, 9.623e-03)); + r += mul(s7_2, M4(6.184e-03, 7.383e-02, 3.246e-02, -4.805e-02, -6.375e-02, -9.116e-02, -2.184e-02, 1.442e-01, -1.226e-01, 1.353e-02, -1.435e-01, -1.307e-01, 5.528e-02, -6.102e-02, 8.355e-02, -1.100e-01)); + r += mul(s7_3, M4(-5.722e-02, -2.702e-03, -1.345e-01, -2.826e-02, -7.891e-02, 5.526e-02, 1.364e-01, -3.028e-03, -1.513e-01, 7.168e-02, -9.428e-03, -1.712e-01, 6.925e-02, -2.112e-02, -5.069e-02, 1.105e-01)); + r += mul(s7_4, M4(-1.807e-02, -2.998e-02, 2.074e-01, 
-3.249e-03, -1.228e-01, -1.524e-01, -3.277e-02, 2.149e-02, -1.580e-01, -7.356e-02, 1.138e-01, -6.868e-02, 5.897e-03, 4.738e-02, 8.964e-02, -1.103e-01));
+ r += mul(s7_5, M4(6.567e-02, -2.314e-02, 5.409e-02, -5.161e-02, 2.199e-02, -2.075e-01, 3.238e-02, -2.085e-01, 1.615e-01, 7.379e-03, -8.971e-03, 1.529e-01, -4.548e-03, -2.031e-03, -9.448e-02, -1.690e-02));
+ r += mul(s7_6, M4(-6.829e-02, 3.009e-02, 2.554e-02, 6.114e-02, 3.201e-02, -3.606e-02, 4.506e-02, -5.041e-02, -1.341e-01, 7.531e-02, 1.137e-01, 2.316e-02, -1.997e-02, -1.762e-01, -7.972e-02, -4.369e-03));
+ r += mul(s7_7, M4(1.185e-01, -7.930e-02, 5.674e-02, -4.796e-02, 4.371e-02, 7.916e-02, 5.691e-02, 1.335e-01, 7.223e-02, 3.957e-02, 1.429e-01, -9.440e-02, 1.036e-01, -6.944e-02, -1.023e-02, 1.916e-02));
+ r += mul(s7_8, M4(-1.116e-02, 4.448e-02, -1.403e-01, 1.527e-03, -1.705e-02, 1.049e-02, 9.845e-03, -1.456e-02, 4.949e-02, 5.920e-02, 1.153e-01, 1.217e-01, 1.128e-02, -3.924e-02, -5.223e-02, -2.814e-02));
+ r += V4(1.050e-02, -3.156e-02, -8.312e-03, 6.905e-02);
+ return r;
+}
+ // f3: returns the sum of mul(s, M4(...)) over all 72 V4 arguments (s0_0 .. s7_8), each paired with its own 16-constant matrix, plus one constant V4 added at the end.
+V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { // V4 and M4 are declared outside this chunk. NOTE(review): the constants look like generated network weights - presumably not meant to be edited by hand; confirm with the generator.
+ V4 r = 0.0; // accumulator; every following statement adds one term to it
+ r += mul(s0_0, M4(-4.867e-02, -5.197e-02, 1.534e-02, -4.213e-02, -4.926e-02, 2.104e-01, -9.779e-02, -2.507e-01, 5.090e-02, 7.115e-02, -4.519e-02, 2.813e-02, 3.021e-01, 2.627e-02, 1.871e-01, -1.671e-01));
+ r += mul(s0_1, M4(-1.230e-01, 1.430e-02, 8.499e-02, -2.918e-02,
-5.297e-02, -1.011e-01, -2.859e-02, 1.525e-04, -7.938e-02, -1.685e-02, 2.875e-02, -7.803e-03, 2.980e-01, -1.973e-01, 1.285e-01, 1.956e-02)); + r += mul(s0_2, M4(1.857e-02, -8.956e-02, 4.124e-02, -1.190e-01, 6.922e-01, -5.653e-02, -2.747e+00, -1.397e+00, 4.512e-04, -4.355e-02, -7.532e-02, 8.435e-02, 8.140e-02, 1.354e-01, 2.064e-01, 7.384e-04)); + r += mul(s0_3, M4(4.223e-02, -2.270e-05, 8.174e-02, 4.525e-02, -2.709e-01, -3.039e-01, 3.262e-02, 6.482e-03, 2.639e-03, 3.351e-02, 2.799e-02, 2.269e-02, 1.039e-01, 7.289e-02, -1.397e-02, 1.272e-02)); + r += mul(s0_4, M4(-1.995e-02, -4.059e-02, -2.450e-02, -2.946e-02, 2.466e-01, 1.479e-01, 1.595e-01, -6.020e-02, -1.812e-02, -2.821e-02, -1.184e-01, -5.749e-02, -3.615e-01, 2.186e-01, -8.206e-02, 1.362e-01)); + r += mul(s0_5, M4(1.111e-01, 1.478e-01, 3.247e-02, -8.840e-02, -2.125e-01, 2.289e-01, -6.042e-02, -1.573e-01, 5.331e-02, 3.785e-02, -3.423e-02, -2.230e-02, -5.637e-02, 1.008e-01, 1.283e-01, 2.773e-01)); + r += mul(s0_6, M4(7.693e-03, 2.148e-02, 3.975e-02, -3.273e-03, 1.910e-01, 2.068e-02, 8.654e-02, -1.955e-01, -1.766e-02, -1.189e-01, -8.424e-02, 3.049e-02, -8.869e-02, -3.213e-01, -1.007e-01, -9.598e-02)); + r += mul(s0_7, M4(-7.415e-02, -6.800e-02, -6.287e-03, 3.431e-02, 2.036e-01, -7.898e-02, 9.472e-02, -3.371e-02, 1.412e-01, 1.044e-01, 5.694e-02, 1.724e-02, 1.776e-02, -1.814e-01, -6.565e-02, -2.985e-01)); + r += mul(s0_8, M4(1.124e-01, 1.099e-01, -1.031e-01, 7.196e-02, -1.338e-01, 1.914e-01, -1.861e-01, 4.604e-03, -9.489e-03, -1.205e-01, 6.328e-02, 1.043e-02, -8.468e-02, 2.194e-01, -1.679e-02, 4.313e-02)); + r += mul(s1_0, M4(1.964e-01, -5.630e-02, -6.981e-02, 7.879e-02, 1.014e-01, 6.166e-02, 5.840e-02, -7.727e-02, 4.637e-01, -1.051e-01, 4.454e-01, 5.713e-01, 6.147e-02, 5.523e-02, -1.295e-01, 1.058e-01)); + r += mul(s1_1, M4(2.160e-01, -1.344e-01, 1.974e-01, -3.814e-01, -5.308e-02, -6.718e-02, -5.522e-02, -1.466e-01, -4.706e-01, -2.553e-01, -1.625e-02, -3.204e-01, 4.533e-02, -3.726e-03, -2.634e-02, -4.952e-02)); + r 
+= mul(s1_2, M4(-1.988e-02, -3.152e-01, -2.705e-01, -8.406e-03, 1.017e-01, -9.464e-02, 9.255e-02, -3.722e-02, 1.070e-01, -2.718e-01, -4.225e-01, -9.374e-02, -1.769e-02, 5.608e-03, 3.674e-02, -1.047e-01)); + r += mul(s1_3, M4(3.746e-01, 3.046e-03, -3.719e-01, 1.350e-01, -7.644e-02, 3.353e-03, 1.027e-01, -2.490e-02, 5.892e-01, -2.039e-01, -8.033e-02, -1.380e-02, 1.610e-01, -6.900e-02, -6.275e-02, -6.425e-03)); + r += mul(s1_4, M4(2.208e-01, -4.707e-02, -3.095e-01, -1.714e-01, 1.478e-01, -4.646e-02, -1.003e-01, -2.538e-02, 2.015e-01, -4.198e-01, -2.134e-01, 4.196e-01, 1.473e-02, -6.650e-02, 1.519e-01, 8.087e-03)); + r += mul(s1_5, M4(4.357e-02, 1.217e-01, -2.680e-01, 1.812e-01, 7.153e-02, -3.633e-02, 1.021e-01, 4.253e-02, -8.424e-01, 1.775e-01, 1.127e-01, 2.770e-01, 2.008e-02, 9.776e-03, 1.154e-01, -7.201e-02)); + r += mul(s1_6, M4(-2.616e-01, -2.362e-02, 9.223e-02, 1.190e-01, -6.313e-02, 5.472e-02, -7.627e-02, 1.430e-01, 2.428e-01, 1.364e-01, -4.211e-02, -7.360e-02, -2.196e-02, -4.156e-02, 1.096e-02, 9.701e-03)); + r += mul(s1_7, M4(7.828e-03, -4.790e-01, -6.233e-03, -2.202e-01, -1.206e-01, 9.250e-02, -5.049e-02, -2.543e-03, -3.836e-01, 2.872e-01, -1.163e-01, -1.762e-01, -6.531e-02, 1.423e-01, -1.852e-03, 4.767e-02)); + r += mul(s1_8, M4(2.448e-01, -7.609e-03, -7.481e-02, 2.388e-03, -7.668e-02, 5.488e-02, 2.885e-02, 1.482e-01, -2.660e-01, -1.742e-01, -6.981e-02, -2.010e-02, 5.693e-03, -9.158e-02, -1.314e-01, 5.001e-02)); + r += mul(s2_0, M4(-2.589e-01, 1.859e-01, -2.660e-01, 1.760e-01, 1.168e-01, -1.407e-01, 7.098e-02, -4.685e-03, 2.893e-02, 2.214e-01, 1.994e-01, 1.514e-02, -4.815e-02, -1.155e-01, -1.169e-01, -1.717e-02)); + r += mul(s2_1, M4(-1.254e-01, 1.755e-02, -2.098e-02, -5.563e-03, -7.331e-02, -4.930e-02, -4.675e-02, -2.075e-01, -9.922e-02, -6.976e-02, 4.046e-02, -9.828e-02, 9.790e-02, -6.473e-02, -1.184e-01, -7.581e-02)); + r += mul(s2_2, M4(2.077e-01, -1.477e-01, -4.115e-01, -1.527e-02, -1.842e-02, -2.502e-01, 1.818e-01, -1.264e-01, 3.775e-02, 1.694e-01, 
8.082e-02, -2.606e-02, -4.469e-02, -2.510e-02, 4.211e-02, -3.362e-02)); + r += mul(s2_3, M4(1.471e-01, -2.066e-01, -2.028e-01, 3.669e-01, 1.146e-01, 5.714e-02, -4.225e-03, -1.538e-01, -6.134e-02, -1.076e-01, 1.210e-01, -5.962e-02, -5.265e-02, 8.768e-02, 5.668e-02, -1.023e-02)); + r += mul(s2_4, M4(-9.993e-02, -7.184e-02, -4.461e-02, -1.498e-01, 9.753e-02, 1.023e-01, 3.349e-01, 6.508e-02, 1.059e-01, 1.041e-01, 2.029e-01, -1.877e-01, 3.264e-02, -5.429e-02, -7.859e-02, 1.105e-01)); + r += mul(s2_5, M4(1.262e-02, 3.543e-01, 2.701e-02, 1.023e-01, 1.665e-01, 2.847e-02, -1.573e-01, 2.408e-01, -2.000e-01, 1.185e-02, 1.601e-01, -5.772e-02, 2.326e-01, 2.310e-01, 8.624e-02, 7.525e-04)); + r += mul(s2_6, M4(-2.438e-01, 1.119e-01, -1.388e-01, -4.616e-03, -1.179e-01, -1.265e-01, -2.000e-01, 2.295e-02, 1.358e-02, -1.272e-01, 1.886e-02, 2.249e-01, 5.756e-02, 2.086e-01, 1.538e-02, 5.748e-02)); + r += mul(s2_7, M4(-1.796e-02, -1.771e-01, 1.848e-02, 4.765e-02, -1.709e-01, 1.274e-01, -1.274e-01, 1.016e-02, 4.295e-02, 4.648e-02, 1.067e-01, 9.483e-02, -7.017e-02, -3.615e-02, -3.152e-02, 2.260e-02)); + r += mul(s2_8, M4(3.672e-01, 5.534e-02, -2.330e-01, -2.461e-01, -3.854e-02, 1.713e-01, 6.842e-03, 3.879e-01, 1.905e-01, 1.151e-01, -4.301e-02, -1.332e-01, 1.137e-02, -1.343e-01, -2.136e-02, 5.372e-03)); + r += mul(s3_0, M4(-6.807e-02, 2.391e-02, 1.462e-02, 1.118e-01, 6.041e-02, -4.362e-02, -1.058e-02, 1.364e-01, 1.872e-01, 8.151e-02, -3.826e-02, -1.238e-01, 6.197e-02, -9.422e-02, 2.467e-01, -2.102e-01)); + r += mul(s3_1, M4(6.616e-03, -1.001e-01, 7.494e-02, -9.462e-04, -5.799e-02, 1.314e-01, -4.394e-03, -3.465e-03, 4.980e-03, -4.246e-02, -4.996e-02, -1.167e-01, 1.994e-01, 8.534e-02, -4.727e-02, 1.655e-01)); + r += mul(s3_2, M4(-1.278e-01, 1.321e-01, -9.006e-03, 1.384e-01, 5.992e-02, 7.091e-02, 1.501e-01, -5.937e-02, -4.941e-03, -1.158e-01, -5.998e-02, -2.486e-02, -1.105e-01, 1.812e-03, -1.348e-01, -1.650e-02)); + r += mul(s3_3, M4(9.376e-02, -1.023e-01, 4.212e-02, -1.545e-02, -6.994e-03, 
-3.425e-02, 1.686e-01, -1.371e-01, -1.737e-01, -1.316e-02, -5.657e-02, 1.056e-01, 7.410e-02, -1.012e-01, 3.571e-03, 2.081e-01)); + r += mul(s3_4, M4(-2.418e-02, 1.363e-01, -1.405e-01, -1.319e-01, 1.411e-02, -2.162e-02, 2.907e-03, -8.183e-02, 1.188e-01, 1.219e-01, -4.666e-03, -8.269e-02, -2.099e-01, -1.123e-01, 8.460e-02, 2.129e-02)); + r += mul(s3_5, M4(-3.303e-02, -1.845e-03, -1.138e-01, 7.996e-02, 7.868e-02, -4.075e-02, -5.790e-02, -9.637e-02, 1.098e-02, -2.270e-01, 1.366e-01, 1.204e-01, -5.277e-03, 5.143e-02, 1.918e-02, -5.697e-02)); + r += mul(s3_6, M4(-1.092e-02, -3.216e-02, 5.721e-02, -6.585e-02, -1.009e-01, -4.632e-02, -7.862e-02, 1.653e-01, 5.131e-02, -3.722e-03, 1.728e-01, -9.095e-02, -1.789e-01, 1.442e-01, -4.662e-02, 3.658e-02)); + r += mul(s3_7, M4(1.101e-02, -1.593e-02, -1.144e-01, 7.734e-02, -1.481e-01, 4.628e-02, -8.941e-02, -7.569e-03, -2.673e-02, 1.117e-01, 7.346e-02, -4.990e-02, -2.966e-01, 9.371e-02, 2.772e-01, -2.145e-02)); + r += mul(s3_8, M4(7.676e-02, -5.079e-03, 6.983e-02, -9.453e-02, -8.591e-02, 8.574e-02, -9.288e-02, 4.522e-02, -3.042e-02, -2.422e-02, -3.078e-02, 3.838e-02, -1.997e-01, 3.532e-02, -2.855e-01, 9.401e-02)); + r += mul(s4_0, M4(-1.704e-01, -5.635e-02, -4.514e-02, 1.377e-01, -5.299e-02, -1.445e-02, -9.418e-02, -8.632e-02, 7.442e-02, 3.310e-01, 7.629e-02, 3.254e-02, 1.062e-01, -2.861e-01, 1.315e-01, -1.645e-01)); + r += mul(s4_1, M4(4.894e-02, 1.057e-01, -1.611e-01, -1.634e-01, 2.585e-02, 5.547e-02, 3.944e-02, 2.779e-02, 8.338e-02, 5.388e-02, 3.696e-02, 1.097e-03, -9.750e-02, -2.696e-01, 1.603e-02, -2.061e-02)); + r += mul(s4_2, M4(2.440e-02, 6.216e-02, -4.641e-03, 6.110e-02, 6.277e-02, -6.637e-03, 2.735e-02, 7.741e-02, 1.263e-01, 6.328e-02, 1.572e-01, -1.274e-01, -6.511e-03, 6.195e-02, 6.708e-02, -6.191e-02)); + r += mul(s4_3, M4(3.447e-01, 1.650e-01, 8.541e-02, 5.321e-02, 1.518e-01, 3.891e-02, 8.751e-02, 1.890e-01, 8.485e-02, 1.050e-01, 7.165e-02, 7.063e-02, 1.793e-01, 1.331e-01, -4.657e-01, -1.884e-01)); + r += mul(s4_4, 
M4(-2.046e-01, -1.057e-02, 1.325e-02, 1.328e-02, -6.494e-02, -5.777e-03, -2.663e-02, -5.531e-02, -6.887e-02, -1.272e-02, 8.530e-02, -9.672e-02, -2.877e-01, 1.913e-01, 1.664e-01, -3.438e-02)); + r += mul(s4_5, M4(2.999e-02, -1.264e-01, 4.113e-02, 9.133e-04, -1.103e-01, -2.734e-02, 8.656e-02, -4.796e-02, -1.031e-01, -5.929e-04, -2.045e-01, 1.844e-01, 1.026e-01, 1.939e-02, -2.593e-01, 1.734e-02)); + r += mul(s4_6, M4(7.705e-02, -5.389e-02, -5.880e-02, 9.477e-02, 1.908e-02, -3.981e-02, 1.341e-02, 5.069e-02, -5.309e-02, 6.803e-02, 1.399e-01, 1.974e-01, -2.990e-02, -3.216e-01, -7.345e-03, 1.540e-01)); + r += mul(s4_7, M4(1.846e-02, -8.625e-02, -8.892e-02, 3.352e-01, -9.511e-02, -4.309e-02, -6.184e-02, -1.939e-01, -5.934e-02, 9.256e-02, -6.655e-02, -1.265e-01, -3.136e-01, -9.817e-02, 2.529e-02, -2.793e-02)); + r += mul(s4_8, M4(1.994e-01, 2.776e-01, -1.158e-01, -3.117e-02, -4.502e-02, -8.706e-02, 7.736e-02, 8.342e-02, 2.195e-01, -1.207e-01, -1.143e-02, -4.740e-01, 1.024e-01, 3.412e-02, -8.559e-02, 8.561e-02)); + r += mul(s5_0, M4(-1.424e-01, -3.827e-02, 9.982e-03, 1.728e-01, -1.270e-02, 6.425e-02, 7.333e-02, 1.429e-01, -5.168e-02, -3.531e-03, -1.018e-01, -7.135e-02, 1.500e-02, -4.705e-02, 1.095e-03, 5.302e-02)); + r += mul(s5_1, M4(1.636e-01, -1.645e-01, 4.886e-02, 2.773e-02, 7.834e-02, -2.487e-02, 2.299e-02, 1.313e-01, -2.820e-02, -1.568e-01, 8.729e-02, 8.737e-02, 9.738e-04, -5.445e-04, 5.576e-02, -4.024e-02)); + r += mul(s5_2, M4(5.442e-02, 1.993e-04, -1.460e-01, -1.173e-01, -9.994e-02, 4.632e-02, 5.769e-02, 1.391e-01, -1.149e-01, 6.631e-03, 6.934e-02, 3.662e-02, -4.205e-02, 1.630e-02, -1.077e-01, 7.098e-02)); + r += mul(s5_3, M4(4.663e-02, -6.306e-02, 4.831e-02, 2.122e-02, -2.886e-02, 2.370e-01, 3.475e-02, -5.125e-02, 4.117e-02, -2.344e-02, -1.420e-02, -6.763e-03, 9.431e-02, -6.717e-02, -9.945e-02, -1.036e-01)); + r += mul(s5_4, M4(-2.250e-01, -6.616e-02, -1.702e-02, 1.078e-01, -8.410e-03, 2.029e-01, -1.195e-01, -1.684e-02, 4.381e-02, -1.808e-02, -7.171e-02, 1.174e-01, 
-1.759e-01, -2.192e-02, 1.143e-01, -5.823e-02)); + r += mul(s5_5, M4(-1.406e-01, -2.651e-02, 2.532e-02, 1.900e-01, 4.971e-02, -1.710e-02, -5.660e-02, 1.032e-01, 3.520e-02, 3.673e-02, 9.979e-02, -7.656e-02, -3.376e-02, 1.289e-02, -9.658e-03, -5.253e-02)); + r += mul(s5_6, M4(8.909e-02, 1.214e-01, 4.088e-02, -7.486e-02, 6.216e-02, 6.290e-02, 4.715e-02, -3.857e-03, -4.435e-02, 1.425e-01, -8.248e-02, 8.130e-02, -2.014e-01, 1.266e-01, -5.059e-02, -2.273e-02)); + r += mul(s5_7, M4(-6.331e-02, 8.323e-02, -7.675e-02, -2.001e-01, 6.727e-02, 8.620e-02, 2.074e-02, 6.153e-02, 9.192e-02, 1.586e-02, -2.733e-02, 1.233e-01, 2.027e-01, 6.006e-02, -2.490e-02, -1.344e-01)); + r += mul(s5_8, M4(-1.838e-02, -7.457e-02, -5.620e-02, 1.654e-01, -6.773e-02, 8.304e-04, -6.201e-03, 1.674e-01, 7.353e-02, 7.026e-03, -9.155e-02, -1.844e-02, -1.289e-01, 1.754e-02, -9.762e-02, 2.202e-02)); + r += mul(s6_0, M4(-4.472e-03, -1.005e-01, 2.258e-01, -9.578e-02, 1.777e-01, -3.474e-02, 3.183e-02, 6.274e-02, -5.911e-03, -1.968e-02, 1.346e-01, -2.485e-02, -2.839e-02, 6.432e-02, -1.317e-02, -1.667e-01)); + r += mul(s6_1, M4(1.821e-02, 1.915e-01, -2.842e-02, -8.619e-02, -7.271e-02, -2.150e-01, 1.525e-01, -3.161e-01, 6.814e-02, 2.463e-02, -1.319e-02, 1.482e-02, -7.062e-02, 6.184e-02, 9.224e-02, -2.110e-02)); + r += mul(s6_2, M4(2.366e-02, 2.179e-01, -2.260e-01, -4.090e-02, 3.450e-02, -2.221e-01, -1.917e-01, -1.253e-03, 2.459e-02, -3.095e-02, -2.960e-02, 6.917e-02, 1.787e-02, -6.407e-02, -1.363e-01, 1.354e-02)); + r += mul(s6_3, M4(7.358e-02, -1.112e-01, 1.565e-01, -3.123e-02, -2.158e-02, -4.314e-02, -3.589e-02, 4.137e-02, -6.222e-02, 4.665e-02, 1.050e-01, 8.966e-02, -2.252e-02, 5.703e-05, -2.254e-02, -1.032e-01)); + r += mul(s6_4, M4(1.371e-02, -1.477e-01, 3.052e-01, 6.823e-02, -5.356e-02, -1.059e-01, 1.759e-01, 1.190e-01, -9.110e-02, -2.838e-02, -1.095e-02, 9.377e-02, 3.022e-02, -2.750e-01, 1.233e-01, -8.033e-02)); + r += mul(s6_5, M4(-2.452e-02, 3.526e-02, -3.008e-01, -6.037e-02, 9.324e-02, -6.850e-02, 
-8.138e-02, 4.399e-02, -7.511e-02, 5.993e-02, 4.646e-02, 4.527e-02, 3.678e-02, 9.254e-02, -3.808e-02, -1.133e-01)); + r += mul(s6_6, M4(-1.469e-01, -6.462e-02, 3.747e-02, -2.681e-01, -1.403e-01, 2.458e-01, 3.866e-02, -2.417e-02, -8.566e-03, 8.890e-03, 4.941e-02, 4.937e-03, 3.387e-02, 4.419e-02, 9.334e-02, 3.426e-02)); + r += mul(s6_7, M4(-1.553e-01, 9.161e-02, -1.463e-01, -1.822e-01, -2.398e-01, 5.129e-02, 1.587e-01, -2.893e-02, -1.408e-01, 2.464e-01, -4.796e-02, -6.968e-02, 1.199e-01, 2.698e-02, -1.137e-02, -1.715e-01)); + r += mul(s6_8, M4(-1.989e-01, 7.418e-02, -2.803e-01, -7.758e-02, -6.718e-02, 1.768e-02, -2.800e-02, -6.137e-02, 1.191e-01, -1.811e-02, -1.433e-01, 1.586e-01, 1.840e-01, 4.482e-02, -3.477e-02, -4.043e-02)); + r += mul(s7_0, M4(6.616e-03, -7.259e-02, -3.522e-02, -2.314e-02, 1.343e-02, 9.060e-02, 3.314e-02, 2.074e-02, 6.596e-03, -3.655e-02, -7.397e-02, -8.277e-02, -2.683e-02, 7.246e-02, 4.985e-02, -1.649e-01)); + r += mul(s7_1, M4(2.994e-02, 3.844e-02, -2.174e-02, -6.217e-02, 1.322e-01, -5.597e-02, -8.560e-02, -3.968e-02, 3.835e-02, 9.182e-04, -7.721e-02, -4.019e-02, -1.608e-01, 6.102e-02, 4.120e-02, 4.026e-02)); + r += mul(s7_2, M4(5.433e-02, 7.448e-02, -6.959e-02, 3.255e-02, 2.247e-02, -2.882e-02, -7.430e-02, -6.994e-03, -3.539e-02, -9.467e-02, 4.148e-02, -1.225e-01, 1.195e-02, 8.160e-03, 1.256e-02, 1.419e-01)); + r += mul(s7_3, M4(-1.717e-01, 3.816e-02, 5.009e-02, -3.195e-02, -2.244e-02, 3.889e-03, 5.482e-03, -1.720e-01, 1.087e-02, -2.089e-01, 4.557e-02, -1.794e-01, -1.795e-03, 2.095e-02, 1.293e-01, -1.453e-01)); + r += mul(s7_4, M4(7.199e-02, -1.367e-01, 2.238e-01, -9.105e-02, -5.963e-02, 1.711e-01, 5.876e-02, 3.460e-02, -6.988e-02, 2.286e-02, -1.684e-01, -7.710e-02, -1.110e-01, -7.015e-03, -3.769e-02, 2.094e-01)); + r += mul(s7_5, M4(2.787e-02, -2.768e-02, -1.053e-01, 4.257e-02, -1.150e-01, -2.859e-02, 2.506e-02, -1.080e-01, 1.821e-01, 8.441e-02, -1.393e-01, -4.062e-03, 1.600e-01, 1.051e-01, -2.439e-02, -3.265e-02)); + r += mul(s7_6, 
M4(1.021e-01, 4.107e-02, -5.459e-02, -1.254e-02, -1.705e-02, -3.163e-02, 8.226e-02, 1.609e-01, 1.601e-01, -3.518e-01, 4.768e-03, 2.022e-01, -2.541e-01, 6.005e-02, 4.948e-02, 2.458e-01));
+ r += mul(s7_7, M4(-1.719e-01, -4.408e-02, 1.150e-02, -3.852e-02, -8.845e-02, 7.843e-02, -1.203e-03, -1.014e-01, -9.808e-03, -1.778e-01, 2.647e-02, 5.050e-02, 2.294e-01, 1.987e-02, -1.390e-01, 1.359e-01));
+ r += mul(s7_8, M4(5.449e-02, 1.212e-01, 1.297e-01, 8.956e-02, 1.642e-01, -1.450e-01, -8.208e-02, 1.213e-01, -2.843e-01, -2.460e-01, -6.178e-02, 1.192e-02, -4.585e-02, 1.380e-01, -3.861e-02, 9.575e-02));
+ r += V4(-1.765e-02, -1.522e-02, -7.955e-02, -2.431e-02);
+ return r;
+}
+ // Pass4: for the pixel owned by this thread, reads a 3x3 neighbourhood through each of l0..l3, splits every sample into a positive and a negative part, and stores f0..f3 of the resulting 72 values into t4..t7.
+void Pass4(uint2 blockStart, uint3 tid) { // l0..l3 as used in this function are macros defined outside this chunk - presumably they sample this pass's input textures; confirm against the pass header.
+ float2 pt = float2(GetInputPt()); // NOTE(review): presumably the size of one input texel in normalized coordinates - confirm
+ uint2 gxy = Rmp8x8(tid.x) + blockStart; // pixel coordinate: block origin plus the offset Rmp8x8 derives from the thread index (helper defined elsewhere)
+ uint2 size = GetInputSize();
+ if (gxy.x >= size.x || gxy.y >= size.y) { // threads whose pixel lies outside the input do nothing
+ return;
+ }
+ float2 pos = (gxy + 0.5) * pt; // not referenced by name below - presumably consumed inside the l0..l3 macros; verify before renaming or removing
+ // Group 0: nine samples from l0 at offsets (-1..1, -1..1), row by row.
+ V4 s0_0 = l0(-1.0, -1.0);
+ V4 s0_1 = l0(0.0, -1.0);
+ V4 s0_2 = l0(1.0, -1.0);
+ V4 s0_3 = l0(-1.0, 0.0);
+ V4 s0_4 = l0(0.0, 0.0);
+ V4 s0_5 = l0(1.0, 0.0);
+ V4 s0_6 = l0(-1.0, 1.0);
+ V4 s0_7 = l0(0.0, 1.0);
+ V4 s0_8 = l0(1.0, 1.0);
+ V4 s1_0 = -max(-s0_0, 0.0); // s1_* keep only the negative part of the l0 samples (zero where the sample is positive)
+ V4 s1_1 = -max(-s0_1, 0.0);
+ V4 s1_2 = -max(-s0_2, 0.0);
+ V4 s1_3 = -max(-s0_3, 0.0);
+ V4 s1_4 = -max(-s0_4, 0.0);
+ V4 s1_5 = -max(-s0_5, 0.0);
+ V4 s1_6 = -max(-s0_6, 0.0);
+ V4 s1_7 = -max(-s0_7, 0.0);
+ V4 s1_8 = -max(-s0_8, 0.0);
+ s0_0 = max(s0_0, 0.0); // s0_* are then overwritten with the positive part; this must come after s1_* has been computed from the raw samples
+ s0_1 = max(s0_1, 0.0);
+ s0_2 = max(s0_2, 0.0);
+ s0_3 = max(s0_3, 0.0);
+ s0_4 = max(s0_4, 0.0);
+ s0_5 = max(s0_5, 0.0);
+ s0_6 = max(s0_6, 0.0);
+ s0_7 = max(s0_7, 0.0);
+ s0_8 = max(s0_8, 0.0);
+ // Group 1: same pattern for l1 - s3_* is the negative part, s2_* the positive part.
+ V4 s2_0 = l1(-1.0, -1.0);
+ V4 s2_1 = l1(0.0, -1.0);
+ V4 s2_2 = l1(1.0, -1.0);
+ V4 s2_3 = l1(-1.0, 0.0);
+ V4 s2_4 = l1(0.0, 0.0);
+ V4 s2_5 = l1(1.0, 0.0);
+ V4 s2_6 = l1(-1.0, 1.0);
+ V4 s2_7 = l1(0.0, 1.0);
+ V4 s2_8 = l1(1.0, 1.0);
+ V4 s3_0 = -max(-s2_0, 0.0);
+ V4 s3_1 = -max(-s2_1, 0.0);
+ V4 s3_2 = -max(-s2_2, 0.0);
+ V4 s3_3 = -max(-s2_3, 0.0);
+ V4 s3_4 = -max(-s2_4, 0.0);
+ V4 s3_5 = -max(-s2_5, 0.0);
+ V4 s3_6 = -max(-s2_6, 0.0);
+ V4 s3_7 = -max(-s2_7, 0.0);
+ V4 s3_8 = -max(-s2_8, 0.0);
+ s2_0 = max(s2_0, 0.0);
+ s2_1 = max(s2_1, 0.0);
+ s2_2 = max(s2_2, 0.0);
+ s2_3 = max(s2_3, 0.0);
+ s2_4 = max(s2_4, 0.0);
+ s2_5 = max(s2_5, 0.0);
+ s2_6 = max(s2_6, 0.0);
+ s2_7 = max(s2_7, 0.0);
+ s2_8 = max(s2_8, 0.0);
+ // Group 2: same pattern for l2 - s5_* is the negative part, s4_* the positive part.
+ V4 s4_0 = l2(-1.0, -1.0);
+ V4 s4_1 = l2(0.0, -1.0);
+ V4 s4_2 = l2(1.0, -1.0);
+ V4 s4_3 = l2(-1.0, 0.0);
+ V4 s4_4 = l2(0.0, 0.0);
+ V4 s4_5 = l2(1.0, 0.0);
+ V4 s4_6 = l2(-1.0, 1.0);
+ V4 s4_7 = l2(0.0, 1.0);
+ V4 s4_8 = l2(1.0, 1.0);
+ V4 s5_0 = -max(-s4_0, 0.0);
+ V4 s5_1 = -max(-s4_1, 0.0);
+ V4 s5_2 = -max(-s4_2, 0.0);
+ V4 s5_3 = -max(-s4_3, 0.0);
+ V4 s5_4 = -max(-s4_4, 0.0);
+ V4 s5_5 = -max(-s4_5, 0.0);
+ V4 s5_6 = -max(-s4_6, 0.0);
+ V4 s5_7 = -max(-s4_7, 0.0);
+ V4 s5_8 = -max(-s4_8, 0.0);
+ s4_0 = max(s4_0, 0.0);
+ s4_1 = max(s4_1, 0.0);
+ s4_2 = max(s4_2, 0.0);
+ s4_3 = max(s4_3, 0.0);
+ s4_4 = max(s4_4, 0.0);
+ s4_5 = max(s4_5, 0.0);
+ s4_6 = max(s4_6, 0.0);
+ s4_7 = max(s4_7, 0.0);
+ s4_8 = max(s4_8, 0.0);
+ // Group 3: same pattern for l3 - s7_* is the negative part, s6_* the positive part.
+ V4 s6_0 = l3(-1.0, -1.0);
+ V4 s6_1 = l3(0.0, -1.0);
+ V4 s6_2 = l3(1.0, -1.0);
+ V4 s6_3 = l3(-1.0, 0.0);
+ V4 s6_4 = l3(0.0, 0.0);
+ V4 s6_5 = l3(1.0, 0.0);
+ V4 s6_6 = l3(-1.0, 1.0);
+ V4 s6_7 = l3(0.0, 1.0);
+ V4 s6_8 = l3(1.0, 1.0);
+ V4 s7_0 = -max(-s6_0, 0.0);
+ V4 s7_1 = -max(-s6_1, 0.0);
+ V4 s7_2 = -max(-s6_2, 0.0);
+ V4 s7_3 = -max(-s6_3, 0.0);
+ V4 s7_4 = -max(-s6_4, 0.0);
+ V4 s7_5 = -max(-s6_5, 0.0);
+ V4 s7_6 = -max(-s6_6, 0.0);
+ V4 s7_7 = -max(-s6_7, 0.0);
+ V4 s7_8 = -max(-s6_8, 0.0);
+ s6_0 = max(s6_0, 0.0);
+ s6_1 = max(s6_1, 0.0);
+ s6_2 = max(s6_2, 0.0);
+ s6_3 = max(s6_3, 0.0);
+ s6_4 = max(s6_4, 0.0);
+ s6_5 = max(s6_5, 0.0);
+ s6_6 = max(s6_6, 0.0);
+ s6_7 = max(s6_7, 0.0);
+ s6_8 = max(s6_8, 0.0);
+ // Each of f0..f3 receives the same 72 values and its result is stored at gxy in t4..t7 respectively; pass 5 below lists t4..t7 as its inputs.
+ t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
+ t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
+ t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
+ t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
+}
+
+//!PASS 5
+//!DESC conv4
+//!BLOCK_SIZE 8
+//!NUM_THREADS 64
+//!IN t4, t5, t6, t7
+//!OUT t0, t1, t2, t3
+
+#define l0(x, y) V4(O(t4, float2(x, y)))
+#define l1(x, y) V4(O(t5, float2(x, y)))
+#define l2(x, y) V4(O(t6, float2(x, y)))
+#define l3(x, y) V4(O(t7, float2(x, y)))
+
+V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4,
V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.911e-01, 4.293e-02, -7.165e-02, -2.189e-01, 6.837e-02, 3.073e-02, -7.802e-02, 7.100e-02, -4.834e-02, -4.366e-02, 2.767e-02, 1.131e-01, 7.377e-02, 1.025e-01, 6.654e-02, -1.978e-01)); + r += mul(s0_1, M4(2.610e-01, -9.888e-03, -3.636e-02, -1.254e-01, 2.710e-02, -1.770e-03, 3.422e-02, 1.038e-01, 6.573e-02, -8.588e-02, -1.277e-01, 7.383e-02, 9.637e-02, -9.595e-02, -1.055e-01, 5.184e-02)); + r += mul(s0_2, M4(-4.114e-03, -1.705e-02, -1.095e-03, -4.263e-02, -1.645e-01, 3.551e-02, -6.546e-03, -1.143e-02, -1.965e-01, 8.983e-02, 2.647e-02, 2.088e-02, -8.941e-02, 1.012e-01, 6.557e-02, -8.384e-02)); + r += mul(s0_3, M4(-2.234e-02, 5.056e-02, -2.678e-02, 5.041e-02, 1.036e-01, 5.828e-02, 2.647e-02, 1.489e-02, -9.072e-02, -7.207e-03, -6.385e-02, -4.407e-02, 2.517e-01, 6.323e-02, -2.668e-02, 1.636e-01)); + r += mul(s0_4, M4(-8.313e-02, -1.719e-01, 7.980e-02, 4.337e-02, -1.653e-01, -6.344e-02, -1.659e-02, 3.679e-02, -9.873e-03, -8.276e-02, 8.539e-02, 1.783e-01, 2.292e-01, -8.759e-02, -2.398e-03, 2.628e-01)); + r += mul(s0_5, M4(9.412e-02, -7.811e-02, 2.076e-02, -1.257e-01, 1.631e-01, -2.990e-02, -3.103e-02, -9.241e-04, -1.431e-01, 6.844e-02, 3.843e-02, 9.549e-02, -1.608e-01, -6.787e-03, -2.569e-02, 1.675e-02)); + r += mul(s0_6, M4(-1.088e-01, -6.189e-02, -4.222e-02, 2.319e-02, 1.818e-02, 1.727e-02, -4.928e-02, 6.700e-02, 6.315e-02, 1.296e-02, 
9.611e-02, 1.014e-02, 2.973e-03, 1.245e-03, 2.553e-01, 1.779e-02)); + r += mul(s0_7, M4(-3.605e-02, -5.594e-02, -9.233e-02, -4.208e-03, -7.568e-02, -4.813e-02, -2.752e-02, 3.998e-02, -1.366e-01, -4.549e-02, -2.460e-02, 1.400e-01, -9.195e-02, 4.020e-02, 6.491e-02, -1.215e-01)); + r += mul(s0_8, M4(4.148e-02, -3.769e-02, -9.859e-03, -2.839e-02, -9.498e-03, -6.769e-02, -4.398e-02, 5.395e-02, -1.100e-01, -3.161e-02, 1.902e-02, 1.477e-01, -1.482e-01, 3.158e-02, 3.634e-02, 2.209e-02)); + r += mul(s1_0, M4(1.445e-02, 8.049e-02, 2.826e-02, -2.931e-01, 1.958e-01, 1.026e-01, -8.516e-02, 1.093e-01, 1.909e-01, 2.248e-02, -6.748e-02, 3.476e-03, -3.636e-02, 3.906e-03, 8.562e-02, -2.096e-02)); + r += mul(s1_1, M4(9.437e-02, 1.674e-01, -5.992e-03, -2.529e-01, -1.090e-02, -1.137e-02, -3.747e-02, 3.806e-02, 7.053e-02, -1.197e-01, 1.002e-02, -9.692e-02, 3.400e-02, -1.558e-02, -1.905e-02, 9.034e-02)); + r += mul(s1_2, M4(7.877e-03, 9.344e-02, 1.036e-02, 1.783e-01, -7.734e-02, 6.793e-02, 5.569e-02, 1.347e-02, -1.236e-01, 2.216e-02, 1.008e-02, 1.481e-02, 4.541e-02, 7.055e-02, 7.153e-02, 1.067e-02)); + r += mul(s1_3, M4(1.127e-02, 1.351e-01, 6.345e-03, -2.964e-01, -9.091e-02, 2.207e-01, 9.727e-02, 8.008e-03, 3.675e-02, 4.381e-02, -7.209e-02, -2.023e-02, 1.003e-01, 1.810e-02, -1.049e-01, 9.784e-02)); + r += mul(s1_4, M4(2.354e-02, -1.543e-01, 1.898e-01, 9.106e-02, -7.247e-03, -5.448e-02, 3.855e-02, -2.363e-02, 7.327e-02, -2.247e-01, 1.712e-01, 1.105e-01, -1.091e-01, -1.434e-01, 1.633e-02, -2.095e-02)); + r += mul(s1_5, M4(2.805e-02, 9.803e-02, 8.733e-02, -3.386e-02, -7.457e-03, 3.605e-02, 7.829e-03, 7.178e-02, 1.123e-01, -9.636e-03, -1.486e-01, -4.111e-02, 5.418e-02, -8.534e-02, 2.637e-02, 2.664e-02)); + r += mul(s1_6, M4(-1.378e-02, 6.325e-02, 1.101e-02, -1.564e-02, 7.033e-02, 1.592e-01, -7.700e-02, 5.286e-02, -5.330e-02, -7.951e-02, -3.969e-02, 2.130e-02, -6.225e-02, -1.139e-02, -4.066e-02, 3.364e-02)); + r += mul(s1_7, M4(-3.912e-02, 4.691e-02, -1.572e-02, -5.204e-02, -9.345e-02, 
2.114e-02, 7.954e-02, -3.320e-03, -1.472e-02, -6.786e-03, 1.106e-01, -1.127e-01, -5.276e-02, 8.257e-03, -7.303e-03, -3.415e-02)); + r += mul(s1_8, M4(8.577e-02, 2.541e-02, -2.907e-02, 4.950e-02, -6.884e-02, -8.209e-02, -1.475e-02, -1.545e-01, -7.352e-02, -2.567e-02, -5.289e-02, 5.263e-02, 3.229e-02, 2.618e-02, -1.540e-02, 9.362e-02)); + r += mul(s2_0, M4(3.062e-02, 1.857e-02, -8.636e-03, -1.682e-02, 5.537e-01, -3.514e-02, -9.186e-02, 2.693e-02, -3.343e-02, -7.040e-02, 1.051e-01, 6.563e-02, 3.258e-02, 3.417e-03, 1.605e-02, 1.267e-02)); + r += mul(s2_1, M4(-7.151e-02, 1.227e-01, -7.326e-02, 2.537e-02, -2.078e-01, 7.278e-02, 1.412e-01, -1.122e-01, 1.843e-01, -4.381e-02, 1.476e-02, 1.920e-01, 7.397e-02, 2.101e-02, -3.174e-02, 1.029e-01)); + r += mul(s2_2, M4(-6.594e-02, 1.115e-02, 6.316e-02, -5.414e-02, 4.450e-02, -2.294e-02, -2.767e-02, 6.729e-02, 3.957e-02, 7.448e-02, 1.265e-01, 2.011e-01, -1.474e-02, -2.215e-02, 1.555e-02, -8.592e-04)); + r += mul(s2_3, M4(1.156e-01, 1.432e-02, -7.871e-02, -1.026e-02, 1.357e-02, 3.515e-02, 5.604e-02, -2.915e-02, -3.069e-02, -8.177e-02, 1.823e-03, 1.532e-01, -1.250e-03, 5.676e-02, -4.616e-02, 1.024e-01)); + r += mul(s2_4, M4(9.017e-02, -3.800e-02, 8.001e-02, -9.478e-03, -2.834e-02, 7.740e-02, 3.474e-01, 3.802e-02, 2.971e-02, 1.012e-01, -4.683e-02, -8.751e-02, 1.497e-01, 3.183e-01, -2.329e-01, -7.118e-02)); + r += mul(s2_5, M4(1.648e-01, 4.245e-02, -7.537e-03, 1.075e-01, -1.819e-01, 2.302e-02, -7.246e-03, 8.852e-02, 3.767e-03, 4.556e-02, 5.827e-03, 1.754e-01, 3.513e-02, 1.574e-01, -1.155e-01, 5.588e-02)); + r += mul(s2_6, M4(5.341e-02, -1.367e-02, -7.464e-02, -6.560e-02, -1.519e-01, -1.422e-01, -5.583e-02, -3.300e-02, 7.439e-02, -1.706e-02, -5.464e-02, -4.118e-02, -9.160e-02, -1.090e-01, -1.034e-01, -9.711e-03)); + r += mul(s2_7, M4(-1.381e-01, -5.258e-02, 6.994e-02, 1.294e-02, 2.060e-02, 6.963e-02, 2.264e-02, -2.272e-02, -1.541e-01, -6.683e-03, -8.892e-02, 4.178e-02, 5.846e-02, -3.678e-02, -9.322e-02, -1.214e-01)); + r += mul(s2_8, 
M4(3.280e-02, 3.080e-02, -3.820e-02, 3.021e-02, -4.802e-02, 1.730e-02, 6.540e-02, 1.268e-01, -2.986e-02, -5.638e-02, -4.957e-02, 1.004e-01, -1.840e-01, 2.708e-02, -7.470e-02, 5.920e-02)); + r += mul(s3_0, M4(1.189e-01, 7.786e-02, -1.425e-02, 4.844e-02, -7.746e-02, 4.289e-02, 1.183e-01, 5.976e-02, 1.026e-01, -4.106e-02, 6.290e-02, -1.676e-01, 9.636e-02, -8.836e-03, 1.345e-01, 1.377e-01)); + r += mul(s3_1, M4(-1.363e-01, 7.409e-02, 9.795e-03, -9.447e-02, -8.569e-02, 1.214e-02, -8.428e-03, 5.571e-02, -1.615e-01, 6.046e-02, 1.770e-01, -5.997e-02, -1.165e-02, -7.313e-02, 5.458e-02, 1.325e-01)); + r += mul(s3_2, M4(-2.265e-01, -3.161e-02, -3.418e-02, -2.184e-02, -2.011e-02, -2.432e-02, 1.297e-02, 3.494e-02, 4.333e-02, 1.234e-02, 7.744e-03, 1.454e-03, 7.516e-02, -1.982e-02, 7.248e-02, 2.420e-02)); + r += mul(s3_3, M4(2.222e-02, 3.278e-02, 7.987e-02, 1.096e-02, 1.140e-02, 8.110e-03, -1.275e-02, -7.840e-02, -1.195e-01, 4.069e-02, -2.466e-04, 5.544e-02, -1.178e-01, 2.951e-02, -7.497e-02, -6.892e-02)); + r += mul(s3_4, M4(1.492e-01, -2.627e-02, -9.715e-03, -1.093e-01, 7.752e-02, 2.010e-02, 7.764e-02, -1.783e-01, -2.181e-02, 2.569e-01, 9.805e-02, -8.017e-02, -1.439e-01, 1.977e-01, -3.024e-01, -2.178e-01)); + r += mul(s3_5, M4(-2.819e-02, 4.856e-03, 3.206e-02, 4.678e-02, 1.187e-01, -6.616e-02, -1.151e-01, -3.546e-02, 6.754e-02, 1.009e-01, -7.493e-03, 2.946e-03, 1.973e-01, 1.871e-01, 8.823e-02, 1.684e-02)); + r += mul(s3_6, M4(8.810e-02, 8.072e-02, -4.034e-02, 4.030e-02, -5.572e-02, 9.288e-02, 9.060e-02, 3.320e-02, 8.505e-02, -1.192e-02, -9.003e-02, 5.440e-02, 3.771e-02, -9.408e-02, 5.702e-02, -5.567e-02)); + r += mul(s3_7, M4(1.188e-01, -3.642e-02, 7.194e-02, 4.704e-02, 9.579e-02, -1.549e-02, -5.090e-02, 1.119e-01, -4.576e-02, 6.161e-02, -1.018e-01, 4.477e-02, 3.698e-02, 3.252e-02, -1.882e-01, 1.689e-02)); + r += mul(s3_8, M4(-2.570e-01, 2.082e-03, -8.574e-02, 7.213e-03, -1.976e-02, 7.299e-02, 2.060e-02, -9.470e-03, -5.948e-02, -2.919e-02, 6.735e-02, 2.437e-04, 1.029e-01, 
9.595e-02, -1.881e-02, -2.288e-02)); + r += mul(s4_0, M4(2.582e-02, -1.106e-01, -1.961e-01, -1.136e-01, 2.005e-02, 1.980e-03, 9.052e-02, -5.878e-02, 1.522e-02, 1.750e-02, -5.777e-03, 5.241e-02, 1.188e-01, -6.191e-02, -5.085e-02, 1.577e-02)); + r += mul(s4_1, M4(4.442e-02, -7.196e-03, 5.665e-02, 1.553e-02, -1.322e-01, -4.538e-02, -8.256e-03, 4.163e-02, 5.614e-02, -4.783e-02, 6.511e-03, -4.385e-02, -1.286e-01, 7.950e-02, 7.059e-02, -1.275e-01)); + r += mul(s4_2, M4(-1.467e-01, 1.017e-01, -7.759e-02, 3.511e-02, 9.203e-02, -2.830e-02, 1.622e-02, 5.200e-02, -4.604e-02, -1.184e-02, 1.596e-02, -7.837e-02, -3.537e-02, 7.057e-02, 8.211e-03, 9.773e-02)); + r += mul(s4_3, M4(-3.116e-01, 1.405e-01, -1.825e-01, -1.602e-01, 3.794e-03, 1.349e-02, 1.305e-03, -3.067e-02, -1.701e-01, -2.518e-02, 1.034e-01, 1.046e-02, -6.726e-02, 6.267e-02, -1.063e-02, -3.378e-03)); + r += mul(s4_4, M4(-1.171e-01, 4.148e-02, 5.117e-02, 7.748e-02, -6.945e-02, -1.305e-01, -2.528e-02, 7.143e-02, -3.070e-02, 7.727e-02, -4.977e-03, 7.770e-02, -5.829e-02, -2.211e-02, 1.441e-01, 2.339e-02)); + r += mul(s4_5, M4(-1.988e-01, 9.871e-02, 1.277e-01, 4.608e-02, 7.262e-02, -2.677e-03, -1.809e-02, -6.628e-03, 1.154e-01, -6.355e-02, -3.575e-02, -2.338e-02, 2.810e-01, 6.067e-02, 8.032e-02, -2.597e-02)); + r += mul(s4_6, M4(1.806e-01, 1.486e-01, 2.705e-01, 4.881e-02, -1.508e-01, -1.798e-03, 7.290e-02, 1.463e-02, -2.177e-01, -1.193e-02, -3.910e-02, 9.460e-02, -8.807e-02, -2.762e-02, -5.861e-02, -2.578e-02)); + r += mul(s4_7, M4(1.671e-01, 8.773e-02, -1.050e-01, 2.206e-01, 1.209e-01, 6.165e-03, -1.402e-01, 1.012e-02, 8.527e-02, -6.815e-02, -4.452e-02, -5.306e-02, -3.056e-02, -4.385e-02, -5.908e-02, 1.483e-02)); + r += mul(s4_8, M4(1.712e-01, -1.575e-01, -4.181e-02, -1.116e-01, -3.266e-04, -3.737e-02, -1.091e-01, 8.541e-02, -2.812e-02, 6.552e-02, 8.679e-02, 4.192e-02, -3.681e-02, 7.918e-03, -1.257e-02, -2.467e-03)); + r += mul(s5_0, M4(-8.263e-02, -6.845e-02, 9.966e-03, -1.256e-02, -9.009e-03, 9.184e-03, 5.524e-02, 
-5.402e-02, 2.346e-02, -6.712e-02, -1.016e-01, 5.388e-02, 4.347e-01, -2.237e-01, -1.300e-01, 4.637e-02)); + r += mul(s5_1, M4(8.672e-02, -6.151e-03, 2.086e-02, 6.444e-02, 2.230e-01, -5.404e-02, 3.705e-02, 9.646e-02, 3.340e-02, -1.704e-01, -1.833e-01, 9.220e-02, 2.407e-01, 5.851e-02, -1.318e-01, -4.974e-02)); + r += mul(s5_2, M4(-6.637e-02, 4.372e-02, 9.089e-02, -1.496e-02, -3.321e-01, 1.390e-01, 1.479e-01, -2.773e-01, 1.138e-01, 8.449e-02, 1.444e-01, -2.207e-01, -1.901e-01, 1.075e-01, -3.949e-02, -2.799e-02)); + r += mul(s5_3, M4(8.775e-02, 1.043e-01, -1.374e-02, 5.217e-02, 1.646e-01, 4.211e-02, 3.824e-02, 5.914e-02, 7.882e-03, -1.040e-01, 2.567e-02, 1.414e-01, -1.820e-01, 1.737e-01, 1.967e-01, -3.163e-02)); + r += mul(s5_4, M4(-1.284e-01, 7.496e-02, -2.371e-02, -2.662e-02, 2.125e-01, 2.143e-01, -2.473e-01, -2.820e-01, -1.093e-01, 9.356e-02, -3.271e-01, 4.151e-01, -4.591e-02, -1.298e-01, 1.714e-01, 1.069e-01)); + r += mul(s5_5, M4(-7.078e-03, 8.816e-02, -5.712e-02, 6.085e-03, -1.278e-01, -1.966e-01, 9.780e-02, -1.756e-01, 1.354e-01, -3.525e-01, -1.815e-01, -1.969e-02, -1.791e-02, -7.485e-02, 7.644e-02, 4.745e-02)); + r += mul(s5_6, M4(-8.955e-02, 2.475e-02, 9.383e-02, -7.093e-03, 3.655e-02, 5.342e-02, 7.128e-02, 2.780e-02, 3.943e-02, 3.385e-02, 2.130e-01, -1.660e-01, 1.458e-02, -8.553e-02, -1.646e-01, 7.980e-02)); + r += mul(s5_7, M4(1.467e-02, -4.959e-02, -2.671e-02, 3.923e-02, 1.022e-01, -1.061e-01, 1.377e-01, -1.619e-01, 6.276e-02, -2.224e-01, -1.624e-01, 3.056e-01, 1.421e-01, -6.040e-02, -8.337e-02, -7.135e-02)); + r += mul(s5_8, M4(1.780e-01, 9.652e-03, 1.017e-01, -8.695e-02, 8.021e-01, 1.471e-01, -9.219e-02, -2.656e-01, 5.916e-01, 2.761e-01, 2.245e-01, -1.467e-02, 2.326e-02, 1.662e-02, -4.968e-02, 2.504e-02)); + r += mul(s6_0, M4(1.436e-01, -3.030e-03, 9.921e-03, 7.837e-02, -1.959e-01, 1.429e-01, 2.338e-01, 3.925e-02, -1.750e-01, -7.659e-02, -1.538e-01, 9.592e-02, -7.076e-02, 1.303e-02, 7.885e-02, -1.405e-01)); + r += mul(s6_1, M4(1.935e-02, 8.829e-03, 
-2.350e-02, 8.725e-03, 4.762e-02, -2.891e-02, 9.243e-02, 1.887e-01, -6.199e-02, -2.533e-02, 1.109e-01, 2.641e-01, -2.384e-02, 1.634e-01, 6.726e-02, -1.385e-01)); + r += mul(s6_2, M4(-1.315e-01, 8.924e-02, 7.963e-02, -2.570e-03, -5.893e-02, 4.241e-02, -1.835e-02, 1.171e-01, 2.796e-01, 5.362e-02, -2.055e-02, 2.369e-01, -8.197e-02, -6.894e-02, -2.458e-02, -1.200e-01)); + r += mul(s6_3, M4(3.153e-02, 2.093e-02, 4.118e-03, -1.320e-02, -4.870e-03, -1.532e-01, 1.172e-02, 2.641e-04, -2.587e-01, 1.882e-02, 2.345e-01, -7.079e-02, -4.541e-01, 2.772e-02, -1.567e-01, 1.183e-01)); + r += mul(s6_4, M4(-7.829e-02, -4.471e-02, -1.744e-02, 2.243e-02, -7.376e-02, 1.616e-01, -1.833e-03, 1.263e-02, 2.839e-01, 6.697e-02, 1.050e-01, 1.745e-01, 3.243e-02, -3.538e-02, -6.101e-02, -4.361e-02)); + r += mul(s6_5, M4(1.262e-01, -8.569e-02, -3.884e-02, -2.852e-02, 1.949e-01, 5.632e-02, 9.793e-02, -6.019e-02, 1.199e-01, -2.750e-02, -5.027e-02, 1.635e-01, 6.961e-02, 4.372e-02, 7.290e-02, 5.485e-02)); + r += mul(s6_6, M4(-1.680e-01, -5.327e-02, -4.961e-03, 2.590e-02, 1.749e-01, 8.292e-02, 9.512e-02, 1.474e-01, 8.935e-02, 2.390e-02, 9.264e-02, 9.805e-02, -7.415e-02, -4.113e-02, -1.309e-01, 1.563e-01)); + r += mul(s6_7, M4(3.112e-02, -4.886e-02, 8.220e-02, -6.343e-02, -8.350e-02, -8.932e-02, -1.057e-01, -3.488e-02, -1.522e-01, 3.794e-02, 1.197e-01, 1.177e-01, -1.665e-01, -6.301e-02, -4.422e-02, 1.613e-01)); + r += mul(s6_8, M4(1.694e-01, -2.333e-02, -5.932e-02, -1.183e-02, -9.355e-02, -2.778e-03, 9.447e-02, 2.451e-02, 1.867e-01, 7.640e-02, 2.108e-01, -1.665e-02, 1.878e-01, 8.773e-03, -2.257e-01, 1.594e-01)); + r += mul(s7_0, M4(-8.965e-03, 2.624e-01, 1.210e-01, -1.458e-01, 8.647e-02, -1.773e-02, -2.364e-02, -1.276e-01, 5.775e-03, -9.750e-04, -1.712e-02, -6.345e-02, -6.886e-03, 1.129e-01, 1.148e-01, 2.896e-03)); + r += mul(s7_1, M4(1.351e-01, -1.312e-01, -4.994e-02, -2.724e-01, 2.359e-01, 1.263e-02, 9.888e-02, 2.917e-02, 8.381e-02, 2.099e-02, 3.319e-02, -7.182e-03, -3.381e-02, -4.513e-03, -1.517e-01, 
2.118e-02)); + r += mul(s7_2, M4(-3.996e-01, -1.398e-02, -7.864e-02, 2.525e-02, -1.230e-01, 7.733e-02, -1.769e-02, 5.334e-02, 1.540e-02, -3.895e-02, 3.792e-03, -7.897e-02, 4.750e-02, -7.005e-02, 4.263e-02, -4.919e-02)); + r += mul(s7_3, M4(-2.755e-02, -4.463e-02, -6.112e-02, 6.444e-02, -1.315e-02, -8.691e-02, -8.237e-02, -7.589e-02, -6.343e-02, -4.937e-02, 3.279e-02, 3.611e-02, -1.459e-01, 4.092e-02, 8.283e-02, 1.265e-01)); + r += mul(s7_4, M4(4.206e-02, 1.399e-01, -4.043e-01, -1.555e-01, -1.589e-01, 3.718e-02, -2.416e-03, -8.547e-02, -1.886e-01, -2.620e-02, 1.115e-02, 1.535e-02, 3.972e-02, 8.115e-02, 4.437e-02, 2.503e-02)); + r += mul(s7_5, M4(6.628e-02, -2.132e-02, -1.014e-01, 1.091e-01, -2.270e-02, -1.182e-02, -1.225e-02, 2.201e-01, -1.923e-01, 7.464e-02, -4.376e-02, 4.027e-02, -3.831e-03, 1.229e-01, 1.291e-01, 3.764e-02)); + r += mul(s7_6, M4(2.760e-02, 5.997e-02, 8.050e-02, 6.740e-02, 1.042e-01, 2.560e-02, 4.031e-05, 1.214e-02, 9.502e-02, 3.927e-02, -4.782e-02, -2.899e-03, 4.771e-02, -6.955e-03, -2.022e-02, 2.951e-02)); + r += mul(s7_7, M4(7.620e-03, 8.834e-02, 7.506e-02, -8.427e-02, -1.852e-01, 1.048e-03, 6.660e-02, -1.567e-02, 1.079e-02, -2.159e-02, 8.645e-03, -3.696e-02, 6.173e-02, 5.077e-02, 9.091e-02, -1.181e-02)); + r += mul(s7_8, M4(-1.885e-01, -2.594e-02, -7.269e-02, -1.936e-03, -2.901e-02, 5.401e-02, 8.591e-02, -5.534e-02, 2.194e-02, 4.357e-02, 3.574e-02, 1.611e-02, -7.285e-03, 1.269e-03, -5.859e-02, -9.392e-02)); + r += V4(-5.407e-02, -7.067e-02, 1.950e-02, 9.545e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 
s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.717e-02, -1.085e-01, 4.679e-03, 3.374e-02, -2.934e-02, -1.138e-01, -1.215e-02, -2.990e-02, 1.305e-01, 8.300e-02, 3.335e-02, 2.778e-02, 3.377e-03, -2.101e-01, -2.875e-02, 9.519e-02)); + r += mul(s0_1, M4(3.467e-02, -1.759e-02, 7.026e-02, 7.086e-02, -1.204e-01, -7.126e-02, -3.318e-02, -5.756e-02, 1.470e-01, -2.150e-01, 2.192e-01, -1.131e-01, -1.367e-01, 4.720e-02, 1.611e-01, 7.222e-02)); + r += mul(s0_2, M4(2.817e-02, 1.225e-01, -1.667e-02, -1.570e-02, 6.310e-02, 1.152e-03, -2.590e-02, -1.524e-01, 6.243e-02, -8.887e-02, 1.872e-01, 6.808e-02, -9.254e-02, -1.959e-01, -8.782e-02, -8.248e-02)); + r += mul(s0_3, M4(-4.263e-02, 1.467e-01, 6.943e-02, -1.812e-01, 5.025e-02, -5.220e-02, -1.407e-01, 7.781e-02, -1.816e-01, 6.821e-02, -9.870e-03, 4.308e-02, -1.741e-01, -4.907e-02, 1.264e-02, -7.239e-02)); + r += mul(s0_4, M4(1.230e-02, 8.330e-02, -6.574e-02, 4.610e-02, -1.526e-01, -1.785e-01, -3.353e-01, -4.067e-03, -3.103e-02, 1.791e-01, -6.876e-03, -1.204e-01, -8.336e-02, -6.957e-04, 9.618e-02, 6.912e-02)); + r += mul(s0_5, M4(-1.966e-02, -1.273e-01, 1.176e-03, -5.498e-02, -1.138e-01, -5.583e-02, -5.138e-02, -1.405e-02, 6.958e-02, 3.695e-01, 1.345e-01, -1.498e-01, 9.589e-02, -6.320e-02, -9.379e-02, -3.164e-01)); + r += mul(s0_6, M4(4.593e-02, -2.886e-02, -2.430e-02, -4.046e-02, 5.057e-03, 4.365e-02, -6.719e-02, 1.602e-01, -7.978e-02, 1.209e-01, 8.314e-02, -2.215e-02, -4.360e-02, -7.496e-02, -1.355e-01, -5.498e-02)); + r += mul(s0_7, M4(-2.778e-02, -6.251e-03, -3.910e-04, 4.534e-02, -1.031e-02, -9.855e-02, -9.771e-02, 1.508e-01, -2.924e-02, -4.297e-02, -3.887e-02, 1.542e-01, -1.705e-01, 1.007e-01, -8.629e-02, 8.864e-02)); + r += mul(s0_8, M4(3.027e-02, 1.940e-01, 2.927e-02, 9.326e-02, 4.644e-03, -7.984e-02, 4.678e-03, 1.474e-01, -1.669e-02, -4.209e-02, 
1.043e-02, 9.069e-02, -1.063e-03, -1.228e-01, -3.037e-02, -9.812e-02)); + r += mul(s1_0, M4(-2.453e-01, -1.051e-01, 1.144e-02, 2.509e-01, -6.616e-02, -1.942e-01, 1.415e-01, -2.385e-01, -3.674e-02, -1.015e-01, -4.910e-03, -7.619e-02, -2.013e-02, 3.080e-02, -9.871e-02, -1.100e-01)); + r += mul(s1_1, M4(5.337e-02, -1.877e-01, 5.125e-02, -8.332e-02, 3.972e-02, 4.788e-02, 1.087e-01, 2.028e-02, -8.553e-02, -2.028e-02, -3.157e-03, -1.191e-02, 1.873e-02, -6.994e-02, -4.772e-03, 8.186e-02)); + r += mul(s1_2, M4(-6.383e-02, 1.453e-02, -2.684e-02, 1.285e-01, -3.137e-02, -8.488e-02, 8.024e-02, 5.607e-03, -2.146e-01, -1.763e-01, -2.075e-01, 1.566e-01, 7.104e-02, -3.803e-02, 4.318e-02, 3.814e-03)); + r += mul(s1_3, M4(-1.838e-03, -1.025e-01, -7.961e-02, -6.186e-02, 1.782e-02, -6.383e-02, -4.327e-02, -1.232e-01, 3.138e-02, -5.431e-02, -4.407e-02, -4.343e-02, -1.232e-01, -1.710e-02, 5.223e-02, -1.937e-01)); + r += mul(s1_4, M4(-4.092e-02, -2.729e-02, -6.141e-02, -6.268e-02, 4.153e-02, -1.508e-01, 2.181e-01, 4.653e-02, -2.279e-03, 7.404e-02, -1.050e-01, 1.125e-01, 7.147e-02, 6.433e-02, 4.226e-02, -1.862e-02)); + r += mul(s1_5, M4(-1.234e-01, -1.315e-01, -1.492e-01, 5.749e-02, 4.985e-02, 1.179e-01, -2.306e-02, 1.022e-01, -1.237e-01, -9.024e-02, -3.798e-04, 6.366e-03, -5.423e-02, 3.528e-03, 1.255e-01, 2.933e-02)); + r += mul(s1_6, M4(-1.164e-01, -1.124e-01, -5.387e-02, 6.608e-02, 1.339e-01, 1.405e-02, 9.894e-03, -2.333e-01, -2.912e-02, -1.399e-02, -4.016e-02, -3.425e-02, -1.855e-02, 7.181e-02, -4.869e-02, -3.256e-02)); + r += mul(s1_7, M4(1.152e-01, -4.542e-02, -1.117e-01, -6.471e-02, 1.022e-01, 1.087e-01, 4.418e-02, 7.917e-02, -7.525e-02, 6.215e-03, 9.068e-02, -4.588e-02, 1.149e-02, 1.724e-01, -5.913e-02, 1.584e-01)); + r += mul(s1_8, M4(1.605e-01, -5.167e-02, -4.253e-02, -3.335e-03, 1.543e-02, -6.644e-04, 9.621e-02, 1.979e-02, -6.140e-02, 9.012e-04, -1.190e-02, 5.535e-02, 8.267e-04, 1.517e-02, -3.087e-02, 2.612e-02)); + r += mul(s2_0, M4(-1.238e-01, -1.578e-01, -1.481e-02, 
-1.196e-01, -2.092e-01, -3.426e-02, -1.754e-02, -6.567e-02, 9.243e-02, 1.628e-01, 2.501e-02, 5.175e-02, -1.744e-01, -7.428e-02, -8.889e-02, -7.489e-02)); + r += mul(s2_1, M4(7.879e-03, 6.343e-02, -5.226e-02, 1.424e-01, -1.023e-01, -1.032e-01, 1.761e-01, 9.949e-02, 5.968e-02, -1.766e-01, -8.374e-02, 2.282e-01, 1.070e-01, -1.683e-02, -1.689e-02, 2.043e-02)); + r += mul(s2_2, M4(-5.229e-03, 1.663e-01, 9.149e-02, 4.612e-02, 1.723e-01, 2.314e-02, 4.265e-02, 6.821e-02, -5.956e-02, -7.686e-02, 1.719e-01, 2.495e-01, 2.147e-03, -3.642e-02, 1.616e-01, 7.041e-02)); + r += mul(s2_3, M4(1.973e-01, -3.390e-02, -7.258e-02, 1.437e-01, -6.018e-02, 1.360e-01, -8.374e-02, -2.892e-03, -5.402e-02, -1.989e-02, 4.853e-02, -4.518e-02, -5.872e-04, -8.122e-02, -3.853e-02, 1.739e-02)); + r += mul(s2_4, M4(1.295e-01, -4.896e-02, -1.630e-02, -1.248e-01, -1.512e-01, 6.255e-02, 1.608e-01, 1.656e-01, -7.696e-02, -5.137e-02, 1.514e-01, -2.473e-02, -2.328e-02, 1.922e-01, -4.038e-02, 3.598e-02)); + r += mul(s2_5, M4(-1.866e-02, 9.645e-02, -8.320e-02, 1.142e-01, 6.941e-02, 1.367e-01, 7.091e-03, -1.837e-01, -1.827e-02, -2.980e-02, -7.110e-02, -3.696e-02, -9.701e-02, -9.448e-02, -8.521e-02, 2.434e-02)); + r += mul(s2_6, M4(-2.143e-02, -1.493e-01, 9.606e-02, -3.440e-02, 7.487e-03, 9.767e-02, -3.191e-02, -7.964e-03, 1.168e-01, 1.179e-01, 3.998e-02, -1.623e-01, -8.569e-02, 8.994e-03, -3.421e-02, -1.244e-02)); + r += mul(s2_7, M4(-9.054e-02, -5.686e-02, -1.341e-02, 1.066e-01, 1.938e-01, -2.331e-01, 1.364e-01, 1.616e-01, -6.815e-02, -5.872e-03, -2.192e-03, -2.340e-02, -9.640e-02, 1.529e-01, 8.484e-03, 1.001e-01)); + r += mul(s2_8, M4(6.817e-02, -1.288e-01, 8.888e-02, 9.576e-02, 1.225e-01, 1.583e-02, -2.757e-02, -4.511e-02, 5.512e-02, -1.010e-01, -3.620e-02, 1.102e-02, 6.741e-03, -1.227e-02, 8.649e-02, 2.877e-02)); + r += mul(s3_0, M4(6.229e-02, 9.592e-02, 1.294e-02, -2.348e-01, -1.230e-02, 1.214e-01, -2.806e-03, 3.487e-02, -1.279e-01, -2.212e-01, -1.472e-01, 2.606e-02, -1.845e-01, -2.816e-03, -6.874e-02, 
-1.281e-02)); + r += mul(s3_1, M4(-9.062e-02, 1.683e-01, 1.611e-02, 5.872e-02, 5.554e-02, -7.704e-02, 6.382e-02, -2.322e-01, -3.895e-02, -3.454e-03, -2.921e-01, -5.875e-02, 3.566e-02, 1.108e-01, -1.710e-01, -2.502e-01)); + r += mul(s3_2, M4(1.173e-01, 1.708e-02, 6.776e-03, 1.691e-02, 1.499e-01, 2.921e-02, -9.225e-03, 1.394e-01, -1.358e-01, 9.238e-02, -5.082e-02, 1.636e-01, -2.493e-02, -6.122e-02, 5.968e-02, 8.655e-02)); + r += mul(s3_3, M4(-5.715e-02, -1.511e-02, -6.212e-02, -6.473e-02, 3.562e-02, -3.158e-02, -1.098e-01, 1.478e-02, 6.033e-02, -1.016e-01, -1.135e-01, -1.125e-01, -1.030e-01, 2.885e-01, 2.408e-02, -4.063e-02)); + r += mul(s3_4, M4(-4.288e-02, 1.547e-02, -1.791e-01, -2.682e-01, -3.370e-02, 1.680e-02, 2.958e-03, 5.127e-02, -2.809e-02, -4.347e-02, -1.414e-01, 3.814e-02, -1.420e-01, 2.483e-01, 2.089e-01, -1.261e-01)); + r += mul(s3_5, M4(-1.184e-01, 1.336e-02, -9.600e-02, -9.977e-02, -2.192e-01, -1.367e-01, 1.383e-01, -5.234e-02, 2.522e-02, 1.420e-03, -2.163e-01, 6.757e-02, -3.138e-01, -1.640e-01, -3.642e-02, 1.417e-01)); + r += mul(s3_6, M4(-2.282e-02, -1.051e-01, -5.613e-03, -4.427e-02, -2.312e-02, 7.340e-02, 6.455e-02, 4.537e-03, 1.161e-01, -4.970e-02, -6.218e-02, 1.148e-01, -6.845e-02, 8.079e-02, -7.447e-03, 6.897e-02)); + r += mul(s3_7, M4(2.117e-02, 4.091e-02, 4.972e-02, -3.314e-02, -4.211e-03, -9.433e-02, -8.061e-02, 7.014e-02, 6.691e-02, -2.260e-01, -1.036e-01, 4.619e-02, -5.972e-02, 1.259e-01, -3.887e-02, 6.878e-02)); + r += mul(s3_8, M4(6.344e-03, -2.309e-02, 6.478e-02, 1.424e-03, -1.296e-03, -5.976e-02, -2.716e-02, 1.685e-01, -3.233e-02, -7.415e-02, -2.596e-02, -8.568e-02, -2.194e-01, 1.018e-01, 5.171e-02, 1.192e-01)); + r += mul(s4_0, M4(1.224e-01, -1.173e-01, -7.758e-02, -1.832e-02, -1.574e-01, 1.337e-01, 2.167e-02, 7.167e-02, -1.695e-02, -5.784e-02, 6.215e-02, -5.441e-02, -2.862e-02, 4.763e-02, -1.056e-01, -3.591e-03)); + r += mul(s4_1, M4(6.239e-03, 1.669e-01, -1.077e-01, 1.890e-01, 2.274e-01, 6.625e-03, 2.943e-03, 6.447e-02, 2.389e-02, 
-4.926e-02, -5.119e-02, -6.227e-02, 7.541e-02, 2.031e-02, 1.013e-01, -9.943e-02)); + r += mul(s4_2, M4(4.436e-02, -2.437e-01, 1.763e-01, -5.477e-02, 7.136e-02, 4.379e-02, 6.421e-02, -2.688e-02, -6.556e-04, 1.186e-01, 2.484e-02, 9.693e-02, -7.738e-02, -6.154e-02, -1.354e-02, -9.314e-03)); + r += mul(s4_3, M4(-9.485e-02, 1.547e-01, -2.700e-02, -3.183e-01, -4.993e-02, -1.461e-01, 7.904e-02, -5.630e-02, -4.300e-02, 8.780e-02, -7.001e-02, -5.912e-02, 1.305e-01, 4.421e-02, 1.705e-02, -5.380e-02)); + r += mul(s4_4, M4(9.836e-03, -8.879e-02, -6.089e-02, -2.735e-01, -9.374e-02, -3.482e-02, 1.593e-01, 1.053e-01, -1.949e-02, -9.097e-03, -1.308e-02, 6.561e-02, -2.487e-03, -7.772e-02, 4.211e-02, 6.781e-02)); + r += mul(s4_5, M4(-1.164e-01, 1.692e-01, -1.707e-01, 2.116e-01, 4.292e-02, -1.785e-02, -3.180e-03, 1.698e-02, -3.298e-02, 9.171e-02, 1.148e-02, -2.111e-03, -2.830e-02, -4.993e-02, 6.376e-02, 6.920e-02)); + r += mul(s4_6, M4(-1.619e-01, -1.771e-01, 6.520e-02, -7.375e-02, 9.529e-02, -1.258e-01, -1.184e-01, 8.212e-02, 2.637e-02, -1.539e-01, 5.094e-02, -1.113e-01, 5.879e-02, -8.586e-02, 3.324e-02, 8.466e-02)); + r += mul(s4_7, M4(3.179e-03, -7.854e-02, -8.955e-02, 7.403e-02, -9.935e-02, -2.673e-02, -7.363e-02, -3.045e-03, -2.528e-02, -1.061e-01, -3.504e-02, 4.410e-02, -4.636e-02, 7.533e-02, -1.717e-02, 1.112e-01)); + r += mul(s4_8, M4(2.715e-02, -6.483e-03, -1.431e-01, 2.788e-01, -2.372e-02, 2.763e-02, 1.733e-02, -1.354e-01, 5.190e-02, 3.126e-02, -3.235e-02, 3.248e-02, 2.591e-02, 1.613e-02, -1.572e-03, -4.472e-02)); + r += mul(s5_0, M4(4.632e-03, 1.782e-01, -5.608e-02, 6.302e-03, -7.340e-02, -7.737e-03, -9.241e-02, 4.781e-02, -1.791e-01, 2.817e-02, 9.707e-02, -6.090e-02, -2.927e-01, 1.862e-01, -2.333e-01, 1.628e-01)); + r += mul(s5_1, M4(-6.151e-02, -2.405e-02, -4.087e-02, -1.285e-01, -7.012e-02, 2.614e-01, 4.987e-02, -4.459e-02, -2.275e-02, 7.123e-02, 9.935e-02, 3.843e-02, 1.603e-01, -1.680e-01, 3.095e-03, -1.928e-01)); + r += mul(s5_2, M4(6.417e-02, 1.484e-01, 1.279e-01, 
-1.789e-01, -3.257e-01, -7.609e-02, -1.704e-02, -3.876e-01, -3.879e-02, 1.197e-01, -1.642e-01, 2.962e-03, 2.338e-01, -1.852e-01, 6.983e-02, -1.616e-01)); + r += mul(s5_3, M4(-1.193e-01, 3.477e-02, -4.285e-02, 2.139e-02, 6.788e-02, 2.079e-02, -5.760e-02, 3.218e-02, -1.667e-01, -2.546e-01, -1.955e-01, 7.686e-02, 2.924e-03, 1.215e-01, 8.917e-02, 1.097e-01)); + r += mul(s5_4, M4(8.016e-02, 1.009e-01, 5.233e-02, -3.305e-02, 3.103e-02, 2.751e-01, 2.513e-01, 1.388e-01, 2.166e-01, -3.219e-01, 7.068e-02, 4.566e-02, -9.380e-02, 5.389e-02, -1.850e-01, 1.512e-01)); + r += mul(s5_5, M4(-8.857e-02, -2.837e-03, -7.693e-02, -9.155e-03, 2.678e-01, 1.357e-01, 4.491e-01, 2.434e-01, 1.613e-01, -1.161e-01, -6.304e-02, 1.556e-01, -2.363e-02, -2.454e-02, 3.906e-04, 4.633e-02)); + r += mul(s5_6, M4(2.677e-03, -3.706e-03, 9.228e-02, 9.541e-03, -4.820e-02, -7.482e-02, -1.183e-03, -1.958e-01, -1.223e-02, 7.255e-02, 1.093e-01, -1.465e-01, 1.202e-01, 1.210e-01, 4.326e-02, 1.447e-01)); + r += mul(s5_7, M4(-5.090e-02, 2.420e-02, -7.492e-02, 1.324e-01, 7.820e-02, 5.833e-02, -6.090e-02, -3.127e-01, -1.220e-01, -1.101e-03, -1.968e-01, 2.021e-01, -4.462e-02, 1.252e-02, 7.164e-02, 5.059e-02)); + r += mul(s5_8, M4(4.707e-02, -3.480e-02, 7.551e-03, -1.208e-01, -5.298e-01, -1.951e-01, -7.116e-01, 1.388e-01, -2.367e-01, 1.583e-02, 4.278e-02, -1.337e-01, 7.089e-02, -7.687e-02, -8.515e-02, 7.922e-02)); + r += mul(s6_0, M4(-9.214e-02, 7.529e-03, -6.248e-02, 2.070e-01, 7.201e-02, 8.504e-03, 5.225e-02, -4.419e-03, -9.270e-02, 4.535e-02, 9.824e-03, 9.299e-02, -1.235e-01, 1.056e-02, -1.423e-01, 3.974e-02)); + r += mul(s6_1, M4(-4.171e-02, -1.087e-01, 1.045e-02, 1.400e-01, -1.984e-01, -6.324e-02, 7.945e-02, -2.961e-02, -6.015e-02, 6.839e-02, -1.243e-01, 1.203e-02, -8.761e-02, 1.410e-01, -4.083e-03, 3.386e-02)); + r += mul(s6_2, M4(-4.386e-03, 3.460e-02, 1.422e-01, 4.385e-03, -1.620e-01, 1.497e-01, -4.923e-02, -9.036e-02, -2.182e-01, 1.379e-01, 6.954e-02, -3.739e-01, 3.684e-01, 1.336e-01, 5.649e-02, 7.316e-02)); 
+ r += mul(s6_3, M4(1.649e-01, -5.692e-02, 4.225e-02, 1.613e-01, -2.164e-01, -8.842e-02, -2.972e-03, -3.522e-01, -1.062e-01, 7.149e-02, 3.555e-02, 2.059e-01, -3.451e-02, -7.517e-02, 6.511e-02, 4.864e-02)); + r += mul(s6_4, M4(-1.413e-01, -4.342e-02, -3.003e-02, -2.848e-02, -1.319e-02, 5.676e-02, 1.667e-03, -1.223e-01, 1.484e-02, -1.597e-01, -6.809e-02, -2.543e-01, 3.355e-01, -1.252e-01, -5.322e-02, 1.983e-02)); + r += mul(s6_5, M4(1.219e-02, -5.971e-02, 5.168e-02, -1.013e-01, -9.651e-02, 4.657e-02, -1.561e-01, 2.110e-01, -9.167e-02, 8.247e-02, -1.410e-01, 1.799e-02, -1.964e-02, -2.807e-01, 2.726e-01, -2.514e-01)); + r += mul(s6_6, M4(4.917e-02, -1.633e-02, -5.837e-02, 6.642e-02, -5.624e-02, 1.192e-01, 4.469e-02, 1.458e-02, -5.512e-02, 1.182e-01, -1.312e-02, 4.652e-02, 3.621e-01, -4.627e-02, 1.101e-01, -2.360e-02)); + r += mul(s6_7, M4(3.303e-02, 2.066e-01, -1.654e-02, -1.911e-01, -8.888e-03, 1.789e-02, -1.533e-01, 1.223e-01, -1.661e-01, -1.740e-01, 1.145e-01, -1.386e-01, -5.269e-02, -4.546e-02, 7.470e-02, -2.027e-01)); + r += mul(s6_8, M4(2.025e-02, -1.004e-02, 3.841e-02, 7.722e-02, 7.963e-02, -1.474e-01, -3.591e-02, 1.618e-01, -4.981e-02, 4.029e-02, -5.320e-02, -1.589e-01, -2.171e-01, -8.416e-03, -1.029e-01, -1.187e-01)); + r += mul(s7_0, M4(2.325e-01, -8.288e-02, 1.457e-01, -4.569e-02, 9.574e-02, -1.053e-01, 9.755e-02, 6.378e-02, 3.830e-03, -1.469e-01, -9.631e-02, 5.755e-02, -5.083e-02, -5.378e-02, -3.056e-02, -2.397e-02)); + r += mul(s7_1, M4(-2.248e-01, -8.623e-02, 2.123e-02, -1.490e-01, -8.108e-03, 6.108e-02, 1.224e-02, 1.078e-02, -5.741e-02, -7.249e-02, 6.195e-02, -8.693e-02, -9.347e-02, 1.273e-02, 5.736e-02, 5.040e-02)); + r += mul(s7_2, M4(1.596e-01, 8.328e-02, -2.612e-01, -8.180e-02, -3.636e-02, 2.406e-03, 5.303e-02, -3.305e-02, -1.257e-01, -8.973e-02, 4.427e-02, 5.241e-03, 1.629e-01, 4.851e-02, 6.754e-03, 3.417e-02)); + r += mul(s7_3, M4(2.611e-01, -2.005e-01, -8.642e-02, 5.382e-02, -8.290e-03, 2.400e-02, 1.224e-02, -5.875e-02, -1.908e-02, 6.390e-02, 
-6.675e-02, -8.961e-02, 5.823e-02, -1.251e-01, 2.823e-02, 7.424e-02)); + r += mul(s7_4, M4(-1.325e-01, -9.612e-03, -8.170e-02, -3.149e-02, -5.403e-02, -1.366e-01, 1.227e-01, 3.794e-02, 1.192e-02, -4.303e-02, 1.750e-01, -8.316e-02, -1.106e-02, -4.174e-02, -3.208e-02, 1.970e-01)); + r += mul(s7_5, M4(3.196e-02, 1.542e-02, 8.516e-03, -6.807e-02, -1.445e-02, 7.338e-02, 2.065e-02, 9.911e-03, -1.440e-02, 1.396e-02, -4.628e-02, -5.420e-02, 5.802e-02, 7.582e-02, 1.022e-01, -4.560e-02)); + r += mul(s7_6, M4(-5.374e-02, -1.868e-02, -2.215e-02, 1.477e-04, 7.862e-02, 1.092e-01, -1.139e-02, -1.158e-02, -7.233e-02, 2.963e-02, -4.970e-02, -8.032e-02, -3.154e-02, 2.469e-03, 1.429e-02, -3.882e-03)); + r += mul(s7_7, M4(8.257e-03, -1.201e-01, -6.238e-02, 7.351e-02, 1.150e-02, 1.626e-01, -9.431e-02, 5.197e-02, -2.246e-02, 2.016e-01, 2.822e-02, 1.362e-01, -9.953e-03, -3.172e-02, -9.940e-04, -1.536e-01)); + r += mul(s7_8, M4(1.146e-01, 1.110e-01, -5.326e-02, 2.054e-01, -1.047e-01, 6.589e-02, -2.937e-02, -1.905e-02, 3.019e-02, 7.729e-02, -1.218e-03, 6.997e-02, -1.298e-01, -1.623e-02, -5.119e-02, 6.137e-02)); + r += V4(4.130e-02, 1.983e-02, 1.131e-02, -4.202e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(9.776e-03, 1.133e-02, -3.355e-02, -1.531e-01, -6.215e-02, 7.207e-02, -2.910e-02, -7.251e-02, 7.522e-02, -3.966e-02, 2.699e-02, 
7.452e-02, -4.976e-02, 1.731e-01, -1.035e-01, 9.129e-02)); + r += mul(s0_1, M4(-1.396e-01, 1.442e-01, -1.480e-01, -9.944e-02, 5.011e-02, 7.390e-02, -8.576e-02, -9.798e-03, -6.092e-02, -3.941e-02, 1.679e-02, -7.403e-02, -5.263e-03, 6.009e-02, 7.368e-02, 1.100e-01)); + r += mul(s0_2, M4(1.877e-02, 1.486e-01, -9.979e-02, 1.587e-01, 4.095e-02, 3.952e-02, -2.612e-02, 3.233e-02, 1.062e-01, -1.490e-01, 8.490e-02, -1.303e-01, 1.267e-01, -3.028e-01, 8.821e-02, 2.048e-03)); + r += mul(s0_3, M4(1.393e-02, -3.075e-02, 1.460e-01, -4.840e-02, -8.010e-02, 3.560e-02, 3.412e-02, -4.544e-02, 3.805e-02, -3.483e-02, 1.444e-01, -2.263e-02, 6.338e-02, -1.065e-01, 1.063e-01, -3.890e-02)); + r += mul(s0_4, M4(4.691e-02, 2.480e-02, 1.178e-01, -8.325e-02, 1.123e-02, 4.891e-02, -5.960e-02, -2.912e-02, 4.684e-02, 2.167e-01, 2.077e-01, 9.285e-02, 5.381e-03, -5.640e-02, -2.302e-01, -4.384e-02)); + r += mul(s0_5, M4(1.248e-01, -5.576e-04, 2.938e-02, 7.179e-02, -6.587e-02, 1.190e-01, 4.798e-02, -4.063e-02, -2.519e-02, 1.964e-01, 8.330e-02, -4.101e-02, -1.757e-01, 1.975e-02, 8.995e-02, -2.210e-03)); + r += mul(s0_6, M4(-9.828e-02, -7.343e-02, 9.764e-02, -2.942e-02, -3.018e-02, -1.102e-01, -1.853e-01, -1.000e-01, 6.522e-02, 4.472e-02, 1.528e-01, -8.913e-02, -1.660e-01, -5.624e-02, -3.116e-01, 3.415e-02)); + r += mul(s0_7, M4(-6.710e-02, -2.627e-02, -3.502e-02, 1.176e-01, 3.238e-01, -1.860e-01, -1.790e-01, 4.971e-02, -3.690e-02, 3.052e-02, 5.722e-02, -2.977e-02, -7.715e-02, -1.292e-01, -2.234e-01, 1.173e-01)); + r += mul(s0_8, M4(7.600e-02, -2.084e-02, 2.253e-02, 6.215e-02, -8.462e-02, 1.150e-01, 3.212e-02, -7.986e-03, -8.954e-03, 1.414e-01, 1.038e-01, -8.033e-02, -2.475e-01, 2.992e-04, -1.844e-02, -7.008e-02)); + r += mul(s1_0, M4(2.073e-01, 4.388e-02, -2.668e-01, -9.685e-02, -2.238e-02, 1.252e-01, 1.399e-01, 8.699e-02, 2.333e-02, 3.715e-02, -3.618e-02, 9.009e-02, 1.044e-01, -9.673e-02, -3.876e-02, 7.851e-03)); + r += mul(s1_1, M4(-7.281e-02, -4.151e-02, -2.723e-02, -8.388e-02, 5.295e-03, 
2.041e-02, -6.911e-02, 8.872e-02, -1.674e-02, -6.488e-03, -1.469e-02, -9.139e-02, 1.281e-02, -3.248e-02, 1.431e-02, 1.847e-02)); + r += mul(s1_2, M4(1.544e-02, -1.046e-01, -1.693e-01, -1.224e-01, 5.646e-02, -2.360e-02, -2.544e-02, 3.755e-02, 4.592e-02, 3.989e-02, 7.647e-02, 1.185e-02, -5.048e-02, 1.897e-01, -2.199e-02, -4.915e-02)); + r += mul(s1_3, M4(6.552e-02, 4.922e-03, -1.820e-01, -2.364e-02, -1.075e-01, 2.728e-02, -1.005e-01, 7.385e-02, -4.606e-02, 4.673e-02, 1.874e-03, 5.370e-02, -8.265e-03, 1.487e-02, -6.224e-02, -4.375e-02)); + r += mul(s1_4, M4(-5.894e-02, -1.075e-01, -6.922e-02, -6.830e-03, -9.902e-02, -1.249e-01, -6.997e-02, 1.181e-03, -3.054e-04, -8.467e-02, 5.810e-02, 6.620e-02, 1.048e-01, -3.457e-02, 6.104e-03, -1.209e-01)); + r += mul(s1_5, M4(6.688e-02, -2.488e-02, -7.699e-02, -1.275e-01, 2.364e-02, 1.505e-01, 1.489e-01, -5.790e-02, 1.957e-01, -1.594e-01, -1.788e-01, 4.084e-02, -7.009e-02, 6.747e-02, -8.610e-02, -6.766e-02)); + r += mul(s1_6, M4(2.521e-01, 4.219e-02, 4.813e-02, -4.289e-02, 1.826e-01, 9.599e-02, 6.430e-02, -4.017e-02, -1.407e-02, -3.707e-02, -9.009e-02, 2.826e-02, 1.050e-01, 7.109e-02, -4.801e-02, 2.410e-02)); + r += mul(s1_7, M4(-1.602e-01, 1.661e-01, 4.057e-02, -2.172e-02, 7.024e-02, 4.228e-02, 2.632e-01, -7.107e-02, -8.244e-02, 2.700e-02, -1.450e-01, 7.160e-02, 1.951e-01, -2.251e-01, 3.322e-02, 4.582e-03)); + r += mul(s1_8, M4(-1.262e-01, 1.854e-03, 1.178e-01, -1.497e-01, 1.528e-01, -2.994e-02, 2.364e-02, 9.387e-02, 7.943e-02, -1.429e-01, -7.044e-02, 3.041e-02, -7.487e-02, 5.990e-02, -5.066e-02, -2.487e-02)); + r += mul(s2_0, M4(-5.076e-02, 1.631e-01, -6.869e-02, 1.424e-02, 3.466e-02, -7.814e-02, 1.173e-01, -1.744e-01, 4.356e-02, 7.307e-02, 6.131e-02, 8.219e-02, -3.229e-02, 1.386e-03, 3.682e-02, -2.855e-02)); + r += mul(s2_1, M4(-1.844e-01, -2.906e-02, 1.676e-01, -6.373e-02, -9.257e-02, -3.190e-01, -2.114e-01, -4.582e-02, 2.055e-01, 8.519e-02, 1.576e-01, 2.338e-01, 4.294e-02, -3.009e-02, -9.665e-02, 1.986e-01)); + r += mul(s2_2, 
M4(-1.547e-01, -2.221e-01, -2.853e-02, -1.768e-03, 5.880e-02, 1.788e-02, 1.798e-01, -1.395e-01, -4.826e-02, 2.575e-01, -3.729e-02, 1.299e-01, 8.164e-02, -7.465e-03, -6.420e-02, -8.713e-02)); + r += mul(s2_3, M4(-5.749e-03, 1.248e-01, -9.157e-02, 5.040e-02, -3.994e-02, 1.028e-02, -5.703e-02, 8.672e-04, 6.620e-02, -1.704e-01, 6.747e-02, -9.601e-04, 4.844e-02, 1.800e-02, -3.440e-02, -4.046e-02)); + r += mul(s2_4, M4(1.493e-01, 2.121e-01, 1.150e-01, 1.215e-01, -3.332e-02, 7.949e-02, 2.340e-02, 1.034e-01, 1.920e-01, -1.273e-02, -2.382e-01, -6.905e-02, -1.858e-01, 1.172e-02, -6.687e-03, -9.036e-02)); + r += mul(s2_5, M4(2.826e-02, -2.022e-02, 4.561e-03, 5.506e-02, 2.404e-01, -5.027e-02, -2.026e-02, 1.156e-01, 2.665e-01, 5.267e-02, -9.174e-02, -3.432e-02, -4.251e-02, -4.464e-02, -1.960e-02, 5.370e-02)); + r += mul(s2_6, M4(1.694e-01, 6.852e-03, -5.605e-02, 1.458e-01, -1.626e-01, -1.851e-03, -1.486e-01, 3.850e-02, -1.147e-01, -3.283e-02, 1.666e-01, -4.694e-02, 5.731e-03, -2.958e-02, -3.499e-02, 7.312e-02)); + r += mul(s2_7, M4(-1.078e-01, 2.190e-01, -1.589e-01, -5.892e-02, -2.788e-01, 5.833e-02, 1.172e-01, -1.219e-02, 1.304e-02, -2.758e-02, -5.106e-02, 1.364e-01, -1.217e-01, 1.659e-01, 3.810e-02, -6.095e-03)); + r += mul(s2_8, M4(1.371e-01, 1.274e-01, 3.390e-02, -7.796e-02, -1.237e-01, -1.328e-01, 1.829e-01, -4.622e-02, 9.045e-03, 7.476e-04, 8.001e-02, 7.130e-03, -1.655e-03, 7.937e-03, 1.888e-01, -2.302e-02)); + r += mul(s3_0, M4(1.458e-02, -5.959e-02, 1.004e-01, 4.872e-02, -4.672e-02, -1.137e-03, -1.197e-02, 3.033e-03, -7.688e-02, 1.447e-01, -7.802e-02, -1.378e-01, -8.679e-02, -1.352e-01, -6.163e-03, -2.407e-02)); + r += mul(s3_1, M4(8.416e-03, -5.473e-02, -1.909e-02, -1.767e-03, -2.313e-02, 8.180e-02, -9.585e-02, 3.619e-02, 2.346e-02, 1.528e-01, -7.843e-02, 4.169e-02, -4.233e-02, 7.463e-02, 8.223e-02, 2.546e-01)); + r += mul(s3_2, M4(-1.631e-02, 5.895e-02, -1.996e-02, -2.241e-02, -6.741e-02, 5.341e-02, -8.862e-02, 1.549e-02, -7.137e-02, -1.477e-01, 1.356e-01, -1.492e-01, 
7.808e-02, 7.700e-02, 5.673e-02, -2.241e-01)); + r += mul(s3_3, M4(-4.562e-02, -4.127e-02, -1.699e-02, -1.378e-01, -2.514e-02, 3.447e-02, -1.057e-01, -5.026e-02, -3.746e-02, 4.040e-02, -8.313e-02, 3.113e-02, 1.021e-01, 1.583e-02, -2.427e-02, -8.659e-02)); + r += mul(s3_4, M4(1.637e-01, -7.124e-02, -6.697e-03, 5.279e-02, 8.765e-02, 1.006e-01, 1.053e-01, 5.971e-02, -8.269e-02, -7.273e-02, 3.931e-02, -5.453e-02, -1.746e-01, 1.707e-01, -1.048e-01, -1.554e-01)); + r += mul(s3_5, M4(-5.668e-02, 1.609e-02, 3.673e-02, -1.317e-01, -5.147e-02, -8.183e-02, -8.940e-03, 2.933e-02, 5.024e-02, -9.863e-02, 1.304e-01, -1.980e-02, -2.414e-01, 1.176e-02, 5.452e-03, -4.053e-02)); + r += mul(s3_6, M4(3.254e-02, -1.210e-01, -1.227e-01, 1.183e-02, -2.902e-02, 1.240e-02, 4.859e-02, 7.308e-02, 4.321e-02, -4.560e-02, -1.068e-01, -2.553e-02, 7.134e-02, 8.717e-03, 1.321e-02, 2.268e-02)); + r += mul(s3_7, M4(-1.497e-01, 6.714e-02, -5.428e-02, -2.406e-02, 4.013e-02, 8.743e-02, 6.129e-02, -1.235e-01, -1.228e-01, 1.277e-02, -1.850e-02, 8.048e-02, -4.311e-02, 2.646e-01, 9.466e-02, -1.634e-02)); + r += mul(s3_8, M4(5.882e-02, 2.052e-02, 4.387e-02, -1.041e-01, -5.720e-02, -1.479e-01, 2.832e-02, -2.621e-02, 3.333e-02, -1.262e-02, -1.641e-01, 5.123e-02, -1.047e-01, 2.765e-01, 4.264e-03, -2.939e-02)); + r += mul(s4_0, M4(1.501e-01, 2.633e-02, 9.344e-02, 1.136e-02, 2.741e-02, 4.448e-03, -9.019e-02, 1.810e-02, -2.366e-02, 5.233e-03, 1.797e-02, -2.754e-02, 7.400e-02, -1.927e-03, -6.300e-02, 3.018e-02)); + r += mul(s4_1, M4(6.356e-02, 3.185e-02, 3.746e-02, 1.777e-02, -1.896e-02, 2.099e-02, -7.546e-02, -5.559e-02, 6.748e-03, 2.509e-02, -7.122e-02, -1.022e-01, 6.756e-02, -4.133e-02, -9.943e-02, 7.718e-02)); + r += mul(s4_2, M4(2.397e-01, -2.176e-01, 2.273e-01, -1.299e-02, -3.828e-02, -2.349e-02, -8.658e-02, 4.308e-02, -1.366e-02, -4.806e-02, 6.817e-02, 1.123e-01, -5.003e-02, -9.895e-02, -4.029e-02, -2.023e-01)); + r += mul(s4_3, M4(2.090e-01, 8.440e-02, -1.067e-01, -1.367e-01, 8.654e-03, -1.849e-02, 
2.142e-02, -2.211e-02, -1.145e-01, -6.497e-02, 3.199e-02, 5.907e-02, -1.756e-02, 9.862e-02, -7.888e-03, -5.537e-02)); + r += mul(s4_4, M4(1.236e-01, -6.157e-02, 1.013e-01, -2.011e-01, -2.019e-02, -9.063e-02, -2.651e-02, -1.942e-01, 2.906e-02, -6.897e-04, -6.750e-02, -1.401e-01, 1.443e-02, 2.090e-02, 9.017e-03, 2.131e-03)); + r += mul(s4_5, M4(-9.932e-02, 4.428e-01, 5.511e-02, 1.661e-01, 2.183e-02, -1.299e-03, 1.319e-01, 1.633e-01, -3.275e-02, -7.492e-02, 1.048e-03, 5.778e-02, -1.204e-02, -8.126e-03, -9.239e-02, -3.791e-02)); + r += mul(s4_6, M4(3.221e-01, -1.157e-01, -6.856e-02, 3.304e-02, -8.065e-02, -3.694e-04, -1.317e-01, -2.974e-02, -7.071e-02, -1.253e-02, -9.960e-02, 1.396e-01, -1.033e-01, -5.537e-03, 1.319e-01, 1.306e-01)); + r += mul(s4_7, M4(5.416e-02, -1.607e-01, -3.796e-02, -2.196e-01, 7.504e-03, 1.137e-01, 8.092e-02, 4.933e-02, -1.354e-01, 2.499e-02, 7.708e-03, 3.970e-02, 7.704e-02, 1.006e-01, -7.237e-02, 8.995e-02)); + r += mul(s4_8, M4(9.289e-02, 3.223e-01, 1.134e-01, -3.694e-03, 2.888e-02, -1.124e-01, 2.244e-01, -5.135e-02, -7.507e-02, 6.290e-02, -2.024e-01, -8.959e-02, -1.329e-02, 6.541e-03, 3.294e-03, -6.241e-02)); + r += mul(s5_0, M4(-4.517e-02, 1.235e-01, 5.442e-03, 1.839e-02, -1.477e-02, 1.191e-02, 1.370e-01, -7.028e-02, 6.019e-02, 3.786e-02, -1.040e-01, -5.986e-02, -6.938e-02, 9.119e-03, 2.456e-01, -1.818e-01)); + r += mul(s5_1, M4(-1.001e-01, 1.577e-01, 1.060e-02, 3.452e-02, 7.981e-02, 1.807e-01, -1.247e-02, 1.419e-01, 2.852e-02, -2.555e-01, 8.737e-02, -2.522e-02, -1.450e-01, -7.592e-02, -7.450e-02, 1.140e-01)); + r += mul(s5_2, M4(1.068e-01, -8.475e-02, 3.917e-02, 7.297e-02, 2.496e-03, 3.705e-01, -7.435e-02, 1.772e-01, 3.024e-02, -3.389e-01, 2.800e-01, 4.755e-02, 8.825e-02, -1.821e-01, 6.686e-02, 2.805e-01)); + r += mul(s5_3, M4(-1.572e-01, 2.899e-02, 1.777e-02, -1.451e-02, 6.746e-02, 5.148e-02, -8.362e-02, 3.644e-03, -9.239e-02, -2.605e-01, -2.584e-01, -1.230e-01, -9.658e-02, 6.736e-02, -1.154e-01, -2.769e-03)); + r += mul(s5_4, M4(1.405e-01, 
8.583e-02, -5.553e-02, -4.588e-02, -3.077e-01, -9.568e-02, -8.990e-02, 1.626e-01, 3.272e-01, -1.636e-01, -3.025e-01, 4.880e-02, 1.392e-01, -2.253e-02, -1.375e-01, 1.687e-02)); + r += mul(s5_5, M4(3.336e-03, -1.191e-01, 1.268e-01, 9.806e-03, 9.515e-03, -4.439e-03, -1.028e-01, -3.148e-01, 4.599e-01, -1.263e-01, 2.576e-01, -2.934e-01, 1.677e-01, 3.158e-02, 2.495e-01, 1.056e-01)); + r += mul(s5_6, M4(2.726e-03, -1.399e-01, 6.948e-02, -5.882e-02, -1.246e-01, 2.279e-02, -1.248e-02, 1.166e-01, 4.558e-01, 1.242e-01, 1.545e-01, 2.175e-02, -1.811e-01, 1.832e-01, 4.592e-02, 6.989e-04)); + r += mul(s5_7, M4(3.254e-03, 1.120e-01, 1.067e-01, -1.189e-01, 3.137e-01, -3.508e-02, -3.266e-01, 2.504e-01, -5.192e-02, -3.085e-01, 3.621e-01, -4.971e-01, -5.263e-02, -1.859e-01, 2.221e-02, 1.537e-01)); + r += mul(s5_8, M4(4.073e-02, -2.734e-02, -1.628e-02, 3.140e-02, 2.658e-01, -7.154e-01, -6.487e-01, -8.120e-03, 9.670e-02, 8.390e-02, -2.580e-02, -1.697e-01, -1.257e-01, 7.393e-03, 4.072e-02, -1.198e-01)); + r += mul(s6_0, M4(-2.325e-02, -1.803e-02, 1.682e-02, -1.560e-02, 1.823e-01, -4.043e-01, -4.110e-02, 1.549e-02, -1.683e-01, 9.149e-02, -2.927e-01, 1.546e-02, -2.405e-01, 7.029e-02, 2.755e-02, 5.652e-02)); + r += mul(s6_1, M4(9.453e-02, -1.540e-01, -5.654e-02, 3.796e-02, -2.715e-02, 1.640e-02, -2.742e-01, -2.748e-02, 1.565e-01, 2.170e-01, 5.316e-02, 2.978e-02, 2.052e-02, 5.915e-02, -1.925e-01, 4.712e-02)); + r += mul(s6_2, M4(7.061e-02, 6.826e-02, 1.215e-02, 1.393e-01, -4.831e-02, 1.440e-01, -8.814e-02, 6.905e-03, 3.821e-02, 7.138e-02, -8.313e-02, 1.865e-01, -7.926e-02, -2.579e-02, 4.882e-03, 7.233e-02)); + r += mul(s6_3, M4(8.800e-02, 8.067e-02, -9.297e-03, 4.975e-02, 6.075e-02, -5.985e-02, -6.378e-02, 2.492e-02, 2.305e-01, -8.931e-02, -1.370e-02, 2.229e-01, 1.409e-03, -1.928e-01, 6.843e-02, 9.708e-02)); + r += mul(s6_4, M4(-7.920e-02, -6.951e-03, 3.889e-02, 2.709e-02, 3.035e-03, -1.535e-01, 1.865e-01, -8.006e-02, -3.663e-01, -4.063e-03, 1.480e-01, 8.084e-02, 2.003e-01, -3.478e-01, 
7.709e-02, 1.318e-01)); + r += mul(s6_5, M4(-4.351e-02, -4.782e-02, 1.095e-01, 9.693e-03, 1.025e-02, -2.202e-01, 3.284e-01, 7.600e-02, -3.967e-01, 1.752e-01, 2.691e-02, -1.441e-01, 6.026e-02, 1.437e-02, -3.212e-01, 1.947e-01)); + r += mul(s6_6, M4(-1.174e-01, -6.388e-02, -4.659e-02, 1.936e-03, -1.898e-01, -2.308e-02, 5.991e-02, -2.405e-02, 4.370e-01, 4.343e-02, 1.119e-01, -1.649e-01, -2.892e-02, 9.907e-02, -5.520e-03, 3.411e-02)); + r += mul(s6_7, M4(-8.114e-02, -5.151e-02, -7.774e-02, -1.555e-01, -1.185e-01, -1.694e-01, 1.159e-01, -8.749e-02, -6.711e-02, 1.719e-01, 1.898e-02, -6.603e-02, -1.325e-02, -2.651e-01, -1.550e-01, 1.727e-01)); + r += mul(s6_8, M4(-1.288e-02, 9.244e-02, 6.123e-02, -5.099e-02, -2.744e-01, -2.457e-02, -4.723e-02, -1.700e-02, -2.435e-01, 1.399e-01, -9.896e-02, -1.818e-01, 1.429e-01, -4.617e-02, -1.876e-01, 1.280e-01)); + r += mul(s7_0, M4(-5.099e-03, 3.039e-01, -2.557e-01, 3.141e-01, 1.227e-01, -6.618e-02, 3.876e-02, -9.571e-04, 8.463e-02, -1.155e-02, 2.623e-03, 4.686e-02, -1.301e-01, 2.507e-02, -4.847e-02, -8.853e-02)); + r += mul(s7_1, M4(-6.691e-02, 1.228e-02, 1.907e-01, -2.765e-01, -2.665e-02, 2.057e-02, -1.354e-02, 1.639e-01, 1.116e-01, -3.604e-02, 9.093e-02, -2.966e-02, 1.148e-01, 1.590e-01, 9.577e-02, 1.755e-02)); + r += mul(s7_2, M4(6.837e-02, 4.151e-02, 7.356e-02, 9.749e-02, -1.206e-01, 5.819e-02, 4.687e-02, 4.458e-02, 3.543e-02, -5.109e-02, -9.728e-02, -6.498e-03, 1.709e-02, -1.455e-01, -3.758e-02, -7.217e-02)); + r += mul(s7_3, M4(6.112e-02, 8.671e-02, -5.929e-02, 1.550e-02, -1.377e-01, 1.549e-01, 1.891e-02, 1.595e-02, -1.383e-01, 1.029e-02, -1.557e-01, 1.254e-02, -2.617e-02, 4.528e-02, -2.293e-03, -3.976e-02)); + r += mul(s7_4, M4(-3.063e-01, -1.625e-01, 5.306e-02, 2.149e-01, 1.103e-01, -6.293e-02, 4.295e-03, -1.463e-01, -4.290e-02, -1.037e-01, -9.071e-03, 2.655e-02, 1.242e-02, 1.285e-01, 1.310e-01, 4.781e-02)); + r += mul(s7_5, M4(-1.194e-01, 5.512e-02, 9.580e-02, -8.311e-02, 1.029e-01, -2.544e-01, -4.095e-02, -1.304e-01, 
-6.057e-02, -3.751e-03, 1.997e-02, -7.348e-02, -3.760e-02, -6.867e-02, 3.274e-02, -3.654e-02)); + r += mul(s7_6, M4(3.135e-02, -4.777e-02, 7.366e-03, 6.426e-02, -7.916e-02, 4.317e-03, -7.686e-02, 7.508e-02, 5.427e-02, 2.379e-02, -7.021e-04, -2.270e-02, -6.493e-02, 9.292e-02, 1.616e-01, 1.064e-01)); + r += mul(s7_7, M4(3.473e-02, -4.689e-02, 3.023e-02, -2.066e-02, 3.786e-02, -9.247e-02, -1.944e-02, 2.745e-02, 1.188e-01, 2.311e-02, 3.494e-02, -9.034e-02, 5.060e-02, -5.944e-02, -3.373e-03, 2.270e-02)); + r += mul(s7_8, M4(2.340e-01, -2.407e-02, 1.864e-01, -1.941e-01, -4.516e-02, 9.371e-02, -2.157e-02, -3.420e-03, 9.046e-02, -4.400e-02, -1.742e-02, -7.749e-02, 1.728e-01, 4.933e-02, -3.722e-02, 3.021e-02)); + r += V4(3.286e-02, 5.771e-03, -2.598e-02, -1.940e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.007e-02, 8.815e-02, 8.787e-02, 6.673e-02, 4.531e-02, -1.136e-02, -6.746e-02, 8.082e-03, 6.723e-02, 6.495e-03, -3.572e-03, -8.810e-02, -2.639e-02, 5.251e-02, -3.715e-02, -1.949e-01)); + r += mul(s0_1, M4(-5.738e-03, 9.408e-02, 6.908e-02, 7.054e-03, 9.352e-03, -2.742e-03, -3.428e-02, -1.319e-03, 1.029e-01, 2.080e-02, -2.673e-02, 2.034e-01, 7.456e-02, 7.538e-02, -2.518e-02, -1.477e-01)); + r += mul(s0_2, M4(4.295e-02, 9.563e-02, 8.488e-02, 4.426e-02, -3.528e-02, 1.398e-01, 1.889e-02, 3.828e-02, -4.458e-02, -1.099e-01, 
-2.168e-01, 1.081e-01, -5.195e-02, 1.955e-02, -8.919e-02, -1.085e-01)); + r += mul(s0_3, M4(-2.999e-02, -3.070e-02, 4.703e-02, 6.359e-02, -1.680e-01, 7.294e-02, -1.817e-03, -9.498e-02, -1.203e-01, 9.638e-02, 2.234e-02, 1.904e-01, -6.926e-02, 7.531e-03, 7.118e-02, -1.135e-01)); + r += mul(s0_4, M4(-1.197e-01, 1.162e-01, -5.468e-02, -6.775e-02, -2.126e-03, 5.122e-02, 1.378e-01, 3.868e-02, -6.028e-02, -2.421e-02, -1.370e-02, 2.099e-01, 2.690e-02, 1.245e-01, -3.967e-02, 7.315e-02)); + r += mul(s0_5, M4(-1.513e-01, 1.097e-02, 8.977e-02, 2.209e-02, -6.021e-02, -3.080e-02, -1.322e-01, -7.357e-02, -2.849e-02, 6.843e-02, 6.890e-02, -2.563e-02, -8.341e-02, 1.888e-01, 3.539e-02, 1.158e-01)); + r += mul(s0_6, M4(6.849e-03, -6.993e-02, 2.321e-02, -1.048e-01, 8.459e-02, 4.537e-03, 8.299e-02, -2.428e-01, 1.755e-02, 5.857e-02, 2.179e-02, -1.344e-01, -9.762e-03, -3.408e-04, 1.007e-01, 2.005e-01)); + r += mul(s0_7, M4(-2.394e-02, -3.589e-02, -2.741e-02, -9.901e-03, 1.151e-01, -8.218e-02, 5.715e-02, 2.303e-03, -7.653e-02, -2.701e-03, -1.149e-02, -2.013e-01, -3.567e-02, -6.143e-02, -1.063e-02, -2.110e-02)); + r += mul(s0_8, M4(-1.736e-02, 5.056e-02, 3.740e-02, 9.058e-02, -9.973e-02, 3.741e-02, -6.764e-02, 3.525e-02, 1.265e-01, 3.735e-02, -1.814e-01, -1.235e-01, 4.104e-02, 1.338e-01, -1.512e-02, 1.449e-02)); + r += mul(s1_0, M4(-7.408e-02, 4.688e-02, -2.960e-01, 1.677e-01, -3.196e-02, -2.403e-02, -5.473e-02, 5.301e-02, -7.272e-02, 1.548e-03, -6.738e-02, -7.785e-02, -6.000e-02, 5.336e-02, 4.060e-02, 1.319e-01)); + r += mul(s1_1, M4(-1.255e-01, 5.550e-02, -2.639e-01, -1.696e-01, -1.958e-02, 5.529e-02, -1.100e-01, 9.454e-02, 9.261e-02, -7.395e-02, 5.238e-02, -2.436e-02, 5.194e-02, 7.147e-02, 9.864e-02, 6.991e-02)); + r += mul(s1_2, M4(-1.810e-01, -5.793e-02, -5.093e-02, 8.727e-02, -7.876e-02, -9.299e-02, -6.713e-02, -2.632e-02, 1.340e-02, -4.129e-02, -1.086e-02, 7.617e-02, -1.054e-02, 9.318e-03, -1.402e-01, -3.486e-02)); + r += mul(s1_3, M4(-3.670e-02, -5.741e-02, -4.090e-02, -9.587e-02, 
-7.562e-02, 1.266e-01, 1.659e-02, -6.550e-03, -2.260e-02, -1.221e-02, 3.792e-02, -1.947e-04, -1.043e-02, -1.396e-02, -1.095e-02, -2.223e-03)); + r += mul(s1_4, M4(4.028e-02, 3.228e-02, -1.232e-02, -8.317e-02, 5.324e-03, 1.219e-01, -7.543e-02, 1.053e-02, -3.967e-02, 1.101e-02, -6.909e-02, -1.647e-01, 5.278e-02, -9.361e-02, -1.093e-02, 2.990e-02)); + r += mul(s1_5, M4(3.109e-02, 6.628e-02, -3.311e-02, -1.217e-01, 1.338e-01, 1.042e-01, 2.909e-02, 3.355e-02, 8.824e-02, -1.579e-01, 7.428e-02, -1.650e-01, 8.633e-02, 2.387e-02, -4.965e-02, -7.690e-02)); + r += mul(s1_6, M4(-1.741e-02, 6.553e-02, -9.866e-02, 9.365e-02, 7.774e-02, -5.941e-02, 8.108e-02, -1.098e-01, -2.092e-02, -2.057e-02, 2.507e-02, -1.817e-01, 5.481e-02, 1.193e-02, -1.171e-02, -1.035e-01)); + r += mul(s1_7, M4(4.520e-02, 5.614e-02, -1.438e-01, 1.483e-02, -8.121e-02, 4.987e-02, -1.596e-02, 4.690e-02, -9.684e-02, 2.356e-02, -5.632e-02, -2.871e-02, 2.827e-03, 3.753e-03, 1.059e-01, -4.852e-02)); + r += mul(s1_8, M4(-1.003e-01, 1.382e-01, -7.615e-02, -1.036e-02, -8.650e-03, -9.770e-02, 6.611e-03, 1.062e-01, 4.865e-02, -1.524e-02, 6.637e-02, 4.955e-02, 5.024e-03, 1.599e-02, -2.878e-02, 1.989e-01)); + r += mul(s2_0, M4(5.293e-03, 1.021e-01, -1.295e-01, 1.045e-01, -7.228e-02, 1.200e-02, -1.228e-01, 3.596e-02, -9.114e-02, -4.430e-02, 8.587e-02, -1.528e-01, -2.979e-02, 1.066e-01, -3.788e-02, 1.052e-01)); + r += mul(s2_1, M4(2.227e-02, 4.026e-02, 6.702e-03, -1.372e-01, -8.303e-02, 2.734e-02, 7.988e-02, 1.928e-01, -1.743e-02, 9.606e-02, -2.045e-02, -5.722e-02, -8.917e-02, 1.423e-01, -8.891e-02, -2.011e-02)); + r += mul(s2_2, M4(-8.187e-02, -4.063e-02, -4.310e-02, 2.248e-02, 4.184e-02, -9.449e-02, 3.206e-02, 1.547e-01, -4.531e-02, -4.194e-03, 1.721e-02, 9.277e-02, -4.491e-02, -9.397e-03, 3.181e-02, 4.109e-02)); + r += mul(s2_3, M4(-7.735e-02, 1.386e-01, -1.647e-02, -1.481e-01, -1.363e-01, 7.268e-02, 2.435e-01, -1.078e-01, 2.014e-02, 1.318e-01, 7.131e-02, -3.314e-02, -1.665e-02, -4.534e-02, 7.938e-02, -1.114e-01)); + r 
+= mul(s2_4, M4(-6.867e-02, 8.030e-02, 1.372e-02, 1.409e-01, 2.672e-02, -1.591e-01, -7.935e-02, 1.166e-01, -7.796e-03, -1.351e-01, -3.978e-02, -1.234e-02, -1.253e-01, -1.503e-01, 1.827e-01, 1.450e-02)); + r += mul(s2_5, M4(1.487e-02, 1.623e-01, 1.066e-01, 5.026e-02, 1.958e-01, 1.400e-01, 6.793e-02, 2.143e-01, -1.417e-01, -7.352e-02, 1.017e-01, -8.242e-02, -2.239e-02, 2.857e-02, -1.091e-01, -1.143e-01)); + r += mul(s2_6, M4(-1.680e-01, 4.522e-02, -1.074e-01, 3.068e-02, -3.510e-02, -3.975e-02, 8.872e-03, -1.323e-02, 8.092e-03, 1.302e-01, 3.036e-02, -1.005e-01, -5.281e-02, -5.411e-05, 7.608e-02, -5.882e-02)); + r += mul(s2_7, M4(-8.128e-02, 5.894e-02, 5.202e-02, -2.121e-01, -2.758e-03, 1.381e-01, 3.100e-02, 1.143e-01, 6.719e-02, 1.264e-02, 1.298e-01, -5.056e-02, -1.030e-01, -2.655e-02, -5.240e-02, 9.107e-02)); + r += mul(s2_8, M4(-1.098e-01, 7.137e-02, 1.447e-02, 1.095e-01, -1.075e-01, -7.541e-02, -2.493e-02, 5.744e-02, -2.321e-02, -4.715e-02, -1.222e-01, -1.038e-02, 4.308e-02, 1.199e-02, -2.390e-02, 8.004e-02)); + r += mul(s3_0, M4(1.514e-02, 6.142e-02, -1.679e-02, 1.421e-01, -4.314e-02, -3.753e-03, -4.478e-02, 5.450e-02, 4.830e-02, 1.841e-02, 8.349e-03, -7.602e-02, -4.918e-02, 6.954e-02, 3.566e-04, -1.164e-01)); + r += mul(s3_1, M4(3.458e-02, -2.655e-02, 2.074e-02, -6.764e-03, -4.779e-03, -1.050e-01, -1.285e-01, -3.547e-02, 6.100e-02, -1.033e-01, -1.220e-01, -2.488e-02, -1.735e-01, -3.269e-02, -2.998e-03, -7.536e-02)); + r += mul(s3_2, M4(-2.253e-02, -7.574e-02, -8.361e-03, -1.796e-01, -5.300e-03, 3.493e-02, -6.129e-03, 1.754e-02, -1.580e-01, -1.331e-01, -6.234e-02, 1.658e-01, -1.436e-01, -1.024e-01, 7.786e-02, 7.474e-02)); + r += mul(s3_3, M4(4.098e-02, 2.356e-03, -1.799e-01, 3.367e-02, -4.516e-02, -2.829e-02, 8.494e-02, 1.194e-01, 3.378e-02, 3.071e-02, 4.195e-02, 6.000e-02, -1.675e-01, 6.617e-02, 1.058e-01, -8.855e-02)); + r += mul(s3_4, M4(3.099e-02, 9.172e-02, -1.744e-01, 4.121e-02, -1.416e-01, -3.103e-02, 8.377e-04, 1.971e-02, 1.443e-01, -9.228e-02, -2.827e-02, 
6.870e-03, -1.860e-01, -1.150e-01, 2.765e-01, -1.556e-02)); + r += mul(s3_5, M4(-4.603e-02, 4.315e-02, 1.547e-03, -2.338e-01, -1.147e-01, 6.940e-02, 2.080e-02, 5.854e-02, -7.289e-02, 2.040e-02, -5.947e-02, 5.971e-02, -2.226e-01, -1.241e-02, -1.104e-01, -9.166e-02)); + r += mul(s3_6, M4(2.643e-02, 6.164e-02, 8.340e-02, -1.133e-01, 1.595e-01, -3.999e-02, 2.242e-02, -1.665e-01, 2.607e-02, 3.441e-02, -1.012e-01, 1.000e-01, 1.596e-03, -2.365e-02, 7.891e-02, -5.897e-02)); + r += mul(s3_7, M4(3.983e-02, -2.678e-02, -3.517e-02, 1.224e-01, 1.005e-01, 1.754e-01, 7.284e-03, 3.945e-04, 8.451e-02, 4.457e-03, -9.681e-03, 4.125e-02, -4.995e-02, 4.584e-02, 1.199e-01, 7.346e-02)); + r += mul(s3_8, M4(5.846e-02, -2.888e-02, -1.479e-02, -8.501e-02, 6.400e-02, -1.157e-01, 2.165e-03, 1.854e-02, -3.113e-02, -2.208e-02, -7.875e-02, 5.719e-02, 1.033e-01, 1.480e-01, -1.909e-01, -5.007e-02)); + r += mul(s4_0, M4(-5.335e-03, -8.372e-02, -3.783e-02, -6.308e-02, -2.173e-01, 4.498e-03, -1.480e-01, -5.785e-02, -7.680e-02, 3.288e-02, -5.400e-02, 3.452e-03, 9.755e-03, -5.644e-02, 3.565e-02, 3.149e-02)); + r += mul(s4_1, M4(-8.715e-03, -5.051e-02, -1.442e-01, 4.613e-02, -8.669e-02, -6.579e-02, 9.057e-02, 1.023e-01, -7.638e-02, -1.862e-02, 6.867e-02, 1.036e-01, 3.587e-02, -1.297e-02, -1.062e-01, -8.614e-02)); + r += mul(s4_2, M4(-1.108e-01, -7.149e-02, 6.670e-02, 3.969e-02, 6.626e-02, 2.668e-02, -7.490e-03, 1.006e-02, -6.542e-03, -3.122e-02, -6.992e-02, 9.288e-02, -9.591e-02, -2.018e-02, -3.362e-02, 5.792e-02)); + r += mul(s4_3, M4(4.713e-02, 1.357e-02, -9.522e-03, -1.636e-01, 3.770e-02, -4.667e-02, 3.558e-02, 2.709e-02, -5.774e-02, -5.880e-02, -7.833e-02, -1.101e-01, 4.765e-02, 1.874e-02, 1.798e-02, -3.002e-02)); + r += mul(s4_4, M4(-1.328e-01, -2.495e-01, 4.802e-01, -2.588e-01, -2.041e-02, 1.240e-03, 6.136e-02, -1.834e-01, 1.875e-02, 8.841e-02, 4.815e-02, 5.687e-02, 4.647e-02, 3.939e-02, 1.162e-01, -6.678e-02)); + r += mul(s4_5, M4(-1.451e-01, -1.558e-01, -1.371e-01, 1.189e-01, 5.499e-02, 
1.002e-01, 9.618e-02, -7.548e-03, 4.639e-02, 3.723e-02, 5.066e-02, -1.086e-02, -1.201e-01, -9.438e-02, -1.117e-01, 1.166e-01)); + r += mul(s4_6, M4(-2.174e-02, -3.346e-02, -7.233e-05, 2.218e-02, 5.312e-02, -5.567e-02, 2.738e-02, 1.525e-01, -7.337e-03, 8.831e-03, -1.704e-02, -3.560e-02, 2.228e-02, -7.597e-02, 8.787e-02, 5.019e-02)); + r += mul(s4_7, M4(1.841e-01, -3.949e-02, -3.770e-01, -1.809e-02, 4.194e-02, 9.110e-02, 1.813e-02, 6.084e-02, 8.008e-02, -4.040e-02, -1.450e-01, -1.446e-01, 1.486e-01, 1.302e-01, 1.444e-02, -1.374e-01)); + r += mul(s4_8, M4(-4.637e-02, 3.020e-01, 7.106e-02, 3.649e-01, 3.670e-02, 9.764e-02, -5.934e-02, -2.662e-02, -2.775e-02, 4.676e-02, 4.966e-03, -1.345e-01, -6.302e-02, 1.107e-01, -5.849e-03, -6.808e-02)); + r += mul(s5_0, M4(8.359e-02, 5.374e-02, -4.313e-02, -2.436e-02, 3.929e-02, -1.240e-02, 1.682e-03, -1.782e-01, -1.870e-01, 5.488e-02, -1.758e-01, -2.389e-02, 1.188e-01, -1.904e-01, 1.247e-01, -1.392e-02)); + r += mul(s5_1, M4(-7.821e-02, -9.252e-02, 5.281e-02, -8.872e-02, -1.075e-01, -1.824e-01, -2.427e-01, -8.470e-03, -1.865e-01, 1.573e-01, -8.853e-02, -6.354e-02, 1.242e-01, 3.494e-01, 6.722e-02, 9.009e-02)); + r += mul(s5_2, M4(1.812e-02, -2.485e-02, 4.002e-02, -5.939e-02, 3.988e-01, 2.575e-01, -3.490e-02, -3.557e-02, -2.122e-01, -7.069e-02, -1.837e-01, -1.297e-01, -6.653e-03, 7.863e-02, 1.937e-01, -3.095e-02)); + r += mul(s5_3, M4(1.034e-01, 4.052e-02, 7.462e-02, 8.144e-02, 1.522e-02, 1.363e-01, 1.426e-01, -1.123e-01, -6.815e-02, 1.544e-01, 2.515e-01, 2.701e-01, 7.650e-02, 9.157e-02, 7.519e-02, 1.269e-01)); + r += mul(s5_4, M4(-3.329e-02, -7.946e-02, 2.374e-02, -3.992e-03, 1.988e-01, -1.010e-01, -2.744e-02, -2.057e-01, 7.306e-02, 6.289e-02, 2.636e-01, 1.719e-01, 2.019e-01, 2.024e-01, 1.578e-03, -7.458e-02)); + r += mul(s5_5, M4(2.503e-02, 2.133e-02, -9.472e-02, 7.377e-02, -1.815e-01, 1.723e-01, 2.228e-01, 5.270e-03, -8.618e-02, -1.739e-01, 4.445e-01, -8.829e-02, -6.907e-02, -3.994e-01, -2.503e-02, -3.677e-02)); + r += mul(s5_6, 
M4(-5.551e-03, 1.916e-02, 1.607e-02, 8.480e-02, 1.256e-01, 1.284e-03, 2.887e-02, 7.081e-02, -3.412e-02, 5.326e-02, 1.446e-01, 2.570e-02, 4.649e-02, -5.663e-02, -5.735e-02, 1.437e-01)); + r += mul(s5_7, M4(-5.587e-02, -2.545e-02, -7.304e-02, -2.056e-01, -3.248e-01, 1.037e-01, -8.052e-02, 3.424e-01, -2.857e-01, 1.753e-01, 2.171e-02, 5.623e-01, 1.190e-01, 2.630e-03, -2.992e-02, -1.056e-01)); + r += mul(s5_8, M4(4.104e-03, 3.006e-02, 6.708e-02, 5.476e-02, 1.159e-01, -1.890e-01, 9.861e-02, -6.230e-01, 1.115e-01, 2.542e-01, 2.328e-01, -2.674e-01, -3.685e-02, 5.212e-02, -5.101e-02, 2.267e-02)); + r += mul(s6_0, M4(1.097e-01, -3.157e-02, -2.447e-02, -6.866e-03, -5.199e-02, -1.318e-01, -5.015e-03, 1.001e-01, -3.609e-02, -1.647e-01, -3.436e-01, -1.239e-01, 1.174e-01, 5.202e-02, 5.041e-03, 1.765e-01)); + r += mul(s6_1, M4(1.075e-01, 5.891e-02, -1.506e-01, 1.390e-02, 9.483e-03, 3.482e-02, -7.002e-02, 3.417e-02, -1.864e-01, -1.731e-01, 1.920e-01, -3.235e-01, 1.519e-02, 2.306e-01, -1.803e-02, -1.142e-01)); + r += mul(s6_2, M4(3.703e-02, 1.002e-01, -9.431e-02, 1.172e-01, 1.087e-01, 1.026e-01, -5.506e-02, -2.139e-02, 5.829e-02, -1.291e-01, -1.210e-01, 1.784e-02, 1.006e-01, -5.195e-02, 1.410e-01, 2.518e-01)); + r += mul(s6_3, M4(-6.842e-02, -3.220e-02, -2.200e-02, 1.114e-01, -9.742e-02, 7.600e-03, -1.156e-01, 5.088e-02, -1.513e-01, 8.029e-03, -1.525e-01, 2.514e-01, 7.538e-02, 5.196e-02, 1.078e-01, 1.843e-01)); + r += mul(s6_4, M4(-1.403e-01, -6.977e-02, 6.207e-02, 3.357e-02, 4.830e-03, 1.300e-01, 3.562e-02, 3.321e-01, -4.602e-01, -1.534e-02, 3.213e-01, 2.686e-01, -3.848e-02, 4.393e-02, 2.456e-01, 6.241e-02)); + r += mul(s6_5, M4(6.375e-02, 5.588e-02, 9.716e-02, 1.277e-01, -9.360e-02, 4.105e-02, 3.492e-02, -1.612e-01, -6.545e-02, -8.377e-02, -2.876e-02, -2.144e-01, 6.335e-03, 1.984e-02, 6.474e-02, -6.783e-02)); + r += mul(s6_6, M4(6.793e-02, -9.859e-02, 3.992e-02, -1.174e-01, 1.460e-01, -9.544e-03, -7.754e-02, -7.563e-02, -1.802e-02, 1.469e-01, 1.574e-01, 9.190e-02, 7.771e-02, 
-5.878e-02, -1.506e-01, 1.035e-01)); + r += mul(s6_7, M4(6.509e-02, -4.260e-02, 3.931e-02, -1.217e-01, -5.386e-02, -1.153e-01, -7.771e-02, -9.431e-02, 3.619e-02, -1.283e-01, -1.441e-01, -9.697e-02, -1.190e-01, -6.006e-02, -2.354e-01, -2.855e-02)); + r += mul(s6_8, M4(-2.388e-01, -1.365e-03, 6.138e-03, 6.787e-02, 9.083e-02, 1.217e-02, 4.726e-02, 7.028e-02, -7.880e-02, -1.273e-02, 2.881e-02, -1.127e-01, 6.820e-02, 8.305e-02, -4.687e-02, 5.309e-02)); + r += mul(s7_0, M4(6.293e-02, 1.524e-01, 2.426e-03, 1.345e-01, -1.927e-03, 2.500e-02, -2.269e-02, -1.720e-02, -5.706e-02, -4.078e-02, -1.441e-02, -1.935e-02, -2.046e-02, 1.077e-01, 2.896e-02, -1.657e-02)); + r += mul(s7_1, M4(-1.546e-01, 6.167e-02, -2.003e-01, 5.352e-03, -1.040e-02, 1.479e-02, -1.393e-02, 6.118e-02, 1.881e-01, -1.391e-01, 2.568e-02, 5.596e-02, 2.507e-02, -8.752e-02, -5.587e-02, 8.935e-02)); + r += mul(s7_2, M4(4.904e-02, -5.495e-03, 3.301e-02, -1.341e-01, -1.041e-01, 3.251e-02, 3.666e-02, 2.863e-02, 8.393e-02, -6.058e-02, -3.575e-02, -1.365e-01, 7.400e-02, -2.014e-01, 7.808e-02, -5.946e-02)); + r += mul(s7_3, M4(9.064e-02, 1.583e-01, -5.479e-02, -1.758e-01, -1.038e-01, 2.272e-02, 4.570e-02, 1.087e-01, 2.277e-03, 7.368e-02, -1.043e-01, -8.020e-02, -8.471e-02, -2.488e-02, 6.476e-02, -5.082e-02)); + r += mul(s7_4, M4(-3.333e-02, 1.693e-01, 5.872e-02, -1.505e-01, 2.054e-02, 4.561e-02, 7.449e-02, 6.552e-02, -4.158e-02, -9.157e-02, 3.637e-02, -1.193e-01, -1.161e-01, 5.117e-02, 7.775e-03, -6.346e-02)); + r += mul(s7_5, M4(4.869e-03, 4.115e-02, -1.571e-02, 1.777e-01, -2.857e-02, 6.103e-02, -7.811e-02, -5.197e-02, 4.437e-02, -4.254e-02, 1.840e-02, 2.180e-03, 7.496e-02, -3.691e-02, -8.768e-02, -1.503e-02)); + r += mul(s7_6, M4(1.570e-01, 1.453e-01, 8.288e-02, -1.437e-01, 5.945e-02, -3.665e-02, -4.857e-02, 4.723e-02, -6.723e-02, 2.362e-02, 5.484e-04, 1.404e-01, 4.498e-02, -4.389e-02, 3.433e-02, -8.667e-02)); + r += mul(s7_7, M4(7.684e-03, 9.443e-02, 1.095e-01, -5.099e-03, -3.467e-02, -4.402e-02, 7.357e-02, 
-2.241e-01, 1.074e-01, 4.893e-02, -1.257e-02, 1.393e-01, -2.778e-02, -1.809e-02, -5.013e-02, 4.069e-02)); + r += mul(s7_8, M4(-1.282e-01, -1.216e-01, -1.371e-01, -1.585e-01, 5.509e-02, 4.474e-02, -1.432e-01, -8.277e-02, -7.159e-02, 7.818e-02, 4.878e-02, -2.018e-02, 6.765e-02, 2.236e-02, -5.455e-02, 5.913e-02)); + r += V4(1.868e-01, -7.070e-04, 1.093e-03, 1.982e-02); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 
0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, 
s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 6 +//!DESC conv5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, 
V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.593e-01, -1.449e-01, 3.714e-01, 1.405e-01, -4.623e-02, 5.640e-02, -1.160e-03, -9.562e-02, -2.084e-01, 6.325e-02, 9.785e-02, -1.426e-01, -1.095e-01, 8.190e-02, 2.229e-02, -7.590e-02)); + r += mul(s0_1, M4(-7.741e-03, -8.691e-02, -7.332e-02, 2.748e-02, 4.440e-02, -5.428e-02, 2.901e-02, -1.183e-01, 9.970e-02, 2.708e-01, 1.508e-01, 7.405e-02, 4.042e-02, -1.176e-01, -2.240e-02, 1.138e-02)); + r += mul(s0_2, M4(-3.340e-01, 3.690e-02, -1.333e-01, 1.089e-02, 1.071e-01, 1.043e-01, -2.977e-02, 1.843e-02, -1.331e-01, 2.865e-02, 1.945e-02, -2.371e-01, 9.608e-02, -3.186e-02, -3.169e-02, 7.562e-03)); + r += mul(s0_3, M4(-1.696e-01, 5.110e-02, -1.223e-01, 3.763e-01, 6.949e-02, -1.285e-01, 5.979e-02, -4.475e-02, -3.222e-02, 2.133e-02, 3.789e-02, 7.654e-02, -8.666e-02, 1.017e-01, -6.865e-02, 1.487e-01)); + r += mul(s0_4, M4(-3.830e-02, -3.887e-02, 3.243e-01, 1.188e-01, 8.237e-02, -2.333e-01, 4.899e-02, 5.874e-02, -1.512e-01, 3.577e-01, -6.326e-02, -5.181e-01, -9.063e-02, 1.055e-01, -1.263e-02, -6.279e-02)); + r += mul(s0_5, M4(5.145e-02, -5.159e-02, 2.208e-01, -2.513e-02, 2.955e-01, 5.265e-02, -1.501e-01, -1.800e-02, -8.289e-02, -6.283e-02, 2.911e-01, -6.234e-02, 1.212e-01, -1.165e-01, 1.745e-01, -7.912e-02)); + r += mul(s0_6, M4(-7.277e-03, 5.578e-02, 6.717e-02, -5.013e-02, -2.340e-02, 6.085e-04, -5.489e-02, 8.570e-02, -1.369e-01, 6.509e-02, 1.394e-01, 1.283e-01, -7.018e-02, -7.364e-02, -1.514e-01, 5.337e-02)); + r += mul(s0_7, M4(2.544e-01, 1.554e-02, 2.218e-02, 6.236e-02, 2.742e-03, 7.685e-02, 1.068e-01, -1.142e-01, -6.585e-02, -1.644e-02, 2.755e-01, 2.080e-01, -1.312e-01, -9.234e-03, 1.023e-01, -6.273e-03)); + r += mul(s0_8, 
M4(-1.956e-01, -1.169e-01, 1.682e-01, -2.850e-02, 7.649e-02, 2.673e-02, -7.830e-02, -3.505e-02, -7.021e-02, -6.493e-03, -1.337e-03, 1.508e-01, 3.269e-02, -8.421e-02, -7.092e-03, 1.377e-02)); + r += mul(s1_0, M4(4.571e-02, 1.489e-02, 5.416e-02, 1.011e-01, -1.022e-01, 4.644e-02, -6.923e-03, -8.190e-03, -8.223e-03, -7.293e-02, 2.387e-02, 2.188e-02, -1.679e-01, 7.147e-02, 8.029e-02, 2.890e-02)); + r += mul(s1_1, M4(-1.266e-01, 2.499e-02, 9.610e-02, 3.170e-02, -4.388e-03, -8.444e-03, -1.798e-01, -2.290e-01, 9.415e-02, -2.611e-02, 3.209e-02, 2.049e-02, 2.385e-01, -1.341e-01, 2.982e-01, -3.020e-01)); + r += mul(s1_2, M4(3.859e-02, 1.301e-02, -5.288e-02, -5.663e-03, -4.897e-02, -7.929e-02, -3.821e-03, 3.302e-02, -4.841e-02, 8.560e-02, -1.186e-01, -1.017e-01, -1.501e-01, -2.028e-01, 6.390e-02, -1.024e-01)); + r += mul(s1_3, M4(-6.727e-02, 7.422e-03, -1.856e-01, -7.918e-02, -2.854e-02, -8.520e-02, -2.669e-02, 5.885e-02, -1.794e-02, 8.124e-02, 1.336e-02, 1.901e-01, 1.303e-01, -1.194e-02, 2.400e-01, 2.973e-01)); + r += mul(s1_4, M4(1.843e-01, 2.696e-02, -8.015e-02, 2.126e-02, -1.040e-02, -1.869e-01, 9.212e-02, 1.331e-01, -7.399e-02, 1.301e-01, 2.411e-02, -7.758e-02, -2.738e-01, -1.194e-01, -3.516e-01, -1.917e-01)); + r += mul(s1_5, M4(1.323e-02, 2.475e-03, -5.999e-02, 2.355e-02, 1.032e-01, 1.678e-01, -1.085e-02, 3.518e-02, -1.019e-03, -7.839e-02, 8.874e-02, 8.545e-02, 4.608e-01, -1.934e-01, 3.594e-01, 9.396e-02)); + r += mul(s1_6, M4(1.144e-02, -1.090e-02, -4.158e-02, -1.052e-01, -2.696e-02, -1.749e-02, -3.617e-02, -7.374e-03, 4.635e-02, -2.878e-02, -5.198e-02, -3.870e-02, -2.749e-01, -1.257e-01, -1.258e-01, -1.133e-01)); + r += mul(s1_7, M4(9.177e-02, -5.037e-02, -6.891e-02, 3.292e-02, 5.608e-02, 6.303e-02, -1.957e-03, 7.045e-02, -7.113e-02, -1.582e-02, -5.409e-02, -1.036e-01, -1.992e-01, -1.431e-01, 2.032e-01, -2.459e-01)); + r += mul(s1_8, M4(-8.794e-02, -6.965e-02, 1.735e-01, -1.279e-01, -1.199e-01, 6.993e-02, -2.056e-02, 1.078e-01, -5.771e-02, 8.518e-02, 1.168e-03, 
4.438e-03, -3.108e-02, -9.562e-02, -3.943e-01, -3.110e-02)); + r += mul(s2_0, M4(1.800e-02, 9.367e-03, 3.256e-02, 4.693e-02, 8.038e-02, 5.966e-02, -2.208e-02, -3.095e-02, -6.173e-03, -1.560e-02, 8.999e-02, 3.326e-03, -6.089e-02, -2.851e-02, -1.533e-01, -1.779e-02)); + r += mul(s2_1, M4(-9.123e-02, 1.396e-01, -9.372e-02, -3.482e-03, 2.263e-01, -8.542e-02, -8.919e-02, 6.860e-02, 7.627e-04, 4.359e-03, -6.620e-02, -7.796e-02, 5.674e-02, 4.389e-02, 2.222e-01, -1.597e-01)); + r += mul(s2_2, M4(-3.900e-03, -1.853e-02, 8.436e-02, 1.238e-01, 1.459e-01, -3.454e-02, 5.319e-03, -1.830e-02, 4.723e-02, 8.957e-02, -7.618e-02, 2.639e-02, -5.243e-02, 8.689e-02, -6.727e-02, 2.487e-02)); + r += mul(s2_3, M4(3.173e-02, -1.429e-01, -1.436e-01, -1.011e-01, 1.308e-01, 2.880e-02, 5.304e-02, -1.223e-01, -8.882e-02, -2.668e-02, -1.300e-01, -1.422e-01, -7.629e-02, -5.281e-02, 1.080e-01, -2.089e-01)); + r += mul(s2_4, M4(3.714e-02, 4.249e-02, 1.392e-01, -4.845e-02, 1.593e-01, 2.331e-02, -6.485e-02, 7.681e-02, -1.677e-01, -1.248e-01, 1.185e-01, 5.576e-03, -2.337e-02, -6.052e-02, -1.761e-01, 1.118e-01)); + r += mul(s2_5, M4(9.538e-02, -2.208e-02, -5.149e-02, 1.184e-02, -7.266e-02, -7.028e-02, 1.615e-01, -7.057e-02, 5.134e-02, -9.027e-02, -1.213e-01, -8.401e-03, 4.649e-02, -3.823e-02, 1.478e-01, 7.446e-02)); + r += mul(s2_6, M4(5.187e-02, -3.907e-02, 1.271e-01, -1.752e-02, -6.796e-02, 1.812e-03, -6.672e-02, 7.822e-02, -6.372e-02, -5.372e-02, -1.701e-02, 3.405e-02, -8.316e-02, -5.269e-02, -1.587e-02, -5.865e-02)); + r += mul(s2_7, M4(3.859e-02, -2.469e-02, 4.657e-03, -7.371e-02, -5.297e-02, 1.771e-02, 5.469e-02, 6.538e-02, 5.576e-02, 7.541e-02, -1.886e-01, -3.162e-02, 1.101e-01, 1.082e-01, 1.659e-01, 6.491e-02)); + r += mul(s2_8, M4(1.356e-01, -2.891e-02, -1.754e-02, 1.395e-02, 9.832e-03, -1.633e-02, 9.524e-03, -6.391e-02, 1.459e-01, -2.841e-02, 2.557e-01, -1.733e-02, 4.526e-03, -1.524e-02, -1.301e-01, 9.234e-02)); + r += mul(s3_0, M4(1.686e-01, -7.967e-03, -5.276e-02, -6.824e-02, -1.409e-01, 
-6.803e-03, -2.278e-02, 2.534e-01, -1.090e-01, 1.592e-01, -1.918e-02, 1.525e-01, -9.105e-02, -1.546e-01, -1.817e-01, 1.213e-01)); + r += mul(s3_1, M4(1.456e-01, 9.600e-03, -7.036e-02, 3.050e-02, 7.232e-02, -2.325e-02, 1.812e-02, 5.203e-03, -1.446e-01, 5.967e-02, -6.930e-02, 2.963e-02, -1.145e-01, 1.560e-01, 8.045e-03, -6.323e-02)); + r += mul(s3_2, M4(-1.978e-01, 7.857e-02, 4.272e-02, 1.615e-01, -2.981e-02, 4.013e-02, -1.305e-01, -4.224e-02, -1.542e-01, 1.689e-02, 1.822e-01, 4.598e-02, -1.028e-01, 3.473e-02, -1.626e-01, -6.575e-02)); + r += mul(s3_3, M4(1.631e-02, -6.029e-02, -6.514e-02, 5.342e-02, -2.351e-01, 4.921e-02, 1.373e-02, 5.400e-03, 1.064e-01, 8.515e-02, 8.165e-02, 9.797e-02, -2.146e-01, -1.766e-01, -2.541e-01, 2.036e-01)); + r += mul(s3_4, M4(-1.687e-01, -8.273e-02, 2.427e-01, 6.374e-02, 1.014e-02, 8.861e-02, -7.684e-02, -5.828e-04, -4.240e-02, -1.492e-01, 9.594e-02, 5.766e-02, 3.729e-02, -3.388e-02, 5.206e-02, 2.805e-01)); + r += mul(s3_5, M4(-2.261e-01, -2.347e-02, -1.408e-01, -6.628e-02, -7.631e-02, 5.802e-02, 1.586e-01, -9.652e-02, 1.278e-01, -1.849e-01, -3.748e-02, 1.138e-01, 8.441e-02, -7.556e-03, 9.137e-02, 4.942e-02)); + r += mul(s3_6, M4(-1.926e-01, -7.638e-02, 1.927e-01, -9.560e-03, -2.627e-03, 6.517e-02, 8.063e-03, 2.457e-01, 1.541e-02, -9.795e-02, 2.329e-01, 4.059e-02, -2.558e-01, 4.837e-02, 3.723e-01, 9.836e-02)); + r += mul(s3_7, M4(-8.015e-02, -2.217e-01, -9.742e-02, 3.284e-02, 4.386e-02, -1.525e-01, -1.135e-01, -3.457e-02, 1.242e-02, 1.271e-01, 1.722e-02, 6.478e-02, 3.261e-01, -9.475e-02, 3.055e-01, -2.176e-01)); + r += mul(s3_8, M4(-1.313e-02, 8.794e-03, 3.080e-02, -9.745e-02, -2.133e-02, -1.201e-03, 1.012e-01, -2.072e-03, -1.021e-02, 6.600e-02, -4.266e-02, 1.259e-01, -9.550e-02, 2.597e-02, 3.693e-03, -2.470e-01)); + r += mul(s4_0, M4(-6.060e-02, -1.058e-01, -2.090e-01, 2.018e-01, 1.870e-02, 7.160e-02, 4.896e-02, 1.460e-01, 9.698e-02, -1.006e-01, -3.931e-02, 7.669e-03, 6.799e-02, -7.640e-02, -1.680e-02, 4.238e-03)); + r += mul(s4_1, 
M4(-6.669e-02, 3.466e-01, 4.606e-01, -2.850e-01, -9.875e-02, 3.052e-02, -5.354e-02, 2.134e-02, -1.507e-02, 2.892e-02, 4.354e-02, -1.145e-01, 4.007e-02, -7.408e-02, 5.854e-02, -9.265e-02)); + r += mul(s4_2, M4(-2.933e-02, -2.052e-01, 1.255e-01, 7.623e-02, -3.320e-02, -7.672e-02, 5.793e-02, -3.595e-02, 1.820e-01, -1.765e-02, 6.252e-02, 1.768e-02, 2.279e-02, 2.221e-02, -3.028e-02, 1.992e-02)); + r += mul(s4_3, M4(-1.204e-01, 6.957e-03, 2.086e-01, 1.671e-01, 3.611e-02, -2.151e-02, -9.956e-02, 1.377e-01, 4.875e-02, -9.599e-02, -6.171e-02, 1.036e-01, -1.594e-04, 5.487e-02, -1.711e-02, -1.317e-02)); + r += mul(s4_4, M4(-2.026e-01, -1.324e-01, -2.880e-01, -3.087e-01, -7.400e-02, 1.247e-01, 3.866e-02, 3.537e-02, 1.193e-02, -1.004e-01, 1.116e-01, -6.525e-03, 2.387e-02, 1.488e-01, -1.725e-01, -6.723e-02)); + r += mul(s4_5, M4(-2.304e-01, 1.438e-02, 4.557e-02, -1.209e-01, -9.595e-02, 3.186e-02, 5.878e-02, 8.973e-02, -4.991e-02, -2.682e-02, 7.052e-02, 4.856e-02, -2.973e-02, -1.402e-01, 1.139e-01, 7.068e-02)); + r += mul(s4_6, M4(-2.595e-01, -2.338e-01, -1.782e-02, -1.115e-01, 8.535e-03, -4.087e-03, -5.792e-02, -3.489e-02, -1.135e-02, 3.207e-02, 1.845e-01, 1.173e-01, 1.513e-02, 4.795e-02, -7.076e-02, -3.634e-02)); + r += mul(s4_7, M4(1.555e-01, -1.871e-01, 1.115e-02, -3.373e-01, 6.702e-02, 8.417e-02, -9.702e-03, -1.049e-02, 1.540e-01, -2.517e-02, -1.271e-01, 6.054e-02, -8.063e-02, -1.205e-02, -9.046e-03, -1.368e-02)); + r += mul(s4_8, M4(-1.995e-01, 4.294e-02, 1.778e-01, 7.162e-02, -9.654e-02, -6.709e-02, 4.075e-02, -2.107e-02, 9.859e-03, 7.697e-02, -3.300e-02, -8.696e-03, -1.934e-01, 2.050e-02, 2.544e-02, 4.246e-02)); + r += mul(s5_0, M4(1.797e-03, -1.507e-02, -8.254e-02, 1.316e-01, 4.059e-02, -2.017e-01, -1.458e-01, -2.957e-02, 7.678e-02, -6.134e-02, 2.763e-02, -8.168e-02, -1.657e-01, -8.973e-02, -9.496e-02, 8.552e-02)); + r += mul(s5_1, M4(-9.079e-03, -3.645e-02, 5.065e-02, -6.918e-02, -1.352e-01, 1.009e-01, 9.036e-03, -3.022e-02, -1.773e-02, -8.595e-04, 1.643e-01, 1.657e-01, 
5.667e-02, 6.397e-02, -9.064e-02, -1.421e-01)); + r += mul(s5_2, M4(4.747e-02, 3.982e-02, 1.864e-03, 1.680e-02, 5.211e-04, 6.533e-02, -1.592e-01, 2.480e-02, -6.936e-02, -2.677e-02, -1.436e-01, 5.381e-03, -1.316e-01, -1.451e-02, -4.014e-02, 1.184e-01)); + r += mul(s5_3, M4(8.830e-02, 1.459e-02, 3.689e-02, 3.786e-02, 8.277e-02, 1.448e-02, -1.858e-01, 1.230e-01, -4.004e-03, 2.619e-02, -7.552e-02, 2.365e-01, -1.443e-02, 1.764e-01, -1.532e-02, 1.333e-01)); + r += mul(s5_4, M4(5.831e-03, 1.558e-02, -7.070e-02, -7.543e-03, -1.030e-01, 1.221e-02, 9.362e-03, -4.782e-02, -2.168e-01, 3.110e-02, -2.005e-01, -1.732e-01, -1.224e-01, 1.919e-01, -1.375e-01, -8.618e-02)); + r += mul(s5_5, M4(-7.694e-02, -6.734e-03, -5.183e-02, 8.451e-02, 1.593e-01, 4.950e-02, 9.033e-02, -1.817e-01, 5.260e-02, 1.583e-01, 1.939e-01, 5.432e-02, -3.731e-02, 1.443e-02, 4.282e-02, -7.614e-03)); + r += mul(s5_6, M4(5.421e-02, 3.801e-02, 7.990e-02, -5.669e-02, -3.197e-03, -5.307e-02, -1.686e-01, -2.793e-02, 7.266e-02, -3.287e-02, -1.096e-02, -7.894e-02, -6.755e-02, 9.185e-02, -7.624e-02, -1.063e-01)); + r += mul(s5_7, M4(-2.342e-03, 1.948e-02, -1.712e-01, 8.762e-02, 4.195e-02, 2.889e-02, 4.223e-02, 5.608e-02, -7.354e-02, -1.436e-02, 1.933e-02, -2.791e-02, 7.066e-02, -4.649e-02, 7.903e-02, -1.466e-01)); + r += mul(s5_8, M4(1.634e-02, 1.019e-02, 1.465e-01, -2.513e-02, 3.443e-02, 4.545e-02, -3.206e-02, -2.666e-02, -6.391e-02, 6.831e-03, -1.037e-01, 1.542e-01, 5.077e-02, 1.304e-01, 8.766e-02, -5.962e-02)); + r += mul(s6_0, M4(5.300e-02, 4.058e-02, 2.100e-02, -5.358e-02, 3.789e-02, 8.811e-02, 7.047e-02, 3.690e-02, -6.714e-02, 1.748e-02, -1.716e-02, 9.108e-02, 9.203e-02, -1.446e-02, 2.172e-02, 9.838e-03)); + r += mul(s6_1, M4(4.856e-02, 1.403e-02, 4.168e-04, 2.314e-02, 6.438e-02, -1.745e-01, -1.415e-02, 3.319e-02, -1.101e-01, 3.500e-02, 8.767e-03, -1.145e-02, 5.330e-02, -6.173e-02, 8.167e-02, -8.512e-02)); + r += mul(s6_2, M4(-1.802e-01, 3.493e-02, -4.916e-02, 5.625e-02, -1.352e-02, 6.816e-02, -6.138e-02, 
-4.388e-02, 4.193e-02, -1.397e-03, 5.147e-03, -1.394e-02, -1.075e-01, -8.984e-02, -7.328e-02, -3.107e-02)); + r += mul(s6_3, M4(-1.057e-01, -1.532e-01, -7.526e-02, -1.512e-02, -8.986e-03, 5.307e-02, -8.631e-02, -8.224e-02, 2.179e-03, 9.205e-02, -1.353e-02, 3.498e-03, 2.002e-02, -1.008e-01, -6.140e-02, -9.357e-02)); + r += mul(s6_4, M4(-1.763e-01, -8.677e-02, 1.356e-01, 7.325e-02, -9.521e-02, -2.784e-02, 9.741e-02, 4.460e-02, 1.058e-01, -4.458e-02, 2.446e-01, 1.419e-01, 4.335e-02, -7.998e-02, -1.716e-02, -1.886e-02)); + r += mul(s6_5, M4(1.377e-01, 2.626e-02, -8.382e-02, -8.873e-02, -7.494e-04, -1.027e-01, 3.345e-02, -5.583e-02, -1.120e-01, 1.128e-01, -1.429e-02, -1.038e-01, 6.440e-02, -1.174e-02, -9.148e-02, -9.121e-02)); + r += mul(s6_6, M4(-1.220e-02, 7.280e-04, 9.590e-02, -2.118e-02, -6.208e-02, -1.376e-02, 9.231e-03, -7.983e-02, 1.724e-01, 4.970e-02, 1.091e-02, 1.787e-02, -5.790e-02, 2.204e-02, 2.296e-02, -3.396e-02)); + r += mul(s6_7, M4(-6.873e-02, -6.783e-02, -5.188e-02, 1.125e-01, -7.886e-02, 5.728e-03, 1.074e-02, 2.625e-03, 4.340e-02, -1.089e-02, -4.658e-02, 5.693e-02, 2.289e-01, -1.841e-02, -3.196e-02, 1.033e-01)); + r += mul(s6_8, M4(1.020e-02, 2.300e-02, -1.233e-01, 1.133e-02, 5.054e-02, 5.580e-02, -3.536e-02, 8.082e-02, -7.543e-02, -6.158e-02, -7.700e-02, -4.762e-02, -1.917e-03, 8.030e-02, -5.618e-02, 1.056e-01)); + r += mul(s7_0, M4(-7.155e-02, 5.366e-02, -6.934e-02, -1.348e-01, -2.525e-01, -1.416e-01, 7.005e-02, -1.845e-01, -4.754e-02, -3.789e-02, 9.503e-03, -1.842e-01, 6.595e-02, 1.083e-01, 3.167e-02, 1.319e-01)); + r += mul(s7_1, M4(1.935e-01, -3.082e-02, -7.510e-02, -1.959e-01, 1.813e-01, -8.668e-02, -5.187e-02, 3.750e-01, 8.887e-02, 3.637e-02, 1.732e-01, -2.686e-01, -5.073e-02, 2.332e-01, 1.732e-01, -1.090e-01)); + r += mul(s7_2, M4(1.014e-01, 4.496e-02, -1.815e-01, 6.039e-02, -1.712e-02, 6.855e-02, 1.200e-01, -1.739e-02, 7.690e-02, -9.285e-02, 1.465e-01, -2.754e-01, -1.797e-02, -1.101e-01, 1.034e-01, 2.012e-01)); + r += mul(s7_3, M4(1.153e-01, 
-2.387e-02, -3.031e-01, 9.983e-02, 1.243e-01, -1.599e-01, 1.985e-02, -1.537e-02, -3.631e-02, -3.993e-02, -1.432e-01, 9.270e-02, -4.222e-02, -1.006e-01, -1.633e-01, 1.216e-01)); + r += mul(s7_4, M4(-2.473e-01, -1.253e-01, 5.134e-02, 5.411e-02, 4.270e-03, -2.251e-01, -2.865e-01, 2.612e-02, 1.701e-01, -2.289e-01, 2.527e-01, 2.396e-02, 9.859e-02, 1.566e-01, 5.256e-02, -7.774e-02)); + r += mul(s7_5, M4(3.443e-01, -5.217e-02, 4.137e-02, -1.492e-01, -1.627e-01, -4.718e-02, 9.112e-02, 3.721e-02, 1.699e-01, -1.953e-02, -2.286e-01, -4.415e-03, 9.506e-02, 8.536e-02, 2.329e-02, -1.689e-01)); + r += mul(s7_6, M4(1.489e-01, 9.847e-02, -2.211e-02, 7.476e-02, 2.856e-01, 1.026e-01, 4.397e-02, -1.761e-01, -3.738e-02, -3.216e-02, 5.437e-02, 9.685e-03, -1.461e-01, 1.146e-02, 7.179e-02, 8.610e-02)); + r += mul(s7_7, M4(2.367e-01, 2.734e-01, 6.172e-02, 2.847e-02, -7.795e-02, -1.774e-01, -4.325e-01, -2.145e-02, -1.804e-01, 3.168e-02, 1.158e-02, -1.556e-01, -1.721e-01, 2.168e-01, -7.359e-02, 9.723e-02)); + r += mul(s7_8, M4(1.146e-01, 2.635e-01, -1.467e-01, 7.112e-02, -1.555e-01, 2.464e-02, -5.305e-02, -1.821e-01, 7.086e-02, -2.014e-01, -1.698e-01, 1.242e-02, 1.664e-01, -2.012e-04, -2.096e-01, 1.916e-01)); + r += V4(3.398e-02, 8.938e-03, 1.353e-02, -8.796e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.375e-02, 2.798e-01, -1.470e-01, 
-5.311e-02, -8.125e-02, -7.627e-02, 1.194e-02, -5.468e-03, 1.475e-02, -3.906e-02, 1.189e-01, -2.163e-02, 3.681e-02, -1.008e-01, -8.270e-02, -1.379e-01)); + r += mul(s0_1, M4(-2.061e-01, -3.075e-01, -5.014e-02, 8.560e-03, -6.088e-02, 2.363e-03, -3.140e-02, -5.582e-02, -1.874e-01, -2.888e-01, -2.317e-01, -2.082e-02, -2.264e-02, 1.194e-01, -1.154e-01, -1.054e-01)); + r += mul(s0_2, M4(1.052e-02, 2.800e-02, -1.830e-01, -1.295e-01, -7.716e-02, -5.475e-02, 1.633e-01, -8.246e-02, 5.170e-03, 1.827e-01, -1.566e-01, -4.217e-02, -3.526e-02, -8.951e-03, -4.627e-02, -2.115e-01)); + r += mul(s0_3, M4(-1.369e-02, -6.965e-02, -2.260e-02, 1.922e-01, 1.657e-01, -1.204e-01, 2.074e-01, -2.557e-02, -1.918e-01, 1.600e-02, -5.847e-03, 5.701e-02, 4.591e-02, 1.044e-02, 7.813e-02, 1.338e-01)); + r += mul(s0_4, M4(-2.651e-02, -5.623e-02, 1.692e-01, 2.944e-01, 3.676e-01, -2.678e-01, 7.910e-02, -5.427e-02, -3.895e-01, -1.209e-03, -1.830e-01, 2.640e-02, -4.157e-02, 1.224e-01, -1.023e-01, -1.106e-01)); + r += mul(s0_5, M4(-3.291e-02, 2.830e-01, -1.074e-01, 1.037e-01, -2.113e-02, 7.772e-02, -2.562e-02, -1.429e-01, -3.368e-02, 9.965e-02, -3.097e-01, -5.621e-02, 5.623e-02, 5.823e-02, 4.831e-02, 8.101e-02)); + r += mul(s0_6, M4(7.714e-02, 2.832e-01, 1.379e-01, 2.980e-02, -4.772e-02, 1.149e-01, -5.162e-02, 4.892e-02, 3.031e-02, -4.875e-02, -1.706e-01, 1.062e-01, 6.854e-02, -6.142e-02, -3.681e-02, -6.287e-02)); + r += mul(s0_7, M4(1.141e-02, 1.057e-03, 2.687e-01, 1.141e-01, 1.536e-01, -7.787e-02, -1.281e-01, 9.285e-02, -2.556e-03, 8.295e-02, 3.284e-02, -7.739e-02, 5.670e-02, -7.100e-02, 1.715e-01, 1.063e-01)); + r += mul(s0_8, M4(1.031e-02, 1.300e-01, -3.971e-02, -8.338e-02, 1.007e-01, 1.414e-01, 4.320e-02, 3.740e-02, -1.931e-02, 2.386e-01, -2.975e-02, 1.151e-01, -2.265e-02, 2.763e-02, 7.752e-02, 2.444e-02)); + r += mul(s1_0, M4(-3.686e-02, -9.159e-02, -8.028e-02, 1.835e-02, -8.461e-02, -1.200e-02, 6.251e-02, -8.569e-03, 2.942e-02, 1.142e-02, -1.239e-03, 6.275e-02, 4.019e-02, -7.689e-02, 5.405e-01, 
-1.596e-01)); + r += mul(s1_1, M4(-3.502e-02, 7.394e-02, -7.702e-02, -7.895e-02, -3.823e-02, -9.194e-02, 8.241e-02, -1.065e-01, -1.548e-01, -4.368e-02, 5.633e-02, 3.878e-02, 1.329e-01, 3.322e-01, -1.968e-01, -6.005e-02)); + r += mul(s1_2, M4(6.887e-03, -9.829e-03, 9.403e-02, 3.742e-02, -1.004e-01, 2.043e-02, 1.535e-01, 1.164e-01, -3.009e-03, -1.695e-02, 1.062e-01, -1.576e-01, -1.230e-01, -1.787e-01, 3.823e-01, -3.247e-01)); + r += mul(s1_3, M4(-6.858e-02, 4.149e-02, 6.125e-02, -2.038e-02, 2.113e-01, 5.205e-02, 2.541e-02, 1.713e-02, -1.713e-01, 2.402e-02, 3.943e-02, -7.622e-02, -7.784e-02, 1.272e-01, -6.592e-02, -2.368e-01)); + r += mul(s1_4, M4(8.601e-02, 6.665e-02, -2.816e-02, 1.915e-01, 2.949e-01, -1.469e-01, 4.550e-02, -8.922e-02, -1.569e-01, 2.114e-01, -5.545e-02, -2.952e-02, -1.437e-01, -1.079e-01, -2.190e-01, -1.519e-01)); + r += mul(s1_5, M4(-4.649e-02, 1.652e-02, 1.695e-02, -9.256e-02, -7.806e-03, -4.356e-02, 7.523e-02, 5.328e-02, -1.176e-01, -1.434e-01, -6.019e-02, 3.271e-02, 1.978e-01, 9.020e-03, 1.538e-01, -1.316e-02)); + r += mul(s1_6, M4(9.073e-04, 2.590e-02, 2.343e-02, -6.273e-02, -6.788e-02, 5.727e-02, -2.748e-02, -8.991e-03, 2.195e-02, -5.233e-02, -1.133e-01, -1.009e-03, 8.307e-02, -3.261e-01, 1.990e-01, 1.394e-01)); + r += mul(s1_7, M4(-1.678e-02, 6.919e-03, 1.559e-02, -5.822e-02, 1.391e-01, 2.534e-02, -5.715e-02, 4.260e-02, 1.618e-02, -2.235e-02, 2.680e-02, 2.541e-02, 1.304e-01, 6.041e-02, 2.798e-01, 7.458e-02)); + r += mul(s1_8, M4(-1.878e-02, -6.450e-02, -4.473e-02, 1.398e-02, 6.149e-02, -6.300e-03, -6.856e-02, -2.932e-02, 6.106e-02, -7.571e-02, -2.065e-02, 6.149e-03, 7.303e-02, -5.909e-02, 7.749e-02, -2.457e-01)); + r += mul(s2_0, M4(-5.323e-03, 8.889e-02, 2.062e-01, 1.460e-01, -2.143e-02, -6.834e-02, -1.091e-02, 5.811e-02, 6.127e-02, 9.073e-02, -9.772e-02, -8.533e-02, 1.797e-02, -3.454e-02, 2.204e-02, -1.434e-02)); + r += mul(s2_1, M4(1.952e-04, -2.399e-02, 4.873e-02, 6.787e-02, 9.244e-02, 8.997e-03, -7.317e-02, 2.043e-02, 2.031e-02, 
8.249e-02, -7.486e-02, 7.437e-02, 9.161e-04, 6.212e-02, -8.652e-03, -4.886e-02)); + r += mul(s2_2, M4(8.736e-03, -2.015e-02, -3.063e-02, -1.487e-01, 1.903e-02, 1.047e-01, -1.922e-01, -2.611e-02, 3.349e-02, 8.046e-03, -4.341e-02, -8.011e-02, 5.500e-02, -1.323e-02, -8.428e-02, -8.787e-02)); + r += mul(s2_3, M4(4.261e-02, 1.504e-01, -3.430e-02, -2.808e-02, -9.004e-02, -8.427e-02, 4.767e-02, -6.548e-03, -8.702e-02, 1.069e-01, 2.933e-02, 6.920e-02, -1.814e-02, -1.345e-02, 7.182e-03, 3.332e-02)); + r += mul(s2_4, M4(-9.987e-03, -3.337e-02, 9.504e-02, 7.318e-02, 1.830e-01, -5.087e-02, 1.253e-01, 5.065e-02, 5.035e-02, -3.376e-02, 7.512e-02, -3.120e-02, 7.876e-03, -5.337e-02, 5.403e-02, -1.625e-01)); + r += mul(s2_5, M4(-5.509e-02, 1.477e-02, 1.874e-02, -9.997e-02, 3.357e-02, 6.800e-02, 6.103e-03, 6.213e-02, -2.270e-03, 7.609e-02, 1.223e-02, 6.646e-02, -8.543e-02, 2.444e-02, 2.058e-01, 1.535e-03)); + r += mul(s2_6, M4(-1.647e-02, 4.307e-02, -5.204e-02, -1.042e-01, -6.802e-02, 2.655e-02, -2.501e-02, -1.171e-01, 7.035e-02, -1.546e-02, -5.354e-02, 3.666e-02, 1.758e-02, -5.853e-02, -1.206e-01, 1.440e-01)); + r += mul(s2_7, M4(5.312e-02, -1.499e-01, -9.236e-02, 4.211e-02, 3.915e-02, -2.907e-02, -3.453e-02, 1.694e-01, -6.624e-02, 2.467e-02, 1.791e-02, -2.548e-02, 2.711e-02, 8.394e-02, 1.691e-02, 8.298e-02)); + r += mul(s2_8, M4(-5.714e-02, -4.092e-02, -9.204e-02, 4.022e-02, 4.659e-02, -2.774e-02, -7.553e-02, -3.057e-02, -7.433e-02, -3.282e-02, 1.134e-02, 7.285e-02, -1.579e-02, -6.273e-02, -3.069e-02, 5.203e-02)); + r += mul(s3_0, M4(-5.174e-02, 1.647e-01, 3.898e-02, 5.137e-02, -4.043e-02, 6.276e-02, -3.940e-02, -6.088e-02, 3.568e-02, 2.550e-02, 2.432e-01, 4.463e-02, -5.921e-02, -1.272e-01, -6.032e-02, -2.023e-01)); + r += mul(s3_1, M4(-7.720e-02, 1.650e-01, 2.768e-01, 8.936e-02, 1.765e-02, 7.190e-02, -8.169e-02, -1.016e-01, 3.540e-02, -1.089e-01, -1.615e-02, 1.515e-02, 6.506e-02, 6.977e-02, 1.474e-01, -2.054e-01)); + r += mul(s3_2, M4(-2.389e-04, -8.709e-02, -1.466e-01, 
-8.087e-02, 3.527e-02, 2.416e-01, 8.849e-02, -4.678e-02, 2.143e-02, -1.220e-02, 6.466e-03, -6.907e-02, 2.572e-02, 1.743e-02, -3.125e-02, 2.166e-01)); + r += mul(s3_3, M4(-7.812e-02, 1.668e-02, -9.618e-02, 1.031e-01, -7.556e-02, -2.006e-01, -1.922e-01, 5.740e-02, -1.155e-02, 9.923e-02, -1.006e-01, 2.153e-01, 9.290e-02, -7.649e-02, -1.424e-01, -1.187e-01)); + r += mul(s3_4, M4(5.798e-02, 6.748e-02, -2.225e-02, 6.382e-02, 1.155e-01, -3.644e-02, -4.444e-02, -4.808e-02, -1.056e-01, -2.862e-01, 1.351e-01, -5.455e-02, 5.645e-02, 1.314e-02, -2.308e-01, -3.675e-01)); + r += mul(s3_5, M4(-1.338e-01, -3.274e-01, -8.780e-02, -4.520e-02, 3.288e-02, 9.451e-02, 2.472e-02, 1.685e-01, -8.192e-02, 6.392e-02, 4.328e-02, -3.828e-02, -1.164e-03, -8.118e-02, -1.933e-01, 8.546e-02)); + r += mul(s3_6, M4(-8.778e-02, -1.244e-02, 1.515e-01, -1.361e-01, 5.845e-02, 5.789e-02, -2.808e-02, 1.687e-02, 1.422e-02, 5.649e-02, -2.284e-01, -1.832e-01, 9.666e-02, 3.020e-01, 7.284e-02, -9.252e-02)); + r += mul(s3_7, M4(1.047e-01, 8.983e-02, -2.817e-01, -6.086e-03, -2.664e-02, 9.771e-02, -3.212e-02, 4.423e-02, 1.027e-01, 1.199e-01, 1.894e-02, -3.824e-01, 2.164e-02, -5.713e-02, 4.713e-02, 2.997e-01)); + r += mul(s3_8, M4(-9.134e-02, -1.796e-01, 2.139e-01, 1.304e-01, 9.993e-04, -5.535e-03, 1.107e-01, -7.480e-03, -3.411e-04, 6.908e-02, -1.557e-01, -2.594e-01, -5.713e-02, -2.292e-01, 5.080e-02, -6.568e-02)); + r += mul(s4_0, M4(-8.382e-02, 3.862e-01, 1.418e-02, 1.371e-01, 3.885e-02, 7.614e-02, -2.130e-01, -1.384e-02, 3.449e-02, 1.616e-01, -6.491e-02, 1.743e-01, -1.643e-03, 6.216e-02, 4.557e-02, -5.585e-02)); + r += mul(s4_1, M4(2.163e-01, 9.199e-02, 4.379e-01, -1.450e-02, 1.899e-02, -1.992e-02, -1.880e-01, -4.115e-02, 5.857e-02, -1.024e-01, 2.446e-01, 8.298e-02, 3.299e-03, 1.530e-01, -8.514e-02, -7.577e-02)); + r += mul(s4_2, M4(-1.229e-02, 4.657e-02, 1.324e-01, 2.859e-01, -1.945e-02, 8.632e-02, 1.029e-03, -5.752e-02, -3.405e-02, -1.149e-01, -3.139e-02, 8.736e-02, 2.617e-02, 3.285e-02, 2.662e-02, 
-1.293e-01)); + r += mul(s4_3, M4(-8.512e-02, 5.312e-02, 2.727e-01, -1.651e-01, -5.700e-02, 5.245e-04, -9.154e-02, -3.120e-02, 7.641e-02, 7.932e-02, 1.516e-01, 7.887e-02, -5.095e-02, -9.988e-03, 2.493e-03, -4.794e-03)); + r += mul(s4_4, M4(-3.992e-02, 1.917e-01, 9.522e-02, 2.879e-02, -3.965e-02, -1.703e-02, -2.368e-02, 1.041e-01, -1.361e-01, 6.245e-02, -5.546e-02, -2.044e-01, -4.529e-02, -5.278e-02, 5.566e-02, -3.638e-02)); + r += mul(s4_5, M4(8.444e-02, 1.297e-01, 3.157e-01, -6.599e-02, 5.687e-02, 7.013e-02, 1.009e-01, 1.089e-01, 5.210e-02, 1.170e-02, -2.479e-02, -3.234e-02, -4.897e-02, 1.848e-03, 3.212e-02, -4.242e-03)); + r += mul(s4_6, M4(5.128e-02, -6.873e-02, -1.200e-01, 2.357e-02, 8.464e-03, 6.867e-02, -1.977e-02, -5.663e-02, 1.129e-03, 1.524e-01, 5.786e-02, -6.040e-02, -3.317e-02, 5.549e-02, -2.285e-02, 2.001e-02)); + r += mul(s4_7, M4(-1.778e-03, 2.180e-01, 2.588e-01, 2.249e-03, 5.801e-02, -3.429e-02, 4.188e-02, 1.153e-01, -2.219e-02, 1.133e-01, 6.736e-02, -4.814e-02, 7.633e-02, -1.251e-03, -8.278e-02, 1.314e-01)); + r += mul(s4_8, M4(-3.769e-02, -1.759e-01, -1.097e-01, 6.204e-02, -2.063e-02, 9.172e-02, 1.217e-01, -3.235e-02, 1.051e-01, 2.339e-01, -2.923e-02, 3.881e-02, 6.012e-02, 2.513e-01, 1.752e-03, 2.256e-02)); + r += mul(s5_0, M4(-3.723e-04, 6.100e-02, -8.285e-03, -6.124e-02, -7.198e-02, 4.131e-02, -5.697e-02, 5.607e-02, 3.343e-02, -1.694e-01, 1.277e-01, 2.201e-01, -4.906e-02, -3.500e-02, 1.787e-03, -2.144e-01)); + r += mul(s5_1, M4(-9.115e-02, -6.884e-02, 1.641e-02, -7.442e-02, -2.210e-02, 5.431e-02, 6.200e-02, -2.124e-01, -6.012e-03, 1.201e-01, -2.582e-02, 3.144e-02, 3.654e-02, -5.377e-02, 1.643e-01, -2.735e-01)); + r += mul(s5_2, M4(6.332e-03, 1.853e-01, 1.053e-01, 1.944e-02, -1.290e-02, -4.683e-02, -3.341e-02, 1.207e-01, -2.681e-02, -1.054e-01, 5.190e-02, 1.391e-01, 4.749e-03, 6.550e-02, -1.358e-01, 2.150e-02)); + r += mul(s5_3, M4(7.859e-03, -5.134e-02, -4.399e-02, -8.536e-02, -9.574e-02, -1.510e-01, -3.029e-02, -3.398e-02, 1.390e-02, -4.405e-02, 
1.357e-01, -3.742e-02, -3.758e-02, -3.409e-02, 4.446e-02, -9.369e-02)); + r += mul(s5_4, M4(-1.086e-02, 1.335e-01, -7.505e-02, 2.578e-02, -2.335e-02, 7.722e-03, 9.271e-02, 4.046e-02, -8.313e-03, 1.330e-02, -1.720e-02, -1.942e-01, -1.159e-01, 7.048e-02, 2.139e-01, 2.432e-02)); + r += mul(s5_5, M4(-1.506e-02, -5.078e-02, 4.471e-02, -1.803e-02, -5.591e-02, -1.636e-01, 1.586e-01, 1.724e-01, 1.345e-03, -8.110e-02, 4.794e-02, -8.287e-02, -1.740e-02, -1.958e-01, -6.885e-02, 1.745e-02)); + r += mul(s5_6, M4(-5.305e-03, -1.426e-01, 1.051e-02, -8.768e-02, -1.354e-02, 2.778e-03, 1.055e-01, -1.119e-02, 2.422e-02, 1.937e-01, 1.025e-01, -1.818e-02, -3.728e-02, 1.294e-01, -2.123e-01, -2.478e-03)); + r += mul(s5_7, M4(2.103e-02, -2.761e-02, 1.809e-02, -5.856e-02, 3.352e-02, -8.026e-02, -6.127e-03, 2.577e-02, -5.896e-02, 6.966e-03, -6.744e-02, -5.771e-02, 7.337e-02, -2.228e-01, -2.579e-02, -1.155e-01)); + r += mul(s5_8, M4(-4.114e-02, 2.763e-03, -4.153e-02, -2.680e-02, -4.471e-02, -1.025e-01, -9.005e-03, -3.402e-02, 1.751e-02, 9.404e-02, -8.964e-02, 1.201e-02, -1.076e-02, -8.601e-03, -1.301e-01, -1.059e-01)); + r += mul(s6_0, M4(3.331e-03, 8.274e-02, 1.260e-01, 1.135e-01, 5.462e-02, 1.505e-02, -5.568e-02, -1.361e-02, -2.206e-02, -1.043e-01, -6.059e-02, -1.280e-02, 4.173e-02, -2.983e-02, 1.520e-01, 4.613e-03)); + r += mul(s6_1, M4(3.081e-02, 8.508e-02, 1.272e-01, 6.261e-02, -1.155e-01, 1.576e-02, -3.370e-02, -5.907e-02, 2.271e-02, -1.651e-02, -1.000e-01, -4.846e-02, -1.682e-03, -5.439e-02, -1.449e-01, 2.678e-02)); + r += mul(s6_2, M4(1.241e-02, 3.698e-02, 3.387e-02, 6.518e-02, 2.522e-02, -6.981e-02, 5.470e-02, -7.337e-02, -6.131e-02, -4.012e-02, -6.431e-02, -1.311e-01, 7.278e-03, 2.380e-02, 1.106e-02, 5.129e-02)); + r += mul(s6_3, M4(-5.088e-02, -1.362e-01, -1.841e-01, 8.520e-03, 7.464e-02, 2.412e-02, -2.240e-01, 7.396e-02, -8.567e-03, 3.698e-03, 7.064e-02, -1.045e-01, 4.836e-02, -1.381e-01, -5.552e-02, -6.792e-02)); + r += mul(s6_4, M4(-8.957e-02, -5.023e-02, -2.319e-03, 
-3.383e-02, -4.418e-02, -6.413e-02, -1.199e-02, 1.965e-02, 1.140e-01, 2.155e-02, -5.066e-02, -8.185e-02, -8.862e-02, 5.287e-02, -6.621e-02, -6.042e-02)); + r += mul(s6_5, M4(9.941e-03, -3.429e-02, -9.328e-03, -2.321e-02, -5.515e-02, 6.063e-02, 9.183e-02, 3.312e-02, 8.526e-02, 3.991e-03, 3.733e-02, 2.007e-02, 8.746e-03, -8.149e-03, 1.752e-01, -2.465e-03)); + r += mul(s6_6, M4(-1.963e-02, -2.332e-02, 6.692e-02, -1.371e-01, 2.745e-02, 9.046e-02, 8.091e-02, 2.885e-02, 6.670e-02, -1.123e-01, -2.297e-02, 5.476e-02, -2.732e-03, -1.063e-01, 8.884e-02, 4.369e-02)); + r += mul(s6_7, M4(-2.688e-02, 8.613e-02, -1.689e-02, 1.859e-02, -4.587e-02, -6.622e-02, 1.299e-01, -1.165e-01, -1.137e-02, 1.396e-01, 1.644e-01, 7.056e-02, 3.041e-03, 4.779e-02, -7.382e-03, 1.134e-02)); + r += mul(s6_8, M4(4.639e-02, -8.910e-02, 2.229e-02, -5.334e-02, 4.236e-02, 1.227e-01, 3.001e-02, 1.801e-01, -2.042e-03, 9.728e-03, 3.124e-02, 1.685e-02, -2.582e-03, 7.082e-04, -9.992e-02, 7.321e-02)); + r += mul(s7_0, M4(2.035e-02, -9.397e-02, 1.131e-01, 1.112e-01, 1.045e-01, 1.051e-01, 1.701e-01, 3.186e-01, -9.369e-02, -9.392e-02, -1.106e-01, 1.176e-01, 2.103e-02, 2.464e-01, -4.942e-02, -1.999e-02)); + r += mul(s7_1, M4(-2.264e-02, 3.158e-02, 8.701e-02, -2.893e-01, -1.537e-01, -1.782e-01, 8.104e-03, 1.830e-01, 9.748e-02, 6.874e-02, 5.698e-02, -2.618e-01, 1.413e-01, -1.715e-02, 1.973e-01, -4.910e-02)); + r += mul(s7_2, M4(2.451e-02, -3.421e-02, 1.512e-01, -1.254e-01, 1.938e-02, -6.550e-02, 3.108e-02, -2.869e-02, 1.086e-02, 1.887e-01, -1.990e-01, 2.933e-02, -7.444e-02, 1.322e-01, 7.581e-03, -1.204e-01)); + r += mul(s7_3, M4(9.478e-03, -2.528e-01, -1.993e-01, 1.319e-01, 1.392e-01, -1.184e-01, 1.748e-01, 4.578e-01, -6.064e-02, 1.102e-01, -1.556e-01, -1.085e-01, -4.835e-03, 8.928e-02, -7.895e-02, -1.913e-02)); + r += mul(s7_4, M4(-1.792e-01, 2.198e-01, -2.378e-01, 1.664e-01, -2.615e-01, -1.927e-01, -3.845e-01, 1.392e-01, 1.929e-01, -2.491e-01, 1.803e-01, -1.146e-01, -2.153e-02, -7.661e-02, 3.047e-02, -1.553e-01)); 
+ r += mul(s7_5, M4(-4.536e-04, 2.779e-02, -1.637e-02, 6.501e-02, -7.290e-02, 8.468e-02, -2.876e-02, 3.537e-02, 1.322e-01, 1.287e-01, 9.628e-02, -5.629e-02, 5.159e-02, -1.237e-02, 1.812e-02, -1.645e-02)); + r += mul(s7_6, M4(-1.415e-02, -1.228e-01, -1.062e-01, -1.982e-03, 7.281e-02, 5.702e-01, 5.061e-01, 1.132e-01, 7.714e-02, 1.002e-01, 1.661e-02, 4.868e-02, 4.049e-02, 4.560e-01, -9.460e-02, 1.208e-01)); + r += mul(s7_7, M4(2.032e-02, 5.279e-02, -1.027e-01, 1.086e-01, 1.052e-02, -2.708e-01, -1.931e-01, 4.106e-02, 1.941e-01, 1.763e-01, 5.160e-01, 3.458e-02, -2.822e-02, 1.318e-02, -4.618e-02, 7.583e-02)); + r += mul(s7_8, M4(1.274e-01, 1.801e-02, -1.604e-02, 5.288e-02, 5.723e-02, -2.980e-01, 1.668e-01, -1.892e-01, -2.958e-02, 1.467e-01, 2.737e-02, -2.500e-01, 2.540e-03, -1.453e-01, -6.082e-02, 8.914e-02)); + r += V4(5.017e-02, -2.472e-02, -8.546e-03, 4.613e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.769e-01, 1.890e-01, -1.346e-01, 4.498e-02, -5.215e-02, -1.350e-01, 1.967e-01, -5.861e-02, -5.850e-02, -3.990e-03, 8.226e-02, -1.164e-02, 4.593e-02, 7.120e-03, -6.743e-03, 1.034e-01)); + r += mul(s0_1, M4(1.599e-01, -4.515e-01, 7.455e-02, 1.520e-01, 1.614e-01, 4.222e-03, 1.287e-01, 1.402e-01, 1.576e-02, 4.033e-01, -4.946e-02, -3.333e-02, -8.869e-02, 9.984e-02, 5.972e-02, 6.922e-02)); + r += mul(s0_2, M4(1.290e-01, 
-2.785e-02, -1.034e-01, 1.953e-01, 1.258e-02, -1.653e-01, 5.398e-03, 4.797e-02, -1.307e-01, -2.807e-02, -8.324e-02, -2.679e-01, 3.305e-02, -4.835e-02, 1.052e-03, 2.273e-04)); + r += mul(s0_3, M4(5.643e-03, 1.652e-01, -3.521e-01, 2.138e-01, 4.679e-02, -7.642e-02, 5.980e-02, 1.397e-01, 1.753e-01, 1.755e-01, 1.188e-01, -2.106e-02, -1.723e-02, 1.938e-02, 4.026e-02, 1.164e-02)); + r += mul(s0_4, M4(4.681e-02, -2.655e-01, -9.812e-02, -1.739e-01, 1.446e-02, -3.818e-02, -6.509e-03, -1.201e-02, -1.034e-01, 1.189e-01, 9.574e-02, 1.625e-01, -1.010e-01, 5.830e-02, 5.339e-02, 1.189e-01)); + r += mul(s0_5, M4(9.504e-02, 2.441e-01, -1.970e-01, -1.449e-01, -6.458e-02, 2.752e-02, 1.072e-01, 3.095e-02, -3.617e-02, 2.200e-02, -1.818e-01, 2.857e-02, -3.973e-02, 1.105e-01, 3.115e-02, -8.797e-02)); + r += mul(s0_6, M4(4.346e-03, 4.373e-01, -1.908e-01, 2.041e-01, -5.081e-02, -6.123e-02, -8.348e-02, 3.666e-02, -9.411e-02, -5.219e-03, 1.670e-02, 1.344e-01, 1.077e-02, -8.030e-02, 5.326e-03, 1.046e-01)); + r += mul(s0_7, M4(-1.004e-01, 2.293e-02, -6.024e-02, 1.088e-01, -1.450e-01, 6.992e-02, -9.334e-02, -1.212e-01, 3.736e-02, -2.315e-01, 1.410e-01, 5.660e-02, 1.862e-02, 4.613e-02, -1.584e-02, 7.047e-02)); + r += mul(s0_8, M4(4.643e-02, 4.579e-02, -5.918e-02, 6.728e-02, 4.241e-02, -2.387e-01, -6.879e-02, 5.496e-02, 1.884e-01, -2.569e-02, -1.007e-01, 8.366e-02, 7.408e-02, 4.701e-02, -3.806e-02, -1.014e-02)); + r += mul(s1_0, M4(1.395e-02, -8.334e-02, 4.931e-02, -9.005e-02, 8.142e-02, 1.999e-02, 1.284e-01, 1.331e-02, 8.799e-02, 3.287e-04, 8.323e-02, -7.941e-02, 3.745e-02, 5.978e-02, 4.903e-01, -2.100e-01)); + r += mul(s1_1, M4(-1.767e-01, -3.321e-02, 1.431e-01, 2.912e-02, 8.241e-02, 3.289e-02, 7.668e-02, 2.679e-02, -6.279e-02, 1.038e-01, -4.263e-02, -3.315e-02, -1.966e-01, -1.607e-01, -1.609e-01, -1.256e-01)); + r += mul(s1_2, M4(4.673e-03, -1.148e-01, -3.676e-02, 6.074e-02, -7.172e-03, -4.427e-02, 1.597e-01, -2.477e-02, -5.421e-02, 1.013e-01, 2.950e-02, -9.229e-02, -1.843e-01, -5.846e-01, 
-4.701e-02, -5.318e-01)); + r += mul(s1_3, M4(-5.804e-02, -9.971e-02, 1.895e-02, -1.380e-01, 2.497e-02, -1.329e-01, 7.613e-02, 3.214e-03, 1.170e-01, -1.944e-02, -2.948e-02, 8.534e-03, -2.418e-01, 1.964e-01, 5.537e-02, 4.935e-02)); + r += mul(s1_4, M4(7.893e-02, -7.963e-02, -5.421e-02, 4.740e-02, 1.656e-04, 6.275e-02, -1.644e-03, -4.518e-02, 7.364e-05, 8.150e-02, -4.849e-02, 1.505e-01, -1.415e-01, 1.660e-01, -2.366e-01, 1.480e-01)); + r += mul(s1_5, M4(5.773e-02, 1.815e-01, 1.106e-02, -5.591e-02, 7.044e-02, 1.733e-01, 6.559e-02, 9.476e-02, -8.762e-02, -4.256e-02, 1.034e-02, -3.661e-02, -2.631e-01, -2.397e-01, 1.254e-01, -2.470e-01)); + r += mul(s1_6, M4(3.183e-02, -1.378e-02, -5.040e-02, 9.623e-02, -2.844e-02, -1.717e-03, 1.396e-02, 5.313e-02, 2.970e-02, 4.093e-03, 9.051e-02, 1.720e-02, -6.049e-02, -3.179e-01, -2.223e-02, -1.061e-01)); + r += mul(s1_7, M4(6.208e-02, -3.232e-02, -7.263e-02, 1.046e-01, -7.699e-02, -1.372e-02, -3.265e-02, -3.966e-02, 7.673e-02, -9.687e-02, 5.634e-03, 3.653e-02, -1.708e-02, 4.199e-02, -1.387e-01, 2.571e-01)); + r += mul(s1_8, M4(-3.332e-03, -1.940e-03, 4.380e-02, -1.268e-01, 1.219e-01, 1.835e-02, -6.464e-02, 1.054e-02, -5.188e-02, 1.726e-02, -7.917e-02, -2.458e-02, -4.254e-01, -2.410e-01, 1.529e-01, 6.448e-02)); + r += mul(s2_0, M4(8.106e-02, -7.452e-02, -6.152e-02, -7.733e-02, -8.086e-03, -1.625e-02, -1.457e-01, -2.278e-01, 1.031e-02, -2.093e-02, -5.553e-03, -1.876e-02, -3.961e-02, -1.525e-01, 8.504e-02, -3.757e-02)); + r += mul(s2_1, M4(-4.215e-02, -3.202e-02, -2.231e-01, -1.936e-01, -2.420e-02, 1.097e-01, 2.642e-02, -1.006e-01, 8.334e-02, 3.013e-02, -1.267e-01, 2.271e-02, 1.836e-02, 6.251e-02, 7.906e-03, 1.516e-01)); + r += mul(s2_2, M4(-5.334e-02, -3.053e-02, -9.453e-02, -1.276e-01, -7.813e-02, -2.116e-01, -1.310e-01, -6.871e-02, 1.601e-02, -8.765e-02, 4.449e-02, 8.631e-02, 1.214e-01, 7.435e-02, 4.918e-02, 5.179e-02)); + r += mul(s2_3, M4(9.149e-03, 1.258e-01, 1.028e-01, 5.660e-03, 1.250e-01, 5.431e-02, 1.488e-01, 6.198e-02, 
1.659e-01, -4.945e-02, 1.060e-01, 7.387e-03, -1.841e-02, 2.213e-02, -4.996e-02, -5.252e-03)); + r += mul(s2_4, M4(1.315e-01, -3.812e-02, -4.451e-03, -8.287e-02, 5.701e-02, -1.140e-01, -2.271e-02, -1.523e-02, -1.305e-01, -5.270e-02, -4.576e-02, 1.534e-02, -1.640e-01, -9.250e-02, -4.700e-02, -3.133e-02)); + r += mul(s2_5, M4(-1.770e-02, 6.482e-02, 3.561e-02, -7.077e-02, -2.529e-03, 2.154e-01, 4.663e-02, -2.664e-02, 4.379e-02, -1.185e-01, -2.281e-03, -7.470e-02, 8.433e-02, 7.771e-02, 2.258e-02, 6.568e-02)); + r += mul(s2_6, M4(6.766e-02, 2.108e-02, 1.270e-02, 6.760e-02, -7.641e-02, 1.843e-02, -7.528e-02, 2.763e-02, -5.471e-02, -4.682e-04, 7.135e-02, 3.514e-02, 3.918e-02, -2.661e-02, -2.037e-02, 2.320e-02)); + r += mul(s2_7, M4(6.360e-02, -9.693e-02, 4.105e-02, 1.151e-01, -5.643e-02, -8.626e-02, -3.453e-02, -6.207e-02, 6.755e-03, -8.766e-02, 8.486e-02, -6.175e-02, -9.593e-02, 5.843e-02, -2.683e-02, -6.402e-02)); + r += mul(s2_8, M4(1.430e-01, -3.707e-02, 5.535e-02, 2.933e-02, -1.453e-01, -1.584e-01, 6.419e-02, 2.175e-02, 1.458e-01, -8.063e-02, -5.444e-02, 5.310e-02, 5.165e-02, -2.809e-02, -8.537e-03, -2.830e-02)); + r += mul(s3_0, M4(5.564e-02, -6.129e-02, -3.133e-02, -1.520e-02, 6.671e-02, 3.121e-02, 6.149e-02, -7.370e-02, 5.043e-02, -1.037e-01, -2.812e-03, -5.590e-02, -9.103e-02, -2.737e-01, 1.371e-01, -2.925e-02)); + r += mul(s3_1, M4(-9.142e-02, 1.799e-01, -3.346e-02, -1.074e-01, 5.501e-02, 7.856e-03, 6.031e-03, -8.088e-02, -2.287e-02, 3.100e-01, 9.814e-02, 9.471e-02, -2.496e-01, 4.528e-01, 8.430e-02, -1.320e-03)); + r += mul(s3_2, M4(-5.782e-02, 6.318e-02, -1.789e-02, -1.461e-01, -1.515e-02, -1.336e-01, -2.058e-01, -2.046e-02, -6.158e-02, -1.607e-02, 1.050e-01, 3.337e-02, -2.079e-02, 3.494e-01, 7.088e-02, -2.304e-01)); + r += mul(s3_3, M4(-1.257e-01, -1.046e-01, 1.097e-01, -1.196e-01, 6.986e-02, -8.444e-03, 5.487e-02, 1.475e-01, -1.110e-01, -6.646e-02, 3.417e-01, -2.054e-01, -1.888e-03, 1.974e-02, -2.172e-01, 1.390e-01)); + r += mul(s3_4, M4(1.125e-01, -7.499e-02, 
1.321e-02, 5.380e-02, -6.331e-02, 2.885e-01, -5.830e-02, 4.324e-02, -1.243e-01, 9.861e-02, 5.391e-02, -3.004e-02, -2.324e-01, 1.354e-02, 1.800e-01, -1.214e-01)); + r += mul(s3_5, M4(-8.864e-02, -5.277e-02, 1.193e-01, -2.296e-01, -2.080e-02, 5.544e-02, -1.609e-02, -1.959e-01, 7.995e-02, 1.137e-01, 1.110e-01, -1.081e-01, -2.164e-01, -6.165e-02, -4.176e-01, 2.230e-01)); + r += mul(s3_6, M4(-6.254e-02, -1.889e-02, 7.543e-03, 7.514e-03, -2.202e-02, 1.114e-02, 7.680e-02, -6.242e-02, -1.535e-01, 8.380e-03, -5.794e-02, 1.097e-01, 2.236e-02, 3.382e-01, -1.351e-02, 5.943e-01)); + r += mul(s3_7, M4(-1.897e-02, 9.350e-02, 1.919e-01, 1.368e-01, 4.243e-02, 1.988e-02, 5.595e-02, -6.999e-03, -1.053e-01, -3.987e-02, 6.187e-02, -8.294e-02, 2.611e-01, -1.698e-01, 1.027e-01, -1.852e-01)); + r += mul(s3_8, M4(1.389e-01, 3.013e-02, 1.155e-01, -2.411e-02, 5.874e-02, -5.473e-02, 1.171e-01, -1.363e-01, 3.909e-02, -9.738e-02, -1.541e-02, -1.007e-01, -2.509e-02, -7.693e-02, -7.053e-02, 2.380e-01)); + r += mul(s4_0, M4(2.173e-01, 6.941e-02, 9.238e-02, 1.529e-01, 1.223e-01, -8.523e-02, -7.405e-02, -1.887e-02, 2.059e-02, 4.005e-02, 1.043e-01, 5.467e-02, 3.185e-02, 5.738e-02, 5.115e-02, 2.614e-02)); + r += mul(s4_1, M4(-2.511e-01, -2.269e-01, -6.805e-02, -3.722e-01, 1.636e-01, -8.646e-03, 1.841e-02, -3.401e-02, -3.941e-02, -5.779e-02, -1.787e-01, -2.084e-02, 7.398e-02, -5.920e-02, -5.115e-02, 5.309e-02)); + r += mul(s4_2, M4(1.427e-01, -6.510e-01, -3.659e-01, 2.387e-01, -5.120e-02, 3.985e-02, 1.098e-02, 6.384e-03, 1.493e-02, -6.208e-02, 1.038e-01, -1.453e-02, -8.886e-02, -1.207e-01, -9.097e-02, 4.186e-04)); + r += mul(s4_3, M4(-1.664e-01, 3.055e-01, 2.488e-01, 1.361e-01, -8.428e-02, 2.974e-02, 3.773e-02, -5.941e-02, -2.358e-02, -9.824e-02, -2.237e-02, -5.959e-02, 9.347e-02, 5.989e-02, -1.025e-01, -1.510e-01)); + r += mul(s4_4, M4(5.308e-02, 3.620e-01, 1.495e-02, 2.223e-01, 1.276e-01, 7.906e-02, -5.030e-02, -4.890e-02, -9.022e-02, 2.121e-02, 1.389e-01, -3.985e-03, -4.994e-02, -9.489e-03, 
1.424e-01, 5.117e-02)); + r += mul(s4_5, M4(-2.974e-01, -1.401e-01, -9.986e-02, 9.604e-02, 2.256e-02, 2.873e-01, 7.987e-03, -7.493e-02, 2.292e-01, 1.061e-01, 1.438e-01, -6.919e-02, -6.513e-02, 2.582e-02, -1.415e-02, -1.568e-01)); + r += mul(s4_6, M4(-9.840e-02, 2.586e-01, -8.025e-02, 1.717e-01, -5.787e-02, -6.555e-02, -8.939e-02, 9.012e-02, -1.863e-02, 7.153e-02, -6.297e-02, -1.737e-01, 3.308e-02, -1.636e-02, 5.709e-02, 9.285e-02)); + r += mul(s4_7, M4(-5.649e-02, -1.537e-01, -1.290e-01, 1.347e-01, -9.012e-02, -4.008e-02, 1.942e-02, 3.189e-02, 2.808e-02, -3.812e-02, -2.656e-02, 6.829e-02, 2.456e-04, 1.119e-02, -1.088e-01, 7.207e-02)); + r += mul(s4_8, M4(2.026e-01, 9.658e-02, -6.331e-02, 1.661e-01, 4.098e-02, -1.569e-01, 3.871e-02, 1.962e-02, -1.005e-01, -5.360e-02, 5.993e-02, -6.681e-03, -6.750e-02, 3.160e-02, -6.498e-02, -4.712e-03)); + r += mul(s5_0, M4(-3.447e-02, 4.048e-02, -7.026e-02, 3.863e-02, 2.506e-01, 4.037e-02, 6.424e-02, -1.953e-04, -9.398e-02, -1.219e-01, 6.855e-02, -1.523e-02, 2.064e-02, 8.759e-02, 2.222e-01, 2.804e-02)); + r += mul(s5_1, M4(-6.874e-02, 3.698e-03, -1.326e-01, 8.570e-02, 8.906e-02, 9.961e-02, 1.252e-01, 1.808e-02, 1.472e-02, -2.685e-02, -6.776e-02, -4.234e-02, -8.621e-02, -5.135e-02, -8.713e-02, 6.227e-02)); + r += mul(s5_2, M4(4.261e-02, -8.624e-02, 6.382e-02, -4.885e-02, 5.271e-02, -4.970e-02, -9.689e-02, 1.942e-01, -4.576e-02, 2.971e-02, 1.246e-01, -2.061e-02, 5.367e-02, -1.649e-01, 7.247e-02, 3.256e-03)); + r += mul(s5_3, M4(2.156e-02, -3.901e-02, -7.494e-03, -3.963e-02, -9.054e-03, 9.909e-02, -1.919e-01, 4.831e-02, 1.045e-01, 6.352e-02, 2.492e-02, 9.664e-02, 1.664e-01, -9.012e-02, 1.774e-03, -6.163e-02)); + r += mul(s5_4, M4(-1.641e-02, 1.565e-02, -4.928e-02, 1.270e-01, -1.709e-01, 8.356e-02, 6.605e-02, 2.227e-02, -1.865e-01, -7.826e-02, 3.809e-02, -2.962e-01, -1.612e-01, -3.100e-01, 1.306e-01, 8.932e-02)); + r += mul(s5_5, M4(1.355e-02, 1.604e-01, 1.241e-01, -1.277e-01, 6.250e-02, 8.501e-02, 6.262e-02, -3.701e-02, -1.594e-01, 
-8.186e-02, 1.233e-01, -2.602e-03, -1.631e-01, 9.353e-02, 1.754e-01, 7.126e-02)); + r += mul(s5_6, M4(1.222e-02, 1.920e-02, 7.184e-02, 8.490e-03, 6.222e-02, 8.496e-02, -1.072e-01, 9.971e-02, 4.149e-02, -1.792e-02, -4.386e-02, -2.008e-02, 9.563e-02, 1.153e-01, -3.472e-02, 6.497e-02)); + r += mul(s5_7, M4(-9.252e-02, 6.629e-02, 1.663e-02, -6.928e-04, -1.021e-02, -5.721e-02, 7.302e-02, 4.191e-02, 1.064e-01, 2.094e-01, -2.151e-02, -5.463e-02, 1.049e-01, -1.570e-02, -1.413e-01, 1.091e-01)); + r += mul(s5_8, M4(7.991e-02, -9.547e-02, -1.492e-02, -1.840e-02, 9.470e-02, -5.424e-02, -2.479e-01, 3.591e-02, 2.842e-02, -3.262e-02, -6.494e-02, 3.304e-02, -3.533e-02, 1.561e-01, 3.110e-02, 1.859e-01)); + r += mul(s6_0, M4(5.014e-02, -2.430e-02, 2.443e-02, 2.271e-02, -2.687e-02, 1.429e-02, -1.183e-01, -3.453e-02, 3.654e-02, 4.275e-02, -1.271e-02, 5.869e-02, -7.522e-02, 3.797e-02, -1.045e-01, 2.725e-02)); + r += mul(s6_1, M4(-4.329e-02, -6.575e-02, 1.149e-01, -1.062e-01, -3.751e-02, -1.205e-01, -2.328e-02, 9.374e-02, 1.240e-01, -3.746e-03, -2.983e-02, 1.182e-01, -4.737e-02, 1.003e-01, -1.484e-02, 2.041e-02)); + r += mul(s6_2, M4(-3.182e-02, 9.013e-02, -7.199e-02, 3.300e-03, 5.126e-02, 1.061e-01, 9.584e-02, 4.920e-02, -3.661e-02, 3.929e-03, 4.104e-02, 2.964e-02, -6.641e-02, -1.847e-01, 1.518e-01, 4.210e-02)); + r += mul(s6_3, M4(3.809e-02, 1.141e-01, -2.025e-02, -3.144e-02, 7.610e-02, 6.201e-02, -1.278e-01, -2.698e-02, 2.440e-02, -3.292e-02, -1.224e-01, -1.537e-01, -8.080e-02, 4.410e-02, -6.527e-02, -8.392e-02)); + r += mul(s6_4, M4(-7.064e-02, 7.199e-02, -5.253e-02, 3.848e-02, 8.472e-03, -2.751e-02, 2.397e-01, -4.625e-02, -1.421e-01, 4.324e-02, -1.043e-01, -1.444e-02, -1.365e-02, -6.261e-02, -2.455e-02, -1.644e-02)); + r += mul(s6_5, M4(-1.123e-01, -4.866e-02, 7.136e-02, -7.598e-02, 1.370e-01, -5.719e-02, 1.156e-01, 8.149e-02, 1.303e-02, 5.168e-02, 5.268e-02, -8.777e-02, 1.678e-01, -4.289e-02, 4.631e-02, 1.116e-02)); + r += mul(s6_6, M4(7.914e-02, -6.892e-02, 3.845e-02, 9.035e-02, 
1.392e-02, 6.559e-02, 9.920e-02, -6.218e-02, 7.775e-03, 4.465e-02, 2.889e-02, -2.887e-02, 2.551e-02, 7.788e-02, 1.951e-02, 8.912e-02)); + r += mul(s6_7, M4(6.857e-03, 8.237e-02, -6.959e-02, 3.558e-02, 1.657e-02, 1.091e-01, -1.466e-02, -8.162e-02, 5.031e-02, 2.547e-02, 2.146e-02, -5.752e-02, -2.984e-02, -1.845e-02, 1.276e-01, -3.895e-02)); + r += mul(s6_8, M4(-5.837e-02, -1.167e-01, 3.480e-02, 2.391e-01, 1.824e-01, -6.553e-02, 1.919e-02, -1.807e-02, -7.098e-02, -6.898e-02, 7.033e-02, -3.931e-02, 7.109e-02, -2.093e-02, -2.907e-02, -6.944e-02)); + r += mul(s7_0, M4(7.593e-02, -7.650e-02, 1.065e-01, -2.098e-02, 1.823e-03, 5.921e-02, -1.012e-02, -1.369e-02, 7.431e-02, 4.661e-02, -1.114e-01, -1.332e-01, -6.575e-02, -1.473e-01, -4.117e-02, 6.442e-02)); + r += mul(s7_1, M4(1.165e-03, 1.285e-01, 1.139e-01, 1.315e-02, 2.552e-02, 2.377e-01, -1.837e-01, -1.272e-01, -7.107e-02, -1.150e-01, 3.009e-02, -1.161e-02, -2.478e-03, -3.300e-01, 1.672e-02, -3.657e-02)); + r += mul(s7_2, M4(-3.815e-02, 1.411e-01, -1.306e-02, -1.292e-02, -5.307e-02, 3.563e-02, -1.113e-01, -2.583e-01, 3.122e-02, 2.440e-01, -2.140e-01, 1.636e-01, 2.650e-01, 9.879e-02, -6.845e-02, 4.067e-02)); + r += mul(s7_3, M4(-2.669e-01, -1.675e-01, -2.877e-01, 3.672e-02, 5.038e-01, -1.527e-01, 3.725e-01, -1.534e-01, 1.373e-01, 6.892e-03, 4.146e-02, -2.252e-01, -6.766e-03, 1.246e-01, -9.469e-02, 2.020e-02)); + r += mul(s7_4, M4(6.280e-02, 6.246e-03, -9.958e-02, 1.966e-01, 2.409e-01, -1.813e-02, 9.328e-02, -1.684e-01, -1.074e-01, -1.992e-01, -1.228e-01, 3.328e-01, 9.693e-02, 9.424e-02, 1.686e-01, -1.767e-01)); + r += mul(s7_5, M4(-6.688e-03, -3.218e-03, 9.671e-02, 1.111e-01, -1.830e-01, 1.717e-03, -3.666e-01, -7.725e-02, 1.551e-01, 4.519e-01, -1.876e-01, -2.229e-02, 6.402e-02, -1.702e-01, -6.232e-02, 6.659e-02)); + r += mul(s7_6, M4(-2.714e-02, -1.401e-01, 5.579e-02, -6.691e-02, -6.501e-01, 2.028e-02, -1.425e-01, -6.160e-02, 1.076e-01, -1.367e-02, 1.475e-01, 1.244e-01, 1.753e-01, -3.359e-02, -1.749e-01, 1.348e-02)); + r += 
mul(s7_7, M4(-4.595e-02, 6.315e-02, -1.296e-01, -3.428e-02, -4.203e-01, -2.758e-01, -2.281e-01, 4.241e-02, -3.090e-02, -1.202e-01, -4.892e-02, -1.343e-01, -1.120e-01, 8.944e-03, 3.657e-02, -2.739e-01)); + r += mul(s7_8, M4(-5.656e-02, -1.289e-01, -1.099e-01, 2.239e-02, -3.330e-01, 2.338e-01, 1.498e-01, -2.335e-02, 5.013e-02, 1.028e-01, 6.603e-03, 2.763e-01, 1.396e-01, -1.444e-02, -8.525e-02, 1.582e-01)); + r += V4(2.102e-02, -1.610e-02, -6.966e-03, 4.475e-03); + return r; +} +/* f3: sums mul(sK_T, M4(...)) over all 72 V4 arguments (s0_0 .. s7_8), adds one constant V4 and returns the total. Pass6 stores the result in t7[gxy]. NOTE(review): the M4 literals look like generated network weights (assumption) - regenerate rather than hand-edit. */ +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; /* running sum of every term below */ + r += mul(s0_0, M4(-5.645e-02, 1.760e-01, -4.482e-02, 2.093e-01, 1.129e-01, -9.060e-02, -6.022e-02, -2.884e-02, -8.049e-02, -5.757e-02, -1.983e-01, -5.161e-02, 6.733e-02, -1.468e-02, 1.825e-02, -7.886e-02)); + r += mul(s0_1, M4(7.052e-02, 8.472e-02, 1.384e-02, -3.865e-01, 2.505e-02, 1.044e-01, -1.588e-01, -8.696e-02, -1.147e-01, -4.587e-02, 2.314e-01, -7.237e-02, -1.459e-01, 2.923e-02, -2.466e-02, 5.795e-05)); + r += mul(s0_2, M4(-1.017e-01, -3.273e-01, -5.922e-02, -3.409e-02, 5.008e-03, -8.695e-02, 1.576e-02, -4.982e-02, -1.707e-01, -1.656e-01, -6.513e-02, 1.027e-01, -5.422e-02, 3.751e-02, 1.630e-03, 1.394e-02)); + r += mul(s0_3, M4(1.792e-01, 1.756e-01, -1.590e-01, -2.943e-01, -4.548e-02, 1.650e-02, -3.772e-02, 3.859e-02, -9.898e-03, -1.431e-01, 8.251e-03, -2.807e-02, 4.794e-02, 1.859e-02, 4.840e-02, 7.029e-02)); + r += mul(s0_4,
M4(-2.933e-02, -5.911e-02, 3.245e-02, -7.475e-02, 5.743e-03, -5.643e-02, -1.475e-01, -7.419e-02, -1.841e-01, -4.314e-01, 1.638e-01, 1.587e-01, 3.185e-03, -4.497e-02, 1.671e-01, 5.262e-02)); + r += mul(s0_5, M4(-2.551e-02, -4.022e-02, -1.042e-01, -2.873e-01, -1.970e-01, -1.287e-02, 1.395e-01, 5.512e-02, -7.468e-03, 7.047e-02, -4.884e-03, -1.041e-01, -3.473e-02, 2.868e-02, -3.518e-02, -1.248e-01)); + r += mul(s0_6, M4(-4.804e-02, -6.818e-02, 1.479e-02, -1.791e-02, 4.143e-02, 1.913e-02, -7.768e-02, -2.089e-02, 2.541e-02, 1.560e-02, -2.345e-02, -2.922e-03, 5.163e-03, 4.283e-02, 7.362e-02, 2.913e-02)); + r += mul(s0_7, M4(-2.509e-02, -7.775e-02, -1.760e-01, -3.009e-01, -6.017e-03, -3.875e-02, 5.245e-02, -4.555e-02, 2.316e-01, -1.733e-02, -1.193e-02, -1.439e-02, 9.735e-03, 6.273e-02, -4.499e-02, 1.782e-02)); + r += mul(s0_8, M4(2.454e-03, -1.476e-01, -4.833e-02, -9.239e-02, -6.864e-02, -7.381e-03, 2.536e-02, 8.117e-02, 6.630e-02, -1.927e-01, -7.264e-02, -2.849e-02, -2.473e-02, 8.794e-02, -2.888e-02, -1.933e-02)); + r += mul(s1_0, M4(-2.151e-02, -6.765e-02, -1.943e-02, -5.016e-03, 6.872e-02, 5.624e-02, -7.056e-02, 6.857e-03, -2.249e-02, -4.201e-02, -4.781e-02, 3.855e-03, 8.819e-02, 1.557e-03, 2.673e-02, 3.076e-02)); + r += mul(s1_1, M4(3.736e-02, -2.357e-03, 5.260e-02, -2.960e-03, 7.647e-02, 9.028e-02, -1.166e-01, -1.988e-02, -1.324e-01, -6.872e-02, 1.139e-01, -1.696e-02, -2.381e-01, 4.614e-02, -1.438e-01, -7.316e-02)); + r += mul(s1_2, M4(-2.225e-02, 4.959e-02, 5.656e-02, -7.849e-02, -1.077e-01, -6.801e-02, -4.746e-02, 4.597e-02, 9.405e-02, 9.797e-02, 2.597e-02, 2.789e-02, -6.002e-02, -2.756e-02, 2.752e-01, 5.346e-01)); + r += mul(s1_3, M4(5.664e-02, -5.358e-02, -1.431e-01, 9.291e-02, 7.151e-02, -1.090e-01, 2.102e-02, 5.345e-02, -3.568e-02, 8.393e-03, 2.778e-02, 1.294e-01, -4.460e-02, -7.108e-02, 9.762e-03, 9.799e-02)); + r += mul(s1_4, M4(1.030e-02, -3.228e-02, 1.313e-01, -1.178e-01, 1.518e-02, -1.056e-01, -1.023e-01, -5.527e-02, -1.071e-01, -2.202e-03, 5.149e-02,
-3.783e-02, -2.376e-01, 1.557e-01, 1.959e-01, -2.218e-01)); + r += mul(s1_5, M4(-2.501e-02, -1.042e-01, -6.742e-02, 5.240e-02, -2.588e-01, -2.092e-02, 4.741e-02, 8.323e-02, 2.351e-01, -5.172e-02, 1.022e-01, 9.954e-04, 1.106e-01, 3.128e-02, -1.142e-01, 3.759e-02)); + r += mul(s1_6, M4(-1.075e-02, -1.126e-01, -4.243e-02, 1.313e-01, 1.141e-02, 4.852e-02, -4.503e-02, 2.348e-02, -6.812e-02, -7.743e-02, 5.062e-02, -2.036e-02, 1.831e-01, 1.960e-01, 2.308e-02, -7.480e-03)); + r += mul(s1_7, M4(-6.154e-02, 9.237e-03, -6.533e-02, 7.397e-02, 2.490e-02, -7.969e-02, -1.440e-01, -1.737e-02, 6.058e-02, 2.642e-03, -4.691e-03, 6.744e-02, -8.907e-02, 7.858e-02, -5.016e-02, -4.814e-02)); + r += mul(s1_8, M4(3.128e-02, -1.888e-03, -1.742e-02, 3.375e-03, -4.486e-02, 3.767e-03, -5.692e-02, 1.178e-01, 6.251e-02, 3.410e-02, 5.704e-02, 4.748e-02, -7.627e-02, 2.175e-02, 1.323e-01, -3.631e-01)); + r += mul(s2_0, M4(3.771e-02, 1.045e-01, 6.410e-03, 9.760e-03, 3.716e-02, -3.304e-02, -8.617e-02, 2.439e-02, 1.424e-02, -1.170e-01, -2.423e-02, -9.178e-02, -4.673e-02, 3.414e-02, -2.216e-02, 1.508e-01)); + r += mul(s2_1, M4(2.048e-03, -1.847e-02, -3.567e-03, -1.800e-04, -3.242e-02, 1.191e-01, 2.908e-02, 3.510e-02, 2.226e-02, -5.740e-02, 4.474e-02, -5.995e-02, 2.677e-02, -1.098e-01, -2.528e-02, -3.653e-03)); + r += mul(s2_2, M4(-4.418e-03, -2.628e-02, 4.605e-02, -5.577e-02, 3.294e-02, 1.789e-01, 7.153e-02, -9.860e-02, -7.700e-03, -9.162e-02, -2.459e-02, -6.051e-04, 3.210e-02, -3.459e-02, -2.111e-02, -9.662e-02)); + r += mul(s2_3, M4(2.483e-02, 1.000e-01, 6.958e-02, 2.590e-01, 2.166e-02, 9.445e-02, -8.639e-03, -1.070e-02, 2.945e-02, 4.727e-02, -3.682e-03, -8.194e-03, -4.184e-02, -5.784e-02, 5.353e-02, 1.007e-01)); + r += mul(s2_4, M4(-1.562e-01, 6.380e-02, 1.257e-01, -1.055e-01, -8.258e-02, -1.609e-01, -3.429e-03, 2.550e-02, -3.265e-03, 7.062e-02, -2.790e-02, 1.797e-01, -8.019e-02, -8.687e-02, -7.915e-03, 3.030e-02)); + r += mul(s2_5, M4(-4.810e-02, -1.096e-01, -4.925e-02, 6.176e-02, 1.106e-01,
1.277e-01, -1.977e-02, -4.168e-02, -9.010e-02, -7.625e-02, -9.524e-03, 1.310e-01, 1.174e-01, 8.116e-02, 4.536e-02, -5.032e-03)); + r += mul(s2_6, M4(7.342e-03, 1.611e-02, 1.339e-01, 6.824e-02, 1.531e-02, 4.872e-02, -4.677e-02, -3.873e-02, -3.992e-02, 8.549e-02, 1.598e-02, 1.353e-01, 1.762e-02, 9.697e-02, -3.610e-04, -2.265e-03)); + r += mul(s2_7, M4(-2.853e-02, -2.286e-03, -1.737e-02, 4.676e-02, -9.264e-02, -9.177e-02, 3.828e-02, -1.984e-02, -1.343e-01, -5.662e-02, 5.047e-02, 1.247e-01, 6.769e-02, -1.240e-02, -2.290e-02, -1.459e-01)); + r += mul(s2_8, M4(-1.718e-02, 1.173e-01, 1.001e-01, 1.079e-02, 7.155e-03, -9.204e-02, 3.039e-03, 7.101e-02, 8.618e-02, -3.785e-02, 1.479e-02, -1.640e-02, -4.611e-02, 7.484e-02, -5.833e-02, 1.304e-03)); + r += mul(s3_0, M4(8.277e-03, -1.595e-01, -4.103e-02, -8.835e-02, -9.754e-03, -3.271e-02, -9.689e-02, 1.119e-01, -7.056e-02, -4.024e-02, -4.186e-02, -1.469e-01, -1.754e-01, 5.795e-02, 1.035e-01, 2.900e-01)); + r += mul(s3_1, M4(-1.051e-01, -4.836e-02, -1.145e-02, -1.092e-01, -2.848e-02, 1.265e-01, -7.104e-02, -1.082e-01, 3.574e-03, 2.179e-01, -6.690e-02, -2.639e-01, 9.831e-02, 2.523e-01, 1.111e-01, 3.380e-01)); + r += mul(s3_2, M4(9.900e-03, 2.524e-01, 4.151e-02, -5.739e-02, -7.735e-02, 1.450e-01, -3.100e-02, -1.813e-03, -6.282e-02, 8.410e-02, -6.079e-02, 9.522e-02, -6.133e-02, 1.919e-02, 3.723e-02, 1.790e-01)); + r += mul(s3_3, M4(1.168e-02, -5.182e-02, -8.629e-03, -4.605e-03, -7.267e-02, 7.730e-02, 4.429e-02, 1.348e-01, 1.449e-01, 4.781e-02, 1.475e-01, -3.369e-01, 8.468e-02, -1.454e-01, -1.867e-02, 3.212e-01)); + r += mul(s3_4, M4(-2.233e-01, -1.076e-01, -4.514e-03, -6.716e-02, 8.486e-02, 7.153e-02, -1.538e-01, -1.175e-01, -2.348e-01, 1.887e-01, 1.131e-01, 1.229e-01, -1.095e-01, 1.724e-01, -6.757e-02, 2.029e-01)); + r += mul(s3_5, M4(-9.904e-03, -2.001e-01, -5.167e-02, 4.948e-02, 2.417e-02, 2.727e-02, -3.665e-02, -1.685e-01, -1.830e-01, -5.641e-02, -1.353e-01, -3.405e-01, 1.950e-01, -1.364e-03, 6.277e-02, 1.835e-01)); + r +=
mul(s3_6, M4(-4.270e-02, 7.641e-02, -1.429e-02, -2.781e-03, -6.238e-02, -1.662e-01, -6.906e-02, 1.265e-01, -9.252e-02, 9.206e-02, -1.256e-01, -1.363e-01, -7.575e-02, -5.237e-02, -9.822e-02, -2.591e-01)); + r += mul(s3_7, M4(-7.352e-02, 8.076e-02, -2.072e-02, 1.302e-02, -2.489e-02, 1.206e-01, -1.319e-03, -3.979e-02, -1.784e-01, 8.184e-03, 2.105e-01, -1.754e-01, 3.353e-01, 2.512e-01, 2.847e-01, -2.669e-01)); + r += mul(s3_8, M4(8.568e-02, 7.808e-02, 4.092e-02, -6.036e-02, 6.053e-02, -2.914e-02, 1.325e-02, 1.042e-02, 1.044e-02, -2.953e-02, 2.030e-02, 1.755e-01, 1.239e-01, -2.500e-01, -7.221e-02, -2.046e-01)); + r += mul(s4_0, M4(5.722e-03, 4.126e-01, -3.613e-01, -2.603e-01, 2.610e-02, 1.147e-01, 1.529e-01, 1.412e-02, -1.607e-02, 9.082e-02, -5.023e-02, 7.084e-02, -1.773e-02, -5.129e-02, 4.868e-02, -8.452e-02)); + r += mul(s4_1, M4(-2.186e-01, 2.312e-01, -2.029e-01, -4.311e-01, -8.737e-02, 7.129e-02, -7.808e-02, -7.963e-02, 7.678e-02, -2.709e-02, 1.466e-01, 3.389e-02, 1.023e-01, 7.007e-02, -8.046e-02, 1.188e-01)); + r += mul(s4_2, M4(-3.997e-02, 3.701e-01, 2.073e-02, -4.759e-01, -5.424e-02, -9.812e-02, -1.673e-02, 1.760e-01, 2.068e-01, -1.508e-02, 9.934e-02, 1.182e-01, 7.226e-02, -2.287e-02, 5.119e-02, -4.459e-02)); + r += mul(s4_3, M4(-7.220e-02, -4.560e-02, 1.812e-01, 1.372e-01, -8.377e-02, 3.883e-02, -1.174e-01, -1.298e-01, 4.797e-02, 6.047e-02, -2.871e-02, 1.695e-01, 7.600e-02, -1.931e-02, -6.410e-02, -1.546e-01)); + r += mul(s4_4, M4(2.055e-01, -1.577e-01, 3.279e-01, -3.042e-02, 2.706e-02, -6.100e-02, -5.843e-02, 8.075e-03, 9.117e-02, 1.521e-01, -9.218e-02, -7.202e-02, -4.476e-02, -2.888e-02, 9.001e-03, 6.872e-02)); + r += mul(s4_5, M4(1.248e-01, 9.935e-02, 8.897e-03, 1.009e-02, 8.706e-02, -1.506e-01, -3.442e-02, -1.989e-01, -8.537e-02, 3.353e-02, 6.227e-02, -5.717e-02, -1.937e-02, -5.030e-02, 9.230e-02, 6.029e-02)); + r += mul(s4_6, M4(-2.969e-02, -1.519e-01, -5.197e-02, -2.457e-02, -3.014e-02, -3.094e-02, 3.255e-02, 1.176e-01, -2.714e-02, 6.242e-02, 2.370e-02,
-3.218e-02, -6.600e-02, 1.331e-01, -4.780e-02, -8.487e-03)); + r += mul(s4_7, M4(-4.082e-02, 9.736e-03, -2.189e-01, -1.217e-01, -5.973e-02, 1.184e-01, 3.667e-02, 3.074e-03, -8.391e-03, -1.761e-01, 2.629e-02, -3.397e-02, 1.468e-01, -1.218e-01, -1.082e-02, 4.480e-02)); + r += mul(s4_8, M4(1.307e-01, -6.793e-02, 1.310e-01, 3.999e-02, 1.941e-02, -6.439e-02, 7.756e-02, -1.308e-01, -9.374e-02, 5.016e-02, 5.744e-02, 8.592e-02, -1.647e-02, -1.153e-02, 1.120e-02, 7.166e-02)); + r += mul(s5_0, M4(3.852e-02, -1.823e-02, -4.158e-03, -1.270e-01, 4.434e-02, 1.546e-01, -4.803e-02, -4.949e-02, -3.976e-03, 1.076e-01, 6.032e-02, -7.862e-02, -1.530e-01, 1.587e-01, -8.905e-02, -1.204e-01)); + r += mul(s5_1, M4(4.364e-02, 2.356e-02, 5.051e-02, 1.375e-01, -1.265e-01, -1.293e-01, -1.487e-01, 5.630e-02, -1.801e-01, -1.593e-02, 5.268e-02, 1.203e-02, 1.246e-01, 1.258e-01, -1.507e-02, 1.034e-01)); + r += mul(s5_2, M4(-4.812e-02, -2.163e-01, 1.380e-02, 3.241e-02, -8.598e-02, -2.474e-02, -3.968e-02, 3.293e-03, 8.254e-04, 1.689e-01, -6.810e-02, 6.548e-02, -5.382e-02, -4.789e-02, 3.962e-02, 1.161e-01)); + r += mul(s5_3, M4(-4.842e-02, 1.189e-01, -9.461e-03, -5.474e-03, -6.373e-02, -2.870e-02, -3.926e-02, 9.671e-02, 5.846e-02, -5.715e-02, 1.402e-01, -6.238e-02, 6.486e-02, 2.418e-01, -2.878e-02, -1.650e-02)); + r += mul(s5_4, M4(2.068e-01, 1.636e-02, 2.102e-02, -4.572e-02, -2.002e-01, -3.611e-02, -4.307e-02, 2.087e-01, -1.261e-01, -4.365e-02, -7.447e-02, 1.491e-02, -7.207e-02, 2.605e-01, -3.666e-02, -8.913e-03)); + r += mul(s5_5, M4(-1.282e-01, 1.015e-01, -4.817e-02, 4.154e-02, -4.017e-02, -2.169e-01, -3.384e-02, 8.651e-02, 5.858e-02, -7.025e-02, -8.502e-02, -3.576e-02, -3.016e-02, 1.050e-01, -9.661e-03, 1.072e-01)); + r += mul(s5_6, M4(-6.082e-02, 5.885e-02, 6.084e-02, 8.203e-02, -7.865e-02, 1.687e-01, -1.191e-01, 2.030e-01, -5.271e-02, -1.547e-01, -8.204e-02, -9.515e-02, -1.284e-01, -9.417e-02, -1.433e-01, 3.083e-02)); + r += mul(s5_7, M4(-2.314e-03, 4.900e-02, 9.433e-04, 2.277e-02, 2.046e-01,
6.075e-03, 1.429e-01, 2.992e-02, 8.627e-02, 3.431e-02, 4.757e-04, -4.053e-02, -3.486e-02, -1.552e-01, -6.662e-02, 7.499e-02)); + r += mul(s5_8, M4(1.484e-03, -1.217e-01, -9.341e-03, 1.989e-02, -5.407e-02, 2.549e-01, 2.559e-02, -3.438e-02, -1.755e-02, 4.560e-02, -2.738e-02, 2.149e-02, -8.420e-02, 1.302e-01, -1.704e-02, 4.946e-02)); + r += mul(s6_0, M4(7.802e-03, 2.278e-02, -1.119e-01, 2.124e-02, 1.067e-01, -3.640e-03, 2.168e-02, 1.909e-01, -1.058e-03, 1.661e-02, 6.322e-02, -3.902e-02, 2.047e-02, 2.947e-02, 5.102e-03, -6.674e-02)); + r += mul(s6_1, M4(2.630e-02, 3.320e-02, 6.457e-02, -1.115e-01, -1.025e-01, -4.112e-02, -5.155e-02, -1.293e-01, 4.904e-02, -9.469e-02, -5.323e-02, 8.098e-02, 9.945e-02, -1.583e-02, -7.869e-03, -9.910e-02)); + r += mul(s6_2, M4(-7.884e-02, 5.598e-02, 1.559e-02, -1.138e-01, 1.061e-01, -4.671e-03, -7.834e-02, -2.055e-02, -5.253e-02, 9.596e-02, -4.976e-02, 2.746e-02, -8.401e-02, -9.472e-02, 4.217e-02, 8.313e-03)); + r += mul(s6_3, M4(4.689e-03, -1.601e-01, 1.074e-02, -8.228e-02, -9.069e-02, -5.569e-02, -1.131e-01, 1.340e-03, 3.793e-03, 5.536e-02, 1.857e-02, 3.456e-02, -8.922e-02, 8.750e-02, -7.929e-02, -3.118e-02)); + r += mul(s6_4, M4(4.851e-02, 9.989e-02, 5.487e-02, 5.841e-02, -6.260e-02, -1.672e-01, -2.811e-02, -2.638e-02, 9.154e-02, -6.306e-02, 3.378e-02, -4.780e-02, 4.078e-02, -8.938e-02, -1.635e-02, 1.603e-01)); + r += mul(s6_5, M4(4.805e-02, 5.252e-02, -7.036e-04, 6.648e-02, -1.602e-03, 2.032e-02, 4.342e-02, 3.548e-02, 1.421e-02, -7.686e-04, -3.262e-02, -9.598e-02, -1.952e-02, -4.057e-02, 3.503e-02, -6.934e-03)); + r += mul(s6_6, M4(-1.160e-01, -7.098e-02, -8.229e-02, -8.787e-02, -6.369e-02, 1.392e-02, 1.723e-01, 2.007e-02, -1.959e-02, 1.816e-02, -5.830e-02, 4.776e-02, 5.662e-03, 1.789e-02, 8.129e-02, -3.355e-02)); + r += mul(s6_7, M4(9.544e-02, -1.052e-01, -2.019e-02, -1.729e-01, 9.737e-02, -4.807e-02, 6.829e-02, 7.081e-02, 3.774e-02, -2.293e-02, -9.601e-03, 7.726e-03, 3.375e-02, 8.823e-02, -9.270e-03, -1.209e-01)); + r += mul(s6_8,
M4(-6.015e-02, -2.346e-02, 6.853e-02, 1.356e-02, 1.668e-02, -5.883e-02, 9.045e-02, 5.750e-02, 3.076e-03, 1.077e-01, 3.610e-02, -6.277e-02, 1.998e-02, 2.734e-02, -8.978e-04, -4.823e-02)); + r += mul(s7_0, M4(-5.415e-03, 1.558e-01, -4.855e-02, 8.577e-02, -2.017e-01, 1.171e-01, -4.947e-02, 2.320e-01, -6.848e-02, 1.222e-01, 4.514e-02, -3.591e-01, -3.449e-02, -1.256e-01, -9.128e-03, -3.761e-02)); + r += mul(s7_1, M4(1.660e-01, -4.438e-03, -4.232e-02, -1.495e-01, -1.065e-01, 6.896e-02, 1.516e-01, 4.233e-01, -2.965e-01, -2.973e-02, -5.702e-02, 8.025e-02, -2.896e-02, 1.296e-01, -1.252e-02, -3.026e-01)); + r += mul(s7_2, M4(-1.426e-01, -4.235e-02, 1.376e-01, 3.093e-01, 1.945e-02, 1.804e-01, -1.236e-01, -8.763e-02, 1.984e-03, 7.175e-03, -4.381e-02, 6.005e-02, 2.612e-02, 1.254e-02, -1.059e-01, 1.869e-01)); + r += mul(s7_3, M4(-1.239e-02, -5.878e-02, 1.832e-01, -4.102e-02, 3.603e-02, -2.616e-02, -7.645e-03, -2.384e-01, -4.864e-02, -6.193e-01, -5.429e-02, -2.627e-01, -5.015e-02, -1.542e-01, -1.451e-01, -2.159e-01)); + r += mul(s7_4, M4(2.153e-01, -6.111e-02, 6.958e-02, 9.047e-02, -1.440e-01, -4.363e-01, 1.504e-01, 1.883e-01, -3.967e-02, 1.885e-01, 3.403e-01, 3.098e-02, 1.393e-01, 2.831e-01, 8.139e-02, 3.354e-01)); + r += mul(s7_5, M4(-6.636e-02, 2.313e-01, 3.849e-02, -6.318e-02, 4.492e-02, -4.309e-02, 6.280e-02, -9.903e-02, -5.876e-02, -2.058e-02, -6.639e-02, -2.525e-02, -1.642e-02, 1.677e-01, 8.417e-02, 9.576e-02)); + r += mul(s7_6, M4(-1.045e-01, 1.402e-01, 1.075e-01, 5.835e-02, 2.309e-01, 1.113e-01, -3.922e-01, -2.443e-01, 1.291e-02, -2.973e-01, 1.194e-01, -1.155e-01, -3.812e-02, 6.339e-02, 3.666e-02, -1.579e-01)); + r += mul(s7_7, M4(6.005e-02, 9.900e-02, 1.610e-01, 3.982e-02, 2.791e-01, -2.210e-01, 1.709e-01, 1.389e-01, -4.963e-02, -1.665e-01, 1.041e-01, 2.385e-01, -7.028e-02, 6.870e-02, 3.517e-02, 4.387e-02)); + r += mul(s7_8, M4(-1.033e-01, -1.854e-02, 1.973e-01, 2.803e-01, -1.608e-01, -3.143e-01, -1.644e-01, 1.757e-01, -3.980e-02, -1.228e-01, -2.332e-02, -4.506e-02,
-1.868e-01, -1.363e-01, 6.617e-02, -2.526e-01)); + r += V4(1.765e-02, -1.702e-03, 6.218e-02, 1.051e-01); /* constant V4 added once, after all 72 terms */ + return r; +} +/* Pass6: handles one pixel. gxy = Rmp8x8(tid.x) + blockStart; returns early when gxy lies outside GetInputSize(). Reads l0..l3 at the nine offsets x,y in {-1, 0, 1}, splits every sample into max(x, 0) and -max(-x, 0), and writes f0..f3 of the resulting 72 values to t4..t7 at gxy. */ +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); /* NOTE(review): pt and pos are never named again in this function; presumably the l0..l3 macros (defined outside this view) expand to code that reads them - confirm before renaming or removing */ + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; /* nothing is written for coordinates outside the input size */ + } + float2 pos = (gxy + 0.5) * pt; + /* l0 samples, offsets listed row by row (y = -1, 0, 1; x = -1, 0, 1 within a row). s1_* keeps -max(-x, 0), i.e. the non-positive part; s0_* is then overwritten with max(x, 0). */ + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + /* Same split for l1: s2_* ends up as max(x, 0), s3_* as -max(-x, 0). */ + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + /* Same split for l2: s4_* ends up as max(x, 0), s5_* as -max(-x, 0). */ + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0,
0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + /* Same split for l3: s6_* ends up as max(x, 0), s7_* as -max(-x, 0). */ + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + /* f0..f3 all receive the same 72 values in the same order; each result goes to its own target t4..t7. */ + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8,
s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 7 +//!DESC conv6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2,
V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.718e-02, -8.202e-02, -1.018e-02, -2.853e-02, 1.284e-01, 1.685e-01, 1.379e-01, -3.260e-02, 9.981e-02, -8.816e-02, -2.800e-02, -1.010e-01, 8.629e-02, 7.835e-02, -1.294e-01, 1.779e-01)); + r += mul(s0_1, M4(-6.872e-02, 1.356e-01, -9.820e-02, -9.784e-02, -1.085e-01, 3.509e-01, -5.979e-02, -8.904e-02, -2.321e-02, -1.062e-01, -5.778e-02, -4.640e-02, 9.204e-02, 1.361e-01, -1.740e-02, -1.916e-02)); + r += mul(s0_2, M4(-1.198e-02, -6.108e-02, -1.008e-01, 3.357e-02, -6.802e-02, -5.062e-02, 6.497e-02, 1.741e-01, -1.439e-02, -5.226e-02, -5.589e-02, -7.474e-02, -2.590e-02, 3.298e-01, 8.048e-02, 1.229e-01)); + r += mul(s0_3, M4(1.860e-02, 9.250e-02, 3.856e-02, -1.645e-02, -7.198e-02, -1.508e-01, -3.717e-01, 4.135e-02, 6.046e-02, -2.484e-02, -9.424e-02, -3.065e-02, 9.249e-02, 1.777e-01, -6.886e-02, 8.792e-02)); + r += mul(s0_4, M4(5.500e-02, 3.055e-03, 6.353e-02, -6.284e-02, 2.039e-01, 3.722e-01, 1.045e-01, 2.724e-01, -1.155e-02, -8.339e-02, -1.767e-01, -1.583e-02, 1.247e-02, -2.453e-01, 5.363e-02, -4.501e-02)); + r += mul(s0_5, M4(1.291e-02, -5.202e-02, 2.248e-02, -8.016e-02, -5.284e-04, 2.620e-02, -2.476e-01, -9.224e-02, 1.196e-02, 2.394e-02, 1.143e-01, -4.312e-02, 2.713e-01, -3.638e-02, 8.680e-02, -2.256e-02)); + r += mul(s0_6, M4(6.454e-03, -8.439e-02, -9.302e-02, 3.210e-02, -6.510e-02, 1.940e-01, 2.069e-01, 1.123e-01, -4.923e-02, 4.551e-02, -2.022e-02, 3.547e-02, -6.468e-02, 5.224e-02, -1.750e-01, 1.339e-03)); + r += mul(s0_7, M4(5.755e-02, 2.565e-02, 4.476e-02, -9.249e-02, -1.089e-01, -2.646e-01, 1.455e-01, -2.039e-01, -1.507e-02, -1.947e-02, -1.136e-01, 9.155e-02, 1.617e-02, -5.956e-02, 9.479e-02, -1.012e-01)); + r += mul(s0_8, M4(-3.537e-02, 2.375e-03, 6.685e-02, 6.837e-02, -7.694e-02, 5.882e-02, 6.707e-02, -4.092e-02, 4.985e-02, -1.126e-02, -3.756e-02, -5.434e-02, -7.286e-02, -4.212e-02, 2.833e-01, 1.419e-01)); + r += mul(s1_0, M4(4.212e-02, 6.631e-02, 4.420e-01, -3.196e-01, 
6.577e-02, -8.087e-02, -1.349e-01, 5.879e-02, -6.299e-02, -2.390e-01, 5.608e-01, 7.497e-02, -3.163e-02, -1.203e-01, 9.255e-03, 2.999e-02)); + r += mul(s1_1, M4(1.229e-02, 4.567e-01, 2.967e-02, -7.213e-02, -7.642e-02, 8.068e-02, -8.311e-02, -4.991e-02, -7.415e-02, 7.353e-03, 2.453e-01, -9.773e-03, -1.255e-03, -5.383e-02, -1.016e-03, 3.668e-02)); + r += mul(s1_2, M4(-6.950e-02, -9.309e-02, 1.066e-01, 1.217e-01, -4.669e-03, -4.614e-02, -5.387e-03, -1.617e-02, -4.648e-02, -4.560e-02, 5.137e-01, 1.159e-01, -4.001e-02, 6.399e-02, -3.983e-02, 2.518e-02)); + r += mul(s1_3, M4(1.325e-01, 3.240e-03, 4.325e-02, -5.013e-02, -8.231e-02, 3.874e-03, -6.022e-02, -4.994e-04, 1.329e-01, 4.500e-03, 3.934e-01, 2.831e-01, 4.297e-02, -8.310e-02, -4.804e-02, -2.274e-02)); + r += mul(s1_4, M4(-2.826e-02, -4.163e-01, -4.497e-02, -1.253e-01, 2.032e-01, 1.174e-01, 7.893e-02, -6.914e-02, 1.188e-01, 2.026e-02, 5.545e-01, 1.031e-01, -1.924e-02, -1.102e-01, 7.859e-02, -1.259e-01)); + r += mul(s1_5, M4(-9.041e-03, -1.733e-02, -1.069e-01, -1.481e-01, -2.549e-02, -8.548e-02, 3.717e-02, -3.341e-02, 6.513e-02, -8.513e-02, 3.118e-01, 2.700e-01, -5.118e-02, -6.066e-02, -3.830e-02, -5.354e-02)); + r += mul(s1_6, M4(-1.181e-01, -2.687e-01, 3.724e-01, -1.612e-02, -4.786e-02, 2.826e-02, -9.258e-03, -2.180e-02, 6.983e-02, 6.348e-02, 3.651e-01, 2.823e-01, -1.002e-03, -2.888e-02, -8.482e-02, 1.129e-02)); + r += mul(s1_7, M4(1.131e-01, 3.864e-01, 1.880e-01, 2.186e-02, -6.575e-02, -2.174e-02, -6.042e-02, -1.454e-01, 1.182e-01, 8.384e-02, 8.385e-02, 8.963e-02, 6.162e-02, 3.319e-02, 5.248e-02, 6.789e-02)); + r += mul(s1_8, M4(-1.187e-01, -4.859e-02, 3.125e-01, 1.742e-01, -2.494e-02, 1.867e-02, -6.954e-03, 1.037e-01, 5.038e-02, 1.575e-01, 4.483e-01, 8.318e-02, 4.146e-02, 1.397e-02, -2.191e-02, 1.146e-01)); + r += mul(s2_0, M4(8.602e-02, 8.163e-02, -7.871e-02, 4.891e-02, 7.932e-02, -9.431e-03, 3.915e-02, -8.264e-02, 1.969e-02, -2.648e-02, -3.828e-02, 3.037e-01, 2.901e-02, -1.117e-02, 7.057e-02, -1.210e-01)); + r += 
mul(s2_1, M4(6.053e-02, 8.860e-02, -1.744e-02, -1.174e-01, -2.299e-02, 5.356e-02, -6.969e-02, 3.081e-02, 1.885e-01, -1.362e-01, 9.485e-02, -2.466e-02, -1.316e-02, -3.166e-02, 1.153e-01, -4.538e-02)); + r += mul(s2_2, M4(-1.955e-03, -1.712e-02, -4.516e-02, 4.962e-03, -3.919e-03, -5.276e-02, 8.403e-02, -1.010e-01, -2.613e-02, 1.075e-02, -1.780e-02, 9.267e-02, -3.857e-02, 3.668e-02, 8.124e-03, 9.187e-02)); + r += mul(s2_3, M4(-9.735e-02, -2.692e-02, -1.537e-03, 6.853e-03, 1.831e-02, -6.472e-02, -4.426e-02, -7.690e-02, -5.143e-02, 1.587e-01, -1.201e-01, 2.530e-01, -4.265e-02, 6.130e-02, -1.247e-01, 5.403e-02)); + r += mul(s2_4, M4(-2.698e-01, -1.373e-01, -1.386e-01, -1.182e-01, 1.186e-01, 4.950e-02, 3.269e-02, 1.779e-02, -5.773e-04, -9.727e-02, -1.069e-02, -7.154e-02, -1.154e-02, -1.324e-01, 2.176e-01, 1.509e-01)); + r += mul(s2_5, M4(-7.123e-02, -9.497e-02, 2.233e-02, -4.065e-02, -4.490e-02, -2.027e-03, -6.521e-02, -1.152e-01, 3.282e-02, -1.918e-01, -1.698e-01, -9.488e-02, -5.553e-03, -4.072e-02, -2.341e-02, -4.568e-02)); + r += mul(s2_6, M4(1.462e-02, 1.500e-02, 1.312e-01, -2.640e-02, 6.620e-03, 6.753e-02, -5.512e-02, -6.508e-03, 3.561e-02, 1.192e-01, -2.606e-01, 2.067e-02, 9.152e-03, -1.196e-01, -8.657e-02, 2.429e-03)); + r += mul(s2_7, M4(-1.313e-01, 4.200e-02, 4.716e-03, -1.465e-02, -4.390e-02, 3.163e-02, -1.107e-01, 1.014e-01, 6.789e-02, 4.372e-02, 2.473e-02, -1.312e-01, 5.449e-02, 1.128e-01, -1.488e-01, 3.109e-02)); + r += mul(s2_8, M4(4.463e-02, 2.577e-02, -1.173e-01, 3.381e-02, 5.763e-02, 1.161e-01, 8.468e-03, -5.726e-02, -2.768e-03, 2.967e-01, 8.417e-02, 2.286e-01, -8.721e-02, -4.468e-02, 5.642e-02, 3.393e-02)); + r += mul(s3_0, M4(2.785e-02, -1.059e-02, -6.081e-02, 6.824e-02, 1.019e-01, -4.879e-02, 2.369e-01, 5.359e-02, -1.638e-02, 7.854e-02, -4.884e-03, 2.060e-02, -4.335e-02, -1.612e-01, -1.381e-01, 1.541e-02)); + r += mul(s3_1, M4(5.914e-03, 1.536e-01, -7.971e-02, -5.216e-02, -1.790e-01, -6.957e-02, 2.063e-01, 1.364e-01, -4.783e-02, -5.646e-02, 3.047e-02, 
1.945e-01, -1.324e-02, -1.024e-01, 3.042e-02, -1.551e-01)); + r += mul(s3_2, M4(-6.871e-03, -8.348e-03, -1.394e-01, -1.532e-01, -1.077e-02, 1.294e-02, 9.267e-02, 1.262e-02, -2.229e-02, 3.517e-02, -3.753e-02, 3.624e-02, -5.983e-02, 1.235e-01, -2.210e-01, 2.210e-01)); + r += mul(s3_3, M4(-9.624e-02, 6.428e-02, -1.367e-01, -1.417e-01, -6.039e-02, -1.822e-01, 9.018e-02, -3.782e-02, 1.110e-02, 8.363e-02, -6.648e-02, 6.115e-02, -6.637e-02, 1.008e-01, -6.245e-02, -1.080e-01)); + r += mul(s3_4, M4(-2.911e-01, -2.933e-02, -1.957e-01, -1.033e-01, -1.247e-01, -2.825e-02, 1.588e-01, -3.565e-02, -6.421e-02, 1.126e-01, -3.898e-02, -2.168e-01, -9.752e-02, 5.089e-02, -6.571e-02, -1.463e-01)); + r += mul(s3_5, M4(-1.158e-02, -1.587e-01, -7.006e-02, -1.320e-01, -1.008e-01, 8.504e-02, -1.884e-01, 1.124e-01, -1.977e-02, -2.067e-02, 2.861e-02, 9.328e-02, -9.525e-03, 1.805e-02, 7.065e-02, 9.978e-02)); + r += mul(s3_6, M4(6.207e-02, -4.493e-02, -3.972e-02, 2.911e-02, -6.406e-02, 1.111e-01, 2.152e-01, 7.599e-02, 6.640e-02, 3.720e-02, 1.199e-02, -3.906e-02, -3.175e-02, -9.507e-02, 1.091e-02, 1.388e-01)); + r += mul(s3_7, M4(-4.887e-02, 2.451e-01, 7.242e-02, -3.975e-02, -1.296e-02, -4.460e-03, 1.782e-01, 5.657e-02, 7.401e-02, -1.326e-01, 1.258e-01, -1.073e-01, 6.708e-02, 1.576e-02, -1.016e-01, -5.122e-02)); + r += mul(s3_8, M4(5.060e-02, 1.423e-01, 3.109e-02, 5.104e-02, -5.501e-03, 1.125e-01, 2.659e-01, 1.597e-01, -5.607e-02, -7.684e-02, 7.059e-02, 7.088e-03, -7.478e-02, -1.325e-01, -5.496e-02, -9.790e-02)); + r += mul(s4_0, M4(1.218e-02, 2.151e-02, 1.673e-01, 1.103e-02, -7.209e-02, 4.635e-02, -4.470e-02, -5.261e-02, -4.842e-02, -7.382e-02, -1.920e-02, 6.781e-02, 2.520e-02, 2.799e-03, -9.569e-03, 6.933e-02)); + r += mul(s4_1, M4(1.102e-02, -1.021e-01, -2.305e-03, -8.108e-02, 5.554e-02, 2.008e-01, 3.914e-01, 1.719e-01, 1.665e-01, 7.755e-02, 4.383e-02, -2.259e-01, 1.642e-02, 2.737e-02, -3.519e-02, -3.913e-02)); + r += mul(s4_2, M4(3.852e-02, 1.418e-01, -3.151e-02, -8.120e-02, 4.332e-02, 
3.105e-02, 1.170e-02, -3.059e-01, -6.805e-02, 8.697e-02, 1.121e-01, 7.421e-02, -7.301e-02, -7.581e-02, -2.036e-02, 2.260e-02)); + r += mul(s4_3, M4(-1.036e-02, -8.341e-03, 5.250e-02, 7.334e-02, -1.674e-01, 9.886e-02, 1.259e-01, 9.253e-02, 4.640e-02, -1.901e-01, 1.677e-03, -2.622e-01, 4.458e-02, 1.031e-01, -3.293e-02, -8.904e-02)); + r += mul(s4_4, M4(1.049e-01, 2.466e-01, 7.060e-02, -1.470e-01, 2.054e-01, -1.733e-01, 2.426e-01, 2.133e-01, 5.399e-02, -1.486e-01, 3.915e-01, 1.883e-02, 5.895e-02, 6.983e-02, -3.277e-02, 1.228e-02)); + r += mul(s4_5, M4(-1.389e-02, 5.923e-02, -3.840e-02, -8.082e-02, 4.228e-02, -7.239e-02, 8.146e-02, -1.037e-01, 2.015e-03, -2.648e-01, 8.532e-04, -1.395e-01, -1.921e-02, -1.184e-01, 2.170e-01, -1.655e-01)); + r += mul(s4_6, M4(2.296e-02, -1.426e-01, -6.419e-02, 1.021e-01, -1.598e-01, 1.131e-02, -9.871e-02, -1.011e-01, -1.533e-02, 3.144e-02, 1.096e-01, -4.111e-02, -2.128e-02, 7.637e-02, 7.540e-03, 7.346e-03)); + r += mul(s4_7, M4(-5.896e-02, 3.258e-03, -2.254e-02, -2.093e-02, 7.896e-02, 2.150e-01, -3.433e-02, 2.686e-01, -1.904e-01, -1.228e-01, -7.412e-02, 7.048e-02, -4.302e-02, -4.938e-02, -5.080e-03, 1.112e-01)); + r += mul(s4_8, M4(4.020e-04, 3.734e-02, 4.885e-02, 1.918e-02, -3.440e-02, 2.046e-01, 1.329e-01, 1.036e-01, 8.043e-02, 1.284e-01, -1.279e-01, 3.412e-02, -3.367e-02, -5.869e-03, 1.358e-01, -9.986e-02)); + r += mul(s5_0, M4(1.178e-01, -7.698e-02, 3.072e-01, -1.409e-01, 3.090e-02, -2.276e-02, -1.763e-02, 7.773e-03, -2.988e-02, 1.588e-02, -2.691e-02, 1.206e-02, -9.949e-03, -7.274e-02, 4.947e-03, -1.092e-01)); + r += mul(s5_1, M4(6.552e-02, -1.524e-01, -3.714e-02, 2.934e-02, 2.685e-02, -6.010e-02, 7.930e-02, -4.521e-02, 2.721e-02, -4.498e-02, -1.168e-01, 4.876e-02, 9.393e-02, -1.674e-01, -8.292e-02, -1.659e-01)); + r += mul(s5_2, M4(1.975e-02, -1.259e-01, 5.959e-02, 1.999e-01, -1.048e-02, 4.878e-03, -1.101e-01, -2.550e-02, -1.032e-01, 1.116e-01, -1.352e-02, -2.595e-02, -8.715e-02, 2.644e-02, 4.559e-02, 9.168e-02)); + r += mul(s5_3, 
M4(-6.535e-03, -9.066e-02, -6.275e-02, -6.391e-02, -6.771e-02, 8.922e-03, -6.393e-02, 2.557e-02, 4.329e-02, -1.172e-02, -5.372e-02, -6.124e-02, -2.495e-02, 8.173e-02, 7.240e-02, 4.019e-04)); + r += mul(s5_4, M4(6.099e-02, -2.626e-03, 3.458e-02, -1.937e-01, -2.284e-02, 6.375e-02, 1.479e-01, 2.374e-02, 9.197e-02, -5.561e-02, 9.715e-02, 3.070e-02, -5.534e-02, -3.551e-03, -1.670e-01, 7.501e-02)); + r += mul(s5_5, M4(-1.184e-01, -5.429e-02, 6.619e-02, 2.845e-01, -6.615e-02, 9.224e-02, -5.191e-02, -4.821e-02, -6.042e-02, 1.088e-01, -1.289e-01, -1.072e-02, -2.543e-02, 9.190e-02, 1.927e-01, 1.001e-01)); + r += mul(s5_6, M4(4.843e-02, -7.430e-03, -1.792e-01, 5.214e-02, 1.266e-01, 2.282e-02, 4.797e-02, -6.325e-02, -2.908e-02, 5.965e-02, 1.469e-01, -3.692e-02, 4.308e-02, -5.790e-02, 1.247e-01, -2.660e-02)); + r += mul(s5_7, M4(-1.313e-01, 7.601e-02, 1.268e-01, 4.791e-02, 1.074e-02, 2.548e-03, 1.374e-02, -1.269e-02, 3.504e-03, -4.516e-02, -1.498e-01, -7.743e-03, -1.316e-01, -1.548e-01, 1.141e-01, 1.482e-03)); + r += mul(s5_8, M4(-1.067e-01, 1.248e-01, 1.804e-02, 1.316e-01, 6.398e-03, -4.954e-02, -9.201e-02, 6.812e-02, -6.143e-04, 1.105e-01, 8.999e-02, 4.903e-03, 2.723e-02, -6.166e-02, -2.427e-01, 8.678e-02)); + r += mul(s6_0, M4(1.701e-02, 1.660e-02, -9.245e-02, 1.947e-02, 3.430e-02, 2.100e-01, 5.866e-02, -2.613e-02, 1.074e-01, 8.556e-02, 2.093e-02, -8.908e-02, -1.178e-02, -8.203e-03, -2.976e-02, 3.667e-02)); + r += mul(s6_1, M4(-2.865e-02, 2.035e-03, -5.570e-02, -1.005e-01, -3.399e-02, -7.246e-02, -2.762e-02, -8.589e-02, 4.469e-02, 1.807e-02, 1.042e-01, -1.067e-01, -9.374e-02, 3.830e-02, -1.881e-02, -9.340e-02)); + r += mul(s6_2, M4(1.604e-02, 1.328e-01, -3.945e-02, -1.031e-02, -6.211e-02, 7.338e-02, 1.133e-01, -9.377e-02, 4.133e-02, 9.163e-02, 1.248e-02, -1.140e-01, 2.810e-02, 1.988e-03, 9.841e-02, 1.403e-02)); + r += mul(s6_3, M4(2.370e-02, 2.131e-01, 2.920e-02, 1.542e-01, 2.497e-03, -1.257e-01, -2.220e-02, -5.964e-02, 1.046e-01, 3.582e-02, -1.902e-02, 1.093e-01, 2.618e-02, 
-3.815e-02, 8.446e-03, 7.871e-02)); + r += mul(s6_4, M4(-1.371e-02, 2.870e-02, 7.790e-02, 1.004e-01, 8.721e-02, -2.532e-02, -6.529e-02, 4.617e-02, 1.389e-01, -3.184e-02, -9.137e-02, 1.153e-01, -1.178e-02, -2.234e-02, 1.553e-01, 6.849e-02)); + r += mul(s6_5, M4(4.234e-02, 5.281e-02, -3.948e-02, 1.317e-01, 1.076e-02, 4.162e-02, -1.819e-02, -3.984e-02, -4.877e-03, 1.394e-01, -2.293e-02, -1.243e-01, 2.700e-02, -9.312e-02, 9.248e-02, 1.556e-02)); + r += mul(s6_6, M4(9.469e-03, 6.629e-02, -5.120e-02, -6.551e-02, 3.036e-02, 1.461e-01, -1.170e-01, -3.343e-02, -1.426e-02, -1.603e-01, -1.183e-01, 1.528e-01, 1.619e-02, -3.091e-02, -1.457e-01, 6.681e-02)); + r += mul(s6_7, M4(-9.827e-02, -7.408e-02, 2.393e-01, -5.931e-02, -2.101e-02, -7.364e-02, -1.364e-01, 2.930e-02, 1.234e-01, 4.634e-03, 6.662e-03, 3.655e-02, -2.111e-02, -9.549e-02, -1.476e-01, -1.065e-01)); + r += mul(s6_8, M4(3.380e-02, -3.526e-02, 1.144e-01, 1.057e-01, -6.014e-02, 2.498e-02, 1.163e-02, 6.108e-02, 8.905e-02, -8.968e-02, -7.837e-02, 2.151e-02, -2.935e-02, 9.576e-02, -1.446e-02, 6.072e-02)); + r += mul(s7_0, M4(-5.216e-02, -2.012e-01, -7.780e-02, -8.909e-02, 1.070e-01, 1.525e-02, -4.843e-02, -7.690e-02, 9.561e-02, 2.095e-01, -1.436e-01, 4.353e-02, -1.491e-02, 1.593e-01, 1.141e-01, -9.252e-02)); + r += mul(s7_1, M4(-6.736e-02, -2.779e-01, -1.413e-01, -1.304e-02, -1.817e-01, -1.142e-01, -9.634e-02, -1.399e-01, 8.054e-02, 5.630e-03, 9.265e-03, 6.376e-03, -8.055e-02, -8.913e-03, 7.448e-02, -2.056e-02)); + r += mul(s7_2, M4(2.290e-02, -1.795e-01, -1.198e-01, 9.352e-03, 6.372e-03, -8.201e-02, 7.925e-02, -1.324e-01, 6.270e-02, 2.219e-01, -1.141e-02, -8.024e-02, -3.504e-02, -4.899e-02, 1.160e-02, 9.325e-02)); + r += mul(s7_3, M4(2.246e-02, 1.945e-02, 1.189e-02, 2.097e-02, -1.055e-01, -1.246e-01, -5.041e-02, 8.908e-02, 2.917e-02, 2.635e-01, 4.136e-02, 1.109e-01, 1.061e-01, -1.678e-01, -4.121e-02, -4.184e-02)); + r += mul(s7_4, M4(-7.905e-02, -6.206e-02, -1.191e-01, -1.134e-01, 9.001e-02, 1.080e-01, -2.708e-02, 
5.197e-02, 4.085e-02, -3.313e-01, 1.441e-01, 6.758e-02, 2.182e-02, 3.359e-02, -3.803e-02, -8.723e-03)); + r += mul(s7_5, M4(-5.527e-02, -1.682e-01, -1.711e-01, -7.321e-02, 3.732e-02, 2.499e-02, 2.254e-01, 3.600e-03, -1.251e-03, 8.911e-02, -1.379e-01, -2.739e-01, -6.599e-02, 2.278e-01, -5.999e-02, 1.317e-01)); + r += mul(s7_6, M4(-1.773e-02, 4.685e-02, 4.355e-02, -1.319e-01, -4.260e-02, 8.012e-02, -1.192e-01, -4.538e-02, -1.433e-01, -8.129e-02, 2.356e-01, 6.303e-02, -5.751e-02, 1.925e-02, 3.169e-02, 5.002e-02)); + r += mul(s7_7, M4(-1.623e-02, 1.433e-01, 4.231e-02, -5.840e-02, 2.220e-02, -1.479e-01, 3.722e-02, -6.694e-02, 5.373e-02, -1.452e-01, -3.046e-01, -2.998e-01, -8.402e-02, 9.354e-02, -2.874e-02, -5.484e-02)); + r += mul(s7_8, M4(-3.694e-02, 1.443e-01, -9.271e-02, -8.724e-02, 1.410e-02, -2.405e-02, 7.101e-02, 6.669e-02, -2.002e-04, -1.301e-01, 1.144e-01, 7.191e-02, -1.026e-01, 6.668e-02, 1.914e-01, -2.186e-01)); + r += V4(5.993e-02, 1.598e-03, 2.229e-02, -1.137e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.449e-03, -3.788e-02, -4.817e-02, -8.433e-02, 8.525e-02, -1.420e-01, 3.820e-02, -7.027e-02, 4.990e-02, 3.606e-02, 3.455e-02, 1.869e-02, 7.023e-02, 5.933e-03, -7.956e-02, -5.912e-02)); + r += mul(s0_1, M4(-4.060e-02, 9.119e-02, 1.689e-02, 4.286e-02, -6.902e-02, -1.270e-01, -2.063e-01, -8.242e-02, 
3.139e-02, 6.294e-03, -1.535e-02, -5.555e-02, 1.062e-01, 7.910e-02, -6.309e-02, -1.927e-01)); + r += mul(s0_2, M4(-1.844e-02, 8.690e-02, 3.352e-02, -1.314e-01, -5.473e-02, 1.449e-01, -6.495e-02, -1.065e-01, -1.411e-03, 3.043e-02, 9.582e-02, 3.510e-02, 1.066e-01, -7.012e-02, 4.948e-02, 6.898e-02)); + r += mul(s0_3, M4(6.229e-02, -7.540e-02, -9.648e-03, 5.513e-02, -2.084e-02, 1.864e-01, -9.347e-02, 1.115e-01, -8.091e-02, 3.918e-03, -9.502e-02, -1.516e-02, -6.488e-03, -9.567e-02, 3.291e-02, 1.383e-01)); + r += mul(s0_4, M4(3.967e-02, -2.203e-02, -8.478e-03, -1.041e-01, -2.230e-01, 3.606e-01, -3.268e-02, -3.595e-02, 5.534e-02, 5.569e-03, 1.233e-01, 3.963e-02, 1.913e-01, -6.230e-02, -6.251e-02, -5.561e-02)); + r += mul(s0_5, M4(2.654e-02, -9.555e-02, -2.090e-02, -3.020e-02, -1.122e-01, 5.650e-02, 4.807e-03, -1.044e-01, 2.242e-02, 6.972e-02, -4.355e-02, 4.324e-02, 1.548e-01, -1.739e-01, 1.006e-01, 1.897e-01)); + r += mul(s0_6, M4(-8.032e-02, -4.289e-02, -6.317e-02, -2.093e-03, -2.295e-01, 1.410e-01, -1.817e-01, 4.578e-02, 5.401e-04, 8.403e-02, 1.777e-02, 4.613e-02, -4.502e-02, -2.727e-01, 4.863e-02, 1.278e-01)); + r += mul(s0_7, M4(4.624e-02, -2.305e-02, -5.084e-02, 3.623e-02, -6.729e-02, -8.742e-02, 1.357e-02, 4.308e-02, -2.050e-02, 6.628e-02, 6.019e-02, 8.383e-02, -1.248e-01, -3.781e-01, 2.430e-01, 1.500e-01)); + r += mul(s0_8, M4(-1.514e-01, 6.219e-02, -5.926e-02, 5.399e-02, -1.571e-02, -4.434e-02, -3.300e-02, 6.370e-02, -2.834e-02, -1.266e-02, 2.889e-02, 4.484e-02, 1.327e-01, -3.212e-01, 9.202e-02, -2.347e-02)); + r += mul(s1_0, M4(-1.321e-01, -1.242e-01, -2.568e-01, -3.193e-01, -3.062e-03, -9.563e-02, 5.720e-02, 1.353e-01, -5.070e-02, -1.281e-01, -1.126e-01, 3.806e-02, -3.963e-03, -1.976e-02, 5.618e-03, -2.852e-02)); + r += mul(s1_1, M4(1.399e-01, -4.819e-01, 6.187e-02, -7.253e-02, -2.269e-02, -5.228e-02, -2.679e-02, -1.321e-01, -1.060e-01, 6.649e-02, -7.585e-02, 1.077e-01, -1.755e-02, 6.109e-02, 1.470e-02, -3.969e-02)); + r += mul(s1_2, M4(-2.312e-01, 1.536e-01, 
-1.916e-02, -5.501e-01, 2.476e-03, 6.083e-02, -3.332e-02, -9.714e-03, -1.185e-01, -3.233e-02, -1.179e-01, 5.697e-02, 4.700e-02, 4.405e-02, 7.180e-02, -9.035e-02)); + r += mul(s1_3, M4(8.690e-02, 1.792e-01, 3.614e-01, 2.789e-01, 6.603e-02, -1.305e-01, -8.178e-02, -2.337e-02, 1.146e-01, 5.369e-03, -3.311e-01, -4.870e-02, 1.021e-02, -3.203e-02, 3.860e-02, 1.048e-01)); + r += mul(s1_4, M4(-2.147e-03, 1.028e-01, 1.122e-01, 6.106e-02, -9.106e-02, -8.087e-02, -3.363e-02, -2.920e-02, 1.440e-01, -7.080e-03, -1.764e-01, -6.676e-03, 3.276e-02, -4.324e-02, -1.823e-01, 3.185e-02)); + r += mul(s1_5, M4(-2.293e-01, 3.774e-01, 8.887e-02, 1.508e-01, -6.186e-02, 1.094e-03, 2.554e-02, -4.087e-02, 6.849e-02, -3.018e-02, -2.673e-01, 8.643e-02, -1.038e-01, 7.105e-02, -5.722e-02, -2.102e-02)); + r += mul(s1_6, M4(-1.462e-01, -1.410e-01, 3.153e-02, 6.715e-02, 4.593e-02, -1.019e-01, 3.027e-02, 7.161e-02, 8.211e-02, -8.201e-02, -3.403e-01, 2.494e-02, 8.837e-03, 1.183e-01, 7.194e-03, 2.351e-02)); + r += mul(s1_7, M4(-1.986e-01, -1.980e-01, -3.486e-01, -3.035e-02, -3.950e-02, -1.109e-02, 7.599e-02, -7.312e-02, 1.314e-01, -2.475e-01, -1.176e-01, 1.737e-02, -6.319e-02, -1.021e-01, 8.240e-02, 9.805e-02)); + r += mul(s1_8, M4(-1.356e-01, -2.959e-01, 4.435e-02, 1.862e-01, 1.078e-01, -4.282e-02, 6.068e-02, 7.163e-02, 1.571e-01, -9.376e-02, -1.499e-01, -1.781e-01, 7.484e-02, 9.013e-04, 1.280e-02, 8.788e-02)); + r += mul(s2_0, M4(4.763e-02, 1.396e-02, -9.853e-03, 4.906e-03, 6.963e-02, -1.599e-02, 4.233e-02, 1.564e-03, 3.934e-02, 1.577e-01, -1.629e-01, 3.703e-02, 1.013e-01, -3.866e-02, 4.691e-02, 6.083e-02)); + r += mul(s2_1, M4(7.211e-03, -7.470e-02, -1.213e-01, 1.652e-01, -4.090e-02, 9.714e-02, 1.208e-01, -4.375e-02, 8.932e-02, 7.984e-02, 9.466e-02, 9.899e-02, -6.070e-02, -1.282e-01, -8.630e-02, 9.200e-02)); + r += mul(s2_2, M4(-1.533e-02, 5.005e-02, 6.821e-02, 1.985e-02, 2.655e-03, -4.315e-02, -5.581e-02, -8.586e-03, 7.206e-02, -1.813e-01, -1.084e-01, -3.554e-02, -6.248e-03, -1.485e-01, 1.336e-03, 
-5.949e-02)); + r += mul(s2_3, M4(1.412e-01, -5.786e-03, 5.339e-02, -3.114e-02, 4.882e-02, 2.980e-02, -5.618e-02, 4.602e-02, -1.591e-02, -2.496e-02, 2.719e-03, 2.095e-01, -9.699e-02, 1.175e-01, -1.799e-01, 6.091e-03)); + r += mul(s2_4, M4(1.876e-01, 2.454e-03, -3.717e-01, -4.158e-02, -3.681e-02, -2.193e-03, 6.434e-02, 4.568e-02, 1.723e-01, 1.819e-01, -1.759e-01, 3.188e-02, 7.610e-02, 1.203e-02, 7.259e-02, -8.133e-02)); + r += mul(s2_5, M4(3.860e-02, 5.260e-02, -9.462e-02, 1.284e-01, -8.714e-04, 4.515e-02, 6.459e-02, 8.037e-02, -1.382e-01, 1.994e-01, 5.684e-02, 2.759e-02, 2.841e-02, -2.879e-02, 4.136e-02, -4.358e-02)); + r += mul(s2_6, M4(-2.640e-02, 3.660e-02, 4.060e-03, 1.007e-01, -1.999e-02, 9.608e-02, -1.717e-02, -5.437e-02, 9.631e-02, 9.981e-02, -5.108e-02, -1.873e-01, 1.695e-02, 1.345e-02, -6.706e-03, -3.257e-02)); + r += mul(s2_7, M4(-4.966e-03, 6.084e-03, 3.677e-03, 6.284e-02, -7.393e-02, 1.200e-02, -4.300e-02, 1.821e-01, 4.616e-03, 8.187e-02, -1.945e-02, -8.441e-02, -8.252e-02, -1.106e-01, -8.671e-02, 1.818e-02)); + r += mul(s2_8, M4(-1.126e-01, 1.018e-01, -6.764e-03, -4.124e-03, -7.969e-03, -3.700e-02, -5.286e-02, -4.189e-02, -4.189e-04, -1.389e-01, -7.266e-02, -1.118e-02, -5.688e-02, -1.906e-02, -1.242e-02, -9.303e-02)); + r += mul(s3_0, M4(4.366e-02, 1.495e-02, -2.451e-02, -7.179e-02, -1.431e-02, 9.672e-02, 2.586e-03, -4.483e-02, -2.061e-02, 8.246e-02, -1.682e-02, -1.404e-02, -2.087e-02, -2.342e-01, 1.283e-01, 2.121e-01)); + r += mul(s3_1, M4(1.261e-01, -6.772e-03, 8.997e-02, 1.745e-01, -4.420e-02, -4.272e-01, 8.984e-02, -2.352e-03, -4.986e-02, 4.329e-02, 8.367e-02, 1.726e-03, 3.509e-03, -4.385e-02, -4.679e-02, -1.314e-01)); + r += mul(s3_2, M4(-7.726e-02, 9.546e-02, 1.853e-01, 1.082e-01, 5.563e-02, -8.235e-02, 5.996e-03, -1.406e-01, -1.509e-01, 6.791e-02, -8.118e-03, 1.205e-02, 1.217e-01, -5.449e-02, 8.749e-02, 6.664e-02)); + r += mul(s3_3, M4(2.327e-01, 1.858e-01, 8.372e-02, -7.687e-03, 7.035e-02, 1.204e-01, 4.574e-02, 6.539e-02, -7.779e-03, 4.843e-02, 
-3.028e-02, 3.303e-03, -2.231e-01, 1.219e-02, -1.511e-01, 5.250e-03)); + r += mul(s3_4, M4(3.289e-01, 9.314e-02, -3.150e-01, 2.754e-02, -2.214e-01, -2.299e-01, 1.017e-01, -2.399e-03, 6.282e-02, -1.404e-01, -1.809e-02, -4.570e-02, -9.357e-02, -3.785e-03, -5.860e-02, -1.008e-01)); + r += mul(s3_5, M4(1.649e-01, 1.409e-02, -4.145e-02, 9.666e-02, 1.632e-02, 2.878e-02, -6.932e-02, -7.232e-02, -1.695e-02, 2.192e-02, 3.412e-02, -1.426e-02, -7.193e-02, 2.589e-02, -3.586e-03, 7.870e-02)); + r += mul(s3_6, M4(4.423e-03, -2.012e-01, -3.371e-02, 1.100e-01, 2.680e-02, -9.639e-02, 4.793e-03, -5.579e-02, 8.033e-02, -3.963e-02, 4.941e-02, -3.197e-02, 3.032e-02, -2.103e-01, 4.275e-02, 1.724e-02)); + r += mul(s3_7, M4(2.560e-01, 1.953e-02, 2.999e-02, -5.016e-02, 2.512e-02, -2.743e-02, -3.543e-02, 1.478e-01, 6.823e-02, 4.485e-02, 1.025e-01, 8.565e-04, -2.563e-02, 1.238e-01, -5.389e-02, -8.330e-02)); + r += mul(s3_8, M4(-4.668e-02, 5.297e-03, -6.290e-04, -1.017e-01, -3.426e-02, -3.534e-02, -3.185e-01, 9.515e-02, 2.685e-02, -6.823e-02, 2.208e-02, 3.880e-02, -1.261e-02, 2.131e-02, 7.511e-02, 5.117e-02)); + r += mul(s4_0, M4(-5.593e-02, 1.841e-01, -1.357e-01, -3.179e-02, 9.801e-02, 2.526e-01, 8.905e-02, 4.473e-02, -2.999e-02, 1.005e-02, 4.075e-02, -9.351e-02, 1.532e-01, -4.976e-02, 8.538e-02, 4.393e-02)); + r += mul(s4_1, M4(-2.394e-02, 6.815e-03, -4.025e-03, 4.245e-02, 6.386e-02, -2.665e-01, -2.094e-02, -1.530e-01, 4.269e-02, 9.683e-02, -5.149e-02, -1.819e-01, 1.264e-01, 8.284e-02, 5.323e-02, -2.883e-02)); + r += mul(s4_2, M4(-3.400e-02, -4.448e-02, 4.493e-02, -4.677e-03, 1.381e-02, 1.036e-02, -2.390e-03, -5.164e-02, -4.366e-02, 9.971e-02, -1.188e-01, -5.287e-02, -1.093e-01, 4.205e-02, 5.267e-02, -1.054e-01)); + r += mul(s4_3, M4(-9.811e-02, 3.292e-02, -9.772e-02, -4.517e-02, 2.219e-01, -1.472e-01, 2.143e-01, -4.187e-02, -1.091e-01, -2.002e-01, -6.219e-02, -6.699e-02, 2.276e-02, 1.721e-03, -1.623e-02, 7.721e-02)); + r += mul(s4_4, M4(-5.358e-03, 4.215e-02, -1.726e-01, -1.417e-03, 
2.992e-01, 1.606e-01, -1.260e-01, -3.107e-01, -6.310e-02, -1.652e-01, 1.243e-01, -1.744e-01, 5.203e-02, -9.819e-02, -1.667e-01, -4.351e-03)); + r += mul(s4_5, M4(-7.169e-03, -1.085e-01, 8.616e-02, -1.154e-01, 7.473e-02, 1.240e-01, 1.243e-01, 7.347e-02, -1.291e-01, 1.030e-01, -3.936e-02, -5.861e-03, 3.724e-02, 1.333e-01, 5.432e-02, -1.142e-02)); + r += mul(s4_6, M4(-5.442e-02, 1.410e-01, -1.242e-02, -4.804e-02, -3.588e-01, 1.485e-01, 9.070e-02, 4.851e-02, 1.056e-01, -9.155e-02, 1.454e-01, 1.721e-01, -4.976e-02, -8.033e-02, 4.698e-02, -1.097e-01)); + r += mul(s4_7, M4(3.759e-02, -1.753e-02, 8.650e-02, 8.278e-02, 1.691e-01, 2.585e-02, -1.371e-01, -1.095e-02, 1.284e-01, -4.929e-02, 5.555e-02, 6.373e-02, -2.994e-02, -1.625e-01, -6.110e-02, 3.357e-02)); + r += mul(s4_8, M4(-3.512e-02, -3.154e-02, -5.124e-02, -1.319e-02, -1.003e-01, 4.170e-02, -6.858e-03, -8.647e-02, -2.632e-02, 8.094e-02, -4.234e-02, -1.669e-01, -9.504e-02, 6.567e-03, 7.794e-03, 2.035e-02)); + r += mul(s5_0, M4(-3.748e-02, -1.013e-01, -6.086e-02, 5.491e-02, 5.568e-02, -1.378e-02, -1.937e-02, 2.341e-02, -5.457e-02, -6.810e-02, 1.025e-01, 1.463e-02, -1.227e-02, 6.973e-02, 1.869e-03, 5.927e-02)); + r += mul(s5_1, M4(7.857e-02, -1.122e-01, -1.831e-01, -6.384e-03, -3.573e-02, -5.984e-03, -5.606e-02, 3.123e-02, -6.017e-02, -5.027e-02, 1.882e-03, 2.747e-02, 4.596e-02, -2.555e-02, -1.390e-02, 1.651e-02)); + r += mul(s5_2, M4(9.069e-02, -4.829e-03, 7.600e-04, 8.162e-02, -1.057e-01, 3.041e-02, 6.462e-02, -1.854e-02, 6.003e-02, -7.012e-02, 1.015e-01, 2.188e-02, 3.183e-02, 2.465e-01, 1.209e-01, 1.290e-01)); + r += mul(s5_3, M4(-8.769e-02, 1.002e-01, 4.918e-02, -1.161e-01, 5.834e-02, -9.595e-03, 1.756e-02, -3.156e-02, -4.703e-02, 4.591e-03, -2.730e-02, -7.831e-02, -6.395e-02, 1.042e-01, -3.743e-02, 1.027e-01)); + r += mul(s5_4, M4(-6.949e-02, 1.673e-01, -2.080e-02, 1.337e-01, 7.394e-03, -3.346e-02, 3.889e-02, -7.691e-03, -9.445e-03, 1.433e-01, -9.238e-02, 8.740e-02, -2.214e-01, -1.635e-02, -1.793e-01, 1.913e-02)); + 
r += mul(s5_5, M4(7.810e-02, -1.914e-02, 8.791e-02, -1.753e-01, -8.120e-02, -5.981e-02, 1.780e-03, -4.513e-03, 1.851e-02, 1.581e-01, -5.446e-02, 2.837e-02, 6.029e-02, 3.778e-02, 3.337e-03, -2.154e-02)); + r += mul(s5_6, M4(-8.270e-02, 1.873e-01, 1.084e-03, 1.234e-01, 3.482e-02, -6.640e-02, -4.890e-02, 2.690e-02, 8.170e-02, -5.379e-02, 7.963e-02, 7.949e-02, 1.166e-01, 4.965e-02, 7.708e-02, -1.939e-02)); + r += mul(s5_7, M4(1.636e-01, 8.186e-02, 2.299e-01, 2.185e-01, 5.592e-02, -6.926e-02, -5.674e-03, 1.554e-02, -1.033e-02, -9.731e-02, -1.354e-02, -4.377e-02, -1.938e-02, -5.919e-02, 2.697e-02, 8.038e-02)); + r += mul(s5_8, M4(1.209e-01, -4.855e-02, 1.896e-02, 1.332e-01, 1.123e-02, -1.378e-02, 2.569e-02, 6.872e-02, 1.880e-01, -1.099e-02, -8.497e-02, -9.332e-02, 6.168e-02, 7.522e-02, 8.633e-02, 4.627e-02)); + r += mul(s6_0, M4(7.403e-03, 8.145e-03, -6.153e-02, 9.502e-02, 2.571e-02, -8.655e-02, -1.008e-02, -7.699e-02, -4.673e-02, 2.114e-02, -1.793e-02, -5.573e-02, 2.973e-02, 2.998e-02, -8.564e-02, -9.803e-02)); + r += mul(s6_1, M4(-1.991e-02, 5.373e-03, -1.266e-02, -8.695e-02, 1.257e-02, 1.773e-03, 1.552e-02, 1.419e-01, -2.419e-02, 6.933e-02, 3.161e-02, 8.895e-02, -1.247e-01, -1.227e-01, 1.894e-02, -1.078e-01)); + r += mul(s6_2, M4(-2.697e-02, 1.469e-01, 3.410e-03, -2.511e-02, -1.563e-01, 1.888e-02, 2.321e-02, 2.802e-02, -9.077e-02, 5.797e-02, -7.054e-02, 4.460e-02, -3.892e-03, -6.443e-02, -5.339e-02, 5.030e-02)); + r += mul(s6_3, M4(-6.725e-02, -3.061e-01, -1.303e-02, -2.865e-02, 5.562e-02, -2.992e-02, 9.534e-03, 7.976e-02, 2.254e-03, 4.044e-03, -1.261e-01, -5.085e-02, 3.802e-03, 7.943e-02, -2.536e-02, -4.610e-02)); + r += mul(s6_4, M4(1.153e-01, -1.238e-01, -2.200e-02, 1.969e-01, -1.159e-01, 1.269e-01, -3.461e-03, 1.008e-01, -9.917e-02, -2.784e-02, 6.399e-02, 3.246e-02, 1.811e-01, 1.800e-02, 1.404e-01, -2.439e-02)); + r += mul(s6_5, M4(4.798e-02, 2.964e-04, 7.136e-02, 2.714e-02, 8.785e-03, -8.585e-02, -2.140e-02, -1.955e-02, 6.315e-03, -9.827e-02, -2.721e-02, 
9.465e-02, 1.597e-02, -2.619e-02, 3.501e-02, 1.766e-01)); + r += mul(s6_6, M4(-1.248e-01, -1.063e-01, 5.070e-02, 1.713e-01, 1.704e-02, -1.348e-01, 8.956e-03, -2.971e-02, 5.520e-02, 2.207e-02, 1.438e-02, 4.820e-02, 3.794e-02, 2.495e-01, -2.920e-03, -1.143e-01)); + r += mul(s6_7, M4(2.949e-02, 9.060e-02, 1.596e-01, 1.733e-01, 7.490e-03, -3.273e-03, 7.705e-02, -3.255e-03, -7.283e-02, 8.520e-02, -1.070e-01, 3.652e-02, -4.259e-02, -1.457e-02, 5.281e-02, -1.463e-03)); + r += mul(s6_8, M4(4.699e-02, 1.872e-03, 3.358e-02, -6.305e-02, 3.424e-02, -4.051e-02, 3.316e-02, -5.758e-02, -3.096e-02, -4.929e-02, 3.446e-02, -4.537e-02, -6.640e-02, -3.523e-02, -3.002e-02, 5.090e-02)); + r += mul(s7_0, M4(-1.065e-01, -1.382e-01, -1.350e-02, -1.311e-01, -2.758e-02, -1.097e-01, -7.546e-02, 6.526e-02, 2.005e-02, 1.840e-01, 8.076e-02, 7.751e-02, -2.236e-02, -1.061e-02, -2.241e-03, -5.586e-02)); + r += mul(s7_1, M4(4.041e-02, -9.250e-02, 9.476e-02, -2.626e-02, -5.993e-02, 1.773e-01, 1.289e-01, -1.513e-01, 1.215e-01, -4.172e-02, 6.705e-02, 6.648e-02, -1.480e-02, 1.078e-01, -8.067e-02, -9.147e-02)); + r += mul(s7_2, M4(2.040e-02, -1.313e-01, 5.566e-02, -5.936e-02, -1.045e-01, -3.466e-02, 9.358e-02, 2.059e-01, 3.470e-02, 7.119e-02, 1.052e-01, -1.533e-02, -7.687e-02, -3.042e-02, -1.086e-01, -1.487e-03)); + r += mul(s7_3, M4(-6.280e-02, 2.873e-01, -6.877e-02, -4.537e-02, -7.636e-02, -8.482e-03, -6.059e-03, 3.606e-02, -1.828e-01, -8.439e-02, -1.074e-01, 6.662e-02, -6.266e-02, -1.555e-01, 5.204e-02, 8.863e-02)); + r += mul(s7_4, M4(-1.813e-02, 5.239e-02, -1.131e-01, -3.252e-01, -6.431e-03, 2.313e-01, -1.137e-01, 1.332e-02, -2.509e-01, -1.914e-01, -7.171e-02, 2.377e-02, -2.707e-02, 1.690e-01, 4.611e-02, -1.564e-01)); + r += mul(s7_5, M4(1.837e-02, -1.174e-01, 4.699e-02, -1.890e-01, -1.476e-02, -1.837e-01, 1.861e-02, -5.965e-02, -1.214e-01, 3.479e-01, 6.584e-02, -2.092e-02, -3.067e-02, 5.263e-02, 1.100e-01, -2.483e-02)); + r += mul(s7_6, M4(-1.633e-01, 7.453e-02, -1.681e-01, -1.765e-01, -7.604e-02, 
1.990e-01, 8.703e-02, 1.021e-01, -3.659e-02, -1.725e-01, 8.921e-02, 1.931e-01, -6.031e-02, 1.625e-01, -1.275e-01, -2.902e-01)); + r += mul(s7_7, M4(1.879e-01, 1.781e-01, -5.226e-02, -7.865e-02, -6.034e-02, -7.494e-02, -1.315e-02, -1.429e-02, -1.961e-01, -9.814e-02, 6.942e-02, 9.747e-02, -1.413e-01, 7.411e-03, 1.198e-01, -2.484e-03)); + r += mul(s7_8, M4(-2.555e-02, -4.452e-02, -8.810e-02, -5.188e-02, 8.389e-02, 7.605e-02, 2.617e-02, 5.358e-02, -3.156e-02, 7.391e-02, 4.719e-03, -1.311e-01, -1.674e-01, -8.511e-02, 2.953e-03, 1.282e-02)); + r += V4(-7.951e-02, -3.356e-02, 3.358e-02, -6.548e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(7.160e-02, 3.959e-02, -2.049e-02, -2.823e-02, 2.106e-01, 1.167e-01, 6.484e-03, 1.342e-01, 3.913e-02, 6.992e-02, -7.948e-02, -4.343e-02, -4.116e-02, -1.167e-02, -5.642e-02, 4.596e-02)); + r += mul(s0_1, M4(-7.604e-02, -4.577e-02, 1.024e-02, -1.992e-02, -1.748e-02, 1.316e-01, -3.491e-02, -1.980e-01, -3.151e-02, -2.771e-02, -5.439e-02, 7.762e-03, -8.407e-02, -5.413e-02, -3.735e-02, 1.801e-01)); + r += mul(s0_2, M4(-6.569e-03, 6.431e-03, 8.401e-02, 7.388e-02, 7.487e-02, 8.721e-02, -1.593e-01, 1.559e-01, -5.005e-02, -2.007e-02, -7.181e-02, -2.917e-02, -7.560e-02, 9.458e-03, -2.257e-01, 7.142e-02)); + r += mul(s0_3, M4(2.403e-02, 1.892e-01, -6.111e-02, -1.264e-01, -1.122e-01, 1.511e-01, 
-1.540e-01, -5.612e-02, 4.382e-02, 1.491e-01, 7.364e-02, -3.691e-02, -8.071e-02, 1.279e-02, 2.362e-01, 2.400e-02)); + r += mul(s0_4, M4(1.469e-02, -6.055e-02, 2.503e-02, 5.858e-02, -4.516e-02, 1.825e-02, 1.345e-01, 8.446e-03, 8.555e-02, -2.525e-02, -4.071e-02, -5.975e-02, -1.230e-01, 2.685e-01, -2.223e-01, 1.558e-04)); + r += mul(s0_5, M4(-3.817e-02, 1.296e-02, 6.674e-02, 5.253e-02, -1.203e-01, 1.307e-01, -1.799e-02, -1.414e-01, -2.474e-02, 4.198e-02, -5.009e-02, -3.853e-02, 3.552e-02, -1.767e-01, 3.049e-01, 7.852e-02)); + r += mul(s0_6, M4(-1.278e-02, 5.320e-02, -7.747e-02, -9.217e-02, 2.075e-01, -2.841e-02, -2.475e-01, -1.104e-01, 2.502e-02, 1.639e-02, 2.095e-02, -7.334e-02, -1.649e-01, -1.055e-01, -7.679e-03, 1.140e-01)); + r += mul(s0_7, M4(2.875e-02, -1.929e-01, -3.281e-02, -4.322e-02, 8.614e-02, 4.138e-02, -1.009e-01, 1.346e-02, 1.151e-02, -1.695e-01, -1.484e-02, 2.440e-03, -4.026e-01, 1.997e-01, 1.299e-01, 8.595e-03)); + r += mul(s0_8, M4(-5.243e-02, -7.821e-02, 1.424e-02, 1.868e-02, 1.832e-02, 3.947e-02, -4.653e-02, 1.648e-01, 1.558e-02, -1.228e-01, -8.312e-04, 7.195e-02, -7.671e-02, 1.973e-01, -7.385e-03, 9.162e-02)); + r += mul(s1_0, M4(1.297e-01, 2.295e-01, 3.568e-01, -8.589e-02, 2.555e-02, -1.081e-01, -1.752e-02, 7.812e-02, 1.454e-01, -7.115e-02, 1.180e-01, 3.964e-02, -6.218e-02, -7.728e-02, -6.482e-02, 2.299e-02)); + r += mul(s1_1, M4(-2.977e-02, 7.989e-02, -8.135e-02, 9.227e-02, -5.965e-02, -5.951e-02, -5.747e-02, -1.525e-01, 6.371e-02, 2.378e-01, 1.418e-02, -3.680e-01, -5.501e-02, 1.312e-01, 3.657e-02, -1.418e-01)); + r += mul(s1_2, M4(-3.865e-02, -4.865e-02, -1.064e-01, 3.632e-02, 8.938e-03, 1.680e-02, -3.495e-02, 5.834e-02, -4.729e-02, 2.859e-01, 1.917e-01, -2.803e-01, -4.299e-02, 1.389e-02, -6.079e-02, 7.421e-02)); + r += mul(s1_3, M4(-7.702e-02, 5.567e-02, -4.026e-01, -2.588e-01, -1.012e-01, -1.251e-01, -3.978e-02, 4.585e-02, 1.430e-01, 2.736e-01, 1.218e-01, -2.842e-01, 6.805e-02, -5.404e-03, 1.303e-02, 1.760e-03)); + r += mul(s1_4, M4(7.792e-02, 
-2.391e-01, 1.851e-01, -4.511e-02, 4.014e-03, -9.545e-02, -3.032e-02, 7.058e-02, 2.689e-02, 1.940e-01, 1.164e-01, -6.738e-02, -5.119e-02, 8.169e-02, 3.390e-03, -6.561e-03)); + r += mul(s1_5, M4(-1.986e-01, -3.959e-01, -9.847e-02, 1.115e-01, -4.450e-03, 7.398e-02, -1.009e-01, 5.284e-04, 5.953e-03, 1.209e-01, -5.698e-02, -7.200e-02, -2.860e-02, -2.696e-02, 4.271e-02, -2.730e-02)); + r += mul(s1_6, M4(3.630e-02, 2.963e-01, -7.472e-02, 3.535e-01, 1.700e-01, 6.717e-02, 1.545e-03, -3.432e-02, -2.697e-02, -1.726e-02, 1.410e-01, -1.381e-01, -1.918e-03, -7.448e-02, -2.806e-02, 8.082e-02)); + r += mul(s1_7, M4(6.192e-02, -2.643e-02, 3.315e-01, 3.428e-02, 6.790e-02, 8.312e-02, 1.480e-01, -2.098e-02, -7.603e-02, 2.345e-01, 1.325e-01, -7.989e-02, 8.093e-02, -3.724e-02, -4.953e-03, -6.597e-03)); + r += mul(s1_8, M4(-1.254e-01, 1.435e-01, -1.409e-01, 1.166e-01, -1.503e-02, 7.107e-03, -3.024e-02, -4.282e-02, 2.150e-01, 1.797e-01, 1.386e-01, -8.273e-02, 5.003e-02, -2.082e-01, 6.736e-02, -1.456e-02)); + r += mul(s2_0, M4(5.291e-02, 4.493e-02, 1.754e-02, -1.373e-02, 6.290e-02, -4.889e-02, 1.684e-02, 2.581e-02, 1.057e-01, 6.659e-02, -1.783e-01, 2.118e-01, -3.852e-04, -9.217e-02, 6.860e-02, 1.082e-02)); + r += mul(s2_1, M4(-8.459e-02, 5.697e-02, 1.011e-02, 4.819e-02, -2.456e-02, -7.738e-02, -1.274e-01, -2.464e-02, 5.461e-02, 1.497e-01, 1.442e-01, 1.067e-01, 3.481e-02, -3.209e-02, -5.797e-02, 2.584e-03)); + r += mul(s2_2, M4(-7.790e-02, 5.481e-02, 1.761e-02, -1.736e-02, 3.235e-02, -3.660e-02, 1.891e-01, 4.832e-02, -7.073e-02, -1.419e-01, 3.421e-02, 7.474e-02, 6.528e-02, -1.652e-01, 1.266e-01, 5.471e-02)); + r += mul(s2_3, M4(1.544e-01, 3.630e-02, -7.690e-02, 1.275e-01, 2.818e-03, 3.921e-02, 7.246e-04, -3.142e-02, -1.288e-01, 1.560e-02, -9.013e-02, -1.366e-01, 1.448e-02, -1.190e-01, -1.364e-01, 6.835e-02)); + r += mul(s2_4, M4(-5.506e-02, -1.212e-01, -4.502e-02, 2.112e-02, 2.754e-02, 1.100e-01, 7.503e-02, 5.057e-02, 4.632e-02, -4.281e-02, 8.304e-02, -1.328e-02, 5.502e-02, -1.287e-01, 
8.940e-02, 4.167e-02)); + r += mul(s2_5, M4(-6.920e-03, -5.461e-03, 8.102e-02, -1.422e-01, -6.519e-02, 5.124e-02, 7.573e-02, -5.185e-03, 7.927e-02, -6.519e-02, 1.565e-02, -7.461e-02, -1.193e-01, 1.284e-01, 1.002e-01, -2.378e-02)); + r += mul(s2_6, M4(1.436e-01, -6.016e-02, -8.235e-02, 3.355e-02, 7.730e-02, 1.633e-01, -9.259e-02, -4.074e-02, 9.246e-02, -1.497e-01, 6.639e-02, 3.161e-03, -3.123e-02, 8.584e-02, 1.533e-01, -4.745e-02)); + r += mul(s2_7, M4(-1.694e-01, 7.646e-02, 1.865e-01, 3.646e-02, 1.345e-02, -7.526e-02, -1.048e-01, -6.824e-02, 3.487e-01, -2.505e-01, 2.535e-01, -6.138e-02, 9.207e-02, -1.351e-01, 5.327e-02, -1.056e-02)); + r += mul(s2_8, M4(-5.303e-03, -2.066e-02, -1.876e-02, 7.126e-02, 2.470e-02, -8.398e-02, 1.877e-01, 3.652e-03, -1.164e-01, 1.180e-01, 3.439e-01, -2.599e-01, -5.798e-02, -6.116e-02, -7.023e-02, 1.361e-01)); + r += mul(s3_0, M4(4.821e-02, -1.252e-02, 1.707e-01, 9.750e-02, -2.193e-02, -1.987e-01, 3.003e-01, -7.428e-02, 3.308e-03, 9.133e-02, -2.076e-02, 3.022e-02, -1.687e-01, -1.213e-02, -9.804e-03, -2.377e-02)); + r += mul(s3_1, M4(3.894e-02, 4.871e-03, -9.353e-02, 1.207e-01, -1.865e-02, -2.623e-01, 5.401e-02, -1.799e-01, -6.579e-04, -1.822e-01, 6.615e-02, -1.295e-02, -1.341e-02, 1.533e-01, -1.098e-01, -5.478e-02)); + r += mul(s3_2, M4(-1.066e-01, -1.817e-01, -1.226e-01, -7.961e-02, 4.815e-02, 1.080e-02, -2.573e-01, -1.078e-01, -5.921e-02, 4.376e-02, -1.201e-01, -8.660e-02, 3.234e-03, -1.401e-01, 2.607e-02, -2.978e-02)); + r += mul(s3_3, M4(8.189e-02, -2.529e-02, -4.234e-02, -8.320e-02, -1.539e-01, -2.681e-01, 1.780e-02, 1.052e-01, -1.338e-01, 5.823e-02, 8.980e-04, 3.932e-02, 2.042e-02, -9.173e-02, -1.073e-01, -8.834e-02)); + r += mul(s3_4, M4(-1.762e-01, -1.966e-01, 3.809e-02, 3.330e-03, -7.671e-03, -8.328e-02, 7.621e-02, -1.166e-01, 3.125e-02, -8.722e-03, 3.297e-02, 5.293e-02, 4.815e-02, -3.511e-02, -1.132e-02, 7.434e-02)); + r += mul(s3_5, M4(-2.362e-03, -1.247e-01, 1.143e-02, -2.894e-01, -2.176e-02, -2.243e-01, -3.407e-01, 2.051e-02, 
7.622e-02, -5.383e-02, 3.306e-02, 9.119e-02, 2.668e-03, 1.585e-01, 4.227e-02, -2.221e-01)); + r += mul(s3_6, M4(8.474e-02, -4.372e-02, -3.331e-02, -9.092e-02, 3.010e-02, 1.381e-01, 6.560e-02, 8.138e-02, -9.129e-02, -1.421e-01, 4.482e-02, 1.409e-02, -1.773e-01, 1.386e-01, -3.668e-02, -9.246e-02)); + r += mul(s3_7, M4(7.007e-03, 9.400e-02, -4.633e-02, -3.270e-02, -6.299e-02, 1.121e-01, 2.946e-02, -1.837e-01, -4.667e-02, 6.520e-02, 6.433e-02, -1.248e-01, -4.962e-02, -4.701e-02, 7.428e-02, -1.523e-02)); + r += mul(s3_8, M4(5.168e-02, 5.371e-02, 1.820e-02, -1.103e-02, 5.489e-02, -1.232e-01, -1.559e-01, -8.951e-02, -5.942e-02, 9.437e-02, -4.487e-02, -1.499e-02, -3.793e-02, 1.250e-01, -9.444e-03, 1.480e-01)); + r += mul(s4_0, M4(6.804e-02, 8.773e-02, -5.802e-02, 1.059e-02, -3.449e-01, 5.425e-02, 4.849e-01, -3.830e-01, -2.164e-02, -4.346e-02, 4.221e-02, 8.661e-02, 7.835e-02, -1.134e-01, -5.008e-02, 1.706e-02)); + r += mul(s4_1, M4(-2.090e-02, -7.901e-03, 1.995e-02, 4.231e-02, 1.159e-01, -7.329e-02, -3.859e-01, 2.212e-01, 9.159e-02, 1.050e-01, 1.248e-01, 3.892e-03, -8.009e-02, -7.096e-02, -4.613e-05, -1.465e-01)); + r += mul(s4_2, M4(-4.982e-02, 1.360e-02, 9.749e-02, -2.727e-02, -8.277e-02, -1.126e-01, 3.561e-01, -2.532e-01, -6.652e-02, 1.013e-02, 4.226e-03, 1.232e-01, -9.655e-04, -6.240e-02, 4.407e-02, -4.948e-02)); + r += mul(s4_3, M4(-1.101e-01, -8.031e-02, 2.043e-02, 3.359e-02, 4.259e-02, 3.845e-01, -1.398e-01, 8.792e-02, -1.081e-01, 1.561e-01, 1.212e-01, -1.222e-01, -9.512e-02, 3.958e-02, -4.668e-02, 2.330e-02)); + r += mul(s4_4, M4(3.035e-02, 3.979e-02, -1.812e-01, 3.794e-02, -2.506e-01, -1.023e-01, 8.688e-02, -1.077e-01, -1.579e-02, -1.744e-02, 2.586e-01, -9.867e-02, 1.761e-01, 7.282e-02, -4.643e-02, 3.582e-02)); + r += mul(s4_5, M4(-3.809e-02, 1.975e-02, 9.684e-02, -1.193e-01, 1.942e-02, -2.486e-02, -1.234e-01, 1.601e-01, -3.902e-02, -1.317e-01, 1.647e-02, -4.586e-02, -9.337e-02, 4.688e-02, 8.111e-03, 2.885e-02)); + r += mul(s4_6, M4(-3.425e-02, -7.043e-02, 
-1.114e-03, 1.759e-02, 7.011e-02, -9.291e-02, -8.938e-02, 6.154e-02, -2.224e-01, 1.780e-02, -1.546e-01, -9.397e-02, -5.968e-02, 2.124e-01, -8.652e-03, 2.938e-02)); + r += mul(s4_7, M4(5.105e-02, 3.220e-02, -2.474e-02, -1.252e-01, 3.565e-01, -1.327e-01, -2.616e-01, -1.379e-01, -3.732e-02, 4.103e-01, -1.346e-02, 1.390e-01, 4.298e-02, 7.243e-02, -8.570e-02, -4.937e-02)); + r += mul(s4_8, M4(-6.515e-02, -4.400e-02, 6.307e-02, -1.577e-02, -7.272e-02, -2.053e-03, 3.536e-01, 1.460e-01, -1.441e-01, 3.251e-02, 9.263e-02, 1.137e-01, -1.025e-02, 1.260e-01, 5.062e-02, 1.262e-01)); + r += mul(s5_0, M4(-1.054e-01, -1.766e-02, -6.873e-02, 2.811e-02, 2.432e-02, -1.167e-02, 8.383e-02, 9.496e-03, 1.142e-02, 2.015e-03, -1.043e-01, -1.470e-02, 2.569e-02, 3.491e-02, -1.137e-01, 1.066e-01)); + r += mul(s5_1, M4(-8.272e-02, -6.838e-02, -6.908e-02, 5.116e-02, 1.982e-02, 6.091e-02, -1.664e-02, 2.784e-02, -8.694e-02, -8.021e-02, 4.080e-03, -7.106e-02, -1.658e-01, -1.153e-01, -6.929e-03, 9.907e-02)); + r += mul(s5_2, M4(3.934e-02, -1.248e-02, 5.516e-02, 2.029e-01, -8.927e-02, -2.967e-02, 2.056e-03, -2.477e-02, -2.253e-02, 6.042e-02, -6.240e-02, -1.136e-01, -3.062e-02, -1.227e-01, -1.619e-01, 5.870e-02)); + r += mul(s5_3, M4(-2.155e-01, -7.198e-02, -2.248e-02, -1.516e-01, -7.367e-02, 9.895e-02, -2.285e-02, -9.157e-03, 1.455e-01, -1.178e-02, -9.028e-04, 9.636e-02, 5.316e-02, -2.890e-02, -8.603e-02, -8.960e-02)); + r += mul(s5_4, M4(-2.287e-01, -3.977e-03, -1.611e-01, 9.548e-03, -2.645e-02, 8.231e-02, 2.449e-02, -2.528e-02, -8.187e-02, -3.543e-02, 1.589e-01, -2.174e-02, 1.426e-01, 1.432e-01, 4.631e-02, -1.521e-01)); + r += mul(s5_5, M4(1.351e-01, 1.218e-02, 1.307e-01, -2.186e-01, -1.043e-02, 9.595e-03, 6.245e-02, -3.976e-02, 7.011e-02, -1.027e-01, -2.723e-02, 4.857e-02, 2.734e-03, 1.324e-02, -6.431e-02, -4.493e-02)); + r += mul(s5_6, M4(-1.890e-01, -2.725e-01, 2.737e-02, 5.769e-02, 1.379e-02, -3.245e-02, -1.497e-02, -2.878e-02, -1.284e-01, -4.622e-02, -2.642e-02, -1.175e-01, -1.880e-01, 
1.827e-01, -1.657e-01, 7.942e-02)); + r += mul(s5_7, M4(-9.118e-02, 2.842e-01, -2.173e-01, 7.063e-02, 7.538e-02, 5.894e-02, -1.075e-01, 4.333e-02, -7.566e-02, -1.142e-01, 4.044e-02, 5.305e-02, 1.803e-01, 2.164e-01, -2.646e-03, -2.533e-01)); + r += mul(s5_8, M4(-7.487e-02, 7.629e-02, 1.009e-01, 4.286e-02, -1.482e-02, 3.950e-02, 1.299e-01, 6.164e-02, -5.029e-02, 1.053e-01, -9.843e-02, -1.814e-01, 9.668e-03, 1.559e-01, -9.271e-02, 1.445e-01)); + r += mul(s6_0, M4(1.327e-01, 7.610e-03, 1.497e-02, -3.495e-02, 4.965e-02, -9.208e-02, -2.610e-02, -3.640e-02, -4.734e-02, -1.242e-02, 2.328e-02, 4.842e-02, 9.447e-03, 3.225e-02, -1.092e-02, -5.538e-02)); + r += mul(s6_1, M4(-5.815e-02, 7.916e-02, 8.095e-02, -3.877e-02, 1.158e-02, 1.736e-02, -2.863e-02, 7.492e-03, 3.507e-02, -2.058e-02, 7.275e-02, -3.094e-02, -1.646e-01, 5.667e-02, 1.120e-01, -9.152e-02)); + r += mul(s6_2, M4(2.724e-02, 4.259e-02, -1.252e-01, 1.258e-01, -4.152e-02, 1.946e-01, 9.554e-02, -4.344e-02, -1.059e-01, -8.092e-03, -1.775e-01, 8.023e-02, 6.634e-04, 6.857e-02, 6.193e-02, 8.829e-02)); + r += mul(s6_3, M4(-1.191e-01, 5.508e-03, -1.777e-02, -8.923e-03, 1.028e-01, -6.273e-02, -1.418e-02, -1.355e-02, 1.433e-02, -5.940e-02, 2.669e-02, -7.870e-02, -3.210e-02, 1.420e-01, 3.797e-02, -2.151e-03)); + r += mul(s6_4, M4(-2.208e-03, -1.520e-02, 1.149e-02, -1.017e-01, -1.460e-02, 6.870e-02, 1.064e-01, -1.518e-02, 6.100e-02, 2.155e-02, -2.400e-02, -1.117e-01, 2.676e-02, 5.622e-02, 1.851e-02, -1.208e-01)); + r += mul(s6_5, M4(-7.373e-02, 7.088e-02, 1.293e-01, -8.229e-02, -3.831e-02, -4.459e-02, 1.489e-01, 1.245e-01, 1.331e-01, -3.549e-03, 7.487e-02, -1.423e-01, -3.767e-02, -8.141e-03, 6.819e-02, 2.041e-02)); + r += mul(s6_6, M4(8.549e-02, -1.567e-02, -3.462e-02, 9.251e-02, 5.768e-02, 1.247e-02, -2.593e-02, -1.606e-01, 1.465e-02, 6.304e-02, 1.089e-03, 7.985e-02, 4.038e-02, 2.702e-02, -1.045e-01, 1.279e-01)); + r += mul(s6_7, M4(1.256e-01, -3.685e-02, 1.034e-01, 8.036e-02, -4.022e-02, 5.942e-02, 7.540e-02, 4.034e-02, 
9.507e-02, -7.610e-02, -2.457e-02, -1.315e-02, 6.766e-02, 3.344e-02, -1.278e-02, -5.312e-02)); + r += mul(s6_8, M4(-2.078e-02, -2.733e-02, -4.110e-02, -4.928e-02, 2.954e-02, -4.459e-02, -5.428e-02, 1.163e-01, -2.850e-02, 7.975e-02, -7.055e-03, 1.909e-01, 1.503e-02, -8.133e-02, 9.719e-02, -4.011e-02)); + r += mul(s7_0, M4(5.745e-02, 8.149e-03, 5.020e-03, 8.201e-02, -3.625e-02, 1.546e-01, -2.182e-01, -1.970e-02, -7.573e-02, 1.147e-01, -9.093e-02, -1.201e-01, -4.465e-02, 1.782e-01, -3.328e-02, 6.867e-02)); + r += mul(s7_1, M4(1.278e-01, -1.294e-01, 1.435e-01, 8.846e-02, 1.641e-02, 7.880e-02, -4.295e-04, 7.376e-03, -5.733e-02, -1.213e-01, 1.887e-01, 2.530e-01, 5.237e-02, 9.056e-02, -1.693e-01, 1.058e-02)); + r += mul(s7_2, M4(1.051e-01, -1.403e-01, -2.152e-02, 1.688e-01, -8.572e-02, 2.348e-01, 6.435e-02, -4.347e-02, -1.404e-01, 1.339e-02, -5.361e-01, 5.124e-02, -9.061e-02, -9.480e-02, 1.026e-01, -2.842e-02)); + r += mul(s7_3, M4(5.644e-02, 4.302e-02, 1.369e-01, 1.658e-02, 7.890e-02, -1.081e-01, 2.689e-02, 2.602e-01, -1.591e-01, 2.769e-02, -3.205e-01, -4.162e-02, -5.145e-02, -3.690e-02, 1.473e-01, 4.620e-02)); + r += mul(s7_4, M4(2.471e-02, 6.913e-02, -1.824e-01, 5.231e-02, 2.212e-01, -7.666e-04, -6.935e-02, -1.392e-01, 2.019e-01, 1.681e-01, 5.797e-02, 1.882e-02, 2.256e-01, -2.192e-01, -2.268e-01, 2.986e-02)); + r += mul(s7_5, M4(-2.425e-02, 1.309e-02, -5.389e-03, 4.018e-02, -5.452e-02, 7.173e-02, 1.344e-01, -1.893e-02, 1.381e-01, -2.273e-01, -2.084e-01, -1.921e-01, 2.261e-02, -2.222e-01, -8.991e-02, -4.589e-02)); + r += mul(s7_6, M4(-2.215e-02, 5.801e-02, -1.661e-01, 6.336e-02, 3.630e-02, -1.046e-01, -5.412e-02, -4.792e-02, -8.750e-02, 9.563e-02, -2.661e-01, 8.485e-02, 1.802e-01, -6.451e-02, -1.092e-01, 1.444e-01)); + r += mul(s7_7, M4(-1.363e-02, 6.278e-02, -2.490e-01, 2.426e-02, -1.696e-02, 1.782e-02, 2.960e-02, 2.952e-02, 2.385e-02, 3.648e-02, 2.804e-02, -1.231e-01, 9.748e-02, -9.324e-02, 1.107e-02, 1.279e-01)); + r += mul(s7_8, M4(5.383e-02, -6.809e-02, -5.792e-02, 
-2.678e-02, 6.552e-03, 5.476e-02, -4.569e-01, -1.632e-02, -3.676e-02, 1.094e-01, -1.814e-01, 3.622e-02, -1.013e-01, -2.603e-02, 1.067e-01, -1.683e-01)); + r += V4(6.961e-03, -1.709e-02, -1.395e-02, 4.358e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.310e-02, -1.047e-02, 4.446e-03, -1.065e-01, 7.155e-02, 2.532e-01, 6.451e-02, 8.162e-02, 5.626e-03, -1.614e-01, 5.181e-02, -4.982e-02, -1.051e-01, -7.541e-02, 9.694e-02, 2.524e-02)); + r += mul(s0_1, M4(-7.002e-02, -4.159e-02, -9.201e-02, -1.644e-01, -7.874e-02, 9.247e-02, -1.248e-01, -1.276e-01, -6.662e-02, 4.484e-02, -1.165e-01, -2.996e-02, 8.438e-02, 7.111e-02, 1.909e-01, -8.361e-02)); + r += mul(s0_2, M4(-2.634e-02, 9.918e-02, -2.247e-02, 2.263e-02, 5.951e-02, -1.362e-03, -8.698e-02, 8.262e-02, 5.791e-03, -8.029e-03, -1.377e-01, -4.139e-02, 2.222e-01, 8.128e-02, -3.793e-01, 4.095e-01)); + r += mul(s0_3, M4(2.156e-02, 3.676e-02, 8.860e-02, 1.185e-01, 1.836e-01, -3.042e-01, -1.642e-02, 3.151e-01, -4.662e-02, -7.437e-02, -8.175e-02, -3.873e-02, 6.315e-02, -1.889e-01, -1.107e-01, -1.695e-01)); + r += mul(s0_4, M4(8.632e-02, -9.448e-02, 1.125e-01, 1.151e-02, 3.563e-02, 1.770e-01, -4.597e-02, -4.560e-02, 1.678e-02, -1.753e-01, 2.854e-02, -1.035e-02, -8.369e-03, 1.948e-01, 1.128e-01, -1.325e-01)); + r += mul(s0_5, M4(7.694e-04, -3.053e-02, 5.700e-02, -8.030e-02, 
-7.811e-03, -7.819e-02, -7.453e-02, 1.973e-01, 4.691e-02, 1.434e-01, -3.162e-02, -7.189e-02, -6.287e-02, -5.170e-01, -8.335e-02, 9.343e-02)); + r += mul(s0_6, M4(3.037e-03, -3.342e-02, -5.670e-02, -2.919e-02, 2.281e-01, 4.177e-03, 4.988e-02, 4.675e-02, 3.656e-02, 1.294e-01, 4.818e-02, 4.157e-02, -3.660e-02, -5.739e-02, -2.379e-01, -8.158e-02)); + r += mul(s0_7, M4(-1.601e-02, 6.322e-02, -7.389e-02, 2.617e-02, -1.247e-01, -3.870e-03, 9.629e-02, 2.878e-02, -7.404e-02, 7.158e-02, -1.819e-02, 8.241e-02, -2.284e-02, -1.365e-01, -1.954e-02, -1.304e-01)); + r += mul(s0_8, M4(-4.040e-02, -2.005e-02, -3.412e-02, 1.068e-01, -2.835e-02, -9.637e-02, -6.092e-02, -1.578e-01, -1.812e-02, -4.020e-02, -3.961e-02, -8.512e-02, 6.531e-02, 1.034e-02, 7.084e-02, 1.837e-02)); + r += mul(s1_0, M4(-7.400e-02, -2.048e-01, 4.059e-01, -2.345e-01, -6.631e-02, 2.262e-02, 1.462e-01, 1.211e-01, 2.671e-01, 2.154e-01, 5.736e-02, -7.656e-02, -3.599e-02, -1.662e-02, -3.786e-02, 1.181e-02)); + r += mul(s1_1, M4(-1.018e-01, -2.402e-01, 9.216e-02, -2.519e-02, 5.146e-02, -1.889e-02, -6.131e-02, -1.159e-01, 2.021e-01, 2.082e-01, -2.572e-01, 7.096e-03, 9.477e-02, -9.545e-02, -5.969e-02, -8.974e-02)); + r += mul(s1_2, M4(5.776e-02, -3.268e-02, 2.991e-01, 3.318e-01, 7.870e-02, 5.555e-02, 2.808e-02, -1.384e-02, 1.006e-02, 9.396e-02, -1.541e-01, 1.746e-01, 1.021e-01, 8.361e-02, -8.241e-02, -3.679e-02)); + r += mul(s1_3, M4(4.243e-01, 2.424e-01, 2.313e-01, -1.975e-01, -4.512e-02, -1.958e-01, 8.668e-02, 1.917e-01, 1.927e-02, 1.191e-02, -1.364e-02, -1.923e-01, 2.884e-02, 1.516e-02, -5.113e-02, -9.241e-02)); + r += mul(s1_4, M4(5.372e-01, 1.449e-01, 2.751e-02, 2.169e-01, -6.685e-02, 2.181e-01, -6.593e-02, -9.706e-02, 1.719e-01, 2.444e-02, 1.299e-01, -4.927e-02, 4.953e-02, -1.792e-02, 8.077e-02, 1.125e-01)); + r += mul(s1_5, M4(1.730e-03, -4.827e-01, 2.841e-01, -1.782e-01, -4.474e-02, 5.705e-02, -6.480e-02, 1.102e-02, 2.095e-01, -2.511e-02, 4.695e-02, -4.598e-02, -1.538e-01, 7.887e-02, 3.751e-02, -1.082e-01)); + r 
+= mul(s1_6, M4(-1.657e-01, -1.450e-01, -2.219e-02, 1.530e-01, 1.049e-01, 3.284e-02, -1.333e-01, 1.505e-02, 9.291e-02, 1.182e-01, 6.159e-02, 3.700e-02, -3.568e-04, -2.911e-02, 1.279e-02, -5.009e-02)); + r += mul(s1_7, M4(-2.588e-01, -4.585e-01, -1.425e-02, -5.658e-03, -3.324e-02, -2.476e-01, 8.082e-02, 5.820e-02, 1.353e-01, 1.031e-01, -7.908e-02, 1.681e-01, 3.531e-02, 4.241e-02, -8.830e-02, -1.067e-02)); + r += mul(s1_8, M4(1.759e-01, 1.195e-01, -5.958e-02, -1.166e-01, 1.525e-02, 2.479e-02, 4.186e-02, -3.045e-02, 3.070e-01, 3.415e-01, -1.288e-01, 4.186e-02, -6.287e-02, -5.575e-02, 1.085e-02, -2.990e-02)); + r += mul(s2_0, M4(-3.779e-02, 8.502e-02, 3.670e-02, -5.649e-02, 2.161e-02, 4.964e-02, 7.737e-02, 3.889e-02, 2.972e-02, -2.336e-02, 2.660e-02, 4.314e-02, 2.544e-02, -1.368e-01, -8.023e-02, 2.406e-02)); + r += mul(s2_1, M4(2.148e-03, 2.318e-01, -1.050e-01, 2.773e-02, 7.478e-02, -8.558e-02, -4.437e-02, -2.647e-02, -3.378e-02, 7.765e-03, 1.097e-01, -6.929e-02, 6.677e-02, -6.817e-02, -1.492e-02, 1.052e-01)); + r += mul(s2_2, M4(-1.446e-01, 5.619e-02, 7.113e-02, -1.084e-03, 4.138e-02, 8.402e-02, -1.147e-02, 2.059e-02, 3.658e-02, 1.095e-01, 1.113e-02, 2.767e-02, -1.791e-01, -2.241e-02, 8.983e-02, -2.046e-02)); + r += mul(s2_3, M4(-3.334e-02, -8.473e-02, -1.845e-01, -2.242e-02, -4.167e-03, 1.196e-01, -1.121e-01, -1.514e-02, 4.081e-02, 2.293e-01, -2.307e-01, 4.014e-01, 1.353e-01, 1.334e-01, -1.107e-01, 6.926e-02)); + r += mul(s2_4, M4(9.510e-02, -1.627e-02, 1.553e-01, -8.809e-02, -1.072e-01, 9.681e-02, -3.757e-02, -1.325e-01, -5.078e-02, 2.735e-01, 1.794e-01, 3.084e-01, 7.204e-02, -8.301e-02, -6.917e-02, -1.438e-02)); + r += mul(s2_5, M4(-4.562e-02, 1.039e-01, 1.454e-01, -2.231e-01, 1.059e-02, 5.584e-02, -1.292e-01, -1.031e-02, -1.368e-01, -1.102e-01, 1.740e-01, 7.728e-02, -1.854e-01, 4.047e-02, -2.346e-02, -1.281e-01)); + r += mul(s2_6, M4(9.606e-02, -4.460e-02, 1.095e-01, -2.996e-02, -2.107e-02, -2.014e-02, 2.350e-01, -7.004e-02, -8.360e-02, 8.714e-02, -1.265e-01, 
-2.492e-02, -4.162e-02, -1.169e-02, 4.289e-02, -1.474e-01)); + r += mul(s2_7, M4(1.057e-01, -2.529e-01, 4.756e-02, -1.517e-03, -3.801e-02, -8.089e-02, 2.027e-02, 1.219e-01, -1.119e-01, 6.527e-02, 3.577e-02, -1.732e-01, 1.022e-01, -4.923e-02, -1.197e-02, 8.317e-03)); + r += mul(s2_8, M4(1.844e-02, 4.025e-02, -1.694e-01, 1.480e-01, 3.920e-02, -8.898e-02, 3.544e-02, 1.283e-01, 1.745e-01, -1.828e-01, -8.220e-02, -3.703e-02, -9.434e-02, 8.537e-02, -2.254e-03, -2.373e-01)); + r += mul(s3_0, M4(-4.414e-02, 1.265e-01, 1.132e-01, 8.795e-02, 1.179e-03, -9.083e-02, 4.055e-02, 1.348e-01, -2.953e-02, 3.096e-02, -1.500e-02, -2.655e-02, 1.048e-01, -1.834e-01, 2.040e-02, 1.876e-02)); + r += mul(s3_1, M4(-5.469e-02, 3.613e-01, 1.175e-01, 1.847e-02, 2.442e-01, -7.088e-02, -2.256e-01, 4.352e-02, -6.138e-02, -1.043e-01, 1.027e-02, 6.533e-02, 2.383e-01, 1.044e-04, 1.277e-02, -9.124e-02)); + r += mul(s3_2, M4(-1.481e-01, -2.980e-01, 3.422e-02, -7.770e-02, 1.320e-01, 3.090e-01, -6.432e-02, -1.516e-01, 8.392e-02, -1.138e-01, 8.590e-02, -1.004e-01, 1.834e-02, 4.498e-02, -7.585e-02, 8.780e-02)); + r += mul(s3_3, M4(1.041e-01, 3.988e-03, -2.560e-01, 1.170e-01, -7.932e-02, 7.428e-03, -1.451e-01, 1.531e-01, -4.874e-02, -7.994e-04, -4.455e-02, 3.958e-03, 3.660e-03, 1.042e-01, -9.601e-02, -1.359e-02)); + r += mul(s3_4, M4(-1.098e-02, -2.595e-01, 6.339e-02, 4.795e-02, -4.507e-02, 2.261e-01, -2.230e-01, 5.826e-02, 8.636e-04, 1.241e-01, 1.149e-01, -6.624e-02, -5.745e-02, 7.117e-02, 2.267e-02, -2.648e-01)); + r += mul(s3_5, M4(9.860e-02, 2.700e-01, 1.025e-01, -3.150e-01, -1.326e-01, 1.185e-01, -4.155e-02, -1.511e-02, -4.764e-02, -6.836e-02, -2.254e-02, 7.437e-02, 2.222e-02, -7.851e-02, 1.590e-02, 1.601e-01)); + r += mul(s3_6, M4(1.393e-01, 8.621e-03, -2.183e-02, -2.017e-02, 1.148e-01, -1.643e-02, 1.285e-01, -2.263e-02, -1.718e-02, 7.851e-02, 1.532e-02, 3.675e-02, 4.451e-02, 2.104e-01, -1.268e-01, 5.241e-02)); + r += mul(s3_7, M4(-2.816e-02, -1.125e-01, 1.205e-01, -3.454e-03, 4.550e-02, -5.359e-03, 
6.028e-02, 1.263e-01, 5.069e-02, -3.246e-02, 8.699e-02, -1.295e-01, 2.440e-02, 7.617e-02, -2.258e-02, 1.113e-01)); + r += mul(s3_8, M4(-1.014e-01, 2.407e-02, -9.447e-02, 5.400e-02, 2.735e-02, -1.241e-01, -6.878e-02, 4.014e-01, 6.577e-02, -3.158e-02, 7.358e-02, 1.269e-02, 5.336e-02, -4.621e-02, 4.812e-02, -4.715e-02)); + r += mul(s4_0, M4(-1.007e-02, -7.450e-02, -5.531e-02, -4.589e-02, -1.065e-01, 1.119e-01, 9.279e-02, 2.339e-01, -4.817e-02, 1.139e-01, -3.075e-02, 5.132e-02, -1.425e-01, -6.251e-02, -3.181e-02, -6.615e-02)); + r += mul(s4_1, M4(-1.282e-01, -6.415e-02, 3.188e-02, -6.217e-02, -5.337e-02, -1.369e-01, -3.857e-01, -2.054e-03, -1.344e-01, -9.127e-02, 1.287e-01, -1.084e-01, -8.630e-02, 9.341e-02, -3.698e-02, 5.253e-02)); + r += mul(s4_2, M4(-2.931e-02, -5.825e-02, -1.005e-01, 1.509e-03, -1.696e-01, 2.150e-01, -3.179e-02, -4.437e-02, 1.685e-02, 2.196e-01, -1.043e-01, 1.195e-01, -6.515e-02, -1.503e-01, 5.465e-02, -8.743e-03)); + r += mul(s4_3, M4(2.526e-02, -7.600e-02, 1.376e-01, -1.703e-03, -3.037e-01, 4.758e-01, 1.650e-01, 1.253e-01, -5.532e-02, -1.535e-01, 1.407e-01, -2.290e-01, -3.009e-02, -8.620e-02, 1.428e-02, -1.516e-02)); + r += mul(s4_4, M4(9.797e-02, 8.982e-02, -1.111e-01, -1.288e-01, 9.594e-02, 2.375e-01, -7.565e-02, -1.898e-01, -7.647e-02, -4.225e-02, 2.417e-02, -9.900e-02, -3.667e-03, 2.601e-02, -1.526e-02, 6.440e-02)); + r += mul(s4_5, M4(4.779e-02, 1.333e-01, 5.828e-02, -2.145e-01, -3.738e-02, 2.204e-04, 8.182e-02, 3.155e-02, -7.055e-02, -7.680e-02, -6.878e-02, 1.135e-01, 6.465e-02, -2.374e-02, -3.712e-02, -3.930e-02)); + r += mul(s4_6, M4(-2.554e-02, 8.685e-03, 1.068e-01, -9.349e-02, -6.696e-02, 3.094e-02, 5.133e-01, 6.619e-02, 1.682e-01, -8.019e-02, -4.694e-02, 4.223e-02, 2.999e-02, 1.180e-01, 1.023e-01, 3.307e-02)); + r += mul(s4_7, M4(-6.283e-02, -2.698e-02, 1.240e-01, -1.550e-02, -3.459e-02, 1.235e-01, -2.164e-01, -8.066e-02, 2.666e-01, 1.082e-01, -2.279e-01, 1.298e-01, -7.096e-02, 1.398e-02, -1.126e-01, 1.729e-02)); + r += mul(s4_8, 
M4(-3.592e-02, -2.696e-02, -1.240e-01, 1.922e-01, 5.469e-02, 1.087e-02, 6.027e-02, 1.188e-01, 5.205e-02, -3.912e-02, 5.425e-02, 1.714e-02, 1.377e-01, -4.612e-03, 1.018e-01, 2.339e-02)); + r += mul(s5_0, M4(3.069e-01, 1.913e-02, -1.458e-01, 1.175e-02, -9.135e-03, 2.076e-02, 2.156e-02, 1.665e-01, 4.221e-02, -7.960e-03, 2.793e-02, 6.755e-03, -1.430e-02, -7.548e-02, 4.690e-02, 6.693e-02)); + r += mul(s5_1, M4(-1.602e-01, -1.685e-01, 1.020e-01, 7.694e-02, 1.949e-02, -1.012e-01, -1.111e-01, -1.018e-01, -3.667e-02, 4.584e-02, 6.979e-02, -8.594e-02, -1.077e-01, -7.754e-02, -1.661e-02, 5.381e-02)); + r += mul(s5_2, M4(-2.663e-02, 6.025e-02, 5.513e-02, 1.875e-01, -1.171e-02, -5.782e-02, 5.743e-02, 9.628e-03, -1.430e-01, -1.016e-02, 1.429e-02, -6.060e-03, -1.659e-07, -1.252e-01, 3.171e-02, -1.015e-01)); + r += mul(s5_3, M4(1.024e-01, 2.345e-02, -1.129e-03, 2.063e-01, -3.990e-02, 7.542e-02, -5.214e-02, 1.172e-02, -9.788e-02, 7.562e-02, 1.072e-01, -6.372e-02, 7.381e-02, -4.092e-02, 6.908e-02, -2.314e-01)); + r += mul(s5_4, M4(4.877e-02, 6.891e-02, 8.940e-02, -4.991e-02, 7.390e-02, 1.307e-01, -1.390e-02, -3.120e-02, -5.540e-02, 1.779e-01, 1.120e-02, -8.706e-02, 1.261e-01, -1.925e-01, -1.373e-02, -6.053e-02)); + r += mul(s5_5, M4(-2.393e-02, 7.691e-02, 2.439e-01, 7.700e-02, 1.795e-02, -7.796e-02, -6.049e-02, -2.466e-03, 1.844e-02, -2.038e-01, -2.290e-01, -1.484e-01, 3.392e-02, 9.811e-02, -1.844e-01, -4.141e-02)); + r += mul(s5_6, M4(-4.138e-02, -1.700e-01, 3.278e-01, 3.828e-02, -6.868e-02, -1.343e-01, 3.616e-02, 1.254e-01, 3.440e-03, -7.632e-02, -1.761e-02, 1.819e-01, 1.342e-02, 1.132e-01, -1.276e-01, 3.600e-02)); + r += mul(s5_7, M4(-1.716e-01, -5.100e-02, 1.250e-01, 8.485e-02, 3.682e-02, 1.543e-02, 1.113e-01, -9.796e-02, 1.395e-01, 4.326e-02, -4.639e-02, 1.166e-01, 3.806e-02, 1.657e-01, 2.174e-01, 4.814e-02)); + r += mul(s5_8, M4(-2.120e-01, -2.207e-02, -1.430e-01, 8.618e-02, -5.200e-02, -6.653e-02, 4.128e-02, 1.239e-01, -1.761e-01, 1.531e-01, -2.303e-01, -5.517e-03, 
-1.222e-02, -5.068e-02, 7.704e-02, -1.185e-01)); + r += mul(s6_0, M4(-3.371e-02, -6.558e-02, 1.376e-03, -1.345e-01, 1.465e-02, 1.052e-01, -2.816e-02, 9.085e-02, 5.106e-02, 6.937e-02, 7.058e-02, 8.025e-02, 3.013e-02, -2.639e-02, 1.649e-02, 1.785e-02)); + r += mul(s6_1, M4(-6.707e-02, 1.761e-01, -1.615e-02, -4.314e-02, -1.076e-01, 1.287e-01, -1.184e-02, 7.810e-03, 1.243e-01, -2.929e-02, 7.740e-02, -1.041e-01, -5.104e-02, -4.334e-02, 1.242e-01, 1.227e-01)); + r += mul(s6_2, M4(4.123e-02, 3.019e-02, -2.243e-02, -4.618e-02, -9.245e-02, -1.620e-01, 5.712e-02, -7.876e-02, -1.682e-02, 2.681e-02, -5.675e-03, -1.721e-01, -2.870e-02, -3.578e-02, 7.775e-03, -1.233e-01)); + r += mul(s6_3, M4(-1.652e-01, -7.928e-04, -4.806e-02, 4.656e-02, 2.087e-02, 6.688e-02, -6.866e-02, 4.033e-02, 4.829e-02, -2.848e-02, -5.616e-02, 1.375e-02, -5.552e-02, -1.680e-02, -5.849e-02, -1.262e-01)); + r += mul(s6_4, M4(1.233e-01, -9.312e-02, -3.187e-02, 6.720e-02, 6.422e-02, 3.899e-02, 3.527e-02, 1.679e-02, -9.803e-02, -2.406e-02, -6.811e-02, 3.042e-02, 1.062e-01, 1.312e-01, -5.425e-02, 9.650e-02)); + r += mul(s6_5, M4(-1.846e-02, -1.606e-02, 7.972e-02, -2.837e-02, 2.431e-02, -1.271e-01, -3.847e-02, -7.955e-02, -2.000e-02, 1.166e-02, -3.156e-02, 5.790e-02, 3.407e-02, -4.190e-02, -1.782e-02, -2.151e-02)); + r += mul(s6_6, M4(-2.544e-02, -5.915e-02, 1.843e-01, 9.362e-02, 4.990e-03, -1.615e-02, 7.778e-02, -3.237e-02, -1.791e-01, 6.874e-02, 5.214e-03, -4.972e-02, 3.849e-02, 3.411e-02, 9.507e-02, 6.376e-02)); + r += mul(s6_7, M4(1.284e-01, -5.599e-02, 1.323e-01, 5.644e-02, -4.202e-02, 5.244e-02, -8.550e-02, -2.918e-02, -3.015e-02, 3.385e-02, 3.261e-02, -8.972e-02, -3.890e-02, 6.428e-02, -3.145e-02, -2.355e-02)); + r += mul(s6_8, M4(8.571e-02, 2.069e-02, 5.270e-02, 4.432e-02, -3.353e-02, 1.355e-01, 1.678e-02, 8.254e-02, -3.588e-04, 9.659e-03, 5.636e-02, 1.541e-02, -1.879e-02, -4.235e-02, -6.655e-02, -2.892e-02)); + r += mul(s7_0, M4(5.057e-02, -1.094e-01, -2.501e-02, 2.854e-02, 2.897e-02, -2.094e-01, 
-1.915e-02, 4.697e-02, -4.821e-02, 5.449e-02, -1.429e-01, -1.043e-01, 4.462e-02, 5.616e-02, -5.757e-02, 5.563e-02)); + r += mul(s7_1, M4(-5.853e-02, 1.182e-01, 1.179e-02, 3.173e-02, -4.999e-02, 6.827e-02, -1.233e-01, 4.053e-02, 2.026e-01, 7.719e-02, -2.028e-01, -2.474e-02, -2.304e-02, -3.564e-02, -1.264e-02, -9.711e-02)); + r += mul(s7_2, M4(4.403e-02, 1.681e-02, 9.049e-03, -8.114e-04, -1.040e-01, 3.131e-03, -5.200e-02, -7.127e-02, -6.435e-02, -8.722e-02, -7.123e-02, 2.914e-02, -6.080e-03, 1.132e-01, 9.580e-02, 6.098e-02)); + r += mul(s7_3, M4(5.650e-02, -1.091e-01, 2.306e-01, 7.148e-02, 9.243e-02, 2.465e-02, -1.498e-01, 7.515e-02, 5.593e-02, 1.612e-02, -1.273e-01, -2.858e-02, 1.162e-01, -2.942e-01, 7.213e-02, -2.089e-01)); + r += mul(s7_4, M4(7.218e-02, -3.291e-02, -1.870e-02, -2.636e-01, 3.129e-01, 6.619e-02, 1.584e-01, 1.509e-01, -2.000e-01, -6.524e-02, -5.895e-01, 3.878e-02, 6.326e-02, 2.978e-02, 3.527e-03, 1.264e-02)); + r += mul(s7_5, M4(-6.218e-02, 2.090e-02, 1.216e-01, -6.139e-02, 1.023e-01, -1.329e-01, 4.503e-02, 2.290e-02, 1.011e-01, -1.292e-01, 1.513e-02, -1.150e-01, 2.092e-03, 2.387e-01, 2.038e-01, 5.367e-02)); + r += mul(s7_6, M4(-5.686e-02, -2.890e-02, -3.717e-02, 7.714e-02, 4.568e-02, -6.003e-02, 1.097e-01, -1.933e-02, -2.708e-01, -6.252e-03, -7.931e-02, -1.325e-01, 3.035e-02, 1.009e-01, 1.392e-01, 3.232e-01)); + r += mul(s7_7, M4(-2.405e-01, -6.710e-02, -2.377e-02, 3.145e-02, 9.669e-02, -1.496e-01, 3.671e-02, -7.840e-03, -1.736e-01, 1.203e-01, -1.745e-01, -1.564e-01, 1.998e-01, 1.556e-01, -1.469e-01, -1.946e-01)); + r += mul(s7_8, M4(7.734e-02, -1.404e-01, -8.468e-02, 1.537e-01, 3.650e-02, 1.036e-01, -7.442e-02, 4.586e-02, 8.895e-02, 2.057e-01, -9.627e-02, 2.055e-01, -7.919e-02, -3.901e-02, 9.436e-02, -9.957e-02)); + r += V4(7.049e-03, -5.164e-03, 7.833e-03, 3.095e-02); + return r; +} + +void Pass7(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x 
>= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 
0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, 
s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 8 +//!DESC conv7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(7.326e-02, -1.680e-02, -8.532e-03, -8.328e-02, -2.611e-02, 2.756e-02, -2.479e-02, 6.118e-02, 6.048e-02, 6.589e-02, -6.333e-03, 1.630e-02, -8.749e-02, -1.124e-01, 2.867e-02, 6.888e-02)); + r += 
mul(s0_1, M4(5.320e-02, -7.348e-02, -7.345e-02, -1.411e-01, 1.352e-01, -3.202e-02, 4.852e-02, -9.364e-02, -1.231e-01, -1.377e-01, -7.887e-02, 2.989e-02, -1.630e-01, -1.394e-01, -1.876e-01, 8.178e-02)); + r += mul(s0_2, M4(-1.359e-01, -1.165e-01, -2.628e-01, -5.544e-02, -2.101e-02, 2.755e-03, -1.010e-01, 7.707e-02, 4.659e-02, -4.174e-02, 3.209e-02, 3.284e-02, -9.646e-02, -4.638e-02, -2.890e-02, -6.471e-03)); + r += mul(s0_3, M4(4.069e-02, 1.116e-01, -7.851e-02, 2.314e-02, -1.063e-01, -1.398e-02, -8.177e-02, 7.154e-02, 6.327e-02, 1.028e-01, -1.067e-01, 2.547e-02, 3.345e-02, -1.358e-01, -7.660e-02, 2.743e-01)); + r += mul(s0_4, M4(-1.749e-01, 1.672e-01, 1.233e-01, 2.579e-02, 1.416e-01, 1.264e-02, 3.625e-02, 1.070e-02, 4.777e-02, 5.466e-02, -7.225e-02, 2.460e-02, -3.268e-02, -8.000e-02, 1.727e-03, 5.799e-02)); + r += mul(s0_5, M4(-7.982e-02, 3.175e-02, -9.588e-02, 1.356e-01, 9.916e-02, -1.107e-02, -2.543e-02, 1.025e-01, 2.149e-02, -8.852e-02, -9.233e-02, -6.055e-02, -1.254e-01, -3.054e-02, -4.695e-02, 2.900e-02)); + r += mul(s0_6, M4(6.681e-02, 2.395e-01, -3.717e-03, 7.605e-03, -8.234e-02, -4.648e-02, 1.036e-01, -1.684e-02, -2.558e-02, 1.999e-02, 5.742e-02, 1.543e-02, 4.634e-02, -2.336e-01, -7.873e-03, -1.503e-02)); + r += mul(s0_7, M4(9.118e-03, 1.362e-01, 1.276e-02, 2.263e-02, -8.010e-02, 5.689e-02, 3.855e-02, -8.891e-02, -1.977e-02, 5.464e-02, -1.107e-01, -1.507e-01, 7.280e-02, 3.315e-01, 1.247e-01, -1.315e-01)); + r += mul(s0_8, M4(4.262e-02, 3.590e-02, -5.887e-02, 1.078e-01, 4.775e-02, 4.336e-02, 2.300e-02, 8.044e-02, 1.452e-02, 1.896e-02, 7.814e-02, -1.559e-01, -2.269e-01, -9.475e-02, -3.582e-02, 6.094e-02)); + r += mul(s1_0, M4(-7.673e-02, 3.620e-02, -1.194e-02, -1.948e-01, 5.772e-03, -8.285e-02, -6.731e-02, -1.318e-01, -3.922e-02, 3.819e-02, -1.171e-01, 3.155e-02, 9.811e-02, -3.604e-02, -4.110e-02, -8.592e-03)); + r += mul(s1_1, M4(-9.692e-02, -1.232e-01, -2.234e-01, -3.303e-01, -1.204e-01, -1.037e-01, -6.013e-02, -6.288e-02, -2.922e-02, -4.687e-02, 3.154e-03, 
4.076e-02, -5.423e-02, -3.524e-02, -1.036e-01, 2.995e-02)); + r += mul(s1_2, M4(-7.888e-02, -8.202e-02, -4.395e-01, 9.045e-02, 2.978e-03, -8.826e-02, -1.084e-01, -2.192e-01, 5.059e-02, 2.395e-02, 5.964e-03, 1.145e-01, -1.370e-02, 4.883e-02, -6.958e-02, -6.560e-02)); + r += mul(s1_3, M4(1.279e-01, -1.432e-01, -5.269e-02, 3.730e-02, -1.142e-01, 1.388e-01, -3.105e-02, 4.774e-02, 7.469e-02, 4.862e-02, 2.075e-02, -5.505e-02, -4.774e-02, 1.962e-01, -9.652e-03, -2.483e-02)); + r += mul(s1_4, M4(-2.781e-02, 6.737e-02, 2.201e-01, 7.259e-02, -2.564e-01, 3.118e-02, -3.571e-02, -2.165e-02, -5.012e-02, 2.662e-02, 3.784e-02, 6.786e-03, 4.114e-02, 3.618e-02, 4.794e-02, -1.455e-02)); + r += mul(s1_5, M4(-1.191e-01, -1.130e-01, -1.292e-02, -9.387e-02, -3.200e-01, 6.517e-02, 4.728e-02, -8.181e-02, 9.274e-02, 4.584e-02, 8.052e-02, 7.160e-02, -3.136e-02, -3.421e-03, 1.210e-02, 4.128e-02)); + r += mul(s1_6, M4(9.538e-02, 1.158e-01, 5.522e-02, -5.608e-02, -1.713e-02, -1.104e-01, 3.938e-02, 1.422e-01, -1.006e-01, -8.636e-02, 1.612e-02, 7.535e-02, 7.791e-02, -3.914e-02, 6.032e-03, -7.430e-02)); + r += mul(s1_7, M4(1.279e-01, 1.155e-01, -5.522e-02, 1.038e-02, -6.844e-02, -1.568e-02, 3.897e-02, 6.627e-02, -2.525e-02, 5.406e-02, 1.185e-02, -1.289e-01, 5.105e-02, 1.325e-02, -2.018e-02, 1.429e-01)); + r += mul(s1_8, M4(-5.325e-03, 2.080e-02, -1.756e-02, -1.535e-02, -5.934e-02, 1.038e-01, 1.131e-03, 1.061e-01, 1.334e-02, -4.694e-02, 1.311e-02, 3.889e-02, 3.943e-02, -4.937e-03, -8.483e-02, 2.961e-02)); + r += mul(s2_0, M4(2.529e-02, 1.284e-01, -6.831e-02, 8.552e-02, 1.688e-02, -2.406e-02, -3.573e-02, -8.468e-02, -3.537e-02, -2.755e-02, -1.551e-01, -8.238e-02, 1.797e-03, 7.391e-04, 9.301e-02, 7.129e-02)); + r += mul(s2_1, M4(5.069e-02, 1.511e-02, -8.571e-02, 1.932e-01, 6.459e-02, -6.110e-02, 6.249e-02, -4.744e-02, 8.680e-02, -2.431e-02, -1.476e-01, -8.965e-03, -4.096e-02, -2.333e-02, 5.975e-02, -4.329e-02)); + r += mul(s2_2, M4(9.870e-02, -5.693e-02, 2.317e-01, -1.962e-01, 2.122e-03, 8.375e-02, 
-9.086e-03, 4.394e-03, -9.753e-02, -1.918e-02, 1.314e-02, -6.324e-02, -1.367e-01, 6.549e-03, 1.832e-02, -1.994e-03)); + r += mul(s2_3, M4(6.551e-02, -2.222e-01, -9.212e-02, -3.601e-02, 2.795e-02, -9.005e-04, 1.452e-02, 1.030e-05, 6.994e-03, 2.014e-02, -5.785e-03, 5.890e-02, 5.451e-02, 8.729e-03, 2.239e-02, -6.286e-02)); + r += mul(s2_4, M4(1.156e-01, -4.127e-01, -7.728e-02, -5.974e-03, -7.359e-03, 8.215e-02, 3.321e-03, 8.025e-02, -4.020e-02, 3.870e-02, 1.025e-01, 1.783e-01, -6.440e-03, -6.624e-02, 1.581e-03, -1.802e-02)); + r += mul(s2_5, M4(2.858e-01, 9.141e-02, 5.689e-02, -4.694e-02, -1.589e-02, -1.341e-01, 1.310e-01, 1.950e-03, -5.580e-02, -7.293e-02, -8.763e-02, 1.233e-01, -2.732e-02, -1.022e-01, 5.317e-02, 7.542e-02)); + r += mul(s2_6, M4(-5.645e-02, -1.140e-01, -6.738e-03, -4.646e-02, 5.276e-02, 3.075e-02, -1.642e-02, 3.556e-02, -6.125e-03, 3.828e-03, 1.131e-02, -1.482e-01, 6.052e-02, -4.016e-02, -2.530e-02, -1.002e-01)); + r += mul(s2_7, M4(8.396e-04, 4.828e-03, -4.947e-02, -4.133e-03, -2.696e-03, 2.795e-02, 2.310e-02, -6.906e-02, -4.635e-02, 2.880e-01, -5.403e-02, 1.196e-01, 4.968e-02, 6.836e-04, -6.956e-02, 1.108e-01)); + r += mul(s2_8, M4(1.626e-01, -5.639e-02, 2.667e-02, -4.575e-02, -1.474e-02, -8.975e-02, 1.452e-02, -1.030e-02, -5.135e-02, 1.374e-01, -5.451e-02, -3.441e-02, 1.314e-01, 4.312e-02, 4.449e-03, 4.756e-02)); + r += mul(s3_0, M4(-1.225e-02, -8.804e-02, -8.613e-03, 1.151e-01, -2.185e-02, 5.594e-02, -3.407e-02, -1.318e-02, -3.389e-02, -1.279e-01, -2.860e-02, -1.436e-02, -1.869e-01, 4.171e-02, -6.930e-02, 4.920e-02)); + r += mul(s3_1, M4(-6.642e-02, -9.350e-02, 4.312e-02, 1.403e-01, 4.267e-02, -6.881e-02, 8.049e-02, 1.490e-01, 2.641e-03, -1.042e-01, -5.050e-02, -2.284e-02, -3.493e-02, 1.340e-01, 1.530e-01, 4.731e-02)); + r += mul(s3_2, M4(1.421e-01, 3.422e-02, 1.132e-01, -1.175e-02, -6.359e-03, 5.818e-03, -1.565e-02, 2.434e-02, 3.349e-02, -7.312e-02, -2.611e-03, 8.518e-02, -2.472e-02, -1.053e-01, 5.248e-02, 1.049e-01)); + r += mul(s3_3, 
M4(-1.025e-03, -6.495e-02, 3.081e-02, -1.668e-02, 3.482e-02, 1.118e-01, -1.849e-02, -6.570e-02, 1.400e-02, 8.527e-02, -5.726e-02, 3.651e-02, 9.570e-02, -3.766e-01, 1.071e-01, 4.728e-02)); + r += mul(s3_4, M4(-2.700e-02, -5.728e-02, -6.008e-02, -6.256e-02, 7.111e-02, 2.445e-01, 1.668e-01, 1.348e-01, 7.866e-02, 1.338e-01, 4.739e-02, 8.424e-02, -9.877e-02, 1.519e-01, 6.522e-02, 4.844e-02)); + r += mul(s3_5, M4(-3.520e-03, 3.914e-02, 7.109e-02, -5.600e-03, 1.450e-01, 1.508e-01, 7.065e-02, -4.504e-02, 1.067e-02, -2.594e-02, 3.621e-02, 6.891e-02, -3.996e-02, 1.098e-01, 1.622e-01, 8.866e-02)); + r += mul(s3_6, M4(-2.979e-02, 1.837e-02, 2.692e-02, 1.753e-02, -9.040e-02, -1.203e-02, 4.506e-02, 2.294e-02, -1.252e-02, 3.441e-03, 4.172e-02, -8.800e-02, 2.268e-01, -1.349e-01, -4.966e-02, 1.769e-02)); + r += mul(s3_7, M4(-6.246e-02, 8.865e-02, 2.051e-02, 4.051e-03, -4.327e-02, -2.075e-03, -5.887e-02, 1.245e-01, 1.396e-02, 1.486e-01, -5.456e-02, 9.013e-03, -1.614e-01, 1.545e-01, -2.425e-02, 1.473e-01)); + r += mul(s3_8, M4(2.256e-02, 2.442e-02, -5.480e-03, -3.222e-02, 2.511e-02, -9.302e-02, -8.771e-02, 1.445e-01, 1.329e-02, 4.852e-02, 1.820e-02, -7.992e-02, 6.865e-02, -1.213e-01, -5.716e-02, 7.035e-03)); + r += mul(s4_0, M4(-4.963e-02, -1.699e-02, 6.164e-03, -7.748e-03, -3.923e-03, -4.458e-02, -7.514e-02, 8.713e-03, 1.502e-01, -6.000e-02, -1.562e-02, -3.699e-02, -3.769e-02, -3.684e-02, 3.194e-02, -1.316e-02)); + r += mul(s4_1, M4(-5.813e-03, -1.652e-02, -1.160e-01, 2.473e-02, -1.542e-02, 1.090e-01, 9.416e-02, -7.546e-02, -8.721e-02, -1.016e-01, 2.455e-02, 1.760e-01, -1.113e-02, -2.072e-02, -4.201e-02, 1.097e-01)); + r += mul(s4_2, M4(-8.947e-02, -2.912e-02, -1.152e-01, 5.044e-03, -5.288e-02, 9.921e-03, -2.049e-03, -8.057e-02, 3.600e-02, 1.285e-02, -3.229e-02, -4.312e-02, -3.473e-02, -4.958e-02, 6.987e-02, -1.037e-03)); + r += mul(s4_3, M4(-7.642e-02, -7.968e-02, -9.396e-02, -6.916e-02, -4.851e-02, -2.054e-02, 1.220e-02, 2.135e-02, 5.939e-02, -5.235e-02, -2.498e-02, -3.225e-02, 
-5.900e-02, -1.074e-01, -2.890e-02, 7.450e-02)); + r += mul(s4_4, M4(-2.375e-01, -8.494e-03, -5.648e-02, -3.120e-04, -1.023e-02, -1.651e-02, -5.311e-02, -2.120e-02, -7.515e-02, 1.600e-01, 3.359e-02, -3.815e-02, 5.274e-02, 5.381e-03, -4.513e-03, 6.992e-03)); + r += mul(s4_5, M4(2.012e-02, 2.432e-02, -1.255e-01, 9.230e-02, 3.945e-03, -8.647e-03, 5.221e-02, 5.038e-02, 2.594e-02, -5.321e-02, 1.549e-01, -5.618e-02, 6.376e-02, -1.068e-02, 3.395e-02, 7.709e-02)); + r += mul(s4_6, M4(-9.253e-02, 3.873e-02, -6.516e-02, -3.406e-02, 7.811e-02, 3.248e-02, -3.646e-02, -9.657e-02, 4.082e-02, -2.504e-02, -3.095e-02, -5.005e-02, 2.961e-02, -1.038e-02, 4.306e-02, -9.953e-02)); + r += mul(s4_7, M4(5.463e-02, 1.190e-02, 1.297e-02, 5.601e-02, -3.863e-02, 1.963e-04, -6.829e-03, 6.013e-03, -3.543e-03, -3.119e-02, -6.587e-02, 7.723e-02, -1.266e-02, 6.149e-02, -1.960e-02, -2.855e-02)); + r += mul(s4_8, M4(1.575e-01, 2.761e-02, -5.675e-02, -1.007e-01, -8.759e-02, -1.615e-02, -8.099e-02, -3.102e-02, 6.366e-02, 6.322e-02, -9.137e-02, 4.410e-02, -5.092e-02, 9.966e-02, -5.606e-02, -3.986e-02)); + r += mul(s5_0, M4(-4.827e-02, -2.112e-02, 1.171e-01, 2.011e-02, -6.445e-02, -6.014e-02, 1.968e-02, 5.059e-02, 2.412e-01, 3.018e-01, -1.182e-01, 3.469e-01, 4.179e-03, -1.971e-02, 8.742e-02, -2.170e-01)); + r += mul(s5_1, M4(2.099e-01, 3.944e-02, -5.078e-02, 8.941e-02, 2.386e-02, 1.028e-01, 8.036e-02, 3.725e-02, 5.418e-01, -3.762e-02, 1.630e-01, 5.946e-03, -1.113e-02, 1.548e-02, -6.941e-02, 2.013e-01)); + r += mul(s5_2, M4(-1.795e-02, -1.886e-02, -1.009e-01, -4.983e-02, 2.362e-02, 3.002e-05, -1.109e-01, 7.296e-02, 4.747e-02, 1.245e-01, -1.547e-01, -4.602e-02, -1.247e-01, -7.803e-02, 6.664e-02, 4.027e-02)); + r += mul(s5_3, M4(1.690e-01, -5.633e-02, 9.141e-03, 9.393e-02, 1.621e-01, -1.821e-01, 2.860e-02, 4.112e-02, -3.096e-01, -5.739e-02, -4.703e-02, -6.314e-02, -2.309e-01, 3.593e-01, -2.317e-01, -2.547e-01)); + r += mul(s5_4, M4(-1.832e-01, 4.333e-02, -7.308e-02, -1.365e-01, 2.838e-03, 6.438e-03, 
-1.606e-01, 2.236e-01, 8.184e-02, 1.572e-01, -1.139e-01, -1.861e-01, -1.266e-01, 2.496e-01, 4.770e-02, 2.003e-01)); + r += mul(s5_5, M4(8.375e-02, -2.241e-01, 7.085e-03, -2.314e-02, -7.755e-03, -4.227e-02, -5.664e-02, 1.815e-01, -2.513e-01, -4.317e-03, -5.752e-02, 4.155e-02, 2.129e-01, 1.094e-01, -8.033e-02, -4.873e-02)); + r += mul(s5_6, M4(-3.701e-03, 4.241e-02, -2.325e-02, 1.078e-01, 1.505e-01, 7.398e-02, -6.607e-02, -9.741e-02, -1.005e-01, 8.321e-03, -8.565e-02, 3.101e-02, -1.696e-01, 4.588e-02, -1.441e-02, -1.626e-01)); + r += mul(s5_7, M4(-7.378e-02, 8.560e-02, 6.674e-02, 5.409e-03, -6.194e-02, -5.892e-02, -4.005e-02, 1.344e-01, 2.073e-01, 5.668e-02, -1.387e-02, -3.150e-01, 8.191e-02, -6.871e-02, 5.037e-02, -1.086e-01)); + r += mul(s5_8, M4(-2.458e-03, -1.181e-01, -5.747e-02, 2.519e-02, 1.344e-02, 1.043e-01, -4.944e-02, 1.466e-01, -2.087e-01, -1.148e-01, -8.578e-02, 1.443e-01, -1.480e-01, 1.237e-01, -1.129e-01, -3.773e-02)); + r += mul(s6_0, M4(4.005e-02, -2.725e-02, -1.015e-02, -7.039e-02, 1.608e-01, -4.107e-02, -9.941e-03, 3.837e-02, 1.689e-01, -3.619e-03, -1.047e-01, -6.650e-02, -1.400e-02, -1.305e-01, 1.761e-02, -3.565e-02)); + r += mul(s6_1, M4(1.733e-01, 8.246e-02, 4.388e-02, -4.900e-02, -5.356e-02, 7.296e-03, -1.740e-02, -1.121e-01, 3.921e-03, -1.254e-01, 9.160e-02, -2.737e-03, 9.598e-02, -5.571e-02, 3.035e-02, -1.184e-01)); + r += mul(s6_2, M4(-1.958e-01, -1.807e-01, 4.089e-03, 6.421e-02, -3.751e-02, -4.583e-02, 5.623e-02, 1.439e-01, 2.941e-02, -4.083e-02, -7.173e-02, 1.034e-01, -1.830e-01, -3.674e-02, -2.433e-01, -1.585e-01)); + r += mul(s6_3, M4(5.681e-02, 9.114e-02, -5.818e-02, -1.150e-01, -2.396e-02, -5.568e-02, 2.690e-02, 4.784e-02, 2.844e-02, 7.175e-03, 2.195e-02, -1.767e-02, 1.565e-01, -2.681e-03, 3.860e-02, 1.748e-01)); + r += mul(s6_4, M4(-5.019e-02, -8.297e-02, -4.837e-02, -3.295e-02, 1.713e-01, -1.906e-02, -8.949e-02, 2.852e-02, -6.062e-02, -7.712e-02, 3.262e-02, 9.320e-02, -2.939e-02, -1.105e-01, -9.520e-02, 1.855e-01)); + r += mul(s6_5, 
M4(-1.230e-03, 4.906e-02, -3.178e-02, -9.381e-02, -1.184e-01, -1.199e-02, 9.639e-02, 2.335e-03, 1.050e-01, 8.792e-02, 3.049e-02, 1.773e-01, -4.611e-03, -1.566e-01, 1.912e-01, 1.975e-01)); + r += mul(s6_6, M4(-3.560e-02, 3.421e-02, 4.112e-02, 3.693e-03, 4.651e-02, -4.041e-02, 1.695e-02, -1.561e-02, 1.389e-02, -1.670e-02, 2.384e-02, -1.596e-01, 3.418e-01, -2.786e-01, -5.399e-02, -1.179e-01)); + r += mul(s6_7, M4(-3.193e-02, 6.029e-02, 7.003e-02, 1.943e-01, 4.110e-02, 2.286e-03, -7.037e-02, 4.680e-02, -2.689e-02, -7.342e-03, 3.395e-02, -5.840e-02, 1.831e-01, -2.382e-01, -2.290e-01, 2.881e-01)); + r += mul(s6_8, M4(6.764e-02, 6.010e-02, 3.488e-02, -6.655e-02, -1.382e-01, 7.262e-02, 1.931e-03, 5.726e-02, 2.771e-02, 1.279e-02, -5.664e-02, 2.941e-02, 3.236e-02, -6.130e-02, 7.286e-02, 1.960e-02)); + r += mul(s7_0, M4(1.140e-01, -9.028e-02, -7.418e-02, -3.781e-02, 8.795e-02, -1.327e-01, 2.363e-02, -3.158e-02, -1.305e-01, -2.282e-02, -7.596e-02, -1.521e-01, 5.714e-03, -2.941e-02, -1.806e-02, -4.892e-02)); + r += mul(s7_1, M4(6.662e-02, -1.102e-01, 1.024e-02, -1.127e-01, 2.631e-02, 1.655e-02, 9.290e-02, -2.096e-02, 4.017e-02, -1.329e-02, -1.061e-01, 1.428e-01, -2.756e-02, 5.955e-02, 5.337e-02, -2.459e-03)); + r += mul(s7_2, M4(-7.093e-02, -7.899e-02, -6.242e-02, 3.688e-02, 9.974e-02, 5.688e-02, 4.065e-02, 3.704e-03, -3.071e-02, -1.038e-02, -1.084e-02, -1.189e-01, 3.222e-02, 1.077e-02, -5.902e-02, -4.529e-02)); + r += mul(s7_3, M4(9.765e-03, -3.826e-02, -5.524e-03, -1.026e-02, 5.695e-02, 8.373e-02, 1.365e-01, -8.447e-02, -1.099e-01, -7.848e-02, 1.076e-01, -1.478e-01, 6.166e-02, 2.279e-02, 3.756e-03, 6.037e-02)); + r += mul(s7_4, M4(3.144e-02, -7.181e-02, -1.136e-01, 7.372e-02, -1.149e-02, 1.041e-01, 2.243e-01, 9.610e-02, 9.288e-02, 2.646e-02, 6.833e-02, -1.361e-01, 8.454e-02, -7.442e-02, -8.176e-02, 6.451e-02)); + r += mul(s7_5, M4(2.190e-02, 3.460e-02, 5.477e-02, 5.199e-03, 1.706e-01, -6.069e-02, 7.966e-02, 1.299e-01, 2.831e-02, 1.627e-01, -2.140e-02, -1.353e-01, -1.005e-01, 
2.623e-02, -2.245e-02, -5.307e-02)); + r += mul(s7_6, M4(4.158e-02, 1.699e-03, -1.020e-02, -1.288e-02, 3.594e-02, 9.277e-02, 5.293e-03, -5.628e-02, 1.309e-02, -2.879e-03, -1.329e-02, -2.155e-01, 7.836e-03, 1.077e-02, -1.834e-04, -1.107e-01)); + r += mul(s7_7, M4(-1.636e-01, -1.068e-01, 6.692e-02, -2.375e-02, 5.312e-02, 1.139e-01, 5.986e-02, -1.624e-02, -3.511e-02, 1.058e-01, -1.126e-03, 4.797e-02, 9.381e-02, 8.404e-04, -1.213e-02, -3.095e-02)); + r += mul(s7_8, M4(-4.623e-02, -2.666e-02, -1.416e-03, 5.373e-02, 2.451e-02, -6.059e-02, -1.433e-02, -7.040e-02, -8.232e-02, 1.101e-01, 1.666e-03, -9.323e-02, -8.185e-02, -3.667e-02, 2.303e-02, 8.545e-02)); + r += V4(-7.571e-03, 5.112e-02, 3.188e-02, -3.901e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.983e-02, 8.130e-02, -1.138e-02, 4.118e-02, 5.649e-03, -9.228e-03, -1.186e-02, -1.179e-02, 2.165e-01, -1.457e-02, 1.679e-01, -5.205e-02, 4.256e-02, -3.807e-02, -1.365e-01, 2.195e-01)); + r += mul(s0_1, M4(-1.686e-02, 9.503e-02, 1.076e-01, 1.273e-01, -8.605e-02, 9.071e-02, -7.611e-03, -8.845e-02, -1.404e-02, 2.858e-02, -5.158e-04, -1.584e-01, -2.366e-02, 5.117e-02, 1.026e-01, -8.642e-02)); + r += mul(s0_2, M4(-4.043e-02, 6.732e-02, 2.941e-02, -6.264e-02, 7.007e-02, -1.641e-02, -1.074e-01, 4.130e-02, 1.209e-01, -5.749e-02, 6.632e-02, -4.021e-02, 6.562e-03, 1.650e-02, 
2.568e-02, -6.852e-02)); + r += mul(s0_3, M4(1.294e-02, 4.328e-02, -4.724e-02, -5.972e-02, -1.507e-02, 2.535e-02, 2.248e-01, -6.332e-03, 1.194e-01, -2.328e-02, 5.724e-03, 2.047e-02, 6.483e-02, 1.967e-01, 2.347e-01, -1.109e-01)); + r += mul(s0_4, M4(8.289e-02, 2.530e-01, -1.704e-01, 1.225e-01, -8.388e-02, 8.772e-03, -8.346e-02, 5.466e-03, 4.914e-02, 1.743e-01, 6.609e-02, 2.067e-02, -1.537e-01, 2.375e-01, -2.492e-02, 7.719e-02)); + r += mul(s0_5, M4(7.153e-02, 6.322e-02, -5.479e-02, 7.153e-02, 2.188e-02, -9.131e-03, -3.086e-02, 2.073e-02, 2.361e-02, 9.672e-04, -1.130e-01, 7.425e-02, 2.527e-02, 8.043e-02, -9.048e-02, 8.426e-02)); + r += mul(s0_6, M4(1.098e-01, 2.966e-03, 1.052e-01, 5.585e-02, -6.872e-02, -1.584e-02, -5.762e-02, -1.458e-02, 2.922e-02, 2.922e-02, 9.750e-02, -1.465e-01, -4.324e-02, 2.296e-02, 3.301e-01, 1.642e-01)); + r += mul(s0_7, M4(2.132e-02, -1.219e-01, -1.895e-02, -1.970e-01, 8.192e-03, -4.112e-02, 8.180e-02, 1.796e-01, 7.669e-03, 9.340e-03, -3.267e-02, -1.316e-01, -1.190e-01, -3.573e-03, -3.089e-02, 5.706e-02)); + r += mul(s0_8, M4(-5.650e-02, 4.687e-03, 6.372e-02, 2.141e-02, 1.214e-02, -2.395e-02, 1.049e-02, 8.106e-03, 1.389e-01, -8.622e-02, -8.147e-03, 2.103e-02, -5.347e-03, -1.049e-01, 7.973e-02, 6.136e-02)); + r += mul(s1_0, M4(-7.653e-02, 3.994e-03, -5.756e-02, 2.404e-01, 1.302e-01, 5.167e-02, 2.087e-02, -5.515e-03, 7.363e-02, 2.574e-02, 1.132e-02, -9.038e-02, 3.098e-02, 3.781e-02, -1.340e-02, -2.394e-02)); + r += mul(s1_1, M4(-9.000e-03, 1.987e-01, 8.059e-02, 1.542e-01, -2.150e-01, -1.165e-02, -7.362e-02, 7.066e-02, 3.051e-02, 8.714e-02, -1.183e-01, 4.917e-02, -1.030e-01, 2.695e-03, -2.187e-02, -1.496e-01)); + r += mul(s1_2, M4(-9.477e-02, 2.519e-02, -8.799e-03, 1.311e-01, 6.297e-02, 3.574e-02, -5.792e-02, -1.065e-01, -1.605e-02, -3.194e-02, -3.883e-02, 3.980e-02, 2.591e-02, -5.805e-03, 1.617e-01, -1.757e-02)); + r += mul(s1_3, M4(-7.379e-02, 2.148e-02, -2.515e-01, 4.130e-02, 1.027e-01, -5.710e-02, -2.321e-02, -9.060e-02, -3.771e-03, 
-3.679e-02, 7.993e-02, 2.636e-02, 4.529e-02, -1.192e-01, -9.298e-02, 9.453e-02)); + r += mul(s1_4, M4(-1.416e-01, 3.067e-01, -1.597e-02, 6.555e-02, 7.348e-02, 6.710e-02, 1.118e-02, -3.230e-01, -1.052e-01, -5.553e-03, -4.233e-02, 1.119e-01, -1.417e-01, -4.051e-04, 7.640e-02, 1.348e-01)); + r += mul(s1_5, M4(1.876e-01, 7.973e-02, 6.309e-02, 1.341e-01, 1.048e-01, -1.145e-01, -4.912e-02, 1.186e-01, -4.735e-04, -8.400e-02, 5.898e-02, 1.145e-01, -3.536e-02, 3.478e-02, 3.221e-02, 2.792e-03)); + r += mul(s1_6, M4(7.400e-02, -1.982e-02, 1.309e-02, 7.803e-02, -1.180e-01, 5.490e-02, 3.897e-02, -1.823e-03, 1.359e-02, 7.597e-02, -4.269e-03, 5.470e-02, 1.117e-01, -6.178e-02, -1.678e-01, 1.173e-01)); + r += mul(s1_7, M4(5.562e-03, -1.539e-01, 1.129e-01, -1.149e-01, -1.811e-01, -1.305e-01, 7.671e-02, -8.567e-03, -5.196e-02, -8.907e-03, -1.275e-01, 1.491e-03, 8.662e-02, 1.298e-02, -2.805e-02, 3.214e-02)); + r += mul(s1_8, M4(6.226e-02, -9.723e-02, 1.803e-01, 8.396e-02, 4.782e-02, -3.540e-02, 2.286e-02, -2.551e-01, -3.612e-02, 1.695e-02, -3.034e-02, 3.821e-02, -2.678e-02, 1.757e-02, 4.701e-02, -2.584e-02)); + r += mul(s2_0, M4(4.271e-02, 1.435e-02, 2.132e-01, 1.766e-01, -1.605e-02, -4.804e-03, 1.161e-01, -5.321e-02, 6.169e-02, -3.735e-02, 1.685e-01, -1.118e-01, -1.765e-02, -1.293e-01, -7.445e-02, -1.613e-02)); + r += mul(s2_1, M4(1.435e-01, -1.773e-01, 1.688e-01, 1.635e-01, 1.118e-01, 2.187e-02, 2.361e-02, 2.987e-02, 3.904e-02, -5.927e-03, -1.409e-01, 2.267e-02, 8.298e-02, 5.953e-02, 7.070e-02, 5.210e-02)); + r += mul(s2_2, M4(7.118e-02, -2.985e-02, 3.018e-01, 2.618e-01, 2.843e-02, 2.776e-02, 1.730e-02, -2.799e-02, 3.650e-02, -4.304e-02, 2.567e-02, -6.747e-02, -1.003e-01, -5.359e-02, -1.348e-01, -1.880e-02)); + r += mul(s2_3, M4(-7.627e-02, -1.043e-01, 3.388e-02, 6.354e-02, 9.222e-02, 5.182e-02, 1.433e-01, -6.476e-02, -4.451e-02, 4.682e-02, 1.235e-01, 1.353e-01, -1.592e-01, 1.212e-02, 6.325e-03, 8.417e-02)); + r += mul(s2_4, M4(-1.529e-02, -2.805e-01, -7.125e-02, -1.981e-01, 
-7.305e-02, -1.251e-01, -1.964e-01, -6.126e-02, 2.572e-02, 1.607e-01, -6.845e-03, 1.296e-01, -5.740e-02, 9.053e-02, -4.555e-02, -8.953e-02)); + r += mul(s2_5, M4(-2.202e-01, -4.550e-03, 9.138e-03, 3.973e-02, 3.042e-02, 1.072e-02, -1.120e-01, -5.238e-02, -7.899e-02, -1.440e-01, 7.793e-03, 8.766e-03, -9.321e-02, 5.336e-02, 4.382e-02, -4.917e-02)); + r += mul(s2_6, M4(-5.794e-02, 5.949e-02, 5.868e-02, -1.305e-01, 9.007e-02, 2.468e-04, -1.032e-01, -8.668e-02, 1.054e-02, -9.556e-02, -2.210e-01, -6.770e-02, -2.962e-02, -5.007e-03, 8.031e-02, 4.016e-03)); + r += mul(s2_7, M4(-2.297e-01, -2.320e-02, -2.605e-01, 1.769e-01, 9.884e-03, -4.773e-02, -5.952e-02, 1.874e-01, -1.411e-02, -1.034e-01, 4.831e-02, -1.217e-02, 7.414e-03, -7.133e-02, 5.140e-02, 3.610e-02)); + r += mul(s2_8, M4(6.926e-02, -1.764e-02, 2.437e-02, -3.945e-02, -4.159e-02, -7.148e-04, -1.669e-01, 4.848e-02, 1.362e-02, -8.426e-02, 3.363e-02, -1.062e-01, 3.699e-02, -5.910e-02, -3.259e-02, -6.000e-02)); + r += mul(s3_0, M4(3.266e-02, -1.635e-02, -9.268e-03, -1.444e-02, -6.789e-02, -8.125e-02, -1.372e-01, -1.201e-01, -8.273e-02, -1.520e-02, -1.226e-01, 1.313e-01, 1.256e-01, 3.671e-02, -4.320e-02, 1.295e-01)); + r += mul(s3_1, M4(1.616e-01, -1.483e-01, 1.007e-02, -4.777e-02, 2.154e-03, -3.533e-02, 6.062e-02, 1.115e-01, -5.480e-02, 1.195e-01, 4.853e-02, 1.008e-02, 9.364e-02, -8.944e-02, 9.676e-02, 1.877e-01)); + r += mul(s3_2, M4(-5.110e-03, -7.102e-03, 4.880e-02, -5.596e-02, 3.291e-02, 2.714e-02, -2.460e-01, 9.288e-02, -1.284e-01, -6.344e-02, 5.529e-02, -7.398e-02, 6.091e-02, 5.879e-04, -8.177e-02, 2.660e-01)); + r += mul(s3_3, M4(1.075e-01, -2.717e-02, 8.766e-02, 3.620e-02, -7.465e-02, 9.704e-03, 8.892e-02, -4.939e-02, 7.569e-02, 7.576e-02, 2.762e-02, -6.882e-02, -2.166e-01, 1.711e-01, 2.567e-01, 1.157e-01)); + r += mul(s3_4, M4(5.295e-02, -1.808e-01, 6.740e-03, -3.125e-02, -1.375e-01, -7.829e-02, 1.252e-01, -5.333e-02, -1.218e-02, 1.515e-01, -5.023e-02, -3.915e-02, -1.488e-01, 1.708e-01, 1.073e-01, -1.787e-01)); 
+ r += mul(s3_5, M4(-4.310e-02, 5.330e-04, 5.296e-02, 6.118e-02, 2.424e-03, -3.204e-02, -4.205e-02, 1.344e-01, -5.280e-02, -2.408e-02, 7.708e-03, -7.485e-02, 3.816e-02, 1.564e-01, -1.596e-01, -1.008e-01)); + r += mul(s3_6, M4(2.846e-02, 4.028e-04, 5.228e-02, 2.090e-02, -4.003e-02, 4.156e-02, -1.400e-02, -5.232e-02, 6.428e-02, -7.429e-02, -1.345e-01, -2.075e-02, 1.101e-01, 3.909e-02, 2.293e-02, -2.696e-02)); + r += mul(s3_7, M4(-1.517e-01, -4.912e-02, 7.853e-03, 8.566e-02, -2.849e-02, 7.938e-02, 1.102e-01, -6.195e-02, -1.233e-02, 3.340e-03, 7.615e-03, 1.410e-01, -1.641e-01, -5.606e-02, -4.188e-02, -1.104e-01)); + r += mul(s3_8, M4(9.102e-02, 1.308e-02, -7.194e-02, -4.243e-02, -1.015e-01, 6.016e-02, -1.065e-01, -2.126e-03, 4.485e-02, -1.344e-01, 1.097e-01, 1.222e-01, -1.174e-01, 5.216e-02, 3.645e-02, -8.502e-02)); + r += mul(s4_0, M4(3.162e-02, 2.047e-02, -8.768e-02, 7.178e-02, -4.136e-02, 1.451e-02, -2.009e-02, 8.590e-02, -3.504e-02, 7.200e-02, -6.707e-03, 4.850e-02, -1.159e-01, -6.577e-02, 2.659e-03, 3.697e-02)); + r += mul(s4_1, M4(-4.373e-02, -1.432e-02, -1.025e-01, 1.218e-01, 1.213e-01, -6.253e-02, 1.098e-01, -4.396e-02, -1.435e-01, -8.083e-02, -4.730e-02, 1.345e-02, 4.332e-03, -8.223e-02, 1.521e-01, 7.768e-03)); + r += mul(s4_2, M4(5.797e-02, -5.495e-02, -4.579e-02, 5.029e-02, -3.201e-02, 4.360e-02, -6.783e-02, 6.364e-02, -3.153e-02, 1.680e-02, -3.927e-02, 2.867e-03, -3.550e-02, -1.989e-02, -3.437e-02, 8.254e-03)); + r += mul(s4_3, M4(-5.274e-02, -6.960e-03, 4.422e-02, 6.757e-02, 1.767e-03, -1.234e-03, 1.035e-02, -1.149e-01, -1.261e-01, -9.945e-02, 6.946e-02, 1.117e-01, -3.838e-02, 4.983e-02, 1.507e-01, -9.068e-02)); + r += mul(s4_4, M4(2.547e-01, 2.034e-02, 9.511e-02, -1.113e-01, -3.309e-03, 2.511e-02, 8.591e-02, -9.586e-02, -1.905e-02, -3.965e-02, -6.569e-02, 1.327e-01, -3.292e-02, 6.606e-02, 1.077e-01, -7.837e-02)); + r += mul(s4_5, M4(3.427e-02, -6.044e-02, 9.947e-02, -2.948e-02, 1.903e-01, -8.142e-02, 7.408e-02, 8.765e-02, 1.294e-01, 2.937e-02, 1.490e-02, 
1.741e-02, 8.611e-02, -2.960e-02, 1.621e-02, 7.118e-02)); + r += mul(s4_6, M4(7.072e-03, -3.489e-02, 1.474e-01, -6.758e-02, 8.396e-02, -1.278e-02, 1.239e-01, 1.219e-02, -4.617e-04, 2.750e-02, 3.367e-02, -1.125e-01, 4.128e-02, 3.849e-03, 1.559e-04, -9.764e-03)); + r += mul(s4_7, M4(6.998e-02, 5.240e-02, -6.551e-02, -1.124e-01, 4.662e-02, -5.076e-02, -4.813e-02, 3.977e-02, 2.835e-02, 9.788e-02, -1.083e-01, -9.348e-02, -7.555e-04, -9.514e-02, -1.318e-01, -6.260e-02)); + r += mul(s4_8, M4(-9.893e-02, 1.792e-02, 3.363e-02, -3.017e-02, -1.834e-02, 5.261e-02, -2.149e-02, 4.543e-02, 7.473e-02, 5.478e-02, -6.775e-04, -9.799e-02, -8.036e-03, -4.525e-02, -1.196e-01, -5.956e-02)); + r += mul(s5_0, M4(-7.606e-02, 1.697e-02, 3.356e-02, 8.496e-02, -4.369e-02, -4.666e-02, -4.516e-02, 1.232e-01, -1.148e-01, -4.079e-01, 6.891e-02, -1.204e-01, 6.006e-02, -7.587e-02, -3.940e-03, -8.972e-02)); + r += mul(s5_1, M4(-6.836e-02, 9.268e-02, 2.397e-02, 1.580e-01, 8.314e-02, -1.127e-01, -9.302e-02, -1.983e-02, -2.236e-01, -1.783e-01, 4.923e-02, -1.979e-01, -3.718e-02, -8.209e-02, -2.694e-01, -2.831e-01)); + r += mul(s5_2, M4(-1.496e-02, -1.074e-01, -1.865e-01, 3.621e-02, -7.354e-02, -3.669e-02, 1.025e-02, 1.348e-01, 7.306e-02, 3.492e-03, 1.151e-02, -2.423e-02, -1.517e-01, 1.229e-01, -9.210e-03, -3.804e-01)); + r += mul(s5_3, M4(-8.295e-02, -1.333e-02, -8.697e-03, 1.018e-01, -7.267e-02, 5.075e-03, -3.312e-02, -5.572e-02, 2.651e-02, 2.441e-01, 1.070e-01, -3.334e-01, -1.323e-01, -1.150e-01, -4.954e-02, 3.022e-02)); + r += mul(s5_4, M4(3.561e-01, -6.514e-03, -1.319e-01, -4.857e-02, -8.872e-02, -1.063e-01, 1.702e-01, 2.295e-02, -1.136e-01, -6.823e-03, -8.805e-02, 3.722e-01, -1.308e-01, -4.237e-02, -8.764e-02, -2.649e-01)); + r += mul(s5_5, M4(-1.473e-01, -3.242e-02, 3.013e-01, -2.271e-01, 1.636e-01, -1.054e-01, 1.489e-01, 1.410e-01, 1.229e-01, -1.184e-01, 1.967e-01, 1.486e-01, -1.744e-01, -3.491e-02, 2.146e-01, -1.210e-01)); + r += mul(s5_6, M4(-9.205e-02, -6.729e-02, 6.481e-02, 3.591e-02, 
1.497e-01, -2.646e-02, -5.646e-02, 7.942e-02, -2.862e-01, -7.300e-02, 4.249e-01, -2.225e-01, 1.466e-02, -4.618e-02, 1.342e-01, -6.644e-02)); + r += mul(s5_7, M4(-2.634e-02, -8.729e-02, -1.921e-01, 5.986e-02, -1.045e-01, -1.319e-01, -3.851e-02, -1.897e-02, 4.674e-02, -2.551e-01, -3.060e-01, -3.126e-02, 1.321e-01, 1.323e-03, 1.625e-01, -1.780e-01)); + r += mul(s5_8, M4(3.455e-02, -9.137e-02, -3.155e-02, 1.371e-01, 1.961e-02, -4.806e-02, 2.904e-02, 1.206e-01, -1.439e-01, 4.326e-02, 2.013e-01, -1.088e-01, 1.038e-01, 5.389e-02, -1.012e-01, -1.293e-01)); + r += mul(s6_0, M4(2.883e-02, 6.336e-02, -1.109e-01, 4.430e-02, -2.181e-03, -1.027e-02, 3.495e-02, 3.424e-02, 6.241e-02, 9.888e-02, 1.465e-01, 8.500e-03, -1.464e-02, 1.196e-01, 3.633e-02, 2.138e-02)); + r += mul(s6_1, M4(-7.511e-02, -1.257e-01, -9.774e-03, -2.211e-02, 2.084e-02, -7.529e-02, 3.955e-02, -1.850e-02, -7.271e-02, -9.222e-02, -5.359e-02, 9.605e-02, 1.401e-01, -2.953e-02, -7.327e-02, 2.252e-01)); + r += mul(s6_2, M4(-1.618e-01, 3.175e-02, -2.588e-02, -4.267e-02, 6.425e-02, -4.883e-02, -7.712e-03, 2.210e-02, 4.166e-02, -2.958e-02, -5.538e-02, 8.871e-02, -1.667e-02, 7.975e-02, -1.422e-01, -1.361e-01)); + r += mul(s6_3, M4(3.054e-02, 2.819e-02, -5.269e-02, 7.123e-02, -6.542e-03, -5.129e-02, 6.952e-02, -6.234e-02, 3.632e-02, 4.244e-02, 1.470e-02, -5.504e-02, -1.515e-01, -5.525e-02, 9.169e-02, 1.957e-01)); + r += mul(s6_4, M4(1.392e-01, 3.304e-02, 6.401e-03, -1.179e-01, 1.116e-01, 1.009e-01, -7.350e-02, -7.339e-02, -9.488e-02, -9.942e-02, 4.254e-02, -1.008e-03, 5.917e-01, -1.942e-01, 3.094e-02, 2.252e-01)); + r += mul(s6_5, M4(-1.635e-01, 8.334e-02, -1.226e-01, -1.008e-01, 7.124e-02, -3.881e-02, 4.288e-02, 1.904e-02, 7.209e-02, -1.087e-03, -2.940e-02, -5.550e-02, 2.476e-02, -1.826e-01, -2.270e-02, -6.354e-02)); + r += mul(s6_6, M4(-1.222e-02, 7.961e-02, 1.913e-01, 1.164e-01, -1.009e-01, -8.551e-03, -4.957e-02, 1.115e-01, -3.098e-02, 2.290e-02, 2.158e-02, 2.287e-02, -5.155e-02, -5.953e-02, -5.373e-01, 1.594e-01)); + 
r += mul(s6_7, M4(-8.832e-02, 1.443e-01, -7.009e-02, -1.701e-02, 1.220e-01, -2.167e-02, 7.703e-02, 7.268e-03, -2.231e-02, -8.895e-03, -4.785e-02, -1.486e-02, 4.795e-01, 1.363e-01, -2.954e-01, -6.300e-02)); + r += mul(s6_8, M4(-2.199e-03, 1.266e-01, -1.642e-02, -3.579e-01, -3.643e-02, -9.964e-02, -1.666e-02, 3.015e-02, 6.638e-03, 2.863e-02, -7.006e-03, 7.253e-03, 2.145e-01, 1.449e-01, -9.513e-02, -1.265e-02)); + r += mul(s7_0, M4(-3.209e-02, 4.605e-02, 1.079e-01, 1.989e-02, -5.784e-02, 5.088e-02, -1.273e-01, 1.108e-01, 3.986e-02, -6.515e-03, 1.124e-01, 6.504e-02, -7.102e-02, 8.049e-02, -3.219e-02, 2.202e-02)); + r += mul(s7_1, M4(3.292e-03, -1.202e-01, 3.605e-02, 7.924e-02, -1.870e-01, 6.843e-02, -1.121e-01, 1.240e-01, -1.255e-01, -5.002e-02, 5.503e-02, -3.135e-02, -3.943e-02, -2.022e-02, -2.647e-02, -6.932e-02)); + r += mul(s7_2, M4(-1.361e-01, 2.581e-02, -3.699e-02, -5.262e-02, -4.777e-02, -1.642e-02, 2.150e-02, -1.078e-02, -8.589e-03, -1.309e-02, -2.344e-01, -1.915e-01, -4.557e-03, 4.928e-02, -3.167e-02, -7.839e-02)); + r += mul(s7_3, M4(-9.894e-02, 5.377e-03, -6.016e-02, -6.025e-03, 9.087e-02, -1.221e-01, -3.059e-02, -4.253e-02, 4.615e-02, 6.281e-02, 5.827e-02, -9.779e-02, 9.307e-03, -4.204e-02, -1.247e-02, 1.284e-03)); + r += mul(s7_4, M4(8.887e-03, -1.019e-01, 4.785e-02, 2.651e-02, -8.583e-02, 7.502e-03, 3.123e-02, 2.588e-01, -1.680e-02, -2.212e-01, 1.478e-01, -1.283e-01, 1.045e-01, 5.040e-02, 8.075e-02, -9.190e-02)); + r += mul(s7_5, M4(-8.777e-02, 1.522e-02, 1.612e-01, 1.873e-02, -6.293e-03, 4.181e-02, 6.272e-02, 8.647e-02, 1.739e-01, 4.823e-02, 5.520e-02, -1.238e-01, 3.481e-02, 2.858e-03, -4.327e-02, -2.633e-02)); + r += mul(s7_6, M4(-1.609e-03, -9.164e-02, -8.422e-04, -1.272e-02, 6.570e-02, 7.809e-02, -7.719e-02, -4.676e-02, 1.452e-02, 1.919e-02, -3.864e-02, -3.212e-02, 1.593e-02, -8.736e-02, 2.733e-03, 1.538e-01)); + r += mul(s7_7, M4(-9.294e-02, -1.275e-01, -1.928e-02, 1.771e-01, 1.928e-01, -1.129e-01, 3.681e-02, 9.247e-02, 1.539e-01, -1.166e-02, 
6.051e-03, -2.803e-03, 1.723e-02, -4.953e-02, -1.479e-01, 8.169e-02)); + r += mul(s7_8, M4(2.105e-03, -1.920e-02, 1.970e-01, -1.863e-02, 7.787e-02, 7.357e-02, 1.981e-02, -4.003e-02, 5.348e-02, -7.818e-02, 1.926e-01, 6.939e-02, -5.906e-02, -4.222e-02, 1.214e-01, -6.501e-02)); + r += V4(9.425e-03, -6.502e-03, -1.215e-02, -4.379e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.030e-01, 5.180e-02, -6.760e-02, 6.079e-02, -2.889e-02, -1.103e-01, 2.358e-03, -1.889e-02, -7.512e-02, 2.994e-02, -3.116e-02, -1.178e-01, 1.637e-02, 5.190e-02, -4.271e-02, -3.379e-02)); + r += mul(s0_1, M4(1.105e-01, -1.012e-01, -6.958e-02, -1.877e-01, 2.586e-02, 1.008e-02, 1.192e-01, 3.894e-02, 3.049e-02, -1.959e-02, -7.397e-02, 5.311e-02, 1.822e-02, 1.226e-01, 5.007e-02, 1.588e-01)); + r += mul(s0_2, M4(-1.353e-01, 6.049e-02, 7.127e-02, 3.606e-02, -2.965e-02, 3.285e-02, -2.194e-02, -5.989e-02, 4.794e-02, -1.144e-01, -6.334e-02, -1.359e-01, 6.982e-02, -4.216e-02, -8.722e-02, 1.876e-01)); + r += mul(s0_3, M4(1.121e-01, 6.245e-02, -4.799e-02, -7.511e-02, -1.780e-01, -2.139e-02, -7.060e-02, -1.224e-01, 1.616e-01, -1.183e-01, 1.730e-02, 1.742e-01, 1.555e-01, -8.945e-03, 1.938e-01, -1.454e-01)); + r += mul(s0_4, M4(-1.167e-01, -4.252e-03, -1.382e-01, -2.507e-02, 1.098e-01, -4.058e-02, -3.589e-02, -3.798e-02, -3.234e-02, -1.313e-01, 7.806e-02, 
-8.148e-02, 6.749e-03, 6.031e-02, 1.802e-01, -7.930e-02)); + r += mul(s0_5, M4(1.668e-02, 1.406e-01, 4.683e-02, 8.918e-03, 4.626e-02, 1.492e-01, -6.812e-02, 5.677e-03, -1.095e-01, -7.690e-02, 2.882e-02, 1.502e-01, -5.098e-02, 1.111e-01, 1.075e-02, -8.344e-02)); + r += mul(s0_6, M4(-1.359e-02, -1.378e-02, 1.096e-01, -1.421e-02, -5.066e-02, -4.940e-02, 4.683e-02, 5.285e-02, 2.956e-02, 7.916e-02, -4.273e-02, -2.343e-01, -2.529e-01, 1.738e-01, 2.344e-02, -2.723e-02)); + r += mul(s0_7, M4(1.619e-02, -5.444e-02, 1.244e-01, 2.961e-02, 1.911e-02, 3.931e-02, 1.964e-02, 2.953e-02, 3.841e-02, 3.794e-02, 9.847e-02, -1.168e-02, 1.175e-01, 2.568e-01, 1.892e-01, 1.959e-01)); + r += mul(s0_8, M4(8.046e-03, -8.461e-02, 8.705e-02, 7.092e-02, -4.998e-02, -8.167e-02, 8.622e-02, 7.721e-02, -3.905e-02, -1.948e-01, 1.263e-01, -7.006e-02, -9.074e-02, -5.137e-02, -1.423e-01, -1.055e-01)); + r += mul(s1_0, M4(2.037e-01, -4.134e-02, 1.431e-03, -8.682e-02, 8.096e-02, -4.626e-02, -4.857e-02, 4.149e-02, -9.669e-02, 5.387e-02, 1.704e-01, 6.811e-02, 1.236e-01, -6.518e-02, -3.237e-02, 8.485e-04)); + r += mul(s1_1, M4(2.443e-01, -3.109e-02, -5.216e-02, -1.151e-02, -1.877e-02, 4.912e-02, -1.501e-02, -8.259e-02, 9.409e-02, 3.537e-02, -5.657e-02, -1.651e-02, 3.525e-02, 2.796e-03, -8.593e-02, -3.281e-02)); + r += mul(s1_2, M4(-2.203e-01, 6.307e-02, 2.666e-01, 6.146e-02, -1.000e-01, 1.418e-01, 1.547e-02, 2.503e-01, 1.396e-01, -1.719e-02, 9.498e-02, 8.809e-02, 4.643e-02, -2.165e-03, -2.136e-02, 7.491e-02)); + r += mul(s1_3, M4(1.313e-01, -2.397e-01, -1.541e-01, -9.527e-02, -1.657e-01, 4.067e-03, 3.172e-02, 6.652e-03, -3.691e-02, -4.660e-02, -1.886e-02, 1.112e-02, 8.538e-03, 7.209e-02, -5.522e-02, 1.420e-02)); + r += mul(s1_4, M4(-8.042e-02, -6.499e-02, 7.468e-02, 2.530e-01, -2.417e-01, -9.201e-02, 5.306e-02, -2.171e-01, 9.399e-02, 2.275e-02, -1.106e-01, -4.740e-02, -8.718e-02, -9.990e-02, 4.451e-02, 9.443e-02)); + r += mul(s1_5, M4(-5.618e-02, 3.624e-02, 2.160e-01, 2.765e-02, 9.392e-02, -4.092e-02, 
1.667e-02, 1.583e-01, -1.460e-01, 2.101e-01, -6.971e-02, 8.168e-02, 3.349e-02, 6.209e-02, 7.637e-02, -1.209e-03)); + r += mul(s1_6, M4(-7.459e-02, 9.316e-02, 1.352e-02, -5.870e-02, 8.489e-03, 1.144e-01, 3.504e-02, -1.692e-01, 9.801e-02, -4.062e-02, -7.359e-02, 7.979e-02, -5.365e-02, -1.372e-01, 9.992e-03, -1.625e-01)); + r += mul(s1_7, M4(-2.180e-02, 1.464e-01, 3.081e-01, 9.825e-02, -6.650e-03, 3.962e-02, 8.037e-03, -2.993e-01, -1.635e-02, 9.104e-02, -2.753e-02, -2.897e-02, -8.581e-02, -2.014e-02, 3.952e-03, 5.674e-02)); + r += mul(s1_8, M4(-1.155e-01, -1.423e-02, 1.558e-01, -9.508e-02, -4.453e-03, 9.155e-02, 8.825e-02, -2.214e-01, 4.655e-02, 1.176e-03, 9.797e-03, 1.354e-01, -8.679e-02, 1.240e-02, 2.236e-02, -1.652e-01)); + r += mul(s2_0, M4(-1.724e-01, -9.182e-03, -2.293e-01, 2.517e-01, -8.243e-03, 4.365e-02, -2.093e-02, -1.078e-02, 2.551e-02, -3.522e-02, -6.930e-02, 2.232e-01, -1.114e-01, 1.489e-02, 1.974e-02, -7.529e-02)); + r += mul(s2_1, M4(-7.519e-02, -3.149e-02, -8.839e-02, 1.913e-02, 5.915e-02, 6.902e-02, -2.554e-02, -1.764e-01, 7.881e-02, 8.444e-03, -9.908e-02, 1.643e-02, -1.592e-02, 9.464e-02, 9.421e-03, -1.186e-01)); + r += mul(s2_2, M4(-8.639e-02, -1.400e-01, -1.564e-01, -2.477e-02, 1.336e-01, 1.401e-01, 2.463e-02, -1.280e-01, 1.352e-01, -7.146e-02, -1.183e-01, 1.704e-01, -3.626e-02, -3.917e-02, -6.329e-02, -7.060e-02)); + r += mul(s2_3, M4(-2.485e-02, 1.315e-01, 2.486e-02, 1.226e-02, 1.236e-01, -5.331e-02, 4.234e-02, 1.228e-01, -1.372e-01, -5.923e-02, -2.059e-01, -1.074e-01, -1.556e-02, 1.669e-02, 3.191e-02, 1.972e-02)); + r += mul(s2_4, M4(1.913e-01, -8.388e-02, 2.797e-02, 1.087e-01, 1.333e-01, -2.377e-01, -1.375e-01, -1.833e-01, -8.198e-02, -1.642e-01, -2.019e-01, -1.915e-01, 5.206e-02, -3.148e-02, -7.719e-02, 1.506e-01)); + r += mul(s2_5, M4(-2.432e-01, 7.371e-02, 8.178e-02, -1.324e-01, -6.953e-02, 7.647e-02, 1.093e-02, -3.195e-02, -1.536e-01, -9.011e-02, 1.355e-01, -2.528e-02, 5.627e-02, -1.369e-02, -1.940e-02, -1.594e-02)); + r += mul(s2_6, 
M4(-6.367e-02, 4.086e-02, 1.115e-01, 1.332e-01, -6.442e-02, 9.489e-03, 5.163e-02, 4.962e-02, -4.676e-02, 6.107e-02, 9.852e-04, -8.369e-02, -2.810e-02, 7.742e-02, 1.460e-02, -5.245e-02)); + r += mul(s2_7, M4(2.499e-02, 9.525e-02, -1.504e-01, -4.846e-02, -1.431e-01, 6.899e-02, -2.462e-02, 1.607e-01, -4.203e-02, -6.801e-02, 8.834e-02, -1.126e-01, 9.556e-02, 6.370e-02, 5.367e-02, 9.592e-03)); + r += mul(s2_8, M4(-3.189e-02, -8.031e-03, -2.310e-02, -6.561e-02, -6.205e-02, -3.060e-02, 1.363e-01, -2.286e-03, 5.340e-03, 5.857e-02, 1.316e-01, 1.853e-01, -7.457e-02, -7.660e-02, 3.847e-02, -4.925e-02)); + r += mul(s3_0, M4(-1.402e-01, 3.435e-02, -1.059e-01, 6.012e-02, -1.999e-01, -8.057e-02, 2.603e-02, 1.750e-01, 9.291e-02, 1.118e-02, -1.018e-01, 1.539e-01, -9.788e-02, -1.960e-02, 1.499e-02, 2.258e-01)); + r += mul(s3_1, M4(-7.307e-03, 1.649e-01, 2.268e-02, -3.546e-02, 2.505e-02, -7.354e-02, -3.064e-02, 1.181e-01, 6.936e-02, 1.284e-01, 5.474e-02, 1.417e-01, -1.496e-01, 6.069e-02, 1.271e-01, 1.387e-01)); + r += mul(s3_2, M4(3.283e-02, -1.561e-01, -5.818e-02, -1.495e-02, 1.001e-01, -1.087e-01, 8.460e-04, 1.990e-01, -1.162e-01, -2.836e-02, 5.896e-03, 9.634e-02, 9.890e-02, 2.249e-01, -5.960e-02, 6.524e-02)); + r += mul(s3_3, M4(6.081e-02, 1.426e-02, 1.743e-01, -1.444e-01, -3.796e-02, -6.453e-02, -1.042e-01, 1.164e-03, -7.472e-02, 5.873e-02, -8.279e-02, -6.472e-02, -2.282e-01, 3.751e-02, -9.872e-02, 2.167e-01)); + r += mul(s3_4, M4(1.067e-01, 3.005e-02, 9.707e-02, 2.358e-02, 1.526e-01, 7.697e-02, -1.288e-01, -2.510e-01, 6.505e-02, -1.854e-03, -2.127e-02, -6.822e-03, -1.081e-02, -1.770e-01, -2.613e-02, -1.239e-01)); + r += mul(s3_5, M4(-7.202e-02, -8.924e-02, 7.588e-02, -5.286e-03, 6.626e-02, 3.781e-02, 7.665e-02, 2.771e-02, 1.221e-02, 1.257e-01, -7.619e-02, 2.011e-02, 2.040e-01, 1.483e-01, -2.693e-02, 5.554e-02)); + r += mul(s3_6, M4(1.010e-02, 1.521e-02, 2.979e-02, -2.894e-02, -3.475e-02, 5.665e-02, 8.026e-03, 5.070e-02, -9.067e-02, 7.642e-02, 2.394e-02, 3.811e-02, 3.641e-02, 
2.143e-02, 2.236e-01, 2.299e-02)); + r += mul(s3_7, M4(3.547e-02, 1.399e-02, 6.926e-04, 4.710e-02, -1.626e-01, 1.325e-02, -2.253e-02, 2.237e-01, -8.841e-02, 8.182e-02, 3.332e-03, 1.174e-01, 2.193e-01, -6.652e-02, -2.395e-01, -1.140e-01)); + r += mul(s3_8, M4(-4.354e-02, -9.374e-03, 3.428e-02, 7.002e-03, 3.499e-02, 5.109e-02, 1.783e-02, -2.013e-02, 7.523e-02, -2.629e-02, 6.597e-02, 5.925e-02, -1.579e-01, -2.344e-02, 2.092e-03, -1.340e-01)); + r += mul(s4_0, M4(-1.014e-02, 4.065e-03, 6.117e-02, 8.470e-02, -1.956e-01, -5.316e-02, -8.530e-02, 4.568e-02, 5.893e-02, -1.609e-02, -1.872e-02, 1.529e-01, 6.092e-02, -3.838e-02, -8.225e-02, -1.279e-03)); + r += mul(s4_1, M4(-2.075e-01, -2.361e-02, 3.200e-02, 1.327e-02, -7.797e-02, 7.097e-03, 5.049e-02, -8.874e-02, 1.309e-02, -6.570e-02, 8.799e-02, -7.405e-02, -4.478e-02, -1.109e-02, 7.107e-02, 7.869e-02)); + r += mul(s4_2, M4(-1.392e-02, 1.512e-01, 4.739e-02, 2.017e-02, 2.974e-02, -1.291e-01, 9.301e-02, 6.238e-02, 2.105e-02, 1.060e-02, 3.152e-02, 2.685e-02, -5.795e-02, -2.800e-02, -4.912e-03, -8.161e-02)); + r += mul(s4_3, M4(-5.809e-02, -5.480e-02, 5.728e-02, -4.076e-02, -1.091e-01, -1.717e-01, 7.309e-03, 6.975e-02, -2.645e-02, 9.944e-02, -5.127e-02, -9.457e-02, -1.426e-02, -8.327e-02, -7.884e-02, -1.611e-02)); + r += mul(s4_4, M4(9.950e-02, -7.039e-02, 4.891e-02, -3.545e-02, -5.392e-02, 8.991e-02, -5.298e-02, -3.121e-03, 2.157e-02, -8.715e-02, 5.429e-03, -5.276e-02, 1.285e-02, 1.285e-02, -1.142e-01, -5.807e-02)); + r += mul(s4_5, M4(4.575e-02, 7.011e-02, -3.673e-02, -1.608e-01, -3.663e-03, -5.043e-03, -6.263e-03, -2.703e-02, 7.943e-02, -5.537e-03, 5.793e-02, -4.219e-02, -6.355e-02, 6.953e-02, 8.290e-02, 7.658e-02)); + r += mul(s4_6, M4(7.795e-02, -7.372e-02, 6.093e-03, 1.056e-02, -2.125e-02, 8.681e-02, 3.893e-02, -7.483e-02, 5.271e-02, 2.211e-02, 9.069e-02, 1.073e-01, -8.662e-03, -5.212e-03, 2.982e-02, -5.739e-03)); + r += mul(s4_7, M4(-1.717e-02, 2.283e-02, -3.941e-02, 4.109e-03, 8.527e-02, 1.001e-01, 1.846e-02, -1.255e-01, 
2.752e-02, 3.294e-02, 2.106e-02, -5.608e-02, 8.715e-03, -1.594e-02, 5.878e-02, -1.700e-02)); + r += mul(s4_8, M4(-1.026e-02, 1.383e-02, 7.065e-02, -1.091e-01, 3.015e-02, 4.376e-02, -4.114e-03, -1.954e-01, 2.104e-02, 1.412e-02, 4.828e-02, -6.711e-02, -3.568e-02, -8.016e-02, 3.111e-02, -4.615e-02)); + r += mul(s5_0, M4(4.547e-02, 7.190e-02, -3.506e-02, -4.392e-02, -6.403e-02, -1.067e-01, 4.684e-02, -1.001e-01, 1.809e-01, -1.215e-01, -6.191e-01, 1.910e-01, 1.988e-02, -7.828e-02, 3.948e-02, -9.714e-02)); + r += mul(s5_1, M4(-9.265e-02, -2.044e-01, 2.601e-03, 1.321e-01, 4.113e-02, 5.837e-02, -4.895e-02, -3.625e-01, -3.132e-02, 3.531e-01, -2.826e-01, -3.891e-01, -5.710e-02, 2.568e-01, 9.742e-02, -5.672e-03)); + r += mul(s5_2, M4(-1.338e-02, -1.515e-01, -1.535e-02, 2.606e-01, -1.096e-01, -4.259e-02, 8.187e-02, 1.577e-01, 2.463e-02, -5.034e-02, -1.115e-02, -4.038e-02, -1.086e-01, -1.483e-02, 1.037e-01, -1.948e-01)); + r += mul(s5_3, M4(-1.922e-02, -6.215e-02, 1.517e-02, 1.218e-01, -3.071e-02, 1.773e-02, -3.607e-02, 1.047e-01, -2.256e-02, -9.207e-02, -3.169e-02, -1.181e-01, 2.580e-01, -3.072e-01, -1.991e-01, 3.262e-01)); + r += mul(s5_4, M4(1.372e-01, -1.362e-01, 1.134e-02, 2.881e-01, 8.171e-02, 1.333e-01, -1.464e-01, 2.722e-02, 1.081e-01, 3.382e-02, -3.378e-01, -3.254e-01, -2.377e-02, -4.981e-02, -2.661e-01, -2.726e-01)); + r += mul(s5_5, M4(8.029e-02, -2.610e-03, -5.205e-02, -1.134e-01, -2.463e-03, -2.045e-02, -4.503e-02, -4.423e-02, -1.089e-02, 1.997e-02, -1.041e-01, -1.421e-01, 5.208e-03, -1.065e-01, -1.208e-01, 1.845e-02)); + r += mul(s5_6, M4(6.110e-02, 5.042e-02, -7.398e-02, -1.530e-01, -6.443e-02, 9.451e-02, 1.408e-02, 8.396e-02, -5.996e-03, -6.634e-02, -2.270e-01, 8.724e-02, 1.584e-01, 1.196e-02, -1.147e-01, -2.780e-02)); + r += mul(s5_7, M4(-3.316e-02, 9.211e-02, -4.164e-02, -5.648e-02, 9.994e-02, -7.862e-02, -8.656e-02, -2.388e-01, 3.321e-02, 7.988e-02, -7.226e-02, -1.103e-01, -1.496e-01, 3.867e-01, 2.678e-01, -1.186e-01)); + r += mul(s5_8, M4(-7.234e-02, 
-7.924e-02, 1.794e-01, 2.319e-02, 7.658e-02, 4.797e-02, 1.451e-01, 2.210e-02, 5.900e-02, 1.386e-01, -1.373e-01, 7.030e-02, 5.726e-02, 1.062e-01, 5.655e-02, 7.951e-02)); + r += mul(s6_0, M4(-1.131e-01, -3.577e-02, 1.421e-01, -3.580e-02, 6.043e-02, 6.665e-02, -1.767e-02, -1.889e-02, -9.003e-02, 3.426e-02, -2.185e-02, 6.045e-02, -4.273e-03, 1.100e-01, 2.061e-01, 7.638e-02)); + r += mul(s6_1, M4(-1.003e-01, -4.393e-02, -1.226e-01, -1.006e-01, -1.930e-02, 4.019e-02, 4.542e-02, 1.739e-01, 2.843e-02, -9.876e-03, 3.212e-02, 7.995e-02, 6.001e-02, -2.006e-01, 8.412e-02, -1.438e-01)); + r += mul(s6_2, M4(1.870e-01, 6.808e-03, 3.708e-02, 1.286e-01, 5.755e-02, -3.608e-02, -4.639e-02, 3.624e-02, 2.504e-03, 1.275e-01, -1.453e-02, 4.201e-02, -1.102e-01, 2.665e-02, 2.633e-01, -2.948e-01)); + r += mul(s6_3, M4(-8.279e-02, 1.014e-01, 2.264e-01, 2.429e-01, 1.484e-02, 4.490e-02, 4.040e-02, -8.325e-02, 4.054e-02, -3.042e-03, 6.527e-02, -4.170e-02, -1.438e-01, 6.808e-02, -9.250e-02, -2.674e-01)); + r += mul(s6_4, M4(2.200e-01, -3.817e-02, 8.343e-02, 7.486e-02, -3.402e-01, -1.026e-01, -1.268e-02, -8.661e-03, 7.670e-02, -1.422e-01, 3.638e-02, 2.780e-03, 2.017e-01, 2.283e-01, -1.223e-01, -1.577e-01)); + r += mul(s6_5, M4(-3.965e-02, 1.235e-01, 2.024e-01, 2.421e-01, 7.390e-02, -9.736e-02, 5.491e-02, -1.747e-02, -8.821e-02, 3.791e-02, -3.107e-02, -1.047e-01, 2.079e-01, -4.630e-01, 7.189e-02, 1.396e-01)); + r += mul(s6_6, M4(5.692e-02, 8.449e-02, 8.547e-02, -2.314e-02, -9.132e-02, 4.575e-02, -2.234e-02, -9.140e-02, 1.246e-02, 5.259e-02, -5.461e-02, -2.873e-02, 5.578e-03, 6.954e-02, -5.065e-02, -5.574e-02)); + r += mul(s6_7, M4(1.269e-01, 4.871e-02, 9.764e-02, 5.221e-02, 3.510e-02, 3.839e-02, 4.264e-02, 1.651e-01, -2.245e-02, 3.480e-02, -2.074e-02, -1.590e-01, -1.197e-01, -1.028e-01, 3.397e-01, -1.966e-01)); + r += mul(s6_8, M4(-1.619e-02, -2.535e-02, 5.190e-02, 1.855e-02, -5.307e-02, -1.313e-01, 8.308e-02, -5.597e-02, 1.891e-02, 7.614e-02, -5.868e-02, -4.543e-02, -1.567e-01, 1.922e-01, 
1.221e-01, -4.377e-02)); + r += mul(s7_0, M4(1.028e-01, 7.548e-03, -1.111e-01, -4.530e-02, 7.687e-02, -2.370e-03, 9.737e-02, 6.218e-02, -8.472e-02, -4.753e-03, -1.759e-02, 7.301e-02, -6.980e-02, -5.817e-02, 2.753e-02, 3.833e-03)); + r += mul(s7_1, M4(9.802e-02, -1.691e-03, -1.026e-01, -1.141e-01, 4.108e-02, 1.704e-02, 5.610e-02, -7.644e-02, 1.787e-02, -2.503e-01, 3.689e-02, 5.876e-02, -5.987e-02, -7.223e-02, 1.370e-02, 5.564e-03)); + r += mul(s7_2, M4(1.022e-01, -1.052e-02, 1.894e-02, -5.264e-03, 4.457e-02, 1.206e-01, 4.648e-02, 8.187e-02, 5.019e-02, -7.684e-02, -4.761e-02, 3.577e-02, 6.005e-03, 3.018e-02, -6.777e-03, -2.671e-02)); + r += mul(s7_3, M4(7.466e-02, -5.545e-02, 5.147e-02, -9.105e-02, 1.714e-01, 3.147e-02, -7.164e-02, 1.448e-01, -1.964e-01, 4.534e-02, -1.009e-01, -1.424e-01, -1.958e-01, -1.056e-01, -7.003e-02, -1.141e-02)); + r += mul(s7_4, M4(6.642e-02, -4.793e-02, 9.713e-02, -1.437e-01, -2.930e-03, 1.499e-02, -1.431e-01, -8.737e-02, 4.347e-03, -2.981e-01, 1.137e-01, 1.970e-01, -2.402e-02, 5.391e-03, -4.417e-02, 1.113e-01)); + r += mul(s7_5, M4(9.509e-02, -7.006e-02, 4.524e-02, 9.689e-02, 1.099e-01, 6.772e-02, -4.856e-02, 2.939e-01, -1.818e-02, -5.436e-02, -1.028e-01, 1.111e-01, 7.046e-02, -7.240e-02, 1.357e-03, -4.907e-02)); + r += mul(s7_6, M4(-1.812e-02, -1.753e-02, -5.269e-02, -8.065e-02, 3.366e-02, -1.626e-02, -5.052e-02, -1.878e-02, -3.922e-02, 1.056e-01, -6.507e-02, -5.286e-02, 8.207e-02, 5.148e-02, 7.783e-02, 1.681e-02)); + r += mul(s7_7, M4(3.352e-02, -6.558e-02, -3.283e-02, 1.152e-02, -1.507e-02, 5.144e-02, 3.858e-02, -1.565e-01, 1.527e-01, -9.508e-02, 1.196e-02, -1.377e-01, 1.485e-02, 2.016e-01, -3.342e-02, -4.941e-02)); + r += mul(s7_8, M4(2.321e-02, 6.616e-02, -4.715e-02, -1.106e-01, 7.326e-02, 1.498e-01, 7.544e-02, 1.409e-01, 3.549e-02, 8.089e-02, -5.649e-02, -1.224e-01, 1.137e-01, -1.970e-02, -8.010e-02, 1.944e-01)); + r += V4(-6.752e-03, -7.566e-03, 6.314e-03, 1.825e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 
s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.717e-02, 9.769e-03, 9.986e-02, -9.887e-02, 3.267e-02, 1.003e-02, 1.651e-03, -1.743e-01, 8.331e-02, 3.077e-01, -2.175e-02, 1.084e-02, -1.637e-01, 1.951e-01, 6.243e-02, -1.436e-01)); + r += mul(s0_1, M4(6.098e-02, -4.669e-02, -5.103e-02, 5.120e-02, -6.543e-02, 9.551e-02, -1.651e-01, 8.219e-03, -8.131e-02, 1.417e-01, 5.506e-02, -6.124e-03, -2.257e-02, 2.200e-01, 3.792e-02, -1.043e-01)); + r += mul(s0_2, M4(-1.397e-02, 7.591e-02, -1.604e-01, -1.153e-01, -1.530e-01, 1.447e-01, 1.756e-02, 1.792e-02, 1.607e-01, -1.341e-01, 1.823e-01, 5.549e-02, 1.516e-01, 1.876e-01, 2.927e-02, -1.047e-01)); + r += mul(s0_3, M4(-3.772e-02, 1.171e-01, 4.045e-03, 2.626e-02, -4.379e-02, 1.676e-02, 1.781e-01, -2.848e-02, 1.034e-01, 9.025e-02, 1.324e-01, -9.010e-02, -1.003e-01, 1.520e-01, -2.431e-01, -8.147e-02)); + r += mul(s0_4, M4(2.643e-02, -7.991e-02, 3.294e-02, -1.099e-01, 9.339e-02, 3.811e-02, -6.230e-02, 1.012e-01, 6.919e-02, 1.717e-01, 3.213e-02, -1.369e-01, -1.733e-01, -2.811e-01, -1.270e-01, -1.960e-01)); + r += mul(s0_5, M4(-1.091e-01, -6.180e-02, -2.301e-02, 8.213e-04, 9.512e-02, -3.803e-02, -1.084e-02, 1.420e-01, -1.947e-02, 2.276e-01, -6.797e-02, -9.134e-02, -2.017e-02, -1.052e-01, -4.217e-02, 2.666e-01)); + r += mul(s0_6, M4(-5.222e-02, 4.017e-02, 7.455e-02, 2.275e-02, -2.500e-02, 8.089e-02, -4.076e-02, -4.002e-02, -8.283e-02, 2.970e-01, 
1.390e-01, -7.048e-02, 2.858e-02, -1.936e-01, 1.580e-01, -8.027e-03)); + r += mul(s0_7, M4(5.542e-02, 2.002e-02, -2.675e-02, 1.503e-02, -1.431e-01, -2.823e-02, 8.154e-03, -1.270e-01, 1.633e-03, -1.870e-01, -2.306e-01, -7.430e-02, 5.849e-02, 1.180e-02, 2.341e-02, 2.886e-01)); + r += mul(s0_8, M4(-8.929e-02, 1.351e-01, 7.252e-02, 2.595e-02, -7.732e-02, -8.429e-02, -1.554e-01, -1.018e-01, 9.823e-03, 2.202e-01, 1.353e-02, 7.901e-04, 3.251e-02, -1.569e-01, 1.351e-01, 2.692e-01)); + r += mul(s1_0, M4(-8.562e-02, 8.848e-02, 2.261e-01, -1.160e-01, 2.243e-02, -6.960e-02, -2.434e-02, 1.238e-01, -5.562e-02, 2.180e-02, -8.743e-02, 2.182e-02, 8.208e-02, 9.049e-02, -6.469e-02, 7.688e-03)); + r += mul(s1_1, M4(1.996e-01, 3.454e-02, 2.105e-01, 2.091e-01, -6.999e-02, -9.015e-02, 4.232e-02, 5.711e-02, -1.312e-01, -3.303e-02, -1.230e-01, 2.519e-02, 1.419e-02, -6.896e-02, 2.025e-02, 6.401e-02)); + r += mul(s1_2, M4(-9.447e-02, 8.105e-02, -1.036e-01, -1.588e-01, 1.464e-01, -9.827e-02, 8.542e-02, -2.070e-02, -6.366e-03, 1.370e-01, -7.298e-02, -1.725e-02, 1.101e-01, 2.035e-02, -9.348e-02, -7.200e-02)); + r += mul(s1_3, M4(1.109e-02, 1.264e-01, -1.209e-01, -1.554e-01, -1.159e-01, -1.892e-01, 5.685e-02, 8.991e-02, 1.312e-01, 5.569e-02, 1.933e-01, 7.793e-02, 1.109e-01, 5.463e-02, -1.602e-03, -1.379e-01)); + r += mul(s1_4, M4(5.509e-02, -1.017e-01, 8.452e-02, -1.655e-01, -9.169e-03, -1.362e-01, -1.786e-01, -5.513e-02, -6.821e-04, -8.778e-02, 5.897e-02, -5.619e-02, -5.489e-02, -4.180e-02, 4.755e-03, -3.080e-03)); + r += mul(s1_5, M4(-1.489e-01, -5.831e-02, -1.427e-01, -1.644e-01, 1.730e-02, 1.156e-03, 2.227e-02, 1.556e-01, -4.279e-02, -3.060e-02, -1.932e-02, -4.620e-02, -1.522e-02, -2.339e-02, 9.479e-02, 2.281e-02)); + r += mul(s1_6, M4(-8.987e-02, 1.801e-01, 1.497e-01, -2.205e-02, 1.658e-01, 2.336e-01, -1.755e-01, 1.293e-01, -2.428e-02, 1.084e-01, 3.095e-02, 1.159e-01, -1.136e-01, 1.618e-01, -1.261e-01, -7.645e-02)); + r += mul(s1_7, M4(-2.607e-02, -1.290e-01, -1.531e-02, -9.560e-02, 
-9.710e-02, -7.514e-02, -8.585e-02, -1.072e-01, 1.509e-01, -2.000e-01, -8.177e-02, 6.025e-02, -2.661e-03, -9.347e-03, -5.421e-02, 1.047e-01)); + r += mul(s1_8, M4(-1.385e-01, 1.662e-01, 5.869e-02, -2.730e-02, -6.720e-02, -2.388e-01, -6.281e-02, -3.390e-01, 7.931e-03, 9.052e-02, -1.473e-02, -4.222e-02, -3.454e-02, 2.653e-02, -6.030e-02, 4.434e-02)); + r += mul(s2_0, M4(-1.198e-01, 5.241e-02, 6.661e-02, 1.784e-01, -5.096e-02, -1.366e-01, 2.038e-02, 2.957e-02, -1.052e-02, -2.031e-01, -4.690e-02, -2.149e-01, 2.957e-02, -2.198e-02, -5.177e-02, -3.467e-02)); + r += mul(s2_1, M4(4.485e-02, 6.809e-02, 1.478e-01, -2.134e-01, 1.127e-01, 3.646e-02, -8.540e-02, -7.599e-02, -1.459e-01, -1.487e-01, -1.248e-01, 2.611e-01, -6.606e-02, 2.024e-01, 1.783e-02, -1.899e-02)); + r += mul(s2_2, M4(1.640e-02, -3.178e-02, -1.399e-01, -3.473e-02, 1.110e-02, 1.010e-01, -1.280e-01, -2.861e-02, -4.956e-02, -1.929e-01, -5.167e-02, 3.184e-02, -1.111e-01, 2.306e-02, 1.435e-01, -9.536e-02)); + r += mul(s2_3, M4(-5.429e-02, 1.061e-02, -4.774e-01, 2.036e-02, -1.635e-02, 6.595e-02, -3.063e-02, 7.685e-02, -1.179e-01, -1.091e-01, 3.535e-02, 1.078e-01, -6.447e-02, -1.197e-01, 1.170e-01, 2.519e-02)); + r += mul(s2_4, M4(-2.047e-02, 2.278e-02, -7.021e-02, 1.275e-01, -1.218e-01, -1.286e-01, -1.497e-01, -7.172e-02, 4.386e-02, -6.259e-02, -7.381e-02, -2.430e-01, 7.806e-02, -7.124e-02, 1.390e-02, 2.018e-01)); + r += mul(s2_5, M4(-1.143e-01, 1.832e-02, -1.345e-01, 1.271e-01, -3.451e-02, -3.570e-03, 7.250e-02, 9.202e-02, 9.418e-02, -1.007e-01, 1.772e-01, 1.791e-01, -2.399e-02, 7.251e-02, -6.399e-02, -4.066e-02)); + r += mul(s2_6, M4(1.195e-02, 3.455e-02, -9.223e-02, 7.473e-02, -4.358e-02, -5.941e-02, 4.774e-02, 2.804e-02, 1.101e-01, -1.979e-01, -2.947e-02, -7.232e-02, -1.633e-02, -9.952e-03, 9.117e-02, -1.100e-01)); + r += mul(s2_7, M4(1.648e-01, -1.768e-01, -3.981e-02, -5.506e-02, -3.355e-03, -1.699e-02, 6.906e-02, -3.916e-02, 4.472e-02, -1.261e-01, 9.100e-02, -1.264e-01, -2.708e-02, -1.563e-01, -3.189e-02, 
1.403e-01)); + r += mul(s2_8, M4(-1.306e-01, 1.916e-02, -2.621e-01, -2.006e-03, 3.518e-02, 1.125e-01, -1.299e-02, -9.027e-02, 2.894e-02, -2.493e-01, -2.424e-02, -8.538e-03, -3.670e-02, -4.606e-02, -1.190e-01, 1.236e-02)); + r += mul(s3_0, M4(2.467e-02, 1.228e-01, 1.307e-03, 2.644e-02, -1.061e-01, -2.063e-01, -1.047e-01, -4.946e-02, -6.557e-02, 3.756e-02, 1.079e-02, 4.868e-02, 7.785e-02, -2.759e-03, -1.427e-01, 1.646e-01)); + r += mul(s3_1, M4(4.235e-02, 1.720e-01, 1.786e-02, -9.564e-02, -8.153e-02, -1.325e-02, -2.990e-01, -2.348e-02, -5.725e-02, 7.600e-02, 8.733e-02, 9.938e-02, -7.687e-02, 2.715e-01, -2.000e-02, -1.306e-01)); + r += mul(s3_2, M4(-2.426e-02, -6.496e-02, 1.602e-02, -5.793e-02, -1.478e-01, -1.127e-01, 4.043e-02, -7.110e-02, 5.491e-02, -6.065e-02, 2.713e-02, -3.371e-03, -4.914e-02, 2.346e-02, -4.165e-02, -2.006e-01)); + r += mul(s3_3, M4(-1.237e-01, 1.560e-02, -1.648e-02, 2.805e-02, 1.293e-01, -2.959e-02, 1.850e-01, 1.106e-01, -9.726e-02, 6.985e-02, 4.132e-02, 2.229e-01, -9.818e-02, -2.129e-02, -5.933e-02, 3.006e-01)); + r += mul(s3_4, M4(-1.240e-01, -1.789e-02, 1.252e-02, 1.278e-01, 4.614e-02, 5.781e-02, 1.586e-01, -5.546e-02, 1.474e-01, -1.586e-01, -3.989e-02, -3.110e-02, -2.245e-01, 1.206e-01, 3.395e-01, 3.636e-02)); + r += mul(s3_5, M4(1.174e-01, 1.871e-01, 5.915e-02, 9.390e-03, 2.787e-02, 1.518e-01, 2.257e-01, 9.543e-02, -1.490e-01, -9.546e-02, -8.699e-02, 6.155e-03, 1.305e-01, 1.351e-02, -1.750e-01, 7.539e-02)); + r += mul(s3_6, M4(-1.004e-01, 1.196e-01, 3.374e-02, -2.762e-02, 3.960e-02, -7.232e-02, 8.549e-02, -1.199e-01, 4.529e-02, -1.194e-01, -1.098e-01, 3.108e-02, -6.552e-02, -4.890e-02, -7.023e-02, -1.666e-01)); + r += mul(s3_7, M4(2.712e-02, -6.507e-02, 4.040e-02, -1.978e-02, -2.940e-02, -1.641e-01, 1.689e-01, 1.310e-02, 1.428e-01, 7.349e-02, 5.750e-02, -4.696e-02, -1.200e-01, -1.444e-01, 1.115e-01, -1.165e-01)); + r += mul(s3_8, M4(-9.200e-02, -5.976e-03, -7.335e-02, -2.183e-02, 1.265e-01, 6.324e-03, 1.261e-02, -4.413e-02, 8.816e-02, 
-1.475e-01, 2.062e-02, 5.278e-02, 1.811e-01, -7.741e-02, -2.011e-01, -4.065e-02)); + r += mul(s4_0, M4(-1.153e-02, -9.074e-03, -5.989e-02, -2.584e-03, -5.218e-02, 7.476e-02, -8.211e-03, 4.237e-02, -6.923e-02, -1.856e-02, 1.326e-02, -5.366e-02, -2.938e-03, -6.377e-02, -1.163e-01, -1.680e-01)); + r += mul(s4_1, M4(-9.843e-02, 6.010e-02, -2.105e-03, 6.071e-03, 5.479e-02, -3.386e-03, -1.102e-02, -4.528e-02, -1.727e-02, 4.790e-02, 2.434e-02, -7.955e-02, -3.570e-02, -4.003e-02, 1.576e-02, 9.249e-03)); + r += mul(s4_2, M4(-8.375e-02, 5.498e-02, 1.497e-01, -4.453e-02, -5.135e-02, 1.353e-01, -6.536e-02, -1.258e-01, -9.036e-02, -5.740e-02, 1.372e-01, 8.847e-02, 5.655e-02, -7.369e-02, -4.204e-02, 7.450e-02)); + r += mul(s4_3, M4(2.723e-03, -4.919e-02, -1.869e-02, 6.287e-03, 6.744e-02, 3.939e-02, 4.791e-02, -2.436e-02, 9.017e-05, 5.233e-02, 1.542e-02, -3.984e-02, -6.053e-02, 1.544e-02, 1.268e-01, -1.184e-01)); + r += mul(s4_4, M4(-2.181e-01, -7.282e-02, -1.797e-02, -4.704e-03, 2.914e-02, -4.503e-02, -1.223e-01, -3.956e-02, -1.158e-01, -1.896e-02, -2.473e-02, 4.600e-02, 2.196e-02, 1.015e-01, -5.597e-02, 4.143e-02)); + r += mul(s4_5, M4(-5.904e-02, 2.190e-02, 7.496e-02, 1.693e-01, -4.697e-02, -2.332e-02, 1.691e-01, 2.206e-02, 4.138e-02, -5.012e-02, 1.875e-02, 1.281e-01, 3.007e-02, 3.105e-02, 3.457e-02, -6.314e-02)); + r += mul(s4_6, M4(-4.867e-02, 6.212e-03, 3.773e-02, 2.992e-02, 8.184e-02, -1.817e-02, 1.572e-01, -2.633e-02, 5.866e-02, 3.204e-02, 7.714e-03, -2.036e-01, -4.740e-02, -1.816e-02, 1.361e-01, -1.599e-02)); + r += mul(s4_7, M4(1.790e-01, 4.099e-02, 4.671e-02, -1.282e-01, 1.517e-01, 9.965e-02, 9.696e-03, -3.455e-02, 1.063e-01, 9.338e-02, 1.373e-01, -2.089e-02, 1.111e-02, 5.577e-02, 2.652e-02, 5.652e-02)); + r += mul(s4_8, M4(1.772e-02, 6.870e-02, -4.726e-02, 6.538e-02, 4.101e-02, 4.077e-02, 8.750e-02, 4.385e-02, 1.588e-02, 1.211e-02, -3.037e-02, 3.300e-02, -1.147e-01, -9.400e-02, -1.073e-01, -4.589e-02)); + r += mul(s5_0, M4(-7.043e-02, 1.106e-01, 5.424e-02, -2.076e-01, 
-2.114e-02, -6.736e-03, -1.078e-01, 2.016e-01, 1.544e-01, 1.365e-01, 2.181e-01, -1.275e-02, 7.327e-03, 3.924e-02, -1.393e-01, -2.110e-01)); + r += mul(s5_1, M4(-1.034e-01, 1.045e-02, 1.199e-01, -9.829e-02, 9.394e-02, -1.407e-01, -1.636e-01, -2.472e-03, 1.985e-01, 5.539e-02, 2.328e-01, -2.959e-01, -7.821e-02, 2.372e-01, 3.268e-01, -1.386e-01)); + r += mul(s5_2, M4(7.605e-02, 1.772e-01, 8.793e-02, 4.894e-02, -2.980e-02, -1.566e-02, -1.525e-01, -1.220e-02, 3.103e-02, 1.313e-01, 2.984e-01, 2.059e-01, -4.579e-02, 1.324e-01, -4.685e-02, 1.377e-01)); + r += mul(s5_3, M4(8.444e-02, -7.072e-02, -8.838e-02, -1.174e-01, 1.293e-01, -5.982e-02, 4.509e-02, 5.332e-03, -5.717e-03, -7.399e-02, -3.717e-02, -6.773e-02, 9.809e-03, -1.349e-01, -4.095e-02, -4.058e-02)); + r += mul(s5_4, M4(-1.147e-01, -1.989e-02, 2.123e-01, -1.439e-01, -5.222e-03, -2.707e-01, -1.859e-01, -8.678e-02, 3.698e-01, -1.723e-01, -4.526e-01, -2.236e-01, -5.285e-02, 2.418e-01, 1.577e-01, -1.008e-01)); + r += mul(s5_5, M4(-1.191e-02, -3.442e-02, 1.354e-01, -5.484e-02, -9.487e-02, -2.576e-01, 6.984e-02, 1.090e-01, 1.431e-01, 2.262e-01, 2.292e-01, 2.876e-01, -1.843e-01, -2.636e-01, 9.564e-03, -3.201e-01)); + r += mul(s5_6, M4(-3.639e-02, 3.560e-02, 5.691e-02, 1.310e-01, 7.157e-02, -2.525e-02, 6.015e-02, 2.008e-01, 2.294e-01, -6.681e-02, 1.624e-01, -2.359e-01, 2.105e-01, 1.970e-01, 8.391e-03, 2.562e-01)); + r += mul(s5_7, M4(1.713e-01, 1.372e-01, 2.792e-02, -3.161e-02, 1.123e-01, -3.285e-02, -8.937e-02, 2.190e-02, 1.601e-01, -4.079e-02, -1.160e-01, 3.078e-02, 3.789e-02, -1.248e-02, -4.555e-01, -5.014e-02)); + r += mul(s5_8, M4(3.548e-02, -1.643e-02, -2.761e-02, 1.177e-01, 7.889e-02, 4.827e-03, 1.050e-02, 1.173e-01, 7.210e-02, -2.071e-01, -1.656e-01, -5.126e-02, -1.370e-01, -2.145e-02, -4.849e-01, 2.399e-01)); + r += mul(s6_0, M4(-5.271e-02, -4.056e-02, -1.902e-02, 9.367e-02, 1.910e-01, 8.001e-02, -1.315e-01, -5.283e-02, 5.967e-02, -1.328e-02, -1.207e-01, 5.678e-02, 1.117e-01, 8.106e-02, -1.247e-01, -5.059e-02)); + r 
+= mul(s6_1, M4(-1.485e-01, -5.026e-03, -2.219e-02, -3.135e-02, 1.374e-01, 1.129e-01, -1.310e-01, -8.498e-02, -1.109e-02, -2.860e-02, -5.555e-02, 1.483e-02, -1.202e-01, 7.081e-02, -1.195e-02, 2.183e-01)); + r += mul(s6_2, M4(-3.599e-02, 1.129e-01, -2.252e-02, -3.895e-02, -1.965e-02, -8.345e-02, -2.432e-02, -3.539e-02, -4.661e-02, -4.581e-02, 6.963e-02, -6.548e-03, -1.589e-01, 4.702e-02, -9.506e-02, 8.096e-02)); + r += mul(s6_3, M4(9.022e-03, 7.867e-02, 4.089e-02, -1.734e-01, -2.731e-04, -1.261e-02, -1.128e-01, -4.965e-02, -6.452e-03, 2.955e-02, 1.457e-01, 5.475e-02, -1.956e-01, -2.854e-04, 1.397e-01, 4.013e-01)); + r += mul(s6_4, M4(-7.334e-02, -3.460e-02, 1.664e-01, -5.664e-03, 3.891e-01, 9.504e-02, 7.005e-02, -5.228e-02, -9.434e-02, -1.497e-01, 1.776e-01, -2.187e-02, -2.030e-01, 4.712e-01, 4.174e-01, 1.477e-01)); + r += mul(s6_5, M4(4.229e-02, 6.727e-02, -1.072e-01, 1.023e-02, 2.305e-01, -2.663e-02, -5.938e-02, 2.253e-02, 1.888e-01, -1.715e-01, -3.109e-02, -2.876e-02, 9.730e-02, 1.614e-01, 4.309e-02, -1.132e-02)); + r += mul(s6_6, M4(-4.604e-02, 1.801e-01, 1.316e-01, 2.310e-01, -7.241e-02, 1.791e-02, 2.261e-01, -1.032e-03, -8.607e-02, 1.205e-02, 1.081e-01, -1.049e-01, -3.676e-01, 1.789e-01, -2.022e-01, 4.011e-01)); + r += mul(s6_7, M4(-3.995e-02, 1.906e-01, 1.376e-01, 9.652e-02, -4.697e-02, 1.448e-02, -1.473e-02, -2.965e-02, -4.528e-02, -9.930e-02, 2.797e-03, 3.674e-02, -4.603e-01, -3.507e-01, 2.416e-01, -3.515e-01)); + r += mul(s6_8, M4(-7.833e-02, 9.354e-02, -3.600e-02, 5.854e-02, 1.679e-01, -5.118e-02, 3.355e-02, 3.831e-02, -1.616e-03, -8.032e-02, 1.246e-01, -1.168e-01, -2.178e-01, -3.155e-01, 8.497e-02, 9.947e-02)); + r += mul(s7_0, M4(1.317e-02, 6.127e-02, -1.356e-01, -1.334e-01, -1.430e-01, -9.201e-02, -6.164e-02, 9.808e-02, 1.071e-01, -1.024e-01, -2.085e-01, 3.279e-02, -1.141e-01, -9.217e-02, 3.365e-02, -2.446e-02)); + r += mul(s7_1, M4(1.214e-01, -1.052e-01, 3.639e-02, -7.164e-02, -1.102e-01, 8.978e-02, -5.414e-02, 2.847e-02, -6.224e-02, 2.982e-02, 
-4.916e-02, -6.726e-02, 1.001e-01, 2.355e-03, -3.565e-02, 5.170e-02)); + r += mul(s7_2, M4(-7.410e-02, -4.689e-02, -1.308e-02, -7.921e-03, 6.083e-02, -4.359e-02, 1.145e-01, 4.113e-02, -4.052e-02, 3.164e-02, 1.010e-01, 2.453e-02, 5.970e-02, 3.461e-02, 4.648e-02, -2.663e-04)); + r += mul(s7_3, M4(-1.003e-01, 2.135e-02, -1.455e-01, 5.218e-02, -2.309e-01, 6.783e-02, 1.318e-02, -1.827e-01, 1.621e-01, 5.029e-02, -7.106e-02, 1.621e-01, 1.541e-02, 1.652e-02, 2.895e-02, 1.683e-01)); + r += mul(s7_4, M4(-1.522e-01, -3.963e-04, 3.581e-03, -2.834e-02, -3.330e-01, 1.078e-01, 1.278e-01, -2.416e-02, 2.028e-01, 2.793e-01, 3.217e-01, 1.514e-01, -1.400e-01, -1.050e-02, -6.733e-02, 5.471e-02)); + r += mul(s7_5, M4(1.353e-01, 4.765e-02, -1.027e-01, 1.058e-01, -6.736e-02, 5.301e-02, -5.598e-02, 6.322e-02, 1.928e-03, 1.496e-01, 3.799e-02, -3.298e-02, 5.269e-02, 1.571e-01, -9.930e-02, -1.330e-01)); + r += mul(s7_6, M4(-8.066e-03, -5.750e-02, 2.862e-02, -3.012e-02, -1.507e-01, 1.030e-01, 6.889e-02, -3.722e-02, 4.417e-02, 2.836e-02, -1.419e-01, 1.423e-01, 4.511e-02, -3.572e-02, -3.069e-02, -8.108e-03)); + r += mul(s7_7, M4(4.494e-02, 5.384e-02, -3.370e-02, -2.500e-02, -2.476e-01, -4.991e-03, -2.138e-03, -2.498e-02, -1.399e-01, 1.500e-02, -6.577e-02, 7.640e-02, 1.067e-01, -7.990e-02, 2.787e-02, 2.408e-03)); + r += mul(s7_8, M4(-3.526e-02, -2.719e-02, 5.664e-02, -7.852e-02, -4.183e-02, 1.072e-01, -8.091e-02, 5.822e-02, 1.535e-01, -3.325e-02, 1.665e-01, 4.851e-03, 3.232e-03, 1.497e-01, 2.886e-02, -5.168e-02)); + r += V4(-1.078e-02, 4.187e-02, -7.080e-03, 1.714e-02); + return r; +} + +void Pass8(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = 
l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = 
max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, 
s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 9 +//!DESC conv8 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.179e-01, -1.154e-01, 2.741e-02, 6.196e-02, -1.986e-01, -4.433e-02, 1.182e-01, -2.432e-02, 3.710e-02, 4.281e-02, -1.404e-02, -5.320e-02, -4.772e-02, 6.207e-02, -1.707e-01, -4.863e-02)); + r += mul(s0_1, M4(3.627e-02, -2.468e-01, 8.430e-03, 5.969e-02, -1.338e-02, -4.213e-02, -1.246e-02, 1.387e-01, 4.174e-02, 1.655e-01, 6.350e-02, -8.760e-02, 1.408e-01, -1.162e-01, 7.822e-02, -1.009e-02)); + r += mul(s0_2, M4(6.093e-03, 8.566e-05, -3.650e-02, 
-1.255e-01, 2.387e-03, 1.066e-01, 2.412e-02, 1.019e-02, -1.140e-01, -1.713e-01, -5.116e-02, -7.987e-03, -9.589e-02, -2.405e-02, 1.606e-01, 9.482e-02)); + r += mul(s0_3, M4(7.013e-02, -1.590e-02, 8.185e-02, 1.062e-01, -1.050e-02, -6.553e-03, -4.859e-03, 1.272e-01, 1.492e-01, 5.653e-02, -1.592e-01, 8.731e-02, -2.231e-02, 8.736e-02, -3.930e-01, 1.555e-04)); + r += mul(s0_4, M4(1.191e-02, 1.250e-01, 1.416e-01, 4.392e-02, 1.373e-01, 4.931e-03, 2.598e-03, -6.627e-02, -7.282e-02, -3.920e-02, 4.955e-02, 4.273e-02, 6.893e-02, -1.407e-01, 1.878e-02, 5.470e-02)); + r += mul(s0_5, M4(1.175e-01, -5.407e-02, -6.647e-02, -5.952e-02, 8.890e-02, -6.018e-02, 1.071e-01, 5.453e-02, -1.330e-01, -1.647e-01, -3.316e-02, -9.201e-02, -2.849e-02, -6.046e-02, -4.337e-02, 1.021e-01)); + r += mul(s0_6, M4(-1.529e-01, -2.821e-02, 1.086e-01, 3.927e-02, -1.485e-01, -2.149e-02, 6.814e-02, -4.126e-03, 1.656e-01, 9.400e-02, 8.804e-02, 7.325e-02, 3.157e-02, 8.224e-03, -1.134e-01, 5.558e-02)); + r += mul(s0_7, M4(-2.497e-02, -8.253e-02, -1.913e-01, -5.884e-02, -9.665e-02, -6.127e-02, 2.678e-02, 1.353e-01, -3.921e-02, 1.782e-01, 1.702e-01, 1.518e-01, 9.343e-02, -3.666e-02, -5.675e-03, 7.092e-02)); + r += mul(s0_8, M4(-4.033e-02, -2.407e-02, 2.819e-01, 9.621e-03, -2.139e-02, 6.438e-02, -2.593e-02, 1.095e-02, -1.087e-01, -1.379e-01, 2.617e-01, 7.200e-02, 3.833e-02, -1.586e-01, -1.300e-01, 8.798e-02)); + r += mul(s1_0, M4(1.711e-02, -9.933e-03, -7.461e-02, -2.927e-03, 8.384e-02, -7.661e-03, 2.646e-01, 9.328e-03, -6.376e-02, -5.568e-03, -4.761e-03, -4.269e-02, -2.487e-02, -2.441e-02, -1.288e-01, -2.240e-02)); + r += mul(s1_1, M4(2.904e-02, -3.527e-02, 3.301e-02, -1.035e-01, 7.388e-02, 1.715e-01, -4.880e-02, 1.106e-01, 9.189e-03, 2.177e-01, 1.389e-02, -7.839e-02, -4.192e-02, 2.435e-01, -1.254e-01, 8.114e-02)); + r += mul(s1_2, M4(1.409e-02, 7.445e-02, -5.300e-02, -1.246e-01, 1.162e-01, 2.528e-02, 6.691e-02, 1.195e-01, -1.119e-01, -2.102e-01, -1.194e-02, 7.495e-02, -2.134e-02, 8.568e-03, -1.460e-02, 
7.260e-02)); + r += mul(s1_3, M4(2.169e-03, -5.947e-02, -2.129e-02, 7.883e-02, -8.182e-02, -1.660e-02, 1.747e-01, 7.014e-02, 1.355e-01, 1.056e-01, -3.046e-02, 6.735e-02, -6.171e-02, 1.020e-01, -8.185e-02, 5.354e-03)); + r += mul(s1_4, M4(1.269e-02, 8.096e-04, -1.583e-01, 8.178e-02, -3.959e-01, 1.193e-01, 8.464e-02, -1.127e-01, 1.944e-01, 7.556e-02, 1.168e-01, -1.384e-02, -3.573e-02, -1.009e-01, 1.742e-02, -2.912e-02)); + r += mul(s1_5, M4(7.504e-03, -4.270e-02, -3.003e-03, -7.649e-02, -8.989e-02, -2.783e-01, -1.515e-01, 2.359e-01, -9.594e-03, -1.769e-01, 5.729e-02, -5.659e-02, 1.098e-01, -1.249e-01, 6.557e-03, -2.786e-03)); + r += mul(s1_6, M4(7.453e-02, -5.096e-02, -1.363e-02, 6.269e-03, -5.956e-03, -4.357e-02, -1.768e-01, -1.273e-01, 1.452e-02, -7.577e-03, 3.611e-02, -1.243e-01, 1.268e-02, -8.653e-02, 1.094e-02, 1.110e-01)); + r += mul(s1_7, M4(-2.039e-02, 8.292e-02, -1.957e-01, 4.652e-02, -1.223e-01, 9.855e-02, 9.949e-03, 7.547e-02, -6.217e-02, 1.737e-01, 1.743e-02, 3.541e-02, 1.905e-03, -1.051e-01, 1.078e-01, -6.229e-02)); + r += mul(s1_8, M4(-8.799e-02, -1.323e-01, 1.781e-01, 1.228e-01, -5.019e-02, 4.793e-02, -6.260e-02, 4.792e-03, -8.148e-02, -1.676e-01, -6.797e-03, 1.878e-01, 1.042e-02, -1.123e-01, 1.075e-01, -6.213e-03)); + r += mul(s2_0, M4(1.137e-02, -3.013e-02, -5.909e-02, -3.283e-02, 6.601e-02, -3.383e-02, -1.308e-02, 3.534e-02, 1.833e-02, -4.259e-02, 2.534e-02, -1.148e-01, 8.100e-02, 3.943e-02, -1.851e-01, -2.159e-02)); + r += mul(s2_1, M4(-6.727e-02, 4.950e-02, -1.069e-01, -1.365e-01, 2.731e-01, -7.385e-03, 4.301e-02, -4.144e-02, 1.794e-03, 1.076e-01, 1.060e-01, -1.038e-01, -6.883e-04, 1.702e-01, -1.243e-01, 8.879e-02)); + r += mul(s2_2, M4(6.783e-03, 9.341e-03, -3.123e-02, -4.762e-02, -3.262e-02, 1.387e-01, -3.649e-02, -4.453e-02, -5.958e-02, -8.626e-03, 6.617e-03, -4.030e-02, 1.290e-01, 1.421e-02, 1.811e-01, 1.369e-01)); + r += mul(s2_3, M4(-1.794e-01, -2.994e-02, -1.668e-02, -1.213e-01, -1.363e-01, -3.924e-02, -7.976e-02, 8.185e-02, -1.239e-01, 
-6.484e-02, 2.573e-02, -6.085e-02, -8.119e-02, -8.224e-02, 7.713e-02, -4.880e-02)); + r += mul(s2_4, M4(-8.398e-02, -3.546e-02, -3.728e-02, 1.082e-01, 1.656e-01, -1.626e-01, -3.856e-02, -1.520e-01, 5.293e-02, -1.090e-01, -1.047e-01, 5.184e-02, 2.178e-01, -6.848e-02, 3.055e-04, -3.072e-02)); + r += mul(s2_5, M4(8.491e-02, 8.328e-02, 2.124e-01, -6.988e-02, 8.466e-03, -1.768e-01, -1.258e-01, 8.647e-03, -7.203e-02, -8.992e-02, 5.066e-03, 5.165e-02, 1.724e-01, 1.102e-01, -1.397e-01, 1.527e-01)); + r += mul(s2_6, M4(-2.680e-02, 3.200e-02, 6.107e-02, 1.293e-01, -4.097e-02, -1.291e-02, -2.858e-02, -5.949e-02, 8.323e-02, -1.926e-02, -4.581e-02, -1.917e-01, -3.910e-02, -1.339e-01, -1.078e-01, 4.290e-02)); + r += mul(s2_7, M4(-9.518e-02, 4.218e-02, 7.849e-02, 1.521e-01, -9.498e-02, -8.458e-03, 6.540e-03, 5.923e-04, -4.768e-02, 7.709e-02, 2.112e-01, -9.940e-02, -8.771e-02, -1.498e-01, -1.108e-02, -1.136e-01)); + r += mul(s2_8, M4(1.078e-01, -4.815e-02, -2.572e-02, 9.091e-03, -4.395e-02, -5.938e-03, 5.717e-02, -9.596e-02, 2.518e-03, -2.338e-02, 9.928e-02, -3.081e-02, -2.289e-01, 6.257e-02, 7.739e-02, -8.178e-02)); + r += mul(s3_0, M4(1.702e-02, 4.195e-02, 3.265e-02, 3.802e-02, 1.608e-01, -1.130e-01, 7.652e-02, 7.043e-02, 5.133e-02, 7.681e-02, -2.191e-02, 8.329e-02, 4.514e-02, -1.296e-03, 7.691e-02, -1.759e-02)); + r += mul(s3_1, M4(2.979e-01, -6.374e-03, 3.151e-02, -1.477e-01, 1.650e-02, 1.401e-01, 1.159e-01, 1.215e-02, -3.441e-02, 1.056e-01, 2.866e-02, 1.198e-01, -2.216e-02, 1.299e-01, -1.351e-02, -3.889e-02)); + r += mul(s3_2, M4(5.457e-02, 1.967e-01, 1.839e-01, 4.967e-02, -2.029e-02, 6.465e-02, -1.635e-02, 9.906e-02, -2.052e-03, 2.073e-02, 1.095e-02, 6.171e-02, 1.244e-01, 8.828e-02, 1.131e-01, 1.311e-02)); + r += mul(s3_3, M4(-8.743e-02, -4.343e-03, 7.079e-02, 1.165e-01, 3.384e-02, -1.121e-01, -2.762e-02, 1.460e-01, 4.518e-02, -5.515e-02, -4.282e-02, -3.432e-02, 6.203e-02, -1.352e-02, 8.449e-02, -2.627e-02)); + r += mul(s3_4, M4(2.033e-02, -8.235e-02, -3.525e-02, 5.705e-02, 
1.744e-01, -4.119e-02, 3.571e-02, -4.678e-01, 5.413e-02, 5.668e-02, -1.704e-01, 7.155e-02, 4.698e-02, -1.107e-01, 7.151e-02, -7.922e-02)); + r += mul(s3_5, M4(2.104e-01, 5.732e-02, 1.803e-01, 1.305e-01, -1.986e-02, -1.181e-01, -2.979e-01, 7.342e-02, 7.892e-02, 5.552e-02, 1.002e-01, 3.011e-02, 8.980e-02, 6.477e-02, -1.311e-01, 1.142e-01)); + r += mul(s3_6, M4(1.590e-02, -1.074e-01, -9.888e-03, -9.625e-04, 2.324e-02, -3.032e-02, -1.638e-02, -8.205e-02, -3.143e-02, 5.804e-02, -6.501e-02, 5.997e-03, -2.892e-02, -1.674e-02, 1.130e-01, -3.797e-02)); + r += mul(s3_7, M4(4.608e-02, -4.051e-02, -1.537e-01, 1.102e-01, 7.405e-02, -5.225e-02, -2.455e-02, 1.665e-01, -9.942e-02, -1.276e-02, -1.324e-02, -4.027e-02, -1.495e-01, -4.079e-02, -1.258e-02, 2.045e-02)); + r += mul(s3_8, M4(-2.022e-03, -2.816e-02, -9.246e-02, 9.098e-02, -1.344e-01, -6.834e-02, -5.595e-02, -1.213e-01, -7.136e-02, -3.156e-02, -7.149e-02, -3.323e-02, -9.120e-02, 5.753e-03, 2.105e-02, -3.760e-02)); + r += mul(s4_0, M4(2.183e-03, -1.813e-02, 5.619e-02, -1.930e-02, -3.846e-02, 1.212e-02, 1.508e-02, -4.844e-02, -3.569e-02, -3.011e-02, -1.244e-01, -6.167e-03, -1.967e-01, -5.375e-01, -2.783e-01, 1.626e-01)); + r += mul(s4_1, M4(-2.039e-01, 3.192e-02, 7.417e-02, -3.648e-02, 3.189e-03, 1.163e-01, -6.010e-02, 3.590e-02, -4.391e-02, 2.428e-02, -4.231e-02, -4.152e-02, 1.568e-01, -5.492e-01, -7.005e-03, 5.425e-02)); + r += mul(s4_2, M4(-1.202e-01, 4.948e-02, 1.097e-01, -7.178e-02, 1.647e-01, 5.601e-02, 6.415e-02, 3.912e-02, 6.567e-02, 1.682e-02, -3.581e-02, 5.655e-02, -1.028e-01, 3.299e-01, -1.259e-01, -6.016e-02)); + r += mul(s4_3, M4(-1.557e-01, 2.920e-02, -1.330e-01, -1.201e-01, -1.904e-01, 3.443e-02, 3.807e-02, 1.420e-03, -3.260e-02, 5.182e-02, 3.727e-02, 2.890e-02, 3.998e-01, 5.939e-02, -2.319e-01, -5.014e-01)); + r += mul(s4_4, M4(-6.019e-02, -4.538e-02, -7.342e-02, -2.403e-02, -8.856e-02, -2.028e-01, 1.239e-01, 3.872e-02, -1.875e-01, -6.518e-02, 3.180e-02, 1.602e-01, 2.449e-01, 1.173e-01, -1.286e-01, 
-1.252e-01)); + r += mul(s4_5, M4(-1.410e-01, 1.971e-02, 1.741e-01, -1.175e-02, -1.192e-01, 1.440e-01, 1.631e-01, -3.816e-02, 1.021e-01, 1.529e-02, -8.363e-02, 8.811e-02, 3.363e-01, 2.851e-01, 3.174e-01, 1.586e-01)); + r += mul(s4_6, M4(-8.199e-02, 1.145e-01, -8.508e-02, 1.497e-01, -1.427e-01, -1.310e-01, -2.610e-02, -9.997e-02, -3.202e-02, 1.776e-02, -2.032e-02, -2.544e-02, -2.676e-01, -7.637e-01, -1.128e-01, -4.935e-01)); + r += mul(s4_7, M4(4.811e-03, 1.367e-03, 4.257e-02, 2.274e-02, -9.530e-02, -2.415e-02, -1.019e-01, 1.026e-02, 4.476e-02, -6.365e-02, 4.689e-02, -1.648e-01, -3.233e-01, -3.906e-02, -1.925e-01, -1.158e-01)); + r += mul(s4_8, M4(3.481e-02, -1.392e-01, 2.833e-02, -2.713e-02, -3.550e-02, -7.110e-03, -1.792e-01, 1.430e-01, -3.660e-02, -2.613e-02, 1.949e-02, -1.605e-02, 6.411e-02, -2.967e-02, -1.464e-01, -3.527e-01)); + r += mul(s5_0, M4(9.120e-03, -2.916e-02, -3.723e-02, 5.875e-02, -3.905e-03, 1.901e-02, 4.643e-02, 3.110e-02, -1.356e-01, -6.962e-03, 5.408e-02, 1.339e-01, -1.158e-02, 3.588e-02, -6.353e-02, -6.984e-03)); + r += mul(s5_1, M4(-8.116e-02, -4.357e-02, -4.506e-02, 5.045e-02, -5.930e-02, 2.987e-02, -1.818e-01, 1.494e-02, -4.484e-03, 1.597e-01, -1.902e-01, 9.383e-03, -1.226e-01, 1.295e-02, 2.515e-02, 1.143e-02)); + r += mul(s5_2, M4(6.183e-02, 9.416e-02, -8.370e-03, 2.306e-02, 1.287e-02, 2.179e-02, 1.322e-02, 2.942e-03, 1.033e-01, 2.096e-01, -1.261e-02, 1.235e-01, -1.473e-02, 3.457e-02, -9.516e-02, 3.791e-02)); + r += mul(s5_3, M4(5.326e-02, -5.508e-02, -3.068e-02, 2.956e-02, 1.403e-01, -6.957e-03, 2.410e-02, -4.953e-02, -6.187e-02, 2.430e-01, -5.465e-02, -1.818e-01, 1.388e-02, -2.809e-02, -5.922e-02, 7.644e-04)); + r += mul(s5_4, M4(-1.857e-02, -8.718e-02, 1.568e-01, 1.803e-01, 6.858e-02, -2.801e-02, -7.750e-02, 3.642e-02, 4.941e-02, -3.754e-02, -2.961e-01, -2.356e-02, -9.099e-02, -4.424e-02, 6.180e-02, 3.793e-02)); + r += mul(s5_5, M4(1.441e-02, 6.786e-03, 1.331e-01, -2.061e-02, 1.087e-01, 2.706e-02, -1.322e-01, -4.488e-02, -1.955e-01, 
6.401e-02, -4.254e-02, 7.051e-02, 5.702e-02, -1.098e-01, 2.223e-02, -2.229e-02)); + r += mul(s5_6, M4(-2.432e-02, -6.500e-02, -2.856e-02, -3.645e-02, -1.735e-01, 1.872e-02, 8.135e-02, 9.167e-02, 4.470e-02, -2.786e-02, -1.099e-02, -1.680e-01, 2.792e-02, 2.272e-04, 5.839e-02, 9.217e-02)); + r += mul(s5_7, M4(1.112e-01, -4.509e-02, 1.634e-01, -2.861e-01, 3.643e-02, 1.554e-02, -6.258e-02, -1.283e-02, 2.612e-02, 1.743e-01, 7.423e-02, -1.495e-01, -1.106e-01, 4.478e-02, 5.762e-02, 4.473e-02)); + r += mul(s5_8, M4(7.160e-02, -4.476e-02, 1.460e-01, -1.224e-01, -4.553e-02, 1.511e-01, 6.369e-02, 2.777e-02, 1.314e-01, -2.126e-01, -1.414e-01, 1.500e-02, 2.510e-02, 9.726e-03, -6.648e-02, -3.623e-02)); + r += mul(s6_0, M4(3.627e-02, 6.073e-02, -3.193e-02, 6.293e-02, -7.165e-02, -1.066e-02, -4.735e-02, -2.393e-02, 1.094e-02, 9.433e-03, -4.092e-02, -4.544e-02, -3.613e-04, 1.851e-02, 9.064e-02, 8.582e-02)); + r += mul(s6_1, M4(8.239e-02, 5.018e-02, -9.757e-03, -1.659e-02, 8.712e-02, 8.036e-02, 9.214e-02, 9.874e-02, 1.340e-01, 6.547e-02, -6.969e-02, -4.847e-02, -2.288e-02, 1.972e-02, -2.393e-01, -5.440e-02)); + r += mul(s6_2, M4(7.312e-02, 3.785e-02, 3.346e-02, 4.212e-02, -5.342e-02, 2.322e-02, -1.462e-03, 1.626e-02, 3.446e-02, 1.382e-01, -7.093e-02, -2.692e-02, -2.091e-01, 8.998e-02, 7.997e-02, -3.156e-02)); + r += mul(s6_3, M4(2.614e-04, 3.019e-02, 8.045e-02, -1.697e-02, 1.278e-01, -3.694e-02, 2.327e-02, -9.615e-03, 1.075e-01, -6.754e-03, 1.283e-01, 9.763e-02, -1.815e-01, -9.186e-02, 4.242e-02, -7.595e-02)); + r += mul(s6_4, M4(1.812e-01, 4.732e-02, 1.652e-02, -1.520e-02, -1.962e-01, 5.995e-02, 5.367e-02, -4.655e-02, -1.860e-01, -6.588e-02, -9.304e-02, -6.501e-02, 5.029e-01, 5.293e-02, -3.266e-02, 1.421e-01)); + r += mul(s6_5, M4(-1.821e-02, -2.075e-01, 3.785e-02, 8.904e-02, 1.940e-02, -9.087e-02, -9.922e-02, -1.461e-02, -9.858e-03, 9.537e-02, 3.711e-02, -6.326e-03, 6.543e-02, -2.522e-02, -4.784e-01, -1.630e-01)); + r += mul(s6_6, M4(-1.560e-01, -3.541e-02, -5.997e-02, 7.702e-02, 
2.226e-02, -2.751e-02, -6.800e-02, -1.644e-01, 3.878e-02, -1.347e-01, -4.226e-02, -3.344e-03, 2.842e-01, -3.345e-02, -1.678e-01, 1.387e-02)); + r += mul(s6_7, M4(-7.609e-02, -1.081e-01, -8.324e-03, 9.140e-02, -3.538e-02, 1.694e-02, 5.020e-02, -1.722e-02, 4.702e-03, -8.951e-02, -5.850e-02, 8.111e-04, 3.122e-01, -1.631e-01, -1.316e-01, -4.874e-01)); + r += mul(s6_8, M4(-4.049e-02, -7.387e-02, -7.741e-02, 8.224e-02, 1.106e-01, -5.439e-02, 1.329e-02, -4.252e-02, -1.675e-01, 8.479e-02, 2.600e-02, -4.876e-02, 1.208e-01, 3.030e-01, 3.656e-01, -2.404e-01)); + r += mul(s7_0, M4(-5.614e-04, -2.112e-02, 5.199e-03, -1.329e-03, 3.697e-02, -9.503e-02, -1.489e-01, -1.401e-02, -2.104e-01, 8.389e-02, -1.425e-01, 1.198e-01, -4.003e-02, -2.232e-03, -9.903e-03, 3.664e-02)); + r += mul(s7_1, M4(1.116e-01, -4.805e-02, -9.508e-02, -8.863e-02, -8.594e-02, -4.690e-02, 9.794e-02, -3.391e-02, -1.790e-01, 1.526e-01, -7.496e-02, 2.324e-01, -7.781e-02, -7.806e-02, -6.675e-03, -1.827e-02)); + r += mul(s7_2, M4(5.768e-02, 4.320e-02, -6.567e-02, -4.387e-02, -6.329e-02, -1.164e-01, -2.220e-02, -3.429e-02, -5.967e-01, 7.330e-02, -3.555e-01, 8.054e-02, 1.392e-01, -1.012e-02, 3.216e-02, 1.496e-02)); + r += mul(s7_3, M4(-1.115e-01, 1.508e-02, -1.303e-01, 1.906e-02, -1.976e-03, 1.900e-01, -1.030e-02, -5.090e-03, -1.083e-01, -7.993e-03, -6.934e-03, -4.917e-02, -3.553e-02, 1.348e-02, -5.114e-02, 2.150e-02)); + r += mul(s7_4, M4(-3.374e-02, 3.009e-02, 5.857e-02, -9.649e-02, 7.674e-02, 1.091e-01, 2.042e-02, -4.355e-02, -1.533e-01, 1.921e-01, 3.850e-01, -4.044e-02, 5.562e-02, -5.540e-02, 7.332e-02, 3.248e-02)); + r += mul(s7_5, M4(1.772e-02, -7.301e-02, 3.140e-02, 6.169e-02, 1.574e-01, -1.557e-02, -6.124e-02, 3.520e-02, 2.846e-01, 7.785e-02, 2.583e-01, -1.309e-01, 1.899e-01, -1.479e-02, -1.239e-01, -7.160e-03)); + r += mul(s7_6, M4(-3.006e-02, -2.029e-02, 5.972e-02, 3.386e-03, -7.393e-02, 5.966e-02, -4.498e-02, -6.022e-02, 8.330e-02, 2.939e-02, 3.378e-03, 1.490e-01, -9.315e-02, 4.097e-02, 5.681e-03, 
6.256e-02)); + r += mul(s7_7, M4(3.114e-02, 5.778e-02, -4.831e-02, 1.290e-01, 7.470e-02, -9.074e-02, -3.859e-02, 9.126e-02, 2.751e-02, 7.029e-02, 3.303e-02, 5.825e-02, -5.205e-02, 2.948e-02, 5.493e-03, -5.036e-02)); + r += mul(s7_8, M4(1.617e-01, -9.764e-02, 3.887e-02, 6.610e-02, -3.920e-02, -2.124e-01, -7.008e-02, 1.160e-01, -2.669e-01, 3.881e-02, 8.382e-02, 9.165e-02, -1.217e-01, 9.085e-02, 5.695e-03, -8.119e-02)); + r += V4(1.403e-02, -6.125e-03, -2.342e-02, -1.136e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-7.724e-02, 1.840e-01, 5.966e-02, 5.070e-02, 2.850e-02, 2.703e-02, 2.311e-04, 4.008e-02, -1.213e-02, -4.575e-02, -1.344e-01, 1.487e-02, 1.969e-01, -6.987e-02, -1.742e-02, -1.697e-01)); + r += mul(s0_1, M4(-1.275e-02, -4.802e-02, -8.417e-02, 4.384e-02, 2.220e-02, -9.401e-02, -1.768e-01, -2.247e-02, -1.508e-01, 9.067e-02, 1.209e-01, -6.212e-02, 1.372e-02, 2.142e-02, -1.100e-02, -4.698e-04)); + r += mul(s0_2, M4(7.217e-02, -4.635e-02, -4.113e-02, -1.422e-01, 6.937e-02, -1.280e-01, -5.054e-05, 1.096e-02, -1.155e-01, 9.489e-03, 6.707e-02, -1.829e-02, 3.346e-01, -7.680e-02, -3.614e-02, -2.940e-02)); + r += mul(s0_3, M4(-1.549e-02, 4.748e-02, -2.051e-02, 5.490e-02, 3.730e-03, 2.569e-02, 5.174e-02, 4.933e-02, -5.114e-03, 1.192e-01, -1.115e-01, -4.806e-03, -1.512e-01, -2.198e-01, -1.167e-01, -1.390e-01)); + r 
+= mul(s0_4, M4(-1.188e-01, 3.509e-02, 3.543e-02, 1.111e-01, -7.166e-02, -1.343e-01, 1.096e-02, 1.033e-01, -2.424e-01, -7.907e-03, -5.037e-02, 1.417e-01, 1.377e-01, -2.075e-01, -9.802e-02, 3.139e-02)); + r += mul(s0_5, M4(-3.900e-02, -4.654e-03, 2.197e-02, 1.976e-01, -4.585e-02, -3.598e-02, 1.276e-02, -3.306e-02, -3.152e-02, 4.281e-02, 1.201e-01, 3.895e-02, 1.745e-01, -1.216e-01, 1.352e-01, -5.017e-02)); + r += mul(s0_6, M4(1.756e-02, 1.117e-01, 9.898e-02, 1.508e-01, -6.694e-03, -1.332e-04, 1.074e-01, 5.135e-02, 3.021e-02, -1.141e-02, -2.068e-01, -3.470e-02, -1.515e-01, 3.214e-02, 6.859e-02, 1.052e-01)); + r += mul(s0_7, M4(2.112e-01, 1.255e-01, -1.653e-01, -1.735e-01, -1.008e-01, 4.133e-02, 2.729e-02, -9.144e-02, 2.082e-01, 6.797e-02, 2.031e-01, -5.398e-02, -1.601e-02, 3.365e-01, -7.537e-02, -7.077e-02)); + r += mul(s0_8, M4(1.310e-01, -1.838e-01, -1.824e-01, 2.686e-02, -5.196e-03, 2.406e-02, -7.136e-02, -1.845e-03, 2.307e-01, 4.479e-02, 2.829e-01, 1.392e-01, 2.334e-01, 8.407e-02, 4.630e-02, -8.520e-02)); + r += mul(s1_0, M4(2.114e-03, 2.668e-02, -1.427e-02, -2.694e-02, -3.965e-04, 2.847e-02, 8.394e-02, -2.785e-01, 4.981e-02, -2.619e-02, -9.234e-02, 1.929e-01, 1.035e-01, 1.592e-02, -2.168e-02, -3.591e-02)); + r += mul(s1_1, M4(5.445e-02, -2.741e-03, -1.524e-02, 2.872e-02, 1.501e-02, 1.259e-01, 2.840e-02, -1.047e-01, -1.001e-01, 3.405e-02, 4.572e-02, -8.381e-02, 7.945e-02, -2.578e-02, -7.845e-02, 2.507e-02)); + r += mul(s1_2, M4(-4.223e-02, 2.079e-02, 7.160e-03, -3.529e-02, -2.844e-02, 5.381e-04, -1.173e-01, -5.351e-02, -3.105e-02, 2.772e-02, -1.366e-02, 9.494e-02, 2.181e-01, -7.016e-02, 1.038e-01, 7.938e-02)); + r += mul(s1_3, M4(3.777e-02, 5.161e-03, 7.237e-02, 6.253e-02, 1.684e-01, -1.439e-02, 1.439e-01, 8.895e-02, -8.568e-02, 2.804e-02, -1.905e-02, -3.772e-02, -1.072e-01, -1.312e-02, 5.895e-02, -1.760e-02)); + r += mul(s1_4, M4(1.762e-01, 2.947e-02, 1.324e-01, 3.135e-02, -3.291e-03, 5.106e-02, 2.093e-01, -1.266e-01, -1.403e-01, -9.251e-02, -1.132e-01, 
1.206e-01, -1.451e-02, 4.177e-02, -4.161e-02, -3.751e-02)); + r += mul(s1_5, M4(3.326e-02, 6.515e-02, -1.019e-01, 1.452e-01, 1.364e-01, 3.613e-02, -4.264e-02, -1.150e-01, 3.261e-02, 2.848e-02, 1.705e-01, -5.300e-02, -4.921e-03, -8.104e-02, -9.616e-02, 5.653e-02)); + r += mul(s1_6, M4(-8.075e-02, 3.694e-02, -3.470e-02, 3.817e-02, -7.741e-02, -6.873e-02, 6.621e-02, 1.264e-01, 1.353e-01, 3.871e-02, -7.518e-02, -1.563e-03, -4.559e-02, 9.546e-02, 2.271e-02, -2.953e-02)); + r += mul(s1_7, M4(-1.537e-01, -4.538e-02, 3.473e-02, 6.479e-02, -1.853e-01, 9.301e-02, 1.812e-01, 7.560e-02, 4.490e-02, 1.386e-01, 6.909e-02, -2.373e-03, -8.195e-02, 1.602e-01, -5.891e-02, -8.507e-02)); + r += mul(s1_8, M4(-1.283e-01, -8.259e-02, 1.211e-01, 1.746e-02, -9.038e-02, 1.928e-01, 3.724e-02, 4.097e-02, 1.269e-01, -4.324e-02, 2.607e-01, 8.578e-02, 1.097e-01, -3.740e-02, 1.986e-02, 2.089e-02)); + r += mul(s2_0, M4(-4.258e-02, 8.224e-02, 1.048e-02, 7.118e-02, -9.890e-02, 5.473e-02, -3.945e-02, 3.890e-02, -5.820e-02, 1.354e-01, 9.434e-02, -2.227e-02, 1.141e-01, 3.242e-03, 1.918e-01, -1.271e-01)); + r += mul(s2_1, M4(3.310e-02, -2.224e-02, 2.981e-02, -1.571e-01, 6.848e-02, -3.456e-02, 9.113e-02, 4.380e-02, -6.264e-02, 2.110e-01, -6.695e-02, 4.847e-02, 6.284e-02, 1.602e-01, -8.717e-02, 1.223e-01)); + r += mul(s2_2, M4(7.283e-02, 7.600e-02, -6.312e-03, 1.792e-02, -4.016e-02, 6.400e-03, -3.479e-02, -8.229e-02, 6.997e-02, 1.148e-01, 1.087e-01, 1.213e-01, 1.358e-01, -1.168e-01, -4.992e-02, 1.194e-01)); + r += mul(s2_3, M4(-8.611e-02, -6.279e-02, 3.449e-03, -9.984e-02, -3.702e-02, -1.691e-02, 1.295e-01, -5.580e-03, -5.515e-02, 1.853e-01, -1.760e-02, -6.246e-02, -2.026e-01, -1.262e-03, 8.512e-02, 5.164e-02)); + r += mul(s2_4, M4(-8.568e-02, -7.055e-02, 8.443e-02, -1.094e-01, 3.930e-02, -1.755e-01, 2.495e-01, -1.392e-01, 4.257e-03, 2.378e-01, -1.398e-01, -3.154e-03, -2.801e-01, -1.587e-01, 9.718e-02, -7.764e-02)); + r += mul(s2_5, M4(-6.386e-02, -2.549e-02, -7.504e-02, 7.184e-02, 4.190e-02, 2.954e-02, 
-1.622e-01, 9.567e-02, 5.567e-02, 8.607e-02, -4.043e-02, 1.531e-02, -1.740e-01, -5.285e-02, -8.496e-02, 1.134e-02)); + r += mul(s2_6, M4(-1.170e-02, 1.031e-01, 3.717e-02, -7.724e-02, 9.260e-02, 2.144e-02, 1.584e-01, 7.160e-02, 6.875e-02, -6.063e-03, -2.960e-03, -8.502e-02, 8.992e-02, 2.183e-03, 4.504e-02, -5.336e-03)); + r += mul(s2_7, M4(2.265e-01, 2.020e-02, 1.221e-02, -6.520e-02, 1.199e-01, -3.575e-02, 8.107e-02, -2.064e-02, -4.061e-02, 2.520e-02, 1.486e-01, -1.737e-02, -6.462e-02, -1.468e-02, 3.628e-02, 3.528e-02)); + r += mul(s2_8, M4(1.223e-01, 6.030e-02, 5.806e-02, 1.269e-03, 6.043e-02, -3.870e-02, -7.984e-02, -1.056e-02, -2.352e-02, -5.047e-02, -4.811e-02, 1.098e-01, 1.659e-01, -8.937e-02, -3.395e-02, -2.555e-02)); + r += mul(s3_0, M4(-1.003e-01, 9.404e-02, -1.036e-01, 1.189e-01, -6.630e-02, 4.198e-02, -1.206e-01, -8.588e-02, 6.487e-02, -4.289e-02, -5.520e-02, 4.863e-02, 7.616e-02, -4.563e-02, 6.938e-02, -8.349e-02)); + r += mul(s3_1, M4(3.090e-02, -5.794e-02, 1.720e-02, -1.693e-01, 1.778e-02, -6.134e-02, -6.254e-02, 8.614e-03, 1.217e-01, -7.393e-02, -9.394e-02, -8.675e-02, 3.951e-02, 6.524e-02, -5.517e-02, -2.358e-03)); + r += mul(s3_2, M4(1.489e-01, -3.451e-02, -2.032e-01, -8.297e-02, 1.246e-01, -1.291e-01, -7.800e-02, -1.371e-01, -2.997e-03, -2.446e-02, 1.304e-02, 6.479e-02, -1.774e-02, -2.228e-03, 1.622e-02, 3.105e-02)); + r += mul(s3_3, M4(1.988e-02, -1.177e-01, 5.934e-02, 1.274e-01, -1.171e-01, -2.678e-03, 1.435e-01, -3.637e-02, -1.024e-01, 7.228e-02, 5.230e-02, -2.965e-02, -2.015e-02, 6.415e-02, 3.267e-02, -7.663e-02)); + r += mul(s3_4, M4(-1.052e-01, 4.454e-02, 8.595e-02, 2.379e-01, -1.163e-01, -1.496e-01, 1.817e-01, -2.559e-01, 5.475e-03, 3.444e-02, -1.957e-01, -9.725e-02, 7.565e-02, -1.111e-01, 3.272e-02, -7.525e-02)); + r += mul(s3_5, M4(-7.455e-03, -8.427e-02, -4.218e-02, 4.790e-02, 9.401e-02, -1.121e-01, -1.447e-01, 3.156e-02, 7.532e-02, -1.715e-02, 1.460e-02, 1.046e-01, -1.586e-01, -5.409e-02, -5.411e-02, -3.003e-02)); + r += mul(s3_6, 
M4(-1.436e-01, 1.030e-01, 5.531e-02, 1.360e-01, 9.290e-02, 1.553e-01, 1.950e-01, -3.723e-02, 3.381e-02, -6.820e-02, -1.381e-02, -1.080e-01, -5.238e-02, -1.522e-02, 4.833e-03, -1.492e-02)); + r += mul(s3_7, M4(5.130e-02, -5.561e-02, -2.519e-02, 1.129e-01, -3.739e-03, 6.413e-02, -2.109e-02, -1.821e-01, -1.472e-02, -9.493e-02, 2.997e-02, -3.930e-02, 2.223e-02, -9.339e-02, 5.872e-02, 9.591e-02)); + r += mul(s3_8, M4(5.037e-02, 3.653e-03, 2.274e-02, 1.995e-02, -5.041e-03, 2.161e-02, 9.535e-03, 1.757e-02, 4.400e-02, -8.377e-03, 1.082e-02, 3.105e-02, -8.612e-02, 4.627e-02, -8.301e-03, -2.559e-02)); + r += mul(s4_0, M4(5.987e-02, -1.482e-02, 3.351e-02, -6.310e-02, -4.229e-03, -6.841e-02, -6.166e-03, 1.118e-02, 4.439e-02, 4.798e-02, 1.299e-03, 5.123e-02, -3.027e-01, 1.494e-01, 4.118e-01, 3.597e-01)); + r += mul(s4_1, M4(6.472e-02, -3.500e-02, -3.729e-02, -1.382e-01, -1.232e-01, 2.478e-02, 1.782e-02, 7.169e-02, 1.073e-01, -9.537e-03, 6.582e-03, -3.679e-02, -3.886e-01, -1.123e-01, -3.075e-01, 1.037e-01)); + r += mul(s4_2, M4(-6.304e-02, 8.001e-03, 1.695e-01, 4.003e-02, -1.218e-02, 5.538e-02, 8.938e-02, 7.200e-02, 1.047e-01, 4.492e-02, -1.121e-01, 6.089e-02, -9.417e-03, -1.168e-01, -1.622e-02, 1.297e-02)); + r += mul(s4_3, M4(7.746e-03, -5.015e-04, 9.506e-02, 4.396e-02, 1.487e-02, -1.701e-01, -3.412e-02, -9.371e-02, 1.160e-01, 1.152e-01, 3.521e-03, 1.941e-02, 1.238e-01, 9.036e-02, -3.244e-01, 1.192e-02)); + r += mul(s4_4, M4(1.213e-01, 5.326e-02, -2.432e-01, 1.279e-01, 1.329e-01, -9.406e-02, 3.120e-02, 3.744e-03, -1.487e-01, 1.595e-02, 8.325e-02, 1.348e-02, -2.696e-01, 7.585e-02, -1.615e-01, 1.387e-01)); + r += mul(s4_5, M4(-6.099e-02, -4.308e-03, 2.213e-01, -8.152e-02, -9.230e-02, -3.547e-02, -7.342e-04, -1.075e-01, -9.105e-02, -2.387e-02, -7.690e-02, 1.122e-01, 4.953e-03, -5.518e-02, -5.026e-02, 1.549e-01)); + r += mul(s4_6, M4(1.285e-01, 1.177e-02, 1.186e-01, -6.946e-02, 8.408e-02, 2.238e-02, 6.817e-02, 1.605e-01, 3.962e-02, -3.125e-02, -4.336e-02, -4.538e-03, -3.323e-01, 
2.379e-02, 7.357e-02, 1.818e-01)); + r += mul(s4_7, M4(3.967e-02, 1.028e-01, -2.228e-03, -1.031e-02, 1.905e-01, 6.901e-02, 2.475e-02, -1.720e-01, -6.732e-03, 8.988e-02, 2.008e-02, -5.934e-03, -1.280e-01, -2.170e-01, -3.173e-01, -9.140e-02)); + r += mul(s4_8, M4(3.521e-01, 5.174e-03, 4.608e-02, 1.210e-01, 1.417e-01, -8.578e-02, 8.202e-02, 5.828e-02, -1.003e-01, -5.269e-03, -1.538e-02, -8.339e-02, 7.307e-02, 3.105e-02, -2.396e-01, -3.173e-01)); + r += mul(s5_0, M4(-3.747e-02, 7.280e-03, -9.053e-02, 1.938e-01, 1.071e-01, -7.245e-02, -5.811e-02, -3.072e-02, 1.554e-01, -8.037e-02, 9.611e-02, 4.908e-02, 8.992e-02, -2.597e-02, -1.329e-02, -9.601e-02)); + r += mul(s5_1, M4(5.216e-02, 4.516e-04, -1.011e-01, -7.019e-02, -1.147e-02, 4.769e-02, -3.852e-02, -9.921e-02, 2.953e-01, -1.791e-01, 2.752e-02, -1.665e-02, -7.379e-02, 2.763e-02, 1.716e-02, -4.656e-02)); + r += mul(s5_2, M4(-1.576e-02, -1.048e-01, 5.974e-02, 4.993e-02, -6.986e-03, 1.785e-02, 1.141e-02, 3.672e-02, 7.632e-02, -7.359e-02, -2.939e-01, -1.764e-01, -3.114e-03, -8.975e-02, -1.164e-02, 4.968e-02)); + r += mul(s5_3, M4(-2.444e-02, -1.835e-02, -2.875e-02, 1.924e-01, -3.134e-02, -3.077e-02, 7.153e-02, 1.045e-01, 2.602e-01, -8.030e-02, -2.347e-01, -2.926e-02, -8.184e-02, 4.675e-02, -1.647e-02, -5.338e-02)); + r += mul(s5_4, M4(1.272e-01, 6.238e-02, -1.660e-01, 1.850e-01, 3.613e-02, 4.229e-02, -9.294e-02, 4.839e-02, -1.690e-01, -7.589e-02, -6.957e-02, 1.222e-02, 4.209e-02, 7.453e-02, 7.989e-02, 4.162e-02)); + r += mul(s5_5, M4(-4.380e-02, -1.028e-01, 3.865e-03, 1.097e-01, -3.281e-02, 8.102e-02, 1.514e-02, 8.578e-02, -2.046e-01, 1.479e-01, 2.743e-01, 1.518e-01, 1.183e-03, 6.509e-02, -2.623e-02, -3.482e-02)); + r += mul(s5_6, M4(6.535e-03, 6.839e-02, 2.374e-02, 5.277e-02, -1.126e-02, -2.428e-02, 6.067e-02, 2.483e-02, -8.028e-02, 7.573e-03, 8.035e-02, 1.940e-01, -2.065e-03, -3.858e-02, -1.287e-02, -4.179e-03)); + r += mul(s5_7, M4(-4.731e-02, 5.665e-02, -1.218e-01, -9.370e-02, -5.400e-02, 1.743e-02, -2.003e-01, 
-9.808e-02, 4.797e-02, 8.253e-02, 7.379e-02, 1.515e-01, -4.230e-02, -1.396e-01, 3.179e-02, -1.131e-03)); + r += mul(s5_8, M4(2.792e-02, -3.493e-02, 8.325e-02, 9.952e-03, -3.813e-02, 3.785e-04, -8.763e-04, 7.903e-02, 1.624e-01, 1.832e-01, 1.483e-01, 3.208e-02, -6.978e-02, 5.305e-02, -3.591e-02, -9.302e-02)); + r += mul(s6_0, M4(5.284e-02, -2.286e-02, 7.535e-02, 4.906e-02, 1.200e-02, -1.248e-01, -1.162e-02, -1.586e-02, -3.189e-02, 6.001e-03, 1.181e-03, -3.973e-03, 8.256e-02, 8.548e-02, 1.576e-01, -1.515e-01)); + r += mul(s6_1, M4(1.284e-01, -1.911e-02, 8.744e-02, 9.790e-02, 2.740e-02, 1.531e-02, -2.574e-02, -3.117e-02, -2.859e-02, 2.684e-02, 2.666e-02, 6.812e-03, -4.250e-02, -1.488e-01, -1.181e-01, -1.295e-01)); + r += mul(s6_2, M4(3.720e-02, 3.221e-02, 3.435e-02, 6.706e-02, -5.516e-03, 4.287e-02, 1.236e-02, 1.223e-02, 7.437e-03, 2.018e-02, 4.017e-02, 8.277e-02, 1.690e-01, -1.792e-02, -7.429e-03, -2.912e-01)); + r += mul(s6_3, M4(-5.532e-02, 6.336e-02, 6.260e-02, 9.205e-03, 1.285e-02, 6.509e-02, 3.856e-02, -2.021e-02, 6.904e-02, -6.493e-03, 1.655e-02, -4.652e-02, 4.887e-03, 6.818e-02, -2.779e-02, -9.416e-02)); + r += mul(s6_4, M4(-1.059e-01, -1.827e-02, -9.839e-02, 1.612e-01, -4.882e-02, 6.131e-02, 1.091e-01, -9.698e-02, 2.015e-02, -2.242e-02, 4.939e-02, -3.704e-02, 2.272e-01, 7.772e-02, 7.472e-02, -1.928e-01)); + r += mul(s6_5, M4(1.585e-01, -3.437e-02, -9.124e-02, -7.755e-02, 2.315e-02, 4.228e-02, 2.005e-02, 3.590e-02, -2.346e-02, 5.805e-02, 1.054e-02, 9.887e-02, -1.602e-01, 1.184e-01, 1.029e-01, -7.007e-02)); + r += mul(s6_6, M4(2.031e-02, 3.183e-02, -2.129e-02, -9.839e-03, -2.987e-02, -2.387e-02, -6.374e-02, 4.744e-02, -3.038e-02, 1.535e-01, -6.029e-02, -2.271e-02, 1.354e-01, 1.756e-01, 1.662e-01, 1.384e-01)); + r += mul(s6_7, M4(-1.276e-01, 2.346e-02, -1.002e-01, 3.707e-02, 1.235e-01, 3.925e-02, -1.002e-02, 3.066e-02, -1.582e-02, 7.662e-02, -7.702e-02, -1.235e-02, -1.770e-01, -6.730e-02, 1.516e-01, 1.566e-01)); + r += mul(s6_8, M4(3.088e-02, -2.108e-02, 
8.897e-02, 7.054e-02, 1.846e-01, -2.521e-02, -9.622e-03, 7.985e-02, 7.300e-02, -1.049e-01, 6.219e-02, 1.374e-02, -1.286e-01, 4.387e-01, -7.610e-02, -6.778e-01)); + r += mul(s7_0, M4(-7.163e-02, -8.077e-03, -1.022e-01, 1.473e-02, -5.050e-02, 5.343e-02, 9.415e-02, 1.001e-01, 1.051e-01, -1.029e-01, -1.508e-01, -3.213e-01, -6.002e-02, -1.543e-02, -6.470e-02, 4.712e-02)); + r += mul(s7_1, M4(1.532e-02, 3.967e-02, -1.977e-01, 2.849e-02, 4.239e-02, 6.796e-02, 5.239e-02, 1.189e-01, 3.101e-01, -4.135e-02, 1.138e-01, 4.669e-02, 1.385e-02, 9.605e-03, 7.201e-02, 7.464e-03)); + r += mul(s7_2, M4(3.819e-02, 4.565e-02, 3.408e-02, 2.257e-02, 1.308e-01, 2.826e-02, 6.109e-02, -4.583e-02, 5.506e-02, -3.221e-01, 2.697e-03, 4.039e-01, 1.259e-01, 3.230e-02, 1.611e-02, -4.330e-02)); + r += mul(s7_3, M4(7.958e-02, -1.910e-02, 8.226e-02, 8.214e-02, -8.223e-02, 6.258e-02, -6.834e-02, -7.819e-02, 1.021e-02, 2.266e-02, 7.075e-02, -1.537e-01, 2.165e-02, 5.662e-02, -8.941e-02, 4.015e-02)); + r += mul(s7_4, M4(-2.136e-02, -6.519e-02, 1.436e-01, 1.326e-02, -2.437e-01, 6.408e-02, 1.316e-02, -8.968e-02, 1.674e-01, -6.148e-02, -7.849e-02, 8.925e-02, -2.668e-02, 1.509e-01, -4.980e-02, 5.554e-02)); + r += mul(s7_5, M4(-1.331e-01, 7.111e-03, 3.069e-03, 3.506e-02, -1.004e-01, -1.103e-01, -9.701e-02, 8.936e-02, -1.729e-01, -1.700e-01, -4.142e-02, 1.028e-01, -6.603e-02, 9.418e-02, 1.503e-02, -1.254e-02)); + r += mul(s7_6, M4(-5.121e-02, 2.249e-02, 1.791e-02, -2.844e-02, -4.702e-02, 3.654e-02, -8.635e-03, 1.394e-01, -9.286e-02, -8.097e-02, -1.817e-02, -1.863e-01, 9.467e-02, 4.542e-03, 2.574e-03, -5.944e-02)); + r += mul(s7_7, M4(1.354e-01, -3.398e-02, 8.637e-02, -4.478e-02, -1.265e-01, 9.978e-02, 3.152e-02, -6.899e-02, 1.511e-02, -1.379e-02, -1.369e-01, 4.768e-02, 2.655e-02, -1.006e-01, -8.618e-03, 8.087e-03)); + r += mul(s7_8, M4(-3.514e-02, 2.768e-02, 6.856e-02, -2.232e-02, 1.918e-01, -1.164e-01, 2.628e-02, 1.171e-01, 2.024e-01, -1.593e-01, 4.787e-02, 1.028e-01, -4.236e-02, -6.423e-02, 2.739e-03, 
1.502e-02)); + r += V4(1.516e-02, 8.959e-02, -1.787e-02, -1.065e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.187e-02, 6.264e-02, -2.532e-02, 7.597e-02, 1.335e-02, -7.549e-02, 2.973e-02, -5.040e-02, -6.046e-02, -3.364e-02, 2.287e-02, -3.193e-02, 4.584e-02, 3.559e-02, 9.996e-02, -1.737e-01)); + r += mul(s0_1, M4(1.244e-02, 1.086e-01, 5.084e-02, -9.225e-02, 3.146e-02, 5.506e-02, -2.089e-02, 4.840e-02, 2.297e-02, -3.274e-05, -5.653e-02, 3.252e-01, -6.403e-02, 1.261e-02, 1.052e-01, 6.297e-02)); + r += mul(s0_2, M4(1.698e-02, 8.388e-02, -3.422e-02, -1.496e-02, 1.081e-01, -6.162e-02, -5.071e-02, -1.067e-01, -8.022e-02, -3.184e-02, -2.127e-02, 9.637e-03, 3.873e-02, -1.251e-01, 1.333e-01, -7.872e-02)); + r += mul(s0_3, M4(-7.789e-02, -6.662e-02, 1.294e-01, -9.989e-02, -7.352e-02, -4.578e-02, -7.462e-02, 2.127e-03, -8.138e-02, 6.226e-02, -2.799e-02, -3.625e-02, 2.212e-01, -1.652e-01, 1.428e-01, 2.478e-01)); + r += mul(s0_4, M4(5.669e-03, -1.183e-01, 4.026e-02, -2.221e-01, 1.133e-02, 4.156e-02, 1.597e-01, 7.346e-02, -2.419e-03, -2.949e-02, 3.021e-01, 2.356e-01, -6.494e-03, -1.026e-01, -8.227e-02, 1.740e-02)); + r += mul(s0_5, M4(-2.167e-01, 1.039e-01, -5.977e-02, -9.074e-03, -1.117e-01, 4.771e-02, 1.119e-01, 1.650e-01, -1.064e-01, -3.939e-02, 2.190e-02, 1.414e-01, 2.592e-02, 7.613e-02, -2.695e-02, -5.662e-02)); + r 
+= mul(s0_6, M4(6.803e-02, -1.202e-01, -3.315e-03, 1.625e-01, -1.301e-01, -1.277e-02, -1.577e-01, 5.537e-02, 1.052e-01, 2.325e-01, 2.426e-02, -1.439e-01, 7.457e-02, -8.714e-02, 2.378e-01, 1.292e-02)); + r += mul(s0_7, M4(-1.714e-01, -3.724e-02, 1.219e-01, 1.407e-01, -1.354e-01, 1.838e-02, 2.183e-02, 7.493e-02, 7.883e-02, -5.048e-02, 4.846e-02, -1.183e-01, -2.238e-01, 9.183e-02, -1.035e-01, 4.537e-02)); + r += mul(s0_8, M4(-9.619e-02, 7.385e-02, -3.486e-02, 1.845e-02, -7.486e-02, 6.599e-02, -5.713e-02, 6.247e-02, -4.777e-02, -1.458e-02, 1.130e-02, -1.824e-02, 8.031e-02, -1.837e-02, -1.499e-01, 7.544e-02)); + r += mul(s1_0, M4(3.624e-02, 1.283e-01, -5.294e-02, -5.316e-04, -1.742e-01, 2.873e-02, -1.543e-01, -7.869e-02, -1.277e-01, 1.693e-02, 1.002e-02, 4.104e-02, -1.437e-02, -4.580e-02, -1.460e-01, 3.390e-02)); + r += mul(s1_1, M4(6.177e-02, 4.738e-02, -1.577e-02, 3.133e-03, -2.669e-02, 2.046e-01, -6.449e-02, -4.281e-02, -2.132e-02, 3.419e-02, -2.680e-02, 1.263e-01, -1.479e-03, 2.514e-02, 2.780e-02, 1.013e-01)); + r += mul(s1_2, M4(1.366e-02, 1.883e-02, 4.764e-02, 1.359e-01, -1.593e-02, -4.673e-02, -8.732e-02, -9.512e-02, -1.047e-01, -4.604e-02, -1.665e-02, 1.337e-01, -1.242e-02, 9.301e-02, -2.295e-02, 3.417e-02)); + r += mul(s1_3, M4(-1.517e-02, -3.551e-02, 4.704e-02, 7.443e-03, -1.477e-01, 3.772e-02, 1.282e-01, 1.508e-01, -7.728e-02, 1.692e-02, 4.276e-02, 7.591e-02, 1.067e-02, -4.369e-02, -5.641e-02, 9.330e-02)); + r += mul(s1_4, M4(4.017e-03, -2.550e-02, 5.103e-02, -2.922e-02, 6.819e-02, -1.601e-01, 3.182e-01, 1.085e-01, -8.375e-02, -1.219e-01, 1.285e-01, 9.498e-02, -5.342e-02, -8.636e-02, -7.220e-02, 2.510e-03)); + r += mul(s1_5, M4(-7.125e-02, 4.059e-02, -9.271e-02, 1.242e-02, -6.271e-02, 4.583e-02, 2.297e-01, 1.034e-01, -9.418e-02, -1.697e-01, -3.932e-02, -4.783e-02, -7.415e-02, 2.491e-02, -7.569e-03, 2.422e-02)); + r += mul(s1_6, M4(3.736e-02, -8.572e-02, 3.994e-02, 4.105e-02, -3.034e-02, 1.062e-01, -4.819e-02, 8.814e-02, 8.113e-02, 2.036e-01, 4.791e-02, 
-1.842e-01, 6.856e-02, -3.210e-02, 7.227e-03, -8.558e-02)); + r += mul(s1_7, M4(-1.797e-01, -2.560e-01, 1.961e-02, -5.501e-02, -1.478e-01, -1.228e-01, 2.369e-02, 1.566e-02, 6.271e-02, -3.219e-02, 3.045e-02, -7.925e-02, -2.949e-03, 1.436e-01, 1.822e-03, -3.287e-02)); + r += mul(s1_8, M4(-7.270e-02, -6.270e-02, -7.584e-02, -6.560e-02, -5.748e-02, 4.359e-02, 5.897e-02, -5.726e-02, 1.690e-02, -7.352e-02, -1.782e-02, -8.810e-02, -3.535e-02, -2.069e-02, -4.821e-02, 2.636e-02)); + r += mul(s2_0, M4(-2.083e-02, -2.477e-02, 7.071e-02, -4.785e-02, -3.290e-02, -9.460e-02, 7.853e-03, -1.785e-01, -1.315e-01, -1.173e-01, 9.080e-03, -3.729e-02, 6.511e-02, 4.687e-02, 7.942e-02, 9.895e-02)); + r += mul(s2_1, M4(-5.864e-02, -7.510e-02, -4.419e-02, -5.842e-02, 2.984e-01, 4.923e-02, 4.713e-02, -7.945e-02, -1.234e-01, -3.238e-02, -1.697e-02, 6.394e-02, -1.147e-01, 1.948e-01, 7.497e-02, 1.001e-01)); + r += mul(s2_2, M4(3.748e-02, -1.411e-01, 2.326e-02, 7.799e-02, 2.809e-03, 6.622e-03, 2.647e-02, -1.618e-01, 6.017e-02, -7.081e-02, -1.233e-02, 1.621e-01, 1.593e-02, 1.733e-01, 2.370e-01, -8.014e-02)); + r += mul(s2_3, M4(-6.741e-02, -3.408e-02, -1.608e-01, 3.781e-02, -1.464e-01, -1.036e-01, -1.046e-01, 1.298e-01, -5.866e-03, 5.742e-02, 2.522e-02, 2.219e-02, -1.086e-01, -2.096e-02, -2.311e-02, -4.706e-02)); + r += mul(s2_4, M4(4.554e-02, -2.465e-02, -7.264e-02, 6.134e-02, 1.440e-02, 3.166e-02, 2.649e-01, 1.871e-01, -3.898e-02, 4.163e-02, -5.572e-02, -1.161e-01, -2.489e-01, 6.822e-02, 4.626e-03, 1.167e-01)); + r += mul(s2_5, M4(-9.042e-03, -7.050e-02, 4.928e-03, 7.088e-02, 6.400e-02, 2.114e-01, -3.590e-03, -5.971e-02, 6.975e-02, 4.664e-02, 8.553e-02, -5.178e-02, 7.938e-02, -1.139e-01, -1.323e-01, -3.062e-02)); + r += mul(s2_6, M4(-6.992e-02, 9.754e-03, 2.215e-01, -3.585e-03, -1.091e-01, -1.363e-01, 7.880e-02, -2.762e-02, -9.997e-02, 2.966e-03, 3.448e-02, -1.366e-01, -1.227e-01, 1.809e-02, 1.784e-01, 1.268e-02)); + r += mul(s2_7, M4(-5.271e-02, -4.306e-02, 5.939e-04, 4.301e-02, -9.889e-02, 
8.578e-04, -3.276e-02, -4.598e-02, -6.198e-02, -1.495e-01, 6.664e-02, -9.525e-02, 8.331e-03, 2.550e-03, -3.578e-02, 1.648e-02)); + r += mul(s2_8, M4(-2.549e-02, 3.285e-02, -3.637e-02, 4.108e-02, -6.827e-03, 4.565e-02, 2.817e-02, -3.361e-03, -2.407e-02, 1.267e-01, 5.727e-03, 1.873e-02, -7.541e-03, 6.248e-02, 8.835e-02, -7.862e-02)); + r += mul(s3_0, M4(5.343e-02, 2.287e-02, -5.620e-02, -2.606e-02, -2.511e-01, -7.720e-02, 9.825e-02, -2.432e-01, -5.303e-02, 2.462e-02, -1.625e-01, 3.261e-02, -3.086e-02, -6.193e-02, -1.012e-02, 1.096e-01)); + r += mul(s3_1, M4(-7.270e-02, 1.129e-01, 5.661e-02, 3.421e-02, 2.881e-01, -1.025e-01, 1.285e-02, -9.748e-02, -1.924e-02, 1.131e-01, 1.275e-01, 7.935e-02, 1.309e-01, -2.821e-02, 7.106e-02, 4.430e-02)); + r += mul(s3_2, M4(1.970e-02, -1.399e-01, 1.240e-01, 1.530e-01, -7.961e-02, -2.087e-02, -8.064e-02, -2.028e-01, -2.397e-02, 3.444e-02, -3.574e-02, 2.877e-02, 3.491e-02, 1.069e-01, 4.066e-02, -1.734e-02)); + r += mul(s3_3, M4(-7.098e-02, -3.545e-02, -2.872e-01, 4.210e-02, -2.199e-01, -1.064e-01, 1.770e-02, 1.409e-01, 1.472e-01, 6.215e-02, 3.474e-02, 8.492e-03, -1.342e-01, -5.386e-02, -7.196e-02, -1.235e-01)); + r += mul(s3_4, M4(2.290e-01, 2.270e-01, -3.990e-02, 1.824e-01, 1.359e-01, -6.382e-03, 2.662e-01, 1.919e-01, 1.891e-01, -7.892e-02, 1.116e-03, -2.814e-02, -2.490e-03, -8.516e-02, 6.357e-04, -1.859e-02)); + r += mul(s3_5, M4(7.154e-03, -7.141e-02, -5.607e-02, 8.124e-02, 1.304e-01, 6.471e-02, 5.742e-02, -7.536e-02, 8.766e-02, 8.102e-02, 4.359e-02, -7.945e-02, 8.818e-02, 5.471e-02, -9.640e-02, -2.967e-02)); + r += mul(s3_6, M4(3.370e-02, 1.582e-02, 2.462e-01, 8.418e-03, -1.115e-01, -1.536e-01, 2.464e-01, 4.604e-02, -1.071e-01, 1.075e-01, 4.826e-02, -6.138e-02, -1.089e-01, 1.311e-01, -1.740e-02, 1.328e-01)); + r += mul(s3_7, M4(4.537e-03, 8.950e-02, -2.725e-02, 6.522e-02, -1.668e-01, -9.552e-03, -4.403e-02, 2.931e-02, -2.110e-02, -4.998e-02, 7.868e-02, -6.865e-02, -2.296e-04, -1.233e-01, 1.752e-02, 9.049e-02)); + r += mul(s3_8, 
M4(7.438e-02, 1.333e-01, -5.143e-02, 7.645e-02, -1.764e-03, -1.389e-03, 9.569e-02, 8.844e-02, 6.421e-02, 8.276e-02, -3.275e-03, 5.360e-02, -2.844e-02, 2.104e-02, 1.068e-02, 4.859e-02)); + r += mul(s4_0, M4(-4.425e-02, -5.100e-03, -7.722e-03, -7.258e-02, -9.065e-02, -8.428e-02, -1.687e-01, -2.043e-01, -1.566e-02, 3.501e-02, -1.156e-01, 3.190e-02, -5.824e-01, 2.117e-01, -5.630e-01, 3.386e-01)); + r += mul(s4_1, M4(-2.990e-02, 2.483e-02, -3.320e-02, -3.348e-02, 3.499e-02, 8.377e-02, 1.786e-02, -1.637e-01, -5.283e-02, 4.014e-02, 6.757e-03, 1.080e-01, -1.626e-01, 2.261e-01, -7.782e-02, 8.233e-02)); + r += mul(s4_2, M4(-7.580e-02, 1.262e-01, 4.438e-03, 2.820e-02, -6.883e-02, 2.340e-01, -1.141e-01, -4.504e-02, -1.191e-01, -1.533e-02, 5.940e-02, 7.691e-02, 1.531e-01, 1.699e-02, -4.712e-02, 5.288e-02)); + r += mul(s4_3, M4(2.044e-02, 5.780e-02, 3.308e-02, 2.426e-01, 3.624e-02, -8.043e-02, -1.236e-01, -6.800e-04, 2.831e-02, -4.539e-02, 2.218e-01, -1.209e-01, -3.183e-01, -5.320e-02, -1.178e-01, -4.580e-01)); + r += mul(s4_4, M4(-6.078e-02, -1.130e-02, 7.346e-02, -2.086e-01, 4.780e-03, 2.893e-01, -3.576e-02, 7.024e-02, -2.882e-02, 3.997e-02, -7.593e-02, -1.394e-01, 1.736e-01, 3.615e-01, -4.872e-02, -2.340e-01)); + r += mul(s4_5, M4(-1.184e-01, 1.305e-02, 1.259e-01, -2.960e-02, -8.708e-02, 2.380e-01, 9.214e-02, -4.176e-02, 2.124e-02, 2.160e-02, -1.528e-01, -8.060e-02, -1.276e-01, 3.907e-02, -1.044e-01, -2.006e-01)); + r += mul(s4_6, M4(-8.076e-02, 7.261e-02, 1.449e-02, -3.624e-02, -1.490e-01, -2.071e-01, -4.868e-02, -9.661e-02, -1.495e-04, -7.154e-02, -5.241e-02, -1.307e-02, -4.913e-02, -1.356e-01, -2.297e-01, 2.350e-01)); + r += mul(s4_7, M4(-7.200e-02, 2.000e-01, 7.164e-02, -4.626e-02, -2.369e-02, 1.921e-01, 1.534e-02, -8.373e-02, -4.089e-02, 1.648e-02, 7.972e-02, -4.851e-02, 1.396e-02, -7.280e-02, -2.005e-02, 2.399e-01)); + r += mul(s4_8, M4(7.851e-02, 1.162e-01, -1.663e-01, -8.951e-02, 9.440e-02, 3.697e-02, -3.425e-02, -3.302e-02, 4.576e-02, -1.236e-02, 3.493e-02, 
-6.735e-02, -1.672e-02, 3.621e-03, 4.750e-02, 1.247e-01)); + r += mul(s5_0, M4(2.875e-02, -1.459e-02, -7.892e-02, -3.161e-02, -6.149e-03, -3.703e-02, 5.330e-02, -9.264e-03, -1.884e-01, 1.468e-01, -1.175e-01, -3.640e-02, 1.763e-02, -8.086e-03, -9.649e-02, -2.832e-03)); + r += mul(s5_1, M4(5.752e-02, -8.568e-02, 2.230e-02, -2.336e-02, -2.539e-02, -1.665e-01, 9.407e-02, 2.244e-02, -1.082e-01, 4.609e-01, -1.135e-01, -2.743e-01, 2.130e-02, -9.405e-03, -2.384e-02, 1.934e-02)); + r += mul(s5_2, M4(9.920e-02, -4.490e-02, -7.451e-03, 4.300e-02, -5.757e-02, 4.409e-02, 4.955e-02, 2.940e-02, 5.362e-02, 1.196e-01, 7.804e-02, -3.020e-01, 4.768e-02, -3.637e-02, -3.597e-02, 7.883e-02)); + r += mul(s5_3, M4(1.687e-02, 6.373e-02, -1.770e-01, 6.395e-02, 5.443e-02, -7.987e-03, -7.952e-02, 1.693e-02, 4.316e-02, 4.812e-02, 1.539e-01, 1.209e-01, 6.093e-02, 7.547e-02, -8.094e-02, -7.412e-02)); + r += mul(s5_4, M4(2.046e-01, -4.792e-02, -3.553e-02, -5.000e-02, 1.285e-02, 1.536e-01, 2.418e-02, 3.139e-02, -1.963e-02, 1.322e-01, -2.059e-01, 1.032e-01, 3.493e-02, -1.613e-01, 8.348e-03, -9.059e-02)); + r += mul(s5_5, M4(4.828e-03, -5.654e-02, 2.644e-02, -1.046e-01, -1.364e-01, -1.183e-02, 4.386e-02, -1.404e-02, 4.281e-02, 2.549e-02, -1.399e-01, 3.556e-02, -1.223e-02, 4.599e-02, 7.197e-02, 9.005e-03)); + r += mul(s5_6, M4(-3.068e-02, 1.637e-02, -3.605e-02, -2.284e-02, -8.174e-02, -8.312e-03, -1.846e-02, 6.306e-02, 1.487e-01, 2.973e-02, 4.767e-02, -1.547e-01, -3.608e-02, 5.432e-02, -1.291e-01, -6.812e-02)); + r += mul(s5_7, M4(8.072e-02, -8.949e-02, 1.318e-01, 3.961e-03, 5.239e-02, 7.092e-02, 5.027e-02, 8.323e-02, -8.531e-02, -9.251e-02, -1.205e-01, 1.944e-02, 3.560e-02, 1.352e-02, -7.741e-03, 9.714e-02)); + r += mul(s5_8, M4(-5.957e-03, -4.712e-02, -8.004e-02, 1.380e-03, 1.257e-02, -1.039e-01, 8.539e-02, 2.713e-02, 1.772e-01, -2.501e-01, -4.281e-02, -5.421e-02, -4.765e-02, -1.701e-02, -4.210e-03, -5.724e-02)); + r += mul(s6_0, M4(3.248e-01, -8.173e-02, -1.495e-02, 2.037e-01, 1.379e-04, 
-5.002e-02, 3.511e-02, 3.888e-02, -7.696e-02, 3.776e-02, -7.068e-02, 3.647e-02, -1.763e-01, -5.525e-02, -1.653e-03, -3.789e-02)); + r += mul(s6_1, M4(1.581e-01, 1.166e-01, -1.430e-01, 3.662e-01, 8.923e-03, -6.097e-02, -1.081e-01, 4.020e-03, 5.912e-02, -1.077e-01, 2.633e-02, 3.322e-02, -9.291e-03, -1.720e-01, -7.245e-02, 4.105e-02)); + r += mul(s6_2, M4(-2.491e-02, 7.043e-02, -2.281e-02, 9.317e-02, 5.158e-04, 2.068e-02, 3.190e-02, -2.739e-02, -2.196e-02, 6.502e-02, -3.907e-02, -7.474e-02, 2.471e-02, -1.875e-01, 2.229e-01, -3.467e-01)); + r += mul(s6_3, M4(1.787e-01, -1.382e-01, 1.516e-01, -7.465e-02, 1.683e-01, -1.877e-02, 1.229e-01, 5.404e-04, -4.836e-02, -6.131e-02, 1.219e-01, -6.233e-02, 8.620e-02, -5.384e-02, -1.505e-01, 1.688e-01)); + r += mul(s6_4, M4(-3.181e-02, 1.413e-01, 1.572e-01, -1.831e-01, -1.209e-01, -7.400e-02, -9.066e-02, -3.121e-02, 5.917e-02, -2.963e-02, -2.592e-02, 3.676e-02, 8.648e-02, 1.320e-01, -2.344e-01, -2.189e-01)); + r += mul(s6_5, M4(-5.787e-02, -3.066e-02, 1.830e-01, 4.347e-02, -8.943e-02, 7.870e-03, -2.022e-03, 2.099e-01, -1.212e-02, 6.373e-03, -1.052e-01, 6.339e-02, 3.479e-01, 1.135e-01, -3.867e-02, 1.417e-01)); + r += mul(s6_6, M4(-6.096e-02, -8.908e-02, 9.118e-02, 2.249e-02, 1.004e-01, 9.778e-02, 1.445e-01, 4.343e-02, 8.523e-02, -1.219e-02, 9.168e-02, 3.133e-02, -8.427e-02, -8.851e-02, -7.757e-02, -1.208e-01)); + r += mul(s6_7, M4(3.253e-02, -5.491e-05, 8.328e-03, 5.271e-02, 1.037e-02, -4.053e-02, 1.853e-02, 2.653e-06, 7.625e-02, -8.625e-02, 4.421e-02, 2.075e-03, 2.808e-02, -3.310e-02, -3.273e-01, -1.623e-01)); + r += mul(s6_8, M4(1.744e-01, 8.677e-02, -1.544e-01, 6.717e-02, -3.480e-02, 3.707e-02, 2.683e-02, -6.244e-03, -6.392e-02, 7.593e-02, 2.281e-02, -5.920e-02, -2.599e-01, -2.319e-01, 1.206e-01, -1.286e-02)); + r += mul(s7_0, M4(-7.140e-02, -8.283e-03, -3.386e-03, 6.842e-03, 5.496e-02, 1.178e-01, -8.400e-02, 6.142e-02, -9.335e-02, 2.164e-02, -5.461e-02, -1.402e-02, 7.100e-03, 2.545e-02, -7.884e-02, -3.492e-03)); + r += mul(s7_1, 
M4(1.466e-01, -1.069e-01, -7.955e-02, 6.205e-02, -5.911e-03, -2.119e-02, -1.017e-01, 5.296e-02, 1.210e-01, 1.498e-01, 1.012e-01, 4.255e-01, 9.488e-02, 5.354e-02, 2.343e-02, 8.102e-02)); + r += mul(s7_2, M4(4.775e-02, 9.955e-02, -5.355e-02, -2.145e-02, -7.153e-02, -6.200e-02, 9.512e-02, 6.380e-02, -6.797e-02, 8.805e-02, -5.872e-03, 1.400e-01, -2.788e-02, -1.014e-01, 1.960e-02, 1.287e-01)); + r += mul(s7_3, M4(-9.796e-02, 2.194e-02, 2.270e-02, -7.145e-02, 1.491e-01, 3.151e-02, 6.959e-02, -5.975e-03, 5.773e-02, -2.723e-02, 5.596e-02, -1.518e-01, -5.953e-03, 1.305e-01, -1.814e-02, -1.050e-02)); + r += mul(s7_4, M4(-1.528e-01, -1.884e-01, 9.503e-02, -1.620e-02, -4.942e-02, 1.145e-01, -3.125e-02, -4.450e-02, 8.714e-02, 8.496e-02, 1.369e-02, -1.238e-01, 8.809e-02, 1.352e-01, 6.701e-02, 9.438e-02)); + r += mul(s7_5, M4(-1.206e-02, 5.146e-02, 1.828e-02, 9.465e-02, -8.744e-02, 5.812e-02, 7.547e-02, -1.000e-01, -1.203e-01, 2.384e-01, 2.556e-01, -1.591e-01, -2.986e-02, 5.438e-02, -2.465e-03, -9.818e-02)); + r += mul(s7_6, M4(4.579e-02, 2.161e-02, -6.215e-02, -1.440e-01, -6.273e-02, 6.860e-02, 9.184e-02, -6.930e-02, -1.286e-01, -6.701e-02, -1.015e-01, 2.166e-02, -1.538e-03, -2.507e-02, -1.409e-03, -1.052e-02)); + r += mul(s7_7, M4(-7.298e-02, 4.161e-02, 7.292e-02, -3.918e-02, -2.996e-02, 9.260e-02, -5.889e-02, -3.589e-02, 1.480e-01, -9.639e-02, 7.466e-02, -1.959e-02, -3.891e-02, 1.057e-01, 2.922e-02, -1.112e-02)); + r += mul(s7_8, M4(1.070e-02, 9.460e-02, -7.803e-02, 4.756e-02, 8.328e-02, 9.591e-03, -4.323e-02, 2.402e-02, -6.350e-02, 1.936e-01, -1.345e-01, -2.261e-03, -4.487e-02, -1.049e-01, 1.739e-02, -3.981e-02)); + r += V4(8.304e-03, -2.532e-04, -1.555e-02, -1.432e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 
s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.078e-01, 2.929e-02, -4.399e-02, 2.702e-02, 6.722e-02, -1.047e-02, -1.084e-01, 2.719e-02, -4.003e-03, 1.428e-01, -1.008e-02, 5.808e-02, 1.081e-01, 3.698e-02, -4.159e-03, -1.861e-01)); + r += mul(s0_1, M4(8.120e-02, 2.297e-02, 4.784e-02, -8.514e-02, -5.841e-02, 1.494e-02, 3.262e-02, 1.257e-01, -1.968e-01, -7.450e-02, -2.756e-02, 6.421e-02, -5.311e-02, 1.711e-02, -1.783e-01, 3.959e-02)); + r += mul(s0_2, M4(-1.281e-02, 1.102e-01, 3.957e-02, 6.161e-02, -8.052e-02, 6.213e-02, -8.612e-02, -6.793e-02, 6.211e-03, -2.778e-02, -1.110e-03, -5.723e-02, 2.321e-02, 2.555e-02, -3.891e-02, -1.486e-01)); + r += mul(s0_3, M4(-1.464e-01, 7.861e-02, -1.260e-01, 5.190e-02, 8.147e-03, -6.885e-03, -8.846e-03, -3.854e-02, -1.392e-01, 2.317e-01, -3.088e-02, 1.164e-01, -1.851e-01, -1.803e-01, -1.080e-01, -1.995e-01)); + r += mul(s0_4, M4(1.864e-02, 1.735e-01, -5.772e-02, -7.150e-02, -7.699e-02, 1.665e-01, 3.301e-02, 1.179e-01, 1.048e-03, -9.361e-02, -6.161e-02, 1.365e-01, -7.781e-02, -1.322e-01, 5.536e-02, -1.287e-01)); + r += mul(s0_5, M4(-4.569e-02, -1.306e-01, 2.393e-02, 2.645e-02, 7.168e-04, -7.162e-02, -1.541e-01, -1.313e-01, 1.223e-01, -3.704e-02, -7.348e-02, 1.347e-03, 2.109e-01, 2.540e-02, 1.008e-01, 2.662e-02)); + r += mul(s0_6, M4(-1.176e-01, 1.405e-02, -1.864e-01, -9.889e-02, -8.677e-03, 1.556e-02, 4.215e-02, 4.996e-03, -7.073e-02, -1.384e-01, 4.933e-02, -1.048e-01, -2.920e-01, 1.230e-04, -1.791e-02, -1.323e-01)); + r += mul(s0_7, M4(-2.167e-02, -7.215e-03, -7.007e-02, -4.031e-02, 4.548e-03, 1.588e-02, -1.684e-02, 5.793e-02, 1.591e-01, -1.459e-01, -3.494e-02, -1.860e-01, -4.195e-02, 
6.560e-03, -3.848e-02, -2.633e-01)); + r += mul(s0_8, M4(-1.739e-01, -1.198e-02, 1.739e-02, -2.050e-02, -5.031e-02, -1.678e-02, 7.162e-02, -1.646e-01, 3.139e-01, 1.098e-02, 7.505e-02, -1.072e-01, -8.199e-02, 6.768e-02, -1.450e-01, -2.652e-02)); + r += mul(s1_0, M4(7.592e-02, -5.963e-03, 6.044e-02, 1.364e-02, 1.071e-01, -9.543e-02, -7.565e-02, -1.106e-01, 2.125e-02, 5.735e-02, 6.093e-02, 1.055e-01, 1.116e-01, -7.962e-03, -3.009e-03, -6.391e-02)); + r += mul(s1_1, M4(-7.453e-04, 7.908e-02, 4.528e-02, -6.497e-02, -2.141e-01, 2.905e-02, 8.608e-02, 1.562e-02, -1.183e-01, -3.749e-03, -2.178e-02, 4.636e-02, 6.602e-02, 4.420e-02, -2.969e-02, 9.028e-02)); + r += mul(s1_2, M4(3.744e-02, -1.737e-02, 1.961e-02, 7.493e-02, 4.715e-02, 3.037e-02, 5.145e-03, 6.723e-02, -4.315e-02, -4.398e-02, 9.488e-02, -6.029e-02, 9.366e-02, 1.707e-03, -6.647e-02, -7.819e-02)); + r += mul(s1_3, M4(-4.314e-02, -4.926e-02, -4.821e-02, -8.898e-02, -1.631e-01, -7.705e-02, 4.109e-02, 8.442e-02, -1.584e-02, 4.423e-02, -6.416e-02, 2.429e-02, 7.325e-03, 1.435e-02, 1.782e-02, 2.860e-02)); + r += mul(s1_4, M4(5.343e-02, -4.696e-02, -3.947e-03, -1.846e-01, -3.936e-01, 2.581e-01, 3.814e-01, 2.552e-01, -1.361e-01, 6.894e-02, -6.958e-02, 7.073e-02, -1.028e-01, 1.209e-01, 1.421e-01, 3.052e-02)); + r += mul(s1_5, M4(-8.137e-02, -4.554e-02, -2.349e-02, 7.019e-02, 2.163e-01, -5.918e-02, 9.712e-02, 2.483e-01, 2.473e-02, 1.502e-02, -6.365e-02, -1.746e-01, -4.358e-02, 4.468e-02, -1.093e-01, 2.473e-02)); + r += mul(s1_6, M4(6.200e-02, -1.142e-01, 7.615e-02, -8.188e-03, 8.007e-02, -9.960e-02, 1.362e-01, -1.422e-02, -1.122e-01, -2.014e-02, -1.420e-01, -6.422e-02, 5.698e-02, 2.293e-02, 1.661e-02, 4.720e-02)); + r += mul(s1_7, M4(-4.076e-02, -4.555e-02, 3.997e-02, -4.453e-02, -9.484e-02, -2.221e-04, 8.423e-02, 7.494e-02, 2.933e-02, -1.380e-01, -4.507e-02, -5.689e-02, -6.361e-02, 4.682e-02, 7.517e-02, 2.378e-02)); + r += mul(s1_8, M4(5.169e-02, 3.204e-02, -7.214e-02, 2.993e-02, 4.542e-02, 2.339e-02, 1.878e-01, -7.268e-02, 
1.460e-01, 1.770e-02, 4.084e-02, 6.963e-02, 2.724e-02, 1.184e-03, 8.351e-02, 1.108e-02)); + r += mul(s2_0, M4(-1.266e-02, -4.814e-02, 1.091e-01, 4.787e-02, 2.054e-03, -1.421e-01, -2.946e-02, -1.552e-02, 2.241e-02, 1.130e-01, -1.465e-02, 3.170e-03, -2.013e-01, 1.541e-01, -4.007e-02, -5.746e-02)); + r += mul(s2_1, M4(6.318e-02, 1.825e-01, 5.007e-02, 6.417e-02, -8.822e-02, -1.965e-01, -8.122e-02, 8.083e-02, 1.451e-01, 9.234e-02, 1.458e-02, 3.394e-03, 1.371e-01, -1.556e-01, 3.232e-03, -3.001e-01)); + r += mul(s2_2, M4(-1.333e-01, 4.256e-02, 8.831e-03, 6.197e-02, 7.894e-03, -7.217e-02, 7.846e-02, -1.896e-02, -5.892e-02, 1.073e-01, 1.912e-03, -3.306e-02, 1.492e-01, 6.274e-02, -1.672e-01, -1.554e-01)); + r += mul(s2_3, M4(-1.282e-02, 9.536e-02, 1.442e-03, 5.785e-02, -8.516e-02, 8.072e-03, -7.036e-02, -1.333e-01, -1.173e-01, 2.519e-02, 2.043e-01, 3.421e-03, -9.203e-02, 1.001e-01, -5.736e-02, 2.982e-01)); + r += mul(s2_4, M4(1.993e-03, 1.229e-01, 5.297e-02, -1.077e-01, 1.427e-01, 8.361e-02, 2.030e-01, 5.532e-02, 1.776e-02, -5.811e-02, 6.569e-03, -1.229e-01, -8.695e-03, -6.819e-03, 1.661e-01, 2.228e-01)); + r += mul(s2_5, M4(-4.533e-02, -1.245e-02, 3.212e-02, -5.504e-02, 8.230e-02, 5.109e-02, 1.173e-02, -9.066e-02, -1.094e-01, 7.413e-03, 8.526e-02, 3.235e-02, -4.342e-03, 3.551e-02, 1.901e-01, -5.356e-02)); + r += mul(s2_6, M4(8.229e-02, 9.509e-04, -2.815e-02, -4.377e-02, 3.677e-02, -3.288e-02, 3.554e-02, -2.960e-02, 1.177e-01, -3.922e-02, 1.236e-01, 4.912e-02, -7.920e-02, 4.889e-03, 6.535e-02, 5.023e-03)); + r += mul(s2_7, M4(9.450e-02, -4.877e-02, 2.180e-02, 5.390e-02, -7.268e-02, 5.576e-02, 7.790e-02, 7.471e-02, 1.869e-01, 4.203e-02, 3.673e-02, -2.178e-02, 3.943e-03, -7.662e-03, -2.875e-02, -7.668e-02)); + r += mul(s2_8, M4(-8.520e-02, -7.359e-02, -1.834e-02, -3.821e-02, -9.685e-02, 3.117e-02, 8.558e-02, 6.373e-02, 2.420e-02, -5.133e-02, 1.106e-01, -2.596e-02, 2.014e-02, 5.579e-02, 1.153e-02, 1.782e-01)); + r += mul(s3_0, M4(1.409e-01, -2.383e-02, 8.639e-02, -3.019e-02, 
1.619e-02, -1.529e-01, -1.008e-01, -1.410e-02, -3.718e-03, 3.788e-03, -4.983e-02, -1.016e-02, 1.156e-02, 4.953e-05, 3.956e-02, -2.179e-02)); + r += mul(s3_1, M4(-7.084e-02, 4.899e-02, 6.816e-03, -2.448e-01, -7.345e-02, -3.557e-01, 7.551e-02, -8.657e-03, 6.795e-02, 5.192e-02, -3.077e-02, 1.232e-01, 2.197e-02, -5.256e-02, 4.122e-03, 1.707e-02)); + r += mul(s3_2, M4(-2.774e-01, 5.557e-02, 1.514e-01, 2.173e-02, 1.189e-01, 1.642e-02, 8.372e-02, -2.661e-01, 3.143e-02, 3.410e-02, 4.487e-02, -1.723e-02, 9.291e-02, -2.441e-02, -1.115e-01, -2.501e-02)); + r += mul(s3_3, M4(-1.631e-01, 8.468e-02, 1.434e-01, 1.023e-01, -3.229e-02, -3.164e-02, -2.104e-01, -3.320e-02, -1.149e-01, -3.901e-02, -6.834e-02, -1.947e-02, 1.840e-02, 1.051e-01, -4.235e-02, 2.019e-02)); + r += mul(s3_4, M4(-1.755e-01, 2.059e-01, 9.597e-02, -2.928e-02, 8.855e-02, 1.821e-02, 8.755e-02, 1.028e-01, -8.825e-02, -1.528e-01, -1.426e-01, 4.698e-02, -4.039e-02, 5.968e-02, 1.375e-01, 1.348e-02)); + r += mul(s3_5, M4(-2.542e-01, 4.282e-02, -1.192e-03, 4.731e-02, 2.510e-02, 1.486e-01, -1.245e-02, -5.749e-02, -9.773e-02, 6.700e-02, -1.882e-01, -4.323e-02, 1.196e-01, -4.808e-02, -1.725e-02, -4.272e-02)); + r += mul(s3_6, M4(4.213e-03, -7.704e-03, 3.259e-02, -2.062e-01, 1.587e-01, 1.062e-01, -1.548e-02, -3.156e-01, -1.016e-02, -3.972e-02, -4.383e-02, -2.585e-02, -5.732e-02, 1.351e-02, -2.787e-02, -9.469e-02)); + r += mul(s3_7, M4(-6.842e-02, -5.412e-02, -1.469e-02, -8.877e-02, -4.274e-02, 7.221e-02, 2.691e-02, 7.001e-02, 1.141e-01, 1.915e-03, -4.521e-02, 4.300e-04, 4.757e-02, 8.053e-02, 8.162e-02, 3.804e-02)); + r += mul(s3_8, M4(4.241e-02, -6.491e-03, -3.866e-02, 4.730e-02, 6.733e-02, 6.034e-02, 2.661e-02, 1.440e-01, -2.978e-03, 2.585e-02, -2.838e-02, 1.062e-02, -1.446e-02, 7.391e-02, 2.640e-02, -3.836e-02)); + r += mul(s4_0, M4(5.677e-02, -2.280e-01, -2.451e-02, -2.647e-02, -1.017e-03, 3.461e-02, -3.574e-02, -3.984e-02, 6.191e-02, 4.968e-02, 4.937e-02, 2.930e-02, -1.373e-01, -1.053e-01, -1.538e-01, 9.222e-02)); + r 
+= mul(s4_1, M4(3.276e-02, 6.180e-02, 1.373e-01, 1.933e-01, 5.629e-02, 5.820e-02, 6.219e-02, -7.410e-02, -6.342e-02, 1.499e-01, -1.541e-02, -3.026e-02, 4.615e-01, 1.580e-01, 7.077e-02, -1.236e-01)); + r += mul(s4_2, M4(9.957e-02, 1.757e-03, -2.521e-02, -1.223e-01, 2.074e-01, 1.008e-02, -5.914e-02, 2.410e-02, 2.634e-02, 1.131e-01, 8.216e-02, 3.450e-02, 7.552e-02, 1.194e-01, -1.231e-01, 1.073e-01)); + r += mul(s4_3, M4(-1.172e-02, -4.300e-02, -3.565e-03, -1.941e-02, 3.217e-02, 7.148e-02, -8.495e-02, 5.670e-02, -2.033e-02, -1.364e-01, 3.429e-02, -2.695e-02, 5.680e-01, -8.805e-02, -1.195e-01, 5.252e-01)); + r += mul(s4_4, M4(-2.671e-02, -3.016e-01, -3.329e-02, 6.906e-02, 9.063e-02, 8.925e-02, -1.415e-01, -1.619e-01, -9.870e-02, -2.667e-01, -2.157e-02, 5.647e-02, -2.261e-01, 2.456e-01, -6.064e-02, 6.518e-02)); + r += mul(s4_5, M4(2.888e-02, -5.659e-02, 1.779e-01, 4.198e-02, 6.701e-02, 5.891e-02, -1.460e-01, 2.943e-01, 6.448e-02, -6.878e-02, -4.030e-02, -1.031e-01, 1.665e-01, 5.578e-02, 1.881e-02, 1.418e-01)); + r += mul(s4_6, M4(-1.263e-01, -2.191e-01, 1.366e-01, 1.068e-01, -5.129e-02, -6.460e-02, 5.811e-02, -6.006e-02, -2.854e-02, 2.742e-02, 8.994e-03, -1.187e-02, -7.366e-01, -2.287e-01, -6.164e-01, 1.882e-02)); + r += mul(s4_7, M4(2.390e-01, -8.987e-02, 8.313e-02, 2.841e-01, -1.874e-01, 1.459e-01, -2.012e-01, -4.406e-02, -1.596e-01, 4.841e-02, -1.880e-02, -6.834e-02, 2.578e-02, 1.440e-01, 2.849e-01, -1.272e-01)); + r += mul(s4_8, M4(3.170e-02, 3.228e-02, 6.340e-02, 2.932e-02, -6.048e-02, 3.031e-02, -7.810e-02, -4.953e-02, -1.134e-01, 2.523e-02, 1.023e-01, 6.335e-02, -1.582e-01, 2.533e-03, -1.131e-01, -6.075e-03)); + r += mul(s5_0, M4(4.625e-02, -3.412e-04, -1.077e-02, 9.450e-03, -1.525e-04, 2.312e-02, 2.327e-02, 1.667e-02, 3.442e-03, 1.683e-01, -3.592e-02, 8.428e-03, -1.301e-02, -1.088e-02, 1.010e-01, 9.448e-02)); + r += mul(s5_1, M4(3.945e-02, 2.023e-01, 1.848e-02, 4.163e-02, 5.773e-02, -2.633e-02, 7.292e-02, 1.495e-02, 5.110e-02, -9.728e-03, 3.200e-02, 3.356e-02, 
-4.969e-02, 1.035e-01, 3.067e-02, -5.841e-03)); + r += mul(s5_2, M4(-1.150e-01, 5.962e-02, 2.455e-03, -6.986e-02, 7.313e-02, -9.049e-02, 8.998e-02, -3.317e-02, -6.195e-02, 1.728e-01, -1.896e-02, 2.274e-02, 1.186e-02, 1.671e-02, -4.866e-02, -1.200e-03)); + r += mul(s5_3, M4(8.141e-03, 6.878e-02, -2.976e-02, 2.621e-02, 1.784e-01, 1.248e-02, -6.853e-03, -1.927e-03, 2.433e-01, 7.125e-02, -5.810e-02, -2.757e-01, -9.888e-02, -7.193e-02, 7.867e-03, 2.181e-02)); + r += mul(s5_4, M4(-2.204e-02, -1.130e-01, -7.535e-02, -2.516e-02, 1.713e-01, 5.264e-03, 4.772e-02, -5.924e-02, -2.486e-01, 1.055e-01, 1.328e-01, -2.502e-02, 5.369e-02, 6.078e-03, 1.386e-01, -6.224e-02)); + r += mul(s5_5, M4(-3.905e-02, 5.261e-02, -2.165e-03, -1.201e-01, 1.096e-05, -9.632e-02, 9.782e-02, -8.054e-02, -1.451e-01, 5.762e-02, 1.816e-01, 3.270e-01, 1.616e-01, -2.044e-02, -4.551e-02, 4.272e-02)); + r += mul(s5_6, M4(-8.485e-02, -1.450e-01, -3.045e-02, 1.074e-01, 2.179e-03, -3.846e-02, -1.863e-02, 1.871e-02, 5.108e-02, 6.767e-02, 1.797e-01, 1.684e-02, -6.264e-02, -1.048e-01, 7.191e-02, -6.097e-02)); + r += mul(s5_7, M4(-2.142e-02, 4.408e-02, 2.998e-02, -1.032e-01, -1.759e-01, -7.314e-02, -1.102e-01, -1.872e-02, 2.373e-01, -1.447e-01, 2.624e-03, -4.420e-02, 1.146e-01, 2.988e-02, 4.577e-02, -3.427e-02)); + r += mul(s5_8, M4(-6.060e-02, 1.362e-02, 2.039e-02, 1.276e-02, 1.533e-01, 7.360e-03, 4.864e-02, -1.255e-01, 1.827e-01, 1.825e-01, -6.931e-03, -4.778e-02, -6.961e-03, -9.526e-02, 7.587e-02, 3.179e-02)); + r += mul(s6_0, M4(-1.902e-01, -5.660e-02, -4.673e-02, -3.461e-02, -8.106e-02, -1.420e-03, -5.476e-02, -3.773e-02, 5.253e-02, 1.304e-01, 3.423e-02, 5.212e-02, -1.079e-01, 3.761e-02, -1.416e-02, 1.585e-01)); + r += mul(s6_1, M4(-6.599e-02, 2.471e-02, -1.303e-01, 4.336e-02, -1.889e-02, 1.351e-02, -1.452e-02, 1.405e-02, -9.896e-02, -5.456e-02, 9.939e-02, -5.145e-02, -3.063e-01, 2.081e-01, -3.441e-01, -1.881e-01)); + r += mul(s6_2, M4(4.816e-02, 3.052e-02, -1.526e-03, 4.331e-02, 3.285e-02, -4.175e-03, 
2.715e-02, 9.212e-03, 8.280e-02, -4.270e-02, 7.012e-02, -1.039e-01, 5.353e-02, 1.396e-01, 1.261e-01, 1.622e-02)); + r += mul(s6_3, M4(2.246e-02, -2.545e-02, 9.849e-03, -2.770e-02, -1.433e-01, 5.388e-02, 3.819e-03, 7.418e-02, -9.538e-02, -3.039e-02, 2.717e-02, 3.077e-02, -9.002e-02, -4.116e-02, 1.307e-01, 2.951e-02)); + r += mul(s6_4, M4(-4.712e-02, -1.267e-01, 1.696e-01, 2.844e-01, 1.727e-01, 8.386e-02, -4.416e-02, -6.215e-02, -7.953e-02, 3.334e-02, -1.312e-01, -3.992e-03, -4.352e-01, -3.749e-02, -1.048e-01, -1.854e-01)); + r += mul(s6_5, M4(1.837e-01, 1.530e-01, 2.316e-03, 1.271e-03, -4.068e-02, 6.315e-02, 1.070e-01, -2.652e-02, 2.268e-03, 5.209e-02, -4.338e-02, -8.105e-02, -3.716e-01, 3.706e-01, -4.535e-01, 4.897e-02)); + r += mul(s6_6, M4(-1.581e-01, 2.024e-02, 1.671e-02, 6.905e-02, 9.779e-02, 3.306e-03, -3.781e-02, 5.754e-02, 1.739e-02, 9.850e-02, -3.945e-02, -4.551e-02, 6.723e-02, -1.263e-01, 1.761e-01, -6.037e-03)); + r += mul(s6_7, M4(7.310e-04, 4.816e-02, -1.714e-01, -7.055e-02, -3.067e-03, -6.569e-02, 1.492e-01, 8.049e-02, -6.071e-02, 3.919e-02, 8.535e-02, 1.471e-01, 1.040e-01, -6.098e-02, -1.973e-01, -6.420e-02)); + r += mul(s6_8, M4(3.342e-02, 1.310e-01, -4.144e-02, 1.077e-01, 1.826e-01, -3.139e-02, -5.015e-02, -2.887e-02, 5.592e-03, -1.517e-02, -7.853e-02, 1.861e-02, 2.662e-03, 3.045e-01, -2.314e-01, 6.279e-01)); + r += mul(s7_0, M4(5.693e-02, 8.945e-02, 2.642e-02, 1.688e-01, -1.557e-01, -5.788e-02, -5.250e-02, -8.901e-02, 2.862e-01, -7.066e-02, 1.859e-02, 2.941e-01, 5.849e-02, -6.006e-02, -1.287e-02, 3.286e-02)); + r += mul(s7_1, M4(-4.778e-02, -5.997e-02, -9.390e-02, -6.606e-02, 6.563e-02, 3.327e-02, 9.178e-02, -2.724e-02, -3.661e-01, -2.215e-02, -1.827e-01, 1.863e-01, -4.644e-02, -1.734e-02, -6.994e-02, 3.454e-02)); + r += mul(s7_2, M4(-4.597e-02, -9.081e-02, 5.254e-03, -8.460e-03, -5.847e-02, -2.472e-02, 6.248e-02, -6.373e-02, 2.489e-01, -5.080e-02, 1.111e-01, -3.054e-01, -1.587e-01, 1.984e-02, 7.628e-02, -2.618e-02)); + r += mul(s7_3, 
M4(-8.152e-02, -9.300e-02, 2.128e-01, 1.161e-01, 2.329e-01, -1.434e-02, 1.939e-01, -1.737e-01, 1.001e-01, 4.390e-02, -1.567e-02, 7.235e-02, 9.783e-02, -1.181e-02, -1.044e-01, -3.693e-03)); + r += mul(s7_4, M4(6.171e-02, 9.395e-02, -1.146e-02, -1.124e-01, 5.917e-02, 1.169e-02, -1.257e-01, 1.246e-01, 2.802e-02, -4.437e-02, 1.409e-01, -2.917e-02, -7.506e-02, -6.381e-03, 9.039e-02, -1.398e-01)); + r += mul(s7_5, M4(-1.411e-01, -6.319e-02, 3.546e-02, -2.351e-02, 3.979e-02, -3.030e-02, 6.296e-03, -4.836e-02, -4.265e-03, -2.079e-01, 1.140e-01, 1.732e-02, -1.823e-01, 2.664e-02, 9.901e-02, -4.536e-02)); + r += mul(s7_6, M4(-7.358e-02, -4.715e-03, 3.243e-03, 1.332e-01, 1.156e-01, 2.452e-02, 2.582e-01, -8.717e-02, 5.893e-02, -6.813e-02, -1.045e-01, -6.077e-02, 6.702e-02, -4.179e-02, 4.494e-04, 3.716e-03)); + r += mul(s7_7, M4(9.210e-02, -8.261e-03, 2.497e-02, -1.238e-01, -1.281e-01, 2.690e-02, -8.240e-02, -1.425e-01, -3.485e-02, -6.733e-03, -3.941e-02, -9.975e-02, 6.695e-05, 2.079e-02, 1.967e-02, -3.847e-03)); + r += mul(s7_8, M4(1.172e-02, -5.997e-02, -9.893e-02, 1.800e-02, 1.765e-01, 6.485e-02, -1.316e-03, -1.296e-01, -5.434e-02, -5.654e-02, -7.644e-02, 9.541e-02, 9.991e-02, -7.341e-02, -2.527e-02, -1.281e-02)); + r += V4(1.567e-02, -5.316e-03, 4.092e-02, 2.983e-02); + return r; +} + +void Pass9(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 
s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + 
V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, 
s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 10 +//!DESC conv9 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(9.979e-02, -3.801e-03, -4.195e-02, 4.491e-02, -3.034e-02, 9.629e-02, 2.217e-02, 2.348e-02, -2.610e-01, -6.449e-02, -2.193e-02, 2.201e-02, 3.639e-03, 2.767e-02, 4.953e-02, -2.463e-02)); + r += mul(s0_1, M4(4.585e-02, 9.533e-03, -2.058e-01, -1.379e-01, 6.451e-02, 1.004e-01, 1.222e-01, -1.450e-01, -1.402e-01, 2.992e-04, -1.222e-01, 1.367e-01, 8.661e-02, -2.011e-02, 9.221e-02, 1.822e-02)); + r += mul(s0_2, M4(8.522e-02, -9.951e-02, -1.250e-01, 5.328e-03, -6.122e-02, 6.165e-04, -3.026e-02, -7.362e-03, 8.463e-02, -5.863e-02, -5.406e-02, -8.633e-02, 2.756e-01, 1.402e-02, 1.001e-02, 2.433e-02)); + r += mul(s0_3, M4(-5.119e-02, -1.328e-01, 1.077e-01, 2.340e-01, 2.387e-01, 6.255e-03, -1.186e-01, -3.932e-02, 5.365e-02, 8.073e-02, 
-2.408e-02, -7.386e-02, -2.187e-01, -1.164e-01, -1.490e-02, -1.534e-01)); + r += mul(s0_4, M4(-2.374e-01, -1.060e-01, 3.018e-01, -1.544e-02, -7.730e-02, -9.786e-02, -1.404e-01, 4.522e-02, -6.048e-02, -3.616e-02, -2.369e-01, 2.317e-01, 2.292e-01, -6.602e-02, 1.553e-01, 3.055e-02)); + r += mul(s0_5, M4(-2.279e-01, 1.182e-01, 2.212e-01, 1.745e-01, 1.819e-03, -3.652e-02, -5.531e-02, 1.062e-01, 1.659e-01, -1.603e-01, -6.869e-02, -4.112e-02, -1.094e-01, 3.642e-02, 6.102e-02, 1.830e-01)); + r += mul(s0_6, M4(1.745e-01, -2.214e-02, 2.725e-02, 4.460e-02, -4.860e-02, -2.004e-03, -9.034e-02, 3.286e-02, -4.466e-03, -1.189e-01, 3.782e-02, 8.976e-02, -1.897e-01, 4.847e-02, -2.158e-02, 3.881e-02)); + r += mul(s0_7, M4(-1.962e-01, 4.814e-03, -7.059e-02, -5.031e-03, 4.430e-02, -4.664e-02, 2.091e-01, -9.975e-03, -4.257e-02, -4.231e-02, -1.577e-01, -7.857e-02, -7.814e-02, 1.149e-02, -1.294e-01, -3.907e-02)); + r += mul(s0_8, M4(-1.119e-01, 1.538e-01, 7.105e-02, 1.003e-01, 2.690e-02, -7.133e-02, 1.107e-01, -1.360e-01, 1.566e-01, -1.870e-01, 6.938e-02, 1.078e-01, -6.613e-02, 9.232e-02, -1.335e-01, 6.051e-02)); + r += mul(s1_0, M4(2.202e-02, -1.482e-02, -6.781e-03, 2.597e-02, -1.069e-01, 2.502e-02, 5.111e-02, -1.779e-01, -5.809e-02, -4.731e-03, -2.448e-02, -6.778e-02, 5.783e-02, 2.162e-02, -3.471e-02, 1.200e-01)); + r += mul(s1_1, M4(-9.422e-02, 1.303e-02, -8.666e-02, 1.045e-01, 6.160e-02, -2.952e-02, 1.353e-01, 1.667e-03, -1.587e-01, -3.928e-02, 3.997e-03, 4.294e-02, 5.702e-02, -3.176e-02, -3.976e-02, -2.130e-01)); + r += mul(s1_2, M4(-1.411e-03, 1.515e-02, 3.510e-02, -9.847e-02, 2.856e-02, -3.282e-02, -2.930e-02, -1.862e-02, 3.741e-02, -5.659e-02, -5.330e-02, 7.632e-03, -2.209e-02, -4.204e-02, 9.977e-02, 9.778e-03)); + r += mul(s1_3, M4(6.449e-03, -5.335e-02, 7.347e-02, -7.254e-02, -1.440e-01, -8.859e-02, 3.089e-02, -7.035e-02, 9.591e-02, 5.848e-02, -9.547e-03, -8.389e-02, -5.530e-02, 2.520e-02, 5.559e-02, 3.615e-02)); + r += mul(s1_4, M4(-1.427e-02, -2.663e-02, 5.490e-02, 1.628e-01, 
-2.134e-01, 1.559e-01, -2.144e-01, 1.769e-01, 5.934e-02, -3.494e-02, -2.390e-01, -3.932e-03, 1.214e-01, 1.626e-01, -1.064e-01, -1.502e-01)); + r += mul(s1_5, M4(4.727e-02, 4.170e-02, 4.571e-03, -1.416e-01, -9.562e-03, -2.348e-02, -7.062e-02, 1.835e-02, 5.295e-02, -4.510e-02, 8.317e-02, -1.414e-01, -9.661e-02, 9.667e-02, -1.732e-02, 1.292e-01)); + r += mul(s1_6, M4(-6.666e-02, 6.299e-02, -1.308e-02, -2.736e-02, 3.681e-02, -6.069e-02, -6.168e-02, -2.373e-02, -2.423e-02, 1.507e-01, 3.937e-02, -7.852e-02, 4.251e-02, -4.458e-02, -2.519e-02, 8.810e-04)); + r += mul(s1_7, M4(-4.931e-02, 3.555e-02, -5.431e-02, -1.428e-03, -5.725e-02, 1.013e-01, 1.402e-01, -6.326e-02, -1.939e-01, 1.234e-03, -7.806e-03, 3.255e-02, -4.776e-02, 1.070e-01, -5.987e-02, -3.669e-02)); + r += mul(s1_8, M4(3.066e-02, -4.837e-04, 1.517e-01, -1.362e-02, -4.811e-03, 7.739e-02, -1.789e-02, 1.743e-03, -1.418e-02, -4.379e-02, 2.601e-02, 7.546e-02, 1.224e-01, 5.496e-02, 7.564e-02, -1.265e-02)); + r += mul(s2_0, M4(-3.308e-02, -5.130e-02, -3.989e-03, -8.190e-02, -1.572e-01, -6.557e-03, -9.931e-02, 3.838e-03, 3.900e-02, -7.331e-02, 1.019e-01, 8.555e-02, -3.265e-02, 5.456e-02, 1.418e-01, 5.037e-02)); + r += mul(s2_1, M4(1.313e-01, 5.413e-03, 4.894e-02, -1.650e-01, 1.398e-01, -7.963e-03, -1.529e-01, -7.716e-02, 1.198e-01, 4.492e-02, -1.669e-02, -1.248e-03, -4.636e-02, 5.117e-03, 1.352e-01, -1.850e-01)); + r += mul(s2_2, M4(1.493e-02, 9.842e-03, 1.435e-02, 9.388e-02, -5.429e-02, 1.358e-02, -2.276e-02, 4.283e-02, 9.156e-02, -2.228e-02, 5.059e-02, -1.006e-01, 1.375e-01, -5.795e-02, 7.925e-02, 1.013e-01)); + r += mul(s2_3, M4(-5.631e-02, -8.639e-02, -4.036e-02, 7.269e-02, -3.657e-02, -7.294e-02, -1.657e-02, 1.943e-02, -4.664e-02, -9.360e-02, 1.649e-02, 1.378e-01, -8.848e-02, 1.032e-01, 4.494e-02, -4.874e-02)); + r += mul(s2_4, M4(-4.301e-02, -5.024e-02, -1.032e-01, -4.365e-02, 6.432e-02, 8.289e-02, -5.587e-02, -3.820e-02, -9.340e-02, -3.329e-02, -8.456e-02, 9.410e-03, -5.204e-02, 5.367e-02, -3.764e-02, 
3.938e-02)); + r += mul(s2_5, M4(-6.791e-02, -1.080e-02, 2.013e-02, 5.786e-02, -6.448e-02, 3.220e-03, -4.325e-02, -1.603e-02, -6.848e-02, -1.132e-01, -1.624e-01, -4.266e-02, -1.756e-01, 4.226e-02, -1.251e-01, 6.439e-02)); + r += mul(s2_6, M4(7.324e-02, -2.496e-03, -8.930e-02, 2.952e-02, -5.135e-02, 4.454e-02, 6.500e-03, -6.366e-02, 6.126e-02, -3.274e-02, 3.179e-03, 1.504e-01, -1.652e-02, 3.339e-02, -7.815e-02, 7.146e-03)); + r += mul(s2_7, M4(6.184e-02, -5.592e-03, 9.693e-02, -6.885e-02, 1.027e-01, -1.237e-01, 6.530e-02, 8.315e-02, -7.620e-02, 6.753e-02, 1.437e-01, -6.247e-02, 1.395e-01, -9.828e-02, 8.287e-02, 7.834e-02)); + r += mul(s2_8, M4(1.685e-01, -1.294e-01, 3.398e-02, 1.019e-01, -2.826e-02, -1.546e-02, 9.223e-02, 1.609e-02, -1.707e-02, 6.645e-02, 1.119e-02, -6.590e-02, 3.923e-03, 7.958e-02, 1.053e-01, -3.085e-02)); + r += mul(s3_0, M4(2.589e-01, -2.329e-01, 1.983e-01, -7.556e-02, -1.354e-01, -2.610e-03, 1.614e-01, 1.125e-01, 1.494e-01, -2.117e-01, 4.815e-02, -3.878e-02, -2.583e-02, -8.455e-03, 6.600e-02, -3.506e-02)); + r += mul(s3_1, M4(1.764e-01, 3.371e-01, -3.997e-01, -1.511e-02, 1.700e-01, 1.261e-01, -1.216e-01, -1.091e-03, 1.045e-01, -2.192e-02, 7.441e-02, 2.696e-01, 1.014e-01, 1.057e-01, 1.715e-02, -5.898e-02)); + r += mul(s3_2, M4(1.639e-02, 2.217e-01, 1.512e-01, -1.281e-01, 1.477e-02, -3.184e-02, 2.148e-01, -8.227e-02, -1.780e-02, 7.008e-02, -1.461e-02, 9.458e-02, -6.548e-03, -1.904e-02, -6.767e-02, 7.074e-02)); + r += mul(s3_3, M4(-2.304e-01, 1.383e-01, -3.498e-02, 1.237e-01, -6.704e-02, -1.442e-02, 1.118e-01, 2.074e-01, -2.043e-02, -8.898e-02, 4.736e-02, 1.558e-01, 8.804e-02, -1.124e-01, 7.384e-03, -7.617e-02)); + r += mul(s3_4, M4(1.751e-01, 6.813e-02, 6.776e-02, -3.537e-01, -1.869e-01, 1.035e-02, 2.149e-01, 3.344e-01, -6.701e-02, 4.224e-02, 1.873e-01, -1.856e-01, -1.505e-02, 3.836e-02, -7.957e-02, 1.313e-01)); + r += mul(s3_5, M4(-3.251e-01, 3.145e-01, 9.404e-03, 2.142e-02, 1.153e-01, -1.625e-01, 1.867e-01, -8.547e-02, -5.732e-02, 1.582e-02, 
-7.585e-02, 7.211e-02, -9.616e-02, 1.182e-01, 1.738e-02, 1.165e-01)); + r += mul(s3_6, M4(-6.954e-02, 1.575e-01, -7.638e-02, -3.630e-02, 3.772e-02, 1.272e-01, -6.273e-02, -2.021e-03, 1.647e-01, -3.607e-03, 6.996e-02, -5.104e-02, 9.639e-03, 1.550e-02, -2.351e-02, -5.522e-02)); + r += mul(s3_7, M4(-5.362e-02, 1.053e-01, 1.142e-01, -7.457e-02, -9.222e-02, -1.492e-01, 2.242e-01, -2.638e-01, 3.647e-02, 1.253e-01, 1.070e-01, 1.552e-01, 1.142e-01, 3.438e-02, -9.715e-02, 4.454e-02)); + r += mul(s3_8, M4(1.371e-01, 4.977e-02, -4.478e-02, 3.495e-02, 2.572e-02, 7.444e-02, 1.017e-01, 5.641e-03, -7.497e-02, 5.295e-02, 8.355e-02, -1.566e-02, -2.130e-02, 3.331e-02, 3.913e-02, 1.173e-01)); + r += mul(s4_0, M4(1.499e-01, 7.687e-02, 1.185e-01, -3.580e-02, 1.065e-01, 3.268e-02, -8.007e-02, -9.122e-02, 4.249e-02, 7.703e-02, 5.687e-02, -3.351e-02, 4.208e-02, -1.613e-02, -5.066e-03, -4.203e-03)); + r += mul(s4_1, M4(-2.001e-02, -1.402e-02, 7.899e-02, -1.016e-01, 1.216e-01, -7.467e-02, -5.335e-02, 1.986e-01, -4.708e-02, 7.680e-02, 1.611e-02, -3.940e-02, 2.690e-02, 5.244e-02, -1.775e-02, -1.833e-01)); + r += mul(s4_2, M4(1.290e-01, 3.754e-02, 4.333e-02, 1.285e-01, -1.692e-01, -7.149e-02, 4.532e-02, 6.357e-02, 3.700e-02, -6.319e-02, -4.157e-02, 7.303e-02, 2.179e-02, 5.627e-02, 4.567e-02, 4.252e-03)); + r += mul(s4_3, M4(5.356e-02, 1.857e-02, 5.866e-03, -2.949e-02, -2.405e-02, -1.322e-01, 7.108e-02, 3.229e-03, 1.843e-01, -2.091e-01, -5.514e-02, -2.758e-02, -2.227e-02, 8.073e-02, -8.134e-04, 4.160e-02)); + r += mul(s4_4, M4(7.446e-02, -1.003e-01, -1.824e-02, -6.362e-02, 1.851e-01, -1.159e-01, 7.880e-02, 2.134e-01, -5.594e-02, -2.991e-02, -3.338e-03, -1.311e-01, -4.503e-02, -6.705e-02, 1.878e-02, -1.863e-01)); + r += mul(s4_5, M4(-1.527e-01, -1.209e-01, -2.515e-01, -1.239e-01, -1.336e-01, -5.718e-02, -1.228e-02, 2.261e-02, 6.291e-03, -1.348e-02, 2.181e-02, -6.074e-02, 1.343e-02, -4.235e-03, -1.892e-01, 3.363e-03)); + r += mul(s4_6, M4(2.154e-01, 1.255e-01, -1.835e-02, 9.993e-02, 1.802e-02, 
5.546e-02, -1.146e-02, 3.926e-02, 1.277e-02, -3.354e-02, -1.901e-02, 1.212e-02, 2.635e-01, 2.701e-01, -2.231e-02, -2.811e-03)); + r += mul(s4_7, M4(-4.653e-02, 1.927e-01, -2.140e-01, -1.068e-01, 2.129e-02, 4.108e-02, 1.239e-01, -4.284e-02, -3.310e-02, 2.095e-02, 8.717e-02, 1.073e-02, 2.524e-01, 7.073e-02, -6.252e-02, 2.159e-02)); + r += mul(s4_8, M4(1.614e-01, 1.794e-01, 5.384e-02, -2.796e-02, -1.241e-01, 9.263e-03, -5.525e-02, -2.885e-02, 1.242e-01, 1.682e-01, 1.313e-02, -8.044e-02, 1.017e-01, 2.218e-01, 4.896e-03, 2.134e-02)); + r += mul(s5_0, M4(6.614e-02, 2.863e-02, 5.042e-02, -2.688e-02, -4.034e-02, 3.785e-02, -1.552e-01, 4.106e-02, -4.669e-03, 1.572e-01, 2.931e-02, -8.959e-02, 4.404e-02, 5.187e-02, 6.352e-02, -8.528e-03)); + r += mul(s5_1, M4(-4.113e-02, -3.485e-02, 9.744e-04, -1.129e-01, -4.184e-02, -5.097e-02, -4.278e-02, -1.950e-01, 4.177e-01, -4.765e-02, -7.144e-02, -5.964e-02, -1.823e-02, 1.985e-02, -2.863e-02, 1.105e-01)); + r += mul(s5_2, M4(-9.188e-02, -7.315e-02, -1.126e-01, -1.101e-01, -1.861e-01, -1.838e-02, 1.793e-02, 2.983e-02, -2.057e-01, 2.288e-01, -5.805e-02, 3.840e-01, -6.533e-02, -6.492e-03, -1.555e-02, 1.087e-02)); + r += mul(s5_3, M4(-5.148e-02, -5.261e-02, 1.157e-01, -5.552e-02, 9.241e-03, -5.474e-02, -7.290e-02, 9.177e-02, 1.089e-01, -6.549e-02, 5.312e-02, 1.273e-02, -4.502e-02, 7.933e-02, 1.235e-01, -5.523e-02)); + r += mul(s5_4, M4(1.154e-01, -7.080e-02, 2.820e-03, 5.855e-02, 3.476e-02, 5.606e-02, 1.551e-02, 2.757e-02, -4.717e-01, -3.164e-02, 1.945e-01, 2.011e-01, 4.240e-02, -4.828e-02, 1.131e-01, 2.007e-01)); + r += mul(s5_5, M4(-1.324e-02, 9.034e-02, -6.562e-02, -1.415e-01, 2.035e-01, 2.227e-02, 8.958e-02, -8.127e-02, -2.278e-02, 5.897e-01, -2.082e-02, 2.763e-01, 3.196e-03, -1.589e-02, 1.041e-01, -6.755e-03)); + r += mul(s5_6, M4(-1.162e-02, 3.651e-02, -1.074e-01, -1.459e-02, -4.629e-02, 2.949e-02, 4.951e-02, -8.052e-02, -3.776e-02, -4.897e-02, 1.135e-02, 1.967e-01, 1.796e-01, 3.094e-02, -2.135e-03, -9.460e-02)); + r += mul(s5_7, 
M4(3.535e-02, -2.418e-01, -1.202e-02, 1.840e-02, -6.386e-02, 5.459e-02, 1.255e-01, -7.740e-02, -1.249e-01, 2.402e-02, -4.647e-02, -5.060e-02, -8.733e-03, 9.626e-02, -1.413e-01, 8.242e-02)); + r += mul(s5_8, M4(4.101e-02, -1.777e-02, 4.835e-02, 1.135e-01, 4.554e-02, 1.219e-01, 4.158e-02, -5.097e-02, 1.331e-02, -1.184e-02, 3.028e-02, -1.485e-03, -3.645e-02, -4.552e-02, -6.050e-02, -4.123e-02)); + r += mul(s6_0, M4(5.351e-02, -1.293e-01, 5.374e-02, -5.108e-02, -1.440e-01, 1.143e-02, -1.114e-02, -1.379e-03, 2.562e-02, 2.804e-02, 3.620e-02, -3.392e-02, 5.778e-02, 6.113e-02, -6.055e-03, -4.381e-02)); + r += mul(s6_1, M4(-1.255e-01, 6.818e-02, 1.551e-01, -1.289e-01, -9.152e-02, -8.377e-02, -9.023e-02, -8.303e-02, 1.082e-01, 1.633e-02, -1.086e-01, -2.873e-02, 1.282e-01, 3.097e-02, 9.900e-03, -1.153e-01)); + r += mul(s6_2, M4(-1.054e-02, -3.995e-02, -2.186e-02, 4.059e-02, -1.168e-01, -1.012e-01, -6.086e-02, -2.419e-02, -6.052e-02, -7.367e-02, -4.577e-02, 3.211e-02, 4.124e-02, 1.235e-02, -3.945e-02, -6.397e-02)); + r += mul(s6_3, M4(1.669e-02, -2.115e-01, 2.236e-02, 1.717e-01, 1.757e-03, 3.770e-02, -3.654e-03, 6.192e-02, -4.239e-02, 4.960e-02, 6.035e-04, 2.639e-02, 1.147e-02, -6.225e-02, 7.189e-02, 5.251e-02)); + r += mul(s6_4, M4(9.938e-02, 3.995e-02, -1.010e-01, -5.093e-02, 4.820e-02, 3.492e-02, 1.436e-01, -1.534e-01, -1.845e-01, -2.132e-02, -1.343e-01, -9.454e-02, 7.165e-02, -1.747e-03, 3.244e-02, -6.289e-02)); + r += mul(s6_5, M4(4.139e-02, 8.645e-02, 9.399e-02, 3.691e-04, -4.433e-02, 7.535e-02, 9.574e-02, -5.203e-02, -2.959e-02, -8.498e-02, 6.047e-02, -4.950e-02, 2.226e-02, -3.868e-02, 8.249e-02, -3.529e-04)); + r += mul(s6_6, M4(8.061e-02, -5.656e-02, -5.368e-02, -5.495e-02, 1.414e-02, 2.116e-02, 1.308e-01, 8.124e-02, 3.390e-02, 1.219e-01, 4.394e-02, 4.619e-02, 3.129e-02, 2.178e-02, -1.655e-01, -6.343e-02)); + r += mul(s6_7, M4(1.022e-01, -6.860e-02, 2.375e-02, -2.220e-03, 4.988e-02, 3.299e-01, -1.765e-01, -2.914e-01, -2.302e-02, 8.937e-02, -1.611e-01, -3.162e-02, 
1.135e-02, 1.151e-01, -8.842e-03, 2.983e-02)); + r += mul(s6_8, M4(-7.457e-02, -1.736e-01, 7.785e-02, 3.747e-02, -8.805e-02, 1.588e-01, -2.302e-01, -3.967e-02, 2.187e-02, 6.145e-03, 1.436e-02, 6.803e-03, -7.099e-02, -7.314e-02, 1.006e-02, 6.246e-02)); + r += mul(s7_0, M4(-7.723e-02, -5.801e-02, -6.587e-02, 8.060e-02, 3.730e-03, 1.916e-02, -1.449e-02, -5.394e-07, -2.390e-02, 9.347e-02, -4.700e-02, -9.968e-03, -1.154e-02, -4.739e-02, 8.297e-02, 6.254e-02)); + r += mul(s7_1, M4(-5.722e-02, 4.579e-02, 1.460e-01, -5.556e-02, 1.696e-01, -2.296e-02, 4.788e-03, -1.250e-01, -6.934e-02, -4.866e-02, 4.083e-02, -1.056e-01, 4.424e-02, -6.733e-02, -1.075e-01, 2.183e-01)); + r += mul(s7_2, M4(-5.592e-02, -1.700e-02, -2.670e-02, -1.004e-01, 1.008e-01, -1.744e-03, 1.410e-01, -5.661e-02, -3.807e-04, 1.436e-02, 9.305e-03, 3.034e-02, -1.248e-01, -1.910e-02, -1.741e-02, 2.880e-02)); + r += mul(s7_3, M4(6.801e-02, -7.140e-02, -1.237e-01, 1.951e-01, 6.394e-02, -5.840e-02, -1.713e-02, -5.907e-02, 5.134e-02, -3.309e-02, -1.370e-01, 7.279e-02, 6.317e-02, -2.810e-01, 1.142e-02, -2.649e-02)); + r += mul(s7_4, M4(5.304e-02, -8.568e-02, -1.015e-01, -3.312e-02, 2.344e-01, -9.537e-02, -3.585e-02, -5.287e-02, -8.116e-02, 6.407e-02, 8.409e-02, -8.888e-02, 2.251e-01, 7.453e-02, 4.974e-02, 6.232e-01)); + r += mul(s7_5, M4(1.903e-01, -4.089e-02, 6.271e-02, -1.552e-02, 6.145e-02, -1.402e-01, -2.340e-01, -8.503e-02, 7.063e-02, -5.336e-02, 1.063e-01, -5.122e-02, 1.896e-02, 7.259e-02, -3.176e-02, 8.089e-02)); + r += mul(s7_6, M4(2.801e-02, 5.953e-02, 1.169e-02, 3.472e-02, -1.428e-01, -5.415e-02, 1.115e-02, 1.077e-02, 4.924e-02, 5.381e-02, 1.765e-02, 3.125e-02, 1.124e-03, 1.649e-02, 3.846e-02, -1.732e-01)); + r += mul(s7_7, M4(-5.102e-02, 4.520e-02, -1.745e-02, -2.658e-03, -9.990e-02, -9.452e-02, 4.177e-02, -5.105e-02, 9.738e-02, 4.162e-02, -1.828e-01, 6.219e-02, 2.346e-01, 6.254e-02, -3.511e-02, 2.310e-01)); + r += mul(s7_8, M4(-1.098e-02, -3.532e-02, -3.789e-02, 6.616e-02, -7.425e-03, -8.286e-03, 
8.732e-02, 8.728e-02, 1.692e-01, -1.588e-01, -6.698e-03, -3.937e-02, -1.749e-02, 1.068e-01, 2.430e-01, -1.154e-01)); + r += V4(1.466e-04, -5.235e-03, 7.258e-03, 7.678e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.290e-03, 9.370e-02, -4.320e-02, -9.472e-02, -6.005e-02, -4.003e-03, -9.464e-02, 8.462e-03, -1.773e-01, -2.048e-01, 1.052e-01, 1.815e-02, -8.543e-02, -2.965e-02, 7.096e-02, -1.312e-01)); + r += mul(s0_1, M4(1.154e-01, 7.756e-02, -2.212e-01, -4.084e-02, 1.015e-01, -9.616e-02, -9.514e-03, -1.778e-01, -2.940e-02, -1.525e-01, 1.598e-01, -1.158e-01, 1.203e-01, 2.498e-01, -2.661e-02, -1.841e-01)); + r += mul(s0_2, M4(2.721e-02, -5.147e-02, -3.403e-02, 1.207e-01, -5.816e-02, -2.122e-02, -3.382e-02, -3.588e-03, 1.734e-01, -1.531e-01, 1.519e-01, -9.517e-02, -1.556e-01, 7.585e-02, -2.048e-02, -9.128e-02)); + r += mul(s0_3, M4(-8.110e-02, 6.664e-02, 1.968e-02, -4.765e-02, 5.276e-02, -3.808e-02, 5.276e-02, 2.250e-01, -7.665e-02, -3.359e-02, -6.667e-03, -5.989e-02, 5.629e-02, -8.219e-02, 7.873e-02, -8.845e-02)); + r += mul(s0_4, M4(8.630e-02, -4.454e-02, 9.694e-02, 1.655e-02, 1.300e-03, 6.742e-02, 3.216e-02, -1.980e-02, 1.934e-02, 1.376e-01, -2.557e-01, -1.010e-01, 1.660e-01, -2.325e-02, -1.511e-01, 1.703e-01)); + r += mul(s0_5, M4(1.343e-01, -3.295e-02, 2.776e-02, -1.077e-01, 8.195e-02, -6.924e-02, -1.979e-02, 
-5.042e-02, -7.246e-02, 3.115e-02, 1.983e-01, 1.198e-01, 3.833e-02, -1.212e-02, -1.410e-01, -7.631e-02)); + r += mul(s0_6, M4(-9.589e-03, 3.830e-02, -4.812e-02, 6.645e-02, -8.617e-02, -5.045e-02, -1.132e-01, 1.357e-01, -9.929e-03, 3.030e-02, -1.809e-01, 1.052e-01, -1.058e-01, 3.901e-02, -6.436e-02, -4.964e-03)); + r += mul(s0_7, M4(-6.027e-02, -6.959e-02, 7.443e-03, 9.922e-02, 2.149e-02, 1.138e-01, -2.679e-02, 8.649e-02, 1.302e-01, -2.223e-01, 1.074e-02, 1.125e-01, -2.178e-02, 7.748e-02, 1.179e-02, 1.523e-02)); + r += mul(s0_8, M4(-3.916e-05, -5.470e-02, -2.190e-01, -3.828e-02, 2.979e-02, 6.529e-02, -1.606e-01, 3.706e-02, 2.055e-01, -1.164e-01, 9.257e-02, 5.272e-02, 3.183e-02, 4.857e-02, -9.588e-02, -6.533e-03)); + r += mul(s1_0, M4(7.540e-02, 9.764e-02, -1.319e-02, -2.100e-03, -8.562e-03, -7.786e-02, 6.762e-02, 8.959e-02, 1.179e-03, 8.631e-03, -2.432e-02, 1.476e-03, 2.655e-02, 1.271e-01, -5.520e-02, -3.024e-02)); + r += mul(s1_1, M4(4.190e-02, 5.008e-02, 2.203e-02, 6.023e-02, -7.306e-02, -1.108e-01, -8.751e-03, -1.636e-01, 8.796e-03, -7.475e-03, -8.521e-02, 3.632e-02, -2.483e-02, -1.363e-01, 8.583e-02, -1.698e-01)); + r += mul(s1_2, M4(5.378e-02, 1.391e-01, -2.328e-02, 1.559e-01, -3.753e-02, 1.481e-02, 4.905e-02, 1.148e-02, 1.044e-01, -1.197e-01, -3.189e-02, -1.087e-01, -9.864e-02, 2.428e-02, 6.278e-02, -1.561e-01)); + r += mul(s1_3, M4(1.942e-02, -1.416e-02, -1.320e-03, 1.718e-03, -9.708e-02, 3.691e-02, 8.199e-04, 1.795e-01, 6.060e-03, 4.807e-02, -9.477e-02, -1.175e-02, 1.004e-01, 2.195e-03, -4.551e-02, -1.336e-02)); + r += mul(s1_4, M4(2.728e-02, -6.010e-02, -6.956e-02, -1.666e-02, -1.604e-02, 4.916e-02, -1.192e-02, 1.337e-02, -6.281e-03, 5.109e-02, -1.169e-01, -9.832e-02, -1.918e-01, 1.490e-01, 1.082e-01, 2.273e-02)); + r += mul(s1_5, M4(-2.612e-02, 4.914e-02, 9.274e-02, -5.310e-02, 2.109e-02, -8.898e-02, 4.483e-02, -9.475e-02, 5.839e-02, 7.126e-03, 4.129e-03, 3.534e-02, -3.360e-02, -3.514e-02, -7.179e-02, -1.169e-01)); + r += mul(s1_6, M4(-5.941e-02, 
2.700e-02, -2.747e-02, 3.563e-02, -5.072e-02, -4.170e-02, 1.761e-01, 3.688e-02, -1.132e-01, -4.068e-03, -9.349e-02, 6.556e-03, -5.830e-02, 7.343e-03, -1.387e-01, 3.240e-02)); + r += mul(s1_7, M4(-8.859e-03, -1.137e-02, 1.834e-02, -3.014e-02, -1.304e-02, 8.656e-02, 1.825e-02, -4.192e-02, 1.328e-01, -8.751e-03, -3.517e-02, -5.615e-02, 6.907e-02, 2.833e-02, -7.642e-02, 7.053e-02)); + r += mul(s1_8, M4(6.392e-02, 5.642e-02, 2.008e-02, 4.261e-02, -5.019e-02, 5.220e-02, 3.438e-02, 2.187e-02, 2.450e-02, -8.571e-03, 9.732e-02, 7.153e-02, 1.682e-03, -6.041e-02, -1.310e-02, -1.153e-02)); + r += mul(s2_0, M4(1.411e-02, -2.235e-02, 1.050e-02, 9.889e-03, -5.474e-02, -9.156e-02, 2.731e-02, -2.199e-02, -5.243e-02, -4.685e-02, 7.612e-02, 5.636e-02, -5.409e-02, 7.412e-03, 1.141e-01, 4.936e-02)); + r += mul(s2_1, M4(3.292e-02, 9.274e-02, 2.352e-02, 3.996e-02, -1.340e-01, -1.743e-01, -6.656e-02, -1.599e-01, 1.060e-01, -7.129e-02, 3.157e-02, -6.869e-02, -9.807e-02, -1.541e-02, 1.504e-01, 7.686e-02)); + r += mul(s2_2, M4(-1.548e-02, 1.043e-02, 1.075e-02, -7.252e-02, -3.533e-02, 1.450e-01, 2.824e-02, -6.352e-02, 9.156e-03, -2.765e-02, -3.235e-02, -9.487e-02, -1.019e-01, -1.105e-02, 7.342e-02, -1.165e-01)); + r += mul(s2_3, M4(-2.381e-02, -1.041e-01, -2.931e-02, -5.409e-02, -5.221e-02, -8.654e-02, 8.565e-02, 5.808e-02, 1.080e-01, 4.777e-02, 5.038e-02, -6.842e-02, -2.049e-02, 7.978e-04, 1.524e-02, 5.051e-02)); + r += mul(s2_4, M4(-1.083e-01, -1.008e-03, 1.155e-01, 1.658e-02, 2.956e-02, 1.441e-01, 5.986e-02, 1.595e-01, 3.039e-01, 9.038e-02, 6.069e-02, -1.482e-01, -1.366e-01, 7.371e-02, 1.787e-01, -1.498e-01)); + r += mul(s2_5, M4(1.529e-01, -7.698e-02, -4.434e-02, -8.404e-02, 8.368e-02, 1.152e-01, -1.935e-01, -2.468e-03, 2.219e-02, 9.952e-02, -3.702e-02, 3.885e-02, 3.898e-02, 1.067e-01, -8.168e-02, -4.104e-02)); + r += mul(s2_6, M4(8.873e-03, -6.697e-02, -7.962e-02, 8.146e-02, -1.902e-04, -2.720e-02, 9.140e-02, -4.676e-02, 3.858e-02, -7.333e-02, 3.452e-02, 3.836e-02, 5.023e-03, 7.728e-02, 
5.099e-02, -3.697e-02)); + r += mul(s2_7, M4(-4.881e-02, 1.881e-02, 8.563e-02, 3.162e-02, 1.113e-01, -5.356e-02, -9.279e-02, 3.903e-02, -2.803e-01, -1.296e-01, 9.978e-02, -3.956e-02, 2.544e-01, 1.759e-02, -4.664e-02, 4.580e-02)); + r += mul(s2_8, M4(3.493e-02, -7.852e-03, -6.132e-02, 1.860e-02, 6.619e-02, 5.271e-02, -2.771e-05, 5.595e-03, -4.999e-02, 4.761e-02, 2.335e-03, 3.123e-02, 9.426e-02, -9.228e-02, -3.116e-01, 5.790e-03)); + r += mul(s3_0, M4(4.187e-01, 1.570e-01, 3.416e-01, -1.476e-01, 2.762e-02, -6.821e-02, 1.792e-01, -7.145e-02, -1.132e-01, -1.147e-01, 4.687e-03, -8.415e-02, -6.371e-02, -9.179e-02, 8.568e-02, 5.282e-02)); + r += mul(s3_1, M4(2.006e-01, -1.368e-01, -1.491e-01, 1.522e-01, 1.591e-01, -2.736e-01, -7.281e-03, -1.058e-01, 1.874e-01, 6.724e-02, -2.351e-02, 1.403e-01, -3.835e-02, -1.348e-01, 4.668e-03, 2.206e-02)); + r += mul(s3_2, M4(-2.926e-02, 3.517e-01, 1.707e-01, -1.408e-03, -3.285e-02, -3.590e-02, 1.927e-01, -2.824e-02, 3.858e-03, 6.156e-02, -5.746e-02, -4.645e-02, -4.775e-02, 1.494e-02, 1.537e-01, -8.107e-02)); + r += mul(s3_3, M4(-5.339e-02, -2.462e-02, 1.801e-01, 6.735e-02, 1.110e-01, -7.172e-02, 1.427e-02, -2.208e-02, 4.538e-02, 1.186e-02, -6.721e-02, 2.340e-01, 7.496e-02, -1.336e-02, -1.109e-01, -1.291e-02)); + r += mul(s3_4, M4(-4.793e-01, -1.920e-02, 2.296e-01, 2.338e-01, 3.594e-02, 3.332e-01, -1.317e-02, 1.741e-01, 1.125e-01, -8.630e-02, 2.258e-01, -6.323e-02, -2.961e-02, -5.608e-02, 5.575e-02, -3.323e-02)); + r += mul(s3_5, M4(-1.526e-01, -7.034e-02, 6.900e-02, -1.000e-01, 2.197e-01, -8.525e-02, -1.881e-01, 7.698e-03, 1.491e-02, -2.469e-02, -7.523e-02, -8.245e-02, -3.840e-02, 1.184e-01, -1.436e-01, -9.901e-02)); + r += mul(s3_6, M4(-5.958e-02, 8.086e-03, 9.842e-02, -1.083e-01, 9.213e-02, -5.961e-02, 7.362e-02, -8.798e-02, 1.607e-01, -1.811e-02, -8.682e-02, 1.032e-02, 3.486e-02, 3.203e-02, 5.213e-02, -3.576e-02)); + r += mul(s3_7, M4(7.391e-03, 1.972e-01, -3.796e-03, 6.523e-02, -1.108e-01, -4.348e-02, -8.562e-02, -1.051e-01, 
-9.360e-02, -1.299e-02, 2.863e-02, -1.352e-01, 2.290e-01, 8.414e-02, -1.579e-01, 4.872e-03)); + r += mul(s3_8, M4(-2.161e-02, 8.326e-02, 3.877e-02, 1.170e-01, 1.057e-01, 9.497e-02, 5.896e-02, 8.480e-02, -3.514e-03, 4.368e-02, -6.036e-02, 8.517e-02, 1.221e-01, -3.263e-02, -1.092e-01, -4.857e-02)); + r += mul(s4_0, M4(-6.439e-03, 2.590e-02, -1.078e-02, 4.911e-03, -1.234e-01, -3.479e-02, -2.544e-02, -9.376e-04, -1.409e-02, -1.044e-01, 2.975e-02, -5.554e-03, 8.809e-02, -1.176e-01, -8.657e-03, -9.243e-02)); + r += mul(s4_1, M4(8.806e-02, 1.608e-01, 3.592e-02, 6.373e-02, 1.539e-02, 2.080e-02, 1.398e-01, -4.859e-02, 4.323e-02, -3.154e-02, 5.207e-02, 5.971e-02, 8.081e-02, -6.827e-02, -7.255e-02, -9.584e-02)); + r += mul(s4_2, M4(1.009e-01, -3.291e-02, -3.984e-02, 1.348e-01, 9.537e-02, -9.646e-02, -1.498e-01, 3.553e-02, 9.186e-02, -1.523e-02, 8.877e-02, 7.689e-02, -2.311e-02, 3.178e-02, 7.619e-02, -2.005e-02)); + r += mul(s4_3, M4(8.980e-03, -1.306e-01, 5.834e-03, 4.182e-02, 1.066e-01, 3.029e-02, 3.420e-02, -9.790e-03, -2.331e-02, 2.927e-02, -1.827e-02, 3.896e-03, 5.993e-02, 1.324e-01, 7.162e-03, 7.392e-02)); + r += mul(s4_4, M4(5.454e-03, 7.056e-02, 5.852e-02, 1.588e-01, 1.488e-01, 4.709e-02, -3.013e-02, 6.137e-04, 1.686e-02, 2.025e-02, -4.921e-02, -6.366e-02, 1.511e-01, 5.800e-02, 9.437e-03, 2.178e-01)); + r += mul(s4_5, M4(-8.298e-02, -3.234e-02, -1.870e-01, -7.194e-04, -9.680e-02, -6.716e-02, -6.514e-02, 4.009e-03, -6.493e-02, 9.135e-03, 6.310e-02, -2.987e-02, 5.917e-02, -3.049e-02, -6.810e-02, 1.133e-01)); + r += mul(s4_6, M4(2.475e-01, -1.028e-01, -1.012e-01, 5.798e-02, 4.370e-02, -8.948e-02, 2.015e-02, -3.095e-02, 1.079e-02, 4.546e-03, -1.499e-01, -2.327e-02, 2.749e-01, 4.021e-02, 3.484e-02, 8.184e-03)); + r += mul(s4_7, M4(-8.256e-02, 8.792e-02, 1.201e-01, -9.130e-02, 1.690e-02, 7.735e-02, -2.095e-01, -9.881e-02, 1.303e-01, 3.493e-02, 1.233e-01, 7.466e-02, -2.529e-01, 1.450e-01, -2.835e-01, -6.814e-03)); + r += mul(s4_8, M4(1.216e-01, -3.621e-02, -2.386e-01, 
9.544e-02, -4.847e-02, -5.423e-02, 2.443e-02, 1.336e-02, -4.633e-02, -3.880e-02, -7.520e-02, -2.050e-03, -2.050e-02, -2.381e-02, -1.815e-02, 9.741e-03)); + r += mul(s5_0, M4(-3.185e-02, 1.057e-02, -2.817e-02, -3.388e-02, -5.074e-02, 1.164e-01, 1.036e-01, -3.044e-02, -4.854e-02, -4.310e-03, 1.276e-01, 3.407e-03, 4.557e-02, -4.412e-02, -8.336e-03, -4.077e-02)); + r += mul(s5_1, M4(1.156e-01, 1.292e-01, 2.385e-02, 7.971e-02, 5.013e-02, -3.915e-02, 6.567e-02, 3.506e-02, 2.449e-02, 1.782e-01, -2.747e-01, 1.516e-01, -8.615e-05, -3.332e-02, -1.616e-02, -2.850e-02)); + r += mul(s5_2, M4(1.542e-01, -6.468e-03, 1.698e-02, 4.033e-02, -5.540e-02, 1.855e-02, -8.269e-02, -8.155e-02, -1.974e-01, -6.811e-02, 6.998e-02, -2.765e-02, 1.672e-02, 5.377e-02, -6.681e-03, 7.280e-02)); + r += mul(s5_3, M4(-2.420e-02, -1.241e-01, 1.166e-03, -2.627e-02, 3.275e-02, -1.170e-01, -3.455e-03, 6.929e-02, -1.627e-01, 1.008e-01, -5.692e-02, -1.333e-01, -1.243e-01, 3.192e-02, -9.795e-02, -1.679e-01)); + r += mul(s5_4, M4(-6.568e-02, 2.003e-01, 1.194e-01, 7.742e-02, -2.088e-01, 7.239e-02, 1.333e-01, 1.790e-01, 1.797e-01, 7.090e-02, -1.162e-01, -1.318e-01, 1.639e-02, -1.386e-01, -2.500e-02, -1.245e-01)); + r += mul(s5_5, M4(-9.647e-02, 8.403e-02, -2.060e-03, 2.117e-01, 5.347e-02, -1.115e-01, -7.547e-02, 8.582e-02, -1.059e-01, 2.511e-02, -1.307e-01, 9.496e-02, -7.920e-02, 3.782e-02, 3.762e-02, 2.821e-02)); + r += mul(s5_6, M4(-3.827e-02, -1.998e-02, -1.564e-02, -5.277e-02, -1.666e-01, 6.916e-02, 8.288e-02, -8.635e-02, 4.294e-02, -8.745e-02, 1.111e-01, -2.236e-02, -1.172e-01, 1.120e-01, -4.274e-05, -1.752e-01)); + r += mul(s5_7, M4(1.626e-01, -5.202e-02, 3.022e-02, 9.743e-02, -6.804e-02, 2.098e-02, 2.797e-03, 4.546e-02, -1.249e-01, 2.074e-01, -1.100e-01, -9.678e-02, -1.159e-01, -4.199e-02, 3.311e-01, -1.160e-01)); + r += mul(s5_8, M4(-7.867e-02, 6.140e-02, 1.652e-02, 1.256e-02, -1.360e-01, 6.991e-02, 1.342e-01, 2.675e-02, 1.578e-01, -1.088e-01, -7.167e-02, 6.907e-02, -6.101e-02, -4.746e-02, -1.126e-01, 
-1.637e-02)); + r += mul(s6_0, M4(-2.515e-02, -1.885e-02, -5.777e-02, -2.252e-01, -3.739e-02, -2.786e-02, 5.066e-02, 1.577e-02, 1.946e-02, 1.096e-01, -7.705e-02, 5.620e-02, -5.276e-02, 3.204e-02, -1.163e-01, 4.990e-02)); + r += mul(s6_1, M4(3.752e-02, 6.195e-02, -1.753e-01, -1.356e-02, 5.523e-02, 1.733e-01, 4.054e-02, 4.283e-02, 8.276e-02, -8.704e-02, -2.671e-02, 9.156e-02, 6.897e-02, 1.421e-01, 1.076e-02, 7.837e-02)); + r += mul(s6_2, M4(-3.095e-02, -3.410e-02, 1.191e-01, -7.677e-03, 5.647e-02, 4.696e-02, -1.132e-02, -2.785e-02, 6.987e-02, -4.717e-02, 4.592e-03, 6.663e-02, -5.682e-02, -4.635e-02, 1.832e-03, 5.830e-03)); + r += mul(s6_3, M4(-8.835e-02, -1.053e-01, 1.292e-01, -7.368e-02, 1.444e-01, 2.780e-02, -7.350e-02, 3.015e-02, 1.847e-02, 1.329e-01, -1.123e-01, -1.848e-02, 8.562e-02, 9.074e-02, 8.054e-02, 2.418e-02)); + r += mul(s6_4, M4(7.310e-03, -1.969e-02, 1.007e-01, 1.113e-01, -9.991e-03, -1.812e-01, 1.073e-01, -2.093e-01, -4.888e-02, -1.555e-01, -3.170e-02, -1.123e-01, -3.621e-02, 1.387e-02, -3.134e-02, -1.403e-01)); + r += mul(s6_5, M4(-5.748e-02, -8.762e-02, 5.587e-02, -9.407e-02, -1.383e-01, 7.550e-03, 6.783e-03, -1.238e-01, -1.612e-02, 2.597e-02, 1.169e-02, 6.839e-02, 1.773e-02, -4.397e-02, -2.984e-02, 2.920e-02)); + r += mul(s6_6, M4(6.376e-02, 2.743e-02, -4.821e-03, -6.324e-02, -4.138e-02, 2.202e-02, 4.987e-02, 8.916e-02, 5.525e-02, 2.917e-02, -3.244e-02, -7.117e-02, -3.278e-02, 3.070e-02, -5.857e-02, 2.783e-02)); + r += mul(s6_7, M4(1.624e-02, 4.970e-02, -4.111e-02, -4.877e-02, -2.928e-01, 5.539e-02, 1.247e-01, -4.042e-02, -1.235e-01, -1.679e-04, -2.364e-02, -6.966e-02, 6.416e-02, -2.049e-02, -7.345e-02, -1.792e-02)); + r += mul(s6_8, M4(1.609e-01, -4.346e-02, 9.700e-02, 9.440e-03, -3.306e-02, -2.842e-02, -8.993e-03, -6.132e-02, -6.560e-02, -5.271e-02, 1.367e-01, 1.921e-02, 7.265e-02, -4.142e-02, 2.049e-02, 1.043e-02)); + r += mul(s7_0, M4(1.894e-02, 6.258e-02, 8.076e-02, -2.416e-02, 5.400e-04, -9.984e-04, 7.170e-03, -3.911e-02, 6.118e-02, 
6.144e-02, 8.159e-02, 4.385e-02, -9.262e-02, 9.228e-02, -1.446e-01, 2.100e-02)); + r += mul(s7_1, M4(6.674e-02, 5.568e-03, -7.251e-02, -4.187e-02, 1.851e-02, 5.571e-02, 2.995e-02, 5.407e-02, 2.183e-01, -4.680e-02, 1.004e-01, 1.623e-01, -8.901e-02, -1.236e-01, -1.153e-01, -3.801e-02)); + r += mul(s7_2, M4(-7.886e-03, -3.350e-02, 4.360e-02, 7.645e-02, 1.056e-01, -3.103e-02, 6.506e-02, 8.190e-02, 3.249e-02, -7.791e-02, 6.712e-02, -1.043e-01, -1.930e-02, 5.442e-02, -1.336e-01, 1.288e-02)); + r += mul(s7_3, M4(7.036e-02, 6.907e-02, -7.875e-02, 8.528e-02, -2.936e-02, -1.772e-01, -2.183e-01, -5.999e-02, -3.511e-03, 2.193e-03, 1.227e-01, 1.089e-02, 9.310e-02, 1.357e-01, -2.419e-02, -5.080e-02)); + r += mul(s7_4, M4(4.810e-02, 8.335e-02, -3.029e-02, 8.989e-03, 1.662e-01, -2.807e-01, -1.801e-01, 2.570e-02, -1.548e-01, -1.555e-01, 3.219e-01, -1.050e-01, -1.471e-01, -1.995e-01, -1.833e-01, -3.143e-01)); + r += mul(s7_5, M4(-8.325e-02, 2.041e-02, 9.360e-02, 9.553e-02, -1.848e-02, -1.377e-01, 1.536e-01, -2.651e-02, 8.098e-02, -2.200e-02, -9.656e-02, -8.114e-02, -3.943e-02, 8.533e-02, 2.998e-02, 8.909e-02)); + r += mul(s7_6, M4(5.689e-02, -6.169e-02, 1.352e-02, 5.280e-02, -2.217e-02, -1.066e-01, -7.912e-02, 8.141e-02, 2.398e-02, -9.405e-02, 1.244e-01, -5.454e-02, -8.083e-04, 8.872e-02, -1.283e-01, -3.437e-02)); + r += mul(s7_7, M4(-1.204e-02, -1.334e-02, -7.140e-02, -8.917e-03, -1.646e-02, -1.025e-01, -8.074e-02, 1.460e-01, -1.283e-01, -3.602e-02, 9.681e-02, 1.732e-02, 8.187e-02, -3.115e-01, -1.832e-01, -2.510e-02)); + r += mul(s7_8, M4(-6.913e-02, 4.574e-02, 4.980e-02, 6.209e-04, 8.980e-02, -1.209e-01, 1.175e-01, -4.394e-02, 5.081e-03, -6.635e-02, -3.629e-03, -4.362e-02, -3.733e-02, -8.044e-02, 2.265e-01, 1.154e-02)); + r += V4(-7.481e-03, 3.894e-03, -5.502e-03, -4.914e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 
s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.724e-02, 3.479e-03, 1.567e-03, -1.214e-01, 4.689e-03, 1.444e-02, 8.451e-02, 1.177e-03, 3.629e-02, -1.559e-01, 1.897e-01, 5.087e-02, -1.851e-01, -9.648e-03, -1.368e-01, -2.592e-01)); + r += mul(s0_1, M4(1.487e-01, 1.007e-01, 1.845e-01, 1.220e-01, -2.131e-01, 1.747e-02, -1.074e-02, 1.567e-02, 1.012e-01, -1.598e-01, -8.153e-03, 1.760e-02, 5.070e-02, -1.675e-01, 1.655e-01, 3.491e-02)); + r += mul(s0_2, M4(3.821e-01, 1.155e-01, -1.487e-02, 1.180e-01, -4.396e-02, 6.640e-03, 5.773e-02, 4.614e-02, 5.837e-02, -4.640e-02, -1.066e-01, -7.149e-02, -3.485e-02, -2.152e-01, 9.750e-02, 1.785e-02)); + r += mul(s0_3, M4(-1.623e-02, -1.133e-01, -2.455e-02, -1.347e-01, 1.359e-02, 2.334e-01, 3.878e-01, -2.031e-01, -1.815e-01, 1.718e-02, -9.717e-03, 7.400e-02, 6.369e-02, -1.304e-01, 3.659e-02, -1.692e-01)); + r += mul(s0_4, M4(7.610e-02, 1.194e-02, 2.673e-02, 3.135e-01, 2.760e-02, 1.648e-01, 1.564e-01, 4.504e-02, -2.295e-01, 5.353e-02, 3.624e-03, -3.883e-02, 4.702e-02, -1.814e-01, 6.465e-03, 2.531e-02)); + r += mul(s0_5, M4(4.372e-02, 3.090e-02, -1.860e-01, 9.564e-03, 9.612e-03, -3.647e-02, -1.468e-02, 8.505e-03, -5.727e-02, 8.499e-02, -1.629e-01, 5.623e-02, 1.557e-01, -2.105e-01, -6.874e-02, 2.368e-03)); + r += mul(s0_6, M4(3.906e-02, -6.603e-02, -1.071e-02, -2.715e-02, 2.176e-02, -1.145e-01, -8.425e-02, -2.132e-02, 8.685e-02, -1.174e-01, -8.648e-02, -2.373e-02, 3.716e-02, -7.980e-02, -8.063e-02, 1.802e-02)); + r += mul(s0_7, M4(-6.397e-02, -3.391e-02, 7.026e-02, 1.341e-01, 8.867e-02, 
-1.932e-01, -5.525e-02, 5.118e-02, 1.150e-01, -2.245e-02, -9.164e-02, 8.815e-02, -2.175e-02, 5.732e-02, 3.391e-02, -7.967e-02)); + r += mul(s0_8, M4(6.969e-02, 2.481e-02, -5.758e-02, -1.122e-01, 6.473e-02, -1.135e-01, 1.552e-02, 4.858e-02, 2.165e-02, -2.840e-02, -5.521e-02, 4.638e-02, -6.026e-02, 8.548e-02, -6.542e-02, 2.422e-02)); + r += mul(s1_0, M4(-1.324e-01, -9.657e-02, 5.681e-02, -5.161e-02, -2.698e-02, -2.448e-02, -2.619e-01, -6.980e-02, 5.807e-02, 5.673e-02, 5.349e-02, -2.276e-02, 2.047e-02, 7.038e-02, 6.933e-02, -2.684e-02)); + r += mul(s1_1, M4(-5.279e-02, -4.786e-02, 2.147e-02, 1.050e-02, -2.486e-02, 2.043e-01, -3.822e-03, -5.897e-02, 1.032e-01, -7.920e-02, -1.604e-01, 4.115e-02, -2.427e-01, 2.145e-02, -1.255e-01, -1.657e-01)); + r += mul(s1_2, M4(-1.010e-01, -1.010e-01, -7.991e-02, 6.059e-02, 1.697e-02, -4.796e-02, 1.962e-02, 4.203e-02, -8.784e-02, -1.023e-01, 1.271e-02, -3.299e-02, 1.298e-01, 1.214e-01, 6.105e-02, -2.229e-02)); + r += mul(s1_3, M4(-4.770e-02, 5.913e-02, 5.020e-02, -7.131e-03, -2.060e-01, 1.241e-01, 2.388e-02, -2.635e-02, 1.238e-01, 7.247e-02, -8.043e-02, -3.076e-02, 1.658e-01, -4.568e-02, -7.240e-02, 1.014e-01)); + r += mul(s1_4, M4(8.966e-02, 1.616e-01, 5.095e-02, -2.455e-03, 5.593e-02, 1.857e-01, 1.454e-01, -7.192e-03, 6.316e-02, 5.048e-02, 1.297e-02, -1.763e-01, -1.952e-01, -1.460e-01, 4.980e-02, -8.409e-02)); + r += mul(s1_5, M4(-9.377e-02, 1.184e-01, 5.613e-02, -7.132e-02, -1.259e-03, 5.138e-02, 4.730e-02, -2.482e-02, -3.313e-02, 8.537e-02, 2.716e-02, 1.247e-02, 1.137e-01, -7.160e-02, 7.234e-02, 6.762e-02)); + r += mul(s1_6, M4(-6.539e-02, -7.457e-02, -6.890e-02, 4.739e-02, -1.187e-01, -7.289e-02, -8.715e-02, 1.226e-01, 4.527e-02, 1.590e-01, 3.229e-02, -1.924e-02, 5.422e-02, 7.550e-02, -2.298e-02, 1.753e-02)); + r += mul(s1_7, M4(6.078e-03, 4.910e-02, -2.113e-02, 2.665e-02, 1.067e-01, -1.883e-01, 1.012e-02, 4.174e-02, -1.408e-01, -4.429e-02, 2.566e-02, 1.960e-02, -1.501e-01, 1.980e-02, -1.924e-02, 8.359e-02)); + r += mul(s1_8, 
M4(-8.162e-02, -2.046e-01, -4.726e-03, 6.651e-02, 5.891e-02, 4.803e-02, 4.659e-03, 4.186e-02, 5.667e-02, -2.298e-02, 1.575e-02, -3.687e-02, 4.226e-02, -8.163e-03, 2.719e-02, -2.139e-02)); + r += mul(s2_0, M4(-1.300e-01, -2.071e-02, -2.876e-02, 3.092e-02, -4.768e-02, -8.427e-02, -1.292e-02, 2.653e-02, 3.043e-02, -2.351e-02, 1.032e-01, -1.242e-01, -5.527e-02, 5.803e-02, -2.570e-01, -5.117e-02)); + r += mul(s2_1, M4(-4.517e-02, -1.088e-02, 1.822e-01, -1.161e-01, -9.387e-02, 1.175e-01, -3.744e-02, -5.978e-02, 1.616e-01, 2.280e-02, 2.708e-01, 1.192e-02, 4.039e-02, -1.120e-01, 1.062e-02, 8.678e-02)); + r += mul(s2_2, M4(8.099e-02, -3.482e-02, -1.468e-02, -2.101e-02, 1.372e-03, -7.233e-03, 1.620e-02, -7.852e-02, 6.260e-04, 8.281e-04, 8.617e-02, -3.288e-02, -9.138e-03, 3.785e-02, 5.528e-02, -3.426e-02)); + r += mul(s2_3, M4(-9.734e-02, 1.518e-01, 4.747e-02, 5.377e-02, -7.703e-02, -7.372e-02, 5.939e-02, 1.732e-02, -6.665e-02, -7.835e-04, 1.318e-01, -1.620e-01, -1.705e-01, 1.204e-01, -2.448e-01, -8.184e-02)); + r += mul(s2_4, M4(1.516e-01, 1.280e-01, -1.038e-01, 4.404e-02, 1.640e-02, -4.291e-02, 4.069e-03, -5.652e-02, -1.271e-01, 9.381e-04, -1.282e-02, 1.567e-03, 1.468e-01, 2.367e-01, -3.452e-02, 8.083e-02)); + r += mul(s2_5, M4(3.144e-02, -5.804e-03, -4.107e-02, 1.045e-02, 4.592e-02, -5.854e-02, -1.150e-01, 1.797e-02, 3.444e-02, 1.098e-02, -1.012e-01, 2.547e-02, 1.607e-01, -1.564e-02, 1.843e-01, -1.313e-01)); + r += mul(s2_6, M4(-1.530e-02, 1.923e-02, -2.923e-02, -4.513e-02, -4.605e-03, -4.504e-02, -2.672e-02, 4.149e-02, 8.869e-02, -9.748e-04, -2.265e-02, -3.775e-02, -2.417e-01, -1.869e-01, -4.442e-02, 2.009e-02)); + r += mul(s2_7, M4(-6.688e-02, -2.039e-01, -1.457e-01, 1.024e-01, -5.316e-02, -1.241e-01, -4.952e-02, 1.172e-01, 6.062e-02, 1.271e-02, -3.552e-02, 6.410e-02, -4.152e-02, -7.543e-02, -5.036e-02, -6.374e-02)); + r += mul(s2_8, M4(5.725e-02, -3.547e-02, 3.825e-03, 5.203e-02, 9.482e-02, 4.143e-02, 3.479e-02, 1.198e-02, -7.491e-02, -3.943e-02, 6.759e-02, 2.261e-02, 
8.649e-02, 4.145e-02, -1.313e-01, -9.109e-03)); + r += mul(s3_0, M4(1.255e-01, 1.725e-01, -3.055e-01, -2.458e-01, -8.173e-02, -1.575e-01, -1.474e-01, -7.916e-02, 5.298e-02, 1.096e-01, -8.465e-02, -1.728e-01, -2.717e-02, 4.167e-02, 5.676e-02, -8.003e-02)); + r += mul(s3_1, M4(7.220e-01, 4.352e-02, -3.478e-02, -8.018e-02, -1.801e-01, -1.849e-02, 2.605e-01, -5.339e-02, 2.113e-01, 1.412e-01, -2.476e-01, 8.284e-02, 4.750e-02, 8.815e-02, 6.282e-02, 1.604e-01)); + r += mul(s3_2, M4(-4.143e-03, 7.700e-02, -3.138e-01, -2.956e-03, 3.122e-02, 1.719e-01, 6.956e-02, -2.989e-02, 1.377e-01, -4.360e-02, -1.460e-01, 4.187e-02, 6.419e-02, -8.990e-03, 5.880e-02, -5.041e-02)); + r += mul(s3_3, M4(-1.803e-01, 1.234e-01, 1.836e-02, 1.601e-01, -2.087e-01, -8.290e-02, -2.584e-01, 1.591e-03, 1.041e-01, -7.367e-02, 2.035e-01, 1.431e-01, 3.019e-02, 8.365e-03, -4.384e-03, -6.875e-02)); + r += mul(s3_4, M4(-1.539e-01, -7.009e-02, 2.010e-02, 2.897e-02, -1.929e-01, 1.293e-01, 9.247e-02, -3.863e-01, 1.431e-02, -1.628e-01, 5.519e-02, 6.598e-02, 1.763e-01, 1.272e-01, -4.278e-02, -3.687e-02)); + r += mul(s3_5, M4(-1.652e-01, 2.029e-01, 6.375e-02, -5.107e-02, -8.411e-02, -1.921e-01, 8.067e-02, 1.234e-01, 2.371e-02, -1.176e-01, -3.104e-02, -1.161e-01, 1.792e-01, 4.127e-02, 6.943e-02, -2.933e-02)); + r += mul(s3_6, M4(1.357e-01, -1.176e-01, 1.548e-01, -1.383e-01, -1.343e-02, -2.689e-02, 4.835e-02, -3.363e-02, 1.045e-01, 1.024e-01, 5.824e-04, -9.024e-02, -1.081e-01, 2.408e-02, 9.971e-02, -4.258e-02)); + r += mul(s3_7, M4(-7.603e-03, -1.731e-01, -2.018e-01, 1.153e-01, 2.150e-02, -1.564e-01, -5.606e-02, 3.453e-02, -5.661e-02, -8.458e-04, 3.539e-02, 2.572e-02, -5.008e-02, 1.264e-03, -1.862e-02, -7.547e-02)); + r += mul(s3_8, M4(1.869e-01, 1.199e-03, 2.327e-01, 3.469e-02, -2.002e-01, -5.345e-02, -4.882e-02, 1.097e-01, -6.799e-03, -8.035e-02, 6.958e-02, 3.092e-02, 1.516e-01, -2.645e-02, 1.494e-02, -7.511e-02)); + r += mul(s4_0, M4(-4.345e-02, 1.484e-01, -3.307e-02, -1.443e-01, 3.420e-02, 1.968e-01, 
1.511e-01, 1.753e-01, -5.399e-02, 4.521e-02, -8.903e-02, 2.441e-03, -6.223e-02, -1.027e-02, -1.484e-01, -9.376e-02)); + r += mul(s4_1, M4(9.929e-02, -8.539e-02, -3.433e-02, 1.081e-01, 1.316e-02, -7.666e-02, -9.274e-03, 7.100e-02, 2.924e-02, 1.153e-01, -1.325e-01, 1.813e-01, 1.966e-02, 1.587e-01, -1.035e-01, -1.638e-01)); + r += mul(s4_2, M4(2.708e-01, -1.017e-02, -2.049e-02, 7.113e-02, 1.763e-02, -1.943e-02, -1.875e-02, -8.156e-03, -8.886e-02, -5.002e-02, -8.962e-02, 2.196e-02, -1.531e-02, 9.384e-03, 1.349e-01, 2.866e-02)); + r += mul(s4_3, M4(1.972e-01, 3.371e-01, 3.877e-02, -1.122e-01, 1.490e-01, -1.346e-01, 1.772e-02, -1.519e-01, 1.265e-01, 5.859e-02, 1.013e-01, -1.786e-01, 8.366e-02, -2.262e-02, 3.623e-01, -5.263e-02)); + r += mul(s4_4, M4(-1.303e-01, -1.114e-01, -4.405e-02, -1.153e-02, -4.026e-02, -3.629e-02, -2.858e-02, -1.237e-01, 8.082e-02, -1.807e-01, 1.310e-01, 5.274e-03, -1.675e-01, 2.320e-03, -3.316e-02, 3.970e-01)); + r += mul(s4_5, M4(3.387e-01, 2.381e-01, -2.503e-01, -2.648e-01, -2.016e-02, 4.843e-02, 1.089e-01, 1.411e-02, 4.083e-02, 1.763e-01, 8.014e-03, -6.033e-02, -9.262e-02, 1.614e-03, -1.027e-01, -5.194e-03)); + r += mul(s4_6, M4(1.025e-01, -7.352e-02, -1.283e-01, -6.252e-02, 5.032e-02, 1.491e-01, -2.388e-02, -1.248e-02, -6.140e-03, 7.336e-02, -5.229e-03, 8.707e-03, -6.721e-02, 1.560e-01, -9.837e-02, -1.118e-01)); + r += mul(s4_7, M4(1.544e-01, -7.410e-02, 7.230e-02, -1.136e-01, 4.004e-02, 2.621e-02, -7.795e-02, -1.646e-01, -4.100e-02, -3.569e-02, -2.735e-02, 5.582e-03, -1.329e-01, 3.085e-01, -9.390e-02, -4.508e-02)); + r += mul(s4_8, M4(5.530e-02, -2.003e-01, -9.624e-03, 5.853e-03, -5.959e-02, -5.444e-02, -5.310e-02, -7.021e-03, -3.355e-03, 9.593e-03, -1.768e-02, 1.376e-01, -2.936e-02, 1.131e-01, 1.300e-02, -5.178e-02)); + r += mul(s5_0, M4(2.089e-02, 7.627e-02, -5.233e-02, 1.727e-03, -2.312e-02, -2.415e-02, 8.087e-02, 1.105e-01, 5.380e-02, 6.047e-02, 9.878e-02, 1.792e-01, 7.137e-02, -4.554e-02, 5.170e-02, -4.089e-02)); + r += mul(s5_1, 
M4(8.278e-02, -2.174e-02, -1.357e-01, 1.360e-01, -9.989e-02, -1.275e-02, -2.446e-02, -1.053e-01, 3.093e-01, 1.811e-01, 1.872e-01, 1.046e-01, 1.129e-01, 9.270e-02, -2.918e-02, -5.317e-02)); + r += mul(s5_2, M4(-5.674e-02, -5.405e-02, 5.388e-02, -1.392e-01, 1.325e-02, 9.537e-02, 1.691e-02, -4.745e-02, -4.153e-01, -1.919e-01, 7.634e-01, 1.745e-01, -1.831e-02, -6.676e-02, -2.360e-02, 8.935e-02)); + r += mul(s5_3, M4(-4.224e-02, 1.140e-01, -1.521e-01, 3.822e-02, 1.009e-01, -3.675e-02, 2.922e-02, -4.790e-02, -6.382e-02, -5.778e-02, 1.177e-01, -1.353e-01, 1.389e-02, -6.895e-02, 1.290e-02, -1.372e-01)); + r += mul(s5_4, M4(1.019e-01, 1.057e-01, 2.167e-01, 5.358e-02, -6.091e-02, -1.346e-01, 8.352e-02, 1.510e-01, 3.459e-01, -3.201e-01, 2.022e-02, 1.186e-01, -7.381e-02, 3.842e-02, -6.365e-03, 4.917e-02)); + r += mul(s5_5, M4(-2.177e-01, 7.857e-02, -9.041e-03, -4.998e-02, -1.335e-01, -3.964e-02, -1.952e-02, -3.630e-02, -5.583e-02, 3.286e-01, 3.291e-01, 5.013e-02, -8.821e-02, 5.026e-02, -3.832e-02, 8.286e-02)); + r += mul(s5_6, M4(-9.074e-03, -6.962e-02, 3.649e-02, -7.817e-02, -4.316e-02, -1.803e-02, 7.155e-02, 8.809e-02, 8.425e-02, 2.773e-03, 1.149e-01, -4.313e-02, 1.502e-01, 1.178e-01, 2.887e-02, -9.576e-02)); + r += mul(s5_7, M4(1.853e-02, 5.554e-02, -1.362e-02, 1.089e-01, 1.525e-02, -3.206e-02, 5.188e-02, -1.121e-01, -3.825e-02, 1.523e-01, -2.211e-01, 1.098e-01, -7.643e-02, -1.417e-01, 2.906e-02, -7.706e-02)); + r += mul(s5_8, M4(-1.191e-01, 1.898e-02, 1.788e-02, 3.737e-02, 3.536e-03, 6.179e-02, -5.817e-02, -6.689e-02, -1.716e-01, 4.923e-02, 2.359e-01, -1.999e-02, -2.406e-02, 1.206e-01, 7.974e-02, 3.987e-02)); + r += mul(s6_0, M4(1.564e-01, -4.075e-03, -5.306e-02, -1.509e-01, -2.168e-02, 5.235e-02, 5.114e-02, 2.245e-02, 4.140e-02, 3.912e-03, 4.988e-02, 1.402e-02, -4.568e-03, -4.931e-02, 1.121e-01, 8.629e-03)); + r += mul(s6_1, M4(1.568e-01, 1.313e-01, 2.104e-01, -1.382e-02, -3.340e-03, -1.998e-01, -4.474e-03, 2.388e-01, 2.306e-02, 7.058e-03, 8.281e-02, 1.972e-01, 5.567e-03, 
-1.003e-01, 2.785e-02, 1.325e-01)); + r += mul(s6_2, M4(1.315e-01, 3.444e-03, -1.424e-01, 1.338e-01, 4.565e-02, -1.154e-02, -1.043e-01, -9.730e-02, 4.468e-03, -1.064e-01, -2.686e-03, -2.296e-02, -1.643e-02, 4.290e-02, -2.169e-02, -6.759e-04)); + r += mul(s6_3, M4(2.111e-01, 7.370e-02, 1.378e-01, 1.076e-01, 3.595e-02, 1.512e-02, 6.182e-02, -8.250e-03, 2.864e-02, -2.418e-02, -5.792e-02, -8.632e-02, 1.329e-02, 1.308e-01, 1.324e-02, -1.909e-01)); + r += mul(s6_4, M4(2.761e-02, -9.232e-02, -7.072e-02, 1.274e-01, -1.256e-01, -2.173e-02, -2.066e-01, -1.762e-01, -9.244e-02, 6.477e-02, 8.639e-02, -7.299e-02, -2.310e-02, 2.001e-01, -3.782e-02, -1.407e-02)); + r += mul(s6_5, M4(4.425e-02, 8.803e-02, -2.316e-02, -5.090e-02, 7.087e-02, 1.304e-01, -1.393e-01, 4.976e-02, -1.760e-01, 1.144e-01, 3.124e-02, -1.090e-01, 1.166e-01, -5.929e-02, -1.331e-02, -1.527e-02)); + r += mul(s6_6, M4(-8.310e-02, -6.365e-02, 5.384e-02, 5.216e-02, -7.219e-02, 1.717e-01, 1.047e-01, 6.846e-03, 2.238e-02, 5.175e-02, -1.590e-01, 4.551e-02, 1.098e-02, 4.537e-02, -1.444e-02, -3.147e-03)); + r += mul(s6_7, M4(5.190e-02, -1.786e-02, -3.339e-02, 9.677e-02, 8.330e-02, -2.030e-01, -7.853e-02, -3.209e-02, -1.065e-01, 1.165e-01, -7.677e-02, 9.941e-03, 3.012e-02, -9.097e-02, -3.735e-02, -2.587e-03)); + r += mul(s6_8, M4(4.789e-02, -1.061e-02, -7.132e-02, -5.019e-03, 9.382e-02, 1.028e-01, 1.170e-02, -7.021e-02, 3.152e-02, -7.632e-03, -8.255e-03, 5.736e-03, -4.599e-02, -1.164e-02, 2.437e-02, -5.784e-03)); + r += mul(s7_0, M4(-6.864e-02, -1.860e-01, 1.525e-02, -1.176e-01, -4.191e-02, 7.252e-02, -9.173e-02, -9.618e-02, -1.308e-01, -6.063e-02, 7.648e-02, 1.441e-01, 1.102e-01, -5.888e-02, -1.897e-01, 1.819e-01)); + r += mul(s7_1, M4(3.338e-02, 1.776e-02, 1.064e-03, -7.578e-02, 7.495e-02, -6.697e-04, -1.239e-01, 2.847e-02, -2.279e-01, -7.285e-02, 2.095e-01, 2.356e-01, 1.557e-02, -5.456e-02, 1.157e-01, 1.708e-02)); + r += mul(s7_2, M4(-8.398e-02, -1.220e-02, -9.205e-02, -5.427e-02, -4.993e-02, -3.610e-02, 4.436e-02, 
-7.408e-02, -4.743e-02, 1.245e-01, -2.716e-02, 9.896e-02, 2.923e-02, 1.461e-01, 2.598e-02, 3.604e-02)); + r += mul(s7_3, M4(1.723e-01, -3.550e-02, 5.951e-03, 1.184e-01, -1.509e-02, -4.806e-02, -1.090e-01, -6.005e-02, 1.048e-01, 6.892e-02, -2.529e-02, -8.196e-02, -5.180e-02, 1.710e-01, 4.785e-02, 1.247e-01)); + r += mul(s7_4, M4(-8.579e-03, -2.017e-01, -5.922e-04, -3.642e-02, 1.614e-01, -1.148e-02, -1.775e-01, 3.465e-02, -4.133e-02, -3.195e-01, -1.085e-01, 1.358e-01, -2.694e-01, 5.217e-01, 1.432e-01, 7.962e-02)); + r += mul(s7_5, M4(7.401e-02, 1.154e-01, -9.251e-02, 6.368e-02, -5.794e-02, -6.226e-02, 1.083e-01, -1.099e-01, 1.265e-01, -1.227e-01, -5.823e-02, -6.630e-02, 1.804e-01, -2.071e-01, -1.919e-01, 1.828e-01)); + r += mul(s7_6, M4(-1.028e-01, 2.171e-02, -2.036e-02, 2.199e-02, 1.960e-02, -1.265e-01, 8.657e-02, -4.917e-02, -1.625e-01, 2.369e-02, 2.032e-03, 9.057e-02, 1.559e-01, -3.505e-03, -1.472e-01, -4.203e-02)); + r += mul(s7_7, M4(5.730e-02, 1.471e-02, 3.550e-02, -4.377e-02, 1.763e-01, -1.098e-01, -1.233e-01, -5.830e-02, 5.248e-02, -5.650e-02, -6.141e-02, 4.860e-02, 6.842e-02, 2.463e-02, 1.543e-01, 4.593e-02)); + r += mul(s7_8, M4(-1.436e-01, 5.420e-02, 5.503e-02, 1.841e-02, 2.891e-02, 1.041e-02, -2.625e-02, -9.218e-03, 2.278e-01, 1.894e-02, 7.521e-02, -7.641e-02, -1.396e-01, -1.410e-01, -4.627e-02, 1.331e-02)); + r += V4(1.177e-02, 5.260e-02, 3.830e-03, 2.828e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 
s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.420e-02, -7.304e-02, 5.277e-02, -8.869e-02, 1.686e-01, -1.185e-01, -7.823e-02, -1.419e-02, -7.016e-02, 4.279e-02, 1.397e-01, -6.678e-02, -7.461e-02, 4.892e-02, 1.971e-02, -1.535e-01)); + r += mul(s0_1, M4(9.894e-02, 9.302e-02, 2.232e-01, 1.508e-01, 9.585e-02, -9.040e-02, 6.740e-02, 1.110e-01, 4.021e-02, -1.659e-01, 6.178e-03, 1.271e-01, -1.103e-01, -3.351e-02, 1.574e-02, -2.217e-01)); + r += mul(s0_2, M4(3.914e-02, -9.947e-02, 1.792e-01, 2.109e-01, 3.220e-02, 5.070e-02, 3.215e-03, 7.735e-02, 1.384e-01, 6.271e-02, 1.338e-01, 1.247e-01, 7.330e-03, -5.024e-02, 1.091e-01, -3.214e-03)); + r += mul(s0_3, M4(-6.904e-02, 6.638e-02, 1.790e-02, 1.519e-01, 3.409e-02, -1.532e-01, 2.553e-02, -2.543e-02, -1.985e-01, 1.071e-01, -2.576e-01, 3.422e-02, -5.628e-02, 5.652e-02, -1.303e-02, -7.028e-03)); + r += mul(s0_4, M4(1.500e-01, 2.434e-01, -1.500e-02, 3.231e-01, -1.509e-01, -7.602e-03, -4.109e-02, -9.278e-02, -1.289e-01, -6.994e-02, -1.377e-01, 6.863e-03, 6.510e-02, 3.974e-01, 2.942e-01, -5.929e-02)); + r += mul(s0_5, M4(6.724e-02, 8.496e-02, 3.149e-01, 2.017e-01, -1.027e-01, -1.890e-02, 3.621e-02, -1.653e-02, 1.159e-02, -1.567e-02, -9.433e-02, -3.668e-02, -1.123e-01, -1.213e-01, -9.673e-03, 1.116e-01)); + r += mul(s0_6, M4(-8.626e-03, -1.019e-01, 3.571e-02, -1.988e-02, 2.925e-01, -8.985e-02, -1.971e-01, -3.109e-02, -6.574e-02, 7.465e-02, -8.963e-02, 1.973e-02, -4.528e-02, -7.994e-02, -5.249e-02, -1.425e-01)); + r += mul(s0_7, M4(-3.254e-02, -1.168e-01, -8.821e-02, -7.827e-02, -1.321e-01, -5.463e-02, -5.164e-02, -1.108e-01, 5.041e-02, 4.703e-02, -3.961e-02, -1.529e-01, 6.714e-02, -1.969e-02, -7.377e-02, -9.472e-02)); + r += mul(s0_8, M4(3.334e-03, 1.417e-02, 8.190e-02, -1.376e-04, -2.449e-02, 4.059e-02, 8.879e-03, -4.109e-02, 8.034e-02, 6.292e-02, -1.015e-01, 4.121e-02, -1.284e-01, -1.013e-01, -6.948e-02, 7.312e-02)); + r += mul(s1_0, M4(-2.675e-02, 1.906e-02, 2.541e-02, 
-2.440e-02, 5.500e-02, -4.655e-02, 6.749e-03, -3.896e-02, 5.153e-03, 3.243e-02, -8.898e-02, 3.766e-02, 9.183e-02, -3.276e-02, -4.676e-02, 1.069e-01)); + r += mul(s1_1, M4(-2.266e-02, 1.928e-01, 9.742e-02, 1.642e-02, 8.187e-02, -9.148e-02, 6.413e-02, 1.039e-01, 1.705e-02, -9.384e-02, 5.862e-02, -4.673e-02, -9.744e-02, -6.756e-02, -3.035e-02, -1.984e-04)); + r += mul(s1_2, M4(-1.107e-01, -7.741e-02, 3.105e-02, 4.790e-02, 6.117e-02, 6.582e-02, 1.098e-02, 4.631e-02, 6.814e-02, 4.898e-02, 1.393e-01, 5.313e-02, 1.945e-01, -8.519e-02, -2.937e-02, 1.174e-01)); + r += mul(s1_3, M4(-6.930e-02, -3.082e-02, -1.272e-02, -3.149e-02, 1.487e-01, 6.645e-02, 5.823e-02, -9.166e-02, -2.352e-02, 7.973e-02, -1.351e-01, -2.607e-02, 1.642e-02, 2.832e-02, 3.065e-02, -1.574e-01)); + r += mul(s1_4, M4(-1.663e-02, 8.963e-02, -1.319e-01, -7.464e-02, -1.524e-01, 3.038e-02, -4.666e-02, 4.148e-02, -1.216e-01, -1.376e-01, 3.993e-02, 8.549e-02, -1.050e-01, 5.940e-02, 1.719e-03, 7.842e-02)); + r += mul(s1_5, M4(4.745e-02, -9.467e-02, 4.188e-02, -1.220e-01, 2.856e-02, -2.749e-02, -2.924e-02, 1.370e-02, -1.048e-04, 1.409e-02, 1.199e-01, 4.892e-02, 9.664e-03, 9.891e-02, -3.513e-02, 4.525e-02)); + r += mul(s1_6, M4(3.134e-02, 6.452e-03, 2.488e-02, -9.823e-02, 1.444e-01, -8.021e-02, 5.082e-03, -1.454e-02, 6.573e-03, -5.563e-02, 3.470e-02, 2.968e-02, -1.047e-01, 3.429e-02, -4.992e-02, -2.500e-02)); + r += mul(s1_7, M4(-8.099e-02, -1.564e-03, -1.092e-01, -8.339e-02, -1.682e-01, 1.413e-02, 3.937e-02, -1.882e-02, -3.873e-02, 7.467e-02, 4.002e-02, 3.199e-02, -2.999e-03, 6.933e-02, -5.280e-03, 4.954e-02)); + r += mul(s1_8, M4(1.375e-02, 3.525e-02, 2.171e-02, 5.732e-03, -7.219e-02, 8.751e-03, -2.031e-02, 1.222e-01, 2.149e-02, 1.194e-02, -2.729e-02, -2.466e-02, 8.901e-03, 8.588e-02, -1.194e-01, 4.372e-02)); + r += mul(s2_0, M4(3.096e-02, -1.865e-02, 8.717e-03, -4.015e-02, 1.252e-02, 7.766e-04, -4.010e-02, 2.630e-02, 2.955e-02, 3.460e-02, 4.094e-02, -3.779e-02, 9.973e-02, -8.498e-02, 8.707e-02, 6.292e-02)); + r 
+= mul(s2_1, M4(-5.360e-02, -1.160e-01, 5.175e-02, -1.341e-02, 4.235e-02, -2.533e-02, -6.446e-02, -6.719e-02, -2.407e-02, -7.542e-02, -4.934e-02, -2.011e-02, 2.883e-02, 5.149e-02, 7.340e-02, -1.130e-01)); + r += mul(s2_2, M4(5.325e-02, 2.018e-02, -2.125e-02, -2.671e-02, -1.863e-02, -5.310e-02, -5.639e-02, 2.808e-02, 9.939e-02, -1.404e-02, -6.949e-02, 5.210e-02, -2.614e-02, -6.005e-02, -1.941e-01, 3.242e-02)); + r += mul(s2_3, M4(-2.594e-02, -8.161e-02, -6.955e-02, 3.421e-02, 4.341e-02, -8.308e-02, 1.558e-01, -6.705e-02, 3.444e-02, 1.463e-01, -3.094e-02, 1.097e-01, -1.576e-01, 1.454e-01, -4.296e-02, 4.286e-02)); + r += mul(s2_4, M4(1.169e-01, 1.054e-01, -5.935e-02, 1.220e-01, 1.240e-01, -8.460e-02, 4.287e-02, -3.474e-02, -1.073e-01, 6.625e-02, -1.882e-01, 7.456e-02, 1.547e-01, 3.164e-01, -5.810e-02, -6.758e-03)); + r += mul(s2_5, M4(-4.905e-02, 1.096e-01, 3.387e-02, 1.092e-01, -2.121e-01, 8.539e-03, -9.386e-02, -3.087e-02, -6.711e-02, 1.921e-02, -2.491e-02, 1.355e-01, -2.342e-01, 2.275e-02, -1.088e-01, 3.979e-02)); + r += mul(s2_6, M4(2.875e-02, -3.923e-02, 5.558e-02, 6.616e-02, 2.325e-02, 3.941e-02, 8.712e-02, 4.491e-02, 9.766e-02, 7.752e-03, -4.710e-02, -2.966e-02, -5.660e-03, -5.816e-03, 1.293e-01, 7.674e-02)); + r += mul(s2_7, M4(3.993e-02, 4.968e-02, 3.402e-02, -5.904e-02, -3.339e-02, 4.979e-02, 1.015e-01, 3.036e-02, -6.656e-02, -2.128e-02, -3.100e-02, -1.940e-01, -4.330e-04, 3.630e-02, 2.386e-01, 2.487e-03)); + r += mul(s2_8, M4(-2.667e-03, 4.797e-02, -3.047e-02, 2.558e-02, -4.603e-02, 9.783e-02, 5.871e-03, -1.708e-02, -1.082e-01, 1.274e-01, 2.123e-02, -9.464e-02, 2.113e-02, -2.157e-02, 5.258e-02, 7.613e-02)); + r += mul(s3_0, M4(2.095e-01, -2.461e-01, 2.168e-02, -4.770e-01, -2.994e-02, -7.197e-02, -1.038e-02, 2.126e-01, -5.237e-02, -1.790e-01, 7.885e-02, -2.092e-01, 4.132e-02, -4.664e-03, 2.313e-02, -1.060e-01)); + r += mul(s3_1, M4(1.656e-01, 4.463e-01, 3.476e-01, -6.969e-02, 9.353e-02, -8.283e-02, -4.747e-02, 5.884e-02, -1.672e-01, -2.816e-02, -1.464e-01, 
2.782e-02, 8.619e-02, 5.981e-02, -1.124e-02, -2.580e-02)); + r += mul(s3_2, M4(-5.303e-02, -3.766e-02, 7.818e-02, -6.730e-02, 4.065e-02, -8.358e-02, -2.606e-02, -5.207e-02, 3.543e-02, -1.616e-02, -5.832e-02, -4.686e-02, 1.371e-03, 6.335e-04, -1.149e-01, -8.662e-03)); + r += mul(s3_3, M4(-2.392e-02, 4.994e-02, -2.354e-01, -4.360e-01, 2.895e-02, -7.065e-02, 1.539e-01, 1.482e-01, 1.209e-01, 7.392e-02, -3.122e-02, 1.076e-01, -3.736e-02, 2.119e-02, -1.690e-02, 7.682e-02)); + r += mul(s3_4, M4(-5.199e-02, 4.982e-01, -4.490e-02, -3.677e-01, 2.666e-01, 2.291e-01, 2.176e-01, 4.174e-01, -7.979e-02, -1.923e-01, -1.458e-01, 2.467e-01, 1.223e-01, 1.135e-01, -3.184e-02, 7.319e-02)); + r += mul(s3_5, M4(-2.068e-01, 1.166e-01, 1.265e-01, 1.820e-01, -4.397e-02, 1.649e-01, 7.162e-02, -1.500e-01, -9.937e-02, -3.798e-02, -7.080e-02, 4.055e-02, -2.191e-02, -3.392e-02, -9.337e-02, -2.936e-02)); + r += mul(s3_6, M4(8.804e-02, -3.708e-02, 8.988e-02, 5.097e-02, 6.321e-02, -9.117e-02, 5.456e-02, 1.528e-01, 1.286e-01, 1.083e-01, 8.429e-02, 9.675e-02, 7.243e-02, 1.105e-02, -6.020e-02, -1.369e-02)); + r += mul(s3_7, M4(1.798e-02, 7.436e-02, -4.497e-02, 9.345e-02, 1.947e-01, 4.985e-03, 1.264e-01, 3.177e-01, -1.771e-01, -7.012e-02, -6.451e-02, 4.775e-02, -8.161e-03, 7.974e-02, 1.857e-02, 1.192e-02)); + r += mul(s3_8, M4(-1.301e-02, -2.212e-01, -5.738e-02, 1.239e-01, -1.303e-01, -1.040e-02, -1.057e-01, 6.472e-02, -1.315e-01, 5.404e-02, 1.043e-01, -2.273e-02, 4.702e-02, 2.117e-02, -2.228e-02, 7.511e-02)); + r += mul(s4_0, M4(2.086e-02, -1.315e-01, -8.666e-02, -4.610e-02, 8.425e-02, 4.468e-02, -2.225e-03, -7.460e-02, 2.345e-02, 2.699e-02, 1.948e-02, -9.559e-03, 3.792e-02, -1.551e-02, -3.949e-02, -8.766e-02)); + r += mul(s4_1, M4(3.962e-02, -2.166e-01, -5.640e-03, 2.654e-02, -7.836e-03, -9.792e-02, 1.355e-02, 1.633e-02, 9.491e-02, -8.847e-02, 5.093e-02, 7.195e-02, -4.667e-02, -2.085e-01, 9.252e-02, 1.966e-02)); + r += mul(s4_2, M4(4.441e-02, 1.353e-01, 3.278e-02, 9.538e-02, 6.115e-02, 5.018e-02, 
1.687e-01, 4.744e-02, -2.649e-02, -6.554e-02, -1.157e-01, -2.995e-02, 7.393e-02, 1.172e-02, -1.931e-02, -2.015e-02)); + r += mul(s4_3, M4(6.047e-02, 1.781e-01, 2.752e-02, 1.275e-01, 2.304e-02, -3.967e-03, -1.261e-01, 1.192e-01, -6.222e-02, -5.305e-02, 2.415e-02, 6.835e-02, -4.043e-02, 1.336e-01, -2.237e-01, 2.707e-01)); + r += mul(s4_4, M4(8.054e-02, -6.908e-02, 9.448e-02, 2.953e-02, 7.394e-02, -5.142e-02, 4.425e-02, 1.082e-01, 1.580e-01, -6.362e-02, 1.552e-02, 2.589e-02, -2.180e-01, 1.362e-01, -1.225e-01, -1.116e-01)); + r += mul(s4_5, M4(3.335e-02, -3.172e-01, 2.059e-01, -9.299e-02, 1.196e-01, -6.233e-02, 1.577e-01, -9.054e-03, -1.706e-01, 6.904e-02, -1.745e-02, -5.810e-02, -1.765e-02, -6.382e-02, 4.746e-02, -1.047e-01)); + r += mul(s4_6, M4(2.181e-01, 2.339e-02, -1.321e-02, 2.600e-02, -6.821e-02, -4.525e-02, 1.166e-01, -1.033e-01, 1.230e-03, -5.596e-02, 4.015e-02, 6.311e-02, 2.021e-01, -1.335e-01, 1.062e-02, -1.941e-01)); + r += mul(s4_7, M4(-1.137e-01, -3.561e-02, 3.031e-03, 3.526e-02, -4.821e-02, 3.615e-02, -3.799e-02, -1.222e-01, -5.634e-02, 9.488e-02, 3.017e-02, 5.925e-03, 1.082e-01, -1.733e-01, -8.112e-02, -2.205e-01)); + r += mul(s4_8, M4(-2.704e-02, 5.156e-02, -3.898e-02, -5.168e-02, 6.469e-02, 9.312e-02, 2.891e-01, 5.176e-02, -7.136e-02, 3.696e-02, 6.148e-02, -1.558e-01, 1.179e-01, -1.230e-01, -2.152e-03, -3.342e-03)); + r += mul(s5_0, M4(3.099e-02, -5.721e-02, -5.837e-02, 3.273e-02, 4.061e-02, 6.728e-02, -6.397e-02, -1.035e-02, -8.410e-03, 1.368e-01, 6.434e-02, -2.079e-01, -7.008e-02, 5.762e-02, 6.261e-02, -2.827e-02)); + r += mul(s5_1, M4(-9.156e-02, 4.647e-02, 3.607e-02, -4.526e-02, 1.105e-02, 1.226e-02, -1.190e-01, -2.288e-02, -3.213e-01, 1.775e-01, -5.493e-02, -4.106e-02, -8.115e-03, -3.186e-02, -5.598e-03, 8.977e-02)); + r += mul(s5_2, M4(-2.462e-02, 5.685e-02, 1.396e-01, 1.148e-01, 9.165e-02, -2.105e-02, -1.515e-02, -2.034e-02, 2.218e-01, 5.889e-01, -2.518e-01, -2.986e-01, 1.975e-02, 1.469e-02, 4.941e-02, 8.084e-02)); + r += mul(s5_3, 
M4(-1.178e-02, -2.493e-02, 7.091e-02, -1.486e-01, -5.103e-02, 1.093e-01, -5.768e-02, 1.968e-01, -1.203e-01, 1.732e-01, -9.788e-02, 8.279e-02, -1.132e-01, 1.107e-01, 5.348e-02, 1.640e-01)); + r += mul(s5_4, M4(1.597e-01, 1.346e-01, 8.000e-02, -6.103e-02, 5.216e-02, 1.608e-02, 1.724e-02, -1.406e-01, 1.497e-02, 1.972e-02, -4.815e-01, 3.832e-02, 2.854e-03, 5.729e-02, 3.926e-02, 1.587e-01)); + r += mul(s5_5, M4(-1.130e-01, 5.619e-02, 4.022e-02, 7.986e-02, 3.390e-02, -5.944e-02, -7.646e-02, -8.342e-02, -2.934e-01, 5.609e-02, -9.369e-02, -2.271e-01, 1.012e-01, 2.281e-02, 3.580e-02, 3.448e-02)); + r += mul(s5_6, M4(-8.420e-02, 1.951e-02, -6.040e-02, 3.208e-02, -2.316e-02, 4.509e-02, -4.663e-03, 4.133e-02, 9.793e-02, -1.538e-01, -1.707e-01, 3.171e-02, -1.908e-02, -1.000e-01, -1.606e-02, 6.926e-02)); + r += mul(s5_7, M4(1.478e-02, 1.569e-01, 3.403e-02, -1.223e-01, -2.366e-05, 1.023e-01, -7.845e-02, -4.515e-02, -1.406e-01, 5.345e-02, 1.959e-02, 6.173e-02, -1.724e-01, -2.036e-01, 9.326e-02, 6.862e-03)); + r += mul(s5_8, M4(1.094e-02, 1.636e-01, 7.616e-02, 2.121e-02, 3.014e-02, -3.134e-02, -1.714e-01, 2.918e-02, 1.450e-02, 1.552e-01, 1.083e-02, -1.165e-01, 1.077e-01, 3.238e-02, 1.702e-02, 4.353e-04)); + r += mul(s6_0, M4(5.214e-02, -2.078e-02, 3.474e-01, -3.284e-02, 5.659e-02, 1.059e-02, 3.253e-02, 2.559e-02, -8.278e-02, 3.339e-02, -4.612e-02, 2.310e-02, -7.745e-03, 8.922e-03, -6.390e-03, 3.328e-02)); + r += mul(s6_1, M4(2.072e-01, 1.430e-01, 1.083e-01, 3.261e-01, -1.827e-02, 4.296e-02, -4.245e-02, -1.057e-01, -6.319e-04, 1.182e-01, 7.292e-02, 1.245e-02, 4.914e-02, 1.210e-01, -1.695e-02, -1.774e-02)); + r += mul(s6_2, M4(7.776e-02, 1.307e-01, 2.705e-01, 2.352e-01, -2.479e-02, -4.149e-03, -4.609e-02, 5.144e-02, 1.693e-02, 5.089e-03, 3.080e-02, -1.144e-01, -7.009e-02, 7.680e-02, 5.151e-02, -1.967e-02)); + r += mul(s6_3, M4(-7.626e-02, -9.855e-02, 1.796e-01, -1.230e-01, 1.587e-01, 2.963e-02, -2.519e-02, 7.055e-02, 3.005e-02, 1.080e-01, -5.835e-02, 6.189e-03, 5.797e-02, -1.002e-01, 
-3.244e-02, 1.482e-02)); + r += mul(s6_4, M4(1.329e-02, -1.075e-01, -4.617e-02, -3.313e-01, -7.339e-02, -1.085e-01, 3.176e-02, -1.469e-01, 4.723e-02, 4.985e-02, -1.166e-01, -5.758e-02, 5.751e-02, -1.193e-01, -1.725e-01, -7.011e-02)); + r += mul(s6_5, M4(8.138e-02, -7.430e-02, 6.983e-02, -2.314e-01, -7.402e-03, -2.964e-04, 4.611e-02, 4.329e-03, 8.225e-02, 5.095e-02, -2.180e-02, 6.560e-02, 5.276e-03, -6.051e-02, 1.080e-02, -1.288e-01)); + r += mul(s6_6, M4(8.902e-02, 1.368e-02, 1.088e-01, 1.724e-01, -3.034e-02, 2.497e-02, 2.480e-02, 1.805e-01, 5.329e-02, -1.677e-02, -4.939e-02, -8.117e-02, -5.250e-02, 6.730e-03, -1.006e-02, -1.544e-01)); + r += mul(s6_7, M4(-7.771e-02, 5.097e-02, 1.006e-01, 3.524e-01, 9.951e-04, -6.529e-02, -1.195e-01, -3.114e-02, 4.553e-02, -1.189e-01, -4.951e-02, 1.090e-01, 1.217e-03, 2.041e-02, -6.306e-02, -1.119e-01)); + r += mul(s6_8, M4(1.128e-03, 5.455e-02, 1.584e-01, 6.287e-03, 1.108e-02, 1.749e-02, 5.138e-02, 1.153e-01, 6.400e-02, -9.560e-02, 7.986e-02, -2.861e-02, 5.823e-02, -6.431e-03, -2.695e-02, -4.033e-02)); + r += mul(s7_0, M4(8.310e-02, -5.055e-02, -7.900e-02, -6.566e-02, 8.914e-02, -9.624e-02, -9.637e-02, -3.749e-02, -7.631e-02, -1.133e-01, -1.827e-02, -1.260e-01, -6.900e-02, -1.341e-02, 5.361e-02, -1.120e-01)); + r += mul(s7_1, M4(-1.704e-02, 1.459e-02, 2.970e-02, -4.414e-02, -6.875e-03, -2.423e-01, -1.020e-01, 3.640e-02, -1.879e-01, 1.759e-02, 9.057e-02, 5.173e-02, -4.979e-02, 1.295e-01, 1.190e-01, -7.415e-02)); + r += mul(s7_2, M4(-1.018e-01, -3.924e-03, 5.875e-02, 9.527e-02, -5.239e-02, -6.705e-02, 1.451e-03, 1.266e-01, -2.502e-02, 1.718e-02, 1.324e-01, 1.751e-01, 4.871e-02, -9.750e-02, 2.023e-01, 5.529e-02)); + r += mul(s7_3, M4(-6.367e-02, -3.484e-02, -1.307e-01, 6.098e-02, -8.726e-02, -1.461e-03, 7.463e-02, -5.403e-02, -1.290e-02, -3.601e-02, 7.717e-02, -1.176e-01, -2.167e-01, -1.133e-01, 1.845e-01, 5.091e-02)); + r += mul(s7_4, M4(6.423e-03, 9.665e-02, -1.285e-02, -1.315e-01, -8.173e-02, -1.755e-01, -3.384e-02, -1.216e-01, 
-4.928e-02, 9.972e-02, -1.378e-01, -7.535e-02, -1.835e-01, 3.639e-01, -1.728e-01, 1.934e-02)); + r += mul(s7_5, M4(2.944e-02, 2.167e-02, -1.027e-01, 2.432e-02, -1.126e-01, -2.286e-01, 1.568e-01, -1.957e-03, 5.846e-02, 1.433e-01, 6.377e-02, -1.225e-02, 1.019e-01, -1.250e-01, 2.458e-01, 1.341e-01)); + r += mul(s7_6, M4(1.512e-02, -2.239e-02, -6.181e-02, -1.150e-01, -5.383e-02, 6.533e-02, 2.561e-02, -8.978e-02, -7.917e-02, -7.980e-03, 9.308e-02, -1.176e-02, -1.974e-01, 6.040e-02, 1.649e-01, -1.820e-01)); + r += mul(s7_7, M4(7.647e-03, -6.300e-03, -9.134e-02, 2.675e-02, 1.495e-01, 8.440e-04, -3.061e-02, -1.085e-01, 7.143e-03, -1.213e-01, -1.140e-01, -2.759e-02, 1.342e-01, 1.622e-01, 1.045e-01, -2.112e-01)); + r += mul(s7_8, M4(6.856e-02, -3.692e-02, -3.924e-02, 8.596e-03, -1.565e-01, -6.137e-03, 1.368e-01, 1.838e-01, 9.995e-03, -7.011e-02, 1.033e-01, -9.436e-02, -2.162e-02, -1.315e-01, 2.524e-01, 1.174e-02)); + r += V4(-3.934e-03, -2.827e-03, 2.210e-02, 1.100e-02); + return r; +} + +void Pass10(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, 
-1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = 
max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 
11 +//!DESC conv10 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.830e-02, -8.822e-02, -6.993e-02, 1.594e-02, 3.621e-02, 9.357e-02, -1.050e-01, 9.606e-02, -2.030e-01, -1.800e-01, -7.986e-02, 5.909e-02, -7.205e-02, 2.962e-03, 7.652e-03, 4.839e-02)); + r += mul(s0_1, M4(2.639e-02, -8.383e-02, 3.581e-02, -5.131e-02, -1.163e-01, -8.054e-02, 1.334e-01, -5.472e-02, -5.489e-02, -2.548e-01, -2.275e-02, 9.308e-02, 2.770e-02, -1.546e-01, 3.333e-02, 4.518e-02)); + r += mul(s0_2, M4(2.327e-02, -1.763e-02, -3.695e-02, -6.094e-02, 1.823e-02, -2.793e-02, -3.985e-03, 1.542e-02, 1.257e-01, -2.132e-01, 5.117e-02, 1.349e-01, -3.077e-01, -3.700e-03, -1.768e-01, 6.746e-02)); + r += mul(s0_3, M4(-6.500e-03, 1.339e-01, 1.237e-02, 8.131e-02, 8.515e-03, 1.218e-01, 5.627e-02, -1.326e-01, -3.635e-02, -5.367e-02, 1.512e-03, 1.246e-01, -1.685e-01, -4.678e-02, 4.310e-02, 8.479e-02)); + r += mul(s0_4, M4(4.810e-02, -2.654e-02, 9.668e-02, 3.588e-02, 2.012e-02, -1.805e-02, -4.505e-02, 1.030e-01, -2.129e-01, -1.533e-01, -2.805e-02, -5.564e-02, -1.120e-01, -9.410e-03, -9.620e-02, 1.147e-01)); + r += mul(s0_5, 
M4(9.775e-02, 7.503e-02, 9.817e-02, -1.275e-02, 9.842e-02, 5.709e-02, 2.653e-02, 2.764e-01, 2.105e-01, -2.408e-01, -1.928e-02, 1.570e-01, -1.637e-01, -1.838e-01, 2.108e-02, 1.434e-01)); + r += mul(s0_6, M4(1.222e-01, 9.284e-02, -2.491e-02, -4.565e-02, -5.174e-02, -2.526e-02, 4.163e-02, -4.810e-02, 1.309e-01, 7.007e-02, -1.747e-02, 6.446e-02, -1.266e-01, 6.643e-02, -9.471e-02, -4.707e-02)); + r += mul(s0_7, M4(4.659e-03, -2.968e-02, 2.761e-02, 2.542e-02, -1.407e-02, -7.931e-03, -6.463e-02, -2.256e-02, -4.864e-02, -8.343e-02, 1.271e-01, 7.443e-03, 2.849e-02, 3.836e-02, -1.504e-01, -1.918e-02)); + r += mul(s0_8, M4(-5.559e-02, 3.812e-02, -6.654e-02, 2.816e-02, 9.087e-02, 3.775e-02, -5.950e-02, -9.644e-02, 1.381e-01, 3.020e-02, 8.428e-02, 6.284e-02, 1.957e-02, 1.963e-01, -1.774e-02, -8.771e-02)); + r += mul(s1_0, M4(3.828e-02, -2.100e-02, -6.482e-02, -3.354e-03, -1.010e-01, -2.989e-02, 5.974e-03, 6.596e-02, -1.756e-02, -2.670e-02, 1.149e-02, 7.257e-02, 8.638e-02, 3.747e-02, 1.499e-01, -5.391e-02)); + r += mul(s1_1, M4(-9.688e-02, 3.194e-02, 2.861e-02, -3.458e-02, 1.374e-01, 1.100e-01, 1.814e-01, -2.460e-03, 6.353e-02, 1.382e-01, -3.290e-02, 6.976e-02, 4.006e-02, -4.895e-03, 9.192e-02, 2.136e-02)); + r += mul(s1_2, M4(-1.040e-01, -2.338e-02, -6.577e-02, 6.761e-02, -7.097e-02, -8.443e-02, 1.492e-01, 3.257e-02, -3.813e-02, 8.250e-03, 4.018e-03, -3.037e-03, -5.739e-02, 7.913e-02, 9.387e-02, -6.153e-03)); + r += mul(s1_3, M4(-9.678e-02, -4.993e-02, 1.814e-02, 1.246e-01, 4.507e-02, -5.729e-02, -6.812e-02, 2.663e-02, 5.196e-02, 1.187e-01, -3.827e-02, -1.047e-01, -4.993e-02, -4.158e-02, 1.339e-01, -7.618e-03)); + r += mul(s1_4, M4(7.790e-02, 1.260e-01, 1.135e-01, 2.632e-01, -1.092e-01, -1.550e-02, 1.861e-01, -7.397e-02, -8.339e-02, -1.461e-02, 1.268e-01, 8.259e-02, -1.115e-01, 4.943e-03, 3.752e-02, 2.763e-01)); + r += mul(s1_5, M4(-1.888e-01, 1.706e-01, 7.704e-02, 1.294e-01, 8.372e-02, -4.005e-03, -9.525e-02, -4.460e-02, 5.904e-02, 1.120e-01, -1.089e-01, 1.037e-01, 6.043e-02, 
-1.171e-01, -1.444e-02, 1.108e-02)); + r += mul(s1_6, M4(5.883e-02, -3.481e-02, 4.111e-02, -1.202e-01, -2.393e-02, -2.932e-02, 3.660e-03, 3.720e-02, 1.081e-01, 7.342e-02, -6.897e-02, 2.116e-02, 1.964e-01, -2.016e-02, 1.181e-02, -1.616e-01)); + r += mul(s1_7, M4(-1.357e-01, -1.062e-02, -1.336e-01, -6.073e-02, -9.438e-02, -8.379e-02, 8.636e-02, 1.988e-01, -2.967e-02, 2.383e-02, 9.142e-02, -1.144e-01, 5.726e-02, 3.158e-02, -6.711e-02, -7.534e-02)); + r += mul(s1_8, M4(-2.777e-01, -3.806e-02, 1.198e-02, -1.016e-02, -5.348e-02, -1.018e-01, 7.266e-02, 7.254e-03, 4.831e-02, 3.528e-02, -6.299e-02, -1.290e-01, -6.714e-02, 3.922e-02, 5.039e-03, 7.812e-02)); + r += mul(s2_0, M4(7.907e-02, -1.144e-02, 6.876e-03, 3.150e-02, 8.083e-02, -4.558e-02, 2.990e-02, -9.142e-02, -4.976e-02, 4.591e-02, -3.455e-02, -4.650e-02, 1.029e-02, 7.486e-02, 1.384e-01, 8.629e-02)); + r += mul(s2_1, M4(-3.017e-02, -6.396e-02, -2.038e-01, 8.795e-02, -5.005e-02, -7.113e-02, 6.117e-02, 6.995e-02, 6.845e-02, 2.185e-01, 1.031e-01, -7.041e-02, -1.091e-01, -2.143e-02, -6.323e-02, 5.283e-02)); + r += mul(s2_2, M4(-1.305e-01, -5.319e-02, -1.015e-02, 8.036e-02, -3.181e-02, -7.211e-02, -1.132e-01, -6.489e-02, -9.395e-02, -1.447e-02, -2.309e-02, 1.443e-02, -6.069e-03, 8.539e-02, -8.914e-03, 1.129e-02)); + r += mul(s2_3, M4(4.047e-02, 1.181e-01, -7.255e-02, -5.920e-02, -1.926e-03, 1.544e-01, -2.920e-02, 2.937e-02, -9.288e-02, 6.596e-02, -1.287e-02, 1.706e-01, -1.099e-02, -4.462e-02, 1.277e-01, 6.293e-02)); + r += mul(s2_4, M4(-1.109e-01, 1.195e-01, -2.507e-01, -5.579e-02, -3.435e-02, -1.160e-01, -2.351e-01, -5.408e-02, -2.173e-01, 5.764e-02, 4.518e-02, -1.061e-01, -9.047e-02, -3.664e-01, -1.069e-02, 1.379e-01)); + r += mul(s2_5, M4(-6.895e-02, 6.064e-02, 1.544e-01, 4.203e-02, -3.691e-02, 6.640e-03, -4.824e-02, -1.499e-01, 3.623e-02, -1.206e-01, -1.845e-02, 9.345e-02, 4.797e-02, -1.096e-01, 2.049e-02, 7.366e-02)); + r += mul(s2_6, M4(7.105e-02, 2.269e-02, -1.065e-01, -4.301e-02, 6.915e-02, 2.761e-02, -1.506e-01, 
1.086e-03, -3.163e-02, -3.753e-02, 6.290e-02, -1.198e-02, -6.298e-02, -6.580e-02, 6.394e-02, 2.602e-01)); + r += mul(s2_7, M4(-1.115e-01, -4.285e-02, -9.923e-03, -1.589e-02, -2.703e-01, 2.065e-02, 5.478e-02, -1.091e-01, 4.712e-02, 8.844e-02, -1.995e-01, -2.274e-01, -1.262e-01, -1.413e-01, 1.099e-01, -1.430e-01)); + r += mul(s2_8, M4(1.737e-02, -3.458e-03, 1.451e-02, -2.577e-02, 7.890e-02, -1.703e-02, -7.593e-02, 7.170e-02, -7.863e-02, -8.196e-03, 6.095e-02, 1.259e-01, -3.556e-02, -3.050e-02, 2.776e-03, 7.601e-02)); + r += mul(s3_0, M4(-1.945e-01, -1.678e-01, -4.334e-02, 1.725e-02, 1.183e-01, -2.776e-02, -1.773e-02, -3.394e-02, -1.384e-02, 3.826e-02, -6.860e-02, 2.523e-02, -4.153e-02, 7.668e-02, -1.258e-02, 1.143e-02)); + r += mul(s3_1, M4(1.460e-01, -1.318e-01, 2.057e-01, -1.582e-01, -1.335e-01, 7.058e-02, 1.566e-02, -5.043e-02, 3.790e-02, 5.916e-02, 6.523e-03, -1.478e-01, 4.719e-02, 3.452e-02, 4.558e-02, -2.316e-02)); + r += mul(s3_2, M4(-1.304e-01, -1.636e-01, 8.311e-02, -6.273e-02, 1.521e-01, 2.406e-04, -5.286e-02, -1.113e-02, 4.382e-02, 1.461e-03, -1.054e-01, 2.726e-02, -2.819e-02, 8.521e-02, 4.887e-03, -8.432e-02)); + r += mul(s3_3, M4(-5.002e-02, -4.256e-02, 1.073e-01, 3.849e-02, 3.163e-02, -1.599e-01, -6.356e-02, -9.985e-03, -2.242e-02, -4.897e-02, -1.111e-01, 4.739e-02, 7.998e-02, -3.330e-04, 7.776e-02, 3.637e-02)); + r += mul(s3_4, M4(-1.776e-02, -9.190e-03, 3.079e-01, 6.996e-02, 7.989e-03, -1.833e-01, -1.111e-01, 5.754e-02, -2.713e-02, -3.977e-02, 3.147e-02, -4.155e-02, 3.760e-01, -2.202e-01, -6.909e-02, 9.248e-02)); + r += mul(s3_5, M4(-2.031e-01, 7.541e-02, -1.677e-02, -1.225e-01, 5.202e-02, 1.734e-02, 2.679e-02, -8.093e-04, -2.525e-03, -5.872e-02, 1.113e-02, 9.559e-02, 4.538e-02, -4.660e-02, -1.257e-01, -2.602e-02)); + r += mul(s3_6, M4(-3.247e-02, -5.548e-02, 2.158e-01, 6.249e-02, 1.102e-01, -3.439e-02, 6.277e-02, 6.411e-03, 3.234e-02, 4.465e-02, -7.207e-03, 6.141e-02, 5.406e-02, 1.072e-01, -1.963e-02, -3.139e-02)); + r += mul(s3_7, M4(-2.096e-01, 
-1.077e-01, 7.613e-02, 1.057e-01, -1.232e-01, 7.859e-02, -7.996e-02, 3.703e-02, -2.961e-03, 8.288e-02, -5.762e-02, -9.681e-02, -1.464e-02, 6.819e-02, 4.520e-02, 1.719e-01)); + r += mul(s3_8, M4(-8.420e-02, -1.095e-01, 1.215e-02, 3.436e-02, 6.192e-02, 3.912e-02, 7.981e-02, 3.137e-01, 2.237e-02, 2.784e-02, -4.654e-02, 8.559e-03, -8.479e-02, 1.238e-02, 1.205e-02, -3.051e-03)); + r += mul(s4_0, M4(1.511e-02, -9.252e-03, 5.790e-03, 3.753e-02, 7.565e-02, -9.476e-02, 1.589e-02, -6.392e-02, 3.726e-02, 6.833e-02, -4.575e-03, -1.903e-02, 9.657e-03, 8.578e-03, 2.505e-02, -7.624e-03)); + r += mul(s4_1, M4(3.406e-02, 5.978e-02, -4.693e-02, 7.297e-03, -7.254e-02, 9.246e-02, 9.101e-02, -9.069e-03, 9.585e-02, -5.271e-02, -5.087e-02, -6.168e-02, -9.070e-03, -8.431e-02, 2.675e-02, 9.518e-02)); + r += mul(s4_2, M4(2.053e-02, 4.954e-02, 4.810e-02, -7.866e-02, -4.082e-02, -4.793e-02, 6.687e-03, -1.911e-02, 2.363e-02, 1.289e-02, 2.680e-02, -6.178e-02, -1.386e-02, 4.961e-03, -4.103e-02, -2.998e-02)); + r += mul(s4_3, M4(1.265e-01, 6.002e-02, -5.993e-02, 2.239e-02, 4.585e-02, -7.239e-02, 8.562e-03, -5.115e-02, -3.372e-02, 1.246e-01, 3.284e-02, -7.553e-03, -8.669e-03, 2.122e-02, 6.986e-03, -5.931e-02)); + r += mul(s4_4, M4(3.469e-02, -1.594e-03, 2.107e-03, -2.065e-01, 3.869e-02, -4.408e-02, -3.861e-02, 1.545e-02, -1.170e-02, 9.890e-02, -6.240e-02, -6.230e-02, -2.656e-02, -9.663e-02, 9.523e-02, 4.473e-02)); + r += mul(s4_5, M4(1.138e-02, 6.733e-03, -1.154e-01, -4.058e-02, 4.270e-02, -2.085e-03, 2.183e-03, 1.249e-01, 1.701e-01, -5.283e-02, -4.708e-02, -5.570e-02, -5.821e-02, -1.068e-01, 1.369e-01, 1.492e-01)); + r += mul(s4_6, M4(-2.277e-02, -1.163e-01, -1.232e-02, 4.009e-03, -3.210e-03, -1.049e-01, 2.828e-02, 2.007e-02, 1.880e-01, 1.803e-01, 1.202e-01, -4.261e-03, -8.411e-03, -6.796e-02, 4.724e-02, -1.382e-01)); + r += mul(s4_7, M4(-1.065e-01, -2.180e-03, -7.782e-02, -1.642e-02, -3.885e-02, -1.393e-02, -3.343e-02, -5.628e-02, 1.699e-01, -6.423e-02, 9.618e-02, -9.226e-02, -1.071e-01, 
2.686e-02, -9.786e-03, -1.699e-01)); + r += mul(s4_8, M4(3.426e-03, -4.191e-02, 2.161e-02, 1.556e-01, -1.128e-02, 1.101e-01, -4.549e-02, -1.002e-02, 8.034e-02, 2.625e-02, -2.265e-02, -5.273e-02, 3.267e-02, 2.136e-02, 1.819e-03, -8.445e-03)); + r += mul(s5_0, M4(-1.543e-01, -7.893e-02, -1.042e-01, -4.412e-02, -3.244e-02, 3.192e-03, 1.214e-01, -1.500e-02, -3.626e-02, 1.581e-02, 7.494e-03, 5.572e-02, -4.054e-03, 1.962e-02, -1.363e-01, 7.033e-02)); + r += mul(s5_1, M4(-2.474e-02, 3.145e-02, 5.622e-02, -6.296e-02, 5.492e-02, -4.185e-02, 7.467e-02, 5.331e-02, -2.310e-02, 2.976e-02, 5.042e-02, -5.251e-03, 1.195e-01, -6.692e-02, 6.893e-02, -7.826e-02)); + r += mul(s5_2, M4(2.109e-02, -5.746e-03, 5.626e-02, -1.174e-01, -1.033e-01, -2.948e-02, -1.530e-01, 9.934e-02, -6.408e-03, -1.846e-02, -4.852e-02, 4.990e-02, 1.627e-01, 4.525e-02, -6.003e-03, 1.175e-01)); + r += mul(s5_3, M4(-6.157e-02, -6.800e-02, 2.204e-02, 2.086e-02, -1.796e-01, 5.360e-02, -7.831e-02, 8.739e-02, -5.539e-02, 7.619e-02, 4.859e-02, -3.974e-02, 4.414e-02, 3.187e-02, -4.088e-02, -5.784e-02)); + r += mul(s5_4, M4(1.870e-01, 1.430e-01, 6.715e-02, -3.192e-01, -8.249e-03, -4.895e-02, -1.775e-01, -1.211e-02, 5.246e-02, -1.028e-01, -1.829e-01, -2.604e-02, 8.037e-02, -5.044e-02, -8.484e-03, -5.692e-02)); + r += mul(s5_5, M4(7.599e-02, 7.251e-02, -1.234e-01, 3.917e-02, -6.220e-02, -3.332e-02, -1.971e-01, -4.996e-03, -7.553e-02, 3.844e-02, -1.313e-01, -5.345e-02, -6.988e-02, -9.938e-02, 1.562e-01, -7.072e-02)); + r += mul(s5_6, M4(8.148e-02, 8.224e-02, 1.979e-01, 1.761e-01, -2.081e-01, -7.891e-03, 2.253e-02, -5.107e-02, -1.033e-01, -1.002e-01, 1.686e-02, 1.496e-02, 4.147e-02, -3.550e-02, 1.365e-03, -8.448e-02)); + r += mul(s5_7, M4(2.520e-02, -1.215e-02, -7.120e-02, 1.434e-01, 5.238e-02, -4.046e-02, 2.844e-02, 8.513e-02, -5.495e-02, -1.260e-01, -7.741e-02, -5.583e-02, -4.865e-02, 5.129e-02, -6.376e-02, -6.701e-02)); + r += mul(s5_8, M4(-4.859e-02, 2.361e-02, 1.190e-01, 1.642e-01, -9.418e-02, -1.948e-02, -1.088e-01, 
-5.529e-02, 6.670e-02, -7.794e-02, 7.296e-02, -2.764e-03, 1.796e-01, 1.090e-02, -2.495e-02, 4.582e-02)); + r += mul(s6_0, M4(-5.357e-02, -1.255e-02, 2.167e-02, 2.385e-02, -1.343e-01, -9.890e-02, 1.080e-01, -3.771e-02, 1.413e-02, 1.873e-02, 6.132e-02, -3.476e-02, -1.697e-02, 1.685e-01, -3.233e-02, 1.537e-01)); + r += mul(s6_1, M4(-7.470e-02, -1.743e-02, -1.708e-02, 1.016e-01, -2.885e-02, 2.759e-01, 8.246e-02, 8.111e-02, 2.180e-02, 1.975e-01, 4.139e-02, -5.838e-02, 2.880e-01, 3.660e-02, 1.437e-01, -2.724e-02)); + r += mul(s6_2, M4(2.805e-02, 4.007e-02, 3.674e-02, 9.905e-02, 1.284e-02, -1.238e-01, -5.328e-02, -1.677e-01, 2.169e-02, -6.419e-03, 2.584e-02, -5.450e-02, -9.392e-02, 1.529e-01, -1.063e-02, -4.322e-02)); + r += mul(s6_3, M4(-5.460e-02, -4.692e-02, -6.811e-02, -9.443e-02, -1.052e-01, 5.419e-02, 6.251e-02, -1.415e-01, 9.546e-02, -6.588e-02, -5.084e-02, 9.510e-02, 1.441e-01, 1.078e-01, 8.965e-02, -3.965e-02)); + r += mul(s6_4, M4(1.503e-01, -1.248e-01, 1.122e-01, -3.047e-02, -3.949e-02, -1.048e-01, -5.012e-02, -4.756e-02, 8.160e-02, -1.789e-01, 3.578e-02, 1.013e-01, 3.092e-02, 3.357e-02, -1.165e-02, 2.127e-01)); + r += mul(s6_5, M4(1.087e-01, -3.543e-02, -4.847e-03, 1.176e-03, -1.534e-03, -6.062e-02, 3.015e-02, -2.010e-02, -6.407e-02, -3.185e-02, 9.524e-02, 1.449e-01, -1.586e-01, 9.769e-02, -4.502e-02, -2.382e-02)); + r += mul(s6_6, M4(-7.326e-02, -1.414e-02, 8.860e-02, -1.082e-01, -1.479e-01, -5.904e-02, -3.166e-03, -3.973e-02, -1.012e-02, -2.806e-02, 5.465e-02, 9.396e-02, -6.241e-02, 6.763e-02, 4.198e-02, -9.115e-03)); + r += mul(s6_7, M4(9.795e-02, 1.486e-01, -4.049e-02, -2.027e-01, 3.147e-02, 2.180e-02, -5.846e-02, 6.627e-02, -9.838e-02, 1.340e-03, 5.795e-02, -3.384e-02, 5.155e-03, 1.505e-01, 1.303e-01, 9.453e-02)); + r += mul(s6_8, M4(7.614e-02, -5.840e-02, 7.337e-02, 9.552e-03, 6.193e-02, -6.492e-02, -3.637e-02, -1.006e-02, -9.000e-02, 5.542e-02, -1.137e-01, -1.461e-01, 9.594e-02, -4.110e-02, 2.555e-01, 1.538e-01)); + r += mul(s7_0, M4(-4.090e-02, 
-1.584e-01, 6.009e-03, 4.241e-02, -7.851e-02, -1.891e-02, -1.294e-01, 7.073e-02, -6.965e-02, -1.393e-01, -3.263e-02, 4.283e-02, -2.880e-02, -8.045e-02, -3.110e-02, -3.723e-02)); + r += mul(s7_1, M4(7.501e-02, 1.715e-01, -1.412e-01, -5.020e-02, -7.131e-02, 1.504e-02, -1.315e-01, 7.303e-02, 5.994e-02, 6.750e-04, -1.436e-01, -7.401e-02, 8.432e-02, -3.126e-02, 4.516e-02, 2.611e-02)); + r += mul(s7_2, M4(-9.791e-02, -3.010e-02, 3.730e-02, 2.211e-01, 9.011e-02, -2.170e-02, -1.503e-01, -1.351e-03, -3.374e-02, -1.586e-01, 4.537e-02, -4.016e-02, 3.440e-02, 8.301e-02, -2.345e-02, -1.047e-01)); + r += mul(s7_3, M4(4.726e-03, -1.440e-01, -1.918e-02, -6.087e-02, -2.039e-02, 1.680e-01, 7.678e-02, 3.246e-02, -9.188e-02, -1.428e-01, -7.054e-02, -4.589e-02, 1.097e-01, 4.248e-02, 9.779e-02, -5.377e-02)); + r += mul(s7_4, M4(1.140e-01, 7.803e-02, -1.629e-01, 3.934e-02, 1.460e-01, -8.277e-02, -2.556e-01, 2.779e-02, 1.764e-01, -5.843e-02, -1.032e-01, 6.965e-02, -1.226e-01, -1.023e-01, -3.626e-02, 1.119e-01)); + r += mul(s7_5, M4(2.249e-01, -2.232e-01, -4.929e-02, 6.349e-02, -4.084e-02, 1.465e-02, -1.406e-01, 1.851e-02, 2.453e-02, -7.126e-02, 1.207e-02, 9.391e-02, -4.615e-02, 1.280e-01, -4.366e-02, -4.766e-02)); + r += mul(s7_6, M4(-9.260e-02, 4.395e-02, -1.591e-02, 1.160e-01, -4.981e-02, -3.446e-03, 1.558e-02, -7.517e-03, 9.090e-03, -1.203e-01, -1.635e-02, 3.306e-03, -8.822e-02, -1.986e-03, 4.776e-03, 7.837e-02)); + r += mul(s7_7, M4(-1.941e-01, -1.647e-01, -1.748e-01, -2.672e-01, 1.248e-01, 6.852e-02, -1.307e-01, 2.776e-02, -2.553e-02, 8.394e-02, 7.780e-02, -4.222e-02, -1.564e-01, 5.224e-02, -8.394e-02, -9.676e-03)); + r += mul(s7_8, M4(-3.461e-02, -9.314e-03, -7.496e-02, 1.333e-01, 6.383e-02, -6.956e-03, 5.022e-02, 1.759e-01, 6.323e-02, 7.062e-02, -2.913e-03, -5.013e-02, -1.868e-02, -1.214e-01, 4.730e-02, 8.127e-02)); + r += V4(-9.248e-03, 2.579e-02, -3.134e-03, 9.669e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 
s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.663e-02, 3.573e-02, 3.160e-02, 3.179e-02, -1.139e-02, -2.377e-01, 2.398e-02, -6.937e-02, 8.542e-02, 1.260e-02, 5.305e-02, -1.027e-01, -2.232e-02, -3.775e-02, -1.290e-01, 1.404e-02)); + r += mul(s0_1, M4(4.846e-02, 9.449e-02, 1.311e-01, 3.851e-02, -1.737e-02, -2.450e-01, 6.982e-02, -7.079e-02, -1.920e-02, -1.712e-01, 1.282e-01, -2.338e-02, -4.105e-02, -1.808e-01, 2.626e-01, 1.768e-01)); + r += mul(s0_2, M4(-1.921e-02, -1.367e-02, 1.222e-01, 4.826e-02, 5.288e-02, -3.231e-02, -1.508e-01, -6.038e-02, -1.523e-02, 4.821e-02, 3.348e-03, 4.788e-02, 5.726e-02, -5.976e-02, 1.916e-01, 1.611e-01)); + r += mul(s0_3, M4(8.216e-03, 1.343e-01, 9.327e-02, 5.472e-02, 7.263e-02, -4.809e-02, -7.638e-02, -7.174e-02, -5.490e-02, 2.225e-01, -1.328e-01, -3.526e-02, 1.267e-02, 2.465e-02, -5.855e-02, -2.627e-02)); + r += mul(s0_4, M4(-1.407e-01, -8.979e-02, 1.193e-01, -1.609e-01, -5.270e-02, 5.800e-02, -1.368e-01, 2.581e-03, 1.552e-01, -1.095e-01, -7.026e-02, 9.304e-02, -3.085e-02, -4.825e-03, -3.428e-01, 2.084e-01)); + r += mul(s0_5, M4(-1.373e-01, 4.969e-02, -1.441e-01, 9.402e-02, 1.693e-01, -1.024e-01, -1.389e-01, -8.313e-02, 1.894e-01, -1.958e-01, 1.754e-01, 7.405e-02, 1.316e-01, -2.585e-01, -4.974e-02, 2.022e-01)); + r += mul(s0_6, M4(-2.163e-02, 5.919e-02, -1.898e-02, -3.715e-02, -1.390e-02, -2.874e-02, -4.194e-02, -5.698e-02, 1.136e-01, 8.847e-02, -2.969e-01, -2.920e-02, 9.156e-02, 1.106e-01, 
-1.933e-02, 6.158e-02)); + r += mul(s0_7, M4(-3.552e-02, 1.153e-01, -1.916e-02, -6.388e-02, -3.472e-02, -6.423e-02, 3.745e-02, 9.731e-02, 2.254e-02, 8.993e-02, -2.307e-01, -9.372e-03, 1.145e-01, 4.845e-02, -6.438e-02, 1.110e-01)); + r += mul(s0_8, M4(6.302e-02, 2.867e-02, 1.221e-01, 4.384e-02, 5.529e-02, -6.174e-02, -2.728e-02, -1.700e-02, -8.450e-03, 1.797e-01, -1.562e-01, -4.187e-02, 8.052e-02, -2.005e-01, 1.629e-01, -3.207e-02)); + r += mul(s1_0, M4(-9.700e-02, -1.379e-02, -5.259e-02, 4.906e-02, 7.132e-02, -9.649e-02, -6.448e-03, -3.662e-02, 7.594e-04, 1.604e-02, 5.802e-03, 2.184e-04, -1.599e-02, 1.653e-02, 3.313e-02, -4.064e-02)); + r += mul(s1_1, M4(1.299e-01, 1.212e-02, -1.715e-01, -1.813e-02, 1.272e-01, 2.094e-02, -1.047e-01, 1.227e-01, 7.166e-02, -2.531e-02, -2.501e-02, -6.219e-02, -3.725e-02, -1.393e-02, 2.518e-03, 5.498e-03)); + r += mul(s1_2, M4(1.017e-01, -1.189e-01, 1.943e-02, -1.318e-03, -7.588e-02, 1.398e-01, 9.943e-02, -2.078e-02, -1.502e-02, -2.359e-02, -3.587e-02, 4.861e-02, 3.589e-02, 9.188e-02, 3.024e-02, -5.091e-02)); + r += mul(s1_3, M4(4.126e-02, -2.310e-01, 1.046e-01, 2.332e-01, -7.066e-02, -6.313e-03, -4.567e-03, 1.480e-02, 3.615e-02, -4.300e-02, 2.026e-02, 7.725e-02, -9.011e-02, -2.195e-02, 6.867e-02, 4.186e-03)); + r += mul(s1_4, M4(-7.472e-02, -4.031e-02, -1.416e-01, -1.453e-01, 3.689e-02, 5.077e-02, 2.386e-01, 8.009e-02, -1.776e-02, -1.754e-02, 1.543e-02, 1.298e-01, -1.286e-02, 1.735e-02, -4.372e-02, 5.004e-02)); + r += mul(s1_5, M4(7.579e-02, 4.976e-04, -1.751e-01, 3.413e-02, -4.651e-02, 1.532e-01, 2.524e-03, 3.198e-02, 1.865e-02, 7.633e-02, -8.661e-03, -8.857e-02, 7.275e-02, 6.012e-03, -4.343e-02, 2.194e-01)); + r += mul(s1_6, M4(3.561e-02, -1.975e-01, -1.553e-01, 7.688e-02, 7.823e-02, 3.841e-02, -7.595e-02, -9.052e-03, 2.869e-02, -2.724e-02, -5.237e-02, 3.539e-02, 7.125e-02, 6.169e-02, -5.338e-02, -1.247e-01)); + r += mul(s1_7, M4(8.636e-02, -1.742e-01, -1.153e-01, -5.277e-02, 6.017e-02, 8.485e-02, 2.407e-02, 6.711e-02, 2.165e-02, 
-6.010e-02, 1.138e-01, 4.745e-02, 3.359e-02, -9.350e-02, -4.098e-03, 1.539e-02)); + r += mul(s1_8, M4(1.893e-02, -1.004e-01, -1.206e-01, 6.257e-02, -9.174e-02, 4.435e-02, 8.294e-02, -1.323e-02, 3.174e-03, 1.658e-02, 9.627e-02, 4.803e-02, 5.214e-02, 4.139e-02, 1.213e-01, -6.390e-02)); + r += mul(s2_0, M4(-1.003e-01, -2.060e-02, 7.643e-02, 8.917e-03, -1.845e-03, 1.274e-01, -6.574e-03, 3.525e-02, 4.085e-02, -6.564e-02, 6.702e-02, -2.237e-02, -5.356e-02, -5.959e-02, -1.211e-01, -6.724e-03)); + r += mul(s2_1, M4(8.614e-02, -1.609e-01, 7.359e-02, 6.662e-02, -5.600e-03, 3.820e-02, -1.706e-01, -9.107e-02, 4.658e-03, -1.218e-01, -6.839e-02, -1.595e-01, -1.676e-01, -3.686e-02, 1.929e-01, 1.209e-02)); + r += mul(s2_2, M4(1.763e-01, 6.953e-02, 4.514e-02, -5.794e-02, 1.325e-02, -6.248e-02, 1.411e-01, 1.559e-02, 4.946e-02, -2.265e-01, 1.125e-01, 5.394e-02, -1.750e-02, 4.500e-02, -1.248e-01, -1.765e-02)); + r += mul(s2_3, M4(-1.029e-01, -1.013e-01, -1.063e-01, -9.505e-02, 1.116e-01, -1.684e-02, -6.668e-02, -1.788e-02, -4.550e-02, -6.653e-02, -2.074e-02, -7.063e-02, -9.743e-02, 4.777e-02, -3.770e-02, -4.267e-02)); + r += mul(s2_4, M4(2.858e-01, 7.134e-02, -8.271e-02, 1.058e-01, -1.034e-01, 1.048e-02, -1.470e-01, -8.929e-02, -2.202e-01, -4.561e-01, -7.260e-02, -1.335e-01, 8.094e-03, -1.764e-01, 6.452e-02, -8.200e-02)); + r += mul(s2_5, M4(-7.771e-02, 6.318e-02, 2.476e-02, -1.248e-01, 8.019e-02, -1.921e-01, -8.554e-02, 1.549e-01, 2.037e-01, -2.268e-01, -8.223e-02, -1.499e-02, 8.035e-02, -5.437e-02, 9.512e-02, 3.877e-02)); + r += mul(s2_6, M4(5.986e-02, 1.200e-02, -6.885e-02, -3.239e-03, 7.123e-02, -4.501e-02, -8.298e-02, 6.258e-03, 3.682e-02, 2.750e-02, 5.589e-02, -4.066e-02, -7.148e-02, 1.147e-02, 1.011e-01, 4.193e-02)); + r += mul(s2_7, M4(-5.912e-02, -4.001e-02, -1.402e-01, -1.004e-01, 6.142e-02, 1.464e-02, -3.567e-01, -2.251e-01, 3.545e-02, -5.070e-02, -1.870e-01, -1.834e-01, -3.509e-02, -5.190e-02, 1.373e-01, 1.254e-01)); + r += mul(s2_8, M4(-4.374e-02, -1.312e-02, -3.855e-02, 
-4.781e-02, 4.914e-02, -1.227e-01, -1.656e-01, -1.675e-02, -5.363e-02, -4.299e-03, -1.644e-01, -1.710e-02, -6.290e-02, 2.749e-03, 4.811e-04, 7.494e-02)); + r += mul(s3_0, M4(7.418e-02, 2.413e-02, 2.179e-01, 1.002e-01, -6.673e-03, 1.446e-01, -4.374e-02, 1.830e-02, 7.794e-02, 4.814e-02, 5.367e-02, -2.835e-02, 4.846e-02, 1.012e-01, -5.740e-02, -2.012e-02)); + r += mul(s3_1, M4(-1.076e-01, 2.386e-01, 4.229e-02, 1.792e-01, 2.957e-02, -7.132e-03, 3.811e-02, 3.139e-02, 2.056e-01, -6.285e-02, 7.038e-02, 8.697e-02, 7.951e-03, -1.323e-01, 4.550e-02, -9.253e-02)); + r += mul(s3_2, M4(-1.992e-02, 4.952e-02, 2.321e-02, -6.068e-02, 5.577e-03, -3.496e-02, 1.100e-02, 4.701e-02, -6.097e-02, 3.211e-02, 7.370e-02, 5.026e-02, -1.176e-03, 9.178e-03, 2.910e-02, 6.707e-02)); + r += mul(s3_3, M4(-1.284e-01, 1.173e-02, 2.513e-01, 4.469e-02, 1.003e-02, -1.182e-01, -4.960e-02, 2.868e-02, 1.974e-03, -4.529e-02, 1.680e-02, 1.219e-02, 6.489e-02, 3.270e-02, 2.856e-03, -2.437e-02)); + r += mul(s3_4, M4(-2.342e-01, 4.384e-01, 2.001e-01, -4.318e-02, -2.596e-03, -5.227e-02, 2.451e-01, -6.867e-02, -1.310e-01, 6.207e-02, -3.253e-02, -9.466e-02, 1.424e-02, -5.704e-02, 1.606e-01, -3.421e-01)); + r += mul(s3_5, M4(-1.507e-01, 2.010e-01, 8.911e-02, -8.064e-02, 1.800e-01, 1.194e-01, -1.134e-02, 1.642e-01, -2.123e-01, -1.612e-02, -3.988e-02, -7.009e-02, 9.302e-02, -3.479e-02, -2.334e-02, -8.769e-03)); + r += mul(s3_6, M4(-1.859e-02, -1.743e-02, 2.540e-01, 3.481e-02, -5.009e-02, -4.857e-02, -7.388e-02, 6.612e-02, -1.630e-02, 1.151e-01, -8.329e-02, 2.283e-02, 1.228e-01, 8.869e-02, 8.183e-02, 2.168e-02)); + r += mul(s3_7, M4(-1.030e-01, -3.114e-03, 1.257e-01, 5.785e-02, 7.695e-02, 7.875e-03, -1.929e-01, -6.950e-05, -4.482e-02, -4.113e-03, -1.171e-01, 8.258e-02, -8.507e-03, -1.046e-01, 6.857e-02, -3.029e-02)); + r += mul(s3_8, M4(2.446e-02, 5.971e-02, 5.623e-02, 1.255e-01, -8.937e-03, 3.395e-02, 1.949e-02, -3.246e-02, -2.415e-02, -1.666e-02, -6.608e-02, -4.302e-02, 1.015e-01, -9.010e-02, -5.383e-02, 
-8.861e-02)); + r += mul(s4_0, M4(2.870e-02, 5.795e-02, -4.586e-02, -8.181e-03, 2.942e-02, 5.082e-02, -5.689e-03, 9.240e-03, 4.014e-02, 3.368e-02, -3.481e-02, -1.248e-01, -5.246e-02, 9.443e-02, -2.095e-02, 1.967e-02)); + r += mul(s4_1, M4(-2.031e-02, -2.830e-02, 1.334e-02, -8.101e-03, -1.085e-01, 2.567e-02, 7.094e-02, 5.787e-02, 4.140e-02, 1.147e-02, 9.587e-02, -1.425e-01, 2.933e-02, 3.509e-02, 1.724e-01, -2.731e-02)); + r += mul(s4_2, M4(-1.860e-03, -4.974e-02, 3.231e-02, 7.355e-03, -2.778e-02, 1.611e-01, 2.444e-02, 8.233e-02, 7.862e-02, 2.351e-02, -1.022e-02, -7.801e-02, -3.894e-02, 1.514e-02, 8.644e-02, 4.537e-02)); + r += mul(s4_3, M4(3.963e-02, -1.404e-03, 4.396e-02, 8.070e-02, -6.941e-02, 6.948e-02, 4.566e-02, -2.384e-02, 1.194e-01, 2.032e-01, 8.894e-02, -1.514e-01, 4.098e-02, -3.902e-02, 1.593e-02, 1.235e-01)); + r += mul(s4_4, M4(5.303e-02, 6.492e-02, -3.854e-02, 4.180e-02, -2.960e-03, -5.687e-02, -1.223e-01, 2.594e-02, -1.240e-01, -8.107e-02, 1.643e-01, -7.760e-02, 1.478e-02, -1.137e-02, 5.575e-02, 1.585e-01)); + r += mul(s4_5, M4(4.148e-02, 3.501e-02, -1.017e-02, -5.189e-03, -5.128e-02, -1.322e-01, 6.951e-02, 9.929e-03, -3.974e-02, -5.354e-02, -7.879e-02, -1.421e-01, -1.860e-01, -6.520e-02, 2.233e-01, -3.510e-02)); + r += mul(s4_6, M4(1.584e-02, -1.890e-01, 6.972e-02, 7.349e-02, 3.867e-02, 1.448e-02, -9.357e-02, 4.793e-02, 2.141e-01, 3.160e-01, 6.640e-02, -1.808e-01, -3.964e-02, -3.513e-02, -1.367e-01, 1.663e-02)); + r += mul(s4_7, M4(4.660e-02, -1.508e-01, -8.482e-02, 7.494e-02, 3.497e-02, -1.054e-01, -5.170e-02, 7.540e-02, 5.040e-02, 8.157e-02, -1.403e-01, -2.148e-03, 6.321e-03, -3.587e-02, -1.364e-01, 1.370e-01)); + r += mul(s4_8, M4(-6.566e-02, -2.770e-02, -1.794e-02, 2.547e-02, 7.739e-02, -1.032e-01, -1.455e-01, -7.336e-02, 6.629e-02, 2.620e-02, -8.784e-02, -3.572e-02, 2.198e-02, -1.402e-01, 1.540e-02, -3.120e-02)); + r += mul(s5_0, M4(7.106e-02, 2.706e-02, 2.206e-02, 2.482e-02, 5.481e-02, 1.434e-01, 9.398e-02, -9.003e-03, -5.256e-03, -1.218e-01, 
2.484e-03, -4.917e-02, -4.145e-02, 4.515e-02, -3.105e-02, 2.698e-02)); + r += mul(s5_1, M4(1.633e-01, 2.688e-01, 4.356e-02, -4.851e-02, -1.091e-01, 2.399e-01, -8.502e-02, 4.810e-03, -1.683e-02, -4.995e-02, -1.154e-02, -2.006e-03, 1.245e-01, -3.599e-03, 1.517e-01, -6.009e-02)); + r += mul(s5_2, M4(5.579e-02, -6.718e-02, 5.835e-02, -5.433e-02, -6.252e-02, 1.592e-01, -2.415e-01, -3.498e-03, -4.435e-02, -8.581e-02, -1.006e-01, 4.444e-02, -6.519e-02, 1.304e-01, -1.404e-01, 3.976e-02)); + r += mul(s5_3, M4(1.713e-01, 2.055e-01, 2.371e-02, 9.614e-02, -1.554e-01, -5.299e-02, 1.901e-01, -1.948e-01, 3.469e-03, -8.674e-02, -1.929e-01, 6.825e-02, 3.659e-02, -1.318e-01, 4.520e-02, -4.470e-02)); + r += mul(s5_4, M4(-3.117e-02, 3.879e-01, -4.774e-02, -9.195e-02, -1.407e-01, -1.282e-01, -4.174e-02, 2.968e-02, 3.210e-02, 9.048e-03, 2.540e-02, 1.136e-01, 1.598e-01, -1.205e-01, 5.497e-02, -1.612e-01)); + r += mul(s5_5, M4(-1.579e-02, 2.492e-01, -1.458e-01, -7.247e-02, -2.458e-01, -1.097e-01, 1.808e-01, -2.019e-03, 5.261e-02, -1.370e-01, -1.082e-01, 7.321e-02, 7.172e-02, 4.009e-02, 4.015e-02, -2.922e-01)); + r += mul(s5_6, M4(7.843e-02, 2.011e-01, -2.855e-02, -1.125e-01, 1.346e-01, 1.834e-01, 2.303e-02, 2.286e-02, 5.182e-02, -6.831e-03, 2.276e-01, -1.567e-02, 3.638e-02, -3.162e-02, -1.084e-01, -3.247e-02)); + r += mul(s5_7, M4(3.780e-03, 1.666e-01, 1.484e-01, -1.534e-01, -5.629e-02, 7.632e-02, 1.376e-01, -3.399e-02, 2.648e-02, -3.358e-02, 1.152e-01, 7.617e-02, 4.750e-03, -9.384e-02, 4.874e-03, 9.212e-02)); + r += mul(s5_8, M4(-2.864e-01, 2.671e-01, -8.083e-02, -9.173e-02, -3.624e-02, -1.240e-01, -4.755e-02, -1.147e-01, 8.944e-03, 4.023e-02, -6.581e-02, 3.924e-02, 1.563e-01, -2.466e-01, 2.736e-01, -2.829e-02)); + r += mul(s6_0, M4(3.454e-02, 4.907e-03, 1.759e-01, -2.260e-02, -6.025e-02, -9.115e-02, 1.239e-01, 9.932e-02, 4.134e-02, 3.477e-02, 7.780e-03, 1.504e-02, -6.135e-02, 1.305e-01, 2.783e-01, -8.530e-02)); + r += mul(s6_1, M4(-7.489e-02, 1.256e-01, 6.719e-02, -1.312e-02, 5.527e-02, 
1.743e-01, -2.603e-02, -1.856e-02, -4.894e-02, -5.220e-03, -9.751e-02, -6.255e-02, 1.721e-02, 1.428e-01, 8.609e-02, -1.931e-01)); + r += mul(s6_2, M4(-3.560e-03, 3.581e-03, 4.381e-03, -5.233e-02, -7.191e-02, 2.366e-01, 7.837e-02, -7.278e-02, -1.503e-02, 1.267e-02, 7.984e-02, 8.166e-02, 3.384e-03, -3.630e-02, 2.452e-01, -4.846e-02)); + r += mul(s6_3, M4(1.409e-01, -5.460e-02, -7.154e-02, 9.662e-03, 2.511e-02, -5.559e-02, -2.400e-01, 4.025e-02, -7.148e-02, -2.844e-02, 4.508e-02, -1.666e-02, 1.084e-02, 1.696e-01, -1.175e-01, -1.194e-01)); + r += mul(s6_4, M4(-1.884e-01, 3.910e-02, -1.289e-01, -1.088e-01, 3.357e-02, 6.130e-02, 8.805e-03, -1.275e-01, -1.181e-01, -3.456e-02, 5.731e-02, -2.597e-01, -1.981e-01, 3.257e-02, -9.962e-02, -1.001e-01)); + r += mul(s6_5, M4(9.773e-02, 1.062e-02, 1.079e-01, 7.377e-02, -1.285e-02, 1.068e-01, 3.774e-02, -2.815e-02, -8.242e-02, 7.684e-02, -9.929e-02, -6.032e-02, -1.261e-02, -1.095e-01, 2.211e-01, -1.299e-02)); + r += mul(s6_6, M4(1.216e-02, 5.718e-02, -3.228e-02, 9.389e-03, -5.748e-02, 1.410e-01, -7.183e-02, -6.529e-02, -2.041e-02, 4.350e-02, 8.421e-02, -1.284e-02, -4.060e-02, 4.178e-02, 5.984e-02, -5.652e-02)); + r += mul(s6_7, M4(1.156e-01, 5.795e-02, 9.929e-03, -2.015e-02, -1.636e-01, -6.938e-02, -1.737e-01, 4.317e-02, 1.722e-02, -1.930e-02, 1.952e-01, -2.675e-02, -2.033e-01, 2.390e-02, -1.255e-01, 2.645e-03)); + r += mul(s6_8, M4(1.267e-02, -1.096e-02, 7.513e-02, -1.194e-02, -1.441e-03, 1.111e-02, -1.026e-01, -4.972e-02, -4.979e-02, -6.357e-02, -1.229e-02, -1.549e-02, 4.419e-02, 2.950e-01, 5.272e-02, -2.957e-02)); + r += mul(s7_0, M4(-4.764e-02, -6.165e-02, 1.538e-01, 9.830e-03, 4.903e-02, -1.458e-01, 1.383e-01, 5.074e-02, -1.344e-02, -6.074e-03, -5.067e-02, -4.629e-02, -4.420e-03, -1.331e-02, -3.949e-02, -6.188e-04)); + r += mul(s7_1, M4(-1.795e-02, 6.593e-02, -3.127e-01, -1.740e-01, 3.898e-02, -3.126e-02, -1.538e-01, 2.400e-02, -2.798e-02, -3.590e-02, 7.410e-02, 2.130e-02, -4.461e-03, 5.434e-02, 2.883e-02, 2.667e-02)); + r += 
mul(s7_2, M4(1.237e-02, 5.499e-02, -1.273e-01, -1.156e-01, 1.424e-01, -1.258e-03, 1.371e-02, 2.027e-02, 1.008e-01, 6.674e-02, 4.499e-02, 2.501e-02, -8.317e-02, 7.529e-02, -6.850e-02, -3.784e-02)); + r += mul(s7_3, M4(-3.737e-02, -5.870e-03, 3.237e-02, -1.124e-01, -9.034e-02, -1.274e-01, 3.342e-02, 7.105e-02, -1.314e-01, 2.508e-02, -3.401e-02, -9.204e-02, 5.544e-02, 1.018e-01, -6.085e-02, -6.019e-03)); + r += mul(s7_4, M4(-2.714e-01, -2.963e-01, -1.685e-01, -2.489e-01, 4.202e-02, 3.484e-02, -3.542e-02, -7.937e-02, -8.434e-02, 2.660e-02, -9.034e-02, 6.340e-03, -4.910e-02, -1.012e-01, -3.295e-02, 3.424e-02)); + r += mul(s7_5, M4(-9.240e-02, 1.731e-01, -1.717e-01, -1.064e-01, 1.709e-01, -5.175e-02, 1.502e-01, 1.928e-02, -4.976e-03, 7.204e-02, 6.187e-02, -3.323e-02, 6.619e-02, -3.219e-02, -4.676e-03, -3.696e-03)); + r += mul(s7_6, M4(-5.651e-03, -8.852e-02, 6.467e-02, -7.485e-02, 2.775e-02, 5.363e-02, 3.246e-02, -7.201e-02, -1.304e-02, 1.058e-01, -1.672e-01, -6.017e-02, -8.539e-02, -1.457e-01, -1.979e-02, 5.357e-02)); + r += mul(s7_7, M4(-2.462e-02, -2.363e-01, -1.963e-02, -1.183e-01, 1.425e-02, 8.463e-02, -7.534e-02, 5.798e-02, -9.598e-02, 2.081e-02, 2.889e-02, -1.178e-02, 7.588e-02, -9.646e-02, 3.119e-02, -1.140e-02)); + r += mul(s7_8, M4(2.760e-02, -7.598e-02, 1.765e-01, -5.347e-02, 3.620e-02, -7.794e-02, 4.037e-02, -1.442e-02, 1.240e-02, 6.169e-02, -1.550e-02, -8.514e-03, -1.049e-02, 1.607e-01, 1.066e-01, 3.096e-02)); + r += V4(7.479e-03, -6.194e-03, 1.320e-03, -7.964e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, 
V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.430e-02, -3.763e-02, 2.044e-02, 2.159e-02, 8.186e-02, 2.197e-02, -7.806e-02, -3.065e-02, -7.532e-03, 1.757e-01, -2.800e-01, -1.676e-02, 1.776e-02, 7.224e-02, -1.259e-01, 2.911e-02)); + r += mul(s0_1, M4(3.012e-02, 3.678e-05, -1.356e-02, -2.604e-02, -1.241e-01, -1.935e-01, 5.610e-03, 1.341e-01, -8.708e-02, 1.234e-01, 4.167e-02, 1.259e-01, -6.455e-02, -6.625e-03, 5.397e-02, 4.600e-02)); + r += mul(s0_2, M4(-2.491e-02, 5.725e-02, -1.763e-02, 3.437e-02, -5.220e-02, 8.810e-02, -1.411e-01, 7.865e-02, 7.891e-02, -4.207e-03, 4.996e-02, -9.937e-02, 2.713e-02, -4.735e-02, -1.598e-01, 7.724e-02)); + r += mul(s0_3, M4(-4.406e-02, 6.415e-02, -4.933e-02, 5.740e-02, 3.741e-02, 9.063e-02, -1.393e-02, -7.392e-02, 6.447e-02, 5.608e-02, -2.735e-02, -1.100e-01, -5.250e-03, 9.540e-02, -3.442e-02, 5.128e-02)); + r += mul(s0_4, M4(1.509e-01, 1.112e-01, 1.287e-02, 3.433e-02, -5.654e-02, 4.547e-02, 6.433e-03, -9.677e-02, -1.243e-01, 2.642e-01, -1.367e-01, -1.725e-01, -6.274e-02, -2.195e-03, 1.876e-01, 7.703e-02)); + r += mul(s0_5, M4(-5.247e-02, -3.219e-03, -3.712e-02, -7.956e-02, -6.361e-02, 4.913e-02, -4.173e-02, -7.252e-02, -8.908e-02, -7.582e-02, -5.119e-02, -1.812e-01, 1.378e-01, 1.358e-01, 3.654e-02, 1.169e-01)); + r += mul(s0_6, M4(-2.404e-02, 2.365e-02, 7.012e-02, -7.389e-03, -2.636e-02, 5.250e-02, -1.963e-02, 7.453e-02, -3.787e-02, -1.015e-01, 1.352e-02, -1.706e-01, 1.528e-02, -8.125e-02, 5.031e-02, -5.389e-02)); + r += mul(s0_7, M4(-5.166e-02, -7.660e-03, 2.108e-02, 6.046e-03, -5.190e-02, -4.010e-03, -6.305e-02, 4.929e-03, -6.187e-03, 2.259e-01, 7.503e-02, 7.217e-02, -9.784e-02, -8.002e-02, 4.003e-02, 7.586e-02)); + r += mul(s0_8, M4(-3.035e-03, -1.403e-02, 1.668e-02, -3.657e-02, 9.571e-04, -1.242e-01, -4.894e-02, -1.703e-02, -8.390e-02, -4.609e-02, -4.676e-02, -5.820e-02, 
7.860e-02, 8.434e-03, 4.485e-02, 9.159e-02)); + r += mul(s1_0, M4(-3.446e-02, -1.680e-01, 3.259e-02, 7.439e-03, -1.256e-02, 7.677e-02, -1.888e-01, -1.630e-01, 7.922e-02, 9.013e-03, -5.748e-02, 1.094e-01, -2.331e-02, 5.989e-02, -4.874e-02, -2.901e-02)); + r += mul(s1_1, M4(1.509e-01, 3.699e-02, 1.630e-02, -1.730e-01, -2.339e-02, 3.929e-02, -2.129e-01, -3.177e-01, -6.895e-03, 1.291e-02, -9.745e-02, 1.131e-01, 4.728e-04, 2.713e-03, -7.145e-02, -8.768e-04)); + r += mul(s1_2, M4(-9.738e-02, 1.201e-01, 9.683e-02, -8.188e-03, -8.669e-03, -2.850e-02, 4.446e-03, 1.838e-02, 6.435e-02, -1.051e-01, 5.622e-02, 1.079e-01, -8.372e-02, 5.742e-02, -9.410e-02, -1.178e-01)); + r += mul(s1_3, M4(-1.274e-01, -3.866e-02, -5.815e-02, 7.166e-02, -3.350e-02, -1.201e-02, -1.950e-02, 1.569e-01, -4.578e-02, -4.500e-02, 1.124e-02, -1.199e-02, 3.623e-02, 1.357e-01, 3.165e-02, 7.996e-02)); + r += mul(s1_4, M4(1.977e-01, -8.830e-02, 1.915e-02, 9.525e-02, 1.441e-02, 9.018e-02, 5.043e-02, 3.517e-02, -4.349e-02, -7.091e-02, 1.408e-01, -3.890e-02, -5.690e-02, -3.911e-04, -6.398e-02, -9.388e-02)); + r += mul(s1_5, M4(8.464e-02, -4.588e-02, 7.772e-02, -7.091e-02, -7.751e-02, -7.299e-02, -1.756e-02, -8.245e-02, 8.981e-02, -1.468e-01, 4.185e-02, 1.570e-03, -5.250e-02, 1.306e-01, -2.109e-03, -1.039e-02)); + r += mul(s1_6, M4(-9.170e-02, -2.145e-01, 7.325e-02, -3.369e-02, 5.012e-02, -2.545e-02, 2.888e-03, -3.379e-02, 3.622e-02, -5.937e-03, 5.761e-02, -1.032e-03, -2.598e-02, -1.295e-02, -2.028e-02, -2.185e-02)); + r += mul(s1_7, M4(-7.833e-02, -1.620e-01, -1.643e-01, 2.656e-02, -5.703e-02, 1.094e-03, 6.141e-04, -5.740e-02, 1.012e-02, 2.077e-01, 8.564e-02, 1.496e-02, -1.041e-01, 4.901e-02, -9.006e-02, 5.301e-02)); + r += mul(s1_8, M4(2.529e-02, -6.643e-02, 4.730e-02, 4.944e-02, 4.444e-02, -1.255e-01, 2.971e-02, 1.443e-02, -5.764e-02, -1.901e-02, -5.798e-02, 1.340e-02, -3.938e-02, 5.376e-02, 1.825e-02, 7.330e-02)); + r += mul(s2_0, M4(1.808e-02, 9.112e-02, 9.888e-02, -5.158e-02, 8.239e-02, 5.468e-02, 
1.934e-02, -1.203e-01, -8.099e-02, 1.578e-01, 2.154e-01, -5.725e-02, -4.725e-02, -1.062e-01, -1.264e-01, 4.121e-02)); + r += mul(s2_1, M4(-5.263e-02, 6.941e-02, 6.418e-02, 2.440e-01, 2.653e-02, 4.805e-02, 1.915e-02, -1.560e-01, -1.711e-01, 1.073e-01, -1.012e-01, -4.586e-02, -6.455e-03, -4.339e-03, 1.678e-01, -4.486e-03)); + r += mul(s2_2, M4(-4.876e-02, -1.993e-02, -6.981e-02, 3.753e-02, -3.258e-02, 3.133e-02, 3.133e-02, -5.825e-02, 4.361e-02, 8.687e-02, -1.392e-02, -1.155e-01, 7.298e-02, 8.208e-03, -1.453e-01, 1.300e-02)); + r += mul(s2_3, M4(-4.487e-02, 1.990e-02, -1.695e-01, 1.582e-02, -2.516e-02, -6.183e-02, -1.899e-01, 9.599e-02, 1.729e-02, 4.271e-02, 1.829e-01, -4.410e-03, -3.464e-02, -1.282e-01, -3.271e-03, -3.695e-02)); + r += mul(s2_4, M4(-2.379e-01, -1.111e-01, -7.252e-02, 2.307e-02, 1.419e-01, 1.810e-01, -1.036e-01, 2.302e-02, 5.548e-03, -2.212e-01, 8.717e-02, 1.478e-01, 1.195e-01, 1.247e-01, 3.053e-02, 1.035e-01)); + r += mul(s2_5, M4(-5.517e-02, -4.884e-03, 4.262e-02, -2.404e-02, -1.153e-01, 5.702e-02, -3.170e-02, 1.302e-02, -5.458e-02, 1.273e-01, 3.232e-02, 7.172e-03, -2.841e-02, 4.835e-02, -4.832e-02, 2.811e-02)); + r += mul(s2_6, M4(5.292e-03, -9.579e-02, -1.466e-02, -4.698e-02, 3.646e-02, -1.541e-01, -4.505e-02, 7.720e-02, -1.202e-02, -6.506e-02, -3.220e-02, -2.880e-02, 1.926e-02, 6.278e-02, 5.749e-03, -1.510e-02)); + r += mul(s2_7, M4(4.021e-02, -9.431e-02, 4.499e-02, 1.060e-01, 3.923e-02, 1.659e-01, -4.001e-02, 1.043e-01, -7.788e-02, -1.557e-01, 2.371e-02, -2.362e-02, 4.443e-02, -9.105e-03, 4.307e-02, -1.228e-02)); + r += mul(s2_8, M4(-1.876e-02, -9.252e-02, -6.358e-03, -1.258e-03, 1.082e-01, -7.880e-02, 5.527e-02, -1.771e-02, 6.621e-02, -1.951e-01, -3.723e-02, 7.825e-02, 3.900e-02, 5.570e-02, 2.862e-02, 1.219e-02)); + r += mul(s3_0, M4(-5.432e-03, 8.206e-02, 1.544e-01, 4.700e-02, 9.866e-02, 5.702e-02, -6.146e-02, -6.524e-02, -6.292e-02, 4.738e-02, -5.601e-02, 5.550e-02, 1.044e-02, 5.213e-02, -1.029e-01, 1.060e-01)); + r += mul(s3_1, 
M4(-6.110e-02, -8.903e-02, 1.822e-01, 8.739e-02, -1.033e-01, -6.282e-02, 1.867e-03, 1.235e-02, -4.629e-02, -3.759e-02, -8.009e-02, 5.644e-02, 1.260e-01, -7.835e-02, 1.230e-01, 1.167e-01)); + r += mul(s3_2, M4(-7.750e-02, -4.775e-03, -2.441e-02, 1.267e-03, -3.243e-03, -2.793e-02, 1.633e-03, -9.708e-02, 6.038e-02, 6.274e-02, -1.096e-01, -3.582e-03, 6.546e-02, -7.050e-03, -4.282e-02, 6.356e-02)); + r += mul(s3_3, M4(-5.984e-02, 2.632e-01, 2.347e-02, -1.079e-01, -1.953e-02, 1.136e-02, 1.678e-01, -1.807e-02, -9.635e-02, 1.001e-01, 3.262e-02, -1.118e-02, 1.446e-02, 4.709e-02, 2.822e-01, -2.795e-03)); + r += mul(s3_4, M4(-6.552e-03, 1.214e-01, 2.273e-02, -6.601e-03, 9.477e-02, 7.205e-02, 7.632e-02, 3.441e-01, 8.145e-03, -3.942e-02, 2.930e-02, 1.833e-02, 1.496e-01, 3.105e-02, 2.039e-01, 3.777e-01)); + r += mul(s3_5, M4(-3.077e-02, -8.840e-03, 3.186e-02, -2.523e-02, -1.409e-01, 1.073e-02, -7.960e-02, -2.800e-02, 6.708e-02, 3.546e-02, -1.679e-02, -2.793e-02, -4.138e-02, 9.311e-02, -3.216e-03, 3.354e-02)); + r += mul(s3_6, M4(-3.083e-02, 2.603e-01, 4.596e-02, -1.330e-03, -8.299e-02, -2.662e-02, -3.704e-02, -7.918e-02, -1.663e-02, 3.238e-02, 4.395e-03, -2.074e-02, 2.951e-02, 2.525e-02, -8.565e-04, -7.202e-02)); + r += mul(s3_7, M4(6.224e-02, -1.555e-01, -1.481e-01, -2.135e-02, -1.010e-01, -1.670e-02, 6.495e-02, -6.264e-02, 2.055e-02, -1.442e-01, 4.283e-02, -1.181e-02, 4.616e-02, -1.277e-01, -1.033e-01, -1.062e-01)); + r += mul(s3_8, M4(-5.627e-02, -4.951e-02, -7.015e-02, -4.087e-02, -1.958e-02, 3.923e-02, -6.462e-02, -2.031e-02, -4.688e-02, -6.262e-02, 1.886e-02, -2.789e-02, 1.281e-02, 6.714e-02, -1.233e-02, 4.252e-02)); + r += mul(s4_0, M4(-5.900e-02, -1.120e-02, 2.173e-02, 7.670e-02, -2.079e-02, -3.550e-02, -3.565e-02, -8.277e-02, -8.467e-02, -6.557e-02, 1.665e-02, 4.077e-03, 6.528e-03, 3.253e-02, -1.869e-02, -4.227e-02)); + r += mul(s4_1, M4(-3.366e-02, -8.582e-02, -1.080e-01, 1.154e-01, -5.242e-02, -1.353e-01, -2.081e-02, 1.130e-01, -5.835e-02, -4.013e-02, 2.943e-02, 
2.138e-02, -1.349e-02, 8.821e-03, -4.397e-02, -2.010e-02)); + r += mul(s4_2, M4(6.710e-02, 1.435e-02, -6.456e-03, 8.669e-02, -3.837e-02, 8.042e-02, -1.042e-01, 2.781e-02, -5.994e-02, -7.096e-02, 1.048e-02, 1.026e-01, -5.373e-03, 2.659e-02, -9.218e-02, -1.542e-02)); + r += mul(s4_3, M4(-1.931e-02, 1.020e-01, 3.003e-01, 6.374e-02, -9.365e-03, 9.304e-03, 5.861e-02, -8.938e-03, -4.927e-02, -3.379e-02, -1.179e-01, 1.485e-01, 1.077e-02, -8.328e-02, 8.623e-02, 5.525e-02)); + r += mul(s4_4, M4(-7.206e-02, 1.434e-01, 1.026e-01, 1.033e-01, -8.858e-02, -5.545e-02, 6.758e-02, 2.602e-03, -1.438e-01, -3.117e-02, 8.943e-02, 1.704e-01, -9.621e-02, 8.983e-02, 2.294e-01, -7.152e-02)); + r += mul(s4_5, M4(-4.845e-02, -7.738e-02, 5.348e-02, -6.079e-04, -1.698e-02, 8.341e-02, -3.714e-03, -6.279e-02, 4.644e-02, -2.537e-02, 1.567e-01, -1.895e-01, 2.102e-01, -2.693e-02, 2.032e-02, -1.407e-02)); + r += mul(s4_6, M4(-4.629e-03, 1.149e-02, -1.967e-02, -1.199e-02, -2.276e-02, -6.675e-02, -9.782e-02, -6.834e-02, 1.175e-02, -1.861e-01, 5.953e-02, -9.044e-02, 4.873e-02, 7.172e-02, -3.632e-02, 3.668e-02)); + r += mul(s4_7, M4(-1.014e-01, -2.352e-02, -4.855e-02, -3.958e-02, -9.258e-02, -8.375e-04, -2.510e-02, -6.243e-02, -4.628e-02, -5.860e-02, -1.068e-01, -9.276e-02, -8.298e-02, -8.761e-02, 5.141e-03, -5.266e-02)); + r += mul(s4_8, M4(-9.811e-03, 5.491e-02, -1.249e-02, -2.922e-02, -1.158e-04, -2.626e-02, -5.066e-02, -9.524e-02, -1.889e-02, 1.679e-02, -5.950e-02, 2.263e-03, 5.595e-02, 1.231e-01, -4.877e-02, -2.439e-02)); + r += mul(s5_0, M4(7.424e-02, -2.488e-02, -9.549e-02, -4.383e-02, -8.372e-02, -8.668e-02, 1.996e-01, -2.088e-02, -2.865e-02, -4.754e-03, 1.455e-02, 1.933e-02, 4.837e-02, -8.041e-03, -9.727e-03, 1.869e-02)); + r += mul(s5_1, M4(9.111e-02, -1.293e-01, -4.139e-02, 1.554e-02, 2.723e-03, -1.077e-01, 2.327e-01, -1.201e-01, 8.658e-02, 1.344e-02, -3.333e-03, -4.063e-02, -1.110e-01, -1.188e-01, -4.989e-02, 2.422e-01)); + r += mul(s5_2, M4(-6.935e-05, 6.199e-02, -9.141e-03, 5.849e-02, 
-5.873e-02, 4.224e-02, 4.203e-02, 5.583e-03, -2.714e-02, 2.112e-02, 2.515e-02, 3.705e-02, -7.747e-02, -5.863e-02, -6.713e-02, -6.701e-02)); + r += mul(s5_3, M4(5.835e-03, -1.858e-01, 1.874e-01, 2.459e-02, -3.903e-02, 1.430e-01, -1.457e-01, 5.243e-03, 5.728e-02, -7.350e-02, -1.569e-02, -6.887e-02, -1.258e-03, 8.374e-02, 5.132e-02, 1.807e-03)); + r += mul(s5_4, M4(3.215e-02, -3.627e-01, 3.379e-01, 1.343e-01, -1.749e-01, -2.623e-01, 5.681e-03, 1.008e-01, 7.570e-02, -6.389e-02, 7.694e-02, 5.292e-03, -1.308e-02, -5.631e-02, 7.423e-02, -7.345e-02)); + r += mul(s5_5, M4(1.426e-01, -5.201e-02, 1.086e-01, 3.665e-03, 1.046e-01, -2.734e-02, 3.172e-03, -8.829e-02, 1.569e-01, -1.352e-02, 6.193e-02, -2.948e-02, 1.041e-01, -1.714e-01, -7.391e-02, 1.037e-01)); + r += mul(s5_6, M4(5.654e-02, -1.197e-01, -2.979e-02, -1.301e-01, -1.731e-02, -1.315e-01, -3.223e-02, 2.595e-02, 1.343e-01, -4.696e-02, -1.933e-01, 9.608e-02, -7.795e-03, 5.023e-02, -9.176e-02, 6.673e-02)); + r += mul(s5_7, M4(-6.545e-02, -1.544e-01, 3.045e-01, -2.001e-01, -9.368e-02, -1.918e-02, 8.318e-02, 2.702e-02, -5.971e-02, -3.530e-02, -1.033e-01, -8.986e-02, -6.790e-02, -3.885e-02, -2.127e-01, 2.390e-02)); + r += mul(s5_8, M4(2.971e-02, 1.405e-01, 1.113e-01, -2.698e-02, -8.093e-02, -1.345e-01, -5.902e-02, -1.384e-01, 6.276e-02, 1.945e-01, -5.075e-02, -8.079e-02, 7.333e-02, 2.016e-01, -6.289e-02, -2.979e-02)); + r += mul(s6_0, M4(1.445e-03, -4.488e-02, -2.415e-02, 1.054e-01, 6.162e-03, -1.355e-01, 9.439e-02, 9.080e-03, -4.235e-02, -2.195e-02, -1.061e-01, 4.653e-02, -7.711e-02, 4.756e-02, 9.480e-02, 1.635e-01)); + r += mul(s6_1, M4(1.183e-01, -3.675e-02, -9.809e-03, -5.022e-02, -1.591e-01, -1.402e-01, -3.720e-02, 9.323e-02, 4.738e-02, -1.776e-02, 3.707e-02, -8.379e-02, -8.759e-02, -1.718e-01, -8.317e-04, 1.322e-01)); + r += mul(s6_2, M4(-2.019e-02, -6.700e-02, -7.803e-02, -3.497e-02, -8.818e-02, -7.762e-02, 7.796e-02, 1.219e-01, 2.817e-02, 9.422e-02, 5.756e-02, 2.422e-03, -1.135e-01, 3.744e-02, 1.005e-01, 9.330e-02)); 
+ r += mul(s6_3, M4(-3.284e-02, -1.332e-01, -5.783e-02, 6.113e-03, 5.051e-02, -1.081e-01, 1.299e-02, -1.549e-01, 4.547e-02, -1.470e-02, 9.428e-02, 3.999e-02, -1.083e-01, -1.736e-02, 2.101e-02, -1.027e-01)); + r += mul(s6_4, M4(1.999e-01, -2.282e-01, -5.370e-02, 1.913e-01, 6.297e-02, 3.588e-01, 5.296e-02, -4.865e-03, 2.450e-01, -1.021e-02, 9.037e-02, 1.332e-01, -1.555e-01, 4.639e-02, -1.286e-01, 1.229e-01)); + r += mul(s6_5, M4(-1.487e-01, 3.054e-02, -5.555e-03, 1.334e-01, -1.896e-01, -3.180e-03, 6.536e-02, 2.874e-02, -9.890e-02, -5.342e-02, 4.329e-02, 2.736e-02, 4.534e-02, 2.542e-01, -4.647e-02, -2.836e-02)); + r += mul(s6_6, M4(5.066e-02, 1.479e-01, 5.918e-02, 6.793e-02, 3.198e-02, -5.455e-02, -8.127e-02, 1.549e-01, -8.015e-02, -1.433e-01, -3.517e-02, -2.834e-03, 9.370e-03, 6.598e-02, -1.077e-01, 1.052e-02)); + r += mul(s6_7, M4(-5.899e-02, 2.721e-02, 2.773e-02, 2.215e-02, -6.861e-02, -3.365e-02, 4.181e-03, -2.475e-02, 5.120e-03, 5.010e-02, 3.918e-02, -2.160e-02, -2.827e-01, -5.059e-04, -2.682e-02, 1.556e-01)); + r += mul(s6_8, M4(1.836e-02, -1.041e-01, -2.065e-03, -3.724e-02, -3.146e-02, -2.356e-01, -1.360e-03, 2.028e-02, 2.248e-03, -2.449e-02, 2.114e-02, -7.193e-02, 8.894e-02, -5.642e-02, 7.970e-02, 3.218e-02)); + r += mul(s7_0, M4(-1.689e-02, -8.536e-02, 6.276e-02, -8.715e-05, 1.777e-02, 3.522e-02, 6.070e-02, 6.502e-02, -2.579e-02, 2.513e-02, -2.430e-02, -3.051e-02, 1.130e-02, -6.306e-02, 5.845e-02, 9.750e-02)); + r += mul(s7_1, M4(-2.998e-01, 1.075e-01, 7.024e-02, -5.273e-02, -7.570e-02, 2.087e-02, -3.490e-02, 7.225e-02, 8.222e-02, 6.358e-02, -4.160e-02, -7.680e-02, 2.715e-02, -6.176e-02, -4.450e-02, -1.597e-01)); + r += mul(s7_2, M4(-1.689e-01, 3.540e-02, -5.165e-02, -4.040e-03, 8.521e-02, 6.889e-02, 4.339e-02, 6.233e-02, -6.550e-02, 3.086e-02, 1.696e-03, 1.017e-02, 5.270e-02, -3.555e-02, 3.351e-02, 2.184e-02)); + r += mul(s7_3, M4(-2.087e-02, -9.450e-03, -5.071e-02, -4.235e-02, -2.721e-02, -2.547e-02, 4.675e-02, -9.540e-02, 4.705e-02, 1.342e-01, -2.493e-01, 
-4.504e-03, -7.887e-02, 4.524e-02, 3.062e-02, -1.208e-01)); + r += mul(s7_4, M4(1.069e-01, -1.192e-01, 4.192e-02, -2.510e-02, 3.077e-02, 4.369e-02, 8.598e-02, 5.367e-02, 2.032e-01, 4.614e-02, -1.152e-01, 1.086e-01, -2.778e-02, 1.948e-01, 2.998e-03, 1.059e-01)); + r += mul(s7_5, M4(-3.729e-01, 4.792e-02, 3.370e-02, -4.074e-02, -7.209e-02, 2.201e-03, -3.790e-02, 8.322e-02, 3.937e-02, 1.004e-01, -3.502e-02, 2.757e-02, -1.634e-02, -3.628e-02, -2.714e-02, -2.970e-03)); + r += mul(s7_6, M4(-2.734e-02, 5.581e-02, 7.820e-04, 4.765e-02, 5.535e-02, -5.839e-02, -1.618e-02, 1.199e-01, -2.490e-02, -6.918e-02, -2.497e-02, 4.537e-04, 4.291e-02, 1.673e-01, 5.112e-02, 1.837e-02)); + r += mul(s7_7, M4(-3.817e-02, -2.869e-01, 1.028e-01, 5.419e-02, -6.207e-02, -1.313e-01, -5.905e-02, -8.926e-02, 2.281e-03, 1.567e-01, 5.908e-02, 5.334e-02, -2.630e-02, -6.773e-02, -5.950e-03, 1.270e-02)); + r += mul(s7_8, M4(-8.944e-02, -2.145e-01, 3.549e-02, -4.949e-02, 4.003e-02, 4.562e-02, 2.117e-02, 2.740e-02, 5.503e-02, -1.659e-02, -3.981e-02, -9.706e-02, 1.123e-02, -1.722e-02, 1.784e-02, 5.567e-02)); + r += V4(5.351e-03, -4.746e-03, 1.930e-02, 9.374e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.969e-02, -6.798e-02, -1.631e-02, 1.910e-02, 4.617e-02, 7.579e-02, -8.022e-02, 4.732e-02, 1.322e-01, 2.937e-02, -6.443e-02, 2.656e-02, -4.332e-02, 
2.143e-02, -3.290e-02, 8.648e-02)); + r += mul(s0_1, M4(4.241e-02, -2.179e-02, -2.367e-02, -1.959e-02, -9.025e-02, 1.486e-02, -8.249e-02, -2.254e-02, 1.462e-01, 7.592e-02, -1.422e-01, 1.703e-01, -3.402e-03, 1.878e-02, 3.641e-02, -1.572e-01)); + r += mul(s0_2, M4(-5.421e-02, -4.759e-03, 2.244e-02, -8.270e-03, -1.239e-01, 7.347e-03, -8.013e-02, -1.097e-01, 4.680e-02, -1.434e-02, 9.645e-02, 3.507e-02, -1.215e-01, 4.258e-02, 1.093e-01, -5.892e-02)); + r += mul(s0_3, M4(4.112e-03, -4.066e-02, -4.552e-03, 4.021e-02, -3.030e-02, 8.381e-02, -5.320e-02, 1.506e-01, 1.518e-02, -4.773e-02, -8.290e-02, 7.934e-02, 8.910e-02, -3.802e-02, 1.241e-01, -1.218e-01)); + r += mul(s0_4, M4(-8.291e-02, 2.520e-02, 6.809e-02, -1.336e-01, 2.307e-01, 9.379e-02, -9.976e-02, 1.416e-01, 2.085e-01, 4.064e-02, 7.195e-02, 5.775e-02, 2.698e-02, 7.983e-02, 2.770e-01, 5.432e-02)); + r += mul(s0_5, M4(5.054e-02, 6.358e-03, -7.003e-02, -4.608e-02, -6.481e-02, -8.573e-03, 4.816e-02, 2.117e-02, 9.525e-02, 1.064e-01, 3.040e-02, -9.385e-02, -9.724e-02, -3.018e-02, 3.774e-03, 2.656e-02)); + r += mul(s0_6, M4(-9.158e-03, 6.796e-02, -3.302e-02, -1.438e-02, 3.986e-02, 3.158e-02, -2.615e-02, -2.499e-02, 1.700e-02, -4.627e-02, -3.077e-02, 1.543e-01, -1.178e-02, 6.861e-02, 1.617e-01, -1.282e-02)); + r += mul(s0_7, M4(-5.207e-02, 3.744e-02, -2.341e-02, 6.656e-02, 5.310e-02, -3.374e-03, -2.944e-02, -1.715e-02, 1.132e-01, -1.197e-01, -4.874e-02, -2.272e-02, -2.206e-01, 1.917e-01, 2.666e-01, -3.541e-01)); + r += mul(s0_8, M4(9.013e-03, 5.871e-03, -1.161e-02, -4.242e-02, -9.910e-02, 2.651e-02, 1.013e-02, -5.149e-02, -7.412e-03, 8.343e-02, -1.898e-02, -5.075e-02, -1.845e-01, 3.698e-02, 9.461e-02, -2.120e-01)); + r += mul(s1_0, M4(2.429e-02, -7.648e-02, 3.298e-02, -3.032e-03, 7.314e-02, -6.164e-02, 3.428e-02, -1.245e-02, -4.498e-02, 1.093e-02, -2.699e-02, 7.298e-02, -5.168e-02, 1.331e-02, -7.590e-02, 7.366e-02)); + r += mul(s1_1, M4(1.099e-01, -4.955e-02, -1.224e-01, 7.346e-03, 4.914e-03, -4.370e-02, 1.103e-01, 
-7.748e-02, 7.237e-04, 4.676e-02, -9.357e-02, 4.997e-03, -4.390e-02, 3.752e-02, -2.866e-02, 3.281e-02)); + r += mul(s1_2, M4(-1.262e-02, 1.775e-02, -3.496e-02, 6.971e-02, 7.479e-02, 1.286e-02, 4.882e-02, 4.406e-02, 1.038e-01, 2.825e-02, -3.748e-02, -6.979e-02, 4.078e-02, 4.772e-03, -1.167e-02, -4.336e-02)); + r += mul(s1_3, M4(2.242e-02, -1.776e-01, 1.049e-02, 1.485e-02, -8.566e-02, 3.446e-02, -1.039e-02, -6.812e-02, 1.424e-03, -6.183e-03, 5.377e-02, -9.231e-03, -8.639e-02, -6.571e-02, 1.050e-02, 9.043e-02)); + r += mul(s1_4, M4(3.096e-02, -1.431e-01, -2.843e-02, -7.583e-02, 5.026e-03, 2.955e-02, -2.720e-02, -1.345e-01, -2.240e-02, -5.411e-02, 6.056e-03, -7.368e-02, 8.799e-02, -4.958e-02, -3.041e-02, 5.679e-02)); + r += mul(s1_5, M4(1.070e-01, -4.377e-02, -2.826e-02, 8.609e-02, 4.070e-02, -7.259e-02, -3.162e-05, -7.352e-02, 4.335e-02, 1.387e-02, -4.858e-02, -2.004e-02, 1.130e-01, -7.727e-02, -7.790e-02, 1.178e-01)); + r += mul(s1_6, M4(1.159e-01, 9.055e-02, 1.792e-01, 2.068e-01, 2.690e-02, -1.557e-02, 2.118e-02, 1.547e-01, -2.155e-02, 2.029e-02, -4.170e-02, -1.762e-02, -3.505e-02, -3.280e-03, -8.604e-02, 3.944e-02)); + r += mul(s1_7, M4(1.398e-01, -2.824e-02, -4.291e-03, 1.327e-01, 5.433e-02, -3.675e-02, 1.093e-01, 9.705e-02, 1.108e-02, -4.899e-02, 7.807e-03, -4.815e-02, 1.042e-01, 3.069e-02, -6.328e-02, -1.331e-02)); + r += mul(s1_8, M4(4.031e-02, -9.825e-02, -1.363e-02, -4.161e-02, -2.145e-02, -2.666e-02, 2.548e-02, -3.093e-02, -4.397e-02, -3.312e-02, -3.893e-02, -1.042e-01, 6.282e-03, 7.359e-02, -3.994e-02, -4.463e-02)); + r += mul(s2_0, M4(3.742e-02, 2.102e-03, -8.991e-02, 1.841e-03, 1.011e-01, 1.243e-02, 2.460e-02, 7.946e-02, 8.754e-02, 8.191e-02, -7.770e-02, -1.909e-01, -6.812e-02, -6.579e-02, 3.355e-02, 1.095e-02)); + r += mul(s2_1, M4(1.468e-01, -5.954e-03, -1.652e-01, 1.304e-01, 1.597e-01, 3.018e-02, 9.813e-03, 5.645e-02, 8.428e-02, 5.243e-03, -5.279e-02, 4.396e-02, -1.099e-01, -9.407e-03, -5.214e-02, -5.491e-02)); + r += mul(s2_2, M4(-4.267e-02, 
3.507e-02, -1.628e-02, 1.785e-02, 6.790e-02, 4.417e-02, -9.246e-02, -4.941e-02, -1.218e-02, 3.533e-02, 1.662e-02, -7.595e-02, -1.858e-02, -6.408e-02, 8.688e-05, 8.104e-02)); + r += mul(s2_3, M4(-9.009e-02, 2.668e-02, -1.378e-01, 1.689e-01, 6.035e-03, 8.152e-03, 4.049e-02, 1.511e-01, 6.930e-02, 9.426e-02, -1.172e-01, -5.910e-02, 6.370e-02, -1.956e-01, -3.729e-02, 9.268e-02)); + r += mul(s2_4, M4(2.163e-01, -5.092e-02, -7.840e-02, 1.343e-01, 2.474e-02, 6.257e-02, 1.575e-02, -3.697e-02, -1.090e-01, -1.585e-01, -1.491e-01, -7.206e-02, 1.910e-01, -9.062e-02, 2.939e-01, -1.115e-01)); + r += mul(s2_5, M4(-7.395e-02, -1.166e-02, -3.892e-02, -4.676e-02, 9.469e-02, -1.064e-01, -1.998e-02, 6.847e-02, 1.670e-01, 2.912e-02, -8.629e-02, 1.202e-01, 5.849e-02, -1.098e-03, 5.147e-03, -6.240e-02)); + r += mul(s2_6, M4(-1.813e-02, -5.520e-02, -8.923e-03, 5.608e-02, -5.733e-02, -5.186e-02, -6.152e-02, -4.688e-03, 7.954e-03, -8.228e-02, -6.753e-02, -1.760e-02, 1.659e-01, -2.446e-01, -2.974e-02, -4.604e-02)); + r += mul(s2_7, M4(1.153e-01, -1.993e-02, -9.199e-02, -8.763e-02, 1.034e-01, -2.709e-01, -1.090e-01, -2.574e-02, 1.716e-01, -1.640e-01, -9.578e-02, -2.558e-02, -7.287e-02, -3.913e-01, 5.900e-03, -1.930e-01)); + r += mul(s2_8, M4(-5.687e-02, -6.216e-03, -4.808e-02, 1.562e-02, -5.671e-03, -5.044e-02, -6.741e-02, 7.378e-02, -5.833e-02, 2.604e-02, 2.209e-03, 9.277e-02, 1.407e-01, -6.460e-02, 1.792e-02, -2.688e-02)); + r += mul(s3_0, M4(1.301e-01, -6.673e-02, -4.673e-03, -1.528e-02, -7.053e-02, 1.336e-02, -5.414e-02, 5.152e-02, -5.916e-02, 9.150e-03, 1.437e-03, 4.084e-02, -5.096e-02, 4.691e-03, 1.502e-01, 5.893e-03)); + r += mul(s3_1, M4(-3.160e-02, 1.469e-01, -1.019e-01, -7.662e-02, 2.197e-01, 1.817e-02, 8.725e-02, -1.522e-01, -1.764e-02, -1.970e-02, 8.117e-03, 3.590e-02, -8.543e-02, 6.164e-03, -6.264e-02, -4.869e-02)); + r += mul(s3_2, M4(3.845e-02, 7.640e-03, -3.881e-02, 5.850e-03, 5.538e-03, -3.748e-02, 1.341e-02, -9.292e-03, 7.562e-02, -4.774e-02, -1.123e-02, 2.085e-02, 7.349e-02, 
-7.243e-03, 4.272e-02, 7.880e-02)); + r += mul(s3_3, M4(-1.380e-01, 4.229e-02, 2.785e-01, 3.801e-01, 5.090e-02, -1.460e-02, 6.248e-02, 1.078e-01, -2.172e-02, 1.705e-02, -3.401e-02, 2.091e-02, 1.108e-01, 6.484e-02, 6.153e-02, -1.278e-01)); + r += mul(s3_4, M4(1.742e-01, -4.719e-02, -3.431e-01, -1.191e-01, -2.863e-02, -1.824e-02, 1.543e-01, 2.224e-01, -1.706e-01, 9.533e-02, 1.460e-01, -2.465e-02, -2.429e-01, 1.371e-01, 1.065e-01, 2.320e-02)); + r += mul(s3_5, M4(-1.137e-01, 7.592e-03, 5.685e-02, -1.670e-02, -2.377e-02, -1.052e-01, 9.345e-02, 1.203e-01, -8.840e-03, 3.811e-03, 1.680e-02, -1.933e-02, -1.316e-02, 4.747e-02, -1.437e-02, 4.299e-02)); + r += mul(s3_6, M4(-6.346e-03, -2.647e-02, 1.117e-01, 6.339e-02, -7.772e-02, -1.288e-02, 1.283e-03, 2.006e-01, 2.522e-02, 4.820e-02, 3.984e-02, 8.423e-02, -4.313e-02, -4.313e-02, 5.460e-02, 1.835e-01)); + r += mul(s3_7, M4(2.887e-01, -2.041e-01, 4.232e-02, 3.653e-02, 2.437e-02, -2.630e-02, 1.212e-01, 9.045e-02, 5.126e-02, -2.397e-02, 2.083e-02, -2.262e-02, 1.174e-01, 2.861e-02, -1.233e-02, 7.360e-02)); + r += mul(s3_8, M4(-9.027e-02, -2.971e-02, 5.210e-02, 1.163e-01, -3.642e-02, 3.941e-02, 4.917e-02, 3.244e-02, -4.757e-02, -8.186e-02, 9.365e-03, -6.672e-03, -2.995e-02, -9.676e-02, -1.742e-02, -3.560e-02)); + r += mul(s4_0, M4(-2.413e-02, 5.733e-02, -3.662e-02, -1.290e-01, -2.503e-02, 5.071e-02, 2.409e-02, -1.394e-02, 5.004e-02, -6.543e-03, 1.458e-02, 2.920e-02, -2.374e-02, -2.253e-02, 4.324e-02, 2.165e-02)); + r += mul(s4_1, M4(-6.530e-02, 1.960e-02, 2.366e-02, 3.427e-02, 4.881e-02, 1.563e-02, -3.348e-02, 1.288e-03, -1.898e-02, 7.115e-02, -1.124e-01, 1.629e-01, 1.449e-01, 3.789e-02, 4.047e-02, 7.443e-02)); + r += mul(s4_2, M4(4.340e-02, -3.738e-03, -1.450e-02, -3.659e-02, -8.028e-03, -4.039e-02, 3.604e-03, 8.585e-02, -2.229e-02, -3.620e-02, 5.393e-02, 6.269e-02, -1.034e-02, 2.669e-02, -7.571e-03, 3.290e-02)); + r += mul(s4_3, M4(-6.251e-02, 6.566e-02, -1.510e-01, -1.397e-01, 5.000e-02, 3.678e-02, 1.271e-01, -6.640e-02, 
-1.842e-01, 1.340e-01, 2.975e-02, -4.881e-02, 1.065e-01, -8.574e-02, -5.624e-02, 3.385e-02)); + r += mul(s4_4, M4(7.366e-02, 2.984e-02, -1.919e-01, -7.495e-02, -1.070e-01, -9.427e-02, 7.523e-02, 2.054e-02, -3.354e-02, 1.520e-01, 1.800e-01, 8.748e-02, 1.248e-01, -2.123e-01, 2.753e-02, 7.032e-02)); + r += mul(s4_5, M4(8.098e-03, -1.405e-02, 9.718e-02, 1.855e-02, 1.367e-01, -2.222e-02, -2.826e-02, 9.571e-03, -5.042e-02, 3.500e-02, 6.680e-02, -1.410e-02, -5.061e-02, -6.171e-03, 1.901e-01, 9.228e-02)); + r += mul(s4_6, M4(2.685e-02, 6.709e-02, -3.187e-02, 1.025e-01, -8.084e-03, -2.715e-02, -3.167e-02, 1.119e-01, -4.820e-02, 1.070e-01, -3.387e-03, 9.772e-02, -5.765e-03, 7.395e-02, -6.708e-02, 1.610e-01)); + r += mul(s4_7, M4(-1.945e-02, -4.996e-02, 1.571e-02, 2.683e-02, -8.484e-02, 2.589e-02, 2.119e-02, 1.573e-01, -1.111e-01, 2.642e-02, -9.621e-03, 3.552e-03, 4.055e-02, 6.634e-02, -1.663e-02, -4.428e-04)); + r += mul(s4_8, M4(-6.808e-02, 4.646e-02, 3.264e-02, 5.362e-03, 1.139e-02, -7.739e-03, -4.597e-02, 5.726e-03, -4.609e-02, 2.209e-02, 3.058e-03, -1.594e-02, -9.159e-02, 2.359e-02, 6.326e-02, 1.340e-03)); + r += mul(s5_0, M4(2.998e-01, 6.184e-02, -3.594e-02, 1.837e-02, 4.618e-02, 6.769e-02, 7.110e-02, 5.140e-02, -3.745e-02, 3.319e-02, 1.519e-02, -7.435e-03, -1.101e-01, -1.383e-02, -3.191e-02, -2.942e-03)); + r += mul(s5_1, M4(1.386e-02, -3.171e-02, 2.864e-02, 5.892e-02, -1.940e-01, -1.093e-02, -7.669e-02, 5.503e-02, -6.708e-03, -4.250e-03, -1.225e-02, 2.570e-02, -2.052e-02, -4.288e-02, -1.539e-01, -1.591e-01)); + r += mul(s5_2, M4(1.389e-01, 1.930e-02, -8.161e-03, 3.241e-02, -3.284e-02, 2.070e-02, -8.118e-02, 2.614e-02, -4.448e-03, -1.930e-02, 1.276e-02, -2.240e-02, -1.700e-01, 1.309e-03, -9.330e-03, -1.701e-01)); + r += mul(s5_3, M4(2.932e-02, 1.999e-02, 1.099e-01, -2.874e-02, 1.307e-01, 1.732e-01, 1.651e-01, -2.842e-01, -8.364e-02, -6.895e-02, -3.170e-03, 7.641e-02, 1.224e-01, 5.736e-02, -1.010e-01, -1.254e-02)); + r += mul(s5_4, M4(5.407e-02, -1.252e-01, -2.510e-01, 
-2.635e-01, -2.482e-01, 1.820e-02, 4.384e-02, -2.852e-01, 1.575e-01, 1.870e-02, 4.663e-03, 5.438e-02, -1.672e-01, -1.272e-01, -1.955e-01, 2.434e-01)); + r += mul(s5_5, M4(-2.337e-02, -3.724e-02, -4.250e-02, 6.240e-02, 8.226e-02, -9.472e-03, -7.522e-02, -2.530e-01, -5.063e-02, 4.506e-03, -2.741e-02, -1.436e-01, -1.672e-01, 1.117e-01, -1.196e-01, 4.086e-02)); + r += mul(s5_6, M4(4.256e-02, -1.184e-01, 4.905e-02, 1.424e-01, 5.343e-02, -6.726e-02, 4.591e-03, -1.502e-03, -1.130e-01, -4.209e-02, -1.286e-02, 1.288e-01, -5.045e-02, 3.221e-02, -9.325e-02, 1.091e-01)); + r += mul(s5_7, M4(1.177e-01, -1.748e-02, 4.951e-02, -2.133e-02, 8.531e-02, -3.125e-02, -9.790e-02, -6.702e-02, -2.695e-03, -6.694e-02, -8.823e-05, -2.169e-02, -1.462e-01, -1.821e-02, -1.749e-01, -3.450e-02)); + r += mul(s5_8, M4(6.438e-02, 2.157e-02, 1.113e-02, -6.066e-02, 3.259e-02, 3.472e-02, -1.023e-01, -1.054e-01, -2.501e-02, -6.527e-02, 2.978e-02, -1.117e-01, -4.648e-02, 6.757e-02, 1.049e-02, -6.068e-02)); + r += mul(s6_0, M4(7.482e-02, -4.795e-02, 1.097e-01, -1.281e-02, -9.006e-02, 4.621e-02, 1.410e-02, -1.494e-01, -1.193e-02, 6.909e-02, 6.081e-02, 1.735e-03, 8.806e-02, 4.528e-04, -9.205e-03, -4.175e-02)); + r += mul(s6_1, M4(-1.338e-01, 3.338e-02, 5.148e-02, 1.026e-02, -5.311e-02, -3.567e-02, -1.388e-02, -7.732e-02, 5.164e-02, -5.179e-03, 5.707e-03, -6.708e-02, -1.417e-01, 3.568e-02, -4.803e-02, 1.829e-02)); + r += mul(s6_2, M4(4.183e-02, 2.789e-02, -4.511e-02, 7.451e-02, -2.456e-01, 6.764e-02, -1.296e-01, 1.734e-02, 2.853e-03, -2.430e-03, -2.769e-02, -4.073e-02, 1.062e-02, 9.062e-02, 1.312e-01, 6.774e-03)); + r += mul(s6_3, M4(9.957e-02, -8.275e-02, -7.864e-03, -1.183e-01, 3.777e-02, -1.597e-01, 1.157e-02, 2.856e-02, -1.445e-01, -7.529e-02, 1.460e-01, 7.438e-02, 2.038e-01, 1.289e-01, -8.220e-02, 2.154e-01)); + r += mul(s6_4, M4(-1.303e-01, 1.338e-01, 1.708e-01, -1.345e-01, 4.503e-02, -1.507e-01, -1.312e-01, 7.709e-02, -5.646e-02, 9.898e-02, 6.089e-02, -2.127e-01, -1.776e-01, 1.346e-01, -1.501e-01, 
4.479e-01)); + r += mul(s6_5, M4(-2.287e-02, 1.816e-02, 6.476e-03, -6.000e-03, 2.583e-02, 1.181e-01, 2.830e-02, 4.493e-02, 6.293e-02, -1.721e-02, -3.978e-02, 1.178e-02, 5.155e-02, 7.724e-02, 7.545e-02, 2.235e-01)); + r += mul(s6_6, M4(1.089e-02, 9.929e-03, 3.291e-02, -4.004e-02, -3.411e-02, -1.482e-01, 8.173e-02, -1.300e-01, -9.727e-02, 1.038e-01, 2.665e-02, 7.097e-02, 1.546e-01, -6.419e-03, -6.211e-02, 1.378e-02)); + r += mul(s6_7, M4(-3.750e-02, -6.492e-02, 1.402e-01, -8.250e-02, 1.420e-01, -7.486e-02, -1.294e-01, 7.090e-03, -6.692e-02, -5.347e-02, 5.752e-02, 9.145e-02, -1.170e-01, 2.695e-02, 3.856e-02, -1.005e-01)); + r += mul(s6_8, M4(-7.991e-02, -2.176e-02, 1.824e-02, -2.861e-02, 6.101e-02, 2.592e-02, 3.747e-02, 6.699e-02, 2.198e-02, 5.217e-02, 1.520e-02, 2.060e-02, -3.168e-02, 5.624e-02, -2.631e-02, 1.539e-01)); + r += mul(s7_0, M4(-2.234e-03, -8.024e-02, -4.880e-02, -3.628e-02, 6.282e-02, -2.876e-02, -2.045e-02, -9.916e-02, -1.951e-02, 2.522e-02, 5.536e-02, -1.400e-02, 4.454e-02, -5.995e-02, -1.083e-02, 3.267e-02)); + r += mul(s7_1, M4(1.363e-01, -1.627e-02, -7.451e-02, -2.096e-01, 5.555e-02, -8.325e-02, 4.384e-02, 3.756e-02, -4.115e-02, -4.452e-02, 5.062e-02, 1.560e-01, -6.836e-02, -9.891e-03, 7.183e-02, -8.960e-02)); + r += mul(s7_2, M4(6.864e-02, 4.224e-02, 6.149e-02, -2.407e-01, -1.763e-01, -3.429e-03, -8.891e-02, 5.303e-02, -1.704e-01, 1.372e-02, 5.493e-02, -2.983e-02, -9.787e-02, -1.998e-02, 1.076e-02, 8.817e-02)); + r += mul(s7_3, M4(6.177e-02, 9.915e-02, -2.032e-01, 1.047e-01, -3.088e-02, 1.345e-02, 4.722e-02, 2.977e-02, 7.561e-03, 1.231e-01, 5.659e-03, 1.742e-01, 1.224e-01, 5.175e-03, -4.049e-02, 9.241e-02)); + r += mul(s7_4, M4(2.840e-01, 1.155e-01, 1.048e-01, 1.054e-01, -4.146e-02, -1.333e-01, 8.523e-02, 2.791e-01, 1.161e-01, 8.725e-02, 4.376e-01, 4.971e-02, -8.106e-02, -6.255e-02, -1.393e-01, 1.394e-02)); + r += mul(s7_5, M4(-1.513e-03, -8.884e-03, -6.384e-02, 4.657e-02, -1.265e-02, 5.092e-02, 1.608e-02, 3.075e-02, -1.785e-01, -6.070e-02, 
6.474e-02, 1.909e-02, 7.206e-02, 9.939e-03, 4.189e-02, -4.761e-02)); + r += mul(s7_6, M4(1.031e-01, 1.450e-02, 1.205e-01, -2.352e-02, -1.105e-02, -1.111e-01, 9.057e-02, -7.851e-02, -3.125e-02, 4.754e-02, -7.714e-02, -1.526e-02, 1.171e-01, 7.729e-03, 6.555e-03, -4.499e-02)); + r += mul(s7_7, M4(3.134e-01, 1.278e-01, 3.646e-02, -1.853e-01, -8.604e-02, 1.031e-02, -6.833e-02, -9.669e-02, -1.471e-01, -1.839e-03, 1.054e-01, -9.685e-03, 4.999e-03, -4.691e-02, 8.529e-02, -4.799e-02)); + r += mul(s7_8, M4(1.734e-01, -9.505e-05, 6.501e-02, -2.566e-02, 9.206e-02, 4.209e-02, 3.785e-02, 2.968e-02, -6.790e-02, 5.441e-02, 5.599e-02, 6.679e-03, 7.613e-03, 7.214e-03, -4.941e-02, 6.328e-02)); + r += V4(1.524e-02, 2.854e-03, -1.639e-02, -2.178e-02); + return r; +} + +void Pass11(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = 
-max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, 
s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 12 +//!DESC conv11 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) 
+#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.915e-01, -1.169e-01, 1.190e-01, 5.826e-02, -6.870e-03, 1.762e-02, 5.676e-02, -3.206e-03, 5.004e-02, -1.895e-02, 2.043e-02, -8.222e-02, 1.991e-01, 4.123e-02, -4.290e-02, -2.150e-02)); + r += mul(s0_1, M4(-1.158e-01, -1.896e-01, -1.275e-01, -1.708e-01, -2.118e-02, 3.400e-02, -1.248e-01, -3.245e-02, -2.000e-02, 1.093e-01, 7.554e-03, 4.626e-02, 7.514e-02, 1.358e-02, 3.031e-02, 6.514e-02)); + r += mul(s0_2, M4(-7.302e-02, 1.582e-02, 2.889e-02, -1.082e-01, 1.996e-02, -4.931e-02, 1.216e-02, -5.767e-02, -8.223e-02, -7.295e-02, 7.653e-03, 2.531e-02, 8.554e-02, 2.361e-02, 2.901e-02, 3.306e-02)); + r += mul(s0_3, M4(-8.676e-02, -2.726e-01, 2.079e-01, 1.976e-01, 6.214e-02, -3.046e-02, -4.187e-02, -9.174e-02, 6.240e-02, -1.140e-01, -3.985e-02, 5.791e-02, -2.981e-01, -1.307e-03, 8.713e-02, -1.130e-01)); + r += mul(s0_4, M4(-2.886e-01, 2.820e-02, -1.374e-01, 2.046e-01, 6.850e-02, -7.892e-02, 2.798e-01, -4.937e-02, 1.450e-01, -3.480e-03, -2.459e-01, -1.068e-01, -4.921e-02, 1.577e-01, 1.469e-01, -1.741e-01)); + r += mul(s0_5, M4(-1.059e-01, 1.108e-01, -9.792e-02, -2.284e-02, -2.672e-02, -1.763e-02, -5.990e-02, -8.621e-02, -1.893e-02, -1.552e-02, -4.040e-02, 3.691e-02, -1.275e-01, -3.942e-02, 6.189e-02, -1.852e-01)); + r += mul(s0_6, M4(-1.295e-01, 
-1.477e-02, -8.103e-02, -1.089e-02, 1.002e-02, 4.025e-02, -2.811e-02, 7.984e-02, 5.059e-02, 8.992e-02, 2.116e-02, 2.413e-02, 5.388e-02, -7.130e-02, -9.228e-02, -1.752e-03)); + r += mul(s0_7, M4(4.537e-02, 1.065e-02, -1.856e-01, 1.258e-03, -4.945e-02, -1.652e-01, 1.330e-01, 1.227e-01, 9.099e-02, -2.081e-02, 4.318e-02, 1.210e-03, -8.341e-02, 2.881e-02, -1.661e-02, -6.715e-02)); + r += mul(s0_8, M4(9.870e-03, 5.725e-02, 3.166e-02, 2.656e-02, -4.916e-02, -1.751e-02, 3.296e-02, 3.218e-02, -1.498e-02, 1.853e-02, 7.123e-03, 7.339e-02, -7.305e-02, -1.655e-01, 2.114e-02, -5.433e-03)); + r += mul(s1_0, M4(6.634e-02, -2.533e-02, 8.221e-02, -3.207e-02, -8.808e-02, 3.723e-02, 6.756e-02, 1.038e-01, -4.526e-02, 2.944e-02, 3.895e-02, -1.382e-01, 1.313e-01, 1.640e-02, -1.587e-02, 6.436e-02)); + r += mul(s1_1, M4(6.915e-02, 1.180e-01, -2.793e-03, -7.074e-02, -6.017e-02, -1.370e-01, -7.107e-02, -9.355e-02, 1.330e-02, -2.301e-02, 5.857e-02, -2.194e-02, 1.753e-02, -7.409e-02, -4.217e-02, 9.795e-02)); + r += mul(s1_2, M4(6.861e-02, 2.690e-02, 2.689e-02, 1.245e-02, 5.594e-02, 3.109e-02, 3.444e-02, -7.013e-02, -2.348e-02, 1.369e-01, 6.688e-02, -4.010e-02, 2.610e-02, 5.055e-02, -7.790e-02, 5.747e-02)); + r += mul(s1_3, M4(1.062e-01, -5.265e-02, -3.452e-02, 7.210e-03, 1.180e-01, 4.908e-02, -8.465e-02, 3.929e-03, 4.042e-02, 2.169e-01, -1.626e-01, -2.743e-03, -1.461e-02, 1.247e-02, -3.060e-02, 1.619e-01)); + r += mul(s1_4, M4(-8.396e-02, 7.641e-02, 2.572e-02, 3.759e-02, 1.577e-01, -3.217e-01, 1.614e-01, -1.043e-02, 3.079e-02, -2.685e-01, -4.387e-02, -1.696e-01, 1.448e-01, 1.136e-01, -3.021e-02, 1.499e-01)); + r += mul(s1_5, M4(-8.857e-03, 5.715e-03, -6.301e-02, -4.624e-02, -4.868e-02, 2.177e-02, -2.426e-03, -1.321e-01, 5.648e-02, -6.338e-03, 4.336e-02, 2.730e-03, 4.977e-02, 4.718e-04, -1.090e-02, -3.998e-02)); + r += mul(s1_6, M4(-7.858e-03, 6.636e-02, 3.191e-02, -3.694e-02, 3.115e-02, -6.413e-02, 2.714e-02, 4.921e-02, 1.193e-01, 8.688e-02, 1.835e-01, 8.366e-02, -3.097e-02, -6.977e-02, 
-1.575e-02, 2.729e-02)); + r += mul(s1_7, M4(-4.780e-02, 2.397e-02, -1.122e-01, -5.331e-02, 4.054e-02, 1.939e-02, -1.676e-01, 4.318e-02, -5.867e-02, 1.400e-01, 2.432e-01, -2.698e-02, -3.587e-02, 1.348e-01, 7.058e-02, -6.223e-03)); + r += mul(s1_8, M4(-2.230e-02, 5.874e-02, -2.458e-03, -7.733e-03, -1.283e-02, -1.249e-01, -4.782e-02, 2.549e-02, 8.556e-02, -1.097e-01, 8.481e-02, -2.428e-03, -4.674e-03, -3.139e-02, -6.233e-02, 2.305e-02)); + r += mul(s2_0, M4(-9.413e-02, -4.736e-02, 4.676e-02, -1.255e-01, -1.187e-01, 9.706e-02, -1.397e-01, -9.898e-02, -1.176e-01, -3.923e-02, 8.698e-02, 1.274e-01, 2.611e-03, 1.828e-02, 2.152e-02, -4.350e-02)); + r += mul(s2_1, M4(6.654e-02, 2.420e-03, 5.422e-02, -7.395e-02, -2.491e-01, -1.371e-01, 3.299e-02, 5.930e-02, -1.434e-01, -1.292e-01, -5.649e-02, 4.648e-02, 8.573e-02, -2.748e-02, 1.604e-01, 1.473e-02)); + r += mul(s2_2, M4(1.804e-02, 4.354e-02, 8.807e-02, 1.544e-02, -1.448e-01, 4.923e-02, -3.842e-02, 1.971e-02, -4.645e-02, 8.470e-03, -1.666e-02, 6.277e-02, 8.557e-02, -1.134e-02, 6.399e-02, -5.746e-02)); + r += mul(s2_3, M4(-7.887e-02, 4.637e-02, -2.723e-02, 5.297e-03, -2.688e-01, 1.936e-02, -6.007e-02, 5.021e-02, 8.858e-02, 1.747e-01, 3.388e-02, 6.603e-02, 1.610e-02, -1.990e-01, 1.176e-01, 2.307e-01)); + r += mul(s2_4, M4(1.228e-01, -1.251e-01, -1.899e-01, -1.793e-01, -2.170e-01, 1.480e-01, 1.720e-01, 8.567e-03, 6.609e-02, -2.860e-01, -1.202e-01, 1.993e-01, 1.246e-01, -6.924e-02, -1.348e-01, -2.488e-01)); + r += mul(s2_5, M4(2.079e-02, 1.208e-01, 9.448e-02, -7.905e-02, -1.375e-01, 6.923e-02, -4.167e-02, 8.062e-03, 1.164e-01, 6.644e-02, -1.122e-01, 1.183e-01, 3.517e-02, 3.437e-02, -7.007e-04, 3.188e-02)); + r += mul(s2_6, M4(2.444e-01, 2.773e-01, -1.278e-01, -3.716e-02, -1.564e-01, -3.690e-02, 5.900e-03, -5.933e-03, -6.071e-02, 3.003e-02, -4.444e-02, -1.545e-02, -4.730e-02, -6.126e-02, 1.284e-01, 4.191e-02)); + r += mul(s2_7, M4(-9.036e-02, -6.615e-02, 1.670e-01, 1.248e-02, -1.547e-02, 1.639e-05, 4.639e-02, 9.701e-02, 5.673e-02, 
-5.440e-02, -1.628e-02, 6.036e-02, 1.977e-01, 1.232e-02, -8.781e-02, 6.303e-02)); + r += mul(s2_8, M4(-1.259e-03, -1.527e-01, -5.524e-02, 2.599e-02, -1.163e-01, 1.410e-01, -8.661e-02, -2.286e-02, 2.106e-02, 4.968e-02, -1.060e-01, 5.203e-02, -3.248e-02, 1.855e-02, 7.102e-02, -5.136e-02)); + r += mul(s3_0, M4(-2.382e-01, -4.257e-02, 1.572e-02, -6.708e-02, -8.716e-03, 3.438e-02, -3.772e-02, 3.491e-02, -1.030e-02, 6.835e-03, 6.272e-02, 1.167e-02, -1.005e-01, -1.801e-02, 1.201e-02, -7.896e-02)); + r += mul(s3_1, M4(-1.519e-01, -2.255e-02, -2.546e-02, -2.894e-02, -1.238e-01, -4.474e-02, 9.939e-02, 1.083e-01, -9.085e-03, 7.234e-02, -4.092e-02, -5.775e-02, -3.120e-02, -1.194e-01, -6.205e-02, 1.012e-01)); + r += mul(s3_2, M4(-7.892e-02, -3.893e-02, -1.227e-02, -3.555e-02, -1.229e-01, -1.627e-02, -2.630e-02, 3.135e-02, -7.737e-02, -7.661e-02, -4.174e-02, -1.994e-02, -1.121e-02, -1.866e-01, -3.032e-02, 2.742e-02)); + r += mul(s3_3, M4(-5.926e-03, 1.696e-01, -7.380e-04, 1.619e-01, -1.403e-01, 3.431e-04, 3.609e-02, -2.054e-02, 5.822e-02, 2.354e-02, 4.879e-02, -3.406e-02, -1.296e-01, 9.718e-02, -2.703e-02, -3.047e-02)); + r += mul(s3_4, M4(1.956e-02, -1.727e-01, 1.998e-01, 1.020e-01, -1.521e-01, -1.670e-02, 1.427e-01, 3.305e-02, -5.912e-03, -1.124e-01, 5.324e-02, 3.821e-02, 2.170e-01, -2.779e-01, 4.821e-01, -1.473e-01)); + r += mul(s3_5, M4(3.682e-02, 1.105e-01, 8.481e-03, 2.969e-02, 2.546e-02, 5.529e-02, -2.567e-02, -6.981e-03, 1.006e-02, -4.124e-02, -4.984e-02, -1.972e-02, -1.615e-02, -1.370e-01, -3.224e-02, -2.655e-02)); + r += mul(s3_6, M4(-9.353e-04, 4.101e-02, 6.678e-03, 4.895e-02, -2.400e-02, -2.993e-02, -1.824e-02, 8.775e-04, -6.785e-02, -2.088e-02, 4.766e-02, -2.042e-02, 1.184e-02, 5.569e-02, -1.253e-01, -7.349e-02)); + r += mul(s3_7, M4(-4.982e-03, -8.675e-02, 9.220e-03, 3.292e-02, -6.760e-02, 2.873e-03, 3.582e-02, 4.781e-02, 1.800e-01, 8.379e-02, -9.129e-02, 5.949e-02, -3.530e-02, -2.078e-01, 1.357e-01, 6.611e-02)); + r += mul(s3_8, M4(-5.324e-02, -1.870e-02, 
1.161e-02, 2.989e-05, 6.348e-03, 6.436e-02, -4.735e-02, -2.747e-03, -1.014e-02, 4.067e-02, -4.654e-02, -2.985e-02, -6.898e-02, 5.993e-02, 5.866e-02, -5.755e-02)); + r += mul(s4_0, M4(-1.806e-01, -6.770e-02, 8.034e-02, -1.095e-01, 4.820e-02, -7.115e-02, 4.395e-02, -2.725e-02, -4.917e-03, 5.965e-03, -2.441e-02, 3.640e-02, 1.741e-02, 1.028e-01, 7.352e-03, 9.533e-04)); + r += mul(s4_1, M4(1.244e-02, 1.539e-02, -7.837e-02, -9.851e-02, -1.872e-01, 2.685e-02, -1.372e-02, -1.288e-01, -7.416e-02, -1.059e-03, 1.548e-01, -8.075e-02, -3.300e-02, -1.452e-01, -1.467e-02, 4.678e-02)); + r += mul(s4_2, M4(-6.319e-02, 2.047e-02, -3.524e-02, -8.293e-02, 1.226e-01, -2.587e-01, 2.677e-01, -5.000e-02, 4.268e-03, 6.761e-02, 4.449e-02, -5.548e-02, -6.619e-03, 6.231e-02, -5.589e-03, 5.727e-02)); + r += mul(s4_3, M4(2.026e-02, 1.317e-01, -1.516e-02, -2.647e-01, -4.952e-02, 5.845e-02, -5.285e-02, -4.733e-03, -5.163e-02, 2.782e-02, 3.565e-02, 3.868e-02, 2.486e-02, 8.674e-02, -5.657e-02, 1.044e-02)); + r += mul(s4_4, M4(3.854e-02, -1.673e-01, 3.037e-01, 1.016e-01, -1.080e-02, 6.207e-02, 8.249e-02, 1.300e-01, -8.758e-02, 3.024e-02, 1.130e-01, 8.472e-02, -3.803e-02, -7.029e-02, 1.651e-02, 3.407e-01)); + r += mul(s4_5, M4(9.317e-02, 6.574e-02, 1.338e-01, 9.046e-03, 3.361e-02, -1.313e-01, 1.517e-01, 4.762e-02, 7.507e-03, -8.164e-02, -1.650e-01, -2.319e-02, -1.172e-01, -8.297e-02, 9.258e-02, -4.425e-02)); + r += mul(s4_6, M4(4.385e-02, 8.606e-03, -1.949e-02, 3.010e-03, 5.382e-02, -4.354e-02, 1.261e-02, -5.949e-02, -1.466e-02, -1.887e-02, 4.227e-04, 9.980e-03, 2.933e-02, 8.980e-03, 9.546e-02, 3.022e-02)); + r += mul(s4_7, M4(5.263e-02, -1.155e-01, 8.001e-02, 1.899e-01, 1.548e-02, -4.710e-02, 1.056e-01, -4.798e-02, -1.586e-01, -1.727e-02, -1.260e-01, -3.351e-02, -1.084e-02, -1.788e-02, -2.251e-01, -1.177e-01)); + r += mul(s4_8, M4(3.676e-02, 8.270e-02, 1.031e-01, 5.032e-02, -1.351e-01, -1.402e-01, 1.820e-02, -5.783e-02, -2.887e-02, 3.201e-02, -6.555e-03, -6.838e-02, 5.485e-02, -5.880e-02, 1.818e-02, 
5.227e-02)); + r += mul(s5_0, M4(-2.373e-02, -5.297e-02, 6.409e-02, -8.760e-03, 1.765e-02, 3.389e-02, -1.094e-02, -8.668e-02, 2.111e-02, -4.394e-02, 6.426e-03, 7.256e-03, 8.129e-03, 2.218e-02, -9.743e-04, -5.823e-02)); + r += mul(s5_1, M4(-4.578e-04, -1.229e-01, 5.579e-02, -1.009e-01, -1.058e-01, 3.231e-02, -1.274e-02, -7.973e-02, -6.806e-02, -2.071e-02, -2.131e-02, -8.127e-02, 3.821e-03, 1.196e-02, -5.473e-02, 1.707e-01)); + r += mul(s5_2, M4(-2.637e-02, 2.379e-03, -5.428e-02, -3.823e-02, -7.033e-03, 1.333e-01, -1.759e-03, -2.521e-02, 4.663e-02, -6.904e-02, 3.872e-02, -1.550e-03, 2.389e-03, -1.724e-02, 1.931e-02, 1.147e-03)); + r += mul(s5_3, M4(1.278e-01, -2.017e-01, 1.042e-01, -7.642e-02, -4.654e-02, -3.928e-02, -1.189e-01, 7.038e-02, 2.995e-01, 6.048e-02, 1.942e-02, 1.076e-01, -6.889e-03, -7.257e-02, -4.722e-02, 3.982e-02)); + r += mul(s5_4, M4(-4.283e-02, 2.050e-01, -3.413e-01, -1.939e-02, -6.313e-02, -4.603e-02, -3.493e-02, 4.534e-02, -8.708e-02, -1.357e-01, 1.964e-01, 2.077e-01, -5.591e-02, -1.052e-01, -1.626e-01, 9.349e-02)); + r += mul(s5_5, M4(3.414e-02, 6.631e-02, 5.159e-02, -9.068e-03, -4.447e-02, -8.555e-03, 1.280e-01, 5.758e-02, 1.168e-01, 5.714e-02, -1.075e-01, 4.452e-02, -4.317e-03, -7.799e-02, 1.165e-01, -3.921e-02)); + r += mul(s5_6, M4(5.624e-02, -1.040e-02, 3.795e-02, 2.532e-02, -2.144e-02, -2.567e-02, -2.597e-03, 1.717e-02, 1.543e-01, -3.063e-02, -5.567e-02, 1.063e-01, -2.135e-02, -4.029e-02, -4.940e-03, 1.869e-01)); + r += mul(s5_7, M4(-8.275e-02, 6.599e-02, 7.262e-02, 4.369e-02, -5.231e-02, -6.227e-02, 6.567e-02, 1.104e-02, 7.435e-02, -5.245e-02, -4.217e-02, 1.187e-01, -1.648e-01, -4.021e-01, 6.267e-03, 7.223e-02)); + r += mul(s5_8, M4(2.294e-02, -5.419e-02, -5.410e-02, 2.546e-02, 1.979e-02, 2.789e-03, -3.908e-02, 7.568e-03, 8.402e-02, -3.150e-01, 1.032e-01, 1.685e-02, -4.976e-02, 7.686e-02, 1.353e-01, 2.894e-02)); + r += mul(s6_0, M4(-1.030e-01, -2.008e-02, 4.383e-02, 2.176e-02, -7.612e-03, -1.877e-02, 1.396e-02, -1.144e-01, 2.938e-02, 
-2.272e-02, -5.583e-02, 5.873e-02, -1.096e-01, -2.677e-02, -3.503e-02, 8.138e-02)); + r += mul(s6_1, M4(-2.919e-02, -1.364e-01, 8.917e-02, 8.972e-02, -2.768e-02, -1.571e-01, -4.487e-02, -3.469e-01, -1.119e-01, -1.312e-01, -1.155e-01, -8.366e-02, -8.082e-02, -1.538e-02, 1.875e-02, 3.163e-02)); + r += mul(s6_2, M4(-5.143e-02, -5.594e-02, -2.071e-02, 5.367e-02, -2.671e-02, 3.961e-02, 7.607e-02, -8.289e-02, -3.423e-02, -1.023e-01, 2.431e-02, -4.870e-02, 2.373e-02, 2.842e-01, 2.028e-02, 7.859e-02)); + r += mul(s6_3, M4(-8.823e-02, 1.350e-01, 4.078e-02, -1.142e-01, 4.595e-02, -5.684e-02, -6.918e-02, 6.567e-02, 8.170e-02, 9.910e-02, -6.656e-02, -1.071e-01, -1.549e-01, 1.994e-02, 8.155e-03, 2.132e-02)); + r += mul(s6_4, M4(-1.350e-01, -1.147e-02, 9.153e-02, -3.440e-02, -1.150e-01, -1.059e-01, -7.237e-03, 1.449e-01, -2.214e-01, 6.290e-02, 2.961e-02, 3.729e-01, -1.385e-01, 1.320e-01, 1.545e-01, -6.541e-03)); + r += mul(s6_5, M4(1.651e-02, 1.181e-02, 3.175e-02, -7.075e-02, -1.815e-02, -1.944e-02, 2.353e-01, 1.433e-01, -1.284e-01, 6.948e-02, -1.235e-01, 1.018e-03, -1.500e-01, 1.535e-01, 2.512e-02, -4.040e-02)); + r += mul(s6_6, M4(3.686e-02, -2.102e-02, 2.925e-02, 5.221e-02, 7.893e-03, 1.431e-02, 1.292e-02, 4.678e-02, 7.289e-02, 2.643e-02, 4.998e-02, -1.128e-03, -4.384e-02, 1.055e-01, -7.729e-02, 1.467e-02)); + r += mul(s6_7, M4(-9.708e-02, -8.313e-02, 2.002e-02, 4.917e-02, 1.286e-01, -2.388e-02, -2.990e-02, 3.049e-02, -6.360e-02, -6.350e-02, 1.272e-01, 8.846e-02, 1.051e-01, 8.866e-02, 3.673e-03, 4.448e-02)); + r += mul(s6_8, M4(1.552e-02, -3.738e-02, -1.043e-02, 3.421e-02, 4.796e-02, 3.540e-02, 6.775e-03, -9.088e-03, -2.275e-02, 1.607e-03, -5.515e-02, -5.932e-02, -1.043e-01, 5.368e-02, 5.681e-02, -1.967e-02)); + r += mul(s7_0, M4(5.096e-02, -6.540e-02, 4.110e-02, 2.243e-02, -3.277e-02, 5.422e-02, -5.506e-03, -1.308e-01, -4.290e-02, -6.675e-02, -2.549e-02, 6.027e-02, -1.264e-02, -3.517e-02, -5.575e-03, -1.108e-02)); + r += mul(s7_1, M4(1.553e-02, 4.039e-01, 5.457e-02, 
4.138e-02, 1.530e-01, -8.048e-02, 1.047e-01, -9.912e-02, 5.112e-02, 2.142e-02, -2.724e-02, 2.524e-02, -3.261e-02, -2.307e-02, -2.086e-02, -6.956e-02)); + r += mul(s7_2, M4(1.607e-01, 2.136e-01, 6.385e-02, 5.017e-02, -9.533e-03, -4.155e-02, 6.803e-03, -6.959e-02, 1.470e-02, -5.130e-02, 6.967e-02, 5.498e-03, 1.553e-02, 3.507e-02, -7.169e-03, -2.210e-02)); + r += mul(s7_3, M4(-2.897e-02, 3.350e-02, 7.739e-02, -8.739e-02, 2.262e-02, -6.763e-03, 1.308e-01, -1.563e-02, -3.232e-02, -2.581e-02, 4.054e-02, -2.837e-02, -4.473e-02, 4.890e-02, 1.346e-02, 2.120e-02)); + r += mul(s7_4, M4(-1.030e-02, -2.604e-02, -9.145e-02, -1.142e-01, 1.186e-01, 7.002e-02, 5.314e-02, -4.469e-03, -1.397e-01, 1.169e-01, -2.299e-01, 1.156e-01, 3.586e-02, -1.993e-01, 1.733e-01, 5.192e-02)); + r += mul(s7_5, M4(-9.763e-03, 1.068e-01, 1.519e-01, -1.555e-01, -6.049e-02, -8.637e-02, -1.997e-02, -3.331e-02, -7.498e-02, -9.361e-02, 7.936e-02, -6.039e-02, -2.886e-02, 1.489e-02, -4.254e-04, 1.233e-01)); + r += mul(s7_6, M4(1.101e-01, 1.534e-01, 9.363e-02, 2.898e-02, -5.850e-03, 7.591e-02, -3.393e-02, 1.964e-02, -7.931e-02, -4.888e-02, 1.382e-04, 5.309e-02, -4.719e-03, -1.116e-02, 4.092e-02, 4.215e-02)); + r += mul(s7_7, M4(-3.126e-02, -1.987e-01, 1.548e-01, 3.259e-02, 3.201e-02, -8.217e-02, -1.775e-02, 5.012e-02, 1.145e-02, 3.562e-02, 5.696e-02, -5.324e-02, -6.771e-02, -3.671e-02, -2.649e-02, -1.009e-02)); + r += mul(s7_8, M4(1.190e-01, 4.165e-02, -6.934e-03, 2.994e-02, -1.333e-01, -5.397e-02, 4.062e-02, 3.631e-02, -7.378e-02, -8.021e-03, -2.037e-03, -8.693e-02, 3.944e-02, -1.413e-02, 3.083e-02, 4.403e-02)); + r += V4(2.128e-02, 1.360e-02, 2.588e-03, 3.974e-04); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 
s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(9.861e-02, 1.810e-02, 6.419e-02, 2.502e-01, -1.083e-02, 2.346e-02, 7.291e-03, -1.595e-01, 7.255e-02, -5.626e-02, 6.578e-02, 6.141e-02, -1.428e-02, 8.072e-02, 3.138e-02, 1.041e-01)); + r += mul(s0_1, M4(-2.476e-02, -6.875e-02, -4.513e-02, 2.287e-02, 4.178e-02, 3.709e-04, 1.062e-01, 6.409e-02, -4.064e-02, -1.098e-01, 1.416e-02, -4.138e-02, -1.564e-01, 3.113e-02, 4.591e-02, 1.386e-01)); + r += mul(s0_2, M4(-7.755e-02, -6.462e-02, 1.357e-01, 8.907e-03, 8.772e-03, 9.210e-02, 3.133e-04, -7.970e-02, -1.008e-01, -2.192e-02, 2.674e-02, 1.210e-01, 6.194e-02, 1.382e-01, -4.793e-02, 1.693e-02)); + r += mul(s0_3, M4(2.218e-01, -7.579e-02, 1.663e-01, 2.604e-01, -5.619e-02, 1.028e-02, -1.192e-01, -1.204e-01, 3.293e-02, 1.888e-02, 1.239e-01, 1.883e-01, 2.025e-01, 1.058e-03, 3.568e-02, -7.469e-03)); + r += mul(s0_4, M4(-2.682e-01, -1.258e-01, 1.661e-01, -3.561e-01, -8.037e-02, -1.882e-01, 3.518e-02, 3.871e-02, 1.082e-01, 8.126e-02, -2.693e-01, -7.492e-02, -5.087e-02, 1.586e-01, -8.180e-02, -1.202e-02)); + r += mul(s0_5, M4(-1.311e-02, -1.008e-01, 8.735e-02, -4.129e-02, -8.179e-02, 1.128e-02, -4.526e-02, -1.084e-02, 3.870e-02, 1.408e-02, -1.383e-01, 1.011e-02, -5.819e-02, 4.581e-02, 6.933e-03, -8.229e-02)); + r += mul(s0_6, M4(1.606e-01, -8.163e-03, 3.041e-04, 1.293e-01, -1.172e-01, 7.398e-02, -2.390e-02, -4.539e-02, 3.576e-02, 5.539e-04, -7.322e-03, 3.657e-02, 6.095e-02, -4.478e-02, -8.989e-02, 9.781e-02)); + r += mul(s0_7, M4(-1.609e-01, -3.231e-02, -6.615e-02, 1.611e-02, 2.508e-01, -1.504e-01, 6.883e-02, -1.121e-02, 3.308e-03, 6.361e-03, 2.820e-03, 1.006e-01, -1.228e-01, 3.641e-02, -7.172e-02, -5.411e-02)); + r += mul(s0_8, 
M4(-3.758e-02, -8.353e-03, -1.148e-02, 8.195e-02, 1.702e-02, -4.740e-02, -1.646e-02, 6.989e-02, -4.097e-02, -3.479e-03, -1.575e-02, 1.765e-01, 2.417e-02, 6.013e-02, 6.908e-02, -5.958e-02)); + r += mul(s1_0, M4(-4.773e-02, -1.151e-02, 9.746e-03, 5.325e-02, -1.013e-01, 2.347e-02, 2.053e-02, -1.300e-03, 8.285e-03, 4.885e-03, -5.720e-02, -1.238e-01, 5.272e-03, -6.600e-02, 2.554e-02, 2.350e-02)); + r += mul(s1_1, M4(-6.101e-02, -3.187e-02, -1.678e-02, -4.436e-02, 1.294e-01, 7.714e-02, 2.367e-02, 9.440e-02, 1.046e-01, 2.280e-02, 2.139e-02, 4.107e-02, -3.158e-02, -1.144e-01, -6.250e-03, -7.049e-02)); + r += mul(s1_2, M4(2.380e-02, 2.683e-03, -5.063e-02, -1.144e-02, -2.753e-02, -3.963e-02, 4.263e-02, -1.172e-01, -1.153e-01, -4.556e-02, 7.579e-02, 7.674e-02, 4.053e-02, 4.403e-02, -7.944e-02, -8.035e-02)); + r += mul(s1_3, M4(1.527e-01, 3.639e-02, -2.715e-02, 1.643e-01, 5.152e-02, -9.064e-02, -1.411e-01, -2.476e-01, -9.955e-02, 1.098e-01, 1.724e-02, -1.867e-02, 7.503e-03, -6.265e-02, 6.342e-02, 1.467e-02)); + r += mul(s1_4, M4(4.156e-02, 1.587e-01, -9.157e-02, -1.867e-01, 2.170e-01, -2.680e-01, 3.912e-02, 2.190e-01, 2.286e-01, 2.782e-01, -6.481e-03, 2.057e-02, -4.271e-02, -2.637e-02, -3.364e-02, 9.123e-02)); + r += mul(s1_5, M4(8.277e-02, 1.521e-03, -2.840e-02, 3.656e-02, -1.687e-02, 4.315e-02, 4.808e-02, 5.464e-02, -7.452e-02, 4.107e-02, 6.658e-02, -9.818e-02, -4.003e-02, -2.246e-02, -5.058e-02, -8.255e-03)); + r += mul(s1_6, M4(-1.963e-02, -2.891e-02, 7.599e-02, 1.735e-02, -2.876e-02, -4.554e-02, -1.035e-01, 8.704e-02, -2.394e-02, 8.710e-02, -1.132e-01, 7.940e-02, -2.644e-02, 6.812e-03, -1.838e-02, 1.636e-01)); + r += mul(s1_7, M4(-3.273e-02, 3.213e-02, 5.066e-04, -6.173e-03, -2.231e-01, -1.584e-02, 7.763e-02, 1.087e-02, 1.274e-01, -1.020e-01, 8.857e-02, 1.486e-01, -1.360e-01, -6.677e-03, 1.223e-01, 3.943e-03)); + r += mul(s1_8, M4(-2.621e-02, 2.312e-02, 2.959e-02, -3.508e-02, -8.455e-02, -1.248e-01, 4.520e-02, -4.507e-02, 5.240e-02, 1.878e-01, -3.403e-02, -9.723e-02, 
1.172e-02, 9.380e-03, -1.591e-02, 1.790e-02)); + r += mul(s2_0, M4(-4.653e-02, -5.118e-02, 3.158e-02, 1.532e-01, 4.989e-02, -4.555e-03, 1.874e-01, -3.272e-01, -1.386e-01, 5.731e-02, 7.671e-02, 2.568e-01, -3.503e-02, -4.580e-02, -1.435e-02, 9.380e-02)); + r += mul(s2_1, M4(1.101e-01, -2.787e-01, 7.885e-02, -1.669e-01, -1.885e-02, 6.133e-02, 4.324e-02, 8.274e-02, 2.189e-01, -5.179e-02, 1.711e-01, 8.551e-02, 9.272e-03, 2.682e-02, -2.505e-01, -3.644e-02)); + r += mul(s2_2, M4(-7.932e-02, -4.863e-02, -8.513e-03, 5.279e-02, -6.115e-02, 6.127e-02, 5.479e-02, 8.834e-02, -1.818e-01, 8.003e-02, -2.282e-02, 7.345e-02, -1.437e-01, -1.694e-01, 6.211e-02, 8.869e-02)); + r += mul(s2_3, M4(2.295e-01, 1.147e-01, -4.242e-02, -6.906e-02, -1.372e-01, -1.550e-01, 1.597e-01, 3.426e-02, -3.796e-02, 6.194e-02, 1.454e-01, 5.858e-02, 1.212e-01, -1.196e-01, 2.121e-01, 1.099e-01)); + r += mul(s2_4, M4(9.416e-02, 1.947e-01, -1.973e-02, -1.809e-01, -2.024e-01, -7.946e-02, 1.482e-01, 2.162e-01, -7.181e-02, -8.151e-02, 1.364e-01, 8.638e-02, 4.401e-02, 1.989e-01, -9.265e-02, -1.192e-01)); + r += mul(s2_5, M4(-9.022e-02, -7.998e-02, 1.039e-01, -3.487e-02, -1.025e-01, -9.302e-02, 1.054e-01, -1.959e-01, 1.635e-02, -1.455e-01, 8.270e-02, 1.031e-01, -2.780e-02, -3.080e-03, 7.018e-04, -1.151e-01)); + r += mul(s2_6, M4(-2.700e-02, 8.750e-02, -7.652e-02, -1.898e-02, 1.625e-02, -1.408e-03, 6.793e-02, 2.412e-02, -8.187e-02, 4.516e-02, -8.717e-03, 1.623e-01, 1.085e-01, -6.325e-02, 1.013e-01, 2.221e-02)); + r += mul(s2_7, M4(-7.386e-02, -1.695e-01, 4.037e-02, 5.409e-02, -7.252e-02, -1.691e-02, 2.455e-02, 8.284e-02, -1.143e-01, 1.054e-01, 1.559e-01, 1.949e-01, 4.537e-02, -3.583e-02, 9.264e-02, 6.329e-02)); + r += mul(s2_8, M4(-4.150e-02, 4.022e-02, 7.980e-02, -1.115e-01, -5.658e-02, -2.031e-03, -6.667e-02, 1.512e-01, -8.647e-02, -3.108e-02, -5.697e-02, 8.242e-02, -7.639e-02, -3.099e-02, -1.168e-02, 3.149e-02)); + r += mul(s3_0, M4(6.074e-02, -9.433e-02, -1.455e-02, 2.344e-02, 6.801e-02, 3.095e-02, -9.606e-02, 
1.790e-02, -3.857e-03, -1.618e-02, -2.126e-02, 4.367e-02, 7.509e-02, 5.339e-02, -1.697e-01, 5.161e-02)); + r += mul(s3_1, M4(-1.572e-01, 3.433e-02, 1.056e-02, 1.624e-01, -1.115e-01, -6.066e-02, 1.438e-01, -5.687e-02, -5.053e-02, -6.958e-02, 1.284e-02, -1.181e-02, -6.860e-02, -5.012e-02, 6.389e-02, 2.225e-01)); + r += mul(s3_2, M4(9.430e-02, -7.752e-02, 2.481e-02, -8.512e-03, 2.760e-02, 7.608e-03, 8.042e-02, 9.668e-02, 1.143e-02, -2.981e-02, -1.074e-02, -2.974e-02, 8.299e-02, 2.113e-02, -1.003e-01, 2.120e-02)); + r += mul(s3_3, M4(-2.065e-01, 8.363e-02, -1.663e-01, 4.624e-02, -7.967e-02, 4.552e-02, -3.836e-02, -1.080e-02, 1.517e-02, 1.873e-02, -3.017e-02, -1.055e-01, -5.513e-02, 8.575e-02, -3.376e-01, 9.619e-02)); + r += mul(s3_4, M4(-4.406e-02, -7.260e-02, 9.714e-02, -5.473e-02, -1.266e-01, 5.401e-02, 1.478e-01, 2.376e-02, 8.741e-02, -6.798e-03, -8.303e-02, 9.785e-03, -3.452e-02, -2.306e-01, 1.248e-01, 9.723e-02)); + r += mul(s3_5, M4(1.082e-01, 3.750e-02, -7.444e-02, -5.845e-02, 5.428e-02, -2.493e-02, 1.083e-02, -2.629e-02, 1.151e-01, 3.029e-02, -1.490e-02, -3.747e-02, 4.242e-02, 2.020e-01, -1.222e-01, 1.628e-01)); + r += mul(s3_6, M4(-7.236e-02, -3.901e-02, 5.888e-02, 6.976e-02, -8.068e-02, -3.520e-02, -8.337e-03, 2.699e-03, 2.558e-02, -6.610e-02, -8.027e-03, -4.848e-02, -7.649e-02, 8.153e-02, -1.475e-01, -3.082e-03)); + r += mul(s3_7, M4(6.555e-02, 5.288e-02, -4.870e-02, -8.899e-03, 4.118e-02, -1.026e-01, -2.763e-03, 8.326e-02, 1.053e-02, -2.956e-03, -9.619e-02, -5.726e-03, 5.812e-02, -1.540e-01, 4.855e-02, 4.349e-02)); + r += mul(s3_8, M4(5.661e-02, 7.438e-03, -3.194e-02, 2.968e-02, -4.814e-02, -1.049e-02, 5.669e-02, 5.227e-02, 1.759e-02, 7.979e-04, -7.995e-02, -1.906e-02, 1.411e-02, 2.705e-02, -6.117e-03, 1.179e-01)); + r += mul(s4_0, M4(4.138e-02, 8.165e-03, -5.496e-02, 2.646e-01, 4.543e-02, 7.343e-02, -1.644e-01, 2.693e-03, -6.105e-03, -7.733e-02, 2.326e-02, -4.724e-03, -3.503e-02, 9.536e-03, 6.063e-03, -5.583e-02)); + r += mul(s4_1, M4(-5.709e-02, 
3.212e-02, -1.460e-01, -3.559e-02, 2.203e-01, 4.014e-02, -9.035e-02, 6.420e-02, -5.015e-02, -2.371e-02, 1.237e-02, -2.477e-03, 9.596e-02, 2.574e-02, -7.583e-03, -1.077e-02)); + r += mul(s4_2, M4(1.413e-02, -2.921e-02, 1.968e-02, -7.302e-02, -1.112e-02, 8.178e-02, -7.203e-02, -8.773e-02, 3.204e-02, 1.049e-02, 3.123e-02, -1.198e-01, -7.578e-02, -2.187e-03, 5.386e-02, -5.244e-02)); + r += mul(s4_3, M4(-1.315e-01, 6.912e-02, -2.608e-01, 3.384e-03, 6.421e-02, -9.794e-03, -1.910e-01, 9.869e-02, -8.240e-02, 1.818e-02, 4.489e-02, 9.533e-02, -1.396e-02, 2.221e-02, -2.765e-02, -2.743e-02)); + r += mul(s4_4, M4(8.430e-02, 3.913e-02, -1.024e-01, 4.850e-02, 4.148e-02, -6.597e-02, 6.364e-02, 2.062e-01, -6.475e-02, 3.762e-02, -2.984e-04, -2.388e-02, -3.737e-02, 3.449e-01, -1.450e-01, 1.814e-01)); + r += mul(s4_5, M4(3.153e-02, 1.752e-01, -1.235e-01, 9.559e-02, 3.415e-01, 2.977e-02, 4.321e-02, 1.343e-01, 1.560e-01, 7.211e-02, 1.160e-01, 1.502e-02, 2.140e-02, -5.785e-02, -3.213e-02, 1.006e-01)); + r += mul(s4_6, M4(-5.924e-02, 8.264e-02, -2.766e-02, -2.691e-03, -1.265e-01, 3.158e-02, -7.472e-02, -4.071e-02, 6.202e-02, -7.105e-02, 2.099e-02, -2.883e-02, 1.067e-01, -1.619e-02, 2.582e-02, -1.193e-01)); + r += mul(s4_7, M4(2.155e-01, 6.200e-02, -2.614e-02, 1.846e-01, 3.051e-01, 3.577e-02, 1.145e-02, 2.007e-02, -1.371e-01, 1.737e-02, -2.292e-02, 1.920e-01, -2.273e-01, 4.998e-02, 8.282e-03, 1.758e-01)); + r += mul(s4_8, M4(9.429e-02, 4.540e-02, -4.373e-02, -1.160e-02, -4.470e-02, 1.167e-01, 1.250e-01, 1.890e-01, -1.202e-01, 2.071e-02, 9.206e-02, 7.676e-03, 3.892e-02, 5.756e-02, -1.227e-03, -2.003e-01)); + r += mul(s5_0, M4(-3.945e-02, -8.039e-02, 1.199e-02, 1.966e-01, -9.777e-03, 7.548e-02, -9.030e-02, -2.660e-02, -4.957e-02, -6.565e-02, -1.622e-02, -1.379e-01, 1.367e-02, 5.800e-02, 1.834e-02, 7.695e-02)); + r += mul(s5_1, M4(7.861e-02, 3.483e-02, -1.459e-01, -1.462e-01, 7.203e-02, -3.863e-02, -9.672e-02, -1.374e-01, 7.629e-02, 1.104e-01, 1.283e-01, 2.068e-02, 5.023e-02, -1.772e-02, 
6.289e-02, 2.893e-02)); + r += mul(s5_2, M4(-5.870e-02, -2.446e-02, 3.727e-02, 8.240e-02, -3.007e-02, 2.914e-02, 7.667e-02, -7.461e-02, 2.255e-02, 1.335e-01, -1.895e-01, -2.057e-03, 3.962e-02, -4.255e-02, 6.384e-02, 6.045e-03)); + r += mul(s5_3, M4(1.911e-01, -6.249e-02, 2.068e-01, 5.497e-02, -1.472e-02, 9.144e-03, -2.697e-02, -5.151e-02, -1.496e-01, 1.497e-02, 4.692e-02, -9.169e-02, -6.766e-02, -5.018e-02, -5.010e-03, 2.945e-02)); + r += mul(s5_4, M4(1.793e-01, 2.389e-01, -2.089e-01, 2.215e-03, -8.109e-02, -4.797e-02, 7.083e-02, -1.459e-01, 8.028e-02, -2.240e-01, -3.341e-04, -1.007e-01, 1.094e-01, 1.557e-01, -1.603e-03, -1.452e-01)); + r += mul(s5_5, M4(-7.773e-02, -6.554e-02, 7.054e-02, 4.107e-02, -7.730e-04, -1.231e-01, 1.519e-02, 1.209e-01, 1.117e-01, 1.011e-01, 6.114e-02, -1.169e-01, -1.693e-01, -1.078e-01, -9.017e-02, 2.378e-01)); + r += mul(s5_6, M4(5.646e-02, -7.175e-04, 7.008e-02, -5.989e-02, -3.315e-02, 7.058e-03, -4.455e-02, 1.279e-02, 5.475e-02, 8.603e-03, 1.289e-01, -5.157e-03, 5.779e-03, -4.932e-02, -8.713e-02, 1.219e-02)); + r += mul(s5_7, M4(2.670e-02, -7.649e-02, 6.818e-02, 6.627e-02, 9.118e-02, 3.245e-02, 5.684e-02, 3.755e-02, -6.150e-02, -2.769e-02, -2.560e-02, -3.468e-02, 2.159e-01, -1.506e-01, 2.127e-02, 1.382e-01)); + r += mul(s5_8, M4(-1.019e-01, -6.627e-02, 1.043e-01, -2.586e-02, 2.302e-02, -4.713e-03, 3.749e-02, 1.670e-02, 1.364e-01, -4.719e-02, 1.037e-01, -2.311e-01, -1.186e-01, -2.605e-02, 9.677e-03, 3.698e-02)); + r += mul(s6_0, M4(-6.494e-03, 4.738e-02, -1.494e-02, 1.177e-03, 9.666e-02, -1.351e-01, 2.174e-02, -5.155e-02, -2.843e-02, -3.406e-04, -4.074e-02, -5.564e-02, -1.004e-01, -1.048e-02, 3.339e-02, -2.417e-02)); + r += mul(s6_1, M4(3.098e-02, -3.020e-02, 8.200e-02, 6.540e-02, 7.265e-02, -8.772e-02, 5.711e-02, -1.953e-01, 4.587e-03, 1.254e-01, 2.714e-01, 1.993e-01, -2.715e-02, -8.154e-02, -1.626e-01, 5.557e-02)); + r += mul(s6_2, M4(3.862e-02, 3.743e-03, 6.377e-02, -8.208e-02, -1.175e-01, -1.081e-01, 7.790e-02, 1.595e-02, 8.240e-02, 
1.424e-02, 4.165e-02, -2.203e-02, -1.018e-01, -2.509e-01, -1.907e-02, 6.418e-03)); + r += mul(s6_3, M4(4.920e-02, 2.192e-02, -2.880e-02, -3.719e-02, 9.495e-02, -4.514e-02, 1.471e-01, 5.986e-02, -2.121e-02, 9.101e-02, -1.167e-01, -1.254e-01, -1.637e-01, -4.881e-02, 8.190e-02, 1.841e-01)); + r += mul(s6_4, M4(-3.650e-02, -5.998e-02, 3.780e-02, -1.643e-02, -6.872e-02, -1.650e-01, -2.220e-01, -3.357e-01, 4.948e-02, 4.375e-02, 2.535e-01, 1.119e-01, -3.487e-01, -2.786e-02, -8.273e-02, 1.548e-01)); + r += mul(s6_5, M4(4.252e-02, 7.157e-02, -5.603e-02, 8.876e-04, 6.368e-02, -3.234e-01, 1.929e-02, -3.111e-02, -2.031e-02, -1.139e-01, -5.647e-02, 8.034e-02, 1.249e-01, 2.387e-01, -9.530e-02, 1.842e-02)); + r += mul(s6_6, M4(3.609e-02, 1.545e-02, -4.462e-02, -2.606e-02, 7.387e-04, -1.032e-02, 1.415e-02, 9.009e-02, 9.100e-03, 4.897e-02, 5.259e-02, 1.973e-02, -2.394e-01, -2.421e-02, 1.286e-01, 3.185e-02)); + r += mul(s6_7, M4(-5.623e-02, -4.501e-02, -4.529e-02, -8.827e-02, -2.332e-02, 1.097e-02, -2.695e-02, 1.039e-03, 1.157e-01, -1.888e-01, 3.724e-02, -1.472e-01, -1.313e-01, -1.401e-01, -2.073e-01, 1.443e-01)); + r += mul(s6_8, M4(8.602e-02, -1.457e-02, -1.366e-02, -1.223e-01, 3.215e-02, 6.168e-02, -4.301e-02, -6.100e-02, -1.310e-01, 2.626e-02, 5.578e-02, 7.027e-02, 1.218e-01, -9.431e-02, 1.189e-01, 2.276e-01)); + r += mul(s7_0, M4(-2.098e-01, 3.773e-02, 9.078e-02, 4.973e-02, 1.670e-02, 3.135e-02, 8.829e-02, 1.958e-02, -2.725e-02, -6.102e-02, 8.044e-02, 8.231e-03, 5.179e-02, -3.102e-02, -6.230e-03, -5.581e-02)); + r += mul(s7_1, M4(3.210e-02, 2.371e-01, 5.989e-02, 1.034e-01, 2.786e-02, 3.655e-02, -4.385e-02, 1.105e-01, 8.371e-02, 1.169e-01, -1.223e-01, 2.819e-02, 6.550e-02, -6.923e-03, 5.620e-02, -3.798e-02)); + r += mul(s7_2, M4(-1.539e-01, 6.029e-02, -2.643e-02, -1.947e-02, 8.463e-02, 5.455e-02, 6.077e-03, 7.383e-02, -3.496e-02, -8.242e-02, 1.702e-02, -2.585e-03, -7.294e-02, 3.695e-03, -1.836e-03, 9.230e-02)); + r += mul(s7_3, M4(-3.393e-02, -7.903e-02, -4.367e-02, -7.064e-02, 
6.230e-02, -7.311e-04, 8.585e-02, -5.261e-02, -1.791e-02, -1.270e-02, 1.843e-02, -6.434e-02, 4.059e-02, -5.534e-02, 4.663e-02, -2.061e-02)); + r += mul(s7_4, M4(-6.396e-02, 1.872e-01, 6.300e-03, -1.229e-01, 4.722e-02, 1.159e-01, -1.265e-01, -2.818e-02, 1.133e-01, 2.885e-01, -1.727e-01, 1.164e-02, 1.355e-02, 4.212e-02, -1.263e-03, 9.589e-03)); + r += mul(s7_5, M4(-1.061e-01, -7.650e-02, 2.641e-02, -6.247e-02, -1.294e-01, -4.841e-02, 3.652e-03, 2.337e-02, 1.409e-02, 6.725e-02, -1.023e-01, -7.833e-02, -4.138e-02, 4.985e-02, 5.410e-02, 7.188e-02)); + r += mul(s7_6, M4(-1.224e-01, 1.697e-01, -4.275e-02, -1.083e-01, -3.797e-02, -1.151e-02, -3.402e-02, 3.831e-03, 5.427e-02, -6.544e-03, 2.490e-03, 1.447e-02, 1.551e-02, -2.141e-02, 7.572e-03, -2.319e-02)); + r += mul(s7_7, M4(8.813e-02, 4.677e-02, 7.258e-02, -2.233e-02, 1.056e-01, 4.548e-02, -5.082e-02, 5.598e-02, 4.627e-02, 1.466e-02, -6.589e-02, -8.524e-02, -1.070e-02, -4.714e-02, 1.276e-02, 3.445e-02)); + r += mul(s7_8, M4(-1.312e-01, 1.265e-01, -2.302e-02, -1.697e-01, -2.171e-02, -3.595e-02, 5.103e-02, 2.643e-02, 1.650e-02, 2.831e-02, -2.564e-02, -9.242e-03, 1.215e-02, -2.933e-02, 1.630e-02, 2.074e-02)); + r += V4(-3.171e-03, 4.015e-03, -8.390e-03, -4.936e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.399e-03, -1.333e-01, 1.635e-01, 8.962e-02, 8.099e-02, 
1.812e-02, 2.720e-02, -5.767e-02, -6.046e-03, 1.213e-02, 5.859e-02, 1.206e-02, -1.000e-01, 2.974e-02, -3.893e-01, -1.116e-01)); + r += mul(s0_1, M4(9.527e-02, 1.840e-02, 9.118e-03, -3.785e-03, 4.409e-03, 1.973e-02, 7.759e-03, 1.143e-01, 5.272e-02, 2.280e-02, 6.629e-02, -2.726e-02, -3.087e-02, -4.963e-02, -9.112e-02, 1.470e-01)); + r += mul(s0_2, M4(2.341e-01, -4.306e-02, -3.563e-02, -1.134e-01, 2.904e-02, -1.568e-02, 1.065e-01, 1.184e-01, 2.079e-02, -6.771e-02, 2.918e-02, -1.229e-01, -9.481e-02, 8.101e-02, 4.501e-02, 2.165e-01)); + r += mul(s0_3, M4(1.688e-01, 3.890e-03, 3.508e-01, -6.471e-02, -1.389e-01, -4.583e-03, -1.045e-01, 2.210e-02, 4.821e-02, -1.066e-01, -1.285e-01, 5.111e-02, -2.314e-02, 1.692e-02, -1.240e-01, -2.002e-02)); + r += mul(s0_4, M4(2.392e-02, 4.782e-02, -8.814e-02, -8.142e-02, 1.643e-02, 1.365e-02, -1.485e-01, -1.603e-01, 1.012e-01, 2.038e-01, -8.363e-02, 4.859e-02, 2.361e-02, 1.489e-01, -1.280e-01, 1.544e-01)); + r += mul(s0_5, M4(1.575e-01, -7.385e-02, 6.317e-02, -8.692e-02, 3.277e-02, 6.683e-02, 1.028e-02, -3.689e-02, 6.222e-02, -2.004e-03, 1.195e-01, 1.176e-01, -3.901e-02, 3.434e-02, -4.818e-02, 4.428e-02)); + r += mul(s0_6, M4(1.604e-01, 1.364e-01, -1.335e-01, -1.778e-02, -3.223e-02, -3.696e-02, 1.523e-01, -5.976e-02, 7.820e-02, 3.907e-02, -5.458e-02, 1.035e-01, 2.372e-01, 4.221e-02, 3.357e-02, 1.704e-01)); + r += mul(s0_7, M4(5.804e-02, 4.363e-02, 2.326e-01, -7.886e-02, -2.683e-02, 1.301e-01, 1.184e-01, -4.168e-02, 7.721e-02, 1.494e-01, 1.304e-01, -3.124e-03, -4.602e-02, 3.320e-02, -6.940e-02, -3.177e-02)); + r += mul(s0_8, M4(5.804e-02, 4.494e-02, 1.089e-02, -1.320e-01, 5.313e-02, -4.836e-02, -2.717e-02, -3.837e-02, -5.629e-02, 3.611e-02, -3.032e-02, -1.782e-02, -2.371e-01, 1.627e-02, -9.274e-02, 6.939e-02)); + r += mul(s1_0, M4(9.092e-02, -6.971e-02, -1.102e-01, 1.625e-02, 5.649e-03, 5.476e-02, 1.016e-01, 6.568e-02, -7.223e-02, 2.350e-02, 6.645e-02, -3.048e-02, -4.688e-02, -5.166e-02, 5.258e-02, -1.552e-01)); + r += mul(s1_1, 
M4(-4.626e-02, 1.886e-02, -1.651e-02, 1.383e-01, -1.149e-02, 1.385e-02, 1.109e-01, -1.262e-01, 4.312e-02, 4.767e-02, -4.446e-02, 1.129e-01, 6.379e-02, 4.591e-03, -1.581e-02, -2.110e-02)); + r += mul(s1_2, M4(1.796e-03, -4.724e-02, -2.260e-02, 8.648e-02, 6.675e-02, 8.029e-03, 9.495e-02, 1.093e-02, -1.169e-02, -4.569e-02, 2.030e-02, 1.498e-02, -1.522e-02, -2.637e-02, 9.407e-02, -3.015e-02)); + r += mul(s1_3, M4(8.454e-02, -1.288e-03, 1.089e-01, 4.196e-02, -5.778e-03, 2.723e-01, 9.079e-02, 1.495e-02, -1.471e-01, -3.339e-02, -3.628e-02, -2.691e-02, 1.211e-01, 1.982e-02, 4.722e-02, -8.535e-02)); + r += mul(s1_4, M4(4.551e-02, 6.671e-02, 6.336e-02, 4.779e-02, 8.950e-03, 1.359e-01, 1.117e-01, -2.095e-01, 1.470e-01, -1.039e-01, -3.483e-01, -1.662e-01, 1.200e-01, 7.917e-02, 4.670e-02, -6.803e-02)); + r += mul(s1_5, M4(1.159e-01, -5.814e-02, 9.309e-02, -3.716e-02, 5.534e-02, 4.731e-02, 8.854e-03, -2.103e-02, -4.678e-02, -8.613e-02, -2.210e-01, 1.242e-01, 6.801e-02, 6.014e-02, 1.043e-01, -6.511e-02)); + r += mul(s1_6, M4(4.771e-02, -2.302e-02, -4.998e-02, 1.032e-02, 3.177e-02, 2.740e-02, 4.264e-02, 6.545e-02, 1.176e-02, 1.915e-02, 7.586e-02, 2.345e-01, 9.349e-02, 5.581e-02, 4.117e-02, -3.574e-02)); + r += mul(s1_7, M4(-1.662e-01, -7.375e-02, 4.694e-02, -5.693e-02, 5.791e-02, 2.733e-02, 3.989e-02, -6.208e-02, -2.600e-02, 1.030e-01, -5.928e-02, 1.294e-01, -3.075e-02, -1.290e-02, 3.637e-02, 4.538e-02)); + r += mul(s1_8, M4(-3.063e-02, 4.667e-02, -1.916e-02, -3.819e-02, -6.651e-03, -1.333e-01, 1.463e-01, -6.487e-04, -9.355e-02, -1.384e-01, -1.062e-01, 1.348e-01, -3.547e-02, 8.893e-02, 2.088e-03, -8.661e-04)); + r += mul(s2_0, M4(-1.895e-01, -1.416e-02, 7.805e-02, -8.565e-02, -1.401e-01, 3.184e-02, -4.643e-02, -2.616e-02, -9.603e-02, -8.861e-03, 1.255e-02, -5.402e-02, -1.656e-01, -8.053e-02, 6.487e-02, 6.307e-02)); + r += mul(s2_1, M4(-8.296e-02, 1.041e-01, 1.727e-01, -1.569e-01, 1.221e-01, -7.730e-03, -2.385e-01, -8.320e-02, -9.366e-02, 4.158e-02, -4.438e-01, 1.181e-01, 
-1.054e-01, 2.307e-02, 1.032e-01, -1.626e-02)); + r += mul(s2_2, M4(-9.554e-02, -4.403e-02, 1.346e-01, -1.762e-02, -1.433e-03, -3.288e-02, -7.941e-02, -2.863e-02, 2.315e-02, -6.922e-02, -6.280e-02, -1.059e-01, 5.831e-02, -6.399e-02, 1.018e-02, -6.664e-02)); + r += mul(s2_3, M4(-1.866e-01, -1.658e-01, 2.559e-01, 6.668e-02, 8.053e-02, 2.897e-03, -2.089e-01, 1.255e-02, -2.093e-01, 5.987e-02, -2.858e-02, -2.345e-01, 2.634e-01, -5.074e-02, 3.121e-02, -1.120e-01)); + r += mul(s2_4, M4(2.180e-01, -2.126e-01, 2.746e-01, 1.975e-02, 3.325e-03, 2.304e-01, -3.438e-02, -1.664e-01, 4.373e-02, -4.749e-02, -2.693e-01, -1.300e-01, -1.810e-01, 6.749e-02, -1.728e-01, 2.882e-02)); + r += mul(s2_5, M4(2.956e-02, -1.380e-02, -3.309e-02, -1.515e-02, -7.372e-02, -1.184e-02, 9.779e-02, -1.747e-01, 1.042e-01, 5.176e-02, -6.874e-03, -1.106e-01, -9.242e-03, -7.972e-02, -1.311e-01, -5.237e-03)); + r += mul(s2_6, M4(5.180e-02, 6.270e-02, 1.315e-01, 9.924e-02, -3.893e-02, -7.067e-02, -3.561e-02, 9.771e-02, -2.707e-01, -6.668e-02, -2.077e-02, -1.102e-01, -4.309e-02, -1.039e-01, -1.013e-01, 3.611e-02)); + r += mul(s2_7, M4(6.780e-02, -1.590e-01, 2.963e-01, 4.530e-02, 8.558e-02, -6.683e-02, -2.691e-01, 4.346e-03, 9.810e-02, 3.145e-02, -1.352e-02, 8.583e-02, -1.840e-02, 1.639e-02, 2.761e-02, -1.049e-01)); + r += mul(s2_8, M4(-1.724e-01, -1.849e-01, -1.644e-01, -4.091e-02, 4.987e-02, 9.072e-02, 1.068e-02, -1.310e-01, 2.451e-01, 1.010e-03, 5.341e-02, -7.349e-02, 2.736e-03, 1.471e-01, 7.518e-02, -1.221e-02)); + r += mul(s3_0, M4(8.039e-02, -4.309e-02, -3.394e-02, -1.010e-01, -1.060e-01, -3.326e-03, 7.692e-02, -1.208e-01, 2.169e-03, -1.648e-02, 1.444e-02, 4.783e-02, 2.027e-02, -5.927e-03, -1.183e-02, -4.858e-02)); + r += mul(s3_1, M4(-1.124e-01, -1.963e-02, -1.640e-02, -5.947e-02, 1.237e-03, -8.583e-02, 2.876e-03, -9.743e-02, -6.886e-02, -7.141e-03, -3.896e-03, 1.536e-02, 4.794e-02, -1.096e-01, 2.549e-02, 1.263e-01)); + r += mul(s3_2, M4(1.250e-01, -4.949e-03, 3.525e-02, -1.308e-02, -3.695e-03, 
-5.323e-02, 1.222e-03, -3.075e-02, -1.388e-03, -9.577e-03, 4.296e-04, 6.666e-02, -5.979e-02, 3.506e-02, -1.869e-02, 5.446e-02)); + r += mul(s3_3, M4(-1.171e-01, 2.664e-02, 1.063e-01, -6.276e-03, -8.396e-03, 1.078e-01, -4.294e-02, 9.336e-02, -1.071e-02, 1.569e-02, 4.817e-02, 8.765e-02, -2.305e-01, 3.090e-02, 1.692e-01, 1.753e-01)); + r += mul(s3_4, M4(5.076e-03, -1.902e-01, -1.299e-01, 1.303e-01, -3.924e-02, 1.519e-01, -8.081e-02, -7.900e-02, 7.273e-02, -1.331e-01, -6.378e-02, -3.999e-02, -1.071e-01, -4.063e-03, 1.049e-01, 7.222e-03)); + r += mul(s3_5, M4(-1.152e-01, -2.036e-03, 7.410e-02, -6.259e-02, -6.560e-02, 6.826e-02, 6.316e-03, -7.414e-03, 4.555e-02, -3.406e-02, 1.181e-02, 4.307e-02, -3.666e-02, 5.070e-02, -5.521e-03, 4.305e-02)); + r += mul(s3_6, M4(2.281e-01, 3.983e-02, -6.538e-02, 1.858e-02, -4.044e-02, 4.944e-02, 1.055e-01, -1.410e-01, -4.775e-03, -3.619e-02, -4.327e-02, -1.120e-01, -5.352e-02, 8.676e-02, 1.774e-02, -4.081e-02)); + r += mul(s3_7, M4(-3.218e-03, -6.811e-02, -6.519e-02, 1.342e-01, -7.100e-03, -7.039e-02, 2.666e-02, -7.692e-02, 2.331e-02, 3.179e-02, 1.173e-01, 6.603e-02, -1.208e-01, -7.213e-02, 5.929e-02, 8.421e-02)); + r += mul(s3_8, M4(-3.993e-02, -9.329e-03, 2.565e-02, 8.653e-02, 3.002e-02, -6.917e-02, -5.029e-02, 1.622e-02, 1.050e-02, 8.425e-03, 1.787e-02, -1.812e-02, -4.378e-02, 4.965e-02, -3.166e-02, 1.749e-02)); + r += mul(s4_0, M4(9.219e-02, -1.606e-02, 9.817e-02, -3.357e-01, 1.134e-01, -4.664e-03, -1.514e-01, 9.937e-02, -5.159e-02, 8.918e-04, 1.750e-02, -4.721e-02, -4.224e-02, 3.066e-02, 1.144e-02, -3.345e-02)); + r += mul(s4_1, M4(-8.637e-02, -5.564e-02, 5.138e-02, -1.391e-01, -1.507e-01, 4.180e-03, 1.485e-02, -2.553e-02, -1.910e-02, 5.254e-02, 5.222e-02, 6.109e-02, 1.121e-01, -1.485e-02, -1.411e-02, -3.113e-02)); + r += mul(s4_2, M4(1.301e-01, -7.978e-03, -1.393e-02, -7.214e-02, 1.865e-01, -6.353e-04, -1.992e-02, 1.103e-02, 4.821e-02, 1.285e-02, -1.085e-01, -1.609e-02, -4.429e-02, -1.546e-02, 8.609e-02, -2.711e-02)); + r += 
mul(s4_3, M4(-2.509e-01, -1.322e-01, 1.045e-01, -9.006e-02, 1.093e-01, 1.637e-01, -8.574e-02, -8.930e-03, 5.782e-02, -5.858e-02, -1.084e-01, -2.936e-02, 1.714e-03, 5.823e-02, -1.273e-01, -4.853e-02)); + r += mul(s4_4, M4(1.060e-02, -2.217e-01, 2.880e-01, -3.584e-02, -2.595e-01, -1.631e-01, 1.352e-01, 2.201e-01, -1.267e-01, -9.749e-02, -2.792e-02, 1.037e-01, -3.967e-02, 9.149e-02, 4.318e-02, 1.166e-01)); + r += mul(s4_5, M4(6.478e-02, -4.484e-02, 3.809e-02, -3.552e-02, 8.834e-02, -3.385e-02, -6.293e-02, 2.586e-01, -1.035e-02, 2.827e-02, -2.354e-02, 5.704e-02, 2.633e-02, 1.038e-01, -5.300e-02, -8.980e-03)); + r += mul(s4_6, M4(9.930e-02, 1.884e-02, 4.265e-02, 1.050e-01, 6.271e-02, -2.960e-02, -6.198e-02, -3.177e-02, -1.982e-02, 3.138e-02, 2.802e-02, -7.681e-02, -1.440e-02, -3.626e-02, 2.283e-02, -9.296e-02)); + r += mul(s4_7, M4(1.740e-01, -5.531e-02, -1.019e-01, 2.412e-01, 6.407e-02, -9.663e-02, -2.068e-01, 2.792e-01, -3.690e-02, 6.203e-02, -7.736e-02, -7.056e-02, 2.487e-02, -1.859e-02, 4.167e-02, 1.364e-01)); + r += mul(s4_8, M4(5.267e-03, -2.518e-02, -5.685e-02, 9.379e-02, 1.073e-01, 6.220e-02, -4.334e-02, 1.590e-01, -2.332e-02, -6.173e-02, 1.565e-02, -2.200e-02, -9.786e-02, -9.415e-02, -5.747e-02, 5.885e-02)); + r += mul(s5_0, M4(-1.377e-03, -1.527e-02, 1.928e-02, 8.000e-02, -8.736e-02, -5.620e-03, -4.459e-02, 4.203e-02, 1.057e-01, 4.444e-02, 2.365e-02, -9.226e-02, 7.327e-02, 3.791e-03, 6.473e-02, 2.468e-02)); + r += mul(s5_1, M4(1.614e-01, 4.639e-02, -1.534e-01, 3.564e-02, 4.357e-02, 6.173e-02, 2.216e-02, -8.123e-02, -1.202e-01, 2.329e-02, 7.193e-02, 1.261e-01, 9.497e-02, -4.084e-02, 1.955e-01, 9.262e-02)); + r += mul(s5_2, M4(2.801e-02, -4.912e-02, 7.114e-02, 1.439e-03, -9.265e-03, -6.691e-02, -5.837e-03, -9.360e-02, -8.374e-02, 1.933e-02, 9.814e-02, 2.211e-01, 1.116e-01, -4.033e-02, 1.073e-01, -5.177e-02)); + r += mul(s5_3, M4(8.504e-02, -1.576e-01, -8.348e-02, -1.539e-01, 1.322e-02, 3.964e-02, -2.371e-02, -5.369e-02, 1.859e-01, -3.900e-02, -9.974e-02, 
7.470e-02, 1.265e-01, 9.210e-02, -3.323e-02, 6.984e-02)); + r += mul(s5_4, M4(7.407e-03, 3.467e-02, -5.767e-02, -1.088e-01, -1.367e-01, -7.396e-02, 8.759e-02, 9.492e-02, -3.186e-04, -1.533e-01, -1.613e-01, -1.761e-03, 3.867e-01, 2.290e-01, -1.586e-01, -2.010e-01)); + r += mul(s5_5, M4(1.360e-02, -1.785e-02, -3.573e-02, -4.038e-02, 7.436e-02, 5.923e-02, -1.377e-02, -1.017e-01, -6.853e-02, -1.940e-02, -1.804e-01, -1.106e-01, 1.544e-01, -7.375e-02, 8.420e-02, -1.161e-01)); + r += mul(s5_6, M4(-7.538e-02, -7.231e-02, 5.213e-02, 4.965e-02, -5.179e-04, 1.038e-02, 2.649e-02, 3.156e-05, -4.038e-03, -1.578e-01, 2.497e-02, -8.121e-02, 4.998e-02, -1.159e-01, 1.821e-01, 7.560e-02)); + r += mul(s5_7, M4(1.023e-01, -1.525e-01, 4.211e-02, -2.338e-01, 2.877e-02, -1.010e-01, -1.719e-02, 9.882e-02, -3.067e-02, -7.396e-02, -1.246e-01, -7.543e-02, -1.324e-01, -1.461e-01, 1.423e-02, 2.280e-02)); + r += mul(s5_8, M4(-4.715e-03, -5.282e-02, 3.491e-02, -9.070e-02, 1.037e-01, 1.120e-02, 7.443e-04, 9.966e-02, -8.891e-03, 5.557e-02, -1.180e-02, -3.528e-02, -3.237e-02, -1.740e-01, 4.937e-02, -2.464e-02)); + r += mul(s6_0, M4(-8.665e-02, 9.442e-03, -3.271e-02, -1.549e-02, 8.047e-02, 1.667e-02, 6.394e-02, 8.030e-02, -1.415e-01, 7.993e-02, 7.679e-02, 5.065e-03, -7.120e-02, -2.077e-03, 1.013e-01, 5.265e-02)); + r += mul(s6_1, M4(1.001e-01, -4.013e-02, -1.035e-01, -5.445e-02, 1.306e-01, 1.969e-02, 4.396e-02, -2.667e-01, -5.041e-02, 4.116e-02, -8.319e-02, -2.392e-01, 1.150e-01, -1.910e-02, 7.817e-02, 9.090e-02)); + r += mul(s6_2, M4(3.404e-02, 8.336e-03, -1.125e-01, -1.510e-01, 1.460e-01, -3.987e-02, -8.898e-02, -2.425e-01, 1.092e-02, 5.936e-02, -3.514e-02, 6.529e-03, 3.837e-02, -2.472e-02, -9.547e-02, -9.396e-02)); + r += mul(s6_3, M4(-1.513e-01, 2.992e-02, 1.391e-01, 5.229e-02, 1.068e-01, -9.627e-02, 3.284e-02, 2.356e-02, -7.095e-03, 1.784e-01, 3.594e-02, 1.013e-01, 1.698e-01, -8.420e-02, 2.065e-01, 9.354e-02)); + r += mul(s6_4, M4(-1.394e-01, 2.430e-02, 8.983e-02, -8.055e-02, 1.957e-01, 
1.421e-01, 4.197e-02, -1.625e-01, -1.161e-01, 4.852e-03, 2.710e-01, -3.648e-01, -1.721e-01, 5.707e-02, 3.576e-01, 3.643e-01)); + r += mul(s6_5, M4(-6.297e-02, 1.236e-01, 3.529e-02, 6.796e-02, -4.249e-02, -2.873e-02, 7.233e-03, -1.039e-02, -7.700e-02, -8.922e-03, -1.446e-02, -2.781e-01, 2.135e-01, 7.543e-02, 8.269e-02, 1.142e-01)); + r += mul(s6_6, M4(-5.073e-02, -3.300e-03, 4.523e-02, -2.752e-03, 1.079e-02, 1.085e-02, 8.277e-02, 1.070e-02, 9.057e-02, -1.403e-02, -4.971e-03, 1.269e-02, 1.669e-01, 1.033e-01, 1.315e-01, -1.391e-01)); + r += mul(s6_7, M4(-1.036e-02, -9.453e-02, 1.037e-02, -1.003e-03, 1.193e-01, 5.614e-02, 9.476e-02, -3.669e-02, -1.811e-01, 1.325e-01, -1.106e-02, -1.057e-01, 1.558e-01, 1.597e-01, 1.786e-01, 2.404e-02)); + r += mul(s6_8, M4(1.505e-03, -1.317e-01, -8.725e-02, -3.807e-02, 4.751e-02, -8.481e-02, 1.363e-02, 1.515e-01, -1.059e-02, 1.262e-01, -4.187e-02, 4.321e-04, 5.830e-02, 9.412e-02, 1.167e-01, -1.262e-01)); + r += mul(s7_0, M4(2.247e-01, -2.586e-02, -1.196e-01, 1.432e-01, -2.930e-02, -2.740e-02, -1.187e-01, 7.369e-02, -9.139e-03, -9.799e-03, -3.261e-02, 1.047e-01, -4.716e-02, 1.361e-02, 3.058e-02, 1.517e-02)); + r += mul(s7_1, M4(-2.263e-01, -6.201e-02, -5.533e-02, 1.001e-01, -1.249e-01, 7.313e-02, -7.635e-02, 6.038e-02, -7.559e-02, 1.219e-01, -4.758e-02, 1.272e-02, 9.694e-04, 8.565e-02, 3.794e-03, -2.245e-02)); + r += mul(s7_2, M4(-6.290e-02, -2.224e-03, -1.022e-02, 7.237e-02, 3.171e-02, -4.334e-03, -1.161e-01, 1.032e-02, -3.537e-02, -1.875e-02, -1.202e-01, -6.996e-02, -2.806e-05, -2.278e-02, -5.171e-03, 5.068e-02)); + r += mul(s7_3, M4(1.592e-01, -1.322e-01, -1.150e-01, 7.270e-02, -5.486e-02, -6.318e-02, -4.070e-02, -3.950e-02, 2.477e-02, 9.655e-02, 2.007e-02, -1.155e-01, -1.543e-02, -3.040e-02, -3.466e-02, 8.299e-03)); + r += mul(s7_4, M4(-6.151e-02, 1.051e-01, 8.776e-02, -1.650e-01, 5.560e-02, 2.300e-01, 2.273e-01, 1.336e-01, -3.419e-02, 1.624e-01, 1.338e-01, -4.111e-02, -1.190e-01, 2.853e-04, -1.343e-01, 1.454e-01)); + r += mul(s7_5, 
M4(9.564e-03, 6.705e-02, 7.108e-02, -1.017e-01, -5.055e-02, 3.334e-02, -5.725e-02, -8.431e-02, -1.571e-01, 5.455e-02, -1.318e-03, -6.714e-02, 7.995e-02, -1.517e-01, -1.170e-01, 2.803e-02)); + r += mul(s7_6, M4(9.725e-02, -2.363e-01, -1.968e-01, 2.334e-01, 7.791e-03, 4.165e-02, -2.619e-02, 1.913e-02, -3.451e-02, -4.748e-02, 2.229e-02, 5.546e-02, -4.080e-02, -2.234e-02, 5.510e-03, -2.945e-02)); + r += mul(s7_7, M4(2.238e-02, -1.256e-01, -9.673e-02, 5.074e-02, -3.286e-02, -4.920e-02, -3.279e-02, -1.358e-02, -1.090e-01, -3.961e-03, -3.684e-03, -7.970e-02, 7.199e-03, -3.798e-02, -9.125e-02, 3.217e-02)); + r += mul(s7_8, M4(-1.132e-01, -5.104e-02, -3.001e-02, 5.771e-02, -2.920e-02, -7.713e-02, -1.449e-01, -6.916e-03, 5.838e-04, 1.030e-02, 4.348e-02, 1.441e-02, 5.680e-03, -3.232e-02, 3.748e-02, -9.775e-02)); + r += V4(-9.878e-04, -3.319e-03, -7.274e-03, -4.102e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.585e-02, -5.447e-02, -6.410e-02, 9.135e-04, 3.840e-03, 7.830e-02, 2.980e-02, -4.394e-03, -1.362e-01, 2.882e-02, -8.374e-02, -5.660e-02, 8.662e-02, 3.281e-02, 2.442e-02, 2.502e-01)); + r += mul(s0_1, M4(-4.057e-01, 1.377e-01, 1.821e-01, 8.596e-02, -3.172e-02, -2.692e-02, -1.087e-02, -5.274e-02, -1.119e-01, -4.529e-02, 9.902e-02, 1.566e-01, 7.653e-02, -8.398e-02, -1.224e-01, -1.669e-01)); + r += mul(s0_2, 
M4(-1.571e-01, -1.348e-02, 9.840e-02, 1.100e-01, 1.837e-02, 2.951e-02, -4.254e-02, -4.719e-03, 1.283e-01, 8.471e-02, -1.224e-01, 4.960e-02, -9.345e-02, 3.713e-02, 3.409e-02, -5.489e-02)); + r += mul(s0_3, M4(-1.642e-02, 1.607e-01, 6.271e-02, -1.352e-02, -5.466e-02, -1.352e-01, -6.569e-02, 2.534e-02, 9.311e-03, -4.879e-02, 1.099e-01, -3.576e-02, 1.043e-01, 1.869e-01, -4.980e-02, 2.746e-01)); + r += mul(s0_4, M4(-2.632e-01, 1.476e-01, -5.304e-02, -2.674e-01, -2.612e-02, 3.601e-02, 2.724e-01, 1.413e-01, 3.967e-02, 1.108e-01, 1.607e-01, -7.162e-03, 3.205e-01, 1.498e-01, -1.481e-01, -3.898e-02)); + r += mul(s0_5, M4(-6.274e-02, 1.343e-02, -1.190e-03, 6.878e-02, 6.454e-02, -7.951e-02, 2.946e-02, 3.911e-02, 1.520e-02, -8.300e-02, 2.361e-02, -1.104e-01, 2.094e-01, 6.251e-03, -4.144e-02, 3.402e-02)); + r += mul(s0_6, M4(1.792e-01, -3.768e-02, 4.980e-02, 8.778e-02, -8.272e-03, 1.126e-01, 3.220e-02, -5.665e-02, 4.564e-02, 9.751e-02, -2.899e-02, 5.977e-02, 4.468e-02, -1.758e-02, -6.659e-02, 2.254e-01)); + r += mul(s0_7, M4(-1.326e-01, 4.870e-02, -8.994e-02, 1.498e-01, -6.912e-02, 1.986e-01, -6.959e-02, 5.150e-02, 5.573e-02, -5.805e-02, -6.558e-02, -2.183e-02, -2.339e-02, -3.981e-02, -1.137e-02, -3.469e-02)); + r += mul(s0_8, M4(-3.780e-02, -4.288e-02, 1.231e-02, 3.720e-03, -6.412e-02, 9.954e-02, 5.369e-02, 8.991e-02, -4.869e-02, -9.182e-02, 2.655e-02, 7.953e-02, -1.577e-03, -1.117e-01, -2.791e-02, 2.585e-03)); + r += mul(s1_0, M4(-4.467e-02, 1.163e-01, 1.010e-01, -3.280e-02, -3.160e-02, -1.412e-02, -1.160e-01, -1.212e-04, -9.141e-03, 1.402e-01, 4.036e-04, -2.182e-02, 1.524e-01, 8.302e-02, -9.509e-03, 6.960e-02)); + r += mul(s1_1, M4(-1.356e-02, 7.281e-02, 3.867e-02, -9.842e-03, -1.949e-01, 8.373e-02, 1.343e-01, 2.841e-02, 6.260e-02, -8.060e-02, 1.853e-01, 1.154e-01, -7.933e-02, 1.144e-02, 1.572e-02, -3.281e-02)); + r += mul(s1_2, M4(2.395e-03, 9.085e-02, 6.040e-02, 2.920e-02, 9.697e-02, -3.530e-02, -3.643e-02, 3.957e-02, 2.047e-01, 1.251e-02, -7.154e-02, 4.984e-02, -1.746e-02, 
8.740e-02, 6.606e-02, -7.007e-02)); + r += mul(s1_3, M4(9.398e-02, 7.480e-04, -8.108e-02, -3.124e-02, -7.854e-02, 5.976e-02, 1.977e-02, -5.280e-03, 9.734e-02, -2.138e-01, -4.311e-02, -1.293e-01, -2.922e-02, 4.149e-02, -5.614e-02, 4.738e-02)); + r += mul(s1_4, M4(-5.260e-02, -1.144e-01, -2.939e-02, -2.192e-01, -2.769e-01, 8.326e-02, 1.198e-01, 1.079e-01, 3.293e-02, 7.920e-02, -6.125e-02, 5.985e-02, -5.756e-02, 2.575e-02, -2.339e-02, -2.205e-02)); + r += mul(s1_5, M4(-2.392e-02, -4.508e-02, -1.011e-01, 1.019e-01, 1.123e-01, -3.425e-02, 8.296e-02, 4.336e-02, -2.547e-02, -1.141e-01, 1.746e-01, -1.671e-01, -5.462e-02, 1.379e-01, -2.071e-02, 2.653e-02)); + r += mul(s1_6, M4(-4.377e-02, -3.417e-02, 1.735e-02, 6.546e-02, -2.559e-02, 6.914e-02, 8.200e-02, -1.373e-01, 1.990e-02, -6.559e-02, 1.056e-02, 2.410e-01, 1.999e-02, 2.395e-02, -9.721e-03, -7.985e-03)); + r += mul(s1_7, M4(-3.746e-03, -3.663e-02, -1.538e-02, 4.688e-02, -3.282e-02, -1.423e-01, 3.211e-02, -2.777e-01, 1.854e-01, -1.091e-01, 7.542e-03, 5.689e-02, -2.584e-02, 5.099e-02, 9.334e-03, -8.013e-02)); + r += mul(s1_8, M4(3.173e-02, -5.641e-02, 5.414e-03, -6.165e-02, -2.142e-01, 3.303e-02, 3.107e-02, -1.457e-01, 6.330e-02, 6.731e-02, -2.131e-02, 4.852e-02, 5.405e-02, -3.274e-02, 6.393e-02, -6.104e-02)); + r += mul(s2_0, M4(-2.087e-01, 4.199e-02, 1.233e-01, -7.187e-02, 1.128e-02, 3.308e-02, 1.059e-01, 2.926e-01, -8.745e-02, -9.784e-02, 3.644e-02, -9.938e-02, -7.204e-02, 2.681e-02, -6.540e-02, -2.048e-01)); + r += mul(s2_1, M4(7.861e-03, 1.970e-01, -4.907e-02, 5.596e-02, 1.791e-01, -1.221e-01, -1.436e-02, -6.716e-02, 1.264e-01, -1.511e-01, -1.275e-01, 1.137e-01, -4.364e-02, -2.254e-03, -5.332e-02, 2.385e-02)); + r += mul(s2_2, M4(6.989e-02, 9.156e-02, -3.446e-02, -5.190e-02, 9.404e-02, -3.471e-02, -5.609e-02, 1.443e-01, -7.219e-02, -1.295e-01, -1.534e-01, -4.947e-02, -6.313e-02, -7.399e-04, -1.878e-01, 4.217e-02)); + r += mul(s2_3, M4(1.321e-01, -6.419e-02, 7.203e-02, -1.379e-01, -2.549e-02, -1.886e-01, -8.105e-02, 
-2.253e-02, 2.186e-01, -1.831e-02, -7.786e-02, 1.768e-01, 2.367e-02, 1.452e-01, -1.983e-02, 1.518e-01)); + r += mul(s2_4, M4(2.156e-01, 6.378e-02, -1.064e-01, -1.022e-01, 1.701e-01, -1.038e-01, -1.264e-02, -1.800e-02, -2.390e-01, -2.270e-01, -2.519e-01, -6.581e-02, 1.633e-01, 1.256e-01, 6.411e-02, 6.647e-02)); + r += mul(s2_5, M4(6.107e-02, -9.067e-02, -1.240e-02, -2.628e-02, 1.194e-01, 1.773e-02, 2.114e-03, -4.391e-02, -2.084e-01, -1.128e-01, -4.914e-02, 1.101e-01, 1.726e-02, -1.220e-01, 7.833e-02, 3.529e-02)); + r += mul(s2_6, M4(-2.155e-02, 9.136e-02, -1.980e-02, -1.268e-01, 9.448e-02, 3.122e-02, 1.124e-02, 8.269e-02, -8.386e-02, -1.907e-01, 1.474e-01, -8.034e-02, -5.186e-02, -2.152e-01, -3.611e-02, 6.186e-02)); + r += mul(s2_7, M4(-1.224e-01, -1.328e-01, -4.884e-02, 5.879e-02, -5.024e-02, -8.542e-02, -1.082e-01, 1.971e-01, 1.982e-01, -3.316e-02, 1.471e-01, -2.310e-01, 9.502e-02, -1.520e-01, -1.229e-02, -4.399e-02)); + r += mul(s2_8, M4(-2.699e-02, 6.634e-02, -2.067e-02, -1.152e-01, 7.637e-02, -1.409e-01, 4.916e-02, 8.563e-02, 4.391e-02, 3.214e-02, 3.985e-02, -9.064e-03, 5.131e-03, -9.717e-02, 1.151e-02, -1.024e-01)); + r += mul(s3_0, M4(1.422e-01, 4.905e-02, 6.478e-02, 5.610e-02, 9.060e-02, -1.690e-02, -1.570e-02, 4.495e-02, -1.400e-02, 1.725e-03, 4.518e-02, 2.323e-02, 4.923e-02, -6.763e-03, 1.048e-01, 1.285e-01)); + r += mul(s3_1, M4(1.272e-01, -6.393e-02, -9.546e-02, 3.530e-02, 7.041e-02, 1.867e-02, -9.025e-02, -6.270e-02, 4.864e-02, 2.072e-02, 4.579e-02, 4.292e-02, 7.846e-02, -2.893e-02, 1.548e-02, -1.232e-01)); + r += mul(s3_2, M4(-4.305e-02, -2.403e-03, 9.340e-03, 4.241e-02, -5.840e-03, -2.270e-02, -2.000e-02, 8.703e-02, 6.364e-03, -1.474e-02, -3.378e-03, -3.872e-03, 2.892e-03, 4.422e-02, 1.212e-01, -8.838e-02)); + r += mul(s3_3, M4(-4.035e-02, -2.075e-01, -1.039e-01, 1.811e-01, -8.418e-02, -6.565e-02, -2.244e-02, 8.378e-02, 1.240e-02, -2.228e-02, -4.853e-02, 1.112e-01, -1.243e-01, 6.860e-02, -5.448e-03, 1.038e-01)); + r += mul(s3_4, M4(-1.920e-02, 
8.644e-02, 3.819e-02, -1.893e-01, 1.294e-01, -4.752e-02, 8.818e-02, -5.266e-02, -1.246e-01, 6.341e-02, -4.990e-02, -1.671e-02, -5.725e-02, -9.683e-02, 9.638e-02, 5.240e-02)); + r += mul(s3_5, M4(2.002e-02, 1.140e-01, 8.538e-02, 1.559e-03, 1.636e-02, 1.223e-02, -1.186e-02, 2.168e-03, -1.009e-01, 9.316e-03, -9.731e-02, 8.741e-03, 2.632e-02, -1.135e-01, -1.423e-01, 8.233e-02)); + r += mul(s3_6, M4(1.938e-03, 5.731e-02, 5.645e-02, 1.169e-01, -6.159e-02, 4.560e-04, -1.568e-02, -1.016e-02, 2.723e-03, -6.959e-02, 5.848e-03, -1.724e-01, 3.066e-02, -4.452e-02, -4.712e-02, 1.104e-01)); + r += mul(s3_7, M4(1.694e-01, 8.618e-02, 1.097e-01, 9.419e-02, -4.966e-02, 4.364e-02, -3.877e-02, -6.220e-03, -1.216e-02, 9.959e-02, -3.456e-02, 2.029e-02, -1.582e-01, 6.902e-02, -6.370e-02, -3.300e-02)); + r += mul(s3_8, M4(2.493e-02, 9.216e-03, -2.540e-02, -4.810e-03, -2.827e-02, -9.963e-03, 2.732e-02, 2.375e-02, 4.082e-02, -2.226e-02, 6.804e-02, -4.862e-03, -1.570e-02, -1.027e-01, -3.080e-02, -9.270e-02)); + r += mul(s4_0, M4(4.554e-02, 1.888e-02, -2.795e-03, 4.838e-02, 4.199e-02, -2.067e-01, 4.532e-05, 5.182e-03, -1.319e-02, 6.469e-02, 3.205e-03, -1.996e-02, -3.419e-02, 5.210e-02, -1.860e-02, 1.883e-02)); + r += mul(s4_1, M4(7.305e-02, 9.797e-02, 1.991e-01, 3.324e-02, -8.276e-02, -1.215e-01, 5.238e-02, 1.681e-01, -2.036e-01, -6.233e-02, -1.087e-02, 2.798e-02, -7.057e-02, 2.005e-02, 9.847e-02, 4.418e-03)); + r += mul(s4_2, M4(-1.732e-02, -6.019e-02, 1.367e-01, 5.675e-02, 1.323e-01, -7.145e-02, 1.792e-01, -9.877e-02, 6.898e-02, 7.271e-02, 7.827e-02, 3.562e-02, 9.910e-02, -1.445e-02, -1.436e-02, -1.742e-02)); + r += mul(s4_3, M4(-1.205e-02, 8.219e-02, -7.588e-03, 1.023e-01, -8.441e-02, 3.061e-02, -8.574e-02, 3.020e-01, 7.354e-04, 5.538e-02, 4.402e-02, -1.078e-01, 2.473e-02, -5.658e-03, -1.434e-01, 3.323e-03)); + r += mul(s4_4, M4(-1.732e-01, 9.407e-02, -6.044e-02, -7.058e-02, 8.125e-02, -1.388e-02, 2.053e-02, 2.503e-01, -1.979e-02, -2.590e-01, -1.424e-01, -2.107e-01, 6.958e-02, -1.036e-01, 
-3.720e-01, 7.251e-02)); + r += mul(s4_5, M4(1.764e-02, 7.230e-02, 4.248e-02, 1.236e-01, -1.160e-01, -2.729e-01, -4.344e-02, 1.358e-01, 1.727e-02, 5.078e-02, -1.369e-01, 3.504e-02, 2.322e-02, 7.917e-02, 8.632e-02, 8.075e-03)); + r += mul(s4_6, M4(-2.007e-02, 4.633e-02, 3.940e-03, 3.681e-02, -1.385e-01, -1.386e-01, -3.241e-03, -5.116e-02, 5.133e-02, 5.488e-02, 1.790e-02, -7.664e-02, 1.172e-01, 4.431e-02, -4.884e-03, 4.572e-02)); + r += mul(s4_7, M4(2.038e-02, 9.023e-02, 5.058e-02, 8.869e-02, 4.808e-02, -2.131e-02, -4.527e-02, 2.153e-01, -9.389e-03, -6.293e-02, 2.132e-03, -1.629e-02, 5.003e-03, -1.940e-01, -5.580e-04, 3.800e-02)); + r += mul(s4_8, M4(-6.235e-03, -5.695e-03, 1.797e-03, -4.040e-02, -7.224e-02, -3.357e-01, -1.071e-01, 1.427e-01, -2.058e-02, -8.302e-02, 6.861e-02, -9.088e-02, 4.247e-02, -5.801e-03, 4.301e-02, -1.230e-02)); + r += mul(s5_0, M4(-1.851e-01, -8.309e-03, -1.097e-02, 4.908e-02, -1.638e-03, -1.982e-02, 1.282e-02, 7.954e-02, -2.730e-02, 1.104e-02, -6.913e-03, -1.020e-01, -1.184e-02, 7.049e-02, -9.210e-03, -2.910e-02)); + r += mul(s5_1, M4(-5.981e-02, 3.323e-02, 1.790e-01, 7.107e-02, -2.524e-02, 5.868e-02, 7.461e-02, 5.397e-03, 1.105e-02, -1.109e-02, -1.877e-01, 1.042e-01, 8.501e-02, -1.011e-01, -7.199e-02, -1.190e-01)); + r += mul(s5_2, M4(2.461e-02, 3.315e-02, -7.544e-02, 3.864e-02, 1.306e-01, -3.625e-02, 9.585e-03, 1.112e-01, 1.350e-01, 7.111e-03, 1.438e-02, -1.083e-01, -6.924e-02, -3.498e-02, -7.076e-02, 1.377e-02)); + r += mul(s5_3, M4(1.941e-01, 3.021e-02, 2.885e-02, -6.498e-02, -2.116e-02, 3.201e-02, 3.660e-03, 6.502e-02, -2.667e-02, 9.949e-03, 2.169e-02, -8.276e-02, -1.458e-01, -2.832e-02, -1.025e-01, 1.791e-01)); + r += mul(s5_4, M4(8.388e-02, -3.981e-02, -1.060e-01, 1.874e-02, -7.816e-02, 2.979e-03, 5.130e-03, -1.849e-02, 2.030e-01, -5.015e-02, -6.907e-02, 2.951e-02, -3.908e-01, 8.986e-02, -6.753e-02, -1.275e-01)); + r += mul(s5_5, M4(-5.842e-02, -1.493e-01, 5.994e-02, 2.976e-02, -7.239e-02, -1.667e-02, -4.189e-02, 3.085e-03, 2.728e-01, 
2.139e-01, 1.643e-01, 5.303e-02, -1.843e-01, -9.559e-02, -1.233e-01, 8.742e-03)); + r += mul(s5_6, M4(-1.255e-01, 7.025e-02, -7.532e-02, -9.300e-02, 7.462e-02, -7.091e-02, 4.919e-02, -3.458e-02, 1.290e-01, -5.353e-02, -5.911e-02, 1.364e-01, 3.136e-02, -1.116e-01, 4.377e-02, -5.287e-03)); + r += mul(s5_7, M4(-2.363e-02, -3.794e-02, -1.007e-01, -8.856e-03, -7.124e-02, 5.981e-02, 2.890e-02, 3.538e-02, 6.294e-02, 1.174e-02, -5.866e-02, -2.908e-02, 2.588e-02, -2.905e-02, 4.247e-02, -1.011e-01)); + r += mul(s5_8, M4(-5.449e-02, 6.772e-02, -7.855e-02, -1.544e-01, 2.643e-02, 3.867e-02, -4.634e-02, 7.590e-02, -1.813e-01, 1.047e-01, -1.400e-01, 3.614e-02, -8.915e-02, 7.867e-02, 4.121e-02, -1.364e-01)); + r += mul(s6_0, M4(-1.979e-02, 4.119e-02, -4.703e-02, 7.739e-03, -9.471e-02, -5.100e-02, 1.301e-01, -1.197e-01, -2.493e-03, -2.455e-02, -1.174e-01, 4.942e-02, -1.109e-01, 6.456e-02, -7.671e-03, 1.920e-01)); + r += mul(s6_1, M4(-6.147e-02, 9.588e-02, -2.003e-02, -5.682e-02, -1.995e-01, -4.454e-02, 3.487e-01, -1.024e-01, -9.889e-02, 1.686e-02, 9.011e-02, -1.528e-02, -3.332e-01, 3.212e-01, 2.864e-01, 1.939e-01)); + r += mul(s6_2, M4(-8.544e-02, 1.802e-03, 6.422e-02, 4.824e-02, 5.645e-02, 1.208e-01, 9.078e-02, 2.502e-02, 2.094e-01, 2.460e-03, -1.114e-02, 4.912e-02, 4.944e-03, 9.641e-03, 1.579e-01, 1.204e-01)); + r += mul(s6_3, M4(1.219e-01, 8.069e-02, -4.334e-02, -1.310e-01, 1.147e-01, 1.878e-02, -5.557e-02, -1.546e-01, -1.256e-01, -7.466e-02, 1.412e-01, 1.766e-01, -3.323e-01, 7.367e-02, -4.014e-03, 7.780e-02)); + r += mul(s6_4, M4(4.867e-02, -1.692e-01, -3.644e-02, -1.731e-03, 4.042e-04, -7.033e-02, -1.145e-01, -1.296e-01, -1.945e-01, -7.274e-02, 1.330e-02, 1.813e-02, -1.548e-01, 5.739e-02, 1.718e-01, -1.194e-01)); + r += mul(s6_5, M4(1.175e-01, 1.082e-01, 6.635e-02, 1.117e-02, -6.449e-02, -1.730e-02, -1.054e-02, -3.358e-02, -4.538e-02, -1.515e-01, -3.798e-03, -1.860e-01, -1.559e-02, 1.447e-01, 1.498e-01, 3.090e-02)); + r += mul(s6_6, M4(-1.683e-03, 8.568e-03, 2.051e-02, 
-9.578e-02, -1.408e-02, 6.554e-02, -1.017e-02, -6.423e-02, 3.568e-02, 3.394e-02, -5.355e-03, 5.121e-02, -5.380e-02, 2.567e-01, -8.592e-03, 8.988e-02)); + r += mul(s6_7, M4(-8.702e-03, 1.090e-01, 3.289e-02, -6.309e-02, -1.532e-02, 5.859e-02, 1.011e-02, -1.282e-01, -2.085e-01, 1.303e-01, -7.007e-02, -5.548e-02, -1.617e-01, 1.559e-01, -1.230e-01, 1.372e-01)); + r += mul(s6_8, M4(-2.215e-02, 1.931e-02, -2.308e-02, 3.090e-02, -4.948e-03, 1.351e-01, -4.800e-02, -2.988e-02, -7.687e-02, 9.420e-03, -6.256e-04, -6.357e-02, -1.195e-01, -4.792e-02, -9.139e-02, 2.254e-01)); + r += mul(s7_0, M4(2.203e-03, -2.638e-02, 1.745e-02, 5.854e-02, 3.843e-02, 4.916e-02, 2.544e-02, -7.969e-03, -4.501e-02, 1.514e-02, 1.200e-02, -1.089e-01, 2.486e-02, 1.640e-02, 6.529e-03, -7.843e-02)); + r += mul(s7_1, M4(9.363e-02, -9.666e-02, -8.461e-02, -6.027e-02, 7.771e-02, 1.002e-01, -2.305e-01, 2.306e-02, -8.742e-02, 5.979e-02, 1.120e-01, 3.518e-02, -3.995e-02, -5.365e-03, -3.196e-02, 6.700e-02)); + r += mul(s7_2, M4(-7.439e-02, 8.947e-02, 5.290e-02, -1.557e-01, 1.587e-01, 1.261e-01, 9.843e-02, 2.207e-02, 1.622e-02, 1.095e-02, -6.562e-02, 5.018e-02, 1.767e-01, -1.163e-02, -1.534e-02, 3.771e-02)); + r += mul(s7_3, M4(-3.153e-03, -1.115e-01, -1.022e-01, -5.445e-02, 4.177e-02, 1.509e-01, -6.713e-02, 6.606e-02, 3.146e-02, 8.999e-02, 1.069e-01, 2.115e-01, -2.670e-02, -6.698e-03, -1.397e-01, -3.558e-02)); + r += mul(s7_4, M4(5.507e-02, 8.218e-02, 8.870e-02, 1.737e-01, 1.133e-01, 1.509e-01, 4.197e-03, 1.395e-01, 2.306e-03, 1.406e-01, 6.409e-02, -1.196e-01, -1.049e-02, -1.731e-01, -1.654e-01, 8.603e-02)); + r += mul(s7_5, M4(1.062e-01, 6.239e-02, -4.532e-02, -5.487e-02, 1.542e-01, -1.638e-01, 1.408e-02, 1.168e-01, 1.202e-01, -2.339e-02, 1.279e-01, -7.620e-02, -1.743e-02, -6.179e-02, 3.338e-02, -5.470e-02)); + r += mul(s7_6, M4(-1.300e-02, -2.047e-01, 8.754e-02, -4.462e-02, -3.802e-02, 4.599e-02, -1.657e-02, 4.666e-02, 3.600e-02, 3.374e-02, -2.250e-02, 1.236e-01, 2.197e-02, -4.661e-02, -1.053e-02, 
-3.981e-03)); + r += mul(s7_7, M4(-1.520e-01, 5.762e-03, 4.383e-02, -7.849e-02, 2.270e-02, -7.221e-02, 6.181e-02, 3.006e-02, -6.144e-03, 9.368e-03, -2.907e-02, -9.180e-02, 9.218e-02, -3.234e-02, 2.264e-02, 2.187e-02)); + r += mul(s7_8, M4(-2.746e-03, 6.316e-04, 6.491e-02, -9.523e-02, 1.773e-02, -7.556e-03, -3.814e-02, 2.902e-02, 1.077e-01, -2.771e-02, -8.368e-03, 1.335e-01, -6.737e-02, 3.118e-02, 5.495e-03, 4.450e-02)); + r += V4(-7.540e-03, -2.048e-02, -4.214e-03, -1.008e-02); + return r; +} + +void Pass12(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 
= max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, 
s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 13 +//!DESC conv12 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, 
V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.397e-03, -2.507e-02, 3.115e-02, 2.415e-02, 3.982e-02, -3.972e-02, -6.782e-02, -8.096e-02, -1.642e-01, 6.040e-02, -3.850e-02, -1.102e-01, 3.201e-02, -8.033e-02, 4.230e-02, 2.037e-02)); + r += mul(s0_1, M4(-1.313e-02, 2.910e-02, -6.413e-02, -7.355e-02, -2.697e-02, 2.228e-02, 1.411e-01, -1.088e-02, -1.628e-01, -1.257e-01, 1.380e-01, -9.211e-02, -1.886e-01, -8.809e-04, -7.095e-02, -6.747e-02)); + r += mul(s0_2, M4(1.101e-01, 1.681e-02, -5.337e-02, 4.518e-02, -2.836e-02, 2.875e-02, 7.521e-02, -4.616e-02, -2.643e-02, 5.796e-02, 1.729e-01, -1.620e-02, 6.658e-03, 9.503e-03, 3.965e-02, -3.183e-02)); + r += mul(s0_3, M4(-8.337e-02, -1.290e-02, 2.836e-02, -2.081e-02, 1.285e-02, 6.316e-02, 1.410e-01, -1.300e-02, 8.592e-03, -6.168e-02, 1.673e-01, -7.256e-03, 9.658e-02, -1.437e-02, -4.260e-02, 9.467e-02)); + r += mul(s0_4, M4(-7.681e-02, -4.291e-02, -1.979e-02, -1.736e-01, 1.542e-01, -5.348e-02, -4.007e-02, 1.381e-01, -8.457e-02, -2.382e-01, 2.236e-01, -1.390e-01, -2.633e-01, -2.178e-01, -8.184e-02, 3.088e-01)); + r += mul(s0_5, M4(-3.499e-02, 2.610e-02, 1.548e-03, 4.187e-02, -2.211e-02, 8.575e-02, 6.978e-02, -2.701e-02, 2.594e-02, -5.905e-02, 1.040e-01, -3.981e-02, 1.144e-01, 1.922e-01, 2.564e-02, -1.208e-01)); + r += mul(s0_6, M4(-8.450e-02, -1.947e-02, -2.304e-01, -6.763e-03, -8.898e-02, 9.945e-02, 2.341e-02, -4.767e-02, -3.517e-02, -1.880e-01, 9.954e-02, -6.023e-02, 3.020e-02, 1.812e-02, -2.772e-02, 1.657e-03)); + r += mul(s0_7, M4(-6.004e-02, 1.237e-01, 1.032e-01, -2.493e-02, 2.888e-02, 1.012e-02, 2.147e-02, 
-4.955e-02, 5.550e-02, -8.621e-02, -1.082e-02, -2.324e-02, 2.239e-02, 1.733e-01, 2.695e-01, -1.750e-01)); + r += mul(s0_8, M4(-7.833e-02, -9.751e-02, -1.117e-02, -7.731e-03, -2.557e-02, 3.695e-02, 1.527e-02, 1.387e-02, -6.895e-02, -5.964e-02, -4.942e-02, -1.406e-03, 4.612e-02, -3.747e-03, 4.250e-02, -5.014e-02)); + r += mul(s1_0, M4(7.934e-02, -1.423e-02, 5.552e-02, 5.383e-02, -9.127e-02, -2.168e-01, 2.002e-01, 3.897e-02, 2.180e-01, 1.213e-01, 4.437e-02, 1.211e-01, 7.367e-02, -2.357e-02, 4.699e-02, 4.793e-02)); + r += mul(s1_1, M4(-1.863e-01, -1.093e-01, -3.252e-02, -1.021e-01, -1.603e-01, 1.726e-01, -4.782e-02, -6.979e-02, -3.930e-02, -8.150e-02, -1.062e-01, -7.686e-02, -1.973e-02, 1.232e-01, -2.288e-03, 1.158e-03)); + r += mul(s1_2, M4(-6.841e-02, -1.142e-02, -5.207e-02, 8.529e-02, -1.082e-01, 2.558e-02, 7.596e-02, 8.927e-02, 5.918e-02, 2.307e-03, 1.047e-02, -1.678e-02, 2.242e-02, -3.374e-02, -2.585e-02, -1.486e-02)); + r += mul(s1_3, M4(-2.578e-02, 2.106e-02, 2.695e-02, 1.425e-02, 1.449e-01, 1.877e-02, -5.360e-03, -9.501e-03, 5.998e-02, -1.275e-03, 1.868e-03, 1.023e-01, -4.661e-02, -1.993e-02, -4.572e-02, 3.942e-02)); + r += mul(s1_4, M4(-1.070e-01, 1.508e-01, 1.678e-01, -2.106e-01, -1.590e-01, -2.091e-01, 3.379e-01, -1.663e-01, -6.316e-02, 1.046e-02, -1.199e-02, 1.777e-02, -2.246e-01, -1.937e-01, -2.215e-02, 1.867e-01)); + r += mul(s1_5, M4(-2.094e-01, 9.463e-02, -6.119e-02, 1.755e-01, 2.438e-02, -1.608e-01, -6.362e-02, 9.098e-03, 1.941e-01, 6.520e-02, 1.421e-04, 6.321e-02, 7.592e-02, 2.601e-02, 7.691e-02, 1.529e-02)); + r += mul(s1_6, M4(-8.407e-03, 3.070e-02, -1.444e-01, 5.257e-02, -9.693e-02, -1.035e-01, 8.972e-02, -1.195e-01, -3.903e-02, 4.734e-02, 6.398e-02, 5.925e-02, 2.000e-02, 2.156e-02, -1.389e-01, 4.243e-02)); + r += mul(s1_7, M4(5.691e-02, 8.460e-02, 8.371e-02, -1.346e-01, -1.417e-01, -1.081e-01, 9.715e-02, -5.800e-02, 7.237e-04, 9.527e-03, -5.995e-02, 9.008e-03, 1.199e-01, -4.870e-02, 1.424e-01, -5.562e-03)); + r += mul(s1_8, M4(-4.500e-02, 
-8.459e-02, 6.573e-02, -6.560e-03, -4.299e-02, -9.009e-02, 1.337e-01, 3.893e-03, 4.751e-03, 1.002e-02, -2.441e-02, 1.833e-02, 2.459e-02, -1.182e-01, 5.638e-02, -4.341e-02)); + r += mul(s2_0, M4(1.669e-01, 1.216e-01, -2.061e-01, 1.809e-02, -9.047e-02, 1.353e-01, -1.150e-01, -8.150e-02, 4.362e-03, 4.912e-02, -2.679e-02, 7.399e-03, -5.346e-02, 6.318e-02, -4.539e-02, -3.901e-02)); + r += mul(s2_1, M4(4.280e-02, 2.413e-02, -1.279e-01, -3.465e-03, -8.710e-02, 2.938e-02, -2.686e-01, -1.687e-01, -4.013e-02, -9.156e-03, -3.806e-02, 5.557e-02, -2.322e-01, 1.293e-01, -5.066e-02, 1.309e-02)); + r += mul(s2_2, M4(1.148e-01, -4.865e-02, -1.102e-01, -5.574e-02, 4.287e-02, -9.630e-02, -4.427e-02, 4.931e-02, 8.348e-02, 1.964e-02, 9.953e-03, 7.870e-03, -3.408e-02, -2.018e-01, -9.873e-03, 1.215e-02)); + r += mul(s2_3, M4(1.765e-01, 5.511e-02, -3.297e-02, 1.033e-02, -7.185e-02, 7.548e-02, 8.293e-02, 2.149e-01, 1.026e-01, -1.676e-01, 3.156e-02, 1.246e-01, -2.424e-01, -2.664e-01, 6.883e-02, -7.061e-02)); + r += mul(s2_4, M4(-8.945e-02, -2.058e-03, 4.893e-03, 1.060e-02, -1.917e-01, 2.733e-02, 1.772e-01, -6.912e-02, 1.940e-02, -1.210e-01, -2.042e-01, -7.255e-02, -2.123e-01, 2.424e-01, 1.922e-01, 7.533e-02)); + r += mul(s2_5, M4(1.628e-01, -3.888e-02, -6.902e-02, -6.696e-02, 1.214e-01, -1.368e-01, 6.016e-03, 1.067e-01, 4.747e-02, -1.004e-01, -1.289e-01, 6.043e-02, -2.105e-02, 1.687e-01, 4.420e-02, -8.027e-03)); + r += mul(s2_6, M4(-1.510e-01, -5.490e-02, -1.297e-01, 3.940e-02, -8.791e-03, -1.223e-01, -1.458e-01, 8.238e-02, -1.630e-02, 4.674e-02, 1.099e-01, -6.963e-02, 2.266e-03, 1.970e-02, -4.032e-02, 4.027e-02)); + r += mul(s2_7, M4(3.974e-02, 1.602e-01, 1.467e-02, 4.403e-02, -6.402e-02, -8.992e-02, -1.441e-01, 1.088e-01, 3.071e-03, 1.187e-01, 1.116e-01, 6.374e-02, 1.102e-01, -1.638e-02, -5.427e-02, 6.011e-02)); + r += mul(s2_8, M4(-5.415e-03, 1.614e-02, -4.642e-02, 7.718e-03, -9.478e-02, -3.681e-02, -3.789e-02, 2.388e-02, -9.976e-02, 1.317e-02, 4.867e-03, 5.080e-02, 1.671e-02, 
-4.394e-02, 1.850e-01, -1.010e-01)); + r += mul(s3_0, M4(4.740e-02, -6.012e-02, -3.478e-02, -1.880e-03, -5.525e-02, -1.306e-01, 3.589e-02, 6.313e-02, 1.849e-01, 1.954e-03, 1.245e-01, 1.461e-02, 7.237e-02, 1.013e-01, -3.245e-02, -3.877e-02)); + r += mul(s3_1, M4(6.860e-02, 7.825e-02, -2.664e-02, -2.195e-02, -3.145e-02, 1.214e-01, 1.442e-01, -2.997e-02, -7.452e-02, 2.469e-01, 5.195e-02, 6.900e-02, 9.190e-03, 3.711e-02, -6.706e-02, 2.437e-02)); + r += mul(s3_2, M4(-1.527e-02, 4.180e-02, -8.521e-02, -3.332e-02, 1.535e-02, -1.797e-02, 4.460e-02, 3.166e-02, 1.183e-02, -6.006e-02, -6.930e-02, -1.633e-02, -1.657e-02, 1.111e-02, -1.051e-02, -2.269e-02)); + r += mul(s3_3, M4(-6.838e-02, 2.394e-02, 1.204e-02, 2.979e-02, 1.136e-03, 5.229e-05, 1.704e-01, 5.685e-02, 8.749e-02, -7.986e-02, -1.910e-01, 3.314e-02, -2.369e-02, 1.071e-02, 4.937e-03, -8.059e-02)); + r += mul(s3_4, M4(-9.546e-02, -9.481e-02, -3.515e-02, 1.106e-01, 1.133e-01, -1.358e-01, -7.766e-02, 2.247e-01, 8.541e-02, -1.843e-01, 2.519e-01, -4.787e-02, -8.646e-02, -5.881e-02, 7.200e-02, 5.031e-02)); + r += mul(s3_5, M4(4.109e-02, 1.957e-02, -4.424e-02, 2.449e-03, 7.263e-02, 7.316e-02, -3.963e-02, 3.027e-02, -2.475e-01, -2.881e-02, 9.413e-02, -9.791e-03, -1.517e-02, 1.484e-01, -2.884e-02, -3.080e-02)); + r += mul(s3_6, M4(-9.201e-02, 8.311e-02, 2.895e-02, 4.046e-03, -3.389e-02, 6.215e-03, 2.693e-01, -5.677e-02, -1.315e-02, 2.071e-01, 1.434e-01, -4.370e-02, 1.987e-02, -1.004e-02, 9.911e-02, 2.902e-02)); + r += mul(s3_7, M4(-1.415e-01, 4.222e-02, -2.972e-02, 1.147e-01, -5.209e-02, -2.712e-02, 6.404e-02, 3.877e-03, 4.556e-02, -3.120e-01, -2.042e-01, 6.441e-02, -7.920e-02, 7.186e-02, 1.415e-02, 3.390e-02)); + r += mul(s3_8, M4(-9.644e-02, 2.409e-02, 1.069e-01, 2.458e-02, -1.209e-01, 4.212e-02, -3.278e-03, 6.059e-04, -1.440e-01, 1.543e-01, -1.242e-03, 6.723e-02, -2.569e-02, -6.542e-02, 2.427e-02, 2.092e-02)); + r += mul(s4_0, M4(3.991e-03, -4.901e-02, -3.711e-02, -1.704e-02, 6.526e-02, 3.046e-02, -4.469e-02, -7.765e-02, 
6.067e-02, -1.421e-01, 6.070e-02, 8.194e-02, -2.711e-02, 3.438e-03, 1.344e-02, -2.153e-02)); + r += mul(s4_1, M4(1.991e-02, -4.666e-02, 6.617e-02, 3.197e-03, -9.838e-02, 4.802e-02, -1.296e-01, -7.719e-02, -2.034e-02, 9.824e-02, 1.153e-02, -8.796e-02, -5.803e-02, 2.489e-01, -1.353e-01, -4.269e-02)); + r += mul(s4_2, M4(-3.196e-02, 4.000e-02, 5.576e-02, -1.748e-02, 4.365e-02, -1.031e-01, -2.163e-04, -4.052e-02, -2.934e-02, 3.231e-02, -6.368e-02, -4.976e-02, -1.307e-02, -3.818e-02, -1.569e-01, -3.249e-02)); + r += mul(s4_3, M4(1.348e-02, 1.970e-02, 1.851e-01, 4.361e-02, 1.209e-01, 6.510e-03, -1.089e-01, 4.626e-02, -9.024e-02, -5.603e-02, -9.403e-02, 1.504e-02, -4.843e-02, 2.236e-02, 1.679e-03, -3.139e-02)); + r += mul(s4_4, M4(1.056e-01, -1.621e-01, -2.201e-01, 1.882e-01, 1.910e-01, -1.139e-01, 2.343e-03, 3.804e-02, 4.681e-02, -2.508e-01, -8.845e-02, -1.726e-01, -1.087e-01, 8.428e-02, 3.052e-01, 1.148e-01)); + r += mul(s4_5, M4(7.644e-02, -3.893e-03, -8.044e-02, -1.695e-03, 6.434e-02, -6.097e-02, 3.127e-02, 3.009e-02, 6.361e-02, 1.526e-02, 6.214e-02, 1.346e-02, -2.310e-01, 2.733e-03, 1.991e-01, -1.005e-01)); + r += mul(s4_6, M4(3.758e-03, 1.069e-01, 6.274e-02, 3.918e-02, -3.376e-02, -4.262e-02, 1.166e-02, -5.441e-02, 1.276e-02, 5.302e-02, 4.024e-02, 4.031e-02, 2.924e-02, -1.500e-02, 1.072e-01, 9.864e-03)); + r += mul(s4_7, M4(4.560e-02, 3.091e-03, 8.830e-02, 2.415e-02, 7.097e-02, 4.856e-02, 1.249e-02, 3.016e-02, -4.067e-02, 5.510e-02, -5.353e-02, 4.705e-02, 6.581e-02, -1.605e-01, -2.567e-02, 1.360e-01)); + r += mul(s4_8, M4(-1.925e-02, 8.326e-02, 1.014e-01, 2.056e-02, -8.488e-03, -2.376e-02, -9.331e-02, -2.036e-02, -6.477e-02, -9.741e-02, 3.254e-02, 2.406e-02, 1.069e-01, -2.900e-02, -2.892e-02, 8.558e-03)); + r += mul(s5_0, M4(2.073e-02, 2.304e-02, 1.707e-02, -2.880e-02, 1.728e-01, 4.448e-02, 1.766e-02, 5.097e-02, 4.626e-02, 2.093e-03, 3.893e-02, 1.900e-02, 4.488e-03, -6.663e-02, -5.151e-02, 2.597e-02)); + r += mul(s5_1, M4(6.632e-03, 6.420e-02, -5.954e-02, 
-1.401e-02, -1.762e-01, 1.516e-02, -1.572e-01, 2.048e-03, -7.453e-02, 1.642e-01, -5.123e-02, -4.086e-03, 1.272e-01, 9.005e-02, -3.063e-02, 7.717e-02)); + r += mul(s5_2, M4(8.182e-02, -5.984e-02, 2.433e-02, -4.260e-02, 1.285e-01, 6.395e-02, -1.258e-01, -1.890e-02, -3.552e-02, -1.902e-01, 4.936e-02, -1.089e-02, 1.330e-02, -1.291e-02, 7.128e-02, 2.062e-02)); + r += mul(s5_3, M4(-1.814e-01, 7.015e-02, 1.626e-01, 1.078e-01, 3.475e-02, 1.650e-01, -2.960e-02, -2.535e-02, -1.735e-01, -1.667e-02, 1.841e-01, 2.134e-02, 2.445e-02, -1.719e-02, -2.750e-02, 3.184e-02)); + r += mul(s5_4, M4(-1.157e-01, 8.743e-02, -1.785e-01, 1.966e-01, 1.679e-01, -1.695e-01, 1.127e-01, -9.157e-03, 1.483e-01, -1.440e-01, 1.475e-01, -1.654e-01, 1.709e-01, -4.515e-02, 9.961e-02, -3.409e-02)); + r += mul(s5_5, M4(-5.665e-02, -6.123e-02, 2.767e-02, -2.876e-02, 3.362e-02, 2.646e-02, 2.164e-01, -1.702e-02, 2.073e-02, 3.452e-03, 1.165e-01, 1.009e-02, -7.879e-02, 7.307e-02, 7.719e-02, -1.188e-01)); + r += mul(s5_6, M4(1.073e-01, -5.339e-02, -1.523e-01, -4.295e-02, -5.219e-02, -5.274e-04, 9.527e-02, -3.133e-02, -8.614e-03, -1.059e-01, 9.820e-02, 1.012e-02, 2.436e-02, 9.228e-02, 8.783e-02, -5.642e-02)); + r += mul(s5_7, M4(2.379e-01, -1.225e-01, 9.882e-03, 9.354e-02, -1.101e-01, 1.232e-01, 3.131e-02, -2.203e-02, 3.696e-02, -3.172e-02, 1.365e-01, 7.894e-02, 4.409e-02, -1.210e-01, -9.969e-02, -1.792e-02)); + r += mul(s5_8, M4(1.454e-01, 2.593e-02, 1.723e-01, -5.088e-02, -1.571e-02, 5.285e-02, -1.257e-01, -3.311e-02, -5.324e-02, 6.856e-02, 7.979e-02, -1.566e-02, 7.197e-02, 3.892e-02, -2.399e-02, -5.203e-02)); + r += mul(s6_0, M4(-7.839e-02, 1.398e-03, -7.348e-02, -6.479e-02, -5.027e-02, 1.044e-01, -1.014e-02, 3.825e-02, 1.008e-01, 7.028e-02, 2.653e-02, 2.244e-02, -4.615e-02, -4.465e-02, 1.302e-01, -5.276e-03)); + r += mul(s6_1, M4(1.690e-01, 2.425e-01, -1.174e-01, 9.586e-03, -7.434e-02, -7.413e-02, -1.179e-01, -6.104e-02, -5.331e-02, -1.369e-02, 5.085e-03, 2.225e-02, -1.559e-01, -5.708e-02, -1.527e-02, 
-5.978e-02)); + r += mul(s6_2, M4(6.324e-02, -6.753e-02, -1.233e-01, 8.027e-03, -3.293e-02, -1.514e-01, 1.218e-02, -2.946e-02, 1.302e-01, -1.332e-02, -1.594e-02, -8.539e-02, -2.705e-01, -4.581e-02, 1.323e-01, 2.371e-01)); + r += mul(s6_3, M4(-1.933e-01, 6.691e-02, -2.664e-01, 1.988e-03, -8.715e-02, -5.864e-02, -1.366e-02, -5.674e-02, -2.230e-02, 4.250e-02, 4.865e-02, 4.707e-02, 3.169e-02, -5.539e-02, -1.776e-02, -9.227e-02)); + r += mul(s6_4, M4(-1.037e-01, 1.628e-01, -1.394e-01, -7.656e-02, 7.386e-03, 1.390e-02, 1.271e-02, 2.873e-02, -2.167e-02, 1.093e-01, 2.433e-01, -2.497e-01, 7.878e-02, -2.173e-01, -3.023e-01, -3.329e-01)); + r += mul(s6_5, M4(-6.338e-02, 3.530e-03, 3.445e-03, 4.110e-02, -2.004e-02, 6.223e-03, 1.371e-01, -2.474e-02, 1.499e-01, 1.088e-01, 4.740e-02, -1.042e-01, 2.863e-01, -1.401e-01, 1.633e-02, -8.017e-02)); + r += mul(s6_6, M4(-3.823e-02, 1.272e-01, -1.866e-03, 7.398e-02, 3.718e-02, -4.374e-02, 6.790e-02, -6.153e-03, -6.573e-02, -1.742e-01, -1.284e-03, -3.181e-02, -8.365e-02, -4.302e-02, -9.620e-03, -4.319e-02)); + r += mul(s6_7, M4(2.086e-01, -2.530e-01, -3.056e-01, -1.304e-01, -2.967e-02, 7.706e-02, 1.326e-01, -3.692e-02, 1.018e-01, -2.921e-02, 6.086e-02, 1.081e-01, -2.208e-02, 1.080e-01, 2.565e-01, -1.048e-02)); + r += mul(s6_8, M4(1.168e-01, -1.340e-02, -1.155e-01, 1.552e-03, -9.284e-03, -7.575e-03, -9.098e-02, 7.785e-03, 1.121e-01, -2.761e-02, 5.135e-02, -1.342e-01, -4.632e-02, 3.247e-02, 6.640e-02, -1.430e-01)); + r += mul(s7_0, M4(-5.174e-02, -3.802e-02, 2.501e-02, 3.943e-02, -8.928e-02, -4.067e-03, 1.307e-01, -5.172e-02, -7.696e-03, -5.941e-02, 1.575e-01, 6.193e-02, 3.222e-02, 1.874e-02, 4.168e-02, 5.419e-02)); + r += mul(s7_1, M4(5.314e-02, -2.911e-02, 6.485e-02, -5.315e-02, -1.255e-01, -1.392e-03, 1.852e-02, -6.462e-02, -1.095e-01, 5.401e-02, -4.038e-02, 1.017e-01, 2.572e-02, -8.017e-02, -3.257e-02, 3.747e-02)); + r += mul(s7_2, M4(-1.978e-02, -1.773e-03, -1.841e-02, 9.889e-03, 3.958e-02, -4.974e-03, 6.257e-02, 9.362e-02, -2.719e-02, 
-1.428e-02, 1.190e-02, -5.830e-02, -7.803e-02, -1.117e-01, 1.294e-01, 2.755e-02)); + r += mul(s7_3, M4(-1.558e-01, 1.600e-01, -1.352e-01, 3.225e-02, 6.283e-02, 3.267e-02, -1.065e-01, -4.893e-02, -7.680e-02, -1.147e-02, 2.283e-02, 9.253e-02, -1.045e-02, 7.882e-03, -3.457e-02, -6.666e-03)); + r += mul(s7_4, M4(3.745e-02, -1.399e-03, -2.196e-01, -5.805e-02, -5.250e-02, -2.187e-03, -6.491e-02, 1.274e-01, 5.554e-02, 4.309e-02, -8.571e-02, -1.076e-01, -1.224e-02, 2.910e-03, -1.020e-01, -1.131e-01)); + r += mul(s7_5, M4(6.560e-02, -1.500e-01, -7.759e-02, 2.132e-02, -1.682e-02, -4.589e-02, 1.357e-01, -5.383e-02, -4.839e-02, -7.040e-02, -1.398e-01, 3.404e-02, 3.647e-02, 9.801e-02, -1.931e-02, 2.339e-02)); + r += mul(s7_6, M4(-3.344e-02, -1.224e-01, 6.334e-02, -4.529e-02, 5.797e-03, 8.809e-02, 5.366e-02, -7.795e-03, -3.646e-02, -5.899e-03, -1.115e-01, -3.916e-02, -1.248e-02, 9.711e-03, -3.724e-02, 1.345e-02)); + r += mul(s7_7, M4(6.573e-02, -9.046e-02, 1.977e-01, -5.902e-02, 1.941e-02, -9.373e-02, -7.441e-02, -2.884e-02, -8.218e-02, -8.859e-02, -1.948e-01, 9.318e-02, -8.750e-03, 6.997e-02, 1.364e-01, -1.964e-02)); + r += mul(s7_8, M4(2.416e-02, -2.571e-02, 1.039e-01, -3.071e-02, 3.977e-02, -7.836e-02, -6.999e-02, 1.225e-02, -3.535e-02, 4.576e-02, -3.866e-02, 8.169e-02, 2.912e-03, 9.796e-02, -9.763e-02, -3.325e-02)); + r += V4(3.286e-03, -1.519e-02, -1.301e-02, 8.360e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 
s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.735e-02, 1.257e-01, 3.693e-02, 4.120e-02, -4.737e-03, -2.852e-02, -1.082e-01, 4.594e-02, 2.843e-01, -4.893e-02, 9.070e-02, -1.192e-01, -6.449e-02, -3.347e-02, 1.391e-03, 5.724e-03)); + r += mul(s0_1, M4(1.055e-01, 7.005e-02, -2.144e-01, -6.068e-02, -2.865e-02, -2.763e-02, 7.198e-02, 8.962e-02, 3.127e-02, -4.890e-02, -5.165e-02, 7.375e-03, 3.223e-02, -3.471e-03, 1.291e-02, 3.367e-02)); + r += mul(s0_2, M4(-5.957e-02, 5.653e-02, -3.870e-03, 3.876e-02, 6.014e-02, -1.175e-01, -4.020e-02, 7.707e-03, 6.639e-02, 5.481e-02, -7.995e-03, -2.986e-02, 6.320e-02, 8.378e-02, 2.165e-02, 8.805e-04)); + r += mul(s0_3, M4(8.128e-02, 7.841e-03, 4.775e-02, -9.427e-03, -2.025e-01, 3.781e-02, -1.438e-02, 1.348e-01, 7.946e-02, -7.094e-02, 2.525e-01, 8.006e-02, -4.424e-02, 1.466e-01, -5.797e-02, 2.469e-02)); + r += mul(s0_4, M4(1.546e-02, 3.020e-01, -7.200e-03, 1.162e-01, -1.535e-01, -1.677e-01, -1.889e-02, -7.218e-02, 1.884e-02, 1.402e-01, 1.545e-01, -1.332e-01, -3.194e-01, -2.470e-01, -2.017e-01, -1.419e-01)); + r += mul(s0_5, M4(-5.847e-02, -3.240e-02, 1.593e-01, 5.361e-02, 4.448e-02, -2.931e-02, -1.197e-01, 3.625e-02, 4.968e-02, 6.573e-02, 3.133e-02, 2.704e-02, -4.164e-02, 3.470e-02, 1.581e-03, 5.542e-02)); + r += mul(s0_6, M4(-5.268e-02, -5.200e-02, -1.156e-01, -2.042e-03, -3.021e-02, 5.858e-02, -8.935e-02, -7.148e-02, 1.408e-01, 1.224e-02, -3.358e-04, 7.851e-02, -4.945e-02, -7.446e-02, -1.190e-01, -2.825e-02)); + r += mul(s0_7, M4(1.893e-02, -1.762e-02, 4.501e-02, -9.682e-02, -1.633e-02, -4.567e-02, -4.076e-02, 3.502e-02, 1.411e-01, 1.478e-02, 2.074e-01, -1.008e-01, -6.023e-02, -2.280e-01, 1.658e-01, 9.096e-03)); + r += mul(s0_8, M4(2.054e-02, -3.343e-02, 5.537e-02, 3.073e-02, -4.820e-03, -4.301e-02, -5.973e-02, 5.608e-02, 2.994e-03, 3.080e-02, 4.691e-02, -7.288e-02, 5.702e-03, -1.103e-01, -1.659e-02, 1.058e-02)); + r += mul(s1_0, M4(1.202e-01, 9.633e-02, 1.875e-02, 6.053e-02, 
-7.085e-02, 2.520e-02, -3.057e-02, -2.569e-01, 2.196e-02, -1.229e-03, -4.392e-02, 5.924e-02, -7.505e-02, 8.129e-02, 2.044e-02, -2.790e-02)); + r += mul(s1_1, M4(2.244e-01, 9.546e-02, -1.172e-01, -1.139e-01, 3.678e-02, 7.077e-03, -4.363e-02, -4.238e-02, 7.629e-02, 1.122e-02, 1.186e-02, 4.842e-02, 1.057e-02, 9.070e-02, -7.385e-02, 9.828e-02)); + r += mul(s1_2, M4(1.885e-02, 5.890e-02, -9.566e-02, 7.835e-02, 6.631e-02, 1.552e-02, 1.447e-01, 3.533e-02, 9.990e-03, -1.430e-02, -1.986e-02, 2.274e-02, 4.836e-04, 8.606e-02, 9.545e-02, -3.450e-02)); + r += mul(s1_3, M4(4.787e-02, 5.541e-02, -3.048e-03, -3.691e-02, 7.478e-03, 1.421e-01, -8.807e-02, 1.014e-01, 1.648e-01, -5.264e-02, -9.696e-02, 2.583e-02, 7.668e-02, 2.025e-02, 3.494e-03, -1.676e-02)); + r += mul(s1_4, M4(1.184e-01, 4.471e-01, 5.125e-02, 2.426e-01, 8.767e-02, -6.652e-02, -2.313e-01, -2.243e-01, 3.143e-02, -1.016e-01, -1.747e-01, 1.479e-01, -9.054e-02, -3.289e-01, 2.006e-01, -9.388e-02)); + r += mul(s1_5, M4(-7.381e-02, 8.401e-02, 1.774e-01, -1.027e-01, -1.572e-01, -4.468e-02, 1.375e-01, 1.592e-01, -6.240e-02, -1.650e-02, 6.481e-02, 7.350e-02, -7.457e-02, -4.640e-02, -4.966e-02, 8.236e-02)); + r += mul(s1_6, M4(-5.735e-02, -3.059e-03, -8.445e-02, 4.472e-02, 1.572e-01, 4.602e-02, -1.526e-01, -1.439e-01, 9.462e-03, -5.136e-02, -4.954e-02, 9.235e-02, 4.871e-02, -5.367e-02, -1.627e-01, -2.489e-02)); + r += mul(s1_7, M4(-3.190e-02, 8.901e-03, -1.846e-02, -7.125e-02, 9.040e-02, -6.736e-03, -1.474e-01, 5.559e-02, 3.965e-02, -5.109e-02, 4.272e-02, 1.816e-02, -3.802e-02, -9.892e-02, 8.593e-02, 6.199e-02)); + r += mul(s1_8, M4(-1.994e-02, -1.086e-01, 1.812e-02, 6.639e-02, 1.711e-02, -3.822e-02, 1.984e-01, 6.729e-02, 1.585e-02, -6.321e-02, 1.131e-01, 1.278e-02, 6.772e-02, -4.626e-02, 5.356e-02, -3.447e-02)); + r += mul(s2_0, M4(-8.687e-02, -1.183e-01, -2.584e-02, -9.472e-02, 3.558e-02, 6.771e-02, -1.168e-01, -3.359e-03, -2.214e-02, -5.336e-02, -6.827e-02, -2.053e-02, 2.150e-01, -1.302e-01, 7.260e-03, -2.987e-02)); + r += 
mul(s2_1, M4(7.261e-03, 1.602e-02, 1.279e-01, -6.558e-02, 3.888e-02, 1.409e-01, 2.357e-02, -1.282e-01, -5.214e-02, 9.965e-02, 1.664e-01, -4.437e-02, 1.500e-01, 1.286e-02, 2.166e-03, -8.977e-02)); + r += mul(s2_2, M4(-7.545e-02, -1.094e-02, -2.077e-01, -6.931e-02, -3.558e-02, -8.829e-03, 8.852e-02, -6.157e-02, 4.585e-02, -1.417e-02, -7.058e-03, -9.259e-02, -2.151e-02, -8.374e-02, 5.304e-02, 7.869e-03)); + r += mul(s2_3, M4(-7.447e-02, -2.303e-03, -9.744e-02, -1.738e-01, 1.803e-01, 1.162e-01, 1.207e-01, -2.709e-01, -8.762e-03, 3.008e-02, -1.453e-01, -2.326e-02, -1.915e-01, -2.543e-02, 1.697e-02, 4.576e-02)); + r += mul(s2_4, M4(-9.671e-04, -2.203e-02, 1.922e-01, 2.322e-01, 4.921e-04, -1.192e-01, 1.833e-01, -2.379e-01, -6.566e-03, 7.472e-02, 1.633e-01, 3.039e-02, -7.424e-02, -6.433e-02, 5.091e-01, -1.973e-01)); + r += mul(s2_5, M4(6.253e-02, 2.878e-02, -6.217e-02, -7.024e-02, -1.695e-02, 5.659e-02, 1.182e-01, -1.998e-02, -4.351e-02, 3.363e-02, 1.152e-01, -2.473e-02, 1.603e-01, -1.438e-01, -2.102e-01, 6.272e-02)); + r += mul(s2_6, M4(-2.256e-01, 4.487e-02, -3.573e-01, 1.982e-02, 5.248e-03, 4.702e-02, -2.416e-03, 4.224e-02, 1.336e-02, -2.063e-02, 6.145e-02, -4.382e-02, -3.535e-02, -8.743e-03, 2.422e-01, 1.258e-02)); + r += mul(s2_7, M4(-5.776e-03, 2.321e-02, 1.058e-02, 2.529e-02, 4.207e-02, 8.916e-02, 1.648e-01, -1.728e-01, 7.123e-02, 8.087e-02, -6.373e-02, 2.193e-02, 6.426e-03, 4.454e-02, 1.290e-01, -1.136e-02)); + r += mul(s2_8, M4(2.321e-02, 4.309e-02, -1.516e-01, -1.444e-02, 4.668e-02, 3.631e-02, 1.015e-01, 3.489e-02, 4.619e-02, 2.500e-02, 3.346e-02, 1.279e-01, 4.694e-02, 1.132e-02, 2.681e-02, -5.030e-02)); + r += mul(s3_0, M4(-5.515e-02, -2.673e-02, -6.880e-02, 5.894e-03, -1.158e-01, 4.061e-02, -6.222e-02, 3.967e-02, -1.514e-01, -7.631e-02, -6.716e-03, 7.126e-02, 1.299e-02, -4.424e-03, 1.829e-02, 4.004e-02)); + r += mul(s3_1, M4(2.444e-03, -8.791e-02, 5.420e-02, 9.974e-02, 4.461e-02, 3.342e-03, 9.464e-02, 3.879e-02, -1.713e-01, -5.235e-02, 2.785e-02, 1.584e-01, 
-3.401e-02, 3.073e-02, -1.091e-01, -1.046e-02)); + r += mul(s3_2, M4(7.487e-03, -4.916e-02, -3.904e-02, 5.671e-02, 3.968e-02, -9.121e-03, 3.282e-03, -8.274e-02, -5.194e-02, -4.783e-02, 6.700e-02, -1.059e-01, 1.419e-02, -4.054e-02, -3.414e-02, 6.595e-02)); + r += mul(s3_3, M4(9.336e-02, 7.634e-02, -6.694e-02, 6.250e-02, -1.083e-02, -2.088e-02, 1.763e-01, 1.998e-01, 1.261e-02, 6.359e-02, -5.634e-02, 1.232e-01, 4.095e-02, -8.641e-02, -1.667e-02, -1.219e-01)); + r += mul(s3_4, M4(-1.574e-02, -4.739e-02, 1.616e-01, 6.044e-02, -3.058e-01, -7.430e-02, 2.627e-01, -1.224e-01, -1.501e-01, -2.186e-01, 5.963e-02, -8.538e-03, -7.729e-02, 7.135e-02, 1.614e-01, -4.085e-02)); + r += mul(s3_5, M4(7.641e-02, -1.052e-02, 4.771e-02, 8.236e-02, 4.211e-02, 7.796e-02, -5.710e-02, -3.686e-02, 1.179e-01, 1.063e-02, -1.615e-01, 2.818e-02, 6.312e-02, -5.913e-02, -2.742e-03, 6.620e-02)); + r += mul(s3_6, M4(6.190e-03, 7.873e-02, -1.249e-03, -7.502e-02, 8.608e-02, -1.502e-04, 7.447e-02, 1.288e-01, 2.167e-02, -1.348e-02, 3.409e-02, 3.650e-02, -3.395e-02, -2.510e-02, -3.911e-03, -3.501e-02)); + r += mul(s3_7, M4(3.712e-02, 1.711e-02, -3.699e-02, 2.076e-03, 5.682e-02, -3.576e-02, 1.728e-01, -2.616e-02, -8.619e-02, -3.032e-02, -8.233e-02, 8.712e-02, 5.074e-02, 3.775e-02, 2.481e-02, 1.785e-03)); + r += mul(s3_8, M4(-2.020e-02, 4.481e-02, -9.187e-02, 1.451e-01, 4.523e-02, 3.138e-02, -8.141e-02, 4.834e-02, 9.495e-02, 1.097e-02, -1.188e-01, 6.575e-02, -8.121e-02, -3.479e-02, 4.346e-02, 1.156e-01)); + r += mul(s4_0, M4(3.827e-02, 2.264e-02, 7.146e-03, 9.552e-03, -3.035e-02, -6.616e-02, 2.747e-02, 3.169e-02, -2.774e-02, 5.990e-02, 2.920e-02, 4.000e-02, -2.525e-02, 7.427e-02, 8.153e-02, -6.004e-02)); + r += mul(s4_1, M4(5.245e-02, -3.401e-02, 1.269e-02, 4.907e-02, -1.172e-02, 1.009e-01, -2.521e-02, -5.679e-02, 1.087e-01, -1.719e-03, -8.819e-02, 8.292e-03, 1.534e-01, 5.315e-02, 9.320e-03, -2.679e-02)); + r += mul(s4_2, M4(5.869e-02, 2.417e-02, 4.672e-03, 4.663e-02, -5.904e-02, 2.896e-03, -3.050e-02, 
-5.937e-02, 3.153e-02, -1.233e-02, 1.412e-02, 2.997e-02, 1.389e-01, 1.124e-01, -9.287e-02, -6.713e-02)); + r += mul(s4_3, M4(7.970e-03, 3.703e-02, -5.000e-02, 1.005e-01, -1.264e-02, 2.367e-02, 1.433e-02, -9.336e-02, -5.631e-02, 8.289e-03, -8.105e-02, 8.863e-02, -4.582e-02, -2.060e-02, 6.368e-02, 1.711e-01)); + r += mul(s4_4, M4(-2.150e-01, -3.557e-02, 5.991e-02, 7.201e-02, 1.718e-02, 4.575e-02, -1.644e-01, -1.481e-01, -8.840e-02, -4.702e-02, -1.691e-02, -1.494e-01, -4.138e-02, 1.532e-01, -8.112e-02, 1.186e-01)); + r += mul(s4_5, M4(1.845e-02, 1.165e-01, 3.826e-02, -2.388e-02, 1.116e-02, -4.152e-03, 9.363e-02, -1.166e-01, -1.029e-01, -1.316e-02, -2.975e-02, 1.340e-01, 4.297e-02, 1.181e-01, -1.261e-01, 1.395e-01)); + r += mul(s4_6, M4(6.679e-02, 5.499e-02, 2.737e-02, -3.309e-02, 1.534e-02, -3.395e-02, -6.059e-02, -3.089e-03, -1.570e-01, 5.654e-02, 1.502e-01, 9.533e-02, 1.103e-02, 1.742e-01, 3.309e-02, -1.453e-01)); + r += mul(s4_7, M4(5.906e-02, -5.997e-02, 1.131e-01, 4.987e-02, -2.817e-02, 2.907e-02, 1.089e-01, -1.133e-01, 7.584e-02, 2.061e-03, -1.358e-01, 5.933e-02, -5.213e-02, 2.508e-01, -2.433e-01, 6.354e-03)); + r += mul(s4_8, M4(-3.449e-02, 4.329e-02, -1.792e-02, 6.017e-02, -2.674e-02, 5.704e-02, -5.931e-02, 2.076e-02, -1.127e-02, -1.353e-02, 1.180e-01, 3.554e-02, -2.870e-02, 2.164e-01, -2.961e-02, 3.005e-02)); + r += mul(s5_0, M4(6.372e-02, 1.479e-01, -1.483e-02, -1.061e-02, 1.339e-01, -2.423e-02, 1.041e-02, 5.357e-02, -6.657e-02, 7.154e-03, 4.060e-02, 2.593e-02, -1.669e-02, 6.874e-03, -7.642e-02, -5.352e-03)); + r += mul(s5_1, M4(4.709e-02, 8.202e-02, -1.159e-01, 6.583e-02, -4.216e-02, 2.613e-01, 2.803e-02, 2.027e-02, 4.155e-02, 4.476e-02, -1.150e-01, -6.574e-02, -1.126e-01, -2.907e-02, 1.587e-01, 1.212e-02)); + r += mul(s5_2, M4(5.277e-02, 3.775e-02, 6.608e-03, 1.704e-02, 1.979e-02, 9.919e-02, -6.536e-02, 7.964e-03, -1.121e-01, 3.265e-02, 9.050e-02, 2.277e-02, 3.423e-02, 1.819e-02, 5.676e-02, -6.024e-02)); + r += mul(s5_3, M4(4.058e-02, -8.537e-02, 
3.050e-02, 5.222e-02, 1.932e-02, -4.383e-02, 1.210e-02, 8.182e-02, 7.846e-02, -1.675e-02, -1.672e-02, 8.299e-02, -1.128e-01, -2.596e-02, -5.494e-02, 6.638e-02)); + r += mul(s5_4, M4(-5.082e-02, -2.212e-01, 2.288e-01, -1.747e-01, 1.390e-01, 6.747e-02, -9.713e-02, 1.997e-01, -1.472e-01, -1.280e-01, 3.035e-01, 2.744e-02, -1.600e-02, -2.677e-03, -9.433e-02, 8.948e-03)); + r += mul(s5_5, M4(1.866e-02, -1.780e-02, 3.817e-02, 4.707e-02, 3.935e-02, -4.192e-02, -8.523e-02, 1.779e-02, -8.066e-03, -5.122e-03, -9.794e-02, -4.393e-02, 1.824e-02, -3.186e-02, -8.565e-02, 6.290e-02)); + r += mul(s5_6, M4(-7.068e-02, 1.291e-02, 5.946e-02, -7.038e-02, -2.054e-02, -1.145e-02, 8.059e-02, 8.572e-02, -1.750e-02, 5.672e-02, 5.180e-02, -1.358e-03, 2.490e-02, 2.271e-02, 1.013e-01, -6.520e-02)); + r += mul(s5_7, M4(5.704e-02, 2.386e-02, 4.944e-02, -1.178e-01, 6.066e-02, -2.115e-02, 1.655e-01, 3.649e-03, 3.235e-04, 8.419e-02, 4.089e-02, -4.977e-02, -2.662e-02, 5.714e-02, 9.378e-03, 3.368e-02)); + r += mul(s5_8, M4(6.667e-02, -9.522e-03, 8.112e-02, -2.254e-02, -2.724e-02, -2.765e-02, -4.131e-02, 7.624e-03, 1.797e-02, 1.440e-01, -1.185e-01, 6.992e-02, 4.543e-02, 3.386e-02, -7.079e-03, -6.702e-02)); + r += mul(s6_0, M4(-1.054e-01, 2.923e-02, -5.852e-02, 1.290e-02, 4.864e-02, -8.424e-02, -5.442e-02, 6.261e-02, 5.102e-03, -1.792e-02, 4.114e-02, 1.532e-02, 8.135e-02, 2.076e-02, -3.238e-02, 5.559e-02)); + r += mul(s6_1, M4(1.189e-01, 2.617e-02, -7.202e-02, 1.978e-02, -1.336e-01, -5.021e-02, -2.016e-01, -5.546e-02, 4.317e-02, 7.192e-02, 3.775e-02, -6.866e-02, 1.697e-01, -5.667e-02, -4.493e-02, -9.397e-04)); + r += mul(s6_2, M4(-4.501e-02, 3.993e-03, -1.623e-02, 1.275e-02, 1.896e-02, -5.788e-02, -1.222e-02, -1.621e-02, -1.356e-02, -1.976e-02, -4.456e-02, 7.327e-02, -1.872e-02, -6.481e-02, 1.280e-02, -1.286e-01)); + r += mul(s6_3, M4(8.819e-02, -1.474e-02, -2.213e-01, -8.129e-02, -1.848e-02, 1.854e-02, -1.397e-01, 1.339e-01, -8.792e-02, -7.492e-02, -7.482e-03, -8.757e-02, -4.168e-02, -3.177e-03, 
-1.849e-01, 1.436e-02)); + r += mul(s6_4, M4(3.007e-02, -1.321e-01, 1.018e-01, -1.113e-01, -3.374e-01, 4.874e-02, 6.056e-02, 1.621e-01, 4.237e-03, -5.609e-02, 1.296e-01, 1.055e-01, -5.726e-02, 1.107e-01, 1.367e-01, 1.294e-01)); + r += mul(s6_5, M4(-1.999e-02, -9.000e-02, -6.576e-02, 1.829e-01, 2.109e-02, 5.075e-02, -2.304e-01, -3.625e-02, -4.882e-02, -1.409e-01, -1.035e-01, -6.867e-02, -2.310e-01, -5.908e-02, -2.994e-02, -2.707e-01)); + r += mul(s6_6, M4(-5.752e-02, -9.524e-02, -3.031e-02, -8.189e-03, -5.906e-02, -7.163e-02, 2.148e-02, 4.192e-04, -5.478e-02, -6.803e-02, -1.579e-01, 1.057e-01, 2.721e-02, -3.405e-02, -1.079e-01, -3.397e-02)); + r += mul(s6_7, M4(-9.961e-02, -3.182e-02, 4.586e-02, 2.162e-01, -4.933e-02, -1.123e-01, -1.063e-02, 4.250e-02, -1.083e-02, -6.599e-03, -1.339e-01, 1.093e-01, 1.649e-02, -2.208e-02, 1.595e-01, 2.718e-02)); + r += mul(s6_8, M4(-9.260e-02, -1.435e-01, 1.030e-01, 7.697e-02, -3.604e-02, 7.147e-02, -1.276e-01, 4.137e-02, 2.784e-02, -5.604e-02, -1.362e-02, 1.004e-03, -1.676e-02, -1.384e-01, -5.295e-02, -2.122e-01)); + r += mul(s7_0, M4(5.497e-02, -9.898e-03, -2.150e-02, 5.152e-03, 4.710e-02, -6.431e-02, 1.308e-02, -4.425e-02, 7.768e-03, 1.612e-02, -2.056e-02, 6.943e-02, 7.137e-02, -6.442e-02, 1.614e-02, 2.894e-03)); + r += mul(s7_1, M4(1.227e-01, 2.263e-02, 3.122e-02, -1.013e-01, -2.262e-02, -2.887e-02, -5.720e-02, -2.523e-02, -2.395e-02, 6.261e-02, 3.878e-03, 5.648e-02, 7.266e-02, -7.858e-02, 1.894e-02, -2.674e-02)); + r += mul(s7_2, M4(4.163e-02, 3.618e-02, -9.989e-02, 7.308e-02, -5.009e-03, -6.773e-03, 9.681e-02, -7.296e-03, 1.109e-01, 1.139e-02, 8.472e-02, -5.203e-02, 7.700e-02, -6.769e-02, 1.018e-01, -7.520e-02)); + r += mul(s7_3, M4(2.117e-02, -9.146e-02, -2.653e-02, 1.424e-02, -1.321e-01, 1.180e-01, -1.278e-01, 2.311e-02, -2.951e-02, 6.426e-02, 1.884e-01, -5.583e-02, 1.722e-02, -8.840e-03, -7.276e-02, 2.877e-02)); + r += mul(s7_4, M4(-1.094e-02, 7.987e-02, -6.903e-02, -4.263e-02, 1.432e-01, 6.285e-02, -8.872e-02, 3.220e-02, 
8.790e-02, 4.975e-02, -1.061e-01, 1.059e-01, -2.514e-02, 3.016e-02, 4.896e-02, 1.158e-01)); + r += mul(s7_5, M4(-7.765e-02, 5.886e-02, 8.983e-02, 1.892e-02, 7.227e-02, -3.175e-02, 1.829e-02, 9.907e-02, -2.059e-02, 2.523e-02, 2.763e-02, -7.163e-02, -2.238e-02, -1.832e-02, -4.848e-02, -5.891e-02)); + r += mul(s7_6, M4(-9.411e-02, -1.666e-02, -1.545e-03, 2.425e-03, -3.430e-02, -7.519e-02, 5.942e-03, 2.857e-02, 1.001e-01, 5.960e-02, 1.215e-01, 1.813e-02, -4.309e-02, 4.279e-03, -8.778e-03, 5.906e-02)); + r += mul(s7_7, M4(-1.747e-02, 6.884e-03, 1.019e-01, 5.877e-02, -3.948e-02, -6.012e-02, -1.121e-01, 1.085e-01, 4.042e-02, 1.534e-02, 1.680e-02, -3.873e-02, -3.553e-02, -1.062e-02, 6.044e-02, -6.507e-02)); + r += mul(s7_8, M4(-7.981e-04, 5.306e-02, -2.019e-02, -4.539e-02, -8.438e-02, -2.326e-02, 2.243e-01, -2.713e-02, -2.316e-02, -4.965e-02, 1.727e-01, -6.824e-02, 3.320e-02, 2.942e-02, -4.822e-02, -8.593e-02)); + r += V4(-6.209e-03, -2.322e-02, -6.479e-03, 3.211e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.049e-02, 6.111e-02, -1.028e-01, -5.236e-02, 7.108e-02, -3.843e-02, 8.589e-02, 6.198e-02, 2.803e-01, -7.897e-02, 3.982e-02, 1.940e-01, -7.968e-02, 1.248e-02, -6.840e-02, -8.779e-02)); + r += mul(s0_1, M4(-4.217e-02, 7.107e-03, -1.340e-02, -1.964e-03, 7.188e-02, 2.237e-02, 1.352e-01, 4.602e-02, 1.348e-01, 
-8.037e-02, 1.357e-02, 3.992e-02, 4.499e-02, -5.245e-02, -8.448e-02, -7.756e-02)); + r += mul(s0_2, M4(-6.389e-02, 1.225e-02, -7.624e-02, -2.441e-02, -3.044e-02, 1.794e-03, -8.328e-03, 6.038e-03, -1.468e-02, -3.933e-02, -1.075e-01, 1.055e-01, -2.344e-02, 1.867e-02, 7.679e-02, 4.558e-02)); + r += mul(s0_3, M4(6.966e-03, 1.902e-01, 4.513e-02, 3.581e-02, -5.264e-02, 7.647e-02, 4.587e-02, -1.090e-02, 7.197e-02, -2.279e-01, -1.849e-01, -1.908e-01, -1.105e-01, 1.526e-01, -3.105e-02, -5.915e-02)); + r += mul(s0_4, M4(8.242e-02, -2.377e-02, -2.679e-03, -9.327e-02, 1.464e-01, -4.084e-02, -6.155e-02, -7.298e-02, -4.436e-02, -1.357e-01, 2.946e-03, -9.301e-02, -3.393e-02, -2.176e-02, 6.742e-02, 1.500e-01)); + r += mul(s0_5, M4(8.052e-03, 9.869e-02, 7.559e-02, 3.931e-02, -7.694e-02, 1.427e-02, 1.037e-01, 5.893e-03, -8.789e-02, -4.602e-02, -2.800e-02, 5.525e-02, -2.218e-01, -2.230e-02, 5.218e-02, -3.408e-02)); + r += mul(s0_6, M4(3.175e-03, 1.260e-02, 6.000e-03, -5.241e-02, -2.239e-03, -1.598e-02, 7.361e-02, 2.156e-02, -3.416e-02, -9.550e-02, -2.809e-02, -1.343e-01, 1.769e-02, -1.639e-02, -9.413e-02, -9.222e-02)); + r += mul(s0_7, M4(-1.767e-02, -8.772e-02, -2.203e-02, 2.698e-02, -3.413e-02, 4.100e-02, -1.129e-02, 8.770e-02, 1.864e-03, 5.817e-02, -3.644e-02, 7.676e-02, -1.540e-01, 1.908e-01, -6.473e-03, 7.013e-02)); + r += mul(s0_8, M4(-1.066e-02, -8.715e-02, 1.794e-02, -1.718e-02, 3.849e-02, 7.916e-02, -4.488e-02, 3.583e-02, 1.228e-01, -1.129e-01, 7.438e-02, -3.504e-02, -3.830e-02, 1.527e-01, -1.025e-01, -3.254e-02)); + r += mul(s1_0, M4(-4.279e-02, 2.805e-02, -6.478e-03, -6.321e-02, -1.135e-01, -1.576e-01, -2.845e-01, 1.893e-02, -1.380e-01, 5.029e-02, -1.375e-02, 7.180e-02, 6.596e-02, 6.460e-03, 4.896e-02, -1.015e-01)); + r += mul(s1_1, M4(6.118e-02, 5.593e-02, 2.367e-02, -2.426e-02, 1.968e-01, -1.561e-01, 2.239e-02, -2.262e-02, -1.069e-01, 1.221e-01, 2.280e-01, -4.787e-02, 8.200e-03, -1.714e-02, -1.427e-01, -3.809e-02)); + r += mul(s1_2, M4(9.895e-02, 1.919e-02, -4.780e-02, 
-8.210e-03, 3.898e-02, 1.383e-02, -8.105e-02, 5.345e-03, 2.320e-03, 5.456e-03, -2.477e-02, 5.313e-02, 7.198e-02, 2.065e-02, 2.893e-02, -2.668e-02)); + r += mul(s1_3, M4(-1.142e-02, 1.964e-01, 2.138e-01, -3.753e-02, -1.707e-01, -3.177e-03, -2.294e-01, 4.555e-02, 3.510e-02, -5.653e-02, -9.684e-02, 6.350e-02, 4.124e-02, 1.055e-01, 4.750e-02, -2.276e-01)); + r += mul(s1_4, M4(6.411e-02, -1.365e-01, -1.404e-01, 4.621e-02, 1.895e-01, -4.308e-01, 3.777e-03, -3.179e-02, -1.489e-01, -2.070e-03, 1.834e-02, -1.492e-02, 9.748e-02, -3.045e-02, 1.195e-01, 2.917e-01)); + r += mul(s1_5, M4(6.094e-02, 8.341e-02, 6.781e-03, 6.353e-02, 9.032e-02, 1.040e-01, -4.046e-02, -1.785e-01, -3.653e-02, 5.541e-02, -8.072e-02, -2.779e-02, -6.816e-02, -8.345e-02, 7.266e-02, -4.618e-03)); + r += mul(s1_6, M4(5.829e-02, 1.015e-01, 1.323e-01, -7.664e-02, 7.754e-04, -2.737e-02, 1.702e-02, 4.195e-02, -4.931e-04, 7.661e-02, 9.495e-03, 1.009e-01, 3.830e-02, -6.200e-02, -2.071e-02, -9.456e-02)); + r += mul(s1_7, M4(-1.042e-01, -3.669e-02, -5.626e-02, 9.150e-02, -3.230e-02, 1.431e-03, -1.231e-01, 2.449e-05, -7.608e-03, 4.813e-02, 5.640e-02, 6.504e-02, 8.351e-02, 1.287e-01, 1.897e-02, 1.281e-01)); + r += mul(s1_8, M4(6.730e-02, -3.591e-03, -8.770e-02, -2.447e-02, 5.591e-02, 1.574e-02, 1.151e-02, 5.971e-02, 9.974e-02, 8.794e-02, -9.162e-03, 8.676e-02, -5.142e-02, -3.354e-02, -6.216e-03, -9.497e-03)); + r += mul(s2_0, M4(1.247e-01, -1.061e-01, 7.567e-02, -9.196e-02, -4.168e-02, 5.182e-02, -2.263e-02, 2.585e-02, 7.074e-02, -4.756e-02, -8.012e-02, 5.472e-02, 2.105e-01, -1.859e-01, 2.797e-02, -2.537e-02)); + r += mul(s2_1, M4(-1.643e-02, 8.011e-02, -6.775e-02, 8.476e-02, -9.097e-02, 1.363e-01, -3.407e-03, 3.541e-02, 5.825e-02, -2.914e-02, -5.662e-02, 4.324e-02, 4.588e-02, 7.110e-02, 5.353e-02, 1.324e-01)); + r += mul(s2_2, M4(-5.658e-02, -1.516e-02, 1.473e-01, 9.355e-02, -1.680e-02, -5.163e-03, -3.425e-02, -1.223e-02, 2.527e-02, 8.016e-05, 5.707e-03, 4.130e-02, -9.216e-04, 8.233e-02, -7.631e-02, -8.187e-02)); + 
r += mul(s2_3, M4(2.722e-01, -1.805e-02, 2.268e-01, -1.378e-01, 3.957e-01, 4.214e-03, 3.564e-02, -3.481e-01, 1.546e-02, -6.001e-02, -8.961e-02, -1.552e-01, -1.299e-01, -1.272e-01, -2.106e-01, -2.713e-02)); + r += mul(s2_4, M4(-1.397e-01, -1.281e-01, 2.438e-02, -2.740e-01, -2.735e-02, -2.233e-01, 3.683e-02, 1.080e-01, -1.178e-01, 3.885e-02, 6.368e-02, -6.460e-02, 1.818e-01, 2.601e-02, -8.447e-02, -6.402e-02)); + r += mul(s2_5, M4(-2.968e-02, 7.313e-03, 1.234e-01, 6.614e-02, 2.951e-02, -4.474e-02, 3.406e-02, 9.092e-02, -1.723e-03, 9.511e-02, -1.077e-01, -1.058e-01, -5.453e-02, -5.614e-02, 1.976e-01, 1.356e-01)); + r += mul(s2_6, M4(2.218e-03, -8.320e-03, 1.214e-02, -2.105e-01, 7.525e-02, 8.546e-02, -1.028e-01, -1.400e-01, 1.727e-02, -4.834e-02, -2.688e-02, 8.080e-02, -2.590e-01, 9.844e-03, -1.536e-01, 8.033e-02)); + r += mul(s2_7, M4(2.509e-02, -9.194e-02, 2.487e-02, -2.968e-01, -1.407e-01, -5.506e-02, -1.618e-01, -2.409e-01, -5.373e-02, -5.760e-02, 1.002e-02, 6.405e-02, -1.862e-01, -3.112e-01, -1.192e-01, -9.978e-02)); + r += mul(s2_8, M4(-1.461e-02, -1.308e-02, 4.011e-02, -1.234e-02, -8.379e-02, -5.492e-02, 2.864e-02, 2.979e-03, 7.257e-02, 7.612e-03, 9.477e-02, 7.681e-02, -1.820e-01, 6.211e-02, 8.262e-03, 1.077e-01)); + r += mul(s3_0, M4(-6.968e-02, -5.070e-02, -2.716e-03, -3.253e-02, 8.401e-02, 3.863e-02, 1.304e-01, -6.395e-02, -2.248e-01, -1.729e-02, -1.963e-01, 1.173e-01, -4.031e-02, 1.214e-03, 4.012e-02, 6.283e-02)); + r += mul(s3_1, M4(-7.261e-02, -6.832e-02, -9.888e-02, -2.210e-02, 8.176e-02, 3.296e-02, -5.889e-02, -1.627e-02, -1.181e-01, -4.543e-02, -2.259e-01, -1.334e-01, 9.064e-02, -1.367e-02, 2.490e-02, -2.786e-02)); + r += mul(s3_2, M4(-7.879e-02, -1.776e-02, 8.378e-02, -2.233e-02, -3.494e-02, -1.043e-01, -2.752e-02, 3.997e-02, 3.593e-02, -1.131e-02, -2.270e-03, -1.207e-01, -4.468e-02, 5.429e-02, 4.384e-02, 2.097e-02)); + r += mul(s3_3, M4(-4.782e-02, 6.620e-02, 4.331e-02, -4.935e-02, -1.376e-01, 1.304e-01, 1.418e-01, -1.366e-01, -1.422e-01, 9.409e-02, 
-1.143e-02, 4.999e-02, 3.565e-02, -9.909e-02, -1.052e-01, 2.936e-02)); + r += mul(s3_4, M4(-3.187e-02, 5.804e-02, 8.012e-02, -1.070e-01, -9.085e-02, 8.674e-02, -1.134e-01, -1.266e-01, -1.155e-01, 5.538e-02, -1.342e-01, 1.065e-01, 9.555e-02, 8.111e-02, -1.443e-03, -3.278e-02)); + r += mul(s3_5, M4(-4.139e-02, 1.685e-01, 1.200e-02, -3.564e-02, -1.144e-02, -1.103e-01, -1.092e-01, 1.229e-03, 1.426e-02, 3.171e-02, 6.061e-02, -1.785e-02, -2.598e-03, 1.907e-02, 1.974e-02, -4.434e-02)); + r += mul(s3_6, M4(-1.181e-01, -1.052e-02, 6.572e-02, 7.846e-02, -6.855e-02, 8.623e-03, 6.590e-02, -7.199e-02, 5.471e-02, -2.935e-02, 7.180e-02, 3.095e-01, -6.600e-02, -1.445e-01, -8.351e-02, 2.923e-02)); + r += mul(s3_7, M4(8.374e-03, 5.133e-02, 5.361e-02, 5.602e-03, -2.386e-02, 1.944e-01, 4.868e-02, 9.251e-02, 8.391e-02, -3.245e-02, -2.114e-01, 1.310e-01, -5.889e-02, -7.836e-02, 3.905e-02, 5.868e-02)); + r += mul(s3_8, M4(1.170e-02, -2.110e-02, 1.028e-01, 3.098e-02, 8.379e-02, -7.586e-02, 2.728e-02, 2.511e-02, 2.429e-02, 1.361e-02, 1.711e-02, -6.559e-02, -1.880e-02, 3.721e-02, -6.136e-02, 1.502e-02)); + r += mul(s4_0, M4(-2.903e-02, 2.782e-02, 3.637e-02, 4.470e-02, -6.718e-02, -6.209e-03, -1.784e-01, 4.864e-02, -1.289e-01, 9.147e-02, -6.943e-02, -1.060e-04, 1.179e-02, -6.628e-02, 7.751e-02, 8.653e-02)); + r += mul(s4_1, M4(-4.804e-02, 1.197e-01, 1.061e-04, -1.308e-01, 8.009e-02, -1.467e-02, -7.909e-02, -7.073e-02, 1.717e-02, 3.563e-03, 6.698e-02, -6.229e-03, 2.390e-01, -7.585e-02, 1.767e-01, -8.543e-03)); + r += mul(s4_2, M4(8.455e-04, -6.319e-02, -1.001e-01, 5.009e-03, 8.602e-02, 2.202e-02, 1.195e-02, -1.289e-02, -1.784e-02, 1.404e-02, -5.001e-02, -2.940e-02, 4.222e-02, 9.494e-02, 1.836e-02, 1.291e-01)); + r += mul(s4_3, M4(-5.270e-02, 3.443e-02, 1.567e-02, 2.300e-02, -1.383e-01, 9.547e-03, -1.708e-01, 5.681e-02, -6.158e-02, 6.746e-02, -3.332e-02, 5.301e-02, -3.292e-02, -3.047e-02, 1.798e-01, 1.061e-01)); + r += mul(s4_4, M4(-2.134e-01, 1.616e-01, -1.514e-01, -5.183e-02, 9.008e-02, 
-2.940e-01, -1.443e-01, 4.565e-02, 1.334e-01, 4.397e-02, 1.387e-01, -4.403e-02, 1.702e-01, -3.888e-02, 2.274e-01, -3.092e-02)); + r += mul(s4_5, M4(-4.684e-02, 1.135e-01, -1.655e-02, -5.954e-02, -1.843e-03, -7.895e-02, -4.797e-02, 5.951e-02, -8.364e-02, 1.026e-02, 6.500e-02, 1.285e-02, -6.667e-02, 8.180e-02, 1.613e-01, 6.563e-02)); + r += mul(s4_6, M4(-2.634e-04, 9.299e-02, 7.852e-02, 8.226e-02, -1.197e-02, 3.085e-02, -1.568e-01, -1.472e-02, -3.594e-02, 7.249e-02, -3.770e-02, 3.388e-02, 8.718e-02, -1.001e-01, 7.622e-03, 1.176e-01)); + r += mul(s4_7, M4(2.207e-02, 2.432e-01, 3.523e-02, 1.816e-01, -9.710e-02, 4.736e-03, -1.222e-01, 3.955e-02, -4.860e-02, -7.728e-02, 5.649e-02, 2.784e-02, 1.035e-01, -1.065e-01, 9.028e-02, -1.230e-01)); + r += mul(s4_8, M4(2.260e-02, -7.260e-02, 3.843e-03, 8.803e-02, -5.237e-02, -1.789e-02, 3.016e-02, -4.373e-03, 1.306e-01, 2.003e-02, -9.999e-02, 2.054e-03, -1.402e-01, 7.245e-02, 1.464e-01, -8.916e-03)); + r += mul(s5_0, M4(-1.601e-02, 1.393e-02, 1.181e-02, 3.394e-03, -1.229e-01, 6.802e-02, -1.842e-01, -6.766e-02, 7.230e-02, 1.347e-02, 5.686e-02, -4.203e-02, -3.612e-02, -1.982e-02, -1.188e-01, -2.902e-02)); + r += mul(s5_1, M4(-4.992e-02, 1.008e-01, -1.494e-02, -1.397e-01, -1.269e-01, 8.456e-02, -1.805e-01, -2.866e-01, 8.366e-02, -1.583e-02, -4.218e-02, 5.203e-02, -6.240e-03, 2.175e-03, 9.360e-03, -3.346e-02)); + r += mul(s5_2, M4(5.649e-02, 1.308e-02, -1.441e-01, 9.456e-03, -4.964e-03, 3.503e-02, -4.653e-02, -4.979e-02, 3.381e-02, 2.546e-02, -7.040e-03, -6.521e-02, 1.067e-02, 1.476e-03, 1.089e-02, -1.066e-02)); + r += mul(s5_3, M4(2.385e-01, 8.693e-02, 5.615e-02, 7.840e-02, -8.442e-02, 2.659e-01, -2.063e-01, 3.308e-02, 1.993e-01, -1.056e-01, 1.347e-01, 5.594e-02, -1.517e-02, -7.605e-03, -2.544e-02, -2.971e-02)); + r += mul(s5_4, M4(1.917e-01, 1.077e-01, 2.896e-02, 9.657e-02, 1.121e-01, -2.141e-01, 1.619e-02, -1.274e-01, -1.621e-01, -1.772e-02, -1.214e-01, 7.936e-03, 8.032e-02, 4.870e-02, 5.302e-02, 1.499e-01)); + r += mul(s5_5, 
M4(-9.687e-02, 6.926e-02, 4.666e-02, 2.300e-02, 1.611e-01, 1.829e-02, -1.539e-01, 6.384e-02, -8.200e-02, -4.745e-02, 9.795e-02, 1.086e-01, -2.475e-02, -3.007e-02, 3.883e-02, 4.477e-02)); + r += mul(s5_6, M4(4.059e-02, -1.257e-01, -3.480e-02, -6.760e-02, 5.015e-03, 2.947e-01, 4.345e-02, 9.616e-02, 3.028e-02, -1.256e-01, -7.291e-02, -2.048e-01, -5.949e-02, -5.499e-02, -6.628e-02, 1.118e-01)); + r += mul(s5_7, M4(3.351e-02, -8.789e-02, -1.121e-01, -2.031e-01, -7.557e-02, -6.257e-02, 1.119e-01, -8.362e-02, -1.950e-01, 4.202e-02, 1.407e-01, -1.596e-01, 4.333e-03, -1.290e-01, 5.861e-02, -1.871e-02)); + r += mul(s5_8, M4(-1.307e-01, -7.025e-02, 1.999e-04, 2.356e-02, 6.944e-02, -6.780e-02, -5.046e-02, -1.036e-01, -1.561e-01, -5.692e-02, -3.962e-03, 1.088e-02, -7.979e-02, 3.374e-02, -2.321e-02, -3.199e-02)); + r += mul(s6_0, M4(-1.385e-01, 4.381e-02, -2.158e-01, 4.605e-02, 1.471e-02, 7.279e-03, 6.032e-02, 5.920e-02, 1.219e-01, -6.779e-03, 7.797e-02, -7.467e-02, -2.764e-02, 4.501e-02, 1.588e-01, 4.100e-02)); + r += mul(s6_1, M4(-2.624e-01, 8.061e-02, 2.686e-02, -4.546e-02, 2.055e-01, 1.347e-02, 2.128e-01, -7.747e-02, 2.447e-01, -3.511e-02, 1.096e-01, -5.124e-02, 1.995e-01, 2.708e-03, -1.469e-01, 1.762e-01)); + r += mul(s6_2, M4(1.169e-01, 1.378e-02, -1.995e-02, -1.262e-01, 8.891e-02, 4.851e-02, 5.745e-03, 7.311e-03, 5.321e-02, 3.894e-02, 9.423e-02, 3.472e-02, 9.081e-02, -1.221e-01, -1.803e-02, -3.834e-02)); + r += mul(s6_3, M4(2.919e-01, -4.288e-02, 4.526e-02, 1.353e-01, 7.313e-02, -1.146e-01, 1.485e-01, -1.512e-01, 4.126e-02, -1.755e-02, 4.917e-02, -3.759e-02, -2.168e-01, -3.541e-03, 1.531e-01, -1.089e-01)); + r += mul(s6_4, M4(1.324e-01, 1.892e-02, 6.047e-02, 5.046e-02, -1.281e-01, 1.125e-02, -1.346e-01, -1.634e-01, 1.617e-01, -3.114e-02, 1.048e-01, 5.844e-02, 6.869e-02, -4.318e-02, 7.772e-02, -2.413e-02)); + r += mul(s6_5, M4(6.001e-02, -9.286e-03, -1.376e-02, -3.059e-02, -1.660e-02, -1.350e-01, 4.725e-02, 2.385e-02, -2.122e-02, 8.138e-02, 8.902e-02, 7.671e-02, 
-2.388e-02, -3.311e-01, 5.147e-02, 6.933e-02)); + r += mul(s6_6, M4(-7.691e-02, -3.586e-02, -2.052e-01, 6.760e-02, -2.289e-02, -3.130e-02, -1.099e-02, -1.740e-01, -1.143e-02, -6.989e-02, -1.834e-01, -2.093e-01, 3.919e-02, -4.511e-02, 2.687e-02, -1.602e-03)); + r += mul(s6_7, M4(9.664e-02, -1.343e-01, -6.979e-02, -4.653e-02, -1.119e-01, 1.621e-01, 4.385e-02, -6.588e-02, 1.298e-01, -1.788e-01, -1.862e-02, -3.076e-01, 9.194e-02, -3.679e-02, -1.679e-01, 4.619e-02)); + r += mul(s6_8, M4(-8.280e-02, 5.923e-02, -1.304e-01, -5.302e-02, 1.437e-02, -1.157e-01, -1.198e-01, 7.447e-03, -1.042e-01, 6.184e-02, 5.286e-02, 5.646e-02, -4.282e-02, -2.231e-01, 3.049e-02, -2.179e-02)); + r += mul(s7_0, M4(3.199e-02, 9.747e-03, -3.185e-02, 2.609e-02, 3.812e-02, -1.070e-01, 7.742e-02, 6.550e-02, -1.438e-01, 8.593e-02, -7.431e-02, -5.507e-02, -3.623e-02, 2.192e-02, -7.684e-02, 8.442e-03)); + r += mul(s7_1, M4(-2.091e-02, 4.231e-02, 9.332e-03, -8.065e-03, 6.009e-02, -5.314e-02, 7.690e-02, 6.717e-02, -9.374e-02, -4.819e-02, -2.180e-02, -8.335e-02, 2.130e-02, 9.632e-03, -6.324e-02, 2.606e-02)); + r += mul(s7_2, M4(1.019e-02, -3.432e-02, 3.118e-02, -2.029e-02, 8.550e-02, 1.040e-02, -2.055e-02, -1.994e-02, -5.136e-02, 1.527e-02, -1.303e-02, -4.715e-03, -2.850e-03, 7.002e-02, -2.064e-02, 3.552e-03)); + r += mul(s7_3, M4(3.422e-02, -1.958e-01, -9.659e-02, 9.334e-02, -1.249e-01, 3.489e-02, -8.189e-03, -9.950e-02, -2.292e-01, 1.062e-01, -9.947e-02, 7.557e-02, -6.335e-02, -2.233e-02, 4.335e-02, -3.771e-02)); + r += mul(s7_4, M4(-1.342e-01, 1.333e-01, -1.353e-02, -9.543e-02, 1.696e-01, -6.520e-02, 2.230e-02, -7.316e-02, -2.897e-01, 8.430e-03, -1.392e-01, 3.032e-02, -2.450e-02, 6.235e-02, -4.062e-02, -1.210e-01)); + r += mul(s7_5, M4(-3.361e-02, 4.276e-02, 4.163e-02, -1.434e-01, -2.628e-03, -5.990e-02, -9.169e-02, 3.404e-02, 2.080e-02, 4.274e-02, 5.379e-02, -1.103e-01, -1.540e-01, -2.073e-01, -2.857e-02, 4.924e-02)); + r += mul(s7_6, M4(-1.460e-03, -1.044e-01, -1.040e-01, -9.133e-02, 9.227e-02, 
3.829e-02, 6.551e-02, 1.675e-02, 2.176e-02, 2.104e-02, 4.837e-02, 1.493e-01, -5.540e-03, 3.481e-02, 2.152e-02, 6.388e-02)); + r += mul(s7_7, M4(-7.845e-02, 4.662e-03, 3.218e-02, -4.691e-02, 3.992e-02, -5.042e-02, -4.760e-02, -1.711e-02, 1.496e-01, -2.058e-01, -4.183e-02, 1.387e-01, 7.547e-02, 4.298e-02, -3.687e-02, -4.766e-02)); + r += mul(s7_8, M4(-1.745e-02, -9.501e-02, -7.995e-02, 7.595e-02, -3.949e-02, 8.468e-02, -2.336e-02, -6.546e-02, 6.669e-02, -1.837e-02, 9.980e-02, -4.351e-02, -1.122e-01, -6.903e-02, 2.983e-02, 2.586e-02)); + r += V4(-1.131e-02, -6.210e-03, -1.359e-03, 7.029e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.366e-02, -4.625e-02, -1.679e-05, -7.055e-02, -5.745e-02, 4.328e-02, -4.737e-02, 3.704e-02, 1.266e-01, -7.905e-02, 4.610e-02, 4.622e-02, -1.638e-03, -3.242e-03, -5.213e-02, 1.498e-02)); + r += mul(s0_1, M4(-4.821e-02, 7.161e-02, 1.783e-02, 2.136e-02, -1.947e-02, 1.331e-02, -8.429e-02, -6.556e-02, -3.016e-02, -3.791e-02, 6.900e-02, 1.032e-01, 6.970e-02, 4.629e-02, -2.488e-04, 4.221e-02)); + r += mul(s0_2, M4(-6.345e-02, -3.040e-02, -1.762e-02, -3.030e-02, 9.966e-03, 3.699e-02, -2.345e-02, 1.284e-01, 7.479e-02, 2.658e-02, 9.825e-02, 2.448e-03, -1.423e-02, 4.787e-03, -5.916e-03, -8.537e-03)); + r += mul(s0_3, M4(-6.308e-02, 6.292e-02, 8.276e-02, 8.984e-03, 1.197e-01, -2.621e-02, 
-1.714e-01, 1.645e-02, 7.338e-03, 1.754e-01, 3.165e-02, -4.041e-02, 1.649e-01, -1.146e-01, -2.231e-03, 1.743e-02)); + r += mul(s0_4, M4(1.518e-01, 4.422e-02, -2.570e-02, 4.261e-02, -4.614e-02, -1.646e-02, 3.071e-02, 1.416e-02, -1.199e-01, 7.698e-02, 7.249e-02, -2.018e-01, -6.424e-02, -6.171e-02, 3.563e-02, 9.237e-02)); + r += mul(s0_5, M4(6.048e-02, -2.544e-02, -3.806e-02, 6.680e-02, 5.752e-02, -3.774e-02, -1.997e-02, 3.231e-02, -1.899e-02, 1.783e-03, 6.195e-02, -8.508e-02, 1.436e-02, -7.429e-02, -1.926e-03, 4.569e-02)); + r += mul(s0_6, M4(-6.497e-02, -4.285e-02, -1.938e-02, 6.492e-02, -6.409e-02, 7.374e-03, 3.397e-03, -1.020e-02, -8.272e-02, 9.002e-02, 7.601e-02, 7.153e-02, -2.317e-01, -9.008e-02, -1.715e-01, 3.595e-02)); + r += mul(s0_7, M4(-2.158e-02, 8.395e-02, 1.559e-02, 2.708e-02, 4.025e-02, 1.614e-02, 3.012e-02, -6.034e-02, 1.020e-01, -5.890e-02, -9.097e-03, 5.405e-02, 6.755e-02, 8.994e-02, 9.884e-02, -3.034e-01)); + r += mul(s0_8, M4(-2.835e-02, 2.795e-02, 3.700e-02, -1.932e-02, 6.881e-02, 1.218e-01, -4.342e-02, -3.492e-02, 7.657e-03, -1.581e-01, 1.688e-02, -2.516e-02, 1.564e-02, 2.219e-01, -3.715e-04, -7.197e-02)); + r += mul(s1_0, M4(1.660e-02, -8.455e-02, -7.154e-02, -1.514e-02, -1.952e-02, -1.193e-01, -2.445e-02, -7.453e-02, 3.252e-02, -2.370e-01, -9.123e-02, 3.936e-02, -6.490e-02, 4.494e-02, -3.137e-02, -4.313e-02)); + r += mul(s1_1, M4(-7.434e-02, -2.393e-02, 8.495e-02, 1.451e-01, 5.770e-02, -2.610e-01, -3.929e-02, -8.307e-02, 1.142e-02, 6.720e-03, 2.579e-02, -6.570e-03, 3.456e-02, 1.460e-01, -1.962e-03, 1.058e-01)); + r += mul(s1_2, M4(-2.486e-02, 9.410e-02, -2.703e-02, -6.042e-02, -9.268e-02, -9.214e-02, 1.771e-02, -8.276e-02, 3.998e-02, -3.503e-02, 6.499e-03, 7.234e-02, 1.121e-02, -1.753e-02, 1.311e-02, 2.380e-02)); + r += mul(s1_3, M4(-1.391e-01, -8.952e-02, 1.701e-01, 8.006e-02, 1.100e-01, -2.031e-01, -2.448e-01, -8.485e-02, -1.531e-01, -6.221e-02, 1.598e-01, 1.236e-02, 3.185e-01, 2.210e-02, -6.903e-03, 1.081e-02)); + r += mul(s1_4, 
M4(1.768e-01, -1.338e-01, 4.228e-02, -1.443e-01, 1.189e-01, -1.057e-01, 3.814e-02, 3.880e-02, -7.824e-02, 8.927e-02, -4.384e-02, 1.260e-01, -1.051e-01, 1.095e-01, 2.348e-01, -7.785e-02)); + r += mul(s1_5, M4(3.181e-02, -2.058e-02, -2.484e-02, 2.095e-01, -1.002e-01, 6.903e-02, 9.806e-02, -8.338e-02, -4.461e-02, 1.080e-01, -4.480e-02, -1.608e-01, -2.168e-02, 1.402e-01, 9.112e-02, 3.137e-02)); + r += mul(s1_6, M4(-7.153e-02, -9.714e-03, -3.590e-02, -1.857e-02, -7.464e-02, -2.786e-01, -2.914e-02, 2.612e-02, 8.381e-02, 4.730e-02, -1.727e-02, 1.002e-01, -6.640e-03, -6.506e-02, 4.961e-02, 1.271e-01)); + r += mul(s1_7, M4(1.086e-01, -1.354e-02, -4.072e-02, -1.361e-02, -1.088e-02, -7.620e-02, 2.137e-02, -5.935e-02, 1.402e-01, -4.279e-02, -5.225e-02, -8.616e-04, -2.528e-02, -5.778e-02, 7.600e-02, 1.156e-01)); + r += mul(s1_8, M4(-2.794e-02, -4.535e-02, 7.499e-02, 7.316e-02, 4.056e-02, -9.792e-03, -3.258e-02, 3.545e-02, 4.778e-02, 2.453e-02, 1.394e-02, -3.347e-02, 1.539e-02, -1.344e-02, -1.629e-02, 8.918e-02)); + r += mul(s2_0, M4(-4.724e-02, 3.187e-02, -1.119e-01, -1.039e-01, 1.237e-01, -1.120e-01, -1.534e-01, -3.663e-02, 6.038e-02, 1.295e-02, -5.587e-02, 2.924e-02, -1.583e-01, 4.710e-02, 1.087e-01, 1.085e-01)); + r += mul(s2_1, M4(-1.190e-01, 1.962e-01, -4.033e-02, 4.048e-02, 4.540e-02, -1.960e-01, -3.113e-02, -1.396e-03, -1.057e-01, -1.123e-01, 4.961e-02, 1.273e-01, -6.559e-02, -1.155e-02, -1.244e-01, 9.591e-02)); + r += mul(s2_2, M4(8.596e-02, -1.154e-01, 1.762e-02, 7.911e-02, -4.364e-02, -4.480e-02, -5.516e-02, -9.699e-03, -2.152e-02, 5.203e-02, 1.410e-02, -7.408e-02, -9.102e-02, -1.625e-01, -3.805e-02, -9.314e-02)); + r += mul(s2_3, M4(2.788e-03, 1.312e-01, -8.558e-02, 2.490e-01, 4.095e-02, 1.776e-01, 1.262e-01, -1.899e-02, -9.067e-02, 8.022e-02, 9.244e-02, 1.264e-01, -1.084e-01, 3.207e-01, 1.156e-01, 2.557e-01)); + r += mul(s2_4, M4(-1.319e-01, 1.857e-01, -5.472e-02, -5.962e-02, -1.585e-02, -6.869e-02, 1.136e-01, -1.332e-01, 1.490e-01, -1.100e-02, -1.314e-01, 
-1.097e-02, 2.497e-02, 1.317e-03, -2.269e-02, 4.258e-02)); + r += mul(s2_5, M4(8.058e-02, 2.096e-02, -5.652e-02, 8.678e-02, 8.261e-03, 7.981e-02, -2.228e-02, -8.695e-02, -9.517e-02, 1.984e-03, -1.027e-01, 1.134e-01, 8.701e-02, -8.857e-02, -1.051e-01, 2.998e-01)); + r += mul(s2_6, M4(-1.376e-01, 2.994e-01, 2.178e-01, 1.225e-01, 2.191e-02, -1.421e-01, -6.894e-02, 5.654e-02, -4.585e-02, 7.526e-02, 2.837e-02, -9.054e-02, 2.730e-01, 6.572e-02, -1.035e-02, -5.567e-02)); + r += mul(s2_7, M4(-5.962e-02, 9.766e-02, 1.996e-02, -9.993e-02, 1.208e-02, -2.710e-01, 1.139e-01, -7.663e-02, -5.946e-03, 1.802e-04, 4.340e-02, -1.143e-02, 9.620e-02, 1.497e-01, 1.258e-01, 2.478e-01)); + r += mul(s2_8, M4(1.097e-03, 8.520e-02, -3.726e-02, -6.033e-02, 2.829e-02, -9.984e-02, 1.767e-02, -3.130e-02, 4.302e-02, 1.348e-01, 1.141e-02, 3.356e-02, 6.218e-02, -1.368e-01, -7.196e-02, -3.032e-03)); + r += mul(s3_0, M4(-5.851e-02, -2.836e-02, -7.747e-02, -7.154e-02, 5.586e-03, 1.857e-02, -3.549e-02, 2.444e-02, -1.147e-01, -1.289e-01, -1.063e-01, -9.112e-02, -1.265e-01, 4.177e-03, -5.100e-02, -8.817e-02)); + r += mul(s3_1, M4(-3.374e-02, -5.017e-02, -6.799e-02, -8.030e-02, -6.027e-02, 2.000e-02, -9.070e-02, 1.255e-02, -2.058e-02, 5.366e-02, -1.589e-01, -5.098e-02, -8.296e-03, 2.337e-02, 8.431e-03, -6.655e-02)); + r += mul(s3_2, M4(9.613e-03, 3.160e-04, 1.466e-02, -4.255e-02, -3.482e-02, 5.285e-02, -2.864e-02, 3.214e-02, -1.187e-01, 7.668e-02, -1.474e-02, -6.664e-02, -2.491e-02, 3.979e-02, -4.651e-02, -3.809e-03)); + r += mul(s3_3, M4(1.401e-01, -2.754e-02, -1.067e-01, -4.772e-02, 2.739e-01, 3.329e-02, -1.982e-01, -1.526e-01, 7.079e-02, 8.003e-02, -5.168e-02, -1.645e-01, -5.493e-02, 4.557e-02, 5.902e-02, 1.539e-02)); + r += mul(s3_4, M4(6.088e-03, -4.484e-02, -2.159e-01, -7.034e-02, -9.688e-02, 6.970e-02, -9.132e-02, -1.124e-02, 3.162e-02, -9.333e-02, -1.193e-01, -1.147e-01, 5.849e-02, 2.679e-02, -1.136e-01, 1.677e-01)); + r += mul(s3_5, M4(2.219e-02, 1.462e-01, -1.562e-03, 8.097e-02, 3.727e-02, 
2.447e-02, 2.518e-02, -8.378e-03, 3.070e-02, 1.205e-01, -1.129e-01, -2.044e-02, 8.378e-02, 1.016e-02, 4.320e-03, -1.588e-02)); + r += mul(s3_6, M4(-3.378e-02, -2.538e-02, -6.935e-02, -1.635e-02, -1.603e-01, -4.665e-02, -1.968e-02, -9.107e-02, 1.206e-01, -1.389e-01, -1.143e-01, -1.128e-01, 1.049e-01, 1.733e-02, 8.835e-03, 1.055e-02)); + r += mul(s3_7, M4(5.519e-02, 4.811e-02, -1.458e-02, -4.860e-02, 2.804e-02, 5.491e-02, 6.081e-02, -8.758e-02, 7.463e-02, 3.244e-02, -1.801e-01, 3.040e-02, 6.663e-02, 2.937e-02, -6.186e-02, -4.246e-02)); + r += mul(s3_8, M4(3.751e-02, 7.449e-02, -1.401e-03, -6.031e-04, -7.415e-04, 2.114e-02, 9.190e-03, -1.090e-02, 5.044e-02, 9.171e-02, -5.598e-02, -4.324e-02, -5.242e-02, 2.832e-02, -2.034e-02, 3.673e-02)); + r += mul(s4_0, M4(2.718e-02, 3.039e-03, 5.032e-03, 6.351e-03, -1.961e-01, 1.204e-01, -4.799e-03, -9.374e-02, -1.310e-01, 1.085e-02, -4.200e-02, -1.989e-03, -6.987e-02, -3.062e-02, -2.286e-02, -1.975e-03)); + r += mul(s4_1, M4(5.857e-03, 3.052e-02, -3.996e-04, -1.045e-01, 9.168e-02, -4.770e-02, 7.762e-02, 1.046e-01, -5.771e-03, -1.048e-01, -4.878e-02, -2.485e-02, -3.981e-02, -9.546e-02, 1.040e-01, 5.840e-02)); + r += mul(s4_2, M4(-6.438e-02, 6.032e-02, -4.095e-03, 1.282e-02, -6.417e-03, 1.891e-02, -1.130e-02, 1.321e-02, 2.031e-02, 7.678e-04, -3.628e-02, 1.251e-02, -4.912e-02, -1.129e-01, 4.968e-02, 5.214e-02)); + r += mul(s4_3, M4(1.093e-01, -4.284e-02, -7.542e-02, 5.665e-02, -6.118e-03, 5.012e-03, -9.555e-02, -8.125e-02, 1.796e-01, 4.334e-02, 2.716e-02, 1.090e-01, 2.663e-02, 6.959e-02, -1.524e-02, -4.485e-02)); + r += mul(s4_4, M4(5.264e-02, 7.289e-02, -1.294e-01, -3.265e-02, 2.380e-02, 1.014e-01, 1.142e-02, 3.700e-02, -1.953e-02, 6.918e-02, -6.314e-02, -2.820e-01, -7.555e-02, 1.186e-01, 4.388e-01, -2.191e-01)); + r += mul(s4_5, M4(-4.334e-02, 1.732e-02, -2.172e-02, 4.795e-02, 2.192e-02, -3.595e-02, -3.459e-02, 7.788e-02, -5.273e-02, 1.390e-02, 1.792e-02, -4.107e-02, 8.022e-03, 8.154e-03, 1.100e-02, -1.620e-01)); + r += mul(s4_6, 
M4(-7.379e-02, -1.514e-01, -9.284e-02, -5.538e-02, -1.410e-02, 4.090e-02, -2.018e-02, 5.257e-03, -1.715e-02, 7.838e-02, 9.580e-02, -6.710e-02, 3.742e-02, -9.365e-02, 4.797e-02, -7.506e-02)); + r += mul(s4_7, M4(1.055e-01, -1.039e-01, 1.668e-02, 1.225e-02, -5.033e-05, 5.628e-02, -3.101e-02, -4.565e-02, 7.412e-02, 2.531e-03, 4.795e-02, 9.870e-02, -6.641e-02, -1.500e-01, -6.496e-02, -1.033e-01)); + r += mul(s4_8, M4(4.693e-02, 7.863e-02, -2.748e-02, -1.198e-02, -4.499e-02, -2.656e-02, 2.222e-03, 4.269e-03, 2.812e-03, 4.611e-03, 3.320e-02, 1.287e-02, -3.171e-02, -1.270e-01, -6.555e-02, -1.371e-02)); + r += mul(s5_0, M4(-1.372e-01, -5.882e-02, 2.866e-02, 7.556e-03, -6.512e-02, -6.292e-02, 1.342e-02, -6.174e-02, -5.772e-02, 9.548e-02, 2.712e-02, 6.947e-02, -1.070e-01, -4.828e-02, 2.676e-02, 3.725e-02)); + r += mul(s5_1, M4(2.337e-02, 5.177e-02, -4.465e-02, -8.847e-02, 1.230e-01, -5.090e-02, 3.594e-02, -5.058e-03, 1.487e-01, 4.051e-02, 2.267e-02, 8.719e-02, 5.190e-02, 1.139e-02, 4.265e-03, 4.696e-03)); + r += mul(s5_2, M4(-1.224e-02, 1.160e-02, 2.127e-02, 1.178e-02, -2.908e-02, -1.686e-02, 8.032e-02, 6.009e-03, -6.588e-02, 4.991e-02, 1.852e-02, 5.245e-02, -6.764e-02, 4.756e-02, 4.309e-02, 9.353e-02)); + r += mul(s5_3, M4(-5.198e-02, -9.044e-02, 8.528e-03, 7.007e-02, 1.540e-02, -2.175e-01, -1.793e-01, -6.205e-02, 6.765e-02, 1.716e-01, 8.623e-02, 2.179e-02, 5.780e-02, 3.796e-02, 9.230e-03, 7.359e-03)); + r += mul(s5_4, M4(6.367e-02, -1.669e-01, -6.597e-02, -1.224e-01, 6.007e-03, 7.739e-02, 1.129e-01, -7.896e-02, -1.081e-01, -1.099e-01, -5.489e-02, -2.851e-01, -5.496e-02, -5.978e-02, 1.354e-01, 2.308e-02)); + r += mul(s5_5, M4(5.751e-02, 1.276e-02, -5.097e-03, 5.008e-02, -1.904e-02, 1.818e-02, 6.886e-03, -1.936e-02, 6.154e-02, 3.243e-02, 2.901e-02, 1.013e-01, 4.000e-02, -1.378e-02, -2.156e-02, -4.824e-02)); + r += mul(s5_6, M4(5.950e-02, 3.787e-02, -1.758e-02, 1.207e-01, -4.966e-02, -7.477e-02, 4.130e-02, 9.592e-02, -1.577e-01, 1.225e-01, 1.179e-01, -5.457e-02, 8.628e-02, 
-4.253e-02, -4.329e-03, -6.608e-02)); + r += mul(s5_7, M4(7.467e-04, -6.582e-02, 2.856e-03, -2.982e-02, -1.069e-02, 2.294e-02, 1.185e-01, 3.513e-02, 1.033e-03, 3.475e-02, 2.014e-01, 4.276e-03, 3.863e-02, 7.000e-02, 1.326e-02, -1.444e-01)); + r += mul(s5_8, M4(1.070e-01, -7.495e-02, 1.787e-02, 1.062e-01, -1.778e-02, -4.706e-02, 3.628e-02, -5.452e-02, 3.944e-02, 3.673e-02, -6.037e-03, 5.675e-02, -1.452e-02, -4.372e-02, -1.022e-01, -6.924e-02)); + r += mul(s6_0, M4(-1.685e-01, -8.154e-02, -2.199e-03, -1.461e-01, 1.714e-01, -1.107e-01, -1.158e-02, 4.329e-03, -3.056e-03, 1.577e-02, -3.549e-02, 2.360e-03, 1.517e-02, 5.450e-02, 1.118e-02, -9.407e-03)); + r += mul(s6_1, M4(4.158e-02, 4.085e-02, -5.557e-02, -2.010e-01, 1.686e-01, -1.587e-01, 3.987e-02, 8.735e-02, 1.252e-01, 5.412e-03, -2.338e-02, 1.424e-01, -1.241e-01, 5.346e-02, 2.690e-02, -1.535e-01)); + r += mul(s6_2, M4(-3.336e-02, -1.172e-01, -2.737e-02, -1.304e-02, -6.357e-02, 2.670e-02, 4.281e-02, 1.914e-01, -1.642e-02, 2.325e-02, -1.774e-02, 1.394e-01, 1.639e-01, 2.105e-01, 9.449e-03, 1.014e-01)); + r += mul(s6_3, M4(-4.128e-02, -1.083e-01, -6.643e-02, -1.920e-01, 3.908e-02, 8.263e-02, 9.076e-02, 1.411e-01, -2.949e-02, 2.412e-02, 7.386e-02, 1.508e-01, -7.181e-02, 5.448e-02, 3.707e-03, 1.569e-01)); + r += mul(s6_4, M4(1.143e-01, -4.578e-02, 2.429e-02, -2.966e-01, 2.278e-02, 1.007e-01, 1.086e-01, 2.163e-01, 2.578e-01, -1.218e-01, -1.775e-02, -1.480e-01, 1.567e-01, 1.704e-01, -8.204e-02, -1.113e-01)); + r += mul(s6_5, M4(-1.680e-02, -5.282e-02, -9.898e-02, 3.483e-02, 5.711e-03, -2.308e-02, 3.101e-02, 2.295e-02, 2.181e-02, 1.015e-02, -1.960e-03, 2.662e-02, 9.698e-02, -1.443e-01, 6.841e-02, 1.892e-01)); + r += mul(s6_6, M4(2.444e-01, -1.486e-01, -8.018e-02, -7.961e-02, -1.087e-01, 1.903e-01, 2.236e-02, -1.990e-02, -4.210e-02, 4.249e-02, 8.970e-02, 6.274e-02, -9.654e-02, -1.470e-02, -2.475e-02, 4.849e-02)); + r += mul(s6_7, M4(-1.441e-01, -1.414e-01, -1.905e-01, 6.714e-02, 1.101e-01, 5.432e-03, -5.732e-02, -1.257e-01, 
3.094e-02, -2.274e-01, 2.291e-02, -1.458e-01, 6.596e-02, 3.644e-01, 1.167e-01, -1.894e-01)); + r += mul(s6_8, M4(6.490e-02, -3.084e-02, -4.923e-02, -1.693e-01, -7.865e-02, -6.116e-02, -5.965e-02, 1.404e-02, 1.179e-01, -1.438e-01, -5.197e-02, 6.703e-02, 9.144e-02, 2.744e-01, -2.768e-03, 1.547e-01)); + r += mul(s7_0, M4(1.584e-02, -4.498e-02, 6.091e-02, -2.324e-02, -1.449e-02, -7.649e-02, 4.255e-02, -2.818e-02, -6.180e-02, 5.349e-02, -8.180e-02, -8.197e-03, 1.151e-01, -8.638e-02, -4.954e-03, -4.913e-05)); + r += mul(s7_1, M4(-1.801e-02, -2.371e-03, 3.753e-02, -5.704e-04, -7.683e-02, -3.341e-03, 3.097e-02, -1.380e-02, 9.815e-02, -3.270e-02, -7.837e-02, 1.160e-01, 7.501e-02, -8.401e-02, -3.338e-02, 1.049e-01)); + r += mul(s7_2, M4(-1.410e-02, -4.114e-02, 3.555e-02, -3.825e-02, -5.964e-02, 2.439e-02, 3.916e-02, -6.536e-02, -3.578e-02, 5.148e-02, -1.028e-02, 6.167e-03, 5.471e-02, -6.947e-03, -4.041e-02, 1.174e-01)); + r += mul(s7_3, M4(3.846e-02, 6.365e-03, 2.241e-02, -8.690e-02, -2.050e-02, 3.654e-02, -1.591e-03, -9.917e-03, -1.252e-01, -5.371e-03, 3.311e-02, 8.132e-03, -6.208e-02, 1.904e-02, 7.562e-02, -2.637e-03)); + r += mul(s7_4, M4(1.122e-01, -1.445e-02, 1.229e-02, -7.151e-02, 1.639e-01, 1.428e-01, -1.328e-01, 8.506e-02, -2.447e-03, 6.351e-02, -1.303e-01, -3.838e-02, -4.276e-02, 9.501e-02, 5.358e-02, 8.172e-03)); + r += mul(s7_5, M4(9.728e-03, 7.052e-02, -1.768e-02, 1.619e-01, -2.085e-02, -6.813e-02, 3.413e-02, -6.557e-02, -8.677e-02, 1.237e-01, -2.285e-02, -1.136e-01, 8.036e-02, -1.186e-01, -2.369e-02, -1.370e-01)); + r += mul(s7_6, M4(-8.722e-02, 1.628e-01, 1.063e-01, 4.557e-02, -2.343e-02, 1.109e-02, -1.094e-01, -6.930e-02, 1.664e-01, -4.381e-03, -1.127e-01, -1.101e-02, -6.311e-03, -2.693e-02, -3.793e-02, 3.911e-04)); + r += mul(s7_7, M4(-2.129e-02, -4.236e-02, -3.102e-02, 8.458e-03, -1.264e-02, 4.343e-03, -6.226e-02, 7.021e-02, -8.578e-03, -6.781e-02, -1.426e-02, 2.388e-01, -7.339e-02, 1.265e-01, 8.468e-02, -2.205e-02)); + r += mul(s7_8, M4(6.138e-03, 
-4.065e-03, 7.499e-02, 8.011e-02, -1.658e-02, -1.102e-02, 1.531e-02, -1.925e-02, 6.433e-02, 1.416e-01, 1.623e-02, 1.529e-01, 5.257e-03, 3.776e-02, -2.884e-02, -2.830e-02)); + r += V4(-1.151e-03, 4.418e-03, 4.805e-04, -9.419e-03); + return r; +} + +void Pass13(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 
s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, 
s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 14 +//!DESC conv13 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 
s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.141e-02, -6.620e-02, -1.132e-02, 5.022e-02, -5.448e-02, -8.153e-02, 4.273e-02, -3.380e-02, -1.015e-01, 1.128e-01, 2.486e-02, -1.044e-01, -5.736e-02, -3.646e-02, -1.570e-01, 1.355e-01)); + r += mul(s0_1, M4(1.785e-02, 1.196e-02, -1.048e-01, 3.621e-02, -2.967e-02, 1.030e-01, 1.395e-01, 3.486e-03, -1.067e-01, 2.549e-01, 1.270e-01, -6.500e-02, -4.027e-02, 2.598e-02, -4.203e-02, 4.186e-02)); + r += mul(s0_2, M4(4.931e-02, -1.715e-02, -4.240e-02, 1.342e-02, 8.863e-03, 2.048e-02, 5.713e-02, -2.556e-02, -1.334e-01, 1.752e-01, -3.173e-03, -1.130e-01, 4.557e-02, 1.311e-01, -2.369e-02, -1.715e-02)); + r += mul(s0_3, M4(4.337e-02, 1.125e-01, 2.961e-02, -8.226e-02, 5.584e-02, 3.195e-02, -4.691e-02, -2.575e-02, 1.433e-01, -1.298e-01, -2.059e-01, 6.369e-02, 1.473e-02, 8.115e-02, 1.104e-02, 1.601e-01)); + r += mul(s0_4, M4(-4.852e-03, -4.469e-02, 4.528e-02, -1.032e-02, 8.592e-02, -2.401e-02, -1.961e-01, -6.586e-02, 3.530e-01, 4.468e-01, -2.665e-01, 2.346e-01, 2.854e-01, -8.168e-02, -1.710e-01, 1.831e-01)); + r += mul(s0_5, M4(2.041e-02, -8.185e-02, 6.674e-02, 1.875e-02, 5.817e-02, 1.368e-01, -1.589e-02, -1.966e-02, 1.995e-01, 9.087e-02, 2.804e-03, 1.407e-01, 1.640e-01, -9.252e-02, -4.291e-02, 3.763e-02)); + r += mul(s0_6, M4(1.015e-01, 2.379e-02, -1.526e-01, -4.942e-02, 1.086e-01, -2.635e-03, 5.455e-02, -6.096e-02, -9.946e-02, 2.923e-01, -8.444e-02, 5.240e-02, 1.745e-02, -3.779e-02, 2.604e-02, -4.151e-03)); + r += mul(s0_7, M4(1.162e-02, -6.650e-03, -2.116e-01, 1.831e-02, 9.878e-02, 6.680e-02, 5.200e-02, 7.785e-02, 1.277e-01, -2.542e-01, 6.069e-02, 2.395e-01, -1.033e-01, -2.852e-02, -1.343e-01, -1.052e-01)); + r += mul(s0_8, M4(9.261e-02, 2.752e-02, -9.165e-02, 2.324e-02, 5.415e-02, -1.891e-02, -3.516e-02, 3.924e-03, 1.036e-01, 1.315e-01, -9.339e-02, 3.699e-02, 
3.385e-02, -5.174e-02, -6.744e-02, 6.598e-02)); + r += mul(s1_0, M4(1.558e-02, -6.553e-02, -1.348e-02, 4.312e-03, -3.356e-03, -8.843e-02, 1.666e-02, 1.349e-02, -2.128e-02, -3.381e-02, 3.920e-02, -1.670e-02, -2.264e-02, 1.329e-02, -3.409e-02, -6.181e-02)); + r += mul(s1_1, M4(5.805e-03, 4.477e-02, 1.858e-02, -1.022e-02, 2.212e-02, -4.813e-02, -9.164e-02, -2.738e-02, 8.371e-02, 1.352e-03, -2.860e-02, 3.639e-02, -1.791e-02, -2.523e-02, 7.703e-02, 2.617e-02)); + r += mul(s1_2, M4(3.957e-02, -2.351e-03, 4.658e-02, -1.662e-02, 4.351e-02, -4.744e-02, -8.098e-02, 4.911e-02, -1.091e-03, 2.249e-03, -5.664e-03, 5.230e-02, 1.899e-02, 1.876e-02, 9.931e-03, -1.547e-02)); + r += mul(s1_3, M4(1.265e-01, 1.980e-02, -8.764e-02, 6.612e-02, -7.751e-02, -1.122e-02, 6.755e-02, -4.610e-03, -1.964e-02, 5.122e-03, -7.697e-02, 3.198e-02, 3.231e-02, 9.638e-02, 4.545e-03, -4.199e-02)); + r += mul(s1_4, M4(-5.894e-02, -1.329e-01, -1.192e-01, 1.160e-03, -1.037e-01, -1.072e-01, -4.378e-02, -7.303e-02, 7.073e-02, 5.396e-02, -1.280e-02, -5.782e-02, 1.773e-01, -1.077e-01, -6.754e-02, 1.033e-01)); + r += mul(s1_5, M4(2.713e-03, -7.180e-02, -7.877e-02, 2.297e-02, -7.703e-03, 7.134e-02, 3.480e-02, 3.646e-02, -2.446e-02, -2.648e-02, -8.801e-02, -2.624e-02, 3.275e-02, -7.984e-02, 1.388e-02, -1.384e-02)); + r += mul(s1_6, M4(5.089e-02, -3.647e-03, -1.665e-02, 3.789e-02, 1.489e-01, -1.081e-01, 1.731e-02, 7.075e-02, 5.898e-02, 7.116e-02, -1.541e-02, 5.454e-02, -6.869e-02, -1.348e-01, 6.366e-02, -9.688e-02)); + r += mul(s1_7, M4(8.425e-02, -4.745e-02, -4.129e-02, 3.008e-01, 5.933e-02, -2.940e-02, 9.550e-02, 7.856e-02, 1.088e-01, 4.056e-02, -1.131e-02, 6.527e-02, -2.267e-01, 9.660e-02, 8.604e-03, -8.723e-02)); + r += mul(s1_8, M4(8.051e-02, 2.875e-02, -3.348e-02, 3.590e-02, 8.823e-02, -3.079e-02, -6.701e-02, 5.085e-02, 7.678e-02, 3.990e-02, -1.671e-02, -2.194e-02, -8.302e-03, 6.172e-02, -3.764e-02, -8.722e-02)); + r += mul(s2_0, M4(-1.144e-01, -9.280e-02, 2.135e-01, -1.004e-01, 8.034e-02, 2.011e-02, 
-1.735e-02, 4.454e-02, 1.625e-01, -1.156e-01, 5.881e-02, -1.255e-01, -7.047e-02, 7.272e-02, 4.270e-02, -1.440e-02)); + r += mul(s2_1, M4(1.486e-01, -7.381e-02, -1.984e-02, 3.769e-02, 3.620e-02, -7.317e-02, -1.095e-01, 2.855e-02, 1.671e-02, -5.344e-02, -7.885e-02, 7.999e-03, -1.142e-01, 4.843e-02, 3.625e-02, 7.313e-03)); + r += mul(s2_2, M4(7.284e-02, 4.662e-02, 1.131e-01, -7.068e-03, -4.402e-02, -4.379e-02, -6.864e-02, 3.116e-03, 1.202e-01, 1.590e-02, -5.161e-02, -2.509e-02, -1.140e-01, 6.822e-02, -3.929e-02, -4.498e-02)); + r += mul(s2_3, M4(-5.160e-02, 9.767e-02, 9.134e-02, -5.806e-02, -4.408e-02, 3.057e-03, 7.317e-02, -1.084e-01, -1.695e-01, -2.958e-01, 2.613e-01, -3.620e-01, 1.144e-01, 4.105e-02, -1.840e-01, 5.694e-02)); + r += mul(s2_4, M4(-2.702e-01, -2.149e-01, -1.500e-02, -2.169e-01, -1.774e-01, 7.561e-02, -1.166e-01, -1.276e-01, -1.435e-03, -5.159e-02, -2.791e-01, -2.208e-02, -1.261e-01, 6.767e-02, -7.217e-03, 6.273e-02)); + r += mul(s2_5, M4(9.956e-03, 2.633e-03, 2.125e-01, -1.653e-01, 3.382e-02, -7.555e-02, -5.575e-02, -2.742e-02, 4.812e-02, 3.444e-02, -1.328e-01, -1.345e-01, -5.003e-02, -2.473e-02, 5.229e-03, -5.086e-02)); + r += mul(s2_6, M4(-4.304e-02, 7.811e-02, 3.581e-02, 1.016e-01, 4.841e-02, 9.474e-03, -1.344e-01, -2.150e-02, 2.074e-02, 1.518e-01, -2.535e-03, -7.863e-02, -1.250e-01, -3.870e-02, 2.042e-02, 2.218e-02)); + r += mul(s2_7, M4(-1.801e-01, -1.262e-02, 9.160e-02, -2.435e-02, 1.284e-01, -1.978e-02, -6.965e-02, 2.254e-01, 1.300e-02, -5.303e-02, 1.294e-01, 3.205e-01, 2.125e-01, 1.100e-01, -1.555e-01, 2.775e-01)); + r += mul(s2_8, M4(-1.720e-01, 6.926e-02, -6.922e-02, -2.284e-01, 7.362e-02, 6.335e-02, -5.382e-02, 4.740e-02, -3.982e-02, 2.725e-02, 3.252e-02, -7.091e-02, -1.415e-02, 4.437e-02, 9.472e-02, 2.852e-02)); + r += mul(s3_0, M4(-3.278e-02, 2.729e-03, 6.004e-02, -1.436e-02, 1.062e-01, 1.824e-02, 7.212e-03, 3.931e-02, -2.166e-02, 1.406e-03, -1.046e-01, 1.478e-02, -6.831e-02, 4.965e-03, -5.771e-02, -1.770e-02)); + r += mul(s3_1, 
M4(-4.265e-02, 4.044e-02, 7.482e-02, 3.673e-02, -4.646e-02, 4.971e-02, -1.064e-01, -7.038e-02, 6.565e-03, -4.373e-03, -7.081e-02, -5.474e-02, 6.192e-04, 5.023e-02, 4.560e-02, 2.352e-02)); + r += mul(s3_2, M4(-3.566e-06, 3.208e-02, 1.266e-02, 2.033e-02, -3.041e-02, 2.652e-02, -3.761e-02, 2.411e-02, -6.918e-03, -3.238e-02, -4.250e-02, 1.925e-02, -7.340e-03, 1.245e-02, 3.399e-02, -2.518e-02)); + r += mul(s3_3, M4(8.491e-02, 2.422e-02, -4.230e-02, -4.731e-02, -7.405e-02, 1.836e-02, 1.932e-02, -1.100e-01, 8.063e-03, 1.991e-02, 9.470e-03, -1.448e-02, 4.615e-02, 1.156e-01, 2.012e-02, 4.059e-02)); + r += mul(s3_4, M4(1.380e-01, 2.457e-02, -1.354e-01, 6.016e-02, -6.588e-02, 1.419e-01, 1.135e-02, 7.055e-03, -5.355e-02, -2.477e-02, 4.363e-02, -1.144e-01, -1.527e-01, -2.229e-03, 1.809e-01, 4.934e-02)); + r += mul(s3_5, M4(6.089e-02, -6.852e-02, -8.328e-02, 1.592e-02, 1.130e-01, -3.196e-02, 7.612e-02, 2.826e-02, -6.319e-02, -1.673e-02, 4.322e-02, -8.615e-03, -1.638e-03, 2.452e-02, -3.442e-03, -2.506e-02)); + r += mul(s3_6, M4(-2.053e-02, 2.803e-02, 5.890e-03, 7.126e-02, -2.036e-02, -7.027e-02, -3.477e-02, 3.904e-02, -4.978e-02, -1.326e-02, -1.908e-03, -3.546e-02, -4.788e-02, 1.063e-01, 3.857e-02, -5.291e-02)); + r += mul(s3_7, M4(-9.241e-02, -5.720e-02, -6.293e-02, 1.304e-01, -3.444e-02, 5.718e-02, -7.650e-02, 7.219e-02, 1.636e-02, -1.342e-02, 2.611e-03, 1.026e-02, 7.974e-02, 1.007e-01, 1.014e-01, 2.116e-01)); + r += mul(s3_8, M4(-2.475e-02, 1.300e-02, 5.130e-02, 1.268e-01, 1.954e-02, 3.569e-02, -8.627e-02, -7.619e-02, 3.559e-02, 4.537e-02, 2.744e-02, 3.091e-02, 3.093e-02, -2.184e-03, 1.678e-02, -2.247e-02)); + r += mul(s4_0, M4(-6.316e-02, -8.194e-03, -1.663e-02, -1.174e-02, 9.211e-04, -1.028e-01, 3.682e-03, -1.636e-02, 8.008e-03, 3.940e-02, -3.416e-03, 4.771e-03, 5.515e-02, -3.139e-02, -9.479e-02, 3.571e-02)); + r += mul(s4_1, M4(2.325e-02, 8.383e-02, -2.387e-02, 4.355e-02, 1.666e-02, 3.317e-03, 1.584e-02, 3.469e-02, -2.404e-04, -6.334e-02, 1.957e-02, 5.153e-02, 2.300e-02, 
-4.056e-02, -8.582e-02, 7.260e-02)); + r += mul(s4_2, M4(-4.150e-02, 1.327e-02, 6.573e-03, 4.525e-03, 1.983e-02, -5.126e-02, -2.040e-02, -9.515e-03, -3.293e-03, 1.014e-02, -2.866e-02, 2.285e-02, -2.265e-02, -2.875e-02, -4.797e-02, -8.084e-02)); + r += mul(s4_3, M4(7.625e-02, -2.013e-02, -5.754e-02, 1.408e-02, -3.044e-02, -2.811e-02, -5.332e-02, -9.756e-03, -4.689e-02, 2.628e-02, -5.628e-02, 4.199e-02, 3.371e-02, -6.367e-02, 6.160e-03, 6.184e-02)); + r += mul(s4_4, M4(-1.169e-01, 2.696e-02, 7.835e-02, 1.769e-03, -1.204e-01, 2.394e-02, -5.374e-02, -2.369e-01, 2.384e-03, 1.122e-01, -1.815e-02, 9.182e-02, 6.527e-03, -6.919e-02, -1.969e-02, -7.163e-02)); + r += mul(s4_5, M4(-1.474e-02, -1.279e-02, -1.197e-02, -1.539e-02, -3.199e-02, 2.429e-02, 4.874e-02, 8.309e-05, 6.069e-02, -9.755e-02, -1.120e-01, 4.888e-02, -1.639e-02, 9.998e-02, 4.820e-02, -8.561e-02)); + r += mul(s4_6, M4(-6.376e-02, 2.606e-02, -1.121e-01, -3.997e-04, -1.059e-01, -8.696e-02, 2.277e-02, -1.555e-02, 2.321e-03, -1.682e-02, -8.266e-03, -4.173e-02, -5.731e-02, 9.822e-02, 5.153e-02, -2.178e-02)); + r += mul(s4_7, M4(2.946e-02, -1.019e-02, 5.550e-03, 9.156e-02, -1.290e-01, -9.453e-03, 7.614e-02, -9.659e-02, -9.576e-02, -2.089e-01, -1.220e-01, -2.603e-01, -1.564e-01, 1.091e-01, 1.010e-01, -2.357e-01)); + r += mul(s4_8, M4(-1.070e-02, -5.792e-02, -3.742e-02, 8.461e-03, 3.716e-02, -3.181e-02, 6.353e-03, 7.121e-02, -8.827e-02, -4.466e-02, -9.925e-04, -2.016e-02, -2.325e-02, 2.569e-02, -1.307e-02, -1.550e-01)); + r += mul(s5_0, M4(-7.021e-02, -5.956e-02, 1.236e-01, -1.058e-01, 1.359e-02, -9.547e-02, -1.473e-01, -1.460e-02, 3.398e-03, 2.339e-03, -4.774e-02, 4.883e-02, -5.353e-02, -4.559e-02, 1.437e-03, 3.843e-02)); + r += mul(s5_1, M4(-1.480e-01, 1.544e-01, 7.907e-02, -2.377e-01, -3.106e-02, 2.459e-01, 9.445e-02, -3.707e-03, -2.426e-02, 7.361e-03, -7.039e-02, -1.299e-02, -1.066e-02, -2.557e-02, -1.988e-01, 2.998e-02)); + r += mul(s5_2, M4(-9.891e-03, 3.242e-02, -5.727e-02, -4.602e-02, -2.228e-02, 1.389e-01, 
-1.326e-02, -8.314e-02, -1.987e-02, -3.938e-02, 2.049e-02, 4.203e-02, -2.140e-02, -2.455e-02, -9.610e-02, -6.004e-02)); + r += mul(s5_3, M4(9.126e-02, -7.572e-02, 5.318e-02, -9.806e-02, -8.060e-02, 1.901e-01, 2.908e-02, -8.675e-02, -2.088e-02, -1.928e-02, 1.792e-02, 8.698e-02, 1.420e-01, 4.968e-02, -5.260e-02, 1.072e-02)); + r += mul(s5_4, M4(-1.412e-01, 8.703e-02, 2.206e-01, -2.432e-02, -1.159e-01, 2.180e-01, -3.190e-01, -2.901e-01, 1.263e-02, -7.523e-02, 1.116e-01, 8.989e-02, 1.445e-01, 1.655e-01, 8.037e-02, 7.833e-02)); + r += mul(s5_5, M4(6.820e-02, -6.786e-02, 1.965e-02, 1.404e-03, 3.270e-02, 1.208e-01, 6.165e-02, -2.121e-01, 5.809e-02, -6.507e-02, -8.565e-02, -7.551e-02, 5.602e-02, 2.850e-02, 7.971e-02, -8.865e-03)); + r += mul(s5_6, M4(1.069e-01, -5.015e-02, -1.065e-01, 8.655e-02, -9.331e-03, 3.379e-02, 7.811e-03, 7.530e-03, 5.707e-02, 8.318e-02, 2.182e-02, -6.878e-03, -5.290e-02, -2.063e-02, -1.833e-02, -5.179e-02)); + r += mul(s5_7, M4(-4.091e-03, -4.725e-03, 5.285e-02, 5.689e-02, 2.501e-02, 1.192e-01, -1.275e-01, 5.202e-02, -2.421e-02, 8.988e-02, -3.053e-02, -1.231e-01, -1.260e-01, 4.334e-02, 2.688e-02, -8.457e-02)); + r += mul(s5_8, M4(6.478e-02, -1.301e-01, -4.297e-02, 8.666e-02, -3.498e-02, 3.045e-02, -4.299e-02, 2.292e-02, -2.781e-02, -1.764e-02, -2.209e-02, -2.507e-02, 1.476e-02, 3.149e-03, -4.379e-02, -3.836e-02)); + r += mul(s6_0, M4(3.491e-02, 3.212e-03, 6.530e-02, 8.442e-03, -3.985e-02, -4.695e-02, -4.522e-02, -6.292e-03, 1.154e-02, 3.974e-02, -1.493e-02, -1.318e-02, -3.157e-02, -4.850e-02, 3.412e-03, -2.990e-02)); + r += mul(s6_1, M4(-7.500e-02, -5.865e-02, 4.797e-02, 8.277e-02, 6.952e-02, 1.155e-01, -5.639e-02, -2.254e-03, -9.671e-02, 6.667e-02, -6.871e-02, -1.310e-02, -3.762e-02, 2.719e-02, -6.958e-02, -7.557e-02)); + r += mul(s6_2, M4(1.623e-02, 4.220e-02, -7.569e-03, 1.946e-02, -3.877e-02, -1.262e-01, 9.872e-03, -3.236e-03, 1.945e-02, 5.039e-02, -1.551e-01, 6.697e-02, 1.010e-02, -1.044e-02, -1.352e-02, -4.554e-03)); + r += mul(s6_3, 
M4(7.151e-02, 5.049e-02, 4.138e-02, 5.704e-02, 1.810e-01, -3.868e-02, 3.789e-02, 1.133e-03, -7.775e-02, -3.426e-02, -6.087e-02, -7.503e-02, -9.109e-02, 2.326e-02, -5.660e-02, 2.871e-02)); + r += mul(s6_4, M4(-6.362e-02, 2.525e-01, 9.566e-02, 3.626e-02, 1.463e-01, -2.089e-01, -1.323e-01, 1.257e-01, -7.023e-02, 7.012e-02, -1.454e-01, -2.342e-02, -8.142e-02, 6.109e-02, -7.436e-02, -8.377e-02)); + r += mul(s6_5, M4(-1.045e-01, -8.608e-02, 3.267e-02, 6.552e-02, 6.730e-03, 6.657e-02, -2.295e-02, -3.188e-02, -5.740e-02, -5.613e-04, -1.379e-01, -1.088e-01, 3.836e-03, 3.775e-02, -1.526e-02, 4.745e-02)); + r += mul(s6_6, M4(2.710e-02, 1.692e-01, 1.235e-01, 3.099e-02, 1.371e-02, -6.910e-02, -2.846e-02, 2.167e-02, -1.561e-02, -8.469e-02, 1.648e-02, 1.011e-03, -1.081e-03, -3.891e-02, -1.399e-01, 2.680e-02)); + r += mul(s6_7, M4(1.295e-01, 6.213e-02, 2.126e-01, 4.305e-03, -1.509e-01, -3.575e-02, 4.243e-02, 1.890e-01, -1.112e-01, -1.203e-01, -7.555e-02, -8.316e-02, -7.615e-02, 3.685e-02, 5.208e-02, 5.158e-02)); + r += mul(s6_8, M4(1.497e-02, -3.588e-02, 1.041e-01, -3.959e-02, 6.392e-02, 2.749e-02, 3.729e-02, 3.639e-02, 3.821e-03, -4.464e-02, -5.107e-02, 4.252e-02, 2.359e-02, 1.211e-03, 6.559e-02, 8.423e-02)); + r += mul(s7_0, M4(4.269e-02, 1.448e-02, -7.251e-02, 4.434e-02, 8.679e-03, -1.043e-01, 3.841e-02, 2.695e-02, 6.740e-02, -2.644e-03, -3.486e-02, 7.661e-03, -6.166e-02, 3.065e-02, 8.044e-02, -7.941e-02)); + r += mul(s7_1, M4(-8.077e-02, -2.410e-02, -2.501e-02, 8.880e-03, 2.563e-02, 5.505e-02, -4.794e-02, -1.256e-02, 1.219e-02, -1.130e-02, 4.149e-02, -4.619e-03, 3.638e-02, -7.627e-02, -6.329e-02, -6.666e-02)); + r += mul(s7_2, M4(-6.696e-02, 2.772e-02, -2.924e-02, -8.234e-04, -3.507e-02, -3.574e-02, -8.502e-03, -5.329e-02, -6.518e-02, 2.003e-02, 6.235e-02, 2.691e-03, 5.237e-02, 1.363e-02, -4.233e-03, 1.431e-02)); + r += mul(s7_3, M4(-1.845e-02, 7.724e-03, -7.974e-02, 4.336e-02, -3.622e-02, 1.163e-01, 1.091e-01, 2.923e-02, -4.799e-02, -8.489e-02, 3.857e-02, -2.627e-02, 
1.565e-02, -6.398e-02, -1.880e-01, 9.351e-02)); + r += mul(s7_4, M4(-7.986e-02, 1.074e-01, -2.022e-01, -2.343e-02, -1.010e-01, -2.735e-02, -2.889e-02, 8.426e-02, -5.666e-02, -2.074e-01, 8.822e-02, -2.570e-01, -5.235e-02, 5.873e-02, 7.100e-03, -4.525e-02)); + r += mul(s7_5, M4(1.071e-01, -1.071e-01, -1.074e-01, -3.666e-03, 5.386e-02, 7.296e-02, 2.242e-02, 1.896e-03, -7.017e-02, 2.575e-02, -9.618e-03, -3.774e-02, 2.857e-02, 5.077e-02, 6.278e-02, 1.436e-01)); + r += mul(s7_6, M4(1.732e-02, -5.917e-02, -6.042e-02, 4.714e-02, 3.632e-02, -1.275e-02, -4.554e-02, 3.627e-02, -4.395e-02, -1.144e-02, 6.404e-02, 3.403e-02, -1.123e-02, 6.191e-02, 5.405e-02, -1.169e-03)); + r += mul(s7_7, M4(1.677e-01, -9.623e-02, -1.186e-02, 9.234e-02, -5.493e-02, 2.045e-02, -1.244e-01, 1.381e-01, -1.582e-02, 5.308e-03, 1.022e-02, -2.167e-01, -5.924e-02, -2.163e-02, 2.311e-02, 2.575e-02)); + r += mul(s7_8, M4(1.005e-01, 1.794e-02, -1.286e-01, -3.952e-02, 5.529e-03, 5.189e-02, 3.473e-02, 1.438e-02, -9.109e-02, -2.932e-02, 1.696e-02, -4.435e-02, -7.043e-02, 4.999e-02, -3.583e-03, 7.583e-03)); + r += V4(5.137e-03, -9.318e-03, -1.525e-02, 7.307e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.355e-02, 3.296e-02, -3.112e-02, -2.420e-02, 1.337e-01, -1.307e-02, -2.977e-02, 9.591e-02, -2.963e-02, 8.426e-02, 1.922e-02, 6.205e-02, -1.764e-01, 
-8.246e-02, -9.129e-02, 1.603e-01)); + r += mul(s0_1, M4(-4.793e-02, -8.006e-02, -5.812e-02, 6.274e-02, 2.285e-01, -9.298e-02, -1.421e-02, 6.073e-02, 9.368e-02, -9.190e-02, 1.002e-01, -1.052e-01, -7.702e-02, 2.329e-01, 2.498e-02, -1.130e-01)); + r += mul(s0_2, M4(2.014e-02, -3.787e-03, 6.612e-03, 3.495e-02, 1.220e-01, -1.040e-01, -4.782e-02, -1.094e-01, -4.148e-02, -4.323e-02, -7.381e-02, -1.293e-01, 1.620e-02, -4.429e-02, 8.386e-03, -9.178e-02)); + r += mul(s0_3, M4(-6.812e-02, -9.483e-02, 2.104e-01, -9.087e-02, 5.849e-02, -1.023e-01, -8.344e-02, 3.854e-02, -3.597e-01, -2.107e-02, -2.478e-01, 2.084e-01, -8.659e-02, 3.793e-02, -7.405e-02, 1.419e-01)); + r += mul(s0_4, M4(-1.178e-01, 1.949e-01, 1.408e-01, -1.079e-01, 8.445e-02, -4.453e-02, -2.549e-01, 1.182e-01, -4.696e-01, 1.053e-01, -2.247e-01, 2.390e-01, -1.133e-01, -1.850e-01, 1.923e-01, 1.488e-01)); + r += mul(s0_5, M4(-3.204e-02, -4.997e-02, 1.294e-01, -1.489e-01, 1.672e-01, -9.787e-03, -8.510e-02, -2.471e-03, -1.163e-02, 2.536e-01, 1.850e-01, 2.570e-01, -5.157e-02, -9.404e-02, 5.323e-02, -2.672e-02)); + r += mul(s0_6, M4(1.199e-01, 5.749e-02, -1.579e-01, -2.987e-03, 4.631e-02, -7.230e-04, 4.758e-03, 3.372e-02, 2.937e-03, 5.282e-03, 1.157e-01, 4.203e-03, -3.622e-01, 9.459e-02, -8.716e-02, 7.281e-02)); + r += mul(s0_7, M4(-4.052e-02, 8.018e-02, -9.348e-02, 1.070e-01, 1.703e-01, -1.062e-01, 1.111e-01, -8.738e-02, 6.301e-02, -1.002e-01, 2.808e-02, 1.008e-02, -9.902e-02, -6.596e-02, -1.198e-01, 5.289e-02)); + r += mul(s0_8, M4(-1.094e-02, -6.197e-02, 3.224e-02, 1.337e-01, 1.200e-01, -1.145e-01, 7.973e-03, 4.485e-02, -9.850e-02, -2.906e-02, -6.900e-02, 2.700e-02, -4.982e-02, -1.381e-02, -4.235e-02, 7.607e-02)); + r += mul(s1_0, M4(4.432e-02, 3.255e-03, -1.106e-02, 3.922e-03, -8.093e-02, 4.205e-02, -4.581e-03, -5.722e-02, -8.181e-04, 6.928e-03, 4.826e-03, -2.181e-02, -1.968e-02, 2.710e-02, 4.843e-02, 2.190e-02)); + r += mul(s1_1, M4(-1.437e-02, -2.659e-02, 1.607e-02, 3.518e-02, -4.626e-02, -6.232e-02, -4.290e-03, 
9.698e-03, 4.383e-02, -1.006e-01, 3.067e-02, -9.389e-03, 5.026e-03, 2.198e-01, 9.707e-03, 4.045e-02)); + r += mul(s1_2, M4(-2.218e-02, -7.174e-02, 1.961e-04, 4.329e-02, -3.113e-02, -6.908e-03, -2.772e-02, 2.195e-02, -2.089e-02, 7.784e-02, 2.704e-02, 1.087e-02, 4.724e-02, 2.037e-02, -4.994e-02, -5.616e-02)); + r += mul(s1_3, M4(-1.365e-01, -1.411e-01, 1.842e-02, -4.131e-02, -8.218e-02, -6.265e-02, -6.018e-02, -3.876e-02, -4.951e-02, 1.718e-02, 6.618e-02, 8.092e-03, 2.514e-01, 1.053e-01, 3.172e-02, -2.182e-02)); + r += mul(s1_4, M4(-6.162e-03, 7.960e-02, -9.711e-03, 4.101e-02, -1.553e-01, 2.222e-02, -1.430e-01, -8.064e-02, -3.765e-02, -7.107e-02, -1.621e-02, 6.533e-02, 4.012e-02, 1.249e-01, 3.522e-01, 2.842e-01)); + r += mul(s1_5, M4(9.114e-02, -4.403e-02, 4.894e-02, -2.185e-02, -7.740e-02, 7.062e-02, -6.714e-02, -6.360e-02, -2.117e-02, 1.966e-02, 5.892e-02, 2.980e-02, -2.791e-02, -7.889e-03, 1.397e-01, -4.555e-02)); + r += mul(s1_6, M4(2.624e-02, -2.347e-02, 7.362e-02, -1.315e-01, -4.644e-02, 3.628e-03, -3.379e-02, -2.500e-02, -2.819e-04, -1.098e-02, -5.829e-02, 2.258e-02, 1.219e-01, -2.299e-05, 4.017e-02, 6.028e-02)); + r += mul(s1_7, M4(-6.700e-02, 1.226e-01, -1.034e-01, 2.465e-02, 5.769e-02, -3.604e-02, 6.603e-02, -9.597e-02, -1.400e-02, -2.878e-02, 1.592e-02, -7.906e-02, 1.371e-01, -3.026e-02, -7.622e-03, 1.931e-01)); + r += mul(s1_8, M4(9.961e-03, -6.016e-02, -7.028e-03, 3.942e-02, -3.351e-02, -4.909e-02, -4.676e-02, 1.124e-03, -1.712e-02, 3.273e-02, 3.916e-02, -2.822e-03, -2.449e-03, -6.221e-02, 8.854e-03, 5.159e-02)); + r += mul(s2_0, M4(-1.475e-01, 1.531e-01, 7.687e-02, 3.918e-02, -8.268e-02, 8.104e-02, -2.995e-02, 1.998e-03, 1.443e-03, -1.290e-01, 9.060e-02, -1.169e-01, -1.097e-01, -2.346e-02, 8.907e-02, -2.857e-02)); + r += mul(s2_1, M4(1.968e-01, 1.523e-01, -1.131e-01, -2.772e-02, -5.298e-02, 4.750e-02, 6.926e-03, 2.808e-02, 6.309e-02, 1.446e-01, -4.539e-02, 6.421e-02, -2.716e-01, 4.423e-02, 1.206e-01, 8.484e-02)); + r += mul(s2_2, M4(5.007e-02, 
1.354e-02, 8.236e-03, -8.577e-02, -1.924e-02, 1.052e-01, 1.844e-02, -3.665e-03, 6.377e-02, -1.254e-01, -1.168e-01, -9.514e-02, 3.941e-02, -8.821e-02, 5.721e-02, 2.076e-02)); + r += mul(s2_3, M4(-1.420e-01, 2.196e-01, 1.402e-02, 1.813e-01, -5.681e-02, 2.581e-02, 3.326e-02, -1.044e-01, -1.836e-01, -1.137e-01, 1.483e-01, -1.946e-01, 2.172e-02, -6.671e-02, -1.008e-01, 9.928e-02)); + r += mul(s2_4, M4(8.464e-02, 1.939e-01, -5.625e-02, -1.635e-01, 3.426e-02, -1.247e-02, -1.889e-01, 2.819e-02, 2.517e-01, 4.228e-01, 1.090e-02, -1.809e-02, 3.795e-02, 6.264e-02, 4.873e-02, 1.749e-01)); + r += mul(s2_5, M4(-9.532e-02, 4.131e-01, 6.408e-02, 1.424e-01, -1.770e-02, 2.033e-02, 3.604e-02, -2.169e-02, 2.669e-02, -6.641e-02, -4.143e-02, -4.834e-02, -6.407e-02, -1.596e-03, -7.999e-03, -6.714e-02)); + r += mul(s2_6, M4(1.511e-02, 1.173e-01, 8.768e-02, 3.224e-02, 1.244e-02, 4.711e-02, 3.580e-02, 2.181e-02, 4.199e-02, -4.670e-02, 4.122e-02, 6.350e-02, 1.441e-01, -1.958e-01, 5.897e-02, -8.286e-03)); + r += mul(s2_7, M4(5.551e-02, 1.052e-01, -1.443e-01, -5.598e-02, 8.887e-02, -9.084e-02, 2.679e-02, -6.200e-02, 4.895e-02, -2.826e-02, -2.257e-02, -2.243e-02, -9.582e-03, 4.050e-02, -2.564e-01, 3.372e-01)); + r += mul(s2_8, M4(6.303e-02, 1.638e-01, 2.913e-02, 2.016e-02, -4.983e-02, -2.189e-02, 2.401e-02, -1.876e-02, 2.361e-02, 3.606e-02, 9.776e-03, -4.713e-02, -9.113e-03, -1.176e-02, 1.776e-03, 4.181e-02)); + r += mul(s3_0, M4(-3.698e-02, -7.844e-02, 3.304e-02, 1.946e-02, 2.104e-02, 1.238e-01, -3.353e-02, -5.077e-03, 6.806e-03, -1.067e-01, 1.479e-02, 2.172e-02, -5.576e-02, 5.420e-03, 6.912e-02, -8.622e-02)); + r += mul(s3_1, M4(1.883e-02, 7.470e-02, -1.346e-02, 3.899e-03, -1.025e-01, -1.516e-01, -1.512e-01, 7.255e-04, 2.723e-03, -5.665e-02, 3.034e-02, 3.748e-02, -9.337e-02, 9.985e-02, -8.224e-02, 8.647e-02)); + r += mul(s3_2, M4(5.797e-02, -6.478e-02, -1.018e-01, -1.559e-02, -6.313e-02, 5.765e-02, -7.845e-03, -5.224e-02, 1.853e-02, -5.100e-03, -6.391e-02, -9.475e-03, 4.800e-02, 5.181e-02, 
-8.398e-02, -4.074e-02)); + r += mul(s3_3, M4(9.260e-02, 1.461e-01, -3.438e-02, 1.098e-02, -9.020e-02, 3.844e-02, 1.045e-01, -1.025e-02, -1.485e-01, 5.880e-02, -6.117e-02, -2.831e-02, 2.787e-02, 3.503e-02, -6.832e-03, 5.772e-02)); + r += mul(s3_4, M4(-8.313e-02, 2.058e-01, -3.602e-02, 1.008e-01, 1.512e-02, 9.869e-02, -1.041e-01, -9.628e-02, -4.995e-04, 8.158e-02, 7.221e-02, -6.046e-02, 7.551e-02, 2.327e-01, 1.294e-02, -7.074e-02)); + r += mul(s3_5, M4(-3.551e-02, -1.121e-02, -1.310e-01, -7.403e-03, -9.578e-02, 8.996e-02, -3.934e-02, 5.779e-02, -3.206e-02, 6.389e-03, 1.956e-02, -1.075e-01, 5.801e-02, -4.545e-02, -4.307e-02, -2.752e-02)); + r += mul(s3_6, M4(1.101e-01, 1.159e-02, 3.846e-02, -4.415e-02, -1.184e-01, 3.789e-02, -1.983e-02, 5.537e-02, 6.800e-02, -2.283e-02, 2.770e-02, 7.486e-03, 9.361e-02, -1.646e-02, 1.286e-02, -3.693e-02)); + r += mul(s3_7, M4(-4.379e-03, 5.815e-02, -4.533e-02, 2.994e-02, 1.210e-01, -3.824e-02, -4.413e-02, 1.902e-02, -2.501e-02, 6.130e-03, 6.869e-03, 4.077e-02, -6.249e-02, 5.397e-02, -2.537e-02, 5.363e-02)); + r += mul(s3_8, M4(3.918e-02, -5.312e-02, 1.762e-02, -9.313e-02, -1.263e-01, 2.528e-02, 4.375e-03, 1.167e-01, -3.206e-02, 2.798e-02, -1.405e-03, 6.325e-02, 1.910e-02, -4.898e-02, -6.247e-02, -6.990e-02)); + r += mul(s4_0, M4(-6.139e-02, 5.437e-02, 2.453e-02, -4.600e-02, 9.849e-02, 5.693e-02, 1.948e-02, -5.378e-02, 3.049e-02, 1.166e-02, -3.754e-02, 4.416e-02, 1.464e-01, -3.982e-02, -6.770e-02, -1.988e-02)); + r += mul(s4_1, M4(-6.238e-02, 9.132e-02, 2.410e-02, 5.254e-02, 1.018e-01, -1.521e-01, -4.153e-02, 7.522e-02, -3.337e-02, -1.002e-01, -1.748e-02, 2.526e-02, -6.027e-02, 8.025e-02, -6.918e-02, 8.623e-02)); + r += mul(s4_2, M4(-1.953e-02, 9.979e-03, 9.253e-03, -2.069e-02, 1.839e-02, -6.225e-02, -5.314e-02, 5.695e-02, 4.989e-02, -1.253e-01, 3.972e-02, 9.918e-05, -3.562e-02, 3.550e-03, -8.320e-02, -2.578e-02)); + r += mul(s4_3, M4(-7.979e-02, 5.597e-02, -4.311e-02, 7.581e-02, -4.559e-03, 1.095e-01, 1.003e-02, -1.280e-01, 5.793e-02, 
1.157e-02, 6.575e-03, 3.772e-02, 2.158e-01, -6.676e-02, -3.684e-02, -2.057e-02)); + r += mul(s4_4, M4(-8.690e-02, -7.181e-02, 2.561e-02, 9.186e-02, -1.037e-01, 1.092e-01, -8.658e-02, 2.412e-01, -1.142e-01, -1.252e-02, -7.673e-02, 9.523e-02, 1.178e-01, -6.940e-02, -9.813e-03, 9.323e-02)); + r += mul(s4_5, M4(3.644e-02, 4.980e-02, -6.765e-02, -7.746e-02, -1.077e-02, 1.492e-01, 4.657e-02, 2.160e-02, 1.068e-01, 2.339e-02, -1.746e-02, 4.236e-02, 4.431e-02, 2.937e-02, -1.591e-01, -2.176e-02)); + r += mul(s4_6, M4(-7.619e-02, -7.187e-02, -5.336e-02, 9.697e-03, -4.500e-02, -1.539e-02, 1.061e-02, 5.191e-03, -3.706e-03, 4.739e-02, -6.342e-02, 2.837e-02, 1.057e-01, -7.125e-02, -1.231e-02, -5.644e-02)); + r += mul(s4_7, M4(-6.363e-02, -1.806e-02, -1.648e-02, 1.506e-01, 5.504e-02, -1.806e-02, 6.314e-02, -5.645e-02, -6.528e-02, 1.484e-01, -1.558e-01, 1.457e-01, -3.353e-02, 8.053e-02, 7.623e-03, 1.454e-01)); + r += mul(s4_8, M4(-1.243e-02, -8.742e-02, -4.783e-02, 1.893e-02, -2.716e-02, -4.186e-02, -7.215e-03, 9.908e-03, -1.342e-02, -7.125e-02, 3.621e-02, 3.714e-02, 3.796e-03, -3.665e-02, -3.065e-03, -1.327e-02)); + r += mul(s5_0, M4(-9.733e-02, 8.479e-02, 1.697e-01, -1.471e-01, 8.367e-02, -9.701e-02, -3.827e-02, -2.734e-02, -5.654e-02, -6.484e-02, -8.206e-02, 5.448e-02, 2.060e-02, -3.717e-02, -6.715e-02, 1.540e-02)); + r += mul(s5_1, M4(-2.993e-01, -5.256e-03, 2.617e-01, -1.863e-01, -8.625e-02, -1.183e-02, 4.416e-02, 1.162e-01, -5.994e-02, -1.620e-01, -6.483e-02, 6.804e-02, -1.188e-01, -1.673e-01, -6.238e-02, 7.633e-02)); + r += mul(s5_2, M4(-1.263e-01, -6.865e-02, 7.806e-02, -8.792e-02, 1.829e-02, -1.938e-01, -5.768e-03, -4.756e-02, -3.244e-03, -3.858e-02, -1.668e-02, -9.638e-03, 3.290e-02, 3.288e-02, -7.609e-02, 2.349e-02)); + r += mul(s5_3, M4(-4.874e-02, 5.302e-02, -9.153e-02, 1.294e-01, 2.020e-02, 2.900e-02, -3.263e-05, -5.333e-02, 2.962e-02, -1.130e-01, 7.812e-02, 2.745e-02, 7.177e-02, -3.814e-03, -3.659e-02, -3.620e-02)); + r += mul(s5_4, M4(-3.291e-01, -1.647e-01, 
-1.479e-01, -1.474e-01, 1.411e-02, -2.628e-02, -2.961e-01, 4.556e-01, -1.681e-01, -2.356e-01, -1.208e-02, 2.806e-04, -8.358e-02, -1.067e-01, 5.848e-02, -1.800e-01)); + r += mul(s5_5, M4(1.800e-02, 4.092e-02, -1.346e-01, -1.005e-02, -6.972e-02, 3.784e-02, 1.267e-01, 5.999e-03, 3.652e-02, 1.367e-02, -1.292e-01, 3.011e-02, 2.985e-02, 2.337e-02, -3.303e-02, -1.028e-01)); + r += mul(s5_6, M4(-1.082e-01, 1.075e-02, -6.635e-02, -6.803e-02, -9.433e-02, 1.506e-02, -1.537e-02, 7.209e-02, 5.416e-03, 6.978e-04, -5.429e-02, -3.363e-02, 2.783e-02, -3.711e-03, -7.219e-02, 1.395e-02)); + r += mul(s5_7, M4(-5.227e-02, -2.485e-02, 8.963e-02, -3.228e-02, -7.512e-03, 2.353e-02, -2.470e-02, -6.803e-03, 2.963e-02, 4.069e-02, -4.585e-02, 2.629e-02, 7.656e-03, -6.855e-03, -1.474e-02, 5.023e-02)); + r += mul(s5_8, M4(-2.069e-02, -5.281e-02, -5.002e-02, 1.623e-03, 4.042e-02, -6.268e-02, 6.316e-02, 8.014e-03, -1.306e-02, 5.471e-02, -7.529e-02, -3.545e-02, -1.707e-04, 1.186e-02, -3.595e-02, 1.790e-02)); + r += mul(s6_0, M4(3.302e-02, 4.418e-03, -8.796e-02, 6.241e-02, -5.939e-02, -7.157e-02, 3.014e-02, -5.200e-02, -4.419e-02, 4.946e-02, 1.235e-02, -5.338e-02, 1.106e-02, -4.460e-02, 2.467e-02, 1.632e-02)); + r += mul(s6_1, M4(-1.165e-02, 1.717e-02, -4.187e-02, -5.148e-02, 6.087e-02, 1.062e-02, 7.774e-02, 1.445e-01, -2.212e-01, -2.557e-01, 6.949e-02, -4.760e-02, 1.232e-02, 7.268e-02, 2.847e-02, -3.986e-02)); + r += mul(s6_2, M4(1.145e-01, -3.325e-02, -1.196e-01, 1.100e-01, 2.360e-02, 2.100e-02, 7.523e-02, -4.792e-02, 6.078e-03, -9.426e-02, -1.088e-01, 3.007e-02, 1.658e-02, 3.352e-02, -6.354e-02, 1.246e-02)); + r += mul(s6_3, M4(1.542e-02, -6.429e-02, -4.317e-02, 4.337e-02, -8.402e-02, 7.670e-03, -8.973e-02, 1.792e-03, -1.674e-01, 4.020e-02, 4.307e-02, 3.281e-02, 1.149e-01, 1.195e-02, -4.625e-02, 7.528e-02)); + r += mul(s6_4, M4(1.931e-02, -2.168e-02, 1.550e-03, -2.958e-02, 7.771e-02, 8.194e-02, -3.328e-04, 2.268e-02, -2.748e-01, -1.987e-01, 4.499e-02, -1.982e-01, 1.425e-02, 1.992e-02, 
-1.424e-02, 1.139e-02)); + r += mul(s6_5, M4(1.625e-01, -1.027e-01, 1.266e-01, -1.504e-01, 2.596e-02, 2.140e-02, 9.871e-03, 1.368e-01, 2.167e-02, 6.120e-02, 3.605e-02, -1.278e-01, -3.467e-02, 4.308e-02, 8.625e-02, -2.846e-02)); + r += mul(s6_6, M4(3.814e-02, 7.893e-03, 2.085e-02, -2.793e-02, 3.963e-02, -2.560e-02, 3.084e-02, -6.913e-03, -1.248e-01, 9.007e-02, -1.532e-03, 5.562e-02, 3.947e-02, 1.783e-02, -1.404e-01, 3.774e-03)); + r += mul(s6_7, M4(1.173e-01, 2.692e-03, 9.529e-02, -6.932e-02, 3.259e-02, -1.284e-01, 4.636e-02, 8.723e-03, -8.999e-03, -3.237e-02, 3.534e-02, -3.240e-02, 5.066e-02, 3.524e-02, -5.775e-02, -8.170e-02)); + r += mul(s6_8, M4(1.331e-01, 2.378e-02, -1.813e-02, -3.624e-02, 1.946e-02, 2.003e-02, -1.204e-01, 2.374e-02, -5.333e-02, 1.888e-02, 4.138e-02, 2.068e-02, -5.053e-02, 3.120e-02, 1.305e-04, 2.335e-02)); + r += mul(s7_0, M4(-8.931e-03, 8.554e-03, -3.430e-02, 3.480e-02, 7.631e-03, 2.015e-02, -4.250e-02, 2.783e-03, -5.531e-02, 3.953e-02, -2.883e-02, 1.964e-02, 1.614e-02, -2.080e-02, 9.910e-02, -7.998e-02)); + r += mul(s7_1, M4(-1.219e-01, 3.186e-02, 6.263e-02, -6.382e-02, -7.854e-03, -1.781e-01, -2.866e-02, -2.123e-03, -1.213e-01, -1.335e-01, 6.055e-03, -5.242e-02, 7.391e-02, -2.211e-02, 2.785e-02, 9.733e-03)); + r += mul(s7_2, M4(-5.083e-02, 7.194e-02, 1.615e-03, 5.407e-02, 3.814e-02, -6.499e-02, 1.024e-01, -3.536e-02, 2.454e-02, -5.554e-02, 2.108e-02, -6.028e-02, -2.086e-02, 9.931e-02, -1.481e-02, 1.780e-03)); + r += mul(s7_3, M4(4.580e-02, -2.151e-02, -2.859e-02, -6.446e-02, -1.209e-01, -6.494e-02, 1.413e-01, -5.427e-03, 1.644e-01, 1.515e-02, 1.075e-02, -2.694e-02, 3.681e-02, -8.655e-05, -1.217e-01, 2.980e-02)); + r += mul(s7_4, M4(5.985e-03, 5.021e-02, -4.879e-02, 5.464e-03, -1.363e-01, -8.347e-03, 1.775e-01, -7.439e-02, 3.814e-02, -1.607e-01, -3.868e-02, 4.007e-02, 1.037e-01, -4.700e-02, -4.177e-02, -1.731e-02)); + r += mul(s7_5, M4(5.141e-02, -8.676e-02, 6.358e-02, 4.758e-02, 6.764e-02, -3.836e-02, -3.477e-02, 6.865e-02, -1.267e-01, 
-1.556e-01, -1.500e-02, -1.478e-02, -1.100e-02, 8.284e-02, 9.609e-02, -4.106e-02)); + r += mul(s7_6, M4(-4.043e-02, 3.920e-02, -2.731e-02, 1.030e-02, 2.214e-02, -3.011e-02, 7.946e-03, -5.982e-02, -5.416e-02, 1.414e-02, 7.464e-02, 1.179e-02, 2.691e-02, -2.210e-02, 6.503e-02, -9.833e-02)); + r += mul(s7_7, M4(-7.673e-02, 9.742e-03, 2.093e-02, -4.170e-02, 2.232e-02, -2.862e-02, -6.734e-02, 4.183e-02, 1.447e-02, 4.077e-02, 3.989e-03, 6.437e-02, 1.317e-02, 4.673e-03, -4.076e-02, -1.647e-01)); + r += mul(s7_8, M4(-5.286e-02, 4.923e-02, -9.718e-02, -2.947e-02, -3.456e-02, 1.459e-02, 3.428e-02, 2.702e-02, -1.755e-02, -2.393e-02, -1.549e-02, 4.208e-02, -5.462e-02, 4.807e-02, -1.638e-03, -1.464e-02)); + r += V4(-7.666e-03, 7.394e-03, -3.562e-03, -1.228e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.406e-03, -2.751e-02, 2.389e-02, -2.671e-02, 9.890e-02, 6.479e-02, 4.287e-01, 2.984e-02, 7.607e-02, -3.654e-02, -1.230e-01, 1.394e-02, 1.981e-02, 1.227e-01, -6.546e-02, 4.504e-02)); + r += mul(s0_1, M4(-6.942e-03, 8.442e-03, -3.946e-03, -8.129e-02, 8.599e-02, 8.176e-02, -7.871e-02, 5.741e-02, 2.062e-01, 1.058e-02, -5.040e-02, 4.902e-03, 1.612e-01, 1.634e-02, -4.898e-02, -9.324e-02)); + r += mul(s0_2, M4(-2.069e-02, -2.451e-02, 2.637e-02, -5.626e-03, 6.668e-02, 4.335e-02, 1.039e-01, 8.448e-02, 1.873e-01, 3.773e-03, 
-2.043e-01, -8.218e-03, -8.518e-03, 5.852e-02, 2.851e-02, 2.286e-02)); + r += mul(s0_3, M4(1.137e-01, -1.327e-01, -1.789e-02, -1.100e-01, 9.085e-04, -4.401e-02, 1.561e-01, 6.896e-02, -2.459e-02, 1.479e-01, 1.197e-01, 1.124e-01, -1.879e-01, 3.314e-01, 1.166e-01, 1.139e-01)); + r += mul(s0_4, M4(1.211e-01, -3.114e-02, 2.034e-02, -8.814e-02, 9.038e-02, 6.309e-02, 2.457e-02, 1.404e-01, 3.895e-02, 2.724e-01, 5.470e-02, -1.407e-01, -8.583e-02, -4.419e-02, -2.949e-01, -1.601e-01)); + r += mul(s0_5, M4(-6.675e-03, 5.540e-02, 1.858e-02, -2.243e-02, 2.324e-02, 1.130e-02, 1.134e-02, 1.121e-01, -2.965e-02, -2.725e-02, -2.064e-01, -1.259e-01, -6.429e-02, -4.605e-02, -4.295e-02, 5.142e-02)); + r += mul(s0_6, M4(-4.172e-02, -1.160e-01, -1.172e-01, -7.617e-03, 9.909e-02, 2.588e-02, 1.209e-01, -6.817e-02, 1.413e-01, -8.298e-02, -2.084e-03, -1.172e-01, -4.519e-02, 3.606e-02, 4.065e-02, 2.651e-01)); + r += mul(s0_7, M4(1.167e-01, -6.684e-02, -1.963e-02, 6.528e-02, 1.105e-01, 7.529e-02, 3.613e-02, -1.494e-01, 1.167e-01, -7.575e-02, 1.563e-01, -7.482e-02, 3.625e-02, -9.058e-02, -4.330e-02, 1.325e-01)); + r += mul(s0_8, M4(1.954e-02, 1.990e-03, 2.500e-02, -9.505e-02, 1.029e-01, -6.343e-03, 1.386e-01, -6.028e-02, 7.436e-03, 5.685e-02, 1.285e-01, 9.548e-03, 1.340e-02, -2.372e-02, -3.000e-02, -3.586e-02)); + r += mul(s1_0, M4(1.495e-01, -7.398e-02, 5.967e-02, -3.157e-02, -2.459e-02, 5.986e-02, 1.171e-02, 1.771e-02, 2.790e-02, 2.876e-02, 1.154e-02, 7.359e-03, -3.539e-03, 7.870e-02, -1.706e-02, 2.702e-03)); + r += mul(s1_1, M4(4.107e-02, 3.702e-02, 1.844e-02, -1.665e-02, -9.427e-02, 1.276e-02, -1.905e-01, -3.633e-02, -4.983e-02, -1.865e-02, -4.267e-02, 5.087e-02, 8.844e-02, -1.910e-02, 2.550e-02, -3.833e-02)); + r += mul(s1_2, M4(-1.749e-02, 1.086e-02, 6.475e-02, 5.569e-03, -3.156e-02, -1.079e-02, -1.211e-02, -4.422e-02, 3.946e-02, -5.961e-02, -3.821e-02, -6.025e-02, 1.419e-02, 1.226e-02, 8.469e-02, 1.532e-03)); + r += mul(s1_3, M4(-1.309e-02, -6.999e-02, 2.224e-02, 1.539e-02, -8.830e-02, 
9.210e-04, -1.607e-01, 1.475e-02, 1.783e-02, -1.197e-02, -2.315e-02, 3.797e-03, -5.802e-02, 6.780e-02, -8.185e-02, -1.038e-02)); + r += mul(s1_4, M4(1.126e-01, 5.824e-02, -1.136e-01, -1.214e-01, 7.254e-02, 1.392e-01, 4.652e-02, 4.163e-03, -5.078e-03, 1.296e-01, -1.353e-02, 1.771e-02, -7.262e-02, -1.636e-01, 1.676e-01, -1.440e-01)); + r += mul(s1_5, M4(-2.273e-02, -7.281e-02, -9.637e-02, 5.021e-02, -3.087e-03, 6.811e-02, -8.194e-02, -1.708e-02, -5.312e-02, 5.169e-03, -4.103e-02, -8.083e-02, 4.278e-03, -1.891e-02, 4.832e-02, 9.931e-02)); + r += mul(s1_6, M4(3.184e-02, 1.020e-01, 4.867e-02, 1.185e-01, -3.356e-02, 3.062e-02, -2.851e-03, 1.745e-03, -6.452e-02, 5.120e-02, 1.792e-02, 1.147e-02, 6.849e-02, -9.902e-02, -4.764e-02, 5.710e-02)); + r += mul(s1_7, M4(2.578e-02, 1.367e-01, -1.210e-02, -1.233e-01, -1.480e-02, -2.218e-02, -7.835e-02, -8.176e-02, -5.371e-03, 7.040e-02, 2.773e-02, 3.655e-02, -5.689e-02, 2.871e-02, 1.214e-01, 9.351e-02)); + r += mul(s1_8, M4(-6.080e-02, 1.763e-02, -3.189e-02, -1.273e-01, 3.649e-02, -4.435e-02, 3.761e-02, -7.953e-03, 2.349e-02, -3.095e-02, 1.142e-02, 4.297e-03, -2.715e-02, 4.936e-02, 1.082e-02, 9.799e-02)); + r += mul(s2_0, M4(-2.776e-02, 1.293e-01, -1.783e-01, 1.529e-01, -2.290e-02, -8.428e-02, 4.169e-02, 1.767e-02, 1.679e-02, 2.074e-02, 1.537e-01, -8.099e-02, 4.739e-02, -3.675e-02, 1.037e-03, -6.920e-02)); + r += mul(s2_1, M4(-2.531e-02, -2.582e-02, 1.969e-02, -1.393e-02, -5.260e-02, 1.181e-01, -6.617e-02, 5.117e-02, -3.257e-03, 5.455e-02, 6.795e-02, 3.913e-02, 5.635e-02, -9.876e-03, 1.546e-01, -3.741e-02)); + r += mul(s2_2, M4(4.920e-02, 6.231e-02, 9.032e-02, 1.876e-01, -8.728e-03, -3.089e-02, -1.952e-02, -2.917e-02, -9.372e-02, 8.909e-03, -4.601e-02, 1.724e-01, -4.023e-02, 7.348e-02, -9.672e-03, -5.886e-02)); + r += mul(s2_3, M4(-7.343e-03, 1.215e-01, 7.028e-02, 6.470e-02, 3.450e-02, -1.520e-01, -1.476e-02, -2.044e-02, 2.039e-01, -8.401e-02, -3.152e-02, 5.093e-03, 8.280e-02, -1.049e-01, 2.218e-01, -3.287e-03)); + r += mul(s2_4, 
M4(1.706e-01, -2.375e-01, 2.617e-02, 1.590e-01, 2.274e-01, 2.259e-01, 1.094e-01, 1.597e-01, -4.165e-01, 3.057e-01, -2.471e-01, -1.313e-01, 8.047e-02, 1.695e-01, -6.232e-02, 7.251e-02)); + r += mul(s2_5, M4(1.763e-01, -6.319e-02, 2.457e-01, 1.073e-01, -4.014e-02, -1.392e-02, -6.100e-02, -9.742e-02, 2.239e-01, 1.344e-01, 1.617e-01, 1.206e-01, 1.906e-01, -2.518e-02, 9.673e-02, 9.477e-02)); + r += mul(s2_6, M4(-1.280e-01, 1.866e-02, 1.189e-02, 1.655e-01, 2.222e-02, -5.139e-02, 5.157e-03, -7.063e-03, 6.341e-03, -6.532e-02, -8.119e-03, 2.360e-01, 2.154e-01, 2.693e-02, -8.017e-02, -1.507e-01)); + r += mul(s2_7, M4(-1.199e-01, 2.403e-02, -1.075e-01, 1.460e-01, 6.546e-03, 5.782e-02, 3.762e-02, 1.417e-02, -1.522e-01, 1.086e-01, 8.618e-03, -1.528e-03, 8.101e-02, -1.232e-01, -1.925e-02, -3.502e-01)); + r += mul(s2_8, M4(3.414e-02, -9.485e-02, -1.213e-01, 1.146e-01, 5.566e-02, 2.470e-02, 2.864e-02, 7.492e-03, 1.215e-03, 6.868e-03, 1.428e-01, 5.823e-02, 3.237e-02, 5.224e-02, -1.235e-02, 5.870e-03)); + r += mul(s3_0, M4(7.992e-03, 2.747e-02, 6.278e-04, 2.961e-02, -8.173e-02, -4.045e-02, -5.339e-02, -2.496e-02, 2.246e-03, -3.510e-02, -7.723e-02, -3.039e-02, 8.896e-02, -6.327e-02, 5.694e-02, -4.376e-02)); + r += mul(s3_1, M4(6.935e-02, 6.694e-02, 1.354e-01, 5.321e-03, 1.836e-03, 1.534e-01, -9.230e-02, 6.509e-02, -7.726e-02, 2.394e-02, 9.035e-02, 2.274e-02, -1.634e-02, -9.872e-02, 1.383e-01, -4.576e-03)); + r += mul(s3_2, M4(-1.918e-02, 5.700e-02, -1.148e-02, -5.842e-03, 3.914e-02, -7.829e-02, -7.173e-02, -3.783e-02, -6.737e-03, 3.288e-02, 1.354e-02, 1.546e-02, -6.535e-02, 1.595e-02, 7.187e-02, 4.985e-02)); + r += mul(s3_3, M4(4.359e-02, -9.839e-02, 2.936e-02, -6.255e-02, 3.457e-02, -8.771e-02, -6.488e-02, -5.237e-02, 5.229e-02, 1.695e-01, 3.978e-02, 4.679e-02, 5.440e-02, -5.050e-02, -8.147e-02, 2.267e-03)); + r += mul(s3_4, M4(1.393e-01, -7.356e-02, 1.537e-01, -4.392e-03, 6.915e-02, 3.751e-01, 9.750e-03, 1.061e-01, -2.043e-01, 1.080e-01, -5.034e-02, 7.642e-02, 1.655e-01, 8.132e-02, 
1.714e-01, -4.305e-03)); + r += mul(s3_5, M4(-4.915e-02, 4.699e-03, 1.028e-01, -1.004e-01, -1.316e-02, -4.125e-02, 2.526e-02, -8.864e-02, 3.462e-02, 2.840e-02, -2.860e-02, 3.815e-02, 4.406e-02, -4.006e-02, 1.662e-02, 1.365e-01)); + r += mul(s3_6, M4(-9.936e-02, -4.912e-02, -2.813e-02, -3.454e-02, -2.135e-02, 4.073e-02, 2.829e-02, 7.408e-03, 8.254e-04, -1.549e-02, -3.958e-02, 6.670e-02, 1.801e-02, -6.790e-02, -1.916e-02, -2.464e-02)); + r += mul(s3_7, M4(5.843e-02, -6.666e-02, 3.866e-02, -6.407e-03, -1.351e-02, -1.384e-02, -7.414e-02, -2.238e-02, 2.566e-02, -6.237e-02, -2.566e-02, -7.310e-02, 5.082e-02, -6.057e-02, -6.063e-03, -7.529e-02)); + r += mul(s3_8, M4(4.991e-02, 8.658e-02, 1.033e-01, -1.294e-02, 1.401e-01, -1.581e-02, 8.427e-03, 8.901e-02, -7.779e-03, -3.619e-02, -2.896e-02, -3.628e-02, 4.216e-02, -1.539e-02, 6.614e-02, 1.200e-02)); + r += mul(s4_0, M4(3.201e-02, -2.314e-02, -9.402e-02, -5.038e-02, -1.166e-01, 5.423e-02, 6.861e-02, 1.893e-02, -1.840e-02, 9.847e-03, 6.309e-02, 1.178e-02, 2.019e-02, -5.657e-02, 1.314e-01, 1.558e-02)); + r += mul(s4_1, M4(-2.175e-03, 1.075e-02, 9.153e-02, -1.479e-02, -2.999e-02, -9.316e-02, -1.268e-01, 1.921e-02, -3.825e-02, 9.109e-02, 4.885e-03, -1.273e-02, 1.376e-01, -1.753e-01, -1.395e-02, 1.021e-02)); + r += mul(s4_2, M4(1.570e-02, 5.170e-02, -5.630e-02, -1.281e-02, 1.520e-02, -5.054e-03, 7.073e-03, -5.578e-02, -4.189e-02, 8.996e-02, -3.176e-02, 1.501e-02, 2.722e-02, -1.370e-02, 1.001e-01, 2.858e-02)); + r += mul(s4_3, M4(-5.160e-02, -7.019e-03, 6.312e-02, 6.056e-02, 3.499e-02, -1.070e-01, 5.695e-02, -3.497e-03, 7.793e-02, 8.020e-02, 3.886e-02, -5.442e-02, 3.345e-03, -5.526e-02, 1.428e-01, 3.020e-03)); + r += mul(s4_4, M4(2.241e-02, 1.776e-01, 2.629e-02, 1.097e-01, -1.603e-01, 2.221e-02, -1.667e-02, 1.837e-01, -6.043e-02, 2.459e-02, -1.244e-01, -5.165e-02, 1.008e-01, -3.515e-01, 9.694e-02, -1.964e-02)); + r += mul(s4_5, M4(-2.891e-02, 3.557e-03, -6.549e-02, 4.918e-02, -1.580e-01, 8.213e-03, -1.304e-02, -6.337e-02, 
-1.537e-01, -2.118e-02, -1.707e-01, -3.469e-02, 1.904e-01, -9.866e-02, 1.235e-01, 1.615e-01)); + r += mul(s4_6, M4(7.846e-03, -2.377e-02, 5.844e-02, 2.195e-02, 1.136e-02, -4.959e-02, 7.519e-03, -6.324e-02, 2.230e-02, -2.616e-02, 5.732e-02, 6.415e-02, 2.430e-02, -3.475e-02, -1.582e-02, -4.139e-02)); + r += mul(s4_7, M4(-1.231e-01, 2.511e-02, -7.017e-02, -1.087e-02, 7.429e-02, -2.531e-02, -3.772e-03, 3.162e-02, 4.332e-02, -1.309e-01, 3.291e-02, 1.481e-02, -4.270e-02, -8.337e-02, -3.627e-02, 1.626e-01)); + r += mul(s4_8, M4(5.311e-02, -5.032e-02, 6.109e-02, 1.034e-02, -3.633e-02, 3.182e-02, 1.081e-02, -7.439e-03, 5.875e-02, 6.472e-02, 3.018e-02, 7.406e-03, 2.786e-02, 2.914e-03, 4.435e-02, 1.405e-01)); + r += mul(s5_0, M4(-6.742e-02, -9.053e-02, -4.419e-01, -1.174e-01, 2.639e-02, -5.764e-02, 7.424e-02, 5.791e-02, -2.769e-02, 6.398e-02, -1.846e-02, -2.339e-02, -2.575e-02, 3.780e-02, 1.045e-02, -1.989e-02)); + r += mul(s5_1, M4(-2.244e-02, 1.453e-01, 9.197e-04, -1.394e-03, 1.953e-01, -9.646e-02, 3.490e-02, 6.717e-02, 1.967e-02, 4.580e-02, -1.188e-01, -2.916e-02, 4.751e-02, -7.756e-02, -2.299e-01, 1.251e-03)); + r += mul(s5_2, M4(-6.215e-02, 5.819e-02, -1.518e-01, -1.638e-01, 1.215e-02, 3.033e-02, 9.672e-03, -2.266e-03, 1.194e-02, 6.312e-02, 2.088e-03, -5.166e-02, -1.069e-01, -7.006e-02, 2.260e-02, 4.479e-03)); + r += mul(s5_3, M4(-2.098e-01, -1.045e-01, -1.693e-01, 1.107e-02, 2.776e-02, -7.437e-02, -4.281e-02, -4.362e-02, 8.969e-03, 1.298e-01, -2.149e-02, 2.569e-02, -8.281e-02, -8.143e-03, 4.913e-02, -4.221e-03)); + r += mul(s5_4, M4(1.898e-02, 1.908e-01, 8.051e-02, 7.461e-02, 9.440e-02, -3.972e-02, 2.920e-02, -2.677e-02, -1.486e-01, 5.343e-02, 4.775e-02, 1.144e-02, 1.852e-02, -2.526e-01, 4.969e-02, -4.623e-02)); + r += mul(s5_5, M4(-8.362e-02, -3.926e-02, -2.237e-01, -6.170e-02, 6.227e-02, 6.022e-02, 1.709e-01, 1.088e-01, -1.182e-01, -7.202e-02, -7.402e-02, 4.212e-02, 1.368e-01, -3.192e-02, -6.101e-02, 1.636e-01)); + r += mul(s5_6, M4(-4.502e-02, -5.282e-02, 7.133e-04, 
-9.106e-02, -6.107e-02, 9.297e-03, 2.709e-03, -2.718e-02, 5.338e-02, 3.576e-02, -5.048e-04, -5.170e-03, 8.932e-03, -6.275e-02, -1.041e-03, -1.522e-02)); + r += mul(s5_7, M4(-1.108e-01, -6.070e-02, -3.770e-02, 4.358e-03, 2.461e-02, -3.630e-02, -5.192e-02, -1.041e-01, 7.605e-03, 3.053e-02, 4.332e-02, 5.214e-02, 1.711e-02, -1.180e-02, 2.084e-03, 6.377e-02)); + r += mul(s5_8, M4(9.699e-02, -2.121e-02, 4.909e-02, -1.573e-01, 4.100e-02, 1.046e-02, 7.242e-02, 8.504e-02, 2.716e-02, -3.748e-02, 2.484e-02, -1.300e-01, 9.252e-03, -5.036e-03, 2.020e-02, 3.686e-02)); + r += mul(s6_0, M4(-5.980e-02, 2.806e-02, 6.706e-02, 5.784e-02, 1.031e-01, -5.117e-02, 8.907e-03, -9.963e-03, -5.161e-03, -2.655e-02, -8.112e-02, -7.795e-03, 3.048e-02, 2.583e-02, 4.491e-02, 3.224e-02)); + r += mul(s6_1, M4(3.521e-02, 1.141e-01, 7.327e-02, 1.688e-02, 1.047e-01, 2.850e-02, -1.072e-01, -1.467e-01, -1.225e-02, 1.753e-02, -1.501e-03, -3.968e-02, -2.848e-02, -1.620e-02, -4.136e-02, 6.977e-03)); + r += mul(s6_2, M4(-1.329e-01, 1.051e-02, 9.320e-02, -3.299e-02, 5.434e-02, 2.181e-02, 5.300e-02, -8.050e-04, -1.119e-01, 2.738e-02, 4.885e-03, -6.312e-02, -1.215e-02, -5.119e-03, -7.759e-03, -2.041e-02)); + r += mul(s6_3, M4(-1.590e-02, 3.348e-03, 4.274e-02, 2.294e-02, -1.278e-01, 2.225e-02, 2.812e-02, -8.807e-03, 5.856e-02, 4.580e-02, 3.058e-02, 3.450e-03, 5.969e-02, -6.998e-03, 1.788e-02, 1.617e-02)); + r += mul(s6_4, M4(-2.731e-02, 2.275e-01, -9.676e-02, 1.035e-01, -3.410e-02, -4.834e-02, 1.038e-01, -1.541e-01, -7.447e-02, 7.343e-02, -7.526e-02, -2.714e-02, 3.142e-01, -4.096e-02, 2.155e-02, 8.973e-02)); + r += mul(s6_5, M4(-2.646e-02, -1.506e-02, -1.467e-01, -1.181e-01, 1.866e-02, -4.026e-02, 6.877e-03, 5.960e-02, -1.941e-02, 5.781e-02, -1.558e-01, -9.129e-02, 1.407e-02, 5.350e-02, 2.022e-02, -1.799e-02)); + r += mul(s6_6, M4(1.897e-02, 8.663e-02, 7.349e-02, 7.148e-03, -1.309e-02, 3.192e-02, 5.372e-02, 3.611e-02, -7.302e-03, 2.931e-02, 2.886e-02, 4.663e-02, -1.019e-01, -5.014e-02, -3.633e-02, 4.043e-02)); + 
r += mul(s6_7, M4(-7.477e-02, -2.522e-03, -3.245e-02, -1.565e-02, 8.996e-02, 8.576e-02, 9.061e-02, -4.715e-02, 2.940e-02, -6.892e-03, -5.590e-02, 3.380e-02, -5.756e-02, 9.486e-02, 9.597e-02, -8.792e-02)); + r += mul(s6_8, M4(-2.186e-02, -8.656e-02, -1.093e-01, -3.317e-02, 4.744e-02, -7.635e-03, 1.268e-01, -2.545e-02, -1.122e-01, 2.642e-02, -1.119e-01, -1.527e-02, 6.516e-03, -8.017e-04, 2.662e-03, -8.226e-02)); + r += mul(s7_0, M4(-2.019e-02, -3.796e-02, -5.359e-02, 8.589e-03, 1.746e-02, 7.377e-02, 9.361e-02, 3.278e-02, -8.813e-02, 2.210e-02, -1.049e-02, 1.715e-02, 1.041e-01, -4.550e-05, 4.012e-03, 2.279e-02)); + r += mul(s7_1, M4(1.146e-01, 5.459e-02, -6.725e-02, -9.273e-03, -2.321e-02, 1.333e-01, -1.210e-01, 1.025e-03, 3.982e-02, -1.381e-01, -1.400e-02, -2.607e-02, -6.757e-02, -4.364e-02, -5.068e-03, 4.358e-02)); + r += mul(s7_2, M4(7.601e-02, -3.050e-02, -1.273e-02, -3.718e-02, 4.217e-02, 7.081e-03, 4.354e-02, -1.709e-02, -5.820e-02, 1.073e-01, 8.721e-03, 7.082e-04, 1.634e-02, 3.915e-02, -3.449e-02, -9.085e-03)); + r += mul(s7_3, M4(-1.358e-02, -1.004e-02, -8.832e-02, -1.193e-01, -2.779e-02, 1.960e-01, 2.438e-02, 2.159e-03, 6.318e-02, -4.563e-02, 6.732e-02, -8.724e-02, -4.285e-02, 8.821e-02, 2.702e-02, 4.149e-02)); + r += mul(s7_4, M4(3.593e-02, 2.196e-02, 2.173e-02, -4.889e-02, 7.567e-03, -3.813e-02, 5.501e-02, -5.377e-02, 2.046e-01, -1.932e-01, -2.858e-01, 4.373e-02, 1.428e-01, 1.579e-01, 1.093e-01, 2.253e-02)); + r += mul(s7_5, M4(-1.377e-01, -1.132e-02, -5.150e-02, -6.566e-02, -6.390e-03, -3.158e-02, 4.235e-02, -6.105e-02, 1.130e-01, -4.081e-02, 5.491e-02, 1.827e-01, -2.194e-02, 1.053e-01, -6.559e-02, -4.982e-02)); + r += mul(s7_6, M4(-6.669e-02, 3.382e-03, -2.385e-02, -2.175e-02, -2.203e-02, 5.512e-02, 2.337e-02, 1.053e-02, 2.608e-03, -3.030e-02, 3.556e-03, 2.730e-02, 8.774e-02, 1.053e-02, -1.591e-02, -8.692e-02)); + r += mul(s7_7, M4(-1.432e-01, 4.026e-02, 1.347e-01, -2.317e-02, 8.572e-02, 1.513e-02, -5.679e-02, -9.989e-02, -5.869e-02, -1.023e-01, 
-4.056e-02, 1.494e-01, -2.377e-02, -1.866e-02, -6.367e-02, -2.192e-01)); + r += mul(s7_8, M4(4.881e-02, 1.017e-02, -3.493e-02, -9.660e-02, 3.625e-02, 2.386e-02, 9.414e-02, 3.077e-02, -7.044e-02, -2.566e-02, -2.265e-02, 2.627e-02, 6.538e-02, 4.887e-02, -4.800e-02, -3.489e-02)); + r += V4(4.173e-03, 1.409e-02, -1.527e-02, -1.778e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.167e-02, 7.113e-02, 3.822e-03, -1.144e-02, -2.224e-01, 1.053e-02, 3.899e-02, 1.143e-02, 6.980e-03, 1.377e-02, -2.633e-02, -1.480e-02, -2.798e-02, 8.914e-02, -5.357e-03, -9.741e-02)); + r += mul(s0_1, M4(7.215e-02, -2.533e-02, 8.928e-02, -3.974e-02, -1.013e-01, 9.533e-02, -4.489e-02, 2.747e-02, 3.177e-01, -2.748e-01, 7.242e-02, -2.502e-02, 2.636e-01, 5.869e-02, -1.809e-01, -9.835e-02)); + r += mul(s0_2, M4(-8.355e-05, -4.128e-02, -4.788e-02, -1.433e-02, -8.908e-02, -3.683e-02, 4.406e-02, 9.769e-02, -3.560e-02, -1.459e-02, 1.996e-01, -4.799e-02, -4.419e-03, -4.064e-02, 1.307e-02, -4.230e-02)); + r += mul(s0_3, M4(5.972e-02, -8.597e-02, -3.847e-02, 1.258e-02, -1.871e-01, 1.066e-01, 1.463e-01, -2.473e-02, -1.240e-01, -5.041e-02, 1.561e-02, 7.376e-02, -2.178e-02, 4.002e-02, 1.836e-01, -1.051e-01)); + r += mul(s0_4, M4(6.370e-02, -2.171e-02, 8.220e-02, -9.885e-02, -2.903e-01, -8.983e-02, -7.046e-02, -1.940e-01, 5.303e-02, 1.998e-01, -4.619e-01, 
-4.693e-02, -1.250e-01, -2.295e-01, -1.236e-01, -2.091e-01)); + r += mul(s0_5, M4(5.260e-02, -6.406e-02, -3.290e-02, -4.443e-03, -6.417e-02, 1.516e-02, -2.519e-02, -3.230e-02, 3.332e-02, -5.458e-01, -1.850e-01, 8.340e-02, -2.379e-02, 7.705e-02, -2.979e-02, 2.320e-02)); + r += mul(s0_6, M4(3.036e-03, 4.029e-02, -7.705e-02, 1.360e-01, -1.734e-01, 4.693e-02, 1.519e-01, 1.149e-01, -8.603e-02, 2.361e-02, 4.493e-02, -1.080e-01, -3.285e-02, -2.896e-02, 4.286e-02, -5.755e-02)); + r += mul(s0_7, M4(4.018e-02, -8.063e-02, 3.551e-02, -1.668e-01, -2.488e-01, 7.881e-02, 5.395e-02, -5.390e-02, -1.344e-01, -1.297e-01, 1.513e-01, -1.432e-01, 5.492e-02, 2.255e-01, -6.271e-02, -1.744e-01)); + r += mul(s0_8, M4(-5.268e-03, 1.191e-01, 3.927e-02, 1.076e-02, -2.728e-01, 8.725e-02, 1.057e-01, -1.054e-01, 1.970e-01, 1.924e-01, 6.983e-02, 1.276e-01, -1.113e-01, -1.624e-01, -4.850e-02, -7.036e-02)); + r += mul(s1_0, M4(-1.426e-01, 2.334e-02, -1.094e-02, -7.407e-02, -6.792e-02, 2.777e-02, 3.094e-02, -1.318e-02, -5.514e-03, -4.333e-02, 7.728e-04, -5.478e-03, -1.044e-01, -6.804e-02, 5.295e-02, 9.620e-03)); + r += mul(s1_1, M4(5.492e-02, -4.866e-02, 5.931e-02, 6.582e-02, -5.343e-02, 1.568e-01, -5.726e-02, 3.203e-02, -5.349e-02, 1.842e-03, -9.329e-03, 2.341e-03, 4.785e-02, 6.863e-02, -8.660e-02, -6.837e-02)); + r += mul(s1_2, M4(-2.827e-02, 5.348e-02, 3.606e-02, 1.615e-02, -3.296e-02, -1.077e-02, -1.476e-02, -1.344e-02, 1.092e-02, -7.954e-02, 2.188e-02, -7.613e-02, -9.541e-02, -9.093e-02, 7.970e-02, 5.269e-03)); + r += mul(s1_3, M4(-1.361e-03, -8.518e-02, -9.498e-02, 7.772e-02, 2.565e-02, 8.882e-02, 6.306e-02, -4.711e-02, -1.616e-02, 6.767e-02, 5.313e-02, -9.962e-02, 3.099e-02, -4.796e-02, 6.080e-02, 2.616e-02)); + r += mul(s1_4, M4(-5.855e-02, 2.104e-02, -9.982e-02, 9.721e-02, 1.517e-01, -9.210e-02, -8.640e-02, -8.518e-02, 6.706e-02, 2.991e-02, -1.464e-02, -6.294e-02, -2.638e-02, 1.248e-02, -3.616e-02, 1.223e-01)); + r += mul(s1_5, M4(7.110e-02, -7.086e-02, 6.703e-03, 3.567e-02, 5.312e-02, 
-6.496e-03, 8.943e-03, -5.466e-02, 1.895e-02, -3.669e-02, 2.584e-02, -2.765e-03, 3.016e-03, 7.781e-02, 2.094e-02, 8.166e-02)); + r += mul(s1_6, M4(-1.128e-01, 4.150e-02, 7.408e-02, 9.683e-02, 6.146e-02, -4.690e-02, 4.417e-02, 2.410e-02, 1.917e-02, -2.390e-03, -2.845e-02, -6.984e-03, -1.097e-01, -6.451e-02, -4.617e-02, -2.599e-02)); + r += mul(s1_7, M4(-5.970e-03, -2.779e-01, 4.142e-02, -1.805e-02, 4.826e-02, -7.818e-03, -1.021e-01, 1.013e-02, -2.918e-02, 1.025e-01, 1.875e-02, 1.042e-02, -1.240e-02, 1.919e-01, -5.672e-03, -4.655e-02)); + r += mul(s1_8, M4(4.831e-02, 1.212e-01, -1.350e-01, -6.402e-02, 9.789e-02, -4.909e-03, -2.798e-02, -6.644e-02, -9.080e-03, -1.093e-01, -5.033e-02, -1.946e-02, -1.293e-01, -2.352e-02, 4.836e-02, -1.554e-02)); + r += mul(s2_0, M4(-7.880e-02, 4.960e-02, 1.506e-01, 6.567e-03, 1.914e-02, -5.041e-02, 3.384e-02, -5.016e-02, -7.930e-02, -1.876e-01, -4.583e-02, 1.982e-01, 1.903e-01, 8.947e-02, 4.232e-02, -2.569e-02)); + r += mul(s2_1, M4(2.007e-01, -1.006e-01, 6.364e-02, -1.107e-01, 1.316e-01, -4.705e-02, 3.292e-02, -5.835e-02, 4.873e-01, 2.395e-01, -1.672e-01, 3.838e-02, 7.992e-04, 5.969e-03, 2.629e-01, 2.465e-02)); + r += mul(s2_2, M4(-2.404e-02, -4.473e-02, 1.559e-01, 6.163e-02, -4.415e-02, -6.521e-02, -4.490e-02, -6.924e-02, -2.854e-03, -1.285e-01, 2.472e-01, 6.413e-02, -1.113e-01, -4.878e-02, 1.202e-01, 4.318e-02)); + r += mul(s2_3, M4(1.076e-01, -5.476e-02, 2.212e-01, -8.185e-02, 9.430e-03, -9.689e-02, 1.046e-01, 1.132e-02, -5.601e-02, 1.755e-02, 4.833e-02, 1.235e-01, -1.952e-02, 1.777e-01, -3.257e-02, 1.061e-01)); + r += mul(s2_4, M4(2.772e-01, -1.444e-01, -3.958e-02, -9.223e-02, 1.794e-01, 5.433e-02, 2.304e-01, -2.276e-01, -2.422e-01, -7.740e-02, -1.114e-01, 1.253e-01, 7.317e-02, -4.988e-03, 2.970e-01, -2.830e-02)); + r += mul(s2_5, M4(-7.800e-02, 4.609e-02, 1.082e-01, 1.851e-01, -1.878e-03, -1.458e-02, -2.368e-02, -2.698e-02, 1.978e-01, 3.366e-01, 4.870e-02, 3.587e-02, 1.547e-01, 3.822e-02, 1.268e-01, 3.168e-02)); + r += mul(s2_6, 
M4(1.717e-03, -1.812e-01, -1.010e-01, -2.942e-02, 1.088e-02, -2.949e-02, -2.280e-02, 1.454e-02, 1.256e-01, 7.694e-02, -2.056e-01, -1.434e-02, -8.130e-02, 1.671e-01, -1.100e-02, -2.191e-02)); + r += mul(s2_7, M4(-1.673e-01, 4.049e-03, 2.133e-02, 2.319e-01, -3.630e-02, 9.826e-02, 2.562e-02, 1.294e-01, -4.803e-02, -2.150e-01, 1.226e-01, 1.012e-01, 1.222e-01, -1.323e-01, 9.364e-05, -7.661e-03)); + r += mul(s2_8, M4(-1.020e-01, -7.446e-02, -8.427e-03, 1.322e-01, 6.739e-02, 1.669e-02, 6.415e-02, -6.686e-02, 6.874e-02, 1.706e-01, -5.453e-02, 6.454e-02, -1.321e-02, 5.199e-02, 8.951e-02, 6.274e-02)); + r += mul(s3_0, M4(-9.301e-02, -9.281e-03, 3.177e-02, -4.602e-02, -2.858e-02, 1.100e-01, 5.152e-03, 8.481e-03, 4.755e-02, -4.921e-02, 3.039e-03, 2.944e-02, -2.085e-02, -1.190e-02, 1.241e-02, 1.770e-02)); + r += mul(s3_1, M4(1.206e-01, 3.190e-02, 2.019e-02, -5.287e-02, 2.960e-03, -3.259e-02, -3.770e-03, -1.537e-02, 1.747e-02, -1.034e-02, 1.876e-02, 4.201e-02, -1.092e-01, 2.903e-02, -2.532e-02, 7.270e-02)); + r += mul(s3_2, M4(-5.119e-03, 7.362e-03, -3.890e-02, 4.856e-02, -4.213e-02, -1.402e-01, -2.826e-02, -8.865e-02, -2.333e-02, -2.161e-02, -5.952e-02, 2.058e-02, -5.357e-02, -3.009e-02, 1.872e-02, 2.597e-02)); + r += mul(s3_3, M4(6.472e-02, -4.584e-02, -7.107e-03, 4.855e-02, -4.056e-02, 8.131e-02, 1.364e-01, -2.552e-02, -7.878e-02, -2.991e-02, 2.130e-02, 1.087e-01, 1.964e-02, 8.321e-02, -1.909e-02, 7.523e-02)); + r += mul(s3_4, M4(5.038e-02, 2.256e-02, -7.623e-03, -1.872e-02, 1.800e-01, 7.008e-02, 7.587e-02, -2.872e-01, 4.155e-03, 8.453e-04, -1.561e-02, 5.092e-02, -6.359e-02, -9.084e-02, 1.047e-01, -5.319e-02)); + r += mul(s3_5, M4(-9.609e-02, 1.714e-02, -2.219e-01, 2.245e-02, 1.040e-02, -1.293e-01, 1.025e-01, -7.594e-02, -3.987e-02, 2.213e-02, 6.019e-02, -1.122e-01, -7.775e-02, -5.578e-02, -3.080e-02, -4.930e-02)); + r += mul(s3_6, M4(-6.516e-02, 1.657e-02, -2.558e-02, -6.124e-02, -3.133e-02, -2.645e-02, -8.707e-03, 2.046e-02, -9.048e-03, -1.351e-02, -2.644e-02, -5.333e-02, 
-5.207e-02, 5.509e-02, -5.945e-02, -7.672e-02)); + r += mul(s3_7, M4(4.162e-03, -6.708e-02, 5.431e-02, 1.623e-02, -7.745e-02, 1.429e-01, -2.173e-02, 1.892e-01, -3.614e-02, -4.247e-02, 3.057e-03, 7.932e-02, -2.931e-03, -6.782e-02, 1.497e-02, -5.519e-04)); + r += mul(s3_8, M4(-1.726e-02, 9.547e-02, 5.438e-02, 6.943e-02, 1.914e-02, -2.154e-02, 1.912e-01, -1.096e-02, 2.614e-02, 1.948e-02, -1.711e-02, -1.831e-02, -3.373e-02, 3.467e-02, -1.693e-02, 1.689e-03)); + r += mul(s4_0, M4(-6.692e-02, -3.165e-02, 2.808e-02, -5.682e-02, -6.105e-02, -1.175e-02, 7.030e-02, 1.832e-02, 6.584e-02, 2.498e-02, -5.845e-02, 5.704e-02, 4.486e-02, 1.214e-03, -3.955e-02, -8.385e-02)); + r += mul(s4_1, M4(6.582e-02, -5.290e-02, 1.448e-02, -5.476e-02, -2.031e-01, -5.528e-02, -2.782e-02, -5.046e-02, -2.098e-02, 2.527e-02, 4.160e-02, 4.069e-02, -1.045e-01, -8.357e-02, -2.469e-02, -2.611e-02)); + r += mul(s4_2, M4(3.378e-03, 7.391e-02, 3.614e-02, -6.354e-02, -1.993e-02, -5.497e-02, -4.947e-02, 4.341e-02, 1.048e-01, 1.082e-01, -1.009e-01, 2.709e-02, -6.418e-02, -1.455e-01, 5.250e-02, -4.911e-03)); + r += mul(s4_3, M4(6.624e-02, 2.199e-02, -9.857e-03, -3.339e-02, 9.994e-02, -2.933e-02, 1.909e-02, -9.170e-02, -1.106e-02, -2.439e-02, 2.660e-02, -3.228e-02, -3.388e-02, -3.303e-02, -7.651e-02, -9.346e-02)); + r += mul(s4_4, M4(-8.180e-02, 7.600e-02, 1.538e-02, -1.216e-01, 1.981e-01, 1.502e-02, 4.975e-02, 2.431e-02, -7.522e-02, 3.625e-03, 1.104e-02, -5.448e-03, -1.209e-01, 5.751e-02, -3.896e-03, 1.092e-01)); + r += mul(s4_5, M4(4.550e-02, -1.330e-01, -3.371e-02, -4.553e-02, -3.309e-02, -4.712e-02, -1.272e-01, 7.661e-02, -1.339e-01, -6.610e-02, -1.053e-01, -1.248e-03, 2.452e-03, -4.445e-02, 1.917e-01, -6.756e-02)); + r += mul(s4_6, M4(2.202e-02, 6.361e-02, 6.040e-02, -9.084e-02, -2.731e-02, 2.606e-02, -3.115e-03, -5.223e-02, -6.156e-02, -1.456e-01, -6.265e-04, 2.337e-03, -6.716e-02, 1.083e-02, -1.455e-02, -2.764e-02)); + r += mul(s4_7, M4(-2.959e-02, 5.556e-02, 1.304e-02, 1.678e-02, -5.710e-02, 6.434e-02, 
-4.860e-02, 9.945e-02, -1.809e-02, -2.464e-01, 1.077e-01, -2.306e-02, -3.565e-02, -1.890e-02, -1.252e-02, -3.337e-02)); + r += mul(s4_8, M4(-9.485e-03, 1.104e-01, 1.211e-02, -2.105e-02, -1.236e-02, -5.495e-02, 7.685e-02, -4.815e-02, -6.525e-02, 1.109e-01, 1.661e-01, 3.075e-02, -8.373e-02, 9.925e-03, 1.961e-02, -5.981e-02)); + r += mul(s5_0, M4(9.801e-04, 9.458e-02, 1.762e-01, -1.281e-01, -1.489e-01, -4.986e-02, 5.098e-02, -1.408e-01, -9.999e-03, 3.848e-02, -5.507e-02, -1.034e-02, 4.173e-02, -6.904e-03, -1.325e-03, -6.998e-03)); + r += mul(s5_1, M4(1.370e-01, -3.919e-02, 2.025e-01, 1.048e-01, 9.064e-03, -8.848e-02, 1.585e-01, -8.881e-02, -7.091e-02, 2.845e-02, -1.275e-02, -1.239e-02, -3.869e-03, -1.170e-03, -5.842e-02, 3.705e-02)); + r += mul(s5_2, M4(1.817e-01, 1.459e-01, 5.089e-02, -1.269e-01, -5.881e-02, -7.738e-03, 1.224e-01, 3.632e-02, 7.139e-02, 6.373e-02, -3.948e-02, -4.856e-03, -3.270e-02, -2.634e-02, 2.104e-02, -2.706e-02)); + r += mul(s5_3, M4(1.502e-01, 1.379e-01, 1.538e-02, -7.861e-03, 1.042e-01, -2.230e-02, 4.135e-02, -2.683e-02, -2.117e-02, -7.226e-02, -3.989e-03, 8.275e-02, 8.715e-02, -6.580e-03, 1.703e-02, -9.040e-03)); + r += mul(s5_4, M4(2.142e-01, 1.061e-01, -3.152e-01, -2.537e-02, 1.539e-01, -8.227e-02, 8.715e-02, -2.106e-01, 1.039e-01, 4.502e-02, -1.235e-01, -1.842e-02, 4.672e-02, 1.679e-01, -2.057e-02, 1.910e-01)); + r += mul(s5_5, M4(1.570e-01, -6.616e-02, -3.374e-01, 4.379e-02, 9.364e-02, 1.886e-01, 9.683e-02, 1.772e-01, -8.627e-02, -7.643e-02, -1.468e-01, -3.264e-02, 6.554e-02, -2.150e-02, 1.439e-01, -2.549e-02)); + r += mul(s5_6, M4(2.129e-01, 7.809e-02, -3.730e-02, -9.549e-02, 3.745e-02, 6.747e-02, -3.004e-03, 9.262e-03, -1.267e-03, -3.688e-02, -2.417e-02, 1.145e-02, 1.774e-02, -8.307e-03, -2.078e-02, 1.897e-02)); + r += mul(s5_7, M4(1.043e-01, -3.778e-02, -1.563e-01, -3.084e-02, -2.736e-03, 8.034e-03, 5.689e-02, -4.467e-02, -4.726e-02, -3.165e-02, 4.804e-02, 1.631e-02, -2.546e-02, 7.656e-02, -2.755e-02, 2.819e-02)); + r += mul(s5_8, 
M4(6.321e-02, 7.741e-02, -1.271e-03, -3.950e-03, -2.887e-02, 5.842e-02, 8.823e-02, -3.011e-02, -1.819e-02, -2.790e-02, 1.703e-02, -9.489e-02, 6.141e-03, -1.879e-02, -1.695e-03, -4.482e-02)); + r += mul(s6_0, M4(-1.058e-01, 1.028e-02, 2.038e-02, 8.475e-02, 2.969e-02, -2.337e-02, -2.119e-02, -1.120e-01, 9.966e-02, 7.796e-02, 8.142e-02, -2.125e-02, -4.534e-02, 2.910e-02, 6.519e-02, 2.339e-03)); + r += mul(s6_1, M4(-4.818e-02, -2.566e-02, 1.065e-02, -7.687e-02, 6.712e-02, -6.220e-03, 1.980e-02, -9.644e-03, 1.204e-01, -8.267e-03, 1.451e-01, 2.302e-02, 4.919e-02, -4.909e-02, 3.303e-02, 7.169e-02)); + r += mul(s6_2, M4(-5.701e-02, 1.688e-01, -3.089e-02, 1.287e-03, 1.021e-01, 8.347e-02, 3.803e-02, -5.952e-03, 2.335e-01, 7.194e-02, -1.382e-02, 8.579e-02, -2.265e-02, 3.642e-02, -3.861e-03, -4.818e-02)); + r += mul(s6_3, M4(-1.187e-01, -4.119e-02, -2.789e-02, 4.240e-02, 7.966e-03, 3.998e-02, -9.259e-02, 1.155e-01, 3.001e-02, -4.175e-02, 5.960e-02, 5.011e-02, 5.314e-02, 8.689e-03, 1.480e-01, -2.816e-01)); + r += mul(s6_4, M4(-2.291e-01, 3.066e-02, -3.810e-02, -7.246e-02, 5.209e-02, 1.857e-01, -5.975e-02, 9.752e-02, -9.198e-02, 8.753e-03, 2.891e-04, 4.438e-02, 1.461e-01, -6.657e-03, 1.107e-01, -2.582e-02)); + r += mul(s6_5, M4(-1.170e-01, -9.598e-02, -1.396e-01, 3.307e-02, -1.292e-01, 6.836e-02, 1.472e-02, 4.140e-02, -1.075e-01, 6.799e-02, -7.403e-02, 7.355e-02, 2.433e-02, 4.886e-02, -2.368e-02, 3.433e-03)); + r += mul(s6_6, M4(-1.018e-01, 2.365e-03, 7.537e-02, 5.097e-02, -7.627e-03, 1.422e-02, 2.085e-02, 1.109e-01, 2.279e-02, -3.080e-02, 4.851e-02, -7.352e-02, 1.192e-01, 3.540e-02, -6.290e-02, -4.978e-02)); + r += mul(s6_7, M4(-1.705e-01, -6.102e-02, 7.558e-02, 8.990e-02, -1.164e-01, 1.767e-01, 9.091e-02, -7.596e-02, 2.049e-02, -7.114e-02, 2.842e-02, 1.798e-01, 5.774e-02, 1.200e-01, -8.578e-02, 2.649e-01)); + r += mul(s6_8, M4(-8.003e-02, -1.615e-02, 6.288e-03, 5.704e-02, -4.235e-03, 6.261e-03, 3.897e-02, 3.701e-02, 8.972e-02, -2.166e-02, 3.724e-02, -5.434e-02, 5.026e-02, 
-4.115e-02, -6.291e-02, 3.271e-03)); + r += mul(s7_0, M4(9.269e-02, 6.940e-02, 2.366e-02, -4.562e-02, -3.172e-02, 9.314e-03, -6.270e-02, 2.465e-02, -5.319e-03, 4.779e-02, 5.714e-02, 8.881e-02, 4.492e-02, -4.217e-02, -3.386e-02, 6.345e-02)); + r += mul(s7_1, M4(4.166e-02, -1.216e-01, 5.635e-02, -1.037e-01, 6.698e-02, 1.740e-01, -5.253e-03, 4.376e-02, -1.643e-01, -1.017e-01, -1.700e-02, 7.608e-02, -9.902e-02, -3.625e-02, -2.993e-02, 5.567e-02)); + r += mul(s7_2, M4(1.017e-01, 3.580e-02, -5.391e-02, -5.787e-02, 7.344e-02, 4.733e-03, 8.307e-02, 2.210e-02, -4.513e-02, 6.641e-02, 2.762e-02, 6.352e-02, -1.433e-02, -2.190e-02, -8.479e-02, 1.480e-02)); + r += mul(s7_3, M4(7.769e-02, -5.091e-03, -1.465e-02, -5.753e-03, -6.735e-02, 1.561e-01, 2.281e-02, 6.207e-02, -1.018e-02, -7.199e-02, 4.084e-03, 8.673e-02, -2.895e-02, -8.051e-02, 6.780e-03, -2.519e-02)); + r += mul(s7_4, M4(1.574e-01, 9.303e-02, 1.053e-03, -5.916e-02, 4.135e-02, 2.861e-01, 5.332e-02, 5.538e-02, -5.394e-02, -2.012e-02, -1.075e-01, 7.292e-02, 1.936e-01, 1.745e-01, 3.240e-02, -5.927e-02)); + r += mul(s7_5, M4(-8.653e-02, -6.805e-02, -1.058e-01, -1.375e-02, -6.762e-02, 5.523e-02, -4.983e-02, 5.782e-02, 4.544e-02, 1.240e-01, 5.341e-02, -9.391e-04, 1.718e-02, -2.817e-02, 9.757e-02, 3.799e-02)); + r += mul(s7_6, M4(1.094e-01, 2.844e-02, -4.059e-02, 3.584e-02, -3.615e-03, 2.496e-02, -3.317e-03, 5.475e-02, 5.129e-03, 3.327e-02, 7.196e-02, -1.179e-01, -3.309e-02, -4.331e-02, 2.087e-02, -2.335e-02)); + r += mul(s7_7, M4(1.671e-01, -1.219e-02, -1.035e-03, 2.249e-02, -6.740e-02, 1.508e-01, 5.615e-02, 9.415e-03, 9.352e-02, 2.030e-02, -4.382e-02, 1.021e-01, 4.900e-02, 2.809e-03, -9.955e-02, 5.268e-02)); + r += mul(s7_8, M4(2.353e-01, 1.480e-01, -2.603e-02, 9.030e-02, -1.118e-02, 1.376e-02, 1.244e-01, 6.137e-02, -1.080e-02, 1.309e-03, 3.426e-03, -9.580e-02, 1.014e-01, 8.073e-02, -4.193e-03, 1.369e-01)); + r += V4(1.206e-02, -1.488e-03, 7.432e-03, -5.892e-03); + return r; +} + +void Pass14(uint2 blockStart, uint3 tid) { + 
float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 
0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, 
s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 15 +//!DESC conv14 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.185e-02, 4.091e-03, 2.901e-02, -2.038e-02, -4.890e-02, 1.164e-01, 
1.339e-02, 1.634e-03, 2.732e-02, 6.849e-02, 9.277e-02, -7.625e-02, -5.444e-02, 2.505e-02, 9.467e-02, 6.034e-02)); + r += mul(s0_1, M4(1.256e-02, 8.107e-02, 1.145e-01, -1.047e-01, 1.597e-01, 3.900e-02, 3.008e-02, -6.023e-02, 4.341e-02, -2.507e-02, -4.064e-02, 3.460e-01, 1.876e-02, -9.687e-02, -1.347e-02, -1.907e-01)); + r += mul(s0_2, M4(9.207e-02, -8.786e-03, -1.540e-02, 7.945e-03, -4.322e-03, -1.837e-01, -1.012e-01, 1.046e-01, -7.684e-02, 2.438e-02, 3.435e-02, -1.049e-01, 1.712e-02, 6.380e-03, -1.597e-02, 4.260e-02)); + r += mul(s0_3, M4(5.051e-02, 6.101e-02, 1.899e-02, -6.753e-02, 8.176e-02, -1.564e-01, -1.642e-02, -9.951e-02, 7.278e-03, 1.024e-01, 7.590e-02, -1.069e-01, -9.321e-02, -1.165e-01, -1.024e-01, 3.383e-02)); + r += mul(s0_4, M4(-2.707e-02, -9.375e-02, -2.150e-02, 2.158e-01, 7.356e-02, -1.083e-01, 2.409e-01, 7.838e-02, -2.262e-02, 1.347e-02, 1.614e-01, 9.256e-02, 5.947e-02, -9.669e-02, 5.556e-02, -7.074e-02)); + r += mul(s0_5, M4(1.220e-01, -5.686e-02, 2.460e-02, 6.564e-02, -1.398e-02, -1.607e-01, -1.804e-01, -2.429e-01, 2.244e-02, -2.300e-01, -1.902e-03, -1.811e-01, 4.572e-02, 1.910e-02, -1.423e-01, 7.379e-02)); + r += mul(s0_6, M4(-3.069e-03, 5.911e-02, 1.009e-02, -3.404e-02, -6.806e-03, 4.456e-02, 6.687e-03, -6.194e-02, -8.226e-02, 2.784e-03, -8.519e-02, -4.928e-02, -4.491e-02, 1.170e-02, -7.097e-02, -2.554e-02)); + r += mul(s0_7, M4(-9.643e-02, -3.923e-02, -8.407e-02, 4.769e-02, 4.032e-02, 2.404e-03, 2.330e-02, 6.803e-02, 2.597e-02, -1.978e-01, 4.945e-02, -1.115e-01, 9.908e-02, -6.704e-02, 2.845e-02, -1.589e-01)); + r += mul(s0_8, M4(3.223e-02, 3.978e-04, -2.161e-02, -5.624e-02, 7.873e-02, 1.726e-01, 1.821e-02, 5.381e-02, 1.479e-01, 6.089e-02, 3.424e-02, 1.370e-01, 1.544e-02, 1.084e-02, -5.044e-02, 8.863e-03)); + r += mul(s1_0, M4(-6.762e-02, 1.486e-01, -3.739e-04, 2.086e-02, -6.172e-03, 3.222e-03, -5.487e-02, 4.932e-02, -2.639e-02, 2.902e-02, -6.927e-03, -4.605e-02, -5.824e-02, 8.899e-02, 8.049e-02, 1.084e-02)); + r += mul(s1_1, M4(5.069e-02, 
6.858e-02, 1.163e-01, -1.431e-03, -3.230e-03, -1.418e-01, -5.930e-02, -1.903e-02, 4.409e-02, -4.904e-02, 1.191e-01, 1.010e-01, 9.705e-03, 4.082e-02, 2.487e-01, -1.595e-01)); + r += mul(s1_2, M4(3.732e-02, -2.114e-02, -1.093e-01, 6.778e-02, -8.360e-02, 5.667e-02, -6.203e-02, 3.940e-02, -1.157e-02, -7.115e-02, -1.429e-02, -2.059e-02, 1.053e-01, -1.412e-02, 8.933e-02, 3.629e-02)); + r += mul(s1_3, M4(-5.475e-04, 4.279e-02, -2.862e-02, 3.817e-02, -5.141e-02, -8.078e-04, 1.451e-01, -5.715e-02, -1.689e-03, -3.475e-02, -3.324e-02, 5.880e-02, -3.765e-02, -2.832e-02, 4.040e-02, -6.736e-02)); + r += mul(s1_4, M4(6.867e-02, -4.313e-03, -7.039e-02, -8.560e-02, 2.589e-02, -6.891e-02, 1.898e-02, -2.068e-02, 2.681e-02, -1.478e-01, -7.782e-02, 6.469e-02, -5.886e-02, -5.819e-02, -4.400e-02, 4.008e-02)); + r += mul(s1_5, M4(8.103e-02, 2.388e-02, -8.507e-02, -1.557e-02, -7.126e-02, 4.465e-02, -1.902e-02, -2.335e-02, 4.500e-02, 7.150e-03, 1.615e-01, 1.738e-04, -7.395e-03, -5.280e-02, -4.631e-02, 5.364e-02)); + r += mul(s1_6, M4(2.050e-02, 1.850e-02, 1.206e-02, -6.519e-02, -2.588e-02, 3.833e-02, 5.943e-02, 7.126e-03, -2.178e-02, 1.797e-02, -3.683e-02, -4.195e-02, -6.068e-03, 2.638e-03, -8.274e-02, -3.730e-03)); + r += mul(s1_7, M4(-7.091e-03, -1.474e-02, -3.124e-02, 8.422e-02, 3.969e-03, -3.473e-02, -7.462e-02, 8.463e-02, -2.426e-02, -2.767e-02, 3.222e-03, -1.197e-01, 3.608e-02, -7.492e-03, 2.443e-02, 3.272e-02)); + r += mul(s1_8, M4(-3.515e-02, 7.756e-02, 2.323e-02, -2.068e-03, -3.174e-02, 2.793e-02, -9.549e-03, -3.011e-02, 4.602e-02, 1.534e-02, -2.368e-02, 4.073e-02, 7.721e-04, -5.720e-02, -4.916e-02, -3.721e-02)); + r += mul(s2_0, M4(-1.820e-02, 2.031e-03, 3.517e-02, -8.364e-05, 2.237e-02, -3.581e-02, -1.704e-02, 4.340e-02, -3.280e-02, -4.180e-02, 1.313e-03, 5.753e-02, 6.811e-02, -2.174e-02, -1.557e-02, 4.522e-02)); + r += mul(s2_1, M4(7.392e-02, -6.128e-03, -1.237e-02, 1.148e-02, -6.618e-03, -7.516e-02, -1.376e-01, 1.592e-02, -7.999e-02, -8.762e-02, 1.362e-01, -9.180e-02, 
-4.042e-02, -1.606e-02, -1.146e-01, 5.234e-02)); + r += mul(s2_2, M4(-8.599e-02, 4.130e-02, -5.231e-02, -1.015e-01, -1.451e-02, 1.074e-02, 7.664e-03, -1.946e-03, -2.727e-02, 2.546e-02, -2.923e-02, 1.054e-03, -1.998e-02, 2.887e-02, -2.058e-02, 6.172e-03)); + r += mul(s2_3, M4(-2.521e-02, 2.920e-02, -2.884e-02, 5.085e-02, 6.387e-03, 1.388e-01, -2.703e-02, 3.090e-02, -3.353e-02, 1.183e-01, 1.101e-01, -2.499e-03, -6.081e-03, 1.831e-02, 7.712e-02, 2.431e-02)); + r += mul(s2_4, M4(-2.356e-04, 3.388e-01, 1.425e-01, -9.105e-02, 1.227e-01, 2.337e-01, 3.663e-01, -8.101e-02, -1.345e-01, -1.060e-01, 7.330e-02, 7.492e-02, 6.390e-02, -6.327e-02, -1.059e-01, 2.532e-02)); + r += mul(s2_5, M4(-2.131e-02, 9.620e-02, 2.738e-02, 1.781e-01, 1.186e-02, -3.139e-02, 1.622e-01, 1.332e-02, 1.446e-02, 6.668e-02, -8.837e-03, -2.280e-02, -1.797e-02, -5.522e-02, -6.821e-02, 5.381e-03)); + r += mul(s2_6, M4(4.255e-03, 2.915e-02, -1.946e-02, -3.726e-02, 6.154e-02, 9.142e-02, -8.030e-02, -7.413e-02, 8.383e-03, -4.201e-02, 3.769e-02, -3.028e-02, 2.283e-02, 3.682e-02, -2.719e-02, -9.290e-02)); + r += mul(s2_7, M4(-5.389e-02, -6.357e-02, -3.235e-02, 2.059e-02, -4.903e-02, -4.402e-02, 9.317e-02, -9.028e-02, 3.474e-02, -5.685e-02, -4.770e-02, 6.897e-02, -1.592e-03, -5.508e-02, -1.093e-01, -4.087e-02)); + r += mul(s2_8, M4(-1.201e-02, 2.813e-02, 7.967e-03, -1.412e-02, -4.968e-03, -7.406e-02, -4.071e-02, 5.814e-02, -3.361e-02, 4.794e-02, 2.608e-02, -1.053e-01, 9.912e-03, -1.430e-02, -5.146e-03, -3.337e-02)); + r += mul(s3_0, M4(5.399e-03, -1.408e-02, -8.520e-03, 1.828e-02, -3.009e-02, 2.867e-02, 4.295e-02, -4.977e-03, 1.006e-02, 7.139e-02, -7.008e-02, 5.330e-02, 1.151e-02, 3.999e-02, 1.587e-01, -1.187e-02)); + r += mul(s3_1, M4(3.153e-02, -1.296e-03, -2.652e-02, -2.695e-02, -2.573e-02, 6.398e-02, 7.744e-02, -1.045e-01, -3.284e-02, -2.658e-02, -4.192e-02, 2.935e-02, 1.737e-01, 8.090e-02, 6.350e-02, -3.588e-03)); + r += mul(s3_2, M4(-1.284e-01, 5.570e-03, -8.051e-02, -3.249e-02, 5.010e-02, 2.614e-02, 
-1.879e-02, 4.068e-02, 8.508e-03, 7.683e-03, -7.388e-02, -2.009e-02, 1.327e-01, 1.299e-02, 1.105e-01, -5.868e-02)); + r += mul(s3_3, M4(-4.834e-02, 2.597e-03, 2.513e-02, 3.551e-02, 3.814e-03, -2.818e-02, -3.464e-02, 9.673e-02, -4.243e-02, 9.680e-02, 1.421e-01, -6.778e-02, -3.836e-02, -1.824e-01, -1.275e-01, 6.860e-02)); + r += mul(s3_4, M4(-3.610e-02, 1.469e-01, 9.565e-02, -7.052e-02, 2.388e-01, 3.281e-01, 1.572e-01, 1.141e-01, -5.664e-02, -9.020e-02, -4.204e-02, -2.142e-01, 9.626e-02, -2.115e-01, 1.149e-01, 3.155e-02)); + r += mul(s3_5, M4(-1.574e-01, 3.679e-02, -9.191e-02, 1.334e-02, 9.225e-02, 7.839e-02, 1.189e-01, 7.757e-02, -6.101e-03, -1.166e-02, -2.972e-02, 1.585e-02, 8.454e-02, 1.571e-03, -1.009e-01, 8.238e-02)); + r += mul(s3_6, M4(6.617e-03, 4.276e-03, -1.004e-02, 4.877e-03, -6.908e-02, 3.038e-01, 3.642e-03, 4.298e-02, -1.434e-02, -9.701e-03, 1.031e-01, -3.326e-02, -5.609e-02, 7.265e-02, -9.199e-02, -6.786e-02)); + r += mul(s3_7, M4(-3.657e-02, -3.907e-02, -2.614e-02, 5.053e-02, -4.674e-02, 3.897e-02, 6.179e-02, 6.518e-02, -2.290e-02, 1.168e-01, 2.693e-02, 1.716e-01, -1.123e-03, -1.441e-01, -7.503e-02, -2.011e-01)); + r += mul(s3_8, M4(3.712e-03, 9.737e-02, 4.266e-02, -1.522e-02, 1.506e-02, 5.124e-02, -1.050e-02, -2.398e-03, -6.193e-02, 6.949e-03, 9.181e-02, -1.413e-01, 3.692e-02, 3.907e-02, -1.620e-02, 4.723e-02)); + r += mul(s4_0, M4(-3.832e-02, 1.268e-02, 3.637e-02, -6.497e-03, 1.273e-02, -4.926e-03, 1.769e-02, 2.792e-02, 3.209e-02, 1.789e-02, -1.957e-02, 5.305e-03, -7.697e-02, 5.508e-02, 6.284e-02, 8.246e-02)); + r += mul(s4_1, M4(1.266e-01, 5.521e-03, 7.739e-02, -3.385e-02, -7.007e-02, -1.094e-01, -8.701e-02, -1.716e-02, 3.682e-02, 2.189e-02, -3.292e-03, 4.518e-02, 5.096e-02, 3.030e-02, -2.765e-02, 1.187e-02)); + r += mul(s4_2, M4(8.139e-02, -2.641e-03, 4.335e-02, 5.606e-02, -2.860e-02, -2.659e-02, -3.553e-02, -4.523e-02, 6.986e-03, 3.482e-02, 1.516e-02, 3.141e-04, -2.765e-02, 9.525e-02, -2.133e-03, 6.335e-02)); + r += mul(s4_3, M4(-1.219e-02, 
-4.573e-02, -7.869e-02, -4.889e-02, 5.090e-02, 6.583e-02, -1.673e-02, -6.859e-02, 2.518e-02, 9.782e-02, 1.145e-01, 3.710e-03, 3.850e-02, 3.397e-02, -8.315e-03, 1.324e-01)); + r += mul(s4_4, M4(2.313e-01, 7.508e-02, 1.163e-01, 1.122e-01, 2.720e-01, -7.170e-02, 4.317e-03, -1.224e-01, -5.580e-02, 6.872e-02, 6.504e-02, 2.753e-03, 1.016e-01, 1.616e-01, 1.761e-02, 1.978e-01)); + r += mul(s4_5, M4(6.699e-02, -1.902e-03, 6.323e-02, 2.092e-02, -1.227e-01, 5.553e-02, -1.090e-01, -9.668e-02, 5.901e-02, -4.978e-02, 1.158e-01, 3.121e-02, 3.099e-02, -4.958e-02, 1.446e-03, -1.305e-02)); + r += mul(s4_6, M4(1.685e-02, 4.815e-02, -7.986e-02, -5.999e-02, -1.223e-02, -7.677e-02, 1.721e-02, -1.356e-03, -1.691e-03, 6.180e-02, 1.034e-02, 6.818e-02, 4.716e-02, 5.924e-02, -5.670e-02, -2.003e-02)); + r += mul(s4_7, M4(-1.652e-02, -4.076e-02, 1.390e-02, -3.530e-02, -6.357e-02, 1.010e-01, -1.276e-02, -1.293e-01, 2.760e-02, -2.235e-02, 7.857e-02, -1.645e-01, 6.736e-03, -4.910e-02, -1.251e-01, 4.516e-02)); + r += mul(s4_8, M4(1.493e-02, 1.621e-02, 1.737e-02, -6.024e-02, -6.386e-02, 6.611e-02, 1.032e-02, -2.474e-02, 1.154e-01, -1.667e-02, -1.522e-02, 1.019e-01, 2.936e-02, -1.006e-02, 8.141e-03, -7.483e-02)); + r += mul(s5_0, M4(3.462e-02, 4.645e-02, -3.133e-02, -5.007e-02, 1.532e-02, 3.076e-02, 4.806e-02, 4.797e-02, 3.307e-02, -3.395e-02, -2.991e-02, 1.428e-02, 6.843e-03, 9.166e-02, 4.885e-02, -1.195e-01)); + r += mul(s5_1, M4(1.708e-01, -5.167e-02, -2.132e-01, 1.494e-01, -6.992e-02, -3.423e-02, 5.964e-03, -4.237e-02, 8.228e-04, -9.467e-03, -9.449e-02, 2.903e-02, 1.024e-01, -4.823e-02, 2.013e-01, 9.347e-02)); + r += mul(s5_2, M4(-5.565e-02, 3.143e-02, -7.558e-02, 1.739e-02, 3.807e-03, 4.152e-02, -4.658e-03, -3.292e-02, 2.328e-02, -2.435e-02, -3.833e-02, -2.558e-02, -9.393e-02, 2.188e-03, -1.499e-01, 9.052e-02)); + r += mul(s5_3, M4(9.999e-02, 3.884e-02, 1.246e-02, -2.378e-01, 3.749e-02, -8.477e-02, -1.596e-01, 5.331e-02, 7.443e-02, -2.068e-03, 7.423e-02, 2.302e-02, -1.746e-02, -8.780e-02, 
2.456e-02, 2.892e-02)); + r += mul(s5_4, M4(1.431e-01, 2.626e-01, 1.768e-03, 2.086e-01, 1.998e-01, 1.039e-01, 1.180e-02, -3.161e-01, -5.660e-02, -7.840e-02, 3.081e-02, -4.673e-02, 1.560e-01, 1.627e-02, -6.563e-02, -2.814e-02)); + r += mul(s5_5, M4(1.096e-01, -3.109e-02, -2.397e-02, -1.734e-01, -6.526e-02, 3.306e-03, 8.834e-02, -1.194e-01, -1.396e-03, -5.618e-02, 7.349e-02, -4.823e-03, -2.177e-02, -5.782e-02, -1.893e-01, 2.426e-02)); + r += mul(s5_6, M4(-2.352e-03, -3.160e-02, -1.185e-02, -7.712e-02, -3.358e-02, 4.870e-02, 4.227e-02, -2.398e-02, 1.136e-01, -1.042e-01, 2.561e-02, -9.641e-02, -2.812e-02, 2.797e-02, -1.537e-02, 5.092e-02)); + r += mul(s5_7, M4(3.419e-02, -2.995e-02, -5.608e-03, 6.518e-02, 1.412e-03, 5.324e-02, -2.010e-02, -6.216e-02, -2.328e-02, -7.162e-02, 3.648e-03, -2.010e-03, 6.645e-02, -2.244e-02, -4.760e-02, -5.183e-02)); + r += mul(s5_8, M4(5.211e-02, -2.610e-02, -1.471e-02, -9.500e-02, -5.246e-02, -6.899e-02, 1.645e-02, 1.771e-02, 1.832e-02, -1.096e-01, -1.150e-02, 2.276e-02, 1.380e-02, 2.443e-02, 3.817e-03, -1.092e-02)); + r += mul(s6_0, M4(-3.862e-03, -4.457e-02, 3.379e-02, -4.069e-02, -1.630e-02, -1.174e-01, -8.180e-02, 1.008e-01, -1.150e-02, -2.984e-02, 4.478e-02, -7.575e-02, -2.913e-03, -2.517e-02, 1.156e-02, -2.614e-02)); + r += mul(s6_1, M4(-1.031e-02, 8.463e-02, -5.762e-02, 6.699e-02, 5.885e-02, -7.425e-02, 3.812e-01, -1.546e-01, 1.595e-01, 3.535e-02, 6.493e-02, 6.501e-02, 2.222e-01, -2.002e-02, 2.322e-01, -1.815e-01)); + r += mul(s6_2, M4(-6.760e-02, -2.382e-02, -5.488e-03, -4.010e-02, -2.833e-02, 2.207e-02, 4.522e-02, -8.453e-04, 5.499e-02, -7.280e-04, 9.686e-02, -3.247e-02, -3.098e-02, 5.225e-03, 2.746e-03, 4.568e-03)); + r += mul(s6_3, M4(-2.646e-02, -5.002e-02, -5.677e-02, 2.103e-02, -5.503e-02, 1.762e-01, 2.568e-01, -3.145e-02, -1.158e-01, -2.686e-02, -2.261e-03, 1.432e-01, -5.379e-02, 1.851e-01, -1.858e-01, 1.018e-01)); + r += mul(s6_4, M4(5.892e-02, 1.199e-01, 4.789e-03, -1.124e-02, 5.058e-03, -4.334e-02, -1.585e-01, -2.955e-02, 
3.912e-02, 1.930e-02, 3.203e-01, -1.101e-01, 3.556e-02, -1.962e-01, -7.748e-03, 5.015e-02)); + r += mul(s6_5, M4(1.142e-02, -9.138e-02, 1.035e-02, 3.228e-02, 7.962e-03, -5.724e-02, 1.176e-01, -8.011e-02, 7.068e-02, 4.782e-02, 5.572e-02, 7.706e-03, -7.708e-02, -1.446e-02, -1.120e-01, 9.828e-02)); + r += mul(s6_6, M4(-9.079e-02, 1.029e-02, -1.688e-02, -7.920e-03, 2.706e-02, -5.572e-02, -2.741e-02, 2.909e-02, -5.032e-02, 1.062e-01, -3.936e-03, -3.121e-03, 6.855e-02, 8.418e-03, -4.407e-02, 2.442e-02)); + r += mul(s6_7, M4(-5.259e-02, -3.667e-02, 1.936e-02, -1.913e-02, -1.692e-01, -3.561e-02, 6.175e-03, -1.379e-01, 8.600e-03, -1.716e-02, -2.808e-02, -7.197e-02, 1.118e-01, 7.306e-02, 4.102e-04, -1.042e-02)); + r += mul(s6_8, M4(-2.531e-02, -5.164e-02, 5.493e-02, 4.854e-03, 4.111e-02, -7.896e-03, -3.981e-02, -1.526e-02, -2.048e-02, 7.083e-02, 4.344e-03, 3.989e-02, -1.630e-02, -1.826e-02, -8.093e-02, -6.308e-04)); + r += mul(s7_0, M4(3.179e-02, -3.518e-02, 8.211e-03, -3.691e-02, -4.746e-02, -5.822e-02, 5.080e-03, 4.791e-02, -8.146e-02, -4.501e-02, -8.433e-02, 5.157e-02, -5.932e-03, -7.118e-02, 8.522e-02, -2.141e-02)); + r += mul(s7_1, M4(-1.645e-02, 6.074e-02, -4.580e-02, 1.038e-01, 1.166e-03, 7.635e-02, 8.076e-02, 7.206e-02, -3.724e-02, -2.647e-02, -1.186e-01, -2.410e-01, 3.958e-02, 3.122e-02, 6.193e-02, -2.849e-02)); + r += mul(s7_2, M4(-4.077e-02, -6.603e-02, 1.554e-02, -9.247e-02, 4.797e-02, 1.116e-02, 5.387e-02, 6.568e-02, 9.707e-02, -1.168e-01, -2.311e-02, -6.518e-02, 5.924e-02, 2.867e-02, 1.313e-02, 3.773e-02)); + r += mul(s7_3, M4(-3.563e-03, 7.289e-03, 2.709e-02, -4.880e-02, 3.392e-02, 4.283e-02, -5.466e-02, 1.689e-01, -2.436e-01, 1.429e-01, 2.042e-02, -8.785e-03, 3.645e-02, -5.496e-02, -9.380e-02, 2.743e-02)); + r += mul(s7_4, M4(-1.373e-02, 2.219e-01, 1.243e-01, -7.018e-03, 5.706e-02, 9.000e-02, 6.202e-02, 5.073e-02, 6.981e-02, 1.098e-01, 6.312e-02, -2.690e-01, -6.929e-02, -1.420e-01, -4.161e-02, 4.677e-02)); + r += mul(s7_5, M4(8.620e-03, -1.344e-01, 1.955e-02, 
3.364e-02, 2.047e-02, -2.089e-02, 2.387e-02, 3.861e-02, 8.585e-02, 1.298e-01, 9.145e-02, 1.203e-01, 2.402e-02, 2.671e-02, -5.028e-02, 2.961e-02)); + r += mul(s7_6, M4(-9.698e-02, -1.387e-01, -4.587e-02, -9.452e-02, -1.401e-02, 6.006e-02, -2.798e-02, -2.705e-02, 1.314e-02, -8.691e-02, 1.942e-01, -6.650e-02, 1.515e-02, -5.807e-02, -1.731e-02, 1.272e-02)); + r += mul(s7_7, M4(5.433e-02, -1.554e-01, 3.201e-02, -4.814e-02, -1.103e-02, -2.647e-02, -2.818e-02, -1.187e-01, -4.173e-02, -4.040e-02, 1.017e-01, 1.150e-01, 4.694e-02, 5.182e-02, 2.534e-02, 6.666e-02)); + r += mul(s7_8, M4(-1.063e-01, -1.207e-01, -1.622e-02, -1.400e-01, 1.736e-02, -2.373e-02, -4.898e-02, -7.461e-03, -1.487e-01, 1.490e-01, 6.622e-02, 6.458e-02, -3.789e-02, -8.266e-03, -4.094e-03, -2.403e-02)); + r += V4(-1.023e-03, -5.476e-03, -1.095e-03, 8.393e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.573e-02, 7.161e-03, -1.394e-01, 1.384e-01, -3.247e-03, 1.157e-02, 3.153e-02, 8.984e-02, -3.248e-02, -2.035e-02, -1.046e-01, 3.980e-03, -3.628e-02, -1.367e-01, 1.576e-02, -3.332e-04)); + r += mul(s0_1, M4(4.613e-02, -1.812e-01, -1.357e-01, -1.675e-01, -6.227e-02, -1.151e-01, 1.651e-01, -2.293e-01, -9.434e-03, -1.326e-02, -4.655e-02, 1.017e-01, 1.309e-01, -1.708e-01, -1.040e-01, 9.153e-02)); + r += mul(s0_2, M4(3.717e-03, -7.694e-02, 4.162e-02, 5.294e-03, 
-7.090e-04, 1.402e-01, 4.746e-01, 3.394e-01, -8.056e-03, 1.615e-01, -1.601e-01, -1.504e-01, -3.151e-02, -1.598e-01, 8.280e-02, -7.239e-02)); + r += mul(s0_3, M4(1.435e-01, -6.425e-02, 8.871e-02, -3.297e-03, -6.458e-03, -7.106e-02, 6.263e-02, 6.034e-02, -2.182e-02, -4.167e-02, 1.302e-01, -1.225e-01, -4.979e-02, -1.993e-02, -9.899e-02, 3.386e-02)); + r += mul(s0_4, M4(1.124e-01, 9.458e-02, 9.471e-02, -3.604e-02, 4.534e-02, -1.150e-01, 3.417e-01, -2.311e-01, -3.389e-01, -6.695e-02, -2.336e-01, 1.636e-01, -8.270e-02, -1.108e-01, 3.848e-02, 1.033e-01)); + r += mul(s0_5, M4(-4.395e-02, -3.300e-02, -1.609e-02, 8.713e-02, 1.459e-01, 5.272e-02, -1.280e-02, 1.466e-01, 1.860e-01, 9.840e-02, -5.078e-02, 2.870e-01, -6.188e-03, 1.225e-02, -9.529e-04, -3.990e-02)); + r += mul(s0_6, M4(-9.568e-02, -3.502e-02, 1.392e-02, -7.841e-02, -5.583e-02, -7.886e-02, 1.558e-01, 6.859e-03, -7.477e-02, 5.847e-02, -6.967e-02, -1.716e-02, 6.263e-02, 4.919e-02, -4.312e-02, 1.140e-01)); + r += mul(s0_7, M4(-4.097e-02, 7.478e-03, -4.635e-02, 1.104e-01, -4.515e-02, 2.753e-02, 8.515e-02, -7.188e-02, 2.395e-01, -2.184e-01, 8.484e-02, -3.227e-02, 1.199e-01, 9.138e-03, -3.123e-02, -1.577e-01)); + r += mul(s0_8, M4(-8.219e-03, -1.490e-02, -8.796e-03, -2.059e-02, -5.515e-02, -6.055e-02, 7.589e-02, -2.023e-01, -4.113e-02, 1.042e-01, -1.265e-02, -1.381e-01, 1.343e-02, 4.605e-02, -3.767e-03, 1.201e-03)); + r += mul(s1_0, M4(2.016e-04, -1.245e-02, 6.322e-02, -1.525e-02, 2.221e-02, 1.095e-02, 1.194e-02, 9.600e-02, -3.933e-02, 1.940e-02, 3.535e-02, 2.109e-02, -7.410e-03, -9.155e-02, -3.142e-02, -1.542e-02)); + r += mul(s1_1, M4(-6.849e-02, -1.181e-02, -6.625e-02, -5.465e-02, 9.366e-03, -6.903e-02, 9.370e-03, 2.291e-02, 7.970e-02, -9.807e-02, -5.992e-02, -2.611e-02, 8.352e-02, -3.817e-02, -2.603e-01, -9.509e-02)); + r += mul(s1_2, M4(-3.822e-02, 1.860e-03, 1.496e-02, 3.446e-02, -4.222e-02, -5.801e-02, -1.759e-02, -3.728e-02, -3.129e-02, -6.919e-02, 3.399e-02, 8.443e-02, -1.116e-02, -1.891e-01, 4.978e-02, 
-1.214e-01)); + r += mul(s1_3, M4(6.401e-02, -1.500e-02, 4.268e-02, 6.755e-02, -5.093e-02, 4.304e-03, 3.347e-02, 6.392e-02, -7.130e-03, -7.521e-02, -5.611e-02, 2.549e-02, 5.164e-02, -6.756e-02, -5.487e-02, -8.458e-02)); + r += mul(s1_4, M4(-1.466e-01, 1.242e-01, -1.156e-01, 5.293e-02, 8.086e-02, 9.438e-02, 8.676e-02, -5.026e-02, -6.387e-02, -3.977e-02, -2.101e-01, 4.470e-02, -1.718e-01, 8.650e-02, -6.104e-02, 2.093e-01)); + r += mul(s1_5, M4(7.186e-03, 1.095e-01, -3.019e-02, -7.367e-02, 4.784e-02, -2.984e-02, 1.548e-01, 4.014e-02, -2.274e-02, -6.932e-02, 8.882e-03, -8.839e-02, 1.595e-02, -8.845e-02, -3.382e-02, 2.831e-02)); + r += mul(s1_6, M4(7.803e-02, -3.916e-02, 4.040e-02, -8.003e-03, -2.292e-02, 3.720e-04, 2.760e-04, -9.340e-02, -8.947e-03, 3.776e-02, 1.220e-02, -1.332e-02, 3.498e-02, 8.433e-03, -4.860e-03, 2.310e-02)); + r += mul(s1_7, M4(9.365e-02, 5.603e-02, 5.709e-02, -2.146e-02, 3.112e-02, 5.482e-02, -1.228e-01, -1.350e-02, -1.083e-01, -6.226e-02, -3.162e-02, 1.401e-01, -9.468e-02, 2.175e-02, -2.463e-02, 1.193e-01)); + r += mul(s1_8, M4(-4.383e-02, -1.938e-03, 4.882e-02, 1.799e-02, -2.591e-02, -4.417e-03, -1.007e-02, -9.185e-03, 8.383e-02, 6.579e-02, -8.530e-03, -5.193e-02, 2.283e-02, -1.385e-02, -2.455e-02, 3.144e-02)); + r += mul(s2_0, M4(1.308e-02, -4.129e-02, 6.100e-02, -2.254e-02, 3.781e-03, -3.459e-03, -2.241e-02, 1.786e-02, 1.050e-01, -9.929e-03, 4.915e-02, 2.106e-02, 1.135e-02, 1.480e-02, 7.930e-02, -2.687e-02)); + r += mul(s2_1, M4(2.061e-02, 1.022e-01, 4.907e-03, -2.897e-04, -4.002e-02, 1.392e-02, 4.454e-02, 1.162e-01, -5.265e-02, -3.792e-02, -1.516e-01, -5.061e-02, -3.113e-03, 5.001e-02, 4.598e-02, 4.826e-02)); + r += mul(s2_2, M4(-3.237e-02, -2.990e-02, -2.588e-02, -4.802e-02, -3.918e-02, 8.142e-03, -2.035e-02, 4.014e-02, 1.608e-02, -1.177e-01, 7.554e-02, 7.580e-03, -4.732e-03, 7.434e-02, -1.306e-02, -3.381e-02)); + r += mul(s2_3, M4(-1.094e-02, -2.189e-02, 3.220e-02, -1.951e-02, -3.567e-02, 5.732e-02, -1.381e-02, 4.216e-02, 1.725e-01, 
2.610e-02, 9.859e-02, -1.376e-02, -1.063e-02, -6.666e-02, 1.189e-01, -3.116e-02)); + r += mul(s2_4, M4(2.312e-02, -3.798e-02, 2.498e-01, -2.597e-02, -1.083e-01, -4.158e-02, -5.762e-02, -1.442e-01, -1.496e-01, 2.726e-01, 4.038e-02, 2.902e-01, 1.832e-01, 3.059e-01, 1.669e-01, 8.272e-02)); + r += mul(s2_5, M4(5.626e-02, 2.406e-02, -4.238e-03, -1.034e-01, 2.738e-02, -6.185e-02, -1.111e-02, 6.729e-02, 3.129e-02, 1.094e-01, 4.609e-02, -9.522e-02, -2.542e-03, 3.714e-02, 7.407e-02, 2.085e-02)); + r += mul(s2_6, M4(3.705e-02, 2.203e-02, -4.068e-03, -4.938e-03, 3.399e-02, 5.753e-02, -4.125e-02, 6.187e-03, 1.006e-03, -2.532e-02, -7.392e-02, -4.208e-02, 4.891e-04, 4.096e-02, -8.405e-03, -8.183e-02)); + r += mul(s2_7, M4(-5.748e-02, 1.425e-02, -4.628e-02, -3.472e-02, -8.695e-02, 3.295e-02, -1.161e-04, 7.621e-02, 2.446e-01, -2.509e-02, 4.684e-02, 3.021e-02, 1.064e-01, 3.324e-02, -5.020e-02, 3.505e-02)); + r += mul(s2_8, M4(1.471e-01, -8.890e-03, -2.134e-02, -5.017e-02, 1.289e-01, 1.870e-02, -1.334e-03, 6.631e-03, -5.090e-02, -2.298e-02, 8.366e-03, 4.866e-02, 2.154e-02, 2.739e-03, -2.546e-02, 1.433e-02)); + r += mul(s3_0, M4(5.419e-05, -3.101e-02, 1.352e-02, 5.712e-03, 1.911e-02, -5.360e-02, -3.807e-02, 2.918e-02, 6.095e-02, 6.460e-02, 4.945e-02, -3.199e-02, -2.867e-02, -3.446e-02, 3.003e-02, 9.988e-03)); + r += mul(s3_1, M4(3.341e-02, 4.134e-02, 7.844e-02, 5.281e-03, 3.089e-02, 8.452e-04, -8.457e-03, -3.729e-02, -7.936e-02, -7.109e-02, -9.334e-03, -1.782e-03, 6.665e-02, 8.836e-02, -6.156e-02, -2.519e-03)); + r += mul(s3_2, M4(3.780e-02, -7.643e-02, 7.086e-02, -1.028e-01, -6.268e-02, -2.949e-02, 3.898e-02, 3.796e-02, 1.433e-02, 5.399e-02, -1.050e-02, -1.622e-02, 6.334e-02, -1.028e-01, -1.726e-02, -4.961e-02)); + r += mul(s3_3, M4(3.620e-03, 2.926e-02, 3.623e-02, -3.697e-02, -1.348e-01, -1.576e-02, 5.374e-03, 1.145e-01, 9.554e-02, -4.427e-03, 1.754e-01, -1.602e-01, -4.351e-02, -9.182e-03, 2.175e-03, 1.934e-01)); + r += mul(s3_4, M4(1.113e-02, 3.872e-02, 8.718e-02, 2.359e-01, 
-2.204e-02, 1.151e-01, 8.256e-02, -1.061e-02, -1.436e-01, 2.401e-01, -1.791e-01, 6.773e-02, 1.237e-01, 6.238e-03, 8.617e-02, 1.325e-01)); + r += mul(s3_5, M4(6.094e-02, -4.889e-02, -4.181e-02, -5.091e-02, -1.361e-01, 5.353e-02, -1.405e-02, -2.197e-02, -1.387e-02, 1.213e-01, -4.815e-02, 5.208e-02, -8.138e-03, 8.380e-02, 9.473e-02, -2.112e-02)); + r += mul(s3_6, M4(3.975e-02, 2.303e-02, -2.360e-02, -4.115e-02, -1.806e-01, 6.812e-02, -5.047e-03, 3.640e-02, 4.126e-02, -1.231e-02, -1.294e-02, -1.101e-01, -5.582e-02, 4.136e-02, 5.346e-02, 1.579e-01)); + r += mul(s3_7, M4(5.396e-03, -1.426e-02, -4.466e-03, 1.801e-02, 1.972e-01, 4.674e-02, -3.575e-03, -2.665e-02, 6.105e-02, 2.017e-02, 6.524e-02, -8.115e-03, 2.212e-01, 2.100e-03, -4.350e-02, 1.854e-02)); + r += mul(s3_8, M4(-6.050e-02, -1.345e-02, -6.604e-03, -5.172e-02, -8.451e-02, 3.536e-03, 7.087e-02, 5.787e-02, -6.050e-02, -4.158e-02, -1.844e-03, 2.855e-02, -4.274e-03, 1.703e-02, -8.310e-03, -2.246e-02)); + r += mul(s4_0, M4(-4.596e-02, -1.015e-01, 2.650e-02, 7.098e-02, 3.649e-02, 4.925e-02, 5.334e-02, -8.714e-02, 8.703e-02, 4.129e-02, -2.322e-02, -1.195e-01, -7.584e-02, -5.361e-02, 1.496e-02, 1.593e-02)); + r += mul(s4_1, M4(4.722e-03, -4.852e-02, 2.723e-02, -2.235e-02, -2.773e-02, 1.028e-01, -1.812e-02, 1.027e-01, -2.067e-02, 1.548e-02, 8.800e-04, -8.573e-02, -9.428e-02, 2.252e-01, -4.724e-02, 9.352e-03)); + r += mul(s4_2, M4(7.280e-02, -2.637e-03, 4.746e-02, -1.859e-02, -4.016e-02, 9.909e-02, 9.699e-03, 5.240e-02, 1.797e-02, 1.215e-02, 1.597e-02, -4.656e-02, -3.596e-02, 7.321e-02, 4.878e-02, -7.422e-02)); + r += mul(s4_3, M4(-7.108e-02, 4.416e-02, -1.583e-01, 2.224e-02, 1.006e-02, 3.720e-02, -4.746e-02, -2.819e-02, 6.086e-02, -5.138e-02, 9.156e-02, -1.485e-01, -9.078e-02, -1.576e-02, -1.673e-01, -9.070e-02)); + r += mul(s4_4, M4(-9.867e-02, -5.303e-03, -1.966e-01, -1.493e-02, -5.443e-02, -1.451e-01, -9.871e-02, 3.058e-01, -1.140e-01, 9.568e-02, 7.999e-02, -2.637e-02, 2.875e-02, 1.180e-01, -7.128e-02, -4.867e-02)); + 
r += mul(s4_5, M4(3.243e-02, -1.208e-01, -3.402e-02, -2.245e-03, -4.308e-02, 1.428e-01, -5.843e-02, 9.481e-02, 9.735e-02, -1.653e-02, 9.207e-02, 1.336e-02, -6.181e-03, -8.522e-02, 2.598e-02, -3.312e-02)); + r += mul(s4_6, M4(1.656e-01, 3.756e-03, 7.547e-02, 7.159e-03, -4.749e-02, -4.120e-02, 4.557e-03, -2.466e-02, -4.164e-02, 1.486e-02, -4.251e-02, -4.409e-02, 1.866e-02, 9.305e-03, 1.946e-02, -3.284e-02)); + r += mul(s4_7, M4(-2.038e-01, 3.747e-02, -3.268e-02, 1.458e-01, -9.368e-02, 4.332e-02, -9.126e-03, 5.230e-02, 2.123e-02, -2.027e-02, -1.157e-02, 5.292e-03, -1.271e-01, 6.416e-02, -3.124e-02, 9.119e-02)); + r += mul(s4_8, M4(-1.941e-03, 1.871e-02, -3.535e-02, -5.836e-03, -1.567e-01, -6.096e-02, 8.461e-02, -3.906e-02, -2.157e-03, 4.560e-04, -3.187e-02, -6.424e-02, 9.766e-02, -3.718e-02, -8.667e-03, -5.434e-02)); + r += mul(s5_0, M4(-1.437e-02, -4.311e-02, 1.089e-01, -4.203e-02, 1.390e-02, -5.211e-02, 1.092e-01, -7.442e-02, 4.226e-02, 3.647e-02, -8.262e-03, 4.980e-03, 1.170e-01, -5.219e-03, -5.846e-02, 4.702e-03)); + r += mul(s5_1, M4(-6.668e-02, 1.068e-01, 7.230e-02, 4.628e-02, -2.293e-02, -9.088e-03, 4.155e-02, 4.672e-02, 3.436e-02, 2.117e-02, -6.884e-03, -4.562e-02, -2.062e-01, -2.815e-02, -4.868e-02, -8.531e-02)); + r += mul(s5_2, M4(-2.891e-02, 1.159e-01, -3.923e-02, -6.473e-02, -6.329e-02, 1.374e-01, -6.642e-02, 2.313e-02, 6.638e-02, -4.668e-02, -3.050e-02, -3.969e-02, 5.642e-02, 2.006e-02, 5.561e-02, -1.101e-02)); + r += mul(s5_3, M4(7.491e-02, -3.847e-02, 1.783e-01, -2.926e-01, -8.981e-02, 1.206e-02, -6.551e-02, 1.380e-01, -1.125e-02, 7.559e-02, -1.997e-02, 1.007e-01, -1.883e-02, -1.027e-02, -2.135e-01, -3.129e-01)); + r += mul(s5_4, M4(1.314e-01, 8.873e-02, 7.119e-02, 2.532e-02, 9.858e-02, -1.783e-01, 1.628e-01, -2.206e-02, 2.891e-02, 9.753e-02, 1.155e-01, -7.199e-03, 2.056e-03, -6.549e-02, -1.366e-01, 2.333e-03)); + r += mul(s5_5, M4(-1.068e-02, 1.411e-01, -4.501e-02, 2.911e-02, 9.187e-02, 1.723e-02, -5.198e-02, 1.612e-01, 9.846e-02, -3.100e-02, 
-2.407e-02, 6.244e-02, 3.320e-02, 9.104e-03, 9.100e-03, -6.003e-02)); + r += mul(s5_6, M4(1.696e-02, -3.728e-02, -8.617e-03, 8.058e-03, -3.662e-02, 2.435e-02, 1.711e-02, 3.828e-02, 7.287e-03, -3.812e-02, -1.860e-02, 3.019e-02, -1.127e-02, 6.945e-03, -1.815e-02, 1.496e-02)); + r += mul(s5_7, M4(9.003e-02, -5.317e-02, 2.473e-02, 1.538e-03, 3.161e-02, 4.279e-02, 6.593e-03, -1.882e-01, -1.911e-02, -2.540e-02, 2.651e-02, 4.482e-02, -4.051e-02, 3.864e-02, -1.276e-02, -6.016e-02)); + r += mul(s5_8, M4(3.874e-02, -2.374e-02, -1.513e-04, -1.703e-03, 9.969e-02, -5.193e-02, 3.489e-02, 8.153e-03, 2.798e-02, 2.703e-02, -2.741e-02, 5.867e-02, -3.113e-02, 3.298e-02, 2.207e-02, 5.378e-02)); + r += mul(s6_0, M4(-4.333e-02, -2.164e-02, -1.891e-02, -2.478e-02, -8.467e-02, -2.836e-02, 2.656e-02, 2.228e-01, 2.438e-02, -5.832e-02, -5.970e-03, -2.624e-02, -7.549e-02, -8.601e-02, 3.399e-02, 2.754e-02)); + r += mul(s6_1, M4(3.181e-02, 9.488e-02, 8.526e-02, 2.832e-02, 9.164e-02, 9.265e-02, 2.801e-02, 6.820e-03, 2.016e-02, 4.320e-02, 1.761e-02, 8.020e-02, -2.823e-03, 1.213e-01, -5.059e-03, 1.175e-02)); + r += mul(s6_2, M4(-3.608e-02, 9.750e-03, -5.425e-02, -1.999e-02, 2.937e-02, 1.068e-01, 1.056e-01, 6.960e-02, -5.103e-03, -1.125e-02, -3.947e-02, -1.366e-02, -2.286e-02, 2.616e-02, 3.017e-02, 2.436e-02)); + r += mul(s6_3, M4(-2.321e-02, -5.012e-03, -5.651e-02, 6.494e-02, 6.039e-02, -1.996e-02, 1.684e-01, -1.389e-01, 8.584e-03, -5.589e-02, -8.010e-02, -1.454e-02, -1.799e-01, 6.040e-02, -2.547e-04, 1.265e-02)); + r += mul(s6_4, M4(2.273e-02, 5.835e-02, 4.878e-02, -1.032e-01, -1.307e-01, 3.118e-01, 1.039e-01, 5.808e-02, 7.333e-02, -6.361e-02, 8.217e-02, -2.615e-02, -2.997e-02, 2.579e-01, -2.016e-01, -7.790e-03)); + r += mul(s6_5, M4(3.064e-02, -1.017e-01, -3.927e-02, -3.303e-04, -1.426e-02, -1.747e-01, -5.337e-02, 3.372e-02, -3.211e-02, 7.596e-02, -2.422e-02, -3.888e-02, 3.963e-03, 1.547e-01, 1.863e-02, 1.161e-02)); + r += mul(s6_6, M4(-6.109e-02, 5.664e-03, 4.154e-02, 3.335e-02, 1.658e-01, 
-8.779e-02, 4.245e-02, -9.629e-03, 1.819e-03, -1.133e-02, 5.707e-02, 2.083e-02, -7.984e-03, 3.163e-02, 3.429e-02, 7.658e-02)); + r += mul(s6_7, M4(5.135e-02, -1.260e-02, 6.356e-03, -7.248e-03, -1.947e-01, -3.594e-02, 1.387e-02, 1.234e-01, 4.695e-02, -1.174e-02, 6.543e-02, -8.830e-02, -6.473e-02, 9.215e-02, -8.994e-03, -5.335e-02)); + r += mul(s6_8, M4(2.110e-02, 9.331e-02, -2.727e-03, 8.553e-03, 1.118e-01, 1.612e-02, 3.993e-02, -6.754e-02, -7.692e-02, 3.361e-02, 2.152e-02, 2.288e-02, -1.812e-01, 1.344e-02, -2.324e-02, 1.259e-01)); + r += mul(s7_0, M4(-3.143e-02, 1.270e-02, 2.812e-03, -3.664e-02, -3.640e-02, -5.367e-02, -6.271e-02, 7.864e-02, 3.176e-02, -9.713e-02, -1.309e-01, 1.282e-01, -9.484e-03, -7.186e-02, -5.679e-02, 6.335e-03)); + r += mul(s7_1, M4(2.077e-02, 4.741e-02, 6.566e-02, 2.128e-02, 1.068e-02, -4.366e-02, 3.465e-03, -7.231e-02, -5.704e-04, 6.338e-02, -6.195e-03, 9.465e-02, -5.166e-02, 4.834e-03, -1.855e-02, 1.058e-01)); + r += mul(s7_2, M4(2.486e-03, -2.980e-02, -3.408e-02, -1.577e-02, -1.064e-02, -1.997e-02, 2.767e-02, 4.654e-03, 9.022e-03, -1.070e-01, 1.364e-02, 5.342e-02, 1.118e-02, -5.942e-02, 6.186e-02, 3.055e-02)); + r += mul(s7_3, M4(1.410e-02, -4.275e-02, 7.937e-02, 2.648e-02, -8.021e-03, -1.177e-02, 3.691e-02, 1.041e-01, 4.750e-02, -1.635e-02, -1.884e-02, -2.387e-01, -1.456e-02, 1.608e-02, -9.824e-03, -8.208e-02)); + r += mul(s7_4, M4(8.184e-02, 1.641e-01, 5.635e-02, -1.439e-01, -1.819e-01, 1.210e-01, -6.449e-02, -5.453e-02, 1.468e-01, 2.512e-01, -6.038e-02, 8.986e-02, -2.204e-01, 1.205e-01, -3.414e-02, -7.794e-03)); + r += mul(s7_5, M4(1.883e-02, -7.130e-04, 9.819e-02, 9.457e-02, -1.506e-02, 2.341e-03, -2.388e-02, -4.613e-02, 1.039e-01, 6.381e-02, -1.535e-01, -1.390e-01, 1.277e-02, 1.570e-01, 2.231e-02, -1.025e-02)); + r += mul(s7_6, M4(-1.148e-01, 3.073e-02, -1.026e-01, 2.518e-01, -5.945e-03, 2.623e-02, -1.987e-05, 1.573e-02, 2.702e-03, -7.482e-02, -1.182e-02, 1.288e-02, 2.059e-02, -1.404e-02, 2.169e-03, 1.898e-02)); + r += mul(s7_7, 
M4(7.418e-02, -1.265e-01, -1.586e-02, 1.307e-01, 2.444e-03, 2.544e-02, 2.896e-02, 5.353e-03, 3.958e-02, -6.330e-03, -4.618e-03, 1.731e-02, -4.983e-04, 4.898e-02, 3.626e-02, -1.872e-02)); + r += mul(s7_8, M4(-3.294e-02, 2.496e-02, -4.163e-03, 1.318e-01, 1.180e-01, -1.751e-02, 1.257e-02, -1.621e-02, -1.501e-01, -2.276e-02, 3.091e-02, 1.136e-02, -1.445e-01, 2.529e-02, 1.941e-02, 6.400e-02)); + r += V4(8.587e-03, 7.166e-03, 8.947e-03, 4.751e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.282e-02, -7.501e-03, -1.857e-02, 2.125e-02, -9.094e-02, -1.234e-02, 3.117e-02, -4.372e-02, 5.749e-02, 1.487e-02, 2.390e-02, -4.648e-02, 2.412e-02, 9.416e-02, -4.091e-02, -7.010e-03)); + r += mul(s0_1, M4(7.699e-03, -1.461e-03, -1.739e-01, -1.072e-01, 7.644e-02, 1.704e-01, 4.967e-02, -1.591e-01, -1.191e-01, 6.662e-02, 5.521e-03, 4.485e-02, 2.038e-01, 1.115e-01, 2.296e-02, -3.010e-02)); + r += mul(s0_2, M4(1.087e-01, -9.153e-02, 8.022e-02, 1.570e-03, -6.340e-02, -3.081e-01, -1.052e-01, 1.030e-01, 9.012e-03, 3.300e-02, -5.430e-02, -5.508e-02, -9.428e-02, 2.964e-02, -2.834e-02, -6.783e-02)); + r += mul(s0_3, M4(6.784e-03, -1.459e-02, 1.777e-02, 6.636e-02, -5.034e-03, -4.580e-02, 3.943e-02, -6.539e-02, 6.990e-02, -9.399e-02, 3.101e-02, 1.040e-03, 1.083e-01, -3.711e-02, -1.661e-02, 3.074e-02)); + r += mul(s0_4, M4(-3.250e-02, 
-1.161e-01, 1.656e-01, -1.243e-01, 1.532e-02, -1.397e-02, 3.399e-02, -7.091e-02, 1.263e-01, 1.034e-01, 3.741e-02, -4.138e-02, 2.900e-01, -9.898e-02, -9.064e-02, 1.358e-01)); + r += mul(s0_5, M4(1.519e-01, -1.029e-01, -4.519e-02, 1.114e-02, 6.039e-02, 1.862e-01, -4.409e-02, -4.450e-02, 1.858e-01, 5.967e-02, 6.574e-02, 6.440e-02, 5.312e-02, 5.240e-02, -7.643e-02, -4.162e-02)); + r += mul(s0_6, M4(-4.410e-02, -6.757e-05, 1.782e-02, 3.755e-02, -1.586e-01, -1.383e-02, -1.596e-02, -5.003e-02, -1.231e-01, -2.110e-02, 9.221e-02, 9.457e-02, -4.785e-02, 4.241e-02, -5.256e-02, 3.678e-02)); + r += mul(s0_7, M4(2.781e-02, -7.796e-02, 4.718e-02, 1.196e-01, -2.547e-01, -2.209e-02, -3.105e-02, -1.510e-01, 6.214e-02, 3.699e-02, -1.110e-02, -2.115e-02, -2.354e-03, 8.718e-02, 1.031e-01, 8.796e-02)); + r += mul(s0_8, M4(2.464e-02, 2.310e-02, -7.324e-02, -6.761e-02, 2.163e-01, 4.396e-02, 7.289e-03, -1.429e-01, -1.751e-01, -6.605e-02, 2.203e-02, -1.974e-01, -7.655e-02, 4.021e-03, -3.618e-03, 7.687e-02)); + r += mul(s1_0, M4(-8.670e-02, -4.925e-02, -5.791e-03, -1.917e-02, -4.395e-02, 3.902e-03, -7.226e-04, -2.517e-02, -1.135e-01, 1.866e-02, 7.547e-03, 4.762e-03, -3.201e-02, 7.010e-02, 7.117e-02, 2.464e-03)); + r += mul(s1_1, M4(-7.021e-02, -2.683e-02, 5.226e-02, -5.977e-02, -6.292e-02, -3.425e-03, 3.800e-02, 7.317e-03, -9.742e-02, -2.554e-02, -7.991e-02, -1.119e-01, 5.196e-02, 9.553e-02, -6.086e-02, -4.175e-02)); + r += mul(s1_2, M4(-4.576e-02, -3.827e-02, -5.359e-02, 7.820e-02, -8.255e-02, -5.313e-02, -6.502e-02, 3.165e-03, 9.783e-03, -2.458e-02, 1.439e-01, -4.686e-02, -1.003e-01, -8.425e-03, -5.771e-03, -4.758e-02)); + r += mul(s1_3, M4(5.966e-02, 9.595e-03, -3.856e-02, 5.273e-02, -1.175e-03, 4.427e-02, 3.991e-02, 2.095e-03, -8.498e-02, -2.060e-02, -1.845e-02, -1.425e-01, -7.576e-02, -5.931e-02, 3.845e-02, 7.610e-03)); + r += mul(s1_4, M4(3.796e-02, 1.447e-01, 9.259e-02, 1.260e-01, 1.608e-03, 7.995e-03, -6.667e-02, 2.510e-02, -9.106e-02, 2.393e-02, 7.300e-03, 9.369e-03, 4.809e-02, 
-2.241e-01, -1.092e-01, 1.567e-01)); + r += mul(s1_5, M4(-2.641e-03, 3.406e-02, -5.671e-02, -2.547e-02, -2.399e-03, -3.979e-02, 2.325e-02, 3.546e-02, -3.434e-02, -4.097e-02, -4.166e-02, -1.135e-01, -7.384e-03, -3.778e-02, 6.189e-02, 6.051e-02)); + r += mul(s1_6, M4(3.849e-02, -1.066e-05, -9.556e-03, -6.462e-02, 1.338e-02, -2.918e-02, -4.090e-04, -9.598e-02, -1.725e-02, -3.150e-02, -3.695e-02, -5.857e-02, -1.094e-02, 4.266e-02, -2.025e-02, 6.002e-02)); + r += mul(s1_7, M4(1.280e-01, 1.042e-02, -5.609e-03, -9.296e-03, 3.400e-02, -1.899e-02, -2.874e-02, -3.249e-02, 1.947e-03, 1.613e-02, -1.296e-02, -1.637e-02, -4.200e-02, -6.840e-02, 6.963e-02, 5.189e-03)); + r += mul(s1_8, M4(4.628e-02, -2.528e-02, -4.852e-02, -7.951e-02, -3.145e-02, 3.404e-02, 3.338e-02, -2.195e-02, 7.240e-03, -4.205e-02, 8.960e-02, -6.156e-02, -5.262e-02, 1.937e-02, -4.494e-03, 7.634e-02)); + r += mul(s2_0, M4(1.292e-03, -7.605e-03, 4.105e-02, -1.246e-01, 1.068e-01, 2.252e-02, -2.295e-02, 3.658e-02, -3.432e-02, -1.054e-01, -6.732e-02, 3.738e-02, -2.803e-02, 1.604e-02, -3.872e-02, -7.219e-02)); + r += mul(s2_1, M4(4.383e-02, 2.352e-02, 6.190e-03, 1.750e-02, 1.012e-03, -3.472e-03, 1.045e-01, 7.403e-02, -4.373e-02, -1.335e-01, -8.607e-02, 2.795e-03, 6.669e-02, -7.791e-02, -3.136e-02, 1.017e-01)); + r += mul(s2_2, M4(-7.371e-02, -8.063e-03, 1.434e-02, -7.305e-02, -6.319e-03, 2.461e-03, 1.882e-04, 6.418e-02, -2.161e-02, -1.958e-02, 6.567e-02, 5.888e-02, 2.914e-02, 2.875e-02, 9.276e-03, -3.553e-02)); + r += mul(s2_3, M4(-5.579e-02, -7.940e-02, -3.232e-04, -8.424e-02, -3.059e-02, -1.152e-01, 1.849e-03, 7.836e-02, -1.220e-02, -2.966e-02, 8.221e-03, 2.005e-02, -3.060e-04, -2.329e-02, -1.330e-02, 1.660e-02)); + r += mul(s2_4, M4(-2.695e-01, 3.710e-02, 8.364e-02, 5.437e-02, -4.420e-02, 2.428e-02, -1.992e-01, 1.470e-02, -1.727e-01, -2.678e-01, 2.076e-02, 1.628e-01, 9.073e-02, -3.265e-02, -1.376e-01, 1.633e-01)); + r += mul(s2_5, M4(-2.046e-01, -1.528e-01, -1.308e-01, -2.173e-01, -1.029e-02, -3.100e-03, 
2.210e-02, 8.151e-02, 1.365e-02, -8.022e-02, 2.672e-03, 1.166e-02, 2.332e-03, -4.854e-02, -7.820e-03, 1.159e-01)); + r += mul(s2_6, M4(-5.626e-02, -3.503e-02, -1.289e-03, -8.236e-02, -3.173e-02, -2.268e-02, 1.947e-02, -2.506e-02, 1.289e-01, 1.053e-02, -3.061e-02, 2.538e-02, -9.946e-02, -6.173e-02, 2.473e-02, -2.692e-02)); + r += mul(s2_7, M4(-2.065e-01, -5.762e-03, 5.203e-02, -2.143e-02, -7.554e-03, -4.648e-02, 1.165e-01, 2.026e-01, -8.369e-02, -1.324e-01, 5.834e-02, -5.356e-02, -6.907e-02, -7.742e-02, 2.550e-02, 6.365e-03)); + r += mul(s2_8, M4(-2.044e-01, -5.655e-02, 7.710e-02, -1.028e-01, -3.904e-02, -3.208e-03, -5.546e-02, 4.769e-02, -1.686e-02, -2.435e-02, 2.537e-03, 5.641e-02, -2.325e-02, -3.456e-02, 6.530e-02, 9.904e-02)); + r += mul(s3_0, M4(1.496e-02, -2.228e-02, 1.076e-02, -3.184e-02, -2.095e-02, 2.914e-02, 1.139e-03, -4.919e-02, -6.869e-02, -5.692e-02, -4.088e-02, 2.431e-02, -8.697e-02, -4.383e-02, -6.886e-02, -1.106e-01)); + r += mul(s3_1, M4(1.025e-01, 4.740e-03, 2.346e-02, -1.739e-02, 1.687e-02, 4.287e-02, -2.448e-02, -2.855e-02, -1.337e-02, -3.874e-02, -1.126e-02, 2.951e-03, 1.042e-01, -5.224e-02, -5.520e-02, -6.284e-02)); + r += mul(s3_2, M4(2.283e-02, 2.206e-02, 3.470e-02, -8.382e-02, 1.281e-02, -2.636e-02, -2.876e-02, 2.666e-02, 6.015e-02, -2.311e-03, -3.086e-02, 4.340e-02, 6.960e-02, -3.253e-03, 1.285e-01, -6.811e-02)); + r += mul(s3_3, M4(4.885e-02, -3.848e-02, -3.027e-02, -8.035e-02, 1.811e-02, -2.526e-02, 3.228e-02, 9.239e-02, 2.841e-02, 5.519e-02, 6.826e-02, -3.739e-02, -1.170e-02, -2.528e-02, 5.262e-02, 1.422e-01)); + r += mul(s3_4, M4(-7.634e-02, -2.835e-02, 1.058e-01, 9.126e-03, 7.710e-02, 1.233e-01, 2.328e-01, 1.879e-01, 1.550e-01, -6.004e-03, -1.373e-01, 4.386e-02, 1.198e-01, -1.807e-02, 1.245e-03, -8.021e-02)); + r += mul(s3_5, M4(5.724e-02, -1.178e-01, 1.767e-01, -7.646e-02, -2.158e-02, -1.647e-02, -8.337e-02, -7.362e-02, 2.942e-03, 6.778e-02, 4.844e-02, 3.504e-02, -5.358e-03, 2.728e-02, -6.942e-02, 7.962e-03)); + r += mul(s3_6, 
M4(4.452e-02, -1.646e-02, -1.147e-02, -5.155e-02, 7.076e-02, -1.398e-02, 9.774e-02, 4.261e-02, 1.003e-01, -1.432e-02, 3.115e-02, 4.306e-02, -1.018e-01, -1.393e-01, 7.043e-02, 1.725e-02)); + r += mul(s3_7, M4(2.213e-02, -7.985e-02, 1.339e-01, -7.921e-02, 1.285e-01, -1.511e-01, 1.649e-01, 4.592e-02, 3.238e-02, -1.482e-01, -1.781e-03, -8.541e-02, -3.168e-02, -3.916e-02, 4.108e-02, 1.662e-01)); + r += mul(s3_8, M4(-2.440e-02, -6.708e-02, 7.709e-02, -4.256e-02, -8.491e-02, -6.524e-02, -1.045e-01, -5.467e-02, -5.471e-02, 2.201e-02, -7.658e-03, 8.222e-02, -5.803e-02, -4.567e-02, 1.506e-02, -3.535e-02)); + r += mul(s4_0, M4(-5.863e-03, -3.528e-02, -7.038e-03, -8.064e-02, 6.389e-02, -3.502e-02, 1.629e-02, -5.566e-02, -8.219e-02, 9.411e-02, -4.741e-02, -2.534e-01, -1.678e-02, -1.364e-02, 6.047e-02, -4.086e-02)); + r += mul(s4_1, M4(8.560e-02, 1.703e-02, -1.876e-02, 3.156e-02, 1.031e-01, -6.625e-03, 7.125e-02, 6.314e-02, -9.482e-02, -3.137e-02, -1.422e-01, -2.679e-01, 3.143e-02, -3.354e-02, 1.283e-01, -1.122e-01)); + r += mul(s4_2, M4(-6.814e-02, -4.490e-03, 1.070e-01, 2.167e-03, 1.036e-02, 2.412e-03, -2.831e-02, -1.058e-01, 4.681e-02, 2.511e-02, 2.371e-02, -2.076e-01, -1.166e-01, 3.197e-02, -6.517e-02, -9.961e-02)); + r += mul(s4_3, M4(5.385e-02, -7.896e-02, 8.853e-02, 6.126e-02, 2.545e-02, 2.880e-02, -5.458e-02, 1.890e-02, -8.357e-02, 5.410e-02, -3.906e-02, -1.762e-01, 1.609e-01, -2.666e-02, -3.527e-02, 3.515e-03)); + r += mul(s4_4, M4(1.972e-02, 6.908e-02, -9.858e-03, 1.035e-01, 2.817e-01, 2.094e-01, -3.090e-01, -1.431e-01, -1.254e-01, -1.514e-01, -1.544e-01, -7.765e-03, -7.615e-02, 2.419e-02, -5.651e-02, -2.255e-02)); + r += mul(s4_5, M4(-1.699e-01, -4.790e-02, -4.011e-02, -4.175e-02, 1.003e-01, 9.210e-02, 1.107e-01, 1.973e-02, -2.131e-02, -2.740e-02, 4.149e-02, -1.450e-01, -7.245e-02, -9.400e-02, 1.103e-01, 7.094e-02)); + r += mul(s4_6, M4(-1.019e-01, -1.939e-02, 5.795e-02, -7.520e-02, -4.166e-02, 3.584e-03, 2.277e-02, 2.896e-02, 8.013e-02, 1.190e-02, -1.715e-02, 
-5.244e-02, -1.147e-02, 3.578e-02, -4.712e-02, -2.901e-02)); + r += mul(s4_7, M4(1.753e-01, 6.022e-02, 9.818e-02, 1.606e-01, -2.239e-02, 9.389e-03, -2.591e-02, -3.310e-02, 1.002e-01, 2.405e-02, 3.143e-02, 1.206e-02, -3.505e-02, 2.515e-02, -1.970e-02, 4.895e-02)); + r += mul(s4_8, M4(-8.281e-02, 8.249e-02, -1.288e-02, -2.050e-02, -7.102e-02, 2.256e-02, -1.944e-03, 8.534e-03, -3.477e-02, -7.072e-04, 3.717e-02, -8.730e-02, -6.472e-02, -9.351e-03, 3.269e-02, -3.213e-02)); + r += mul(s5_0, M4(9.542e-02, -4.061e-02, -2.904e-02, -1.822e-01, -5.632e-02, -2.788e-02, -8.150e-03, -1.046e-01, 7.828e-03, -2.316e-02, 1.447e-03, -9.084e-02, 2.058e-02, -7.100e-03, 4.199e-02, -5.257e-02)); + r += mul(s5_1, M4(1.531e-01, -1.350e-01, -3.146e-02, 4.359e-02, -1.677e-02, -2.604e-02, 1.119e-01, 3.979e-02, -5.084e-02, 1.952e-02, 2.422e-02, -9.534e-02, -5.122e-02, 1.170e-02, -2.168e-01, 5.671e-02)); + r += mul(s5_2, M4(8.436e-02, -7.757e-03, 7.705e-03, -1.189e-01, 8.195e-03, -4.497e-02, -2.285e-02, -6.500e-02, 5.457e-02, -1.213e-02, -5.160e-02, -3.054e-02, -1.466e-01, -1.326e-02, 1.110e-01, 5.654e-02)); + r += mul(s5_3, M4(-2.890e-02, -6.597e-02, 1.778e-02, -1.974e-01, 7.849e-02, -2.527e-02, -3.367e-02, 4.053e-02, 4.037e-02, -5.426e-02, -2.900e-04, 1.747e-03, 2.388e-01, 1.038e-01, 1.033e-02, 4.588e-02)); + r += mul(s5_4, M4(-4.453e-03, -8.251e-02, -3.662e-01, -5.380e-02, 5.726e-02, 1.958e-01, 2.646e-02, -1.724e-01, -7.544e-02, -4.307e-02, -2.181e-02, 6.844e-03, 1.017e-01, 9.632e-02, -4.427e-02, -7.737e-02)); + r += mul(s5_5, M4(1.049e-01, 6.475e-03, -9.022e-03, 4.203e-02, 3.576e-02, 2.242e-02, 1.977e-01, 9.945e-02, 1.133e-02, -8.528e-02, 9.182e-02, 8.884e-02, 3.620e-03, -4.205e-02, 4.046e-02, 4.582e-02)); + r += mul(s5_6, M4(-7.955e-02, 7.959e-03, -3.231e-02, -1.656e-01, -9.445e-02, 1.195e-02, 7.625e-03, 1.623e-02, 2.095e-01, -1.135e-02, -2.451e-02, 5.500e-02, 2.445e-01, 4.941e-02, -3.756e-02, 7.325e-02)); + r += mul(s5_7, M4(6.233e-02, -4.545e-02, 8.869e-02, -1.112e-01, 5.572e-02, 
4.290e-02, -6.435e-02, -5.352e-02, 1.452e-01, -3.088e-02, 5.818e-02, 6.814e-02, -4.096e-02, 3.249e-02, 1.081e-01, 6.255e-02)); + r += mul(s5_8, M4(8.141e-02, -7.149e-02, -7.663e-05, -1.042e-01, -8.317e-02, -1.451e-04, 4.788e-02, 2.973e-02, 1.225e-02, 2.898e-02, -3.275e-03, 9.347e-02, 5.865e-03, -1.833e-02, -1.982e-02, 3.362e-02)); + r += mul(s6_0, M4(-9.579e-03, -2.138e-02, -1.251e-02, 4.840e-02, 3.551e-02, -4.484e-02, 2.255e-02, 2.169e-01, 3.611e-02, 1.155e-01, 4.562e-02, -5.924e-02, 4.021e-02, -3.011e-02, 3.772e-02, -4.811e-02)); + r += mul(s6_1, M4(6.861e-02, -4.056e-02, -2.885e-02, -2.634e-02, 1.707e-01, -1.619e-02, -5.798e-02, 3.128e-02, -1.866e-02, 2.244e-02, -9.318e-02, -9.833e-02, 1.297e-01, -7.007e-02, -9.872e-02, -1.108e-01)); + r += mul(s6_2, M4(-2.595e-02, -4.442e-02, 2.363e-03, 3.570e-02, 3.570e-02, -7.211e-03, -3.487e-02, 9.863e-02, 9.703e-02, -1.322e-02, 9.269e-03, -3.481e-02, 3.874e-02, 1.048e-01, 1.030e-01, 5.039e-02)); + r += mul(s6_3, M4(4.133e-02, 2.960e-02, 1.587e-03, 9.920e-02, -9.105e-02, -5.457e-02, 1.076e-01, 4.433e-03, -8.342e-02, 3.623e-02, 2.680e-02, 7.488e-03, -9.311e-02, -9.511e-02, 1.247e-01, 7.153e-02)); + r += mul(s6_4, M4(1.213e-01, 3.029e-02, 1.789e-02, -2.763e-01, 1.399e-01, -2.771e-01, -4.651e-02, 6.321e-02, -1.600e-01, -3.096e-02, 3.674e-03, -1.188e-01, -9.643e-02, -3.469e-01, 9.532e-02, -9.500e-02)); + r += mul(s6_5, M4(-8.947e-02, 2.622e-02, 5.543e-02, 5.093e-02, 1.466e-01, -5.734e-02, -3.783e-03, 6.553e-02, 5.229e-02, 7.388e-03, -4.044e-02, -6.720e-02, -9.627e-02, -3.651e-02, -2.029e-02, -6.617e-02)); + r += mul(s6_6, M4(-9.539e-02, -1.852e-02, 8.695e-02, 1.081e-01, -5.356e-02, -9.769e-02, 1.502e-01, 8.052e-02, -2.488e-02, -2.784e-02, -7.563e-02, -1.241e-01, -2.712e-02, 2.305e-03, -2.850e-02, -5.494e-02)); + r += mul(s6_7, M4(-1.394e-01, -2.116e-02, -2.509e-02, -8.494e-03, -1.988e-01, -2.985e-02, 1.067e-01, 1.072e-01, -9.671e-02, -3.063e-02, -3.109e-02, -9.306e-02, 1.372e-01, 3.550e-02, -2.605e-02, -1.606e-02)); + r += 
mul(s6_8, M4(-8.505e-02, 2.873e-02, -4.789e-02, 1.880e-01, 8.025e-02, -5.238e-02, -9.657e-02, -9.533e-02, 3.661e-02, -4.519e-02, -6.684e-02, -6.811e-02, -5.086e-02, -2.249e-04, 1.957e-02, 8.516e-02)); + r += mul(s7_0, M4(1.007e-01, -2.372e-02, -5.813e-03, 1.802e-01, -6.819e-02, -5.721e-03, -2.483e-02, 4.769e-02, -1.184e-01, 1.802e-01, 6.399e-02, 1.555e-01, 2.522e-02, 7.729e-02, 6.577e-02, -1.325e-01)); + r += mul(s7_1, M4(-1.067e-01, -5.018e-02, -3.361e-02, 6.501e-03, -1.769e-01, -1.063e-02, -5.944e-02, 2.517e-02, -1.636e-02, 9.147e-02, 1.468e-01, -1.176e-02, 9.015e-02, 8.733e-02, -9.278e-03, -8.138e-02)); + r += mul(s7_2, M4(-5.646e-03, 6.273e-03, 5.086e-02, 1.846e-01, -7.993e-02, 4.087e-03, -2.429e-02, -5.967e-02, 3.460e-02, -1.314e-02, -7.778e-02, 1.111e-01, 1.842e-02, 1.330e-02, 1.344e-01, -1.669e-02)); + r += mul(s7_3, M4(6.190e-02, -7.613e-02, 4.570e-02, 3.076e-01, -4.863e-02, -7.173e-02, -5.912e-02, -3.602e-03, -7.035e-02, 1.620e-01, 8.289e-02, 7.636e-02, 8.863e-02, -5.070e-02, -5.642e-02, 3.230e-02)); + r += mul(s7_4, M4(-2.413e-02, 3.622e-02, -1.440e-01, -3.482e-01, -6.614e-02, -6.762e-02, 8.552e-02, 5.454e-02, -6.978e-02, 3.645e-01, -3.436e-03, -4.343e-02, 1.408e-01, -1.524e-01, 1.545e-01, 3.227e-02)); + r += mul(s7_5, M4(-3.345e-02, -1.264e-01, 2.253e-02, 2.380e-01, -7.325e-03, 6.590e-03, -9.107e-02, -5.741e-02, -2.025e-01, 1.058e-01, -1.391e-01, -1.825e-02, -1.094e-02, 4.747e-03, -3.962e-02, -4.997e-02)); + r += mul(s7_6, M4(7.408e-02, 2.698e-02, 4.046e-03, 2.575e-01, -6.166e-02, -3.985e-02, 1.817e-03, -7.654e-03, -7.017e-03, 5.605e-03, -1.276e-01, -8.364e-03, 5.742e-02, 2.732e-02, -6.909e-02, -3.055e-02)); + r += mul(s7_7, M4(-2.354e-02, -2.041e-02, 8.600e-02, 2.454e-02, -2.166e-02, -3.224e-02, 8.548e-02, 2.795e-02, -1.052e-01, -5.736e-02, -5.278e-02, -4.459e-02, 2.451e-01, 6.210e-03, -4.089e-02, -6.621e-02)); + r += mul(s7_8, M4(-1.025e-01, -7.455e-03, 4.737e-02, 2.978e-01, 4.053e-03, -4.398e-02, -2.958e-02, -5.774e-02, -1.732e-01, 7.104e-02, 
-5.482e-02, 5.566e-03, -5.713e-02, 1.674e-02, 1.219e-02, 4.699e-02)); + r += V4(-1.027e-02, 3.734e-03, -3.699e-03, -3.228e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.167e-02, -6.177e-02, 2.921e-02, -1.596e-01, 8.422e-02, 8.787e-04, 4.332e-02, -5.135e-02, -4.080e-02, -6.429e-03, -6.095e-02, -6.061e-02, -4.810e-02, 1.095e-02, 4.549e-02, 1.362e-02)); + r += mul(s0_1, M4(-1.199e-02, 1.175e-01, -5.049e-02, -2.183e-01, -2.373e-01, -1.157e-02, -2.956e-01, 1.045e-03, 1.205e-01, 4.900e-03, 8.430e-02, -1.391e-01, -1.061e-01, 5.416e-02, 1.508e-01, 9.890e-02)); + r += mul(s0_2, M4(-4.923e-02, 1.051e-01, 5.703e-02, -9.213e-02, 9.652e-02, -2.554e-01, 2.086e-01, -2.495e-01, -5.233e-02, -7.044e-02, -1.819e-02, -3.957e-02, 3.699e-02, 1.103e-02, 1.985e-03, 7.984e-02)); + r += mul(s0_3, M4(-1.503e-01, 9.970e-02, -2.322e-01, 1.074e-01, -1.112e-01, 4.230e-02, -1.247e-02, -7.363e-02, 3.949e-02, -3.524e-02, -9.946e-02, -3.385e-03, -6.269e-02, -9.424e-03, 1.054e-01, -1.609e-02)); + r += mul(s0_4, M4(-3.221e-02, 1.505e-01, -2.525e-02, 4.613e-02, -9.402e-02, -2.385e-02, 1.357e-01, -1.818e-01, -2.465e-01, 5.490e-02, -1.201e-01, -8.243e-02, -5.049e-02, 1.286e-01, 8.802e-02, -2.022e-01)); + r += mul(s0_5, M4(-7.998e-03, 1.040e-01, -2.233e-02, -3.037e-02, -2.865e-01, -2.228e-01, -1.800e-01, -5.392e-01, -2.904e-02, 6.367e-02, 1.076e-01, 
-5.523e-02, -4.917e-02, -2.926e-02, -2.309e-03, 5.027e-02)); + r += mul(s0_6, M4(9.511e-03, -3.289e-02, -3.204e-02, -1.909e-02, -9.725e-02, -1.774e-02, 3.847e-02, -1.456e-01, 9.323e-02, -1.696e-02, 5.904e-03, -6.039e-02, 1.211e-01, 3.649e-02, -8.103e-05, 2.501e-02)); + r += mul(s0_7, M4(3.629e-02, 4.800e-02, -4.105e-02, -1.008e-01, -1.499e-01, 5.788e-03, 4.249e-02, -8.500e-02, -6.269e-03, -5.566e-02, 6.120e-02, 4.844e-03, -1.538e-01, -3.678e-02, -2.316e-02, 1.633e-03)); + r += mul(s0_8, M4(2.647e-02, 3.629e-03, 1.782e-02, 7.636e-03, 3.270e-02, 1.552e-01, -2.059e-02, 3.761e-02, -1.712e-01, -1.871e-02, -1.158e-01, 1.193e-01, -1.106e-01, -2.810e-02, -5.419e-02, -4.526e-02)); + r += mul(s1_0, M4(-9.439e-02, -4.112e-02, -4.822e-02, -6.153e-02, -3.297e-02, 1.769e-02, 2.944e-02, -1.538e-02, -2.350e-02, -2.133e-02, -6.982e-02, -1.509e-02, 8.406e-02, 1.363e-02, 2.682e-02, 7.146e-02)); + r += mul(s1_1, M4(1.214e-02, 1.628e-02, 7.939e-03, 1.440e-01, -4.604e-02, -3.504e-02, -1.081e-03, -1.222e-02, -1.043e-01, 8.807e-03, -6.214e-02, -7.001e-02, -5.142e-02, -3.879e-02, 2.149e-02, -3.831e-02)); + r += mul(s1_2, M4(-3.897e-02, -8.496e-04, 1.726e-02, 6.070e-02, 2.546e-02, -1.543e-02, 1.621e-02, -6.286e-02, -7.894e-04, 8.739e-02, 2.938e-02, 8.286e-03, 9.474e-02, -3.996e-02, -2.151e-02, 8.518e-02)); + r += mul(s1_3, M4(-5.590e-02, -4.005e-02, -2.155e-02, 2.283e-02, 1.415e-02, -4.826e-02, 7.409e-02, -9.884e-03, -2.964e-02, 1.376e-02, 8.224e-03, 2.386e-02, -6.692e-02, -5.007e-02, 1.784e-02, -3.207e-02)); + r += mul(s1_4, M4(-2.278e-02, -6.852e-02, 1.079e-02, -1.495e-01, -1.100e-01, 4.839e-02, -1.024e-01, -7.718e-02, -2.104e-01, 2.885e-02, -8.595e-02, 5.267e-02, -9.748e-03, -1.048e-01, 1.062e-01, -1.449e-01)); + r += mul(s1_5, M4(-9.920e-02, 7.857e-03, -8.209e-02, 1.180e-02, 5.777e-02, -2.598e-02, -1.377e-02, 2.015e-02, 1.165e-01, 1.282e-02, 3.508e-02, -5.131e-02, 2.825e-02, 1.456e-02, 2.656e-02, 1.442e-02)); + r += mul(s1_6, M4(-4.397e-03, -2.337e-02, -4.174e-02, 1.481e-02, -1.727e-02, 
-3.315e-02, 1.128e-02, 1.027e-02, 7.468e-02, 1.628e-02, 2.491e-02, -5.500e-02, 5.107e-02, 3.508e-02, -3.237e-03, 9.850e-03)); + r += mul(s1_7, M4(6.569e-02, 5.595e-02, -1.277e-02, 9.735e-02, 1.102e-02, 4.480e-02, 7.978e-03, -4.399e-03, 4.086e-02, 4.144e-03, 1.167e-02, -1.048e-01, -9.323e-02, -6.502e-03, 3.126e-02, -5.535e-02)); + r += mul(s1_8, M4(3.960e-02, 2.156e-02, 1.231e-02, 1.308e-02, 1.848e-02, -2.566e-02, 4.084e-02, 9.212e-04, -8.791e-02, -1.301e-02, -6.939e-02, 8.119e-02, -6.844e-02, 1.982e-02, -3.903e-02, -3.668e-02)); + r += mul(s2_0, M4(2.252e-02, -2.096e-02, 2.735e-02, 1.510e-02, -1.478e-02, 1.116e-02, 1.077e-02, -4.348e-02, -1.584e-03, -3.120e-02, 7.883e-03, -1.087e-02, -3.217e-02, -2.267e-02, -8.129e-02, 2.307e-03)); + r += mul(s2_1, M4(8.523e-02, 1.456e-03, -3.770e-02, -1.888e-02, 1.382e-02, 5.320e-02, 4.826e-02, 3.317e-02, -7.548e-02, 4.860e-02, 1.353e-02, 1.160e-01, 8.436e-03, -1.882e-02, 1.261e-02, -7.137e-02)); + r += mul(s2_2, M4(-6.370e-02, 5.359e-03, -4.268e-02, -3.003e-02, 2.313e-02, 1.621e-02, 3.060e-03, -1.317e-02, 3.247e-03, 7.697e-02, -1.396e-02, -8.399e-04, -4.960e-04, 3.719e-02, -3.845e-02, 2.111e-02)); + r += mul(s2_3, M4(2.892e-03, -2.543e-02, 1.176e-02, 9.304e-03, 1.059e-01, -2.285e-02, 2.986e-02, 5.772e-02, -1.142e-01, -4.586e-02, -1.881e-01, 4.963e-03, -3.414e-03, -2.125e-02, -1.125e-01, -5.594e-02)); + r += mul(s2_4, M4(-5.479e-02, -5.892e-02, -3.187e-01, -1.107e-01, 1.198e-02, 4.036e-02, -6.781e-02, 3.168e-02, -1.152e-01, -5.754e-02, 2.135e-01, -2.608e-01, -8.551e-02, 9.319e-02, -1.178e-01, -8.393e-02)); + r += mul(s2_5, M4(3.025e-02, -1.882e-01, -7.997e-03, 1.191e-01, -5.916e-02, -5.380e-02, 4.412e-02, -8.905e-03, -1.309e-02, 1.924e-02, 9.663e-03, -8.942e-03, 3.374e-02, 7.815e-02, 3.357e-02, -5.947e-03)); + r += mul(s2_6, M4(-7.223e-03, 3.911e-02, -1.094e-02, 1.982e-03, 2.219e-02, -3.432e-02, -8.690e-02, 4.018e-02, -6.419e-02, 1.056e-02, -1.133e-01, 9.543e-02, -7.015e-02, 6.164e-02, -1.136e-01, 5.972e-03)); + r += mul(s2_7, 
M4(-1.036e-01, -5.610e-02, -4.592e-02, -3.629e-02, -1.625e-01, -2.212e-01, -1.272e-01, -5.642e-02, -4.976e-02, 3.654e-02, -3.748e-02, 9.545e-02, 1.100e-02, -4.112e-02, -6.416e-02, -4.099e-02)); + r += mul(s2_8, M4(1.330e-02, 1.735e-02, 6.827e-02, 3.413e-02, -1.748e-02, -1.577e-03, 1.129e-02, -2.462e-02, -3.445e-02, 7.756e-03, 2.076e-02, -4.676e-02, -4.606e-02, -2.034e-02, 1.257e-02, -3.683e-02)); + r += mul(s3_0, M4(-5.320e-02, 2.190e-02, 3.680e-02, -3.580e-02, 2.546e-02, -2.600e-03, 3.017e-02, 6.285e-03, -7.979e-02, 6.201e-02, -6.068e-02, -4.211e-02, 1.232e-01, -2.810e-02, -9.417e-02, 1.045e-01)); + r += mul(s3_1, M4(6.909e-02, -1.825e-03, -1.253e-02, -3.946e-02, -2.231e-02, 4.658e-02, 4.727e-03, -1.104e-02, 1.596e-02, -6.214e-02, 2.228e-02, -6.638e-02, 5.695e-02, 7.845e-02, -5.545e-02, 2.237e-03)); + r += mul(s3_2, M4(4.939e-02, -9.207e-02, -3.875e-02, -7.544e-02, 1.800e-03, 1.560e-02, -4.386e-03, -7.557e-03, 1.261e-02, 1.125e-02, 1.801e-02, 1.466e-02, 9.646e-03, 3.503e-02, -3.819e-02, 4.301e-02)); + r += mul(s3_3, M4(3.208e-02, -7.788e-02, 5.246e-02, 2.746e-03, -2.968e-02, -7.753e-03, 8.223e-02, -2.185e-02, -1.468e-02, -5.539e-02, -1.753e-01, -4.015e-02, 1.448e-01, -7.826e-03, -1.139e-02, -1.166e-01)); + r += mul(s3_4, M4(-3.785e-02, 1.809e-02, -1.625e-01, -5.018e-02, -2.031e-01, 3.498e-01, 9.538e-03, 1.843e-01, 1.125e-01, -9.325e-02, 1.736e-01, -1.565e-01, -1.106e-01, 3.306e-01, 9.144e-02, -8.085e-02)); + r += mul(s3_5, M4(2.068e-01, 6.464e-03, 5.312e-03, 8.993e-02, 1.227e-02, -1.338e-01, -4.559e-02, -3.015e-02, -2.496e-02, 4.393e-03, -1.793e-02, -9.457e-02, 5.664e-02, 1.082e-01, 3.646e-02, -1.086e-02)); + r += mul(s3_6, M4(2.355e-02, 1.312e-02, -4.126e-02, 2.181e-02, 1.985e-01, -1.919e-02, -1.470e-01, 9.793e-02, -1.152e-01, -3.593e-02, -1.032e-01, 4.051e-02, 1.958e-01, 7.270e-02, -4.232e-02, -5.498e-02)); + r += mul(s3_7, M4(-1.543e-02, -5.805e-02, 6.226e-02, 3.702e-03, -2.578e-01, -2.114e-01, -1.530e-02, 5.345e-02, 4.764e-02, 4.586e-02, 8.577e-02, 1.178e-01, 
-9.984e-02, -4.481e-02, -5.640e-02, -9.018e-02)); + r += mul(s3_8, M4(-1.854e-02, 8.643e-04, 1.124e-02, 2.094e-02, -1.009e-01, 3.480e-02, -1.669e-02, 3.460e-02, 3.137e-02, 8.575e-03, 4.781e-02, -2.018e-02, -1.207e-01, 1.016e-02, -4.369e-02, -1.682e-02)); + r += mul(s4_0, M4(-1.140e-02, 2.420e-02, 6.241e-03, 5.785e-03, 4.436e-02, -4.286e-02, -5.416e-02, 1.132e-02, 2.869e-02, 1.495e-02, -8.035e-02, 7.211e-02, -3.877e-02, -7.195e-02, 7.446e-02, -8.160e-04)); + r += mul(s4_1, M4(1.645e-03, 9.030e-02, -3.709e-02, 1.805e-02, -1.960e-02, -4.919e-02, 4.553e-02, -6.286e-03, 1.827e-02, -5.295e-02, 1.301e-02, 5.579e-02, 3.799e-02, -1.237e-03, 9.361e-02, 2.773e-02)); + r += mul(s4_2, M4(4.443e-02, -3.289e-02, -1.185e-02, -3.490e-04, 1.836e-02, -1.090e-01, 8.454e-04, 1.068e-02, 2.049e-02, 8.117e-03, 1.842e-02, 6.967e-02, 2.379e-02, 1.528e-02, 4.916e-03, 4.540e-02)); + r += mul(s4_3, M4(4.370e-02, -2.791e-02, 1.829e-01, -2.775e-02, -8.279e-02, 3.741e-02, -9.919e-03, 1.260e-01, -2.457e-02, -1.378e-02, -6.458e-02, 1.096e-01, 2.652e-02, 2.717e-02, 1.420e-01, 1.045e-01)); + r += mul(s4_4, M4(-8.799e-02, 1.364e-01, -1.368e-01, 4.078e-02, 1.992e-01, 4.889e-03, 8.435e-02, 4.096e-01, -3.620e-02, -1.450e-02, 3.992e-02, 2.025e-02, -9.919e-02, -2.361e-02, -1.246e-01, 1.875e-02)); + r += mul(s4_5, M4(2.370e-02, -1.002e-01, 7.320e-04, 8.148e-02, 1.327e-01, 2.469e-02, -5.037e-02, -1.869e-01, -2.726e-03, -9.535e-03, 2.615e-02, 1.987e-04, -4.594e-03, -1.220e-02, -4.273e-04, 1.355e-01)); + r += mul(s4_6, M4(1.071e-01, 2.113e-02, -4.413e-03, -4.318e-02, -4.169e-02, -1.027e-02, 2.563e-02, -1.989e-02, -2.810e-02, 2.508e-02, 1.188e-02, 3.574e-02, 7.083e-03, 4.946e-02, -4.187e-02, -9.132e-03)); + r += mul(s4_7, M4(4.131e-02, -9.007e-02, 2.709e-02, -1.204e-01, 2.620e-02, 1.383e-02, 1.197e-01, 6.891e-02, -6.550e-02, -5.608e-02, -5.058e-02, 6.390e-02, 1.111e-02, 4.349e-02, -9.714e-03, -8.326e-02)); + r += mul(s4_8, M4(-1.916e-02, -2.350e-02, -1.555e-02, -6.213e-02, 5.593e-02, 4.247e-03, -4.789e-02, 
5.948e-02, -4.069e-02, 1.600e-02, 4.016e-02, 6.093e-02, -3.119e-02, 3.793e-03, 5.541e-03, -3.793e-02)); + r += mul(s5_0, M4(7.864e-02, -1.339e-02, -4.832e-02, -1.218e-01, -2.013e-02, -6.006e-02, -5.262e-02, 2.831e-02, 3.518e-02, 3.341e-02, -1.228e-02, -1.277e-02, 1.757e-04, 2.715e-03, 3.603e-02, 3.215e-02)); + r += mul(s5_1, M4(1.407e-01, 7.350e-02, 4.234e-02, -9.416e-02, 7.673e-02, -1.417e-01, 1.398e-02, -1.021e-01, 3.521e-02, 1.478e-03, -4.151e-02, -8.832e-04, 8.912e-02, -8.051e-02, 4.631e-02, 2.308e-03)); + r += mul(s5_2, M4(1.387e-03, 2.925e-02, -2.291e-02, 2.741e-02, -2.734e-02, 6.738e-02, 4.130e-05, -2.597e-02, 2.685e-02, 1.294e-02, 1.009e-02, -1.427e-02, -3.893e-02, 6.289e-02, 3.797e-02, 9.311e-03)); + r += mul(s5_3, M4(-1.679e-02, -1.508e-01, -1.222e-01, -5.291e-02, 1.247e-02, 1.753e-02, 1.583e-01, -1.962e-02, 9.588e-02, 2.281e-02, -2.183e-03, -2.845e-02, -1.690e-01, 1.808e-02, 1.314e-02, 1.856e-01)); + r += mul(s5_4, M4(2.043e-01, -5.004e-01, -1.569e-01, -2.278e-01, 6.594e-02, 2.738e-01, -2.904e-01, 1.078e-01, -3.630e-02, 1.078e-02, -8.383e-02, -1.540e-01, -2.902e-01, 1.926e-01, -3.632e-02, 1.952e-02)); + r += mul(s5_5, M4(-8.471e-03, -5.701e-02, 3.210e-02, -9.791e-02, -1.505e-01, 2.561e-02, 2.135e-02, -6.411e-02, -5.528e-02, 2.373e-02, 1.523e-02, -6.321e-02, -6.808e-02, 8.429e-02, -5.276e-02, -8.066e-03)); + r += mul(s5_6, M4(-5.639e-02, 4.162e-04, -1.973e-02, 3.684e-03, 5.583e-02, -6.831e-03, -5.978e-03, -4.626e-02, 2.059e-02, -4.358e-02, 1.707e-02, 2.277e-02, 8.244e-02, 5.466e-02, 4.525e-02, 3.121e-02)); + r += mul(s5_7, M4(9.632e-02, 5.535e-02, 5.899e-02, 1.290e-01, -3.629e-03, -8.752e-02, -3.750e-02, 5.661e-02, 2.227e-02, -8.346e-02, -2.730e-02, 3.271e-03, -9.233e-02, 6.866e-02, 6.858e-03, 5.474e-02)); + r += mul(s5_8, M4(4.825e-02, -4.488e-02, -3.505e-04, 6.830e-02, 9.736e-02, -3.523e-02, 8.631e-02, 2.603e-02, -5.879e-02, -3.617e-03, 1.704e-02, -3.680e-02, -6.214e-02, -1.883e-02, -4.150e-02, -5.404e-02)); + r += mul(s6_0, M4(-3.835e-02, -1.861e-02, 
4.914e-03, -3.501e-03, -1.264e-02, -1.105e-02, 9.188e-02, -2.862e-02, -7.635e-02, -5.039e-02, 6.528e-03, 1.326e-02, 1.238e-01, -3.823e-02, 1.175e-01, -8.928e-03)); + r += mul(s6_1, M4(-1.286e-03, 2.701e-02, 3.120e-03, -8.478e-02, 4.093e-02, 1.146e-01, -5.110e-02, 2.109e-01, 1.133e-01, 4.819e-02, 1.865e-02, -3.246e-02, 1.944e-02, -9.811e-02, -2.425e-02, -1.544e-02)); + r += mul(s6_2, M4(-2.894e-02, -3.921e-02, -1.110e-02, -3.812e-02, 1.139e-01, -1.187e-01, 1.240e-02, -1.277e-01, 2.531e-02, -1.509e-03, 1.453e-02, 1.674e-03, 1.359e-03, -2.390e-02, 1.247e-02, -1.884e-01)); + r += mul(s6_3, M4(-7.157e-02, 2.103e-02, 2.297e-02, -8.590e-02, 1.141e-01, -9.066e-02, -7.427e-02, 1.697e-01, -6.793e-03, -1.810e-02, 1.913e-01, -2.418e-02, 1.057e-01, -9.030e-03, -7.905e-02, -1.755e-01)); + r += mul(s6_4, M4(-1.410e-01, 1.156e-02, -1.829e-01, 7.699e-02, 2.267e-01, -3.173e-01, 1.605e-01, -1.500e-01, -8.938e-02, -2.148e-01, -1.502e-02, 1.283e-01, -1.651e-01, -2.558e-01, 1.715e-01, -6.584e-02)); + r += mul(s6_5, M4(5.515e-02, -4.482e-02, 2.362e-02, 6.511e-02, 7.282e-02, 5.922e-02, 3.418e-02, 1.781e-02, -4.039e-02, -1.749e-02, 2.931e-03, 3.596e-02, 3.957e-02, 4.877e-03, 3.109e-02, 1.089e-01)); + r += mul(s6_6, M4(-1.752e-02, -8.149e-03, 1.448e-02, -4.546e-02, -2.351e-02, -3.851e-02, 7.208e-02, -1.704e-02, 8.197e-02, 7.848e-04, 6.083e-02, 4.167e-02, 7.052e-03, 4.192e-02, 1.003e-01, 2.385e-02)); + r += mul(s6_7, M4(-1.248e-01, -8.858e-02, -3.475e-02, 5.719e-02, 4.832e-02, -1.755e-01, -1.149e-01, -1.976e-01, -6.725e-02, -5.124e-02, 8.730e-04, 4.918e-03, -9.000e-02, 8.387e-02, -7.966e-03, 5.869e-02)); + r += mul(s6_8, M4(-3.407e-02, -4.766e-02, 2.795e-02, 5.029e-02, 1.758e-02, -9.677e-03, 2.503e-02, 4.713e-03, 4.451e-02, 1.719e-02, 3.436e-02, -2.860e-03, 5.170e-02, -6.352e-02, 4.891e-02, -5.977e-03)); + r += mul(s7_0, M4(-4.166e-03, -1.710e-02, 2.343e-02, 2.181e-02, -2.621e-03, -5.788e-02, 6.793e-02, 1.316e-02, -1.646e-01, 5.274e-02, 1.871e-01, 1.384e-01, 1.043e-01, -6.170e-02, 8.821e-02, 
6.585e-02)); + r += mul(s7_1, M4(-4.526e-02, -1.652e-03, -2.061e-02, 1.634e-02, -8.333e-02, 6.583e-02, -3.144e-02, 6.665e-02, -4.615e-03, 1.102e-01, -2.718e-02, 1.222e-01, -5.337e-03, 4.908e-02, 3.141e-02, 3.663e-02)); + r += mul(s7_2, M4(-1.944e-03, -3.047e-02, -2.642e-02, -1.176e-02, 1.964e-02, -1.322e-03, -1.799e-02, 6.898e-03, 8.192e-02, -3.458e-02, -5.437e-02, -1.717e-02, 2.824e-02, 9.535e-03, -2.832e-02, -6.371e-02)); + r += mul(s7_3, M4(-5.550e-02, -4.231e-02, -3.512e-02, -9.145e-02, 1.784e-02, 1.736e-02, 1.449e-03, -8.046e-03, 5.450e-03, 6.594e-02, 2.530e-01, -1.471e-02, -6.283e-02, 7.689e-02, 4.334e-02, -6.218e-02)); + r += mul(s7_4, M4(-5.743e-02, -2.237e-01, -1.098e-01, -7.820e-02, 4.270e-02, -1.128e-01, -4.468e-02, 7.768e-02, -1.054e-01, -1.931e-01, 5.287e-03, -5.812e-02, 8.197e-02, -8.961e-02, 2.512e-01, -3.005e-02)); + r += mul(s7_5, M4(4.308e-02, -9.069e-03, 5.157e-02, 1.160e-02, 5.872e-02, -9.286e-02, 1.838e-02, 2.887e-02, -9.272e-02, -1.431e-01, -5.731e-02, 2.200e-02, -1.166e-02, 6.180e-02, -1.418e-02, 6.206e-02)); + r += mul(s7_6, M4(-4.014e-02, -3.412e-02, 6.475e-02, -9.437e-02, 4.607e-02, -2.936e-02, -2.778e-03, 3.266e-03, -1.539e-01, 9.698e-03, 4.587e-02, 4.971e-02, 3.766e-02, 2.926e-02, 5.401e-02, -1.517e-02)); + r += mul(s7_7, M4(-2.628e-02, 7.130e-02, 6.909e-02, 1.381e-01, -6.860e-02, -9.530e-02, -9.711e-02, -7.830e-02, -3.834e-02, 2.078e-02, 3.598e-02, 8.188e-02, 3.320e-02, 7.825e-02, 1.107e-01, 5.268e-02)); + r += mul(s7_8, M4(-5.938e-02, -5.766e-02, 7.675e-03, -5.305e-02, -2.769e-02, 3.557e-02, -2.854e-02, 2.338e-02, 3.690e-02, 7.973e-02, -3.343e-03, 5.789e-02, -1.221e-02, -4.225e-02, -1.986e-02, -4.257e-02)); + r += V4(-7.498e-03, -1.644e-03, -9.558e-03, 1.158e-03); + return r; +} + +void Pass15(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = 
l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + 
s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, 
s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 16 +//!DESC conv15 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.160e-02, -3.732e-03, -6.784e-03, -4.348e-02, 2.435e-03, -1.543e-02, -2.768e-04, 8.964e-03, -3.942e-02, -8.685e-03, 1.014e-03, -3.346e-02, -4.470e-02, 1.599e-02, 8.741e-03, -4.553e-03)); + r += mul(s0_1, M4(2.393e-02, 1.892e-03, -1.528e-02, -3.904e-02, 1.591e-02, -8.766e-02, 2.341e-02, 
-1.420e-02, -2.348e-02, -3.359e-03, -3.797e-02, -5.681e-02, -4.980e-02, 4.332e-02, -7.959e-02, -2.571e-02)); + r += mul(s0_2, M4(-4.175e-02, -3.435e-03, 3.606e-02, -1.366e-01, 6.186e-03, 3.417e-03, 1.596e-02, -2.151e-02, 3.124e-02, -3.555e-02, -5.958e-02, -3.985e-02, 4.484e-02, -1.423e-02, -8.866e-02, -2.162e-02)); + r += mul(s0_3, M4(-2.821e-02, 1.630e-03, 5.268e-02, -7.234e-03, -4.657e-02, -9.417e-03, -2.170e-03, 7.203e-02, 1.426e-01, -6.417e-02, -4.125e-02, 5.846e-02, 1.178e-03, -7.917e-02, -1.304e-02, -1.637e-02)); + r += mul(s0_4, M4(1.462e-02, -3.415e-02, -4.206e-02, -5.123e-02, 3.062e-02, 1.558e-01, 6.164e-03, 3.124e-03, -4.108e-02, 1.781e-01, 1.494e-02, -5.756e-03, -1.418e-01, -6.695e-04, 3.986e-02, -1.386e-01)); + r += mul(s0_5, M4(3.383e-02, 4.660e-02, -1.419e-01, 5.439e-02, -4.897e-02, 1.464e-01, 4.731e-02, 4.934e-02, 8.982e-02, -1.290e-02, 1.695e-02, 9.934e-02, 7.482e-02, -9.254e-03, -1.693e-02, 6.521e-03)); + r += mul(s0_6, M4(9.238e-02, -2.743e-02, -8.319e-03, 4.795e-03, 1.365e-02, 4.003e-02, -3.559e-02, 7.802e-03, 6.003e-02, 2.358e-02, -3.161e-02, 2.074e-02, 7.416e-03, -4.118e-02, -1.775e-02, 2.159e-02)); + r += mul(s0_7, M4(4.396e-03, 1.038e-01, 6.255e-02, -9.141e-02, 3.115e-02, 1.052e-01, -4.497e-02, -6.776e-04, -1.772e-02, 9.394e-02, 4.760e-02, -1.007e-01, 1.137e-02, -5.003e-02, -1.147e-02, -1.026e-02)); + r += mul(s0_8, M4(-1.623e-02, -1.713e-02, 9.693e-02, 7.983e-03, -5.052e-03, -4.564e-02, 4.794e-02, 1.581e-02, -5.652e-03, 5.487e-02, 3.743e-03, -4.529e-02, -9.499e-02, 6.744e-03, -2.605e-03, 8.676e-03)); + r += mul(s1_0, M4(-2.647e-03, 6.805e-02, -2.540e-02, -1.147e-02, 3.226e-02, -1.310e-02, 2.787e-03, -4.002e-02, 5.170e-02, -3.792e-03, 3.074e-02, 2.460e-02, -7.882e-03, -6.123e-03, -8.279e-03, -8.565e-03)); + r += mul(s1_1, M4(1.982e-02, 3.020e-02, 3.416e-02, 1.569e-02, 1.891e-02, -2.180e-02, 6.800e-02, 3.906e-02, -1.298e-02, 9.551e-02, -1.343e-03, -4.806e-02, 1.454e-01, 1.524e-02, -2.852e-02, 2.072e-02)); + r += mul(s1_2, M4(-7.550e-03, 
3.784e-02, 8.472e-02, 2.397e-02, 5.939e-04, -2.458e-02, -3.482e-02, -7.235e-02, 6.335e-02, -1.183e-02, -2.582e-02, 1.155e-01, 2.785e-02, 4.992e-03, 7.951e-03, -1.885e-02)); + r += mul(s1_3, M4(-7.998e-02, 1.170e-01, -8.495e-02, 3.313e-02, 8.535e-03, 3.094e-02, -6.041e-02, -3.323e-02, -8.466e-02, -1.169e-01, -1.145e-01, 1.189e-01, -5.433e-02, -7.204e-02, 2.027e-02, -3.973e-03)); + r += mul(s1_4, M4(2.866e-02, -3.144e-01, 1.411e-01, -2.229e-01, -6.961e-02, 8.606e-02, -2.027e-01, 6.622e-02, -2.599e-01, 2.075e-01, -3.785e-02, -3.399e-03, -3.173e-01, 6.855e-02, -1.812e-01, -1.432e-02)); + r += mul(s1_5, M4(1.590e-02, -3.465e-02, -1.312e-01, 7.710e-02, 3.443e-02, 2.024e-01, -4.113e-02, -2.574e-02, 6.197e-02, 7.433e-03, -5.301e-02, 2.927e-01, -1.177e-02, 2.959e-02, 1.459e-01, 2.358e-01)); + r += mul(s1_6, M4(-1.945e-03, -3.489e-02, 4.217e-02, 6.407e-03, 2.843e-02, 2.252e-02, -4.049e-02, -6.170e-02, -3.052e-02, 2.717e-02, 9.793e-03, 1.795e-01, 7.723e-02, 1.182e-02, 3.257e-02, -2.977e-02)); + r += mul(s1_7, M4(6.135e-02, 1.503e-01, 6.103e-02, 1.783e-02, 6.338e-02, 1.215e-01, -1.684e-02, -1.483e-01, -3.541e-02, -8.271e-02, 1.444e-01, 1.174e-01, 1.608e-01, -5.001e-02, 8.841e-02, -3.740e-02)); + r += mul(s1_8, M4(-5.199e-03, 3.357e-03, 1.415e-02, 3.064e-02, 4.466e-02, -2.478e-02, 2.871e-02, 1.724e-02, -3.197e-02, 1.664e-01, 4.358e-02, 1.283e-01, -2.226e-02, -6.676e-02, -9.041e-02, -4.091e-02)); + r += mul(s2_0, M4(4.154e-02, 6.276e-02, 2.301e-02, -4.867e-02, 9.251e-02, 3.564e-02, 3.035e-02, 2.159e-02, 1.382e-02, -5.546e-02, 4.188e-03, 4.685e-02, 1.802e-02, 4.610e-03, 6.181e-02, -2.626e-04)); + r += mul(s2_1, M4(-1.424e-01, -1.806e-02, -2.163e-02, -8.487e-02, 3.186e-02, 8.935e-02, 8.248e-02, -1.191e-02, 2.657e-03, 8.261e-03, -4.995e-02, -1.115e-02, 1.825e-03, -2.795e-02, 4.463e-02, 7.749e-02)); + r += mul(s2_2, M4(-6.608e-02, -7.340e-03, -3.298e-03, -2.887e-02, 8.648e-03, 5.588e-03, 4.404e-03, 2.505e-04, 1.036e-02, -2.976e-02, -3.113e-02, 6.618e-03, 3.879e-02, 5.171e-02, 
6.177e-02, 1.429e-01)); + r += mul(s2_3, M4(-7.782e-03, -8.404e-02, -8.264e-03, 3.970e-02, -2.498e-01, -1.223e-01, -6.400e-02, 7.646e-02, 1.400e-03, -1.271e-02, -1.757e-02, 3.824e-02, 7.211e-02, -3.480e-02, -8.232e-02, -1.930e-02)); + r += mul(s2_4, M4(1.859e-02, 4.738e-02, -2.169e-02, 2.830e-02, -1.659e-01, -9.680e-03, 2.575e-02, 5.458e-02, -4.625e-02, -3.809e-02, 4.293e-02, -3.743e-02, -6.246e-02, 1.364e-01, 3.725e-02, -3.218e-02)); + r += mul(s2_5, M4(-4.900e-03, 5.655e-02, 6.519e-02, 1.092e-01, 1.312e-02, -9.273e-02, -9.127e-02, 5.506e-02, 2.990e-02, -1.273e-03, 1.594e-04, -7.715e-03, -1.155e-01, -4.271e-03, 2.533e-03, 8.421e-02)); + r += mul(s2_6, M4(1.121e-02, 1.366e-02, -3.063e-02, 4.596e-03, -1.070e-01, -4.462e-02, -6.899e-02, 3.754e-02, -8.587e-02, -7.628e-03, 1.734e-02, 4.919e-02, -2.737e-02, 3.897e-02, 1.264e-02, -1.606e-02)); + r += mul(s2_7, M4(2.536e-02, -3.417e-03, -1.311e-02, 3.645e-03, -1.238e-01, -7.702e-03, 1.116e-01, 1.310e-01, -2.044e-02, -4.430e-02, 1.056e-01, 5.321e-02, -2.567e-02, 1.916e-02, -8.229e-03, -4.110e-04)); + r += mul(s2_8, M4(-1.896e-03, 1.095e-02, -2.075e-02, 4.741e-02, 4.284e-02, 4.529e-02, 1.737e-02, 1.677e-01, 4.837e-02, -1.854e-02, -7.629e-02, -7.305e-02, 4.157e-02, -2.880e-02, -3.751e-02, 6.790e-02)); + r += mul(s3_0, M4(-2.048e-02, -5.507e-02, 3.214e-02, 2.952e-03, -5.350e-02, -2.185e-02, 5.288e-03, 1.490e-02, 1.499e-01, -5.311e-02, 1.924e-02, 1.557e-03, 1.908e-02, -1.304e-02, -1.599e-02, 1.458e-03)); + r += mul(s3_1, M4(1.545e-01, 1.503e-01, 1.255e-01, -2.177e-01, -4.032e-02, -3.143e-02, -3.432e-03, -1.945e-02, 2.517e-02, 4.947e-02, 3.295e-02, 6.607e-02, 1.789e-02, -1.440e-02, 5.069e-02, 3.252e-02)); + r += mul(s3_2, M4(-3.715e-02, 6.841e-02, -7.904e-02, -1.241e-02, -7.292e-03, -7.211e-03, -2.436e-02, -3.499e-02, 1.088e-02, -6.181e-03, 1.773e-02, 4.539e-02, 4.674e-02, 3.335e-02, 3.972e-02, 1.125e-01)); + r += mul(s3_3, M4(1.806e-01, -8.409e-02, 1.720e-02, -2.359e-02, 3.185e-03, 6.891e-02, -6.363e-02, 9.348e-02, -1.460e-01, 
-9.123e-03, 7.783e-04, 4.054e-02, 5.063e-02, 3.042e-03, -2.027e-02, 4.392e-03)); + r += mul(s3_4, M4(3.199e-02, 1.959e-01, 3.936e-02, -4.458e-02, -6.123e-02, 4.388e-02, -2.194e-01, 1.704e-01, -4.122e-01, -3.330e-01, 2.208e-02, -1.334e-02, -8.992e-02, 1.455e-01, 2.601e-03, -4.506e-02)); + r += mul(s3_5, M4(-3.373e-02, -2.912e-02, 9.928e-02, -1.240e-01, -2.624e-02, -5.850e-02, -1.761e-02, 1.840e-01, 3.596e-03, -1.025e-01, -7.362e-02, 2.339e-01, -9.203e-02, -4.071e-02, -6.140e-02, 3.832e-02)); + r += mul(s3_6, M4(9.961e-03, -6.187e-02, -1.933e-02, 8.530e-03, -2.826e-02, 2.153e-02, 9.054e-02, 3.754e-03, -7.562e-02, 4.114e-02, 3.296e-02, -1.196e-02, 8.955e-03, 3.717e-02, -2.063e-02, -5.733e-03)); + r += mul(s3_7, M4(-7.716e-02, 4.829e-02, -1.901e-02, -4.509e-02, -1.704e-01, 1.060e-01, 4.398e-01, -6.755e-02, 1.498e-01, -1.051e-01, 4.117e-02, 3.259e-02, -1.031e-04, 4.512e-02, -2.488e-02, 1.855e-02)); + r += mul(s3_8, M4(-2.843e-03, 2.916e-02, 6.268e-02, 6.978e-02, 2.159e-02, 4.315e-03, 5.635e-02, -2.766e-02, 1.239e-01, -4.260e-02, -6.276e-02, -3.447e-01, 1.432e-02, -2.211e-03, -3.597e-02, 8.178e-02)); + r += mul(s4_0, M4(3.200e-02, 1.179e-02, 1.528e-01, -1.062e-01, -3.512e-02, -3.233e-02, -5.515e-03, 8.355e-02, -2.727e-02, 1.016e-01, 4.321e-02, -7.570e-02, 1.983e-02, -3.089e-02, -3.552e-02, -1.610e-02)); + r += mul(s4_1, M4(8.069e-02, 9.256e-02, 1.331e-01, -2.860e-01, 5.044e-02, -7.307e-02, -4.262e-02, 7.402e-02, -2.703e-02, 4.398e-02, 2.634e-02, -9.939e-02, 4.440e-02, 2.116e-02, 3.077e-02, -7.031e-02)); + r += mul(s4_2, M4(1.557e-01, -8.318e-03, 4.308e-02, -1.724e-01, -1.435e-02, -2.526e-02, -2.729e-02, -5.076e-02, 1.765e-02, -2.867e-02, -6.233e-02, -5.362e-02, 1.678e-02, 1.497e-02, 1.017e-03, -5.645e-02)); + r += mul(s4_3, M4(-4.239e-02, -2.053e-01, -2.306e-01, 6.489e-02, 5.423e-02, -1.091e-02, -1.483e-02, 3.417e-02, -1.686e-02, 2.041e-02, -1.409e-01, 3.213e-02, -4.532e-02, 1.505e-02, 1.634e-02, 3.612e-02)); + r += mul(s4_4, M4(2.897e-01, -2.806e-01, -2.645e-01, 
8.348e-02, 8.619e-02, 1.776e-01, 1.974e-01, 2.876e-02, -3.004e-02, 3.145e-01, 4.906e-02, 1.955e-01, -1.321e-01, -2.143e-02, -6.912e-02, 6.615e-02)); + r += mul(s4_5, M4(-7.748e-03, -2.532e-01, -3.545e-01, 3.033e-02, -1.635e-02, 1.869e-02, 7.738e-02, -2.264e-02, 9.954e-02, -5.047e-02, 3.741e-03, -1.794e-01, 1.474e-02, 1.596e-03, -1.864e-02, -6.731e-02)); + r += mul(s4_6, M4(-1.653e-03, -9.351e-03, 3.484e-02, -4.003e-02, -9.898e-02, -2.235e-02, -1.330e-02, 7.185e-02, -7.997e-03, 1.449e-02, 2.336e-02, 7.019e-02, 4.894e-03, -2.798e-02, -5.484e-02, 2.123e-02)); + r += mul(s4_7, M4(-9.205e-02, 5.108e-02, 1.527e-02, 1.392e-03, 4.100e-03, -5.082e-02, -3.365e-02, 2.265e-02, 2.186e-02, 1.873e-01, -7.729e-02, 6.681e-02, 2.867e-03, -2.340e-02, 8.103e-02, -5.483e-02)); + r += mul(s4_8, M4(-1.313e-02, 4.408e-03, -8.138e-03, 5.902e-02, -3.839e-02, -2.684e-02, -4.869e-02, 4.440e-02, -2.105e-02, -7.091e-02, 1.829e-02, -3.390e-02, 3.873e-02, 4.157e-02, 1.981e-02, 1.946e-02)); + r += mul(s5_0, M4(2.334e-02, 3.993e-02, 5.202e-03, -9.693e-03, -3.201e-02, -1.384e-02, -7.217e-02, 2.663e-02, -3.987e-02, 6.253e-03, 1.969e-02, -2.946e-02, -1.711e-02, 3.179e-02, 1.067e-02, 7.245e-04)); + r += mul(s5_1, M4(-8.893e-03, 1.023e-02, 2.342e-03, -3.203e-02, -5.524e-03, -8.312e-02, -1.058e-01, 4.977e-02, 2.486e-02, 3.833e-02, 6.300e-02, -7.201e-02, -1.001e-01, -5.215e-02, -2.100e-02, -6.421e-02)); + r += mul(s5_2, M4(5.109e-02, -4.186e-02, 2.191e-02, 2.581e-02, -1.433e-02, -7.580e-03, -3.783e-02, -8.056e-03, -3.377e-02, 8.525e-04, -1.128e-03, -6.093e-02, 2.147e-02, -6.303e-03, -6.241e-02, 6.017e-03)); + r += mul(s5_3, M4(-4.761e-02, -1.372e-02, -3.552e-02, 3.599e-04, 1.214e-01, -6.299e-03, 1.480e-02, -8.976e-03, 3.266e-02, 1.771e-02, 3.796e-02, -1.831e-02, -1.391e-02, -9.186e-02, -6.225e-03, 5.898e-02)); + r += mul(s5_4, M4(8.423e-02, 3.137e-02, 4.434e-02, 1.085e-01, 3.023e-01, -8.527e-02, 7.218e-02, -5.191e-02, -1.233e-02, 1.033e-01, 9.048e-03, -4.183e-02, -9.748e-03, -2.114e-02, -2.998e-02, 
-9.669e-02)); + r += mul(s5_5, M4(3.973e-02, -7.449e-03, -2.213e-02, -3.953e-02, -9.978e-02, 4.218e-02, 5.922e-02, -3.750e-02, 8.369e-02, -8.765e-03, -8.399e-03, -1.647e-01, 1.567e-02, 6.577e-02, 2.294e-02, 1.047e-02)); + r += mul(s5_6, M4(-2.095e-02, 4.695e-03, 4.147e-04, -1.174e-02, -5.668e-02, -3.437e-02, 2.057e-02, 3.149e-02, 4.650e-02, 3.221e-04, 1.821e-02, 5.422e-07, 2.965e-02, 1.407e-04, -1.562e-02, 2.078e-02)); + r += mul(s5_7, M4(-3.733e-02, -3.165e-04, -1.253e-02, -2.114e-03, -4.983e-03, 2.351e-02, -4.800e-03, -1.981e-02, -5.786e-02, 3.940e-02, 6.295e-03, -3.255e-02, 1.012e-01, 6.146e-02, 7.819e-02, -1.884e-02)); + r += mul(s5_8, M4(2.228e-02, -6.352e-03, 3.042e-03, 5.002e-03, -5.742e-02, -9.249e-02, -1.621e-02, -9.820e-03, -1.002e-02, 5.962e-03, 4.975e-02, 6.422e-02, -7.860e-03, -1.443e-02, -2.501e-02, -9.089e-02)); + r += mul(s6_0, M4(2.593e-02, -2.106e-02, -9.315e-02, -1.588e-02, -4.834e-04, 4.033e-02, 8.170e-02, -2.834e-02, 7.213e-02, -1.698e-02, 1.755e-02, 7.160e-03, 1.784e-03, 7.740e-03, -2.678e-02, -7.900e-03)); + r += mul(s6_1, M4(-8.337e-02, -1.989e-02, 3.130e-02, 5.416e-02, 1.566e-02, 1.793e-02, 2.356e-02, -4.471e-02, 1.057e-01, 1.272e-01, -6.899e-03, -3.338e-02, -8.641e-02, 5.219e-02, 3.352e-02, 3.797e-02)); + r += mul(s6_2, M4(-2.020e-02, 1.724e-02, -4.451e-02, -1.208e-01, 2.702e-02, 2.763e-02, 2.839e-02, -7.844e-02, -1.323e-01, -5.892e-02, 8.941e-02, -4.337e-02, 3.685e-02, 1.741e-02, 2.508e-02, 2.584e-02)); + r += mul(s6_3, M4(-7.072e-02, 1.945e-01, 2.835e-02, -3.890e-02, -3.469e-02, -1.846e-01, -3.491e-02, 8.323e-02, -2.530e-02, 3.240e-02, -5.642e-02, 3.022e-02, -4.714e-02, 1.422e-01, 2.458e-02, -3.335e-02)); + r += mul(s6_4, M4(-2.238e-01, -2.635e-01, -7.144e-02, 3.293e-01, -1.628e-02, -2.398e-01, 5.062e-02, -2.532e-02, -1.285e-01, -3.389e-01, 3.699e-02, -5.371e-03, -2.601e-01, 5.951e-03, 8.429e-02, -3.201e-02)); + r += mul(s6_5, M4(-5.851e-02, 9.951e-02, 1.034e-01, -1.406e-02, -4.372e-02, -1.853e-03, -1.871e-02, 8.760e-02, -7.957e-02, 
7.662e-02, 2.337e-02, 3.486e-02, -5.366e-02, -4.282e-02, 5.851e-02, 6.527e-02)); + r += mul(s6_6, M4(-6.305e-02, 3.629e-03, -3.122e-03, 7.434e-02, 3.563e-02, 2.526e-02, -4.778e-02, -1.559e-02, 1.241e-02, 1.023e-02, 1.879e-02, -1.039e-02, 2.028e-02, 4.485e-02, 1.075e-01, 1.070e-03)); + r += mul(s6_7, M4(-3.823e-03, 2.645e-03, -1.776e-02, 2.066e-02, -2.833e-02, 3.550e-02, 1.445e-02, -4.985e-02, 1.181e-01, 3.897e-02, 2.937e-02, -9.098e-02, 7.650e-02, 3.899e-02, -4.831e-03, -3.704e-02)); + r += mul(s6_8, M4(-2.920e-02, -5.251e-02, 2.254e-02, -6.883e-02, 5.724e-03, 2.651e-02, -6.057e-03, 9.096e-03, 3.633e-02, -4.201e-02, -3.488e-02, -8.097e-02, 7.612e-02, 2.966e-02, 2.154e-02, -3.136e-02)); + r += mul(s7_0, M4(-3.748e-02, -3.980e-03, -3.379e-02, 4.711e-03, -6.762e-02, -5.473e-02, 8.404e-03, 2.376e-02, 1.582e-02, -3.229e-02, 1.971e-02, -1.053e-02, 1.884e-02, -3.556e-02, -1.367e-02, -1.498e-02)); + r += mul(s7_1, M4(5.522e-02, -4.141e-02, -6.959e-02, -1.048e-02, -2.853e-02, -4.921e-02, -8.172e-02, -5.187e-02, -2.900e-02, 2.090e-02, 2.240e-02, 7.973e-03, 7.124e-02, 4.352e-02, 1.140e-01, 5.031e-02)); + r += mul(s7_2, M4(6.579e-02, 2.978e-02, -5.727e-02, 3.299e-02, 3.543e-03, -2.614e-02, -7.571e-02, 5.378e-02, 2.043e-02, 3.240e-02, -8.687e-03, -1.598e-01, -7.516e-02, 5.146e-02, -1.406e-02, -8.078e-02)); + r += mul(s7_3, M4(-2.594e-02, 5.558e-02, -1.456e-02, -6.015e-02, 1.152e-02, -5.249e-03, -2.351e-02, 3.421e-02, -1.144e-02, 1.757e-02, -4.052e-02, 2.019e-02, -1.739e-01, 3.596e-02, -2.141e-02, 4.382e-02)); + r += mul(s7_4, M4(-1.003e-01, -5.029e-02, -6.679e-02, -4.947e-02, -3.767e-02, -1.240e-01, 5.460e-02, 1.720e-01, 7.184e-02, -2.759e-01, -8.478e-02, 9.016e-02, 1.112e-01, -7.125e-02, 2.813e-01, -5.169e-02)); + r += mul(s7_5, M4(-1.241e-01, -5.689e-03, 5.222e-02, -8.964e-02, -4.678e-03, 8.057e-02, -6.200e-03, 1.470e-01, -1.116e-01, -5.630e-02, -2.219e-02, -8.636e-02, 2.893e-03, -5.927e-03, 1.032e-01, -6.155e-02)); + r += mul(s7_6, M4(2.134e-02, -1.446e-02, -3.882e-03, 
1.371e-02, 1.431e-01, 4.379e-02, -4.228e-02, -2.528e-02, -2.155e-04, 4.310e-02, 1.660e-02, 3.111e-02, 3.602e-02, 5.593e-02, 1.453e-01, -4.403e-02)); + r += mul(s7_7, M4(2.626e-02, 6.772e-03, 4.549e-02, -3.049e-02, 6.318e-02, 5.360e-02, -4.657e-02, -6.026e-02, 3.501e-02, 4.578e-02, 7.335e-02, -2.653e-03, -8.575e-02, -1.048e-01, -1.078e-01, 4.408e-02)); + r += mul(s7_8, M4(1.421e-02, 3.581e-02, -1.463e-02, -3.530e-02, -1.142e-02, 2.423e-02, 5.455e-02, -5.861e-03, -1.333e-02, 1.092e-02, -9.548e-03, -2.754e-02, 4.699e-02, 2.851e-02, 2.327e-02, 6.692e-02)); + r += V4(8.948e-04, 1.990e-03, 9.713e-04, 1.205e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.389e-03, 7.520e-03, 1.259e-02, -6.127e-03, -2.655e-02, 8.277e-02, 2.915e-02, 6.486e-03, 1.382e-02, 2.716e-02, 3.918e-02, -6.574e-04, 2.565e-03, -2.330e-02, 3.136e-02, 9.207e-03)); + r += mul(s0_1, M4(-4.758e-02, 2.047e-02, -1.812e-01, 8.253e-02, -6.141e-02, 5.434e-02, 1.445e-02, 2.395e-02, 8.711e-02, -8.225e-02, 9.429e-03, -8.062e-03, 3.237e-02, -6.446e-02, 3.525e-02, -4.911e-02)); + r += mul(s0_2, M4(-2.555e-02, -3.058e-02, 8.718e-02, 5.322e-03, 9.204e-02, -1.655e-02, 5.964e-02, 1.076e-04, -3.852e-04, 4.576e-02, -3.975e-02, 1.268e-02, -2.084e-02, 6.389e-02, -6.946e-02, -6.118e-03)); + r += mul(s0_3, M4(-2.187e-02, -1.655e-01, -6.551e-03, 2.522e-02, 2.641e-03, 
-2.192e-02, 1.919e-02, -3.912e-02, 4.619e-03, 5.472e-02, 9.158e-03, 2.166e-02, -5.676e-02, -9.017e-03, -5.924e-02, 2.695e-02)); + r += mul(s0_4, M4(4.154e-02, 2.009e-02, 9.076e-02, 4.883e-02, -4.819e-02, 6.009e-02, -6.625e-02, -1.990e-02, 4.523e-02, -8.145e-02, -6.002e-02, -4.633e-02, -5.778e-02, -1.835e-01, -5.321e-03, 2.915e-01)); + r += mul(s0_5, M4(-4.155e-03, 2.182e-02, -1.415e-01, -4.433e-02, -1.183e-01, -3.640e-02, -1.119e-03, -1.014e-01, -1.588e-02, 2.212e-02, 2.553e-02, 6.293e-03, 1.599e-04, -3.732e-03, 2.497e-02, 8.028e-02)); + r += mul(s0_6, M4(-6.323e-02, -1.055e-01, -1.316e-02, 2.636e-02, 9.896e-03, 1.086e-02, -8.842e-03, -3.453e-02, -8.107e-02, -1.236e-01, 4.905e-02, 3.644e-02, -2.301e-02, 2.756e-02, 2.838e-02, 1.820e-02)); + r += mul(s0_7, M4(1.173e-04, 1.045e-01, -4.923e-02, 1.502e-01, 1.812e-02, -3.892e-02, -6.967e-02, -9.212e-02, -4.975e-02, 9.300e-02, -3.681e-02, -3.613e-02, 2.705e-03, -4.590e-02, 5.355e-02, 4.852e-02)); + r += mul(s0_8, M4(-1.247e-02, 1.215e-02, 8.609e-02, 2.064e-02, 2.757e-02, 2.189e-02, 3.840e-03, 8.995e-02, -4.468e-02, 5.286e-02, 1.303e-02, -1.663e-02, -1.885e-02, -1.112e-01, 3.460e-02, -6.100e-02)); + r += mul(s1_0, M4(-6.209e-03, -3.165e-02, 7.816e-03, -3.634e-03, 8.947e-03, 6.921e-02, -3.440e-02, 1.416e-02, 6.665e-02, -7.318e-02, 1.376e-01, -2.433e-02, -6.348e-02, 1.719e-02, -9.155e-02, 4.758e-02)); + r += mul(s1_1, M4(-2.517e-02, -3.704e-02, -1.910e-01, 5.597e-02, 1.456e-02, 4.829e-02, -4.931e-02, -4.123e-02, 1.742e-01, 5.163e-02, 2.168e-02, 2.448e-02, 9.981e-02, -3.471e-02, 1.550e-01, 2.594e-02)); + r += mul(s1_2, M4(-2.252e-02, 6.209e-03, 8.043e-02, -6.079e-03, -3.058e-02, 2.503e-02, -5.771e-03, -5.417e-02, 2.677e-02, -2.000e-02, -8.863e-03, -2.313e-02, 3.216e-02, 6.615e-02, 7.237e-02, -1.405e-02)); + r += mul(s1_3, M4(-4.097e-02, -8.106e-02, -8.285e-02, -3.040e-02, -1.665e-02, -3.258e-02, 1.702e-02, -1.129e-01, -2.340e-02, -5.312e-02, -9.272e-02, 6.036e-02, -5.192e-02, 8.648e-02, 3.332e-02, 4.482e-02)); + r += 
mul(s1_4, M4(-2.622e-01, 1.383e-01, 1.763e-01, 3.596e-02, -1.208e-01, -2.145e-02, -4.241e-02, 1.184e-03, 7.848e-03, 5.837e-02, 2.803e-02, 1.070e-01, -2.225e-01, -1.420e-01, 1.186e-01, 3.776e-01)); + r += mul(s1_5, M4(1.724e-02, 2.162e-02, -9.960e-02, 2.283e-02, 9.574e-03, -1.244e-03, 8.956e-02, -6.425e-02, -1.665e-03, -6.402e-02, 1.868e-01, 1.540e-02, -5.122e-02, 5.534e-03, -5.407e-02, 3.521e-02)); + r += mul(s1_6, M4(-2.751e-02, -4.400e-02, -5.879e-02, 9.095e-02, -1.055e-02, -8.811e-02, -1.175e-02, -3.915e-02, -7.902e-02, 3.564e-02, 7.063e-02, 1.030e-01, -5.694e-02, 4.038e-02, 2.582e-02, 1.242e-02)); + r += mul(s1_7, M4(-4.710e-02, 5.456e-02, 1.631e-02, -4.291e-02, -4.871e-02, 1.195e-01, -6.078e-02, -1.251e-02, -6.566e-03, -5.065e-03, -6.147e-02, 1.044e-01, -2.083e-02, -1.261e-01, -1.621e-02, 1.057e-01)); + r += mul(s1_8, M4(-4.416e-02, -8.665e-03, 7.289e-02, 3.140e-02, -3.017e-02, -9.005e-02, -1.364e-02, 7.446e-02, -9.741e-02, -7.978e-02, 7.782e-02, -7.551e-02, 9.433e-03, 9.623e-03, -2.048e-02, -2.947e-02)); + r += mul(s2_0, M4(-2.772e-02, -2.164e-02, 7.659e-03, 3.794e-02, 3.537e-02, -2.254e-03, 4.604e-02, -2.204e-02, 1.463e-02, -9.815e-03, 4.111e-02, -2.033e-02, -2.097e-02, 5.408e-02, 6.952e-03, 7.498e-03)); + r += mul(s2_1, M4(-8.917e-02, 2.835e-02, -6.648e-02, 8.636e-02, 8.864e-02, -1.684e-02, 1.480e-02, -4.429e-02, 7.328e-02, -1.959e-02, -3.038e-02, -5.018e-02, -1.222e-01, -5.620e-02, -9.890e-02, -5.691e-03)); + r += mul(s2_2, M4(6.699e-03, -5.826e-02, 1.179e-01, 2.583e-02, 3.530e-02, 1.282e-02, -2.807e-02, 4.409e-02, -1.438e-02, -3.262e-02, -1.637e-02, 5.157e-02, 3.423e-02, -7.116e-02, -7.770e-03, -1.286e-01)); + r += mul(s2_3, M4(2.608e-03, 1.971e-03, 4.447e-02, 2.838e-02, 6.664e-02, 5.280e-02, -2.170e-02, 2.916e-02, 3.502e-02, 1.525e-02, 7.754e-03, -3.301e-02, -2.718e-02, 1.582e-02, -1.755e-02, -2.574e-02)); + r += mul(s2_4, M4(8.622e-02, -2.638e-02, 3.665e-02, 8.406e-02, 1.411e-01, 9.089e-02, 2.236e-02, 1.003e-01, -9.570e-02, -2.679e-02, -1.657e-02, 
1.166e-02, -5.251e-02, 5.898e-02, -1.092e-01, -7.743e-02)); + r += mul(s2_5, M4(6.079e-03, -5.918e-02, -3.265e-02, -1.002e-01, 2.006e-02, 1.386e-02, 1.995e-03, 1.169e-01, -8.563e-02, 1.317e-02, 7.274e-03, -7.258e-02, 3.792e-02, -4.827e-02, -9.086e-02, -8.636e-03)); + r += mul(s2_6, M4(-3.646e-04, -2.339e-03, 2.319e-04, -1.914e-02, -3.163e-02, 9.229e-02, 5.341e-02, 6.546e-03, 2.506e-02, -1.426e-02, 9.487e-03, 1.894e-02, -4.312e-03, -1.777e-02, -3.552e-02, 1.463e-02)); + r += mul(s2_7, M4(2.148e-02, -9.688e-02, -9.999e-05, -2.830e-02, 4.155e-02, -6.702e-02, -6.066e-02, 1.354e-01, -3.162e-02, 3.835e-02, 1.017e-02, -3.827e-02, 2.321e-02, -1.329e-01, -7.002e-02, -3.650e-02)); + r += mul(s2_8, M4(7.319e-03, 5.381e-02, 2.118e-02, -9.138e-03, -2.484e-02, 5.284e-02, -1.983e-02, 1.940e-02, 3.993e-02, 8.693e-02, 1.614e-03, -2.157e-02, 2.590e-02, 3.161e-02, -1.333e-02, 1.129e-02)); + r += mul(s3_0, M4(-3.835e-02, -1.086e-01, -6.134e-02, 5.504e-02, 3.912e-02, 2.655e-02, 1.710e-02, 1.143e-02, 4.701e-02, 6.549e-02, 6.884e-02, -1.620e-02, -5.762e-02, 9.618e-02, 1.094e-02, 1.850e-02)); + r += mul(s3_1, M4(-3.649e-02, -6.180e-02, -6.058e-02, 3.240e-02, 7.819e-02, -3.521e-02, -4.401e-03, 3.315e-04, 2.334e-01, 2.764e-02, -2.176e-02, -5.948e-02, -5.164e-02, 4.412e-02, -4.379e-02, 2.517e-02)); + r += mul(s3_2, M4(7.464e-02, 6.763e-02, 4.442e-02, -1.586e-02, 4.191e-02, -1.634e-03, 2.254e-02, 1.911e-02, -6.519e-03, -1.946e-02, -1.119e-01, 2.122e-02, 1.112e-02, 4.285e-02, 3.919e-03, -8.520e-02)); + r += mul(s3_3, M4(4.564e-02, 3.957e-02, 4.070e-02, 4.562e-02, 9.257e-02, 1.840e-01, 1.044e-01, -7.292e-02, 1.704e-02, 1.096e-02, -1.576e-02, 1.956e-02, -7.280e-03, 2.829e-02, 4.816e-02, -5.652e-02)); + r += mul(s3_4, M4(-6.038e-02, -4.830e-02, -3.901e-02, -5.662e-02, 1.457e-01, -8.420e-02, -1.141e-01, 1.137e-02, -2.900e-01, -4.448e-02, 5.993e-02, 5.101e-02, -4.159e-02, 2.104e-02, -5.328e-02, -8.542e-02)); + r += mul(s3_5, M4(6.747e-02, -1.077e-01, -5.995e-02, 1.671e-02, -9.475e-03, 5.400e-02, 
2.693e-02, 3.299e-02, 1.088e-01, -6.452e-02, -1.231e-01, -3.380e-02, 4.675e-02, 1.599e-01, -3.624e-02, -8.522e-02)); + r += mul(s3_6, M4(7.962e-03, -8.543e-03, -3.318e-02, -5.200e-03, 7.211e-03, -1.726e-01, 1.866e-02, 8.409e-02, -8.670e-03, -9.018e-03, 5.544e-03, 6.002e-03, -2.450e-02, -1.675e-02, -2.034e-03, -2.768e-02)); + r += mul(s3_7, M4(1.023e-02, -1.049e-02, -1.083e-02, -6.048e-03, -1.614e-02, 1.820e-01, -3.182e-02, 1.057e-01, -8.497e-02, -7.472e-02, -4.692e-02, -1.809e-01, 2.098e-03, -5.967e-02, -1.593e-02, -8.026e-02)); + r += mul(s3_8, M4(-6.207e-04, -7.989e-02, 2.571e-02, -8.136e-03, -7.600e-02, -8.224e-03, 2.579e-03, -9.677e-03, 1.446e-02, 5.286e-02, 5.722e-02, -2.114e-01, 2.445e-02, 1.152e-01, 2.590e-02, -3.222e-02)); + r += mul(s4_0, M4(1.107e-01, -1.932e-01, 1.181e-01, 1.244e-01, 1.052e-02, -2.045e-03, -5.702e-02, -5.084e-02, 1.022e-02, 1.248e-01, 3.951e-02, 2.430e-02, -1.406e-02, -7.153e-02, -1.988e-02, -3.406e-02)); + r += mul(s4_1, M4(2.168e-01, 3.301e-01, -3.037e-01, -1.521e-01, -1.148e-03, -1.835e-02, 1.568e-01, -4.724e-02, 9.126e-03, -4.870e-02, 4.026e-01, 3.410e-02, 2.907e-02, -5.150e-02, 1.901e-02, 1.412e-02)); + r += mul(s4_2, M4(-2.752e-02, 1.816e-01, 4.655e-02, -7.580e-03, -1.531e-02, -2.478e-02, 4.788e-02, 9.896e-03, 4.648e-02, -1.044e-03, -2.435e-01, 3.216e-02, -7.542e-03, 3.881e-02, -4.490e-02, -1.692e-02)); + r += mul(s4_3, M4(4.083e-03, -8.252e-02, 7.615e-02, -6.034e-03, -4.900e-02, -7.941e-04, -2.180e-02, 4.432e-02, -1.169e-01, -3.038e-01, -1.582e-01, 3.322e-02, 2.337e-02, 9.954e-02, 5.759e-02, -3.145e-02)); + r += mul(s4_4, M4(1.809e-01, 1.998e-01, -2.385e-01, -1.622e-01, -2.636e-01, -5.135e-02, -1.089e-01, -7.099e-02, -4.772e-02, -1.316e-02, 6.929e-02, 2.244e-01, -6.401e-02, -1.020e-01, -3.989e-02, -4.384e-02)); + r += mul(s4_5, M4(1.310e-01, -3.503e-02, -3.116e-02, 4.078e-02, -7.105e-02, 7.444e-02, 1.083e-01, -2.499e-02, -1.096e-01, 1.192e-01, 2.203e-01, 6.741e-02, 7.082e-02, -3.199e-02, -2.389e-02, -7.691e-03)); + r += mul(s4_6, 
M4(2.407e-02, -8.619e-02, 1.150e-02, 6.375e-02, -2.031e-02, -4.217e-03, -5.799e-02, -3.167e-02, -1.169e-02, 1.101e-01, 6.112e-02, 9.285e-02, -1.937e-02, -6.077e-02, -2.599e-02, -3.607e-02)); + r += mul(s4_7, M4(6.888e-02, 8.476e-02, -3.626e-02, -9.159e-02, 8.328e-02, 7.028e-02, 1.003e-01, -1.180e-01, 4.958e-02, 2.138e-01, -3.550e-02, -9.948e-02, 4.862e-02, 3.980e-02, 4.726e-02, 1.236e-01)); + r += mul(s4_8, M4(-6.420e-02, 1.342e-02, -1.840e-02, 4.381e-03, 5.058e-02, -4.503e-02, -1.165e-02, -5.512e-03, 1.917e-02, -8.328e-02, -4.917e-02, 8.961e-02, -1.187e-03, -6.838e-02, 8.467e-03, 1.941e-02)); + r += mul(s5_0, M4(-1.283e-02, -3.051e-02, -2.338e-02, 2.785e-02, -8.060e-02, -3.968e-02, -8.266e-03, -3.505e-02, -1.696e-02, 1.227e-03, -6.607e-02, 5.849e-02, 1.190e-02, -5.225e-02, 7.617e-02, -3.870e-02)); + r += mul(s5_1, M4(-1.012e-02, 2.933e-02, 6.008e-02, -5.941e-02, -1.743e-01, 1.073e-01, 8.960e-02, 3.006e-02, -7.397e-03, -8.892e-03, 1.642e-01, 8.724e-03, -8.483e-03, 3.919e-02, -3.170e-02, 3.047e-02)); + r += mul(s5_2, M4(-1.243e-02, 4.956e-02, -6.467e-02, -3.278e-03, -4.259e-02, -6.732e-02, 1.487e-01, 6.521e-02, 1.985e-03, 4.255e-02, -7.472e-02, 1.858e-02, 7.445e-02, -5.746e-03, 6.434e-02, 3.068e-02)); + r += mul(s5_3, M4(-3.875e-02, 3.297e-02, 4.139e-02, 3.460e-03, -8.803e-02, -4.263e-02, -3.565e-02, 1.007e-01, 3.837e-03, -2.905e-02, 7.172e-02, 1.306e-02, 2.493e-02, -5.609e-02, -8.131e-03, 6.291e-02)); + r += mul(s5_4, M4(1.073e-02, -3.688e-02, -2.959e-02, 9.288e-02, -2.241e-01, 3.321e-01, 1.306e-02, -1.070e-01, -1.482e-02, 2.946e-01, -8.599e-02, -4.609e-02, 2.239e-02, -2.737e-02, 2.610e-02, -4.906e-02)); + r += mul(s5_5, M4(2.187e-02, 1.376e-02, -3.030e-03, 7.956e-03, -5.125e-02, 1.164e-01, 1.200e-02, -9.520e-03, -5.519e-02, 4.714e-02, 2.154e-01, 1.160e-01, -1.639e-02, 1.855e-02, -1.008e-01, -9.249e-03)); + r += mul(s5_6, M4(-3.453e-03, 3.348e-02, 1.712e-03, -5.011e-04, -2.201e-02, 2.151e-02, 1.979e-03, -3.395e-03, -1.213e-02, 2.729e-02, -1.139e-02, 2.189e-02, 
-5.663e-03, -2.467e-02, 2.476e-02, -7.377e-03)); + r += mul(s5_7, M4(9.396e-03, -7.747e-02, -2.811e-03, -2.149e-02, -1.751e-02, -3.344e-02, 3.985e-02, -1.433e-01, 2.146e-02, 1.891e-01, 3.188e-02, -1.117e-03, -4.576e-02, 1.339e-02, 3.734e-02, 1.048e-01)); + r += mul(s5_8, M4(1.191e-02, -1.195e-03, -2.296e-02, -1.627e-02, 2.020e-02, -2.480e-02, 3.299e-03, -8.490e-03, 3.822e-03, -5.529e-02, -1.678e-02, 6.861e-02, -2.428e-02, -8.002e-03, 5.558e-02, 1.979e-02)); + r += mul(s6_0, M4(-5.811e-02, 4.039e-02, -6.513e-02, -6.604e-03, 1.075e-01, 2.321e-02, 5.469e-02, 8.040e-03, 2.506e-02, 1.163e-02, 3.306e-02, -1.456e-03, -1.529e-02, -5.932e-02, -5.833e-02, 2.738e-03)); + r += mul(s6_1, M4(-4.884e-02, -1.715e-01, -1.916e-01, 2.897e-02, 1.843e-01, -4.692e-02, -1.853e-02, -2.230e-02, 1.414e-01, 7.402e-02, -1.075e-01, -3.494e-02, -4.290e-02, 1.058e-01, -1.465e-01, -2.229e-02)); + r += mul(s6_2, M4(-1.153e-01, -7.394e-02, -6.556e-02, -6.556e-02, -8.043e-03, 1.193e-02, 5.607e-02, 1.844e-03, -1.837e-01, 9.215e-02, 9.504e-02, 9.817e-03, 2.725e-02, 1.198e-02, 5.897e-02, 3.098e-02)); + r += mul(s6_3, M4(1.761e-02, 4.537e-02, 1.067e-01, -8.746e-03, 1.787e-02, -9.920e-02, -3.239e-02, 5.108e-02, 4.780e-03, -9.644e-02, -1.311e-02, -3.046e-02, -2.323e-02, -3.263e-02, 5.415e-02, -4.851e-02)); + r += mul(s6_4, M4(-8.053e-02, 1.545e-02, 1.066e-01, -4.617e-02, -4.559e-02, 1.879e-01, 9.376e-02, -7.496e-02, 6.741e-02, 1.779e-01, 6.361e-02, -7.797e-02, -1.107e-01, 2.214e-01, -2.272e-02, 1.426e-01)); + r += mul(s6_5, M4(-6.805e-02, -3.927e-02, -6.014e-03, -7.738e-02, 9.368e-02, -1.609e-01, -1.065e-01, 1.225e-02, -6.853e-02, 1.932e-01, -1.668e-02, -9.059e-02, 6.658e-02, 3.841e-02, -5.200e-03, 5.170e-02)); + r += mul(s6_6, M4(-4.656e-02, 4.504e-02, -5.287e-02, -1.644e-02, -3.346e-02, 3.679e-02, 1.028e-02, -1.186e-02, 1.765e-02, -3.214e-02, -6.109e-03, 1.846e-02, 3.606e-02, -2.207e-02, -2.926e-02, 1.434e-02)); + r += mul(s6_7, M4(-8.875e-02, -1.138e-01, 3.614e-02, 5.972e-02, -4.303e-02, -6.824e-02, 
-6.290e-02, 3.037e-02, 8.320e-02, 5.187e-02, 2.754e-04, -2.584e-02, -1.107e-02, 9.315e-02, -6.093e-03, -6.393e-02)); + r += mul(s6_8, M4(1.157e-02, -4.621e-02, -6.926e-02, 8.876e-02, -9.391e-03, 3.727e-02, -4.458e-02, 2.121e-02, 2.106e-02, -3.948e-02, 8.082e-02, 2.461e-02, 6.079e-02, -4.652e-02, -2.836e-02, 2.681e-02)); + r += mul(s7_0, M4(5.114e-03, -2.569e-02, 3.260e-02, -2.151e-02, 7.769e-02, -4.802e-02, 5.664e-02, -1.079e-02, 1.984e-02, 1.183e-01, -1.139e-02, -1.175e-02, 3.094e-02, -1.100e-01, -4.499e-02, 1.390e-02)); + r += mul(s7_1, M4(-3.389e-02, 4.241e-02, 5.527e-03, -2.112e-02, 1.048e-01, 9.467e-02, 5.713e-02, 2.728e-02, 5.989e-03, -9.983e-02, 7.461e-03, 1.058e-02, -5.544e-02, -8.213e-02, -1.014e-01, -3.043e-02)); + r += mul(s7_2, M4(7.503e-02, -1.825e-02, 5.429e-02, -3.717e-02, 9.214e-02, -5.261e-02, 1.210e-02, -3.390e-03, -3.252e-02, -1.036e-02, 2.489e-02, 6.203e-02, -1.003e-01, -4.716e-02, 1.022e-01, -1.309e-02)); + r += mul(s7_3, M4(-7.687e-03, -3.867e-02, -1.057e-02, 6.437e-03, -2.496e-03, -3.949e-02, -1.606e-02, 8.343e-02, -5.804e-03, 4.233e-02, -4.025e-03, -6.402e-02, -1.085e-02, -5.191e-02, -2.972e-02, -5.956e-02)); + r += mul(s7_4, M4(3.281e-02, 1.172e-01, -3.093e-02, -1.255e-01, -4.895e-02, 2.468e-01, 5.429e-02, -1.111e-02, -1.089e-01, -1.431e-01, -1.288e-02, 4.952e-02, -7.425e-02, 2.862e-01, -1.732e-01, 1.773e-01)); + r += mul(s7_5, M4(-1.177e-02, -1.894e-02, 1.171e-01, 4.028e-02, -1.568e-02, -9.985e-02, 1.032e-02, 2.611e-02, 8.357e-02, -1.436e-01, -2.002e-02, 1.269e-01, -4.641e-02, 1.211e-01, 1.137e-01, -1.541e-02)); + r += mul(s7_6, M4(-4.138e-02, -2.104e-02, 3.944e-03, 2.774e-02, -4.084e-02, -2.422e-03, 6.718e-03, -5.330e-02, -4.369e-03, -1.373e-02, -5.662e-02, -2.131e-02, -3.799e-03, -1.844e-01, 9.049e-03, 1.410e-02)); + r += mul(s7_7, M4(-3.916e-02, 1.121e-02, 1.577e-02, 1.032e-01, -7.639e-02, -1.868e-02, -3.693e-03, 2.118e-02, 3.999e-02, 7.610e-02, -1.003e-02, 1.874e-02, 1.143e-02, 9.148e-02, 5.547e-02, -9.316e-02)); + r += mul(s7_8, 
M4(-8.276e-02, 4.820e-02, 1.563e-02, -8.235e-04, 1.295e-02, 5.992e-02, -1.244e-05, 4.372e-02, 2.411e-02, -1.194e-01, 5.303e-02, -8.634e-03, 1.812e-02, 4.170e-02, -3.776e-03, 3.476e-03)); + r += V4(6.314e-03, -4.083e-03, -5.575e-03, 2.198e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.317e-02, 2.140e-02, 5.416e-02, -3.220e-02, -5.220e-02, -5.906e-03, -1.792e-02, 3.296e-02, -4.396e-03, -9.631e-03, 6.281e-02, -4.701e-02, -6.206e-02, -7.251e-02, 4.026e-02, -1.600e-02)); + r += mul(s0_1, M4(2.089e-02, 6.746e-02, -5.823e-03, 5.393e-02, 5.533e-02, 1.709e-02, -3.691e-02, 8.463e-02, 4.272e-02, -3.489e-02, -4.922e-02, 7.840e-03, 5.114e-03, -1.452e-02, 3.875e-02, 1.932e-02)); + r += mul(s0_2, M4(5.195e-03, 1.274e-03, -5.657e-02, 1.168e-02, -3.941e-02, -1.394e-02, -7.360e-02, 3.376e-02, 2.325e-02, -7.857e-03, 2.286e-02, 8.450e-03, -4.495e-03, 2.191e-02, -1.329e-02, 1.460e-02)); + r += mul(s0_3, M4(1.573e-04, -4.436e-02, -2.109e-02, -1.335e-01, -1.842e-02, -4.377e-02, -6.477e-02, 2.106e-02, 4.104e-02, -5.521e-02, -9.645e-02, 5.413e-02, 3.379e-02, -1.389e-02, 2.989e-02, -2.019e-02)); + r += mul(s0_4, M4(1.023e-01, -1.913e-01, -2.146e-01, 6.818e-02, -1.643e-01, -1.071e-02, -1.921e-01, 1.987e-01, 1.573e-01, 8.096e-02, -2.236e-01, 9.724e-03, 1.502e-01, 9.444e-02, -7.957e-03, -2.216e-02)); + r += mul(s0_5, M4(1.138e-01, 
-4.090e-02, -2.311e-02, -7.045e-03, 8.998e-02, 9.199e-02, -1.691e-02, -9.715e-02, -9.920e-02, -4.076e-02, 2.764e-02, -2.509e-03, -9.408e-02, -3.508e-02, 3.689e-02, 3.136e-03)); + r += mul(s0_6, M4(-8.569e-02, -5.073e-02, 1.737e-02, -1.988e-02, 3.497e-02, -3.528e-02, 4.473e-02, 1.117e-02, -3.247e-02, 3.441e-02, 4.658e-02, 3.563e-03, -6.532e-02, 3.094e-02, -2.603e-02, -1.594e-02)); + r += mul(s0_7, M4(-3.660e-02, 2.247e-02, 9.556e-03, -3.751e-02, 3.481e-02, -7.663e-02, 5.949e-02, 3.491e-02, -6.302e-02, -1.131e-02, 4.001e-02, 4.220e-02, -7.085e-02, 8.020e-02, -1.445e-02, 2.678e-03)); + r += mul(s0_8, M4(-2.271e-02, 7.600e-03, -2.439e-02, 3.518e-02, 1.157e-01, 1.564e-02, -5.385e-02, 4.756e-02, -1.477e-02, 3.291e-02, 6.282e-02, -3.349e-02, -8.181e-02, 2.934e-03, 2.245e-02, 3.534e-03)); + r += mul(s1_0, M4(-4.513e-03, 5.126e-03, 2.772e-02, 3.921e-03, 5.944e-02, 3.714e-02, -2.007e-02, 1.012e-02, -5.767e-02, -1.997e-02, 3.428e-02, -5.718e-02, 1.012e-01, -7.172e-03, -3.383e-02, 1.009e-02)); + r += mul(s1_1, M4(6.715e-02, -5.956e-03, 4.303e-03, -3.528e-02, -1.212e-01, -1.498e-01, 3.718e-02, 3.506e-03, -1.220e-02, 1.185e-02, 4.649e-02, -4.015e-02, 8.337e-02, -8.463e-02, -4.352e-02, 8.703e-02)); + r += mul(s1_2, M4(-5.595e-02, -1.257e-02, -8.995e-03, 3.012e-03, 2.058e-02, 7.551e-02, -8.898e-02, 3.804e-02, -4.212e-04, 1.656e-02, -2.705e-02, -2.645e-03, 1.868e-03, -3.374e-02, -7.498e-03, -6.545e-03)); + r += mul(s1_3, M4(-7.578e-02, 5.953e-02, -4.641e-02, -1.321e-01, -1.298e-01, 8.306e-03, -3.359e-02, -7.056e-02, 1.799e-01, -1.448e-01, -1.019e-01, 9.464e-02, -3.545e-02, -9.008e-03, -5.548e-02, 2.500e-01)); + r += mul(s1_4, M4(-2.196e-01, 7.144e-02, -4.200e-02, -1.117e-01, -9.059e-03, -3.158e-01, -2.360e-01, 3.103e-01, 1.813e-02, 5.464e-01, -1.661e-01, -8.085e-02, 5.253e-01, 8.802e-03, -1.143e-01, -2.565e-02)); + r += mul(s1_5, M4(1.983e-02, -2.760e-02, 2.705e-03, 2.669e-03, 2.214e-01, 4.875e-02, 4.467e-02, -5.118e-02, -1.282e-01, -6.104e-02, -4.064e-02, 6.221e-03, -4.961e-02, 
6.540e-02, 2.248e-02, -6.910e-02)); + r += mul(s1_6, M4(2.671e-02, 5.138e-02, -1.806e-02, 5.722e-02, 1.702e-02, -1.124e-02, 3.056e-02, 2.635e-02, 1.622e-02, 1.989e-02, -9.058e-02, -1.958e-01, 7.815e-02, -8.188e-02, 5.310e-03, 4.956e-02)); + r += mul(s1_7, M4(-4.606e-02, 2.003e-02, 9.441e-02, -1.335e-01, 5.994e-02, -4.677e-02, 2.642e-02, -1.509e-01, 1.292e-01, 1.304e-01, -2.094e-01, -1.505e-02, 3.770e-04, -1.385e-01, 4.338e-02, 8.370e-03)); + r += mul(s1_8, M4(3.139e-02, -1.360e-02, -2.001e-02, 1.669e-02, 6.420e-02, 3.712e-03, -3.536e-02, -1.866e-02, -8.856e-02, 7.694e-02, 3.597e-02, -3.272e-02, 5.091e-02, -7.110e-03, -2.042e-02, -1.545e-03)); + r += mul(s2_0, M4(-4.959e-02, -1.329e-02, 7.463e-02, -5.105e-02, -6.674e-02, -1.251e-02, -7.988e-02, -2.182e-02, -3.110e-02, 5.533e-03, -8.445e-03, 3.012e-02, -2.530e-03, -2.695e-02, -1.047e-02, 1.376e-03)); + r += mul(s2_1, M4(-2.554e-02, 3.709e-02, 2.154e-02, 2.171e-02, 1.773e-02, 3.657e-02, 4.825e-03, -2.766e-02, 7.339e-02, 4.920e-03, 1.234e-02, -2.246e-02, 2.705e-02, -3.987e-02, -6.584e-02, 5.236e-02)); + r += mul(s2_2, M4(-9.939e-02, -4.689e-03, 6.423e-03, -3.887e-02, 1.546e-02, -1.286e-03, -4.168e-03, -5.610e-03, 4.064e-02, -1.559e-02, 6.352e-02, 4.886e-03, 3.035e-02, 4.711e-02, -6.727e-02, -1.255e-02)); + r += mul(s2_3, M4(-6.140e-02, 2.251e-02, -7.983e-02, 4.682e-02, 7.263e-02, -9.888e-02, 1.013e-01, 2.559e-02, 4.873e-02, -3.090e-02, 1.298e-02, 3.469e-02, 1.480e-02, -1.960e-02, -2.372e-02, 7.838e-02)); + r += mul(s2_4, M4(1.145e-01, 3.273e-02, -9.605e-02, 9.318e-02, 7.247e-02, 2.200e-01, 6.268e-02, -5.290e-02, -6.122e-02, 1.093e-01, 8.228e-02, 1.287e-02, -1.459e-01, 8.318e-02, -4.853e-02, 5.085e-02)); + r += mul(s2_5, M4(-4.779e-02, 8.225e-02, 2.385e-02, -4.571e-02, -9.574e-02, -4.217e-02, -4.030e-02, 2.191e-02, -5.811e-02, 1.499e-01, 6.884e-03, -2.423e-02, 5.154e-02, 6.224e-02, -4.652e-03, -6.035e-02)); + r += mul(s2_6, M4(3.869e-03, -2.026e-02, 6.053e-03, -5.713e-02, -1.018e-01, 9.545e-02, -4.759e-02, -9.942e-02, 
-1.872e-02, 4.380e-02, -3.654e-02, 8.080e-02, -4.077e-02, -2.924e-03, 7.406e-03, 3.256e-02)); + r += mul(s2_7, M4(-8.854e-02, -4.094e-02, -2.080e-03, -1.691e-02, 9.260e-02, -6.120e-02, -1.187e-01, -1.536e-02, 8.251e-02, -8.033e-02, -8.385e-04, 2.646e-02, 1.253e-01, -1.084e-01, 2.630e-03, 4.265e-02)); + r += mul(s2_8, M4(-6.622e-03, 5.509e-02, -8.380e-03, -1.103e-02, -3.211e-02, -3.222e-02, 5.015e-02, -1.870e-02, -1.441e-02, 6.121e-02, -3.573e-02, 1.764e-02, -1.146e-02, 4.329e-02, -2.472e-02, -5.634e-02)); + r += mul(s3_0, M4(4.565e-02, -9.615e-02, 1.248e-01, 2.272e-02, 1.459e-02, -3.926e-04, 6.452e-02, -2.011e-02, -1.717e-02, 2.410e-02, -6.860e-02, 1.250e-02, -2.947e-02, 4.729e-02, -1.287e-02, -9.612e-03)); + r += mul(s3_1, M4(-9.178e-02, 1.314e-01, 1.739e-02, -1.607e-01, -1.972e-02, 1.872e-02, -1.539e-02, -6.145e-03, -1.497e-01, -6.470e-02, -8.867e-02, -6.312e-02, -7.243e-02, -1.322e-01, -8.911e-02, 7.905e-02)); + r += mul(s3_2, M4(2.068e-02, 5.043e-02, 2.388e-02, 3.200e-02, -1.141e-02, 9.786e-03, 4.909e-03, 1.095e-02, 7.105e-02, 8.328e-03, 7.145e-03, 3.723e-02, -7.693e-02, 4.742e-03, -1.476e-02, -4.959e-02)); + r += mul(s3_3, M4(-8.096e-02, 8.019e-02, -6.519e-02, -1.301e-02, -2.094e-01, 7.597e-03, -2.663e-02, -1.061e-01, 7.876e-02, -5.979e-02, -1.494e-02, -3.206e-03, -1.928e-02, -5.113e-03, -2.049e-03, 2.542e-02)); + r += mul(s3_4, M4(-1.123e-01, 4.295e-01, -7.619e-02, -1.627e-01, 2.100e-01, -1.334e-01, 3.049e-02, -5.113e-02, 1.782e-01, 9.771e-02, -7.037e-02, 1.228e-02, -2.200e-01, -4.146e-03, -1.990e-02, 2.668e-02)); + r += mul(s3_5, M4(-2.272e-02, -4.168e-02, 5.787e-03, 8.180e-02, -1.149e-01, 8.850e-03, 2.428e-02, -1.037e-04, 1.139e-03, -3.601e-02, -1.110e-01, 5.826e-02, -1.135e-01, -1.448e-02, -4.284e-02, -8.897e-02)); + r += mul(s3_6, M4(2.568e-02, -3.332e-02, -1.429e-02, -7.309e-03, 8.377e-02, -8.014e-02, 3.598e-02, 1.488e-02, -5.766e-02, 6.066e-02, 2.302e-02, -7.826e-02, -9.080e-02, -6.473e-03, 2.925e-02, -1.859e-02)); + r += mul(s3_7, M4(1.522e-02, 
9.791e-02, -5.046e-02, -4.644e-02, -1.261e-01, 6.878e-02, 5.854e-03, -7.513e-02, -1.506e-01, -1.318e-01, 1.578e-01, 2.697e-02, -4.071e-03, -2.210e-02, 3.974e-02, 3.940e-02)); + r += mul(s3_8, M4(-1.474e-02, -1.738e-02, -8.259e-03, -5.853e-03, -2.127e-02, -3.961e-02, 3.582e-02, 3.172e-03, -1.144e-01, -2.996e-02, 3.036e-03, -3.267e-03, -2.002e-02, -2.635e-02, -1.136e-02, -2.688e-02)); + r += mul(s4_0, M4(4.594e-02, -2.099e-02, 7.548e-02, -2.075e-01, 5.565e-02, -1.654e-02, 2.922e-02, 1.133e-02, -9.739e-02, -6.425e-05, -7.070e-02, 7.836e-03, 6.286e-02, -2.553e-02, 5.762e-02, -2.851e-02)); + r += mul(s4_1, M4(-1.678e-01, 1.242e-01, 1.588e-01, -1.239e-01, 2.922e-02, 7.846e-02, 1.019e-02, 9.464e-02, -1.160e-03, -5.613e-02, -1.619e-01, 6.253e-02, -3.288e-02, 3.520e-02, -5.805e-02, 2.132e-02)); + r += mul(s4_2, M4(-1.046e-01, -6.424e-02, 9.591e-02, 4.972e-02, 6.181e-02, 2.169e-02, -1.502e-02, -1.632e-03, 8.814e-03, -1.752e-02, -2.157e-02, -2.900e-03, 1.169e-02, 5.567e-02, 3.245e-03, 1.681e-02)); + r += mul(s4_3, M4(-4.928e-02, 8.546e-02, -1.618e-01, -2.128e-01, -1.497e-02, 4.482e-02, 4.796e-02, 2.129e-02, 9.864e-02, -1.528e-01, -3.969e-02, 7.450e-02, -1.001e-01, 1.126e-01, -4.606e-02, -2.537e-02)); + r += mul(s4_4, M4(-2.053e-01, -5.939e-02, -1.161e-01, 1.419e-01, -2.759e-03, 6.388e-02, -1.553e-02, -1.023e-02, 1.505e-01, 8.522e-02, 3.198e-02, 5.720e-02, 2.473e-01, -3.964e-02, 5.463e-02, -4.795e-02)); + r += mul(s4_5, M4(-5.172e-02, -1.282e-01, -2.116e-01, 1.091e-01, -2.214e-02, 3.969e-02, 4.337e-02, -1.764e-02, 3.215e-02, 3.299e-02, 2.793e-02, 7.400e-03, 6.523e-02, -5.966e-02, -9.938e-03, 4.816e-02)); + r += mul(s4_6, M4(-8.111e-02, -3.650e-02, 2.723e-02, -1.057e-01, 3.222e-02, 4.503e-02, -1.267e-02, 4.413e-02, -1.724e-01, 1.213e-01, 2.988e-02, -1.441e-02, 5.656e-02, -5.868e-02, 2.248e-02, -4.360e-03)); + r += mul(s4_7, M4(-4.270e-02, 1.211e-01, 5.248e-03, 3.833e-02, -5.154e-02, -7.461e-02, -2.644e-02, 1.660e-02, -6.261e-02, 1.525e-02, 1.216e-01, -1.214e-01, 3.315e-02, 
1.384e-01, -4.694e-02, -3.589e-02)); + r += mul(s4_8, M4(-1.661e-02, -7.148e-02, 1.711e-02, 1.639e-03, -1.166e-02, -2.743e-02, 7.393e-03, -1.306e-02, -6.771e-02, -5.656e-02, -1.044e-02, 3.744e-03, -3.449e-03, -2.178e-03, 1.882e-02, 2.167e-02)); + r += mul(s5_0, M4(-9.376e-04, -2.194e-02, -6.724e-02, -2.053e-02, -2.572e-02, 1.345e-02, 6.833e-03, 6.722e-02, 5.867e-02, 1.715e-02, -5.763e-02, 1.907e-02, -7.177e-02, -9.071e-03, 8.056e-02, -4.422e-02)); + r += mul(s5_1, M4(-3.062e-03, -6.996e-02, -1.865e-02, -5.978e-03, 4.911e-02, 9.800e-02, 1.017e-01, 8.813e-02, -5.140e-02, 1.862e-03, 3.940e-03, -2.055e-02, -1.394e-02, 7.200e-02, 3.466e-02, -4.142e-02)); + r += mul(s5_2, M4(-5.375e-02, 5.841e-02, -2.139e-02, 1.491e-03, 2.405e-02, -6.375e-02, 2.696e-02, -6.658e-04, -1.718e-02, 1.131e-02, -2.777e-02, 1.809e-02, -3.598e-02, -8.768e-02, 4.309e-02, 2.209e-02)); + r += mul(s5_3, M4(-9.237e-03, -1.952e-02, -4.659e-02, 3.577e-02, 7.323e-02, 3.267e-02, 3.138e-02, -6.273e-02, 5.708e-02, -4.532e-02, -7.609e-03, 7.341e-02, -1.255e-02, -4.444e-02, -2.852e-02, 2.135e-02)); + r += mul(s5_4, M4(6.220e-02, -5.132e-02, 5.121e-02, 3.518e-02, -2.193e-01, -1.582e-01, -8.165e-02, -1.271e-02, 6.141e-02, 7.492e-02, 5.174e-02, -6.458e-02, 1.016e-01, -2.288e-02, -2.656e-02, 7.862e-02)); + r += mul(s5_5, M4(6.700e-02, 4.641e-03, -1.019e-02, 2.040e-03, -1.044e-01, 8.057e-02, 6.980e-02, -8.519e-02, 7.276e-02, -7.165e-04, -2.067e-02, 7.258e-02, 9.370e-03, 6.664e-02, -3.490e-04, -4.492e-02)); + r += mul(s5_6, M4(2.889e-02, -2.632e-02, 3.510e-03, 3.752e-03, -3.652e-02, 6.490e-02, 1.142e-03, 7.348e-02, -4.659e-02, -9.205e-02, 7.735e-03, -7.285e-03, -1.560e-02, -2.538e-02, 2.098e-02, -4.471e-03)); + r += mul(s5_7, M4(2.529e-02, 3.207e-02, 1.551e-02, 2.410e-02, -1.143e-01, -1.243e-01, 4.026e-02, -2.224e-02, 2.706e-02, -1.125e-01, -7.444e-04, 1.665e-03, -1.117e-02, 3.164e-02, 5.121e-02, -6.398e-03)); + r += mul(s5_8, M4(-1.810e-02, 3.387e-02, -1.359e-02, -2.422e-02, -4.049e-02, 8.675e-03, -1.482e-03, 
1.121e-02, -5.411e-02, -3.881e-02, 1.730e-02, -2.959e-02, 7.139e-02, 1.425e-02, -9.283e-04, 2.819e-02)); + r += mul(s6_0, M4(1.039e-01, -1.434e-02, 1.175e-02, 7.725e-03, -4.108e-02, 1.405e-02, 2.887e-02, -6.200e-02, -3.134e-02, -3.633e-02, 2.553e-02, -2.109e-02, -1.613e-02, 3.979e-02, -8.357e-02, -1.594e-03)); + r += mul(s6_1, M4(-1.276e-01, -1.901e-01, 3.361e-02, -5.895e-02, -8.170e-02, -5.827e-02, 1.289e-01, -5.021e-02, -1.530e-01, -1.626e-01, -5.100e-02, -1.558e-01, 4.318e-02, -5.978e-02, 2.184e-02, -6.124e-02)); + r += mul(s6_2, M4(1.289e-02, 1.777e-01, -5.825e-02, -9.785e-02, 4.007e-02, 3.836e-02, 3.456e-02, 7.433e-03, 2.243e-02, 3.302e-02, 4.167e-02, 9.464e-02, 4.477e-02, 2.537e-02, -7.013e-03, 3.242e-02)); + r += mul(s6_3, M4(-1.525e-01, 1.014e-01, -1.807e-03, -1.846e-01, 1.672e-01, -8.167e-02, -1.717e-01, 1.343e-01, 6.778e-03, -5.364e-02, -2.812e-02, -1.182e-01, 3.515e-02, 3.398e-02, -1.040e-02, -1.062e-01)); + r += mul(s6_4, M4(1.956e-01, -2.669e-01, -1.460e-01, 4.621e-01, -9.447e-02, 2.349e-01, -1.470e-01, -4.941e-02, 7.513e-02, 4.184e-02, -1.044e-01, -2.203e-01, 7.399e-02, -1.302e-01, -3.328e-02, -3.597e-03)); + r += mul(s6_5, M4(1.476e-01, 4.634e-02, 1.029e-01, -6.998e-02, -3.599e-03, -8.547e-03, -4.742e-02, -8.798e-03, -1.155e-01, -1.334e-02, 1.572e-02, -1.198e-01, -3.377e-02, -1.021e-03, -5.352e-02, 2.362e-02)); + r += mul(s6_6, M4(-2.038e-02, -1.433e-02, -5.558e-02, -3.899e-02, 5.073e-02, -4.008e-02, -2.158e-03, -4.432e-02, 4.568e-03, 6.172e-02, 1.382e-02, -1.944e-02, 4.529e-02, 1.661e-02, 1.656e-03, 2.364e-02)); + r += mul(s6_7, M4(-5.675e-02, -1.128e-01, 5.922e-03, -2.657e-02, 4.674e-02, 5.763e-02, -5.162e-03, -2.677e-02, -1.060e-01, -1.866e-02, 6.187e-02, -1.249e-01, 2.508e-02, -6.913e-02, 5.112e-02, 2.815e-02)); + r += mul(s6_8, M4(7.287e-03, 1.523e-02, -2.667e-02, -1.769e-02, 1.994e-02, -3.947e-02, 3.164e-02, -1.631e-02, 6.569e-02, 2.412e-02, -4.184e-02, 4.302e-02, 1.836e-02, -6.858e-02, 3.069e-02, -2.480e-02)); + r += mul(s7_0, M4(-6.234e-02, 
-3.536e-02, 2.735e-02, 1.608e-02, -1.563e-02, -7.788e-02, 9.835e-02, -1.025e-04, -4.066e-02, 7.144e-02, 4.273e-02, 1.307e-02, 2.383e-02, 7.450e-02, -9.114e-02, 2.195e-02)); + r += mul(s7_1, M4(1.126e-02, 9.804e-02, 9.452e-02, 1.843e-02, 2.402e-02, 1.024e-01, 1.977e-01, -1.763e-02, -6.715e-02, -3.739e-02, 1.339e-01, -1.311e-01, -9.503e-02, 1.242e-02, 4.849e-02, -3.676e-02)); + r += mul(s7_2, M4(5.123e-02, -1.605e-02, 1.521e-03, 4.712e-02, 2.437e-02, -4.237e-02, -2.470e-02, 7.330e-03, 9.882e-02, -1.058e-02, -1.584e-02, 5.144e-02, 2.768e-02, 9.235e-02, -1.769e-02, -3.762e-02)); + r += mul(s7_3, M4(-1.533e-02, -7.504e-03, -4.468e-02, -8.556e-02, 3.764e-02, -1.577e-01, -5.104e-02, 3.940e-02, -3.595e-02, -2.249e-02, -4.397e-02, -1.208e-01, 4.481e-02, 1.282e-01, 5.298e-03, -1.793e-01)); + r += mul(s7_4, M4(-1.851e-02, 7.804e-02, -3.354e-02, 9.737e-02, -6.438e-02, 2.125e-01, -1.693e-01, 2.251e-02, 6.170e-02, -2.258e-02, -7.436e-02, -2.415e-01, -2.285e-01, -2.941e-01, 9.524e-02, 1.657e-02)); + r += mul(s7_5, M4(-1.524e-01, -4.079e-02, -2.911e-02, -7.842e-03, -1.804e-03, -3.636e-02, 3.002e-02, -4.805e-02, 9.590e-02, -1.424e-01, -2.798e-02, 1.406e-01, -3.761e-02, 1.214e-01, -8.535e-03, -1.786e-02)); + r += mul(s7_6, M4(2.536e-03, -1.993e-02, -1.801e-02, 4.518e-02, -2.252e-02, -6.023e-02, 2.992e-02, -1.164e-02, 2.239e-02, 3.118e-02, 3.715e-03, -1.298e-02, -4.918e-02, 6.960e-02, 5.030e-02, -3.900e-02)); + r += mul(s7_7, M4(2.215e-02, 6.815e-03, -1.234e-02, 6.264e-02, 5.752e-02, -5.832e-03, 7.431e-03, -4.491e-02, -8.863e-02, 8.035e-02, 4.167e-03, -1.112e-01, 1.098e-01, -4.264e-03, -1.362e-01, 1.234e-01)); + r += mul(s7_8, M4(7.512e-03, -3.196e-03, 3.111e-02, -2.059e-02, -2.756e-02, 3.436e-02, 3.508e-02, -3.981e-03, 3.004e-02, -5.253e-02, -4.691e-02, 4.399e-02, -3.001e-02, 4.106e-02, 2.402e-03, -6.141e-02)); + r += V4(1.032e-02, 5.736e-03, -4.566e-03, 3.095e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 
s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.957e-02, -1.929e-02, -7.233e-02, 6.399e-02, 3.891e-02, -2.368e-02, -5.297e-02, 3.034e-02, 2.073e-02, -5.042e-02, 3.753e-02, -4.414e-03, 2.939e-02, -4.863e-02, -8.268e-03, 4.045e-02)); + r += mul(s0_1, M4(1.500e-02, 7.816e-03, -3.719e-02, -1.413e-01, -2.540e-02, -5.338e-02, 3.943e-02, 6.922e-02, -5.509e-03, -6.114e-02, 3.186e-02, -3.823e-03, 8.412e-02, -9.854e-02, 3.890e-02, 8.705e-04)); + r += mul(s0_2, M4(-3.633e-02, -1.220e-02, -1.844e-02, -7.541e-02, -1.891e-02, 3.772e-02, -4.209e-02, 6.710e-02, 4.396e-02, 1.547e-02, 4.528e-02, -2.666e-02, 5.815e-02, -3.918e-02, 1.937e-02, -4.644e-02)); + r += mul(s0_3, M4(1.167e-01, -3.639e-02, -4.217e-03, -9.302e-02, 6.612e-03, 5.179e-04, 5.621e-02, 2.659e-02, 2.275e-02, 9.503e-02, -2.446e-02, 4.318e-02, 3.466e-02, 4.579e-02, -1.291e-02, -2.690e-02)); + r += mul(s0_4, M4(-3.764e-03, 1.447e-01, 4.298e-02, 2.666e-01, -7.257e-02, 2.289e-01, -7.192e-02, 1.720e-01, 2.101e-02, 9.109e-02, -2.118e-02, 1.117e-01, -2.413e-02, -3.388e-02, -1.185e-01, 4.957e-02)); + r += mul(s0_5, M4(1.710e-01, -1.024e-02, 6.099e-02, -5.108e-03, -4.723e-02, -8.317e-02, 2.810e-02, -6.812e-02, 1.933e-02, -4.623e-02, -5.507e-02, -5.481e-02, 2.608e-02, 1.180e-01, -4.448e-02, 7.944e-02)); + r += mul(s0_6, M4(8.110e-04, 2.934e-02, 9.242e-02, -5.007e-02, -2.002e-02, 1.258e-02, 4.889e-02, -4.423e-03, -2.356e-02, -4.107e-02, 4.342e-02, -6.808e-02, 5.131e-02, 3.878e-02, 4.502e-02, 
-3.955e-03)); + r += mul(s0_7, M4(-1.708e-02, 6.205e-02, -6.230e-02, -8.522e-02, -9.137e-02, 9.945e-02, 1.625e-01, -9.107e-03, 9.373e-02, 2.157e-01, 1.192e-02, -1.947e-02, 5.642e-02, -1.830e-02, -4.282e-02, -3.323e-02)); + r += mul(s0_8, M4(-4.627e-02, -3.488e-02, -2.095e-02, 4.034e-02, -7.047e-02, 4.260e-02, -1.066e-02, 5.798e-02, -1.646e-02, 1.113e-01, 2.390e-02, -3.724e-02, 3.576e-02, -5.889e-02, 4.255e-03, -1.906e-02)); + r += mul(s1_0, M4(-5.832e-03, 3.125e-02, -1.642e-02, -3.459e-03, -1.119e-02, -2.803e-02, 1.157e-03, -3.958e-02, 4.631e-02, 1.482e-02, 7.727e-02, -3.134e-02, -4.881e-02, -2.130e-02, 6.989e-03, -7.346e-02)); + r += mul(s1_1, M4(-7.396e-02, 9.412e-02, 7.725e-03, 2.162e-02, 5.224e-02, -1.088e-02, -2.065e-02, -9.780e-02, -1.101e-02, 3.594e-02, 2.216e-02, -1.258e-01, -3.889e-02, 3.863e-02, -4.760e-02, 6.302e-02)); + r += mul(s1_2, M4(-5.090e-02, -2.363e-02, -3.503e-02, -1.487e-02, 5.198e-02, 4.417e-02, 3.775e-02, -5.158e-02, 3.694e-02, -1.838e-02, 4.035e-02, -2.728e-02, 2.090e-02, -3.677e-02, 1.538e-02, 5.569e-03)); + r += mul(s1_3, M4(1.207e-01, -3.502e-02, 2.359e-02, 4.536e-02, -1.038e-02, -3.105e-02, 9.460e-02, 3.270e-02, -1.572e-02, 7.465e-02, -4.742e-02, 5.380e-02, -1.301e-01, -8.177e-02, 2.412e-02, -4.786e-02)); + r += mul(s1_4, M4(8.592e-03, -1.604e-01, 1.551e-01, 1.448e-01, -2.117e-01, 1.461e-01, -8.745e-03, 2.140e-01, -7.602e-02, 9.228e-02, -5.262e-03, 1.787e-01, -2.019e-01, -1.864e-01, 9.426e-03, 1.811e-01)); + r += mul(s1_5, M4(6.368e-02, 3.336e-02, 3.970e-02, -1.583e-02, -2.103e-02, 6.325e-02, -3.413e-02, -7.886e-02, -7.021e-02, -1.133e-01, -1.177e-02, -2.928e-02, 2.705e-02, -1.405e-01, -5.630e-02, -3.552e-03)); + r += mul(s1_6, M4(-4.145e-02, -2.657e-02, -5.136e-02, 3.983e-02, -3.112e-02, 3.816e-02, 3.503e-02, -1.835e-02, -4.899e-02, -7.746e-02, -2.181e-02, 3.558e-02, -1.264e-02, 4.415e-02, 1.453e-02, -6.731e-03)); + r += mul(s1_7, M4(5.223e-03, -7.955e-04, 9.171e-02, -1.591e-01, -1.133e-01, 1.686e-01, -3.125e-02, -5.211e-02, 1.185e-01, 
-1.874e-01, -3.469e-01, 6.632e-02, 2.601e-02, 1.087e-01, 1.299e-01, -4.201e-02)); + r += mul(s1_8, M4(-2.578e-02, -7.088e-02, 4.434e-02, -5.593e-02, -1.833e-01, -7.441e-02, -1.421e-02, 3.444e-02, 1.911e-02, 1.664e-03, 7.838e-03, -1.117e-01, 5.608e-02, 5.974e-02, -3.714e-03, 2.898e-04)); + r += mul(s2_0, M4(2.406e-02, -5.912e-02, 1.298e-02, -1.281e-02, 1.433e-02, 9.674e-02, -3.115e-03, -7.976e-02, -5.297e-03, -1.302e-02, 4.163e-02, 7.091e-03, -8.644e-03, -4.116e-02, -2.294e-02, -2.830e-02)); + r += mul(s2_1, M4(-1.385e-03, -5.038e-02, 3.441e-02, -1.230e-01, -4.663e-02, 4.946e-02, 2.977e-02, 3.772e-02, -2.058e-02, 1.794e-02, 3.946e-02, -3.506e-02, -7.325e-02, 1.447e-02, -9.563e-02, -7.138e-03)); + r += mul(s2_2, M4(4.960e-02, 3.821e-02, -1.735e-02, 4.977e-02, 2.546e-02, -4.679e-02, -1.158e-02, 5.616e-03, -2.759e-03, 7.985e-02, 6.386e-03, 6.271e-02, -1.272e-02, 2.503e-03, 8.398e-03, 1.271e-01)); + r += mul(s2_3, M4(-1.038e-02, 2.823e-02, 4.136e-02, 7.295e-02, 1.117e-02, -6.778e-03, -2.525e-03, 1.712e-02, 2.669e-02, 7.388e-02, -4.112e-04, 4.131e-02, 5.201e-02, 4.515e-04, 1.094e-01, 2.320e-03)); + r += mul(s2_4, M4(-1.284e-01, 1.210e-01, -9.807e-02, 1.665e-01, 6.781e-02, 1.028e-01, 5.251e-02, -6.496e-02, -2.019e-03, -8.688e-02, -1.587e-01, -6.006e-02, -5.870e-02, -2.039e-02, 9.403e-02, 5.624e-02)); + r += mul(s2_5, M4(-6.943e-02, -3.505e-02, -1.387e-02, -3.432e-02, 1.336e-02, 1.029e-04, -2.559e-02, -5.021e-02, -1.027e-03, -9.446e-02, 7.434e-02, -6.495e-02, 5.121e-02, -1.844e-01, -1.339e-02, -2.068e-02)); + r += mul(s2_6, M4(-3.294e-02, 6.696e-03, 1.385e-02, -1.026e-02, 4.626e-02, -1.981e-02, -1.253e-01, 5.887e-02, 1.193e-02, 6.605e-02, -4.059e-02, 5.007e-02, 7.069e-03, -3.892e-02, 1.316e-02, -1.329e-03)); + r += mul(s2_7, M4(-5.214e-03, -2.388e-02, 8.716e-02, 7.790e-03, 9.102e-02, -1.228e-01, -2.001e-01, 1.346e-02, -9.074e-03, -2.822e-02, -7.053e-02, -2.615e-02, -1.325e-02, 1.692e-02, 1.367e-01, -6.891e-03)); + r += mul(s2_8, M4(9.435e-03, 8.678e-03, 1.546e-02, 
1.673e-02, -4.425e-02, 3.501e-02, -2.438e-03, -2.117e-02, 9.269e-02, -1.192e-02, -9.051e-03, 1.089e-03, 2.173e-02, 6.414e-02, 8.247e-03, 1.388e-02)); + r += mul(s3_0, M4(1.310e-01, -1.323e-01, -3.285e-02, -6.364e-02, 1.523e-02, -5.964e-02, 2.667e-02, -2.718e-02, -5.582e-02, 4.316e-02, 2.139e-02, -3.882e-03, 2.538e-02, -2.755e-02, 8.806e-04, -2.173e-02)); + r += mul(s3_1, M4(8.679e-02, 5.650e-02, 1.789e-01, -1.361e-02, -6.412e-02, 2.237e-03, 3.455e-02, -1.238e-01, -1.592e-01, 6.738e-02, -9.433e-02, 5.789e-02, -3.984e-02, -4.034e-02, -1.080e-01, 1.221e-02)); + r += mul(s3_2, M4(-8.604e-02, 2.297e-02, -3.277e-02, -1.643e-02, 1.366e-02, 1.272e-02, -4.874e-03, -9.394e-03, -9.996e-03, -3.402e-02, 1.908e-02, 4.162e-02, -2.027e-03, 3.923e-02, 1.338e-02, -2.775e-02)); + r += mul(s3_3, M4(8.514e-02, 4.941e-02, 1.136e-01, 2.181e-02, 7.492e-02, 8.835e-02, 1.281e-01, 5.094e-02, -3.794e-02, 7.078e-02, -1.033e-01, 2.082e-02, 6.665e-02, -4.889e-02, 3.621e-02, -3.375e-03)); + r += mul(s3_4, M4(-1.772e-01, 1.164e-01, -2.896e-01, 7.526e-02, 6.486e-02, 3.452e-01, -9.239e-02, 1.387e-02, -1.371e-01, -4.009e-02, -1.604e-01, 8.436e-02, 3.300e-03, 5.823e-02, 3.795e-02, 6.376e-02)); + r += mul(s3_5, M4(-1.047e-01, -2.641e-02, -3.330e-02, 3.592e-02, -3.797e-03, -7.614e-02, -6.192e-02, -2.366e-02, 1.103e-01, 4.321e-02, 2.096e-02, 4.921e-02, 8.541e-02, 1.023e-02, 1.980e-02, -2.827e-02)); + r += mul(s3_6, M4(-5.483e-02, 6.945e-03, 6.125e-02, -1.114e-02, -3.109e-02, -7.767e-02, -1.777e-01, -1.954e-02, -4.336e-03, 3.103e-02, -9.225e-02, 2.032e-02, 6.714e-03, -9.436e-03, 4.314e-02, -1.701e-02)); + r += mul(s3_7, M4(-8.160e-02, -6.575e-02, 1.249e-01, 1.150e-02, 3.725e-02, 5.154e-02, -2.207e-01, 1.980e-02, 8.971e-02, -2.289e-02, 6.716e-02, -1.377e-01, 7.987e-03, 6.571e-02, 6.751e-02, -2.847e-02)); + r += mul(s3_8, M4(-2.108e-02, -6.825e-02, -5.537e-03, -1.622e-03, -7.742e-02, 7.133e-02, -9.959e-03, 1.210e-02, 9.613e-02, 2.147e-01, 4.867e-02, -6.740e-03, 5.943e-02, 1.073e-01, 1.379e-02, 2.028e-02)); 
+ r += mul(s4_0, M4(8.846e-03, -1.414e-01, -6.767e-02, -1.371e-01, -3.770e-02, 5.099e-03, 9.724e-03, 6.872e-02, -3.600e-02, -2.685e-02, -1.448e-03, -1.022e-01, 3.550e-02, 4.504e-02, 3.020e-02, -2.420e-02)); + r += mul(s4_1, M4(1.128e-01, -1.848e-01, 2.358e-01, -2.083e-01, -2.705e-02, 9.311e-02, 2.559e-02, -8.758e-02, -2.868e-02, -8.436e-02, 4.441e-02, 1.806e-01, 4.224e-02, 1.685e-02, -9.670e-02, 2.653e-02)); + r += mul(s4_2, M4(-8.658e-02, -1.178e-01, -2.972e-02, -2.572e-01, 1.680e-02, -4.280e-02, 1.425e-02, 6.848e-03, 4.073e-02, 3.491e-02, 1.308e-02, -3.689e-02, 4.009e-02, -6.097e-02, 1.412e-02, -3.500e-02)); + r += mul(s4_3, M4(1.121e-01, -8.794e-02, 1.456e-01, 1.664e-01, -8.760e-02, 1.016e-01, -2.300e-02, -2.855e-02, -1.597e-02, 8.344e-02, -5.320e-02, 1.479e-01, -2.902e-03, -3.130e-02, -1.323e-02, -1.673e-02)); + r += mul(s4_4, M4(3.860e-02, -2.934e-02, 1.809e-01, 1.200e-01, -9.147e-02, 1.260e-01, -1.558e-01, -5.510e-02, 1.671e-01, -1.776e-01, -4.580e-03, -2.861e-01, -3.544e-02, 5.584e-02, 8.725e-02, 1.743e-02)); + r += mul(s4_5, M4(-3.700e-02, -1.345e-01, 4.381e-02, 1.123e-01, -3.882e-02, -1.464e-02, 1.230e-02, -2.082e-02, -2.821e-02, 4.478e-02, -2.708e-02, -4.165e-02, -1.810e-02, 1.548e-01, -8.655e-03, 2.190e-02)); + r += mul(s4_6, M4(3.707e-02, -3.001e-03, -5.624e-02, -2.532e-03, 3.347e-02, 1.560e-02, -5.180e-04, 7.213e-03, 1.037e-01, -1.121e-01, 1.677e-02, 3.223e-02, 2.705e-02, 8.575e-02, 3.078e-02, -2.267e-04)); + r += mul(s4_7, M4(2.250e-02, 1.487e-02, -1.650e-01, -1.409e-02, 7.617e-02, -7.459e-03, 1.392e-01, 6.905e-02, 9.223e-02, 2.307e-01, 1.427e-01, -5.743e-02, 8.216e-03, -1.208e-01, -8.752e-02, 3.296e-03)); + r += mul(s4_8, M4(1.453e-02, 7.337e-03, -1.524e-02, -1.241e-02, 7.055e-02, -1.957e-02, -2.605e-03, -3.496e-03, -4.691e-02, -1.215e-01, -2.121e-02, 1.250e-02, -1.895e-02, -7.662e-02, -9.187e-03, -4.188e-02)); + r += mul(s5_0, M4(2.253e-03, 2.895e-02, -3.356e-03, -3.946e-02, 6.570e-02, -1.506e-02, -7.567e-02, -3.304e-02, -8.512e-03, 1.700e-02, 
-3.170e-02, -9.021e-02, 2.892e-01, -4.648e-02, 2.220e-02, 1.739e-02)); + r += mul(s5_1, M4(1.111e-01, -4.749e-02, -1.770e-02, 2.719e-02, 4.371e-03, 6.664e-02, -4.068e-03, -2.892e-01, -1.957e-02, 2.262e-02, -5.655e-03, -5.166e-02, 2.833e-01, -7.883e-02, 8.146e-02, -5.218e-02)); + r += mul(s5_2, M4(2.774e-02, -5.780e-02, 7.517e-03, -7.034e-03, 4.927e-02, 3.076e-02, -3.758e-02, -9.627e-03, -1.044e-02, -1.818e-02, -2.164e-02, -2.569e-03, 2.926e-01, 4.791e-02, 1.676e-02, -4.867e-02)); + r += mul(s5_3, M4(1.016e-02, 2.146e-02, -1.199e-02, 2.797e-02, -7.898e-02, 9.582e-02, 3.174e-02, -3.970e-02, 9.055e-02, -4.866e-02, -4.249e-02, 2.899e-02, 2.318e-01, 3.959e-02, -6.066e-02, 1.228e-02)); + r += mul(s5_4, M4(-1.095e-01, 3.017e-02, -2.044e-02, -1.573e-03, -1.158e-01, -4.219e-02, -1.247e-01, 1.312e-01, 4.427e-02, -1.392e-01, 3.504e-02, -1.490e-01, 1.577e-01, 4.934e-02, -9.058e-02, 1.150e-01)); + r += mul(s5_5, M4(-4.875e-02, 9.593e-03, 3.234e-02, 5.133e-02, -2.486e-02, -8.642e-02, 3.262e-02, 1.322e-02, -6.464e-02, -3.717e-02, -3.904e-02, -2.541e-03, 2.439e-01, -2.009e-02, -2.255e-02, 2.965e-03)); + r += mul(s5_6, M4(7.269e-03, -5.754e-03, 1.268e-02, 1.436e-03, 6.907e-02, -7.310e-02, 1.493e-03, -2.275e-02, 2.023e-02, 3.361e-03, 2.436e-02, -2.308e-02, 2.661e-01, 1.148e-02, 7.581e-02, -3.858e-02)); + r += mul(s5_7, M4(-3.640e-02, -2.640e-02, 1.529e-02, 5.979e-03, 1.858e-01, 2.683e-02, 1.042e-01, -3.059e-02, 5.402e-02, -1.913e-02, -4.763e-02, 1.896e-02, 2.559e-01, -3.100e-02, 2.592e-04, -8.341e-02)); + r += mul(s5_8, M4(-2.953e-02, 3.158e-02, 2.555e-03, 1.784e-02, 1.153e-01, 5.659e-02, -2.682e-02, 1.226e-02, -3.080e-02, -2.664e-02, 7.513e-02, 2.257e-03, 2.487e-01, 1.699e-02, -6.703e-03, -3.605e-02)); + r += mul(s6_0, M4(-4.189e-02, -9.154e-02, 4.788e-02, -1.111e-01, 1.971e-02, -4.760e-02, 2.981e-02, -7.093e-02, -2.129e-02, -6.233e-02, -6.308e-04, -4.454e-03, -4.643e-02, -7.902e-03, -2.739e-02, 2.428e-03)); + r += mul(s6_1, M4(3.598e-02, -7.888e-03, -5.828e-02, 1.286e-01, 
-1.478e-02, -5.556e-02, -1.790e-02, -1.876e-01, -5.060e-02, 1.849e-01, -3.716e-02, -2.660e-02, -1.689e-02, -5.784e-03, 4.138e-04, -7.225e-02)); + r += mul(s6_2, M4(2.588e-02, -5.272e-02, 1.092e-01, 4.166e-02, -7.974e-03, -2.317e-03, 6.136e-03, 3.086e-03, 5.685e-02, -5.237e-02, -5.456e-03, 5.515e-02, -1.173e-02, -2.115e-02, 1.212e-02, 7.162e-02)); + r += mul(s6_3, M4(-1.605e-01, -1.559e-01, -6.509e-02, 1.481e-02, -1.061e-01, 1.587e-02, -2.719e-02, 9.496e-02, -1.964e-02, 1.008e-01, 3.483e-02, 6.958e-02, 6.500e-04, 5.742e-02, -2.891e-02, -8.490e-03)); + r += mul(s6_4, M4(-1.561e-02, 1.292e-01, 1.002e-01, 3.026e-02, -2.482e-02, -3.814e-01, 1.739e-01, -4.705e-02, 5.162e-02, -4.266e-02, 1.289e-02, 8.523e-02, -7.511e-03, -2.839e-02, -6.110e-02, -1.192e-01)); + r += mul(s6_5, M4(1.514e-01, 7.149e-02, -2.050e-02, -9.953e-02, 3.742e-02, -3.536e-02, -5.580e-02, -4.786e-02, -1.358e-01, 3.836e-02, 6.996e-02, 7.751e-02, -2.538e-02, -1.700e-03, 1.704e-02, 1.054e-02)); + r += mul(s6_6, M4(6.145e-02, 3.964e-02, -4.079e-02, -2.012e-04, 2.773e-02, 2.789e-02, -2.299e-02, 1.544e-02, -2.108e-02, 7.204e-03, -3.435e-02, -2.166e-02, -4.417e-02, -2.244e-02, -5.070e-02, 1.706e-02)); + r += mul(s6_7, M4(4.528e-02, -8.002e-02, 3.016e-02, -3.395e-02, 5.715e-02, -1.227e-01, 5.972e-04, -5.689e-03, 2.970e-03, 7.643e-02, -1.862e-02, -5.993e-02, 3.902e-02, -1.052e-01, 4.273e-02, -3.014e-02)); + r += mul(s6_8, M4(-3.764e-03, -5.511e-02, -9.549e-02, 8.376e-03, 2.026e-02, 2.640e-02, -1.948e-02, -4.695e-02, -6.268e-02, 3.103e-02, -2.264e-02, 1.770e-02, -3.231e-02, 6.016e-02, -2.159e-02, 1.004e-02)); + r += mul(s7_0, M4(-1.944e-02, -5.163e-02, 9.877e-03, 2.705e-02, 2.228e-02, -1.798e-02, 4.495e-02, -1.212e-02, -1.485e-02, -3.709e-02, 9.257e-03, -3.256e-02, -1.232e-01, -3.018e-03, -1.451e-02, 1.920e-02)); + r += mul(s7_1, M4(-3.847e-02, 1.337e-02, 5.937e-02, -1.280e-02, 1.287e-02, 2.614e-03, 1.622e-01, -1.062e-01, -6.421e-02, -5.570e-02, 9.379e-02, 1.947e-02, -3.728e-02, -5.490e-02, -6.420e-02, 
4.407e-02)); + r += mul(s7_2, M4(1.588e-02, 7.788e-02, -2.490e-02, 1.825e-02, 1.764e-02, 5.213e-02, 2.868e-02, -1.027e-02, 2.788e-03, -2.785e-02, -6.366e-02, 1.291e-01, 2.413e-02, -6.380e-02, 7.746e-02, 1.257e-02)); + r += mul(s7_3, M4(-6.544e-02, 5.579e-02, -4.697e-02, 3.908e-02, -7.368e-02, 8.769e-02, -1.372e-01, 7.078e-02, -2.486e-02, 5.890e-02, 4.846e-02, 8.198e-02, 9.691e-02, -5.949e-02, 8.025e-02, 4.313e-02)); + r += mul(s7_4, M4(2.249e-02, -4.109e-02, -4.662e-02, -9.531e-03, -4.681e-03, -3.572e-01, -7.553e-02, -1.689e-01, -2.535e-02, 2.499e-01, 5.825e-02, 7.886e-02, -1.222e-01, -2.583e-01, -1.378e-01, -2.536e-01)); + r += mul(s7_5, M4(-2.349e-02, -1.093e-01, -1.163e-02, -4.259e-02, 8.261e-03, 1.136e-02, -2.641e-02, -5.521e-02, -3.955e-02, 3.419e-02, -4.726e-03, 6.041e-02, -6.211e-03, -2.036e-01, -3.757e-02, 4.397e-02)); + r += mul(s7_6, M4(-1.146e-02, -4.954e-03, 1.249e-02, -1.916e-02, 3.667e-02, 2.107e-02, 6.853e-02, -1.684e-02, -1.248e-02, -1.862e-02, -8.517e-03, -3.226e-02, -4.290e-03, 5.140e-02, -7.347e-02, -3.504e-02)); + r += mul(s7_7, M4(5.449e-02, 1.826e-02, -7.368e-02, 1.170e-02, -3.612e-02, 5.722e-02, 5.965e-02, -2.463e-02, -2.703e-02, -3.847e-02, 2.002e-02, -4.870e-02, 1.220e-01, -6.682e-02, 3.629e-02, 7.157e-02)); + r += mul(s7_8, M4(-2.929e-02, 6.953e-02, -1.551e-02, -2.604e-02, -2.354e-02, 4.386e-02, -2.369e-02, -4.634e-02, -2.935e-02, -1.085e-01, 3.650e-02, 8.587e-03, -5.270e-03, -4.489e-02, 1.397e-02, 3.679e-03)); + r += V4(-1.234e-02, 1.399e-03, -3.210e-03, 1.071e-02); + return r; +} + +void Pass16(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 
s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 
s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, 
s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 17 +//!DESC conv16 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.156e-02, 6.165e-02, 2.515e-02, -4.821e-02, 4.392e-02, -1.782e-02, -2.298e-02, 5.459e-04, -6.055e-03, 7.091e-02, -1.707e-02, 3.750e-02, -2.571e-02, 4.308e-02, 8.761e-02, -7.659e-02)); + r += mul(s0_1, M4(3.125e-02, -2.226e-02, -3.384e-02, -6.259e-02, -2.213e-02, 2.235e-02, 7.938e-02, -4.267e-02, 3.313e-02, -9.646e-02, 2.131e-02, 1.986e-02, 7.270e-02, -5.378e-02, 1.201e-03, -9.428e-03)); + r += mul(s0_2, M4(-6.791e-02, 3.883e-02, -9.256e-02, 1.588e-02, 4.954e-02, -6.834e-03, -2.625e-02, 
2.904e-02, -4.382e-02, 4.837e-03, 1.303e-02, -3.053e-02, 2.798e-03, 8.662e-04, 2.414e-02, -2.178e-02)); + r += mul(s0_3, M4(-4.441e-03, 3.012e-02, -1.029e-01, 3.504e-02, -6.492e-03, 4.360e-02, -2.207e-03, 3.454e-02, -3.369e-02, -8.222e-02, -5.546e-02, -2.461e-02, -1.658e-02, 2.443e-02, 8.769e-02, -9.597e-03)); + r += mul(s0_4, M4(-1.814e-01, -2.435e-01, 2.376e-01, -3.096e-01, -5.681e-02, -5.972e-03, 1.443e-01, -1.880e-01, 1.846e-02, -9.695e-02, 7.147e-02, 4.801e-02, 1.037e-01, 2.661e-02, 3.455e-02, 3.356e-02)); + r += mul(s0_5, M4(3.519e-02, 2.718e-02, 9.848e-02, 8.631e-02, 4.184e-02, -5.676e-02, -6.712e-02, 1.062e-01, -3.166e-03, 6.845e-02, 3.497e-02, -7.318e-03, 4.985e-02, 1.843e-02, 1.803e-02, 7.571e-03)); + r += mul(s0_6, M4(-3.625e-02, -5.030e-02, -3.556e-02, -9.736e-02, 1.095e-03, -1.929e-02, -5.579e-02, -1.876e-02, 1.891e-02, -3.657e-02, 3.884e-02, 4.106e-03, -1.988e-02, -2.700e-02, 1.826e-02, 8.161e-03)); + r += mul(s0_7, M4(-2.913e-02, -4.620e-03, 5.326e-02, 7.157e-02, 4.533e-02, 9.379e-02, 1.550e-01, 6.340e-02, 9.767e-03, -1.862e-02, -5.731e-02, -4.649e-03, 1.028e-03, -6.575e-02, -7.045e-02, -7.278e-03)); + r += mul(s0_8, M4(-3.700e-02, -6.231e-02, -8.645e-02, 2.889e-02, -4.881e-02, -1.095e-03, -4.147e-02, 2.005e-02, 9.187e-03, 1.735e-02, 6.280e-02, -7.805e-02, 1.408e-02, -1.202e-02, -2.381e-03, 8.901e-03)); + r += mul(s1_0, M4(-3.447e-02, 5.741e-02, 3.830e-03, -5.936e-03, 9.599e-03, -6.178e-02, 4.019e-03, -4.353e-02, 5.675e-02, -4.451e-02, 5.019e-03, 5.025e-02, 8.918e-02, -5.737e-02, 4.635e-02, -6.282e-02)); + r += mul(s1_1, M4(-1.228e-02, -6.435e-02, 7.405e-02, -1.218e-01, 5.559e-02, -1.284e-02, -4.044e-04, 9.694e-03, 9.755e-02, 4.876e-02, -3.523e-02, 2.152e-01, 8.096e-02, -2.807e-02, -4.256e-02, 9.904e-02)); + r += mul(s1_2, M4(-1.230e-02, 4.846e-02, 3.442e-03, 1.878e-02, 1.026e-02, 2.434e-03, -4.669e-02, -6.950e-02, -5.199e-02, -1.260e-02, 2.584e-03, -5.970e-02, -4.281e-02, 2.061e-02, 1.963e-02, -2.112e-02)); + r += mul(s1_3, M4(-1.030e-02, 2.621e-02, 
6.983e-02, -2.046e-02, 1.633e-02, -6.756e-02, -1.049e-01, 3.083e-02, 7.832e-03, -1.279e-01, -4.689e-02, 8.196e-02, 2.190e-01, -1.967e-01, 2.125e-01, -3.281e-02)); + r += mul(s1_4, M4(4.708e-02, -4.659e-02, -6.770e-03, 4.344e-02, 1.488e-01, -6.091e-01, 1.748e-01, -3.358e-01, 2.832e-01, 1.045e-02, 8.715e-02, 9.823e-03, 5.027e-03, 9.443e-02, -1.500e-02, 4.938e-02)); + r += mul(s1_5, M4(8.091e-02, 2.685e-02, 4.437e-02, -5.866e-03, -5.661e-02, -6.368e-05, -2.218e-02, 1.242e-01, -8.986e-02, 4.820e-02, 3.979e-02, -4.334e-02, 8.221e-03, 6.210e-03, 3.771e-02, 9.308e-03)); + r += mul(s1_6, M4(-2.767e-02, -4.336e-02, -2.202e-02, -3.286e-02, -6.555e-03, -5.700e-02, -7.689e-02, -1.102e-02, -1.407e-03, -6.665e-02, -3.185e-02, 2.750e-02, -3.314e-02, -5.566e-02, 9.604e-03, 8.935e-02)); + r += mul(s1_7, M4(8.737e-04, 4.815e-02, 1.344e-02, 1.764e-02, 4.722e-02, -1.648e-02, 1.023e-01, -2.641e-02, 3.347e-02, 9.013e-03, -3.238e-02, -2.522e-03, 9.035e-02, 4.695e-02, -1.043e-01, 3.727e-02)); + r += mul(s1_8, M4(1.181e-02, -4.895e-02, -1.434e-02, 3.660e-02, 1.319e-02, 1.363e-02, -3.919e-02, 2.970e-02, -1.936e-03, 1.717e-02, 4.928e-02, -4.602e-02, 4.838e-02, 3.795e-02, 3.504e-02, 1.532e-02)); + r += mul(s2_0, M4(-6.113e-03, 3.948e-03, -1.352e-02, 6.239e-03, -3.047e-02, 1.207e-01, -3.000e-02, -3.911e-02, 1.401e-02, 1.932e-02, -5.285e-02, 3.610e-02, 3.287e-02, -3.219e-02, 9.459e-02, -4.369e-03)); + r += mul(s2_1, M4(3.883e-03, 1.594e-02, 1.902e-02, -1.362e-03, -1.255e-01, 1.213e-01, 1.349e-02, -4.341e-02, -1.824e-03, -2.703e-02, 2.730e-02, -2.791e-02, 6.127e-02, -1.142e-01, -7.652e-02, -9.776e-03)); + r += mul(s2_2, M4(-3.686e-02, 5.035e-04, -1.472e-02, 6.111e-03, -1.857e-02, 1.292e-02, 6.319e-03, -1.197e-02, 5.313e-02, 7.473e-03, 3.750e-02, -2.960e-04, -1.047e-01, 5.681e-02, -3.128e-02, -3.333e-02)); + r += mul(s2_3, M4(3.041e-03, 1.363e-01, 4.553e-02, -1.397e-02, 7.855e-02, -3.787e-01, -1.873e-02, 1.744e-02, 3.652e-02, 1.626e-01, 3.480e-01, 1.356e-02, -3.601e-02, 6.745e-02, -7.356e-02, 
4.401e-02)); + r += mul(s2_4, M4(-2.452e-01, -2.038e-02, 1.651e-02, -4.464e-02, 1.841e-01, -2.265e-01, 1.834e-01, -2.317e-01, 1.901e-01, -1.588e-01, 8.960e-02, 4.321e-02, 3.730e-03, -1.431e-01, 1.646e-01, -1.784e-01)); + r += mul(s2_5, M4(-2.948e-02, 6.034e-02, 2.741e-02, -1.858e-03, -8.280e-02, -3.976e-02, -1.900e-02, -8.848e-04, 9.574e-03, 9.837e-02, -1.075e-02, -4.271e-02, 8.699e-02, 4.763e-02, 3.408e-02, 1.132e-02)); + r += mul(s2_6, M4(-5.689e-03, 1.968e-02, -9.377e-03, -7.532e-03, -4.090e-02, 4.229e-02, 6.487e-03, -4.127e-03, -3.308e-02, 5.425e-01, -6.888e-02, -5.967e-02, -4.626e-04, -2.241e-02, -1.314e-02, -9.065e-03)); + r += mul(s2_7, M4(8.726e-02, -1.536e-02, 5.006e-02, -3.736e-02, -8.779e-02, -7.939e-02, 7.228e-02, 6.325e-02, -2.287e-02, 7.908e-02, 7.380e-02, 1.010e-01, -5.090e-02, -3.593e-02, 2.127e-02, 3.041e-02)); + r += mul(s2_8, M4(-2.976e-02, -5.428e-03, 1.955e-02, -3.945e-02, 6.621e-02, 7.847e-02, 3.751e-02, -2.453e-02, 5.446e-02, -2.147e-03, 9.343e-02, 1.831e-02, -2.326e-02, 9.726e-03, 9.310e-03, 4.441e-02)); + r += mul(s3_0, M4(8.006e-03, 1.303e-02, -1.699e-02, 3.502e-02, 3.206e-02, -1.142e-02, 6.145e-02, 4.134e-03, 5.406e-03, -6.115e-02, -1.278e-02, -7.290e-03, 6.903e-02, -1.011e-02, -2.205e-02, 1.344e-01)); + r += mul(s3_1, M4(-1.503e-04, 1.265e-02, 9.939e-03, -7.723e-04, 6.080e-03, -7.056e-02, -4.305e-02, -3.895e-02, 4.111e-02, -2.079e-02, 3.906e-02, 2.248e-02, 6.166e-02, 3.592e-02, -1.023e-01, 1.410e-02)); + r += mul(s3_2, M4(4.089e-02, -6.372e-02, -1.870e-02, 2.216e-02, -1.168e-02, 3.430e-02, -3.698e-03, 1.849e-02, 9.200e-03, 2.253e-02, 1.825e-02, 1.871e-02, 4.994e-02, -3.840e-03, -5.043e-02, 7.897e-03)); + r += mul(s3_3, M4(1.894e-02, -2.918e-02, -3.547e-03, 9.431e-03, 3.416e-02, -1.264e-01, -1.016e-01, 6.763e-02, 6.354e-02, -4.594e-02, -5.100e-02, 6.429e-02, -1.147e-02, 8.992e-02, 2.033e-02, 2.801e-02)); + r += mul(s3_4, M4(-1.359e-01, -1.328e-01, -2.280e-01, 1.655e-01, 1.170e-01, -3.456e-02, -1.741e-02, 2.617e-02, -4.748e-02, -6.031e-02, 
-3.827e-02, 6.860e-02, 5.744e-02, -2.272e-01, 9.794e-02, -7.356e-02)); + r += mul(s3_5, M4(-1.267e-01, 5.624e-02, -5.376e-02, 7.105e-02, 8.256e-03, -1.507e-02, -1.542e-02, 1.324e-02, -4.601e-02, 5.123e-02, -1.844e-02, 2.398e-02, 7.308e-02, 1.566e-02, -3.331e-02, 7.307e-02)); + r += mul(s3_6, M4(2.185e-02, 1.127e-01, -3.688e-02, 4.312e-02, 5.437e-03, -2.636e-02, -6.787e-02, 2.842e-02, 6.742e-02, 9.200e-03, 3.874e-02, 8.117e-02, 1.908e-03, 2.687e-02, -3.284e-02, 4.026e-02)); + r += mul(s3_7, M4(-1.473e-02, -2.724e-02, -4.317e-02, -1.223e-01, 8.346e-03, -1.594e-02, 1.405e-03, -1.531e-02, 1.548e-01, -5.796e-03, 5.517e-02, -8.836e-02, -1.659e-02, 2.499e-02, -2.863e-02, 5.616e-02)); + r += mul(s3_8, M4(4.551e-02, -2.843e-02, 6.142e-03, -6.877e-02, -6.137e-04, -4.215e-03, 1.564e-02, -4.295e-03, 4.038e-02, -3.400e-02, 6.898e-02, -3.296e-02, 3.822e-02, -1.935e-02, -6.912e-03, 2.093e-02)); + r += mul(s4_0, M4(-2.593e-02, 2.093e-02, -1.977e-02, -8.374e-02, -3.038e-02, 7.975e-02, 2.651e-02, 3.106e-03, -1.552e-02, -2.949e-02, -3.688e-02, -3.919e-02, 2.234e-02, 2.978e-03, -6.088e-03, 1.161e-02)); + r += mul(s4_1, M4(-9.005e-03, -2.757e-02, 7.088e-02, -9.204e-02, 5.620e-02, -2.922e-02, -6.345e-02, 6.129e-02, 4.920e-02, -2.946e-02, 8.479e-02, -8.622e-03, -3.982e-03, 9.320e-02, 6.492e-02, 6.297e-02)); + r += mul(s4_2, M4(-2.727e-02, 4.210e-02, 2.567e-02, -3.893e-03, -1.296e-02, 1.579e-02, 2.187e-02, -1.296e-02, 4.373e-03, 2.246e-02, -1.385e-02, 3.281e-02, -2.399e-02, -1.604e-02, -2.746e-02, -5.847e-02)); + r += mul(s4_3, M4(1.420e-02, -9.644e-02, 8.939e-02, -6.151e-02, 3.273e-02, 7.034e-02, 1.963e-02, 4.333e-02, 7.394e-02, -1.392e-01, -8.823e-02, -1.900e-02, 2.113e-02, -5.623e-03, -3.419e-02, 2.875e-02)); + r += mul(s4_4, M4(1.148e-02, -1.850e-02, 3.121e-02, 7.214e-02, 1.078e-02, 9.795e-02, 1.532e-01, -6.949e-03, -2.142e-01, -9.572e-02, 1.877e-01, -1.601e-01, -2.678e-02, 8.389e-02, -8.181e-02, -1.043e-01)); + r += mul(s4_5, M4(-5.404e-02, 2.965e-02, 1.950e-02, -5.925e-02, 
5.095e-02, 3.441e-02, 1.074e-01, -5.415e-02, 1.158e-01, 2.059e-02, 1.930e-02, 3.106e-02, 4.156e-02, -1.818e-02, 2.495e-02, -4.216e-02)); + r += mul(s4_6, M4(-5.912e-03, -2.800e-02, 2.312e-02, -3.870e-03, 8.954e-03, -1.878e-02, 4.475e-03, -3.050e-05, 4.467e-02, 4.924e-02, 2.961e-02, 3.709e-02, 3.406e-02, 3.756e-02, 4.557e-03, 1.972e-02)); + r += mul(s4_7, M4(-4.816e-03, 6.425e-02, -6.616e-02, -2.269e-02, -1.666e-02, -1.313e-02, -3.477e-02, -4.396e-03, -9.325e-02, -1.418e-02, -6.576e-02, -1.131e-01, -1.779e-03, 1.724e-02, 1.067e-01, -1.243e-02)); + r += mul(s4_8, M4(-1.228e-02, -7.142e-03, 1.009e-01, -4.010e-02, 2.561e-02, -2.763e-02, -1.154e-02, 4.042e-02, 8.868e-02, -7.952e-02, 3.336e-02, 2.707e-02, -2.938e-02, 2.756e-02, 3.208e-02, 4.626e-02)); + r += mul(s5_0, M4(-9.159e-02, 6.431e-02, -1.213e-02, -7.887e-02, -3.602e-02, 1.221e-01, -9.357e-02, 3.184e-02, 1.606e-02, -3.790e-02, -5.347e-02, 9.859e-03, -2.165e-02, 6.015e-02, 4.728e-03, 1.987e-02)); + r += mul(s5_1, M4(-7.130e-02, -7.513e-02, 1.332e-01, -2.038e-01, -2.621e-01, 1.490e-01, 9.949e-02, -1.379e-01, -3.441e-02, 7.215e-02, 8.020e-02, -2.985e-02, -1.117e-02, 2.100e-02, 2.912e-02, 9.687e-03)); + r += mul(s5_2, M4(-5.556e-02, -2.980e-02, 5.666e-02, -9.088e-02, 5.883e-02, -5.671e-02, 1.815e-02, 1.004e-02, 1.114e-04, 2.225e-03, 1.452e-02, -2.179e-02, 2.119e-02, -3.922e-02, 1.355e-01, -8.419e-02)); + r += mul(s5_3, M4(-8.849e-03, -2.438e-01, -3.316e-02, -1.082e-01, 1.853e-02, 1.050e-01, 2.802e-02, 6.987e-02, 1.900e-02, -9.460e-02, -4.280e-02, -7.711e-02, 1.582e-02, -1.505e-02, -4.113e-02, 7.351e-05)); + r += mul(s5_4, M4(-8.418e-02, 1.956e-01, -1.095e-01, 1.026e-01, 1.909e-01, -1.499e-01, 4.226e-01, -1.755e-01, 2.404e-02, -4.169e-02, 3.206e-02, 6.999e-02, -1.089e-02, 2.476e-01, -7.327e-02, 2.165e-01)); + r += mul(s5_5, M4(5.984e-02, -6.277e-02, 3.448e-02, -4.323e-02, -1.380e-02, -1.972e-02, 6.379e-02, 8.212e-03, 9.482e-02, -1.313e-02, 8.471e-02, -1.677e-02, 5.325e-01, -1.756e-01, 3.271e-01, -1.751e-01)); + r += 
mul(s5_6, M4(-6.568e-02, -1.029e-01, 5.247e-02, -1.028e-01, -1.212e-02, 2.766e-02, 1.504e-02, -1.584e-02, 1.014e-02, -3.362e-02, 4.607e-02, -1.774e-02, 1.367e-02, 5.357e-02, 1.847e-02, 7.664e-03)); + r += mul(s5_7, M4(2.299e-02, 9.632e-02, -1.358e-02, -1.970e-02, -5.210e-02, 1.483e-01, 1.090e-01, 6.136e-04, 3.251e-02, -1.370e-01, -8.047e-02, 9.135e-03, -2.789e-02, -5.084e-02, -8.085e-03, -1.059e-01)); + r += mul(s5_8, M4(-3.400e-02, -7.040e-03, 3.179e-02, -7.687e-02, 2.856e-02, -4.108e-02, -1.130e-01, 1.423e-02, -4.021e-02, 5.519e-03, 3.914e-03, 4.571e-02, 6.344e-02, -3.582e-02, 1.021e-01, 1.509e-02)); + r += mul(s6_0, M4(1.058e-01, -7.012e-02, -5.282e-03, 1.225e-01, -2.563e-02, 3.260e-02, 2.460e-02, 1.274e-02, 4.838e-02, -1.180e-01, -3.756e-02, 1.926e-02, 2.041e-02, -1.110e-02, -1.384e-03, 6.233e-03)); + r += mul(s6_1, M4(-3.115e-02, 7.858e-03, -7.703e-02, -6.222e-02, 3.918e-02, -5.431e-02, 3.723e-03, 3.550e-02, 1.788e-01, 7.275e-02, -2.121e-02, 8.663e-02, 1.253e-02, -5.224e-02, -1.388e-02, -1.968e-02)); + r += mul(s6_2, M4(7.874e-02, -6.188e-02, -9.159e-02, 1.215e-01, -2.795e-02, 2.348e-02, 1.755e-02, -5.391e-03, -1.187e-01, 3.654e-02, -3.447e-02, -3.986e-02, -9.454e-04, 3.849e-02, 3.628e-02, 4.309e-02)); + r += mul(s6_3, M4(-1.932e-02, 1.476e-01, -7.003e-02, 3.489e-02, 1.989e-02, -6.807e-02, -6.319e-02, -1.538e-02, -2.931e-02, 6.941e-02, 8.206e-03, 8.153e-02, 1.229e-02, -4.671e-02, -3.886e-02, 1.106e-03)); + r += mul(s6_4, M4(9.417e-02, -2.394e-01, 1.957e-01, 5.788e-02, -2.446e-02, 8.874e-03, 5.894e-02, 4.274e-02, 1.005e-01, -1.371e-01, -1.676e-01, -3.986e-02, -1.481e-01, 1.306e-02, 2.543e-02, -1.523e-01)); + r += mul(s6_5, M4(6.421e-02, 3.377e-02, -7.346e-02, -5.284e-03, 2.185e-02, 6.796e-03, 4.730e-02, -3.318e-02, 5.095e-03, 1.153e-01, 7.242e-02, 1.921e-02, 1.145e-01, -5.113e-02, 1.402e-04, 8.227e-02)); + r += mul(s6_6, M4(2.136e-02, -5.919e-02, -6.817e-02, 5.806e-02, 2.670e-02, -4.493e-03, -4.624e-03, 2.248e-02, 1.443e-02, -2.058e-02, -5.921e-03, 2.605e-02, 
8.326e-03, -9.662e-02, -5.167e-02, -2.557e-02)); + r += mul(s6_7, M4(4.999e-02, -1.434e-01, -1.646e-01, 4.182e-02, -6.911e-02, -4.970e-02, -6.567e-02, -4.646e-03, 2.940e-02, 8.285e-02, 3.201e-02, 1.668e-02, -3.641e-02, 3.822e-02, 6.107e-03, 8.802e-02)); + r += mul(s6_8, M4(7.182e-02, 3.517e-02, -4.517e-02, 4.548e-02, 5.198e-02, -4.318e-03, 2.819e-02, -8.629e-03, 3.274e-04, -5.217e-03, 2.982e-02, -2.487e-02, -9.713e-03, -2.504e-02, -1.976e-02, 1.010e-02)); + r += mul(s7_0, M4(-1.477e-03, 1.436e-02, -5.882e-03, 2.125e-02, -6.409e-02, -2.897e-03, -9.065e-03, -2.558e-02, 1.167e-02, -5.244e-02, -7.526e-02, 2.297e-02, 1.680e-02, -1.422e-02, 8.842e-03, 2.474e-02)); + r += mul(s7_1, M4(2.233e-03, 2.633e-02, -2.334e-02, -3.533e-02, 8.916e-02, -1.052e-01, -4.954e-02, 7.028e-02, 4.928e-02, 1.115e-01, 8.849e-02, 7.943e-02, -3.440e-02, 4.251e-02, -2.732e-02, -2.890e-02)); + r += mul(s7_2, M4(8.775e-03, -2.610e-02, 2.123e-02, -3.954e-03, -7.393e-02, 6.974e-02, 2.462e-02, -6.027e-02, 1.582e-02, -1.214e-02, -2.985e-02, -4.174e-03, -1.580e-02, 8.317e-03, 9.589e-03, 2.570e-02)); + r += mul(s7_3, M4(5.606e-03, -1.468e-02, 1.723e-02, -4.016e-02, 1.854e-01, -3.051e-01, -3.555e-02, -7.989e-02, -8.124e-03, -1.631e-02, -3.592e-02, 2.174e-02, -2.360e-02, -8.126e-02, -5.451e-02, -6.362e-02)); + r += mul(s7_4, M4(-5.537e-02, 1.314e-02, 8.755e-02, -9.445e-03, -3.013e-01, -1.111e-01, -8.096e-02, 2.060e-01, -9.947e-02, 1.158e-02, 3.951e-02, -2.653e-02, -4.153e-01, 2.589e-01, -6.686e-01, -5.985e-02)); + r += mul(s7_5, M4(1.822e-02, 2.026e-02, -3.674e-02, 1.442e-02, 1.939e-01, -1.377e-03, 1.205e-01, -6.445e-02, 7.434e-02, 3.865e-03, 2.157e-02, -5.609e-02, -6.769e-02, -3.530e-02, 5.183e-02, 2.761e-02)); + r += mul(s7_6, M4(3.779e-02, 7.490e-03, -6.374e-03, 2.683e-02, 2.187e-02, 6.518e-02, 3.773e-02, 2.820e-02, -1.138e-02, -9.686e-03, 7.643e-03, -9.547e-03, -1.200e-02, 7.512e-02, -1.624e-03, 2.857e-02)); + r += mul(s7_7, M4(-7.202e-02, 4.839e-03, -6.140e-02, -1.012e-03, -1.585e-02, -2.412e-02, 
-1.058e-01, -2.273e-02, 2.286e-02, -6.744e-02, -5.347e-02, -2.545e-02, 8.405e-02, -5.129e-02, -5.990e-02, -2.139e-02)); + r += mul(s7_8, M4(2.350e-02, 1.945e-03, 1.996e-02, 3.091e-03, -6.660e-03, -6.128e-03, 3.594e-02, -5.780e-02, -2.585e-02, 1.893e-02, 6.346e-02, -4.523e-02, -7.251e-02, -1.237e-02, 1.889e-02, 3.806e-02)); + r += V4(-1.997e-03, -1.078e-03, -3.096e-04, -3.145e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.202e-02, -5.206e-02, -1.313e-02, -1.772e-02, 2.042e-02, -1.495e-02, 2.553e-02, -4.534e-02, 4.631e-02, 3.608e-02, -2.366e-02, -1.733e-02, 1.333e-02, -7.776e-02, -5.467e-02, -2.504e-02)); + r += mul(s0_1, M4(2.778e-03, -4.640e-02, -4.725e-02, 4.790e-02, 1.138e-02, 4.644e-02, -3.479e-02, -1.653e-01, 2.812e-02, -2.482e-02, 2.254e-02, -6.457e-02, 8.879e-03, 5.711e-02, 3.155e-02, -1.638e-01)); + r += mul(s0_2, M4(4.528e-02, -2.807e-02, -5.630e-02, -3.859e-02, -5.916e-02, 2.698e-02, 7.203e-02, -6.372e-02, -1.076e-02, -2.437e-02, 3.047e-02, -4.819e-02, 1.503e-04, -2.093e-02, -1.619e-03, -7.104e-02)); + r += mul(s0_3, M4(-1.326e-01, -1.443e-01, 2.782e-02, 2.616e-02, 5.204e-02, 2.389e-02, -8.061e-03, 3.848e-02, 1.095e-01, -4.839e-03, -3.616e-02, 1.281e-02, 4.160e-02, 6.821e-02, -6.438e-02, -8.374e-02)); + r += mul(s0_4, M4(-3.540e-01, -1.772e-02, -7.275e-02, 1.626e-02, 7.496e-02, 4.160e-02, -3.751e-03, 
-1.767e-02, 2.694e-02, -3.734e-02, -1.266e-01, 1.995e-02, -4.675e-02, -4.186e-02, 1.885e-01, 3.583e-02)); + r += mul(s0_5, M4(-5.912e-02, 2.717e-02, -1.152e-01, -2.959e-02, 6.588e-02, 5.299e-02, 1.552e-01, 2.535e-03, 5.990e-02, -2.895e-02, -2.847e-03, 6.045e-02, 2.351e-02, 2.236e-02, -6.715e-02, 9.303e-03)); + r += mul(s0_6, M4(4.031e-02, -2.244e-03, -1.873e-02, -2.074e-02, -1.312e-02, -1.007e-02, 2.915e-02, -1.229e-02, -3.107e-02, 2.602e-03, -3.643e-02, 3.225e-03, 4.314e-02, 4.228e-02, -6.631e-02, 3.520e-02)); + r += mul(s0_7, M4(-1.581e-01, -1.810e-01, 7.088e-02, 1.042e-02, 1.581e-01, 2.587e-02, -3.175e-03, 6.183e-02, 2.268e-02, -3.908e-02, -3.724e-02, 8.091e-03, 4.869e-02, 5.575e-03, 9.839e-02, 1.347e-03)); + r += mul(s0_8, M4(2.168e-02, 5.077e-02, 6.702e-03, -6.015e-03, 3.845e-02, -3.942e-02, 2.236e-02, 5.486e-03, -6.448e-03, -2.784e-02, 2.199e-02, -2.274e-02, 1.640e-02, 2.604e-02, 7.827e-04, -3.243e-03)); + r += mul(s1_0, M4(-1.666e-02, -4.525e-02, -4.417e-03, 6.513e-03, -5.906e-03, -4.944e-02, 3.514e-04, -1.025e-01, 2.019e-02, -3.469e-02, -1.699e-02, 3.441e-02, 4.242e-02, -3.344e-02, -7.866e-02, 1.878e-01)); + r += mul(s1_1, M4(2.426e-02, 2.344e-03, -4.044e-02, 2.125e-02, 8.551e-02, -3.998e-02, -5.706e-02, -2.259e-01, 2.623e-02, 6.970e-02, 2.346e-02, 1.463e-02, 3.045e-02, 2.641e-03, 7.730e-02, -4.346e-02)); + r += mul(s1_2, M4(1.810e-02, 8.911e-03, -2.008e-02, -6.464e-02, -2.773e-02, -2.173e-02, 4.828e-02, -7.345e-02, 1.638e-02, -8.087e-02, 8.820e-03, 2.566e-02, -1.187e-02, -4.421e-03, -5.535e-03, -3.896e-02)); + r += mul(s1_3, M4(3.406e-02, -1.770e-03, -1.062e-02, -3.381e-02, -1.629e-02, 2.031e-02, 1.762e-02, 4.063e-02, -3.554e-02, 1.917e-01, -9.696e-03, -8.665e-04, 7.837e-02, 2.752e-01, -2.005e-01, -8.668e-02)); + r += mul(s1_4, M4(-4.266e-02, 6.996e-02, 6.613e-02, -1.204e-02, 3.922e-03, 9.455e-02, -2.608e-01, 5.084e-02, -8.956e-02, 3.111e-02, -1.812e-02, -6.210e-02, -8.112e-02, 6.008e-02, 1.211e-01, -2.499e-02)); + r += mul(s1_5, M4(-3.288e-02, 1.581e-02, 
-4.622e-02, -4.464e-03, 6.258e-02, 7.641e-02, 1.685e-01, -2.340e-02, 2.054e-02, -1.410e-02, -4.083e-02, 5.616e-03, 5.094e-02, 2.548e-02, -6.280e-02, -8.699e-03)); + r += mul(s1_6, M4(4.355e-02, -4.089e-02, -7.083e-03, 2.218e-02, 8.966e-03, -3.881e-03, 4.896e-02, 2.680e-03, 2.440e-02, 3.871e-02, -1.186e-02, -1.416e-02, 3.558e-02, 1.948e-01, -6.474e-02, 7.606e-03)); + r += mul(s1_7, M4(3.047e-02, 2.547e-02, -6.296e-03, -1.136e-02, -5.031e-02, -5.999e-02, 4.286e-02, 4.152e-02, 1.145e-02, 2.706e-02, -1.632e-02, -3.617e-03, 4.819e-02, -2.707e-02, 1.276e-01, 3.283e-02)); + r += mul(s1_8, M4(6.520e-02, 4.851e-02, -8.113e-03, 1.440e-02, 4.084e-02, 4.061e-02, 2.539e-02, -1.643e-02, -7.473e-03, -1.430e-02, -5.003e-04, -3.345e-02, 5.729e-02, 1.718e-02, 1.987e-03, 3.236e-03)); + r += mul(s2_0, M4(-6.699e-03, -2.586e-02, 5.295e-03, 3.528e-02, -6.310e-03, -7.413e-02, 1.474e-02, 3.928e-02, 1.458e-02, 1.269e-02, 1.055e-02, -1.259e-01, 5.331e-02, -6.769e-02, -3.363e-02, -4.863e-02)); + r += mul(s2_1, M4(-3.673e-02, 1.092e-02, -1.691e-02, -4.693e-02, 4.304e-05, -2.996e-03, -7.283e-03, 5.531e-02, 7.316e-04, -6.074e-04, -6.141e-02, -5.570e-02, 7.371e-02, -2.323e-02, -4.505e-02, 3.014e-02)); + r += mul(s2_2, M4(-1.643e-03, -9.824e-03, 1.511e-02, -2.464e-02, 1.116e-02, 2.840e-02, -4.088e-02, -6.275e-02, 1.293e-02, 1.246e-02, 1.986e-02, -5.365e-02, -4.314e-02, -2.603e-02, -7.790e-02, 2.061e-03)); + r += mul(s2_3, M4(5.190e-02, -1.787e-01, 2.032e-02, 1.634e-02, 6.689e-03, 1.083e-01, -1.797e-02, 2.083e-02, -1.605e-02, 1.100e-04, -6.777e-02, 1.240e-01, -2.460e-02, 1.572e-01, 9.084e-02, -4.332e-02)); + r += mul(s2_4, M4(2.016e-01, 9.259e-02, 8.855e-02, 9.362e-02, 4.120e-02, 5.435e-02, -3.455e-02, -1.942e-01, 2.359e-02, 1.944e-02, -8.789e-02, 6.333e-02, -9.278e-02, -1.680e-02, -2.102e-01, 1.023e-02)); + r += mul(s2_5, M4(-1.789e-02, -5.531e-02, -1.321e-01, 2.582e-02, 4.118e-02, -1.022e-01, -5.847e-02, 5.383e-02, 4.017e-02, 5.977e-03, -9.539e-02, -4.341e-02, 3.636e-02, 2.195e-02, 9.371e-03, 
-3.872e-02)); + r += mul(s2_6, M4(-4.446e-02, 9.027e-03, -3.196e-02, 3.966e-03, 2.963e-03, -1.151e-01, 1.687e-03, 2.612e-02, 2.786e-01, -5.185e-02, -1.802e-01, -1.431e-01, -5.649e-02, -8.974e-02, 3.276e-02, 1.676e-03)); + r += mul(s2_7, M4(-7.750e-02, -3.620e-02, 1.018e-02, -1.904e-02, 5.416e-02, 3.335e-02, -1.312e-01, 6.845e-02, -5.062e-02, -2.519e-02, -2.546e-01, -3.405e-02, 9.969e-03, 2.791e-02, -2.363e-02, 4.162e-03)); + r += mul(s2_8, M4(2.480e-02, 6.180e-03, -5.884e-02, -6.062e-03, 1.768e-02, 1.448e-02, -3.561e-02, 1.611e-02, 8.485e-02, 7.662e-02, -4.287e-02, 5.595e-02, 4.231e-02, 4.136e-02, -1.284e-01, 2.549e-02)); + r += mul(s3_0, M4(-3.932e-03, 7.414e-03, 1.909e-02, 4.169e-03, 1.647e-02, -1.229e-02, -2.545e-02, 6.120e-03, -3.781e-02, 4.003e-02, -3.228e-03, 2.159e-02, -5.261e-02, 2.311e-02, -2.729e-02, 3.838e-02)); + r += mul(s3_1, M4(-7.097e-02, 3.959e-02, 1.671e-02, 1.431e-02, 6.091e-02, 4.520e-04, 3.667e-03, 3.791e-02, 2.410e-02, 3.634e-02, 2.145e-02, -9.994e-02, -1.372e-01, 4.290e-02, 1.226e-01, 1.016e-01)); + r += mul(s3_2, M4(2.725e-02, 1.776e-03, 3.241e-02, 3.211e-02, 3.056e-03, 8.507e-03, -1.008e-02, -4.513e-02, 2.783e-02, 2.291e-03, -5.210e-02, -2.966e-02, -5.094e-02, 4.124e-03, 6.788e-02, 6.846e-02)); + r += mul(s3_3, M4(5.710e-02, -8.337e-02, 3.944e-02, -1.798e-02, 5.437e-02, 1.179e-01, 1.762e-02, -4.683e-02, 2.977e-02, 4.506e-02, 2.724e-02, 6.091e-02, 1.104e-02, 2.352e-01, -2.861e-02, -1.725e-02)); + r += mul(s3_4, M4(2.343e-01, 6.819e-02, 4.281e-01, 1.513e-01, -7.984e-02, -4.274e-02, 7.711e-02, -9.257e-04, 5.407e-03, -8.743e-02, 5.039e-03, 8.324e-02, -2.269e-02, 1.410e-01, -1.995e-01, 2.319e-02)); + r += mul(s3_5, M4(8.306e-03, -5.039e-02, -5.041e-02, -1.082e-02, 1.194e-03, -9.653e-03, 8.765e-02, -1.638e-03, 4.377e-03, -2.279e-03, -1.560e-01, -2.593e-02, -5.295e-02, 5.573e-02, 1.626e-01, -1.273e-02)); + r += mul(s3_6, M4(-1.995e-01, 4.895e-02, 2.898e-02, -3.804e-02, -3.671e-02, 3.441e-02, 5.273e-02, -3.421e-03, -7.544e-02, 1.140e-01, 8.635e-03, 
4.871e-02, -4.460e-02, 4.741e-03, 8.239e-02, -1.949e-03)); + r += mul(s3_7, M4(-2.320e-01, -4.818e-02, -1.578e-02, 1.857e-02, 3.682e-02, 4.703e-02, 3.702e-02, 2.967e-02, -4.824e-02, 6.226e-02, -6.276e-02, 1.757e-02, -3.064e-02, -1.469e-02, 7.422e-03, 2.195e-02)); + r += mul(s3_8, M4(-2.482e-02, -9.439e-03, 4.244e-03, 1.053e-02, 7.969e-03, 1.956e-02, -2.825e-02, 9.694e-03, 1.038e-02, 3.184e-02, -6.863e-02, 1.815e-03, -6.007e-03, 5.574e-02, -3.238e-02, 1.116e-02)); + r += mul(s4_0, M4(1.183e-02, -3.921e-02, -1.393e-02, 1.735e-02, -1.585e-03, 1.983e-02, -2.314e-02, -6.488e-03, 4.871e-02, -5.115e-02, 1.302e-02, 5.821e-02, -1.611e-02, -2.594e-02, 2.390e-02, -3.653e-03)); + r += mul(s4_1, M4(3.723e-02, 4.528e-04, -8.784e-02, -7.429e-02, 1.899e-02, -2.095e-03, 7.305e-02, -8.574e-02, 4.873e-02, -2.074e-02, -4.965e-02, -2.479e-02, -1.387e-02, 1.443e-02, -3.904e-02, -2.719e-03)); + r += mul(s4_2, M4(2.351e-02, -1.994e-02, -7.496e-02, -7.192e-02, 2.219e-02, -4.016e-02, 1.939e-02, 4.346e-03, 4.140e-02, 9.277e-04, -2.265e-02, -3.892e-02, -8.554e-03, -2.833e-02, 4.008e-02, 4.629e-02)); + r += mul(s4_3, M4(1.208e-01, -3.823e-02, -3.269e-02, -1.286e-03, 4.417e-03, -4.882e-02, 4.111e-02, 4.979e-03, -6.309e-02, -2.078e-02, 3.575e-03, -2.735e-02, -2.922e-02, -1.231e-02, 2.696e-02, -9.421e-03)); + r += mul(s4_4, M4(2.303e-02, -2.025e-02, -2.137e-03, -4.277e-02, 9.204e-02, -9.333e-02, 1.704e-02, 6.432e-02, -1.993e-01, 7.357e-02, -1.341e-01, 1.839e-01, -5.953e-02, -7.500e-02, -7.122e-02, 2.273e-02)); + r += mul(s4_5, M4(5.218e-02, -3.047e-02, -2.291e-01, -1.955e-03, -1.840e-02, 1.907e-02, -1.891e-01, 1.745e-02, 6.113e-03, -1.898e-02, -2.142e-03, -1.337e-02, 9.104e-03, -4.063e-02, 2.113e-01, -2.814e-04)); + r += mul(s4_6, M4(-1.083e-02, -1.247e-03, 8.832e-03, 2.131e-03, 1.051e-02, 5.914e-02, -3.088e-04, -1.683e-02, -8.502e-02, 2.476e-02, 2.931e-02, 1.084e-02, 9.516e-03, 1.040e-02, 1.929e-02, -8.289e-03)); + r += mul(s4_7, M4(5.415e-03, -2.825e-02, -3.766e-03, -2.316e-02, -4.971e-02, 
5.587e-02, -4.335e-03, -2.868e-02, -3.055e-01, -4.066e-01, -4.920e-02, 2.395e-02, 1.180e-02, -8.303e-02, -9.242e-02, 3.838e-03)); + r += mul(s4_8, M4(2.608e-02, -3.677e-03, -6.193e-02, -2.307e-02, -1.527e-03, 1.554e-02, 2.510e-02, 1.332e-02, -4.938e-02, 9.703e-02, 6.559e-02, 7.017e-03, -1.648e-02, 1.028e-02, 3.660e-02, 1.336e-02)); + r += mul(s5_0, M4(2.651e-02, 2.183e-02, 2.040e-02, -2.505e-02, -2.446e-03, -1.051e-01, 3.534e-02, -9.687e-03, 3.315e-03, -2.465e-02, 3.093e-03, 4.203e-03, 2.357e-03, -1.258e-03, -5.855e-03, 4.044e-02)); + r += mul(s5_1, M4(3.443e-02, 2.962e-03, -4.624e-02, -1.007e-01, -8.151e-02, 6.946e-02, -9.300e-02, 5.324e-02, -2.258e-02, -2.535e-02, -4.632e-02, -6.499e-03, 5.106e-02, -4.213e-02, 2.573e-02, 1.418e-02)); + r += mul(s5_2, M4(8.939e-03, -1.427e-02, -1.178e-01, 2.562e-02, 4.116e-02, 2.258e-02, -3.821e-02, 1.074e-02, 3.980e-03, -1.391e-02, 1.545e-02, 2.426e-02, 2.410e-02, -4.835e-02, 3.651e-02, 8.455e-02)); + r += mul(s5_3, M4(7.596e-02, -2.409e-01, -1.144e-03, 6.046e-02, -2.197e-02, -2.105e-02, 4.999e-02, -1.540e-02, 5.708e-02, -4.963e-02, 1.332e-02, -1.245e-01, -1.575e-03, -4.965e-02, 1.620e-02, 2.225e-02)); + r += mul(s5_4, M4(1.679e-01, 1.666e-01, 1.548e-01, -1.212e-01, 1.830e-01, -1.489e-01, -4.146e-01, 7.725e-05, 2.168e-01, 6.509e-02, -6.296e-02, 7.612e-02, -1.388e-01, 2.861e-01, -3.157e-02, -3.745e-02)); + r += mul(s5_5, M4(7.487e-02, -2.872e-02, -5.950e-02, 5.356e-04, -7.855e-02, -7.150e-02, -2.891e-01, -4.871e-02, -9.485e-03, 1.422e-03, -6.204e-02, -8.279e-04, -3.348e-02, 1.801e-01, 3.021e-01, 1.198e-01)); + r += mul(s5_6, M4(-1.100e-01, -7.397e-02, -5.533e-02, 1.415e-02, 3.442e-02, -4.303e-02, 1.338e-03, 1.903e-03, 6.188e-03, 3.195e-02, 2.301e-02, 6.367e-03, -1.015e-02, -7.740e-03, 6.780e-03, -1.874e-02)); + r += mul(s5_7, M4(4.114e-02, 1.979e-02, -4.935e-02, 1.276e-04, -3.920e-02, -7.128e-02, -5.296e-02, 2.289e-02, -2.920e-02, 5.082e-02, -1.198e-02, -7.483e-02, 3.892e-02, -1.013e-01, -2.029e-02, 2.929e-02)); + r += mul(s5_8, 
M4(-6.149e-03, -1.169e-01, 1.167e-02, -1.381e-03, 5.609e-03, 3.120e-02, 4.072e-03, 1.250e-02, 1.698e-02, 3.357e-02, 1.476e-02, -3.236e-02, -1.193e-01, -9.789e-03, 1.314e-01, -2.507e-02)); + r += mul(s6_0, M4(-5.521e-02, 1.141e-01, 1.397e-02, 5.606e-02, -1.255e-02, 7.418e-03, -1.474e-02, 5.248e-02, -1.691e-03, 1.189e-01, -1.471e-02, 1.164e-02, -2.696e-02, -2.463e-02, 1.766e-02, 1.483e-02)); + r += mul(s6_1, M4(-7.491e-02, 6.301e-02, 1.107e-01, -8.152e-02, 2.512e-02, -3.004e-02, 4.235e-02, -7.317e-02, 9.981e-02, -1.609e-02, -6.883e-02, 2.647e-01, 1.224e-02, 3.268e-02, -6.409e-03, -5.662e-02)); + r += mul(s6_2, M4(-6.863e-02, 2.457e-03, 1.028e-01, 6.772e-02, -4.231e-03, -1.520e-02, -6.802e-02, 5.775e-03, 1.304e-02, -6.472e-02, 1.805e-02, 3.022e-02, 2.355e-02, 1.757e-02, -2.309e-02, -1.274e-01)); + r += mul(s6_3, M4(-1.865e-03, 1.707e-02, 5.189e-02, 1.018e-01, 6.379e-02, 4.276e-03, 2.532e-02, -5.459e-03, -1.039e-01, 1.345e-01, -2.931e-02, -2.603e-02, 1.687e-02, 9.416e-02, 3.137e-02, -6.282e-02)); + r += mul(s6_4, M4(2.264e-01, 2.890e-01, 1.939e-01, 6.100e-02, -7.269e-02, -4.193e-02, -6.819e-02, -2.447e-02, -1.979e-01, 3.552e-01, -1.022e-02, -8.885e-02, -5.533e-02, 2.902e-02, -1.629e-01, -7.129e-02)); + r += mul(s6_5, M4(-9.438e-02, -6.314e-02, -1.384e-01, 5.079e-02, 2.769e-02, 2.104e-02, -7.026e-02, 9.677e-03, -5.322e-02, -2.693e-02, -1.211e-03, -2.267e-02, 2.969e-02, 2.716e-02, -2.244e-02, 9.032e-03)); + r += mul(s6_6, M4(-1.249e-01, 1.369e-01, 1.281e-03, -3.176e-02, 1.988e-02, 1.462e-02, 1.571e-02, 4.847e-03, -1.322e-02, -1.532e-02, 3.146e-02, -6.801e-03, 9.289e-02, -6.906e-02, 2.312e-02, 2.619e-02)); + r += mul(s6_7, M4(-2.206e-01, -9.270e-03, 1.605e-01, 5.277e-02, 2.533e-02, -7.370e-03, -1.740e-02, 9.123e-04, 4.191e-02, 3.426e-02, -6.864e-02, -1.019e-03, 1.386e-01, 1.430e-01, 1.063e-02, -2.014e-02)); + r += mul(s6_8, M4(-3.436e-02, 1.079e-01, 6.215e-02, -1.613e-02, -3.978e-02, 2.359e-03, -5.127e-03, 8.879e-03, 3.608e-02, 2.312e-02, -3.583e-02, -9.984e-03, 
7.083e-02, 1.000e-02, -6.521e-02, -3.528e-02)); + r += mul(s7_0, M4(5.631e-03, -1.099e-02, 2.470e-02, -1.599e-02, 4.649e-02, 1.875e-02, -9.796e-03, -1.112e-01, 1.249e-02, 2.323e-02, 8.349e-03, -1.331e-02, -2.411e-02, -7.650e-03, 3.156e-02, 8.621e-03)); + r += mul(s7_1, M4(-7.994e-03, -1.512e-03, 1.167e-04, 2.619e-02, 8.130e-02, -4.266e-03, 4.462e-02, -3.094e-01, -1.353e-01, 1.170e-01, -3.701e-02, -1.291e-01, 1.761e-03, 6.650e-05, -5.571e-03, 9.789e-02)); + r += mul(s7_2, M4(8.690e-03, 5.748e-04, 4.321e-03, -1.498e-02, -2.104e-03, -3.877e-02, -6.958e-02, -7.560e-02, 1.514e-02, -3.306e-03, 4.355e-02, -5.623e-02, 1.281e-03, 2.728e-02, -1.032e-02, -5.736e-02)); + r += mul(s7_3, M4(1.114e-02, 2.890e-02, 2.115e-02, -3.360e-02, 2.141e-02, 2.168e-02, -1.822e-02, -3.781e-02, -1.509e-02, -3.306e-02, -2.132e-02, 9.044e-02, -2.197e-02, 2.614e-03, -9.609e-03, 7.045e-02)); + r += mul(s7_4, M4(-1.577e-02, -6.364e-02, -1.450e-01, -2.065e-04, -2.633e-01, -1.561e-01, -6.868e-02, -1.999e-02, -1.703e-02, 2.350e-02, -1.037e-01, 6.314e-02, 1.968e-01, -2.012e-01, -1.368e-01, 4.188e-01)); + r += mul(s7_5, M4(-1.454e-02, 4.459e-02, 5.748e-02, -2.923e-02, -1.995e-02, 6.323e-02, 4.365e-03, 1.299e-01, 3.510e-02, -1.645e-01, -4.284e-02, 7.069e-02, -3.466e-03, 1.841e-02, -5.013e-03, -5.554e-02)); + r += mul(s7_6, M4(5.101e-03, -2.166e-02, 2.334e-02, 2.765e-02, 2.607e-02, 1.243e-01, -2.958e-02, -2.111e-03, -5.475e-04, 1.362e-03, -2.305e-02, -3.104e-02, 8.770e-03, -2.155e-04, 1.348e-03, 1.383e-03)); + r += mul(s7_7, M4(6.813e-02, 3.550e-03, -2.008e-02, -6.561e-03, -5.394e-02, -1.180e-01, 6.716e-02, -2.584e-03, -8.732e-02, 1.699e-02, -2.186e-02, -1.698e-02, 1.753e-01, -3.906e-02, 3.391e-01, 2.130e-02)); + r += mul(s7_8, M4(-2.760e-03, -2.192e-02, 2.986e-02, 2.327e-02, 2.508e-02, 3.758e-02, -2.277e-02, -8.764e-03, 5.948e-02, 3.695e-02, -3.128e-02, -1.495e-02, 7.690e-02, 4.470e-02, 4.537e-02, 6.489e-03)); + r += V4(6.699e-04, -8.118e-04, -2.993e-03, -1.333e-03); + return r; +} + +V4 f2(V4 s0_0, V4 
s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-8.061e-02, -3.221e-02, 3.068e-03, 3.472e-02, 4.412e-02, -1.193e-02, -8.794e-02, -3.656e-02, -1.386e-02, -2.252e-02, -4.376e-04, -5.507e-03, 5.252e-02, 1.102e-02, -1.491e-02, 7.509e-03)); + r += mul(s0_1, M4(2.060e-02, 1.551e-01, -1.293e-01, -2.524e-02, -4.112e-02, 2.800e-03, 5.090e-02, 2.449e-03, 1.466e-01, 2.774e-02, 4.342e-02, 1.754e-02, 2.758e-02, 4.681e-02, 1.196e-03, -2.348e-02)); + r += mul(s0_2, M4(-6.134e-02, -3.286e-02, -1.099e-02, 6.717e-02, -3.991e-02, -4.184e-02, 7.389e-02, 2.607e-02, 3.566e-02, 7.387e-04, -2.247e-02, 8.843e-03, 5.188e-02, 1.185e-02, -1.157e-02, 4.026e-02)); + r += mul(s0_3, M4(-6.452e-02, -8.306e-03, 4.224e-02, -1.468e-02, -1.446e-02, -2.163e-02, 4.541e-02, 7.892e-03, -5.107e-02, -4.796e-02, -3.330e-02, 4.337e-02, -7.062e-03, -7.219e-03, 3.193e-03, -2.538e-02)); + r += mul(s0_4, M4(7.595e-02, 1.609e-01, -2.564e-01, 8.810e-02, 1.163e-02, -8.844e-02, -2.578e-01, -5.648e-02, -2.456e-01, 2.254e-01, 1.372e-01, 8.619e-02, 1.230e-01, -2.682e-02, -1.530e-01, 5.659e-02)); + r += mul(s0_5, M4(-7.065e-02, -9.855e-03, 4.001e-02, 1.496e-02, 4.451e-02, 4.064e-04, 1.064e-01, -4.124e-02, -1.496e-02, -8.857e-02, -8.328e-02, 6.789e-02, -6.524e-04, 2.245e-03, -6.397e-03, 1.306e-02)); + r += mul(s0_6, M4(3.339e-03, 4.529e-02, -1.501e-02, 8.457e-02, -1.466e-02, 1.757e-02, 1.484e-02, -1.484e-02, 
5.979e-02, 3.596e-02, -3.776e-02, -6.466e-02, -1.717e-02, -1.267e-02, 1.249e-02, 4.325e-02)); + r += mul(s0_7, M4(-9.548e-02, -2.117e-02, 1.458e-02, -1.303e-02, 6.065e-02, -4.683e-02, 4.435e-03, 1.593e-03, 7.541e-02, 3.867e-02, 8.515e-02, 6.758e-02, -2.376e-02, -1.054e-02, 4.306e-02, -7.642e-02)); + r += mul(s0_8, M4(-6.891e-02, 6.517e-02, 8.366e-02, -5.295e-02, 7.209e-03, 3.726e-02, -2.157e-02, 1.049e-01, 1.034e-02, 5.683e-02, -7.058e-02, 2.181e-02, 7.478e-03, -1.278e-02, -1.492e-02, -3.135e-02)); + r += mul(s1_0, M4(3.248e-03, 2.825e-02, 3.530e-02, 3.137e-03, 4.960e-02, -1.504e-02, -6.241e-02, 1.014e-02, 4.153e-02, -3.616e-02, 7.992e-02, 1.632e-03, -4.343e-02, -9.899e-02, -4.591e-02, 4.862e-03)); + r += mul(s1_1, M4(-8.626e-02, -6.787e-02, -4.015e-02, 1.020e-02, 8.740e-02, 7.088e-02, -2.460e-02, 1.434e-02, 1.836e-01, 3.922e-02, -2.350e-02, -5.132e-04, 3.726e-02, -8.695e-02, -5.849e-02, -1.450e-02)); + r += mul(s1_2, M4(-2.141e-02, -9.575e-02, -6.408e-02, 2.680e-03, -5.232e-02, -4.155e-02, 2.629e-02, 2.581e-02, 7.916e-02, -8.219e-02, -2.804e-02, 4.334e-03, 1.388e-02, 2.954e-02, 1.015e-02, 1.882e-02)); + r += mul(s1_3, M4(-4.166e-02, -4.172e-03, 5.090e-02, 3.572e-02, 5.285e-02, 8.259e-03, 2.529e-03, -2.082e-02, 6.636e-02, -2.586e-02, 4.243e-02, -1.078e-01, -3.849e-03, -1.010e-01, 1.536e-01, 6.421e-02)); + r += mul(s1_4, M4(-6.087e-02, 4.722e-02, 6.591e-02, 1.120e-02, 1.958e-01, -2.862e-02, -1.202e-01, -3.503e-03, 2.528e-02, -2.884e-03, 2.872e-01, 2.683e-01, 2.044e-02, -2.790e-01, -1.796e-01, 8.382e-02)); + r += mul(s1_5, M4(4.732e-02, -5.871e-02, -1.169e-01, -2.031e-02, 2.710e-03, -1.333e-01, 1.421e-01, 7.926e-02, 1.661e-02, -4.483e-02, -8.014e-02, -3.388e-04, 1.691e-02, -4.505e-02, 6.571e-02, -1.348e-02)); + r += mul(s1_6, M4(3.471e-02, 3.378e-02, -8.717e-02, 7.544e-02, -6.288e-02, 2.081e-02, 2.298e-02, -2.993e-03, 4.522e-03, -7.794e-03, -4.982e-03, -3.590e-02, -7.603e-02, -2.180e-02, 1.330e-01, -4.647e-02)); + r += mul(s1_7, M4(2.862e-02, -6.317e-02, -5.481e-02, 
-1.627e-01, 5.290e-02, 5.915e-04, 6.899e-02, 2.046e-01, 9.586e-02, -2.566e-02, -3.599e-02, 1.280e-01, -4.238e-02, -6.985e-02, 6.564e-02, -2.758e-02)); + r += mul(s1_8, M4(-1.938e-03, 1.242e-02, 3.918e-02, -3.277e-02, 1.170e-02, -1.706e-06, 8.554e-02, -1.090e-01, 2.830e-02, 3.105e-03, -3.886e-02, -6.436e-02, -1.203e-02, -2.878e-02, -5.267e-02, 1.945e-02)); + r += mul(s2_0, M4(5.528e-02, 9.739e-03, 2.004e-03, 3.160e-03, 4.355e-02, 1.221e-03, -8.703e-02, 2.689e-02, -1.351e-02, -1.205e-02, -3.093e-02, -1.370e-02, 7.025e-02, 9.179e-03, -9.626e-04, 6.785e-03)); + r += mul(s2_1, M4(-2.777e-02, -8.242e-03, -1.114e-02, -1.084e-03, -3.786e-02, -1.153e-01, 6.387e-02, 2.035e-02, 6.563e-02, -1.642e-02, -6.511e-02, -2.599e-02, 1.560e-02, 1.179e-01, -9.626e-02, -4.517e-02)); + r += mul(s2_2, M4(-9.169e-03, -2.774e-02, -4.434e-02, -3.142e-02, -2.064e-02, 4.408e-02, -2.709e-02, -4.789e-02, 1.567e-02, 7.772e-03, -2.729e-02, 8.944e-03, -4.433e-02, -6.372e-02, 8.256e-02, 4.168e-02)); + r += mul(s2_3, M4(3.159e-02, 7.792e-02, 4.675e-02, -2.718e-02, 6.998e-02, 7.288e-02, 1.208e-01, -1.726e-02, -1.381e-01, -3.946e-01, 9.947e-02, 4.269e-03, -4.221e-02, -7.491e-04, -1.230e-01, -4.242e-02)); + r += mul(s2_4, M4(2.414e-01, -6.188e-02, 6.128e-02, 1.082e-01, -3.194e-02, 2.273e-01, 6.298e-01, 1.135e-02, 1.151e-01, 2.199e-01, -1.289e-01, -6.079e-02, 4.429e-02, -9.703e-02, -4.727e-03, 3.353e-01)); + r += mul(s2_5, M4(1.498e-02, -4.472e-02, -6.941e-02, 1.081e-02, -9.958e-03, -8.749e-02, 8.206e-02, 9.772e-02, 1.517e-02, -1.153e-02, -1.528e-01, 4.725e-02, -1.250e-02, -4.491e-02, 2.843e-02, -1.356e-01)); + r += mul(s2_6, M4(4.861e-02, -2.473e-02, -2.656e-02, -2.268e-02, -5.055e-04, 4.137e-02, 6.287e-02, 1.033e-01, -2.895e-02, -2.813e-01, -9.524e-02, -9.396e-02, -6.787e-03, 2.429e-02, -6.387e-03, 2.739e-02)); + r += mul(s2_7, M4(7.428e-02, 2.142e-02, -4.112e-02, 1.135e-01, -5.415e-02, 2.890e-02, 3.036e-01, -1.455e-02, 1.582e-01, -3.256e-01, -3.263e-01, -3.641e-02, -2.871e-02, 6.356e-02, -9.071e-02, 
-1.593e-01)); + r += mul(s2_8, M4(8.001e-02, 2.992e-02, -2.762e-02, 3.298e-02, 3.404e-02, 2.531e-02, -1.675e-01, -3.540e-02, 8.403e-02, 1.865e-02, -6.212e-02, -3.236e-02, 2.719e-02, -2.761e-02, 1.600e-02, 6.571e-02)); + r += mul(s3_0, M4(3.398e-02, -4.895e-04, 1.614e-02, -2.467e-02, 5.071e-02, 6.983e-03, 2.970e-02, 9.543e-03, -9.451e-02, -3.030e-02, 1.116e-02, 9.318e-03, -4.048e-02, -5.480e-02, 1.110e-01, -7.784e-02)); + r += mul(s3_1, M4(-2.600e-02, 8.064e-03, 9.369e-02, 2.616e-02, 5.362e-02, 5.782e-02, -1.067e-01, 5.362e-03, 1.306e-02, -7.885e-03, -6.518e-02, 9.381e-03, 8.515e-02, -7.475e-02, -1.343e-01, -6.213e-02)); + r += mul(s3_2, M4(5.751e-03, 2.538e-02, 1.962e-02, -2.790e-02, -2.377e-02, -4.240e-02, 1.209e-02, 1.314e-02, 2.025e-02, 7.784e-03, -1.837e-02, 5.976e-03, -5.516e-02, -9.428e-02, 1.268e-01, 2.969e-02)); + r += mul(s3_3, M4(-8.950e-02, -8.800e-03, 1.640e-01, 2.897e-03, -3.997e-02, -3.237e-02, 5.528e-02, -6.530e-04, -1.817e-02, -3.339e-02, 9.188e-03, -2.534e-02, -1.597e-01, -8.423e-02, 2.096e-01, -2.920e-02)); + r += mul(s3_4, M4(-1.813e-01, -3.936e-01, 2.434e-01, 6.759e-02, -2.571e-04, -1.128e-01, 7.820e-02, 4.335e-02, 8.014e-02, -5.647e-02, -1.626e-01, -6.703e-02, -1.845e-01, 2.022e-03, -4.692e-01, 3.637e-01)); + r += mul(s3_5, M4(-6.688e-02, -1.025e-01, -5.000e-03, 3.304e-02, 1.616e-02, -1.465e-02, 3.482e-02, -3.793e-02, 3.067e-03, 2.227e-02, -1.094e-01, -4.482e-02, -8.340e-02, -1.492e-01, 1.591e-01, -1.313e-01)); + r += mul(s3_6, M4(3.724e-02, -5.114e-02, -5.431e-02, -1.917e-01, -9.272e-03, 1.720e-02, -1.993e-03, -2.050e-02, -5.101e-02, -2.640e-02, 1.151e-01, -9.473e-02, -2.140e-02, -1.962e-02, 8.209e-02, 9.717e-03)); + r += mul(s3_7, M4(1.382e-01, 1.193e-01, 1.643e-01, -2.855e-03, -2.987e-02, 2.089e-02, 3.535e-02, -6.606e-02, 1.455e-01, -6.018e-02, -8.022e-02, -6.451e-02, -4.068e-02, 1.163e-02, 2.179e-02, -1.519e-01)); + r += mul(s3_8, M4(1.612e-02, 1.713e-02, -2.903e-02, -4.417e-02, -2.605e-03, 1.627e-02, 7.745e-03, -3.383e-02, 3.019e-02, 
4.177e-02, -6.136e-02, 1.523e-02, -1.122e-02, -1.083e-02, 7.795e-03, -1.774e-02)); + r += mul(s4_0, M4(8.440e-02, 3.624e-02, -1.217e-01, 2.625e-02, -3.526e-02, 7.012e-04, 1.416e-02, -2.009e-02, -5.261e-03, -3.329e-02, -6.662e-02, 4.990e-03, 6.008e-03, 2.910e-02, 2.946e-03, -5.679e-03)); + r += mul(s4_1, M4(5.815e-02, 3.498e-02, -1.135e-01, 3.051e-02, 2.724e-02, 1.011e-02, -2.088e-02, -5.432e-02, 7.455e-02, 6.235e-02, 1.335e-02, 1.592e-02, 2.837e-02, 5.222e-02, 1.051e-01, -1.813e-02)); + r += mul(s4_2, M4(-1.242e-02, -2.914e-02, -5.577e-02, 1.029e-02, 8.890e-02, 3.141e-02, -1.405e-01, -1.767e-02, 4.576e-02, -1.134e-03, -1.963e-02, 2.131e-02, 1.237e-03, -2.035e-03, 2.491e-02, 1.586e-02)); + r += mul(s4_3, M4(2.664e-02, 2.865e-02, 3.135e-02, 7.597e-02, 3.308e-03, -2.546e-02, 1.542e-02, -1.449e-02, -5.470e-02, 4.758e-02, -4.097e-02, -2.444e-02, -2.671e-02, -5.544e-03, -1.826e-02, -2.656e-02)); + r += mul(s4_4, M4(7.071e-02, 4.333e-02, -1.212e-01, 2.524e-02, 8.774e-02, 3.782e-02, -3.828e-02, 1.047e-02, 3.099e-01, -2.004e-02, -1.837e-02, 7.248e-02, 4.387e-02, -2.911e-02, -1.842e-01, 5.597e-02)); + r += mul(s4_5, M4(8.675e-02, 5.429e-02, -5.555e-02, 3.116e-02, -6.069e-02, 4.293e-02, -6.205e-02, 1.071e-02, 4.696e-03, 1.079e-02, -3.297e-02, 8.385e-02, 3.056e-02, 3.710e-02, -6.256e-02, -3.502e-02)); + r += mul(s4_6, M4(4.186e-02, 2.620e-02, -3.803e-02, -6.781e-03, 2.082e-02, -7.440e-03, -5.115e-02, -4.232e-02, -8.180e-02, -5.506e-02, 1.055e-02, 2.653e-02, -3.003e-02, -2.209e-02, -2.385e-02, -1.866e-02)); + r += mul(s4_7, M4(3.895e-02, -4.673e-02, -1.101e-01, 5.077e-02, 1.139e-02, 1.015e-02, -1.229e-01, -6.620e-04, -2.862e-02, -3.238e-02, -2.642e-01, 2.743e-01, -5.964e-02, 9.554e-03, 1.107e-01, -7.634e-03)); + r += mul(s4_8, M4(9.934e-03, 2.443e-02, -5.030e-02, 4.130e-03, -4.526e-02, -3.115e-02, 2.712e-02, -8.973e-03, -7.820e-02, 1.154e-01, 1.035e-01, -1.474e-01, -6.242e-02, 4.415e-02, 2.875e-02, 8.636e-02)); + r += mul(s5_0, M4(-4.351e-03, 4.779e-02, -6.555e-02, 4.382e-02, 
2.463e-02, 2.040e-02, -5.210e-02, 1.533e-02, 1.934e-02, 7.709e-03, -8.085e-03, 1.167e-03, -4.305e-03, 1.138e-02, 5.643e-02, 5.774e-03)); + r += mul(s5_1, M4(-5.602e-02, 1.029e-01, 1.462e-02, 1.601e-02, -1.568e-01, -4.966e-02, -7.733e-02, -4.236e-02, 9.258e-03, 7.679e-03, 7.429e-02, 2.600e-02, 4.029e-03, 2.422e-02, 5.462e-02, -2.416e-02)); + r += mul(s5_2, M4(3.105e-02, 6.461e-02, 7.153e-02, 5.897e-02, -4.944e-02, 3.584e-02, 1.480e-02, -1.872e-02, 1.280e-02, -5.895e-02, -2.277e-02, -1.193e-02, -5.982e-02, -1.221e-01, -1.579e-02, -5.938e-02)); + r += mul(s5_3, M4(1.689e-01, 2.597e-02, -1.514e-01, 9.314e-02, -1.821e-02, 6.212e-02, 2.740e-02, -3.503e-02, 1.068e-01, 6.841e-02, -1.032e-01, 4.724e-02, -3.427e-02, 1.266e-02, -1.638e-02, -3.180e-02)); + r += mul(s5_4, M4(-1.264e-01, -7.195e-02, -1.812e-01, -7.201e-02, 1.049e-02, 1.705e-01, 6.078e-01, -6.711e-02, 1.288e-01, -8.016e-02, -6.572e-02, -2.269e-02, -5.248e-02, -9.859e-02, -5.316e-02, -4.961e-03)); + r += mul(s5_5, M4(-1.837e-02, 7.451e-02, 1.610e-01, 1.456e-01, 2.369e-02, -3.822e-02, 7.146e-02, -2.847e-02, 7.272e-02, 6.773e-02, -2.711e-02, 7.843e-03, 3.587e-02, 1.023e-01, 1.026e-01, -1.263e-01)); + r += mul(s5_6, M4(6.148e-02, 1.107e-01, 3.425e-02, 1.865e-02, 2.918e-02, 1.356e-02, -4.471e-02, 3.983e-02, 5.005e-02, -1.763e-03, -9.191e-02, 7.853e-04, 2.529e-02, -1.449e-03, -2.292e-02, -5.890e-02)); + r += mul(s5_7, M4(1.625e-01, -9.214e-02, -1.719e-01, 1.213e-01, 1.133e-01, -2.186e-02, -5.343e-02, 2.231e-02, -9.590e-02, -8.760e-03, -1.772e-02, 2.935e-02, 6.195e-02, 4.969e-02, -2.584e-03, 6.213e-02)); + r += mul(s5_8, M4(1.340e-02, 4.829e-02, -9.137e-02, 2.154e-01, -4.813e-02, 6.001e-02, -3.136e-02, -1.011e-01, 1.685e-02, -4.554e-02, 9.511e-02, -1.590e-02, -1.125e-01, 4.501e-02, -3.054e-02, 2.447e-01)); + r += mul(s6_0, M4(-1.053e-01, -8.765e-02, 1.532e-01, -4.659e-02, -2.224e-02, -3.997e-02, 3.599e-02, 1.014e-02, -1.010e-01, 5.718e-02, -1.816e-02, -4.517e-02, 1.942e-02, -2.291e-03, 2.359e-02, 7.930e-03)); + r += 
mul(s6_1, M4(-2.403e-01, -9.120e-02, 4.618e-02, 3.295e-02, 8.804e-02, 9.860e-02, -3.428e-02, -2.947e-02, -2.438e-01, -4.033e-01, -1.964e-01, -2.377e-02, -9.690e-03, 7.185e-02, -4.311e-02, 2.419e-03)); + r += mul(s6_2, M4(-7.526e-02, -5.431e-02, 1.785e-02, -5.604e-02, -8.780e-03, 2.806e-02, -3.138e-02, -2.028e-02, 4.689e-02, -1.410e-01, 6.928e-03, 4.146e-02, 4.977e-02, -6.971e-02, -3.579e-02, 5.402e-03)); + r += mul(s6_3, M4(-8.901e-02, -5.394e-02, 8.191e-02, -5.543e-03, 2.675e-02, -2.194e-03, -2.872e-02, -1.765e-02, 1.043e-02, -8.325e-02, 8.178e-02, 2.385e-02, -1.619e-02, 9.743e-03, -2.077e-04, -2.131e-02)); + r += mul(s6_4, M4(1.222e-01, -2.013e-01, 2.624e-01, -1.382e-01, 5.505e-02, 2.544e-02, -3.265e-02, 6.327e-02, -2.611e-02, 1.162e-01, 1.180e-01, 1.988e-01, -6.963e-02, 3.909e-02, 6.545e-03, 1.319e-01)); + r += mul(s6_5, M4(-9.865e-02, -9.627e-02, 9.023e-02, 7.924e-02, -3.129e-02, -4.344e-02, -3.674e-02, -1.218e-02, -1.191e-02, -2.820e-01, 3.967e-02, -3.797e-02, 5.883e-02, 6.910e-02, -6.787e-02, -3.560e-02)); + r += mul(s6_6, M4(-1.153e-01, -1.903e-02, 1.505e-01, -6.835e-02, -1.764e-02, 9.724e-03, 3.525e-02, -3.964e-02, -1.022e-04, 6.910e-03, 2.362e-02, -3.179e-02, 7.999e-02, 3.299e-02, -5.418e-02, 8.698e-02)); + r += mul(s6_7, M4(-5.041e-02, 4.131e-05, 2.529e-02, 1.773e-01, -1.839e-02, 1.195e-02, 7.255e-02, 5.290e-02, 9.252e-02, -8.837e-03, -2.834e-02, 6.677e-03, 9.508e-02, -7.959e-02, -8.882e-02, -1.404e-01)); + r += mul(s6_8, M4(-4.957e-02, -9.978e-02, 4.352e-02, -1.570e-01, 5.283e-03, -2.421e-02, -4.951e-02, -2.035e-02, 5.054e-02, 1.961e-02, -4.213e-02, -9.692e-02, 2.936e-02, -4.935e-02, 1.523e-02, 4.361e-02)); + r += mul(s7_0, M4(4.602e-02, -1.675e-02, -1.591e-02, -1.113e-02, -7.082e-02, -3.239e-02, -3.778e-02, 4.838e-03, 7.734e-03, 5.217e-03, -3.286e-02, -1.355e-02, 4.276e-02, 3.737e-03, 1.126e-02, -1.884e-02)); + r += mul(s7_1, M4(3.971e-02, 4.977e-02, -7.690e-02, -1.284e-02, 1.546e-01, 1.142e-01, -5.093e-02, -9.530e-03, 2.584e-02, 2.739e-02, 1.305e-01, 
8.483e-03, -4.204e-02, 2.041e-02, 2.619e-02, 2.992e-02)); + r += mul(s7_2, M4(-3.270e-02, -3.031e-02, 3.564e-02, -8.750e-04, -2.558e-02, -4.335e-02, -1.101e-01, 2.464e-02, 8.774e-02, 2.588e-02, -2.431e-02, 1.396e-02, 2.774e-02, 7.791e-03, -1.916e-02, -2.248e-02)); + r += mul(s7_3, M4(-3.812e-02, 3.894e-02, -5.644e-02, -5.112e-02, 9.874e-02, 7.690e-02, -1.340e-01, -8.417e-03, 1.940e-02, -1.649e-02, 7.603e-03, 2.279e-02, -3.198e-02, 1.643e-02, 2.743e-02, 4.726e-02)); + r += mul(s7_4, M4(-1.155e-02, 4.982e-03, 1.675e-01, 1.221e-01, 2.341e-01, -1.924e-01, 5.387e-02, -3.978e-02, 4.156e-02, -2.215e-02, -5.457e-02, 2.207e-01, -7.213e-02, 3.254e-01, 4.576e-02, 7.598e-02)); + r += mul(s7_5, M4(3.181e-02, 6.784e-02, -9.888e-02, -5.729e-02, 1.654e-02, 1.320e-01, -1.027e-01, 6.525e-03, -5.506e-02, -7.947e-02, 6.971e-04, 1.817e-02, 1.188e-02, -2.524e-04, 3.319e-02, -4.904e-02)); + r += mul(s7_6, M4(1.565e-02, -2.185e-02, -1.770e-02, 1.763e-02, 3.587e-02, -5.422e-02, -4.322e-02, -4.995e-02, -6.359e-03, 9.156e-03, 6.813e-02, 1.967e-02, -4.337e-02, -5.229e-02, 9.372e-02, 5.021e-02)); + r += mul(s7_7, M4(-3.703e-03, -1.231e-03, 6.089e-02, -3.712e-02, -7.186e-02, 2.285e-02, 2.142e-02, -2.935e-02, 3.421e-02, 4.537e-02, -1.223e-01, -2.549e-02, -6.228e-04, 7.104e-02, -1.317e-02, 8.456e-02)); + r += mul(s7_8, M4(-1.091e-02, -2.291e-02, -3.018e-02, 6.189e-02, -2.207e-02, 1.936e-02, 4.075e-02, -2.174e-02, -8.365e-03, 1.849e-02, -3.259e-03, 1.701e-02, 1.962e-02, -2.471e-02, 4.502e-02, -5.257e-02)); + r += V4(2.322e-03, -2.560e-03, 2.890e-03, 2.624e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 
s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.769e-02, 3.430e-02, 2.986e-02, 7.093e-03, -3.817e-02, -3.516e-02, 2.524e-03, 1.512e-02, -5.755e-02, -1.135e-02, 2.727e-02, -1.513e-02, -4.468e-02, 7.272e-02, -3.816e-03, 6.695e-02)); + r += mul(s0_1, M4(1.413e-01, -7.748e-02, -2.507e-02, 3.006e-02, -4.637e-02, 1.332e-02, 8.965e-03, 2.699e-02, 1.203e-01, 9.388e-02, 1.500e-01, 1.828e-02, 6.282e-02, -3.408e-02, 4.916e-02, -2.253e-03)); + r += mul(s0_2, M4(-8.667e-02, 7.218e-02, 2.209e-02, -2.577e-02, 3.357e-02, 3.247e-02, -4.440e-02, -2.082e-02, -4.160e-02, 2.560e-02, 3.797e-02, 6.389e-03, -2.219e-02, 4.342e-02, 3.651e-02, 9.668e-03)); + r += mul(s0_3, M4(-1.324e-02, 4.538e-02, -3.497e-02, -4.271e-02, 1.946e-02, -1.026e-02, -2.813e-02, 1.558e-02, -4.352e-02, -8.227e-02, 4.169e-02, 4.614e-03, -3.101e-02, -1.294e-02, 7.845e-02, 8.703e-02)); + r += mul(s0_4, M4(-1.333e-01, 1.681e-02, -3.979e-01, 1.636e-01, -2.077e-02, 9.604e-02, -1.053e-02, 1.743e-01, -7.553e-03, 3.166e-01, 4.240e-01, -7.433e-02, -1.270e-03, 3.921e-02, -9.935e-02, 5.428e-02)); + r += mul(s0_5, M4(8.747e-03, 1.770e-01, -3.575e-01, -1.804e-02, 2.260e-02, -3.461e-02, 3.119e-03, -4.722e-02, -5.255e-02, 2.294e-03, 3.706e-02, 3.062e-02, 1.580e-03, -2.325e-02, -6.594e-02, -8.279e-04)); + r += mul(s0_6, M4(4.588e-02, 1.887e-02, 8.961e-02, 1.174e-02, 3.929e-02, -2.709e-02, 1.732e-02, -3.915e-02, -2.924e-02, -4.258e-03, -1.060e-02, 4.145e-02, 4.382e-02, 2.807e-02, -2.511e-02, -4.608e-02)); + r += mul(s0_7, M4(-3.458e-02, 8.814e-02, -9.911e-02, -9.226e-02, -1.297e-02, -5.636e-02, -3.204e-04, 1.668e-01, 2.376e-03, -1.050e-02, -4.832e-02, -9.028e-02, 1.056e-02, -5.040e-03, 6.895e-02, 1.722e-02)); + r += mul(s0_8, M4(1.385e-02, -1.727e-02, 3.000e-02, -2.268e-02, -7.698e-03, 4.236e-02, 1.831e-02, 
-2.752e-02, -1.781e-02, 1.875e-02, 6.718e-02, 4.487e-02, 8.638e-03, -4.369e-03, 2.690e-02, 1.162e-02)); + r += mul(s1_0, M4(9.663e-04, 6.421e-02, 2.348e-03, 3.662e-02, -6.243e-02, -4.922e-03, -1.718e-02, 1.109e-02, 3.626e-02, 6.479e-02, 1.096e-02, 5.092e-02, 2.318e-01, -8.631e-03, -1.290e-01, 1.068e-01)); + r += mul(s1_1, M4(1.098e-01, 3.674e-02, -3.759e-02, 9.372e-03, -1.836e-02, -8.044e-02, -1.933e-02, -1.283e-03, 2.495e-01, -7.519e-03, 1.665e-01, -6.518e-02, 6.714e-02, -7.408e-02, 8.451e-02, -4.340e-03)); + r += mul(s1_2, M4(-1.593e-02, -8.483e-02, 6.648e-02, -4.250e-03, 3.890e-02, 5.946e-02, -2.123e-02, 5.192e-04, -3.893e-02, 2.275e-03, 3.886e-02, 2.058e-02, 7.988e-03, 3.680e-02, -7.176e-03, 3.505e-03)); + r += mul(s1_3, M4(3.470e-02, -1.060e-02, 9.678e-02, 5.614e-02, 6.471e-02, 1.664e-02, -6.896e-02, -1.872e-01, 4.002e-03, -5.795e-02, -2.086e-02, -1.295e-01, 3.125e-01, 3.731e-02, 2.252e-01, 1.431e-03)); + r += mul(s1_4, M4(-3.841e-02, -7.289e-02, -1.620e-01, -3.709e-03, 2.840e-02, -9.839e-02, -2.641e-01, 9.547e-02, 4.226e-02, -1.201e-01, -1.102e-01, -1.398e-01, 1.233e-01, 9.042e-02, 1.528e-01, -1.018e-01)); + r += mul(s1_5, M4(2.895e-02, 3.205e-02, -9.373e-02, 2.594e-02, -1.824e-02, 9.412e-02, -1.096e-01, -4.941e-02, -4.628e-02, 3.342e-02, 3.307e-02, 2.623e-02, -3.789e-02, -8.026e-02, 7.834e-02, 1.145e-02)); + r += mul(s1_6, M4(1.077e-02, 6.507e-02, 9.357e-02, 5.691e-03, 2.728e-02, 1.283e-02, 2.645e-02, -2.391e-02, -1.328e-02, 1.523e-02, 1.871e-02, -4.346e-04, 2.688e-02, -1.068e-02, 3.216e-02, -8.827e-02)); + r += mul(s1_7, M4(4.487e-02, -4.437e-02, -2.437e-02, 5.312e-03, 1.198e-02, 3.746e-02, 2.767e-02, -6.274e-02, 4.431e-02, -3.679e-02, -1.600e-02, -3.319e-02, -7.497e-03, -9.400e-02, -5.401e-02, -5.201e-03)); + r += mul(s1_8, M4(4.668e-03, -3.940e-02, 4.642e-02, -1.533e-03, 7.793e-04, -9.078e-02, -2.667e-02, -1.779e-02, -8.360e-03, 3.010e-03, 3.381e-02, 3.870e-02, -6.294e-03, 1.986e-02, -1.353e-03, 1.982e-02)); + r += mul(s2_0, M4(-1.493e-02, 3.922e-02, 
-1.123e-02, 3.228e-02, 3.481e-02, -8.864e-02, -3.402e-02, -2.980e-02, -1.228e-03, -1.155e-02, -5.129e-03, -6.118e-04, -6.707e-03, 1.068e-01, 1.294e-01, 9.454e-02)); + r += mul(s2_1, M4(6.473e-02, 1.190e-02, 9.143e-03, -1.535e-02, -2.666e-01, 2.390e-02, -4.825e-02, -1.338e-02, 2.362e-02, -4.216e-02, 2.073e-03, 1.623e-02, 3.649e-02, -5.048e-02, 1.218e-01, -1.258e-01)); + r += mul(s2_2, M4(-1.461e-02, -2.262e-02, 1.377e-02, 8.270e-03, -1.703e-02, -3.705e-02, 3.457e-03, 7.981e-03, 9.442e-03, -3.095e-02, 1.217e-02, 1.122e-02, -6.379e-02, 5.288e-02, -4.319e-02, -1.166e-02)); + r += mul(s2_3, M4(6.899e-02, 8.014e-02, 1.904e-02, 1.901e-01, -6.712e-02, 9.053e-02, 6.660e-02, -1.686e-01, 1.100e-01, 2.037e-02, 1.298e-02, 1.362e-01, -3.602e-02, -1.374e-01, -4.468e-02, -4.311e-02)); + r += mul(s2_4, M4(-1.657e-01, -4.958e-02, 6.990e-02, -1.729e-01, 1.499e-01, 3.929e-02, 3.195e-01, -1.191e-01, 7.422e-02, -5.251e-04, 1.518e-01, -5.510e-03, 5.051e-02, 1.482e-01, -6.150e-02, -4.369e-02)); + r += mul(s2_5, M4(-4.362e-03, -2.337e-02, -5.092e-02, 9.135e-03, -3.870e-02, 2.134e-02, 1.440e-02, 3.260e-03, -4.886e-02, -7.149e-02, -5.947e-03, 1.620e-02, 1.830e-02, -6.629e-02, 2.087e-02, 1.329e-02)); + r += mul(s2_6, M4(2.216e-02, -4.352e-02, -2.959e-02, 4.755e-02, 2.413e-02, -2.525e-03, -4.118e-02, -7.323e-02, 8.579e-03, -3.825e-02, -6.078e-02, -9.820e-02, 7.013e-03, 1.992e-02, -6.306e-02, -2.990e-02)); + r += mul(s2_7, M4(-1.876e-03, 1.483e-02, 8.292e-02, -7.048e-02, -5.165e-02, 1.388e-02, -2.538e-02, -4.672e-02, -1.392e-02, 6.786e-02, 1.279e-01, 5.121e-02, 6.889e-03, 3.214e-02, 3.471e-02, 5.284e-02)); + r += mul(s2_8, M4(3.736e-03, -1.720e-02, 3.858e-02, 1.053e-02, 9.189e-03, -5.003e-03, -3.432e-02, 3.109e-02, 2.986e-02, -5.438e-04, -3.111e-02, 3.517e-02, 1.780e-02, -4.259e-04, -1.356e-03, -2.237e-02)); + r += mul(s3_0, M4(-1.569e-02, -1.265e-02, -1.442e-02, 1.176e-02, 6.370e-02, 1.240e-02, 2.896e-03, 7.663e-02, 3.791e-02, -3.755e-02, -5.019e-02, -1.640e-02, -3.169e-02, 1.265e-01, 
-3.333e-02, 8.391e-02)); + r += mul(s3_1, M4(2.693e-02, 9.806e-02, 8.333e-03, -4.201e-02, -2.995e-02, -2.224e-02, 3.885e-02, -3.163e-02, -7.559e-03, -7.851e-02, 5.818e-03, -2.593e-02, 2.332e-01, -2.682e-01, -3.702e-01, -7.207e-02)); + r += mul(s3_2, M4(1.338e-02, -9.557e-03, 4.011e-02, 1.176e-02, -2.853e-02, 2.508e-02, 3.737e-02, 2.593e-03, -8.633e-03, -2.363e-02, 1.359e-02, 3.321e-03, -1.082e-02, 2.184e-02, -1.364e-01, -1.985e-02)); + r += mul(s3_3, M4(7.322e-02, 6.110e-02, -6.938e-02, 1.362e-01, -2.306e-02, -1.182e-02, 4.187e-02, -9.458e-02, 4.396e-02, -3.955e-02, 2.733e-02, -4.115e-02, 4.264e-02, -1.599e-01, -2.753e-02, 6.181e-02)); + r += mul(s3_4, M4(-1.348e-01, -4.207e-02, 2.851e-02, -1.848e-01, 5.648e-02, 7.481e-04, -7.185e-02, 8.991e-02, -2.557e-02, -1.986e-02, -5.941e-03, 1.308e-02, 4.504e-02, 4.393e-01, 3.722e-01, 7.878e-03)); + r += mul(s3_5, M4(-3.470e-02, -1.129e-02, 3.449e-02, -2.631e-02, 3.715e-03, 3.386e-03, -5.315e-02, -6.027e-03, 1.135e-02, -7.620e-02, 2.472e-03, -1.909e-02, 6.653e-02, -9.478e-02, -1.071e-01, -2.663e-03)); + r += mul(s3_6, M4(2.318e-04, -4.771e-02, -2.045e-02, -1.086e-02, 4.472e-03, -4.367e-02, -1.824e-02, -6.762e-02, 6.207e-02, -5.995e-02, -6.440e-02, -1.439e-01, 2.135e-02, -4.208e-02, -9.296e-02, -8.298e-02)); + r += mul(s3_7, M4(2.881e-02, -1.097e-01, 1.400e-02, -1.062e-01, 1.823e-02, -7.184e-04, 6.146e-02, 3.571e-02, 1.869e-02, -1.228e-01, 3.933e-02, 1.774e-02, -7.991e-03, -6.415e-02, -7.725e-02, -5.734e-02)); + r += mul(s3_8, M4(-1.673e-02, 6.017e-03, -1.099e-02, 9.427e-03, 7.090e-03, -1.586e-02, -8.811e-03, 7.162e-03, -1.503e-03, 1.245e-02, 8.584e-02, 2.865e-02, 1.389e-02, -3.085e-02, -6.177e-03, -2.121e-02)); + r += mul(s4_0, M4(3.493e-03, -2.886e-03, 4.468e-03, -9.162e-03, -3.429e-02, 1.533e-02, -1.977e-02, 1.296e-02, -7.195e-02, -2.864e-02, 1.670e-02, 2.819e-03, 3.964e-02, 6.730e-03, 4.604e-03, -4.440e-03)); + r += mul(s4_1, M4(7.038e-02, -5.105e-02, 3.612e-02, 1.737e-03, 8.003e-03, -3.328e-03, 6.301e-02, 8.136e-02, 
5.501e-02, 2.321e-02, 3.147e-02, 4.815e-02, 6.608e-02, -2.219e-02, -2.320e-02, 5.550e-03)); + r += mul(s4_2, M4(-5.361e-02, 6.124e-03, 6.197e-02, 1.861e-02, 5.837e-03, -2.258e-02, 2.421e-02, 2.352e-02, 9.489e-03, -2.379e-02, 2.291e-02, -9.015e-03, 1.667e-03, 5.427e-02, -5.367e-03, -1.688e-03)); + r += mul(s4_3, M4(-2.280e-03, 2.210e-02, 6.070e-02, 6.741e-02, 3.232e-02, -3.528e-02, -3.783e-02, 7.012e-02, 9.307e-02, 2.999e-02, -3.325e-02, 2.233e-02, -7.461e-03, 3.415e-02, 2.672e-02, -7.837e-02)); + r += mul(s4_4, M4(-8.615e-02, 1.665e-01, 2.015e-01, -1.366e-02, -2.904e-03, -9.961e-02, -1.997e-02, -2.561e-02, -8.201e-02, -2.483e-02, -2.690e-01, 4.378e-02, -6.957e-02, -7.747e-02, -5.177e-02, 1.053e-01)); + r += mul(s4_5, M4(-3.442e-02, 2.996e-02, 1.743e-02, 1.939e-02, 2.183e-02, 1.201e-02, -3.384e-03, 4.529e-02, 3.883e-02, 1.622e-02, -1.191e-01, 2.433e-02, -3.534e-03, 1.045e-01, 4.727e-02, 3.186e-02)); + r += mul(s4_6, M4(-3.222e-02, 7.750e-03, 1.940e-02, -9.354e-03, -2.196e-02, -1.724e-02, 3.045e-02, 5.532e-03, 6.117e-02, -7.738e-02, -9.238e-02, -6.940e-03, 2.963e-02, -3.707e-02, -2.099e-02, -5.924e-02)); + r += mul(s4_7, M4(2.275e-02, -3.647e-02, 7.601e-04, 4.326e-02, 4.676e-02, -2.737e-02, 1.456e-03, 4.308e-02, 2.049e-02, 1.226e-01, -7.456e-02, 1.435e-01, -3.338e-02, 3.721e-02, 2.961e-02, 1.217e-01)); + r += mul(s4_8, M4(-2.384e-02, 6.714e-02, 7.770e-02, 4.065e-02, 7.466e-03, 1.779e-02, -2.825e-02, -2.042e-02, -6.037e-03, -1.580e-02, -1.190e-01, -3.517e-02, 1.130e-02, 2.295e-02, -7.750e-02, -3.106e-02)); + r += mul(s5_0, M4(-4.721e-02, -5.382e-02, 4.564e-02, -6.371e-02, -5.454e-02, -5.059e-02, 9.380e-03, -2.820e-03, 2.496e-02, 1.014e-02, -2.499e-02, -1.110e-02, -1.481e-02, 1.522e-02, -1.918e-02, 1.641e-02)); + r += mul(s5_1, M4(-4.790e-01, 1.294e-01, 2.610e-01, -5.945e-02, -8.980e-02, 1.320e-02, -2.247e-02, -4.364e-02, 6.614e-02, 6.913e-02, -1.607e-02, 2.799e-02, -3.813e-02, 7.105e-02, 7.943e-02, 6.225e-02)); + r += mul(s5_2, M4(-6.457e-02, 1.694e-01, -3.872e-02, 
4.488e-02, -4.822e-02, -8.567e-02, 7.933e-02, 1.024e-02, 1.063e-03, -4.451e-02, -1.357e-02, 7.988e-03, 1.240e-02, 5.276e-02, 3.980e-02, 6.461e-02)); + r += mul(s5_3, M4(-1.620e-01, 1.235e-01, 3.256e-02, 5.695e-02, 4.469e-02, -8.958e-02, 2.359e-03, -6.046e-02, 2.983e-02, -3.023e-03, 6.335e-02, 1.317e-02, 3.527e-02, -1.933e-02, -3.699e-02, 6.653e-02)); + r += mul(s5_4, M4(1.232e-01, 4.089e-01, 1.239e-01, 1.234e-01, 1.253e-02, 3.016e-02, 5.864e-02, -1.790e-01, -8.816e-02, 3.547e-02, 1.620e-01, 6.102e-03, -5.373e-02, -1.860e-01, -1.284e-01, 6.518e-02)); + r += mul(s5_5, M4(-4.312e-02, 2.143e-03, 1.073e-01, 5.873e-03, 2.948e-02, 3.319e-02, -7.224e-02, 1.756e-02, 6.773e-02, 2.521e-02, 1.380e-02, 4.037e-02, 3.288e-02, -1.908e-01, -1.050e-01, 3.035e-02)); + r += mul(s5_6, M4(-5.461e-02, 9.170e-02, 6.992e-02, 9.781e-02, -9.620e-03, 2.436e-02, 4.409e-02, -9.482e-02, 1.359e-03, -2.669e-02, -1.697e-02, -3.783e-02, -2.329e-02, -1.386e-02, -1.895e-02, 1.972e-02)); + r += mul(s5_7, M4(-4.409e-02, -2.055e-02, -1.209e-01, 5.981e-02, -4.746e-02, -9.839e-02, -8.667e-02, -3.684e-04, 3.992e-02, 4.212e-02, -2.215e-02, -2.174e-02, 1.628e-02, 2.727e-02, 1.283e-02, 1.170e-01)); + r += mul(s5_8, M4(-3.285e-02, 1.315e-01, 5.991e-02, 3.093e-02, 1.831e-02, -2.701e-02, -9.941e-03, -1.247e-02, -1.884e-02, -3.038e-02, 1.778e-02, -1.534e-02, 8.890e-03, 5.855e-02, -5.228e-03, 7.156e-03)); + r += mul(s6_0, M4(9.490e-02, -1.496e-02, -8.049e-02, -4.893e-02, -4.609e-02, 3.691e-02, 1.122e-02, 1.672e-02, 1.273e-01, 2.604e-02, -3.030e-02, 3.724e-02, -9.443e-03, 9.186e-03, -1.887e-02, 1.724e-02)); + r += mul(s6_1, M4(3.599e-02, 1.217e-02, -5.710e-02, -3.505e-02, 2.689e-02, -1.739e-03, 8.030e-02, 1.825e-02, 1.923e-01, -1.211e-01, -1.559e-02, -1.202e-02, 4.499e-02, -2.829e-02, -1.283e-02, -2.598e-02)); + r += mul(s6_2, M4(8.338e-02, -6.812e-02, -4.737e-02, -2.143e-02, -1.269e-02, -2.670e-03, -2.229e-02, 6.926e-04, 9.799e-03, -2.381e-02, -1.008e-02, 2.220e-02, -1.786e-02, -5.033e-02, 2.973e-02, -6.898e-03)); 
+ r += mul(s6_3, M4(1.293e-01, -1.263e-01, -1.300e-01, 3.399e-02, -3.571e-02, -3.489e-03, 4.969e-02, -6.396e-04, -1.049e-01, -6.914e-02, -6.059e-02, -3.406e-02, -4.433e-02, -3.107e-02, 1.599e-02, -9.359e-02)); + r += mul(s6_4, M4(-1.572e-02, -4.111e-02, 1.207e-01, -2.561e-01, 8.708e-02, 5.039e-02, 5.332e-02, -7.156e-02, 1.751e-01, -9.601e-02, 8.257e-03, -2.388e-01, -1.028e-02, 5.476e-03, -1.537e-01, 4.572e-02)); + r += mul(s6_5, M4(5.549e-02, -1.367e-02, -6.034e-02, 2.235e-02, -2.252e-02, -5.300e-02, 4.816e-02, 2.981e-02, -1.995e-02, -1.877e-02, -1.219e-01, 1.896e-02, 7.559e-02, -1.734e-02, 1.740e-02, -9.721e-03)); + r += mul(s6_6, M4(5.600e-03, -4.349e-02, -3.722e-03, -1.021e-02, -4.450e-03, -1.587e-02, -3.889e-02, -3.046e-02, -2.679e-02, 1.161e-02, -1.043e-02, -2.748e-02, -1.829e-02, 5.464e-02, 6.469e-02, 9.752e-03)); + r += mul(s6_7, M4(8.553e-02, 1.512e-02, -1.313e-01, -1.637e-01, 8.015e-03, 2.808e-02, 7.920e-02, -1.984e-02, 1.717e-02, -6.763e-02, -8.202e-02, -3.641e-02, 5.455e-02, -8.316e-02, -1.613e-01, -8.608e-03)); + r += mul(s6_8, M4(2.581e-02, -9.600e-02, -3.212e-02, 4.638e-03, 1.617e-02, 3.466e-03, -2.201e-02, 1.713e-02, 2.723e-03, -6.574e-03, 3.141e-02, 2.810e-02, -5.310e-02, 1.756e-02, 1.155e-01, -9.499e-03)); + r += mul(s7_0, M4(-5.505e-02, -2.272e-02, -2.377e-03, 1.201e-02, -1.914e-01, 1.033e-02, 9.063e-02, -2.243e-02, -7.919e-02, 4.620e-02, 6.766e-02, -7.420e-02, 1.656e-03, -3.636e-02, -3.654e-02, -1.298e-03)); + r += mul(s7_1, M4(2.952e-02, -2.152e-02, -1.487e-02, -2.337e-02, 2.447e-02, -4.213e-02, 1.234e-01, 5.147e-03, 2.335e-01, -1.621e-01, -1.609e-01, -5.706e-02, -7.251e-02, 3.109e-02, 1.041e-02, -2.281e-02)); + r += mul(s7_2, M4(-1.849e-02, 2.297e-02, 2.471e-02, 4.394e-03, -7.618e-02, 2.530e-02, 1.500e-02, 5.996e-03, -5.243e-02, -6.869e-03, 2.549e-02, 6.482e-03, -2.084e-03, -2.925e-02, 1.508e-03, -1.377e-02)); + r += mul(s7_3, M4(2.360e-02, -2.729e-02, -3.317e-03, -1.427e-02, 2.136e-01, 8.746e-02, 6.818e-02, -6.063e-02, -4.529e-02, 2.893e-02, 
-8.883e-03, -2.803e-02, 5.528e-02, 3.423e-02, -1.193e-02, -1.598e-01)); + r += mul(s7_4, M4(-4.718e-02, 1.455e-01, 6.738e-02, 7.756e-02, -1.693e-01, -1.121e-01, -4.263e-01, 6.029e-02, 2.845e-02, -1.797e-02, -7.025e-02, -1.174e-01, -2.348e-01, -1.115e-02, -1.892e-02, -1.518e-02)); + r += mul(s7_5, M4(2.243e-02, -7.424e-02, -3.931e-02, -2.403e-02, 6.038e-02, 1.797e-02, -1.744e-01, 1.963e-02, -4.382e-02, 9.405e-02, 1.894e-02, 5.672e-02, 1.975e-02, 7.642e-02, 2.796e-02, -8.300e-03)); + r += mul(s7_6, M4(-9.019e-03, 9.798e-03, -4.330e-03, -1.891e-02, 1.371e-02, -6.128e-02, -3.102e-02, -1.832e-03, -5.050e-02, -1.091e-02, 1.292e-02, 1.969e-02, -1.757e-02, -6.372e-02, -3.461e-02, 2.646e-02)); + r += mul(s7_7, M4(4.840e-03, -1.643e-02, -1.586e-02, 1.015e-02, 1.271e-02, 1.577e-02, 6.421e-02, -5.505e-02, 2.839e-02, 3.304e-02, 1.282e-02, -3.135e-02, 9.328e-03, -3.226e-02, -1.949e-01, 6.956e-02)); + r += mul(s7_8, M4(-6.982e-03, 1.910e-02, 1.303e-02, 1.504e-03, 1.284e-02, -1.884e-02, 9.418e-02, 4.628e-02, -1.214e-02, 2.500e-02, 6.745e-02, 4.770e-02, -1.645e-02, 2.692e-02, 3.056e-02, -7.823e-03)); + r += V4(-2.282e-03, 2.426e-03, 3.717e-03, 9.910e-04); + return r; +} + +void Pass17(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 
0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 
s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, 
s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 18 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0, t1, t2, t3 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.390e-02, 2.774e-02, -1.825e-02, -3.381e-02, 5.142e-03, 1.910e-02, -1.152e-02, -1.715e-02, 4.162e-02, -2.385e-02, -1.464e-02, 2.865e-02, 7.757e-02, -3.907e-02, -1.001e-02, 3.856e-02)); + r += mul(s0_1, M4(1.028e-02, 1.211e-02, 1.998e-02, 2.118e-02, 4.222e-02, -2.634e-02, -2.654e-02, 1.277e-02, 2.783e-03, -3.517e-03, 9.475e-03, -5.885e-02, 6.169e-02, 2.678e-02, 7.086e-03, -6.372e-02)); + r += mul(s0_2, M4(-6.586e-03, 3.657e-02, 2.724e-03, 4.726e-04, 5.419e-03, 3.249e-02, 5.477e-03, -3.069e-02, -1.690e-02, 5.874e-03, 1.759e-03, -2.633e-03, -1.629e-02, 5.009e-02, 5.186e-04, 1.089e-02)); + r += mul(s0_3, M4(-1.309e-01, 6.016e-02, -2.002e-01, 7.875e-02, -3.259e-02, 8.804e-03, -7.355e-04, 3.578e-02, -3.214e-02, 3.275e-03, 2.240e-02, -3.278e-02, 9.548e-02, 3.186e-02, 7.834e-02, -1.996e-02)); + r += mul(s0_4, M4(1.598e-01, -5.578e-01, 
5.417e-02, -2.931e-02, -1.627e-01, -5.194e-02, 1.829e-01, -1.190e-02, 2.025e-01, -4.635e-03, -3.636e-02, 4.761e-02, -3.775e-01, 3.522e-01, 7.375e-02, 1.011e-01)); + r += mul(s0_5, M4(-4.034e-03, 3.094e-02, -1.570e-02, 5.374e-03, 3.153e-03, 3.226e-02, 1.686e-02, 1.480e-01, -8.735e-03, 8.374e-02, 2.216e-02, 7.613e-02, 1.636e-02, -8.414e-02, 2.502e-02, -5.713e-02)); + r += mul(s0_6, M4(5.457e-02, -9.016e-03, 1.209e-01, -1.264e-02, 1.254e-02, 1.198e-02, 3.556e-02, -9.702e-03, -3.455e-02, -8.410e-03, -7.007e-02, -4.583e-03, 3.383e-05, -3.308e-02, -2.099e-02, 2.184e-02)); + r += mul(s0_7, M4(-5.539e-02, 9.357e-02, -1.596e-01, 4.695e-03, 7.270e-02, -9.424e-03, -1.462e-01, 5.083e-03, -9.984e-02, -2.601e-02, 4.360e-02, -8.520e-02, 3.265e-02, -7.759e-02, -1.672e-01, -2.050e-01)); + r += mul(s0_8, M4(-2.128e-02, 3.748e-02, 1.617e-03, 4.106e-02, 8.666e-03, -6.636e-03, -4.336e-03, -8.958e-02, 5.448e-03, -8.355e-03, -5.292e-04, 2.481e-02, -5.910e-02, 3.845e-03, 2.578e-02, 8.474e-03)); + r += mul(s1_0, M4(4.021e-02, 2.152e-02, -1.957e-02, -1.908e-02, 9.527e-03, 1.594e-02, 3.373e-04, -1.741e-02, 5.456e-02, -1.393e-02, -1.739e-02, 3.796e-02, 8.570e-02, -4.315e-02, -1.061e-02, 3.398e-02)); + r += mul(s1_1, M4(9.609e-03, -1.568e-02, 3.150e-02, 2.950e-02, 7.791e-03, -2.504e-02, -1.304e-02, -6.383e-04, 1.061e-01, -1.333e-01, 3.358e-02, -6.262e-02, 1.793e-02, 7.228e-02, 1.766e-02, -5.212e-02)); + r += mul(s1_2, M4(-8.435e-05, 2.842e-02, -4.049e-04, 3.577e-03, -4.550e-03, 9.252e-02, 8.368e-03, -4.986e-02, -2.765e-02, 2.857e-02, -1.321e-02, 7.323e-03, -1.438e-02, 1.786e-02, -1.333e-03, 7.861e-03)); + r += mul(s1_3, M4(-1.434e-01, 1.037e-02, -6.555e-02, 6.823e-02, -4.099e-02, 3.701e-03, -6.747e-03, 3.308e-02, -1.705e-02, 1.096e-02, 3.467e-02, -1.692e-02, 7.100e-02, 4.063e-02, 1.319e-01, -2.094e-02)); + r += mul(s1_4, M4(-1.017e-01, -1.706e-01, 2.838e-02, -1.911e-01, -1.702e-01, 1.835e-02, 4.517e-02, 4.334e-02, 4.174e-01, -1.962e-01, 3.960e-01, -3.778e-01, -4.905e-02, 2.323e-02, -7.309e-02, 
1.372e-01)); + r += mul(s1_5, M4(2.471e-03, 2.260e-02, -1.895e-02, 1.619e-02, 9.813e-02, -6.199e-02, 2.035e-02, 2.360e-01, -2.213e-02, 4.914e-02, 4.391e-04, 9.992e-02, -7.150e-03, -8.029e-02, 4.005e-02, -2.034e-02)); + r += mul(s1_6, M4(5.574e-02, -1.455e-03, 3.519e-02, -1.652e-02, 8.706e-03, 8.148e-03, 3.186e-02, -1.569e-02, -3.709e-02, -6.221e-03, -7.016e-02, 5.356e-04, -1.886e-03, -1.107e-02, -7.914e-02, 2.951e-02)); + r += mul(s1_7, M4(2.909e-02, 9.775e-03, -5.673e-02, -5.728e-03, 6.949e-02, -8.710e-03, -1.373e-01, 2.303e-02, -8.227e-02, -3.188e-02, 5.025e-02, -1.153e-01, -1.050e-02, -7.427e-03, -7.233e-02, -1.673e-01)); + r += mul(s1_8, M4(-7.709e-03, 3.031e-02, 4.567e-03, 1.352e-02, 5.188e-02, -2.272e-02, 8.991e-02, -1.821e-01, 8.971e-03, -5.960e-04, -1.277e-03, 7.427e-03, -6.498e-02, 1.294e-02, 1.555e-02, -1.687e-02)); + r += mul(s2_0, M4(1.187e-02, -2.125e-02, -1.424e-02, 9.688e-03, 2.789e-02, 3.693e-03, 3.572e-03, -3.632e-02, -7.399e-02, 1.812e-02, -1.472e-03, -8.793e-03, 1.395e-02, -2.879e-03, 7.715e-03, 3.446e-03)); + r += mul(s2_1, M4(4.279e-02, -8.375e-02, -6.323e-02, -2.238e-02, -2.822e-01, 7.203e-02, 5.882e-02, 1.752e-02, -5.393e-02, 1.155e-01, -4.163e-02, -5.853e-02, -3.211e-02, 1.291e-02, -4.688e-03, -9.039e-03)); + r += mul(s2_2, M4(-7.677e-02, -2.722e-02, 1.059e-02, -4.523e-02, 3.556e-02, -5.189e-03, -9.061e-03, 1.751e-02, -2.031e-03, -9.133e-03, 3.850e-03, -1.774e-02, 4.552e-03, -9.488e-03, 3.512e-03, 4.578e-03)); + r += mul(s2_3, M4(2.615e-02, -3.477e-03, -1.008e-02, -1.526e-02, -3.863e-02, -2.732e-03, 3.901e-03, 1.203e-02, -2.661e-01, -2.898e-02, -3.043e-01, 1.183e-02, -1.000e-02, 1.435e-02, -3.403e-02, -8.087e-03)); + r += mul(s2_4, M4(-9.355e-02, 8.081e-02, 1.895e-02, -3.090e-03, 3.487e-01, 5.679e-02, -7.838e-02, 2.550e-01, 1.324e-02, 3.883e-01, -3.071e-02, 5.176e-01, 1.501e-01, -3.565e-02, -1.969e-02, -8.083e-03)); + r += mul(s2_5, M4(4.897e-02, 4.358e-02, -2.250e-02, 3.900e-04, -6.904e-02, -1.540e-01, 2.712e-02, -3.057e-01, 2.473e-02, 
1.154e-02, 2.076e-02, 4.656e-02, -5.647e-02, -3.986e-03, 9.239e-03, -2.458e-03)); + r += mul(s2_6, M4(4.262e-03, -1.035e-04, 6.255e-03, 3.230e-03, -2.473e-03, 1.006e-02, 3.284e-02, -2.630e-02, -3.128e-02, -1.357e-03, -8.228e-02, -8.119e-03, -1.882e-01, 8.072e-03, 2.790e-01, -6.116e-02)); + r += mul(s2_7, M4(9.436e-03, 1.770e-03, 9.203e-02, 1.249e-02, 2.308e-02, -2.907e-02, -2.786e-02, 4.915e-02, -1.968e-02, -3.430e-02, -1.912e-02, -3.145e-03, -3.159e-01, -4.435e-01, 2.317e-01, 6.173e-01)); + r += mul(s2_8, M4(3.184e-02, -2.678e-03, -6.755e-03, 7.788e-02, -2.232e-02, -5.733e-03, -2.362e-02, 1.816e-02, 1.913e-03, -1.934e-03, -2.528e-03, -1.285e-02, -1.946e-02, -5.783e-02, -2.675e-02, -7.862e-02)); + r += mul(s3_0, M4(1.347e-02, -6.464e-04, -1.733e-02, 2.701e-04, 1.661e-02, 2.890e-03, 7.689e-03, -1.548e-02, 3.021e-02, -6.908e-03, 5.846e-03, -1.131e-02, 1.495e-02, 2.671e-03, 7.160e-03, 4.655e-03)); + r += mul(s3_1, M4(-2.134e-01, -2.300e-01, 2.629e-02, 2.755e-02, -8.032e-02, 1.235e-02, 1.920e-02, 5.557e-02, -4.051e-02, -1.978e-02, -4.303e-02, -5.506e-02, -2.838e-02, 1.676e-02, -4.412e-03, -1.334e-02)); + r += mul(s3_2, M4(-5.847e-02, -1.401e-01, -5.565e-03, -1.700e-02, 2.641e-02, -8.521e-02, -1.057e-02, 3.629e-03, -3.943e-03, -3.006e-03, 4.014e-03, -1.514e-02, 1.348e-02, -3.349e-03, 2.767e-03, 4.664e-03)); + r += mul(s3_3, M4(2.605e-02, 5.798e-03, -9.194e-03, -1.654e-02, -2.398e-02, 2.579e-03, 3.456e-02, 1.623e-02, -4.851e-02, -7.558e-02, -3.631e-02, -3.477e-02, 1.831e-02, -3.549e-05, -1.464e-02, 2.457e-03)); + r += mul(s3_4, M4(-3.391e-01, -6.608e-02, 4.936e-01, 2.333e-01, 1.045e-01, 1.597e-01, 3.183e-02, 1.025e-01, 1.371e-01, 1.382e-01, 7.103e-02, 1.793e-01, 1.265e-01, -1.196e-02, 1.296e-02, 2.169e-02)); + r += mul(s3_5, M4(5.367e-02, -2.184e-02, -2.914e-02, 2.076e-01, -4.832e-02, -8.489e-02, -3.349e-02, -8.611e-02, 6.975e-03, 2.881e-02, 9.430e-03, 5.570e-02, -5.738e-02, -1.056e-02, 8.026e-03, 1.479e-02)); + r += mul(s3_6, M4(2.914e-03, -7.976e-03, -8.091e-03, 
7.943e-03, -7.490e-03, 1.170e-02, 2.435e-02, -2.150e-02, -3.865e-02, 9.554e-03, -3.592e-02, -1.665e-02, 1.543e-02, -3.448e-03, 8.179e-02, -2.743e-02)); + r += mul(s3_7, M4(2.977e-02, 2.276e-02, 6.616e-02, -1.629e-02, 4.146e-02, -3.141e-02, -4.259e-02, 6.178e-02, -2.579e-02, -3.562e-02, 4.975e-03, -4.307e-02, -7.259e-03, 5.942e-02, -7.202e-02, 1.352e-01)); + r += mul(s3_8, M4(3.820e-02, 1.063e-02, -8.922e-03, 7.153e-02, -8.104e-03, -1.760e-02, 6.000e-03, -6.468e-02, 9.946e-05, 3.195e-04, -8.201e-03, -3.610e-03, -3.006e-02, -3.924e-02, -1.292e-02, -1.219e-01)); + r += mul(s4_0, M4(-2.191e-02, 1.599e-02, 8.420e-03, -1.086e-02, 2.063e-02, -2.956e-02, -7.893e-03, 2.008e-02, -5.920e-02, 3.584e-02, -4.143e-03, -6.872e-03, 1.034e-01, -3.611e-02, -3.573e-02, -1.497e-02)); + r += mul(s4_1, M4(1.523e-02, 2.486e-03, -4.195e-03, 2.462e-02, -3.835e-02, -2.553e-02, -1.036e-03, -2.996e-02, 1.417e-01, -1.515e-01, -4.471e-02, -3.221e-03, -6.459e-02, 1.255e-01, 2.417e-02, -6.074e-02)); + r += mul(s4_2, M4(1.616e-02, 1.592e-03, -3.734e-04, 2.990e-03, -1.443e-02, 5.308e-03, -5.084e-03, -1.204e-02, -6.789e-03, 2.961e-02, -2.044e-04, -7.314e-03, 2.294e-03, -4.090e-02, -5.013e-03, 9.427e-03)); + r += mul(s4_3, M4(3.806e-02, 5.284e-03, 2.402e-02, -8.429e-03, -1.201e-01, 6.213e-02, -4.133e-02, -1.134e-02, -1.802e-02, -3.513e-03, -7.280e-02, 2.804e-02, -4.772e-02, 9.656e-03, 3.899e-02, 2.222e-02)); + r += mul(s4_4, M4(7.007e-02, 6.686e-02, 8.883e-02, 1.203e-01, -2.129e-02, -1.151e-01, -6.914e-02, -4.476e-02, 2.397e-01, -2.011e-01, 4.795e-01, -2.961e-01, 2.885e-02, 1.705e-02, -1.239e-02, 9.686e-02)); + r += mul(s4_5, M4(3.543e-03, 3.091e-02, 6.047e-03, 8.998e-03, 2.942e-02, -1.122e-02, 2.545e-02, 3.955e-02, 6.927e-03, 1.410e-02, -3.543e-03, 8.472e-02, -5.371e-03, -3.867e-02, 3.605e-03, -6.078e-02)); + r += mul(s4_6, M4(-1.860e-02, 7.303e-03, 1.120e-02, 2.374e-02, 3.594e-02, 2.375e-04, 1.775e-02, 1.066e-03, -1.177e-02, 1.460e-02, -4.779e-03, 1.629e-02, 1.071e-02, -4.558e-03, -5.317e-03, 
2.159e-03)); + r += mul(s4_7, M4(7.506e-03, -5.090e-02, -8.668e-02, -8.096e-02, 8.813e-02, -7.846e-03, -3.904e-02, 3.086e-03, -1.394e-02, -2.948e-02, -5.211e-02, -6.857e-02, 2.448e-02, -5.364e-03, 2.122e-02, 4.570e-03)); + r += mul(s4_8, M4(-4.065e-02, 6.996e-03, -2.539e-03, -2.608e-02, -2.044e-02, 1.642e-02, -3.448e-03, -3.108e-02, -1.187e-02, 1.531e-02, -1.770e-03, -2.646e-02, 1.698e-03, 3.386e-03, -4.599e-04, 1.508e-02)); + r += mul(s5_0, M4(-3.917e-02, 1.278e-02, 9.730e-03, -1.230e-02, 1.849e-02, -1.271e-02, -6.797e-03, 1.483e-02, -7.229e-02, 2.299e-02, 1.742e-03, -1.202e-02, -6.162e-02, -5.012e-02, -8.717e-02, 4.267e-02)); + r += mul(s5_1, M4(6.843e-03, -1.568e-02, 1.219e-02, 3.503e-02, -4.625e-02, -1.223e-02, 7.350e-03, -3.099e-02, 3.045e-02, -1.862e-02, -4.702e-02, 5.099e-03, -2.587e-01, 5.690e-01, 1.235e-01, -2.007e-01)); + r += mul(s5_2, M4(1.341e-03, -1.622e-02, 1.479e-03, 1.092e-02, -1.340e-02, -1.026e-04, -3.321e-03, -1.172e-02, 1.401e-03, 2.405e-02, 1.919e-03, -8.970e-03, 8.486e-03, -7.278e-02, -1.391e-03, 1.960e-02)); + r += mul(s5_3, M4(4.673e-02, -1.780e-03, 3.273e-03, -9.909e-03, -5.724e-02, 8.129e-02, -8.664e-02, 5.244e-03, -1.026e-02, -1.886e-02, -9.682e-02, 1.553e-02, -6.835e-02, -1.168e-02, 2.514e-01, -7.690e-02)); + r += mul(s5_4, M4(4.072e-01, 2.885e-01, -5.578e-02, -3.700e-03, 8.928e-02, -3.925e-01, -3.394e-02, 3.251e-02, 7.202e-02, -3.607e-02, 2.310e-01, -3.091e-02, 5.465e-02, -9.662e-03, -6.141e-02, 6.591e-02)); + r += mul(s5_5, M4(-2.544e-02, 1.331e-01, 8.448e-03, -3.918e-02, 2.213e-02, 2.838e-02, 1.538e-02, 2.403e-02, 1.911e-02, 2.710e-02, 7.796e-03, 8.987e-02, -9.308e-03, -4.751e-02, 6.841e-03, -3.261e-02)); + r += mul(s5_6, M4(-2.069e-02, -3.251e-03, -5.082e-03, 2.418e-02, 3.796e-02, -8.759e-03, 7.109e-02, -5.937e-03, -3.786e-03, 9.591e-03, 5.923e-03, 2.273e-03, 1.387e-02, -2.745e-04, 2.036e-04, -2.293e-04)); + r += mul(s5_7, M4(6.661e-02, -1.094e-02, -2.950e-01, -2.125e-01, 1.325e-01, 5.139e-02, -2.472e-01, 1.490e-01, -1.114e-02, 
-2.533e-02, -7.544e-02, -4.432e-02, 2.411e-02, -3.858e-03, 1.384e-02, 1.214e-02)); + r += mul(s5_8, M4(-3.374e-02, 2.999e-02, 2.523e-03, -1.093e-01, -3.038e-02, -1.977e-02, 1.127e-02, -1.055e-02, -1.511e-02, 9.979e-03, 6.345e-04, -2.642e-02, 4.375e-04, 3.018e-05, -5.478e-03, 4.899e-03)); + r += mul(s6_0, M4(-2.830e-02, 9.871e-03, 6.143e-05, -6.840e-03, 5.058e-02, -6.543e-03, -3.699e-04, 1.024e-02, -2.405e-02, 3.026e-02, 1.567e-02, -2.056e-02, -1.371e-02, 4.031e-03, 1.189e-02, -1.115e-02)); + r += mul(s6_1, M4(1.517e-02, -1.438e-02, -2.525e-02, -2.799e-04, 5.445e-02, -4.351e-02, 2.541e-02, 2.961e-02, 6.570e-03, 8.960e-02, -1.932e-02, -3.564e-03, -9.741e-02, 5.371e-02, -2.418e-03, 1.425e-02)); + r += mul(s6_2, M4(1.501e-03, -2.929e-02, 8.791e-03, -1.576e-03, 3.412e-03, 3.486e-03, 5.511e-03, 1.206e-02, 2.178e-02, -9.940e-03, -8.442e-04, -6.434e-04, 3.137e-02, -1.667e-02, -3.983e-02, 1.849e-02)); + r += mul(s6_3, M4(3.644e-02, -9.234e-03, 5.457e-02, -1.295e-02, 2.628e-02, -1.708e-02, 4.262e-02, 2.356e-02, -3.927e-02, 4.293e-03, 1.147e-02, 6.816e-03, 8.990e-03, -9.797e-03, -2.532e-02, 2.662e-02)); + r += mul(s6_4, M4(-1.298e-01, -1.232e-02, 2.945e-02, 1.442e-01, -2.046e-01, 1.258e-01, 9.591e-03, 4.317e-02, 6.559e-02, -1.613e-01, 1.265e-01, 1.035e-01, 2.189e-02, 7.561e-03, -3.462e-02, -1.081e-02)); + r += mul(s6_5, M4(1.315e-02, 3.954e-02, -2.993e-02, -3.069e-02, 7.408e-03, 4.444e-03, -2.900e-02, 1.511e-02, -2.136e-02, 4.019e-03, -9.298e-03, 2.916e-02, 6.284e-02, -1.254e-01, 1.126e-01, -8.404e-02)); + r += mul(s6_6, M4(-3.935e-02, -1.433e-03, -5.533e-02, 1.111e-03, -2.772e-02, 2.248e-02, -3.252e-03, -3.881e-02, 2.206e-04, -1.418e-02, -4.193e-02, 8.625e-03, -1.006e-03, 2.233e-03, -1.287e-02, -4.024e-03)); + r += mul(s6_7, M4(2.404e-01, -6.556e-02, -2.942e-01, -9.692e-02, 7.087e-02, -7.028e-02, -1.317e-01, -5.265e-02, -3.230e-02, 3.590e-02, -7.622e-02, -8.369e-02, 1.372e-02, -1.592e-02, -1.048e-02, -1.536e-02)); + r += mul(s6_8, M4(6.781e-02, -7.009e-03, 2.860e-02, 
2.274e-01, -9.817e-03, 1.673e-02, 1.795e-02, -4.467e-02, 2.646e-03, -1.109e-02, 2.573e-03, -3.715e-02, -5.283e-02, 4.464e-02, 1.801e-02, 1.569e-02)); + r += mul(s7_0, M4(-2.424e-02, 1.016e-02, 3.816e-04, -8.696e-03, 8.698e-03, 2.044e-03, 1.042e-03, 1.691e-02, -2.034e-02, 3.552e-02, -5.592e-03, -2.426e-02, -4.224e-03, -3.754e-04, 1.771e-02, -1.581e-02)); + r += mul(s7_1, M4(1.328e-02, -1.689e-02, -2.691e-02, -2.634e-03, 1.176e-01, -1.653e-02, 5.465e-03, -2.580e-03, 4.724e-02, 9.344e-02, -4.456e-02, -3.431e-02, -3.025e-01, 9.328e-02, -1.246e-02, 2.387e-02)); + r += mul(s7_2, M4(-8.990e-04, -2.366e-02, 6.412e-03, -5.975e-03, -8.695e-03, 3.697e-03, 2.235e-02, 1.274e-02, 1.311e-02, 8.728e-03, -2.676e-04, -1.281e-02, 1.014e-02, 1.266e-01, -2.253e-02, 1.049e-02)); + r += mul(s7_3, M4(4.771e-02, -9.977e-03, 4.476e-02, -1.020e-02, 2.030e-01, -7.533e-03, 4.306e-02, -2.822e-02, -2.046e-01, 4.606e-03, 1.528e-01, -3.807e-03, 2.697e-02, -1.144e-02, -1.123e-02, 1.569e-02)); + r += mul(s7_4, M4(-8.972e-02, -1.116e-02, 1.027e-02, 1.296e-01, -9.394e-02, -2.126e-01, -1.783e-01, 3.546e-01, -9.159e-02, -4.756e-01, 2.339e-01, 3.902e-01, -1.207e-01, 7.496e-02, -3.850e-01, 9.175e-02)); + r += mul(s7_5, M4(-5.036e-03, 7.069e-02, -2.437e-02, -1.670e-02, 4.290e-03, 4.675e-02, -2.716e-02, 1.163e-02, -2.350e-02, -4.682e-03, -5.020e-03, 1.407e-02, -2.286e-02, 1.490e-01, 2.616e-02, 3.160e-01)); + r += mul(s7_6, M4(-3.243e-02, -3.122e-03, -5.286e-02, 7.820e-03, -2.416e-02, 4.868e-02, -1.168e-01, -1.736e-02, 1.028e-02, -1.177e-02, -1.715e-02, 3.846e-03, -2.036e-03, 2.827e-03, -1.091e-02, 1.062e-03)); + r += mul(s7_7, M4(1.390e-02, -8.685e-03, -2.752e-02, -1.309e-01, 5.806e-02, -5.186e-02, -7.078e-02, -8.618e-02, -3.155e-02, 4.158e-02, -8.837e-02, -4.630e-02, 1.249e-02, -4.217e-02, -8.350e-03, -2.970e-02)); + r += mul(s7_8, M4(1.165e-01, 1.334e-02, 1.667e-02, 9.542e-02, -1.276e-02, 1.212e-02, 1.332e-02, -4.138e-02, 2.104e-03, -5.048e-03, -1.939e-04, -5.239e-02, -4.374e-02, 5.139e-02, -7.104e-03, 
3.513e-02));
+    r += V4(-9.892e-05, -2.795e-04, -8.146e-04, -3.370e-04);
+    return tanh(r);
+}
+// Pass18: each thread gathers 3x3 taps through the l0..l3 macros (defined earlier in this pass, not visible here), splits them into positive/negative parts, runs f0, and adds the four f0 outputs to the luma of a 2x2 quad of OUTPUT pixels.
+void Pass18(uint2 blockStart, uint3 tid) {
+    float2 pt = float2(GetInputPt()); // external helper; presumably the size of one input texel in UV units - TODO confirm
+    uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; // doubled: gxy is the first pixel of this thread's 2x2 output quad. Rmp8x8 is external; presumably maps the flat thread id to (x, y) in an 8x8 tile - TODO confirm
+    uint2 size = GetOutputSize();
+    if (gxy.x >= size.x || gxy.y >= size.y) { // quad origin is outside the output: nothing to write
+        return;
+    }
+    float2 pos = ((gxy >> 1) + 0.5) * pt; // halve back to the source pixel index and address its centre
+    // Taps from l0 at the nine offsets in [-1, 1]^2. s1_* keep the negative part (-max(-s, 0) == min(s, 0)); s0_* are then clamped in place to the positive part.
+    V4 s0_0 = l0(-1.0, -1.0);
+    V4 s0_1 = l0(0.0, -1.0);
+    V4 s0_2 = l0(1.0, -1.0);
+    V4 s0_3 = l0(-1.0, 0.0);
+    V4 s0_4 = l0(0.0, 0.0);
+    V4 s0_5 = l0(1.0, 0.0);
+    V4 s0_6 = l0(-1.0, 1.0);
+    V4 s0_7 = l0(0.0, 1.0);
+    V4 s0_8 = l0(1.0, 1.0);
+    V4 s1_0 = -max(-s0_0, 0.0);
+    V4 s1_1 = -max(-s0_1, 0.0);
+    V4 s1_2 = -max(-s0_2, 0.0);
+    V4 s1_3 = -max(-s0_3, 0.0);
+    V4 s1_4 = -max(-s0_4, 0.0);
+    V4 s1_5 = -max(-s0_5, 0.0);
+    V4 s1_6 = -max(-s0_6, 0.0);
+    V4 s1_7 = -max(-s0_7, 0.0);
+    V4 s1_8 = -max(-s0_8, 0.0);
+    s0_0 = max(s0_0, 0.0);
+    s0_1 = max(s0_1, 0.0);
+    s0_2 = max(s0_2, 0.0);
+    s0_3 = max(s0_3, 0.0);
+    s0_4 = max(s0_4, 0.0);
+    s0_5 = max(s0_5, 0.0);
+    s0_6 = max(s0_6, 0.0);
+    s0_7 = max(s0_7, 0.0);
+    s0_8 = max(s0_8, 0.0);
+    // Same split for the l1 taps: s2_* positive part, s3_* negative part.
+    V4 s2_0 = l1(-1.0, -1.0);
+    V4 s2_1 = l1(0.0, -1.0);
+    V4 s2_2 = l1(1.0, -1.0);
+    V4 s2_3 = l1(-1.0, 0.0);
+    V4 s2_4 = l1(0.0, 0.0);
+    V4 s2_5 = l1(1.0, 0.0);
+    V4 s2_6 = l1(-1.0, 1.0);
+    V4 s2_7 = l1(0.0, 1.0);
+    V4 s2_8 = l1(1.0, 1.0);
+    V4 s3_0 = -max(-s2_0, 0.0);
+    V4 s3_1 = -max(-s2_1, 0.0);
+    V4 s3_2 = -max(-s2_2, 0.0);
+    V4 s3_3 = -max(-s2_3, 0.0);
+    V4 s3_4 = -max(-s2_4, 0.0);
+    V4 s3_5 = -max(-s2_5, 0.0);
+    V4 s3_6 = -max(-s2_6, 0.0);
+    V4 s3_7 = -max(-s2_7, 0.0);
+    V4 s3_8 = -max(-s2_8, 0.0);
+    s2_0 = max(s2_0, 0.0);
+    s2_1 = max(s2_1, 0.0);
+    s2_2 = max(s2_2, 0.0);
+    s2_3 = max(s2_3, 0.0);
+    s2_4 = max(s2_4, 0.0);
+    s2_5 = max(s2_5, 0.0);
+    s2_6 = max(s2_6, 0.0);
+    s2_7 = max(s2_7, 0.0);
+    s2_8 = max(s2_8, 0.0);
+    // Same split for the l2 taps: s4_* positive part, s5_* negative part.
+    V4 s4_0 = l2(-1.0, -1.0);
+    V4 s4_1 = l2(0.0, -1.0);
+    V4 s4_2 = l2(1.0, -1.0);
+    V4 s4_3 = l2(-1.0, 0.0);
+    V4 s4_4 = l2(0.0, 0.0);
+    V4 s4_5 = l2(1.0, 0.0);
+    V4 s4_6 = l2(-1.0, 1.0);
+    V4 s4_7 = l2(0.0, 1.0);
+    V4 s4_8 = l2(1.0, 1.0);
+    V4 s5_0 = -max(-s4_0, 0.0);
+    V4 s5_1 = -max(-s4_1, 0.0);
+    V4 s5_2 = -max(-s4_2, 0.0);
+    V4 s5_3 = -max(-s4_3, 0.0);
+    V4 s5_4 = -max(-s4_4, 0.0);
+    V4 s5_5 = -max(-s4_5, 0.0);
+    V4 s5_6 = -max(-s4_6, 0.0);
+    V4 s5_7 = -max(-s4_7, 0.0);
+    V4 s5_8 = -max(-s4_8, 0.0);
+    s4_0 = max(s4_0, 0.0);
+    s4_1 = max(s4_1, 0.0);
+    s4_2 = max(s4_2, 0.0);
+    s4_3 = max(s4_3, 0.0);
+    s4_4 = max(s4_4, 0.0);
+    s4_5 = max(s4_5, 0.0);
+    s4_6 = max(s4_6, 0.0);
+    s4_7 = max(s4_7, 0.0);
+    s4_8 = max(s4_8, 0.0);
+    // Same split for the l3 taps: s6_* positive part, s7_* negative part.
+    V4 s6_0 = l3(-1.0, -1.0);
+    V4 s6_1 = l3(0.0, -1.0);
+    V4 s6_2 = l3(1.0, -1.0);
+    V4 s6_3 = l3(-1.0, 0.0);
+    V4 s6_4 = l3(0.0, 0.0);
+    V4 s6_5 = l3(1.0, 0.0);
+    V4 s6_6 = l3(-1.0, 1.0);
+    V4 s6_7 = l3(0.0, 1.0);
+    V4 s6_8 = l3(1.0, 1.0);
+    V4 s7_0 = -max(-s6_0, 0.0);
+    V4 s7_1 = -max(-s6_1, 0.0);
+    V4 s7_2 = -max(-s6_2, 0.0);
+    V4 s7_3 = -max(-s6_3, 0.0);
+    V4 s7_4 = -max(-s6_4, 0.0);
+    V4 s7_5 = -max(-s6_5, 0.0);
+    V4 s7_6 = -max(-s6_6, 0.0);
+    V4 s7_7 = -max(-s6_7, 0.0);
+    V4 s7_8 = -max(-s6_8, 0.0);
+    s6_0 = max(s6_0, 0.0);
+    s6_1 = max(s6_1, 0.0);
+    s6_2 = max(s6_2, 0.0);
+    s6_3 = max(s6_3, 0.0);
+    s6_4 = max(s6_4, 0.0);
+    s6_5 = max(s6_5, 0.0);
+    s6_6 = max(s6_6, 0.0);
+    s6_7 = max(s6_7, 0.0);
+    s6_8 = max(s6_8, 0.0);
+    // f0 returns tanh(...), so r holds four squashed values; each one is used below as a luma offset for one pixel of the quad.
+    V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8);
+    // Colour matrices: the first row of rgb2yuv is the 0.299/0.587/0.114 luma weighting; yuv2rgb converts back to RGB.
+    static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081};
+    static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099};
+    float2 opt = float2(GetOutputPt()); // external helper; presumably the size of one output texel in UV units - TODO confirm
+    // Pixel (x, y): step half an output texel back from the source pixel centre, sample INPUT through SL, add r.x to luma only, clamp luma to [0, 1].
+    pos -= 0.5f * opt;
+    float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
+    OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1);
+    // Pixel (x + 1, y) receives r.y.
+    ++gxy.x;
+    pos.x += opt.x;
+    yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
+    OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1);
+    // Pixel (x + 1, y + 1) receives r.w (the walk goes around the quad, so w comes before z).
+    ++gxy.y;
+    pos.y += opt.y;
+    yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
+    OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1);
+    // Pixel (x, y + 1) receives r.z.
+    --gxy.x;
+    pos.x -= opt.x;
+    yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
+    OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1);
+}
diff --git a/src/Effects/CuNNy/CuNNy-2x4C-NVL-DN.hlsl b/src/Effects/CuNNy/CuNNy-2x4C-NVL-DN.hlsl
new file mode 100644
index 000000000..d59cf8d1c
--- /dev/null
+++ b/src/Effects/CuNNy/CuNNy-2x4C-NVL-DN.hlsl
@@ -0,0 +1,340 @@
+// CuNNy 2x4C BILINEAR RGB NVL DN - https://github.com/cunnyplapper/CuNNy
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see https://www.gnu.org/licenses/.
+
+//!MAGPIE EFFECT
+//!VERSION 4
+//!SORT_NAME CuNNy-DN-D04N02
+
+//!TEXTURE
+Texture2D INPUT;
+
+//!TEXTURE
+//!WIDTH INPUT_WIDTH * 2
+//!HEIGHT INPUT_HEIGHT * 2
+Texture2D OUTPUT;
+
+//!SAMPLER
+//!FILTER POINT
+SamplerState SP;
+
+//!SAMPLER
+//!FILTER LINEAR
+SamplerState SL;
+
+//!COMMON
+#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0)
+#define V4 min16float4
+#define M4 min16float4x4
+
+//!TEXTURE
+//!WIDTH INPUT_WIDTH
+//!HEIGHT INPUT_HEIGHT
+//!FORMAT R8G8B8A8_SNORM
+Texture2D t0;
+
+//!TEXTURE
+//!WIDTH INPUT_WIDTH
+//!HEIGHT INPUT_HEIGHT
+//!FORMAT R8G8B8A8_SNORM
+Texture2D t1;
+
+//!PASS 1
+//!DESC in
+//!BLOCK_SIZE 8
+//!NUM_THREADS 64
+//!IN INPUT
+//!OUT t0
+
+#define l0(x, y) min16float((dot(float3(-3.725e-01, -7.046e-01, -1.734e-01), O(INPUT, float2(x, y)).rgb) + 1.169e-01))
+// l0 (above): one scalar per tap = dot(fixed RGB weights, point-sampled INPUT texel at offset (x, y)) + bias. f0: sum of the nine taps, each scaled by its own V4 weight, plus a V4 bias.
+V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) {
+    V4 r = 0.0;
+    r += V4(-2.745e-03, -2.925e-03, 1.135e-01, 3.162e-02) * s0_0;
+    r += V4(4.049e-03, -3.428e-01, -7.641e-02, 2.484e-02) * s0_1;
+    r += V4(-8.372e-03, 3.398e-01, 1.072e-01, -5.449e-02) * s0_2;
+    r += V4(1.592e-02, 1.884e-02, -3.160e-02, -7.727e-02) * s0_3;
+    r += V4(4.429e-01, -3.936e-01, -4.134e-01, -4.287e-01) * s0_4;
+    r += V4(4.556e-02, 3.754e-01, -2.300e-02, 4.971e-01) * s0_5;
+    r += V4(-2.031e-02, -6.662e-03, 8.906e-02, 4.602e-02) * s0_6;
+    r += V4(-4.365e-01, 2.183e-03, 8.609e-02, 9.402e-03) * s0_7;
+    r += V4(-3.845e-02, 5.695e-03, 9.645e-02, -5.310e-02) * s0_8;
+    r += V4(1.492e-02, -1.961e-02, -7.539e-03, -3.574e-03); // bias term
+    return r;
+}
+// Pass1: evaluates f0 on the 3x3 neighbourhood of one INPUT pixel and stores the V4 result in t0.
+void Pass1(uint2 blockStart, uint3 tid) {
+    float2 pt = float2(GetInputPt()); // external helper; presumably the size of one input texel in UV units - TODO confirm
+    uint2 gxy = Rmp8x8(tid.x) + blockStart; // Rmp8x8 is external; presumably maps the flat thread id to (x, y) in an 8x8 tile - TODO confirm
+    uint2 size = GetInputSize();
+    if (gxy.x >= size.x || gxy.y >= size.y) { // pixel is outside the input-sized target: nothing to write
+        return;
+    }
+    float2 pos = (gxy + 0.5) * pt; // pixel-centre coordinate; read implicitly by the O() macro inside l0
+    // Nine taps at offsets in [-1, 1]^2.
+    min16float s0_0 = l0(-1.0, -1.0);
+    min16float s0_1 = l0(0.0, -1.0);
+    min16float s0_2 = l0(1.0, -1.0);
+    min16float s0_3 = l0(-1.0, 0.0);
+    min16float s0_4 = l0(0.0, 0.0);
+    min16float s0_5 = l0(1.0, 0.0);
+    min16float s0_6 = l0(-1.0, 1.0);
+    min16float s0_7 = l0(0.0, 1.0);
+    min16float s0_8 = l0(1.0, 1.0);
+
+    t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8);
+}
+
+//!PASS 2
+//!DESC conv1
+//!BLOCK_SIZE 8
+//!NUM_THREADS 64
+//!IN t0
+//!OUT t1
+
+#define l0(x, y) V4(O(t0, float2(x, y)))
+// l0 (above): point-sampled t0 texel at offset (x, y) as V4. f0: sum of mul(tap, M4) over 18 inputs (s0_* = positive parts, s1_* = negative parts of the nine taps) plus a V4 bias.
+V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) {
+    V4 r = 0.0;
+    r += mul(s0_0, M4(1.218e-02, -1.208e-01, -1.955e-01, -1.217e-01, 3.123e-02, -2.317e-02, 1.961e-01, -9.984e-02, 3.038e-03, 2.863e-02, -1.042e-01, -5.529e-02, 1.266e-01, -3.877e-01, 2.315e-01, -1.334e-01));
+    r += mul(s0_1, M4(-1.774e-02, 1.636e-01, 1.379e-01, 7.499e-03, -7.890e-02, -3.970e-02, -6.053e-02, -1.431e-02, 4.167e-02, 9.728e-02, 3.825e-02, -2.704e-02, -2.303e-01, -3.348e-01, 2.940e-01, 4.825e-02));
+    r += mul(s0_2, M4(1.239e-02, 1.613e-02, -2.280e-01, 8.985e-02, 2.106e-03, 3.847e-02, -2.539e-02, -3.326e-02, -6.327e-02, -1.427e-01, 4.218e-02, 8.995e-02, -6.045e-02, -1.073e-01, -1.329e-01, -2.085e-02));
+    r += mul(s0_3, M4(-1.601e-01, -2.448e-01, -3.950e-01, 9.169e-03, -3.694e-02, 2.018e-01, -2.524e-01, 1.719e+00, 3.009e-02, 4.927e-02, 1.564e-01, 3.509e-02, -2.630e-02, -3.986e-01, 1.326e-01, -1.037e-02));
+    r += mul(s0_4, M4(-1.074e+00, -1.654e-01, 4.163e-01, 3.816e-02, 4.580e-01, 4.350e-01, -3.490e-01, -1.257e-02, 1.159e-02, -2.083e-01, -2.744e-01, -2.667e-02, 2.826e-03, 1.986e-01, -2.723e-01, 9.612e-02));
+    r += mul(s0_5, M4(-3.195e-01, -1.450e-01, -1.523e-01, -2.999e-03, 1.166e-01, 1.304e-01, 1.475e-01, 7.286e-02, -4.077e-02, -3.477e-02, 1.496e-01, -1.199e-02, 7.881e-02, 8.911e-02, -1.082e-01, -6.762e-02));
+    r += mul(s0_6, M4(2.020e-02, 1.556e-01, -9.837e-03, 1.537e-02, -1.047e-01, 2.095e-01, 2.025e-01, -3.522e-02, -3.407e-02, -8.949e-02, -7.721e-02, -8.910e-03, 9.305e-02,
2.231e-01, 2.178e-01, 1.502e-02));
+    r += mul(s0_7, M4(-7.936e-02, 3.096e-01, 1.869e-01, -1.950e-03, -2.452e-01, -5.098e-01, 5.304e-01, -4.921e-02, -1.073e-01, 1.062e-01, 2.527e-01, 5.909e-04, 3.797e-02, 3.291e-01, -2.395e-01, 2.768e-02));
+    r += mul(s0_8, M4(-5.559e-02, 1.090e-01, -1.757e-01, 1.261e-02, -1.632e-01, -2.476e-01, -5.674e-02, -4.843e-03, 1.064e-02, 1.023e-01, 2.540e-02, -1.336e-02, 1.362e-01, 1.833e-01, 3.772e-03, 5.118e-04));
+    r += mul(s1_0, M4(1.383e-01, 3.469e-01, 3.568e-02, -1.958e-01, -3.170e-02, -1.076e-02, -2.012e-02, -2.104e-04, 2.046e-02, -1.268e-02, -1.618e-01, -6.370e-02, 2.615e-02, 1.494e-01, -1.523e-01, 3.702e-02));
+    r += mul(s1_1, M4(-1.140e-02, 6.811e-01, 5.722e-02, 1.514e-01, -6.311e-02, -3.541e-02, -1.150e-01, 3.625e-02, 1.146e-01, -1.395e-03, 5.059e-01, -7.835e-02, -3.907e-01, 6.172e-02, -9.656e-02, -2.727e-02));
+    r += mul(s1_2, M4(1.239e-01, 1.206e-01, 7.519e-01, 2.106e-02, 8.647e-03, 1.082e-02, 5.931e-02, -4.215e-02, -2.216e-02, -4.829e-02, -1.927e-01, 1.159e-01, -1.789e-01, -9.596e-02, 1.395e-01, -6.395e-02));
+    r += mul(s1_3, M4(1.194e-01, -5.786e-01, -1.761e-03, -1.126e-02, -5.311e-02, -2.325e-01, 1.733e-01, 2.842e-01, -1.080e-01, -1.012e-01, 1.851e-01, 4.253e-02, 1.212e-01, 2.435e-02, -3.061e-01, -9.579e-02));
+    r += mul(s1_4, M4(-4.651e-02, -1.299e+00, -5.020e-01, 5.830e-02, 5.098e-01, 7.344e-02, -1.358e-01, 1.725e-02, -2.980e-01, -6.077e-01, 6.308e-01, -4.014e-02, 3.497e-01, 3.700e-01, -6.035e-01, 8.026e-02));
+    r += mul(s1_5, M4(-1.851e-02, -2.057e-01, 5.081e-01, -5.262e-02, 1.715e-01, 1.387e-01, -1.123e-01, 9.022e-02, -1.532e-01, -3.749e-02, -1.930e-01, 6.423e-02, 2.763e-02, 5.993e-02, 4.141e-01, -8.825e-02));
+    r += mul(s1_6, M4(-6.324e-03, -9.461e-02, 3.044e-02, -4.139e-03, -2.925e-02, 3.975e-01, 1.161e-01, 9.726e-03, 1.353e-01, 2.762e-01, 3.297e-03, 1.076e-02, -8.503e-02, -7.010e-01, -1.967e-01, -1.360e-03));
+    r += mul(s1_7, M4(1.873e-02, 1.099e-01, 1.229e-01, -1.232e-02, -5.723e-01, -4.599e-02, -1.236e-01,
-2.003e-02, -4.268e-01, 5.929e-01, 2.942e-01, 3.485e-02, 4.326e-01, -9.250e-02, 3.736e-01, -2.393e-02));
+    r += mul(s1_8, M4(-5.991e-02, 1.199e-03, -1.349e-02, -1.321e-03, -2.036e-01, -1.937e-01, -7.888e-02, -9.144e-03, 1.557e-01, 7.018e-02, -2.646e-01, -3.360e-06, 1.742e-01, 1.814e-01, 1.385e-01, -1.030e-02));
+    r += V4(4.789e-02, 4.713e-03, -2.854e-02, 9.967e-03); // bias term
+    return r;
+}
+// Pass2 (conv1): reads the 3x3 neighbourhood from t0, splits every tap into positive and negative parts, and writes f0's result to t1.
+void Pass2(uint2 blockStart, uint3 tid) {
+    float2 pt = float2(GetInputPt());
+    uint2 gxy = Rmp8x8(tid.x) + blockStart;
+    uint2 size = GetInputSize();
+    if (gxy.x >= size.x || gxy.y >= size.y) { // outside the input-sized target: nothing to write
+        return;
+    }
+    float2 pos = (gxy + 0.5) * pt; // pixel-centre coordinate; read implicitly by the O() macro inside l0
+    // s1_* keep the negative part (-max(-s, 0) == min(s, 0)); s0_* are then clamped in place to the positive part.
+    V4 s0_0 = l0(-1.0, -1.0);
+    V4 s0_1 = l0(0.0, -1.0);
+    V4 s0_2 = l0(1.0, -1.0);
+    V4 s0_3 = l0(-1.0, 0.0);
+    V4 s0_4 = l0(0.0, 0.0);
+    V4 s0_5 = l0(1.0, 0.0);
+    V4 s0_6 = l0(-1.0, 1.0);
+    V4 s0_7 = l0(0.0, 1.0);
+    V4 s0_8 = l0(1.0, 1.0);
+    V4 s1_0 = -max(-s0_0, 0.0);
+    V4 s1_1 = -max(-s0_1, 0.0);
+    V4 s1_2 = -max(-s0_2, 0.0);
+    V4 s1_3 = -max(-s0_3, 0.0);
+    V4 s1_4 = -max(-s0_4, 0.0);
+    V4 s1_5 = -max(-s0_5, 0.0);
+    V4 s1_6 = -max(-s0_6, 0.0);
+    V4 s1_7 = -max(-s0_7, 0.0);
+    V4 s1_8 = -max(-s0_8, 0.0);
+    s0_0 = max(s0_0, 0.0);
+    s0_1 = max(s0_1, 0.0);
+    s0_2 = max(s0_2, 0.0);
+    s0_3 = max(s0_3, 0.0);
+    s0_4 = max(s0_4, 0.0);
+    s0_5 = max(s0_5, 0.0);
+    s0_6 = max(s0_6, 0.0);
+    s0_7 = max(s0_7, 0.0);
+    s0_8 = max(s0_8, 0.0);
+
+    t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8);
+}
+
+//!PASS 3
+//!DESC conv2
+//!BLOCK_SIZE 8
+//!NUM_THREADS 64
+//!IN t1
+//!OUT t0
+
+#define l0(x, y) V4(O(t1, float2(x, y)))
+// l0 (above): point-sampled t1 texel at offset (x, y) as V4. f0 has the same form as in pass 2 (18 mul(tap, M4) terms plus a V4 bias) with this pass's own weights.
+V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) {
+    V4 r = 0.0;
+    r += mul(s0_0, M4(-1.565e-01, 1.307e-02, -5.269e-02, 5.465e-02, 2.936e-01, 1.626e-01, 4.589e-02, 2.478e-02, 3.520e-01, -5.445e-02, -2.480e-01, 2.838e-02, 1.841e-04, 1.264e-02,
-1.370e-02, 2.588e-02));
+    r += mul(s0_1, M4(2.350e-01, 2.116e-01, 2.167e-02, -1.559e-01, 2.502e-01, 4.320e-01, -7.152e-01, 2.270e-01, -2.668e-01, -2.117e-01, 5.598e-01, 2.261e-01, 4.101e-02, -4.860e-02, 3.530e-02, 8.932e-02));
+    r += mul(s0_2, M4(-4.398e-02, -4.486e-02, -5.040e-02, 9.803e-02, 7.515e-02, 1.203e-01, -5.357e-02, -2.803e-01, -1.435e-01, 7.150e-03, -3.118e-02, -2.636e-01, -2.969e-02, -2.011e-02, 2.658e-02, -2.572e-02));
+    r += mul(s0_3, M4(9.140e-02, -1.875e-01, 9.757e-02, 2.976e-02, -8.325e-02, 6.109e-02, -4.304e-02, 7.057e-02, 7.324e-01, -1.528e-01, 2.930e-01, 7.503e-02, -3.901e-02, 1.109e-03, -2.693e-02, -3.330e-02));
+    r += mul(s0_4, M4(-9.944e-02, 1.858e-01, -2.436e-01, 3.822e-02, 6.685e-02, -1.758e-01, 1.382e-01, -1.715e-01, 3.252e-01, 5.176e-01, -2.939e-01, 4.311e-01, -6.125e-02, 1.905e-01, 8.140e-02, 2.095e-01));
+    r += mul(s0_5, M4(3.193e-02, 6.029e-02, 1.869e-03, 8.627e-04, -1.402e-02, 4.288e-02, -5.756e-02, 8.813e-02, -2.758e-02, -5.267e-02, 1.702e-03, -6.676e-01, 6.373e-02, 5.766e-02, -6.325e-02, -2.744e-01));
+    r += mul(s0_6, M4(4.918e-02, 5.420e-04, 3.692e-02, 7.796e-03, -1.163e-02, -4.074e-02, 2.057e-02, -2.837e-02, 1.083e-01, 1.958e-01, -5.078e-02, 2.750e-02, 5.323e-02, 5.953e-03, 4.766e-02, -2.265e-03));
+    r += mul(s0_7, M4(-3.968e-02, -1.535e-01, 6.564e-02, -2.620e-02, 3.742e-02, 8.659e-02, -4.440e-02, 6.007e-03, -9.585e-02, -9.425e-02, -1.517e-01, 3.701e-01, -1.332e-01, -1.860e-01, -5.436e-02, 3.781e-01));
+    r += mul(s0_8, M4(-1.145e-02, 6.045e-02, -4.676e-02, -5.604e-02, -1.576e-02, -3.528e-02, 2.252e-02, 1.997e-02, -2.546e-02, -6.894e-02, 7.238e-02, -3.495e-01, -6.323e-02, -1.042e-01, 1.091e-01, -4.170e-01));
+    r += mul(s1_0, M4(-5.215e-01, 6.255e-01, 5.587e-02, -5.362e-02, 9.895e-02, -8.743e-03, 1.058e-01, -3.585e-02, -1.594e-02, -1.034e-01, 3.848e-02, -5.432e-02, -1.796e-02, 5.838e-02, 1.304e-01, -2.122e-02));
+    r += mul(s1_1, M4(-6.987e-02, 8.696e-01, -1.130e+00, 5.558e-03, -1.080e-01, 4.195e-02, -1.323e-01, 2.270e-01,
3.451e-02, -1.616e-02, 4.251e-03, 1.470e-01, 2.442e-01, -5.904e-02, -3.467e-01, -2.056e-02));
+    r += mul(s1_2, M4(4.884e-02, -1.034e-01, 5.823e-02, 1.131e-01, -4.126e-02, 6.519e-02, -1.532e-02, -2.420e-01, 1.092e-02, 1.869e-02, 1.913e-03, -1.787e-02, 1.122e-01, -1.481e-01, 1.843e-01, 3.454e-01));
+    r += mul(s1_3, M4(-2.906e-01, -9.847e-01, 4.092e-01, 1.655e-01, 4.092e-02, 2.913e-01, 1.306e-01, -4.682e-02, 2.568e-01, -4.528e-02, 3.207e-02, 9.888e-02, -3.928e-01, -3.546e-01, -2.367e-01, -3.239e-01));
+    r += mul(s1_4, M4(4.463e-01, -1.594e-01, 8.418e-01, -3.525e-01, 5.957e-01, 1.082e+00, -9.245e-01, 2.726e-01, 1.210e-01, 2.024e-01, -8.063e-03, -2.433e-01, -1.512e+00, 9.316e-01, 2.305e-01, -5.109e-01));
+    r += mul(s1_5, M4(-2.393e-02, 1.286e-02, -9.453e-02, 3.071e-01, -1.402e-01, -2.436e-01, 1.202e-01, -1.409e-01, -1.857e-02, 2.421e-02, -2.642e-02, -7.415e-02, 8.786e-01, 5.260e-04, -9.212e-02, 1.849e-01));
+    r += mul(s1_6, M4(8.958e-02, 9.057e-02, 1.712e-02, -2.838e-02, -1.405e-01, -6.455e-02, -2.695e-02, -1.110e-02, 8.731e-03, 6.531e-02, -3.752e-02, 1.194e-01, 4.585e-01, 6.270e-01, -1.367e-01, -2.529e-01));
+    r += mul(s1_7, M4(-4.381e-02, -1.595e-02, -4.601e-02, 7.257e-02, -8.036e-02, -1.360e-01, 1.154e-01, -7.942e-02, -4.653e-02, -7.121e-02, 2.720e-02, 8.346e-02, -1.871e+00, -8.300e-01, -6.760e-01, 7.402e-01));
+    r += mul(s1_8, M4(1.359e-02, -2.489e-02, 3.529e-02, -1.121e-01, -6.190e-02, -2.628e-02, -2.090e-03, 2.359e-01, -2.412e-02, -2.463e-02, 8.317e-03, -5.330e-02, 2.105e+00, 1.550e-01, 1.457e+00, -1.129e+00));
+    r += V4(7.359e-03, -1.132e-02, 1.248e-02, 7.243e-04); // bias term
+    return r;
+}
+// Pass3 (conv2): same procedure as Pass2, but reads t1 and writes back into t0.
+void Pass3(uint2 blockStart, uint3 tid) {
+    float2 pt = float2(GetInputPt());
+    uint2 gxy = Rmp8x8(tid.x) + blockStart;
+    uint2 size = GetInputSize();
+    if (gxy.x >= size.x || gxy.y >= size.y) { // outside the input-sized target: nothing to write
+        return;
+    }
+    float2 pos = (gxy + 0.5) * pt; // pixel-centre coordinate; read implicitly by the O() macro inside l0
+    // s1_* keep the negative part of each tap; s0_* are then clamped in place to the positive part.
+    V4 s0_0 = l0(-1.0, -1.0);
+    V4 s0_1 = l0(0.0, -1.0);
+    V4 s0_2 = l0(1.0, -1.0);
+    V4 s0_3 = l0(-1.0, 0.0);
+    V4 s0_4 = l0(0.0, 0.0);
+    V4 s0_5 = l0(1.0, 0.0);
+    V4 s0_6 = l0(-1.0, 1.0);
+    V4 s0_7 = l0(0.0, 1.0);
+    V4 s0_8 = l0(1.0, 1.0);
+    V4 s1_0 = -max(-s0_0, 0.0);
+    V4 s1_1 = -max(-s0_1, 0.0);
+    V4 s1_2 = -max(-s0_2, 0.0);
+    V4 s1_3 = -max(-s0_3, 0.0);
+    V4 s1_4 = -max(-s0_4, 0.0);
+    V4 s1_5 = -max(-s0_5, 0.0);
+    V4 s1_6 = -max(-s0_6, 0.0);
+    V4 s1_7 = -max(-s0_7, 0.0);
+    V4 s1_8 = -max(-s0_8, 0.0);
+    s0_0 = max(s0_0, 0.0);
+    s0_1 = max(s0_1, 0.0);
+    s0_2 = max(s0_2, 0.0);
+    s0_3 = max(s0_3, 0.0);
+    s0_4 = max(s0_4, 0.0);
+    s0_5 = max(s0_5, 0.0);
+    s0_6 = max(s0_6, 0.0);
+    s0_7 = max(s0_7, 0.0);
+    s0_8 = max(s0_8, 0.0);
+
+    t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8);
+}
+
+//!PASS 4
+//!DESC out-shuffle
+//!BLOCK_SIZE 16
+//!NUM_THREADS 64
+//!IN INPUT, t0
+//!OUT OUTPUT
+
+#define l0(x, y) V4(O(t0, float2(x, y)))
+// l0 (above): point-sampled t0 texel at offset (x, y) as V4. f0 has the same 18-term form as the conv passes but returns tanh(r); Pass4 uses its four components as luma offsets for a 2x2 output quad.
+V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) {
+    V4 r = 0.0;
+    r += mul(s0_0, M4(8.642e-03, -1.295e-02, 1.998e-02, -1.289e-03, -4.147e-02, -4.021e-03, 1.491e-04, -7.275e-03, 1.574e-02, -4.122e-03, 1.126e-02, 8.962e-03, 5.174e-02, 3.405e-02, 4.993e-02, 4.529e-02));
+    r += mul(s0_1, M4(-1.028e-01, -2.764e-02, -2.777e-02, -7.170e-03, -8.365e-02, 3.550e-02, 1.288e-01, 2.475e-02, 5.017e-02, 5.917e-02, 3.473e-02, 8.510e-03, 2.332e-02, 8.047e-02, 9.838e-02, 4.234e-02));
+    r += mul(s0_2, M4(-2.319e-02, -4.432e-02, -1.679e-02, 8.855e-03, 3.259e-02, -1.974e-01, 5.938e-02, 1.616e-01, -5.605e-04, 3.183e-02, -3.356e-03, 3.138e-02, 9.572e-03, -3.887e-02, -2.632e-02, -1.161e-02));
+    r += mul(s0_3, M4(-2.947e-02, -4.358e-02, 1.208e-03, -2.705e-02, -1.037e-02, -6.812e-02, -5.436e-02, -3.840e-02, 3.684e-02, 2.560e-02, 1.715e-02, -3.670e-02, -5.930e-02, -2.310e-02, -6.163e-02, -3.562e-02));
+    r += mul(s0_4, M4(5.520e-01, 1.213e-01, 1.753e-01, 5.436e-02, 5.879e-01, 2.281e-01, -2.703e-01, 1.519e-01,
5.739e-01, 2.959e-01, 9.449e-02, 2.473e-02, -5.998e-01, -9.548e-02, -6.035e-01, -9.663e-02));
+    r += mul(s0_5, M4(-9.740e-02, 2.744e-01, -1.522e-01, -7.204e-02, 1.178e-01, 6.112e-01, -4.801e-02, -5.176e-01, 1.480e-02, 8.323e-02, -6.764e-02, 4.138e-02, 1.121e-01, -8.141e-02, 1.211e-01, -8.737e-02));
+    r += mul(s0_6, M4(6.315e-02, 6.323e-02, 1.146e-02, 3.378e-02, -9.598e-02, -1.089e-01, 2.780e-02, -6.091e-02, -1.194e-01, -1.038e-01, -2.147e-02, -4.236e-02, -2.300e-02, -3.184e-02, -1.560e-02, -2.206e-02));
+    r += mul(s0_7, M4(-1.772e-01, -1.304e-01, 1.265e-01, -7.871e-02, 1.978e-01, 1.074e-01, 1.240e-02, 4.600e-02, 1.558e-02, -3.196e-02, 2.018e-01, 1.496e-01, 1.421e-01, 8.472e-02, 7.432e-02, 9.935e-02));
+    r += mul(s0_8, M4(1.132e-02, -2.296e-03, 1.274e-01, 3.428e-01, -5.796e-02, -6.156e-02, -2.549e-01, -2.231e-01, -8.762e-02, -9.318e-02, -2.378e-01, -3.018e-01, 5.601e-03, -2.670e-02, 2.896e-02, -3.910e-02));
+    r += mul(s1_0, M4(4.603e-02, -2.582e-02, -9.045e-03, 1.446e-02, -1.835e-02, -2.533e-02, 3.681e-03, -9.420e-03, -5.802e-02, 2.310e-02, 3.059e-02, 1.313e-03, 9.639e-02, 8.284e-02, 1.071e-01, -3.287e-02));
+    r += mul(s1_1, M4(-2.480e-02, 2.321e-03, -3.594e-02, -1.101e-01, 2.850e-02, 2.912e-02, 2.597e-02, 2.777e-02, 5.701e-02, 9.536e-04, 2.533e-02, 1.102e-02, -3.714e-03, 7.838e-02, -1.716e-02, 1.723e-01));
+    r += mul(s1_2, M4(-4.473e-03, 1.521e-02, -1.887e-02, 6.731e-03, 2.199e-03, 2.965e-02, -3.709e-03, 1.671e-02, 1.376e-02, -4.819e-02, -8.832e-04, 3.531e-02, -8.453e-03, -1.276e-02, -1.461e-02, 4.460e-03));
+    r += mul(s1_3, M4(6.139e-02, -1.511e-01, 1.102e-01, -1.428e-01, -5.114e-02, -6.594e-02, -1.693e-02, -4.651e-02, 2.440e-01, 2.010e-02, -1.900e-01, -1.243e-03, -2.397e-01, 2.002e-01, -3.506e-01, 2.171e-01));
+    r += mul(s1_4, M4(-6.189e-02, 5.137e-01, -8.132e-02, 4.526e-01, 3.263e-01, 2.134e-01, 1.027e-01, 2.067e-02, 2.407e-01, 2.591e-01, 4.489e-01, 2.042e-01, 1.932e-02, -4.463e-01, -1.479e-01, -6.843e-01));
+    r += mul(s1_5, M4(-7.571e-03, -7.787e-02, 9.918e-03,
-8.469e-02, 4.056e-02, -1.926e-02, -4.968e-02, 2.416e-02, 2.699e-02, 2.783e-01, -7.854e-02, -6.549e-02, 6.835e-03, 2.288e-02, 1.048e-02, -3.273e-02));
+    r += mul(s1_6, M4(7.034e-02, 4.236e-02, 7.905e-02, -2.283e-03, -8.423e-02, -7.784e-02, -7.540e-03, -3.373e-02, -1.019e-01, -1.421e-01, 6.713e-02, -8.716e-02, -6.980e-02, -4.731e-02, -3.086e-02, -6.210e-03));
+    r += mul(s1_7, M4(-1.597e-01, -2.036e-01, 5.194e-02, 8.457e-02, 1.387e-01, 7.910e-02, 2.030e-02, 5.848e-02, 2.154e-01, 1.382e-01, -8.617e-02, 7.552e-02, 3.127e-02, 5.899e-02, 1.733e-01, 1.657e-01));
+    r += mul(s1_8, M4(3.595e-02, 3.243e-02, 1.450e-01, 2.046e-01, -2.939e-02, -1.306e-02, -1.587e-01, -2.607e-01, -8.980e-02, -5.350e-02, -2.627e-01, -2.861e-01, -1.585e-02, -2.032e-02, -1.662e-02, 1.560e-02));
+    r += V4(-7.528e-04, -8.388e-04, -1.247e-03, -1.205e-03); // bias term
+    return tanh(r);
+}
+// Pass4 (out-shuffle): each thread evaluates f0 on the t0 neighbourhood of one source pixel and writes the matching 2x2 quad of OUTPUT, adding one component of r to the luma of each linearly sampled INPUT pixel.
+void Pass4(uint2 blockStart, uint3 tid) {
+    float2 pt = float2(GetInputPt()); // external helper; presumably the size of one input texel in UV units - TODO confirm
+    uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; // doubled: gxy is the first pixel of this thread's 2x2 output quad
+    uint2 size = GetOutputSize();
+    if (gxy.x >= size.x || gxy.y >= size.y) { // quad origin is outside the output: nothing to write
+        return;
+    }
+    float2 pos = ((gxy >> 1) + 0.5) * pt; // halve back to the source pixel index and address its centre
+    // s1_* keep the negative part of each tap; s0_* are then clamped in place to the positive part.
+    V4 s0_0 = l0(-1.0, -1.0);
+    V4 s0_1 = l0(0.0, -1.0);
+    V4 s0_2 = l0(1.0, -1.0);
+    V4 s0_3 = l0(-1.0, 0.0);
+    V4 s0_4 = l0(0.0, 0.0);
+    V4 s0_5 = l0(1.0, 0.0);
+    V4 s0_6 = l0(-1.0, 1.0);
+    V4 s0_7 = l0(0.0, 1.0);
+    V4 s0_8 = l0(1.0, 1.0);
+    V4 s1_0 = -max(-s0_0, 0.0);
+    V4 s1_1 = -max(-s0_1, 0.0);
+    V4 s1_2 = -max(-s0_2, 0.0);
+    V4 s1_3 = -max(-s0_3, 0.0);
+    V4 s1_4 = -max(-s0_4, 0.0);
+    V4 s1_5 = -max(-s0_5, 0.0);
+    V4 s1_6 = -max(-s0_6, 0.0);
+    V4 s1_7 = -max(-s0_7, 0.0);
+    V4 s1_8 = -max(-s0_8, 0.0);
+    s0_0 = max(s0_0, 0.0);
+    s0_1 = max(s0_1, 0.0);
+    s0_2 = max(s0_2, 0.0);
+    s0_3 = max(s0_3, 0.0);
+    s0_4 = max(s0_4, 0.0);
+    s0_5 = max(s0_5, 0.0);
+    s0_6 = max(s0_6, 0.0);
+    s0_7 = max(s0_7, 0.0);
+    s0_8 = max(s0_8, 0.0);
+
+    V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8);
+    // Colour matrices: the first row of rgb2yuv is the 0.299/0.587/0.114 luma weighting; yuv2rgb converts back to RGB.
+    static const float3x3
rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081};
+    static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099};
+    float2 opt = float2(GetOutputPt()); // external helper; presumably the size of one output texel in UV units - TODO confirm
+    // Pixel (x, y): step half an output texel back from the source pixel centre, sample INPUT with the linear sampler SL, add r.x to luma only, clamp luma to [0, 1].
+    pos -= 0.5f * opt;
+    float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
+    OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1);
+    // Pixel (x + 1, y) receives r.y.
+    ++gxy.x;
+    pos.x += opt.x;
+    yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
+    OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1);
+    // Pixel (x + 1, y + 1) receives r.w (the walk goes around the quad, so w comes before z).
+    ++gxy.y;
+    pos.y += opt.y;
+    yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
+    OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1);
+    // Pixel (x, y + 1) receives r.z.
+    --gxy.x;
+    pos.x -= opt.x;
+    yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb);
+    OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1);
+}
diff --git a/src/Effects/CuNNy/CuNNy-2x4C-NVL.hlsl b/src/Effects/CuNNy/CuNNy-2x4C-NVL.hlsl
new file mode 100644
index 000000000..5e329b451
--- /dev/null
+++ b/src/Effects/CuNNy/CuNNy-2x4C-NVL.hlsl
@@ -0,0 +1,340 @@
+// CuNNy 2x4C BILINEAR RGB NVL - https://github.com/cunnyplapper/CuNNy
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+//
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see https://www.gnu.org/licenses/.
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-D04N02 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0 + +#define l0(x, y) min16float((dot(float3(-6.049e-01, -1.145e+00, -2.540e-01), O(INPUT, float2(x, y)).rgb) + 1.794e+00)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(1.411e-01, -9.763e-03, -1.361e-01, -9.610e-04) * s0_0; + r += V4(6.068e-02, 7.238e-03, -1.182e-01, -1.535e-02) * s0_1; + r += V4(-8.549e-02, -2.876e-03, -8.740e-03, 1.652e-02) * s0_2; + r += V4(-3.249e-01, 5.392e-02, -8.518e-02, -7.437e-03) * s0_3; + r += V4(2.435e-02, -6.191e-01, 7.147e-01, 5.862e-01) * s0_4; + r += V4(1.968e-01, 1.868e-02, -1.723e-01, -5.801e-01) * s0_5; + r += V4(1.528e-01, -4.489e-02, 5.871e-03, 4.528e-03) * s0_6; + r += V4(-4.619e-01, 6.152e-01, -1.313e-01, -5.326e-02) * s0_7; + r += V4(2.902e-01, -1.801e-02, -6.907e-02, 5.105e-02) * s0_8; + r += V4(4.440e-03, -1.956e-04, 1.215e-03, 1.790e-03); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float 
s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.120e-01, 8.150e-03, 7.146e-02, -4.942e-02, 3.623e-01, -1.678e-01, 1.189e-01, 1.372e-01, 1.225e-01, -2.568e-02, 6.959e-02, 1.788e-02, 1.962e-01, -1.870e-01, -6.548e-03, -4.334e-02)); + r += mul(s0_1, M4(1.805e-01, 4.881e-02, -2.342e-03, 2.035e-02, -2.427e-01, -2.197e-02, -2.036e-02, 3.919e-01, -3.037e-01, 7.047e-02, 3.426e-02, -8.694e-02, 2.144e-01, 1.431e-01, -7.851e-02, 2.247e-01)); + r += mul(s0_2, M4(6.328e-02, -4.140e-02, 3.362e-02, 5.204e-02, -1.052e-01, 1.698e-01, -2.727e-03, 1.110e-01, 7.156e-02, -1.108e-02, -2.717e-02, 5.680e-02, -6.118e-02, 2.435e-02, 1.743e-02, 8.179e-02)); + r += mul(s0_3, M4(1.557e-01, 1.189e-01, 8.836e-02, 2.178e-02, -3.954e-01, 2.466e-01, -2.166e-01, -7.051e-02, -2.857e-01, -1.611e-02, -8.667e-02, 1.895e-04, 2.744e-01, 1.499e-01, 8.228e-02, 2.938e-02)); + r += mul(s0_4, M4(2.441e-01, -3.694e-01, 1.751e-01, 6.833e-01, -1.087e-01, -2.065e-01, -1.557e-01, -6.945e-02, -1.403e-02, 2.171e-02, 3.748e-02, 2.646e-01, -3.718e-01, -1.188e-01, 1.569e-01, 8.554e-02)); + r += mul(s0_5, M4(-5.069e-02, 2.646e-01, -5.754e-02, -3.545e-01, 1.404e-01, 1.123e-01, 4.577e-02, -1.465e-01, -2.119e-02, -1.115e-02, 1.661e-01, -4.029e-01, -2.123e-01, 2.774e-01, -1.905e-02, -1.093e-02)); + r += mul(s0_6, M4(2.593e-02, -1.801e-02, 9.053e-02, -2.721e-02, 6.658e-03, 3.802e-02, -3.282e-02, -1.116e-01, 1.201e-01, 2.095e-02, -2.061e-02, 2.498e-03, -1.831e-01, -1.743e-01, 
1.062e-01, -6.113e-01)); + r += mul(s0_7, M4(-1.172e-01, -1.130e-02, -6.727e-02, 7.753e-02, -3.958e-03, -9.790e-02, -1.635e-01, 1.049e-01, 2.862e-01, -2.733e-02, -1.566e-01, -2.900e-01, -1.050e-01, -3.441e-01, -8.690e-02, 8.659e-02)); + r += mul(s0_8, M4(2.145e-01, 4.613e-02, 1.590e-02, -4.749e-02, 3.291e-01, 1.012e-01, 8.647e-03, -2.282e-01, 2.215e-01, 1.713e-01, 1.414e-01, -3.916e-01, -2.488e-01, 1.458e-01, 2.518e-02, -9.979e-02)); + r += mul(s1_0, M4(-2.127e-02, 3.575e-02, 9.372e-02, -2.662e-02, 4.467e-02, 1.304e-02, 3.849e-02, 5.186e-02, 7.417e-02, 3.647e-02, 4.960e-02, -3.988e-02, -3.998e-02, 1.173e-01, 7.752e-03, -2.263e-02)); + r += mul(s1_1, M4(-1.283e-01, -1.460e-01, 1.963e-02, -1.108e-01, -4.171e-01, 2.397e-01, -5.886e-02, 7.788e-02, -2.820e-02, -1.719e-01, 9.334e-03, -1.255e-01, 1.392e-01, 9.532e-03, -5.163e-02, 8.641e-02)); + r += mul(s1_2, M4(-1.889e-01, 1.933e-01, 5.574e-02, 6.723e-02, -1.015e-01, -3.316e-01, -1.460e-02, -1.606e-01, 1.052e-01, 1.027e-02, -4.626e-02, 5.368e-02, -9.160e-03, -9.514e-02, 2.577e-02, 7.122e-02)); + r += mul(s1_3, M4(-1.958e-01, 1.276e-01, 7.303e-02, -1.135e-01, -2.277e-01, 2.017e-01, -5.223e-02, 1.379e-01, -1.737e-01, 4.871e-02, -8.142e-02, 1.392e-01, 8.113e-02, 4.415e-01, -1.174e-01, 1.910e-02)); + r += mul(s1_4, M4(-3.233e-01, -4.158e-01, 8.391e-02, 2.017e-01, 9.790e-02, -4.865e-02, -2.172e-01, 2.607e-01, -2.458e-01, -4.931e-01, 3.016e-01, 2.198e-01, -7.173e-02, -5.683e-01, -7.447e-02, -1.264e-01)); + r += mul(s1_5, M4(-4.189e-01, 3.271e-01, 8.844e-02, -5.295e-01, 6.365e-02, -1.513e-01, 1.246e-02, -2.005e-01, 1.764e-01, 5.796e-01, 7.286e-02, -1.428e-01, -1.130e-01, -6.883e-02, -1.303e-02, -1.091e-01)); + r += mul(s1_6, M4(-6.621e-02, 9.901e-03, 9.472e-02, -3.568e-02, 1.067e-01, -3.318e-02, 3.152e-01, -5.261e-02, 1.108e-01, 7.081e-02, -1.289e-01, 6.477e-03, 1.036e-01, -1.477e-03, 1.035e+00, -9.204e-02)); + r += mul(s1_7, M4(-2.721e-01, -5.458e-02, -1.707e-01, -1.096e-02, -1.302e-01, -9.074e-02, 1.694e-01, 6.307e-02, 
4.233e-01, -5.112e-02, -3.545e-01, -2.589e-01, 8.276e-02, -3.975e-01, 7.705e-02, 4.482e-01)); + r += mul(s1_8, M4(1.175e-01, 2.212e-03, 5.751e-02, -8.666e-02, 2.532e-01, 1.303e-01, 7.291e-02, -2.126e-01, 4.815e-01, 1.649e-01, -4.748e-02, -3.330e-01, -1.252e-01, -8.987e-03, -4.285e-03, -1.106e-01)); + r += V4(3.566e-03, 2.403e-03, -1.451e-03, 4.304e-03); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.173e-02, 2.762e-03, -2.225e-03, -6.814e-03, 8.328e-02, -1.275e-02, 6.091e-02, -6.470e-02, -6.067e-02, -1.086e-01, 7.501e-02, 1.227e-01, -1.551e-02, -1.728e-02, -2.694e-02, 
7.490e-02)); + r += mul(s0_1, M4(5.326e-02, 1.003e-02, 3.989e-02, -1.908e-03, -4.580e-02, -4.303e-03, 4.333e-02, 8.324e-02, 8.170e-01, 8.040e-01, -3.975e-01, -1.034e+00, 1.362e-01, 3.776e-04, -1.102e-02, -5.030e-02)); + r += mul(s0_2, M4(-6.068e-02, 6.212e-02, -4.979e-02, 9.626e-03, 1.301e-02, -2.045e-02, 1.798e-02, 2.091e-02, -2.290e-01, 3.612e-01, -7.014e-02, 1.669e-01, -5.191e-03, 1.304e-02, 9.444e-05, -2.137e-02)); + r += mul(s0_3, M4(-3.235e-02, -6.238e-02, 3.894e-02, 5.893e-02, -3.530e-02, -1.063e-01, 8.668e-02, 1.232e-02, -3.851e-02, 2.952e-02, 6.132e-02, -5.755e-02, 8.317e-02, 8.340e-02, -8.227e-02, 6.481e-03)); + r += mul(s0_4, M4(2.118e-02, 2.725e-01, -1.393e-01, -2.377e-01, 4.872e-01, 2.235e-01, -1.746e-02, -3.662e-01, -3.945e-01, -1.862e-01, -9.132e-02, 8.777e-02, -5.084e-01, -3.300e-01, -3.443e-02, 4.203e-01)); + r += mul(s0_5, M4(1.165e-01, -1.743e-01, 4.169e-03, -1.518e-01, 1.174e-01, -3.314e-02, 2.295e-02, -9.160e-02, -1.854e-01, -6.999e-02, -6.985e-02, 4.875e-04, -1.147e-01, 1.722e-01, -2.588e-02, 1.185e-01)); + r += mul(s0_6, M4(-8.881e-03, 1.907e-03, 9.002e-03, 8.085e-03, -8.728e-03, -1.074e-01, 7.035e-02, 6.519e-02, 4.323e-02, -4.675e-02, 4.382e-02, 1.091e-02, 3.357e-02, 4.384e-02, -8.031e-03, -1.945e-02)); + r += mul(s0_7, M4(-7.981e-02, 1.492e-02, -9.399e-02, -3.750e-02, -1.274e-01, -3.235e-02, -3.169e-02, 6.420e-02, 4.304e-02, 9.302e-02, 1.250e-02, 3.906e-03, 1.752e-01, -1.211e-02, 9.058e-02, -6.273e-02)); + r += mul(s0_8, M4(-1.290e-02, -4.309e-02, 3.384e-02, 3.819e-02, -3.309e-02, 3.986e-02, 3.783e-03, 5.361e-02, 5.473e-02, 1.574e-02, -2.385e-02, -7.630e-02, -1.778e-02, 1.375e-02, -2.936e-02, -1.778e-02)); + r += mul(s1_0, M4(1.219e-01, 1.166e-02, -5.932e-02, 1.191e-02, -2.487e-03, -5.945e-02, 6.637e-02, 5.775e-02, -1.705e-02, 5.538e-02, -5.130e-02, -3.602e-02, 5.461e-02, -1.253e-01, 6.953e-02, 1.066e-01)); + r += mul(s1_1, M4(6.504e-01, -9.638e-01, 1.371e+00, 5.682e-02, 1.583e-02, -2.371e-02, 5.201e-02, 3.845e-02, 3.478e-02, -1.477e-01, 
1.763e-01, 5.129e-02, 2.992e-01, -3.335e-01, 2.490e-02, 4.873e-01)); + r += mul(s1_2, M4(2.415e-02, 8.838e-02, -1.519e-01, 9.012e-02, -6.676e-02, 3.422e-02, -2.380e-02, 5.608e-02, -1.744e-01, -9.595e-02, -7.627e-02, -5.823e-02, -9.466e-02, 5.554e-02, -1.024e-01, -1.763e-01)); + r += mul(s1_3, M4(8.380e-02, -7.972e-02, 8.813e-02, 3.371e-02, 5.392e-03, 4.385e-02, 1.207e-02, -5.728e-02, -3.427e-03, -2.027e-03, 1.211e-03, -7.897e-03, 3.360e-02, 4.603e-02, -1.240e-02, -2.219e-02)); + r += mul(s1_4, M4(-6.699e-01, -3.512e-01, -2.153e-01, 3.218e-01, -5.100e-01, 4.324e-03, 2.713e-01, -2.073e-01, 1.547e-01, -2.123e-03, 7.928e-02, -5.698e-02, 2.450e-02, -4.866e-02, 9.436e-02, 7.900e-02)); + r += mul(s1_5, M4(1.609e-01, -7.910e-02, 1.112e-01, -2.959e-02, -3.877e-01, -2.803e-01, -1.071e-01, -6.881e-03, 1.922e-02, 2.433e-02, -3.581e-02, -5.264e-02, -3.287e-01, -1.037e-02, -6.159e-02, 8.219e-02)); + r += mul(s1_6, M4(-4.263e-02, -6.372e-02, 2.607e-02, 5.285e-02, -6.156e-02, -7.837e-02, 7.299e-03, 8.959e-02, -8.706e-03, -1.642e-02, 1.825e-02, 1.850e-02, 2.735e-02, 2.413e-02, -3.236e-02, -9.612e-03)); + r += mul(s1_7, M4(-5.849e-02, 1.530e-01, -6.767e-02, -1.392e-02, -3.430e-01, -1.851e-01, -1.013e-01, 2.465e-01, -1.715e-02, 4.970e-03, -1.850e-02, -4.214e-03, 1.889e-02, -5.787e-02, 7.154e-02, 9.237e-02)); + r += mul(s1_8, M4(-2.084e-02, -2.484e-01, 5.767e-02, -2.550e-02, -9.126e-02, 4.292e-01, 1.983e-02, 2.979e-01, -3.807e-03, -3.367e-03, 1.835e-03, 8.694e-03, -9.074e-02, 4.820e-02, -2.886e-02, 5.975e-02)); + r += V4(5.508e-03, 4.690e-03, -5.708e-04, -7.674e-03); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + 
V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 4 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.841e-04, -5.677e-02, 9.249e-03, -8.726e-03, 4.041e-02, -1.295e-01, 1.154e-01, 2.765e-02, 1.833e-01, -8.427e-02, 1.078e-01, -1.432e-01, 1.068e-01, -1.222e-01, 2.535e-02, 5.316e-02)); + r += mul(s0_1, M4(-3.609e-03, 5.812e-02, -4.650e-02, -2.093e-02, -3.442e-02, 7.643e-02, 1.424e-02, 7.195e-02, 1.552e-01, -8.291e-01, 1.547e-01, 4.354e-01, -2.851e-02, 1.023e-01, -8.481e-03, -6.567e-02)); + r += mul(s0_2, M4(1.724e-02, -1.165e-02, 1.007e-02, -3.008e-02, -9.814e-04, -2.007e-02, -5.905e-03, 6.714e-03, -1.736e-01, 2.035e-01, -1.333e-01, 1.250e-01, -9.118e-03, -4.989e-02, 2.142e-02, -4.038e-03)); + r += mul(s0_3, M4(7.885e-02, -8.350e-02, -6.025e-03, -1.139e-01, -8.380e-02, -6.836e-02, -5.589e-01, -4.614e-01, -6.742e-01, 2.118e-01, -4.442e-01, 2.197e-01, -5.873e-02, 1.902e-01, -4.687e-01, -4.712e-01)); + r += mul(s0_4, M4(-4.506e-01, 2.396e-01, -1.350e-02, 4.072e-01, 3.249e-01, 9.930e-02, 1.576e-02, -2.456e-01, 1.506e+00, 6.047e-02, 
8.841e-01, -1.927e+00, -4.337e-01, -5.801e-01, 3.334e-01, 8.276e-02)); + r += mul(s0_5, M4(5.049e-02, -1.870e-01, 7.413e-02, -2.569e-02, -2.152e-02, 1.139e-01, -3.874e-02, 1.634e-02, -1.325e-01, 4.002e-02, -1.874e-01, 1.204e-01, 2.267e-02, 1.380e-02, -1.055e-02, 5.504e-02)); + r += mul(s0_6, M4(-2.855e-02, 1.255e-02, 3.941e-02, 4.466e-03, 4.814e-05, -9.003e-03, 1.231e-01, 5.676e-02, 5.020e-02, -5.407e-02, -1.951e-01, 4.240e-02, 3.525e-02, -1.021e-01, 4.517e-01, 2.399e-01)); + r += mul(s0_7, M4(-5.781e-02, -4.964e-02, -3.981e-01, -1.716e-01, 3.430e-02, -1.644e-02, 2.352e-01, 1.938e-01, 1.266e-01, -1.061e-01, 7.754e-01, 5.337e-01, 2.664e-01, 3.669e-01, -1.113e+00, -1.742e-01)); + r += mul(s0_8, M4(2.948e-02, 3.723e-02, 2.739e-02, -5.215e-02, -1.542e-02, -2.173e-02, -1.944e-02, 1.856e-02, -4.535e-02, 1.163e-02, -5.014e-02, 8.660e-02, 1.421e-01, 2.314e-01, 1.171e-02, -4.975e-01)); + r += mul(s1_0, M4(-4.408e-02, -3.573e-02, 3.842e-02, 2.571e-02, 2.872e-01, -4.960e-01, 2.569e-01, -6.254e-02, 2.158e-02, -6.452e-02, 7.495e-02, 1.997e-02, 4.094e-02, -9.741e-02, 3.542e-02, -8.115e-03)); + r += mul(s1_1, M4(3.480e-02, 1.949e-04, 1.780e-02, 4.483e-02, -2.814e-01, 4.229e-01, -5.482e-02, 1.512e-02, -3.120e-02, 3.945e-02, 4.626e-02, 7.013e-02, -6.686e-03, 5.832e-02, -4.408e-02, -1.262e-02)); + r += mul(s1_2, M4(-9.847e-03, 1.973e-03, 1.457e-02, 2.290e-02, 4.741e-02, 2.270e-02, 8.902e-04, 1.152e-02, -2.473e-02, -1.948e-02, -3.475e-03, 4.431e-02, 2.044e-02, 1.571e-04, 9.470e-03, -2.825e-02)); + r += mul(s1_3, M4(5.918e-02, -1.939e-02, -4.628e-02, -7.774e-02, -3.040e-01, 8.634e-02, -5.254e-01, -6.906e-01, -1.218e-01, -6.178e-02, -3.115e-01, -2.697e-01, -2.402e-02, -2.149e-02, -3.878e-01, -3.453e-01)); + r += mul(s1_4, M4(2.920e-01, 3.711e-01, -2.753e-01, -4.654e-02, 1.379e-01, 3.908e-01, -4.798e-01, 6.668e-01, 4.870e-01, -1.634e-01, -7.790e-02, -2.683e-01, -4.834e-01, -1.822e-02, -8.492e-03, 7.620e-02)); + r += mul(s1_5, M4(-4.786e-02, 2.412e-02, 4.992e-02, -1.913e-01, 9.058e-02, 
-4.485e-02, 8.249e-02, -9.418e-02, 3.555e-02, 3.543e-01, -1.140e-01, -1.358e-01, 5.079e-02, -2.007e-01, 6.132e-02, -2.373e-03)); + r += mul(s1_6, M4(6.553e-03, -7.804e-03, 8.569e-02, 4.875e-02, 5.085e-02, 1.728e-02, 6.949e-02, 1.313e-01, 1.825e-02, -5.557e-02, -7.548e-03, -5.534e-02, 7.059e-02, 4.382e-02, 2.807e-01, 1.919e-01)); + r += mul(s1_7, M4(-1.071e-01, -3.709e-02, -4.757e-01, -1.943e-01, 8.182e-02, -3.334e-02, 4.170e-01, 6.716e-02, 1.563e-01, 1.382e-01, 7.441e-01, 4.082e-01, -9.101e-02, -3.943e-02, -5.142e-01, -1.910e-01)); + r += mul(s1_8, M4(4.255e-03, 4.204e-02, 5.834e-02, -6.508e-02, -3.675e-02, 1.165e-02, -2.694e-02, -2.212e-02, -3.036e-02, -4.393e-02, 1.855e-03, 1.909e-01, 3.812e-02, 3.309e-02, 3.942e-02, -7.422e-02)); + r += V4(-1.734e-03, -1.825e-03, -1.635e-03, -1.665e-03); + return tanh(r); +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 
0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-3x4C-NVL-DN.hlsl b/src/Effects/CuNNy/CuNNy-3x4C-NVL-DN.hlsl new file mode 100644 index 000000000..6ec2927bd --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-3x4C-NVL-DN.hlsl @@ -0,0 +1,413 @@ +// CuNNy 3x4C BILINEAR RGB NVL DN - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-DN-D04N03 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0 + +#define l0(x, y) min16float((dot(float3(-2.683e-01, -5.217e-01, -1.382e-01), O(INPUT, float2(x, y)).rgb) + 7.973e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(1.850e-01, -2.860e-02, -5.321e-01, 2.390e-03) * s0_0; + r += V4(-4.299e-01, -2.946e-02, -1.180e-01, -5.652e-02) * s0_1; + r += V4(-4.798e-01, -2.276e-02, 3.201e-02, 4.870e-02) * s0_2; + r += V4(2.783e-01, -2.262e-03, -1.864e-01, 1.793e-01) * s0_3; + r += V4(9.435e-04, 8.115e-01, 7.806e-01, -7.793e-01) * s0_4; + r += V4(2.180e-01, -2.564e-05, 2.774e-03, -7.015e-02) * s0_5; + r += V4(1.479e-03, -4.675e-02, 3.323e-02, 3.392e-01) * s0_6; + r += V4(1.203e-01, 1.509e-02, 5.239e-02, 3.194e-01) * s0_7; + r += V4(7.680e-02, -4.310e-02, -7.203e-02, 1.255e-02) * s0_8; + r += V4(3.156e-02, 7.379e-02, 1.078e-02, -5.510e-04); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 
= l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.949e-01, -1.247e-01, -7.307e-02, 8.783e-02, -4.773e-02, 6.012e-02, 8.043e-02, -8.489e-02, 6.760e-02, -7.809e-02, -4.745e-02, -1.304e-02, -1.402e-01, -1.248e-01, 3.334e-01, -1.498e-01)); + r += mul(s0_1, M4(7.053e-02, 9.895e-02, 1.655e-01, 2.251e-01, 3.511e-02, -1.010e-01, -2.736e-01, 1.174e-01, -2.551e-01, 1.100e-01, 1.518e-01, -4.343e-02, -9.293e-01, 5.327e-01, -2.723e-01, 4.006e-01)); + r += mul(s0_2, M4(-2.390e-02, 8.154e-03, -2.332e-02, -3.708e-02, 2.814e-02, 5.506e-02, -2.627e-01, -8.081e-02, -1.062e-01, -6.819e-02, -9.498e-02, -2.749e-01, -2.457e-01, 6.868e-01, 6.527e-03, 7.676e-01)); + r += mul(s0_3, M4(2.704e-01, 4.055e-02, -4.756e-01, 2.506e-01, -9.498e-02, 5.838e-02, 1.733e-01, 3.420e-03, -7.051e-02, -8.233e-02, -3.006e-01, 6.824e-02, -1.308e-01, 1.196e-01, 2.560e-01, 8.304e-02)); + r += mul(s0_4, M4(4.190e-01, -1.207e-01, 2.708e-01, -6.375e-01, 1.740e-01, 1.955e-03, -1.816e-01, -7.933e-02, -9.308e-01, 1.333e-01, -1.335e-01, -1.401e-01, 3.447e-01, 3.389e-01, 6.660e-01, -3.387e-01)); + r += mul(s0_5, M4(7.310e-02, 1.403e-02, 8.114e-02, 7.400e-02, -2.552e-02, -1.607e-01, -1.208e-01, -3.943e-02, -2.743e-02, -7.229e-03, -1.749e-03, 3.062e-01, 1.429e-01, 8.105e-01, 3.562e-01, 4.580e-01)); + r += mul(s0_6, M4(2.115e-01, -1.686e-01, -1.948e-01, -1.191e-01, -5.798e-02, 3.493e-02, 8.264e-02, 1.579e-01, -1.081e-01, -1.775e-01, -8.196e-02, -2.085e-01, 6.791e-02, 1.652e-02, 
-4.933e-03, 2.833e-02)); + r += mul(s0_7, M4(-2.160e-01, -3.858e-01, -8.407e-01, -1.091e-01, 8.415e-03, 8.626e-02, 2.340e-01, 9.177e-02, -4.697e-01, -6.623e-02, -5.176e-01, 6.762e-02, -3.437e-03, 6.570e-02, 7.630e-02, 8.988e-02)); + r += mul(s0_8, M4(6.527e-02, -6.320e-02, 1.192e-02, -1.196e-01, -1.605e-02, -9.294e-03, 1.955e-01, -2.356e-02, -3.582e-02, 1.377e-02, 9.253e-02, -2.362e-02, 3.578e-02, 1.822e-01, 3.329e-01, 1.489e-01)); + r += mul(s1_0, M4(1.154e-01, -1.822e-01, -2.122e-01, 3.031e-02, 6.550e-01, -4.855e-02, 6.554e-02, 4.432e-02, 1.671e-02, -4.477e-02, -9.428e-03, 4.413e-03, -3.185e-02, -1.529e-01, -1.222e-01, 6.523e-02)); + r += mul(s1_1, M4(-4.920e-02, -1.697e-02, 4.141e-02, 1.997e-01, 6.972e-01, -5.157e-01, 2.031e-01, 2.829e-02, -5.005e-02, 2.335e-01, 2.985e-01, 6.871e-02, -5.232e-01, 2.146e-02, -1.418e+00, 2.193e-01)); + r += mul(s1_2, M4(-6.472e-02, 2.595e-02, -2.610e-02, -2.279e-02, 4.165e-01, -7.745e-01, 1.261e-01, -3.845e-01, 3.279e-02, 2.445e-02, 1.796e-01, -2.581e-01, -3.838e-01, 6.280e-02, -4.893e-01, -1.475e-01)); + r += mul(s1_3, M4(9.330e-02, 1.742e-01, -1.685e-01, 2.376e-02, -9.586e-01, -1.236e+00, -7.271e-01, -7.674e-01, 2.500e-01, -3.709e-02, -1.303e-01, 1.490e-01, -2.746e-01, -1.376e-01, -2.321e-02, -1.967e-02)); + r += mul(s1_4, M4(3.660e-01, 4.772e-02, 5.524e-01, -2.804e-01, -2.756e+00, -1.336e+00, 2.038e-01, 2.593e+00, 2.156e-01, 3.281e-01, 3.152e-01, 8.064e-01, 3.970e-01, -1.379e-01, -7.518e-02, -2.723e-01)); + r += mul(s1_5, M4(5.214e-03, 1.695e-02, 1.024e-01, 1.333e-01, -2.250e-01, -1.298e+00, 4.673e-01, 1.317e+00, 3.036e-01, -1.273e-01, 2.900e-01, 2.249e-02, -1.870e-01, -1.124e-01, -5.879e-01, 6.314e-02)); + r += mul(s1_6, M4(-8.225e-02, -1.149e-01, 1.598e-04, -3.662e-01, -8.572e-02, -8.909e-01, 9.891e-02, 1.818e-01, 1.715e-01, -2.348e-01, 1.178e-01, -6.289e-02, 1.522e-02, 1.973e-02, 3.707e-02, 2.911e-02)); + r += mul(s1_7, M4(-6.380e-02, 8.661e-02, -2.666e-01, 9.586e-02, -1.257e+00, -2.231e+00, -1.232e+00, 5.642e-01, 5.730e-02, 
-3.294e-01, -1.151e-01, 2.382e-01, 4.529e-02, 4.927e-02, 9.893e-02, 8.365e-02)); + r += mul(s1_8, M4(1.906e-02, -8.920e-02, 8.931e-02, -6.752e-02, -3.680e-01, -1.282e+00, -1.388e-01, -7.545e-02, 6.262e-02, -1.695e-01, 2.278e-01, -3.066e-01, -7.412e-02, 1.145e-02, 4.667e-02, -4.205e-04)); + r += V4(1.427e-02, -1.982e-02, 4.114e-03, -2.883e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.965e-01, -1.919e-01, 9.202e-02, 8.775e-03, -4.948e-02, 1.061e-01, -3.754e-02, -1.900e-01, -2.114e-01, 1.267e-01, 1.989e-02, 2.570e-02, 4.634e-03, -2.718e-01, 2.171e-01, 1.512e-01)); + r += 
mul(s0_1, M4(-5.527e-01, -4.825e-01, 4.325e-01, 4.447e-01, -6.577e-02, 5.161e-01, 3.286e-02, -3.800e-01, 2.625e-02, 3.835e-01, -7.794e-02, -5.489e-02, -2.647e-01, -4.952e-01, 1.587e-01, 1.471e-01)); + r += mul(s0_2, M4(-3.687e-01, -1.096e-01, 1.849e-01, -6.915e-02, 2.257e-01, 2.760e-01, -8.875e-02, -8.871e-02, -8.394e-02, -6.714e-02, 5.322e-03, -3.252e-01, -7.885e-02, -2.723e-01, 6.149e-02, 2.998e-01)); + r += mul(s0_3, M4(1.606e-01, -1.199e-01, 3.573e-01, 2.833e-02, 6.514e-03, -2.242e-02, -6.231e-02, 6.702e-02, -8.717e-02, -2.227e-01, -1.626e-01, 5.313e-02, -1.411e-01, -2.445e-02, 1.194e-01, -1.101e-01)); + r += mul(s0_4, M4(-1.127e+00, 1.823e-01, 1.358e-01, -1.618e-01, -4.171e-04, -7.771e-02, 2.147e-01, 6.493e-01, 4.989e-01, 3.955e-01, -1.017e-01, -2.861e-01, 3.878e-01, -6.653e-01, -4.968e-01, -5.063e-01)); + r += mul(s0_5, M4(-2.270e-01, -3.965e-01, -2.794e-02, 1.487e-01, -2.667e-01, -1.410e-02, 1.475e-01, -4.992e-01, -1.071e-01, 2.096e-01, 1.159e-01, -6.073e-02, -7.157e-02, -2.446e-01, -4.807e-02, 1.968e-01)); + r += mul(s0_6, M4(8.199e-02, 8.336e-02, -3.090e-02, -1.287e-02, -6.954e-02, -7.544e-02, 1.272e-01, 7.930e-02, -3.647e-02, -2.685e-02, -4.235e-02, 3.214e-02, -4.526e-02, 1.479e-01, -4.963e-02, -3.035e-02)); + r += mul(s0_7, M4(-2.012e-02, -1.497e-02, -2.952e-01, -6.026e-02, 2.135e-03, 2.979e-02, -2.713e-02, 7.951e-03, -8.069e-02, -2.374e-01, 1.865e-01, 1.048e-01, -9.076e-02, 6.683e-02, 9.576e-02, -2.432e-02)); + r += mul(s0_8, M4(1.455e-01, 2.613e-01, -1.616e-01, -3.564e-01, 1.229e-01, -3.778e-02, 3.316e-02, 5.927e-02, -1.831e-01, -1.388e-01, 5.986e-02, 2.083e-02, -1.368e-03, 2.394e-01, -1.623e-01, -2.768e-02)); + r += mul(s1_0, M4(7.711e-03, -6.696e-04, -3.229e-02, 1.549e-02, -1.596e-01, 2.068e-01, -6.162e-02, -9.571e-02, -1.500e-01, 1.743e-01, 2.746e-02, -5.845e-02, -7.649e-03, -4.265e-03, 4.154e-03, 3.950e-03)); + r += mul(s1_1, M4(2.764e-01, -4.505e-02, 4.280e-02, 6.044e-02, 3.396e-02, 2.750e-01, -1.910e-01, -2.153e-01, 9.633e-02, -2.194e-02, 
-2.131e-01, -1.181e-01, -1.343e-01, 6.123e-02, 1.904e-02, -6.568e-02)); + r += mul(s1_2, M4(-3.643e-01, -1.709e-02, 1.528e-01, -1.405e-01, 3.307e-01, -1.979e-03, -1.819e-01, 7.635e-02, 1.266e-01, 2.162e-01, -7.492e-02, -9.075e-02, 4.120e-02, 1.521e-01, -2.790e-03, -4.330e-02)); + r += mul(s1_3, M4(1.913e-02, -5.373e-02, 5.748e-02, -1.443e-02, -2.776e-01, -1.162e-01, -1.994e-01, 1.430e-01, 9.058e-02, -3.720e-02, -3.585e-02, -8.516e-02, -2.228e-02, 7.507e-02, -9.620e-02, -1.013e-01)); + r += mul(s1_4, M4(-3.592e-01, 1.415e-01, 1.018e+00, -1.555e-01, 5.378e-01, 8.818e-02, 2.190e-01, 1.997e-01, -1.128e-01, 3.331e-02, -1.410e-01, 2.844e-01, 4.756e-01, -5.850e-02, -3.757e-01, -1.716e-01)); + r += mul(s1_5, M4(2.636e-02, -3.596e-01, -3.280e-01, 2.027e-01, 3.000e-01, -2.297e-01, 4.282e-02, 1.776e-01, 5.222e-02, 1.751e-01, 4.529e-02, -8.347e-02, -3.409e-01, -2.640e-01, 1.753e-01, -5.672e-01)); + r += mul(s1_6, M4(-1.699e-02, 4.941e-02, -2.642e-02, -1.406e-04, -1.655e-01, -1.464e-02, -4.353e-02, 1.946e-01, 6.067e-02, -1.429e-01, 1.170e-01, -4.644e-02, -6.567e-02, -2.264e-02, 6.666e-02, 9.009e-02)); + r += mul(s1_7, M4(7.805e-02, 2.173e-02, -3.276e-01, 2.004e-03, -7.789e-02, -1.466e-02, -1.560e-01, -1.126e-01, -3.823e-02, -2.446e-03, 1.465e-01, -2.744e-01, -2.129e-01, -2.141e-02, 4.456e-01, 1.240e-01)); + r += mul(s1_8, M4(1.315e-02, 2.686e-01, -1.987e-01, -2.093e-01, 3.184e-02, -8.723e-02, 3.012e-01, 3.580e-01, 1.198e-02, -2.655e-01, 1.455e-01, 7.602e-02, -4.605e-02, 3.276e-01, -2.036e-01, -2.590e-01)); + r += V4(-1.292e-02, 8.156e-04, -2.055e-03, -3.100e-03); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); 
+ V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.151e-02, -4.754e-02, 3.454e-02, -1.338e-03, -4.337e-02, 4.608e-02, -1.116e-01, -2.296e-02, -2.839e-02, -3.878e-01, -2.317e-02, 5.774e-02, 4.317e-03, 6.680e-02, 6.325e-02, -1.449e-01)); + r += mul(s0_1, M4(-1.173e-01, -8.942e-02, -1.017e-01, 6.496e-02, 5.558e-02, 2.788e-02, 2.184e-02, -2.837e-03, -1.057e-01, -2.075e-01, -3.255e-02, -1.297e-02, -2.643e-02, -1.695e-02, -9.425e-02, 3.942e-02)); + r += mul(s0_2, M4(-1.773e-02, -4.118e-02, -2.141e-02, 4.282e-02, 4.234e-02, -1.221e-02, -3.375e-03, 4.469e-02, -2.586e-01, -1.112e-01, -7.688e-02, 3.426e-02, 8.170e-02, -2.355e-02, -3.737e-02, 3.004e-02)); + r += mul(s0_3, M4(2.192e-01, 1.955e+00, 2.012e-01, -2.598e-02, -7.453e-02, 5.510e-02, -1.517e-01, -2.571e-01, -2.182e-02, -2.345e-02, -5.767e-02, -5.534e-02, -1.996e-02, 2.329e-01, 4.447e-04, -1.111e-01)); + r += mul(s0_4, M4(3.476e-01, -4.368e-01, -1.180e-01, 5.371e-01, 5.294e-01, 1.509e-01, 2.456e-01, -7.875e-02, 2.055e-01, 9.732e-02, 1.285e-01, 
5.178e-01, 3.256e-01, -2.842e-01, 4.421e-02, 3.426e-01)); + r += mul(s0_5, M4(6.119e-01, -1.393e-01, -1.144e-02, 2.438e-01, -5.126e-02, -1.049e-01, -7.847e-02, 9.942e-02, 5.371e-01, 9.985e-02, 9.193e-02, -3.067e-02, -1.962e-01, -4.272e-02, -7.821e-03, 2.557e-02)); + r += mul(s0_6, M4(1.224e-02, -5.098e-01, 3.052e-01, 5.332e-01, 2.249e-01, 4.201e-02, 5.423e-01, 1.106e-01, -1.056e-02, -4.091e-03, -1.267e-02, -5.280e-02, 1.898e-02, 9.430e-03, 1.470e-02, 7.235e-02)); + r += mul(s0_7, M4(-4.342e-01, 2.385e-01, -3.834e-02, -7.654e-02, -9.043e-01, -3.139e-01, -1.511e-01, 3.800e-01, -8.848e-02, -3.911e-02, -7.025e-03, -1.196e-02, -3.322e-03, -1.455e-01, 2.084e-02, 1.106e-01)); + r += mul(s0_8, M4(1.382e-01, -1.894e-01, -8.814e-02, 1.373e-01, 1.362e-01, -1.298e-01, -1.007e-01, 1.166e-01, -1.553e-02, 8.530e-02, 2.744e-02, -1.083e-01, -5.606e-02, 5.965e-02, 1.406e-02, -4.496e-02)); + r += mul(s1_0, M4(-4.828e-03, -1.035e-01, -5.021e-02, 1.972e-02, -9.942e-03, -3.057e-01, -7.373e-03, 4.274e-02, -3.475e-03, 4.653e-02, 9.115e-03, -5.794e-02, 1.170e-02, 1.322e-01, 1.195e-01, -2.535e-02)); + r += mul(s1_1, M4(-5.424e-02, -1.541e-01, -9.945e-02, 8.862e-02, -1.198e-01, -3.591e-05, 4.305e-02, -1.079e-01, 1.605e-02, -3.377e-02, -5.398e-02, 1.201e-02, 3.432e-02, 1.090e-02, 8.871e-02, 3.186e-02)); + r += mul(s1_2, M4(-1.108e-01, -3.481e-02, -1.616e-02, -4.136e-03, -3.382e-02, 1.836e-02, -3.071e-02, -3.186e-02, -1.014e-01, -1.412e-01, -7.790e-02, 9.763e-02, -1.624e-02, -2.520e-02, -2.152e-02, 2.524e-02)); + r += mul(s1_3, M4(3.337e-03, -1.439e-02, 2.317e-03, 2.097e-01, 5.091e-03, 4.138e-02, -5.988e-02, -2.348e-02, -5.626e-03, 1.695e-02, 2.371e-02, -1.652e-02, 8.541e-02, -1.851e-01, 1.130e+00, -1.181e-01)); + r += mul(s1_4, M4(1.184e-01, -3.385e-02, 2.659e-02, 3.233e-01, 2.333e-01, 1.694e-01, 1.915e-01, 1.162e-01, 4.309e-02, -3.793e-02, 1.412e-01, -1.345e-02, -6.074e-01, -2.408e-01, -1.306e-01, 1.033e-01)); + r += mul(s1_5, M4(3.452e-01, 1.401e-01, 3.650e-02, -4.950e-02, 1.755e-01, 
-1.210e-01, -1.041e-02, 1.281e-01, 4.262e-01, 2.166e-02, 3.851e-02, 1.295e-01, -1.910e-01, -2.029e-02, -2.151e-02, -1.537e-02)); + r += mul(s1_6, M4(4.989e-03, -5.730e-02, 5.803e-02, 2.946e-02, 1.825e-02, 2.660e-02, -4.900e-03, 3.848e-03, 1.078e-02, 1.823e-02, -4.751e-03, 4.219e-02, -1.024e-01, 7.721e-02, -6.709e-01, 8.423e-02)); + r += mul(s1_7, M4(-1.567e-01, 4.125e-02, -2.721e-02, -1.831e-01, 9.470e-03, -1.205e-01, 1.793e-02, 1.160e-01, -4.874e-02, -4.902e-02, -1.479e-01, 7.102e-02, 6.699e-01, -1.383e-01, 1.314e-01, 2.999e-01)); + r += mul(s1_8, M4(-2.625e-01, -9.735e-02, -6.038e-02, 3.588e-03, 2.247e-02, 4.993e-02, 1.171e-02, -2.071e-02, 2.066e-01, 2.852e-01, -5.781e-02, -3.231e-01, 6.922e-02, 8.960e-02, 9.107e-02, -2.880e-02)); + r += V4(3.045e-03, 3.707e-03, -6.011e-03, -5.162e-03); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 5 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 
+//!IN INPUT, t1 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.116e-01, 1.402e-01, 1.439e-02, 5.091e-02, -1.526e-02, -2.562e-02, -1.193e-02, -1.365e-02, -6.156e-02, -3.463e-02, 2.155e-02, -2.192e-02, -2.937e-02, -1.072e-01, -4.538e-02, -3.302e-02)); + r += mul(s0_1, M4(-1.192e-02, -1.724e-02, 9.899e-03, -5.861e-03, -1.552e-02, 2.422e-02, 4.929e-03, 7.339e-03, 4.700e-02, 1.993e-01, -6.323e-02, 5.778e-02, 1.499e-01, 3.916e-01, -4.578e-02, -2.026e-02)); + r += mul(s0_2, M4(5.431e-03, 1.916e-03, -2.064e-03, -6.545e-04, -1.731e-02, -8.081e-02, 1.391e-02, -7.036e-03, 7.739e-02, -1.588e-01, 2.970e-02, 3.357e-02, 3.869e-02, -7.824e-02, 1.813e-02, -6.252e-02)); + r += mul(s0_3, M4(5.283e-01, 8.076e-02, 3.430e-01, 2.332e-01, -3.540e-02, 1.903e-02, -1.354e-02, -1.415e-02, -1.644e-01, -1.319e-02, -9.781e-02, -3.256e-02, 2.768e-02, -3.914e-02, 1.596e-01, -1.067e-01)); + r += mul(s0_4, M4(-1.638e-02, 4.385e-01, -1.479e-01, -1.789e-02, -1.399e-01, -5.884e-02, -7.306e-02, -2.036e-03, 5.196e-01, -1.849e-01, 8.771e-01, 3.595e-01, -7.094e-01, 2.485e-02, -3.977e-02, 7.246e-01)); + r += mul(s0_5, M4(-1.647e-03, -6.027e-03, -3.787e-03, -1.975e-02, -4.810e-02, -4.557e-01, 4.921e-02, -1.313e-01, -2.044e-02, 3.533e-01, -7.591e-02, 1.249e-02, 2.648e-02, -5.215e-01, 1.204e-01, -2.254e-01)); + r += mul(s0_6, M4(-2.852e-02, -1.630e-02, 1.249e-01, -1.758e-02, 4.285e-02, 1.425e-02, -1.595e-02, 2.618e-02, 4.460e-03, 1.266e-02, -3.914e-02, 1.111e-02, 5.378e-02, 2.199e-02, 2.561e-03, 2.125e-02)); + r += mul(s0_7, M4(-6.567e-02, -4.333e-02, -4.153e-03, 1.692e-01, 5.376e-02, 5.736e-02, -1.860e-01, -9.094e-02, 3.357e-02, -3.186e-02, 1.244e-01, -9.606e-02, 6.227e-02, 6.827e-02, -2.086e-01, -6.625e-02)); + r += mul(s0_8, M4(4.553e-05, -3.116e-02, 1.023e-02, 2.322e-02, 8.623e-02, 
1.125e-01, 2.802e-02, -2.768e-01, -1.003e-01, -2.143e-02, -2.413e-02, 1.460e-01, 5.421e-02, 5.798e-02, 3.478e-03, -1.421e-01)); + r += mul(s1_0, M4(2.165e-01, 1.123e-01, -3.653e-02, -6.070e-03, -1.021e-01, -6.901e-04, 6.256e-03, -3.182e-03, -4.285e-02, -6.763e-02, 2.278e-02, -1.860e-02, -2.689e-02, 2.567e-02, 2.634e-03, 3.600e-02)); + r += mul(s1_1, M4(-1.159e-01, -1.198e-01, 2.991e-02, -6.143e-02, 1.038e-01, -5.076e-02, -1.785e-02, -3.611e-02, 6.860e-02, 9.302e-02, -1.125e-02, 3.332e-02, 6.457e-02, -3.919e-02, 4.158e-03, -1.201e-02)); + r += mul(s1_2, M4(-6.554e-03, 3.359e-02, -2.003e-02, -2.227e-04, 3.354e-02, -3.700e-02, -9.588e-03, -3.740e-02, -1.336e-02, -2.556e-04, -4.733e-03, -1.636e-02, 1.127e-02, 1.421e-02, -1.019e-02, -2.731e-02)); + r += mul(s1_3, M4(3.642e-01, -3.756e-03, 6.584e-01, 1.773e-01, -1.638e-02, 1.109e-02, -7.427e-02, -1.572e-02, -1.869e-01, -3.059e-02, -8.088e-02, -5.092e-02, -5.794e-02, -4.431e-02, -7.912e-02, -9.767e-02)); + r += mul(s1_4, M4(-3.255e-02, 3.115e-01, -2.109e-01, 2.804e-01, -6.504e-01, -1.342e-02, 1.355e-01, 3.623e-01, 5.142e-01, 2.124e-01, 1.866e-01, 2.268e-01, -2.470e-02, 1.629e-01, 1.163e-01, 1.663e-01)); + r += mul(s1_5, M4(-1.093e-02, -1.640e-04, -3.502e-02, -3.746e-02, 1.836e-02, -5.959e-01, 1.323e-01, -2.388e-01, 3.482e-02, 1.823e-01, -3.895e-02, 5.164e-03, -7.314e-02, -3.897e-01, 6.275e-02, -3.974e-02)); + r += mul(s1_6, M4(7.922e-03, -3.284e-02, 1.274e-01, -2.930e-02, 6.307e-02, 2.548e-02, -4.094e-02, 2.130e-02, -1.123e-02, 1.824e-03, -9.595e-02, 1.808e-02, 7.955e-02, 3.285e-02, 4.592e-02, 7.153e-02)); + r += mul(s1_7, M4(-6.410e-02, -1.423e-02, -4.912e-02, 1.461e-01, 6.612e-02, 9.838e-02, -2.153e-01, -1.067e-01, -1.108e-02, -1.048e-01, 2.778e-01, -1.116e-01, 4.569e-02, 2.955e-02, -1.440e-01, -3.364e-02)); + r += mul(s1_8, M4(1.721e-02, 1.171e-02, 1.096e-02, -2.832e-02, 7.446e-02, 4.785e-02, 8.270e-03, -1.640e-01, -8.912e-02, -6.617e-02, 3.225e-03, 9.894e-02, 4.367e-02, 8.102e-02, -1.779e-02, -2.410e-01)); + r += 
V4(1.708e-05, 2.435e-04, 1.267e-03, 1.926e-03); + return tanh(r); +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, 
INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-3x4C-NVL.hlsl b/src/Effects/CuNNy/CuNNy-3x4C-NVL.hlsl new file mode 100644 index 000000000..adfc942e2 --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-3x4C-NVL.hlsl @@ -0,0 +1,413 @@ +// CuNNy 3x4C BILINEAR RGB NVL - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-D04N03 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0 + +#define l0(x, y) min16float((dot(float3(6.094e-01, 1.148e+00, 2.568e-01), O(INPUT, float2(x, y)).rgb) + -1.542e+00)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(-6.372e-02, 1.685e-01, -2.573e-02, -2.185e-02) * s0_0; + r += V4(-3.502e-02, -2.984e-03, 5.048e-02, -2.445e-01) * s0_1; + r += V4(9.644e-02, -7.557e-03, -1.770e-02, 3.162e-02) * s0_2; + r += V4(7.199e-02, -6.233e-01, -4.180e-01, 1.392e-01) * s0_3; + r += V4(-5.683e-01, 1.451e-01, -8.148e-02, 9.768e-02) * s0_4; + r += V4(4.702e-01, -1.319e-03, 3.745e-03, -4.204e-02) * s0_5; + r += V4(9.855e-03, 3.213e-01, 5.098e-01, 4.001e-02) * s0_6; + r += V4(8.216e-02, -1.219e-02, -3.347e-02, 5.017e-02) * s0_7; + r += V4(-6.691e-02, 5.417e-03, 1.235e-02, -9.640e-03) * s0_8; + r += V4(-4.952e-03, -2.750e-03, -9.137e-04, 6.736e-02); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 
= l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.169e-01, 3.467e-01, -2.365e-01, 2.253e-01, 6.307e-02, 1.727e-01, -1.053e-01, 9.324e-02, -4.901e-02, -2.112e-01, 8.983e-02, -1.851e-01, -1.987e-01, 6.645e-02, 2.188e-02, 1.988e-02)); + r += mul(s0_1, M4(4.393e-02, 2.078e-01, -1.967e-01, 4.673e-02, -7.991e-02, 2.461e-01, -6.028e-02, 9.252e-02, 3.871e-01, 6.138e-02, -3.603e-01, -1.485e-01, 2.466e-01, 5.251e-02, -6.181e-02, 8.932e-02)); + r += mul(s0_2, M4(-1.707e-02, 2.598e-02, 1.641e-02, 2.780e-02, 2.425e-02, 1.769e-01, -8.461e-02, 1.067e-01, -2.503e-01, 6.051e-01, -2.782e-01, 1.311e-01, -8.456e-03, -1.370e-02, -6.391e-02, 6.935e-02)); + r += mul(s0_3, M4(-8.251e-01, -4.981e-01, -1.726e-01, -1.815e-01, 1.411e-01, 2.889e-02, -3.115e-01, -3.255e-01, 1.812e-03, -4.529e-02, 2.350e-01, 1.999e-01, -1.993e-01, -1.868e-02, 4.249e-02, -1.117e-01)); + r += mul(s0_4, M4(-4.732e-02, -5.673e-02, 1.274e-01, 4.894e-02, 9.126e-02, 1.717e-01, -3.294e-01, -2.378e-01, -7.089e-02, -8.116e-02, 2.510e-01, 7.381e-02, 1.275e-01, 8.030e-02, -1.671e-01, -1.824e-02)); + r += mul(s0_5, M4(3.373e-02, -4.163e-02, -4.077e-02, -2.085e-02, 1.265e-01, -4.133e-01, 7.433e-02, 7.763e-02, -1.466e-01, 3.291e-01, -7.784e-02, 9.472e-02, 2.725e-01, -2.393e-01, -6.913e-02, -9.445e-02)); + r += mul(s0_6, M4(3.043e-02, -9.985e-02, 1.538e-01, -2.529e-01, 2.379e-01, 1.079e-01, -1.517e-01, -9.289e-02, -1.396e-01, -4.354e-02, 8.463e-02, 7.052e-02, 5.629e-02, 3.293e-03, 
5.342e-02, -1.606e-01)); + r += mul(s0_7, M4(3.626e-02, -1.421e-01, 4.017e-02, -3.963e-02, 2.148e-03, 5.522e-02, 3.174e-01, 2.270e-02, -5.590e-02, -9.875e-02, -1.683e-01, 5.415e-02, 1.509e-01, 7.709e-02, -1.161e-01, 1.440e-01)); + r += mul(s0_8, M4(-1.132e-02, 2.337e-02, 1.264e-02, 2.638e-03, -6.582e-02, -1.965e-01, 2.803e-01, 1.333e-01, 9.171e-02, 1.567e-01, -2.419e-01, -1.602e-01, -2.271e-01, 3.614e-02, 2.179e-01, 4.826e-02)); + r += mul(s1_0, M4(1.452e-01, 1.313e-01, -6.140e-02, 2.412e-01, -3.691e-02, 7.355e-02, -4.209e-02, 1.343e-01, -2.509e-02, -1.266e-01, 9.017e-02, -1.854e-02, -4.280e-01, -1.004e-01, 2.319e-01, 4.211e-02)); + r += mul(s1_1, M4(4.894e-02, 7.564e-02, -9.350e-02, 5.422e-02, -6.111e-02, 6.969e-02, -4.398e-02, 6.622e-02, 7.113e-01, 3.461e-01, -5.254e-01, -8.808e-02, 4.481e-01, 3.171e-01, -2.198e-01, 1.048e-01)); + r += mul(s1_2, M4(-3.483e-02, 3.150e-03, 2.215e-02, 2.616e-02, 1.468e-01, -1.295e-01, -1.470e-01, 3.371e-02, -4.514e-02, 4.677e-02, -1.313e-01, -1.176e-01, 1.507e-03, 2.290e-01, -2.163e-01, 3.895e-02)); + r += mul(s1_3, M4(-2.258e-01, -1.353e-01, -4.873e-01, -1.236e+00, 1.660e-01, -1.803e-02, -2.797e-01, -4.092e-01, -1.525e-01, -8.178e-02, 2.665e-01, 3.652e-01, -1.853e-01, -3.819e-02, 1.627e-01, -3.896e-01)); + r += mul(s1_4, M4(-1.005e-01, -3.821e-02, 9.917e-02, -1.324e-01, -2.040e-01, -3.586e-01, 9.776e-02, -1.376e-01, 2.065e-01, 2.017e-01, -1.320e-01, -2.225e-02, 2.944e-01, 5.393e-02, -4.301e-01, -7.240e-02)); + r += mul(s1_5, M4(5.353e-02, -4.257e-02, -4.131e-02, -3.943e-02, -6.151e-02, 3.059e-01, -1.481e-02, 3.662e-01, 3.098e-02, -8.774e-02, 1.790e-02, -1.332e-01, 8.670e-02, -6.985e-02, -1.359e-01, 2.063e-01)); + r += mul(s1_6, M4(-9.271e-02, 2.259e-01, 2.200e-02, -2.390e-01, 3.258e-01, 1.082e-01, -1.499e-01, -3.063e-02, -2.775e-01, -9.008e-02, 1.294e-01, 3.533e-02, 1.011e-02, 4.294e-02, 4.935e-02, -1.005e-01)); + r += mul(s1_7, M4(1.321e-02, -7.160e-02, 7.229e-02, -3.050e-02, 4.303e-02, -1.518e-01, 5.137e-01, 4.029e-02, 4.896e-02, 
5.334e-02, -3.545e-01, 2.370e-02, 1.645e-01, 3.433e-02, -9.552e-03, 1.032e-01)); + r += mul(s1_8, M4(8.370e-03, -2.408e-02, 2.693e-02, -8.183e-03, -2.375e-02, -2.973e-01, 1.889e-01, 1.096e-01, 1.093e-02, 2.310e-01, -1.613e-01, -1.343e-01, -1.718e-01, -2.165e-02, 1.384e-01, 9.956e-02)); + r += V4(-1.511e-02, -2.848e-03, 7.160e-03, -2.555e-03); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.983e-02, 8.935e-03, -1.644e-01, -4.232e-04, -1.981e-01, 9.265e-02, 1.769e-01, 1.705e-01, -2.300e-02, -7.408e-03, -4.221e-02, -1.617e-02, -6.026e-02, -9.185e-03, -7.420e-02, -4.238e-02)); + r 
+= mul(s0_1, M4(1.832e-01, -1.117e-01, 1.784e-02, 6.345e-02, -9.651e-02, 5.753e-02, 1.480e-01, 1.284e-01, 3.957e-01, -2.684e-01, 2.853e-02, -5.823e-02, -8.184e-02, 1.062e-01, -2.604e-02, -7.579e-02)); + r += mul(s0_2, M4(-1.753e-01, 5.019e-03, -1.285e-01, 8.470e-02, -2.566e-01, 6.556e-02, -9.751e-02, 7.653e-03, -9.466e-02, 3.098e-02, -9.617e-02, -4.826e-02, 3.951e-02, -5.446e-02, 1.297e-01, 1.076e-01)); + r += mul(s0_3, M4(-7.377e-02, -2.183e-01, 9.806e-02, 1.735e-01, 2.795e-01, 3.730e-01, 1.906e-01, 1.313e-01, 2.115e-01, 2.222e-01, 1.880e-01, 2.427e-01, -1.177e-01, 2.587e-02, -1.928e-01, -1.489e-01)); + r += mul(s0_4, M4(-3.487e-01, -3.194e-01, 7.963e-01, -1.044e-01, 3.136e-01, -5.467e-02, 5.059e-01, -4.801e-02, -4.943e-01, -1.466e-01, -5.938e-02, -9.473e-01, 2.661e-01, -1.545e-01, 1.986e-01, -2.172e-02)); + r += mul(s0_5, M4(-3.450e-01, 1.931e-01, -2.303e-01, -1.880e-01, -1.323e-01, 1.839e-01, -1.130e-01, -5.181e-02, 3.049e-02, 9.834e-02, -1.342e-01, -1.072e-01, 1.925e-02, -9.652e-02, 1.169e-01, 2.084e-01)); + r += mul(s0_6, M4(1.543e-02, 2.202e-01, 4.809e-02, 1.085e-01, 3.076e-02, -4.127e-01, 4.606e-02, 9.444e-02, 7.886e-02, -1.314e-01, -1.638e-02, 4.353e-02, 9.790e-02, -6.783e-02, -1.008e-01, -1.558e-01)); + r += mul(s0_7, M4(-4.453e-02, 3.133e-01, -2.217e-01, -5.271e-02, -2.055e-01, -1.000e-01, 8.374e-02, 6.141e-02, 2.147e-02, -3.844e-01, -2.203e-01, -1.105e-01, -3.596e-02, 2.026e-01, 3.174e-01, 1.519e-01)); + r += mul(s0_8, M4(-5.107e-03, 2.380e-01, 2.147e-02, -8.032e-02, -9.743e-02, 6.943e-02, 9.403e-02, 3.742e-02, -1.822e-02, -4.950e-02, 7.963e-02, -1.338e-01, -1.491e-01, 1.655e-02, -5.817e-02, 1.164e-01)); + r += mul(s1_0, M4(8.679e-02, -7.335e-02, -5.999e-02, -4.504e-02, -3.329e-02, 4.349e-03, -4.883e-02, 3.159e-02, -7.948e-02, 3.308e-02, 6.579e-02, 1.607e-01, 1.336e-01, -1.042e-01, -2.368e-01, -1.546e-01)); + r += mul(s1_1, M4(2.764e-01, -6.665e-02, 1.661e-02, -4.103e-02, 1.095e-01, -1.159e-01, -1.142e-01, -1.412e-01, 4.033e-01, -8.697e-02, 2.387e-01, 
1.762e-01, 4.948e-01, -1.533e-01, 7.816e-02, 5.700e-02)); + r += mul(s1_2, M4(1.187e-01, -6.571e-02, 4.698e-02, 4.931e-02, -5.523e-02, 3.925e-02, -7.453e-02, -8.429e-02, -2.202e-01, 6.090e-02, -1.460e-01, 2.777e-02, 4.405e-01, 6.445e-03, 3.494e-01, 3.311e-01)); + r += mul(s1_3, M4(-4.333e-02, -8.517e-02, 1.372e-01, 2.066e-01, 4.728e-01, 1.195e-01, -2.627e-01, -2.280e-01, 1.606e-01, 2.216e-01, 2.269e-01, 3.505e-01, -2.499e-01, -3.977e-01, -3.659e-02, 1.460e-02)); + r += mul(s1_4, M4(-4.640e-01, -7.221e-01, -2.524e-01, -6.513e-01, 6.699e-01, -1.727e-01, 4.444e-01, -3.115e-01, -6.748e-01, 1.063e-01, 6.487e-01, -3.195e-01, -5.136e-01, -8.272e-01, 4.014e-01, 4.914e-01)); + r += mul(s1_5, M4(-1.112e-03, -1.293e-02, 1.567e-02, -1.266e-01, 1.185e-01, 4.940e-02, -9.925e-02, -1.034e-01, -1.041e-01, 1.822e-01, -4.277e-02, 1.313e-01, -6.459e-01, -1.562e-01, -3.961e-01, -7.262e-02)); + r += mul(s1_6, M4(1.499e-02, 3.135e-01, 2.187e-01, 2.386e-01, 1.171e-01, -4.899e-01, -1.987e-01, -1.717e-01, 5.232e-02, -1.984e-01, 9.338e-04, 1.092e-01, 1.545e-01, 4.183e-01, 1.180e-01, 1.102e-01)); + r += mul(s1_7, M4(-1.411e-01, 2.619e-01, -2.549e-01, -2.113e-01, -1.109e-01, -3.038e-01, 7.579e-02, -3.585e-02, -1.373e-03, -2.713e-01, -5.527e-02, 7.052e-02, -1.648e-01, 7.324e-01, 3.974e-01, 2.306e-01)); + r += mul(s1_8, M4(-1.861e-02, 9.414e-02, -6.739e-02, -8.921e-02, -2.337e-02, -2.657e-02, -3.376e-03, -7.209e-02, -1.042e-01, -2.504e-02, 1.287e-01, -1.459e-02, -1.617e-01, 2.384e-01, -6.969e-01, -3.760e-01)); + r += V4(-3.514e-03, 2.350e-03, 2.221e-03, 1.089e-03); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = 
l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.869e-01, 8.774e-02, -6.451e-02, 6.682e-02, 8.374e-02, 1.313e-02, -2.649e-02, 2.741e-02, -3.609e-02, -9.330e-02, -8.233e-02, 1.117e-01, -1.203e-01, 1.719e-02, 1.288e-01, -9.851e-02)); + r += mul(s0_1, M4(3.100e-01, 5.063e-02, 1.169e-01, -3.828e-02, 3.428e-01, 4.869e-02, -1.232e-02, -1.003e-02, 2.756e-01, 3.916e-01, 1.450e-01, 1.078e-01, -2.568e-01, -2.157e-01, -1.057e-01, -1.338e-01)); + r += mul(s0_2, M4(1.199e-01, -1.890e-01, 5.870e-03, -5.995e-03, 2.255e-01, -2.325e-03, 7.916e-03, -2.038e-02, 1.353e-01, -9.590e-02, -2.119e-02, -5.860e-02, -7.698e-02, -3.608e-02, -3.571e-02, 2.010e-02)); + r += mul(s0_3, M4(9.889e-02, -2.665e-02, -2.627e-01, 3.583e-01, 7.891e-02, 8.737e-02, 5.322e-02, 5.246e-04, -5.188e-02, -8.491e-02, -4.991e-02, -3.735e-02, 5.711e-02, 4.482e-02, 5.660e-02, -1.322e-01)); + r += mul(s0_4, M4(-5.488e-01, 2.898e-01, 1.046e+00, 6.036e-01, -3.180e-01, -6.309e-01, -2.627e-01, 1.734e-01, -2.067e-01, 3.775e-02, -2.881e-01, -9.242e-02, 3.369e-01, 
2.554e-02, -1.645e-01, 4.973e-01)); + r += mul(s0_5, M4(6.976e-03, -1.830e-01, 2.842e-01, 2.570e-02, -2.902e-01, 5.059e-01, 1.944e-01, 1.794e-02, -1.333e-01, 2.341e-01, 4.161e-01, -5.179e-02, 8.176e-02, -2.435e-02, -1.598e-02, 6.211e-02)); + r += mul(s0_6, M4(-2.668e-02, -6.958e-02, -5.015e-02, 8.035e-02, 4.451e-02, -1.290e-03, -7.688e-02, 1.708e-01, -5.133e-02, -2.768e-02, -1.780e-02, -6.317e-02, -9.692e-03, -2.748e-03, 9.070e-03, -1.314e-01)); + r += mul(s0_7, M4(1.402e-01, 4.997e-02, -4.973e-02, 6.839e-01, 2.079e-02, -2.511e-02, 3.403e-01, -3.077e-01, -2.831e-02, 4.816e-02, -9.142e-02, -8.176e-02, -2.999e-02, -5.749e-03, -5.579e-02, -2.355e-01)); + r += mul(s0_8, M4(-1.783e-02, -2.882e-02, 9.841e-02, 4.473e-02, 4.128e-02, -3.071e-02, -2.378e-01, 1.347e-01, -2.285e-02, 1.317e-02, -1.632e-02, 1.058e-01, -3.696e-02, -6.864e-03, -8.989e-02, -7.315e-02)); + r += mul(s1_0, M4(8.857e-02, 3.169e-02, -1.896e-02, 1.258e-02, 7.086e-02, 5.699e-02, 1.550e-02, -1.836e-02, 1.209e-01, 5.334e-02, -1.557e-02, -2.374e-02, -1.411e-02, 1.543e-02, 1.769e-02, -4.332e-02)); + r += mul(s1_1, M4(1.199e-01, -8.203e-03, -1.695e-02, -3.214e-02, 5.918e-01, 3.458e-01, 7.684e-02, -5.137e-01, 2.827e-01, -2.008e-02, -1.848e-01, 2.147e-01, 7.212e-02, -3.906e-03, -2.220e-01, -1.918e-01)); + r += mul(s1_2, M4(4.464e-02, 4.035e-02, 4.265e-03, 1.350e-02, -4.623e-01, -1.882e-01, 9.929e-02, -2.295e-01, 2.010e-01, 6.059e-01, 3.648e-01, -1.670e-02, -6.763e-02, -2.588e-01, -1.741e-01, 3.358e-02)); + r += mul(s1_3, M4(1.003e-01, -2.961e-02, -1.715e-01, 1.057e-01, 3.275e-03, 1.877e-02, -4.995e-02, 1.181e-01, 3.600e-02, 2.101e-02, -1.050e-01, 8.035e-02, -8.107e-02, -1.067e-01, -5.457e-02, 5.339e-02)); + r += mul(s1_4, M4(3.875e-01, 3.638e-01, 1.178e-01, -4.404e-02, 6.128e-02, -1.193e-01, -3.161e-01, 3.510e-01, -3.482e-02, -2.842e-01, -3.917e-01, 4.525e-01, 1.969e-01, 5.299e-01, 4.720e-01, -2.266e-01)); + r += mul(s1_5, M4(-1.420e-02, 2.325e-02, -8.697e-02, -4.296e-03, 8.697e-02, 7.490e-02, 1.773e-01, 
4.010e-01, 2.380e-01, -1.182e-01, 9.121e-01, 2.252e-01, 1.348e-01, -7.448e-02, -8.496e-01, -3.335e-01)); + r += mul(s1_6, M4(-7.923e-02, -2.533e-02, -4.896e-02, -5.473e-02, -5.329e-03, 1.285e-02, -1.763e-02, 7.009e-02, 9.670e-04, -1.889e-02, -1.008e-01, 1.149e-01, 7.259e-03, 4.080e-02, 1.042e-01, -2.627e-01)); + r += mul(s1_7, M4(-9.746e-02, 6.679e-02, -1.421e-01, -2.202e-01, -9.918e-03, -2.413e-02, -1.554e-02, 7.011e-03, -3.226e-02, -3.024e-02, -5.431e-02, 7.446e-02, 5.860e-02, 2.851e-02, -2.367e-01, 2.562e-02)); + r += mul(s1_8, M4(-4.627e-02, 4.226e-02, -8.654e-02, -3.312e-02, 1.600e-02, 2.983e-02, 8.834e-03, -3.871e-02, -4.137e-03, 1.767e-02, 2.492e-02, -5.391e-02, 8.133e-03, 1.430e-02, -2.428e-02, -1.132e-01)); + r += V4(-4.349e-03, -3.760e-03, 4.684e-03, 4.745e-03); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 5 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t1 
+//!OUT OUTPUT + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.783e-03, 7.235e-03, -7.275e-03, -2.802e-03, 4.921e-02, 7.543e-02, -3.357e-02, 1.213e-02, 2.900e-02, 2.380e-03, -9.028e-03, -2.594e-02, 1.576e-03, 3.334e-04, -2.460e-02, -1.285e-02)); + r += mul(s0_1, M4(4.582e-02, 9.378e-04, 2.217e-02, 5.083e-02, -1.054e-02, 8.518e-02, -1.884e-02, -5.149e-02, 1.983e-02, -1.106e-02, -4.317e-03, 5.384e-02, -5.193e-02, 1.089e-02, -9.384e-03, 3.137e-02)); + r += mul(s0_2, M4(-5.241e-03, 3.821e-02, -1.136e-02, -3.033e-02, 3.186e-02, -3.270e-03, 1.422e-02, 2.401e-02, -1.360e-02, 1.024e-01, -6.042e-02, -2.325e-02, -1.248e-01, -1.377e-01, 1.654e-02, -1.347e-02)); + r += mul(s0_3, M4(-3.552e-02, -3.211e-02, -2.282e-03, 1.775e-02, 1.360e-01, 2.808e-02, 1.082e-01, -1.311e-02, -1.699e-02, -2.628e-02, 3.430e-02, -3.880e-03, 2.514e-02, -3.171e-02, 4.675e-02, -2.711e-02)); + r += mul(s0_4, M4(4.756e-01, 2.686e-01, 4.514e-02, -8.813e-02, 2.636e-01, -4.893e-01, 1.301e-01, 1.304e-01, 3.778e-01, 2.765e-01, 3.369e-01, 8.811e-02, 5.080e-02, 2.783e-01, -1.131e-01, 2.487e-01)); + r += mul(s0_5, M4(-2.961e-02, 7.757e-02, -8.471e-02, -4.636e-02, -6.862e-02, 1.733e-01, -7.301e-02, -1.408e-02, 1.636e-02, 9.982e-02, 5.704e-02, 2.568e-01, -2.224e-02, -2.588e-01, -2.202e-01, -4.898e-01)); + r += mul(s0_6, M4(1.058e-01, -2.810e-02, -2.960e-02, -8.398e-02, -9.106e-02, 6.642e-02, -2.574e-02, 7.841e-02, -1.978e-02, -3.700e-02, -1.504e-02, -3.186e-02, 2.438e-03, 6.191e-03, -1.155e-02, -1.161e-02)); + r += mul(s0_7, M4(-6.316e-01, -7.748e-02, 8.006e-01, 3.936e-01, 1.300e-01, -1.999e-01, 2.351e-01, -7.485e-01, -7.151e-02, -4.285e-02, -2.277e-02, 2.849e-02, -2.207e-02, -2.585e-02, -2.498e-02, -3.308e-02)); + r += mul(s0_8, M4(-2.002e-01, -6.934e-01, -1.093e-01, 3.325e-01, -5.778e-02, 2.138e-02, 
-2.930e-02, 1.794e-01, -3.028e-03, 2.300e-03, 5.845e-03, -1.959e-02, 1.403e-02, 1.565e-02, 1.840e-02, -6.027e-04)); + r += mul(s1_0, M4(2.228e-02, -8.352e-03, -1.007e-02, -1.911e-02, -1.489e-02, 2.785e-03, -9.190e-03, 5.858e-03, 2.420e-02, -7.701e-03, -2.327e-02, -2.494e-02, -8.526e-03, -2.384e-02, -2.601e-02, -4.833e-02)); + r += mul(s1_1, M4(5.671e-02, 3.666e-02, 3.309e-02, 1.011e-02, -8.053e-03, 4.673e-02, -5.358e-02, -2.451e-02, 3.779e-01, 5.642e-02, -2.324e-01, -3.499e-02, -3.479e-01, 1.179e-01, -4.630e-02, 1.118e-01)); + r += mul(s1_2, M4(-1.650e-02, 6.203e-04, -1.322e-02, -1.996e-02, 2.118e-02, -9.244e-03, 2.813e-02, 9.773e-03, -2.654e-02, -8.373e-02, 6.663e-04, -6.860e-02, -3.436e-02, -7.207e-01, 2.389e-01, 1.903e-01)); + r += mul(s1_3, M4(-8.045e-02, -2.073e-02, 3.380e-02, 1.327e-02, 1.247e-01, 1.129e-02, 6.421e-02, -8.326e-03, -4.675e-02, 4.920e-02, -3.699e-02, 4.601e-02, 3.389e-02, -4.151e-02, 3.012e-02, -2.241e-02)); + r += mul(s1_4, M4(5.223e-01, 1.394e-01, 1.222e-01, -7.687e-03, -3.115e-01, 3.989e-02, -1.679e-01, 2.607e-01, 4.393e-01, -1.821e-01, 1.006e+00, -2.920e-01, 8.062e-02, 2.231e-01, -1.282e-02, 2.495e-01)); + r += mul(s1_5, M4(-1.146e-01, 6.738e-02, -1.655e-02, 1.178e-02, -3.058e-02, 1.093e-01, 9.367e-03, 1.382e-02, -7.397e-02, 2.300e-01, -4.202e-02, 1.765e-01, -4.671e-02, -1.375e-02, -3.662e-01, -5.254e-01)); + r += mul(s1_6, M4(5.090e-02, 8.633e-03, -1.128e-02, -3.186e-02, -6.263e-02, 4.143e-02, -2.214e-02, 5.270e-02, -1.370e-02, -1.692e-02, -2.644e-02, -9.847e-03, -2.147e-03, -7.941e-03, -1.323e-04, -5.173e-03)); + r += mul(s1_7, M4(-9.353e-02, 6.696e-02, 2.744e-01, 2.743e-01, 9.809e-02, -1.439e-01, -2.583e-02, -3.717e-01, -5.135e-02, -1.889e-02, -1.775e-02, 9.383e-03, -2.496e-02, -2.936e-02, -2.578e-02, -1.586e-02)); + r += mul(s1_8, M4(-1.565e-02, -1.635e-01, -1.800e-01, -2.607e-01, 1.975e-02, 1.594e-02, -4.568e-02, 1.218e-01, -6.668e-03, 7.923e-03, -4.625e-02, 1.324e-02, -6.838e-03, 2.045e-02, 1.141e-02, 2.717e-02)); + r += V4(7.204e-05, 
-6.226e-05, 2.867e-04, -3.251e-05); + return tanh(r); +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 
0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-4x16C-NVL-DN.hlsl b/src/Effects/CuNNy/CuNNy-4x16C-NVL-DN.hlsl new file mode 100644 index 000000000..47aae938f --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-4x16C-NVL-DN.hlsl @@ -0,0 +1,2223 @@ +// CuNNy 4x16C BILINEAR RGB NVL DN - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-DN-D16N04 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t4; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t5; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t6; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t7; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1, t2, t3 + +#define l0(x, y) min16float((dot(float3(1.716e-01, 3.403e-01, 7.642e-02), O(INPUT, float2(x, y)).rgb) + -3.175e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(1.961e-02, -2.468e-02, -6.114e-02, -2.802e-01) * s0_0; + r += V4(1.970e-01, -1.878e-01, -2.297e-01, -3.432e-01) * s0_1; + r += V4(2.960e-01, -8.674e-02, -2.113e-03, 9.338e-03) * s0_2; + r += V4(-3.926e-02, -7.021e-02, -5.343e-03, 1.575e-01) * s0_3; + r += V4(-1.290e-01, 9.192e-02, 5.665e-03, 3.820e-01) * s0_4; + r += V4(3.069e-02, -3.926e-02, 8.457e-03, 6.161e-02) * s0_5; + r += 
V4(-1.308e-02, -7.427e-02, 8.740e-02, 3.248e-02) * s0_6; + r += V4(-2.776e-01, 1.103e-01, 2.417e-01, -9.734e-03) * s0_7; + r += V4(-9.002e-02, -2.320e-01, -4.780e-02, -1.075e-02) * s0_8; + r += V4(-1.870e-03, 2.595e-02, 1.016e-02, -2.176e-06); + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(8.902e-03, 5.085e-03, 1.299e-01, 3.149e-02) * s0_0; + r += V4(-5.949e-02, -8.876e-02, -4.402e-02, 1.477e-01) * s0_1; + r += V4(2.232e-01, 5.137e-01, 1.173e-03, 2.469e-01) * s0_2; + r += V4(-3.289e-03, -3.042e-02, 1.745e-01, -5.937e-02) * s0_3; + r += V4(-2.485e-02, -4.547e-01, 1.545e-02, -1.043e-01) * s0_4; + r += V4(-3.966e-02, 5.600e-02, -4.284e-03, 1.237e-02) * s0_5; + r += V4(-2.038e-02, 2.123e-02, -2.101e-02, -8.507e-02) * s0_6; + r += V4(-6.178e-03, -9.692e-03, -1.255e-02, -1.780e-01) * s0_7; + r += V4(5.415e-03, -1.418e-02, 8.939e-04, -8.693e-03) * s0_8; + r += V4(2.045e-02, 1.725e-04, 6.812e-02, 7.844e-03); + return r; +} + +V4 f2(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(8.013e-02, 2.823e-02, 4.626e-02, -3.119e-02) * s0_0; + r += V4(1.225e-01, -7.935e-02, 2.124e-01, 9.123e-03) * s0_1; + r += V4(-1.392e-01, 1.348e-02, 8.561e-03, 1.242e-02) * s0_2; + r += V4(-3.970e-01, -3.109e-01, -1.089e-02, -5.265e-01) * s0_3; + r += V4(1.932e-01, -8.691e-02, -2.152e-01, 5.156e-01) * s0_4; + r += V4(1.372e-01, 7.844e-02, -2.459e-02, -1.269e-03) * s0_5; + r += V4(3.331e-01, -2.012e-02, 3.449e-03, -1.190e-02) * s0_6; + r += V4(-3.669e-01, 2.920e-01, -1.854e-02, 3.106e-02) * s0_7; + r += V4(3.676e-02, 8.013e-02, 2.494e-03, 3.553e-03) * s0_8; + r += V4(-1.500e-04, 7.524e-03, 1.305e-02, -1.741e-03); + return r; +} + +V4 f3(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float 
s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(4.077e-01, -2.281e-01, 4.163e-02, 5.385e-02) * s0_0; + r += V4(-3.682e-01, 1.155e-01, 2.065e-01, -9.950e-02) * s0_1; + r += V4(-4.125e-02, -6.752e-02, -1.279e-01, 7.053e-02) * s0_2; + r += V4(1.673e-02, 2.529e-02, -7.845e-03, -9.448e-02) * s0_3; + r += V4(-3.023e-02, 3.602e-01, 3.584e-02, -1.269e-01) * s0_4; + r += V4(1.406e-02, -6.005e-02, -2.131e-01, 3.401e-01) * s0_5; + r += V4(7.944e-03, 2.771e-02, -1.800e-02, 1.497e-02) * s0_6; + r += V4(-1.022e-02, -1.165e-02, -2.743e-01, -1.461e-01) * s0_7; + r += V4(-1.086e-03, -1.616e-01, 3.584e-01, -1.958e-02) * s0_8; + r += V4(-1.022e-02, 3.503e-03, 3.744e-03, -1.218e-02); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, 
V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.447e-02, -4.512e-02, 1.159e-01, 1.963e-01, -1.216e-02, -1.953e-01, 2.400e-01, 8.721e-02, -2.607e-01, 5.455e-02, -3.575e-02, 1.853e-01, 1.885e-01, -4.355e-02, 1.744e-01, -1.562e-03)); + r += mul(s0_1, M4(4.882e-02, 1.756e-01, 2.833e-01, -1.969e-01, -1.609e-01, 1.577e-01, -7.379e-02, 1.040e-03, -9.190e-02, 1.117e-01, -8.452e-02, 6.563e-02, 2.244e-01, -7.622e-02, 2.172e-01, -1.074e-01)); + r += mul(s0_2, M4(-1.819e-02, 7.914e-02, 5.432e-02, 6.196e-02, -8.671e-02, -1.125e-01, 9.079e-02, -3.972e-02, 2.892e-02, -1.757e-02, -1.603e-01, -2.536e-01, 2.739e-01, -1.412e-01, -6.144e-02, 4.285e-02)); + r += mul(s0_3, M4(-2.391e-01, -3.949e-02, 5.552e-03, -2.747e-02, 1.515e-01, -1.024e-01, 4.835e-02, -1.596e-01, 1.805e-01, 2.124e-02, 2.138e-01, -1.019e-01, 9.341e-02, -3.100e-02, 6.799e-02, -2.739e-02)); + r += mul(s0_4, M4(-1.808e-01, -1.196e-02, 1.694e-01, -5.965e-02, 1.045e-01, 3.773e-02, -8.986e-02, -2.565e-02, 8.836e-02, 2.979e-02, 2.807e-02, 2.927e-01, -3.812e-01, -1.014e-01, 2.351e-01, 8.724e-02)); + r += mul(s0_5, M4(-7.659e-02, -5.995e-02, -2.753e-01, 8.187e-02, -1.846e-02, -5.299e-02, -1.294e-01, -1.318e-01, 1.982e-02, -3.402e-02, 8.883e-02, -9.631e-03, 1.156e-01, -1.266e-01, 9.954e-03, 1.672e-02)); + r += mul(s0_6, M4(-6.659e-02, -1.459e-02, 4.547e-02, -5.029e-02, -2.629e-02, -7.733e-02, -6.958e-02, 8.358e-02, -6.566e-02, 6.261e-02, -6.624e-02, -4.336e-03, 3.083e-02, -1.137e-01, 7.962e-02, -2.424e-01)); + r += mul(s0_7, 
M4(-6.925e-02, 1.048e-02, -5.944e-02, 1.440e-01, 3.892e-02, -8.438e-02, 6.759e-02, 2.478e-02, 6.631e-02, 2.860e-02, -1.101e-01, 7.350e-02, 3.929e-02, -1.212e-01, -1.219e-02, -3.078e-01)); + r += mul(s0_8, M4(3.271e-02, 8.140e-02, 8.873e-03, 4.885e-02, 4.142e-02, 3.100e-02, 2.806e-03, -1.107e-01, 5.720e-03, -9.362e-03, 7.347e-02, -8.732e-02, 1.013e-01, -3.682e-01, 1.400e-01, 1.890e-01)); + r += mul(s1_0, M4(2.605e-02, -1.667e-01, 4.084e-02, 3.665e-02, -2.413e-02, -4.308e-02, 5.995e-03, 1.111e-01, 1.305e-01, -9.948e-02, -2.282e-01, 1.164e-01, 1.546e-01, 4.532e-02, -1.307e-02, -4.186e-02)); + r += mul(s1_1, M4(7.431e-02, -1.396e-01, 5.474e-02, -1.252e-01, -6.404e-02, -5.702e-02, 1.844e-01, 9.455e-02, -9.482e-02, -2.143e-01, 9.576e-02, -1.013e-01, -2.984e-02, 2.306e-01, 5.085e-02, -1.153e-01)); + r += mul(s1_2, M4(3.424e-03, 1.031e-02, 2.310e-01, -1.411e-01, 2.611e-03, -1.044e-01, -7.948e-03, -6.975e-02, -1.936e-01, -2.695e-01, 4.106e-01, -4.694e-02, -1.517e-03, 6.812e-02, 2.300e-01, 1.107e-01)); + r += mul(s1_3, M4(1.403e-01, -2.540e-02, -6.241e-02, 2.002e-01, 7.344e-02, -5.668e-02, -1.206e-02, -1.196e-01, 1.709e-01, -2.606e-01, 2.658e-01, -2.613e-01, -1.726e-01, 1.067e-01, 3.272e-02, 2.019e-01)); + r += mul(s1_4, M4(2.398e-01, 9.349e-02, -9.843e-04, -1.778e-01, -8.231e-02, -1.592e-01, -1.194e-01, 4.598e-02, -2.137e-01, -3.436e-01, 4.551e-01, -9.698e-02, -1.529e-01, 5.136e-01, -4.330e-01, 1.053e+00)); + r += mul(s1_5, M4(-1.360e-02, -7.805e-02, -1.060e-01, 7.678e-02, 1.654e-02, -9.582e-02, -6.834e-02, -1.780e-01, 5.788e-02, -8.168e-02, -5.377e-01, -1.501e-01, 1.023e-01, 3.892e-01, -4.508e-01, 4.937e-02)); + r += mul(s1_6, M4(7.759e-02, 6.410e-02, 1.925e-02, 6.902e-03, -5.195e-02, -3.779e-02, -6.816e-02, 1.005e-01, -7.297e-02, -1.634e-01, -7.858e-02, -1.307e-01, -5.741e-02, 1.518e-01, 9.429e-02, 5.891e-03)); + r += mul(s1_7, M4(8.913e-02, 1.226e-02, -5.255e-04, -1.558e-01, -4.548e-02, 5.190e-02, 9.607e-02, -3.042e-02, -9.908e-02, 4.117e-02, -6.336e-02, -1.498e-01, 
-1.247e-01, -1.318e-02, 2.337e-01, 2.582e-01)); + r += mul(s1_8, M4(-9.251e-02, -3.700e-02, -8.219e-02, -2.187e-02, 1.334e-01, -2.552e-02, 5.251e-02, -1.958e-01, 3.978e-02, 6.368e-02, 2.337e-01, 8.484e-02, -2.190e-01, 6.898e-02, -3.112e-02, 1.067e-01)); + r += mul(s2_0, M4(9.501e-02, -2.914e-02, -2.567e-03, -7.305e-02, -1.666e-01, 5.910e-02, -4.919e-02, 2.893e-01, 1.400e-02, -1.953e-02, 3.783e-02, -3.476e-01, 1.396e-01, -1.791e-01, 6.593e-02, -1.394e-01)); + r += mul(s2_1, M4(8.148e-02, 1.092e-01, -3.694e-04, 2.426e-02, -3.004e-01, -2.030e-02, -3.628e-02, 6.085e-02, 1.330e-01, 6.845e-02, 1.253e-01, -6.560e-02, -2.402e-02, -1.013e-01, 1.817e-01, 1.867e-01)); + r += mul(s2_2, M4(-1.819e-02, -1.737e-01, 7.773e-02, 7.842e-02, -1.850e-01, -9.368e-02, 2.441e-02, -4.203e-02, 1.254e-01, -7.443e-03, -2.320e-02, -4.614e-02, 9.496e-02, 2.893e-02, 4.553e-02, -1.310e-01)); + r += mul(s2_3, M4(-8.195e-02, -9.179e-02, 2.743e-03, -7.567e-02, -2.320e-01, 2.469e-02, -9.990e-02, 2.617e-01, -1.016e-01, 6.323e-02, -1.192e-01, -1.622e-01, 1.867e-01, 3.104e-02, -8.681e-02, 2.721e-01)); + r += mul(s2_4, M4(6.398e-03, 8.490e-02, -4.636e-02, 1.341e-01, -1.047e-01, 2.959e-01, -5.207e-02, -3.080e-01, -3.247e-02, 6.690e-02, 3.750e-02, -5.048e-02, 1.982e-01, -2.965e-02, -1.335e-01, -1.964e-01)); + r += mul(s2_5, M4(-4.427e-02, -8.750e-02, -2.997e-03, -7.999e-02, -2.644e-01, -1.020e-01, 2.151e-01, -9.969e-02, 6.641e-03, -7.456e-03, 2.008e-02, 6.420e-02, 2.294e-01, -1.701e-02, 3.415e-02, 1.749e-01)); + r += mul(s2_6, M4(-5.600e-02, 5.810e-02, 9.523e-02, -3.474e-02, 1.662e-02, -8.134e-02, -6.425e-02, 1.900e-01, -9.887e-02, 1.930e-02, -7.057e-03, 4.022e-03, -1.439e-01, -4.360e-02, -3.308e-02, 1.963e-02)); + r += mul(s2_7, M4(-5.579e-02, 1.073e-01, -2.163e-03, -2.249e-02, -2.898e-01, 3.912e-02, 3.370e-04, 5.760e-02, -9.889e-02, 5.421e-02, 1.554e-02, 2.053e-02, 2.035e-02, 6.746e-02, 1.007e-02, -1.987e-01)); + r += mul(s2_8, M4(7.718e-02, 2.845e-02, -2.573e-02, 5.771e-02, -3.623e-01, -1.526e-01, 
3.717e-02, -6.208e-02, 6.221e-02, 6.867e-03, 8.471e-02, 4.190e-02, -8.339e-02, -9.281e-02, -1.494e-01, -1.699e-02)); + r += mul(s3_0, M4(3.535e-02, 2.739e-01, -6.901e-02, -5.246e-02, 1.448e-01, -1.386e-01, -1.050e-02, 3.544e-01, -6.436e-01, -4.722e-01, 4.010e-01, 4.993e-01, 4.557e-02, 8.169e-02, 2.293e-01, -7.771e-02)); + r += mul(s3_1, M4(-6.334e-03, 2.898e-01, 1.886e-02, 3.163e-01, 2.765e-01, -8.130e-02, -1.764e-01, 8.914e-02, -4.377e-01, -2.356e-02, -1.108e-01, -3.680e-01, -3.435e-02, 1.245e-01, -1.710e-01, -1.407e-01)); + r += mul(s3_2, M4(-2.790e-02, -3.380e-01, 1.541e-02, 9.081e-04, 9.270e-02, 1.595e-02, -3.041e-02, -6.194e-02, 6.213e-01, 1.921e-01, 2.577e-01, -7.313e-01, -1.798e-01, 5.474e-02, -1.474e-01, 9.967e-02)); + r += mul(s3_3, M4(1.118e-02, 4.461e-01, 1.895e-01, 2.578e-02, 2.339e-01, -1.504e-01, 7.090e-02, 5.091e-01, 1.061e-01, 5.312e-01, 3.560e-01, 6.841e-01, -3.614e-01, 2.816e-01, -1.585e-02, 1.301e-02)); + r += mul(s3_4, M4(3.878e-01, 2.609e-01, 3.861e-01, -6.379e-03, 5.563e-01, -3.751e-01, 5.401e-01, -1.423e-01, 4.032e-01, 5.927e-01, 3.945e-01, -2.637e-01, -2.969e-01, -1.072e-01, 5.569e-01, -8.054e-02)); + r += mul(s3_5, M4(6.107e-01, -3.043e-01, -1.916e-01, -3.358e-02, 1.300e-01, -1.824e-02, -6.283e-02, 5.680e-02, -5.793e-01, 4.155e-01, -4.202e-01, 7.750e-01, -7.841e-02, 2.290e-01, -3.769e-02, 5.627e-02)); + r += mul(s3_6, M4(-5.899e-01, -1.017e-01, -4.870e-01, -4.643e-01, 3.543e-01, -1.443e-01, -1.256e-02, -1.110e-01, -8.513e-01, -6.722e-02, 1.312e-01, 1.854e-01, -2.233e-01, 3.904e-01, -1.856e-02, -2.336e-01)); + r += mul(s3_7, M4(2.534e-01, -2.846e-01, -1.823e-01, 2.098e-01, 3.611e-01, -2.704e-02, -3.301e-01, -1.224e-01, -6.436e-02, 3.539e-01, 7.943e-01, -5.205e-01, -2.392e-01, -6.709e-03, -1.596e-01, 1.236e-01)); + r += mul(s3_8, M4(-5.994e-02, -9.951e-02, 1.625e-01, 1.883e-01, 3.550e-01, -9.783e-02, 2.766e-03, 5.728e-02, -9.677e-01, -1.069e+00, -1.305e+00, 1.258e-01, -1.179e-01, 1.214e-01, 1.990e-01, -2.664e-02)); + r += mul(s4_0, 
M4(-3.032e-02, -6.341e-02, 1.955e-01, 1.018e-01, -4.848e-02, 6.057e-02, -3.525e-02, 4.091e-01, 2.563e-02, 1.286e-01, -9.047e-03, -4.433e-02, 4.546e-02, 1.680e-02, -1.427e-01, -1.832e-01)); + r += mul(s4_1, M4(-2.021e-01, -5.621e-02, 1.996e-01, 1.614e-01, 1.218e-01, 3.792e-02, -2.175e-02, 2.088e-01, -8.895e-03, 1.143e-01, 5.991e-02, -3.941e-03, -1.036e-01, 9.451e-02, -2.531e-01, -6.738e-01)); + r += mul(s4_2, M4(-1.508e-01, -4.459e-02, 3.929e-01, -4.255e-02, -1.566e-02, -1.802e-02, -1.293e-01, 7.579e-02, 9.496e-02, -3.538e-02, -5.857e-02, 2.837e-02, -4.240e-01, -7.953e-02, -2.042e-01, -1.126e-02)); + r += mul(s4_3, M4(5.402e-02, 1.244e-01, -1.710e-01, 1.625e-01, 2.407e-01, 6.736e-02, 1.839e-02, 1.246e-01, -9.068e-02, -1.437e-01, -1.721e-01, -3.842e-02, 2.311e-02, -3.309e-02, -2.087e-01, -2.505e-01)); + r += mul(s4_4, M4(1.210e-01, 2.186e-01, -6.710e-01, 3.244e-01, 9.447e-02, 4.243e-02, 1.338e-01, 6.541e-02, 5.722e-02, 1.866e-01, -1.403e-02, -2.865e-01, -3.433e-01, 1.530e-01, -2.228e-01, -1.079e-01)); + r += mul(s4_5, M4(1.869e-01, -2.484e-01, -6.551e-02, 1.844e-01, 8.618e-02, 2.156e-01, -1.590e-01, -4.953e-02, -6.444e-02, -2.472e-03, 6.103e-02, 8.334e-02, 1.835e-01, 2.341e-01, -4.364e-01, -5.950e-01)); + r += mul(s4_6, M4(6.230e-02, -1.515e-01, -1.308e-01, 3.179e-02, -1.954e-02, -1.493e-01, -3.406e-02, 3.548e-02, -1.568e-01, -2.195e-02, 3.112e-02, -5.072e-02, -1.208e-02, -3.167e-02, 7.318e-03, -4.706e-02)); + r += mul(s4_7, M4(2.234e-01, 2.180e-01, -2.030e-01, 9.332e-02, -1.910e-01, -8.071e-02, 2.195e-02, -1.386e-01, -1.905e-02, -3.062e-02, -9.065e-02, -1.303e-02, 5.781e-02, -5.280e-02, -8.862e-02, -3.476e-01)); + r += mul(s4_8, M4(5.860e-02, -7.210e-02, -6.373e-02, 1.618e-01, 7.297e-02, -2.267e-01, 7.240e-02, -1.140e-01, 4.987e-03, -4.222e-04, 8.160e-02, 4.866e-02, 1.083e-01, 3.635e-01, -3.079e-01, -3.762e-02)); + r += mul(s5_0, M4(5.123e-02, 2.424e-02, -1.494e-02, 3.692e-02, -3.079e-01, -4.090e-02, -2.838e-01, -4.183e-01, 1.221e-01, -2.098e-01, 1.794e-01, 
1.631e-01, -5.795e-02, -2.088e-04, -4.571e-02, 1.750e-02)); + r += mul(s5_1, M4(1.982e-01, 6.487e-02, 1.458e-01, -2.131e-01, -3.524e-01, 7.864e-02, -3.447e-01, 4.320e-03, 7.981e-02, 1.825e-01, -3.480e-01, 2.505e-01, 2.295e-01, -2.307e-02, 2.583e-01, 1.639e-01)); + r += mul(s5_2, M4(-8.136e-02, -5.082e-02, 2.298e-01, -4.061e-01, -2.214e-02, 2.459e-01, 3.081e-01, -6.793e-02, 3.698e-01, -2.927e-01, 1.958e-01, -1.898e-01, 5.514e-01, 3.832e-01, 1.916e-01, 1.643e-01)); + r += mul(s5_3, M4(-2.190e-01, 6.332e-02, 5.692e-02, 1.172e-01, 2.420e-03, 1.279e-01, 1.332e-01, 1.204e-01, -3.227e-01, -1.628e-01, 4.120e-01, 7.975e-01, 1.591e-01, 1.314e-01, -3.430e-02, 1.888e-01)); + r += mul(s5_4, M4(-1.219e-03, -3.286e-01, -1.121e-01, 6.413e-02, 1.490e-01, -2.987e-02, 2.308e-01, 1.192e-01, 5.129e-01, -8.246e-01, -3.672e-01, -3.180e-01, 5.412e-02, -2.440e-01, 1.461e-01, 9.319e-01)); + r += mul(s5_5, M4(1.519e-01, 1.086e-01, -2.430e-01, -1.782e-01, 2.780e-02, -1.388e-01, 1.403e-01, -2.707e-01, -1.823e-01, -7.689e-02, -3.769e-01, 2.586e-02, 4.413e-01, -1.567e-01, -5.041e-01, 3.854e-01)); + r += mul(s5_6, M4(-1.499e-01, 9.062e-02, 4.797e-02, -4.072e-02, -1.247e-01, 7.550e-02, -7.091e-03, 8.485e-02, 2.935e-01, -2.076e-01, 2.701e-01, 1.064e-01, -2.023e-02, 2.579e-02, -7.142e-02, 2.565e-02)); + r += mul(s5_7, M4(-1.786e-01, 8.747e-04, 4.484e-01, -1.268e-01, -1.581e-01, 6.147e-05, 6.692e-02, -8.129e-02, -8.691e-01, -3.015e-01, 9.455e-05, 3.964e-01, -3.570e-02, -7.192e-02, 1.507e-01, 9.453e-03)); + r += mul(s5_8, M4(-1.882e-01, -1.928e-01, -8.102e-02, -1.670e-01, 4.506e-02, -6.630e-02, -4.875e-02, 4.468e-02, 2.512e-01, -5.685e-01, 1.966e-01, -1.942e-02, 1.186e-01, -1.460e-01, 3.551e-01, 1.879e-01)); + r += mul(s6_0, M4(8.141e-02, -3.845e-02, 7.287e-02, 9.700e-02, 9.438e-02, -7.974e-03, 4.607e-02, -5.128e-02, 2.503e-01, 8.654e-02, -7.195e-02, -9.607e-02, 2.067e-01, 2.189e-01, -2.576e-02, -3.547e-02)); + r += mul(s6_1, M4(2.699e-01, -1.357e-01, -6.720e-02, 1.940e-01, 2.150e-01, -3.424e-02, 
7.872e-02, -1.613e-01, -1.033e-01, 2.954e-01, -1.858e-01, -1.118e-01, 2.068e-01, -1.190e-01, 2.042e-01, 2.835e-01)); + r += mul(s6_2, M4(2.463e-01, 7.189e-03, -3.181e-02, 7.164e-02, 2.184e-01, 5.321e-02, 4.966e-02, -4.515e-02, -1.937e-01, -1.148e-01, 1.757e-01, 1.013e-01, 2.788e-01, 3.969e-01, -2.966e-01, 9.242e-02)); + r += mul(s6_3, M4(3.772e-02, -2.783e-01, 1.094e-01, 2.456e-01, 1.855e-02, 1.505e-01, -5.080e-02, -3.030e-01, -4.574e-02, -9.735e-02, -1.553e-01, 6.876e-02, 8.814e-02, 2.232e-01, 6.247e-01, 1.967e-01)); + r += mul(s6_4, M4(-1.978e-01, -1.439e-01, 4.346e-01, 5.743e-01, -4.299e-02, 4.223e-03, -1.153e-01, 2.788e-01, -3.678e-02, -3.505e-02, 1.128e-01, -1.830e-01, 2.464e-01, -1.722e-02, 5.527e-02, -2.253e-01)); + r += mul(s6_5, M4(-2.249e-01, 3.839e-01, 4.971e-01, -5.159e-03, 3.196e-01, 8.920e-02, -2.112e-01, 1.966e-01, -6.575e-02, 7.505e-02, 5.945e-02, 5.247e-03, 3.308e-01, 8.299e-02, 5.452e-01, 3.187e-02)); + r += mul(s6_6, M4(1.012e-01, 1.740e-01, 7.561e-02, 2.773e-01, -4.444e-02, 1.531e-01, -5.408e-02, -8.835e-02, 5.727e-02, -1.559e-01, 1.616e-01, -7.356e-02, -5.860e-02, 3.471e-01, 2.632e-01, 7.234e-02)); + r += mul(s6_7, M4(5.798e-01, -6.216e-01, 4.944e-01, 3.934e-01, 3.583e-02, -1.304e-01, 2.764e-01, -3.079e-02, 9.659e-03, -2.553e-02, -2.200e-01, -1.916e-02, -9.476e-02, -1.926e-01, 2.034e-01, -1.094e-01)); + r += mul(s6_8, M4(8.659e-02, 4.910e-01, 9.655e-02, 4.138e-01, 1.551e-01, 1.225e-01, -5.679e-02, 2.525e-01, 1.482e-02, -3.780e-02, 2.543e-04, 2.486e-02, 1.498e-01, 6.908e-03, 1.059e-01, -1.474e-01)); + r += mul(s7_0, M4(4.399e-02, -4.862e-02, 1.315e-02, 2.402e-02, -2.072e-01, -6.355e-03, 1.287e-01, 6.578e-02, -4.475e-02, 8.972e-02, 4.329e-01, -3.481e-02, 5.967e-02, -1.230e-01, 8.428e-02, 2.964e-02)); + r += mul(s7_1, M4(3.685e-02, 3.037e-02, 7.514e-02, 6.420e-02, -2.080e-01, -2.264e-01, 2.694e-01, -8.082e-02, -1.052e-01, 3.057e-01, 3.371e-01, -1.912e-01, -1.573e-01, 6.183e-02, 3.040e-01, -5.470e-02)); + r += mul(s7_2, M4(-6.083e-02, -7.165e-02, 
2.446e-02, 1.037e-01, -1.651e-02, -5.348e-02, 1.667e-01, 4.276e-02, 5.923e-02, 9.977e-02, -6.333e-02, 2.133e-02, -1.822e-02, 7.829e-02, 1.517e-01, -2.538e-02)); + r += mul(s7_3, M4(1.227e-01, 5.079e-02, -9.135e-02, 1.125e-01, 2.348e-01, -2.053e-01, 1.615e-01, 1.555e-01, -3.941e-02, 3.071e-02, -1.090e-01, -9.880e-02, -5.530e-02, 1.262e-01, -3.636e-02, 4.667e-01)); + r += mul(s7_4, M4(-1.731e-01, 8.569e-03, -2.809e-01, -1.117e-01, -2.717e-01, 2.132e-01, 1.324e-01, 2.141e-01, -9.432e-02, -2.099e-01, 2.085e-01, 1.134e-01, 6.100e-02, -8.023e-02, -3.192e-01, -1.051e-01)); + r += mul(s7_5, M4(-8.209e-02, 4.550e-02, -1.288e-01, -2.638e-02, 3.431e-02, -5.752e-02, 4.472e-01, -6.426e-02, -2.833e-02, 6.020e-02, -1.754e-01, 3.198e-02, -6.964e-02, -5.407e-02, -1.586e-01, -1.372e-01)); + r += mul(s7_6, M4(1.140e-01, -5.882e-02, -1.879e-01, -1.477e-01, -2.029e-01, -2.467e-01, 1.996e-01, -2.316e-01, 9.466e-02, -7.990e-02, 1.334e-01, 1.830e-01, 1.063e-01, -9.067e-02, -4.511e-02, 1.682e-01)); + r += mul(s7_7, M4(-1.825e-01, 6.514e-02, -2.042e-02, 8.116e-02, 4.532e-02, 6.420e-02, -7.849e-02, -1.527e-01, -6.620e-02, -1.242e-02, 9.573e-02, 1.212e-01, 4.090e-02, -5.942e-02, -4.354e-02, -8.863e-03)); + r += mul(s7_8, M4(-2.034e-02, -1.613e-01, 2.712e-01, -5.498e-01, -2.044e-01, -1.601e-01, 1.976e-01, 1.045e-01, 5.450e-02, 1.137e-02, 1.852e-02, -5.018e-02, 4.689e-02, -3.913e-03, 4.239e-02, -8.641e-02)); + r += V4(2.400e-02, -8.304e-02, -1.885e-02, 5.432e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 
s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.418e-01, -4.907e-02, -5.833e-02, -2.070e-01, -1.209e-01, -5.461e-02, -5.275e-03, 1.633e-01, 4.137e-02, -2.121e-02, 1.462e-01, -5.782e-03, 1.371e-01, 5.658e-02, -3.937e-02, 1.050e-01)); + r += mul(s0_1, M4(-9.628e-02, -7.284e-02, -2.083e-02, 1.706e-01, -3.409e-03, -4.350e-02, 1.405e-01, -7.835e-02, -4.200e-03, -1.420e-01, 2.099e-01, -1.840e-01, -1.310e-01, 1.949e-01, -2.656e-01, -1.484e-02)); + r += mul(s0_2, M4(1.296e-01, 2.711e-02, -3.426e-01, -3.043e-02, 2.776e-03, -1.989e-02, -6.253e-02, -4.567e-02, -4.105e-02, -9.127e-02, 4.685e-02, -2.144e-02, 3.181e-02, 6.371e-03, -3.940e-02, -9.416e-02)); + r += mul(s0_3, M4(1.723e-01, -6.048e-02, -1.782e-01, -8.933e-01, 1.664e-01, -9.454e-02, -5.299e-02, 1.880e-02, -1.568e-02, 1.427e-01, -1.972e-01, 3.494e-01, -1.890e-01, -1.659e-01, 1.094e-02, -2.885e-01)); + r += mul(s0_4, M4(1.079e-01, -3.422e-01, -6.410e-02, -1.914e-01, -6.375e-03, -2.001e-01, 1.226e-01, -1.900e-01, -1.069e-01, -2.609e-01, 3.074e-01, 6.818e-02, 3.675e-01, -3.195e-01, 3.132e-01, -3.454e-01)); + r += mul(s0_5, M4(-7.183e-02, -1.126e-01, 5.538e-02, -2.937e-02, -1.519e-01, 1.925e-02, -4.814e-02, 1.253e-01, -7.361e-02, 1.049e-01, 7.996e-03, 4.405e-02, 2.253e-01, -4.899e-01, -3.483e-01, -6.220e-02)); + r += mul(s0_6, M4(9.797e-02, -2.673e-01, -2.695e-02, -1.713e-01, -3.551e-02, 1.042e-01, 2.053e-02, 4.270e-02, 9.539e-02, -4.254e-03, -8.561e-02, 3.853e-02, 1.075e-02, -1.115e-01, -3.529e-02, -5.024e-01)); + r += mul(s0_7, M4(7.233e-02, -1.376e-01, -3.618e-02, -1.990e-01, 1.102e-01, 1.261e-01, 1.678e-02, -1.131e-02, -1.971e-01, 1.376e-01, -1.146e-01, -2.335e-01, -9.289e-02, -5.176e-01, -1.518e-03, -5.518e-01)); + r += mul(s0_8, M4(2.104e-04, -5.291e-02, 6.975e-02, -1.103e-02, -7.167e-02, 1.589e-01, -1.667e-01, -1.297e-02, 1.497e-01, 9.252e-02, -3.696e-02, 6.041e-03, -1.474e-02, -3.651e-01, 
1.605e-01, -3.741e-01)); + r += mul(s1_0, M4(1.663e-01, 2.512e-01, -1.903e-02, 2.369e-01, 4.960e-04, -1.401e-01, 8.121e-02, 7.009e-02, -1.363e-01, -1.888e-01, -1.621e-02, -4.053e-01, 2.978e-02, -3.047e-02, 4.469e-02, 3.221e-02)); + r += mul(s1_1, M4(5.846e-03, 3.942e-01, -2.466e-01, 3.845e-02, -1.473e-01, 1.353e-02, 5.121e-02, -8.860e-02, 6.164e-01, -2.217e-01, -1.919e-01, -3.862e-01, -7.604e-02, 1.613e-01, -3.184e-03, 2.655e-02)); + r += mul(s1_2, M4(9.709e-03, -3.816e-02, 1.620e-01, 1.450e-01, -8.520e-02, -2.075e-02, -2.589e-02, 3.698e-02, 2.418e-01, -2.286e-01, 1.079e-01, -1.842e-01, 1.802e-01, 2.779e-02, 1.030e-01, 1.619e-01)); + r += mul(s1_3, M4(4.478e-02, 2.673e-01, 1.790e-01, 8.634e-01, 1.264e-01, -1.054e-01, -5.468e-02, 9.383e-02, 5.269e-02, -1.739e-01, 1.309e-01, -4.688e-02, -9.362e-02, 1.972e-01, 3.641e-02, 1.954e-01)); + r += mul(s1_4, M4(6.388e-02, 3.896e-01, -3.653e-02, 2.251e-01, -2.954e-03, -1.042e-01, 1.853e-01, -4.550e-02, -1.051e-01, 3.389e-01, -4.250e-01, -2.007e-01, -5.688e-01, -1.508e-01, 7.064e-01, 6.004e-02)); + r += mul(s1_5, M4(-9.950e-02, 5.541e-02, -2.128e-02, -4.875e-02, -1.383e-01, 3.354e-02, -8.311e-02, 6.638e-02, 1.227e-01, -9.693e-02, -2.800e-01, -1.541e-01, -1.614e-01, 1.966e-01, -7.400e-02, 2.437e-01)); + r += mul(s1_6, M4(-2.248e-01, 1.920e-01, -1.348e-01, 1.644e-01, 7.828e-02, 6.507e-02, -1.169e-01, -2.298e-03, -5.534e-02, -3.319e-02, 8.279e-02, -3.367e-01, 1.957e-02, -9.159e-02, 3.740e-02, 1.285e-01)); + r += mul(s1_7, M4(-1.554e-01, 2.691e-01, -1.619e-03, 2.286e-01, 1.322e-01, 1.854e-01, 3.022e-02, -1.145e-01, 2.533e-01, -4.843e-02, -1.417e-01, 8.853e-02, 1.967e-01, 1.184e-01, 1.316e-01, 1.815e-01)); + r += mul(s1_8, M4(-4.295e-02, 6.376e-02, -3.526e-02, -9.822e-03, -9.027e-02, 1.269e-01, -1.316e-01, -4.076e-02, 1.872e-01, -2.292e-01, -1.654e-02, -4.104e-02, 4.792e-02, 7.334e-04, 8.095e-02, 2.344e-02)); + r += mul(s2_0, M4(-1.002e-01, 5.501e-02, -9.044e-02, -2.085e-01, 2.396e-01, 9.810e-03, 1.250e-01, -8.117e-02, -8.548e-02, 
1.014e-01, 5.014e-03, 4.902e-02, -4.588e-02, -6.699e-02, 5.775e-02, 4.000e-02)); + r += mul(s2_1, M4(4.693e-02, -1.771e-02, -4.663e-02, 9.740e-03, 2.533e-01, -6.075e-03, 2.745e-01, -1.388e-01, -6.714e-03, 6.294e-02, -1.463e-01, 5.952e-02, 8.493e-03, 1.575e-02, -2.039e-02, -7.066e-02)); + r += mul(s2_2, M4(2.990e-02, -1.827e-02, 1.280e-01, -4.732e-03, -3.333e-02, 9.852e-02, 6.789e-03, 7.925e-02, 2.172e-01, 1.211e-02, 5.048e-03, -5.788e-03, -1.174e-01, -7.720e-02, 1.081e-01, 2.216e-02)); + r += mul(s2_3, M4(-3.294e-02, -4.414e-02, 2.925e-02, -2.467e-02, 7.990e-01, 2.652e-01, -6.670e-02, -2.865e-01, -7.467e-02, -7.593e-02, 2.950e-02, 2.605e-02, 1.218e-01, 8.940e-02, 4.734e-02, 1.893e-01)); + r += mul(s2_4, M4(5.450e-02, -1.838e-02, 4.478e-03, 3.214e-02, 6.661e-01, -4.739e-01, 2.007e-01, -9.035e-03, -9.837e-02, -2.274e-03, -7.101e-02, 8.342e-02, 2.435e-01, 3.271e-01, -4.180e-01, 1.528e-01)); + r += mul(s2_5, M4(1.515e-01, -5.193e-03, -2.831e-02, -7.512e-03, 3.126e-02, -4.437e-02, 3.982e-02, 3.651e-02, -1.386e-01, 8.916e-03, 3.549e-02, 2.244e-02, -7.115e-02, -5.704e-03, 4.866e-03, -8.916e-02)); + r += mul(s2_6, M4(1.310e-02, -1.523e-02, 9.267e-02, 4.946e-02, 4.964e-01, 2.664e-02, -6.537e-02, -4.119e-01, -1.304e-01, 3.533e-02, -4.697e-02, -9.565e-02, -1.232e-01, 3.422e-02, 7.040e-02, -3.372e-02)); + r += mul(s2_7, M4(-1.129e-01, 1.396e-01, 1.748e-01, -1.426e-02, 3.643e-01, -1.541e-01, -7.931e-02, -1.284e-01, 2.232e-02, -4.469e-02, -3.891e-02, -3.585e-02, 5.561e-02, 2.462e-01, 3.234e-02, 2.021e-01)); + r += mul(s2_8, M4(-1.359e-02, -4.823e-02, -3.557e-02, 2.589e-02, 1.714e-01, 3.752e-04, -8.612e-02, 3.571e-03, 1.894e-02, -5.221e-02, 4.723e-02, -8.858e-02, -9.333e-03, 3.621e-03, 1.998e-02, -5.063e-03)); + r += mul(s3_0, M4(2.641e-01, 3.315e-01, 4.508e-01, 4.795e-01, -2.357e-01, -8.855e-02, 2.665e-01, -5.523e-02, 1.919e-01, 2.029e-01, -2.329e-01, 1.520e-01, -5.090e-02, -8.241e-02, 1.356e-02, 1.958e-02)); + r += mul(s3_1, M4(-9.005e-03, -1.667e-01, 3.307e-01, -2.594e-02, 
-1.753e-01, -3.024e-01, 1.903e-01, -1.350e-01, -4.514e-01, -3.310e-01, -6.884e-01, 3.825e-01, 1.908e-01, -1.299e-01, 8.744e-02, -4.903e-02)); + r += mul(s3_2, M4(1.283e-01, 2.811e-01, 2.186e-01, -1.949e-01, 2.913e-02, -2.319e-02, -7.935e-02, -1.218e-01, 5.260e-01, 1.168e+00, -3.059e-01, 6.219e-02, -3.941e-02, 8.204e-02, -1.651e-01, -3.590e-02)); + r += mul(s3_3, M4(-2.824e-01, -2.941e-01, -2.375e-01, 3.744e-01, -1.475e-01, 1.733e-01, 6.679e-01, 1.257e-01, -4.539e-01, 3.346e-01, 7.491e-02, -6.180e-01, 2.857e-01, -1.645e-01, 2.666e-01, -6.673e-01)); + r += mul(s3_4, M4(-1.184e-01, -2.264e-01, -8.824e-02, 2.530e-04, -1.127e-01, 3.692e-02, -3.473e-01, 1.687e-03, -8.749e-01, -4.262e-01, 2.562e-01, 7.095e-01, -1.857e-01, -3.273e-01, 3.071e-02, -2.807e-01)); + r += mul(s3_5, M4(-1.840e-01, -1.353e-01, 7.658e-03, -9.348e-02, -1.108e-01, 2.537e-01, 2.278e-01, 7.551e-03, 9.690e-01, 6.814e-01, 6.398e-01, -2.575e-01, 1.214e-01, -3.526e-01, -1.120e-01, -1.305e-01)); + r += mul(s3_6, M4(4.187e-01, 3.849e-02, -2.515e-01, 3.674e-01, -2.762e-01, 1.309e-01, -1.209e-01, 5.010e-01, 9.787e-02, 2.315e-01, -1.820e-01, -2.462e-01, 1.978e-01, -4.533e-01, 1.127e-04, -1.071e-01)); + r += mul(s3_7, M4(2.133e-01, -9.526e-02, -6.901e-01, -7.056e-01, -2.181e-01, 2.398e-01, 1.889e-02, 3.611e-01, -1.576e+00, -7.019e-02, -6.407e-01, -2.719e-01, 1.921e-01, 3.341e-02, 1.196e-02, -5.842e-01)); + r += mul(s3_8, M4(5.534e-01, 1.108e-01, 7.098e-02, -4.399e-01, -1.019e-01, 8.808e-02, 2.568e-02, 2.692e-02, -7.789e-02, 4.973e-01, 1.098e+00, 1.720e-01, 1.772e-01, -2.268e-01, 2.061e-01, 4.719e-02)); + r += mul(s4_0, M4(1.587e-01, 1.536e-01, -1.577e-01, 2.578e-01, -1.157e-01, -1.004e-01, 2.266e-01, -7.324e-02, 3.054e-02, -6.088e-02, 1.640e-01, -1.862e-02, 1.644e-01, -7.456e-03, -1.357e-01, -6.408e-02)); + r += mul(s4_1, M4(-1.253e-02, 1.927e-01, -4.527e-01, -3.137e-02, 7.477e-02, -3.825e-01, 2.155e-01, -2.479e-01, -5.870e-02, -1.261e-01, 7.608e-02, 5.194e-02, 2.882e-01, 5.250e-01, -6.071e-01, 2.978e-02)); + r 
+= mul(s4_2, M4(-1.729e-01, 1.483e-01, -3.575e-02, 2.951e-01, -9.167e-02, -3.164e-01, 5.163e-02, -2.111e-01, -1.754e-02, -8.125e-02, -1.560e-02, 9.648e-02, 4.462e-01, 1.731e-01, -2.170e-01, -6.087e-02)); + r += mul(s4_3, M4(-1.811e-01, -8.706e-02, 2.581e-02, -5.347e-02, 1.550e-01, 1.538e-01, -4.210e-02, -2.730e-02, 1.282e-02, -1.373e-01, 1.264e-02, 8.382e-03, -2.971e-01, -4.154e-03, -1.343e-01, 9.709e-02)); + r += mul(s4_4, M4(2.233e-01, -1.945e-01, 1.350e-01, -5.341e-02, 2.933e-01, -9.302e-02, 1.302e-02, 4.402e-02, -9.468e-02, 3.817e-01, -4.733e-01, -6.991e-02, 5.489e-01, -2.329e-01, 3.871e-01, -5.727e-01)); + r += mul(s4_5, M4(-1.585e-02, -9.024e-02, -9.388e-02, 7.590e-02, 8.037e-02, 8.891e-02, -2.049e-01, 4.058e-02, -2.567e-02, -6.523e-02, 1.244e-01, -9.568e-02, 7.226e-01, -6.335e-01, -1.637e-01, 1.593e-01)); + r += mul(s4_6, M4(-1.601e-01, 9.097e-02, -2.821e-02, 3.879e-03, -7.961e-02, 6.534e-02, 5.302e-02, 3.970e-03, 2.394e-03, 8.816e-02, 8.474e-02, -1.101e-02, -1.189e-01, -4.395e-02, 9.528e-02, -5.206e-02)); + r += mul(s4_7, M4(-1.709e-01, -1.114e-01, 5.967e-02, -1.459e-02, 5.295e-02, 7.691e-02, 1.043e-02, 2.034e-01, 1.798e-01, 1.501e-01, 2.531e-03, 5.915e-02, -2.368e-01, -9.150e-05, 1.774e-01, 1.769e-01)); + r += mul(s4_8, M4(-1.248e-01, 5.240e-02, 3.076e-02, 1.944e-02, -1.145e-01, 9.493e-02, -8.435e-02, -8.691e-02, 1.657e-02, -4.717e-03, -2.467e-02, -3.439e-02, -7.809e-02, -1.662e-01, 4.556e-01, 3.114e-01)); + r += mul(s5_0, M4(2.135e-01, -5.993e-02, 2.290e-01, 1.518e-01, 3.434e-01, -2.914e-01, 3.053e-02, -5.904e-01, 2.180e-01, 1.652e-01, -2.240e-01, -1.576e-01, -3.795e-02, 8.326e-02, 4.146e-02, -1.860e-01)); + r += mul(s5_1, M4(-1.012e-01, 4.538e-02, -4.146e-03, -2.427e-02, -6.541e-01, 1.180e-01, 5.420e-02, -1.134e-01, -2.636e-01, -2.055e-01, 2.984e-03, -5.246e-02, -3.146e-01, 3.864e-02, 1.169e-01, 2.068e-01)); + r += mul(s5_2, M4(1.581e-01, 1.069e-01, -1.180e-01, -4.234e-02, -1.146e-01, -1.985e-01, 1.692e-01, -3.268e-01, -5.819e-02, 3.629e-02, -2.507e-01, 
-2.992e-01, -1.950e-01, -1.803e-01, 4.073e-01, -5.513e-02)); + r += mul(s5_3, M4(1.895e-01, -9.019e-02, 3.446e-01, 2.836e-02, 2.208e-01, 6.808e-02, 5.954e-02, -9.321e-02, 1.590e-01, -3.876e-01, 1.642e-01, -6.263e-01, 9.162e-02, 6.038e-02, 4.329e-02, 1.823e-01)); + r += mul(s5_4, M4(5.040e-02, 2.126e-01, 1.964e-01, -9.975e-04, 1.943e-01, 5.599e-02, -3.350e-02, -3.520e-01, 7.714e-02, -1.129e+00, 2.793e-01, -7.623e-01, 4.531e-02, 1.862e-01, 2.894e-01, 9.525e-02)); + r += mul(s5_5, M4(5.389e-02, 5.149e-02, -2.258e-02, -8.668e-02, 1.733e-01, 1.634e-02, -1.569e-01, -1.869e-01, 2.677e-01, -5.336e-01, -2.514e-01, -3.674e-01, 1.247e-01, 3.136e-01, -4.908e-01, 1.574e-01)); + r += mul(s5_6, M4(1.038e-01, -2.041e-02, 2.091e-02, -5.148e-03, 1.047e-01, 8.839e-03, -2.202e-02, 6.192e-02, 4.809e-01, -4.174e-01, 2.981e-01, -7.936e-02, -9.874e-02, -8.878e-02, 1.795e-02, -1.034e-01)); + r += mul(s5_7, M4(-2.322e-02, -6.292e-02, -1.332e-01, -3.057e-01, -8.255e-02, 6.054e-02, 7.983e-02, -2.289e-01, -2.134e-01, -4.304e-01, -1.994e-01, -6.384e-01, 1.080e-01, -1.530e-02, -1.238e-01, -1.491e-01)); + r += mul(s5_8, M4(8.246e-02, -5.373e-02, -4.011e-02, 2.231e-02, -4.627e-02, -1.188e-01, -4.430e-02, -9.327e-02, -1.237e-01, -9.078e-02, 1.438e-01, -8.875e-02, -3.324e-02, -2.484e-01, -3.955e-02, 2.300e-02)); + r += mul(s6_0, M4(1.192e-03, 3.644e-02, -3.324e-02, 5.183e-02, -1.500e-01, 1.644e-01, -1.594e-02, 1.351e-01, 1.059e-02, -1.694e-01, -1.076e-01, -1.518e-01, -7.045e-02, -2.294e-01, -1.497e-01, 7.650e-01)); + r += mul(s6_1, M4(-5.869e-02, 1.637e-01, 1.052e-01, 5.075e-02, 2.782e-02, 4.687e-02, -2.417e-01, 5.446e-02, 2.118e-01, 5.241e-02, -2.148e-01, -4.308e-02, -7.045e-01, 3.529e-01, -4.069e-01, 3.599e-01)); + r += mul(s6_2, M4(-2.914e-01, -4.515e-03, 1.882e-01, -5.199e-02, -5.184e-03, 8.319e-02, -5.139e-02, -8.999e-02, -9.549e-02, 2.413e-03, -4.866e-02, 1.453e-01, -2.063e-01, 2.463e-01, 6.671e-02, 6.351e-02)); + r += mul(s6_3, M4(7.447e-02, -2.294e-03, 1.910e-02, -2.190e-01, -2.705e-01, 
-5.867e-03, -1.861e-01, 1.657e-01, 1.362e-02, -1.587e-01, 2.105e-01, -1.370e-01, -2.316e-01, 3.930e-02, 5.674e-03, 2.345e-03)); + r += mul(s6_4, M4(1.365e-02, -5.652e-01, 3.707e-01, -2.277e-01, -1.733e-01, 1.912e-01, 1.706e-01, 1.572e-01, 7.769e-02, -8.719e-02, 1.378e-01, -1.233e-02, -2.064e-01, 3.939e-01, -2.783e-01, 2.233e-01)); + r += mul(s6_5, M4(-2.941e-01, -9.158e-03, 9.300e-02, -1.640e-01, -1.032e-01, 1.259e-01, 4.342e-02, -3.282e-03, 4.791e-02, -8.513e-03, 1.481e-01, 2.104e-02, 6.097e-02, 2.498e-02, 4.170e-02, -1.108e-01)); + r += mul(s6_6, M4(3.965e-01, -6.470e-02, 2.955e-01, -5.753e-02, -4.037e-03, -2.916e-02, 8.991e-02, 1.365e-01, -1.050e-01, -2.876e-02, -4.556e-02, -1.204e-01, 2.740e-01, -7.769e-02, -6.419e-02, 7.562e-02)); + r += mul(s6_7, M4(4.449e-01, 9.539e-02, 4.915e-01, 1.838e-01, -1.030e-01, -1.530e-01, -1.382e-01, 5.573e-03, 2.141e-02, -4.057e-03, 2.855e-02, 2.697e-01, -1.344e-01, 2.599e-01, -8.800e-02, -2.466e-01)); + r += mul(s6_8, M4(2.154e-03, -2.552e-01, -5.128e-01, 1.831e-02, 1.020e-01, -1.913e-01, 2.779e-01, -2.477e-02, -2.670e-02, 9.595e-02, -3.882e-02, 6.970e-02, -2.032e-01, 4.113e-02, 8.961e-03, 9.796e-02)); + r += mul(s7_0, M4(9.631e-02, 2.773e-02, 1.361e-02, -6.150e-02, 2.253e-01, 6.211e-02, -1.811e-01, 2.811e-02, -1.038e-01, -9.217e-02, -3.777e-01, 1.273e-01, 1.210e-01, 7.055e-02, 3.595e-02, 5.174e-02)); + r += mul(s7_1, M4(4.247e-02, 1.466e-02, 3.899e-02, 5.292e-02, 3.122e-01, -3.013e-02, 3.042e-02, 2.134e-01, 2.280e-02, -3.049e-01, -1.718e-01, -6.173e-02, 1.016e-02, 2.278e-02, 3.504e-02, 1.785e-03)); + r += mul(s7_2, M4(-3.382e-02, 8.445e-02, 9.257e-02, 5.136e-02, 1.694e-01, 7.343e-02, -1.552e-01, 1.099e-01, -1.514e-01, 8.443e-02, 2.600e-01, -2.022e-01, -2.503e-02, 3.217e-03, -2.207e-02, 1.450e-01)); + r += mul(s7_3, M4(1.311e-02, -6.610e-02, -9.563e-02, 7.953e-02, 1.718e-01, -2.105e-01, 1.410e-02, 7.725e-02, 4.814e-02, -3.939e-02, -1.998e-01, 2.058e-01, 4.837e-02, 1.069e-01, 4.654e-01, -1.726e-01)); + r += mul(s7_4, 
M4(-4.907e-02, -4.985e-03, 1.072e-01, 7.100e-02, 8.207e-01, -1.985e-01, 2.636e-02, -2.899e-01, -1.499e-01, 4.449e-01, -2.068e-01, -9.532e-02, 1.470e-01, -1.778e-01, -1.206e-01, -8.091e-02)); + r += mul(s7_5, M4(-1.207e-01, -6.268e-02, -2.007e-01, -2.291e-02, -3.847e-02, -2.381e-01, 2.108e-01, 7.411e-02, 1.324e-01, 5.207e-02, 2.269e-01, -2.144e-01, 7.696e-02, -3.380e-02, -1.811e-01, 9.073e-03)); + r += mul(s7_6, M4(-1.933e-01, -3.160e-02, -1.301e-02, 2.390e-02, 3.879e-02, 1.396e-01, -7.546e-03, -2.120e-01, 8.246e-03, 8.081e-02, 3.536e-03, -5.456e-02, -1.409e-01, -7.363e-02, 1.343e-01, -7.728e-02)); + r += mul(s7_7, M4(6.143e-02, -4.371e-02, -1.936e-01, -1.996e-01, 3.614e-01, -8.935e-02, -1.915e-01, -1.972e-02, 1.197e-01, 1.455e-01, 2.121e-03, -1.250e-01, 5.444e-02, -1.297e-02, -1.260e-01, 6.226e-02)); + r += mul(s7_8, M4(8.694e-02, 1.276e-01, 5.649e-02, 5.117e-02, 4.046e-02, -1.476e-02, 1.368e-01, -1.461e-01, 9.102e-02, -4.083e-02, -3.203e-02, -1.654e-01, -1.900e-01, -2.258e-03, -7.842e-02, 4.455e-02)); + r += V4(-2.429e-03, 1.297e-02, 9.825e-04, 2.920e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.024e-01, 1.614e-01, 2.969e-02, -9.242e-02, -9.832e-02, -1.728e-01, -2.218e-01, -1.139e-02, -1.441e-01, 7.603e-03, 6.030e-02, -1.796e-01, -3.607e-02, -6.201e-02, 1.123e-02, 9.867e-02)); + r += mul(s0_1, M4(1.035e-01, 
2.519e-01, -1.066e-02, 2.335e-01, 9.790e-02, 7.715e-02, 6.862e-02, -3.410e-02, -1.344e-02, 1.615e-02, -4.114e-02, -1.530e-01, -1.585e-01, 1.480e-01, 3.173e-01, 8.129e-02)); + r += mul(s0_2, M4(1.971e-02, 2.791e-02, 8.140e-03, -1.451e-01, 4.679e-02, 3.280e-02, 7.822e-03, -9.194e-02, 6.941e-02, -2.800e-02, -1.471e-01, -9.730e-02, -1.746e-01, 1.818e-02, 1.778e-01, -3.595e-02)); + r += mul(s0_3, M4(1.948e-02, 5.508e-02, -2.931e-02, 1.913e-02, -2.793e-02, 5.774e-02, 1.264e-02, -1.930e-02, 2.701e-01, -5.001e-02, 6.975e-02, 2.101e-01, -1.242e-02, 5.086e-02, 1.624e-01, -1.180e-01)); + r += mul(s0_4, M4(-1.038e-01, 1.854e-01, 2.881e-01, 1.693e-01, -5.477e-02, -3.241e-02, -1.576e-01, -4.147e-02, -2.655e-01, -6.189e-02, -6.585e-02, 2.051e-02, -2.719e-01, -2.128e-02, -6.742e-02, -1.151e+00)); + r += mul(s0_5, M4(-8.933e-04, 1.195e-01, 5.673e-02, -8.695e-02, -6.231e-02, 1.636e-01, 5.256e-02, -8.184e-02, 1.224e-01, 1.210e-01, -9.299e-02, -3.799e-02, -2.662e-01, -3.923e-02, 3.441e-01, -4.183e-01)); + r += mul(s0_6, M4(2.794e-02, 2.201e-01, 6.458e-02, -6.654e-03, 4.686e-02, -8.718e-02, 3.347e-02, 6.941e-02, 1.722e-03, 5.302e-02, 1.548e-02, 7.879e-02, -1.090e-01, -1.296e-01, 1.196e-01, -1.215e-01)); + r += mul(s0_7, M4(3.276e-01, 4.730e-01, 2.102e-01, -2.684e-02, 6.094e-02, -1.314e-01, -5.663e-02, 1.800e-01, -1.939e-01, 2.985e-02, -1.470e-01, 4.822e-02, -5.254e-01, -1.792e-01, -1.169e-01, 1.054e-01)); + r += mul(s0_8, M4(1.490e-01, 1.925e-01, 4.673e-03, 9.016e-02, -1.824e-01, 1.178e-01, -7.139e-02, 9.492e-02, -7.642e-02, -1.532e-01, 6.804e-02, -1.857e-02, -2.361e-01, 4.108e-02, 1.072e-01, -6.357e-02)); + r += mul(s1_0, M4(-3.124e-01, -1.010e-01, 2.682e-02, 9.663e-02, 1.685e-02, 2.384e-02, -1.228e-01, -8.220e-03, 1.251e-01, -8.046e-03, 8.745e-02, -3.563e-01, -1.724e-01, 1.384e-02, -1.248e-01, 1.518e-01)); + r += mul(s1_1, M4(-2.027e-01, 9.547e-02, -4.659e-03, 2.623e-01, -6.915e-02, 5.698e-02, -1.083e-01, -4.840e-02, 2.208e-01, 1.417e-01, 1.187e-01, -1.917e-01, 2.543e-01, 1.339e-01, 
8.921e-02, 3.216e-01)); + r += mul(s1_2, M4(1.834e-02, -1.707e-03, 1.831e-01, 5.512e-02, -1.295e-02, 9.339e-02, -4.129e-02, -6.925e-02, 3.520e-02, 4.108e-01, 2.608e-01, 2.010e-01, 5.027e-02, 1.602e-01, -9.465e-02, 1.882e-01)); + r += mul(s1_3, M4(5.955e-02, -2.485e-01, -7.986e-02, -3.319e-02, -1.972e-03, -2.394e-02, 1.219e-01, -5.178e-02, -5.744e-01, 3.955e-01, 1.148e-01, 3.070e-03, 2.324e-01, 9.480e-02, -2.293e-01, 1.773e-01)); + r += mul(s1_4, M4(-6.757e-02, 1.505e-01, -1.167e-01, -5.043e-02, 8.591e-05, -2.709e-01, -5.627e-02, 5.334e-03, 4.120e-01, -1.987e-01, 4.480e-01, -1.209e-01, 6.104e-01, 2.653e-02, -3.970e-01, -2.835e-01)); + r += mul(s1_5, M4(4.765e-03, 4.705e-02, 4.717e-02, 8.159e-03, -8.101e-04, 6.611e-02, 5.412e-02, -5.183e-03, -7.808e-02, -3.112e-01, 1.957e-01, 3.155e-01, 8.824e-02, -1.785e-02, -2.577e-01, 2.288e-01)); + r += mul(s1_6, M4(-8.924e-02, -3.349e-01, -1.986e-01, 8.754e-03, 5.885e-02, 9.169e-02, -1.576e-02, 5.966e-03, 1.663e-02, 1.397e-01, -1.041e-01, -2.549e-02, -1.444e-02, -8.100e-03, -1.193e-01, 7.517e-02)); + r += mul(s1_7, M4(-1.685e-01, -3.000e-01, -2.483e-01, -1.825e-02, -5.701e-02, -6.274e-02, -1.281e-01, 1.214e-01, 2.599e-01, 2.717e-01, 1.289e-01, 1.857e-02, 9.641e-02, -3.836e-01, -3.064e-02, 1.713e-01)); + r += mul(s1_8, M4(-4.192e-02, -6.364e-02, 2.977e-02, -8.860e-02, -1.118e-01, 1.485e-02, -7.474e-02, 1.263e-01, 1.917e-01, 2.233e-01, 4.108e-02, -4.125e-02, 3.258e-01, -5.616e-02, -1.479e-01, 1.414e-02)); + r += mul(s2_0, M4(-1.166e-01, 6.576e-02, -5.141e-02, 2.146e-02, 1.297e-01, -9.262e-02, -2.629e-01, -1.504e-01, 1.028e-02, 1.035e-01, 3.107e-02, 8.578e-02, -5.688e-03, 2.526e-03, 7.956e-02, 8.222e-02)); + r += mul(s2_1, M4(-2.237e-02, -2.554e-02, -3.804e-02, -9.550e-03, 5.775e-02, -1.148e-01, -5.972e-02, -1.597e-01, -8.328e-03, 8.419e-02, -7.435e-03, 1.412e-01, -2.015e-01, 1.395e-01, 8.639e-02, 2.914e-01)); + r += mul(s2_2, M4(6.010e-02, 6.698e-02, -9.117e-02, 5.730e-03, 1.754e-01, -4.930e-02, -2.671e-02, 5.044e-02, 4.720e-02, 
2.003e-01, 5.494e-04, 1.080e-01, 8.684e-02, 4.576e-02, 8.002e-02, 1.365e-01)); + r += mul(s2_3, M4(5.108e-02, -2.667e-02, -1.292e-01, -3.799e-02, 1.562e-01, -1.942e-01, -1.129e-01, -4.328e-02, -4.678e-02, 1.000e-02, -2.792e-02, -6.946e-02, -3.776e-02, -2.409e-01, -7.967e-02, -2.662e-01)); + r += mul(s2_4, M4(6.281e-02, -7.670e-02, -3.073e-02, 2.312e-02, 1.029e-01, 2.886e-01, -1.085e-01, 2.622e-01, 3.088e-02, 2.355e-02, 7.788e-02, 1.948e-02, 9.080e-02, -1.428e-01, -1.498e-01, 1.635e-01)); + r += mul(s2_5, M4(2.505e-02, 1.141e-01, 5.919e-02, -3.992e-02, 1.990e-01, 8.450e-03, -1.645e-01, 2.123e-02, -3.088e-02, 3.905e-02, -5.922e-02, -5.176e-02, -1.057e-01, 7.104e-02, -9.727e-02, -3.979e-02)); + r += mul(s2_6, M4(-1.883e-01, -6.903e-02, 1.357e-02, 1.006e-01, 4.127e-02, 6.045e-01, -1.417e-01, 1.510e-01, -1.729e-01, -1.531e-01, -8.601e-02, 1.774e-02, -7.535e-03, 6.714e-04, -9.852e-02, 4.678e-02)); + r += mul(s2_7, M4(1.303e-03, -1.218e-01, -1.070e-01, -2.185e-02, 3.714e-01, 4.524e-01, 1.029e-01, 1.001e-01, -6.366e-02, -1.359e-01, -1.372e-01, 3.981e-02, -2.002e-02, -6.594e-02, 1.919e-02, -2.447e-02)); + r += mul(s2_8, M4(-4.613e-02, -5.523e-02, -4.112e-03, -9.545e-02, 2.432e-02, -9.902e-02, -9.300e-02, 2.047e-01, -4.906e-02, -1.037e-01, -6.037e-02, -6.359e-02, -9.715e-02, -5.602e-03, 2.586e-03, -1.759e-01)); + r += mul(s3_0, M4(3.237e-01, 4.918e-01, -2.668e-01, 1.157e-02, -3.132e-02, -3.390e-02, 1.897e-02, -1.129e-01, -4.634e-01, 5.922e-01, -7.222e-01, 6.022e-01, 2.221e-01, 2.233e-01, -5.005e-02, -4.525e-02)); + r += mul(s3_1, M4(1.096e-01, 6.729e-01, -1.870e-01, 2.077e-01, -1.609e-01, -7.736e-02, 7.650e-02, -2.442e-01, -2.838e-01, -1.199e-01, -3.359e-01, 1.487e-01, 1.102e-01, 4.324e-02, -7.319e-02, -4.535e-02)); + r += mul(s3_2, M4(-2.172e-01, 2.894e-01, -1.623e-01, -1.274e-01, -1.549e-01, -5.039e-02, 9.217e-02, -6.576e-02, 3.091e-01, 7.660e-01, 1.150e+00, 1.007e+00, 2.005e-01, 1.174e-01, -2.202e-01, -2.503e-01)); + r += mul(s3_3, M4(-1.587e-01, -4.409e-01, 7.186e-01, 
-8.712e-02, -3.360e-01, -1.635e-01, -9.153e-02, -3.941e-02, -1.524e-01, 8.945e-01, -2.371e-01, 6.067e-01, -3.247e-01, 5.680e-02, 1.870e-01, 9.043e-02)); + r += mul(s3_4, M4(-5.073e-01, 3.782e-01, 1.547e-01, 2.375e-01, -2.941e-01, -2.225e-01, -3.937e-02, 1.585e-01, 6.400e-01, 6.338e-02, 6.255e-01, -9.882e-01, -8.300e-02, 1.964e-01, 2.085e-01, -5.729e-02)); + r += mul(s3_5, M4(-1.060e-01, -3.881e-01, 1.130e-01, -2.133e-01, -2.131e-01, -5.654e-02, 9.555e-02, 9.273e-02, 3.269e-01, -2.140e-01, 2.201e+00, -4.989e-01, 2.170e-01, 3.391e-01, -1.298e-01, -1.696e-01)); + r += mul(s3_6, M4(1.210e+00, 1.642e-01, 2.442e-02, 4.690e-01, -3.490e-01, -2.805e-01, 1.715e-01, -6.646e-02, -1.273e-01, 1.969e-01, -8.193e-02, 1.531e-01, -3.637e-01, -4.352e-02, 3.050e-02, 2.056e-01)); + r += mul(s3_7, M4(3.109e-02, 3.192e-01, 3.558e-01, -7.807e-01, -2.123e-01, -3.377e-02, -6.032e-02, -5.000e-02, -7.944e-01, 4.559e-01, 7.100e-01, -2.139e-01, 1.249e-01, 3.205e-01, 3.829e-01, 8.794e-02)); + r += mul(s3_8, M4(1.721e-01, -2.141e-02, -5.124e-02, -1.941e-02, -6.881e-02, -1.218e-01, -2.140e-02, -1.477e-02, 2.367e-01, 3.393e-01, 1.307e+00, -9.865e-01, -1.969e-02, -1.983e-02, 1.031e-01, 9.551e-03)); + r += mul(s4_0, M4(5.793e-03, -2.997e-01, -2.149e-01, 7.801e-02, -5.903e-03, -6.859e-02, 9.974e-04, -2.067e-01, -8.018e-02, 5.507e-02, -5.498e-02, -1.249e-02, 4.331e-02, -7.296e-02, 1.522e-01, 6.020e-02)); + r += mul(s4_1, M4(-1.126e-03, -2.090e-01, -1.333e-01, 2.479e-01, -6.377e-02, -1.562e-01, 2.477e-02, -4.739e-01, 8.594e-02, -1.862e-02, -1.435e-01, 8.022e-02, -4.041e-01, -2.097e-01, 7.237e-01, 7.145e-01)); + r += mul(s4_2, M4(4.093e-01, -1.603e-02, -4.302e-02, 1.847e-01, -5.292e-02, -1.715e-02, -4.633e-02, -2.926e-01, 1.107e-02, 1.358e-01, -2.217e-02, 3.609e-02, 3.626e-02, -2.235e-01, 3.172e-01, -2.094e-01)); + r += mul(s4_3, M4(1.372e-01, -4.963e-02, 3.393e-03, -1.382e-01, -1.840e-01, 2.136e-02, 1.010e-01, 2.007e-01, -1.317e-01, -1.516e-01, 1.125e-01, -3.056e-02, 1.042e-01, -6.974e-02, 6.472e-02, 
7.204e-02)); + r += mul(s4_4, M4(6.797e-02, 5.619e-01, -1.842e-01, -1.167e-01, 1.120e-01, -6.628e-02, -7.528e-02, 1.173e-01, -8.345e-02, 9.310e-03, 2.113e-01, 2.760e-01, -1.062e-01, -2.080e-01, -1.711e-01, -2.247e-01)); + r += mul(s4_5, M4(1.379e-01, -3.213e-02, -1.093e-01, -1.256e-02, 6.009e-02, 9.502e-02, 7.922e-02, 1.096e-01, -5.342e-02, 2.567e-01, -7.645e-02, -1.566e-01, -4.579e-01, 1.754e-01, 5.527e-01, 4.371e-01)); + r += mul(s4_6, M4(2.313e-01, -1.434e-01, -2.594e-02, 4.450e-02, -8.749e-02, -4.179e-02, 4.259e-03, 4.061e-03, -7.949e-03, -2.408e-01, -9.617e-03, -3.280e-02, 5.811e-02, 1.831e-02, -2.068e-02, -9.291e-02)); + r += mul(s4_7, M4(3.050e-01, 1.183e-01, -3.893e-02, 9.341e-02, 2.277e-01, -1.558e-02, 6.117e-02, 3.575e-02, 1.044e-01, 8.139e-02, 5.209e-03, -1.367e-01, 1.577e-01, -3.980e-01, 2.223e-01, -3.205e-02)); + r += mul(s4_8, M4(8.588e-02, 2.461e-01, 5.215e-04, -7.777e-02, -4.666e-02, 6.132e-02, 5.166e-02, 3.593e-02, 5.295e-02, -6.697e-02, -1.082e-01, 6.431e-02, -2.786e-01, -1.883e-01, -2.140e-01, 6.313e-02)); + r += mul(s5_0, M4(-1.634e-01, 1.580e-01, -1.498e-01, 1.332e-02, -2.833e-01, 1.933e-02, -1.910e-01, -1.457e-01, -2.154e-01, -1.393e-01, 2.019e-01, 4.654e-02, 5.307e-02, -2.079e-02, -1.021e-01, -6.443e-02)); + r += mul(s5_1, M4(-1.549e-01, 1.103e-01, -9.549e-02, 1.638e-01, 4.452e-01, 2.824e-02, 8.840e-02, -5.022e-02, 1.916e-02, 6.132e-02, 2.783e-01, -3.927e-01, 1.925e-01, 2.155e-01, -4.751e-03, -6.121e-03)); + r += mul(s5_2, M4(-1.336e-01, 1.558e-01, 2.160e-01, -8.837e-02, 1.081e-01, 2.939e-01, 1.024e-01, 3.714e-02, -6.304e-01, -4.093e-01, 2.276e-01, 2.333e-01, 1.152e-02, 3.609e-01, -5.508e-01, 3.806e-01)); + r += mul(s5_3, M4(-2.184e-01, 2.841e-01, 1.092e-01, -2.413e-01, -1.375e-02, 1.723e-01, 2.776e-02, 6.387e-02, 1.073e-01, 1.465e-01, 3.192e-01, -5.573e-01, 3.674e-03, 1.185e-01, 7.853e-03, -1.184e-01)); + r += mul(s5_4, M4(2.010e-02, 6.965e-02, 4.445e-01, -2.783e-01, 3.320e-01, -6.711e-02, 2.167e-03, -8.034e-02, 3.523e-01, -2.237e-01, 
3.191e-01, -8.361e-01, 6.816e-01, 1.419e-02, -2.330e-01, -3.863e-01)); + r += mul(s5_5, M4(-1.692e-01, -4.727e-02, 1.890e-02, -4.228e-02, -2.228e-01, 1.929e-01, -2.376e-01, 4.520e-02, -1.926e-01, -5.829e-01, 1.372e-01, -6.165e-01, -5.897e-03, 1.233e-01, 9.314e-02, -4.698e-01)); + r += mul(s5_6, M4(-1.805e-01, -1.371e-01, 6.896e-03, 3.752e-02, -1.036e-01, 3.209e-01, -1.229e-02, -5.416e-02, -2.065e-01, 5.857e-01, 4.271e-01, -2.046e-01, 1.299e-02, 1.409e-01, -6.584e-02, -8.911e-02)); + r += mul(s5_7, M4(-2.166e-01, -4.869e-01, 3.691e-02, 9.347e-03, -7.123e-02, 7.614e-02, -8.810e-02, 2.679e-02, -3.274e-01, 1.435e-01, 5.891e-01, 2.630e-01, 8.166e-02, -2.710e-03, -1.822e-02, 2.107e-02)); + r += mul(s5_8, M4(-7.499e-02, -1.186e-01, 2.727e-02, -3.484e-03, -2.397e-01, 6.605e-02, -1.695e-01, 6.389e-02, -2.937e-02, -3.176e-01, 4.547e-01, 2.197e-01, -2.220e-01, -5.530e-02, -5.789e-02, -1.362e-01)); + r += mul(s6_0, M4(1.284e-01, -5.820e-02, -1.821e-02, -2.132e-02, -1.812e-01, 3.254e-02, -6.980e-02, 4.877e-02, -3.981e-01, -6.346e-03, 1.061e-01, 2.830e-01, -1.488e-01, 2.322e-01, -4.172e-01, 1.401e-01)); + r += mul(s6_1, M4(1.744e-01, -1.377e-02, 1.247e-01, -9.359e-02, -1.703e-01, -6.018e-02, 3.291e-02, 1.262e-01, -2.660e-01, 3.021e-02, -6.027e-02, 1.319e-01, 1.809e-01, -1.488e-01, -6.257e-01, -7.873e-02)); + r += mul(s6_2, M4(9.294e-03, -4.738e-02, 2.425e-02, -9.006e-02, -7.711e-02, -1.200e-01, 1.304e-01, 8.477e-02, 5.670e-02, 1.555e-01, -3.501e-02, -8.189e-02, -3.005e-01, -4.845e-02, 1.362e-01, -1.009e-01)); + r += mul(s6_3, M4(3.859e-02, 7.534e-02, -2.690e-01, -6.434e-02, 2.921e-02, 6.423e-02, -1.309e-01, 2.018e-01, -4.921e-01, 2.839e-01, 1.028e-01, -3.733e-02, 1.341e-01, -7.584e-02, 4.857e-02, -3.187e-01)); + r += mul(s6_4, M4(2.750e-01, 2.931e-01, -7.052e-01, -5.350e-01, 5.074e-02, -5.417e-02, 1.930e-02, -1.304e-01, 1.098e-01, 8.690e-02, 1.029e-01, 3.523e-02, 5.505e-02, 2.928e-01, -3.176e-01, -5.488e-01)); + r += mul(s6_5, M4(-1.432e-01, 4.407e-01, -4.435e-01, 6.909e-01, 
-4.673e-02, 6.270e-02, -5.395e-03, -4.062e-02, 3.409e-02, -2.240e-01, 2.137e-01, -2.802e-02, -7.794e-02, 1.457e-02, -3.355e-02, 3.011e-03)); + r += mul(s6_6, M4(-1.440e-01, 5.284e-01, 7.095e-02, -3.436e-01, -3.962e-03, -2.807e-02, -1.546e-01, 6.346e-02, -8.007e-03, -6.288e-02, -1.336e-01, -8.172e-02, 6.688e-02, 2.013e-02, 9.883e-02, 6.834e-03)); + r += mul(s6_7, M4(-7.737e-02, 1.352e-01, -1.823e-02, 1.410e-01, -3.071e-01, 1.073e-01, -1.601e-02, -2.010e-01, 1.849e-01, -1.390e-01, 1.748e-01, 4.664e-02, -2.876e-01, -1.700e-01, -8.310e-02, 6.118e-02)); + r += mul(s6_8, M4(-2.698e-01, 2.050e-01, -2.982e-01, -4.068e-01, -1.921e-01, -2.708e-02, 9.079e-02, -2.908e-01, 8.824e-02, -1.377e-01, -2.613e-02, 8.476e-02, 2.881e-01, 2.715e-02, 5.938e-02, 4.345e-02)); + r += mul(s7_0, M4(2.057e-02, 3.372e-02, -8.995e-02, 3.579e-02, 3.542e-01, 7.582e-02, -1.086e-03, -7.511e-02, 2.623e-02, 1.204e-01, -5.686e-02, -6.017e-04, 1.221e-01, -1.064e-02, 1.557e-01, -8.613e-03)); + r += mul(s7_1, M4(-2.953e-02, -8.286e-02, -9.441e-02, -2.524e-02, 4.550e-01, 2.552e-01, 2.057e-01, 1.977e-01, 1.033e-01, 9.862e-02, 4.592e-02, 5.174e-02, -1.450e-01, -2.149e-02, -2.172e-02, 2.110e-01)); + r += mul(s7_2, M4(-7.143e-02, -1.142e-01, 2.439e-02, -1.636e-01, 1.959e-01, 8.576e-02, 2.433e-01, -6.556e-02, 1.911e-01, 1.498e-01, -2.678e-02, -7.298e-02, 1.300e-01, -1.324e-01, 2.859e-03, 1.392e-01)); + r += mul(s7_3, M4(-4.561e-02, 4.549e-02, -1.297e-01, 9.444e-02, -9.785e-02, 2.926e-01, 3.633e-01, 3.022e-02, 3.227e-01, -2.358e-01, 3.198e-01, -1.437e-01, -2.234e-02, -8.447e-02, -7.054e-02, -1.036e-01)); + r += mul(s7_4, M4(-3.751e-02, -1.283e-01, 1.522e-01, 4.857e-02, 3.744e-01, 5.343e-01, 2.480e-01, -7.117e-02, 5.132e-01, 2.266e-01, -1.138e-01, 3.109e-02, 9.592e-02, 1.364e-02, 4.631e-02, 1.131e-02)); + r += mul(s7_5, M4(-7.775e-03, -1.390e-01, 1.219e-01, -1.763e-01, 5.525e-01, 1.910e-01, 1.082e-01, 2.706e-01, -5.759e-02, 2.822e-01, 3.152e-02, -2.117e-01, -7.155e-02, -7.768e-02, -1.189e-01, -7.904e-03)); + r += 
mul(s7_6, M4(1.032e-01, -1.927e-02, 1.675e-02, -1.546e-02, 1.065e-01, -1.700e-01, 1.457e-01, -1.539e-01, 1.431e-01, -2.567e-02, -4.598e-02, -2.291e-02, -1.475e-01, 2.480e-01, -9.909e-02, 8.509e-04)); + r += mul(s7_7, M4(3.658e-02, -9.913e-02, 7.420e-02, -6.561e-02, 2.804e-01, 2.319e-01, 3.820e-02, -2.351e-02, 2.700e-01, 2.374e-01, 1.995e-02, -1.508e-01, 1.266e-01, -5.414e-02, 1.113e-01, 1.421e-02)); + r += mul(s7_8, M4(2.975e-03, -8.924e-02, 7.782e-04, 3.556e-01, 1.857e-01, -1.485e-01, -1.141e-01, -2.377e-02, -3.094e-01, 2.868e-01, -1.138e-01, -1.233e-01, -1.582e-02, 5.449e-02, 8.113e-03, 2.659e-02)); + r += V4(4.227e-02, -2.005e-02, 4.461e-02, -2.311e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.261e-01, 1.275e-01, 1.893e-01, -4.490e-02, 3.075e-02, -2.067e-02, 8.184e-02, 6.246e-02, -1.453e-01, -8.347e-02, 1.846e-02, -4.013e-02, 8.788e-02, 7.863e-02, 2.785e-02, -2.049e-02)); + r += mul(s0_1, M4(5.533e-02, 2.675e-01, 2.347e-01, 1.069e-02, 3.747e-02, 3.292e-02, 1.962e-01, -2.536e-02, 1.213e-01, -6.926e-02, -1.038e-02, 8.587e-02, 1.096e-01, 3.013e-02, -1.298e-01, 1.057e-01)); + r += mul(s0_2, M4(1.319e-01, -1.151e-01, -9.549e-02, -5.034e-02, 1.870e-02, -6.669e-04, 2.032e-02, 5.385e-02, -1.551e-01, -9.357e-03, 2.673e-02, -2.371e-03, 3.701e-02, 3.333e-02, -1.777e-02, 1.445e-01)); + r += mul(s0_3, M4(-1.049e-01, 
-4.468e-01, -2.183e-01, 1.698e-01, 1.613e-01, 1.641e-01, 2.722e-01, -9.911e-02, 3.634e-02, 1.021e-01, 5.158e-02, -9.397e-02, -1.835e-02, 4.337e-02, -3.981e-02, 1.435e-02)); + r += mul(s0_4, M4(5.649e-02, 1.327e-01, -7.398e-02, 2.571e-01, 1.044e-01, -4.123e-02, -2.703e-03, 1.883e-01, 2.395e-02, -3.654e-02, -6.511e-02, -3.507e-02, 1.353e-01, 7.352e-02, -2.371e-01, 5.515e-01)); + r += mul(s0_5, M4(-1.566e-01, -2.231e-01, -1.955e-01, 1.308e-01, 7.935e-02, 1.006e-02, 1.399e-01, -1.568e-01, -2.835e-02, 2.378e-01, 4.649e-02, 2.825e-02, 1.614e-01, -1.804e-02, -1.573e-03, 2.541e-01)); + r += mul(s0_6, M4(-7.541e-03, -2.560e-01, 2.715e-02, 5.328e-02, -3.816e-02, -8.862e-02, 2.168e-01, -1.064e-01, -7.636e-02, 8.903e-02, 7.338e-02, -1.177e-03, 1.478e-01, -1.738e-01, -1.222e-01, 1.096e-01)); + r += mul(s0_7, M4(-8.512e-02, 1.416e-01, 9.192e-03, 1.923e-01, 1.785e-01, 2.284e-02, 1.032e-01, -1.785e-03, -4.071e-02, -1.462e-01, -5.904e-02, -1.422e-01, -1.056e-01, -2.295e-01, -2.900e-01, 1.496e-01)); + r += mul(s0_8, M4(-7.149e-02, 2.004e-01, -1.053e-01, 1.203e-01, -6.376e-03, -4.344e-02, 2.072e-01, -1.266e-01, -8.375e-02, -3.221e-02, -1.236e-03, -8.896e-02, -1.123e-01, -2.438e-01, -1.008e-01, 3.596e-01)); + r += mul(s1_0, M4(-3.661e-02, 7.188e-02, -8.516e-02, 1.255e-01, -2.830e-01, 1.819e-02, 8.893e-02, 1.407e-02, 8.581e-02, -2.465e-01, -2.073e-02, 4.201e-01, -2.264e-02, 2.318e-02, 6.141e-02, -2.330e-02)); + r += mul(s1_1, M4(-3.989e-02, -2.107e-01, 7.287e-02, 1.096e-01, -2.123e-02, -8.654e-03, 2.330e-01, -5.653e-02, 9.050e-02, -2.271e-02, -3.233e-01, -3.972e-01, -1.002e-01, 8.960e-02, -6.543e-03, -4.752e-02)); + r += mul(s1_2, M4(-6.677e-03, 4.369e-02, 1.492e-02, -1.174e-01, 8.590e-02, 5.901e-02, 4.269e-02, 7.195e-02, -6.207e-02, 1.069e-01, 1.033e-01, -1.025e-01, -1.156e-01, 5.085e-02, 1.734e-01, -1.801e-01)); + r += mul(s1_3, M4(-1.858e-02, -1.199e-01, 5.713e-02, -3.361e-02, 1.338e-01, 9.905e-02, 2.074e-01, -5.975e-03, 4.418e-01, -5.371e-01, -1.131e-01, 8.611e-02, -2.646e-01, 
2.978e-01, 1.242e-01, 9.650e-02)); + r += mul(s1_4, M4(-2.745e-01, 2.923e-01, 8.791e-02, -2.121e-01, 2.900e-01, 3.680e-02, 1.520e-01, 1.254e-01, 1.652e-02, 2.902e-01, 2.883e-01, 3.979e-01, -2.851e-01, -5.253e-02, 2.688e-01, -1.871e-02)); + r += mul(s1_5, M4(-6.374e-02, -2.885e-02, -7.878e-02, -3.370e-02, 3.692e-02, -6.525e-02, 1.714e-01, -9.354e-02, 1.326e-01, -6.534e-02, -6.443e-02, 1.037e-01, -2.664e-01, -9.002e-01, 6.225e-02, -4.174e-01)); + r += mul(s1_6, M4(3.009e-02, 2.041e-02, -6.071e-02, -9.755e-03, -1.186e-01, -4.602e-02, 2.015e-01, -1.719e-01, -2.484e-02, -3.894e-01, -9.188e-02, 1.068e-01, -1.791e-02, 3.681e-01, 1.223e-01, -2.349e-02)); + r += mul(s1_7, M4(7.351e-02, 3.737e-03, 1.317e-01, -1.007e-01, 1.540e-01, -1.224e-02, 1.404e-01, -2.308e-02, 3.033e-01, -2.157e-01, 7.529e-02, 4.100e-02, -2.292e-02, 4.977e-02, 2.144e-01, -5.221e-01)); + r += mul(s1_8, M4(5.573e-02, -1.405e-01, -6.806e-02, 2.158e-02, 1.295e-01, -1.267e-02, 1.694e-01, -5.347e-02, 1.971e-01, 3.000e-01, -1.284e-01, 2.282e-01, 3.039e-02, 1.595e-01, -2.105e-01, -3.446e-01)); + r += mul(s2_0, M4(2.024e-01, -3.515e-02, -2.489e-03, -2.118e-02, -1.715e-01, -1.940e-01, -6.598e-02, -8.850e-02, -7.708e-02, 5.732e-02, 2.541e-02, -4.510e-02, -2.579e-02, -1.242e-01, 9.859e-02, 1.145e-02)); + r += mul(s2_1, M4(2.687e-01, -6.414e-02, -4.625e-02, 8.588e-03, 1.435e-02, -1.698e-01, -7.112e-04, -7.204e-02, 1.790e-01, 9.271e-02, 4.105e-02, -5.917e-02, 3.966e-02, -3.604e-02, -5.485e-02, -9.420e-03)); + r += mul(s2_2, M4(7.422e-02, 7.694e-02, -7.387e-03, -1.102e-02, -2.691e-03, -1.765e-02, 7.516e-02, 4.662e-04, -1.576e-01, 1.058e-01, 2.696e-03, 5.033e-03, -1.272e-01, 3.192e-02, 1.493e-02, 6.949e-07)); + r += mul(s2_3, M4(2.105e-01, -3.479e-02, 1.751e-02, 2.534e-02, -2.367e-01, -1.494e-01, -2.756e-01, -4.013e-01, -2.370e-02, -1.276e-02, -9.586e-02, 5.129e-03, 7.380e-02, -1.411e-01, -9.599e-02, 5.479e-02)); + r += mul(s2_4, M4(3.765e-01, 7.629e-02, -9.550e-04, 1.215e-02, -6.660e-02, -2.347e-01, 4.939e-02, 
-4.931e-01, 2.056e-01, -1.731e-04, -4.735e-02, 2.450e-02, -2.056e-01, 8.265e-02, -5.027e-02, -7.208e-02)); + r += mul(s2_5, M4(2.828e-03, -1.418e-02, 7.302e-02, 5.425e-03, 8.625e-03, 1.805e-01, -4.860e-02, -1.532e-01, 8.334e-02, -5.988e-02, -1.329e-01, -2.702e-03, 9.095e-03, 1.942e-01, 4.222e-02, -1.058e-03)); + r += mul(s2_6, M4(8.565e-01, 5.326e-02, 1.705e-02, 5.460e-02, -1.571e-01, -2.404e-01, 9.784e-02, -3.661e-01, -1.714e-01, -4.530e-02, -7.510e-03, -2.285e-01, -4.492e-02, -1.553e-01, 4.557e-02, 2.245e-01)); + r += mul(s2_7, M4(5.439e-01, -8.800e-02, -7.793e-02, -1.366e-02, 3.674e-02, 4.674e-01, -8.680e-02, -2.205e-01, 4.452e-02, -4.380e-02, 5.358e-02, -1.180e-01, 6.422e-02, 2.963e-01, -1.742e-03, -5.871e-02)); + r += mul(s2_8, M4(7.783e-02, -2.963e-02, 9.227e-02, -7.006e-02, -1.405e-01, 6.849e-02, 8.228e-03, -6.423e-02, -6.025e-03, 6.067e-03, -8.835e-03, -4.613e-02, -1.066e-01, -2.231e-01, -2.395e-02, -8.309e-02)); + r += mul(s3_0, M4(5.781e-02, -2.000e-01, 3.001e-01, -4.266e-01, -1.135e-02, -4.194e-03, -1.160e-01, 2.562e-01, -3.129e-01, -4.833e-01, 2.166e-01, -6.893e-01, -5.524e-02, 4.001e-02, 1.344e-01, -7.897e-02)); + r += mul(s3_1, M4(-1.326e-01, 8.882e-03, -2.982e-01, 1.020e-01, -3.543e-02, 5.500e-02, 1.003e-01, 6.316e-02, -8.846e-02, -9.325e-02, 1.653e-01, 6.537e-01, 1.465e-01, 3.688e-01, 6.048e-02, -1.175e-01)); + r += mul(s3_2, M4(-2.682e-01, 1.979e-01, 1.594e-01, -1.703e-01, -6.687e-02, 4.401e-02, 1.082e-02, 8.612e-02, -1.097e+00, 2.854e-02, 3.023e-01, 1.011e-01, 2.071e-01, -5.715e-02, -1.160e-02, 1.284e-01)); + r += mul(s3_3, M4(1.385e-01, 1.462e-02, -6.357e-03, -2.761e-01, -1.417e-02, 7.504e-02, -3.562e-02, 6.311e-01, -3.158e-01, -3.881e-01, -4.738e-02, -1.761e-01, -1.311e-01, -1.783e-01, -6.620e-02, 4.290e-01)); + r += mul(s3_4, M4(-1.443e-01, -1.535e-02, -1.380e-01, -3.112e-01, -1.150e-01, 1.580e-02, 2.280e-01, 1.955e-02, -5.125e-01, 1.029e+00, -1.082e-01, -4.534e-01, 3.793e-01, -1.121e-01, 1.789e-01, 5.391e-02)); + r += mul(s3_5, M4(1.708e-01, 
-2.072e-01, 1.042e-01, 9.238e-02, -1.095e-01, -1.285e-02, -5.910e-03, 3.810e-02, -2.203e+00, 5.042e-01, 1.321e+00, -9.544e-02, 1.855e-01, -2.122e-01, -3.456e-02, 8.827e-02)); + r += mul(s3_6, M4(-8.513e-01, 1.258e-01, -8.035e-01, 5.733e-01, 5.071e-02, 1.761e-01, 6.746e-02, 5.481e-01, 1.247e-02, -3.302e-01, 5.462e-01, 5.615e-02, -3.885e-03, 1.080e-01, -1.204e-01, -7.939e-02)); + r += mul(s3_7, M4(-5.870e-01, -4.577e-01, -4.069e-01, 1.490e-01, 2.019e-02, -2.588e-01, 1.686e-01, -1.399e-02, -1.564e-01, 8.066e-01, 1.391e+00, 7.143e-01, -2.593e-02, -2.001e-01, -9.285e-02, -1.235e-01)); + r += mul(s3_8, M4(7.291e-02, 2.909e-01, -9.548e-02, -2.685e-01, -9.634e-04, -3.878e-04, 1.458e-01, 1.708e-01, -2.020e+00, -1.086e+00, -2.588e-01, -4.462e-01, -1.030e-01, 2.201e-01, 2.603e-02, 1.435e-02)); + r += mul(s4_0, M4(-1.146e-01, 3.621e-02, -1.112e-01, -1.958e-01, -7.980e-02, 2.275e-01, -2.337e-02, -2.127e-02, -6.880e-02, 3.179e-02, 2.920e-02, -7.738e-02, -1.011e-01, -2.483e-01, 4.229e-02, -2.371e-03)); + r += mul(s4_1, M4(1.071e-01, -1.620e-01, -1.109e-01, -2.518e-01, -2.020e-01, 1.469e-01, 1.715e-02, -2.811e-01, -6.490e-02, 8.599e-02, -7.551e-03, -8.389e-02, 2.535e-01, -5.084e-01, -2.596e-01, -2.502e-01)); + r += mul(s4_2, M4(1.108e-01, -1.844e-01, 1.731e-01, -2.074e-01, -1.584e-01, -1.588e-01, 4.427e-02, 4.406e-02, -2.766e-01, 6.326e-02, 1.104e-01, -3.570e-03, 7.484e-02, -2.749e-01, -3.075e-02, 1.215e-02)); + r += mul(s4_3, M4(-1.798e-02, 5.229e-02, -1.702e-02, 5.724e-02, -9.031e-02, 2.412e-01, -4.968e-02, 2.454e-02, -2.855e-02, -2.255e-01, -2.992e-02, 3.039e-01, -2.892e-03, -4.545e-04, -6.446e-03, -5.023e-01)); + r += mul(s4_4, M4(6.744e-02, -3.922e-01, -1.469e-01, 3.184e-02, -1.335e-02, -4.252e-01, -1.877e-01, -4.014e-02, 9.306e-02, 1.625e-03, -1.837e-02, -1.705e-01, -1.491e-01, -1.039e-01, -3.117e-01, -4.440e-01)); + r += mul(s4_5, M4(9.537e-02, -9.820e-02, -1.369e-01, -1.174e-01, 5.395e-02, -3.037e-01, -7.887e-02, -7.200e-02, -2.337e-03, 1.488e-01, 6.189e-02, 9.512e-02, 
-1.188e-01, 2.278e-01, -1.107e-01, -3.545e-01)); + r += mul(s4_6, M4(-8.930e-02, 4.310e-02, 6.118e-02, -1.643e-02, -8.316e-02, 5.446e-02, 3.115e-02, 1.496e-01, -1.781e-01, -7.885e-02, 4.587e-02, -1.316e-01, 2.634e-02, 2.030e-02, -1.087e-01, -4.127e-02)); + r += mul(s4_7, M4(9.015e-02, 2.788e-01, 6.667e-02, 1.200e-01, 1.077e-01, -1.438e-01, 4.557e-02, -4.757e-02, 2.845e-01, 4.945e-02, -7.365e-02, 1.125e-01, -5.808e-02, 2.942e-01, -6.692e-02, 1.674e-01)); + r += mul(s4_8, M4(1.499e-01, 6.180e-01, 8.160e-02, 9.050e-02, 1.187e-02, 2.776e-01, 2.204e-01, -5.643e-02, -1.233e-01, 2.070e-02, -3.342e-02, -2.091e-01, 4.348e-03, 5.386e-01, 1.921e-01, 2.900e-01)); + r += mul(s5_0, M4(-2.882e-02, 1.452e-01, -1.705e-02, 2.552e-01, -1.229e-01, -4.764e-01, -2.250e-01, -1.043e-01, 2.435e-02, 6.002e-02, -4.948e-02, -3.049e-01, 8.513e-02, -8.677e-02, -2.205e-02, 3.659e-02)); + r += mul(s5_1, M4(2.908e-02, 2.192e-01, 3.956e-02, 5.391e-02, 1.977e-01, 3.076e-02, -1.594e-01, 1.171e-01, 1.115e-01, -3.301e-01, -2.461e-03, -7.164e-02, -5.271e-02, 5.961e-01, -7.649e-03, 1.362e-01)); + r += mul(s5_2, M4(-7.324e-02, -1.870e-01, 1.408e-01, -1.262e-02, 3.113e-01, 1.345e-01, 1.300e-01, -1.704e-01, -4.467e-02, -2.020e-02, -1.222e-01, 2.106e-01, 6.127e-02, 1.340e-01, 1.212e-01, 6.721e-02)); + r += mul(s5_3, M4(-4.438e-02, 1.089e-01, -1.936e-02, 3.638e-01, -5.397e-03, -1.789e-01, -7.034e-02, 2.115e-01, 2.980e-01, 1.433e-01, -3.109e-01, 5.999e-02, 2.967e-02, 1.040e-01, 8.379e-02, 2.824e-01)); + r += mul(s5_4, M4(-1.738e-01, -2.783e-02, 1.302e-01, 1.638e-01, 2.230e-01, 3.076e-01, 9.329e-02, 7.890e-02, 3.738e-01, -4.847e-02, 1.343e-01, -4.918e-01, -1.716e-01, 2.760e-01, -2.303e-01, 6.735e-01)); + r += mul(s5_5, M4(2.113e-02, -6.445e-01, -1.235e-01, -5.110e-02, 1.540e-01, -7.572e-02, 1.704e-01, 1.401e-01, 7.002e-01, 7.781e-01, 4.996e-03, 5.194e-01, 1.068e-01, -6.048e-01, 6.111e-02, 2.408e-02)); + r += mul(s5_6, M4(-1.330e-01, 8.110e-02, -9.976e-02, -6.708e-02, -5.166e-02, 2.543e-01, -7.829e-03, 
8.376e-02, 7.038e-02, -2.733e-01, -1.933e-01, 3.888e-01, 3.742e-02, -3.099e-02, -3.839e-02, -2.899e-02)); + r += mul(s5_7, M4(4.468e-03, 8.327e-02, -1.232e-02, -2.009e-01, -6.394e-03, -7.117e-02, 8.830e-02, -2.180e-01, -6.541e-01, -1.278e+00, -2.546e-01, -8.400e-01, 3.278e-02, -4.873e-01, 2.989e-02, -2.834e-01)); + r += mul(s5_8, M4(5.587e-02, 1.196e-01, 6.747e-02, -1.516e-01, 1.226e-01, -3.731e-02, 1.674e-01, -4.205e-03, -3.633e-02, 6.329e-01, 2.203e-01, 3.548e-01, -1.867e-02, -6.607e-02, 1.559e-01, -1.029e-01)); + r += mul(s6_0, M4(5.776e-02, -4.237e-02, -3.157e-02, -3.967e-02, 5.468e-02, 1.806e-01, 8.435e-02, -4.378e-02, -8.778e-02, -5.004e-02, 1.611e-01, -1.044e-01, -9.836e-02, 1.137e-01, 4.520e-01, 4.521e-01)); + r += mul(s6_1, M4(1.205e-01, -2.476e-01, -2.943e-02, 2.900e-01, 1.660e-01, 1.942e-01, -3.279e-02, -1.001e-01, 1.873e-01, -5.917e-01, -9.511e-02, 8.591e-02, 2.772e-01, -4.016e-01, -4.060e-02, 4.557e-01)); + r += mul(s6_2, M4(7.314e-02, 2.591e-01, -5.486e-03, 1.097e-01, 8.268e-03, 2.578e-02, 8.015e-02, -3.003e-02, -1.831e-02, 2.921e-01, 3.462e-02, 4.376e-02, 3.626e-02, -2.226e-01, -2.944e-02, 5.713e-02)); + r += mul(s6_3, M4(6.148e-02, -1.827e-01, -3.366e-02, -6.681e-02, 4.735e-02, 1.116e-01, -3.804e-02, 1.689e-02, 1.110e-01, -4.969e-01, -1.288e-01, 1.565e-02, 4.991e-02, -7.043e-02, -1.163e-02, 2.936e-01)); + r += mul(s6_4, M4(-3.330e-01, 7.520e-01, -2.222e-01, 2.143e-01, 8.841e-02, 1.125e-01, -3.049e-02, 1.071e-01, 1.219e-01, 5.266e-01, -1.147e-01, 1.817e-01, -2.865e-01, 4.773e-01, -2.476e-01, -1.749e-01)); + r += mul(s6_5, M4(-5.390e-02, 1.513e-02, 1.866e-02, -3.879e-02, -1.743e-02, -3.595e-01, 8.142e-02, 1.721e-01, -1.288e-01, 2.899e-02, 1.466e-01, -1.986e-02, 3.972e-01, 3.514e-02, 3.046e-01, 1.597e-01)); + r += mul(s6_6, M4(1.587e-01, -2.005e-02, 1.181e-01, 6.660e-01, 1.190e-01, -2.422e-02, -8.934e-02, -9.839e-02, -1.263e-01, 1.481e-01, -1.014e-01, -4.174e-02, 4.562e-02, 1.991e-01, 4.164e-02, 1.085e-01)); + r += mul(s6_7, M4(2.260e-01, 2.539e-01, 
-4.326e-01, 3.654e-01, 1.095e-02, 1.816e-02, -3.917e-02, -1.604e-01, 6.233e-02, 3.373e-01, -2.174e-02, 1.601e-01, 1.970e-02, -2.403e-01, 8.825e-02, -5.946e-02)); + r += mul(s6_8, M4(-1.247e-02, -7.207e-01, 3.782e-01, 9.277e-03, -1.601e-01, -1.992e-01, 4.560e-02, 9.246e-02, -5.056e-02, -6.212e-02, 9.100e-02, 2.018e-02, 1.977e-01, -1.284e-01, 3.475e-02, -4.638e-02)); + r += mul(s7_0, M4(-2.275e-04, 2.430e-02, 2.681e-02, 7.920e-02, -1.901e-03, 8.596e-02, -1.221e-02, -1.267e-01, 1.530e-01, 3.664e-01, 1.657e-01, 7.102e-02, 1.934e-01, -2.876e-02, -6.522e-02, -5.363e-03)); + r += mul(s7_1, M4(-7.802e-02, -1.425e-01, -4.622e-02, 1.281e-01, 1.430e-01, -2.953e-01, 2.089e-01, -4.956e-02, -1.123e-01, 1.033e-01, 5.525e-02, -1.141e-01, 6.440e-02, 1.535e-01, -6.271e-02, -5.787e-03)); + r += mul(s7_2, M4(2.966e-01, -2.963e-02, -2.085e-02, 1.250e-02, 1.683e-01, -5.223e-01, 5.280e-02, -1.393e-02, -1.692e-01, 4.296e-02, 8.091e-02, -2.750e-02, -1.376e-01, -1.713e-01, 1.871e-02, -1.477e-01)); + r += mul(s7_3, M4(-1.420e-01, 6.008e-02, 5.251e-02, -1.548e-01, 1.021e-01, -2.581e-01, -1.735e-02, -9.447e-02, 9.829e-02, 3.613e-01, -7.935e-02, -2.009e-01, -2.036e-01, -1.951e-02, -7.652e-03, 1.836e-01)); + r += mul(s7_4, M4(-4.783e-02, -2.506e-01, 6.729e-02, 3.130e-02, -3.645e-01, 1.714e-01, -2.112e-01, 1.140e-01, -1.834e-01, -5.257e-01, 1.401e-01, -2.666e-01, -1.429e-01, -3.021e-01, 4.720e-02, -1.594e-02)); + r += mul(s7_5, M4(9.489e-02, 4.199e-03, 5.857e-03, 2.213e-01, 2.342e-03, 2.767e-01, -1.112e-02, -5.612e-02, -2.421e-01, -1.204e-01, 1.011e-01, -2.542e-02, 7.188e-02, -8.339e-02, 1.519e-01, -2.918e-02)); + r += mul(s7_6, M4(-4.214e-02, -7.475e-03, 8.229e-04, -1.843e-01, -1.392e-01, -2.375e-01, -1.100e-01, -1.774e-01, 7.883e-02, 3.931e-02, -5.235e-02, -6.999e-02, 9.210e-02, 9.923e-02, -7.138e-02, 3.417e-02)); + r += mul(s7_7, M4(3.614e-02, -4.942e-03, -5.801e-02, -2.246e-01, -1.091e-01, 2.237e-01, 3.636e-01, -7.534e-02, -3.811e-02, -2.855e-01, 1.454e-01, -1.862e-01, 4.262e-02, 1.475e-01, 
3.243e-02, 4.744e-02)); + r += mul(s7_8, M4(1.115e-01, 3.370e-01, -1.262e-01, -1.915e-01, -1.522e-01, -7.482e-02, 6.731e-02, -7.372e-03, -1.355e-01, -1.548e-01, -1.192e-02, 4.345e-03, 9.265e-02, 9.544e-02, -8.572e-02, -1.153e-01)); + r += V4(-1.247e-01, -6.734e-03, 1.475e-01, 4.504e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + 
s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, 
s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, 
V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.327e-02, -4.161e-03, -7.095e-02, 8.572e-02, 2.713e-01, -8.915e-02, 1.283e-01, 1.211e-01, 4.276e-02, 1.801e-01, 4.923e-02, 1.689e-01, 4.860e-03, 9.050e-02, 1.170e-01, 2.876e-02)); + r += mul(s0_1, M4(3.230e-01, 1.148e-02, -8.886e-02, -7.384e-02, -2.449e-01, 1.492e-01, -2.951e-01, 1.323e-01, -1.567e-01, -1.223e-01, -6.559e-02, 1.257e-01, -3.000e-02, -7.952e-02, -4.354e-02, 5.939e-02)); + r += mul(s0_2, M4(-2.210e-01, -2.521e-02, -1.389e-01, 8.327e-03, 1.671e-01, -8.144e-02, 2.434e-01, 6.808e-02, 7.082e-02, -1.674e-03, -9.994e-02, 1.391e-02, 4.024e-02, 4.371e-01, 1.293e-02, 1.008e-02)); + r += mul(s0_3, M4(7.365e-02, 1.708e-01, -5.529e-02, 1.132e-01, -2.704e-01, 1.576e-01, 6.757e-01, 2.633e-01, -2.244e-01, -1.270e-01, -1.897e-01, -3.632e-01, 3.384e-01, 1.121e-01, -4.074e-02, -2.217e-02)); + r += mul(s0_4, M4(1.276e-01, 3.451e-01, 6.513e-01, -1.107e-01, -3.603e-01, 1.250e-01, 1.405e-01, 1.633e-01, -2.302e-01, 3.803e-01, 8.127e-02, 4.307e-02, 1.260e-01, -8.987e-02, 2.313e-01, 1.861e-02)); + r += mul(s0_5, M4(6.664e-02, 9.814e-02, -2.025e-01, -5.130e-02, 1.402e-01, 2.758e-02, -1.749e-01, 4.557e-02, -6.261e-02, 4.286e-02, 3.171e-02, 3.924e-02, 6.739e-03, 4.048e-01, 3.059e-01, -9.780e-02)); + r += mul(s0_6, M4(-2.523e-01, 5.620e-02, 1.544e-01, 7.048e-02, 6.348e-02, 1.012e-01, -1.061e-01, -4.494e-02, 2.313e-02, -1.618e-01, -2.614e-01, 2.950e-02, -5.357e-03, -1.370e-01, 4.001e-02, -1.482e-02)); + r += mul(s0_7, M4(5.214e-02, 3.052e-01, 4.194e-01, -8.665e-02, -9.621e-03, 1.808e-02, 1.415e-01, 3.203e-02, -1.528e-01, 1.688e-01, -1.156e-01, 1.174e-01, 1.449e-01, -2.612e-01, -3.573e-01, -6.727e-03)); + r += mul(s0_8, M4(1.988e-01, 1.280e-01, -1.995e-02, 4.861e-02, -2.275e-02, -2.332e-01, -3.972e-01, 7.563e-03, 
-1.249e-01, 5.808e-02, -2.661e-01, 2.108e-02, -1.025e-01, -2.647e-01, 9.820e-02, 1.589e-01)); + r += mul(s1_0, M4(-8.562e-02, 7.639e-02, 2.790e-02, 4.735e-02, 1.694e-01, 6.082e-02, -1.144e-01, 2.403e-01, -6.880e-02, 1.604e-02, -1.194e-01, 2.735e-02, 9.442e-02, 1.005e-01, 1.251e-01, -9.961e-02)); + r += mul(s1_1, M4(-5.455e-02, -1.737e-01, -9.332e-02, -6.026e-02, -2.545e-01, 7.186e-02, -1.942e-01, -6.522e-02, 1.620e-01, -1.688e-01, 2.674e-02, 1.777e-02, -3.592e-02, -6.977e-03, 1.704e-01, 4.673e-02)); + r += mul(s1_2, M4(7.543e-02, 1.294e-01, 1.760e-01, 6.920e-02, 1.324e-01, -3.765e-02, 6.680e-02, 1.844e-01, 3.093e-02, -2.529e-01, -4.490e-02, -7.585e-04, 2.114e-03, -8.864e-03, -5.308e-03, 9.409e-02)); + r += mul(s1_3, M4(-2.335e-01, 7.110e-02, -3.204e-02, 2.092e-01, -8.619e-02, 1.550e-01, 2.951e-01, -7.618e-02, -8.411e-02, 2.200e-01, -8.623e-02, -2.308e-02, 1.055e-01, -8.905e-02, -6.922e-02, -3.127e-02)); + r += mul(s1_4, M4(1.158e-01, 1.023e-02, 9.718e-02, -1.151e-01, 6.683e-03, -9.999e-02, -7.944e-02, -3.799e-01, -3.740e-02, 2.614e-01, 2.009e-01, 3.862e-02, -2.460e-01, 4.585e-02, -2.018e-01, 5.815e-02)); + r += mul(s1_5, M4(5.490e-02, -1.056e-01, -6.731e-02, 5.668e-02, 1.145e-01, 4.128e-02, -3.419e-02, 4.321e-02, 2.014e-01, -8.320e-03, -8.264e-02, -1.081e-02, 2.209e-01, 7.960e-02, -1.285e-02, -1.142e-01)); + r += mul(s1_6, M4(3.860e-01, 3.242e-01, -4.442e-02, 9.476e-03, -5.421e-02, 7.842e-02, 7.767e-02, 3.662e-02, -5.802e-02, -5.228e-02, -1.865e-02, -1.248e-02, -5.554e-03, -6.500e-03, -1.300e-01, 5.588e-02)); + r += mul(s1_7, M4(-1.747e-01, -1.868e-02, -2.577e-02, 1.003e-01, -5.426e-02, 6.938e-02, 2.382e-01, 5.263e-02, -7.487e-02, -6.574e-02, 9.440e-02, -1.578e-02, -2.529e-01, -6.164e-02, -1.741e-01, -3.964e-02)); + r += mul(s1_8, M4(8.963e-02, -3.654e-03, -9.010e-02, 5.938e-02, 2.432e-02, -7.417e-02, -1.305e-01, 1.299e-01, 9.012e-02, -1.098e-01, -7.742e-02, 1.153e-02, -3.196e-01, 8.448e-03, 3.800e-02, -3.158e-04)); + r += mul(s2_0, M4(1.585e-02, -4.317e-02, 
-7.029e-02, -1.408e-02, -1.021e-01, -1.469e-01, -6.816e-02, 1.462e-01, 2.180e-01, -1.636e-01, -1.523e-01, -1.271e-01, -3.630e-02, 5.139e-03, 5.684e-02, 1.091e-01)); + r += mul(s2_1, M4(1.188e-01, 4.941e-02, 1.238e-01, -2.339e-02, 3.207e-01, 1.724e-01, -5.752e-02, 2.132e-03, -1.144e-02, 6.469e-02, -7.387e-02, -2.626e-02, 1.331e-01, 2.901e-01, 1.256e-01, -1.309e-01)); + r += mul(s2_2, M4(7.347e-02, -6.850e-02, 5.946e-02, -3.748e-02, -9.691e-02, 4.133e-01, -1.619e-01, 2.053e-02, 1.234e-01, -7.837e-02, 3.781e-01, 5.012e-02, 1.324e-01, 1.592e-01, 4.595e-01, 8.679e-02)); + r += mul(s2_3, M4(-6.365e-02, -1.803e-02, 1.918e-03, -3.813e-02, -2.683e-01, -5.608e-01, -1.542e-02, 1.429e-02, 1.080e-01, -2.332e-01, 1.838e-01, -1.336e-01, -1.502e-01, 5.805e-02, 2.329e-01, 1.809e-02)); + r += mul(s2_4, M4(1.323e-01, -4.564e-02, 2.595e-04, 2.409e-02, 3.545e-01, 1.530e-01, -6.533e-02, -4.940e-02, 1.852e-01, -7.116e-01, -3.391e-01, 1.780e-01, 3.437e-01, 8.667e-02, 2.948e-01, -1.259e-01)); + r += mul(s2_5, M4(8.857e-02, -5.210e-02, -7.111e-02, -1.854e-01, 2.610e-01, 2.809e-01, 1.234e-01, -4.128e-02, 2.214e-01, -4.631e-01, 1.295e-01, -3.318e-01, -1.156e-01, 1.898e-01, -4.517e-01, 8.782e-02)); + r += mul(s2_6, M4(8.610e-02, 2.528e-02, -2.750e-02, -2.833e-02, -2.638e-01, -5.164e-01, -8.466e-02, 9.584e-02, 2.705e-01, 4.046e-02, 4.747e-03, 1.428e-01, -1.552e-01, -1.916e-01, -1.442e-01, -6.089e-02)); + r += mul(s2_7, M4(-2.071e-01, -3.368e-02, -1.481e-01, -1.713e-02, 1.977e-01, 2.827e-01, 3.648e-01, 1.130e-01, 6.445e-01, -3.686e-01, -3.002e-01, -1.252e-01, 1.120e-01, -4.683e-02, 2.805e-01, -7.061e-02)); + r += mul(s2_8, M4(3.741e-02, -9.474e-03, 7.240e-02, 1.201e-02, 9.436e-02, 1.156e-01, -8.933e-02, 1.423e-01, 2.941e-01, -1.259e-01, 2.881e-02, -8.421e-02, 7.420e-02, 3.754e-02, -2.140e-01, -3.572e-02)); + r += mul(s3_0, M4(-1.187e-01, 6.031e-02, -3.228e-02, -3.443e-02, 6.831e-02, 1.834e-01, 6.940e-02, 6.946e-02, -1.477e-01, -4.231e-02, -1.754e-01, -4.200e-02, -8.707e-02, 2.923e-03, 
-6.957e-02, 7.741e-02)); + r += mul(s3_1, M4(3.398e-02, 9.156e-02, 4.898e-02, -4.594e-03, 1.062e-01, 1.513e-01, 8.946e-02, -7.440e-02, -3.083e-02, -4.001e-02, -1.423e-01, 6.950e-02, 7.627e-02, -1.463e-01, 1.154e-02, -4.211e-02)); + r += mul(s3_2, M4(-1.576e-01, 3.827e-01, 2.089e-02, 1.329e-01, 5.788e-02, 6.723e-02, -8.173e-03, 1.822e-02, 5.617e-02, 3.804e-02, 7.445e-03, 9.113e-02, -2.647e-01, -1.568e-01, -1.883e-02, 9.578e-02)); + r += mul(s3_3, M4(3.644e-02, 2.846e-01, 1.637e-01, 2.092e-02, -2.495e-01, -8.316e-02, 6.982e-02, -5.121e-02, -1.906e-01, 5.269e-02, -3.513e-02, -8.395e-02, -1.062e-01, -3.967e-02, 4.036e-02, 4.684e-02)); + r += mul(s3_4, M4(4.960e-01, 3.997e-01, 4.401e-02, -1.968e-04, 1.716e-01, 2.557e-01, 2.948e-01, -9.129e-02, -2.818e-02, -1.504e-01, 6.695e-02, 1.177e-01, -2.574e-02, -1.026e-01, 1.391e-01, 2.191e-03)); + r += mul(s3_5, M4(2.392e-02, 5.665e-01, 2.468e-02, 1.034e-03, 8.954e-02, 1.568e-01, -2.749e-02, -1.099e-01, 7.728e-02, 2.235e-01, 1.992e-01, 9.381e-03, -5.173e-02, -2.759e-02, -2.518e-01, 1.953e-02)); + r += mul(s3_6, M4(-2.692e-01, -1.146e-01, 1.086e-01, -3.173e-02, 2.293e-03, 8.060e-02, -3.199e-01, 1.096e-01, -1.890e-01, 1.366e-01, 4.567e-02, 7.226e-02, 3.653e-02, 2.276e-02, 4.357e-02, -1.038e-02)); + r += mul(s3_7, M4(1.461e-01, 3.255e-01, 1.605e-01, -3.955e-02, 3.326e-02, -1.174e-01, 3.523e-01, 9.180e-02, -6.552e-02, 1.428e-01, 1.248e-01, -2.213e-02, -1.386e-01, -5.679e-02, -1.679e-01, -6.844e-02)); + r += mul(s3_8, M4(2.182e-01, 9.706e-02, 8.389e-02, 3.322e-02, 2.416e-02, 6.132e-02, 6.103e-02, -1.857e-03, 1.241e-01, 2.562e-01, 1.088e-01, -1.497e-02, 1.795e-02, -2.255e-02, -1.364e-01, 2.330e-02)); + r += mul(s4_0, M4(-1.897e-01, 3.918e-03, 2.127e-02, -3.432e-02, -6.214e-02, -4.871e-02, 2.735e-01, -5.989e-02, 2.061e-01, -2.198e-02, 1.275e-01, -1.477e-04, 9.195e-02, -9.152e-02, -1.392e-01, -6.532e-02)); + r += mul(s4_1, M4(3.915e-01, 4.246e-02, 2.639e-01, -7.523e-02, 1.005e-01, -1.055e-01, 3.850e-01, -2.673e-01, 3.600e-03, 7.291e-02, 
-3.459e-02, 8.809e-02, -1.028e-01, -5.200e-02, -2.047e-01, -1.948e-02)); + r += mul(s4_2, M4(-1.083e-01, -7.279e-03, -2.331e-01, 2.004e-02, -2.114e-01, -2.466e-02, -8.172e-02, -7.143e-02, 4.624e-02, -4.669e-02, -2.231e-02, -9.357e-02, -5.481e-02, -1.953e-02, 4.750e-02, -1.478e-02)); + r += mul(s4_3, M4(-6.179e-02, 6.193e-03, -1.727e-01, -7.010e-02, -3.048e-01, 4.445e-02, -1.026e-01, 9.666e-02, 4.053e-02, -6.707e-02, 2.172e-01, 5.937e-02, 3.824e-02, 1.248e-01, -1.294e-01, -6.955e-02)); + r += mul(s4_4, M4(-2.312e-02, -1.556e-01, 1.048e-01, -1.411e-02, 1.620e-01, 3.157e-01, 2.627e-01, 1.063e-01, -1.202e-01, 2.224e-01, 4.503e-01, 5.285e-02, 1.987e-01, 3.739e-01, 2.998e-01, -2.872e-02)); + r += mul(s4_5, M4(2.167e-01, 2.060e-02, 1.937e-01, -2.236e-02, -8.763e-02, 9.011e-03, 2.259e-02, -8.353e-03, -2.376e-03, -1.644e-01, -1.076e-01, 7.411e-02, -9.956e-02, -3.235e-02, -9.366e-02, 6.449e-02)); + r += mul(s4_6, M4(4.405e-02, 4.167e-02, 1.097e-01, 7.252e-02, -2.906e-03, 1.321e-02, -1.755e-01, -5.058e-02, -6.062e-02, 1.258e-01, 3.338e-02, 7.420e-03, 1.128e-01, 9.513e-02, 2.138e-01, 5.929e-02)); + r += mul(s4_7, M4(-7.551e-02, -2.955e-02, -5.810e-02, 6.246e-02, -3.637e-02, 5.806e-02, 9.447e-02, -1.058e-02, -4.137e-02, -3.802e-02, 1.992e-01, 6.407e-02, 7.304e-02, -8.740e-03, -9.634e-02, 3.922e-02)); + r += mul(s4_8, M4(1.127e-01, -2.911e-02, 9.664e-02, 5.907e-02, 3.276e-02, 2.230e-02, -6.320e-02, -5.340e-02, -1.120e-01, 8.211e-02, -1.139e-02, -1.026e-01, -4.230e-01, 9.882e-02, -1.533e-01, -7.614e-03)); + r += mul(s5_0, M4(3.628e-02, -1.154e-01, -9.059e-02, 8.894e-02, 1.600e-01, -1.455e-01, 2.762e-01, 8.866e-02, 8.908e-02, -1.244e-01, 6.007e-03, -6.604e-03, -1.643e-02, -1.979e-01, -3.706e-02, -4.506e-02)); + r += mul(s5_1, M4(-3.384e-02, 6.505e-02, -1.242e-01, 6.364e-02, 6.078e-02, 2.186e-01, -2.541e-02, -1.192e-01, -2.079e-01, -1.322e-02, 7.614e-02, 8.697e-02, -1.462e-01, -1.954e-01, -2.591e-02, 6.293e-03)); + r += mul(s5_2, M4(8.138e-02, -2.241e-03, 1.985e-01, 9.739e-02, 
-1.941e-01, -1.051e-01, -1.806e-01, -9.988e-03, 3.914e-02, 1.884e-01, 5.192e-03, 9.964e-02, -4.189e-02, 7.034e-02, -3.800e-02, -3.726e-02)); + r += mul(s5_3, M4(-1.779e-02, -6.324e-02, -2.182e-01, 1.308e-02, -9.059e-03, -1.412e-01, 7.894e-02, 9.893e-02, 1.928e-01, -2.165e-01, 1.406e-01, 6.054e-02, -1.174e-01, 2.001e-01, 4.642e-02, -6.890e-02)); + r += mul(s5_4, M4(3.239e-01, -1.116e-01, -1.082e-01, -1.656e-01, 3.001e-01, 2.518e-01, 2.099e-01, 2.291e-01, -2.275e-01, 1.730e-01, 1.631e-01, 2.516e-01, -1.181e-01, -3.579e-01, -2.563e-01, -1.597e-01)); + r += mul(s5_5, M4(3.881e-02, 4.573e-03, 9.434e-02, 7.049e-02, 2.350e-02, -1.397e-01, -3.036e-01, 5.881e-03, 1.452e-02, -3.329e-01, -3.474e-01, 1.116e-01, -1.395e-01, -3.177e-01, 1.819e-02, 8.131e-02)); + r += mul(s5_6, M4(-9.471e-02, -2.972e-03, 7.773e-02, 9.366e-02, 4.038e-02, -1.017e-01, -1.674e-01, 1.358e-02, -1.484e-01, -6.491e-02, 1.528e-01, 2.826e-02, -4.188e-02, 5.348e-03, -1.330e-01, 6.659e-02)); + r += mul(s5_7, M4(-6.946e-02, 1.261e-02, 5.539e-03, 1.232e-01, -3.141e-02, -4.053e-02, -7.442e-02, 4.680e-02, -2.723e-03, 7.596e-02, 1.265e-01, 1.786e-01, -3.848e-01, 4.192e-02, 1.359e-01, 6.009e-01)); + r += mul(s5_8, M4(8.002e-02, -8.406e-02, -1.795e-01, 5.072e-02, -3.172e-02, -9.015e-03, -9.026e-03, -2.030e-02, 1.183e-01, 1.129e-01, 7.920e-04, 6.145e-02, 6.649e-02, 1.501e-01, 1.855e-01, 1.570e-01)); + r += mul(s6_0, M4(-9.948e-02, -3.819e-01, -3.683e-01, 1.272e-01, -1.245e-01, 5.014e-02, 2.566e-03, 1.791e-02, -2.544e-02, -1.346e-02, 9.671e-02, -1.296e-01, -1.373e-01, -1.413e-01, -1.511e-01, 5.454e-02)); + r += mul(s6_1, M4(1.689e-01, 1.160e-01, -3.101e-01, 2.190e-01, 1.594e-02, 1.233e-01, 7.174e-02, 1.163e-01, -3.987e-02, -6.442e-02, 2.028e-02, 2.604e-01, 2.538e-01, -6.826e-03, -8.698e-02, 2.701e-03)); + r += mul(s6_2, M4(-3.038e-01, -3.873e-02, -3.611e-01, 2.083e-01, 8.210e-03, 2.875e-01, 2.235e-01, -9.963e-03, 2.046e-01, 8.798e-02, 6.654e-02, -8.716e-02, 1.516e-03, 1.959e-01, -1.237e-01, -6.952e-02)); + r += 
mul(s6_3, M4(-1.227e-01, -3.209e-01, 4.159e-01, -1.595e-01, 1.925e-01, -6.211e-02, -1.687e-01, -2.715e-02, -2.856e-02, 1.477e-01, 1.866e-01, -1.047e-01, -3.563e-01, -2.639e-02, -1.135e-01, -2.952e-02)); + r += mul(s6_4, M4(-6.050e-02, 2.194e-03, 5.951e-01, -6.900e-01, 2.066e-01, -3.367e-02, -1.890e-01, 3.375e-02, 9.062e-02, -4.336e-03, -2.486e-01, 4.065e-02, -2.189e-01, 3.577e-01, 4.976e-01, -4.142e-02)); + r += mul(s6_5, M4(-8.950e-02, -9.476e-02, -5.042e-01, 1.305e-01, -2.734e-01, -6.901e-02, -8.919e-02, 1.665e-02, -1.636e-01, -1.991e-01, -8.216e-02, -4.933e-02, 3.207e-01, 2.510e-01, 4.275e-01, 1.801e-02)); + r += mul(s6_6, M4(6.011e-02, -3.569e-02, -1.104e-01, 1.611e-01, 6.707e-02, 7.508e-02, 1.801e-01, 4.667e-02, -3.339e-02, 4.987e-02, 3.933e-02, -1.535e-01, -1.190e-01, -9.312e-02, -1.689e-01, -7.907e-02)); + r += mul(s6_7, M4(5.741e-02, -3.315e-01, -1.535e-02, 2.660e-02, 1.870e-01, 1.909e-01, -1.146e-01, 1.900e-02, -2.739e-02, 9.114e-02, 1.117e-01, 2.496e-01, 2.605e-02, -1.141e-01, 1.132e-01, 8.098e-02)); + r += mul(s6_8, M4(1.431e-01, -5.832e-02, -2.409e-01, 8.434e-02, -4.826e-02, 1.786e-03, 2.087e-02, 3.139e-02, 1.958e-02, 1.201e-02, -4.888e-02, -4.389e-03, -6.186e-02, 5.031e-02, -2.338e-01, 7.370e-03)); + r += mul(s7_0, M4(-2.819e-02, 4.830e-02, -1.123e-01, -2.288e-02, -2.261e-01, -3.548e-02, 6.272e-03, -1.159e-01, -1.028e-01, 2.509e-01, 3.823e-01, -3.339e-01, -1.939e-03, 6.241e-02, 1.383e-01, 5.818e-02)); + r += mul(s7_1, M4(3.128e-01, 1.791e-01, 1.683e-01, 3.046e-02, 2.142e-01, -1.121e-01, 1.690e-01, 4.042e-02, 2.893e-01, -2.183e-01, -1.629e-02, -3.933e-01, 2.875e-01, -6.478e-02, 4.908e-02, -3.129e-02)); + r += mul(s7_2, M4(-4.698e-02, 1.046e-01, -5.359e-02, 1.036e-01, 1.937e-01, 8.666e-02, 9.641e-02, -9.856e-02, 1.051e-01, -8.032e-02, 1.118e-01, -3.495e-01, 1.361e-01, -7.262e-02, -1.698e-02, 1.504e-02)); + r += mul(s7_3, M4(1.390e-01, -1.189e-01, 4.071e-02, -6.657e-02, 7.152e-02, 4.180e-02, -2.413e-01, -5.912e-02, -1.159e-01, 7.575e-02, 2.380e-02, 
-3.793e-01, -2.213e-02, -7.473e-02, 1.470e-01, -4.935e-03)); + r += mul(s7_4, M4(-1.537e-01, -4.603e-02, 1.730e-01, -8.155e-02, -1.529e-01, -2.781e-01, -2.626e-01, -3.020e-02, -9.334e-02, -3.587e-02, -3.223e-01, -3.744e-01, -1.472e-01, -2.700e-01, 1.532e-01, -4.350e-02)); + r += mul(s7_5, M4(1.005e-01, -1.657e-01, -1.212e-01, -6.836e-03, 3.816e-02, 1.726e-02, 5.315e-02, -6.332e-02, -1.666e-01, -8.853e-02, 2.522e-01, -3.005e-01, 1.110e-01, 4.975e-02, 1.238e-01, 1.633e-02)); + r += mul(s7_6, M4(2.830e-03, -3.752e-02, 1.024e-01, 1.412e-02, 2.369e-02, -8.427e-02, -1.137e-01, -5.829e-02, 7.208e-02, 2.210e-01, 1.495e-01, -2.120e-01, 1.816e-01, -9.071e-02, 8.602e-02, -7.145e-02)); + r += mul(s7_7, M4(-7.057e-02, -2.065e-02, -9.396e-02, 2.313e-02, 2.014e-02, 2.292e-02, -1.276e-01, -7.497e-02, 2.522e-01, 2.429e-01, -2.176e-01, -3.563e-01, 2.477e-01, -1.382e-01, -1.759e-01, 1.100e-01)); + r += mul(s7_8, M4(-6.531e-02, 7.056e-02, -3.217e-02, 6.280e-02, -1.141e-01, 5.571e-02, -2.464e-02, -6.609e-02, -2.009e-01, 1.591e-01, -4.558e-02, -2.204e-01, -1.367e-01, -1.187e-01, -2.091e-01, 8.745e-02)); + r += V4(2.615e-03, -3.608e-04, -7.717e-03, -8.081e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.536e-01, 2.193e-02, -9.375e-02, -4.897e-02, 1.054e-01, -3.643e-02, 1.441e-01, 1.123e-01, -7.945e-02, 4.298e-02, 7.503e-02, 
1.980e-01, 1.347e-01, -3.603e-01, -7.707e-02, -1.264e-01)); + r += mul(s0_1, M4(-3.196e-02, -3.222e-02, 4.719e-02, 2.527e-03, 1.524e-01, 3.143e-02, -1.850e-02, 4.592e-02, -1.449e-01, -6.270e-02, -1.521e-01, -2.693e-02, 3.581e-02, -8.189e-02, 1.889e-01, -1.276e-01)); + r += mul(s0_2, M4(-3.966e-02, 6.965e-02, -1.673e-01, -1.664e-03, 1.432e-01, -4.040e-03, 2.888e-01, 1.322e-01, -1.117e-01, 9.460e-02, 3.472e-02, -2.666e-01, 3.271e-02, 4.691e-02, 1.412e-01, 3.633e-01)); + r += mul(s0_3, M4(3.393e-01, -8.829e-02, 1.219e-01, -2.154e-02, 5.591e-02, -2.134e-02, -2.328e-01, 7.491e-02, 3.801e-02, -2.387e-01, 4.726e-02, -8.321e-02, 1.396e-02, 1.148e-01, 6.694e-02, -4.088e-02)); + r += mul(s0_4, M4(1.191e-01, 1.926e-01, 1.550e-01, 1.086e-01, -2.196e-01, 7.820e-02, -1.490e-01, -2.558e-01, -5.860e-02, -8.136e-02, -6.774e-03, -5.233e-02, -1.020e-01, 1.495e-01, -2.110e-01, -1.891e-01)); + r += mul(s0_5, M4(1.661e-03, 8.923e-03, 3.049e-02, -1.079e-01, 1.894e-02, -1.517e-01, -3.742e-02, 1.854e-02, -6.763e-02, -1.833e-01, 5.647e-02, -7.094e-02, -2.422e-03, 2.143e-01, 5.749e-01, -2.184e-01)); + r += mul(s0_6, M4(-6.715e-03, 1.537e-01, 4.878e-02, 3.618e-03, -1.182e-01, -4.082e-02, 7.658e-02, 5.044e-02, -2.996e-02, -3.596e-02, 1.077e-01, -1.437e-01, -1.350e-02, -1.825e-01, -1.276e-01, -1.177e-01)); + r += mul(s0_7, M4(-2.086e-01, -8.139e-02, 1.454e-01, 5.074e-02, 6.160e-03, 3.910e-01, 2.105e-01, 5.321e-02, -1.713e-01, 5.953e-02, 1.654e-01, -1.192e-01, 1.336e-01, 2.573e-01, -2.016e-01, 3.498e-02)); + r += mul(s0_8, M4(6.900e-02, -1.446e-01, -1.762e-01, 2.112e-01, 1.580e-01, 2.114e-01, 9.015e-03, 1.532e-01, 8.336e-03, 5.163e-02, 2.668e-02, -1.992e-01, 1.748e-01, 6.060e-01, 6.609e-04, -1.562e-01)); + r += mul(s1_0, M4(-7.974e-02, -5.734e-02, 4.343e-02, 1.315e-01, 5.257e-02, 2.619e-02, 1.474e-01, 2.785e-02, -2.122e-01, 1.286e-01, 8.247e-02, -4.775e-02, -8.110e-02, -7.834e-02, 1.119e-01, 6.464e-02)); + r += mul(s1_1, M4(-1.791e-01, -1.306e-01, -9.517e-02, -9.715e-02, 1.383e-01, -7.254e-02, 
2.996e-02, -1.071e-01, 6.347e-03, 3.042e-02, -2.237e-01, 4.132e-02, 7.086e-02, -2.773e-01, 1.161e-01, 2.955e-01)); + r += mul(s1_2, M4(6.846e-02, 3.698e-02, 5.577e-02, 1.688e-01, 6.940e-02, -4.380e-02, 2.417e-01, 1.075e-01, 4.557e-02, 6.876e-03, -4.562e-02, -7.453e-02, 1.256e-01, -1.922e-01, -2.465e-01, 3.321e-01)); + r += mul(s1_3, M4(5.729e-02, -7.859e-02, 1.990e-01, 1.460e-01, 3.100e-02, 8.277e-02, 3.022e-02, 1.528e-02, 1.802e-01, 7.794e-02, 6.750e-02, 8.433e-03, -1.899e-01, 6.637e-02, 1.012e-01, -6.952e-02)); + r += mul(s1_4, M4(2.476e-01, 2.536e-01, 4.014e-01, 2.553e-01, 1.488e-02, 2.272e-01, 1.149e-01, -1.419e-01, 8.072e-02, -3.484e-02, -6.580e-02, -2.480e-04, -2.329e-01, -7.622e-02, -3.141e-01, -2.013e-01)); + r += mul(s1_5, M4(-8.155e-04, -8.081e-02, 5.585e-02, -1.909e-01, 1.755e-01, -4.790e-02, 1.461e-01, 2.156e-02, 1.235e-01, -4.207e-02, 6.835e-02, -7.678e-02, -5.357e-02, -3.332e-01, 6.888e-02, 3.389e-01)); + r += mul(s1_6, M4(9.778e-02, -1.402e-01, 2.056e-01, 2.418e-01, -1.001e-01, 4.984e-02, 1.329e-01, -8.407e-02, -1.903e-01, 7.319e-02, 1.819e-01, -1.242e-01, 6.291e-02, -9.227e-02, -6.650e-02, 2.091e-02)); + r += mul(s1_7, M4(-5.745e-02, -2.956e-02, -2.382e-01, 1.047e-01, -2.035e-02, 1.017e-01, 3.915e-02, 3.923e-03, -1.076e-01, -1.961e-01, -1.103e-01, -1.057e-01, -7.928e-02, -4.208e-01, 3.675e-02, -1.746e-01)); + r += mul(s1_8, M4(2.269e-02, -8.960e-02, 2.786e-02, 1.194e-01, -7.041e-02, -4.548e-02, 3.849e-03, 1.108e-02, 4.179e-02, -6.224e-03, -1.517e-01, -3.722e-02, -2.309e-01, -4.689e-01, -9.688e-03, -1.259e-01)); + r += mul(s2_0, M4(-2.010e-02, 5.144e-02, 6.034e-02, 7.541e-03, 1.314e-01, -7.879e-02, -1.082e-01, 3.129e-02, -4.445e-02, 1.534e-01, 2.469e-02, 1.628e-01, 1.400e-01, -3.228e-02, -1.776e-03, 5.769e-02)); + r += mul(s2_1, M4(1.808e-01, -1.405e-01, 6.782e-02, -4.191e-02, 2.192e-01, -1.407e-01, 1.498e-01, -2.685e-02, 1.277e-01, -2.999e-02, 7.470e-03, -1.035e-01, 2.323e-01, 9.520e-02, -1.943e-01, 6.655e-02)); + r += mul(s2_2, M4(4.662e-02, 
1.309e-01, 2.511e-02, 1.753e-01, -1.090e-01, 1.047e-01, 3.579e-01, 8.986e-02, 1.825e-01, 9.717e-03, 4.552e-02, 6.842e-03, 8.078e-02, -8.883e-02, -3.848e-01, 3.532e-01)); + r += mul(s2_3, M4(-3.281e-02, 1.429e-01, 4.601e-02, -9.739e-02, 2.633e-01, -1.107e-01, -3.663e-01, -1.183e-01, -1.530e-01, -1.636e-01, -1.450e-01, 1.975e-01, -4.363e-02, 2.147e-02, -1.104e-02, 1.039e-01)); + r += mul(s2_4, M4(-2.343e-02, 6.642e-02, 2.882e-01, 1.283e-01, 4.546e-01, 2.547e-01, 3.903e-01, -3.050e-01, -1.711e-01, -3.089e-01, -2.291e-01, 4.552e-02, -2.052e-03, -1.387e-01, 1.635e-01, -3.818e-01)); + r += mul(s2_5, M4(7.496e-02, -1.952e-01, -2.334e-02, -4.991e-02, 7.087e-02, 1.786e-01, 3.642e-01, 1.281e-01, 5.938e-01, -1.237e-01, 3.925e-02, -3.615e-02, 4.378e-02, -1.997e-01, -8.283e-02, -2.681e-01)); + r += mul(s2_6, M4(6.869e-03, -5.420e-02, -3.790e-03, -7.131e-02, -6.633e-02, 1.967e-01, -1.449e-02, 4.217e-01, 3.091e-01, 9.026e-02, 1.471e-03, 1.838e-01, -1.607e-01, 2.643e-02, -1.177e-03, 3.705e-03)); + r += mul(s2_7, M4(5.169e-02, 7.583e-02, 7.764e-02, -8.519e-02, -4.837e-02, -6.246e-02, 4.484e-01, -2.561e-01, 4.161e-01, 3.703e-01, -1.218e-01, 4.129e-01, -2.863e-01, -2.196e-03, 3.475e-01, -1.904e-01)); + r += mul(s2_8, M4(-4.862e-02, 3.213e-01, 1.736e-02, -1.612e-02, -9.350e-02, -1.273e-01, -3.268e-02, -6.336e-02, 1.920e-01, -6.951e-02, 3.064e-03, -1.035e-01, 9.870e-02, -3.462e-01, -1.288e-01, 5.762e-02)); + r += mul(s3_0, M4(-2.217e-01, 1.547e-01, 5.870e-03, -1.022e-01, 2.492e-01, -2.678e-02, -1.080e-01, 8.747e-02, 1.652e-01, 1.323e-01, -2.610e-02, 6.079e-03, 2.915e-03, -3.514e-02, 8.385e-02, -6.264e-02)); + r += mul(s3_1, M4(-9.377e-02, -4.420e-02, 5.238e-02, -2.172e-01, 1.824e-01, -3.558e-02, -2.377e-02, 1.185e-01, 5.646e-02, 2.368e-01, -4.782e-02, -2.171e-01, 1.230e-01, 1.990e-01, 5.330e-02, -5.694e-02)); + r += mul(s3_2, M4(5.045e-02, 2.208e-02, 5.464e-02, 1.499e-01, -1.315e-02, 1.246e-01, 2.056e-01, 8.074e-02, -3.316e-02, 1.584e-01, 5.447e-02, -5.966e-02, -8.432e-02, 4.824e-02, 
-1.086e-01, -5.020e-02)); + r += mul(s3_3, M4(1.809e-01, 5.069e-02, 5.310e-02, -1.384e-01, -1.275e-01, -1.509e-01, -2.822e-01, -5.726e-02, -4.277e-02, -7.647e-02, -2.646e-01, 6.126e-02, -5.023e-03, -5.288e-02, -4.901e-02, 7.492e-02)); + r += mul(s3_4, M4(1.318e-01, -3.723e-01, 4.562e-01, -5.841e-01, 4.835e-02, -1.168e-01, 4.724e-02, 8.787e-02, -3.497e-01, -1.297e-01, -2.496e-01, -2.261e-01, 3.500e-02, 3.051e-01, 2.755e-01, -1.225e-02)); + r += mul(s3_5, M4(9.279e-02, 1.468e-01, 6.113e-01, -1.173e-01, -9.417e-02, 9.222e-02, -5.217e-02, 8.464e-02, 6.096e-02, 9.554e-02, 1.285e-01, -1.953e-01, 6.569e-02, -2.118e-01, -3.568e-02, -1.352e-01)); + r += mul(s3_6, M4(-1.768e-01, 8.608e-02, 1.309e-01, -1.416e-01, 8.261e-03, -1.316e-01, 7.054e-02, 4.611e-03, 3.212e-02, 2.430e-02, 9.214e-02, 6.280e-03, -2.185e-02, -3.935e-02, 5.571e-02, 4.193e-02)); + r += mul(s3_7, M4(8.105e-02, -1.591e-01, 3.039e-01, -9.778e-02, 3.951e-02, -3.152e-01, -1.966e-01, -2.455e-01, -3.366e-02, -7.204e-02, -8.174e-03, -7.198e-02, -6.558e-03, 1.123e-01, -9.551e-02, 6.672e-03)); + r += mul(s3_8, M4(1.235e-01, 3.982e-02, 3.039e-01, 1.235e-01, 2.316e-02, -8.746e-02, 2.213e-02, 1.028e-01, -1.428e-01, 2.836e-02, -3.352e-02, 4.037e-03, 6.577e-02, 1.374e-01, 1.652e-02, -1.878e-02)); + r += mul(s4_0, M4(1.246e-01, 1.239e-01, -9.360e-03, 2.423e-01, -1.773e-01, 2.159e-01, -2.099e-01, 2.785e-02, -1.292e-01, -3.997e-02, -3.523e-02, 6.947e-02, -1.759e-02, 4.929e-02, 7.086e-02, -2.629e-02)); + r += mul(s4_1, M4(5.799e-02, -3.160e-02, -4.384e-02, -6.587e-03, 2.184e-01, -2.625e-02, -2.947e-02, -8.658e-03, -9.866e-03, -1.366e-01, -4.666e-02, -1.631e-02, -2.408e-01, -2.021e-02, 2.256e-02, -1.333e-01)); + r += mul(s4_2, M4(-3.149e-02, -8.892e-03, 5.679e-02, 1.374e-02, -1.574e-01, 9.301e-02, -9.448e-02, -2.024e-01, 4.531e-02, 1.678e-01, -1.825e-01, -1.331e-02, 9.388e-03, -1.343e-01, -4.238e-02, 3.823e-02)); + r += mul(s4_3, M4(-3.200e-02, 1.715e-01, 4.701e-02, -1.146e-02, -3.603e-02, -5.154e-02, 4.062e-02, -8.847e-02, 
6.117e-02, 1.044e-01, -6.379e-02, -1.141e-01, 1.272e-01, 1.586e-02, -7.757e-02, -1.664e-01)); + r += mul(s4_4, M4(-5.916e-02, -9.493e-02, -2.504e-01, -8.049e-03, 1.603e-01, -1.276e-01, -6.224e-03, -1.402e-01, 1.848e-01, -9.695e-02, 1.695e-01, 1.547e-01, 2.973e-01, 2.754e-02, 1.808e-01, 1.692e-03)); + r += mul(s4_5, M4(-3.831e-02, 7.519e-02, 1.461e-01, 1.416e-01, 1.063e-02, -1.076e-01, -1.000e-01, -4.520e-02, -9.602e-02, 1.353e-01, -3.368e-01, 3.250e-02, -1.229e-01, -5.047e-02, -1.801e-01, 1.681e-02)); + r += mul(s4_6, M4(-5.671e-02, -2.402e-01, 8.270e-03, 1.744e-02, -1.152e-01, 8.024e-02, 2.130e-01, 5.225e-02, 6.645e-02, 3.031e-02, 8.195e-02, -1.439e-01, 2.048e-01, -1.359e-01, 1.120e-01, -3.217e-02)); + r += mul(s4_7, M4(-1.605e-01, 6.211e-02, 9.125e-02, 8.926e-02, 8.750e-02, -1.852e-01, -2.375e-01, 1.248e-01, 3.083e-02, 4.785e-02, 1.139e-03, -5.040e-03, 3.171e-02, 2.389e-01, 6.904e-02, 2.381e-02)); + r += mul(s4_8, M4(-7.120e-02, -1.292e-01, -1.611e-01, -9.858e-02, -2.775e-02, 2.880e-02, 2.723e-02, -4.047e-02, -3.972e-02, -2.975e-02, -2.026e-01, 7.687e-02, -8.534e-02, 1.556e-01, 3.034e-01, -5.549e-02)); + r += mul(s5_0, M4(-9.213e-02, -5.440e-02, -4.894e-02, 3.393e-02, 1.027e-01, 2.624e-02, -4.883e-02, -6.931e-03, -1.735e-01, 2.098e-02, 5.265e-02, 6.680e-03, -8.395e-02, -4.679e-02, 6.241e-02, 4.699e-02)); + r += mul(s5_1, M4(-1.782e-01, 7.983e-03, -3.259e-02, -1.868e-01, 1.215e-01, -1.321e-01, 5.041e-02, -4.801e-02, 1.265e-01, -1.372e-01, -3.296e-02, 1.442e-01, 3.257e-02, 1.222e-01, -1.512e-02, -1.479e-01)); + r += mul(s5_2, M4(-2.983e-02, 3.076e-02, 8.010e-02, 2.028e-01, -3.401e-02, -9.389e-02, 1.362e-01, -1.877e-01, 1.574e-01, 9.401e-02, 4.403e-02, 8.403e-02, 4.736e-02, -9.071e-02, -2.370e-02, -2.582e-02)); + r += mul(s5_3, M4(2.278e-01, 1.284e-01, 4.590e-02, 1.622e-02, 1.337e-01, 7.761e-02, 1.359e-02, -2.618e-02, 4.119e-02, -2.623e-02, 2.930e-02, -2.091e-01, -1.855e-01, 6.665e-02, 8.423e-02, 8.824e-03)); + r += mul(s5_4, M4(-9.664e-02, 4.501e-02, 2.264e-02, 
-1.253e-01, 1.688e-01, 1.336e-01, 3.444e-01, 1.840e-01, -3.033e-02, -3.596e-01, 4.561e-02, 2.575e-02, 9.805e-02, 1.956e-01, -1.676e-01, 2.728e-01)); + r += mul(s5_5, M4(-1.832e-02, -8.205e-02, 1.263e-01, -1.139e-02, -6.246e-02, -1.061e-01, -4.148e-02, 5.692e-02, -1.353e-01, 1.976e-01, -1.707e-01, -1.122e-01, -1.321e-02, -1.263e-01, -1.884e-01, -1.639e-01)); + r += mul(s5_6, M4(-4.451e-02, -4.525e-02, -4.485e-02, 8.385e-02, -6.119e-03, 1.204e-01, 9.351e-02, 5.574e-02, 9.341e-02, 9.519e-02, -8.570e-02, 5.382e-02, -1.132e-01, -1.837e-01, 2.056e-02, 9.329e-02)); + r += mul(s5_7, M4(1.321e-01, 9.413e-02, -1.748e-02, 2.099e-01, 3.750e-02, -7.634e-02, -2.838e-02, 1.046e-01, -2.752e-01, 5.688e-02, 2.894e-01, 6.021e-02, -2.231e-01, -7.531e-01, -3.799e-01, -2.388e-01)); + r += mul(s5_8, M4(3.437e-02, 1.477e-01, 8.038e-02, -1.803e-02, -8.878e-02, -5.580e-03, 1.168e-01, -4.627e-02, -7.544e-03, -8.881e-02, -7.309e-03, 1.347e-02, 1.249e-01, -1.073e-01, 1.571e-01, 2.393e-01)); + r += mul(s6_0, M4(1.056e-01, -1.655e-01, 6.628e-02, -6.710e-02, 6.007e-02, -1.825e-01, -9.008e-02, 4.077e-02, -4.660e-02, 8.993e-02, -7.418e-02, 5.962e-02, 9.706e-02, 1.196e-01, 3.072e-02, -1.198e-01)); + r += mul(s6_1, M4(-2.106e-01, -1.708e-01, -1.243e-01, 2.257e-01, -8.822e-02, -1.948e-01, -4.944e-02, 1.032e-01, 8.067e-02, -2.809e-02, 1.437e-02, -8.750e-02, 2.549e-01, 4.931e-02, -8.305e-02, 6.867e-02)); + r += mul(s6_2, M4(-5.325e-02, -1.668e-01, 1.553e-01, -5.354e-02, 7.197e-02, -1.310e-01, 7.045e-02, 6.476e-02, 9.089e-02, 5.229e-02, 1.479e-01, 8.049e-02, 9.977e-03, 1.119e-01, 2.414e-01, 7.301e-02)); + r += mul(s6_3, M4(-4.870e-01, 1.024e-01, 1.018e-01, 5.722e-01, -1.674e-01, 4.754e-02, 1.592e-02, 9.791e-02, 1.142e-01, 1.606e-01, 1.078e-01, -7.011e-02, -3.353e-02, -6.611e-02, -1.191e-01, 2.364e-01)); + r += mul(s6_4, M4(-1.742e-01, 3.119e-01, 2.820e-01, 7.737e-01, 1.333e-01, -2.364e-01, 2.087e-02, -1.183e-01, 7.062e-02, -4.126e-02, 1.620e-01, 1.013e-01, -6.792e-02, 1.401e-01, -2.499e-02, 7.562e-02)); 
+ r += mul(s6_5, M4(1.543e-01, 1.291e-01, 9.007e-02, -2.081e-01, -6.102e-02, -2.928e-02, -2.347e-02, -2.141e-01, -1.020e-02, 3.410e-02, -6.463e-02, -3.068e-02, -2.030e-01, -3.017e-01, 6.632e-02, 1.002e-01)); + r += mul(s6_6, M4(-2.041e-01, -1.945e-02, 4.302e-01, -1.541e-01, 1.051e-01, -2.894e-01, -5.986e-02, 1.052e-01, -5.165e-02, -3.485e-02, 1.858e-02, -1.355e-02, -5.686e-02, 1.246e-01, -4.571e-03, -4.745e-02)); + r += mul(s6_7, M4(-3.618e-02, 1.593e-01, 3.889e-01, 6.082e-02, 9.454e-03, -7.776e-02, 3.672e-02, 1.174e-01, -6.291e-02, 2.715e-02, 6.512e-03, -1.539e-02, -4.770e-02, -2.034e-02, -3.059e-02, -1.692e-01)); + r += mul(s6_8, M4(6.489e-02, -1.127e-01, -6.162e-03, 6.240e-02, 4.506e-02, -1.909e-01, -7.385e-03, 6.463e-02, -4.856e-02, -1.004e-01, -1.271e-02, -1.031e-01, 1.637e-01, 1.260e-01, 9.375e-02, 8.475e-02)); + r += mul(s7_0, M4(-1.056e-01, -6.793e-02, -2.290e-02, -5.765e-02, -3.109e-03, 8.130e-02, 9.209e-02, -5.064e-02, 6.955e-03, 1.225e-01, -1.973e-01, -2.322e-02, 1.099e-01, -4.783e-02, 4.413e-02, -1.477e-01)); + r += mul(s7_1, M4(7.251e-02, -9.158e-02, 1.464e-01, 3.379e-02, 1.043e-01, 8.307e-02, 7.843e-02, 1.139e-01, 3.996e-01, 1.389e-01, -3.900e-02, 2.640e-01, 2.734e-01, 1.025e-01, -6.518e-03, 1.987e-02)); + r += mul(s7_2, M4(-1.926e-02, -1.240e-01, -7.366e-02, -1.574e-02, 6.097e-02, 8.307e-02, 1.858e-01, 1.307e-01, 1.268e-01, 4.671e-03, -7.674e-03, -4.922e-02, -2.716e-02, 4.860e-02, 6.937e-02, -2.109e-02)); + r += mul(s7_3, M4(-3.253e-02, -4.109e-02, 1.668e-01, -3.072e-02, -1.440e-01, 1.336e-01, 6.707e-03, -4.122e-02, 1.029e-01, 1.450e-01, 7.241e-02, -5.743e-02, -8.728e-02, 3.656e-02, -2.325e-02, 3.032e-03)); + r += mul(s7_4, M4(6.291e-02, 7.310e-02, 6.814e-02, 5.994e-02, -5.259e-02, -6.363e-02, -1.033e-01, -1.411e-01, 3.942e-02, 2.257e-01, 2.250e-01, 3.676e-02, -2.449e-01, -1.138e-01, -8.450e-03, -7.929e-02)); + r += mul(s7_5, M4(-5.406e-02, -5.043e-02, -7.410e-03, 2.141e-02, 3.475e-02, -8.651e-03, -3.283e-02, -4.722e-02, -2.018e-01, 1.197e-02, 
-9.335e-02, -4.392e-02, -6.526e-02, -2.383e-01, -6.525e-02, 3.451e-01)); + r += mul(s7_6, M4(9.223e-03, 3.934e-02, -1.763e-01, -2.859e-02, -3.031e-02, 7.242e-02, 4.034e-02, -1.797e-01, -2.029e-01, -1.904e-01, -5.353e-02, 1.297e-02, 4.330e-02, 1.548e-01, -8.438e-03, -3.724e-02)); + r += mul(s7_7, M4(9.916e-02, 7.321e-02, 1.276e-01, 1.453e-01, -8.301e-02, 1.738e-01, 2.930e-01, -3.099e-01, 2.716e-02, -8.043e-02, 5.851e-02, -5.140e-02, 1.389e-01, 2.330e-01, -1.324e-01, 7.324e-02)); + r += mul(s7_8, M4(-1.500e-01, 3.048e-02, -3.140e-02, -4.852e-02, -3.718e-02, 2.308e-02, -7.317e-03, -9.998e-02, 4.094e-02, -2.750e-01, -1.856e-01, -2.096e-02, 7.663e-02, 2.274e-01, 1.548e-01, 6.048e-02)); + r += V4(-1.495e-02, -2.023e-02, 2.264e-02, -1.147e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.953e-02, -1.043e-02, -1.914e-02, -1.432e-01, -5.304e-03, 8.359e-02, -1.140e-01, -7.852e-02, -4.640e-02, -2.001e-01, 1.919e-01, 1.548e-01, 8.743e-02, 9.778e-02, 1.424e-02, -2.270e-01)); + r += mul(s0_1, M4(2.060e-02, 1.248e-01, -4.010e-02, 1.965e-01, -2.438e-02, -4.201e-02, 7.033e-03, 4.888e-02, -6.871e-02, 6.190e-02, 7.333e-02, -3.403e-01, -5.559e-02, 1.867e-01, -1.300e-01, 2.189e-01)); + r += mul(s0_2, M4(-3.874e-02, 1.285e-01, 3.391e-02, -1.204e-01, 4.428e-02, -1.871e-01, -1.593e-02, 6.042e-03, 3.254e-02, -1.973e-02, 1.518e-01, 
-1.300e-01, 1.446e-02, -2.044e-01, 2.918e-02, 2.606e-01)); + r += mul(s0_3, M4(4.960e-02, -6.399e-02, -9.520e-02, -1.839e-01, 2.173e-01, -2.356e-02, -1.127e-01, 8.063e-02, -1.667e-01, 9.172e-02, -4.671e-02, 1.245e-01, 6.115e-02, -9.430e-02, -2.204e-01, -4.703e-02)); + r += mul(s0_4, M4(-1.679e-01, 1.597e-01, -2.778e-01, 1.738e-01, -1.146e-01, -1.453e-01, -1.057e-01, 1.756e-01, 1.647e-01, 1.490e-01, 9.088e-02, -4.520e-01, 9.319e-03, 1.503e-02, -3.174e-01, -1.853e-01)); + r += mul(s0_5, M4(-5.286e-02, -3.716e-03, -2.907e-02, -2.483e-01, 6.450e-02, -3.113e-02, -9.256e-03, -7.021e-02, 1.031e-01, -8.872e-02, 9.891e-02, -1.813e-01, 1.220e-01, 9.982e-03, 1.022e-01, 6.488e-01)); + r += mul(s0_6, M4(-8.355e-02, -1.048e-01, -2.398e-01, 1.720e-01, -1.541e-01, 4.061e-02, -1.442e-01, 9.865e-03, -4.325e-01, -1.424e-01, -2.084e-01, 3.330e-01, 2.950e-02, 1.551e-01, 2.286e-01, -1.593e-01)); + r += mul(s0_7, M4(1.490e-01, -3.721e-02, 6.991e-02, -2.120e-02, 1.801e-01, 8.975e-02, 1.922e-01, -8.228e-02, 8.051e-02, -2.346e-01, 1.200e-01, -1.173e-01, -2.726e-01, -1.120e-01, -1.173e-01, 2.100e-01)); + r += mul(s0_8, M4(2.178e-02, 8.923e-02, -4.526e-02, -4.848e-02, 1.993e-01, -1.361e-02, 1.385e-02, 2.657e-01, 1.233e-01, -2.770e-01, 2.021e-01, -8.325e-02, -7.663e-02, 8.893e-02, 8.350e-02, 2.419e-01)); + r += mul(s1_0, M4(1.109e-01, 6.740e-02, -1.106e-01, -1.285e-01, -5.509e-02, 2.375e-03, -1.023e-01, -3.401e-02, 2.224e-02, 2.057e-01, -4.947e-02, 6.982e-02, -2.823e-02, 1.032e-01, -1.465e-01, -1.322e-01)); + r += mul(s1_1, M4(-2.185e-01, 2.893e-01, 2.807e-02, -3.932e-02, -8.755e-02, 1.413e-01, -5.673e-02, -1.102e-02, 1.019e-01, 1.588e-01, 8.734e-03, -1.707e-01, -9.751e-02, -7.912e-02, 1.111e-01, -5.010e-02)); + r += mul(s1_2, M4(1.031e-02, -2.234e-02, -5.363e-02, 5.568e-02, -4.657e-02, -1.581e-01, 3.861e-02, 1.389e-02, 7.192e-02, 6.914e-02, 2.167e-02, 9.141e-02, 1.584e-01, -4.665e-02, 1.057e-01, -1.803e-01)); + r += mul(s1_3, M4(-2.364e-02, 8.902e-02, 3.271e-02, -9.566e-02, 1.637e-01, 
-9.631e-02, -1.292e-01, -7.108e-02, -5.199e-02, -1.548e-01, 6.055e-02, 1.562e-01, -2.478e-01, 1.768e-02, 2.132e-01, 7.910e-02)); + r += mul(s1_4, M4(-2.331e-02, 2.652e-02, 2.280e-01, -7.540e-02, -1.734e-02, -1.351e-01, 9.421e-02, -1.589e-01, 3.353e-02, 3.798e-02, -1.973e-01, -1.582e-01, 9.918e-02, -1.993e-01, -1.849e-01, -5.645e-02)); + r += mul(s1_5, M4(-8.244e-02, -3.014e-03, -2.259e-02, -6.663e-02, -7.560e-02, -1.792e-01, 7.072e-02, 1.509e-01, 1.072e-01, 3.003e-02, -5.873e-02, 6.435e-02, -5.586e-02, 1.371e-01, -1.778e-01, 1.172e-01)); + r += mul(s1_6, M4(1.430e-01, 3.925e-02, -4.229e-02, 7.416e-02, -1.553e-01, -6.153e-02, -5.697e-02, 3.974e-03, 1.487e-02, 1.702e-01, -1.097e-01, 1.459e-01, -1.018e-01, -9.522e-02, 1.186e-01, -9.903e-03)); + r += mul(s1_7, M4(3.177e-01, -2.166e-01, 2.651e-02, -5.773e-02, -2.696e-02, -9.562e-02, 4.629e-02, 5.782e-04, -4.577e-02, 1.483e-01, -1.887e-01, 9.765e-03, -4.184e-01, -7.145e-02, -2.911e-01, 6.700e-02)); + r += mul(s1_8, M4(1.877e-01, -7.176e-02, 7.557e-02, 3.244e-02, -4.746e-03, -2.181e-02, 2.802e-02, 2.184e-01, 9.669e-03, 2.156e-02, 2.035e-02, -1.332e-01, -2.256e-02, 2.689e-02, -2.441e-02, -2.549e-01)); + r += mul(s2_0, M4(5.106e-02, 1.100e-01, -7.814e-02, 2.028e-02, -1.567e-01, 1.860e-01, 3.232e-01, 9.718e-02, 5.487e-02, 1.834e-01, -9.297e-02, 8.886e-02, -1.441e-01, -2.210e-01, -5.134e-02, 6.252e-02)); + r += mul(s2_1, M4(-4.781e-02, -2.123e-01, 4.822e-02, 3.936e-02, 1.046e-02, 4.684e-01, 2.696e-01, -1.558e-01, 1.541e-02, -1.177e-01, -5.005e-02, -1.295e-01, 1.704e-01, -2.501e-01, 6.061e-02, 2.970e-01)); + r += mul(s2_2, M4(1.695e-02, -1.034e-01, -6.278e-02, -5.928e-02, -3.629e-02, 6.065e-02, -7.926e-03, 1.698e-01, 1.410e-01, 1.694e-01, -2.007e-01, -1.189e-01, 8.948e-02, -3.077e-01, -2.389e-01, 1.502e-01)); + r += mul(s2_3, M4(3.987e-02, -4.753e-02, 1.210e-01, 7.428e-02, 1.577e-01, 4.927e-01, -5.874e-02, 6.032e-02, 6.508e-02, 2.733e-01, 8.902e-02, -8.632e-02, 2.456e-01, -2.898e-02, -3.099e-01, -2.062e-01)); + r += mul(s2_4, 
M4(2.310e-01, -1.338e-01, -5.528e-02, -6.663e-02, 5.035e-03, 5.237e-01, -2.444e-01, 1.748e-03, 1.092e-02, 1.659e-01, -5.375e-01, -1.727e-01, 1.075e-01, -3.335e-02, 1.716e-01, 2.911e-02)); + r += mul(s2_5, M4(-3.422e-02, 1.164e-01, -7.015e-02, 2.228e-01, -2.588e-01, 1.001e-01, -2.292e-01, 3.307e-01, 1.157e-01, -1.517e-01, -1.974e-01, 1.010e-01, -1.465e-01, 1.708e-01, 6.236e-02, -9.108e-02)); + r += mul(s2_6, M4(-1.071e-01, 1.134e-02, -8.607e-03, -3.712e-02, 1.710e-01, 2.361e-01, -1.406e-01, 1.209e-01, 2.425e-02, -9.058e-02, -3.232e-01, 4.471e-02, -1.308e-01, -3.260e-02, 6.854e-02, -6.492e-02)); + r += mul(s2_7, M4(-4.509e-02, 7.984e-02, 1.088e-01, -1.713e-01, 8.724e-02, -2.148e-01, -2.251e-01, -5.391e-01, -1.393e-02, -2.744e-01, -3.601e-01, 1.169e-01, 1.460e-01, 1.543e-01, 1.556e-01, -1.452e-01)); + r += mul(s2_8, M4(-3.656e-02, 1.428e-01, -1.008e-01, -8.022e-02, -1.002e-01, -1.470e-01, -6.616e-02, 1.897e-02, 1.665e-01, -1.197e-02, -1.142e-01, -5.160e-02, 6.965e-02, -5.536e-02, 2.038e-01, 4.050e-03)); + r += mul(s3_0, M4(-3.699e-02, -2.169e-01, -1.551e-01, -8.639e-02, -1.323e-01, -3.194e-01, 4.831e-02, -4.322e-02, -5.369e-02, -2.300e-01, 7.718e-02, 1.629e-01, -4.036e-02, -9.572e-03, -3.976e-02, 5.321e-02)); + r += mul(s3_1, M4(-2.518e-02, -1.041e-01, 7.633e-02, 1.902e-01, 1.493e-02, -1.167e-01, 1.430e-01, -6.275e-02, -5.291e-02, -1.949e-01, -1.360e-03, 2.395e-03, 5.281e-02, 3.305e-01, -9.706e-02, 1.571e-01)); + r += mul(s3_2, M4(-1.239e-01, 1.558e-02, -8.931e-02, 1.519e-01, -6.491e-02, -8.699e-02, 1.022e-04, 1.037e-02, 7.850e-04, -6.647e-02, -3.083e-03, 9.909e-02, 3.606e-02, -6.656e-02, -4.089e-02, -6.208e-02)); + r += mul(s3_3, M4(-2.321e-02, -1.324e-01, -2.979e-01, -2.022e-01, -1.156e-01, -2.492e-01, 2.480e-02, 1.023e-01, 1.111e-01, 1.108e-02, 4.745e-02, 2.682e-01, 1.655e-01, 1.510e-01, 2.804e-02, -6.706e-02)); + r += mul(s3_4, M4(7.570e-02, 1.188e-01, 1.951e-01, -1.867e-01, 5.990e-02, 3.134e-02, -1.611e-01, -2.741e-01, 9.583e-02, -5.437e-02, -1.426e-02, 
-1.455e-01, -2.097e-02, 2.090e-01, -3.508e-02, 1.004e-01)); + r += mul(s3_5, M4(-1.678e-01, -1.709e-01, 1.389e-01, 5.599e-01, 5.316e-02, -5.649e-02, -5.180e-02, 1.958e-01, -1.194e-01, 4.412e-02, -1.158e-02, 9.583e-02, 2.081e-01, -1.927e-02, 1.263e-01, 6.535e-02)); + r += mul(s3_6, M4(-1.998e-01, 7.452e-04, -1.024e-01, -1.500e-01, -1.898e-01, -1.200e-01, 2.673e-01, 1.930e-01, 6.552e-02, 4.753e-02, -5.766e-02, 1.452e-02, -2.033e-02, -1.847e-02, 4.007e-02, -4.012e-02)); + r += mul(s3_7, M4(-3.738e-02, 9.887e-03, 2.957e-01, -2.111e-01, 1.195e-01, 1.952e-02, -1.179e-01, -2.235e-01, -7.834e-02, -5.193e-02, -2.680e-02, 2.864e-02, -2.601e-02, 6.703e-02, -6.174e-02, 3.249e-02)); + r += mul(s3_8, M4(-1.530e-01, -8.802e-02, 1.475e-01, 2.635e-01, -9.074e-02, 9.380e-02, 4.124e-02, -7.946e-02, -1.148e-01, 1.286e-02, 4.742e-02, 2.332e-02, -1.977e-03, -5.675e-02, 1.096e-01, 1.150e-02)); + r += mul(s4_0, M4(-2.261e-01, -2.561e-01, 1.126e-02, 2.395e-01, 1.953e-02, -4.803e-02, -2.832e-01, 2.786e-01, 4.088e-02, 1.210e-01, -1.743e-01, -8.318e-02, -2.497e-02, 9.149e-02, -1.279e-01, -1.996e-01)); + r += mul(s4_1, M4(9.359e-02, -2.179e-01, -1.029e-01, -2.677e-01, 5.302e-03, 3.054e-01, 9.251e-02, -1.286e-01, 1.787e-04, 2.154e-02, -9.384e-02, 4.740e-02, 9.689e-02, 1.660e-02, -1.787e-01, 1.200e-01)); + r += mul(s4_2, M4(-1.114e-01, -3.321e-03, -4.210e-02, 4.867e-02, -1.530e-02, 1.223e-01, -1.452e-02, 1.470e-02, 2.443e-01, 1.265e-01, -2.683e-02, 8.435e-02, -2.650e-02, 1.006e-01, -1.267e-01, -1.657e-02)); + r += mul(s4_3, M4(2.259e-01, -2.401e-02, -7.352e-02, 4.411e-02, -1.591e-01, -4.921e-02, -8.919e-02, 1.951e-01, -3.401e-02, 2.066e-01, -4.425e-02, 1.086e-02, -2.287e-01, 1.715e-01, 1.947e-02, 3.394e-01)); + r += mul(s4_4, M4(-1.943e-01, 6.106e-02, -2.059e-01, 1.530e-01, -4.310e-01, -5.169e-02, 5.227e-03, -5.809e-02, -8.751e-03, 1.375e-01, 9.137e-02, -5.076e-02, -2.291e-01, 3.071e-01, 8.408e-02, -3.188e-01)); + r += mul(s4_5, M4(1.396e-01, 1.749e-03, -1.139e-01, 4.759e-03, 1.080e-01, 
-1.796e-01, 1.342e-01, 1.888e-02, 2.666e-01, 3.913e-02, -2.299e-01, -1.284e-01, 5.995e-02, -1.558e-03, 8.185e-02, -7.686e-02)); + r += mul(s4_6, M4(-1.669e-01, -6.859e-02, -7.372e-02, -9.100e-02, -3.056e-02, 5.829e-02, 1.240e-01, 1.077e-01, -5.259e-02, -6.470e-02, -1.078e-01, -4.296e-03, -8.721e-02, 4.027e-02, -5.710e-02, 8.209e-02)); + r += mul(s4_7, M4(-1.577e-01, 6.222e-03, -2.773e-01, 3.279e-02, 1.536e-01, -1.257e-01, -4.675e-02, -1.285e-01, -7.705e-02, -1.055e-01, 2.227e-02, -1.549e-01, 4.493e-01, -2.209e-01, 2.735e-01, 1.233e-01)); + r += mul(s4_8, M4(-4.798e-01, 1.189e-01, -2.058e-01, -1.575e-01, 8.871e-02, 8.258e-02, -4.181e-03, -5.046e-02, 2.217e-01, 5.726e-02, -1.620e-01, 1.539e-01, -1.201e-01, -1.012e-01, 5.392e-03, 3.764e-02)); + r += mul(s5_0, M4(1.165e-01, 1.608e-01, -7.487e-02, -1.509e-01, 9.449e-02, 3.221e-01, -3.382e-02, -1.816e-03, -3.550e-02, 1.265e-01, -6.539e-02, -1.075e-01, 3.394e-02, 2.355e-01, 3.894e-02, -8.796e-02)); + r += mul(s5_1, M4(-2.447e-02, 8.070e-02, 1.423e-01, 1.402e-01, 2.515e-02, 4.841e-01, 4.955e-02, -2.066e-01, -9.107e-02, -2.201e-02, 4.951e-02, -3.315e-03, 1.130e-01, -1.267e-01, -7.044e-02, 5.966e-02)); + r += mul(s5_2, M4(-1.221e-01, -7.627e-02, -9.912e-02, 3.213e-02, -1.224e-02, 1.525e-02, 5.261e-02, 7.614e-05, 1.389e-01, -2.451e-02, 2.648e-02, 1.036e-01, 1.281e-02, -5.334e-02, -3.525e-02, 4.862e-02)); + r += mul(s5_3, M4(-4.838e-02, -1.165e-01, 4.634e-02, 5.762e-02, -1.966e-01, 4.590e-03, -1.469e-01, 1.240e-02, -8.315e-02, 1.313e-01, 7.028e-03, -3.308e-01, 8.331e-02, -8.449e-02, -2.792e-01, -1.632e-01)); + r += mul(s5_4, M4(2.086e-01, -7.599e-02, 6.890e-02, -1.638e-01, 6.631e-02, -3.018e-01, 3.685e-01, 1.289e-01, -3.224e-02, 2.471e-01, 1.554e-01, -9.048e-02, 6.640e-02, -1.579e-01, 4.120e-02, 3.324e-01)); + r += mul(s5_5, M4(-8.094e-02, 7.154e-02, -2.721e-01, 3.947e-02, 1.108e-01, -9.913e-02, 7.985e-02, 7.660e-02, 1.077e-01, -7.017e-03, -8.944e-02, 6.666e-02, 1.555e-01, -3.792e-02, 2.088e-01, 1.484e-01)); + r += mul(s5_6, 
M4(1.184e-01, 8.033e-02, -1.298e-02, 1.090e-01, 7.464e-03, 5.399e-02, 6.435e-02, 9.456e-02, 1.882e-02, 7.983e-02, 6.878e-02, -3.361e-02, -1.222e-01, -1.425e-02, 2.428e-02, 7.745e-03)); + r += mul(s5_7, M4(1.102e-01, -1.544e-02, 1.597e-01, -4.132e-02, 1.377e-01, -2.824e-02, -8.234e-02, -2.761e-05, 8.093e-02, -2.793e-01, 1.530e-01, 8.385e-02, -7.603e-02, -2.025e-01, 2.766e-01, -1.428e-01)); + r += mul(s5_8, M4(3.781e-03, -1.045e-01, 1.448e-01, 5.691e-02, -5.157e-02, 5.607e-02, -5.108e-02, -3.895e-02, 1.424e-01, -6.146e-02, -9.942e-02, 1.364e-01, -1.161e-02, 7.988e-02, -1.966e-02, 1.264e-01)); + r += mul(s6_0, M4(5.254e-02, 4.286e-02, 2.203e-01, 2.235e-01, 1.933e-02, -1.109e-01, 8.487e-02, -9.477e-02, -5.130e-02, -4.777e-02, -2.017e-02, 3.128e-02, -6.470e-02, 1.543e-01, 8.668e-02, 1.235e-02)); + r += mul(s6_1, M4(1.808e-01, 7.979e-02, 1.731e-01, 1.029e-01, 2.928e-02, -2.398e-01, 2.685e-01, -5.568e-03, 1.759e-01, -9.958e-02, -1.266e-01, 9.668e-02, -9.320e-02, -1.677e-01, 2.248e-01, -9.537e-02)); + r += mul(s6_2, M4(-1.645e-02, -4.210e-02, 2.464e-01, 2.570e-01, -6.245e-02, -1.263e-01, 7.909e-03, 1.125e-01, 1.147e-02, -8.072e-03, 9.007e-03, -7.610e-02, 7.843e-02, -2.010e-01, -1.495e-02, -3.870e-02)); + r += mul(s6_3, M4(-3.565e-02, 5.785e-01, -9.211e-01, 5.079e-01, 5.528e-02, 6.960e-02, 2.126e-01, -4.858e-02, 4.311e-04, -7.106e-02, -9.924e-02, 3.560e-02, -4.707e-02, -9.750e-02, 1.573e-01, 2.595e-01)); + r += mul(s6_4, M4(9.734e-02, 1.154e+00, -2.324e-01, -1.876e-01, 1.616e-01, -4.130e-02, 5.493e-02, -1.671e-01, -2.353e-01, 1.169e-03, 1.057e-01, -2.784e-02, 3.860e-01, 1.029e-01, -4.524e-01, -1.442e-01)); + r += mul(s6_5, M4(3.244e-01, -6.583e-02, -5.003e-02, -6.686e-01, -5.430e-02, -8.618e-02, 8.184e-02, -5.639e-02, 4.921e-02, -4.133e-02, 1.289e-01, -1.323e-01, -6.690e-02, 1.474e-01, -6.345e-02, 5.825e-02)); + r += mul(s6_6, M4(-1.715e-01, -3.189e-01, -2.790e-01, -2.477e-01, 1.002e-01, -5.547e-02, 2.995e-02, -1.182e-01, 6.040e-02, 8.125e-02, -1.164e-01, -4.264e-02, 
2.022e-02, 1.063e-01, 3.899e-02, 1.597e-01)); + r += mul(s6_7, M4(-1.266e-01, -9.623e-02, 1.275e-01, -3.748e-02, 1.626e-01, -6.270e-02, 9.291e-02, 5.724e-03, 4.365e-02, 7.480e-02, 5.771e-02, 5.527e-02, -3.332e-01, 3.902e-02, -1.400e-01, 3.447e-06)); + r += mul(s6_8, M4(3.818e-01, 1.863e-01, -2.614e-02, -3.133e-02, 1.381e-01, -6.371e-02, 6.787e-02, 6.309e-02, -1.131e-01, -4.142e-02, -7.571e-02, 8.214e-02, -1.547e-01, -1.238e-01, 1.511e-02, 1.554e-01)); + r += mul(s7_0, M4(-5.391e-02, 1.323e-01, 4.757e-02, 4.474e-02, 5.920e-02, -4.543e-02, 3.262e-02, 2.424e-01, -2.397e-02, -1.617e-01, -1.591e-01, 2.583e-02, 5.164e-02, -8.457e-03, 2.784e-02, -4.374e-02)); + r += mul(s7_1, M4(8.171e-02, -4.498e-02, 1.671e-01, -2.266e-03, -1.252e-01, 1.708e-01, -2.352e-01, 3.477e-02, 8.995e-02, -3.851e-01, -1.649e-01, -4.319e-02, 8.272e-02, -9.280e-02, -1.689e-01, 4.940e-02)); + r += mul(s7_2, M4(-3.313e-02, 6.006e-02, -9.039e-03, 9.748e-02, -4.762e-02, -5.250e-03, -1.652e-01, 1.174e-01, 4.957e-02, -2.381e-01, 4.842e-02, -3.336e-02, 2.251e-01, -2.005e-02, -1.765e-01, -8.915e-02)); + r += mul(s7_3, M4(-1.958e-01, 1.081e-01, -8.218e-02, -5.008e-02, 2.882e-01, 2.378e-01, -6.291e-02, 1.016e-01, -2.227e-01, -1.629e-01, 7.849e-02, 6.485e-02, 5.065e-02, 9.825e-02, 4.433e-02, -6.809e-02)); + r += mul(s7_4, M4(-7.990e-02, 6.251e-02, -7.690e-02, -2.683e-02, -7.804e-02, 2.686e-02, 9.093e-02, 2.700e-02, -1.886e-02, 2.563e-01, 1.230e-01, -8.396e-02, 2.219e-01, -1.033e-01, 9.530e-03, 9.932e-02)); + r += mul(s7_5, M4(1.400e-02, -2.426e-02, 5.110e-02, 4.281e-02, -1.137e-01, -7.280e-02, 5.152e-02, -9.314e-03, 1.449e-01, 3.328e-02, 1.324e-01, -2.731e-01, -2.096e-01, 2.987e-02, 7.699e-03, 1.335e-01)); + r += mul(s7_6, M4(4.344e-02, -6.642e-02, -7.341e-02, -6.276e-02, -1.511e-01, 1.958e-01, -7.402e-02, -2.163e-02, 1.100e-01, 1.279e-01, -9.180e-02, -5.255e-02, 1.680e-01, 5.994e-02, -2.382e-02, 1.012e-02)); + r += mul(s7_7, M4(3.082e-02, -8.443e-02, 9.441e-02, 1.118e-02, -1.380e-01, 1.702e-02, 6.489e-02, 
7.571e-02, -1.530e-01, 4.445e-02, 1.333e-01, -8.167e-03, 4.990e-01, 5.790e-02, 1.082e-01, 5.093e-02)); + r += mul(s7_8, M4(2.108e-01, 1.091e-01, -1.877e-02, -5.456e-02, -1.412e-01, 8.228e-02, -1.366e-01, 8.877e-02, -1.840e-01, -7.525e-02, -6.979e-02, 7.806e-02, -1.131e-01, -1.048e-01, 8.550e-02, 1.826e-01)); + r += V4(-4.391e-02, -3.059e-02, 2.082e-02, 1.403e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.003e-01, -5.919e-02, 2.076e-01, 1.672e-02, 2.856e-02, -3.657e-01, -5.536e-02, 6.897e-02, -5.467e-02, -2.065e-01, -8.650e-02, 7.769e-02, 1.806e-01, -6.661e-02, 1.437e-01, -7.272e-02)); + r += mul(s0_1, M4(1.047e-01, -4.994e-03, 2.010e-02, -1.732e-01, 1.841e-02, -6.717e-02, -9.422e-02, 1.552e-01, -1.356e-01, 5.723e-03, 3.318e-01, 7.563e-02, -7.158e-02, -6.739e-02, -1.119e-01, -3.589e-02)); + r += mul(s0_2, M4(5.615e-02, -1.190e-01, 1.940e-02, 1.183e-01, 1.353e-01, -1.109e-01, -8.721e-02, -2.443e-01, -7.026e-02, 9.086e-03, 1.657e-01, 5.800e-02, 9.707e-02, 4.103e-02, 8.620e-02, -1.047e-01)); + r += mul(s0_3, M4(-1.864e-01, 5.525e-01, 5.940e-02, 1.066e-01, -6.391e-02, 2.615e-01, 1.369e-01, 1.755e-01, 5.718e-02, -3.624e-01, -2.133e-01, 2.429e-02, 1.918e-01, 2.540e-02, 1.998e-01, -1.374e-01)); + r += mul(s0_4, M4(-7.575e-02, -2.771e-01, 1.057e-01, 6.924e-02, 6.660e-02, -3.778e-02, -1.177e-01, -1.973e-01, 1.329e-01, 
-9.089e-02, 1.796e-01, 2.113e-01, 2.779e-01, 1.087e-01, 1.340e-01, 5.727e-02)); + r += mul(s0_5, M4(3.028e-01, -1.509e-01, 1.293e-02, 1.337e-01, -5.479e-02, -4.922e-03, 2.247e-01, 4.516e-02, -1.411e-01, -1.697e-01, 7.813e-02, 1.060e-02, -1.126e-01, 3.681e-01, 2.706e-01, -3.745e-01)); + r += mul(s0_6, M4(-1.866e-01, -1.746e-01, -6.487e-02, 6.742e-02, -1.552e-01, -5.886e-02, 1.360e-01, -1.544e-01, -1.652e-01, 1.706e-02, 2.617e-01, 1.328e-01, -3.640e-03, 9.480e-02, -1.287e-02, 3.921e-02)); + r += mul(s0_7, M4(1.243e-01, -9.323e-02, -2.042e-01, 1.297e-01, 9.513e-02, -1.313e-02, -1.341e-02, 1.389e-01, 6.648e-02, 1.188e-02, 4.082e-02, 3.604e-01, -2.582e-01, -5.481e-02, 9.386e-02, -2.171e-01)); + r += mul(s0_8, M4(-1.791e-01, 2.842e-02, 1.129e-01, -2.167e-02, 2.324e-01, 4.512e-02, 6.812e-02, -2.952e-02, -1.427e-01, -2.060e-01, 3.594e-02, 1.304e-01, -3.837e-01, 4.871e-02, 4.871e-01, -2.685e-01)); + r += mul(s1_0, M4(-7.620e-02, 8.123e-02, 3.190e-02, -1.187e-01, -9.578e-02, -1.791e-01, 2.089e-03, -1.004e-01, -3.081e-02, 7.196e-02, 2.389e-02, -5.165e-03, 5.603e-02, -8.371e-02, -7.499e-02, 2.005e-01)); + r += mul(s1_1, M4(1.862e-01, -2.538e-02, -2.103e-02, -5.109e-02, -4.727e-02, 4.574e-02, -6.642e-03, -2.504e-02, -6.596e-02, -7.968e-03, -7.350e-02, 1.135e-01, -1.027e-01, -1.208e-01, -3.533e-02, 2.085e-01)); + r += mul(s1_2, M4(-1.857e-02, -1.421e-01, -5.308e-02, 5.773e-03, -4.630e-02, -4.522e-02, 1.625e-02, -9.741e-02, 9.684e-02, -7.835e-02, 7.179e-02, 8.255e-02, -7.932e-02, 5.574e-02, 3.519e-02, 1.743e-01)); + r += mul(s1_3, M4(-8.234e-03, -3.040e-01, 1.035e-01, -3.756e-01, -1.978e-01, 1.257e-01, 4.788e-02, 1.895e-01, -1.629e-01, -8.749e-02, -2.094e-03, -3.859e-02, 1.008e-01, -1.914e-01, -1.519e-02, 9.775e-02)); + r += mul(s1_4, M4(-2.454e-01, 3.034e-01, -1.188e-02, -9.649e-03, 1.881e-01, -8.000e-02, -1.499e-01, -1.489e-01, 3.142e-01, -2.846e-01, -8.070e-02, -3.620e-02, 1.075e-01, 2.874e-01, -3.062e-01, 2.726e-01)); + r += mul(s1_5, M4(-3.710e-03, 2.431e-01, 1.270e-01, 
-1.509e-01, -4.571e-02, -4.919e-03, 3.846e-03, -5.178e-02, -1.865e-02, 1.187e-01, 4.861e-02, -1.342e-01, -1.103e-01, 1.058e-01, -2.100e-01, 2.385e-01)); + r += mul(s1_6, M4(-9.861e-02, 3.686e-01, 1.439e-02, -1.672e-01, -1.270e-01, -7.478e-02, 5.627e-02, -1.598e-01, 2.436e-01, 1.530e-03, -4.498e-02, -1.594e-01, 8.192e-02, 6.402e-02, -1.798e-02, -4.105e-02)); + r += mul(s1_7, M4(1.852e-01, -2.272e-01, 1.581e-03, 6.883e-02, 1.501e-01, -2.629e-01, -5.432e-02, 1.199e-01, 4.607e-02, 1.023e-01, -5.083e-02, -1.080e-01, 1.099e-02, -2.751e-01, -2.276e-02, -2.410e-02)); + r += mul(s1_8, M4(6.250e-02, -1.419e-01, -2.915e-03, 1.734e-01, 1.879e-01, -1.916e-02, -9.208e-02, 5.980e-02, 3.169e-02, 6.434e-04, 2.046e-02, -3.522e-03, 2.520e-01, -8.233e-02, -1.115e-01, 2.729e-01)); + r += mul(s2_0, M4(-4.664e-04, -4.989e-02, -1.104e-01, 1.057e-01, -1.360e-01, 2.355e-01, -4.501e-02, -1.698e-01, -2.486e-02, -9.401e-02, -1.214e-01, -1.000e-01, -4.274e-02, -1.672e-02, -2.114e-01, 2.534e-02)); + r += mul(s2_1, M4(4.812e-02, -9.418e-02, -1.018e-01, -8.638e-03, 3.706e-02, 7.197e-02, -2.987e-02, 3.142e-02, 1.394e-01, -3.675e-01, -7.134e-02, -5.112e-02, -4.693e-02, -2.533e-01, -3.831e-01, 3.512e-01)); + r += mul(s2_2, M4(9.591e-02, -1.676e-01, -9.105e-02, 2.459e-02, -1.919e-01, -1.080e-01, -4.686e-02, -2.874e-01, 1.909e-02, -2.085e-01, -2.761e-01, -2.017e-01, 3.358e-01, -4.021e-01, -3.048e-01, 3.027e-01)); + r += mul(s2_3, M4(-1.724e-01, -7.723e-02, 4.065e-02, 8.664e-02, 9.049e-02, 2.755e-01, -2.836e-01, -3.475e-01, 1.131e-01, -6.232e-03, 5.263e-02, -1.029e-01, -2.092e-01, 9.432e-04, -1.458e-01, 1.312e-01)); + r += mul(s2_4, M4(-2.089e-01, 2.550e-01, 2.688e-01, -1.896e-01, 2.094e-01, -1.791e-01, -8.042e-02, 8.799e-02, -2.168e-02, 2.295e-01, 2.426e-01, -1.890e-02, 5.267e-02, -3.035e-01, -1.247e-01, -4.930e-01)); + r += mul(s2_5, M4(2.220e-02, 1.895e-01, 5.114e-02, -1.285e-01, -3.346e-01, -2.613e-01, 1.481e-01, 1.299e-01, -6.024e-02, -3.446e-01, 5.558e-02, -3.976e-01, 5.876e-02, 2.849e-02, 
-1.475e-02, -1.995e-01)); + r += mul(s2_6, M4(5.093e-02, 1.313e-02, 1.755e-02, 6.282e-03, -1.416e-01, -1.481e-01, 4.453e-02, -2.304e-01, -2.065e-02, 4.309e-02, 1.393e-01, -6.992e-02, -1.607e-01, -3.332e-01, -4.975e-02, -2.214e-02)); + r += mul(s2_7, M4(2.077e-01, -1.100e-01, 3.094e-02, 3.014e-02, 2.021e-01, -8.420e-02, -7.505e-02, 4.633e-02, -1.732e-01, 1.133e-01, 3.275e-01, 1.928e-01, 1.625e-01, 3.875e-03, -1.657e-01, 1.779e-03)); + r += mul(s2_8, M4(3.694e-02, 5.474e-03, 5.581e-02, -3.022e-02, -1.588e-01, 1.027e-01, 2.136e-01, -2.300e-02, 7.629e-02, 3.632e-02, -4.073e-02, -4.021e-01, 9.291e-03, 2.195e-02, 4.774e-02, 5.479e-02)); + r += mul(s3_0, M4(-1.114e-02, -2.405e-01, -5.870e-02, 2.470e-01, -1.186e-01, 8.453e-03, 1.200e-01, 9.027e-02, -9.437e-03, 1.128e-01, -6.882e-02, -1.012e-01, 3.310e-02, 1.354e-03, -7.321e-02, -6.252e-02)); + r += mul(s3_1, M4(1.713e-01, -1.959e-01, -4.247e-03, -5.163e-02, 1.953e-01, -1.454e-01, -1.412e-01, 3.189e-01, 2.225e-02, -7.657e-02, -1.032e-01, 6.375e-02, 4.638e-02, -2.646e-02, -5.159e-02, -1.400e-01)); + r += mul(s3_2, M4(-9.414e-02, 1.055e-01, 8.043e-02, -1.504e-01, -6.142e-02, -1.294e-01, -1.302e-01, 1.671e-02, 1.782e-02, -5.430e-02, -1.272e-01, 5.470e-02, -9.129e-02, 5.558e-04, -7.446e-02, -1.080e-01)); + r += mul(s3_3, M4(-1.609e-01, 2.412e-01, 1.539e-02, 1.635e-01, 5.731e-02, 4.505e-02, -1.987e-01, 1.142e-01, 7.460e-02, 1.938e-01, -1.055e-01, -1.156e-01, 2.912e-02, -3.555e-02, 4.497e-03, 5.731e-02)); + r += mul(s3_4, M4(-2.135e-02, 1.500e-01, -4.374e-02, 2.777e-02, 1.931e-01, -1.217e-01, -4.481e-02, 2.285e-01, 1.289e-01, -1.594e-02, -9.186e-02, 2.677e-01, -1.608e-01, 6.458e-03, 1.041e-01, 2.212e-02)); + r += mul(s3_5, M4(-4.324e-01, 2.047e-01, 4.299e-02, -2.363e-01, -3.169e-02, 1.719e-03, -1.141e-03, 1.626e-01, -1.440e-01, -4.942e-02, 3.266e-02, 2.561e-02, -7.315e-03, 3.041e-01, 1.774e-01, -5.855e-02)); + r += mul(s3_6, M4(-2.103e-01, -1.573e-01, -1.381e-01, -3.838e-02, -1.851e-04, -8.056e-02, -4.797e-02, -9.107e-02, 
5.331e-03, 3.598e-02, -5.975e-02, -1.012e-02, 4.188e-02, 3.601e-02, 6.932e-02, 2.420e-02)); + r += mul(s3_7, M4(1.788e-01, -1.795e-01, -2.201e-01, -1.477e-01, -9.825e-02, 1.962e-01, 8.007e-02, -9.079e-02, -2.744e-01, 7.188e-02, -1.559e-02, -5.103e-02, -7.550e-03, 2.157e-02, 1.767e-02, 6.859e-03)); + r += mul(s3_8, M4(-5.107e-02, -6.800e-02, -9.903e-02, 1.452e-01, 1.844e-02, 4.129e-03, 1.381e-02, 5.966e-02, -4.372e-02, 9.473e-02, -6.700e-02, 8.696e-02, -2.279e-01, 5.274e-02, 8.592e-02, -3.748e-02)); + r += mul(s4_0, M4(2.020e-01, -3.807e-01, -3.319e-01, 1.218e-01, -1.111e-01, 1.247e-01, -1.426e-02, 1.241e-01, 2.500e-01, -2.348e-01, 8.659e-02, 1.143e-01, 7.011e-02, 2.251e-01, 1.403e-01, 3.869e-02)); + r += mul(s4_1, M4(4.766e-01, -1.855e-01, -8.916e-02, -9.894e-02, 5.991e-01, -2.487e-02, 1.151e-01, 3.345e-01, -5.781e-02, 9.442e-02, 7.019e-03, -2.077e-02, 2.310e-03, 1.404e-01, 2.783e-01, -1.494e-01)); + r += mul(s4_2, M4(7.451e-02, -1.099e-01, 3.675e-02, 1.111e-02, 4.328e-02, 1.623e-01, 3.479e-02, 5.629e-03, -2.138e-01, 2.511e-01, -1.213e-01, 1.228e-01, -1.394e-03, 2.696e-01, -8.794e-02, -4.609e-02)); + r += mul(s4_3, M4(9.251e-03, -2.668e-01, -2.042e-01, -7.595e-02, -6.389e-02, 1.573e-02, -6.267e-02, 6.237e-04, 5.438e-02, 3.235e-01, -1.413e-01, -5.880e-02, 2.368e-01, -2.608e-02, -3.962e-01, 2.038e-01)); + r += mul(s4_4, M4(3.697e-01, -2.039e-01, 3.670e-02, 1.657e-01, -2.546e-01, -5.970e-02, 1.157e-01, -8.191e-02, 1.630e-01, -2.037e-01, -1.200e-01, 1.127e-01, 2.758e-02, 1.736e-01, 8.863e-02, -6.960e-03)); + r += mul(s4_5, M4(5.224e-02, -2.290e-01, -1.657e-01, 1.463e-01, 2.281e-01, -2.390e-01, -1.169e-01, 5.749e-02, 3.311e-01, -4.811e-02, -2.389e-01, 8.186e-03, 6.221e-02, -9.661e-02, 2.287e-01, 2.388e-01)); + r += mul(s4_6, M4(1.481e-01, -5.240e-02, -1.941e-02, 8.818e-02, 2.495e-01, -1.412e-02, -2.608e-02, -4.810e-02, -2.315e-01, -8.362e-02, 4.897e-02, -2.118e-02, -1.361e-01, 1.350e-02, -1.880e-01, 1.905e-01)); + r += mul(s4_7, M4(4.878e-01, -1.511e-01, -1.736e-03, 
-3.363e-02, -5.360e-01, 7.955e-02, 7.788e-02, 9.979e-02, 2.209e-01, 4.757e-02, -1.000e-01, 4.289e-02, 3.118e-01, -6.413e-02, -1.840e-01, 2.950e-01)); + r += mul(s4_8, M4(3.545e-01, 6.789e-02, 1.073e-01, -1.039e-02, 1.130e-02, 2.724e-02, -1.097e-02, -5.575e-02, 2.353e-02, -8.818e-02, -3.032e-02, -1.197e-01, 1.907e-01, -1.782e-01, -2.438e-01, 6.993e-02)); + r += mul(s5_0, M4(-1.077e-01, 1.371e-01, 1.620e-01, -6.391e-02, 1.730e-01, -2.195e-02, 1.333e-01, -5.393e-02, 2.290e-01, -1.654e-01, 6.814e-02, 6.463e-02, 1.763e-02, 7.299e-02, -9.175e-02, 4.784e-03)); + r += mul(s5_1, M4(9.838e-02, -2.407e-02, 1.090e-01, -4.032e-02, -2.245e-01, 9.300e-02, 1.714e-01, 7.662e-02, -1.342e-01, 1.574e-01, -2.104e-01, -7.998e-02, -9.281e-02, -1.426e-02, -3.546e-02, -1.383e-01)); + r += mul(s5_2, M4(9.630e-02, -7.988e-02, -4.258e-03, 4.059e-02, 1.575e-01, -2.816e-02, 9.929e-02, -6.793e-02, -1.562e-01, 5.718e-02, -1.190e-01, -2.971e-02, 2.246e-02, 1.853e-01, 2.036e-01, -1.354e-01)); + r += mul(s5_3, M4(-1.125e-01, 2.767e-02, 1.741e-02, 1.890e-01, -8.985e-02, 5.772e-02, -6.771e-03, 1.740e-01, -1.138e-01, 9.879e-02, 6.454e-02, 2.596e-01, -9.205e-02, 8.011e-02, 2.136e-02, -3.488e-01)); + r += mul(s5_4, M4(-1.583e-01, 1.084e-01, -1.522e-02, 1.867e-01, 1.444e-01, -7.261e-02, 3.910e-02, 1.073e-01, 3.037e-01, 1.705e-02, -6.208e-02, -7.626e-02, 1.010e-01, -6.482e-03, -2.114e-01, -5.472e-02)); + r += mul(s5_5, M4(6.420e-02, -4.978e-02, -1.974e-02, -1.764e-01, -1.035e-01, -1.282e-01, 1.841e-02, 1.211e-01, 1.873e-01, -1.638e-02, 1.286e-01, -4.075e-02, -1.992e-01, 3.898e-02, 7.555e-02, 5.191e-02)); + r += mul(s5_6, M4(4.248e-02, 4.291e-02, -7.928e-02, -5.759e-02, 6.021e-02, -1.298e-01, -4.777e-02, -6.316e-02, -2.009e-01, 3.201e-02, -1.013e-02, 1.052e-01, 3.579e-02, -1.210e-01, -2.331e-01, -1.311e-01)); + r += mul(s5_7, M4(-6.934e-02, 1.753e-01, 3.212e-02, 6.848e-03, 3.728e-02, -6.126e-02, -1.051e-01, 1.405e-01, -3.361e-01, -1.548e-01, -9.565e-02, 1.302e-01, 5.812e-02, -9.976e-02, -3.195e-01, 
-1.182e-01)); + r += mul(s5_8, M4(-1.784e-02, 1.564e-02, 8.872e-02, 2.014e-02, 1.662e-01, -1.599e-02, 3.252e-02, 5.147e-02, -1.107e-01, -2.154e-01, 3.114e-02, 1.464e-01, -1.589e-01, -5.730e-02, -2.735e-01, -1.417e-02)); + r += mul(s6_0, M4(-2.384e-01, 2.611e-01, -1.268e-01, 1.316e-01, -1.823e-01, 7.801e-02, 9.472e-02, 1.327e-01, -1.290e-02, 6.820e-02, -9.707e-02, 2.936e-02, -1.107e-01, 2.095e-01, 1.146e-01, 5.724e-02)); + r += mul(s6_1, M4(-1.344e-01, 2.408e-01, 1.536e-01, -3.545e-01, 1.624e-01, 1.110e-01, -7.389e-02, -2.104e-02, 5.315e-02, -1.555e-02, -9.936e-03, 1.219e-01, -1.640e-01, 3.448e-02, 7.913e-02, 1.352e-01)); + r += mul(s6_2, M4(1.533e-01, -9.037e-03, -1.185e-01, -2.583e-01, -2.308e-01, 2.075e-01, -5.145e-02, -1.724e-01, -1.219e-01, -2.210e-02, -5.663e-02, -1.331e-02, 2.090e-01, -1.316e-01, 4.100e-02, -2.699e-01)); + r += mul(s6_3, M4(-2.112e-01, -1.713e-02, -1.445e-01, 3.541e-01, 5.172e-02, -6.357e-02, 5.586e-02, -8.194e-02, -1.106e-01, 1.409e-03, -6.749e-02, -6.735e-02, 1.516e-01, -1.863e-03, -1.481e-01, -2.105e-01)); + r += mul(s6_4, M4(7.159e-01, -5.119e-01, -1.683e-01, 2.406e-01, 1.592e-01, 1.474e-01, 2.973e-01, 8.472e-02, -9.213e-02, -1.400e-01, 9.298e-02, -2.664e-01, -5.050e-02, 2.569e-01, 4.411e-02, 2.103e-01)); + r += mul(s6_5, M4(-4.620e-01, -8.028e-02, -2.350e-01, 4.759e-02, -4.889e-02, -1.022e-01, -4.022e-03, 9.208e-02, 2.407e-01, 5.864e-03, 2.830e-02, 6.999e-03, -3.806e-02, 3.520e-01, 1.274e-01, 2.909e-02)); + r += mul(s6_6, M4(-1.176e-01, 1.713e-02, -6.950e-02, 3.435e-01, -2.294e-01, 9.154e-02, -3.631e-02, 1.902e-01, -7.157e-02, -8.642e-02, -7.304e-03, 2.735e-02, 1.880e-01, -9.725e-02, -1.776e-01, 3.204e-02)); + r += mul(s6_7, M4(-1.755e-01, -6.715e-02, -7.087e-02, -2.950e-01, -2.957e-02, -4.643e-02, -2.744e-02, 1.343e-01, 1.597e-01, -7.456e-03, 4.374e-02, 8.110e-02, 4.893e-01, 1.294e-01, 9.328e-03, -1.487e-01)); + r += mul(s6_8, M4(-4.028e-02, -1.625e-02, 2.767e-01, 1.098e-01, -3.776e-02, -6.545e-02, 2.046e-02, 1.530e-02, 3.918e-02, 
-3.557e-02, -7.773e-02, -9.076e-03, -1.003e-01, -1.724e-02, -1.270e-03, -4.472e-02)); + r += mul(s7_0, M4(-1.844e-01, -4.295e-02, -5.664e-02, 1.616e-01, 6.627e-02, -9.438e-02, -2.259e-01, 1.396e-01, 2.611e-02, 1.834e-01, -2.500e-01, 9.836e-02, -5.051e-02, 3.898e-03, 1.266e-01, 6.027e-03)); + r += mul(s7_1, M4(1.215e-01, 1.592e-01, 9.098e-02, -1.148e-01, 1.719e-01, -6.846e-02, -1.485e-01, 1.138e-01, 1.436e-01, -1.201e-01, 1.874e-02, 3.492e-01, -1.054e-01, -8.116e-02, 6.488e-02, 1.260e-01)); + r += mul(s7_2, M4(-1.123e-01, 1.626e-01, -7.825e-02, -1.088e-01, 1.374e-01, -8.742e-02, -1.126e-01, -7.326e-02, -1.524e-02, -1.172e-01, -8.262e-02, 1.151e-01, -2.039e-02, -7.255e-03, 2.493e-01, -1.163e-01)); + r += mul(s7_3, M4(-1.115e-01, 3.692e-02, 2.518e-02, 2.190e-01, -8.416e-02, -1.554e-01, 5.779e-02, -3.729e-01, -4.536e-02, -2.621e-01, -1.562e-01, -1.263e-01, -4.522e-02, -3.959e-03, 8.233e-02, -3.101e-02)); + r += mul(s7_4, M4(2.293e-01, -9.952e-02, -1.936e-01, 3.692e-02, 1.759e-02, -2.504e-01, 1.199e-01, 1.134e-01, 1.294e-01, 9.082e-03, 8.671e-02, -2.547e-01, -2.408e-02, 3.526e-02, 1.867e-01, 2.024e-01)); + r += mul(s7_5, M4(1.211e-01, -3.754e-02, -1.250e-01, 9.511e-02, 1.084e-01, -9.184e-02, 3.417e-02, -5.457e-03, 2.062e-01, 1.586e-01, 2.161e-03, 8.496e-02, 2.035e-01, 1.617e-01, 1.828e-01, -3.025e-02)); + r += mul(s7_6, M4(-1.154e-01, 1.107e-01, -8.670e-02, 5.546e-02, 1.741e-01, -2.064e-02, 3.745e-02, -2.159e-02, -7.298e-02, -7.743e-02, -4.781e-02, -1.333e-02, -4.613e-02, 3.182e-02, -4.090e-02, 7.631e-02)); + r += mul(s7_7, M4(-5.393e-02, -1.284e-01, 3.319e-02, -8.761e-02, 2.782e-01, 8.453e-02, -1.033e-01, 3.022e-02, 4.400e-02, 2.091e-01, 8.963e-02, -5.267e-02, -3.514e-02, 4.259e-01, 2.436e-01, -3.799e-02)); + r += mul(s7_8, M4(1.071e-01, -1.190e-01, 1.713e-01, 7.520e-02, -1.375e-03, -7.968e-03, -5.822e-02, -1.101e-01, -6.349e-03, -1.021e-01, -3.258e-02, -1.664e-03, -3.207e-02, -3.003e-02, 8.426e-02, 6.163e-02)); + r += V4(-1.253e-02, -1.539e-02, -3.069e-03, 1.641e-03); 
+ return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 
0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, 
s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.430e-02, 
-2.502e-03, 6.129e-02, 6.356e-02, -6.155e-02, -7.575e-02, -1.735e-01, 2.518e-02, 6.293e-02, -8.199e-03, 3.854e-02, -1.450e-02, 1.340e-01, 3.969e-02, -1.542e-01, 1.633e-01)); + r += mul(s0_1, M4(2.588e-02, 1.567e-01, 6.751e-02, 6.298e-02, -4.141e-02, 5.382e-02, -9.239e-02, -1.018e-01, -8.423e-02, -5.073e-02, 1.521e-01, -8.445e-03, -2.287e-01, -1.461e-01, 1.232e-01, -2.732e-01)); + r += mul(s0_2, M4(-3.856e-02, 6.064e-02, 9.084e-02, 3.555e-02, -1.407e-02, -1.076e-01, 1.188e-01, -5.670e-02, -8.707e-02, 5.879e-02, 5.196e-02, 2.004e-02, -2.128e-02, 7.286e-02, 1.653e-01, 7.814e-02)); + r += mul(s0_3, M4(-5.635e-02, 6.645e-02, 2.274e-01, -3.328e-03, 6.878e-03, -1.448e-02, 4.074e-02, -2.986e-03, 7.733e-02, 9.767e-02, -1.488e-01, -3.001e-02, 2.552e-01, -4.813e-01, 2.380e-01, 3.349e-01)); + r += mul(s0_4, M4(-9.538e-02, -9.769e-02, -1.525e-01, 4.189e-02, -6.814e-02, -8.984e-02, 3.600e-02, 1.161e-01, -9.361e-04, 3.397e-02, -5.772e-02, -1.352e-01, 1.652e-01, 2.967e-01, -3.507e-01, -3.361e-01)); + r += mul(s0_5, M4(-2.982e-02, -9.992e-02, -8.754e-02, -5.282e-02, -3.020e-02, -7.405e-02, 1.307e-02, 2.055e-01, 7.412e-02, 3.693e-02, 1.558e-01, -2.303e-02, -3.333e-01, 1.319e-01, 8.950e-03, -1.264e-01)); + r += mul(s0_6, M4(-4.504e-02, 1.119e-02, -1.500e-01, 3.713e-02, 1.535e-01, 8.320e-02, 6.245e-02, -4.285e-02, 5.770e-02, -1.590e-02, -7.777e-02, -4.296e-02, -2.475e-01, -2.060e-01, -3.452e-02, -5.126e-03)); + r += mul(s0_7, M4(-1.587e-01, 5.268e-02, -5.849e-02, 9.573e-03, 5.701e-02, 9.641e-02, -9.526e-02, 7.030e-02, -1.654e-02, -1.078e-01, 8.349e-02, 4.318e-03, 5.628e-03, 1.483e-01, 2.488e-01, -5.701e-01)); + r += mul(s0_8, M4(1.630e-02, -4.883e-02, -3.023e-02, 9.988e-03, -5.532e-02, 9.620e-03, -1.785e-02, 2.546e-03, -9.849e-03, -1.123e-01, -5.571e-02, -6.771e-02, -1.957e-01, -2.368e-01, 1.368e-01, 1.634e-01)); + r += mul(s1_0, M4(-1.197e-02, -7.756e-02, -1.080e-01, 4.262e-02, 3.548e-02, -9.235e-02, -3.710e-02, -5.714e-02, -1.188e-03, -2.066e-02, -5.759e-02, 2.447e-01, 2.317e-02, 
2.063e-02, -4.138e-02, 9.497e-02)); + r += mul(s1_1, M4(1.251e-01, 1.449e-01, 2.212e-02, 6.304e-02, 1.226e-02, 5.597e-02, -3.858e-01, -1.712e-01, 9.556e-02, 1.928e-01, 1.014e-01, -1.521e-01, -7.535e-02, 3.528e-02, 1.629e-02, 2.299e-02)); + r += mul(s1_2, M4(3.850e-02, 8.374e-02, -7.629e-02, 1.294e-01, -5.879e-03, 2.033e-02, -1.193e-01, -9.012e-02, -3.272e-02, -6.056e-02, -1.866e-01, -4.290e-02, -1.694e-02, 5.956e-03, 9.157e-03, -3.825e-02)); + r += mul(s1_3, M4(1.604e-01, 4.090e-02, 7.334e-02, 2.415e-02, 3.192e-01, -2.538e-02, 2.221e-01, -2.832e-01, 7.005e-02, -1.376e-01, -1.501e-01, 1.552e-01, -5.752e-02, -1.340e-02, 3.240e-02, 5.229e-02)); + r += mul(s1_4, M4(-5.953e-02, -1.548e-01, 1.631e-01, 2.119e-02, 7.934e-02, 2.167e-01, 1.857e-01, -1.035e-01, 1.381e-01, 3.463e-02, -2.884e-01, 4.351e-01, 1.858e-01, -8.326e-02, -2.519e-02, -9.710e-02)); + r += mul(s1_5, M4(6.947e-02, -8.995e-02, 2.487e-01, -2.135e-01, 3.198e-03, -9.266e-02, -1.341e-01, 3.048e-02, 8.311e-02, -2.381e-01, 2.681e-02, 5.102e-02, 3.620e-02, 2.993e-02, 1.298e-02, -6.167e-02)); + r += mul(s1_6, M4(3.554e-01, 4.870e-02, -8.663e-02, 6.653e-02, 1.344e-01, -5.864e-02, 9.889e-02, -2.758e-01, 1.533e-01, 1.133e-01, -1.640e-01, -5.022e-02, -6.187e-02, -1.555e-03, -1.428e-02, -3.003e-02)); + r += mul(s1_7, M4(-1.435e-01, 3.390e-02, -3.164e-01, 2.373e-01, 2.667e-01, -5.951e-02, -5.348e-02, -4.216e-02, 1.783e-01, 4.678e-01, 1.639e-01, -9.901e-02, -1.401e-01, -5.017e-03, -8.770e-02, 2.325e-02)); + r += mul(s1_8, M4(-7.280e-02, -1.682e-01, -1.215e-02, 8.937e-02, -3.245e-02, -8.058e-02, 5.462e-02, -1.603e-01, 4.640e-02, -1.189e-01, -2.991e-03, -1.409e-01, 4.921e-02, -6.372e-02, 4.113e-02, -8.392e-03)); + r += mul(s2_0, M4(2.396e-02, 4.426e-02, 7.507e-02, -2.888e-03, 1.127e-02, -1.107e-03, -1.501e-01, -4.166e-02, 6.494e-02, 4.537e-02, 1.415e-01, 2.229e-02, 5.914e-02, -3.564e-04, 2.547e-02, -4.783e-02)); + r += mul(s2_1, M4(4.173e-02, -2.709e-02, -8.981e-03, -6.010e-02, 2.876e-02, 1.221e-02, -8.832e-03, -3.574e-02, 
1.257e-01, -3.440e-02, 2.348e-01, 2.222e-01, -7.485e-04, -3.096e-02, 7.948e-02, -4.845e-02)); + r += mul(s2_2, M4(-9.122e-03, 1.967e-02, 7.223e-02, 5.584e-03, -1.126e-01, 9.523e-02, -1.517e-01, -3.730e-02, 6.605e-02, 1.327e-01, 6.751e-02, -5.396e-02, 5.221e-02, 5.649e-02, -2.484e-01, -4.794e-02)); + r += mul(s2_3, M4(-6.743e-02, -5.392e-02, -3.705e-02, 1.631e-02, 5.557e-03, -6.563e-03, 4.973e-03, 1.547e-01, -5.396e-02, -9.422e-02, -1.079e-01, 1.227e-01, 6.776e-02, 9.316e-02, -3.001e-02, 7.081e-02)); + r += mul(s2_4, M4(-5.178e-02, 2.642e-02, 1.853e-01, -6.710e-03, -1.842e-02, 6.398e-02, -1.168e-02, 1.464e-01, -8.433e-02, -1.485e-01, -1.797e-02, -6.697e-03, -3.999e-02, 1.180e-01, -8.230e-02, 7.239e-03)); + r += mul(s2_5, M4(-3.445e-02, 1.215e-01, -4.651e-02, 1.042e-01, 2.504e-02, 1.252e-01, 4.106e-02, -2.745e-02, 3.966e-02, -8.817e-02, -2.228e-02, 1.102e-01, -8.929e-02, 3.381e-03, -6.434e-02, 7.325e-03)); + r += mul(s2_6, M4(-1.106e-02, -4.253e-02, 1.019e-01, 4.586e-04, 7.738e-02, 8.312e-02, 5.753e-02, 4.607e-02, -2.998e-01, -3.446e-03, -4.848e-03, 1.264e-01, -3.766e-02, -3.247e-02, -3.531e-02, -1.165e-01)); + r += mul(s2_7, M4(-8.607e-02, -3.008e-01, -1.574e-01, -2.179e-01, -4.569e-02, 1.867e-02, 1.438e-02, -8.711e-02, -1.629e-02, -1.113e-01, 7.664e-02, 9.039e-03, 1.211e-01, -4.794e-02, 3.923e-02, 2.456e-01)); + r += mul(s2_8, M4(5.356e-02, 2.222e-02, -3.259e-02, 3.216e-02, 6.819e-03, -4.598e-02, -9.678e-02, 5.281e-02, -3.978e-02, 5.241e-02, 1.722e-01, 3.001e-02, 9.125e-02, 7.772e-02, 8.599e-02, -8.592e-02)); + r += mul(s3_0, M4(3.115e-02, 1.028e-01, -6.195e-02, 2.367e-02, -1.322e-01, -7.176e-02, 7.053e-02, 1.276e-01, 4.564e-02, 1.063e-02, 8.671e-02, 1.843e-02, 3.702e-02, -7.545e-02, 1.198e-01, -1.720e-01)); + r += mul(s3_1, M4(2.552e-02, -6.462e-02, -7.313e-02, 4.329e-02, 3.530e-02, -3.807e-02, -1.673e-02, -1.561e-01, -1.243e-01, -1.062e-01, 1.733e-01, 1.645e-01, 3.684e-02, 1.227e-01, -7.788e-02, -7.140e-02)); + r += mul(s3_2, M4(1.483e-02, -1.223e-02, -5.747e-02, 
-7.337e-02, 2.469e-02, -1.636e-01, -1.594e-01, 2.546e-01, -6.015e-02, 7.157e-02, 1.841e-01, -1.612e-02, 7.649e-02, -4.488e-02, -9.112e-02, -5.642e-03)); + r += mul(s3_3, M4(6.861e-02, 1.171e-02, -2.535e-01, 1.983e-01, -1.498e-01, -4.660e-02, -6.927e-02, 8.195e-02, 3.378e-02, 1.077e-01, -2.126e-01, -1.488e-01, 9.868e-02, 6.725e-02, -5.056e-02, -9.546e-02)); + r += mul(s3_4, M4(-8.023e-02, -1.265e-01, 3.597e-02, -2.229e-02, -1.194e-02, -2.538e-01, -7.756e-03, -3.325e-02, -1.065e-01, -1.217e-02, -1.758e-01, -8.267e-03, -5.941e-03, 2.422e-01, 2.235e-01, -8.565e-02)); + r += mul(s3_5, M4(1.598e-01, -1.432e-01, 1.035e-01, 1.283e-02, 4.021e-02, 8.888e-03, 2.267e-01, 2.556e-02, -1.227e-01, 1.103e-01, 1.254e-01, 5.504e-02, -8.774e-02, 7.539e-02, 2.931e-02, -1.855e-02)); + r += mul(s3_6, M4(5.906e-02, -4.077e-02, 1.489e-01, 1.314e-01, -3.541e-01, 1.956e-03, 1.613e-02, -2.846e-02, 1.842e-01, 2.073e-02, 1.122e-02, 2.773e-02, -1.092e-01, 2.624e-02, -1.591e-02, -1.047e-01)); + r += mul(s3_7, M4(3.114e-01, -4.035e-02, -1.719e-01, -1.601e-02, -5.080e-01, -3.678e-02, -2.460e-02, 9.596e-02, 1.603e-01, 2.930e-03, 7.151e-03, -1.612e-01, -9.080e-02, -1.158e-01, -3.950e-02, 7.667e-02)); + r += mul(s3_8, M4(4.895e-04, 7.552e-02, -1.776e-01, 1.818e-01, -7.535e-02, -7.622e-02, -1.121e-01, 5.100e-03, -2.094e-02, 4.762e-02, 5.506e-02, 8.806e-02, 7.822e-03, 4.282e-02, -5.181e-02, -9.044e-03)); + r += mul(s4_0, M4(2.090e-02, 5.948e-02, -1.547e-01, 3.702e-02, -8.610e-04, -3.230e-02, -1.227e-01, 3.420e-02, -4.326e-02, -6.442e-02, 2.825e-01, 5.421e-03, 2.814e-02, -8.593e-03, -8.132e-02, -3.103e-03)); + r += mul(s4_1, M4(-2.322e-01, -3.469e-01, -2.500e-01, -3.387e-02, -6.166e-02, -3.307e-02, -1.402e-01, -1.919e-02, 1.674e-01, -8.884e-02, 1.193e-01, 1.444e-01, -2.201e-02, -1.885e-01, -4.131e-01, 2.004e-01)); + r += mul(s4_2, M4(-2.260e-01, 4.543e-02, -2.431e-01, 1.101e-01, -5.023e-02, 8.514e-02, -2.432e-02, 8.143e-02, -8.401e-02, -1.176e-01, -1.595e-01, -1.600e-01, -1.187e-01, -4.430e-02, 
1.121e-01, -3.468e-02)); + r += mul(s4_3, M4(-5.215e-03, -1.233e-01, 7.827e-02, -5.274e-02, 1.962e-02, -4.484e-02, 5.968e-02, 1.161e-01, -8.261e-02, 8.903e-02, 1.394e-03, -2.465e-02, 3.084e-02, 1.483e-02, -2.100e-01, -1.218e-01)); + r += mul(s4_4, M4(1.997e-01, -6.270e-02, 1.550e-01, -1.618e-02, -3.016e-02, 3.239e-01, 8.539e-02, -7.489e-02, 2.133e-02, -3.946e-01, 1.209e-01, 7.094e-01, -9.293e-03, 1.545e-02, 1.811e-01, 8.852e-02)); + r += mul(s4_5, M4(-1.151e-01, 1.454e-01, 1.800e-01, 4.234e-02, 1.568e-02, -2.295e-01, -1.812e-01, 8.562e-02, 1.648e-01, -7.203e-02, -1.982e-01, -1.259e-01, 5.267e-02, -2.894e-02, -7.699e-02, -1.009e-01)); + r += mul(s4_6, M4(-1.691e-02, -1.591e-02, 6.519e-02, -1.106e-01, 3.055e-02, -7.047e-02, -1.120e-01, 5.428e-02, 2.806e-02, -7.755e-02, -1.886e-01, 1.087e-01, 1.662e-01, 2.564e-02, 7.856e-02, 5.331e-02)); + r += mul(s4_7, M4(1.820e-01, 4.350e-02, 5.261e-02, 1.101e-01, 1.758e-01, -2.128e-01, -8.567e-02, -1.119e-01, -5.122e-02, -1.566e-01, -1.069e-01, 8.537e-02, 1.297e-01, 6.946e-02, 8.958e-02, 4.395e-02)); + r += mul(s4_8, M4(-5.285e-03, 4.673e-02, -4.109e-02, 8.651e-02, 1.925e-02, 5.814e-02, -1.975e-02, 1.046e-01, 7.454e-02, -1.315e-01, 1.961e-02, -2.057e-01, 6.031e-03, 8.229e-02, -1.340e-01, -7.076e-02)); + r += mul(s5_0, M4(8.428e-02, -6.466e-02, -1.203e-01, 9.652e-02, -9.675e-03, 9.028e-02, 9.799e-03, -4.970e-03, 7.256e-03, -1.621e-02, 1.486e-03, 2.717e-02, 1.641e-02, 1.739e-02, -5.012e-02, 8.156e-02)); + r += mul(s5_1, M4(-9.258e-02, -2.166e-01, 1.321e-01, 5.894e-02, 4.417e-02, -3.021e-02, -1.223e-01, 1.535e-02, -3.462e-02, 2.490e-02, 1.146e-01, -1.133e-01, -3.950e-02, -7.089e-02, 5.085e-03, 1.121e-01)); + r += mul(s5_2, M4(-5.568e-03, 3.933e-02, 1.782e-01, -3.796e-02, -4.307e-02, -2.704e-02, 1.217e-01, 2.829e-02, 3.353e-02, -4.298e-02, 1.166e-01, -5.746e-03, -4.542e-02, -6.204e-02, -6.279e-02, 9.747e-02)); + r += mul(s5_3, M4(6.976e-02, -8.809e-02, 6.384e-02, 6.630e-02, -5.726e-02, 5.241e-02, -2.371e-04, -4.323e-02, 5.460e-02, 
2.087e-02, -7.948e-02, 3.768e-03, 3.190e-02, 1.526e-02, 6.161e-02, -1.331e-01)); + r += mul(s5_4, M4(2.771e-02, -2.025e-01, -9.295e-02, 1.869e-03, -6.839e-03, -8.896e-02, 2.431e-01, 1.043e-01, 2.362e-02, -3.601e-02, -7.062e-02, -2.395e-02, -5.630e-02, 1.035e-01, -1.120e-01, -4.269e-02)); + r += mul(s5_5, M4(-6.221e-02, 5.515e-02, -6.945e-02, -2.008e-02, -5.769e-02, -2.422e-01, -4.253e-02, -5.424e-02, 1.082e-01, -3.192e-01, 2.441e-01, -4.830e-02, 1.521e-01, -8.100e-02, 6.139e-02, 2.581e-02)); + r += mul(s5_6, M4(6.218e-02, 1.707e-02, 5.381e-02, 4.556e-02, -1.143e-01, -3.433e-02, 1.830e-02, 7.153e-03, -2.423e-03, -2.425e-02, -4.516e-02, 2.008e-02, 7.175e-02, 2.979e-02, 8.772e-02, 2.496e-02)); + r += mul(s5_7, M4(1.569e-01, 8.843e-02, 8.214e-02, -3.816e-03, 1.612e-02, -1.714e-01, 6.246e-02, -6.355e-02, -1.388e-01, 6.606e-02, -1.192e-01, 5.246e-02, 1.004e-01, 2.411e-01, 1.450e-01, -2.187e-03)); + r += mul(s5_8, M4(3.150e-02, 1.450e-01, -7.715e-02, 4.629e-02, 1.862e-02, 2.719e-02, 8.752e-02, -1.093e-01, 5.075e-02, -6.477e-02, -5.522e-02, -1.557e-02, -1.725e-02, 7.111e-02, -5.877e-02, 4.227e-02)); + r += mul(s6_0, M4(5.059e-02, 3.600e-02, -1.045e-01, 7.332e-03, 4.851e-02, -6.121e-02, -1.669e-01, -2.964e-02, -5.797e-03, 3.425e-02, -4.846e-02, 2.897e-02, -6.363e-02, -4.005e-02, -1.674e-01, -6.592e-02)); + r += mul(s6_1, M4(-7.886e-02, -3.454e-02, -4.988e-02, 7.899e-02, -1.289e-01, 5.159e-02, -1.258e-02, 1.658e-01, 5.253e-02, -1.218e-01, 9.302e-02, 1.078e-03, 2.690e-02, 6.063e-02, 3.983e-02, -6.998e-02)); + r += mul(s6_2, M4(4.389e-02, -2.511e-02, 1.766e-02, 3.051e-02, -2.096e-01, -1.513e-02, -2.819e-01, 1.285e-02, 2.957e-02, 3.381e-02, -3.444e-02, 4.307e-02, -1.404e-02, -6.485e-02, 2.216e-01, -2.653e-01)); + r += mul(s6_3, M4(1.420e-01, -3.867e-02, 1.017e-02, 3.951e-02, 3.024e-01, -6.194e-03, -5.191e-02, -9.082e-02, -3.109e-02, 6.490e-02, 8.594e-02, -1.713e-01, 9.532e-02, 5.568e-02, 2.173e-01, 8.075e-02)); + r += mul(s6_4, M4(2.442e-02, -1.022e-01, 6.919e-02, -2.534e-02, 
1.513e-01, -2.728e-02, 2.169e-01, -1.866e-01, 3.269e-02, 5.941e-02, 1.968e-01, -1.196e-01, 6.886e-02, 3.435e-01, 9.940e-02, 1.840e-01)); + r += mul(s6_5, M4(-1.406e-02, -4.546e-02, 1.050e-01, -3.953e-02, 2.146e-01, -3.592e-02, 8.963e-02, -2.727e-01, 2.125e-02, -1.543e-01, 8.049e-02, 4.861e-02, -4.433e-02, 3.279e-01, -6.396e-02, 2.270e-01)); + r += mul(s6_6, M4(1.996e-02, 5.264e-02, -6.494e-03, 4.879e-02, -2.092e-02, -1.126e-01, -2.927e-01, -7.429e-02, -1.299e-01, -6.691e-02, 3.593e-02, 6.911e-02, 2.352e-02, 2.037e-02, 1.791e-03, 7.659e-02)); + r += mul(s6_7, M4(3.134e-02, 7.911e-04, -4.691e-02, 6.814e-02, -1.124e-01, -3.115e-01, -3.733e-01, 3.364e-01, -4.880e-02, 1.425e-01, -8.420e-02, -1.705e-01, 1.497e-01, 4.582e-02, 2.033e-02, -1.566e-01)); + r += mul(s6_8, M4(-2.332e-02, 1.154e-01, -3.068e-02, 1.733e-02, 7.944e-04, -1.321e-01, -1.958e-01, 9.601e-02, -1.902e-02, -1.278e-01, 2.439e-02, -9.167e-03, 2.557e-02, 2.074e-01, -6.324e-03, -2.506e-01)); + r += mul(s7_0, M4(-9.704e-02, 8.497e-02, 4.279e-01, -2.315e-01, 6.230e-02, -1.185e-02, 5.086e-02, -5.616e-02, -4.671e-02, -5.134e-02, 1.671e-01, -5.957e-02, 1.091e-02, -3.777e-02, -9.111e-03, -7.947e-02)); + r += mul(s7_1, M4(1.325e-01, 1.757e-01, 1.235e-01, 2.153e-01, -4.627e-02, -6.814e-02, 1.869e-01, 1.260e-02, 1.781e-01, 3.753e-02, -8.738e-03, -5.145e-02, -9.106e-02, -1.544e-02, 1.918e-01, 3.458e-02)); + r += mul(s7_2, M4(-1.953e-01, 2.622e-01, 2.068e-01, -1.512e-01, -2.271e-03, -2.426e-02, 4.496e-03, 2.948e-02, 9.174e-02, 5.540e-02, -3.832e-01, -3.503e-03, -1.018e-01, 4.478e-02, -2.541e-02, -2.804e-02)); + r += mul(s7_3, M4(-7.293e-02, 1.416e-01, -4.117e-01, -4.729e-02, 7.547e-02, 7.161e-02, -1.651e-01, -2.265e-02, 4.845e-02, 2.563e-02, -3.190e-02, -8.842e-02, 6.109e-02, -1.210e-02, 1.880e-01, -7.380e-02)); + r += mul(s7_4, M4(-9.428e-02, -1.562e-01, -9.766e-02, 1.031e-01, 2.406e-03, 6.987e-02, -1.348e-01, 9.654e-02, -1.417e-02, 1.787e-01, -1.476e-01, -6.345e-02, 2.099e-02, 7.796e-03, -2.826e-02, -1.217e-01)); + r 
+= mul(s7_5, M4(-1.551e-01, 1.827e-01, -9.766e-02, -2.134e-01, 4.146e-02, 9.782e-02, -1.664e-01, 1.176e-01, -1.567e-01, -1.144e-01, 1.927e-01, -2.515e-02, 1.365e-02, 2.013e-01, -5.611e-02, 1.228e-01)); + r += mul(s7_6, M4(-9.818e-02, 1.221e-01, -1.325e-01, 1.431e-01, 6.329e-02, 2.043e-02, 3.903e-02, 3.868e-02, -1.627e-01, -1.269e-01, 8.569e-03, -3.293e-02, -2.025e-01, -7.457e-02, -5.839e-02, 1.231e-02)); + r += mul(s7_7, M4(-6.773e-02, 3.339e-01, -2.705e-02, -2.185e-01, -2.356e-03, 7.593e-02, 6.016e-02, 8.892e-02, 3.956e-01, -1.463e-01, 3.654e-01, -1.317e-01, -1.505e-01, -7.638e-02, -5.450e-03, -1.264e-01)); + r += mul(s7_8, M4(-1.649e-01, -1.729e-02, -7.593e-02, 8.465e-02, -3.762e-02, -4.065e-02, 9.005e-02, -1.389e-02, -3.085e-02, 1.258e-02, -1.305e-01, 7.684e-02, -3.777e-02, -8.700e-02, 6.696e-03, -4.232e-02)); + r += V4(1.502e-02, 2.141e-03, -2.058e-02, 1.180e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.627e-02, -3.706e-02, 2.273e-03, 1.536e-02, 5.301e-03, -1.796e-02, 1.585e-02, 5.789e-02, 3.824e-02, -2.587e-02, -6.336e-02, 2.388e-02, -1.271e-01, -7.856e-02, 1.474e-01, -2.274e-01)); + r += mul(s0_1, M4(4.593e-02, -1.174e-01, 9.351e-02, -8.614e-02, 2.725e-02, -6.049e-02, 2.536e-03, 2.922e-02, -3.083e-02, -1.015e-02, -5.483e-02, 4.235e-02, 1.300e-01, -1.464e-02, 1.705e-01, 1.818e-01)); + r += mul(s0_2, 
M4(2.636e-02, -6.043e-02, -9.670e-02, 2.721e-02, -8.521e-02, 3.564e-02, -4.830e-03, 1.304e-02, -5.090e-02, -2.392e-02, 6.243e-03, -2.102e-02, -2.274e-01, -1.684e-01, -1.534e-01, 2.337e-01)); + r += mul(s0_3, M4(4.204e-02, -1.005e-01, 6.872e-02, 4.869e-02, -6.030e-02, 7.907e-02, -6.551e-02, 2.803e-02, -7.273e-02, 1.702e-02, -8.917e-02, 8.109e-02, 9.057e-02, 2.228e-02, -4.067e-01, -2.123e-01)); + r += mul(s0_4, M4(1.333e-01, 4.912e-02, 7.565e-02, 3.742e-03, 3.059e-02, -3.174e-02, -6.240e-02, 5.730e-02, -7.866e-02, -1.045e-02, 8.373e-02, 5.979e-02, -4.503e-01, 2.687e-01, -7.459e-02, 4.265e-01)); + r += mul(s0_5, M4(-5.302e-02, 6.140e-02, 7.408e-03, 8.252e-02, -1.031e-02, 1.042e-01, -3.733e-02, -3.354e-02, 7.338e-03, -5.500e-02, 1.031e-01, 2.960e-02, 3.828e-01, -2.754e-02, -2.871e-01, 1.416e-01)); + r += mul(s0_6, M4(7.235e-02, -9.262e-02, -4.357e-02, 8.340e-02, -3.616e-02, 1.498e-02, 7.191e-02, -7.057e-02, 2.108e-02, -4.524e-03, -2.951e-02, -4.385e-02, 1.128e-01, -3.883e-02, -2.984e-01, 1.454e-01)); + r += mul(s0_7, M4(4.685e-02, 3.870e-03, -9.658e-02, -4.813e-02, 1.318e-02, -5.383e-02, -6.818e-02, 1.219e-02, -6.892e-02, -1.896e-02, -1.107e-01, 5.092e-02, -8.280e-02, 9.133e-02, 1.534e-01, 1.031e-01)); + r += mul(s0_8, M4(-7.269e-03, 1.340e-02, 6.275e-02, 7.108e-03, 6.567e-02, 2.403e-03, -3.498e-02, -1.143e-02, -6.371e-02, 9.872e-02, -1.041e-01, -2.016e-02, -3.668e-01, 2.973e-01, -1.393e-01, 1.399e-01)); + r += mul(s1_0, M4(6.435e-02, 1.008e-02, -3.227e-02, 1.622e-01, 3.116e-02, 1.173e-01, 4.471e-02, 4.926e-02, 1.794e-01, 3.143e-02, -1.059e-01, 6.966e-02, -4.122e-02, -1.442e-02, 5.200e-02, 1.345e-02)); + r += mul(s1_1, M4(8.526e-02, -1.183e-01, 5.110e-02, -4.240e-02, 3.903e-02, 4.467e-02, -7.250e-02, -1.168e-01, 2.552e-01, -1.188e-02, -8.694e-02, 1.658e-02, 1.522e-01, 2.154e-02, -5.800e-02, -9.179e-02)); + r += mul(s1_2, M4(6.380e-02, -6.909e-02, -2.660e-02, 4.635e-02, 1.963e-02, 8.193e-03, 1.138e-02, 5.036e-02, 1.687e-02, 9.006e-02, -2.169e-02, -3.294e-02, -4.215e-02, 
-6.957e-02, -4.407e-02, 1.604e-02)); + r += mul(s1_3, M4(8.150e-02, -3.895e-02, 1.754e-01, 4.230e-02, -1.249e-01, 2.390e-01, 5.074e-02, -3.046e-02, 2.020e-02, 2.077e-01, 1.416e-01, -1.547e-01, 5.266e-02, -1.331e-01, 4.703e-02, 5.557e-03)); + r += mul(s1_4, M4(3.522e-01, -1.193e-01, -9.524e-02, 1.514e-01, 9.165e-02, 5.100e-02, 1.074e-01, -1.809e-01, 1.972e-01, -2.142e-02, 3.038e-01, -1.775e-01, -7.309e-02, 1.762e-01, -2.303e-02, 5.562e-02)); + r += mul(s1_5, M4(-1.424e-01, 1.279e-01, -4.616e-02, 6.096e-02, 1.339e-01, 6.948e-02, 8.022e-02, 2.968e-02, 5.865e-02, 4.638e-02, -7.140e-02, 3.687e-02, -7.514e-02, -5.826e-02, 1.623e-01, 8.376e-03)); + r += mul(s1_6, M4(1.101e-01, -3.896e-02, 1.135e-01, 1.991e-02, -3.304e-02, 1.337e-01, 2.981e-01, -4.507e-02, -1.599e-02, 8.470e-02, -1.310e-01, -7.373e-02, 3.429e-02, -6.702e-02, -1.505e-03, 9.035e-02)); + r += mul(s1_7, M4(1.812e-01, -7.926e-02, 2.959e-02, 7.337e-02, -1.275e-01, -7.013e-02, 1.419e-01, 3.057e-02, -1.709e-02, -1.890e-01, 1.521e-01, -2.855e-02, 8.112e-02, 1.838e-03, -5.860e-02, -4.827e-02)); + r += mul(s1_8, M4(-1.008e-01, 1.633e-01, -2.021e-01, -2.959e-03, 5.128e-02, 1.395e-01, -1.652e-02, 3.358e-02, 9.271e-02, -6.090e-02, -1.068e-01, 1.570e-02, 2.471e-02, 5.604e-02, -3.309e-02, -8.012e-03)); + r += mul(s2_0, M4(-7.188e-02, 2.960e-02, 6.470e-02, -4.708e-02, 5.598e-02, 5.825e-02, 4.624e-02, 3.552e-02, 5.060e-02, -1.837e-01, -3.757e-03, 9.105e-02, -5.707e-02, -1.952e-02, -1.222e-02, 6.376e-02)); + r += mul(s2_1, M4(8.038e-02, -1.987e-02, 1.788e-02, -4.915e-03, -1.764e-02, -1.092e-02, -6.952e-02, 4.656e-02, -3.170e-01, -5.994e-02, 7.551e-02, -2.007e-02, 8.198e-02, 4.931e-02, 2.651e-03, 3.904e-02)); + r += mul(s2_2, M4(5.293e-02, 3.933e-02, -7.201e-02, -4.464e-03, 5.815e-02, 1.212e-02, -6.521e-02, -2.304e-02, 1.165e-01, -7.738e-02, 1.932e-01, 5.646e-02, -9.020e-02, 1.666e-02, 1.118e-01, -8.715e-02)); + r += mul(s2_3, M4(3.826e-02, 6.648e-02, -7.553e-02, -4.523e-04, -7.340e-02, -7.885e-02, 8.911e-02, 2.264e-02, 
1.029e-01, -1.053e-01, -8.941e-02, 4.054e-01, -2.498e-03, -4.982e-02, 6.183e-02, 1.528e-01)); + r += mul(s2_4, M4(-1.312e-01, -1.318e-01, 1.237e-01, -1.090e-01, 1.089e-01, 2.118e-02, -1.616e-01, -1.268e-01, 9.568e-03, -5.246e-02, -6.390e-01, -9.805e-02, 1.340e-01, -1.311e-01, -4.989e-02, -2.292e-01)); + r += mul(s2_5, M4(1.449e-01, -4.417e-02, 1.359e-01, -2.568e-02, 7.714e-02, -7.431e-02, 1.489e-01, 1.559e-02, -1.086e-01, -1.940e-02, -1.070e-01, 1.865e-03, 1.768e-01, -1.110e-01, -9.865e-02, -1.790e-01)); + r += mul(s2_6, M4(-6.517e-02, 2.601e-02, -1.632e-02, 8.751e-02, -9.319e-03, -3.826e-02, 1.682e-01, 1.859e-02, 6.412e-02, -2.240e-02, -7.695e-02, 8.414e-02, 9.562e-02, -1.344e-02, 3.524e-02, -3.504e-02)); + r += mul(s2_7, M4(1.413e-01, 2.495e-02, -2.045e-01, -2.954e-03, 8.908e-03, -6.806e-02, -2.154e-02, 2.458e-02, -9.071e-02, 6.857e-02, -1.723e-01, -2.654e-02, -5.486e-02, 1.018e-01, 3.252e-01, -8.761e-02)); + r += mul(s2_8, M4(-1.249e-02, -2.934e-03, -8.129e-05, -8.104e-02, 4.956e-02, 7.004e-02, 1.413e-02, 3.958e-03, -3.843e-02, 1.097e-02, -1.374e-02, 5.251e-02, 5.842e-03, -8.886e-02, 1.657e-01, 9.379e-03)); + r += mul(s3_0, M4(-7.571e-03, -2.115e-02, -5.197e-02, 9.415e-02, 8.949e-02, 3.121e-03, -1.294e-01, 3.839e-01, 3.977e-04, 3.072e-03, -7.390e-02, -1.015e-01, -1.002e-01, 1.109e-01, 7.192e-02, 8.343e-02)); + r += mul(s3_1, M4(1.220e-01, 8.792e-03, -6.851e-02, 2.211e-03, -8.430e-02, 1.781e-01, -2.230e-01, 5.424e-02, -1.278e-01, 4.666e-02, 1.977e-01, 9.164e-02, 7.105e-03, 2.962e-02, -7.392e-02, 1.262e-02)); + r += mul(s3_2, M4(4.917e-02, -7.202e-02, -4.426e-02, -4.060e-02, -2.578e-01, 1.368e-01, -1.178e-01, 7.409e-02, -5.321e-02, -2.093e-02, 4.148e-02, 8.132e-03, 1.633e-03, 5.683e-02, 5.647e-02, -4.462e-03)); + r += mul(s3_3, M4(5.651e-02, 6.987e-02, 1.022e-02, 2.228e-02, -3.403e-02, -1.238e-01, -8.688e-02, 2.847e-01, 8.300e-02, 1.064e-01, 5.079e-02, 2.312e-01, -2.011e-02, 4.970e-02, 8.474e-02, 1.646e-01)); + r += mul(s3_4, M4(1.658e-01, 2.664e-02, 6.081e-04, 
-1.901e-02, -2.153e-01, 1.785e-01, -1.159e-01, 1.344e-01, 2.095e-01, -6.540e-02, -9.419e-02, 2.543e-02, 2.168e-01, -1.920e-01, -1.066e-01, -3.762e-02)); + r += mul(s3_5, M4(-2.605e-01, 1.033e-01, 5.749e-02, -7.792e-03, -1.805e-01, 3.559e-02, -8.796e-02, -2.171e-02, -1.491e-02, -1.234e-01, -1.024e-01, -4.689e-02, 8.840e-03, -6.047e-02, -1.086e-01, -8.053e-02)); + r += mul(s3_6, M4(-7.415e-02, 6.573e-02, -1.010e-02, 1.004e-01, 8.146e-02, 1.904e-02, -2.087e-01, 1.677e-01, -8.537e-02, -9.823e-04, 8.008e-02, -2.436e-02, 1.405e-01, 1.090e-02, -1.450e-02, 1.604e-03)); + r += mul(s3_7, M4(1.052e-01, 2.178e-01, -1.695e-01, -1.675e-01, 7.965e-02, 9.504e-02, -1.882e-01, 2.795e-02, -1.650e-01, 7.266e-02, 2.497e-01, -1.030e-01, -3.892e-02, 4.202e-02, 5.738e-02, -2.670e-02)); + r += mul(s3_8, M4(2.595e-01, -1.184e-01, -5.760e-03, 1.873e-03, -3.031e-02, 8.616e-02, -1.890e-01, -3.566e-02, -6.621e-02, -6.521e-02, 9.123e-02, 1.774e-02, 1.456e-02, -4.624e-02, 1.121e-01, 7.075e-02)); + r += mul(s4_0, M4(-1.808e-01, -7.151e-02, 1.136e-01, 8.906e-02, -3.798e-02, 3.175e-02, 3.574e-03, -6.163e-02, -1.217e-02, -3.910e-02, -4.029e-02, 1.510e-02, -5.323e-02, 4.221e-02, -1.139e-02, 2.482e-02)); + r += mul(s4_1, M4(2.471e-01, 2.122e-01, -2.925e-01, 1.159e-01, 1.034e-01, 3.971e-02, -2.369e-02, -6.175e-02, 1.270e-01, -1.832e-02, -2.061e-01, 2.132e-01, -8.107e-02, 1.786e-01, 8.508e-03, -5.903e-02)); + r += mul(s4_2, M4(2.225e-01, 1.082e-01, -1.634e-01, -2.090e-02, 3.859e-02, -5.848e-02, -7.811e-02, 2.496e-02, 6.474e-02, 8.606e-02, 1.459e-01, -1.556e-01, -2.996e-02, 1.241e-02, -1.139e-01, -4.844e-02)); + r += mul(s4_3, M4(6.932e-02, 1.109e-01, -1.615e-01, -6.077e-02, -3.714e-02, 9.318e-02, 4.859e-02, 3.846e-02, 7.590e-02, 3.168e-02, -6.170e-02, 2.132e-01, -1.446e-01, 7.892e-02, -1.163e-01, 9.597e-02)); + r += mul(s4_4, M4(-4.274e-02, 3.076e-01, 6.231e-02, 1.233e-01, 3.335e-02, -2.843e-01, 2.578e-01, -3.067e-01, 1.023e-01, 1.018e-01, -1.995e-01, -1.487e-01, 3.421e-02, 1.787e-01, -9.120e-03, 
-6.409e-02)); + r += mul(s4_5, M4(2.020e-01, -3.115e-01, 1.270e-01, -1.962e-03, 5.069e-02, -1.333e-02, -2.809e-02, 1.169e-01, 6.924e-02, -9.659e-02, 3.233e-01, -8.298e-02, 2.950e-02, -4.171e-02, 1.832e-01, -1.195e-03)); + r += mul(s4_6, M4(-1.517e-03, 5.659e-02, -8.075e-02, 4.669e-03, 7.980e-02, -2.630e-02, -5.654e-02, -1.299e-01, 2.705e-02, -1.421e-02, -5.630e-02, 3.774e-02, -1.197e-02, -3.027e-02, -9.603e-02, -9.663e-02)); + r += mul(s4_7, M4(1.387e-02, 4.735e-02, 7.699e-02, -7.601e-02, 1.694e-02, 1.520e-01, -2.505e-01, 7.931e-03, 7.297e-02, 1.056e-01, -2.348e-01, -7.999e-02, -4.488e-02, -1.450e-02, 9.499e-02, 1.999e-02)); + r += mul(s4_8, M4(-1.672e-02, -6.865e-02, 7.445e-02, -5.010e-03, -5.751e-02, 1.366e-01, -7.348e-02, 4.473e-02, -6.648e-02, 1.935e-01, -6.542e-03, -6.221e-02, 1.000e-02, -4.030e-02, -2.002e-02, -5.702e-02)); + r += mul(s5_0, M4(-1.844e-02, -2.279e-02, 1.859e-02, -1.994e-01, -7.484e-02, -7.738e-02, -1.324e-01, 6.908e-02, 3.229e-02, 3.760e-02, 2.144e-02, 8.776e-03, -3.110e-03, 3.086e-02, -3.827e-02, -2.496e-02)); + r += mul(s5_1, M4(1.135e-01, 1.224e-01, 8.033e-02, 7.685e-02, 6.138e-02, -2.476e-02, -1.497e-02, -1.407e-02, -1.467e-02, 4.675e-02, -8.522e-02, -7.936e-02, 5.384e-02, 1.046e-01, 1.349e-02, -8.820e-02)); + r += mul(s5_2, M4(4.358e-02, -9.912e-02, -3.773e-02, -1.635e-02, -5.953e-02, 5.375e-02, -2.343e-02, 4.447e-02, -6.225e-02, -1.359e-03, 4.897e-02, -4.672e-03, 1.588e-02, 8.632e-02, -1.006e-01, -1.180e-02)); + r += mul(s5_3, M4(-5.165e-02, 6.907e-02, 3.863e-02, -7.992e-02, -2.743e-03, -4.003e-02, -4.348e-02, -1.824e-02, 6.504e-02, -2.389e-02, -5.433e-02, 1.896e-02, -1.038e-01, 7.700e-02, -4.348e-02, -1.953e-02)); + r += mul(s5_4, M4(-1.504e-01, 1.324e-01, -1.111e-01, 1.308e-01, -1.680e-02, 9.616e-03, 2.922e-02, -2.276e-01, -6.420e-02, 2.921e-02, 8.717e-02, -2.128e-01, 1.967e-01, -6.838e-02, 7.717e-03, -2.039e-01)); + r += mul(s5_5, M4(8.521e-02, -3.579e-02, -7.804e-02, 5.746e-02, -2.326e-02, 1.403e-01, -1.433e-01, 5.825e-02, 
-2.646e-01, 2.818e-01, -4.705e-02, 2.818e-02, 2.052e-02, -4.477e-02, 2.043e-02, 3.274e-02)); + r += mul(s5_6, M4(-8.509e-02, -1.012e-02, -2.024e-02, 3.751e-02, -1.689e-02, -1.200e-02, -6.649e-03, -9.636e-03, -9.893e-03, 2.870e-02, 1.406e-02, 6.826e-03, -1.444e-01, 2.577e-02, 3.355e-02, -4.936e-02)); + r += mul(s5_7, M4(6.914e-03, 4.111e-03, -3.507e-02, 4.883e-02, -1.203e-01, 1.782e-01, -1.221e-01, -2.258e-01, 6.142e-02, -5.628e-02, -3.241e-02, -1.518e-02, -1.409e-01, -1.079e-01, 4.747e-02, 4.770e-02)); + r += mul(s5_8, M4(3.974e-02, -1.140e-01, 9.127e-02, 2.949e-02, -4.091e-02, -3.985e-02, 2.987e-02, 9.161e-03, 1.925e-02, 3.103e-02, -8.920e-02, 1.126e-02, 5.775e-02, -5.850e-02, -1.156e-01, -5.278e-02)); + r += mul(s6_0, M4(4.430e-02, 2.128e-04, -2.363e-02, -8.580e-02, 4.758e-02, 9.645e-02, 1.086e-01, -4.137e-02, -8.103e-03, 6.940e-02, -1.248e-02, -3.844e-02, 4.752e-02, -6.782e-02, 3.714e-02, -9.741e-02)); + r += mul(s6_1, M4(6.954e-02, 7.004e-03, -5.223e-02, 9.466e-04, -2.323e-02, -2.353e-02, 5.527e-03, -8.855e-02, -3.303e-02, 2.008e-02, -3.504e-02, -6.496e-03, -3.531e-02, 7.740e-02, -3.346e-02, -2.544e-02)); + r += mul(s6_2, M4(-1.047e-01, -1.141e-02, -4.571e-02, -5.042e-02, 2.876e-01, -6.013e-02, 1.330e-01, -6.781e-03, 9.382e-03, 2.406e-02, 8.489e-02, -3.575e-02, -8.829e-02, 8.565e-02, 5.110e-04, 1.838e-02)); + r += mul(s6_3, M4(-6.323e-02, 5.539e-02, 4.282e-02, -1.517e-01, 6.639e-02, 8.618e-02, 6.218e-02, -1.342e-01, -8.240e-02, 2.534e-02, -1.269e-01, -5.660e-02, 1.406e-02, 8.384e-02, 6.082e-02, -1.931e-01)); + r += mul(s6_4, M4(-1.133e-01, 6.829e-02, -2.698e-02, 2.658e-02, -3.588e-02, 1.752e-01, 4.161e-01, 4.412e-02, -1.112e-02, -1.806e-01, 1.313e-01, 1.439e-01, 4.300e-02, -1.159e-01, -6.542e-02, -2.109e-01)); + r += mul(s6_5, M4(-1.037e-02, -6.339e-02, -4.106e-02, -7.217e-03, -1.563e-01, -5.163e-02, -1.412e-02, -4.560e-02, -3.262e-02, 2.693e-02, -7.205e-02, -1.538e-01, 2.494e-01, -7.619e-02, 9.734e-02, -1.963e-02)); + r += mul(s6_6, M4(4.477e-02, -2.460e-02, 
1.561e-02, 7.902e-03, 2.819e-02, 8.137e-02, -9.074e-02, 4.879e-02, -7.361e-02, 8.618e-02, -9.277e-02, -1.204e-01, -3.147e-02, -2.592e-02, 4.403e-02, -3.817e-02)); + r += mul(s6_7, M4(-9.093e-03, -6.024e-02, -4.040e-02, 5.986e-02, 7.624e-02, 1.555e-01, -1.529e-01, 1.163e-01, 2.841e-03, -9.467e-02, -1.644e-01, 5.406e-02, -1.252e-01, 1.712e-02, 1.295e-01, -1.023e-01)); + r += mul(s6_8, M4(4.371e-02, -7.912e-02, -2.886e-02, -4.878e-03, -3.480e-02, 2.319e-01, -1.500e-01, 9.054e-02, 6.201e-02, 7.539e-02, -1.330e-01, -1.722e-02, 2.272e-01, -2.516e-01, 1.550e-01, -1.427e-02)); + r += mul(s7_0, M4(-1.865e-01, -5.930e-02, 1.480e-01, 5.092e-02, 8.114e-02, 1.780e-02, 4.968e-02, 1.214e-01, 1.395e-02, 1.867e-02, 6.042e-03, 9.279e-02, -6.797e-02, 2.587e-02, -1.102e-02, -2.887e-02)); + r += mul(s7_1, M4(-4.457e-01, -1.417e-01, -1.668e-01, -1.126e-01, -1.799e-02, -2.844e-02, -4.474e-02, 4.784e-02, 1.272e-02, 3.509e-02, 5.592e-02, 3.425e-02, -8.181e-02, 8.894e-02, 9.407e-02, -1.074e-01)); + r += mul(s7_2, M4(2.992e-02, -2.817e-01, 4.041e-02, -1.705e-01, -3.277e-02, 4.062e-02, 1.309e-02, -3.913e-02, 2.197e-02, -5.788e-02, 5.907e-02, -1.210e-01, 1.236e-01, 4.234e-02, -2.209e-02, 3.183e-02)); + r += mul(s7_3, M4(-1.444e-01, 7.209e-02, -1.524e-01, -1.727e-01, 5.041e-02, -2.331e-02, 4.138e-02, 1.204e-01, -1.734e-01, 9.672e-02, -4.040e-02, -5.078e-02, -2.563e-02, -2.302e-02, -1.457e-02, -3.718e-03)); + r += mul(s7_4, M4(-4.509e-01, 2.154e-01, 5.315e-01, 2.917e-01, 2.953e-02, -6.827e-02, -1.176e-01, 1.002e-01, 7.062e-02, -6.440e-02, 1.057e-01, 1.603e-01, -2.618e-03, -3.762e-02, -6.451e-02, -2.631e-02)); + r += mul(s7_5, M4(1.754e-01, -3.155e-01, 1.431e-02, -2.786e-01, 2.343e-02, -5.685e-02, 8.120e-02, -7.000e-02, 6.258e-02, 1.671e-02, -1.732e-01, -8.802e-02, -3.786e-02, -5.198e-02, 1.550e-01, 1.113e-02)); + r += mul(s7_6, M4(-9.336e-02, 7.900e-03, 1.448e-02, -1.041e-01, 3.344e-02, 3.577e-02, 5.547e-02, -2.618e-02, 9.245e-03, 2.217e-02, -1.096e-01, -1.983e-01, -1.686e-02, 5.541e-02, 
-1.284e-01, 3.291e-02)); + r += mul(s7_7, M4(-5.726e-02, -2.849e-01, 4.176e-01, 7.591e-02, 2.073e-02, -1.418e-01, 1.758e-01, 3.021e-02, -1.954e-01, -4.763e-02, 2.269e-01, 1.887e-01, 3.317e-02, 5.195e-02, -1.633e-01, -1.694e-02)); + r += mul(s7_8, M4(-2.306e-02, -1.179e-01, 4.999e-02, -8.378e-02, 2.885e-03, -1.295e-02, -2.832e-02, -6.580e-02, -8.781e-03, -1.141e-02, 5.429e-02, -2.212e-02, 4.049e-02, 8.520e-02, -1.725e-01, -6.162e-03)); + r += V4(2.776e-02, -8.120e-03, 1.695e-02, -8.298e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-9.317e-02, 1.915e-02, 8.386e-03, -7.604e-03, 2.599e-02, 3.101e-02, 2.067e-02, -7.477e-02, -3.195e-02, 3.575e-03, -1.788e-02, 3.550e-02, -5.165e-02, 8.900e-02, -1.879e-02, 6.665e-02)); + r += mul(s0_1, M4(2.323e-02, 4.761e-02, 3.358e-03, 1.379e-02, 6.437e-02, -7.584e-02, 5.153e-02, -8.086e-02, 3.212e-02, -3.236e-02, 4.675e-03, 7.112e-03, 1.418e-01, 3.161e-01, 1.194e-02, -2.045e-01)); + r += mul(s0_2, M4(1.196e-02, 2.864e-02, 7.652e-02, -9.290e-03, -2.904e-02, 1.836e-02, 3.561e-02, 3.707e-02, 8.349e-02, 4.243e-02, -2.999e-02, 1.486e-02, -1.074e-01, 2.173e-01, 2.716e-02, -5.154e-02)); + r += mul(s0_3, M4(-7.280e-02, -1.273e-01, 9.497e-02, -2.195e-02, -1.005e-01, 9.936e-02, -4.427e-02, 5.945e-02, 4.234e-02, -6.874e-02, -3.501e-03, -3.821e-02, -5.160e-01, -1.874e-01, 2.054e-01, 
-6.566e-02)); + r += mul(s0_4, M4(-1.746e-01, 1.102e-01, 4.787e-02, -3.177e-02, -1.164e-01, 1.112e-01, 5.676e-02, 4.072e-03, -1.577e-02, -3.395e-02, 4.062e-02, -1.182e-01, -5.695e-01, -6.229e-01, -1.910e-01, -6.589e-01)); + r += mul(s0_5, M4(-4.777e-03, 1.793e-03, 8.129e-02, -1.238e-01, -3.224e-02, 1.482e-01, 2.397e-02, 8.479e-02, 4.881e-03, -5.675e-02, 1.567e-01, -3.732e-02, 2.494e-01, -1.549e-02, 6.266e-02, -8.537e-02)); + r += mul(s0_6, M4(3.985e-02, -3.295e-02, -5.131e-02, -1.254e-01, 4.139e-02, 1.998e-02, -1.187e-02, -1.422e-02, 4.812e-02, -2.490e-02, -7.066e-02, -6.262e-02, 6.560e-03, -1.073e-01, 1.650e-01, 1.090e-01)); + r += mul(s0_7, M4(2.583e-02, -5.291e-02, -3.541e-02, -2.015e-02, 1.829e-02, -6.868e-05, -1.853e-01, -8.112e-02, -6.439e-02, -7.911e-02, 4.696e-03, -9.687e-02, -2.225e-01, 5.081e-02, 8.367e-02, 7.498e-01)); + r += mul(s0_8, M4(2.599e-02, 2.886e-03, -8.094e-02, 4.401e-02, 1.440e-02, -2.015e-02, 1.880e-02, 1.081e-03, -9.465e-02, -9.373e-03, 1.154e-01, -3.917e-02, -1.011e-01, 4.820e-02, -1.554e-01, -3.663e-01)); + r += mul(s1_0, M4(-1.092e-01, 1.216e-01, -1.118e-01, 1.907e-01, -8.130e-02, -5.784e-02, 6.783e-02, -3.183e-02, -1.563e-01, 1.382e-01, -1.255e-01, 1.461e-01, 3.380e-02, 8.835e-03, -1.597e-02, -1.284e-02)); + r += mul(s1_1, M4(7.447e-02, -1.840e-02, -1.411e-01, 4.135e-02, -3.556e-02, -2.067e-01, -5.427e-02, -4.274e-02, -9.245e-02, -1.770e-01, -7.212e-02, 7.392e-02, 3.132e-02, 2.501e-02, 8.675e-02, -2.563e-02)); + r += mul(s1_2, M4(-4.532e-02, -7.753e-02, -4.897e-02, 1.012e-02, -3.691e-02, -1.659e-01, -3.360e-02, -1.401e-01, 1.182e-02, 3.324e-02, -1.490e-01, 4.877e-03, -2.668e-02, 2.633e-02, 2.280e-02, -5.413e-02)); + r += mul(s1_3, M4(-6.860e-03, -3.030e-02, -5.868e-02, 1.373e-01, -1.136e-01, 6.083e-02, -1.212e-01, 1.402e-01, 5.197e-02, 3.028e-01, 2.910e-04, 2.725e-01, -1.142e-01, 2.221e-02, 1.983e-02, 4.980e-03)); + r += mul(s1_4, M4(-1.558e-01, 2.200e-02, -7.589e-02, -4.114e-02, -2.075e-02, -2.329e-01, -2.486e-01, 2.982e-01, 5.568e-01, 
-1.902e-01, -3.293e-01, 2.973e-01, -8.979e-02, -1.734e-01, -8.188e-02, -1.248e-01)); + r += mul(s1_5, M4(-1.136e-01, 2.001e-02, -6.094e-02, -1.164e-01, -9.559e-03, -2.312e-01, -2.666e-02, 5.802e-02, 5.574e-03, -8.042e-02, 1.882e-02, -5.696e-03, 5.486e-02, -2.811e-02, 3.405e-02, 3.376e-02)); + r += mul(s1_6, M4(1.890e-02, 2.390e-01, -1.844e-01, 1.908e-01, 6.778e-02, -2.268e-01, 2.431e-02, -6.474e-02, 1.892e-01, 2.155e-01, 1.174e-02, 1.018e-01, 2.103e-02, 9.542e-03, -5.061e-02, 9.833e-02)); + r += mul(s1_7, M4(2.532e-01, 1.047e-01, -1.373e-01, 6.951e-03, 1.005e-02, -2.509e-01, -1.389e-01, 1.150e-01, 8.669e-02, -9.414e-02, -1.985e-01, -2.291e-02, 3.444e-02, 6.642e-03, 4.162e-02, -1.231e-01)); + r += mul(s1_8, M4(-7.705e-02, 1.206e-01, 2.046e-02, 2.431e-02, 5.501e-02, -1.745e-01, -6.070e-03, 3.750e-02, -2.622e-03, -1.714e-01, -4.185e-02, 8.370e-02, -2.327e-02, 1.082e-01, -3.338e-02, 7.039e-02)); + r += mul(s2_0, M4(2.506e-02, -5.960e-03, 8.603e-03, -6.330e-03, -9.869e-03, 4.860e-03, 4.122e-03, -2.970e-02, -1.211e-01, -1.324e-01, 1.043e-01, -1.156e-01, -2.330e-02, 8.029e-03, -1.386e-02, -1.006e-01)); + r += mul(s2_1, M4(-6.310e-02, 3.873e-02, -8.762e-02, 5.317e-02, -1.089e-01, -9.682e-02, -2.468e-02, -1.545e-02, -5.920e-02, 1.584e-01, -6.268e-03, -7.099e-02, -7.300e-02, 5.048e-02, 6.387e-02, -1.378e-01)); + r += mul(s2_2, M4(-2.160e-02, 1.193e-02, 5.254e-02, -6.813e-02, 2.525e-02, 2.411e-02, 3.671e-02, -4.003e-02, 7.208e-02, -3.762e-02, -1.295e-01, -2.987e-03, -9.501e-03, 2.885e-02, -2.823e-02, -3.649e-02)); + r += mul(s2_3, M4(-1.447e-02, -6.604e-03, 5.087e-02, -5.231e-02, -1.432e-01, 9.517e-03, -2.375e-02, 6.718e-02, -1.024e-01, -1.657e-01, 1.543e-02, -2.130e-02, 7.696e-02, -1.628e-01, -2.175e-02, -6.305e-02)); + r += mul(s2_4, M4(1.241e-01, 3.732e-02, 3.107e-02, 9.781e-02, -1.883e-01, 5.448e-02, -5.869e-02, 3.828e-02, 2.123e-01, 2.982e-01, 3.094e-01, -3.948e-02, 2.509e-01, 6.215e-02, 1.057e-01, 1.183e-01)); + r += mul(s2_5, M4(1.370e-01, 3.386e-03, -1.440e-02, 
1.083e-01, 6.201e-02, -8.137e-02, -5.148e-02, -8.041e-02, 7.349e-03, 2.396e-02, 9.658e-02, -6.630e-02, -5.620e-02, 9.297e-02, 1.075e-01, 2.647e-01)); + r += mul(s2_6, M4(-8.195e-03, -1.201e-01, -7.786e-03, 8.643e-02, 1.019e-02, 7.961e-02, -5.549e-03, 1.267e-01, -9.394e-02, -1.329e-01, 4.056e-02, 3.870e-02, -2.269e-02, -9.855e-02, -3.089e-02, -5.161e-02)); + r += mul(s2_7, M4(-4.394e-02, 8.193e-02, 4.097e-02, -2.051e-01, 2.861e-02, 6.525e-02, -2.093e-02, 8.476e-04, -6.580e-02, -1.047e-01, -2.002e-02, -3.528e-01, -5.101e-02, 3.704e-02, -5.583e-02, 1.864e-01)); + r += mul(s2_8, M4(-6.193e-02, 4.235e-02, 1.458e-01, 1.280e-01, 1.397e-03, 6.387e-03, -3.770e-02, 3.617e-02, -4.485e-02, -1.430e-02, -3.923e-02, -2.988e-02, 9.385e-02, 1.037e-01, -1.469e-02, 8.648e-02)); + r += mul(s3_0, M4(7.370e-02, -6.681e-02, 6.857e-03, -1.464e-03, -8.139e-02, -2.321e-02, 2.256e-01, -2.762e-01, 4.326e-03, -9.937e-02, 5.479e-02, -1.248e-01, -1.140e-01, 3.124e-02, -3.539e-02, 5.063e-02)); + r += mul(s3_1, M4(-4.666e-02, -5.328e-02, 4.147e-03, 3.243e-02, -1.825e-01, -1.587e-01, 1.040e-01, -8.371e-02, 7.847e-02, 1.809e-01, 5.909e-02, -2.629e-02, 2.192e-02, -6.512e-03, 7.880e-02, -2.717e-02)); + r += mul(s3_2, M4(2.217e-02, -3.510e-02, 8.875e-02, 2.984e-02, -8.095e-02, 3.588e-02, 2.922e-02, 2.322e-02, 4.776e-02, 1.330e-01, -5.593e-02, 1.860e-01, -2.725e-02, -4.471e-02, 6.853e-02, -2.785e-02)); + r += mul(s3_3, M4(4.651e-03, 2.414e-03, -1.772e-02, -9.891e-02, -5.626e-02, 1.426e-01, -1.825e-03, -2.968e-01, 1.269e-01, -3.594e-03, -8.609e-02, 5.360e-03, 8.408e-02, -3.451e-02, 4.265e-03, -4.175e-02)); + r += mul(s3_4, M4(4.502e-01, -1.098e-01, 1.086e-01, -7.408e-02, -5.760e-02, 1.924e-01, 1.202e-01, -1.614e-01, 2.295e-01, 1.453e-01, 2.048e-01, -1.967e-01, 1.238e-01, 1.250e-01, -9.183e-02, -9.538e-02)); + r += mul(s3_5, M4(-1.430e-01, -4.830e-02, -7.549e-02, -3.340e-03, -5.755e-02, 9.543e-02, 1.986e-01, 1.168e-02, -5.303e-02, 1.102e-01, 2.070e-01, 6.416e-02, 6.153e-02, 1.059e-01, -4.811e-02, 
6.984e-02)); + r += mul(s3_6, M4(-2.191e-01, 4.890e-02, -5.366e-02, 1.201e-01, -3.396e-02, 4.261e-03, 9.185e-02, 9.099e-02, 1.899e-02, -8.291e-02, -1.536e-01, -7.678e-02, -7.581e-03, -6.502e-02, 1.899e-02, 2.996e-02)); + r += mul(s3_7, M4(1.050e-01, -6.143e-02, -2.060e-01, -3.645e-01, -4.155e-02, 3.520e-03, -6.472e-02, -4.338e-01, 1.628e-02, -5.007e-02, -5.609e-03, -1.029e-01, 6.029e-02, -2.550e-03, -4.082e-03, 2.602e-01)); + r += mul(s3_8, M4(1.017e-01, 1.053e-02, 2.575e-01, 6.993e-04, -1.868e-02, 6.033e-02, 1.548e-01, -7.071e-02, -2.339e-02, -6.659e-02, 3.672e-02, 4.389e-02, 6.373e-02, 4.674e-02, -5.896e-02, -1.070e-01)); + r += mul(s4_0, M4(1.968e-01, -3.459e-03, -1.691e-01, -4.071e-02, 5.010e-02, 2.857e-02, -7.519e-03, -5.577e-03, -1.646e-01, -8.852e-02, 9.672e-03, -5.369e-03, -3.776e-03, -1.913e-02, 7.260e-03, -1.101e-01)); + r += mul(s4_1, M4(4.915e-02, 2.861e-01, 5.564e-03, 5.088e-01, 4.463e-02, 5.954e-02, -2.309e-02, 5.717e-02, -3.021e-01, -2.191e-01, 6.434e-02, 7.461e-02, -2.205e-02, -4.874e-03, -1.735e-01, 1.253e-01)); + r += mul(s4_2, M4(1.128e-01, 1.075e-01, -2.875e-02, -1.778e-02, 4.533e-02, -3.758e-02, 2.265e-02, -4.932e-02, 3.630e-02, 2.354e-01, -1.175e-03, -3.448e-02, -3.037e-02, -1.978e-02, 8.120e-02, -8.233e-02)); + r += mul(s4_3, M4(8.857e-02, 3.684e-02, 9.267e-03, -7.369e-04, -1.267e-01, 1.045e-01, 8.024e-03, 2.250e-02, -1.073e-01, -1.799e-01, -3.614e-02, -1.036e-01, 2.249e-01, -1.213e-01, -1.413e-01, 6.528e-02)); + r += mul(s4_4, M4(2.018e-02, -7.586e-03, -7.276e-02, 9.245e-02, 2.630e-01, 8.460e-02, -1.289e-01, 1.931e-01, -2.951e-01, -1.197e-01, -1.362e-02, -6.101e-02, -1.125e-01, -7.522e-03, 1.929e-01, 2.081e-01)); + r += mul(s4_5, M4(1.735e-01, -3.837e-02, -2.689e-01, -1.575e-01, -1.147e-02, -5.049e-02, -8.954e-02, -9.647e-03, 1.735e-02, -7.929e-02, -2.055e-01, -1.046e-01, 2.800e-03, -2.230e-01, 5.326e-02, -8.484e-02)); + r += mul(s4_6, M4(9.596e-02, 1.478e-01, 8.386e-02, 4.962e-02, 2.575e-02, 2.522e-02, -9.939e-02, -1.447e-02, -7.036e-02, 
-1.864e-01, -4.597e-02, -1.242e-01, -1.236e-01, -1.066e-01, 2.954e-02, -2.495e-01)); + r += mul(s4_7, M4(4.021e-02, 8.410e-02, -2.024e-02, 2.625e-01, 2.479e-02, 1.913e-01, -8.467e-02, 2.132e-01, -1.626e-01, -1.470e-01, -1.475e-01, -1.940e-01, 8.031e-02, -3.446e-02, 4.137e-02, 1.510e-02)); + r += mul(s4_8, M4(5.455e-02, 4.836e-02, -2.443e-03, 4.940e-02, -6.281e-02, 5.090e-02, -7.503e-02, 2.358e-01, -1.184e-01, -7.135e-02, -1.001e-01, 1.106e-01, -2.813e-02, 7.361e-02, -1.206e-01, 7.500e-03)); + r += mul(s5_0, M4(1.094e-01, 1.463e-01, -4.544e-02, 1.765e-01, 1.195e-02, -1.165e-02, -1.074e-02, -4.479e-02, -4.199e-03, -6.425e-03, -8.801e-03, 1.340e-02, -1.941e-02, 2.875e-02, -2.974e-02, 6.107e-02)); + r += mul(s5_1, M4(5.405e-02, 1.598e-01, -1.369e-01, -3.004e-02, -3.157e-02, 1.339e-02, 7.332e-02, -3.229e-02, -1.327e-02, 1.182e-02, 4.794e-02, -6.305e-02, 4.248e-02, 7.300e-02, -5.926e-02, 8.302e-02)); + r += mul(s5_2, M4(2.698e-02, -5.484e-02, 3.239e-03, -2.466e-02, 1.411e-02, 3.545e-02, 7.326e-02, -6.638e-02, 3.476e-02, 9.492e-02, -2.676e-02, 6.366e-02, -3.944e-02, 3.457e-02, -1.909e-02, 3.126e-02)); + r += mul(s5_3, M4(1.193e-01, 7.390e-02, 5.802e-02, 6.966e-02, -5.505e-02, 5.759e-02, 2.647e-02, 3.655e-02, 8.248e-02, -3.531e-04, 4.006e-02, -2.483e-02, 1.442e-01, 2.735e-02, -5.585e-03, 1.256e-01)); + r += mul(s5_4, M4(-7.416e-02, 1.455e-02, -3.851e-02, -1.117e-01, 5.953e-02, 6.274e-02, 2.311e-01, 1.762e-01, -9.020e-02, 2.775e-02, -9.684e-02, 4.637e-03, 7.744e-02, 1.524e-01, -4.357e-02, 1.402e-01)); + r += mul(s5_5, M4(1.048e-02, -6.540e-03, 1.283e-02, 1.530e-02, -7.299e-02, -8.799e-02, 1.209e-01, -1.417e-01, -1.657e-01, 7.952e-02, 9.619e-02, 2.051e-02, -9.300e-02, -8.988e-02, -4.284e-02, -2.833e-02)); + r += mul(s5_6, M4(1.059e-02, -9.975e-03, -1.002e-02, -5.971e-02, -1.522e-02, -1.066e-01, 2.274e-02, 2.326e-02, 6.018e-03, -3.394e-02, 1.579e-02, 6.317e-03, 4.164e-03, 9.888e-02, -1.304e-02, -7.333e-02)); + r += mul(s5_7, M4(-2.437e-02, 5.469e-02, 3.865e-02, 2.620e-02, 
-5.758e-02, -2.490e-02, -4.255e-02, -1.501e-01, 5.814e-03, -2.612e-02, -7.788e-02, -1.049e-01, -3.619e-02, -1.124e-02, -5.754e-02, -1.702e-01)); + r += mul(s5_8, M4(7.789e-02, -5.184e-03, -1.090e-01, -9.938e-02, -4.395e-02, -8.541e-02, -1.900e-02, 1.958e-01, -5.825e-02, -1.991e-02, 1.695e-01, 4.282e-02, 4.553e-02, 9.582e-03, 4.027e-02, 8.375e-02)); + r += mul(s6_0, M4(-4.620e-02, 3.979e-02, -7.421e-03, -4.636e-03, -4.301e-02, -1.731e-03, -8.134e-02, -4.580e-02, 8.641e-02, -1.178e-02, -3.114e-02, 2.985e-02, 8.926e-03, -8.501e-02, 1.403e-01, -8.929e-02)); + r += mul(s6_1, M4(8.552e-03, 1.045e-01, 2.590e-02, -5.385e-02, 1.899e-01, 3.053e-02, 3.391e-02, 8.761e-02, -8.812e-02, 1.820e-02, 2.085e-03, 7.044e-02, -8.843e-03, 3.972e-02, 2.005e-01, -9.386e-02)); + r += mul(s6_2, M4(-3.533e-02, 8.881e-02, 7.908e-02, -1.673e-02, 2.972e-01, 5.909e-02, 1.822e-01, 7.794e-03, 3.495e-02, 8.208e-03, -8.089e-03, 3.064e-02, -1.182e-01, -3.028e-02, 5.854e-03, 2.649e-02)); + r += mul(s6_3, M4(4.463e-02, 6.808e-02, -1.320e-02, 7.349e-02, 1.047e-01, 3.442e-02, 5.491e-02, 1.766e-01, 2.378e-01, 5.525e-02, -1.079e-03, 7.857e-02, -1.323e-01, -1.197e-02, -4.535e-02, 5.279e-02)); + r += mul(s6_4, M4(4.852e-03, -1.891e-01, -8.620e-02, -7.778e-02, -8.466e-02, -1.193e-01, -3.509e-01, -4.832e-02, 2.248e-02, 5.971e-02, -1.411e-01, -1.547e-01, 1.189e-01, -2.901e-02, -3.536e-01, -1.434e-01)); + r += mul(s6_5, M4(5.967e-04, -8.594e-03, 2.770e-02, 2.773e-02, -6.823e-02, 2.855e-02, 4.400e-03, 3.281e-03, -7.049e-02, 7.300e-02, 1.679e-02, -4.129e-02, 3.537e-02, -7.997e-02, 1.755e-01, 7.430e-02)); + r += mul(s6_6, M4(2.333e-02, 7.693e-02, -1.168e-02, -5.052e-02, 2.763e-01, -8.145e-02, -2.557e-02, 5.789e-02, -1.378e-01, 3.910e-02, -2.735e-02, -1.627e-01, 1.390e-02, 9.601e-02, 1.422e-02, 1.167e-01)); + r += mul(s6_7, M4(-5.161e-02, 4.514e-02, 4.401e-02, -4.723e-02, 8.271e-02, -1.165e-02, -1.094e-01, -1.951e-01, 1.299e-01, -1.491e-01, -1.073e-02, -2.226e-01, 3.247e-02, 1.319e-01, -5.319e-02, 1.879e-01)); + r += 
mul(s6_8, M4(1.413e-02, 8.114e-02, 8.126e-02, 4.824e-02, -5.459e-02, 1.786e-01, 2.315e-01, 3.483e-01, -6.537e-02, -1.749e-02, -9.204e-02, 8.909e-02, 1.182e-01, -3.454e-01, -2.984e-02, -3.699e-02)); + r += mul(s7_0, M4(-5.222e-02, -1.666e-02, 3.111e-03, -1.269e-01, -1.162e-01, -1.072e-01, 1.369e-02, -1.421e-01, -4.234e-02, -5.603e-02, 1.896e-02, 3.023e-02, 2.745e-02, -1.633e-02, 7.530e-03, -2.612e-03)); + r += mul(s7_1, M4(-7.186e-02, 1.331e-01, 1.729e-01, 3.696e-02, -3.435e-02, -5.384e-02, 1.712e-01, -4.012e-02, -2.510e-01, 6.177e-03, -1.304e-01, -3.065e-02, 8.131e-02, -2.031e-02, 5.018e-03, 1.444e-02)); + r += mul(s7_2, M4(1.941e-01, -4.139e-02, 4.032e-01, -2.533e-01, 3.842e-02, 7.539e-02, -3.439e-02, 2.617e-03, -1.251e-04, 3.135e-03, -1.139e-01, 4.916e-02, -2.644e-02, -1.615e-01, -1.998e-02, 3.684e-02)); + r += mul(s7_3, M4(1.724e-01, 7.871e-02, -2.757e-01, -1.913e-01, 1.702e-02, -1.198e-01, -1.135e-01, 5.146e-02, 1.594e-01, 8.131e-02, -1.323e-01, 7.771e-02, -1.266e-02, 3.053e-02, 6.846e-02, 2.713e-03)); + r += mul(s7_4, M4(-8.568e-02, 2.638e-02, 1.671e-01, -4.321e-02, -4.085e-02, -5.166e-02, -5.991e-02, -2.253e-02, 2.570e-01, 1.164e-01, -4.512e-02, 1.597e-01, -2.795e-02, 1.080e-01, -1.880e-01, 3.372e-02)); + r += mul(s7_5, M4(1.639e-01, -1.249e-01, 3.916e-02, -3.251e-01, 1.722e-01, 5.592e-02, 2.117e-02, 1.050e-01, 8.313e-02, 7.693e-02, 2.470e-01, 1.904e-01, 6.296e-02, -1.557e-01, 1.590e-01, 3.042e-02)); + r += mul(s7_6, M4(5.130e-02, 2.760e-01, -1.234e-01, -3.254e-01, -5.325e-03, 5.583e-02, -1.817e-02, 3.850e-02, 1.723e-01, 1.316e-01, 1.474e-01, 4.388e-02, -4.979e-03, -5.402e-02, 2.037e-02, -1.502e-02)); + r += mul(s7_7, M4(9.497e-02, 4.342e-02, 1.315e-01, 6.205e-02, -3.387e-02, 9.014e-02, -5.622e-02, -8.738e-03, -2.491e-01, -2.529e-01, 1.265e-01, -1.339e-01, -7.270e-02, -4.320e-02, -6.492e-02, -2.869e-01)); + r += mul(s7_8, M4(7.856e-02, 5.496e-02, -1.532e-01, -2.587e-01, 1.621e-02, 4.016e-02, 6.799e-02, -5.266e-02, 3.437e-02, -4.965e-02, -1.842e-01, 
-1.572e-01, -9.818e-02, -6.989e-02, 1.409e-01, -9.155e-02)); + r += V4(1.842e-03, 2.684e-02, -2.388e-02, 1.341e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.169e-02, 4.090e-02, 2.344e-02, 4.020e-02, 9.370e-02, -6.211e-03, 1.759e-02, -8.241e-02, -5.080e-02, -8.047e-03, -6.565e-03, 1.017e-01, -2.722e-01, -7.091e-02, -4.424e-02, 2.552e-02)); + r += mul(s0_1, M4(2.063e-02, 7.496e-02, -2.114e-03, 8.604e-02, -1.607e-01, -1.300e-02, -6.959e-02, 1.666e-02, 1.500e-01, 2.465e-03, -1.725e-02, 5.064e-02, 1.482e-01, -1.181e-01, 9.553e-02, -1.207e-01)); + r += mul(s0_2, M4(1.909e-01, -3.339e-02, -9.496e-03, 7.458e-02, -4.476e-03, 4.041e-02, -1.012e-03, 4.225e-02, 2.620e-02, -9.565e-02, 7.120e-02, -3.336e-02, -2.538e-01, -5.166e-02, -2.399e-02, -2.251e-01)); + r += mul(s0_3, M4(-1.458e-01, -5.764e-02, 3.750e-02, 2.984e-02, -1.128e-02, -1.041e-02, -5.174e-02, 7.210e-03, -1.559e-01, -8.793e-03, -5.447e-02, 7.778e-03, 1.510e-01, -3.397e-01, 5.370e-02, -9.013e-02)); + r += mul(s0_4, M4(-1.753e-01, -4.923e-02, 6.602e-02, -1.124e-02, 7.038e-02, -9.715e-02, -1.053e-01, -8.512e-02, -1.163e-02, -1.973e-02, 1.249e-02, 1.401e-02, 8.067e-01, -1.868e+00, 1.319e+00, -1.978e-01)); + r += mul(s0_5, M4(8.049e-02, -3.326e-02, 4.174e-02, -5.696e-02, -9.918e-02, 3.153e-02, -4.987e-02, 1.879e-02, -1.179e-01, 1.669e-03, 2.144e-02, 1.086e-01, 
-5.053e-01, -2.559e-01, -8.946e-02, 1.051e-01)); + r += mul(s0_6, M4(-1.396e-01, -4.532e-02, -5.139e-02, 5.597e-02, 1.531e-01, 7.081e-02, 1.311e-01, -8.032e-02, 1.066e-01, -3.651e-02, -1.540e-02, -3.857e-02, -2.438e-01, -1.575e-01, -1.553e-01, 8.916e-02)); + r += mul(s0_7, M4(6.295e-02, 1.070e-01, 1.808e-02, 5.307e-02, 2.205e-02, -3.011e-02, -2.873e-03, 5.554e-02, -1.650e-02, -7.347e-02, -3.139e-02, -7.255e-02, -7.705e-02, -2.776e-01, 1.386e-01, 2.273e-01)); + r += mul(s0_8, M4(5.068e-02, -2.934e-02, -6.390e-02, -3.529e-03, 2.496e-02, -5.950e-02, 9.848e-03, -4.895e-02, 4.320e-03, 2.137e-02, 8.088e-03, -1.885e-02, 3.472e-02, 7.456e-02, -9.062e-02, -1.616e-01)); + r += mul(s1_0, M4(-6.916e-02, 1.117e-01, 1.061e-02, -2.858e-02, -1.394e-02, -2.206e-02, 8.983e-02, -9.641e-02, 3.806e-02, 2.601e-02, 3.293e-03, -3.597e-03, -1.863e-02, 1.859e-02, 1.415e-02, 6.731e-03)); + r += mul(s1_1, M4(-8.516e-02, 8.850e-02, -1.573e-02, 2.316e-01, -1.692e-01, 8.693e-02, -1.889e-02, 5.133e-03, -7.009e-02, -4.160e-02, -5.661e-02, 1.357e-01, 4.354e-02, -2.824e-02, -1.820e-02, 5.339e-02)); + r += mul(s1_2, M4(1.563e-01, 4.841e-02, 1.820e-02, 2.156e-01, 2.081e-02, 1.728e-02, 6.492e-02, -6.011e-06, 1.238e-01, -1.890e-01, 5.422e-02, -1.803e-02, 3.738e-02, -3.699e-03, -3.440e-02, -4.935e-02)); + r += mul(s1_3, M4(-3.771e-02, 1.027e-02, 6.767e-02, 2.648e-02, 4.782e-03, -1.468e-01, 8.528e-02, -1.018e-01, 1.828e-01, 9.974e-02, 1.174e-01, -1.621e-01, -4.533e-02, -1.360e-01, 1.073e-01, -2.108e-02)); + r += mul(s1_4, M4(-2.093e-01, -8.418e-03, -1.582e-01, -1.408e-01, 2.320e-01, -6.685e-05, 8.601e-02, 3.419e-02, -3.267e-03, -2.444e-01, 5.275e-02, -1.274e-01, 3.212e-02, -5.614e-02, 1.082e-01, -5.123e-02)); + r += mul(s1_5, M4(3.014e-04, 1.183e-02, -8.981e-03, -7.106e-03, -7.303e-02, -6.706e-02, 8.619e-02, 2.726e-02, 2.377e-01, -1.732e-01, 6.783e-03, -8.318e-02, -8.778e-03, -6.088e-02, 3.059e-02, 6.358e-02)); + r += mul(s1_6, M4(-1.735e-01, -1.492e-02, -4.869e-02, 1.349e-02, 4.406e-02, -5.447e-02, 
1.870e-01, -2.048e-01, 2.230e-01, 6.500e-02, -4.847e-02, 1.976e-03, -1.086e-01, 1.539e-02, -4.601e-02, 6.654e-02)); + r += mul(s1_7, M4(-4.916e-02, 3.111e-01, -2.172e-01, -2.984e-02, 8.054e-02, -9.696e-02, 1.715e-01, 1.023e-01, -3.379e-01, -2.222e-01, -1.553e-01, 2.824e-01, 6.686e-02, 3.219e-02, -3.475e-02, 7.468e-03)); + r += mul(s1_8, M4(1.450e-01, 1.175e-01, -3.282e-01, -2.399e-01, -3.023e-02, -1.193e-01, 6.894e-02, -2.026e-01, 1.794e-02, -8.950e-02, 3.820e-02, 5.242e-02, -2.527e-02, -2.720e-02, -3.382e-02, -4.500e-02)); + r += mul(s2_0, M4(-3.438e-02, 5.057e-02, 3.839e-03, -2.960e-02, 3.820e-02, -3.512e-02, 2.248e-02, -7.140e-02, 1.783e-01, -2.689e-02, -5.086e-02, 1.715e-02, -2.994e-02, -2.400e-02, 5.800e-02, -4.431e-02)); + r += mul(s2_1, M4(-1.055e-01, -5.624e-02, 1.805e-02, -4.016e-02, -1.477e-01, 1.458e-02, -1.010e-01, 1.892e-02, 2.247e-01, 1.387e-01, -6.506e-03, 1.135e-01, -1.305e-01, -4.718e-02, -5.146e-02, -5.844e-02)); + r += mul(s2_2, M4(-1.991e-02, 4.664e-02, -2.422e-02, 1.581e-02, 1.908e-02, -1.695e-02, 2.632e-02, 7.893e-03, -1.246e-01, -1.096e-01, 2.398e-02, 3.730e-02, -5.910e-02, 4.511e-02, -1.959e-02, -1.711e-02)); + r += mul(s2_3, M4(6.776e-02, -2.936e-02, -3.144e-02, -1.066e-01, 1.442e-01, -2.189e-02, 1.530e-01, 5.580e-02, -1.258e-01, -1.123e-01, -5.577e-02, -5.266e-02, -1.371e-01, -6.798e-02, 7.172e-02, 7.116e-02)); + r += mul(s2_4, M4(1.143e-01, 2.555e-02, -5.947e-02, -6.223e-02, 9.015e-02, 7.343e-02, -5.756e-02, 2.320e-02, -1.310e-01, 3.643e-01, -3.327e-01, 2.611e-02, 3.383e-01, 1.516e-01, -8.123e-02, 1.385e-01)); + r += mul(s2_5, M4(-1.150e-01, -2.017e-01, 6.345e-02, -4.715e-02, -5.644e-02, -3.207e-02, 5.612e-02, 7.632e-02, 2.011e-02, 2.242e-01, -1.096e-01, 7.068e-02, 1.181e-01, 1.197e-01, 2.674e-02, -4.620e-02)); + r += mul(s2_6, M4(-9.551e-02, -9.498e-02, 2.679e-02, -8.821e-02, 2.767e-02, 7.976e-03, 5.204e-02, 1.872e-02, -1.089e-01, -4.498e-02, -6.061e-02, 8.602e-02, -9.359e-02, -4.350e-03, 3.876e-02, -8.969e-03)); + r += mul(s2_7, 
M4(-8.954e-02, -2.433e-01, 1.061e-01, -1.242e-01, -6.749e-02, 1.916e-02, -3.863e-03, 4.032e-02, -3.083e-02, -1.311e-01, -8.384e-02, 4.916e-02, 2.500e-01, 2.014e-02, 1.214e-01, 1.388e-01)); + r += mul(s2_8, M4(4.107e-03, 9.941e-02, 3.767e-02, 7.627e-02, -5.683e-02, -6.008e-04, -7.914e-03, -3.822e-02, -5.795e-02, 4.685e-02, -4.567e-02, -1.056e-02, -4.484e-02, -3.449e-02, 1.140e-01, 3.265e-02)); + r += mul(s3_0, M4(-2.306e-02, 2.921e-02, -3.826e-02, 8.888e-02, -7.899e-02, -2.019e-02, -6.082e-02, -1.194e-01, -7.656e-02, -1.032e-02, -1.433e-02, 1.198e-01, 3.798e-02, -3.611e-02, 1.199e-01, -9.537e-02)); + r += mul(s3_1, M4(2.121e-02, -9.145e-02, 8.246e-02, 9.169e-03, -4.139e-01, 6.092e-02, -8.756e-02, 8.627e-03, 1.045e-01, -3.856e-02, 5.856e-02, -1.957e-01, 7.316e-02, -6.812e-02, 3.636e-03, 6.997e-03)); + r += mul(s3_2, M4(2.658e-02, 7.785e-02, -3.263e-02, -1.520e-02, 1.612e-01, 4.608e-02, -7.045e-02, -7.303e-03, -1.109e-02, 1.272e-02, -3.647e-02, -3.375e-02, -7.386e-02, -6.704e-02, 1.301e-02, -2.946e-03)); + r += mul(s3_3, M4(-1.009e-01, 1.465e-01, -2.605e-02, 9.497e-02, -1.340e-01, -1.414e-01, -3.507e-02, 1.428e-01, -2.289e-01, -1.389e-02, 5.820e-02, -1.047e-01, -1.330e-01, 1.576e-02, 7.398e-02, -6.534e-02)); + r += mul(s3_4, M4(-4.357e-01, 7.855e-02, -2.403e-01, 1.323e-01, -4.297e-01, 4.858e-02, -2.305e-01, 1.827e-01, -1.741e-01, 1.311e-01, -7.600e-02, -2.465e-02, -8.542e-02, 8.355e-02, -2.918e-01, -6.781e-02)); + r += mul(s3_5, M4(8.516e-02, 5.179e-02, -4.071e-02, 2.149e-01, 3.137e-02, 1.405e-01, -1.702e-01, 1.147e-01, -1.077e-02, 2.508e-01, -9.565e-02, -2.340e-02, -5.770e-03, -2.109e-02, -2.620e-02, -2.459e-02)); + r += mul(s3_6, M4(1.820e-01, 5.519e-02, -4.102e-02, -2.981e-02, -9.297e-02, 4.135e-02, -1.152e-01, 1.431e-01, 1.658e-01, 1.139e-03, 8.223e-02, -3.856e-02, 3.631e-02, -3.451e-02, 7.224e-02, 9.099e-02)); + r += mul(s3_7, M4(2.301e-01, -1.471e-02, 3.288e-02, -7.237e-03, 2.109e-01, 2.026e-01, -2.288e-01, -2.980e-02, 9.537e-02, -4.301e-02, 1.508e-01, 
-4.254e-02, 6.804e-02, -3.899e-02, 4.478e-02, -1.037e-01)); + r += mul(s3_8, M4(-4.881e-02, 1.157e-01, 3.404e-02, -3.252e-03, 6.786e-02, -5.432e-02, -9.315e-02, -6.633e-02, -3.822e-02, 1.650e-02, 4.145e-02, 5.647e-02, -1.303e-01, -1.228e-01, 9.147e-02, 1.600e-01)); + r += mul(s4_0, M4(-1.691e-01, 2.079e-02, -7.759e-02, 4.724e-02, -2.213e-02, 5.189e-02, 9.549e-04, 5.798e-03, 8.639e-02, -4.559e-03, 6.095e-02, 4.818e-02, -1.305e-01, -5.503e-02, -1.809e-02, 9.957e-03)); + r += mul(s4_1, M4(1.043e-01, 6.596e-02, -2.271e-01, -1.330e-01, -2.238e-02, 6.202e-02, 2.146e-02, -2.418e-03, -2.807e-01, 6.046e-02, 1.510e-01, 8.035e-02, -1.384e-01, 2.720e-02, -5.025e-02, -7.221e-02)); + r += mul(s4_2, M4(2.902e-02, 5.965e-02, -8.962e-02, -1.041e-01, -3.013e-02, 7.582e-03, -5.418e-02, 1.194e-02, -2.878e-02, 1.109e-02, 6.175e-02, 2.218e-01, -3.740e-02, 7.279e-02, 6.343e-02, -1.504e-01)); + r += mul(s4_3, M4(2.594e-03, 6.859e-02, -8.585e-02, -1.093e-01, 1.127e-01, 2.987e-02, -4.764e-03, -7.266e-02, -2.309e-01, -8.483e-02, 2.689e-02, 9.467e-02, -1.932e-01, 3.025e-02, -1.774e-01, 2.257e-02)); + r += mul(s4_4, M4(-3.380e-02, -2.715e-01, 2.109e-01, 1.939e-01, 3.544e-01, 1.760e-01, 1.256e-01, 3.389e-01, -1.171e-01, 8.500e-02, -7.548e-03, -6.249e-03, 1.652e-01, 2.683e-01, 1.176e-01, 1.076e-01)); + r += mul(s4_5, M4(3.852e-02, 5.116e-02, 7.302e-02, 2.183e-02, -6.533e-02, -1.307e-01, -3.048e-02, -5.867e-02, 4.960e-02, -1.511e-01, 1.733e-01, 9.017e-02, -5.388e-02, -9.929e-02, 8.055e-02, 7.235e-03)); + r += mul(s4_6, M4(1.582e-02, 1.934e-02, -1.586e-02, 5.434e-02, 5.866e-02, 6.706e-02, -7.785e-02, -9.925e-03, -1.298e-01, -3.682e-02, 1.045e-02, -2.375e-02, 5.247e-02, 6.268e-02, 3.628e-02, 1.363e-01)); + r += mul(s4_7, M4(6.417e-02, 9.830e-02, -5.222e-02, -1.481e-01, 4.089e-02, -2.354e-02, -1.246e-01, -1.849e-01, 7.210e-02, -1.505e-01, 4.560e-02, -3.750e-02, 9.564e-02, 1.137e-01, 6.895e-02, -3.109e-02)); + r += mul(s4_8, M4(-8.126e-03, -1.664e-02, 4.428e-02, 3.837e-02, -6.828e-02, 6.690e-02, 
-5.074e-02, 3.574e-02, 4.772e-02, -3.342e-03, -2.924e-02, -1.893e-03, 4.866e-03, -4.318e-02, 1.736e-03, -3.455e-02)); + r += mul(s5_0, M4(-8.940e-02, 7.315e-02, -5.067e-02, 3.852e-02, 6.637e-02, 6.184e-02, -5.849e-02, 4.301e-02, -5.232e-02, 6.418e-03, 2.258e-02, 5.604e-02, -4.454e-02, -1.157e-03, 1.682e-03, 9.476e-03)); + r += mul(s5_1, M4(8.366e-02, 6.140e-03, -6.720e-02, -1.008e-01, -8.472e-02, 1.509e-02, -1.952e-02, -1.084e-02, 1.948e-02, 8.458e-02, -1.669e-03, 4.498e-03, -2.013e-01, -1.214e-01, -7.837e-03, -7.837e-02)); + r += mul(s5_2, M4(-7.940e-02, 2.387e-02, -6.337e-02, 5.181e-02, -1.316e-01, 3.389e-02, -5.279e-02, 2.279e-02, -1.102e-01, -7.101e-02, -9.703e-03, 2.051e-02, 2.353e-02, -4.424e-02, 1.273e-02, -9.692e-02)); + r += mul(s5_3, M4(5.557e-02, 7.451e-02, 3.178e-03, -4.212e-02, 3.122e-02, -2.396e-02, -8.966e-02, -9.927e-04, 2.508e-02, -5.133e-03, 8.334e-03, -2.729e-02, -5.904e-02, 3.528e-02, -9.926e-02, -1.284e-01)); + r += mul(s5_4, M4(1.171e-01, -1.991e-02, 3.233e-02, -1.152e-01, -2.104e-02, 8.669e-02, -2.160e-01, -1.384e-02, -3.869e-02, -8.916e-02, -4.776e-02, -5.731e-02, -1.426e-02, -1.452e-01, -9.362e-02, 5.315e-03)); + r += mul(s5_5, M4(-9.686e-02, 2.843e-02, -1.125e-03, 2.183e-02, -9.590e-02, -1.502e-01, 4.370e-02, -1.739e-01, -6.316e-02, -1.170e-01, 1.243e-01, -1.580e-01, 1.111e-01, -1.091e-01, 2.264e-02, -1.214e-02)); + r += mul(s5_6, M4(-5.885e-02, -3.099e-02, 1.093e-03, 5.912e-02, -4.486e-02, 3.087e-02, -2.398e-02, -6.617e-02, 1.619e-03, -1.174e-02, 1.940e-02, -6.470e-02, 1.710e-01, 3.624e-02, 4.390e-02, 5.356e-02)); + r += mul(s5_7, M4(-7.138e-02, 6.516e-02, -3.867e-02, 2.541e-02, 1.851e-01, -7.945e-02, 6.494e-02, -2.329e-01, 3.373e-02, -5.117e-02, 4.693e-02, -1.827e-02, -7.217e-02, -7.580e-02, 7.111e-02, 1.205e-01)); + r += mul(s5_8, M4(-4.836e-02, 7.135e-03, 8.069e-03, 2.399e-02, -7.394e-03, -3.601e-02, 1.617e-01, -7.325e-02, 1.779e-02, 1.871e-02, -6.577e-03, 5.262e-02, 3.870e-02, -1.992e-02, 4.046e-03, 8.874e-02)); + r += mul(s6_0, 
M4(-9.499e-02, 1.140e-02, -2.421e-02, 2.122e-02, -2.021e-01, -3.334e-02, 3.182e-02, -1.274e-01, -8.233e-03, -2.705e-02, -5.592e-03, 3.303e-02, 4.097e-02, -5.783e-02, 1.346e-01, -3.961e-02)); + r += mul(s6_1, M4(6.091e-02, 6.245e-02, -4.442e-02, 1.453e-02, 2.683e-01, 5.106e-02, -4.170e-02, 8.964e-02, -5.980e-02, -1.027e-01, 2.440e-02, -1.843e-01, -6.987e-02, 1.362e-01, 2.607e-02, 1.640e-01)); + r += mul(s6_2, M4(-1.758e-02, 4.428e-02, -2.778e-02, 3.710e-02, -3.928e-02, -1.444e-01, 8.485e-02, -2.627e-01, -8.773e-03, -4.307e-02, 3.662e-02, -4.266e-02, -1.248e-01, -4.775e-02, 1.386e-01, -4.849e-02)); + r += mul(s6_3, M4(2.991e-02, 1.300e-02, -3.722e-02, -1.127e-02, -6.081e-02, -6.664e-02, 2.307e-02, -1.537e-01, -9.816e-02, 6.137e-02, -1.776e-01, -7.971e-02, 1.737e-01, 1.321e-01, 1.894e-02, 6.328e-02)); + r += mul(s6_4, M4(2.797e-01, -4.712e-02, 9.233e-02, 1.622e-02, -3.759e-01, -3.643e-01, -3.666e-01, -2.763e-01, -1.820e-01, -9.981e-02, -1.633e-01, 5.873e-02, 3.666e-01, 1.277e-02, 2.019e-01, 7.803e-02)); + r += mul(s6_5, M4(2.367e-02, -8.355e-02, -4.754e-03, -4.127e-02, -2.992e-02, -2.973e-02, -2.183e-01, -1.935e-01, 4.919e-02, 1.440e-01, -1.552e-01, 1.123e-02, 3.556e-02, -2.278e-01, 7.747e-02, -2.039e-01)); + r += mul(s6_6, M4(2.708e-02, -4.988e-02, 1.906e-02, 6.498e-02, -6.744e-02, -6.364e-02, -6.158e-02, -1.413e-01, 5.636e-02, 2.833e-02, -2.499e-02, -5.730e-02, 2.125e-02, 4.344e-02, -3.250e-02, -1.461e-03)); + r += mul(s6_7, M4(2.917e-02, 5.699e-02, -9.029e-03, 5.299e-03, -7.207e-03, -3.742e-04, -2.350e-01, -5.041e-01, -5.733e-02, -1.208e-01, -1.018e-01, -8.049e-02, 6.673e-02, 1.619e-02, 5.462e-02, -9.444e-02)); + r += mul(s6_8, M4(-2.370e-02, 4.235e-02, -9.635e-03, 7.599e-02, -5.283e-02, 1.118e-01, 8.985e-04, -2.380e-01, 5.308e-02, -4.938e-02, -2.485e-02, -4.421e-02, -1.214e-01, -1.494e-01, 1.557e-01, 1.011e-01)); + r += mul(s7_0, M4(8.055e-03, -1.224e-01, 8.803e-02, -1.356e-01, -2.769e-02, -8.317e-02, 5.839e-02, 8.470e-03, 4.342e-02, -1.724e-02, 2.101e-02, 
-9.978e-02, 1.580e-02, 7.303e-03, -6.736e-03, -2.369e-02)); + r += mul(s7_1, M4(2.547e-01, 3.471e-01, 1.057e-01, 2.483e-02, 5.170e-02, 7.626e-02, 3.029e-02, 1.112e-01, -1.420e-01, 6.397e-03, 8.291e-02, 2.148e-02, 4.163e-02, 3.154e-02, 3.481e-04, -2.553e-02)); + r += mul(s7_2, M4(-3.976e-02, 1.646e-01, 1.366e-02, -6.070e-02, 3.627e-02, -4.193e-02, 3.261e-02, -1.633e-02, 7.845e-02, 1.841e-02, 4.332e-02, 1.388e-01, -9.223e-02, -5.002e-02, 7.201e-02, -1.203e-01)); + r += mul(s7_3, M4(-2.471e-01, -1.349e-01, -1.073e-01, -1.309e-01, -6.461e-02, 1.575e-02, -5.407e-03, -9.095e-04, 7.147e-02, -5.513e-02, -9.230e-02, -9.799e-02, 1.026e-01, 3.824e-02, -5.407e-02, 3.357e-02)); + r += mul(s7_4, M4(2.224e-01, 3.955e-01, 6.146e-01, -3.382e-01, -6.338e-04, 4.467e-02, 1.799e-02, 1.858e-01, -6.463e-02, -2.434e-02, -5.017e-02, -6.841e-02, 8.690e-02, 7.333e-02, -5.250e-02, -1.024e-01)); + r += mul(s7_5, M4(-5.744e-03, -7.747e-02, -1.165e-01, 6.425e-02, 7.534e-04, 2.417e-02, 3.699e-02, -1.184e-03, 1.170e-01, 1.713e-01, -7.106e-02, -7.859e-02, -5.088e-02, 1.855e-02, 8.254e-03, 1.640e-02)); + r += mul(s7_6, M4(2.489e-01, -1.301e-02, 8.608e-02, -8.025e-03, 1.432e-01, -1.112e-02, 3.130e-02, 4.804e-02, 4.789e-02, 3.900e-02, 5.997e-02, -6.963e-02, -1.209e-01, -2.880e-02, -5.815e-02, -1.188e-02)); + r += mul(s7_7, M4(6.111e-02, 1.857e-01, -4.093e-02, -2.237e-01, -1.180e-01, -3.436e-03, -5.388e-02, 3.630e-02, -9.795e-02, -5.991e-02, 1.480e-01, -4.211e-02, -5.772e-02, -7.684e-03, -8.719e-02, -9.107e-02)); + r += mul(s7_8, M4(1.557e-01, -1.302e-02, 3.695e-02, -1.460e-01, 7.050e-02, 6.253e-02, 2.880e-02, 2.843e-02, 4.670e-04, -1.595e-01, 8.835e-02, -6.949e-02, 1.607e-02, 1.309e-03, -1.750e-02, -3.881e-02)); + r += V4(7.646e-03, -3.306e-02, 1.535e-02, 6.065e-03); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 
pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = 
-max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, 
s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.218e-02, -9.156e-03, 1.697e-02, -3.071e-02, 1.779e-03, 2.556e-02, -3.235e-02, -4.232e-02, 1.762e-02, -3.808e-02, 1.125e-02, 1.392e-02, 4.069e-03, 2.945e-03, -7.095e-03, -1.031e-02)); + r += mul(s0_1, M4(-6.904e-03, -7.007e-02, -8.971e-03, 
4.521e-02, 2.397e-02, -3.704e-02, -1.335e-02, -4.669e-02, -2.479e-02, 1.644e-02, 6.426e-02, 3.375e-02, -1.541e-03, -6.073e-02, 1.416e-02, -3.269e-02)); + r += mul(s0_2, M4(-2.058e-02, 7.011e-02, -5.697e-02, 4.020e-02, 7.338e-03, -2.103e-03, 2.206e-02, 6.535e-03, -3.231e-02, 3.870e-02, -3.860e-03, -3.670e-02, 2.495e-02, -2.574e-02, 1.926e-02, -4.973e-02)); + r += mul(s0_3, M4(-4.293e-02, -3.056e-02, -1.686e-01, 1.441e-02, -1.059e-01, -1.344e-01, 2.379e-02, -4.777e-02, -3.675e-02, 1.265e-01, -5.420e-02, -3.269e-02, -3.922e-03, 5.347e-02, -2.844e-02, -1.718e-02)); + r += mul(s0_4, M4(-8.574e-02, -9.176e-02, -1.966e-01, 1.048e-01, -8.294e-03, 1.117e-01, 9.178e-02, 1.257e-01, -4.588e-02, -5.564e-02, -1.730e-01, 1.361e-02, 6.663e-02, 1.381e-01, -1.630e-02, -4.021e-02)); + r += mul(s0_5, M4(4.136e-03, -7.935e-02, -8.286e-02, -1.222e-01, 4.729e-02, 5.418e-02, 8.974e-02, 1.665e-02, -2.416e-02, -6.075e-02, -1.305e-01, 3.602e-02, -3.502e-02, 1.021e-02, -2.809e-02, 2.753e-02)); + r += mul(s0_6, M4(3.777e-02, 2.017e-02, 3.033e-02, -4.287e-03, 2.545e-02, 1.474e-02, 4.763e-02, -3.844e-02, -8.892e-03, 1.642e-02, 8.656e-02, -8.221e-03, 2.291e-03, -2.849e-02, -1.719e-02, -5.023e-03)); + r += mul(s0_7, M4(6.786e-02, -3.284e-03, 6.808e-02, -7.077e-04, 3.924e-02, 3.459e-02, 3.492e-02, -3.447e-02, -1.055e-01, -1.112e-01, 2.040e-01, -5.955e-02, 5.243e-03, -4.751e-02, -1.151e-02, 1.004e-01)); + r += mul(s0_8, M4(-4.098e-02, 7.437e-03, -2.620e-02, -3.189e-02, -1.628e-02, 8.777e-03, 8.764e-03, -7.779e-03, 1.184e-01, 9.020e-03, 7.741e-02, -8.047e-02, 2.824e-02, 5.073e-03, -4.022e-02, 6.762e-02)); + r += mul(s1_0, M4(4.494e-02, -6.119e-02, 4.058e-02, 5.218e-03, 1.067e-02, 2.838e-02, 2.448e-02, 4.347e-02, -6.863e-03, 1.615e-02, 2.493e-02, -2.700e-02, 4.393e-03, 9.190e-03, -2.479e-02, -2.607e-02)); + r += mul(s1_1, M4(-1.614e-02, -2.665e-02, 5.162e-02, 4.714e-02, -2.552e-02, 3.422e-02, -2.133e-02, -2.968e-02, 8.076e-03, -1.551e-02, -2.935e-02, -3.791e-03, 5.501e-02, -7.794e-04, 1.778e-02, 
3.403e-02)); + r += mul(s1_2, M4(4.270e-02, -1.920e-02, 1.891e-02, 1.107e-01, -5.436e-03, -2.268e-02, 1.225e-03, 6.840e-03, 5.458e-03, -3.349e-03, 1.522e-02, -4.326e-04, 3.125e-02, -3.705e-02, 9.931e-03, -2.976e-03)); + r += mul(s1_3, M4(-6.230e-03, 7.126e-02, -8.891e-02, 5.800e-02, 4.454e-02, 9.799e-02, -4.127e-02, 3.676e-02, -1.231e-02, 1.625e-01, -5.166e-02, -3.153e-02, -4.016e-01, -1.090e-03, 2.476e-01, -4.948e-03)); + r += mul(s1_4, M4(-1.130e-02, 2.329e-01, -2.248e-01, -2.242e-02, 3.017e-02, -7.676e-02, -8.103e-02, -1.202e-01, 6.177e-02, -8.543e-03, 4.122e-02, -1.608e-02, -5.273e-02, -4.625e-02, 5.280e-01, -1.035e-01)); + r += mul(s1_5, M4(1.048e-01, -4.992e-02, -4.728e-02, -8.256e-02, -1.446e-02, -1.091e-01, -3.986e-03, 3.267e-01, -5.484e-03, -4.397e-02, 1.283e-02, -1.320e-02, -1.774e-02, 2.067e-02, 7.630e-02, -9.716e-02)); + r += mul(s1_6, M4(-7.980e-02, 4.070e-03, 1.274e-02, 2.572e-02, 5.635e-02, -3.270e-02, 5.764e-02, -9.156e-02, -1.360e-02, -4.462e-03, -1.850e-02, 6.472e-03, -6.115e-02, -5.032e-02, -2.241e-02, -2.497e-02)); + r += mul(s1_7, M4(-2.255e-02, -4.712e-02, 1.017e-01, 2.487e-02, 1.273e-02, -1.527e-02, -9.580e-02, 1.803e-01, -4.787e-02, 5.310e-03, -8.233e-02, -6.115e-02, -2.104e-01, 2.682e-02, 3.220e-02, -8.610e-02)); + r += mul(s1_8, M4(9.152e-03, -5.382e-02, -2.372e-02, 3.469e-02, 1.743e-02, 1.022e-02, -1.876e-03, -1.372e-01, 3.201e-02, 8.503e-03, -1.625e-02, 3.147e-02, 3.708e-02, 1.029e-02, -1.535e-02, -1.202e-01)); + r += mul(s2_0, M4(-3.319e-02, 1.736e-02, 4.131e-03, -1.563e-02, -3.905e-02, 5.418e-02, -6.206e-02, -2.935e-03, 2.800e-02, -2.023e-02, -2.784e-02, 3.006e-03, -6.059e-03, 6.524e-02, -2.346e-02, -1.736e-02)); + r += mul(s2_1, M4(-4.049e-02, 6.725e-03, -1.085e-02, 4.035e-03, -1.821e-02, 1.768e-01, -8.133e-02, -3.072e-02, 4.867e-02, -6.692e-02, -7.153e-02, -2.925e-02, -2.937e-02, 4.958e-02, 2.931e-02, 2.594e-02)); + r += mul(s2_2, M4(1.404e-02, 1.566e-02, -2.191e-02, 1.534e-02, 4.273e-02, 1.018e-02, 3.499e-02, -5.138e-02, 3.985e-02, 
-1.176e-01, -1.058e-01, 1.436e-02, -2.559e-02, 1.130e-02, -2.797e-02, 3.066e-02)); + r += mul(s2_3, M4(3.445e-02, 1.171e-02, -3.006e-02, 3.256e-03, -3.482e-02, 4.140e-02, -4.909e-02, -6.044e-02, 1.321e-01, -1.897e-01, -8.159e-02, -2.112e-02, 1.763e-02, -1.096e-02, 5.685e-02, 4.375e-02)); + r += mul(s2_4, M4(-4.789e-02, -4.563e-03, -4.865e-02, -5.439e-02, 5.289e-02, -4.466e-01, -3.885e-01, -7.680e-03, 1.108e-01, 3.201e-02, -1.204e-01, 8.638e-02, -1.449e-02, 3.291e-01, -6.534e-02, 1.375e-01)); + r += mul(s2_5, M4(-7.094e-02, -9.337e-02, -2.026e-02, -4.696e-02, -9.318e-02, -1.711e-01, 1.475e-03, 2.454e-01, 3.542e-02, -1.756e-01, 3.723e-02, 2.257e-01, 1.025e-01, -1.602e-01, -1.003e-01, -9.612e-02)); + r += mul(s2_6, M4(6.853e-02, -7.465e-02, -5.793e-02, -1.559e-02, 1.170e-01, -4.489e-02, -1.518e-02, -1.635e-01, 1.255e-01, -6.608e-02, -1.771e-02, -2.516e-02, -2.612e-05, 5.702e-02, -4.380e-02, -1.893e-02)); + r += mul(s2_7, M4(7.728e-02, 7.197e-02, 4.858e-02, -9.770e-02, 1.490e-02, -9.032e-03, -8.429e-02, 1.293e-02, 1.367e-01, -2.023e-02, 8.069e-02, 1.568e-01, 1.764e-01, 2.864e-02, 3.665e-02, -1.475e-01)); + r += mul(s2_8, M4(1.070e-02, -7.340e-03, 2.201e-02, -4.505e-02, 1.376e-02, 3.961e-02, -5.880e-02, -2.605e-01, 1.372e-01, -3.956e-02, 3.714e-02, 1.820e-02, -2.554e-02, -1.309e-01, -9.963e-03, -1.017e-01)); + r += mul(s3_0, M4(-8.436e-02, 7.121e-02, -5.202e-02, -2.004e-03, -2.206e-02, 4.483e-02, 5.284e-02, -1.632e-02, -1.945e-02, 4.127e-02, -3.915e-02, -3.035e-02, -1.770e-02, 2.422e-02, -4.709e-03, -1.520e-02)); + r += mul(s3_1, M4(-5.321e-02, 1.762e-01, -2.945e-02, 7.592e-02, -1.644e-02, 1.135e-01, -7.430e-03, -3.749e-02, -5.811e-03, 1.906e-02, 2.663e-02, 3.511e-02, 3.281e-03, -1.015e-01, -3.120e-02, -6.097e-02)); + r += mul(s3_2, M4(-1.092e-03, 3.153e-02, -5.503e-03, -1.146e-02, 5.314e-02, -2.881e-03, -3.640e-02, -9.900e-02, 4.377e-03, 5.991e-02, -2.789e-02, -3.115e-02, -2.704e-03, -1.549e-02, 2.186e-02, 1.206e-02)); + r += mul(s3_3, M4(-7.141e-02, -1.261e-01, 
1.962e-01, 6.488e-02, 2.605e-02, -2.440e-02, -8.179e-02, -3.468e-03, -4.819e-02, -3.724e-02, 7.258e-02, -3.404e-04, 3.014e-02, 3.029e-02, -1.218e-02, -3.531e-02)); + r += mul(s3_4, M4(-1.182e-01, -6.580e-02, -1.255e-02, 9.880e-02, -3.174e-02, -9.890e-02, -7.282e-03, -9.129e-02, -4.213e-02, 4.028e-02, 9.673e-03, 6.345e-02, -3.783e-03, -1.110e-01, 2.652e-02, 5.203e-02)); + r += mul(s3_5, M4(-2.349e-02, -8.691e-02, 4.553e-03, -8.088e-02, -6.252e-02, -7.572e-02, 6.911e-02, -1.130e-01, -4.595e-02, -1.190e-01, 6.714e-02, 1.355e-02, -1.892e-02, 1.295e-01, -1.577e-01, -1.358e-02)); + r += mul(s3_6, M4(2.027e-01, 1.257e-01, -3.358e-02, -4.551e-03, 6.611e-02, 2.388e-02, 1.469e-02, -6.323e-02, -1.892e-02, 2.749e-02, 2.641e-02, 1.404e-03, 2.111e-02, -1.817e-02, -2.992e-02, -2.689e-02)); + r += mul(s3_7, M4(-1.821e-01, -9.000e-02, 3.136e-02, -4.891e-04, 1.247e-01, 4.818e-02, 1.421e-01, -1.236e-01, -1.147e-02, 6.238e-02, 5.303e-02, 1.022e-02, 2.910e-02, -1.084e-02, 3.081e-02, 5.174e-02)); + r += mul(s3_8, M4(-1.797e-02, -5.589e-03, -1.555e-02, -6.042e-02, 1.961e-02, 6.395e-02, 1.150e-01, -2.035e-03, 3.739e-02, 4.287e-02, 5.316e-02, -9.278e-02, -1.505e-02, 1.129e-01, 1.067e-01, 1.242e-01)); + r += mul(s4_0, M4(3.162e-02, -9.156e-02, 4.139e-02, -5.570e-03, 2.892e-02, 2.495e-02, -2.536e-02, 5.830e-04, -1.000e-02, -8.313e-02, -1.337e-01, -1.601e-01, -4.115e-02, 3.800e-02, 2.641e-02, 1.219e-02)); + r += mul(s4_1, M4(-5.936e-03, -7.623e-02, 3.342e-02, 8.082e-02, -1.409e-02, 7.734e-03, -4.970e-02, -2.206e-02, -8.357e-02, 9.654e-02, -6.065e-02, 8.079e-03, 7.353e-02, -1.206e-01, -5.928e-03, 1.667e-02)); + r += mul(s4_2, M4(7.881e-04, 6.908e-02, -6.028e-03, -1.618e-02, 4.818e-04, 3.488e-02, 1.759e-03, 2.699e-02, 2.713e-02, 3.136e-02, -2.399e-02, -1.504e-03, -6.237e-03, -4.065e-02, 5.615e-02, -3.163e-03)); + r += mul(s4_3, M4(-2.860e-02, 1.294e-01, -2.332e-02, 1.808e-02, -1.743e-02, -5.386e-02, -3.762e-02, -4.321e-02, -2.588e-01, 1.398e-01, 1.353e-01, -8.920e-02, 2.448e-02, 4.920e-02, 
-1.224e-02, -1.614e-02)); + r += mul(s4_4, M4(1.204e-01, -8.509e-02, -4.924e-02, 6.064e-03, 3.034e-02, 1.574e-02, 4.869e-02, 2.665e-02, 6.871e-02, -6.866e-02, 1.735e-02, 1.449e-01, -1.354e-01, 7.863e-02, 6.953e-02, -1.963e-03)); + r += mul(s4_5, M4(1.099e-01, 5.797e-02, -1.392e-01, -4.072e-02, 5.068e-02, -4.678e-02, 3.621e-04, -6.722e-02, -7.565e-03, -6.160e-02, 2.661e-02, -3.352e-02, -1.439e-02, 1.805e-02, -3.017e-02, 2.053e-02)); + r += mul(s4_6, M4(-6.244e-02, -2.477e-04, 1.592e-02, 8.183e-02, 4.341e-02, -3.616e-02, -3.582e-02, -7.649e-03, 1.282e-01, -2.958e-02, 6.122e-02, 4.400e-02, -8.084e-02, 2.607e-02, -1.274e-02, -3.702e-02)); + r += mul(s4_7, M4(4.572e-02, -7.332e-02, -1.435e-02, -1.354e-03, -7.438e-03, 3.358e-02, -1.565e-02, 2.219e-02, -2.104e-01, 1.387e-01, -1.787e-01, 9.578e-03, -3.047e-02, 8.672e-03, 1.326e-02, 7.549e-02)); + r += mul(s4_8, M4(2.942e-02, 5.386e-02, 7.478e-02, -1.506e-02, -3.262e-02, 5.324e-02, 1.510e-02, -7.526e-03, -9.404e-02, -2.948e-02, -1.159e-01, 5.612e-02, 1.017e-02, -2.557e-02, 4.533e-02, -7.814e-03)); + r += mul(s5_0, M4(1.273e-02, -2.335e-02, 4.686e-04, -6.832e-02, -5.006e-03, 1.206e-01, 2.255e-02, 2.793e-02, 2.599e-02, -2.800e-02, -6.195e-02, -1.542e-02, 2.949e-02, 2.249e-02, -1.452e-02, -3.281e-02)); + r += mul(s5_1, M4(-2.655e-02, -7.515e-02, 5.225e-02, 2.554e-02, -3.540e-03, 1.848e-01, -4.751e-02, 6.319e-02, -1.683e-02, -8.168e-02, -3.264e-04, 1.614e-02, -8.596e-02, 1.572e-01, -7.540e-02, -3.979e-02)); + r += mul(s5_2, M4(2.264e-02, 2.874e-02, 1.692e-02, -2.441e-02, 8.897e-03, 2.086e-01, -2.564e-02, -6.567e-02, -2.882e-02, 2.852e-03, -1.204e-02, 2.980e-02, 9.673e-02, -8.394e-02, -1.061e-02, -5.353e-02)); + r += mul(s5_3, M4(-4.644e-02, 2.119e-01, -4.925e-02, 4.407e-02, -2.709e-02, 3.256e-02, 1.203e-01, 7.638e-02, -8.119e-02, -3.423e-02, 5.092e-02, 4.114e-02, -3.080e-02, -4.722e-02, 3.639e-02, 3.353e-02)); + r += mul(s5_4, M4(1.231e-02, -2.017e-01, 2.356e-01, 4.422e-02, -1.923e-01, 2.277e-01, 2.554e-01, -1.027e-01, 
7.785e-02, 1.668e-01, 2.965e-02, 7.079e-02, 1.176e-01, -3.389e-01, -8.714e-02, 7.058e-02)); + r += mul(s5_5, M4(-9.489e-03, 2.903e-02, -3.210e-02, -1.888e-01, 7.274e-02, -1.247e-01, -3.767e-02, -4.563e-01, 4.031e-02, -6.312e-02, 5.967e-02, -1.956e-02, -1.023e-01, 1.412e-01, -4.549e-02, 7.961e-02)); + r += mul(s5_6, M4(-1.957e-02, 2.566e-02, -1.645e-02, -4.895e-02, -7.324e-02, 5.104e-02, 1.960e-02, 1.259e-03, -8.464e-02, 1.944e-02, 3.265e-02, 3.044e-02, 6.595e-02, -2.463e-02, 8.220e-02, -3.774e-02)); + r += mul(s5_7, M4(2.361e-02, 4.172e-02, -2.336e-01, -1.671e-02, -2.482e-01, 1.362e-01, -5.941e-02, -2.531e-01, -1.605e-01, 5.645e-02, -1.127e-02, -2.137e-04, -1.616e-02, -5.168e-03, 5.329e-02, -4.315e-02)); + r += mul(s5_8, M4(-3.904e-02, 3.886e-02, -4.339e-02, -1.119e-02, 1.983e-02, -1.485e-02, -4.235e-02, -2.184e-01, 5.063e-03, -2.330e-02, -1.304e-02, -2.295e-02, 5.199e-02, -2.578e-03, 1.972e-02, 1.030e-02)); + r += mul(s6_0, M4(-2.338e-02, 5.055e-02, 1.732e-03, -9.885e-03, -2.915e-02, 8.990e-02, 2.518e-02, 2.124e-02, -1.774e-02, -3.234e-02, 6.783e-02, -1.173e-02, 9.443e-03, -2.168e-02, -1.047e-02, -3.308e-02)); + r += mul(s6_1, M4(-3.121e-02, 5.847e-02, -1.311e-02, 9.991e-03, 7.540e-02, 3.953e-02, 1.283e-02, -1.149e-01, -2.369e-02, 1.301e-01, 1.979e-03, -1.296e-03, -2.955e-02, -1.485e-02, -8.727e-02, -1.772e-02)); + r += mul(s6_2, M4(3.338e-02, -2.252e-03, -2.092e-02, 1.192e-03, 9.133e-04, -2.904e-02, 6.185e-03, -6.905e-02, -2.519e-02, 7.629e-02, 4.223e-02, -9.257e-02, 8.333e-03, -3.317e-02, 3.203e-02, 6.154e-02)); + r += mul(s6_3, M4(1.712e-02, 2.045e-02, -5.793e-02, -2.341e-03, 8.039e-02, -8.269e-02, 2.075e-02, -4.721e-02, 9.015e-02, -3.655e-02, 6.540e-02, 3.960e-02, 1.583e-02, -7.436e-02, -1.300e-02, 1.814e-02)); + r += mul(s6_4, M4(2.184e-02, -1.733e-02, -1.068e-01, 2.435e-02, -8.370e-02, -5.671e-01, -2.107e-02, 4.727e-02, -4.600e-02, 4.284e-02, -7.898e-02, -1.697e-01, 1.979e-02, -5.546e-02, 1.361e-01, 9.164e-02)); + r += mul(s6_5, M4(1.320e-02, -1.454e-01, 
2.815e-02, -5.059e-02, -1.163e-03, -1.499e-01, 3.430e-02, -2.612e-02, 1.303e-03, 9.012e-03, -9.780e-02, 1.160e-01, -4.786e-02, 1.464e-02, -1.254e-02, -6.177e-02)); + r += mul(s6_6, M4(1.925e-02, -3.924e-04, -5.408e-02, 8.060e-03, -1.343e-01, -7.555e-02, 8.500e-02, 7.564e-03, -8.056e-03, 9.773e-02, -6.052e-02, 4.965e-02, -2.164e-02, 6.185e-02, -1.131e-01, -2.691e-02)); + r += mul(s6_7, M4(1.581e-02, 1.541e-02, -9.285e-02, -6.290e-02, 2.561e-02, -2.064e-01, 1.334e-01, -4.603e-02, 3.075e-01, 1.706e-02, -5.560e-02, 1.086e-01, 1.394e-01, -2.809e-02, -8.801e-02, -3.920e-02)); + r += mul(s6_8, M4(-2.864e-02, 3.645e-03, -3.122e-02, -4.279e-02, 6.721e-03, -5.037e-02, 5.367e-02, -5.210e-02, 1.086e-02, 8.553e-02, -2.049e-02, 1.921e-01, -1.198e-03, 5.278e-02, 3.162e-03, 5.703e-02)); + r += mul(s7_0, M4(8.237e-04, 7.565e-02, -3.261e-02, 1.459e-02, -3.049e-02, -6.221e-02, -3.988e-02, -2.718e-02, 1.579e-02, -6.823e-02, 9.483e-03, 3.391e-03, -1.150e-02, 2.973e-02, 4.140e-02, 1.146e-02)); + r += mul(s7_1, M4(-6.292e-02, -3.017e-02, -2.304e-02, -4.184e-02, 6.524e-03, -9.847e-02, 4.331e-03, -2.763e-03, 2.436e-02, 7.249e-02, -4.674e-02, 6.134e-02, 3.011e-02, 3.430e-01, 7.222e-02, 1.344e-01)); + r += mul(s7_2, M4(3.360e-02, -5.552e-02, -4.943e-02, 3.161e-02, -1.759e-02, 2.561e-02, -1.584e-02, 1.067e-02, -6.018e-03, -2.191e-02, 1.001e-01, -5.162e-02, 4.172e-02, -7.222e-02, -4.320e-02, -1.671e-01)); + r += mul(s7_3, M4(-1.997e-02, 6.211e-02, 2.093e-02, 2.590e-02, 4.681e-02, -1.864e-01, 2.634e-02, -2.768e-02, -1.375e-03, 5.001e-03, 5.199e-02, 7.349e-02, 2.499e-02, 9.689e-02, -5.260e-02, -4.920e-02)); + r += mul(s7_4, M4(-2.316e-02, 2.798e-02, -2.930e-03, 2.767e-02, 7.590e-02, -3.101e-01, 1.334e-01, 1.037e-01, -1.135e-02, 4.434e-02, -1.875e-01, -1.480e-01, -3.558e-01, -2.322e-02, 1.764e-02, -3.115e-01)); + r += mul(s7_5, M4(3.913e-02, -3.022e-02, -8.960e-02, -1.972e-01, 1.655e-02, -2.856e-02, -1.324e-02, 1.702e-03, 3.825e-02, -5.577e-02, -9.738e-02, 2.391e-02, -8.171e-02, 8.100e-02, 
-3.003e-02, 3.654e-02)); + r += mul(s7_6, M4(5.264e-02, 4.476e-03, -1.157e-01, -1.648e-02, 1.046e-02, -1.651e-02, -8.078e-02, 3.162e-02, 1.550e-02, 1.141e-02, -9.502e-02, -9.920e-03, -9.480e-02, -5.184e-02, -9.785e-02, -1.331e-02)); + r += mul(s7_7, M4(7.429e-02, 1.379e-01, 5.235e-02, -1.123e-01, -9.043e-02, -7.733e-02, -6.002e-02, -1.955e-02, 4.030e-02, -1.113e-01, 9.796e-02, -8.453e-03, -2.674e-01, 3.664e-02, -6.066e-02, -1.008e-01)); + r += mul(s7_8, M4(-8.165e-02, 1.089e-02, 1.097e-01, 2.733e-01, 4.780e-03, -6.589e-03, -3.362e-02, 3.750e-02, -4.349e-02, -2.515e-02, -6.238e-02, -3.233e-02, -2.766e-03, -4.207e-03, -1.958e-02, -1.892e-01)); + r += V4(1.089e-02, -2.045e-02, 7.370e-03, 9.968e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.852e-02, 1.417e-02, -5.760e-02, 1.306e-02, -4.786e-02, 1.171e-01, -9.699e-02, -3.414e-02, 2.571e-02, 3.132e-03, -3.758e-02, 4.471e-02, 1.401e-02, 3.463e-02, -1.304e-02, 2.016e-02)); + r += mul(s0_1, M4(-5.553e-02, 3.789e-02, 4.053e-02, -4.282e-02, 3.149e-02, -2.488e-02, 2.642e-02, 4.853e-03, -2.545e-02, -7.643e-02, -1.664e-02, -2.488e-02, 6.374e-02, -3.734e-02, 3.373e-02, 4.745e-02)); + r += mul(s0_2, M4(4.395e-02, 5.040e-02, -5.552e-02, 2.688e-02, 4.750e-02, -1.505e-02, -1.916e-03, 1.541e-04, -5.306e-02, 5.310e-02, 2.434e-02, 2.006e-02, -9.437e-02, 5.815e-03, -3.701e-03, 
-2.521e-02)); + r += mul(s0_3, M4(2.083e-02, 7.140e-02, -2.608e-02, 2.495e-02, 4.434e-02, -5.999e-02, -6.960e-02, -6.558e-02, -1.061e-01, 4.193e-02, -1.131e-01, -3.521e-02, 8.448e-03, -2.716e-02, -3.281e-02, 4.599e-02)); + r += mul(s0_4, M4(-3.234e-02, 6.022e-03, -4.811e-02, -1.240e-01, 4.969e-02, -7.316e-02, -2.026e-01, 9.092e-02, 6.015e-02, 2.843e-01, 2.905e-03, -4.195e-02, -5.940e-02, 4.388e-02, 1.342e-02, -2.337e-02)); + r += mul(s0_5, M4(-2.380e-03, 4.119e-02, -1.256e-02, 2.114e-02, 7.213e-02, 2.175e-02, 3.727e-02, -7.079e-02, 6.056e-02, -9.221e-02, -2.669e-03, -8.523e-02, -2.162e-02, -1.026e-02, 2.079e-03, 1.027e-02)); + r += mul(s0_6, M4(1.307e-03, -3.766e-03, 5.204e-02, 4.266e-02, -5.200e-02, -5.702e-02, -1.953e-02, -2.081e-03, -2.318e-03, 1.303e-01, 6.440e-02, 9.118e-03, -1.671e-02, 7.427e-02, -6.194e-03, 4.608e-03)); + r += mul(s0_7, M4(-1.086e-02, -4.986e-02, -5.484e-02, -6.225e-02, 5.482e-02, -4.452e-02, -4.073e-03, -2.407e-02, -3.900e-01, -2.193e-01, -4.044e-02, -1.590e-01, -1.257e-02, 1.443e-02, 2.293e-02, -2.049e-02)); + r += mul(s0_8, M4(1.868e-02, 2.597e-02, -4.738e-03, 4.515e-03, -7.239e-03, 8.633e-02, -2.154e-02, -2.209e-02, -7.547e-02, -8.588e-03, -2.511e-03, -1.880e-02, 1.668e-02, 1.148e-02, -2.489e-02, -2.564e-04)); + r += mul(s1_0, M4(-9.664e-03, 6.720e-03, -3.616e-02, -1.741e-02, 2.702e-02, 1.912e-02, -8.431e-02, 1.800e-02, 7.762e-03, -5.716e-03, -3.505e-02, 3.793e-02, -5.187e-02, -8.724e-03, 1.210e-04, 1.115e-02)); + r += mul(s1_1, M4(3.598e-02, 1.044e-01, -1.339e-02, 5.163e-02, -9.806e-02, -6.674e-02, -1.240e-01, -9.032e-02, 2.787e-02, -3.783e-03, 1.077e-02, -2.244e-02, 2.076e-01, -1.668e-01, 2.460e-01, 1.348e-01)); + r += mul(s1_2, M4(3.081e-02, -6.488e-02, 4.064e-02, 2.232e-02, -3.069e-02, -2.755e-02, -1.444e-02, -4.403e-02, -3.164e-02, 1.650e-03, -5.957e-04, -1.117e-02, 1.618e-01, 4.197e-02, 4.412e-02, 8.392e-02)); + r += mul(s1_3, M4(-7.096e-02, -1.122e-02, 7.644e-02, -3.528e-02, 8.855e-02, -4.147e-02, -1.767e-01, -1.084e-02, 
-2.915e-02, 7.801e-02, -9.706e-02, 1.738e-02, -5.857e-02, -7.020e-02, 1.048e-02, -8.851e-02)); + r += mul(s1_4, M4(1.548e-01, 2.916e-02, -4.715e-02, 1.714e-01, -4.154e-01, 3.114e-01, -4.738e-02, -2.979e-01, -3.507e-02, -4.455e-02, -2.859e-02, -6.443e-02, 1.712e-01, 1.228e-01, 1.159e-01, 5.612e-01)); + r += mul(s1_5, M4(-3.328e-02, -7.147e-02, -1.732e-02, -1.046e-01, 3.714e-02, 1.004e-02, 1.801e-02, 4.052e-02, -2.488e-02, -8.034e-02, 2.109e-02, -1.397e-02, -3.360e-02, 1.254e-01, 7.808e-02, 2.022e-01)); + r += mul(s1_6, M4(1.909e-02, 1.367e-03, -6.469e-02, -1.298e-02, -1.989e-02, -4.991e-02, -5.581e-02, 6.140e-02, -3.023e-02, 2.203e-02, -3.642e-02, -1.523e-02, -2.428e-02, 2.404e-02, -1.127e-03, -1.336e-01)); + r += mul(s1_7, M4(1.024e-02, -1.469e-01, 8.200e-02, 4.504e-03, -8.006e-02, -6.594e-02, -5.790e-02, -1.692e-01, -8.631e-03, 9.204e-02, -2.329e-02, -1.009e-02, 3.536e-03, 1.820e-01, -2.303e-02, 1.502e-01)); + r += mul(s1_8, M4(-5.898e-02, -6.566e-02, -7.726e-02, -3.458e-02, -4.422e-02, 1.652e-01, 1.264e-02, 4.795e-02, -4.558e-03, -4.430e-02, -5.889e-03, -2.927e-02, 1.604e-02, 2.417e-01, -2.948e-02, -2.499e-02)); + r += mul(s2_0, M4(-3.273e-02, 2.925e-03, -1.014e-01, -1.018e-02, -6.325e-02, 4.441e-02, -1.262e-01, 3.454e-02, 2.184e-02, -5.589e-02, 2.767e-02, -1.997e-03, -3.547e-02, 3.166e-02, -3.762e-02, -4.284e-02)); + r += mul(s2_1, M4(4.734e-02, -1.519e-02, 4.751e-02, -1.437e-02, 1.469e-01, 3.827e-02, -7.801e-02, -4.105e-02, -9.743e-02, -2.653e-02, -1.080e-01, -6.721e-02, 7.721e-02, 1.992e-02, -1.360e-01, 1.240e-03)); + r += mul(s2_2, M4(-1.485e-02, -4.148e-03, -9.333e-03, -2.213e-02, -1.235e-01, -3.868e-02, -2.507e-02, -2.216e-02, -9.666e-02, -7.740e-02, -3.893e-02, -1.239e-01, 1.794e-02, 1.602e-01, 2.262e-02, -2.582e-02)); + r += mul(s2_3, M4(8.309e-03, 1.107e-01, -2.482e-01, -5.239e-02, 1.168e-01, -7.872e-02, 1.419e-01, -4.649e-02, -1.291e-03, -1.392e-01, 1.145e-02, -1.387e-02, 2.985e-02, -2.689e-02, 3.944e-02, -7.916e-03)); + r += mul(s2_4, M4(-8.015e-02, 
5.834e-02, 3.784e-02, -1.896e-01, -5.955e-01, 2.887e-01, -2.022e-01, 9.331e-02, -1.495e-01, -4.096e-02, -1.243e-01, -1.639e-01, 2.726e-02, -1.548e-01, 1.335e-01, 1.273e-01)); + r += mul(s2_5, M4(2.455e-02, 3.565e-02, -3.254e-03, 5.547e-02, 9.475e-02, -3.398e-02, 3.236e-02, -1.890e-01, 9.742e-02, 6.328e-03, -1.349e-04, -1.412e-01, 1.845e-01, 5.447e-01, 1.569e-01, 1.644e-01)); + r += mul(s2_6, M4(1.251e-02, -1.017e-01, -5.491e-02, -1.858e-02, 5.391e-02, 1.969e-02, 1.018e-02, 5.492e-02, -1.187e-02, -1.082e-01, -2.789e-02, 2.796e-03, 2.415e-02, 1.100e-02, 2.746e-02, 5.840e-03)); + r += mul(s2_7, M4(-1.939e-02, -1.814e-02, 4.260e-02, -1.620e-02, -9.263e-02, 8.165e-02, -1.502e-01, -8.349e-02, 1.599e-02, 1.467e-01, -5.246e-02, -2.023e-01, 7.758e-02, 2.664e-01, 9.279e-02, 8.311e-02)); + r += mul(s2_8, M4(-1.522e-02, -3.585e-02, 9.057e-03, 5.160e-02, -8.552e-02, 1.032e-01, 1.580e-02, 7.069e-02, -2.397e-02, -1.317e-02, 5.901e-03, -6.085e-05, 4.503e-02, -1.911e-02, 3.011e-02, -4.191e-03)); + r += mul(s3_0, M4(-1.978e-01, 1.193e-02, -4.782e-03, -1.304e-01, 2.629e-02, 4.897e-02, -7.251e-02, 1.659e-02, -4.481e-02, 1.271e-04, 9.208e-02, -1.513e-02, 2.725e-02, -2.319e-02, 3.292e-02, 2.903e-02)); + r += mul(s3_1, M4(1.598e-01, 1.196e-01, 6.364e-03, 7.658e-02, 1.132e-02, -1.454e-02, 2.569e-03, 1.864e-02, 4.843e-02, 6.323e-02, 1.658e-02, 1.019e-01, -5.539e-02, -2.201e-02, -2.732e-02, -2.322e-02)); + r += mul(s3_2, M4(7.839e-03, 2.614e-02, 4.543e-03, 1.929e-02, 6.896e-02, -5.200e-03, -3.588e-03, -2.290e-02, 5.938e-02, 1.207e-02, -1.416e-02, -7.921e-04, -8.801e-03, 2.962e-02, 1.712e-02, 5.104e-03)); + r += mul(s3_3, M4(6.813e-02, -6.967e-02, -2.467e-01, -3.398e-02, 6.877e-02, -7.348e-02, 9.109e-02, 2.261e-02, -4.072e-03, -2.712e-02, 1.256e-01, -1.947e-02, -3.744e-03, 8.099e-02, -8.439e-02, 3.376e-02)); + r += mul(s3_4, M4(1.077e-01, -1.501e-01, 2.260e-01, 9.637e-02, 9.239e-02, -9.979e-02, -6.790e-02, 1.135e-01, 5.774e-02, 8.720e-02, -1.809e-02, 7.099e-02, -1.255e-01, -1.150e-01, 
-2.787e-01, -1.439e-01)); + r += mul(s3_5, M4(-2.963e-02, 6.668e-03, -3.052e-02, -7.320e-02, 6.541e-02, -6.276e-02, 7.650e-02, -2.627e-02, 2.684e-02, -5.495e-02, 2.097e-02, -3.237e-02, -1.115e-01, -9.660e-02, -1.034e-01, -3.057e-01)); + r += mul(s3_6, M4(-8.964e-02, 1.108e-01, 5.358e-02, -8.675e-02, -1.298e-03, 3.944e-02, 4.797e-02, 3.419e-02, 2.338e-02, 1.032e-01, 3.897e-02, -5.207e-03, 2.477e-02, -3.808e-02, 3.602e-02, -7.691e-03)); + r += mul(s3_7, M4(-2.280e-02, -2.297e-01, -2.656e-02, 4.335e-03, 2.757e-02, -4.156e-02, 1.953e-02, 8.705e-02, 2.555e-02, 1.782e-01, -1.626e-02, 7.317e-02, 3.277e-02, -1.371e-01, 2.527e-02, 1.452e-02)); + r += mul(s3_8, M4(-2.977e-03, -6.503e-03, 1.028e-02, 1.623e-01, -2.416e-02, 2.116e-02, 1.747e-02, -3.146e-02, 9.452e-03, 9.436e-03, 4.173e-02, 3.043e-02, 2.906e-02, 4.177e-02, -7.579e-02, -7.208e-02)); + r += mul(s4_0, M4(-3.654e-02, 2.371e-02, -4.703e-02, -3.417e-02, 1.690e-02, 5.024e-02, 6.902e-03, 6.806e-03, -6.909e-02, 1.855e-01, 2.151e-02, -4.070e-02, 2.795e-02, 1.880e-02, 3.047e-02, -8.102e-03)); + r += mul(s4_1, M4(5.194e-02, -3.014e-02, -8.487e-03, 2.008e-02, 2.216e-02, -5.617e-03, -1.076e-02, -1.327e-02, 9.989e-02, 2.196e-01, 1.202e-02, 9.436e-02, -4.214e-02, -1.455e-02, 2.944e-02, 4.371e-03)); + r += mul(s4_2, M4(-1.887e-02, 4.668e-02, 5.779e-03, -1.966e-02, 2.824e-02, 3.541e-02, 3.188e-03, 1.323e-02, 3.842e-02, 6.945e-03, -2.692e-02, -6.403e-02, 5.748e-02, -4.583e-02, 5.327e-03, 1.260e-02)); + r += mul(s4_3, M4(-1.078e-01, 1.597e-02, -6.295e-02, -1.910e-02, -1.310e-02, -5.021e-02, -1.010e-01, -3.924e-02, 8.872e-03, -1.098e-01, 8.058e-02, 1.212e-02, 1.831e-02, 1.778e-02, 5.973e-02, 1.841e-02)); + r += mul(s4_4, M4(1.154e-01, -6.212e-02, -9.262e-02, -3.601e-02, -8.978e-03, -9.383e-02, 6.147e-02, 1.060e-02, 2.360e-01, -2.783e-01, 1.116e-01, 1.979e-01, -3.989e-02, -1.178e-01, -6.627e-03, -3.447e-02)); + r += mul(s4_5, M4(4.532e-02, -1.230e-01, 4.558e-02, -2.350e-02, -3.753e-02, 6.548e-02, -1.758e-02, -5.187e-02, -1.046e-01, 
-1.463e-02, -4.373e-02, -1.568e-01, 2.144e-02, -8.064e-02, -4.304e-02, 3.818e-02)); + r += mul(s4_6, M4(6.579e-02, 1.296e-01, -6.474e-02, -6.651e-02, -2.080e-02, -7.649e-02, 3.104e-03, -5.876e-03, 3.586e-02, 6.342e-02, -1.736e-02, 2.302e-02, 1.731e-02, 4.072e-02, 1.752e-02, 1.068e-02)); + r += mul(s4_7, M4(-1.406e-02, -2.128e-01, 1.429e-03, 6.866e-02, -1.158e-02, 1.064e-01, -7.743e-03, -3.322e-02, 2.194e-03, 2.286e-01, 4.062e-02, -7.815e-02, 1.619e-02, 3.628e-02, 2.618e-03, 5.319e-03)); + r += mul(s4_8, M4(-4.473e-02, -4.662e-02, -3.070e-02, -3.530e-02, -1.360e-02, 1.564e-02, 2.604e-02, -1.164e-02, -3.569e-02, 7.896e-02, -5.832e-02, 2.687e-02, 2.879e-02, 9.026e-03, 2.841e-03, 1.433e-02)); + r += mul(s5_0, M4(-1.709e-02, 6.835e-02, -5.300e-02, 2.365e-02, 1.208e-02, -5.065e-02, 7.121e-02, 4.341e-03, -2.329e-03, 8.360e-02, 3.878e-02, 1.105e-02, -9.525e-02, 1.938e-02, -7.229e-02, -8.657e-02)); + r += mul(s5_1, M4(-5.753e-02, -2.475e-02, -8.615e-03, -6.791e-02, 2.518e-01, -1.107e-03, 3.793e-02, 1.224e-01, -8.810e-02, -2.332e-02, -2.174e-02, -8.714e-02, 1.592e-02, 9.309e-02, -1.568e-01, -3.244e-02)); + r += mul(s5_2, M4(3.602e-02, 5.059e-03, 1.107e-02, 2.958e-02, 2.779e-02, 5.966e-02, -1.819e-03, -2.871e-02, 6.199e-02, -1.606e-02, 1.334e-02, 4.840e-02, -4.332e-02, -1.755e-01, -1.225e-02, -1.465e-01)); + r += mul(s5_3, M4(1.342e-02, -2.938e-03, -1.333e-01, 4.088e-02, 5.255e-02, 2.106e-02, 1.246e-01, 5.430e-03, 2.071e-02, -1.101e-01, -1.422e-02, -5.257e-02, 4.171e-02, 2.614e-02, -5.425e-02, -5.484e-02)); + r += mul(s5_4, M4(-3.738e-02, -2.543e-01, -1.355e-01, -2.047e-01, 6.045e-01, 6.712e-04, 2.806e-01, 4.430e-01, 1.019e-01, -5.017e-02, -8.394e-03, 4.678e-04, 4.016e-02, -6.143e-01, -6.335e-02, 9.052e-02)); + r += mul(s5_5, M4(-1.454e-02, -4.759e-02, 3.359e-02, 1.241e-01, 1.558e-02, -1.056e-01, -2.818e-02, 2.574e-01, 1.429e-02, 2.406e-02, 5.111e-02, 3.146e-02, -5.400e-02, -1.546e-01, -5.701e-02, -1.978e-01)); + r += mul(s5_6, M4(-1.559e-02, 1.226e-01, -4.477e-02, 2.875e-03, 
3.966e-02, -4.744e-02, 5.312e-02, 2.740e-02, -1.497e-02, -1.681e-03, -7.210e-02, -7.023e-02, 1.059e-02, 4.577e-03, 3.884e-02, 1.989e-03)); + r += mul(s5_7, M4(2.155e-03, 5.351e-02, -5.339e-02, -1.507e-02, 1.282e-03, 1.122e-01, 3.821e-02, 1.328e-01, 1.675e-02, 1.377e-02, -3.465e-02, -3.002e-02, 3.591e-02, 7.138e-02, 6.867e-03, 3.803e-02)); + r += mul(s5_8, M4(-2.054e-02, -2.482e-03, -4.547e-02, -3.309e-02, -2.872e-02, -5.235e-02, 3.979e-02, 6.017e-02, -1.569e-02, -2.154e-02, -1.878e-02, 3.573e-03, -3.146e-02, 6.997e-02, -3.866e-02, -4.409e-02)); + r += mul(s6_0, M4(-2.632e-02, 1.644e-02, -5.125e-02, -1.923e-02, 4.263e-02, -8.466e-02, -4.567e-02, 7.356e-02, 1.789e-02, -1.403e-02, 2.957e-02, 2.201e-02, -2.127e-02, 2.052e-02, 4.376e-02, -3.482e-03)); + r += mul(s6_1, M4(-2.532e-02, 1.704e-02, -5.390e-02, -1.703e-02, 8.306e-02, -5.652e-02, 1.410e-02, 1.581e-02, 1.172e-01, 1.514e-03, 7.870e-04, 1.082e-01, -1.701e-02, -2.203e-02, -3.648e-02, -5.616e-03)); + r += mul(s6_2, M4(2.110e-02, -6.930e-02, -1.405e-02, -2.682e-02, -3.148e-02, -2.610e-02, 4.580e-02, -1.302e-02, -8.942e-02, 6.348e-02, 3.620e-02, 5.638e-02, 6.823e-02, -9.127e-03, 2.566e-02, -1.982e-02)); + r += mul(s6_3, M4(2.426e-02, 4.841e-02, -2.629e-02, 1.539e-03, -3.696e-02, -2.072e-02, 5.157e-03, -8.802e-03, 3.766e-02, 5.942e-02, -7.291e-02, 3.623e-02, 2.300e-02, -1.450e-01, 1.207e-01, 3.822e-03)); + r += mul(s6_4, M4(-6.935e-02, -1.371e-02, 1.685e-03, -9.513e-02, -3.332e-02, -3.843e-01, 4.060e-02, 8.189e-03, -2.561e-01, 3.931e-01, 2.969e-02, -3.895e-02, 7.095e-02, -9.008e-03, 4.266e-02, 1.025e-01)); + r += mul(s6_5, M4(-1.056e-02, -6.190e-02, 4.329e-02, 5.268e-02, 3.018e-02, -1.134e-01, -1.945e-02, 1.213e-02, -1.794e-01, -9.701e-02, -5.967e-02, -7.068e-02, -5.236e-02, -8.995e-02, 1.906e-02, 3.944e-02)); + r += mul(s6_6, M4(7.527e-03, 1.485e-03, 1.679e-02, -5.276e-03, 3.644e-02, -2.594e-01, 5.044e-03, 3.530e-02, -5.528e-02, -1.169e-02, -1.202e-02, 1.599e-02, 1.861e-02, 5.332e-02, 1.572e-02, 3.334e-02)); + r += 
mul(s6_7, M4(2.066e-02, 6.183e-02, 6.742e-03, 4.145e-02, -2.724e-03, -4.688e-01, 2.701e-02, 1.142e-01, -1.106e-02, -2.290e-01, 2.774e-02, -5.572e-02, -4.977e-03, 7.180e-02, -7.132e-03, 1.043e-01)); + r += mul(s6_8, M4(1.379e-02, 2.942e-02, 3.097e-02, -1.588e-02, -9.760e-03, -1.506e-01, -1.631e-02, -1.675e-02, -1.553e-02, -1.675e-01, -1.927e-02, -5.148e-02, -6.721e-03, 4.411e-02, -1.285e-02, -7.492e-02)); + r += mul(s7_0, M4(-3.382e-02, 4.013e-02, -5.116e-02, -4.171e-02, 1.667e-02, 1.291e-02, -6.363e-02, 1.707e-02, -1.358e-02, -3.281e-02, -6.209e-03, -1.720e-02, -4.898e-02, -3.425e-02, 6.959e-02, 9.825e-03)); + r += mul(s7_1, M4(-5.809e-03, 6.135e-02, -4.941e-02, -4.388e-02, -1.574e-02, -3.333e-02, 3.341e-02, -1.193e-02, 1.603e-01, -7.629e-03, -1.384e-02, 5.651e-02, 2.039e-01, -8.853e-02, -4.779e-02, -3.405e-02)); + r += mul(s7_2, M4(-5.094e-02, 1.089e-02, -4.568e-02, -4.278e-02, -4.847e-02, 5.021e-02, -9.973e-03, 4.309e-02, -1.239e-01, -5.720e-02, 4.405e-02, 1.561e-02, -3.265e-02, -1.596e-02, 4.407e-02, -7.645e-02)); + r += mul(s7_3, M4(-7.759e-02, 2.397e-02, -1.377e-01, -4.577e-02, -7.223e-02, -3.973e-02, -6.573e-02, -3.493e-02, 9.430e-02, -7.609e-02, -6.839e-02, 9.491e-03, -3.487e-02, 2.174e-02, 1.111e-01, -1.857e-02)); + r += mul(s7_4, M4(-3.929e-02, -2.153e-01, -2.263e-02, -1.158e-01, 1.643e-01, -8.632e-02, 1.678e-01, 3.190e-02, -1.678e-01, -1.560e-01, 4.112e-02, 2.362e-02, 4.460e-01, 1.385e-01, 2.484e-01, 2.283e-01)); + r += mul(s7_5, M4(-1.422e-02, -1.066e-01, 3.035e-02, -1.148e-01, -7.523e-02, 7.633e-02, -7.322e-03, 3.361e-02, 2.663e-02, 7.289e-02, -1.969e-02, 5.205e-02, 7.212e-04, 6.407e-02, 3.565e-02, 2.164e-01)); + r += mul(s7_6, M4(-3.088e-02, 1.356e-02, -5.746e-03, -8.216e-02, 1.131e-03, -5.902e-02, -4.959e-02, -1.055e-02, -6.029e-02, -2.294e-02, -8.764e-02, 2.750e-02, 7.840e-02, -6.970e-02, -7.362e-05, -7.489e-02)); + r += mul(s7_7, M4(-6.526e-02, 9.753e-02, 7.053e-02, 3.520e-02, -8.953e-02, -2.164e-01, 9.827e-03, -1.388e-02, 2.596e-02, -2.515e-01, 
8.294e-02, -8.363e-02, 4.801e-02, 1.176e-01, 7.288e-02, 1.821e-01)); + r += mul(s7_8, M4(2.840e-02, 2.860e-02, 4.192e-02, -4.194e-02, 4.181e-03, -2.567e-02, -9.064e-03, 1.524e-02, -2.387e-02, 1.684e-02, -1.586e-02, 8.428e-02, 7.378e-03, 9.452e-02, 1.910e-02, 3.319e-02)); + r += V4(-8.471e-03, -1.471e-02, -1.078e-02, -1.292e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(5.564e-02, -2.568e-02, 9.599e-03, -2.108e-02, 4.250e-02, 4.686e-02, -5.431e-02, -2.406e-01, -1.053e-01, -6.775e-02, -4.199e-02, 8.179e-02, -5.315e-04, 4.287e-02, 5.107e-02, 7.491e-02)); + r += mul(s0_1, M4(-4.795e-02, -5.934e-03, -5.950e-02, 7.670e-02, 1.012e-01, -1.948e-03, -3.097e-03, 2.226e-02, -6.913e-02, -8.117e-03, 7.958e-02, -4.828e-02, 8.114e-02, 2.249e-02, 1.629e-02, 7.050e-02)); + r += mul(s0_2, M4(-6.514e-02, -1.138e-02, 3.485e-03, 7.612e-03, -6.234e-03, 1.859e-03, 5.234e-02, 2.880e-02, -5.665e-02, 1.437e-02, -1.102e-01, 1.712e-02, 3.604e-02, 1.462e-02, 3.754e-02, 1.502e-02)); + r += mul(s0_3, M4(-1.039e-01, -5.124e-02, -8.815e-02, 3.959e-02, 5.831e-02, -5.496e-02, 2.485e-02, 1.652e-01, 3.139e-01, -8.668e-02, 6.456e-02, -1.771e-02, 7.739e-02, -4.782e-04, -1.474e-02, 2.205e-02)); + r += mul(s0_4, M4(3.960e-02, 1.269e-01, 1.638e-01, -2.935e-02, -6.888e-02, 2.781e-02, -2.099e-02, -4.190e-02, 2.912e-01, 1.422e-01, -2.274e-01, 2.599e-02, 
1.336e-01, -8.425e-02, -6.435e-03, 2.277e-02)); + r += mul(s0_5, M4(-5.320e-02, 1.616e-01, -9.022e-02, -4.923e-02, -5.044e-03, 4.681e-02, -8.809e-03, 2.334e-02, 2.030e-01, 4.037e-02, 2.446e-01, 6.085e-02, 1.287e-02, -5.437e-02, 6.038e-02, 2.147e-02)); + r += mul(s0_6, M4(8.658e-02, 2.401e-03, -1.006e-02, -1.908e-02, -8.866e-02, -1.222e-02, 5.580e-02, -3.716e-02, -2.095e-01, 1.739e-01, -1.304e-01, 1.341e-01, -5.495e-02, -4.232e-02, -1.343e-02, 2.139e-04)); + r += mul(s0_7, M4(2.428e-02, -7.374e-03, -1.641e-02, -1.115e-05, -1.166e-01, -1.747e-01, -4.897e-02, 2.178e-02, -3.975e-01, 2.204e-01, 1.356e-01, -7.871e-02, -3.107e-02, -3.150e-02, 9.228e-03, -2.140e-02)); + r += mul(s0_8, M4(9.544e-02, -9.344e-02, 2.867e-02, -9.081e-03, 3.649e-02, 1.379e-02, 1.854e-02, -3.414e-02, -2.156e-01, 4.895e-02, -2.182e-01, -8.193e-02, 9.780e-03, 1.209e-02, -3.988e-02, -9.680e-03)); + r += mul(s1_0, M4(7.363e-02, -6.508e-03, -2.946e-02, 3.452e-02, -4.840e-02, 5.347e-02, 1.271e-02, 1.585e-01, 2.045e-02, -3.410e-02, 3.161e-02, 1.609e-02, -1.150e-01, 2.777e-03, -3.012e-01, -8.839e-02)); + r += mul(s1_1, M4(-6.948e-02, 1.041e-02, 1.361e-01, -6.431e-02, -2.487e-02, -1.073e-02, 8.229e-02, 3.936e-02, 5.405e-02, -1.490e-02, -3.526e-02, 4.847e-02, -2.115e-01, -1.143e-01, -2.937e-02, -2.322e-01)); + r += mul(s1_2, M4(-1.079e-01, -2.854e-02, -6.011e-02, 5.054e-02, -5.800e-02, 2.570e-02, -1.312e-02, 2.525e-02, -8.760e-03, -1.410e-02, 6.679e-02, 1.628e-03, 3.924e-02, 2.187e-02, -3.925e-02, 5.869e-02)); + r += mul(s1_3, M4(8.716e-02, -2.256e-02, -1.715e-03, -7.717e-02, 2.653e-01, 6.507e-02, -8.374e-02, 1.413e-01, -2.495e-03, 1.303e-02, 6.540e-04, 2.399e-02, -2.225e-01, 9.055e-02, -4.018e-02, -6.299e-02)); + r += mul(s1_4, M4(2.131e-01, -2.639e-02, -1.449e-01, -7.339e-02, 2.747e-01, -5.850e-02, -4.813e-04, 1.108e-01, 7.560e-03, -4.137e-02, 1.204e-02, 8.980e-03, -2.765e-01, 1.510e-01, -1.933e-01, 3.426e-02)); + r += mul(s1_5, M4(-1.979e-02, 4.280e-02, 1.420e-01, 5.255e-02, 5.800e-02, -4.042e-02, 
-4.368e-02, -3.771e-02, -2.228e-02, -4.709e-02, -1.652e-02, -1.663e-04, -9.305e-02, 4.241e-02, -6.751e-02, 1.683e-02)); + r += mul(s1_6, M4(-4.216e-02, 4.015e-02, -2.219e-03, 6.089e-03, -1.194e-01, -1.567e-01, 5.761e-02, 2.711e-02, -8.200e-02, 2.148e-02, -4.152e-02, 7.995e-02, -1.315e-01, -7.088e-03, -1.967e-02, -4.194e-02)); + r += mul(s1_7, M4(1.017e-02, -3.080e-02, 1.067e-01, -1.596e-02, -1.468e-01, 5.528e-02, -4.388e-02, -2.552e-02, -8.031e-02, 1.102e-01, 2.575e-02, -5.867e-02, -1.491e-01, -8.697e-02, -1.588e-01, 1.828e-02)); + r += mul(s1_8, M4(7.462e-03, 2.122e-03, -1.188e-01, 1.442e-02, 5.623e-02, 8.783e-02, -3.208e-02, -3.765e-02, 5.568e-02, -5.648e-03, -2.887e-02, 2.546e-02, -1.080e-02, 1.306e-01, -5.768e-02, -4.871e-02)); + r += mul(s2_0, M4(6.899e-03, -2.458e-02, 3.732e-02, 2.687e-02, 1.890e-01, 3.174e-02, 6.358e-03, -1.116e-01, 8.355e-02, -2.709e-02, 1.097e-01, 1.508e-01, 3.515e-02, 1.664e-02, 3.033e-03, -1.267e-01)); + r += mul(s2_1, M4(-1.644e-02, 1.609e-02, -8.031e-02, 3.372e-02, 3.681e-02, -1.157e-01, -1.050e-01, 9.057e-02, 1.149e-01, -4.600e-02, 1.532e-02, 3.066e-02, 5.304e-03, -1.317e-02, 4.920e-02, 5.047e-02)); + r += mul(s2_2, M4(-1.031e-01, -2.233e-03, -3.758e-02, -4.363e-02, -1.708e-01, -5.747e-02, -4.124e-02, 1.331e-02, 1.331e-01, -1.784e-02, 1.647e-01, -4.233e-02, 8.356e-02, 5.630e-03, 5.010e-02, -4.132e-02)); + r += mul(s2_3, M4(5.442e-02, 2.208e-02, 5.676e-02, 3.522e-02, 4.321e-02, 1.724e-02, 4.825e-02, 3.068e-02, 3.098e-01, -8.694e-02, -9.975e-03, 7.839e-02, -3.647e-02, -1.175e-02, -8.793e-03, -7.234e-02)); + r += mul(s2_4, M4(1.310e-01, 1.019e-01, 2.457e-01, -5.249e-02, 4.429e-01, 1.160e-01, 2.001e-01, 2.508e-01, 2.921e-01, -5.226e-01, 2.153e-01, -2.860e-02, -1.546e-01, 1.213e-01, -1.770e-02, -2.842e-01)); + r += mul(s2_5, M4(1.565e-01, 2.226e-02, -1.299e-02, 2.889e-02, 2.147e-01, 4.120e-02, 1.104e-01, -7.634e-02, 1.461e-01, -1.129e-01, 7.282e-02, 3.477e-02, 1.359e-02, 3.209e-02, 1.097e-02, -7.702e-02)); + r += mul(s2_6, M4(-2.648e-02, 
-3.652e-02, -4.807e-02, 8.143e-02, 1.982e-02, -2.093e-01, -8.276e-02, 5.334e-02, 5.568e-02, -2.253e-01, 7.336e-02, 1.876e-02, 1.413e-02, -5.480e-03, 3.730e-02, 1.190e-02)); + r += mul(s2_7, M4(-2.826e-02, -1.869e-02, 2.409e-02, -1.078e-02, -8.085e-02, -1.092e-01, -1.236e-01, -1.195e-01, -3.388e-02, -3.166e-01, -1.016e-01, 2.367e-02, 2.484e-02, -2.032e-03, 2.010e-03, 9.625e-02)); + r += mul(s2_8, M4(2.494e-03, -1.099e-01, 1.929e-02, -6.145e-03, 3.189e-03, 1.209e-01, 3.996e-02, -1.905e-02, -1.075e-01, 2.687e-02, 2.764e-02, 1.283e-02, 2.492e-02, -2.514e-02, 2.399e-03, 2.090e-02)); + r += mul(s3_0, M4(-5.188e-02, -5.302e-04, -4.649e-02, -7.301e-02, -2.265e-02, 5.858e-02, 4.601e-02, 3.600e-02, -1.778e-02, -7.573e-03, 1.024e-02, -1.120e-01, 2.020e-02, 8.781e-03, 1.753e-02, 1.242e-01)); + r += mul(s3_1, M4(-9.918e-02, 1.148e-02, -1.616e-02, 1.531e-02, -9.831e-03, 2.570e-02, 1.128e-01, -9.396e-04, -8.930e-02, 3.763e-03, -5.709e-02, -1.298e-01, 3.632e-02, 3.189e-02, -8.941e-02, 4.325e-02)); + r += mul(s3_2, M4(-5.675e-02, -9.192e-04, -1.271e-01, -6.402e-03, 3.858e-02, 1.883e-02, -5.269e-02, -3.356e-02, -8.538e-02, -1.160e-03, -6.894e-02, -1.370e-02, -3.186e-02, 1.357e-02, 5.788e-02, -2.196e-02)); + r += mul(s3_3, M4(2.708e-02, -4.281e-02, 4.847e-02, 8.453e-02, -3.591e-02, -6.288e-03, 1.320e-02, 8.700e-04, 3.477e-02, -5.825e-02, 1.362e-02, -8.986e-02, 2.621e-02, -1.296e-02, 6.036e-03, -8.892e-02)); + r += mul(s3_4, M4(1.101e-01, 2.002e-01, 4.733e-02, 6.113e-02, -1.253e-01, 1.351e-02, -5.160e-02, 7.784e-03, 1.009e-01, -1.159e-01, -7.248e-03, 6.105e-02, 3.384e-02, -7.175e-02, -7.411e-03, -2.225e-01)); + r += mul(s3_5, M4(9.257e-02, 1.273e-01, 1.194e-01, 3.997e-02, 6.216e-02, 4.575e-02, 5.219e-02, 1.013e-03, 1.138e-01, -5.140e-02, 4.693e-03, 2.465e-02, -6.723e-02, -2.648e-02, 4.385e-02, -8.677e-02)); + r += mul(s3_6, M4(-3.210e-02, 3.838e-02, 7.788e-02, 9.044e-02, 3.106e-02, -4.789e-02, 2.401e-02, 3.500e-02, 5.927e-02, 1.184e-02, -1.481e-02, 1.091e-02, -5.906e-02, -4.968e-02, 
-1.467e-02, 3.172e-02)); + r += mul(s3_7, M4(4.558e-02, -8.335e-02, -5.322e-02, -2.910e-02, -1.990e-02, -8.604e-02, -4.847e-03, -4.192e-02, 3.607e-02, 3.262e-02, -7.834e-03, -3.046e-02, 9.790e-03, 4.237e-02, -3.386e-02, 5.023e-02)); + r += mul(s3_8, M4(-6.291e-02, 4.206e-02, 3.200e-02, 2.044e-02, -9.929e-02, 5.190e-02, 3.410e-02, 1.033e-02, -3.491e-02, 3.895e-02, -2.048e-03, 1.777e-02, 1.251e-01, -6.249e-02, -9.290e-02, 3.368e-02)); + r += mul(s4_0, M4(7.315e-02, 4.151e-03, -2.859e-02, 5.017e-02, 4.525e-02, 6.032e-03, 5.704e-03, -4.161e-02, 2.248e-01, 1.231e-01, -1.596e-01, -1.029e-01, -1.354e-01, 6.874e-02, -5.986e-02, 2.122e-02)); + r += mul(s4_1, M4(5.375e-02, -8.616e-02, 6.589e-02, -9.744e-02, 2.313e-02, 5.684e-03, 3.187e-02, 1.480e-02, -8.559e-02, 1.176e-01, -7.169e-02, 1.836e-02, 5.834e-02, 3.290e-02, -1.134e-01, -8.451e-02)); + r += mul(s4_2, M4(7.774e-02, -4.352e-02, 1.077e-01, 2.943e-02, 3.852e-02, -3.905e-02, -1.682e-02, 7.009e-02, -1.695e-02, 2.796e-02, -7.754e-02, -2.108e-02, 1.772e-02, -3.573e-02, 5.487e-02, 3.715e-03)); + r += mul(s4_3, M4(-1.166e-01, 7.483e-02, 8.287e-02, -2.406e-01, -2.707e-02, -1.932e-02, 2.245e-03, 1.747e-01, -1.694e-01, -8.341e-02, 1.334e-01, -1.510e-01, 1.329e-01, -5.959e-02, -7.460e-03, -7.832e-02)); + r += mul(s4_4, M4(1.055e-01, 2.643e-02, -2.022e-01, 9.058e-02, -1.638e-01, 4.163e-03, 3.108e-02, -3.503e-02, 7.427e-02, -1.404e-01, -2.431e-01, -3.152e-02, 2.168e-02, -1.821e-01, 5.341e-03, -4.026e-02)); + r += mul(s4_5, M4(-1.392e-01, -8.403e-02, 6.099e-02, -5.146e-02, -6.017e-02, 1.218e-01, -2.153e-02, -1.807e-02, 1.186e-01, -4.060e-02, -2.946e-02, 1.951e-02, -6.394e-02, -1.915e-02, -1.332e-01, 1.149e-02)); + r += mul(s4_6, M4(-1.588e-01, 1.224e-01, -6.327e-02, 4.951e-02, 9.498e-02, -8.888e-02, 2.724e-02, -7.547e-03, 8.303e-02, -8.869e-02, -3.263e-02, -8.616e-02, -6.354e-02, 1.949e-02, -2.067e-04, -4.272e-02)); + r += mul(s4_7, M4(-1.390e-02, 1.797e-01, 1.093e-01, -2.220e-02, 5.163e-02, 1.969e-02, -2.223e-02, 2.175e-03, 
2.056e-01, -1.871e-01, 1.528e-01, -2.037e-02, -1.613e-02, 6.063e-02, -5.701e-02, 6.490e-03)); + r += mul(s4_8, M4(-8.356e-02, 9.201e-02, -6.660e-02, 5.225e-02, 5.527e-02, -5.178e-02, 4.180e-03, -1.556e-02, 4.026e-02, 2.421e-02, -1.164e-01, 2.126e-02, -4.531e-03, 5.718e-02, 6.483e-02, 1.105e-02)); + r += mul(s5_0, M4(1.185e-01, 5.196e-03, 3.141e-02, -4.355e-02, 1.014e-01, -6.355e-02, -1.440e-02, -3.068e-01, -3.370e-02, 6.441e-02, -1.696e-02, -1.651e-01, 1.775e-01, 3.987e-02, 1.402e-02, -2.260e-01)); + r += mul(s5_1, M4(8.821e-02, 4.918e-03, -1.254e-01, -3.305e-02, 9.728e-02, -1.233e-01, -4.204e-01, -1.657e-01, 1.364e-02, 5.103e-02, -4.482e-02, 3.125e-02, 8.831e-02, 1.491e-01, 8.666e-02, -9.823e-02)); + r += mul(s5_2, M4(4.780e-03, 3.399e-03, 1.170e-01, 1.614e-02, 5.032e-02, -1.143e-01, 9.699e-02, 3.955e-02, 6.252e-02, 1.201e-02, 1.065e-03, 8.514e-03, 1.375e-01, 7.129e-02, -1.061e-02, -5.335e-03)); + r += mul(s5_3, M4(-1.229e-01, 1.690e-02, -5.375e-02, -1.088e-01, -2.155e-01, 9.302e-02, 3.596e-02, -1.506e-01, 4.357e-02, -2.802e-02, 4.347e-02, -1.340e-02, -2.441e-01, 4.218e-03, -5.089e-02, 3.196e-02)); + r += mul(s5_4, M4(-9.945e-02, -8.537e-02, 1.498e-01, 5.485e-02, -3.567e-01, 2.777e-01, 3.483e-02, 5.400e-02, 6.884e-02, -4.037e-02, -6.517e-02, -7.915e-03, -2.116e-01, -6.909e-02, 1.464e-01, -1.226e-01)); + r += mul(s5_5, M4(3.022e-03, -4.202e-03, -1.367e-01, -3.061e-02, 8.022e-02, 1.031e-01, -2.493e-01, 5.077e-02, -4.849e-02, -5.362e-02, 3.291e-02, -2.919e-02, -7.381e-02, -1.435e-01, -3.912e-02, 1.071e-02)); + r += mul(s5_6, M4(-3.188e-02, -1.425e-03, -8.109e-03, -6.234e-03, 7.547e-02, 3.376e-02, 3.357e-02, 2.933e-02, -5.036e-02, 7.092e-02, -1.196e-02, -2.627e-02, 6.140e-04, -1.185e-02, 1.312e-02, 3.290e-02)); + r += mul(s5_7, M4(-6.365e-02, 4.057e-03, -1.303e-02, -3.592e-02, -4.351e-02, 1.689e-01, -2.047e-02, -1.170e-02, 2.940e-02, -7.746e-02, 1.809e-02, -8.583e-03, -2.782e-02, 3.031e-02, -6.050e-02, 2.208e-02)); + r += mul(s5_8, M4(2.310e-02, -3.416e-02, 4.306e-02, 
1.533e-02, 9.334e-02, -8.918e-02, -5.069e-02, -7.945e-03, 7.296e-02, -1.590e-02, 1.189e-02, -4.936e-04, -1.224e-02, 8.756e-02, 5.734e-02, -6.707e-03)); + r += mul(s6_0, M4(6.112e-02, -4.774e-03, -6.005e-03, 2.364e-02, -2.329e-02, -2.986e-02, 1.616e-02, -6.983e-02, -1.663e-01, 2.243e-02, -3.605e-02, 3.315e-01, 9.484e-03, 3.748e-02, -9.987e-03, 9.160e-03)); + r += mul(s6_1, M4(2.533e-02, 1.706e-02, 1.712e-02, 1.156e-01, -3.681e-02, 2.597e-03, 7.530e-02, 1.275e-02, -1.541e-01, 4.503e-02, 1.143e-01, 1.588e-01, 1.915e-01, -1.275e-02, 3.138e-02, 1.509e-02)); + r += mul(s6_2, M4(-6.694e-02, -1.834e-02, 8.816e-02, -1.622e-02, -1.021e-01, 1.299e-02, 1.748e-02, -5.791e-02, -5.930e-02, 6.122e-02, 8.327e-04, 3.157e-02, 1.111e-01, -1.483e-02, -1.748e-03, -2.191e-02)); + r += mul(s6_3, M4(-2.837e-02, 1.398e-02, -2.785e-02, 2.663e-03, 4.867e-02, 4.068e-02, -1.933e-02, 2.442e-02, -3.858e-02, 7.174e-02, 1.585e-02, 1.077e-01, 1.722e-02, -5.831e-02, 5.412e-02, -1.175e-02)); + r += mul(s6_4, M4(-7.951e-02, 5.032e-02, 8.040e-02, -2.639e-02, 2.334e-01, -4.099e-02, -2.540e-01, 9.531e-02, 4.706e-01, 1.059e-01, 2.458e-01, -4.419e-02, 1.768e-02, -1.573e-01, -3.025e-02, 8.663e-02)); + r += mul(s6_5, M4(-6.631e-02, 7.565e-03, -1.313e-01, -2.789e-02, 4.157e-02, 6.711e-02, 1.025e-01, 5.542e-04, -3.761e-02, 6.066e-02, 1.626e-01, 2.160e-02, -5.487e-02, -7.425e-02, -1.634e-02, -2.391e-02)); + r += mul(s6_6, M4(1.005e-02, 3.554e-03, -6.610e-03, -2.522e-02, -6.029e-02, -1.488e-02, 8.100e-02, 3.640e-02, -2.426e-02, 9.615e-02, -2.740e-02, -2.791e-02, 7.064e-02, -6.777e-03, -2.041e-02, 2.271e-02)); + r += mul(s6_7, M4(7.635e-02, -4.346e-02, -4.108e-02, 6.087e-02, -1.638e-01, 1.368e-01, 5.916e-02, 8.663e-03, 9.976e-02, -5.511e-02, -2.765e-02, -1.956e-02, 3.657e-02, -6.695e-02, 5.414e-02, -2.143e-03)); + r += mul(s6_8, M4(1.062e-01, -6.264e-02, 5.932e-02, -6.501e-03, -2.462e-02, 1.685e-02, -6.453e-02, 1.135e-02, 2.601e-03, -1.934e-01, 8.357e-03, -9.472e-04, -1.883e-02, 8.681e-03, 4.172e-02, 1.029e-02)); 
+ r += mul(s7_0, M4(8.194e-02, 1.504e-02, 2.109e-02, -7.019e-02, -6.763e-02, -1.386e-02, 5.007e-05, 4.384e-02, 4.691e-02, 3.245e-03, -4.615e-02, -1.523e-01, 6.344e-02, -9.443e-03, 4.452e-02, 1.401e-01)); + r += mul(s7_1, M4(5.713e-02, 4.575e-03, -2.760e-02, 1.835e-01, -4.168e-02, 7.856e-02, 1.071e-01, 6.429e-02, 4.507e-02, -6.009e-02, -7.034e-03, 1.460e-01, -6.400e-02, -1.837e-01, -2.117e-01, -4.930e-01)); + r += mul(s7_2, M4(5.604e-02, 4.955e-02, 2.635e-02, -3.725e-02, -9.501e-02, 3.427e-02, 4.963e-02, -3.468e-02, -6.375e-04, -2.081e-02, 6.959e-02, -1.102e-02, 5.659e-02, 1.112e-01, -6.842e-02, 1.090e-01)); + r += mul(s7_3, M4(9.068e-02, 1.051e-02, -3.817e-02, 1.735e-01, -2.777e-02, 3.623e-02, 4.151e-02, 3.167e-02, -1.217e-02, 8.726e-03, 4.313e-02, 2.150e-01, 2.386e-01, -2.212e-02, 6.009e-02, -2.260e-01)); + r += mul(s7_4, M4(1.871e-01, -1.482e-01, 3.274e-02, -3.016e-01, -6.794e-02, -7.395e-02, -2.008e-01, 4.266e-02, 8.102e-02, 2.207e-01, -1.172e-01, -2.571e-02, -1.292e-01, -1.271e-01, -1.939e-01, 3.910e-02)); + r += mul(s7_5, M4(2.271e-02, -5.943e-02, -4.335e-02, 8.813e-02, 4.214e-02, 2.775e-02, -6.806e-03, -6.327e-03, 3.212e-02, 4.120e-02, 8.407e-03, 2.191e-02, -4.026e-02, -1.536e-01, -1.916e-01, -4.750e-03)); + r += mul(s7_6, M4(4.624e-02, 2.899e-02, 4.689e-02, -5.187e-02, -2.949e-03, -9.181e-03, -2.542e-02, 3.311e-02, -4.676e-02, -3.951e-02, -7.407e-03, 8.856e-04, 3.157e-02, -7.219e-02, 3.976e-02, -6.937e-02)); + r += mul(s7_7, M4(6.242e-02, -7.255e-02, -8.852e-02, 6.086e-02, 1.416e-02, 9.508e-02, 6.196e-02, -3.596e-02, -9.707e-02, -6.330e-02, 7.787e-02, 5.456e-02, 1.479e-01, 1.658e-01, 2.335e-02, 6.382e-03)); + r += mul(s7_8, M4(4.285e-03, 1.385e-02, -1.282e-02, 1.915e-02, -3.214e-02, 1.144e-02, -1.831e-02, -1.096e-02, -8.330e-03, -2.045e-02, -4.096e-02, -5.822e-02, -9.821e-03, 7.185e-02, -3.122e-02, -1.760e-02)); + r += V4(-7.389e-03, 1.176e-03, 1.090e-02, 3.142e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 
s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(9.318e-03, 1.971e-02, -4.711e-02, -1.441e-02, 2.845e-02, 1.500e-02, -7.066e-02, 4.036e-02, 1.277e-01, -2.572e-02, -2.008e-02, -1.923e-02, -2.453e-02, -5.953e-03, -3.675e-03, -3.523e-03)); + r += mul(s0_1, M4(4.747e-02, -9.007e-02, 3.769e-02, 2.768e-02, -1.276e-02, 7.454e-02, 1.610e-02, -7.106e-03, 1.486e-03, -6.065e-02, 5.470e-02, -2.325e-02, -2.186e-02, 2.483e-02, 2.275e-02, -9.590e-02)); + r += mul(s0_2, M4(1.496e-01, -2.921e-02, -3.473e-03, -2.540e-02, -1.334e-01, 3.500e-02, -3.829e-02, -1.840e-02, 1.008e-01, -2.292e-02, 3.331e-02, 1.530e-02, 3.491e-03, 1.641e-03, 2.776e-02, 7.546e-02)); + r += mul(s0_3, M4(7.560e-02, -3.680e-02, -1.262e-02, -2.280e-02, -3.754e-02, -9.010e-02, 7.737e-02, 6.975e-02, -2.890e-01, -1.377e-01, 6.193e-02, 1.340e-01, 1.297e-02, -2.207e-02, -2.043e-02, -4.829e-02)); + r += mul(s0_4, M4(2.365e-01, -2.331e-01, 1.118e-01, 1.209e-01, 1.117e-02, -9.303e-02, -9.648e-05, -1.518e-01, -3.367e-01, 8.023e-02, -1.159e-01, -8.597e-02, 1.433e-01, -1.751e-02, 1.518e-03, 3.134e-02)); + r += mul(s0_5, M4(-4.400e-02, -6.611e-02, -1.059e-01, -2.305e-02, 6.234e-03, -3.756e-02, -4.826e-03, -4.008e-02, -1.677e-01, -1.284e-01, 1.431e-01, 4.148e-02, 1.298e-01, -3.273e-02, 5.965e-03, 5.432e-02)); + r += mul(s0_6, M4(-6.353e-02, 8.337e-02, -3.857e-02, -2.829e-02, -3.215e-03, -1.428e-01, 3.896e-02, -1.339e-02, 1.479e-02, 7.588e-02, -1.057e-01, 4.416e-02, 
6.222e-02, -4.782e-02, -2.898e-02, -3.526e-02)); + r += mul(s0_7, M4(-1.113e-01, 6.889e-02, 1.222e-01, 5.193e-02, -7.594e-02, 4.964e-02, 8.847e-03, 2.485e-02, 3.323e-01, -2.066e-01, -9.174e-02, 1.259e-01, 8.803e-02, -7.918e-02, 4.065e-02, 1.561e-02)); + r += mul(s0_8, M4(-4.717e-03, 3.938e-02, -5.724e-02, -1.059e-02, -1.039e-01, 3.829e-02, -6.498e-02, 1.782e-02, 1.593e-01, -5.529e-02, -5.608e-02, 1.020e-01, -3.413e-03, 1.366e-02, 1.714e-02, 5.552e-03)); + r += mul(s1_0, M4(7.586e-02, 3.628e-02, 3.292e-03, -1.218e-02, -2.084e-02, -7.520e-03, -9.144e-03, -2.570e-02, -4.835e-03, -1.008e-02, -3.211e-02, -2.545e-02, 1.710e-02, -7.760e-02, 7.692e-02, 9.374e-02)); + r += mul(s1_1, M4(9.785e-02, 8.141e-03, -6.459e-02, -5.718e-02, 8.235e-02, 4.175e-02, -1.180e-01, 1.333e-01, -1.943e-02, 3.633e-02, 4.322e-02, -3.006e-03, 7.250e-02, -1.302e-01, 6.711e-02, -2.551e-01)); + r += mul(s1_2, M4(1.206e-01, 4.771e-02, 1.311e-01, -1.836e-02, -3.410e-02, 1.290e-02, -6.934e-02, -4.725e-03, -7.862e-02, 3.476e-03, -1.437e-02, 1.828e-02, -1.412e-01, 6.391e-02, 4.649e-03, -9.304e-02)); + r += mul(s1_3, M4(-5.228e-02, -5.781e-02, -1.174e-02, 3.145e-02, 5.907e-02, 8.617e-02, 6.714e-02, -3.907e-02, 5.771e-02, -7.103e-02, -1.651e-02, 2.482e-02, -3.957e-02, -2.212e-01, 1.794e-01, 7.303e-02)); + r += mul(s1_4, M4(-1.779e-01, 8.501e-02, 3.207e-02, -1.450e-01, 1.176e-01, 1.274e-01, -4.872e-02, 4.053e-01, 3.342e-02, -4.093e-02, -7.129e-03, -1.106e-02, -2.820e-01, -1.113e-01, 2.251e-01, -7.125e-01)); + r += mul(s1_5, M4(-1.010e-02, -4.005e-02, 1.107e-01, 8.682e-02, -2.968e-02, 5.384e-02, -1.008e-01, -9.560e-02, 3.131e-02, 1.392e-03, 5.557e-03, 9.966e-03, 1.275e-01, -4.041e-02, 5.097e-02, -1.446e-01)); + r += mul(s1_6, M4(5.897e-02, -4.893e-02, 1.086e-02, 5.963e-02, 2.258e-02, -9.672e-02, -3.724e-02, -1.315e-01, -1.192e-02, -7.347e-02, 1.482e-02, 3.289e-02, 4.438e-02, -1.143e-01, 1.857e-01, 1.824e-01)); + r += mul(s1_7, M4(8.749e-02, -5.186e-02, -4.175e-02, -1.028e-01, 5.872e-02, -5.764e-02, 
4.637e-02, 2.059e-01, 1.254e-01, -1.173e-01, -2.757e-02, 2.218e-03, -1.354e-01, -1.943e-01, -8.947e-02, -1.984e-01)); + r += mul(s1_8, M4(3.318e-03, 3.692e-02, 8.340e-02, 1.072e-01, -4.176e-02, 5.210e-02, -3.261e-04, 2.130e-02, -5.689e-03, 3.972e-03, 2.435e-02, 1.099e-02, 7.078e-03, -2.827e-03, 7.182e-02, 8.673e-02)); + r += mul(s2_0, M4(1.291e-02, -3.830e-02, -4.018e-02, 8.030e-03, 9.176e-02, 3.323e-03, -8.411e-02, -2.544e-02, 8.492e-02, 8.521e-02, -2.509e-02, -5.748e-03, -3.107e-02, -1.200e-02, 1.204e-02, 5.356e-02)); + r += mul(s2_1, M4(-2.296e-02, -6.058e-03, 7.210e-03, -2.304e-02, -3.666e-02, 9.114e-02, -1.274e-01, 7.791e-02, -1.240e-05, 9.711e-02, -8.943e-02, 9.498e-02, -1.958e-02, -2.426e-02, 5.381e-02, -3.134e-02)); + r += mul(s2_2, M4(5.071e-02, -2.808e-02, 2.896e-02, 2.517e-02, -1.061e-01, -9.929e-02, 3.763e-02, -6.403e-03, 1.436e-01, 1.704e-02, 2.727e-02, 1.008e-01, 2.704e-02, -5.297e-03, 1.933e-02, -3.730e-02)); + r += mul(s2_3, M4(2.860e-03, 5.863e-02, -1.089e-01, 5.943e-02, 2.059e-01, 1.105e-01, -3.185e-02, 1.028e-01, 1.689e-01, 2.686e-01, -1.146e-01, -4.051e-03, 4.745e-02, 3.347e-02, 4.123e-02, -3.907e-03)); + r += mul(s2_4, M4(1.016e-01, -8.799e-02, 1.260e-01, 1.206e-01, 2.857e-01, 1.680e-01, -4.300e-02, 2.488e-01, 1.589e-01, 3.359e-01, -1.774e-01, 2.562e-01, 1.264e-01, 3.470e-03, 1.608e-01, -1.381e-01)); + r += mul(s2_5, M4(-7.841e-02, 3.849e-02, -4.457e-02, -2.774e-02, -8.847e-02, 5.292e-02, -2.125e-01, -7.150e-02, -1.219e-01, 1.448e-01, 6.493e-02, 5.641e-02, 5.238e-02, 4.933e-02, 1.830e-01, -1.423e-01)); + r += mul(s2_6, M4(1.342e-02, 2.656e-02, -5.992e-02, 4.271e-02, 4.075e-03, 1.125e-02, 1.313e-02, -1.325e-01, 2.797e-02, -6.779e-02, -7.360e-02, -1.585e-03, -2.407e-03, 4.910e-03, -2.452e-02, 1.567e-02)); + r += mul(s2_7, M4(9.773e-02, 4.043e-02, 1.555e-02, -1.071e-02, 8.971e-02, 1.628e-01, 1.131e-02, 4.467e-01, 3.060e-02, 6.077e-02, -5.876e-02, 2.018e-01, -3.268e-02, 1.232e-01, 8.955e-02, -1.261e-01)); + r += mul(s2_8, M4(-1.874e-02, 2.312e-02, 
-1.151e-01, -3.373e-04, -6.412e-03, 3.135e-02, -4.381e-02, -1.055e-02, 3.871e-03, 4.973e-02, -7.849e-02, 3.332e-03, 1.271e-01, -9.244e-02, 3.887e-02, -5.154e-02)); + r += mul(s3_0, M4(1.036e-03, -8.569e-02, 1.218e-01, 2.093e-01, -4.657e-02, -3.873e-03, 1.242e-02, -2.976e-02, -2.957e-02, -4.404e-02, 4.678e-02, 4.112e-02, 3.632e-02, -1.145e-02, 1.252e-02, -2.102e-02)); + r += mul(s3_1, M4(1.155e-01, -1.149e-01, -1.369e-01, -1.560e-01, -1.744e-02, -3.161e-02, 5.220e-03, -2.918e-02, -1.461e-01, -4.223e-02, -2.358e-02, -8.501e-02, 6.100e-02, 4.602e-02, 1.639e-02, 2.726e-02)); + r += mul(s3_2, M4(3.795e-02, -1.800e-02, 5.280e-02, 1.153e-02, -9.406e-02, 6.172e-03, 5.216e-02, -1.007e-02, -3.424e-02, -3.728e-02, 2.888e-02, -1.508e-02, -9.406e-04, -4.032e-02, -6.712e-02, -2.512e-02)); + r += mul(s3_3, M4(4.153e-02, 9.395e-02, -9.089e-04, 1.137e-01, 1.831e-01, 5.065e-02, 6.709e-02, -9.444e-03, 1.631e-02, 2.292e-02, 2.117e-02, 4.207e-02, -3.171e-02, 3.065e-02, -1.305e-01, -4.601e-02)); + r += mul(s3_4, M4(-3.116e-01, -5.851e-02, -2.218e-01, -5.380e-01, 5.661e-02, -7.224e-02, 7.720e-02, -1.200e-01, 1.318e-02, 1.099e-01, -2.581e-02, 3.823e-02, -1.119e-01, -6.392e-03, 6.803e-02, 1.336e-01)); + r += mul(s3_5, M4(-2.828e-01, -9.250e-02, -7.496e-03, 3.760e-02, -9.597e-02, -1.174e-02, -6.336e-03, -6.538e-02, -9.218e-02, 7.557e-02, -3.513e-03, -5.946e-03, 1.581e-01, -3.998e-02, 7.530e-02, 2.452e-01)); + r += mul(s3_6, M4(-1.943e-01, 4.789e-02, -6.449e-02, 3.238e-02, -6.507e-02, -3.958e-02, 1.756e-02, -7.005e-02, -3.224e-03, 1.235e-02, 3.941e-02, -9.004e-03, 4.120e-02, 1.704e-02, -6.094e-02, -3.104e-03)); + r += mul(s3_7, M4(3.313e-02, 1.373e-02, -4.096e-02, -7.572e-03, -1.006e-01, 2.217e-01, -4.270e-02, 6.028e-05, -1.352e-02, -2.511e-02, -1.181e-02, -5.750e-02, 6.419e-02, 3.716e-02, 6.893e-02, -7.033e-03)); + r += mul(s3_8, M4(2.463e-02, -2.423e-02, 4.314e-02, 2.715e-03, -9.067e-02, 7.964e-03, -1.740e-02, -1.417e-02, -2.412e-03, 8.601e-03, -5.609e-02, -4.326e-02, -2.185e-01, 
3.743e-02, 2.167e-03, 3.162e-02)); + r += mul(s4_0, M4(-3.291e-02, 2.270e-02, 4.077e-02, 2.796e-02, 7.450e-02, -2.478e-03, -3.507e-02, -1.515e-02, -6.977e-02, 3.246e-02, 8.137e-02, 7.233e-02, 6.702e-03, -3.982e-03, 5.725e-02, 2.816e-04)); + r += mul(s4_1, M4(-3.446e-02, 1.092e-01, -6.071e-02, -8.485e-02, 4.414e-02, -1.326e-02, 2.453e-02, 2.279e-02, 7.401e-02, -1.150e-01, -3.160e-02, -8.228e-02, 4.281e-02, 7.600e-02, 8.618e-02, 1.720e-02)); + r += mul(s4_2, M4(6.556e-02, 8.818e-02, -2.382e-02, 7.144e-02, -2.628e-02, 3.018e-02, -4.495e-02, -1.798e-02, 4.115e-02, -7.469e-02, -1.743e-02, -2.103e-02, -5.742e-02, 5.787e-02, -6.095e-02, -1.175e-02)); + r += mul(s4_3, M4(-8.912e-02, -1.611e-01, 6.763e-02, 6.021e-02, 2.892e-02, -1.182e-02, -2.709e-02, 7.934e-02, 4.207e-02, -1.868e-01, -1.718e-01, 3.170e-02, -6.664e-02, 1.254e-02, -1.698e-02, -5.976e-02)); + r += mul(s4_4, M4(8.374e-02, 9.909e-02, 6.506e-02, -1.439e-02, -6.977e-02, 4.786e-03, -1.697e-01, -9.840e-02, 2.172e-01, -9.600e-03, -1.772e-01, -2.745e-01, -3.708e-02, 2.665e-02, -3.142e-02, 8.629e-02)); + r += mul(s4_5, M4(2.115e-01, -4.760e-03, -1.529e-01, -4.313e-02, -7.121e-02, -2.899e-02, -4.600e-02, 5.944e-02, -8.202e-02, 7.803e-03, 5.726e-02, 1.435e-01, -2.347e-03, 1.732e-02, 3.681e-02, -4.667e-02)); + r += mul(s4_6, M4(-2.713e-02, -3.046e-02, 3.364e-02, 6.123e-02, -1.136e-02, 5.839e-02, -6.758e-02, 2.464e-02, -1.570e-02, 1.000e-01, -9.065e-02, -9.243e-02, 4.498e-02, -3.277e-02, -5.533e-03, -1.995e-02)); + r += mul(s4_7, M4(2.923e-02, -9.440e-02, 6.992e-03, -1.119e-01, 1.441e-03, -1.225e-03, 2.778e-02, 4.411e-02, -9.295e-02, -1.301e-01, 5.208e-02, -2.434e-03, -1.158e-02, -6.665e-03, 1.270e-02, 3.487e-03)); + r += mul(s4_8, M4(-1.583e-01, -1.284e-02, 1.077e-01, 1.784e-02, 1.477e-02, 1.177e-02, -1.049e-01, 3.212e-02, 1.332e-01, -9.264e-03, 1.031e-01, 1.339e-01, -3.333e-02, -1.368e-02, 8.314e-02, -3.010e-02)); + r += mul(s5_0, M4(-7.179e-02, 5.272e-02, -1.243e-01, -9.554e-03, -6.960e-02, -8.464e-02, 7.857e-02, 
-8.299e-03, 9.177e-02, 2.644e-02, -7.346e-02, -3.273e-02, -1.278e-01, 2.899e-02, 1.952e-01, 1.239e-01)); + r += mul(s5_1, M4(-1.492e-01, 5.505e-02, 6.646e-02, 4.484e-02, -8.677e-02, 1.566e-03, 1.786e-01, -8.437e-02, 3.549e-02, 1.309e-02, 1.116e-01, 1.697e-01, 1.790e-02, 1.489e-01, -3.890e-01, -1.688e-04)); + r += mul(s5_2, M4(-3.552e-02, 1.203e-02, 4.686e-03, -6.220e-02, 7.194e-02, -1.916e-03, -1.341e-01, 2.712e-02, -2.318e-02, 4.155e-02, -1.011e-01, -8.134e-02, 3.656e-02, -2.726e-02, 2.495e-01, 7.981e-02)); + r += mul(s5_3, M4(1.123e-01, -1.111e-01, -5.847e-02, -2.608e-02, -1.121e-01, 3.707e-02, 6.862e-02, -3.574e-02, -1.159e-01, -1.770e-01, 2.562e-02, 8.975e-04, 3.038e-02, 4.613e-02, 9.902e-02, 2.753e-02)); + r += mul(s5_4, M4(2.135e-01, -1.309e-01, 6.215e-02, 1.244e-01, -6.001e-01, -3.448e-01, 6.351e-01, -6.863e-01, -1.117e-01, 1.596e-02, 1.419e-01, 1.092e-02, 2.283e-01, 5.573e-02, -2.652e-01, -3.967e-02)); + r += mul(s5_5, M4(1.827e-01, 7.803e-03, -3.927e-02, -3.662e-02, -1.242e-01, 7.477e-02, -6.226e-02, 2.110e-03, 2.340e-02, 2.710e-02, -5.058e-02, -2.235e-02, 1.579e-01, -2.095e-02, 1.359e-01, 5.965e-02)); + r += mul(s5_6, M4(-3.795e-02, -2.222e-02, -5.652e-02, 1.478e-02, 3.860e-02, 2.259e-02, 1.439e-02, 1.043e-02, 2.601e-02, -7.573e-02, 8.292e-02, 7.048e-02, -8.373e-02, 4.477e-02, 4.754e-02, -4.149e-03)); + r += mul(s5_7, M4(1.704e-01, -9.205e-03, -2.108e-02, 1.881e-02, 1.268e-01, -6.519e-02, 3.233e-02, -1.141e-01, -1.000e-02, -3.744e-02, -2.276e-02, -1.411e-02, -1.528e-01, 3.616e-02, -8.978e-02, -7.246e-02)); + r += mul(s5_8, M4(1.876e-02, -2.578e-02, 3.945e-02, 6.642e-03, 1.662e-01, 9.105e-02, -1.487e-01, 6.059e-02, -8.806e-03, 2.749e-02, -1.692e-02, 1.774e-02, -1.055e-01, 2.088e-02, 8.990e-02, 5.275e-02)); + r += mul(s6_0, M4(1.573e-02, -6.888e-03, -4.979e-02, 1.519e-02, 2.973e-02, -5.261e-02, 3.624e-02, -7.583e-02, -3.794e-02, -1.598e-01, 8.630e-02, -2.981e-02, -2.372e-02, 5.984e-02, -2.515e-02, -5.868e-03)); + r += mul(s6_1, M4(2.470e-02, -4.338e-02, 
-3.948e-02, 1.234e-02, -1.575e-01, 4.389e-02, -5.306e-02, -6.776e-02, 2.467e-01, -1.589e-01, -1.711e-02, -2.030e-01, -6.872e-03, 1.258e-02, 1.350e-01, 5.095e-02)); + r += mul(s6_2, M4(-1.180e-02, -3.677e-02, 2.121e-02, -9.657e-03, -2.715e-03, -5.844e-02, 1.344e-01, 3.278e-02, 7.357e-02, -7.242e-02, 3.892e-02, 2.501e-02, -7.762e-02, 3.119e-02, -1.051e-01, -5.809e-02)); + r += mul(s6_3, M4(1.091e-01, 1.278e-02, -3.599e-02, 5.367e-03, 3.677e-02, -1.054e-01, 1.821e-01, 5.286e-02, -1.328e-01, 6.282e-02, 6.553e-02, -8.289e-03, 1.058e-01, 7.081e-03, 3.306e-02, 1.479e-02)); + r += mul(s6_4, M4(1.046e-01, -3.988e-02, 4.995e-02, 1.470e-01, 2.933e-01, 1.105e-01, -2.948e-01, -8.135e-02, 4.229e-01, 7.307e-02, -3.227e-01, 1.825e-01, -1.001e-02, 1.679e-01, 7.475e-02, -1.002e-01)); + r += mul(s6_5, M4(-1.456e-02, 1.389e-02, 2.569e-02, -7.156e-02, -6.755e-02, 3.306e-02, 4.274e-04, -1.295e-02, 9.901e-02, -7.322e-02, 4.438e-02, 5.605e-02, 1.685e-02, -1.109e-02, -8.532e-03, -3.867e-02)); + r += mul(s6_6, M4(-3.475e-02, 3.209e-02, 1.362e-02, 6.563e-03, -9.628e-02, -2.524e-03, 3.816e-02, 2.843e-02, 4.535e-02, 3.333e-02, -1.107e-01, -2.981e-02, 8.319e-03, 7.799e-04, 2.900e-04, -2.311e-02)); + r += mul(s6_7, M4(2.919e-02, 1.059e-02, 7.065e-03, -3.345e-02, 2.468e-02, 1.444e-03, -4.799e-02, -7.159e-02, -1.106e-01, 1.015e-01, -6.719e-02, 6.121e-02, 2.030e-01, 9.782e-02, 7.160e-03, -8.295e-02)); + r += mul(s6_8, M4(-7.268e-03, 4.832e-02, -6.109e-02, 4.652e-02, -6.329e-02, -5.492e-03, 4.236e-02, -9.245e-03, -9.686e-02, 4.322e-02, 2.277e-02, 7.572e-02, 7.280e-02, -2.059e-02, 6.613e-02, 1.643e-02)); + r += mul(s7_0, M4(-2.119e-02, 3.030e-02, -4.405e-02, 6.537e-02, 2.802e-02, 6.263e-03, -9.705e-03, -4.797e-02, -4.451e-04, 1.365e-02, 2.454e-02, -1.171e-03, -3.285e-03, -9.244e-02, -8.995e-03, 3.439e-02)); + r += mul(s7_1, M4(-4.763e-02, 8.042e-02, -1.512e-01, 5.042e-03, -1.447e-02, 5.979e-02, 1.804e-02, -3.810e-02, 4.092e-02, 9.670e-02, 7.289e-02, -7.576e-02, 5.053e-02, -1.740e-01, 1.056e-02, 
1.016e-02)); + r += mul(s7_2, M4(-4.215e-02, 9.243e-03, 4.547e-02, 5.101e-02, 1.471e-02, -4.845e-02, 8.137e-02, 6.904e-03, 3.433e-03, 2.506e-03, 4.958e-02, 1.959e-02, 6.461e-02, -8.735e-03, -9.370e-02, 3.753e-02)); + r += mul(s7_3, M4(2.423e-01, -1.513e-02, -1.648e-01, 7.641e-02, -3.378e-02, 1.648e-02, 2.892e-02, 8.187e-02, -4.073e-02, 4.538e-02, 1.119e-01, -1.919e-02, 7.811e-02, -4.842e-02, -1.180e-01, -1.526e-02)); + r += mul(s7_4, M4(3.740e-01, 1.261e-01, -3.589e-01, -8.046e-03, -1.334e-01, -6.983e-02, 1.186e-01, -1.908e-01, 2.490e-01, 4.919e-02, 4.566e-02, -1.406e-01, -2.601e-01, 1.413e-03, 6.755e-01, -1.009e-01)); + r += mul(s7_5, M4(1.648e-01, -9.394e-02, 1.533e-01, 9.692e-02, -3.313e-02, 7.436e-02, 1.267e-02, 6.690e-02, 7.949e-02, -2.163e-02, 5.379e-02, -3.235e-02, 1.076e-01, -6.953e-02, -2.222e-01, -2.015e-01)); + r += mul(s7_6, M4(-5.087e-03, -3.670e-02, 8.675e-02, 1.666e-01, -9.506e-04, 1.749e-02, -8.476e-02, 1.666e-02, 4.410e-03, 5.603e-03, -8.273e-02, -4.501e-02, 1.623e-01, -1.785e-02, 1.753e-02, 1.361e-01)); + r += mul(s7_7, M4(-1.254e-01, 1.228e-01, -2.234e-01, 2.425e-02, 2.744e-02, 2.496e-02, -6.378e-02, -3.784e-02, 1.057e-02, -2.370e-02, 1.629e-01, 4.549e-02, -3.170e-02, -3.826e-02, 3.585e-02, -1.727e-01)); + r += mul(s7_8, M4(1.884e-02, -5.068e-02, -6.821e-02, -7.822e-02, -1.579e-02, -3.825e-02, 3.887e-02, -3.989e-03, 2.832e-02, 2.644e-02, -7.321e-02, -1.082e-02, -1.362e-02, -1.826e-02, -7.490e-03, 2.499e-02)); + r += V4(-7.589e-03, 2.995e-04, -9.737e-05, 7.202e-03); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = 
l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = 
l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, 
s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 6 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0, t1, t2, t3 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.706e-03, 2.738e-02, 1.982e-02, 3.446e-02, 1.264e-02, 1.304e-02, 7.763e-03, 1.023e-02, 6.302e-02, -1.182e-02, 2.278e-03, -1.232e-02, -3.429e-02, -2.040e-02, -1.168e-02, -1.181e-04)); + r += mul(s0_1, M4(1.620e-02, -1.199e-02, 4.502e-04, -2.397e-02, 1.938e-02, 2.430e-02, -2.373e-02, -1.482e-02, 2.732e-01, 2.037e-01, 1.514e-02, -4.439e-03, 1.380e-02, -2.781e-02, -1.160e-02, -5.282e-03)); + r += mul(s0_2, M4(-2.373e-02, -3.357e-02, -1.911e-02, -2.116e-02, 3.150e-03, -1.397e-02, 6.662e-03, 
2.505e-03, -6.251e-03, 1.411e-01, -6.574e-03, 2.131e-02, -7.093e-03, -2.413e-03, 4.677e-03, -6.331e-03)); + r += mul(s0_3, M4(-8.154e-03, -2.709e-02, -4.627e-02, 1.616e-02, -1.821e-02, -1.728e-03, 1.458e-02, 3.793e-03, 7.761e-04, 1.175e-02, 4.766e-02, 4.329e-02, 7.208e-02, 2.035e-02, 3.456e-02, 7.867e-03)); + r += mul(s0_4, M4(3.753e-02, 8.269e-02, 4.995e-02, -7.103e-02, 2.705e-01, 1.385e-01, 1.998e-01, 1.450e-01, 3.745e-02, -3.325e-02, -5.085e-01, -2.738e-01, -3.485e-02, 8.088e-03, 1.705e-02, -1.057e-02)); + r += mul(s0_5, M4(-2.050e-02, -2.972e-02, -2.553e-02, 5.540e-03, -3.511e-03, 1.057e-01, 3.660e-04, 5.968e-02, -1.991e-03, 4.205e-02, 2.424e-02, -1.986e-01, -5.259e-03, 1.820e-02, -1.526e-02, 2.478e-02)); + r += mul(s0_6, M4(9.996e-04, 3.596e-03, 1.001e-02, 1.456e-02, -4.555e-03, 7.179e-03, -2.682e-02, -6.408e-03, 3.120e-03, 1.067e-02, 3.258e-03, -3.356e-03, 1.815e-03, -1.414e-02, 5.043e-03, -3.697e-03)); + r += mul(s0_7, M4(-3.559e-03, -8.496e-03, 3.859e-02, 9.324e-03, 9.646e-03, 8.992e-04, 9.910e-02, 2.640e-02, -1.351e-02, -3.127e-02, 9.650e-03, 8.386e-03, 1.654e-02, 6.433e-03, -2.265e-02, -1.257e-02)); + r += mul(s0_8, M4(-1.199e-04, -1.512e-03, -1.954e-03, 2.475e-02, -1.249e-03, 9.731e-03, 5.393e-04, 4.577e-02, -2.155e-03, 5.138e-03, 6.896e-03, 1.335e-02, 3.760e-03, 3.140e-03, 5.814e-03, 9.744e-03)); + r += mul(s1_0, M4(-8.659e-02, 3.889e-02, -2.871e-02, 3.276e-02, -8.269e-03, -5.772e-03, -7.633e-03, -3.066e-04, 3.702e-02, 6.132e-03, -2.208e-03, -6.681e-03, 2.615e-01, -3.704e-03, 3.935e-02, -1.550e-02)); + r += mul(s1_1, M4(5.674e-01, -3.038e-01, -7.032e-02, 2.502e-01, -1.740e-02, -8.063e-03, -3.653e-02, -3.674e-02, -1.522e-02, 3.629e-02, 3.234e-02, 1.025e-03, -5.206e-02, -1.642e-01, -3.535e-03, 1.201e-01)); + r += mul(s1_2, M4(-6.240e-02, -6.285e-03, -3.948e-03, -9.802e-02, -1.037e-02, -2.798e-02, -4.192e-03, -9.613e-03, 9.687e-03, 1.919e-04, 3.353e-03, 3.614e-02, -1.174e-02, -4.330e-02, -4.163e-03, -3.807e-02)); + r += mul(s1_3, M4(4.163e-02, -9.872e-03, 
2.819e-02, 2.463e-02, -6.409e-02, -1.859e-02, -2.710e-02, -2.217e-02, 7.472e-03, 1.579e-02, 5.426e-02, 2.485e-02, -1.173e-01, 4.078e-03, -3.701e-02, 2.325e-01)); + r += mul(s1_4, M4(-2.448e-02, 1.968e-01, -1.342e-01, -2.612e-01, 2.046e-01, 7.601e-02, 1.447e-01, 1.028e-01, -3.414e-02, -6.663e-02, -8.081e-02, -2.704e-02, 5.723e-04, -3.118e-02, 3.543e-03, -3.026e-01)); + r += mul(s1_5, M4(-4.390e-02, -7.740e-02, -1.474e-02, 2.634e-01, -2.341e-02, 5.932e-02, -2.056e-02, 2.264e-02, -1.905e-02, -6.032e-03, 2.378e-03, -1.867e-02, 7.279e-04, 2.674e-02, -1.385e-02, 3.487e-02)); + r += mul(s1_6, M4(3.921e-03, 3.367e-03, 1.907e-02, 1.209e-02, -1.719e-02, -1.690e-03, -5.189e-02, -9.493e-03, 3.343e-03, 9.647e-03, 2.207e-03, 2.075e-03, -3.259e-03, -7.067e-03, -3.279e-02, -2.717e-02)); + r += mul(s1_7, M4(-2.856e-03, -4.601e-05, 6.812e-02, 4.533e-02, -1.426e-02, -2.584e-02, 5.481e-02, -1.690e-02, -6.950e-03, -2.264e-02, -1.304e-02, -1.176e-04, 2.057e-02, 7.382e-03, -1.340e-02, -5.682e-03)); + r += mul(s1_8, M4(2.008e-02, -3.867e-05, 4.696e-03, 4.263e-02, -1.265e-03, -7.850e-03, -5.589e-03, 1.498e-02, 1.009e-03, 5.427e-03, -9.848e-03, -2.229e-02, 4.909e-03, 1.174e-02, 1.151e-02, 1.605e-02)); + r += mul(s2_0, M4(1.550e-02, 7.876e-03, 1.935e-02, -2.579e-02, 3.936e-02, -1.027e-02, -4.275e-03, -4.746e-04, 6.157e-04, 1.466e-03, -8.017e-03, -7.368e-03, -2.300e-01, 6.241e-02, 2.634e-03, 2.035e-02)); + r += mul(s2_1, M4(-1.844e-02, 6.846e-02, 5.139e-02, 6.380e-02, 1.223e-03, 8.304e-02, 1.181e-02, 1.088e-03, 1.064e-01, -6.148e-03, -5.413e-03, -4.218e-03, -6.852e-03, -7.436e-02, -1.611e-02, 7.466e-03)); + r += mul(s2_2, M4(-2.136e-03, -2.562e-02, 1.898e-02, 3.205e-02, -6.119e-03, -2.026e-02, -4.648e-03, -2.375e-04, 1.091e-02, 6.421e-02, 6.904e-03, -1.513e-02, 4.585e-03, 1.058e-02, 6.842e-03, 5.545e-03)); + r += mul(s2_3, M4(1.061e-01, -4.794e-03, 1.037e-01, 3.058e-02, 4.968e-02, 2.590e-02, 9.155e-02, 1.009e-02, -3.137e-02, 1.630e-02, -6.220e-03, 2.111e-02, -9.500e-02, 1.714e-01, -4.057e-01, 
1.782e-01)); + r += mul(s2_4, M4(2.339e-01, -5.819e-01, -2.723e-01, 6.472e-03, -1.238e-01, -1.127e-02, -6.174e-02, 6.429e-02, -1.267e-01, -3.096e-01, 1.014e-01, -2.204e-01, 2.052e-02, 1.475e-01, 6.066e-02, 9.589e-02)); + r += mul(s2_5, M4(-2.304e-02, 2.318e-02, 2.866e-02, -4.408e-02, 1.489e-02, -5.801e-02, -1.756e-03, -3.577e-02, 5.791e-02, 2.144e-01, 9.753e-03, 1.912e-01, -3.009e-02, 5.106e-03, -1.229e-02, 1.420e-02)); + r += mul(s2_6, M4(4.358e-02, 3.502e-02, -6.798e-02, 5.187e-02, -4.211e-04, 1.322e-03, 5.382e-03, 2.963e-03, -2.588e-03, -7.874e-03, -2.179e-02, -1.582e-03, -2.461e-02, -6.470e-02, -3.839e-03, 2.191e-02)); + r += mul(s2_7, M4(-6.287e-03, 2.149e-02, 7.913e-02, -5.061e-02, 1.243e-02, -7.702e-03, -4.872e-02, -1.086e-02, 1.832e-02, 9.044e-02, -5.083e-02, -6.268e-02, 1.514e-02, 1.465e-03, -1.091e-02, 6.273e-03)); + r += mul(s2_8, M4(1.213e-02, 1.402e-02, -1.088e-02, 1.310e-02, -2.250e-03, 1.604e-02, 6.334e-03, -2.184e-02, -3.757e-04, -3.529e-02, 4.777e-02, 5.153e-02, 8.795e-03, 5.222e-03, -6.544e-03, 9.581e-03)); + r += mul(s3_0, M4(-1.079e-02, 1.253e-04, 4.458e-03, -1.254e-02, 9.123e-02, 4.150e-03, 1.458e-02, 1.127e-02, 1.968e-02, 4.168e-03, -3.046e-03, -1.191e-02, -2.692e-03, -8.615e-03, -9.278e-03, 2.227e-02)); + r += mul(s3_1, M4(1.472e-03, -2.004e-02, 2.582e-02, 4.490e-02, 5.903e-02, 1.377e-01, 4.830e-02, 4.055e-02, -5.469e-03, 2.753e-02, -5.264e-03, -7.181e-03, -1.594e-02, -1.295e-01, -6.401e-03, -6.162e-03)); + r += mul(s3_2, M4(-2.532e-03, 1.243e-02, 2.114e-03, 1.702e-02, 4.265e-03, 1.309e-02, 7.171e-03, 1.492e-02, -1.846e-03, 1.942e-02, 1.357e-02, -1.592e-04, 3.933e-03, 9.982e-03, 6.534e-03, -2.432e-03)); + r += mul(s3_3, M4(7.380e-03, -2.116e-02, -1.380e-02, 2.664e-02, 7.186e-02, 4.053e-02, 1.256e-01, 2.894e-02, -1.920e-02, 1.135e-02, 3.353e-02, 2.217e-02, 8.643e-02, 4.063e-02, 1.027e-01, -2.016e-02)); + r += mul(s3_4, M4(-1.645e-02, 3.861e-02, -3.602e-02, -6.018e-02, -1.166e-01, -1.456e-02, -3.626e-02, 9.418e-02, -3.214e-02, -6.153e-02, 
-5.345e-02, 2.475e-02, -2.590e-02, 6.906e-02, -9.229e-03, -5.921e-02)); + r += mul(s3_5, M4(-1.950e-02, -1.801e-02, 1.145e-02, -5.417e-03, 1.553e-02, -4.899e-02, 3.746e-03, -2.643e-02, 1.086e-02, -1.598e-02, -2.667e-02, -1.705e-02, -2.110e-02, 2.587e-02, -7.802e-03, 4.066e-02)); + r += mul(s3_6, M4(2.483e-02, 2.045e-02, 2.890e-02, 2.709e-02, 1.065e-02, 3.481e-03, 2.060e-02, 1.064e-02, -1.112e-02, -1.077e-02, -3.564e-02, -7.229e-03, -2.201e-02, -7.123e-02, -3.014e-02, -9.062e-03)); + r += mul(s3_7, M4(-1.130e-02, -8.691e-03, -2.022e-02, -1.872e-03, 1.187e-02, -1.794e-03, -5.008e-02, -8.469e-03, 1.349e-02, 7.055e-02, 2.765e-02, -1.753e-02, 2.082e-02, 8.165e-03, -3.951e-04, 5.765e-03)); + r += mul(s3_8, M4(1.337e-02, -3.658e-03, -1.502e-02, -2.274e-02, -9.337e-04, 1.728e-02, 8.759e-03, -2.078e-02, 4.858e-03, -2.704e-02, 2.696e-02, 5.332e-03, 8.513e-03, -1.582e-03, -4.886e-03, 5.885e-03)); + r += mul(s4_0, M4(3.610e-03, 1.743e-04, 7.204e-04, -4.711e-03, -1.319e-01, -1.839e-02, -9.755e-03, -9.129e-03, -7.165e-04, -1.347e-02, 6.162e-03, 1.950e-03, 1.229e-03, 3.174e-03, 4.744e-04, 1.910e-03)); + r += mul(s4_1, M4(1.342e-02, 5.925e-03, 1.500e-02, 1.613e-02, -1.814e-01, -2.920e-01, -1.278e-02, 4.213e-03, -1.575e-02, 5.395e-03, -2.002e-03, -4.321e-03, 1.622e-03, 3.033e-03, -7.231e-03, -6.567e-03)); + r += mul(s4_2, M4(1.099e-03, 2.215e-03, -5.874e-03, -4.530e-04, 6.192e-03, -6.228e-03, -3.858e-03, -1.363e-02, 3.433e-03, -9.166e-03, 3.268e-03, 4.154e-03, -4.996e-04, -3.292e-04, -7.646e-03, -9.518e-03)); + r += mul(s4_3, M4(1.052e-02, 1.420e-02, 4.065e-03, 1.323e-02, 3.106e-02, 2.095e-02, 2.641e-02, -2.029e-02, 3.237e-02, -9.146e-03, -1.105e-02, -3.650e-02, -1.699e-02, -5.951e-03, -1.118e-02, -1.966e-02)); + r += mul(s4_4, M4(-2.217e-04, -3.200e-02, -3.792e-02, -5.471e-02, 2.181e-02, -1.362e-02, 3.526e-01, 2.753e-01, -1.813e-03, 3.984e-02, 7.449e-03, 5.883e-02, -1.606e-02, 2.281e-02, 5.425e-02, 3.770e-02)); + r += mul(s4_5, M4(1.261e-03, 1.474e-02, 7.484e-03, 1.939e-03, 
-2.267e-02, 2.509e-02, -2.069e-02, 9.510e-02, -7.844e-03, 4.294e-04, -6.696e-03, -1.740e-02, 3.167e-02, -2.329e-02, 1.160e-02, 5.096e-02)); + r += mul(s4_6, M4(2.120e-03, 9.430e-03, 1.219e-02, 7.862e-03, 3.047e-03, 1.150e-02, 4.749e-02, 2.562e-02, -9.188e-03, 2.203e-02, 3.607e-02, 3.083e-03, -2.106e-03, 2.432e-03, 1.199e-03, -4.127e-03)); + r += mul(s4_7, M4(-1.028e-02, -1.226e-02, 3.231e-02, 1.802e-02, 3.789e-03, -6.684e-03, -1.964e-02, 9.650e-03, 3.068e-03, -3.723e-02, -3.946e-02, 1.179e-02, -7.176e-03, -1.497e-02, -7.633e-02, 2.594e-02)); + r += mul(s4_8, M4(-3.920e-03, -8.312e-03, -9.664e-03, 8.452e-03, 2.961e-03, -2.312e-03, -1.455e-02, -1.813e-02, 2.082e-03, -2.123e-04, 8.967e-04, -1.630e-02, 1.116e-02, 9.301e-03, 3.381e-02, -7.034e-02)); + r += mul(s5_0, M4(1.728e-02, -1.585e-05, -9.193e-03, -6.659e-03, -1.837e-02, -1.407e-02, 1.653e-03, -5.910e-03, -1.014e-01, -1.989e-02, 1.128e-04, 7.965e-03, 6.667e-03, 5.024e-03, 7.592e-03, 3.754e-03)); + r += mul(s5_1, M4(3.763e-02, 1.312e-02, -1.504e-02, -1.373e-02, -6.919e-03, -4.613e-02, -1.367e-02, -1.003e-02, 2.991e-02, -9.302e-02, 2.019e-02, 1.075e-02, 2.194e-02, 2.247e-02, 3.750e-03, 2.476e-03)); + r += mul(s5_2, M4(9.883e-03, 2.720e-02, -1.846e-02, -2.238e-02, -1.193e-02, 5.010e-03, -9.641e-03, -5.314e-03, 5.198e-03, 6.518e-04, 4.161e-03, 6.686e-03, -2.658e-02, -7.220e-03, -7.024e-03, -1.111e-03)); + r += mul(s5_3, M4(-1.562e-02, 9.244e-03, 6.323e-02, 1.166e-02, 9.164e-03, 6.390e-03, -4.895e-02, 2.439e-03, -3.811e-02, -9.895e-02, 2.417e-01, -6.693e-02, 1.474e-02, -9.886e-05, 7.191e-03, -1.361e-02)); + r += mul(s5_4, M4(-4.278e-01, -2.293e-01, 3.549e-01, 1.887e-01, 5.394e-02, 2.415e-03, 4.315e-02, -6.372e-02, -2.050e-01, 6.035e-01, -1.450e-01, -2.261e-01, -2.389e-01, 5.383e-02, 8.571e-02, 9.038e-02)); + r += mul(s5_5, M4(-6.487e-02, -2.900e-01, 3.513e-02, 2.660e-01, -2.179e-02, 4.067e-02, -1.280e-02, 3.715e-02, -1.417e-02, -6.856e-02, -1.100e-03, 1.504e-02, 2.764e-01, -1.850e-01, -7.299e-02, -4.783e-02)); + r += 
mul(s5_6, M4(1.426e-02, 7.731e-03, -1.884e-03, 3.143e-03, 6.810e-03, 1.151e-02, 5.335e-02, 1.234e-02, 1.521e-02, 6.843e-03, -6.522e-02, 2.987e-02, -4.732e-03, -1.716e-03, 1.397e-02, 5.751e-03)); + r += mul(s5_7, M4(1.255e-02, 1.881e-02, 1.056e-02, -1.908e-02, -1.926e-03, -5.265e-03, -2.304e-03, 3.788e-02, -1.586e-02, -7.495e-02, 1.929e-01, -4.114e-02, 2.569e-01, -9.647e-02, -1.509e-01, -1.543e-01)); + r += mul(s5_8, M4(2.466e-02, 1.450e-02, -3.034e-02, -1.328e-02, 6.704e-03, -1.769e-03, -3.458e-03, 3.007e-03, 2.685e-02, -3.713e-02, -1.753e-02, -4.535e-03, 1.058e-02, -5.708e-02, 5.038e-02, 2.939e-01)); + r += mul(s6_0, M4(-4.531e-03, -8.348e-03, -5.183e-03, -1.459e-03, 5.203e-03, -1.217e-04, -5.029e-03, 7.963e-03, 2.546e-02, -3.475e-02, 8.723e-03, -1.175e-02, 3.405e-03, 6.611e-03, 1.978e-03, 2.428e-03)); + r += mul(s6_1, M4(-2.185e-03, -2.680e-03, 4.320e-03, -7.711e-03, -5.130e-02, -1.818e-02, -4.918e-02, -6.470e-02, -1.355e-01, 1.808e-01, -1.148e-02, 9.756e-04, 2.852e-03, -7.537e-02, 2.680e-02, 4.285e-02)); + r += mul(s6_2, M4(-1.070e-02, 4.954e-03, 2.978e-03, 9.844e-03, 1.107e-02, -9.837e-03, 1.935e-02, 2.246e-02, 2.193e-02, -4.628e-02, 9.669e-03, -8.352e-03, -4.623e-03, 3.010e-02, 1.293e-02, 9.552e-03)); + r += mul(s6_3, M4(7.925e-03, -5.744e-03, 3.464e-03, 1.769e-04, -1.121e-02, -1.561e-02, 1.824e-02, -3.989e-02, 2.250e-02, -3.043e-02, 3.715e-02, -6.117e-02, 3.335e-02, -1.336e-02, 5.664e-02, 1.929e-02)); + r += mul(s6_4, M4(6.696e-03, 4.596e-03, -2.174e-02, -2.013e-02, 5.530e-02, 3.743e-02, 5.506e-02, 1.587e-01, -3.331e-01, 3.474e-01, -3.669e-01, 5.025e-01, -4.944e-02, 3.737e-02, -4.266e-02, -1.045e-01)); + r += mul(s6_5, M4(-1.437e-02, 2.715e-03, -1.708e-03, 7.397e-03, -1.268e-02, 2.074e-02, -1.405e-02, -4.529e-02, 2.829e-03, 1.265e-03, 1.605e-02, -7.341e-02, -5.201e-03, 1.521e-02, -9.495e-03, 4.242e-02)); + r += mul(s6_6, M4(2.535e-04, 1.031e-02, 7.683e-03, 7.707e-04, -1.282e-02, -5.099e-03, -1.141e-02, -1.839e-02, -1.007e-02, -2.516e-02, -4.471e-03, 
-1.873e-02, -6.821e-03, -2.326e-02, -2.518e-02, -1.273e-02)); + r += mul(s6_7, M4(3.362e-03, -2.606e-02, 1.189e-02, 1.764e-02, 2.339e-02, -1.431e-02, -2.236e-02, 1.224e-03, 2.395e-03, 1.231e-02, -6.983e-02, 5.847e-02, 6.893e-03, 9.406e-03, -1.467e-02, 6.487e-03)); + r += mul(s6_8, M4(1.385e-03, 1.439e-02, -1.064e-02, -7.657e-03, 8.523e-05, 2.189e-02, 3.952e-03, -4.479e-03, 3.323e-03, -1.651e-02, 1.816e-03, -1.029e-03, 1.190e-02, -1.157e-02, 9.120e-03, -1.558e-02)); + r += mul(s7_0, M4(-4.056e-02, -2.108e-02, 1.828e-02, 1.432e-02, -3.539e-03, 1.663e-02, -9.396e-03, 4.822e-03, 8.786e-03, -8.241e-05, 4.654e-03, 8.757e-03, -8.862e-02, 2.074e-02, 2.974e-02, -1.814e-02)); + r += mul(s7_1, M4(-5.678e-02, -5.286e-02, 3.109e-02, 1.401e-02, 1.048e-01, 7.690e-02, -3.782e-02, -4.993e-02, -2.728e-02, 1.580e-02, 3.322e-03, -1.647e-02, -1.076e-01, 1.363e-01, -1.095e-02, 5.280e-02)); + r += mul(s7_2, M4(-1.123e-02, -2.316e-02, 6.307e-03, 2.675e-02, -3.419e-03, 3.929e-02, 9.452e-03, 1.004e-02, -1.637e-03, -1.911e-02, 5.418e-03, 1.204e-02, 1.972e-02, 1.721e-02, 2.992e-02, 8.433e-03)); + r += mul(s7_3, M4(1.490e-01, -9.791e-03, -1.428e-01, -1.558e-02, -6.523e-02, -1.196e-02, -6.402e-02, -3.144e-02, 5.498e-03, 1.826e-02, 2.666e-02, -1.873e-02, -3.081e-02, 2.688e-02, -1.907e-01, 8.472e-02)); + r += mul(s7_4, M4(3.719e-01, 4.177e-01, -3.323e-01, -3.875e-01, -6.588e-02, -1.706e-01, 2.773e-01, 1.075e-01, 2.692e-02, -9.144e-03, -3.408e-02, 8.546e-02, -2.251e-01, 7.381e-02, -4.385e-01, 4.693e-01)); + r += mul(s7_5, M4(-1.497e-02, 1.150e-01, -7.533e-03, -7.972e-02, -7.560e-03, 3.861e-02, -1.411e-02, 9.379e-02, -3.411e-03, -1.342e-03, -1.345e-03, -6.180e-02, 1.408e-02, 4.882e-02, 2.710e-02, 4.628e-02)); + r += mul(s7_6, M4(-1.653e-02, 4.965e-03, 3.135e-02, 1.568e-02, -9.323e-04, 9.862e-03, -1.727e-02, -1.187e-02, -1.217e-02, -1.441e-02, -2.301e-02, 8.357e-03, 5.023e-03, -1.157e-02, -4.895e-02, -4.306e-05)); + r += mul(s7_7, M4(-3.122e-02, -5.748e-02, 1.710e-02, 3.771e-02, 1.876e-02, 
-1.052e-02, -1.102e-01, -6.328e-02, 4.230e-03, 1.399e-02, 3.042e-02, -3.180e-02, -1.349e-02, 3.137e-02, -6.765e-02, 2.530e-02)); + r += mul(s7_8, M4(1.825e-03, -4.076e-04, 2.666e-03, 1.097e-03, -7.524e-03, 2.039e-02, -3.127e-02, -6.067e-02, -6.952e-03, -8.859e-03, -8.515e-03, 9.646e-03, 2.047e-02, -1.944e-02, 2.644e-02, -2.179e-02)); + r += V4(1.512e-04, -2.611e-04, 1.538e-04, -1.644e-04); + return tanh(r); +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + 
s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, 
s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-4x16C-NVL.hlsl b/src/Effects/CuNNy/CuNNy-4x16C-NVL.hlsl new file mode 100644 index 000000000..1dfab5ed6 --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-4x16C-NVL.hlsl @@ -0,0 +1,2223 @@ +// CuNNy 4x16C BILINEAR RGB NVL - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-D16N04 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t4; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t5; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t6; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t7; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1, t2, t3 + +#define l0(x, y) min16float((dot(float3(1.870e-01, 3.591e-01, 7.602e-02), O(INPUT, float2(x, y)).rgb) + -4.087e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { /* Pass 1 value stored in t0: sums the nine scalar inputs s0_0..s0_8, each scaled by a constant 4-component weight, then adds a constant 4-component bias. */ + V4 r = 0.0; + r += V4(8.371e-02, -3.545e-01, -3.840e-02, -6.216e-03) * s0_0; + r += V4(-3.142e-01, 3.579e-01, -1.140e-01, 3.073e-02) * s0_1; + r += V4(1.333e-01, -7.066e-03, 1.714e-01, 3.818e-01) * s0_2; + r += V4(-7.350e-02, 4.033e-01, 1.318e-01, -1.724e-03) * s0_3; + r += V4(-4.259e-02, -3.955e-01, -3.838e-01, -1.100e-01) * s0_4; + r += V4(1.791e-01, -2.211e-03, -1.344e-01, -2.725e-01) * s0_5; + r +=
V4(9.662e-03, -4.290e-02, 2.646e-01, 6.740e-03) * s0_6; + r += V4(5.519e-02, 3.918e-02, 1.461e-01, -2.911e-02) * s0_7; + r += V4(-4.700e-02, 5.593e-03, -4.187e-02, -2.506e-03) * s0_8; + r += V4(-1.070e-02, -1.340e-03, -2.419e-04, 5.535e-03); + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { /* Pass 1 value stored in t1: same weighted-sum-and-bias form as f0 with its own constants. */ + V4 r = 0.0; + r += V4(-2.701e-02, 8.362e-02, -1.470e-01, -2.450e-03) * s0_0; + r += V4(7.854e-02, -6.238e-02, 2.998e-01, -2.166e-02) * s0_1; + r += V4(-3.843e-02, -1.087e-03, 8.015e-03, -8.253e-03) * s0_2; + r += V4(8.528e-02, -4.219e-02, -3.129e-02, -7.154e-02) * s0_3; + r += V4(1.907e-01, -3.604e-01, -1.470e-01, -1.043e-01) * s0_4; + r += V4(-2.596e-01, 1.101e-01, -1.610e-01, 3.564e-01) * s0_5; + r += V4(-3.794e-02, -1.201e-02, -4.534e-02, -2.067e-03) * s0_6; + r += V4(-2.723e-01, -7.101e-03, 2.920e-01, -4.703e-02) * s0_7; + r += V4(2.764e-01, 2.903e-01, -6.617e-02, -1.000e-01) * s0_8; + r += V4(-6.139e-03, 1.469e-03, -2.491e-05, -8.164e-03); + return r; +} + +V4 f2(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { /* Pass 1 value stored in t2: same weighted-sum-and-bias form as f0 with its own constants. */ + V4 r = 0.0; + r += V4(-1.344e-02, -6.919e-03, -1.467e-02, -1.242e-02) * s0_0; + r += V4(-4.248e-01, 2.286e-01, 5.847e-02, 5.995e-03) * s0_1; + r += V4(-2.976e-02, 1.412e-01, 3.197e-02, -1.810e-02) * s0_2; + r += V4(1.150e-02, 3.446e-02, 1.025e-01, -2.692e-02) * s0_3; + r += V4(3.936e-01, -2.788e-01, 2.418e-01, 2.693e-01) * s0_4; + r += V4(3.597e-02, 1.973e-01, 5.150e-02, -2.774e-02) * s0_5; + r += V4(1.056e-02, -1.251e-02, 2.856e-02, 1.177e-02) * s0_6; + r += V4(1.555e-02, -2.186e-03, -4.072e-01, -4.419e-02) * s0_7; + r += V4(1.607e-03, 5.487e-03, -1.008e-01, 5.047e-03) * s0_8; + r += V4(5.744e-03, -5.855e-02, 3.652e-03, 6.364e-02); + return r; +} + +V4 f3(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3,
min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { /* Pass 1 value stored in t3: same weighted-sum-and-bias form as f0 with its own constants. */ + V4 r = 0.0; + r += V4(-1.162e-02, 2.393e-02, 3.063e-01, 1.225e-01) * s0_0; + r += V4(2.079e-02, -6.710e-02, -4.221e-02, 1.147e-01) * s0_1; + r += V4(-1.939e-02, 4.827e-03, -1.670e-01, -1.281e-02) * s0_2; + r += V4(3.828e-02, 6.747e-02, 1.639e-01, -4.572e-01) * s0_3; + r += V4(-2.201e-02, 1.626e-01, -2.213e-01, 2.021e-03) * s0_4; + r += V4(3.526e-02, -2.339e-01, -7.593e-02, -7.410e-04) * s0_5; + r += V4(-3.178e-01, -9.888e-02, 8.550e-03, 2.823e-01) * s0_6; + r += V4(2.998e-01, 3.429e-01, -1.879e-03, -6.281e-02) * s0_7; + r += V4(-1.127e-02, -2.026e-01, 2.804e-02, 1.138e-02) * s0_8; + r += V4(9.305e-03, 1.342e-03, 1.386e-03, -1.248e-03); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { /* Pass 1 entry point: for one input pixel, gathers nine l0() samples at integer offsets -1..1 in x and y around it and stores f0..f3 of those samples into t0..t3. l0(x, y) point-samples INPUT at the given offset, takes a fixed weighted sum of its RGB channels and adds a constant. */ + float2 pt = float2(GetInputPt()); /* pt scales pixel offsets into sampling coordinates inside the O() macro; presumably the size of one input texel (GetInputPt is not defined in this file) - TODO confirm. */ + uint2 gxy = Rmp8x8(tid.x) + blockStart; /* Pixel handled by this thread. Rmp8x8 is defined outside this file; presumably it maps the flat thread index onto an 8x8 tile - TODO confirm. */ + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { /* Threads whose pixel lies outside the input size write nothing. */ + return; + } + float2 pos = (gxy + 0.5) * pt; /* Sampling coordinate of the centre of pixel gxy; read implicitly by the O() macro. */ + + min16float s0_0 = l0(-1.0, -1.0); /* s0_0..s0_8 cover the offsets row by row: y = -1, 0, 1 and, within each row, x = -1, 0, 1. */ + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4
s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.915e-01, 8.617e-02, 2.783e-01, -2.880e-01, 5.170e-02, 2.674e-01, -1.035e-01, -4.085e-02, -4.754e-02, 2.783e-01, 3.582e-03, 3.390e-02, 7.482e-02, 1.070e-01, -4.532e-02, -1.152e-01)); + r += mul(s0_1, M4(1.452e-01, -8.931e-02, 9.665e-02, -2.959e-01, 1.034e-01, 8.728e-02, -7.314e-02, -3.113e-01, 7.188e-03, 5.407e-01, 9.069e-02, 1.456e-01, -5.582e-03, -4.387e-02, -3.720e-02, -1.689e-01)); + r += mul(s0_2, M4(-1.109e-01, -2.353e-01, -1.249e-01, -2.881e-02, 1.277e-02, -2.393e-01, -9.998e-02, 8.359e-02, 8.465e-02, 3.467e-01, -4.426e-01, 1.844e-01, -9.682e-03, 1.228e-02, 2.420e-01, -2.956e-03)); + r += mul(s0_3, M4(-2.872e-01, -2.808e-01, 1.123e-01, -3.129e-01, -1.782e-01, 1.057e-01, -7.654e-02, -6.484e-02, -9.183e-02, 2.257e-01, -4.162e-02, -3.929e-02, 1.257e-01, -1.185e-01, 1.132e-01, 2.286e-02)); + r += mul(s0_4, M4(-6.034e-01, -2.698e-01, 2.346e-01, 3.800e-01, 1.354e-01, -2.793e-01, -2.765e-01, 4.368e-01, 2.433e-01, 1.016e-01, -2.549e-01, 1.175e-01, -5.999e-02, 1.617e-01, 2.720e-01, 2.399e-02)); + r += mul(s0_5, M4(2.778e-01, 2.151e-01, -1.113e-01, 5.907e-02, 2.666e-02, -2.804e-01, -2.523e-01, -1.033e-01, -3.724e-01, 4.970e-02, -6.201e-02, 1.650e-01, 3.048e-02, -3.321e-02, -8.487e-02, 5.839e-02)); + r += mul(s0_6, M4(-1.577e-01, -5.794e-01, 1.435e-03, -9.709e-02, -2.404e-02, 5.901e-02, 7.698e-02, 3.494e-02, 9.943e-02, -2.683e-03, 3.986e-02, 1.381e-01, -1.134e-01, -7.731e-03, -3.773e-01, -7.648e-02)); + r += 
mul(s0_7, M4(-6.406e-01, 2.884e-01, 1.041e-01, -2.176e-01, 3.817e-02, -6.199e-01, -1.545e-01, 2.707e-01, 2.183e-01, -4.624e-02, -1.211e-01, -1.331e-01, 8.447e-02, -5.417e-02, -1.235e-01, -5.106e-02)); + r += mul(s0_8, M4(1.502e-01, 5.535e-01, -4.493e-01, 5.334e-02, 2.547e-01, -3.920e-01, -1.611e-01, 3.136e-01, -2.486e-01, -1.901e-01, 1.735e-01, 1.422e-01, -1.382e-01, 1.335e-01, -6.865e-02, 5.767e-02)); + r += mul(s1_0, M4(-1.072e-01, 6.178e-02, -8.950e-02, -4.395e-02, 2.797e-02, 1.097e-01, -1.335e-02, 2.105e-01, -5.711e-02, -7.394e-02, 6.051e-02, -1.298e-01, 8.520e-02, 1.592e-01, -1.618e-02, 2.063e-01)); + r += mul(s1_1, M4(-9.313e-02, 1.554e-01, 2.962e-01, 2.182e-01, -2.366e-02, -1.189e-01, 1.419e-02, -3.432e-03, -2.118e-01, -1.505e-01, 1.335e-01, -3.626e-01, -1.272e-01, -2.990e-01, -1.156e-01, 2.206e-01)); + r += mul(s1_2, M4(2.653e-02, 6.007e-03, -9.481e-02, 2.164e-03, -6.669e-02, 1.896e-02, -2.685e-01, 1.459e-02, 1.478e-01, -1.619e-01, 9.891e-02, 4.510e-02, -6.737e-02, 8.488e-02, 2.175e-01, 1.367e-01)); + r += mul(s1_3, M4(8.973e-02, 1.315e-01, -9.112e-02, -2.657e-02, 4.734e-02, -1.120e-01, 1.049e-01, 8.353e-02, -3.940e-02, -1.524e-02, -4.120e-02, -4.177e-02, -1.102e-01, 4.304e-01, -1.386e-01, 5.626e-02)); + r += mul(s1_4, M4(1.491e-01, 1.458e-01, 9.985e-02, -3.261e-02, 1.666e-01, -3.331e-01, 1.899e-02, 7.565e-02, 4.484e-03, -2.159e-01, 3.560e-03, 4.046e-02, 2.046e-01, -3.285e-01, -3.512e-01, -1.651e-01)); + r += mul(s1_5, M4(-3.374e-01, -4.701e-02, -5.227e-02, -5.245e-02, -3.441e-01, 1.487e-01, 1.912e-01, -1.615e-01, -1.529e-01, -1.536e-02, 1.713e-01, -2.163e-01, -2.387e-03, 6.917e-02, 2.498e-01, 9.538e-02)); + r += mul(s1_6, M4(2.198e-02, 2.846e-02, -7.364e-02, -9.207e-02, 1.338e-01, -1.202e-01, 1.576e-01, 1.028e-01, 4.717e-02, -3.245e-02, -1.692e-02, 1.459e-01, 3.656e-01, -1.545e-01, 4.262e-01, 2.335e-02)); + r += mul(s1_7, M4(2.171e-01, -1.561e-01, 8.276e-02, 2.344e-02, -1.918e-01, 2.082e-01, -2.113e-01, -2.870e-01, -4.678e-02, -7.053e-02, -3.567e-03, 
-7.122e-02, -5.404e-01, 1.409e-02, 6.304e-01, -2.495e-02)); + r += mul(s1_8, M4(-2.908e-02, -3.121e-01, 2.461e-03, 1.053e-02, 6.840e-02, 2.736e-01, 5.635e-01, -1.502e-01, -2.910e-02, 1.594e-02, 1.646e-01, -9.596e-02, -1.050e-01, -1.280e-01, 1.544e-01, 1.133e-01)); + r += mul(s2_0, M4(-1.709e-01, 4.813e-02, -1.308e-01, -3.929e-01, -2.671e-01, 1.794e-01, -3.473e-02, -1.082e-01, 1.704e-01, -9.371e-02, 1.177e-01, 1.082e-01, -1.074e-01, -1.489e-01, -6.564e-02, 2.592e-01)); + r += mul(s2_1, M4(-3.982e-01, 1.608e-01, 2.630e-01, 2.781e-01, 9.405e-02, 3.370e-01, 1.315e-01, -2.692e-01, 6.091e-02, -8.330e-02, 1.807e-01, 1.158e-01, 1.087e-01, -4.806e-01, -1.911e-02, 4.111e-01)); + r += mul(s2_2, M4(7.164e-04, 1.734e-01, 1.819e-01, 6.334e-02, -2.757e-01, -2.027e-02, 2.200e-02, -5.364e-02, 1.775e-01, 9.564e-02, 1.428e-01, 1.097e-01, 2.450e-01, 3.822e-02, -1.977e-01, 1.627e-01)); + r += mul(s2_3, M4(1.574e-02, 1.611e-01, -2.701e-01, 2.160e-01, 1.052e-01, 1.880e-01, 1.925e-01, 1.191e-01, 2.600e-01, -8.277e-02, 2.963e-02, 1.030e-01, 2.238e-01, -5.039e-02, 2.048e-01, -1.014e-01)); + r += mul(s2_4, M4(-3.232e-01, 1.614e-01, 3.728e-01, 1.739e-02, 2.750e-01, 9.738e-02, -1.019e-01, -2.264e-01, 2.518e-02, -2.377e-01, 1.322e-01, 8.435e-02, 4.010e-01, 2.758e-01, -4.264e-01, 7.319e-02)); + r += mul(s2_5, M4(-6.737e-02, 2.770e-01, -3.094e-01, 1.656e-01, -1.544e-01, -2.708e-01, 1.999e-01, -7.944e-02, 1.547e-01, 8.519e-02, 1.132e-01, 2.361e-01, 2.004e-01, -3.373e-01, -1.710e-01, -7.043e-02)); + r += mul(s2_6, M4(1.828e-01, 1.317e-01, -8.375e-02, -2.125e-01, -7.807e-02, -1.727e-01, 3.817e-02, 4.709e-02, -5.390e-02, -5.449e-02, -4.993e-02, 1.147e-01, -1.161e-01, 4.468e-01, 8.868e-02, -8.974e-02)); + r += mul(s2_7, M4(9.288e-02, -3.211e-02, 2.103e-01, -1.412e-01, -2.230e-02, -3.565e-01, -1.209e-01, -5.526e-02, 9.967e-02, 9.655e-02, 5.062e-02, -1.023e-01, 5.712e-02, -6.176e-02, -3.075e-02, 2.405e-02)); + r += mul(s2_8, M4(3.821e-02, -6.003e-02, -1.239e-01, -2.513e-01, -8.277e-02, -1.232e-01, 
6.995e-02, -1.091e-01, 2.963e-01, 9.518e-02, 1.699e-01, 9.288e-02, 1.102e-01, -8.510e-02, 2.233e-01, 8.911e-03)); + r += mul(s3_0, M4(1.078e-01, -6.877e-02, 4.785e-02, 4.049e-03, 2.763e-01, 2.047e-01, -1.208e-01, -3.427e-02, -5.237e-02, -7.717e-02, 1.456e-01, -2.029e-01, -5.558e-02, -3.988e-03, -5.646e-02, -6.426e-02)); + r += mul(s3_1, M4(2.635e-01, 5.426e-02, -3.762e-01, -8.153e-02, -2.918e-01, 2.323e-01, -4.968e-02, 2.672e-01, -1.023e-01, 2.621e-02, -9.011e-02, -2.466e-01, -9.320e-02, 8.727e-02, 6.309e-02, 3.082e-02)); + r += mul(s3_2, M4(-7.310e-02, 3.103e-02, 3.669e-01, -1.146e-01, 2.471e-01, 8.364e-02, -2.383e-02, 2.244e-01, -1.019e-02, 1.156e-02, 1.421e-02, -1.744e-02, 6.525e-02, -6.749e-02, -1.353e-01, -4.656e-02)); + r += mul(s3_3, M4(5.752e-02, -2.742e-01, 2.097e-01, 1.157e-01, 5.903e-02, 7.109e-02, 1.085e-01, -1.613e-01, -1.231e-01, -1.270e-02, -9.659e-02, -1.619e-01, -1.958e-01, 2.087e-01, -9.484e-02, -7.992e-02)); + r += mul(s3_4, M4(-6.251e-02, -2.237e-01, -2.261e-01, -1.846e-03, 2.110e-01, -2.218e-01, -2.144e-01, 2.588e-01, -2.198e-01, 1.925e-01, -1.086e-01, -1.815e-02, 5.868e-02, 1.777e-01, 1.376e-01, 1.380e-01)); + r += mul(s3_5, M4(6.631e-02, 8.099e-02, 2.929e-01, 1.477e-01, 1.644e-02, 2.053e-01, -1.695e-01, 9.151e-02, -2.903e-02, 1.207e-01, -1.663e-01, -9.041e-02, 2.284e-02, 4.472e-02, -2.491e-02, 9.218e-02)); + r += mul(s3_6, M4(-1.521e-01, 1.273e-01, -3.648e-02, 2.055e-02, 3.445e-03, 9.287e-03, 6.272e-03, 1.347e-01, 8.115e-02, 1.683e-01, -8.573e-03, -8.772e-02, 1.265e-01, -1.132e-01, 7.200e-02, 7.849e-02)); + r += mul(s3_7, M4(-1.989e-01, 1.623e-01, -3.536e-02, 1.904e-01, 1.371e-01, 1.042e-02, -7.387e-02, 2.075e-01, -2.366e-01, 4.228e-02, -1.673e-01, -2.606e-02, 2.563e-02, -1.812e-01, -2.389e-02, -1.613e-01)); + r += mul(s3_8, M4(4.839e-02, 1.181e-02, -1.900e-01, -1.261e-01, -1.134e-02, 7.055e-02, -4.342e-03, 2.042e-01, -1.193e-01, 1.091e-02, -1.185e-01, -1.153e-01, -5.787e-02, -5.799e-02, 3.598e-02, -1.043e-02)); + r += mul(s4_0, M4(4.490e-03, 
-6.261e-02, 1.111e-01, -2.427e-02, -1.759e-01, 1.041e-01, -3.451e-02, -6.152e-02, -2.537e-01, 5.739e-02, -2.566e-02, 3.164e-02, 5.651e-03, 7.279e-02, 7.798e-02, 8.355e-02)); + r += mul(s4_1, M4(1.033e-01, 1.069e-01, 6.319e-02, -3.446e-02, 1.358e-01, 1.130e-01, 3.079e-01, -1.337e-01, 1.295e-01, -4.400e-01, -2.262e-01, -2.545e-02, 4.901e-02, -5.701e-02, -9.943e-02, -1.870e-02)); + r += mul(s4_2, M4(-1.782e-01, -5.946e-02, -5.404e-02, 1.318e-01, -9.018e-02, -1.477e-01, -3.033e-01, -1.501e-01, 5.032e-02, -2.488e-01, -1.183e-01, -4.715e-03, -9.155e-04, 1.007e-03, 3.462e-02, -7.236e-02)); + r += mul(s4_3, M4(2.654e-02, -1.238e-01, -8.066e-02, -5.186e-02, -6.032e-01, 3.248e-01, 6.459e-02, -2.177e-01, -5.860e-02, -9.400e-02, 2.884e-02, 5.215e-02, -8.824e-02, -2.220e-03, 2.254e-02, 6.366e-02)); + r += mul(s4_4, M4(-3.236e-01, 5.681e-01, 1.250e-01, 1.640e-03, 4.444e-01, -2.916e-02, -3.283e-01, 6.014e-01, 8.423e-02, -1.713e-01, -9.128e-02, 2.059e-02, 8.158e-02, -7.455e-02, -5.478e-02, -6.427e-02)); + r += mul(s4_5, M4(2.456e-01, -2.188e-01, -4.943e-01, 1.680e-01, 1.525e-03, -2.317e-01, -6.046e-02, -8.908e-02, -1.479e-02, 1.991e-01, 2.361e-01, -1.998e-02, -2.287e-02, 1.821e-02, -8.832e-02, 3.747e-02)); + r += mul(s4_6, M4(-1.173e-01, 3.572e-02, 5.401e-02, 9.243e-02, -3.274e-01, -2.456e-02, 2.820e-02, -7.847e-01, -4.977e-02, 9.341e-03, 4.813e-03, 1.187e-01, -7.300e-02, 4.657e-02, 3.412e-02, 7.358e-02)); + r += mul(s4_7, M4(1.495e-01, 8.350e-03, 2.992e-01, 2.885e-01, 3.542e-01, 8.891e-02, -4.413e-01, -3.464e-01, 5.170e-02, -7.054e-03, -1.080e-01, -1.595e-01, 2.983e-02, 7.148e-03, -7.949e-02, -1.909e-02)); + r += mul(s4_8, M4(1.172e-03, 4.500e-02, -2.422e-01, 1.899e-02, 1.485e-01, -6.020e-02, 7.405e-02, 2.323e-02, 4.174e-02, 4.857e-02, 9.561e-02, 3.943e-02, 4.867e-02, 4.024e-02, 1.465e-01, 1.642e-02)); + r += mul(s5_0, M4(1.130e-01, 5.549e-02, 1.454e-01, -8.837e-02, -4.060e-02, -1.125e-02, -5.483e-02, -8.110e-02, 1.469e-01, -2.157e-01, 1.990e-02, -5.508e-02, 4.400e-01, 6.614e-01, 
-2.171e-01, 1.001e+00)); + r += mul(s5_1, M4(-5.725e-02, -2.493e-01, -3.369e-01, -1.876e-01, 3.814e-02, -1.004e-02, 1.808e-01, -2.036e-02, 4.590e-02, -8.456e-02, 4.353e-01, 1.158e-01, 2.519e-01, -6.799e-02, 1.615e-01, -4.632e-01)); + r += mul(s5_2, M4(1.155e-01, 1.853e-01, 1.455e-01, 2.910e-02, -4.775e-02, -8.169e-03, -1.597e-01, -2.954e-02, -1.888e-01, 7.739e-02, -2.427e-01, 1.965e-01, -5.393e-01, -6.719e-01, 3.822e-01, -5.405e-01)); + r += mul(s5_3, M4(-1.053e-01, -2.010e-01, 4.452e-02, 1.220e-01, -1.387e-01, -1.663e-02, 6.420e-02, 5.683e-02, 1.482e-02, -1.238e-01, -5.976e-02, 9.798e-02, 1.887e-01, 2.244e-01, -1.055e-01, 8.778e-01)); + r += mul(s5_4, M4(2.642e-01, -2.471e-01, 1.213e-01, -4.243e-03, 2.136e-01, -1.197e-01, -1.370e-01, 1.747e-01, -1.902e-01, 4.095e-01, 2.239e-01, -1.064e-01, -1.516e+00, -5.901e-01, -7.535e-01, -9.025e-01)); + r += mul(s5_5, M4(1.336e-01, -1.581e-01, 4.231e-01, -5.318e-03, -6.304e-02, 1.581e-02, 1.323e-01, -1.488e-02, -4.654e-02, -1.028e-01, -2.243e-01, -6.142e-03, -4.791e-02, 8.956e-01, 8.111e-01, -6.925e-01)); + r += mul(s5_6, M4(7.804e-02, -9.557e-03, 4.499e-02, 4.405e-02, -6.914e-02, 4.490e-02, 9.597e-02, -1.751e-01, -6.983e-02, -1.291e-01, 1.312e-02, 1.856e-01, 4.377e-01, 3.248e-01, 5.635e-01, 3.066e-02)); + r += mul(s5_7, M4(-3.601e-02, -4.945e-02, -3.836e-01, -1.191e-01, -4.540e-03, 1.375e-02, -6.256e-02, -6.577e-02, -1.628e-01, -2.256e-02, 2.093e-01, 2.254e-01, 2.224e-01, 2.670e-01, 4.099e-01, 5.051e-02)); + r += mul(s5_8, M4(-1.848e-02, 1.601e-01, 2.141e-01, -5.077e-02, 6.266e-02, 3.822e-02, -3.642e-02, -1.121e-02, -6.476e-02, -1.910e-01, -1.888e-02, -1.003e-01, 2.444e-01, 4.690e-01, -2.952e-01, 7.102e-02)); + r += mul(s6_0, M4(7.241e-02, -4.093e-02, 2.419e-02, 2.711e-02, 2.755e-01, -1.572e-01, -8.057e-02, -7.000e-03, 1.500e-02, 3.219e-02, 2.177e-03, -1.652e-02, -1.219e-02, 3.024e-01, 1.092e-01, -9.745e-03)); + r += mul(s6_1, M4(-1.114e-01, 1.200e-02, 3.204e-04, -2.240e-02, -2.077e-01, 9.453e-02, 1.596e-02, -4.109e-02, 
-1.172e-02, 1.225e-01, -3.765e-02, -1.096e-02, 3.648e-02, 2.852e-01, -7.502e-02, -1.534e-01)); + r += mul(s6_2, M4(1.341e-01, 2.011e-01, -2.868e-02, 1.322e-01, -5.997e-02, 8.211e-02, 1.455e-03, -1.164e-01, -1.329e-01, -3.898e-02, -8.764e-02, 2.576e-02, -3.821e-03, -4.150e-01, -4.675e-01, 2.473e-01)); + r += mul(s6_3, M4(-3.463e-03, 7.775e-02, 6.833e-02, 2.438e-02, 1.574e-01, -1.770e-01, -9.328e-02, 1.130e-03, 1.146e-01, 4.248e-02, 6.905e-03, -1.858e-01, -6.367e-02, -6.603e-02, 4.715e-02, -5.800e-02)); + r += mul(s6_4, M4(-2.588e-02, 5.901e-02, -3.184e-02, 1.589e-02, -1.631e-01, 8.692e-02, 1.868e-01, 1.412e-01, 3.177e-01, -3.267e-01, -1.985e-02, -3.030e-01, 3.703e-02, -2.103e-01, 1.394e-03, -8.820e-02)); + r += mul(s6_5, M4(7.007e-02, 1.945e-02, -1.305e-01, -8.563e-02, 9.467e-02, -2.749e-03, -2.522e-01, 4.262e-02, -3.563e-01, -2.511e-01, 2.137e-01, -1.280e-01, 1.020e-01, 1.425e-01, 4.868e-03, 8.239e-02)); + r += mul(s6_6, M4(5.757e-02, 1.342e-02, 1.885e-02, -1.555e-02, 2.024e-01, 1.458e-02, 5.292e-02, -2.133e-01, -1.199e-02, 6.484e-02, 6.354e-02, -1.145e-01, -7.400e-02, 5.425e-02, 1.474e-02, -5.936e-02)); + r += mul(s6_7, M4(-4.723e-02, -3.575e-02, -2.449e-02, 4.364e-02, 4.840e-02, 1.412e-01, 5.925e-02, 2.112e-01, -1.469e-01, -1.052e-01, 1.318e-01, -9.390e-02, -1.663e-01, 4.892e-02, 6.059e-02, 1.907e-02)); + r += mul(s6_8, M4(1.405e-01, -8.783e-02, 2.731e-02, -2.606e-02, -1.139e-01, 1.528e-01, -1.996e-01, -3.131e-02, -1.743e-01, -1.660e-01, -1.564e-02, -6.395e-02, -2.327e-01, 1.027e-01, 1.346e-01, 2.954e-01)); + r += mul(s7_0, M4(-2.209e-01, 3.902e-01, -1.621e-01, 6.093e-02, -1.620e-01, -3.094e-01, 2.904e-01, 8.403e-02, 3.714e-02, 1.173e-01, 2.281e-03, 1.580e-01, -1.034e-01, 1.150e-01, 5.607e-02, 5.691e-02)); + r += mul(s7_1, M4(5.886e-01, -3.046e-01, 3.721e-01, -1.162e-01, 6.265e-02, -1.670e-01, 7.349e-02, -3.569e-02, 7.806e-02, 2.046e-03, -6.049e-03, -2.090e-01, 3.608e-02, -1.603e-01, 8.953e-02, -1.140e-01)); + r += mul(s7_2, M4(-2.383e-01, -3.387e-01, -9.132e-04, 
-5.473e-01, -3.308e-03, -2.049e-01, 9.447e-03, -3.845e-02, -5.569e-02, -2.745e-01, -6.863e-02, 2.397e-01, 2.584e-01, 3.671e-01, 4.496e-01, -1.728e-01)); + r += mul(s7_3, M4(9.881e-02, 4.344e-01, 1.883e-01, 1.685e-02, -2.303e-01, -7.714e-02, -1.158e-01, 7.909e-03, -3.606e-02, 2.296e-02, 2.534e-02, 1.496e-01, -7.556e-03, -9.981e-03, 6.978e-02, 6.716e-02)); + r += mul(s7_4, M4(3.535e-01, -2.261e-01, -3.110e-01, -1.825e-01, 1.536e-01, 7.248e-02, -1.235e-02, -1.980e-01, -6.042e-02, -1.250e-01, -5.143e-02, 2.953e-01, -1.616e-01, 9.051e-02, 1.251e-01, 2.020e-02)); + r += mul(s7_5, M4(-3.339e-01, -1.343e-01, 1.744e-01, 2.897e-01, 6.002e-02, -4.475e-02, 5.910e-02, -1.401e-01, -7.893e-02, 1.294e-01, -7.917e-02, -3.911e-02, 2.927e-02, -2.521e-01, 1.366e-01, 2.207e-01)); + r += mul(s7_6, M4(8.209e-02, -5.761e-02, 5.538e-02, 1.485e-01, -4.020e-02, 2.563e-01, 7.761e-02, -2.641e-02, 1.096e-01, 6.517e-02, 7.305e-02, 6.710e-02, 2.998e-02, -8.325e-02, -5.355e-02, -3.583e-03)); + r += mul(s7_7, M4(1.182e-01, -6.605e-02, -1.125e-01, -2.782e-01, 1.988e-01, 6.287e-02, 1.677e-01, 7.684e-02, 9.364e-02, 1.821e-01, -1.918e-01, 1.370e-01, -2.287e-01, 4.243e-02, 3.614e-02, -1.039e-01)); + r += mul(s7_8, M4(-7.800e-02, -1.585e-02, 7.305e-02, -1.289e-01, 2.250e-03, -1.136e-02, -1.836e-01, -3.147e-02, 1.269e-01, -3.905e-02, -7.012e-02, 2.610e-01, 1.584e-01, -3.807e-02, -1.562e-01, -3.618e-01)); + r += V4(-5.962e-03, -3.537e-03, 1.371e-03, -3.011e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, 
V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.493e-01, -1.360e-03, 1.121e-02, -2.183e-01, 1.482e-01, 1.005e-01, 2.226e-04, 2.673e-02, -6.415e-02, 1.177e-01, 1.227e-01, -7.412e-03, -6.499e-02, 9.640e-02, -6.468e-02, -7.867e-02)); + r += mul(s0_1, M4(5.620e-02, 1.236e-01, 2.228e-01, -1.802e-01, -5.798e-02, 4.350e-02, 2.388e-01, 3.069e-02, -2.787e-01, 3.844e-01, 8.522e-02, 1.007e-01, -2.076e-02, 8.274e-02, 1.084e-01, -5.538e-02)); + r += mul(s0_2, M4(-1.091e-01, 1.605e-01, -3.383e-02, -2.930e-02, 5.375e-02, -1.329e-02, 2.408e-01, -8.769e-02, -1.789e-02, 2.825e-01, -2.359e-02, -4.484e-02, -9.254e-02, 1.488e-02, 4.321e-03, 7.121e-02)); + r += mul(s0_3, M4(-1.047e-01, -3.072e-01, -1.337e-01, -2.016e-01, -8.555e-02, 8.769e-02, -2.449e-02, -2.339e-01, -6.242e-02, 2.778e-01, 3.091e-02, 2.197e-01, 2.342e-01, -2.435e-01, 4.551e-02, -2.338e-02)); + r += mul(s0_4, M4(3.583e-01, 2.164e-01, 3.505e-02, 1.225e-01, 7.236e-03, -8.012e-02, 4.456e-01, -7.709e-01, 7.706e-02, 1.436e-01, 3.503e-01, -2.874e-01, -8.961e-02, 1.931e-01, -7.575e-02, 3.137e-02)); + r += mul(s0_5, M4(2.393e-01, 4.598e-01, -4.286e-01, -1.308e-01, -2.729e-01, 2.429e-02, 1.458e-01, -2.588e-01, -4.170e-01, 6.320e-01, -2.866e-01, 2.631e-01, 1.276e-01, 5.969e-02, -1.602e-01, -5.755e-02)); + r += mul(s0_6, M4(-3.896e-01, -2.052e-01, 4.924e-02, -3.981e-02, 9.718e-02, 6.549e-02, -8.761e-02, -3.933e-01, -4.399e-02, -4.761e-03, 9.525e-02, -1.152e-01, -1.571e-01, 2.094e-02, -6.634e-02, 4.642e-02)); + r += mul(s0_7, M4(1.463e-03, 6.666e-02, 4.664e-01, -2.558e-01, -2.090e-01, -1.326e-01, -3.289e-01, -2.566e-01, -2.533e-01, 2.534e-01, 1.027e-01, 2.202e-01, -2.342e-01, -8.858e-03, -1.037e-01, 1.784e-01)); + r += mul(s0_8, M4(1.968e-01, -1.747e-02, 3.331e-01, 1.073e-01, -1.047e-01, 2.814e-03, -6.119e-02, -8.912e-02, -1.593e-01, 2.664e-01, 1.753e-01, 9.957e-02, -1.679e-02, 1.427e-01, 1.291e-01, 6.444e-02)); + r += 
mul(s1_0, M4(9.660e-02, 3.016e-02, 4.277e-02, -4.001e-02, 8.294e-02, -5.211e-02, 1.711e-03, -3.301e-02, 2.325e-01, -5.042e-02, -1.068e-01, 1.108e-01, 1.222e-03, -1.030e-01, 1.842e-01, -1.213e-01)); + r += mul(s1_1, M4(3.118e-02, -1.874e-02, -1.168e-02, 3.724e-02, -5.922e-02, -3.813e-02, 6.865e-02, 9.811e-02, 3.009e-01, -1.214e-01, -1.653e-01, -1.361e-02, -9.460e-02, -4.843e-01, -1.923e-01, -4.082e-02)); + r += mul(s1_2, M4(-4.070e-02, -6.820e-02, -4.630e-02, 3.216e-02, 1.371e-02, 1.069e-01, 7.885e-02, -1.577e-02, -2.718e-02, -4.756e-01, -4.672e-03, -2.539e-01, 1.984e-02, 3.908e-02, -6.613e-02, 8.673e-02)); + r += mul(s1_3, M4(-3.718e-02, 6.549e-02, 1.625e-02, 2.071e-01, -6.907e-02, 6.928e-02, -1.844e-01, -1.591e-02, 1.394e-01, -1.004e-02, 1.170e-01, 3.020e-01, 5.181e-02, -5.939e-02, -5.880e-02, 7.376e-02)); + r += mul(s1_4, M4(1.063e-01, -1.026e-01, 1.934e-01, -2.595e-02, 1.901e-01, -1.462e-01, -5.239e-01, 8.369e-02, 1.658e-01, -2.750e-01, 1.504e-02, -1.351e-01, 4.891e-04, -1.481e-01, 5.045e-02, 1.154e-01)); + r += mul(s1_5, M4(-1.030e-01, -1.654e-01, 1.218e-01, 2.341e-02, 9.236e-02, 3.391e-01, -3.344e-01, 7.693e-02, -3.664e-02, -1.771e-01, -3.157e-01, -1.499e-01, 3.311e-02, -1.792e-01, 2.381e-01, 1.076e-01)); + r += mul(s1_6, M4(7.149e-02, -4.252e-02, -1.031e-01, -1.888e-01, -1.277e-01, -5.187e-03, 2.521e-01, -8.569e-02, 2.787e-02, -1.504e-01, -4.477e-02, -1.184e-01, 4.185e-01, -4.126e-02, 2.010e-01, -4.618e-02)); + r += mul(s1_7, M4(-6.966e-03, -5.100e-02, -1.230e-01, -2.367e-01, 4.423e-02, 3.513e-02, 2.033e-01, 3.137e-01, -6.549e-02, -1.007e-01, -1.392e-01, 9.041e-02, -1.890e-01, -2.362e-01, 3.349e-01, 2.219e-01)); + r += mul(s1_8, M4(4.125e-02, 1.474e-01, -1.518e-01, 1.076e-01, 4.368e-02, -2.465e-01, 1.384e-01, -1.506e-01, 1.370e-01, 1.435e-02, -2.052e-01, -7.013e-03, -2.786e-01, -1.330e-01, -2.805e-01, -1.722e-01)); + r += mul(s2_0, M4(2.952e-01, 4.441e-02, -4.741e-01, 4.375e-01, 2.978e-01, -7.597e-02, 1.013e-01, 4.516e-02, -2.158e-01, -1.479e-01, 1.699e-01, 
-9.469e-02, 1.567e-01, 6.033e-02, -4.819e-02, 2.453e-02)); + r += mul(s2_1, M4(4.749e-01, 2.013e-01, 1.305e-01, -4.380e-02, 2.683e-01, -1.753e-01, -7.340e-02, -1.215e-01, -2.864e-01, -2.927e-01, -6.246e-02, 1.608e-01, -2.835e-01, 8.082e-03, 5.970e-02, 2.425e-01)); + r += mul(s2_2, M4(1.911e-01, -3.841e-02, 1.451e-02, -1.175e-01, 4.068e-02, -1.027e-01, 5.235e-02, 3.985e-02, 2.603e-02, -3.411e-03, -4.188e-03, -3.843e-02, -4.186e-02, -3.254e-01, 1.096e-01, -8.038e-03)); + r += mul(s2_3, M4(2.730e-01, 8.854e-02, 3.190e-01, 3.133e-01, 1.826e-02, 6.850e-02, -3.333e-02, 1.059e-01, 6.928e-02, -1.145e-02, 2.283e-01, -8.865e-02, 2.391e-01, -1.820e-01, -3.573e-02, -1.730e-01)); + r += mul(s2_4, M4(4.755e-01, -1.270e-01, 4.131e-01, 2.268e-01, 2.208e-01, -3.773e-01, 1.155e-01, -1.669e-01, 2.618e-01, -6.293e-02, -4.787e-02, -1.925e-01, 6.307e-01, 2.681e-01, -2.374e-01, -8.996e-02)); + r += mul(s2_5, M4(2.443e-01, -2.722e-01, 8.635e-02, -1.306e-01, 3.156e-02, 1.101e-01, -3.838e-01, 8.736e-02, 1.960e-02, -1.844e-01, 3.986e-04, -1.759e-01, -7.092e-03, -3.461e-01, -2.356e-01, 1.175e-01)); + r += mul(s2_6, M4(-2.070e-02, 1.906e-01, -3.961e-01, 5.045e-02, -1.113e-01, -3.115e-02, -2.427e-01, 8.359e-02, 6.035e-02, -2.168e-02, -7.367e-02, 1.275e-01, 1.385e-01, -4.470e-02, -8.486e-03, 2.152e-01)); + r += mul(s2_7, M4(1.535e-01, 1.293e-01, 3.150e-03, 1.369e-01, -3.304e-02, 7.236e-03, -1.207e-02, 3.864e-02, 1.884e-02, -3.147e-01, 5.685e-02, 1.124e-01, 1.616e-01, 6.865e-02, -2.169e-02, 1.214e-01)); + r += mul(s2_8, M4(-2.751e-02, 1.530e-02, -2.166e-01, 1.189e-01, 1.530e-01, -4.993e-02, -2.652e-02, -3.569e-02, -5.124e-02, -2.675e-01, -1.650e-01, -1.609e-02, -9.086e-02, 3.321e-02, 7.116e-03, 8.445e-02)); + r += mul(s3_0, M4(-1.433e-01, 2.111e-02, 8.117e-02, -2.792e-01, -4.618e-01, 2.032e-01, -8.773e-02, -1.422e-01, 1.648e-02, 2.203e-03, 2.217e-02, 1.057e-01, 1.814e-01, -3.271e-02, 5.886e-02, 1.601e-01)); + r += mul(s3_1, M4(3.798e-02, -9.214e-02, 1.001e-01, -5.864e-02, -2.529e-01, 1.986e-01, 
1.937e-01, 1.154e-01, -8.759e-02, 2.373e-01, -1.875e-01, -5.515e-02, -1.158e-02, 3.020e-01, -1.429e-01, 6.739e-03)); + r += mul(s3_2, M4(-2.931e-02, 1.164e-01, 1.347e-01, 4.203e-02, -5.261e-02, 1.060e-01, 9.737e-02, -8.234e-02, -1.152e-01, 5.234e-02, 5.639e-02, 2.082e-02, 7.628e-02, 4.062e-02, -1.446e-01, -1.245e-01)); + r += mul(s3_3, M4(3.235e-02, -1.423e-01, -2.326e-01, -1.616e-01, -6.639e-02, 1.896e-01, 1.743e-01, 7.735e-02, -1.749e-02, -6.962e-02, 1.095e-01, 2.082e-01, -1.188e-01, 6.600e-02, -3.677e-02, 2.052e-01)); + r += mul(s3_4, M4(-2.059e-01, 1.696e-01, -4.044e-01, 1.401e-01, -1.428e-01, 4.176e-01, -2.219e-03, 6.759e-02, 1.705e-01, 1.514e-01, 3.040e-01, -2.451e-01, -8.452e-02, -2.797e-01, 2.688e-01, 4.933e-02)); + r += mul(s3_5, M4(1.261e-01, 1.920e-02, -1.831e-01, -7.127e-02, -2.744e-01, 1.421e-01, 1.497e-01, 1.165e-01, -9.432e-02, 8.987e-02, -6.732e-01, 1.422e-01, 7.804e-02, -7.633e-03, 2.764e-01, 1.964e-02)); + r += mul(s3_6, M4(-1.383e-02, 5.891e-02, 1.599e-01, -7.055e-02, -1.188e-01, 1.840e-02, 1.678e-01, -2.558e-02, 1.142e-01, 9.206e-02, -7.608e-02, 1.032e-01, 3.337e-02, -3.068e-02, -8.554e-02, -1.006e-01)); + r += mul(s3_7, M4(5.861e-02, 4.090e-02, 2.595e-01, 1.175e-01, -3.135e-01, -4.337e-02, -4.331e-02, -2.942e-02, -1.244e-01, 2.569e-01, 9.823e-02, 1.764e-01, -8.771e-02, -1.305e-01, -1.500e-01, -3.085e-01)); + r += mul(s3_8, M4(2.161e-02, -6.209e-02, 1.138e-01, 6.710e-02, -9.868e-02, 5.677e-02, 1.302e-01, -4.357e-02, 4.976e-03, 1.871e-01, 1.746e-01, 1.241e-02, -4.849e-02, 5.190e-02, -2.857e-02, -6.322e-02)); + r += mul(s4_0, M4(-4.927e-02, -4.089e-02, 8.644e-03, 5.684e-02, -5.871e-02, -5.351e-02, 2.254e-02, -3.264e-02, 1.201e-01, -1.862e-01, -2.446e-02, -3.252e-02, -2.362e-02, 4.419e-02, 3.543e-02, 2.515e-02)); + r += mul(s4_1, M4(8.533e-02, 9.602e-02, -4.276e-02, 2.019e-02, 6.095e-02, 2.188e-01, 5.990e-02, 1.652e-01, 1.754e-01, -5.736e-02, 6.605e-02, -1.228e-01, -9.409e-02, 1.920e-02, 3.324e-02, 2.614e-02)); + r += mul(s4_2, M4(-1.835e-02, 
-3.571e-02, -5.266e-02, 4.572e-02, 1.883e-01, -1.398e-01, -2.163e-01, 1.244e-02, 1.965e-01, 2.415e-01, -2.188e-01, -4.871e-03, -5.044e-03, -1.593e-02, 1.016e-02, -8.609e-02)); + r += mul(s4_3, M4(-7.819e-03, 1.220e-01, -2.014e-01, 1.773e-01, -5.222e-01, -5.908e-01, 2.559e-02, -1.676e-01, 1.346e-02, 1.334e-01, -3.624e-01, -9.509e-02, 2.341e-02, 2.038e-02, -7.312e-03, -5.461e-03)); + r += mul(s4_4, M4(1.769e-01, 1.743e-01, 7.910e-02, 2.040e-01, -2.588e-01, 2.454e-01, 2.247e-01, -3.087e-01, -7.547e-02, -4.226e-02, -2.001e-01, 5.399e-01, -1.347e-02, -3.727e-02, 8.474e-02, 3.298e-02)); + r += mul(s4_5, M4(-1.075e-02, -2.296e-01, 1.972e-01, 4.847e-02, -9.768e-02, -9.002e-02, -1.535e-01, -9.385e-02, 1.167e-01, 3.740e-02, 4.510e-01, -3.662e-02, 2.415e-01, 2.915e-02, 8.082e-02, -4.644e-02)); + r += mul(s4_6, M4(4.066e-02, 1.286e-02, 7.837e-02, -3.065e-01, -3.700e-01, -5.469e-01, 2.814e-01, 3.348e-01, 2.184e-03, -2.622e-02, -7.294e-02, -7.004e-02, 5.945e-02, -4.840e-02, -5.578e-02, -4.259e-02)); + r += mul(s4_7, M4(4.334e-01, -9.103e-02, -3.913e-02, -7.487e-01, -4.837e-02, -2.505e-01, -4.386e-01, -7.301e-02, -1.304e-01, 1.597e-01, 1.349e-01, -1.269e-01, -3.156e-02, -8.233e-02, -8.439e-02, 6.437e-02)); + r += mul(s4_8, M4(-4.428e-02, -1.736e-01, 4.854e-02, 1.284e-02, -3.662e-01, 2.678e-02, 5.405e-01, 2.903e-01, -5.651e-02, 9.120e-02, -1.013e-01, 1.912e-01, -4.199e-02, -8.293e-02, -6.620e-02, 2.943e-02)); + r += mul(s5_0, M4(-1.987e-01, -1.547e-01, -5.561e-02, -8.342e-02, 1.514e-02, 5.663e-02, 2.794e-02, 1.212e-02, -2.875e-01, 6.254e-02, 7.387e-02, -1.633e-01, 8.085e-01, 3.431e-01, 3.014e-01, 2.745e-01)); + r += mul(s5_1, M4(-2.051e-01, -6.087e-02, 5.766e-02, -7.692e-02, -1.024e-01, -4.273e-02, -1.372e-01, 4.320e-02, -1.518e-01, 1.360e-01, 8.103e-02, -1.821e-01, -2.056e-01, -2.549e-01, 2.046e-01, 5.617e-01)); + r += mul(s5_2, M4(-1.728e-01, 4.100e-02, 9.013e-02, -1.124e-01, 3.265e-02, 2.598e-02, 9.202e-02, 1.273e-02, -2.965e-01, 1.821e-01, 6.122e-02, -8.528e-02, 2.803e-01, 
-1.811e-01, 1.370e-01, 2.149e-01)); + r += mul(s5_3, M4(-1.312e-01, -3.662e-01, 8.119e-02, -2.271e-01, -4.113e-02, 2.732e-02, -1.767e-03, -2.948e-03, -9.039e-02, -7.144e-02, -1.613e-01, -8.596e-02, 8.690e-01, 4.897e-01, -1.207e+00, 9.880e-02)); + r += mul(s5_4, M4(-2.735e-01, -1.183e-01, -4.351e-02, -3.761e-01, 1.948e-01, -1.191e-01, 1.629e-01, -1.778e-01, -3.398e-01, 2.488e-01, -1.947e-02, -1.321e-01, 6.902e-01, 5.633e-01, -1.232e-01, 4.927e-01)); + r += mul(s5_5, M4(2.976e-01, 2.988e-01, -6.464e-02, -9.545e-02, -6.653e-02, 9.741e-02, -4.290e-02, 4.555e-02, -2.893e-02, 1.308e-01, 5.605e-02, -6.064e-02, -3.114e-01, -8.178e-01, 3.104e-01, 2.635e-02)); + r += mul(s5_6, M4(-8.242e-02, -1.583e-01, -1.901e-01, 1.168e-01, -2.350e-02, 6.218e-03, 4.620e-02, 6.105e-02, -1.188e-01, 4.919e-02, 1.486e-01, 1.242e-01, 7.847e-01, 3.252e-02, -5.260e-01, -1.583e-01)); + r += mul(s5_7, M4(-4.103e-01, -1.964e-01, 2.185e-01, 8.644e-02, -3.499e-02, 2.093e-01, -1.213e-01, 1.163e-01, -3.420e-01, 2.604e-02, 6.462e-02, 1.924e-01, -4.559e-01, 1.323e-01, 5.912e-01, -1.579e+00)); + r += mul(s5_8, M4(-2.548e-02, -4.858e-02, -3.874e-02, -1.303e-01, -3.209e-02, -1.387e-01, -5.796e-02, -1.124e-01, 2.226e-02, 2.233e-02, -6.212e-02, 1.973e-01, -2.222e-01, 2.089e-01, -9.420e-01, -9.306e-01)); + r += mul(s6_0, M4(-1.511e-02, 4.097e-03, -4.216e-02, 7.326e-03, -2.635e-01, 1.710e-01, 1.514e-01, -5.487e-02, 4.944e-02, 2.235e-02, -5.493e-02, 1.125e-02, 1.411e-02, -8.220e-02, 1.317e-01, 9.543e-02)); + r += mul(s6_1, M4(-1.996e-01, 4.866e-02, -8.690e-02, 7.167e-02, -5.474e-01, 4.555e-01, 1.259e-01, -1.487e-01, 5.495e-02, 5.494e-02, 1.018e-02, 3.303e-02, -3.038e-01, -1.981e-01, 2.573e-02, -1.666e-01)); + r += mul(s6_2, M4(-1.510e-01, -1.444e-01, 1.912e-01, 2.293e-02, -5.680e-02, -1.020e-02, 2.478e-03, 8.278e-02, -4.629e-02, 1.199e-01, -1.316e-02, 1.723e-02, 1.025e-01, -3.087e-01, -1.497e-01, -2.983e-01)); + r += mul(s6_3, M4(7.026e-02, 8.863e-02, -7.994e-03, 7.872e-02, -1.736e-01, 3.692e-02, 1.725e-01, 
2.365e-01, 2.483e-02, 5.723e-02, 7.511e-02, 2.707e-02, -1.319e-01, -1.479e-01, 1.642e-01, -1.211e-02)); + r += mul(s6_4, M4(1.314e-01, -1.694e-01, 1.535e-01, -9.212e-02, -5.466e-02, -2.357e-03, 1.302e-01, 8.194e-02, 1.791e-01, -8.864e-02, -1.744e-01, -3.018e-01, -4.210e-01, -1.349e-01, 1.893e-02, -4.113e-01)); + r += mul(s6_5, M4(4.086e-02, 4.316e-02, -2.262e-01, -5.058e-02, -2.932e-01, 4.587e-02, -3.250e-02, -4.383e-02, -2.157e-01, 2.700e-01, -2.430e-01, 7.205e-02, -5.680e-01, -3.811e-01, 2.149e-01, -2.573e-01)); + r += mul(s6_6, M4(1.078e-02, -3.045e-02, 2.199e-02, -4.741e-03, 1.188e-01, 5.412e-02, -7.901e-02, -3.999e-02, 8.803e-02, -3.155e-02, 4.958e-02, 2.134e-02, -7.579e-02, -8.130e-02, -5.317e-02, -5.295e-02)); + r += mul(s6_7, M4(-1.196e-01, -1.957e-01, -7.800e-02, 9.788e-04, 2.797e-02, 9.152e-02, 1.106e-01, -8.070e-02, -2.925e-02, -5.609e-02, -2.472e-01, 1.509e-01, -2.631e-01, 7.884e-02, 1.909e-02, 1.659e-02)); + r += mul(s6_8, M4(1.074e-01, 6.685e-02, 8.382e-02, 1.438e-03, -1.291e-02, 8.508e-02, 8.563e-02, -3.086e-03, 2.686e-01, 7.002e-02, 2.222e-01, -3.415e-02, -1.489e-01, -1.043e-01, 1.182e-02, 2.139e-01)); + r += mul(s7_0, M4(-1.145e-01, 2.744e-01, -2.056e-01, 1.062e-02, 3.849e-01, -1.757e-01, -4.004e-01, 1.634e-01, -9.739e-02, -2.396e-02, 4.497e-02, -1.056e-01, 1.486e-03, 4.912e-02, -1.418e-02, 2.096e-01)); + r += mul(s7_1, M4(1.714e-01, -4.460e-03, -1.904e-01, 5.127e-02, 4.457e-01, -2.005e-01, -2.399e-01, 2.293e-01, 6.236e-03, -7.333e-02, -1.223e-01, -8.620e-02, 1.111e-03, -1.813e-03, -7.598e-02, 4.735e-02)); + r += mul(s7_2, M4(-2.502e-02, -3.663e-01, -8.110e-01, 3.594e-01, 2.693e-01, -1.316e-01, -1.044e-01, 5.490e-02, -7.955e-03, 4.426e-02, 6.322e-02, -3.838e-02, 7.466e-02, 1.846e-01, 1.519e-01, 8.454e-03)); + r += mul(s7_3, M4(-7.352e-03, 2.623e-01, 3.130e-01, -3.261e-01, 3.505e-01, -1.179e-01, -2.721e-01, 8.523e-02, -6.879e-02, -1.767e-02, 5.651e-02, -8.914e-03, 2.447e-01, -1.083e-01, 4.430e-02, 7.107e-02)); + r += mul(s7_4, M4(-1.929e-02, 
3.448e-01, 2.304e-01, 2.651e-02, 2.087e-01, 1.053e-01, 1.363e-01, -3.634e-01, -1.015e-01, -2.363e-01, 2.921e-01, -2.386e-01, 2.457e-01, 3.764e-01, 3.237e-02, 5.597e-01)); + r += mul(s7_5, M4(-1.553e-01, -1.074e-01, 5.874e-01, -2.274e-01, 2.114e-01, -6.435e-02, -2.172e-01, -2.761e-02, -3.907e-01, 3.560e-01, -2.775e-01, 6.104e-02, 3.689e-01, -1.453e-02, -1.728e-01, 7.566e-03)); + r += mul(s7_6, M4(-7.675e-02, -1.726e-01, -6.876e-02, 4.848e-03, 5.463e-02, -1.048e-01, -6.406e-02, -2.276e-01, -1.039e-01, -2.836e-02, -5.157e-02, 5.531e-02, -3.366e-02, -7.267e-02, 1.411e-02, 1.252e-01)); + r += mul(s7_7, M4(-1.270e-01, -4.631e-02, 1.328e-02, 2.479e-02, 3.272e-01, -2.270e-01, 5.842e-02, -3.250e-01, -2.514e-01, 2.098e-02, 3.925e-02, 4.051e-01, 1.776e-03, 1.506e-01, -1.081e-01, 6.672e-02)); + r += mul(s7_8, M4(-5.914e-02, 3.369e-02, -3.859e-01, 5.139e-02, 1.176e-02, -2.484e-01, -2.014e-01, -9.925e-02, 8.024e-02, 2.293e-01, 1.120e-01, 3.000e-01, -1.269e-02, 1.858e-01, 3.387e-02, 7.079e-02)); + r += V4(-3.553e-03, 1.851e-02, -1.475e-03, -2.046e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.301e-01, 1.852e-01, 5.814e-02, 1.602e-01, -7.324e-02, -9.636e-03, -3.789e-02, 6.091e-02, 2.139e-02, 1.489e-01, -5.092e-02, -2.141e-01, 1.817e-02, -5.365e-02, -8.297e-03, -1.622e-01)); + r += mul(s0_1, M4(-2.478e-01, 3.316e-02, 
8.461e-02, 1.211e-01, -1.180e-01, 1.206e-02, 1.610e-02, -2.749e-01, 1.140e-01, 7.598e-03, -3.519e-02, -1.408e-01, -1.648e-02, -1.656e-02, -4.414e-02, 2.070e-02)); + r += mul(s0_2, M4(-2.335e-02, 1.978e-01, -1.556e-02, 3.378e-02, -2.163e-01, 3.364e-01, 1.228e-01, 1.547e-02, -6.270e-02, -3.193e-02, 1.627e-01, 2.969e-02, 2.034e-02, 2.136e-02, 6.094e-02, -2.019e-02)); + r += mul(s0_3, M4(1.285e-02, -2.024e-01, -9.700e-02, -1.794e-01, -9.623e-02, -9.618e-02, -9.907e-02, -9.705e-02, -2.150e-02, 1.495e-01, -4.000e-02, 1.222e-01, 2.313e-02, -6.182e-02, -2.520e-01, -3.130e-02)); + r += mul(s0_4, M4(4.752e-01, 3.734e-01, 1.631e-01, 4.730e-01, -6.413e-02, -1.026e-01, -3.961e-01, -3.547e-01, -3.024e-03, -3.711e-02, -2.455e-01, -6.002e-02, -1.056e-01, -5.290e-02, 9.390e-02, -9.400e-02)); + r += mul(s0_5, M4(-2.050e-01, 1.889e-01, 5.210e-02, -1.519e-01, -4.424e-01, 4.170e-01, 3.336e-01, 6.601e-02, -4.192e-02, 1.582e-01, 7.213e-01, -6.145e-02, 2.067e-02, 1.119e-01, 8.269e-02, 2.498e-02)); + r += mul(s0_6, M4(6.317e-02, -2.393e-01, 2.097e-01, -3.241e-01, -5.513e-02, -2.993e-01, -3.327e-02, 1.146e-01, 6.658e-02, 2.526e-01, -6.466e-02, -1.299e-01, 1.189e-01, 1.805e-01, -1.025e-01, 4.052e-01)); + r += mul(s0_7, M4(-1.485e-01, -3.282e-01, 2.939e-01, -1.965e-01, 1.870e-01, -8.957e-02, -1.142e-01, -3.095e-01, -1.417e-02, 1.559e-01, 4.212e-03, 1.039e-03, 2.103e-01, 1.226e-01, -1.062e-02, 3.177e-03)); + r += mul(s0_8, M4(-4.186e-02, -2.353e-01, -3.995e-01, -3.152e-01, -1.573e-01, -1.809e-01, 5.544e-02, 1.379e-01, -8.439e-02, 4.880e-02, 2.083e-01, -2.285e-02, 3.872e-02, 2.384e-02, 1.582e-01, 3.685e-02)); + r += mul(s1_0, M4(-2.870e-02, -2.180e-01, -3.956e-03, -6.426e-02, -7.369e-02, -5.972e-02, -5.140e-02, 5.628e-03, -5.322e-03, -5.234e-03, -6.372e-02, 7.234e-02, 1.971e-01, -4.791e-02, -1.424e-02, 1.053e-01)); + r += mul(s1_1, M4(6.124e-02, -2.243e-03, 7.789e-02, -7.308e-02, 6.888e-02, -9.903e-02, -2.497e-03, -2.062e-01, 6.845e-02, 2.470e-01, 2.999e-02, -1.223e-01, 1.429e-01, -1.507e-01, 
-1.256e-01, 2.482e-02)); + r += mul(s1_2, M4(2.098e-03, 2.680e-02, 1.280e-03, -4.057e-02, -9.606e-02, 1.380e-01, 6.146e-02, 1.155e-01, -2.300e-01, -4.105e-02, 6.834e-02, 8.465e-02, 9.438e-02, -4.055e-02, 6.837e-02, -7.285e-02)); + r += mul(s1_3, M4(-1.040e-02, 1.511e-01, -4.298e-02, 1.217e-01, -3.000e-02, 8.181e-02, -8.391e-02, 1.223e-01, 2.920e-02, -1.869e-01, -3.919e-02, 6.057e-02, 1.891e-01, -3.193e-01, -2.273e-01, -5.798e-04)); + r += mul(s1_4, M4(1.959e-01, -1.577e-01, -2.195e-01, -1.640e-01, 1.440e-01, -2.885e-01, -2.857e-01, 1.112e-01, 1.304e-01, -2.717e-01, -2.614e-01, 1.832e-01, 1.940e-02, 4.701e-02, 2.184e-01, -6.236e-02)); + r += mul(s1_5, M4(-8.599e-02, 6.717e-02, -1.220e-02, 1.836e-01, 1.421e-01, -1.275e-01, 5.431e-01, -4.815e-02, -1.114e-01, -1.473e-01, 8.673e-01, 7.798e-03, 1.355e-01, 2.496e-02, 4.905e-02, 3.218e-03)); + r += mul(s1_6, M4(4.089e-02, 3.901e-02, 5.964e-02, 1.996e-01, -7.805e-02, 8.634e-02, 1.071e-02, -2.466e-01, 2.620e-02, -1.834e-01, 1.571e-02, -4.377e-04, 1.694e-03, -3.362e-01, -9.583e-02, -2.825e-01)); + r += mul(s1_7, M4(-8.871e-03, 1.092e-01, -1.994e-02, 1.509e-01, -7.954e-02, -3.180e-02, 6.564e-02, 2.146e-01, -3.940e-02, 9.707e-02, -5.112e-02, 1.443e-01, -1.240e-01, -1.050e-01, 3.472e-02, -1.069e-01)); + r += mul(s1_8, M4(-1.782e-01, 6.332e-02, 1.005e-01, 6.335e-02, -3.707e-04, 2.747e-01, -1.506e-01, 1.424e-01, -1.282e-02, -3.532e-02, 1.548e-01, 1.675e-01, 2.869e-02, 5.486e-02, 2.176e-01, -6.712e-02)); + r += mul(s2_0, M4(-1.412e-01, -3.098e-01, -7.445e-02, 2.157e-01, 5.311e-02, -1.558e-01, -8.965e-02, 4.372e-01, -9.685e-02, 1.470e-01, 1.126e-02, -2.712e-01, -6.178e-03, 2.177e-01, 9.057e-02, -3.586e-01)); + r += mul(s2_1, M4(-2.887e-02, 3.857e-01, 1.478e-01, -3.073e-01, -4.715e-02, -3.539e-02, -3.963e-02, -5.781e-02, -6.861e-02, 2.639e-02, 6.041e-02, -1.550e-01, 2.678e-03, 2.058e-01, 1.584e-01, 2.845e-02)); + r += mul(s2_2, M4(-5.053e-02, 2.012e-01, 7.816e-02, 1.918e-01, 3.118e-02, 4.426e-02, 8.575e-02, -7.318e-03, 7.795e-02, 
1.365e-01, -3.814e-02, 4.892e-02, 1.396e-01, -2.441e-02, -1.398e-01, 1.301e-01)); + r += mul(s2_3, M4(-3.261e-01, -5.362e-01, -6.300e-03, 3.624e-01, 5.769e-02, -8.803e-02, -1.381e-01, 1.369e-01, -1.077e-01, 1.248e-01, -1.380e-01, -1.420e-01, -1.239e-01, 1.671e-01, 9.395e-02, -7.019e-01)); + r += mul(s2_4, M4(-3.974e-01, 2.763e-01, -1.879e-01, 4.564e-01, 1.173e-01, -1.006e-01, -3.845e-01, 2.435e-01, -2.412e-02, 2.398e-01, 1.796e-01, 1.459e-02, 4.543e-01, 1.174e-02, -2.342e-01, -9.092e-02)); + r += mul(s2_5, M4(-2.089e-02, 1.043e-01, -1.193e-01, 2.741e-01, 3.587e-03, 1.636e-01, 2.701e-01, 1.616e-01, -6.077e-03, 6.642e-03, -1.240e-01, -2.768e-01, 1.194e-01, -2.036e-01, -3.423e-02, 1.648e-02)); + r += mul(s2_6, M4(1.165e-01, -3.786e-01, 1.182e-01, 1.605e-01, 7.495e-02, -6.525e-02, -4.634e-02, -4.205e-02, 2.304e-03, -2.042e-01, -6.347e-02, -1.020e-01, 1.175e-01, 1.265e-01, -8.620e-02, 5.688e-03)); + r += mul(s2_7, M4(-7.384e-03, -1.116e-01, -8.951e-02, 2.036e-01, -6.836e-02, -1.773e-01, 1.245e-01, 7.999e-02, 6.993e-02, 1.821e-01, 5.845e-02, -1.998e-01, 1.521e-01, 9.793e-02, 7.695e-02, 1.262e-01)); + r += mul(s2_8, M4(-8.152e-02, 3.856e-02, -6.092e-02, 2.426e-01, 8.689e-02, -7.674e-04, 1.209e-01, -1.583e-01, -2.678e-02, 1.684e-01, -1.711e-01, -7.350e-03, 2.231e-01, -2.241e-01, 7.775e-02, -1.196e-02)); + r += mul(s3_0, M4(-1.452e-02, 4.054e-02, -4.633e-02, -8.515e-02, 5.561e-02, 8.761e-02, -1.812e-01, -3.232e-01, 1.194e-01, 2.698e-01, 2.884e-02, -8.514e-02, 6.478e-02, -2.661e-01, -1.670e-03, 8.681e-02)); + r += mul(s3_1, M4(9.767e-02, -8.863e-02, -3.921e-02, 6.901e-02, 6.170e-02, -1.756e-01, 1.710e-01, -1.548e-01, 6.032e-02, 7.535e-02, 5.940e-02, 7.616e-02, -1.238e-01, 4.833e-02, 1.955e-01, 8.794e-02)); + r += mul(s3_2, M4(2.524e-02, 2.662e-02, 8.459e-02, -6.155e-03, 3.935e-02, 1.891e-02, 9.259e-03, -1.071e-01, 1.772e-01, -1.131e-01, -5.408e-02, 1.043e-01, -1.338e-01, -1.317e-02, -1.410e-01, 2.864e-02)); + r += mul(s3_3, M4(-8.616e-03, -2.835e-02, -1.107e-01, -1.689e-01, 
5.178e-02, 6.897e-01, -1.391e-01, 8.983e-02, 4.867e-02, -2.122e-01, -1.802e-01, -1.015e-02, -2.706e-03, 1.483e-01, 1.323e-01, 3.286e-01)); + r += mul(s3_4, M4(-6.784e-02, -7.027e-03, 2.647e-01, -1.590e-02, 1.041e-01, -1.383e-01, -1.478e-01, -1.713e-01, 2.543e-02, -2.550e-01, -8.248e-03, -7.414e-03, 5.348e-02, 1.681e-02, -4.007e-01, -3.284e-02)); + r += mul(s3_5, M4(-1.803e-02, 7.886e-03, -6.364e-02, -2.430e-02, 8.611e-02, 5.071e-02, 1.808e-01, -1.886e-01, 1.901e-01, -2.587e-01, -7.593e-02, -1.351e-02, 8.406e-02, -7.927e-02, -7.538e-02, -1.249e-01)); + r += mul(s3_6, M4(-7.358e-02, 8.325e-02, 1.090e-01, -3.392e-02, -5.795e-02, 2.896e-01, -4.952e-02, 1.028e-01, -4.203e-02, -2.732e-02, 8.058e-03, 1.598e-01, -1.319e-02, -8.781e-02, 2.060e-02, 2.139e-02)); + r += mul(s3_7, M4(-2.411e-02, -4.139e-02, -8.324e-02, 6.797e-02, -8.778e-02, 8.484e-02, 2.128e-02, 6.571e-02, 4.007e-02, -3.879e-02, 9.627e-03, 1.774e-01, 1.298e-02, 1.097e-01, 1.314e-01, -4.692e-02)); + r += mul(s3_8, M4(-5.470e-02, 4.771e-02, -9.790e-02, -2.405e-04, 7.797e-02, 1.121e-01, 6.055e-02, -8.066e-02, 5.387e-02, 9.211e-03, -1.203e-01, 5.955e-02, 3.587e-02, 3.225e-03, 4.431e-03, -2.250e-02)); + r += mul(s4_0, M4(-2.785e-02, 7.038e-02, 1.777e-02, -6.899e-02, -1.786e-01, 5.678e-01, 3.086e-01, -6.197e-02, -2.101e-01, -4.708e-01, 7.508e-03, 1.944e-01, 6.144e-02, -1.105e-01, 3.901e-02, 5.978e-02)); + r += mul(s4_1, M4(2.066e-02, -1.288e-01, 4.087e-02, 1.860e-01, 3.306e-01, 1.913e-01, -1.289e-01, 2.461e-01, -3.596e-02, 5.661e-02, -9.447e-02, -2.039e-02, -7.522e-02, 1.154e-01, 5.388e-03, -5.283e-02)); + r += mul(s4_2, M4(8.102e-02, -7.280e-02, -1.040e-01, -2.783e-02, 1.147e-01, 9.859e-02, -9.925e-02, 1.961e-01, 9.113e-03, 1.240e-01, 1.797e-01, -1.161e-02, -5.068e-02, -6.592e-03, -7.103e-02, -1.090e-02)); + r += mul(s4_3, M4(8.255e-02, -3.384e-02, 1.756e-01, -7.400e-02, -6.854e-01, 2.786e-01, 1.294e-01, -1.433e-01, 1.169e-01, 4.389e-02, 8.293e-02, 2.403e-01, -4.694e-02, 6.013e-02, 1.809e-01, -1.189e-03)); + r += 
mul(s4_4, M4(6.230e-02, 2.329e-01, 1.511e-01, -1.491e-01, -6.517e-01, -3.116e-01, -7.005e-01, 3.086e-01, 1.032e-01, -8.114e-02, 1.042e-01, -1.800e-02, -1.441e-01, -2.906e-02, -4.734e-02, -4.410e-02)); + r += mul(s4_5, M4(1.066e-01, -1.437e-01, -3.467e-01, 9.725e-02, -3.574e-01, 1.397e-02, 4.027e-01, -9.219e-02, -1.345e-01, -3.592e-02, 5.481e-02, 2.041e-02, -3.619e-02, -4.025e-02, -8.226e-02, 8.882e-03)); + r += mul(s4_6, M4(-1.570e-01, 1.786e-01, 1.084e-01, -3.860e-02, -9.958e-01, 5.107e-01, 2.791e-01, -6.198e-01, 8.632e-03, 4.455e-02, 3.793e-02, 1.799e-01, -1.521e-02, -2.071e-03, 1.500e-01, -5.810e-02)); + r += mul(s4_7, M4(2.058e-02, -5.472e-02, 2.999e-02, -3.534e-01, -5.280e-01, 1.283e-01, 1.379e-02, -2.095e-01, -7.465e-02, -1.780e-02, 5.720e-02, 1.105e-01, -2.299e-02, -8.545e-02, 1.220e-02, -9.390e-02)); + r += mul(s4_8, M4(-7.549e-02, 2.686e-02, -3.224e-02, -1.891e-01, -2.563e-01, -2.980e-02, -2.683e-01, 4.311e-02, -5.702e-02, 9.769e-02, -3.178e-02, 7.403e-02, -3.932e-02, 9.627e-02, -1.382e-01, -3.233e-02)); + r += mul(s5_0, M4(-7.157e-02, 1.218e-01, 4.895e-02, -2.697e-02, -2.247e-01, -3.837e-02, 1.527e-01, 4.106e-02, 1.022e-02, 5.294e-02, -1.417e-02, -1.746e-01, -5.884e-02, -8.050e-01, -1.704e-01, -2.815e-01)); + r += mul(s5_1, M4(-1.708e-01, 4.261e-03, 4.935e-02, -2.315e-01, -7.595e-02, 7.886e-02, -9.628e-02, -1.124e-01, 1.992e-01, 4.148e-02, 1.165e-02, 2.824e-01, 3.617e-01, 1.361e-01, -2.831e-01, -5.913e-01)); + r += mul(s5_2, M4(-9.310e-02, 3.631e-02, -6.965e-02, -5.447e-02, -1.125e-01, -1.207e-02, -8.180e-02, 6.154e-02, -9.411e-02, -8.497e-02, 1.995e-02, -8.788e-03, -3.000e-03, -6.742e-01, -5.002e-02, 7.143e-01)); + r += mul(s5_3, M4(-9.140e-02, -3.208e-02, 1.647e-01, -1.924e-01, -3.634e-02, 5.920e-02, 1.146e-01, 5.098e-02, -5.068e-02, -2.226e-01, 1.232e-01, -6.575e-02, -3.223e-01, -7.135e-02, 4.797e-01, -3.695e-01)); + r += mul(s5_4, M4(2.570e-02, -2.095e-01, 1.539e-01, 1.723e-02, 5.085e-02, -1.300e-01, -2.396e-01, -3.175e-02, -1.770e-01, 2.109e-01, 
3.226e-02, 5.015e-02, -1.094e+00, 9.919e-01, 4.163e-01, 3.218e-01)); + r += mul(s5_5, M4(-2.830e-01, 4.161e-02, -2.056e-01, -8.113e-02, 3.432e-02, 1.171e-01, 1.716e-01, -2.918e-03, -1.261e-02, 1.227e-02, 6.649e-02, -2.405e-01, -3.997e-02, -2.948e-01, -2.825e-01, 8.032e-01)); + r += mul(s5_6, M4(-6.231e-02, -1.346e-01, 1.226e-01, 1.050e-01, -1.370e-01, -1.499e-01, 1.062e-01, 8.043e-02, 2.492e-02, 1.732e-03, 9.060e-02, -2.609e-01, -3.209e-01, 1.015e+00, -2.472e-01, 3.568e-01)); + r += mul(s5_7, M4(3.223e-01, 4.831e-02, 1.083e-01, 4.442e-01, -1.101e-01, 1.498e-01, 5.688e-02, 2.313e-02, -4.522e-03, 6.388e-02, 3.195e-02, -1.484e-01, -2.437e-01, 7.016e-01, 1.396e-02, 1.372e-01)); + r += mul(s5_8, M4(2.243e-01, 1.056e-01, -1.796e-01, 7.784e-02, -2.328e-01, -5.866e-02, -1.860e-01, 4.648e-02, -4.558e-03, 6.233e-02, 3.451e-02, -5.534e-02, 7.662e-02, -2.773e-01, -2.279e-01, -9.295e-02)); + r += mul(s6_0, M4(-3.374e-02, -2.356e-02, 3.986e-02, -1.112e-01, -1.095e-01, -1.457e-01, -8.422e-02, -3.487e-01, -2.405e-02, -9.978e-02, -3.585e-02, -3.358e-02, -3.417e-02, 2.651e-01, 6.467e-02, 6.501e-02)); + r += mul(s6_1, M4(9.522e-02, -2.185e-01, 5.612e-02, -2.141e-01, 1.499e-01, -6.949e-02, 2.295e-01, -1.708e-01, 4.647e-02, 5.918e-02, -8.221e-02, -3.715e-02, 6.330e-02, 1.352e-01, -6.976e-02, 2.410e-02)); + r += mul(s6_2, M4(8.852e-02, -2.405e-02, -1.594e-01, 1.164e-02, -6.867e-02, -2.369e-02, -1.449e-01, 8.873e-02, -2.884e-02, 1.480e-02, 1.827e-01, 4.061e-02, -1.097e-01, -9.323e-03, 3.740e-02, -6.737e-02)); + r += mul(s6_3, M4(2.529e-03, 2.041e-03, 7.147e-02, -1.587e-01, 5.348e-02, -2.473e-01, 1.089e-01, -4.750e-01, -9.232e-02, 7.928e-03, 2.191e-02, 1.704e-01, -7.646e-03, 1.259e-01, -1.162e-02, -4.419e-02)); + r += mul(s6_4, M4(4.559e-02, -1.482e-02, -5.774e-02, 4.264e-02, -1.135e-01, 3.957e-02, 3.339e-01, -8.331e-02, 6.278e-02, 7.753e-02, -1.133e-01, 6.567e-02, -1.869e-01, -3.601e-01, 2.568e-01, 5.296e-02)); + r += mul(s6_5, M4(3.472e-02, 1.764e-02, -6.853e-02, -3.703e-02, -4.294e-03, 
6.097e-02, -7.461e-02, -6.429e-02, -1.063e-01, 1.582e-01, 7.603e-01, -1.219e-01, -1.339e-01, -4.161e-01, -3.681e-01, -1.219e-01)); + r += mul(s6_6, M4(5.539e-02, 5.962e-02, -3.619e-02, 1.538e-01, 1.036e-01, -2.077e-01, 2.501e-02, -4.524e-02, -4.840e-02, -1.428e-01, -1.573e-03, 2.170e-02, 2.106e-02, 3.732e-02, -4.521e-02, 5.403e-02)); + r += mul(s6_7, M4(-1.762e-02, 8.959e-02, -7.463e-02, -3.898e-03, 3.685e-02, 1.724e-01, -2.027e-01, -7.210e-02, -4.951e-02, -4.845e-02, -6.237e-02, 1.247e-01, 4.155e-02, -1.484e-01, 2.090e-01, 1.118e-01)); + r += mul(s6_8, M4(1.777e-01, -2.414e-02, -3.344e-02, 5.355e-02, 2.664e-02, 5.144e-02, -3.208e-02, 1.029e-01, -5.298e-02, -1.654e-01, 3.050e-01, -8.195e-02, -2.546e-01, 3.789e-02, -1.125e-01, -9.267e-02)); + r += mul(s7_0, M4(-1.020e-01, -9.980e-02, -3.541e-02, -6.894e-02, 9.931e-03, 4.217e-01, 5.043e-02, 2.429e-01, -6.606e-02, 4.922e-02, -4.476e-02, -1.333e-01, -3.997e-02, -1.578e-02, 6.067e-02, -2.050e-01)); + r += mul(s7_1, M4(-1.406e-01, -6.426e-02, -4.408e-02, 2.217e-01, -4.419e-01, -2.667e-02, 4.985e-02, 4.093e-01, 3.958e-02, -5.435e-03, -4.548e-02, -5.027e-02, 1.826e-02, 2.531e-01, -1.185e-01, -1.326e-01)); + r += mul(s7_2, M4(-5.890e-02, 2.680e-01, 5.605e-01, -1.489e-01, -6.722e-02, 2.036e-01, -5.221e-02, -7.006e-02, -1.348e-01, -4.079e-02, 2.412e-01, 5.350e-02, 1.568e-01, -4.918e-01, 1.060e-01, 6.523e-02)); + r += mul(s7_3, M4(-9.169e-02, 2.759e-01, 3.524e-03, 3.174e-02, -1.553e-01, 4.311e-02, 1.794e-01, 9.289e-03, 4.978e-02, 2.541e-02, 6.696e-02, -2.903e-01, 2.529e-02, 1.400e-01, -7.264e-02, 7.721e-02)); + r += mul(s7_4, M4(-4.292e-02, 1.501e-01, -1.926e-01, -3.703e-01, -6.892e-02, -6.903e-02, 8.106e-02, 2.570e-03, 2.991e-02, -3.356e-02, -8.382e-02, -2.773e-01, -3.668e-02, 6.332e-01, 2.201e-01, 1.223e-01)); + r += mul(s7_5, M4(-3.877e-01, -1.253e-02, 1.201e-01, -6.158e-02, -3.971e-02, -8.619e-02, -1.808e-01, -4.551e-02, -2.302e-01, 2.303e-01, 7.661e-01, -1.921e-01, 2.226e-01, -3.984e-02, -5.196e-01, 1.682e-01)); + r += 
mul(s7_6, M4(6.983e-02, 1.725e-01, -1.283e-01, -9.891e-02, -9.465e-03, -1.510e-02, 1.482e-02, -8.322e-02, 6.130e-02, 2.546e-01, -1.395e-02, -2.814e-01, 3.947e-02, 7.462e-02, -3.519e-02, -5.704e-02)); + r += mul(s7_7, M4(1.587e-01, -1.040e-01, -2.058e-02, -2.364e-01, 1.647e-01, -2.598e-02, -1.130e-01, -6.468e-02, -1.939e-02, 7.214e-02, 6.094e-02, -1.198e-01, 7.880e-02, 5.938e-02, 6.658e-02, 1.664e-01)); + r += mul(s7_8, M4(1.303e-01, 1.111e-01, -8.631e-02, -1.597e-01, 5.681e-02, 5.566e-02, 1.534e-02, -7.725e-02, -9.031e-02, -1.759e-02, 8.732e-02, -8.315e-02, 5.717e-03, -2.107e-01, -5.688e-02, -4.731e-02)); + r += V4(3.098e-02, 3.454e-03, -5.766e-03, 4.181e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(5.935e-02, -3.880e-02, -2.790e-02, -1.735e-01, 1.160e-01, -6.289e-02, -2.058e-03, -9.196e-02, -1.447e-02, -1.849e-01, -4.009e-02, 1.732e-01, 1.334e-03, 2.341e-02, -1.479e-01, -1.151e-01)); + r += mul(s0_1, M4(-1.772e-01, -9.737e-02, -4.854e-02, 2.510e-02, 1.265e-01, -1.552e-01, -2.271e-01, -1.100e-01, 3.124e-04, -1.101e-01, -6.041e-01, 1.344e-01, 1.428e-01, -4.656e-02, -2.475e-03, -9.346e-02)); + r += mul(s0_2, M4(3.122e-02, -1.688e-01, 9.462e-03, 1.356e-01, -7.914e-02, -8.563e-02, 3.218e-02, -2.323e-03, 1.664e-01, -4.178e-02, -1.479e-01, 4.674e-02, -7.378e-02, 1.561e-02, -3.414e-02, 1.088e-01)); + r += mul(s0_3, 
M4(1.459e-01, -4.136e-02, 7.610e-02, 6.052e-02, -1.811e-02, -1.102e-01, 4.487e-02, -6.445e-02, -2.153e-01, -1.092e-01, -2.133e-02, -7.086e-03, 5.998e-02, -1.157e-01, 2.759e-01, -1.350e-02)); + r += mul(s0_4, M4(-2.649e-01, -2.284e-01, 1.643e-01, -2.664e-01, 9.469e-02, -2.300e-01, 2.886e-01, -3.613e-01, 2.813e-01, 3.831e-02, 1.587e-01, -2.142e-01, 1.235e-01, -6.098e-02, 4.062e-02, 4.105e-02)); + r += mul(s0_5, M4(2.236e-01, 9.460e-02, 3.632e-02, -4.123e-01, -1.973e-01, -1.740e-01, -7.194e-02, 3.953e-02, 1.288e-01, 3.952e-03, -1.424e-01, 1.233e-01, 4.519e-02, 8.368e-02, -9.193e-02, 1.482e-01)); + r += mul(s0_6, M4(3.798e-01, -7.213e-03, 5.432e-02, -7.467e-02, 6.516e-02, -9.290e-02, 1.106e-01, 9.044e-02, -1.529e-01, -7.000e-02, 3.770e-02, -1.429e-02, -5.189e-01, -2.602e-02, 5.561e-02, -5.095e-01)); + r += mul(s0_7, M4(-1.523e-01, 5.008e-01, -1.562e-01, -7.291e-02, 3.043e-01, -1.469e-01, -1.206e-01, 3.775e-02, -2.512e-02, -2.162e-01, 1.028e-01, 1.379e-01, -4.190e-02, -2.226e-01, 8.129e-02, 6.048e-02)); + r += mul(s0_8, M4(6.118e-02, 2.485e-01, 1.206e-01, -6.543e-03, -1.393e-01, -1.410e-01, 6.509e-02, 2.388e-03, 1.684e-01, 8.039e-02, 9.741e-02, 2.378e-01, 2.011e-02, -2.027e-01, 3.670e-02, 7.855e-03)); + r += mul(s1_0, M4(-2.440e-02, 1.029e-01, -4.293e-02, -1.076e-02, -2.178e-03, 7.526e-02, -3.960e-02, -2.570e-02, -5.119e-02, 5.620e-02, -4.994e-02, 6.796e-02, -1.239e-01, 5.108e-02, -4.895e-02, 1.677e-01)); + r += mul(s1_1, M4(-1.277e-02, 1.926e-01, -2.228e-01, 5.436e-02, 2.798e-02, 9.485e-02, -2.055e-01, 5.804e-02, 5.623e-02, 1.654e-01, -7.012e-01, 2.912e-02, -1.199e-01, 3.823e-01, 1.260e-01, 1.435e-02)); + r += mul(s1_2, M4(1.549e-02, 4.324e-02, -3.260e-02, 8.202e-02, 4.305e-02, 3.250e-02, 4.104e-02, 3.048e-02, 2.463e-02, 1.867e-01, -9.088e-02, -1.244e-02, -6.285e-02, -5.562e-02, -2.877e-02, 3.847e-02)); + r += mul(s1_3, M4(-1.821e-01, -2.045e-01, 2.250e-02, -3.304e-02, -5.895e-02, 6.221e-02, 3.330e-02, 5.408e-02, -1.284e-01, 8.601e-04, -4.206e-02, 2.415e-02, 
-1.203e-01, 1.027e-01, -6.372e-01, 1.800e-03)); + r += mul(s1_4, M4(5.460e-02, 1.171e-01, -1.551e-01, 3.550e-02, 3.756e-01, 1.625e-01, 4.267e-01, -3.865e-02, 2.818e-01, 1.405e-01, 2.300e-01, -3.230e-01, -2.077e-01, 4.363e-01, -3.084e-01, -8.473e-02)); + r += mul(s1_5, M4(-7.994e-02, -1.892e-01, 1.632e-01, -9.721e-02, 1.368e-01, 1.737e-01, -2.244e-01, -3.117e-02, -2.395e-01, 2.234e-01, -2.775e-01, 1.149e-01, -2.063e-03, -1.582e-02, -7.830e-02, 6.865e-02)); + r += mul(s1_6, M4(-6.998e-02, -6.605e-03, 2.792e-02, 2.266e-01, -6.332e-02, 3.743e-02, 5.763e-02, 2.327e-03, -1.112e-01, 1.532e-02, 5.560e-03, -1.085e-01, 4.812e-01, -1.153e-01, 3.079e-01, 4.507e-01)); + r += mul(s1_7, M4(7.519e-02, 3.813e-02, 1.490e-01, -1.141e-02, -2.822e-01, -1.736e-01, -5.976e-02, 1.584e-02, -9.721e-02, -1.107e-01, 6.983e-02, 8.767e-03, -4.832e-01, -6.800e-02, 1.575e-01, 1.001e-01)); + r += mul(s1_8, M4(1.033e-01, -4.228e-02, -2.677e-02, -5.970e-02, -7.797e-02, -5.242e-02, -1.062e-02, -5.887e-02, -2.408e-02, 2.243e-01, 4.478e-02, 1.706e-01, -1.517e-01, -1.129e-01, 5.236e-02, -8.809e-03)); + r += mul(s2_0, M4(2.329e-01, 8.285e-02, 1.670e-01, -1.654e-02, -2.183e-01, -1.100e-02, -1.346e-01, -1.262e-01, -1.249e-01, -8.720e-02, -3.215e-02, 7.218e-03, -2.670e-03, 5.960e-03, 6.482e-04, 7.179e-02)); + r += mul(s2_1, M4(1.630e-01, 4.832e-01, -1.151e-01, -3.790e-02, 1.828e-02, 1.931e-01, -5.413e-01, -1.107e-02, -5.487e-02, -1.571e-01, -2.764e-01, -9.029e-02, -9.916e-02, 2.014e-01, 1.388e-01, -1.145e-01)); + r += mul(s2_2, M4(6.762e-03, 2.226e-01, 1.987e-02, -5.423e-02, -1.619e-02, 9.245e-02, -1.441e-01, 9.986e-02, 9.010e-02, -4.430e-02, -8.324e-02, -8.687e-02, -1.810e-01, -1.421e-01, 1.816e-02, -1.274e-01)); + r += mul(s2_3, M4(-6.187e-02, -2.608e-01, 1.371e-01, -7.463e-02, -1.247e-01, -1.769e-01, 1.860e-02, 1.903e-01, 1.004e-01, 1.098e-01, 6.763e-02, -6.562e-02, -1.488e-01, -7.447e-02, -1.223e-01, -1.545e-01)); + r += mul(s2_4, M4(-3.720e-01, -2.576e-02, -5.008e-02, -1.121e-01, -2.110e-02, 
-1.102e-03, 2.392e-01, -4.548e-02, 2.275e-01, 1.186e-01, 4.092e-01, 1.159e-03, -5.391e-02, 1.189e-01, -2.803e-01, -6.989e-02)); + r += mul(s2_5, M4(-2.357e-01, -2.881e-02, -1.628e-02, -5.717e-02, -5.192e-02, 8.166e-02, 5.828e-02, 1.137e-01, -2.024e-01, -2.246e-01, -1.492e-01, -1.331e-01, -2.242e-01, 2.840e-01, -6.200e-02, -1.559e-02)); + r += mul(s2_6, M4(3.271e-01, -2.728e-01, -2.201e-02, 1.165e-01, 4.544e-02, -6.330e-02, 9.576e-02, -8.706e-02, 6.837e-02, -9.075e-02, -6.291e-02, -6.290e-02, -2.278e-01, -2.028e-02, 2.094e-02, -1.649e-01)); + r += mul(s2_7, M4(2.697e-01, -1.229e-01, 2.684e-02, 2.014e-02, -1.793e-01, 7.902e-02, 2.036e-01, 1.403e-02, -1.683e-01, -1.475e-01, -9.334e-02, -9.973e-02, 1.661e-02, 1.149e-01, 8.812e-02, -9.418e-02)); + r += mul(s2_8, M4(1.436e-02, 5.699e-02, 2.813e-04, -2.904e-02, -4.214e-02, -9.986e-02, 3.843e-02, 3.767e-02, -7.750e-02, -1.411e-01, -7.223e-02, -9.483e-03, 2.011e-01, -1.208e-01, 1.293e-02, -1.869e-01)); + r += mul(s3_0, M4(1.751e-01, -1.945e-01, -2.919e-02, -1.608e-01, 2.052e-01, -2.025e-01, 3.285e-03, 2.567e-02, 1.568e-02, 6.525e-02, 1.727e-02, 1.762e-01, -7.644e-02, 1.772e-01, -2.706e-03, -9.601e-03)); + r += mul(s3_1, M4(-8.242e-03, -8.551e-02, 2.167e-01, 9.846e-02, 4.345e-02, -1.528e-01, -6.191e-01, -1.605e-02, -1.175e-01, -1.736e-01, -2.902e-01, -5.365e-02, -2.956e-02, 1.581e-02, 1.141e-01, -7.861e-02)); + r += mul(s3_2, M4(-8.168e-02, 9.317e-02, 1.080e-03, 7.253e-02, -4.799e-02, 1.806e-01, -2.751e-01, 4.717e-02, 1.430e-01, -1.340e-01, -2.213e-02, 1.214e-01, 1.308e-04, 8.701e-02, 3.179e-03, 9.169e-02)); + r += mul(s3_3, M4(4.130e-02, 1.459e-01, 8.394e-02, -1.792e-02, -2.212e-02, -1.066e-01, 1.071e-01, 7.341e-02, 1.160e-01, 8.097e-02, 5.112e-02, -7.335e-03, -9.636e-02, -1.319e-01, -4.296e-02, 6.101e-02)); + r += mul(s3_4, M4(-7.374e-02, -2.285e-02, 3.085e-02, 6.584e-02, 3.442e-01, -9.557e-03, 3.653e-02, -2.316e-01, -4.737e-02, 1.919e-01, 3.546e-01, -6.006e-02, 7.363e-02, 6.111e-02, 1.844e-02, -1.562e-01)); + r += 
mul(s3_5, M4(-5.292e-02, -1.842e-01, -5.210e-02, -1.087e-01, -9.492e-02, -1.672e-01, 8.528e-02, 3.359e-02, 7.379e-02, 1.750e-01, -1.559e-01, 5.677e-02, -1.406e-02, -1.478e-01, 2.558e-02, -2.936e-02)); + r += mul(s3_6, M4(6.805e-03, 9.529e-02, -1.310e-01, 3.838e-02, -3.827e-01, 6.978e-02, 1.043e-01, 4.919e-02, -1.805e-01, 4.051e-02, 1.205e-02, 1.159e-01, 9.640e-02, -1.350e-01, -8.444e-03, 1.027e-01)); + r += mul(s3_7, M4(3.617e-02, 7.544e-02, -3.033e-02, -1.630e-02, -8.538e-02, 1.023e-01, 1.871e-01, -9.201e-02, 6.537e-02, -1.245e-01, -1.172e-01, -3.826e-02, 4.586e-02, 3.039e-02, 8.042e-02, 1.039e-01)); + r += mul(s3_8, M4(5.207e-02, -1.440e-01, -7.737e-04, 2.142e-02, -5.843e-02, 1.186e-02, 3.869e-02, 1.482e-02, -1.587e-01, 5.633e-02, -3.806e-02, 1.101e-01, 6.918e-02, 4.886e-02, 8.826e-03, 1.185e-01)); + r += mul(s4_0, M4(4.134e-02, -5.948e-02, 5.677e-02, 1.336e-01, 6.300e-02, 1.927e-01, 2.735e-01, 2.763e-01, 1.347e-01, 1.102e-01, 2.558e-01, 1.069e-02, -1.023e-01, -2.797e-02, -2.972e-02, 6.513e-02)); + r += mul(s4_1, M4(-2.106e-02, 4.948e-02, 4.398e-02, 1.727e-01, 7.379e-02, -2.729e-01, -1.749e-01, 2.668e-01, -1.237e-01, 5.604e-02, 8.591e-01, -1.229e-01, 2.704e-01, -1.718e-01, 1.959e-01, -3.976e-02)); + r += mul(s4_2, M4(1.325e-02, 4.691e-02, 5.585e-02, 6.508e-02, 1.005e-01, -2.067e-01, 1.859e-01, -4.363e-02, 7.888e-02, -3.773e-02, 2.071e-01, -1.816e-01, 6.120e-02, -2.286e-02, 7.740e-02, 5.250e-02)); + r += mul(s4_3, M4(5.538e-02, 1.723e-01, 5.215e-02, 5.218e-02, -1.293e-01, -2.408e-02, 1.046e-01, 1.966e-01, 1.912e-01, 3.076e-02, -5.604e-03, 2.458e-02, 6.852e-03, 3.539e-02, 7.286e-02, -7.049e-03)); + r += mul(s4_4, M4(-6.083e-02, 1.961e-01, -7.071e-01, 7.913e-02, 3.160e-01, -5.599e-01, 5.716e-01, -1.436e-01, -1.422e-01, -3.896e-02, 3.694e-02, 6.415e-02, -3.721e-02, 8.450e-02, -2.492e-02, 4.170e-02)); + r += mul(s4_5, M4(-4.336e-02, -5.783e-02, 1.582e-01, 7.864e-03, -2.993e-01, 5.704e-01, -3.629e-02, 1.938e-02, -3.487e-02, 2.981e-01, -1.278e-01, -1.120e-01, 
-3.010e-02, 8.082e-02, 8.272e-03, -5.287e-02)); + r += mul(s4_6, M4(5.917e-01, 2.954e-01, 6.211e-02, 4.851e-02, 1.850e-01, -1.119e-01, -2.654e-01, 4.469e-01, -2.616e-01, 1.536e-01, 5.430e-02, 5.105e-02, -4.489e-02, -1.715e-02, -7.015e-02, -3.212e-02)); + r += mul(s4_7, M4(3.557e-01, 6.459e-02, -8.283e-02, 9.279e-02, -1.481e-01, -6.049e-01, -2.963e-01, 6.103e-01, -2.518e-01, -1.455e-01, -3.510e-02, 1.284e-01, -6.814e-02, -5.309e-03, -2.072e-01, -2.491e-02)); + r += mul(s4_8, M4(-9.047e-02, 1.771e-01, -3.731e-02, 8.465e-02, -1.685e-01, 7.672e-01, -3.389e-01, 7.383e-02, -4.803e-02, -1.344e-03, 9.633e-02, 3.742e-02, -2.154e-02, 5.721e-02, -1.354e-01, -2.491e-02)); + r += mul(s5_0, M4(4.403e-02, -1.212e-01, 3.089e-02, 5.049e-02, 7.390e-02, 1.838e-03, 6.242e-02, 1.249e-01, -8.862e-02, -6.818e-02, 3.279e-01, -5.009e-02, -4.138e-01, 2.611e-01, -3.124e-01, 5.053e-01)); + r += mul(s5_1, M4(-3.701e-02, -1.304e-01, -5.969e-02, -7.970e-02, 3.802e-02, -1.047e-01, -4.551e-02, 1.588e-01, 1.413e-01, 2.751e-02, 8.287e-01, -5.949e-03, 2.463e-01, -2.822e-01, 8.598e-01, 3.485e-01)); + r += mul(s5_2, M4(1.565e-01, -1.857e-01, -7.390e-02, -1.039e-01, -2.352e-02, -6.304e-02, 1.453e-01, -5.807e-02, -3.799e-02, 5.020e-02, 2.783e-01, -4.422e-03, 1.387e-01, -5.167e-01, -2.931e-01, 9.587e-02)); + r += mul(s5_3, M4(8.589e-02, 1.289e-01, -6.683e-02, 4.814e-02, -8.548e-02, 2.677e-02, 6.868e-02, 5.201e-02, 7.186e-02, -1.416e-01, -1.007e-01, -8.311e-02, 3.191e-01, 4.675e-01, 5.159e-01, 7.636e-01)); + r += mul(s5_4, M4(-6.463e-02, -1.631e-01, 5.215e-01, 3.350e-01, 8.587e-02, 4.618e-02, 4.037e-01, -8.477e-02, -1.770e-01, -1.140e-01, -1.549e-01, -1.450e-01, 1.364e-01, -9.146e-01, -5.524e-01, 1.825e+00)); + r += mul(s5_5, M4(1.068e-01, -6.471e-02, 2.219e-01, -2.326e-01, -1.201e-01, 7.923e-02, -1.147e-01, 9.964e-02, 1.038e-01, -1.205e-01, -2.941e-02, 1.203e-01, -1.237e-01, -1.006e+00, 3.622e-01, 8.427e-01)); + r += mul(s5_6, M4(-2.385e-01, -8.214e-02, 4.111e-02, -1.813e-01, 2.792e-03, -1.395e-02, 
-1.086e-01, 1.065e-01, -2.391e-02, -3.052e-01, 1.056e-01, -3.421e-02, 9.469e-02, 8.340e-01, 2.849e-01, 2.031e-01)); + r += mul(s5_7, M4(9.971e-02, 5.047e-01, -2.979e-01, -1.733e-01, -8.788e-02, 1.026e-01, -1.817e-01, 4.214e-03, 3.298e-02, -7.605e-02, 1.429e-02, -4.575e-02, -1.267e-01, -5.742e-01, -7.175e-01, 2.723e-01)); + r += mul(s5_8, M4(9.887e-02, -4.113e-01, -1.008e-01, -1.136e-01, 1.018e-01, -8.625e-02, -1.318e-01, -4.740e-03, -2.426e-02, -1.553e-01, 1.172e-01, 1.149e-01, -2.133e-01, 6.096e-01, 1.745e-02, 3.555e-01)); + r += mul(s6_0, M4(3.467e-03, 4.382e-02, -1.133e-02, 1.122e-01, -3.510e-01, 7.850e-02, 8.999e-02, 4.331e-02, 7.696e-02, 1.111e-01, 1.802e-02, 4.615e-02, 1.111e-01, -8.898e-02, -4.695e-02, 2.049e-02)); + r += mul(s6_1, M4(-1.653e-01, 6.621e-02, -9.456e-02, 4.057e-02, -1.378e-01, -2.176e-01, -1.620e-01, 5.770e-02, -6.695e-02, 1.311e-02, -1.634e-01, -6.544e-02, 5.784e-02, -1.495e-01, -2.370e-02, -3.395e-02)); + r += mul(s6_2, M4(-2.519e-01, -1.395e-01, -1.221e-02, -6.786e-03, -8.544e-02, -1.050e-01, -6.063e-02, 1.284e-01, -6.179e-02, 1.278e-01, -5.883e-02, 6.400e-02, -7.423e-02, -1.831e-01, -2.137e-02, -2.193e-01)); + r += mul(s6_3, M4(-9.478e-02, 1.650e-01, -1.248e-03, -3.417e-02, 7.054e-01, 2.271e-01, -5.585e-02, 6.382e-02, -7.301e-02, -3.995e-02, -1.089e-01, -7.827e-03, 2.849e-02, -2.844e-02, -1.083e-01, -6.346e-02)); + r += mul(s6_4, M4(3.468e-01, -2.994e-03, 1.476e-01, -2.409e-01, 7.781e-02, 2.649e-01, -2.783e-01, 1.083e-01, 1.441e-01, 6.688e-02, 2.519e-01, -9.673e-02, -2.108e-01, 1.019e-01, 1.226e-01, 4.383e-02)); + r += mul(s6_5, M4(2.953e-01, -9.021e-02, -1.069e-01, 8.069e-02, 1.225e-02, -2.118e-01, 8.543e-02, 3.792e-02, -6.041e-02, 3.033e-01, 3.664e-03, 3.392e-01, -1.063e-01, -2.672e-01, -2.135e-01, -2.742e-01)); + r += mul(s6_6, M4(-9.368e-02, -1.528e-01, 4.258e-02, -3.731e-02, 3.532e-01, 5.316e-02, 1.216e-03, 7.313e-02, -5.776e-02, -6.265e-02, 7.818e-02, 7.796e-02, -9.342e-02, -1.789e-01, -5.186e-03, -3.357e-02)); + r += mul(s6_7, 
M4(-1.119e-01, 1.409e-02, -8.511e-02, -8.581e-02, 4.249e-01, -3.115e-02, 1.212e-02, 4.140e-02, -8.697e-02, -3.781e-02, 2.415e-02, 1.004e-01, 2.033e-01, -2.625e-01, -1.195e-01, -1.446e-01)); + r += mul(s6_8, M4(-3.751e-02, 9.377e-02, 5.160e-02, -1.203e-01, -2.743e-02, 6.405e-02, -3.684e-02, -5.488e-02, 4.787e-02, 9.193e-02, 2.991e-02, -7.666e-02, 1.946e-01, -2.017e-01, -3.872e-02, -1.146e-01)); + r += mul(s7_0, M4(5.174e-02, -1.932e-01, -5.817e-03, 3.660e-02, 1.551e-01, -7.090e-02, 1.983e-01, 4.514e-02, 2.454e-02, 2.151e-02, 2.254e-02, 2.139e-02, 8.666e-02, -8.359e-02, -6.452e-02, 3.204e-03)); + r += mul(s7_1, M4(6.476e-01, -2.439e-01, -3.170e-02, 1.879e-01, -1.950e-01, -9.900e-02, 3.069e-02, -1.771e-02, -3.367e-02, -1.146e-01, -2.026e-01, -1.267e-01, -6.860e-02, -5.204e-02, -9.675e-03, -2.931e-02)); + r += mul(s7_2, M4(3.228e-01, -1.253e-02, 1.359e-01, 3.780e-01, -7.254e-02, -2.202e-01, -1.502e-02, 1.158e-02, -7.370e-02, 7.129e-02, 2.702e-02, 8.427e-02, 3.789e-01, 2.073e-02, -1.668e-02, 2.945e-02)); + r += mul(s7_3, M4(-5.933e-02, -1.988e-01, 5.261e-02, 1.353e-01, -1.531e-01, -1.078e-01, -7.204e-02, -2.297e-01, 3.892e-02, -6.414e-02, -1.413e-01, -6.568e-02, -4.182e-02, -6.158e-02, -7.575e-02, -1.336e-02)); + r += mul(s7_4, M4(-5.649e-01, 3.911e-01, 1.717e-01, 1.238e-01, -8.583e-02, -9.941e-02, -2.325e-01, -1.359e-02, 1.744e-01, -3.874e-02, 3.085e-01, -1.988e-01, -1.290e-01, -1.004e-02, 1.707e-01, 1.001e-01)); + r += mul(s7_5, M4(-3.325e-01, 2.026e-01, -8.293e-02, 2.123e-01, -3.594e-02, 4.981e-02, 1.057e-01, -7.827e-02, -1.563e-01, 1.890e-01, -9.641e-02, 6.924e-02, 7.589e-02, 1.252e-01, 1.572e-01, 4.694e-02)); + r += mul(s7_6, M4(-3.188e-01, -8.384e-02, 1.629e-02, -1.835e-02, 6.289e-02, 2.915e-02, -2.368e-02, -1.087e-01, -1.791e-02, 4.458e-02, 7.646e-02, 2.115e-03, -1.045e-01, 1.727e-02, 2.779e-03, 4.634e-02)); + r += mul(s7_7, M4(1.319e-01, -2.771e-01, -6.121e-02, 1.152e-01, -5.918e-02, 1.664e-01, 3.653e-02, -1.324e-01, -3.359e-01, -7.423e-02, 2.660e-02, 
-8.684e-02, -5.081e-01, -1.056e-01, -1.695e-01, 5.068e-02)); + r += mul(s7_8, M4(2.551e-01, -2.393e-02, 2.413e-02, 1.231e-01, 6.143e-02, -1.052e-01, -7.441e-03, -8.615e-02, 2.186e-01, 5.253e-02, 5.781e-02, -1.213e-01, -2.231e-01, 5.117e-02, -5.953e-02, -5.153e-02)); + r += V4(-9.954e-04, -3.246e-03, 1.139e-02, 8.716e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = 
max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] 
= f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 
s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.550e-02, 1.482e-01, -6.323e-02, 3.364e-03, 2.080e-02, 2.463e-02, -8.141e-02, 5.245e-02, 8.640e-03, -1.637e-01, -3.179e-01, -5.465e-02, -2.549e-02, 3.515e-02, -4.925e-02, -1.317e-01)); + r += mul(s0_1, M4(1.858e-02, 9.248e-02, -8.656e-02, 5.550e-02, 1.817e-02, 8.032e-02, -5.676e-02, 5.070e-02, -1.802e-02, -7.254e-02, -2.202e-02, -1.170e-01, 9.502e-02, -2.105e-01, 2.068e-01, 2.038e-02)); + r += mul(s0_2, M4(7.407e-03, -1.247e-02, -1.068e-02, 6.370e-02, -3.783e-02, -3.181e-02, -8.697e-03, 6.734e-02, 5.835e-02, 3.542e-02, -7.918e-02, -2.409e-02, -6.659e-02, -9.377e-03, -8.609e-02, -1.181e-02)); + r += mul(s0_3, M4(-7.379e-02, -2.101e-01, 2.361e-02, 5.116e-03, -9.090e-02, -2.460e-02, -8.051e-03, 2.844e-03, -2.771e-01, 1.732e-01, 1.808e-01, 1.874e-02, -2.907e-02, -1.239e-01, 2.501e-01, -1.563e-01)); + r += mul(s0_4, M4(-4.727e-02, -3.771e-01, -4.637e-02, 2.019e-01, 1.333e-02, -9.634e-02, 6.812e-02, 1.205e-01, 1.588e-01, -7.420e-02, -1.483e-01, -9.922e-02, 2.153e-01, -5.827e-02, 1.769e-01, -3.377e-01)); + r += mul(s0_5, M4(-7.824e-02, 1.683e-01, 7.693e-02, 4.683e-02, 2.437e-02, 8.481e-02, -1.120e-01, -2.411e-02, -8.791e-02, 9.666e-02, -1.889e-02, -5.651e-02, 3.954e-02, 1.329e-01, -1.240e-01, -4.657e-02)); + r += mul(s0_6, M4(-3.418e-02, 1.678e-01, -1.421e-01, 2.776e-02, 9.744e-02, -2.309e-01, 1.045e-01, 8.956e-03, 4.882e-02, 9.200e-02, 1.934e-02, -1.460e-01, -1.651e-02, 1.741e-02, -5.017e-02, -8.097e-02)); + r += mul(s0_7, M4(7.096e-02, -1.308e-02, 1.686e-01, 1.322e-01, 3.749e-02, 3.528e-03, -1.659e-01, -1.403e-02, -3.273e-02, -6.009e-02, 2.140e-01, -1.632e-01, 7.239e-02, -1.206e-01, -1.586e-01, -3.078e-01)); + r += mul(s0_8, M4(1.512e-03, -7.403e-02, 2.947e-02, 
-4.233e-02, 1.530e-02, -9.816e-02, -2.201e-02, 7.873e-02, 3.993e-02, -5.018e-02, -1.193e-01, -8.346e-02, 1.808e-02, 4.632e-02, -1.430e-01, -6.569e-02)); + r += mul(s1_0, M4(3.089e-02, -1.530e-01, -5.461e-02, -1.197e-01, -4.821e-02, -1.186e-02, -2.666e-02, -7.656e-02, 7.529e-02, -8.186e-02, -4.492e-02, 9.174e-02, 2.188e-03, -1.622e-02, 1.487e-02, 9.233e-03)); + r += mul(s1_1, M4(-7.001e-02, 7.108e-02, -7.005e-02, 4.512e-02, -6.238e-02, 2.113e-01, -2.022e-02, -6.211e-02, -8.457e-02, -7.076e-02, 1.031e-01, 7.805e-02, -8.044e-02, 3.066e-02, -9.706e-02, 6.739e-02)); + r += mul(s1_2, M4(2.233e-03, 5.735e-02, -1.050e-01, 3.267e-05, 2.600e-02, -1.462e-01, 1.228e-01, 4.476e-02, -5.821e-02, 1.009e-02, -4.157e-02, 5.004e-02, 1.489e-03, -5.188e-02, -1.130e-01, 7.655e-03)); + r += mul(s1_3, M4(1.187e-01, -1.361e-01, -3.887e-02, -1.879e-01, -1.532e-01, 1.675e-01, 9.292e-02, 1.058e-01, -1.031e-01, 1.873e-01, -1.012e-01, -6.570e-03, -1.088e-01, 6.697e-02, -2.098e-01, -2.466e-02)); + r += mul(s1_4, M4(-3.238e-02, 2.032e-01, -2.280e-01, -8.513e-02, -7.811e-02, 1.825e-01, 8.879e-02, 5.777e-02, 6.934e-02, 1.430e-01, -1.761e-01, -4.238e-02, -9.137e-02, 2.031e-01, -6.119e-02, -2.051e-01)); + r += mul(s1_5, M4(-1.427e-01, 1.108e-01, 7.984e-02, 1.138e-01, -6.363e-02, 2.572e-01, -9.833e-02, 4.010e-03, 1.009e-02, -5.946e-02, -2.506e-02, 1.648e-02, -1.293e-02, 3.644e-02, -3.313e-02, -6.616e-02)); + r += mul(s1_6, M4(5.543e-02, -2.084e-01, 1.536e-01, 1.478e-02, 5.205e-02, 1.060e-01, 2.033e-01, 8.088e-02, 1.063e-03, 1.169e-02, 8.632e-02, 7.281e-03, 5.383e-02, 1.271e-02, 1.117e-01, 9.029e-02)); + r += mul(s1_7, M4(-1.349e-02, -7.237e-02, 2.589e-01, 5.118e-03, -6.311e-02, 1.450e-02, -6.996e-02, -1.008e-01, -3.551e-02, -5.876e-02, 3.745e-02, -4.988e-02, 8.911e-02, -1.860e-01, 3.110e-01, -1.417e-01)); + r += mul(s1_8, M4(1.646e-01, -1.234e-01, 1.665e-01, -9.602e-02, -6.306e-02, 1.731e-01, 8.883e-03, -2.104e-02, -3.799e-02, 2.678e-02, 3.067e-02, -4.282e-02, 3.751e-02, 1.611e-02, -3.500e-02, 
-3.051e-02)); + r += mul(s2_0, M4(5.755e-02, -9.998e-03, 1.024e-01, 2.299e-02, 1.365e-01, -9.806e-02, 9.162e-02, -1.494e-01, 8.121e-02, -1.116e-01, -5.291e-02, -1.021e-01, -2.382e-03, -8.515e-02, -1.747e-01, -1.377e-01)); + r += mul(s2_1, M4(-7.788e-02, 6.038e-02, 9.375e-02, -1.776e-02, 5.052e-03, -3.421e-02, -3.974e-02, -3.626e-02, 1.030e-01, 9.294e-03, -9.927e-02, 6.549e-02, 1.685e-02, -1.361e-01, 4.110e-04, 2.135e-01)); + r += mul(s2_2, M4(2.658e-02, 7.140e-02, -5.212e-02, -6.860e-02, 4.382e-02, 4.198e-02, 3.386e-02, 1.893e-02, 4.225e-03, 1.905e-02, -6.424e-03, -7.844e-02, 6.918e-02, 1.072e-01, -1.525e-01, 9.003e-02)); + r += mul(s2_3, M4(9.057e-02, -1.498e-01, 4.421e-02, -1.056e-01, -8.476e-02, 1.583e-01, -1.531e-02, -1.883e-01, 5.109e-02, -2.248e-01, -9.583e-02, 4.146e-02, 3.877e-02, -6.171e-02, -1.498e-01, 7.983e-02)); + r += mul(s2_4, M4(9.063e-02, -1.617e-01, -1.447e-01, -9.241e-02, 3.600e-02, 5.373e-03, -1.078e-01, -1.163e-01, 6.829e-02, -5.413e-02, 8.948e-02, -1.109e-01, 5.275e-02, -3.897e-01, 1.237e-01, 9.982e-02)); + r += mul(s2_5, M4(4.084e-02, 4.763e-03, -3.784e-02, -4.274e-02, -5.362e-03, -1.529e-02, 7.842e-03, 5.604e-04, 7.714e-02, -1.559e-01, 7.681e-02, -7.394e-02, -8.979e-02, 2.113e-01, 2.168e-01, 4.097e-02)); + r += mul(s2_6, M4(-8.091e-02, -1.115e-01, 2.361e-01, -1.733e-01, 7.247e-03, -1.287e-01, 1.563e-01, -8.712e-02, -3.140e-03, 9.098e-02, -1.299e-01, 6.030e-03, 4.423e-02, -7.119e-02, -2.863e-01, 6.274e-02)); + r += mul(s2_7, M4(6.526e-02, -7.356e-02, -2.948e-02, -9.011e-02, 1.460e-02, -6.172e-02, 3.199e-03, 2.079e-02, -5.142e-02, 8.263e-02, 7.087e-02, -1.835e-02, 1.853e-02, 4.476e-02, 5.705e-02, 8.962e-02)); + r += mul(s2_8, M4(2.256e-02, -3.066e-02, -1.381e-01, -8.295e-02, -2.385e-02, -5.564e-02, -1.301e-01, -1.046e-01, 5.761e-02, -5.741e-02, 8.829e-02, -1.901e-02, 7.150e-02, -4.324e-02, -4.973e-02, 1.143e-01)); + r += mul(s3_0, M4(3.401e-04, -5.849e-04, 6.542e-02, -1.244e-02, -9.909e-02, 1.931e-01, -7.912e-02, 4.238e-02, 1.492e-02, 
-2.218e-03, 1.655e-01, -2.451e-02, -3.112e-02, 3.901e-02, 1.196e-01, -4.961e-02)); + r += mul(s3_1, M4(1.788e-02, -4.814e-02, -7.687e-02, -1.610e-02, -1.226e-01, 3.760e-01, 1.033e-02, 1.085e-01, -1.322e-01, 6.261e-02, 1.646e-01, 1.469e-01, 5.817e-02, -8.755e-02, 2.057e-01, 1.778e-02)); + r += mul(s3_2, M4(5.219e-02, 1.550e-01, 4.151e-02, -9.940e-02, -1.919e-02, -4.969e-02, 6.124e-02, 3.869e-02, -1.602e-01, 2.288e-01, -7.836e-03, -2.868e-02, -8.724e-02, -5.142e-02, 1.497e-01, 6.299e-03)); + r += mul(s3_3, M4(1.154e-01, -2.413e-01, -1.159e-01, 3.445e-02, -3.170e-02, -3.682e-02, 4.749e-02, -2.001e-01, -3.168e-02, 2.549e-01, -3.563e-01, 8.866e-02, -4.008e-02, 8.638e-02, 1.173e-01, -1.110e-03)); + r += mul(s3_4, M4(-2.685e-01, 3.041e-02, -2.903e-01, -4.052e-02, -2.745e-01, 5.754e-01, -3.479e-02, 1.664e-01, -6.268e-02, 5.738e-02, 2.118e-01, -1.481e-01, -1.829e-01, 1.945e-01, -4.317e-02, -2.469e-02)); + r += mul(s3_5, M4(6.148e-02, -1.607e-01, -3.468e-03, 8.311e-03, -4.307e-02, 1.277e-01, 1.087e-01, -1.532e-02, 2.988e-02, -8.367e-02, 4.991e-02, -5.328e-02, -4.701e-02, 6.573e-02, -2.594e-02, -8.493e-02)); + r += mul(s3_6, M4(-1.161e-02, 7.270e-02, 1.249e-01, -8.621e-02, 8.553e-02, 2.959e-01, 3.113e-01, 2.873e-03, -1.351e-01, -2.965e-02, -7.862e-02, -5.562e-02, -1.062e-02, 9.243e-02, -5.759e-02, 1.790e-02)); + r += mul(s3_7, M4(2.188e-01, -5.976e-02, -7.703e-02, 1.416e-02, 1.885e-02, -2.569e-01, 1.704e-01, 1.260e-01, -1.121e-01, 6.791e-02, 1.233e-01, -4.520e-02, -1.194e-01, 1.583e-01, 1.293e-02, 1.602e-02)); + r += mul(s3_8, M4(-3.095e-03, -2.183e-02, 9.775e-02, 1.380e-02, -4.415e-02, 1.140e-01, -1.133e-01, 4.689e-02, -4.382e-02, -5.442e-02, 2.616e-02, -7.310e-03, 5.992e-02, -5.769e-02, 8.517e-03, 5.975e-02)); + r += mul(s4_0, M4(-2.874e-02, -3.209e-02, 7.440e-02, -5.051e-02, 3.173e-02, -4.283e-03, 2.989e-02, -8.500e-02, -2.492e-01, 1.640e-01, 2.135e-03, 1.260e-01, 2.995e-02, -4.737e-02, -7.130e-03, -8.467e-03)); + r += mul(s4_1, M4(-5.790e-02, 1.738e-02, -8.117e-02, 
-1.148e-01, 8.217e-02, -3.495e-02, 4.301e-02, 3.421e-02, -3.894e-02, 9.015e-03, -6.426e-02, 7.283e-02, 2.620e-02, -8.512e-02, -7.089e-02, -7.352e-02)); + r += mul(s4_2, M4(-2.312e-02, 3.598e-02, 1.935e-02, 2.357e-02, 2.581e-02, -1.139e-01, 2.659e-02, 3.450e-02, -1.109e-02, -1.854e-02, -7.685e-02, -2.472e-02, 8.593e-02, 1.155e-02, -7.321e-03, 6.701e-02)); + r += mul(s4_3, M4(7.399e-02, -5.969e-02, 1.448e-01, -1.209e-01, 4.066e-02, 2.562e-02, 1.040e-01, -1.381e-02, -7.872e-01, 4.618e-01, 2.801e-02, 1.128e-01, -1.565e-02, 6.946e-02, -5.437e-02, -2.053e-02)); + r += mul(s4_4, M4(2.776e-01, -2.267e-01, 2.452e-01, 3.041e-01, 1.735e-03, -6.368e-02, -2.212e-02, 1.169e-01, -6.648e-02, 4.323e-01, -1.261e-01, 2.998e-02, -1.491e-01, -6.740e-02, 4.344e-02, -2.343e-01)); + r += mul(s4_5, M4(-1.965e-02, 4.745e-02, 3.125e-02, 1.700e-02, -4.554e-02, 2.626e-01, 1.867e-02, 6.873e-02, -5.527e-03, 4.543e-02, 1.086e-01, -2.145e-03, 8.956e-03, -2.003e-02, -4.228e-02, -8.769e-02)); + r += mul(s4_6, M4(-3.365e-02, 1.210e-01, -6.053e-02, -1.365e-01, -7.537e-02, 4.398e-02, -2.364e-02, -5.594e-02, -1.008e-01, -1.719e-01, 1.680e-01, 9.306e-02, 2.551e-02, 9.705e-02, -1.017e-01, 1.386e-01)); + r += mul(s4_7, M4(-2.247e-01, -5.840e-02, -1.633e-01, 1.787e-01, -1.938e-02, 6.637e-02, -9.436e-02, -5.768e-02, -2.798e-02, 7.276e-03, 1.510e-01, 2.070e-02, 8.682e-03, -8.152e-02, 1.585e-01, -1.970e-01)); + r += mul(s4_8, M4(2.726e-02, 1.013e-01, -9.009e-02, 2.377e-02, 4.192e-02, -5.158e-03, 1.333e-01, 5.335e-02, 5.155e-02, 7.597e-03, 1.612e-01, 4.137e-02, 2.126e-02, 2.504e-02, 4.211e-02, 2.331e-02)); + r += mul(s5_0, M4(7.914e-03, 5.196e-02, 1.528e-01, -1.733e-01, -4.303e-02, -8.394e-02, -6.856e-02, -1.815e-03, 1.145e-01, -6.718e-02, 2.572e-01, -7.499e-02, 5.476e-02, 5.707e-02, -3.430e-02, -2.263e-02)); + r += mul(s5_1, M4(2.027e-01, -9.534e-02, -4.479e-02, -2.295e-01, -1.571e-04, 1.039e-01, -1.418e-02, -9.583e-03, 1.155e-01, -1.823e-01, 7.095e-02, -8.661e-02, -8.197e-02, -5.617e-02, -2.881e-01, 
1.652e-02)); + r += mul(s5_2, M4(7.570e-02, -5.338e-03, -1.236e-02, -8.058e-02, 1.990e-02, -5.564e-02, -1.548e-01, 9.153e-02, -4.377e-02, 3.944e-02, -3.167e-03, -5.789e-03, 2.200e-02, -7.143e-02, -6.064e-02, 7.612e-02)); + r += mul(s5_3, M4(2.007e-01, -1.894e-01, 1.677e-02, -1.159e-01, 3.995e-02, -1.170e-01, 2.050e-01, 1.138e-02, 1.128e-02, 3.693e-02, 2.784e-01, -2.002e-01, -4.578e-04, -1.568e-01, 3.455e-02, 1.001e-02)); + r += mul(s5_4, M4(-1.635e-01, -1.109e-02, 6.541e-02, -3.544e-01, 7.378e-02, 1.112e-01, -5.674e-02, 1.863e-01, 7.911e-01, -2.734e-01, -1.205e-01, 1.684e-01, -1.814e-01, 1.269e-01, 1.220e-01, 7.336e-03)); + r += mul(s5_5, M4(-4.375e-02, 8.044e-02, 1.655e-01, -2.669e-01, 7.938e-02, 8.039e-02, -2.610e-02, 1.606e-01, 4.477e-02, -1.701e-03, 7.304e-03, -4.718e-02, 2.108e-02, 2.270e-01, 2.392e-01, 4.445e-02)); + r += mul(s5_6, M4(4.577e-02, -6.396e-03, -9.791e-02, -1.781e-01, -2.233e-02, 1.123e-01, 6.753e-02, -1.246e-01, -6.603e-03, 9.639e-03, 3.877e-02, -3.097e-02, -3.699e-02, -8.160e-02, -1.579e-01, -4.564e-02)); + r += mul(s5_7, M4(-1.952e-01, 3.921e-04, 2.565e-01, -4.736e-01, -9.141e-02, 3.384e-02, -3.370e-01, -1.511e-02, 1.161e-01, -3.249e-02, -2.568e-01, -2.537e-02, -1.069e-01, -3.597e-02, -4.000e-02, 2.052e-01)); + r += mul(s5_8, M4(2.847e-02, -7.864e-02, -1.253e-02, -2.447e-01, -9.473e-02, 1.263e-01, -6.200e-02, 3.020e-02, 2.729e-02, 3.150e-02, 4.010e-02, -2.741e-02, 8.655e-02, 1.347e-02, 1.495e-01, -6.498e-04)); + r += mul(s6_0, M4(7.169e-02, -6.749e-02, 9.479e-02, 3.264e-03, 1.453e-02, -5.768e-02, -7.251e-02, -1.236e-01, 3.857e-03, -2.619e-02, -2.213e-02, -3.012e-03, -6.991e-02, 3.919e-02, -8.657e-03, -3.474e-02)); + r += mul(s6_1, M4(3.885e-02, 2.170e-02, 6.984e-02, 6.616e-02, 7.536e-02, 1.188e-01, -1.020e-01, 1.248e-02, -6.022e-02, 6.860e-02, 2.724e-02, -1.921e-02, -7.704e-02, 2.208e-02, 1.482e-01, 1.781e-01)); + r += mul(s6_2, M4(-5.202e-02, -7.019e-02, 2.054e-02, -5.957e-02, 2.252e-02, -9.743e-02, 6.670e-02, 4.199e-02, 3.750e-02, 
-1.800e-02, 2.483e-02, 8.751e-02, 6.627e-02, 1.258e-01, -4.092e-02, 4.300e-02)); + r += mul(s6_3, M4(2.245e-02, -1.616e-02, 3.853e-02, -1.251e-01, 1.342e-01, -2.051e-01, 3.328e-01, -2.076e-02, -1.401e-01, -3.579e-02, -1.392e-01, -1.270e-01, 1.674e-02, 3.339e-02, 2.546e-01, 1.754e-01)); + r += mul(s6_4, M4(-1.724e-01, 2.212e-01, 1.627e-01, -1.674e-01, 9.932e-03, 1.412e-01, 2.723e-01, 7.141e-02, -7.426e-01, 1.770e-01, 2.432e-01, -6.203e-02, 2.387e-01, -9.877e-02, -4.044e-02, 3.033e-01)); + r += mul(s6_5, M4(1.734e-01, -8.957e-03, 1.074e-01, 1.615e-02, 2.509e-03, -3.180e-02, 1.738e-02, 1.614e-03, -2.015e-01, -5.618e-03, -1.549e-02, -6.552e-02, -1.216e-01, -9.633e-02, -1.392e-01, 6.371e-03)); + r += mul(s6_6, M4(9.966e-03, -5.015e-02, 1.106e-03, 2.051e-02, -3.092e-02, 8.487e-02, -8.627e-02, 3.511e-02, -4.166e-02, -4.374e-02, -8.720e-02, -2.525e-02, 5.230e-02, -3.937e-02, -3.387e-02, 3.305e-02)); + r += mul(s6_7, M4(7.256e-02, 8.201e-02, 1.425e-01, 3.578e-02, 4.288e-02, 2.544e-01, -7.146e-02, 1.259e-01, -5.159e-02, 5.622e-02, 8.943e-02, -1.483e-02, -1.130e-01, 1.102e-02, 5.953e-02, 2.999e-02)); + r += mul(s6_8, M4(-1.649e-02, -6.988e-02, 1.654e-01, 8.961e-02, -5.890e-02, -7.814e-02, -3.838e-02, 5.917e-02, -2.741e-03, 1.389e-01, -7.250e-02, 2.338e-02, -3.435e-02, -1.182e-01, -1.089e-02, -8.475e-02)); + r += mul(s7_0, M4(1.302e-01, -2.233e-02, -1.353e-01, -9.712e-02, -8.077e-02, -1.417e-01, -6.761e-02, -1.097e-01, -1.623e-02, 2.213e-02, -2.955e-03, 2.299e-02, -2.731e-02, -6.499e-02, 8.680e-02, -2.647e-01)); + r += mul(s7_1, M4(1.919e-01, 2.093e-01, -3.153e-01, -1.151e-01, 3.468e-02, 7.060e-02, -2.447e-01, 7.122e-02, 8.064e-03, 8.447e-02, 9.937e-02, -6.254e-02, -1.451e-01, -4.861e-02, 2.636e-01, 9.170e-02)); + r += mul(s7_2, M4(1.963e-02, -4.898e-02, -5.656e-01, -1.833e-02, 1.456e-02, -2.585e-02, 3.618e-02, -1.040e-01, 8.570e-03, -4.964e-03, -2.392e-02, 5.105e-02, 9.810e-02, 1.419e-01, -1.011e-02, -6.824e-02)); + r += mul(s7_3, M4(-2.838e-02, -3.018e-01, -1.827e-01, 
-2.484e-02, -2.072e-01, 1.908e-01, -1.321e-01, 2.737e-02, 3.042e-02, -1.350e-01, 5.893e-02, -1.916e-02, 1.234e-01, 1.333e-01, -1.184e-01, 1.806e-01)); + r += mul(s7_4, M4(1.446e-01, 5.919e-01, -2.780e-01, -9.927e-02, -8.727e-02, 2.295e-01, 8.388e-02, 5.303e-02, 2.302e-01, -2.994e-01, 1.563e-01, -9.238e-02, -2.779e-01, 7.911e-01, -9.902e-01, 1.192e+00)); + r += mul(s7_5, M4(-1.373e-02, -1.054e-02, -3.860e-01, -7.999e-02, 1.305e-01, -4.235e-02, -1.591e-01, 7.167e-02, -7.756e-02, -1.028e-02, -8.130e-02, -7.158e-02, -1.240e-02, -9.955e-02, -2.537e-01, 1.257e-02)); + r += mul(s7_6, M4(2.098e-02, -1.965e-01, -2.843e-04, 9.224e-02, -4.041e-02, -6.028e-02, -1.702e-02, 1.063e-01, 5.851e-02, -6.682e-03, 1.789e-01, -9.654e-02, 8.077e-02, -2.780e-01, -2.409e-01, -2.038e-01)); + r += mul(s7_7, M4(9.059e-02, 2.376e-02, -2.666e-01, 1.010e-01, -2.099e-02, 1.413e-01, -1.796e-01, 1.615e-01, 3.644e-01, -6.318e-01, -2.343e-01, -2.385e-02, 2.060e-01, -1.687e-01, -8.451e-03, -9.400e-02)); + r += mul(s7_8, M4(1.305e-02, 2.460e-02, -7.354e-02, 8.471e-02, -9.206e-02, -3.224e-02, -4.200e-02, 1.797e-02, 3.352e-02, -4.818e-02, 6.038e-02, 7.224e-03, 2.233e-02, 8.301e-02, -1.024e-01, -1.395e-01)); + r += V4(5.086e-03, 1.201e-02, -1.446e-02, -1.059e-01); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.149e-01, 8.911e-02, 2.520e-01, -2.292e-03, 
5.639e-02, -4.318e-02, 1.845e-02, -7.860e-02, 6.697e-03, -1.714e-01, -3.535e-02, -2.981e-01, 5.744e-02, 8.686e-02, 1.889e-02, 2.883e-02)); + r += mul(s0_1, M4(-4.305e-02, 1.415e-01, 5.839e-03, -1.340e-01, 4.330e-02, -7.875e-02, -1.012e-01, 3.681e-02, -4.121e-01, -4.761e-03, -1.401e-01, -1.585e-01, -2.174e-01, 2.217e-02, 2.682e-01, 6.936e-02)); + r += mul(s0_2, M4(6.654e-02, 3.023e-02, -1.800e-01, 1.687e-01, 2.321e-02, 7.973e-03, -2.736e-02, -1.150e-01, -5.164e-02, 2.459e-01, 8.378e-02, 7.680e-02, 1.271e-01, -4.691e-02, 1.622e-02, -2.721e-02)); + r += mul(s0_3, M4(5.899e-02, -9.233e-02, -1.391e-01, -7.216e-02, -7.611e-02, 8.228e-02, 2.076e-01, 3.763e-02, -5.480e-01, 8.615e-02, 3.822e-01, -2.952e-02, -2.619e-01, -5.971e-02, -9.140e-02, 2.824e-01)); + r += mul(s0_4, M4(-1.667e-01, -2.103e-01, -1.571e-01, -5.567e-02, 1.152e-02, 5.381e-03, -7.040e-02, -3.720e-02, -5.531e-01, 5.504e-02, -5.912e-02, -8.021e-02, 1.411e-01, 8.420e-02, 2.038e-01, 1.034e-01)); + r += mul(s0_5, M4(2.704e-01, 8.585e-02, -5.662e-02, 4.992e-02, -6.336e-02, 3.235e-01, 2.407e-01, -1.285e-02, -1.531e-01, 2.347e-01, 4.188e-02, -7.345e-03, 6.674e-02, 3.855e-03, 7.364e-02, -1.997e-01)); + r += mul(s0_6, M4(-1.012e-01, -5.227e-03, 1.766e-01, -9.700e-02, 5.514e-02, -4.035e-02, 7.468e-02, -4.129e-02, -5.134e-01, -1.258e-01, 4.091e-01, 5.850e-02, -8.517e-03, -2.508e-01, -3.440e-01, -8.356e-02)); + r += mul(s0_7, M4(-2.280e-02, 6.713e-02, 9.325e-02, 4.812e-02, 2.527e-01, -3.502e-02, 1.417e-01, -2.768e-01, -2.534e-01, 3.620e-01, -1.574e-01, 2.076e-02, 2.430e-01, -2.213e-01, 1.102e-01, -1.316e-01)); + r += mul(s0_8, M4(1.387e-01, -5.009e-03, -5.013e-02, -3.373e-02, -1.025e-01, -1.323e-01, 6.666e-02, -8.504e-02, 1.629e-01, -1.330e-01, -1.634e-01, -2.773e-01, 7.168e-02, 4.939e-03, -6.583e-02, -1.095e-01)); + r += mul(s1_0, M4(1.867e-01, 1.141e-01, 1.059e-01, 5.602e-03, 3.579e-03, 2.358e-02, -1.528e-01, 2.542e-02, 1.274e-01, -4.719e-02, 5.717e-02, -7.865e-02, 3.376e-02, -1.231e-01, -6.044e-02, 3.892e-02)); + r 
+= mul(s1_1, M4(9.617e-02, -7.401e-02, -5.730e-02, -3.455e-01, 6.837e-02, 2.837e-02, -1.304e-01, -8.394e-02, 1.283e-01, -6.171e-02, 3.258e-02, 1.780e-02, -1.206e-01, -1.125e-01, 8.222e-04, -3.074e-02)); + r += mul(s1_2, M4(-1.065e-01, -1.395e-01, -2.066e-01, 1.635e-02, -2.101e-01, -5.436e-03, 8.472e-02, 4.570e-02, 5.396e-02, 1.469e-02, 1.089e-03, -1.631e-02, -3.540e-02, -1.250e-01, -1.126e-01, -1.950e-02)); + r += mul(s1_3, M4(2.203e-01, -2.307e-03, 5.877e-02, -1.447e-02, -1.733e-01, 5.954e-02, -6.173e-03, -1.295e-02, 1.613e-01, 1.474e-01, 1.018e-01, -3.808e-03, -4.718e-02, -6.116e-02, -3.219e-02, 8.776e-04)); + r += mul(s1_4, M4(-9.098e-02, -2.021e-01, 1.750e-01, 3.967e-02, -2.998e-01, 1.723e-01, -3.029e-01, -3.594e-02, 2.998e-02, -8.915e-03, 2.087e-01, -8.583e-02, -9.560e-02, -7.081e-02, 9.754e-02, 1.962e-01)); + r += mul(s1_5, M4(-8.326e-02, 6.852e-03, -1.690e-01, 9.679e-02, -1.238e-01, -4.300e-02, -3.576e-02, 2.092e-01, -1.946e-01, 8.110e-03, 6.578e-02, -6.703e-03, 1.955e-01, 1.397e-01, 1.084e-01, -7.745e-02)); + r += mul(s1_6, M4(2.939e-01, 1.634e-01, -1.056e-01, 6.142e-02, -2.635e-02, -2.484e-02, -3.410e-01, 1.010e-01, 2.867e-02, -2.709e-02, -3.079e-03, 1.117e-01, -2.867e-02, 1.411e-01, -1.167e-01, 6.789e-02)); + r += mul(s1_7, M4(-3.754e-02, 1.528e-01, -4.843e-02, 1.548e-01, -8.606e-02, 1.717e-01, -3.897e-01, 1.396e-01, 9.584e-03, 1.372e-01, -1.273e-01, 2.555e-02, 3.264e-02, 1.434e-01, -2.085e-02, 1.285e-01)); + r += mul(s1_8, M4(-2.823e-01, 4.009e-02, -8.112e-03, -7.967e-02, 5.913e-02, -3.484e-02, -1.382e-01, 1.537e-01, -3.762e-02, -5.813e-02, 5.067e-02, 4.077e-02, -7.708e-02, 1.750e-01, -8.353e-02, -6.431e-02)); + r += mul(s2_0, M4(-4.761e-02, -3.946e-02, -1.049e-01, 1.499e-02, 6.860e-02, 8.789e-02, 4.937e-02, 1.417e-02, 1.655e-02, -1.264e-01, -4.095e-02, -5.697e-02, 2.133e-02, 4.595e-02, -8.380e-02, -3.804e-02)); + r += mul(s2_1, M4(-7.046e-04, 1.721e-01, 2.778e-01, -8.966e-02, -5.113e-02, 2.596e-02, 1.490e-01, -9.792e-02, -7.188e-02, -2.215e-02, 
-6.325e-02, -6.428e-02, 5.408e-02, -2.526e-01, 2.887e-01, -3.238e-02)); + r += mul(s2_2, M4(2.108e-02, -9.977e-02, -5.268e-02, 9.567e-02, -4.931e-03, 7.264e-03, -4.562e-02, -1.025e-02, 6.414e-02, -3.967e-02, 7.990e-03, 3.237e-03, -6.207e-02, -1.327e-02, -3.273e-01, -4.403e-02)); + r += mul(s2_3, M4(4.363e-02, 7.571e-02, 1.539e-02, 1.079e-02, 1.036e-02, -3.000e-02, -1.272e-01, -3.467e-02, 4.469e-02, 1.152e-01, 2.394e-01, -9.794e-02, -6.073e-02, 1.029e-01, 5.621e-02, -1.406e-01)); + r += mul(s2_4, M4(-3.829e-01, -8.430e-02, 3.327e-01, -4.212e-02, 5.647e-02, 5.437e-02, 6.288e-02, 1.051e-01, 6.112e-02, 1.456e-02, 6.431e-02, -7.205e-02, -2.087e-01, 4.363e-01, 1.342e-01, -2.842e-01)); + r += mul(s2_5, M4(2.446e-01, 3.877e-02, 1.068e-01, 3.048e-02, 5.673e-02, 5.352e-02, 1.736e-01, 1.529e-02, -7.917e-03, -8.124e-03, -8.913e-02, -3.513e-02, 4.064e-02, 2.757e-01, -2.200e-01, -7.200e-02)); + r += mul(s2_6, M4(1.532e-01, -6.366e-02, -3.245e-01, 1.011e-01, 7.095e-02, -5.889e-02, -7.494e-02, 2.072e-02, -2.135e-02, 2.221e-03, -5.211e-03, -5.587e-02, -4.618e-02, -3.320e-02, 2.060e-01, -1.612e-01)); + r += mul(s2_7, M4(1.381e-01, -1.927e-01, -2.183e-01, 3.981e-02, 1.200e-02, 2.706e-02, -1.832e-01, 3.230e-02, -1.780e-01, 2.596e-02, -2.166e-03, -5.568e-04, 5.992e-02, -7.717e-02, 4.973e-01, -1.275e-01)); + r += mul(s2_8, M4(-1.990e-02, 8.939e-02, 2.998e-02, -3.216e-02, -8.370e-02, -2.356e-02, -9.606e-02, -6.189e-02, -1.653e-02, 3.231e-02, -5.651e-02, -3.294e-02, -1.618e-02, 1.377e-01, 1.816e-01, -9.103e-02)); + r += mul(s3_0, M4(-2.448e-02, 1.816e-02, -6.946e-02, -5.889e-02, 3.135e-02, 3.722e-04, -3.753e-02, -5.770e-02, 1.193e-01, -8.895e-02, -2.573e-01, 1.528e-02, -5.268e-04, 6.228e-02, -4.613e-02, 8.812e-02)); + r += mul(s3_1, M4(1.488e-01, -1.580e-01, -1.913e-02, -1.196e-01, -8.225e-02, -1.431e-01, -6.842e-02, -4.574e-02, 3.027e-02, 1.272e-01, 9.118e-02, -5.961e-02, 3.162e-02, 2.013e-01, 2.842e-01, 1.782e-01)); + r += mul(s3_2, M4(-1.842e-01, -1.236e-01, 4.733e-03, 4.541e-02, 
-2.843e-01, 1.886e-02, -5.482e-02, 1.554e-02, -3.282e-04, 1.603e-01, 2.066e-01, 8.032e-02, -1.252e-01, 9.479e-02, 6.652e-02, -3.031e-02)); + r += mul(s3_3, M4(1.332e-01, 6.801e-02, 8.690e-02, -1.644e-01, -1.712e-01, 1.379e-03, 5.773e-02, 1.829e-01, -1.392e-01, -3.067e-02, -1.159e-01, -2.234e-01, 6.363e-02, 2.278e-02, -8.508e-02, 1.793e-01)); + r += mul(s3_4, M4(-1.939e-01, -2.012e-01, -2.093e-02, -2.581e-01, 6.079e-02, 2.150e-01, -1.328e-01, 2.465e-01, -1.639e-01, -1.367e-01, 5.548e-02, -6.148e-02, -8.833e-02, -2.174e-01, -1.782e-01, 6.176e-02)); + r += mul(s3_5, M4(1.715e-01, -4.351e-02, -5.803e-02, -3.304e-02, 2.949e-01, -8.398e-02, -6.903e-02, 3.449e-01, -3.465e-01, -6.238e-02, 6.393e-03, 5.881e-02, 8.145e-02, 3.321e-02, 3.255e-01, 9.732e-02)); + r += mul(s3_6, M4(-9.109e-02, 2.088e-02, -2.294e-02, -2.026e-02, 3.452e-02, -1.362e-01, -2.975e-01, 1.434e-01, 2.172e-02, 4.001e-02, 4.512e-02, 9.264e-02, -1.299e-03, -1.395e-02, 5.402e-02, 3.233e-02)); + r += mul(s3_7, M4(5.424e-02, 1.469e-03, 1.920e-01, 7.914e-02, 8.262e-02, 2.975e-01, 1.553e-01, 1.052e-01, 7.508e-02, -9.778e-02, 1.060e-02, 9.677e-02, 1.410e-01, -1.261e-01, -1.352e-01, 4.144e-02)); + r += mul(s3_8, M4(-7.174e-02, 1.715e-01, 5.468e-02, -6.194e-02, 3.492e-01, -1.378e-01, -1.953e-01, 1.411e-01, 1.519e-01, -8.186e-02, 6.761e-02, 2.683e-02, -5.830e-02, -4.595e-02, -5.331e-02, -8.498e-02)); + r += mul(s4_0, M4(-5.072e-02, -3.540e-03, 8.559e-02, 1.311e-01, -1.522e-02, -9.324e-02, -1.275e-01, 4.831e-02, -4.644e-02, -1.001e-01, -1.506e-01, -1.311e-01, 1.272e-02, -7.014e-02, 2.022e-02, 6.296e-02)); + r += mul(s4_1, M4(-2.854e-02, 1.655e-01, 8.404e-02, -5.780e-02, 3.009e-02, -6.629e-02, -5.704e-02, -6.469e-02, 5.247e-02, -1.604e-01, -9.047e-02, -2.845e-01, 9.670e-02, -1.796e-01, -1.380e-01, -3.574e-03)); + r += mul(s4_2, M4(-2.697e-03, -4.975e-03, -7.660e-03, 7.743e-02, 3.247e-02, -1.067e-01, -1.018e-01, -1.520e-01, -8.117e-02, -2.336e-02, -1.463e-02, 5.555e-02, 4.583e-03, 1.768e-01, 4.816e-02, -3.692e-02)); + r 
+= mul(s4_3, M4(1.620e-01, 8.388e-02, -1.169e-01, -3.581e-02, -4.106e-02, -3.627e-02, 1.430e-01, 5.196e-02, -1.702e-01, -3.480e-02, 4.717e-01, -3.194e-02, -3.970e-03, 2.198e-02, -8.772e-02, 5.059e-02)); + r += mul(s4_4, M4(9.552e-02, 8.912e-02, 1.907e-01, 1.027e-01, -1.978e-02, 7.419e-02, 1.448e-02, 8.088e-02, -1.997e-02, -2.794e-02, -1.128e-01, 2.479e-01, 1.915e-01, 1.131e-01, 1.333e-01, 1.439e-01)); + r += mul(s4_5, M4(-2.261e-02, 1.215e-01, 2.526e-02, -1.030e-01, 2.554e-02, -5.495e-02, -2.359e-01, 7.267e-02, -6.545e-02, 5.010e-02, -1.986e-01, 1.409e-02, 3.694e-02, 2.374e-02, -1.637e-01, -1.430e-01)); + r += mul(s4_6, M4(-9.275e-03, -3.836e-02, -2.318e-01, -1.291e-02, 1.239e-02, 2.026e-02, -6.110e-02, 1.283e-01, -2.133e-02, 1.175e-01, -8.486e-02, 1.390e-01, -5.388e-02, 6.180e-03, 5.454e-02, -4.336e-02)); + r += mul(s4_7, M4(2.686e-03, -2.226e-01, -1.882e-01, -3.096e-02, -1.566e-01, 4.253e-02, 1.095e-01, 6.164e-02, 2.204e-01, 1.283e-01, 1.472e-03, 4.472e-03, -1.886e-02, -7.630e-02, -6.029e-03, 2.414e-02)); + r += mul(s4_8, M4(-1.006e-02, -1.550e-01, 1.131e-01, 8.689e-02, 1.551e-01, 4.256e-02, 1.496e-02, 1.045e-01, -5.234e-02, 6.156e-02, 8.616e-02, -1.477e-02, 7.000e-02, 4.368e-02, -6.712e-02, -8.073e-03)); + r += mul(s5_0, M4(-1.165e-02, 3.834e-02, 1.929e-01, 2.580e-01, 3.527e-02, -3.028e-02, 1.145e-01, -4.400e-02, -3.880e-02, 1.096e-01, 3.407e-02, 9.251e-02, -8.576e-03, -4.400e-02, -5.589e-02, -4.480e-02)); + r += mul(s5_1, M4(-5.521e-02, 1.958e-01, 9.908e-03, -3.048e-02, -1.328e-01, -1.558e-01, -1.928e-01, -3.044e-02, 9.158e-02, 2.361e-01, 1.509e-01, -1.519e-01, -1.864e-01, -2.131e-01, -1.853e-01, -9.660e-02)); + r += mul(s5_2, M4(8.105e-02, 5.143e-02, 8.310e-02, -3.027e-02, 2.522e-02, -3.523e-02, 6.113e-02, -4.142e-02, 5.358e-02, 2.218e-02, -4.899e-02, 6.927e-02, -1.362e-02, -4.119e-02, -4.594e-02, -8.481e-02)); + r += mul(s5_3, M4(1.161e-01, -2.525e-02, -3.580e-01, -1.583e-01, 8.566e-02, 1.360e-01, -2.126e-02, 1.256e-01, 8.959e-02, -2.142e-01, -4.432e-01, 
-2.091e-02, -1.128e-01, -6.440e-02, -9.157e-02, -7.419e-02)); + r += mul(s5_4, M4(1.432e-01, 9.207e-02, 1.216e-01, 1.868e-01, 4.710e-03, 6.143e-02, 2.036e-02, 2.773e-02, 3.342e-01, -3.121e-02, 1.280e-01, -4.769e-01, -1.556e-01, 1.409e-01, 7.212e-02, 2.450e-01)); + r += mul(s5_5, M4(-4.965e-02, -2.193e-01, 4.564e-02, -1.510e-02, -1.967e-01, 2.075e-01, -7.082e-02, -1.474e-01, -1.644e-01, -1.231e-02, 1.465e-03, -8.072e-02, 5.559e-02, -2.192e-02, -8.904e-02, 8.271e-02)); + r += mul(s5_6, M4(2.420e-02, -7.204e-02, -1.764e-01, -1.620e-01, 3.617e-02, -6.037e-02, -7.868e-02, -4.807e-02, -1.401e-01, -1.517e-02, -7.456e-03, -1.255e-01, -4.744e-02, 7.887e-02, 2.760e-01, -8.194e-02)); + r += mul(s5_7, M4(-1.462e-01, -5.964e-02, -1.076e-01, 1.317e-01, 1.432e-01, -1.444e-01, 1.604e-01, -1.333e-01, 9.255e-02, -3.494e-01, -1.301e-01, -1.314e-01, -1.797e-01, -9.114e-02, 2.844e-01, 8.951e-02)); + r += mul(s5_8, M4(9.544e-02, -2.597e-01, -5.053e-02, 7.503e-02, -3.005e-02, -1.006e-01, -2.448e-02, -5.392e-03, -3.139e-03, 1.526e-02, 6.304e-02, -3.833e-02, 3.146e-01, 2.355e-01, 1.633e-01, 3.961e-02)); + r += mul(s6_0, M4(4.636e-02, 4.262e-02, 3.864e-02, 9.925e-03, 1.837e-01, -1.999e-02, -6.969e-02, 1.142e-01, 2.331e-03, 1.973e-02, -1.066e-02, 3.883e-03, 8.835e-02, 4.475e-02, 7.398e-02, 1.614e-01)); + r += mul(s6_1, M4(-5.366e-03, 9.651e-03, -1.386e-01, 7.966e-02, -5.665e-02, 2.822e-01, 7.414e-02, 9.816e-02, 9.970e-02, -1.057e-01, -1.293e-01, -2.524e-02, -1.975e-01, 8.194e-04, 7.563e-02, 7.114e-02)); + r += mul(s6_2, M4(-9.110e-02, 3.338e-02, 5.020e-02, 3.362e-02, -5.810e-02, 3.438e-02, -3.802e-02, 4.279e-02, 3.974e-02, -9.508e-02, -2.217e-02, -2.416e-02, 4.781e-02, 5.796e-02, -2.198e-02, -7.349e-02)); + r += mul(s6_3, M4(3.351e-02, -1.827e-02, 4.784e-02, 6.976e-03, -1.206e-01, -1.366e-01, -1.192e-01, 6.868e-02, 1.499e-01, 1.403e-02, 3.193e-01, -9.053e-02, 2.399e-02, 5.364e-02, -1.321e-01, 5.013e-02)); + r += mul(s6_4, M4(-1.453e-02, -1.141e-01, -7.246e-02, 1.635e-01, 1.065e-01, 
2.452e-02, -3.737e-02, -1.136e-01, -1.491e-01, -3.948e-01, 8.181e-02, 1.890e-02, 2.061e-01, 4.008e-02, 8.056e-02, 7.170e-02)); + r += mul(s6_5, M4(1.725e-01, -6.943e-02, -1.250e-01, -2.471e-02, -5.219e-02, -1.457e-02, 7.348e-02, 1.527e-02, 1.720e-01, -2.917e-01, 8.404e-02, 1.338e-02, 1.483e-02, -7.198e-03, 1.363e-01, 5.711e-02)); + r += mul(s6_6, M4(-1.382e-02, 3.408e-02, 2.277e-02, -6.609e-03, -1.179e-02, -1.054e-01, 1.119e-01, 9.839e-02, 2.587e-02, 1.137e-01, 1.036e-01, -1.115e-02, 4.407e-02, -4.043e-02, -3.306e-01, -6.428e-02)); + r += mul(s6_7, M4(-9.106e-03, 1.359e-01, 1.737e-01, 1.440e-01, 1.975e-01, -1.186e-01, -2.003e-01, 1.971e-02, -1.398e-01, 1.812e-01, 3.037e-01, 1.114e-01, 1.335e-03, -5.942e-02, -1.107e-01, 5.152e-02)); + r += mul(s6_8, M4(1.070e-02, 1.372e-01, 2.074e-02, 1.492e-02, -1.984e-02, -3.483e-02, 1.191e-02, 2.197e-02, 2.504e-01, 7.123e-02, 5.291e-02, 1.117e-01, 4.101e-02, -6.315e-04, -6.324e-03, 7.728e-02)); + r += mul(s7_0, M4(-3.974e-02, 2.985e-02, -1.092e-02, 1.240e-01, 1.882e-01, -3.639e-02, 1.109e-01, -1.198e-01, -1.671e-02, 1.336e-02, -5.847e-04, 1.193e-03, 2.677e-01, -5.223e-03, -4.448e-02, 1.367e-01)); + r += mul(s7_1, M4(-3.101e-01, 2.589e-01, 3.798e-01, -1.355e-01, 8.195e-02, 5.822e-02, -7.400e-02, -1.067e-01, -7.814e-02, 1.383e-01, 5.114e-02, 1.185e-01, 1.133e-01, -2.103e-01, -2.439e-01, -1.480e-01)); + r += mul(s7_2, M4(-1.113e-01, 3.120e-01, 1.693e-01, -5.996e-01, 2.809e-02, -4.451e-02, 7.395e-02, -6.342e-02, -3.381e-02, 4.870e-02, -1.399e-03, -6.268e-02, 1.393e-01, 6.764e-02, 3.476e-01, -1.854e-01)); + r += mul(s7_3, M4(4.503e-02, 1.243e-01, -1.130e-01, 2.658e-02, 1.479e-01, 4.798e-02, 1.252e-01, -1.806e-02, -7.891e-02, 1.534e-02, -3.566e-03, 2.627e-02, 5.887e-02, -1.256e-01, -5.538e-01, -2.069e-01)); + r += mul(s7_4, M4(-2.137e-01, -1.263e-01, -6.078e-02, -1.987e-01, -1.223e-01, -1.493e-01, 8.156e-02, 8.118e-02, -1.225e-01, 3.500e-02, 1.243e-01, 5.459e-03, -1.236e-01, 6.559e-01, 2.614e-01, -1.698e-01)); + r += mul(s7_5, 
M4(-3.382e-01, 5.502e-02, 1.169e-01, -3.525e-01, 7.489e-03, -2.617e-02, -8.019e-02, -1.182e-01, -3.470e-02, 2.337e-02, 1.442e-02, -5.355e-02, 3.594e-01, 3.674e-01, 1.463e-01, 3.906e-02)); + r += mul(s7_6, M4(-1.151e-02, -2.260e-02, -3.088e-02, -6.555e-02, 7.364e-04, -3.801e-02, 6.501e-03, 3.616e-02, 6.097e-02, -6.920e-02, -7.986e-02, -6.582e-02, 2.156e-02, -1.616e-01, -4.479e-01, -1.592e-01)); + r += mul(s7_7, M4(-6.885e-02, -1.099e-02, -9.592e-02, -9.008e-02, 5.067e-02, -7.398e-02, -5.587e-02, -5.412e-02, 1.803e-01, -7.075e-02, -5.241e-01, -1.093e+00, 1.650e-02, 9.611e-02, 2.075e-01, -4.235e-02)); + r += mul(s7_8, M4(1.153e-01, 1.718e-02, 8.889e-02, -8.588e-02, -2.290e-01, 1.386e-02, 6.186e-03, 4.417e-02, -2.398e-01, -8.062e-02, -8.487e-02, -2.124e-01, -4.520e-02, 1.713e-01, -1.170e-01, 1.027e-01)); + r += V4(-2.069e-02, -2.987e-03, 1.740e-02, -2.946e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.484e-02, 1.363e-01, 1.128e-02, 2.731e-02, 2.148e-01, 2.105e-02, 5.216e-02, 5.292e-02, -1.420e-01, -1.113e-01, 7.175e-02, 2.105e-01, -1.338e-01, -1.191e-01, -5.884e-02, -1.030e-01)); + r += mul(s0_1, M4(-1.851e-01, -1.903e-02, 8.249e-02, 2.198e-02, -2.887e-02, -1.088e-01, -4.141e-03, 6.071e-02, -5.344e-01, 5.386e-02, 2.128e-01, 3.875e-01, 2.583e-01, -2.893e-02, 6.592e-02, -7.484e-03)); + r += mul(s0_2, M4(1.078e-01, 
1.461e-01, -1.388e-02, 1.416e-01, 9.463e-02, -7.817e-02, 2.688e-02, 3.827e-02, 1.508e-01, 1.824e-01, -4.168e-02, 2.600e-01, 4.083e-02, -2.807e-02, -1.836e-02, -8.304e-03)); + r += mul(s0_3, M4(-1.126e-01, -1.466e-01, -3.001e-02, 1.208e-01, 4.733e-02, -1.028e-01, 5.886e-03, -5.126e-03, -1.510e-01, 3.632e-01, -2.124e-01, 2.895e-01, -1.412e-01, 9.587e-02, -7.396e-02, 8.584e-02)); + r += mul(s0_4, M4(-7.479e-02, -7.598e-03, 1.048e-01, -1.165e-02, -2.234e-01, -1.819e-01, -6.609e-05, 2.110e-01, -2.571e-02, 2.342e-02, -1.309e-01, -7.539e-03, -1.236e-01, 1.297e-01, -1.038e-01, 1.552e-01)); + r += mul(s0_5, M4(6.023e-02, 8.846e-02, -1.023e-01, -1.589e-02, -9.358e-03, -3.013e-02, -4.546e-02, 7.737e-02, -3.049e-02, 9.323e-02, -1.501e-01, 1.910e-01, 2.322e-01, -1.065e-01, 4.293e-02, -7.179e-03)); + r += mul(s0_6, M4(1.627e-02, -1.165e-02, 6.617e-02, -1.753e-01, -5.665e-02, -9.799e-02, -9.912e-03, 2.379e-01, 1.711e-01, 1.360e-01, 1.661e-01, 3.947e-01, 6.462e-03, -1.389e-01, 1.025e-01, 1.542e-01)); + r += mul(s0_7, M4(1.922e-01, -1.436e-02, 5.552e-02, -7.602e-04, -1.403e-01, -6.904e-02, 1.033e-01, -1.636e-01, 3.080e-01, -1.015e-04, 4.191e-02, 4.467e-01, -2.803e-02, 9.106e-02, 1.607e-01, 1.954e-02)); + r += mul(s0_8, M4(-3.284e-02, -6.743e-02, 8.716e-03, 2.141e-02, -9.278e-03, -1.025e-01, -4.230e-03, -3.253e-02, -2.380e-01, -1.509e-01, 3.078e-01, -1.802e-02, 1.304e-01, 8.580e-02, -6.820e-04, -1.603e-02)); + r += mul(s1_0, M4(9.042e-02, -1.366e-01, 1.458e-01, 1.153e-04, -2.110e-02, -9.788e-02, 5.997e-03, 7.205e-02, 1.532e-01, 1.358e-01, 7.913e-03, -3.246e-03, 4.556e-03, 5.523e-02, -7.687e-02, -9.071e-02)); + r += mul(s1_1, M4(-9.420e-03, -9.019e-02, -8.659e-02, -8.480e-02, -3.355e-01, 3.096e-01, -7.870e-02, -1.325e-01, 1.520e-01, -5.675e-02, -4.352e-04, -4.262e-02, 9.278e-02, 1.046e-01, 1.015e-02, -6.058e-03)); + r += mul(s1_2, M4(-1.024e-01, -4.386e-02, 4.412e-02, -2.980e-02, 1.547e-01, 2.227e-01, -1.597e-01, -8.375e-02, 6.362e-02, -5.846e-02, 3.936e-02, -4.481e-02, 9.786e-02, 
-3.618e-02, 1.203e-01, -1.681e-02)); + r += mul(s1_3, M4(-8.245e-02, 3.859e-02, -9.779e-02, 4.944e-02, -1.470e-01, 3.137e-01, -2.276e-02, -1.920e-01, -1.117e-01, -1.752e-01, 7.393e-02, -1.024e-01, 4.972e-03, -6.864e-02, 1.742e-02, -8.623e-03)); + r += mul(s1_4, M4(9.116e-02, 3.166e-01, 9.215e-03, -2.985e-01, -4.718e-01, 3.885e-01, -1.375e-01, -3.841e-01, -2.298e-01, 3.961e-02, -1.559e-01, 3.777e-03, 3.737e-01, -4.798e-02, 1.972e-01, -9.030e-02)); + r += mul(s1_5, M4(1.063e-01, 5.654e-02, 9.905e-02, -4.074e-02, 1.597e-01, 2.864e-01, -1.859e-01, 3.723e-02, 1.923e-02, -6.971e-02, 5.062e-02, -6.205e-03, 2.413e-01, -4.901e-02, -9.816e-02, -1.688e-01)); + r += mul(s1_6, M4(-5.862e-02, -2.331e-01, 1.486e-02, -5.033e-02, 8.738e-02, 3.527e-01, -1.055e-01, 8.905e-02, -8.721e-02, 3.613e-02, 9.311e-02, -8.565e-02, 1.348e-01, 9.023e-02, -5.384e-02, 2.078e-02)); + r += mul(s1_7, M4(-7.510e-02, -2.392e-01, -4.755e-01, 8.797e-02, 1.964e-02, 4.879e-01, -1.827e-01, -2.588e-01, -3.040e-02, -1.410e-01, -5.313e-02, 1.515e-02, -1.777e-01, 3.547e-02, -1.833e-01, -4.035e-02)); + r += mul(s1_8, M4(-3.043e-02, -6.401e-02, 1.010e-02, -1.899e-01, 2.236e-02, 1.968e-01, -8.204e-02, -6.947e-03, 3.201e-02, 1.249e-03, 1.314e-02, -6.879e-02, -2.536e-02, -9.149e-03, -6.333e-02, 3.822e-02)); + r += mul(s2_0, M4(5.347e-02, 7.325e-03, -4.429e-02, -4.327e-02, -1.392e-01, -3.509e-02, -5.283e-02, 1.858e-02, 7.683e-02, 8.472e-02, -1.160e-01, 3.134e-02, -9.732e-02, -3.585e-01, 4.247e-02, 2.477e-02)); + r += mul(s2_1, M4(-1.586e-01, -1.442e-01, 1.286e-01, -7.904e-02, 2.603e-02, 1.464e-01, 7.698e-02, -1.023e-01, -1.926e-01, -8.076e-02, 6.816e-02, -2.236e-03, 3.057e-01, 1.784e-01, -8.541e-02, 1.268e-01)); + r += mul(s2_2, M4(2.412e-01, 1.712e-01, 1.838e-02, 9.514e-02, -7.224e-02, -2.570e-02, -3.206e-02, 7.936e-02, 2.439e-02, -1.086e-01, 2.443e-02, -5.714e-02, 8.040e-02, 1.994e-02, 7.100e-02, -1.015e-01)); + r += mul(s2_3, M4(9.619e-02, 2.899e-02, -6.595e-02, 2.943e-01, -6.384e-02, -1.141e-02, 2.826e-02, 
9.702e-02, 5.752e-02, -8.619e-02, -4.905e-02, -7.927e-02, -1.452e-02, 2.305e-01, -9.046e-02, -6.537e-04)); + r += mul(s2_4, M4(-2.045e-01, -2.654e-02, 2.702e-01, -9.460e-02, 2.665e-02, -6.689e-02, -1.427e-01, 1.653e-01, 1.390e-01, -1.241e-01, 1.771e-01, -6.290e-02, -2.333e-01, -2.707e-01, 9.625e-03, -5.905e-02)); + r += mul(s2_5, M4(8.847e-02, -7.852e-02, -1.493e-01, -9.243e-02, -2.261e-01, 7.904e-03, -7.501e-02, -2.753e-02, -6.426e-02, -9.592e-02, 5.333e-02, 2.715e-02, 3.946e-01, -4.773e-02, 5.602e-02, 8.862e-02)); + r += mul(s2_6, M4(-1.205e-01, -6.520e-02, -5.556e-02, 3.410e-02, -6.862e-02, 5.047e-02, 9.071e-02, -1.081e-01, 3.530e-02, 1.150e-02, -1.697e-02, -4.909e-02, 2.646e-02, -1.374e-01, 4.384e-02, -5.705e-02)); + r += mul(s2_7, M4(-1.264e-01, 1.952e-02, -1.369e-01, 1.110e-01, 5.948e-02, -1.002e-01, -3.700e-01, 2.251e-01, 1.225e-01, 7.785e-02, 1.945e-02, 3.239e-02, 2.355e-02, 2.343e-02, 2.768e-01, 4.173e-03)); + r += mul(s2_8, M4(9.095e-02, -9.903e-02, -7.958e-04, 8.852e-02, -1.572e-01, -8.326e-03, 7.338e-02, 9.836e-02, -6.421e-02, 6.471e-02, -2.983e-02, -6.093e-03, 1.107e-01, -9.398e-02, -6.380e-02, 4.922e-02)); + r += mul(s3_0, M4(-4.309e-02, -1.197e-01, 4.255e-02, -1.473e-02, -1.998e-01, -6.516e-02, -8.865e-02, -1.683e-01, -3.871e-02, 1.745e-01, -1.008e-01, 5.839e-02, 6.387e-02, 4.789e-02, 2.588e-02, 8.918e-02)); + r += mul(s3_1, M4(1.241e-01, -3.877e-01, 2.505e-02, 9.344e-03, -8.102e-02, 4.774e-01, 1.097e-01, -1.704e-01, -2.908e-01, -2.172e-01, 5.179e-02, 7.486e-02, -3.017e-02, -5.884e-02, -1.501e-01, 1.206e-01)); + r += mul(s3_2, M4(-7.797e-02, -1.143e-01, 2.076e-01, 1.485e-01, 7.942e-02, 2.727e-01, -2.411e-02, -2.504e-02, -6.716e-02, 1.004e-01, 8.960e-02, 2.700e-01, 9.010e-02, 1.735e-01, 6.447e-02, -1.233e-01)); + r += mul(s3_3, M4(1.893e-01, -3.796e-03, 3.319e-02, 6.583e-02, -1.925e-01, 3.439e-01, -5.220e-02, -3.126e-02, 4.169e-03, 1.043e-02, 2.200e-01, 5.113e-01, -9.720e-02, 1.513e-01, -5.826e-02, 2.044e-01)); + r += mul(s3_4, M4(1.065e-01, 
-3.098e-01, 5.500e-02, -1.968e-01, 2.995e-01, 1.819e-01, 1.623e-01, -4.408e-01, 1.195e-01, 4.975e-02, 3.792e-01, 7.896e-02, -5.645e-02, 1.141e-01, -2.436e-01, 1.889e-01)); + r += mul(s3_5, M4(2.662e-01, -1.622e-01, 3.682e-01, -1.006e-02, 1.401e-01, 2.683e-01, -2.091e-01, -1.592e-01, -8.872e-02, -3.401e-02, -1.889e-03, 1.025e-01, -5.749e-02, 4.696e-02, 3.135e-01, -9.817e-02)); + r += mul(s3_6, M4(2.941e-02, -7.613e-02, -6.551e-03, -1.035e-01, -7.753e-02, 4.135e-01, 6.004e-02, -1.295e-01, 2.653e-02, -1.741e-02, -1.511e-01, 2.690e-02, -6.961e-02, -3.361e-02, 1.144e-02, -8.718e-02)); + r += mul(s3_7, M4(-4.181e-02, -1.020e-01, 2.157e-01, -9.168e-02, 1.548e-02, 2.957e-01, -2.600e-02, -2.993e-01, 4.842e-02, -7.354e-02, 2.110e-01, 1.405e-01, 1.667e-01, 1.008e-01, -6.331e-02, 6.662e-02)); + r += mul(s3_8, M4(3.903e-03, -1.583e-01, -4.185e-02, -3.656e-02, -4.145e-03, 6.326e-03, -1.888e-01, 1.221e-02, -6.146e-02, -2.519e-02, 6.748e-02, 8.828e-02, -6.787e-02, -5.366e-02, 1.493e-02, 1.099e-02)); + r += mul(s4_0, M4(-5.212e-02, -2.718e-02, -6.386e-02, 7.415e-02, 1.907e-03, 1.751e-01, -1.435e-02, -1.995e-02, -1.429e-01, -3.443e-02, 1.670e-01, 7.753e-02, -2.640e-03, 2.735e-02, 2.744e-02, 5.876e-02)); + r += mul(s4_1, M4(6.529e-02, -9.429e-02, 4.892e-02, -1.701e-02, 1.001e-01, 7.521e-02, -5.676e-02, 4.098e-02, 1.464e-02, -3.263e-03, -1.239e-02, 1.567e-01, -1.362e-01, 4.094e-03, -1.228e-01, -4.357e-03)); + r += mul(s4_2, M4(1.608e-02, 1.142e-01, -6.012e-02, -5.642e-03, -1.307e-02, 1.401e-01, -6.375e-03, -6.159e-02, 3.472e-02, -4.362e-02, 5.201e-02, -1.493e-02, -3.256e-02, 4.176e-03, -4.099e-02, 3.087e-03)); + r += mul(s4_3, M4(-3.638e-02, 1.756e-01, -1.242e-02, 3.948e-02, 1.582e-01, 1.562e-01, 1.811e-02, -3.975e-02, 3.561e-01, -7.619e-03, 2.069e-01, -2.175e-01, 9.041e-03, -1.801e-01, 1.094e-01, -1.363e-01)); + r += mul(s4_4, M4(-3.071e-01, -2.576e-01, 5.100e-02, -9.115e-02, 1.922e-01, 9.628e-02, -8.975e-02, 4.941e-02, 2.380e-01, -3.636e-03, -7.834e-03, -4.376e-01, 1.751e-01, 
-1.537e-01, 4.383e-02, -1.223e-01)); + r += mul(s4_5, M4(5.033e-02, -5.616e-03, -6.727e-03, -6.098e-02, 7.306e-02, 1.751e-01, 2.450e-01, 3.670e-02, 1.730e-01, -8.718e-02, 3.038e-03, 1.224e-02, -1.678e-02, 9.115e-03, -3.014e-02, 1.537e-01)); + r += mul(s4_6, M4(-1.150e-01, 2.567e-02, -2.899e-02, -6.135e-02, 3.416e-02, 8.015e-02, -1.608e-01, 1.962e-02, -4.545e-02, -1.634e-01, 3.203e-03, 7.314e-02, 5.206e-02, 4.898e-02, -9.349e-02, 7.265e-02)); + r += mul(s4_7, M4(6.312e-02, 1.064e-01, -1.538e-01, 8.327e-02, -2.634e-02, 1.686e-02, -4.496e-02, -1.649e-02, -3.866e-02, 1.477e-02, -5.656e-02, 3.023e-02, -2.998e-01, 1.067e-01, 8.864e-02, 1.002e-01)); + r += mul(s4_8, M4(3.159e-02, 6.080e-03, 4.017e-02, 9.761e-02, 7.659e-02, -2.756e-03, 7.293e-02, -1.748e-01, 5.821e-03, 4.578e-02, -6.616e-02, -1.362e-01, 2.252e-02, -4.367e-02, -7.143e-02, -7.386e-03)); + r += mul(s5_0, M4(-5.706e-02, 8.028e-02, -4.827e-02, 7.617e-02, 3.856e-03, -5.172e-02, 6.074e-03, -3.814e-02, 3.856e-02, 1.129e-01, -2.847e-03, 5.457e-03, 1.055e-01, -3.678e-02, 2.637e-02, 7.666e-02)); + r += mul(s5_1, M4(-1.340e-01, 2.221e-02, -1.840e-02, 1.375e-01, 6.071e-03, -4.839e-02, -6.450e-02, -2.356e-03, 1.429e-01, -1.235e-01, -1.162e-01, -3.469e-02, -6.388e-02, -1.347e-03, -1.941e-02, 3.643e-02)); + r += mul(s5_2, M4(-2.089e-02, 2.257e-01, -3.685e-02, -2.035e-02, 2.204e-01, -1.359e-01, 5.952e-02, -7.981e-02, 7.158e-02, 3.472e-02, -3.589e-02, -5.763e-02, 2.305e-01, 1.726e-01, 1.183e-01, 1.133e-01)); + r += mul(s5_3, M4(1.358e-01, -5.186e-02, -7.769e-02, 2.976e-01, -4.275e-02, -2.520e-02, 6.198e-03, -7.618e-02, -2.241e-01, -8.704e-02, -1.312e-01, 2.994e-02, 1.104e-01, -7.620e-02, 8.333e-02, -1.020e-01)); + r += mul(s5_4, M4(-9.760e-02, 8.211e-02, 2.833e-01, 1.034e-01, -1.214e-01, -7.752e-02, 7.067e-02, -3.664e-02, -4.609e-01, -1.316e-01, -4.471e-01, 7.434e-02, 4.300e-01, -5.906e-02, 2.448e-01, -2.251e-01)); + r += mul(s5_5, M4(2.167e-01, -1.186e-01, -4.355e-02, -1.010e-01, -2.587e-02, 1.355e-02, 1.408e-01, 
1.098e-01, -9.257e-03, -1.341e-01, 1.755e-02, 1.099e-01, -1.095e-01, 9.256e-02, 7.816e-02, -4.387e-02)); + r += mul(s5_6, M4(-6.477e-02, -7.389e-02, 7.819e-02, 2.130e-03, -1.563e-01, 1.246e-03, -9.797e-03, 1.048e-01, 8.510e-02, 1.086e-01, -1.951e-01, 3.935e-02, 8.755e-02, -2.165e-01, 6.726e-02, -9.942e-02)); + r += mul(s5_7, M4(-9.343e-02, 3.212e-01, -1.502e-01, -2.067e-01, -1.848e-01, 2.423e-02, 6.901e-02, -1.206e-02, 3.628e-02, 4.616e-02, -1.154e-01, 3.674e-02, 1.506e-01, -7.028e-02, 2.274e-01, 1.040e-01)); + r += mul(s5_8, M4(-1.124e-01, 7.508e-02, 9.292e-03, -2.175e-01, 2.326e-02, -1.198e-03, -1.011e-02, 9.965e-02, -9.725e-03, 8.435e-02, 8.129e-02, -3.936e-02, -1.041e-01, -8.395e-02, 3.440e-02, -1.548e-01)); + r += mul(s6_0, M4(4.309e-02, 1.069e-02, 5.830e-02, 8.488e-02, 3.710e-02, -1.017e-01, -2.255e-02, 8.133e-02, 8.677e-02, -2.632e-02, -2.362e-03, 6.089e-02, -4.658e-02, -7.388e-02, -6.218e-03, 1.440e-02)); + r += mul(s6_1, M4(-1.503e-01, -9.422e-02, -2.333e-02, -1.174e-01, -1.932e-01, -8.729e-02, -1.528e-01, 9.658e-02, -2.449e-02, 7.951e-02, 1.074e-02, -1.482e-01, -1.562e-02, 9.610e-02, -1.884e-02, -2.390e-03)); + r += mul(s6_2, M4(-4.995e-02, 1.221e-01, 7.090e-02, -3.020e-02, 1.030e-01, 7.577e-02, -1.035e-01, -4.602e-02, -7.354e-03, 6.743e-02, -2.907e-02, 8.861e-02, -1.696e-01, 5.770e-02, 1.089e-02, -1.339e-02)); + r += mul(s6_3, M4(-5.793e-02, -3.234e-02, -1.031e-02, -5.240e-02, -2.342e-01, 2.934e-01, -8.607e-03, -3.140e-02, -1.454e-01, -2.016e-01, -2.977e-02, -1.380e-01, -1.277e-02, 4.317e-02, -8.529e-02, -4.209e-02)); + r += mul(s6_4, M4(1.006e-01, 7.887e-02, 2.682e-01, -1.783e-01, -3.231e-02, 2.358e-01, -3.659e-02, -9.102e-02, 3.925e-01, 3.797e-01, 3.037e-01, -4.256e-01, -1.432e-01, -4.200e-02, -2.229e-01, 1.447e-01)); + r += mul(s6_5, M4(-3.216e-02, -7.773e-02, 6.950e-02, -6.786e-02, -5.007e-02, 5.464e-03, -4.387e-02, -1.906e-02, -5.423e-02, 9.449e-02, -1.166e-01, -4.415e-01, 1.884e-02, -1.511e-02, -3.674e-02, -3.145e-02)); + r += mul(s6_6, 
M4(2.022e-03, -5.738e-02, -2.423e-02, -2.368e-02, 1.396e-01, -4.719e-02, 6.353e-02, 1.798e-02, -1.106e-01, -6.147e-02, 1.454e-02, -1.188e-01, 3.093e-02, 1.150e-01, -2.183e-02, 5.553e-02)); + r += mul(s6_7, M4(-4.065e-02, 9.399e-02, 6.671e-02, -5.721e-03, 5.936e-03, 1.269e-01, -7.222e-02, -2.388e-02, 2.619e-01, -2.158e-02, -8.924e-02, 1.281e-01, -1.135e-01, -3.007e-03, -9.746e-02, 1.963e-02)); + r += mul(s6_8, M4(-5.138e-02, -1.870e-02, -1.453e-01, 6.832e-02, 4.452e-04, -1.008e-02, -4.934e-02, 3.627e-02, -1.170e-01, -7.439e-02, -1.414e-01, 5.688e-02, 8.207e-04, -6.678e-02, -2.995e-02, -1.367e-02)); + r += mul(s7_0, M4(-6.817e-02, 2.469e-01, -1.395e-01, 8.505e-02, 6.080e-02, -1.734e-01, 1.795e-01, -7.193e-02, 3.754e-02, -5.872e-02, 2.572e-02, 3.828e-02, 1.476e-01, -3.651e-01, 7.070e-03, 1.340e-01)); + r += mul(s7_1, M4(-6.316e-03, -1.203e-01, 9.622e-02, 3.262e-02, 1.129e-01, 6.123e-02, -3.000e-02, 2.447e-01, -1.079e-01, -2.132e-03, -2.900e-02, 3.731e-02, 3.870e-01, 3.113e-01, -1.099e-01, 4.130e-01)); + r += mul(s7_2, M4(-1.462e-01, -1.103e-01, -1.563e-01, 3.878e-02, -1.411e-01, 5.459e-02, 6.373e-02, -3.447e-02, -3.014e-02, -4.506e-02, -2.200e-02, -1.378e-02, -4.950e-01, 5.802e-02, -2.420e-02, 2.306e-01)); + r += mul(s7_3, M4(-4.910e-02, -1.139e-01, -1.849e-01, 8.177e-03, -1.325e-01, -4.014e-01, 1.496e-01, -1.073e-01, 9.380e-02, -4.773e-02, -4.225e-03, 2.243e-01, 1.393e-01, 5.335e-01, -2.668e-01, -2.251e-01)); + r += mul(s7_4, M4(-1.134e-01, 1.663e-01, 1.440e-01, 1.847e-01, 3.020e-01, -1.341e-01, 1.602e-01, 1.243e-01, 1.085e-01, 2.234e-02, 6.035e-02, 2.298e-01, 9.849e-01, -1.289e-01, 2.030e-01, -2.575e-01)); + r += mul(s7_5, M4(2.986e-01, -1.106e-01, -5.721e-01, 3.473e-01, -7.268e-02, -1.940e-01, 9.358e-02, 5.481e-02, -8.842e-02, -7.366e-02, -1.714e-01, -4.578e-02, -2.949e-03, 1.634e-02, -4.188e-02, 5.497e-02)); + r += mul(s7_6, M4(1.568e-02, 3.107e-03, -4.870e-02, 1.169e-01, -9.966e-03, 2.597e-02, 8.971e-02, -9.648e-02, -1.299e-02, 1.173e-01, 8.042e-02, 9.655e-02, 
1.278e-02, -4.929e-02, 3.850e-02, 4.322e-02)); + r += mul(s7_7, M4(-1.831e-02, -1.859e-02, -9.896e-04, 6.425e-02, 2.686e-02, -5.379e-02, 9.713e-02, 4.660e-02, -2.112e-01, -8.350e-02, -5.293e-01, 2.979e-01, -1.496e-01, -2.814e-01, 1.535e-01, 3.757e-01)); + r += mul(s7_8, M4(-5.505e-04, 1.637e-01, -5.255e-02, 1.151e-01, -3.981e-02, 1.988e-02, -3.469e-02, 9.384e-04, 6.732e-02, -1.318e-01, -3.960e-02, 2.956e-01, -5.225e-02, -8.783e-03, -2.130e-01, 6.466e-02)); + r += V4(4.700e-02, -8.415e-04, 3.618e-02, -1.033e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(5.775e-03, -1.915e-02, 1.380e-01, -2.432e-01, -1.598e-03, -6.168e-02, 1.550e-02, 1.500e-02, -4.176e-02, -1.250e-01, 2.926e-01, -5.989e-01, -1.535e-02, -8.070e-03, 4.714e-02, -1.803e-02)); + r += mul(s0_1, M4(-1.209e-01, -9.318e-02, -2.872e-02, 4.523e-03, -5.891e-02, 8.063e-02, 1.018e-02, -7.905e-02, 2.183e-01, -9.264e-02, -2.915e-01, -3.450e-01, 2.925e-02, 3.904e-02, -9.594e-02, 3.135e-01)); + r += mul(s0_2, M4(-6.871e-03, 3.522e-02, 7.605e-02, 1.022e-01, -7.157e-02, -1.163e-02, -9.893e-02, -1.464e-01, -1.112e-01, 5.742e-02, 5.764e-03, 3.180e-03, -1.463e-01, 9.152e-02, 5.833e-02, -1.950e-01)); + r += mul(s0_3, M4(-4.064e-02, 2.833e-02, -2.051e-02, 8.984e-02, -9.573e-03, -2.246e-02, -1.190e-01, -1.258e-01, -7.168e-01, -1.872e-01, 4.114e-01, 1.773e-01, 8.321e-02, 
-2.403e-02, -2.976e-02, 8.150e-02)); + r += mul(s0_4, M4(-1.311e-02, 6.584e-02, 1.059e-01, 5.435e-02, 1.123e-01, -7.403e-02, -1.220e-02, -5.464e-02, -3.922e-01, -2.240e-01, 2.893e-01, -1.939e-01, 4.861e-02, -1.121e-01, 2.521e-02, -4.980e-02)); + r += mul(s0_5, M4(1.092e-01, -5.111e-02, -5.624e-02, -2.115e-01, -3.735e-02, 3.550e-02, 6.980e-02, 1.099e-01, -1.519e-01, 1.773e-01, -2.026e-02, -4.360e-02, -1.731e-01, -6.923e-02, -5.706e-02, 7.600e-02)); + r += mul(s0_6, M4(8.325e-02, 3.091e-02, 5.629e-02, 1.093e-01, 7.900e-02, 1.461e-03, -6.437e-02, 1.671e-02, -1.752e-01, 2.031e-02, 2.250e-01, 4.330e-02, 1.337e-01, 9.253e-02, -9.684e-02, -9.401e-02)); + r += mul(s0_7, M4(-4.894e-02, 1.022e-02, -1.280e-01, -2.706e-01, 4.335e-02, -2.013e-02, -2.357e-01, -1.417e-01, -1.250e-01, 5.286e-02, -5.300e-03, 1.706e-01, 8.294e-02, 1.679e-01, -1.052e-01, -7.337e-02)); + r += mul(s0_8, M4(-5.247e-02, 2.057e-01, 1.361e-01, 1.726e-01, 1.140e-02, -1.476e-02, 1.134e-01, -2.014e-01, 2.536e-02, 6.699e-02, 4.528e-02, -1.776e-01, -7.202e-02, -7.618e-02, -1.158e-02, 1.702e-01)); + r += mul(s1_0, M4(-1.952e-03, -6.681e-03, -1.451e-01, 1.533e-01, 1.081e-02, 1.271e-02, 7.490e-02, 1.208e-01, -3.395e-02, -1.497e-02, -2.631e-02, 4.242e-02, 6.987e-02, -2.747e-02, 4.741e-02, 8.648e-02)); + r += mul(s1_1, M4(3.855e-02, -3.548e-01, -2.914e-01, 3.399e-01, -1.315e-02, 1.092e-01, 2.029e-02, 3.275e-01, 2.009e-02, -5.757e-02, 3.717e-03, 4.731e-02, -2.055e-03, -1.584e-01, -1.442e-01, -1.122e-02)); + r += mul(s1_2, M4(4.168e-02, -2.889e-01, -8.142e-03, 1.570e-01, 2.149e-02, -1.116e-01, 1.079e-01, 1.796e-01, -8.895e-02, -1.314e-02, -1.826e-02, -2.545e-02, 1.139e-01, -5.690e-02, 1.901e-02, -8.866e-02)); + r += mul(s1_3, M4(4.450e-02, 5.745e-02, -2.083e-01, 3.231e-01, 6.007e-02, -1.125e-01, -1.766e-01, 1.204e-01, 1.214e-01, -5.118e-02, -2.882e-01, -2.368e-01, 1.062e-01, 3.331e-03, 2.207e-03, 1.383e-01)); + r += mul(s1_4, M4(6.997e-02, -2.012e-01, 1.352e-01, 4.280e-01, 6.532e-03, -8.004e-02, 1.141e-01, 2.356e-02, 
-1.539e-02, -5.730e-02, -1.123e-01, 2.892e-02, -6.529e-02, -8.186e-02, 5.215e-02, 1.579e-01)); + r += mul(s1_5, M4(-1.647e-02, -5.560e-02, 1.827e-01, -4.019e-02, -2.140e-01, -5.621e-02, 9.606e-02, 6.940e-01, 1.692e-01, -4.055e-02, -9.464e-02, 1.215e-01, -2.381e-01, -1.810e-03, -2.217e-02, 9.709e-03)); + r += mul(s1_6, M4(1.115e-01, -6.232e-02, -1.842e-02, 1.088e-01, 1.617e-02, 1.216e-02, -4.391e-02, -6.813e-02, -1.200e-02, -6.210e-02, -1.142e-01, -1.091e-03, 4.177e-02, -2.947e-03, 8.917e-02, -6.753e-02)); + r += mul(s1_7, M4(9.709e-02, -1.644e-01, -1.105e-01, 8.448e-01, -3.137e-01, -1.223e-03, 2.051e-01, 2.591e-01, 6.120e-02, -5.587e-02, 5.817e-02, 2.077e-02, -6.603e-02, 1.237e-01, -1.174e-01, 2.430e-01)); + r += mul(s1_8, M4(1.362e-02, 4.226e-03, 9.381e-02, 3.483e-01, -2.034e-01, -1.091e-01, 8.972e-02, 2.797e-01, -8.692e-02, 9.477e-03, -4.873e-03, 5.114e-02, 6.086e-02, 6.953e-02, -1.248e-01, 1.996e-01)); + r += mul(s2_0, M4(4.099e-02, -2.409e-02, -1.468e-01, 8.254e-02, -2.475e-02, 3.390e-02, -6.001e-02, -4.829e-05, 1.152e-01, -3.965e-02, -2.303e-02, -4.546e-02, 2.851e-02, 2.591e-02, -1.313e-01, -1.955e-01)); + r += mul(s2_1, M4(1.176e-01, 8.189e-02, 1.254e-01, 4.143e-02, 1.287e-02, -8.469e-02, 5.808e-02, 6.216e-02, 2.254e-02, 4.490e-02, -1.897e-01, -5.535e-02, -7.591e-02, 6.961e-02, -2.051e-01, 1.174e-01)); + r += mul(s2_2, M4(-4.913e-02, -3.644e-02, 8.416e-02, -2.713e-01, -2.499e-03, -8.316e-02, -8.719e-02, -2.012e-02, 1.115e-01, -7.469e-02, -2.734e-02, -8.379e-02, 8.680e-02, -1.107e-01, 1.119e-01, -2.290e-02)); + r += mul(s2_3, M4(3.903e-02, 1.035e-01, 2.128e-02, 6.264e-02, 7.463e-02, -8.552e-02, 6.475e-02, -1.452e-01, -3.157e-02, -7.142e-02, 2.747e-02, 3.922e-01, -5.046e-02, -2.033e-01, -1.181e-01, -7.762e-02)); + r += mul(s2_4, M4(2.228e-01, 4.668e-02, 4.773e-02, -2.063e-02, -1.213e-01, 7.994e-02, 1.636e-01, -1.763e-01, -5.805e-03, 3.983e-02, -5.585e-02, 7.100e-02, -8.525e-03, -4.250e-01, 8.265e-02, -1.339e-01)); + r += mul(s2_5, M4(-5.280e-02, -3.438e-02, 
3.967e-02, -1.133e-01, 6.352e-03, 1.601e-01, 5.818e-02, 1.326e-01, 1.388e-01, 7.722e-02, 1.132e-01, -3.909e-02, 2.519e-04, -1.723e-01, -1.168e-01, 1.490e-01)); + r += mul(s2_6, M4(6.596e-03, -7.169e-02, 7.047e-02, -2.775e-01, -3.024e-03, 4.072e-02, 3.099e-03, -6.798e-02, 1.090e-02, 1.233e-03, -1.005e-02, 1.251e-01, 2.488e-02, -2.424e-02, 2.031e-02, 1.340e-02)); + r += mul(s2_7, M4(1.544e-01, 4.432e-02, -1.298e-01, 1.629e-01, -6.938e-02, -2.137e-02, -1.190e-01, 1.030e-01, -2.966e-02, 2.679e-02, 6.637e-02, -3.573e-02, 4.394e-02, 5.055e-03, -8.268e-02, -1.051e-01)); + r += mul(s2_8, M4(-4.681e-02, -4.377e-02, 8.927e-02, 5.233e-02, -6.401e-02, -1.798e-02, 8.584e-02, -1.031e-01, 1.057e-01, 3.010e-03, -4.292e-02, -6.953e-02, -3.872e-03, -2.862e-02, -2.269e-02, -7.101e-02)); + r += mul(s3_0, M4(-2.470e-03, -6.987e-02, -6.901e-02, -1.364e-01, -5.554e-02, -1.329e-01, 2.559e-01, 9.696e-03, -9.911e-02, -2.221e-02, -2.538e-01, 1.265e-01, 4.683e-02, -6.980e-02, -1.954e-01, -8.603e-02)); + r += mul(s3_1, M4(1.254e-01, 2.579e-01, -6.602e-02, 8.325e-02, 5.961e-02, -2.167e-01, 5.840e-01, -1.367e-01, -2.569e-01, -2.328e-03, -5.283e-03, -1.728e-01, -2.137e-02, 2.465e-01, 5.074e-01, 7.129e-02)); + r += mul(s3_2, M4(-2.218e-02, -1.554e-01, 3.234e-02, -4.435e-02, 9.366e-02, -5.878e-03, -7.086e-02, 1.741e-01, -2.773e-01, 9.823e-02, 6.571e-02, -1.128e-02, -2.979e-02, 1.190e-01, 1.796e-01, 8.459e-02)); + r += mul(s3_3, M4(-8.935e-02, 1.110e-01, 9.640e-02, 2.515e-01, -1.905e-01, 1.623e-01, 2.097e-01, 1.553e-01, -1.017e-01, -1.275e-01, 9.053e-02, -1.946e-01, -1.909e-01, -7.685e-02, -2.068e-02, -1.055e-02)); + r += mul(s3_4, M4(2.260e-01, 1.087e-01, -3.768e-01, -3.065e-03, -2.213e-01, -2.439e-01, 1.779e-01, 6.739e-02, -1.644e-01, -8.631e-02, 4.835e-01, 3.453e-02, 1.473e-01, -6.266e-02, -3.585e-02, 6.705e-02)); + r += mul(s3_5, M4(-9.208e-03, 9.673e-03, 2.043e-01, -2.582e-01, 1.772e-01, -3.914e-02, 1.438e-02, 4.101e-01, -5.037e-02, -6.065e-02, 2.835e-01, -1.345e-01, 3.581e-02, 1.024e-03, 
-8.780e-03, 3.375e-03)); + r += mul(s3_6, M4(1.456e-01, -6.021e-02, 1.159e-01, -1.113e-01, -2.387e-02, 3.019e-01, -1.355e-01, -5.563e-02, -3.382e-01, -1.946e-01, 1.994e-01, 1.954e-01, -7.471e-02, -2.324e-02, -9.937e-03, 3.020e-02)); + r += mul(s3_7, M4(1.250e-01, 5.734e-02, -3.092e-02, 4.361e-02, -1.033e-01, 4.429e-02, -6.119e-02, -1.869e-01, -9.072e-02, -2.381e-02, 1.206e-01, -1.160e-03, 3.010e-02, 8.222e-02, -1.184e-01, 4.834e-02)); + r += mul(s3_8, M4(-5.493e-02, 6.257e-03, -1.606e-01, 6.701e-02, 1.589e-02, 6.510e-02, 1.324e-02, 5.111e-02, -1.510e-02, -1.567e-02, 7.450e-02, -1.451e-01, -1.068e-02, 1.092e-02, 4.474e-03, 3.111e-02)); + r += mul(s4_0, M4(5.026e-03, 1.825e-02, 2.259e-02, -4.380e-03, -5.178e-02, -2.513e-02, -5.339e-02, 1.241e-01, -5.429e-02, 3.186e-02, -2.382e-01, -4.040e-02, 7.159e-02, 1.341e-02, 1.121e-01, -3.143e-03)); + r += mul(s4_1, M4(-8.335e-02, 5.689e-02, -6.871e-02, -6.070e-03, -2.638e-02, 2.636e-01, -1.103e-01, 9.037e-02, 1.066e-01, -1.255e-01, -6.273e-02, -4.147e-02, -1.496e-01, -3.183e-02, -3.861e-02, -6.475e-02)); + r += mul(s4_2, M4(-1.181e-03, 5.513e-03, 1.929e-02, -8.892e-03, 9.030e-02, 5.711e-02, -9.029e-02, 5.996e-02, 3.188e-02, -1.179e-01, -3.602e-02, -1.647e-02, -9.029e-02, 3.394e-03, 4.245e-02, 2.177e-01)); + r += mul(s4_3, M4(-3.745e-02, -1.071e-01, -7.763e-03, 7.312e-02, -1.244e-02, -3.923e-02, 1.577e-01, 7.876e-02, -3.967e-01, -2.705e-01, 1.242e-01, -1.339e-01, 9.164e-02, -8.832e-02, -5.150e-02, 9.188e-04)); + r += mul(s4_4, M4(3.188e-01, 2.089e-01, 1.191e-01, -1.741e-01, -9.131e-02, -1.280e-01, -8.498e-02, -1.474e-02, -7.363e-01, -6.461e-01, -1.540e-01, -3.043e-01, -8.331e-02, -3.601e-02, -1.337e-01, -3.212e-03)); + r += mul(s4_5, M4(-1.324e-01, -7.492e-02, -2.765e-02, 1.714e-01, 1.525e-01, -1.808e-01, -5.317e-03, 2.487e-01, -5.368e-02, -1.320e-01, 1.646e-01, -1.221e-01, 1.742e-01, 7.191e-02, 1.264e-01, -1.884e-02)); + r += mul(s4_6, M4(-5.439e-02, -1.459e-01, -9.954e-02, -1.495e-01, -3.570e-02, 6.260e-02, 1.202e-01, 
8.844e-02, -2.543e-02, 6.869e-02, 5.723e-02, -8.969e-02, 4.016e-02, -5.638e-02, -1.861e-02, -2.536e-02)); + r += mul(s4_7, M4(-2.141e-02, 1.867e-01, 9.165e-02, 3.497e-03, -8.593e-03, 1.722e-02, -9.671e-02, 2.116e-01, -1.176e-01, 9.203e-02, -3.250e-03, 2.352e-01, 4.347e-02, -1.797e-02, 1.628e-02, -1.260e-01)); + r += mul(s4_8, M4(8.824e-03, -1.198e-01, 1.086e-01, -4.312e-02, 5.914e-02, -6.638e-02, -2.196e-01, -6.658e-02, -1.844e-02, 4.803e-02, -3.338e-02, 2.232e-01, 1.276e-02, 4.370e-02, -1.566e-01, -6.510e-02)); + r += mul(s5_0, M4(-7.742e-02, -5.564e-03, -1.219e-01, 8.266e-02, -2.908e-02, -5.622e-02, -6.020e-02, 1.122e-01, 3.347e-02, 1.352e-01, 5.121e-02, 1.043e-01, 9.550e-02, 7.644e-02, 1.249e-01, 1.721e-02)); + r += mul(s5_1, M4(6.050e-02, -5.223e-02, -1.074e-02, -9.829e-02, 1.650e-02, -8.694e-02, -2.254e-01, 1.426e-02, -1.388e-01, 7.306e-02, 5.138e-02, 2.509e-01, -1.576e-02, -4.962e-02, -1.111e-01, 1.685e-02)); + r += mul(s5_2, M4(6.816e-03, 5.468e-02, -4.772e-02, -4.338e-02, 1.247e-01, 9.513e-02, 8.595e-03, -1.408e-01, -8.185e-02, -3.862e-02, 1.914e-02, -6.964e-02, 5.147e-02, -2.923e-01, -1.373e-01, 5.155e-02)); + r += mul(s5_3, M4(-5.775e-02, 1.676e-01, 9.105e-02, 8.077e-02, -5.880e-02, 9.001e-03, 5.488e-02, -3.103e-02, 4.378e-01, 5.467e-01, 5.087e-02, 1.015e-02, 1.360e-01, -1.773e-01, 1.621e-01, 9.090e-02)); + r += mul(s5_4, M4(1.843e-01, -3.777e-02, 2.242e-01, -8.750e-02, 8.410e-02, -1.868e-01, -1.184e-01, -1.742e-01, 2.463e-01, 2.272e-01, 2.064e-02, 1.751e-01, -3.371e-01, -4.247e-01, 1.850e-01, -1.210e-01)); + r += mul(s5_5, M4(-2.420e-01, 8.496e-02, 3.214e-01, 8.231e-02, 1.791e-01, 8.399e-02, 2.479e-02, -1.334e-01, 7.132e-02, -7.497e-02, -1.178e-02, 1.141e-01, 2.510e-01, -3.215e-01, -2.085e-01, -1.628e-01)); + r += mul(s5_6, M4(2.309e-02, 1.314e-02, -1.278e-01, -1.081e-01, 4.483e-02, -3.659e-02, -7.600e-02, -9.992e-02, 1.486e-01, -1.862e-01, -6.407e-02, -3.771e-02, 1.169e-02, 7.518e-02, 1.239e-01, 1.198e-01)); + r += mul(s5_7, M4(1.076e-04, -1.035e-01, 
8.117e-02, 2.222e-01, 1.414e-01, -8.499e-04, 7.303e-02, -2.033e-01, 1.418e-01, 7.929e-05, -3.849e-02, -2.114e-01, -1.265e-01, 1.412e-01, 2.669e-01, 2.386e-02)); + r += mul(s5_8, M4(1.398e-01, 1.127e-02, -2.013e-01, -1.141e-01, 3.554e-02, -1.411e-01, 8.951e-03, -6.957e-02, -6.525e-02, 1.170e-02, 3.001e-02, -5.121e-03, -1.427e-01, 1.769e-01, -1.797e-01, 7.330e-02)); + r += mul(s6_0, M4(-6.830e-02, 2.586e-02, 6.957e-02, 3.344e-03, -8.251e-02, -1.024e-01, 1.401e-02, -8.155e-02, 3.130e-02, 1.567e-02, 9.907e-02, 8.775e-02, 1.828e-02, -6.401e-02, -1.254e-01, -3.486e-03)); + r += mul(s6_1, M4(-7.500e-02, 1.524e-01, 1.332e-01, -2.362e-03, -2.281e-01, -1.344e-01, -1.993e-02, 3.437e-02, -4.016e-02, 8.094e-03, -1.552e-01, -9.684e-03, -2.945e-02, 1.827e-01, 1.631e-01, -1.402e-01)); + r += mul(s6_2, M4(1.283e-01, 1.022e-01, -4.673e-02, -1.206e-01, 6.208e-02, -7.104e-03, -1.880e-03, 9.911e-02, 7.525e-02, -2.320e-02, 1.065e-01, 5.128e-02, -1.031e-01, -5.443e-02, 8.565e-02, 1.749e-02)); + r += mul(s6_3, M4(-7.235e-02, -4.825e-04, 1.150e-01, -2.535e-02, 2.054e-02, -1.127e-01, -2.437e-01, -2.327e-01, -4.160e-02, -1.421e-02, -1.598e-01, 8.317e-02, 1.066e-01, 9.147e-02, 2.876e-02, -1.104e-01)); + r += mul(s6_4, M4(-8.142e-02, -1.204e-01, -4.673e-02, 2.870e-02, 1.121e-01, 8.413e-02, 3.548e-01, -8.794e-02, 7.436e-02, -5.747e-01, -3.031e-01, 1.076e-01, -9.881e-02, -4.884e-02, -2.669e-02, 9.961e-02)); + r += mul(s6_5, M4(4.019e-02, 3.443e-02, 6.175e-02, -1.364e-01, -4.113e-02, 3.754e-02, 9.865e-03, -2.927e-02, 8.488e-02, 5.757e-02, -2.555e-01, 4.841e-02, 1.484e-01, -4.510e-02, 1.274e-01, -3.619e-02)); + r += mul(s6_6, M4(2.464e-02, 8.601e-02, 7.057e-02, -5.465e-03, -4.709e-02, 1.541e-02, -1.009e-01, 2.418e-01, 1.212e-02, -1.384e-01, -5.690e-03, 7.989e-02, 1.026e-02, 4.835e-03, 1.245e-01, -4.626e-02)); + r += mul(s6_7, M4(-8.064e-02, -5.022e-02, 6.276e-03, 3.697e-02, -1.358e-01, -2.119e-02, -3.152e-02, 8.134e-03, 4.175e-03, -6.934e-01, 2.208e-02, 1.098e-02, 2.263e-02, 4.586e-02, -8.756e-02, 
-5.983e-03)); + r += mul(s6_8, M4(1.869e-02, 6.459e-02, 3.638e-03, 8.153e-04, -2.581e-02, -2.617e-02, 1.255e-01, -8.043e-02, 8.142e-02, -6.911e-02, -1.027e-01, 1.238e-01, -6.244e-02, -3.995e-02, 3.448e-02, 6.247e-02)); + r += mul(s7_0, M4(6.368e-03, -2.268e-01, -5.136e-02, 2.237e-01, -6.148e-02, -2.566e-02, -1.429e-01, 1.650e-01, -1.630e-02, -3.195e-03, 3.669e-02, -2.326e-02, 1.549e-01, -2.640e-01, 1.995e-01, -1.645e-01)); + r += mul(s7_1, M4(-1.511e-01, 1.334e-02, 1.126e-01, -9.383e-02, -1.275e-01, -1.259e-01, -1.455e-01, -3.768e-02, -1.533e-02, 8.384e-02, 8.307e-02, -1.146e-01, -5.860e-02, -2.392e-02, 4.100e-04, 2.633e-01)); + r += mul(s7_2, M4(3.066e-01, -1.423e-03, -4.397e-01, -7.083e-02, 9.075e-02, 7.438e-02, -1.084e-01, -4.088e-02, 2.111e-03, 6.951e-02, 1.201e-02, -7.343e-02, 1.864e-01, -1.239e-01, 9.225e-02, 3.681e-02)); + r += mul(s7_3, M4(3.698e-02, -2.120e-01, 3.324e-02, 2.238e-01, -3.819e-02, -3.993e-02, 1.447e-01, -1.123e-01, -8.550e-02, 1.485e-01, -1.165e-01, -1.284e-01, 1.179e-01, 2.494e-01, 2.122e-02, -7.347e-02)); + r += mul(s7_4, M4(2.141e-01, -2.257e-01, -2.297e-01, -1.144e-01, -6.907e-02, 5.844e-02, -3.309e-01, 1.606e-01, 1.254e-01, 6.750e-01, 3.546e-01, 7.465e-02, -1.612e-01, -1.870e-01, -1.952e-01, 5.436e-01)); + r += mul(s7_5, M4(3.007e-01, -4.092e-01, -2.155e-01, -2.010e-01, 1.396e-01, -4.618e-03, 1.402e-02, -4.520e-02, -1.665e-01, 1.031e-02, 2.096e-01, -5.751e-02, 9.736e-02, 5.130e-02, 2.232e-01, -1.639e-01)); + r += mul(s7_6, M4(5.731e-02, -6.303e-02, -8.612e-03, 8.817e-02, -7.004e-02, -2.376e-02, 5.575e-02, -7.091e-03, -6.420e-02, -1.200e-01, -1.599e-01, -5.591e-02, -5.707e-02, 4.149e-02, 5.281e-02, -1.419e-01)); + r += mul(s7_7, M4(-1.024e-01, -1.073e-01, -1.133e-02, -9.733e-02, -5.793e-02, -1.041e-02, -2.091e-01, -5.623e-02, 2.134e-01, 8.509e-02, 3.772e-01, -2.028e-01, -2.103e-01, -5.787e-02, 1.942e-01, -1.438e-01)); + r += mul(s7_8, M4(3.537e-02, -9.345e-02, 1.828e-01, 8.816e-02, -6.724e-02, -5.321e-02, 7.143e-02, -3.994e-02, 
-1.362e-01, -6.127e-02, 3.408e-01, -1.016e-01, 1.485e-02, -2.559e-01, -2.418e-02, 6.540e-02)); + r += V4(-1.024e-03, -4.227e-03, -3.308e-02, 2.423e-02); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + 
V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, 
s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 
s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.687e-01, 8.166e-02, -1.199e-01, -1.438e-01, -1.553e-01, 1.877e-02, -1.204e-02, 1.459e-02, -1.317e-01, 1.964e-02, 3.228e-02, 3.404e-02, -1.089e-01, -6.737e-03, -1.876e-01, 1.835e-02)); + r += mul(s0_1, M4(-1.304e-01, 1.646e-01, 1.825e-02, -1.453e-01, -7.232e-04, 8.586e-02, 1.332e-01, -6.766e-02, -2.069e-02, 8.783e-02, -6.577e-03, 8.726e-03, -2.289e-01, -5.098e-02, 4.306e-01, -2.771e-01)); + r += mul(s0_2, M4(9.318e-02, 8.611e-03, -1.182e-01, -2.625e-01, -1.586e-01, 3.613e-02, -1.326e-01, -3.240e-02, -2.252e-03, -2.125e-02, 1.131e-01, 8.956e-02, -1.659e-01, -3.769e-01, 3.006e-01, 1.306e-01)); + r += mul(s0_3, M4(2.156e-02, -1.255e-01, 2.045e-01, 4.720e-01, 3.673e-02, 1.511e-01, -4.359e-02, 8.447e-02, 9.906e-03, 2.171e-02, -3.553e-02, 8.704e-02, -2.092e-01, -5.608e-02, -1.380e-01, -1.604e-02)); + r += mul(s0_4, M4(2.147e-01, 3.794e-01, -5.221e-02, 2.407e-01, -8.350e-02, 2.791e-01, 3.463e-01, -1.276e-01, -4.068e-04, 2.771e-02, 8.716e-02, 1.195e-01, 4.733e-01, 1.988e-01, -1.505e-01, -3.883e-01)); + r += mul(s0_5, M4(8.607e-02, 1.514e-01, -3.988e-01, -3.330e-01, -6.680e-02, -2.056e-01, 2.278e-01, 1.257e-02, -8.822e-02, -1.234e-01, 5.052e-02, 8.519e-02, -2.473e-01, -3.413e-01, 4.017e-01, 8.034e-02)); + r += mul(s0_6, M4(-8.551e-02, -1.403e-01, 2.125e-01, 7.841e-03, 5.549e-03, -5.862e-02, 9.476e-04, 1.900e-02, -3.914e-02, -3.345e-02, 5.222e-02, -9.521e-03, 2.778e-02, 1.449e-01, -2.182e-01, -2.048e-01)); + r += mul(s0_7, M4(1.332e-02, -1.636e-01, -1.366e-01, 4.173e-01, 1.212e-01, 3.106e-02, -9.726e-02, -1.551e-01, 5.765e-03, 1.214e-02, -1.040e-01, -1.715e-01, 5.105e-02, -1.803e-02, 2.323e-01, -1.403e-01)); + r += mul(s0_8, M4(3.398e-02, 6.988e-02, -2.143e-01, -1.702e-01, -2.889e-02, 5.287e-02, -5.093e-02, -5.457e-03, -4.596e-02, -1.432e-02, 4.034e-02, 1.615e-01, -4.963e-02, -1.742e-01, 1.176e-01, 1.721e-01)); + r += mul(s1_0, M4(-1.326e-01, 
-2.362e-02, 1.268e-01, 1.417e-01, -2.640e-02, -6.551e-02, -1.278e-02, 6.166e-02, -7.067e-02, 6.927e-02, 1.225e-01, 6.188e-02, -1.689e-02, 1.018e-03, 1.171e-05, 4.638e-02)); + r += mul(s1_1, M4(5.420e-02, 3.479e-03, 6.777e-02, -1.539e-02, 1.453e-03, 1.949e-01, -1.092e-01, 3.312e-02, 1.668e-02, -2.749e-02, -7.548e-02, 1.983e-02, 3.772e-02, 9.288e-02, 3.287e-02, -1.036e-01)); + r += mul(s1_2, M4(4.808e-02, -7.779e-03, -8.201e-02, -1.903e-02, -8.753e-02, 9.981e-02, -1.936e-01, 1.469e-01, 2.946e-01, -6.026e-02, 1.247e-01, 1.665e-01, 1.790e-02, -2.399e-02, 1.201e-01, 2.750e-02)); + r += mul(s1_3, M4(6.217e-02, 9.400e-02, -4.512e-02, 8.812e-02, -2.566e-02, -4.474e-02, 4.474e-02, 1.304e-01, 3.143e-02, -1.294e-02, 2.472e-01, 8.708e-02, 8.283e-03, 6.958e-02, 4.408e-02, 5.403e-02)); + r += mul(s1_4, M4(1.630e-02, 1.362e-01, -1.194e-01, -3.578e-01, -9.621e-02, -2.325e-02, -6.584e-02, 3.462e-02, -3.388e-01, -3.103e-01, 1.561e-01, -4.855e-01, 1.372e-02, 1.174e-01, -1.389e-01, -1.157e-01)); + r += mul(s1_5, M4(1.248e-01, -1.963e-02, -1.700e-02, 1.012e-01, 1.012e-01, 1.299e-01, -4.928e-01, 5.774e-02, -4.600e-02, 3.790e-01, 4.851e-01, -2.039e-02, -9.516e-02, -1.836e-01, 1.571e-01, 1.615e-01)); + r += mul(s1_6, M4(-4.777e-02, -2.571e-02, -1.059e-01, -7.602e-02, 3.390e-04, -3.210e-02, 9.651e-03, -3.705e-02, -4.980e-02, -6.590e-02, 1.129e-01, 1.401e-01, 2.812e-02, 2.992e-02, -1.999e-02, -5.651e-02)); + r += mul(s1_7, M4(1.004e-01, 2.057e-02, -2.537e-02, 2.206e-02, -6.160e-02, -1.883e-02, -9.290e-03, 4.182e-02, -8.648e-02, -7.383e-03, -8.947e-02, 2.048e-01, 6.374e-02, -3.761e-02, -5.065e-02, -3.502e-02)); + r += mul(s1_8, M4(-8.064e-02, 5.522e-02, 4.249e-02, 1.068e-01, -1.652e-02, -2.258e-02, -1.323e-01, 1.204e-01, -9.693e-02, -6.873e-02, 2.582e-01, 2.303e-01, -4.725e-02, -3.186e-02, 9.203e-02, 5.410e-02)); + r += mul(s2_0, M4(3.263e-02, -9.901e-02, 4.089e-02, 3.456e-02, 3.123e-02, -1.064e-02, -1.264e-01, -1.284e-01, -6.946e-02, -2.256e-02, 5.680e-02, 7.784e-02, 3.726e-02, -1.081e-01, 
-6.456e-02, 1.050e-01)); + r += mul(s2_1, M4(2.190e-02, 4.159e-02, -1.148e-01, 8.853e-02, -1.967e-01, 4.572e-02, 1.118e-01, 3.464e-02, 5.631e-02, 8.991e-02, -1.654e-02, 1.463e-02, 1.619e-01, 4.864e-02, -5.587e-02, -5.653e-02)); + r += mul(s2_2, M4(-5.594e-02, -3.155e-02, 8.088e-02, 5.277e-02, -7.377e-02, -1.008e-02, 5.421e-02, 2.860e-02, 8.171e-02, 2.243e-01, -5.323e-02, -1.391e-01, -2.219e-02, 2.425e-02, 1.050e-01, -7.410e-02)); + r += mul(s2_3, M4(-1.869e-02, -8.442e-02, 3.266e-01, 1.762e-01, 6.582e-02, 2.504e-02, -1.160e-01, 9.119e-02, 5.249e-02, 6.679e-02, -6.709e-03, -5.891e-02, -4.800e-02, 7.017e-03, 6.847e-03, -6.442e-02)); + r += mul(s2_4, M4(3.592e-03, 1.994e-01, -1.673e-01, 7.164e-02, 1.105e-01, 1.378e-01, -2.136e-01, -2.875e-01, -2.423e-02, 2.685e-02, 9.026e-02, 3.959e-02, -7.615e-02, 2.090e-03, -4.808e-02, 9.262e-02)); + r += mul(s2_5, M4(-3.896e-03, -7.554e-02, 6.684e-02, 7.698e-02, -9.906e-02, 6.621e-05, 1.286e-01, 1.069e-01, -1.022e-01, 2.141e-02, -4.026e-02, -1.684e-02, 3.187e-02, -3.626e-02, -2.315e-02, -1.050e-01)); + r += mul(s2_6, M4(4.210e-03, -1.220e-02, 1.567e-01, 6.979e-02, 2.187e-03, -2.065e-03, 1.274e-01, 3.273e-03, -3.667e-02, 9.996e-02, -5.206e-03, -9.575e-02, 4.773e-03, 4.623e-02, -1.341e-02, -1.804e-02)); + r += mul(s2_7, M4(-6.208e-03, -1.973e-02, 4.654e-02, 3.823e-04, -1.456e-01, -5.811e-03, 2.883e-02, -1.811e-02, 2.366e-02, 5.063e-02, 8.785e-03, -1.582e-01, -1.013e-01, -7.017e-02, 1.187e-01, 8.433e-02)); + r += mul(s2_8, M4(-2.073e-02, -2.841e-02, 7.324e-02, -1.301e-02, -3.501e-02, -4.349e-02, 4.727e-02, 5.348e-02, -6.533e-02, 3.860e-02, -5.548e-02, -7.562e-02, -7.104e-02, -2.708e-02, -2.633e-02, 1.581e-02)); + r += mul(s3_0, M4(1.952e-01, -4.406e-02, 2.251e-01, 1.730e-02, 1.321e-01, 2.949e-02, -1.146e-01, -9.297e-02, 3.493e-03, 3.876e-02, 3.592e-02, 1.943e-02, 6.047e-02, 7.362e-02, 3.877e-02, 4.716e-02)); + r += mul(s3_1, M4(-1.067e-01, 3.903e-01, 1.590e-01, 7.771e-02, 1.419e-01, -4.469e-02, -2.373e-02, 1.276e-01, 1.104e-01, 
-7.168e-02, -1.678e-02, -8.715e-02, 1.352e-01, 5.306e-02, -2.815e-02, -2.683e-01)); + r += mul(s3_2, M4(7.782e-02, 1.057e-01, 3.814e-02, -1.432e-01, -3.761e-02, -6.796e-02, 1.127e-01, 1.391e-02, 1.189e-01, 7.453e-02, -7.495e-02, -1.539e-01, 3.333e-01, -3.008e-01, 2.307e-01, 1.726e-01)); + r += mul(s3_3, M4(2.905e-01, -1.213e-01, 4.333e-02, 3.070e-02, -1.941e-01, 2.148e-02, -4.467e-02, -2.192e-01, 4.357e-02, 1.709e-01, 1.875e-02, -1.411e-02, 2.852e-01, 4.772e-03, 6.625e-02, -3.587e-01)); + r += mul(s3_4, M4(-1.116e-01, 4.040e-01, -1.487e-01, -1.316e-01, -1.446e-01, -2.333e-01, -1.028e-01, 6.341e-02, -2.364e-01, -2.721e-02, 1.083e-01, -2.123e-01, 4.096e-01, -2.602e-01, 3.234e-01, -2.992e-01)); + r += mul(s3_5, M4(-8.916e-02, 5.935e-02, 2.286e-02, 3.991e-02, 3.674e-02, 2.909e-02, 2.075e-01, 1.902e-02, 2.073e-01, -3.102e-01, -1.087e-01, 2.351e-01, -1.173e-01, 3.481e-02, 1.374e-01, 1.690e-01)); + r += mul(s3_6, M4(-2.447e-03, -1.310e-01, 1.115e-01, 7.939e-02, -8.932e-02, -2.751e-02, 6.472e-02, 1.440e-01, -2.778e-01, -7.787e-02, 1.362e-01, 1.740e-01, -2.580e-01, -7.293e-02, -1.313e-01, -4.858e-01)); + r += mul(s3_7, M4(6.532e-02, 5.795e-02, 1.133e-01, -9.871e-03, -2.339e-01, -1.562e-01, 1.458e-02, -1.546e-01, -1.127e-01, -7.856e-04, -1.915e-01, -3.379e-01, -9.314e-02, -8.137e-02, 2.881e-01, 2.245e-01)); + r += mul(s3_8, M4(-3.721e-02, -5.194e-02, 1.918e-01, -7.738e-02, -1.546e-02, 5.603e-02, 7.840e-02, 3.734e-02, -1.114e-01, 1.450e-01, -1.754e-01, -2.606e-01, -9.938e-02, 1.193e-01, 1.560e-01, -2.439e-01)); + r += mul(s4_0, M4(-9.219e-03, 3.068e-02, -6.399e-02, -5.676e-02, 2.555e-01, 3.642e-02, 7.457e-02, -2.328e-02, 5.980e-02, -6.396e-03, 1.706e-01, -5.731e-03, -2.385e-01, -7.905e-02, -4.412e-02, -3.759e-02)); + r += mul(s4_1, M4(7.442e-02, 2.789e-02, -5.155e-02, 6.180e-02, -4.061e-02, -6.683e-02, 1.053e-01, 3.698e-02, -1.689e-01, -2.763e-01, 1.186e-01, 7.406e-02, -3.383e-01, -2.349e-02, -2.192e-02, -2.980e-02)); + r += mul(s4_2, M4(1.363e-02, -1.910e-02, 5.765e-03, 
-8.381e-02, -8.163e-02, -9.549e-03, -7.204e-03, -8.317e-03, 6.825e-02, -7.917e-02, -9.160e-02, 2.864e-02, 4.084e-03, -1.179e-01, -5.971e-02, 1.859e-02)); + r += mul(s4_3, M4(1.251e-01, 6.503e-02, -2.114e-01, -1.335e-01, 5.915e-02, 2.959e-02, 5.515e-03, -1.260e-01, -1.847e-01, 4.622e-02, 1.991e-01, 1.137e-01, 1.777e-01, 8.480e-02, -1.747e-01, -8.698e-02)); + r += mul(s4_4, M4(1.291e-01, -3.333e-01, -2.523e-02, 6.282e-02, -2.010e-01, -1.878e-01, 7.108e-02, 1.376e-01, -3.144e-01, -2.832e-01, 1.041e-01, -2.659e-01, -5.124e-02, 2.179e-01, -2.510e-01, 8.983e-02)); + r += mul(s4_5, M4(1.359e-01, -2.681e-01, 6.136e-02, 1.128e-01, 4.392e-02, -7.634e-02, 1.562e-02, 1.960e-02, -1.059e-01, -8.959e-02, 9.625e-02, 1.929e-01, -1.112e-01, -2.043e-01, 3.311e-02, 7.884e-02)); + r += mul(s4_6, M4(-5.216e-02, 1.000e-01, -2.716e-01, -1.806e-01, -1.624e-02, -3.480e-02, 6.681e-02, 3.609e-02, 8.244e-02, -6.571e-02, -4.160e-02, 1.384e-02, 9.817e-02, -6.976e-02, -1.353e-01, -8.585e-02)); + r += mul(s4_7, M4(-1.156e-01, 7.337e-02, 8.131e-02, 3.727e-02, 1.808e-01, 8.877e-02, -7.023e-02, -2.808e-01, 6.601e-02, 5.052e-02, -1.602e-01, 3.795e-02, -7.324e-02, -2.153e-02, -7.231e-02, 4.927e-02)); + r += mul(s4_8, M4(-1.597e-01, 9.291e-02, 1.072e-01, -9.688e-02, 1.202e-01, -9.313e-02, 2.052e-03, -6.283e-02, 1.234e-02, -4.641e-02, 1.124e-01, 7.310e-02, 5.806e-02, -1.393e-02, -7.467e-02, -1.365e-02)); + r += mul(s5_0, M4(1.908e-02, 4.101e-02, 7.300e-03, -1.589e-03, 1.602e-02, -8.488e-03, 5.971e-02, 7.128e-03, -1.286e-01, -1.413e-02, 4.143e-02, 4.473e-02, 3.487e-03, 2.195e-02, -3.393e-02, 2.150e-02)); + r += mul(s5_1, M4(6.465e-02, 1.069e-01, 4.920e-03, -1.181e-02, 2.109e-01, -6.762e-02, 9.279e-02, -8.565e-03, 6.348e-02, 1.019e-01, 3.998e-02, 3.639e-02, -1.348e-01, 2.041e-01, -5.174e-02, -1.375e-01)); + r += mul(s5_2, M4(-1.421e-02, 4.131e-02, -2.263e-02, -6.063e-02, 3.799e-02, -8.022e-02, 2.392e-02, -9.560e-03, 8.589e-03, -1.635e-02, -1.595e-02, -2.145e-02, 1.137e-02, 3.846e-02, -5.293e-02, 
8.807e-02)); + r += mul(s5_3, M4(1.300e-01, 5.945e-02, 1.313e-02, 4.673e-02, -7.139e-02, 6.130e-02, -8.234e-02, -2.109e-01, 1.234e-01, -1.599e-02, 3.811e-02, -1.684e-01, -1.032e-01, -1.729e-01, 1.180e-01, 1.451e-01)); + r += mul(s5_4, M4(-2.139e-01, -1.261e-01, 5.155e-02, -3.438e-02, -9.973e-02, 5.261e-02, 1.968e-01, -2.858e-02, -1.402e-01, -7.740e-02, 2.029e-01, -1.669e-02, 9.150e-02, 1.662e-02, -7.390e-02, -1.764e-01)); + r += mul(s5_5, M4(-1.639e-02, 2.398e-02, 5.206e-02, -4.966e-02, -3.472e-02, -7.197e-02, 1.725e-01, -3.140e-02, 1.676e-02, -1.381e-01, 1.557e-01, 1.213e-01, -7.629e-02, -1.392e-01, 4.410e-02, 4.050e-02)); + r += mul(s5_6, M4(-1.096e-01, -7.978e-02, 3.590e-02, 8.377e-02, 1.031e-01, 1.100e-01, -5.918e-02, -7.414e-02, 8.954e-02, -1.134e-01, -5.441e-02, -3.733e-02, 8.487e-02, 1.935e-02, 3.119e-02, -7.440e-02)); + r += mul(s5_7, M4(1.882e-02, 2.669e-02, 7.765e-02, -6.490e-04, -1.474e-02, 1.028e-01, 1.065e-01, -6.083e-02, 1.412e-03, 1.229e-02, 9.548e-03, -1.022e-02, -3.959e-02, 3.008e-02, -8.053e-02, 2.315e-02)); + r += mul(s5_8, M4(-1.005e-01, -5.850e-02, 1.339e-01, -3.531e-03, -4.871e-02, 8.862e-03, -5.255e-03, -8.616e-02, 2.939e-02, -3.063e-02, 6.904e-02, 8.195e-02, -4.837e-02, -6.718e-02, 1.523e-01, 7.304e-02)); + r += mul(s6_0, M4(2.511e-01, -2.700e-02, -2.241e-01, -8.827e-02, 1.578e-03, -1.330e-01, -1.169e-01, 1.971e-01, 1.208e-02, -2.703e-02, 8.641e-02, 1.194e-02, 1.682e-02, 1.062e-01, 3.456e-02, -1.125e-01)); + r += mul(s6_1, M4(-1.913e-01, 2.695e-02, 1.756e-02, 6.275e-02, 9.474e-02, 2.234e-01, -2.228e-01, -1.051e-02, -5.410e-01, 1.036e-01, 5.839e-02, -1.927e-01, 5.608e-01, 7.018e-02, -5.304e-02, -3.249e-01)); + r += mul(s6_2, M4(6.880e-02, 5.915e-02, -1.394e-01, 7.292e-02, 7.694e-03, 1.793e-01, -3.022e-02, 4.487e-03, 1.501e-01, -6.532e-02, -4.614e-02, 3.067e-02, 2.152e-01, -3.339e-02, 1.689e-01, 5.163e-02)); + r += mul(s6_3, M4(2.106e-02, 1.292e-01, -5.325e-01, 1.742e-01, 1.928e-01, -1.107e-01, -1.024e-01, 1.384e-01, -3.115e-02, -1.027e-01, 
1.363e-01, 6.820e-02, -1.583e-01, 6.273e-02, -1.063e-01, -2.276e-01)); + r += mul(s6_4, M4(-2.076e-01, 9.245e-02, -1.537e-01, 2.041e-01, -3.218e-01, -9.736e-03, 4.585e-02, 3.374e-01, -2.229e-01, 2.515e-01, -2.451e-01, -3.592e-01, 6.475e-01, 2.927e-01, 1.947e-02, -5.491e-01)); + r += mul(s6_5, M4(1.881e-01, 9.823e-02, -5.798e-01, 2.706e-01, -7.773e-03, 4.207e-01, -2.544e-01, -6.409e-02, 8.943e-02, 1.581e-01, -3.087e-02, 3.644e-02, 4.059e-01, 1.233e-01, 1.340e-01, -1.729e-02)); + r += mul(s6_6, M4(1.272e-01, -7.512e-02, -4.167e-01, 1.740e-01, 1.082e-01, -8.696e-05, 6.458e-02, 8.547e-02, -2.630e-02, 6.979e-02, 3.380e-02, 9.220e-02, 2.841e-01, -2.527e-02, 1.248e-01, -7.761e-03)); + r += mul(s6_7, M4(2.636e-01, 1.016e-01, -5.834e-02, 3.598e-02, 1.360e-01, -6.870e-02, 4.744e-03, 8.948e-02, -2.987e-01, 6.511e-02, -9.257e-02, -4.916e-02, -1.611e-01, -1.630e-01, 2.508e-01, 3.155e-01)); + r += mul(s6_8, M4(-2.714e-02, -4.710e-02, -7.061e-02, 1.471e-01, 1.043e-01, 3.127e-02, -1.498e-01, 1.127e-01, -7.893e-02, 4.654e-02, -2.941e-02, -5.514e-02, -1.395e-01, 1.096e-01, -1.843e-02, -1.769e-01)); + r += mul(s7_0, M4(-7.351e-02, -9.425e-03, -2.481e-02, -1.941e-02, 1.248e-01, -3.381e-02, -1.058e-01, -6.811e-02, 5.354e-03, -3.077e-02, 7.082e-02, -1.596e-03, -1.241e-01, -5.744e-02, 9.882e-02, 6.528e-02)); + r += mul(s7_1, M4(2.069e-01, 1.091e-01, 6.860e-02, -1.470e-01, 2.973e-02, -2.553e-02, -1.002e-01, 8.685e-02, -4.484e-02, -1.102e-01, 1.163e-01, -4.555e-02, -1.942e-02, -1.147e-02, -1.121e-01, -4.226e-02)); + r += mul(s7_2, M4(-8.667e-02, -6.287e-02, 7.794e-02, 3.801e-02, 2.144e-02, -3.184e-03, -7.759e-02, -3.430e-02, -6.115e-02, 8.789e-04, 4.944e-02, 6.660e-04, -5.836e-04, 2.528e-02, -8.120e-02, -7.806e-03)); + r += mul(s7_3, M4(1.657e-01, -3.697e-02, -7.059e-02, -4.885e-02, 7.071e-02, -4.343e-03, -2.486e-02, 1.618e-02, -4.309e-02, 7.969e-02, 5.391e-02, 6.220e-02, -1.250e-02, -3.509e-02, 7.260e-02, 8.836e-02)); + r += mul(s7_4, M4(-1.635e-01, 8.655e-03, 2.083e-01, 2.367e-01, 
2.724e-02, 4.316e-02, -3.500e-02, 1.175e-01, -1.324e-01, 8.391e-02, 1.303e-01, 3.996e-02, 2.966e-02, 3.837e-02, 3.528e-02, -3.091e-02)); + r += mul(s7_5, M4(1.302e-02, 1.612e-01, -9.924e-02, -1.282e-01, 3.968e-02, 2.190e-01, -2.045e-01, -2.511e-01, 1.372e-02, 2.535e-02, 2.012e-02, 7.447e-02, 7.591e-03, 1.943e-02, 5.198e-02, 5.475e-02)); + r += mul(s7_6, M4(-1.832e-02, -2.806e-02, 3.148e-02, 3.680e-02, -2.261e-02, 9.937e-02, 2.641e-02, -8.156e-02, -6.391e-02, 1.383e-02, -1.494e-02, -3.854e-02, 9.237e-03, 1.985e-02, -6.267e-02, -2.064e-02)); + r += mul(s7_7, M4(-1.015e-02, -1.696e-03, -3.062e-02, -7.102e-02, 1.778e-02, 1.163e-02, 2.630e-03, -1.040e-01, -1.397e-02, 2.023e-03, -1.445e-01, -9.735e-02, 2.512e-02, 5.539e-03, -4.223e-02, -3.399e-02)); + r += mul(s7_8, M4(-5.911e-02, -5.396e-02, 3.260e-03, 1.271e-02, -4.968e-02, -5.689e-02, 1.541e-02, -1.444e-02, -2.694e-02, -2.971e-03, -7.396e-02, 7.005e-02, 4.195e-02, 1.648e-02, 2.096e-02, -1.043e-02)); + r += V4(-9.620e-03, -3.005e-03, 1.671e-02, 3.678e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.851e-01, -4.425e-02, -1.296e-02, -1.750e-01, 5.089e-02, -6.421e-02, 1.176e-01, 8.044e-02, 2.377e-02, 6.474e-02, 9.873e-03, 9.184e-03, -7.470e-02, -1.489e-01, 1.416e-01, -1.896e-02)); + r += mul(s0_1, M4(-1.199e-01, 2.641e-01, 1.930e-01, 4.907e-01, -7.344e-02, 
-5.004e-02, -8.055e-02, -1.078e-02, 7.095e-02, -6.252e-02, -2.632e-02, -3.368e-02, 6.609e-03, 1.314e-01, 5.713e-02, 3.680e-01)); + r += mul(s0_2, M4(1.390e-01, -1.533e-02, -2.715e-01, -3.167e-02, 2.957e-02, -6.323e-02, -1.147e-02, -9.545e-02, -7.786e-02, -5.598e-02, -1.246e-02, -2.553e-02, -2.138e-01, 2.817e-01, 2.373e-01, -4.542e-02)); + r += mul(s0_3, M4(3.335e-01, 2.806e-01, -3.541e-01, -8.990e-01, -1.782e-02, -2.947e-02, 1.060e-01, 5.400e-02, -1.187e-01, 8.051e-02, 2.860e-02, 3.945e-02, -5.752e-02, 2.183e-01, 1.091e-01, 3.010e-01)); + r += mul(s0_4, M4(2.118e-01, -9.166e-01, 2.769e-01, -9.884e-01, -7.564e-03, 6.236e-02, -5.832e-02, -1.010e-01, -1.217e-02, 3.905e-02, -2.208e-01, -4.258e-02, 2.551e-01, 2.034e-01, 3.851e-01, -1.103e+00)); + r += mul(s0_5, M4(3.171e-01, -5.767e-01, -6.511e-01, 3.612e-01, -2.524e-02, -1.359e-01, -5.570e-02, -5.619e-03, -9.918e-02, 1.493e-01, 2.648e-01, 6.511e-02, 1.727e-01, -1.207e-01, -3.091e-01, 4.578e-01)); + r += mul(s0_6, M4(-1.468e-01, -6.237e-02, -1.201e-01, 1.999e-01, -7.051e-02, 1.615e-03, -2.254e-02, -2.706e-02, 8.809e-03, -3.437e-02, -6.647e-02, -6.118e-02, 4.269e-01, 1.911e-03, 3.764e-01, 2.219e-01)); + r += mul(s0_7, M4(-3.013e-01, -2.085e-01, 1.862e-01, -2.243e-01, 1.371e-01, 1.049e-01, 3.014e-02, 2.320e-01, 8.462e-02, -4.937e-04, 1.677e-01, 2.084e-01, 4.763e-01, 1.236e-01, 1.038e-01, 1.427e-01)); + r += mul(s0_8, M4(-8.250e-02, 1.199e-01, -1.917e-01, 1.713e-01, -2.901e-01, -2.251e-02, 2.543e-02, 7.192e-02, 7.725e-02, -6.247e-02, 1.014e-01, -8.146e-03, 2.485e-01, 9.060e-02, 1.734e-01, -4.235e-02)); + r += mul(s1_0, M4(1.510e-02, 8.534e-02, -1.256e-01, 2.189e-03, 9.642e-03, -3.459e-02, 6.241e-02, -9.895e-02, -9.687e-02, 6.979e-02, -1.456e-01, -8.158e-02, -8.378e-02, 7.961e-02, -3.521e-02, -2.195e-02)); + r += mul(s1_1, M4(2.153e-02, -1.841e-03, -7.239e-02, 1.259e-01, 4.431e-03, -6.453e-02, -9.590e-02, -2.068e-01, 9.783e-02, 2.137e-02, -6.300e-02, 1.256e-01, -5.519e-02, -1.348e-01, -1.007e-01, 2.611e-02)); + r += 
mul(s1_2, M4(-2.289e-02, 1.505e-02, -9.565e-02, -9.444e-02, 1.853e-02, -4.434e-03, 4.014e-02, -1.921e-01, -9.101e-02, 4.097e-02, 2.745e-01, -4.392e-02, -1.296e-01, 4.348e-02, 2.198e-02, 7.045e-02)); + r += mul(s1_3, M4(-2.575e-01, 6.324e-02, -8.278e-03, 4.785e-02, -3.651e-02, 3.326e-03, -4.598e-02, -5.095e-02, -2.492e-01, 1.092e-01, -1.840e-01, 2.782e-02, -1.246e-03, -4.282e-02, -6.877e-02, -2.206e-02)); + r += mul(s1_4, M4(1.184e-01, 2.659e-02, 1.701e-01, -2.271e-02, 2.266e-01, 2.369e-01, 1.755e-01, -1.267e-01, 2.627e-01, 3.871e-01, 1.716e-01, 8.503e-01, 5.393e-02, 1.675e-01, 3.168e-02, -3.640e-02)); + r += mul(s1_5, M4(-3.522e-01, 1.113e-01, 1.264e-01, -5.386e-02, -1.754e-01, 1.181e-01, 1.734e-01, -1.911e-01, -4.819e-01, -1.085e-01, 2.815e-01, 1.453e-01, 5.816e-02, -5.647e-03, 7.420e-02, -4.151e-02)); + r += mul(s1_6, M4(-1.874e-02, -8.346e-02, 3.685e-02, -7.575e-02, -4.982e-02, -3.935e-02, -2.556e-03, 1.423e-01, -2.015e-01, -2.308e-02, 5.985e-02, -1.632e-01, -5.075e-02, 1.870e-02, -8.252e-04, 2.759e-02)); + r += mul(s1_7, M4(-8.994e-02, -1.059e-01, -5.740e-03, -6.832e-02, 7.141e-02, -4.124e-02, 5.578e-02, 1.743e-02, 4.363e-02, -2.104e-01, 1.439e-01, -1.920e-01, 6.467e-02, -6.073e-02, -4.433e-02, 5.301e-02)); + r += mul(s1_8, M4(-4.437e-02, -1.887e-02, 5.984e-02, 4.283e-02, -1.120e-01, 2.971e-02, 2.586e-02, -1.201e-01, -1.510e-01, 5.116e-02, 5.325e-02, -2.019e-01, 2.296e-02, 3.367e-02, 6.640e-02, -2.484e-02)); + r += mul(s2_0, M4(-1.154e-01, -3.727e-02, 4.962e-02, -2.989e-01, 1.860e-02, -6.695e-02, 1.899e-01, 8.573e-03, -1.383e-03, 2.706e-02, -3.739e-02, -1.134e-01, -2.095e-02, 3.718e-02, 7.743e-02, -5.088e-02)); + r += mul(s2_1, M4(-3.608e-02, 7.123e-02, 1.076e-01, -7.700e-02, -3.791e-02, -1.535e-02, -1.460e-01, -9.970e-02, 4.157e-02, -6.670e-02, -2.439e-02, -1.401e-01, -2.776e-02, -8.079e-02, -1.439e-02, -5.680e-03)); + r += mul(s2_2, M4(3.343e-02, -3.126e-02, -4.215e-02, 3.148e-02, -1.017e-01, 6.599e-02, 8.057e-02, 8.994e-02, 8.925e-02, -1.281e-01, -1.496e-01, 
-2.940e-02, -2.018e-02, -5.284e-03, -2.517e-02, 1.505e-01)); + r += mul(s2_3, M4(5.484e-02, 1.543e-01, -9.173e-02, -1.284e-01, 2.570e-02, 3.504e-02, 1.456e-01, -8.890e-02, 4.839e-04, 5.335e-02, 3.747e-03, 1.087e-01, 1.775e-01, 4.767e-02, 7.708e-02, -3.248e-02)); + r += mul(s2_4, M4(-2.317e-01, 7.581e-02, 1.210e-01, -4.226e-03, -2.791e-02, -8.750e-02, -1.933e-01, 1.012e-01, -4.911e-02, 8.685e-03, -2.032e-02, -3.417e-02, 4.964e-03, -1.350e-01, 2.001e-02, -7.469e-03)); + r += mul(s2_5, M4(-1.299e-02, 4.379e-02, -5.396e-02, 4.428e-03, -1.233e-02, 4.362e-02, 2.401e-01, 7.475e-02, -1.050e-01, -3.944e-02, -1.340e-01, -1.165e-01, -6.201e-02, -6.557e-02, -1.916e-01, -5.014e-02)); + r += mul(s2_6, M4(-1.015e-01, -1.525e-01, -8.618e-02, -2.857e-03, -4.288e-03, 2.047e-02, -4.102e-02, 8.515e-02, 3.796e-02, 1.793e-02, -1.177e-02, 3.954e-02, -4.504e-02, -4.991e-02, 2.301e-02, 7.153e-02)); + r += mul(s2_7, M4(-3.917e-02, -1.842e-02, -6.262e-02, 1.534e-02, 2.883e-02, -1.888e-01, -5.097e-02, -4.846e-02, 1.489e-01, 1.533e-01, 4.138e-02, 1.724e-01, -1.702e-01, -5.663e-02, -1.019e-01, -1.328e-01)); + r += mul(s2_8, M4(1.018e-01, -5.588e-02, -1.930e-03, 8.572e-02, -6.390e-02, 1.678e-03, 3.503e-02, -4.689e-02, -9.708e-03, 8.473e-02, 2.030e-02, -3.910e-02, -6.516e-02, 7.146e-02, 2.815e-02, 2.361e-02)); + r += mul(s3_0, M4(-1.943e-01, 2.283e-03, 2.672e-02, -2.011e-01, 1.774e-02, -2.625e-02, 5.873e-02, 6.881e-02, 8.473e-03, -5.936e-02, -1.474e-01, -1.988e-03, -1.413e-01, -1.813e-03, 5.192e-02, 6.340e-02)); + r += mul(s3_1, M4(1.403e-02, 4.498e-02, -6.944e-02, 1.105e-01, 9.061e-02, -4.487e-02, 1.236e-01, -6.802e-02, 1.823e-02, 1.081e-01, 2.219e-02, 8.465e-02, 5.791e-02, -1.179e-01, -1.434e-01, 2.652e-01)); + r += mul(s3_2, M4(5.514e-02, 6.423e-02, -9.896e-02, 1.110e-01, -6.340e-03, 5.026e-03, -2.387e-02, 5.807e-02, 2.515e-02, -4.631e-04, -4.919e-02, -3.969e-02, -2.147e-01, 2.110e-01, 3.099e-01, 4.742e-02)); + r += mul(s3_3, M4(8.744e-02, 1.767e-01, -7.535e-05, -2.209e-02, 8.828e-02, 
9.865e-02, -5.802e-02, -8.668e-02, -3.398e-02, 3.748e-02, -1.399e-01, -5.093e-02, 3.669e-01, 3.469e-02, -1.172e-01, 3.117e-01)); + r += mul(s3_4, M4(-2.595e-01, -1.440e-01, -1.141e-01, -2.532e-01, -1.192e-01, 2.156e-01, 4.248e-01, 3.467e-01, 3.341e-01, 3.922e-01, 8.266e-03, 2.721e-01, 3.318e-01, -8.631e-02, -3.512e-02, 6.408e-01)); + r += mul(s3_5, M4(-2.619e-02, -2.449e-04, -2.637e-02, -4.795e-02, -1.172e-01, -2.330e-02, 1.458e-01, 2.066e-03, -3.113e-02, 1.527e-01, -8.270e-02, 1.190e-01, -2.479e-01, -7.839e-02, 3.707e-02, -6.188e-04)); + r += mul(s3_6, M4(7.165e-02, 7.162e-02, -3.879e-02, 4.270e-02, 1.367e-01, 6.812e-02, 9.305e-02, 5.848e-02, -2.524e-01, 2.116e-01, -5.745e-02, -9.440e-02, 2.686e-01, 1.744e-01, 1.037e-01, 2.787e-01)); + r += mul(s3_7, M4(-7.360e-02, 5.454e-03, -2.180e-02, 1.877e-02, 2.010e-01, -2.632e-01, -2.253e-01, -2.037e-01, 3.199e-01, 2.561e-01, -7.339e-03, 2.001e-01, 2.209e-01, -9.087e-02, 2.985e-01, 1.805e-01)); + r += mul(s3_8, M4(-2.490e-02, 5.573e-02, -1.460e-02, -4.931e-02, 1.280e-02, -1.876e-02, 6.681e-02, -9.540e-02, -3.020e-02, 4.041e-01, 3.484e-01, 2.859e-01, -6.750e-02, -3.162e-02, -6.087e-02, 1.181e-01)); + r += mul(s4_0, M4(4.114e-02, 2.156e-02, 3.484e-02, 1.157e-01, -6.806e-02, -1.986e-01, 3.319e-02, -2.484e-02, -1.519e-01, 6.665e-02, -2.368e-01, 6.504e-02, -4.751e-02, 1.172e-01, 1.072e-01, 1.416e-01)); + r += mul(s4_1, M4(-3.483e-02, -8.210e-03, -5.584e-02, -2.905e-02, 3.782e-04, 2.407e-02, 6.459e-02, -5.554e-04, 7.566e-02, 6.572e-02, 1.876e-01, 1.694e-01, 1.467e-02, 2.151e-01, -3.089e-02, -3.616e-02)); + r += mul(s4_2, M4(-1.297e-02, 3.713e-03, 4.972e-02, -5.496e-02, 9.963e-02, 1.992e-02, 4.349e-02, -1.380e-01, -1.116e-01, 1.538e-01, 8.035e-02, -7.731e-02, -6.989e-02, 5.333e-02, 1.544e-01, -1.858e-01)); + r += mul(s4_3, M4(-9.309e-03, 1.125e-02, 4.129e-03, 7.425e-02, 5.974e-02, -1.624e-01, 2.433e-02, 5.115e-02, -4.260e-02, -2.221e-02, -9.451e-02, -1.544e-02, 2.424e-01, -2.633e-01, 1.835e-01, 8.455e-02)); + r += mul(s4_4, 
M4(2.690e-02, 3.714e-01, 1.985e-01, 2.515e-01, 5.585e-02, -1.168e-01, 1.335e-02, 3.549e-02, 1.684e-02, -1.506e-01, -6.873e-02, 2.926e-02, 1.050e-01, 1.381e-01, -9.192e-02, -1.441e-01)); + r += mul(s4_5, M4(3.613e-02, 2.025e-01, 2.172e-01, 1.357e-01, -8.527e-02, 6.962e-03, -9.127e-02, 6.487e-02, -9.093e-02, 1.070e-01, 3.577e-01, -1.280e-01, 1.199e-01, -1.240e-01, -2.633e-02, 1.482e-01)); + r += mul(s4_6, M4(1.900e-01, -1.009e-01, 8.051e-02, 1.877e-02, -4.018e-02, 5.551e-02, -6.814e-03, 1.446e-02, -1.889e-02, 1.077e-01, 6.959e-02, 6.740e-02, 2.197e-01, -2.703e-02, 1.179e-01, -1.687e-03)); + r += mul(s4_7, M4(1.679e-01, -1.207e-02, 2.712e-01, -1.008e-01, 6.575e-03, 9.142e-02, -1.550e-01, 3.974e-01, -1.676e-03, 1.305e-01, 8.865e-02, 7.105e-02, -1.234e-01, -1.420e-01, -8.748e-02, -7.283e-02)); + r += mul(s4_8, M4(2.725e-01, -6.153e-02, 4.981e-02, 1.774e-01, -9.073e-02, 6.040e-02, 9.336e-02, -7.679e-02, 1.834e-02, -1.102e-01, -1.968e-02, -3.985e-02, -4.841e-02, 3.740e-02, 3.600e-02, 6.039e-03)); + r += mul(s5_0, M4(1.158e-02, 2.884e-02, 4.734e-02, -4.314e-02, 2.771e-02, -9.670e-03, -4.776e-02, 4.941e-02, -2.571e-02, 1.174e-03, 1.968e-01, -8.499e-02, 6.964e-03, -1.313e-01, 8.917e-02, -1.986e-01)); + r += mul(s5_1, M4(4.028e-02, -1.396e-01, -9.028e-02, -1.018e-01, 2.001e-02, -2.781e-02, -3.648e-02, -3.024e-02, 1.685e-01, -9.134e-02, -2.649e-02, -7.201e-02, 5.048e-02, -5.481e-02, 2.729e-02, -2.042e-01)); + r += mul(s5_2, M4(3.310e-02, -8.450e-02, -2.617e-02, -2.513e-02, 4.769e-02, -1.087e-02, 2.853e-02, -7.859e-02, 5.306e-02, -5.234e-03, -6.802e-02, -9.789e-03, -1.707e-03, -4.896e-02, 6.068e-02, -3.022e-02)); + r += mul(s5_3, M4(-3.122e-02, 9.881e-02, -8.278e-02, -1.581e-01, -1.089e-02, -1.052e-01, 1.229e-01, 1.970e-01, -2.099e-01, -4.803e-01, -6.948e-02, 3.955e-02, 1.303e-01, -8.005e-02, -4.979e-02, -3.878e-02)); + r += mul(s5_4, M4(-9.817e-03, 2.285e-01, 1.675e-01, 3.270e-01, 3.366e-03, 1.700e-02, -2.087e-01, 1.093e-01, -2.263e-01, -1.546e-01, -4.074e-01, 3.933e-01, 
1.169e-01, -1.683e-01, -1.159e-01, -1.172e-01)); + r += mul(s5_5, M4(-6.034e-02, 1.517e-01, 1.172e-01, 6.405e-03, -1.346e-01, -8.563e-02, 2.563e-03, 1.381e-01, -3.129e-02, 1.142e-02, 9.090e-02, -1.378e-03, 3.933e-02, 2.873e-02, -2.354e-02, 5.006e-02)); + r += mul(s5_6, M4(3.680e-02, -5.471e-02, -6.054e-02, -3.135e-01, 1.337e-01, 9.144e-03, 3.424e-02, 1.716e-01, 1.616e-01, 8.375e-02, 1.528e-01, 1.229e-01, -3.529e-02, 5.043e-02, 6.950e-02, 2.310e-01)); + r += mul(s5_7, M4(1.190e-01, 2.114e-01, 1.784e-01, 3.127e-01, 2.863e-02, -8.856e-03, -2.116e-01, 8.963e-02, 1.096e-01, -1.033e-01, -1.181e-01, -3.442e-02, -4.559e-02, 3.785e-02, -4.856e-02, 9.249e-02)); + r += mul(s5_8, M4(1.512e-01, -3.018e-01, 1.104e-01, 5.374e-02, 1.420e-01, -5.410e-02, -2.156e-02, 2.549e-02, -1.304e-01, 7.199e-02, -1.647e-02, -1.655e-02, -1.562e-02, -9.125e-03, 4.871e-02, 8.061e-02)); + r += mul(s6_0, M4(-1.661e-01, -7.401e-02, 2.991e-01, 3.378e-02, 8.224e-03, 2.133e-01, 1.556e-01, -8.104e-02, -3.602e-02, 1.171e-01, -1.558e-01, -2.589e-01, 5.025e-02, -3.294e-01, 8.721e-02, -9.232e-02)); + r += mul(s6_1, M4(-4.399e-02, -9.049e-02, -1.463e-01, 1.184e-03, -1.211e-02, -4.100e-01, -1.794e-01, -5.316e-01, -3.010e-02, -1.254e-01, -4.003e-02, 2.558e-01, 1.006e-01, -3.878e-01, 5.315e-02, -4.302e-01)); + r += mul(s6_2, M4(-6.827e-02, -7.814e-02, -1.883e-02, -7.352e-02, 5.799e-02, -2.375e-01, -1.946e-01, 2.344e-01, -3.900e-02, 1.998e-01, 3.617e-02, 3.397e-01, -3.544e-03, -1.710e-01, -2.681e-01, 3.032e-01)); + r += mul(s6_3, M4(-1.239e-02, -3.642e-01, 5.169e-01, 1.102e-01, -1.929e-01, -2.757e-01, 2.194e-01, 1.241e-02, 1.100e-02, 5.933e-02, -3.584e-02, 4.883e-02, 8.225e-02, -3.258e-01, -3.024e-01, -7.706e-02)); + r += mul(s6_4, M4(-1.048e-01, -2.233e-01, -4.194e-01, -1.971e-01, 3.057e-02, -5.247e-01, -3.971e-01, -7.568e-01, 4.288e-01, -4.538e-01, -1.061e-01, -4.279e-03, 3.848e-01, 6.533e-02, 1.332e-01, 6.273e-01)); + r += mul(s6_5, M4(-3.299e-01, 4.182e-01, 5.688e-02, -3.792e-01, 1.344e-01, -1.587e-01, 
-3.395e-01, -4.844e-01, -9.787e-02, 2.398e-01, 1.441e-01, -2.164e-01, -2.454e-01, -3.969e-01, 5.676e-02, 1.053e-01)); + r += mul(s6_6, M4(7.717e-02, 1.401e-01, 5.452e-02, 8.239e-02, -2.146e-01, -7.424e-02, -4.493e-02, 1.239e-01, -1.574e-01, -6.523e-02, 2.124e-02, 3.047e-02, -2.604e-02, -2.442e-01, 1.042e-01, 2.765e-01)); + r += mul(s6_7, M4(-2.414e-01, 3.261e-01, 4.907e-02, 1.913e-01, -1.530e-01, -2.334e-01, -2.100e-01, -3.092e-01, -3.491e-01, 8.934e-02, -1.828e-01, -2.946e-01, -2.928e-01, -3.750e-01, -1.567e-01, -5.165e-01)); + r += mul(s6_8, M4(-2.566e-01, 7.944e-03, -3.867e-02, 4.289e-02, -1.935e-01, 2.399e-01, -7.992e-02, -3.480e-01, 2.756e-01, -3.004e-01, -1.980e-01, 8.063e-02, 1.975e-01, -5.120e-01, -7.178e-01, -1.576e-01)); + r += mul(s7_0, M4(-5.313e-02, -9.675e-02, 8.447e-02, -1.015e-03, 4.811e-02, -5.155e-02, 1.174e-01, -3.531e-02, -5.847e-02, 2.552e-02, -3.431e-02, 9.081e-02, -1.513e-02, 4.855e-02, -5.698e-02, -1.104e-01)); + r += mul(s7_1, M4(2.220e-02, -2.069e-01, -3.894e-02, -1.384e-01, -3.986e-02, 6.346e-02, 5.772e-03, -5.374e-02, -6.286e-02, 4.130e-02, 7.041e-02, 5.773e-02, -6.073e-02, -4.835e-02, 6.096e-02, -5.045e-02)); + r += mul(s7_2, M4(-5.409e-02, -2.186e-02, -2.865e-02, 5.458e-02, 6.856e-02, -6.888e-02, -4.426e-02, -1.850e-02, -3.532e-02, 9.458e-02, 4.661e-02, 5.473e-02, -6.527e-03, 2.349e-02, 1.594e-02, 3.684e-02)); + r += mul(s7_3, M4(7.641e-02, 1.499e-01, 1.137e-01, 9.937e-02, 1.111e-01, -4.915e-02, 6.421e-02, -5.531e-02, -7.104e-02, -7.343e-02, 6.893e-03, 3.452e-02, 7.411e-04, 1.072e-03, -1.191e-02, 1.098e-01)); + r += mul(s7_4, M4(-1.380e-01, 1.221e-01, -2.653e-01, 1.543e-01, -7.499e-03, -1.484e-01, -6.812e-02, -1.438e-01, 1.563e-01, -9.350e-02, -5.984e-02, -1.608e-02, -1.048e-03, -3.701e-03, 1.823e-03, 1.252e-02)); + r += mul(s7_5, M4(-5.844e-02, 6.552e-02, -2.884e-04, -1.433e-01, 1.590e-01, 2.454e-02, -1.529e-02, 1.079e-01, -1.876e-02, -7.553e-02, -5.341e-02, 5.856e-02, 5.801e-02, -5.251e-02, -9.596e-02, -1.300e-02)); + r += mul(s7_6, 
M4(-3.389e-02, -4.335e-03, 6.620e-02, 1.189e-01, 5.442e-02, 6.087e-02, -7.007e-02, -6.704e-03, 4.338e-02, 6.597e-02, 5.637e-02, -3.489e-02, 1.003e-01, -1.031e-04, 1.344e-02, -2.442e-03)); + r += mul(s7_7, M4(3.731e-04, 2.007e-03, -2.524e-02, 2.764e-02, 1.374e-01, 9.564e-02, 2.084e-02, 2.440e-01, -7.477e-02, 1.743e-01, 1.243e-01, 5.556e-02, 9.613e-02, 4.185e-02, -2.427e-02, -5.286e-02)); + r += mul(s7_8, M4(1.166e-01, -1.165e-01, 4.113e-02, -1.206e-02, 3.121e-02, 2.477e-02, 3.920e-02, -3.340e-02, -7.050e-02, 9.544e-03, -1.030e-02, -1.385e-01, -4.626e-02, 1.883e-02, -3.286e-02, 6.743e-02)); + r += V4(-1.360e-02, 3.650e-03, -6.944e-03, -3.785e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.154e-01, -1.985e-01, 1.499e-01, -3.340e-02, -1.705e-01, 1.114e-01, -9.974e-02, 3.990e-02, -6.327e-02, 9.178e-02, -5.247e-02, -4.332e-02, 1.879e-01, 6.154e-02, 1.950e-01, -1.546e-02)); + r += mul(s0_1, M4(-3.251e-01, 1.854e-01, -1.207e-01, 8.930e-03, 1.084e-01, 4.698e-02, -2.006e-01, -1.060e-01, -2.855e-02, -5.228e-02, 4.426e-02, -7.995e-02, 2.124e-01, 1.694e-01, -4.630e-02, -2.907e-01)); + r += mul(s0_2, M4(1.770e-01, -2.177e-02, -3.960e-02, -4.858e-03, 2.929e-02, -9.233e-02, -2.425e-02, -3.472e-02, -3.113e-02, 2.401e-02, 5.923e-02, -1.336e-02, -1.948e-02, 2.982e-01, 2.401e-01, -6.072e-02)); + r += mul(s0_3, M4(2.466e-01, 
-1.719e-01, -2.502e-01, -2.076e-01, 5.577e-02, 8.098e-02, 2.537e-02, 3.755e-02, 1.009e-01, 5.775e-02, -6.156e-02, 1.435e-01, 1.549e-01, 7.111e-02, 7.965e-02, 3.535e-02)); + r += mul(s0_4, M4(-3.845e-01, -4.311e-01, -9.531e-02, 1.549e-01, 1.925e-01, 1.959e-01, -4.525e-02, -1.145e-01, -1.287e-01, 3.548e-02, -2.069e-01, -3.321e-02, -5.058e-01, -5.712e-01, 2.665e-01, -1.251e-01)); + r += mul(s0_5, M4(-2.107e-01, -2.416e-01, 2.315e-01, 1.819e-01, -1.505e-01, -1.737e-01, -1.769e-01, 2.376e-01, 3.709e-02, -4.289e-03, -1.603e-01, 1.601e-01, 6.685e-02, -1.035e-02, 2.208e-01, 4.211e-01)); + r += mul(s0_6, M4(1.504e-02, 2.069e-01, -4.744e-02, -4.930e-02, 1.714e-02, -2.204e-02, -3.344e-02, 8.667e-02, -8.427e-02, -1.567e-02, 7.016e-03, 2.903e-02, -2.768e-01, 8.307e-02, 1.835e-01, 3.246e-02)); + r += mul(s0_7, M4(2.362e-01, 1.497e-01, -2.478e-02, 2.906e-01, -5.144e-02, -3.948e-02, -8.055e-02, 8.972e-02, -3.795e-02, -7.829e-02, 1.327e-01, -2.375e-02, 1.843e-01, 1.676e-02, 7.430e-02, -4.663e-02)); + r += mul(s0_8, M4(-4.038e-01, -8.562e-02, -2.718e-02, 6.020e-03, -2.424e-03, -8.278e-02, 9.951e-03, 2.370e-01, 1.352e-01, -5.401e-02, 2.332e-02, 9.735e-02, 2.579e-02, -2.907e-02, -1.644e-01, 5.503e-01)); + r += mul(s1_0, M4(-7.736e-02, 1.452e-02, -1.477e-01, 1.563e-02, -1.381e-01, -3.512e-02, -9.591e-03, 1.146e-01, 1.125e-01, 4.748e-02, -1.161e-01, 2.147e-02, 2.703e-03, 2.926e-02, 8.666e-03, -1.114e-01)); + r += mul(s1_1, M4(9.165e-02, 1.007e-01, 9.648e-03, -2.103e-01, 9.773e-02, -1.104e-01, 1.274e-01, 6.616e-02, 1.256e-01, -4.704e-03, -5.183e-02, 7.561e-03, -1.578e-02, -1.429e-01, -8.847e-03, -1.296e-01)); + r += mul(s1_2, M4(-6.068e-02, -1.243e-01, -1.991e-02, 9.462e-02, -1.522e-01, 6.658e-02, 7.342e-02, -8.660e-03, -5.382e-02, -5.284e-02, 1.034e-01, -2.342e-02, 1.933e-02, -1.654e-02, -5.903e-03, -5.457e-02)); + r += mul(s1_3, M4(-6.515e-02, 1.092e-01, 5.110e-02, -1.597e-01, 1.952e-02, 1.197e-02, -1.445e-01, 1.139e-01, -2.891e-02, -4.518e-02, -8.023e-02, 2.761e-02, 6.582e-02, 
6.526e-03, -4.022e-02, 4.173e-02)); + r += mul(s1_4, M4(-3.641e-02, 2.065e-01, 2.426e-01, -1.669e-01, 1.970e-01, -9.693e-02, 1.057e-01, 8.536e-02, -1.529e-01, -7.039e-02, -6.987e-02, -2.977e-01, -1.966e-01, -7.779e-02, 5.828e-02, 2.423e-02)); + r += mul(s1_5, M4(-2.706e-02, 9.218e-02, -1.989e-02, -7.584e-02, -2.667e-01, -2.042e-01, -8.444e-02, 8.047e-02, 2.194e-01, -2.141e-03, -2.280e-01, -1.609e-01, 9.254e-02, 1.471e-03, -2.530e-02, 7.227e-03)); + r += mul(s1_6, M4(3.705e-02, 2.614e-02, 5.001e-02, 1.458e-01, -7.003e-02, 2.803e-03, 3.010e-02, 3.675e-02, -1.411e-01, -7.202e-02, 9.695e-02, -5.535e-02, 6.720e-03, 1.328e-01, -1.939e-02, 3.309e-02)); + r += mul(s1_7, M4(-8.033e-02, 3.550e-02, 4.386e-02, 7.473e-02, 4.803e-03, 5.806e-02, 6.359e-02, -4.416e-02, 1.404e-01, 2.763e-01, 2.265e-01, -7.626e-02, 8.332e-03, -1.466e-01, 1.343e-02, -1.013e-01)); + r += mul(s1_8, M4(1.662e-02, -4.509e-02, 2.532e-02, 6.410e-02, -1.172e-01, -4.649e-02, 6.102e-02, 5.086e-02, 1.754e-01, 6.016e-02, 5.396e-02, -1.824e-01, 1.560e-02, 8.324e-02, 3.164e-02, 7.181e-02)); + r += mul(s2_0, M4(-6.640e-02, -1.426e-01, 1.137e-01, 5.428e-03, -2.069e-01, 4.937e-02, 1.724e-01, 2.726e-02, -1.155e-02, -4.132e-02, -8.424e-02, 1.730e-03, -1.123e-02, 3.346e-02, 6.771e-02, -8.231e-03)); + r += mul(s2_1, M4(-1.062e-01, 3.203e-02, 8.230e-02, 7.972e-02, 7.404e-03, 1.401e-01, -1.138e-01, 1.447e-01, -1.044e-01, -1.311e-03, 7.789e-02, 7.804e-02, -3.355e-02, 1.654e-02, 7.310e-02, 6.201e-02)); + r += mul(s2_2, M4(-3.019e-02, -3.596e-02, 2.255e-02, -2.297e-02, 3.052e-03, 8.359e-02, -6.949e-02, -5.776e-02, 8.718e-02, 1.232e-01, 4.465e-02, 8.701e-04, 7.185e-02, -5.816e-02, -8.228e-02, 7.398e-02)); + r += mul(s2_3, M4(5.284e-02, -1.955e-01, -2.261e-01, -8.229e-02, -1.307e-02, -1.169e-02, 1.719e-02, 1.146e-03, -4.732e-02, 1.624e-02, 3.803e-03, -2.993e-02, -3.137e-02, -4.827e-02, -2.871e-02, 8.863e-02)); + r += mul(s2_4, M4(1.399e-02, -2.412e-02, -1.161e-01, -7.799e-02, 1.447e-01, 3.550e-02, 1.973e-01, 7.966e-02, 
-3.834e-02, 5.803e-02, 1.882e-02, -1.382e-01, -6.053e-02, -3.725e-02, 3.926e-02, -1.539e-01)); + r += mul(s2_5, M4(-5.312e-02, 4.702e-02, 4.705e-02, -1.199e-01, -4.658e-03, -1.364e-02, -8.581e-03, 7.523e-02, -9.700e-02, 4.502e-02, -3.235e-02, -5.661e-02, -3.437e-02, -9.407e-04, 6.316e-02, -8.778e-03)); + r += mul(s2_6, M4(-6.892e-02, 3.391e-02, 2.239e-02, 8.307e-02, -7.027e-02, -1.052e-01, -2.128e-02, 3.202e-02, 4.993e-02, 2.987e-02, 4.555e-02, -1.847e-02, 1.443e-02, 9.374e-02, 2.856e-02, 4.332e-02)); + r += mul(s2_7, M4(1.564e-03, 6.275e-02, 6.931e-02, 5.604e-02, 1.268e-01, 1.958e-01, 2.448e-02, -5.129e-03, -4.479e-03, -5.916e-02, 1.685e-02, 2.788e-02, 4.907e-02, 5.102e-02, -6.232e-02, -1.471e-01)); + r += mul(s2_8, M4(-4.005e-02, 3.995e-02, -5.486e-03, -7.590e-03, -5.829e-02, 8.440e-03, -2.257e-02, 5.725e-02, 7.811e-02, -3.087e-02, 1.459e-02, 7.373e-02, -8.110e-02, -4.362e-02, 9.073e-02, -1.205e-03)); + r += mul(s3_0, M4(5.996e-02, 2.559e-02, -4.335e-02, -1.921e-02, 3.060e-02, 4.734e-02, 3.138e-02, 4.786e-02, -5.547e-02, -1.006e-01, 6.969e-02, 2.517e-02, 1.450e-01, -1.965e-01, -2.166e-02, 5.454e-02)); + r += mul(s3_1, M4(-9.509e-02, 2.396e-01, -1.223e-01, 1.525e-01, 1.781e-02, 4.113e-02, 8.034e-02, 4.791e-02, -5.124e-02, 3.655e-02, 5.606e-02, -1.326e-01, 2.362e-01, -2.856e-01, 9.949e-02, -2.503e-03)); + r += mul(s3_2, M4(-2.826e-02, -4.316e-02, -6.645e-03, -9.037e-02, 2.881e-02, 1.849e-02, -2.980e-02, 6.783e-02, -3.966e-02, -1.189e-01, 2.440e-02, 1.622e-01, 1.795e-01, -2.550e-02, -1.020e-01, -2.077e-01)); + r += mul(s3_3, M4(2.792e-01, -1.974e-01, 7.530e-02, 1.101e-01, 1.597e-01, -8.238e-03, 1.060e-01, -1.239e-01, -3.282e-02, 2.198e-01, 2.315e-01, 1.032e-01, 3.121e-01, 1.830e-01, 1.565e-01, -1.057e-01)); + r += mul(s3_4, M4(-1.694e-01, 7.644e-02, 8.391e-02, 1.463e-01, -1.367e-01, 5.560e-02, 2.217e-01, -9.963e-02, 3.859e-01, 5.398e-01, 2.109e-01, -3.869e-01, 2.685e-01, 5.993e-01, 8.275e-01, -4.697e-01)); + r += mul(s3_5, M4(-1.178e-02, -1.160e-01, 6.509e-02, 
-3.215e-01, 8.458e-02, -3.025e-02, -2.700e-02, 1.391e-01, -9.935e-02, 1.111e-01, 2.960e-01, -1.086e-01, 1.861e-01, 1.874e-01, -1.004e-01, -1.994e-01)); + r += mul(s3_6, M4(-1.525e-01, -7.228e-02, 2.043e-02, -8.608e-02, -1.859e-01, 1.840e-01, -1.472e-02, -8.037e-02, 4.645e-02, -3.003e-02, -1.340e-01, 7.466e-03, 3.184e-01, 2.931e-01, 8.777e-03, -3.022e-01)); + r += mul(s3_7, M4(-2.205e-02, -6.896e-03, 8.011e-02, -1.185e-01, 3.310e-01, 1.849e-01, -1.434e-01, 3.598e-01, -1.459e-01, 1.145e-02, 4.120e-02, 5.183e-02, 2.295e-01, -3.073e-01, -1.620e-01, 5.568e-02)); + r += mul(s3_8, M4(-6.493e-02, 5.277e-02, 8.076e-02, -2.153e-01, -7.712e-03, 3.392e-02, -8.628e-02, -4.286e-02, 8.173e-02, 1.005e-01, -1.663e-02, 1.694e-01, -8.667e-02, 2.390e-02, -9.287e-02, 4.230e-01)); + r += mul(s4_0, M4(-2.535e-02, 7.364e-02, -3.215e-02, 4.371e-02, 1.397e-01, -6.697e-02, 2.961e-02, 2.035e-02, 2.170e-01, -6.461e-02, -2.757e-02, -6.409e-02, 6.608e-02, 1.919e-01, 9.635e-03, 1.185e-01)); + r += mul(s4_1, M4(-9.423e-03, -1.002e-01, -4.713e-02, -5.427e-02, 4.230e-02, 6.388e-02, 6.406e-02, 5.379e-02, 1.912e-01, -2.114e-02, 3.427e-02, -1.870e-01, -9.313e-02, 2.295e-01, -2.073e-02, -5.200e-02)); + r += mul(s4_2, M4(2.578e-03, -1.141e-01, 6.690e-03, 2.978e-02, -4.502e-03, -8.770e-04, -3.174e-02, 2.535e-02, -1.870e-01, -8.949e-02, 2.078e-02, -9.413e-02, -8.965e-02, 1.744e-02, -4.550e-02, -4.648e-02)); + r += mul(s4_3, M4(-9.328e-02, 1.079e-01, 1.978e-02, -1.923e-01, 1.085e-01, -6.612e-02, 4.103e-02, -3.044e-02, 3.967e-02, 1.013e-01, 1.862e-01, -1.381e-01, -1.094e-02, 5.844e-02, 3.528e-02, 1.830e-01)); + r += mul(s4_4, M4(6.287e-02, 1.220e-01, 1.360e-01, -1.083e-01, -1.495e-01, 2.206e-01, -2.363e-01, 2.418e-01, -8.528e-02, 3.689e-01, 1.967e-01, 9.816e-02, -2.394e-01, -3.554e-01, -6.090e-02, 1.320e-01)); + r += mul(s4_5, M4(-2.292e-02, -7.498e-02, 6.930e-02, -1.607e-03, 1.009e-01, -5.481e-02, 9.314e-02, -7.166e-02, -9.101e-03, 9.149e-03, 4.943e-02, -5.215e-02, -8.129e-02, 1.033e-01, 1.182e-02, 
1.077e-01)); + r += mul(s4_6, M4(-5.504e-02, 9.136e-02, 1.641e-01, 4.553e-02, 6.292e-02, -6.497e-02, -1.033e-01, -4.378e-02, -6.554e-02, -1.832e-01, 2.845e-02, 1.938e-02, -3.586e-02, 1.955e-01, 2.876e-02, -3.387e-02)); + r += mul(s4_7, M4(3.816e-02, 1.257e-01, 1.176e-01, 1.315e-02, -6.209e-02, -7.614e-03, -2.597e-02, 1.052e-01, -3.444e-02, -8.019e-02, 1.353e-01, 6.694e-02, 1.535e-01, -4.052e-02, -3.827e-02, -1.347e-01)); + r += mul(s4_8, M4(9.157e-02, 6.265e-02, -1.309e-01, 4.060e-01, 1.959e-01, -2.986e-02, -1.577e-01, -3.929e-01, 1.190e-02, 4.277e-02, 1.645e-02, 1.027e-02, -1.224e-01, -6.421e-02, -3.139e-02, 1.453e-01)); + r += mul(s5_0, M4(-1.302e-01, 3.300e-02, 6.960e-03, 3.966e-02, 8.696e-02, -6.568e-03, 1.117e-02, -5.805e-02, -1.110e-01, -5.196e-03, -2.877e-02, 1.780e-01, 6.423e-02, -9.575e-03, 7.860e-02, 1.108e-01)); + r += mul(s5_1, M4(-9.273e-03, 2.335e-02, 4.001e-02, -4.611e-02, 1.594e-01, -8.232e-02, 8.274e-02, 3.317e-02, 1.149e-01, -1.097e-01, -2.183e-03, -5.485e-04, -1.422e-02, 2.953e-02, -2.611e-02, 2.156e-02)); + r += mul(s5_2, M4(1.851e-02, -1.381e-02, 1.401e-02, 3.595e-02, 1.079e-01, -1.405e-01, 3.376e-02, 4.824e-03, -4.003e-02, -2.315e-02, 3.456e-02, -4.590e-02, -7.153e-02, -5.812e-02, 5.591e-02, -7.371e-02)); + r += mul(s5_3, M4(6.266e-02, -6.022e-02, 1.909e-02, -9.669e-02, -1.710e-01, 1.155e-01, 1.624e-01, -3.687e-02, 3.599e-02, -9.302e-02, 5.482e-02, -1.785e-02, 1.419e-01, -6.523e-02, -2.334e-01, 2.313e-01)); + r += mul(s5_4, M4(1.499e-02, 2.946e-01, 3.317e-02, -1.004e-02, 1.402e-01, -5.913e-02, -3.612e-01, 6.792e-02, 7.166e-02, 1.300e-01, 4.334e-02, 2.055e-02, -5.005e-02, -3.802e-01, -1.309e-01, 2.824e-01)); + r += mul(s5_5, M4(-3.209e-02, 1.013e-01, -3.733e-02, 6.260e-02, 1.285e-01, -2.924e-02, 5.652e-02, -1.114e-01, 4.553e-02, -9.548e-05, -4.586e-02, -1.267e-01, -5.062e-02, -7.563e-02, 3.300e-02, 1.982e-03)); + r += mul(s5_6, M4(-3.837e-02, 1.041e-02, -8.579e-02, 3.184e-02, 1.004e-01, -5.789e-03, 5.265e-02, 1.051e-01, 9.106e-02, -1.191e-01, 
-5.411e-02, -6.687e-02, -4.547e-03, -1.372e-01, -2.608e-02, -1.647e-02)); + r += mul(s5_7, M4(-6.538e-02, -2.211e-01, -2.886e-02, 3.342e-01, 2.763e-01, -1.112e-03, -3.350e-02, -5.897e-02, 9.749e-02, 5.000e-02, -1.087e-01, -1.115e-01, 1.179e-01, -2.407e-01, 2.304e-02, -5.334e-02)); + r += mul(s5_8, M4(1.509e-01, 3.522e-02, -4.966e-02, 1.525e-01, -9.950e-03, 5.831e-02, -2.597e-02, 5.006e-02, 1.811e-01, 6.085e-02, -2.301e-02, -1.797e-01, 3.146e-02, -3.496e-02, 5.519e-02, -4.698e-02)); + r += mul(s6_0, M4(-3.889e-01, 8.472e-02, 3.466e-01, -2.317e-02, -3.272e-01, -8.978e-03, 2.403e-01, -8.252e-02, 8.031e-02, 5.331e-02, -1.362e-01, 4.599e-02, -7.745e-01, 5.237e-03, -3.595e-02, 1.151e-02)); + r += mul(s6_1, M4(-4.438e-02, 1.434e-01, -2.854e-01, 8.344e-02, -2.194e-01, -1.053e-01, 3.440e-01, -1.755e-01, 1.965e-01, 2.596e-01, -2.437e-01, 2.639e-02, 8.182e-02, -6.142e-01, 5.269e-01, 1.076e-01)); + r += mul(s6_2, M4(4.176e-02, -1.379e-02, -2.602e-02, -1.157e-01, 1.066e-01, 1.188e-01, 1.506e-01, -1.939e-01, -6.532e-02, 1.515e-01, -6.209e-02, -5.568e-02, -2.508e-01, 7.956e-02, -6.839e-03, -7.680e-02)); + r += mul(s6_3, M4(-5.274e-01, 1.574e-01, 2.322e-02, 1.974e-01, -2.015e-01, -4.006e-02, -4.281e-02, 6.102e-02, -8.254e-02, 2.448e-02, -1.601e-01, 6.131e-02, 6.199e-02, 4.952e-02, -3.097e-01, 4.143e-01)); + r += mul(s6_4, M4(-1.786e-01, 2.494e-01, 1.251e-01, 9.363e-02, -1.613e-01, 1.808e-01, -6.279e-01, 5.140e-01, -2.397e-01, -3.454e-02, -1.753e-01, 3.366e-01, -4.295e-01, -8.298e-02, -3.088e-01, -2.506e-01)); + r += mul(s6_5, M4(-5.745e-01, -2.340e-03, 4.310e-02, 1.359e-01, -9.361e-02, 1.619e-01, -1.893e-01, 2.686e-01, 2.511e-01, 3.182e-02, 2.292e-02, -1.272e-01, 1.148e-01, 3.345e-01, -5.757e-01, -2.783e-01)); + r += mul(s6_6, M4(2.996e-01, -1.552e-01, 1.382e-01, 3.307e-01, 9.571e-02, -1.282e-02, -7.687e-02, -2.027e-02, -1.476e-02, 3.399e-02, -1.134e-01, -2.647e-02, -1.429e-01, -7.036e-02, -1.513e-02, 3.024e-02)); + r += mul(s6_7, M4(-6.036e-01, -1.425e-01, 1.056e-01, -4.375e-01, 
-3.845e-01, 1.217e-01, -2.882e-01, 3.131e-01, 1.087e-01, -1.809e-01, -4.599e-02, -1.999e-02, 1.257e-01, 2.246e-01, -1.548e-01, -1.252e-01)); + r += mul(s6_8, M4(-9.834e-02, 9.684e-02, 7.735e-02, 6.983e-02, 1.036e-01, 1.324e-01, 1.711e-01, -3.028e-01, -2.124e-02, -3.839e-02, 6.275e-02, -6.320e-02, -1.567e-01, 1.782e-01, -1.595e-01, 5.094e-01)); + r += mul(s7_0, M4(-6.443e-02, -5.409e-02, -9.004e-02, 8.554e-02, 1.222e-02, 6.177e-02, 9.452e-02, -1.811e-03, 2.774e-03, -8.285e-03, -4.119e-02, -2.120e-02, 4.556e-02, 2.671e-02, -7.488e-02, 3.807e-02)); + r += mul(s7_1, M4(7.269e-02, -9.661e-02, 7.214e-03, 1.116e-01, -6.318e-02, -7.593e-02, -7.289e-03, 1.131e-01, 1.499e-01, 9.402e-02, -4.967e-02, -1.259e-02, -5.367e-02, -7.372e-02, 8.031e-02, -3.618e-02)); + r += mul(s7_2, M4(6.442e-03, -3.003e-02, -1.879e-02, 6.275e-02, 8.840e-02, 5.896e-02, 6.386e-02, -2.205e-02, -4.846e-02, 6.194e-02, -7.356e-02, -3.447e-03, -1.864e-02, -4.320e-02, -1.732e-02, -4.979e-02)); + r += mul(s7_3, M4(2.765e-02, 4.141e-02, 2.696e-02, -5.018e-02, 3.907e-02, -7.082e-02, 9.419e-02, -8.741e-03, 1.573e-01, 9.473e-02, 4.708e-02, -1.652e-02, 1.502e-02, 8.182e-02, -1.322e-02, 3.526e-02)); + r += mul(s7_4, M4(-1.474e-01, -3.990e-02, -1.310e-01, -5.310e-02, 1.340e-01, -1.109e-02, -1.248e-01, 8.321e-02, -1.425e-01, -5.966e-02, -1.426e-01, -1.162e-03, 5.641e-02, 2.371e-03, 1.682e-02, -4.745e-03)); + r += mul(s7_5, M4(-3.767e-03, 1.891e-02, 2.372e-03, -1.110e-01, 1.951e-02, -4.606e-02, -1.608e-01, 2.397e-01, -7.490e-02, -3.436e-02, 2.068e-02, -1.087e-01, -1.346e-01, 4.235e-02, 1.825e-02, 6.811e-02)); + r += mul(s7_6, M4(-2.091e-02, 1.196e-02, -2.033e-02, 8.187e-02, 6.749e-02, -3.418e-02, -3.065e-04, -8.559e-02, -6.455e-02, 8.523e-02, 7.157e-02, 2.214e-02, -1.851e-02, -2.486e-04, 3.343e-02, 5.581e-02)); + r += mul(s7_7, M4(-5.703e-03, 5.631e-02, -2.922e-02, 1.219e-01, 2.548e-02, -1.005e-01, -1.114e-01, -9.546e-02, -2.900e-02, 5.282e-02, 1.550e-01, -1.171e-02, -3.671e-03, 2.578e-02, 1.132e-02, -1.251e-03)); + 
r += mul(s7_8, M4(-2.153e-02, 3.114e-02, -2.612e-02, 1.743e-01, 3.585e-02, -2.815e-02, 2.412e-02, -4.767e-02, -1.490e-01, 2.185e-02, 8.061e-02, 8.901e-02, 2.930e-02, -3.864e-03, -4.137e-05, -8.294e-02)); + r += V4(-3.055e-03, 8.217e-04, 1.986e-04, -1.878e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.929e-02, 1.394e-02, -1.029e-01, 2.892e-01, 4.374e-02, 6.804e-02, -1.009e-02, 3.317e-02, 6.233e-02, -6.770e-02, 6.294e-02, 9.080e-02, 1.315e-01, 2.016e-02, 1.460e-01, -2.511e-01)); + r += mul(s0_1, M4(1.536e-01, 7.006e-02, 1.478e-01, -1.093e-01, -6.266e-02, 1.507e-02, 5.591e-02, 4.596e-02, 1.262e-02, -6.441e-02, 7.968e-03, -7.319e-02, -4.285e-01, 4.255e-01, 6.450e-02, -2.665e-01)); + r += mul(s0_2, M4(-3.394e-01, 2.370e-02, -1.853e-01, 4.372e-02, -6.294e-02, 6.183e-02, 1.800e-02, 1.802e-03, 2.202e-02, -3.171e-02, -1.361e-02, 8.976e-03, -9.553e-02, 4.330e-02, 1.193e-01, -2.788e-02)); + r += mul(s0_3, M4(-6.521e-01, 1.263e-01, -2.676e-01, 6.885e-01, -7.121e-02, 7.531e-02, -2.154e-02, -4.210e-02, 5.622e-02, -4.680e-02, 1.068e-01, 3.298e-02, -1.073e-01, 1.729e-01, -6.696e-02, 4.533e-01)); + r += mul(s0_4, M4(1.685e-01, 2.405e-01, -1.513e-01, -2.889e-01, -5.371e-01, -4.868e-01, -3.566e-01, -9.242e-02, 2.206e-01, -1.275e-01, 2.622e-01, 1.606e-01, -4.582e-01, 1.417e+00, -7.227e-01, -1.538e-01)); + r += mul(s0_5, 
M4(1.065e-01, 6.979e-02, 1.696e-02, 5.949e-02, -7.888e-02, 1.250e-01, -1.149e-02, 5.212e-03, -5.860e-02, 1.635e-04, -1.013e-01, 2.759e-02, -2.364e-01, 2.244e-01, -3.560e-01, 2.910e-01)); + r += mul(s0_6, M4(-4.975e-02, -9.110e-02, -2.293e-01, 3.099e-01, -6.053e-03, -1.695e-03, 1.976e-02, 2.727e-02, -5.408e-02, -1.152e-02, 7.005e-02, 3.262e-03, 1.176e-01, 1.129e-01, 2.885e-01, -6.376e-02)); + r += mul(s0_7, M4(3.176e-02, -1.691e-02, -2.270e-01, -1.639e-01, 1.829e-02, 1.531e-02, 1.670e-01, -2.879e-02, -7.558e-02, 1.134e-02, -7.440e-02, 1.989e-02, -6.936e-01, 2.447e-01, -4.170e-01, -1.200e-01)); + r += mul(s0_8, M4(-9.140e-02, 9.945e-02, -1.102e-01, 1.411e-01, -4.055e-03, 4.229e-02, 8.477e-02, -4.669e-03, -9.157e-02, 1.788e-02, -1.170e-01, -1.028e-04, -1.183e-01, 7.345e-02, -2.036e-01, 1.127e-01)); + r += mul(s1_0, M4(6.494e-03, -8.688e-02, 7.673e-02, 5.917e-02, 1.075e-01, -4.407e-02, -1.117e-01, 1.813e-01, 1.302e-01, -7.901e-02, 1.289e-01, 2.643e-01, -9.625e-02, 2.704e-02, 7.501e-03, 3.876e-02)); + r += mul(s1_1, M4(7.495e-02, -4.704e-02, -2.103e-03, -2.250e-02, 3.552e-02, 5.431e-02, 1.374e-01, -1.099e-01, 1.235e-01, 8.284e-02, 1.965e-01, -3.252e-01, -5.457e-02, 4.518e-02, 2.164e-02, -2.932e-02)); + r += mul(s1_2, M4(3.765e-02, 3.760e-02, 2.836e-02, -1.903e-02, 2.096e-01, 5.725e-02, 9.411e-02, -6.930e-02, 1.475e-01, -8.083e-02, -2.270e-02, -1.851e-02, -1.210e-02, -5.534e-02, 3.330e-02, -2.178e-02)); + r += mul(s1_3, M4(-5.579e-02, -1.761e-01, 1.940e-01, -1.588e-03, -1.796e-02, -1.910e-02, -2.752e-02, 3.096e-01, -2.540e-02, -1.123e-01, 1.989e-01, -1.469e-01, 1.753e-01, 9.378e-03, -3.200e-02, -4.680e-02)); + r += mul(s1_4, M4(-1.698e-01, 6.087e-02, -1.699e-01, 4.775e-02, -1.398e-01, 4.084e-02, 1.978e-01, 2.140e-02, -3.981e-01, -4.084e-01, 7.406e-01, 3.177e-02, 3.568e-02, 1.043e-02, 7.653e-02, -6.814e-02)); + r += mul(s1_5, M4(-6.348e-02, -1.269e-01, -4.933e-02, -6.444e-02, -4.170e-02, 8.089e-02, -7.447e-02, -2.348e-01, 1.053e-01, -8.449e-02, 7.375e-02, 3.311e-02, 
-3.556e-03, -1.869e-02, -1.145e-01, 7.772e-02)); + r += mul(s1_6, M4(1.427e-01, -5.258e-02, 1.594e-01, -6.412e-02, -3.988e-03, -6.896e-02, 3.285e-02, 2.957e-02, -8.045e-02, -8.518e-02, -3.740e-02, 1.324e-02, -5.752e-02, -2.155e-02, 8.845e-02, -2.719e-02)); + r += mul(s1_7, M4(6.319e-02, -1.372e-01, 5.411e-02, -1.137e-02, -1.919e-03, 3.565e-03, 6.239e-02, 2.035e-02, 1.491e-01, -3.921e-02, -2.202e-01, 1.100e-01, 6.417e-02, 3.860e-02, -9.892e-02, 4.096e-02)); + r += mul(s1_8, M4(-1.877e-02, -8.548e-03, -9.893e-03, 8.239e-02, -1.252e-01, 6.837e-02, -4.214e-02, -1.122e-01, -1.549e-01, -8.251e-02, -1.850e-02, 6.051e-02, -1.606e-02, -4.278e-02, 7.242e-03, 3.119e-02)); + r += mul(s2_0, M4(2.938e-02, 6.311e-03, -1.603e-02, 1.864e-01, 2.389e-02, -7.010e-02, -1.969e-02, 1.665e-01, -8.765e-03, 3.094e-02, -7.212e-02, 9.473e-02, -9.279e-02, 4.196e-02, -6.420e-02, -1.139e-02)); + r += mul(s2_1, M4(-6.522e-02, 3.284e-02, 6.400e-03, -6.888e-02, 1.214e-01, 4.339e-02, -4.605e-02, -1.347e-02, -5.231e-02, -4.723e-02, 5.547e-02, 4.327e-04, 3.641e-02, 1.037e-01, -3.036e-02, -5.208e-02)); + r += mul(s2_2, M4(5.624e-02, -4.202e-02, -7.348e-03, 8.454e-02, -1.246e-02, -3.614e-02, -4.076e-02, 3.926e-02, 7.292e-02, -1.052e-02, 1.421e-01, -1.122e-01, 5.749e-02, 7.618e-03, 7.279e-02, 3.060e-02)); + r += mul(s2_3, M4(-3.719e-02, 8.886e-02, -2.164e-01, 1.742e-01, 1.479e-01, 8.478e-02, -2.363e-01, 2.324e-01, -7.640e-02, 3.614e-02, -1.143e-02, -4.378e-02, 7.800e-02, 1.697e-02, 2.702e-02, -6.870e-02)); + r += mul(s2_4, M4(7.121e-03, 5.612e-02, -1.539e-02, -1.779e-01, -2.794e-01, 5.596e-02, -6.104e-03, -4.248e-01, 4.653e-02, -5.111e-02, 4.138e-02, -5.634e-02, 2.431e-02, -1.374e-01, 1.631e-01, 8.579e-02)); + r += mul(s2_5, M4(-7.348e-02, -2.786e-02, -5.249e-02, 8.804e-02, 1.323e-02, -8.938e-03, -5.376e-02, 1.981e-02, 7.637e-02, -1.045e-02, 4.240e-02, -3.250e-02, 3.613e-02, -7.318e-02, 1.206e-01, 5.736e-02)); + r += mul(s2_6, M4(-1.362e-01, -4.425e-03, -6.220e-02, -1.334e-02, -1.092e-01, -4.584e-02, 
-1.233e-01, -6.446e-02, 1.995e-02, 1.654e-02, 9.213e-02, -4.404e-02, 4.236e-02, -2.820e-02, 2.746e-02, -2.553e-02)); + r += mul(s2_7, M4(-6.418e-02, 4.919e-03, 1.203e-02, -1.571e-01, 1.260e-01, -1.607e-02, 2.401e-01, -8.348e-02, 2.950e-02, -2.530e-02, -4.351e-03, 2.585e-02, -1.479e-01, -2.062e-02, -5.796e-02, 1.788e-02)); + r += mul(s2_8, M4(6.063e-02, -8.423e-03, -2.582e-02, 1.009e-01, -2.646e-02, -2.565e-02, -3.990e-03, 7.251e-02, 7.221e-02, -1.586e-02, 3.092e-02, -5.658e-02, 8.470e-02, -9.544e-02, 6.901e-02, 3.920e-02)); + r += mul(s3_0, M4(2.115e-02, -8.124e-03, -2.608e-02, -7.437e-02, -1.013e-01, -1.921e-03, -2.710e-02, 4.266e-02, -1.688e-02, 7.936e-02, 1.422e-01, -1.240e-01, -2.660e-01, 2.149e-01, -1.288e-01, -8.502e-02)); + r += mul(s3_1, M4(3.215e-01, -2.627e-02, 1.900e-01, -6.555e-02, 5.407e-02, 2.532e-02, -8.633e-02, -6.194e-02, -1.221e-01, 5.269e-03, 4.452e-02, -1.678e-02, -1.882e-01, 7.496e-02, 9.751e-02, -1.519e-02)); + r += mul(s3_2, M4(7.334e-03, -3.815e-02, 3.329e-02, -4.134e-02, 6.074e-02, -1.149e-02, -1.718e-02, 1.138e-01, 2.585e-02, 3.471e-03, 3.192e-02, -5.751e-02, -2.387e-01, -6.247e-02, -1.428e-01, 1.758e-01)); + r += mul(s3_3, M4(-8.371e-02, 1.729e-01, -4.040e-02, 7.848e-03, 3.826e-02, 1.037e-01, 3.446e-01, -1.630e-01, 1.937e-01, -2.246e-02, 2.290e-01, -3.674e-01, -3.557e-01, -9.400e-02, 8.883e-02, -2.473e-01)); + r += mul(s3_4, M4(4.022e-02, 9.074e-02, 3.186e-01, -2.781e-01, -2.178e-01, 1.871e-01, 2.181e-01, -1.682e-01, -2.982e-01, -1.465e-01, 1.422e-01, -2.944e-01, -4.263e-01, -3.463e-01, -1.279e-01, 5.540e-02)); + r += mul(s3_5, M4(5.062e-03, 2.637e-02, -6.406e-02, 2.344e-02, -2.716e-02, 4.376e-02, -1.275e-02, 1.372e-01, -9.199e-02, -6.601e-02, -1.921e-01, 1.098e-01, -2.245e-01, 4.797e-02, -3.830e-02, -9.858e-02)); + r += mul(s3_6, M4(-7.698e-02, -1.355e-02, -6.104e-03, -5.110e-02, -1.172e-01, 2.314e-02, -1.717e-01, 4.721e-02, -4.435e-02, -9.955e-03, -7.747e-03, 1.264e-02, 8.600e-02, -8.934e-02, 3.089e-01, -2.826e-01)); + r += mul(s3_7, 
M4(2.209e-01, -2.247e-02, 1.127e-01, -1.381e-01, -1.893e-02, -1.863e-02, -1.433e-01, -1.005e-01, 3.568e-01, 1.667e-01, 6.082e-01, -2.027e-01, -4.644e-02, 7.690e-02, -2.035e-01, 7.185e-02)); + r += mul(s3_8, M4(3.511e-02, -4.453e-02, 5.640e-02, 6.786e-03, -9.863e-02, 3.650e-02, 1.515e-03, 9.665e-02, 3.349e-03, 5.730e-02, 2.440e-01, 5.529e-02, -9.132e-03, 1.216e-01, 1.038e-01, 1.600e-01)); + r += mul(s4_0, M4(1.456e-02, 1.803e-02, -3.595e-02, 7.360e-02, -3.576e-02, 1.153e-02, 1.893e-02, -6.975e-02, -2.245e-02, -4.034e-03, 1.824e-01, -1.271e-01, 1.824e-01, 6.083e-02, -1.168e-02, 6.473e-02)); + r += mul(s4_1, M4(2.423e-02, 1.714e-02, -2.532e-02, -2.726e-03, 1.654e-01, -1.382e-02, -9.903e-03, 3.748e-02, 1.648e-02, 1.015e-01, 3.709e-02, -6.161e-02, 1.033e-02, 5.414e-02, 7.422e-02, 7.323e-02)); + r += mul(s4_2, M4(-1.186e-02, 6.384e-02, 2.749e-02, 4.222e-02, -3.260e-02, -1.220e-02, 1.434e-02, -5.661e-02, -9.468e-02, -3.613e-02, -1.145e-01, 2.334e-02, 9.387e-02, -5.652e-02, -1.897e-02, 2.035e-02)); + r += mul(s4_3, M4(-8.724e-02, 4.730e-02, 1.411e-01, -2.486e-01, -9.238e-03, 1.033e-01, -2.847e-02, 1.085e-03, -6.764e-02, -7.273e-02, 1.216e-01, -1.953e-01, 2.037e-01, 1.383e-01, -1.154e-01, -9.102e-03)); + r += mul(s4_4, M4(-3.881e-01, -8.371e-02, 5.415e-02, -5.053e-02, 6.264e-02, 5.022e-02, -1.208e-01, 1.725e-01, 1.402e-01, -1.852e-01, 1.642e-01, 1.540e-01, -6.915e-03, 3.961e-02, -7.876e-02, -1.696e-01)); + r += mul(s4_5, M4(-4.428e-02, -2.901e-02, -1.489e-01, 1.314e-01, -8.217e-02, -3.771e-02, 2.933e-02, -1.317e-01, -9.995e-02, -1.362e-01, -1.978e-01, -4.783e-02, 4.732e-02, -7.175e-02, -1.323e-02, 2.203e-01)); + r += mul(s4_6, M4(2.074e-01, 5.412e-02, 2.316e-01, -1.630e-01, -1.077e-01, 7.548e-02, -1.094e-01, 3.166e-02, 1.419e-02, 3.955e-02, -5.825e-03, -3.774e-02, -8.126e-02, 3.387e-03, -3.478e-02, 6.284e-02)); + r += mul(s4_7, M4(-1.209e-01, -4.387e-03, -2.495e-01, -2.842e-02, 2.790e-01, 4.301e-03, 2.841e-01, -5.945e-02, 2.642e-01, -4.255e-04, 8.720e-02, -1.297e-01, 
-2.182e-01, 2.956e-03, -1.868e-01, 1.557e-03)); + r += mul(s4_8, M4(-5.167e-02, 2.543e-02, -3.688e-02, 2.518e-01, -1.135e-01, -4.912e-02, -1.333e-01, -8.341e-02, -8.503e-02, 1.235e-02, -1.045e-01, 5.429e-02, -1.481e-02, -2.261e-02, -5.317e-02, 8.046e-04)); + r += mul(s5_0, M4(-4.787e-02, 2.142e-02, -3.906e-02, 1.847e-02, -8.494e-02, 6.585e-03, -1.889e-02, -2.740e-02, 3.385e-02, 7.995e-03, 1.826e-02, -1.367e-01, 1.279e-01, 5.466e-02, -3.254e-02, -4.132e-03)); + r += mul(s5_1, M4(-1.343e-02, 2.381e-03, 6.909e-02, -5.667e-02, -6.099e-02, -5.946e-02, -9.307e-02, 3.245e-02, -6.459e-02, 7.934e-02, -1.250e-02, -2.810e-02, -4.333e-02, -6.090e-05, 8.321e-02, -5.979e-02)); + r += mul(s5_2, M4(6.584e-02, 4.673e-02, 4.752e-02, -2.226e-02, -1.202e-02, -6.762e-02, -1.710e-02, -4.154e-02, -4.673e-02, -3.560e-02, -3.847e-03, 8.072e-02, -1.030e-02, 1.828e-02, 3.360e-02, -5.665e-02)); + r += mul(s5_3, M4(9.618e-02, -6.697e-02, 5.786e-02, -1.769e-02, 6.292e-02, 6.496e-02, -5.166e-02, -1.268e-01, 1.838e-01, -5.380e-02, -1.049e-01, -2.942e-01, -8.008e-02, 1.163e-01, -2.147e-01, 8.550e-02)); + r += mul(s5_4, M4(1.806e-01, -2.214e-02, 1.480e-01, -4.695e-03, -2.015e-01, 4.037e-02, -2.033e-01, 9.411e-02, -1.514e-01, -1.609e-02, 1.584e-01, 9.198e-02, 3.643e-01, 1.669e-01, 5.255e-02, -5.626e-02)); + r += mul(s5_5, M4(-3.640e-02, -7.449e-02, 4.916e-02, -3.708e-02, 3.561e-02, -6.471e-02, 6.061e-02, 1.204e-01, -7.679e-03, -4.449e-02, -1.074e-01, 5.020e-02, -1.304e-02, 1.463e-02, -1.400e-02, 7.025e-02)); + r += mul(s5_6, M4(-6.186e-02, 2.234e-02, -2.843e-02, 9.581e-02, 2.966e-02, 7.306e-02, -7.263e-02, 2.891e-02, -4.971e-02, 4.807e-02, -7.351e-02, -2.457e-02, -6.424e-03, 4.141e-02, -2.535e-02, -8.942e-03)); + r += mul(s5_7, M4(-1.253e-01, 3.679e-02, -4.725e-02, -4.554e-02, -1.710e-02, 5.232e-02, -4.381e-04, -1.350e-01, -2.596e-03, 2.879e-02, -8.824e-02, -1.344e-02, -1.887e-02, -1.858e-02, 1.976e-02, -9.516e-02)); + r += mul(s5_8, M4(-1.535e-02, 4.001e-02, 2.514e-02, 1.360e-01, 3.558e-02, 
-1.636e-02, 3.687e-03, 1.382e-01, -7.037e-02, -6.769e-03, -4.821e-02, -9.504e-03, -1.632e-02, -4.082e-02, -4.551e-02, 8.719e-02)); + r += mul(s6_0, M4(3.314e-01, -1.421e-01, -1.206e-02, 1.993e-01, -1.283e-01, -1.398e-01, -6.857e-02, -4.534e-03, 1.774e-01, 3.379e-03, 1.985e-03, 8.964e-03, -3.737e-01, 4.491e-02, -8.822e-02, -8.766e-02)); + r += mul(s6_1, M4(1.706e-01, 1.010e-01, 6.570e-02, 3.344e-01, -2.310e-01, -2.290e-01, 1.214e-02, 1.429e-01, 6.835e-02, 1.687e-01, 3.769e-02, 8.285e-02, -5.040e-01, -8.710e-02, -2.200e-01, -2.363e-01)); + r += mul(s6_2, M4(1.107e-01, 4.987e-02, 6.279e-02, 6.233e-02, -7.785e-02, -8.975e-02, 9.166e-02, -1.674e-01, 4.368e-02, -1.147e-01, -9.301e-02, 2.696e-03, 1.017e-01, 7.177e-02, 1.267e-01, 7.352e-02)); + r += mul(s6_3, M4(8.419e-01, 1.557e-01, -1.064e-01, 3.449e-01, 1.228e-01, -6.586e-02, -3.389e-01, 4.428e-01, 3.516e-02, 9.812e-04, -1.527e-01, 3.165e-01, 2.210e-01, 1.391e-01, 2.527e-01, 7.654e-01)); + r += mul(s6_4, M4(-7.380e-02, 2.066e-02, 1.157e-01, 3.417e-01, 6.235e-01, 2.678e-01, -4.457e-03, 7.970e-01, 4.932e-01, 1.805e-01, 1.979e-01, -1.145e-01, 8.089e-01, 1.208e-01, 6.427e-01, 4.596e-01)); + r += mul(s6_5, M4(1.512e-01, 4.153e-02, -1.349e-02, -3.943e-01, 1.621e-01, 2.125e-01, 3.478e-01, -2.117e-01, -4.187e-01, -1.377e-01, -2.767e-01, -3.075e-01, -3.716e-01, 1.678e-02, -7.506e-02, -7.046e-02)); + r += mul(s6_6, M4(-5.645e-04, 8.539e-02, 1.192e-02, 1.301e-01, -5.319e-02, 1.275e-02, -1.687e-01, 2.101e-01, 1.096e-01, -2.104e-04, -1.130e-01, -1.952e-03, 1.334e-02, -4.353e-02, -8.794e-02, 3.036e-02)); + r += mul(s6_7, M4(2.405e-01, -1.602e-02, 1.442e-01, -2.301e-01, 5.157e-01, -2.357e-03, 1.410e-02, 2.979e-01, -4.966e-02, 1.288e-01, 7.233e-02, -1.980e-01, -1.339e-01, 9.085e-02, -2.666e-01, 5.901e-02)); + r += mul(s6_8, M4(-1.635e-01, -1.742e-02, -7.872e-04, -3.437e-02, -5.201e-02, -7.742e-02, 1.519e-01, -3.565e-01, 1.847e-01, 7.361e-02, 2.874e-02, 5.474e-02, -1.476e-01, 1.268e-01, 1.097e-01, 1.250e-01)); + r += mul(s7_0, 
M4(-2.977e-02, 1.180e-01, -1.144e-01, -2.712e-02, 2.344e-02, 3.005e-02, -5.376e-02, 2.245e-02, -1.964e-02, -3.806e-02, 5.948e-02, -3.850e-02, 8.518e-02, 2.359e-02, 3.988e-02, 3.925e-02)); + r += mul(s7_1, M4(-7.016e-02, 5.264e-02, -1.546e-02, 1.149e-02, -7.637e-02, 5.961e-02, -1.217e-01, 8.794e-02, 4.621e-02, -2.797e-02, -8.991e-05, 1.274e-01, -3.130e-02, 9.594e-02, 9.378e-02, -2.204e-02)); + r += mul(s7_2, M4(6.668e-02, 1.219e-02, 2.202e-02, 2.572e-02, 3.076e-02, 3.448e-02, -1.584e-02, 7.644e-03, -5.237e-02, 1.070e-02, 8.137e-03, 4.005e-02, -7.023e-02, -3.941e-03, -7.663e-02, 1.537e-02)); + r += mul(s7_3, M4(8.858e-02, 5.080e-02, -2.043e-01, 6.574e-02, -5.817e-02, 9.694e-02, -1.094e-01, -1.146e-01, -4.703e-02, 7.113e-02, -2.575e-02, 5.163e-03, -4.438e-02, 2.622e-02, -2.860e-02, 1.962e-02)); + r += mul(s7_4, M4(1.332e-01, -1.357e-01, 1.993e-01, 1.685e-01, 5.237e-02, 3.852e-02, -1.314e-01, 1.839e-01, 1.229e-02, 5.550e-03, -2.378e-02, -4.422e-02, 3.551e-02, -4.724e-02, -2.718e-02, -6.719e-02)); + r += mul(s7_5, M4(-1.227e-01, -1.223e-02, 3.191e-02, -1.237e-01, -4.575e-02, 8.884e-02, 8.201e-02, -4.919e-02, 6.765e-02, -1.065e-02, -4.379e-02, 3.344e-02, 1.195e-02, 1.224e-02, -4.593e-02, 1.547e-02)); + r += mul(s7_6, M4(-2.574e-02, 7.691e-03, -9.264e-02, 7.526e-02, -1.241e-02, 3.506e-02, 8.149e-03, -8.028e-02, 9.684e-02, -4.999e-03, 1.119e-01, -1.116e-01, -7.561e-03, 3.837e-02, 5.530e-02, -5.594e-02)); + r += mul(s7_7, M4(-2.481e-02, 5.517e-02, -8.307e-02, -1.765e-02, 1.868e-02, 1.927e-02, 4.930e-02, 3.460e-02, 2.957e-02, 3.232e-02, -4.537e-02, -8.795e-02, -4.719e-02, 1.410e-02, -2.081e-02, -8.162e-02)); + r += mul(s7_8, M4(7.341e-03, 6.469e-02, 2.905e-02, -1.249e-02, 7.263e-03, -2.017e-02, 8.103e-04, -4.192e-02, -3.527e-02, -8.213e-03, 2.403e-02, -6.824e-02, 6.086e-02, -2.941e-02, 3.362e-03, 1.645e-02)); + r += V4(9.541e-03, -6.427e-03, 9.772e-03, -2.560e-03); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = 
Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = 
-max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + 
t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.035e-01, -3.131e-02, -1.683e-02, 6.132e-02, 7.106e-02, 8.530e-02, 3.712e-02, 2.200e-02, 5.484e-02, -2.405e-02, 3.609e-03, 
2.484e-02, -1.239e-02, 3.143e-02, -1.143e-03, 5.853e-02)); + r += mul(s0_1, M4(-6.852e-03, -5.276e-02, -1.574e-02, -7.396e-03, -5.895e-03, 2.645e-01, 6.641e-02, 8.550e-02, 8.407e-02, -5.343e-02, -2.788e-02, -2.786e-02, -5.168e-02, 4.419e-03, 8.365e-02, -2.581e-02)); + r += mul(s0_2, M4(4.320e-03, 4.378e-02, 6.773e-02, 4.165e-02, 6.133e-04, 3.012e-02, -1.258e-02, 9.748e-02, -2.418e-02, 4.472e-02, 5.103e-02, 1.214e-02, 1.352e-02, -2.460e-02, -4.003e-03, -1.377e-02)); + r += mul(s0_3, M4(2.667e-01, 9.762e-02, 5.992e-02, -3.087e-02, 3.817e-02, -3.431e-02, 2.540e-02, -1.875e-03, 9.802e-02, 6.589e-02, 5.863e-02, -1.504e-02, 9.547e-02, 9.985e-02, -3.192e-02, -4.271e-03)); + r += mul(s0_4, M4(-1.698e-01, 2.632e-01, -5.454e-03, -7.862e-02, 1.375e-01, -3.125e-01, 9.081e-03, 9.557e-02, 8.478e-02, 2.898e-02, -1.091e-01, -8.330e-02, 6.596e-02, 2.710e-02, 1.680e-01, 1.788e-01)); + r += mul(s0_5, M4(-9.017e-03, 6.888e-02, 6.141e-02, 1.776e-02, -1.655e-02, -6.601e-03, 3.162e-01, 1.655e-01, 8.691e-03, -6.128e-02, 3.331e-02, 9.702e-03, 1.413e-02, -7.276e-02, 5.032e-02, -1.593e-01)); + r += mul(s0_6, M4(1.046e-01, -5.227e-02, 5.895e-02, -3.235e-02, 6.427e-02, 3.331e-02, 5.590e-02, 2.335e-02, -1.632e-02, -1.411e-01, 1.116e-01, 3.695e-02, -2.561e-02, -1.120e-02, 5.754e-02, -1.682e-03)); + r += mul(s0_7, M4(8.318e-02, -7.579e-02, 8.523e-02, 1.695e-01, 1.272e-01, 3.044e-01, 9.789e-02, 2.895e-02, 8.045e-02, -1.118e-01, 1.189e-01, 4.772e-02, 3.643e-02, 1.177e-02, 9.985e-02, -5.121e-02)); + r += mul(s0_8, M4(-5.202e-02, -6.098e-02, 1.816e-01, -1.054e-01, -1.061e-02, 7.545e-02, 1.807e-01, 1.109e-01, 2.225e-02, 4.505e-02, -1.468e-02, -1.591e-02, 4.481e-02, 8.765e-02, 7.746e-02, 1.284e-01)); + r += mul(s1_0, M4(-2.938e-02, 5.927e-02, -1.109e-03, -6.546e-04, -4.689e-03, 5.730e-02, -1.717e-02, 1.489e-02, 4.013e-02, 1.947e-02, 2.004e-02, -3.337e-02, -7.947e-02, 1.191e-01, -3.596e-03, 1.463e-02)); + r += mul(s1_1, M4(5.920e-02, 4.055e-02, -1.927e-02, 5.889e-02, -2.832e-02, 1.920e-02, 8.304e-03, 
-1.604e-02, 5.066e-02, 2.033e-02, -4.666e-02, 1.436e-01, -4.947e-02, 2.174e-01, -1.265e-03, -7.555e-03)); + r += mul(s1_2, M4(-2.522e-02, 8.095e-03, -1.292e-02, -4.297e-02, -2.803e-03, 2.694e-03, 9.495e-03, 4.633e-03, -5.493e-03, 2.904e-02, -5.570e-02, -5.671e-02, 1.625e-02, 3.008e-03, -7.597e-02, 4.828e-02)); + r += mul(s1_3, M4(-2.861e-02, -7.359e-02, -3.017e-02, -6.939e-03, -9.235e-02, -4.895e-02, 3.907e-02, -1.481e-02, -3.577e-02, -1.057e-01, -1.420e-02, -3.991e-02, -3.386e-02, 8.669e-02, 1.668e-02, -6.214e-03)); + r += mul(s1_4, M4(4.040e-02, -2.914e-02, 4.031e-02, -1.132e-01, -2.109e-02, 3.115e-02, -1.246e-01, -2.000e-02, 1.326e-02, 1.058e-01, -8.576e-02, -1.588e-01, -4.464e-01, -2.079e-01, 2.685e-01, 1.333e-01)); + r += mul(s1_5, M4(-2.591e-02, 3.814e-02, -3.386e-02, -9.743e-02, 4.114e-02, -3.478e-02, 6.425e-02, -3.640e-02, -8.821e-03, -3.544e-02, -5.167e-02, -2.047e-04, -2.015e-03, -1.694e-02, -1.662e-02, -3.549e-01)); + r += mul(s1_6, M4(4.528e-02, -1.449e-01, 9.702e-03, 1.420e-03, -3.255e-02, -1.604e-02, -3.173e-02, 1.473e-02, -2.362e-03, -1.247e-01, 3.800e-02, -4.005e-03, -4.589e-02, -4.932e-03, -6.621e-02, -5.870e-03)); + r += mul(s1_7, M4(2.555e-02, 4.944e-02, -1.997e-02, -3.789e-02, -6.467e-02, -7.647e-02, 2.485e-01, 2.307e-02, 9.054e-02, -1.921e-01, -1.510e-02, 1.256e-01, -6.435e-02, -3.311e-01, -1.668e-01, -8.547e-03)); + r += mul(s1_8, M4(-5.899e-03, -9.739e-02, -9.003e-03, -1.285e-01, 3.565e-02, 5.708e-02, 5.470e-03, 6.773e-02, -3.124e-02, -1.481e-02, 6.026e-02, -6.631e-02, 5.404e-02, 2.347e-01, -7.943e-02, 5.130e-02)); + r += mul(s2_0, M4(4.027e-02, 6.051e-02, -1.822e-02, 1.411e-01, -4.953e-02, -2.818e-02, 1.410e-02, 1.730e-02, -1.016e-01, -5.901e-02, -4.021e-02, -6.473e-02, 2.396e-02, -3.431e-02, 3.567e-02, -5.546e-02)); + r += mul(s2_1, M4(-1.226e-01, 6.911e-02, 2.438e-02, 3.790e-01, 2.273e-02, -9.692e-02, -6.908e-02, -6.222e-02, -2.884e-02, 4.129e-02, 3.012e-02, -2.219e-02, -4.559e-02, 4.080e-02, -1.208e-02, -1.721e-01)); + r += mul(s2_2, 
M4(1.698e-02, -5.581e-02, -1.028e-01, -2.648e-01, -2.852e-02, -4.793e-02, 3.531e-02, 4.713e-02, 6.749e-03, -2.892e-02, 1.004e-03, -8.385e-02, 3.496e-03, 7.913e-03, 3.682e-02, 4.875e-02)); + r += mul(s2_3, M4(-8.191e-02, -2.076e-02, -3.883e-03, -6.020e-03, -4.486e-02, 1.603e-01, -5.054e-02, 1.925e-02, -1.880e-01, -1.032e-01, 9.008e-02, 1.290e-01, -1.580e-01, -1.112e-02, 2.247e-02, 4.732e-02)); + r += mul(s2_4, M4(-1.828e-02, -1.797e-01, 1.553e-01, 1.716e-01, 1.113e-01, 2.362e-01, -2.680e-01, -3.383e-03, -1.252e-01, 6.608e-02, -2.373e-01, -7.700e-02, -2.883e-01, 9.044e-02, 4.293e-01, 9.977e-02)); + r += mul(s2_5, M4(-8.041e-03, 2.502e-02, 6.603e-02, 1.422e-01, -5.744e-02, 2.975e-03, -3.393e-02, 2.418e-01, 3.547e-02, 1.473e-03, -5.479e-03, 2.087e-01, 9.054e-02, -5.279e-02, -2.166e-01, -3.318e-01)); + r += mul(s2_6, M4(-5.745e-02, 5.943e-02, 3.035e-02, 9.155e-03, -2.198e-02, 1.365e-02, 4.023e-04, -9.323e-03, -5.497e-02, -1.600e-02, -3.127e-02, -2.707e-02, -4.076e-02, -6.445e-02, -1.111e-01, -2.571e-02)); + r += mul(s2_7, M4(-3.950e-02, -7.730e-02, 1.813e-02, -4.415e-02, 5.548e-02, -9.475e-02, -8.720e-02, 5.763e-02, -8.685e-02, 2.206e-01, 2.090e-01, 2.510e-02, -3.670e-02, -2.902e-01, 3.033e-01, 4.140e-02)); + r += mul(s2_8, M4(4.482e-02, 8.381e-02, 2.472e-01, 6.329e-02, 1.484e-02, -3.689e-02, -1.033e-01, -1.513e-01, -4.236e-03, -1.259e-02, -3.677e-02, -4.812e-02, 4.726e-02, -3.249e-02, -2.763e-02, -1.101e-01)); + r += mul(s3_0, M4(-3.101e-02, 4.627e-02, 1.449e-02, -3.616e-03, -5.091e-02, 6.408e-02, 1.786e-02, -1.482e-02, -3.041e-02, 3.234e-02, -1.162e-02, -2.272e-02, 7.108e-03, -9.229e-03, 2.850e-02, 1.415e-02)); + r += mul(s3_1, M4(-5.226e-02, 1.188e-01, 5.643e-02, 6.820e-02, 6.848e-03, 8.559e-02, 4.529e-02, 9.235e-02, -2.974e-03, -2.439e-02, 1.520e-02, 9.824e-03, -5.821e-02, 2.097e-02, 8.484e-03, 5.340e-03)); + r += mul(s3_2, M4(1.145e-02, -3.415e-02, -3.814e-02, -1.144e-01, 2.474e-03, 2.845e-02, 5.365e-02, 2.777e-02, -7.355e-03, 4.104e-03, -4.118e-02, -3.890e-02, 
1.295e-02, -1.091e-02, 1.410e-02, -5.293e-02)); + r += mul(s3_3, M4(-5.924e-02, -2.347e-03, 4.579e-02, 3.097e-02, -8.237e-03, -3.171e-02, 1.003e-01, -3.099e-02, 8.427e-02, -1.607e-01, -2.136e-02, 5.186e-02, 5.234e-02, 8.716e-02, -2.204e-02, -8.407e-03)); + r += mul(s3_4, M4(-3.137e-02, -6.165e-02, -4.790e-02, -2.918e-03, -5.244e-02, -7.952e-02, -2.419e-01, -9.282e-02, 1.111e-01, 1.613e-02, -6.698e-02, -9.656e-02, -6.602e-03, 1.187e-01, 9.135e-02, 1.027e-01)); + r += mul(s3_5, M4(1.183e-02, 4.164e-02, -1.076e-02, -1.707e-02, 1.665e-02, -1.242e-02, 6.418e-02, 1.193e-02, -7.299e-02, 1.017e-01, 1.254e-01, 1.039e-01, -2.898e-02, -1.810e-02, 2.075e-02, -3.683e-03)); + r += mul(s3_6, M4(-2.723e-02, -6.785e-02, -6.557e-02, -1.114e-02, 7.214e-02, 1.238e-02, 1.335e-03, 2.707e-02, 5.311e-02, 5.707e-02, -1.330e-02, 9.797e-03, -5.754e-02, -1.093e-02, 8.026e-02, -9.979e-03)); + r += mul(s3_7, M4(-2.533e-03, -1.139e-01, 2.439e-02, -1.492e-02, -6.427e-02, 3.910e-03, 3.283e-02, 5.846e-02, 4.403e-02, -2.133e-03, 3.179e-02, 1.806e-03, 1.629e-02, -6.802e-02, 9.503e-03, -2.512e-02)); + r += mul(s3_8, M4(2.353e-02, -2.638e-02, 4.675e-02, 2.404e-02, 4.130e-02, 1.884e-02, 1.145e-01, 3.410e-02, -3.601e-02, -9.180e-02, -7.135e-02, -9.899e-02, -9.522e-03, 3.465e-02, 4.162e-02, 7.758e-03)); + r += mul(s4_0, M4(-1.596e-01, 7.351e-04, -5.917e-02, 9.830e-02, -2.920e-02, -2.196e-02, -2.547e-02, -6.508e-03, -5.261e-03, 2.840e-02, 2.271e-02, 4.707e-03, 3.022e-01, 7.367e-02, 9.815e-02, -4.388e-02)); + r += mul(s4_1, M4(-5.155e-03, 2.948e-02, 1.522e-02, -2.238e-02, 1.755e-02, -2.385e-02, -1.528e-02, -7.690e-02, 2.317e-02, 9.707e-02, 3.772e-02, 4.510e-02, 7.763e-02, -1.526e-01, -3.017e-02, 2.790e-01)); + r += mul(s4_2, M4(8.279e-03, -8.619e-02, 4.243e-02, 3.116e-02, -3.015e-02, -1.552e-02, 1.076e-02, -3.931e-02, 1.595e-02, 4.532e-02, -7.481e-03, 4.177e-02, 3.674e-02, 1.605e-03, 5.683e-02, 8.314e-02)); + r += mul(s4_3, M4(1.502e-01, 4.229e-01, 1.655e-01, -8.824e-02, 1.440e-02, 1.396e-02, 1.824e-02, 
5.334e-02, -4.940e-02, 3.722e-02, 1.545e-02, -5.093e-02, 8.613e-02, 1.342e-01, -8.895e-03, -5.035e-02)); + r += mul(s4_4, M4(2.528e-01, -9.416e-03, 1.601e-01, 4.679e-01, 3.810e-02, 7.959e-03, -1.754e-03, 1.858e-02, -4.325e-02, -1.131e-01, -4.183e-02, -1.017e-01, -4.375e-02, 3.145e-02, -1.272e-01, 8.395e-02)); + r += mul(s4_5, M4(-2.072e-01, 1.731e-01, 2.855e-01, 2.574e-01, -4.553e-02, 8.366e-02, 6.631e-02, -5.029e-02, 3.748e-02, 2.669e-03, 1.499e-03, -1.058e-01, 2.305e-02, -1.007e-02, 5.451e-02, 3.447e-02)); + r += mul(s4_6, M4(-1.253e-02, -1.557e-02, 2.680e-01, -4.947e-02, 9.106e-03, -4.863e-02, 3.862e-02, 2.887e-03, 5.714e-02, 1.407e-02, -5.319e-04, 3.290e-02, 9.566e-02, 1.003e-01, 2.218e-02, -2.021e-02)); + r += mul(s4_7, M4(-1.862e-01, 7.949e-01, -4.100e-02, -1.616e-01, 1.377e-02, -7.986e-02, 3.415e-04, 4.065e-04, -2.645e-02, -1.855e-01, 1.605e-02, 4.846e-02, -6.506e-03, 5.088e-03, 4.173e-02, 2.327e-02)); + r += mul(s4_8, M4(-1.919e-01, -9.826e-03, 2.859e-01, 3.247e-02, 1.377e-02, -1.271e-01, 5.140e-02, -1.924e-02, 3.601e-02, 7.237e-02, 3.313e-02, 1.073e-01, 2.213e-02, 1.931e-02, -3.036e-02, -1.857e-02)); + r += mul(s5_0, M4(2.811e-02, -3.688e-02, -1.893e-02, -1.153e-02, 4.608e-02, 7.400e-04, 9.096e-05, 8.831e-03, 4.843e-02, -3.175e-02, 2.007e-03, 3.164e-03, -2.815e-02, -3.291e-02, 6.942e-03, -7.922e-03)); + r += mul(s5_1, M4(-6.265e-02, -3.621e-02, -1.263e-02, 1.924e-02, 5.043e-02, -8.130e-02, -2.655e-02, -3.845e-02, -2.888e-02, -3.681e-02, -3.308e-02, -6.232e-02, 9.469e-03, -1.419e-01, -6.398e-02, 2.439e-03)); + r += mul(s5_2, M4(7.006e-03, -8.489e-03, 1.088e-02, -1.893e-02, -1.850e-02, 1.441e-02, 8.278e-03, 1.588e-02, -1.311e-02, 2.058e-02, 1.626e-02, 2.249e-02, 1.278e-02, -1.710e-02, -1.824e-02, -4.346e-02)); + r += mul(s5_3, M4(-1.600e-02, 4.441e-02, -2.673e-02, 5.737e-02, 1.657e-02, -7.720e-02, -2.863e-02, 3.338e-02, -6.272e-02, -4.107e-02, -3.531e-02, -6.193e-02, -2.302e-01, -8.016e-02, -1.737e-02, -7.314e-02)); + r += mul(s5_4, M4(-5.253e-02, 6.372e-02, 
1.257e-01, 2.593e-02, 1.450e-02, -3.240e-02, 2.255e-02, -5.535e-02, 1.490e-01, -6.299e-02, -9.303e-02, 5.202e-02, -6.767e-04, -6.085e-02, -8.085e-02, -4.924e-02)); + r += mul(s5_5, M4(2.291e-02, 1.323e-02, 8.572e-02, -3.387e-02, -3.431e-04, 1.920e-02, 6.956e-03, -9.327e-02, -6.671e-02, -6.480e-02, 1.405e-02, 1.114e-01, -6.012e-03, -1.361e-01, -3.791e-02, 4.829e-02)); + r += mul(s5_6, M4(8.799e-03, 3.142e-02, 8.483e-03, -3.836e-02, 1.978e-01, -1.646e-02, 2.252e-02, -6.048e-02, 4.975e-02, 1.991e-02, 3.369e-02, -2.007e-02, -2.957e-02, -2.680e-03, -2.970e-02, -4.281e-03)); + r += mul(s5_7, M4(-2.848e-02, 3.081e-02, -8.394e-02, -4.064e-03, 2.752e-02, 3.067e-01, -1.155e-01, 9.175e-02, -2.368e-02, -1.767e-01, 7.387e-02, -4.105e-03, -1.972e-02, 2.154e-02, -3.299e-02, 3.965e-02)); + r += mul(s5_8, M4(2.673e-04, 2.480e-02, -3.968e-02, 2.957e-02, -1.786e-02, -1.226e-01, -9.062e-02, 4.356e-02, 1.271e-02, 4.314e-02, -1.928e-01, -1.119e-01, 1.780e-02, 4.065e-03, 8.221e-03, -3.530e-02)); + r += mul(s6_0, M4(2.098e-02, -1.053e-02, 7.348e-03, -2.663e-02, -3.212e-02, 3.080e-02, -4.537e-03, -2.740e-02, 3.162e-02, -2.540e-03, -8.751e-03, -1.148e-02, -4.170e-03, -2.416e-02, -3.985e-03, 5.122e-03)); + r += mul(s6_1, M4(4.269e-02, -1.696e-02, -1.119e-02, 5.508e-02, 5.475e-02, 3.714e-02, -4.380e-02, 1.362e-02, 4.667e-02, -2.880e-02, -1.132e-02, -3.636e-02, -3.802e-02, -3.298e-03, 4.957e-02, -4.383e-02)); + r += mul(s6_2, M4(-2.587e-02, 1.347e-02, -8.682e-03, -7.210e-02, -4.046e-02, 1.120e-02, -4.019e-02, -3.884e-02, 6.137e-02, -7.922e-02, -7.046e-03, 1.565e-01, 1.043e-03, 9.036e-03, 1.292e-02, 8.501e-03)); + r += mul(s6_3, M4(5.580e-02, -2.963e-03, 1.947e-03, -2.473e-03, 1.265e-01, -1.265e-01, 3.623e-02, -7.397e-02, 3.229e-02, 4.138e-02, 4.128e-02, -1.169e-02, 2.586e-02, 2.790e-02, 4.328e-02, -3.918e-02)); + r += mul(s6_4, M4(6.678e-02, 1.508e-01, -3.867e-02, 5.655e-02, 4.361e-01, -1.392e-01, -3.821e-02, 1.118e-01, -2.316e-01, 1.216e-02, 1.708e-01, -6.198e-02, -2.788e-02, 2.630e-02, 
-7.593e-02, 4.442e-02)); + r += mul(s6_5, M4(-2.494e-02, 7.203e-02, 9.884e-02, 4.679e-02, -1.412e-01, 1.266e-01, 1.616e-01, 5.192e-01, 7.496e-02, -6.297e-02, -7.291e-02, -2.761e-03, 2.747e-03, 2.842e-02, -1.438e-02, 8.791e-02)); + r += mul(s6_6, M4(-6.034e-03, -1.988e-02, 9.423e-03, -8.018e-03, 5.482e-02, 4.266e-02, 2.008e-02, 2.532e-02, 4.935e-02, -4.054e-02, -1.212e-02, -2.818e-03, -1.251e-02, -7.254e-02, 5.047e-02, -1.091e-02)); + r += mul(s6_7, M4(-4.368e-03, -2.020e-02, 1.459e-02, -2.116e-02, 1.027e-02, 3.926e-01, 3.189e-01, -3.561e-02, -5.842e-02, 5.650e-02, -1.034e-01, -3.434e-02, -7.243e-02, -1.526e-02, 5.262e-03, -1.506e-02)); + r += mul(s6_8, M4(-4.331e-02, -1.012e-02, -5.957e-02, 2.021e-02, -2.783e-02, -4.619e-02, -1.793e-01, -1.121e-01, 5.735e-02, 3.291e-02, 1.028e-02, 3.021e-02, 1.916e-02, -5.925e-02, 9.729e-02, -3.139e-02)); + r += mul(s7_0, M4(8.811e-02, -3.230e-02, -2.340e-02, 1.007e-02, 2.262e-02, 1.299e-02, 1.861e-02, 3.002e-03, -4.389e-02, 2.304e-02, 1.989e-02, 4.361e-02, 1.100e-02, 2.912e-02, -6.940e-04, -3.995e-03)); + r += mul(s7_1, M4(-2.878e-02, -1.447e-02, -8.574e-02, 8.482e-02, 7.547e-02, -5.559e-02, -2.542e-02, -1.200e-02, -9.488e-02, 2.377e-02, 7.290e-02, -1.909e-01, 4.442e-02, -6.422e-02, -1.878e-02, 2.297e-02)); + r += mul(s7_2, M4(-1.161e-02, 3.624e-02, 2.303e-02, 4.236e-02, -4.980e-02, 1.092e-02, 2.231e-02, -3.462e-02, 2.148e-02, -7.254e-02, -9.069e-02, 5.621e-02, 2.326e-02, 2.044e-02, 1.256e-03, -3.556e-02)); + r += mul(s7_3, M4(1.694e-01, -6.393e-04, 2.174e-02, 5.228e-02, -7.491e-02, 3.133e-02, 7.240e-02, -1.048e-02, 1.087e-01, -9.066e-02, 1.845e-02, 1.804e-03, -5.398e-03, -6.133e-02, 4.277e-02, -2.896e-02)); + r += mul(s7_4, M4(4.209e-02, 1.779e-01, -1.103e-02, 8.013e-03, 2.688e-01, -3.924e-02, 2.387e-02, 1.478e-02, -3.389e-01, 5.841e-02, -6.463e-02, 1.170e-01, -2.725e-01, 1.440e-01, -1.064e-02, -5.789e-02)); + r += mul(s7_5, M4(5.661e-02, -1.287e-02, -1.040e-01, 2.762e-01, -8.414e-02, -2.199e-02, 7.695e-02, 1.284e-01, 2.643e-02, 
-1.841e-02, -3.880e-03, -2.730e-01, 3.362e-02, 1.292e-03, -6.430e-02, 1.298e-01)); + r += mul(s7_6, M4(5.765e-02, 1.471e-01, -6.371e-02, 6.783e-03, -2.742e-02, -9.306e-02, 4.691e-02, 2.644e-02, 4.329e-02, -1.373e-01, 3.846e-02, -4.862e-03, 8.690e-03, 1.078e-02, -4.893e-02, 2.456e-02)); + r += mul(s7_7, M4(-1.123e-02, -3.080e-02, 1.587e-01, 7.075e-02, 4.285e-02, 1.250e-01, 4.928e-02, -4.179e-02, -1.304e-01, -2.597e-01, -1.338e-01, -7.231e-02, -3.581e-03, -2.680e-02, 1.008e-01, -1.061e-02)); + r += mul(s7_8, M4(-1.404e-02, -6.516e-02, -1.755e-02, -1.282e-01, 4.673e-03, 5.937e-03, -4.064e-02, 2.759e-02, 2.023e-02, -7.021e-02, -1.563e-02, 2.361e-02, 1.991e-02, -1.549e-01, 1.831e-01, 2.950e-02)); + r += V4(-4.202e-04, 6.301e-03, 6.510e-03, 1.881e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.076e-01, 9.459e-02, 5.018e-02, 6.361e-02, -1.552e-03, -1.362e-02, -5.013e-03, 2.647e-02, 4.040e-02, -2.398e-02, 3.036e-02, 6.250e-02, 6.878e-02, 1.299e-02, 2.376e-03, -5.089e-02)); + r += mul(s0_1, M4(-3.186e-02, -1.003e-01, 8.043e-03, -1.930e-02, 9.658e-02, 1.229e-01, 1.399e-02, 4.080e-02, -6.420e-02, 2.529e-02, 6.975e-02, 9.162e-02, -1.342e-01, -7.190e-03, 6.387e-02, -3.481e-02)); + r += mul(s0_2, M4(1.963e-02, 3.109e-02, -7.229e-02, 4.592e-03, -1.123e-01, 2.408e-02, -3.028e-02, 3.707e-02, 8.276e-02, 3.451e-02, -9.454e-03, 
-3.834e-02, 1.974e-02, -2.533e-02, 5.984e-03, 8.251e-02)); + r += mul(s0_3, M4(-1.385e-01, 1.865e-01, 7.740e-02, 1.387e-01, 4.463e-02, 1.012e-01, 7.993e-02, 2.930e-02, -5.065e-02, 6.022e-03, -8.449e-02, 9.553e-02, 7.640e-02, 6.897e-02, -7.650e-02, 6.023e-02)); + r += mul(s0_4, M4(7.506e-02, 4.216e-01, 1.997e-01, -6.951e-02, 4.671e-01, -1.686e-01, 2.571e-01, -2.584e-01, 1.607e-01, 3.681e-02, 4.867e-03, -3.146e-01, 1.337e-01, 1.016e-01, 4.053e-01, 8.276e-02)); + r += mul(s0_5, M4(2.423e-02, 1.422e-01, -1.150e-03, -3.969e-03, 5.352e-02, -4.009e-03, -3.000e-02, 1.944e-02, -8.228e-02, 1.139e-01, 1.020e-01, 1.204e-01, -7.730e-02, -9.062e-02, 6.823e-02, -6.768e-02)); + r += mul(s0_6, M4(-2.356e-02, -1.025e-01, 9.710e-02, -3.185e-02, 5.413e-02, 9.684e-02, 1.312e-01, 1.200e-01, 3.235e-02, -5.876e-02, 1.181e-01, -2.789e-02, -1.420e-02, -1.891e-02, -4.461e-02, -4.793e-03)); + r += mul(s0_7, M4(1.376e-01, -1.486e-01, 1.329e-01, 2.389e-01, 5.239e-02, 1.317e-01, 2.417e-01, -2.170e-02, 9.953e-03, -9.714e-02, 1.253e-01, 1.715e-01, -2.740e-02, -3.472e-02, -2.705e-02, 4.845e-02)); + r += mul(s0_8, M4(-1.929e-01, -1.534e-01, -1.140e-01, 6.699e-02, 1.044e-01, 6.899e-02, 2.676e-02, 1.343e-01, 1.196e-02, -4.285e-02, 2.777e-02, -1.889e-02, 1.189e-01, 1.513e-03, 7.006e-02, -4.608e-02)); + r += mul(s1_0, M4(4.727e-02, 4.907e-02, -4.865e-02, 4.088e-02, 7.322e-02, 4.079e-02, -1.010e-02, 1.619e-02, -4.750e-03, 5.155e-02, 2.044e-02, 6.424e-02, 5.315e-02, 2.716e-02, -4.774e-02, -1.353e-02)); + r += mul(s1_1, M4(7.921e-02, 7.939e-02, 5.319e-02, -5.682e-02, -6.991e-02, 1.192e-01, 6.963e-02, -4.258e-02, 1.701e-01, 6.614e-02, -8.819e-02, -8.870e-03, -5.026e-02, 1.030e-01, 1.329e-02, -1.035e-01)); + r += mul(s1_2, M4(-3.164e-02, -3.713e-02, -2.748e-02, -7.602e-03, 6.976e-02, 2.410e-02, -3.311e-02, -1.760e-02, -8.627e-02, 4.056e-02, 1.148e-01, -1.721e-02, 6.658e-03, -5.652e-02, -1.008e-01, 6.403e-02)); + r += mul(s1_3, M4(-2.600e-02, -9.065e-02, -1.888e-02, 5.124e-02, -1.238e-01, -1.830e-01, 
-2.969e-02, 5.243e-02, -1.284e-01, -7.646e-02, 4.992e-02, 2.685e-03, 3.629e-02, 4.630e-02, -1.430e-01, 3.552e-02)); + r += mul(s1_4, M4(-2.979e-01, 3.411e-02, 1.029e-01, 3.688e-02, 1.793e-01, -2.283e-01, 1.243e-01, 2.995e-02, -2.282e-01, -1.471e-01, -4.377e-01, 8.713e-02, 8.873e-02, 4.323e-02, -1.517e-01, 1.898e-01)); + r += mul(s1_5, M4(-1.356e-02, -4.639e-02, -5.458e-03, -1.772e-02, -7.631e-02, -1.901e-02, 7.485e-02, 1.410e-03, 9.795e-02, 8.527e-03, 5.029e-02, -4.242e-02, -8.664e-02, -2.813e-02, -1.742e-01, 1.645e-01)); + r += mul(s1_6, M4(-2.665e-02, -1.223e-01, 8.783e-02, -6.537e-03, 2.250e-03, -7.386e-03, 3.235e-04, 8.568e-02, 9.910e-02, -9.138e-02, 4.718e-03, -6.234e-02, -6.278e-03, 6.142e-03, -4.212e-02, 2.019e-02)); + r += mul(s1_7, M4(-2.058e-01, -1.362e-01, -5.059e-03, 2.211e-02, 1.474e-02, -5.797e-03, 1.787e-02, -4.932e-02, 5.892e-02, -1.100e-02, 2.070e-01, 6.475e-02, -8.533e-02, -1.685e-01, -1.017e-01, 2.089e-02)); + r += mul(s1_8, M4(-1.094e-01, -3.021e-02, -4.875e-02, 4.281e-02, 6.425e-02, 3.246e-02, 5.784e-02, -1.173e-01, -9.465e-02, -8.862e-02, -5.373e-02, 3.057e-02, 8.867e-02, -5.970e-02, 4.623e-02, 8.439e-02)); + r += mul(s2_0, M4(1.587e-01, -1.166e-01, 7.347e-02, 5.028e-02, -2.301e-02, -1.425e-02, -4.434e-02, -3.650e-02, -5.876e-02, 5.358e-03, -6.112e-02, -3.876e-02, -9.871e-02, -9.629e-02, 2.887e-02, -5.464e-02)); + r += mul(s2_1, M4(2.192e-01, -8.572e-02, -1.725e-01, -2.315e-01, -4.065e-02, -1.574e-02, -4.730e-02, 4.133e-02, -1.877e-03, -1.450e-02, -4.131e-02, 5.251e-02, -7.241e-02, -2.118e-01, -1.658e-01, -1.554e-02)); + r += mul(s2_2, M4(-2.951e-02, -6.779e-02, 3.084e-02, -2.750e-01, -1.586e-02, -4.853e-02, 1.521e-02, 1.087e-02, -3.765e-02, 8.129e-03, 4.653e-02, -2.222e-02, 7.725e-03, -7.037e-04, -2.007e-02, -8.173e-03)); + r += mul(s2_3, M4(1.025e-01, 2.878e-01, 4.804e-02, 1.290e-01, -4.854e-02, 3.440e-02, -5.406e-02, 4.039e-02, 4.142e-02, -1.714e-01, 1.287e-02, -4.413e-02, 9.058e-03, -1.193e-01, 9.933e-02, -1.257e-02)); + r += mul(s2_4, 
M4(-6.006e-02, 1.470e-01, -1.725e-01, 8.381e-02, 1.430e-01, -2.764e-01, 2.258e-01, -3.232e-02, -5.532e-02, -3.717e-02, -4.678e-01, 2.989e-01, 2.234e-02, 3.625e-03, -4.473e-01, -1.063e-03)); + r += mul(s2_5, M4(5.328e-02, 9.418e-03, 4.552e-02, -9.636e-02, 1.165e-01, -5.062e-02, -3.916e-02, 1.290e-01, 6.573e-02, 8.872e-03, 3.454e-02, -7.966e-02, -5.075e-02, -2.213e-02, 1.388e-01, 1.829e-01)); + r += mul(s2_6, M4(3.882e-02, 2.023e-02, 2.501e-02, 9.446e-02, -2.924e-03, -4.121e-02, -8.625e-02, 4.996e-03, -3.805e-02, 3.746e-02, 1.038e-01, 3.066e-04, 1.063e-03, 6.865e-02, -6.979e-02, 3.771e-02)); + r += mul(s2_7, M4(5.676e-03, 2.452e-02, -8.635e-02, -7.576e-02, -1.391e-01, 1.490e-01, 2.582e-02, -2.445e-01, -1.249e-01, -2.115e-02, -1.512e-01, 3.049e-01, -6.295e-02, -6.138e-02, -2.572e-01, -3.793e-01)); + r += mul(s2_8, M4(-1.268e-03, 2.520e-02, -3.155e-02, -9.465e-02, 3.650e-02, 1.035e-02, 3.947e-02, 1.959e-01, -7.596e-02, -2.304e-02, 5.939e-03, -4.067e-02, -7.969e-02, -6.106e-02, 3.986e-02, -4.797e-02)); + r += mul(s3_0, M4(3.360e-02, 9.741e-02, -3.713e-02, 2.114e-02, 7.099e-04, 7.903e-02, -3.197e-02, 1.065e-02, -2.551e-02, -1.802e-02, -2.769e-02, -1.930e-02, 1.047e-02, 3.540e-02, 1.322e-02, -1.556e-04)); + r += mul(s3_1, M4(4.761e-02, 1.757e-01, 4.040e-02, -7.825e-02, 7.368e-02, 1.965e-01, 1.090e-02, 3.098e-02, 1.595e-02, 1.373e-02, 1.417e-02, -7.205e-02, -1.419e-02, 4.930e-02, 8.902e-02, 3.248e-04)); + r += mul(s3_2, M4(-7.793e-02, -5.996e-04, -2.884e-02, 7.126e-02, 4.825e-02, 5.384e-02, 1.636e-02, -2.258e-02, -4.520e-02, -5.878e-02, -4.555e-02, 6.671e-02, -2.214e-02, -1.812e-02, -3.157e-02, 1.006e-02)); + r += mul(s3_3, M4(4.366e-02, 5.893e-03, -4.569e-02, 8.509e-03, -2.388e-02, -4.327e-02, 8.083e-03, -2.875e-02, 6.482e-03, -2.898e-02, 1.219e-01, 3.075e-02, 4.515e-02, 1.265e-02, -1.177e-02, 1.695e-02)); + r += mul(s3_4, M4(-4.587e-02, -4.540e-02, -4.528e-02, -3.017e-02, -9.578e-02, -1.165e-01, 6.568e-03, 7.609e-02, -5.974e-02, -1.383e-01, 6.412e-03, -1.829e-02, 
-3.406e-02, -8.001e-02, -5.122e-02, 1.077e-01)); + r += mul(s3_5, M4(3.416e-02, -3.407e-02, 8.393e-02, -1.160e-01, -9.369e-03, 3.058e-02, 4.454e-02, -9.247e-02, 9.737e-02, -6.020e-02, -1.013e-01, -2.782e-02, 8.023e-02, -7.069e-02, -1.847e-03, -5.169e-02)); + r += mul(s3_6, M4(7.537e-03, -6.927e-02, -8.231e-02, 7.270e-02, 4.258e-02, -4.721e-02, 3.532e-02, 2.222e-02, -3.422e-02, 4.518e-02, 9.489e-02, 5.504e-02, 7.420e-03, -1.408e-02, -7.131e-02, -3.159e-02)); + r += mul(s3_7, M4(-6.396e-03, -1.854e-02, 8.382e-02, -6.459e-02, 5.731e-02, -9.457e-02, -2.301e-02, -4.219e-02, -3.609e-02, 7.196e-02, 9.962e-02, -1.094e-01, 2.650e-02, 6.392e-03, 1.478e-02, 9.639e-02)); + r += mul(s3_8, M4(4.995e-02, -1.478e-02, -2.014e-02, -2.019e-02, 7.100e-02, -6.871e-04, 5.266e-02, -1.085e-01, -7.515e-02, 2.356e-02, -4.473e-02, 2.427e-01, -5.491e-03, 2.872e-02, 3.165e-02, 5.489e-02)); + r += mul(s4_0, M4(-3.458e-02, -1.959e-01, -1.213e-01, -2.589e-01, -3.968e-02, -5.974e-02, -1.907e-02, -7.278e-03, 1.537e-02, 4.771e-02, 2.328e-02, 2.913e-02, 1.330e-01, -4.545e-02, 5.589e-03, 1.417e-02)); + r += mul(s4_1, M4(3.301e-01, 1.127e-01, -2.534e-02, -3.332e-02, -4.439e-02, -8.422e-02, -6.904e-02, -2.112e-02, 5.374e-02, 1.986e-01, -4.109e-02, 3.611e-02, 3.623e-01, 1.059e-02, 3.093e-01, 3.962e-02)); + r += mul(s4_2, M4(-1.211e-01, -9.423e-02, 9.185e-02, -3.221e-02, -6.293e-02, -6.185e-02, -2.286e-03, 5.499e-03, 9.204e-02, 9.981e-02, 7.664e-02, -2.940e-02, 6.183e-02, 3.852e-02, -5.530e-02, 4.065e-02)); + r += mul(s4_3, M4(1.963e-03, 1.767e-01, -2.015e-01, -7.453e-02, 3.252e-02, 1.308e-02, 8.874e-02, 5.307e-02, -8.716e-02, -6.464e-02, 1.080e-02, -2.419e-02, 9.475e-02, 2.395e-01, 8.134e-02, -4.515e-02)); + r += mul(s4_4, M4(-1.869e-02, 4.839e-02, 5.578e-01, -2.113e-01, -1.419e-01, -1.454e-04, -5.902e-02, -1.506e-02, -2.261e-01, -3.061e-01, -2.529e-01, 4.105e-02, 1.003e-01, 1.875e-01, 2.203e-01, 7.133e-02)); + r += mul(s4_5, M4(5.376e-01, -4.584e-02, 4.556e-02, -4.455e-01, 5.528e-02, -1.674e-02, 
-8.416e-02, 3.166e-02, -1.333e-01, 2.677e-02, 1.385e-01, 7.247e-02, -3.433e-02, 6.367e-02, 7.385e-03, -4.556e-02)); + r += mul(s4_6, M4(-1.279e-02, -9.193e-02, 1.330e-01, -3.062e-01, -1.544e-02, -2.016e-02, 6.331e-02, -3.779e-02, 6.792e-02, -9.991e-03, 5.801e-02, 4.266e-03, -3.719e-02, -3.649e-02, 5.247e-02, 8.472e-02)); + r += mul(s4_7, M4(4.007e-01, 4.785e-02, 1.757e-01, -2.749e-01, -1.780e-02, -9.400e-02, -1.440e-01, -1.256e-03, 1.011e-01, -1.379e-01, -6.547e-02, 2.229e-02, -2.131e-02, -7.870e-02, -1.186e-02, -6.191e-02)); + r += mul(s4_8, M4(-2.379e-01, 2.552e-02, -3.286e-01, 9.823e-01, -8.046e-03, 5.090e-03, 1.685e-02, 1.315e-01, 4.574e-02, 6.229e-02, 8.783e-02, -1.363e-01, -4.713e-03, -1.715e-02, 2.698e-02, 9.398e-03)); + r += mul(s5_0, M4(-1.468e-02, -2.028e-02, 1.466e-02, -4.579e-02, 1.535e-02, 3.095e-03, 8.097e-02, 3.445e-02, -4.863e-02, -1.563e-02, 2.629e-02, -3.146e-02, 1.701e-02, -4.191e-03, -3.601e-02, -1.665e-02)); + r += mul(s5_1, M4(-3.403e-02, -9.204e-02, -5.272e-02, -8.214e-03, 4.176e-02, -4.487e-02, 3.617e-02, -2.815e-02, -1.425e-01, -3.422e-02, -5.875e-02, 3.796e-02, -6.358e-02, -2.590e-01, -9.751e-03, -2.323e-02)); + r += mul(s5_2, M4(3.660e-03, -1.702e-02, -9.442e-04, 2.394e-02, -2.582e-02, -2.589e-02, -5.776e-02, -1.213e-02, 7.397e-02, 2.383e-02, 2.967e-02, -3.611e-02, -4.360e-02, -4.814e-02, 6.573e-02, 2.316e-02)); + r += mul(s5_3, M4(1.052e-01, 6.102e-02, -1.410e-03, -2.740e-02, 2.966e-02, 1.002e-03, 1.699e-01, 3.489e-02, -4.189e-02, -7.594e-02, -9.216e-02, -4.146e-02, -5.124e-02, -8.469e-02, -1.911e-01, -3.356e-02)); + r += mul(s5_4, M4(1.532e-02, 1.675e-01, -6.142e-02, 5.359e-03, -3.161e-01, -1.008e-02, -6.061e-02, -6.506e-02, 1.185e-01, -1.872e-01, -1.266e-01, 2.179e-02, -1.714e-02, 5.675e-02, -1.877e-02, -2.057e-02)); + r += mul(s5_5, M4(3.497e-02, 9.410e-02, -4.845e-02, -1.955e-02, -9.722e-02, -9.591e-03, -9.701e-03, 6.356e-02, -8.054e-03, 4.357e-02, -8.791e-02, 8.673e-02, -2.616e-03, -1.464e-02, 3.709e-02, -4.116e-02)); + r += 
mul(s5_6, M4(-8.170e-02, 1.745e-02, 4.576e-03, -1.441e-02, -1.709e-02, -5.536e-02, 2.046e-01, 4.309e-02, 3.489e-02, 3.760e-02, -1.835e-02, -2.928e-02, 3.855e-02, -2.936e-02, -5.064e-04, 4.572e-03)); + r += mul(s5_7, M4(-1.266e-02, -2.220e-02, -4.455e-02, 1.910e-02, 2.502e-01, 1.983e-01, -2.494e-02, -8.154e-02, -1.582e-01, -1.609e-02, 2.153e-02, -5.182e-02, -4.083e-03, -1.843e-02, -7.066e-02, -1.014e-01)); + r += mul(s5_8, M4(9.087e-03, 1.330e-03, -3.767e-04, -4.360e-02, 3.942e-02, -4.327e-02, 7.721e-02, 6.839e-02, -3.861e-02, 3.624e-02, 1.080e-02, 1.728e-01, -3.517e-02, 1.390e-02, 2.920e-02, 2.377e-02)); + r += mul(s6_0, M4(-1.396e-02, 4.944e-02, -1.320e-02, 1.199e-02, -3.217e-02, -3.029e-02, -9.161e-02, 1.708e-04, -7.892e-03, -4.578e-02, 9.429e-03, -3.443e-02, -2.402e-02, -2.350e-02, -6.804e-02, 1.150e-02)); + r += mul(s6_1, M4(-8.674e-03, 5.924e-02, 4.191e-02, 3.338e-02, 4.583e-02, 2.542e-02, 6.378e-02, 4.860e-02, 3.023e-02, -1.641e-01, 1.021e-01, -7.298e-03, 1.726e-02, 3.585e-02, -2.231e-02, -7.084e-02)); + r += mul(s6_2, M4(3.300e-02, -8.408e-03, -5.822e-02, -9.899e-02, -5.071e-02, -4.682e-02, -1.381e-02, 4.860e-02, 7.645e-02, -1.057e-01, 3.290e-02, 4.256e-02, -3.318e-02, 4.675e-02, -4.134e-02, 1.627e-02)); + r += mul(s6_3, M4(7.606e-03, -5.919e-02, -7.668e-03, -1.690e-03, -5.880e-02, -2.051e-01, 4.579e-02, -7.223e-02, -1.077e-02, -4.231e-02, -4.106e-02, 4.490e-02, -7.850e-02, -3.918e-03, -5.043e-02, -3.242e-02)); + r += mul(s6_4, M4(8.115e-02, 1.467e-01, 4.984e-02, 2.527e-01, 9.786e-01, -1.904e-01, 6.230e-01, 3.002e-01, -9.438e-02, 1.898e-01, -1.968e-02, -1.330e-01, 7.036e-02, -6.077e-02, 1.128e-01, -2.748e-03)); + r += mul(s6_5, M4(7.808e-02, 1.051e-01, -1.295e-01, 5.095e-03, 1.513e-01, -1.094e-01, -9.200e-02, -1.044e-01, 7.178e-02, 3.189e-02, 1.993e-01, -1.136e-01, 5.804e-02, -6.015e-02, -3.642e-02, -9.688e-02)); + r += mul(s6_6, M4(-2.588e-02, 1.746e-02, -2.700e-02, 1.074e-02, 6.922e-02, -2.008e-02, 2.125e-02, -8.833e-02, -3.047e-02, -1.309e-02, 1.033e-01, 
-5.231e-02, 1.709e-02, -1.158e-02, -2.972e-02, -7.904e-02)); + r += mul(s6_7, M4(2.365e-02, -5.650e-02, -7.233e-02, 1.694e-01, -2.315e-01, -3.880e-02, 4.061e-02, 1.881e-01, -2.348e-03, 1.333e-01, 3.504e-02, 7.788e-02, -4.871e-02, -4.378e-02, 2.568e-03, -1.388e-02)); + r += mul(s6_8, M4(-9.590e-03, -3.210e-02, 4.876e-02, 1.297e-03, -7.297e-02, -1.207e-02, -5.161e-02, 1.450e-01, 4.795e-02, 8.561e-02, -5.993e-02, -9.444e-02, 7.645e-03, 6.642e-02, 3.855e-02, -3.747e-03)); + r += mul(s7_0, M4(1.993e-02, -5.599e-02, 1.307e-02, 5.126e-02, 2.252e-03, -5.554e-02, -4.524e-02, 2.811e-02, -2.211e-02, 2.689e-02, 9.672e-03, -7.220e-02, 4.916e-03, -4.126e-02, -3.318e-02, 3.250e-03)); + r += mul(s7_1, M4(-1.198e-01, 2.290e-02, -9.118e-02, 1.422e-01, -4.106e-02, 9.354e-03, 6.920e-02, 1.073e-02, -1.082e-01, -1.340e-01, -1.270e-01, -7.924e-02, -5.469e-02, 2.681e-02, 8.133e-02, 8.518e-03)); + r += mul(s7_2, M4(3.376e-02, -1.809e-02, -2.202e-02, -2.870e-02, 4.718e-03, -1.995e-02, -2.811e-02, -1.792e-02, 1.182e-02, -5.890e-02, 2.217e-02, 4.638e-02, -8.611e-02, 8.245e-02, 8.950e-02, -2.750e-03)); + r += mul(s7_3, M4(9.746e-02, 4.480e-02, 1.120e-01, 5.923e-02, -1.928e-02, -1.379e-01, -4.855e-02, -3.636e-02, -5.096e-02, -9.697e-02, -1.329e-02, 8.373e-03, -7.251e-02, -8.911e-02, 5.372e-02, -4.751e-02)); + r += mul(s7_4, M4(4.912e-01, 1.320e-01, -2.570e-01, 1.634e-02, 1.529e-02, -1.948e-01, -1.609e-02, 1.018e-01, -1.592e-01, -9.952e-02, -3.147e-01, -2.723e-01, -6.472e-02, 9.893e-02, -9.035e-02, 1.362e-01)); + r += mul(s7_5, M4(9.074e-02, 4.720e-02, 5.895e-03, 6.767e-02, 7.657e-02, -7.854e-02, -1.175e-01, -2.531e-02, -7.094e-02, 2.834e-03, 9.916e-02, -1.150e-01, 1.608e-01, -1.808e-01, 1.651e-01, -4.608e-02)); + r += mul(s7_6, M4(-2.852e-02, 9.908e-02, 5.067e-02, 1.720e-01, 9.137e-03, -2.604e-02, 6.387e-02, -1.178e-01, -4.066e-02, -3.658e-02, 6.847e-02, -8.960e-02, 4.407e-02, -3.909e-02, 6.733e-02, 1.338e-02)); + r += mul(s7_7, M4(-7.476e-02, 6.065e-03, -4.672e-02, 1.024e-02, -3.047e-02, 
2.018e-01, -9.026e-02, 2.397e-02, -1.401e-01, -1.550e-01, -6.817e-02, 1.072e-01, 7.496e-02, -3.902e-02, 4.703e-02, 1.025e-01)); + r += mul(s7_8, M4(-7.574e-02, 8.433e-03, 1.116e-01, -2.104e-01, 4.504e-02, 7.003e-02, -3.852e-02, 7.617e-02, -9.383e-03, -1.871e-02, -5.760e-02, -8.031e-02, -1.150e-02, -3.233e-03, 1.605e-01, -5.731e-02)); + r += V4(2.115e-03, 4.044e-03, 5.828e-03, 3.265e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.188e-02, -3.332e-02, 8.720e-02, 1.829e-03, -6.758e-02, -5.979e-02, 9.119e-02, 1.884e-02, -6.329e-02, -5.896e-02, 3.922e-02, 1.329e-02, 3.359e-03, -4.119e-02, 1.304e-01, -3.453e-02)); + r += mul(s0_1, M4(4.524e-03, 1.038e-02, -7.542e-02, -2.815e-03, -1.062e-01, 1.727e-02, 1.155e-01, 3.582e-01, 1.361e-02, 2.549e-02, -1.253e-02, -8.946e-02, -2.741e-02, -6.133e-02, 1.343e-03, 1.056e-01)); + r += mul(s0_2, M4(1.166e-02, 7.431e-02, 7.213e-02, 7.082e-02, 2.824e-02, -6.961e-02, 1.072e-01, -1.137e-01, -9.178e-03, 2.803e-02, 1.748e-02, 2.367e-02, -3.033e-02, 3.265e-03, 2.166e-02, -6.877e-03)); + r += mul(s0_3, M4(-9.045e-02, -3.842e-02, 8.165e-02, -9.603e-02, -1.656e-01, -1.603e-01, 5.166e-03, 5.611e-02, -2.426e-01, -3.644e-02, 5.843e-02, 2.447e-02, -9.501e-02, 1.309e-01, -3.283e-02, 3.567e-02)); + r += mul(s0_4, M4(-2.686e-01, -2.336e-02, 1.422e-01, 4.370e-02, 3.774e-01, -3.523e-02, 5.176e-01, 
-2.106e-02, 7.873e-02, -1.034e-01, 1.042e-01, -1.486e-02, 2.457e-02, 5.041e-02, 1.460e-01, -1.445e-01)); + r += mul(s0_5, M4(3.611e-02, -1.113e-01, 3.064e-02, -3.519e-02, -9.354e-02, 1.515e-01, -9.649e-02, 3.154e-01, -2.846e-02, -2.897e-02, 2.011e-02, -9.511e-02, 6.145e-02, 8.183e-02, 1.010e-01, -4.757e-02)); + r += mul(s0_6, M4(4.124e-02, 3.306e-02, -4.793e-02, -4.706e-03, -1.112e-01, 4.022e-02, 6.962e-02, 3.748e-02, 3.347e-02, 2.291e-02, 5.088e-03, 1.170e-02, 9.311e-02, -1.557e-01, 3.237e-02, -2.680e-02)); + r += mul(s0_7, M4(2.484e-03, 8.932e-02, 1.036e-01, 3.142e-01, 1.771e-03, -1.162e-01, -8.698e-02, 5.788e-02, 3.005e-02, -1.686e-01, -1.090e-02, 7.850e-02, -1.064e-01, 2.479e-02, -2.199e-02, 2.138e-02)); + r += mul(s0_8, M4(-2.168e-02, -2.943e-02, -3.124e-02, -1.104e-01, -5.848e-02, -2.798e-01, 8.619e-02, -1.055e-02, 7.123e-03, -5.557e-02, 1.454e-03, -1.419e-02, 6.118e-02, 1.648e-02, -2.522e-02, 8.671e-02)); + r += mul(s1_0, M4(-2.472e-02, 2.191e-02, -4.262e-02, -4.035e-03, 9.183e-03, -2.177e-02, -6.007e-02, 6.140e-02, -2.719e-03, -4.530e-03, -6.548e-02, 3.801e-02, -2.606e-02, -4.633e-02, 6.039e-02, -4.108e-04)); + r += mul(s1_1, M4(7.687e-02, -2.714e-02, 5.675e-02, -8.649e-02, 3.078e-02, -3.505e-02, 5.217e-02, -9.711e-02, 2.458e-02, 2.113e-02, -1.036e-01, -1.770e-01, 4.180e-02, -1.399e-01, 9.692e-02, 1.941e-01)); + r += mul(s1_2, M4(-2.535e-02, -3.846e-03, -4.850e-02, -4.418e-03, 1.452e-02, -2.092e-02, 9.281e-03, -5.721e-02, -4.805e-02, -8.908e-02, 6.999e-03, 4.991e-02, 1.403e-02, -7.863e-03, 7.527e-03, -1.596e-01)); + r += mul(s1_3, M4(-4.035e-02, 3.764e-02, 1.273e-02, -2.216e-02, 6.812e-02, -1.512e-02, 6.997e-02, -5.809e-02, 1.579e-02, -6.077e-02, 3.481e-02, -1.458e-03, -7.933e-03, 6.306e-02, -1.131e-01, -8.109e-02)); + r += mul(s1_4, M4(-1.046e-01, 1.092e-01, -1.678e-01, -2.851e-01, 4.912e-02, -3.760e-03, 5.742e-02, 5.613e-02, -6.450e-02, 2.752e-01, 4.494e-02, -3.994e-02, 2.077e-01, 1.700e-01, -7.376e-02, -7.578e-02)); + r += mul(s1_5, M4(-2.043e-02, 
6.183e-02, 2.938e-02, -2.012e-03, 1.725e-02, 4.365e-02, 1.063e-01, -7.568e-03, 5.594e-02, -3.278e-02, 4.597e-02, 2.987e-02, -1.881e-02, 5.059e-01, 5.104e-02, -1.365e-01)); + r += mul(s1_6, M4(6.789e-02, -7.871e-03, -4.167e-02, 2.075e-03, -1.833e-02, 1.941e-01, 1.266e-02, 5.510e-02, 3.110e-02, -1.049e-02, -3.576e-02, -7.252e-03, -6.012e-02, 5.847e-02, 2.815e-02, 2.331e-02)); + r += mul(s1_7, M4(-9.442e-02, 6.906e-02, -8.851e-02, -2.485e-02, 5.090e-02, -6.309e-02, 3.697e-02, -1.238e-02, 6.781e-02, -8.338e-02, -3.093e-02, 1.085e-01, 9.117e-02, 3.855e-01, 4.742e-02, -1.132e-02)); + r += mul(s1_8, M4(-1.800e-03, 1.546e-01, -1.963e-02, -7.079e-02, 3.692e-02, 2.564e-02, -1.822e-02, 8.978e-02, -3.236e-02, 1.218e-01, -1.214e-02, -8.239e-02, 5.708e-02, 2.057e-01, 6.424e-02, 8.472e-02)); + r += mul(s2_0, M4(-5.481e-02, -1.404e-01, 8.220e-02, 1.072e-01, 9.481e-03, 2.446e-02, 2.734e-02, -8.670e-02, 1.175e-01, 1.414e-02, 1.080e-02, 1.073e-01, 5.968e-02, -2.128e-03, 1.044e-01, 7.510e-03)); + r += mul(s2_1, M4(3.686e-03, -1.669e-01, -1.364e-01, 1.046e-01, 2.527e-04, -1.904e-02, -2.032e-01, 4.817e-02, 3.698e-02, 9.219e-03, -1.743e-02, -2.651e-01, 1.376e-02, 3.408e-02, -3.703e-02, -2.350e-01)); + r += mul(s2_2, M4(4.511e-02, -6.042e-02, 1.397e-01, -5.209e-02, -3.447e-02, 3.691e-02, -6.187e-03, -5.817e-02, 1.027e-02, -1.841e-02, 1.670e-02, 2.625e-02, 1.474e-02, 6.275e-02, 1.233e-02, 1.266e-01)); + r += mul(s2_3, M4(-1.833e-01, 3.643e-01, 1.313e-01, 8.859e-02, -2.271e-01, 4.529e-02, -2.425e-01, -2.007e-02, 1.616e-01, -9.640e-02, -8.167e-02, -9.877e-02, 1.014e-01, -2.368e-02, 1.173e-01, -4.600e-02)); + r += mul(s2_4, M4(-1.047e-01, 5.489e-01, 2.875e-01, -6.239e-02, -1.576e-01, -8.642e-02, 1.690e-01, -1.704e-01, -8.154e-02, -3.884e-02, -1.718e-01, 2.920e-01, -4.465e-01, -1.001e-01, -5.537e-02, 1.294e-01)); + r += mul(s2_5, M4(3.794e-02, 2.799e-01, 3.400e-02, 2.627e-02, -5.562e-02, -2.795e-02, -7.195e-02, 2.062e-01, 4.760e-02, -1.185e-02, 3.430e-02, 2.486e-02, -1.905e-02, -7.942e-03, 
6.469e-02, -2.473e-01)); + r += mul(s2_6, M4(-3.862e-02, -2.716e-01, -1.779e-02, 1.971e-02, -9.843e-02, 1.211e-01, 1.658e-02, 1.460e-02, -2.303e-03, 5.473e-02, 7.051e-04, -1.557e-02, -1.441e-01, 1.367e-01, 9.302e-02, 1.536e-02)); + r += mul(s2_7, M4(5.274e-02, -2.609e-01, 6.992e-02, 1.837e-03, 3.149e-02, 2.595e-01, -1.588e-01, -3.238e-02, -9.817e-02, -3.578e-02, -2.076e-02, -7.313e-02, 1.019e-01, 3.797e-01, 3.405e-02, -1.473e-01)); + r += mul(s2_8, M4(3.928e-03, 7.927e-02, 2.124e-02, -2.093e-02, -7.487e-02, 1.220e-01, 6.759e-02, -9.136e-03, -1.540e-02, -6.047e-02, -4.920e-02, 1.421e-02, 6.404e-02, 2.256e-01, -2.249e-02, 5.230e-03)); + r += mul(s3_0, M4(-2.836e-02, -6.272e-02, 5.909e-03, 5.849e-03, -1.289e-02, 5.955e-03, -5.245e-02, 5.188e-02, -5.392e-03, -4.093e-02, -7.894e-02, 3.270e-02, -9.103e-03, 1.188e-02, 3.357e-02, -2.240e-02)); + r += mul(s3_1, M4(4.517e-02, -3.684e-02, -5.974e-02, -4.620e-02, 5.921e-02, 2.377e-02, -3.287e-02, 1.555e-02, 3.179e-02, -5.626e-02, 2.167e-02, -1.737e-02, -4.715e-03, -6.318e-02, 9.200e-03, 8.010e-02)); + r += mul(s3_2, M4(1.023e-02, -2.900e-03, 1.975e-02, -1.037e-01, 7.047e-03, 2.345e-02, 8.585e-03, 2.902e-02, -3.185e-02, -5.850e-02, 1.260e-02, -7.110e-02, 1.932e-02, -2.763e-02, 2.815e-02, -4.028e-02)); + r += mul(s3_3, M4(2.569e-02, -3.794e-02, -1.767e-01, 4.292e-02, 6.767e-02, 1.558e-02, 3.336e-02, -2.264e-02, 9.934e-02, 7.835e-02, 7.838e-02, -2.692e-02, -3.773e-02, 2.213e-02, 3.180e-02, 3.728e-02)); + r += mul(s3_4, M4(-1.067e-02, -4.388e-02, 8.321e-02, -1.344e-01, -7.525e-04, -2.050e-02, 7.072e-02, 8.138e-02, 1.190e-01, 7.250e-02, 9.039e-02, -1.053e-01, 5.307e-02, 3.833e-02, 3.821e-02, -3.772e-02)); + r += mul(s3_5, M4(7.423e-02, 4.272e-03, -8.332e-03, 2.138e-02, 7.411e-02, 9.967e-02, 2.008e-02, 3.070e-02, -9.977e-02, -3.803e-02, -5.478e-02, 6.565e-02, 1.025e-02, 9.232e-02, 7.907e-04, 3.556e-02)); + r += mul(s3_6, M4(-1.878e-02, -1.041e-02, 6.881e-02, -7.144e-03, 4.166e-02, 2.246e-02, -3.086e-02, 2.266e-02, -6.732e-02, 
-1.242e-02, -2.006e-02, 2.065e-02, 9.135e-03, -2.940e-02, 2.761e-02, -2.484e-02)); + r += mul(s3_7, M4(4.517e-02, 9.055e-02, -4.538e-03, 1.247e-03, 5.470e-02, -2.161e-02, 4.771e-02, 3.237e-02, 5.565e-03, 1.050e-01, -1.003e-01, -2.731e-04, -3.393e-02, 3.448e-02, 3.846e-02, -4.142e-05)); + r += mul(s3_8, M4(1.010e-02, 5.598e-02, 4.990e-02, 2.106e-03, 6.861e-02, 2.441e-02, 1.923e-02, 2.508e-02, -1.440e-01, 2.018e-02, 7.907e-03, -5.819e-02, -2.897e-02, 2.882e-02, -2.872e-02, -1.642e-02)); + r += mul(s4_0, M4(-1.741e-02, 1.603e-01, 1.289e-01, 5.191e-02, 2.557e-03, -4.322e-02, -2.579e-02, -5.512e-02, 3.220e-03, -2.343e-02, -6.169e-02, 6.911e-02, 4.653e-02, -2.211e-02, 2.116e-01, 1.909e-01)); + r += mul(s4_1, M4(4.580e-02, 2.516e-02, -1.149e-01, 7.143e-01, 5.640e-02, 8.803e-02, 1.368e-02, -5.165e-02, -1.294e-02, -3.442e-02, 4.503e-02, -7.777e-02, -4.877e-02, -1.414e-02, 1.643e-01, 5.026e-01)); + r += mul(s4_2, M4(6.897e-02, 2.236e-02, 3.488e-03, -7.202e-02, -1.974e-02, -4.583e-03, -4.172e-03, -3.397e-02, -1.087e-02, -6.656e-02, -9.977e-03, 4.895e-02, 7.498e-03, 8.218e-03, -6.155e-03, 2.803e-02)); + r += mul(s4_3, M4(-4.639e-01, 2.028e-01, -1.113e-01, 1.884e-01, -4.210e-02, 5.013e-02, 1.792e-01, 3.296e-02, -4.909e-03, -2.323e-02, 5.133e-02, -1.792e-02, -1.214e-02, 2.493e-02, 1.672e-01, -1.013e-01)); + r += mul(s4_4, M4(3.603e-01, 9.485e-02, 3.685e-01, -5.827e-01, 7.797e-02, -4.314e-03, -1.100e-01, -3.284e-02, 1.190e-01, 9.011e-02, -1.382e-01, 2.920e-01, -1.540e-02, -2.027e-01, -1.193e-01, 1.663e-01)); + r += mul(s4_5, M4(1.756e-01, -5.404e-03, -1.063e-01, 2.605e-01, -8.981e-02, 6.862e-02, -2.303e-02, -2.865e-03, -4.260e-02, 1.051e-01, 1.012e-01, -8.179e-02, 4.335e-03, -6.002e-02, 1.632e-02, -8.527e-04)); + r += mul(s4_6, M4(1.823e-01, -7.957e-02, -6.844e-02, -1.092e-01, 2.816e-02, -7.993e-03, -1.047e-01, -2.782e-04, 8.214e-02, -3.725e-02, 6.411e-02, 4.630e-02, 3.495e-02, -1.434e-02, -1.415e-02, 1.406e-02)); + r += mul(s4_7, M4(4.341e-01, 1.546e-02, 1.486e-01, 6.923e-01, 
2.763e-02, 8.329e-02, 7.277e-02, 3.213e-02, -4.289e-02, -1.067e-02, 5.603e-02, 4.448e-02, -1.503e-02, -7.192e-02, -2.421e-02, 2.587e-02)); + r += mul(s4_8, M4(-4.639e-01, -1.631e-01, -5.696e-03, -3.638e-01, -6.436e-02, 9.907e-02, 2.130e-02, -3.039e-02, 8.544e-02, -1.465e-02, 8.856e-03, 3.908e-02, -1.725e-03, 1.072e-02, 5.467e-03, 1.618e-02)); + r += mul(s5_0, M4(3.606e-03, 1.553e-02, 2.554e-03, 2.442e-02, 2.776e-02, -4.318e-03, -4.577e-02, 1.408e-03, 1.245e-02, -3.113e-02, 2.087e-02, 1.057e-02, 8.364e-03, -1.414e-02, -1.301e-01, 6.560e-02)); + r += mul(s5_1, M4(-2.726e-02, -2.469e-02, 9.038e-02, -2.211e-02, 3.204e-02, 6.069e-02, 1.048e-01, -7.627e-02, -3.532e-02, 1.767e-02, -5.957e-03, -3.333e-02, 7.942e-03, -1.149e-02, -4.773e-02, -1.110e-01)); + r += mul(s5_2, M4(5.837e-03, 4.688e-02, -1.546e-02, 5.968e-03, 9.293e-03, 1.956e-02, -4.029e-02, -2.396e-02, 1.236e-02, -5.458e-02, -1.188e-03, 3.742e-02, -2.838e-02, 5.513e-02, -2.169e-02, 7.259e-02)); + r += mul(s5_3, M4(6.263e-02, 2.280e-02, 4.589e-02, 3.006e-02, -9.011e-04, 3.325e-02, 2.086e-01, 7.638e-02, -1.186e-01, 3.649e-02, -1.092e-01, 3.803e-03, 8.423e-02, 7.991e-03, -1.693e-02, -1.075e-01)); + r += mul(s5_4, M4(2.400e-02, -1.483e-01, -8.745e-02, -3.013e-02, 5.574e-02, 7.154e-02, -2.962e-01, -1.161e-01, 6.972e-03, 5.588e-02, 1.938e-01, 1.804e-01, 6.647e-02, 3.031e-01, -4.253e-02, 9.602e-03)); + r += mul(s5_5, M4(1.932e-02, -4.527e-02, 1.921e-02, -7.693e-03, -6.628e-02, 7.647e-02, 4.823e-02, 5.237e-02, -2.770e-02, 5.194e-02, -2.895e-02, 1.791e-02, 5.963e-03, 8.029e-02, 2.304e-02, 1.151e-02)); + r += mul(s5_6, M4(-4.450e-02, 5.960e-02, -2.029e-02, -1.440e-02, 2.494e-02, 7.894e-02, -2.103e-01, 3.868e-02, -1.304e-02, -4.871e-02, 1.784e-01, -3.489e-02, -1.003e-02, -5.506e-02, 3.736e-02, 2.121e-02)); + r += mul(s5_7, M4(1.365e-02, -2.573e-03, 3.244e-03, 3.968e-02, -1.743e-01, -1.541e-01, 2.862e-01, 1.174e-01, -6.948e-02, 2.940e-01, -1.918e-01, -3.279e-02, 4.482e-02, -5.136e-02, -4.504e-02, 1.486e-02)); + r += 
mul(s5_8, M4(3.427e-02, 4.197e-04, 1.681e-03, 4.410e-02, -3.634e-02, -2.480e-02, -1.041e-01, -5.174e-02, -5.241e-02, -1.550e-01, 4.137e-02, -2.072e-02, -1.327e-02, 4.156e-02, -1.612e-02, -2.276e-02)); + r += mul(s6_0, M4(-4.266e-03, 3.320e-02, 1.134e-02, 1.656e-03, 3.672e-03, 4.921e-02, -5.757e-02, -4.775e-02, 1.112e-02, -9.419e-03, -2.463e-02, -4.559e-02, 6.722e-03, 1.077e-02, 6.055e-02, -4.903e-02)); + r += mul(s6_1, M4(-7.871e-03, -3.928e-02, 4.258e-02, -6.994e-02, 3.260e-02, -7.975e-03, 1.626e-01, 1.292e-02, -1.299e-02, -1.378e-02, 9.619e-02, 1.042e-01, 2.211e-02, -3.767e-02, -4.459e-02, 4.980e-02)); + r += mul(s6_2, M4(1.567e-02, -3.108e-02, 4.549e-02, -2.211e-02, -9.127e-03, 8.663e-03, -3.361e-02, -6.962e-02, -1.247e-02, -6.681e-02, -7.330e-02, 1.146e-01, 2.686e-02, 1.850e-02, -8.339e-02, -3.501e-02)); + r += mul(s6_3, M4(1.518e-02, 7.592e-04, -3.129e-02, 2.893e-02, 1.521e-01, 3.939e-02, 1.852e-01, -1.160e-01, -1.022e-01, 7.489e-02, 9.395e-02, -5.603e-02, -9.725e-02, 7.591e-02, -1.499e-01, 3.280e-02)); + r += mul(s6_4, M4(-2.125e-01, -5.828e-02, -2.803e-02, 9.889e-02, -3.857e-01, 1.435e-01, 4.770e-01, 5.873e-01, -3.387e-01, 5.346e-02, -1.896e-01, -1.812e-01, 7.526e-02, -4.860e-02, 1.626e-01, 1.091e-01)); + r += mul(s6_5, M4(4.983e-02, 5.614e-02, -8.512e-02, -6.024e-02, 1.184e-01, 1.253e-01, -5.679e-02, -1.284e-01, 1.984e-02, -6.673e-02, 4.165e-02, 1.339e-02, 2.025e-02, -5.299e-02, 1.479e-01, 1.989e-01)); + r += mul(s6_6, M4(1.706e-02, 7.105e-02, 1.356e-02, -3.985e-03, 1.652e-02, 1.051e-01, -4.720e-02, 3.490e-02, 9.166e-03, -1.493e-02, -5.726e-02, 1.760e-03, -5.532e-03, 5.947e-03, 6.036e-02, -3.430e-02)); + r += mul(s6_7, M4(-1.490e-02, -1.024e-01, 6.941e-02, -5.101e-02, -1.117e-01, -6.986e-01, -1.208e-01, -5.732e-02, -9.263e-02, 2.819e-02, -2.642e-02, -4.355e-03, -4.265e-02, 8.176e-02, -1.318e-02, -3.722e-02)); + r += mul(s6_8, M4(-1.656e-02, -4.463e-02, -1.614e-02, 2.373e-02, -8.426e-02, -2.565e-01, -1.981e-02, -3.502e-02, 5.466e-02, -1.358e-01, 2.843e-02, 
-5.444e-03, 3.367e-03, 2.317e-02, 1.843e-02, -1.724e-02)); + r += mul(s7_0, M4(-2.948e-02, 4.966e-02, -6.441e-02, 1.678e-01, -2.802e-02, 2.056e-02, 5.476e-02, -5.943e-02, 1.840e-02, -6.474e-02, 9.075e-02, -5.608e-02, 2.460e-02, 1.304e-02, -3.671e-02, 3.320e-02)); + r += mul(s7_1, M4(-1.210e-01, -5.928e-03, -9.505e-02, -1.144e-01, 4.999e-02, 4.006e-02, 7.867e-02, -9.689e-02, 2.531e-02, -2.388e-02, -1.326e-01, -4.300e-03, -4.730e-03, -1.709e-02, 9.504e-02, -1.560e-01)); + r += mul(s7_2, M4(2.368e-02, 1.598e-02, 1.939e-02, 2.229e-02, -8.901e-03, 3.264e-02, -5.201e-02, 2.975e-02, -1.862e-02, -1.274e-02, -5.630e-02, 9.691e-02, -2.017e-02, 1.436e-02, -5.703e-02, -1.329e-01)); + r += mul(s7_3, M4(6.468e-02, -5.168e-02, -4.589e-02, -1.296e-01, -3.748e-02, 6.205e-02, 1.169e-01, 1.290e-02, -3.376e-02, 1.150e-01, -5.875e-02, 3.723e-02, 5.062e-02, -2.485e-02, 1.010e-01, -3.626e-02)); + r += mul(s7_4, M4(-3.859e-01, -1.543e-01, 3.731e-02, 2.154e-01, -1.875e-01, -2.323e-02, -6.666e-02, 2.167e-01, 2.360e-01, -5.187e-02, -2.250e-01, -1.269e-01, -1.053e-01, 4.447e-02, 8.836e-02, 3.553e-01)); + r += mul(s7_5, M4(1.962e-03, -2.099e-02, -3.995e-02, -6.632e-02, 5.703e-02, -2.154e-02, 4.528e-02, -9.169e-02, -3.296e-03, -3.898e-02, 9.495e-02, -7.648e-02, -6.956e-02, -3.532e-02, 1.037e-01, 1.246e-01)); + r += mul(s7_6, M4(-9.365e-02, 1.239e-01, 3.454e-02, 3.758e-02, 6.988e-02, -4.852e-02, -1.090e-01, -3.746e-02, 2.325e-02, -6.496e-02, -5.741e-02, -3.494e-02, 5.050e-02, 1.425e-02, 1.124e-02, 3.424e-02)); + r += mul(s7_7, M4(-1.265e-01, 6.155e-02, -1.387e-02, -7.290e-02, -5.033e-02, -4.871e-02, 3.574e-02, -1.409e-02, -8.228e-02, 4.601e-01, 6.758e-04, -3.336e-02, -1.582e-01, -1.377e-01, 5.354e-02, -2.447e-02)); + r += mul(s7_8, M4(-2.962e-03, 1.799e-01, -1.983e-02, 4.959e-02, -1.261e-02, -1.327e-01, -1.250e-02, -1.425e-02, 8.955e-02, 1.298e-01, -1.716e-02, 1.260e-02, -6.916e-03, 3.633e-02, 1.023e-02, -6.955e-02)); + r += V4(-9.683e-04, -2.415e-03, 4.691e-03, 2.899e-03); + return r; +} + +V4 
f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.844e-03, 1.554e-01, -9.987e-02, 1.321e-01, -7.304e-02, 9.423e-02, 3.915e-02, -4.898e-02, -5.754e-02, 1.577e-01, -3.256e-02, 1.626e-01, -3.055e-02, -9.402e-03, -1.852e-02, -1.834e-02)); + r += mul(s0_1, M4(-6.417e-02, 7.474e-02, 4.864e-02, -4.934e-02, -7.196e-02, 1.810e-01, -1.471e-01, -1.831e-02, 6.508e-02, 1.228e-02, -1.634e-02, -9.918e-02, 2.784e-03, 1.068e-02, 9.356e-02, -1.270e-01)); + r += mul(s0_2, M4(-7.413e-03, 3.390e-02, -1.188e-01, -1.457e-02, -9.838e-03, 1.611e-02, -4.288e-02, -4.426e-02, -6.105e-02, 5.614e-02, 5.299e-02, 7.541e-02, -7.666e-02, -1.571e-02, -6.267e-02, -8.539e-03)); + r += mul(s0_3, M4(-7.046e-02, -9.099e-02, -7.332e-02, -4.078e-02, -1.651e-01, 2.003e-01, 3.956e-03, 8.217e-03, -1.464e-01, -4.877e-02, -1.386e-01, 1.315e-01, 4.155e-02, 2.929e-02, -3.655e-02, -3.612e-02)); + r += mul(s0_4, M4(-3.723e-01, -1.761e-01, -2.862e-01, 8.008e-04, -5.477e-02, 1.588e-01, -1.080e-01, 3.813e-01, -7.015e-02, -2.961e-02, -2.635e-02, 2.571e-01, 3.292e-02, -7.038e-02, 5.401e-03, -9.336e-03)); + r += mul(s0_5, M4(-5.563e-02, -2.680e-02, -1.954e-02, 1.323e-01, -2.530e-01, 1.074e-01, -4.652e-01, 7.981e-02, -2.483e-02, -2.423e-02, -3.723e-02, -8.706e-02, -5.204e-02, -5.312e-02, 2.606e-02, -3.335e-03)); + r += mul(s0_6, M4(-9.645e-02, 6.358e-02, 4.216e-02, 1.131e-01, 2.108e-02, 
2.098e-02, 6.549e-03, -5.148e-02, -9.235e-02, 6.919e-02, 1.248e-02, -4.267e-03, 1.063e-01, -1.325e-02, -2.634e-03, -2.397e-02)); + r += mul(s0_7, M4(3.359e-02, 1.352e-01, -2.312e-01, -9.207e-02, -7.667e-02, -4.678e-02, -1.027e-01, -7.593e-02, -4.580e-02, 6.862e-03, 5.972e-04, 2.609e-02, -1.291e-01, 4.462e-02, -5.702e-02, 5.811e-02)); + r += mul(s0_8, M4(1.752e-02, -1.779e-02, 6.688e-03, -1.062e-01, -9.378e-02, 2.514e-02, -2.137e-01, 9.271e-02, -5.857e-02, 3.197e-03, -7.593e-02, 6.963e-02, -5.174e-02, 5.168e-02, 6.475e-02, 5.364e-03)); + r += mul(s1_0, M4(-2.804e-02, 3.615e-03, -1.209e-02, -2.437e-02, 1.378e-02, -4.057e-02, 3.856e-02, -1.013e-02, 5.417e-02, 1.450e-01, 6.627e-02, -3.019e-02, -5.221e-02, -3.153e-02, 6.608e-02, 1.319e-03)); + r += mul(s1_1, M4(6.063e-02, 2.107e-02, 4.786e-02, 7.006e-02, -8.505e-03, 3.895e-02, -6.218e-02, -9.748e-02, -9.074e-02, -4.568e-03, -1.059e-01, -4.216e-02, 1.567e-01, 2.320e-02, 3.330e-01, -1.519e-01)); + r += mul(s1_2, M4(1.058e-03, -2.897e-02, -2.634e-02, -3.948e-02, -3.213e-02, -7.246e-03, 1.435e-02, 1.586e-02, 4.976e-02, 4.865e-02, -1.550e-02, -8.290e-03, -8.188e-02, -8.424e-02, 1.040e-02, 6.041e-02)); + r += mul(s1_3, M4(-4.196e-02, -1.958e-01, -2.251e-02, -1.413e-01, 6.275e-02, 4.110e-02, 9.982e-02, 5.650e-02, 9.648e-02, -9.338e-02, 3.261e-02, -5.427e-03, 2.020e-02, -4.565e-02, 1.015e-02, -1.362e-01)); + r += mul(s1_4, M4(-9.324e-02, -1.733e-01, -5.316e-02, -2.774e-01, -5.793e-02, 1.081e-02, -2.404e-01, 1.028e-01, 2.578e-02, 2.004e-02, 8.182e-02, 1.598e-02, 7.152e-01, -1.617e-01, -1.103e-02, -8.309e-02)); + r += mul(s1_5, M4(4.133e-03, -2.312e-02, 2.066e-02, 7.804e-02, 3.571e-02, -3.780e-03, 9.405e-02, -7.585e-05, 4.132e-02, -9.038e-02, 1.091e-01, 7.613e-02, -1.828e-01, 2.568e-02, -1.351e-01, -9.911e-02)); + r += mul(s1_6, M4(9.771e-02, 1.240e-02, 2.540e-02, -1.478e-02, 1.216e-01, 4.834e-02, 1.506e-02, -4.087e-02, -1.564e-01, 2.364e-02, -1.977e-03, 1.286e-01, 8.045e-03, 7.522e-03, -1.324e-02, -4.959e-02)); + r += mul(s1_7, 
M4(-2.213e-01, -1.216e-02, -7.938e-02, -1.294e-01, 3.202e-02, 3.705e-02, -4.118e-02, -4.715e-02, 8.480e-02, 3.195e-02, -1.020e-01, -8.519e-02, 2.847e-01, 6.033e-02, 5.722e-02, -2.313e-02)); + r += mul(s1_8, M4(8.365e-02, -1.676e-02, 3.341e-02, -6.065e-02, -9.992e-03, 3.877e-02, 2.455e-02, 2.107e-02, 4.355e-03, -4.409e-02, -4.814e-02, -8.902e-02, -1.522e-02, 2.618e-02, -3.471e-03, 1.016e-02)); + r += mul(s2_0, M4(2.073e-03, 1.905e-01, 5.916e-02, 8.437e-02, -4.019e-02, 1.317e-02, -6.072e-02, 1.908e-02, 1.679e-01, 6.164e-02, 7.262e-02, -9.583e-03, 6.088e-02, -7.323e-03, 1.460e-01, -1.744e-02)); + r += mul(s2_1, M4(2.516e-02, 2.239e-01, -2.767e-01, 1.073e-02, 1.391e-01, -7.229e-02, 3.888e-02, -2.419e-02, -3.716e-02, 8.999e-03, 6.149e-02, 2.671e-02, -1.305e-01, -2.019e-01, 1.115e-01, -1.616e-01)); + r += mul(s2_2, M4(-1.651e-02, 7.084e-02, -1.182e-03, 2.399e-01, -1.086e-01, -6.664e-03, -7.713e-02, -9.798e-02, 7.428e-02, 2.596e-02, -9.965e-03, -2.995e-02, 5.164e-02, -1.678e-03, -1.451e-01, -8.076e-02)); + r += mul(s2_3, M4(1.190e-01, 3.007e-02, 4.784e-03, -7.826e-02, -2.599e-01, -1.379e-01, -2.805e-02, -1.294e-01, 1.990e-01, -1.185e-01, 5.226e-02, 5.370e-02, 1.508e-01, -9.447e-02, 1.116e-01, 9.319e-02)); + r += mul(s2_4, M4(1.826e-01, 7.537e-02, -3.623e-02, -3.881e-02, 2.956e-01, -2.971e-01, 1.199e-01, 1.082e-01, 2.164e-02, 5.639e-02, 8.989e-02, -3.416e-03, 7.939e-03, -3.193e-01, -1.613e-01, 1.073e-01)); + r += mul(s2_5, M4(6.423e-02, 8.481e-02, -1.448e-02, -6.871e-02, -1.065e-01, 1.465e-02, -6.037e-02, 4.801e-03, 6.445e-02, 3.601e-02, -2.203e-03, 8.718e-02, 1.127e-01, -4.716e-02, 9.620e-02, 7.383e-02)); + r += mul(s2_6, M4(-3.053e-02, -3.952e-02, -6.666e-02, 7.401e-03, -1.246e-01, 5.702e-02, 2.252e-02, -1.665e-01, 5.571e-02, -6.662e-02, 2.469e-02, -1.780e-01, 9.934e-02, -1.687e-02, -1.031e-02, -2.773e-01)); + r += mul(s2_7, M4(1.636e-01, 1.405e-02, -5.639e-02, -7.883e-02, 6.668e-02, 1.294e-01, 4.010e-03, -7.690e-02, -8.254e-02, -3.036e-02, 2.605e-02, -6.489e-02, 
-1.315e-03, 5.165e-02, 7.594e-02, -1.500e-01)); + r += mul(s2_8, M4(-2.363e-02, 5.973e-03, 9.222e-02, 5.499e-02, -6.881e-03, 3.373e-02, -3.033e-02, 8.101e-02, 3.046e-02, -5.542e-02, 1.137e-03, -4.601e-02, 9.911e-02, 3.916e-02, 3.760e-02, -9.976e-02)); + r += mul(s3_0, M4(-7.036e-03, 2.348e-02, -2.668e-02, -1.673e-02, -1.081e-02, 5.490e-02, 3.197e-02, -1.238e-01, 7.411e-02, -1.263e-03, 5.113e-02, -3.649e-02, -3.940e-02, 1.214e-03, -4.966e-02, 3.034e-03)); + r += mul(s3_1, M4(9.343e-02, 7.414e-02, 1.265e-01, -1.021e-01, 3.649e-02, 1.131e-01, 2.501e-02, -3.036e-02, -3.411e-02, -2.742e-02, -2.141e-02, 5.247e-02, 1.422e-03, 2.364e-03, 2.285e-02, -4.067e-02)); + r += mul(s3_2, M4(-2.294e-02, -7.549e-03, 4.819e-02, 2.262e-02, -5.783e-02, 5.869e-02, -1.805e-02, 3.606e-02, -1.720e-02, -4.628e-02, -6.133e-03, 1.211e-02, 1.473e-02, 4.633e-04, 6.455e-02, 1.707e-02)); + r += mul(s3_3, M4(4.179e-02, 4.599e-02, 8.031e-03, -3.261e-02, -7.724e-02, -2.398e-02, 8.735e-04, 4.392e-02, 1.509e-01, -1.521e-03, -1.172e-02, 7.285e-02, -1.587e-02, -1.100e-02, 6.007e-03, -6.076e-02)); + r += mul(s3_4, M4(1.015e-01, -9.532e-02, 2.807e-02, 9.613e-02, 1.001e-01, -8.175e-02, -1.064e-01, -2.877e-02, 3.196e-01, -2.984e-02, 2.316e-02, 1.708e-02, 1.076e-01, 1.969e-02, 5.984e-02, -1.004e-01)); + r += mul(s3_5, M4(5.147e-02, -7.400e-03, 4.185e-02, -1.841e-02, 1.928e-02, -6.324e-02, -7.078e-03, -1.564e-02, -1.146e-01, 8.080e-02, -7.694e-02, 1.290e-02, -8.033e-02, 5.211e-02, -2.262e-03, -2.603e-02)); + r += mul(s3_6, M4(1.547e-02, 2.161e-02, -1.655e-02, 3.602e-02, -1.989e-02, 8.062e-03, -7.888e-03, 2.668e-02, -2.696e-02, -1.124e-02, 8.936e-03, -7.228e-02, -6.847e-02, -2.122e-02, -1.353e-02, 2.351e-02)); + r += mul(s3_7, M4(1.200e-01, 4.431e-03, 3.703e-02, -6.798e-02, -4.969e-02, 1.252e-02, -5.912e-02, -6.205e-02, -5.068e-02, 7.159e-03, -4.983e-02, -2.351e-02, 3.609e-02, -2.077e-02, 4.927e-02, 6.275e-02)); + r += mul(s3_8, M4(-1.623e-02, 2.697e-04, 1.456e-02, 6.648e-02, -3.145e-02, 1.694e-02, 3.269e-02, 
5.426e-02, 2.234e-02, -3.981e-03, -4.205e-02, 1.487e-02, -1.922e-02, -2.691e-02, 3.359e-02, -4.451e-02)); + r += mul(s4_0, M4(-3.743e-01, -2.256e-02, -2.008e-01, 2.801e-01, -1.807e-02, -3.615e-02, 3.172e-02, -1.966e-02, -3.453e-02, 1.082e-01, 4.911e-02, -4.706e-02, -4.231e-02, 8.145e-01, 2.666e-03, 4.914e-01)); + r += mul(s4_1, M4(1.266e-01, 4.054e-01, 2.968e-01, 4.639e-01, 1.063e-01, -4.573e-02, 4.886e-02, -1.962e-02, -3.122e-02, -1.772e-02, -1.607e-01, -5.203e-02, -2.388e-01, 3.515e-01, -2.278e-01, 3.669e-01)); + r += mul(s4_2, M4(5.112e-02, -2.854e-02, 2.998e-01, -9.501e-02, 6.029e-03, -3.608e-02, 2.662e-02, -1.232e-01, 2.802e-03, 3.074e-02, 3.239e-02, 9.839e-02, -4.825e-02, 5.658e-02, -8.847e-02, 1.115e-01)); + r += mul(s4_3, M4(-2.824e-01, 1.812e-01, -1.479e-02, -4.548e-02, -2.106e-02, -1.398e-02, 1.541e-02, -5.793e-02, 2.479e-02, -1.207e-01, 6.618e-02, -1.195e-01, -1.081e-01, 3.893e-03, -3.853e-02, 3.672e-01)); + r += mul(s4_4, M4(-1.810e-01, -7.004e-02, 2.545e-01, 3.877e-01, 2.124e-01, 2.176e-02, -5.939e-02, -1.713e-01, -9.402e-02, -2.104e-01, -1.047e-01, -2.873e-01, 1.080e-01, -9.161e-03, -1.703e-01, -1.753e-01)); + r += mul(s4_5, M4(-3.937e-01, 1.740e-01, 2.346e-02, -2.843e-01, -9.724e-02, 1.133e-02, 4.398e-02, 7.771e-02, 4.686e-02, 3.870e-02, 4.896e-02, -1.328e-01, 7.552e-03, 4.673e-02, -2.957e-02, 6.534e-02)); + r += mul(s4_6, M4(-6.416e-02, -1.017e-01, 1.154e-01, 1.133e-01, -3.764e-02, -3.613e-03, -7.828e-04, 4.870e-02, -1.794e-02, 9.768e-02, -4.049e-02, 1.990e-01, 9.667e-02, 4.579e-02, 2.897e-02, -5.383e-02)); + r += mul(s4_7, M4(2.810e-01, 2.579e-01, -2.749e-02, -4.264e-01, -7.708e-02, 8.357e-03, 2.550e-02, 4.187e-02, -8.995e-02, -8.624e-02, 2.607e-02, -8.133e-03, -1.030e-01, 2.847e-02, -9.830e-02, -3.134e-02)); + r += mul(s4_8, M4(-3.575e-01, -2.686e-02, -1.088e-01, -8.145e-02, -4.100e-03, -1.068e-02, 4.221e-03, 1.113e-02, -9.216e-04, 3.555e-02, 1.371e-02, -5.217e-02, -9.224e-04, 1.972e-02, -3.992e-02, -1.156e-02)); + r += mul(s5_0, M4(5.843e-02, 
5.810e-03, 1.973e-02, -4.943e-02, 2.861e-02, 3.491e-02, 2.447e-02, 3.334e-03, 4.134e-02, 6.052e-02, -1.442e-02, 4.824e-03, -6.182e-02, 4.679e-02, -1.181e-02, 5.764e-02)); + r += mul(s5_1, M4(-1.254e-01, -3.464e-02, -1.534e-02, 1.099e-01, -7.255e-02, 3.052e-02, -1.206e-01, 1.915e-02, 5.714e-02, -1.211e-02, -4.535e-03, -1.104e-01, -1.319e-01, -4.419e-02, 6.282e-02, 1.092e-01)); + r += mul(s5_2, M4(4.543e-02, -3.484e-02, 5.793e-02, -4.426e-02, 4.397e-02, -4.703e-02, 1.392e-02, -8.030e-02, -1.836e-02, 3.873e-02, 9.299e-02, 2.857e-02, 2.991e-02, -1.835e-02, -5.184e-02, -2.837e-02)); + r += mul(s5_3, M4(4.986e-02, -2.258e-04, -8.686e-03, 3.482e-02, 8.419e-03, 5.851e-02, -1.684e-02, -5.232e-02, -7.397e-02, -4.820e-02, -3.955e-02, 4.374e-02, 2.606e-02, -1.474e-02, 5.834e-02, 4.784e-02)); + r += mul(s5_4, M4(-3.278e-04, -4.129e-03, 2.977e-02, -4.259e-02, -7.836e-02, -1.768e-01, -6.157e-02, -4.212e-01, 1.305e-01, 5.900e-03, 3.175e-02, 1.393e-01, 1.560e-01, 1.311e-02, 1.226e-01, -1.183e-01)); + r += mul(s5_5, M4(-5.974e-02, 6.514e-02, -2.588e-02, 5.216e-02, 5.597e-02, 2.416e-02, 7.330e-02, -2.582e-03, -7.243e-03, 6.378e-03, 3.404e-02, -7.524e-02, 4.381e-02, 1.811e-02, 2.229e-02, 3.689e-02)); + r += mul(s5_6, M4(2.569e-04, 5.830e-03, 4.810e-03, -3.864e-02, 1.194e-02, 6.163e-02, 2.504e-02, 1.396e-01, -9.142e-03, 2.301e-02, -2.321e-02, -5.109e-02, -4.148e-02, 3.869e-02, 8.283e-04, -3.578e-02)); + r += mul(s5_7, M4(2.535e-03, 6.239e-02, 4.885e-03, -4.034e-02, -2.369e-01, 3.504e-02, -2.052e-01, 2.957e-01, -6.562e-02, 2.255e-02, -7.893e-03, -1.276e-01, -4.230e-02, 2.348e-02, 5.599e-03, -6.273e-02)); + r += mul(s5_8, M4(-9.187e-03, 5.527e-02, 9.465e-03, 1.230e-02, -9.693e-02, 2.337e-02, -5.018e-02, 6.008e-03, 1.378e-02, -4.946e-03, 5.239e-02, 2.652e-02, 3.399e-02, -8.836e-04, 1.851e-02, -2.626e-02)); + r += mul(s6_0, M4(3.379e-02, -2.399e-02, -4.265e-02, -8.043e-02, 2.099e-02, -9.810e-02, 4.898e-02, -5.211e-02, -2.842e-03, -3.843e-02, 3.951e-02, 6.667e-03, 2.649e-02, 1.701e-02, 
-3.060e-02, 4.041e-02)); + r += mul(s6_1, M4(-9.006e-03, 1.676e-01, -1.003e-02, 5.151e-02, 3.163e-02, 4.564e-02, -6.965e-02, 2.953e-02, -1.655e-01, -1.426e-01, -1.307e-01, 2.165e-01, 3.455e-02, -3.141e-02, 7.936e-02, -2.925e-02)); + r += mul(s6_2, M4(-3.907e-02, 1.847e-03, 6.902e-02, 6.066e-02, 2.182e-03, -8.721e-02, 7.738e-02, -6.377e-02, -6.177e-02, 1.658e-02, -1.375e-01, 8.056e-02, 4.474e-02, -4.473e-03, -3.670e-02, -1.195e-01)); + r += mul(s6_3, M4(2.058e-02, 2.677e-02, -4.589e-04, -2.547e-02, 1.198e-01, -6.429e-02, 1.756e-02, 3.844e-01, -9.013e-02, -5.836e-02, -4.381e-03, -7.382e-02, -5.788e-02, -1.925e-02, -4.363e-02, -9.881e-02)); + r += mul(s6_4, M4(-1.774e-01, 1.101e-01, -4.123e-02, -2.167e-03, -4.139e-01, -1.634e-01, -4.823e-01, 4.205e-01, -1.990e-01, -5.623e-03, -1.491e-03, 2.477e-03, 8.383e-02, 6.132e-02, 8.338e-02, 8.971e-02)); + r += mul(s6_5, M4(-1.043e-01, 2.256e-02, 1.143e-01, -6.616e-02, -1.464e-01, -7.427e-02, 3.555e-02, -1.852e-01, 4.454e-02, 1.994e-02, -5.957e-02, 5.819e-02, 5.707e-02, 4.078e-02, -2.942e-02, 1.648e-01)); + r += mul(s6_6, M4(1.917e-02, 2.395e-02, 1.240e-03, 7.490e-02, 2.125e-02, -5.508e-02, 4.957e-02, -1.780e-01, -4.028e-02, -2.978e-03, 1.606e-02, 8.035e-03, -5.799e-02, -2.113e-03, -3.876e-02, -1.504e-02)); + r += mul(s6_7, M4(2.720e-02, -2.151e-02, 4.837e-02, 8.041e-02, -3.779e-01, 1.368e-01, 4.470e-02, 1.481e-03, -4.211e-02, 8.353e-03, -8.142e-02, -3.991e-02, -1.079e-01, -7.464e-02, 3.982e-03, -1.240e-01)); + r += mul(s6_8, M4(9.181e-03, -1.923e-04, 9.953e-03, -6.180e-02, 1.241e-02, 5.246e-03, 4.752e-02, -2.098e-02, 1.469e-02, 5.371e-02, 3.001e-02, 9.175e-02, 3.327e-02, 1.569e-03, 6.034e-02, 1.628e-02)); + r += mul(s7_0, M4(4.657e-02, 1.687e-01, 3.505e-02, -6.694e-02, -1.819e-02, -4.057e-02, -1.262e-02, -5.044e-02, 8.183e-02, -2.083e-01, -7.118e-02, -1.669e-02, 3.043e-02, 2.407e-02, 3.859e-02, 1.313e-02)); + r += mul(s7_1, M4(-1.862e-02, -9.117e-02, -2.167e-01, -8.257e-02, 7.039e-02, 1.596e-01, -3.436e-02, -3.428e-02, 
-4.240e-02, -7.341e-02, 2.769e-01, 5.086e-02, -1.324e-02, -4.328e-02, -1.447e-01, 4.743e-04)); + r += mul(s7_2, M4(1.579e-02, -1.775e-02, 9.142e-02, -9.606e-03, -1.254e-02, -8.763e-03, 6.363e-02, -6.381e-02, -3.024e-02, 6.262e-02, -1.323e-01, 3.912e-02, 6.531e-02, -7.883e-02, -1.101e-01, -1.480e-01)); + r += mul(s7_3, M4(4.739e-02, -1.739e-02, 6.063e-02, 1.364e-01, -9.051e-02, -1.215e-01, -7.398e-02, 2.284e-02, 9.971e-02, -1.589e-02, -8.920e-02, -1.952e-01, 7.063e-03, -8.791e-02, 4.235e-02, -3.058e-02)); + r += mul(s7_4, M4(-2.864e-01, 2.797e-01, -2.977e-01, 2.923e-01, 1.144e-01, -3.507e-01, -7.560e-02, 3.456e-02, 4.366e-01, 9.177e-02, 9.463e-02, -2.082e-01, -2.926e-01, 1.083e-01, -2.552e-01, 6.352e-02)); + r += mul(s7_5, M4(-2.574e-03, 4.174e-02, 2.754e-02, 5.446e-02, -6.982e-02, -3.854e-02, 3.453e-02, -6.915e-02, 1.067e-01, -2.521e-02, 9.016e-03, 7.085e-02, -3.043e-02, -1.628e-01, -3.251e-01, -1.362e-01)); + r += mul(s7_6, M4(9.595e-02, 4.518e-02, 3.763e-02, -1.439e-01, -1.223e-01, -4.048e-02, 1.560e-02, 8.571e-02, -7.292e-02, -6.801e-02, -2.210e-02, -3.341e-02, -6.534e-03, -4.839e-03, 2.293e-02, 5.185e-02)); + r += mul(s7_7, M4(-1.496e-01, 1.637e-01, -1.085e-02, 2.865e-02, -1.082e-01, -4.290e-02, -8.978e-03, 8.324e-02, 4.995e-02, -2.644e-02, 2.657e-03, -1.422e-01, -2.690e-01, -1.773e-01, -1.007e-02, -2.957e-02)); + r += mul(s7_8, M4(1.455e-02, 5.432e-02, -4.916e-02, -7.562e-02, -2.951e-02, 5.760e-02, 2.528e-02, 4.951e-02, 4.740e-02, -1.737e-03, 6.007e-02, -7.422e-04, -7.052e-02, -3.010e-02, 3.249e-02, -8.888e-02)); + r += V4(-2.811e-03, 2.195e-03, -5.315e-03, 3.496e-03); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + 
V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + 
s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, 
s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 6 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0, t1, t2, t3 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.888e-02, -3.403e-03, 5.631e-03, -8.622e-03, -1.458e-05, -2.226e-02, 2.582e-02, -5.644e-03, -3.952e-02, 9.939e-03, -2.528e-02, -5.767e-03, 1.976e-02, 2.383e-04, -1.765e-02, -2.412e-02)); + r += mul(s0_1, M4(-4.944e-02, 8.011e-02, -6.296e-02, 1.099e-02, 1.841e-02, 4.527e-02, -9.701e-03, 9.380e-03, -1.764e-02, -8.276e-02, 5.824e-03, -2.972e-02, -2.729e-02, 5.527e-02, -9.236e-03, -8.955e-03)); + r += mul(s0_2, 
M4(1.764e-02, -6.812e-02, 3.431e-03, -2.123e-02, -1.605e-02, 1.850e-03, -3.013e-03, 8.246e-03, -6.503e-03, 2.533e-02, -8.876e-03, -9.591e-04, 2.255e-03, -1.201e-02, 1.540e-03, -2.920e-03)); + r += mul(s0_3, M4(2.650e-02, -5.410e-03, 3.586e-02, -3.986e-04, -4.626e-02, -1.262e-02, -6.323e-02, -1.873e-02, 2.632e-02, -6.002e-03, 1.960e-02, 1.260e-02, -3.853e-02, -6.770e-03, 5.896e-02, -1.443e-03)); + r += mul(s0_4, M4(1.233e-01, 3.216e-02, 1.401e-01, 1.211e-01, 2.484e-02, 1.150e-02, 1.584e-02, -1.874e-02, 3.018e-02, 8.186e-02, -8.657e-03, 2.788e-03, 3.894e-02, -1.638e-02, -8.682e-03, 2.488e-02)); + r += mul(s0_5, M4(-8.753e-02, 2.810e-02, -3.129e-02, -5.065e-02, 1.391e-02, -2.717e-03, 4.407e-03, 1.924e-02, -1.319e-02, -2.091e-02, -1.679e-03, 7.680e-03, 9.123e-03, 1.159e-02, 1.399e-03, -1.071e-02)); + r += mul(s0_6, M4(-4.298e-03, 3.027e-03, 4.670e-03, 3.987e-03, 1.077e-02, 1.056e-03, -2.411e-03, -7.530e-03, 1.200e-03, 6.779e-04, 7.780e-03, 4.998e-03, 6.117e-03, -1.134e-02, -2.398e-02, 6.210e-03)); + r += mul(s0_7, M4(-1.655e-02, 8.511e-03, -5.150e-03, -1.515e-03, -3.137e-03, 5.990e-03, 3.137e-02, 3.198e-02, -2.858e-03, -2.917e-03, 1.657e-02, 2.253e-02, -6.815e-03, 1.640e-02, 1.105e-02, 2.986e-02)); + r += mul(s0_8, M4(3.787e-03, -2.778e-02, -3.509e-02, -9.779e-03, 2.890e-03, 4.340e-03, -3.797e-03, -4.003e-03, 2.937e-03, 2.451e-03, -1.992e-03, -4.934e-03, 3.575e-03, 1.287e-03, 7.888e-03, 1.431e-02)); + r += mul(s1_0, M4(8.311e-04, 3.358e-03, -4.527e-04, -8.006e-03, -2.292e-02, -9.604e-03, 2.427e-02, 1.118e-02, -4.610e-02, -5.515e-02, -8.618e-02, 1.304e-02, -5.202e-02, -8.690e-02, 2.107e-02, -3.049e-02)); + r += mul(s1_1, M4(1.753e-02, 2.576e-02, -7.201e-02, 4.915e-03, -2.588e-01, -1.619e-01, 2.213e-01, 1.254e-01, -1.919e-01, 2.244e-01, 1.117e-01, -8.585e-02, -1.778e-02, 6.616e-02, 1.959e-03, 3.946e-03)); + r += mul(s1_2, M4(3.591e-02, -8.080e-02, 4.239e-03, -2.942e-02, 6.586e-04, -4.860e-02, -4.173e-02, 6.332e-02, 7.348e-03, -1.984e-02, 1.691e-02, -4.066e-02, 
-3.536e-03, -5.051e-03, 9.376e-04, -1.307e-03)); + r += mul(s1_3, M4(2.056e-03, 8.929e-03, 1.033e-02, 2.423e-02, -2.230e-02, -8.957e-03, -4.406e-02, -9.152e-03, 1.344e-02, -1.218e-02, 2.665e-01, -9.596e-02, 3.645e-01, -4.531e-02, -2.095e-01, 1.210e-01)); + r += mul(s1_4, M4(3.958e-01, -7.946e-02, 3.844e-01, -8.027e-03, 7.845e-02, 4.609e-02, -2.085e-02, 2.660e-02, 9.713e-02, 1.818e-02, -1.598e-01, 1.207e-02, 1.814e-02, -9.009e-02, -4.132e-03, -8.960e-02)); + r += mul(s1_5, M4(-5.529e-02, -1.450e-01, 1.146e-02, -1.788e-01, 3.910e-02, 5.140e-02, 2.975e-02, -2.628e-02, -9.157e-03, 3.761e-03, 1.054e-02, 1.309e-02, 1.474e-02, 2.617e-02, -3.821e-03, -9.823e-03)); + r += mul(s1_6, M4(-4.906e-03, 8.074e-03, -1.045e-02, 6.266e-03, 8.151e-03, 2.609e-03, 2.665e-03, -2.719e-03, 2.041e-04, 1.069e-02, -8.650e-03, -2.215e-03, -3.139e-02, 1.281e-03, -1.727e-02, -7.010e-03)); + r += mul(s1_7, M4(-1.916e-02, 1.448e-03, 6.456e-02, -4.235e-02, -6.792e-03, 9.169e-04, 3.381e-02, 2.972e-02, -3.892e-03, -2.675e-03, 3.504e-02, 2.591e-02, -2.620e-02, -4.219e-02, -2.688e-04, 1.606e-01)); + r += mul(s1_8, M4(-7.694e-03, -1.994e-02, -3.518e-02, -5.359e-02, 2.410e-03, -2.129e-04, -5.800e-03, 6.700e-03, -1.502e-03, 1.191e-03, -7.976e-03, -5.875e-03, 8.394e-04, -5.713e-03, 1.298e-02, 2.316e-03)); + r += mul(s2_0, M4(-4.358e-02, 1.205e-02, -1.150e-02, 1.204e-02, 2.057e-02, 2.434e-03, -7.853e-03, 2.368e-03, -7.508e-03, -1.951e-03, 6.640e-03, 1.790e-03, -1.187e-02, -6.954e-03, -1.163e-02, 6.363e-03)); + r += mul(s2_1, M4(6.230e-03, -9.421e-02, 2.283e-02, 5.563e-03, -1.273e-02, 3.673e-02, -1.731e-03, 5.327e-03, 1.650e-02, -2.073e-02, 4.072e-03, -1.494e-02, 5.910e-02, -9.066e-03, 1.432e-02, -9.310e-03)); + r += mul(s2_2, M4(1.872e-03, 4.321e-03, -6.060e-03, 1.506e-03, -1.147e-03, -3.120e-02, 5.099e-03, -8.089e-03, -1.281e-03, 3.518e-02, 2.337e-03, 1.787e-02, -2.022e-03, 2.472e-02, -2.785e-03, 1.791e-03)); + r += mul(s2_3, M4(8.936e-03, 2.829e-02, -3.183e-02, 2.790e-02, 3.410e-02, 2.374e-02, 4.922e-02, 
1.864e-02, -6.471e-02, 6.844e-04, -6.425e-02, -1.419e-03, 2.599e-02, 1.764e-02, -6.465e-03, -1.644e-02)); + r += mul(s2_4, M4(7.059e-02, 8.303e-03, -6.271e-03, -9.405e-02, -1.386e-01, -9.546e-02, -6.011e-02, -9.171e-03, -2.279e-03, -5.129e-02, 2.251e-02, -5.562e-02, -5.187e-02, -2.396e-02, 4.020e-02, 3.432e-02)); + r += mul(s2_5, M4(-3.111e-02, 2.546e-02, -1.325e-03, 2.816e-02, 1.775e-02, -1.269e-02, 1.138e-02, -1.000e-02, 1.697e-02, 3.864e-03, 2.087e-03, 3.344e-02, -8.484e-04, -2.693e-02, 3.090e-03, 2.142e-02)); + r += mul(s2_6, M4(6.730e-03, 3.269e-04, 2.920e-02, 1.163e-02, -1.127e-02, 1.981e-03, 1.961e-03, 1.407e-02, 7.273e-03, -5.854e-03, -7.461e-03, -5.651e-03, -3.546e-04, 4.019e-03, 2.618e-02, 2.367e-02)); + r += mul(s2_7, M4(2.107e-02, 2.829e-02, 8.396e-02, 4.000e-02, 1.222e-02, -1.084e-02, -7.531e-02, -6.661e-02, -1.950e-03, 3.465e-03, -1.228e-02, 1.770e-05, 4.522e-03, -5.513e-03, -3.509e-02, -4.016e-02)); + r += mul(s2_8, M4(-3.927e-03, -9.815e-04, -1.947e-02, 6.041e-03, -7.065e-03, -2.652e-03, 5.390e-04, -2.048e-02, 5.926e-03, 7.797e-03, 1.450e-02, 1.169e-02, -1.547e-03, 2.840e-04, -5.163e-03, -1.303e-02)); + r += mul(s3_0, M4(-5.409e-02, 9.184e-03, -4.422e-03, 1.524e-02, 2.248e-02, -3.864e-03, -6.101e-03, -3.069e-03, -2.527e-02, 6.830e-03, -1.349e-03, 8.878e-03, 1.036e-02, -4.895e-02, 3.211e-02, -2.753e-02)); + r += mul(s3_1, M4(1.371e-02, -6.961e-02, 3.120e-02, 3.138e-02, 1.296e-01, 1.191e-01, -1.508e-02, 1.052e-02, 1.901e-01, -1.728e-01, 1.270e-02, -7.327e-03, -3.998e-02, 2.193e-01, -1.273e-02, 1.802e-02)); + r += mul(s3_2, M4(4.931e-03, -2.124e-03, -8.819e-03, 3.987e-03, -9.392e-03, 1.330e-02, 1.135e-03, -2.080e-02, -2.240e-02, 3.794e-02, -8.390e-03, 3.292e-02, 8.262e-03, -5.288e-03, 8.888e-05, -1.230e-03)); + r += mul(s3_3, M4(6.616e-02, 4.391e-02, -7.932e-02, 3.235e-02, 4.285e-02, 2.983e-02, 3.588e-02, -7.229e-03, -6.323e-02, 2.271e-02, -6.090e-02, 1.291e-02, 1.438e-02, 1.232e-02, -1.788e-02, -3.406e-02)); + r += mul(s3_4, M4(1.351e-01, 4.053e-01, 
-1.401e-01, -4.350e-01, -3.408e-02, -3.919e-02, -3.148e-01, -1.567e-01, 2.798e-01, -3.152e-01, 4.335e-01, -4.720e-01, -4.453e-03, -3.111e-02, 5.432e-02, 1.661e-01)); + r += mul(s3_5, M4(-1.449e-02, 3.672e-02, 3.008e-03, 3.696e-02, 2.229e-02, 4.568e-02, 8.036e-03, -1.061e-01, -8.701e-03, 2.603e-02, -1.476e-02, -8.277e-03, 1.253e-03, -4.533e-02, 1.677e-02, -4.820e-02)); + r += mul(s3_6, M4(-1.351e-03, -1.201e-03, 2.428e-02, -1.435e-03, -1.643e-02, 3.242e-03, 7.810e-03, 1.274e-02, -9.967e-04, -1.764e-03, -2.105e-02, 1.409e-02, 3.607e-03, 4.228e-03, 2.109e-02, 2.833e-02)); + r += mul(s3_7, M4(-8.967e-04, 4.304e-04, 1.274e-01, 4.554e-03, -3.403e-03, -2.228e-02, -5.090e-02, -5.075e-02, 1.022e-02, -1.074e-02, 3.729e-02, -4.013e-02, 1.206e-03, 3.008e-03, -3.285e-02, -3.790e-02)); + r += mul(s3_8, M4(-8.135e-03, -6.612e-03, -1.430e-02, 1.443e-02, -9.392e-03, -8.874e-03, 2.189e-03, -4.325e-03, 3.305e-03, 7.361e-03, -3.845e-03, 4.320e-02, 5.838e-05, -5.906e-03, -9.746e-03, -6.372e-03)); + r += mul(s4_0, M4(-6.619e-02, 3.386e-03, -1.440e-02, -2.684e-03, 1.861e-01, 6.708e-03, -2.086e-01, -2.792e-02, 5.085e-03, -1.371e-03, 7.496e-04, 5.475e-04, 3.164e-02, -2.828e-03, 7.371e-03, -3.730e-03)); + r += mul(s4_1, M4(2.662e-01, 9.839e-02, -1.261e-03, 1.859e-02, 1.929e-01, 4.174e-01, -1.989e-01, -4.026e-01, 1.022e-02, 3.651e-03, 1.549e-02, 9.997e-03, 1.631e-02, 5.879e-02, -1.085e-02, 5.666e-03)); + r += mul(s4_2, M4(-4.957e-03, 5.830e-02, 1.930e-03, 1.448e-02, -1.145e-02, -1.217e-03, -2.842e-02, -1.898e-02, -3.649e-03, -1.017e-02, -6.273e-03, -9.116e-03, -5.015e-04, -1.688e-02, 3.775e-03, -1.721e-04)); + r += mul(s4_3, M4(2.389e-03, 2.408e-02, -5.210e-02, 6.875e-03, -3.646e-02, -1.013e-02, 4.480e-02, 4.505e-02, 2.366e-03, 3.030e-03, 1.549e-03, -1.001e-03, 9.867e-02, -2.665e-02, 9.044e-02, -1.874e-02)); + r += mul(s4_4, M4(-3.643e-02, -2.222e-01, 4.160e-01, -8.814e-02, -3.183e-03, -2.813e-02, -2.998e-02, 3.857e-03, 1.168e-02, -2.557e-03, -3.460e-03, 3.700e-02, -5.386e-02, 1.318e-01, 
2.204e-02, 1.646e-01)); + r += mul(s4_5, M4(-1.667e-02, 1.179e-02, -1.704e-02, 1.701e-02, 1.466e-02, 6.225e-03, 1.316e-02, 6.819e-03, 1.538e-02, 1.750e-02, 1.290e-02, -9.130e-03, -9.388e-04, -2.310e-02, 1.012e-03, -2.192e-02)); + r += mul(s4_6, M4(-4.788e-03, -1.788e-04, 1.942e-03, 2.068e-02, 8.190e-03, 2.922e-03, 8.121e-03, 8.156e-03, -6.692e-04, 4.287e-03, -1.235e-03, -3.182e-04, -3.056e-02, 8.216e-03, -1.327e-02, -1.535e-02)); + r += mul(s4_7, M4(4.394e-03, -1.151e-02, -1.421e-01, -8.570e-02, -3.001e-04, -3.082e-03, -9.208e-03, -5.047e-03, -2.155e-02, -8.398e-03, -3.852e-02, -2.369e-02, -2.241e-02, -5.901e-02, -6.197e-02, -4.610e-02)); + r += mul(s4_8, M4(-2.005e-03, -7.680e-03, -7.675e-03, 1.350e-03, 2.217e-03, 5.842e-03, 6.929e-03, 4.388e-03, 2.925e-03, -1.936e-02, 2.993e-03, -6.092e-03, -5.771e-03, -1.297e-02, -8.820e-03, -4.081e-02)); + r += mul(s5_0, M4(-8.093e-03, -2.108e-02, -5.065e-03, -9.804e-03, 8.706e-03, -2.703e-03, 8.560e-03, -1.145e-03, 1.271e-02, 3.374e-04, -2.669e-03, 3.678e-04, 3.406e-02, -1.116e-03, -4.666e-03, -3.086e-03)); + r += mul(s5_1, M4(1.444e-01, 8.691e-02, -3.666e-03, -1.186e-02, -2.945e-02, 1.773e-02, -3.385e-03, 9.432e-03, -8.320e-02, 3.048e-02, 4.784e-02, 1.887e-02, 2.853e-02, 4.528e-02, -1.537e-02, 1.443e-02)); + r += mul(s5_2, M4(1.261e-03, 8.035e-02, 2.059e-03, 1.571e-02, -3.960e-03, -7.533e-03, 1.761e-03, 3.228e-04, 4.202e-02, -6.871e-02, -1.490e-02, -1.569e-02, -1.648e-03, -2.460e-02, 4.012e-03, 1.774e-03)); + r += mul(s5_3, M4(-1.331e-02, 2.152e-02, -2.614e-02, -9.924e-03, -1.947e-02, 1.059e-02, -1.041e-02, -1.621e-03, -2.572e-02, 2.033e-02, 2.865e-02, 5.849e-03, 5.318e-02, -5.221e-02, 1.551e-01, -2.830e-02)); + r += mul(s5_4, M4(-1.323e-01, -9.644e-02, 1.592e-01, 1.171e-01, 4.554e-03, -1.978e-02, 7.734e-04, 5.202e-03, 4.516e-01, -1.821e-01, -2.188e-01, -6.628e-02, -1.213e-01, 3.488e-01, 3.650e-02, 1.072e-01)); + r += mul(s5_5, M4(9.571e-03, -5.795e-02, 1.968e-03, 5.286e-02, 8.690e-03, 4.884e-03, -2.144e-03, 2.432e-03, 
-6.079e-02, -1.205e-01, -5.606e-02, 2.910e-01, 3.558e-03, -4.736e-02, 1.097e-03, -2.437e-02)); + r += mul(s5_6, M4(-2.386e-03, 3.239e-03, 1.889e-03, 2.179e-02, 5.114e-03, 2.074e-03, 7.194e-03, 7.554e-03, 2.152e-03, 2.239e-02, -2.804e-02, 7.534e-03, -9.674e-03, 8.039e-03, -5.096e-02, 7.962e-03)); + r += mul(s5_7, M4(9.905e-03, -1.465e-02, -1.165e-01, -1.121e-01, -1.333e-04, -2.307e-04, -1.100e-02, -1.205e-02, -8.494e-02, -1.837e-02, -8.022e-02, 1.404e-01, -7.194e-02, -5.811e-02, 4.097e-02, -2.056e-01)); + r += mul(s5_8, M4(-5.079e-04, 1.061e-02, 2.578e-03, -1.954e-02, 1.028e-03, 4.906e-03, 7.644e-03, 5.385e-03, 1.110e-02, 5.051e-03, 3.922e-02, -8.392e-02, -4.674e-03, -2.416e-02, -2.396e-02, -2.507e-02)); + r += mul(s6_0, M4(-5.674e-03, 1.696e-02, -3.176e-02, 2.557e-02, -6.480e-03, 4.567e-03, -1.663e-03, 1.376e-04, 1.050e-03, 2.005e-02, -7.700e-03, -4.650e-03, 5.174e-03, 2.959e-04, 5.847e-04, 6.868e-03)); + r += mul(s6_1, M4(1.993e-01, -3.446e-01, 5.166e-02, -6.301e-02, -2.130e-02, -2.832e-02, 1.387e-02, 6.658e-03, -1.506e-02, 1.915e-02, 1.594e-03, -1.395e-02, -3.983e-03, -5.809e-03, -4.394e-03, -2.282e-02)); + r += mul(s6_2, M4(-3.163e-02, 6.764e-02, -2.420e-02, 4.381e-02, -7.289e-04, -9.938e-03, 1.256e-04, -1.360e-04, -6.267e-03, -2.814e-02, -2.084e-04, -6.295e-03, 1.764e-03, 3.965e-02, 2.172e-03, 1.004e-02)); + r += mul(s6_3, M4(-3.805e-02, 6.931e-03, 5.346e-02, 1.400e-02, -5.747e-02, 6.911e-03, -4.360e-02, -2.992e-03, 1.962e-01, -5.279e-03, 1.268e-01, 1.710e-02, 3.139e-02, -9.558e-03, 2.899e-02, -3.663e-03)); + r += mul(s6_4, M4(3.873e-01, -1.973e-01, 4.189e-01, -5.647e-01, -6.008e-03, -7.391e-02, -5.739e-02, -7.744e-02, 8.159e-02, -3.188e-01, 1.655e-02, -5.484e-02, -3.178e-02, 4.290e-02, -2.692e-02, 9.210e-04)); + r += mul(s6_5, M4(-2.935e-02, -2.705e-02, -1.973e-02, 3.284e-02, 4.812e-03, -1.630e-03, -6.547e-03, -2.704e-02, 3.867e-03, 2.585e-02, -1.039e-02, -2.115e-02, 2.652e-02, -4.987e-03, 9.940e-03, 3.099e-02)); + r += mul(s6_6, M4(-1.325e-02, 1.862e-02, 
-6.437e-02, 3.401e-02, 8.708e-03, -9.918e-03, -1.084e-02, -6.120e-03, 9.072e-03, -9.683e-03, 9.543e-02, -1.780e-02, -1.107e-03, 1.807e-03, 8.317e-03, -4.227e-03)); + r += mul(s6_7, M4(-1.322e-03, -2.431e-02, 1.236e-01, 3.596e-02, 1.617e-02, 3.369e-02, 3.577e-02, 2.416e-02, 2.259e-02, 4.767e-04, 6.982e-02, -2.046e-01, 2.224e-03, -1.279e-02, -3.976e-03, 2.405e-02)); + r += mul(s6_8, M4(1.170e-02, 1.031e-02, -1.128e-02, -3.595e-03, -2.951e-03, -1.011e-03, 4.208e-03, 8.020e-03, -9.252e-03, 4.930e-03, -4.802e-03, 2.488e-02, 7.594e-03, 1.785e-02, 2.293e-02, 1.881e-02)); + r += mul(s7_0, M4(-3.035e-02, 1.103e-02, -1.247e-02, 8.212e-03, -3.649e-03, 7.823e-04, -4.061e-03, -5.997e-03, 6.184e-03, 8.128e-03, -1.791e-02, -3.658e-03, 9.366e-03, -1.359e-03, 6.250e-03, 7.013e-03)); + r += mul(s7_1, M4(-1.474e-02, -9.790e-02, 2.877e-02, -5.354e-03, 1.812e-03, -1.444e-02, -8.681e-04, -4.601e-03, -1.857e-02, 1.924e-02, -4.324e-03, -6.665e-04, -2.237e-02, -1.259e-02, 2.804e-02, -1.132e-02)); + r += mul(s7_2, M4(4.229e-03, 3.503e-02, -9.494e-03, 3.929e-03, 1.529e-03, 1.047e-02, -1.211e-03, -1.295e-03, -6.108e-03, -3.601e-02, 1.257e-03, -5.264e-03, -3.022e-02, -9.369e-03, 2.472e-02, 4.690e-02)); + r += mul(s7_3, M4(1.319e-02, -1.454e-02, -5.079e-03, 7.460e-03, -5.480e-02, 5.655e-03, -3.895e-02, -3.164e-03, 4.913e-02, 1.430e-02, 8.021e-02, 1.776e-02, 4.046e-02, 4.571e-03, -2.042e-03, -4.160e-03)); + r += mul(s7_4, M4(6.519e-02, 8.739e-02, -5.626e-02, -8.788e-02, -3.206e-01, -2.325e-01, 3.845e-02, 7.201e-03, -5.066e-02, -1.198e-02, -4.289e-02, 1.265e-02, 3.552e-01, 1.004e-01, -3.917e-01, -1.470e-01)); + r += mul(s7_5, M4(-2.531e-02, -1.486e-02, 4.528e-03, 2.192e-02, 7.700e-03, -1.460e-01, 5.769e-03, 8.122e-03, 1.705e-02, 1.979e-02, -6.251e-04, -3.190e-02, 6.387e-02, 2.744e-01, 1.321e-02, -1.953e-01)); + r += mul(s7_6, M4(1.643e-03, -1.660e-03, 4.977e-03, -1.102e-02, 1.609e-02, -5.231e-03, -9.750e-03, 3.558e-03, -1.151e-02, -5.640e-03, -1.219e-02, 3.597e-03, -2.938e-02, -1.077e-02, 
6.192e-02, -1.080e-02)); + r += mul(s7_7, M4(-8.093e-03, 1.761e-03, 6.421e-02, 7.350e-02, 8.811e-03, 2.184e-02, 2.007e-01, 1.038e-01, 1.103e-02, -7.409e-03, -1.095e-02, -1.284e-03, -1.262e-02, -5.736e-02, -5.778e-03, 1.275e-01)); + r += mul(s7_8, M4(7.431e-03, 4.067e-03, -1.131e-02, -1.269e-04, -4.994e-03, -3.172e-03, 5.994e-04, 8.132e-02, -1.893e-03, -5.842e-03, 1.233e-02, 1.106e-02, 1.308e-02, 1.715e-02, -7.352e-03, -1.630e-02)); + r += V4(4.937e-06, -9.679e-05, 1.474e-04, 7.536e-05); + return tanh(r); +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 
s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, 
s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-4x4C-NVL-DN.hlsl b/src/Effects/CuNNy/CuNNy-4x4C-NVL-DN.hlsl new file mode 100644 index 000000000..ff13858f7 --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-4x4C-NVL-DN.hlsl @@ -0,0 +1,486 @@ +// CuNNy 4x4C BILINEAR RGB NVL DN - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-DN-D04N04 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0 + +#define l0(x, y) min16float((dot(float3(2.428e-01, 4.714e-01, 1.229e-01), O(INPUT, float2(x, y)).rgb) + -7.696e-02)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(9.154e-02, 3.758e-01, 2.353e-02, -5.798e-02) * s0_0; + r += V4(-5.382e-01, 1.688e-01, -1.190e-01, 4.082e-02) * s0_1; + r += V4(2.460e-02, -5.810e-02, 7.788e-02, 3.018e-02) * s0_2; + r += V4(1.211e-01, -1.552e-01, -9.990e-02, 3.963e-02) * s0_3; + r += V4(-2.611e-01, -4.835e-01, -6.965e-01, -4.893e-01) * s0_4; + r += V4(-3.017e-01, -4.435e-02, 1.836e-01, 4.600e-01) * s0_5; + r += V4(1.275e-01, 2.485e-01, 7.354e-02, -4.648e-02) * s0_6; + r += V4(2.527e-01, 1.279e-01, 3.053e-01, 3.957e-02) * s0_7; + r += V4(1.003e-02, 1.193e-01, 2.476e-01, -2.051e-02) * s0_8; + r += V4(1.690e-02, 8.856e-03, -9.136e-04, 2.267e-02); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + 
min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.540e-03, -2.499e-01, 4.202e-02, 1.132e-02, 2.910e-02, -3.788e-02, 3.330e-02, -2.254e-02, -1.953e-01, 1.226e-01, -1.907e-01, -1.378e-01, 9.555e-02, -2.443e-01, 6.124e-02, -7.256e-03)); + r += mul(s0_1, M4(-1.225e-01, -1.812e-01, -1.238e-02, 4.088e-01, -9.977e-02, 4.395e-02, -2.394e-02, -5.584e-03, 2.939e-01, 4.102e-01, 6.228e-02, 3.822e-01, 8.618e-02, -1.109e-01, 1.776e-01, -7.505e-02)); + r += mul(s0_2, M4(2.047e-01, -6.853e-02, 1.880e-02, -9.030e-03, 1.505e-01, 7.782e-02, 1.347e-02, 5.566e-01, -6.951e-02, -1.352e-01, 1.941e-03, 3.975e-02, 1.637e-01, 6.708e-02, 1.501e-02, 1.373e-01)); + r += mul(s0_3, M4(-1.974e-01, 1.068e-01, -1.102e-01, 5.909e-02, 2.355e-03, 1.275e-01, -5.986e-02, -5.288e-02, 8.785e-04, -1.440e-01, -3.369e-01, -9.128e-02, 2.030e-01, 4.937e-01, -1.637e-01, 4.814e-02)); + r += mul(s0_4, M4(-3.954e-01, 4.772e-01, -5.841e-01, -8.070e-02, -2.056e-01, -2.335e-01, -2.091e-01, 1.223e-01, -2.686e-01, 1.240e+00, 7.095e-02, 6.502e-01, 1.044e-01, -3.071e-01, -2.892e-01, 4.861e-01)); + r += mul(s0_5, M4(5.943e-02, 2.245e-01, 4.014e-01, -1.063e-01, -1.869e-01, 1.384e-01, 2.996e-01, -1.928e-01, 1.212e-01, 2.849e-01, 2.093e-01, -3.821e-01, -8.705e-02, 1.976e-01, 5.176e-01, -7.461e-02)); + r += mul(s0_6, M4(1.048e-01, 2.374e-02, 2.730e-01, 1.446e-01, 
-5.406e-02, -1.587e-02, -2.014e-01, -3.422e-02, -2.114e-01, -5.198e-01, 2.674e-02, -6.078e-02, -2.293e-01, -9.914e-02, -2.110e-01, 7.008e-02)); + r += mul(s0_7, M4(5.799e-02, 4.932e-01, 4.559e-01, -3.118e-02, 4.706e-02, -2.242e-01, -3.165e-01, -9.912e-02, 4.041e-01, 7.241e-01, -1.696e-01, 1.990e-01, 4.697e-01, 9.965e-03, -1.141e-02, -1.365e-02)); + r += mul(s0_8, M4(-1.744e-01, -7.119e-02, 3.632e-01, -2.802e-01, -3.155e-01, 4.455e-01, -1.866e-02, -2.667e-02, 1.255e-01, -5.762e-01, -2.226e-02, 2.812e-02, -2.349e-01, 1.552e-01, -6.424e-03, 7.450e-02)); + r += mul(s1_0, M4(6.159e-02, -4.426e-02, 2.277e-02, 1.040e-01, -6.306e-04, -1.704e-01, 3.807e-02, -8.670e-02, -1.403e-01, 1.644e-01, -9.679e-02, -1.055e-01, 2.394e-01, -5.504e-02, 8.006e-02, 6.312e-02)); + r += mul(s1_1, M4(-1.134e-01, -1.030e-01, -2.777e-02, 2.955e-01, -1.225e-01, -4.096e-02, -2.748e-02, 9.404e-02, 2.890e-01, -2.441e-01, 1.560e-01, 1.694e-01, 1.853e-01, 3.311e-01, 3.408e-01, -8.678e-02)); + r += mul(s1_2, M4(1.821e-01, 3.898e-02, -2.560e-02, 1.160e-01, 2.382e-01, -1.638e-01, -1.345e-01, 3.193e-01, -1.839e-01, -2.638e-01, 5.265e-02, 2.415e-01, 2.803e-01, 1.919e-01, -7.340e-02, 1.762e-02)); + r += mul(s1_3, M4(-2.606e-01, -1.263e-01, -3.067e-02, -1.695e-02, 4.665e-03, 2.947e-02, -1.965e-02, -2.658e-02, -7.935e-02, -1.566e-01, -3.246e-01, -1.075e-03, 1.896e-01, -2.937e-01, -1.020e-01, -1.513e-01)); + r += mul(s1_4, M4(-3.696e-01, 8.901e-02, -1.890e-01, -2.804e-02, -2.998e-01, -6.597e-02, -2.613e-01, 3.877e-01, -1.032e+00, -2.328e-01, 7.941e-02, 5.733e-01, 8.618e-02, 4.213e-02, -1.242e+00, 5.861e-01)); + r += mul(s1_5, M4(1.919e-02, -5.609e-02, 3.295e-01, -2.364e-01, -4.238e-01, -6.041e-01, 3.389e-01, -4.460e-01, 4.482e-02, 1.077e-03, 8.990e-02, -2.725e-01, -4.829e-02, 1.184e-01, 1.941e-01, -3.646e-01)); + r += mul(s1_6, M4(2.968e-01, 2.018e-01, 2.695e-01, 8.891e-02, -5.857e-02, 6.005e-02, -2.440e-01, -1.349e-02, -7.572e-02, -3.213e-01, 6.274e-02, -1.229e-02, -7.589e-01, -2.313e-01, -1.627e-01, 
2.538e-01)); + r += mul(s1_7, M4(-5.728e-02, 1.333e-01, 2.492e-01, -3.609e-02, 1.936e-01, -1.276e-01, -3.034e-01, -1.091e-01, 1.390e-01, 3.356e-01, -1.183e-01, 2.047e-01, 3.779e-01, -3.353e-01, 2.019e-01, 4.337e-02)); + r += mul(s1_8, M4(-1.386e-01, 1.179e-01, 2.340e-01, -1.604e-01, -4.890e-01, -5.407e-01, -1.546e-01, -1.826e-01, 1.596e-01, -1.784e-01, 5.777e-02, 3.961e-02, -2.290e-01, 2.752e-01, -4.260e-02, 9.649e-02)); + r += V4(-4.697e-03, -2.213e-02, 3.898e-01, -1.481e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.362e-01, -5.847e-02, 2.766e-02, 2.969e-02, 
9.796e-02, 6.555e-02, -3.067e-02, -5.139e-02, 1.512e-01, 1.401e-01, -3.820e-03, 2.649e-02, -1.802e-01, -2.099e-02, -6.604e-02, 4.042e-02)); + r += mul(s0_1, M4(-2.144e-01, -1.437e-01, 4.670e-02, -2.348e-01, 9.990e-02, -5.186e-02, 1.658e-01, 9.557e-02, -1.353e-01, -1.146e-01, -9.837e-02, -8.956e-02, 1.229e-01, 2.354e-01, -2.342e-01, -1.343e-01)); + r += mul(s0_2, M4(5.918e-01, 2.130e-02, 5.753e-01, -6.941e-02, -3.156e-02, -4.438e-02, -6.348e-02, 2.682e-02, -1.078e-02, 9.727e-03, 8.472e-02, 1.460e-01, -1.921e-01, 1.872e-01, 6.067e-02, 3.762e-02)); + r += mul(s0_3, M4(1.341e-01, 1.082e-01, -4.460e-02, -1.008e-02, -1.262e-01, -7.942e-02, 5.610e-02, 4.418e-02, -1.725e-01, -1.158e-01, 6.377e-03, -1.171e-01, -3.447e-02, 4.459e-02, 2.822e-04, -7.623e-02)); + r += mul(s0_4, M4(1.994e-01, -2.251e-01, -2.432e-01, 2.467e-02, 3.717e-02, 3.275e-01, 2.005e-01, 1.427e-01, 1.122e-01, 2.864e-01, 1.478e-01, 3.701e-01, 3.111e-01, -1.704e-01, -1.410e-01, -7.490e-01)); + r += mul(s0_5, M4(-1.392e-01, -2.284e-02, 2.819e-01, -5.560e-02, -2.624e-01, 7.282e-02, -2.417e-01, -5.534e-02, -6.351e-03, -1.714e-01, -1.505e-01, -3.035e-01, -3.580e-02, 4.429e-02, 1.628e-01, -1.101e-01)); + r += mul(s0_6, M4(8.306e-04, 3.258e-02, -2.746e-02, -3.143e-02, -1.301e-02, -5.828e-02, 2.411e-03, 1.395e-02, 3.728e-02, -8.319e-02, 3.326e-02, 1.294e-01, -6.226e-02, 5.103e-02, -1.218e-02, 2.411e-01)); + r += mul(s0_7, M4(-6.323e-02, -1.343e-02, 3.400e-02, -1.727e-02, 3.683e-02, 6.325e-02, 4.834e-04, 3.849e-02, 9.424e-03, -2.010e-02, -3.447e-02, -1.330e-01, -4.107e-01, -7.682e-02, 4.138e-01, 5.994e-02)); + r += mul(s0_8, M4(7.556e-02, 1.846e-02, 1.847e-02, 1.057e-01, -1.140e-01, -2.834e-02, -3.141e-02, -1.045e-01, -2.025e-02, 4.729e-02, -2.822e-02, -4.072e-02, 3.368e-01, 6.871e-02, 1.184e-01, 1.536e-01)); + r += mul(s1_0, M4(-6.688e-02, 2.483e-02, 1.598e-01, -4.834e-02, 2.141e-01, -4.911e-02, -4.452e-02, -4.879e-02, -9.473e-01, 6.527e-01, -6.118e-01, -2.436e-01, -3.017e-02, -3.402e-01, 1.343e-01, 9.397e-02)); + r 
+= mul(s1_1, M4(-1.330e-01, 2.557e-01, 6.838e-02, -3.936e-01, 4.806e-01, 1.828e-01, 5.073e-01, 4.502e-01, -1.404e+00, -2.954e-01, -6.745e-02, 5.594e-02, 2.640e-01, 2.330e-02, 1.331e-02, -2.700e-02)); + r += mul(s1_2, M4(2.695e-01, -1.004e-01, 9.104e-02, -4.919e-01, 3.357e-01, 4.895e-02, 4.062e-01, -3.494e-02, -4.352e-01, -1.232e-01, 8.889e-03, 3.472e-01, -1.174e-01, 7.690e-02, 6.341e-02, 9.255e-02)); + r += mul(s1_3, M4(1.805e-01, 2.494e-01, 3.474e-02, 3.930e-02, 2.671e-02, -1.438e-02, 7.294e-02, 4.854e-02, -2.864e+00, -5.832e-01, 4.350e-01, -4.265e-01, -2.643e-02, -6.234e-01, 1.283e-01, 5.168e-02)); + r += mul(s1_4, M4(-2.192e-01, 2.982e-01, -2.860e-01, -4.050e-01, 8.612e-02, 5.008e-02, 5.366e-01, 5.256e-01, -6.222e-01, 1.169e+00, 1.897e+00, 3.009e+00, 9.105e-02, -2.369e-01, -4.718e-01, -2.725e-01)); + r += mul(s1_5, M4(-7.441e-01, -1.820e-01, -5.828e-02, -6.348e-01, 5.721e-01, 1.143e-01, 2.871e-01, 3.254e-01, -1.446e-01, 1.446e-01, -8.526e-02, 7.228e-01, -9.749e-02, -1.665e-01, -1.116e-01, -2.705e-01)); + r += mul(s1_6, M4(-6.357e-02, -2.576e-02, 1.277e-02, -3.956e-02, 2.724e-02, -2.141e-02, 9.778e-02, 7.199e-03, -1.153e+00, -6.945e-01, -4.788e-01, -1.246e+00, 1.909e-01, 1.315e-01, 4.454e-02, 2.678e-01)); + r += mul(s1_7, M4(-1.022e-01, 1.572e-01, 9.404e-02, 6.768e-02, 2.191e-01, -3.163e-02, 1.257e-01, 1.058e-01, -6.394e-01, 7.223e-03, -6.930e-01, -2.963e-01, -2.666e-01, 3.461e-03, 2.203e-01, -1.212e-01)); + r += mul(s1_8, M4(-1.179e-01, 7.311e-02, 1.371e-01, -4.039e-02, 2.171e-01, 3.131e-02, 2.219e-01, 1.564e-02, -4.895e-01, -5.067e-03, -4.528e-01, 5.694e-02, 6.858e-02, 6.808e-03, -1.017e-01, 6.675e-03)); + r += V4(-8.341e-03, 1.434e-02, 5.791e-03, -1.033e-02); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = 
l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.123e-02, 9.666e-03, 4.969e-02, 3.030e-02, 1.714e-02, -3.117e-02, -9.470e-02, 2.078e-03, 4.109e-02, -5.560e-02, 3.757e-02, -3.667e-03, -3.500e-02, -8.151e-02, 1.104e-01, -1.219e-01)); + r += mul(s0_1, M4(9.596e-02, -6.361e-02, 1.162e-02, -3.138e-02, -1.277e-02, -4.005e-02, 1.805e-02, -1.459e-02, -7.903e-03, 1.138e-02, 1.542e-02, -2.357e-02, -1.421e-01, -2.953e-01, 1.322e-01, 6.480e-03)); + r += mul(s0_2, M4(1.571e-01, -1.081e-01, 1.345e-01, -5.616e-02, -1.211e-02, 4.515e-02, 1.797e-02, 6.143e-02, -9.605e-02, 7.782e-02, -1.421e-01, 3.195e-02, 1.841e-01, -7.735e-02, 1.082e-01, 1.785e-02)); + r += mul(s0_3, M4(1.739e-03, -4.187e-02, 1.093e-01, 1.042e-01, -6.538e-03, 5.025e-02, -7.052e-03, -1.033e-01, -1.394e-01, -4.638e-01, 4.354e-02, -1.188e-02, 7.809e-04, 2.484e-01, -8.330e-01, -2.787e-01)); + r += mul(s0_4, M4(-6.489e-03, 
-6.309e-01, 7.169e-01, 1.557e-01, 1.478e-01, 2.977e-01, -2.818e-01, 5.129e-02, 7.598e-01, 8.124e-01, -1.262e-02, -1.325e-01, -2.764e-01, 3.485e-01, 4.717e-01, -2.467e-01)); + r += mul(s0_5, M4(2.022e-02, -1.396e-01, 1.865e-01, 1.568e-02, 3.924e-01, -2.466e-01, 4.990e-01, 3.971e-02, -1.176e-01, 1.792e-01, -2.861e-01, 3.555e-02, -1.428e-01, 2.528e-01, -2.085e-01, -1.311e-01)); + r += mul(s0_6, M4(3.340e-02, -1.203e-01, 1.014e-01, 1.154e-01, -9.031e-03, -5.586e-02, -5.700e-03, 2.391e-02, -3.509e-01, 6.729e-02, 1.004e-01, -3.277e-01, 1.026e-01, 3.286e-03, -6.603e-02, -3.238e-03)); + r += mul(s0_7, M4(-6.854e-01, 1.013e-01, -6.298e-02, -5.464e-01, 2.486e-01, -2.186e-01, 3.986e-02, 3.800e-01, -1.267e-01, 1.037e-01, 1.538e-01, -2.069e-01, 9.431e-02, 5.337e-02, -8.507e-02, 2.015e-01)); + r += mul(s0_8, M4(-5.009e-03, 1.493e-01, -3.010e-02, -2.429e-02, -3.137e-01, -2.276e-01, 1.556e-01, 1.452e-02, 2.063e-01, 3.699e-02, -1.675e-03, 8.221e-02, -6.732e-02, 8.296e-02, -8.474e-02, -1.458e-01)); + r += mul(s1_0, M4(-3.003e-02, -9.777e-03, 1.239e-02, -3.907e-02, 1.841e-01, -8.959e-02, 9.257e-02, 1.333e-01, 5.703e-04, -1.367e-01, -1.026e-01, 6.398e-02, 1.262e-02, 1.101e-02, 4.291e-02, -4.238e-02)); + r += mul(s1_1, M4(5.516e-02, 9.884e-04, -5.383e-02, -1.048e-02, 2.529e-01, 9.819e-02, 1.255e-01, 3.149e-02, -8.249e-02, -1.386e-02, 6.214e-02, 2.957e-02, 1.001e-01, 1.590e-01, 1.159e-02, 5.273e-02)); + r += mul(s1_2, M4(4.571e-02, -6.277e-03, 1.496e-01, -4.044e-02, 4.089e-02, -3.801e-02, -3.690e-02, -1.037e-01, -6.031e-02, 2.117e-03, -9.644e-02, 6.392e-02, 5.093e-02, -2.512e-02, 1.131e-01, 1.304e-01)); + r += mul(s1_3, M4(-3.118e-02, 2.185e-02, 1.763e-01, 8.327e-02, 6.337e-02, 8.724e-02, 6.808e-02, -4.070e-01, -6.922e-02, -2.417e-01, -1.175e-01, -1.845e-01, -3.773e-03, -1.869e-01, -9.345e-02, -2.340e-01)); + r += mul(s1_4, M4(-1.159e-01, -4.476e-01, 2.989e-01, 2.794e-01, 5.756e-01, -4.803e-01, -5.979e-02, -1.959e-01, 5.261e-02, -2.399e-01, -6.616e-02, -9.243e-01, 4.622e-01, 1.139e-01, 
2.482e-01, 2.254e-01)); + r += mul(s1_5, M4(1.064e-01, -1.989e-02, 8.581e-02, 3.218e-02, 3.344e-01, -5.684e-01, 4.009e-01, 4.482e-01, 7.737e-02, 8.716e-02, -1.382e-01, -7.145e-02, -1.225e-01, 1.471e-01, -1.866e-01, 3.674e-02)); + r += mul(s1_6, M4(5.376e-02, -6.192e-03, -1.760e-01, 7.590e-02, -3.279e-02, -1.888e-01, 2.057e-01, 2.114e-01, -3.941e-01, 5.584e-03, 9.400e-03, -4.289e-01, -2.289e-01, 1.880e-01, 3.184e-02, -4.442e-01)); + r += mul(s1_7, M4(-4.174e-01, -1.344e-01, 3.866e-02, 4.521e-02, -4.215e-01, 1.479e-01, 2.476e-01, -7.051e-01, -4.153e-01, 3.373e-01, 8.098e-02, -6.680e-01, 3.920e-01, -1.023e-01, -2.166e-02, 3.816e-01)); + r += mul(s1_8, M4(-3.441e-02, 3.404e-03, -4.958e-02, 9.652e-03, -1.930e-02, -2.470e-01, 1.610e-01, 1.112e-01, 2.574e-02, 2.310e-01, 3.643e-02, -5.044e-02, 7.788e-02, 1.923e-03, -7.115e-02, -6.575e-03)); + r += V4(1.370e-02, 1.151e-02, 2.567e-03, -1.881e-03); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, 
s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.376e-02, 2.931e-02, 7.304e-02, -5.238e-02, -6.500e-03, -3.887e-02, 2.506e-02, 5.201e-03, 5.599e-02, -1.951e-01, -3.847e-01, 8.685e-02, -1.106e-01, -3.954e-02, 1.571e-01, 2.293e-02)); + r += mul(s0_1, M4(-2.738e-02, 1.554e-01, 1.120e-01, 1.856e-02, 9.513e-03, -2.222e-01, -2.174e-01, -1.065e-02, 3.001e-02, 7.638e-02, -7.497e-02, -2.727e-02, -1.521e-02, 1.843e-01, 3.547e-01, -1.642e-02)); + r += mul(s0_2, M4(-2.533e-02, -1.959e-02, -6.274e-02, 8.121e-03, -8.703e-03, 5.091e-02, 6.548e-02, 1.988e-02, 4.089e-02, -4.827e-02, -4.089e-02, -4.361e-02, -1.112e-02, -1.101e-02, 2.968e-02, -2.196e-03)); + r += mul(s0_3, M4(1.813e-02, -2.087e-01, -2.474e-01, -1.066e-01, 2.549e-01, 6.466e-01, 3.169e-01, -1.109e-01, -1.551e-02, -3.119e-01, -3.959e-01, 2.141e-01, 1.121e-01, 3.268e-01, 1.038e-01, -5.818e-02)); + r += mul(s0_4, M4(-3.147e-01, 2.716e-01, 1.304e-01, 3.887e-01, 9.396e-02, -9.787e-02, -1.596e-01, -7.138e-02, -2.462e-01, -3.027e-01, 6.980e-01, -1.546e-01, 3.730e-02, -7.502e-02, -4.408e-02, 3.814e-02)); + r += mul(s0_5, M4(-4.177e-02, -1.326e-02, -7.497e-02, 1.168e-03, 5.595e-03, 3.603e-02, 2.589e-02, -2.179e-02, 1.998e-02, -3.544e-03, 1.125e-01, 2.648e-03, -2.417e-02, -1.876e-02, 4.009e-02, 5.481e-02)); + r += mul(s0_6, M4(-7.181e-02, -2.968e-02, -3.169e-02, -1.899e-02, -3.692e-02, -2.156e-02, 9.595e-02, 1.055e-01, -1.274e-01, -2.576e-02, 8.706e-02, 1.895e-01, 6.316e-04, -4.574e-02, 2.201e-02, 1.199e-01)); + r += mul(s0_7, M4(-2.193e-01, 1.563e-02, 1.287e-01, 2.403e-01, 2.222e-01, -1.748e-02, 1.486e-02, -7.685e-02, 4.971e-01, 2.920e-01, -2.253e-01, -8.145e-01, 3.018e-01, -4.559e-02, -1.509e-01, -3.003e-01)); + r 
+= mul(s0_8, M4(1.685e-02, -1.082e-02, 3.539e-03, -2.765e-02, -5.968e-03, -4.628e-03, 3.847e-02, 6.426e-02, -6.284e-02, 5.455e-02, -3.291e-02, 1.636e-01, 5.828e-02, -5.613e-02, -4.404e-02, -1.715e-02)); + r += mul(s1_0, M4(1.875e-02, 7.150e-02, 3.015e-02, -4.917e-02, 9.333e-03, -1.519e-01, -1.153e-01, 4.344e-02, -1.603e-02, -4.775e-02, -4.484e-02, 6.567e-02, -6.714e-02, 2.569e-01, 4.638e-01, 3.038e-02)); + r += mul(s1_1, M4(-4.046e-02, 1.372e-01, 2.476e-01, 6.565e-02, 6.481e-04, -1.529e-02, 1.376e-02, 1.367e-02, 2.941e-04, 1.423e-01, 2.311e-01, 7.538e-03, -6.762e-02, -3.992e-01, -1.160e-02, 3.123e-02)); + r += mul(s1_2, M4(-3.926e-02, 1.709e-04, -4.761e-02, -8.731e-03, 5.123e-03, 7.039e-02, 1.061e-01, -1.322e-03, 4.069e-02, -1.182e-01, -3.698e-04, -7.746e-02, -3.827e-02, 9.957e-02, 9.991e-02, 5.215e-02)); + r += mul(s1_3, M4(-1.865e-01, -9.784e-01, -5.871e-01, 1.384e-01, 2.097e-01, -1.229e-01, -4.912e-01, -4.254e-02, 3.395e-04, -8.968e-02, -6.923e-02, -4.916e-02, 2.424e-01, 7.730e-01, 2.573e-01, -2.380e-01)); + r += mul(s1_4, M4(-9.293e-01, 6.176e-01, 1.970e-01, 3.467e-01, 4.341e-01, 9.866e-01, 3.035e-01, -1.062e-01, -1.501e-01, 2.709e-01, 1.991e-01, -2.164e-01, 2.881e-01, -1.696e-01, -4.141e-01, -1.004e+00)); + r += mul(s1_5, M4(-8.323e-02, -1.285e-02, -3.468e-02, 1.551e-01, 1.330e-01, -1.238e-01, -1.675e-03, 5.588e-02, 2.128e-01, -2.327e-01, -2.891e-02, 1.567e-01, -1.448e-01, 8.781e-02, 3.254e-02, 7.142e-02)); + r += mul(s1_6, M4(1.231e-01, 5.139e-02, -9.426e-02, -2.822e-01, 1.761e-03, 6.853e-03, 1.165e-01, 7.861e-02, -9.715e-03, 5.489e-03, -1.066e-02, -8.332e-03, -9.111e-02, 3.911e-02, 1.757e-01, 2.222e-01)); + r += mul(s1_7, M4(2.275e-02, 1.199e-01, 5.904e-02, -2.051e-01, 6.950e-01, 1.592e-02, -9.888e-02, -6.701e-01, -9.096e-02, 3.203e-02, 1.204e-01, 2.153e-01, 1.448e-01, -5.225e-03, 6.786e-02, 2.005e-02)); + r += mul(s1_8, M4(-3.290e-02, -3.758e-02, -3.158e-02, 8.713e-02, 3.917e-02, 4.275e-02, -2.450e-02, 3.970e-02, 1.928e-01, 5.498e-02, -5.673e-02, 
-3.743e-01, 4.981e-02, -1.785e-02, 1.958e-02, 3.487e-02)); + r += V4(7.249e-03, 2.949e-03, 5.297e-03, 3.693e-03); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 6 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.340e-02, 8.171e-02, -1.124e-01, -5.065e-02, -5.505e-02, -5.540e-02, -3.000e-03, -1.346e-02, 3.800e-02, 4.944e-02, -2.084e-02, 6.388e-03, 8.566e-02, 2.480e-02, 1.184e-01, -1.075e-04)); + r += mul(s0_1, M4(-2.188e-02, -2.056e-01, 1.480e-02, -7.451e-02, 5.240e-02, 4.098e-02, -4.668e-03, 1.810e-02, -2.533e-02, -6.403e-02, 1.984e-02, -5.716e-02, -3.356e-03, -2.173e-01, 1.218e-01, 1.179e-01)); + r += 
mul(s0_2, M4(7.330e-03, 2.521e-02, 1.372e-02, 3.411e-02, -1.438e-02, -1.009e-02, 7.676e-03, -1.712e-02, 5.980e-03, 2.040e-02, -8.766e-03, 3.442e-02, -1.623e-02, -2.557e-02, -6.086e-03, 5.413e-04)); + r += mul(s0_3, M4(1.754e-01, 6.364e-02, 2.842e-01, 2.378e-01, -1.684e-01, -1.911e-02, -3.838e-01, -2.622e-02, 2.065e-01, 3.951e-02, 4.217e-01, 4.374e-02, -1.028e-02, 2.417e-02, -1.595e-02, 6.305e-02)); + r += mul(s0_4, M4(-5.620e-02, -8.609e-02, -1.256e-01, -3.166e-01, -1.712e-01, -1.602e-01, -1.577e-01, -4.901e-01, -5.012e-02, 1.082e-01, -7.271e-02, 4.072e-01, -7.789e-02, -1.725e-01, -1.397e-01, -4.507e-01)); + r += mul(s0_5, M4(1.401e-02, 4.716e-02, 1.486e-02, 4.642e-02, 1.131e-02, 3.865e-02, -9.865e-03, 9.301e-02, 3.441e-03, -8.098e-03, -6.012e-03, -1.549e-01, 1.486e-02, 1.872e-02, -2.469e-03, 1.294e-02)); + r += mul(s0_6, M4(-3.894e-02, -4.136e-05, -3.022e-02, 1.045e-03, -3.730e-02, -1.838e-02, -5.573e-02, -2.760e-02, 3.516e-02, 1.602e-02, 6.358e-02, 3.111e-02, -3.045e-02, -7.728e-03, -4.189e-02, -1.102e-02)); + r += mul(s0_7, M4(-1.184e-02, 1.728e-02, 7.925e-03, 6.763e-02, 2.590e-03, -9.456e-03, -4.407e-02, -2.044e-02, 4.472e-02, 2.228e-02, 7.233e-02, 4.863e-02, -1.814e-02, -2.034e-03, -4.994e-02, -2.460e-02)); + r += mul(s0_8, M4(-3.292e-03, -9.015e-03, -3.171e-03, -2.504e-02, 2.120e-03, 3.064e-02, 2.108e-02, 4.592e-02, 2.258e-03, -2.192e-04, -3.576e-03, 3.733e-02, -1.931e-03, -5.083e-03, 5.877e-03, -1.764e-02)); + r += mul(s1_0, M4(4.321e-02, -8.135e-02, -1.567e-01, -6.888e-03, -6.542e-02, -1.656e-02, 1.236e-02, -7.563e-03, 4.657e-02, 9.222e-03, -6.696e-03, -3.545e-03, -6.401e-01, 1.189e-01, 1.509e-01, 2.417e-01)); + r += mul(s1_1, M4(-2.058e-02, 1.174e-01, -2.482e-02, -8.423e-02, -1.692e-02, -1.094e-02, 3.530e-02, 1.780e-02, -9.937e-02, -9.030e-02, 2.304e-02, 1.294e-02, 7.976e-02, -3.096e-01, 1.382e-01, 2.456e-01)); + r += mul(s1_2, M4(4.491e-02, -1.336e-02, 3.593e-02, -3.503e-02, -8.630e-03, -4.295e-03, -1.356e-02, 3.843e-02, 9.887e-03, 1.913e-03, 2.247e-03, 
1.113e-02, -7.234e-04, -3.058e-02, 2.833e-03, -1.707e-02)); + r += mul(s1_3, M4(2.007e-01, 6.756e-02, 9.393e-01, 9.057e-02, -3.701e-01, -1.729e-02, -4.136e-01, 2.233e-02, 2.783e-01, 3.590e-02, 3.564e-01, 8.342e-03, 1.333e-01, 7.944e-02, -2.312e-01, 8.354e-02)); + r += mul(s1_4, M4(-3.334e-01, -2.705e-01, -4.072e-01, 3.946e-01, 5.159e-03, -5.860e-01, 1.578e-01, -3.614e-01, 5.366e-01, 4.699e-01, -3.700e-01, 9.463e-02, -4.090e-02, -9.767e-02, -7.999e-02, -4.859e-01)); + r += mul(s1_5, M4(5.700e-02, 6.092e-02, 4.114e-02, -1.564e-02, -1.345e-02, 9.692e-02, 1.456e-03, 9.371e-02, -3.845e-02, -4.751e-02, -2.509e-02, -2.842e-01, 2.938e-03, 2.387e-02, -6.191e-04, -3.120e-04)); + r += mul(s1_6, M4(3.888e-02, 4.969e-02, -1.851e-01, -9.866e-03, -3.527e-02, -1.377e-02, -7.594e-02, -2.619e-02, 3.259e-02, 9.636e-03, 8.622e-03, 1.788e-02, -3.505e-02, -1.048e-03, -1.329e-02, 1.425e-02)); + r += mul(s1_7, M4(6.891e-03, 8.118e-02, -6.443e-02, -1.487e-01, 2.183e-02, 1.106e-03, 6.656e-02, -9.506e-02, 7.418e-04, -6.015e-02, 3.594e-01, 1.039e-02, -3.600e-02, -7.771e-03, -3.406e-02, 2.935e-02)); + r += mul(s1_8, M4(-4.598e-03, -4.678e-03, 1.595e-02, -8.273e-03, 6.740e-03, 1.175e-02, -2.997e-02, -6.116e-03, -3.788e-02, -9.471e-02, -2.149e-02, 4.139e-02, -9.614e-03, -5.573e-03, -1.643e-02, -1.712e-02)); + r += V4(2.510e-03, 4.409e-03, 2.891e-03, 4.977e-03); + return tanh(r); +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + 
V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-4x4C-NVL.hlsl b/src/Effects/CuNNy/CuNNy-4x4C-NVL.hlsl new file mode 100644 index 000000000..d9235defd --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-4x4C-NVL.hlsl @@ -0,0 +1,486 @@ +// CuNNy 4x4C BILINEAR RGB NVL - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-D04N04 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0 + +#define l0(x, y) min16float((dot(float3(-4.174e-01, -7.873e-01, -1.763e-01), O(INPUT, float2(x, y)).rgb) + 1.011e+00)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(1.222e-01, 7.038e-03, 1.179e-01, 1.876e-01) * s0_0; + r += V4(1.025e-01, -2.993e-01, 3.154e-01, -1.050e-01) * s0_1; + r += V4(5.656e-02, -3.117e-03, -6.665e-02, -2.044e-01) * s0_2; + r += V4(-5.045e-01, -4.189e-01, -3.076e-01, -3.691e-01) * s0_3; + r += V4(1.365e-01, 6.699e-01, 3.389e-01, 4.561e-01) * s0_4; + r += V4(-7.690e-02, 2.655e-02, -1.044e-02, 7.271e-02) * s0_5; + r += V4(1.358e-02, 3.378e-03, -1.802e-01, -1.936e-01) * s0_6; + r += V4(8.227e-02, 1.550e-02, -1.820e-01, -1.670e-01) * s0_7; + r += V4(9.988e-03, 1.413e-03, -2.486e-02, 3.258e-01) * s0_8; + r += V4(3.566e-02, -1.308e-03, -5.595e-03, -5.246e-03); + return r; 
+} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.282e-01, 1.199e-01, 1.156e-01, -4.091e-02, -1.771e-02, -1.431e-01, -1.478e-02, 4.041e-02, -1.559e-01, 1.231e-02, -8.571e-02, 2.159e-02, -6.484e-02, 3.819e-02, -3.386e-02, -3.344e-02)); + r += mul(s0_1, M4(6.131e-02, 1.493e-01, 1.954e-01, -2.565e-01, 1.570e-01, -3.852e-01, -2.313e-01, 9.262e-02, 1.038e-01, -4.169e-01, -2.446e-01, 9.953e-02, -1.830e-01, -9.774e-02, -1.498e-01, 8.626e-02)); + r += mul(s0_2, M4(9.908e-02, 1.372e-01, -1.254e-02, 4.486e-03, 1.023e-01, 6.484e-02, 1.645e-01, -4.932e-02, -4.221e-02, -1.919e-01, -2.135e-02, 6.955e-02, -1.406e-01, 8.082e-02, -7.935e-02, 3.010e-02)); + r += mul(s0_3, M4(-7.203e-02, -1.210e-01, 1.084e-01, -6.958e-03, 1.303e-01, 1.030e-01, -2.392e-01, -1.084e-01, 2.173e-01, -7.864e-02, -2.983e-01, -3.510e-01, -3.076e-01, 4.533e-02, 1.940e-01, 4.051e-01)); + r += mul(s0_4, M4(9.270e-02, -4.072e-01, 2.338e-01, 4.098e-01, -1.440e-01, 6.971e-01, 5.515e-01, 2.682e-01, -1.401e-01, 3.504e-02, 1.366e-01, 6.149e-01, -3.330e-01, 1.880e-01, -4.170e-01, 3.244e-01)); + r += mul(s0_5, 
M4(-5.380e-01, -7.843e-02, -1.293e-01, -9.225e-02, 1.393e-01, -2.588e-01, 4.618e-01, -2.264e-02, -5.369e-02, 1.321e-01, -3.029e-02, 7.983e-02, -1.048e-01, 3.279e-02, -5.969e-02, -3.766e-03)); + r += mul(s0_6, M4(3.432e-02, 1.518e-02, 1.940e-02, -1.086e-01, 1.052e-01, -5.430e-02, -3.343e-02, 1.824e-01, -9.831e-02, 1.097e-02, 6.281e-02, 1.194e-01, 3.253e-02, 4.046e-02, -2.183e-02, -1.328e-01)); + r += mul(s0_7, M4(1.538e-01, 6.796e-02, -4.870e-01, 7.139e-02, -2.497e-01, 2.916e-02, 6.191e-01, -2.650e-01, -4.194e-02, 1.782e-01, -3.431e-01, -9.707e-02, 2.173e-02, -1.150e-01, -8.162e-03, 4.551e-02)); + r += mul(s0_8, M4(5.804e-02, 5.436e-02, -1.604e-01, 8.077e-02, 2.685e-01, 4.741e-02, 1.225e-01, -1.033e-01, -4.358e-02, -1.091e-01, 8.815e-02, -3.121e-02, -2.569e-02, -1.093e-02, -2.550e-02, -1.571e-02)); + r += mul(s1_0, M4(8.760e-02, 1.254e-01, 9.299e-02, -1.140e-02, 4.179e-02, -1.333e-01, 3.048e-03, -3.111e-02, -6.091e-02, 6.563e-03, 4.609e-03, -4.717e-02, -6.470e-02, -5.791e-02, -5.529e-03, 8.697e-02)); + r += mul(s1_1, M4(6.935e-02, 9.805e-02, 1.851e-01, -2.726e-01, 1.731e-01, -2.863e-01, -2.267e-01, -3.813e-02, 1.104e-01, -3.193e-01, -1.958e-01, 9.567e-02, 1.819e-01, -2.054e-01, 1.228e-01, 3.906e-02)); + r += mul(s1_2, M4(-1.957e-01, 7.733e-02, -2.023e-01, 1.297e-01, -1.646e-01, 1.304e-01, -1.728e-02, -4.396e-02, 7.828e-02, -2.639e-01, 3.389e-02, 1.101e-01, 1.388e-01, -4.075e-03, 1.023e-01, -7.785e-03)); + r += mul(s1_3, M4(-2.828e-02, -7.018e-02, 4.269e-02, -1.386e-01, 2.143e-02, 2.504e-01, -2.134e-01, -2.483e-01, 1.075e-01, -2.671e-02, -2.588e-01, -3.271e-01, 1.173e-01, -6.103e-02, 5.539e-01, 5.341e-01)); + r += mul(s1_4, M4(-2.415e-01, -2.975e-01, -6.622e-02, 4.027e-01, -5.871e-01, 7.506e-01, 1.939e-02, -1.680e-01, 4.796e-01, -2.840e-01, 5.077e-01, 9.122e-02, 1.463e-01, 2.124e-01, 6.358e-02, 2.993e-01)); + r += mul(s1_5, M4(4.298e-01, -1.754e-01, 5.357e-01, -1.440e-01, -4.439e-01, -3.819e-01, -1.009e-01, 2.113e-02, -2.275e-02, -1.842e-02, 1.441e-01, 6.590e-03, 
2.627e-02, 3.381e-02, 9.956e-02, -1.935e-02)); + r += mul(s1_6, M4(-5.557e-02, 3.378e-02, -2.451e-02, -1.718e-01, -2.037e-01, 1.631e-02, -2.822e-01, -7.724e-02, -6.657e-02, -2.282e-02, 2.673e-02, 8.716e-02, 1.291e-01, 9.472e-03, 3.810e-02, -1.134e-01)); + r += mul(s1_7, M4(1.441e-01, 4.331e-02, -4.741e-01, 2.165e-01, -5.974e-01, -2.669e-02, -4.949e-02, -3.179e-01, 1.007e-01, 1.512e-01, -4.138e-02, -7.470e-02, 8.828e-02, -1.400e-01, 5.797e-02, -4.988e-03)); + r += mul(s1_8, M4(-2.478e-01, 1.392e-01, -8.663e-02, -3.629e-02, 1.823e-01, 7.573e-03, -2.445e-01, -1.641e-02, -5.197e-02, -8.804e-02, 1.244e-01, 2.095e-02, 1.683e-02, -4.073e-02, -5.207e-03, -3.854e-03)); + r += V4(-4.317e-03, 2.687e-03, -1.530e-03, 4.681e-04); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, 
V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.921e-01, -2.132e-02, -5.460e-03, -6.681e-02, 9.988e-02, -2.228e-02, 4.719e-02, 9.124e-03, -1.072e-01, 1.506e-01, 2.070e-02, -4.671e-02, 2.244e-01, -4.895e-02, -8.150e-03, -9.520e-02)); + r += mul(s0_1, M4(8.226e-02, 4.651e-02, -1.842e-01, -3.376e-02, 1.349e-01, 2.148e-02, -1.746e-01, 1.671e-02, 9.761e-02, 7.581e-02, 1.470e-01, -8.582e-02, -1.149e-01, 2.143e-02, -1.597e-01, 1.626e-01)); + r += mul(s0_2, M4(-5.810e-04, -3.566e-02, 4.708e-02, -3.068e-02, 1.578e-02, 5.503e-03, 3.081e-02, -4.174e-02, 3.394e-01, 7.398e-02, -9.467e-02, -1.127e-01, -1.314e-01, 1.511e-02, 1.538e-01, -5.695e-03)); + r += mul(s0_3, M4(2.959e-01, 3.316e-02, -5.716e-02, -2.233e-01, 5.020e-01, -1.416e-01, -6.082e-02, -3.393e-01, 3.292e-01, -6.813e-02, 9.009e-02, -1.638e-01, 1.190e-01, -2.728e-02, -6.042e-02, -1.360e-01)); + r += mul(s0_4, M4(5.902e-01, 3.040e-01, -2.870e-01, 2.228e-02, -1.646e-01, 2.078e-02, -1.480e-01, 2.083e-01, -4.397e-01, -2.549e-01, -1.168e-01, -4.199e-01, 2.199e-01, 2.596e-02, 2.598e-02, -1.313e-01)); + r += mul(s0_5, M4(1.043e-01, 1.050e-02, -5.654e-02, -1.265e-01, -1.978e-01, 3.772e-02, 2.474e-01, 1.395e-01, 2.041e-01, 6.617e-02, -2.602e-01, -1.601e-01, -5.577e-02, -1.591e-02, 2.096e-01, 2.594e-02)); + r += mul(s0_6, M4(7.245e-02, 6.156e-02, 5.317e-02, -3.912e-01, 1.871e-01, -2.079e-02, -2.552e-02, -6.961e-02, 2.686e-01, 8.518e-02, -1.026e-01, -4.040e-01, -6.324e-02, 7.999e-03, 1.317e-02, 1.619e-02)); + r += mul(s0_7, M4(1.240e-01, -8.349e-02, -1.258e-01, -3.269e-01, 6.624e-01, -1.357e-01, -6.738e-01, -5.998e-01, -8.375e-04, 2.226e-01, -1.880e-01, 5.678e-02, -8.383e-02, -3.455e-02, -1.399e-02, 4.540e-02)); + r += mul(s0_8, M4(-3.130e-02, 9.691e-02, 1.763e-01, -1.847e-02, -1.193e-01, -7.494e-03, 1.485e-02, 1.244e-02, 9.559e-02, 3.116e-02, 8.046e-03, -1.264e-01, -2.403e-01, 6.389e-02, 
2.999e-01, 1.484e-01)); + r += mul(s1_0, M4(2.569e-01, -8.689e-03, -1.806e-02, -3.993e-02, 9.155e-02, -2.022e-02, 1.034e-02, -3.455e-02, -1.534e-01, 1.836e-02, -1.176e-03, 3.593e-03, 2.642e-01, -6.587e-02, -4.169e-02, -2.237e-01)); + r += mul(s1_1, M4(1.398e-01, 1.020e-02, -2.478e-01, 2.747e-02, 7.152e-02, 1.835e-02, -2.013e-01, 1.151e-02, -2.586e-01, -3.622e-02, 2.529e-01, 1.465e-01, -3.973e-01, 5.907e-02, -9.450e-02, 3.761e-02)); + r += mul(s1_2, M4(3.157e-02, 7.847e-03, 8.109e-03, -3.333e-02, -3.333e-02, -6.401e-03, -6.632e-03, 3.296e-02, -1.433e-02, 2.167e-02, 1.194e-01, -1.028e-01, -2.104e-01, 1.352e-02, -6.835e-02, 1.901e-01)); + r += mul(s1_3, M4(3.443e-01, -1.004e-01, -6.176e-02, -3.047e-01, 4.779e-01, -7.928e-02, -8.134e-02, -4.873e-01, -1.421e-01, 3.972e-02, 7.459e-02, 2.099e-01, 1.118e-01, -1.022e-02, -8.584e-02, -1.657e-01)); + r += mul(s1_4, M4(-1.721e-01, 2.625e-02, -7.292e-03, 2.646e-01, 2.505e-02, 1.479e-01, -3.357e-01, 1.088e-01, 1.016e-01, -1.902e-01, -1.622e-01, -6.326e-02, -4.305e-01, 4.763e-01, -1.357e-03, -5.685e-01)); + r += mul(s1_5, M4(3.324e-03, 1.692e-02, -5.726e-02, 2.853e-02, -3.135e-01, -4.534e-03, 2.549e-01, 1.183e-01, -1.277e-01, -5.030e-02, 9.190e-02, 1.145e-01, 3.445e-01, 6.425e-02, -2.707e-01, -1.701e-01)); + r += mul(s1_6, M4(2.164e-02, 1.998e-02, 1.667e-02, -6.126e-02, 2.400e-01, -9.253e-02, -4.525e-02, 8.615e-03, 5.148e-02, -1.803e-02, -7.495e-02, -7.102e-02, -2.646e-02, 6.819e-02, 1.465e-01, 1.904e-01)); + r += mul(s1_7, M4(-2.339e-02, 3.350e-02, -1.274e-01, 5.525e-02, 9.120e-01, -9.074e-01, -6.856e-01, -7.422e-02, 4.849e-02, -1.377e-02, -1.409e-01, -5.792e-02, -1.044e-01, 9.079e-02, 2.520e-01, 2.053e-01)); + r += mul(s1_8, M4(1.891e-02, -1.562e-02, -1.024e-02, -2.686e-02, -1.038e-01, -3.210e-02, 4.222e-01, -2.084e-01, -1.841e-01, 3.231e-02, 7.320e-02, 1.727e-01, 2.861e-01, 2.506e-02, -2.266e-01, -3.940e-01)); + r += V4(-1.043e-03, 3.601e-03, 5.622e-03, -7.848e-04); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + 
float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-7.801e-03, 7.517e-03, 5.348e-02, 7.686e-02, -8.770e-03, 1.144e-02, -2.398e-02, 1.355e-02, -4.642e-02, 5.880e-02, 3.263e-02, 1.860e-01, -4.443e-02, -2.732e-02, -2.133e-02, -1.166e-01)); + r += mul(s0_1, M4(-1.751e-02, -1.230e-02, -1.218e-01, -1.231e-01, 4.092e-03, -8.769e-03, -2.251e-03, 5.142e-02, 4.354e-03, -4.445e-02, -2.369e-01, -1.616e-01, 4.495e-03, -1.326e-01, -5.371e-01, -5.119e-01)); + r += mul(s0_2, M4(3.143e-02, 2.366e-02, 8.884e-02, -1.819e-02, 2.358e-03, 3.812e-04, -4.972e-02, -5.311e-02, 1.729e-02, 1.523e-02, 7.798e-02, -1.705e-05, -2.295e-02, 6.567e-02, 1.422e-01, 1.890e-01)); 
+ r += mul(s0_3, M4(2.363e-02, 1.555e-02, -1.307e-01, -8.190e-02, 1.026e-02, 9.724e-03, 5.358e-02, -2.783e-01, 7.268e-03, 1.659e-01, -5.801e-02, 3.076e-01, -1.575e-01, -9.567e-02, 3.294e-02, -7.694e-01)); + r += mul(s0_4, M4(1.677e-02, -1.324e-01, 4.019e-01, -2.902e-01, -6.051e-02, -4.625e-02, 8.409e-01, 4.756e-01, -1.135e-01, -3.213e-01, 6.389e-02, -2.083e-01, -1.219e+00, 2.280e-01, 9.667e-01, -3.604e-01)); + r += mul(s0_5, M4(-5.948e-02, 1.567e-01, 3.883e-02, -4.843e-03, -2.153e-02, 3.439e-02, -1.160e-01, -1.325e-02, -5.312e-02, 1.136e-01, -5.260e-02, -3.524e-02, 7.315e-02, 3.527e-01, 6.186e-01, -7.505e-02)); + r += mul(s0_6, M4(-3.841e-02, 1.620e-03, 9.449e-02, -8.648e-02, -2.656e-02, -1.676e-03, 2.364e-03, -7.221e-02, -9.590e-02, 4.160e-02, -1.278e-02, -3.171e-02, 6.213e-02, 2.673e-02, -7.931e-02, 2.588e-01)); + r += mul(s0_7, M4(-3.636e-02, -1.558e-01, 2.151e-01, 1.188e-01, 1.275e-01, -8.114e-02, -8.376e-02, -3.690e-02, -1.968e-02, -1.038e-01, 8.994e-02, 3.846e-02, -1.499e-01, 6.457e-01, -8.201e-02, -3.935e-01)); + r += mul(s0_8, M4(-2.833e-03, 2.529e-01, -3.350e-03, -3.433e-02, 1.943e-02, -2.796e-02, 3.313e-02, 1.582e-02, 1.702e-02, 5.663e-02, -1.647e-02, -2.229e-02, -4.865e-01, 3.285e-01, -4.462e-01, -4.307e-01)); + r += mul(s1_0, M4(-6.004e-02, 4.898e-03, 3.591e-02, 1.900e-01, -3.816e-02, -3.269e-02, 1.459e-01, -3.464e-03, -1.235e-02, -3.737e-02, 1.569e-02, 2.559e-01, -3.173e-04, 1.268e-02, 8.886e-03, 2.960e-02)); + r += mul(s1_1, M4(-1.582e-02, -7.507e-02, -2.026e-01, 2.027e-01, -6.107e-02, 2.055e-02, -5.811e-02, 5.420e-03, 1.028e-02, -1.374e-02, -6.152e-01, -2.259e-01, -3.408e-03, -1.800e-02, 4.574e-02, -9.590e-02)); + r += mul(s1_2, M4(4.210e-02, 2.126e-02, 8.277e-02, 2.079e-02, -1.733e-01, -2.483e-02, 2.686e-01, 1.498e-01, 7.352e-02, -2.511e-02, 3.159e-02, 5.775e-02, 5.942e-02, 3.383e-02, 1.274e-01, -5.928e-02)); + r += mul(s1_3, M4(5.614e-02, 7.561e-02, -8.328e-02, 2.427e-01, 7.214e-02, -1.122e-01, 9.434e-02, -2.602e-01, -1.052e-02, -6.944e-02, 
-3.023e-02, -1.655e-01, 1.236e-03, 4.025e-03, -3.082e-02, -1.533e-01)); + r += mul(s1_4, M4(6.675e-01, -2.254e-01, 1.173e+00, -8.261e-02, 5.655e-01, -2.000e-01, 8.301e-01, 1.458e+00, -2.497e-01, -1.091e+00, -4.698e-01, -1.876e-01, -3.358e-02, -2.854e-01, 5.032e-01, -1.558e-01)); + r += mul(s1_5, M4(-1.444e-02, 1.502e-01, -4.221e-02, -4.864e-02, 3.236e-01, -2.572e-01, 1.344e-01, 8.562e-02, -1.030e-01, 2.690e-01, 1.238e-01, 3.309e-02, -3.849e-02, 1.860e-01, 6.528e-03, 2.840e-02)); + r += mul(s1_6, M4(-1.161e-01, 5.405e-02, -3.101e-02, -1.009e-01, -9.594e-02, -1.207e-02, -3.836e-02, -6.894e-02, -1.770e-02, -2.958e-02, 8.484e-02, -2.284e-02, 2.585e-04, -2.764e-02, 4.972e-02, -5.968e-02)); + r += mul(s1_7, M4(-4.113e-02, -1.948e-01, -2.728e-02, -3.142e-02, -2.894e-01, -1.111e-01, 7.492e-02, -2.892e-02, 9.054e-02, 4.350e-02, 2.183e-01, 1.489e-01, 1.167e-02, -6.678e-02, 3.696e-02, -1.315e-02)); + r += mul(s1_8, M4(2.532e-02, 4.585e-02, -3.694e-02, -6.244e-02, -1.673e-01, 6.180e-02, -4.475e-02, 1.028e-02, -1.658e-02, 8.923e-02, 1.711e-02, 3.037e-03, 4.651e-02, 1.652e-01, 7.863e-03, -3.387e-02)); + r += V4(-6.562e-04, 7.371e-04, -4.319e-03, -8.757e-04); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = 
max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.087e-01, -5.083e-02, 3.146e-01, -4.241e-02, 4.462e-02, -4.358e-02, -1.562e-01, -2.609e-03, 5.918e-02, -2.526e-02, -3.132e-02, -1.150e-02, -8.799e-03, 3.070e-02, -1.680e-02, -1.046e-02)); + r += mul(s0_1, M4(1.762e-01, 8.784e-01, -2.704e+00, -1.565e+00, -1.473e-01, -5.723e-01, 7.838e-02, -7.420e-03, -1.769e-01, -2.041e-01, -1.783e-03, -4.944e-03, 1.304e-02, 2.646e-01, -1.708e-01, 7.483e-03)); + r += mul(s0_2, M4(-1.907e-01, 1.514e-01, -3.657e-01, -5.840e-01, -4.943e-02, -1.014e-02, -2.869e-03, 6.488e-03, 2.266e-02, -3.850e-02, 6.125e-03, 1.899e-02, -3.541e-02, -2.011e-01, 1.567e-01, 1.008e-02)); + r += mul(s0_3, M4(-3.061e-01, -1.768e-01, 9.163e-02, -2.243e-01, 4.945e-02, 1.106e-01, -1.137e-01, 1.755e-02, 2.640e-01, -9.298e-02, -1.704e-01, 3.935e-02, 1.506e-01, -3.284e-02, 4.719e-02, 5.543e-02)); + r += mul(s0_4, M4(-4.579e-01, -6.198e-02, -9.889e-01, -4.446e-01, -1.612e-01, 1.518e-01, 2.588e-01, 1.075e-02, -1.527e+00, -7.923e-01, 8.120e-02, -1.116e-01, -2.079e-01, -1.206e-01, -4.422e-01, -1.951e-01)); + r += mul(s0_5, M4(1.064e-01, -1.684e-01, 2.316e-01, 4.211e-01, -9.153e-02, 9.155e-02, -7.649e-02, -1.385e-01, 9.422e-02, -1.631e-01, 8.278e-02, 3.318e-01, 7.284e-02, 3.489e-01, -2.303e-02, -6.554e-01)); + r += mul(s0_6, M4(-6.320e-02, -4.390e-02, 1.453e-02, 3.187e-02, 2.166e-02, 2.423e-03, 1.573e-03, -2.226e-02, 1.401e-01, 2.026e-01, 
-2.249e-01, 6.471e-02, 3.593e-02, -1.575e-02, -3.186e-02, 1.339e-02)); + r += mul(s0_7, M4(2.778e-02, 7.495e-02, -1.086e-01, 8.862e-02, -2.352e-02, 1.477e-02, 2.741e-02, 4.345e-02, -2.865e-01, 9.405e-02, 1.880e-01, -3.610e-01, -7.797e-02, -5.710e-03, 3.386e-02, 2.830e-02)); + r += mul(s0_8, M4(-3.734e-02, 3.357e-02, 5.657e-03, -1.596e-01, -7.661e-03, 1.603e-02, -3.137e-02, -7.023e-03, 6.522e-03, -2.715e-02, 2.765e-02, 4.724e-02, 1.922e-02, 3.944e-02, -8.276e-02, -1.915e-02)); + r += mul(s1_0, M4(-7.121e-02, -2.276e-02, 7.266e-02, -4.411e-03, -5.600e-01, 4.502e-01, -1.817e-01, -2.906e-01, -5.675e-02, 2.653e-02, 3.284e-02, -1.925e-03, -4.729e-03, -1.554e-03, -6.081e-03, -2.195e-02)); + r += mul(s1_1, M4(2.212e-01, 3.154e-01, -2.765e-01, 4.432e-02, 1.402e+00, 2.159e-01, 4.402e-01, 2.537e-01, 6.697e-02, 1.207e-01, -5.192e-02, 2.638e-02, 5.366e-02, 5.855e-02, -3.687e-02, 4.389e-03)); + r += mul(s1_2, M4(3.137e-02, -1.157e-01, 9.497e-02, -3.724e-02, 5.241e-02, 7.793e-02, 2.277e-04, -4.033e-01, 1.432e-02, 4.622e-02, -1.636e-02, -5.840e-03, -1.593e-02, -7.447e-02, 3.943e-02, -3.517e-03)); + r += mul(s1_3, M4(-1.209e-02, -1.350e-01, 3.018e-01, 1.233e-01, -1.262e-03, 2.194e-01, -2.919e-01, -8.031e-03, 4.620e-03, 5.318e-02, 1.247e-02, -4.260e-02, 7.155e-02, 3.256e-02, -9.839e-02, -6.741e-04)); + r += mul(s1_4, M4(3.291e-01, 2.397e-01, -2.820e-01, 5.703e-01, 7.831e-03, 5.816e-02, -1.696e-02, -1.957e-01, -1.851e-01, 3.696e-02, -2.611e-01, 7.039e-03, -1.562e-01, -7.676e-01, 9.080e-01, 7.823e-02)); + r += mul(s1_5, M4(9.918e-03, 6.364e-02, 3.364e-02, -3.291e-01, 1.393e-02, 3.139e-02, 1.701e-02, -5.675e-02, 5.085e-02, -2.050e-01, 1.160e-01, 4.875e-02, -1.189e-01, 2.310e-01, -1.353e-01, 2.046e-02)); + r += mul(s1_6, M4(-5.477e-03, -1.704e-02, 9.510e-03, -1.701e-02, 1.391e-02, -8.760e-03, -3.355e-02, -6.898e-03, -9.203e-03, -2.442e-02, 7.547e-03, 1.817e-02, 1.871e-02, -1.149e-02, 6.458e-02, 1.403e-02)); + r += mul(s1_7, M4(-5.073e-03, -5.454e-02, -2.710e-02, 1.292e-02, 2.458e-02, 
1.739e-02, -2.319e-03, 3.865e-02, 5.399e-02, -1.176e-02, -1.315e-01, 1.489e-01, -7.903e-02, 8.120e-02, 4.749e-02, 1.961e-01)); + r += mul(s1_8, M4(4.163e-02, -1.603e-02, 8.659e-03, 1.023e-01, 5.233e-03, -2.900e-03, -5.293e-03, -5.829e-03, -1.453e-02, 2.467e-02, 7.198e-02, -2.407e-01, -4.023e-02, 1.009e-01, -1.560e-01, -1.567e-01)); + r += V4(-3.709e-04, 2.029e-04, -3.042e-03, -2.970e-04); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 6 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.857e-02, -6.042e-02, 3.293e-03, -2.389e-03, -1.606e-01, -1.556e-02, -5.115e-02, -4.602e-02, -3.762e-02, 1.994e-02, 
-2.370e-02, 3.558e-02, -7.142e-01, 8.184e-01, -1.361e-01, 1.228e-01)); + r += mul(s0_1, M4(-1.887e-01, -2.260e-01, 1.293e-02, -1.757e-02, 1.257e-01, 1.304e-01, -4.525e-02, 4.471e-02, 6.895e-01, -4.096e-01, 4.096e-02, 1.817e-02, -1.343e-01, -4.170e-01, 3.991e-03, 1.516e-03)); + r += mul(s0_2, M4(-2.667e-01, -8.692e-02, 1.481e-01, -1.466e-01, 6.142e-02, -2.084e-02, 1.942e-02, 6.700e-04, 3.942e-02, 3.109e-01, -1.323e-02, 2.240e-02, -2.306e-02, -4.749e-02, -1.155e-02, 1.843e-03)); + r += mul(s0_3, M4(-1.004e-01, -1.184e-02, -8.590e-02, -1.018e-01, 6.862e-02, -4.700e-02, -1.537e-01, -1.096e-01, -1.228e-01, 1.462e-02, -1.715e-01, 1.862e-02, 3.668e-01, -1.138e-01, 8.494e-04, 6.113e-01)); + r += mul(s0_4, M4(4.389e-01, -5.527e-01, -4.972e-01, -7.620e-01, 1.684e-01, 5.375e-02, 1.032e+00, 5.723e-01, 4.427e-02, -2.447e-01, 1.132e+00, -5.297e-01, 1.150e-01, 3.877e-01, 1.224e-01, 1.294e-01)); + r += mul(s0_5, M4(-1.023e+00, 1.567e+00, -9.747e-01, 1.051e+00, 1.537e-02, 1.993e-01, -1.679e-01, 1.139e-01, -7.358e-02, -1.782e-01, -1.938e-01, 4.419e-02, 2.001e-02, 5.881e-02, 8.971e-03, 3.368e-03)); + r += mul(s0_6, M4(-5.126e-03, 1.449e-02, -7.018e-02, 2.929e-02, 4.748e-02, -4.443e-03, -5.791e-02, -3.490e-02, 3.817e-02, 1.007e-02, -5.501e-02, -1.488e-02, -8.848e-03, 4.884e-02, -6.548e-02, 3.392e-02)); + r += mul(s0_7, M4(-4.449e-02, 7.313e-02, 3.311e-01, 3.138e-02, -6.466e-02, 5.666e-02, 1.929e-01, 8.274e-02, 3.994e-02, 2.105e-02, -1.821e-01, -1.539e-02, -9.333e-03, -4.728e-02, 6.975e-03, -3.292e-03)); + r += mul(s0_8, M4(2.038e-01, -2.356e-01, -1.987e-01, -3.746e-02, -1.499e-02, -7.007e-02, -9.546e-02, 1.905e-02, -9.802e-03, 1.990e-02, 2.140e-02, -8.164e-03, 5.109e-03, -2.081e-02, -2.386e-02, 1.183e-02)); + r += mul(s1_0, M4(-7.067e-02, -4.613e-02, -5.433e-04, -2.191e-02, -1.125e-01, -3.650e-02, -1.298e-02, -3.479e-02, -1.118e-01, -1.521e-02, -4.731e-03, -7.478e-03, 1.802e-01, 4.872e-02, -1.599e-03, -1.452e-02)); + r += mul(s1_1, M4(-2.920e-01, -1.831e-01, -1.305e-02, 4.031e-02, 
1.989e-01, 3.120e-03, 2.025e-02, 5.432e-02, 2.607e-01, 2.403e-02, 1.863e-02, 8.423e-02, -3.372e-01, -1.327e-01, -1.248e-01, -1.247e-01)); + r += mul(s1_2, M4(-9.286e-02, -1.948e-01, -8.532e-03, 7.416e-03, 4.578e-02, 1.581e-01, 1.473e-03, -3.796e-02, 1.011e-01, 2.393e-01, 2.742e-02, -4.224e-02, -9.579e-03, -9.888e-02, -2.065e-03, 7.685e-03)); + r += mul(s1_3, M4(-2.056e-01, -3.479e-02, -2.666e-01, -5.344e-02, 1.579e-01, -6.091e-02, -1.655e-01, -1.575e-01, -8.230e-02, -4.748e-02, -1.304e-01, -7.186e-02, 2.953e-01, 6.950e-02, 1.865e-01, 7.567e-02)); + r += mul(s1_4, M4(3.408e-01, -1.054e-01, -2.613e-01, -6.084e-01, 3.193e-01, 6.366e-01, 4.251e-01, 4.066e-01, -3.742e-01, -8.521e-02, 5.906e-01, 1.870e-01, 2.044e-02, 2.495e-01, 1.046e-01, 3.018e-01)); + r += mul(s1_5, M4(4.748e-03, 2.086e-01, 4.231e-03, -7.764e-03, 3.933e-02, 3.446e-03, -3.431e-02, 8.415e-02, -3.798e-02, -3.428e-01, -7.206e-02, 2.392e-01, 2.157e-02, 2.692e-02, 3.313e-02, 1.841e-02)); + r += mul(s1_6, M4(1.813e-02, 2.306e-03, -3.402e-02, 1.009e-03, 4.408e-02, -2.307e-02, -3.394e-02, -3.912e-02, 3.822e-02, -1.051e-02, -1.023e-01, -4.626e-02, -4.871e-02, 6.250e-03, 1.367e-01, 3.674e-02)); + r += mul(s1_7, M4(-1.170e-02, 3.747e-02, 1.548e-01, 1.243e-01, -1.074e-01, -9.848e-03, 2.627e-01, 1.132e-01, 4.550e-02, 5.050e-02, -1.194e-01, -6.091e-02, -2.180e-02, -6.381e-02, -5.949e-02, 1.580e-02)); + r += mul(s1_8, M4(-1.146e-04, -1.852e-02, -1.515e-02, 2.488e-02, -1.877e-02, -7.739e-02, -6.812e-02, 7.656e-03, 2.688e-02, 5.650e-02, 4.285e-02, -3.270e-02, 1.163e-03, 8.328e-04, -1.998e-02, -2.282e-02)); + r += V4(-3.259e-04, -3.197e-04, 4.954e-04, 4.568e-04); + return tanh(r); +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); 
+ V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-4x8C-NVL-DN.hlsl b/src/Effects/CuNNy/CuNNy-4x8C-NVL-DN.hlsl new file mode 100644 index 000000000..6f6f7748f --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-4x8C-NVL-DN.hlsl @@ -0,0 +1,921 @@ +// CuNNy 4x8C BILINEAR RGB NVL DN - https://github.com/cunnyplapper/CuNNy + +// 
This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see https://www.gnu.org/licenses/. + +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-DN-D08N04 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1 + +#define l0(x, y) min16float((dot(float3(2.214e-01, 4.385e-01, 1.006e-01), O(INPUT, float2(x, y)).rgb) + -6.858e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { /* Pass-1 filter bank 0: scales a constant 4-component weight vector by each of the nine scalar taps s0_0..s0_8, sums the products and adds a constant bias vector. Pass1 stores the returned V4 in t0. */ + V4 r = 0.0; + r += V4(-2.401e-02, 1.817e-03, -1.218e-01, 2.796e-02) * s0_0; + r += V4(3.256e-02, 3.929e-03, -5.850e-02, -5.602e-02) * s0_1; + r += V4(4.497e-04, -1.812e-02, 5.241e-02, 3.698e-02) *
s0_2; + r += V4(5.371e-01, -2.302e-01, -1.373e-01, -4.038e-03) * s0_3; + r += V4(1.565e-01, -6.067e-02, 3.397e-01, -3.741e-01) * s0_4; + r += V4(-2.095e-03, 4.044e-02, -3.770e-02, 5.665e-02) * s0_5; + r += V4(-1.993e-01, -2.645e-01, -8.892e-02, 1.948e-02) * s0_6; + r += V4(-4.865e-01, 5.400e-01, -1.396e-01, 1.270e-01) * s0_7; + r += V4(-1.667e-02, -9.433e-03, -1.324e-02, -1.803e-03) * s0_8; + r += V4(2.880e-04, 1.418e-02, 1.413e-02, -1.036e-01); /* constant bias term, added without a tap factor */ + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { /* Pass-1 filter bank 1: same structure as f0 (nine tap-scaled weight vectors plus a bias) with a different set of constants. Pass1 stores the returned V4 in t1. */ + V4 r = 0.0; + r += V4(-4.610e-02, -6.199e-01, 8.493e-03, -1.532e-02) * s0_0; + r += V4(-7.178e-02, 5.957e-01, 1.575e-03, 1.807e-02) * s0_1; + r += V4(1.106e-01, 3.625e-03, 3.713e-02, -4.124e-03) * s0_2; + r += V4(1.288e-01, -5.582e-02, 5.082e-02, 1.674e-02) * s0_3; + r += V4(-6.074e-01, 8.818e-02, -3.371e-01, -6.663e-01) * s0_4; + r += V4(-8.030e-02, -4.780e-03, -3.421e-01, 5.358e-02) * s0_5; + r += V4(4.990e-01, 7.623e-03, 1.778e-03, 2.401e-02) * s0_6; + r += V4(9.546e-02, -1.656e-02, 6.935e-04, 6.387e-01) * s0_7; + r += V4(-2.302e-02, 5.209e-03, 5.835e-02, -6.361e-02) * s0_8; + r += V4(-4.485e-04, -2.620e-04, 2.449e-02, -7.403e-04); /* constant bias term, added without a tap factor */ + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { /* Entry point of pass 1 (DESC in): for one pixel it reads nine l0() taps of INPUT and writes f0 of them to t0 and f1 of them to t1. blockStart is added to the per-thread offset Rmp8x8(tid.x) to form the pixel coordinate. */ + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { /* threads whose pixel lies outside GetInputSize() write nothing */ + return; + } + float2 pos = (gxy + 0.5) * pt; /* pos and pt are read implicitly by the O() macro inside l0(); presumably pt is the size of one input texel so pos is the texel centre - TODO confirm against GetInputPt() */ + + min16float s0_0 = l0(-1.0, -1.0); /* s0_0..s0_8: l0() evaluated at x,y offsets -1..1 around pos, row by row; l0 is a dot product of the point-sampled INPUT rgb with constant weights plus a constant offset */ + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} +
+//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(7.103e-02, 1.495e-01, -1.731e-02, -9.952e-02, -1.539e-01, -1.103e-01, 7.099e-02, 2.023e-01, 2.681e-02, 5.202e-03, 1.954e-02, -6.822e-02, -1.650e-01, 3.710e-01, -6.020e-01, 4.879e-01)); + r += mul(s0_1, M4(-1.168e-02, 2.587e-01, -4.670e-01, -3.986e-02, -1.268e-01, 3.619e-02, 5.712e-02, 1.722e-01, 4.473e-02, -1.224e-01, 8.228e-02, -3.981e-02, 4.044e-01, -3.039e-01, -3.390e-01, 5.925e-02)); + r += mul(s0_2, M4(4.083e-02, 7.140e-02, -5.864e-01, 1.188e-01, 2.214e-01, -2.826e-01, 2.294e-01, -2.199e-01, -9.048e-02, 1.787e-01, -6.887e-02, -6.645e-02, -1.285e-01, -8.261e-02, -1.975e-01, 2.428e-01)); + r += mul(s0_3, M4(-5.801e-02, -3.381e-02, -2.285e-01, 9.377e-02, 1.878e-01, 9.285e-02, -1.001e-01, -5.059e-02, -2.155e-02, -9.098e-02, -1.279e-02, 9.801e-02, 1.178e-01, -1.967e-01, -4.792e-02, -1.106e-01)); + r += mul(s0_4, M4(3.048e-01, 2.731e-01, -2.351e-01, -1.516e-01, -1.382e-02, 1.296e-01, -9.530e-02, 2.975e-02, 2.411e-01, 2.343e-02, 1.731e-02, -2.331e-01, -2.161e-01, 4.114e-01, 4.417e-01, 1.225e+00)); + r += mul(s0_5, M4(3.337e-01, 2.844e-01, 1.065e-01, -2.391e-01, -1.265e-01, -3.625e-02, -7.062e-02, 3.529e-02, 2.208e-02, -8.459e-03, -1.366e-01, -1.563e-02, -1.648e-01, -5.919e-01, 4.061e-01, -4.975e-02)); + r += mul(s0_6, M4(6.213e-03, -2.020e-02, 2.520e-03, 2.167e-02, -2.361e-01, -1.421e-01, -4.579e-02, -1.353e-01, -2.883e-01, -5.900e-04, 2.720e-02, 1.591e-01, -5.120e-01, -4.253e-01, -3.397e-02, -4.633e-01)); + r += mul(s0_7, 
M4(2.456e-01, -6.978e-02, 5.668e-02, -9.795e-03, -1.925e-01, -4.841e-02, -1.273e-02, 1.282e-02, -1.223e-01, -4.080e-02, 2.975e-02, 1.595e-01, -3.345e-01, -1.504e-01, 1.080e-01, 8.549e-01)); + r += mul(s0_8, M4(8.700e-02, 1.611e-02, 8.589e-02, -3.284e-02, -1.637e-01, 2.627e-01, 1.851e-02, 2.843e-02, 1.224e-01, 6.163e-02, 4.991e-02, -1.510e-01, 1.885e-01, -5.951e-02, -3.463e-02, 2.172e-01)); + r += mul(s1_0, M4(1.856e-01, -1.041e-01, 1.900e-01, 8.420e-02, -3.223e-01, 6.258e-02, -9.766e-02, -6.517e-01, 3.066e-02, -7.562e-02, 1.015e-02, -1.139e-01, 1.569e-02, -3.684e-02, -2.813e-02, 8.835e-02)); + r += mul(s1_1, M4(-7.107e-02, -1.146e-01, 5.488e-01, -2.960e-01, 3.743e-01, -5.368e-01, -2.219e-01, -3.122e-01, 2.468e-02, -7.477e-01, 1.858e-01, 3.498e-01, 1.771e-03, 4.215e-03, 8.478e-02, 9.318e-02)); + r += mul(s1_2, M4(-2.350e-03, -3.382e-01, 5.964e-01, -2.321e-01, 2.011e-01, 1.890e-01, -2.062e-01, -3.725e-02, -1.003e-01, -1.464e-01, 1.040e-01, 9.994e-02, -7.113e-02, -3.827e-02, -1.258e-01, -1.584e-01)); + r += mul(s1_3, M4(-1.609e-01, -1.460e-01, -4.804e-03, 5.503e-02, 2.784e-01, -1.475e-02, 9.395e-02, -1.128e-01, 1.032e-02, -1.969e-01, 2.170e-01, 2.335e-01, -1.371e-01, 4.853e-02, 8.945e-03, -2.698e-01)); + r += mul(s1_4, M4(7.739e-02, -1.105e-01, 3.348e-01, 1.093e-01, -7.745e-02, -1.642e-01, -2.191e-01, -2.674e-02, 4.199e-01, -3.302e-01, 1.445e-01, -2.815e-01, -3.154e-01, 6.646e-02, 8.520e-02, -1.053e-01)); + r += mul(s1_5, M4(-4.165e-01, -8.545e-02, 2.291e-01, -1.042e-01, 3.791e-01, -7.209e-02, -6.332e-02, -3.174e-01, 1.038e-01, 8.122e-03, -9.715e-02, 6.808e-01, -9.362e-02, -4.634e-02, 5.184e-03, 1.295e-01)); + r += mul(s1_6, M4(-8.179e-02, -8.513e-02, 4.470e-02, -7.799e-02, -1.092e-01, -1.851e-01, -1.025e-01, -4.220e-02, -3.853e-01, 3.040e-02, -9.081e-02, 1.439e-01, -2.730e-02, -5.086e-02, 5.352e-03, -5.102e-03)); + r += mul(s1_7, M4(7.601e-02, -1.423e-01, 3.421e-01, 2.574e-03, 1.165e-01, 6.863e-03, 1.250e-02, -4.862e-02, -3.859e-01, -1.108e-01, 2.515e-02, 5.564e-01, 
2.485e-01, 2.230e-01, -3.839e-02, 3.605e-02)); + r += mul(s1_8, M4(-9.424e-02, 1.248e-01, 1.980e-01, -1.671e-01, 1.098e-01, 6.555e-02, -7.194e-02, -1.626e-01, -1.439e-01, -2.086e-01, -1.925e-02, 1.520e-01, 2.139e-01, -7.764e-02, 6.469e-02, 7.875e-03)); + r += mul(s2_0, M4(4.572e-02, 3.661e-02, -3.845e-01, -1.383e-01, 1.729e-02, 1.780e-02, 3.664e-02, -6.961e-02, -9.001e-03, -1.853e-02, -6.735e-02, -1.864e-02, 1.695e-01, -1.420e-01, 2.679e-01, -1.525e-01)); + r += mul(s2_1, M4(9.967e-02, -2.869e-01, -2.251e-01, 8.470e-02, 3.178e-02, -9.701e-03, 9.260e-02, 4.087e-04, -8.081e-02, 1.341e-01, 5.882e-03, 1.043e-02, 8.559e-03, 6.534e-02, -4.619e-01, -3.010e-01)); + r += mul(s2_2, M4(-1.676e-02, -3.339e-01, 1.848e-01, -2.562e-01, -8.563e-02, 2.487e-02, 2.495e-01, 9.448e-02, 2.189e-02, -3.018e-02, 5.698e-02, 6.041e-02, -4.869e-02, -2.627e-02, 1.602e-01, 1.092e-01)); + r += mul(s2_3, M4(6.867e-02, -1.693e-01, -1.614e-01, -1.944e-01, 1.992e-01, 1.720e-01, 2.393e-01, 1.219e-02, 4.866e-02, -1.165e-01, -1.285e-01, 2.929e-01, 2.043e-01, -1.399e-02, 1.595e-02, -2.746e-01)); + r += mul(s2_4, M4(-4.477e-01, -5.696e-01, -1.760e-02, 1.362e-01, 1.472e-01, 3.113e-01, -2.419e-01, 8.650e-02, -8.358e-02, 1.081e-01, 3.881e-02, -1.400e-01, -2.071e-01, 3.977e-02, -3.149e-01, 2.525e-01)); + r += mul(s2_5, M4(5.496e-02, -9.963e-02, -1.227e-01, -1.892e-01, 4.361e-02, -3.776e-01, -6.576e-01, 2.628e-01, -8.215e-02, -8.123e-02, 2.248e-03, 1.261e-01, 1.193e-01, 2.608e-01, 2.567e-01, 8.120e-02)); + r += mul(s2_6, M4(-1.587e-01, -9.849e-02, 1.122e-01, -5.963e-02, -9.176e-02, 7.341e-03, 1.164e-03, -5.660e-02, 1.567e-01, -6.958e-02, -3.780e-02, 4.238e-04, -6.186e-02, 1.777e-01, 2.398e-01, 6.853e-03)); + r += mul(s2_7, M4(1.062e-01, -1.498e-01, 5.492e-02, 1.108e-01, -3.248e-01, -2.901e-01, -4.360e-01, 1.128e-01, 7.346e-02, 8.659e-02, 9.740e-02, -1.434e-01, 1.538e-01, 1.349e-01, 1.408e-01, -1.367e-01)); + r += mul(s2_8, M4(1.412e-01, -8.889e-02, 2.029e-02, -1.523e-01, 4.847e-01, -7.432e-01, -1.181e-01, 
4.132e-01, 3.119e-02, -5.840e-02, -2.292e-02, -3.125e-02, 2.440e-02, 2.815e-02, 2.759e-01, -8.781e-02)); + r += mul(s3_0, M4(2.147e-02, 2.192e-01, 2.489e-01, -3.436e-02, 1.086e-02, -2.680e-02, -9.925e-02, 3.978e-02, 1.239e-01, 3.645e-02, 5.463e-01, 5.005e-01, 1.039e-01, -1.694e-01, -3.816e-02, 3.834e-01)); + r += mul(s3_1, M4(1.418e-01, 5.806e-02, 1.317e-01, 2.227e-01, 1.486e-02, -4.235e-03, -5.750e-02, -1.548e-01, -7.700e-01, 3.263e-01, -1.193e-02, 3.537e-01, -2.841e-01, 4.657e-01, -1.576e-01, -9.526e-02)); + r += mul(s3_2, M4(7.641e-02, 8.195e-01, 1.080e-01, 1.814e-01, -5.471e-02, 2.211e-02, -4.212e-02, -1.249e-02, 2.469e-02, 5.436e-01, 3.805e-01, -9.622e-02, -6.358e-02, -3.739e-01, -3.504e-01, -2.627e-01)); + r += mul(s3_3, M4(-9.359e-02, -1.830e-02, -7.015e-02, -7.774e-02, 2.286e-01, -6.321e-02, -5.124e-02, -2.799e-03, -5.063e-01, -1.835e-01, 3.716e-01, 1.130e+00, 3.259e-01, -2.045e-01, -1.792e-01, 4.892e-01)); + r += mul(s3_4, M4(-7.478e-01, -1.192e-01, 1.022e-01, 8.111e-01, 7.253e-02, 2.280e-01, -1.116e-01, -2.828e-01, -2.364e-01, -1.233e+00, -1.125e+00, 1.750e+00, -1.215e+00, 4.973e-02, 2.070e-01, 6.996e-01)); + r += mul(s3_5, M4(-4.115e-02, 3.613e-01, 2.694e-01, 4.126e-02, 7.046e-02, 6.242e-02, 9.300e-02, -1.965e-01, -3.211e-01, 8.504e-01, 2.518e-01, -5.622e-01, 5.663e-02, -1.139e-01, 1.150e-01, -1.954e-01)); + r += mul(s3_6, M4(-1.870e-01, -9.168e-02, -8.947e-02, 6.127e-03, 1.163e-02, 3.733e-04, -3.330e-01, 1.935e-01, 3.424e-01, 1.313e-01, -6.732e-01, 8.256e-02, 6.713e-02, 2.980e-02, -6.912e-02, 1.715e-01)); + r += mul(s3_7, M4(1.636e-01, 1.212e-01, 2.280e-02, 1.552e-01, -4.955e-01, 8.376e-01, 1.476e-01, 2.192e-01, 9.746e-01, -3.148e-01, 8.206e-01, -8.104e-01, -7.918e-02, -1.604e-01, 5.505e-02, 7.640e-02)); + r += mul(s3_8, M4(1.248e-01, 2.878e-01, -4.182e-02, -9.214e-02, -1.210e-01, 4.382e-01, 8.062e-02, -3.051e-01, -1.803e-01, -3.041e-01, 1.368e-01, -1.030e-01, 2.941e-02, -2.724e-01, 3.480e-02, 1.396e-02)); + r += V4(-3.046e-02, 3.515e-02, 4.880e-02, 
4.740e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.574e-01, -6.104e-03, -2.288e-01, 5.024e-03, -2.149e-03, -8.674e-02, 1.209e-01, 7.107e-02, 1.242e-01, 2.312e-03, -5.300e-02, -2.285e-01, -8.824e-02, 7.402e-02, -4.447e-01, 1.117e+00)); + r += mul(s0_1, M4(-5.617e-02, 3.613e-01, -4.666e-01, 1.795e-01, 1.718e-01, -1.005e-01, -2.593e-01, 4.103e-01, 2.477e-01, 1.883e-01, 3.928e-02, -3.635e-01, -7.353e-01, 3.209e-01, 2.171e-01, 3.924e-01)); + r += mul(s0_2, M4(-3.304e-01, 6.332e-01, -3.898e-01, 2.704e-01, 4.110e-02, -2.786e-01, -2.513e-01, 1.800e-01, 9.402e-03, -1.975e-01, -4.040e-02, -2.047e-01, -3.239e-01, -1.623e-01, 1.001e-01, 3.053e-02)); + r += mul(s0_3, M4(3.935e-01, 5.218e-02, 4.630e-02, 2.202e-02, -2.172e-01, -1.530e-02, -1.782e-01, -9.327e-02, -6.425e-02, -2.402e-02, -2.919e-02, -4.034e-02, 6.589e-01, -4.900e-02, 7.783e-02, 6.334e-01)); + r += mul(s0_4, M4(-5.475e-02, 1.543e-01, -1.597e-01, -2.500e-01, 4.990e-02, 5.780e-02, 1.162e-01, 1.140e-01, -2.980e-01, -2.524e-02, -2.103e-01, 4.297e-01, 4.528e-01, -3.098e-01, -1.415e-01, 7.565e-01)); + r += mul(s0_5, M4(-1.097e-01, 3.376e-01, -5.685e-01, 1.347e-01, 1.155e-01, -1.396e-01, -2.840e-01, -1.373e-01, 1.442e-01, 8.711e-02, 1.357e-01, -1.110e-01, 2.095e-01, -2.901e-01, -1.007e-01, -2.473e-01)); + r += mul(s0_6, M4(-7.405e-02, 9.320e-02, -5.870e-02, -2.569e-01, 6.017e-03, -8.078e-02, -3.798e-02, 2.334e-01, 1.440e-01, -1.852e-01, -6.627e-03, 3.514e-03, -1.499e-02, -6.237e-02, 3.665e-01, 3.270e-01)); + r += mul(s0_7, M4(2.443e-01, 8.076e-02, -2.143e-01, 1.120e-01, 1.187e-01, 1.317e-01, 1.811e-01, 1.918e-01, -2.164e-02, -1.829e-01, 2.105e-01, 3.085e-01, 3.155e-01, 2.801e-01, 
-6.834e-01, 2.861e-01)); + r += mul(s0_8, M4(9.974e-03, 9.704e-02, -2.363e-01, 1.829e-01, 1.844e-02, 9.298e-02, -5.319e-02, -5.899e-02, -2.154e-01, 2.555e-02, -8.374e-02, 1.254e-01, -2.736e-01, -4.065e-02, 4.838e-02, 3.338e-02)); + r += mul(s1_0, M4(-1.239e-02, -1.316e-01, 8.694e-02, -8.443e-02, -1.143e-01, -6.018e-02, -9.054e-02, 7.381e-02, 2.722e-01, 1.030e-01, -8.583e-02, -4.433e-01, -1.339e-01, 1.264e-01, 8.581e-02, -1.947e-01)); + r += mul(s1_1, M4(3.030e-01, -3.527e-02, 4.665e-01, -3.372e-02, -2.301e-02, 7.308e-01, 5.938e-01, -5.901e-01, 4.766e-01, 1.081e-01, 8.809e-02, 3.482e-01, -1.938e-01, -8.091e-02, 3.649e-02, 9.321e-02)); + r += mul(s1_2, M4(1.376e-01, -4.460e-01, 4.298e-01, -4.809e-02, -3.819e-01, 5.216e-01, 2.687e-01, 1.359e-01, 2.936e-01, 1.222e-02, 3.706e-01, 2.481e-01, -4.716e-02, -1.798e-02, 2.731e-02, -7.140e-02)); + r += mul(s1_3, M4(1.657e-01, -3.624e-02, 1.541e-01, -5.006e-03, -4.051e-01, -9.782e-02, 3.008e-02, 1.962e-01, -6.146e-02, 1.866e-03, -3.052e-01, -2.202e-01, 1.057e-01, -1.151e-01, -6.310e-02, 3.914e-01)); + r += mul(s1_4, M4(-2.629e-01, 1.029e-01, 1.812e-02, -2.950e-01, -1.191e-01, 2.580e-01, -4.833e-01, 1.095e-01, 2.309e-02, 4.519e-02, 1.086e-01, 5.362e-01, -1.349e-01, -1.278e-01, 7.109e-02, -1.992e-01)); + r += mul(s1_5, M4(-1.815e-01, 2.898e-01, 3.446e-01, -1.587e-01, -6.360e-02, 1.662e-01, 5.187e-01, 1.701e-01, -2.770e-02, -5.932e-01, 2.467e-01, 3.940e-01, 1.022e-01, 1.033e-01, -5.084e-02, -6.520e-02)); + r += mul(s1_6, M4(-1.494e-01, 3.180e-02, 9.864e-02, -3.409e-01, 1.397e-02, 9.932e-03, -2.110e-01, 2.636e-01, 1.353e-01, -8.495e-02, -2.680e-03, -2.287e-01, 1.136e-01, -1.047e-01, 2.910e-02, 9.922e-02)); + r += mul(s1_7, M4(1.533e-01, 4.819e-04, 1.735e-01, 2.027e-01, 1.316e-01, 1.029e-01, 1.446e-01, 1.737e-01, 4.855e-02, 4.781e-02, 2.025e-01, 1.587e-01, 1.661e-01, 7.134e-02, 5.853e-02, -1.530e-01)); + r += mul(s1_8, M4(-1.476e-01, -4.916e-02, 1.989e-01, 1.159e-01, 4.753e-02, 1.694e-01, 4.343e-02, -6.974e-03, 3.382e-02, 2.275e-01, 
3.466e-01, -7.178e-03, -1.104e-01, 2.059e-03, -7.101e-02, 8.934e-02)); + r += mul(s2_0, M4(-3.467e-01, 8.471e-04, 1.580e-01, 2.685e-01, -2.680e-02, -6.444e-02, 8.843e-02, 5.232e-03, 2.576e-02, -3.756e-02, -7.913e-03, -3.871e-02, -5.374e-02, -6.060e-02, -7.688e-02, 6.738e-01)); + r += mul(s2_1, M4(-3.963e-01, 1.295e-01, 2.623e-01, 2.565e-01, -1.831e-01, -6.054e-02, 1.817e-01, -8.944e-02, 1.974e-01, -2.800e-04, -3.964e-02, 1.232e-01, -3.477e-01, 3.791e-01, 1.438e-01, -7.862e-02)); + r += mul(s2_2, M4(2.540e-02, 1.123e-01, 6.461e-01, -3.856e-03, 3.373e-02, -5.719e-02, 1.556e-01, -1.100e-01, -3.499e-02, 9.146e-02, -4.624e-02, 9.774e-02, -1.148e-01, -2.280e-01, 4.977e-01, -1.568e-01)); + r += mul(s2_3, M4(5.352e-02, -1.293e-01, -6.991e-03, 4.190e-01, -2.334e-03, -4.433e-02, -8.470e-02, 1.162e-01, -1.045e-01, -7.444e-02, 8.951e-02, -1.124e-01, 4.295e-01, 1.086e-01, 1.336e-01, 2.645e-01)); + r += mul(s2_4, M4(-4.062e-01, -6.781e-02, 4.629e-01, -4.931e-01, -1.875e-01, 1.958e-01, -4.560e-01, -2.286e-02, -2.066e-01, 1.151e-01, -5.924e-02, 1.350e-01, -1.752e-01, 2.244e-01, -3.564e-02, -6.129e-01)); + r += mul(s2_5, M4(6.644e-02, 4.611e-01, 9.200e-02, 6.845e-03, -1.628e-02, 8.352e-02, -1.119e-01, -4.386e-02, -5.822e-02, -4.769e-02, -3.224e-02, -1.235e-01, -3.296e-01, 5.835e-03, 2.231e-01, 5.535e-02)); + r += mul(s2_6, M4(-2.961e-02, -5.230e-02, 5.124e-02, 6.542e-02, 2.004e-01, 1.189e-01, -1.797e-01, -1.535e-02, 6.469e-02, 1.134e-01, -1.204e-04, -7.606e-02, 2.436e-02, -1.630e-02, 1.841e-01, -2.529e-01)); + r += mul(s2_7, M4(-1.147e-02, 3.246e-02, 7.626e-02, -1.013e-01, 1.075e-01, 5.871e-01, -5.227e-01, -3.076e-01, 1.609e-01, 5.768e-02, -1.912e-02, 5.898e-02, -7.530e-02, -1.307e-01, 5.828e-02, -1.456e-02)); + r += mul(s2_8, M4(-7.053e-02, 8.728e-02, 1.211e-01, 1.410e-01, -2.160e-01, 9.970e-02, -5.345e-01, 1.141e-01, 8.112e-04, -4.348e-02, 9.858e-02, 2.780e-02, -1.116e-01, -2.331e-01, 1.545e-01, 7.984e-02)); + r += mul(s3_0, M4(-5.412e-02, 6.012e-03, -2.395e-01, -1.209e-02, 
-5.734e-02, 3.058e-02, -7.202e-02, -7.514e-02, 7.241e-03, -1.702e-01, 1.020e+00, 2.997e-01, -2.173e-01, 4.518e-02, -2.703e-02, -4.087e-02)); + r += mul(s3_1, M4(-5.670e-02, -9.713e-03, -2.091e-01, -1.621e-01, -5.370e-03, -5.579e-02, 1.042e-01, 2.220e-02, 4.788e-01, -6.623e-01, 5.548e-01, 8.186e-01, 2.462e-01, -7.624e-01, -9.065e-02, -1.105e-02)); + r += mul(s3_2, M4(4.043e-02, -1.577e-01, -3.166e-01, -1.256e-01, -9.515e-02, -8.852e-02, -4.960e-02, 1.129e-01, 1.690e-01, 2.314e-01, -5.134e-01, 9.584e-02, -3.085e-02, 2.399e-01, -3.381e-01, -7.233e-02)); + r += mul(s3_3, M4(1.750e-01, -9.450e-02, -2.230e-01, 4.190e-01, 8.900e-02, 2.306e-02, 2.783e-01, -3.295e-01, 2.697e+00, 8.855e-02, 5.728e-01, -8.682e-01, 6.085e-02, 5.010e-02, 1.343e-01, 1.137e-01)); + r += mul(s3_4, M4(9.857e-02, 3.310e-01, -3.584e-01, -5.586e-01, 5.751e-01, -4.023e-01, 3.838e-01, 1.240e-01, -1.482e-01, -1.233e-01, -5.953e-01, 1.534e+00, 3.390e-01, -2.022e-02, 1.619e-01, -2.959e-01)); + r += mul(s3_5, M4(1.528e-01, 1.593e-01, -1.886e-01, 2.281e-02, 2.174e-01, -8.846e-01, 5.726e-02, 7.369e-03, -1.490e-01, 3.377e-01, -4.669e-02, 1.206e-01, -1.251e-01, 2.600e-01, -2.439e-01, 2.067e-01)); + r += mul(s3_6, M4(4.090e-02, -2.118e-02, -9.012e-02, -8.624e-03, 1.464e-01, 6.929e-02, 1.492e-01, -4.039e-01, 6.123e-01, 2.679e-01, -2.284e-01, -3.609e-01, -6.598e-02, 1.341e-01, -2.371e-02, -2.899e-01)); + r += mul(s3_7, M4(5.189e-02, -3.928e-02, 1.670e-01, -1.536e-01, 5.066e-01, -3.768e-01, 6.577e-01, 1.140e-01, -1.537e-01, -1.941e-01, -9.152e-02, -3.571e-02, 1.068e-01, 4.803e-02, -3.180e-01, 4.361e-02)); + r += mul(s3_8, M4(-8.453e-02, -1.454e-02, 3.613e-02, 8.974e-03, -1.258e-01, -5.842e-01, 3.264e-01, 2.910e-01, 1.306e-01, 4.552e-01, 4.524e-01, 1.065e-02, -1.792e-02, 1.875e-02, -2.206e-01, 2.028e-01)); + r += V4(3.015e-03, -4.690e-02, 3.573e-02, -1.486e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = 
GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, 
s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.130e-02, -1.243e-01, -7.648e-02, -2.424e-01, -4.742e-02, -5.420e-02, 4.117e-02, 1.568e-01, -3.621e-02, 2.032e-01, 4.484e-02, 1.249e-02, -1.505e-01, 7.294e-02, 4.943e-02, -6.336e-02)); + r += mul(s0_1, M4(-1.474e-01, -3.366e-01, -5.670e-01, 4.113e-02, -1.260e-01, -1.539e-01, -5.421e-02, 1.779e-01, -1.072e-01, 1.209e-01, 4.423e-02, 2.454e-01, -5.430e-02, -1.442e-01, -1.501e-02, -4.731e-02)); + r += mul(s0_2, M4(6.444e-02, 1.509e-01, 1.452e-01, -2.840e-02, 9.365e-02, 2.016e-01, 1.002e-01, -3.226e-02, -1.186e-01, 1.535e-01, -1.652e-01, -1.104e-02, 4.170e-02, -4.404e-02, 1.189e-01, 1.007e-02)); + r += mul(s0_3, M4(-4.618e-02, 1.024e-01, -1.723e-01, -1.354e-01, 1.981e-01, -1.992e-01, 1.670e-01, 3.857e-01, -6.927e-03, 9.087e-02, 1.176e-01, 3.314e-01, 9.860e-02, 4.009e-04, 1.061e-01, -6.930e-02)); + r += mul(s0_4, M4(4.923e-01, -9.248e-02, 8.616e-03, 4.541e-02, -1.148e-01, 3.990e-03, -3.218e-02, 8.942e-02, 3.219e-02, -9.786e-02, 6.813e-02, 2.492e-01, -3.165e-01, 6.925e-02, -9.826e-02, 4.518e-01)); + r += mul(s0_5, M4(2.222e-02, 1.046e-01, -2.327e-02, -7.823e-02, 3.540e-01, 3.363e-01, -4.089e-02, 1.292e-02, -2.530e-01, 4.606e-01, -6.191e-02, -3.673e-02, -2.764e-01, -1.360e-01, -2.947e-03, 2.534e-02)); + r += mul(s0_6, M4(-2.067e-02, -1.566e-01, -8.968e-02, 1.386e-03, -8.841e-02, -1.077e-01, 1.646e-01, 1.987e-01, -3.098e-01, 2.764e-01, 1.935e-01, 1.847e-01, 1.116e-01, -1.514e-01, 
-5.175e-02, 8.710e-02)); + r += mul(s0_7, M4(1.266e-02, -2.119e-01, -1.610e-01, -6.512e-02, -1.679e-01, 2.247e-01, -5.854e-02, 1.200e-02, -1.406e-01, 4.393e-01, -8.517e-02, 3.281e-02, -1.177e-01, -1.861e-01, -3.241e-01, -2.918e-02)); + r += mul(s0_8, M4(-3.015e-02, -1.605e-01, -1.001e-01, 7.795e-03, -5.873e-02, -7.686e-02, -1.448e-01, -1.851e-02, -2.172e-01, 1.977e-01, -1.333e-01, -8.894e-02, -8.939e-03, 1.675e-01, -7.976e-03, 4.020e-02)); + r += mul(s1_0, M4(1.165e-01, -4.833e-02, 4.750e-02, -4.032e-02, -2.287e-02, -4.825e-02, 9.058e-02, 2.136e-01, 1.009e-01, -2.133e-02, 4.162e-02, -6.816e-02, -9.863e-02, -4.160e-03, -2.467e-02, -9.096e-02)); + r += mul(s1_1, M4(8.597e-02, -2.205e-01, 1.515e-01, -2.918e-02, -1.099e-01, -4.171e-02, 3.893e-04, -5.273e-03, -2.046e-02, -3.905e-03, 7.793e-04, 5.930e-02, 2.653e-02, -2.546e-01, -8.456e-02, -6.554e-02)); + r += mul(s1_2, M4(-1.058e-01, 3.302e-01, 1.812e-01, 6.427e-02, -4.601e-02, -1.589e-02, 4.405e-02, -1.366e-02, -5.996e-03, -5.402e-04, 3.237e-02, -5.725e-02, -7.486e-02, 1.358e-01, 4.739e-02, -2.432e-02)); + r += mul(s1_3, M4(3.333e-02, 5.179e-01, -1.939e-03, 7.798e-02, 2.011e-02, -2.959e-01, 1.135e-01, 3.122e-01, 8.651e-02, -2.708e-02, 7.183e-03, 4.554e-02, -3.342e-02, 9.136e-03, -7.067e-02, -1.867e-01)); + r += mul(s1_4, M4(6.231e-01, 9.512e-01, 3.523e-01, 3.744e-01, 2.388e-01, -2.827e-01, 9.968e-02, -5.306e-02, -4.498e-02, -2.222e-01, -5.865e-02, 2.967e-02, -3.029e-01, -2.137e-01, -5.363e-01, 8.872e-02)); + r += mul(s1_5, M4(-4.862e-02, 7.326e-01, 1.354e-01, 5.607e-02, 1.667e-01, -1.184e-01, -1.304e-01, 6.817e-02, 3.287e-02, 3.310e-01, 1.521e-01, -3.212e-02, -8.947e-02, 4.250e-02, -9.770e-02, -8.344e-02)); + r += mul(s1_6, M4(-9.242e-04, 4.835e-03, 1.322e-01, 3.745e-02, 9.613e-02, -8.310e-03, 4.718e-02, 2.763e-02, -1.616e-02, 6.167e-02, -3.382e-02, 3.624e-02, 1.213e-02, -2.014e-01, -2.776e-03, 4.360e-02)); + r += mul(s1_7, M4(-6.861e-02, 4.772e-02, -3.779e-02, 7.567e-02, -8.548e-02, -1.028e-02, 1.881e-02, 2.421e-03, 
1.378e-01, 1.305e-01, 2.177e-02, -1.118e-03, 5.861e-02, -1.416e-01, -3.140e-01, -9.031e-02)); + r += mul(s1_8, M4(-4.147e-02, 1.546e-01, 5.650e-02, 4.098e-02, -1.460e-01, -5.779e-02, -1.959e-02, -2.318e-02, 3.538e-02, -5.044e-02, 3.304e-02, -3.517e-03, -1.176e-01, -3.185e-01, -1.738e-01, -4.349e-02)); + r += mul(s2_0, M4(-3.428e-03, 6.059e-02, 7.024e-02, 2.739e-02, 1.313e-02, -5.748e-02, 9.005e-03, -7.139e-03, 1.165e-01, -1.541e-01, 1.493e-01, 2.725e-01, 3.254e-02, -2.934e-02, 1.115e-02, -2.844e-02)); + r += mul(s2_1, M4(-8.601e-03, -3.177e-03, 1.878e-01, 1.106e-01, 1.951e-02, 8.194e-02, 4.971e-02, 5.805e-02, 2.515e-02, -2.529e-01, -2.250e-01, 3.498e-02, 7.183e-02, -8.617e-02, -8.616e-02, 1.623e-01)); + r += mul(s2_2, M4(-8.072e-02, -1.234e-01, 3.482e-02, -2.873e-02, -4.049e-02, 4.828e-03, 1.940e-02, 3.828e-02, -5.156e-03, 4.585e-03, 2.326e-02, 2.346e-02, -8.908e-02, -1.384e-03, -2.366e-02, 1.290e-02)); + r += mul(s2_3, M4(4.921e-02, 1.726e-01, 3.832e-02, -2.490e-01, -1.152e-01, -1.722e-01, -1.705e-01, 4.228e-01, -8.215e-02, -1.478e-02, 1.554e-01, 3.701e-01, -8.863e-02, 1.068e-01, 8.890e-03, 6.324e-02)); + r += mul(s2_4, M4(1.307e-01, 2.312e-01, -1.734e-01, 2.083e-02, -1.966e-01, -3.991e-01, -8.681e-02, 1.976e-03, -3.177e-01, 1.528e-01, -2.329e-01, 2.569e-01, -6.230e-03, 6.020e-02, 4.969e-02, -2.039e-01)); + r += mul(s2_5, M4(1.660e-01, 1.642e-02, 7.203e-02, -1.613e-01, 6.225e-02, 6.470e-02, 3.305e-03, 2.230e-02, -2.455e-02, 6.599e-02, -1.740e-01, 7.887e-02, 3.463e-03, 1.003e-01, -1.850e-01, 7.885e-02)); + r += mul(s2_6, M4(-2.170e-02, 1.372e-01, 7.445e-02, -9.419e-02, -1.851e-01, 4.957e-02, -2.454e-01, 5.879e-02, -5.800e-02, -1.122e-01, 7.445e-02, 1.190e-01, 2.695e-02, -5.701e-02, -5.166e-02, -5.058e-02)); + r += mul(s2_7, M4(5.390e-01, 1.674e-01, 1.213e-01, -1.147e-01, -6.939e-02, -1.218e-01, -2.891e-01, 2.682e-02, -2.636e-01, -1.104e-01, -1.556e-01, 3.774e-02, -4.121e-02, -2.431e-01, -1.248e-01, 1.275e-01)); + r += mul(s2_8, M4(1.053e-01, 2.238e-01, -1.104e-01, 
5.372e-02, 6.179e-02, -2.431e-03, -4.843e-02, 3.820e-02, -7.539e-02, 7.898e-02, 7.562e-03, 1.596e-02, 7.298e-02, -1.553e-01, -3.545e-01, 1.990e-02)); + r += mul(s3_0, M4(8.232e-02, -6.815e-02, -7.421e-02, -3.191e-02, -1.592e-01, 2.814e-01, 5.009e-02, 3.669e-02, -5.908e-02, -5.445e-02, 4.873e-02, 1.538e-01, 1.065e-01, -2.194e-01, -2.612e-02, -2.297e-02)); + r += mul(s3_1, M4(1.431e-02, -7.835e-02, -2.790e-03, 9.305e-02, -2.975e-01, 1.527e-01, 1.888e-01, -1.279e-02, -1.938e-02, -1.022e-01, -2.197e-02, -2.919e-02, 2.192e-01, -8.056e-02, 1.328e-03, 3.478e-02)); + r += mul(s3_2, M4(4.920e-03, -6.286e-02, -7.779e-02, 1.075e-01, -1.092e-01, 2.909e-01, 3.056e-01, -9.017e-02, -3.625e-02, 1.079e-01, 1.107e-01, 6.613e-02, 1.696e-01, -1.852e-01, -1.253e-01, -9.675e-02)); + r += mul(s3_3, M4(-6.350e-02, 1.137e-01, -3.559e-02, -1.684e-01, -2.044e-01, -9.368e-02, 2.283e-01, 8.052e-01, 4.476e-03, -1.599e-01, 2.594e-02, 1.582e-01, -2.483e-02, 9.216e-02, 5.719e-02, 2.237e-01)); + r += mul(s3_4, M4(-1.694e-01, 1.597e-01, -3.311e-01, 1.880e-01, 2.614e-01, -2.584e-01, 5.296e-02, 9.726e-02, -3.932e-02, -7.518e-02, -1.749e-01, 1.604e-01, 1.008e-01, 2.920e-01, 5.358e-01, -6.383e-01)); + r += mul(s3_5, M4(-2.706e-01, -2.716e-01, -4.196e-01, 1.023e-01, 2.201e-01, -1.412e-01, 1.003e-01, -6.972e-02, 3.727e-02, -8.424e-02, -7.870e-02, 2.294e-02, 2.836e-01, -4.165e-01, -2.974e-01, -3.567e-02)); + r += mul(s3_6, M4(-3.434e-02, 6.420e-02, -8.729e-02, -8.600e-02, -2.041e-01, 1.646e-02, 9.025e-02, 1.724e-01, -4.951e-02, -3.894e-02, -7.985e-02, 1.580e-02, 2.554e-01, -3.100e-01, -2.769e-01, 8.336e-05)); + r += mul(s3_7, M4(-6.557e-02, 3.865e-02, -3.263e-02, 4.621e-02, -2.077e-01, 2.705e-02, -3.354e-01, 1.480e-01, 4.155e-02, -2.143e-01, -2.626e-01, 1.091e-02, 1.382e-01, -1.706e-01, -1.355e-01, -7.700e-02)); + r += mul(s3_8, M4(-2.004e-01, 4.575e-01, -1.812e-01, 6.102e-02, 3.469e-01, -6.634e-02, 1.302e-01, -9.621e-02, 4.023e-02, 1.048e-01, -9.194e-02, 5.130e-03, 4.272e-01, -5.971e-01, -2.025e-01, 
-1.364e-01)); + r += V4(3.575e-03, 3.041e-03, 1.241e-02, -2.230e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.437e-01, -9.784e-02, 2.649e-01, -8.638e-02, -1.746e-01, 2.031e-01, 1.203e-01, -8.812e-02, -2.317e-01, 2.311e-01, 3.171e-02, -3.619e-02, -7.798e-02, -2.507e-02, 1.902e-01, 5.780e-02)); + r += mul(s0_1, M4(2.504e-01, 1.577e-01, -5.397e-02, 4.599e-01, -1.392e-01, 2.560e-01, 1.018e-01, 7.968e-02, 2.247e-01, -2.962e-03, 1.421e-03, -1.201e-01, -3.622e-01, 1.378e-01, 1.392e-01, 1.641e-01)); + r += mul(s0_2, M4(-6.143e-02, -6.336e-02, 1.131e-01, 6.811e-02, -5.817e-02, 7.362e-02, 1.407e-01, 1.823e-02, 4.880e-01, -2.282e-01, -2.704e-01, -4.287e-01, -2.741e-01, 3.163e-02, 1.098e-01, 1.514e-01)); + r += mul(s0_3, M4(1.794e-01, 1.720e-01, -4.092e-01, 1.277e-01, -1.938e-01, 3.107e-01, 2.915e-01, 2.279e-01, 2.259e-01, 2.136e-01, 5.867e-02, 2.359e-01, -1.589e-01, 1.132e-01, 6.871e-02, 2.837e-01)); + r += mul(s0_4, M4(-3.070e-01, -4.494e-01, 5.817e-02, 5.153e-01, 5.215e-01, 5.410e-01, 1.286e-01, -5.596e-01, 4.287e-01, 1.821e-01, 1.542e-01, 3.755e-01, 3.820e-01, 2.953e-01, -2.768e-01, -6.977e-02)); + r += mul(s0_5, M4(-4.881e-02, 2.327e-02, 9.209e-02, -2.102e-02, -1.394e-01, -8.093e-03, 2.263e-01, -4.307e-01, 1.998e-01, -8.793e-02, -1.057e-01, -1.899e-01, 1.577e-01, 3.435e-01, 6.721e-02, 3.093e-01)); + r += mul(s0_6, M4(7.516e-02, -1.224e-01, 1.257e-02, -6.769e-02, -8.618e-02, 1.283e-01, 2.060e-01, -1.966e-01, 8.166e-02, -1.263e-01, -2.269e-01, -3.272e-01, -3.439e-02, -2.849e-01, 2.105e-01, -3.015e-03)); + r += mul(s0_7, M4(7.447e-02, -8.731e-02, 2.804e-02, -4.819e-02, -3.311e-01, 3.824e-01, 7.766e-02, 5.672e-02, 4.014e-01, 
-4.037e-03, 2.287e-01, 5.626e-02, 3.481e-01, -1.010e-01, -1.156e-01, -2.865e-01)); + r += mul(s0_8, M4(5.454e-02, -5.590e-02, 3.408e-02, 3.551e-03, 1.262e-02, 8.638e-02, 1.222e-01, 3.418e-01, -2.154e-01, 1.868e-01, 1.210e-01, -2.330e-01, -4.810e-02, -5.190e-02, -8.587e-02, -2.145e-01)); + r += mul(s1_0, M4(-3.063e-01, -1.830e-02, 5.167e-01, 4.813e-02, -7.310e-02, 1.443e-01, 1.654e-01, 1.158e-01, 4.789e-02, -3.030e-02, -1.358e-01, 2.986e-02, -4.855e-02, -7.736e-02, 4.514e-01, -1.797e-02)); + r += mul(s1_1, M4(4.322e-01, -1.369e-01, 9.431e-02, 3.921e-01, 2.708e-02, -1.218e-02, -9.091e-02, 1.871e-01, 3.763e-02, -9.213e-02, -1.209e-01, -1.587e-01, 3.014e-03, 1.816e-01, 3.099e-01, 3.210e-01)); + r += mul(s1_2, M4(-7.234e-02, 1.685e-02, 4.444e-01, -1.886e-01, -9.543e-03, 3.966e-02, 1.105e-01, 4.870e-02, 9.471e-02, -5.263e-02, -1.085e-01, 4.226e-02, -1.565e-01, -3.812e-02, 1.708e-01, 1.457e-01)); + r += mul(s1_3, M4(2.370e-01, -3.354e-02, -9.648e-02, 1.531e-01, -3.468e-01, -3.957e-02, 3.152e-01, 3.402e-02, 3.762e-02, 9.507e-02, 7.836e-02, 9.088e-03, -1.614e-01, 4.377e-02, 4.748e-02, 1.055e-01)); + r += mul(s1_4, M4(2.342e-01, -5.059e-01, 2.781e-01, 2.906e-01, 1.656e-01, 1.268e-01, 1.183e-01, -2.458e-02, 2.290e-01, 1.779e-01, -8.310e-02, 1.389e-01, 7.282e-02, 1.050e-01, -3.525e-01, 6.810e-02)); + r += mul(s1_5, M4(1.078e-01, -4.451e-02, 7.031e-02, -2.977e-01, 3.596e-02, 3.359e-02, 9.589e-03, 9.070e-02, -1.862e-01, -1.863e-01, -9.652e-02, -5.039e-02, 1.004e-01, 1.598e-01, 1.466e-01, 2.349e-01)); + r += mul(s1_6, M4(1.109e-02, -1.607e-01, 1.578e-02, -1.971e-01, 5.020e-02, -7.597e-02, 7.238e-02, 7.241e-02, 2.025e-02, -2.246e-02, 4.652e-02, -8.760e-02, -1.111e-02, 1.890e-02, 1.046e-01, -2.233e-03)); + r += mul(s1_7, M4(1.252e-01, -8.046e-02, -1.321e-01, -3.724e-01, -1.383e-01, 1.151e-01, 5.397e-02, -1.422e-01, 8.319e-02, 9.089e-02, -2.620e-02, 1.662e-01, 2.847e-02, -1.255e-01, 6.933e-02, -1.636e-01)); + r += mul(s1_8, M4(-1.517e-01, 3.661e-02, -3.135e-01, -3.395e-01, 
-1.139e-01, 1.973e-01, 8.547e-03, -3.118e-02, -8.869e-02, -1.209e-01, 1.867e-02, -4.531e-02, 1.016e-01, -6.909e-02, 1.436e-01, 1.663e-01)); + r += mul(s2_0, M4(-9.314e-02, 1.395e-02, -1.741e-02, -7.208e-02, -5.164e-02, -5.743e-02, 5.702e-02, 1.342e-01, 6.011e-03, 1.626e-01, 1.101e-01, -1.130e-01, 6.127e-02, -8.956e-03, -7.149e-02, -6.488e-03)); + r += mul(s2_1, M4(-2.534e-01, 1.086e-01, -1.007e-01, -3.067e-02, -1.074e-01, 7.219e-03, 6.768e-02, -1.012e-01, 2.019e-01, 4.263e-03, -7.411e-02, -1.173e-01, 1.961e-01, -5.619e-02, -2.390e-01, -1.323e-01)); + r += mul(s2_2, M4(-1.039e-01, -9.899e-02, -2.206e-01, -2.187e-01, -8.739e-03, 6.607e-02, 4.125e-02, 5.363e-02, -6.572e-03, 3.014e-02, 1.314e-01, -9.560e-02, 2.106e-01, 1.237e-02, -8.354e-02, -4.939e-03)); + r += mul(s2_3, M4(-4.682e-02, -1.357e-01, 3.481e-02, -2.187e-01, 1.113e-01, 8.812e-02, -1.211e-01, -2.011e-02, 1.567e-01, -2.216e-02, -4.920e-03, -2.458e-01, 2.263e-02, 6.741e-02, -1.234e-02, 2.338e-02)); + r += mul(s2_4, M4(5.105e-02, -3.845e-01, 1.812e-01, -1.927e-01, 2.840e-01, -2.094e-01, 5.673e-02, 4.405e-02, 5.957e-01, 1.734e-02, -1.158e-01, -6.956e-01, -2.077e-01, 5.130e-03, 4.744e-01, -1.540e-02)); + r += mul(s2_5, M4(1.601e-01, -2.680e-01, -1.678e-01, -1.207e-01, -4.648e-02, -6.454e-02, 1.122e-01, -6.567e-02, 1.638e-01, -1.259e-01, -2.470e-02, -3.547e-01, -1.333e-01, -1.219e-02, -7.710e-02, -3.881e-01)); + r += mul(s2_6, M4(-6.060e-02, 1.662e-01, -2.082e-01, 3.193e-01, -1.317e-01, 1.395e-04, 2.436e-01, -1.480e-01, 6.104e-03, -2.009e-01, -6.729e-02, -2.207e-01, -7.784e-02, -7.589e-02, 7.569e-02, 3.261e-03)); + r += mul(s2_7, M4(-2.951e-01, -2.050e-01, 2.827e-02, 3.739e-01, 1.947e-01, 5.411e-01, -2.262e-01, -8.808e-03, 2.262e-01, -9.010e-02, -1.476e-01, -3.582e-01, -1.718e-01, 2.844e-02, 7.832e-02, 1.414e-03)); + r += mul(s2_8, M4(3.534e-01, 1.695e-01, -1.247e-01, 4.750e-01, 4.171e-02, 2.338e-02, -4.525e-02, -4.955e-02, 2.934e-01, -3.865e-02, -1.125e-01, -2.127e-01, 1.326e-01, 5.967e-02, 6.215e-02, 
1.048e-01)); + r += mul(s3_0, M4(4.186e-02, -5.378e-02, 7.641e-02, -3.524e-02, -2.447e-01, -5.374e-02, -1.380e-01, -4.221e-01, -3.797e-02, -7.623e-03, -4.826e-02, 1.791e-02, -1.390e-01, 1.115e-01, 2.252e-01, -9.103e-03)); + r += mul(s3_1, M4(1.339e-01, 3.093e-01, -3.615e-02, 8.684e-02, -4.098e-01, -1.216e-01, 2.372e-01, -1.247e-01, -5.358e-02, -1.660e-01, -8.435e-02, 3.871e-02, 2.722e-01, -1.145e-01, -3.944e-01, -5.003e-02)); + r += mul(s3_2, M4(-4.430e-02, -3.135e-02, 1.019e-01, -1.129e-01, -2.647e-01, -1.317e-01, 8.715e-02, -5.466e-02, -3.946e-02, 7.216e-02, 1.677e-01, 9.349e-02, 8.069e-02, -1.097e-01, -9.659e-03, -8.460e-02)); + r += mul(s3_3, M4(-5.036e-03, 4.992e-02, 1.086e-01, -1.339e-02, 2.792e-01, 3.294e-01, -1.578e-01, 4.592e-01, -7.749e-02, 4.384e-02, -4.212e-02, 2.287e-02, 1.456e-01, 4.774e-02, -1.264e-01, 7.437e-02)); + r += mul(s3_4, M4(3.022e-01, -2.197e-01, -4.347e-02, -2.198e-01, 3.922e-02, 8.609e-02, 8.862e-02, 3.418e-01, 8.117e-02, -2.026e-02, -3.236e-01, -2.539e-01, -6.030e-02, -2.409e-01, 7.879e-02, -8.457e-02)); + r += mul(s3_5, M4(3.525e-01, 2.622e-01, -4.994e-02, -1.932e-01, -1.508e-01, 1.229e-01, 1.359e-01, 1.613e-01, 1.830e-01, -4.473e-02, -5.438e-02, -1.041e-01, 4.534e-01, -4.660e-01, -7.405e-02, -1.001e-01)); + r += mul(s3_6, M4(-1.224e-02, -5.840e-03, 8.031e-02, -2.279e-02, -2.128e-01, 1.477e-01, -9.937e-03, 4.142e-02, -3.726e-02, -1.013e-01, -2.940e-03, -1.333e-01, 1.353e-01, -2.192e-01, -3.858e-01, -1.100e-01)); + r += mul(s3_7, M4(-8.882e-02, 1.341e-01, 2.707e-01, 2.212e-01, 2.628e-01, 3.454e-01, -3.703e-01, 4.902e-01, 1.527e-01, 8.567e-03, -1.742e-01, -1.884e-01, -7.710e-01, 1.028e-01, 3.233e-01, -3.897e-01)); + r += mul(s3_8, M4(3.715e-02, 2.936e-01, -1.195e-01, -1.295e-01, -1.313e-01, -1.222e-01, -2.876e-01, 5.694e-02, 6.813e-02, -1.738e-02, -1.154e-01, 1.649e-02, 1.755e-01, -1.639e-01, 3.212e-02, 3.504e-01)); + r += V4(-8.611e-03, -6.529e-03, -1.098e-03, 4.669e-03); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + 
float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, 
s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(7.356e-02, 8.402e-03, 1.287e-01, 6.762e-02, 2.134e-01, -6.620e-02, -2.788e-01, -5.744e-02, -3.896e-02, -3.993e-02, -7.161e-02, -1.982e-01, -6.734e-02, 8.804e-03, -4.739e-02, 6.502e-02)); + r += mul(s0_1, M4(2.249e-01, 4.958e-02, 1.138e-01, 3.152e-01, 2.008e-01, 1.703e-01, 5.817e-02, -9.482e-02, -2.371e-01, 3.975e-02, -1.755e-01, -2.666e-01, 2.819e-01, -2.640e-02, 1.405e-01, -6.009e-02)); + r += mul(s0_2, M4(2.065e-01, -3.027e-02, -3.447e-02, 3.226e-03, -1.252e-02, -7.589e-03, 2.344e-03, -1.704e-02, -8.894e-02, 3.136e-02, -1.517e-01, -2.176e-02, 8.920e-02, -5.322e-02, -9.529e-02, 8.355e-02)); + r += mul(s0_3, M4(1.136e-01, 1.015e-01, -2.730e-02, -2.144e-01, -9.526e-02, -2.857e-01, 2.711e-01, -1.991e-01, 2.596e-01, 1.602e-01, -2.169e-01, -1.097e-01, 3.353e-02, 6.231e-02, 8.753e-03, 3.707e-01)); + r += mul(s0_4, M4(-1.945e-01, 3.081e-01, -2.270e-01, -5.963e-02, -1.666e-01, -3.408e-01, 1.161e-01, -6.384e-02, -6.823e-01, -4.014e-01, -6.276e-01, -1.672e-01, 2.986e-03, -1.351e-01, 1.668e-01, -3.133e-01)); + r += mul(s0_5, M4(4.802e-02, -4.275e-02, 1.978e-03, -7.602e-02, -4.082e-03, 5.572e-02, -3.341e-02, 9.101e-03, -1.038e-01, 1.622e-01, 2.334e-02, 1.768e-01, 9.416e-03, -2.287e-01, 1.048e-01, -2.926e-01)); + r += mul(s0_6, M4(5.333e-04, 3.089e-02, 2.721e-02, -3.601e-02, -5.081e-02, -1.152e-01, 
6.752e-02, 1.701e-01, -2.951e-02, 2.450e-01, -1.684e-01, -4.702e-02, -1.580e-02, 1.200e-02, -1.266e-02, 4.937e-02)); + r += mul(s0_7, M4(-1.351e-02, -6.248e-02, -3.060e-03, 4.140e-02, -2.090e-01, -6.831e-01, -8.857e-02, 2.536e-01, -2.333e-02, 1.521e-01, -8.033e-02, 2.124e-01, -6.615e-02, 1.317e-01, 1.847e-01, -2.150e-01)); + r += mul(s0_8, M4(4.605e-02, 1.013e-01, 6.834e-03, -6.411e-02, -1.476e-02, -2.845e-01, -4.312e-02, -1.171e-02, 6.985e-02, -6.859e-02, -2.785e-02, -3.226e-02, 5.186e-02, 1.102e-01, -2.071e-02, -1.250e-01)); + r += mul(s1_0, M4(1.952e-01, -3.342e-02, -3.770e-02, -2.026e-01, 4.850e-02, -3.174e-02, -1.987e-01, -2.886e-02, -1.298e-01, 1.994e-02, 1.131e-01, 2.950e-02, -1.791e-02, -4.533e-02, 4.695e-02, -6.907e-02)); + r += mul(s1_1, M4(2.401e-01, 1.809e-01, -5.151e-02, -6.271e-02, -1.409e-01, 9.215e-03, 1.176e-01, 2.717e-02, 1.130e-01, -3.228e-02, -9.086e-02, -1.202e-03, 1.642e-03, -7.943e-03, 1.097e-01, 1.842e-01)); + r += mul(s1_2, M4(8.774e-02, -1.486e-02, -4.808e-02, 4.089e-02, 6.244e-02, -7.645e-02, 5.614e-02, -5.706e-02, -2.386e-02, 4.407e-02, -1.378e-01, -5.880e-02, 2.936e-02, 2.285e-02, -3.924e-02, 5.724e-02)); + r += mul(s1_3, M4(2.603e-01, -1.455e-01, 1.429e-01, -2.992e-02, -6.288e-02, -5.216e-02, -1.802e-01, 1.060e-01, -2.473e-02, -6.795e-03, 2.843e-02, 7.745e-02, -4.868e-03, -9.998e-02, -7.961e-02, 5.068e-02)); + r += mul(s1_4, M4(2.018e-01, -1.293e-01, -5.291e-02, -4.763e-02, 3.484e-02, -1.648e-01, 8.786e-02, -6.101e-02, -1.083e-01, 5.522e-02, -1.814e-01, -2.392e-01, 6.427e-02, -1.908e-02, 2.643e-01, 1.294e-01)); + r += mul(s1_5, M4(-7.897e-02, -5.967e-02, -2.620e-01, 1.274e-02, -2.583e-02, 5.654e-02, -7.639e-02, -7.534e-03, -5.812e-02, -7.887e-02, -3.738e-03, 7.664e-02, 1.753e-02, -2.842e-01, -3.237e-01, 2.077e-02)); + r += mul(s1_6, M4(6.558e-02, -9.890e-02, 1.849e-02, 3.242e-04, 1.021e-02, 1.234e-01, 1.224e-02, -4.322e-02, -2.778e-02, 3.860e-02, -5.257e-02, -1.466e-02, -1.001e-02, -1.291e-03, 1.724e-01, -9.167e-02)); + r += mul(s1_7, 
M4(-5.291e-02, -2.764e-01, -6.402e-02, 4.327e-02, 1.921e-02, -1.484e-01, 3.286e-02, 4.051e-02, 1.636e-02, 3.932e-01, -5.432e-02, 4.540e-02, 3.947e-02, -1.385e-01, -1.065e-01, 1.569e-01)); + r += mul(s1_8, M4(-1.729e-02, 8.177e-02, -4.479e-02, -1.275e-01, -3.302e-03, -1.265e-01, -2.922e-02, 3.720e-02, 1.560e-02, 5.266e-02, -1.572e-02, -4.840e-02, 3.991e-03, 1.003e-01, -1.423e-01, 7.414e-02)); + r += mul(s2_0, M4(-1.207e-02, -2.418e-02, -7.769e-03, -1.401e-01, 1.660e-01, -6.347e-03, -1.092e-02, -1.830e-02, -1.252e-01, -5.217e-02, 9.898e-03, 1.461e-02, 2.654e-02, 1.219e-02, -3.769e-02, 1.897e-02)); + r += mul(s2_1, M4(-3.650e-02, 1.317e-01, 1.299e-02, -5.512e-02, -1.287e-01, 2.438e-02, -1.609e-03, 1.759e-01, 1.824e-02, 6.477e-03, 2.905e-02, -8.644e-02, 7.496e-02, -9.920e-02, 1.147e-02, 1.889e-01)); + r += mul(s2_2, M4(-7.005e-03, -4.482e-02, -1.853e-02, 3.441e-02, 1.251e-01, -3.162e-02, -1.701e-01, -5.231e-02, -1.647e-01, 2.261e-02, 8.255e-02, -3.730e-02, 1.811e-01, -9.052e-02, 1.728e-02, 1.911e-02)); + r += mul(s2_3, M4(2.359e-02, -1.334e-01, 2.761e-02, -1.251e-01, 1.455e-01, 4.076e-02, -3.260e-02, -1.782e-01, -3.575e-02, 1.411e-02, 1.322e-01, -9.592e-02, 5.423e-02, 7.989e-03, -1.460e-01, 8.895e-02)); + r += mul(s2_4, M4(1.304e-01, 1.296e-01, -7.250e-02, -6.647e-02, 8.382e-02, 1.111e-01, 8.976e-02, -5.914e-02, -2.228e-01, -4.772e-02, -1.931e-03, 8.499e-02, 4.483e-01, 1.327e-01, 5.086e-02, -4.795e-01)); + r += mul(s2_5, M4(4.674e-02, 7.104e-02, -5.312e-02, -7.730e-02, 2.647e-03, 8.893e-03, -8.889e-02, -5.714e-02, -4.546e-02, -4.002e-02, -1.514e-01, -2.989e-02, -8.669e-02, -5.441e-03, 1.460e-02, -2.327e-02)); + r += mul(s2_6, M4(1.146e-01, -1.154e-01, 8.289e-03, 7.655e-02, -2.194e-02, -3.908e-02, -2.191e-02, 2.363e-03, 4.527e-02, -7.852e-02, -4.728e-02, 1.066e-01, 4.023e-02, -5.192e-02, -4.180e-02, -3.879e-02)); + r += mul(s2_7, M4(2.446e-01, -2.295e-01, -5.819e-02, -2.646e-02, 8.106e-02, -8.799e-02, -3.455e-02, 6.900e-02, 5.579e-02, -1.551e-01, 1.609e-01, 9.954e-02, 
-1.499e-01, 8.628e-02, 1.114e-01, 1.313e-02)); + r += mul(s2_8, M4(1.028e-02, 9.150e-02, -6.161e-02, 5.124e-03, 3.822e-02, 1.533e-02, 2.329e-02, -1.106e-01, -1.541e-03, -1.818e-01, -9.577e-02, -3.402e-02, 1.784e-02, -1.152e-01, 6.896e-02, -1.111e-01)); + r += mul(s3_0, M4(-7.349e-02, -4.782e-02, 3.080e-02, -1.668e-01, 9.572e-02, 5.307e-02, 5.573e-03, 6.483e-02, 1.104e-01, -5.707e-02, -8.579e-02, -1.754e-02, 1.038e-01, 1.706e-02, -1.185e-01, 5.863e-02)); + r += mul(s3_1, M4(-1.639e-01, -6.808e-03, 1.836e-02, -1.482e-01, 1.032e-01, 2.612e-02, -1.751e-01, -1.527e-01, 3.169e-03, 5.272e-02, 7.983e-02, 5.066e-02, 1.191e-01, 3.658e-02, 3.275e-02, -1.122e-01)); + r += mul(s3_2, M4(-8.279e-02, -1.068e-02, 3.848e-02, -8.857e-03, -3.783e-02, 9.934e-02, -7.181e-02, 2.801e-02, -1.524e-01, -7.166e-02, 1.038e-01, -9.840e-04, -7.254e-03, -3.252e-02, -1.435e-02, 6.052e-03)); + r += mul(s3_3, M4(-3.534e-02, -2.891e-02, 3.778e-01, -2.472e-01, -4.015e-02, -5.651e-02, 2.006e-01, 1.249e-02, -8.408e-02, -1.160e-02, 2.881e-01, -6.805e-03, 1.340e-02, -1.237e-01, -1.617e-01, 1.894e-02)); + r += mul(s3_4, M4(-1.512e-02, 3.232e-01, -1.441e-01, -3.778e-01, -1.475e-01, -2.644e-03, -3.149e-01, 3.225e-02, 1.227e-01, -3.620e-02, -1.175e-01, -3.857e-01, 4.834e-02, -1.567e-01, 1.632e-01, -1.292e-01)); + r += mul(s3_5, M4(-1.592e-01, 3.426e-02, -1.506e-01, 1.215e-01, 1.314e-01, -7.432e-02, -8.767e-02, 1.685e-01, 6.875e-02, 2.804e-01, -3.279e-02, -1.870e-01, 1.049e-01, -9.061e-02, 8.573e-02, -9.407e-02)); + r += mul(s3_6, M4(5.310e-02, -1.089e-01, -1.496e-01, 2.134e-01, 5.599e-02, -1.565e-01, -6.842e-02, -1.362e-02, 6.861e-02, -2.548e-02, -1.614e-01, -3.698e-02, -2.731e-02, 1.138e-02, 1.288e-02, -1.789e-02)); + r += mul(s3_7, M4(-7.967e-02, -2.461e-01, -2.139e-01, 3.193e-01, 1.377e-01, -1.213e-01, 8.415e-02, 1.224e-02, 1.192e-01, 1.785e-01, 1.978e-01, 1.008e-01, 3.016e-02, 9.868e-02, 3.118e-03, -3.294e-02)); + r += mul(s3_8, M4(1.121e-01, -4.625e-02, 3.331e-02, -7.687e-02, 5.520e-02, 6.326e-02, 
1.369e-02, 1.850e-02, 4.062e-02, -1.561e-01, -8.640e-02, 1.105e-01, 8.446e-03, -1.746e-03, 4.572e-02, -1.015e-01)); + r += V4(-1.057e-02, -1.114e-02, 1.597e-04, 1.132e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.399e-01, 1.190e-01, 9.941e-02, -5.908e-03, 2.176e-01, -3.861e-02, -4.997e-02, -3.036e-02, -6.079e-02, 2.294e-02, -1.260e-01, 6.001e-02, -7.690e-02, -4.805e-02, 6.117e-03, 4.358e-02)); + r += mul(s0_1, M4(-4.669e-02, -1.150e-01, 9.700e-03, 2.351e-02, 3.215e-01, -1.737e-03, 2.091e-01, -1.245e-01, -8.592e-02, 1.866e-01, 2.826e-01, -6.728e-01, 1.528e-01, 5.511e-02, -4.930e-02, -1.959e-02)); + r += mul(s0_2, M4(-2.182e-02, -4.512e-02, 6.864e-02, 8.299e-02, 8.483e-03, -4.855e-02, -1.500e-01, 1.325e-02, 6.098e-02, -1.867e-02, 1.276e-02, 1.721e-02, 5.918e-03, -1.130e-01, 7.066e-04, -1.824e-03)); + r += mul(s0_3, M4(4.594e-02, 1.518e-01, -2.067e-01, 1.546e-02, -1.548e-02, 1.126e-01, -4.502e-03, -2.014e-02, 2.417e-01, -1.530e-01, -1.095e-01, -4.966e-02, 2.291e-01, -4.598e-03, 2.836e-01, 5.562e-02)); + r += mul(s0_4, M4(5.432e-02, -3.003e-01, 7.389e-01, -1.497e-01, -2.439e-01, -3.298e-01, 4.081e-01, -2.105e-01, -4.267e-01, 3.913e-01, 5.470e-01, 5.594e-01, -1.221e-01, -5.444e-02, -4.180e-01, 1.515e-01)); + r += mul(s0_5, M4(2.205e-01, -5.813e-03, 7.451e-03, 8.130e-02, -3.312e-02, -9.387e-02, -9.824e-02, 4.493e-02, 8.187e-02, -2.042e-01, 1.644e-01, 1.562e-01, -8.427e-02, 2.057e-01, -1.668e-02, -2.356e-01)); + r += mul(s0_6, M4(1.150e-02, 1.442e-02, -1.973e-02, -4.599e-02, -9.680e-02, 3.962e-02, 1.731e-02, -2.402e-02, 3.936e-02, 6.512e-03, 2.103e-02, 2.025e-03, -1.308e-02, -5.259e-02, 5.631e-02, 3.037e-02)); + r += mul(s0_7, 
M4(-1.306e-02, -3.164e-02, 1.196e-01, 2.798e-02, -2.533e-01, -1.204e-01, 1.860e-01, 1.564e-01, -4.731e-02, -7.323e-02, 1.441e-03, -9.049e-02, -3.371e-02, -2.801e-04, 2.952e-02, -2.632e-02)); + r += mul(s0_8, M4(3.024e-02, -1.034e-02, -7.595e-02, -7.550e-02, 3.562e-02, -4.589e-02, -3.066e-02, 7.995e-02, -1.866e-02, 1.022e-01, -2.624e-02, -1.074e-01, 2.176e-02, 1.434e-01, -5.664e-02, -3.473e-02)); + r += mul(s1_0, M4(2.252e-01, 9.801e-02, -5.786e-02, -6.661e-02, 7.599e-02, -9.244e-02, 4.437e-02, -1.203e-01, -1.577e-01, -3.797e-02, -1.335e-02, 4.540e-02, -3.540e-03, -9.094e-03, -4.076e-02, -8.099e-02)); + r += mul(s1_1, M4(2.557e-01, -2.549e-01, 2.306e-01, -4.389e-02, -3.677e-02, 5.796e-02, 4.505e-02, -1.209e-01, -4.484e-02, 1.229e-01, -5.686e-02, 2.778e-02, 9.876e-02, -6.893e-04, 9.771e-02, 1.264e-01)); + r += mul(s1_2, M4(-5.324e-02, -9.632e-02, -1.092e-02, -1.426e-02, 3.082e-02, 9.196e-02, -1.381e-01, -1.013e-01, 7.758e-03, -3.290e-02, 1.630e-02, -4.979e-03, -7.297e-02, -7.534e-02, 2.040e-02, -1.983e-01)); + r += mul(s1_3, M4(1.951e-01, 3.566e-02, 4.220e-02, 8.086e-02, -5.114e-02, -5.626e-02, -6.912e-02, 1.462e-01, 2.268e-03, -2.592e-02, 3.527e-02, -3.832e-02, 4.756e-02, 1.234e-01, -5.494e-03, 4.695e-02)); + r += mul(s1_4, M4(4.147e-01, -2.431e-01, 2.372e-01, 2.574e-04, -4.485e-02, 5.014e-02, 3.928e-02, -2.817e-02, 3.512e-01, 2.983e-01, -1.260e-01, 4.326e-01, -2.366e-01, -6.912e-02, 2.259e-01, -4.534e-01)); + r += mul(s1_5, M4(1.323e-01, 5.260e-03, 2.693e-02, 1.841e-01, -1.105e-01, 6.002e-02, -1.233e-01, 1.012e-02, -9.410e-02, -1.260e-01, 1.264e-02, -3.910e-02, 3.656e-01, -1.103e-01, 5.059e-01, 4.280e-01)); + r += mul(s1_6, M4(-7.537e-02, -2.153e-02, -4.511e-02, -5.184e-02, -1.745e-02, -1.165e-02, 1.352e-02, -1.951e-02, -4.888e-02, 2.249e-02, -3.915e-02, -4.557e-03, -9.946e-03, -1.633e-04, -3.200e-02, -1.356e-02)); + r += mul(s1_7, M4(-1.509e-01, -2.227e-02, 1.640e-01, 2.693e-02, 4.846e-02, 3.303e-02, -5.390e-02, 3.607e-02, -2.818e-02, -7.170e-02, 3.311e-02, 
-9.203e-02, -1.946e-03, -8.577e-02, -2.925e-02, 1.238e-01)); + r += mul(s1_8, M4(-3.295e-02, 1.995e-02, -1.689e-01, -4.353e-02, -4.138e-02, -7.439e-03, -2.343e-02, 6.997e-02, 8.031e-02, 1.117e-01, 4.894e-02, -6.214e-02, -1.960e-01, -1.630e-01, 8.586e-02, -8.213e-02)); + r += mul(s2_0, M4(-9.883e-02, -1.168e-02, -1.110e-01, -2.148e-01, 1.452e-01, 3.417e-03, -4.513e-02, 8.845e-02, -7.791e-02, 2.326e-02, -4.188e-02, -3.659e-02, 3.105e-02, -1.318e-02, -4.552e-03, 7.109e-02)); + r += mul(s2_1, M4(1.958e-02, -6.995e-02, 2.588e-01, -6.431e-02, -2.211e-01, 5.281e-02, 5.399e-02, 8.884e-02, -5.135e-02, -4.768e-02, 1.363e-01, -2.064e-01, -1.391e-01, 1.106e-01, -2.611e-01, 2.038e-01)); + r += mul(s2_2, M4(-6.883e-02, -1.360e-03, -1.628e-01, 7.301e-02, 1.213e-01, -5.159e-03, 1.194e-01, -1.148e-02, -1.285e-01, -1.448e-01, 1.776e-02, -1.414e-01, -3.022e-02, 1.382e-01, 6.695e-02, -4.201e-02)); + r += mul(s2_3, M4(-1.194e-01, 1.524e-03, -1.945e-01, -1.496e-01, 1.413e-03, -8.697e-04, -1.542e-01, -1.798e-03, -4.991e-02, -7.944e-03, -1.094e-01, -5.578e-02, 1.526e-01, -6.170e-02, 1.598e-01, 1.306e-01)); + r += mul(s2_4, M4(3.583e-02, -1.213e-01, 2.087e-01, -4.616e-02, 2.125e-01, -1.242e-01, 2.776e-01, -8.100e-02, -1.733e-01, 1.016e-01, 2.949e-01, 1.489e-01, 5.059e-01, 3.526e-01, -4.764e-01, -1.105e-02)); + r += mul(s2_5, M4(7.240e-02, 1.034e-01, -1.103e-01, 2.351e-02, -2.711e-02, 1.506e-02, -1.534e-01, 1.093e-01, 5.065e-02, -2.686e-01, 1.423e-01, -4.993e-02, 7.167e-02, 1.084e-01, -8.139e-03, 4.460e-02)); + r += mul(s2_6, M4(1.243e-01, 1.281e-02, 7.048e-02, 1.117e-01, -1.145e-01, -1.703e-02, -1.470e-02, -3.647e-02, 3.796e-03, 2.441e-02, -8.422e-02, 1.955e-02, -2.861e-02, -6.963e-02, 6.894e-02, -4.071e-02)); + r += mul(s2_7, M4(2.315e-01, 7.446e-02, -7.632e-02, 1.319e-01, -2.392e-02, 2.525e-02, 4.687e-02, 7.645e-02, 4.250e-02, -4.733e-02, 2.179e-01, -3.843e-02, -3.526e-01, 9.675e-02, -1.837e-01, -1.563e-01)); + r += mul(s2_8, M4(5.933e-02, 1.490e-01, -5.844e-02, 9.363e-02, 7.616e-04, 
-1.075e-02, -1.365e-01, -6.094e-02, 7.094e-03, -1.218e-01, 7.021e-02, 3.101e-02, -4.184e-02, 3.989e-02, -7.167e-02, -1.179e-01)); + r += mul(s3_0, M4(-7.835e-02, 6.392e-02, -5.802e-02, -1.483e-01, 1.374e-01, 3.699e-02, 2.043e-03, 1.554e-01, -6.873e-02, -1.174e-02, -1.518e-01, -1.405e-02, 4.783e-03, -1.131e-01, 4.121e-02, -8.849e-02)); + r += mul(s3_1, M4(-1.463e-01, 5.240e-02, -1.651e-02, -2.410e-01, 1.092e-01, -3.146e-02, -1.629e-02, -2.974e-02, -7.838e-02, -7.374e-03, 2.745e-01, -1.408e-01, 1.335e-01, 8.634e-02, 1.073e-02, -1.407e-02)); + r += mul(s3_2, M4(-7.340e-02, 2.321e-02, 1.922e-02, -1.112e-01, 2.932e-02, -2.587e-02, 1.333e-01, 4.721e-02, -1.514e-01, -3.395e-02, -1.264e-01, 1.777e-02, -8.692e-02, 1.186e-02, -7.424e-02, -2.402e-02)); + r += mul(s3_3, M4(5.052e-02, 2.790e-03, 3.121e-02, -1.839e-01, 3.910e-02, 2.279e-02, 6.041e-02, -8.205e-03, -5.819e-02, 5.701e-04, 5.763e-02, -1.835e-02, -7.273e-02, -1.017e-01, -4.708e-02, 3.331e-02)); + r += mul(s3_4, M4(-4.521e-02, -3.700e-02, -1.199e-01, -3.863e-01, -4.641e-01, -2.451e-01, 1.512e-03, -3.424e-01, -1.194e-01, 1.119e-01, -1.183e-01, 1.918e-01, 8.865e-02, 1.866e-01, -4.503e-02, 2.355e-03)); + r += mul(s3_5, M4(5.461e-02, -1.461e-01, 2.827e-01, 2.041e-01, -8.786e-03, 1.079e-02, 1.593e-01, 2.173e-01, 4.916e-01, -1.773e-01, 2.149e-02, -1.461e-01, -5.435e-02, 1.909e-01, -2.171e-01, -7.547e-02)); + r += mul(s3_6, M4(2.543e-02, 5.455e-02, -6.107e-02, 5.194e-03, 9.984e-02, 8.664e-02, -7.757e-04, 3.957e-02, 1.432e-01, 3.805e-02, -1.005e-03, 7.600e-02, -4.304e-02, -6.326e-02, 3.996e-02, 3.872e-03)); + r += mul(s3_7, M4(-1.234e-01, -1.276e-01, 1.312e-01, 8.454e-02, 1.539e-01, 6.822e-02, -1.455e-02, 1.223e-01, -1.060e-01, -3.708e-02, -1.480e-01, -7.922e-02, 6.503e-02, 1.105e-01, -1.249e-01, -3.210e-02)); + r += mul(s3_8, M4(1.676e-01, -7.072e-03, 4.581e-02, -1.006e-01, -1.056e-02, -8.209e-02, -4.804e-02, 2.427e-02, -1.165e-01, -8.224e-02, 2.940e-01, -9.220e-03, 5.420e-02, 1.802e-01, -1.190e-01, 1.433e-02)); + r += 
V4(-4.712e-03, -1.187e-02, 1.287e-02, -6.625e-03); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, 
s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.060e-02, 9.173e-03, 9.548e-04, -7.886e-02, -1.324e-02, 4.660e-02, -4.997e-02, -5.676e-02, -3.290e-02, 6.253e-02, -5.777e-02, 1.265e-02, 6.136e-03, 7.179e-02, 3.102e-02, 4.961e-02)); + r += mul(s0_1, M4(5.787e-03, -2.090e-03, -1.489e-01, 4.380e-02, 1.259e-01, 5.508e-01, 1.211e-01, 3.385e-01, 2.399e-02, -1.436e-01, 2.987e-03, -2.839e-02, -3.021e-02, -8.641e-03, 1.716e-01, -1.328e-02)); + r += mul(s0_2, M4(-3.284e-02, -5.196e-02, -2.983e-02, -2.858e-02, 1.729e-02, 7.665e-02, 1.387e-01, 1.037e-01, 4.289e-02, 1.274e-01, 3.348e-02, 1.911e-02, -1.786e-02, -4.888e-02, 6.323e-02, -2.989e-02)); + r += mul(s0_3, M4(2.473e-02, -6.550e-02, -1.373e-01, 3.680e-02, 1.575e-01, -8.270e-02, 3.186e-02, -3.836e-02, 4.508e-02, 4.254e-02, 5.656e-03, -9.132e-02, 1.334e-01, -5.076e-02, -2.445e-02, -4.735e-02)); + r += mul(s0_4, M4(-5.346e-01, 1.950e-01, 2.121e-01, -3.694e-01, 5.004e-02, 1.610e-02, 2.249e-01, -5.962e-02, -6.243e-02, -3.270e-01, 1.851e-01, 4.051e-02, -2.310e-01, -2.300e-01, -1.314e-01, 3.374e-01)); + r += mul(s0_5, M4(-4.686e-02, -3.968e-01, 2.772e-02, 2.495e-02, 4.541e-02, 8.724e-02, 4.401e-02, -1.515e-02, -6.453e-02, -7.210e-02, -1.250e-02, 4.044e-02, 3.057e-02, 2.485e-01, 2.228e-02, 
6.774e-02)); + r += mul(s0_6, M4(-1.518e-01, -6.862e-02, -8.148e-02, -2.030e-01, -4.453e-02, -2.133e-03, -6.081e-02, -8.941e-02, -5.417e-02, 1.564e-02, -5.425e-02, 5.875e-02, -8.805e-02, -1.910e-02, 2.099e-02, -1.402e-02)); + r += mul(s0_7, M4(-1.730e-02, -6.152e-02, -2.764e-01, -8.728e-02, 9.519e-03, -2.799e-02, -5.662e-02, 3.249e-02, 8.716e-02, 2.809e-02, -7.241e-02, 3.046e-02, 1.368e-01, 2.723e-02, 1.130e-01, -4.615e-02)); + r += mul(s0_8, M4(-5.021e-02, -3.352e-02, 5.072e-02, -1.434e-02, 6.511e-02, 6.519e-02, -8.987e-02, 2.193e-02, 1.583e-04, 2.714e-02, -2.315e-02, -3.077e-02, 7.792e-03, 2.782e-02, 9.282e-02, 5.011e-02)); + r += mul(s1_0, M4(-2.541e-02, -9.530e-03, -2.089e-01, -2.421e-02, 1.340e-02, 1.228e-01, 8.861e-02, -1.063e-02, -7.461e-02, 5.226e-02, -7.276e-02, 3.544e-02, -1.591e-02, 1.851e-02, 9.562e-03, 4.559e-02)); + r += mul(s1_1, M4(2.747e-02, -7.982e-02, -1.475e-01, 4.885e-02, -1.175e-02, -9.209e-02, -9.273e-02, -7.428e-02, 3.696e-02, -2.012e-01, 4.627e-02, 3.609e-02, 1.096e-01, -5.087e-02, 2.170e-01, 5.311e-02)); + r += mul(s1_2, M4(2.410e-02, 6.970e-02, 2.315e-02, 2.908e-02, 2.961e-05, 1.661e-02, 8.374e-02, 5.064e-02, 2.637e-02, 1.330e-01, 5.175e-02, -5.518e-02, -4.871e-03, 1.162e-01, 8.451e-02, 1.741e-02)); + r += mul(s1_3, M4(4.863e-02, -7.095e-02, 3.927e-03, -9.085e-02, 2.639e-02, -8.297e-02, -1.865e-01, -9.647e-02, 6.967e-02, 1.376e-02, 1.222e-01, -2.819e-01, 1.563e-01, -1.399e-02, -4.367e-02, -5.187e-02)); + r += mul(s1_4, M4(9.322e-02, 9.848e-02, 1.680e-01, -2.298e-01, -6.183e-02, -4.167e-02, -1.103e-02, -9.856e-03, -2.983e-03, -3.805e-01, -3.115e-01, -4.107e-01, -1.341e-01, -3.703e-01, -3.661e-01, -4.633e-01)); + r += mul(s1_5, M4(-2.785e-03, -2.188e-02, -2.790e-03, -4.276e-04, 7.082e-02, 1.004e-01, -3.532e-03, 1.740e-03, 6.693e-03, -5.230e-01, 2.119e-01, 2.878e-02, 3.915e-03, 1.842e-01, -1.630e-02, -3.874e-02)); + r += mul(s1_6, M4(2.313e-02, -6.545e-02, 1.631e-02, -1.278e-01, -4.216e-02, -4.147e-02, 6.827e-02, -1.725e-02, -5.254e-02, 
-3.942e-02, -2.400e-02, -8.124e-02, -3.250e-02, -1.806e-03, -3.947e-02, -7.056e-02)); + r += mul(s1_7, M4(8.445e-03, 1.147e-01, -7.772e-02, 1.091e-01, 1.842e-02, -6.040e-03, -7.053e-02, 1.824e-02, 2.212e-01, -8.777e-02, -1.003e-01, 6.533e-03, 2.090e-01, 4.588e-02, 9.886e-02, 6.176e-02)); + r += mul(s1_8, M4(4.046e-02, 1.872e-02, -5.723e-02, -4.997e-02, 5.232e-03, 1.795e-02, -2.747e-02, -1.507e-02, -1.704e-01, 7.849e-02, -1.475e-01, -4.255e-02, 7.807e-02, 4.185e-02, 3.849e-02, 3.137e-02)); + r += mul(s2_0, M4(-8.062e-03, 6.677e-02, 6.217e-02, 1.833e-01, -1.475e-01, 2.782e-01, 3.524e-02, -6.275e-02, 4.315e-02, 1.484e-02, 3.820e-02, -3.304e-02, 1.659e-03, -9.567e-03, -3.360e-02, -2.623e-02)); + r += mul(s2_1, M4(9.928e-02, -2.526e-01, -2.613e-02, 2.043e-01, 1.710e-02, -1.137e-01, 1.798e-01, -1.427e-01, 4.676e-03, 1.728e-01, 8.082e-02, -5.413e-02, -1.710e-02, -3.169e-02, -6.860e-02, 1.496e-02)); + r += mul(s2_2, M4(1.785e-02, 1.092e-01, -7.685e-02, 7.691e-02, 5.271e-03, -5.168e-02, 3.395e-02, 1.726e-02, 2.936e-02, -1.321e-02, 5.364e-02, -6.785e-03, 2.429e-02, -4.442e-02, -6.348e-02, 3.035e-02)); + r += mul(s2_3, M4(2.676e-01, 4.022e-03, -5.435e-02, -2.723e-01, -1.412e-01, -6.091e-01, 1.576e-02, 6.829e-02, -1.410e-01, 5.578e-03, 3.833e-03, 1.863e-01, -2.274e-02, 6.034e-03, 1.518e-01, -5.434e-02)); + r += mul(s2_4, M4(7.884e-02, 5.377e-01, -4.655e-02, -3.752e-01, 1.490e-01, -4.235e-02, -5.390e-02, 2.610e-01, 1.979e-01, -5.718e-02, 1.773e-02, 5.727e-02, 1.703e-02, 7.533e-01, -3.023e-02, 5.456e-02)); + r += mul(s2_5, M4(-4.898e-02, 4.237e-02, 6.311e-02, -4.635e-02, 3.660e-03, 2.139e-01, -3.722e-02, -6.738e-02, -3.009e-02, -6.140e-02, 2.777e-02, 3.917e-02, -1.421e-01, -4.041e-01, -1.524e-01, -9.837e-02)); + r += mul(s2_6, M4(6.071e-02, 1.084e-01, -6.370e-02, 1.323e-01, -7.251e-02, -1.079e-01, 1.208e-01, -4.495e-02, -2.115e-03, -4.107e-02, 2.465e-02, -1.230e-01, -6.064e-02, -4.263e-02, -1.388e-01, 6.519e-02)); + r += mul(s2_7, M4(9.042e-02, -8.032e-02, 1.186e-01, -1.537e-02, 
-6.566e-03, -3.216e-02, 3.412e-02, -3.207e-02, -1.586e-01, -2.988e-03, -2.358e-03, 2.172e-02, 6.775e-02, -3.590e-01, -4.123e-01, -3.506e-01)); + r += mul(s2_8, M4(8.486e-02, 4.731e-02, 5.779e-02, 1.000e-01, 9.121e-03, -3.421e-02, 4.891e-02, 4.916e-02, 3.343e-03, 4.437e-03, -2.002e-02, -3.856e-02, -1.319e-01, -4.022e-02, -1.752e-01, -9.250e-02)); + r += mul(s3_0, M4(-6.652e-03, 6.416e-02, 9.292e-03, 6.520e-02, 1.213e-02, 4.177e-02, 7.038e-02, -3.160e-02, 2.146e-02, -9.523e-02, -1.436e-01, -8.325e-02, -1.234e-02, -1.222e-02, -3.877e-02, -4.175e-02)); + r += mul(s3_1, M4(-5.171e-02, 1.011e-01, 7.998e-02, -8.804e-02, 1.067e-02, 1.516e-01, 6.508e-02, -7.724e-02, 2.717e-02, -4.901e-02, -6.059e-03, 4.013e-02, -3.833e-02, 1.538e-01, 5.948e-02, -4.945e-02)); + r += mul(s3_2, M4(1.023e-02, -1.230e-01, -1.861e-02, -2.570e-02, 2.512e-02, -4.630e-02, 6.354e-02, 3.897e-02, -2.146e-02, 2.446e-01, 1.906e-03, -9.068e-03, 1.754e-02, -7.082e-02, 1.107e-04, -9.604e-03)); + r += mul(s3_3, M4(7.380e-02, 2.216e-02, -2.608e-02, -6.491e-02, 4.018e-02, -6.657e-02, 1.116e-01, 9.405e-02, -7.168e-02, -3.646e-01, 8.387e-02, 1.352e-02, -4.589e-02, 2.235e-02, 1.881e-01, 1.759e-01)); + r += mul(s3_4, M4(-7.735e-02, -8.574e-02, -6.380e-02, 1.221e-01, 5.556e-02, -1.281e-01, 1.461e-01, 2.757e-01, 8.144e-01, -1.075e-01, 3.165e-03, -2.036e-01, 1.814e-01, 1.744e-01, -1.745e-01, 3.724e-02)); + r += mul(s3_5, M4(-6.864e-02, 1.273e-02, 7.502e-02, 4.164e-02, 1.301e-02, 1.407e-01, -9.985e-02, -8.079e-02, 1.428e-01, 3.034e-01, -1.564e-02, 6.091e-02, -1.271e-02, -2.153e-01, -7.843e-02, -4.063e-02)); + r += mul(s3_6, M4(-5.115e-02, 6.016e-02, -2.719e-02, 4.668e-02, -3.214e-02, -2.274e-02, -6.954e-03, -9.099e-03, 4.861e-02, 1.007e-01, -2.150e-01, -1.607e-01, -3.578e-02, 1.230e-02, -5.095e-02, 1.622e-02)); + r += mul(s3_7, M4(9.498e-02, -6.763e-02, 1.451e-01, 3.408e-03, -3.253e-02, 1.145e-01, 8.122e-03, -9.192e-02, 5.071e-02, -6.317e-02, 1.097e-01, 5.913e-02, 8.494e-02, 2.731e-04, -3.736e-01, -6.110e-03)); + r += 
mul(s3_8, M4(1.881e-02, 1.750e-02, 5.956e-02, 4.179e-02, -4.554e-02, -9.824e-02, 8.917e-03, 3.348e-02, 4.160e-02, 6.525e-02, 1.484e-02, -2.331e-02, -8.092e-02, -2.834e-02, -1.284e-01, -7.521e-02)); + r += V4(-5.270e-03, 1.390e-02, 8.622e-03, 1.255e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.868e-02, -7.333e-02, -1.029e-02, -7.011e-04, 2.404e-02, -9.301e-02, 1.457e-01, 2.242e-02, 6.850e-02, -1.328e-03, -2.557e-02, -4.854e-04, 1.071e-01, 3.788e-04, 1.408e-01, 5.354e-03)); + r += mul(s0_1, M4(7.892e-03, 5.832e-02, -1.077e-01, -6.140e-02, -1.003e-02, -4.887e-01, 8.263e-01, 2.416e-01, -3.434e-02, 1.089e-02, -1.984e-02, -3.615e-02, -5.692e-03, 1.615e-02, -5.680e-02, 2.041e-02)); + r += mul(s0_2, M4(2.136e-02, -2.731e-02, -1.742e-02, 2.592e-02, -4.319e-02, 6.426e-03, 2.110e-02, -6.338e-02, -6.921e-03, -1.288e-03, 4.579e-02, -2.155e-03, 3.041e-02, 1.946e-02, 1.238e-02, 6.906e-02)); + r += mul(s0_3, M4(-2.058e-02, 3.187e-02, -1.057e-01, 2.407e-01, -3.813e-02, -2.640e-02, 3.941e-02, 1.362e-01, 2.406e-02, 1.518e-02, -4.224e-02, 3.455e-02, 5.443e-02, -6.617e-02, -8.858e-02, -1.949e-02)); + r += mul(s0_4, M4(3.205e-01, -6.490e-01, -3.962e-01, -1.142e-01, -3.091e-02, 4.755e-01, -2.822e-01, -1.328e-01, -5.487e-01, 5.932e-02, -2.439e-02, -1.689e-01, -3.681e-02, -8.227e-02, 3.967e-02, -8.989e-02)); + r += mul(s0_5, M4(-5.668e-02, 3.658e-02, 1.227e-02, 8.117e-02, 1.161e-01, 9.350e-02, 9.971e-02, -1.220e-01, 7.876e-02, 5.186e-02, -4.261e-02, 1.436e-01, -2.114e-02, 6.113e-02, 2.251e-02, 2.534e-02)); + r += mul(s0_6, M4(-5.365e-02, -2.678e-02, -2.565e-02, 7.923e-02, -2.138e-02, -4.932e-02, -6.107e-03, 1.685e-02, 5.425e-02, -1.012e-02, 
-9.037e-03, 8.218e-04, -1.210e-02, 5.623e-02, -2.094e-02, -2.325e-02)); + r += mul(s0_7, M4(2.031e-02, -3.187e-02, 8.229e-02, 1.457e-01, 1.044e-01, -4.475e-02, 2.858e-02, -7.345e-02, -3.919e-02, -5.753e-02, 1.684e-02, -1.669e-01, 9.680e-03, 1.254e-01, 2.022e-03, -9.900e-02)); + r += mul(s0_8, M4(-1.164e-02, 5.171e-02, -5.704e-02, -1.643e-01, 2.554e-02, -9.988e-02, 3.699e-02, -3.752e-02, -8.076e-04, -2.527e-02, -2.081e-02, 3.110e-02, 1.484e-03, 4.064e-02, 2.481e-02, 2.225e-01)); + r += mul(s1_0, M4(4.384e-02, -1.401e-01, -4.071e-02, -1.137e-02, -4.979e-03, 6.159e-02, 1.275e-01, 6.544e-02, 1.288e-01, -4.421e-02, -4.471e-02, 2.682e-02, 5.621e-02, -4.062e-02, 1.034e-01, 6.606e-02)); + r += mul(s1_1, M4(2.799e-02, -1.333e-01, 1.521e-01, -5.025e-02, -1.895e-01, -7.913e-02, -2.321e-01, -6.526e-02, -1.330e-02, 1.499e-02, 1.620e-01, 6.936e-02, -6.816e-02, 1.353e-01, 1.107e-01, 5.514e-02)); + r += mul(s1_2, M4(5.826e-03, -5.941e-03, -2.338e-02, -2.826e-02, 9.265e-02, 3.608e-02, 1.114e-01, 1.274e-01, -1.291e-01, 1.284e-02, -7.540e-02, -3.458e-02, -1.006e-02, 2.083e-02, -8.393e-02, 8.186e-02)); + r += mul(s1_3, M4(-3.893e-02, -1.137e-02, 1.243e-01, 1.118e-01, 7.397e-02, -1.316e-01, -1.303e-01, -7.808e-05, 1.468e-02, -4.172e-03, -5.014e-02, -2.610e-02, 8.366e-02, -2.755e-02, 1.646e-04, -2.938e-02)); + r += mul(s1_4, M4(-1.114e-01, -2.017e-01, 2.898e-03, -7.984e-02, -8.403e-02, 2.626e-02, 1.563e-01, -1.397e-02, 2.724e-02, -6.698e-01, 2.358e-01, -6.466e-01, -9.650e-03, -6.742e-01, 1.411e-01, -3.343e-01)); + r += mul(s1_5, M4(7.380e-02, 6.420e-02, 7.990e-02, 6.014e-02, 5.950e-02, 6.212e-02, -7.881e-02, -2.782e-02, 1.087e-01, -3.347e-02, 3.819e-01, 1.988e-01, 5.813e-02, 2.239e-02, 3.012e-01, 1.275e-01)); + r += mul(s1_6, M4(-9.473e-02, -2.417e-02, -2.870e-02, 7.718e-02, -2.223e-02, 2.306e-02, 8.255e-03, -1.818e-02, -2.983e-02, -3.495e-02, 1.540e-02, 8.013e-02, 1.651e-02, -1.298e-02, 2.377e-02, 5.523e-02)); + r += mul(s1_7, M4(1.414e-01, 1.346e-01, 4.336e-03, -7.594e-02, -2.044e-02, 
-9.596e-03, -1.087e-03, 5.324e-02, -2.041e-02, -6.328e-02, 7.533e-02, -3.971e-01, 5.408e-04, 1.087e-01, 9.749e-03, -2.047e-01)); + r += mul(s1_8, M4(4.656e-02, -4.771e-02, -2.210e-02, -2.060e-02, -6.953e-03, -3.366e-02, -7.290e-03, -3.300e-02, -1.354e-01, -5.015e-02, -2.887e-02, 2.802e-01, 2.605e-02, -1.972e-02, 1.168e-03, 1.422e-01)); + r += mul(s2_0, M4(8.826e-02, -4.751e-02, 2.493e-01, 4.446e-02, 1.752e-01, 5.741e-03, -1.820e-01, 1.371e-02, -6.855e-02, 1.164e-02, -5.215e-02, -7.373e-04, -1.491e-02, 7.033e-03, -5.440e-02, -9.302e-05)); + r += mul(s2_1, M4(-6.871e-02, -9.419e-03, 3.276e-01, 2.826e-02, 5.675e-02, -3.974e-03, 1.104e-01, -2.975e-02, 3.281e-02, 8.429e-03, 1.129e-01, -4.830e-02, -4.374e-02, -6.905e-02, 8.143e-02, 3.180e-03)); + r += mul(s2_2, M4(-7.197e-02, -1.804e-02, -9.024e-02, -1.527e-03, 2.403e-02, 6.062e-02, 3.346e-02, 4.784e-02, -1.462e-02, 4.216e-02, 2.800e-02, 4.034e-04, -4.216e-02, -4.431e-03, -3.496e-02, -3.005e-02)); + r += mul(s2_3, M4(2.710e-02, -6.523e-02, 1.559e-01, -6.059e-02, 1.965e-01, -1.608e-01, -9.293e-03, -2.404e-01, -4.061e-02, 8.819e-02, 2.112e-02, 2.398e-01, -1.463e-01, 5.373e-02, -1.346e-02, 3.025e-02)); + r += mul(s2_4, M4(1.846e-02, 3.857e-01, -4.128e-01, -2.530e-01, -2.312e-01, 3.354e-02, -3.948e-01, -1.465e-01, 1.072e-01, -8.544e-02, -7.428e-02, 4.751e-02, 2.139e-01, 3.097e-01, -3.761e-01, 5.621e-02)); + r += mul(s2_5, M4(-1.203e-02, -9.598e-02, 4.101e-01, 1.578e-01, -5.394e-02, -6.714e-02, -6.320e-02, 8.249e-03, 5.620e-02, -3.219e-02, 9.398e-03, 6.809e-02, -7.400e-02, -1.431e-01, -1.425e-01, -2.358e-02)); + r += mul(s2_6, M4(4.035e-02, 5.655e-02, -3.307e-03, -3.497e-02, 6.522e-02, 1.103e-01, -9.802e-02, -2.655e-01, -5.802e-02, -4.359e-02, 3.459e-03, 1.592e-01, -2.566e-02, -1.156e-01, 2.646e-02, 3.806e-02)); + r += mul(s2_7, M4(-3.211e-02, -1.509e-01, 2.028e-03, -1.702e-01, 4.576e-02, -5.340e-02, 5.503e-02, 1.257e-02, -5.581e-02, 9.818e-02, -2.745e-02, 1.486e-01, 1.063e-01, -3.707e-01, 1.116e-01, -6.709e-02)); + r += 
mul(s2_8, M4(9.062e-04, -7.371e-03, 8.420e-02, 1.629e-01, -2.707e-02, -5.219e-03, 6.567e-02, 1.766e-01, 4.554e-04, 1.178e-02, -1.124e-02, 3.477e-02, -5.473e-02, -7.643e-02, 9.083e-03, -4.250e-02)); + r += mul(s3_0, M4(8.537e-02, 7.246e-02, 5.043e-02, 3.850e-02, -3.951e-02, 9.224e-03, 1.640e-02, 1.906e-02, -1.333e-01, -8.517e-02, -1.410e-01, 4.781e-02, -1.641e-02, 2.463e-03, -7.445e-02, -4.602e-02)); + r += mul(s3_1, M4(7.934e-02, 7.380e-03, -1.062e-01, -4.154e-03, -2.611e-02, -3.119e-02, 9.679e-02, -1.394e-02, -1.108e-01, 1.158e-02, 1.850e-01, -6.765e-02, 5.765e-02, 3.392e-02, -1.560e-02, -6.052e-02)); + r += mul(s3_2, M4(-3.056e-02, -8.450e-03, 1.524e-02, -1.007e-02, 1.030e-02, 4.032e-02, 9.837e-02, 9.371e-03, 4.740e-02, -4.795e-02, -3.356e-02, 8.602e-03, 2.484e-02, -9.889e-03, 6.734e-02, -2.287e-02)); + r += mul(s3_3, M4(6.303e-02, -7.328e-02, -5.082e-02, -5.070e-02, -8.129e-03, 7.948e-03, 7.351e-02, 5.601e-02, -4.169e-01, -8.219e-03, 3.373e-01, 1.781e-01, -5.139e-02, 1.471e-01, 3.415e-02, 7.690e-02)); + r += mul(s3_4, M4(1.455e-01, 6.664e-02, 5.792e-02, -1.276e-01, 2.360e-01, 5.978e-02, 5.147e-02, 2.707e-01, -7.581e-01, -2.740e-01, -3.000e-01, -2.017e-02, 2.005e-01, 6.934e-02, 2.996e-02, -3.036e-01)); + r += mul(s3_5, M4(-1.708e-01, -6.628e-02, 9.170e-02, 3.461e-02, -1.028e-01, -4.244e-02, -1.955e-01, -1.875e-01, 8.215e-02, 2.976e-02, -4.637e-03, 1.512e-01, -4.626e-02, 3.819e-03, -2.222e-01, -1.078e-01)); + r += mul(s3_6, M4(9.585e-02, 3.354e-02, -5.140e-03, -2.883e-02, -9.057e-02, 3.950e-02, -5.781e-02, -2.509e-02, -7.106e-02, -1.351e-01, -4.933e-02, 8.332e-02, -2.922e-02, -3.890e-02, 3.291e-03, 7.054e-02)); + r += mul(s3_7, M4(-8.208e-02, 1.377e-02, -2.475e-02, -1.353e-01, 9.728e-02, 7.136e-02, -3.984e-02, 1.374e-01, -1.160e-01, 4.362e-02, 6.714e-02, 5.038e-03, 1.866e-01, -2.349e-01, 8.599e-02, -1.510e-01)); + r += mul(s3_8, M4(-3.279e-02, 4.634e-02, 1.698e-02, 1.410e-01, -2.615e-02, 1.178e-02, 5.268e-02, 5.209e-02, 1.624e-02, 2.431e-02, -3.280e-02, 7.160e-02, 
-6.704e-02, -1.195e-01, -4.136e-02, -2.048e-01)); + r += V4(-7.279e-03, 1.016e-02, -7.400e-03, 4.979e-03); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, 
s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 6 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0, t1 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.838e-01, -1.901e-02, 9.627e-03, -5.113e-02, -2.616e-02, -2.850e-02, -1.739e-02, 9.125e-03, 1.563e-02, -1.253e-02, 1.902e-02, -1.512e-02, 3.495e-03, -1.497e-02, 4.974e-03, -1.115e-02)); + r += mul(s0_1, M4(-4.288e-01, 2.549e-01, -1.017e-01, -1.945e-01, 4.749e-02, 5.258e-02, -8.284e-03, -2.265e-02, -7.010e-02, -1.542e-02, 6.889e-03, 4.028e-02, -1.355e-02, 4.321e-03, 3.330e-02, 8.256e-02)); + r += mul(s0_2, M4(-1.045e-03, -2.140e-01, -3.928e-02, 1.364e-02, 1.787e-03, -9.427e-03, 1.927e-03, -2.145e-03, -4.617e-03, 2.814e-02, 2.045e-02, 1.776e-02, 3.188e-02, 3.187e-02, -1.498e-02, -2.180e-02)); + r += mul(s0_3, M4(1.059e-02, -5.522e-02, 6.447e-02, -4.395e-02, -4.846e-02, -2.209e-02, 1.866e-02, -5.920e-02, -2.419e-02, 1.032e-02, 7.736e-04, 1.092e-03, 1.854e-02, -4.388e-03, 3.893e-02, 9.549e-03)); + r += mul(s0_4, M4(3.358e-02, 7.431e-03, 1.780e-01, 5.353e-01, 2.674e-01, 2.227e-01, 1.450e-01, 2.085e-01, -8.104e-03, -3.561e-02, -1.231e-01, -1.932e-01, -6.538e-02, 3.378e-02, -1.314e-01, -2.862e-02)); + r += mul(s0_5, M4(-7.887e-02, 4.783e-02, -1.380e-01, -3.877e-01, 3.436e-03, 4.712e-02, -1.250e-02, 2.247e-02, 
2.920e-02, 7.190e-02, -2.005e-02, 6.169e-02, 5.594e-03, -4.630e-02, 9.661e-02, 3.625e-02)); + r += mul(s0_6, M4(1.963e-02, -1.873e-02, 1.489e-02, -1.253e-02, -9.356e-03, 1.334e-02, -3.747e-02, -1.115e-02, 7.741e-04, -6.463e-03, 3.707e-03, -3.598e-03, 1.783e-02, -5.539e-03, 9.899e-03, -9.354e-03)); + r += mul(s0_7, M4(-2.952e-03, 3.132e-02, -6.679e-02, 2.883e-02, -3.721e-02, -3.573e-02, 1.204e-01, -6.785e-02, -1.208e-02, -1.355e-04, 2.872e-02, 2.196e-02, -1.655e-02, 3.784e-02, -5.921e-03, 2.494e-02)); + r += mul(s0_8, M4(-1.215e-02, -2.947e-02, -2.454e-03, -6.326e-02, 2.248e-03, 2.302e-02, -2.863e-03, 5.834e-02, 2.187e-02, 9.973e-03, 2.158e-02, 4.902e-02, -2.207e-02, -3.485e-02, -5.118e-02, -6.696e-02)); + r += mul(s1_0, M4(8.377e-02, -3.093e-02, 2.280e-02, -2.664e-02, -4.333e-02, -3.292e-02, -8.109e-03, 1.105e-02, 1.507e-02, 9.138e-03, 2.597e-02, -1.926e-02, 4.537e-02, -9.080e-03, -1.629e-02, -1.180e-02)); + r += mul(s1_1, M4(-7.478e-02, 1.238e-01, -5.092e-02, -3.473e-02, 4.269e-02, 4.444e-02, 7.295e-03, 1.274e-04, -1.646e-01, -1.551e-03, 3.424e-02, 4.906e-02, -2.056e-01, -5.847e-02, 5.262e-02, 1.049e-01)); + r += mul(s1_2, M4(-6.323e-02, -1.179e-01, -1.982e-02, -4.065e-02, 3.089e-03, -9.469e-03, 2.850e-03, 3.314e-03, -1.819e-03, -1.065e-01, 1.882e-02, 9.349e-03, 1.624e-02, -2.906e-02, -2.029e-02, -5.020e-02)); + r += mul(s1_3, M4(1.101e-01, -3.490e-02, 1.327e-01, -2.853e-02, -5.027e-03, -5.703e-02, 6.484e-03, -6.473e-02, -4.310e-02, 3.882e-02, -3.100e-02, -7.837e-04, -5.501e-02, -1.261e-02, 7.285e-02, 4.648e-02)); + r += mul(s1_4, M4(-6.214e-02, 1.841e-01, -9.546e-02, 3.700e-01, 2.824e-01, 3.400e-01, 2.309e-01, 2.237e-01, 3.482e-01, -1.294e-01, -4.546e-01, -3.556e-01, -4.730e-01, -1.392e-01, 4.776e-01, 1.210e-01)); + r += mul(s1_5, M4(-5.408e-02, -1.286e-01, -6.571e-02, -1.230e-01, 9.991e-03, 6.421e-02, 4.305e-03, 1.780e-02, 2.254e-02, 3.661e-01, 6.275e-02, 8.004e-02, -1.834e-02, -3.465e-01, 9.274e-02, 3.935e-01)); + r += mul(s1_6, M4(-1.620e-03, -1.423e-02, 
2.785e-02, -1.252e-02, -1.218e-02, 2.842e-03, -3.496e-02, -2.927e-02, -2.106e-02, -7.099e-03, 2.545e-02, 2.484e-02, 2.973e-02, 4.563e-04, 2.010e-04, -1.839e-02)); + r += mul(s1_7, M4(1.400e-04, 3.080e-02, -6.992e-03, 8.032e-02, -2.280e-02, -4.436e-02, 7.600e-02, 1.165e-02, -9.494e-02, -2.207e-02, 2.783e-01, 2.095e-01, 2.645e-02, 5.203e-02, -7.492e-02, -1.303e-02)); + r += mul(s1_8, M4(-5.360e-03, -2.277e-02, -2.252e-02, -6.191e-02, 1.263e-02, 1.540e-02, 9.566e-03, 3.637e-02, -1.265e-02, -3.092e-02, -1.298e-02, -1.187e-02, 1.286e-02, 1.181e-02, -5.675e-02, -5.487e-02)); + r += mul(s2_0, M4(6.275e-02, 3.332e-02, 2.458e-02, -1.910e-02, -1.764e-02, 2.292e-02, -3.220e-02, -1.127e-02, 4.114e-02, 4.303e-02, -3.355e-02, -8.882e-03, 1.881e-02, 1.788e-02, -3.354e-03, -1.345e-02)); + r += mul(s2_1, M4(-1.562e-01, -7.407e-02, -5.684e-02, -3.194e-03, 1.150e-01, -2.700e-02, -9.666e-03, -3.629e-02, 5.862e-02, 6.747e-02, -1.085e-02, -3.454e-02, 7.263e-05, 2.167e-02, 5.491e-03, -6.472e-02)); + r += mul(s2_2, M4(5.068e-03, 4.899e-02, 1.480e-02, -2.153e-02, 1.102e-02, 2.831e-02, -5.931e-03, 1.021e-02, -1.267e-02, -1.569e-02, 5.418e-04, 1.030e-02, -3.280e-02, -3.072e-02, -2.688e-02, -2.208e-02)); + r += mul(s2_3, M4(-7.105e-02, 1.664e-03, -3.108e-02, 6.985e-02, 3.176e-02, 2.312e-02, -3.835e-02, 3.884e-02, -1.038e-01, 6.660e-02, -1.372e-01, 2.432e-02, -2.888e-04, -2.049e-02, 2.271e-02, 9.383e-03)); + r += mul(s2_4, M4(4.697e-01, -3.721e-01, 1.705e-01, -2.767e-01, -1.791e-02, -7.276e-02, 2.503e-01, 1.040e-01, 1.180e-02, -5.212e-01, 4.014e-01, 1.946e-01, -2.547e-02, -1.567e-02, -5.652e-02, 9.687e-02)); + r += mul(s2_5, M4(3.024e-02, 1.618e-02, 2.619e-02, 8.868e-02, -5.217e-02, -7.642e-02, -3.704e-02, -2.374e-02, -4.639e-02, 5.743e-02, -3.967e-02, -2.450e-02, 2.091e-02, -1.108e-02, 6.949e-03, -1.502e-02)); + r += mul(s2_6, M4(5.298e-03, -6.810e-03, -1.982e-02, 1.960e-04, 3.645e-03, 5.483e-03, 3.357e-03, 3.697e-02, -1.339e-02, 3.253e-02, 3.649e-02, 4.492e-03, 2.076e-02, -9.046e-03, 
2.043e-02, -7.803e-03)); + r += mul(s2_7, M4(-2.707e-02, 7.878e-02, 1.816e-01, -1.506e-03, 9.060e-03, -1.418e-02, -6.983e-02, -5.833e-02, 3.309e-02, -2.537e-02, -3.298e-01, -1.735e-01, -2.132e-03, 5.241e-02, 1.155e-02, 4.817e-02)); + r += mul(s2_8, M4(-1.781e-02, 1.652e-02, -7.188e-03, 2.114e-03, -1.105e-02, -1.137e-02, -9.037e-03, -5.600e-02, -1.220e-02, 8.292e-03, -3.404e-03, -4.211e-02, -1.018e-02, -1.004e-02, 1.505e-03, -4.591e-03)); + r += mul(s3_0, M4(7.349e-02, 2.926e-02, 2.398e-02, -1.821e-02, -1.290e-02, 1.201e-02, 5.000e-03, 1.316e-02, -1.567e-02, 2.025e-02, -2.171e-02, -3.941e-04, -7.948e-03, 6.116e-02, -9.445e-03, 1.911e-02)); + r += mul(s3_1, M4(-1.294e-01, -6.121e-02, -4.576e-02, 9.211e-03, 1.371e-02, -1.964e-02, -3.133e-03, 4.701e-03, 9.544e-02, 6.692e-03, 4.665e-04, -2.056e-02, 3.455e-01, -2.495e-01, 1.027e-02, -7.393e-02)); + r += mul(s3_2, M4(1.532e-02, 9.402e-03, 6.812e-04, -3.241e-02, 1.245e-03, 6.504e-03, 3.970e-03, 7.168e-03, 9.435e-03, 1.574e-02, -5.118e-03, 5.232e-03, -2.659e-02, -6.011e-02, -2.446e-02, 8.062e-04)); + r += mul(s3_3, M4(-6.714e-02, 3.454e-03, -1.486e-02, 5.921e-02, 5.177e-02, 3.766e-02, -1.473e-01, 4.371e-02, -7.118e-02, 2.462e-02, -1.810e-02, 3.430e-02, -5.552e-02, 3.047e-02, -5.066e-02, 5.769e-02)); + r += mul(s3_4, M4(3.191e-02, -1.387e-01, -5.992e-02, -1.554e-01, 4.660e-01, 3.655e-01, 2.406e-02, -3.902e-01, 3.973e-02, -1.333e-01, 1.792e-01, 8.854e-02, 2.477e-01, -3.115e-01, 6.035e-01, -4.717e-01)); + r += mul(s3_5, M4(7.241e-02, 1.273e-01, 6.810e-02, 1.118e-01, -5.454e-02, -1.728e-02, -1.007e-01, -2.265e-02, -4.534e-02, -5.171e-02, -2.524e-02, -3.337e-02, -2.366e-03, -1.723e-02, 2.300e-02, -9.889e-02)); + r += mul(s3_6, M4(3.383e-02, -7.898e-03, 1.681e-02, -5.131e-03, 3.687e-02, 1.929e-02, -7.695e-03, 2.145e-03, -2.814e-02, 3.366e-02, -8.788e-02, 3.614e-02, 2.951e-02, -6.964e-03, 2.272e-02, 1.581e-02)); + r += mul(s3_7, M4(7.020e-03, 6.046e-02, 2.975e-02, 5.663e-02, 2.155e-02, -1.786e-02, -2.588e-01, -1.310e-01, 
-6.372e-02, -1.218e-01, -7.160e-02, -3.058e-01, 2.297e-03, 3.050e-02, -2.346e-02, 4.674e-02)); + r += mul(s3_8, M4(-1.071e-02, -1.089e-02, 9.286e-03, 5.202e-02, -2.291e-02, -2.655e-02, 2.386e-02, -3.231e-02, 4.599e-03, 1.114e-02, -1.630e-02, -1.693e-02, -2.194e-03, 1.842e-02, -2.522e-02, 5.265e-02)); + r += V4(-1.667e-03, -2.914e-03, -1.783e-03, -1.113e-03); + return tanh(r); +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = 
max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-4x8C-NVL.hlsl b/src/Effects/CuNNy/CuNNy-4x8C-NVL.hlsl new file mode 100644 index 000000000..8bc45dba2 --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-4x8C-NVL.hlsl @@ -0,0 +1,921 @@ +// CuNNy 4x8C BILINEAR RGB NVL - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-D08N04 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1 + +#define l0(x, y) min16float((dot(float3(2.329e-01, 4.438e-01, 9.598e-02), O(INPUT, float2(x, y)).rgb) + -5.664e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(-1.368e-01, -5.123e-02, -2.270e-01, -9.888e-02) * s0_0; + r += V4(3.682e-01, 4.625e-02, 1.372e-01, 3.834e-01) * s0_1; + r += V4(-9.245e-02, 7.555e-03, 3.923e-02, 1.252e-02) * s0_2; + r += V4(-2.312e-01, 2.012e-02, 1.660e-01, 4.386e-01) * s0_3; + r += V4(-3.965e-02, -4.834e-01, 3.729e-01, -7.207e-01) * s0_4; + r += V4(2.190e-01, -9.021e-02, -1.087e-01, -9.632e-03) * s0_5; + r += V4(4.088e-02, 1.183e-01, 8.976e-02, -1.710e-03) *
s0_6; + r += V4(-5.188e-03, 5.274e-01, -8.856e-02, -6.446e-03) * s0_7; + r += V4(-7.160e-02, -9.349e-02, -3.823e-01, 1.947e-03) * s0_8; + r += V4(3.244e-02, 2.492e-04, 8.562e-04, 1.261e-04); + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(2.403e-02, 8.569e-03, -8.618e-02, 2.022e-02) * s0_0; + r += V4(4.893e-01, 2.383e-02, 2.423e-02, -3.486e-01) * s0_1; + r += V4(-3.682e-02, 2.437e-03, 1.872e-01, 1.135e-01) * s0_2; + r += V4(-2.361e-02, 2.588e-02, 7.348e-02, -8.229e-03) * s0_3; + r += V4(-4.433e-01, -5.131e-01, -3.778e-01, 6.107e-02) * s0_4; + r += V4(-4.423e-02, 2.098e-02, 9.260e-03, 4.444e-02) * s0_5; + r += V4(-1.370e-02, 1.009e-02, 3.020e-01, 1.159e-02) * s0_6; + r += V4(-3.030e-03, 8.145e-03, -2.789e-02, -7.085e-03) * s0_7; + r += V4(2.648e-02, 4.731e-03, -1.067e-01, -4.477e-03) * s0_8; + r += V4(-1.971e-02, 8.202e-02, 4.706e-03, -6.665e-02); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 
s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.205e-01, 8.504e-02, -7.328e-02, 1.539e-01, -9.103e-03, -2.708e-02, -1.401e-01, -2.159e-01, -2.552e-01, 7.462e-02, 5.919e-02, 8.905e-02, 1.169e-01, -4.383e-03, -1.997e-01, -1.379e-01)); + r += mul(s0_1, M4(2.844e-02, 2.238e-02, 2.143e-01, -1.624e-01, 1.885e-01, 1.316e-01, -1.276e-01, -1.713e-01, 2.553e-03, -1.343e-01, 4.700e-02, 4.762e-01, -2.676e-01, 1.784e-01, -4.065e-02, 1.015e-01)); + r += mul(s0_2, M4(-4.442e-03, 3.253e-01, 2.650e-02, -2.907e-01, 2.749e-01, -3.510e-01, 8.545e-02, -2.446e-01, -1.579e-01, 9.398e-02, -4.544e-02, -9.123e-02, -2.529e-01, -2.538e-01, -2.686e-01, 2.607e-01)); + r += mul(s0_3, M4(1.518e-01, -1.515e-01, -1.597e-01, 2.163e-01, -6.933e-02, 7.220e-02, 2.114e-01, -2.227e-01, -3.743e-01, 9.056e-02, 2.612e-02, 3.036e-01, -1.583e-02, -8.293e-02, -1.068e-01, 6.201e-02)); + r += mul(s0_4, M4(-2.305e-02, 9.029e-02, -1.003e-01, -2.375e-01, -1.891e-01, 3.623e-01, -2.999e-01, -4.511e-01, 1.460e-01, -3.825e-01, 1.231e-01, 6.391e-01, -6.041e-01, 5.588e-01, -3.508e-01, -3.131e-01)); + r += mul(s0_5, M4(8.812e-02, 2.197e-01, -8.630e-03, 2.287e-02, -1.918e-01, -6.428e-01, 1.496e-01, 2.272e-01, 3.445e-02, -7.188e-03, -8.518e-02, 1.948e-01, 1.606e-01, -8.707e-01, 2.092e-02, -4.993e-01)); + r += mul(s0_6, M4(9.718e-03, 8.373e-03, 7.436e-02, -1.552e-01, 8.410e-02, -1.728e-02, -1.971e-01, 2.255e-02, -8.645e-02, 1.863e-02, -9.399e-02, -8.424e-02, -1.533e-03, 1.223e-01, 2.715e-01, -1.268e-01)); + r += mul(s0_7, M4(-4.246e-01, -1.034e-01, 3.236e-01, 5.680e-01, -1.213e-02, 1.577e-01, -9.408e-02, -7.294e-02, -6.410e-02, 4.264e-02, -8.392e-03, 2.192e-01, 1.656e-01, 4.681e-02, 9.146e-01, -6.311e-02)); + r += mul(s0_8, M4(-1.847e-01, -9.105e-02, -3.260e-02, 2.506e-01, -6.470e-02, 
4.430e-02, -1.242e-02, -1.097e-01, 5.488e-02, 9.106e-02, 3.144e-02, -3.367e-05, 2.468e-01, -2.535e-01, 1.409e-01, -5.311e-01)); + r += mul(s1_0, M4(1.294e-01, 1.098e-01, 7.497e-03, 1.016e-01, 1.377e-02, -1.480e-02, -2.694e-02, -3.417e-02, -1.083e-01, -2.575e-03, 1.137e-01, -2.616e-01, -1.260e-01, -2.567e-02, -1.958e-01, 6.103e-02)); + r += mul(s1_1, M4(-1.355e-01, 1.168e-01, 2.368e-01, -2.379e-01, 8.556e-01, 1.401e-01, 3.238e-01, 2.737e-01, 8.041e-02, -1.662e-01, 9.181e-02, -3.488e-01, -1.586e-01, 1.407e-01, -1.126e-01, 1.825e-01)); + r += mul(s1_2, M4(-1.881e-02, 4.604e-01, -1.712e-02, 3.453e-02, 3.171e-01, -1.126e-01, 6.510e-02, 2.908e-01, -9.125e-02, 7.793e-02, -5.580e-02, -3.603e-01, 9.996e-02, -2.647e-01, -2.114e-01, 2.330e-01)); + r += mul(s1_3, M4(2.957e-01, -1.252e-01, -2.840e-01, 1.815e-01, -2.900e-01, 1.027e-01, 1.404e-01, -1.123e-01, -1.767e-01, 1.535e-03, -3.568e-03, -2.824e-01, 2.015e-01, -7.712e-02, -6.140e-02, 6.517e-02)); + r += mul(s1_4, M4(-2.439e-01, 7.096e-02, -2.116e-01, -1.980e-01, -3.221e-01, 2.007e-01, -4.243e-01, -5.013e-01, 1.181e-01, -3.735e-01, 1.812e-01, -5.095e-01, 3.646e-01, 4.013e-01, -8.028e-02, 1.287e-01)); + r += mul(s1_5, M4(-8.389e-02, -1.091e-01, 6.962e-02, 2.605e-01, -3.435e-03, -5.146e-01, 4.125e-01, 5.487e-01, -1.481e-01, 6.810e-02, -1.450e-01, -9.583e-02, 3.305e-01, -1.238e+00, 2.036e-01, 1.879e-01)); + r += mul(s1_6, M4(-8.033e-02, 5.944e-03, 2.453e-01, -2.971e-01, -5.652e-02, -1.251e-02, -1.449e-01, -5.344e-02, -1.377e-01, 9.383e-03, -1.862e-01, -2.528e-01, -3.825e-02, 7.296e-02, 2.373e-01, -1.935e-01)); + r += mul(s1_7, M4(-1.795e-01, 1.597e-01, 2.709e-01, -3.738e-01, 2.604e-02, 1.678e-01, -8.718e-02, -9.483e-03, -3.844e-02, 6.235e-02, -1.344e-01, 1.837e-02, -3.074e-02, 2.568e-02, 1.030e+00, 1.831e-01)); + r += mul(s1_8, M4(4.299e-02, 6.530e-03, -2.571e-02, 3.382e-01, -1.327e-01, 2.975e-02, -2.861e-02, 1.963e-01, 8.130e-04, 9.743e-02, -1.177e-02, -1.273e-01, -1.265e-01, -3.003e-01, 2.635e-01, 5.426e-02)); + r += 
mul(s2_0, M4(-1.538e-01, 1.580e-01, 1.392e-01, -1.077e-01, -1.228e-01, 1.853e-01, -1.010e-01, 3.144e-02, 2.203e-01, -3.309e-02, 6.819e-02, 2.708e-01, 1.720e-01, 2.635e-01, -1.290e-01, -2.932e-01)); + r += mul(s2_1, M4(1.615e-01, -1.424e-01, -2.346e-01, -1.008e-01, 1.386e-01, -2.281e-01, -1.313e-01, -5.902e-02, -3.376e-02, 1.925e-01, -1.172e-01, 7.865e-02, 2.112e-01, -7.280e-02, -1.953e-01, -1.198e-02)); + r += mul(s2_2, M4(1.280e-01, -1.353e-01, 1.251e-01, 3.212e-02, -1.144e-01, -1.492e-01, -1.499e-01, 2.211e-01, 1.307e-01, 1.336e-01, 1.977e-01, -1.429e-02, -5.395e-02, -2.772e-02, -3.214e-01, -1.907e-01)); + r += mul(s2_3, M4(-2.703e-01, 3.122e-01, 1.951e-01, -2.005e-01, 1.463e-01, 3.000e-01, 1.058e-01, 8.352e-02, 1.567e-01, -1.256e-01, -1.854e-01, -2.018e-01, 3.248e-01, 8.780e-02, 1.586e-01, -9.757e-03)); + r += mul(s2_4, M4(3.941e-02, -1.430e-01, 1.023e-01, 2.878e-01, 8.414e-02, 1.385e-01, 8.032e-02, -6.330e-02, -1.020e-01, 2.731e-01, -6.877e-02, -3.492e-01, 3.758e-01, -7.526e-02, 4.955e-01, -5.595e-01)); + r += mul(s2_5, M4(2.684e-01, -1.924e-02, -2.975e-02, 7.205e-01, 6.611e-02, -1.645e-01, 1.267e-01, 6.066e-02, 1.695e-01, -4.367e-01, -1.450e-01, -4.074e-02, 4.469e-01, -7.176e-03, 4.177e-01, -4.565e-01)); + r += mul(s2_6, M4(-1.843e-01, 2.522e-01, 3.324e-01, -1.821e-01, -1.327e-01, 1.182e-01, 1.158e-01, -2.494e-01, -6.459e-03, -6.606e-03, 1.333e-01, 2.229e-01, 2.481e-01, -2.018e-01, 2.456e-01, 2.351e-01)); + r += mul(s2_7, M4(-6.894e-03, -2.822e-01, -1.863e-01, -2.252e-01, 6.755e-02, -1.766e-01, 8.884e-02, -2.720e-03, -4.431e-02, -2.119e-02, 2.876e-01, -5.268e-01, -3.635e-01, -1.001e-01, -8.433e-01, 5.160e-01)); + r += mul(s2_8, M4(-1.786e-01, 2.208e-01, 4.289e-01, 1.663e-01, -2.341e-01, 8.148e-03, -7.557e-02, 7.817e-02, -1.340e-01, -2.341e-01, 3.123e-02, 1.120e-01, -7.753e-01, 2.056e-01, -2.926e-01, -1.222e-01)); + r += mul(s3_0, M4(-4.903e-02, 1.377e-01, 6.984e-02, -1.053e-02, -5.115e-01, 2.891e-01, -4.612e-01, -6.693e-01, 4.752e-02, -5.287e-02, -2.183e-02, 
4.134e-01, 1.073e-02, 2.383e-01, -2.142e-01, 1.384e-01)); + r += mul(s3_1, M4(1.680e-01, -1.307e-01, -1.038e-01, -2.130e-02, -1.231e+00, -2.602e-01, -5.456e-01, 3.295e-01, -5.588e-02, 1.505e-01, -4.784e-02, -1.493e-01, 1.202e-01, -2.349e-01, -1.452e-01, -5.111e-02)); + r += mul(s3_2, M4(-8.858e-02, -1.293e-01, 9.441e-02, -1.295e-01, -3.373e-01, -1.841e-01, -1.818e-01, 1.570e+00, -8.336e-02, 2.012e-01, 1.362e-01, 1.830e-01, -6.053e-02, -1.725e-03, -2.011e-01, -1.021e-01)); + r += mul(s3_3, M4(-2.017e-01, 3.505e-01, 3.541e-02, 2.044e-01, -3.839e-01, 5.124e-01, 1.104e-01, 1.311e-01, 1.022e-01, -1.111e-01, -2.883e-01, 1.086e-01, 9.932e-02, 1.308e-01, 2.954e-01, -1.416e-02)); + r += mul(s3_4, M4(6.088e-02, -4.532e-02, -1.302e-01, -1.067e-01, -4.196e+00, 7.383e-01, -2.786e-01, -2.053e+00, -3.758e-01, 2.955e-01, -1.898e-01, 1.875e-01, 1.263e-01, 9.931e-03, 1.016e-01, 5.201e-02)); + r += mul(s3_5, M4(9.722e-03, -5.478e-02, -1.823e-01, -3.983e-02, -2.434e+00, -4.700e-01, 4.168e-01, 3.938e-01, 1.251e-01, -2.933e-01, -2.054e-02, 8.827e-02, 2.048e-02, 6.212e-02, 1.448e-01, 1.042e-01)); + r += mul(s3_6, M4(-1.605e-02, 1.851e-01, 2.427e-01, 4.894e-02, -6.032e-01, -3.413e-02, 4.158e-01, 6.903e-01, -1.865e-02, -1.318e-02, 1.003e-01, 3.193e-01, 4.503e-02, 1.880e-01, -4.608e-02, -3.137e-01)); + r += mul(s3_7, M4(-4.125e-02, -1.494e-01, 8.853e-01, -1.540e-01, -2.445e-01, 2.292e-01, 1.684e+00, 1.098e+00, 5.576e-02, -8.241e-02, 2.507e-01, -1.086e-01, 1.392e-01, -2.115e-01, -2.600e-01, 9.268e-02)); + r += mul(s3_8, M4(5.677e-02, 9.206e-02, 5.863e-02, 5.663e-02, -2.019e+00, -1.006e-01, -1.769e-01, -3.617e-01, 1.293e-02, -2.766e-01, 2.843e-02, 3.331e-01, -2.316e-01, -1.762e-01, -6.013e-03, -2.482e-02)); + r += V4(3.430e-02, -1.031e-02, -1.631e-02, -3.189e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, 
V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.260e-02, 1.675e-01, 8.130e-02, -2.153e-01, -1.987e-01, -9.443e-02, 3.512e-01, 2.289e-02, 9.481e-02, -1.921e-01, -3.818e-01, 1.373e-01, -9.032e-02, 7.892e-02, 1.392e-01, -6.033e-02)); + r += mul(s0_1, M4(-8.203e-02, -1.015e-01, -1.313e-02, -5.337e-02, -2.948e-01, -2.678e-01, -2.321e-01, -5.995e-01, 1.364e-01, 1.030e-01, 1.546e-01, -1.179e-02, 1.996e-01, 2.244e-01, -2.304e-01, -1.304e-02)); + r += mul(s0_2, M4(-2.319e-02, -2.236e-02, 3.976e-02, 1.804e-01, 6.474e-02, 1.315e-01, -1.456e-02, -1.538e-01, 3.061e-02, -1.998e-02, -1.918e-02, -8.662e-02, -1.980e-01, -1.596e-01, -4.624e-01, -3.728e-01)); + r += mul(s0_3, M4(-3.171e-03, -2.887e-02, 3.107e-01, -8.532e-02, 1.489e-02, -2.798e-01, -2.458e-02, 2.922e-01, 5.196e-02, 2.333e-02, -4.100e-01, 3.851e-01, 8.566e-02, 1.655e-01, 3.680e-01, -3.572e-01)); + r += mul(s0_4, M4(4.618e-02, -3.100e-02, -1.849e-01, 2.228e-02, -2.182e-01, -5.806e-01, -6.298e-02, 2.421e-01, 4.266e-01, 7.738e-02, 4.856e-03, -1.191e-01, 3.469e-01, -8.683e-02, -2.397e-01, 6.512e-02)); + r += mul(s0_5, M4(8.363e-02, -9.745e-02, 2.398e-01, -1.335e-01, -1.585e-01, -1.161e-02, 2.482e-02, 1.319e-03, -4.696e-02, -6.675e-02, -7.519e-02, 1.125e-01, -1.199e-01, -9.094e-03, -2.590e-01, -8.812e-01)); + r += mul(s0_6, M4(7.745e-02, 3.414e-02, 6.378e-02, -8.388e-02, 4.456e-02, 1.354e-02, -1.138e-02, 1.131e-01, 2.361e-01, 1.828e-01, -2.135e-01, -1.100e-02, 1.683e-01, 2.134e-01, 1.832e-01, 8.420e-02)); + r += mul(s0_7, M4(-3.223e-01, -4.870e-02, -1.457e-01, 1.996e-01, -1.632e-01, -1.811e-01, -1.625e-01, 4.046e-02, -8.959e-02, 1.432e-01, -2.360e-02, -9.415e-02, -1.547e-01, 1.379e-01, 5.098e-01, -4.069e-01)); + r += mul(s0_8, M4(1.568e-01, -2.510e-02, -9.894e-02, 1.124e-01, -1.372e-01, 5.952e-03, 4.501e-02, 9.591e-03, 1.430e-01, 6.422e-02, -1.412e-03, 1.042e-02, 4.601e-02, -5.133e-02, -7.936e-02, -1.621e-01)); + r += 
mul(s1_0, M4(1.380e-01, 1.774e-01, 2.958e-01, -2.044e-01, -2.085e-01, 7.192e-03, -7.903e-02, 6.119e-02, -3.542e-02, -1.060e-01, -1.832e-01, 3.603e-01, -3.854e-02, 5.092e-02, -1.092e-01, -2.074e-01)); + r += mul(s1_1, M4(-5.638e-02, -1.659e-01, -1.006e-02, 5.355e-02, -2.243e-01, 3.533e-01, -2.130e-01, 6.480e-02, 4.462e-02, 1.065e-01, 1.598e-01, 5.025e-03, -3.810e-02, 1.012e-01, 2.123e-02, 2.124e-01)); + r += mul(s1_2, M4(5.207e-02, -1.428e-01, 1.745e-01, 2.563e-01, 4.058e-01, 5.320e-02, 3.527e-03, -4.664e-02, -1.641e-03, -2.830e-02, 1.453e-02, 1.169e-01, -5.840e-01, -1.545e-01, 3.880e-01, 1.250e-01)); + r += mul(s1_3, M4(-2.089e-01, 3.070e-02, 3.770e-01, -2.868e-01, -1.965e-01, -2.499e-01, -2.145e-01, 5.348e-02, -1.201e-01, -3.454e-01, -5.723e-01, 4.313e-01, -7.068e-02, -6.358e-02, -2.426e-02, -2.841e-01)); + r += mul(s1_4, M4(1.315e-01, 2.464e-01, -2.505e-01, -1.589e-01, 4.124e-01, 4.860e-01, -2.493e-01, 1.201e-01, -1.304e-01, -1.620e-01, 2.228e-01, 4.485e-02, 6.945e-02, -2.261e-01, -8.190e-04, 5.678e-01)); + r += mul(s1_5, M4(3.529e-01, 1.800e-02, -9.794e-02, -1.160e-01, 7.052e-01, 4.176e-01, 5.822e-02, -5.300e-02, -1.144e-01, -1.890e-01, 1.337e-01, 1.163e-01, -5.024e-01, 9.977e-01, 1.831e-01, 2.166e-02)); + r += mul(s1_6, M4(-1.239e-01, 1.465e-01, 3.700e-01, -1.638e-01, -1.022e-01, -3.216e-02, -2.412e-02, -2.505e-02, 5.450e-02, -1.325e-02, -2.760e-01, 5.219e-02, -5.604e-02, 3.602e-02, -1.026e-01, 4.063e-02)); + r += mul(s1_7, M4(1.669e-01, 2.580e-01, -2.923e-01, -2.497e-01, 1.135e-01, -1.599e-01, -2.419e-01, -1.202e-01, -3.903e-01, -2.141e-01, 9.642e-02, -6.096e-02, -6.762e-01, 5.614e-01, 3.076e-01, -4.187e-01)); + r += mul(s1_8, M4(5.456e-02, -6.641e-02, -3.839e-01, 8.629e-02, 1.149e-01, 1.204e-02, -2.509e-02, -1.413e-03, -1.329e-02, -5.670e-02, -6.186e-02, 5.108e-02, 3.592e-02, 4.563e-01, -7.450e-02, -2.259e-01)); + r += mul(s2_0, M4(1.013e-01, -2.126e-02, -1.260e-01, 8.480e-03, -3.292e-02, 6.069e-04, 4.154e-02, 5.578e-02, 1.586e-02, 8.252e-02, 1.237e-01, 
-1.312e-01, 1.489e-01, 2.561e-01, -9.917e-02, -1.060e-01)); + r += mul(s2_1, M4(-1.285e-01, -8.314e-02, 1.521e-02, 1.037e-01, -1.021e-02, 7.112e-02, -2.319e-02, 7.051e-04, -1.101e-01, -1.896e-01, -2.458e-01, -7.399e-02, -4.133e-02, 1.606e-01, -1.511e-01, -2.425e-01)); + r += mul(s2_2, M4(7.543e-02, 9.235e-02, 2.139e-01, 2.879e-01, 9.583e-02, 4.372e-02, -8.231e-02, 2.498e-01, 1.241e-01, 1.377e-02, 2.380e-01, 2.586e-02, -1.926e-01, -1.406e-01, -3.627e-01, -8.414e-02)); + r += mul(s2_3, M4(9.655e-03, -9.581e-02, -6.071e-02, 2.231e-01, -1.148e-01, -3.513e-02, -2.013e-02, -1.094e-01, -1.606e-01, 9.180e-02, 3.498e-01, -2.726e-01, -7.696e-03, -4.007e-01, -8.497e-02, -6.989e-01)); + r += mul(s2_4, M4(4.965e-03, -1.346e-01, -4.517e-02, 2.043e-01, -1.348e-01, 1.451e-01, 8.113e-02, -8.530e-02, -1.414e-01, 7.261e-02, -2.368e-01, 1.601e-01, -2.438e-02, -2.554e-01, 4.057e-01, -2.224e-01)); + r += mul(s2_5, M4(-8.716e-02, 1.496e-01, -4.429e-02, 6.451e-01, -9.547e-03, -3.189e-02, -1.096e-01, -5.416e-02, -5.032e-01, 1.331e-01, 2.389e-02, 1.028e-01, -3.186e-01, -2.524e-01, 2.663e-02, -9.995e-03)); + r += mul(s2_6, M4(-2.465e-01, 1.585e-01, 3.196e-01, -9.098e-02, 2.765e-02, -1.793e-01, 1.519e-01, -9.565e-04, -1.160e-01, -3.035e-02, -1.082e-01, 3.172e-02, 5.502e-01, -6.251e-01, -4.487e-01, 1.932e-01)); + r += mul(s2_7, M4(-5.017e-01, -5.180e-01, -2.682e-01, -4.715e-01, 1.958e-02, -7.007e-02, -3.332e-02, -8.389e-02, -1.135e-01, -2.956e-02, 1.994e-01, 2.315e-02, -2.553e-01, -3.153e-03, 4.275e-01, 1.669e+00)); + r += mul(s2_8, M4(1.400e-01, 6.775e-01, 5.287e-02, 2.007e-02, 1.213e-01, -1.460e-03, -2.313e-02, 1.282e-01, -8.355e-02, 2.399e-01, -5.277e-02, -1.499e-01, 7.246e-02, -2.553e-02, 2.185e-01, 8.662e-01)); + r += mul(s3_0, M4(3.069e-02, -3.668e-02, -3.646e-02, 1.140e-01, -7.882e-02, 2.759e-01, 9.170e-01, 2.779e-01, 1.459e-01, 3.766e-02, -1.214e-01, 5.718e-03, -3.323e-02, 9.705e-02, -1.282e-02, -1.401e-01)); + r += mul(s3_1, M4(-1.405e-02, 2.809e-02, 1.466e-01, -1.286e-01, 4.754e-01, 
8.076e-01, 5.775e-02, -5.403e-01, 1.919e-01, -2.015e-01, -1.976e-01, -8.544e-02, -8.431e-02, 9.302e-02, 6.560e-02, 2.011e-02)); + r += mul(s3_2, M4(2.107e-01, 2.334e-02, -2.591e-01, -1.023e-01, 6.461e-01, 1.138e+00, 3.917e-01, 2.270e-01, 4.023e-01, 6.135e-02, 4.125e-02, -5.551e-02, 1.871e-02, -1.344e-01, -1.534e-01, 1.216e-01)); + r += mul(s3_3, M4(8.077e-02, -1.149e-01, 6.733e-02, -9.044e-03, -6.431e-02, -1.755e-02, 2.617e+00, 5.203e-01, 8.910e-02, 9.642e-02, 3.720e-01, -2.326e-01, -1.142e-01, -4.017e-02, 2.351e-01, -1.062e-01)); + r += mul(s3_4, M4(-2.427e-01, -4.425e-03, 4.260e-01, -6.273e-02, 4.224e+00, -2.047e+00, -1.911e+00, 2.329e+00, 2.987e-01, -3.286e-01, -1.115e-01, 2.053e-01, -5.309e-02, -8.751e-02, -1.275e-02, -2.105e-01)); + r += mul(s3_5, M4(-1.413e-02, -4.404e-01, -1.525e-01, -1.703e-01, -9.999e-01, 5.276e-01, 4.779e-01, -5.145e-01, 4.772e-01, 2.730e-02, -7.651e-02, -2.235e-01, -1.122e-01, -1.686e-01, 9.595e-02, -1.169e-01)); + r += mul(s3_6, M4(-1.162e-01, 3.109e-01, -2.686e-01, -1.492e-01, 2.122e-01, 6.911e-01, 7.412e-01, 3.675e-02, 1.420e-01, -3.979e-02, -3.526e-02, -1.170e-01, 2.192e-01, 6.369e-02, 2.568e-01, 1.606e-02)); + r += mul(s3_7, M4(-2.482e-02, 6.355e-01, 4.230e-01, -4.331e-01, -1.462e+00, -9.944e-01, 1.154e+00, 8.760e-01, 3.625e-01, 2.127e-01, 3.382e-01, 6.009e-02, 1.431e-01, 9.892e-02, -2.409e-01, 4.223e-02)); + r += mul(s3_8, M4(-1.832e-02, 7.811e-02, -1.928e-02, 1.448e-01, -1.288e+00, 1.805e-01, 6.324e-01, -2.704e-02, 6.456e-02, -6.364e-02, 4.971e-02, -6.535e-03, 1.766e-01, 5.142e-02, -1.375e-01, 2.532e-01)); + r += V4(8.007e-03, 2.570e-02, 2.487e-03, -2.496e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 
s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, 
float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.802e-01, -3.301e-02, -1.047e-01, 6.427e-02, 1.357e-02, -8.015e-02, 7.763e-02, -9.646e-02, 1.136e-01, -1.443e-01, -3.950e-02, 2.744e-01, 8.414e-03, -1.005e-01, -1.683e-01, -5.766e-02)); + r += mul(s0_1, M4(2.907e-01, 1.339e-01, -7.005e-02, 9.074e-02, -2.491e-03, 6.498e-02, 1.121e-01, -9.272e-02, 3.415e-01, 1.949e-01, -2.613e-01, -2.328e-01, 1.311e-01, 1.285e-01, 1.685e-02, -4.780e-02)); + r += mul(s0_2, M4(1.671e-01, -2.228e-02, -5.777e-02, -5.853e-02, 1.243e-02, -3.269e-02, 8.757e-03, -1.478e-01, -4.190e-02, 3.164e-02, 2.922e-01, -3.017e-01, -6.631e-02, 5.380e-02, -2.750e-02, -7.771e-02)); + r += mul(s0_3, M4(-2.454e-02, 2.148e-01, -1.116e-01, -1.125e-01, -1.792e-01, -7.021e-01, -2.183e-01, 2.920e-01, -1.698e-01, 1.827e-01, -6.779e-02, 9.333e-02, -2.153e-01, 2.441e-01, 9.794e-02, -2.729e-01)); + r += mul(s0_4, M4(-6.750e-02, 1.324e-01, -5.087e-02, 2.746e-01, 1.579e-01, -1.909e-01, -7.631e-01, -4.744e-01, -1.732e-01, -2.741e-01, 4.145e-02, -2.124e-01, 7.946e-02, -1.579e-01, 2.856e-01, 5.090e-02)); + r += mul(s0_5, M4(8.392e-02, -1.504e-01, 2.815e-01, -1.174e-01, 3.942e-02, 1.918e-02, 1.561e-01, -1.457e-01, -5.976e-02, 1.230e-01, -2.539e-01, -1.965e-01, 1.869e-01, -1.795e-01, -1.283e-01, -3.447e-02)); + r += mul(s0_6, M4(-3.547e-03, -6.576e-03, -5.087e-02, 3.466e-02, -3.130e-03, -3.176e-01, 8.737e-02, 4.018e-02, -6.489e-02, -1.580e-03, -8.784e-03, -4.500e-02, 2.343e-03, 5.945e-02, -5.201e-02, -3.127e-02)); + r += mul(s0_7, M4(-3.546e-02, 1.145e-01, -4.773e-02, 8.280e-02, 6.746e-03, -1.036e-01, -6.616e-02, -1.224e-01, 7.156e-02, -1.941e-01, 9.307e-02, -3.567e-02, -2.215e-01, 2.437e-01, 
-5.542e-04, 1.208e-01)); + r += mul(s0_8, M4(-1.115e-02, -4.687e-02, -3.210e-02, -1.470e-01, -4.609e-02, 4.657e-02, -6.476e-02, -1.372e-01, -4.956e-03, 1.024e-01, -2.349e-01, -8.472e-02, -2.757e-02, -1.707e-02, 2.065e-01, 1.863e-02)); + r += mul(s1_0, M4(3.728e-02, -7.100e-02, -4.937e-02, 6.239e-02, -7.377e-03, -3.033e-02, 1.675e-01, -1.863e-02, -2.631e-02, -9.633e-02, -1.130e-01, -1.201e-01, 1.414e-01, -1.737e-01, -8.031e-02, -6.951e-02)); + r += mul(s1_1, M4(-3.703e-02, 4.012e-02, -2.289e-02, 3.332e-02, 2.161e-02, 8.828e-02, 5.544e-02, 1.017e-01, 3.684e-01, 3.149e-01, 3.662e-01, 4.298e-02, 1.966e-01, -2.697e-02, 2.216e-02, 7.540e-02)); + r += mul(s1_2, M4(-4.974e-02, -3.826e-02, -2.810e-02, -8.318e-02, 3.356e-02, -7.605e-02, -1.087e-01, 1.987e-02, -1.153e-01, -1.039e-01, -5.868e-02, -3.313e-02, -1.750e-02, 3.884e-03, -9.170e-02, -1.011e-01)); + r += mul(s1_3, M4(2.119e-01, -1.340e-01, -3.650e-02, 2.219e-01, 3.634e-01, 3.474e-01, 2.302e-01, 7.494e-02, -2.253e-01, 1.239e-01, -6.032e-02, 1.293e-01, 9.583e-02, 4.424e-02, -3.920e-02, -1.870e-01)); + r += mul(s1_4, M4(-2.664e-01, 8.462e-02, -4.745e-01, 1.985e-01, 2.803e-01, 7.429e-02, 7.814e-01, 4.658e-01, 3.661e-01, -2.319e-02, 3.324e-01, 2.860e-01, 3.178e-01, 9.301e-02, 1.316e-01, 4.547e-02)); + r += mul(s1_5, M4(5.369e-02, 6.912e-02, 2.659e-01, -1.491e-01, 4.462e-02, -4.823e-02, 1.130e-01, 1.710e-02, -7.604e-02, -7.003e-02, 3.093e-01, 2.537e-01, 2.466e-01, -1.039e-01, 2.413e-02, -1.256e-01)); + r += mul(s1_6, M4(-1.188e-01, 1.026e-01, 4.215e-02, -9.677e-02, 2.443e-03, 1.957e-01, 2.961e-02, -5.553e-02, -3.488e-02, 2.515e-02, -4.840e-03, 1.814e-02, 9.644e-02, -8.802e-02, 3.516e-03, -2.940e-03)); + r += mul(s1_7, M4(-1.792e-01, 1.391e-01, 1.322e-02, -1.514e-02, -2.173e-01, 1.743e-01, 1.530e-01, 5.286e-02, -8.655e-02, 2.541e-01, 6.282e-02, 1.167e-01, 9.664e-02, 2.304e-01, -1.538e-01, -1.298e-01)); + r += mul(s1_8, M4(-1.720e-01, 4.693e-02, 2.790e-01, 2.187e-02, -4.386e-02, 7.714e-03, 9.800e-02, 6.484e-03, -5.497e-02, 
1.216e-01, 3.924e-02, 5.162e-02, 1.403e-01, -5.364e-03, -6.795e-03, -6.163e-02)); + r += mul(s2_0, M4(2.905e-01, -3.799e-02, 1.332e-01, 2.496e-02, 7.202e-02, -3.659e-01, -2.940e-02, -1.028e-03, -1.221e-01, 1.147e-01, 3.613e-02, 9.125e-02, -8.760e-03, 1.489e-02, -9.652e-02, 4.452e-03)); + r += mul(s2_1, M4(4.027e-01, -2.178e-01, -8.478e-02, 2.903e-01, 2.463e-02, 9.527e-03, -2.835e-01, 2.066e-01, -6.698e-02, -2.653e-01, -6.667e-02, 4.320e-02, -2.610e-01, -1.351e-01, 7.826e-02, -5.429e-02)); + r += mul(s2_2, M4(-1.249e-01, 4.376e-02, -6.245e-02, 1.702e-01, -5.731e-02, 8.022e-02, -1.335e-01, 1.528e-01, -2.969e-02, 1.062e-01, -1.303e-01, 1.226e-01, 2.030e-02, 5.205e-02, -1.877e-01, 4.309e-02)); + r += mul(s2_3, M4(-6.329e-02, -1.286e-01, -7.222e-02, 5.592e-03, -3.023e-02, 9.502e-02, -4.077e-02, -2.299e-01, -1.038e-01, -5.742e-02, -5.106e-04, 5.143e-02, 3.098e-02, -1.235e-01, 1.987e-02, 1.477e-02)); + r += mul(s2_4, M4(1.113e-01, -1.761e-01, 5.038e-02, -1.304e-01, 3.668e-01, -3.430e-01, 2.169e-01, 3.877e-01, -3.750e-02, 2.473e-01, 3.416e-02, 2.184e-01, 5.168e-01, -7.132e-02, 3.818e-01, -1.508e-01)); + r += mul(s2_5, M4(1.479e-01, -8.656e-02, -1.700e-01, 3.874e-01, 2.286e-02, -8.854e-02, 3.305e-02, -4.668e-03, -1.481e-01, 5.115e-02, 2.686e-01, 4.113e-01, -3.740e-01, -2.013e-01, 9.838e-04, 3.008e-01)); + r += mul(s2_6, M4(3.428e-01, -3.200e-01, 7.593e-02, 1.911e-01, 1.219e-01, 1.211e-02, -5.694e-02, -5.767e-02, 3.119e-02, -7.609e-02, 6.471e-02, 1.215e-01, -2.793e-04, 1.650e-02, 7.190e-03, -4.468e-02)); + r += mul(s2_7, M4(3.970e-01, -3.192e-01, -5.639e-02, 8.182e-02, -2.831e-02, 4.036e-02, 7.004e-02, 1.095e-01, -3.655e-02, 2.443e-01, 5.606e-02, -4.974e-02, 9.825e-02, 1.158e-01, -5.104e-02, -2.986e-02)); + r += mul(s2_8, M4(1.440e-01, 5.504e-02, -2.020e-01, 2.618e-03, -1.098e-02, -3.678e-02, 7.661e-02, 5.652e-02, -7.426e-02, 5.461e-02, 4.239e-01, 2.093e-01, 9.316e-03, -3.679e-02, 6.108e-02, 2.036e-01)); + r += mul(s3_0, M4(1.806e-02, 2.233e-02, 5.056e-02, 1.758e-01, 
3.566e-02, -1.383e-01, 5.349e-02, 1.066e-01, 3.314e-02, -1.258e-01, -2.885e-02, -6.648e-02, -6.860e-03, -2.283e-02, -1.052e-01, -1.623e-02)); + r += mul(s3_1, M4(7.369e-02, -3.141e-02, 3.877e-03, 8.113e-03, -1.773e-01, 5.122e-03, -3.198e-01, 9.005e-02, 7.291e-02, -1.519e-01, -1.501e-01, -8.202e-02, -4.729e-02, -2.877e-02, -4.056e-02, 7.599e-02)); + r += mul(s3_2, M4(1.282e-01, 2.477e-03, 6.185e-02, 3.967e-02, -1.343e-01, 8.884e-02, 5.299e-02, -7.324e-02, 1.842e-01, -3.053e-02, -1.335e-01, -6.790e-03, -8.128e-02, 6.665e-02, 1.583e-03, -5.358e-02)); + r += mul(s3_3, M4(1.135e-01, 9.360e-03, 1.646e-01, 1.844e-01, 1.104e-02, 7.072e-02, -9.632e-02, -1.169e-01, -1.458e-01, 2.540e-02, -5.132e-02, -1.627e-01, -1.066e-01, -4.819e-02, -4.340e-02, -5.074e-02)); + r += mul(s3_4, M4(-1.198e-01, -7.965e-02, -2.989e-01, -4.946e-01, -1.666e-02, -2.136e-01, -3.575e-02, 1.351e-01, -8.546e-02, 2.553e-02, -7.878e-02, -3.233e-01, -2.955e-01, -7.765e-02, 1.450e-01, -2.114e-01)); + r += mul(s3_5, M4(-7.593e-02, -1.849e-03, -1.688e-01, 3.626e-02, 4.408e-03, 4.014e-02, -1.401e-01, -2.239e-01, 9.538e-02, -2.310e-01, 2.831e-02, 5.065e-02, 1.135e-01, 2.542e-02, -4.365e-01, 4.393e-02)); + r += mul(s3_6, M4(-5.217e-02, -1.327e-02, -1.851e-02, 2.806e-02, 4.648e-02, -9.047e-04, 2.961e-02, -2.922e-02, 6.360e-02, -3.494e-02, 2.573e-02, 1.309e-02, -2.512e-03, -4.086e-02, -2.086e-03, -6.018e-02)); + r += mul(s3_7, M4(9.887e-02, -9.515e-03, 1.306e-01, 5.290e-02, 1.832e-01, -2.549e-01, -4.640e-02, -1.256e-01, 4.915e-02, -5.163e-02, 3.044e-02, -9.871e-02, 8.168e-03, -7.112e-02, -5.743e-02, 3.687e-02)); + r += mul(s3_8, M4(-6.440e-02, 2.530e-02, -2.166e-03, -4.680e-02, 8.009e-02, -6.634e-02, -1.390e-01, -2.524e-02, 6.524e-02, -1.120e-01, -4.252e-02, -8.413e-03, -2.017e-02, 1.444e-02, -4.483e-02, 4.690e-02)); + r += V4(3.009e-03, -1.445e-03, 8.191e-03, -7.852e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, 
V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.005e-01, -6.367e-02, 4.428e-02, 1.687e-02, -9.639e-02, -1.209e-01, -1.374e-02, 4.932e-02, -9.949e-02, -2.569e-01, 1.199e-01, 1.077e-02, 5.110e-02, -1.129e-01, 6.104e-02, -4.656e-03)); + r += mul(s0_1, M4(-2.156e-01, 8.505e-02, 4.815e-04, -1.042e-01, -2.724e-01, -1.870e-01, 3.876e-02, 7.840e-02, -4.018e-01, -8.239e-01, 2.611e-01, -3.623e-01, -6.999e-03, 1.848e-02, 6.095e-02, -2.318e-02)); + r += mul(s0_2, M4(-2.195e-01, -6.727e-02, 7.111e-02, 5.119e-02, 7.396e-02, 1.116e-02, -1.261e-02, 9.531e-02, -3.892e-01, 1.430e-01, -9.840e-02, -2.423e-01, 2.669e-01, 3.009e-02, -2.478e-02, 1.168e-01)); + r += mul(s0_3, M4(-4.344e-01, 8.202e-02, 9.272e-03, -8.384e-02, -8.136e-02, -4.359e-01, 2.361e-01, -2.183e-01, 4.609e-02, -2.144e-02, 9.525e-03, -7.197e-02, -9.339e-02, 1.927e-01, -1.687e-02, 3.193e-02)); + r += mul(s0_4, M4(4.702e-01, 1.415e-04, 1.097e-01, 2.415e-01, 1.899e-01, -7.324e-01, -4.745e-03, -1.237e-01, -2.043e-01, 2.674e-02, 6.899e-01, 8.700e-02, 5.083e-02, 2.271e-01, 4.884e-02, 3.767e-01)); + r += mul(s0_5, M4(6.758e-02, -4.638e-02, 9.477e-02, -8.290e-02, -1.994e-01, 1.090e-01, -5.148e-02, -1.470e-01, 7.433e-02, 3.404e-01, 1.020e-01, -8.353e-02, 1.793e-01, -1.368e-01, 6.375e-02, 5.993e-02)); + r += mul(s0_6, M4(1.596e-02, 3.589e-02, 1.177e-02, 1.541e-01, -1.159e-01, -1.621e-02, 2.451e-01, 2.767e-01, -3.754e-04, 4.995e-02, -6.760e-02, -9.945e-02, 4.017e-01, 4.413e-02, 2.189e-02, 4.126e-02)); + r += mul(s0_7, M4(1.635e-01, -1.853e-01, -1.823e-01, -1.003e-01, -4.884e-02, 1.686e-01, 7.826e-02, 5.419e-01, -1.017e-01, 7.007e-02, 2.084e-01, 2.030e-01, 5.150e-01, -1.861e-01, -3.037e-01, -3.846e-01)); + r += mul(s0_8, M4(1.162e-01, 9.675e-02, -9.807e-02, 7.794e-02, 1.154e-01, 7.680e-02, 7.823e-02, 1.665e-01, 1.414e-01, 4.509e-02, 
-1.327e-02, 1.752e-01, -2.721e-01, -9.636e-04, 2.198e-02, -9.405e-02)); + r += mul(s1_0, M4(-3.554e-02, 7.673e-02, -1.735e-02, 3.910e-02, -9.934e-02, 1.798e-01, -4.244e-02, -2.008e-02, -1.586e-01, 7.918e-02, 6.812e-02, 1.784e-01, -2.173e-01, 8.736e-02, -3.130e-02, -1.487e-02)); + r += mul(s1_1, M4(1.142e-01, 2.330e-02, -7.096e-03, 5.291e-02, -3.702e-01, 2.102e-01, 7.156e-02, -1.416e-01, 1.017e-01, 3.888e-01, -5.335e-02, 9.686e-02, -1.093e-01, -1.631e-02, -2.884e-03, -4.091e-02)); + r += mul(s1_2, M4(4.795e-02, 4.423e-03, 1.494e-02, 2.666e-02, 1.261e-01, -7.251e-02, 2.103e-02, 1.095e-01, 2.166e-01, -1.249e-01, 8.981e-03, 1.792e-01, -3.697e-02, 6.864e-03, -1.141e-02, 2.430e-02)); + r += mul(s1_3, M4(-1.206e-01, 1.584e-03, -1.789e-02, -1.335e-02, 2.398e-01, 8.681e-01, -1.241e-01, -4.454e-02, -7.396e-02, 1.759e-02, -9.138e-02, 1.573e-01, -2.025e-01, 8.569e-02, 2.132e-02, 9.791e-02)); + r += mul(s1_4, M4(-4.834e-02, -7.974e-01, 2.858e-01, -2.441e-01, 4.163e-01, -1.650e-01, -1.897e-01, 1.309e-01, 4.031e-02, -8.242e-02, 3.338e-01, 3.567e-01, -1.532e-01, 2.807e-01, -7.324e-02, 5.093e-03)); + r += mul(s1_5, M4(-1.538e-01, 9.244e-02, -7.570e-02, -4.333e-02, -1.407e-01, -4.201e-02, -4.186e-02, -1.603e-01, -2.031e-01, 6.309e-02, -8.191e-02, 9.121e-02, -8.138e-02, -4.037e-02, 3.793e-02, 4.240e-02)); + r += mul(s1_6, M4(1.780e-01, 1.059e-01, -5.233e-03, 1.087e-01, 1.808e-01, -1.409e-01, 1.162e-02, -1.312e-01, 6.866e-02, 1.401e-02, 6.420e-02, 5.614e-02, -6.830e-02, 1.731e-02, 5.889e-02, 2.257e-02)); + r += mul(s1_7, M4(2.057e-01, -2.093e-02, -1.741e-01, 9.891e-02, -3.673e-02, 3.314e-02, -2.223e-01, -3.177e-01, 2.374e-01, -5.871e-02, -5.086e-02, -9.418e-02, -1.935e-02, -1.902e-02, -1.255e-01, -2.744e-01)); + r += mul(s1_8, M4(1.654e-01, 7.328e-02, 2.874e-02, 1.256e-01, -2.608e-01, 1.926e-03, 4.500e-02, -7.882e-02, -1.035e-02, -3.478e-02, -1.061e-01, -8.474e-02, -2.438e-01, -6.889e-02, -7.579e-02, -1.871e-01)); + r += mul(s2_0, M4(6.493e-02, 1.357e-01, -6.197e-02, -5.055e-02, 
2.568e-01, -5.699e-02, -1.266e-01, -1.411e-02, 2.936e-02, -5.234e-02, -5.882e-03, -8.014e-02, -5.334e-02, -8.555e-02, 5.632e-02, 8.296e-03)); + r += mul(s2_1, M4(-3.582e-01, 2.351e-01, -1.636e-01, 2.172e-01, -1.840e-01, 9.838e-02, -7.565e-02, 1.535e-01, 8.151e-02, 3.002e-02, 1.149e-01, 1.180e-01, 1.323e-01, -7.682e-03, 5.013e-02, -2.190e-02)); + r += mul(s2_2, M4(-1.957e-01, -5.823e-02, -1.131e-01, -7.025e-02, 3.355e-01, 1.378e-01, -2.046e-01, 2.575e-01, 1.663e-01, 2.567e-02, -3.703e-02, -9.489e-02, -6.431e-02, -6.700e-02, 9.598e-02, 4.460e-03)); + r += mul(s2_3, M4(-1.522e-01, 1.335e-01, -2.140e-01, 3.368e-02, -5.076e-02, 2.412e-01, 6.141e-03, 2.456e-02, -9.105e-03, 1.014e-02, -1.056e-02, 1.368e-01, 8.030e-02, -2.874e-02, -7.499e-02, -2.675e-02)); + r += mul(s2_4, M4(2.115e-02, -6.849e-02, -8.528e-02, -3.270e-01, 2.112e-02, 7.309e-02, -3.852e-02, 2.604e-01, 1.772e-01, 4.115e-01, -2.443e-01, 3.100e-01, 3.139e-01, 3.829e-01, -2.701e-01, 1.463e-01)); + r += mul(s2_5, M4(2.664e-03, 4.352e-02, -2.378e-01, 5.316e-02, -1.369e-01, -1.293e-01, 1.587e-01, 2.153e-01, 3.820e-01, -1.515e-01, -4.429e-02, 2.391e-01, -3.720e-01, -1.154e-01, -1.196e-01, 3.172e-01)); + r += mul(s2_6, M4(-3.174e-01, -2.340e-01, 1.286e-01, -1.076e-01, 5.834e-02, 6.138e-02, -6.854e-03, 5.658e-02, 5.314e-02, -1.751e-02, 9.115e-03, 8.328e-03, 8.394e-03, 2.608e-02, 1.125e-01, 1.593e-01)); + r += mul(s2_7, M4(-6.600e-01, 1.899e-01, 1.094e-01, 1.665e-02, 1.089e-01, -1.034e-01, -1.811e-01, -3.040e-01, 4.782e-01, 3.160e-02, -4.648e-02, 1.286e-01, 1.070e-01, -1.022e-01, 5.693e-02, -5.195e-02)); + r += mul(s2_8, M4(3.748e-03, -4.142e-02, -7.021e-02, -2.596e-01, -2.444e-01, -6.341e-05, 4.125e-02, -7.382e-02, 4.456e-02, 3.144e-02, -5.055e-02, -1.724e-01, -1.835e-01, 4.462e-02, -1.398e-01, -2.631e-02)); + r += mul(s3_0, M4(-1.892e-01, -2.298e-01, 7.045e-02, -6.423e-02, 7.789e-02, -9.540e-02, -3.161e-02, -5.171e-02, -3.656e-02, -6.148e-02, -1.413e-02, -8.995e-02, 2.536e-02, 1.995e-03, 3.317e-02, 1.918e-02)); + r 
+= mul(s3_1, M4(1.245e-02, -4.971e-03, 1.026e-02, -7.525e-02, -2.233e-01, -4.502e-01, -4.530e-03, -1.802e-01, -1.799e-01, 1.915e-02, 1.043e-02, 4.008e-02, 1.524e-01, 1.881e-03, -7.387e-02, 1.566e-02)); + r += mul(s3_2, M4(-1.750e-01, 3.216e-03, -1.033e-03, -7.055e-02, -1.263e-01, 1.586e-01, 2.603e-02, -1.282e-01, 5.606e-02, -1.498e-02, -3.338e-02, -8.978e-03, -2.218e-02, -5.852e-02, -3.208e-03, -1.352e-02)); + r += mul(s3_3, M4(-9.577e-02, -8.859e-02, 7.921e-02, -1.569e-02, -7.962e-02, 2.890e-02, 4.107e-02, -5.870e-02, 2.510e-02, 1.765e-02, 4.458e-02, 1.891e-02, 7.541e-02, 3.492e-02, 3.160e-02, 1.201e-02)); + r += mul(s3_4, M4(-6.228e-02, 9.576e-02, -1.743e-01, -1.935e-01, 2.054e-01, 1.479e-01, 8.056e-04, 3.321e-02, -1.362e-01, 5.003e-01, 9.071e-02, 8.153e-02, 2.283e-01, -3.484e-01, 4.509e-02, -4.658e-01)); + r += mul(s3_5, M4(2.528e-01, -9.286e-04, -2.468e-02, 1.338e-01, 4.431e-02, 3.503e-02, 1.304e-01, 1.652e-01, 4.628e-01, -2.670e-01, 1.880e-01, 1.516e-01, -1.538e-01, 1.379e-01, -3.334e-02, 2.977e-02)); + r += mul(s3_6, M4(1.385e-01, -6.592e-02, -1.225e-01, -1.381e-01, -4.498e-02, -6.343e-03, 4.811e-02, 9.639e-02, 1.635e-02, -3.467e-02, 3.640e-03, -3.186e-02, 6.265e-02, 2.282e-01, 9.661e-02, 1.295e-01)); + r += mul(s3_7, M4(-3.053e-03, 7.999e-02, 2.407e-01, 2.655e-01, -3.969e-01, -9.502e-03, 1.900e-02, 9.557e-02, -6.199e-02, -3.574e-02, 8.350e-02, -7.837e-02, -1.442e-02, -5.281e-03, 4.503e-01, 4.026e-01)); + r += mul(s3_8, M4(-1.313e-01, 4.424e-02, -1.155e-02, 6.769e-02, 2.192e-02, 6.721e-02, 5.694e-03, 7.376e-02, -2.155e-01, -7.512e-02, 6.252e-03, -3.428e-01, 3.324e-01, 2.784e-03, -5.606e-02, 2.108e-01)); + r += V4(-6.039e-04, -3.875e-03, -3.020e-03, 2.282e-03); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = 
l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + 
+#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.947e-02, -1.234e-01, -3.169e-02, -9.158e-02, -1.406e-01, 6.941e-02, -1.367e-02, -1.406e-02, 9.073e-02, 5.642e-01, -2.007e-02, 9.725e-02, 7.122e-03, -1.956e-03, 6.532e-03, -5.457e-02)); + r += mul(s0_1, M4(-1.130e-01, -4.645e-02, 3.624e-02, 3.391e-02, 3.882e-01, 2.453e-01, -2.237e-01, -2.271e-01, 2.803e-01, 1.718e-01, 3.255e-02, -2.046e-01, 1.441e-01, -1.880e-03, 2.335e-02, -1.232e-01)); + r += mul(s0_2, M4(2.016e-01, 1.243e-01, -3.895e-02, -1.135e-01, -2.167e-02, 1.465e-02, -7.776e-02, -1.213e-01, -7.195e-03, 4.404e-03, 6.598e-02, -5.135e-02, -2.062e-01, -3.725e-02, -8.296e-03, 8.739e-03)); + r += mul(s0_3, M4(-2.068e-02, -3.876e-02, 5.737e-02, 9.886e-02, -9.663e-02, -2.569e-01, 6.761e-02, -1.454e-01, 4.660e-02, 7.810e-01, -2.254e-01, 1.899e-01, -9.628e-02, 8.080e-02, -1.093e-02, 1.451e-02)); + r += mul(s0_4, M4(3.133e-01, 2.759e-01, -9.917e-02, -3.134e-01, 1.137e-01, -5.446e-01, -2.044e-03, -5.215e-01, -6.867e-02, 5.254e-01, -1.466e-01, -3.048e-01, 3.408e-01, 5.791e-01, -2.594e-01, -4.879e-04)); + r += mul(s0_5, M4(6.871e-02, -1.221e-01, -5.702e-02, -2.731e-02, 6.025e-01, 1.350e-01, -3.119e-01, -4.130e-01, 2.091e-01, 1.003e-01, 4.509e-02, -1.541e-01, 1.151e-01, -1.558e-01, 6.309e-03, -2.192e-01)); + r += mul(s0_6, M4(2.139e-02, 1.540e-02, -9.451e-02, 8.898e-02, 1.983e-02, -1.259e-01, 2.162e-01, -9.477e-02, -2.253e-01, -1.456e-01, -2.432e-02, 9.649e-02, 2.147e-02, -9.523e-02, 2.042e-02, -7.790e-02)); + r += mul(s0_7, M4(-3.105e-03, 1.944e-01, -1.808e-01, -3.058e-02, 4.007e-01, 5.645e-01, -2.452e-01, -7.366e-02, 
1.279e-02, 3.212e-02, -1.573e-01, -1.267e-01, 1.613e-02, -1.976e-01, -1.519e-01, -2.687e-02)); + r += mul(s0_8, M4(-1.906e-04, 8.306e-02, 2.480e-02, 1.696e-02, 1.275e-01, 1.372e-01, 1.205e-01, 1.120e-02, 1.424e-02, -1.526e-01, -6.629e-02, -9.104e-02, 2.042e-02, -1.167e-01, 1.050e-01, 1.560e-02)); + r += mul(s1_0, M4(-2.398e-02, -1.009e-01, 2.671e-02, -8.841e-02, -7.277e-03, -4.411e-02, -1.240e-02, -5.367e-04, -1.223e-01, -7.251e-02, 4.941e-02, 7.545e-02, 6.688e-02, 1.727e-02, -1.144e-02, -7.713e-02)); + r += mul(s1_1, M4(-1.507e-01, -3.095e-01, 5.017e-02, -1.145e-01, 3.430e-02, -2.241e-01, -9.050e-02, -8.470e-02, -8.624e-02, -1.021e-02, -1.620e-02, 3.932e-03, 7.775e-02, -2.376e-02, 6.270e-02, -7.896e-02)); + r += mul(s1_2, M4(3.578e-02, -3.242e-02, 9.400e-03, -2.998e-02, -1.545e-02, -1.481e-01, -6.667e-02, 3.496e-02, 6.722e-02, 7.676e-04, -8.215e-04, 2.142e-03, 4.007e-02, 9.690e-02, -1.652e-03, 3.858e-02)); + r += mul(s1_3, M4(6.321e-02, -1.472e-01, 6.571e-02, -1.929e-01, -7.340e-02, -8.067e-02, 1.715e-02, 2.182e-02, -8.623e-02, -2.195e-01, -6.101e-02, 8.246e-02, -4.908e-02, -3.293e-02, -7.341e-02, -1.941e-01)); + r += mul(s1_4, M4(5.609e-01, 5.581e-01, -1.143e-01, -1.052e-01, 2.477e-01, 2.387e-01, 1.272e-01, 3.284e-03, -3.135e-01, 8.385e-02, -7.393e-02, -2.270e-01, 4.403e-01, -1.179e-01, -1.620e-01, 2.978e-01)); + r += mul(s1_5, M4(-3.015e-02, 1.055e-01, 1.072e-01, 1.177e-01, 3.838e-01, 3.206e-02, -4.556e-03, -5.072e-02, 4.250e-02, -1.665e-02, -1.759e-02, 2.822e-02, -2.408e-01, -2.204e-02, -3.440e-02, 6.520e-02)); + r += mul(s1_6, M4(9.180e-04, 3.395e-02, -1.211e-02, -5.605e-03, -7.356e-03, -2.439e-02, -2.498e-02, -6.361e-04, -5.167e-02, -1.009e-02, 7.202e-02, 3.652e-02, 3.036e-03, -7.672e-03, -2.822e-02, -9.942e-02)); + r += mul(s1_7, M4(-7.041e-02, -2.366e-01, -1.556e-01, 1.499e-01, -2.674e-02, 6.601e-03, -1.490e-01, 1.329e-02, -1.127e-01, 8.363e-03, -1.333e-01, 1.038e-02, -1.219e-02, -1.366e-01, 8.814e-02, 4.260e-03)); + r += mul(s1_8, M4(-1.397e-02, 2.863e-02, 
5.459e-03, -1.166e-02, -1.201e-02, 1.346e-01, 5.461e-02, 1.584e-02, -8.155e-02, 8.451e-03, -3.444e-02, 3.920e-02, 2.082e-02, -4.174e-02, 6.205e-02, 5.646e-02)); + r += mul(s2_0, M4(5.465e-02, 7.303e-02, 1.200e-01, 8.938e-03, -8.960e-02, -2.248e-01, -1.073e-02, 6.882e-02, 4.637e-02, -1.215e-01, -2.319e-02, -2.049e-01, -8.235e-02, -2.689e-02, 8.521e-02, 2.612e-02)); + r += mul(s2_1, M4(-1.284e-01, -8.509e-02, 6.859e-02, 2.538e-02, -7.401e-02, 2.860e-01, -2.240e-01, 1.754e-01, -2.073e-01, -9.333e-02, -9.310e-02, -3.311e-01, 2.251e-01, 1.948e-01, -1.091e-01, 2.448e-02)); + r += mul(s2_2, M4(4.550e-03, 2.884e-02, -1.023e-02, -1.793e-02, 1.472e-01, 1.728e-02, -5.533e-02, -4.606e-02, -1.128e-01, 1.845e-01, -9.297e-02, 7.245e-02, 2.303e-02, -1.293e-01, -2.277e-02, -1.523e-02)); + r += mul(s2_3, M4(5.703e-02, 4.629e-03, -7.495e-02, -7.220e-02, -1.245e-01, 1.142e-01, -1.688e-03, -9.906e-03, 9.714e-02, -2.851e-02, 7.069e-03, -3.250e-01, -5.029e-03, -1.421e-01, -4.162e-02, 1.032e-01)); + r += mul(s2_4, M4(5.200e-02, -3.414e-02, -3.809e-02, -9.742e-02, 8.686e-01, 1.140e+00, 2.062e-01, 8.598e-02, 4.073e-01, -3.313e-01, 2.673e-01, 1.050e-01, -9.355e-02, 1.764e-01, 8.423e-02, 1.156e-01)); + r += mul(s2_5, M4(-5.260e-03, 8.804e-02, 3.636e-02, 3.074e-03, 1.724e-01, 2.433e-01, -1.126e-02, -2.652e-01, -1.229e-01, 3.135e-02, 1.187e-02, -6.661e-02, -1.872e-02, -6.508e-02, -7.109e-02, 1.141e-01)); + r += mul(s2_6, M4(6.180e-03, 2.059e-03, -1.768e-02, 4.877e-03, -7.838e-02, 1.366e-01, -7.231e-02, -2.826e-02, 6.251e-02, 7.375e-02, 2.531e-02, 2.038e-02, -4.462e-03, -4.896e-02, -4.376e-02, -7.998e-03)); + r += mul(s2_7, M4(1.011e-01, 8.753e-02, -5.554e-02, 6.949e-04, 4.137e-02, 2.710e-01, -3.203e-01, 6.752e-02, 9.720e-02, 3.447e-02, -5.777e-02, -1.723e-02, -9.154e-03, 5.461e-02, 1.248e-01, -3.906e-04)); + r += mul(s2_8, M4(4.126e-02, 3.442e-02, 9.763e-03, -4.560e-02, -4.233e-04, -1.519e-01, 2.421e-02, -4.043e-02, -1.281e-02, 1.166e-02, 2.489e-04, -3.061e-02, -4.476e-02, 4.493e-03, 
-4.164e-02, 9.694e-03)); + r += mul(s3_0, M4(-1.352e-01, -1.938e-01, 7.285e-02, -4.706e-02, 1.920e-02, 1.891e-02, 1.233e-02, 3.876e-02, 1.342e-02, 2.020e-01, 3.292e-02, 2.778e-02, -5.017e-02, 3.560e-02, 7.028e-02, 7.562e-03)); + r += mul(s3_1, M4(3.014e-01, 1.243e-01, -2.656e-02, -9.796e-02, 1.585e-01, 2.259e-01, -6.651e-02, 4.080e-02, 1.902e-01, 2.705e-01, -9.774e-02, -1.144e-02, -4.653e-01, -3.536e-01, 2.515e-02, 9.628e-02)); + r += mul(s3_2, M4(-7.724e-02, 1.181e-01, 2.182e-02, 1.999e-02, -7.114e-02, -4.414e-02, -5.748e-06, -8.931e-03, 4.985e-03, 6.360e-02, 4.422e-02, 6.005e-02, 1.335e-01, -8.144e-03, -3.979e-02, 6.952e-03)); + r += mul(s3_3, M4(-1.826e-03, 2.390e-02, 4.665e-03, -3.357e-02, 2.088e-02, 1.436e-01, -2.474e-02, 1.100e-02, 2.727e-02, -1.649e-02, -9.539e-02, -1.112e-01, -2.427e-02, 1.811e-01, -4.267e-02, 1.060e-01)); + r += mul(s3_4, M4(9.873e-02, -1.417e-01, -1.365e-01, -3.187e-01, -7.583e-03, 3.047e-01, -2.480e-02, 2.623e-01, -3.193e-01, -1.539e-01, -2.986e-01, 2.350e-01, 4.367e-01, 2.441e-01, -3.426e-01, -5.108e-02)); + r += mul(s3_5, M4(-8.384e-02, 1.343e-01, 1.653e-01, 7.978e-02, 6.329e-02, 7.040e-02, 2.203e-02, -2.280e-01, 2.531e-02, -9.408e-02, -5.137e-02, -1.717e-01, -1.577e-01, 4.030e-02, -2.802e-01, 1.155e-01)); + r += mul(s3_6, M4(1.895e-02, 1.436e-01, 3.568e-04, -6.075e-02, 3.213e-02, -3.462e-02, -2.821e-02, -8.374e-03, 3.451e-02, -2.349e-02, 9.517e-03, 2.092e-02, -8.229e-02, 6.530e-02, 6.116e-03, -2.414e-02)); + r += mul(s3_7, M4(1.530e-01, 2.073e-01, -7.258e-02, -6.975e-02, -6.610e-03, -3.885e-02, -5.636e-03, 1.227e-01, 8.913e-02, 4.336e-02, -1.931e-03, -3.869e-02, -2.019e-02, -1.340e-02, -1.506e-02, 1.591e-02)); + r += mul(s3_8, M4(2.536e-02, -3.220e-02, 6.413e-02, -1.835e-02, -9.124e-02, -8.098e-02, -5.479e-02, -1.361e-02, -3.146e-03, 1.204e-01, -4.020e-02, -6.924e-02, -1.030e-01, -1.301e-01, 1.634e-02, 1.029e-01)); + r += V4(8.385e-03, 1.035e-02, -6.465e-04, -6.502e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 
s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.222e-02, -3.069e-02, 2.456e-03, -1.117e-02, 4.933e-02, 5.166e-02, -6.284e-03, -9.151e-02, 1.439e-02, 1.755e-02, -8.848e-02, -9.796e-02, -2.835e-02, 3.699e-02, 2.912e-02, 4.373e-02)); + r += mul(s0_1, M4(6.333e-03, -2.767e-02, -7.247e-02, 8.441e-02, -3.433e-02, -4.699e-02, -1.193e-02, -1.729e-01, 2.481e-02, -4.121e-02, -2.861e-01, -1.202e-02, 2.687e-02, -1.313e-01, 1.747e-02, -9.108e-02)); + r += mul(s0_2, M4(1.309e-02, -1.968e-02, -1.246e-01, -3.915e-02, -1.159e-01, -6.491e-03, 3.316e-01, -6.851e-02, -2.940e-02, -1.787e-02, -5.850e-03, -6.207e-02, 5.272e-02, 9.800e-02, 4.709e-02, 7.491e-02)); + r += mul(s0_3, M4(-1.127e-01, -3.748e-02, -1.091e-01, 1.788e-01, -7.982e-02, -7.528e-02, 1.898e-01, -1.355e-01, -1.568e-01, 9.648e-02, 2.337e-01, -9.666e-02, -7.316e-02, 2.915e-02, 2.259e-02, -1.310e-02)); + r += mul(s0_4, M4(1.689e-02, -1.028e-01, 1.304e-01, -6.012e-02, -8.030e-02, -1.823e-01, 4.179e-01, -3.553e-01, 9.095e-04, 9.972e-02, 3.227e-01, -4.967e-02, -2.329e-01, 1.272e-01, 4.332e-01, -8.456e-01)); + r += mul(s0_5, M4(1.815e-02, -5.743e-02, 7.236e-02, -8.782e-02, 1.161e-01, 2.258e-01, 7.053e-01, -2.993e-01, 6.605e-02, -2.666e-03, -4.733e-02, -1.087e-01, -1.101e-01, 1.554e-01, 1.656e-01, 2.530e-01)); + r += mul(s0_6, M4(-7.750e-02, -6.619e-02, 2.202e-02, 4.186e-02, -1.519e-01, -8.918e-03, -1.919e-01, -7.085e-02, -1.356e-01, -1.363e-01, 1.782e-01, -1.499e-02, 9.670e-02, 1.450e-03, 5.675e-02, -3.337e-02)); + r += mul(s0_7, M4(-9.267e-02, 1.661e-01, 1.306e-01, -2.387e-01, -2.261e-02, 2.870e-01, -2.711e-01, 6.281e-02, 2.181e-02, 1.010e-01, 2.979e-01, -9.254e-02, 1.307e-01, -2.024e-02, 2.013e-01, -1.862e-02)); + r += mul(s0_8, M4(-7.233e-02, 8.276e-02, 
1.279e-02, -3.778e-02, -3.737e-01, -2.422e-01, -1.352e-01, -1.631e-01, 6.518e-02, 2.511e-01, 1.588e-01, -3.599e-02, 8.821e-02, 3.757e-02, -1.340e-01, 1.006e-01)); + r += mul(s1_0, M4(1.034e-02, 8.194e-02, 9.844e-02, -1.052e-01, 4.683e-03, 4.432e-03, 8.420e-03, 7.511e-03, 7.210e-02, -8.697e-03, -9.834e-02, 1.366e-01, 3.221e-04, 1.836e-02, 1.307e-02, -6.823e-02)); + r += mul(s1_1, M4(-7.232e-02, 1.103e-01, 2.975e-01, 4.747e-02, -1.075e-01, -6.863e-02, 2.378e-01, -2.994e-02, 6.426e-02, 2.459e-02, -1.361e-01, 4.394e-02, 4.558e-02, -5.684e-02, -3.386e-02, 8.075e-02)); + r += mul(s1_2, M4(-1.568e-02, 6.463e-02, 4.001e-02, 3.549e-02, -3.385e-02, -1.547e-02, 2.510e-01, 3.198e-02, 2.533e-02, -6.612e-02, -5.453e-02, 1.387e-03, 3.071e-02, -5.115e-03, -9.345e-02, 1.790e-02)); + r += mul(s1_3, M4(1.723e-01, 2.119e-02, -3.394e-01, -1.101e-01, 7.882e-03, -4.188e-02, -6.882e-02, 5.060e-02, 4.902e-02, 2.919e-02, 7.773e-02, 1.080e-01, 8.944e-02, -2.819e-02, -1.252e-02, -2.744e-01)); + r += mul(s1_4, M4(2.682e-01, 8.840e-03, -3.974e-01, 2.436e-01, 1.156e-02, 3.806e-04, -5.090e-01, -1.339e-02, 1.677e-02, -1.337e-01, -1.050e-01, 2.647e-01, -1.971e-01, -1.145e-02, 1.471e-01, -7.814e-02)); + r += mul(s1_5, M4(-5.376e-02, 2.321e-02, -1.908e-01, -1.538e-01, 5.032e-03, 2.979e-02, -3.934e-02, -1.754e-01, 3.674e-02, 8.713e-03, -7.429e-02, -2.768e-03, -1.878e-01, -1.382e-01, 1.114e-01, 4.843e-02)); + r += mul(s1_6, M4(4.390e-03, 1.082e-02, 6.300e-03, -2.220e-02, -1.578e-02, -3.883e-02, 6.290e-02, 5.752e-03, 9.478e-02, 5.108e-03, 6.174e-02, 8.270e-02, -5.128e-02, -3.664e-02, 3.095e-02, -1.575e-01)); + r += mul(s1_7, M4(2.131e-01, 8.669e-03, 8.288e-02, 1.767e-01, -8.764e-02, -6.440e-03, 1.179e-01, -9.407e-02, -1.114e-01, -1.384e-01, 7.349e-02, 2.379e-02, 6.264e-02, -6.347e-02, -1.973e-01, 3.150e-02)); + r += mul(s1_8, M4(6.920e-02, 2.737e-01, 5.444e-02, -1.065e-01, -8.435e-02, 1.268e-01, -7.219e-03, -4.022e-02, -3.687e-02, -3.873e-02, 5.773e-02, 1.171e-02, 5.552e-02, -2.870e-02, -4.903e-02, 
2.162e-02)); + r += mul(s2_0, M4(-6.811e-02, 3.915e-02, -1.970e-02, 5.496e-02, -3.225e-02, -5.284e-02, -3.737e-03, -1.864e-03, -1.361e-01, -7.308e-02, -4.948e-02, -1.634e-01, 5.283e-02, 1.746e-02, -8.374e-02, 7.123e-02)); + r += mul(s2_1, M4(4.868e-03, 7.851e-02, 1.067e-01, 5.576e-02, 1.276e-01, -7.837e-02, -2.875e-01, 3.754e-02, -1.315e-01, -9.095e-02, 8.041e-02, -1.156e-01, 1.309e-02, 1.086e-01, -1.335e-01, 9.059e-02)); + r += mul(s2_2, M4(-1.092e-02, 1.501e-01, -3.542e-02, 2.500e-02, 1.500e-02, -1.832e-01, -3.447e-01, -2.562e-02, -1.110e-01, 1.362e-01, 1.634e-01, -5.146e-02, -1.184e-02, -1.154e-01, 4.862e-02, 1.344e-03)); + r += mul(s2_3, M4(3.103e-02, -2.009e-02, 2.266e-02, 5.094e-02, 5.909e-01, 1.844e-01, -3.418e-02, -1.460e-01, 1.218e-02, -3.631e-02, -2.582e-01, -2.230e-01, 9.666e-02, -6.432e-02, 7.267e-02, 7.577e-02)); + r += mul(s2_4, M4(8.062e-02, -3.981e-02, -3.232e-02, -1.032e-01, -9.859e-02, 6.539e-01, 5.533e-01, -1.046e-02, -5.348e-01, 1.009e-02, -3.879e-01, 1.190e-01, -1.151e-01, 1.835e-01, -7.797e-02, 1.418e-01)); + r += mul(s2_5, M4(-1.404e-02, -1.730e-01, -4.516e-02, -2.158e-02, 2.544e-01, 4.463e-01, 1.404e-01, -6.854e-02, -9.712e-02, -4.920e-01, -2.485e-02, -6.416e-02, 3.612e-02, 2.451e-01, 2.327e-02, -1.251e-03)); + r += mul(s2_6, M4(6.507e-02, -2.267e-02, -7.660e-02, 3.043e-02, 3.541e-01, 2.804e-01, 2.783e-01, -2.580e-01, -1.185e-01, 8.028e-02, -1.395e-01, -4.988e-03, 4.702e-02, -5.327e-02, 4.580e-02, 3.130e-03)); + r += mul(s2_7, M4(9.806e-02, 6.990e-02, -4.317e-02, -2.415e-02, -2.263e-01, -1.723e-01, 2.669e-02, -3.393e-01, 9.368e-02, -6.775e-02, -1.883e-01, -8.601e-02, -2.278e-01, 1.612e-01, 1.625e-01, 8.821e-02)); + r += mul(s2_8, M4(-1.921e-02, 1.119e-01, 3.717e-02, -2.554e-02, 2.852e-02, 8.987e-02, 1.246e-01, 6.463e-03, 2.548e-02, -2.950e-02, 7.289e-02, 1.802e-02, 2.576e-02, 5.798e-02, 6.021e-02, -5.030e-03)); + r += mul(s3_0, M4(-1.023e-01, -3.759e-02, -2.437e-02, 1.032e-01, -2.143e-02, -4.189e-02, -6.139e-02, 9.887e-02, -9.094e-03, 
3.087e-02, -1.056e-01, 1.376e-01, 1.702e-02, 3.138e-02, -1.243e-01, -5.115e-02)); + r += mul(s3_1, M4(3.439e-02, -1.018e-01, -3.260e-01, 6.226e-02, 3.794e-02, -6.747e-02, -1.743e-01, -9.149e-02, 6.116e-02, -3.539e-02, -3.971e-01, -2.458e-02, -1.436e-01, 4.323e-02, 5.595e-01, 1.160e-01)); + r += mul(s3_2, M4(-7.596e-02, -9.502e-02, -1.112e-02, -7.256e-02, -1.625e-02, -1.013e-01, -7.450e-02, 2.969e-03, -1.481e-02, -1.199e-01, -8.230e-02, 2.952e-02, -3.199e-02, 8.852e-02, -1.541e-02, 1.722e-02)); + r += mul(s3_3, M4(2.768e-03, -9.600e-02, 1.333e-01, -1.174e-01, -7.190e-02, 1.265e-02, 8.135e-02, -6.909e-03, 9.249e-02, -2.800e-02, 2.029e-01, -1.212e-02, 9.955e-02, -2.791e-02, -1.172e-01, 2.079e-01)); + r += mul(s3_4, M4(-1.948e-01, -1.936e-01, 5.127e-01, -7.970e-02, -1.135e-01, 1.060e-01, 1.226e-01, -3.195e-01, -4.980e-01, -5.665e-03, 3.167e-01, -2.413e-01, 2.036e-01, 1.519e-01, 7.793e-04, -1.316e-01)); + r += mul(s3_5, M4(-8.284e-02, -1.590e-01, 5.041e-03, -2.936e-02, 1.485e-01, 8.341e-02, -3.804e-02, 3.576e-02, 1.499e-01, -8.989e-02, 7.085e-02, -4.898e-02, 1.070e-01, 5.825e-02, 1.863e-01, -9.850e-03)); + r += mul(s3_6, M4(-3.057e-01, 2.794e-02, -7.737e-02, -4.168e-02, 2.696e-02, 1.279e-02, 2.638e-02, 8.177e-02, 1.217e-01, 2.531e-02, -1.188e-01, 1.018e-01, -5.486e-02, -6.606e-03, 1.868e-01, -1.050e-01)); + r += mul(s3_7, M4(-3.018e-01, -1.795e-01, -1.578e-01, -1.809e-01, 1.241e-01, -4.960e-02, -1.067e-01, -1.004e-02, -8.835e-02, 6.620e-02, 1.309e-01, -1.399e-01, 4.651e-02, 4.837e-02, -9.106e-02, 1.670e-01)); + r += mul(s3_8, M4(1.081e-02, -9.947e-02, 1.643e-02, -2.769e-02, 9.803e-02, -8.389e-02, -2.782e-02, -2.689e-02, 3.693e-02, -3.436e-03, 1.229e-02, -2.929e-02, -1.751e-01, -5.859e-03, 1.543e-01, 8.225e-02)); + r += V4(-2.994e-04, -3.163e-05, 4.528e-03, -1.285e-02); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= 
size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} 
+ +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(5.681e-02, -7.933e-02, -1.161e-02, -3.257e-02, -1.507e-02, 2.248e-02, -1.351e-02, 2.789e-02, -1.713e-01, 9.482e-02, 2.715e-02, 9.506e-02, 1.714e-01, -1.090e-01, -7.237e-02, -1.563e-01)); + r += mul(s0_1, M4(-9.622e-03, -7.774e-04, -4.095e-02, 1.106e-02, -3.592e-02, -4.358e-02, 1.983e-02, -1.134e-02, -1.313e-02, -1.086e-01, 1.102e-01, -3.091e-01, 1.982e-01, 1.438e-01, -6.038e-02, 9.579e-02)); + r += mul(s0_2, M4(-3.893e-02, 1.554e-02, -7.763e-05, 1.610e-02, 3.470e-03, 9.915e-03, -9.881e-03, 5.331e-02, -9.152e-02, 6.899e-02, -3.615e-02, 1.558e-01, -3.300e-02, 4.493e-02, 2.148e-02, -3.677e-02)); + r += mul(s0_3, M4(1.939e-01, -7.700e-02, -1.449e-01, -1.942e-02, 9.649e-02, -3.580e-03, -1.767e-02, 2.394e-02, -1.299e-01, 1.160e-01, 8.000e-02, 9.737e-02, 2.751e-01, -4.435e-01, 1.013e-01, -1.782e-01)); + r += mul(s0_4, M4(-2.745e-01, 2.922e-01, -2.008e-01, 1.636e-01, -4.843e-02, 4.172e-01, 3.097e-02, 3.326e-01, -1.798e-02, -3.860e-01, 3.246e-02, 4.225e-01, -1.057e-01, 2.302e-01, -7.879e-02, 4.832e-02)); + r += mul(s0_5, M4(-6.834e-04, -3.372e-02, -9.351e-02, 1.547e-02, 5.621e-02, -1.195e-02, -9.402e-03, 6.439e-02, 8.787e-02, 1.499e-02, 1.928e-01, 6.693e-02, 6.516e-02, -1.145e-01, -6.610e-02, 3.986e-02)); + r += mul(s0_6, M4(7.682e-02, -9.222e-02, 1.566e-01, -1.438e-02, 5.080e-02, -2.762e-02, -3.121e-02, -1.242e-02, 2.046e-02, -1.131e-02, 4.555e-02, -3.006e-02, 1.125e-01, -7.883e-02, 1.063e-01, 3.027e-03)); + r += mul(s0_7, 
M4(-1.395e-01, 4.847e-02, 1.605e-01, 1.363e-01, 6.243e-02, -1.464e-02, 3.336e-02, -8.862e-02, 3.286e-02, -2.398e-02, -2.326e-02, -8.408e-02, 1.274e-01, -4.997e-02, 1.548e-01, -8.650e-02)); + r += mul(s0_8, M4(4.236e-02, 3.116e-02, 7.690e-02, 3.084e-02, 6.290e-03, 1.016e-02, 7.155e-02, -9.786e-02, -1.453e-02, -4.564e-04, -3.654e-02, 7.179e-03, -2.110e-02, -2.766e-02, 1.022e-01, -6.664e-02)); + r += mul(s1_0, M4(-2.814e-02, 6.473e-02, 5.209e-02, 6.202e-02, -1.898e-02, 6.061e-02, -1.557e-02, 3.561e-02, 2.137e-01, -1.913e-01, 2.387e-03, -1.470e-01, 4.553e-02, -3.358e-02, 1.936e-03, -4.798e-02)); + r += mul(s1_1, M4(4.947e-03, -8.431e-02, -3.362e-03, -1.057e-01, -6.735e-02, 8.463e-03, -4.622e-02, -2.022e-02, -1.450e-01, -1.687e-03, -1.541e-02, -1.116e-02, 4.447e-02, 5.088e-02, -7.198e-03, 3.279e-02)); + r += mul(s1_2, M4(1.202e-03, -2.591e-02, -5.357e-03, -3.844e-02, -7.403e-03, 3.771e-02, -6.171e-02, 8.820e-02, 6.744e-03, -4.156e-02, -1.377e-02, 9.398e-02, -2.643e-02, 4.991e-02, -2.000e-02, 1.056e-02)); + r += mul(s1_3, M4(3.923e-01, 3.525e-02, -1.294e-01, 1.478e-02, 9.667e-02, 1.289e-01, 8.960e-02, 1.946e-02, 3.128e-01, -3.315e-01, -3.019e-01, 1.021e-01, 2.095e-01, -1.488e-01, -9.439e-02, -9.635e-02)); + r += mul(s1_4, M4(-3.641e-01, -9.985e-02, -3.482e-01, -2.646e-01, -5.257e-01, 9.475e-01, 1.714e-01, 5.842e-01, -2.199e-01, -6.131e-02, -4.597e-01, 5.556e-01, 7.933e-02, -2.150e-01, -3.469e-01, -1.978e-01)); + r += mul(s1_5, M4(7.883e-05, -2.207e-02, -1.735e-02, 2.167e-02, 4.628e-02, 8.814e-02, -4.837e-02, 6.515e-02, 1.617e-01, -4.460e-02, -1.002e-01, 7.496e-02, -1.180e-01, 5.540e-02, -5.708e-02, 5.715e-02)); + r += mul(s1_6, M4(1.680e-01, -5.262e-02, 6.143e-02, -4.758e-02, -5.343e-02, 4.332e-02, 1.191e-01, 8.545e-03, 1.171e-01, -8.169e-02, 1.535e-02, -2.281e-01, 8.009e-02, -9.744e-02, 6.114e-02, 8.379e-03)); + r += mul(s1_7, M4(-9.744e-02, 2.573e-02, 6.125e-02, 1.265e-01, 9.253e-02, -1.227e-01, 3.224e-01, -2.402e-01, 1.083e-01, 1.607e-02, 1.155e-01, -4.014e-01, 
-2.347e-02, -3.821e-02, 2.379e-01, 2.605e-02)); + r += mul(s1_8, M4(5.428e-02, -5.434e-02, -2.345e-02, -2.189e-03, 1.274e-02, 7.503e-02, 1.442e-01, -8.839e-02, -3.480e-02, 1.444e-02, -3.859e-02, -1.089e-01, -3.183e-02, 9.172e-02, 1.092e-01, 6.688e-02)); + r += mul(s2_0, M4(2.283e-01, 3.872e-02, -5.533e-02, -1.704e-02, -1.533e-02, 1.459e-02, 3.842e-02, 6.367e-02, -4.041e-02, -6.411e-03, -5.052e-03, -8.331e-03, 2.786e-03, -5.502e-02, 6.695e-03, -1.982e-02)); + r += mul(s2_1, M4(-4.716e-01, 4.092e-01, -1.581e-01, 4.209e-01, 1.255e-01, -7.138e-02, 7.300e-02, -1.357e-01, -6.908e-02, -1.986e-02, 1.801e-02, -4.505e-02, -1.611e-01, -1.216e-01, -6.522e-02, -9.093e-02)); + r += mul(s2_2, M4(1.019e-01, -3.650e-02, 1.353e-02, 2.487e-01, -1.344e-04, 4.653e-02, 1.721e-02, 4.005e-02, 7.572e-03, -4.357e-02, -3.720e-02, 2.091e-02, 6.051e-03, -6.957e-02, -9.009e-02, -1.788e-02)); + r += mul(s2_3, M4(2.159e-02, -3.325e-02, 3.084e-02, 1.091e-01, -9.662e-02, 1.040e-01, 1.078e-01, -2.572e-02, 2.237e-04, -2.571e-02, -2.335e-02, -1.554e-02, 1.275e-01, -4.579e-02, -1.772e-02, 3.282e-02)); + r += mul(s2_4, M4(4.984e-02, 2.302e-01, 6.568e-02, 1.279e-01, 6.857e-02, -1.499e-01, -4.461e-02, -1.977e-01, -1.903e-01, 1.430e-01, 3.271e-02, 1.978e-01, 2.410e-01, 5.980e-01, -1.394e-01, 2.261e-01)); + r += mul(s2_5, M4(2.188e-02, -8.976e-03, 2.475e-02, 1.340e-02, -4.458e-02, 5.360e-02, 2.628e-02, -1.405e-02, 6.166e-02, -4.895e-02, 1.348e-03, 5.680e-02, -1.123e-01, 7.224e-02, -6.458e-02, 1.314e-01)); + r += mul(s2_6, M4(3.252e-02, -2.389e-02, -2.067e-02, -6.871e-02, -8.327e-02, 7.793e-02, 7.681e-03, 5.095e-02, -1.693e-02, -3.622e-02, 3.065e-02, -1.582e-02, -6.963e-03, 2.835e-02, 6.805e-02, -1.475e-02)); + r += mul(s2_7, M4(4.783e-02, -2.945e-02, 4.732e-02, -9.789e-04, -1.619e-02, -2.603e-02, -1.368e-01, 2.956e-02, 9.844e-02, -1.214e-01, 1.776e-01, -1.461e-01, -5.165e-02, -1.055e-02, 1.793e-01, -4.355e-02)); + r += mul(s2_8, M4(2.619e-03, 4.801e-02, 6.393e-02, -2.399e-02, -1.280e-03, -2.210e-02, 
-4.649e-02, 1.561e-03, -1.789e-02, 5.576e-02, 1.200e-01, 3.338e-03, 4.475e-02, -2.957e-02, 9.300e-02, -7.837e-02)); + r += mul(s3_0, M4(-1.536e-01, -3.593e-03, -1.064e-02, 1.740e-02, 9.197e-02, 2.772e-01, 5.258e-01, 5.745e-01, 2.331e-02, 8.995e-02, 2.611e-02, 5.463e-02, 4.872e-02, -8.230e-03, -1.742e-02, 3.405e-03)); + r += mul(s3_1, M4(4.799e-02, 1.088e-01, -7.562e-02, 5.926e-02, 4.190e-01, -4.922e-01, -1.822e-01, -2.309e-01, 1.776e-01, 1.799e-01, 1.213e-01, 3.198e-01, -1.565e-01, 2.118e-02, -5.914e-02, 1.048e-01)); + r += mul(s3_2, M4(-6.867e-02, -2.488e-02, 2.563e-02, -3.161e-02, -4.038e-02, 5.042e-02, 2.474e-02, 3.962e-03, -4.263e-02, 4.382e-02, -6.197e-03, 5.435e-02, 8.477e-02, -7.694e-02, -2.473e-02, -2.000e-02)); + r += mul(s3_3, M4(-6.567e-02, 7.271e-02, -2.275e-02, -4.345e-03, -4.825e-02, -7.541e-01, 5.163e-01, 9.170e-01, -1.040e-01, -9.911e-03, 3.569e-02, 2.347e-01, 2.350e-02, 6.202e-02, 7.421e-03, 2.377e-02)); + r += mul(s3_4, M4(-3.371e-02, -2.738e-02, 1.670e-01, 2.607e-01, -5.009e-02, 5.743e-03, -6.991e-01, -2.858e-02, -6.907e-02, -4.016e-01, 3.462e-01, 9.128e-01, -1.622e-01, 1.392e-01, 2.250e-01, 1.183e-01)); + r += mul(s3_5, M4(-8.330e-03, 1.029e-01, 1.045e-01, 2.013e-01, 2.609e-02, 7.939e-02, -1.054e-01, 6.487e-02, 1.165e-01, -6.250e-02, 1.274e-01, 2.396e-01, 2.390e-01, -2.468e-01, 1.178e-02, 6.794e-02)); + r += mul(s3_6, M4(5.411e-02, -5.669e-02, 2.831e-02, -3.762e-02, 1.186e-01, 1.750e-01, -2.862e-01, -9.876e-02, 5.851e-02, 2.750e-02, 7.348e-03, -2.151e-01, -3.151e-02, 5.225e-02, 3.178e-02, 1.438e-02)); + r += mul(s3_7, M4(-2.053e-03, 2.875e-02, -4.633e-02, -7.843e-02, -5.216e-02, -1.497e-04, -2.534e-01, -5.098e-01, 3.092e-02, -4.215e-02, -1.330e-01, -9.137e-02, 5.062e-02, 5.514e-02, -1.958e-01, 6.162e-03)); + r += mul(s3_8, M4(3.627e-02, 1.482e-02, 2.228e-02, -7.151e-02, -1.770e-02, 6.009e-02, 2.013e-01, 2.403e-02, 1.912e-03, -9.001e-03, 1.673e-02, -3.465e-02, 5.222e-02, -3.027e-02, -4.458e-03, -6.391e-02)); + r += V4(-3.261e-03, 1.350e-04, 
-6.605e-05, 1.307e-04); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.528e-02, 2.435e-02, -2.728e-02, 5.042e-02, -2.357e-02, 1.752e-02, 6.730e-02, -1.869e-02, 5.562e-02, 2.108e-03, -2.535e-02, -7.791e-02, -6.984e-02, 8.842e-02, 7.203e-02, 3.709e-02)); + r += mul(s0_1, M4(-6.164e-02, -1.824e-02, 8.179e-02, -3.238e-02, 5.338e-02, -5.506e-02, -1.020e-01, 1.520e-02, 1.953e-01, -2.850e-02, 8.323e-02, -8.899e-02, 5.112e-02, 6.369e-02, -5.510e-02, 1.997e-02)); + r += mul(s0_2, M4(6.117e-02, -1.311e-02, -9.258e-03, -1.479e-02, -2.710e-02, 2.958e-02, 2.946e-02, -9.472e-03, 4.257e-02, -7.053e-02, -5.896e-02, 5.475e-02, 6.131e-02, -1.827e-02, -2.909e-02, -6.470e-02)); + r += mul(s0_3, M4(-1.411e-01, 1.597e-01, 2.142e-01, 6.972e-02, 1.704e-02, 4.423e-02, -8.405e-02, 4.993e-02, 1.176e-02, -8.471e-02, 4.062e-02, -1.001e-01, -3.805e-02, 3.820e-02, -6.258e-01, 2.568e-01)); + r += mul(s0_4, M4(3.384e-01, -2.619e-01, 1.799e-01, -3.175e-01, 3.472e-03, -1.186e-01, 7.886e-02, -1.126e-01, 1.378e-01, -3.772e-02, -1.396e-02, 6.889e-02, -1.383e-01, 1.958e-01, 7.297e-02, -1.066e+00)); + r += mul(s0_5, M4(-4.115e-04, 8.733e-03, 3.432e-02, 5.650e-02, 9.203e-02, 6.899e-02, -9.987e-03, 5.139e-02, 2.075e-01, -1.229e-02, 5.912e-02, -2.866e-02, -1.602e-01, 1.654e-01, 6.957e-02, 5.472e-02)); + r += mul(s0_6, M4(-1.000e-01, 9.401e-02, -3.864e-02, 1.160e-01, 1.108e-03, 8.814e-02, 6.570e-04, 2.167e-02, 6.762e-05, -1.080e-02, -1.670e-02, -4.178e-03, -9.704e-03, 2.164e-01, 3.748e-02, -1.258e-02)); + r += mul(s0_7, M4(7.557e-02, -2.360e-01, -2.727e-02, -7.688e-02, -3.110e-02, 1.671e-02, -4.238e-02, 5.553e-02, 6.518e-02, 3.357e-02, -2.725e-02, -2.524e-02, -1.352e-01, 
-1.005e-01, -4.108e-02, 2.664e-01)); + r += mul(s0_8, M4(9.624e-02, 5.754e-03, 8.412e-02, -2.955e-02, 2.850e-02, 8.830e-03, -4.162e-02, -1.337e-02, -4.374e-02, -2.352e-02, -1.566e-02, 1.822e-02, 7.979e-02, -9.058e-02, -1.071e-01, -3.379e-03)); + r += mul(s1_0, M4(1.395e-02, 1.801e-02, 1.899e-03, -3.313e-02, 2.251e-02, -3.697e-03, 5.577e-02, -3.001e-02, -6.090e-02, 1.645e-01, -1.047e-01, 1.483e-01, -6.634e-03, 3.917e-04, -1.999e-02, 2.114e-02)); + r += mul(s1_1, M4(2.859e-03, 5.455e-02, 4.336e-02, -2.717e-02, 9.302e-02, -9.807e-02, 7.046e-02, -3.707e-02, -1.275e-01, -3.463e-02, -1.160e-01, -4.227e-02, 3.162e-02, 3.583e-02, 4.579e-02, -1.196e-02)); + r += mul(s1_2, M4(-7.086e-03, 2.542e-03, 1.500e-03, -6.273e-03, 5.711e-02, -5.317e-02, -5.455e-03, 4.847e-02, 8.830e-02, 5.991e-02, 3.356e-02, 1.214e-03, -5.272e-03, -5.211e-02, -2.142e-02, -1.246e-02)); + r += mul(s1_3, M4(-4.807e-02, 4.530e-02, 2.719e-01, -1.035e-02, 4.911e-02, -5.824e-03, -6.478e-02, -1.051e-03, -1.348e-02, 6.405e-01, -4.257e-01, 3.690e-01, -9.665e-02, 2.101e-01, 6.571e-02, 9.738e-02)); + r += mul(s1_4, M4(2.423e-01, -2.074e-01, -4.394e-01, -2.830e-02, 5.415e-02, -2.337e-01, 6.080e-01, -1.843e-01, -5.128e-01, 1.559e-01, -2.033e-01, -6.040e-02, -6.726e-02, 2.589e-01, 1.901e-01, -9.598e-02)); + r += mul(s1_5, M4(-1.456e-01, 6.484e-02, 1.125e-01, -1.183e-02, 2.186e-01, 2.930e-02, -4.285e-02, 6.272e-02, 1.500e-01, 1.033e-01, 2.173e-01, -3.328e-02, -6.785e-02, -7.882e-02, -1.450e-01, 7.182e-02)); + r += mul(s1_6, M4(-4.062e-02, 9.988e-02, -5.106e-02, 1.546e-01, 5.122e-02, -7.398e-02, -5.320e-03, -5.669e-02, -4.188e-02, 2.035e-01, -5.253e-02, -7.554e-03, -6.233e-02, 1.285e-01, 1.152e-02, 7.495e-02)); + r += mul(s1_7, M4(1.168e-01, -1.061e-01, -8.798e-02, -2.456e-01, -1.274e-01, -9.338e-02, 6.064e-04, 1.255e-01, 2.944e-02, -9.599e-02, -1.606e-01, 1.477e-01, -5.541e-02, -9.992e-02, -5.652e-02, 1.402e-02)); + r += mul(s1_8, M4(-8.447e-02, -2.272e-02, 3.291e-02, 1.141e-01, 2.835e-01, 2.747e-02, 9.338e-03, 
-1.271e-01, 1.118e-03, -3.543e-02, -3.201e-02, 5.803e-02, 1.793e-01, -6.889e-02, -3.139e-02, -1.000e-01)); + r += mul(s2_0, M4(3.477e-02, 8.152e-03, -8.100e-03, 3.869e-02, 4.675e-02, 8.080e-02, -4.909e-02, 6.764e-03, -2.946e-03, -7.021e-02, -1.191e-02, -1.660e-02, -5.967e-02, -1.872e-02, -3.485e-02, 3.391e-02)); + r += mul(s2_1, M4(1.685e-01, -2.681e-01, -2.340e-01, -1.748e-01, -1.593e-01, 7.496e-02, 3.748e-02, 1.562e-02, 5.150e-02, -3.648e-02, 3.739e-02, -4.384e-02, -1.521e-02, -1.061e-01, -1.381e-01, 1.733e-02)); + r += mul(s2_2, M4(1.573e-01, 1.415e-01, 1.714e-01, -5.175e-02, -2.442e-02, 1.054e-02, 3.047e-03, -5.944e-03, -6.027e-03, 1.034e-02, -3.381e-02, 4.299e-02, -9.763e-02, 4.729e-02, 9.642e-02, -1.450e-02)); + r += mul(s2_3, M4(8.191e-03, 1.353e-01, -6.018e-02, 5.677e-02, -1.725e-02, -1.324e-01, 1.646e-01, -1.154e-01, -9.796e-03, 3.066e-02, -5.975e-02, 2.878e-02, -1.381e-01, 1.550e-01, 3.556e-02, 8.926e-02)); + r += mul(s2_4, M4(1.715e-01, -2.115e-02, 8.179e-02, -2.066e-01, 1.275e-01, 1.599e-01, 2.325e-02, -9.637e-03, 6.565e-02, -1.901e-01, 7.185e-02, -1.559e-01, 1.106e-01, -6.210e-02, -3.672e-01, 6.248e-02)); + r += mul(s2_5, M4(-3.453e-03, 5.284e-02, -1.031e-01, 5.091e-02, 1.538e-02, -9.971e-02, -5.610e-02, -2.585e-02, 6.441e-02, 1.113e-01, 3.085e-02, 6.860e-02, -6.167e-02, -6.774e-02, -6.898e-02, -4.397e-03)); + r += mul(s2_6, M4(-1.561e-02, 5.106e-02, 2.999e-03, -7.663e-03, 6.665e-02, -1.217e-01, -9.529e-03, -2.096e-02, -2.825e-02, 4.854e-02, -2.196e-02, -7.191e-03, 2.274e-03, 1.698e-02, -1.727e-02, 1.967e-03)); + r += mul(s2_7, M4(3.534e-02, -1.077e-02, 1.607e-02, 4.542e-02, -7.989e-02, 1.294e-01, 4.920e-02, -6.332e-02, -9.402e-02, 2.028e-02, -6.305e-03, 9.061e-02, 2.225e-03, 2.352e-02, -4.032e-03, -4.985e-02)); + r += mul(s2_8, M4(7.112e-02, -1.427e-02, -2.352e-02, -2.989e-02, -5.633e-02, -6.039e-03, 3.496e-03, 2.535e-02, 1.265e-01, -4.541e-02, -5.393e-02, -5.355e-02, 1.498e-03, 2.057e-02, 1.278e-02, 5.662e-02)); + r += mul(s3_0, M4(9.523e-02, 
-7.183e-02, -2.740e-01, -1.569e-02, 1.008e-01, 3.065e+00, -2.003e-01, 1.938e-01, 7.503e-02, -1.096e-01, -3.177e-02, -4.074e-02, 1.090e-03, -2.250e-02, -4.727e-02, 2.528e-02)); + r += mul(s3_1, M4(-7.789e-02, 7.186e-03, 3.838e-01, -1.314e-01, -4.119e-01, 1.344e-01, 5.252e-02, -4.478e-02, -2.421e-01, 8.221e-02, 1.588e-01, 5.943e-02, -6.960e-02, -7.055e-02, -5.857e-02, -2.367e-02)); + r += mul(s3_2, M4(1.578e-01, -5.477e-02, -1.343e-01, 7.698e-02, 9.761e-02, -2.725e-02, -6.329e-02, -5.552e-02, -6.854e-02, 1.143e-02, -8.043e-02, 1.416e-02, 5.387e-02, 1.371e-01, 1.146e-01, -5.881e-04)); + r += mul(s3_3, M4(7.307e-03, -8.177e-02, 5.634e-02, -1.149e-01, -4.060e-01, 1.613e+00, -3.145e-01, 2.057e-02, -9.555e-02, 2.548e-01, 5.932e-02, 7.789e-02, 7.174e-03, -6.399e-03, -2.315e-02, 8.381e-03)); + r += mul(s3_4, M4(1.200e-01, 1.356e-01, 8.711e-03, 7.537e-02, -1.751e-01, 3.458e-02, 2.391e-01, -1.111e-01, 1.506e-01, -3.165e-01, -4.619e-01, -9.386e-02, -4.377e-02, -1.492e-01, -5.002e-01, 9.821e-02)); + r += mul(s3_5, M4(1.539e-01, 7.309e-02, 4.257e-03, -1.539e-01, -4.757e-01, 1.070e-01, 1.702e-02, 9.709e-02, -1.140e-01, 1.938e-01, 1.982e-01, -3.215e-02, -3.822e-01, 3.408e-01, 1.647e-01, 1.597e-01)); + r += mul(s3_6, M4(-3.320e-02, 4.854e-02, -1.957e-02, 3.353e-02, 1.823e-01, 8.532e-02, 3.236e-02, -1.874e-01, -1.073e-02, -6.598e-03, -2.954e-02, -2.175e-02, 1.184e-02, -3.856e-02, 2.166e-02, -2.608e-02)); + r += mul(s3_7, M4(2.038e-02, -4.606e-02, -3.841e-02, -4.008e-02, -2.542e-01, -1.076e-01, -2.891e-02, 1.837e-01, 3.842e-02, 1.753e-01, 3.043e-02, -3.298e-02, 2.990e-02, 1.215e-01, 9.583e-02, -5.860e-02)); + r += mul(s3_8, M4(6.138e-02, 3.405e-02, 3.364e-04, 6.037e-03, 1.811e-01, 9.691e-04, 3.497e-02, -1.810e-02, -3.940e-02, -1.159e-01, -7.007e-02, 1.170e-01, 1.829e-02, -2.216e-02, -1.689e-02, 1.150e-01)); + r += V4(-1.782e-04, -1.204e-03, 6.004e-04, -1.736e-03); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = 
Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, 
s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 6 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0, t1 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.910e-02, 1.215e-03, -2.039e-03, -1.079e-04, 8.088e-02, -2.119e-02, -1.929e-02, 1.865e-02, -6.142e-02, 2.499e-02, -4.185e-03, 1.951e-03, -1.099e-02, 1.071e-02, 3.133e-03, -9.539e-03)); + r += mul(s0_1, M4(-2.129e-02, 6.812e-02, 2.738e-02, -2.965e-02, -1.569e-01, -7.369e-02, 6.714e-02, -2.416e-02, 6.421e-02, -3.329e-02, 4.397e-03, 1.902e-02, 1.426e-01, 7.469e-02, -3.306e-02, 1.260e-02)); + r += mul(s0_2, M4(-2.521e-02, -1.556e-02, -1.880e-02, 1.813e-02, -2.926e-03, -3.967e-02, -2.562e-02, 1.669e-02, 1.699e-03, 2.545e-02, 9.862e-03, 1.052e-02, -1.392e-02, 1.215e-02, 2.436e-02, 2.113e-04)); + r += mul(s0_3, M4(1.800e-02, -2.761e-02, 1.145e-02, -6.469e-02, 1.392e-01, 1.033e-02, 1.406e-01, -7.326e-03, -2.077e-02, 2.985e-03, -1.102e-01, 2.804e-02, -1.544e-02, 5.050e-02, 2.915e-02, 2.396e-02)); + r += mul(s0_4, M4(1.242e-01, -4.463e-01, -3.829e-01, 1.871e-01, -8.392e-02, 6.470e-02, -3.115e-01, -1.970e-01, -1.186e-01, -1.204e-01, -2.296e-02, -1.763e-01, -1.265e-01, -1.919e-01, 6.718e-02, 8.923e-02)); + r += mul(s0_5, M4(-2.493e-02, 3.014e-02, 2.446e-02, -1.488e-01, 1.299e-02, -5.759e-02, 2.138e-02, -9.211e-02, -8.051e-03, -4.216e-02, -1.327e-02, -9.724e-04, 3.675e-02, 7.968e-03, -3.353e-02, -4.044e-02)); + r += mul(s0_6, M4(2.027e-02, 3.813e-03, -2.557e-03, -2.670e-02, 2.068e-02, 1.886e-02, 6.014e-02, 3.191e-02, -1.917e-03, 
-2.659e-03, 1.273e-02, 3.109e-03, 9.881e-03, -4.410e-04, 7.569e-03, 1.276e-02)); + r += mul(s0_7, M4(-1.802e-03, 4.820e-02, 4.201e-02, 4.574e-02, 2.826e-02, 2.044e-02, 1.196e-01, 9.132e-02, 1.800e-02, 2.670e-02, -3.398e-03, 1.359e-02, 1.247e-02, 1.268e-02, 1.628e-03, -1.067e-02)); + r += mul(s0_8, M4(5.233e-03, 3.648e-02, 2.719e-02, 2.838e-02, 1.857e-03, -1.999e-03, 1.703e-02, 5.921e-02, 7.925e-03, -2.543e-03, 5.431e-03, -1.102e-02, -1.116e-02, -5.510e-03, -9.183e-03, -8.054e-03)); + r += mul(s1_0, M4(-6.423e-02, -5.758e-03, -8.948e-03, -2.227e-03, 5.802e-02, -2.252e-02, -8.134e-03, 1.448e-02, -3.642e-02, 4.476e-03, 7.865e-03, 3.269e-03, 1.053e-02, 1.269e-02, -1.530e-03, -9.628e-03)); + r += mul(s1_1, M4(-2.553e-02, 4.747e-02, 4.136e-02, -2.368e-02, -1.401e-01, -4.967e-02, 6.372e-02, -1.788e-04, 3.663e-01, 2.193e-01, -8.228e-02, -8.507e-02, 1.404e-01, 8.229e-02, -5.862e-02, -1.161e-02)); + r += mul(s1_2, M4(-2.216e-02, -7.521e-03, -2.522e-02, 2.337e-02, -2.651e-03, -3.786e-02, -9.854e-03, 2.033e-02, 9.696e-03, 1.237e-01, 6.173e-03, 2.898e-02, -1.335e-02, 2.948e-02, 9.778e-03, -1.243e-02)); + r += mul(s1_3, M4(-1.598e-02, -1.677e-02, -4.726e-02, -2.250e-02, 2.076e-01, -2.825e-02, 1.389e-01, -2.552e-02, 3.209e-02, -3.267e-03, -9.876e-02, 3.775e-02, -5.440e-02, 6.367e-02, 8.425e-02, 7.583e-03)); + r += mul(s1_4, M4(-2.339e-01, -8.617e-02, -3.313e-01, 1.470e-01, -1.249e-01, 3.994e-01, -7.191e-01, -2.121e-01, 2.521e-02, 4.601e-02, -3.584e-01, -4.014e-01, -4.299e-01, -4.828e-01, 4.034e-01, 3.633e-01)); + r += mul(s1_5, M4(3.413e-02, -4.685e-03, 4.308e-02, -1.211e-01, 3.722e-02, -1.000e-01, 5.938e-02, -1.900e-01, 3.286e-03, 6.076e-03, 2.628e-02, -1.190e-01, 3.968e-02, -3.583e-02, -4.724e-02, 5.713e-02)); + r += mul(s1_6, M4(3.008e-02, -2.083e-02, 7.970e-03, -2.011e-02, -8.809e-03, 9.741e-03, 7.228e-02, 1.875e-02, -8.374e-03, -2.245e-03, 1.642e-02, -9.996e-03, 2.093e-02, 6.393e-03, 6.227e-03, -6.775e-03)); + r += mul(s1_7, M4(1.113e-02, 5.783e-02, -1.430e-02, 2.826e-02, 
-1.250e-02, -3.106e-02, 1.754e-01, 2.001e-01, -1.431e-02, -1.368e-02, 4.329e-02, 4.832e-02, 4.089e-02, 3.702e-02, -5.774e-03, 8.701e-03)); + r += mul(s1_8, M4(1.395e-03, 3.747e-02, 2.706e-02, 4.675e-02, -1.191e-02, -2.163e-02, 3.137e-02, 7.056e-02, 4.929e-03, -6.465e-03, 1.083e-03, 1.816e-02, -3.896e-03, 1.081e-02, -1.507e-02, -1.412e-02)); + r += mul(s2_0, M4(5.551e-02, 3.061e-02, 2.172e-02, -4.435e-04, 7.341e-02, -4.254e-03, -3.710e-02, 2.005e-02, 3.528e-02, 1.764e-02, 4.547e-03, -6.460e-03, 1.949e-01, 2.466e-02, 7.886e-02, -2.722e-03)); + r += mul(s2_1, M4(-1.216e-03, 4.895e-02, -2.548e-02, 1.354e-02, 1.184e-01, -2.592e-01, 3.262e-02, 3.213e-02, -7.885e-02, -2.429e-02, -5.811e-02, 1.909e-02, 3.185e-02, -7.057e-02, -2.388e-02, 1.018e-01)); + r += mul(s2_2, M4(-4.325e-03, 8.278e-03, -7.126e-04, -3.013e-03, -2.277e-02, 6.470e-02, -3.258e-02, 6.558e-03, 2.954e-02, 9.175e-03, -1.066e-03, -1.931e-02, 3.523e-03, 1.347e-03, -1.837e-03, -3.765e-03)); + r += mul(s2_3, M4(-1.063e-01, 1.364e-02, -1.031e-01, 7.569e-02, -3.770e-02, 3.667e-02, 2.683e-02, 5.980e-02, -1.057e-01, -1.107e-02, -7.272e-02, 5.094e-02, 7.605e-02, 1.566e-02, 1.708e-01, 2.124e-01)); + r += mul(s2_4, M4(1.344e-02, -6.091e-02, 2.694e-02, -2.727e-02, 2.786e-01, 5.187e-02, 6.738e-01, -9.220e-01, 1.745e-01, -1.468e-02, 1.843e-01, -1.866e-01, -9.396e-02, -1.505e-01, 2.471e-01, -1.138e+00)); + r += mul(s2_5, M4(6.506e-03, 7.226e-03, 9.650e-03, 3.959e-03, -2.858e-02, -1.124e-01, -5.599e-02, 8.081e-02, -3.923e-02, 6.977e-02, 2.327e-03, 1.164e-01, 1.242e-02, -1.947e-02, -4.582e-02, 2.119e-02)); + r += mul(s2_6, M4(-1.730e-02, -2.202e-02, -2.408e-02, -6.448e-02, -3.767e-03, 2.506e-02, -4.165e-02, 4.527e-02, 1.431e-02, -2.421e-02, -1.170e-02, -6.665e-02, -1.236e-02, 5.709e-03, -6.345e-03, -3.440e-02)); + r += mul(s2_7, M4(-4.211e-02, -5.191e-02, -9.762e-02, -1.275e-01, 2.079e-02, -1.004e-01, 7.470e-02, 1.084e-02, -1.789e-02, 8.006e-02, 3.170e-02, 1.111e-01, -4.772e-02, -6.100e-02, 2.375e-02, 2.545e-03)); + r += 
mul(s2_8, M4(-7.109e-03, 1.968e-03, -9.159e-03, -1.523e-02, -1.024e-02, -5.787e-04, -4.581e-02, -1.496e-02, 2.302e-02, -1.568e-02, 2.850e-02, 9.731e-03, -1.219e-02, 1.316e-03, -1.859e-02, 8.662e-02)); + r += mul(s3_0, M4(2.241e-01, 1.599e-02, -3.007e-02, -8.278e-02, -2.343e-02, -1.323e-02, 6.153e-03, 8.030e-03, 1.988e-02, 1.870e-02, 7.620e-03, -1.035e-02, 2.443e-01, 4.061e-02, 3.123e-02, -4.152e-03)); + r += mul(s3_1, M4(-1.500e-02, -2.365e-02, -2.046e-02, 4.369e-02, 7.611e-03, -9.342e-03, 4.413e-03, -1.110e-03, -1.238e-01, -3.394e-02, -4.442e-02, 2.423e-02, -9.742e-02, -2.324e-02, -3.479e-02, 4.742e-02)); + r += mul(s3_2, M4(5.839e-03, 1.560e-02, -3.631e-03, 6.730e-03, -2.371e-03, -1.011e-02, -3.821e-03, 1.830e-03, 2.255e-02, 1.426e-02, -1.146e-02, -1.650e-02, 9.035e-03, 5.831e-03, 2.660e-03, -4.854e-03)); + r += mul(s3_3, M4(-1.694e-01, -2.771e-01, 6.449e-01, -2.979e-01, 9.108e-02, -2.277e-02, -5.309e-02, -3.552e-02, -1.626e-01, 2.544e-02, -7.033e-02, 7.145e-02, -1.334e-01, 1.008e-01, 1.121e-01, 1.733e-01)); + r += mul(s3_4, M4(-1.019e-01, 1.989e-01, -6.682e-02, -7.066e-02, -3.795e-02, 1.362e-01, 4.307e-02, -4.383e-02, 6.286e-01, -3.881e-01, 1.970e-01, -3.421e-01, -5.374e-03, -2.446e-01, -8.874e-02, -4.099e-01)); + r += mul(s3_5, M4(1.279e-02, -1.406e-02, 7.997e-03, 1.743e-02, 2.251e-02, -4.285e-02, -2.154e-03, -1.441e-02, -2.329e-02, 1.667e-02, 4.333e-02, 1.229e-01, -2.284e-03, -2.450e-02, -8.000e-03, -1.712e-02)); + r += mul(s3_6, M4(7.251e-02, 9.488e-03, -1.511e-01, -6.947e-02, -2.728e-02, 7.342e-03, 2.289e-02, 1.443e-02, 1.492e-02, -8.903e-03, -5.817e-02, -4.836e-02, -1.677e-03, 1.964e-02, -6.858e-03, -1.328e-02)); + r += mul(s3_7, M4(-8.618e-02, -5.596e-02, -1.276e-01, -1.230e-01, 4.851e-03, -5.676e-02, 2.939e-02, -4.192e-02, -2.508e-02, 4.430e-02, 1.352e-01, 2.072e-02, -8.584e-03, -3.983e-02, 1.177e-02, -4.721e-02)); + r += mul(s3_8, M4(6.050e-03, -3.781e-04, -3.124e-03, -1.667e-02, -1.291e-02, -1.315e-02, -2.106e-02, -5.240e-03, 1.412e-02, -2.504e-02, 
3.138e-02, -2.989e-02, -6.363e-03, -1.480e-04, 1.157e-03, 1.933e-02)); + r += V4(-8.480e-04, -1.222e-04, -8.629e-04, -1.828e-04); + return tanh(r); +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, 
s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-6x8C-NVL-DN.hlsl b/src/Effects/CuNNy/CuNNy-6x8C-NVL-DN.hlsl new file mode 100644 index 000000000..ce86c52a4 --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-6x8C-NVL-DN.hlsl @@ -0,0 +1,1247 @@ +// CuNNy 6x8C BILINEAR RGB NVL DN - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-DN-D08N06 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1 + +#define l0(x, y) min16float((dot(float3(-2.035e-01, -4.051e-01, -9.041e-02), O(INPUT, float2(x, y)).rgb) + 4.315e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(-5.333e-02, 1.506e-02, -4.863e-02, 5.352e-03) * s0_0; + r += V4(3.064e-02, -1.241e-03, 3.831e-02, 1.406e-01) * s0_1; + r += V4(-3.234e-03, 3.668e-03, -1.982e-02, -1.101e-01) * s0_2; + r += V4(-4.854e-01, 5.950e-01, -9.253e-02, -3.601e-01) * s0_3; + r += V4(-1.239e-01, 1.942e-01, 4.939e-01, 5.723e-01) * s0_4; + r += V4(-1.746e-02, 8.397e-04, 4.267e-02, -7.863e-02) * s0_5; + r += V4(5.387e-01, -5.918e-01, -5.641e-02, 2.018e-02) * s0_6; + r += V4(9.656e-02, -2.095e-01, -1.788e-01, -1.900e-01) * s0_7; + r += V4(1.780e-02, -3.460e-03, -1.204e-01, -1.629e-02) * s0_8; + r += V4(-8.698e-03, -1.051e-02, -2.456e-02, 8.033e-03); + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float 
s0_8) { + V4 r = 0.0; + r += V4(1.238e-02, 8.036e-03, 3.125e-01, 1.440e-01) * s0_0; + r += V4(-5.165e-02, -8.626e-03, -3.096e-01, 4.477e-02) * s0_1; + r += V4(3.169e-04, 1.974e-02, -2.705e-01, -5.500e-02) * s0_2; + r += V4(-2.838e-02, 3.386e-03, 1.727e-01, 4.413e-01) * s0_3; + r += V4(1.497e-01, 1.326e-02, 3.072e-01, -5.548e-01) * s0_4; + r += V4(1.428e-04, -3.819e-01, -2.438e-01, 5.685e-03) * s0_5; + r += V4(-3.416e-02, 2.989e-03, 2.969e-02, 2.118e-02) * s0_6; + r += V4(5.299e-01, 4.041e-03, -9.012e-02, -7.878e-02) * s0_7; + r += V4(-5.243e-02, 9.452e-03, 7.145e-02, 3.160e-02) * s0_8; + r += V4(1.285e-01, 1.293e-01, -1.923e-02, 1.313e-02); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.415e-01, -1.943e-02, -8.607e-02, 1.720e-02, -5.503e-02, -7.077e-02, 6.357e-02, 
1.637e-01, -3.997e-04, 3.993e-01, 1.124e-02, -7.681e-02, 2.275e-01, 2.744e-01, 3.333e-02, -7.161e-02)); + r += mul(s0_1, M4(9.960e-02, 3.193e-01, 1.675e-01, -3.303e-01, 2.128e-01, -4.128e-01, -2.347e-01, 7.342e-01, 1.727e-01, 3.478e-02, -1.685e-01, 2.954e-01, 3.302e-01, -4.103e-01, -1.933e-01, 6.543e-01)); + r += mul(s0_2, M4(3.761e-01, -1.545e-01, -1.035e-01, 4.556e-01, 2.239e-01, 2.557e-01, -2.634e-01, -3.533e-01, 1.636e-01, -6.189e-02, 7.688e-02, 4.012e-01, 2.164e-01, -1.716e-01, -1.205e-01, 8.663e-02)); + r += mul(s0_3, M4(-1.181e-02, 1.876e-01, -6.568e-02, -5.398e-02, 1.158e-01, -1.222e-01, 5.085e-02, 2.418e-01, 1.176e-01, -3.773e-01, -2.300e-02, 1.588e-02, 3.060e-02, 6.070e-02, -7.153e-02, 1.807e-04)); + r += mul(s0_4, M4(2.148e-01, -3.282e-01, -1.202e-01, -5.740e-02, -8.745e-02, 4.563e-01, 1.371e-01, -2.197e-02, 1.796e-01, -8.210e-02, 4.639e-01, -2.013e-02, -1.657e-01, 1.274e-01, 1.544e-01, -3.108e-01)); + r += mul(s0_5, M4(5.544e-01, 1.682e-01, -5.642e-01, 2.636e-01, -5.510e-01, -1.371e-01, 1.450e-01, -4.443e-01, 1.705e-01, -1.304e-01, -2.385e-02, -7.088e-02, -3.489e-01, 6.447e-02, 6.309e-02, -1.137e-01)); + r += mul(s0_6, M4(-1.218e-01, -2.795e-02, -1.160e-01, 8.723e-02, -2.646e-02, -2.134e-01, 1.170e-02, 1.284e-01, -1.001e-02, 1.919e-01, -9.593e-02, -2.529e-01, -8.665e-02, 1.473e-01, 3.237e-01, -9.264e-02)); + r += mul(s0_7, M4(-2.280e-01, -1.011e-01, -1.681e-01, -1.644e-01, 7.984e-02, 4.135e-02, 9.764e-02, 1.405e-01, -3.485e-01, 2.078e-01, 8.260e-02, -6.538e-02, -1.472e-01, -2.301e-02, -1.911e-02, -6.720e-04)); + r += mul(s0_8, M4(-2.326e-01, -1.645e-01, -1.724e-01, -1.018e-01, 1.470e-01, 2.117e-01, 3.018e-01, -7.956e-02, -1.260e-01, -2.090e-01, -7.409e-02, 8.810e-03, 7.153e-02, 1.818e-02, 8.868e-03, -1.993e-03)); + r += mul(s1_0, M4(-4.601e-02, -1.224e-01, 1.492e-01, 1.063e-01, 1.004e-01, 3.807e-02, -7.326e-02, 3.358e-02, 2.690e-02, 3.908e-02, 7.352e-02, -1.512e-01, -1.342e-02, 3.690e-01, -2.220e-01, -9.966e-02)); + r += mul(s1_1, M4(-4.769e-01, 
1.653e-01, -2.428e-01, 3.893e-02, 8.017e-01, -5.618e-01, 7.747e-02, 2.968e-01, 2.540e-01, -3.845e-01, -2.625e-01, 4.699e-01, 4.378e-01, -9.630e-01, -5.949e-02, 8.363e-01)); + r += mul(s1_2, M4(-4.463e-01, -1.382e-01, -3.918e-01, -4.812e-01, 3.207e-01, -2.003e-01, 1.050e-01, 1.800e-01, 2.242e-01, -1.199e-01, -4.862e-02, 1.616e-01, 1.341e-01, -4.310e-01, -1.937e-01, 2.326e-01)); + r += mul(s1_3, M4(2.147e-02, -2.783e-01, 8.845e-02, 2.262e-01, 1.024e-01, 2.714e-01, -4.063e-02, -7.636e-03, 1.980e-02, -1.420e-01, -2.109e-02, 3.946e-02, 8.302e-02, 5.234e-01, -2.883e-01, -1.296e-01)); + r += mul(s1_4, M4(-5.593e-02, 7.059e-02, -1.420e-01, 8.657e-02, 2.728e-01, -8.770e-02, 1.477e-01, -1.427e-01, -1.250e-01, 6.256e-02, -3.762e-01, -1.046e-01, -3.094e-01, 5.179e-01, -3.617e-02, -2.885e-01)); + r += mul(s1_5, M4(-1.382e-01, 2.495e-02, 3.461e-01, -5.939e-01, 3.019e-01, -8.266e-02, -3.544e-01, 2.824e-01, -1.029e-01, -3.089e-04, 1.307e-01, -6.184e-02, -1.293e-01, -4.540e-02, -3.406e-01, -1.988e-01)); + r += mul(s1_6, M4(-6.472e-02, -2.037e-01, -1.320e-02, 1.285e-01, -1.378e-01, -3.275e-02, -8.836e-02, 9.191e-02, 1.123e-01, 1.736e-01, -2.116e-01, 2.340e-02, -1.655e-01, 1.227e-01, 2.700e-01, -1.485e-01)); + r += mul(s1_7, M4(1.229e-01, 2.141e-02, 1.523e-01, 7.696e-02, -1.830e-01, -1.036e-01, -9.236e-02, -1.558e-01, -2.906e-01, -3.243e-02, -2.723e-02, -8.754e-02, -2.951e-01, 2.923e-02, -9.960e-02, -9.770e-03)); + r += mul(s1_8, M4(9.990e-02, 3.377e-01, 1.933e-01, -1.242e-01, -1.515e-01, -6.217e-02, 4.245e-02, -6.189e-02, -9.236e-02, -2.214e-01, -6.575e-02, 6.456e-03, 1.795e-02, 4.278e-02, -6.188e-02, 1.573e-02)); + r += mul(s2_0, M4(1.491e-01, -9.449e-02, -1.071e-01, 3.575e-02, -6.811e-04, -4.960e-02, -5.181e-04, 7.298e-02, 3.839e-02, -2.178e-02, -1.703e-02, 1.046e-01, -4.208e-02, 9.251e-03, 3.523e-02, -2.881e-01)); + r += mul(s2_1, M4(3.309e-02, 6.440e-02, 2.337e-01, 1.524e-01, -1.725e-02, 3.153e-02, 9.155e-03, -5.480e-02, 4.245e-02, -1.042e-01, -1.053e-01, 1.759e-01, -1.713e-01, 
-1.269e-01, -1.378e-02, -6.438e-02)); + r += mul(s2_2, M4(-1.648e-01, -6.306e-03, 8.299e-02, -1.812e-01, 6.679e-02, -6.670e-02, -8.122e-02, 1.284e-01, -7.879e-02, 9.292e-02, 7.607e-02, -2.007e-01, 2.088e-02, 5.058e-02, -1.943e-01, 1.150e-01)); + r += mul(s2_3, M4(9.643e-02, 2.546e-01, -5.823e-02, 6.665e-02, -6.931e-02, 1.891e-02, 1.862e-01, 1.711e-02, 1.426e-01, 2.191e-01, -2.335e-01, 2.214e-01, -5.269e-02, 2.464e-02, -9.016e-02, -7.974e-02)); + r += mul(s2_4, M4(3.501e-02, -1.022e-02, 1.990e-01, -7.641e-02, -1.492e-01, 1.836e-01, 8.877e-02, -2.226e-01, -2.686e-01, 1.809e-01, 2.979e-01, -2.644e-01, 1.681e-01, -3.821e-01, -1.366e-01, 7.164e-01)); + r += mul(s2_5, M4(2.277e-01, -1.757e-01, -1.340e-02, 1.040e-01, -7.691e-02, 1.556e-01, -1.029e-02, -9.558e-02, 2.705e-01, 4.410e-02, 1.649e-01, 6.361e-02, 1.790e-01, 2.952e-01, -5.692e-02, -3.225e-02)); + r += mul(s2_6, M4(-5.395e-02, 8.925e-02, 6.361e-02, -4.226e-02, -3.947e-02, -1.783e-01, -3.393e-02, 5.163e-02, 4.873e-02, 2.768e-01, -1.197e-01, -1.120e-01, -1.770e-02, 9.506e-02, 1.098e-01, -1.090e-01)); + r += mul(s2_7, M4(-2.177e-01, 7.495e-02, 8.441e-02, -1.043e-01, 2.036e-01, -1.763e-01, 1.748e-01, 1.660e-01, 1.121e-01, -2.253e-01, -1.790e-01, -1.439e-01, -1.083e-01, 5.504e-02, 2.336e-01, -9.936e-02)); + r += mul(s2_8, M4(-1.184e-01, -8.465e-02, 5.144e-02, 2.009e-02, 7.120e-02, 9.413e-02, 4.370e-02, -5.484e-02, -1.443e-01, -8.664e-02, -4.848e-02, 1.242e-01, 5.112e-02, 2.200e-01, -5.659e-03, 5.474e-02)); + r += mul(s3_0, M4(6.419e-01, -3.023e-01, 2.127e-01, 3.398e-01, -1.325e+00, -2.531e-01, -9.418e-01, -1.673e+00, -2.186e-02, -1.045e-01, -4.623e-02, 9.273e-02, 5.411e-02, 2.154e-01, 1.728e-01, -1.901e-01)); + r += mul(s3_1, M4(-1.247e-01, 2.308e-01, -5.479e-01, -4.446e-02, -2.334e-01, -7.716e-02, -1.220e-01, -3.670e-01, 2.478e-02, -4.998e-02, -7.881e-02, 1.175e-01, -6.152e-01, 8.178e-02, -8.520e-02, -2.934e-01)); + r += mul(s3_2, M4(-8.153e-01, 1.575e-01, 7.752e-01, 9.207e-01, 3.695e-02, -2.903e-01, -1.513e-01, 
6.705e-03, -5.714e-02, 1.421e-01, 4.701e-02, -8.445e-02, 1.149e-01, -1.975e-01, -2.647e-01, 3.480e-01)); + r += mul(s3_3, M4(3.278e-02, 4.846e-01, -6.441e-01, -5.745e-02, 1.015e+00, -5.113e-01, -2.161e+00, 9.403e-01, 1.408e-01, 2.036e-01, -8.272e-02, 1.226e-01, -1.586e-01, -6.721e-02, -1.057e-01, -5.341e-02)); + r += mul(s3_4, M4(-1.353e+00, 7.765e-01, -7.567e-01, -9.945e-01, 4.103e-01, 1.065e-01, -1.313e-02, 2.815e-01, -3.549e-02, -1.802e-01, 3.446e-01, -3.407e-01, 3.526e-01, -3.278e-01, -2.182e-01, 7.167e-01)); + r += mul(s3_5, M4(2.939e-01, 1.015e+00, -1.901e-01, 7.339e-01, -1.241e-01, 8.653e-02, 3.034e-01, 4.420e-01, 1.446e-02, 3.259e-04, 1.860e-01, -1.794e-02, -1.243e-01, -7.423e-02, -1.561e-01, -3.792e-01)); + r += mul(s3_6, M4(-2.100e-02, 1.608e-01, 6.866e-02, 8.797e-02, 6.613e-01, -5.224e-01, -9.743e-01, 1.941e-01, -3.859e-02, 9.605e-02, -1.248e-02, -4.475e-03, 8.367e-02, 2.368e-01, 1.273e-01, -9.406e-02)); + r += mul(s3_7, M4(1.635e-01, 3.213e-01, -1.108e-01, 5.477e-01, 2.577e-01, -4.309e-01, 2.367e-01, 7.649e-02, -1.462e-02, -1.543e-01, -1.300e-01, -3.833e-02, -4.089e-02, -2.145e-02, 2.984e-01, 5.385e-02)); + r += mul(s3_8, M4(1.851e-01, -4.212e-01, -1.136e-01, -1.345e-01, -2.134e-01, -4.054e-01, -2.896e-01, -5.105e-02, -4.118e-02, -6.056e-02, 1.869e-02, 6.431e-02, 2.077e-02, -1.061e-01, -5.865e-02, 2.204e-01)); + r += V4(2.009e-02, -6.020e-02, -2.095e-01, -2.428e-04); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.340e-01, -2.739e-02, -5.723e-01, 5.792e-02, -2.152e-01, 5.695e-02, 1.657e-01, 1.371e-01, -1.275e-01, 2.891e-01, -2.673e-01, 9.725e-02, -5.268e-02, 5.140e-02, -3.580e-01, 4.427e-01)); + r += mul(s0_1, 
M4(2.613e-01, 3.597e-01, -3.318e-01, -1.844e-02, -1.190e-02, -2.073e-01, -1.665e-01, -3.505e-01, 8.562e-02, -3.558e-01, 1.160e-01, 1.326e-03, -7.102e-02, -4.067e-01, 3.810e-01, 1.910e-01)); + r += mul(s0_2, M4(4.934e-02, -5.566e-01, 6.730e-01, -1.377e-01, 1.549e-02, 2.280e-01, -3.924e-01, 1.057e-01, 6.662e-03, -2.799e-01, 9.640e-02, -8.985e-02, 1.093e-02, -1.567e-01, 8.349e-02, -1.169e-01)); + r += mul(s0_3, M4(9.578e-02, -9.551e-02, 4.883e-02, 1.361e-01, -2.646e-01, 9.763e-02, -2.427e-01, 4.072e-01, -3.877e-01, 1.953e-01, 1.251e-01, 4.511e-02, 6.040e-02, 1.248e-02, -2.899e-02, 2.293e-01)); + r += mul(s0_4, M4(1.141e-01, -3.109e-01, -2.500e-01, -2.775e-01, -8.209e-02, 7.398e-02, 2.509e-01, 4.549e-02, 7.525e-02, 4.394e-01, 3.993e-01, 1.754e-01, 1.776e-01, 2.971e-01, -3.441e-01, -5.120e-01)); + r += mul(s0_5, M4(-6.703e-03, 5.090e-01, -1.500e-01, 2.635e-01, 1.989e-01, -6.816e-01, 4.591e-01, -3.330e-01, 4.645e-03, -1.490e-01, -1.983e-01, -1.401e-01, 1.173e-01, -1.492e-02, 3.411e-01, -3.166e-02)); + r += mul(s0_6, M4(-3.800e-02, -1.477e-01, 5.108e-02, 2.905e-01, -7.616e-03, -1.992e-01, -1.272e-01, 1.880e-01, -3.035e-02, 2.286e-01, 8.193e-02, 2.364e-01, -4.650e-02, 6.422e-03, 2.178e-01, -3.440e-01)); + r += mul(s0_7, M4(1.507e-02, -1.787e-01, 2.303e-01, -8.268e-02, -8.501e-02, -6.999e-02, -2.088e-01, 1.476e-01, 1.189e-01, 1.203e-01, -2.417e-01, -2.783e-01, 5.869e-02, 2.074e-01, -1.446e-01, 1.392e-01)); + r += mul(s0_8, M4(9.754e-02, 1.925e-01, -3.828e-03, -2.816e-01, -1.213e-01, -2.292e-02, 2.923e-01, -2.626e-01, -1.298e-01, -9.575e-02, 2.873e-02, -9.571e-02, 9.200e-02, -1.673e-01, -1.188e-01, 8.730e-02)); + r += mul(s1_0, M4(-2.350e-01, 1.587e-01, 3.174e-01, 1.542e-01, 1.652e-01, -9.002e-02, -4.229e-01, 9.736e-02, 5.939e-02, 3.149e-01, -1.059e-01, 1.857e-01, -3.531e-01, 2.776e-01, -2.455e-01, 4.585e-01)); + r += mul(s1_1, M4(-3.605e-01, -1.254e-01, -4.447e-01, -4.215e-01, 2.543e-01, -1.086e-02, 3.330e-01, 1.645e-01, 4.044e-02, -1.669e-01, 1.289e-01, -2.380e-02, 
-3.433e-01, -3.620e-01, 1.712e-01, 1.668e-01)); + r += mul(s1_2, M4(1.756e-02, -2.704e-03, -3.697e-01, 3.403e-01, -3.247e-02, -2.006e-01, -3.319e-02, -2.798e-01, -1.027e-01, -1.990e-01, -4.246e-01, 7.520e-02, 1.787e-01, -3.869e-01, 7.570e-02, -3.073e-01)); + r += mul(s1_3, M4(-8.614e-02, 6.455e-02, -1.730e-01, 5.939e-02, -1.393e-02, -2.230e-02, 1.723e-01, 4.100e-01, 1.529e-02, -1.270e-01, -1.502e-01, 3.934e-01, -3.637e-01, -6.028e-02, 1.548e-01, 3.118e-01)); + r += mul(s1_4, M4(-1.500e-01, -1.187e-01, 4.891e-01, 1.696e-01, 1.480e-01, 2.560e-01, -2.057e-01, -4.255e-01, 1.289e-01, -1.118e-02, -1.087e-02, -1.905e-01, 1.239e-01, 2.520e-02, -1.996e-02, -3.386e-01)); + r += mul(s1_5, M4(1.052e-01, -2.239e-02, 3.975e-01, -1.405e-01, -1.267e-02, 4.224e-01, -2.635e-01, 9.741e-02, 1.863e-02, -2.286e-01, 3.328e-01, -2.667e-01, -6.544e-02, -3.208e-01, -2.388e-01, 4.362e-02)); + r += mul(s1_6, M4(-5.319e-03, -1.865e-01, -4.191e-02, 2.387e-01, -2.874e-02, -1.661e-01, 1.281e-01, 2.304e-01, -2.066e-01, -1.021e-01, -1.088e-01, 2.609e-01, -1.502e-01, -1.255e-01, 1.263e-01, -4.176e-01)); + r += mul(s1_7, M4(-6.257e-02, -1.212e-01, -2.666e-01, 5.840e-03, 1.525e-01, 2.046e-01, 2.017e-01, 4.602e-02, 3.198e-02, 9.142e-02, 6.658e-02, -1.799e-01, 5.912e-02, 2.451e-01, 4.026e-01, 2.306e-02)); + r += mul(s1_8, M4(-3.453e-02, 1.717e-01, 1.058e-01, -2.543e-01, -8.375e-03, 1.824e-03, -1.615e-01, -2.510e-01, 4.237e-02, -7.161e-02, 1.259e-01, -9.696e-02, 3.739e-02, 2.963e-02, -3.003e-01, 5.154e-02)); + r += mul(s2_0, M4(-3.399e-02, 4.481e-02, -1.857e-01, 2.356e-01, -3.322e-02, -5.841e-02, 6.648e-02, -1.404e-01, -1.145e-02, -6.816e-02, -1.611e-02, 5.459e-02, -5.165e-03, 2.280e-01, -7.486e-02, -1.055e-01)); + r += mul(s2_1, M4(5.683e-02, -1.365e-01, -7.714e-03, 2.856e-02, 5.374e-02, -1.428e-01, 1.258e-01, -7.951e-02, 9.614e-02, -2.378e-01, 2.039e-01, -1.078e-01, -5.929e-02, 5.041e-02, -1.935e-01, -1.077e-01)); + r += mul(s2_2, M4(-7.949e-02, 3.859e-02, 1.619e-01, -1.051e-01, 8.312e-02, 2.784e-02, 
-1.049e-01, 9.604e-02, -9.492e-02, 1.578e-01, -4.464e-02, 1.584e-03, -2.066e-03, 5.707e-02, -2.157e-02, 8.817e-02)); + r += mul(s2_3, M4(1.038e-01, 8.953e-02, 5.480e-02, 2.716e-01, -7.691e-02, -3.918e-02, -9.394e-02, 9.078e-02, -1.943e-01, -7.785e-02, -2.547e-01, 8.458e-01, -1.954e-02, 1.182e-01, -1.381e-01, -3.832e-01)); + r += mul(s2_4, M4(-2.084e-01, 3.375e-02, -1.733e-01, -7.274e-02, -9.431e-02, 1.635e-01, -1.894e-01, -3.871e-02, 4.172e-02, 2.193e-01, -3.097e-01, -4.415e-01, -1.866e-01, -3.925e-01, 2.134e-01, 6.298e-01)); + r += mul(s2_5, M4(1.212e-01, -5.359e-02, -2.222e-02, -2.707e-01, -9.405e-02, 6.387e-02, 8.948e-03, 1.820e-01, -8.107e-02, 1.164e-01, -2.881e-01, -8.921e-02, -1.152e-01, 5.029e-01, -5.062e-01, 7.837e-02)); + r += mul(s2_6, M4(-5.824e-02, 9.913e-02, 9.971e-02, 1.557e-01, 7.915e-02, -8.769e-02, 7.980e-02, -3.432e-02, -1.692e-01, -9.786e-02, 1.282e-01, 1.709e-01, -1.265e-01, -6.808e-02, 7.225e-02, 1.675e-01)); + r += mul(s2_7, M4(-4.990e-02, -6.602e-03, -1.068e-01, -1.430e-02, 8.055e-02, -2.758e-02, -6.937e-02, -4.834e-02, -1.227e-02, 8.519e-02, 7.793e-01, -1.867e-01, -3.421e-01, -7.152e-02, -1.059e-01, 1.740e-01)); + r += mul(s2_8, M4(8.256e-02, -7.590e-03, 6.570e-02, -1.256e-01, -8.610e-02, 2.026e-01, -3.546e-02, 1.104e-01, 1.248e-01, -1.588e-01, 5.464e-01, -6.521e-02, 1.475e-01, -1.531e-01, 3.597e-01, -1.642e-01)); + r += mul(s3_0, M4(6.340e-01, 3.940e-01, 1.113e+00, 1.007e+00, -1.212e+00, 2.533e-01, -1.397e+00, 6.065e-01, -7.533e-02, -2.773e-02, -1.402e-02, 4.141e-02, 8.358e-02, 7.306e-02, 7.087e-03, -1.155e-01)); + r += mul(s3_1, M4(1.419e+00, 4.487e-01, -7.031e-01, -8.197e-01, 2.210e-01, -1.184e+00, 8.590e-01, -4.940e-01, 1.331e-01, -3.564e-01, 2.108e-01, -8.584e-02, -2.572e-01, -1.731e-02, 2.636e-01, -1.133e-01)); + r += mul(s3_2, M4(9.915e-01, -7.961e-01, 5.330e-01, -4.510e-01, -5.253e-01, 1.903e-01, -4.662e-01, 3.206e-01, 2.827e-02, 2.187e-02, -3.303e-02, 1.886e-02, -1.535e-02, -1.334e-02, 6.018e-03, 1.582e-01)); + r += mul(s3_3, 
M4(4.868e-02, 5.590e-01, 5.528e-02, 4.772e-01, 2.589e-01, -4.851e-01, 9.148e-01, 7.832e-01, -3.434e-01, -5.368e-02, -1.723e-01, 1.019e+00, 2.202e-01, 1.859e-01, 5.046e-03, -7.167e-01)); + r += mul(s3_4, M4(2.415e+00, 1.390e+00, 6.607e-01, -2.473e-01, 2.204e-01, -1.690e-01, -6.748e-01, -6.208e-01, 5.492e-02, 1.191e-01, -3.462e-01, -4.150e-01, 2.729e-01, -2.437e-01, 1.929e-01, 1.347e-01)); + r += mul(s3_5, M4(9.691e-01, -8.869e-01, 3.704e-01, -5.191e-02, -1.921e-02, 1.358e+00, -4.369e-01, 4.558e-01, -5.294e-02, 3.324e-02, -1.043e-01, -1.216e-01, 1.593e-01, 5.623e-01, -4.612e-01, -1.042e-01)); + r += mul(s3_6, M4(-1.293e-01, 4.959e-01, 1.363e-01, 5.027e-02, -5.043e-01, -4.472e-01, -6.873e-02, -6.303e-02, -2.425e-01, 1.265e-01, -1.273e-01, 2.420e-01, 6.472e-03, 5.157e-02, 1.899e-01, 6.324e-02)); + r += mul(s3_7, M4(-2.009e-01, 5.446e-01, -2.478e-01, -9.895e-03, 6.806e-02, -3.905e-01, -3.134e-01, -4.657e-01, -9.783e-02, -1.021e-01, 3.844e-01, -1.041e-01, -4.335e-03, 3.746e-01, -1.352e-01, 5.207e-02)); + r += mul(s3_8, M4(3.874e-01, 2.981e-02, 2.205e-01, -1.412e-01, 3.674e-01, 8.717e-01, -6.576e-01, 4.285e-01, -4.677e-02, 1.490e-02, 1.870e-01, -1.712e-02, 1.799e-01, 2.057e-01, -9.212e-02, -6.896e-02)); + r += V4(-1.228e-02, -4.362e-02, 4.248e-02, -2.610e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = 
-max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r 
= 0.0; + r += mul(s0_0, M4(-1.867e-01, -2.287e-03, -1.191e-01, -1.249e-02, -8.376e-02, 2.333e-01, 3.011e-02, 8.727e-02, -1.528e-01, 4.522e-02, -1.092e-01, 1.235e-01, 1.179e-01, 1.085e-01, 2.075e-01, -4.524e-02)); + r += mul(s0_1, M4(-6.251e-02, 9.622e-02, -1.299e-01, -8.819e-02, 8.242e-02, -4.033e-01, -1.512e-01, -8.449e-02, -2.673e-01, 3.868e-01, 2.615e-02, 1.069e-01, 2.055e-01, -1.800e-01, 7.375e-03, 8.391e-03)); + r += mul(s0_2, M4(1.410e-01, -8.185e-02, 2.465e-02, 3.546e-02, -7.339e-02, -4.492e-02, -6.633e-02, -1.005e-02, 5.993e-02, 5.536e-02, -6.278e-03, -8.258e-02, -1.498e-01, 5.872e-02, 1.560e-01, -7.461e-02)); + r += mul(s0_3, M4(-2.116e-01, 2.332e-01, 2.930e-01, -1.691e-01, -9.505e-02, -8.336e-02, 1.302e-01, -8.985e-03, 1.795e-01, 4.858e-01, -5.144e-02, 1.663e-01, 2.647e-01, -3.083e-01, -1.612e-01, 9.123e-02)); + r += mul(s0_4, M4(2.233e-01, 3.415e-01, 2.541e-01, 1.521e-01, 2.939e-01, 5.545e-02, 2.474e-01, -1.522e-01, 1.945e-01, 1.302e-01, 6.035e-02, 1.204e-01, -5.938e-02, -1.101e-01, -9.751e-02, -2.144e-01)); + r += mul(s0_5, M4(-1.567e-01, 7.505e-02, 7.642e-02, -1.079e-01, 3.845e-02, -9.225e-02, 1.250e-02, -9.117e-02, 3.753e-01, -1.417e-01, -7.198e-02, 3.818e-01, -5.320e-02, -2.646e-01, 2.586e-03, 1.312e-01)); + r += mul(s0_6, M4(-9.201e-02, 1.449e-01, 2.745e-01, -2.933e-02, 8.872e-02, -7.589e-02, 1.074e-01, -1.002e-01, -8.457e-03, -3.762e-01, 5.415e-02, 1.822e-01, 2.688e-01, -2.326e-01, -5.302e-01, -6.336e-02)); + r += mul(s0_7, M4(1.119e-01, -1.561e-02, -6.547e-03, -9.972e-02, -1.073e-01, 9.063e-02, 3.843e-02, 6.815e-02, 3.779e-01, -1.196e-01, -1.353e-01, -2.813e-02, 1.060e-01, 2.387e-01, 1.267e-02, 1.083e-01)); + r += mul(s0_8, M4(-9.346e-02, 1.792e-02, -1.428e-01, 3.228e-02, -1.043e-01, -1.120e-01, -3.488e-03, -4.356e-02, -2.876e-01, -7.053e-02, -4.502e-02, -5.783e-02, -2.254e-01, 5.783e-02, 3.232e-01, 7.605e-04)); + r += mul(s1_0, M4(9.940e-02, 9.588e-03, -3.584e-01, 1.252e-01, -1.917e-01, 1.258e-01, 2.297e-01, 1.390e-01, 4.639e-02, -1.121e-01, 
-1.118e-01, -4.381e-02, -1.695e-01, 1.821e-01, 2.465e-01, -9.049e-02)); + r += mul(s1_1, M4(5.143e-02, 1.069e-02, -8.068e-02, -8.982e-02, -2.494e-02, -7.513e-02, 1.598e-02, 3.467e-02, -1.174e-01, 2.261e-01, -8.818e-02, -1.923e-02, -3.776e-02, -3.819e-01, -8.594e-02, 4.435e-03)); + r += mul(s1_2, M4(-3.623e-02, -5.439e-02, -9.351e-03, 1.235e-01, -9.703e-03, -1.335e-01, 1.036e-01, 2.097e-02, 1.458e-02, 3.203e-02, 1.967e-01, -1.818e-02, -9.724e-02, -7.890e-02, 3.670e-02, -4.865e-02)); + r += mul(s1_3, M4(-2.127e-01, -2.056e-02, 3.076e-01, 5.685e-02, 9.937e-02, -2.134e-01, 3.166e-02, -4.301e-02, 6.663e-02, -5.974e-02, 8.903e-02, 3.410e-02, 4.111e-02, 1.169e-01, 4.898e-02, 5.654e-02)); + r += mul(s1_4, M4(1.874e-01, 1.435e-01, -4.010e-01, 6.335e-05, 3.853e-01, -4.015e-01, -4.051e-01, -2.454e-01, -2.675e-02, 2.236e-01, 2.143e-01, -4.164e-02, -2.142e-01, -1.742e-01, 4.441e-01, -2.792e-01)); + r += mul(s1_5, M4(-1.686e-01, -3.803e-02, 1.583e-01, 9.059e-02, -1.233e-01, 7.512e-02, 1.737e-01, 6.937e-02, 1.250e-01, -9.459e-02, -2.433e-01, 2.107e-01, 8.135e-02, -1.054e-01, -1.870e-01, -1.011e-01)); + r += mul(s1_6, M4(1.078e-01, -1.143e-01, 2.303e-01, 1.447e-01, -6.396e-02, -1.863e-01, -1.454e-01, -1.476e-01, 2.346e-02, -1.712e-02, 3.948e-02, -2.445e-02, -7.822e-02, 5.100e-01, 1.676e-01, 5.526e-02)); + r += mul(s1_7, M4(2.939e-01, -3.193e-01, 1.130e-01, 1.019e-01, -2.836e-01, -3.252e-01, -1.221e-01, -1.886e-01, -1.541e-02, -3.348e-02, -1.192e-01, 5.553e-02, 2.747e-01, 3.810e-01, -1.083e-01, 8.926e-02)); + r += mul(s1_8, M4(-1.914e-01, -1.810e-01, 3.616e-01, 1.443e-01, -2.356e-01, 1.987e-02, 2.206e-01, -7.203e-03, -4.873e-02, 1.449e-02, 2.429e-03, 4.135e-03, 1.601e-01, 1.436e-02, -2.503e-01, -1.747e-01)); + r += mul(s2_0, M4(5.158e-02, -5.768e-01, 1.059e-01, 6.356e-02, 5.453e-02, 1.182e-01, 1.025e-01, -4.581e-03, -4.476e-02, -7.326e-02, 3.128e-02, -1.658e-02, 7.979e-02, -1.015e-01, -7.756e-03, 5.261e-02)); + r += mul(s2_1, M4(-9.209e-02, 5.020e-04, 1.687e-01, 2.337e-01, 
5.893e-02, -1.190e-02, 2.134e-01, 2.056e-01, -8.468e-02, 1.393e-01, 2.666e-01, 8.482e-02, 9.005e-03, 1.830e-01, 1.487e-01, 1.541e-01)); + r += mul(s2_2, M4(-1.577e-02, 1.800e-01, -4.463e-02, 1.857e-01, -2.505e-02, -4.244e-02, 3.534e-02, 1.091e-02, 3.562e-02, -1.613e-02, 7.525e-02, -6.411e-02, -1.120e-01, 2.570e-01, 1.793e-01, 3.232e-01)); + r += mul(s2_3, M4(1.081e-01, -3.272e-01, 9.989e-02, -1.714e-01, 1.272e-02, -8.817e-02, -6.541e-01, 9.601e-03, 5.846e-03, -1.214e-01, 4.286e-02, -1.424e-04, 2.707e-02, -1.255e-01, -1.766e-01, 7.553e-02)); + r += mul(s2_4, M4(1.707e-01, 2.406e-02, 7.128e-01, 3.512e-01, 4.649e-01, -2.976e-02, -2.139e-01, -1.608e-01, 1.348e-01, -1.933e-01, -1.625e-01, -1.502e-01, 3.457e-02, -2.310e-01, -1.418e-01, -1.607e-01)); + r += mul(s2_5, M4(2.141e-01, -3.424e-01, -2.787e-01, 1.588e-01, -8.585e-03, 1.987e-01, 8.631e-02, -2.482e-02, -1.159e-01, 3.130e-02, 6.300e-02, -1.646e-01, 2.076e-01, 3.123e-01, 7.450e-02, 8.300e-01)); + r += mul(s2_6, M4(-1.156e-01, 5.885e-01, 2.790e-01, 1.944e-01, -2.609e-02, 1.192e-01, 1.064e-01, 2.452e-01, 6.769e-02, 7.500e-02, 1.444e-01, 1.657e-02, 1.032e-04, 6.885e-02, -3.119e-01, 1.838e-02)); + r += mul(s2_7, M4(2.686e-01, -3.624e-01, -3.820e-01, -1.910e-02, 2.541e-01, -3.535e-02, 3.461e-02, 6.867e-02, 1.792e-01, -4.944e-02, 9.518e-02, -9.353e-02, 1.577e-01, -2.950e-01, -5.366e-01, -9.244e-02)); + r += mul(s2_8, M4(-5.502e-02, -3.642e-02, 2.950e-01, 1.710e-02, 1.386e-03, -4.643e-03, -5.468e-02, 1.526e-01, -3.839e-02, 2.502e-02, 6.890e-02, -5.190e-02, -3.046e-01, -8.298e-02, -2.710e-01, -4.361e-02)); + r += mul(s3_0, M4(3.470e-02, -1.794e-01, 4.939e-03, 5.871e-02, -3.644e-02, 2.163e-01, 1.659e-01, 1.729e-02, -7.065e-02, -4.364e-02, -1.401e-01, -6.392e-02, 2.933e-02, -2.708e-01, -6.419e-02, 2.552e-02)); + r += mul(s3_1, M4(2.013e-01, 1.346e-01, 1.245e-01, 2.012e-01, 1.159e-01, 2.536e-02, 3.348e-01, 2.151e-01, -1.428e-01, 3.330e-02, 5.570e-02, 1.265e-01, -1.421e-01, -2.205e-01, -2.407e-02, -1.310e-02)); + r += mul(s3_2, 
M4(1.897e-01, 1.382e-01, 8.547e-02, 1.685e-02, 3.906e-02, -1.645e-02, 8.177e-02, -1.090e-01, 2.720e-02, -1.483e-01, -1.471e-01, -3.796e-02, -9.755e-02, 2.098e-01, -1.363e-01, 8.741e-04)); + r += mul(s3_3, M4(-8.746e-02, -2.411e-01, -5.564e-01, 1.288e-01, -1.560e-03, -1.454e-01, -3.086e-01, -1.364e-01, -6.448e-02, 2.139e-02, -6.920e-02, 1.125e-01, 2.256e-02, -1.337e-01, 2.318e-01, -9.337e-02)); + r += mul(s3_4, M4(1.330e-01, 7.059e-02, 2.570e-01, -3.958e-02, 2.678e-01, -1.782e-01, -4.249e-01, -7.859e-02, -7.410e-03, 4.629e-02, 1.869e-01, 6.028e-02, -2.636e-01, -8.746e-01, 4.294e-01, -7.591e-01)); + r += mul(s3_5, M4(6.538e-02, 3.535e-01, 3.381e-01, 1.665e-01, -1.464e-01, -4.395e-02, 6.332e-02, 6.707e-02, 4.085e-02, 3.123e-02, -8.943e-02, -1.124e-01, 7.129e-01, -2.572e-01, -3.428e-02, -3.166e-01)); + r += mul(s3_6, M4(3.447e-02, 1.964e-02, -1.246e-02, -4.894e-02, -3.053e-02, 6.215e-02, 1.432e-01, 4.347e-02, 9.639e-02, 2.140e-01, 1.791e-01, 1.898e-01, -5.042e-02, 2.674e-01, 1.714e-01, 8.155e-02)); + r += mul(s3_7, M4(3.324e-02, -2.281e-01, 8.075e-02, 8.469e-02, 7.409e-02, 4.094e-02, 1.500e-01, 7.922e-02, 1.632e-01, 3.876e-02, -1.316e-01, 1.200e-01, 3.408e-01, 1.752e-01, -1.315e-01, 1.251e-01)); + r += mul(s3_8, M4(-1.043e-01, -2.295e-01, -2.968e-01, -7.326e-02, 9.514e-02, 1.595e-02, -2.148e-02, -7.380e-02, -3.873e-02, 5.639e-02, 5.948e-02, 6.939e-02, 4.680e-01, 2.191e-01, -1.091e-01, -4.211e-02)); + r += V4(3.966e-02, 2.168e-04, 3.712e-03, 7.866e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.510e-01, 1.269e-02, -1.267e-01, 1.446e-01, 3.963e-02, 3.673e-02, -3.129e-02, 2.499e-02, 1.567e-02, 1.863e-01, -4.694e-03, -1.522e-01, 
-1.215e-01, 1.422e-01, 1.161e-01, -1.114e-01)); + r += mul(s0_1, M4(-2.120e-02, 1.988e-01, 1.436e-01, -1.165e-01, 1.754e-01, 6.868e-02, -1.784e-01, 1.453e-02, 1.257e-02, -3.559e-01, 6.345e-02, 7.528e-02, -1.168e-01, -3.730e-01, -1.436e-02, 2.597e-02)); + r += mul(s0_2, M4(-1.577e-01, 4.407e-03, -3.120e-02, -3.013e-03, -1.327e-01, -1.630e-01, -8.175e-02, -1.577e-02, -2.674e-01, -5.831e-02, 1.139e-01, -9.667e-02, 1.499e-01, -3.017e-02, -7.416e-02, 8.048e-02)); + r += mul(s0_3, M4(-3.099e-01, 1.730e-01, 4.941e-02, 3.643e-01, 5.996e-03, -1.379e-01, 3.862e-01, 1.959e-01, 3.855e-01, 2.959e-01, -1.075e-01, -3.195e-01, 5.520e-02, -2.633e-02, 6.609e-01, -2.863e-01)); + r += mul(s0_4, M4(-1.581e-01, 5.127e-01, 5.755e-01, -1.758e-02, 2.351e-02, -1.945e-01, -3.701e-01, -2.035e-01, 1.510e+00, -6.021e-01, 2.540e-01, 1.989e-03, -2.473e-01, 6.479e-01, -1.236e-01, -2.509e-02)); + r += mul(s0_5, M4(6.567e-02, 4.649e-02, 1.349e-01, 4.919e-02, 4.185e-02, -1.002e-02, 9.981e-02, -2.859e-02, -2.278e-01, 4.727e-01, 1.868e-01, -2.588e-01, 8.428e-02, 7.575e-03, -3.525e-01, 3.996e-03)); + r += mul(s0_6, M4(-1.744e-01, -9.151e-02, 1.860e-01, 2.731e-01, -6.986e-02, -3.020e-02, -1.869e-01, -1.327e-02, -1.042e-01, -2.540e-02, -2.241e-01, 3.422e-01, -1.022e-01, 1.238e-01, 2.314e-01, -3.349e-01)); + r += mul(s0_7, M4(-2.791e-02, 1.601e-01, 2.001e-01, -7.415e-02, 1.847e-01, -9.655e-02, -3.229e-02, 2.887e-02, -9.137e-02, -3.329e-02, 2.700e-01, 9.334e-02, 9.282e-02, -6.146e-02, 1.428e-01, 1.684e-01)); + r += mul(s0_8, M4(1.795e-01, -2.872e-02, 3.678e-02, 6.662e-02, -1.044e-01, -3.932e-02, 2.314e-02, -7.340e-02, 9.445e-02, 1.306e-01, -2.439e-01, 9.100e-02, 3.984e-02, -9.294e-02, -6.069e-02, -2.545e-02)); + r += mul(s1_0, M4(4.324e-01, 2.259e-01, -4.051e-02, 4.797e-03, -1.294e-01, 2.765e-02, 1.875e-01, 5.255e-02, -1.390e-02, 2.073e-01, 6.489e-02, -1.416e-01, -2.698e-01, 2.849e-02, 1.129e-01, 6.424e-02)); + r += mul(s1_1, M4(-1.527e-01, -3.081e-01, -2.367e-01, -1.744e-01, -1.040e-02, -1.817e-01, 
1.216e-01, -5.997e-03, -1.056e-01, 1.047e-02, -9.173e-02, 6.123e-02, 1.587e-01, 3.438e-01, 8.663e-02, 1.487e-01)); + r += mul(s1_2, M4(-1.173e-01, -1.156e-01, -4.805e-02, 5.150e-02, -8.245e-02, 7.996e-02, -1.812e-01, 1.072e-01, -4.140e-02, -1.859e-01, 9.617e-03, 9.194e-02, 9.678e-03, 4.886e-02, -7.522e-02, 1.882e-02)); + r += mul(s1_3, M4(4.124e-02, -1.781e-01, -8.895e-01, 1.978e-02, -1.229e-01, -1.255e-01, 1.666e-01, 3.719e-01, -1.012e-01, -4.286e-02, 8.884e-02, -1.577e-01, -1.258e-01, 1.669e-01, 4.306e-01, 2.863e-01)); + r += mul(s1_4, M4(-3.640e-01, 7.624e-02, -5.308e-01, -3.999e-02, 6.426e-02, -2.244e-01, -2.932e-01, -2.347e-01, 3.475e-01, -1.236e-01, 1.469e-01, 8.772e-02, 9.278e-02, 5.106e-01, -7.446e-02, -5.061e-02)); + r += mul(s1_5, M4(2.733e-01, 2.298e-01, -4.516e-02, 1.068e-01, -6.146e-02, 1.248e-01, 1.512e-01, -1.409e-01, -2.574e-02, 1.303e-01, -6.689e-02, -1.138e-01, 3.539e-02, 3.450e-03, -3.933e-02, -2.456e-02)); + r += mul(s1_6, M4(-1.740e-01, 1.962e-01, -2.318e-01, 4.245e-02, 1.837e-01, 1.318e-02, 6.216e-02, -8.214e-02, 3.329e-02, 4.938e-02, -1.139e-01, 2.849e-02, 7.127e-02, -1.978e-01, -6.718e-02, 3.379e-01)); + r += mul(s1_7, M4(1.036e-01, -1.074e-01, 1.211e-01, -1.502e-01, 3.505e-01, 2.469e-02, 3.160e-02, 1.840e-01, -5.042e-02, 1.114e-02, 1.469e-02, -2.675e-02, -6.888e-02, -1.076e-01, -1.764e-01, -1.250e-03)); + r += mul(s1_8, M4(-1.227e-01, 3.169e-02, -1.346e-01, 2.663e-02, -3.523e-01, -4.952e-02, 2.526e-02, -3.065e-01, 1.105e-01, -1.705e-02, -2.442e-02, 4.761e-02, 2.978e-01, -1.184e-01, -2.559e-02, -1.439e-01)); + r += mul(s2_0, M4(-6.434e-02, -1.450e-01, 6.691e-02, -1.498e-02, -2.586e-01, -1.415e-01, 2.408e-02, 1.405e-02, -9.741e-02, -2.278e-02, -7.323e-03, 1.029e-01, 1.971e-01, -7.889e-02, 1.621e-01, -1.481e-01)); + r += mul(s2_1, M4(7.306e-02, 8.261e-03, -1.106e-01, -6.119e-02, 9.048e-02, 3.668e-01, -4.932e-02, 1.419e-01, -1.065e-01, 1.921e-01, -6.933e-03, 1.397e-01, 1.131e-01, -2.959e-01, 2.377e-01, -8.244e-02)); + r += mul(s2_2, 
M4(-6.484e-02, -8.438e-02, -8.452e-02, 7.652e-02, 1.060e-01, 2.751e-01, -4.448e-02, 6.228e-02, 2.452e-02, 1.715e-01, -2.210e-01, 1.880e-01, -1.089e-01, -4.575e-01, 1.402e-01, 2.589e-02)); + r += mul(s2_3, M4(5.019e-02, 1.179e-01, -3.322e-01, -1.521e-01, 1.682e-01, 5.881e-02, -4.209e-01, -8.599e-02, 1.338e-02, 1.764e-02, -1.383e-01, 7.389e-02, 3.577e-02, 7.338e-02, -2.762e-01, -9.510e-02)); + r += mul(s2_4, M4(-1.853e-01, -4.620e-01, -1.324e-01, -2.907e-01, 4.004e-01, 6.572e-02, -3.924e-01, 3.585e-02, 1.534e-02, 2.261e-01, -1.448e-01, 7.334e-02, -4.871e-01, 2.474e-01, 6.138e-01, 1.618e-01)); + r += mul(s2_5, M4(-3.986e-02, -5.998e-02, -1.165e-01, 2.199e-01, 1.416e-01, -2.026e-01, -1.034e-01, -8.049e-03, 2.830e-01, 1.312e-03, -1.952e-01, 1.926e-01, 1.404e-01, -1.712e-01, 2.704e-01, 5.702e-02)); + r += mul(s2_6, M4(1.954e-01, 1.098e-01, -4.060e-01, 6.263e-01, 9.565e-02, 3.321e-02, -9.177e-02, 1.727e-01, -3.629e-02, -3.498e-02, -1.498e-01, -1.020e-02, 7.289e-02, -1.008e-01, -6.138e-02, -7.229e-02)); + r += mul(s2_7, M4(4.694e-01, 4.689e-02, -2.026e-02, -1.420e-01, -1.815e-01, 1.337e-01, -3.029e-03, 8.901e-03, -7.604e-02, 6.520e-03, -9.244e-02, 1.180e-01, 1.924e-01, 4.576e-03, 9.215e-02, 1.393e-01)); + r += mul(s2_8, M4(-2.694e-01, -7.673e-02, -3.907e-02, 2.054e-01, -7.508e-02, 4.244e-02, 8.589e-03, -2.004e-02, 1.029e-01, -2.785e-04, -1.329e-01, 1.480e-01, 1.240e-01, 1.682e-01, 4.375e-02, 5.794e-02)); + r += mul(s3_0, M4(1.908e-01, -9.550e-02, -6.231e-02, -8.961e-02, -2.150e-01, -5.266e-02, -5.877e-02, 6.574e-02, 1.206e-01, -5.647e-02, 1.819e-01, 9.125e-03, -1.411e-02, -7.630e-02, 1.694e-01, -6.670e-02)); + r += mul(s3_1, M4(-7.247e-02, -1.664e-02, 3.111e-02, -8.914e-02, 1.373e-02, -7.042e-03, 2.124e-01, 8.056e-03, 1.248e-01, 1.013e-01, 1.053e-01, 8.886e-02, -3.534e-01, -5.092e-02, -8.903e-02, 1.120e-01)); + r += mul(s3_2, M4(-1.274e-01, 1.296e-01, 6.739e-02, -1.061e-01, 1.488e-02, 1.174e-01, -2.220e-02, 3.190e-02, -8.067e-03, 6.182e-03, 3.938e-02, -1.144e-02, 
1.720e-02, 2.232e-02, -1.033e-01, 1.938e-01)); + r += mul(s3_3, M4(2.119e-01, -7.107e-03, -1.771e-01, 3.717e-03, -9.809e-02, -2.060e-01, -1.242e-01, 1.331e-01, 1.656e-01, -4.429e-02, 3.430e-01, -1.180e-01, -8.243e-02, 2.318e-01, -3.745e-01, 2.954e-02)); + r += mul(s3_4, M4(5.462e-02, 2.234e-01, 5.211e-02, 4.298e-02, 2.002e-01, -6.029e-02, 8.922e-02, -1.422e-01, 4.850e-03, 2.692e-02, 1.883e-03, -5.867e-02, -1.761e-01, -2.994e-01, -1.180e-01, 5.357e-01)); + r += mul(s3_5, M4(-1.020e-01, -9.731e-03, 4.965e-02, 1.040e-01, 3.933e-02, -7.445e-02, -8.691e-02, 2.101e-02, 1.135e-01, -4.423e-02, 7.911e-02, -7.177e-04, 7.153e-03, -2.231e-01, -2.036e-01, 2.961e-01)); + r += mul(s3_6, M4(-8.583e-02, -1.500e-01, -4.673e-02, -1.242e-01, 5.128e-02, 1.491e-03, 2.211e-01, 4.382e-03, -1.899e-01, -1.419e-02, 1.903e-01, -1.301e-01, 6.635e-02, -2.247e-02, -5.734e-02, 3.099e-01)); + r += mul(s3_7, M4(1.511e-01, -1.528e-01, 1.166e-01, 1.147e-01, -3.419e-01, 3.498e-02, 1.819e-02, -1.488e-01, -1.092e-01, -4.590e-02, 2.240e-01, -2.029e-01, 3.950e-02, 2.020e-02, 1.403e-02, 4.349e-01)); + r += mul(s3_8, M4(4.921e-02, 8.799e-02, -6.688e-03, 9.084e-02, 3.443e-02, 3.622e-02, 8.327e-02, -2.267e-02, -1.895e-02, -2.932e-02, 8.468e-02, -4.335e-02, -5.082e-02, 8.529e-02, 7.031e-02, 2.080e-01)); + r += V4(2.464e-02, -1.503e-02, -3.365e-02, 7.710e-03); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = 
-max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 
s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.303e-01, -2.502e-01, 6.500e-03, 2.601e-01, -3.627e-02, 1.172e-02, -2.737e-02, 1.167e-02, -1.626e-01, -3.004e-02, 2.364e-02, -2.347e-02, -4.186e-02, 1.951e-01, 2.632e-02, 1.146e-01)); + r += mul(s0_1, M4(-1.030e-01, -1.744e-01, -7.392e-03, 1.713e-02, 7.134e-02, -2.497e-04, -1.421e-01, 1.345e-01, -1.077e-01, -2.417e-01, 7.019e-02, -3.896e-01, -7.303e-02, -4.439e-03, -4.887e-02, 9.957e-02)); + r += mul(s0_2, M4(-3.608e-02, 1.177e-02, 1.276e-01, -1.034e-01, 3.357e-02, 1.218e-01, -6.970e-02, 2.349e-02, -8.838e-03, -1.774e-01, -1.413e-01, -9.342e-02, -4.418e-02, -7.144e-02, 4.315e-02, 6.959e-02)); + r += mul(s0_3, M4(-2.867e-01, -2.233e-01, 4.131e-01, 6.114e-01, 7.602e-02, 1.683e-01, -1.640e-01, 5.004e-02, -1.479e-01, -1.302e-01, 1.253e-01, -5.586e-02, 1.083e-02, 2.092e-01, 4.253e-01, 5.892e-02)); + r += mul(s0_4, M4(2.991e-01, -2.867e-01, 3.591e-01, 1.815e-01, 1.301e-01, 2.781e-02, 6.110e-02, -7.526e-02, -2.236e-01, -2.383e-01, -1.365e-01, -1.511e-01, -3.541e-02, 2.105e-01, 1.880e-01, -2.169e-01)); + r += mul(s0_5, M4(1.310e-01, 6.178e-03, 6.162e-02, 8.473e-02, -4.512e-02, 2.741e-01, -2.587e-01, -1.368e-01, 1.448e-01, -2.241e-01, 1.788e-01, 1.691e-01, -7.749e-02, -6.264e-02, 4.554e-02, 6.527e-02)); + r += mul(s0_6, M4(9.763e-02, 4.812e-02, -1.276e-01, -2.642e-01, -2.232e-02, -2.877e-02, -1.684e-01, 1.152e-01, -9.840e-02, 1.224e-01, 3.182e-02, -6.762e-02, 8.287e-02, 3.558e-01, -4.662e-02, -2.938e-01)); + r += mul(s0_7, M4(1.670e-01, -1.360e-02, -9.542e-02, -1.691e-01, -1.819e-01, -9.168e-03, 9.087e-02, -7.566e-02, -9.845e-03, 1.043e-01, -1.144e-01, 2.102e-02, -7.557e-02, -2.903e-02, 2.711e-02, 4.163e-02)); + r += mul(s0_8, M4(3.748e-02, 7.026e-02, -5.355e-02, -2.032e-01, -1.054e-01, 7.008e-02, -3.887e-03, 9.327e-02, -5.416e-02, -4.324e-02, -8.562e-02, -2.246e-01, 4.115e-02, -5.455e-02, -5.699e-02, 9.352e-02)); + r += mul(s1_0, M4(-1.647e-01, -6.626e-02, -1.987e-01, 1.533e-01, 
-4.810e-02, -1.097e-02, -1.609e-01, -2.368e-01, 1.036e-02, 5.420e-02, 5.011e-02, 1.669e-01, -6.079e-02, -1.636e-01, -1.879e-03, -2.154e-01)); + r += mul(s1_1, M4(-2.303e-01, -1.604e-01, 5.711e-02, 1.264e-01, -3.227e-02, -3.792e-01, -1.013e-01, -1.919e-01, 2.945e-02, 1.079e-01, 2.416e-01, -1.813e-01, -8.157e-02, -2.102e-01, -1.462e-01, 1.594e-01)); + r += mul(s1_2, M4(-5.792e-02, -1.373e-02, 1.286e-01, 4.894e-02, -9.753e-02, -2.114e-01, -1.218e-02, -1.496e-01, 4.394e-02, 1.692e-02, -7.685e-02, -1.331e-01, 4.462e-03, -5.792e-02, 1.508e-01, 1.907e-01)); + r += mul(s1_3, M4(-3.344e-01, 2.221e-01, 3.409e-01, 2.959e-01, 2.695e-01, -5.308e-02, -2.027e-01, 1.681e-01, -7.817e-02, 1.927e-02, 1.200e-01, -1.694e-01, 4.014e-01, 2.818e-01, 1.119e-01, -2.400e-01)); + r += mul(s1_4, M4(-2.868e-02, -7.143e-02, 2.904e-01, 5.075e-02, -1.590e-01, -3.739e-01, 1.705e-01, -1.525e-01, -7.932e-02, -8.959e-02, -1.228e-01, 1.294e-01, 1.669e-01, -2.851e-01, 2.203e-02, -6.589e-01)); + r += mul(s1_5, M4(-1.570e-02, 2.385e-02, -1.108e-02, 9.343e-02, -2.494e-01, -3.927e-01, 4.257e-02, 2.139e-01, 1.506e-01, 1.220e-02, 1.834e-01, -9.939e-02, -1.581e-01, 6.915e-02, -1.147e-01, -2.094e-01)); + r += mul(s1_6, M4(7.238e-02, 2.064e-02, -8.644e-02, 1.141e-01, 9.264e-02, -1.623e-01, -5.621e-02, 2.077e-03, -1.771e-01, 6.011e-02, 1.249e-01, 2.444e-02, 1.023e-02, 2.457e-02, 1.956e-01, 1.182e-01)); + r += mul(s1_7, M4(-1.855e-02, 1.438e-01, -5.501e-02, -2.316e-01, -7.153e-02, -1.020e-02, -1.145e-01, -4.723e-01, 1.101e-01, -2.734e-02, -8.558e-03, 5.762e-01, 8.280e-02, -1.551e-01, 8.434e-02, 2.051e-01)); + r += mul(s1_8, M4(1.539e-02, 1.436e-02, 8.719e-03, -6.027e-02, -1.844e-01, -1.623e-01, 8.752e-02, 7.324e-02, 8.527e-02, 6.751e-03, -8.172e-02, 5.645e-02, -9.335e-02, 9.705e-03, -1.859e-03, 2.190e-01)); + r += mul(s2_0, M4(1.063e-02, -5.882e-02, 7.294e-02, 1.135e-01, 6.834e-03, 2.521e-01, 3.334e-02, -7.202e-02, 1.982e-02, 8.106e-04, 5.125e-02, -3.961e-02, 1.600e-02, -1.903e-01, 8.996e-02, 1.905e-01)); + r += 
mul(s2_1, M4(1.227e-01, 1.424e-01, 1.137e-01, 3.396e-02, -7.578e-02, 9.724e-02, -2.649e-02, -7.468e-02, 2.632e-02, -2.053e-01, -2.161e-02, -8.906e-02, -5.289e-02, -1.019e-02, -5.841e-02, -2.570e-02)); + r += mul(s2_2, M4(3.359e-02, 5.073e-02, 1.054e-01, -3.593e-03, -7.366e-02, 1.187e-02, 2.007e-03, 2.112e-02, 8.835e-02, 7.359e-02, 2.478e-02, 4.758e-02, -8.954e-02, -2.354e-02, -1.852e-01, 1.836e-01)); + r += mul(s2_3, M4(-2.403e-01, -3.137e-01, 4.761e-02, -1.840e-01, 1.012e-01, 5.392e-02, -1.457e-01, -6.425e-02, -6.436e-03, -2.480e-02, 9.199e-02, -8.296e-02, -2.894e-01, 1.054e-01, 1.282e-01, -7.266e-02)); + r += mul(s2_4, M4(3.247e-01, -1.603e-01, 6.251e-02, -1.106e-01, -4.083e-02, 2.252e-01, -2.616e-01, 1.397e-01, -1.153e-01, -4.065e-02, 5.923e-02, -4.491e-03, 6.616e-02, -2.146e-01, 6.314e-02, 5.401e-01)); + r += mul(s2_5, M4(7.356e-02, 3.269e-03, 1.710e-01, -2.914e-02, -7.138e-02, 2.294e-02, 1.634e-01, 1.120e-01, -7.388e-02, -1.768e-02, -3.813e-02, -3.019e-02, -4.569e-02, -1.257e-01, -1.341e-01, 4.040e-03)); + r += mul(s2_6, M4(-1.479e-01, 2.260e-02, 5.173e-02, 1.889e-01, -1.911e-01, -6.621e-02, 1.666e-01, 1.329e-01, -1.320e-01, 3.452e-02, 1.232e-01, -1.545e-02, 7.646e-02, 4.924e-02, -9.582e-02, -1.941e-01)); + r += mul(s2_7, M4(1.873e-01, -1.340e-02, -1.278e-01, 9.443e-02, 4.326e-02, 1.467e-01, 7.788e-02, 1.345e-01, 8.715e-02, -1.063e-01, -1.265e-01, 1.358e-01, 3.897e-02, -9.523e-03, -6.335e-02, 2.356e-02)); + r += mul(s2_8, M4(-1.881e-02, -5.854e-02, -1.554e-02, -1.388e-01, -5.206e-03, 4.616e-02, -4.462e-02, 8.522e-03, -4.023e-02, -1.133e-01, 8.830e-02, -7.407e-02, -1.715e-02, -3.550e-02, 2.474e-02, 9.114e-02)); + r += mul(s3_0, M4(7.529e-02, 2.230e-01, 1.972e-02, -1.296e-02, 1.146e-01, -6.835e-02, 4.912e-02, 1.397e-02, 1.189e-01, 6.960e-02, -4.406e-02, -7.066e-02, -1.058e-01, 6.757e-02, 1.539e-01, 1.892e-01)); + r += mul(s3_1, M4(-6.716e-02, 2.716e-01, 1.435e-01, -2.363e-01, 5.216e-02, -3.579e-02, 1.132e-01, -8.205e-03, 3.452e-01, 1.436e-01, -4.125e-02, 
2.391e-01, -1.578e-01, 1.164e-02, 1.621e-01, -7.879e-02)); + r += mul(s3_2, M4(-1.167e-01, 1.343e-01, -1.091e-01, -1.064e-01, -1.269e-02, -8.151e-02, 7.006e-02, -3.599e-02, 1.215e-01, 2.535e-02, -8.567e-02, 7.653e-02, 1.470e-02, 1.479e-01, -6.068e-02, 8.576e-02)); + r += mul(s3_3, M4(-2.659e-01, 5.315e-02, 8.693e-02, 1.562e-01, 2.764e-01, -4.758e-01, -2.118e-02, -1.585e-01, 1.634e-01, 1.815e-01, 3.742e-02, 1.125e-01, -1.832e-01, 3.244e-01, 1.500e-01, 1.319e-01)); + r += mul(s3_4, M4(-7.748e-02, -1.417e-01, -2.502e-02, -5.102e-01, 5.740e-02, -1.569e-01, -1.604e-01, 1.076e-01, 9.692e-02, 1.335e-01, 1.066e-01, 2.489e-01, -1.600e-01, -2.911e-02, 9.011e-04, -1.260e-01)); + r += mul(s3_5, M4(-5.218e-02, 1.823e-01, 2.527e-02, 5.142e-02, 3.503e-02, -1.782e-01, 1.947e-01, 1.810e-01, 1.059e-01, 3.631e-02, -1.122e-01, -1.281e-01, -6.201e-02, 5.464e-02, -2.633e-01, -2.567e-01)); + r += mul(s3_6, M4(-3.602e-01, 8.443e-02, 3.335e-02, 2.454e-01, 2.898e-02, -5.114e-01, 1.577e-01, 1.251e-01, -6.350e-02, 4.388e-02, 4.373e-02, 3.042e-03, 4.010e-02, 4.499e-02, -8.740e-02, -1.123e-01)); + r += mul(s3_7, M4(-1.979e-01, 3.968e-02, -6.856e-02, 9.250e-02, 5.656e-02, -2.908e-01, 4.040e-01, 1.679e-01, 3.232e-01, 2.436e-01, -2.430e-01, 3.446e-01, -1.092e-01, 8.027e-02, 5.210e-03, -1.632e-01)); + r += mul(s3_8, M4(-8.864e-02, 5.409e-02, -7.792e-02, -6.164e-02, -3.213e-02, 5.911e-02, -3.344e-02, -1.642e-01, -3.467e-02, 1.005e-01, -4.552e-02, -5.140e-02, -3.020e-02, -5.049e-02, 5.460e-02, 2.551e-01)); + r += V4(1.554e-02, -1.502e-02, 3.403e-02, -3.054e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.210e-01, -8.771e-02, 8.433e-02, -1.834e-02, 4.487e-03, 
1.229e-01, 9.812e-02, 1.122e-02, 6.535e-02, 2.964e-02, -5.238e-02, -8.784e-03, 1.608e-01, -3.298e-01, -7.243e-02, -2.594e-02)); + r += mul(s0_1, M4(1.251e-01, 2.624e-03, 1.901e-01, -4.362e-02, -2.175e-02, 6.931e-02, 8.964e-03, -1.462e-02, 4.903e-02, 4.953e-03, 4.655e-02, -7.294e-02, 1.417e-01, -8.032e-02, 3.567e-02, -2.951e-02)); + r += mul(s0_2, M4(6.153e-02, -1.150e-01, 1.440e-01, -5.493e-02, 4.033e-02, 1.981e-03, -2.974e-02, 2.702e-02, 5.450e-02, -7.033e-02, -2.222e-02, -7.965e-02, 1.456e-02, 2.782e-02, 2.746e-02, 1.040e-02)); + r += mul(s0_3, M4(2.015e-01, 3.217e-01, 7.861e-02, -8.683e-03, 3.937e-02, -4.713e-03, -3.380e-02, 2.001e-03, 1.475e-02, 4.002e-02, -1.472e-02, -6.966e-02, 1.221e-01, -1.903e-01, -2.130e-02, 2.612e-02)); + r += mul(s0_4, M4(-1.416e-01, 6.991e-02, 5.720e-02, 8.791e-02, 1.773e-01, -3.038e-01, -3.310e-01, -1.970e-01, -1.473e-01, 2.420e-01, 3.486e-02, -5.738e-02, -2.204e-01, -5.488e-02, -2.149e-01, 4.573e-01)); + r += mul(s0_5, M4(-4.676e-02, 6.991e-02, 8.723e-02, -4.131e-02, -3.696e-03, -9.530e-02, -2.305e-01, 7.145e-02, 1.770e-01, 1.801e-01, 7.689e-02, -2.927e-01, 2.088e-01, -6.714e-02, 6.918e-02, -8.667e-02)); + r += mul(s0_6, M4(2.136e-01, -2.888e-03, 6.237e-02, -2.379e-01, -1.859e-01, -4.943e-02, 9.806e-02, 3.477e-02, 1.773e-01, -1.017e-02, -8.464e-02, -4.892e-02, 1.725e-01, -2.379e-02, -1.774e-01, 6.270e-02)); + r += mul(s0_7, M4(9.334e-02, 4.811e-03, 1.178e-01, -8.021e-02, 3.474e-02, -4.447e-02, 2.635e-02, 3.954e-02, 6.511e-02, -1.431e-01, -3.595e-02, -2.550e-01, -1.586e-01, -2.793e-01, -5.439e-02, 1.146e-01)); + r += mul(s0_8, M4(-1.065e-02, -2.644e-02, 2.093e-02, 8.023e-02, 3.019e-02, 1.242e-02, -3.142e-02, -1.020e-01, 1.166e-01, -4.816e-02, -7.099e-02, -4.466e-02, 1.021e-01, 1.844e-02, -5.070e-02, 5.314e-02)); + r += mul(s1_0, M4(-4.590e-02, -2.057e-01, -1.242e-01, -3.319e-02, -2.894e-01, 3.706e-01, 1.807e-01, 4.483e-02, -3.240e-03, 6.281e-02, -1.038e-01, 1.921e-02, 2.339e-01, -3.037e-01, 1.067e-01, -7.859e-02)); + r += mul(s1_1, 
M4(-1.438e-01, 1.188e-02, 1.465e-01, -5.182e-02, -1.534e-01, 9.311e-02, 2.122e-01, -5.038e-03, -1.265e-01, -5.506e-03, -3.281e-02, -1.296e-02, -2.746e-02, -1.885e-02, 1.191e-01, -6.267e-03)); + r += mul(s1_2, M4(-8.901e-03, -1.093e-01, 8.306e-02, -6.763e-02, -8.580e-02, 1.989e-01, 2.396e-01, 9.082e-02, 6.451e-03, -9.354e-02, -7.606e-02, 4.595e-02, -6.431e-02, 2.274e-01, 2.200e-01, 8.497e-02)); + r += mul(s1_3, M4(-8.861e-02, 1.255e-01, 3.949e-02, -6.585e-02, -4.185e-02, 4.627e-01, -3.145e-02, 2.407e-02, -5.978e-02, -7.530e-02, -9.670e-03, -1.027e-02, -9.589e-02, 9.377e-02, 2.228e-01, 2.195e-02)); + r += mul(s1_4, M4(-3.904e-01, 3.109e-03, -1.475e-01, 3.837e-01, -1.894e-01, 6.479e-02, -6.395e-02, -1.995e-02, -2.757e-01, 8.815e-02, -7.492e-02, 1.081e-01, -3.250e-01, 2.217e-02, -2.679e-01, 2.825e-01)); + r += mul(s1_5, M4(-3.827e-02, -8.505e-02, -4.066e-02, -6.333e-02, -7.558e-02, 2.457e-01, 1.232e-01, -2.482e-01, 3.403e-02, 4.594e-02, -4.212e-03, -1.739e-01, -2.179e-01, -1.252e-01, 2.949e-02, 2.564e-01)); + r += mul(s1_6, M4(6.617e-02, -9.007e-02, 8.929e-02, -1.230e-01, -1.098e-01, -1.735e-02, 6.949e-02, -4.675e-02, -6.309e-02, -7.797e-02, -6.646e-02, -5.128e-03, 2.148e-01, 1.261e-01, -1.065e-01, 1.382e-01)); + r += mul(s1_7, M4(5.742e-02, -1.194e-01, 8.199e-02, -5.755e-02, -6.747e-03, 2.054e-01, 1.251e-01, 3.603e-02, -1.160e-01, -1.202e-01, -1.329e-01, -1.194e-02, -1.654e-01, -2.280e-02, -2.143e-01, 9.542e-02)); + r += mul(s1_8, M4(8.072e-02, -2.271e-02, -2.824e-02, -9.697e-03, -2.783e-01, -2.375e-02, 1.056e-01, -4.308e-02, -9.954e-02, -5.603e-02, 1.340e-02, 6.323e-02, -9.490e-02, 1.534e-01, -9.825e-02, 1.031e-01)); + r += mul(s2_0, M4(-6.657e-02, 1.213e-01, -4.044e-03, -1.773e-02, -1.002e-01, -9.754e-02, -8.548e-02, -7.991e-03, -1.089e-02, 2.650e-02, 7.231e-02, 1.553e-02, 1.862e-02, -4.304e-02, -2.659e-02, -9.413e-02)); + r += mul(s2_1, M4(-1.215e-01, 2.919e-01, -1.007e-01, 5.449e-02, -3.993e-02, -2.603e-01, -9.447e-02, 4.317e-02, 1.249e-02, 6.626e-03, 3.549e-02, 
2.056e-02, -1.929e-01, 2.127e-01, 1.262e-01, 1.266e-01)); + r += mul(s2_2, M4(-1.348e-01, 9.263e-02, 4.868e-02, 3.141e-02, -3.260e-02, -5.119e-02, 1.882e-02, -2.687e-02, 1.175e-01, 7.400e-03, 5.936e-02, -3.558e-02, -1.845e-01, 1.852e-01, 1.900e-01, 3.633e-02)); + r += mul(s2_3, M4(-1.500e-01, 5.300e-01, 1.362e-01, 1.648e-02, -2.638e-01, -1.051e-01, -8.124e-02, 6.321e-02, 1.936e-02, -9.783e-03, -2.973e-03, -3.255e-02, -8.121e-03, 1.057e-01, 3.040e-02, 1.927e-03)); + r += mul(s2_4, M4(-3.394e-01, -9.811e-02, 1.051e-01, 7.235e-02, 7.202e-02, -4.507e-02, -3.230e-01, 7.066e-02, -9.951e-02, -3.135e-02, -3.983e-02, -1.663e-02, -1.988e-01, -1.760e-01, 7.408e-02, 7.511e-03)); + r += mul(s2_5, M4(-1.656e-01, -5.325e-02, 8.364e-02, -1.237e-01, -2.843e-02, -1.214e-01, 2.234e-02, -8.794e-02, -5.301e-02, -9.052e-02, -7.931e-03, 1.106e-01, -5.915e-02, 2.891e-01, 1.154e-02, 4.972e-01)); + r += mul(s2_6, M4(-1.144e-01, 1.519e-01, -1.635e-02, -4.138e-02, -2.222e-01, -2.007e-02, -9.045e-02, 7.578e-02, -1.112e-02, 7.288e-02, -6.133e-02, 4.914e-02, -3.029e-02, 6.290e-02, -3.176e-02, -5.098e-02)); + r += mul(s2_7, M4(9.470e-03, -3.087e-02, -4.088e-02, -1.636e-01, -1.954e-01, -3.066e-02, -6.252e-01, 3.643e-01, -8.264e-02, -9.051e-02, -4.227e-02, -1.096e-01, -1.828e-01, 9.511e-02, -1.038e-01, 1.368e-01)); + r += mul(s2_8, M4(-1.110e-01, 1.403e-02, 6.434e-02, 4.785e-02, -9.627e-02, -9.932e-02, -3.143e-02, 1.898e-01, 5.175e-02, 1.199e-01, 1.578e-01, 1.062e-02, -1.138e-01, 8.965e-02, 4.713e-03, 1.549e-01)); + r += mul(s3_0, M4(3.829e-02, -1.337e-01, -1.104e-03, -7.664e-03, 6.715e-02, 1.739e-01, 1.395e-02, 1.015e-02, -6.929e-02, -4.876e-02, -3.160e-02, 4.958e-03, 3.909e-02, -7.175e-02, 6.043e-02, -1.690e-03)); + r += mul(s3_1, M4(-4.348e-02, -3.422e-01, -1.035e-01, 5.749e-02, -2.770e-02, -6.681e-02, 9.643e-02, -3.363e-02, -1.086e-01, 1.263e-01, -3.565e-02, 1.481e-01, 2.105e-01, -1.008e-01, 2.541e-02, -1.773e-03)); + r += mul(s3_2, M4(-1.641e-01, -1.775e-01, -3.140e-02, -2.266e-02, 5.426e-02, 
1.135e-01, 1.251e-01, -6.043e-02, -1.022e-01, 4.385e-01, 7.205e-03, 9.644e-02, 9.829e-02, -5.496e-02, -2.885e-02, -8.019e-02)); + r += mul(s3_3, M4(-1.623e-01, -4.012e-02, 4.193e-02, 5.826e-02, 7.017e-02, 3.917e-02, 1.997e-01, -8.713e-02, -2.845e-02, 8.698e-02, -1.084e-02, 3.293e-02, 3.392e-04, 6.589e-02, -7.126e-02, 2.960e-02)); + r += mul(s3_4, M4(-3.154e-01, -1.805e-01, -2.569e-01, 1.802e-02, 5.784e-01, 3.153e-02, 1.085e-01, -8.445e-02, -1.383e-01, 3.813e-01, -4.014e-01, 2.353e-01, 7.655e-02, -5.224e-01, -2.345e-01, -5.558e-02)); + r += mul(s3_5, M4(-7.284e-02, -4.643e-01, -1.979e-01, -2.940e-01, 3.934e-02, 1.468e-01, 2.169e-01, -1.865e-01, 3.773e-02, 1.863e-01, -2.202e-01, 3.661e-01, 2.442e-01, -6.375e-02, -1.982e-01, -2.276e-02)); + r += mul(s3_6, M4(-6.857e-02, -1.182e-01, -5.203e-02, -4.814e-02, 1.110e-01, 5.270e-02, 1.293e-01, -6.715e-02, -1.529e-01, 1.844e-01, -1.089e-01, 6.857e-02, 1.382e-01, 3.184e-02, 2.557e-02, -2.965e-02)); + r += mul(s3_7, M4(1.598e-01, -3.311e-01, -1.057e-01, -4.271e-01, 2.381e-01, -4.726e-02, 1.948e-01, -8.839e-02, -9.368e-02, 1.343e-01, -1.369e-01, 8.824e-02, 7.348e-02, 3.476e-02, -2.949e-02, 6.313e-03)); + r += mul(s3_8, M4(1.153e-01, -1.471e-01, -5.845e-02, -2.276e-01, 1.669e-01, -4.299e-02, 7.663e-02, -3.158e-02, -1.171e-01, 1.190e-01, -5.942e-02, 1.771e-01, 1.996e-03, 2.404e-02, -3.151e-02, -3.041e-02)); + r += V4(-1.519e-02, -1.626e-03, -6.363e-03, 2.015e-02); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = 
-max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 
s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.603e-02, 1.457e-02, 4.018e-02, 1.284e-01, -2.497e-02, 1.041e-02, 8.365e-02, -2.611e-02, 2.223e-02, -8.476e-03, -1.068e-01, -1.092e-02, -2.023e-02, -6.393e-02, 3.540e-02, -3.517e-02)); + r += mul(s0_1, M4(4.999e-02, -3.245e-02, 3.582e-03, -1.362e-01, 5.032e-02, 2.189e-03, -1.182e-01, -5.752e-02, -3.133e-02, -4.474e-02, 6.192e-02, -5.508e-02, 2.607e-02, 3.363e-02, -4.378e-02, -6.372e-02)); + r += mul(s0_2, M4(-1.613e-01, -8.100e-02, -7.770e-03, 1.425e-02, -1.075e-02, 4.560e-02, -1.378e-02, 2.426e-02, 4.124e-02, 6.632e-02, 1.771e-01, 1.226e-01, -8.028e-02, 3.032e-02, 2.187e-02, -7.814e-03)); + r += mul(s0_3, M4(4.616e-02, 1.362e-01, -1.578e-01, -7.459e-02, -1.762e-01, 3.738e-02, -2.270e-02, -1.593e-01, -9.981e-02, 7.580e-02, 1.020e-01, -1.905e-02, -4.813e-02, -3.603e-02, -6.406e-02, 5.516e-02)); + r += mul(s0_4, M4(-3.955e-01, 4.862e-03, 4.408e-01, 3.431e-01, 1.827e-01, 4.348e-02, 1.825e-02, 3.428e-02, 9.285e-02, 2.980e-01, -1.655e-01, 1.925e-02, 1.656e-01, -7.685e-02, -2.193e-01, -6.239e-02)); + r += mul(s0_5, M4(1.214e-01, -1.069e-02, -2.443e-01, 4.160e-03, -2.923e-03, -3.846e-01, 1.242e-01, 1.638e-02, 1.392e-01, 2.218e-01, 8.119e-02, 6.071e-02, -3.691e-02, -2.882e-01, 1.464e-01, 7.349e-02)); + r += mul(s0_6, M4(-1.240e-01, 7.950e-03, 6.145e-02, -8.886e-02, 5.262e-02, 9.563e-02, -2.241e-02, -5.722e-02, -1.371e-02, 4.435e-02, 1.125e-01, 1.029e-01, -1.878e-02, -3.662e-03, 2.522e-02, -6.313e-02)); + r += mul(s0_7, M4(-2.502e-02, 1.062e-01, -2.685e-01, 4.436e-02, 2.446e-02, 7.840e-02, -6.193e-02, -7.416e-05, 2.145e-02, 2.132e-01, -1.304e-01, 1.844e-01, -3.718e-04, -8.233e-02, 1.137e-01, -1.111e-02)); + r += mul(s0_8, M4(6.311e-02, 1.239e-02, 3.474e-02, 6.830e-02, 7.880e-02, 1.460e-01, 1.436e-02, 5.359e-03, 1.001e-01, 1.663e-01, 3.111e-02, 1.435e-02, 1.698e-02, 1.415e-02, 6.806e-02, 
2.488e-03)); + r += mul(s1_0, M4(1.176e-02, -4.177e-02, -5.899e-02, -8.043e-02, -5.556e-02, -5.232e-02, 1.104e-01, -1.912e-02, -1.999e-02, 3.132e-02, 3.493e-02, -4.262e-02, 8.312e-02, 9.728e-02, -4.373e-02, 9.260e-02)); + r += mul(s1_1, M4(-1.069e-01, 4.645e-02, 2.282e-01, -5.721e-02, -3.651e-02, -3.742e-03, -8.886e-03, -5.040e-02, -5.751e-02, 4.869e-02, 9.988e-03, 1.313e-02, -3.140e-02, 1.363e-01, -9.975e-02, -3.207e-02)); + r += mul(s1_2, M4(-1.288e-01, -2.886e-02, 9.747e-02, 2.324e-02, 1.364e-02, -8.983e-02, 5.610e-02, 2.811e-02, -2.433e-02, 2.233e-01, -1.660e-02, 7.249e-02, -4.680e-02, 2.193e-01, -2.173e-01, -8.384e-02)); + r += mul(s1_3, M4(-1.302e-03, -2.432e-02, 1.779e-01, 8.593e-02, -1.982e-01, 5.378e-02, -5.749e-03, -3.947e-02, -2.771e-02, 6.767e-02, -1.146e-01, -3.119e-02, 6.422e-02, 9.432e-02, -3.233e-01, 9.318e-02)); + r += mul(s1_4, M4(-2.855e-01, 1.026e-01, 2.524e-01, 2.993e-01, 2.177e-01, -1.882e-01, -5.235e-02, -6.145e-03, 2.709e-02, 2.778e-01, 2.826e-01, -2.878e-01, 3.057e-01, 1.621e-01, -2.407e-01, -3.343e-01)); + r += mul(s1_5, M4(-3.628e-02, 5.554e-03, 1.063e-01, 4.774e-02, 3.746e-02, -1.545e-01, -4.221e-02, 6.273e-02, 7.253e-02, 5.197e-01, -5.520e-02, -6.371e-02, 1.401e-01, 2.903e-02, -6.322e-02, -1.457e-01)); + r += mul(s1_6, M4(-3.768e-02, 8.166e-03, 3.330e-01, 9.689e-02, -3.890e-02, 1.159e-01, 6.738e-02, -8.253e-02, -7.114e-03, -1.422e-02, 1.549e-01, 6.271e-02, -1.100e-02, 2.598e-02, -2.596e-01, -8.673e-02)); + r += mul(s1_7, M4(-1.372e-01, 6.481e-02, -1.001e-01, 7.663e-02, 5.323e-02, 1.137e-01, 9.721e-02, 3.063e-01, 7.966e-02, 1.892e-01, -2.452e-01, 8.129e-02, 6.147e-02, -4.174e-02, -8.889e-02, -2.223e-01)); + r += mul(s1_8, M4(-7.513e-02, 1.446e-02, 1.697e-01, 7.281e-02, 2.272e-02, 1.891e-01, -2.171e-01, -8.146e-02, 4.419e-02, 9.160e-02, 3.967e-02, 3.398e-02, 1.016e-02, 1.109e-02, -1.280e-02, 6.043e-03)); + r += mul(s2_0, M4(-1.125e-02, -4.668e-02, 8.017e-03, -1.144e-01, -1.167e-01, 1.599e-02, 1.427e-01, -8.549e-02, -8.563e-02, -7.150e-03, 
1.269e-01, -1.134e-02, -1.111e-02, -9.739e-02, 5.041e-02, -7.451e-02)); + r += mul(s2_1, M4(-4.166e-02, -2.516e-02, 9.528e-02, 5.857e-02, 9.985e-02, -1.021e-02, 1.122e-02, 1.325e-02, -3.881e-02, 5.275e-02, -1.425e-01, 2.573e-01, 3.958e-02, -1.059e-01, 1.155e-01, 4.783e-02)); + r += mul(s2_2, M4(-9.030e-03, -9.384e-03, 1.231e-01, -2.887e-02, 6.405e-02, -2.676e-02, -7.671e-03, 1.489e-02, -3.528e-02, -2.811e-01, 1.087e-01, 2.132e-02, -4.533e-02, 1.584e-02, 7.144e-02, 3.402e-02)); + r += mul(s2_3, M4(1.897e-01, 6.806e-03, -2.573e-01, 1.147e-02, -4.295e-02, 5.574e-02, -1.997e-01, -1.539e-01, 2.371e-01, 8.366e-02, -5.064e-02, 7.570e-02, 2.415e-01, -9.769e-02, 1.666e-02, 1.138e-01)); + r += mul(s2_4, M4(4.319e-01, -1.018e-02, -2.282e-01, -6.357e-02, -2.734e-02, -2.508e-01, 7.848e-02, 6.481e-02, 1.501e-01, 1.706e-01, -7.193e-02, -4.792e-01, -7.641e-02, -7.082e-02, -1.236e-01, -6.017e-02)); + r += mul(s2_5, M4(-7.273e-02, -2.068e-01, -9.371e-02, -2.801e-02, 1.659e-01, 5.531e-02, -1.428e-02, 2.927e-02, -1.047e-01, -2.108e-01, 1.224e-01, 1.807e-01, -5.600e-02, -1.184e-02, -8.557e-02, 2.045e-02)); + r += mul(s2_6, M4(6.962e-02, -9.497e-02, -1.894e-02, 7.397e-02, 4.261e-02, 1.095e-02, 1.084e-01, 8.961e-02, 1.426e-01, 5.959e-02, -2.532e-02, -7.401e-02, 9.025e-02, -2.152e-02, 5.712e-02, -7.736e-02)); + r += mul(s2_7, M4(1.248e-01, -1.912e-01, 2.197e-01, -3.152e-02, 1.601e-02, -5.634e-02, 1.397e-01, 3.612e-01, 5.845e-02, 1.990e-01, 1.667e-01, 1.460e-01, -1.803e-02, -1.255e-01, 2.606e-02, -3.095e-02)); + r += mul(s2_8, M4(-7.043e-02, -1.660e-01, -1.409e-02, 2.977e-02, 9.866e-02, 6.648e-02, -8.271e-02, -1.705e-02, -5.470e-02, -4.730e-02, -9.331e-02, -9.813e-02, -2.882e-02, -7.116e-02, 3.656e-02, 7.024e-02)); + r += mul(s3_0, M4(3.335e-02, -1.035e-01, -4.027e-02, -9.856e-02, 1.123e-02, -4.545e-02, -1.428e-02, -4.436e-02, -1.125e-02, 5.338e-02, 1.272e-01, 7.738e-02, 2.460e-01, 6.170e-02, -2.828e-01, -9.801e-02)); + r += mul(s3_1, M4(-1.291e-02, -9.460e-02, 1.647e-01, 3.956e-02, 
6.269e-02, -1.042e-01, 1.587e-01, 1.304e-01, 8.502e-03, 3.894e-02, 4.633e-02, 1.400e-01, -1.945e-02, 3.277e-01, 1.464e-01, 1.050e-01)); + r += mul(s3_2, M4(-3.341e-02, -1.665e-02, 7.029e-02, 4.459e-02, 5.240e-02, -1.783e-01, 1.174e-01, 7.980e-02, -6.808e-04, 3.650e-02, -9.933e-02, -2.446e-02, -7.836e-02, 1.854e-01, 5.613e-02, -1.825e-02)); + r += mul(s3_3, M4(-2.069e-01, -4.267e-02, 2.725e-01, 5.264e-03, -4.267e-02, -9.650e-03, -3.641e-02, -5.011e-02, 1.346e-01, 1.087e-01, -1.854e-01, 1.758e-01, 4.033e-01, -1.891e-01, 1.411e-01, -1.705e-01)); + r += mul(s3_4, M4(3.056e-01, -5.330e-01, 5.010e-02, 2.460e-01, -2.118e-02, 1.479e-01, 1.269e-01, 2.600e-01, 9.058e-02, -2.206e-01, 2.627e-02, -2.579e-01, -5.702e-03, -2.285e-01, 2.033e-01, -1.174e-01)); + r += mul(s3_5, M4(6.399e-02, -1.645e-01, -1.331e-01, 5.684e-02, 4.928e-02, -2.339e-01, 2.013e-02, -4.284e-02, -2.017e-01, -4.847e-02, 2.648e-02, -1.329e-02, -1.203e-01, -1.062e-01, -7.317e-02, 7.343e-02)); + r += mul(s3_6, M4(1.753e-02, -9.153e-03, 9.648e-02, 1.480e-01, 2.131e-02, 6.322e-03, 1.606e-01, 2.550e-01, 1.438e-02, 9.530e-02, -1.749e-02, -3.708e-03, 1.606e-01, 6.072e-02, 1.209e-01, -1.087e-01)); + r += mul(s3_7, M4(4.474e-02, -1.546e-01, -1.312e-02, 4.897e-02, 9.376e-02, 8.851e-02, 8.660e-02, -7.482e-03, 1.519e-01, 8.864e-02, 5.592e-02, -1.059e-02, -1.083e-01, -1.601e-01, 2.599e-01, 6.959e-02)); + r += mul(s3_8, M4(-1.033e-01, -1.514e-01, 3.964e-02, 5.707e-02, -2.455e-02, -1.163e-01, 2.173e-01, 1.447e-01, -1.753e-02, -3.387e-02, 2.775e-02, -3.772e-02, -1.142e-01, 5.344e-02, -1.796e-01, -1.290e-01)); + r += V4(7.981e-03, 1.017e-02, -9.502e-03, -2.618e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + 
r += mul(s0_0, M4(4.499e-03, -1.196e-02, -1.650e-02, 6.240e-02, -4.222e-02, -1.463e-02, -6.762e-02, -3.560e-02, 1.501e-01, -1.203e-01, 1.276e-01, 3.161e-02, 8.032e-02, -5.788e-02, 3.964e-02, -1.081e-01)); + r += mul(s0_1, M4(7.720e-02, -1.253e-01, 1.192e-01, 4.363e-02, 2.300e-03, 3.121e-02, -9.608e-02, -1.262e-01, -1.082e-02, -5.831e-02, 1.486e-01, 1.558e-01, 2.582e-02, -7.162e-02, -1.317e-01, -1.325e-01)); + r += mul(s0_2, M4(-3.036e-02, -1.122e-01, -2.954e-02, 4.476e-02, 5.942e-02, 1.889e-02, 2.699e-02, 3.370e-02, 7.309e-02, 7.204e-03, 8.554e-02, -3.684e-02, 2.101e-03, 5.032e-02, 4.602e-02, -4.234e-02)); + r += mul(s0_3, M4(1.287e-01, -6.731e-02, -1.535e-01, 6.860e-02, 1.347e-01, -6.583e-02, -2.647e-02, -7.921e-03, 1.514e-01, -4.460e-02, -5.672e-02, -1.412e-01, 2.153e-01, -8.017e-03, 3.155e-03, 9.885e-02)); + r += mul(s0_4, M4(1.192e-01, -1.237e-01, -1.446e-01, -2.197e-01, 5.265e-02, -1.721e-02, 1.762e-01, -1.570e-01, -2.728e-01, -9.284e-02, -1.318e-01, -2.472e-01, 2.642e-01, 3.686e-02, 2.810e-01, -7.129e-02)); + r += mul(s0_5, M4(1.372e-01, -3.096e-01, 1.258e-01, 1.579e-01, -1.576e-01, 3.233e-01, -7.987e-02, -1.414e-01, -6.258e-02, 8.063e-02, -9.703e-02, 1.087e-01, 2.968e-02, 3.011e-02, 3.135e-01, 5.113e-02)); + r += mul(s0_6, M4(-4.781e-02, -1.868e-02, -1.984e-02, 3.647e-02, -7.447e-02, -1.965e-02, -7.801e-02, -2.734e-02, 5.353e-02, 7.222e-02, -2.888e-02, -5.486e-02, 5.216e-02, -2.872e-02, -1.073e-02, 3.894e-02)); + r += mul(s0_7, M4(-8.822e-02, 2.075e-01, -9.096e-02, -5.276e-03, 3.016e-01, 9.170e-02, -3.208e-02, 1.088e-01, -1.142e-01, 3.082e-01, -1.133e-01, -2.093e-01, 2.744e-01, 2.188e-02, 2.380e-02, 9.535e-02)); + r += mul(s0_8, M4(-1.286e-02, -8.243e-02, 2.199e-02, 2.645e-01, 6.284e-02, 1.616e-02, -5.037e-02, 8.813e-02, -2.444e-02, -8.535e-02, 3.516e-02, 3.767e-02, -8.980e-02, 1.333e-01, 2.389e-02, 4.367e-02)); + r += mul(s1_0, M4(-1.972e-02, -4.719e-02, 4.859e-02, -8.123e-03, 7.200e-02, -2.635e-02, -1.962e-02, -6.082e-02, 7.614e-02, 6.818e-03, 6.225e-02, 
7.733e-02, -2.530e-02, -2.384e-02, -1.177e-01, -4.357e-02)); + r += mul(s1_1, M4(1.055e-01, -8.694e-02, 4.911e-03, -5.235e-02, 2.067e-02, -7.520e-02, -1.095e-01, -2.507e-01, 4.948e-02, -5.254e-02, -7.734e-02, 3.959e-01, -5.456e-02, -8.501e-02, -2.084e-01, -7.064e-02)); + r += mul(s1_2, M4(-4.418e-02, -2.485e-03, -9.979e-02, -1.702e-02, -1.339e-01, 2.062e-02, -6.812e-02, -3.193e-01, -8.136e-02, 8.432e-02, -9.392e-02, -7.457e-02, -4.301e-02, -9.250e-02, -3.137e-01, -1.783e-01)); + r += mul(s1_3, M4(7.296e-02, 6.799e-03, -8.333e-04, 4.303e-02, 2.939e-01, -1.032e-01, 2.746e-02, 1.708e-02, 1.057e-01, -7.090e-02, -1.024e-01, 1.230e-03, 3.034e-02, -7.408e-02, -1.711e-01, 2.089e-01)); + r += mul(s1_4, M4(2.376e-01, -4.740e-01, -2.568e-01, 2.305e-01, 1.795e-01, 3.196e-03, 2.923e-01, -2.949e-01, -2.451e-01, -1.050e-01, 6.901e-03, -1.139e-01, 1.644e-01, -1.711e-03, -2.378e-02, 1.237e-01)); + r += mul(s1_5, M4(1.772e-01, -1.673e-01, 2.690e-01, -1.356e-03, 6.326e-03, 1.908e-01, 6.859e-02, -1.588e-02, 5.134e-02, 1.322e-01, -1.345e-01, 2.626e-01, 6.353e-02, -6.034e-03, -4.632e-03, 6.808e-02)); + r += mul(s1_6, M4(-1.538e-01, -2.396e-02, 9.343e-02, -9.545e-02, 2.179e-01, -6.521e-02, 3.206e-02, -1.402e-01, 8.921e-02, -1.290e-02, -8.212e-03, -3.336e-03, -1.101e-01, -7.274e-02, -1.222e-01, -3.584e-03)); + r += mul(s1_7, M4(-2.568e-01, 3.896e-01, -1.382e-01, -1.016e-01, 6.448e-01, 2.466e-02, -1.558e-01, -1.596e-01, 9.313e-03, 2.957e-01, -1.509e-01, -1.101e-03, 1.536e-01, -6.831e-02, -2.091e-02, -5.669e-03)); + r += mul(s1_8, M4(2.950e-02, -3.207e-02, 3.291e-02, 1.807e-02, 2.214e-01, -6.876e-02, -1.293e-01, 2.144e-01, -8.780e-02, -1.075e-01, 1.157e-02, 1.124e-01, -1.431e-01, 7.471e-02, 2.634e-02, -1.739e-01)); + r += mul(s2_0, M4(-3.641e-02, -1.460e-01, 3.591e-02, 7.587e-02, -1.678e-02, 3.743e-02, -9.497e-02, -4.368e-02, -7.434e-02, 6.365e-02, -5.406e-02, -1.009e-01, -4.776e-02, -1.209e-01, 1.313e-01, -1.629e-02)); + r += mul(s2_1, M4(-2.724e-02, -4.254e-02, -1.241e-02, -1.851e-01, 
-1.199e-01, 3.992e-02, -1.018e-01, -2.485e-01, 2.846e-02, 3.044e-02, -2.142e-01, -1.481e-01, 5.311e-02, -6.084e-02, 2.746e-02, 2.253e-01)); + r += mul(s2_2, M4(2.912e-02, -8.517e-02, 4.616e-02, 2.585e-02, -3.418e-02, 7.831e-02, -6.313e-02, -6.349e-02, -2.185e-01, 4.734e-03, -1.576e-01, -2.940e-01, 3.779e-02, -4.732e-02, -2.341e-02, 9.825e-02)); + r += mul(s2_3, M4(-1.595e-01, 1.192e-01, -1.748e-02, 1.593e-01, 2.061e-01, -1.857e-01, 1.617e-01, 1.555e-02, -1.745e-02, 4.421e-02, -4.839e-02, -1.656e-01, -1.701e-01, 2.096e-01, 2.222e-01, -1.145e-01)); + r += mul(s2_4, M4(-7.095e-02, 2.911e-01, 1.119e-01, 6.460e-01, -1.009e-01, 2.390e-01, 4.439e-01, -2.016e-01, 3.076e-01, 1.578e-01, -8.802e-02, 2.667e-01, -8.825e-02, -4.537e-01, -2.085e-01, -3.020e-01)); + r += mul(s2_5, M4(5.685e-02, -2.135e-01, 3.319e-02, -1.256e-03, 9.093e-03, 2.351e-01, 8.694e-02, 1.826e-02, 1.843e-02, 1.012e-01, 9.358e-02, -1.900e-01, -5.482e-02, -1.073e-01, 2.982e-02, 8.203e-02)); + r += mul(s2_6, M4(-2.368e-01, -9.192e-03, 2.210e-02, 1.076e-01, -9.849e-02, 3.512e-02, 1.548e-02, -3.966e-02, 3.754e-01, 1.172e-01, -6.346e-02, 1.927e-01, 1.465e-01, -1.343e-01, 1.473e-01, 1.476e-01)); + r += mul(s2_7, M4(-1.001e-01, -2.172e-01, 3.278e-01, 3.514e-01, -2.047e-01, 2.959e-01, -2.066e-01, -2.066e-01, 2.820e-01, 2.278e-01, 3.879e-02, -9.134e-02, 1.094e-01, 3.713e-02, 7.681e-02, 1.422e-01)); + r += mul(s2_8, M4(1.533e-01, -9.170e-02, -1.468e-02, -1.166e-01, -1.566e-02, 2.613e-02, -6.614e-02, 1.829e-01, 1.087e-02, 5.517e-02, 6.255e-02, 1.459e-01, 4.011e-02, -6.970e-02, 5.322e-02, -1.088e-02)); + r += mul(s3_0, M4(-1.905e-02, -1.010e-01, 1.092e-01, 4.898e-02, -2.340e-02, 3.379e-02, 9.514e-02, 5.005e-03, 3.866e-03, -3.382e-02, -3.593e-02, 2.363e-02, -1.810e-01, 1.544e-01, 6.000e-02, 4.837e-02)); + r += mul(s3_1, M4(-1.749e-02, -1.130e-01, 6.763e-02, -4.278e-02, -3.698e-02, 7.479e-02, 1.882e-01, -1.991e-02, 9.916e-02, -3.566e-02, -2.936e-02, -6.237e-03, 9.864e-02, -1.650e-02, 8.939e-02, 6.387e-01)); + r += 
mul(s3_2, M4(1.503e-03, -3.209e-02, 1.688e-02, 9.354e-02, -6.807e-02, 1.355e-01, 1.820e-01, -4.962e-02, 7.103e-02, -2.163e-02, -5.474e-03, 8.376e-02, -4.150e-03, -1.194e-01, -3.023e-02, 3.833e-02)); + r += mul(s3_3, M4(5.056e-02, 6.071e-02, 2.378e-01, -1.013e-01, -2.159e-02, 1.218e-01, 2.809e-02, -6.215e-02, -1.733e-01, -3.470e-04, -7.359e-02, 7.317e-03, -5.394e-01, 5.426e-01, 3.015e-01, -3.820e-01)); + r += mul(s3_4, M4(-6.020e-02, -2.055e-01, 3.680e-01, -1.910e-01, -8.192e-02, 2.159e-01, -3.798e-02, -2.656e-01, 5.141e-02, 2.568e-01, -3.443e-01, 2.042e-01, -3.336e-01, -6.124e-01, 2.211e-01, -2.492e-01)); + r += mul(s3_5, M4(-3.453e-02, -4.960e-02, 7.981e-02, 2.635e-01, -7.613e-02, 2.725e-01, 1.583e-02, -1.533e-01, -6.782e-02, -1.673e-01, -1.790e-01, -1.456e-01, 1.161e-01, -2.882e-01, -4.865e-02, -8.904e-02)); + r += mul(s3_6, M4(-6.294e-02, 3.362e-02, 5.988e-02, 2.554e-02, -2.094e-01, 6.535e-02, 6.381e-02, -7.398e-02, 1.185e-01, -8.304e-02, -2.793e-02, -1.274e-01, 6.660e-02, 1.347e-02, 3.585e-02, -4.377e-02)); + r += mul(s3_7, M4(5.355e-02, -1.068e-01, -2.474e-02, 2.486e-01, 2.395e-01, -1.376e-01, 5.042e-02, 1.474e-01, 1.529e-01, 8.491e-02, 7.636e-02, -8.829e-03, 1.284e-01, -1.563e-01, 2.101e-01, -1.310e-01)); + r += mul(s3_8, M4(4.926e-02, -1.433e-01, -2.143e-02, -6.901e-02, -9.212e-02, 2.929e-01, 2.215e-01, 8.423e-02, 5.067e-02, 5.809e-02, 1.200e-01, 8.764e-02, 9.560e-02, -2.676e-01, -2.228e-01, -9.947e-02)); + r += V4(-1.969e-02, 1.581e-02, -1.910e-02, -1.871e-02); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + 
V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 6 +//!DESC conv5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 
s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.629e-02, -1.499e-02, 2.729e-02, 5.240e-02, -2.085e-02, 1.943e-02, 2.721e-01, 1.895e-02, 1.703e-05, -2.185e-03, 5.241e-02, 1.372e-01, 9.478e-02, -1.754e-01, -5.204e-02, -3.875e-02)); + r += mul(s0_1, M4(-1.752e-01, 8.544e-03, 2.009e-01, 1.081e-01, -6.113e-02, -1.264e-02, -1.099e-01, 7.721e-02, -8.883e-02, 6.624e-02, 1.543e-01, -1.470e-01, 7.084e-02, 2.041e-02, -1.353e-01, 5.514e-02)); + r += mul(s0_2, M4(-8.100e-02, 7.181e-02, 1.823e-02, 1.137e-01, -7.627e-03, 1.157e-02, 6.381e-03, -2.029e-02, -1.793e-01, 2.163e-01, 1.071e-01, 1.975e-01, 4.782e-03, -1.038e-01, -2.722e-02, -2.863e-02)); + r += mul(s0_3, M4(-1.987e-01, 2.234e-02, 1.629e-01, 1.411e-01, 1.686e-01, -1.169e-01, -1.153e-01, 5.303e-02, -1.401e-01, 7.446e-02, 1.651e-02, 1.049e-01, 6.766e-02, -4.508e-03, -4.739e-02, -1.971e-02)); + r += mul(s0_4, M4(-6.077e-01, -1.132e-01, 5.496e-02, -1.474e-01, -4.083e-02, -2.358e-02, 5.782e-02, -8.903e-03, -2.187e-01, -5.799e-01, -2.327e-01, 3.352e-01, 4.096e-01, 1.543e-01, 2.634e-01, 1.353e-02)); + r += mul(s0_5, M4(-1.090e-01, -1.697e-02, -6.169e-02, 8.472e-02, 3.849e-02, -4.439e-02, -2.872e-02, 2.331e-02, -2.056e-01, 2.056e-01, -6.852e-02, 2.337e-03, -6.904e-02, 9.751e-02, 2.718e-02, 2.761e-02)); + r += mul(s0_6, M4(-4.874e-02, 2.765e-02, -2.766e-02, 9.790e-02, -3.487e-02, 9.293e-02, 6.253e-02, 2.296e-02, -6.212e-02, 1.142e-01, 5.876e-02, 1.080e-01, -5.299e-02, -8.509e-04, -9.618e-02, -6.980e-03)); + r += mul(s0_7, M4(-2.509e-02, -1.062e-01, 4.788e-02, 9.037e-02, 6.434e-03, -4.993e-02, 1.378e-02, -2.191e-02, -2.489e-02, 1.125e-01, -3.071e-02, 1.494e-01, 1.074e-02, -1.213e-02, -6.753e-02, -1.152e-01)); + r += mul(s0_8, M4(-6.282e-02, 7.023e-03, -1.272e-02, 4.021e-02, -4.950e-02, 7.900e-02, -3.829e-03, 5.379e-02, -7.884e-02, 
4.758e-02, -2.827e-02, 2.264e-02, 3.407e-02, -1.753e-01, 5.579e-03, 3.211e-02)); + r += mul(s1_0, M4(2.093e-02, -4.623e-02, -3.265e-02, 1.873e-02, 3.220e-02, -1.627e-01, 5.046e-02, -4.882e-02, -6.228e-02, 2.889e-02, 5.429e-03, 2.945e-02, -4.053e-02, -6.110e-02, -4.289e-02, 1.538e-02)); + r += mul(s1_1, M4(1.844e-02, -5.925e-02, -1.384e-01, -1.712e-01, 2.836e-02, -7.524e-02, -1.785e-01, -6.518e-02, -3.521e-02, -1.454e-02, 1.426e-01, -4.671e-02, 4.228e-02, 4.236e-02, -3.918e-02, 3.684e-02)); + r += mul(s1_2, M4(-5.506e-02, -1.322e-02, -8.521e-02, 2.606e-02, 2.439e-02, -3.190e-02, 8.334e-03, -1.499e-03, -5.366e-02, -1.778e-02, 5.984e-02, -5.725e-02, 2.785e-02, -3.899e-02, -4.964e-02, 5.202e-02)); + r += mul(s1_3, M4(-2.135e-02, 1.172e-02, -3.127e-03, 4.214e-02, -1.414e-01, -7.592e-02, -9.769e-01, 3.319e-01, -3.444e-02, 3.007e-02, -3.268e-02, 6.656e-03, 4.946e-02, 7.209e-02, 5.624e-03, 4.819e-02)); + r += mul(s1_4, M4(2.301e-02, -8.416e-03, -1.886e-01, -4.043e-01, 5.243e-02, 8.550e-02, -1.203e-01, -2.122e-01, -8.371e-03, -2.264e-01, -1.174e-01, -1.152e-01, -2.725e-02, 1.957e-01, 5.036e-02, 1.214e-01)); + r += mul(s1_5, M4(9.059e-02, -1.391e-01, -2.427e-01, -1.716e-03, 1.048e-01, -9.487e-02, -8.739e-02, 1.464e-02, -5.532e-02, -8.633e-02, -1.491e-01, 1.422e-01, -7.080e-02, 2.580e-01, -1.783e-02, 1.028e-01)); + r += mul(s1_6, M4(5.407e-02, -4.321e-02, -6.909e-02, -5.464e-04, 2.971e-02, 6.365e-02, -3.780e-02, 3.135e-04, -1.352e-02, 5.407e-02, -2.180e-02, 1.456e-02, -2.334e-02, -6.107e-03, 3.552e-02, 1.831e-02)); + r += mul(s1_7, M4(9.306e-02, -3.273e-01, 6.469e-02, -6.293e-02, 7.617e-02, -5.466e-02, 5.817e-04, -1.038e-01, -8.348e-03, -4.016e-02, 5.163e-02, 5.153e-02, -4.913e-02, -3.528e-02, -1.018e-02, -4.440e-02)); + r += mul(s1_8, M4(-1.901e-02, -1.003e-02, 4.926e-03, -5.469e-02, 4.712e-02, 6.210e-02, 6.828e-03, 8.601e-03, -4.973e-02, 1.034e-01, 8.064e-02, -4.831e-02, -8.915e-03, 1.338e-01, -2.552e-02, 1.284e-01)); + r += mul(s2_0, M4(1.099e-01, -3.907e-02, -5.627e-02, 
3.726e-02, -4.089e-02, 3.905e-02, -7.333e-02, -5.201e-02, 1.199e-01, -3.077e-02, 7.080e-02, -6.518e-02, 5.478e-02, 3.929e-02, 3.922e-02, 5.502e-02)); + r += mul(s2_1, M4(-1.650e-01, 1.957e-02, -1.799e-01, 2.149e-01, 8.199e-02, -1.061e-01, -4.542e-02, -1.756e-01, -3.644e-02, -2.350e-02, -1.891e-01, -1.270e-01, 7.510e-02, 6.960e-02, 1.670e-01, -3.395e-01)); + r += mul(s2_2, M4(1.296e-01, -1.354e-02, -1.690e-01, 1.539e-01, 4.985e-02, -5.993e-02, 1.355e-03, 1.321e-02, 5.868e-02, -6.005e-03, 7.050e-03, -4.139e-02, 2.584e-02, -5.562e-02, 1.896e-03, -6.420e-02)); + r += mul(s2_3, M4(-3.750e-03, 9.356e-02, 8.346e-02, -9.586e-02, 5.356e-02, 1.038e-01, 6.120e-02, 7.898e-02, 2.210e-01, -5.833e-02, -2.771e-01, -6.909e-02, 1.841e-01, -2.505e-02, -1.723e-01, 6.775e-02)); + r += mul(s2_4, M4(-1.675e-01, 2.077e-01, 3.639e-01, 1.616e-01, -1.849e-02, -6.565e-02, 2.531e-01, -3.022e-01, -2.568e-01, 1.240e-01, -3.299e-01, 2.133e-01, 2.786e-01, -2.316e-01, -4.497e-02, -2.981e-01)); + r += mul(s2_5, M4(-1.918e-01, 1.659e-01, 1.114e-01, 1.974e-01, 3.208e-02, -2.558e-02, -1.265e-02, -2.035e-02, 2.564e-01, -3.624e-02, -6.179e-02, -1.233e-01, -3.454e-02, -7.588e-02, 2.185e-02, -5.032e-02)); + r += mul(s2_6, M4(1.514e-02, -9.920e-03, 3.117e-02, -5.511e-02, 4.066e-02, 3.938e-02, -5.380e-03, -5.667e-02, -1.978e-02, -4.687e-02, 1.253e-01, -1.195e-02, -7.667e-03, 5.952e-02, 2.288e-02, 3.814e-02)); + r += mul(s2_7, M4(-3.610e-02, -3.112e-02, 3.272e-02, 7.061e-02, 3.913e-02, -1.046e-01, -1.197e-03, 2.646e-02, 9.618e-03, 1.146e-01, 7.210e-02, -1.143e-01, -1.136e-03, 6.210e-02, -1.122e-02, -2.587e-02)); + r += mul(s2_8, M4(-7.697e-02, -9.952e-04, -7.539e-03, -1.513e-02, -9.029e-03, 1.408e-02, 2.055e-06, -1.059e-02, 7.768e-02, 3.124e-02, 3.368e-02, -1.155e-02, -1.546e-02, -2.931e-03, 2.106e-02, -3.257e-02)); + r += mul(s3_0, M4(7.144e-03, -1.322e-02, -1.944e-01, -1.612e-02, -4.237e-02, 1.456e-01, -5.996e-02, -2.145e-01, 4.330e-02, -3.849e-02, 8.443e-02, -4.098e-03, 1.960e-03, 5.152e-03, 9.973e-02, 
8.078e-02)); + r += mul(s3_1, M4(1.113e-01, -3.005e-02, 1.199e-01, -4.029e-02, 9.934e-02, -1.540e-01, -3.818e-01, 3.199e-02, -3.314e-02, 3.015e-02, -8.632e-02, 7.195e-02, -8.405e-02, 1.511e-02, -1.364e-01, -4.779e-02)); + r += mul(s3_2, M4(-7.198e-02, -9.730e-03, -7.659e-02, -9.198e-02, -1.291e-02, -2.118e-02, -7.500e-02, 3.164e-03, -3.136e-02, 2.908e-02, 6.306e-02, -3.065e-03, 1.294e-04, -2.250e-02, 6.295e-02, 1.235e-02)); + r += mul(s3_3, M4(-3.799e-03, 3.301e-02, 1.588e-01, 2.691e-02, 1.612e-01, 2.476e-01, -6.205e-02, 2.352e-01, 9.168e-02, 7.156e-03, 2.315e-02, -2.909e-02, 3.882e-02, 3.297e-02, -1.062e-01, 9.778e-02)); + r += mul(s3_4, M4(-1.450e-01, 8.155e-02, 6.351e-02, -7.599e-02, -9.454e-04, -4.971e-01, 1.665e-01, -5.560e-01, -1.282e-01, 1.987e-01, 7.292e-02, 6.917e-02, 2.889e-02, 2.158e-01, 1.941e-01, -1.472e-01)); + r += mul(s3_5, M4(-6.303e-03, 4.578e-02, 5.086e-02, -1.684e-02, -5.308e-02, 2.573e-01, 6.806e-02, 2.070e-02, 1.006e-01, -1.195e-01, 9.802e-03, -1.123e-01, 1.097e-02, 6.926e-03, 7.695e-02, 1.168e-01)); + r += mul(s3_6, M4(3.211e-02, -1.982e-02, -2.188e-02, -4.794e-02, 2.477e-02, 3.063e-02, 7.311e-02, -9.376e-02, 6.516e-02, -1.927e-02, 4.276e-02, 7.122e-04, -4.200e-02, 2.947e-02, 8.275e-02, 3.763e-02)); + r += mul(s3_7, M4(-1.918e-02, -3.916e-03, -6.341e-03, 5.559e-02, 1.027e-01, -7.839e-02, -3.427e-02, 9.477e-03, 1.564e-03, -2.435e-02, -3.752e-02, -4.801e-02, 4.764e-02, -1.354e-01, 1.214e-02, -5.935e-02)); + r += mul(s3_8, M4(2.244e-02, -2.103e-02, 2.209e-02, -3.679e-02, -1.259e-02, 8.911e-02, -2.068e-02, -8.888e-03, 3.137e-02, -4.821e-02, -2.994e-03, -7.204e-03, -6.453e-02, -7.934e-02, 7.787e-03, -5.808e-03)); + r += V4(1.622e-02, 8.633e-03, 2.282e-02, -1.091e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, 
V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.176e-01, 4.590e-02, -1.097e-03, 9.562e-03, -9.896e-02, -4.315e-02, 5.445e-03, 5.506e-02, -1.072e-01, 2.441e-02, 1.076e-02, -7.966e-02, -1.692e-01, 3.632e-02, 4.413e-03, -4.337e-02)); + r += mul(s0_1, M4(-1.753e-01, 1.540e-01, 9.123e-02, 1.894e-01, 3.174e-02, 1.360e-01, -9.307e-02, 1.550e-01, -3.076e-01, -7.511e-02, -9.321e-03, 2.660e-01, 1.373e-01, 2.495e-02, 1.941e-01, 2.048e-02)); + r += mul(s0_2, M4(-7.759e-02, 7.841e-02, -7.251e-02, 2.212e-01, -2.399e-02, -1.360e-02, -1.016e-02, 2.063e-03, 9.256e-03, -1.288e-02, 8.587e-02, -2.566e-02, 9.888e-02, 8.241e-02, -4.956e-02, -1.440e-01)); + r += mul(s0_3, M4(-1.812e-01, 4.365e-02, 1.120e-01, -1.079e-01, -7.096e-02, 1.734e-01, 5.045e-02, -6.046e-02, 7.682e-02, 2.865e-02, -4.742e-03, -2.903e-02, 1.958e-01, 1.039e-02, -4.816e-02, -6.483e-03)); + r += mul(s0_4, M4(-2.251e-01, 5.948e-02, 2.301e-01, 3.526e-01, 1.676e-01, 3.284e-01, 7.006e-02, -4.188e-01, -1.429e-01, 4.799e-01, 4.885e-02, 1.954e-01, -1.919e-01, -3.617e-01, -1.317e-01, -1.693e-01)); + r += mul(s0_5, M4(5.904e-02, 1.668e-01, 2.446e-01, -6.086e-03, 2.400e-02, 6.791e-03, -1.382e-02, 4.357e-02, -6.655e-02, -2.124e-01, -1.177e-01, 7.355e-02, -5.483e-03, -3.423e-02, 2.298e-01, -8.469e-02)); + r += mul(s0_6, M4(2.939e-03, 4.728e-02, 1.274e-01, -9.988e-02, -6.243e-03, -8.422e-03, 1.984e-02, -1.120e-02, -3.664e-02, 1.984e-02, 5.726e-02, -1.255e-01, 1.866e-02, 3.333e-02, -4.936e-02, 4.108e-02)); + r += mul(s0_7, M4(-1.240e-02, 1.239e-01, 1.241e-01, 5.387e-02, -1.040e-01, -2.150e-04, -1.027e-02, 1.470e-01, -2.384e-02, -2.690e-02, 2.359e-01, 5.578e-02, 2.181e-02, -2.860e-02, -4.156e-02, 5.233e-03)); + r += mul(s0_8, M4(-1.985e-02, 1.875e-02, 2.490e-02, 4.427e-02, 1.119e-02, -1.343e-02, -1.967e-02, 2.461e-02, 1.923e-02, -1.645e-02, 8.968e-02, 1.569e-02, 6.168e-02, 8.987e-02, -7.952e-02, -2.058e-02)); + r += mul(s1_0, M4(-7.795e-02, -3.943e-02, -9.027e-02, 4.130e-02, 
-4.783e-02, 3.800e-02, -1.264e-01, 3.568e-02, -4.561e-02, -5.500e-03, -3.950e-03, 2.980e-02, 1.306e-02, 4.398e-02, -1.101e-01, 6.269e-02)); + r += mul(s1_1, M4(7.817e-03, -8.656e-02, -1.610e-01, 1.343e-01, 7.768e-03, 6.015e-02, -2.118e-01, 9.987e-02, -9.086e-02, 3.293e-02, -1.073e-02, 2.126e-02, 9.909e-02, 7.013e-02, -1.638e-02, 1.370e-02)); + r += mul(s1_2, M4(1.524e-02, 1.213e-01, -2.121e-01, -7.601e-03, -1.088e-03, -7.068e-03, -3.429e-02, -1.854e-02, -1.256e-01, -5.000e-02, -4.249e-03, 6.956e-02, 1.810e-01, 1.228e-01, -1.183e-01, -2.285e-02)); + r += mul(s1_3, M4(-3.223e-02, -3.501e-02, 5.832e-02, -1.608e-01, 3.728e-01, 5.507e-01, -3.432e-02, -1.058e-01, -5.823e-02, 5.583e-03, 4.258e-02, -4.061e-02, 5.032e-02, -1.031e-02, 6.090e-02, -6.812e-02)); + r += mul(s1_4, M4(-5.041e-02, -6.582e-01, 1.553e-01, 1.593e-01, 1.460e-01, 2.310e-02, 3.522e-02, -4.788e-01, -1.449e-01, -4.072e-02, 1.242e-01, -1.751e-02, 3.372e-02, -1.114e-01, -5.362e-02, -2.117e-02)); + r += mul(s1_5, M4(1.978e-01, 2.884e-01, 5.209e-02, -1.751e-01, 3.227e-02, 1.168e-02, -9.727e-03, 4.108e-02, 2.009e-02, 7.821e-02, 2.300e-01, 1.131e-01, 7.034e-02, 5.115e-02, -2.973e-01, 1.802e-01)); + r += mul(s1_6, M4(1.958e-02, 2.796e-02, 6.192e-02, -5.750e-02, -6.072e-02, 8.721e-02, 4.716e-02, 4.567e-02, -4.567e-02, -2.416e-02, 2.033e-02, -3.503e-02, 3.544e-03, 2.999e-02, -1.305e-02, 3.346e-02)); + r += mul(s1_7, M4(8.872e-03, 8.004e-02, -7.152e-02, 4.066e-02, -3.603e-02, -9.614e-02, 3.228e-02, 9.803e-03, -6.331e-02, -4.420e-02, 1.642e-01, -1.447e-02, 4.909e-02, 5.909e-02, -8.621e-02, 1.548e-01)); + r += mul(s1_8, M4(-1.711e-02, 5.139e-02, -7.837e-02, -7.919e-03, -1.636e-02, -4.681e-03, 2.828e-03, 2.137e-02, -1.023e-01, -7.483e-03, 7.474e-02, 1.323e-01, 1.304e-01, 2.446e-02, -9.139e-02, -9.162e-02)); + r += mul(s2_0, M4(7.268e-02, 3.978e-02, -1.480e-01, 1.105e-01, 1.312e-01, 1.599e-02, -4.579e-02, 9.830e-02, 5.525e-02, -7.153e-02, 4.305e-02, -3.304e-02, -4.484e-02, -7.667e-05, -1.446e-02, 3.995e-03)); + r += 
mul(s2_1, M4(1.060e-01, 7.116e-02, -2.697e-01, -2.968e-01, 7.944e-02, -1.363e-03, 7.398e-02, -3.584e-01, -2.039e-02, -3.787e-02, -2.126e-01, 3.468e-02, -3.340e-01, -2.642e-01, -1.053e-01, 4.584e-02)); + r += mul(s2_2, M4(8.515e-02, 7.926e-02, -1.782e-01, 3.409e-01, -4.535e-02, 6.948e-02, 4.167e-03, -8.690e-02, -1.100e-03, -1.622e-02, -2.110e-02, -4.446e-03, -2.315e-02, 1.463e-03, 4.195e-03, -7.673e-02)); + r += mul(s2_3, M4(7.574e-02, -6.551e-02, -7.931e-02, 1.847e-02, -1.222e-01, -2.567e-02, 1.781e-02, 6.826e-02, 1.751e-01, 6.480e-02, 9.014e-02, 5.389e-02, -6.614e-02, 1.289e-01, 3.795e-02, 7.552e-02)); + r += mul(s2_4, M4(1.655e-01, 1.347e-01, -4.840e-02, -4.500e-02, -2.469e-01, -1.613e-02, 9.173e-02, 2.510e-02, 6.241e-01, 9.625e-02, 3.608e-01, -9.156e-02, -3.869e-01, -2.592e-01, 3.916e-01, -2.647e-01)); + r += mul(s2_5, M4(3.298e-02, 2.479e-01, 9.145e-02, 1.734e-01, 1.213e-02, 3.894e-02, -4.250e-02, 1.374e-01, 1.559e-01, -3.204e-02, -1.421e-01, -2.360e-01, -1.211e-01, -1.204e-02, 1.006e-01, 1.015e-01)); + r += mul(s2_6, M4(-3.940e-02, -3.056e-02, 9.430e-03, -2.775e-03, -1.404e-03, -5.216e-02, 1.604e-02, -2.766e-02, 3.187e-02, 9.107e-02, -1.513e-01, 1.491e-01, 2.200e-02, -5.174e-03, -4.308e-02, -1.033e-02)); + r += mul(s2_7, M4(-4.013e-02, 3.924e-02, 3.782e-02, 2.474e-02, 6.905e-02, -1.493e-02, 7.994e-04, -1.260e-01, -3.816e-02, 8.267e-02, -2.321e-01, -4.363e-03, 5.037e-02, -9.344e-02, -3.923e-02, 2.305e-02)); + r += mul(s2_8, M4(-9.598e-02, 6.235e-02, -3.485e-02, 1.507e-01, 2.140e-02, -1.713e-02, -2.150e-02, 5.550e-03, -7.057e-02, -4.752e-02, -8.683e-02, 9.942e-02, 1.652e-02, -2.513e-03, 6.316e-03, -1.445e-02)); + r += mul(s3_0, M4(7.494e-03, 6.167e-02, -2.229e-02, 9.588e-02, 2.098e-01, -2.744e-01, -1.121e-01, -1.093e-01, -3.600e-02, 7.007e-03, 6.640e-02, -6.241e-02, -2.350e-02, -2.487e-02, -8.227e-03, -9.150e-02)); + r += mul(s3_1, M4(3.763e-02, 2.717e-02, 2.647e-01, -3.230e-01, 4.209e-01, 3.723e-01, -1.271e-01, -1.294e-01, -3.973e-02, -2.565e-02, -4.823e-02, 
2.920e-01, -3.086e-02, 2.765e-02, -8.713e-02, 1.211e-01)); + r += mul(s3_2, M4(-8.572e-02, -2.810e-03, -4.112e-02, 1.389e-01, 1.504e-03, -1.636e-03, -4.064e-03, 7.007e-02, -6.103e-02, -9.792e-03, 6.503e-02, -2.098e-02, -1.478e-02, -1.370e-02, 5.957e-02, -2.460e-02)); + r += mul(s3_3, M4(-7.273e-02, -7.381e-02, -1.120e-02, 2.998e-02, -1.515e-01, 2.393e-02, 1.901e-01, -2.623e-02, 9.012e-02, 3.699e-02, -5.944e-02, 5.235e-02, -4.027e-02, 1.674e-01, -4.495e-03, 3.007e-02)); + r += mul(s3_4, M4(-1.656e-01, -2.356e-02, -1.028e-01, 1.295e-01, -4.564e-01, -2.254e-01, 2.951e-01, -1.610e-02, 1.831e-01, 2.144e-01, -1.461e-01, -4.598e-02, -2.185e-01, -2.248e-01, 5.653e-01, -4.681e-01)); + r += mul(s3_5, M4(-4.814e-02, 6.573e-02, 2.865e-02, 1.553e-01, -7.495e-02, -4.398e-02, -1.738e-01, 1.548e-01, -4.272e-03, 3.478e-02, -2.813e-02, -9.149e-02, -7.011e-02, -3.629e-03, 6.780e-02, 8.682e-02)); + r += mul(s3_6, M4(-4.552e-03, -7.880e-03, 3.240e-02, -4.527e-02, -4.243e-02, -3.649e-02, -3.803e-02, 6.218e-02, 4.543e-03, 1.675e-02, -1.154e-02, 3.727e-03, -6.186e-02, 3.390e-02, 2.030e-02, 3.279e-02)); + r += mul(s3_7, M4(3.833e-02, -3.550e-04, 4.441e-02, -7.695e-02, 1.958e-01, -5.135e-02, -1.626e-01, -1.099e-01, 7.221e-02, 3.797e-02, -8.113e-02, -1.506e-01, -1.960e-01, 7.023e-02, 4.237e-02, 1.152e-01)); + r += mul(s3_8, M4(-4.346e-02, -3.773e-03, 6.954e-02, 6.227e-03, 7.212e-02, -4.309e-02, -1.550e-02, -9.249e-02, -1.252e-02, -3.851e-03, -1.582e-02, -2.023e-02, -3.647e-02, 6.547e-02, 5.062e-02, 4.362e-02)); + r += V4(-6.201e-03, -2.350e-02, 4.995e-03, 5.325e-03); + return r; +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 
= l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 7 +//!DESC conv6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 
s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.952e-02, -2.301e-02, -3.736e-04, -1.581e-02, 7.341e-02, -3.569e-02, 5.963e-03, 1.284e-01, 6.945e-03, 1.208e-02, -2.852e-02, 1.210e-02, -5.234e-02, 3.450e-02, 1.034e-02, 9.300e-04)); + r += mul(s0_1, M4(-1.048e-01, 4.626e-02, 1.610e-02, -1.378e-02, -1.910e-01, 2.419e-02, 7.012e-01, -2.998e-01, -8.037e-02, -4.437e-02, -3.772e-02, -1.620e-02, -3.646e-01, -2.323e-02, 1.019e-02, 3.517e-02)); + r += mul(s0_2, M4(-9.399e-02, -3.329e-02, 1.219e-02, -8.421e-03, -3.055e-02, 3.756e-02, 3.237e-02, 1.406e-01, -1.392e-02, -1.962e-02, 1.136e-01, 2.379e-02, -6.186e-03, -4.576e-02, -4.557e-02, -1.548e-01)); + r += mul(s0_3, M4(1.638e-02, 8.104e-02, -3.369e-02, 2.663e-02, 4.984e-02, 2.547e-01, -3.319e-02, 1.098e-01, -8.594e-03, 4.489e-03, 9.319e-03, -1.642e-02, 3.309e-02, -2.018e-02, -4.158e-03, -2.341e-02)); + r += mul(s0_4, M4(4.271e-02, -2.245e-01, 5.310e-02, -1.032e-01, 2.033e-02, -5.645e-01, -1.033e-01, -1.308e-01, 1.039e-01, -5.559e-02, 2.827e-02, -1.009e-01, 3.317e-01, -1.548e-01, -6.898e-02, -2.608e-01)); + r += mul(s0_5, M4(5.093e-02, 1.337e-01, 2.877e-02, 1.042e-01, -1.935e-02, 5.340e-02, 3.398e-02, -1.656e-02, 1.024e-02, 2.018e-01, -5.609e-02, 2.052e-01, -1.878e-02, -1.618e-01, 3.110e-02, -1.025e-01)); + r += mul(s0_6, M4(-1.982e-03, -4.609e-02, 2.637e-02, -9.266e-03, 2.498e-04, 2.218e-02, -1.013e-02, 2.184e-02, 5.965e-03, 1.045e-02, 1.644e-03, 2.716e-02, 3.749e-03, -3.710e-02, -1.464e-02, -1.227e-02)); + r += mul(s0_7, M4(-1.146e-02, -3.264e-03, -1.180e-02, -2.658e-02, -2.606e-02, -3.221e-02, -1.237e-02, -2.961e-02, 9.372e-03, -4.673e-02, -5.785e-04, -4.285e-02, 1.433e-03, -7.620e-02, -2.407e-03, 2.959e-02)); + r += mul(s0_8, M4(-3.948e-04, 6.024e-02, 
9.655e-04, 2.124e-02, -1.357e-02, -2.965e-02, -1.354e-03, -6.947e-03, 3.782e-02, 4.020e-02, -2.875e-02, 3.749e-02, -5.428e-03, -3.603e-02, -3.256e-02, -2.771e-02)); + r += mul(s1_0, M4(3.111e-03, -4.528e-02, 2.406e-02, -3.269e-02, 2.000e-02, -5.895e-02, 3.012e-02, -2.212e-02, 1.071e-01, 1.928e-02, -3.492e-02, 1.053e-01, -6.362e-02, 4.470e-02, 2.578e-02, -1.040e-02)); + r += mul(s1_1, M4(1.134e-01, 6.078e-02, -4.550e-03, -7.141e-03, -1.449e-01, 7.662e-02, 1.104e-01, -3.543e-02, 4.676e-01, 1.248e-02, -3.675e-01, -1.655e-01, 7.049e-02, 2.875e-02, -3.009e-02, 9.027e-02)); + r += mul(s1_2, M4(-5.208e-02, -3.105e-02, -3.721e-02, 1.006e-01, -3.864e-02, 2.810e-02, 1.604e-02, 8.082e-02, 8.273e-02, 7.069e-02, -2.601e-02, 8.756e-02, -2.213e-02, -3.431e-02, -3.394e-02, -4.138e-02)); + r += mul(s1_3, M4(6.901e-02, -1.813e-02, -6.033e-02, -5.048e-02, 1.642e-02, 6.266e-02, -4.282e-02, 2.908e-02, -5.339e-02, 2.241e-02, -7.009e-02, -2.048e-02, 5.016e-02, 1.243e-02, -2.653e-02, 8.097e-02)); + r += mul(s1_4, M4(-8.214e-03, -6.216e-02, 3.436e-02, 3.719e-02, 1.412e-01, -1.256e-01, -9.253e-02, 1.326e-01, 8.852e-01, -4.551e-02, -7.485e-01, -1.673e-01, -9.588e-03, -1.411e-01, -4.338e-02, -4.232e-01)); + r += mul(s1_5, M4(7.913e-02, 5.560e-01, 9.992e-02, 5.410e-01, -1.700e-03, -1.288e-02, -3.230e-02, -8.186e-02, 1.255e-01, 4.736e-02, -5.084e-01, 7.951e-02, 1.181e-01, -8.278e-02, 3.674e-02, -1.468e-01)); + r += mul(s1_6, M4(-6.616e-03, -4.503e-02, 2.533e-02, -1.964e-02, -8.381e-03, -6.342e-03, 6.659e-03, -1.064e-02, 4.090e-02, 7.180e-02, 5.233e-02, 5.864e-03, -1.308e-03, 5.536e-03, -2.743e-03, -1.219e-02)); + r += mul(s1_7, M4(2.071e-02, 7.370e-02, 2.681e-04, 9.920e-03, -2.067e-02, 1.362e-01, 1.855e-02, 5.200e-04, 3.511e-01, -2.001e-01, -1.940e-01, 9.383e-02, -3.934e-02, -1.915e-01, 2.669e-02, 1.094e-02)); + r += mul(s1_8, M4(-2.372e-02, 1.062e-01, -1.496e-02, 1.175e-02, -1.135e-02, 6.667e-03, 3.389e-03, 1.929e-02, -1.125e-02, 1.674e-01, -3.336e-02, -6.202e-02, 6.653e-03, 1.235e-02, 
-1.633e-02, 5.038e-02)); + r += mul(s2_0, M4(-3.964e-02, -3.119e-03, -9.673e-03, -6.366e-03, 8.531e-02, -3.670e-02, -5.924e-03, 1.261e-02, -6.665e-02, -1.278e-02, -1.696e-02, -2.727e-02, -3.882e-02, 1.510e-03, 2.840e-02, -1.340e-02)); + r += mul(s2_1, M4(-1.639e-01, -1.744e-02, -5.910e-03, -1.800e-02, 4.918e-02, 2.777e-02, 4.527e-02, -1.255e-01, -1.224e-01, -4.651e-02, 3.581e-02, -2.700e-03, -6.049e-02, 2.921e-02, 4.759e-02, -3.412e-03)); + r += mul(s2_2, M4(-1.017e-01, 3.087e-02, 4.443e-01, 7.643e-02, -5.113e-02, 3.162e-02, -2.544e-02, 1.116e-01, 2.750e-03, 6.519e-02, -1.121e-02, 5.005e-02, -6.372e-02, -2.334e-02, 6.144e-02, 6.538e-02)); + r += mul(s2_3, M4(2.036e-01, -2.519e-02, -1.132e-02, -6.775e-02, -1.205e-02, 2.868e-02, -1.777e-02, -1.051e-02, 7.531e-02, -8.572e-02, 5.924e-02, -8.820e-02, 1.963e-01, -4.408e-02, -4.202e-02, -4.132e-02)); + r += mul(s2_4, M4(-4.319e-01, 2.154e-02, 4.534e-01, 1.691e-02, -5.194e-02, 8.830e-03, 4.441e-02, 2.165e-01, -4.723e-02, 2.877e-01, 2.503e-02, 2.071e-01, 2.739e-01, 1.595e-01, -1.141e-01, 4.121e-01)); + r += mul(s2_5, M4(3.627e-01, 8.034e-02, 2.531e-01, 1.357e-01, -5.579e-03, -5.171e-02, 4.784e-02, -1.580e-01, -1.597e-03, -2.143e-02, 1.145e-02, -2.409e-02, 1.135e-01, 1.942e-02, -2.637e-02, -1.371e-01)); + r += mul(s2_6, M4(5.398e-02, 8.680e-03, 1.624e-02, -2.777e-02, -1.992e-02, 9.359e-03, 2.106e-02, 1.651e-02, -3.371e-02, -3.989e-02, 2.268e-02, -1.706e-02, 7.451e-03, -3.325e-02, -2.215e-02, -1.584e-02)); + r += mul(s2_7, M4(1.898e-02, -7.606e-02, -8.297e-02, 5.946e-03, -2.822e-02, 1.434e-02, 6.815e-03, -7.690e-02, -2.483e-02, 1.220e-02, 2.584e-02, -1.723e-02, -1.801e-02, 2.864e-01, 2.826e-02, -1.121e-02)); + r += mul(s2_8, M4(5.351e-02, 1.077e-01, -6.458e-02, 6.860e-02, -1.110e-02, 3.929e-02, 1.543e-02, 6.122e-02, -8.551e-03, 4.159e-02, 2.833e-03, 3.752e-02, 2.189e-02, 2.252e-04, -1.454e-02, 8.179e-02)); + r += mul(s3_0, M4(-1.542e-02, -7.969e-03, -1.764e-02, 1.333e-02, 1.272e-01, -2.018e-02, -1.554e-02, 1.398e-02, 
-6.583e-02, 4.630e-02, -5.456e-02, -5.594e-02, -1.606e-01, -2.573e-02, 4.592e-02, -4.465e-02)); + r += mul(s3_1, M4(-4.177e-02, -3.885e-02, -5.615e-02, -3.797e-02, -5.923e-02, -2.316e-02, 1.191e-02, -1.548e-01, -6.239e-01, -8.157e-02, 1.527e-01, 8.326e-02, -2.888e-01, -6.520e-02, 3.209e-02, 1.560e-02)); + r += mul(s3_2, M4(-2.764e-03, 2.741e-02, 5.627e-02, 1.597e-02, 1.443e-02, 4.968e-02, 1.132e-02, 9.153e-02, -5.382e-03, -1.644e-02, 5.247e-03, 3.311e-02, -4.528e-02, 2.605e-02, 4.794e-02, 7.640e-02)); + r += mul(s3_3, M4(2.045e-02, -1.614e-03, 2.289e-03, -4.305e-02, -1.017e-01, 5.861e-02, -1.314e-02, 2.477e-02, -1.999e-01, -1.155e-01, 1.294e-01, -2.392e-02, 1.875e-01, 2.845e-02, -3.870e-02, 5.380e-02)); + r += mul(s3_4, M4(5.933e-02, -1.103e-02, 7.176e-02, -3.689e-02, 5.613e-02, -3.270e-01, 3.240e-02, -2.858e-01, -1.259e+00, 2.597e-01, 3.525e-01, 1.430e-02, 1.284e-01, 8.753e-02, 7.569e-02, -5.311e-02)); + r += mul(s3_5, M4(2.279e-02, 1.895e-01, -6.586e-02, 1.515e-01, -2.923e-02, -1.046e-01, 7.716e-03, -1.229e-01, 2.289e-02, -7.755e-02, 1.265e-01, 8.574e-02, 1.189e-02, 6.164e-02, -7.733e-03, 5.218e-03)); + r += mul(s3_6, M4(-8.498e-03, -3.724e-02, 2.832e-02, -7.279e-03, -3.663e-02, 2.402e-02, 2.821e-02, 1.569e-02, 2.319e-02, 2.036e-02, 2.252e-02, -1.611e-02, 9.843e-03, -3.099e-02, 3.160e-03, -5.084e-02)); + r += mul(s3_7, M4(-2.465e-02, 1.290e-02, 2.281e-02, -5.200e-04, -6.553e-02, -1.768e-01, 9.698e-03, -6.422e-02, -1.386e-02, -1.391e-01, -5.888e-02, 5.580e-03, 2.841e-02, 2.990e-02, -5.399e-02, 5.650e-03)); + r += mul(s3_8, M4(-4.902e-03, 9.385e-03, -8.622e-03, 1.815e-02, -8.117e-03, 4.919e-02, 1.259e-02, 4.528e-02, 2.606e-02, 4.409e-02, -1.766e-02, 1.950e-02, -1.739e-02, 7.717e-03, -1.473e-02, -1.569e-02)); + r += V4(-3.105e-03, 3.230e-03, 4.884e-04, 1.938e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, 
V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-9.510e-02, 5.523e-03, -5.580e-02, -3.107e-02, 2.588e-02, -3.076e-02, 4.525e-02, 2.069e-02, 1.302e-01, 1.260e-02, 3.877e-02, -1.878e-02, -1.553e-01, -3.705e-02, 2.666e-02, -1.631e-02)); + r += mul(s0_1, M4(1.518e-01, 5.025e-04, 1.008e-01, 2.029e-02, -1.343e-01, 1.633e-01, 3.819e-01, 6.217e-03, -7.098e-02, 2.839e-02, 1.971e-02, -4.067e-02, -7.846e-02, -2.545e-01, 1.126e-01, 1.475e-02)); + r += mul(s0_2, M4(-6.206e-02, -4.029e-02, 2.049e-02, -1.331e-02, 4.682e-02, 3.132e-03, 1.294e-01, -3.039e-02, 5.065e-02, -2.711e-03, 7.522e-02, 1.499e-01, 1.383e-03, -2.086e-02, -5.117e-02, -8.129e-02)); + r += mul(s0_3, M4(-7.973e-02, 2.775e-02, -1.490e-02, -7.830e-02, 8.944e-02, -7.283e-02, 1.841e-01, -4.484e-02, -3.632e-02, 1.255e-02, 3.882e-02, -2.862e-02, -2.390e-01, -2.774e-02, 6.677e-02, 1.939e-02)); + r += mul(s0_4, M4(2.243e-01, 8.911e-02, -1.718e-01, 1.485e-01, 9.759e-02, 2.321e-01, -4.390e-01, 2.092e-01, 4.278e-02, 1.205e-01, -1.966e-01, 2.130e-02, -2.223e-01, 3.818e-01, -5.426e-01, -3.762e-01)); + r += mul(s0_5, M4(-2.560e-02, -7.789e-02, -1.339e-01, 4.073e-02, -4.376e-03, -1.462e-01, -2.860e-02, 1.660e-01, -3.137e-02, 2.393e-01, 2.252e-02, 5.165e-02, 6.906e-02, -1.712e-02, 3.349e-02, -3.350e-01)); + r += mul(s0_6, M4(3.665e-03, 8.648e-02, -5.364e-02, 3.068e-02, -2.822e-02, -7.398e-02, 5.950e-03, 6.154e-03, 1.010e-02, 1.587e-02, -7.437e-03, 1.494e-02, 6.585e-02, 4.852e-02, 1.699e-02, -2.453e-02)); + r += mul(s0_7, M4(-6.942e-02, -1.231e-01, 1.254e-01, -7.903e-03, 1.278e-02, -4.337e-02, 1.014e-01, -3.921e-02, 3.086e-02, 8.863e-02, 3.278e-03, -1.790e-02, -4.250e-02, -6.413e-02, 6.324e-02, 9.679e-02)); + r += mul(s0_8, M4(-4.634e-02, 3.222e-03, 3.770e-02, -6.919e-02, 6.432e-03, 3.093e-02, -1.804e-02, -3.677e-02, 1.142e-03, 5.227e-02, -1.075e-01, 1.375e-01, 1.826e-02, 1.763e-02, 4.677e-02, 
-2.110e-02)); + r += mul(s1_0, M4(-1.817e-02, 1.265e-02, -2.043e-02, 1.326e-02, -1.695e-01, 1.255e-01, -8.576e-02, 7.959e-03, 7.193e-02, 7.724e-02, -4.627e-02, -2.431e-02, -1.319e-01, -5.651e-02, 5.140e-02, 1.678e-02)); + r += mul(s1_1, M4(3.337e-02, 4.582e-02, -2.165e-02, 9.076e-03, -4.945e-02, -8.773e-03, 9.112e-02, 1.320e-01, 3.413e-02, 3.273e-01, -2.814e-01, 1.595e-02, 1.343e-01, -6.513e-02, 4.512e-02, -8.970e-02)); + r += mul(s1_2, M4(3.505e-02, 2.270e-02, 7.114e-02, 1.198e-01, 3.787e-02, 1.018e-02, 6.472e-02, -2.210e-02, 2.939e-02, 8.307e-02, 2.993e-03, 1.123e-01, -9.273e-02, 2.409e-02, -4.903e-02, -4.636e-02)); + r += mul(s1_3, M4(-4.843e-02, -6.013e-02, 2.715e-02, -9.149e-03, 1.452e-01, 3.518e-02, 7.180e-02, 3.286e-03, -2.588e-01, -1.638e-01, 1.521e-01, -1.203e-02, -2.689e-01, 4.285e-02, -1.837e-01, -5.794e-03)); + r += mul(s1_4, M4(2.749e-01, -3.031e-01, 1.152e-01, -7.245e-02, 9.716e-02, 8.837e-02, -3.117e-01, 5.664e-02, -1.557e-01, 3.682e-01, 1.259e-01, 3.908e-02, -8.916e-02, 2.177e-01, 1.254e-01, -2.862e-01)); + r += mul(s1_5, M4(3.740e-02, 1.899e-01, -9.864e-02, 6.740e-01, -8.316e-03, 1.469e-02, -4.439e-02, 2.380e-02, 1.310e-02, 2.651e-01, -3.290e-02, -4.670e-02, -3.505e-02, -1.649e-01, -2.098e-01, 1.243e-01)); + r += mul(s1_6, M4(-3.589e-02, 2.351e-02, -2.010e-02, 1.626e-02, -4.976e-02, -8.924e-02, -1.295e-03, 2.366e-02, -1.079e-02, 9.095e-02, -5.359e-02, -2.689e-02, 1.871e-02, -4.332e-03, -3.191e-02, -2.244e-02)); + r += mul(s1_7, M4(-4.806e-02, 1.472e-02, 6.391e-02, -5.629e-03, 3.986e-02, -8.815e-02, 1.416e-01, -6.209e-02, -2.343e-03, 3.541e-01, -7.232e-02, 1.099e-01, 2.329e-03, 7.972e-02, -7.797e-02, 1.009e-02)); + r += mul(s1_8, M4(-1.305e-02, -6.657e-02, 4.004e-02, 3.393e-03, 1.116e-02, 4.225e-03, -1.438e-02, -1.464e-02, 6.804e-02, -1.095e-02, -1.145e-01, 2.725e-01, -2.005e-02, 6.875e-02, 7.349e-02, 1.354e-02)); + r += mul(s2_0, M4(5.085e-02, 3.046e-02, -8.986e-03, -1.066e-02, -6.250e-02, 6.891e-02, -9.668e-02, 1.813e-02, 1.333e-01, -6.920e-02, 
4.096e-02, -2.500e-02, -1.387e-01, -3.212e-02, -7.621e-03, 5.839e-03)); + r += mul(s2_1, M4(-6.667e-02, 1.181e-02, 7.953e-02, 7.288e-02, 2.128e-02, 2.170e-01, -3.280e-02, 5.377e-02, 6.852e-03, -7.527e-03, 9.964e-02, 7.642e-02, 3.760e-02, -4.753e-02, 7.111e-02, -1.569e-02)); + r += mul(s2_2, M4(2.790e-02, 3.059e-02, 2.660e-01, 1.046e-01, 2.424e-02, -2.548e-02, 3.858e-02, 2.470e-02, -1.736e-03, 1.112e-03, 6.293e-03, -3.455e-02, 5.024e-03, -3.655e-02, 7.447e-02, 3.024e-02)); + r += mul(s2_3, M4(-8.148e-02, 1.468e-01, -2.009e-02, 6.602e-02, 1.842e-02, -2.225e-01, 2.157e-01, -1.562e-02, -1.049e-01, 9.012e-02, -3.863e-02, 7.084e-02, -7.823e-02, 8.032e-03, 4.661e-02, 1.247e-01)); + r += mul(s2_4, M4(2.322e-01, 6.905e-02, -6.217e-01, -2.876e-01, 2.005e-01, -8.268e-01, 1.071e-02, -1.958e-01, 1.116e-01, 1.250e-01, -5.871e-02, -1.294e-01, 3.019e-01, 3.933e-01, 3.487e-01, -2.436e-01)); + r += mul(s2_5, M4(-9.753e-02, 6.593e-02, -2.563e-01, 2.381e-01, 4.691e-02, 1.796e-01, 1.375e-03, -3.460e-02, -3.481e-02, -2.220e-02, 3.476e-03, 9.687e-02, -4.415e-02, 1.811e-01, -1.381e-01, -6.685e-02)); + r += mul(s2_6, M4(-1.212e-03, 4.823e-02, -1.680e-03, 1.062e-02, -3.830e-02, -5.797e-02, 1.147e-02, 2.973e-02, 1.650e-02, -2.618e-02, 5.139e-02, 1.433e-02, -1.063e-01, -2.012e-01, 1.667e-01, 3.318e-02)); + r += mul(s2_7, M4(-2.061e-02, 5.721e-02, 4.963e-02, 3.164e-02, 1.512e-02, -4.045e-02, 1.890e-01, -3.224e-02, -2.952e-02, -1.150e-01, 3.362e-02, -3.598e-03, 2.868e-02, -2.058e-01, 2.494e-01, -1.499e-01)); + r += mul(s2_8, M4(-3.157e-02, -1.385e-01, 1.396e-02, 6.336e-02, -2.137e-02, -9.049e-02, 1.104e-02, -3.260e-03, 2.508e-03, -4.677e-02, -1.825e-02, -3.115e-03, -3.979e-02, -1.426e-01, 2.571e-02, 1.175e-01)); + r += mul(s3_0, M4(1.485e-01, -6.051e-03, 3.529e-02, -2.369e-02, -2.982e-01, 1.121e-01, -1.680e-01, -2.738e-02, 1.186e-01, -8.329e-02, 5.089e-02, -2.131e-02, 1.111e-01, -4.026e-02, 9.861e-02, -2.885e-02)); + r += mul(s3_1, M4(-1.694e-01, 2.375e-02, -3.937e-02, -3.478e-02, 2.193e-02, 
7.665e-02, -5.437e-02, 4.724e-02, -6.213e-02, -4.031e-01, 1.861e-01, 9.586e-03, -4.383e-02, -1.106e-01, 1.108e-01, 4.909e-02)); + r += mul(s3_2, M4(6.812e-02, -2.311e-02, 5.210e-02, 9.546e-02, 2.433e-02, 3.416e-02, 2.978e-02, 2.772e-02, -5.155e-03, -2.713e-02, 2.212e-02, -6.056e-03, 5.356e-02, -5.571e-02, 7.526e-02, 6.467e-02)); + r += mul(s3_3, M4(-1.633e-01, -2.742e-02, -7.204e-03, -8.113e-03, -2.917e-01, -1.351e-01, 5.153e-02, -8.188e-02, -1.977e-01, -1.460e-01, -1.173e-01, -1.349e-02, 7.836e-02, 1.091e-01, -5.539e-02, 9.694e-02)); + r += mul(s3_4, M4(2.165e-01, 1.383e-01, -1.362e-01, 8.388e-02, 9.544e-02, -4.350e-01, -3.249e-01, 1.559e-01, 1.919e-01, -4.631e-01, -4.345e-01, -3.247e-01, 7.224e-02, 2.959e-01, -2.107e-01, -2.463e-01)); + r += mul(s3_5, M4(-4.089e-02, 2.202e-01, 6.955e-02, -1.248e-01, 1.140e-02, -7.158e-03, -3.404e-02, 1.370e-01, -4.043e-03, -7.008e-02, 7.084e-02, 8.809e-02, 1.570e-02, 6.716e-02, -9.100e-04, -1.166e-02)); + r += mul(s3_6, M4(2.194e-02, 6.765e-02, -9.062e-03, 1.480e-02, 1.792e-02, 4.489e-02, -1.333e-02, 3.524e-02, 3.035e-02, 1.197e-01, -5.057e-02, -2.569e-03, -5.396e-02, -3.068e-02, 3.150e-02, -3.015e-02)); + r += mul(s3_7, M4(-7.630e-02, -1.487e-01, 1.509e-01, -2.381e-02, 2.594e-02, -2.307e-02, 1.260e-01, -3.836e-02, -2.424e-02, 3.167e-02, 3.592e-03, 8.270e-02, -5.491e-02, -1.358e-01, 9.294e-02, 2.589e-02)); + r += mul(s3_8, M4(-4.926e-04, -1.151e-01, -4.633e-02, 5.883e-02, -5.406e-03, 1.852e-02, -2.089e-02, -5.929e-02, -1.291e-02, -2.909e-02, -1.351e-02, -1.849e-02, -1.130e-02, -7.846e-02, -9.407e-03, -1.708e-02)); + r += V4(4.076e-03, -1.858e-02, 9.260e-03, 6.390e-04); + return r; +} + +void Pass7(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 
0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 8 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0, t1 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define 
l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.911e-02, 7.153e-03, -2.976e-04, -7.179e-03, -1.377e-02, -6.451e-03, 2.290e-02, -1.537e-02, 2.230e-02, 1.335e-02, -2.970e-03, -1.058e-02, 2.051e-02, 6.362e-03, -2.487e-02, -6.941e-04)); + r += mul(s0_1, M4(-2.149e-02, 8.997e-03, 5.809e-02, 4.138e-02, -3.275e-02, -2.813e-02, 1.701e-02, -8.705e-03, 2.320e-02, 9.248e-03, -3.233e-02, -6.138e-03, 4.763e-02, 2.689e-02, -5.537e-02, -2.760e-02)); + r += mul(s0_2, M4(-1.414e-02, -3.483e-02, -7.279e-03, 1.521e-02, -8.604e-03, -5.970e-02, -3.428e-02, -1.553e-02, 3.362e-03, 3.209e-02, 2.829e-03, -1.605e-02, 1.413e-02, 6.860e-02, 3.001e-02, -1.223e-03)); + r += mul(s0_3, M4(-6.645e-02, -8.320e-03, 4.523e-02, 5.457e-02, 1.400e-02, 3.198e-02, 1.427e-02, 1.483e-02, -1.421e-01, -2.657e-02, 4.797e-02, 7.217e-02, -3.955e-02, -3.721e-02, 2.393e-02, 2.795e-02)); + r += mul(s0_4, M4(-1.653e-02, -1.199e-01, -4.775e-01, -3.048e-01, 5.775e-02, -2.554e-02, 1.568e-01, 3.992e-02, -1.368e-01, -2.363e-01, -5.499e-02, -1.315e-01, -2.829e-02, 1.148e-02, -4.298e-02, -2.297e-02)); + r += mul(s0_5, M4(2.388e-02, 9.640e-02, -6.766e-04, -1.894e-01, -1.938e-02, -1.417e-02, -6.162e-03, 1.409e-02, 1.217e-02, -8.992e-03, 2.264e-02, 8.724e-02, 7.060e-03, 1.580e-02, 1.648e-02, 4.897e-02)); + r += mul(s0_6, M4(7.503e-03, -2.970e-03, -5.068e-03, -1.602e-02, 1.288e-02, 9.609e-03, -1.090e-02, 1.443e-03, -1.047e-02, 1.581e-02, -7.625e-03, -7.056e-02, -2.884e-02, -3.020e-02, -2.549e-02, -4.848e-02)); + r += mul(s0_7, M4(2.075e-02, 1.442e-02, 4.774e-02, 2.273e-02, 1.258e-02, 1.637e-02, -7.148e-02, -5.842e-02, 1.055e-02, -3.364e-02, 1.822e-01, 2.194e-01, -2.953e-02, 
-2.531e-02, 4.235e-02, 4.981e-02)); + r += mul(s0_8, M4(-6.428e-03, 9.970e-03, 1.333e-02, 5.654e-02, -9.648e-03, 2.059e-04, -1.464e-02, -3.448e-02, 3.807e-03, 1.338e-02, -2.262e-02, -1.434e-02, 6.271e-03, -9.572e-03, 6.071e-03, 1.341e-02)); + r += mul(s1_0, M4(1.419e-02, 5.825e-03, 3.581e-03, -2.137e-03, 2.863e-03, 7.173e-03, 4.501e-02, -2.820e-02, 3.248e-02, -7.079e-03, 1.837e-02, 2.565e-02, 1.775e-02, 1.246e-02, -3.047e-02, 1.125e-02)); + r += mul(s1_1, M4(-4.685e-02, 1.148e-03, 7.450e-02, 5.961e-02, -7.743e-02, -1.134e-02, -2.803e-02, -8.041e-03, 1.388e-01, 1.785e-01, -2.667e-02, -3.292e-02, 2.367e-02, 1.317e-02, -5.723e-02, 6.215e-03)); + r += mul(s1_2, M4(-9.482e-03, -5.237e-02, -5.906e-04, 2.193e-02, 2.896e-03, -6.707e-02, -9.075e-03, -1.458e-02, -4.769e-05, 2.592e-02, 1.115e-02, -9.638e-03, 2.494e-02, 7.106e-02, 2.839e-02, 5.956e-03)); + r += mul(s1_3, M4(-6.437e-02, -4.261e-03, 6.120e-02, 6.179e-02, 3.619e-02, 6.905e-02, -7.007e-02, -9.677e-02, 1.439e-02, -5.513e-02, -1.084e-01, 9.009e-02, -2.376e-01, -7.979e-02, -7.095e-03, 8.423e-02)); + r += mul(s1_4, M4(-3.465e-02, -1.174e-01, -4.971e-01, -2.725e-01, -1.110e-01, -2.241e-01, -1.919e-01, 1.115e+00, 1.600e-01, 1.870e-01, -3.286e-01, -5.513e-01, -4.899e-01, 9.921e-01, -1.312e-01, -2.666e-01)); + r += mul(s1_5, M4(6.483e-03, 4.674e-02, 1.070e-02, -2.163e-01, -4.141e-03, -2.429e-02, 4.874e-02, -1.576e-01, -3.007e-02, 2.606e-02, 4.476e-02, 6.729e-02, 5.863e-02, -6.616e-02, 7.878e-03, 9.787e-02)); + r += mul(s1_6, M4(2.207e-02, 5.671e-03, 2.283e-03, -1.648e-02, 5.681e-03, -1.943e-03, 1.594e-02, 2.148e-02, -3.016e-02, 3.046e-04, 4.328e-02, -4.072e-02, -1.499e-02, -6.713e-03, -8.285e-02, 1.440e-03)); + r += mul(s1_7, M4(-4.522e-02, -1.953e-02, 3.004e-01, 1.255e-01, 6.459e-03, 3.954e-02, -1.323e-01, -1.315e-01, -1.715e-02, -5.240e-02, 1.653e-01, 2.243e-01, -2.645e-02, -2.654e-02, 3.446e-02, 1.267e-02)); + r += mul(s1_8, M4(1.441e-02, 2.868e-03, 4.941e-02, 2.607e-01, -5.114e-03, -6.024e-03, 3.945e-03, -4.669e-02, 
6.142e-03, 4.197e-03, -4.871e-03, 2.252e-04, 1.890e-02, 1.142e-02, -1.040e-02, 7.153e-02)); + r += mul(s2_0, M4(4.156e-03, 3.821e-03, -1.529e-03, 2.462e-03, -2.837e-02, -1.254e-02, -1.678e-02, -5.421e-03, -8.727e-03, -8.809e-03, -3.158e-02, -2.025e-02, -1.411e-02, 7.584e-03, 8.450e-02, 4.221e-02)); + r += mul(s2_1, M4(1.418e-02, -7.004e-03, -2.935e-02, -2.645e-02, -1.518e-02, -3.918e-02, -6.617e-02, -5.273e-02, -9.888e-02, -1.090e-02, 1.380e-01, 7.376e-02, -1.161e-02, -1.487e-02, 2.550e-02, 6.274e-02)); + r += mul(s2_2, M4(5.309e-03, 2.695e-02, 1.267e-02, -3.495e-03, 9.392e-03, 2.603e-02, 2.278e-02, 3.305e-03, -1.302e-02, -1.123e-01, -1.547e-02, 3.941e-02, 1.298e-03, -1.396e-02, 1.964e-03, 7.526e-03)); + r += mul(s2_3, M4(-1.822e-02, 8.636e-03, 8.368e-03, 1.102e-02, 6.230e-02, -2.289e-02, -1.296e-02, -1.681e-02, 4.747e-02, -1.882e-02, 9.110e-03, -1.084e-02, -2.726e-02, -2.000e-02, -1.814e-01, -7.070e-02)); + r += mul(s2_4, M4(-4.428e-02, -6.799e-02, -5.391e-03, -2.368e-02, 3.822e-01, 3.324e-01, 2.766e-01, 1.771e-01, 1.929e-01, 2.418e-01, -2.039e-01, 5.701e-03, 1.467e-01, 4.835e-02, 1.470e-01, -7.146e-02)); + r += mul(s2_5, M4(-9.316e-03, -2.872e-03, 7.287e-03, 5.032e-02, -2.617e-02, 9.548e-02, -3.550e-02, 9.550e-02, -4.338e-02, -2.402e-02, -4.187e-02, -2.445e-01, -5.388e-03, 3.789e-02, -2.563e-02, 2.686e-02)); + r += mul(s2_6, M4(9.817e-03, 4.676e-03, -6.399e-03, 7.844e-03, -1.019e-02, -8.003e-04, 5.167e-02, 1.419e-02, -9.988e-03, -2.480e-03, 2.983e-02, -6.955e-03, -4.089e-02, -1.849e-02, -3.747e-02, -2.679e-02)); + r += mul(s2_7, M4(3.122e-02, 1.641e-02, 5.238e-02, -1.886e-03, -1.368e-02, -1.047e-02, 4.063e-02, 8.081e-02, -2.436e-02, -1.314e-02, 7.406e-02, 1.255e-01, 1.328e-03, -1.480e-02, -2.203e-02, -6.794e-03)); + r += mul(s2_8, M4(-1.214e-02, 3.663e-02, -5.298e-02, 4.172e-02, -1.636e-03, -1.193e-02, 1.591e-02, 1.546e-02, -4.897e-03, -2.570e-02, 1.029e-02, -1.321e-02, 1.692e-03, -4.016e-03, 1.854e-02, -2.154e-02)); + r += mul(s3_0, M4(1.653e-02, 8.081e-04, 
-8.249e-04, -2.749e-03, -4.287e-02, -1.117e-02, -1.953e-02, -1.392e-02, -9.860e-03, -1.139e-02, -3.431e-02, -2.482e-02, 2.073e-03, 2.388e-02, 8.476e-02, 5.234e-02)); + r += mul(s3_1, M4(1.545e-02, 2.697e-03, -2.547e-02, -1.587e-02, 2.739e-02, -5.155e-02, -8.569e-02, -6.176e-02, -4.554e-02, -2.669e-02, 1.469e-01, 7.837e-02, -3.883e-02, -1.063e-02, 3.383e-02, 7.495e-02)); + r += mul(s3_2, M4(1.549e-02, -4.319e-03, 3.554e-02, -1.405e-02, 3.542e-03, 5.563e-02, 1.556e-02, -6.819e-03, -2.139e-02, -4.968e-02, -2.142e-02, 3.475e-02, -1.378e-02, -3.919e-02, -8.013e-03, -5.496e-03)); + r += mul(s3_3, M4(-3.531e-03, 9.257e-03, 1.262e-02, 4.105e-03, 4.193e-02, -2.331e-02, -2.424e-02, -2.262e-02, 5.437e-02, -3.147e-02, 2.348e-02, -2.547e-02, -1.490e-01, 2.063e-02, -1.968e-01, -2.503e-02)); + r += mul(s3_4, M4(-6.533e-02, -2.179e-02, 2.316e-02, -8.762e-03, 2.356e-01, 2.350e-01, 3.014e-01, 1.560e-01, 2.691e-01, 3.561e-01, -4.286e-01, -1.469e-01, 5.332e-01, -3.363e-01, 2.388e-01, -2.940e-01)); + r += mul(s3_5, M4(3.350e-01, -2.028e-01, -8.521e-02, -1.395e-02, -4.912e-02, 1.973e-02, -3.872e-02, 1.125e-01, -6.922e-02, -1.093e-01, -1.874e-02, -2.666e-01, -4.931e-02, 8.861e-02, -4.383e-02, 8.293e-03)); + r += mul(s3_6, M4(1.665e-02, -1.267e-03, 2.164e-03, 3.524e-03, -1.150e-02, -4.903e-03, 2.395e-02, 2.391e-03, -1.383e-02, 1.477e-03, 1.728e-02, -1.356e-02, 2.222e-03, 6.370e-03, -7.935e-02, -8.551e-03)); + r += mul(s3_7, M4(2.945e-02, 3.503e-02, 1.334e-02, 6.900e-02, -1.146e-02, -1.218e-02, 5.461e-02, 7.172e-02, -2.653e-02, -2.229e-02, 1.076e-01, 1.016e-01, -7.640e-03, -5.440e-03, 5.615e-02, -5.270e-02)); + r += mul(s3_8, M4(1.949e-02, -1.039e-01, 6.569e-01, -3.909e-01, -1.983e-03, -1.781e-02, -3.701e-03, -1.970e-02, -1.160e-02, -2.080e-02, -1.361e-02, 1.851e-03, -1.366e-02, -2.093e-02, -4.601e-03, -1.360e-02)); + r += V4(6.897e-04, 1.064e-03, 8.808e-04, 1.561e-03); + return tanh(r); +} + +void Pass8(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = 
(Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 
yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-6x8C-NVL.hlsl b/src/Effects/CuNNy/CuNNy-6x8C-NVL.hlsl new file mode 100644 index 000000000..3b139d452 --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-6x8C-NVL.hlsl @@ -0,0 +1,1247 @@ +// CuNNy 6x8C BILINEAR RGB NVL - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-D08N06 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1 + +#define l0(x, y) min16float((dot(float3(2.668e-01, 5.128e-01, 1.094e-01), O(INPUT, float2(x, y)).rgb) + -8.262e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(3.453e-03, -2.009e-02, 1.029e-02, -4.229e-02) * s0_0; + r += V4(-1.162e-01, -3.046e-01, 3.944e-01, -3.426e-02) * s0_1; + r += V4(4.098e-02, 2.644e-01, 7.739e-03, 1.740e-02) * s0_2; + r += V4(-5.645e-01, 3.510e-01, -1.381e-01, 3.015e-01) * s0_3; + r += V4(5.692e-01, 2.277e-02, -1.002e-02, -2.280e-01) * s0_4; + r += V4(1.874e-02, -2.802e-01, -2.867e-02, -9.688e-02) * s0_5; + r += V4(3.162e-02, -3.273e-01, -1.823e-03, 2.738e-02) * s0_6; + r += V4(9.148e-02, 3.054e-01, 4.438e-02, 2.153e-01) * s0_7; + r += V4(-7.020e-02, -1.003e-02, 3.250e-03, -1.590e-01) * s0_8; + r += V4(2.110e-03, -3.283e-04, -1.730e-02, -4.942e-03); + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + 
V4 r = 0.0; + r += V4(4.130e-03, -1.028e-02, -7.847e-02, -1.277e-01) * s0_0; + r += V4(1.850e-02, 1.097e-02, -1.303e-01, 1.772e-01) * s0_1; + r += V4(-3.426e-02, 3.168e-01, 9.948e-03, -4.789e-02) * s0_2; + r += V4(1.225e-03, 1.298e-02, 2.803e-01, -1.551e-02) * s0_3; + r += V4(-1.303e-02, -1.703e-02, 1.588e-01, 2.124e-01) * s0_4; + r += V4(5.410e-01, -2.998e-01, 2.618e-02, -1.909e-01) * s0_5; + r += V4(-1.382e-03, 4.087e-03, 1.879e-01, 4.719e-02) * s0_6; + r += V4(-3.849e-03, 1.123e-02, -2.463e-01, -5.789e-02) * s0_7; + r += V4(-5.129e-01, -2.681e-02, 3.530e-02, 7.564e-03) * s0_8; + r += V4(3.018e-03, 1.376e-02, -1.274e-02, 5.625e-03); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.042e-01, -4.946e-02, 6.554e-02, -8.392e-03, 1.283e-01, 1.651e-01, 3.683e-04, 
-1.051e-01, 4.040e-02, -6.936e-02, 1.557e-01, -1.129e-01, 2.308e-01, -4.099e-02, -5.587e-02, 5.584e-02)); + r += mul(s0_1, M4(-6.587e-02, -5.729e-02, 2.461e-01, -1.196e-02, -4.689e-02, 1.865e-01, 1.570e-02, 3.911e-03, -1.411e-01, -3.073e-02, 4.971e-02, -2.336e-01, 1.874e-01, -9.632e-02, 1.814e-01, -3.548e-01)); + r += mul(s0_2, M4(1.724e-01, 2.876e-01, 1.552e-01, -2.105e-01, 1.037e-01, 3.116e-01, 1.832e-01, -8.618e-02, 6.401e-02, 1.319e-01, 3.862e-02, 2.301e-01, -1.265e-02, -2.864e-01, -3.199e-02, 2.311e-01)); + r += mul(s0_3, M4(4.153e-02, -5.609e-02, -1.424e-01, 3.368e-02, 2.271e-01, -8.217e-02, -8.373e-02, -2.666e-01, -3.089e-01, -3.909e-02, -3.284e-02, -1.384e-01, 1.265e-02, 2.006e-01, -5.720e-02, 1.076e-01)); + r += mul(s0_4, M4(-4.261e-02, 1.184e-01, 3.302e-01, -2.949e-01, -3.369e-02, 3.576e-02, 1.019e-01, 3.097e-01, -4.891e-02, -2.452e-01, 5.598e-02, -2.664e-01, 6.785e-02, -1.832e-02, -6.347e-01, 2.930e-01)); + r += mul(s0_5, M4(5.791e-01, -1.328e-01, -2.570e-01, 2.606e-01, 8.937e-02, -2.239e-01, 6.957e-02, 1.786e-02, 2.350e-01, -4.333e-03, 9.579e-02, 2.193e-02, -2.945e-02, 1.651e-01, -8.800e-02, 3.369e-01)); + r += mul(s0_6, M4(-1.784e-01, -1.200e-01, -1.743e-01, 1.992e-01, -3.070e-02, -1.489e-01, -1.489e-01, 4.937e-03, -5.589e-01, -5.703e-01, 5.314e-01, -5.588e-01, -2.519e-02, 2.655e-03, -3.409e-02, 2.062e-01)); + r += mul(s0_7, M4(-4.764e-02, -2.769e-01, -8.199e-01, 6.832e-02, -1.419e-02, -5.326e-02, -2.143e-01, -6.217e-03, -2.033e-01, 9.656e-02, 1.715e-01, -2.919e-01, 1.069e-01, 2.729e-01, -8.952e-04, 1.155e-01)); + r += mul(s0_8, M4(1.953e-02, -3.145e-01, 1.220e-02, -1.279e-01, -1.510e-01, -1.802e-01, -9.842e-02, 1.264e-02, -2.215e-01, 2.586e-02, 4.424e-01, 6.816e-01, -7.124e-02, 1.132e-01, 4.702e-02, 4.041e-01)); + r += mul(s1_0, M4(9.269e-02, 1.047e-02, 5.079e-02, 5.864e-02, -3.747e-02, 1.270e-01, 1.255e-01, -1.602e-01, 7.921e-02, 5.447e-03, 1.268e-01, -4.616e-02, 2.253e-01, -9.203e-03, 6.893e-04, -8.571e-02)); + r += mul(s1_1, M4(-1.425e-01, 
3.062e-02, 2.404e-01, -3.486e-01, -4.895e-01, -1.163e-01, 1.275e-01, -2.587e-01, -4.745e-02, 5.669e-02, 7.794e-02, -1.012e-01, 8.594e-02, -4.235e-01, 3.494e-01, -3.638e-01)); + r += mul(s1_2, M4(-2.751e-01, -4.543e-01, 2.819e-01, -3.893e-01, -8.049e-03, 2.251e-01, 1.453e-01, -2.370e-01, 3.627e-02, 2.143e-01, 1.543e-02, 1.223e-01, 6.544e-02, -2.368e-01, 9.235e-03, 2.656e-01)); + r += mul(s1_3, M4(1.210e-01, 9.555e-02, -5.417e-02, 1.165e-01, -1.068e-01, 6.166e-02, -2.891e-02, -8.389e-02, -1.224e-01, 5.482e-02, 2.476e-02, -8.222e-03, -1.536e-01, 6.750e-02, -1.824e-01, 1.234e-01)); + r += mul(s1_4, M4(-2.644e-02, 1.267e-01, 4.029e-01, 2.617e-01, -4.951e-01, 1.263e-01, 1.485e-01, 5.693e-01, -8.725e-03, 2.043e-01, -4.077e-03, -1.607e-01, -2.110e-01, -4.477e-02, -5.539e-01, 3.658e-01)); + r += mul(s1_5, M4(2.017e-01, 6.315e-01, -2.568e-01, -6.606e-01, 4.404e-01, -4.315e-01, -2.281e-02, -3.790e-01, 6.808e-02, -3.220e-02, -7.074e-02, -8.168e-02, -3.823e-01, 1.807e-01, 9.908e-02, -7.475e-02)); + r += mul(s1_6, M4(-1.382e-01, -2.290e-01, -1.867e-01, 2.769e-01, -6.167e-02, -1.228e-01, -1.397e-01, 1.529e-01, -1.591e-02, -4.457e-02, 2.053e-01, 2.664e-02, -9.287e-02, 3.469e-02, 8.066e-02, -5.675e-02)); + r += mul(s1_7, M4(3.744e-01, -3.467e-01, -9.004e-01, 5.327e-01, 5.426e-02, -6.985e-02, -3.799e-01, 4.876e-01, -6.079e-02, 2.228e-03, 2.478e-02, -2.092e-01, 2.800e-02, 2.458e-01, -1.370e-02, -1.551e-01)); + r += mul(s1_8, M4(2.552e-01, 1.097e-01, -1.182e-01, 1.627e-01, 2.166e-01, -4.759e-03, -1.014e-01, 2.126e-02, 6.229e-02, 2.329e-02, 9.464e-02, 4.757e-02, 3.131e-03, -2.587e-03, -2.741e-02, -4.537e-02)); + r += mul(s2_0, M4(1.745e-01, 4.940e-01, 2.727e-02, -8.500e-02, -2.356e-01, -1.500e-01, 9.724e-03, -2.077e-01, -5.020e-02, -1.467e-01, 1.685e-01, 3.359e-01, 2.094e-01, 1.175e-02, -6.380e-02, 4.710e-02)); + r += mul(s2_1, M4(-3.113e-02, 8.027e-01, -4.982e-03, -1.428e-01, 6.573e-02, 4.074e-04, 1.637e-01, 1.466e-01, 3.238e-01, 2.433e-01, -3.147e-01, -1.136e-02, -3.248e-02, 
-1.969e-01, 9.390e-02, -2.485e-01)); + r += mul(s2_2, M4(-1.045e-01, 2.494e-01, -3.734e-02, 4.765e-02, 5.332e-03, -2.832e-01, -2.026e-01, -1.339e-02, -4.321e-01, -7.626e-01, 1.908e-01, 3.502e-01, 3.451e-01, 1.199e-01, -7.371e-02, -6.362e-02)); + r += mul(s2_3, M4(-1.629e-01, -4.320e-01, 7.872e-01, -2.766e-01, -1.373e-01, 1.317e-02, -1.461e-01, 2.414e-02, -9.297e-02, 3.045e-02, -6.804e-02, 8.275e-02, -3.640e-01, -1.441e-01, -3.555e-02, 1.300e-01)); + r += mul(s2_4, M4(-3.256e-01, -5.021e-01, 1.595e-01, 4.114e-01, 7.795e-02, 1.064e-01, -9.154e-02, 1.568e-01, -2.319e-01, -8.268e-02, 3.692e-01, -9.510e-03, -1.566e-01, 1.517e-01, 2.046e-01, -5.032e-02)); + r += mul(s2_5, M4(-2.343e-01, -3.166e-01, -2.043e-04, -5.327e-02, -2.782e-02, -2.989e-01, -7.909e-02, 1.890e-01, -2.144e-01, 5.248e-01, -3.855e-02, 1.994e-01, -3.525e-01, -6.465e-02, -1.340e-02, 1.749e-01)); + r += mul(s2_6, M4(2.183e-02, 1.909e-03, 3.262e-02, 7.862e-02, -3.753e-01, 2.833e-01, 4.159e-01, -2.270e-01, 2.925e-02, -2.160e-01, -1.200e-01, -3.268e-02, -1.949e-01, -1.118e-01, 8.766e-02, 2.260e-02)); + r += mul(s2_7, M4(-1.560e-02, -3.559e-02, 3.894e-02, -7.496e-02, 1.947e-01, 4.921e-01, 2.227e-01, -2.002e-01, 3.045e-01, -1.339e-01, -5.773e-01, -2.032e-01, -2.834e-02, -1.129e-01, -2.371e-01, -6.408e-03)); + r += mul(s2_8, M4(1.109e-02, -4.708e-02, -3.267e-02, -6.323e-02, 2.935e-01, 2.542e-01, 1.224e-01, -9.832e-02, -1.134e-02, -7.131e-02, -5.557e-02, 5.606e-01, 2.551e-01, 2.313e-02, -8.756e-02, 8.658e-02)); + r += mul(s3_0, M4(-4.977e-01, 7.734e-02, 1.035e-01, -5.555e-01, -1.226e-01, -7.049e-02, -6.882e-02, -3.950e-02, -5.240e-03, -1.275e-01, 1.254e-01, 1.743e-01, 5.163e-02, 5.000e-03, -8.731e-02, -6.371e-02)); + r += mul(s3_1, M4(-3.240e-02, -1.207e-01, 1.163e-01, -1.555e-01, 1.009e-01, -1.052e-01, 2.058e-01, 2.205e-01, 6.038e-02, 1.410e-01, -1.091e-01, -2.054e-02, -3.050e-01, -6.117e-03, 1.110e-01, -1.869e-01)); + r += mul(s3_2, M4(1.899e-01, 9.263e-02, -4.842e-02, 2.578e-02, -1.349e-01, -1.889e-01, 
-1.932e-01, 2.278e-02, -1.531e-01, -1.199e-01, 2.886e-03, 3.222e-02, -2.776e-02, 1.405e-01, -6.920e-02, -5.137e-01)); + r += mul(s3_3, M4(-4.189e-01, 1.880e-01, -1.234e-01, 7.659e-01, 5.997e-01, -1.043e-01, -1.229e-01, 6.046e-01, -1.916e-01, 5.904e-02, 2.624e-02, 1.703e-02, -1.891e-02, 3.916e-02, 1.597e-01, -1.509e-01)); + r += mul(s3_4, M4(5.301e-01, -1.048e-01, -8.784e-02, -4.903e-01, 3.227e-01, -4.585e-01, -2.122e-01, 3.939e-01, -1.169e-01, -8.373e-03, 1.272e-01, 6.810e-02, 2.517e-01, 2.839e-01, 1.257e-01, -4.846e-01)); + r += mul(s3_5, M4(1.499e-01, -2.604e-01, -1.691e-01, -3.895e-01, -4.073e-01, 3.564e-02, 8.605e-02, -2.012e-01, 4.062e-02, 4.141e-02, 1.726e-02, -9.629e-02, 1.348e-01, -9.468e-02, 2.113e-03, -7.229e-02)); + r += mul(s3_6, M4(-5.457e-02, 1.150e-01, -1.297e-01, 2.096e-01, 4.355e-01, -6.530e-02, -6.438e-01, 6.335e-01, 5.057e-02, -2.750e-01, -2.055e-01, 8.634e-02, -5.581e-01, -1.185e-01, 2.945e-02, 1.703e-01)); + r += mul(s3_7, M4(-1.032e-02, -4.045e-02, 9.848e-02, -1.865e-01, -1.078e-01, -4.990e-01, 4.652e-01, -1.320e-01, 1.969e-01, -1.816e-01, -4.724e-01, 1.471e-01, 5.619e-01, 4.145e-02, -3.606e-02, 8.608e-01)); + r += mul(s3_8, M4(4.007e-02, -5.339e-02, -2.525e-04, 1.082e-01, -2.520e-01, 2.832e-01, 4.025e-01, 1.532e-01, 1.244e-01, -9.873e-02, -8.482e-02, 5.278e-02, 1.618e-01, -3.404e-02, 2.947e-02, 2.445e-01)); + r += V4(6.864e-03, -7.720e-03, -1.170e-02, 2.714e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.307e-02, -6.220e-02, 7.854e-02, 3.023e-02, 1.780e-02, -4.660e-02, 5.384e-02, -3.152e-02, 1.070e-01, -8.460e-02, 1.359e-01, -8.144e-02, -1.251e-01, -7.183e-02, -5.920e-02, -1.442e-01)); + r += mul(s0_1, 
M4(-7.721e-02, 5.857e-02, 1.973e-01, 3.698e-02, -1.199e-01, -2.585e-02, 7.877e-02, -1.865e-01, -3.580e-01, 1.767e-01, -1.279e-02, -4.554e-02, -1.928e-01, 1.017e-01, 2.319e-01, -2.160e-01)); + r += mul(s0_2, M4(-9.376e-02, -1.128e-01, -6.333e-02, 3.354e-02, -1.542e-02, 7.209e-02, 2.742e-01, 2.597e-02, 5.620e-02, -6.394e-02, -7.670e-03, 4.574e-02, -7.222e-02, 6.187e-02, -1.593e-01, 3.579e-03)); + r += mul(s0_3, M4(5.476e-02, 8.972e-02, -2.571e-01, 2.037e-01, 7.994e-02, -7.436e-06, -1.929e-01, -8.885e-02, 2.996e-01, 3.133e-01, 1.697e-01, 2.396e-02, 2.580e-01, -8.171e-02, -5.660e-01, 1.743e-01)); + r += mul(s0_4, M4(-7.670e-01, 4.858e-01, 1.888e-01, -3.512e-01, 1.555e-01, 1.685e-01, 1.297e-01, -1.428e-01, -1.589e-01, 6.807e-02, 2.787e-01, -7.520e-01, -3.137e-02, -2.188e-01, 4.388e-01, 1.228e-01)); + r += mul(s0_5, M4(2.404e-01, -3.138e-01, 2.209e-01, 9.275e-02, 5.628e-02, 7.736e-02, -3.531e-01, -7.307e-02, 1.796e-01, -2.184e-01, 2.552e-01, 2.046e-01, -2.306e-01, 1.558e-01, -4.967e-02, -1.155e-01)); + r += mul(s0_6, M4(1.131e-01, -2.256e-02, 1.024e-01, 1.899e-01, 1.293e-01, 2.068e-02, -1.174e-01, 1.143e-01, -5.395e-02, 4.318e-02, -9.846e-02, 1.123e-01, 4.766e-02, -2.608e-01, -1.273e-01, -2.761e-01)); + r += mul(s0_7, M4(1.882e-01, -5.930e-02, -2.394e-01, 1.931e-01, 1.246e-01, -2.508e-02, -7.158e-02, -2.783e-02, -2.621e-01, 7.212e-01, -1.169e-01, -3.426e-01, -1.276e-01, 6.132e-02, 2.120e-01, -3.544e-01)); + r += mul(s0_8, M4(1.605e-02, 4.498e-02, 3.280e-01, -9.840e-02, 1.066e-01, -2.167e-02, 1.566e-01, -6.055e-02, -7.933e-02, 2.419e-01, -6.969e-01, -1.372e-01, 6.544e-02, -4.269e-02, -3.820e-04, 1.499e-01)); + r += mul(s1_0, M4(-8.681e-02, -1.087e-01, 9.044e-02, -1.616e-01, 5.700e-02, -1.112e-01, 7.944e-02, -1.029e-01, 2.576e-02, -8.748e-02, 1.241e-01, 7.166e-03, 2.636e-02, 2.446e-02, -4.103e-02, 1.675e-01)); + r += mul(s1_1, M4(1.572e-02, 3.793e-02, 2.414e-01, -2.281e-01, -8.712e-02, -6.538e-02, 1.419e-01, -2.081e-01, -9.182e-02, 2.571e-02, 1.355e-01, -1.110e-01, 
-2.355e-01, 2.339e-01, 1.910e-02, -1.032e-01)); + r += mul(s1_2, M4(1.797e-02, -6.616e-02, -1.663e-02, -1.947e-01, -3.057e-02, 5.054e-02, 1.722e-01, -1.862e-02, 7.327e-03, 2.610e-02, -3.037e-01, 1.290e-01, -5.838e-02, 2.218e-02, -5.471e-02, 6.622e-02)); + r += mul(s1_3, M4(7.168e-03, 8.656e-02, -1.933e-01, 1.346e-01, 8.983e-04, -7.273e-03, -1.299e-01, 4.448e-02, -1.194e-02, 1.806e-01, 2.176e-01, 9.534e-02, 2.997e-01, -4.100e-02, -6.068e-01, 3.835e-01)); + r += mul(s1_4, M4(-2.998e-01, 4.814e-01, 1.137e-01, -9.396e-01, -1.303e-01, 3.726e-02, 2.731e-01, 4.488e-01, -1.218e-01, 1.669e-01, 1.251e-01, -7.614e-02, 2.104e-01, -1.051e-01, 1.398e-01, 3.838e-01)); + r += mul(s1_5, M4(4.201e-01, -1.504e-01, 1.123e-01, 1.165e-01, 7.321e-03, 1.057e-01, -5.330e-01, -1.028e-01, 6.962e-02, 1.858e-01, 1.840e-01, -1.049e-01, 4.850e-02, 2.241e-01, -2.765e-01, 2.054e-02)); + r += mul(s1_6, M4(1.442e-01, -6.238e-02, 1.505e-01, -7.470e-02, 8.926e-02, -9.376e-03, 2.118e-02, 1.368e-01, 5.482e-02, 1.042e-01, -1.818e-01, 1.364e-01, 8.520e-02, -2.172e-01, -1.501e-01, -1.698e-01)); + r += mul(s1_7, M4(2.163e-01, -6.139e-02, -2.190e-01, -1.932e-01, -4.070e-02, -5.123e-02, -9.987e-02, 1.412e-01, -1.614e-01, 6.292e-01, -1.916e-01, -8.262e-02, -2.486e-01, 8.627e-03, 2.816e-01, -2.516e-02)); + r += mul(s1_8, M4(-4.631e-02, -2.598e-02, -8.044e-02, -1.965e-01, 9.235e-02, -2.282e-02, -9.376e-02, 1.773e-02, -9.385e-02, 2.837e-01, -5.315e-01, 1.079e-01, 3.615e-02, -3.025e-02, -8.471e-02, 1.044e-01)); + r += mul(s2_0, M4(1.066e-01, 7.468e-02, 1.383e-01, -2.494e-01, -5.715e-02, 1.462e-01, -4.641e-02, -6.538e-02, -3.077e-02, 6.170e-02, 2.492e-01, 1.307e-01, 1.527e-01, 4.648e-02, -1.311e-01, 1.123e-02)); + r += mul(s2_1, M4(-2.184e-02, -8.723e-02, -6.092e-02, 3.431e-03, 9.186e-02, 5.499e-03, 2.798e-01, -1.613e-02, 2.447e-01, 2.809e-02, -3.777e-02, 4.224e-02, 9.632e-02, -3.813e-02, -6.021e-02, -5.802e-02)); + r += mul(s2_2, M4(7.639e-02, 1.966e-02, 2.578e-01, 1.208e-01, 2.502e-02, -3.256e-02, -1.851e-01, 
-2.343e-02, -2.017e-01, -5.456e-02, -1.051e-01, -1.624e-01, 2.899e-02, -1.224e-03, 3.144e-01, 1.033e-01)); + r += mul(s2_3, M4(-1.768e-01, 1.433e-01, 1.390e-01, -1.494e-01, -3.681e-02, -1.480e-01, -3.328e-01, 4.138e-02, -1.058e-01, 2.190e-01, 3.278e-01, -2.495e-01, 8.836e-02, 1.228e-01, -3.728e-01, 3.534e-01)); + r += mul(s2_4, M4(5.058e-02, -9.077e-02, -7.532e-02, 9.344e-02, 1.096e-01, 5.318e-02, -2.979e-01, 1.779e-01, -1.682e-01, 3.371e-01, 1.964e-01, -2.526e-01, -3.013e-01, 5.184e-02, 3.613e-01, 5.832e-02)); + r += mul(s2_5, M4(-4.613e-02, 3.121e-02, 5.499e-02, 7.645e-02, -1.745e-01, -4.111e-02, 3.210e-01, -2.519e-02, -8.660e-02, 4.306e-01, 5.642e-01, -1.645e-01, -6.127e-02, 3.565e-02, 3.529e-02, 5.166e-02)); + r += mul(s2_6, M4(-1.482e-01, 3.199e-02, -1.302e-02, 7.168e-02, 2.378e-01, -7.776e-02, 2.090e-01, 1.870e-01, 1.820e-01, 8.673e-02, -9.481e-05, 2.911e-01, 6.907e-02, -4.005e-02, -1.265e-01, -1.030e-01)); + r += mul(s2_7, M4(1.354e-01, 2.906e-03, 6.018e-02, 8.451e-02, -1.123e-02, -1.011e-01, 7.115e-02, -8.680e-02, 2.088e-01, 2.977e-01, 8.741e-02, 2.908e-05, -2.357e-01, 3.996e-02, -1.434e-02, -1.192e-01)); + r += mul(s2_8, M4(2.121e-03, 2.972e-02, -5.123e-02, -5.465e-03, -1.124e-01, 7.063e-02, 1.180e-02, -1.732e-01, -1.739e-01, 8.947e-02, -8.452e-02, 4.847e-01, -5.393e-02, -4.491e-02, -3.357e-02, -5.699e-03)); + r += mul(s3_0, M4(4.013e-01, 1.153e-01, 5.353e-03, -2.882e-01, -5.749e-02, 5.224e-02, -2.923e-02, -3.499e-02, 2.801e-02, -1.767e-02, 9.020e-02, -8.928e-02, 9.319e-02, -3.256e-02, -4.600e-02, -2.629e-01)); + r += mul(s3_1, M4(1.098e-01, -1.177e-03, -2.211e-01, 1.097e-02, 3.512e-02, 1.884e-02, 2.745e-01, -1.055e-01, 3.093e-02, 7.905e-02, 5.369e-02, 1.962e-01, -8.069e-03, -1.057e-01, -5.889e-02, 6.130e-02)); + r += mul(s3_2, M4(1.107e-01, -2.817e-02, 8.050e-02, 6.254e-02, -1.035e-02, -7.545e-02, -1.390e-01, -8.220e-02, -7.958e-02, -1.110e-01, -2.425e-01, -8.612e-02, 8.152e-03, -1.041e-01, 2.705e-01, -2.947e-03)); + r += mul(s3_3, M4(-1.766e-01, 
2.151e-01, 5.500e-01, 1.463e-01, -3.390e-01, -9.116e-02, -1.704e-01, 9.677e-02, -1.434e-01, 2.198e-01, 4.151e-01, -1.036e-01, 2.908e-01, 1.355e-01, -3.566e-01, -1.082e-01)); + r += mul(s3_4, M4(1.299e-01, -6.859e-02, -4.033e-01, 2.197e-01, -1.170e-01, 9.324e-03, -2.593e-01, 1.434e-01, -1.492e-01, 4.124e-01, 3.600e-01, -1.676e-01, 2.548e-02, -1.581e-02, 1.796e-01, -3.075e-01)); + r += mul(s3_5, M4(-5.624e-02, 1.779e-02, 5.036e-02, -1.849e-01, -1.064e-01, -1.738e-02, 3.161e-01, 5.391e-02, -5.693e-02, 3.279e-01, -6.516e-02, -3.823e-02, -1.512e-01, 9.958e-03, -4.383e-02, -1.996e-01)); + r += mul(s3_6, M4(-1.586e-01, -2.236e-03, -8.471e-02, -1.331e-01, -3.366e-02, -3.968e-01, -2.313e-01, -5.757e-01, 1.606e-01, 3.686e-02, 1.387e-02, 9.619e-02, -1.653e-01, -1.318e-01, -1.257e-01, -1.748e-01)); + r += mul(s3_7, M4(1.938e-01, 1.519e-02, 2.123e-02, 1.121e-01, 5.456e-02, -1.568e-01, 2.224e-01, -5.000e-01, -6.363e-02, 2.205e-01, -3.627e-02, 1.547e-01, 8.136e-03, 2.279e-02, 4.444e-01, -5.358e-02)); + r += mul(s3_8, M4(-5.716e-02, -5.222e-03, -1.156e-02, -6.042e-02, -1.024e-01, 7.483e-02, -2.342e-02, 1.065e-01, -5.690e-02, 6.041e-02, -1.489e-01, -5.921e-02, -1.887e-02, -2.115e-01, 6.895e-02, -7.626e-02)); + r += V4(-1.499e-01, -1.776e-03, -6.774e-03, 1.322e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = 
-max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r 
= 0.0; + r += mul(s0_0, M4(-1.716e-02, -1.345e-01, 1.032e-01, 4.104e-02, 8.711e-02, 8.589e-02, 8.094e-02, -4.856e-02, 1.430e-01, 7.623e-02, -6.074e-02, 9.124e-02, 3.848e-03, -2.421e-02, -3.542e-02, 2.262e-02)); + r += mul(s0_1, M4(5.328e-02, -4.876e-02, 2.747e-01, -3.134e-01, 5.165e-02, -8.168e-02, -9.645e-02, 9.013e-02, 5.684e-01, 3.945e-01, -5.757e-02, 3.315e-01, 2.290e-01, -2.012e-01, -1.305e-01, 1.678e-01)); + r += mul(s0_2, M4(-5.859e-02, -1.364e-01, 1.517e-02, -2.630e-01, 2.100e-02, 1.196e-02, -6.509e-03, 3.381e-02, 1.752e-01, 1.950e-01, 3.989e-02, -3.574e-03, -1.470e-01, -2.938e-01, 9.845e-02, 8.800e-02)); + r += mul(s0_3, M4(-3.231e-02, -1.792e-01, 4.669e-01, 2.254e-01, -5.143e-03, -1.238e-01, -1.704e-01, -2.428e-01, -1.350e-01, -1.620e-01, -2.490e-01, 5.236e-02, -9.524e-02, -5.944e-02, -1.270e-01, 2.059e-01)); + r += mul(s0_4, M4(-6.691e-02, 8.980e-03, 1.013e-02, 1.386e-01, -8.353e-02, -1.772e-01, -6.338e-02, -3.381e-01, -6.478e-02, 2.335e-01, 6.344e-02, 2.214e-01, 1.850e-01, 2.665e-02, -1.302e-01, -3.592e-01)); + r += mul(s0_5, M4(-6.946e-02, -3.227e-03, 1.130e-01, -3.646e-01, 6.317e-02, -2.508e-01, 1.449e-02, 1.909e-01, 4.092e-02, -1.928e-01, 4.181e-02, 1.998e-01, -1.042e-01, -1.643e-01, 1.123e-01, -1.423e-01)); + r += mul(s0_6, M4(4.571e-04, -2.226e-01, 1.542e-01, -1.980e-01, 2.272e-01, 6.605e-02, -1.823e-01, -3.202e-01, -1.017e-02, 4.403e-02, -1.199e-01, -6.896e-02, -2.050e-02, -4.035e-03, -2.686e-02, 6.758e-02)); + r += mul(s0_7, M4(-2.184e-02, 2.620e-01, -1.598e-01, -1.310e-01, -1.282e-02, -6.247e-02, -5.500e-02, -1.549e-01, -1.201e-02, 2.664e-02, -1.159e-01, 2.574e-02, -4.379e-02, 4.502e-02, -1.428e-01, 2.220e-01)); + r += mul(s0_8, M4(8.156e-03, -1.909e-03, 1.260e-01, -8.108e-02, 8.324e-02, -1.054e-01, 3.860e-02, -5.290e-02, -1.608e-03, -3.940e-02, -1.942e-02, -8.427e-02, -5.776e-02, -1.097e-01, -1.933e-02, 1.763e-01)); + r += mul(s1_0, M4(-2.796e-02, -6.590e-02, -1.873e-01, 2.170e-01, -8.312e-02, -2.841e-02, 1.736e-01, -1.202e-01, 6.437e-02, 
1.920e-03, 1.832e-01, -2.252e-01, 1.106e-01, 1.346e-01, 8.420e-03, -2.089e-01)); + r += mul(s1_1, M4(7.340e-02, 2.419e-02, 8.738e-03, 1.919e-01, -6.706e-02, -1.429e-02, -9.722e-02, -1.269e-01, -4.527e-02, -1.184e-01, -3.381e-03, -1.308e-01, 1.871e-01, 2.487e-01, -2.376e-02, -1.436e-01)); + r += mul(s1_2, M4(1.461e-01, 1.143e-01, -1.074e-01, 8.386e-02, -1.447e-01, -1.686e-01, 9.007e-02, -1.183e-01, 5.843e-02, -7.875e-02, -6.239e-02, 5.076e-02, -8.418e-02, -5.289e-02, 7.695e-02, -4.506e-02)); + r += mul(s1_3, M4(-1.277e-01, -9.044e-02, -2.494e-01, 2.466e-01, -3.309e-02, -1.805e-01, 2.197e-01, 9.758e-02, 2.339e-02, -2.082e-01, 2.091e-01, -3.370e-01, 1.096e-01, 1.691e-01, -1.687e-01, 1.317e-03)); + r += mul(s1_4, M4(7.503e-02, -9.570e-02, -3.911e-01, 3.020e-01, -3.351e-01, 3.397e-01, -1.038e-01, 4.915e-01, -2.002e-01, 2.340e-01, 1.700e-01, 1.536e-01, 2.331e-01, 3.055e-01, -7.175e-02, -2.936e-01)); + r += mul(s1_5, M4(1.693e-01, 6.752e-02, -6.320e-02, 1.731e-01, 5.053e-02, 1.589e-01, -3.209e-02, 2.180e-02, -2.516e-02, -3.550e-01, -6.057e-02, 1.766e-01, 6.070e-02, 2.377e-01, 1.420e-01, -1.750e-01)); + r += mul(s1_6, M4(5.968e-02, 1.878e-01, -7.204e-02, -1.839e-01, 9.632e-02, 4.321e-02, 1.055e-02, -5.708e-02, -5.873e-02, -3.095e-02, -2.059e-02, 1.277e-02, -1.520e-02, 1.516e-01, -2.994e-01, -7.694e-02)); + r += mul(s1_7, M4(-5.850e-02, 3.689e-02, -2.758e-01, -5.820e-02, -7.878e-02, 1.943e-01, 1.318e-01, -6.132e-03, 3.420e-02, 1.539e-01, -1.353e-01, 1.066e-01, -1.837e-02, 1.026e-01, -2.007e-01, 1.803e-02)); + r += mul(s1_8, M4(3.459e-02, -7.917e-02, -6.715e-02, -6.375e-02, 5.512e-02, -1.532e-01, -1.008e-02, 3.281e-02, 2.993e-02, 6.528e-02, 1.202e-02, -1.341e-01, 1.046e-01, 2.568e-01, -3.195e-02, -6.128e-03)); + r += mul(s2_0, M4(-1.676e-01, 6.091e-02, 1.021e-01, 1.900e-01, 6.284e-01, 3.015e-01, 1.076e-01, -4.315e-01, 9.855e-02, 7.527e-02, 1.173e-01, 1.454e-01, -1.936e-02, -7.795e-02, 4.561e-02, -1.009e-01)); + r += mul(s2_1, M4(-1.538e-01, 1.924e-01, -2.160e-01, 1.121e-02, 
3.876e-01, 6.074e-01, 1.912e-01, 1.107e+00, -1.019e-01, 2.046e-02, 4.697e-01, 2.091e-02, -1.841e-01, -4.174e-02, 2.999e-01, 8.326e-02)); + r += mul(s2_2, M4(-9.220e-02, 1.825e-01, -1.562e-01, 3.358e-01, 5.226e-02, 6.678e-01, 3.058e-01, 2.938e-01, 1.706e-01, 8.658e-02, -6.464e-02, -3.511e-01, 4.983e-02, 1.655e-01, 6.151e-02, -2.758e-01)); + r += mul(s2_3, M4(-1.193e-01, -8.883e-03, -6.676e-02, -1.863e-01, 5.507e-01, 2.139e-01, -2.313e-01, -3.117e-01, 7.110e-03, -2.253e-02, 2.619e-02, -1.450e-01, 2.895e-02, 1.838e-02, 1.456e-02, -1.848e-02)); + r += mul(s2_4, M4(8.266e-01, 1.957e-01, -5.592e-01, -1.102e+00, -1.108e+00, -2.799e-01, 2.460e+00, -2.017e+00, -1.304e-01, 5.934e-02, 2.235e-01, -1.531e-01, 2.761e-01, -2.611e-02, -2.482e-01, -2.947e-02)); + r += mul(s2_5, M4(1.606e-01, 1.278e-02, -9.921e-02, 9.180e-02, 1.138e-01, -3.889e-02, 1.627e-01, 2.813e+00, 5.255e-02, -2.075e-02, -1.651e-01, -1.358e-01, -1.813e-01, 3.018e-01, 3.389e-04, -3.242e-01)); + r += mul(s2_6, M4(-1.029e-02, 1.966e-01, 9.284e-03, 3.095e-01, -1.677e-01, -5.509e-02, -1.289e+00, -3.562e-02, 2.497e-02, 1.040e-01, -1.576e-01, -1.242e-01, -6.614e-02, -2.606e-02, -2.151e-02, 9.371e-02)); + r += mul(s2_7, M4(2.008e-01, -2.042e-01, 3.626e-03, -8.524e-03, -3.760e-01, 3.521e-01, -4.482e-01, -7.956e-01, -1.076e-01, -2.298e-02, 1.228e-01, 2.675e-01, 8.843e-02, 2.774e-01, -1.114e-01, 6.773e-02)); + r += mul(s2_8, M4(7.028e-03, 2.032e-01, -2.760e-01, -5.645e-02, 7.888e-02, 4.420e-01, 1.608e-01, 7.254e-02, 8.015e-02, -1.828e-01, -1.142e-02, -2.063e-02, -1.091e-01, -3.137e-02, 2.670e-02, -1.370e-03)); + r += mul(s3_0, M4(3.985e-02, -4.648e-04, -6.229e-02, 1.024e-02, 2.510e-02, -1.833e-01, -2.537e-02, -5.906e-02, 1.551e-01, 1.097e+00, -9.003e-01, 9.391e-01, -1.978e-02, -5.407e-02, 2.358e-01, 1.039e-01)); + r += mul(s3_1, M4(-1.454e-02, 6.619e-02, -5.213e-03, -5.554e-02, 4.323e-02, 1.004e-01, 1.153e-01, 1.370e-01, 4.502e-01, 3.804e-01, 2.568e-01, 7.781e-01, -3.095e-02, 3.557e-02, 1.035e-01, -2.194e-02)); + r += 
mul(s3_2, M4(-3.902e-02, -1.607e-01, 5.826e-02, 1.034e-02, -2.753e-03, 1.175e-01, 3.703e-02, 1.375e-02, 7.297e-02, 2.820e-01, 5.462e-04, -5.188e-02, 2.212e-01, 1.518e-01, -1.325e-01, 9.074e-02)); + r += mul(s3_3, M4(-5.867e-02, -1.032e-01, -2.187e-02, 2.861e-02, 6.211e-02, 8.308e-02, -1.475e-01, 3.146e-02, -1.838e-01, -1.555e-01, -8.405e-01, -3.969e-01, -2.232e-02, -1.254e-01, 1.786e-01, -2.215e-01)); + r += mul(s3_4, M4(1.142e-01, -2.823e-01, -3.135e-01, 2.494e-01, -2.510e-01, 1.359e-01, 2.920e-01, 3.156e-03, -1.326e-01, -4.658e-01, 2.085e-01, -2.149e-01, 3.455e-01, 5.045e-01, -3.612e-01, 2.250e-01)); + r += mul(s3_5, M4(9.107e-03, -2.388e-01, 8.027e-02, -1.974e-01, 2.763e-01, -7.958e-02, 1.082e-01, 1.907e-01, 1.002e-01, -7.503e-02, 1.903e-02, 1.209e-01, -3.394e-01, -5.029e-03, -2.791e-01, -3.699e-02)); + r += mul(s3_6, M4(-1.739e-02, 6.326e-02, 8.739e-02, 6.420e-02, -3.866e-02, -9.341e-02, -2.571e-01, -1.428e-01, -4.662e-02, 8.787e-02, -2.799e-01, -2.134e-01, 1.020e-02, -2.975e-02, 7.063e-02, -1.584e-01)); + r += mul(s3_7, M4(1.056e-01, -1.006e-01, 1.313e-01, 1.137e-02, -9.836e-02, -4.874e-02, -2.130e-01, -1.634e-01, -5.612e-02, -1.760e-01, -1.908e-01, 1.199e-01, -1.486e-02, 1.704e-01, -7.049e-03, -1.845e-01)); + r += mul(s3_8, M4(-1.343e-01, -2.351e-02, -1.138e-01, 1.969e-02, -2.157e-02, -2.178e-01, 6.627e-02, 2.071e-02, -8.215e-02, -2.077e-01, 6.798e-02, 3.783e-02, -4.294e-02, 2.298e-01, 3.238e-01, -3.153e-01)); + r += V4(-2.216e-03, -1.314e-01, -1.773e-02, 2.580e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.193e-01, -1.140e-01, -1.286e-01, 1.857e-01, 1.764e-02, 2.680e-02, -2.096e-02, -2.392e-02, 1.995e-02, -1.275e-01, 
-1.179e-02, -9.041e-02, 2.080e-02, -2.054e-02, -6.917e-02, -3.731e-02)); + r += mul(s0_1, M4(5.910e-02, -3.300e-02, 2.187e-01, -4.265e-02, -1.050e-02, -3.859e-02, 7.390e-02, 1.629e-02, 4.529e-02, -1.788e-01, 4.091e-01, -6.928e-02, 1.657e-01, -4.254e-03, -3.151e-01, 2.919e-01)); + r += mul(s0_2, M4(-4.341e-02, 2.223e-02, -1.008e-01, 8.143e-02, -9.846e-03, 5.909e-02, -2.690e-02, 3.039e-02, 1.086e-01, -6.404e-02, -6.064e-02, -1.898e-01, 9.368e-02, 3.736e-03, -3.105e-02, 6.931e-02)); + r += mul(s0_3, M4(-3.157e-02, -1.135e-02, -4.165e-01, -1.928e-01, -7.721e-02, 1.880e-01, -1.479e-01, 1.343e-01, 1.394e-01, -1.888e-01, 4.132e-02, 3.282e-02, 1.008e-01, 3.034e-02, -1.672e-01, 2.711e-01)); + r += mul(s0_4, M4(5.348e-02, -7.725e-02, 1.096e-01, 1.428e-01, 2.230e-02, 4.346e-01, -1.861e-01, 2.064e-01, 9.964e-02, -1.543e-01, 2.040e-01, -2.667e-01, 2.274e-01, -4.279e-02, 2.191e-01, 1.740e-01)); + r += mul(s0_5, M4(-4.585e-02, 7.224e-02, 2.702e-02, -5.440e-02, -8.830e-02, 1.219e-01, -5.870e-02, -1.575e-02, -2.895e-01, 1.297e-01, -2.219e-01, -3.090e-01, 1.459e-02, 6.562e-02, 8.912e-02, 3.619e-02)); + r += mul(s0_6, M4(5.821e-02, -1.235e-01, 2.805e-01, 1.661e-02, -1.411e-01, 4.756e-01, 1.758e-02, 2.091e-01, -7.398e-02, -1.141e-02, 9.094e-03, 5.836e-02, -5.062e-02, -1.953e-02, 9.519e-03, 1.514e-01)); + r += mul(s0_7, M4(-2.608e-02, 5.385e-02, 2.445e-01, -2.328e-02, -1.261e-02, 3.237e-01, -1.273e-01, -9.910e-02, 6.702e-03, -9.507e-03, 1.413e-01, -8.367e-02, 6.338e-02, 7.485e-02, 1.797e-01, -7.493e-03)); + r += mul(s0_8, M4(-1.616e-01, 9.600e-02, 5.955e-02, -1.206e-01, -1.295e-02, -7.217e-03, -9.834e-02, 1.012e-02, -9.242e-02, -8.508e-03, -8.789e-02, -1.357e-01, 3.131e-02, 1.343e-02, -8.285e-02, 1.658e-02)); + r += mul(s1_0, M4(-1.426e-02, 1.560e-02, -2.351e-01, 1.411e-02, 6.414e-02, -1.062e-02, -6.187e-02, 3.851e-02, -1.255e-01, -2.124e-01, 5.334e-02, -1.702e-01, -1.592e-02, 8.683e-02, 3.298e-02, -1.396e-01)); + r += mul(s1_1, M4(2.771e-02, 1.236e-01, 3.715e-01, 3.765e-03, 1.343e-01, 
-8.645e-02, -2.129e-01, 2.529e-01, -3.053e-01, 6.211e-02, 6.285e-01, 6.935e-02, -1.150e-01, -1.477e-01, 1.362e-01, -7.007e-02)); + r += mul(s1_2, M4(-1.013e-01, 2.877e-02, -1.274e-01, 6.261e-02, 4.531e-02, -6.713e-02, -1.651e-02, -9.269e-02, -1.616e-01, -3.828e-03, -3.715e-01, 1.978e-03, 2.994e-02, 5.941e-02, -1.039e-01, -9.121e-02)); + r += mul(s1_3, M4(1.614e-01, 2.806e-01, -5.564e-01, 1.712e-01, 3.865e-02, -9.885e-02, 1.400e-01, -1.006e-01, 3.954e-02, -7.045e-02, 2.290e-01, 5.378e-03, -4.572e-02, 5.276e-02, 3.839e-02, 7.448e-02)); + r += mul(s1_4, M4(-2.672e-02, 2.006e-01, 2.841e-01, -1.999e-02, 1.419e-02, 8.672e-02, 2.153e-01, 2.087e-01, 1.046e-02, 9.963e-01, 6.317e-01, 7.537e-01, -2.202e-01, -7.516e-02, 3.076e-01, -1.556e-01)); + r += mul(s1_5, M4(-3.143e-01, 5.866e-02, -1.605e-01, -1.615e-01, -8.577e-02, 7.590e-03, 2.199e-01, 9.684e-03, -2.125e-02, -2.988e-02, -4.535e-01, 5.538e-02, -2.528e-01, 1.243e-01, 1.700e-01, -1.200e-01)); + r += mul(s1_6, M4(-1.574e-01, 6.763e-02, 4.039e-02, 1.469e-01, 2.257e-03, 1.424e-01, -1.655e-01, 8.329e-02, -7.065e-02, -2.476e-01, -9.983e-02, 1.232e-01, -7.377e-02, 1.521e-02, -3.496e-02, -1.429e-02)); + r += mul(s1_7, M4(-3.125e-04, -2.618e-03, -6.887e-02, -2.244e-01, -4.241e-02, -1.978e-01, 1.768e-01, -1.142e-01, 3.186e-02, -8.708e-02, 1.963e-01, 1.347e-01, -9.015e-02, -1.290e-01, 1.373e-01, -1.238e-01)); + r += mul(s1_8, M4(-2.192e-01, 5.605e-04, -1.505e-01, -2.801e-01, 1.144e-01, -1.020e-01, -5.691e-02, 1.329e-01, -4.557e-02, -5.067e-02, -5.847e-03, 1.539e-02, -2.222e-01, -1.948e-02, -7.628e-02, -1.468e-01)); + r += mul(s2_0, M4(1.248e-02, -6.067e-02, 6.221e-03, -2.587e-01, 1.340e-01, 9.183e-02, 1.780e-01, 1.129e-01, -5.239e-03, 7.200e-03, -8.953e-03, 1.245e-01, -5.878e-02, -5.848e-02, 4.972e-02, -1.553e-02)); + r += mul(s2_1, M4(5.369e-02, -1.389e-01, 2.474e-01, -9.388e-02, 1.382e-01, -2.378e-01, 1.034e+00, -4.127e-01, -5.765e-02, -2.028e-01, -5.532e-02, -6.872e-02, -1.059e-02, 7.206e-03, -2.455e-01, -4.938e-02)); + r += 
mul(s2_2, M4(1.712e-01, -7.478e-02, 2.833e-01, -1.819e-01, 3.654e-01, -1.324e-01, 2.584e-01, -3.937e-01, 1.282e-01, -7.266e-02, 7.739e-02, 1.304e-01, -4.748e-02, -1.578e-01, 4.012e-02, -3.365e-02)); + r += mul(s2_3, M4(-1.748e-01, -1.185e-01, 3.252e-01, 8.781e-02, 3.503e-01, -6.309e-01, 1.597e+00, 9.112e-02, -3.477e-01, -7.516e-02, -5.034e-02, -2.982e-01, 1.566e-01, -7.671e-03, -5.705e-02, 4.100e-02)); + r += mul(s2_4, M4(-7.281e-01, 7.474e-01, -7.460e-01, -5.525e-01, 6.676e-01, -2.384e+00, -2.085e+00, 2.139e+00, 5.178e-02, -2.687e-01, 4.824e-02, 4.211e-02, -4.863e-03, -2.555e-01, 1.548e-01, -5.389e-02)); + r += mul(s2_5, M4(2.294e-01, 4.563e-02, -1.082e-01, 1.918e-01, -4.791e-01, -3.667e-01, -9.200e-01, -7.338e-02, 2.055e-01, 1.443e-01, -1.102e-01, 3.247e-01, -1.077e-01, 1.510e-01, 9.772e-02, -9.652e-02)); + r += mul(s2_6, M4(-7.180e-02, -6.766e-02, 4.233e-02, 2.346e-01, 2.416e-01, -5.872e-01, -3.803e-01, 1.459e-01, 1.314e-01, 1.762e-01, 2.180e-01, -6.325e-02, -2.639e-02, -6.961e-02, -6.823e-02, 1.208e-01)); + r += mul(s2_7, M4(-1.771e-02, 8.693e-01, 6.895e-02, 1.746e-01, 7.339e-01, -2.318e+00, -9.599e-02, 6.005e-03, -2.361e-01, 4.397e-03, 4.241e-02, -3.560e-01, -2.276e-01, 2.720e-01, -2.148e-03, 1.158e-01)); + r += mul(s2_8, M4(1.415e-02, 4.584e-02, -3.553e-01, 5.112e-02, -6.470e-01, -1.005e+00, -4.282e-02, -9.747e-01, -4.664e-02, 1.182e-01, -1.638e-01, 3.781e-02, 9.961e-02, 6.290e-02, 1.452e-01, -2.967e-02)); + r += mul(s3_0, M4(-1.042e-02, 1.059e-02, 8.109e-02, -1.976e-02, 5.573e-02, 1.743e-01, 2.189e-03, 1.850e-01, -2.536e-01, 2.196e-01, 9.472e-01, -4.919e-01, 1.358e-02, -1.083e-02, -4.265e-01, -5.187e-02)); + r += mul(s3_1, M4(8.612e-02, -1.189e-01, -2.101e-01, 1.060e-01, 1.314e-01, -1.630e-02, -5.186e-02, 4.664e-02, -6.413e-01, -3.610e-01, 4.229e-01, -4.334e-01, 4.259e-02, -1.822e-02, 4.311e-01, -2.530e-01)); + r += mul(s3_2, M4(2.664e-02, 2.255e-02, 3.200e-02, -1.377e-01, -1.708e-01, -8.546e-02, 8.941e-02, -8.314e-02, -2.572e-01, -4.522e-03, 2.598e-02, 
-3.987e-02, -7.309e-02, -7.170e-02, 3.738e-01, -1.056e-01)); + r += mul(s3_3, M4(1.327e-03, -1.352e-01, -1.486e-01, -4.253e-03, 9.866e-02, 2.134e-01, 2.813e-01, 1.472e-01, 3.699e-01, -6.387e-01, 4.480e-01, -5.420e-01, 6.457e-02, -1.278e-01, 1.867e-01, -2.355e-01)); + r += mul(s3_4, M4(-1.330e-01, -1.691e-01, 3.231e-01, -2.959e-01, 2.198e-01, -1.739e-02, -6.569e-02, 3.037e-01, -2.062e-01, 9.855e-02, -5.762e-01, -1.501e-02, 4.521e-02, 2.517e-01, -3.916e-01, -3.841e-01)); + r += mul(s3_5, M4(2.251e-01, 2.664e-02, 3.331e-02, 2.836e-01, -2.527e-01, 4.076e-02, -1.331e-02, -3.068e-03, -4.003e-04, -3.312e-02, -4.418e-02, -7.245e-02, 1.807e-01, -5.017e-01, 6.191e-02, -6.283e-01)); + r += mul(s3_6, M4(-5.410e-02, 7.319e-02, -4.022e-02, -5.598e-02, 1.675e-01, -1.364e-02, 3.302e-02, 2.473e-03, 1.630e-01, -1.955e-02, 2.311e-02, -1.402e-01, -1.078e-02, -1.675e-01, 1.211e-01, -3.069e-03)); + r += mul(s3_7, M4(-3.596e-02, -1.378e-02, 1.912e-01, 1.530e-02, 1.993e-01, -2.688e-01, -6.555e-02, -2.267e-01, -9.549e-02, -1.909e-01, -1.314e-01, -1.145e-01, -3.460e-01, -2.630e-02, 3.692e-01, -1.209e-01)); + r += mul(s3_8, M4(2.329e-01, 4.865e-02, 1.411e-02, 1.037e-01, -3.701e-01, -9.056e-02, -1.421e-01, -3.640e-01, -2.971e-02, -8.960e-02, 4.366e-03, -1.232e-01, -5.654e-02, -6.378e-02, 2.785e-01, -1.645e-01)); + r += V4(5.906e-02, -2.826e-02, 3.865e-02, 1.167e-02); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = 
-max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, 
V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.057e-01, 8.402e-02, -7.690e-02, -5.544e-02, -1.356e-02, -1.211e-01, -3.841e-02, 7.556e-02, -1.057e-01, -6.017e-02, -5.377e-03, -3.373e-02, 2.015e-02, -3.443e-01, 4.561e-03, -1.471e-01)); + r += mul(s0_1, M4(-9.330e-02, -3.572e-02, 1.283e-01, 1.837e-02, -1.698e-02, -7.926e-02, -3.830e-02, -6.985e-02, 1.791e-01, -1.764e-02, -2.987e-01, -2.914e-02, -1.109e-01, -2.236e-01, 1.116e-02, 1.155e-03)); + r += mul(s0_2, M4(-1.422e-02, -3.524e-02, 6.026e-02, 2.085e-03, 3.122e-02, -8.025e-02, -2.071e-01, 6.779e-03, 3.087e-02, -1.078e-01, -1.336e-01, 3.374e-02, 4.059e-02, -1.509e-01, -2.500e-02, 1.575e-02)); + r += mul(s0_3, M4(2.079e-01, 1.217e-01, -9.225e-02, -1.526e-01, 2.831e-01, -2.399e-01, -1.879e-01, 3.896e-02, -2.888e-02, 1.898e-01, 7.200e-03, 1.166e-01, 4.709e-01, -2.553e-01, -8.659e-02, -1.966e-01)); + r += mul(s0_4, M4(-2.466e-01, 1.668e-01, -3.000e-01, 1.331e-01, 1.691e-01, -8.165e-02, 1.461e-01, -2.816e-01, 4.627e-01, -5.020e-02, 4.608e-02, -4.259e-01, 1.733e-01, -2.493e-01, -2.859e-01, 1.255e-01)); + r += mul(s0_5, M4(-1.061e-01, -1.312e-02, 4.457e-03, 6.950e-02, -1.528e-02, -8.325e-03, 9.288e-03, 4.323e-04, -1.349e-01, -4.300e-02, -1.214e-01, -6.221e-02, 1.326e-01, 1.729e-02, -9.435e-03, -8.256e-02)); + r += mul(s0_6, M4(-2.764e-01, 2.277e-01, -6.479e-02, 9.793e-02, -1.252e-01, -2.225e-01, -3.927e-02, 9.069e-03, -3.082e-02, -9.839e-02, -1.432e-02, 4.213e-03, 1.038e-01, -1.336e-02, -8.449e-02, 9.390e-03)); + r += mul(s0_7, M4(1.321e-01, -1.783e-01, 3.953e-02, -4.840e-03, -8.847e-02, 3.953e-02, -2.180e-03, 3.542e-02, -1.772e-01, -9.279e-02, -1.972e-02, -1.933e-02, 9.595e-02, -7.297e-02, -2.008e-02, 2.667e-02)); + r += mul(s0_8, M4(-1.079e-02, -5.796e-02, 8.349e-02, 9.407e-03, -6.521e-02, 2.585e-02, 3.029e-02, -2.179e-02, 1.644e-02, 2.894e-02, 9.475e-02, 2.535e-02, -1.062e-01, 1.059e-01, 5.134e-02, -4.920e-02)); + r += mul(s1_0, M4(2.292e-02, 
-1.255e-01, 6.694e-03, -1.429e-02, 2.778e-02, 1.610e-02, -5.433e-02, -1.691e-02, -8.172e-02, 1.096e-01, -7.444e-02, -1.744e-02, -1.607e-02, -1.718e-02, 2.261e-02, -7.930e-02)); + r += mul(s1_1, M4(-1.606e-01, 2.755e-01, 2.518e-01, -8.121e-02, 8.844e-02, -9.943e-02, -1.479e-01, 2.317e-03, -1.857e-02, 3.330e-01, -2.844e-01, 5.172e-03, 4.147e-02, 8.858e-05, -3.684e-02, 2.822e-02)); + r += mul(s1_2, M4(1.972e-02, 1.372e-01, 8.050e-02, -3.001e-02, -2.199e-02, -1.105e-03, -6.767e-02, -1.520e-02, 2.293e-02, 4.982e-04, 1.077e-01, -4.043e-02, -3.146e-04, 1.341e-04, -2.927e-02, 1.540e-03)); + r += mul(s1_3, M4(6.177e-01, -2.128e-01, -1.951e-01, -2.603e-01, -2.382e-01, 4.451e-02, -6.117e-02, 2.986e-01, -1.350e-01, -3.939e-02, -2.575e-02, 6.810e-02, -5.279e-02, -4.608e-02, -8.021e-02, -7.411e-02)); + r += mul(s1_4, M4(-1.743e-01, 3.337e-01, -2.366e-01, 1.677e-01, 1.648e-01, -1.324e-01, 4.842e-02, -1.070e-01, 1.649e-01, 5.767e-01, 8.170e-02, -1.744e-01, 1.955e-01, 1.592e-01, -2.500e-01, -7.249e-02)); + r += mul(s1_5, M4(-1.271e-01, -1.008e-01, 7.718e-04, 6.185e-03, 2.051e-02, -1.960e-02, 5.900e-02, -7.462e-03, -9.429e-02, -6.792e-02, -1.796e-01, 7.379e-02, -3.235e-02, 4.667e-03, -8.281e-02, -1.639e-02)); + r += mul(s1_6, M4(2.655e-01, 4.646e-02, -7.062e-02, 1.656e-01, 1.325e-01, -1.897e-01, 5.944e-04, 4.675e-02, -5.384e-02, -9.959e-02, -6.261e-05, 2.482e-03, -1.603e-01, 1.532e-03, -6.567e-02, 5.924e-02)); + r += mul(s1_7, M4(3.912e-01, -2.348e-01, -1.125e-02, 6.278e-02, -8.276e-02, 1.717e-01, -2.386e-02, 5.405e-02, 1.651e-01, -7.568e-02, -2.812e-02, 1.120e-02, 7.600e-02, -3.569e-02, 8.237e-02, 3.588e-02)); + r += mul(s1_8, M4(-1.545e-01, -2.594e-01, 5.109e-02, -3.375e-02, -6.946e-02, 5.645e-04, -3.457e-02, -1.802e-02, -8.377e-02, 3.700e-02, -5.377e-02, 2.370e-03, -5.512e-02, 3.772e-02, -6.696e-03, -2.474e-02)); + r += mul(s2_0, M4(9.595e-02, -1.386e-01, 6.095e-02, 1.382e-01, -6.666e-02, 3.536e-02, 3.510e-02, 9.837e-02, -7.013e-02, 1.777e-02, -6.525e-02, 4.813e-03, 4.890e-02, 
1.697e-01, 4.055e-02, 3.352e-03)); + r += mul(s2_1, M4(-9.534e-02, -1.931e-01, 7.936e-02, -5.563e-03, 1.180e-01, -1.126e-01, -4.561e-01, 1.382e-02, -5.442e-02, 1.826e-02, -1.906e-01, 2.481e-02, 1.479e-01, 6.991e-02, -4.527e-01, 4.636e-02)); + r += mul(s2_2, M4(2.077e-02, -1.935e-02, 2.863e-02, -1.457e-03, 8.646e-02, -3.495e-02, -1.110e-01, -5.121e-02, 1.296e-02, 1.610e-01, 1.111e-02, -5.463e-03, -6.310e-02, 9.282e-02, -1.124e-02, -5.502e-03)); + r += mul(s2_3, M4(-2.461e-01, 2.046e-01, 7.247e-02, 4.344e-01, -2.654e-03, -1.548e-01, 5.001e-02, -2.193e-02, -2.841e-02, 4.966e-02, -5.860e-02, -1.607e-02, -3.365e-01, 3.779e-01, -7.002e-02, 2.456e-01)); + r += mul(s2_4, M4(-3.244e-02, 4.290e-01, 8.561e-02, -8.759e-02, 5.843e-01, 2.402e-01, -7.254e-02, 1.672e-01, 9.564e-03, 6.555e-01, 1.018e-01, 2.472e-01, 3.227e-01, -2.279e-01, 2.948e-01, 1.007e-01)); + r += mul(s2_5, M4(7.899e-02, -3.829e-02, -6.897e-02, -6.274e-02, -1.648e-01, 1.819e-01, -3.680e-02, -2.700e-02, -1.223e-01, 3.068e-02, -9.879e-02, 9.617e-02, -1.048e-01, 8.574e-03, 1.253e-01, 1.451e-02)); + r += mul(s2_6, M4(6.709e-02, -2.087e-03, -3.317e-02, 9.660e-02, -4.432e-02, 1.087e-01, -3.855e-03, 3.000e-02, -1.565e-01, 1.285e-02, -1.122e-02, -2.268e-02, 2.213e-01, -5.280e-03, -1.048e-02, 1.095e-02)); + r += mul(s2_7, M4(3.409e-02, 1.319e-02, -3.860e-02, 1.186e-01, -1.729e-01, 6.734e-02, -4.122e-02, -1.425e-02, -1.333e-01, 3.082e-01, -5.667e-02, 1.330e-02, -7.902e-02, -2.279e-01, -2.277e-02, -7.345e-02)); + r += mul(s2_8, M4(-8.456e-02, 7.960e-03, -3.009e-02, 3.726e-02, -1.028e-01, 9.260e-02, -9.118e-03, -1.295e-02, 8.764e-02, 3.246e-03, -3.662e-02, 3.309e-02, -6.546e-02, -1.168e-01, 1.179e-02, -1.356e-02)); + r += mul(s3_0, M4(2.830e-01, -1.486e-01, -6.659e-02, 7.406e-02, 3.936e-02, 1.384e-01, -8.613e-02, 5.222e-02, -2.008e-02, -3.306e-02, 3.603e-03, 8.508e-03, 6.657e-02, 8.277e-03, -1.684e-02, -3.086e-02)); + r += mul(s3_1, M4(2.673e-02, -1.550e-01, -1.939e-01, 1.124e-01, 1.494e-01, 3.597e-01, -2.434e-01, 
-3.767e-02, 6.287e-02, -1.489e-02, -2.108e-01, 9.890e-02, -6.012e-02, -1.588e-01, 5.574e-02, 6.678e-02)); + r += mul(s3_2, M4(7.056e-02, -9.550e-02, 1.579e-02, 6.754e-02, 6.387e-02, 9.595e-02, -1.968e-01, -3.396e-02, -6.625e-03, 1.153e-01, -2.162e-02, -1.236e-02, -2.182e-02, 9.085e-02, -7.137e-02, 3.357e-02)); + r += mul(s3_3, M4(8.677e-02, -7.446e-02, 8.953e-02, 2.128e-01, 3.175e-01, 1.377e-02, 9.215e-02, 8.452e-02, -2.583e-01, 5.582e-03, 4.653e-02, 1.038e-01, -5.802e-01, -1.506e-01, -5.875e-02, -4.308e-02)); + r += mul(s3_4, M4(-5.314e-02, 5.278e-01, 5.725e-02, -3.945e-02, 3.018e-01, -3.613e-02, 1.135e-01, 6.378e-02, 1.129e-01, 3.251e-01, -2.298e-01, 2.272e-01, -2.859e-02, -1.997e-01, -1.289e-02, -9.605e-02)); + r += mul(s3_5, M4(-8.572e-03, 1.098e-01, 6.951e-02, -1.006e-01, 8.029e-02, -1.004e-02, 3.641e-03, 3.452e-02, -2.027e-01, -1.419e-01, 9.411e-02, 4.631e-03, -2.382e-02, -1.774e-01, 9.282e-02, 7.921e-02)); + r += mul(s3_6, M4(-5.645e-02, 5.717e-02, -8.271e-02, 1.460e-01, 1.135e-01, -2.347e-02, 3.011e-02, 6.505e-02, -1.186e-01, 6.520e-02, 2.698e-03, -4.274e-02, 3.661e-03, 1.253e-01, 1.867e-02, 3.871e-03)); + r += mul(s3_7, M4(9.708e-02, -1.020e-01, -1.182e-01, 1.458e-01, -1.234e-01, -1.871e-01, -2.362e-02, -5.212e-02, 2.773e-01, -6.513e-02, -4.346e-03, -4.354e-02, 3.573e-01, -2.655e-02, 4.325e-02, -2.378e-02)); + r += mul(s3_8, M4(-8.228e-02, -2.852e-02, -2.048e-02, 3.619e-02, 9.848e-03, 4.128e-02, -4.446e-03, -9.824e-03, -1.064e-01, -5.623e-02, 4.160e-03, -3.699e-02, -6.863e-03, -2.035e-02, 1.601e-03, -4.166e-02)); + r += V4(-9.721e-03, -6.824e-03, 1.589e-03, -2.346e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, 
M4(-2.150e-02, 3.627e-02, 1.098e-01, -2.531e-02, 7.604e-03, 6.546e-02, -3.447e-01, 5.052e-03, -5.728e-02, 1.302e-02, 1.200e-01, -3.060e-02, 3.626e-02, 8.441e-03, -1.570e-01, 1.402e-02)); + r += mul(s0_1, M4(-3.360e-02, -1.269e-01, 1.234e-02, 9.393e-02, -1.762e-01, 2.675e-01, -1.063e-01, -8.648e-02, -1.491e-01, 1.604e-01, -1.693e-01, 5.005e-02, 1.166e-01, -1.870e-02, 8.843e-02, 1.417e-02)); + r += mul(s0_2, M4(6.443e-02, 2.579e-02, 5.084e-02, -2.141e-02, -4.518e-02, -6.926e-02, -7.947e-02, -5.832e-03, 7.140e-02, -1.427e-01, 1.275e-01, 7.012e-02, -1.982e-03, -6.910e-02, -2.167e-01, 1.330e-02)); + r += mul(s0_3, M4(-2.132e-02, 2.807e-02, 1.044e-01, 1.442e-02, 3.026e-01, 1.629e-01, 1.141e-01, 1.649e-01, 6.632e-02, 1.488e-02, -1.725e-01, 3.376e-02, -4.740e-01, 5.156e-03, -2.510e-01, 6.820e-02)); + r += mul(s0_4, M4(-7.393e-02, 1.460e-01, -2.561e-02, 2.588e-01, 2.248e-01, 1.511e-01, 2.192e-01, -1.222e-01, 4.382e-01, 3.200e-02, 3.125e-02, -3.612e-01, -1.890e-01, -2.971e-01, -1.180e-01, -1.741e-01)); + r += mul(s0_5, M4(-8.022e-03, 3.348e-03, -9.508e-02, -5.842e-03, 2.266e-02, -3.213e-02, -1.024e-01, -8.443e-02, 2.648e-03, -1.541e-01, 8.091e-02, -4.184e-02, -2.928e-02, 9.030e-02, -1.811e-01, -1.895e-02)); + r += mul(s0_6, M4(2.134e-01, 1.204e-02, 2.013e-02, -8.178e-02, 1.413e-02, 5.513e-02, 1.111e-01, 2.577e-01, 1.495e-03, -5.161e-02, 1.933e-02, 7.202e-02, -1.053e-01, -6.070e-03, -1.038e-01, -1.344e-01)); + r += mul(s0_7, M4(3.203e-01, -1.824e-01, -4.901e-02, -2.154e-01, 3.227e-02, 2.323e-02, 1.477e-02, 1.885e-02, 1.952e-01, -3.905e-02, 3.536e-02, 1.957e-01, 2.784e-01, -1.506e-01, -1.907e-01, 5.836e-02)); + r += mul(s0_8, M4(-7.886e-02, -1.750e-01, 4.233e-02, -2.816e-01, -1.097e-02, 5.733e-03, -2.752e-02, 3.943e-03, -6.800e-02, -1.612e-02, -5.243e-02, -1.074e-02, -8.493e-02, 9.050e-02, -7.421e-02, -1.520e-01)); + r += mul(s1_0, M4(1.024e-01, -3.071e-02, -7.767e-02, -1.160e-02, -1.668e-01, -7.681e-03, 7.039e-02, 4.690e-02, -4.672e-02, 3.627e-03, 2.337e-02, -1.154e-02, 
2.441e-03, 1.719e-02, 3.137e-02, 2.789e-02)); + r += mul(s1_1, M4(1.128e-01, 4.853e-02, 3.627e-01, 5.215e-03, -4.474e-02, -3.383e-02, 5.492e-03, 1.021e-01, -3.035e-02, 7.184e-03, 9.787e-02, 6.902e-02, 2.559e-02, 6.446e-03, -1.559e-01, 1.046e-01)); + r += mul(s1_2, M4(5.861e-02, 8.621e-02, -4.399e-03, 1.267e-02, 1.553e-02, -1.611e-02, 5.352e-02, 2.444e-03, -3.847e-02, 1.450e-01, 2.706e-01, -1.122e-01, -2.804e-03, 1.538e-02, 6.326e-02, -8.110e-02)); + r += mul(s1_3, M4(-4.627e-01, -2.751e-02, 1.883e-02, 6.767e-02, 1.365e-03, -7.980e-02, -1.068e-01, -8.643e-02, -1.110e-01, -5.781e-02, 2.143e-01, 6.533e-02, 9.844e-02, -1.375e-01, 1.719e-01, 1.652e-02)); + r += mul(s1_4, M4(-3.445e-01, 1.223e-01, -5.479e-01, 1.608e-01, 2.282e-01, 4.579e-02, 2.864e-02, -2.323e-01, 3.533e-01, -1.573e-01, 5.705e-02, -1.202e-01, -3.604e-02, 7.107e-02, -7.818e-02, -1.104e-01)); + r += mul(s1_5, M4(9.595e-02, 2.793e-01, -1.118e-01, -7.789e-02, -5.870e-02, -9.267e-02, 1.003e-01, -7.460e-02, 3.747e-02, -4.606e-01, 1.714e-01, -7.485e-04, -1.787e-02, -2.575e-02, 8.273e-02, -3.576e-02)); + r += mul(s1_6, M4(8.346e-03, -1.200e-01, 1.913e-03, -5.262e-02, -3.471e-02, -3.522e-02, 4.409e-02, 7.695e-02, 4.910e-02, -5.825e-02, 1.350e-01, 3.096e-04, 3.797e-03, -9.389e-02, 5.738e-02, -1.094e-02)); + r += mul(s1_7, M4(3.179e-01, -2.333e-01, 7.353e-02, -4.401e-01, -1.195e-01, 6.711e-02, -2.217e-02, -1.906e-01, 1.201e-01, -8.349e-02, 4.145e-02, -2.381e-02, 6.768e-02, 1.101e-02, 2.261e-02, -2.036e-01)); + r += mul(s1_8, M4(-4.521e-02, -3.368e-01, 2.684e-01, -1.975e-01, -1.879e-02, 1.738e-02, 4.605e-02, 1.067e-01, -6.758e-02, -8.444e-02, 1.229e-01, -6.125e-02, -1.200e-02, -6.803e-03, -6.103e-03, 5.747e-02)); + r += mul(s2_0, M4(-1.677e-02, -7.385e-03, 7.314e-03, 6.695e-02, 5.321e-03, -7.504e-03, 1.785e-01, 8.061e-02, 6.187e-02, 2.549e-02, -4.809e-02, -3.195e-02, -1.591e-01, 8.312e-02, -6.793e-02, 3.581e-02)); + r += mul(s2_1, M4(3.261e-02, -7.202e-02, -1.190e-01, 1.516e-01, -5.670e-03, 1.967e-01, -4.255e-01, 
4.834e-02, 4.823e-02, 4.959e-02, 2.604e-01, -9.989e-02, -4.437e-02, -6.331e-02, 5.512e-02, -4.163e-02)); + r += mul(s2_2, M4(4.071e-02, 7.433e-02, 7.196e-02, -3.118e-02, -4.637e-02, -3.775e-02, -2.451e-02, -1.277e-02, 4.582e-02, -4.385e-02, 3.060e-01, -1.481e-02, 3.523e-03, 1.282e-02, -1.091e-01, 6.814e-02)); + r += mul(s2_3, M4(2.584e-01, -1.657e-02, -1.307e-01, -1.203e-01, -5.162e-02, 6.113e-04, 6.253e-02, 1.557e-01, -1.242e-01, -5.452e-02, 3.666e-01, -9.119e-02, 5.750e-01, -4.420e-02, -2.222e-01, 3.541e-02)); + r += mul(s2_4, M4(-3.262e-02, 1.971e-01, -7.493e-02, -2.959e-01, 1.890e-02, -4.554e-02, -1.241e-01, -2.084e-01, -3.179e-01, -6.226e-02, -3.992e-01, -4.678e-02, 9.481e-02, -2.873e-01, -9.361e-02, -2.487e-01)); + r += mul(s2_5, M4(-9.744e-02, -3.792e-02, -1.262e-01, -1.641e-02, -9.649e-02, 2.342e-01, 7.814e-02, -1.527e-01, -2.606e-01, -1.009e-01, 1.738e-02, -1.352e-01, 4.687e-03, -4.251e-03, 8.552e-02, -1.215e-01)); + r += mul(s2_6, M4(4.338e-02, -6.728e-02, -2.316e-02, 1.704e-01, -4.922e-02, -2.497e-02, -3.437e-02, -2.233e-02, 8.673e-02, -8.427e-02, 3.012e-01, 1.868e-02, -5.820e-02, 2.842e-02, 1.574e-01, -5.813e-02)); + r += mul(s2_7, M4(7.404e-02, -4.425e-02, 8.098e-02, -7.744e-02, -2.555e-02, 7.253e-02, 8.971e-02, -4.607e-02, -1.052e-01, 7.666e-02, 3.011e-02, -3.467e-01, 5.327e-02, -4.348e-02, 2.473e-01, 1.847e-01)); + r += mul(s2_8, M4(-1.205e-02, -1.017e-01, 1.288e-02, -6.228e-02, -7.597e-03, 9.311e-03, 2.267e-02, 5.187e-04, 2.936e-02, -1.846e-01, 2.377e-02, 1.072e-01, 7.069e-02, -6.199e-02, 1.657e-01, 1.708e-01)); + r += mul(s3_0, M4(-2.337e-01, 2.411e-01, -5.654e-02, -6.128e-02, 1.441e-01, -7.801e-02, 8.768e-02, -1.401e-02, -7.979e-02, 2.938e-02, 3.423e-02, 2.258e-02, 3.470e-02, 1.049e-01, -9.900e-02, -3.849e-02)); + r += mul(s3_1, M4(1.636e-01, -5.372e-01, 7.445e-02, 4.422e-01, 2.717e-02, 4.842e-01, -3.236e-01, -1.247e-01, 9.917e-03, 2.162e-02, -1.777e-01, 3.460e-02, -1.702e-01, -4.158e-02, -6.269e-02, 3.410e-03)); + r += mul(s3_2, M4(-2.422e-03, 
-5.821e-02, -8.683e-02, -1.286e-02, 7.737e-02, 1.772e-01, 1.056e-01, -5.362e-02, 3.000e-02, 3.260e-02, -2.047e-02, -2.903e-02, 4.510e-02, 3.030e-02, 3.413e-02, 1.265e-01)); + r += mul(s3_3, M4(-3.559e-01, -4.824e-03, 2.601e-01, 5.725e-02, 1.480e-01, 9.340e-02, -6.162e-03, -1.121e-01, 3.200e-02, -2.580e-02, -1.105e-01, 3.647e-02, 1.558e-01, 3.367e-02, -3.009e-01, 2.425e-01)); + r += mul(s3_4, M4(-3.534e-01, 1.046e-01, -1.263e-02, -2.606e-01, 1.026e-01, 2.504e-01, -2.282e-01, -1.836e-01, 7.199e-02, -3.665e-02, -2.959e-01, -2.501e-01, 2.980e-02, -1.711e-01, 2.363e-01, -1.904e-01)); + r += mul(s3_5, M4(-1.381e-01, 9.220e-02, 1.082e-01, -1.717e-01, 5.453e-02, 1.216e-01, 6.893e-03, 4.137e-02, -3.005e-02, -2.100e-03, -5.898e-02, -3.922e-02, 1.715e-01, -1.886e-01, -2.510e-01, 2.493e-01)); + r += mul(s3_6, M4(5.587e-02, -5.724e-02, 4.462e-02, 1.133e-01, 2.672e-02, 1.392e-02, -1.762e-02, 8.027e-02, 1.795e-02, 4.128e-02, -1.986e-02, 1.847e-02, -1.562e-01, 6.650e-02, -1.597e-01, -8.866e-02)); + r += mul(s3_7, M4(5.177e-02, -1.843e-01, 6.256e-03, 3.103e-02, -8.057e-03, 1.538e-01, 8.820e-02, 9.421e-02, -1.280e-01, 3.670e-02, 2.720e-03, -7.796e-02, -5.717e-02, -1.237e-01, -1.806e-01, 9.065e-02)); + r += mul(s3_8, M4(-4.682e-02, -1.655e-01, 3.621e-02, 1.126e-02, 1.055e-01, 4.565e-02, -4.206e-02, 1.496e-01, 6.181e-02, 5.189e-02, 6.303e-02, -1.095e-01, -1.832e-02, 1.099e-01, 6.317e-03, -2.335e-01)); + r += V4(-2.548e-03, 2.631e-03, -1.796e-03, 6.472e-05); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 
s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, 
V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.108e-02, 4.397e-02, 8.099e-02, 6.707e-02, -7.574e-03, -3.379e-02, -1.189e-01, 2.469e-02, 5.677e-02, 1.936e-01, -2.147e-01, 2.745e-02, -2.339e-02, 1.413e-01, 9.424e-03, -1.532e-02)); + r += mul(s0_1, M4(6.845e-02, 2.566e-01, -3.429e-01, 7.152e-02, 7.412e-03, -1.234e-01, 1.319e-01, -1.394e-01, -1.452e-01, 4.029e-02, -8.428e-02, -9.997e-02, 3.241e-01, 4.534e-01, 1.353e-01, -6.836e-02)); + r += mul(s0_2, M4(8.957e-02, 2.015e-01, -9.037e-02, 4.122e-02, -9.043e-02, -2.070e-01, 1.988e-01, -7.000e-02, -1.122e-02, 3.547e-02, 2.630e-03, -6.662e-03, 1.449e-01, 1.636e-01, -4.471e-02, 7.979e-02)); + r += mul(s0_3, M4(7.138e-02, 3.893e-02, 7.791e-02, 4.093e-02, -2.462e-01, 3.752e-02, 7.621e-03, -4.290e-02, -1.033e-01, 1.806e-01, -8.490e-03, -1.038e-01, 4.361e-02, 7.536e-02, 7.619e-02, -5.600e-02)); + r += mul(s0_4, M4(-2.419e-02, 1.601e-01, 1.182e-01, -6.286e-02, 4.891e-02, 4.624e-02, -9.374e-02, 1.644e-01, -1.319e-01, 3.175e-01, -5.865e-02, -8.696e-02, 5.228e-01, 3.163e-01, -2.053e-01, 1.315e-01)); + r += mul(s0_5, M4(2.643e-02, -1.714e-01, -2.308e-02, 2.050e-02, -1.852e-01, -5.479e-02, 6.061e-02, -1.538e-01, -7.382e-02, -4.387e-04, -5.849e-02, -5.184e-02, 1.197e-01, 5.114e-02, -3.098e-02, 1.580e-02)); + r += mul(s0_6, M4(3.068e-02, 4.721e-04, 3.968e-02, 3.974e-02, 1.701e-02, -2.658e-02, 4.949e-02, -7.216e-02, -1.259e-01, -6.904e-03, 7.295e-02, 1.948e-02, -3.177e-02, -3.845e-03, 2.910e-02, -6.175e-03)); + r += mul(s0_7, M4(3.500e-02, -6.070e-02, -3.675e-02, 5.680e-02, -8.178e-02, -1.257e-01, -1.144e-03, 1.419e-01, 1.017e-01, 6.111e-02, -9.663e-02, 3.145e-01, 1.886e-01, 1.274e-01, -6.735e-02, 1.646e-02)); + r += mul(s0_8, M4(1.100e-02, -1.008e-01, -2.527e-02, 9.051e-02, -2.122e-02, -3.495e-02, 2.186e-02, -3.673e-02, -5.042e-02, -4.490e-02, -6.696e-02, -1.370e-01, 
8.272e-02, -1.107e-01, 3.828e-02, 2.405e-02)); + r += mul(s1_0, M4(4.308e-02, -1.587e-02, 5.034e-02, -1.256e-03, -3.630e-03, 9.342e-02, -1.246e-01, 1.407e-03, 2.283e-01, 7.103e-02, 1.496e-01, 1.529e-03, 7.748e-02, -4.744e-02, 1.267e-01, 8.372e-05)); + r += mul(s1_1, M4(-2.667e-01, 4.345e-03, -6.051e-02, 7.748e-04, 5.212e-02, 7.912e-02, -7.013e-02, -4.177e-02, -5.925e-03, 7.525e-03, 1.316e-01, 1.347e-02, -1.046e-01, -3.121e-01, 4.098e-01, -3.102e-02)); + r += mul(s1_2, M4(-7.060e-02, 2.314e-01, -9.224e-02, 2.077e-02, -4.781e-03, -7.540e-02, 7.904e-02, -2.268e-02, 4.577e-02, 1.067e-01, -1.948e-03, 4.846e-02, -2.153e-01, -1.920e-01, 7.195e-02, 5.041e-02)); + r += mul(s1_3, M4(1.662e-02, 3.623e-03, 9.545e-03, 5.119e-04, -1.384e-01, 9.941e-03, 4.085e-02, 1.023e-02, 8.623e-02, -1.836e-02, 1.508e-01, 2.565e-02, 1.250e-01, -6.873e-02, 1.039e-02, 3.096e-02)); + r += mul(s1_4, M4(-3.783e-01, 5.804e-02, 5.745e-02, -1.070e-01, 4.047e-01, 4.005e-01, -2.802e-01, 1.174e-01, -3.899e-01, -2.940e-01, 6.426e-01, -1.456e-01, -2.346e-01, -2.872e-01, 3.019e-02, 2.075e-01)); + r += mul(s1_5, M4(-1.279e-01, -5.204e-02, 1.008e-01, -7.402e-02, -1.272e-01, -2.809e-02, 4.025e-02, -4.037e-02, 1.705e-01, 1.347e-01, 4.935e-02, -5.119e-03, -3.623e-01, -2.111e-01, 2.074e-01, -4.637e-01)); + r += mul(s1_6, M4(3.894e-02, 1.740e-02, 1.167e-02, -8.472e-02, 2.890e-02, -2.453e-02, 4.209e-02, 1.043e-01, 8.521e-02, -9.523e-02, 6.200e-02, 1.241e-02, 3.387e-03, -2.783e-02, -2.927e-02, -2.315e-02)); + r += mul(s1_7, M4(-1.122e-01, -5.652e-02, -4.186e-02, -4.056e-02, 1.860e-02, 8.204e-03, -1.358e-02, 2.056e-01, -6.379e-02, -1.215e-02, 1.135e-01, -2.047e-01, -1.013e-02, 1.194e-02, -1.397e-01, -1.773e-01)); + r += mul(s1_8, M4(-2.283e-03, -1.172e-02, 2.030e-02, -3.740e-03, 6.208e-02, -1.690e-02, 7.899e-03, 1.212e-01, 1.094e-01, -4.111e-02, 4.576e-02, 7.825e-02, -6.665e-02, -9.950e-02, 6.104e-03, -1.272e-01)); + r += mul(s2_0, M4(5.802e-02, -8.129e-02, 7.649e-02, 8.207e-02, -7.115e-02, -9.510e-02, 2.679e-01, 
5.415e-02, 3.331e-02, -5.390e-02, 2.535e-01, -6.838e-02, 1.007e-01, -8.470e-02, 1.052e-01, -4.556e-03)); + r += mul(s2_1, M4(-7.709e-02, -2.091e-01, 7.253e-02, 3.751e-02, -3.012e-02, 2.705e-01, 9.391e-02, 5.310e-02, -3.819e-02, -1.502e-01, 3.674e-01, 6.050e-02, 8.643e-02, -3.193e-02, 3.706e-01, -1.272e-01)); + r += mul(s2_2, M4(-8.654e-02, -1.264e-01, 9.612e-02, -5.250e-02, 1.367e-02, 2.760e-02, 3.644e-02, 5.628e-02, -2.305e-02, -3.072e-02, 7.435e-02, 1.310e-01, 1.349e-03, -8.390e-02, 8.076e-02, 9.622e-02)); + r += mul(s2_3, M4(2.535e-02, 3.247e-02, -3.928e-02, 1.713e-02, -1.630e-01, -4.284e-02, -8.256e-02, 2.143e-01, 1.523e-01, -1.190e-01, 1.579e-01, 1.972e-02, -1.024e-01, -3.809e-02, -1.194e-01, 1.843e-02)); + r += mul(s2_4, M4(-2.534e-01, 4.147e-03, 3.652e-02, -2.514e-01, -4.926e-02, 1.540e-01, -6.906e-02, -2.077e-01, -4.119e-02, 6.231e-01, -2.267e-01, -1.284e-01, -1.211e-01, 1.260e-01, -3.930e-02, 1.480e-01)); + r += mul(s2_5, M4(-1.185e-01, -1.450e-01, 5.094e-02, -1.236e-01, 5.897e-02, 3.769e-02, 4.215e-02, 4.909e-02, -1.146e-01, 1.742e-02, 2.068e-01, 1.393e-01, 7.795e-02, 1.028e-01, 4.501e-02, 1.306e-01)); + r += mul(s2_6, M4(7.121e-03, 1.788e-02, 8.609e-03, 4.098e-02, 9.014e-02, 4.066e-02, -4.107e-02, -5.454e-02, 8.660e-02, 3.154e-02, 1.323e-01, 2.997e-02, 1.011e-02, -7.761e-02, 8.035e-02, 1.981e-02)); + r += mul(s2_7, M4(-2.280e-02, -5.927e-02, -4.650e-02, -2.669e-02, 1.326e-02, -6.787e-02, -9.816e-02, -2.418e-01, -2.725e-02, -2.671e-02, 6.625e-02, 1.176e-01, -1.511e-01, -8.170e-02, 8.073e-02, -4.317e-02)); + r += mul(s2_8, M4(2.503e-02, -5.313e-02, -5.163e-02, -1.025e-01, 3.311e-03, 5.522e-02, -2.906e-02, 6.220e-02, -1.606e-01, -9.050e-02, 1.435e-01, -7.260e-02, -1.731e-03, -3.100e-02, 6.860e-02, 5.068e-02)); + r += mul(s3_0, M4(2.504e-01, 3.584e-02, 4.543e-02, 5.108e-03, 1.660e-01, 3.755e-02, 2.497e-02, 3.799e-02, 1.946e-01, 6.233e-02, 1.666e-02, 2.337e-02, 4.533e-01, -1.371e-01, -3.372e-01, 9.264e-02)); + r += mul(s3_1, M4(-2.139e-01, -3.254e-01, 
2.315e-01, 1.407e-01, -4.157e-02, 1.958e-01, -6.251e-03, 2.082e-03, -3.687e-02, 7.719e-02, 8.827e-02, 1.692e-02, 2.429e-03, -4.380e-02, -1.665e-01, -4.293e-01)); + r += mul(s3_2, M4(2.632e-02, -4.644e-01, 3.778e-01, 1.915e-01, 1.765e-02, 9.826e-02, -5.215e-02, 6.615e-02, 4.066e-02, 8.500e-02, -6.289e-02, 1.127e-01, 4.373e-02, -2.529e-02, -3.463e-03, 1.561e-02)); + r += mul(s3_3, M4(1.812e-01, 3.307e-02, -1.222e-01, -1.056e-01, 3.178e-01, -6.752e-02, -4.926e-02, 9.877e-02, -1.413e-02, 2.520e-02, 2.318e-02, -5.239e-02, -1.853e-02, 1.247e-01, -2.800e-01, -4.267e-02)); + r += mul(s3_4, M4(-2.170e-01, 9.658e-02, -2.656e-02, -3.532e-01, -2.667e-01, -1.484e-01, 1.986e-01, 3.970e-01, -9.642e-02, 3.251e-01, -4.856e-02, -1.348e-01, -2.365e-01, -7.690e-02, -4.313e-02, -3.293e-01)); + r += mul(s3_5, M4(3.116e-01, 3.575e-01, -1.439e-01, -2.788e-01, 1.682e-01, 3.373e-02, 1.132e-03, -2.062e-02, 2.790e-02, 4.237e-02, 6.550e-02, -7.303e-02, 1.220e-01, 4.608e-02, 2.672e-02, -6.588e-02)); + r += mul(s3_6, M4(1.361e-01, 1.959e-02, -6.558e-02, -5.488e-02, 1.780e-01, 1.400e-02, -8.174e-02, 3.617e-02, 2.232e-02, 1.570e-02, 2.935e-02, 1.595e-02, 4.166e-02, 1.395e-02, -7.261e-02, 3.343e-02)); + r += mul(s3_7, M4(-6.674e-03, -7.837e-02, 2.041e-02, -7.274e-02, -1.975e-01, -1.022e-01, 3.521e-02, -2.381e-01, 4.452e-02, -4.855e-02, -2.357e-02, 4.965e-02, -2.079e-03, -5.777e-02, -4.211e-02, -1.147e-01)); + r += mul(s3_8, M4(1.852e-02, -4.532e-02, 3.780e-02, -2.960e-02, 4.216e-03, 7.749e-02, -2.922e-03, 9.211e-02, 1.465e-02, 1.361e-03, -5.097e-02, 2.529e-02, 6.341e-02, 1.534e-02, -2.929e-02, 6.519e-02)); + r += V4(-1.083e-02, 2.841e-03, 4.056e-03, -3.056e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 
s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.130e-04, -7.656e-02, 1.752e-01, 1.022e-01, 5.050e-03, -7.227e-02, 4.673e-03, -1.316e-01, -8.276e-02, -2.565e-02, 1.057e-01, 4.770e-02, -6.372e-02, -9.303e-02, 3.416e-02, 2.778e-02)); + r += mul(s0_1, M4(-1.171e-01, 7.738e-02, 1.872e-01, 2.325e-02, 5.726e-02, 6.929e-02, -1.247e-01, -3.376e-02, -1.218e-01, 1.106e-01, -9.175e-02, 2.578e-02, -1.139e-01, -5.745e-02, -5.009e-03, 1.082e-01)); + r += mul(s0_2, M4(-1.007e-01, 7.851e-03, 2.006e-02, 1.214e-01, 8.999e-02, -2.452e-02, -6.921e-02, -1.470e-01, -2.441e-02, -6.812e-02, -3.163e-02, -3.730e-02, -9.304e-02, -6.580e-02, -3.085e-02, 4.557e-02)); + r += mul(s0_3, M4(2.147e-02, 2.392e-02, 6.809e-02, 5.823e-02, -5.266e-02, -6.499e-02, -1.937e-01, -1.774e-01, -7.026e-02, 5.565e-02, 2.104e-01, 9.940e-02, -6.656e-02, -5.599e-02, -9.527e-02, -2.195e-02)); + r += mul(s0_4, M4(-1.314e-01, -2.100e-02, -8.236e-02, -5.332e-03, -8.806e-02, -1.931e-02, -3.184e-02, 1.283e-01, -1.044e-01, -9.658e-02, -3.773e-02, 4.251e-01, -6.699e-01, -2.441e-01, 2.817e-01, 3.949e-02)); + r += mul(s0_5, M4(-6.834e-02, 1.218e-01, -6.189e-02, -1.587e-01, -9.935e-03, 4.393e-02, -2.135e-01, -2.161e-01, 5.552e-03, 5.016e-02, 3.099e-02, -1.251e-01, 2.489e-02, -4.931e-01, 5.038e-03, -8.030e-03)); + r += mul(s0_6, M4(3.225e-02, -2.105e-02, 3.418e-03, 1.529e-02, -3.405e-02, 3.200e-02, -1.592e-01, -9.388e-02, 6.723e-02, 3.172e-02, -4.816e-02, 1.309e-01, -1.431e-02, -7.290e-02, -9.302e-02, -8.654e-02)); + r += mul(s0_7, M4(4.884e-02, 1.260e-01, 6.116e-02, 8.572e-02, 1.022e-01, 2.291e-02, -7.667e-02, -1.023e-01, -1.076e-01, 2.182e-01, 1.787e-01, 4.600e-01, 4.603e-02, 4.013e-02, -1.248e-01, 1.515e-02)); + r += mul(s0_8, M4(2.182e-02, 1.733e-01, 1.738e-01, 1.317e-01, -3.960e-02, 1.821e-02, -6.237e-02, -3.957e-02, -8.466e-02, 3.118e-01, -1.109e-03, 6.180e-02, 1.087e-01, 1.219e-01, 1.164e-01, 7.167e-02)); + r += mul(s1_0, M4(-5.654e-03, -5.632e-02, -1.110e-01, -8.149e-02, -4.162e-02, -3.990e-02, 7.565e-02, -5.569e-02, 
5.983e-02, 3.346e-02, 1.598e-02, 3.390e-02, 7.153e-02, 7.041e-03, -1.990e-02, 1.044e-01)); + r += mul(s1_1, M4(-1.193e-02, -1.392e-01, -6.353e-02, -5.281e-02, -3.663e-02, 4.147e-02, -1.629e-01, -1.972e-02, 4.507e-02, 6.254e-02, -2.663e-02, -2.552e-02, 2.680e-01, 6.236e-02, -5.901e-02, -1.626e-02)); + r += mul(s1_2, M4(-1.057e-01, -3.432e-02, 5.563e-02, 1.240e-01, 1.598e-02, -3.795e-02, -1.715e-02, -2.897e-03, 1.671e-02, 7.545e-04, 3.111e-02, 5.136e-02, 1.207e-01, -1.587e-02, 7.899e-02, 3.483e-02)); + r += mul(s1_3, M4(-1.056e-02, 8.004e-03, -1.297e-02, -2.314e-02, -4.324e-02, -1.453e-02, -3.458e-02, 7.711e-03, 9.450e-02, -7.279e-03, -1.025e-01, -2.236e-01, 5.674e-02, 6.152e-02, 7.612e-02, 8.618e-03)); + r += mul(s1_4, M4(-1.020e-01, -1.119e-01, -4.415e-01, -4.113e-01, -2.564e-01, -8.898e-02, 2.198e-01, 3.662e-01, 3.076e-01, -3.531e-01, -3.432e-02, 2.568e-01, -1.218e-01, 5.504e-02, 2.069e-01, -6.714e-01)); + r += mul(s1_5, M4(-5.354e-02, 5.475e-02, -3.443e-01, -2.398e-01, -2.952e-02, 1.826e-02, -3.823e-02, -9.658e-02, -1.836e-02, -7.218e-02, 5.875e-02, -1.017e-01, 2.128e-01, -2.690e-01, -1.918e-01, -2.150e-01)); + r += mul(s1_6, M4(9.334e-04, 1.622e-02, 3.458e-02, 6.131e-02, 2.779e-02, 4.021e-03, 4.950e-02, -1.843e-02, 7.233e-02, 2.085e-03, -1.719e-02, 2.032e-02, 4.844e-03, -5.081e-03, 2.610e-02, 1.633e-02)); + r += mul(s1_7, M4(-3.670e-02, 5.013e-02, -6.773e-02, -6.609e-02, 1.170e-01, 3.290e-02, -9.547e-02, -2.685e-02, -1.327e-01, -3.630e-03, -2.409e-01, -3.083e-01, 3.003e-02, 1.812e-01, 1.606e-01, 1.490e-01)); + r += mul(s1_8, M4(-4.339e-02, 3.640e-03, -4.089e-02, -2.330e-02, 2.747e-02, 1.597e-01, 6.442e-03, 1.263e-01, -1.345e-02, 7.396e-02, 1.475e-02, -2.817e-03, -3.443e-02, 1.270e-01, 8.616e-02, 2.625e-02)); + r += mul(s2_0, M4(5.066e-02, 8.901e-02, -5.357e-02, -3.008e-04, 6.639e-02, 4.513e-02, -7.859e-02, 1.288e-01, 1.788e-02, 3.146e-03, -1.870e-01, 8.331e-03, 8.237e-02, 1.556e-02, -4.060e-02, -3.354e-02)); + r += mul(s2_1, M4(1.211e-01, -1.958e-02, -1.821e-01, 
-2.678e-01, 5.985e-02, -3.434e-02, -1.473e-04, -4.211e-02, 9.400e-02, -1.979e-01, -1.631e-01, -1.431e-01, 1.288e-01, 1.301e-01, 7.110e-02, 1.196e-01)); + r += mul(s2_2, M4(9.777e-02, 7.517e-02, 5.213e-02, 2.601e-02, 6.137e-04, 2.892e-02, -3.826e-02, 3.445e-02, 2.244e-02, -2.289e-01, -2.827e-02, -1.499e-01, -1.293e-02, -1.242e-01, 7.029e-03, 3.803e-02)); + r += mul(s2_3, M4(-7.883e-03, 3.989e-02, 2.103e-01, 9.354e-02, -2.803e-02, -2.155e-02, 1.230e-01, -2.510e-01, -4.577e-02, -8.206e-02, -4.634e-02, -5.687e-02, -4.763e-02, 8.117e-04, 4.716e-02, -6.873e-02)); + r += mul(s2_4, M4(5.074e-02, -9.310e-02, -3.064e-01, -2.342e-01, -3.624e-01, 1.487e-01, -1.847e-01, -1.638e-01, -2.098e-01, -2.940e-01, -7.838e-02, -3.450e-01, -1.351e-01, 1.343e-01, -2.153e-01, -3.085e-01)); + r += mul(s2_5, M4(-9.094e-02, 1.096e-01, -1.252e-01, -1.611e-01, -1.475e-02, -1.251e-01, -3.660e-02, -9.706e-02, 8.987e-02, -2.422e-01, -1.290e-01, -8.441e-02, 1.325e-01, -1.979e-01, 1.195e-01, 1.066e-01)); + r += mul(s2_6, M4(3.450e-02, -1.976e-03, -2.692e-02, 2.470e-02, 4.134e-02, 1.106e-01, 5.587e-03, 1.203e-01, 3.348e-03, -1.976e-01, -6.304e-03, -9.345e-02, 4.034e-03, -2.611e-02, -2.223e-02, -4.153e-02)); + r += mul(s2_7, M4(-1.626e-02, 9.840e-02, -8.620e-03, -1.479e-02, -1.601e-01, -1.274e-01, -1.019e-01, -1.193e-01, 1.353e-01, -1.601e-01, -3.038e-01, -2.066e-01, 4.585e-02, -7.649e-02, -1.581e-01, -1.694e-01)); + r += mul(s2_8, M4(-6.049e-02, -7.053e-02, 6.931e-03, -1.654e-02, 1.932e-02, 2.112e-02, 6.621e-02, 7.266e-02, -5.272e-02, -5.286e-02, -2.885e-01, -2.012e-01, 5.525e-02, -1.721e-02, -4.575e-02, -9.210e-04)); + r += mul(s3_0, M4(-2.271e-02, 2.668e-02, -1.480e-01, -2.625e-02, 9.554e-03, 6.925e-02, -1.252e-01, 2.866e-02, 1.177e-02, -4.071e-02, -2.070e-02, -2.731e-02, 8.067e-02, 1.324e-01, 5.591e-02, 1.092e-01)); + r += mul(s3_1, M4(2.863e-01, -3.245e-02, -2.369e-01, -2.863e-01, -2.117e-03, -4.875e-02, 3.885e-02, -1.088e-01, 3.018e-02, -7.956e-02, 1.067e-01, 4.768e-03, 2.319e-01, 4.443e-01, 
1.081e-01, -2.955e-01)); + r += mul(s3_2, M4(2.579e-01, -1.163e-01, -4.524e-02, 1.974e-01, -3.204e-02, 2.183e-02, -9.418e-03, 1.514e-03, -7.422e-02, -8.652e-02, -1.831e-02, 7.031e-02, 1.678e-02, -6.474e-02, 3.298e-03, 1.898e-01)); + r += mul(s3_3, M4(-1.287e-01, 6.977e-02, 2.034e-01, 1.203e-01, -1.577e-01, -1.159e-01, 5.411e-01, 1.079e-01, -5.869e-02, -9.771e-02, 4.457e-02, 1.618e-02, -1.096e-01, 7.700e-02, 1.746e-01, -4.051e-02)); + r += mul(s3_4, M4(-2.308e-01, -1.267e-01, -1.459e-01, 5.467e-03, -1.625e-01, 2.188e-01, -1.188e-01, -9.377e-03, -2.440e-01, -8.329e-02, -1.366e-01, 4.782e-02, -3.538e-01, 1.737e-01, -1.531e-01, -1.443e-01)); + r += mul(s3_5, M4(-4.365e-01, 5.178e-01, -2.166e-01, -2.063e-01, 3.981e-02, -1.054e-02, 5.701e-03, -4.639e-02, 4.680e-03, 2.433e-02, -3.099e-02, -1.344e-01, -4.936e-03, -2.510e-02, 9.593e-02, 6.336e-03)); + r += mul(s3_6, M4(-9.464e-03, 6.306e-02, 9.266e-02, 1.239e-01, 9.411e-02, -8.883e-03, 1.447e-02, 3.888e-02, 3.209e-03, -4.987e-02, -7.248e-02, -4.528e-02, 3.389e-02, 3.453e-02, 4.363e-02, -1.139e-02)); + r += mul(s3_7, M4(-1.101e-01, -7.831e-02, -6.882e-02, -4.597e-02, -2.022e-01, -1.681e-01, -8.124e-02, -1.088e-01, 7.872e-02, 5.759e-02, 6.814e-02, 9.876e-02, -3.474e-02, 5.673e-02, 1.178e-01, 1.018e-01)); + r += mul(s3_8, M4(-4.424e-02, -1.753e-01, -1.880e-01, -1.999e-01, 1.189e-02, 2.421e-02, 7.007e-03, -2.655e-02, -3.715e-02, 6.751e-02, -7.333e-02, -7.710e-02, 2.700e-02, 2.248e-02, 9.982e-02, 5.511e-02)); + r += V4(-5.051e-03, -7.374e-04, -9.013e-03, -1.041e-02); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 
1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 6 +//!DESC conv5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 
s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.543e-02, 7.950e-02, 4.966e-02, -1.433e-03, 8.905e-03, -6.500e-02, 3.529e-02, -6.468e-03, -9.021e-04, -2.890e-03, 2.941e-02, 1.722e-02, -9.993e-02, -4.554e-02, 1.116e-01, 4.605e-03)); + r += mul(s0_1, M4(-1.089e-01, -6.759e-02, 5.507e-02, 2.826e-02, 3.102e-02, 4.981e-03, -6.957e-03, -2.970e-02, 3.893e-02, -1.985e-02, 3.482e-03, -3.115e-03, 6.911e-02, -1.665e-01, -3.388e-02, -1.252e-02)); + r += mul(s0_2, M4(1.046e-01, 2.638e-02, -8.177e-02, -9.476e-02, -2.313e-02, 9.290e-03, -1.408e-03, 4.678e-02, -1.106e-01, 4.373e-02, 4.853e-02, 4.161e-02, 2.201e-02, -4.520e-02, 1.641e-02, 1.639e-03)); + r += mul(s0_3, M4(-3.788e-02, -1.936e-01, 4.114e-02, 1.067e-02, 2.481e-02, 1.257e-02, -2.408e-02, 9.182e-02, -1.143e-02, -2.779e-02, -5.009e-02, -3.573e-02, -4.170e-01, -3.232e-01, 1.870e-01, 1.240e-02)); + r += mul(s0_4, M4(-2.012e-01, 3.719e-03, 4.258e-01, 1.357e-01, 1.017e-01, -9.143e-02, 1.062e-01, -2.157e-01, 2.065e-01, -1.497e-01, 2.811e-01, -1.238e-01, 4.285e-01, -1.183e-03, -2.368e-01, 3.175e-01)); + r += mul(s0_5, M4(2.946e-01, 5.902e-02, 3.874e-02, -9.653e-02, -9.581e-02, 2.797e-02, -5.432e-02, -3.276e-03, -2.409e-01, 4.022e-02, 6.466e-03, -1.014e-01, 8.373e-02, 1.670e-02, 4.462e-02, 8.036e-02)); + r += mul(s0_6, M4(-8.663e-02, -2.484e-02, 3.960e-02, -4.742e-03, 5.179e-02, 8.916e-02, -2.668e-02, -2.848e-02, 9.887e-02, 1.283e-02, -2.283e-02, -8.960e-02, -1.223e-01, -1.672e-02, 6.586e-02, 2.669e-02)); + r += mul(s0_7, M4(-2.569e-01, 1.932e-03, 1.442e-01, -1.947e-04, 1.002e-01, 9.362e-02, 1.206e-01, 5.726e-02, -3.269e-02, 1.274e-01, 7.910e-02, -1.930e-01, 9.277e-02, 2.994e-02, -4.800e-02, 8.016e-02)); + r += mul(s0_8, M4(9.842e-02, 1.108e-01, -1.303e-01, -3.840e-01, -4.456e-02, -6.372e-02, 
-6.192e-02, 4.026e-02, -8.011e-02, -7.656e-02, -2.155e-02, 1.167e-02, 4.969e-02, -8.967e-03, -1.639e-02, 6.565e-02)); + r += mul(s1_0, M4(-3.505e-03, 5.798e-02, -1.063e-02, 1.098e-02, -5.275e-02, -1.071e-02, 9.760e-02, -4.337e-02, 2.637e-02, -4.918e-02, 1.090e-03, -1.405e-02, -6.812e-02, -5.049e-02, 1.111e-01, -3.104e-02)); + r += mul(s1_1, M4(-6.327e-02, 8.466e-03, 1.248e-01, 2.211e-03, 7.483e-03, 6.452e-02, -1.087e-01, 3.269e-02, 2.075e-02, -1.408e-01, 3.009e-02, -4.146e-02, 1.034e-01, 1.109e-01, -9.252e-02, -1.611e-02)); + r += mul(s1_2, M4(7.447e-02, 4.614e-02, -6.169e-02, -2.029e-02, -7.331e-02, -3.250e-02, 7.219e-02, 1.042e-02, -2.912e-02, 4.549e-02, -1.047e-02, 2.094e-03, -5.769e-02, -1.615e-02, 8.071e-03, 3.771e-02)); + r += mul(s1_3, M4(-4.766e-02, -1.712e-01, 8.735e-02, -5.360e-02, -1.975e-01, -2.133e-01, 8.035e-02, 5.819e-02, -7.093e-02, -2.900e-01, -3.236e-02, -2.934e-02, -3.232e-01, -1.972e-01, 2.268e-01, 1.010e-01)); + r += mul(s1_4, M4(-2.210e-01, -1.157e-01, 2.817e-01, 5.840e-02, 2.098e-01, -8.874e-02, 7.924e-02, -2.510e-01, 1.030e-01, 1.279e-01, 6.653e-02, 1.219e-01, 4.141e-01, 2.998e-01, -9.834e-02, 1.607e-01)); + r += mul(s1_5, M4(1.294e-01, -1.988e-02, -1.361e-01, 2.103e-01, -1.155e-01, -2.235e-02, 6.869e-02, -8.075e-02, -4.372e-03, 5.632e-02, -7.754e-02, 7.795e-02, -1.595e-01, -1.241e-01, -2.547e-03, 9.130e-02)); + r += mul(s1_6, M4(-4.901e-02, -4.760e-02, 1.173e-02, -6.623e-02, -2.803e-01, -1.136e-01, 2.054e-01, -7.351e-03, -1.322e-01, -2.660e-01, 3.918e-02, -8.464e-02, -1.010e-01, 3.265e-02, 6.085e-02, 7.092e-02)); + r += mul(s1_7, M4(-1.737e-01, 5.872e-02, 1.029e-01, -1.338e-01, 1.971e-02, -1.182e-03, 6.404e-02, 1.520e-01, 2.388e-01, -3.107e-02, -1.075e-01, -1.224e-01, 1.625e-01, -7.233e-03, -6.741e-02, 2.098e-01)); + r += mul(s1_8, M4(1.496e-01, 7.058e-02, -7.112e-02, -7.923e-03, -8.993e-02, -9.366e-02, -1.300e-02, 1.920e-01, 1.403e-01, 1.831e-01, 1.431e-01, -2.287e-01, 2.402e-02, 5.445e-03, 2.004e-02, 4.421e-02)); + r += mul(s2_0, 
M4(2.048e-02, -2.281e-01, -6.323e-03, -3.437e-02, -4.873e-03, -1.313e-01, 9.471e-02, -1.031e-01, 8.325e-02, 9.654e-02, -6.140e-02, 3.037e-02, -1.616e-01, -3.013e-02, 1.615e-01, 6.894e-03)); + r += mul(s2_1, M4(-1.950e-01, 3.048e-01, -1.785e-01, 1.902e-01, 9.630e-02, 2.068e-01, -1.684e-01, -1.342e-01, 4.692e-02, 6.126e-02, -7.899e-02, 9.330e-02, 3.255e-02, 3.098e-01, -2.201e-01, 4.671e-02)); + r += mul(s2_2, M4(1.306e-01, 5.318e-02, -5.240e-02, 2.361e-02, 2.423e-02, 2.361e-03, -1.695e-02, 1.569e-02, -7.172e-02, -3.968e-02, 1.209e-01, 7.186e-02, 5.879e-02, 1.591e-02, -1.891e-02, -2.890e-02)); + r += mul(s2_3, M4(2.569e-01, 4.588e-01, -2.766e-01, 7.439e-02, -2.793e-01, -1.519e-01, 7.798e-02, 2.029e-02, 2.249e-01, 2.073e-01, -2.353e-01, 3.969e-02, -3.564e-01, -1.221e-01, 2.605e-01, -1.442e-02)); + r += mul(s2_4, M4(-5.101e-01, -1.367e-01, 1.769e-02, -8.440e-01, 5.105e-02, 1.282e-01, 7.769e-02, 9.786e-02, -2.935e-02, 8.851e-02, 6.132e-01, 3.093e-01, -1.629e-01, -7.917e-02, -3.350e-01, 1.921e-01)); + r += mul(s2_5, M4(1.598e-01, 7.845e-02, 1.064e-01, 2.970e-03, 3.521e-02, 5.990e-02, 7.355e-03, 2.954e-02, -1.850e-01, 2.212e-01, -3.574e-02, -5.264e-01, 2.390e-01, 2.798e-01, 7.917e-03, -4.841e-02)); + r += mul(s2_6, M4(1.539e-01, 6.461e-02, -2.001e-02, 8.195e-03, -1.185e-01, -7.702e-02, 4.283e-02, -1.233e-01, 1.809e-01, 1.459e-02, -5.636e-02, -1.972e-02, -9.290e-02, 1.186e-01, 3.799e-02, 6.921e-02)); + r += mul(s2_7, M4(-1.350e-01, -1.079e-01, 3.874e-02, -2.217e-02, 7.493e-02, 1.194e-01, 3.591e-02, -1.228e-01, -4.547e-02, -2.423e-02, 6.285e-02, -9.256e-02, 6.649e-02, 6.761e-03, -1.018e-01, 1.880e-01)); + r += mul(s2_8, M4(1.154e-02, 5.337e-03, -8.154e-03, -4.658e-02, 5.640e-03, -1.535e-02, 3.469e-02, -3.478e-02, -1.819e-01, -1.338e-01, 2.321e-02, 2.736e-01, 1.002e-01, 5.359e-02, 5.718e-02, -6.956e-03)); + r += mul(s3_0, M4(6.467e-02, -1.265e-01, -5.698e-03, -7.443e-03, -1.703e-02, 4.604e-02, 2.747e-02, -1.545e-02, 1.014e-01, 2.919e-02, -5.269e-02, 4.818e-02, -8.014e-02, 
5.749e-02, 3.776e-02, 7.501e-03)); + r += mul(s3_1, M4(-1.627e-01, -3.031e-02, 3.391e-02, 4.149e-05, 6.013e-02, 8.038e-02, 1.545e-02, 1.997e-02, -1.785e-02, -3.410e-01, 1.219e-01, 1.513e-02, -7.337e-02, -2.446e-02, -7.831e-03, 2.064e-02)); + r += mul(s3_2, M4(5.945e-02, -7.145e-02, -4.252e-03, -1.227e-02, -4.884e-02, -2.366e-02, 4.214e-04, 4.615e-02, -3.871e-02, 3.228e-02, -7.452e-03, 3.501e-02, 9.101e-02, 3.086e-02, -8.161e-02, 4.753e-02)); + r += mul(s3_3, M4(1.265e-01, 5.028e-02, -2.140e-02, 6.421e-02, -4.819e-03, -1.641e-01, -3.700e-02, -6.100e-02, 1.873e-01, 2.147e-01, -2.009e-01, 1.324e-03, -7.643e-02, -1.167e-01, 1.724e-01, 2.491e-02)); + r += mul(s3_4, M4(-4.396e-01, -8.640e-02, -1.586e-01, -1.634e-01, 2.135e-01, 2.594e-01, 3.525e-01, -4.384e-01, 1.224e-01, 1.559e-02, 9.960e-02, -4.443e-02, -2.066e-01, 5.257e-02, -1.255e-01, 1.136e-01)); + r += mul(s3_5, M4(1.476e-01, 1.642e-02, 8.563e-02, -2.731e-02, -9.205e-02, -8.172e-03, -9.883e-03, 4.732e-02, -1.877e-01, -4.119e-02, -3.235e-02, 1.874e-02, 1.632e-01, 3.157e-02, -7.976e-02, 6.713e-02)); + r += mul(s3_6, M4(5.719e-02, 2.829e-03, -3.028e-02, -3.731e-02, -1.195e-01, -2.658e-02, 7.224e-02, -6.579e-03, 7.836e-02, -5.193e-02, -2.216e-02, -3.423e-02, -7.006e-02, 1.208e-02, 9.662e-04, 3.652e-03)); + r += mul(s3_7, M4(-1.694e-01, 5.317e-02, 1.652e-01, -4.481e-02, 7.641e-04, 2.080e-02, 1.120e-02, 3.510e-02, 7.017e-03, -1.044e-01, 4.685e-03, 4.015e-02, 1.265e-01, 1.437e-01, -9.223e-02, -6.538e-02)); + r += mul(s3_8, M4(2.532e-02, -4.911e-02, -7.204e-02, -3.564e-02, 9.314e-03, 5.000e-02, 8.495e-02, -5.174e-02, -1.079e-01, -5.239e-02, -3.180e-02, 5.520e-02, 2.376e-02, 3.436e-02, 7.244e-02, -9.291e-02)); + r += V4(-8.722e-04, 4.321e-03, -2.282e-03, 5.775e-04); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 
s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(7.753e-02, -5.337e-02, -2.432e-02, 3.644e-02, 1.594e-03, 2.936e-02, -2.457e-02, 9.446e-03, -1.292e-03, 1.233e-02, -3.333e-02, -1.896e-02, 2.404e-02, -8.731e-04, -1.313e-02, -5.993e-02)); + r += mul(s0_1, M4(-1.016e-03, 1.421e-01, -1.392e-01, -8.620e-02, 2.236e-02, -5.276e-02, -9.275e-03, 1.497e-01, 5.114e-02, 1.802e-01, 2.670e-02, 1.683e-02, -8.748e-04, -1.293e-01, 2.992e-01, 2.780e-01)); + r += mul(s0_2, M4(-7.056e-02, 2.695e-02, -1.473e-01, 3.070e-02, -3.678e-02, 5.208e-02, 2.452e-03, -5.092e-02, -8.107e-02, 1.028e-01, -9.323e-02, -1.948e-01, -1.106e-01, -1.169e-01, 6.257e-02, 4.575e-02)); + r += mul(s0_3, M4(-1.961e-01, 7.539e-02, -8.065e-02, -6.256e-02, -1.814e-01, 8.188e-02, -1.757e-02, -9.656e-02, -4.720e-02, -3.039e-02, 1.786e-03, -2.460e-02, 6.616e-02, 1.227e-02, -1.082e-01, 2.060e-01)); + r += mul(s0_4, M4(1.100e-02, 1.185e-01, -3.507e-01, 1.974e-01, 8.355e-03, -1.227e-01, 3.761e-01, 3.390e-01, 8.516e-04, -1.056e-01, 2.269e-01, -2.299e-01, -3.527e-01, -3.526e-01, -7.157e-02, 6.354e-02)); + r += mul(s0_5, M4(-9.008e-02, 4.770e-01, -9.990e-02, 9.218e-02, 7.512e-02, -1.304e-01, 9.839e-02, 9.985e-03, 1.453e-01, -3.162e-02, -4.388e-02, 7.887e-02, 3.180e-03, -9.248e-02, 1.566e-02, 5.009e-03)); + r += mul(s0_6, M4(3.981e-02, -3.872e-02, 6.997e-02, 2.283e-02, -3.285e-02, 5.390e-02, -2.957e-02, -8.100e-02, 1.132e-02, 6.093e-02, -5.962e-02, 2.166e-02, -3.606e-02, -1.190e-03, 4.312e-03, 4.615e-02)); + r += mul(s0_7, M4(-2.725e-01, 3.443e-02, 9.929e-02, -9.112e-03, 4.324e-02, 1.421e-01, 2.795e-02, 1.846e-03, 1.279e-01, -1.918e-02, -4.382e-02, 1.812e-01, -4.804e-02, 9.204e-04, -8.716e-02, 4.059e-02)); + r += mul(s0_8, M4(4.227e-02, -1.239e-01, -6.485e-02, 8.795e-02, 7.347e-02, 1.209e-01, 3.079e-03, -5.941e-02, -1.296e-01, 1.148e-01, -5.571e-02, -1.353e-01, -7.367e-02, -1.618e-02, 6.989e-03, -2.546e-02)); + r += mul(s1_0, M4(-6.146e-02, -4.772e-02, 
-9.417e-03, -4.165e-02, 2.001e-01, 4.540e-02, -4.097e-02, 3.659e-02, 2.014e-02, -6.690e-02, 4.933e-02, 8.231e-02, -9.543e-03, -3.356e-02, -2.012e-02, -5.326e-02)); + r += mul(s1_1, M4(1.254e-01, 1.032e-01, -7.584e-02, -1.386e-01, -1.665e-01, -4.887e-03, 3.620e-02, 6.679e-02, 3.288e-02, 3.521e-03, 4.558e-02, 1.208e-01, 5.012e-02, 4.727e-02, -2.972e-02, 1.239e-01)); + r += mul(s1_2, M4(1.979e-03, -9.131e-03, 1.800e-02, 3.867e-02, 2.082e-02, -2.502e-02, -9.056e-02, -9.712e-02, -8.861e-02, 4.792e-02, -2.188e-02, -5.227e-02, 4.840e-02, 7.105e-02, -7.533e-03, -1.297e-01)); + r += mul(s1_3, M4(1.216e-01, 8.527e-02, -2.377e-02, 4.553e-02, -3.099e-01, -8.314e-02, 9.121e-02, -1.404e-01, 3.008e-02, 2.271e-02, 2.118e-02, 1.255e-01, 6.427e-02, 1.129e-02, -8.356e-02, 1.902e-01)); + r += mul(s1_4, M4(-5.332e-01, 2.151e-01, -2.231e-01, -6.729e-02, 3.100e-01, -5.922e-01, 4.658e-01, 4.307e-01, -2.838e-01, 1.628e-01, -2.627e-02, -3.515e-01, -3.412e-01, -8.481e-01, -1.145e-01, 4.690e-01)); + r += mul(s1_5, M4(-4.161e-01, 2.098e-02, -1.141e-01, -1.561e-02, -5.537e-02, -3.509e-01, 1.350e-01, 2.163e-01, 2.016e-01, -1.842e-01, -4.780e-02, 2.205e-01, -8.208e-02, -1.956e-01, -4.500e-02, -2.913e-01)); + r += mul(s1_6, M4(-5.157e-03, -9.563e-03, 3.716e-02, 2.411e-02, 2.625e-01, 2.386e-02, 5.859e-02, 1.699e-01, 1.736e-01, -4.062e-02, 1.283e-01, 2.319e-01, 4.247e-02, 6.236e-02, -3.562e-02, -9.159e-02)); + r += mul(s1_7, M4(-1.881e-01, -8.441e-02, -4.476e-03, -2.962e-03, -1.410e-01, -1.377e-01, -1.603e-01, 2.566e-01, -1.171e-01, -2.909e-01, -7.991e-02, 3.955e-03, -1.577e-01, 8.067e-02, 3.065e-02, -7.451e-02)); + r += mul(s1_8, M4(3.017e-02, 5.592e-02, -1.845e-02, 8.121e-02, -1.605e-01, -1.450e-02, 6.103e-02, 5.926e-02, 1.401e-02, 7.801e-02, -1.884e-01, 1.805e-01, 1.250e-01, -4.373e-03, -2.044e-02, 7.077e-03)); + r += mul(s2_0, M4(-2.586e-02, -5.334e-02, -4.608e-02, 1.800e-01, 5.117e-02, 7.026e-02, -1.504e-01, 1.845e-01, 5.936e-02, -1.167e-01, 1.007e-01, 1.775e-02, 9.509e-02, 1.031e-01, 
-4.267e-02, -4.285e-02)); + r += mul(s2_1, M4(-2.268e-01, -3.782e-01, -1.416e-01, 1.976e-02, 7.927e-02, -3.233e-01, 4.022e-02, 1.397e-01, -1.285e-01, -2.301e-02, 2.178e-01, -3.169e-01, -1.043e-01, 1.333e-01, 6.284e-02, -2.571e-01)); + r += mul(s2_2, M4(1.330e-01, -2.228e-01, -2.477e-02, 1.212e-01, 1.048e-01, -3.967e-02, 6.519e-02, -3.451e-02, 2.996e-03, -8.184e-02, -3.909e-02, -4.447e-02, 1.670e-01, -4.006e-02, 5.001e-02, 7.491e-03)); + r += mul(s2_3, M4(-1.906e-01, 2.986e-02, -3.952e-02, -2.861e-01, 1.208e-01, 2.352e-01, 2.821e-02, -4.515e-02, 2.386e-02, -1.363e-01, 5.457e-02, 1.751e-02, 2.567e-01, 4.067e-02, -3.435e-02, 1.811e-01)); + r += mul(s2_4, M4(5.372e-01, 4.567e-01, 1.075e-02, -4.759e-02, 5.753e-02, 2.359e-01, -1.344e-01, -3.942e-01, 1.139e-01, 2.329e-01, 7.097e-03, -2.416e-01, -3.146e-01, -5.996e-01, -8.141e-05, 2.811e-01)); + r += mul(s2_5, M4(1.261e-02, 1.636e-01, -1.048e-01, 1.364e-02, 3.740e-03, 5.542e-02, -5.258e-02, -1.429e-01, 2.881e-01, -4.462e-01, 1.270e-02, -7.021e-03, -4.951e-02, -2.034e-03, 4.602e-02, 1.214e-01)); + r += mul(s2_6, M4(-6.033e-02, 1.222e-02, 3.018e-03, -1.194e-01, 1.189e-01, -4.994e-02, 6.138e-02, 6.122e-02, -2.687e-02, 2.007e-02, -7.344e-04, -3.446e-02, 6.520e-02, -1.479e-02, -3.500e-02, -2.706e-02)); + r += mul(s2_7, M4(1.186e-01, -6.526e-02, 1.112e-01, 3.259e-02, 4.759e-02, -1.519e-01, 6.760e-02, 2.415e-02, -2.772e-02, -9.989e-02, 3.411e-02, -2.095e-01, 8.407e-02, 2.159e-01, -4.676e-02, -1.920e-02)); + r += mul(s2_8, M4(8.961e-02, 5.177e-03, 2.837e-02, 8.839e-02, 5.019e-03, -2.271e-02, -2.850e-02, -6.803e-02, 8.845e-02, 1.518e-01, 1.899e-01, -3.774e-02, 3.282e-02, -5.420e-03, -3.044e-02, -8.935e-02)); + r += mul(s3_0, M4(-6.317e-02, -8.128e-02, 2.424e-02, 7.323e-02, 8.362e-02, 1.499e-01, -9.427e-02, -4.817e-02, -5.095e-02, -4.980e-02, 1.289e-02, 3.431e-02, 1.232e-01, 9.105e-02, -2.362e-03, -6.421e-02)); + r += mul(s3_1, M4(-3.092e-02, -2.956e-01, 1.516e-01, 4.510e-02, 9.056e-04, 8.930e-02, -6.518e-02, -9.671e-03, -3.079e-02, 
-1.072e-01, 1.636e-01, 8.242e-02, -1.800e-02, 9.257e-02, -2.550e-02, -7.577e-02)); + r += mul(s3_2, M4(2.229e-02, -1.460e-01, -6.113e-03, 1.165e-01, 2.291e-02, -2.013e-02, 5.773e-02, -8.558e-02, -1.616e-01, -2.072e-01, 1.440e-01, 6.408e-02, -4.857e-02, 2.835e-02, 1.128e-01, 1.248e-02)); + r += mul(s3_3, M4(-7.688e-02, -2.559e-02, 5.337e-02, -1.297e-01, -1.065e-01, 3.964e-03, 2.806e-02, -8.071e-03, -7.883e-02, -1.012e-01, 6.138e-02, -1.165e-01, -3.865e-02, 8.460e-02, -8.840e-02, 1.374e-02)); + r += mul(s3_4, M4(-6.546e-02, 3.511e-01, 2.069e-02, 5.183e-01, 3.118e-01, 4.113e-02, -2.531e-01, 1.565e-01, -7.322e-03, 1.533e-01, 7.628e-02, -2.852e-01, 8.712e-02, -2.433e-01, 7.957e-02, 4.118e-01)); + r += mul(s3_5, M4(-1.812e-01, -3.087e-02, -4.874e-02, -4.923e-02, -9.320e-03, -7.889e-02, -8.610e-02, -4.763e-02, 8.360e-02, -2.848e-02, -1.165e-02, -6.383e-02, -9.233e-02, 1.244e-01, -4.966e-02, -2.979e-02)); + r += mul(s3_6, M4(-2.472e-02, 4.788e-02, -2.114e-02, -1.386e-02, -7.752e-02, 6.096e-02, 3.628e-02, -6.224e-02, 6.418e-02, -5.069e-02, 1.566e-02, 5.723e-02, -1.027e-01, 7.279e-02, -7.078e-02, -3.996e-02)); + r += mul(s3_7, M4(1.666e-02, -1.710e-03, 6.469e-04, 6.369e-03, 2.621e-02, 2.647e-02, 1.027e-01, 7.724e-02, -1.024e-01, -8.304e-02, 1.842e-02, -5.779e-02, 1.711e-01, 8.228e-02, -1.305e-01, 8.329e-02)); + r += mul(s3_8, M4(-2.309e-02, 2.703e-02, -2.298e-02, -7.029e-03, 6.323e-02, 6.507e-02, -4.531e-02, -1.340e-02, -7.977e-02, -4.942e-02, 4.605e-02, 9.001e-02, 8.840e-02, 8.368e-02, -5.912e-02, -1.229e-01)); + r += V4(2.664e-03, 8.117e-03, -7.210e-03, -3.509e-03); + return r; +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = 
l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 7 +//!DESC conv6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, 
V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-7.726e-02, -4.236e-02, 2.663e-02, 7.717e-02, 1.578e-02, -3.045e-02, 1.833e-03, 4.928e-03, 2.055e-02, 1.234e-03, 1.293e-02, 8.221e-03, -2.347e-02, 4.309e-02, 7.447e-02, 4.002e-03)); + r += mul(s0_1, M4(2.106e-02, 1.496e-01, 2.554e-02, 9.246e-02, -2.261e-01, 9.247e-02, 2.955e-02, 3.709e-02, -9.295e-02, 1.226e-02, -2.886e-02, -6.831e-04, -1.196e-01, 2.725e-01, -2.178e-01, -1.039e-01)); + r += mul(s0_2, M4(3.396e-02, -8.075e-03, -6.267e-02, -4.931e-02, 1.142e-02, -4.036e-02, 4.313e-02, 9.018e-02, 2.037e-02, 2.323e-03, 5.455e-02, 7.263e-02, 6.421e-02, -3.649e-02, 3.168e-01, 8.844e-02)); + r += mul(s0_3, M4(-5.285e-03, -4.309e-02, -7.397e-02, -5.091e-02, 1.711e-02, 9.498e-02, 1.229e-02, 3.768e-02, 3.743e-03, 2.677e-02, -1.260e-02, 3.233e-02, -1.630e-01, -2.482e-02, -3.347e-03, -6.632e-03)); + r += mul(s0_4, M4(-1.568e-01, -9.216e-02, -1.668e-02, -4.316e-03, -1.176e-02, 3.826e-02, -5.394e-02, 6.991e-02, 1.248e-01, 2.796e-02, -1.266e-01, -2.623e-02, -1.338e-01, -4.132e-01, 4.030e-02, 5.239e-02)); + r += mul(s0_5, M4(-8.780e-02, 5.905e-02, 3.941e-03, 7.738e-02, 1.064e-01, 1.930e-02, -9.021e-03, 3.723e-02, -1.666e-01, -1.172e-03, -3.216e-01, 6.315e-02, -5.310e-02, -3.348e-02, -7.481e-02, 4.842e-02)); + r += mul(s0_6, M4(-1.376e-02, 2.783e-02, 1.790e-02, 3.019e-02, -1.654e-02, 5.290e-03, -2.644e-02, 9.440e-03, -1.162e-02, -1.778e-02, -9.626e-03, -1.727e-02, 6.551e-03, 3.601e-02, -7.306e-03, 9.867e-03)); + r += mul(s0_7, M4(7.459e-03, -1.180e-01, -2.086e-02, 7.975e-03, 3.646e-02, 1.958e-01, 3.334e-02, 1.391e-02, -1.209e-01, 2.599e-03, 7.582e-02, 2.697e-03, 3.305e-02, 3.455e-02, -5.884e-02, 2.060e-02)); + r += mul(s0_8, M4(6.161e-02, 
-2.655e-02, 3.691e-02, -2.139e-02, -9.580e-02, -1.344e-02, -8.940e-04, 1.652e-02, 4.274e-02, 4.742e-02, 1.043e-01, 6.114e-03, -2.154e-02, 3.635e-03, -7.209e-02, 2.929e-03)); + r += mul(s1_0, M4(-5.330e-02, 2.271e-02, -2.661e-02, 5.278e-02, 1.092e-02, -1.082e-02, 4.099e-04, 4.704e-02, -1.445e-02, -1.413e-02, 3.703e-02, -4.442e-02, -6.642e-02, 4.103e-03, 5.965e-02, 4.796e-03)); + r += mul(s1_1, M4(-9.926e-04, 2.275e-01, 5.207e-03, 4.746e-02, -2.941e-01, -5.993e-02, 1.567e-02, 1.336e-01, -1.455e-01, 1.303e-01, -3.239e-02, -1.908e-03, 5.867e-02, 8.483e-02, -1.097e-01, -1.861e-01)); + r += mul(s1_2, M4(1.195e-02, 1.569e-02, 1.077e-01, -1.050e-03, 1.581e-01, 3.738e-03, -6.575e-02, -1.782e-01, -5.437e-04, -3.055e-04, 1.013e-01, 1.508e-01, 9.356e-03, -1.633e-02, 1.667e-01, 8.674e-02)); + r += mul(s1_3, M4(-1.561e-02, 5.396e-02, -1.401e-02, -3.718e-02, 8.327e-02, 8.406e-02, 3.457e-02, -3.943e-02, 5.426e-02, 4.467e-02, -1.367e-02, -1.917e-02, -1.209e-01, -6.920e-02, -7.704e-02, -9.114e-03)); + r += mul(s1_4, M4(-4.008e-02, -7.207e-01, -7.651e-03, -2.095e-01, -5.950e-03, 1.458e-02, -8.664e-02, 2.716e-01, 3.749e-01, -2.539e-01, -1.156e-01, -5.679e-02, 3.828e-02, -2.838e-01, -7.992e-02, 2.495e-01)); + r += mul(s1_5, M4(1.640e-02, 1.746e-01, 5.193e-02, -8.520e-02, -1.726e-01, 2.218e-02, 4.205e-01, 1.427e-01, 1.426e-02, -1.424e-02, -4.810e-02, -7.034e-02, -4.420e-02, 4.693e-02, -1.059e-01, 6.651e-02)); + r += mul(s1_6, M4(1.277e-02, 1.624e-02, 2.739e-02, 5.491e-03, -1.327e-01, -6.239e-02, -6.986e-02, 4.944e-02, -8.849e-02, 1.346e-02, -2.343e-02, 8.993e-03, 1.826e-02, 4.062e-02, 2.256e-02, -2.188e-02)); + r += mul(s1_7, M4(-3.195e-02, 5.322e-03, 2.795e-02, 4.747e-02, 5.449e-01, 8.519e-02, 2.450e-01, -1.700e-01, -8.635e-02, -1.283e-01, -3.413e-03, 3.388e-02, 1.035e-03, 3.489e-03, -4.357e-02, -8.121e-03)); + r += mul(s1_8, M4(8.044e-02, -4.392e-02, 2.967e-02, 1.582e-02, -2.407e-01, -5.252e-02, -1.646e-01, 1.136e-01, 5.603e-03, -1.985e-02, 2.054e-02, 7.259e-03, -1.430e-02, 4.313e-02, 
-1.538e-03, -1.881e-02)); + r += mul(s2_0, M4(2.887e-02, -3.079e-02, -2.064e-02, -1.707e-02, 1.619e-03, 9.501e-03, -3.989e-02, 6.850e-02, -1.874e-02, 4.451e-02, 1.375e-02, 3.694e-02, -5.088e-02, 2.351e-03, 1.058e-02, 1.304e-01)); + r += mul(s2_1, M4(3.288e-02, 3.849e-02, -1.623e-02, 1.541e-02, -4.641e-02, -5.803e-02, -5.059e-02, 7.932e-02, 3.378e-02, 4.757e-02, 1.197e-02, -1.701e-01, -2.045e-01, -1.753e-01, -6.869e-03, 4.030e-01)); + r += mul(s2_2, M4(1.061e-02, -1.051e-02, 1.275e-02, -7.823e-03, -2.037e-03, 1.164e-02, -3.866e-03, -1.300e-02, 3.747e-02, -1.642e-02, 2.196e-02, 2.024e-02, 5.399e-02, 1.077e-02, -8.838e-02, -3.091e-02)); + r += mul(s2_3, M4(-3.355e-03, -6.519e-02, 9.655e-03, -2.412e-02, -2.784e-01, 3.036e-01, -6.591e-02, 5.922e-02, 5.079e-02, 2.057e-01, -4.058e-02, -1.246e-01, 8.695e-02, 2.195e-01, 1.155e-02, -3.856e-02)); + r += mul(s2_4, M4(-1.359e-01, -6.361e-02, -2.067e-02, 6.341e-02, -4.197e-01, 2.943e-01, -6.616e-02, 2.731e-01, -9.448e-02, -2.046e-01, -1.120e-01, -4.872e-01, 5.110e-01, 5.573e-02, -2.966e-02, -2.035e-01)); + r += mul(s2_5, M4(-2.759e-02, 1.413e-02, -3.434e-02, 1.415e-02, 4.479e-02, -3.119e-02, -3.333e-02, 6.124e-02, -6.661e-03, 6.655e-02, 1.934e-03, 9.448e-04, 2.794e-02, -2.752e-02, 2.138e-01, -8.479e-03)); + r += mul(s2_6, M4(2.669e-04, 5.510e-03, 1.853e-02, 7.035e-03, -1.453e-01, -4.035e-02, -4.686e-02, 3.419e-02, 7.741e-02, -4.406e-02, -4.407e-02, -3.379e-02, -1.623e-02, 3.556e-02, -4.110e-03, 2.244e-03)); + r += mul(s2_7, M4(7.366e-02, 3.837e-02, -9.561e-04, -6.028e-02, 1.728e-04, 3.019e-02, -4.455e-02, 5.838e-02, 2.192e-01, 1.857e-01, -3.371e-02, 1.257e-02, -1.802e-01, -1.366e-02, -5.060e-02, 1.007e-01)); + r += mul(s2_8, M4(4.923e-03, 1.485e-02, 1.286e-02, -3.308e-02, -7.387e-03, 1.748e-02, -2.415e-02, 3.404e-02, 1.977e-02, -9.480e-02, -2.739e-02, -2.710e-02, -4.596e-03, 3.783e-03, -2.672e-02, 1.560e-03)); + r += mul(s3_0, M4(1.821e-01, -1.726e-02, 6.138e-02, -2.267e-01, 3.341e-03, 3.308e-02, 2.540e-02, 5.495e-02, 
-6.068e-02, 1.206e-02, 3.137e-02, 9.503e-02, -8.975e-03, 4.392e-03, 3.276e-03, 8.800e-02)); + r += mul(s3_1, M4(-8.920e-02, 2.005e-02, 2.098e-01, 2.013e-01, -6.256e-02, 5.254e-02, -7.322e-02, -6.087e-02, -1.509e-01, -6.047e-02, -1.060e-01, 1.994e-01, -3.721e-02, -6.304e-02, -7.775e-02, 1.860e-02)); + r += mul(s3_2, M4(3.821e-02, 2.521e-02, -2.931e-01, -6.716e-02, 6.188e-02, -1.317e-02, 1.047e-01, -1.675e-02, -6.626e-02, 1.678e-02, 3.259e-02, 9.227e-02, 3.036e-02, 1.550e-02, 8.623e-02, 3.820e-02)); + r += mul(s3_3, M4(-3.170e-01, 1.141e-01, -2.329e-01, -8.852e-03, -2.495e-01, 3.291e-01, -1.958e-02, -5.070e-02, 6.349e-02, 1.136e-01, 2.229e-02, -8.225e-02, -9.059e-02, 2.346e-01, 2.239e-02, 4.994e-02)); + r += mul(s3_4, M4(4.100e-01, -5.625e-01, -8.633e-02, -1.264e-01, -5.683e-01, 3.076e-01, 6.387e-01, 7.517e-02, 1.931e-01, 6.261e-02, 9.888e-02, -1.452e-01, -6.821e-03, 1.317e-01, -1.053e-01, -8.811e-02)); + r += mul(s3_5, M4(-9.594e-02, 8.279e-02, 1.886e-01, -4.391e-02, -2.549e-02, -3.722e-02, -5.847e-02, 1.530e-01, 1.440e-02, -1.658e-03, -5.252e-02, -3.569e-03, -8.633e-02, -3.385e-02, -6.311e-02, -2.457e-02)); + r += mul(s3_6, M4(1.305e-01, -1.413e-02, 2.234e-02, -3.575e-02, -2.280e-01, -1.338e-01, -7.509e-02, 5.110e-02, 1.254e-02, -9.590e-02, -6.164e-02, -8.308e-03, -2.046e-02, 2.761e-02, -2.675e-02, 3.092e-02)); + r += mul(s3_7, M4(2.543e-02, -3.494e-02, -5.345e-02, -2.582e-02, -1.284e-02, -2.036e-01, 1.160e-01, 7.207e-02, 4.682e-02, 1.265e-01, 4.585e-02, -1.319e-02, -6.320e-02, 9.944e-03, 3.594e-02, 5.627e-02)); + r += mul(s3_8, M4(-1.190e-02, -1.152e-02, -6.325e-02, -1.505e-02, -2.987e-02, -4.424e-03, -7.566e-03, 1.253e-02, -2.009e-02, -4.585e-02, 2.023e-02, 1.135e-02, 5.201e-02, 1.751e-02, -7.444e-03, 1.437e-02)); + r += V4(1.496e-03, 1.976e-03, -2.228e-03, 2.339e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 
s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.417e-02, 2.377e-02, 7.056e-02, -1.708e-01, -4.606e-04, 1.689e-02, 4.567e-02, -1.430e-02, 2.483e-02, 3.123e-02, 4.566e-02, 2.764e-02, -1.146e-01, -3.580e-02, -1.005e-01, 6.299e-02)); + r += mul(s0_1, M4(9.670e-02, -8.422e-02, 1.885e-02, 2.737e-01, -6.124e-02, -3.411e-02, 1.177e-02, -4.768e-02, 8.543e-02, -8.313e-02, 4.539e-02, -1.623e-02, 2.000e-01, -5.159e-02, -1.927e-01, 2.705e-01)); + r += mul(s0_2, M4(-6.511e-03, 2.374e-02, -3.419e-02, 5.751e-02, 3.509e-03, -5.322e-03, -2.503e-02, -5.954e-02, 7.219e-02, -3.067e-02, -1.468e-02, 2.907e-03, -1.977e-01, -3.698e-02, -8.116e-02, 1.209e-02)); + r += mul(s0_3, M4(-7.616e-03, 1.184e-01, -8.993e-02, 4.361e-02, 1.499e-02, -4.169e-03, -4.891e-02, 3.895e-02, 3.504e-02, -2.033e-02, -3.223e-02, -6.037e-02, -7.856e-02, -5.490e-02, 6.653e-02, 8.377e-02)); + r += mul(s0_4, M4(1.822e-01, -2.446e-01, -2.585e-01, 1.245e-01, -5.140e-02, 1.930e-01, 1.942e-02, 1.680e-02, -2.178e-01, 1.537e-01, -1.011e+00, -4.548e-01, 3.506e-01, 2.489e-01, 2.640e-01, 1.546e-01)); + r += mul(s0_5, M4(7.701e-02, -3.282e-02, -4.543e-02, -4.894e-02, 3.782e-03, -9.876e-04, 5.202e-02, -2.252e-02, 2.125e-01, 1.396e-01, -7.587e-02, -2.342e-01, -2.900e-01, 5.066e-02, 2.059e-01, -1.003e-01)); + r += mul(s0_6, M4(1.741e-02, -5.800e-02, -1.900e-02, -1.103e-01, -1.544e-02, 1.248e-02, -2.188e-02, 2.753e-02, -6.159e-03, 3.643e-02, 2.510e-02, -1.613e-02, -1.972e-02, -5.235e-02, -3.681e-02, 5.366e-02)); + r += mul(s0_7, M4(1.740e-02, 1.441e-01, 3.796e-02, 2.267e-02, -2.051e-02, -1.564e-01, -6.488e-02, -6.801e-02, 3.368e-02, -6.264e-02, 1.791e-02, -5.944e-02, -4.683e-02, -1.256e-02, -6.540e-02, 3.546e-02)); + r += mul(s0_8, M4(-6.253e-02, -2.709e-02, 3.707e-02, 1.718e-02, -3.430e-02, 3.417e-02, -5.257e-02, -3.959e-04, 2.007e-01, -7.594e-02, -3.454e-02, -2.327e-05, -2.480e-02, 5.655e-02, 
-4.720e-02, 3.006e-02)); + r += mul(s1_0, M4(3.999e-03, 1.203e-03, -9.676e-03, -2.231e-02, 3.772e-02, 2.693e-02, 2.818e-02, 2.641e-02, -3.407e-02, -1.765e-02, -9.486e-04, -5.857e-03, -8.413e-02, 6.513e-03, -2.523e-02, 2.071e-02)); + r += mul(s1_1, M4(6.666e-02, -5.555e-02, -1.008e-01, 2.823e-01, -4.945e-02, 3.166e-03, 1.133e-01, -3.291e-01, 1.761e-01, -1.626e-02, 6.843e-02, 2.767e-02, 2.024e-01, -9.546e-02, -1.752e-01, 6.617e-02)); + r += mul(s1_2, M4(-1.210e-01, -6.037e-02, 5.096e-02, 8.161e-02, 1.310e-01, -2.582e-02, -6.073e-02, 5.804e-02, 5.883e-02, -5.329e-02, -1.231e-02, -2.863e-02, -5.061e-02, -2.119e-02, -9.351e-02, -8.943e-02)); + r += mul(s1_3, M4(-3.971e-02, -1.009e-01, -6.313e-02, 1.450e-01, 6.074e-02, -6.618e-02, -6.086e-04, 1.100e-01, 7.771e-02, 9.246e-02, -8.459e-02, -7.042e-03, -1.138e-01, 1.398e-01, -5.286e-02, 6.421e-02)); + r += mul(s1_4, M4(-3.669e-01, -6.429e-02, 4.852e-02, -5.484e-01, -1.444e-01, 5.232e-01, -2.359e-02, -2.425e-01, 3.525e-01, 1.542e-01, -1.489e-01, 1.304e-01, 2.430e-01, 3.256e-01, 2.770e-01, 4.015e-02)); + r += mul(s1_5, M4(-1.860e-01, 9.205e-02, -6.804e-02, 2.132e-01, 3.018e-01, -1.301e-02, 2.932e-01, -4.531e-01, -1.165e-01, 1.096e-01, -5.766e-02, 2.676e-02, -8.117e-02, -4.090e-02, 7.593e-02, -1.031e-01)); + r += mul(s1_6, M4(2.984e-02, 5.976e-03, 2.771e-02, -1.520e-02, 9.423e-03, 7.899e-02, -3.400e-02, -1.421e-01, -1.602e-02, -1.316e-02, -1.356e-02, -1.758e-02, -3.187e-02, -8.554e-02, -6.954e-02, 6.126e-02)); + r += mul(s1_7, M4(3.484e-03, 5.750e-02, 4.953e-02, 1.427e-02, 5.264e-02, 1.119e-01, -1.563e-01, -1.832e-02, 7.184e-02, 1.238e-01, 1.384e-03, -2.950e-03, -2.796e-02, -1.510e-02, -5.581e-02, 5.907e-03)); + r += mul(s1_8, M4(-2.979e-02, -1.805e-01, 4.289e-02, 7.547e-02, 1.453e-01, 1.155e-01, -6.317e-02, 2.787e-03, 6.409e-02, -2.811e-02, 2.758e-02, 1.464e-02, -4.431e-02, 2.005e-03, -3.745e-02, 6.827e-03)); + r += mul(s2_0, M4(2.466e-02, 3.345e-02, 1.015e-02, 1.515e-02, 1.833e-02, 8.333e-03, -2.040e-03, 6.401e-02, 2.044e-02, 
-6.089e-02, -5.738e-02, 3.871e-02, -1.920e-02, 5.305e-02, 5.707e-02, 2.513e-02)); + r += mul(s2_1, M4(7.018e-02, -2.544e-02, -2.494e-02, 1.088e-01, -3.509e-02, -8.734e-03, -1.365e-02, -9.361e-02, -2.252e-03, -1.209e-02, -3.689e-02, 7.149e-02, -3.120e-02, 2.036e-02, 4.506e-02, -3.936e-01)); + r += mul(s2_2, M4(-8.268e-03, -1.327e-02, 2.513e-02, -4.868e-03, -2.935e-02, 8.804e-03, -5.610e-03, 2.269e-02, 6.841e-03, -6.307e-04, -6.629e-02, -4.883e-03, 2.400e-02, -7.430e-04, -8.010e-03, 7.980e-02)); + r += mul(s2_3, M4(-1.232e-02, -2.083e-02, -1.672e-02, -2.160e-02, 1.786e-02, -2.434e-01, -7.226e-02, 1.562e-01, 9.118e-02, -1.116e-01, -6.319e-02, 2.136e-01, -3.159e-03, -1.786e-01, -6.991e-02, 4.110e-01)); + r += mul(s2_4, M4(-2.275e-02, -2.278e-02, -1.567e-01, 9.989e-02, -1.908e-01, 5.677e-02, -1.271e-01, -2.123e-02, -1.454e-01, 2.650e-01, 2.278e-01, -1.885e-01, -9.669e-02, 1.841e-01, 7.851e-02, -6.762e-01)); + r += mul(s2_5, M4(-3.343e-02, 3.704e-02, -3.857e-02, 8.299e-03, 2.396e-02, -5.124e-02, -5.434e-02, 7.997e-02, 3.406e-02, -4.136e-02, 1.870e-02, 3.525e-02, -6.752e-02, -8.309e-02, 1.068e-01, 3.379e-03)); + r += mul(s2_6, M4(7.981e-03, -4.048e-02, 3.325e-02, -9.389e-03, -1.876e-02, 2.147e-02, -6.131e-02, 9.452e-02, 1.583e-02, 1.418e-01, -1.309e-02, 1.419e-02, -4.582e-03, -4.512e-03, -3.895e-02, 4.452e-02)); + r += mul(s2_7, M4(-3.083e-03, 3.093e-03, 2.493e-02, 3.899e-02, 4.875e-02, -1.968e-01, -1.069e-01, 6.543e-02, -1.266e-01, -2.185e-01, -9.342e-02, 9.424e-02, -5.126e-02, -1.718e-02, 9.158e-03, -3.601e-02)); + r += mul(s2_8, M4(-3.967e-02, -3.457e-02, 2.545e-02, 2.582e-02, 1.908e-03, 3.888e-02, -4.385e-02, -2.935e-02, 7.741e-02, 3.255e-02, -3.235e-02, 6.256e-02, 1.187e-02, -2.497e-02, -3.396e-02, 1.806e-02)); + r += mul(s3_0, M4(1.117e-02, -7.495e-02, -4.312e-02, 6.636e-05, -3.893e-02, -4.891e-02, -8.095e-02, 8.555e-02, -1.992e-02, 1.728e-03, -3.447e-02, -8.472e-02, -1.013e-02, 2.594e-02, 6.256e-02, 1.169e-01)); + r += mul(s3_1, M4(-1.737e-01, 9.502e-02, 
-3.787e-02, 1.131e-01, 4.237e-02, 1.059e-02, -8.165e-02, 9.365e-03, 1.581e-03, 1.907e-01, 1.439e-01, -2.903e-02, 9.400e-03, -4.874e-02, -1.740e-02, -9.145e-02)); + r += mul(s3_2, M4(2.043e-01, 1.205e-03, -3.967e-02, 5.084e-02, -3.772e-02, -1.594e-02, -2.356e-02, 4.866e-02, -3.912e-02, -1.182e-01, 1.228e-01, 2.044e-02, -2.094e-02, -6.302e-03, -8.608e-03, 4.632e-02)); + r += mul(s3_3, M4(1.028e-02, -2.428e-01, 4.209e-02, 2.619e-01, 8.150e-02, -2.480e-01, -3.820e-02, 9.968e-02, 4.816e-02, -4.436e-02, -3.762e-03, 6.175e-02, -3.872e-03, -2.305e-01, -1.069e-01, 1.947e-01)); + r += mul(s3_4, M4(-7.723e-01, 9.837e-02, 3.011e-02, 1.353e-01, -1.254e-01, 2.501e-01, 1.086e-02, -4.026e-01, -2.775e-02, -2.070e-01, -1.837e-01, -1.658e-01, 1.984e-02, 1.307e-01, 1.635e-01, -1.135e-01)); + r += mul(s3_5, M4(1.416e-02, -9.369e-02, 1.069e-01, -8.305e-02, 1.437e-01, -3.897e-02, -6.218e-02, 3.772e-02, 6.288e-02, 1.493e-01, -5.597e-02, 7.343e-02, -8.453e-02, 4.008e-02, -1.136e-02, 1.524e-02)); + r += mul(s3_6, M4(6.399e-02, 1.405e-01, -1.857e-03, 3.541e-02, -1.237e-02, 1.110e-01, -4.929e-02, 2.277e-02, 1.195e-02, 1.679e-01, 2.287e-02, 1.645e-02, 8.215e-03, -2.489e-04, -3.430e-02, -2.937e-02)); + r += mul(s3_7, M4(-5.599e-02, 3.509e-01, 7.784e-02, 3.488e-02, 5.222e-01, -1.892e-01, 1.668e-02, 1.938e-04, -8.913e-03, -3.322e-02, -6.398e-02, -2.276e-02, 5.969e-04, -1.078e-01, -5.033e-03, 6.744e-02)); + r += mul(s3_8, M4(-1.150e-02, -5.770e-02, -1.917e-02, 1.003e-01, 2.864e-02, -6.083e-04, -2.847e-02, -1.435e-02, 4.296e-02, -9.737e-04, -4.802e-02, 6.634e-02, 4.267e-02, -3.589e-02, 1.502e-02, 1.632e-02)); + r += V4(-1.351e-03, -1.537e-03, -2.278e-03, 6.430e-04); + return r; +} + +void Pass7(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, 
-1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 8 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0, t1 +//!OUT OUTPUT + +#define l0(x, y) 
V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.665e-02, -8.034e-03, -1.825e-02, 3.618e-03, -1.946e-03, -9.426e-03, -8.760e-03, 1.811e-02, 5.287e-02, -2.521e-02, 1.465e-02, -2.874e-02, -1.678e-04, -5.970e-03, -2.613e-03, -4.475e-03)); + r += mul(s0_1, M4(1.441e-01, 5.668e-02, 9.456e-03, -6.067e-02, 2.085e-01, 9.162e-02, 3.905e-02, -2.220e-02, -9.590e-02, -4.507e-02, -2.980e-02, 3.405e-02, 5.457e-02, 1.862e-02, 2.969e-02, -3.874e-03)); + r += mul(s0_2, M4(-3.635e-02, 4.605e-02, -2.088e-02, -1.038e-03, -5.091e-02, 3.757e-02, -2.177e-02, -3.267e-03, 3.186e-02, 3.149e-03, 7.400e-03, 3.707e-03, -2.361e-02, -4.816e-03, -1.245e-02, 1.252e-02)); + r += mul(s0_3, M4(-2.961e-02, -3.446e-03, 9.278e-03, -3.365e-02, 1.645e-03, -1.782e-02, -5.623e-03, -4.475e-02, 2.640e-02, -1.904e-02, -1.195e-01, 1.154e-01, -2.134e-02, 6.999e-03, -1.240e-02, -2.264e-02)); + r += mul(s0_4, M4(-4.989e-01, -3.369e-01, 5.449e-01, 3.551e-01, 2.530e-01, 1.923e-01, 4.218e-01, 2.627e-01, -4.863e-02, 1.521e-01, 6.711e-02, -2.250e-01, -8.437e-02, -2.255e-01, -1.239e-02, 7.682e-02)); + r += mul(s0_5, M4(-3.139e-02, -1.480e-01, -2.284e-02, 8.665e-02, -4.610e-02, 4.704e-02, -3.240e-02, 8.506e-02, 1.377e-03, 2.643e-02, 2.989e-03, 8.917e-03, -2.035e-02, 3.411e-02, 1.877e-02, -4.490e-02)); + r += mul(s0_6, M4(9.737e-03, 1.686e-02, -3.094e-02, 2.318e-02, -6.414e-03, -2.440e-03, 2.925e-03, -1.545e-03, 4.439e-02, 9.093e-04, 3.674e-01, -3.203e-01, 5.221e-02, 2.167e-02, 4.061e-02, 4.237e-02)); + r += mul(s0_7, M4(2.009e-02, -4.379e-03, 6.470e-02, -6.519e-02, -2.176e-02, -8.537e-03, 2.645e-02, 1.331e-02, 3.747e-02, 4.555e-02, 
3.777e-02, -1.084e-02, 1.072e-01, 1.385e-01, 1.934e-02, -9.520e-02)); + r += mul(s0_8, M4(1.143e-02, 1.461e-02, -4.811e-02, 1.175e-02, -1.676e-02, -2.844e-02, -4.268e-02, -2.958e-02, -5.801e-03, 1.108e-03, -1.108e-02, 4.385e-02, -9.585e-03, -1.232e-02, -3.864e-02, 1.812e-02)); + r += mul(s1_0, M4(6.319e-04, -1.321e-02, -4.810e-03, 3.134e-03, 1.949e-02, 8.926e-04, 3.095e-04, 1.404e-02, 2.945e-02, -1.625e-02, -1.022e-03, -8.526e-03, 1.548e-02, -1.219e-02, -9.063e-03, -8.040e-03)); + r += mul(s1_1, M4(1.512e-01, 5.866e-02, 2.923e-02, -2.301e-02, 1.588e-01, 1.093e-01, 4.457e-02, -2.858e-02, -9.663e-02, -4.469e-02, -3.315e-02, 1.949e-02, 6.519e-02, 4.569e-02, 2.538e-02, -2.218e-02)); + r += mul(s1_2, M4(-4.665e-02, 2.594e-02, -1.993e-02, 1.175e-02, -3.333e-02, 2.913e-02, -9.827e-03, 3.326e-05, 3.552e-02, -3.633e-03, 8.832e-03, 9.121e-04, -2.972e-02, -8.319e-03, -1.437e-02, 1.409e-02)); + r += mul(s1_3, M4(-1.715e-02, 2.335e-02, -1.714e-02, -3.837e-02, -8.227e-02, 8.352e-04, 6.854e-04, -3.553e-02, 3.693e-02, -8.114e-02, -1.259e-02, 1.459e-02, -1.087e-01, 3.946e-04, 2.406e-02, -1.555e-02)); + r += mul(s1_4, M4(-4.320e-02, -1.420e-01, 1.501e-01, 1.979e-01, 4.555e-01, 5.655e-02, 4.005e-01, 4.561e-01, -6.498e-02, 1.753e-01, 7.693e-02, -2.573e-01, -1.655e-01, -3.975e-01, -6.450e-03, 1.195e-01)); + r += mul(s1_5, M4(-3.430e-02, -4.627e-02, -3.033e-02, 9.612e-03, -4.261e-02, 9.353e-02, -5.574e-02, 7.956e-02, -1.190e-03, 1.946e-02, 9.613e-03, 2.960e-02, -1.353e-02, 6.958e-02, 1.888e-02, -3.233e-02)); + r += mul(s1_6, M4(-3.424e-03, 8.490e-03, 4.527e-03, 2.695e-02, 9.218e-03, -3.838e-04, 5.782e-03, -1.419e-02, 6.274e-02, -1.145e-02, 4.382e-02, -1.952e-02, -1.567e-03, 1.579e-02, 1.813e-01, 3.017e-02)); + r += mul(s1_7, M4(3.557e-02, 1.086e-02, -6.621e-02, -9.656e-02, 4.175e-02, 5.237e-02, -1.381e-01, -1.110e-01, 3.749e-02, 5.361e-02, 3.329e-02, 8.597e-02, -9.733e-03, -8.791e-02, 1.724e-01, 3.238e-01)); + r += mul(s1_8, M4(1.282e-02, 7.556e-03, -2.733e-02, -1.689e-02, -3.745e-02, 
-4.170e-02, -3.300e-02, -4.508e-03, 3.246e-03, 5.182e-03, -1.597e-02, 3.762e-02, 3.515e-03, 8.707e-04, -2.837e-02, 2.238e-02)); + r += mul(s2_0, M4(2.606e-02, -4.157e-03, 1.889e-02, -1.571e-02, -1.264e-03, 1.164e-02, 3.781e-02, 2.570e-02, -1.546e-02, 5.382e-03, 3.603e-02, 2.516e-02, 6.918e-03, 1.161e-02, 3.155e-03, -1.756e-03)); + r += mul(s2_1, M4(-4.998e-02, -6.958e-02, -5.387e-02, 2.511e-02, -1.032e-02, -1.567e-01, 1.284e-01, 4.528e-02, -5.753e-02, 1.843e-02, 6.067e-02, 1.268e-02, -8.389e-02, -3.496e-02, -1.261e-02, 1.271e-02)); + r += mul(s2_2, M4(2.201e-02, 1.038e-02, 1.562e-03, 6.786e-03, 3.414e-02, 6.890e-02, -2.765e-02, 2.985e-02, 1.279e-02, -3.082e-02, 1.793e-03, 9.163e-03, 2.754e-02, -2.456e-02, 7.180e-03, 1.141e-03)); + r += mul(s2_3, M4(6.714e-02, 1.266e-02, 1.392e-03, 7.777e-02, 1.648e-02, -3.943e-02, 7.426e-02, -1.875e-02, 3.643e-01, 7.651e-02, -3.760e-01, -3.663e-02, -2.532e-02, -5.826e-03, 1.486e-02, 2.171e-02)); + r += mul(s2_4, M4(3.235e-01, -3.337e-01, 2.851e-01, -4.555e-01, 2.373e-01, 3.584e-01, -4.600e-01, 1.548e-01, 5.534e-01, 6.629e-01, 9.779e-03, -4.056e-01, 8.883e-02, -1.558e-01, -1.062e-01, -1.018e-01)); + r += mul(s2_5, M4(-4.431e-02, 1.741e-02, -2.464e-02, 4.318e-02, -1.063e-02, -9.543e-02, 9.203e-02, 3.816e-03, -8.191e-02, 3.298e-02, -3.944e-02, -1.789e-02, -3.580e-02, 9.644e-02, 2.248e-02, -5.676e-03)); + r += mul(s2_6, M4(-1.707e-02, -1.871e-03, 6.918e-03, -9.750e-03, 1.813e-02, 1.149e-02, 1.491e-02, -2.354e-03, -6.421e-02, -2.200e-03, 6.571e-02, 3.823e-02, 1.606e-02, 1.188e-02, -2.462e-04, -6.835e-03)); + r += mul(s2_7, M4(-1.709e-02, -3.317e-02, 7.031e-02, -7.599e-02, -3.559e-03, 5.356e-03, 1.223e-01, 5.016e-03, -3.943e-02, -5.634e-02, 6.522e-02, 1.959e-02, 2.838e-03, 3.219e-02, 8.960e-02, -2.077e-02)); + r += mul(s2_8, M4(-1.128e-02, -1.450e-02, -2.515e-02, -1.278e-02, 1.182e-02, -6.085e-03, -3.478e-02, -7.803e-05, 2.175e-03, -1.893e-02, -3.165e-02, 1.144e-02, -4.075e-03, -1.776e-02, -2.098e-02, 2.191e-02)); + r += mul(s3_0, 
M4(3.673e-02, -7.948e-03, 1.825e-02, -2.366e-02, -4.290e-03, 1.377e-02, 8.323e-03, 1.511e-02, -5.298e-03, 2.701e-03, 8.641e-03, 4.330e-03, 1.059e-03, 1.412e-02, -7.184e-03, 3.384e-04)); + r += mul(s3_1, M4(-1.180e-01, -1.494e-02, -2.777e-02, 3.327e-02, 2.241e-02, -4.968e-02, -2.362e-02, 6.181e-02, 3.152e-02, 5.492e-03, 1.043e-02, -2.723e-02, -6.920e-02, -5.721e-02, -2.918e-03, 9.618e-03)); + r += mul(s3_2, M4(4.164e-02, 4.799e-03, 7.672e-03, 6.442e-03, 2.289e-02, -5.849e-03, -1.497e-02, 3.935e-02, -9.540e-03, -5.052e-03, -6.008e-03, 3.086e-04, 1.775e-02, 5.478e-03, 6.428e-03, 3.996e-03)); + r += mul(s3_3, M4(-4.285e-02, 2.518e-02, -6.202e-02, 7.750e-02, 3.723e-02, -1.612e-02, 1.900e-03, 1.359e-02, 3.029e-03, 2.731e-03, -5.163e-03, -2.669e-02, -3.451e-03, 1.188e-02, 2.654e-02, 4.026e-02)); + r += mul(s3_4, M4(1.114e-01, 2.722e-02, 8.689e-02, -2.510e-01, 3.922e-02, 1.319e-01, 1.630e-01, -7.910e-02, 2.532e-01, 1.401e-01, 1.096e-01, 1.772e-01, 3.884e-01, -4.444e-01, 5.710e-02, -2.594e-01)); + r += mul(s3_5, M4(-3.528e-02, 1.519e-03, -1.285e-02, 2.613e-02, -5.286e-02, 3.211e-02, 1.899e-02, -8.061e-03, -6.238e-02, 7.851e-03, -1.807e-02, -3.191e-02, -3.676e-02, 5.701e-02, 6.615e-03, 1.207e-02)); + r += mul(s3_6, M4(-8.811e-03, -2.217e-03, -8.144e-03, -2.373e-02, 1.499e-02, 6.994e-03, 2.934e-02, -5.142e-03, -6.236e-03, 7.265e-04, -2.347e-03, 1.380e-02, 1.357e-02, 1.125e-02, 7.815e-03, 1.489e-03)); + r += mul(s3_7, M4(-2.411e-02, -3.528e-02, -1.130e-02, 2.333e-02, 1.379e-02, 2.206e-02, 9.893e-03, 3.702e-02, -2.773e-02, -7.861e-04, 9.765e-02, 7.302e-03, 3.641e-02, 2.749e-02, 2.540e-01, -1.831e-01)); + r += mul(s3_8, M4(-2.727e-03, -1.083e-02, -1.462e-02, -1.577e-02, -2.030e-03, -6.144e-03, -2.325e-02, 1.531e-02, 2.263e-04, -1.137e-02, -2.704e-02, -1.031e-02, -1.318e-02, -8.554e-03, -3.022e-02, 1.633e-02)); + r += V4(1.205e-04, 8.082e-04, 5.443e-04, -3.668e-04); + return tanh(r); +} + +void Pass8(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = 
(Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 
yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-8x16C-NVL-DN.hlsl b/src/Effects/CuNNy/CuNNy-8x16C-NVL-DN.hlsl new file mode 100644 index 000000000..2ab594bec --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-8x16C-NVL-DN.hlsl @@ -0,0 +1,4027 @@ +// CuNNy 8x16C BILINEAR RGB NVL DN - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-DN-D16N08 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t4; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t5; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t6; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t7; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1, t2, t3 + +#define l0(x, y) min16float((dot(float3(-1.941e-01, -3.865e-01, -8.377e-02), O(INPUT, float2(x, y)).rgb) + 2.427e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(3.079e-02, -3.606e-03, 7.865e-02, -7.318e-03) * s0_0; + r += V4(-8.047e-03, 1.049e-02, -8.037e-02, 1.078e-02) * s0_1; + r += V4(2.018e-03, -5.329e-03, 8.914e-02, -2.185e-03) * s0_2; + r += V4(2.427e-01, -1.099e-01, -4.625e-02, -3.691e-01) * s0_3; + r += V4(-2.399e-01, 1.115e-01, -4.162e-02, 3.721e-01) * s0_4; + r += V4(-1.869e-02, -1.139e-02, 3.210e-02, 2.956e-04) * s0_5; + r += 
V4(1.169e-01, -2.234e-01, -1.335e-02, -1.347e-02) * s0_6; + r += V4(-1.421e-01, 2.203e-01, -5.437e-02, -6.029e-04) * s0_7; + r += V4(8.644e-03, 1.055e-02, -3.855e-04, 9.967e-03) * s0_8; + r += V4(8.018e-03, 1.569e-02, 1.090e-02, -7.794e-05); + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(7.324e-02, 1.679e-02, -2.775e-02, -4.778e-02) * s0_0; + r += V4(2.549e-01, 4.843e-02, 5.584e-02, 7.739e-02) * s0_1; + r += V4(1.709e-02, -8.911e-02, -4.865e-02, -1.851e-02) * s0_2; + r += V4(-6.274e-02, -7.121e-02, 1.284e-01, 1.029e-01) * s0_3; + r += V4(-2.921e-01, -1.194e-01, -6.886e-02, 2.502e-01) * s0_4; + r += V4(-1.090e-02, -3.670e-02, 7.626e-02, 3.154e-02) * s0_5; + r += V4(9.342e-03, 4.821e-02, -1.915e-02, -6.984e-02) * s0_6; + r += V4(5.284e-03, 6.002e-02, -4.112e-02, 1.237e-01) * s0_7; + r += V4(-5.183e-03, 1.393e-01, 8.273e-03, -1.178e-02) * s0_8; + r += V4(1.318e-02, 1.522e-02, 4.021e-03, -8.088e-02); + return r; +} + +V4 f2(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(1.577e-01, 5.275e-03, -8.819e-02, -1.712e-02) * s0_0; + r += V4(-1.033e-01, 1.714e-01, -2.117e-01, -2.830e-02) * s0_1; + r += V4(-6.247e-02, -2.103e-02, -2.257e-03, 4.623e-02) * s0_2; + r += V4(-2.031e-02, 7.706e-02, -6.031e-02, -3.312e-01) * s0_3; + r += V4(-1.796e-01, 2.363e-01, 1.926e-02, 1.713e-01) * s0_4; + r += V4(9.624e-02, 8.355e-02, 1.085e-01, 1.431e-01) * s0_5; + r += V4(3.225e-04, -7.018e-02, 3.764e-02, -6.763e-02) * s0_6; + r += V4(5.291e-02, -4.092e-01, 1.667e-01, 9.630e-02) * s0_7; + r += V4(7.371e-03, -7.103e-02, 2.365e-02, -1.196e-02) * s0_8; + r += V4(1.425e-02, -3.870e-03, -1.067e-02, 7.370e-03); + return r; +} + +V4 f3(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, 
min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(2.076e-03, 1.123e-02, 5.660e-02, -2.469e-02) * s0_0; + r += V4(8.450e-02, -8.344e-03, 3.408e-01, 1.068e-01) * s0_1; + r += V4(-8.521e-02, -8.863e-02, -8.628e-03, 4.745e-02) * s0_2; + r += V4(4.301e-02, 1.143e-02, -6.893e-02, -1.578e-01) * s0_3; + r += V4(8.960e-02, 3.037e-01, -3.176e-01, 4.196e-02) * s0_4; + r += V4(-1.172e-01, -2.426e-01, -8.905e-03, 2.253e-01) * s0_5; + r += V4(-4.505e-02, -7.804e-03, 1.783e-02, -8.501e-02) * s0_6; + r += V4(-1.606e-01, -1.452e-02, -1.241e-02, -1.597e-01) * s0_7; + r += V4(1.909e-01, 3.577e-02, 6.756e-03, 1.028e-02) * s0_8; + r += V4(-4.116e-03, 1.699e-02, -1.220e-02, -2.878e-03); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, 
V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.048e-02, -4.829e-03, 1.116e-01, 1.424e-01, 4.054e-02, -1.403e-02, -1.093e-02, -4.677e-02, 1.253e-01, 2.540e-02, 1.268e-02, 7.303e-02, 6.252e-02, 1.872e-01, -8.308e-02, -1.259e-01)); + r += mul(s0_1, M4(-4.819e-02, 9.732e-02, 2.710e-03, 4.293e-02, -4.048e-02, -7.517e-02, -1.502e-01, -9.363e-02, -1.166e-01, -4.531e-02, 1.796e-01, 2.567e-02, -2.013e-01, 1.364e-01, -4.303e-02, -2.691e-02)); + r += mul(s0_2, M4(1.476e-01, 3.921e-02, 1.519e-01, 5.807e-02, 3.850e-02, -2.336e-01, -2.001e-01, 6.151e-03, 5.383e-02, 6.078e-02, -1.792e-01, -1.423e-01, 8.391e-02, 2.738e-01, 1.784e-01, -7.098e-02)); + r += mul(s0_3, M4(4.426e-04, -1.720e-01, -5.887e-02, 1.816e-02, 1.950e-02, 2.097e-01, -5.867e-02, 1.822e-02, -4.042e-03, -1.147e-02, -6.860e-02, 3.117e-02, 8.652e-02, 1.681e-01, -6.179e-02, -8.912e-02)); + r += mul(s0_4, M4(-2.212e-01, 2.744e-01, -3.287e-02, 1.385e-02, -2.971e-03, 2.905e-02, 4.532e-02, 4.762e-02, -2.109e-02, 5.882e-02, -4.547e-02, -2.030e-02, 1.673e-01, 6.945e-02, -3.483e-01, -4.171e-01)); + r += mul(s0_5, M4(-4.755e-02, 2.782e-01, 8.658e-02, -3.461e-01, 2.831e-02, 7.165e-03, 5.473e-02, 9.912e-02, -5.727e-02, 4.351e-02, -1.463e-01, 1.089e-01, -7.061e-02, 3.919e-01, 1.370e-01, 6.642e-02)); + r += mul(s0_6, M4(1.101e-01, 5.662e-02, -7.494e-02, -5.565e-03, 1.182e-01, -4.261e-02, 5.721e-02, 2.424e-02, 1.077e-01, 1.178e-02, 3.188e-02, -1.370e-02, 2.044e-02, 8.463e-02, -9.320e-02, 1.874e-02)); + r += mul(s0_7, M4(1.014e-02, 
1.127e-01, 3.595e-02, -2.266e-01, -9.668e-02, 3.770e-02, 1.041e-01, 2.042e-02, -9.701e-02, -2.058e-02, -4.635e-02, -1.055e-01, -2.100e-01, -1.200e-01, 1.076e-01, 5.583e-01)); + r += mul(s0_8, M4(1.595e-01, -1.046e-02, -1.695e-02, 2.403e-02, -3.490e-02, -1.954e-01, -1.325e-02, -1.459e-02, 1.135e-02, -1.278e-01, 3.800e-02, -4.423e-02, -8.423e-02, -4.600e-01, -3.075e-02, 2.350e-01)); + r += mul(s1_0, M4(1.528e-02, -1.405e-02, -6.905e-02, -5.089e-02, 1.860e-02, -1.111e-02, 8.663e-02, -5.265e-02, 7.609e-02, 6.255e-02, -1.408e-01, 1.661e-01, -5.898e-02, -2.493e-01, 5.922e-02, -4.831e-02)); + r += mul(s1_1, M4(2.318e-01, -1.965e-01, -1.191e-02, 9.833e-03, 1.083e-01, 4.304e-01, 1.066e-01, -1.447e-01, 1.249e-01, 7.425e-02, 4.746e-02, 2.711e-01, -5.051e-02, -1.087e-01, -2.176e-02, 2.572e-01)); + r += mul(s1_2, M4(7.448e-02, -1.738e-01, -1.739e-01, 8.456e-02, 2.159e-01, 4.333e-01, -1.027e-01, -3.007e-01, -4.220e-02, 2.373e-01, -1.648e-01, -5.700e-02, -4.307e-02, -2.232e-01, -7.669e-02, 1.161e-01)); + r += mul(s1_3, M4(2.425e-03, 7.625e-02, -5.265e-02, 6.556e-02, 7.800e-02, -5.827e-02, -7.143e-02, -1.148e-01, -8.064e-02, 1.301e-02, -1.257e-01, 1.360e-01, -5.066e-02, -1.077e-01, 1.705e-02, 5.452e-02)); + r += mul(s1_4, M4(2.348e-01, -1.722e-01, 7.330e-02, 5.327e-01, 1.861e-01, 1.807e-01, -3.935e-03, 9.360e-02, 6.946e-03, -2.022e-02, 1.355e-01, -1.639e-01, 4.145e-01, -1.285e-01, -1.717e-01, -5.645e-01)); + r += mul(s1_5, M4(1.669e-01, -3.700e-01, -2.197e-01, 2.970e-01, -3.760e-01, 8.613e-02, 1.677e-02, -1.083e-01, 6.986e-02, 2.914e-01, 1.835e-01, -3.219e-02, -3.564e-01, -4.215e-02, 1.380e-01, 2.394e-01)); + r += mul(s1_6, M4(6.843e-02, 5.395e-02, -3.803e-02, 1.370e-01, -1.090e-02, -1.215e-01, 1.153e-02, -4.736e-02, -6.143e-02, -1.871e-01, 2.793e-01, 1.721e-01, -8.387e-03, -7.740e-03, 1.398e-01, -3.166e-02)); + r += mul(s1_7, M4(-5.026e-02, -1.521e-02, -4.912e-02, -1.959e-01, 2.314e-02, -4.775e-02, -1.317e-01, 8.754e-02, 3.595e-02, 2.588e-01, -1.679e-01, -1.083e-01, -3.263e-02, 
2.192e-03, 1.870e-01, 3.609e-01)); + r += mul(s1_8, M4(1.043e-01, 1.864e-01, -3.583e-01, 5.765e-02, -7.832e-02, -4.087e-02, -3.872e-02, 1.526e-01, -1.825e-01, -1.559e-01, -9.546e-02, 3.329e-01, 1.625e-01, -1.244e-03, -8.165e-02, -2.106e-01)); + r += mul(s2_0, M4(4.537e-02, 8.269e-02, -7.632e-02, -2.650e-02, -5.914e-02, -2.708e-02, -5.644e-02, -1.955e-02, 4.075e-01, 1.023e-01, 2.545e-01, -3.611e-02, -4.924e-01, 2.777e-01, 2.361e-01, 8.845e-02)); + r += mul(s2_1, M4(1.618e-02, -4.293e-02, -2.222e-01, 9.574e-02, 5.111e-02, -7.128e-02, -8.863e-02, -9.884e-02, 2.190e-01, 2.622e-01, 4.013e-01, -2.096e-03, -5.959e-01, -5.596e-01, 2.032e-01, -5.020e-01)); + r += mul(s2_2, M4(-2.909e-02, -4.977e-02, -1.716e-01, 7.754e-02, -3.726e-02, -6.215e-02, -2.769e-02, 4.118e-02, 3.981e-01, -9.391e-02, 3.138e-01, 6.588e-03, -5.657e-02, 3.557e-01, 5.208e-02, 1.875e-01)); + r += mul(s2_3, M4(-1.199e-02, 3.765e-02, 2.132e-01, 1.092e-01, 7.937e-02, 8.123e-02, 3.154e-02, 1.558e-01, -3.848e-01, 4.161e-02, -4.289e-01, 2.959e-01, -3.045e-01, -1.637e-01, -1.188e-01, -2.289e-01)); + r += mul(s2_4, M4(-1.106e-01, -6.425e-02, -2.421e-05, 2.504e-01, -5.712e-02, -4.485e-02, 9.015e-02, 1.155e-01, -4.607e-01, 4.294e-01, -8.332e-02, 8.106e-03, -5.400e-01, -5.825e-01, -4.254e-01, -6.588e-01)); + r += mul(s2_5, M4(-6.044e-02, -1.510e-02, -1.153e-01, 1.034e-01, -3.919e-02, 1.693e-01, -9.993e-02, 9.887e-02, -4.767e-01, -5.224e-01, -2.356e-01, -2.719e-01, 2.220e-01, -5.790e-02, -2.353e-01, 3.756e-01)); + r += mul(s2_6, M4(-3.249e-02, 1.989e-01, -9.129e-02, -2.266e-01, 9.810e-02, 1.517e-02, -6.743e-02, -7.139e-02, 5.856e-02, -1.223e-01, 1.849e-01, -2.400e-01, -6.638e-01, -1.431e-01, 7.543e-02, 7.866e-02)); + r += mul(s2_7, M4(1.129e-01, -2.504e-01, -1.268e-01, -4.894e-01, -2.064e-02, -6.057e-02, 1.197e-01, -2.185e-01, -9.475e-02, -5.783e-03, 1.411e-01, 1.097e-02, -6.736e-01, 2.996e-01, 2.517e-02, 5.610e-01)); + r += mul(s2_8, M4(7.648e-02, -8.256e-02, -1.898e-03, -6.823e-02, -3.014e-02, 2.219e-02, 
-1.916e-02, 9.177e-02, -1.586e-01, 6.034e-02, -1.241e-01, 8.531e-02, -9.582e-02, 1.067e+00, -1.194e-01, 4.834e-01)); + r += mul(s3_0, M4(2.375e-01, -4.091e-01, 1.832e-01, 1.283e-01, -6.383e-03, 7.594e-02, 2.752e-01, -1.586e-02, 3.441e-02, -5.528e-02, 6.215e-02, -5.897e-02, 6.831e-02, 1.216e-02, 4.011e-02, -3.186e-02)); + r += mul(s3_1, M4(6.683e-02, 2.236e-01, 3.607e-01, -7.426e-02, 2.785e-03, 1.162e-01, -2.118e-01, -1.945e-03, -1.978e-02, -3.249e-03, 1.293e-01, -1.713e-01, 1.218e-01, -1.215e-02, -1.414e-01, -6.262e-02)); + r += mul(s3_2, M4(-3.481e-02, 1.937e-01, 5.413e-02, -6.549e-02, -1.993e-02, -9.367e-02, -1.667e-01, -3.937e-02, 2.967e-02, -7.572e-02, -1.416e-01, 3.481e-02, 1.576e-01, 6.027e-02, -3.635e-03, -7.787e-02)); + r += mul(s3_3, M4(1.250e-01, -4.078e-01, 1.158e-01, -1.665e-02, -3.614e-03, -1.915e-01, 2.157e-01, -5.540e-01, -9.113e-02, 1.063e-01, 7.709e-02, 4.088e-02, 2.007e-01, -8.304e-02, -3.002e-02, -5.822e-02)); + r += mul(s3_4, M4(-4.140e-02, -8.432e-02, 2.778e-01, 2.684e-01, -2.473e-01, -6.407e-02, -1.096e-01, -1.436e-01, 7.597e-02, 3.470e-02, 1.170e-01, -8.363e-02, 1.282e-01, -5.355e-03, -3.813e-02, 2.595e-02)); + r += mul(s3_5, M4(8.598e-02, -1.880e-01, 1.063e-01, -3.625e-02, -1.067e-02, -1.114e-01, 4.480e-02, -7.637e-02, 1.680e-02, 1.438e-01, 8.682e-02, 5.675e-02, 1.712e-01, -1.034e-01, -5.088e-02, -3.905e-02)); + r += mul(s3_6, M4(1.316e-01, 2.414e-02, 1.710e-01, -1.727e-02, 8.122e-02, -1.222e-01, 1.751e-01, -1.420e-01, 2.528e-02, -6.473e-02, 1.683e-03, 4.588e-02, 8.350e-03, 1.496e-02, -4.137e-03, -2.057e-02)); + r += mul(s3_7, M4(2.034e-01, 5.782e-01, 3.159e-01, 3.472e-01, 1.365e-01, -2.080e-01, 3.101e-02, 1.994e-01, -5.950e-02, -1.722e-01, -6.752e-02, -7.090e-02, 4.903e-02, -3.817e-02, -5.804e-02, 7.004e-02)); + r += mul(s3_8, M4(8.316e-02, -5.188e-02, 2.791e-02, 2.633e-01, 8.128e-02, -6.263e-02, 4.355e-03, 1.811e-01, 1.124e-03, 1.370e-01, -1.033e-01, 9.403e-02, 4.089e-03, 1.682e-01, -1.840e-02, 1.889e-01)); + r += mul(s4_0, M4(6.883e-02, 
-3.365e-02, -7.749e-02, -1.125e-02, -5.791e-02, 8.547e-02, -1.313e-01, 8.004e-02, 1.737e-01, -1.435e-01, 5.896e-02, 3.570e-01, 6.774e-03, -9.935e-02, -7.111e-02, -2.184e-02)); + r += mul(s4_1, M4(2.542e-02, 4.588e-02, -2.912e-02, 7.918e-02, -1.258e-01, 1.255e-01, 2.762e-01, 8.645e-02, 1.868e-01, -4.082e-02, 1.071e-01, -8.159e-02, 1.061e-01, -7.220e-02, 7.972e-02, -1.144e-01)); + r += mul(s4_2, M4(-3.943e-02, -2.530e-03, -1.421e-01, 1.234e-02, -6.156e-02, -5.961e-02, 1.371e-01, -8.217e-02, -1.241e-02, 2.655e-02, 1.023e-01, 2.034e-02, -2.526e-02, -2.962e-01, -1.047e-01, -5.024e-02)); + r += mul(s4_3, M4(-3.914e-02, -3.351e-02, -9.769e-02, 3.761e-02, -4.687e-02, 5.776e-03, -1.007e-01, -1.576e-02, 2.036e-02, 8.528e-02, -1.367e-01, -3.148e-02, 3.450e-02, 5.910e-02, -1.594e-01, 6.574e-02)); + r += mul(s4_4, M4(-1.158e-01, 9.737e-02, 4.901e-02, 9.995e-02, 3.487e-01, 2.029e-01, -1.692e-01, -3.025e-01, 2.957e-02, 1.805e-01, 2.607e-01, -2.922e-01, -3.176e-02, 7.012e-04, -6.591e-02, 1.762e-01)); + r += mul(s4_5, M4(1.198e-02, 1.014e-01, -1.126e-01, -2.023e-02, -7.713e-03, -3.943e-01, -3.786e-02, 1.089e-02, -1.520e-02, -1.209e-02, 1.198e-01, -3.087e-01, -4.675e-02, -1.251e-01, -1.260e-02, 1.825e-01)); + r += mul(s4_6, M4(-2.353e-03, 1.799e-02, -1.054e-01, -7.154e-02, 5.610e-02, -1.107e-01, -1.445e-01, -9.241e-02, -3.225e-02, -1.135e-01, -1.337e-01, -9.913e-02, -3.790e-02, -8.893e-03, 3.147e-02, 7.881e-02)); + r += mul(s4_7, M4(4.549e-02, 3.463e-03, -1.602e-01, -1.657e-01, -4.810e-02, -6.075e-02, 1.189e-01, 2.263e-01, 4.268e-02, 3.193e-02, 5.690e-02, -8.275e-02, -3.675e-02, 2.772e-02, 1.014e-01, 1.266e-01)); + r += mul(s4_8, M4(-2.574e-02, -6.567e-02, 5.514e-02, -2.007e-02, -6.985e-02, -3.602e-02, 1.411e-01, 2.116e-01, 2.192e-02, 2.803e-01, -4.727e-02, -1.305e-01, 7.359e-02, -2.774e-01, 1.381e-02, 2.945e-02)); + r += mul(s5_0, M4(1.886e-01, -3.069e-01, 1.924e-01, 8.122e-05, 1.030e-01, 3.917e-01, 9.103e-02, 1.352e-02, -5.548e-03, -9.670e-02, -1.490e-01, 4.872e-02, 1.341e-01, 
2.537e-01, 3.456e-02, 2.064e-01)); + r += mul(s5_1, M4(-7.483e-03, 1.570e-01, -3.750e-02, 7.532e-02, -1.204e-01, -7.274e-02, -1.715e-01, -6.946e-02, -8.478e-03, 2.234e-02, -6.538e-02, 8.187e-02, -4.276e-02, 3.250e-01, 7.092e-02, -1.634e-01)); + r += mul(s5_2, M4(-2.400e-01, -6.757e-02, -2.895e-01, -4.304e-02, -6.639e-02, 2.374e-01, 2.146e-02, -2.000e-01, -1.112e-01, 3.496e-02, 1.347e-01, -1.487e-02, -1.085e-01, 4.659e-01, 1.224e-01, -8.265e-02)); + r += mul(s5_3, M4(3.145e-01, 1.658e-02, -2.276e-02, -1.438e-01, -3.178e-02, 3.935e-02, -3.544e-02, -1.899e-01, -5.814e-02, -1.307e-01, -1.382e-01, -3.227e-02, 1.437e-02, 1.111e-01, -1.035e-01, -7.885e-02)); + r += mul(s5_4, M4(-3.229e-01, -9.307e-02, 3.023e-01, 6.295e-03, 2.145e-01, -1.877e-01, -1.613e-01, -6.503e-01, -8.287e-03, 1.163e-01, 9.040e-02, 3.824e-01, 7.898e-02, 3.488e-02, 5.911e-02, -2.907e-01)); + r += mul(s5_5, M4(1.529e-01, -3.209e-01, 4.762e-04, -1.543e-01, -1.263e-01, 1.533e-01, -8.658e-02, -1.062e-01, 1.029e-01, 1.971e-01, 3.683e-03, -6.256e-02, 3.022e-02, 3.967e-01, 3.126e-01, 1.873e-01)); + r += mul(s5_6, M4(2.686e-01, -4.941e-01, -1.025e-01, -1.662e-01, 1.417e-01, 1.175e-01, -1.813e-01, -2.293e-01, -1.127e-01, -6.958e-02, -9.037e-02, 1.713e-01, 1.146e-01, -1.494e-01, -1.107e-01, 9.983e-02)); + r += mul(s5_7, M4(-2.660e-01, 1.089e-02, 5.858e-02, 2.201e-01, -1.121e-02, -2.135e-01, -2.460e-02, -1.489e-01, -2.915e-02, 1.928e-01, 7.983e-02, 3.235e-01, -1.740e-01, -7.737e-02, 1.455e-03, -3.482e-02)); + r += mul(s5_8, M4(-1.029e-02, 3.737e-01, 1.250e-01, 3.295e-02, -7.525e-02, 3.134e-01, -1.192e-01, 1.571e-01, -4.867e-02, 7.209e-02, -2.612e-02, 4.915e-02, 1.351e-01, -4.645e-02, -9.808e-02, -3.062e-02)); + r += mul(s6_0, M4(7.136e-02, 3.892e-01, -2.039e-02, -9.361e-02, -3.301e-02, 2.963e-02, -1.642e-02, -4.920e-03, -1.372e-01, -1.734e-01, -4.992e-02, -3.279e-02, 2.520e-02, 1.099e-01, 8.215e-02, 8.987e-02)); + r += mul(s6_1, M4(-3.586e-02, 5.550e-01, -1.931e-02, 5.454e-02, -1.024e-01, -1.373e-03, -1.027e-01, 
-1.894e-02, 2.436e-02, -7.983e-02, 1.831e-01, -7.232e-02, -9.584e-02, 2.270e-01, 4.128e-01, -1.151e-01)); + r += mul(s6_2, M4(9.899e-03, -8.969e-02, -1.024e-01, -9.267e-02, -5.787e-02, -5.934e-03, 3.101e-02, -8.005e-03, 1.361e-02, 5.110e-02, 1.814e-01, 9.458e-03, -5.395e-02, -1.132e-01, 8.022e-02, -9.348e-02)); + r += mul(s6_3, M4(-1.691e-02, 1.228e-01, -4.414e-01, -4.493e-02, 6.157e-02, 1.725e-01, 1.978e-01, 9.639e-02, -2.200e-01, 1.519e-02, -6.653e-02, -1.240e-01, 1.104e-01, -2.867e-01, -1.772e-01, -1.694e-01)); + r += mul(s6_4, M4(4.175e-03, -3.042e-03, -4.069e-01, -1.799e-01, 7.398e-02, 1.979e-01, -2.934e-01, -3.110e-01, -2.453e-01, 3.449e-01, 2.932e-03, 4.631e-01, -5.239e-02, 4.868e-02, -1.146e-01, -3.701e-01)); + r += mul(s6_5, M4(-1.675e-02, -3.687e-01, 5.851e-02, -9.250e-02, 4.892e-02, -1.813e-01, 4.720e-03, 3.314e-02, -1.723e-01, -1.135e-01, 3.627e-02, 5.550e-02, 6.245e-02, 3.006e-01, 1.394e-01, 1.003e-01)); + r += mul(s6_6, M4(-3.407e-02, -2.386e-01, 5.349e-02, -2.292e-01, 9.524e-02, 4.638e-02, -7.525e-02, -2.798e-02, 1.035e-01, 1.299e-01, 3.415e-01, -1.354e-01, 7.460e-02, -1.440e-01, 1.353e-01, 3.060e-03)); + r += mul(s6_7, M4(-1.258e-01, 2.745e-02, 1.231e-01, 1.713e-01, -2.098e-02, 7.189e-02, -1.986e-01, 2.369e-01, 1.856e-01, 4.211e-01, 6.151e-03, 6.073e-01, -3.834e-02, -2.427e-01, 5.039e-02, 1.161e-02)); + r += mul(s6_8, M4(-1.989e-02, -4.339e-03, 2.842e-01, -1.031e-02, 1.369e-02, -1.014e-01, -6.263e-02, 1.122e-01, -1.563e-01, -6.126e-01, 1.238e-01, 5.685e-02, 4.562e-02, -1.745e-01, 1.086e-01, -1.466e-01)); + r += mul(s7_0, M4(-4.805e-02, -1.401e-01, 6.923e-02, 1.697e-01, -2.523e-01, 8.848e-02, 1.435e-01, -2.233e-01, -5.380e-02, -6.852e-02, -1.428e-01, 2.790e-03, -2.441e-02, -2.829e-02, -5.574e-02, 1.381e-01)); + r += mul(s7_1, M4(1.177e-01, -7.392e-02, 1.024e-02, -1.602e-01, 7.005e-02, -5.573e-02, 2.307e-01, -7.645e-02, -3.209e-02, -7.073e-02, -1.727e-01, -1.063e-02, 6.527e-02, -3.432e-01, -4.096e-02, 7.950e-03)); + r += mul(s7_2, M4(-8.284e-02, 
1.449e-01, -1.798e-02, -3.799e-02, -1.531e-03, -2.513e-01, -9.600e-02, -4.397e-02, -2.352e-02, 7.292e-03, -6.193e-02, 7.588e-02, -8.963e-02, 9.885e-04, -1.486e-01, 5.317e-02)); + r += mul(s7_3, M4(-6.310e-02, -2.401e-01, 4.052e-02, 1.000e-01, -8.855e-02, -2.369e-01, 2.686e-01, -7.257e-02, 2.088e-02, -2.243e-01, 3.193e-03, 1.760e-01, -6.365e-02, 9.996e-02, 2.837e-03, -8.671e-02)); + r += mul(s7_4, M4(8.164e-02, -4.756e-02, 2.309e-02, -2.026e-01, 4.132e-01, 4.186e-01, -2.936e-01, -2.088e-01, 5.347e-02, -2.918e-01, 2.646e-01, 9.397e-02, 4.247e-02, -6.289e-02, 7.642e-02, -2.434e-01)); + r += mul(s7_5, M4(-3.417e-02, 3.304e-02, -2.885e-02, 1.089e-02, -1.627e-02, 1.389e-01, 1.373e-01, 2.479e-02, 3.377e-02, 1.498e-01, 7.101e-02, 4.019e-02, 7.393e-03, 2.319e-01, -2.269e-02, 1.284e-01)); + r += mul(s7_6, M4(1.941e-02, 8.722e-02, -2.539e-02, 4.058e-02, -8.321e-02, -1.395e-01, -1.337e-01, 6.340e-02, 1.466e-02, 2.007e-01, 4.412e-02, -9.243e-02, -1.015e-01, 1.885e-01, -4.207e-02, 1.015e-01)); + r += mul(s7_7, M4(3.785e-02, 4.694e-02, 7.932e-02, 2.261e-02, 1.802e-01, -3.702e-01, 2.005e-02, 4.594e-03, 8.157e-02, -9.254e-02, 8.060e-02, -4.165e-01, 6.297e-02, 5.093e-02, 4.032e-03, -2.049e-02)); + r += mul(s7_8, M4(9.597e-03, 1.507e-01, -4.030e-02, 1.059e-01, 4.280e-02, -1.442e-01, -7.376e-02, -1.243e-01, -7.370e-02, 1.650e-01, 1.268e-01, -2.555e-01, 3.424e-02, -5.523e-02, -6.058e-02, -1.255e-01)); + r += V4(2.168e-01, -7.048e-03, 9.668e-03, -7.673e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, 
V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.536e-02, -1.410e-02, 1.647e-02, 3.362e-02, -5.931e-02, 5.242e-02, 8.443e-02, 1.832e-02, -6.297e-02, 1.131e-02, 1.886e-03, 7.406e-02, 2.468e-01, 3.622e-02, 9.487e-02, -1.406e-01)); + r += mul(s0_1, M4(1.708e-02, 5.549e-02, 8.309e-02, -3.319e-01, -8.135e-02, -6.274e-02, 4.542e-02, -1.088e-02, -6.131e-05, -6.698e-02, 2.377e-01, 1.074e-02, 2.328e-01, -5.083e-02, 5.312e-02, 3.142e-02)); + r += mul(s0_2, M4(2.108e-01, -2.520e-01, -1.814e-01, -2.618e-01, 1.928e-01, 1.105e-01, 3.550e-01, 2.058e-01, 3.686e-02, -1.207e-01, -3.100e-02, -3.347e-03, -2.387e-02, 1.090e-01, -5.270e-02, -1.661e-01)); + r += mul(s0_3, M4(-1.782e-01, 9.148e-02, 1.673e-02, 1.584e-01, 9.863e-02, -6.084e-02, -1.283e-01, 4.255e-02, 3.218e-02, -1.173e-01, -2.996e-02, -7.470e-02, -1.136e-01, -6.690e-02, 1.829e-02, -2.640e-01)); + r += mul(s0_4, M4(-2.231e-01, -6.642e-02, -1.165e-01, 4.834e-01, 1.864e-01, 1.146e-01, 6.213e-02, -1.358e-01, -2.288e-02, -9.995e-02, -1.871e-02, -3.403e-02, 4.270e-02, -1.442e-01, 2.548e-01, -3.048e-01)); + r += mul(s0_5, M4(6.320e-02, 5.797e-02, 1.611e-01, -1.258e-01, 5.108e-02, -6.501e-02, -2.157e-01, -5.767e-02, -4.806e-03, 5.627e-02, 8.263e-02, 6.277e-02, 1.150e-01, 1.506e-01, 3.555e-02, -2.758e-01)); + r += mul(s0_6, M4(6.143e-02, -1.360e-01, 6.878e-02, 1.956e-02, 4.439e-02, -6.323e-02, -5.701e-02, -3.591e-02, -3.441e-02, 3.028e-02, -9.030e-02, -2.044e-01, -1.354e-01, -1.885e-01, 6.433e-02, -8.726e-02)); + r += mul(s0_7, M4(2.577e-02, 1.404e-01, -5.843e-02, 9.891e-02, 4.416e-02, -1.072e-01, -2.792e-03, -2.544e-02, 8.820e-02, 1.357e-01, -5.828e-02, 1.665e-01, 8.421e-02, 7.829e-02, -1.086e-02, 1.252e-01)); + r += mul(s0_8, M4(1.450e-01, 4.198e-02, -3.822e-03, 9.691e-02, 2.170e-02, 1.878e-01, -1.555e-01, 1.012e-01, -2.978e-02, 7.988e-02, -5.955e-02, 6.559e-02, -1.048e-01, -2.250e-01, 6.121e-02, 
-1.443e-01)); + r += mul(s1_0, M4(1.716e-01, 1.011e-01, 8.449e-02, -1.361e-01, 7.292e-02, -2.243e-01, 9.731e-02, -8.865e-02, -2.096e-01, -4.386e-01, -1.018e-01, -1.794e-01, -1.590e-02, 1.191e-01, -1.300e-01, 7.400e-02)); + r += mul(s1_1, M4(-2.492e-01, 1.453e-01, 2.022e-01, 4.891e-02, -7.163e-02, -1.665e-01, 2.042e-02, 5.348e-01, 3.951e-01, 4.514e-01, 1.276e-01, 1.216e-01, 3.184e-02, 6.502e-02, -2.052e-01, 1.890e-01)); + r += mul(s1_2, M4(3.835e-02, -5.284e-02, -2.494e-01, 2.411e-01, -5.676e-03, -1.005e-01, 2.276e-01, -7.575e-01, 1.925e-01, 4.742e-01, -4.667e-01, -1.748e-01, -4.170e-01, -1.441e-01, -3.124e-03, 3.889e-01)); + r += mul(s1_3, M4(1.985e-01, -1.217e-01, 6.853e-02, -6.425e-01, -1.093e-01, 3.058e-02, 1.008e-01, -1.000e-02, -4.686e-02, 3.577e-01, 5.252e-02, 2.789e-02, 1.321e-01, 2.989e-01, -7.806e-02, 1.051e-01)); + r += mul(s1_4, M4(2.250e-01, -4.267e-01, 7.196e-02, -8.018e-01, 1.341e-01, 1.303e-01, 1.809e-01, 6.113e-01, 1.488e-02, 8.244e-02, 9.509e-02, -6.869e-02, 3.005e-01, 2.894e-01, 1.626e-01, 5.481e-02)); + r += mul(s1_5, M4(3.079e-01, -2.656e-01, -4.390e-02, 1.123e-01, 3.129e-02, 2.423e-01, 1.855e-01, 6.808e-02, 1.320e-01, 2.064e-02, -2.904e-02, -1.324e-01, -8.232e-02, -1.363e-01, -5.007e-01, 3.797e-01)); + r += mul(s1_6, M4(5.187e-02, -1.303e-02, -1.266e-01, -1.807e-01, 1.205e-01, -7.534e-02, 2.407e-01, 3.149e-02, 8.110e-03, -3.276e-02, 8.138e-02, -5.819e-02, -6.869e-02, -1.575e-01, -4.518e-02, 4.960e-02)); + r += mul(s1_7, M4(7.838e-02, -1.572e-01, -8.077e-02, -5.178e-01, 1.536e-02, -1.072e-01, 1.329e-01, 3.574e-02, -1.702e-01, -2.425e-01, -1.537e-01, -3.181e-01, 1.056e-01, 3.161e-01, -1.665e-01, 6.447e-02)); + r += mul(s1_8, M4(-1.255e-01, -1.266e-01, -3.164e-02, -2.112e-01, -1.026e-01, 2.861e-01, 1.551e-02, -1.131e-02, 6.266e-02, -2.642e-01, -2.798e-01, 2.402e-01, -2.326e-01, 2.260e-01, -8.163e-02, 5.749e-02)); + r += mul(s2_0, M4(-8.472e-02, 3.525e-02, -3.044e-03, 8.102e-02, -1.510e-01, 2.814e-02, -2.020e-01, -2.593e-02, -1.469e-01, 2.096e-01, 
1.935e-01, 1.393e-01, -3.591e-01, 4.131e-01, -7.662e-01, 2.411e-01)); + r += mul(s2_1, M4(3.957e-02, -6.504e-02, 3.046e-01, -8.791e-02, 2.511e-02, 1.664e-01, 2.942e-01, -9.583e-02, 1.185e-01, 6.388e-02, -1.285e-01, 2.930e-01, -3.716e-01, 9.728e-01, -2.521e-01, 5.220e-02)); + r += mul(s2_2, M4(7.598e-02, 9.158e-02, 2.824e-02, -6.247e-02, -1.126e-01, 8.460e-02, 5.086e-02, 1.760e-01, 3.303e-02, 2.893e-01, 3.190e-01, 6.632e-03, 6.721e-01, -1.753e-01, -2.229e-01, 2.627e-01)); + r += mul(s2_3, M4(1.787e-01, -1.077e-02, -5.198e-02, -7.528e-02, 1.769e-03, 1.579e-01, 1.184e-01, -8.013e-02, 6.146e-03, -2.568e-01, 7.215e-02, -1.016e-01, 3.469e-01, -3.423e-01, -6.154e-02, -1.618e-01)); + r += mul(s2_4, M4(1.202e-02, -1.281e-01, -1.330e-01, -1.741e-02, 9.794e-02, -1.963e-01, -8.935e-02, 8.661e-02, 3.888e-01, -8.260e-02, -1.119e-01, -1.267e-02, -1.810e-01, 7.144e-02, 5.592e-01, -2.022e-01)); + r += mul(s2_5, M4(-6.198e-02, -1.699e-03, 9.683e-02, -1.632e-02, 1.796e-03, 3.730e-02, -1.630e-01, -7.638e-04, 1.343e-01, -3.744e-02, -3.589e-01, -3.690e-02, 4.693e-01, -1.661e-01, -2.354e-01, -1.382e-03)); + r += mul(s2_6, M4(1.837e-02, -2.326e-02, -1.332e-01, -2.146e-02, 1.084e-01, -1.774e-01, 2.879e-03, 1.380e-02, 3.178e-02, 2.288e-01, 4.481e-02, 1.634e-01, -3.233e-01, -1.216e-01, 6.813e-02, 3.430e-02)); + r += mul(s2_7, M4(1.806e-01, -1.755e-01, 2.481e-02, 8.190e-02, -1.475e-02, -1.077e-01, 1.408e-02, -1.033e-02, 2.499e-02, -8.072e-02, -9.272e-02, -4.949e-02, -4.336e-01, 5.354e-01, 1.022e+00, -1.878e-01)); + r += mul(s2_8, M4(-3.080e-03, 6.252e-02, 4.382e-02, 1.736e-01, -1.568e-02, 1.294e-01, 1.588e-01, -3.209e-03, -5.668e-02, -2.989e-02, 2.673e-01, -1.652e-01, -1.984e-01, -2.244e-01, -1.289e-01, -2.206e-02)); + r += mul(s3_0, M4(-3.345e-02, -6.145e-02, 1.239e-01, 1.860e-01, 5.689e-01, 5.143e-02, -1.748e-01, -4.751e-01, -2.738e-01, -1.132e-01, -1.073e-01, -5.956e-02, 6.889e-02, 4.414e-02, -5.410e-02, 2.421e-02)); + r += mul(s3_1, M4(-4.755e-02, -3.658e-02, 1.941e-01, 1.070e-01, 
6.188e-02, -4.438e-01, 6.193e-02, -8.983e-02, 8.250e-02, -3.456e-02, -3.287e-03, 1.277e-01, 8.643e-02, 3.769e-02, -1.689e-01, -5.630e-02)); + r += mul(s3_2, M4(4.335e-02, 1.358e-02, 5.347e-02, 1.566e-01, -4.897e-02, -1.998e-01, 2.272e-01, -7.817e-02, -1.532e-01, 3.866e-02, -4.835e-02, -8.465e-03, -1.622e-01, -8.045e-02, 8.395e-02, -1.099e-01)); + r += mul(s3_3, M4(-2.014e-01, 8.535e-02, -1.486e-01, 4.427e-01, 5.379e-01, -2.338e-02, -1.982e-01, -1.629e-01, -9.256e-03, 1.563e-01, -2.911e-02, -6.092e-02, -7.215e-02, 1.460e-01, -4.224e-03, 4.965e-02)); + r += mul(s3_4, M4(5.919e-02, 3.212e-01, -3.621e-02, -4.207e-01, 6.892e-02, 8.369e-02, 4.878e-02, -1.324e-01, -1.539e-01, 9.862e-03, -1.181e-01, 1.386e-01, 4.491e-02, 4.382e-02, 5.232e-02, 1.089e-01)); + r += mul(s3_5, M4(-4.122e-02, 1.981e-01, 3.009e-02, 1.198e-01, 6.533e-01, -1.429e-01, -4.168e-01, -8.503e-02, 3.888e-02, 3.679e-02, -1.368e-01, -3.706e-02, 4.718e-02, -1.036e-01, 6.045e-02, 2.219e-02)); + r += mul(s3_6, M4(-1.123e-01, -1.056e-01, -3.821e-02, -8.801e-02, -4.420e-02, 4.255e-03, -2.447e-01, 4.728e-03, 1.282e-01, 1.008e-01, 1.862e-01, -9.000e-03, -1.476e-02, -6.480e-02, 1.721e-02, 8.208e-02)); + r += mul(s3_7, M4(8.583e-02, -5.756e-02, -2.266e-01, -3.114e-01, -2.369e-01, 1.099e-01, -2.180e-01, -2.813e-01, -8.890e-02, -8.020e-02, 1.347e-01, -5.877e-02, 1.781e-01, 1.528e-02, 2.447e-02, -6.176e-02)); + r += mul(s3_8, M4(-5.824e-02, -2.396e-02, -1.056e-01, 2.120e-01, 7.797e-02, -7.151e-03, 9.115e-02, -1.450e-01, 7.011e-02, 2.717e-02, -1.055e-01, -8.077e-02, -1.130e-01, -1.042e-01, -6.020e-03, 5.198e-03)); + r += mul(s4_0, M4(5.166e-03, 1.304e-01, -4.617e-02, 2.465e-01, -1.138e-01, -8.594e-02, 2.374e-02, 2.387e-01, -1.221e-01, 3.200e-01, -4.661e-02, -2.573e-01, -1.490e-01, 8.570e-02, 1.173e-01, -8.304e-02)); + r += mul(s4_1, M4(-1.341e-01, -9.204e-02, 1.326e-01, -4.510e-02, 1.127e-01, -1.129e-01, -9.003e-02, 6.497e-02, -8.473e-02, 2.957e-01, -5.822e-02, 2.001e-01, -1.312e-01, 1.851e-01, 6.737e-02, 2.108e-01)); + 
r += mul(s4_2, M4(-1.810e-01, 5.466e-02, 1.819e-01, 1.517e-01, 7.883e-02, -2.586e-02, -1.670e-01, -7.121e-02, -2.213e-01, 2.562e-01, -1.028e-02, 1.537e-01, -2.064e-01, 1.528e-01, 2.646e-01, 8.116e-02)); + r += mul(s4_3, M4(1.251e-01, -1.643e-01, -1.100e-01, -1.026e-01, -1.469e-01, 1.956e-01, -1.132e-01, -2.920e-01, -1.660e-01, 4.833e-01, 7.354e-02, 3.729e-01, 2.297e-01, 1.096e-01, 1.608e-01, -9.050e-02)); + r += mul(s4_4, M4(-1.269e-01, -1.451e-01, -1.771e-01, -2.583e-02, 2.799e-01, -7.998e-02, -7.126e-02, -3.134e-01, -1.531e-01, 2.349e-01, -6.403e-02, 3.341e-01, 5.373e-01, 3.720e-01, 5.513e-02, -3.110e-02)); + r += mul(s4_5, M4(2.867e-01, -7.715e-02, -2.043e-02, -1.149e-01, -7.275e-02, 1.572e-01, 1.648e-02, -1.160e-03, -8.993e-02, -1.441e-02, -2.222e-01, 1.281e-01, 6.476e-02, 2.251e-01, -2.359e-01, -1.092e-01)); + r += mul(s4_6, M4(-1.415e-02, 1.372e-01, -1.024e-02, -1.359e-01, -1.174e-01, 1.206e-01, 3.089e-02, 9.655e-02, -7.835e-02, 2.261e-01, 8.681e-02, 3.623e-01, 1.141e-01, -1.457e-03, -1.088e-01, -1.918e-01)); + r += mul(s4_7, M4(-1.089e-01, -7.624e-02, -2.815e-02, -6.304e-02, -7.850e-02, 2.300e-01, 1.237e-02, -1.772e-01, -1.581e-01, 1.061e-01, 1.307e-02, 2.340e-02, -1.419e-01, -1.174e-01, -2.259e-01, -2.288e-01)); + r += mul(s4_8, M4(1.302e-01, -4.664e-03, -3.681e-02, 5.637e-02, -1.678e-01, 8.718e-02, -2.442e-02, -7.711e-02, 4.939e-02, 2.992e-01, 1.165e-01, -4.479e-02, -1.839e-01, 1.086e-01, -9.941e-02, -4.750e-02)); + r += mul(s5_0, M4(2.046e-01, 1.038e-01, -4.133e-01, -2.010e-01, 2.893e-02, -1.592e-01, 1.398e-01, -7.004e-02, 1.399e-01, -1.197e-01, 2.010e-02, -6.009e-02, 1.121e-01, -1.981e-01, -1.484e-01, -6.293e-02)); + r += mul(s5_1, M4(2.744e-02, -1.539e-01, -4.901e-01, -1.259e-01, 7.296e-03, 1.802e-03, -3.523e-02, 2.218e-02, -2.658e-02, -1.041e-01, 1.138e-02, 9.445e-02, 3.299e-02, -2.628e-01, -1.952e-01, -2.199e-01)); + r += mul(s5_2, M4(1.606e-02, 1.991e-01, 1.412e-01, 1.806e-03, -2.459e-01, -5.529e-02, -1.762e-01, 9.896e-02, 2.059e-01, -1.678e-01, 
3.718e-02, 2.952e-02, 3.115e-01, -1.161e-01, -2.211e-01, -1.061e-01)); + r += mul(s5_3, M4(-3.352e-01, 1.077e-01, 2.678e-01, -4.436e-01, 1.087e-02, -1.374e-02, 2.687e-02, 6.731e-02, 5.617e-02, -9.116e-02, 1.330e-02, -9.804e-02, -1.134e-01, -2.236e-02, -1.948e-01, -7.179e-02)); + r += mul(s5_4, M4(2.647e-01, -2.525e-02, -2.016e-01, -1.311e-01, 3.206e-01, 2.642e-01, 9.656e-02, 5.895e-02, 5.513e-02, -2.987e-02, 8.391e-02, -1.445e-01, -2.961e-01, -5.127e-01, -1.732e-01, 3.098e-02)); + r += mul(s5_5, M4(-8.694e-02, 6.743e-01, 3.185e-01, 1.705e-01, -7.492e-02, 5.664e-02, -1.143e-01, 1.243e-01, 1.243e-01, -1.035e-01, 5.420e-02, -1.083e-01, -1.275e-01, -2.274e-01, -2.832e-01, -1.849e-02)); + r += mul(s5_6, M4(1.238e-01, 1.585e-01, 5.295e-01, 8.451e-02, 5.142e-02, -2.950e-02, 2.497e-02, 1.916e-01, -7.226e-02, -1.434e-01, 1.111e-01, -1.004e-01, 2.997e-02, -1.751e-02, -5.959e-02, -4.726e-02)); + r += mul(s5_7, M4(8.209e-01, 1.719e-01, 5.450e-01, -2.042e-02, 9.534e-02, 1.269e-01, 5.457e-02, -1.328e-02, -2.037e-01, 9.191e-02, -8.484e-02, 5.416e-03, 1.403e-02, -3.179e-02, 2.140e-01, -4.565e-02)); + r += mul(s5_8, M4(1.142e-01, 5.137e-01, 1.714e-01, 5.144e-01, -1.404e-02, -4.950e-02, 1.041e-01, 1.605e-01, -4.338e-02, 4.684e-02, -1.573e-02, -1.635e-02, 5.163e-02, -1.147e-01, 6.578e-02, 2.531e-01)); + r += mul(s6_0, M4(1.340e-01, -2.285e-01, -9.562e-02, 1.714e-01, 2.930e-01, 8.161e-02, -4.617e-02, -4.056e-02, -2.496e-03, 4.013e-03, -5.556e-02, 2.695e-02, 2.549e-01, -2.422e-01, -5.763e-03, -3.592e-01)); + r += mul(s6_1, M4(-2.073e-01, -2.901e-01, 2.882e-01, 1.224e-01, 2.254e-01, -6.127e-02, 1.563e-01, 1.424e-01, -1.351e-01, 1.496e-02, 2.001e-02, 2.628e-02, 8.940e-02, -3.824e-01, -2.008e-02, -2.165e-01)); + r += mul(s6_2, M4(1.411e-01, 7.237e-02, -7.323e-02, -6.832e-02, -2.638e-02, 2.173e-02, -5.183e-02, -2.793e-02, 1.588e-01, -3.605e-02, -1.937e-01, 2.666e-01, 3.727e-02, -1.306e-01, -1.939e-01, -2.043e-01)); + r += mul(s6_3, M4(-9.488e-02, -1.844e-01, 7.070e-02, -1.471e-01, 
-9.504e-03, -1.100e-01, 5.983e-02, -7.031e-03, 2.339e-01, 2.060e-01, -1.541e-01, -1.469e-01, -6.130e-02, -1.651e-01, 5.473e-03, -4.535e-01)); + r += mul(s6_4, M4(-9.408e-02, 1.051e-01, 5.935e-01, 2.440e-01, 4.207e-02, -1.485e-01, 2.241e-01, -1.543e-01, 4.195e-02, -1.073e-01, -3.652e-01, 7.859e-02, 2.443e-01, -3.602e-01, 7.919e-02, -3.963e-01)); + r += mul(s6_5, M4(-9.282e-02, -5.716e-03, 2.926e-01, 1.167e-02, 4.879e-02, 3.732e-02, -8.926e-02, 4.256e-02, -1.788e-01, 2.622e-01, 1.357e-01, 7.940e-02, 1.382e-01, 6.902e-02, -1.767e-01, -1.719e-01)); + r += mul(s6_6, M4(-1.696e-03, -1.124e-01, 1.223e-01, -2.895e-01, 1.031e-01, -7.886e-02, 3.023e-02, 7.795e-02, -4.661e-02, -9.979e-02, 6.993e-02, -7.016e-02, 4.502e-02, -7.555e-02, -6.096e-02, -3.440e-01)); + r += mul(s6_7, M4(-3.132e-02, -1.518e-01, 9.386e-02, 3.184e-02, -2.529e-02, 2.358e-01, -1.085e-01, 1.057e-01, 7.691e-03, -2.959e-01, 3.834e-03, 1.955e-01, 1.709e-01, -3.687e-02, 8.499e-02, 2.509e-02)); + r += mul(s6_8, M4(3.696e-02, 5.454e-02, -1.603e-01, 1.265e-01, -9.435e-02, 5.880e-02, -3.653e-02, 6.345e-02, -1.639e-02, -2.848e-01, -6.280e-02, 2.619e-01, 7.176e-02, -4.521e-02, 5.806e-02, 8.413e-02)); + r += mul(s7_0, M4(-8.326e-02, -6.503e-02, -2.698e-01, 1.909e-01, 2.131e-02, -2.712e-01, 7.258e-02, -5.686e-03, 2.315e-02, 1.176e-01, -3.460e-02, -2.184e-01, -1.923e-01, 4.375e-02, -3.431e-03, 1.264e-01)); + r += mul(s7_1, M4(1.767e-01, 1.411e-01, 5.236e-02, -2.954e-02, -5.819e-02, -1.108e-01, -1.118e-02, -9.685e-02, -2.081e-02, -7.350e-02, 5.443e-02, -1.465e-01, -1.765e-01, 1.862e-01, -3.695e-02, 2.506e-01)); + r += mul(s7_2, M4(9.326e-02, 6.618e-03, -7.719e-02, -1.290e-01, -3.512e-02, -8.315e-02, -3.536e-01, 8.163e-02, -8.239e-02, 1.151e-01, 7.672e-02, -5.276e-02, -2.285e-01, -2.168e-02, 1.083e-01, 1.704e-01)); + r += mul(s7_3, M4(2.332e-02, 1.445e-01, 1.241e-01, -6.039e-02, 8.266e-02, -1.892e-01, -5.408e-01, 8.206e-02, 2.250e-01, 2.425e-01, 6.192e-02, 5.134e-03, -5.053e-02, 2.632e-01, -7.094e-03, 9.883e-02)); + r += 
mul(s7_4, M4(-2.304e-02, -8.378e-02, -6.218e-02, -1.157e-01, -4.370e-02, 1.416e-01, 4.883e-01, 1.203e-01, -1.086e-01, 5.591e-02, -2.372e-01, 5.909e-02, -1.630e-01, 1.430e-01, 8.641e-02, -4.704e-03)); + r += mul(s7_5, M4(-1.545e-01, -6.740e-02, 1.799e-01, -4.104e-02, -3.570e-02, 2.898e-01, -1.500e-01, -2.319e-01, -1.425e-01, -4.409e-02, 2.314e-01, 2.013e-02, -1.138e-01, 1.329e-01, -1.275e-01, 9.538e-02)); + r += mul(s7_6, M4(6.026e-02, 2.461e-01, 6.967e-02, 1.197e-01, -7.590e-02, 7.220e-02, -6.694e-02, -6.113e-01, 6.629e-02, 8.935e-02, -1.509e-01, 1.458e-01, -3.097e-02, 8.744e-02, 5.177e-02, 1.485e-01)); + r += mul(s7_7, M4(-7.051e-02, -1.119e-01, -7.987e-02, 3.374e-02, -2.263e-01, 5.299e-02, -2.333e-01, -6.558e-01, 9.406e-02, -2.782e-01, 1.104e-01, -2.358e-01, -1.142e-01, -4.699e-02, -2.193e-01, -5.177e-02)); + r += mul(s7_8, M4(-1.533e-02, -4.159e-02, -9.005e-02, -2.094e-02, 4.571e-02, 8.586e-02, -7.003e-02, -1.314e-01, -2.340e-02, -3.771e-02, 9.190e-02, 1.260e-01, -1.660e-01, -9.283e-03, 8.861e-02, -9.932e-02)); + r += V4(-4.065e-02, 3.730e-03, 1.817e-03, 1.345e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(9.513e-02, -1.023e-01, 1.587e-01, 1.025e-01, -6.720e-02, 1.538e-01, -3.814e-02, 2.291e-02, 8.159e-02, -5.872e-02, 8.317e-02, 1.526e-01, 1.371e-01, -4.979e-04, -1.534e-01, -3.054e-02)); + r += mul(s0_1, 
M4(-5.019e-03, 1.215e-01, -2.858e-01, 1.509e-01, -7.670e-02, -1.181e-01, -3.646e-02, -5.105e-02, 1.079e-01, 8.831e-02, -1.282e-02, -1.592e-01, 2.819e-01, -1.678e-01, 1.269e-01, 2.218e-02)); + r += mul(s0_2, M4(-2.377e-01, -3.966e-02, -4.808e-02, -1.515e-03, 2.682e-02, -1.740e-02, 1.949e-01, -2.585e-01, 1.936e-01, -2.997e-02, 6.483e-02, -1.305e-01, 8.261e-02, -1.004e-01, 7.083e-03, -8.298e-02)); + r += mul(s0_3, M4(-1.870e-02, -2.180e-02, 1.287e-01, -1.681e-02, 5.307e-02, -1.779e-02, -3.764e-02, -6.278e-03, -6.895e-02, -1.381e-01, 8.012e-02, 1.061e-02, -1.052e-02, 6.920e-02, -9.246e-02, -1.925e-01)); + r += mul(s0_4, M4(1.152e-01, 3.707e-01, -3.938e-01, -2.672e-02, -2.630e-02, -3.670e-01, 9.251e-02, 1.047e-01, 3.332e-02, 3.702e-02, 3.343e-02, -4.699e-02, 1.887e-01, -3.178e-01, 4.346e-01, -1.446e-01)); + r += mul(s0_5, M4(-1.992e-01, 2.275e-01, -1.044e-01, -1.716e-01, 8.659e-03, -1.067e-01, -5.529e-02, 1.015e-01, -2.028e-01, -6.617e-03, 3.986e-02, -1.639e-01, 6.785e-01, 1.518e-01, 1.681e-01, -1.835e-01)); + r += mul(s0_6, M4(-1.659e-01, -8.147e-02, 1.015e-01, 5.627e-02, -6.553e-02, 1.071e-01, -1.059e-01, -1.409e-01, 1.422e-02, -9.457e-02, -1.223e-01, 9.262e-02, 2.481e-01, 3.273e-01, -4.202e-02, -1.980e-01)); + r += mul(s0_7, M4(1.837e-01, 1.838e-01, 4.286e-02, -2.344e-02, 1.487e-01, 3.209e-02, -3.188e-02, 8.071e-02, 9.691e-03, 8.573e-02, 2.254e-01, -5.335e-02, 6.388e-02, 7.047e-02, -1.156e-01, -1.170e-01)); + r += mul(s0_8, M4(-2.658e-01, 1.008e-01, 3.286e-02, 2.389e-03, 7.517e-02, -2.851e-02, 6.483e-02, 7.479e-02, -1.277e-01, 7.402e-02, -1.648e-01, 6.628e-02, 1.634e-01, 1.439e-01, -2.544e-01, 4.385e-02)); + r += mul(s1_0, M4(-1.361e-02, -2.189e-02, -1.571e-01, 7.063e-02, 1.665e-01, 1.246e-01, 2.842e-01, -3.367e-02, -2.822e-01, -2.907e-01, -5.340e-01, -1.405e-01, -3.345e-02, -7.615e-02, 2.898e-02, 5.967e-02)); + r += mul(s1_1, M4(-4.387e-02, -4.236e-01, 4.281e-01, -1.953e-01, 7.487e-01, 7.529e-01, -5.215e-01, 1.123e-01, -1.363e-01, 1.408e-02, -1.552e-01, 3.233e-01, 
-2.761e-01, 8.336e-02, -7.412e-02, -9.009e-02)); + r += mul(s1_2, M4(-4.157e-02, -3.318e-02, 2.718e-01, 1.120e-01, 3.292e-01, 1.252e-01, -2.324e-01, -5.152e-03, -3.227e-01, -2.902e-02, -4.069e-01, 2.859e-01, 2.375e-01, -4.981e-02, -1.607e-01, -1.368e-02)); + r += mul(s1_3, M4(2.961e-02, -3.944e-01, 6.968e-02, 6.573e-02, 2.166e-02, 1.155e-01, 1.526e-01, 1.394e-01, 1.478e-01, 7.622e-02, 5.468e-02, 4.773e-02, -1.586e-02, -1.204e-02, 5.099e-02, 1.672e-01)); + r += mul(s1_4, M4(-4.088e-01, -1.786e-02, 3.435e-01, 1.761e-01, 4.563e-01, 7.573e-01, -3.591e-01, -1.546e-01, -2.650e-01, 8.012e-02, -3.056e-01, 1.255e-01, -1.513e-01, -2.147e-01, -8.252e-02, 2.772e-02)); + r += mul(s1_5, M4(-1.657e-01, -1.217e-01, 3.345e-01, -2.820e-01, 3.113e-03, 4.717e-01, 4.105e-02, 2.253e-01, 2.879e-01, 1.124e-01, 6.953e-02, 5.647e-01, -2.668e-01, -3.415e-01, -1.983e-02, 1.841e-01)); + r += mul(s1_6, M4(5.096e-02, -3.405e-02, 2.642e-01, -1.489e-01, -1.688e-02, -1.223e-01, 2.398e-01, 9.426e-02, -2.125e-01, -1.438e-01, 5.223e-02, -2.062e-01, -2.646e-01, 1.598e-01, 4.466e-02, 2.793e-01)); + r += mul(s1_7, M4(-1.469e-01, -2.421e-01, 3.448e-01, -4.945e-02, 2.766e-01, 1.970e-01, -2.203e-01, -6.525e-02, 2.432e-02, 4.168e-01, 4.742e-01, 2.581e-01, -1.645e-01, -4.138e-01, -1.509e-01, 1.089e-01)); + r += mul(s1_8, M4(2.470e-01, -1.231e-01, -1.178e-01, -6.888e-02, -5.987e-02, -3.821e-02, -1.473e-01, 1.678e-01, 5.462e-01, 9.235e-02, -7.806e-01, 3.665e-01, -1.915e-01, -1.793e-01, -9.810e-02, 2.176e-01)); + r += mul(s2_0, M4(2.157e-01, 2.611e-02, 2.140e-02, 7.000e-02, 1.860e-01, -1.012e-02, 5.828e-02, 1.636e-01, 1.133e-02, 5.581e-02, -1.959e-01, 9.071e-03, -3.571e-01, 1.393e-01, -2.630e-01, -4.527e-01)); + r += mul(s2_1, M4(-2.209e-02, -8.153e-02, -2.016e-01, -2.546e-02, -1.248e-01, 3.990e-02, -2.968e-02, -1.432e-01, -7.041e-02, -2.290e-01, -1.667e-01, 2.402e-01, -3.291e-01, -3.824e-02, 4.528e-01, -2.175e-01)); + r += mul(s2_2, M4(4.376e-01, 3.842e-02, 4.605e-02, 3.200e-02, 2.267e-01, -1.208e-02, 
-2.008e-01, -1.760e-01, -1.106e-01, -7.952e-02, -2.439e-02, -2.233e-02, 2.287e-01, 7.038e-02, -8.782e-02, 2.208e-01)); + r += mul(s2_3, M4(-2.928e-01, -1.633e-01, 2.021e-01, -1.321e-01, -1.699e-01, -1.313e-01, 2.450e-01, 9.154e-02, -1.370e-01, -1.742e-01, 3.649e-01, 1.948e-02, -1.845e-01, 5.658e-01, -3.862e-01, -5.922e-01)); + r += mul(s2_4, M4(-1.180e-01, 5.187e-02, -1.602e-01, 1.942e-01, -6.294e-02, 6.854e-02, -4.731e-02, -1.045e-01, 4.786e-01, 1.082e-01, -2.489e-01, -3.709e-01, 1.893e-01, -8.286e-02, 1.363e-01, -5.499e-01)); + r += mul(s2_5, M4(3.056e-02, 8.957e-02, -1.323e-01, -4.396e-02, 1.382e-01, 4.930e-02, 1.285e-01, -1.072e-01, -8.383e-02, 7.365e-02, -3.295e-02, -4.595e-02, 3.012e-01, -3.066e-01, 3.639e-01, 6.333e-03)); + r += mul(s2_6, M4(1.122e-01, -1.318e-01, 2.506e-02, 7.950e-02, -1.492e-01, -1.961e-02, 1.083e-01, 6.986e-02, -6.250e-02, 1.755e-01, 1.868e-01, -1.286e-01, -4.907e-01, 1.165e-01, 2.817e-01, -7.604e-01)); + r += mul(s2_7, M4(5.926e-02, -1.446e-02, 6.740e-02, -1.877e-01, 1.512e-01, 4.305e-02, -4.368e-02, -3.655e-02, 2.004e-01, 8.357e-02, -4.056e-01, -2.699e-01, 6.772e-02, -5.004e-01, 7.243e-01, -6.036e-01)); + r += mul(s2_8, M4(-1.636e-01, 3.726e-02, -7.612e-02, -3.775e-02, -3.209e-02, -6.485e-02, -1.036e-01, 3.863e-02, -2.952e-01, -1.468e-01, 5.695e-01, -3.171e-01, 2.957e-01, -2.874e-01, -7.122e-02, 2.408e-01)); + r += mul(s3_0, M4(-2.847e-01, -6.624e-02, 3.070e-01, 2.705e-01, -4.489e-01, -2.211e-02, 6.367e-02, -1.580e-01, -3.082e-02, 1.637e-02, -5.281e-02, -2.793e-01, 1.842e-03, 1.121e-01, -1.861e-01, -2.153e-02)); + r += mul(s3_1, M4(-1.506e-01, -8.362e-03, 2.448e-01, 2.311e-01, -1.994e-04, 1.714e-01, -5.810e-02, -3.037e-01, -1.406e-01, 1.158e-02, -9.110e-02, -1.218e-03, 7.478e-02, -2.950e-03, 1.240e-02, 4.613e-02)); + r += mul(s3_2, M4(-1.082e-01, 2.173e-01, 1.511e-01, -3.761e-01, -1.362e-01, -7.930e-02, -3.676e-01, 3.172e-02, -1.701e-01, -8.242e-02, 7.806e-03, -9.085e-02, 8.387e-02, -4.756e-02, 1.034e-01, 2.831e-02)); + r += mul(s3_3, 
M4(1.168e-01, -1.872e-01, 2.581e-02, 1.804e-01, 3.309e-01, 3.506e-02, -1.452e-01, -5.199e-01, 8.211e-02, 1.174e-02, -3.079e-02, -7.860e-03, 2.571e-02, 2.033e-01, -2.844e-03, -5.911e-03)); + r += mul(s3_4, M4(5.956e-01, 3.077e-01, -2.823e-01, 4.851e-01, 2.023e-01, 8.281e-02, -1.973e-02, 2.091e-01, 7.000e-02, 2.272e-01, 7.937e-02, 1.465e-02, -3.652e-02, -8.854e-02, 5.284e-02, 2.072e-02)); + r += mul(s3_5, M4(2.745e-01, 1.015e-01, 1.909e-01, 1.966e-02, 8.355e-02, 1.783e-01, 1.539e-01, 2.331e-01, -1.056e-01, -5.163e-02, 2.818e-03, 2.078e-02, -6.584e-02, 1.102e-02, 6.753e-02, 3.359e-03)); + r += mul(s3_6, M4(1.170e-01, 2.291e-01, 1.342e-01, -1.054e-01, -3.271e-01, 3.134e-01, 2.687e-01, 3.008e-02, -9.659e-02, -2.473e-02, -3.813e-03, 5.018e-03, 5.568e-02, 9.386e-03, -1.370e-01, -1.200e-01)); + r += mul(s3_7, M4(2.118e-01, -1.038e-01, 4.191e-01, 2.680e-01, -1.326e-01, -2.662e-01, -2.487e-01, -1.841e-01, 4.636e-02, 3.607e-02, -1.344e-01, 1.520e-01, -5.490e-02, -5.336e-02, -3.617e-02, 2.010e-03)); + r += mul(s3_8, M4(7.668e-03, -3.285e-03, 1.146e-01, -5.370e-03, -2.923e-01, 2.723e-02, -1.948e-01, 7.522e-02, 7.848e-02, -1.212e-01, 2.810e-01, 8.272e-02, -3.596e-02, -1.255e-01, 7.025e-02, 6.561e-02)); + r += mul(s4_0, M4(-5.750e-02, -2.343e-03, 1.155e-02, 1.263e-01, 2.045e-01, -1.364e-01, 4.942e-02, 2.860e-02, -2.458e-01, -1.640e-01, 2.016e-01, 6.267e-02, 9.234e-02, -6.066e-02, 3.070e-02, 6.094e-02)); + r += mul(s4_1, M4(-3.113e-02, 7.467e-02, 2.200e-02, -6.583e-02, 2.668e-01, -7.219e-02, 1.316e-01, -9.913e-02, -4.762e-02, 4.021e-02, -2.226e-01, -1.450e-01, 9.304e-02, -8.959e-02, 1.760e-01, 2.760e-02)); + r += mul(s4_2, M4(2.147e-01, -8.705e-03, 4.760e-03, -2.295e-01, 9.373e-02, -8.850e-02, -3.576e-02, 2.126e-01, -1.836e-01, -1.994e-02, 2.873e-02, -2.957e-03, 1.928e-01, 4.375e-02, 6.175e-02, -5.182e-02)); + r += mul(s4_3, M4(-5.723e-02, -1.145e-01, 2.409e-01, 1.399e-01, 3.128e-02, 4.703e-03, -1.700e-01, -5.630e-02, -7.253e-02, -1.281e-01, -4.285e-03, 4.734e-02, -8.377e-02, 
-3.062e-01, 2.587e-01, 1.099e-01)); + r += mul(s4_4, M4(-7.614e-02, -9.925e-03, -8.455e-03, -4.035e-02, 2.741e-01, 7.492e-02, 6.086e-01, 1.481e-01, -1.592e-02, -7.169e-02, 1.228e-01, 1.198e-01, -1.680e-01, -2.289e-01, 4.790e-01, 3.902e-02)); + r += mul(s4_5, M4(-1.123e-01, 8.752e-02, 1.686e-02, -1.039e-01, -1.253e-01, -1.933e-02, 1.212e-03, 1.023e-01, 7.552e-02, -9.984e-02, 3.426e-02, 6.780e-02, 2.702e-02, -6.068e-02, 1.967e-01, 8.629e-02)); + r += mul(s4_6, M4(5.797e-02, -1.353e-01, 1.033e-01, -1.719e-01, -1.335e-01, 6.762e-02, 7.013e-02, -7.472e-02, 3.723e-02, -3.790e-02, 9.515e-02, 8.516e-02, -7.411e-02, -2.409e-01, -2.353e-02, 2.940e-01)); + r += mul(s4_7, M4(2.548e-02, 2.544e-03, -1.350e-02, -2.464e-01, -2.018e-01, -1.882e-02, -2.318e-01, -1.809e-01, -1.304e-02, 1.333e-01, 9.827e-02, 9.163e-02, -4.668e-02, -4.071e-02, -8.042e-03, -2.185e-03)); + r += mul(s4_8, M4(3.587e-02, 1.501e-01, -5.163e-01, 1.042e-01, -6.538e-02, -1.005e-01, -4.472e-02, 4.179e-02, 1.050e-01, -4.198e-02, 2.039e-01, 2.244e-02, -1.004e-01, -1.028e-01, 2.824e-02, 8.763e-02)); + r += mul(s5_0, M4(7.821e-02, 7.323e-02, -3.199e-02, -1.595e-01, 3.097e-01, -3.831e-02, 1.785e-02, -2.153e-01, 1.812e-01, -4.676e-02, 1.000e-01, 2.932e-02, -9.241e-02, 9.792e-02, 7.440e-02, -3.885e-02)); + r += mul(s5_1, M4(-3.028e-01, -3.757e-02, -5.032e-02, 2.849e-01, -2.772e-01, 8.332e-02, -2.052e-01, 5.729e-02, 6.911e-02, -7.431e-02, 1.082e-01, 1.315e-01, 9.834e-02, 3.055e-02, -1.315e-01, 5.726e-03)); + r += mul(s5_2, M4(-1.636e-01, 1.645e-01, -4.554e-02, -2.774e-01, -2.691e-02, -5.220e-02, 9.571e-02, -3.884e-02, 4.966e-02, 2.502e-02, -9.109e-02, -2.851e-02, 6.685e-02, 3.636e-02, 5.882e-02, -1.581e-01)); + r += mul(s5_3, M4(-1.163e-01, 2.638e-02, -1.828e-01, -2.552e-01, 3.410e-03, -6.034e-03, 4.494e-02, 1.072e-01, 2.371e-01, -2.228e-01, -1.056e-02, -1.276e-02, 3.036e-02, 2.416e-01, -3.011e-01, -2.545e-01)); + r += mul(s5_4, M4(1.485e-01, 2.530e-01, -2.858e-01, 4.832e-01, -5.213e-02, -2.858e-01, -1.698e-01, 
-1.453e-01, -1.771e-01, 1.007e-01, -1.706e-01, -1.012e-01, 3.282e-02, 5.874e-02, -5.467e-01, 3.825e-02)); + r += mul(s5_5, M4(-3.837e-01, 3.641e-03, 5.814e-01, 4.933e-01, 3.131e-02, 8.307e-02, -3.989e-02, 5.503e-02, 6.367e-02, -4.050e-02, 7.006e-02, 2.848e-02, -4.190e-02, 1.647e-01, -9.264e-02, -3.662e-02)); + r += mul(s5_6, M4(-9.389e-02, -2.259e-01, 1.881e-01, -7.548e-02, 3.967e-02, 8.459e-02, 1.937e-01, 2.617e-02, -1.179e-01, -1.718e-01, 1.474e-01, 1.257e-01, 3.605e-02, -2.829e-02, 2.272e-02, -3.789e-01)); + r += mul(s5_7, M4(-2.656e-02, -1.844e-01, 3.402e-01, 4.901e-01, -5.836e-02, 3.312e-02, -2.004e-01, -4.783e-02, -2.746e-01, 1.515e-01, 3.878e-02, -7.158e-02, -1.820e-02, 3.105e-01, -1.832e-02, -1.099e-03)); + r += mul(s5_8, M4(4.523e-01, 1.502e-01, -2.123e-01, 2.320e-01, 2.085e-01, 5.715e-02, 3.191e-02, 7.901e-02, -8.453e-02, 5.544e-02, 4.279e-03, -1.328e-01, -9.691e-02, -2.488e-02, -4.719e-02, -1.008e-01)); + r += mul(s6_0, M4(4.345e-01, -8.277e-02, -7.012e-02, 8.223e-02, -1.071e-01, 1.610e-01, 1.502e-01, 4.634e-02, -1.456e-01, 2.283e-02, 1.598e-01, 9.160e-02, 1.612e-02, -2.086e-01, -7.139e-02, -2.495e-01)); + r += mul(s6_1, M4(3.248e-01, -1.401e-01, -2.566e-02, 1.442e-01, 1.050e-01, -5.083e-02, 1.393e-01, 3.161e-02, -1.916e-01, 2.434e-02, 2.304e-01, -1.068e-02, -1.549e-01, 2.850e-01, -3.977e-02, 1.540e-01)); + r += mul(s6_2, M4(1.457e-02, 2.612e-02, -2.899e-02, -2.297e-01, 4.891e-02, -6.237e-02, -3.175e-02, 2.566e-02, 1.602e-01, 1.467e-02, -8.463e-02, -1.173e-01, 3.783e-02, -3.369e-03, -5.365e-02, -1.653e-03)); + r += mul(s6_3, M4(-1.816e-01, -1.660e-02, 1.077e-01, -3.667e-01, 2.299e-01, 2.353e-01, -3.491e-01, -3.060e-02, 1.803e-01, -9.714e-02, 7.105e-02, 9.895e-02, 1.764e-01, 3.558e-02, 9.593e-02, 8.519e-02)); + r += mul(s6_4, M4(1.539e-01, 1.200e-01, -1.353e-01, 1.529e-01, -9.333e-02, -2.507e-01, 3.316e-01, -1.001e-01, 1.263e-01, 3.118e-02, -1.714e-01, 4.910e-01, 1.587e-01, -5.905e-02, -1.439e-01, -1.274e-01)); + r += mul(s6_5, M4(-1.838e-01, -2.972e-02, 
-9.090e-02, 1.057e-01, 1.846e-01, -7.972e-02, -9.290e-02, 1.015e-01, 2.065e-01, 4.804e-02, -1.697e-01, -3.964e-01, 5.007e-02, 5.312e-02, -3.128e-02, 9.884e-02)); + r += mul(s6_6, M4(-3.086e-02, -6.920e-02, -9.317e-02, 4.002e-02, -2.568e-01, 2.163e-01, -7.020e-02, -5.020e-02, 1.172e-01, -1.631e-01, -2.243e-01, -1.067e-01, 3.532e-02, -3.497e-01, 1.851e-01, -1.516e-01)); + r += mul(s6_7, M4(2.297e-01, 2.157e-01, 7.460e-02, -8.860e-02, 8.448e-02, -3.654e-02, -2.066e-01, 1.232e-01, -2.739e-01, 2.449e-01, -2.853e-01, -1.242e-01, -1.292e-01, 8.422e-02, -2.224e-01, -1.715e-02)); + r += mul(s6_8, M4(6.990e-02, 7.739e-02, 2.214e-01, 6.205e-03, 2.966e-02, -2.918e-02, -3.100e-01, -3.494e-02, -2.535e-01, -6.415e-02, -3.804e-02, 7.443e-02, 1.881e-01, -4.946e-02, -1.682e-02, 4.035e-02)); + r += mul(s7_0, M4(-3.276e-02, 5.851e-02, -7.947e-02, 1.102e-01, 1.314e-01, -2.101e-02, 4.667e-03, 1.552e-01, 1.391e-01, 9.108e-02, -5.034e-02, -1.252e-01, 1.662e-01, -2.567e-01, 2.469e-02, -2.212e-01)); + r += mul(s7_1, M4(-6.053e-02, -5.261e-02, -6.294e-02, -2.328e-01, 3.425e-01, 1.057e-01, 7.203e-02, -3.723e-01, 2.148e-02, 3.648e-02, -1.026e-01, 4.757e-02, 5.726e-02, 5.349e-02, 1.339e-01, -1.629e-01)); + r += mul(s7_2, M4(-3.005e-03, 8.149e-02, 1.175e-01, -6.981e-02, 7.763e-02, -7.641e-02, 2.218e-01, -1.141e-01, 7.203e-02, -5.508e-02, -8.120e-02, 8.905e-02, 1.130e-01, -1.605e-01, 1.333e-01, -4.009e-02)); + r += mul(s7_3, M4(8.709e-02, -1.158e-02, 6.767e-03, 4.893e-02, 3.468e-01, -1.774e-01, 2.457e-01, -2.309e-01, -2.759e-01, -1.656e-01, 5.379e-03, 5.537e-02, -2.075e-01, -3.874e-01, 9.547e-02, 1.176e-01)); + r += mul(s7_4, M4(1.664e-01, -6.784e-03, 1.283e-01, -5.551e-02, 5.972e-01, -1.779e-01, 7.139e-01, -3.232e-02, -1.695e-01, 1.692e-01, -1.356e-01, 5.937e-02, -1.303e-01, -5.354e-02, 1.923e-01, -6.313e-03)); + r += mul(s7_5, M4(1.809e-01, -5.603e-02, -1.256e-01, 1.202e-01, 2.529e-01, 4.616e-02, -5.929e-02, 5.680e-02, -2.929e-01, 7.123e-02, 1.358e-01, -3.910e-02, -2.787e-01, 2.232e-02, 
5.208e-02, -1.017e-01)); + r += mul(s7_6, M4(-2.591e-01, -6.096e-02, 8.695e-02, 1.089e-01, 1.466e-01, -6.621e-01, 2.940e-01, 1.774e-01, 2.114e-01, 4.313e-02, 1.019e-01, -3.813e-02, 1.112e-02, -1.973e-01, 1.290e-01, -1.048e-02)); + r += mul(s7_7, M4(3.255e-02, -2.408e-02, -1.358e-01, -8.922e-02, 2.999e-01, -8.591e-02, -1.777e-01, 2.427e-01, 1.476e-01, -1.198e-01, 1.215e-01, -7.049e-02, -1.513e-01, 2.305e-02, 1.597e-01, 3.870e-02)); + r += mul(s7_8, M4(-1.616e-01, 6.090e-02, 7.322e-02, 3.689e-02, 1.026e-02, -5.670e-02, 5.745e-02, 1.727e-01, 9.462e-02, 6.318e-02, 1.639e-01, -1.412e-02, -2.783e-01, -1.519e-01, -1.171e-01, 1.104e-03)); + r += V4(-8.533e-04, -5.599e-03, -1.843e-02, 5.456e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.560e-02, -7.087e-03, -1.410e-01, 9.884e-02, -3.941e-02, 4.886e-03, 9.250e-02, -1.153e-01, 1.099e-01, 1.787e-02, 7.890e-02, 7.949e-02, 4.733e-02, 4.525e-02, -1.421e-01, 1.549e-01)); + r += mul(s0_1, M4(-1.759e-02, 3.651e-02, -2.068e-01, -6.240e-02, -1.894e-01, -6.618e-02, -5.977e-02, 1.928e-01, 2.456e-02, -4.077e-02, 1.906e-02, 7.152e-02, 2.549e-01, 1.165e-01, 2.430e-01, 1.737e-01)); + r += mul(s0_2, M4(1.805e-03, -2.171e-03, -1.793e-01, -2.124e-01, 1.723e-01, 2.540e-02, -5.076e-03, 8.146e-02, 5.550e-03, 1.639e-02, -1.948e-01, 3.635e-03, -4.027e-02, -3.399e-02, 2.736e-01, 6.094e-02)); 
+ r += mul(s0_3, M4(-2.643e-02, -3.043e-02, 1.180e-01, -8.517e-02, 2.385e-02, 4.729e-02, 3.137e-02, 9.062e-02, -1.664e-01, -3.938e-02, -2.115e-02, -4.653e-03, 2.821e-01, 9.189e-02, -2.044e-02, -2.310e-02)); + r += mul(s0_4, M4(5.197e-01, 1.222e-01, 6.437e-02, 1.062e-01, -3.377e-01, 4.647e-02, -9.965e-02, -7.055e-02, 1.566e-02, 5.988e-02, 2.100e-02, -5.248e-02, -3.215e-01, -1.829e-01, -1.951e-01, -8.805e-02)); + r += mul(s0_5, M4(-1.388e-01, 6.670e-02, 2.007e-01, -8.099e-02, 8.637e-02, -5.086e-02, 3.154e-02, -3.167e-03, 6.398e-02, -5.311e-03, 1.620e-01, -7.674e-02, -3.382e-01, -1.289e-01, 5.176e-01, -1.229e-01)); + r += mul(s0_6, M4(5.681e-02, -2.286e-02, 3.962e-02, -4.412e-02, -1.683e-01, 3.669e-02, 1.467e-01, -2.707e-03, -1.067e-01, -3.951e-03, -4.406e-02, 5.629e-02, -3.607e-02, -1.522e-02, -1.369e-01, -5.543e-02)); + r += mul(s0_7, M4(2.579e-01, 1.184e-01, 8.497e-02, 1.449e-01, -5.598e-02, 3.054e-02, -8.097e-02, -1.113e-01, 2.554e-01, -5.213e-02, -3.915e-02, -6.455e-02, -3.335e-01, 1.170e-01, -9.548e-02, 2.320e-02)); + r += mul(s0_8, M4(-2.914e-01, -1.802e-01, 2.361e-02, -7.381e-02, 1.470e-01, 5.039e-02, -7.519e-02, -4.570e-03, -1.217e-01, 1.500e-02, 6.749e-02, 7.562e-02, -3.426e-01, -1.080e-01, 6.514e-02, 6.881e-02)); + r += mul(s1_0, M4(1.618e-01, -3.931e-02, -2.383e-02, 1.431e-01, 5.090e-02, -1.330e-02, -1.028e-01, 6.260e-02, -2.058e-01, 7.500e-03, 1.932e-01, -7.916e-02, -1.371e-01, -8.696e-02, -4.685e-02, -2.273e-02)); + r += mul(s1_1, M4(-3.040e-01, -5.094e-02, 9.008e-02, -1.285e-01, 7.680e-01, -9.636e-02, -2.345e-02, -2.210e-01, 4.076e-02, 1.152e-01, 3.239e-01, -7.640e-01, 1.565e-02, -5.839e-02, 2.723e-01, 2.319e-01)); + r += mul(s1_2, M4(4.965e-03, 1.240e-01, 2.817e-01, 1.224e-01, 7.815e-02, 4.454e-02, -7.984e-03, -2.240e-01, -3.825e-01, 7.662e-02, -1.033e-01, -2.338e-01, 2.427e-02, 1.267e-01, 1.043e-01, 3.834e-01)); + r += mul(s1_3, M4(-8.741e-02, -2.785e-02, 1.292e-01, -9.184e-02, 7.198e-02, -5.808e-02, -1.383e-01, 4.611e-02, 8.833e-02, -2.850e-02, 
-1.079e-01, -6.543e-02, 2.152e-02, -8.040e-02, 4.714e-02, -1.107e-01)); + r += mul(s1_4, M4(-1.832e-01, 4.999e-02, 2.310e-01, 2.137e-01, 2.118e-01, -4.953e-02, -7.942e-04, -2.041e-02, 9.558e-02, -1.278e-02, -2.033e-01, 1.049e-01, -1.479e-01, -2.008e-01, -5.131e-02, -2.963e-01)); + r += mul(s1_5, M4(-6.279e-02, 2.429e-01, 4.757e-03, -1.206e-01, -1.910e-01, -1.530e-01, 6.063e-01, -2.034e-01, 1.171e-01, 6.146e-02, -3.540e-02, 2.388e-01, 4.859e-01, 1.376e-01, -6.543e-01, 1.784e-01)); + r += mul(s1_6, M4(-1.344e-01, 1.360e-02, 9.947e-02, 1.474e-03, 1.993e-02, 3.195e-03, 4.908e-02, -3.237e-02, 1.468e-01, 8.365e-02, 1.188e-01, -1.473e-01, -4.335e-02, 2.327e-02, -5.855e-02, -1.053e-01)); + r += mul(s1_7, M4(-4.558e-03, 1.134e-02, -5.775e-02, 8.397e-02, 1.323e-01, 3.408e-02, 1.067e-01, 1.505e-02, -1.862e-01, -2.054e-01, 1.248e-01, 1.360e-01, -1.162e-01, 5.213e-02, 5.240e-03, 8.693e-02)); + r += mul(s1_8, M4(1.518e-01, 2.164e-01, 8.775e-02, -5.471e-02, -8.322e-03, 8.992e-02, -5.009e-03, -9.936e-02, -8.858e-02, -1.484e-01, 1.137e-01, -4.466e-01, 3.340e-01, 6.147e-02, -1.144e-01, -8.945e-02)); + r += mul(s2_0, M4(-4.849e-02, -1.255e-02, -1.621e-01, -7.859e-02, -2.514e-01, -1.148e-02, -9.760e-02, -1.137e-01, 2.550e-01, 1.699e-02, -8.587e-02, 7.128e-02, 7.682e-02, -7.427e-02, -8.652e-02, 3.613e-01)); + r += mul(s2_1, M4(2.779e-02, 7.570e-03, -1.344e-01, 1.296e-02, -1.083e-02, 3.117e-02, -1.328e-01, -3.693e-02, 2.474e-01, 1.763e-01, 1.408e-02, 1.362e-01, -2.391e-02, -2.802e-01, -4.279e-02, 3.188e-01)); + r += mul(s2_2, M4(-1.033e-01, -3.043e-02, -7.807e-02, -5.715e-02, 1.855e-01, -1.128e-01, 1.395e-02, -9.351e-02, -2.720e-01, 2.711e-01, -4.261e-02, 6.892e-02, 1.308e-01, -1.577e-01, 1.907e-01, 4.823e-01)); + r += mul(s2_3, M4(-1.561e-01, -2.317e-02, 2.741e-02, -1.019e-01, -2.797e-02, 7.486e-02, -6.564e-02, 2.268e-02, 2.078e-01, -2.920e-01, -1.632e-01, -5.214e-02, -2.783e-01, -3.381e-01, 2.476e-01, -4.718e-01)); + r += mul(s2_4, M4(2.369e-01, 3.477e-02, 2.674e-01, 2.764e-01, 
1.127e-02, 2.274e-02, 5.289e-02, 1.770e-01, -1.297e-03, 1.805e-02, 1.734e-01, 3.586e-03, 1.278e-01, -6.466e-01, -5.282e-03, -3.198e-01)); + r += mul(s2_5, M4(3.002e-02, 1.447e-01, 2.242e-01, 3.591e-02, 1.510e-02, 5.069e-02, 1.510e-01, 1.019e-01, 9.118e-02, -4.876e-02, -1.372e-01, 2.119e-01, 5.599e-01, -2.415e-01, -2.045e-01, -5.714e-01)); + r += mul(s2_6, M4(-6.998e-02, -3.651e-02, 2.052e-03, -1.139e-01, 5.134e-02, -6.756e-02, -6.024e-02, -1.392e-01, 1.954e-01, 9.353e-02, 1.594e-02, -2.237e-01, -1.781e-01, -1.366e-01, -2.123e-01, 2.311e-02)); + r += mul(s2_7, M4(1.693e-01, -9.344e-02, 1.462e-01, 1.420e-01, -8.540e-02, 4.959e-02, -4.992e-02, 1.013e-01, -5.185e-01, 9.739e-02, -5.330e-02, -1.208e-01, -1.194e-01, -2.685e-01, -2.982e-01, -1.289e-01)); + r += mul(s2_8, M4(1.141e-01, -5.735e-02, 8.146e-02, -2.221e-02, 9.828e-02, -5.525e-02, 6.784e-02, -9.348e-02, -1.546e-01, 3.030e-01, -2.793e-02, 1.449e-02, -3.587e-01, -1.617e-01, 1.805e-01, 2.176e-01)); + r += mul(s3_0, M4(-8.667e-03, -5.350e-02, -3.774e-02, 7.121e-02, 1.094e-01, 2.259e-02, 5.645e-01, 4.007e-02, 6.136e-03, 2.721e-02, 1.573e-02, -1.529e-02, 3.419e-02, -1.128e-01, 1.004e-01, 6.939e-02)); + r += mul(s3_1, M4(3.826e-02, -2.135e-02, -3.854e-01, -4.123e-01, 1.643e-01, 8.967e-02, 7.549e-02, -3.133e-01, -8.262e-02, -2.612e-02, 1.106e-01, 1.795e-01, -7.740e-02, -3.704e-02, 2.029e-01, 1.461e-02)); + r += mul(s3_2, M4(-6.023e-02, 6.654e-02, -1.361e-01, -1.431e-01, 7.496e-02, 1.351e-01, 1.292e-01, 1.911e-01, -6.692e-02, -3.242e-02, 3.928e-02, -8.824e-03, -1.226e-03, -4.044e-02, 1.344e-01, 1.214e-01)); + r += mul(s3_3, M4(-3.305e-02, -1.101e-01, -1.894e-01, -3.469e-01, -1.210e-01, -3.212e-02, 2.904e-01, -2.047e-01, -1.211e-03, -7.072e-02, -1.544e-02, -5.603e-02, 1.195e-01, -2.336e-01, -5.574e-02, -3.214e-02)); + r += mul(s3_4, M4(-2.567e-01, 1.577e-01, -2.711e-01, -1.814e-01, 2.629e-01, -1.322e-01, 4.110e-01, -2.530e-01, 1.157e-01, -6.624e-02, -6.523e-02, 9.403e-02, -3.037e-02, -1.903e-01, -4.489e-02, -6.323e-02)); 
+ r += mul(s3_5, M4(3.183e-01, 1.222e-01, -1.796e-01, -6.899e-01, -3.581e-02, 7.077e-02, 1.821e-01, -2.122e-01, -3.829e-03, 3.924e-02, 4.984e-02, 3.319e-03, -1.210e-02, -1.916e-01, -6.451e-02, -5.390e-02)); + r += mul(s3_6, M4(9.451e-02, 1.416e-01, 2.363e-01, -2.431e-03, -4.550e-01, 9.670e-02, 4.511e-01, -2.440e-01, -1.039e-02, -7.438e-03, -5.394e-02, -1.583e-01, 5.988e-02, -1.223e-01, -6.949e-02, -7.947e-02)); + r += mul(s3_7, M4(-4.528e-01, 1.578e-01, -2.586e-01, 9.291e-02, -4.691e-02, 1.333e-01, 7.600e-02, -6.177e-03, 1.024e-02, -2.282e-02, -7.689e-02, -6.901e-02, -6.665e-02, -4.332e-02, -1.728e-01, -6.849e-02)); + r += mul(s3_8, M4(1.362e-01, 2.879e-02, -1.518e-01, -8.947e-02, -9.332e-02, 1.320e-01, 1.138e-01, -1.209e-01, -4.019e-02, 1.469e-01, 1.528e-03, -1.214e-01, -5.435e-02, -1.586e-01, -2.739e-02, 6.551e-02)); + r += mul(s4_0, M4(-1.033e-01, -6.952e-02, -1.560e-02, 1.402e-02, -3.370e-02, -7.858e-02, 2.946e-01, -9.622e-02, -4.151e-02, -3.780e-02, -1.132e-01, 1.453e-01, 1.965e-02, -3.571e-02, 9.255e-02, -1.576e-02)); + r += mul(s4_1, M4(-6.419e-02, 1.816e-02, -1.188e-01, -1.148e-01, -4.849e-02, 2.576e-03, 5.385e-01, 6.037e-03, 4.484e-02, -1.937e-02, -4.330e-01, -7.164e-02, -8.341e-02, 3.394e-02, -6.566e-02, 2.859e-01)); + r += mul(s4_2, M4(1.414e-01, 9.715e-03, -2.075e-02, 3.829e-02, -8.767e-02, -9.468e-02, 3.312e-01, -6.282e-03, 3.118e-02, 1.290e-02, -7.580e-03, 9.585e-02, 1.152e-01, -7.785e-02, 8.913e-02, 1.269e-01)); + r += mul(s4_3, M4(-1.300e-01, -8.615e-02, 2.134e-02, -2.515e-02, 4.651e-02, 2.008e-01, -6.371e-02, -1.236e-02, 2.614e-01, -1.788e-01, -1.925e-02, -1.645e-02, -1.069e-01, -6.968e-02, 6.480e-02, 3.090e-02)); + r += mul(s4_4, M4(1.352e-01, -1.886e-02, -1.145e-01, 4.115e-02, -3.652e-01, -8.227e-02, 5.339e-02, -2.728e-01, 1.191e-01, 2.115e-02, 2.001e-01, -1.219e-02, -5.189e-01, -9.690e-03, 9.258e-02, -7.153e-02)); + r += mul(s4_5, M4(-3.574e-02, 6.680e-02, -2.589e-02, -3.711e-02, -1.948e-01, 8.654e-02, 2.185e-01, -1.511e-01, -9.703e-02, 
1.522e-01, -2.773e-01, 1.940e-01, -7.396e-02, 1.386e-01, 2.249e-01, -3.014e-02)); + r += mul(s4_6, M4(-1.586e-01, -3.006e-02, 4.357e-02, 3.494e-02, 1.060e-01, 6.097e-02, -4.613e-02, 1.076e-01, 1.332e-01, 1.145e-01, -5.582e-02, 1.621e-01, -1.310e-02, -2.543e-02, -1.657e-02, -2.863e-02)); + r += mul(s4_7, M4(1.932e-01, 6.256e-02, 1.217e-01, -8.493e-02, -4.115e-01, 1.171e-03, 1.135e-01, -7.224e-02, -1.774e-02, 9.645e-02, -1.115e-01, 6.669e-02, -5.159e-02, 6.090e-02, -4.863e-02, -1.897e-02)); + r += mul(s4_8, M4(-7.878e-02, -9.714e-02, 1.048e-01, 9.664e-03, 8.354e-02, 2.088e-02, 2.220e-02, 2.531e-02, -6.113e-02, 3.972e-02, -9.207e-02, 4.613e-02, 1.827e-01, 7.274e-02, -2.476e-01, -5.481e-02)); + r += mul(s5_0, M4(1.970e-01, 1.672e-01, 2.433e-01, -8.995e-02, 7.859e-02, -3.751e-02, -3.454e-02, 2.298e-01, -1.324e-01, -1.465e-02, -8.560e-02, -2.178e-01, -3.343e-02, 6.746e-02, 8.993e-02, -4.901e-02)); + r += mul(s5_1, M4(-6.850e-03, -1.971e-01, -5.692e-02, 7.432e-02, 3.548e-01, -1.108e-01, 3.175e-01, 1.081e+00, 5.009e-03, -4.461e-02, 2.832e-02, -2.137e-01, 2.470e-01, 5.725e-02, -1.697e-02, -1.859e-02)); + r += mul(s5_2, M4(5.239e-02, 6.549e-02, -2.298e-01, -1.966e-01, -9.228e-02, -1.393e-01, 1.912e-01, 4.543e-01, -1.425e-02, 9.029e-02, -9.371e-02, -2.144e-01, 7.205e-02, 2.581e-02, -1.731e-01, -2.638e-01)); + r += mul(s5_3, M4(9.616e-02, 1.156e-01, -4.901e-02, 3.704e-02, -1.969e-02, 3.668e-02, -7.894e-02, 4.214e-03, -1.831e-01, 2.568e-02, 1.322e-02, 4.639e-02, -7.497e-02, 6.491e-02, -5.406e-03, -1.238e-01)); + r += mul(s5_4, M4(-1.216e-01, 2.233e-01, -8.263e-02, -1.517e-01, 2.918e-01, -5.018e-02, -1.391e-01, 2.899e-02, -1.208e-01, -1.490e-01, -2.105e-01, -5.076e-02, -8.915e-03, 4.420e-03, -3.801e-01, -5.936e-03)); + r += mul(s5_5, M4(8.879e-02, 2.007e-01, -2.374e-02, 4.744e-01, -2.168e-01, 8.071e-02, 6.015e-03, 8.826e-02, 1.548e-01, -1.410e-01, 1.649e-01, -1.502e-01, 4.645e-03, 8.603e-02, 4.699e-02, 1.293e-02)); + r += mul(s5_6, M4(2.715e-01, 2.036e-01, -1.004e-01, 2.508e-01, 
5.107e-02, 1.315e-01, 6.567e-02, 6.999e-02, 1.513e-02, -1.198e-01, -3.505e-02, 1.060e-01, 8.095e-02, -3.236e-03, 2.448e-01, 7.195e-03)); + r += mul(s5_7, M4(-7.150e-01, 2.499e-01, -3.129e-01, 2.092e-01, 2.434e-02, -6.082e-03, -5.837e-02, 2.251e-02, -1.022e-01, -2.280e-02, -2.848e-01, -1.327e-02, 4.425e-02, 6.298e-02, 1.132e-02, -9.445e-02)); + r += mul(s5_8, M4(-5.568e-02, -6.042e-02, 1.757e-01, -5.052e-02, -5.824e-02, 6.717e-03, -8.544e-02, -1.293e-02, -8.259e-02, 5.506e-02, 9.705e-02, 9.304e-02, -1.503e-01, -8.160e-02, 1.082e-01, -1.377e-01)); + r += mul(s6_0, M4(3.476e-01, 9.336e-02, 2.231e-02, 1.022e-02, -3.424e-02, -7.095e-02, 3.900e-02, 9.158e-02, -1.061e-01, -5.546e-02, -9.304e-02, -5.883e-02, 1.331e-01, 7.858e-02, 1.167e-01, 7.884e-02)); + r += mul(s6_1, M4(-1.835e-02, -1.974e-01, -1.369e-01, 2.903e-01, -1.457e-02, -2.445e-02, 1.877e-02, 1.939e-01, -9.732e-02, 3.776e-02, -1.752e-01, -2.629e-01, 2.034e-01, -6.453e-02, 2.623e-01, -2.109e-01)); + r += mul(s6_2, M4(-3.623e-02, 5.361e-02, -2.681e-01, 1.447e-01, -3.448e-02, 4.750e-02, 9.537e-02, 1.124e-03, 1.402e-01, 1.087e-01, -1.424e-01, -1.509e-01, 3.056e-02, -5.132e-02, -5.697e-03, -1.327e-02)); + r += mul(s6_3, M4(-4.655e-02, -4.782e-02, 2.343e-01, 8.769e-02, 1.124e-01, -2.491e-03, -2.597e-02, -6.833e-02, -1.243e-01, 4.811e-02, -3.578e-01, 2.454e-02, -1.434e-01, 5.503e-03, -6.405e-02, 2.061e-02)); + r += mul(s6_4, M4(-2.463e-01, -8.253e-02, 3.349e-01, -2.741e-01, -1.254e-02, 3.522e-02, -1.978e-01, 1.421e-03, -1.022e-02, 1.297e-02, -9.613e-02, -2.839e-01, 1.682e-01, 3.789e-02, -1.225e-01, -1.144e-01)); + r += mul(s6_5, M4(-1.381e-01, 1.578e-01, 1.174e-01, 9.555e-02, 1.071e-01, -1.781e-02, 4.394e-02, 3.225e-02, 1.769e-01, -1.736e-01, -3.724e-01, -4.223e-01, -1.162e-01, 3.163e-02, -5.523e-02, -9.306e-02)); + r += mul(s6_6, M4(5.363e-02, 2.286e-02, -1.182e-01, 2.210e-01, -7.831e-02, 3.203e-03, 7.760e-02, 3.020e-03, 2.411e-01, -5.532e-02, -2.875e-02, 1.754e-01, 1.305e-01, -2.458e-03, 1.417e-01, 6.157e-03)); + r 
+= mul(s6_7, M4(-1.955e-01, -1.845e-01, 8.119e-02, -1.347e-01, -3.374e-02, 1.839e-02, 3.874e-03, -1.838e-01, -4.347e-01, -2.846e-01, -1.806e-01, 5.604e-02, 1.243e-01, -2.117e-02, -5.486e-02, 6.884e-03)); + r += mul(s6_8, M4(7.283e-02, -1.282e-01, -2.823e-01, 5.028e-02, -1.353e-01, 1.419e-01, -5.407e-02, -6.650e-03, 3.999e-01, -1.855e-01, -2.906e-01, -3.688e-02, -1.405e-01, 1.427e-02, -2.796e-02, -4.579e-02)); + r += mul(s7_0, M4(9.975e-02, 7.898e-02, 1.381e-01, 8.796e-02, 2.603e-01, -8.414e-02, -1.423e-01, -9.692e-03, -1.139e-01, 6.085e-02, -3.823e-03, -7.671e-02, 1.877e-02, -8.808e-03, 2.768e-02, 2.780e-01)); + r += mul(s7_1, M4(8.771e-02, 1.460e-02, 1.628e-02, 5.329e-03, 1.449e-01, -3.699e-01, 3.923e-01, 2.301e-02, -1.277e-01, -2.741e-02, -1.047e-01, 5.968e-02, -5.592e-02, 7.473e-02, 2.236e-01, 6.206e-01)); + r += mul(s7_2, M4(-1.469e-01, -7.466e-02, -5.950e-02, -1.130e-01, 2.839e-02, -2.606e-02, 1.334e-01, 1.732e-01, 3.303e-02, -9.745e-02, -1.245e-01, 6.786e-02, 1.955e-01, -5.324e-02, 2.093e-01, 1.236e-01)); + r += mul(s7_3, M4(-1.293e-01, -2.116e-02, -3.673e-02, -9.411e-02, -5.480e-01, 1.889e-01, 7.690e-02, 1.970e-01, -4.543e-02, 5.556e-02, -6.816e-02, 1.532e-02, -2.292e-03, -1.880e-02, 7.113e-02, 2.672e-02)); + r += mul(s7_4, M4(6.920e-02, -4.618e-02, -6.882e-02, 3.691e-02, -8.673e-02, -1.959e-01, -3.585e-01, -9.090e-02, 2.925e-01, 3.724e-02, 2.743e-01, 3.540e-01, -2.959e-01, -3.552e-02, -4.064e-02, -1.069e-01)); + r += mul(s7_5, M4(-3.202e-02, 2.347e-02, 7.362e-02, 2.157e-02, -7.213e-02, -1.073e-01, 2.008e-01, -2.052e-01, -1.134e-01, -2.505e-02, 1.241e-02, 1.802e-01, -1.081e-01, 5.897e-02, 1.798e-02, 7.188e-02)); + r += mul(s7_6, M4(-5.356e-04, 6.207e-02, 8.194e-02, 8.600e-03, -2.790e-01, 2.941e-01, -4.157e-02, -2.512e-01, 8.039e-02, -1.770e-02, -5.099e-02, 3.033e-02, -8.296e-02, -2.268e-02, -1.175e-01, 1.419e-02)); + r += mul(s7_7, M4(-4.728e-03, -4.249e-02, -9.443e-02, 2.155e-03, -1.180e-01, -1.111e-01, 5.674e-02, 1.564e-01, 3.111e-01, 4.206e-02, 1.174e-01, 
2.935e-02, 5.181e-02, -9.064e-02, 8.466e-02, -9.002e-03)); + r += mul(s7_8, M4(1.323e-01, 8.600e-02, -3.627e-02, 3.855e-02, -5.842e-02, 3.473e-01, -2.465e-02, -1.118e-01, -6.674e-02, 4.149e-03, 6.115e-04, 4.737e-02, 1.022e-01, 4.757e-02, 2.210e-02, -5.416e-02)); + r += V4(-3.625e-03, -6.509e-02, -2.543e-02, 2.655e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 
0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = 
f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 
s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.835e-02, -2.396e-02, 1.538e-01, -1.119e-02, -7.433e-03, 2.244e-02, -4.176e-01, 3.162e-02, 2.272e-02, 2.109e-02, 2.717e-02, 1.403e-01, 4.632e-01, 8.008e-02, -3.298e-01, -1.300e-01)); + r += mul(s0_1, M4(-7.625e-02, -1.928e-02, -3.921e-02, -5.083e-02, -1.915e-01, 1.015e-01, -4.724e-01, 5.635e-02, -1.024e-02, 9.931e-03, -8.880e-02, 4.998e-02, 6.319e-01, -1.827e-01, 1.566e-01, -3.693e-01)); + r += mul(s0_2, M4(-6.468e-02, -1.760e-01, -7.115e-02, -4.285e-02, -2.285e-01, 8.701e-02, -2.009e-01, -1.627e-01, -8.255e-02, -5.679e-02, 5.767e-03, 2.168e-01, 4.349e-02, 3.930e-02, 4.913e-02, -1.117e-01)); + r += mul(s0_3, M4(1.302e-01, 5.135e-02, 4.496e-02, -3.394e-02, -7.374e-02, -4.251e-02, -5.037e-02, -2.096e-02, 3.509e-02, -1.396e-01, 1.593e-01, 6.313e-02, 9.144e-03, 6.799e-02, -2.578e-01, 1.379e-02)); + r += mul(s0_4, M4(-4.673e-02, -3.859e-02, 7.260e-03, -1.913e-01, 9.779e-02, -1.876e-02, -1.807e-01, -1.189e-02, -4.810e-02, 1.021e-01, 6.610e-02, 5.553e-02, -3.128e-02, 3.649e-01, 2.145e-02, 1.857e-01)); + r += mul(s0_5, M4(-6.326e-02, -2.190e-02, -1.025e-01, 2.645e-01, 1.325e-01, -3.209e-02, 3.893e-03, 1.114e-01, -1.186e-01, -2.977e-02, -8.407e-02, -2.533e-01, 1.688e-01, -2.146e-02, 2.017e-01, -2.172e-01)); + r += mul(s0_6, M4(-2.134e-02, 2.436e-03, 7.637e-02, 1.031e-02, 1.962e-02, -2.824e-02, -1.031e-01, -1.505e-01, 4.045e-02, -3.927e-02, 4.998e-02, -1.292e-01, -7.246e-03, 1.795e-03, 7.585e-02, 4.875e-02)); + r += mul(s0_7, M4(-2.179e-02, -3.530e-02, -1.362e-01, -2.993e-02, 1.283e-01, 1.468e-02, 1.005e-01, 3.694e-01, 4.319e-02, 7.983e-02, 2.198e-02, -4.084e-02, -1.126e-02, 3.306e-02, 5.772e-02, -1.415e-01)); + r += mul(s0_8, M4(4.490e-02, -4.903e-02, 3.056e-02, 
4.978e-02, -1.774e-02, -1.850e-02, 1.111e-01, -1.493e-02, 4.528e-02, 7.043e-02, 7.131e-02, 1.664e-01, 7.829e-02, -1.444e-02, 4.841e-02, -4.486e-02)); + r += mul(s1_0, M4(-3.755e-02, -4.232e-02, -5.924e-02, 6.035e-02, -1.205e-01, 1.010e-02, 2.912e-02, 1.513e-01, -8.444e-02, -4.351e-02, -5.507e-02, 9.106e-02, -2.273e-03, -5.590e-02, 8.234e-02, -8.793e-02)); + r += mul(s1_1, M4(1.390e-01, -5.847e-02, -1.205e-01, 3.187e-01, -2.568e-01, 3.647e-02, -4.785e-02, 1.482e-01, 8.543e-02, 1.616e-01, -1.851e-01, 5.716e-02, -4.927e-01, -1.209e-01, 3.762e-02, 1.529e-01)); + r += mul(s1_2, M4(-1.530e-01, -1.645e-01, -1.839e-02, 2.038e-01, 8.178e-02, 1.021e-02, 6.465e-02, 7.910e-02, 2.042e-02, -1.093e-01, 2.834e-03, -3.437e-02, -1.692e-01, -8.842e-02, 1.308e-01, 2.947e-01)); + r += mul(s1_3, M4(7.424e-02, -1.654e-01, -1.246e-01, 1.160e-01, 1.581e-01, -6.440e-03, -8.087e-03, 6.360e-02, 1.111e-01, 2.032e-02, 2.760e-02, 2.297e-01, -2.736e-02, 1.178e-02, 4.567e-02, 6.139e-02)); + r += mul(s1_4, M4(3.265e-01, -5.441e-01, 9.085e-02, -1.616e-01, -1.701e-02, -4.155e-02, 4.148e-02, -3.896e-01, 2.620e-01, 1.954e-01, 1.190e-01, -1.761e-01, -3.432e-01, 1.121e-01, -2.782e-01, 2.503e-01)); + r += mul(s1_5, M4(-3.005e-01, -3.291e-01, -2.238e-02, -3.021e-02, -1.015e-02, -7.567e-02, -1.607e-01, -5.375e-02, 1.436e-01, 1.396e-02, 9.834e-02, -1.777e-01, -1.109e-01, -9.568e-02, 4.459e-02, 2.166e-01)); + r += mul(s1_6, M4(-1.383e-02, -1.195e-01, -7.240e-02, -7.391e-02, 7.793e-02, -8.250e-04, 2.773e-02, -9.196e-02, 1.148e-01, 3.961e-02, 3.218e-02, -5.892e-02, -1.103e-02, 6.608e-02, -9.241e-03, 9.855e-02)); + r += mul(s1_7, M4(-2.428e-01, -2.303e-01, 2.808e-04, -2.284e-01, 1.944e-01, 7.671e-02, 1.247e-01, 2.123e-01, -6.229e-02, 3.342e-01, -4.740e-02, 1.074e-01, -9.819e-02, -3.941e-02, -3.164e-02, -4.836e-02)); + r += mul(s1_8, M4(1.763e-01, -1.885e-01, -5.378e-02, 2.569e-01, 5.091e-02, -2.026e-02, 1.619e-01, -4.407e-02, 5.194e-02, 5.987e-02, -6.887e-02, 2.649e-01, -8.527e-02, -2.827e-03, 2.306e-02, 
-5.023e-02)); + r += mul(s2_0, M4(-8.205e-03, 4.433e-03, -1.343e-01, 2.500e-02, 1.505e-01, 1.889e-02, -1.210e-01, -8.672e-02, 3.076e-02, 1.498e-02, -2.644e-01, 5.938e-02, -8.131e-02, -2.713e-02, 1.534e-01, -1.360e-01)); + r += mul(s2_1, M4(-2.952e-02, -1.140e-01, 3.189e-02, 2.388e-01, -1.578e-01, -7.641e-03, -5.162e-02, -2.394e-01, 2.268e-02, -4.927e-02, 8.503e-02, -1.354e-01, 1.159e-01, -3.340e-02, 4.248e-02, -1.120e-01)); + r += mul(s2_2, M4(-8.455e-02, -9.497e-02, 5.362e-03, 1.827e-01, 1.962e-02, 4.699e-02, 8.229e-03, 4.554e-02, -7.800e-02, -6.534e-03, -5.066e-02, 3.068e-02, -1.612e-01, -1.929e-02, -1.645e-01, 1.841e-01)); + r += mul(s2_3, M4(-1.360e-01, -1.711e-02, -6.952e-02, -1.146e-01, 6.530e-02, 3.795e-02, 1.130e-01, 9.324e-02, 1.226e-01, 6.978e-02, 9.599e-02, -7.250e-03, 2.414e-02, 2.477e-02, 9.773e-02, 1.837e-01)); + r += mul(s2_4, M4(2.329e-01, -6.908e-02, -3.555e-03, -2.507e-01, -7.980e-02, -1.374e-02, -2.531e-01, 2.446e-01, -1.155e-01, -1.329e-01, -1.561e-01, 1.295e-01, -8.048e-02, -9.703e-02, 1.904e-01, 2.686e-02)); + r += mul(s2_5, M4(-7.302e-02, -5.162e-02, -1.045e-01, -1.587e-01, 1.160e-01, -3.129e-02, -2.348e-02, -1.675e-01, 6.703e-02, 8.933e-02, -9.654e-02, -1.748e-02, -1.317e-01, -2.547e-02, 3.693e-03, -4.579e-01)); + r += mul(s2_6, M4(-2.708e-02, 2.805e-02, 1.276e-01, 6.081e-02, 1.260e-01, -1.918e-02, -1.428e-02, 4.839e-02, 1.033e-02, -1.845e-02, 4.060e-02, -1.122e-01, -2.161e-01, 7.566e-02, 8.280e-02, 5.468e-02)); + r += mul(s2_7, M4(-5.334e-02, -2.162e-01, 1.007e-01, 2.634e-01, -1.227e-01, -2.969e-02, -9.729e-03, 3.114e-02, 4.496e-03, 1.649e-02, 1.208e-01, 7.892e-02, -7.813e-02, -6.307e-02, -1.427e-01, -5.671e-02)); + r += mul(s2_8, M4(-1.008e-01, -3.837e-02, -3.730e-02, 6.125e-02, 3.003e-02, -2.922e-02, -1.998e-02, -1.995e-03, 1.952e-02, -1.100e-02, -1.260e-01, 1.306e-01, 1.291e-01, 3.207e-02, 5.305e-02, 1.342e-01)); + r += mul(s3_0, M4(3.873e-03, -1.372e-02, -1.029e-01, 1.221e-01, -4.917e-01, 4.365e-02, -1.042e-01, -2.129e-02, -2.043e-02, 
4.700e-02, -2.792e-01, 7.757e-03, 7.957e-02, 1.361e-02, 1.071e-01, 7.693e-02)); + r += mul(s3_1, M4(9.662e-02, -2.932e-02, 1.941e-01, -3.194e-01, -1.324e-01, -1.779e-02, -1.697e-01, -2.333e-01, 6.349e-02, -1.091e-01, 1.962e-01, 9.707e-02, 4.680e-02, -2.846e-02, 2.182e-01, 6.872e-02)); + r += mul(s3_2, M4(4.693e-02, -3.250e-03, -4.054e-02, -2.195e-01, 5.278e-02, 1.146e-01, -3.060e-01, -1.790e-01, 4.953e-02, 2.110e-02, -5.913e-02, -2.237e-01, 4.825e-02, -1.640e-02, -1.017e-02, 2.968e-03)); + r += mul(s3_3, M4(-6.447e-02, 3.180e-02, 2.896e-02, 1.189e-01, 2.795e-01, 3.655e-03, -6.887e-02, 5.190e-02, -6.438e-02, 9.903e-04, 2.467e-01, -1.479e-01, 1.112e-01, -2.545e-02, 6.097e-02, -8.142e-02)); + r += mul(s3_4, M4(1.924e-01, 3.832e-02, -3.986e-01, 9.887e-02, 5.257e-01, -1.760e-01, -1.260e-01, 1.574e-01, 6.137e-01, -1.438e-01, -4.044e-01, -2.024e-01, 4.315e-02, -4.846e-02, 9.769e-02, 8.331e-02)); + r += mul(s3_5, M4(-2.651e-01, 4.248e-02, -2.012e-02, 5.405e-01, 8.924e-03, -5.811e-02, -1.290e-02, 4.415e-01, -2.601e-02, -2.126e-02, -1.961e-02, 3.511e-01, 4.215e-02, -2.716e-02, 1.296e-01, -2.416e-01)); + r += mul(s3_6, M4(-9.102e-02, 7.787e-02, 8.254e-02, 1.284e-01, 2.343e-02, 7.932e-02, 1.647e-01, 7.651e-02, 6.076e-02, 1.102e-01, -4.897e-02, 1.291e-02, -5.505e-02, 4.130e-02, 5.113e-04, -1.506e-02)); + r += mul(s3_7, M4(1.474e-01, -5.158e-02, 1.400e-01, -7.421e-02, 3.513e-02, 1.671e-03, 2.964e-01, 1.693e-01, -1.401e-01, 3.910e-01, 2.678e-03, -1.492e-01, 9.331e-02, 2.221e-02, -2.008e-02, -1.650e-01)); + r += mul(s3_8, M4(-5.802e-02, 4.015e-03, -7.727e-02, -1.935e-01, 7.799e-03, -2.203e-02, -6.666e-02, -1.774e-01, 4.786e-03, 1.240e-01, -1.484e-01, 4.574e-02, -1.314e-01, 3.668e-02, -3.512e-03, 2.067e-01)); + r += mul(s4_0, M4(-3.480e-02, -3.356e-02, -4.895e-02, -5.957e-03, -4.391e-02, 2.161e-02, -8.893e-02, 4.473e-03, -3.188e-01, 3.568e-02, -2.502e-02, 5.069e-02, -6.971e-02, 1.207e-02, -8.822e-02, -1.065e-02)); + r += mul(s4_1, M4(-4.001e-02, -1.151e-03, -1.282e-01, -1.111e-01, 
7.041e-03, -5.520e-02, -2.813e-01, 2.801e-01, -2.163e-02, 1.550e-01, 2.525e-02, 9.990e-02, 1.432e-02, -8.062e-02, 1.754e-01, 7.733e-02)); + r += mul(s4_2, M4(1.065e-01, 4.473e-02, -1.780e-02, -1.057e-01, 2.457e-01, -6.964e-02, 2.837e-01, 5.544e-02, 1.114e-01, 1.061e-01, -4.014e-03, -7.439e-02, 6.132e-02, 2.903e-02, -2.439e-02, 3.338e-02)); + r += mul(s4_3, M4(2.458e-01, 2.098e-02, -1.032e-01, -5.928e-02, 1.101e-01, 6.145e-02, 3.779e-03, -5.586e-03, -1.783e-01, -1.598e-01, -1.364e-01, 1.479e-01, -8.708e-03, -2.184e-02, -7.759e-02, 1.388e-02)); + r += mul(s4_4, M4(-2.294e-02, -5.229e-02, 9.278e-02, 3.531e-02, -4.284e-02, 1.636e-01, 2.907e-02, -1.274e-01, 1.585e-01, -5.364e-02, 1.031e-01, -2.084e-01, -1.381e-01, 9.829e-02, 6.917e-02, 1.736e-01)); + r += mul(s4_5, M4(7.420e-02, 1.156e-01, 2.054e-02, -8.936e-02, 4.535e-02, -4.649e-02, -3.211e-02, 2.075e-01, 1.284e-01, 1.272e-03, 1.120e-01, 1.890e-01, -2.611e-03, 5.461e-02, 2.135e-02, 2.503e-02)); + r += mul(s4_6, M4(-1.240e-01, -1.611e-02, 5.571e-02, 1.172e-01, 7.590e-02, -1.411e-02, -8.898e-02, 1.471e-01, -5.932e-02, 2.798e-02, -7.635e-02, -3.789e-02, -1.260e-01, 2.772e-02, 1.101e-01, -8.736e-02)); + r += mul(s4_7, M4(-1.001e-01, 1.539e-02, 7.691e-02, 3.275e-03, -1.396e-01, -8.471e-02, 5.078e-02, -3.117e-03, -4.415e-02, 1.229e-01, 8.650e-02, 1.605e-01, 2.221e-01, -7.118e-02, -1.555e-01, 3.037e-02)); + r += mul(s4_8, M4(4.324e-02, -7.434e-02, 1.425e-01, 2.127e-02, -1.275e-01, 1.402e-02, 4.531e-02, -1.775e-01, 7.208e-03, 1.470e-01, -9.997e-02, 4.179e-02, 3.099e-02, 3.236e-03, 1.256e-02, -5.362e-02)); + r += mul(s5_0, M4(-5.499e-02, 1.089e-02, 1.966e-01, 1.265e-01, 1.598e-02, -8.382e-03, -5.048e-02, -1.358e-01, 1.039e-01, 5.560e-02, 5.535e-02, -5.814e-02, -2.833e-01, -8.683e-03, -1.797e-02, 2.128e-02)); + r += mul(s5_1, M4(-3.021e-01, -1.367e-01, -6.594e-02, -2.379e-01, -1.311e-01, -1.704e-02, 1.063e-01, -1.499e-01, -2.149e-02, 1.256e-01, 3.517e-02, -1.273e-01, 5.299e-02, 4.324e-02, -4.636e-02, -1.730e-01)); + r += 
mul(s5_2, M4(1.915e-01, -3.671e-02, 7.262e-02, 3.138e-01, 8.891e-02, -1.213e-02, 6.398e-02, -1.219e-01, -1.356e-01, -9.526e-03, -2.735e-02, -7.145e-02, -2.606e-01, 2.359e-02, 1.698e-01, 1.522e-01)); + r += mul(s5_3, M4(-2.451e-01, 1.785e-02, 3.102e-01, 3.007e-02, 1.073e-01, -4.354e-02, -4.559e-03, -3.315e-02, -6.856e-02, -7.520e-02, 1.284e-03, -6.251e-02, 8.424e-02, 1.095e-02, -3.088e-01, -9.742e-02)); + r += mul(s5_4, M4(-1.551e-01, -1.494e-01, 2.153e-01, 1.393e-02, -2.496e-01, 2.217e-01, 1.903e-01, 7.574e-02, 1.421e-01, -1.086e-01, -4.961e-02, 1.004e-01, 1.392e-01, 3.209e-01, 1.138e-01, -2.434e-01)); + r += mul(s5_5, M4(-4.235e-01, 2.616e-02, 1.230e-01, -8.260e-02, 6.940e-02, -8.190e-02, -6.281e-03, -2.808e-01, 3.210e-02, -7.540e-02, 1.612e-01, 2.514e-01, 3.180e-02, 1.199e-01, -9.426e-02, 2.046e-01)); + r += mul(s5_6, M4(-3.787e-01, 1.389e-01, 3.114e-01, 1.714e-01, 5.573e-02, 3.739e-02, -8.473e-02, -4.610e-02, 9.574e-02, 2.153e-03, -8.866e-03, -4.410e-02, -1.155e-02, -5.400e-02, 1.925e-02, 1.738e-01)); + r += mul(s5_7, M4(-8.150e-02, -2.090e-01, -1.538e-01, 4.760e-01, -1.717e-01, 1.913e-02, 9.299e-03, 1.178e-02, 8.386e-02, -2.141e-02, -4.802e-02, -2.068e-01, 1.744e-01, -4.770e-02, -1.123e-01, 1.521e-01)); + r += mul(s5_8, M4(1.337e-01, -5.348e-02, 1.427e-02, -1.795e-02, 2.848e-03, -1.898e-02, -4.698e-03, -4.581e-02, -1.794e-01, 5.208e-02, 2.183e-02, 8.358e-02, -2.178e-01, -3.258e-03, -1.373e-01, 1.160e-01)); + r += mul(s6_0, M4(-2.858e-02, -5.607e-02, 6.122e-02, 1.521e-02, 1.714e-02, 1.630e-01, -1.331e-01, 7.024e-02, -6.379e-02, -2.672e-02, 2.664e-02, -1.155e-01, -3.388e-02, 4.736e-02, -8.130e-02, 3.151e-01)); + r += mul(s6_1, M4(-2.304e-01, 4.245e-02, -3.877e-01, 4.955e-02, -4.519e-02, 4.113e-02, 5.945e-02, 4.988e-02, 6.812e-02, 1.708e-02, 5.513e-02, 2.077e-01, 5.672e-03, 3.768e-02, -1.364e-02, 9.538e-02)); + r += mul(s6_2, M4(3.777e-01, 2.776e-03, 4.238e-01, 2.226e-01, 4.303e-02, 1.494e-01, -6.361e-02, 2.114e-02, 3.915e-02, -5.454e-02, -1.746e-02, -8.090e-02, 
-4.120e-02, -5.266e-03, -3.846e-02, 1.031e-01)); + r += mul(s6_3, M4(-1.792e-01, 7.010e-02, -3.349e-01, 1.673e-01, -9.938e-02, 1.228e-01, 4.501e-02, -3.223e-02, -1.085e-01, -2.602e-02, 3.112e-01, 1.934e-02, -4.332e-02, -1.565e-02, 2.233e-01, 2.114e-01)); + r += mul(s6_4, M4(3.164e-02, 1.312e-01, 2.002e-01, -8.395e-04, -6.305e-02, 1.137e-01, -8.620e-03, -1.526e-02, -3.245e-02, 2.534e-02, 3.431e-01, 2.682e-01, 6.176e-02, 7.767e-02, -1.683e-02, -1.062e-01)); + r += mul(s6_5, M4(3.983e-01, -1.263e-01, 1.939e-01, -4.347e-01, 1.405e-02, 1.783e-01, -9.961e-02, 3.092e-02, -1.693e-01, -2.893e-02, 2.976e-02, 8.765e-02, -8.958e-02, 6.622e-03, -1.051e-01, -2.510e-01)); + r += mul(s6_6, M4(3.025e-02, 4.474e-02, 8.671e-02, -1.723e-01, -3.552e-02, 6.098e-02, 6.620e-02, -6.662e-02, -2.257e-01, -6.825e-02, -6.809e-02, 8.563e-02, -3.061e-02, -7.433e-03, 9.983e-02, 1.045e-01)); + r += mul(s6_7, M4(2.197e-02, 1.158e-01, 1.172e-01, -8.142e-02, 8.136e-03, 1.074e-01, 8.381e-03, -8.652e-02, 1.715e-01, -9.235e-02, -4.468e-03, 2.780e-01, 1.224e-01, -4.897e-02, -1.505e-01, -3.873e-01)); + r += mul(s6_8, M4(1.077e-01, 9.874e-02, 2.784e-01, 1.056e-02, 1.047e-01, 3.006e-02, 7.689e-02, -7.230e-02, -7.157e-02, 2.606e-02, 1.841e-02, 7.986e-02, 2.093e-01, -7.551e-02, 1.089e-01, -2.931e-01)); + r += mul(s7_0, M4(-7.262e-02, -2.929e-02, 1.450e-01, -1.313e-01, -2.737e-02, 2.686e-01, 1.031e-01, -1.140e-01, 6.014e-02, -1.711e-02, -1.712e-01, 2.743e-01, -3.961e-03, 5.349e-02, 3.779e-02, -1.480e-01)); + r += mul(s7_1, M4(-2.523e-01, -8.444e-02, -5.685e-02, 1.679e-01, 1.718e-01, 3.372e-02, -1.776e-01, 3.977e-01, -1.106e-01, 4.569e-02, -3.599e-02, -6.396e-02, -5.179e-02, 1.339e-02, 7.202e-02, 8.055e-02)); + r += mul(s7_2, M4(1.646e-02, -4.254e-02, -2.996e-02, 2.612e-01, 3.034e-01, 2.075e-01, 5.784e-02, 1.962e-01, -4.796e-02, -1.175e-02, 1.115e-02, -4.677e-02, 1.267e-01, -9.842e-02, 5.732e-02, 4.389e-02)); + r += mul(s7_3, M4(-1.339e-01, 4.460e-02, -1.090e-01, 4.402e-02, -6.307e-01, 2.004e-01, 1.306e-01, 
-1.242e-01, 3.439e-01, 5.751e-02, 1.782e-01, -4.005e-02, 1.038e-02, 3.962e-02, -7.156e-02, -5.698e-02)); + r += mul(s7_4, M4(-5.161e-02, 1.104e-01, 1.064e-01, -2.529e-01, 1.336e-01, -1.512e-01, 2.080e-02, -3.020e-01, 1.244e-01, 1.446e-01, 2.897e-01, -3.604e-01, 3.560e-02, 2.879e-01, -6.541e-02, 4.722e-02)); + r += mul(s7_5, M4(-1.565e-02, -8.385e-02, -1.309e-01, -2.954e-01, 5.100e-03, 3.356e-01, -2.372e-01, -1.208e-01, -7.546e-02, 2.556e-02, 2.936e-02, -2.685e-01, 1.546e-01, 4.975e-02, -7.203e-02, 3.938e-02)); + r += mul(s7_6, M4(-9.647e-02, -2.487e-03, 1.106e-01, -1.505e-02, -1.015e-01, 1.383e-01, 1.683e-01, -5.457e-02, -4.709e-02, 2.964e-02, 1.209e-01, -1.821e-01, 1.030e-01, 8.563e-02, -3.546e-01, 1.567e-02)); + r += mul(s7_7, M4(5.586e-03, 7.714e-04, -8.230e-02, -7.812e-02, -2.193e-02, -9.724e-02, -2.037e-01, -2.090e-02, 1.396e-01, 1.463e-02, 5.520e-02, -2.476e-01, 9.839e-02, 2.303e-02, -8.032e-02, 1.211e-02)); + r += mul(s7_8, M4(4.894e-02, -2.312e-02, -1.515e-01, -7.387e-02, 2.279e-01, 1.422e-01, 1.680e-02, -1.466e-02, 9.482e-02, -2.726e-02, -3.037e-02, 2.144e-01, 1.412e-01, 2.566e-02, 1.389e-02, 2.099e-01)); + r += V4(-7.067e-03, -2.480e-01, 1.181e-02, 1.629e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.296e-02, 7.760e-03, 6.615e-03, -6.910e-02, -1.733e-01, -1.156e-01, -1.551e-01, -1.193e-01, 
4.787e-02, 6.314e-03, -3.864e-02, -1.277e-01, -1.072e-01, -1.677e-01, -1.273e-01, -6.141e-01)); + r += mul(s0_1, M4(-1.401e-02, -7.855e-02, 7.498e-02, 1.439e-01, -1.983e-01, -1.254e-01, -6.861e-02, 2.587e-01, -1.967e-01, -4.699e-02, 1.141e-01, 1.117e-01, -1.657e-01, -1.043e-01, -1.024e-01, -3.792e-01)); + r += mul(s0_2, M4(-8.788e-02, 1.379e-02, 3.835e-02, -4.003e-02, 7.202e-02, -2.507e-02, -7.286e-03, 9.680e-02, 4.744e-02, 7.028e-02, 4.711e-02, 1.302e-01, 5.147e-02, -1.442e-01, -1.021e-01, -1.200e-01)); + r += mul(s0_3, M4(-1.092e-01, 3.675e-02, 1.100e-01, -1.764e-02, 1.332e-01, 3.322e-02, -1.338e-01, 3.050e-01, -1.125e-01, 1.422e-01, 3.435e-02, 5.083e-02, -1.082e-01, 3.614e-03, -1.721e-01, -2.248e-01)); + r += mul(s0_4, M4(-1.823e-01, -4.488e-02, 2.356e-01, 6.690e-03, 4.926e-02, -6.168e-02, -6.737e-02, 9.670e-02, 7.045e-02, -9.428e-02, -1.176e-01, 2.986e-01, -8.239e-03, -1.726e-01, -2.281e-01, 4.943e-02)); + r += mul(s0_5, M4(-2.979e-02, 1.098e-02, 1.388e-01, 1.658e-02, 8.264e-02, -1.971e-02, -7.715e-02, 3.450e-02, -7.950e-02, 4.570e-02, 4.495e-02, -3.240e-03, -2.056e-01, 8.439e-02, -1.198e-02, -8.534e-03)); + r += mul(s0_6, M4(2.039e-02, 1.046e-01, 1.343e-01, -1.218e-01, 1.207e-02, -9.623e-02, -7.417e-02, 9.335e-03, -3.468e-03, 9.762e-02, 9.744e-03, -4.961e-02, -7.627e-02, 9.029e-02, 1.106e-02, 4.004e-03)); + r += mul(s0_7, M4(8.910e-02, -1.111e-02, -9.261e-03, -1.198e-01, 5.008e-02, -1.737e-02, -2.231e-01, 9.532e-02, 1.245e-03, -3.073e-02, -5.711e-02, 6.196e-02, -1.358e-01, -1.777e-01, 1.823e-03, 6.634e-02)); + r += mul(s0_8, M4(2.888e-02, 1.446e-01, 1.182e-01, -2.587e-02, -7.939e-03, -6.873e-02, 9.312e-02, -7.450e-02, 1.244e-02, -1.426e-01, -1.923e-02, -9.536e-02, 1.318e-01, -4.630e-02, 1.188e-01, -5.019e-02)); + r += mul(s1_0, M4(8.971e-02, -9.082e-02, -7.461e-02, -2.907e-02, -1.107e-01, 1.039e-01, -8.184e-03, -1.111e-01, 2.382e-02, 1.451e-01, -4.172e-02, 1.012e-02, 2.358e-01, -1.155e-01, 3.239e-02, 7.585e-02)); + r += mul(s1_1, M4(2.872e-02, -2.363e-01, 
7.393e-02, 2.592e-01, 2.485e-02, 4.569e-02, 8.531e-02, 1.181e-01, 2.343e-02, 1.434e-01, -1.593e-01, 5.882e-02, 3.096e-01, 4.042e-02, 6.920e-02, -3.214e-02)); + r += mul(s1_2, M4(-3.794e-02, -6.319e-03, 2.241e-01, 2.066e-02, -1.690e-01, 2.888e-02, -2.124e-02, 2.206e-02, 6.969e-02, 1.724e-01, -4.655e-02, 1.423e-01, 3.927e-02, 1.894e-02, 1.240e-01, 4.935e-02)); + r += mul(s1_3, M4(-4.410e-01, -4.775e-02, -2.330e-01, 2.230e-01, -2.839e-01, -6.997e-02, -1.015e-01, -1.054e-01, -2.014e-01, 2.071e-01, -1.688e-02, 1.301e-01, 2.427e-01, -4.421e-02, 8.016e-03, 4.136e-02)); + r += mul(s1_4, M4(-3.077e-01, -1.590e-01, -1.906e-01, 1.468e-01, 1.443e-02, -1.398e-01, -2.670e-02, -7.530e-02, 1.152e-01, 8.916e-02, -5.355e-02, 2.350e-01, 3.961e-01, 1.659e-01, -1.960e-01, 7.532e-02)); + r += mul(s1_5, M4(-2.561e-01, 1.781e-02, 1.265e-02, 1.338e-01, 8.716e-02, 2.390e-02, 9.098e-02, -8.604e-03, -2.280e-01, 2.041e-02, 1.755e-01, 1.053e-01, 9.669e-03, 2.135e-01, 1.511e-01, -3.904e-02)); + r += mul(s1_6, M4(1.296e-01, -1.541e-01, -2.282e-01, 1.621e-01, 1.033e-02, -6.657e-02, 1.396e-01, -7.432e-02, 2.574e-01, 2.337e-02, -9.461e-02, -5.584e-02, 6.284e-02, 1.940e-01, 1.195e-01, -1.045e-02)); + r += mul(s1_7, M4(2.040e-01, 1.778e-02, -3.518e-01, 3.646e-02, 4.260e-02, 2.341e-01, 1.879e-02, -6.436e-02, 1.185e-01, -2.253e-01, 7.939e-02, 1.112e-02, -5.558e-02, 1.451e-01, 4.042e-02, -7.908e-02)); + r += mul(s1_8, M4(-4.773e-02, 1.554e-01, -1.877e-01, -1.379e-01, -5.846e-02, -1.405e-02, 9.912e-02, -5.957e-02, 3.975e-01, -5.402e-02, -5.425e-02, 4.381e-02, -1.086e-01, 5.943e-02, 2.654e-02, 5.067e-02)); + r += mul(s2_0, M4(6.013e-03, -1.253e-01, 1.684e-01, 1.034e-02, 6.153e-03, -2.489e-02, 2.605e-02, -3.455e-02, -2.483e-02, -7.223e-02, -2.935e-02, -3.720e-02, -1.545e-01, 7.488e-02, 1.590e-01, -9.646e-02)); + r += mul(s2_1, M4(-1.124e-01, -2.540e-02, -1.007e-01, 3.891e-01, 9.111e-02, -6.169e-02, 4.814e-03, -2.303e-02, 1.628e-01, 4.082e-03, -1.690e-02, 3.356e-02, -2.616e-01, 3.774e-03, -2.280e-01, 
2.459e-01)); + r += mul(s2_2, M4(3.480e-02, 6.542e-02, 3.395e-02, 1.568e-01, 1.253e-01, 1.658e-02, -1.394e-02, 9.313e-02, 1.123e-01, 3.798e-02, -7.904e-02, 5.355e-02, 1.717e-02, -7.496e-02, 1.604e-01, -1.738e-02)); + r += mul(s2_3, M4(-3.165e-03, -8.321e-02, -1.332e-01, -2.712e-02, 8.552e-02, -2.493e-01, -1.120e-01, 1.037e-03, 1.315e-01, -1.144e-01, -5.099e-02, 3.977e-02, 1.624e-01, -2.295e-01, 2.364e-01, 6.290e-02)); + r += mul(s2_4, M4(-9.325e-02, -1.038e-01, 1.666e-02, 1.585e-01, -7.056e-02, 1.631e-01, -3.784e-03, 1.019e-01, -1.726e-01, 3.181e-03, -9.122e-02, -7.055e-02, 5.812e-01, -2.316e-01, -3.565e-02, -7.114e-02)); + r += mul(s2_5, M4(-6.748e-02, -1.361e-01, -1.217e-01, -7.496e-02, 1.507e-03, -1.285e-01, 9.394e-02, 2.374e-02, -1.774e-01, -6.646e-02, 3.708e-02, -1.242e-02, 1.416e-01, -1.894e-01, -1.324e-01, 1.631e-01)); + r += mul(s2_6, M4(1.277e-01, 4.821e-02, 9.689e-02, 3.256e-02, -3.484e-02, -2.009e-02, -5.629e-02, -5.021e-02, 1.237e-01, -1.442e-02, -4.216e-03, 9.495e-02, -7.393e-02, 2.990e-02, 3.954e-01, 3.259e-02)); + r += mul(s2_7, M4(1.946e-01, 1.638e-01, 5.388e-02, -1.684e-02, 1.038e-01, 1.256e-02, 1.280e-01, -1.644e-04, 2.570e-01, -2.273e-01, 1.624e-01, -3.824e-02, 1.537e-01, -1.509e-01, -4.485e-02, -1.919e-02)); + r += mul(s2_8, M4(1.524e-02, 1.102e-01, 1.297e-01, -6.958e-02, 3.049e-02, -1.014e-01, -4.677e-03, 6.799e-02, 1.526e-01, -7.553e-02, 2.533e-03, 1.559e-01, -9.696e-03, -2.257e-01, 2.736e-01, -2.925e-01)); + r += mul(s3_0, M4(-5.678e-03, -9.975e-02, 5.608e-02, -1.878e-02, 1.492e-01, -1.788e-01, 6.868e-02, -3.736e-02, -9.419e-02, -1.721e-02, 1.413e-02, -1.774e-02, -7.584e-02, 6.618e-03, 2.523e-02, -6.888e-02)); + r += mul(s3_1, M4(9.743e-02, -5.011e-02, -1.033e-01, 1.148e-01, 7.509e-02, 1.483e-01, -2.218e-02, -2.814e-01, 2.120e-01, 6.953e-03, 7.592e-02, -2.436e-01, -1.137e-01, -2.710e-02, -8.729e-02, -4.858e-02)); + r += mul(s3_2, M4(5.880e-02, 2.609e-02, 4.146e-02, 2.087e-01, -1.517e-01, 9.844e-02, 5.635e-02, -2.144e-01, -8.840e-02, 
1.367e-01, 2.265e-03, 1.798e-02, -8.372e-02, -1.541e-02, -8.325e-02, 6.236e-02)); + r += mul(s3_3, M4(1.284e-01, -5.165e-02, -1.073e-01, 1.566e-02, -8.156e-02, 9.434e-02, -1.572e-01, -2.248e-01, 1.289e-02, -2.447e-02, -1.321e-02, -1.364e-01, 1.252e-03, -2.467e-01, 9.000e-02, 1.592e-01)); + r += mul(s3_4, M4(3.242e-02, 8.790e-02, 6.500e-02, 1.169e-01, 1.310e-01, 5.295e-02, -3.770e-01, 2.659e-02, -1.715e-01, 7.007e-02, -3.154e-02, -1.503e-01, 1.687e-01, -1.812e-01, 1.072e-01, -5.560e-02)); + r += mul(s3_5, M4(5.164e-02, 2.774e-02, 3.806e-02, -1.682e-01, 2.807e-01, -1.499e-01, -1.471e-01, -1.294e-01, 3.739e-02, 1.806e-02, -3.975e-02, -7.185e-02, 1.505e-01, -7.103e-03, -1.306e-01, 1.362e-01)); + r += mul(s3_6, M4(1.583e-01, -2.523e-01, -9.988e-02, -4.831e-02, 1.392e-01, 2.817e-01, -2.417e-01, -1.503e-01, 2.149e-02, 4.919e-01, 7.501e-02, -4.918e-02, -3.924e-03, 2.290e-01, 5.465e-02, -3.692e-02)); + r += mul(s3_7, M4(-2.568e-01, 2.126e-01, -9.229e-02, -9.968e-02, 2.697e-01, -2.022e-01, 7.863e-02, -1.691e-01, -3.892e-01, 8.126e-02, 5.303e-01, -9.675e-02, -5.414e-02, -5.654e-02, -5.946e-02, -1.013e-01)); + r += mul(s3_8, M4(-4.412e-02, 8.399e-02, -2.984e-01, 1.498e-01, -1.460e-01, -1.423e-01, -2.728e-01, 7.422e-02, -1.358e-01, 5.904e-02, -9.015e-02, -2.758e-02, 5.924e-02, -1.556e-01, 5.550e-02, -4.398e-02)); + r += mul(s4_0, M4(-1.170e-01, -1.378e-01, -4.116e-02, 1.387e-01, 1.216e-01, 9.390e-02, 1.264e-01, -1.458e-01, 8.866e-02, -1.196e-01, 1.862e-01, 5.594e-01, 4.088e-02, -5.253e-03, -3.735e-02, 1.472e-01)); + r += mul(s4_1, M4(8.938e-03, -6.878e-03, -2.098e-02, -3.777e-02, 2.842e-01, 1.345e-01, 1.345e-02, -2.262e-02, -3.473e-02, -1.377e-02, -5.251e-02, -3.000e-01, 5.819e-02, 7.130e-03, 8.860e-02, -4.931e-02)); + r += mul(s4_2, M4(-7.974e-02, -1.470e-02, -1.071e-01, 6.803e-02, 2.739e-02, 1.840e-01, 1.857e-02, 1.595e-02, -9.594e-02, -2.005e-01, -1.699e-01, -6.428e-02, -1.115e-01, 2.762e-02, -1.109e-01, -5.929e-02)); + r += mul(s4_3, M4(-7.114e-02, 2.181e-02, -2.073e-01, 
-1.219e-02, 4.196e-02, 2.025e-01, 5.159e-02, 1.224e-01, -2.798e-01, -1.442e-01, 1.147e-02, 7.897e-02, -3.916e-03, 3.012e-02, -1.081e-01, 3.503e-03)); + r += mul(s4_4, M4(-3.617e-02, 1.201e-01, 5.453e-02, 1.035e-01, -2.327e-01, 1.920e-01, -6.373e-02, 2.633e-01, -1.994e-01, 1.184e-01, 1.997e-01, -8.347e-02, 6.505e-02, -1.572e-01, -3.503e-02, 1.064e-01)); + r += mul(s4_5, M4(-1.005e-01, -2.648e-02, -4.836e-02, 8.596e-02, 9.809e-02, 7.420e-02, 1.844e-01, -1.432e-01, 1.792e-01, -2.256e-01, -4.148e-01, 9.456e-02, 2.001e-02, -3.829e-02, -2.361e-01, -1.106e-01)); + r += mul(s4_6, M4(1.471e-02, 9.680e-02, 5.551e-02, -5.460e-02, 6.416e-02, 1.523e-02, -1.561e-01, -9.986e-02, 4.359e-02, -1.792e-01, 2.300e-01, 3.665e-02, -1.285e-02, -1.844e-01, -8.183e-02, -2.661e-02)); + r += mul(s4_7, M4(1.598e-02, -1.975e-02, 2.088e-01, -8.643e-02, 1.618e-01, 1.001e-01, -1.640e-01, -2.352e-02, 2.128e-01, -4.072e-04, -5.130e-01, 4.805e-04, -5.274e-02, 9.536e-02, -7.823e-02, 5.530e-02)); + r += mul(s4_8, M4(-2.983e-02, 4.106e-02, 4.312e-02, -8.918e-02, -1.063e-01, 2.148e-01, 1.454e-02, 7.327e-02, -2.582e-03, 2.259e-02, -3.194e-01, -1.971e-02, 2.798e-02, 5.562e-02, -1.398e-01, -6.819e-02)); + r += mul(s5_0, M4(4.513e-01, 4.676e-01, 5.226e-01, 1.764e-01, -9.800e-02, -7.946e-02, 2.521e-02, -4.449e-02, 6.989e-02, -8.352e-02, -4.870e-02, 3.570e-02, 4.503e-02, 2.162e-01, -9.362e-02, 3.516e-01)); + r += mul(s5_1, M4(-5.371e-02, 1.972e-01, -4.532e-02, 2.918e-01, -2.111e-01, -3.814e-02, 1.917e-01, -5.373e-03, 6.381e-02, 1.753e-02, 6.961e-02, 3.351e-02, 1.103e-01, -9.770e-02, -9.194e-02, -5.176e-02)); + r += mul(s5_2, M4(9.004e-02, -1.363e-02, -1.163e-02, -9.717e-02, -1.343e-01, -5.610e-02, -1.661e-01, 3.208e-02, 8.446e-02, -3.063e-02, 7.279e-02, -1.185e-01, 3.186e-01, 6.196e-02, 1.419e-01, 1.332e-01)); + r += mul(s5_3, M4(2.893e-01, -4.345e-02, 6.999e-02, -4.645e-02, 1.928e-01, -2.891e-01, -5.506e-02, -5.547e-02, 6.418e-02, 5.734e-02, 6.073e-03, 1.036e-01, 1.590e-01, -1.633e-01, -2.515e-02, 
-5.278e-02)); + r += mul(s5_4, M4(-1.224e-01, -2.496e-01, 4.529e-02, 3.488e-01, 1.504e-03, -4.240e-01, 2.477e-02, 4.511e-02, 5.403e-02, 1.711e-01, 1.764e-01, 3.557e-02, 1.015e-01, -3.583e-01, 3.328e-01, 7.640e-02)); + r += mul(s5_5, M4(-1.450e-01, -2.665e-01, -1.446e-01, 1.696e-01, -1.642e-01, -7.631e-02, -3.422e-03, -9.272e-02, -1.440e-02, 3.656e-02, 4.633e-02, 6.810e-02, 7.371e-02, 2.298e-01, 2.306e-01, -2.436e-01)); + r += mul(s5_6, M4(3.661e-01, 1.620e-01, 4.195e-01, -3.285e-01, 1.617e-02, -2.502e-01, 1.880e-02, 1.265e-01, -4.917e-02, 4.856e-02, 1.003e-01, -7.730e-02, -1.180e-01, -1.242e-01, 6.801e-03, 3.626e-03)); + r += mul(s5_7, M4(3.307e-01, -1.375e-01, -2.029e-01, 2.181e-01, 1.969e-01, -3.681e-01, 1.225e-01, -2.982e-02, 4.205e-02, 8.890e-02, -1.453e-01, -8.069e-02, 3.265e-01, -7.184e-02, 6.168e-02, -1.892e-01)); + r += mul(s5_8, M4(1.585e-01, 2.796e-01, 2.616e-01, 8.947e-02, 1.624e-01, -1.246e-01, -2.392e-01, 9.131e-02, -8.142e-02, -1.192e-02, 3.717e-02, -8.612e-02, 1.071e-01, -1.051e-01, 1.600e-01, 7.882e-02)); + r += mul(s6_0, M4(-5.474e-02, -1.008e-01, 1.636e-02, 1.844e-01, 1.248e-01, 3.317e-02, 1.378e-02, 3.465e-03, 7.883e-02, -1.977e-02, 1.255e-01, -1.549e-01, -7.282e-02, 1.778e-01, -5.500e-02, 9.860e-02)); + r += mul(s6_1, M4(2.543e-01, -2.954e-01, 4.462e-02, -9.544e-02, 1.528e-02, -9.023e-02, 1.001e-01, -1.330e-01, 6.335e-03, -2.947e-02, 5.462e-02, -1.493e-01, 4.117e-02, -6.603e-02, -5.900e-03, 3.771e-02)); + r += mul(s6_2, M4(6.381e-02, 2.838e-03, -3.115e-01, 9.896e-02, 9.216e-02, -2.670e-02, 6.396e-02, -2.471e-02, 3.709e-02, -6.857e-02, -3.678e-02, 4.962e-02, 8.385e-03, 1.931e-01, 1.366e-02, -1.669e-02)); + r += mul(s6_3, M4(8.992e-02, -8.330e-02, 2.828e-02, -1.335e-01, 4.957e-02, 7.820e-03, 7.989e-02, 6.431e-02, 1.280e-01, -2.863e-03, 1.777e-01, -1.335e-01, -5.924e-02, 1.229e-01, -7.804e-02, 1.636e-01)); + r += mul(s6_4, M4(-1.124e-01, -1.182e-01, 3.981e-02, 2.047e-01, -1.712e-02, -1.877e-02, 1.001e-01, -3.272e-02, -1.424e-01, -8.957e-02, 
-6.594e-02, -1.042e-02, -9.962e-02, 2.836e-02, 6.545e-02, 1.542e-01)); + r += mul(s6_5, M4(5.565e-03, 2.089e-01, -8.464e-02, 5.421e-02, -1.540e-01, -5.868e-02, 1.151e-01, -1.651e-01, -1.522e-03, 3.179e-02, -1.211e-02, 1.724e-02, 4.772e-02, -2.717e-01, -1.096e-03, -2.160e-01)); + r += mul(s6_6, M4(-1.562e-02, 1.106e-01, 3.543e-02, 7.591e-02, -3.997e-02, 6.842e-02, 1.129e-01, 1.381e-01, 1.876e-01, 4.092e-01, 1.209e-01, 6.818e-02, -1.437e-01, 1.191e-01, -1.169e-01, 8.674e-02)); + r += mul(s6_7, M4(-4.597e-02, -3.929e-02, -1.468e-02, 7.116e-02, -8.660e-02, 3.209e-02, 3.214e-02, -3.238e-02, -1.492e-01, 2.566e-02, 1.710e-01, -1.887e-01, -2.617e-01, 3.479e-01, -1.033e-01, 2.452e-01)); + r += mul(s6_8, M4(5.360e-01, -6.639e-02, 4.089e-02, -2.392e-02, -2.369e-01, 1.002e-01, 2.614e-02, -3.216e-02, -3.226e-02, 8.686e-02, 2.782e-02, 3.562e-02, 5.844e-02, 8.772e-02, 3.218e-01, -2.124e-03)); + r += mul(s7_0, M4(-4.048e-02, -9.997e-02, 8.137e-02, 2.078e-02, 1.268e-01, -1.598e-01, 3.866e-02, -1.074e-01, -2.593e-01, 9.528e-02, 9.960e-02, 5.756e-02, 1.358e-01, -5.139e-03, -1.316e-02, -1.092e-01)); + r += mul(s7_1, M4(-1.213e-01, 8.196e-02, -8.100e-02, 1.871e-01, 1.131e-01, 1.529e-01, -6.739e-03, -2.926e-01, 6.150e-02, -3.458e-02, 8.555e-02, -9.260e-02, 1.937e-02, -8.979e-02, 2.298e-02, 8.512e-02)); + r += mul(s7_2, M4(-8.931e-03, -3.090e-03, -1.055e-01, -9.030e-02, 8.746e-02, -9.688e-02, 9.056e-02, 8.719e-02, 1.805e-03, -6.690e-02, 3.257e-02, -1.213e-02, 1.109e-02, 2.994e-02, 2.223e-03, 1.758e-01)); + r += mul(s7_3, M4(-1.470e-01, -1.280e-02, 6.654e-02, 4.358e-02, 9.439e-02, 1.011e-01, 1.927e-01, 2.059e-01, -2.276e-01, 1.261e-01, -6.429e-02, -1.375e-02, 2.319e-01, -1.480e-01, -1.147e-01, -3.065e-01)); + r += mul(s7_4, M4(-3.930e-02, -2.527e-01, 2.501e-01, 8.167e-02, -1.693e-01, -1.588e-01, 2.412e-01, 4.158e-01, 1.166e-01, -1.097e-01, -1.071e-01, 1.972e-01, 2.167e-01, -5.103e-02, -2.525e-02, 4.689e-02)); + r += mul(s7_5, M4(-1.498e-01, 9.670e-02, -3.224e-02, -1.630e-01, -3.076e-01, 
1.045e-02, 1.002e-01, -1.043e-01, 1.172e-01, 7.072e-04, -7.597e-02, 9.863e-03, -5.800e-02, -4.964e-02, -3.846e-02, -9.352e-04)); + r += mul(s7_6, M4(-1.372e-01, 1.598e-01, -1.029e-01, -7.709e-02, 4.725e-02, 5.317e-02, 4.150e-01, 7.688e-02, -2.923e-01, 2.772e-01, -3.063e-01, 7.638e-02, 1.667e-01, -1.784e-01, -5.216e-02, -2.791e-02)); + r += mul(s7_7, M4(-8.641e-02, 1.598e-01, 8.128e-02, -4.598e-02, 9.464e-02, 2.164e-01, 4.623e-01, 4.714e-02, -1.506e-01, 1.833e-01, -8.676e-02, 3.411e-01, -2.230e-02, -1.713e-01, 4.596e-02, -2.925e-02)); + r += mul(s7_8, M4(-9.475e-02, -3.959e-02, 7.701e-02, 4.111e-02, -4.562e-01, 2.156e-02, -2.381e-03, -2.020e-01, 8.663e-02, 5.398e-02, -1.718e-01, -3.074e-02, -5.906e-02, -4.721e-02, 2.176e-01, -8.696e-02)); + r += V4(7.251e-02, -4.031e-02, -3.769e-02, 6.364e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.698e-02, -1.873e-01, -6.680e-02, 4.658e-02, -1.640e-02, -4.673e-02, 2.303e-02, -7.395e-02, 9.013e-02, -2.312e-02, 7.911e-02, -3.101e-02, 1.765e-01, -3.523e-01, 2.891e-01, -1.342e-01)); + r += mul(s0_1, M4(-5.807e-02, 1.886e-01, 3.353e-03, -1.506e-02, -2.154e-01, -5.218e-03, 2.449e-01, -1.151e-01, 6.348e-02, -1.934e-02, -3.096e-02, 1.163e-02, 5.664e-01, -1.311e-01, 3.078e-01, 8.386e-02)); + r += mul(s0_2, M4(-4.766e-02, -3.748e-02, 1.156e-01, -1.899e-02, 2.700e-02, 7.951e-02, 
-1.855e-02, -1.360e-01, -9.049e-02, 1.629e-01, 5.111e-02, 2.138e-02, -4.326e-02, -6.232e-02, 1.730e-01, 9.890e-02)); + r += mul(s0_3, M4(-9.821e-02, -8.497e-04, 3.265e-02, 3.157e-02, 3.790e-02, 2.119e-01, -2.147e-01, 7.346e-02, 1.442e-01, -5.694e-02, -6.352e-02, 1.299e-01, 2.590e-01, 3.356e-02, 2.009e-01, 3.760e-01)); + r += mul(s0_4, M4(1.312e-01, 2.749e-01, 3.881e-02, 2.240e-01, 1.452e-01, -5.652e-02, 1.002e-01, 3.697e-03, -2.693e-01, -4.560e-02, -5.198e-02, -1.121e-01, -5.385e-02, -2.180e-01, 2.357e-01, -4.682e-02)); + r += mul(s0_5, M4(1.037e-01, -4.922e-03, -1.589e-02, 1.669e-01, 6.173e-02, 8.454e-03, 3.746e-01, 1.235e-01, -8.087e-02, -1.984e-02, 3.124e-02, -1.156e-01, -1.325e-01, 2.218e-01, 1.087e-01, 1.539e-01)); + r += mul(s0_6, M4(-5.949e-03, -2.015e-01, 7.017e-02, -9.461e-02, -1.481e-01, 1.249e-01, -1.170e-01, -1.190e-01, 6.665e-02, -7.714e-02, -3.005e-02, -1.891e-01, 5.926e-02, 2.404e-02, -2.855e-02, 4.209e-02)); + r += mul(s0_7, M4(1.168e-01, 1.383e-01, 4.428e-02, -2.288e-01, 9.044e-02, 2.164e-01, -8.071e-02, 6.872e-02, 1.572e-02, -4.685e-02, 2.427e-01, -2.775e-02, -4.994e-02, 3.562e-02, 4.875e-02, 1.291e-01)); + r += mul(s0_8, M4(4.041e-03, 2.384e-02, -1.436e-01, 7.285e-02, -9.456e-02, -2.413e-02, 1.093e-01, 1.186e-01, -3.203e-02, 1.475e-02, -1.489e-02, 2.053e-01, 5.281e-02, 4.876e-02, -6.375e-02, 1.727e-02)); + r += mul(s1_0, M4(-2.273e-02, 1.106e-01, -2.805e-01, 4.683e-02, 4.003e-02, 1.739e-03, -9.869e-02, -6.333e-02, -7.502e-02, 2.742e-01, 6.364e-02, -8.908e-02, 1.085e-01, 3.470e-01, -3.442e-01, -9.594e-02)); + r += mul(s1_1, M4(-2.592e-01, -2.718e-02, 2.159e-01, 5.279e-02, -7.593e-02, -5.258e-02, 1.568e-01, 1.135e-01, 1.196e-01, 1.640e-01, 2.071e-01, -3.588e-01, -1.379e-01, 1.784e-01, 6.214e-03, -5.460e-02)); + r += mul(s1_2, M4(-1.736e-02, 2.079e-01, 4.595e-03, -1.123e-01, -2.302e-02, 6.572e-02, -1.167e-01, 1.474e-02, -1.697e-03, -2.812e-01, 1.269e-02, -3.907e-03, -6.566e-02, 1.811e-01, 1.914e-01, -3.899e-02)); + r += mul(s1_3, M4(-1.516e-01, 
-1.128e-02, -1.515e-01, -5.396e-02, 3.382e-02, -4.851e-02, 5.555e-02, 1.203e-01, 7.104e-02, 2.457e-01, 3.103e-02, 7.618e-02, -1.235e-02, -6.414e-02, -1.932e-01, 2.398e-01)); + r += mul(s1_4, M4(9.719e-02, -2.010e-01, 1.421e-01, -3.102e-03, 4.764e-02, 1.152e-01, -1.954e-03, -2.084e-01, 1.056e-01, -7.074e-02, -2.112e-02, -1.057e-01, -1.572e-01, -4.169e-02, -6.214e-02, -7.277e-01)); + r += mul(s1_5, M4(8.558e-02, -2.495e-01, -1.989e-01, 9.590e-02, 1.615e-01, -1.643e-01, -1.051e-02, -9.111e-02, -6.694e-02, 1.960e-01, 3.509e-01, 2.216e-02, -1.859e-01, -3.016e-01, -1.452e-01, 1.759e-01)); + r += mul(s1_6, M4(1.043e-01, -1.302e-01, 3.582e-02, -2.599e-01, 9.496e-02, -1.366e-01, 3.558e-02, -1.240e-01, -8.862e-02, 1.433e-01, -2.149e-02, -1.374e-02, 2.642e-02, -2.283e-02, -2.794e-02, -2.896e-02)); + r += mul(s1_7, M4(1.294e-01, -1.096e-01, -1.064e-01, -2.245e-01, 8.796e-02, 9.860e-02, 2.812e-02, 1.750e-02, 1.139e-01, -1.873e-01, 2.878e-02, 1.735e-01, 1.740e-02, 1.851e-02, -2.990e-02, -1.145e-01)); + r += mul(s1_8, M4(-1.369e-01, -6.807e-02, -5.180e-02, 1.313e-01, -3.927e-05, -1.128e-01, 4.890e-02, 3.989e-02, -2.035e-01, 2.148e-01, -2.251e-01, 1.421e-01, -7.962e-03, 1.509e-01, -1.495e-02, -1.019e-01)); + r += mul(s2_0, M4(-9.294e-02, -2.481e-02, 1.403e-01, -1.359e-01, -1.061e-01, -5.883e-02, -1.400e-01, 5.274e-02, -2.305e-02, 4.316e-02, -1.507e-01, 1.499e-02, 1.564e-02, -4.029e-01, 1.641e-01, -5.780e-02)); + r += mul(s2_1, M4(-1.752e-02, -4.913e-02, 1.778e-02, -1.957e-03, -1.179e-01, 8.784e-02, -1.297e-01, -1.210e-02, -4.552e-02, 7.796e-02, -1.006e-02, -8.141e-02, -2.534e-01, -5.645e-02, 5.620e-02, 7.241e-02)); + r += mul(s2_2, M4(-2.166e-01, -5.779e-02, 1.009e-02, 1.082e-01, 3.005e-02, -2.124e-01, 7.502e-02, 1.306e-02, -2.851e-02, 6.912e-02, 3.177e-02, 1.945e-02, -1.518e-01, -1.881e-01, -1.107e-01, 1.201e-02)); + r += mul(s2_3, M4(-4.604e-02, 9.225e-02, 1.064e-01, 6.817e-03, -1.436e-01, 1.484e-01, 8.814e-03, 1.094e-01, -2.963e-02, -1.773e-01, 4.952e-02, 3.703e-02, -1.068e-01, 
-4.995e-02, -1.110e-01, -2.143e-01)); + r += mul(s2_4, M4(-1.260e-01, 4.946e-01, 9.564e-02, -8.133e-02, 1.674e-01, -2.238e-01, 7.130e-02, -1.971e-02, 1.666e-01, 2.136e-02, -1.943e-02, 2.817e-02, 6.673e-02, 5.442e-02, -9.210e-02, -2.027e-01)); + r += mul(s2_5, M4(1.928e-02, 3.097e-01, 1.487e-01, -2.217e-01, 1.282e-01, -2.386e-02, 6.971e-03, 8.529e-02, -9.620e-03, -5.562e-02, -1.293e-01, -7.603e-02, 2.167e-02, 1.700e-01, 3.485e-02, -5.729e-02)); + r += mul(s2_6, M4(-8.671e-02, 1.699e-01, 1.681e-01, 5.908e-02, -4.252e-03, -3.336e-02, 1.989e-01, 3.247e-02, 4.496e-02, -6.578e-02, -1.127e-01, -1.141e-01, -1.770e-01, -1.885e-01, 3.977e-02, 7.611e-02)); + r += mul(s2_7, M4(-1.079e-01, -8.983e-02, -1.298e-01, 1.607e-01, -1.065e-01, 1.661e-02, -7.341e-02, -4.099e-02, 2.815e-02, 1.428e-01, 7.976e-02, -2.694e-02, 2.546e-01, -4.262e-02, 6.669e-02, 4.667e-02)); + r += mul(s2_8, M4(-7.842e-02, -1.926e-01, 1.274e-02, -1.604e-02, 1.094e-02, 6.732e-02, 3.969e-02, 7.989e-03, -4.900e-02, -1.781e-01, -2.337e-02, -1.487e-02, 1.947e-01, -2.330e-01, 1.304e-01, -1.592e-02)); + r += mul(s3_0, M4(-1.820e-01, -7.021e-02, -8.828e-02, 7.994e-02, 9.838e-02, -7.559e-02, -6.619e-02, 6.800e-02, -9.744e-03, -6.037e-02, 1.779e-02, -4.527e-02, 3.634e-02, 3.175e-01, 2.481e-03, -1.218e-01)); + r += mul(s3_1, M4(-3.103e-02, 3.963e-02, -7.307e-02, 4.523e-02, -1.794e-01, -3.067e-01, 1.376e-01, 1.912e-01, 9.885e-02, -5.196e-02, -8.585e-02, 9.436e-02, -1.400e-01, 8.002e-02, -5.219e-03, 5.146e-02)); + r += mul(s3_2, M4(1.009e-01, 1.692e-01, 1.049e-01, -9.813e-03, 8.228e-02, 7.716e-02, 3.513e-01, -1.930e-01, -8.766e-02, -7.978e-02, -2.276e-01, -7.862e-02, -1.635e-02, 1.706e-01, -1.281e-01, 5.733e-02)); + r += mul(s3_3, M4(-1.142e-01, -1.328e-01, -6.850e-02, -7.040e-02, -1.122e-01, 1.799e-01, 2.511e-01, 9.606e-02, -9.700e-02, -2.469e-01, 2.237e-01, -2.198e-01, 3.658e-02, -6.923e-02, 1.219e-01, 7.830e-02)); + r += mul(s3_4, M4(1.073e-01, -1.174e-01, 1.235e-02, 1.604e-01, 3.105e-01, -1.302e-02, 3.550e-01, 
-1.716e-01, 1.775e-01, 2.759e-02, 1.459e-02, 1.077e-01, 3.152e-03, -1.262e-01, -3.016e-02, -1.051e-01)); + r += mul(s3_5, M4(-4.056e-02, 3.065e-02, 9.051e-02, 1.969e-01, 1.910e-01, 2.110e-01, 5.586e-02, 7.756e-02, -1.644e-01, -1.751e-01, -6.551e-02, 4.693e-02, -8.392e-02, 1.207e-01, -9.491e-02, 9.841e-02)); + r += mul(s3_6, M4(4.175e-02, -1.885e-02, -3.087e-02, -3.164e-01, 7.055e-04, 1.036e-01, -1.606e-01, -6.494e-02, 2.920e-01, -4.953e-01, 1.364e-01, 2.848e-02, 2.253e-02, 7.942e-02, 1.192e-01, -3.001e-02)); + r += mul(s3_7, M4(3.773e-03, -4.945e-02, 5.913e-02, -1.400e-01, 1.157e-01, 9.907e-02, -5.604e-02, 8.307e-02, 1.586e-01, 9.916e-02, -1.486e-02, 3.248e-01, 1.851e-01, -7.099e-02, 8.477e-02, 4.372e-02)); + r += mul(s3_8, M4(-2.045e-02, -7.855e-02, -4.495e-02, 6.078e-02, -6.315e-02, -7.862e-02, -1.406e-01, 3.674e-02, -1.628e-01, 1.351e-01, -6.527e-02, -6.756e-03, 6.466e-02, 1.532e-02, 4.055e-02, -3.032e-02)); + r += mul(s4_0, M4(-1.499e-01, -7.250e-02, -6.480e-02, 1.274e-01, 2.127e-01, -3.638e-02, -1.541e-01, 1.998e-02, -1.626e-01, -1.542e-02, -2.297e-01, -4.711e-02, 1.672e-01, 1.783e-01, 1.519e-01, -2.619e-02)); + r += mul(s4_1, M4(-1.092e-01, -8.656e-02, 1.802e-01, -7.827e-02, -4.769e-02, -3.521e-01, -6.076e-02, -1.028e-01, 9.007e-02, 7.495e-02, 1.750e-01, -1.573e-02, -9.365e-02, -1.703e-01, -2.072e-01, -3.748e-02)); + r += mul(s4_2, M4(4.518e-02, -2.296e-02, -1.432e-01, 5.951e-02, 2.120e-01, -1.424e-01, 1.259e-01, 1.291e-01, 1.397e-01, 2.877e-01, 2.697e-01, -2.689e-02, 7.480e-03, 1.533e-01, -1.073e-01, 6.312e-02)); + r += mul(s4_3, M4(-3.880e-02, -4.093e-02, 4.001e-02, -8.178e-02, -6.328e-02, -7.422e-02, 9.309e-02, 2.872e-02, -2.817e-01, 2.359e-01, -1.395e-01, -2.873e-01, -8.667e-02, 1.968e-01, 7.938e-02, -1.771e-02)); + r += mul(s4_4, M4(-2.421e-02, -8.792e-02, 4.204e-02, 3.885e-02, -8.507e-02, -1.308e-01, -1.262e-01, -2.356e-01, 1.499e-01, 1.452e-01, 1.380e-01, 2.543e-01, 1.627e-02, -1.195e-01, 5.032e-02, -1.295e-02)); + r += mul(s4_5, M4(1.467e-02, 
1.872e-01, 5.851e-02, -1.954e-02, 1.338e-01, -5.445e-02, -9.792e-02, -1.148e-01, -9.106e-02, 7.280e-02, 1.391e-02, -1.308e-01, -3.100e-02, 9.612e-02, 5.221e-02, 1.605e-02)); + r += mul(s4_6, M4(5.990e-02, -4.208e-02, -1.734e-01, -1.367e-01, -7.889e-02, 1.409e-01, -5.239e-02, -5.982e-02, 1.043e-01, 3.526e-03, 2.075e-03, -2.397e-01, 8.369e-02, 3.376e-02, 7.339e-02, 2.281e-01)); + r += mul(s4_7, M4(2.764e-01, -1.796e-01, 1.455e-01, -3.286e-02, -4.900e-02, -1.909e-02, -2.249e-02, -9.996e-02, -3.512e-02, -1.206e-01, 1.847e-02, 2.303e-02, -6.943e-02, -1.329e-01, -7.892e-02, -7.028e-02)); + r += mul(s4_8, M4(4.555e-02, 1.136e-01, 1.246e-02, -1.455e-01, -4.663e-02, -2.871e-02, -2.046e-01, -1.446e-01, -2.253e-01, 2.763e-02, -1.004e-01, 2.980e-03, 7.329e-02, -1.165e-01, -2.569e-02, -1.452e-01)); + r += mul(s5_0, M4(1.021e-01, -4.993e-01, -3.943e-01, -3.816e-02, -1.130e-01, -9.294e-02, -1.374e-01, -2.294e-02, 1.072e-01, 4.437e-02, -4.967e-02, 2.527e-03, -5.801e-02, 5.197e-01, -6.761e-02, -7.898e-02)); + r += mul(s5_1, M4(1.401e-01, -1.985e-02, -2.018e-01, -2.551e-01, -3.057e-01, -8.481e-02, 7.933e-02, -2.035e-01, 4.128e-02, -2.439e-01, 1.204e-01, -5.914e-02, -1.325e-01, 1.267e-01, 6.798e-02, 1.577e-01)); + r += mul(s5_2, M4(-5.245e-02, -9.068e-02, 4.098e-03, -1.682e-03, -2.397e-01, 3.945e-02, -2.103e-02, 6.739e-02, 9.228e-02, -1.101e-01, 3.726e-01, 7.907e-02, -1.078e-01, -1.618e-01, -8.862e-02, -2.620e-02)); + r += mul(s5_3, M4(-4.351e-01, -1.867e-01, -1.876e-01, -1.895e-01, 7.828e-03, -1.009e-01, 8.182e-02, 2.871e-03, 1.487e-01, 5.502e-02, -2.857e-02, -1.281e-01, 8.090e-02, 2.686e-01, 8.667e-02, 5.965e-02)); + r += mul(s5_4, M4(-1.018e-01, -1.264e-01, -3.769e-02, 4.907e-02, -2.531e-01, -1.157e-02, -2.996e-04, -1.775e-01, 6.139e-02, 1.082e-01, 5.053e-02, 1.333e-01, -3.971e-02, 5.975e-02, 6.767e-02, -1.828e-01)); + r += mul(s5_5, M4(1.820e-01, 1.515e-01, 1.530e-02, 1.505e-01, -9.685e-02, 4.150e-02, 2.998e-02, -1.715e-01, -5.765e-02, -2.729e-02, -5.637e-03, 9.204e-02, 
-2.588e-02, -4.561e-01, -4.315e-02, -8.377e-02)); + r += mul(s5_6, M4(8.928e-02, 1.850e-01, -1.260e-01, 1.848e-01, -4.262e-02, -5.891e-02, -6.487e-02, 9.753e-02, 5.160e-02, -6.055e-02, 1.049e-01, 9.045e-02, -9.491e-02, 5.606e-02, -1.062e-01, 2.199e-01)); + r += mul(s5_7, M4(1.982e-02, 1.538e-01, -1.591e-01, 3.782e-03, -7.732e-02, 2.261e-01, -9.981e-02, 9.836e-02, 3.086e-02, -1.224e-01, -7.203e-02, -4.307e-02, 9.526e-02, 5.374e-02, -8.144e-02, 1.073e-01)); + r += mul(s5_8, M4(-3.075e-01, -2.543e-02, 1.095e-01, -9.644e-02, -5.159e-02, 8.254e-03, -9.783e-02, 7.402e-02, -2.176e-01, -1.525e-02, 1.419e-02, 5.681e-02, 7.628e-03, -1.650e-01, 1.085e-01, -7.338e-03)); + r += mul(s6_0, M4(1.741e-02, 2.125e-01, 1.168e-01, -4.260e-02, 1.296e-01, 1.149e-01, -1.442e-01, -8.121e-03, -2.016e-02, -1.870e-02, 5.396e-02, 1.021e-01, -1.165e-02, 1.188e-01, -9.040e-03, -3.638e-02)); + r += mul(s6_1, M4(2.115e-01, 3.941e-01, 2.957e-02, 1.120e-01, 7.739e-02, 3.555e-02, -3.015e-02, -9.409e-02, -1.270e-01, 2.272e-01, 7.447e-02, 9.663e-03, -9.259e-02, -1.570e-01, 1.401e-01, -2.905e-02)); + r += mul(s6_2, M4(5.055e-02, -2.221e-01, 9.541e-02, 3.872e-01, -1.639e-01, -2.329e-02, 4.069e-02, 1.590e-02, -8.454e-02, -2.204e-03, -6.812e-02, 1.247e-01, 2.795e-02, 8.716e-02, 2.249e-01, -1.203e-01)); + r += mul(s6_3, M4(9.096e-02, 2.607e-01, 1.479e-02, -8.888e-05, 1.222e-01, 8.601e-02, -5.109e-02, 7.726e-02, 1.441e-01, -2.092e-01, 1.506e-01, 1.928e-02, -6.446e-02, 2.402e-01, -2.574e-01, -2.931e-02)); + r += mul(s6_4, M4(-1.233e-01, 1.028e-01, -4.458e-02, 4.378e-01, -3.634e-02, 1.858e-01, 2.470e-02, -2.198e-01, -2.388e-01, 5.963e-02, 5.429e-01, -1.237e-01, -1.648e-01, 3.738e-02, -3.718e-01, 3.435e-02)); + r += mul(s6_5, M4(1.562e-01, -3.674e-01, 1.336e-01, -2.522e-01, -4.597e-02, 6.303e-03, 9.126e-02, -2.470e-02, -9.233e-02, -6.120e-02, 1.047e-01, 8.981e-02, 1.036e-01, 1.669e-01, 2.349e-02, -2.290e-02)); + r += mul(s6_6, M4(-6.684e-03, -2.623e-01, -1.988e-01, 7.140e-02, 3.641e-02, 1.881e-02, -5.311e-02, 
1.847e-01, -5.423e-02, 1.317e-01, -8.327e-02, -6.475e-02, -2.640e-02, -1.547e-01, 2.943e-01, 1.121e-01)); + r += mul(s6_7, M4(2.161e-01, -2.097e-01, 8.820e-02, 2.329e-01, -9.084e-03, -6.403e-02, 8.263e-02, 1.153e-01, -1.475e-01, -2.645e-02, -1.344e-01, 1.451e-01, -1.776e-01, -2.007e-01, 2.800e-02, 7.088e-01)); + r += mul(s6_8, M4(1.772e-01, -2.968e-01, -8.142e-02, -2.700e-01, -3.186e-02, -3.387e-02, 7.155e-02, 8.385e-02, -8.074e-02, -6.685e-02, -1.783e-01, 2.027e-01, -5.418e-02, 7.905e-03, 1.838e-01, -1.703e-02)); + r += mul(s7_0, M4(-8.795e-03, 8.697e-02, -1.522e-02, -4.836e-02, 2.736e-01, -3.156e-02, 2.585e-02, -7.371e-02, -8.043e-02, -9.909e-02, 1.712e-01, 9.713e-03, 1.880e-01, 2.281e-02, -4.252e-02, 3.635e-02)); + r += mul(s7_1, M4(-2.456e-01, -3.123e-02, -8.438e-02, 5.096e-02, 2.406e-02, 5.820e-02, 1.871e-01, 9.246e-02, -4.854e-02, -1.220e-02, 8.671e-02, -8.788e-02, 9.197e-02, -1.051e-02, -1.603e-02, 7.826e-03)); + r += mul(s7_2, M4(-1.740e-01, -1.295e-01, -7.792e-02, 3.404e-02, -1.469e-01, -7.475e-02, 5.725e-02, -2.245e-01, -5.151e-02, -2.124e-02, -8.004e-02, 7.175e-02, 1.452e-02, 1.684e-01, -1.166e-01, -4.261e-02)); + r += mul(s7_3, M4(-2.162e-02, 6.368e-02, 4.331e-02, 3.686e-02, 4.560e-01, 3.422e-02, -7.137e-02, -1.088e-01, 6.296e-02, -1.548e-01, 2.900e-01, 2.713e-02, 2.923e-01, -1.117e-01, 9.811e-02, -5.212e-02)); + r += mul(s7_4, M4(-1.214e-01, -1.009e-01, -1.105e-01, -1.741e-01, -3.244e-01, 2.416e-02, -1.582e-01, -2.779e-01, 3.225e-02, -1.660e-01, 7.940e-02, 1.323e-01, 1.905e-01, -4.041e-02, 6.546e-03, -9.449e-02)); + r += mul(s7_5, M4(-8.208e-02, -3.476e-02, -9.438e-02, -1.004e-01, -2.276e-02, 4.244e-01, -2.006e-02, -1.780e-01, -6.147e-02, 3.855e-02, 2.678e-02, -7.125e-02, 1.039e-01, -2.048e-02, -1.289e-01, -2.667e-02)); + r += mul(s7_6, M4(3.593e-02, -2.194e-02, -7.869e-02, -1.356e-01, -4.294e-02, -6.255e-02, 1.837e-01, 3.174e-01, -4.049e-02, 1.013e-02, 9.296e-02, 1.090e-01, 7.926e-02, -7.720e-02, -1.803e-01, 5.508e-02)); + r += mul(s7_7, 
M4(-2.289e-02, -5.673e-02, 2.362e-02, 2.427e-01, 7.482e-03, -3.263e-01, 3.090e-01, 3.126e-01, -5.159e-02, 1.584e-01, -1.165e-01, 3.172e-01, 2.170e-01, -7.058e-02, 8.322e-02, -4.230e-02)); + r += mul(s7_8, M4(-4.434e-02, 8.279e-02, -1.851e-01, 7.341e-02, -5.335e-02, 1.912e-01, 3.554e-01, 4.142e-02, 4.895e-02, 3.337e-03, 1.815e-01, -5.542e-02, -2.550e-02, -7.432e-02, -2.534e-02, -3.253e-02)); + r += V4(-5.796e-02, -9.994e-02, -4.215e-02, -3.212e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-7.087e-02, 6.239e-02, 5.798e-02, 8.463e-02, 2.003e-01, 1.054e-01, -5.041e-02, -9.348e-02, -1.609e-01, 7.109e-02, 1.378e-01, -1.261e-01, -1.773e-01, -1.247e-01, -1.533e-01, -3.260e-02)); + r += mul(s0_1, M4(-5.111e-02, -8.474e-02, -1.873e-01, -1.267e-01, -8.925e-03, 5.333e-02, -2.662e-01, -1.796e-01, -1.243e-02, 2.119e-02, -1.518e-02, -6.461e-02, 5.907e-02, 3.671e-04, -4.520e-02, 4.513e-02)); + r += mul(s0_2, M4(6.582e-03, -1.185e-02, -4.998e-02, -3.709e-02, -2.846e-02, 4.560e-02, -1.383e-02, -1.422e-01, -5.102e-02, -7.843e-02, -4.691e-02, -1.638e-01, 1.126e-01, -1.427e-01, 7.998e-02, 6.866e-02)); + r += mul(s0_3, M4(1.005e-01, 9.193e-02, 1.235e-02, -2.497e-02, 8.452e-02, 7.020e-02, 1.421e-01, 8.788e-02, 8.692e-02, -8.267e-02, -6.585e-05, 1.645e-01, -4.443e-02, -9.324e-02, -2.148e-01, 1.671e-01)); + r += mul(s0_4, 
M4(4.984e-02, 1.531e-01, -8.617e-02, -2.485e-02, -1.843e-01, 7.348e-02, 6.349e-02, 2.055e-01, 2.145e-02, -5.108e-02, -4.078e-02, 9.661e-02, 8.807e-02, -5.728e-02, 6.109e-03, 2.942e-01)); + r += mul(s0_5, M4(-9.202e-02, -9.519e-02, 1.117e-01, -4.526e-02, -2.775e-01, 7.643e-02, 1.403e-02, 2.340e-02, -1.074e-02, -1.436e-01, 7.396e-03, 1.238e-03, 1.729e-01, 2.339e-02, 1.535e-01, -8.622e-02)); + r += mul(s0_6, M4(1.005e-02, -3.189e-02, 9.263e-02, 3.803e-02, 1.789e-01, -1.022e-01, 6.633e-02, -7.069e-03, -5.458e-02, 3.946e-02, -1.137e-01, -7.933e-02, -8.455e-02, 9.216e-02, 5.908e-03, -6.718e-02)); + r += mul(s0_7, M4(1.734e-01, -4.896e-02, 1.712e-01, 9.361e-03, 3.362e-02, 9.108e-02, 4.730e-01, 2.667e-01, -5.602e-02, -1.280e-01, 1.817e-01, -7.983e-02, 5.922e-03, -1.085e-02, -7.856e-02, 4.042e-02)); + r += mul(s0_8, M4(4.479e-02, -5.251e-02, 3.659e-03, -6.087e-02, -2.521e-02, -9.217e-02, 1.009e-01, 5.147e-02, 7.763e-02, 9.779e-02, -1.913e-02, -1.032e-02, -5.252e-02, -2.632e-02, 6.120e-02, -4.011e-02)); + r += mul(s1_0, M4(1.749e-01, 4.373e-02, 1.850e-01, -1.322e-03, 7.954e-02, 1.727e-02, -1.852e-01, 4.827e-02, 1.903e-01, -1.351e-01, 9.082e-02, -7.283e-02, -1.823e-01, 5.595e-02, -2.808e-02, -9.443e-03)); + r += mul(s1_1, M4(-1.981e-02, 8.999e-02, -2.314e-01, -1.194e-01, 1.176e-01, 2.058e-02, -1.970e-02, -6.448e-02, -1.178e-02, 1.526e-01, 9.951e-02, 2.790e-02, 3.800e-02, 2.846e-01, 1.088e-01, -9.929e-02)); + r += mul(s1_2, M4(-2.137e-03, -9.093e-02, -8.969e-02, -7.525e-02, 2.343e-01, -8.973e-02, -9.970e-02, 7.148e-02, 1.542e-01, 4.977e-02, -1.556e-01, 6.833e-02, -2.306e-02, 9.185e-02, -1.417e-01, 3.659e-02)); + r += mul(s1_3, M4(-9.093e-04, 8.085e-02, 1.412e-01, -2.497e-01, -1.439e-01, 9.891e-02, 2.928e-02, 1.516e-01, 4.379e-03, -4.236e-02, 2.203e-02, -8.157e-02, 7.379e-02, -1.329e-01, 2.084e-01, -7.908e-02)); + r += mul(s1_4, M4(-9.692e-02, 2.172e-01, -5.032e-03, -4.059e-01, -1.811e-01, -1.188e-01, -6.957e-02, 2.046e-01, 2.704e-01, -2.443e-01, 2.411e-01, -4.141e-02, 
-1.404e-01, -1.501e-01, 3.869e-02, 1.738e-01)); + r += mul(s1_5, M4(1.970e-01, -4.060e-01, -2.504e-01, 2.264e-01, 2.595e-02, -1.535e-01, 1.830e-01, 6.851e-02, -6.238e-02, -3.365e-01, -2.264e-01, -7.431e-03, -4.215e-02, 1.390e-01, -4.475e-01, -1.452e-01)); + r += mul(s1_6, M4(-2.510e-01, 1.528e-02, 4.968e-02, -2.842e-01, -3.769e-02, 9.383e-02, -8.688e-02, -1.122e-01, -1.430e-01, 5.863e-02, -5.752e-02, 1.801e-01, 1.882e-01, 6.842e-02, 2.072e-01, -2.585e-02)); + r += mul(s1_7, M4(-3.887e-02, -5.653e-02, 4.207e-02, -8.963e-02, -5.419e-02, 2.145e-02, 1.625e-01, 1.640e-02, -4.333e-03, 1.425e-02, 1.677e-01, 3.464e-01, 1.892e-01, 1.811e-02, 2.237e-02, -1.581e-02)); + r += mul(s1_8, M4(1.410e-01, -2.081e-01, -4.614e-02, 6.339e-02, 5.790e-02, 1.528e-02, -1.789e-01, -3.247e-02, 2.894e-01, -1.078e-01, 6.089e-02, 6.678e-02, 8.685e-02, 1.617e-02, -9.748e-02, -1.817e-02)); + r += mul(s2_0, M4(-2.281e-01, 1.145e-01, 1.690e-01, 3.860e-03, -2.416e-02, 3.134e-02, 3.746e-02, 9.753e-02, -1.249e-02, -2.384e-02, -6.270e-02, -1.023e-01, 9.561e-02, 1.324e-01, 2.370e-02, -1.242e-01)); + r += mul(s2_1, M4(2.798e-02, 2.944e-02, -1.391e-01, -2.065e-02, 1.531e-02, 8.454e-03, 5.738e-02, -8.349e-02, 1.830e-02, -4.295e-02, -1.300e-01, 3.290e-02, -7.138e-02, -6.105e-02, -1.993e-01, 1.054e-01)); + r += mul(s2_2, M4(1.498e-01, -2.510e-02, 6.616e-02, -8.944e-02, -3.445e-02, 1.348e-01, -5.942e-02, 9.708e-02, 1.031e-01, 2.463e-02, 5.985e-03, -7.200e-02, 3.105e-01, -1.207e-01, 3.550e-02, -1.467e-01)); + r += mul(s2_3, M4(2.385e-02, 1.033e-01, -4.184e-02, 1.449e-01, -1.506e-01, -1.277e-01, 2.225e-01, -1.859e-01, -1.908e-01, -1.564e-01, 7.764e-02, -1.052e-01, -1.490e-02, 1.196e-01, 3.107e-01, -2.853e-02)); + r += mul(s2_4, M4(-8.772e-02, -4.082e-02, -2.519e-01, 2.430e-01, 3.050e-01, -1.207e-01, 2.054e-02, -1.949e-01, 8.330e-02, 1.593e-01, -1.335e-01, -9.296e-02, -6.047e-01, 9.570e-02, -1.153e-01, 3.585e-01)); + r += mul(s2_5, M4(-1.555e-01, -9.061e-02, -1.124e-01, 1.400e-01, 5.122e-03, -4.839e-02, 
-4.809e-02, 5.546e-02, 1.789e-01, 4.024e-02, -8.952e-02, 4.942e-02, -1.059e-01, -8.778e-02, -2.437e-01, 5.702e-02)); + r += mul(s2_6, M4(9.926e-02, -9.385e-03, 1.125e-01, 9.954e-03, -7.612e-02, 8.703e-02, 5.513e-02, 2.757e-03, -7.221e-02, 4.016e-02, -5.296e-03, -1.003e-01, 5.334e-01, -3.056e-02, -3.064e-01, 7.117e-03)); + r += mul(s2_7, M4(-2.532e-01, -4.781e-02, 9.780e-02, -8.244e-02, 2.035e-02, 4.577e-03, -1.115e-01, -6.034e-02, -2.605e-01, -8.794e-02, 3.998e-01, 5.975e-02, -1.221e-01, -4.130e-03, -1.800e-01, -1.644e-01)); + r += mul(s2_8, M4(-5.837e-02, -1.843e-01, -2.463e-02, -2.233e-01, -1.517e-01, -5.376e-02, -1.700e-01, 5.567e-02, 6.757e-02, -1.117e-01, 4.710e-04, 1.177e-01, -1.470e-01, -1.264e-01, -3.143e-01, -1.695e-01)); + r += mul(s3_0, M4(1.737e-02, 1.084e-01, 1.796e-01, 3.595e-02, -1.737e-01, 8.785e-04, -2.436e-01, -4.031e-02, -1.042e-01, 4.849e-02, 1.806e-02, -9.414e-02, -2.708e-01, 9.794e-02, -1.460e-01, 3.161e-02)); + r += mul(s3_1, M4(1.500e-01, -1.101e-01, -2.117e-01, 1.373e-01, 2.621e-01, -8.611e-02, -1.486e-02, 4.369e-02, -3.867e-02, -6.837e-02, -4.205e-01, 2.570e-03, 4.208e-02, 1.402e-01, 2.627e-01, 7.434e-02)); + r += mul(s3_2, M4(5.101e-02, 4.293e-02, 1.221e-01, 2.186e-02, -1.894e-02, -1.332e-01, -2.449e-01, 3.077e-02, 1.728e-01, 8.873e-02, -1.142e-01, 3.216e-02, 9.510e-02, -2.458e-02, 9.526e-02, -4.923e-02)); + r += mul(s3_3, M4(-7.877e-02, -2.980e-02, 1.517e-01, -4.755e-02, 3.299e-02, -1.722e-02, 6.496e-04, 2.745e-01, -3.029e-01, 2.031e-02, 4.097e-02, 4.456e-02, 1.630e-01, -6.488e-02, 2.990e-01, -5.979e-02)); + r += mul(s3_4, M4(2.165e-01, -1.397e-01, -8.698e-02, 6.277e-02, -1.256e-01, 2.055e-01, 1.409e-01, 2.895e-01, 5.554e-01, 2.140e-01, -1.626e-01, 8.477e-02, -4.270e-01, 1.513e-01, -1.548e-01, -1.152e-01)); + r += mul(s3_5, M4(-6.026e-02, -1.783e-02, 1.314e-01, -6.727e-02, -2.205e-01, -1.427e-01, 2.454e-01, 3.660e-02, -8.011e-02, -6.024e-02, 2.695e-01, 2.440e-01, 2.838e-02, -5.347e-02, -4.060e-02, 8.471e-03)); + r += mul(s3_6, 
M4(-2.508e-01, 2.804e-01, 2.255e-02, -2.307e-01, 1.237e-02, 2.406e-02, 2.628e-01, 8.921e-02, -2.000e-01, 7.200e-02, 3.133e-01, -4.548e-01, 8.203e-02, -1.264e-01, -3.128e-03, 1.769e-01)); + r += mul(s3_7, M4(7.194e-02, 2.412e-02, -7.567e-02, 8.308e-02, -2.699e-01, 9.371e-02, 1.981e-01, 9.185e-02, 4.334e-02, -4.782e-02, -3.260e-01, -1.932e-01, 2.238e-01, 1.088e-01, -1.397e-02, 9.171e-02)); + r += mul(s3_8, M4(-5.989e-02, 2.174e-02, 1.074e-01, 7.843e-02, 2.823e-01, 2.629e-02, -2.285e-02, 1.446e-01, 1.295e-02, 1.623e-01, -4.525e-02, 1.443e-01, -1.739e-01, 1.968e-02, 1.531e-01, -1.264e-03)); + r += mul(s4_0, M4(3.647e-02, -1.538e-01, -2.943e-02, 1.765e-02, 3.218e-03, 8.694e-02, -1.315e-01, -6.913e-03, 3.649e-01, 1.162e-01, -1.007e-02, -7.707e-02, 5.933e-02, -7.162e-02, -1.167e-01, -4.503e-02)); + r += mul(s4_1, M4(-2.816e-02, -1.272e-03, 1.170e-01, -2.845e-02, 3.693e-01, -1.619e-01, -6.193e-02, -1.078e-01, -2.879e-01, -8.741e-02, 1.235e-02, 3.369e-01, -2.247e-01, 1.600e-01, 2.450e-01, 1.223e-01)); + r += mul(s4_2, M4(1.452e-01, -7.362e-02, -1.782e-01, 8.628e-03, -1.430e-01, 1.576e-01, -1.538e-01, 9.937e-02, -1.037e-03, 8.466e-02, -7.769e-03, 1.498e-01, -7.806e-02, -1.294e-01, -2.572e-02, 1.327e-01)); + r += mul(s4_3, M4(-5.415e-02, -1.741e-01, 5.747e-02, -2.019e-02, 1.861e-01, -6.486e-02, 1.126e-01, 2.221e-01, 1.252e-01, -3.269e-02, -1.470e-01, -1.061e-01, -3.489e-02, -4.614e-02, -4.317e-02, 2.373e-02)); + r += mul(s4_4, M4(-1.777e-01, 2.710e-02, -3.992e-02, -1.750e-01, -1.067e-01, -1.050e-01, -3.173e-01, 1.977e-01, 1.913e-01, 1.056e-01, 6.696e-02, -3.033e-01, 9.398e-02, -6.405e-03, -9.652e-03, 3.989e-02)); + r += mul(s4_5, M4(3.614e-02, -2.415e-02, -7.483e-02, 1.032e-01, -2.156e-01, 3.760e-01, -2.740e-01, -9.042e-02, -2.041e-01, 1.055e-01, 2.293e-01, -8.301e-02, -1.113e-01, 8.617e-03, -2.125e-02, -9.272e-02)); + r += mul(s4_6, M4(9.263e-02, -6.307e-02, 5.636e-02, -2.100e-02, -1.294e-01, -7.647e-03, -4.177e-02, -3.614e-02, -5.024e-01, -9.556e-02, -1.460e-01, -1.035e-01, 
-5.274e-02, -3.602e-02, 7.783e-02, 7.514e-02)); + r += mul(s4_7, M4(1.637e-01, 1.444e-01, 4.168e-02, -1.246e-01, 8.515e-02, -1.587e-01, -2.420e-01, -2.454e-01, 1.013e-01, 2.599e-01, 3.772e-01, 6.808e-02, -6.795e-02, 1.837e-01, 6.424e-02, -2.261e-02)); + r += mul(s4_8, M4(-9.780e-02, -2.525e-02, 1.251e-01, 1.286e-01, 2.938e-01, -4.910e-02, -6.868e-02, -4.813e-02, 1.199e-01, 1.295e-01, -3.079e-01, 8.940e-02, -3.757e-03, -1.445e-01, 9.543e-02, 1.553e-01)); + r += mul(s5_0, M4(1.186e-01, -2.205e-02, -3.427e-01, -3.424e-01, 1.843e-02, 6.114e-02, -1.087e-01, -7.415e-03, 3.993e-02, 2.651e-02, 1.467e-01, 6.105e-04, -1.080e-01, 1.258e-01, -1.729e-01, -7.216e-03)); + r += mul(s5_1, M4(-2.679e-01, 5.804e-02, -1.816e-01, 2.207e-01, -2.009e-01, -1.762e-02, 3.188e-02, -1.822e-01, 5.268e-02, -6.641e-02, 2.279e-01, 7.508e-02, 3.310e-02, 8.985e-02, -1.834e-01, -3.886e-02)); + r += mul(s5_2, M4(1.843e-02, 1.547e-01, -3.799e-01, 5.699e-02, 3.773e-01, 1.677e-01, -9.834e-02, 7.512e-02, -2.524e-01, 1.043e-01, -3.282e-02, 4.121e-02, 1.174e-01, 3.207e-03, -1.808e-01, -3.663e-02)); + r += mul(s5_3, M4(-5.561e-02, 4.176e-01, -2.231e-01, 1.208e-01, -2.510e-01, -8.773e-02, 9.459e-02, -5.313e-02, 1.463e-01, -1.630e-01, -4.437e-02, -8.088e-02, -4.934e-01, -6.573e-02, 8.557e-02, -4.096e-02)); + r += mul(s5_4, M4(8.186e-02, -1.844e-01, 1.705e-01, 7.483e-02, -2.746e-01, -6.372e-02, 6.459e-02, -1.048e-01, 2.139e-01, -1.889e-02, -2.934e-01, -2.557e-01, 2.013e-01, -1.773e-01, 1.988e-01, 1.563e-01)); + r += mul(s5_5, M4(-4.151e-03, 3.976e-02, 8.492e-02, -6.545e-02, 2.319e-01, 1.423e-01, -1.815e-02, 3.074e-02, -5.575e-02, 1.540e-02, -6.605e-02, -1.032e-03, 1.375e-01, 4.536e-02, 1.015e-01, -1.465e-01)); + r += mul(s5_6, M4(3.149e-02, -1.012e-01, -5.762e-01, 3.408e-01, 4.326e-02, 5.614e-03, 1.175e-01, 1.259e-02, 4.803e-02, -1.723e-01, -1.382e-01, 1.344e-01, 4.600e-01, -1.617e-01, -2.588e-01, 1.638e-01)); + r += mul(s5_7, M4(2.048e-01, 4.332e-02, 1.848e-01, -3.604e-01, -2.261e-01, -1.508e-01, -6.950e-03, 
9.758e-02, -6.281e-02, -8.408e-02, 4.513e-02, -9.105e-02, -3.698e-01, 4.274e-02, -1.901e-01, -5.405e-03)); + r += mul(s5_8, M4(-1.685e-01, 4.163e-02, 3.980e-02, -1.844e-01, -4.091e-02, 7.826e-02, -1.567e-02, 1.459e-01, -2.282e-02, 7.285e-02, -3.953e-01, 1.006e-01, 7.857e-03, -3.035e-02, -1.264e-01, -4.881e-02)); + r += mul(s6_0, M4(4.902e-02, -1.445e-01, 2.465e-01, 3.400e-02, 2.473e-02, -1.400e-01, 1.087e-01, -7.253e-02, -1.108e-01, 1.126e-01, -3.531e-02, 1.471e-02, 5.746e-02, 2.257e-02, 1.677e-01, -1.217e-01)); + r += mul(s6_1, M4(-1.741e-01, 2.362e-01, 1.354e-01, 1.540e-01, -7.186e-02, -1.081e-01, 3.640e-02, -7.877e-02, -3.734e-02, 2.443e-02, -3.647e-01, -1.431e-01, -1.476e-01, 5.666e-02, 2.071e-01, 1.688e-01)); + r += mul(s6_2, M4(4.008e-01, 7.678e-02, 3.198e-01, -4.019e-02, 7.331e-02, 1.380e-01, -4.423e-02, -6.537e-02, 9.380e-02, -1.094e-02, 6.226e-02, 1.114e-01, -1.281e-01, -6.551e-02, 1.202e-01, -1.214e-01)); + r += mul(s6_3, M4(-2.556e-02, 1.131e-02, -2.070e-01, 1.448e-01, -2.169e-02, -6.685e-02, 1.419e-01, 2.091e-03, -1.227e-01, 3.769e-02, 5.021e-02, 5.427e-03, 1.610e-01, -9.560e-02, 5.762e-02, 3.525e-01)); + r += mul(s6_4, M4(4.889e-02, 1.146e-01, 4.403e-01, 1.301e-01, -7.739e-02, 2.461e-02, 3.249e-03, -4.671e-02, 7.585e-02, -2.600e-01, 1.391e-01, -1.733e-01, -2.416e-01, -2.492e-01, -3.474e-01, 3.281e-01)); + r += mul(s6_5, M4(1.752e-01, 1.489e-01, 4.170e-01, 2.319e-02, -6.171e-02, 2.129e-01, -5.746e-02, -2.849e-02, 1.139e-01, 1.991e-01, -2.380e-01, 1.588e-01, -1.587e-01, -1.974e-01, 1.952e-01, 1.432e-01)); + r += mul(s6_6, M4(-1.657e-02, 8.776e-02, 2.591e-02, -2.024e-01, 1.049e-01, -1.078e-01, 3.503e-02, -4.971e-02, -1.480e-01, -9.008e-02, -2.529e-01, 2.549e-01, 5.592e-01, -5.244e-02, 8.626e-02, 3.597e-02)); + r += mul(s6_7, M4(-3.532e-02, 8.905e-02, 1.791e-01, -9.458e-02, 6.780e-02, -5.794e-02, -1.292e-01, 1.035e-02, -2.125e-01, 1.398e-01, 1.009e-01, 2.977e-02, 2.564e-01, -2.495e-01, -6.772e-01, -4.445e-01)); + r += mul(s6_8, M4(-9.304e-02, 2.128e-01, 
3.858e-01, 2.376e-02, 1.338e-01, 4.078e-02, -4.558e-02, 2.654e-02, 7.575e-02, -1.313e-01, 3.156e-02, -5.099e-02, -1.872e-01, 6.766e-02, -8.702e-02, -1.713e-01)); + r += mul(s7_0, M4(-5.595e-02, 6.183e-02, 2.464e-01, -1.414e-01, 7.124e-02, -6.475e-02, 2.529e-01, -1.904e-01, 3.912e-02, -9.186e-02, -1.179e-01, -1.466e-01, 9.174e-03, 1.061e-02, 5.436e-02, 9.571e-02)); + r += mul(s7_1, M4(-2.018e-01, -8.089e-03, -7.846e-02, -9.330e-02, -1.790e-02, 3.018e-02, 1.472e-01, -1.160e-01, -1.650e-01, -8.923e-02, -1.474e-01, -7.902e-02, 4.715e-02, 9.154e-02, 3.535e-02, 2.654e-02)); + r += mul(s7_2, M4(-1.588e-01, -8.289e-02, 2.280e-03, -7.578e-02, -1.446e-01, 3.556e-01, -1.940e-01, -2.261e-01, -4.402e-02, 7.329e-03, 2.272e-02, -4.111e-02, 4.818e-02, -6.474e-02, -8.609e-02, 3.821e-03)); + r += mul(s7_3, M4(1.366e-01, 2.409e-03, -5.919e-02, 5.778e-02, -2.203e-01, -1.450e-01, 5.869e-02, -1.753e-01, 9.278e-02, -3.186e-01, 1.456e-01, 2.001e-02, 1.284e-03, -3.633e-02, 6.268e-02, 3.140e-02)); + r += mul(s7_4, M4(-2.500e-01, 4.451e-02, -2.156e-01, -1.797e-01, -6.846e-02, -4.157e-01, 4.179e-01, -5.199e-02, -8.430e-02, -2.177e-01, 1.486e-01, 2.498e-01, -4.685e-02, -8.815e-02, 1.002e-01, 1.469e-01)); + r += mul(s7_5, M4(5.817e-02, 1.482e-01, -1.004e-01, 7.580e-02, 2.545e-01, 3.320e-01, -3.625e-01, -1.010e-01, -1.058e-01, -1.438e-02, 4.576e-02, 3.802e-02, 3.077e-01, 1.275e-01, -3.467e-01, 1.315e-01)); + r += mul(s7_6, M4(7.622e-02, -9.737e-03, -5.742e-02, -8.808e-02, 2.788e-01, -8.131e-02, 2.101e-02, 2.700e-02, 2.274e-01, -4.161e-02, 2.973e-01, -2.978e-02, -1.343e-01, 5.086e-04, 2.287e-02, 5.469e-02)); + r += mul(s7_7, M4(8.660e-02, -1.785e-01, -1.782e-01, 1.151e-01, -9.595e-02, -1.576e-01, -5.201e-02, 3.305e-01, -7.192e-02, 2.183e-01, 1.806e-02, -1.954e-01, -1.990e-01, 3.558e-01, 1.260e-01, 2.973e-01)); + r += mul(s7_8, M4(-1.830e-01, -1.268e-01, 2.358e-02, -1.262e-02, -1.297e-02, 2.289e-01, -4.859e-02, 1.057e-01, 5.627e-02, -1.755e-02, 8.560e-02, -1.372e-01, 1.458e-01, 2.299e-02, 
-2.603e-01, 4.789e-02)); + r += V4(-2.183e-02, 2.789e-02, -4.360e-02, 1.810e-03); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 
1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, 
s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 
s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(5.898e-02, 1.023e-01, 5.179e-02, 3.510e-02, -4.142e-01, -5.004e-01, -2.296e-01, 1.074e-02, 6.569e-02, -1.632e-02, -4.021e-02, 2.674e-02, -1.201e-01, 2.210e-03, 1.166e-01, -2.971e-02)); + r += mul(s0_1, M4(-1.003e-01, 1.235e-01, 6.209e-02, -1.355e-01, -2.316e-01, -1.078e-01, 2.680e-01, -4.341e-01, -3.042e-02, 1.843e-01, 1.914e-02, -3.570e-02, -5.369e-02, -1.635e-02, 6.805e-02, 3.052e-02)); + r += mul(s0_2, M4(-1.906e-02, 6.295e-02, 5.892e-02, -1.250e-01, 2.057e-01, 1.112e-01, 2.352e-01, -2.113e-01, 1.208e-01, -6.867e-03, 4.805e-02, 7.473e-02, -6.804e-03, -1.886e-02, -1.671e-02, 2.571e-02)); + r += mul(s0_3, M4(-1.429e-01, 1.016e-01, 1.189e-01, -1.123e-01, -5.401e-01, -1.508e-01, 1.473e-01, 2.299e-01, -2.651e-01, -2.545e-01, -9.793e-02, 1.652e-01, -1.913e-01, 2.026e-01, 3.793e-02, -1.470e-01)); + r += mul(s0_4, M4(8.107e-02, 1.051e-01, 2.518e-01, 8.745e-02, 6.514e-01, -3.956e-01, 1.570e-01, -4.313e-01, 1.011e-01, -2.761e-01, 1.706e-01, -7.386e-02, 8.301e-02, 1.543e-01, -7.868e-03, -2.640e-01)); + r += mul(s0_5, M4(4.034e-02, 1.852e-01, -8.966e-02, -7.864e-02, 5.985e-02, -3.990e-01, 4.423e-01, 3.717e-01, 2.115e-01, 7.712e-02, 1.040e-01, 7.704e-02, 5.719e-02, 3.220e-02, -4.211e-02, -3.406e-02)); + r += mul(s0_6, M4(-5.175e-02, -7.418e-02, 5.758e-03, 6.788e-02, -3.265e-01, -2.676e-01, -1.584e-02, 2.578e-01, -2.691e-02, -3.379e-02, -1.588e-02, -3.627e-03, -1.053e-01, -7.474e-02, 9.695e-02, 4.017e-02)); + r += mul(s0_7, M4(2.569e-02, -1.294e-01, -2.184e-01, -1.896e-01, 1.249e-01, -3.481e-01, 3.441e-01, -2.525e-01, -1.948e-02, 4.415e-02, -1.896e-01, -1.199e-01, -3.296e-02, 5.648e-04, 7.137e-02, -6.672e-02)); + r += mul(s0_8, M4(-1.059e-01, -1.555e-01, -4.792e-03, -2.355e-02, 8.325e-02, -1.706e-01, 2.737e-02, -3.699e-02, 5.793e-03, -3.858e-02, 7.466e-02, -2.144e-02, -1.472e-01, -8.354e-02, 9.293e-04, 7.899e-02)); + r += mul(s1_0, M4(1.214e-01, -3.888e-02, -7.692e-02, 7.518e-03, -1.114e-01, -6.412e-02, 
3.779e-02, -3.651e-02, 6.508e-02, 4.873e-02, -8.752e-02, -2.748e-04, 1.274e-01, 2.137e-02, 3.784e-02, 1.540e-02)); + r += mul(s1_1, M4(9.708e-02, 5.635e-02, -8.596e-02, 9.531e-02, -2.201e-02, -6.158e-02, -9.603e-02, 4.481e-02, 1.933e-01, 5.793e-02, -8.643e-02, -1.559e-01, 1.082e-01, 9.472e-02, 1.581e-01, 2.260e-01)); + r += mul(s1_2, M4(-4.898e-02, -5.911e-03, 1.488e-02, 5.290e-02, 8.232e-02, -6.016e-03, 2.088e-02, -2.137e-03, -6.928e-02, 7.237e-02, 1.024e-01, 4.490e-02, 2.106e-01, 3.186e-02, 1.385e-01, 2.273e-01)); + r += mul(s1_3, M4(-2.274e-03, 1.202e-01, 8.561e-02, -2.359e-03, 9.927e-03, 6.843e-02, 3.887e-02, 6.791e-02, 2.428e-02, -5.698e-02, -5.294e-02, -1.431e-02, 1.391e-01, -1.181e-01, -8.015e-02, -1.303e-01)); + r += mul(s1_4, M4(1.543e-01, 2.311e-01, 5.523e-01, 4.505e-01, 1.687e-01, -1.044e-01, 2.146e-01, 7.037e-02, 3.646e-01, -8.474e-02, -2.911e-02, -3.091e-02, 1.659e-01, 1.867e-01, -1.452e-02, -9.702e-03)); + r += mul(s1_5, M4(-5.954e-02, 1.475e-01, -1.169e-01, -2.935e-02, -1.248e-02, -2.545e-02, 1.196e-01, 7.875e-02, -1.230e-01, -4.100e-02, -1.244e-01, 2.200e-02, -8.112e-02, 1.377e-02, 4.385e-02, 3.784e-01)); + r += mul(s1_6, M4(-6.024e-02, -1.752e-01, -4.626e-02, 1.655e-01, 1.731e-03, -7.482e-02, -1.530e-01, -7.325e-02, 5.236e-02, 1.229e-01, 3.696e-02, -4.321e-03, -1.850e-01, 2.204e-02, -2.633e-02, 3.600e-01)); + r += mul(s1_7, M4(2.696e-02, -1.389e-02, 4.563e-02, -7.437e-02, -3.164e-02, 3.326e-03, -5.309e-02, -3.683e-02, 1.613e-01, 1.284e-01, -2.614e-02, -8.940e-02, -1.848e-01, -4.281e-02, 6.911e-03, 1.952e-01)); + r += mul(s1_8, M4(6.517e-02, -1.205e-01, -1.575e-01, -2.235e-01, -3.822e-02, 8.424e-02, 3.246e-02, -2.478e-02, -7.266e-03, -6.509e-02, -1.253e-01, -4.156e-02, -1.074e-01, -9.996e-02, 6.088e-02, 2.346e-01)); + r += mul(s2_0, M4(7.127e-02, -1.396e-02, -6.454e-03, 2.312e-01, 2.245e-01, 1.260e-01, -5.369e-02, 6.271e-02, 5.813e-03, 1.763e-01, 5.081e-02, -1.903e-02, 9.205e-02, 3.677e-02, 1.465e-02, -3.572e-02)); + r += mul(s2_1, M4(-8.076e-02, 
2.054e-01, -1.043e-01, 1.886e-02, 1.810e-01, 1.393e-01, 1.145e-01, 1.048e-01, 7.747e-02, 1.002e-01, 3.084e-02, -1.256e-01, 1.678e-01, -5.360e-02, 9.116e-03, -3.719e-02)); + r += mul(s2_2, M4(7.671e-02, -1.683e-01, 2.119e-02, -1.963e-01, 3.664e-02, -3.882e-02, -6.431e-02, 4.355e-02, 5.644e-03, -2.142e-02, 1.063e-02, -1.070e-02, -3.074e-03, 7.798e-02, 5.430e-02, -5.788e-02)); + r += mul(s2_3, M4(1.597e-01, 4.635e-03, -9.995e-02, -5.731e-02, 2.099e-01, -1.744e-01, -1.172e-01, -3.430e-02, 4.002e-02, -4.481e-02, -1.069e-01, 1.615e-03, -5.802e-03, 6.416e-02, -7.563e-03, -3.679e-03)); + r += mul(s2_4, M4(-7.730e-02, 8.141e-02, 2.194e-01, 3.742e-02, -5.233e-02, -1.086e-01, 2.415e-02, -2.193e-01, -4.474e-02, -4.683e-02, -5.579e-02, 8.826e-02, 3.113e-01, -9.740e-02, 2.551e-01, 4.137e-02)); + r += mul(s2_5, M4(-6.207e-02, 2.488e-02, 5.997e-02, -2.508e-01, 9.913e-02, 8.113e-02, 4.466e-02, 2.117e-01, -1.341e-02, 2.354e-02, 4.115e-02, 1.401e-02, -3.650e-02, -1.225e-01, -3.836e-02, 1.445e-01)); + r += mul(s2_6, M4(1.256e-01, -7.921e-02, 5.601e-02, 1.096e-01, -1.118e-01, 2.026e-02, -9.184e-02, 5.103e-02, -5.814e-02, -1.031e-01, -1.367e-01, -8.229e-02, -1.179e-01, 4.272e-02, -2.452e-03, 1.150e-01)); + r += mul(s2_7, M4(1.799e-01, 3.391e-02, 1.348e-01, 2.273e-01, 6.945e-02, 1.657e-01, 7.544e-02, -7.215e-02, -5.604e-02, -1.335e-02, 1.002e-01, 1.115e-01, 1.283e-01, 5.942e-02, -7.851e-02, 9.534e-02)); + r += mul(s2_8, M4(5.800e-02, -4.717e-03, -4.043e-02, -3.308e-01, 4.501e-02, -3.996e-02, -5.832e-02, -9.447e-02, -2.623e-03, 7.341e-03, 2.828e-02, 1.066e-02, -7.222e-03, -9.796e-02, -4.808e-02, 8.999e-03)); + r += mul(s3_0, M4(1.333e-01, 9.811e-02, 3.517e-02, 1.008e-02, 2.086e-01, -1.047e-01, -2.222e-02, 1.142e-01, 1.103e-01, 4.521e-02, 3.430e-01, 1.308e-01, -1.588e-02, 5.853e-02, -6.328e-02, -8.583e-02)); + r += mul(s3_1, M4(-1.590e-02, 5.338e-02, 1.783e-01, -3.995e-02, -2.121e-01, -2.104e-01, -8.402e-02, -6.910e-02, -2.198e-01, -1.369e-01, 4.857e-01, -3.527e-01, 5.229e-02, 4.553e-02, 
3.975e-02, 1.113e-01)); + r += mul(s3_2, M4(1.378e-01, 5.009e-02, 1.147e-02, -4.811e-02, 1.995e-02, -4.220e-02, -1.434e-02, -1.682e-01, 1.375e-02, -2.277e-01, 4.034e-01, 1.324e-02, -1.265e-01, -4.825e-03, -5.124e-02, 4.928e-02)); + r += mul(s3_3, M4(3.023e-01, -1.505e-01, -1.060e-01, -2.507e-02, -1.052e-01, -4.072e-03, 9.374e-02, 1.291e-01, 2.717e-02, -2.332e-02, 1.779e-01, 2.247e-01, 4.461e-02, -3.617e-02, 1.135e-01, 1.578e-01)); + r += mul(s3_4, M4(6.922e-03, -7.092e-03, -7.514e-02, -1.999e-01, -7.352e-02, 9.282e-02, -4.062e-02, 4.427e-02, -2.842e-01, 5.538e-02, 1.728e-01, 6.405e-01, 5.666e-02, -1.584e-01, 2.390e-01, -1.563e-01)); + r += mul(s3_5, M4(-1.542e-01, 6.271e-02, -7.880e-02, -6.906e-02, 5.749e-02, 3.161e-01, -1.099e-01, -2.815e-01, -1.928e-01, 2.556e-02, 2.316e-01, 3.207e-01, -7.350e-02, -8.937e-02, -8.128e-02, -3.217e-02)); + r += mul(s3_6, M4(-1.054e-01, -6.319e-02, 1.393e-01, 1.755e-01, 1.463e-02, 4.238e-02, 1.288e-01, 2.410e-02, 6.348e-02, -9.399e-02, 5.054e-02, 1.567e-01, -9.147e-02, -9.821e-02, -5.046e-02, -9.996e-02)); + r += mul(s3_7, M4(4.389e-02, 1.437e-01, 1.209e-01, -1.093e-01, -3.484e-02, -9.388e-03, -1.517e-02, 2.933e-02, 2.747e-02, -2.815e-03, 2.313e-02, 4.478e-02, -2.331e-02, -2.325e-02, 4.810e-02, 3.544e-03)); + r += mul(s3_8, M4(1.622e-01, -7.435e-02, -2.240e-02, -1.503e-01, 9.484e-02, 7.273e-03, 1.479e-02, 5.303e-04, -3.781e-02, 1.331e-02, 2.239e-01, 6.487e-02, 3.113e-02, -1.109e-02, -7.214e-02, -5.213e-02)); + r += mul(s4_0, M4(-1.767e-01, -1.761e-02, 5.794e-02, -2.092e-01, -1.063e-01, -6.469e-02, -1.832e-01, -2.754e-01, 6.988e-02, 4.643e-02, -8.930e-04, 2.108e-02, -1.621e-01, 1.921e-03, 9.110e-03, 5.548e-02)); + r += mul(s4_1, M4(-3.711e-02, 4.007e-02, -2.122e-02, 1.410e-01, -4.001e-01, -2.502e-01, -3.357e-01, 1.560e-01, -6.828e-02, -2.182e-01, -2.920e-02, 7.451e-02, 2.454e-01, 5.595e-02, 1.532e-02, 2.326e-01)); + r += mul(s4_2, M4(-3.415e-02, 1.130e-01, 1.315e-01, 9.990e-02, 2.578e-01, 3.132e-02, -2.092e-02, -4.679e-02, 1.714e-01, 
-6.984e-02, -3.989e-02, 6.598e-03, 2.138e-02, 5.911e-02, -1.324e-01, 2.257e-02)); + r += mul(s4_3, M4(-3.899e-02, -2.317e-02, -5.292e-02, 1.811e-01, -1.660e-02, 3.567e-01, -3.313e-02, -1.344e-01, 3.119e-02, 1.979e-02, 1.413e-01, 2.558e-02, -2.075e-01, -5.004e-02, 1.268e-01, 4.136e-02)); + r += mul(s4_4, M4(-1.033e-01, 1.753e-01, -3.781e-02, -1.163e-02, -1.936e-02, -7.617e-02, -1.734e-01, 3.961e-01, 2.580e-02, 2.199e-02, 3.309e-02, -1.168e-04, -1.629e-01, 2.210e-01, -1.099e-01, -6.111e-02)); + r += mul(s4_5, M4(-1.314e-01, 1.596e-01, -4.496e-02, 3.517e-02, 3.716e-01, 1.264e-02, 1.556e-01, -1.134e-01, -4.923e-02, 8.925e-02, 5.157e-02, -1.111e-01, 1.235e-01, -5.074e-02, -9.965e-02, -2.354e-01)); + r += mul(s4_6, M4(-2.187e-01, -8.205e-02, -5.784e-02, -1.245e-01, -1.000e-01, 9.404e-02, 7.355e-02, -5.207e-01, 2.849e-02, 4.311e-02, -2.477e-03, -1.228e-01, -7.596e-02, 3.517e-02, 5.883e-02, 6.495e-02)); + r += mul(s4_7, M4(-1.546e-01, 1.033e-01, -8.544e-02, 1.434e-01, -2.261e-01, -1.058e-01, 7.453e-03, 2.644e-01, -1.821e-01, 3.247e-03, 1.335e-01, 1.569e-01, 1.079e-01, 1.831e-02, 7.809e-02, 7.146e-02)); + r += mul(s4_8, M4(-1.282e-01, 4.611e-02, 1.102e-01, 8.853e-02, -1.722e-01, -1.031e-01, 2.514e-01, -1.527e-01, -2.682e-03, 1.106e-01, -6.223e-03, -2.285e-02, 1.846e-01, 1.044e-02, -4.121e-02, -9.054e-02)); + r += mul(s5_0, M4(1.332e-01, -5.180e-02, 7.027e-03, 4.352e-02, -2.723e-02, -2.257e-02, -5.886e-02, -4.457e-02, -7.016e-02, 6.257e-02, 1.170e-02, 8.506e-02, -4.006e-02, -6.097e-02, 6.540e-02, 8.862e-02)); + r += mul(s5_1, M4(-4.496e-02, -5.056e-02, -1.407e-03, 1.206e-01, -4.892e-02, -8.885e-02, 6.538e-03, 4.501e-02, 4.362e-02, -8.527e-02, -1.946e-01, 1.466e-01, 1.045e-01, -1.014e-01, 1.647e-01, -2.132e-01)); + r += mul(s5_2, M4(1.616e-02, -1.825e-01, -2.892e-02, 3.381e-03, -2.863e-02, 4.838e-02, -1.173e-02, 5.060e-03, 1.067e-01, -1.108e-01, 1.812e-01, -3.503e-02, -1.645e-01, 6.200e-02, 4.331e-02, -1.618e-01)); + r += mul(s5_3, M4(-1.727e-01, -1.164e-01, -7.706e-02, 
1.022e-02, -8.109e-02, 8.064e-02, 1.430e-01, -3.233e-02, -1.537e-01, 5.173e-03, -1.557e-01, 5.926e-02, -1.343e-01, -2.039e-03, -2.499e-02, -7.986e-02)); + r += mul(s5_4, M4(-2.449e-01, 8.561e-02, -5.706e-02, -2.523e-02, -7.240e-02, 4.196e-02, -1.457e-01, 1.570e-01, -7.590e-02, -5.256e-02, -1.481e-01, -2.097e-01, -2.887e-01, -5.940e-02, -2.843e-02, -4.960e-02)); + r += mul(s5_5, M4(-6.858e-02, 2.541e-02, -3.451e-02, -3.459e-01, 1.838e-02, -1.523e-03, 2.673e-02, -8.445e-02, 3.166e-04, 1.589e-02, -1.491e-01, -1.015e-01, -3.630e-01, -3.018e-01, 7.412e-02, 1.560e-01)); + r += mul(s5_6, M4(-1.742e-02, 1.457e-03, -1.061e-01, 1.826e-02, 6.549e-02, 1.906e-02, -3.508e-02, 2.989e-02, -4.677e-02, -1.683e-01, 2.756e-01, -2.025e-01, 1.932e-02, 1.945e-02, 2.062e-02, -2.864e-02)); + r += mul(s5_7, M4(1.323e-01, -1.310e-01, -4.512e-02, -4.500e-02, 1.232e-01, 1.050e-02, -1.799e-02, -1.608e-01, 1.488e-01, 1.833e-02, 3.364e-02, -1.157e-01, -8.935e-02, 6.832e-02, -2.770e-02, -4.232e-02)); + r += mul(s5_8, M4(8.221e-02, 8.575e-03, -4.399e-02, -2.292e-01, -4.005e-02, -7.331e-02, 7.642e-03, -1.209e-01, 6.489e-02, 1.665e-01, 5.812e-02, 1.639e-01, -1.728e-01, -4.343e-02, 1.431e-01, -1.113e-01)); + r += mul(s6_0, M4(-3.535e-02, 9.030e-02, -5.320e-02, 7.737e-02, -5.293e-03, -5.581e-03, -7.533e-02, -1.016e-01, 4.049e-02, 1.179e-01, 8.793e-02, 5.243e-02, -2.340e-01, -1.673e-01, -4.937e-02, -4.802e-02)); + r += mul(s6_1, M4(5.493e-02, -6.066e-02, -1.590e-01, 9.192e-02, -7.866e-02, -1.228e-01, -1.021e-01, 4.901e-02, -2.071e-02, 9.203e-02, 2.903e-01, 5.426e-02, 6.469e-02, 1.092e-01, 9.984e-02, -1.571e-01)); + r += mul(s6_2, M4(1.058e-01, 2.990e-02, 8.940e-03, 4.651e-02, 1.527e-01, -2.858e-02, 2.792e-02, 1.128e-01, -7.559e-02, -4.387e-02, 1.674e-01, 1.207e-01, -2.941e-02, 4.771e-03, -6.180e-02, -1.577e-01)); + r += mul(s6_3, M4(1.342e-01, 1.282e-01, 2.951e-02, -6.237e-02, 2.081e-01, 2.897e-02, 3.082e-01, 5.621e-02, -5.228e-02, -7.116e-03, -3.120e-02, 3.504e-02, -6.504e-02, 1.618e-01, 2.461e-02, 
-1.625e-01)); + r += mul(s6_4, M4(-1.675e-01, 2.803e-02, 3.666e-02, 2.285e-01, 4.541e-02, 1.696e-01, -3.614e-01, 6.305e-02, -1.967e-01, 1.952e-02, 1.416e-01, -1.762e-01, -7.810e-02, 1.376e-01, -7.025e-02, 1.066e-01)); + r += mul(s6_5, M4(1.353e-01, 1.662e-01, -4.207e-02, -2.346e-02, 3.252e-03, -4.560e-02, 1.697e-02, 2.627e-01, -1.936e-01, -9.696e-02, 2.661e-02, -7.479e-02, 1.646e-01, 1.714e-01, 1.544e-01, -3.598e-01)); + r += mul(s6_6, M4(1.437e-01, 3.831e-02, 6.898e-02, 2.046e-01, 7.234e-02, -1.429e-01, 1.993e-01, 1.094e-01, -4.150e-02, 2.519e-02, -7.068e-02, -8.525e-02, 2.540e-01, 6.760e-02, -1.217e-01, -2.109e-02)); + r += mul(s6_7, M4(1.491e-01, 1.031e-01, 1.842e-02, 7.649e-02, -1.016e-01, -1.476e-01, 1.011e-01, 3.578e-02, -1.648e-01, -4.999e-02, 1.983e-01, 1.706e-01, 1.083e-01, -4.382e-02, 3.283e-02, 2.631e-01)); + r += mul(s6_8, M4(1.966e-01, 3.838e-03, 3.439e-02, 5.648e-02, -5.775e-02, 4.761e-02, -2.427e-02, -6.857e-02, 9.181e-02, 4.807e-02, -4.314e-02, 1.334e-01, 1.542e-01, 1.521e-01, 7.161e-03, -1.978e-01)); + r += mul(s7_0, M4(1.146e-02, -6.005e-02, 5.545e-02, -9.761e-02, 1.503e-01, -2.184e-02, -2.481e-02, 7.512e-03, 1.664e-01, 1.335e-01, -1.812e-02, 7.166e-04, 1.337e-02, -2.437e-02, 1.074e-01, -1.082e-01)); + r += mul(s7_1, M4(-1.392e-01, -4.758e-02, -1.420e-01, -1.394e-01, 8.528e-02, 1.490e-01, -1.346e-02, -1.093e-02, -1.024e-02, -1.676e-01, 9.042e-02, -9.947e-02, -4.090e-02, -4.279e-03, 2.057e-01, 2.859e-02)); + r += mul(s7_2, M4(-1.675e-01, -2.194e-01, 5.117e-02, -1.949e-01, 1.652e-02, -9.819e-02, 5.137e-02, 4.510e-02, -1.041e-01, -8.313e-02, -8.920e-02, -1.221e-02, -8.391e-02, -7.470e-02, -3.483e-02, 2.702e-02)); + r += mul(s7_3, M4(1.624e-01, -1.387e-02, 1.789e-01, 4.982e-02, 2.285e-01, -1.281e-01, 1.582e-01, -1.199e-01, -1.904e-02, -2.422e-02, 1.750e-01, 1.218e-02, 8.004e-02, -1.072e-01, 3.704e-02, 1.066e-01)); + r += mul(s7_4, M4(-6.251e-03, -1.467e-02, 3.164e-02, 1.254e-01, -8.096e-02, 6.160e-02, -1.487e-01, -4.789e-03, 1.438e-01, -4.290e-02, 
5.777e-02, -6.356e-02, 8.993e-02, -8.667e-02, -2.150e-01, 6.103e-02)); + r += mul(s7_5, M4(-1.233e-01, -1.353e-01, -8.816e-03, -1.068e-01, -3.542e-02, -6.869e-02, 1.217e-01, 2.516e-01, 1.305e-01, 1.769e-01, -3.911e-02, 3.566e-02, 4.754e-02, 1.708e-02, -5.803e-02, 1.294e-01)); + r += mul(s7_6, M4(-6.324e-02, -1.053e-01, 8.470e-02, 1.818e-02, 1.645e-02, -4.823e-02, 1.704e-01, -1.717e-02, -7.712e-02, 1.961e-02, -1.057e-01, -7.132e-02, 5.527e-02, -2.007e-02, 4.258e-02, -5.366e-02)); + r += mul(s7_7, M4(-7.590e-02, 1.014e-02, -4.700e-02, -8.332e-02, -2.349e-04, 8.786e-02, -9.277e-03, 8.736e-02, -2.654e-03, -1.076e-02, 6.248e-02, 1.686e-01, -2.289e-02, 8.949e-03, 3.939e-02, 2.480e-01)); + r += mul(s7_8, M4(-1.031e-01, -1.418e-01, -1.160e-02, -9.134e-02, -8.708e-02, -1.059e-02, 3.590e-02, 9.035e-02, 6.970e-02, 4.722e-02, -1.140e-01, -2.233e-02, 5.186e-02, -1.830e-03, 4.614e-02, -5.985e-02)); + r += V4(-6.233e-03, -4.512e-02, 8.188e-02, 1.874e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.072e-02, 2.987e-02, -4.870e-02, -9.877e-02, -2.593e-01, 8.765e-02, -3.330e-01, 4.828e-02, 3.932e-02, 3.659e-04, 1.314e-01, -1.119e-01, 2.564e-02, 5.181e-02, 7.177e-02, -2.318e-02)); + r += mul(s0_1, M4(1.366e-01, 8.945e-02, 6.219e-02, 1.591e-01, 1.157e-01, 1.207e-01, -1.396e-01, 1.469e-01, -1.020e-01, -1.702e-02, -3.086e-02, 
-8.716e-02, 5.325e-02, -7.046e-02, -5.272e-02, 1.926e-01)); + r += mul(s0_2, M4(9.012e-02, -1.006e-02, 5.874e-03, 1.501e-01, -5.434e-02, 9.288e-02, -3.788e-02, -1.272e-01, -6.693e-03, 5.686e-03, -3.368e-02, -2.107e-02, -3.497e-02, 3.170e-02, 5.741e-02, 5.688e-02)); + r += mul(s0_3, M4(-8.879e-02, 1.134e-01, -2.091e-01, -2.618e-02, -1.441e-01, -1.830e-01, 1.010e-01, -2.361e-02, 7.942e-02, -9.917e-02, 7.310e-02, 1.108e-01, -4.764e-02, 9.027e-03, 1.707e-01, 6.562e-02)); + r += mul(s0_4, M4(-1.312e-01, 2.961e-02, 3.481e-02, 8.557e-02, 7.619e-01, -4.109e-01, -7.920e-02, -6.275e-01, -3.851e-02, 1.703e-02, -1.155e-01, -6.941e-02, 8.292e-02, -7.877e-02, -1.201e-01, 8.829e-02)); + r += mul(s0_5, M4(6.816e-02, -1.082e-01, -1.076e-01, -2.286e-01, -2.404e-01, 3.053e-01, -1.037e-01, 1.641e-01, 1.430e-01, 1.705e-01, 8.834e-03, 8.574e-02, -4.016e-02, -2.470e-02, 7.121e-02, 1.250e-03)); + r += mul(s0_6, M4(6.383e-02, 1.785e-02, 1.217e-01, -5.187e-02, -1.436e-01, -1.256e-01, -9.063e-02, 1.767e-01, -3.213e-02, -2.595e-01, -1.391e-01, 8.152e-03, -9.418e-02, -2.258e-01, 2.352e-02, -5.914e-02)); + r += mul(s0_7, M4(2.106e-02, -1.326e-01, -1.596e-01, 1.914e-02, -8.504e-02, 6.304e-01, 1.081e-01, 2.286e-01, -1.086e-01, -1.806e-01, 1.410e-01, -3.709e-02, -4.926e-02, -1.029e-01, -9.883e-02, -2.675e-02)); + r += mul(s0_8, M4(-7.430e-03, 2.211e-02, -1.627e-02, -3.563e-02, -1.603e-02, -1.272e-01, -7.611e-02, 1.647e-01, -2.259e-01, 5.723e-02, -1.199e-01, 9.717e-03, -1.614e-02, -3.597e-02, -2.514e-02, -1.460e-01)); + r += mul(s1_0, M4(1.850e-02, 7.559e-02, 2.411e-01, -5.024e-02, -2.092e-03, 8.279e-03, 1.779e-01, 7.559e-02, 3.466e-02, -1.003e-01, 6.475e-02, -6.812e-02, 7.454e-02, -1.681e-01, 8.178e-02, 5.003e-02)); + r += mul(s1_1, M4(-7.757e-02, 1.018e-01, -4.131e-02, -2.083e-01, 2.325e-02, 1.465e-02, -4.361e-02, 8.990e-02, -1.067e-01, -1.373e-01, 4.339e-02, 1.775e-01, -4.043e-02, -1.337e-01, 9.192e-03, 8.997e-02)); + r += mul(s1_2, M4(-1.574e-01, -1.295e-01, -6.844e-02, -9.125e-02, -8.375e-03, 
6.014e-02, 2.392e-02, -4.710e-02, -2.392e-02, -1.597e-01, 2.631e-03, -3.381e-03, 1.888e-01, -2.238e-01, -3.792e-02, -1.087e-01)); + r += mul(s1_3, M4(-5.730e-02, 1.616e-01, -1.510e-01, 1.484e-01, 2.380e-03, -5.401e-02, 3.699e-02, -4.056e-02, 1.217e-01, 2.217e-02, -4.013e-02, 1.260e-01, -1.723e-02, -2.650e-01, 2.407e-01, -6.329e-02)); + r += mul(s1_4, M4(-3.091e-01, 2.740e-01, -1.965e-02, 2.077e-02, 1.116e-01, -9.509e-02, -2.301e-01, -7.686e-02, -5.567e-02, 6.194e-02, -2.114e-02, -5.373e-02, 1.820e-01, -3.116e-01, -2.269e-01, 7.808e-02)); + r += mul(s1_5, M4(2.357e-01, 3.560e-02, -6.030e-02, -2.233e-01, 1.096e-01, 5.193e-02, -2.080e-01, 2.401e-02, 7.055e-02, -2.879e-02, 4.936e-02, 1.033e-01, -9.082e-02, 1.456e-02, -1.203e-01, -9.185e-02)); + r += mul(s1_6, M4(-4.888e-03, -1.463e-02, -1.374e-01, -7.310e-02, -8.804e-02, -1.713e-03, 1.975e-01, -7.981e-02, 1.088e-01, -3.366e-02, -2.186e-01, 5.982e-03, -5.231e-02, -8.029e-02, 1.584e-01, 1.171e-02)); + r += mul(s1_7, M4(-2.043e-01, -2.478e-01, -1.684e-01, 7.396e-02, -5.522e-02, 1.367e-02, 2.299e-02, -6.665e-02, -7.718e-02, 7.285e-02, 1.271e-01, 8.786e-02, 1.472e-01, 7.952e-02, -2.088e-01, -7.105e-02)); + r += mul(s1_8, M4(-2.984e-02, -9.663e-02, -6.416e-03, 4.946e-02, -5.698e-02, 5.056e-02, 4.170e-02, 7.171e-03, 1.453e-01, 3.092e-02, -5.553e-02, -6.796e-03, 1.211e-01, 1.874e-02, -1.011e-01, -3.792e-02)); + r += mul(s2_0, M4(-1.008e-01, -5.194e-02, -2.756e-02, -6.745e-03, -1.505e-01, -1.320e-01, -1.929e-01, 4.847e-02, -1.121e-01, -2.102e-01, -1.006e-01, 1.444e-02, -8.762e-02, -8.098e-03, -7.473e-02, -1.624e-01)); + r += mul(s2_1, M4(1.426e-01, 1.293e-01, -1.464e-02, -1.140e-02, -2.014e-02, 4.292e-02, -2.943e-01, -1.110e-01, 1.252e-01, -5.200e-02, 1.308e-01, 6.224e-02, 7.254e-02, -2.254e-02, 7.862e-02, -2.198e-01)); + r += mul(s2_2, M4(4.654e-02, -2.551e-01, -2.251e-01, 2.466e-01, -1.344e-01, -4.730e-02, -1.331e-01, 8.710e-02, -2.163e-01, -1.685e-01, 2.531e-02, 4.731e-02, 3.408e-02, 1.302e-02, -4.481e-02, -7.309e-02)); + r 
+= mul(s2_3, M4(-1.833e-01, 1.709e-01, 5.242e-02, -2.393e-02, 5.576e-02, 6.886e-02, 4.389e-01, -4.525e-02, 7.399e-02, 1.564e-01, 9.747e-03, -1.112e-01, -2.863e-02, 2.534e-01, 1.318e-01, 1.231e-01)); + r += mul(s2_4, M4(1.389e-02, -2.544e-01, 6.263e-02, 4.152e-02, -4.439e-02, 9.213e-02, 1.155e-01, -3.731e-02, 4.804e-02, 6.394e-02, 3.836e-02, -1.901e-02, 3.252e-01, -1.103e-01, 3.155e-01, -3.334e-01)); + r += mul(s2_5, M4(2.692e-02, 1.670e-03, -1.956e-01, -2.065e-01, 2.862e-02, 1.070e-01, -5.289e-04, 2.543e-01, 3.560e-03, 3.700e-02, 2.995e-02, -1.575e-01, -2.839e-02, -1.122e-02, -1.519e-01, -3.399e-01)); + r += mul(s2_6, M4(-3.126e-02, 6.623e-02, 1.242e-01, 1.889e-02, -4.032e-02, 8.464e-03, 1.190e-01, 4.678e-02, -1.823e-02, -1.588e-02, -2.659e-02, 6.273e-02, -7.421e-02, -1.694e-01, 1.068e-01, -7.284e-03)); + r += mul(s2_7, M4(1.102e-01, -1.866e-01, -1.061e-01, -2.824e-01, 1.737e-02, 1.007e-01, 5.043e-03, -1.614e-01, 3.293e-03, 1.382e-01, -1.468e-02, -1.901e-02, -3.788e-02, 9.288e-02, -1.407e-01, -2.130e-02)); + r += mul(s2_8, M4(2.724e-02, 6.120e-02, 1.856e-01, 1.509e-02, 1.522e-01, 4.458e-02, 1.284e-01, 1.431e-01, -1.399e-02, -4.098e-02, 6.481e-02, -6.521e-02, 1.377e-01, -2.937e-02, 2.207e-02, 3.419e-02)); + r += mul(s3_0, M4(1.091e-02, -8.703e-02, 2.567e-01, 3.758e-02, -1.626e-01, 9.900e-02, 9.897e-02, 1.972e-02, -1.741e-01, -3.328e-01, 9.653e-02, -7.223e-02, -7.728e-02, 1.670e-01, 1.268e-01, -1.022e-02)); + r += mul(s3_1, M4(8.696e-02, 4.460e-02, -1.843e-01, 5.664e-02, 2.322e-01, 2.416e-01, 4.537e-02, -8.637e-02, 2.466e-01, -3.734e-01, -1.130e-01, -4.065e-02, 6.738e-02, 1.748e-01, 5.445e-02, -1.321e-01)); + r += mul(s3_2, M4(4.309e-02, 1.146e-01, 1.336e-03, -1.639e-01, 8.132e-02, 1.614e-01, 4.700e-02, -3.760e-01, -2.383e-01, 1.015e-01, 1.638e-02, 3.249e-01, -3.819e-02, 4.556e-02, -5.602e-02, 2.362e-01)); + r += mul(s3_3, M4(-1.616e-01, 1.037e-01, 4.294e-01, -8.711e-02, 9.881e-02, -1.627e-01, -4.622e-02, -3.638e-02, -1.581e-01, -2.005e-01, -2.844e-01, -1.328e-01, 
-5.698e-02, 1.438e-02, 2.079e-01, 1.697e-02)); + r += mul(s3_4, M4(-4.821e-02, -3.568e-02, -1.423e-01, -7.904e-02, 1.332e-01, 7.288e-03, 8.952e-02, -5.698e-02, 5.983e-02, 2.679e-01, -9.634e-02, 6.227e-02, 2.570e-01, -2.033e-02, -3.207e-01, -4.696e-02)); + r += mul(s3_5, M4(2.321e-01, 1.473e-02, 4.822e-02, -1.961e-01, 3.712e-01, 1.017e-02, -1.449e-02, 9.831e-02, 8.188e-02, 2.311e-01, 1.631e-01, -5.482e-02, -5.875e-02, -2.277e-01, 2.308e-02, -5.562e-02)); + r += mul(s3_6, M4(3.540e-03, 1.396e-01, -7.204e-02, 3.212e-02, -5.876e-02, 5.417e-02, -1.013e-01, -1.313e-02, -1.003e-01, -3.366e-01, 9.102e-02, 4.483e-02, -1.129e-01, -8.134e-02, 1.506e-01, -4.069e-02)); + r += mul(s3_7, M4(-4.611e-02, 6.878e-03, -1.654e-02, 4.913e-02, -5.735e-02, 1.157e-01, -1.774e-02, -8.984e-02, 2.189e-02, 2.690e-01, 2.728e-02, -6.942e-03, 5.850e-02, -9.036e-02, -1.306e-01, -4.179e-03)); + r += mul(s3_8, M4(-8.081e-02, -5.007e-02, 5.750e-02, 5.247e-02, -2.169e-02, 1.248e-01, 1.077e-05, 1.078e-01, -1.131e-01, -3.974e-02, 2.086e-01, -1.031e-02, 9.351e-02, -1.307e-01, -2.288e-01, 4.383e-02)); + r += mul(s4_0, M4(3.424e-02, 7.999e-02, 7.685e-02, 4.790e-03, -6.946e-02, 3.152e-01, -1.126e-01, 2.237e-01, -2.916e-02, 4.300e-02, 3.158e-02, 2.072e-02, 1.954e-02, 1.402e-01, 1.155e-01, -5.400e-02)); + r += mul(s4_1, M4(4.558e-02, -1.627e-01, 1.460e-01, -1.519e-01, 1.269e-01, -1.166e-01, 7.981e-02, -1.032e-01, -1.566e-02, -7.502e-02, 1.240e-02, -6.222e-02, -1.907e-02, 1.501e-01, 1.784e-01, -2.211e-01)); + r += mul(s4_2, M4(1.578e-02, 2.068e-02, 1.515e-02, -2.311e-02, -2.813e-02, 1.740e-01, 1.060e-01, -7.887e-02, 2.634e-02, 7.770e-02, 2.411e-02, 5.767e-02, 9.125e-02, -2.644e-02, -2.305e-02, -9.402e-02)); + r += mul(s4_3, M4(1.087e-01, -2.020e-01, 1.134e-01, 2.271e-02, -7.384e-02, 1.375e-01, -3.297e-01, 6.567e-02, -2.486e-02, 1.378e-01, -2.338e-03, 1.319e-01, 6.882e-02, 1.297e-01, -6.683e-02, 1.000e-01)); + r += mul(s4_4, M4(-2.737e-01, 1.427e-01, -3.568e-01, 3.743e-02, 2.118e-01, -8.814e-03, 2.406e-01, 
9.181e-02, -4.541e-02, 8.189e-03, 1.395e-01, 4.992e-02, -7.127e-02, 1.991e-01, 3.851e-01, 1.538e-01)); + r += mul(s4_5, M4(1.133e-01, -3.303e-02, 4.544e-02, -1.079e-02, -6.669e-02, -1.649e-01, 1.830e-01, 5.873e-02, -6.171e-02, 6.666e-02, 9.514e-02, -1.057e-01, 1.724e-01, 8.855e-02, 6.628e-02, -9.804e-02)); + r += mul(s4_6, M4(-1.156e-01, 4.579e-02, -1.436e-01, -1.500e-01, 1.156e-04, 1.470e-01, 2.128e-02, 1.526e-02, -2.010e-02, -4.426e-02, -2.241e-02, 1.347e-02, -3.333e-02, -8.785e-03, -1.368e-01, 3.007e-02)); + r += mul(s4_7, M4(3.182e-02, -3.640e-02, -2.042e-02, 6.383e-03, 1.342e-01, -1.910e-01, -2.039e-01, -2.644e-01, -5.062e-02, -6.343e-02, -1.043e-02, -7.188e-02, -1.742e-01, -5.018e-02, 3.093e-02, -2.548e-02)); + r += mul(s4_8, M4(2.927e-02, 8.337e-02, -4.069e-03, -4.532e-02, -1.445e-02, -1.190e-02, 5.480e-02, -2.169e-01, 6.353e-02, -2.374e-01, -1.116e-01, 2.739e-02, -9.849e-04, -9.187e-02, 2.816e-02, 1.036e-01)); + r += mul(s5_0, M4(-6.044e-02, 8.869e-02, 1.616e-03, -5.585e-02, 5.658e-02, 9.891e-03, -1.156e-02, 1.706e-02, 7.989e-03, 3.445e-01, -3.770e-02, 1.117e-01, -1.178e-01, -1.527e-01, -2.576e-01, -1.498e-01)); + r += mul(s5_1, M4(-6.545e-02, -9.450e-03, -6.159e-02, -4.696e-02, 8.623e-02, -7.169e-02, -5.838e-02, 1.784e-02, 1.132e-01, 1.126e-01, 1.970e-02, 1.624e-01, -4.806e-02, -2.634e-01, -1.809e-01, -3.138e-01)); + r += mul(s5_2, M4(1.511e-01, 9.117e-02, -5.241e-02, -6.197e-02, -6.328e-02, -1.878e-02, -2.180e-02, -4.510e-02, -1.126e-01, 1.166e-01, 1.270e-01, 1.281e-01, -8.226e-02, -7.153e-02, 3.331e-02, -1.265e-01)); + r += mul(s5_3, M4(7.965e-02, -1.161e-02, 1.187e-01, 9.454e-02, -2.232e-02, -1.270e-02, -4.553e-02, 4.193e-02, -1.702e-01, 6.725e-02, -2.090e-01, -3.786e-02, 1.777e-01, -1.197e-01, 2.123e-01, 9.745e-02)); + r += mul(s5_4, M4(2.837e-02, -3.252e-01, 3.035e-01, 1.311e-01, 5.649e-02, -2.523e-02, 3.847e-02, 1.933e-01, -1.843e-01, 8.062e-02, 2.911e-02, -1.414e-01, 1.123e-01, 1.763e-01, -1.875e-01, 1.437e-01)); + r += mul(s5_5, M4(1.268e-01, 
-1.762e-01, 3.054e-01, -1.945e-01, -8.692e-02, -5.161e-04, 7.561e-02, 3.989e-02, 1.580e-01, -2.259e-01, 1.818e-01, -2.756e-01, -2.821e-01, -5.012e-02, -1.082e-01, -2.341e-01)); + r += mul(s5_6, M4(-7.304e-02, 9.063e-03, -1.128e-01, -9.447e-02, 1.103e-01, -2.242e-01, -2.195e-01, -2.092e-02, -5.718e-02, -9.265e-03, -1.021e-02, 9.082e-02, 6.298e-02, 8.127e-02, 1.945e-01, 6.062e-02)); + r += mul(s5_7, M4(1.440e-01, -1.044e-01, -1.070e-01, -3.389e-02, -2.963e-02, 5.793e-02, -3.072e-02, -2.344e-03, -2.492e-02, -2.873e-01, -3.807e-01, 2.718e-01, -2.020e-02, 1.431e-01, -1.180e-01, -1.030e-01)); + r += mul(s5_8, M4(3.213e-01, 6.824e-03, -7.111e-02, 3.511e-01, -4.654e-02, -6.974e-02, 6.345e-02, 1.485e-02, 2.044e-01, -1.140e-01, -4.490e-02, 1.607e-01, -9.209e-02, 1.401e-01, -5.090e-02, -8.411e-03)); + r += mul(s6_0, M4(6.762e-02, 3.111e-02, 8.905e-02, -8.417e-02, 3.567e-02, 4.367e-02, 1.799e-01, -6.583e-02, 2.586e-01, -5.969e-02, 9.891e-02, -6.286e-02, 2.217e-01, 9.322e-02, 1.010e-02, 4.488e-03)); + r += mul(s6_1, M4(-6.680e-02, 3.286e-02, 3.087e-02, -4.619e-03, -1.036e-02, 7.282e-02, 1.468e-01, -5.122e-02, 2.252e-01, 1.292e-01, -7.800e-02, -1.970e-01, -9.419e-02, 9.136e-02, -4.977e-02, 2.149e-01)); + r += mul(s6_2, M4(-4.593e-03, 3.990e-02, 1.935e-01, 1.642e-01, -1.323e-01, -3.260e-02, 6.514e-03, 8.888e-02, 1.945e-01, -2.007e-01, -1.095e-01, 8.508e-02, 2.230e-01, 1.375e-01, 5.262e-01, 3.002e-02)); + r += mul(s6_3, M4(-1.067e-01, 1.100e-02, -9.022e-02, -8.760e-03, -1.470e-01, 4.327e-02, -4.423e-01, -9.102e-02, -7.296e-02, -5.397e-02, 3.183e-01, -9.107e-02, 7.823e-02, 1.340e-01, -3.831e-01, 3.284e-01)); + r += mul(s6_4, M4(-1.125e-01, -1.064e-01, 5.733e-02, 2.783e-01, 4.943e-02, 8.598e-02, -1.754e-01, 1.106e-01, 7.048e-03, -1.128e-01, -1.402e-01, 7.994e-02, 6.531e-02, -1.963e-01, -1.339e-01, -1.787e-01)); + r += mul(s6_5, M4(6.126e-02, -2.194e-01, -1.802e-02, -1.396e-01, -1.366e-01, 1.403e-03, -2.045e-02, -4.980e-03, 3.831e-03, -5.249e-02, -8.786e-02, -9.294e-02, 5.863e-02, 
2.601e-01, 1.074e-01, 2.285e-01)); + r += mul(s6_6, M4(4.155e-02, -2.726e-02, -1.368e-01, -3.527e-02, -1.657e-03, 4.799e-02, -1.674e-01, -2.810e-02, 1.697e-01, -2.212e-01, -1.978e-01, -8.034e-02, -3.231e-02, -3.516e-01, 1.731e-01, -2.747e-02)); + r += mul(s6_7, M4(1.853e-01, 6.818e-02, 4.581e-02, 2.965e-02, 2.423e-01, -1.597e-01, 1.566e-01, 2.783e-01, 4.119e-02, -5.123e-02, 2.303e-01, -9.911e-02, -9.150e-02, -6.106e-02, 1.850e-02, 2.050e-01)); + r += mul(s6_8, M4(2.283e-01, -1.458e-01, 1.233e-01, 8.499e-02, 6.123e-02, 1.016e-01, -5.853e-03, -8.747e-02, 7.646e-02, -6.887e-02, -2.715e-02, -3.436e-02, -1.119e-01, -1.890e-01, 2.693e-01, 2.559e-03)); + r += mul(s7_0, M4(1.384e-02, -1.850e-01, 5.129e-02, -1.023e-01, 9.918e-02, -2.892e-02, 1.510e-01, -8.043e-02, -5.683e-02, 4.200e-02, -1.634e-01, 4.746e-02, 3.527e-02, -5.455e-02, -1.567e-01, -1.112e-01)); + r += mul(s7_1, M4(-1.209e-01, -2.167e-01, -9.072e-02, 6.864e-02, -1.791e-01, -7.372e-02, 7.429e-02, -2.261e-01, 5.672e-02, -8.587e-02, -1.480e-01, 8.569e-02, -4.476e-02, 4.380e-02, -2.727e-01, 2.679e-01)); + r += mul(s7_2, M4(-9.439e-02, -1.420e-01, -8.216e-02, 1.063e-01, -2.824e-02, -1.300e-01, 4.051e-02, -3.691e-02, -3.306e-02, -6.779e-02, -1.291e-02, 1.214e-01, -1.861e-02, -7.860e-02, 6.709e-02, -1.240e-01)); + r += mul(s7_3, M4(-4.692e-02, 4.636e-03, -3.419e-02, 3.887e-02, -9.496e-02, -5.115e-02, -1.757e-01, 8.311e-02, -8.690e-03, 5.378e-02, 2.403e-01, -1.772e-02, 8.144e-03, 1.039e-02, -1.007e-02, 2.934e-02)); + r += mul(s7_4, M4(-8.310e-02, -1.082e-01, 1.465e-01, 1.458e-01, -6.636e-02, 1.827e-01, -1.259e-01, 7.035e-02, 1.577e-01, 2.910e-02, 1.289e-01, 1.968e-01, 6.651e-02, -1.057e-01, 3.715e-02, -2.438e-01)); + r += mul(s7_5, M4(-2.911e-02, -9.885e-02, -2.341e-02, -2.019e-01, 2.090e-01, 8.860e-02, -1.892e-01, 1.353e-01, -5.556e-02, -7.870e-03, 5.824e-02, 5.973e-02, -3.291e-02, 7.454e-03, 3.751e-02, -1.121e-01)); + r += mul(s7_6, M4(7.551e-02, -3.335e-02, -1.762e-01, 5.832e-02, 5.756e-02, -1.613e-02, 1.464e-01, 
-9.083e-02, 1.369e-02, -1.940e-02, 2.754e-02, -5.272e-02, -4.991e-02, 1.147e-01, -2.800e-02, -5.788e-02)); + r += mul(s7_7, M4(-2.630e-02, 8.548e-02, -3.191e-02, 1.016e-01, 2.250e-01, 8.246e-02, 1.763e-01, 8.115e-03, -1.055e-01, 6.761e-02, 2.263e-02, -9.174e-02, 4.222e-02, 7.305e-02, 1.293e-01, -2.264e-03)); + r += mul(s7_8, M4(3.674e-02, -2.297e-02, -3.111e-02, 1.134e-02, -1.363e-01, 1.643e-01, 9.281e-02, -7.981e-02, 8.216e-02, -9.746e-02, -1.500e-02, 7.859e-02, -8.993e-02, -1.750e-02, 1.173e-01, 4.751e-03)); + r += V4(1.698e-03, 2.579e-02, -1.678e-02, 7.305e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.186e-02, 7.644e-02, -1.504e-01, 6.998e-03, 3.227e-01, 5.705e-01, 1.276e-02, -9.864e-03, 2.392e-01, 1.188e-02, 2.665e-02, 4.136e-03, 1.107e-01, -1.087e-02, -3.264e-02, -1.099e-01)); + r += mul(s0_1, M4(5.791e-02, -8.280e-02, -2.316e-01, -1.607e-01, -4.218e-01, 6.681e-01, 1.097e-01, -7.250e-01, -1.247e-01, -1.116e-01, 6.560e-02, -9.282e-02, -3.846e-02, 4.643e-02, -2.071e-02, -2.146e-01)); + r += mul(s0_2, M4(3.270e-03, 3.178e-02, -1.218e-01, 5.875e-02, 2.629e-03, 2.297e-01, 4.372e-01, -2.591e-01, 2.010e-02, -6.648e-02, 2.203e-02, -3.337e-02, 5.933e-02, -9.940e-02, -8.647e-02, -3.455e-02)); + r += mul(s0_3, M4(-7.295e-02, 1.090e-01, -2.574e-01, -4.590e-02, 1.968e-01, 5.900e-01, 6.299e-02, -3.414e-02, 
1.580e-01, -7.376e-02, 9.417e-02, -4.065e-02, 1.430e-03, -1.640e-02, 4.694e-02, -3.214e-05)); + r += mul(s0_4, M4(5.960e-02, -1.084e-01, -2.587e-01, 1.333e-01, -7.512e-02, 8.546e-01, -3.336e-01, 3.835e-01, 1.885e-01, 5.548e-02, -6.802e-02, 5.619e-03, -1.972e-02, 1.343e-01, 8.009e-02, -2.308e-01)); + r += mul(s0_5, M4(7.935e-02, -2.185e-02, 9.521e-03, 2.267e-02, 4.557e-02, 4.106e-01, -5.828e-02, 1.617e-01, -8.067e-02, 1.044e-01, -5.421e-02, 2.106e-01, 9.214e-02, -9.703e-03, -6.816e-03, 5.811e-02)); + r += mul(s0_6, M4(-8.258e-02, 5.813e-02, -3.165e-01, -4.061e-02, -2.125e-01, 3.517e-01, 1.852e-01, 3.948e-02, -3.181e-02, -9.381e-02, 3.857e-03, 1.596e-02, 1.605e-01, 1.763e-02, 6.127e-02, 5.993e-02)); + r += mul(s0_7, M4(-1.638e-01, 6.370e-02, -3.140e-02, -6.196e-02, -8.679e-02, 3.919e-01, -7.714e-02, 6.702e-01, 2.352e-01, 4.334e-02, 2.054e-01, -1.406e-01, -1.907e-02, -1.223e-02, 7.092e-02, 5.143e-02)); + r += mul(s0_8, M4(-2.864e-02, 2.101e-02, -5.985e-02, -3.778e-02, 1.740e-01, 4.349e-01, 3.508e-01, -8.256e-02, 1.417e-01, -2.963e-02, 5.655e-02, 5.685e-02, -3.032e-02, -7.560e-03, -3.227e-02, 1.275e-01)); + r += mul(s1_0, M4(-1.037e-01, -5.329e-02, -1.153e-03, 1.188e-03, 4.432e-02, 9.465e-02, 3.873e-02, -1.126e-02, 9.906e-02, -6.669e-03, -3.942e-02, -4.308e-02, 8.005e-03, 3.278e-02, -6.866e-02, -4.305e-02)); + r += mul(s1_1, M4(2.644e-01, 3.882e-02, 1.599e-01, 2.417e-01, -1.866e-01, -5.371e-02, -3.262e-02, -1.531e-01, -9.844e-02, -1.569e-02, -1.239e-02, -1.319e-02, -7.975e-02, 2.914e-02, 9.776e-02, 1.726e-01)); + r += mul(s1_2, M4(-7.199e-03, -9.698e-02, 5.974e-02, 2.839e-02, 1.722e-02, 1.038e-02, 4.286e-02, -1.109e-01, -1.882e-01, -5.766e-02, -1.242e-01, 6.661e-02, -9.155e-02, 9.355e-02, 4.687e-02, -7.348e-03)); + r += mul(s1_3, M4(-2.169e-01, 1.116e-01, -1.917e-01, -3.858e-03, 7.152e-02, -2.615e-02, -7.558e-02, -8.680e-02, -3.220e-02, 1.908e-01, 1.524e-02, 4.523e-03, 8.582e-02, 5.460e-02, 1.954e-01, 2.942e-01)); + r += mul(s1_4, M4(3.139e-02, -9.837e-02, 1.743e-02, 
1.242e-01, 8.060e-02, 1.809e-01, -1.421e-01, 1.008e-01, -1.521e-01, 6.146e-02, -8.842e-02, 2.465e-01, -3.182e-01, -4.065e-02, 1.646e-01, -3.455e-01)); + r += mul(s1_5, M4(5.098e-02, -2.421e-02, 1.039e-01, 6.447e-02, -1.005e-01, 1.246e-02, -3.039e-03, 4.305e-02, -6.107e-02, 2.312e-02, -9.916e-02, 1.579e-01, 8.034e-03, 4.809e-02, 4.289e-02, -3.629e-02)); + r += mul(s1_6, M4(2.395e-01, -3.280e-02, -8.095e-02, -1.944e-02, 5.828e-02, 5.073e-02, -2.697e-02, 1.291e-02, -2.329e-01, 1.244e-01, -9.033e-02, 9.723e-02, 7.572e-03, 4.856e-02, -1.690e-01, 1.099e-01)); + r += mul(s1_7, M4(2.168e-02, 3.986e-02, 2.528e-01, -1.322e-01, 9.191e-02, -7.203e-02, -4.302e-02, 1.143e-01, -2.355e-02, 2.543e-03, -4.589e-03, -8.326e-02, -1.032e-01, -1.386e-01, -1.470e-01, 9.319e-02)); + r += mul(s1_8, M4(-4.893e-02, -1.262e-04, 1.475e-01, -7.349e-02, -1.439e-02, -1.244e-03, 7.020e-02, 1.256e-01, 1.024e-01, -4.272e-02, -5.351e-02, 2.883e-02, -6.076e-02, -1.863e-02, -1.155e-01, -1.205e-03)); + r += mul(s2_0, M4(1.458e-02, -5.736e-02, 1.231e-02, 1.840e-01, -1.126e-01, -4.911e-02, -1.182e-01, 1.364e-01, 1.724e-03, 7.908e-02, 6.441e-02, -1.983e-01, -8.041e-02, -3.410e-02, -9.758e-02, -3.682e-02)); + r += mul(s2_1, M4(-1.064e-01, 1.305e-02, 1.300e-02, -2.330e-01, 1.571e-01, -5.209e-02, -1.620e-01, -8.339e-02, -5.291e-02, 6.925e-02, -1.189e-02, 1.150e-01, 1.358e-04, -1.515e-01, 2.310e-02, 8.484e-02)); + r += mul(s2_2, M4(-1.471e-02, 8.768e-02, 1.317e-01, 6.032e-02, -2.307e-02, 4.888e-02, 4.469e-02, -2.151e-01, -1.492e-01, 1.163e-01, 7.104e-02, 8.186e-02, 8.922e-02, -5.862e-02, -9.091e-02, 2.635e-01)); + r += mul(s2_3, M4(7.698e-02, 4.197e-02, -4.196e-02, 1.545e-01, 1.266e-02, -1.129e-01, 1.528e-01, -5.952e-02, 3.781e-02, 8.110e-02, 1.030e-01, -2.405e-02, -5.800e-02, -1.409e-02, -6.333e-02, -2.070e-02)); + r += mul(s2_4, M4(-9.783e-02, 6.081e-02, -6.229e-02, 7.337e-02, -1.360e-01, 1.127e-03, 4.817e-02, -2.041e-01, 6.752e-02, -1.186e-02, -7.077e-02, -1.223e-01, 2.327e-02, 7.801e-03, 6.270e-02, 
3.135e-01)); + r += mul(s2_5, M4(9.916e-02, -4.970e-02, 4.994e-02, -7.438e-02, 1.170e-01, 9.513e-02, -1.987e-01, 1.479e-01, -1.008e-01, 1.637e-02, -4.408e-02, 1.639e-01, 1.416e-02, 7.802e-02, 7.414e-02, 2.309e-01)); + r += mul(s2_6, M4(2.317e-02, -9.267e-03, 2.679e-03, -1.355e-02, 1.708e-01, 9.609e-02, 7.928e-02, -2.145e-03, 2.286e-01, 5.851e-02, 7.349e-02, -4.331e-02, 4.207e-02, 5.328e-03, -1.931e-01, 1.520e-02)); + r += mul(s2_7, M4(-4.572e-02, -5.036e-02, 4.120e-02, -2.082e-01, -1.949e-01, -9.669e-02, 4.695e-02, 2.991e-02, -6.106e-03, 6.338e-02, -1.248e-02, 3.833e-02, 1.196e-01, 3.891e-02, 1.696e-01, 3.910e-02)); + r += mul(s2_8, M4(1.415e-03, 5.763e-02, -1.149e-02, -2.407e-01, 1.740e-03, -8.224e-02, -1.945e-02, 1.355e-02, -4.428e-02, 1.613e-02, -3.505e-02, -4.122e-02, -2.886e-02, -6.491e-02, -5.194e-02, -8.513e-02)); + r += mul(s3_0, M4(-1.765e-01, -1.367e-01, -4.111e-02, 1.888e-02, -4.251e-03, 1.376e-02, -3.862e-02, -8.980e-03, 1.037e-01, -1.789e-01, 9.146e-02, 2.300e-01, -1.614e-01, 1.308e-01, -1.819e-01, 1.665e-01)); + r += mul(s3_1, M4(-2.059e-01, -5.625e-02, 2.864e-02, -7.048e-02, -1.706e-03, -1.770e-01, -4.874e-02, -2.786e-02, -1.275e-01, -5.649e-02, 3.366e-02, 1.795e-01, -1.223e-02, -2.242e-01, -1.622e-01, 1.141e-01)); + r += mul(s3_2, M4(1.345e-01, 6.934e-02, 6.283e-02, 1.327e-01, 6.862e-02, -2.143e-01, 8.570e-02, -3.989e-02, -4.377e-02, -1.398e-01, 2.988e-01, 1.374e-01, 1.468e-01, 8.299e-02, -4.532e-02, 2.113e-01)); + r += mul(s3_3, M4(3.431e-01, -9.639e-03, 2.325e-01, -5.670e-02, -1.314e-01, 3.757e-02, -6.452e-02, 6.469e-02, -5.444e-03, -1.508e-01, -4.739e-01, -1.679e-01, 1.373e-01, -6.124e-02, 7.933e-03, -1.489e-02)); + r += mul(s3_4, M4(-1.132e-01, 5.289e-02, 1.535e-02, -2.451e-01, 3.783e-03, 1.595e-01, 9.528e-02, 1.430e-01, 1.694e-01, -1.169e-01, -2.842e-01, -2.346e-01, 3.864e-01, -1.825e-01, -2.749e-01, -1.994e-01)); + r += mul(s3_5, M4(-7.642e-03, 8.555e-02, -1.200e-02, 1.146e-01, 1.802e-02, 1.937e-01, 1.797e-01, 5.593e-03, 4.507e-02, -1.199e-01, 
-2.198e-01, 1.812e-01, -5.173e-03, 1.568e-01, 9.984e-02, -2.659e-02)); + r += mul(s3_6, M4(6.704e-02, 4.698e-02, -1.374e-02, -9.021e-02, -1.707e-01, 4.764e-03, -9.841e-03, 5.072e-02, 1.605e-01, -2.053e-01, -3.838e-02, -1.444e-02, 2.384e-02, -1.151e-01, -1.005e-01, 4.233e-02)); + r += mul(s3_7, M4(-8.390e-02, 1.908e-02, -1.100e-01, -8.602e-02, 4.900e-02, 1.116e-02, 2.053e-01, -5.726e-02, -9.621e-02, -1.169e-01, 2.024e-01, 4.824e-02, -2.096e-02, 3.369e-02, 2.029e-01, -1.462e-01)); + r += mul(s3_8, M4(1.567e-01, -2.444e-02, 9.509e-02, -1.816e-02, -3.936e-02, -2.473e-02, -1.129e-01, 2.242e-02, -3.106e-03, -1.018e-01, -1.701e-01, 1.499e-01, -1.430e-01, -3.654e-02, 2.967e-02, 8.369e-02)); + r += mul(s4_0, M4(1.034e-01, -5.312e-04, -1.239e-01, -5.456e-02, 1.879e-02, -7.676e-02, -5.956e-02, -1.783e-01, -1.130e-01, -6.370e-02, 2.026e-01, 3.057e-02, -2.113e-01, 5.926e-02, -5.394e-02, 1.535e-01)); + r += mul(s4_1, M4(-6.531e-03, -1.431e-01, -1.812e-01, 1.802e-01, 8.262e-03, 7.741e-02, -6.223e-02, 9.611e-03, -9.345e-02, -8.801e-02, 3.407e-02, 1.188e-01, 1.595e-01, -4.748e-02, 1.459e-02, 2.855e-01)); + r += mul(s4_2, M4(1.810e-01, -4.054e-02, -3.277e-02, -1.111e-01, 6.464e-02, -2.245e-01, 1.261e-02, -2.225e-01, 1.451e-01, 1.198e-01, 2.836e-02, 3.377e-02, 1.767e-01, -4.248e-02, 8.389e-02, 2.485e-02)); + r += mul(s4_3, M4(-9.627e-02, 5.986e-02, 7.569e-03, -7.105e-02, 2.928e-01, -1.040e-01, -2.868e-01, -1.463e-01, -8.511e-03, -1.502e-02, -7.755e-02, 1.688e-01, 6.127e-02, 8.810e-02, 3.685e-02, 2.663e-02)); + r += mul(s4_4, M4(-6.368e-02, 5.359e-02, -3.643e-02, 1.399e-01, 3.936e-03, 2.547e-01, 2.957e-01, 2.191e-01, 2.612e-01, 3.846e-02, 3.046e-02, -7.856e-02, -3.086e-02, -6.937e-02, 5.934e-02, 7.798e-02)); + r += mul(s4_5, M4(-1.666e-01, 4.368e-02, -9.936e-02, 5.513e-02, 2.653e-01, -7.287e-02, -2.554e-01, 2.655e-01, -1.452e-01, 5.002e-02, -6.006e-02, 3.174e-02, -5.631e-03, 7.299e-02, 1.471e-01, 6.928e-03)); + r += mul(s4_6, M4(7.689e-02, -6.132e-02, -9.584e-02, 5.564e-02, 9.063e-02, 
1.710e-01, -6.893e-02, -1.494e-01, -1.700e-01, -5.163e-02, -7.758e-02, -4.896e-02, -1.714e-02, 3.621e-02, 8.242e-02, 2.643e-02)); + r += mul(s4_7, M4(1.566e-02, -1.998e-02, -1.708e-01, 2.358e-01, 2.403e-02, 2.645e-01, 4.161e-01, -1.649e-01, 2.306e-01, -3.243e-02, 9.435e-02, -1.145e-01, 1.212e-01, 7.266e-02, 1.598e-01, -7.062e-02)); + r += mul(s4_8, M4(3.270e-02, -1.166e-01, -3.694e-02, 2.545e-01, -1.235e-01, 1.676e-01, 3.960e-03, -1.373e-01, -5.666e-03, -1.110e-01, -9.305e-03, 6.505e-02, -7.349e-03, -2.651e-02, 7.352e-02, -6.788e-03)); + r += mul(s5_0, M4(8.014e-02, 1.658e-02, -5.292e-02, 1.630e-02, -7.409e-02, 1.580e-02, 5.897e-02, -1.103e-02, 8.100e-02, -1.677e-01, -2.744e-01, 8.982e-02, 4.863e-02, -7.768e-02, 2.005e-01, -8.596e-03)); + r += mul(s5_1, M4(-1.216e-01, -7.057e-02, 4.285e-02, 6.371e-02, 5.647e-02, 6.204e-02, 4.051e-03, 1.280e-02, -5.409e-02, 2.483e-02, 4.165e-02, -1.142e-01, 2.118e-01, -2.997e-01, -9.129e-02, 3.454e-01)); + r += mul(s5_2, M4(5.001e-02, 8.768e-02, -7.564e-02, 7.259e-02, -2.339e-02, -4.598e-02, 3.337e-02, 6.158e-02, 2.909e-01, -1.490e-01, 1.540e-01, 6.356e-02, -6.790e-03, 3.766e-02, 2.135e-02, 1.071e-01)); + r += mul(s5_3, M4(3.641e-02, 7.143e-02, -1.138e-01, -2.032e-02, -1.992e-02, -7.735e-02, -5.734e-02, -4.763e-02, -8.787e-03, 3.215e-02, 3.169e-02, 6.289e-02, 3.540e-01, -7.365e-02, -8.671e-02, 9.639e-02)); + r += mul(s5_4, M4(1.139e-01, -1.842e-01, 2.704e-01, -1.627e-01, -4.522e-02, -3.535e-02, 9.351e-02, -2.893e-02, 1.861e-01, 2.077e-01, 1.372e-02, 4.970e-02, 1.317e-01, -3.054e-01, -8.803e-02, 1.714e-01)); + r += mul(s5_5, M4(2.900e-02, 3.563e-03, 1.104e-01, -2.544e-01, 1.542e-02, -6.940e-02, 3.600e-03, 5.139e-02, -3.194e-01, 2.563e-01, 2.459e-02, -3.874e-02, -6.024e-02, 2.414e-02, -1.001e-01, -1.498e-01)); + r += mul(s5_6, M4(-4.244e-02, -5.413e-02, -9.869e-02, 2.158e-02, 1.469e-02, 7.325e-02, 1.743e-01, 1.407e-02, -3.738e-01, -2.959e-02, 3.194e-02, -1.635e-01, 1.413e-01, -4.843e-02, -1.413e-03, -3.039e-02)); + r += mul(s5_7, 
M4(1.139e-01, -6.672e-03, 2.730e-01, -3.003e-02, 2.715e-02, -3.701e-02, 2.281e-01, -1.225e-01, 2.238e-01, -8.357e-02, -6.010e-03, -2.614e-01, 7.331e-02, -1.523e-02, -2.548e-01, -8.554e-02)); + r += mul(s5_8, M4(3.213e-02, 1.268e-01, 3.449e-02, -2.570e-01, -1.225e-01, 7.178e-02, 9.433e-02, 2.491e-02, 1.620e-01, 1.357e-02, -2.706e-02, 6.437e-03, 1.109e-01, -7.013e-02, -6.792e-02, -1.037e-01)); + r += mul(s6_0, M4(8.426e-02, 1.621e-01, -3.307e-02, 6.085e-02, -2.910e-01, 1.052e-01, -1.113e-01, 1.969e-01, -1.820e-02, -4.381e-02, 5.765e-02, 8.801e-03, -1.752e-01, -4.596e-02, 2.692e-03, -1.809e-02)); + r += mul(s6_1, M4(5.368e-02, 8.830e-03, 1.342e-01, -1.547e-01, 1.712e-01, -2.743e-02, 2.409e-02, 2.343e-01, 2.959e-01, -6.927e-02, 1.156e-01, -2.541e-02, 1.364e-01, 1.695e-01, 1.817e-02, 5.368e-02)); + r += mul(s6_2, M4(5.028e-02, 1.692e-01, 2.213e-02, -7.803e-02, -1.072e-01, 4.685e-02, -5.682e-03, 5.716e-02, 1.669e-01, 8.173e-03, 1.611e-01, 1.414e-01, -1.356e-01, -2.232e-01, -8.235e-02, 1.440e-01)); + r += mul(s6_3, M4(-5.098e-02, 1.137e-01, -9.407e-02, -6.296e-02, -1.385e-01, 1.617e-01, -1.909e-01, 1.068e-01, -6.835e-03, -5.320e-02, -2.809e-01, 4.434e-02, -1.940e-01, 1.189e-01, 6.201e-03, -1.800e-02)); + r += mul(s6_4, M4(-6.221e-02, 5.196e-02, -2.060e-02, 5.637e-02, 3.642e-01, 5.141e-02, 2.882e-02, -2.343e-01, 1.591e-01, -1.559e-02, -1.596e-01, 2.177e-02, 7.673e-02, 3.992e-01, -1.146e-01, 2.448e-01)); + r += mul(s6_5, M4(-5.695e-02, 7.100e-02, -2.178e-02, 1.582e-01, 2.372e-02, -2.224e-02, -4.140e-02, -8.826e-02, -4.937e-02, -1.031e-01, -1.706e-01, -4.947e-02, -1.536e-01, 1.721e-01, 6.676e-03, 8.255e-02)); + r += mul(s6_6, M4(-9.010e-02, 1.503e-01, 7.492e-02, -1.993e-03, -3.702e-01, 1.511e-01, -3.409e-02, -2.098e-02, -4.503e-01, -6.997e-02, -1.055e-01, -2.102e-02, -7.406e-02, 2.809e-02, 3.084e-01, 8.653e-02)); + r += mul(s6_7, M4(-1.697e-01, 1.711e-01, 1.144e-01, 2.860e-02, 2.195e-02, 1.660e-03, -7.057e-02, -1.680e-01, -1.430e-01, -3.346e-02, -5.121e-02, 2.135e-02, 
1.318e-01, -1.161e-02, 1.916e-01, 1.212e-01)); + r += mul(s6_8, M4(-3.071e-02, 1.432e-01, 1.052e-01, 7.180e-02, -4.928e-02, 5.232e-02, -3.310e-03, -3.017e-05, 1.398e-01, -7.870e-02, -9.837e-02, 2.374e-02, 1.507e-01, -8.493e-02, -5.154e-02, 5.780e-02)); + r += mul(s7_0, M4(2.455e-01, -7.282e-02, 7.852e-02, 4.070e-02, -3.297e-02, 4.145e-02, -1.426e-01, 7.745e-02, -3.729e-02, 8.566e-03, 1.435e-01, -1.010e-01, 6.033e-02, -8.686e-03, -7.574e-02, -3.943e-02)); + r += mul(s7_1, M4(-5.348e-02, -1.035e-01, 1.114e-01, -1.529e-01, -6.293e-03, -3.968e-02, -6.971e-02, 1.515e-01, 7.013e-02, 1.038e-01, -3.970e-02, -1.193e-01, 1.365e-01, 6.677e-02, 2.004e-02, -1.010e-02)); + r += mul(s7_2, M4(1.277e-02, -3.852e-02, -6.031e-02, -1.067e-02, -1.349e-02, -5.066e-02, -9.634e-02, 1.096e-01, -4.221e-02, -2.845e-02, -6.615e-02, -1.064e-01, -5.977e-02, -1.288e-01, -1.142e-01, 3.774e-02)); + r += mul(s7_3, M4(1.835e-01, -1.255e-01, 6.001e-02, -9.365e-02, -8.400e-02, -2.954e-02, -1.218e-01, 2.082e-02, 9.881e-03, -6.183e-02, 7.922e-02, 4.288e-02, -8.498e-02, -7.827e-02, -1.268e-02, 5.906e-06)); + r += mul(s7_4, M4(-1.167e-02, -1.976e-01, 1.218e-01, 2.846e-02, 1.403e-01, -1.914e-02, -3.142e-02, -6.645e-03, 1.120e-01, 4.741e-02, -4.679e-02, 1.165e-01, 6.907e-02, 9.039e-02, -1.763e-01, 6.242e-02)); + r += mul(s7_5, M4(-6.373e-02, -4.400e-02, -3.731e-02, -3.382e-02, 7.034e-02, -4.918e-02, 8.839e-03, -4.738e-02, -1.555e-02, -2.111e-02, -1.032e-02, 7.233e-03, 8.502e-02, 9.928e-02, -1.683e-02, 1.288e-01)); + r += mul(s7_6, M4(1.680e-01, 1.016e-02, 6.608e-02, -4.082e-03, 3.294e-02, 4.425e-02, -1.113e-01, -6.943e-02, -3.064e-02, -1.889e-02, 9.421e-02, 2.824e-02, 6.753e-02, -7.434e-02, -5.314e-02, -2.174e-02)); + r += mul(s7_7, M4(-3.561e-02, -1.050e-01, -1.267e-01, -7.304e-02, 9.945e-02, -1.332e-01, -4.349e-02, -7.886e-02, -7.833e-02, 1.614e-01, 7.414e-02, 2.130e-03, 1.372e-01, 1.233e-02, -1.111e-01, 3.728e-02)); + r += mul(s7_8, M4(-1.044e-02, -5.582e-02, 1.359e-02, -1.386e-02, -3.630e-02, 
-1.954e-02, -1.212e-01, 1.234e-02, -2.190e-02, 1.427e-02, -3.315e-02, 1.844e-02, -1.252e-01, -5.419e-02, -3.355e-02, 2.707e-03)); + r += V4(3.455e-02, -5.682e-02, 3.028e-02, 8.647e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.096e-01, -1.852e-02, -1.637e-02, 4.392e-02, -2.820e-02, 2.433e-02, -1.942e-01, -1.233e-01, 1.259e-01, -9.204e-02, 2.761e-02, 2.025e-02, -5.680e-02, 2.470e-02, 1.573e-01, 1.047e-01)); + r += mul(s0_1, M4(2.607e-01, -2.214e-02, 3.786e-02, 7.953e-02, 1.091e+00, -3.300e-01, -5.729e-02, 1.207e-01, -7.803e-02, -1.589e-01, 1.505e-02, -2.123e-02, 1.959e-01, 5.892e-02, -1.357e-02, 1.410e-01)); + r += mul(s0_2, M4(-5.059e-02, -7.549e-02, 4.878e-03, 7.983e-02, -1.802e-01, 1.381e-01, -7.327e-02, -1.975e-01, 1.452e-02, 1.400e-01, -1.519e-01, 1.185e-01, 2.760e-02, -8.449e-02, 3.833e-02, 1.663e-02)); + r += mul(s0_3, M4(-4.666e-02, -3.460e-04, 1.701e-02, -8.272e-02, -1.174e-01, -1.036e-01, 1.497e-03, 2.781e-01, 9.424e-02, 2.704e-02, -3.550e-02, -1.404e-01, 1.897e-01, 1.363e-01, 1.109e-01, 3.753e-02)); + r += mul(s0_4, M4(-1.341e-01, 3.821e-02, -3.010e-02, -1.671e-01, -1.511e-01, -6.140e-01, -1.095e-01, -3.052e-01, 2.433e-01, -3.002e-02, -2.312e-01, -8.997e-04, 1.391e-02, 1.397e-01, 1.878e-01, -2.238e-02)); + r += mul(s0_5, M4(1.033e-01, -8.012e-03, -6.417e-02, 9.283e-02, -1.379e-01, -4.926e-02, 
-2.075e-01, -1.917e-01, 4.786e-02, 2.361e-02, -5.584e-02, 9.578e-02, -5.804e-02, -4.844e-03, -1.844e-02, -1.057e-02)); + r += mul(s0_6, M4(-2.536e-02, -5.752e-02, 2.363e-02, 1.713e-01, -5.923e-01, 3.168e-01, 1.674e-01, 1.652e-01, -6.523e-02, -3.655e-03, -5.167e-02, 1.683e-02, -1.840e-01, 5.627e-02, -1.028e-01, 1.927e-01)); + r += mul(s0_7, M4(-1.704e-01, 1.462e-02, 3.242e-02, 7.732e-02, -6.424e-01, 1.905e-01, -8.243e-02, 7.533e-02, 2.171e-01, 6.840e-02, -9.205e-02, -6.206e-03, 5.733e-02, -1.789e-02, -9.110e-02, 5.664e-02)); + r += mul(s0_8, M4(-5.938e-03, -7.444e-02, 5.562e-02, -6.044e-02, -3.737e-01, 1.747e-01, 1.633e-01, 1.036e-01, 1.738e-02, -2.388e-02, -1.659e-01, -8.670e-02, 7.200e-02, -1.353e-01, -4.155e-04, -8.666e-02)); + r += mul(s1_0, M4(1.040e-01, 4.584e-04, 7.114e-02, -1.014e-01, -4.784e-02, 8.807e-02, -2.202e-02, -6.059e-02, -1.686e-02, -6.356e-02, 1.230e-01, 1.079e-01, -1.823e-02, 2.432e-01, -1.581e-01, -2.449e-01)); + r += mul(s1_1, M4(3.786e-02, -1.782e-01, -4.362e-02, 8.725e-02, 1.882e-03, -5.855e-02, 1.016e-01, 8.599e-02, -1.231e-01, 5.488e-02, 5.245e-02, -8.730e-02, -3.452e-01, 1.340e-01, -2.548e-02, -2.157e-01)); + r += mul(s1_2, M4(-1.819e-03, -1.625e-01, 2.959e-02, 1.547e-02, -6.048e-03, 1.479e-01, -1.464e-01, 7.402e-02, -1.513e-01, 3.002e-02, 6.133e-02, 5.366e-02, -1.184e-01, 3.590e-01, -1.053e-01, -1.425e-01)); + r += mul(s1_3, M4(1.404e-01, -9.241e-02, 1.825e-01, 2.719e-04, -5.835e-02, 6.595e-03, -9.739e-03, -1.057e-01, 1.661e-01, -1.151e-02, 6.201e-02, 1.203e-02, 3.053e-01, -8.240e-03, 6.581e-02, -1.970e-02)); + r += mul(s1_4, M4(-2.797e-01, 2.141e-02, 1.896e-01, 3.946e-02, -1.040e-01, -2.634e-01, 1.826e-01, -2.420e-01, -2.634e-01, -1.385e-01, 1.424e-01, -1.114e-01, 1.808e-01, -1.194e-01, 9.477e-02, -2.438e-01)); + r += mul(s1_5, M4(3.524e-01, -2.425e-02, -8.163e-02, 1.418e-02, 1.681e-01, 1.876e-02, -1.001e-01, 5.731e-02, -7.652e-02, -2.750e-02, 2.377e-01, -1.445e-01, -1.499e-01, -6.638e-02, 2.818e-02, -2.138e-01)); + r += mul(s1_6, 
M4(-3.080e-02, 1.693e-02, 5.671e-03, 1.528e-01, 5.871e-02, -2.643e-02, -5.709e-02, 4.359e-02, -2.522e-02, 1.197e-01, -9.253e-02, 4.255e-02, 1.676e-01, 2.026e-01, -2.956e-01, -2.714e-01)); + r += mul(s1_7, M4(-1.606e-01, 6.631e-02, -1.906e-01, -2.287e-01, -6.485e-02, -7.836e-02, 4.530e-02, 5.725e-02, 6.908e-02, 1.061e-01, 1.805e-01, 7.356e-02, 2.819e-01, 1.429e-01, -1.723e-01, -3.174e-02)); + r += mul(s1_8, M4(-8.581e-02, -2.105e-02, -5.922e-03, -3.853e-02, 4.403e-02, 7.008e-02, 2.021e-02, 8.475e-02, -4.575e-02, -1.065e-01, 5.490e-02, 8.552e-02, 1.052e-01, -5.821e-02, -3.439e-02, -1.344e-02)); + r += mul(s2_0, M4(-9.902e-02, -3.794e-02, 1.320e-01, 1.776e-02, -7.469e-02, -1.189e-02, -7.807e-02, -7.543e-02, 4.629e-02, 3.148e-02, -1.269e-01, -1.108e-01, -6.617e-02, 5.129e-02, -1.180e-01, 2.706e-03)); + r += mul(s2_1, M4(9.910e-02, -1.586e-01, 8.186e-02, 5.193e-01, 2.276e-01, 2.274e-02, -1.020e-01, -1.754e-03, 1.400e-01, -8.362e-02, 3.942e-02, 5.102e-02, -8.412e-02, 6.594e-02, -2.943e-02, -6.063e-02)); + r += mul(s2_2, M4(1.368e-02, 4.901e-02, -1.587e-01, -3.238e-02, 4.454e-02, -1.343e-01, -1.012e-01, -2.655e-01, -1.020e-01, 1.676e-01, 1.253e-01, -9.942e-02, 3.205e-02, -5.055e-02, -1.117e-01, -1.089e-01)); + r += mul(s2_3, M4(-5.653e-03, -1.945e-02, 1.854e-01, 7.696e-02, 1.377e-02, -1.351e-01, -2.388e-01, -1.644e-01, 1.011e-02, -5.458e-02, -7.235e-02, 2.097e-02, -1.990e-02, 1.203e-02, 9.880e-02, -1.743e-01)); + r += mul(s2_4, M4(-8.908e-02, 7.718e-03, 6.576e-02, 1.137e-03, 7.829e-02, 2.680e-01, -1.524e-01, -1.810e-01, -6.039e-02, 1.009e-02, -7.063e-02, 2.310e-01, -2.906e-01, -6.006e-02, 1.890e-01, -1.672e-01)); + r += mul(s2_5, M4(-2.529e-03, 8.173e-02, 3.242e-02, -5.432e-02, 3.282e-02, -1.262e-01, 1.755e-01, 9.609e-03, -7.836e-02, 9.272e-02, 3.969e-02, 1.379e-01, -1.428e-01, -2.265e-02, -1.399e-01, 9.609e-02)); + r += mul(s2_6, M4(8.486e-02, -3.489e-02, 7.254e-02, 2.976e-02, 4.917e-02, -7.064e-02, -7.310e-02, -1.542e-01, -1.208e-01, -1.891e-02, -2.144e-02, -1.589e-02, 
-3.039e-04, 1.014e-01, -1.183e-02, -1.660e-02)); + r += mul(s2_7, M4(1.340e-01, 1.040e-01, -7.289e-02, -3.622e-02, -4.446e-03, -6.201e-02, 1.615e-01, 1.755e-01, 7.300e-02, 8.229e-02, -8.150e-02, 4.328e-02, -5.828e-02, -5.730e-03, -1.219e-01, -1.250e-01)); + r += mul(s2_8, M4(-8.284e-02, -1.916e-02, 1.192e-01, -1.383e-01, 2.001e-02, -8.057e-03, 8.148e-02, -8.773e-04, 2.151e-02, 1.743e-04, -6.163e-04, -4.874e-02, 1.019e-01, -1.012e-02, -1.159e-02, -3.961e-02)); + r += mul(s3_0, M4(-9.251e-03, 1.791e-01, 6.008e-02, -1.205e-01, -1.015e-02, 7.059e-02, -9.721e-02, -1.524e-02, -1.661e-01, 1.372e-01, 4.558e-02, -1.628e-01, -1.264e-01, -3.679e-02, -3.447e-02, 2.277e-02)); + r += mul(s3_1, M4(-1.366e-01, 2.667e-01, -4.057e-02, 1.793e-02, -1.248e-01, 1.381e-01, 1.214e-01, 5.758e-02, 9.372e-02, -1.240e-01, 3.346e-01, -7.787e-02, 6.877e-02, -3.500e-01, 1.308e-01, 3.518e-02)); + r += mul(s3_2, M4(-3.532e-03, -1.039e-01, -1.171e-01, 6.105e-02, 2.353e-01, -1.094e-01, 1.156e-01, -2.502e-03, -2.945e-01, 1.159e-01, 2.745e-01, 7.918e-02, 6.662e-03, -6.117e-02, 3.318e-02, 6.362e-03)); + r += mul(s3_3, M4(5.691e-02, -8.692e-02, -8.899e-02, 1.076e-01, 8.120e-03, 1.426e-02, -1.447e-01, -3.731e-02, 7.444e-02, -1.054e-01, 1.010e-01, 1.529e-01, 4.061e-02, 1.064e-01, -1.352e-01, -1.880e-01)); + r += mul(s3_4, M4(-1.729e-01, 2.776e-02, -7.887e-03, -2.290e-01, -2.224e-01, 2.358e-03, -7.686e-02, -2.962e-02, 2.791e-01, -3.004e-01, 2.505e-02, -2.687e-01, 1.216e-01, 9.880e-02, 7.356e-02, -6.637e-02)); + r += mul(s3_5, M4(1.795e-01, -3.148e-02, -2.065e-01, -2.491e-01, -1.342e-01, -1.418e-01, 1.059e-01, -1.083e-01, -2.623e-02, 1.010e-01, 1.061e-01, 2.263e-01, 1.283e-02, 2.022e-01, 2.694e-02, 1.391e-01)); + r += mul(s3_6, M4(-2.450e-02, 1.307e-02, -8.529e-03, 3.717e-02, 4.865e-02, 1.302e-01, 4.016e-02, -4.964e-02, -1.244e-01, -5.523e-02, 1.237e-02, -4.402e-02, -1.660e-02, 4.089e-02, -9.886e-02, -9.060e-03)); + r += mul(s3_7, M4(-8.691e-02, -5.331e-02, 1.517e-01, 1.034e-01, -9.627e-02, 3.671e-02, 
7.032e-02, 2.741e-02, 1.266e-01, 1.678e-02, -7.681e-02, -7.390e-02, -5.747e-02, 2.561e-01, -1.791e-02, 4.539e-02)); + r += mul(s3_8, M4(-7.839e-02, 6.835e-03, -3.815e-02, -6.703e-02, 1.276e-02, -4.495e-02, -8.536e-02, -5.965e-02, 7.876e-02, -7.166e-02, 1.930e-01, 1.739e-01, 2.574e-02, -6.235e-02, 1.674e-01, -2.943e-02)); + r += mul(s4_0, M4(3.382e-02, -5.765e-02, -1.294e-01, -1.117e-01, 7.191e-02, -1.291e-01, -1.010e-01, -6.787e-02, -9.919e-02, -1.310e-02, -5.468e-02, -8.673e-02, -4.614e-02, 2.058e-02, 1.568e-02, 6.677e-02)); + r += mul(s4_1, M4(9.792e-02, -1.954e-01, -1.295e-01, -9.406e-02, 7.871e-03, 2.065e-02, -1.771e-01, -1.587e-01, 1.736e-02, -7.412e-02, -1.920e-02, 6.103e-02, -1.134e-01, -7.384e-02, 2.419e-01, -2.548e-01)); + r += mul(s4_2, M4(-1.086e-01, -6.933e-02, -2.300e-02, -4.260e-02, 7.342e-02, 1.651e-01, -2.871e-01, -1.338e-01, 2.483e-02, 1.475e-02, 2.509e-02, 8.418e-02, 9.221e-02, 8.517e-02, -3.979e-02, 1.869e-02)); + r += mul(s4_3, M4(-1.124e-01, 1.399e-01, -2.047e-01, -1.929e-01, 6.006e-02, 3.435e-02, 2.235e-01, 4.982e-02, -1.128e-01, -2.094e-02, 3.461e-01, -9.115e-04, -7.437e-02, -2.786e-03, 9.743e-02, 1.126e-01)); + r += mul(s4_4, M4(-2.532e-01, -1.764e-02, -7.388e-02, 2.223e-01, 3.639e-01, 1.871e-01, 7.373e-02, 9.498e-02, 1.290e-01, 1.640e-02, 3.807e-02, 1.783e-01, -6.746e-02, 2.492e-02, -2.109e-01, -7.012e-02)); + r += mul(s4_5, M4(-2.031e-01, -1.759e-01, -4.291e-02, -1.635e-01, 1.516e-01, 1.822e-01, -8.788e-02, -9.115e-02, -3.340e-02, 1.875e-02, -1.274e-01, -1.669e-01, -4.712e-02, 2.403e-02, 2.677e-02, -9.679e-02)); + r += mul(s4_6, M4(-1.202e-01, -1.172e-01, 1.657e-01, 1.462e-01, 1.135e-01, -2.146e-02, 1.613e-01, -8.259e-02, -3.707e-02, 1.363e-01, 1.371e-01, -8.764e-02, 7.924e-02, 3.371e-02, 7.389e-02, 2.623e-02)); + r += mul(s4_7, M4(-4.097e-03, -6.457e-02, 3.019e-01, 9.493e-02, -1.907e-01, 2.051e-01, 1.891e-01, 1.184e-01, -6.448e-02, -3.581e-02, -9.772e-03, -1.005e-02, -9.187e-02, -5.031e-02, -7.213e-02, 4.266e-02)); + r += mul(s4_8, 
M4(4.281e-02, -5.041e-02, 1.101e-01, 9.184e-02, -2.280e-01, 6.764e-03, -1.061e-01, -2.754e-01, 2.351e-02, -4.372e-02, -4.200e-02, 1.559e-02, -1.357e-02, 3.516e-02, 4.607e-02, -1.332e-02)); + r += mul(s5_0, M4(-7.007e-02, 1.679e-01, -1.274e-02, 7.591e-02, 1.389e-02, 1.539e-02, -9.365e-02, 8.885e-02, -7.271e-02, 4.573e-02, 1.978e-01, 3.180e-02, 3.804e-02, -1.208e-01, -1.929e-01, 1.365e-01)); + r += mul(s5_1, M4(-3.422e-03, 9.937e-02, 2.542e-02, 1.253e-01, -7.473e-02, -1.020e-03, -1.301e-02, 5.780e-02, 1.960e-01, -7.564e-02, 8.091e-02, 4.205e-02, -3.428e-01, -1.536e-01, -1.327e-01, -1.579e-01)); + r += mul(s5_2, M4(9.969e-02, -1.642e-01, -5.405e-02, 1.528e-01, 3.682e-02, -1.369e-02, 1.060e-01, -5.419e-02, 9.039e-03, -3.165e-02, -8.450e-02, -8.017e-02, -8.518e-02, 2.551e-02, -1.087e-01, 1.125e-01)); + r += mul(s5_3, M4(-1.152e-01, 7.566e-02, 7.127e-02, -4.112e-02, 3.422e-02, -2.033e-01, 7.094e-02, -3.279e-03, 1.251e-01, -1.479e-01, 3.210e-01, 2.175e-02, 1.728e-01, -5.799e-02, -7.935e-03, 9.648e-02)); + r += mul(s5_4, M4(2.459e-01, 2.175e-01, 2.886e-02, -6.984e-02, 1.226e-01, 7.211e-03, 1.837e-02, 9.922e-02, 4.560e-01, -1.017e-01, 2.737e-02, 6.745e-02, 1.495e-03, 1.674e-01, -1.435e-01, -4.169e-02)); + r += mul(s5_5, M4(-7.349e-02, -5.326e-02, 1.862e-01, -1.118e-01, -6.708e-02, -1.111e-01, -3.200e-02, -1.652e-01, -3.148e-03, -4.066e-02, -7.074e-02, -5.107e-02, -3.497e-02, -1.160e-01, -1.724e-01, 5.532e-03)); + r += mul(s5_6, M4(4.559e-02, 8.109e-02, 8.744e-02, 8.321e-02, -8.380e-03, 8.471e-02, -9.455e-02, -8.131e-02, -2.126e-01, 1.962e-01, -1.948e-02, 7.930e-02, 1.526e-01, -1.633e-01, -6.995e-02, 8.871e-02)); + r += mul(s5_7, M4(-6.974e-02, 5.436e-02, 3.158e-02, 9.851e-02, -1.683e-02, 1.089e-01, -3.526e-02, -8.762e-02, -2.212e-01, -4.418e-02, -1.515e-01, 6.581e-02, 2.384e-01, -1.836e-02, -6.843e-02, -7.604e-02)); + r += mul(s5_8, M4(4.627e-03, 5.753e-02, -6.744e-02, -2.015e-01, -4.092e-02, 6.422e-02, 1.893e-02, -2.088e-01, -9.010e-02, 2.032e-02, -6.638e-02, -3.300e-03, 
8.718e-02, -2.528e-02, 2.858e-02, 1.160e-01)); + r += mul(s6_0, M4(6.870e-02, 1.131e-02, 7.865e-02, -3.451e-02, 1.591e-02, -1.381e-01, -9.905e-02, -2.441e-01, -1.138e-01, 2.561e-01, -2.368e-02, -1.296e-01, 1.939e-01, -2.497e-01, -4.962e-02, -1.540e-01)); + r += mul(s6_1, M4(1.207e-01, 1.171e-01, -9.254e-02, -8.790e-03, -5.541e-02, -1.699e-01, -2.360e-02, -1.511e-01, 4.065e-02, -1.579e-01, -1.230e-01, 3.388e-02, -7.535e-02, -2.809e-04, -1.092e-01, 8.226e-02)); + r += mul(s6_2, M4(1.462e-01, 1.123e-02, -1.396e-01, 3.841e-02, -1.342e-01, 1.159e-01, 7.373e-03, -9.726e-02, 1.521e-01, 2.392e-01, -3.398e-02, -7.629e-02, 3.649e-02, 1.545e-01, 1.291e-01, -1.353e-01)); + r += mul(s6_3, M4(1.193e-01, 6.714e-02, -5.676e-02, -6.763e-02, 2.939e-01, -4.573e-02, -2.876e-02, 7.910e-02, 2.628e-01, -3.672e-02, 5.333e-02, -1.391e-01, -2.088e-02, 3.584e-01, 3.513e-02, 3.537e-02)); + r += mul(s6_4, M4(-3.668e-02, -4.670e-02, 7.637e-02, -3.769e-02, 3.771e-02, 1.010e-01, -2.853e-01, 1.099e-01, -1.628e-02, -2.058e-01, -5.978e-02, 2.668e-01, -2.075e-01, -2.548e-01, 2.696e-01, 1.503e-01)); + r += mul(s6_5, M4(-9.871e-02, 1.987e-01, -5.740e-02, 6.089e-02, -1.584e-01, 8.837e-02, -3.021e-02, 1.039e-01, -9.652e-02, -1.423e-01, -4.528e-02, 3.550e-02, 8.129e-02, 7.949e-02, -3.520e-02, 1.582e-01)); + r += mul(s6_6, M4(-9.904e-03, 5.141e-02, -4.732e-03, 2.337e-02, -3.112e-03, 1.407e-01, 6.291e-02, 5.261e-02, 1.129e-02, 1.724e-01, 1.948e-01, 1.209e-01, -3.328e-02, 6.157e-02, -1.265e-01, -1.565e-01)); + r += mul(s6_7, M4(1.120e-01, 1.573e-01, 7.552e-02, -1.189e-01, -1.018e-01, -5.837e-03, 1.956e-01, -1.450e-01, -1.985e-01, -2.056e-01, 1.990e-01, 2.739e-02, -7.748e-02, 2.858e-02, -1.676e-01, -2.205e-01)); + r += mul(s6_8, M4(3.343e-02, -1.598e-02, 8.243e-02, -1.057e-01, -9.837e-02, -1.382e-01, 4.827e-02, -2.322e-02, -6.006e-02, -1.225e-02, -8.345e-03, 3.788e-02, 7.137e-02, -8.822e-02, -1.240e-01, 1.587e-01)); + r += mul(s7_0, M4(4.197e-03, -9.150e-02, 2.243e-02, 1.510e-01, -4.178e-02, -4.519e-03, 
-2.065e-01, -1.866e-01, -6.665e-02, 6.482e-02, -3.272e-02, -1.524e-02, -5.689e-02, -8.562e-02, -1.224e-01, -5.034e-02)); + r += mul(s7_1, M4(-2.028e-01, 1.248e-02, -7.040e-02, 6.906e-02, -1.455e-01, -9.177e-02, -2.913e-02, -8.226e-02, 8.236e-02, 1.811e-01, -5.199e-02, 9.111e-02, 2.452e-01, -2.731e-03, -1.706e-02, 8.847e-02)); + r += mul(s7_2, M4(4.152e-02, -2.097e-02, -6.550e-02, 5.457e-02, -8.043e-02, 4.064e-02, 2.533e-02, 4.574e-02, -5.159e-02, -2.161e-02, -4.714e-02, -1.196e-01, 1.831e-02, -7.269e-02, -2.514e-02, -1.522e-01)); + r += mul(s7_3, M4(1.127e-01, -1.283e-01, -6.297e-02, 1.758e-02, 1.899e-01, 2.562e-02, -1.396e-01, -8.503e-02, -3.588e-02, 8.375e-02, -1.897e-01, 3.284e-02, -5.664e-02, 7.889e-02, -1.285e-01, -4.259e-02)); + r += mul(s7_4, M4(1.804e-01, -1.357e-01, -9.422e-02, 1.343e-01, 6.370e-02, -8.731e-02, -2.610e-01, 3.235e-02, 2.090e-02, 1.737e-02, 3.840e-02, 6.650e-02, -1.454e-01, -9.199e-02, 1.087e-01, -8.518e-02)); + r += mul(s7_5, M4(-7.470e-02, 1.550e-02, 5.159e-02, 1.604e-01, -2.909e-02, -3.405e-02, -4.281e-02, 1.604e-01, -7.784e-02, 9.256e-02, 1.461e-01, 1.052e-01, -6.975e-02, 2.712e-02, 9.854e-02, 6.271e-02)); + r += mul(s7_6, M4(5.877e-02, -1.074e-01, -1.386e-01, 5.543e-02, -6.297e-03, -4.513e-02, -5.606e-02, 8.423e-02, -9.024e-02, -8.228e-02, 5.863e-02, -1.366e-02, -6.564e-02, -4.470e-04, 1.867e-03, 1.007e-01)); + r += mul(s7_7, M4(5.408e-02, -1.122e-02, -1.600e-01, -2.141e-02, 2.848e-01, -3.632e-02, 5.869e-02, -1.663e-01, 1.365e-01, 8.206e-03, 6.789e-02, -3.498e-02, 3.298e-02, -9.073e-02, 4.929e-02, -3.582e-02)); + r += mul(s7_8, M4(6.673e-02, -8.083e-02, -5.151e-03, -4.743e-02, 3.548e-02, -1.058e-01, -6.822e-02, -2.533e-02, -2.161e-02, -8.336e-02, 1.863e-02, -4.424e-02, 8.914e-02, 1.755e-02, 6.385e-02, 3.120e-02)); + r += V4(-4.162e-03, -6.545e-02, 3.186e-02, -6.206e-02); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); 
+ if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = 
-max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, 
s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.038e-02, -1.027e-01, 5.822e-02, -6.061e-02, -7.843e-02, 5.764e-02, -4.207e-02, -6.612e-02, 5.435e-02, -1.535e-02, -2.172e-02, 6.898e-02, -6.557e-02, 8.073e-02, 1.011e-02, 
-4.904e-02)); + r += mul(s0_1, M4(8.764e-04, 1.018e-01, 1.772e-01, 1.661e-01, 6.140e-02, -2.055e-02, -1.707e-01, -7.450e-02, 7.612e-02, -1.036e-01, 1.102e-01, -1.187e-01, -1.174e-02, -3.453e-02, -9.480e-02, -9.441e-02)); + r += mul(s0_2, M4(-5.569e-02, -1.362e-01, 1.829e-01, 1.028e-01, 2.850e-02, -2.040e-03, 5.163e-02, 3.880e-02, 5.329e-02, 5.051e-03, -2.657e-02, -4.745e-02, -5.201e-02, -1.034e-02, 3.430e-02, -7.121e-02)); + r += mul(s0_3, M4(-1.999e-01, 2.646e-01, -1.668e-02, -2.097e-02, -3.757e-02, 2.681e-03, 9.334e-02, -7.001e-03, 1.230e-01, -3.327e-02, 3.168e-03, 2.902e-02, 6.001e-02, 9.742e-02, -1.138e-04, 8.002e-02)); + r += mul(s0_4, M4(4.957e-02, 2.881e-01, -2.932e-01, 9.982e-02, -5.757e-02, -1.387e-01, 1.029e-01, -1.149e-01, -9.208e-02, 2.486e-01, 3.498e-02, 4.900e-02, 4.180e-02, 8.201e-03, 1.992e-01, -1.461e-02)); + r += mul(s0_5, M4(5.227e-02, -3.034e-02, -2.078e-02, 3.595e-02, -8.179e-02, 2.010e-02, 1.053e-01, 1.640e-02, -1.606e-01, 3.057e-02, -4.679e-02, 9.908e-02, 4.368e-02, -1.294e-02, -4.690e-02, 8.246e-02)); + r += mul(s0_6, M4(2.268e-02, 1.556e-01, -1.868e-02, 3.933e-02, 3.993e-02, -8.431e-02, -6.621e-02, 4.407e-02, -2.419e-02, 8.918e-02, -4.738e-02, -7.020e-02, 4.749e-02, -1.842e-02, -5.654e-02, -8.227e-03)); + r += mul(s0_7, M4(-5.760e-04, 1.471e-01, -1.820e-01, 6.890e-02, 1.373e-01, 9.203e-03, -2.028e-01, 9.876e-02, 1.083e-01, 5.770e-02, -3.253e-02, 2.297e-02, 1.292e-02, 1.122e-01, -1.429e-01, -8.835e-02)); + r += mul(s0_8, M4(-5.292e-02, 1.342e-01, -2.273e-03, 2.506e-01, 1.349e-02, 3.801e-03, -1.132e-01, -8.694e-02, 1.308e-01, 1.180e-02, -1.627e-01, 1.411e-01, 3.735e-02, -1.071e-01, 1.332e-02, -7.277e-02)); + r += mul(s1_0, M4(2.520e-01, 1.005e-02, -6.599e-02, -1.435e-02, -8.251e-02, 9.219e-02, 2.099e-01, 4.397e-02, 7.254e-02, 5.108e-02, -6.084e-02, 2.280e-01, -7.666e-02, 2.139e-01, -1.559e-01, 3.090e-02)); + r += mul(s1_1, M4(-1.438e-01, 1.429e-01, -1.312e-01, 2.296e-02, -1.380e-01, -1.234e-02, -9.997e-02, 1.424e-01, -1.930e-01, -2.154e-01, 
-5.713e-03, -1.436e-01, -1.227e-01, -6.206e-02, 1.316e-01, 1.183e-01)); + r += mul(s1_2, M4(-3.133e-02, -7.789e-03, 9.705e-02, -2.099e-01, 2.683e-01, 8.855e-02, -1.552e-01, -2.798e-01, -4.940e-02, 7.202e-02, 4.606e-02, -1.568e-01, -1.705e-01, -2.370e-02, 6.812e-02, 5.503e-02)); + r += mul(s1_3, M4(-2.010e-01, 7.593e-02, 1.140e-01, -1.111e-01, 3.125e-01, 1.057e-03, -2.343e-01, 2.188e-01, 1.613e-01, -5.725e-02, 2.443e-01, 1.606e-01, -1.111e-01, -3.890e-02, -1.082e-01, 2.268e-01)); + r += mul(s1_4, M4(-1.969e-02, 1.231e-01, -1.682e-01, -2.341e-01, -2.476e-01, -2.521e-02, 1.433e-01, 2.585e-01, -5.905e-01, -6.658e-03, 3.786e-01, 1.818e-02, -3.195e-01, -4.331e-02, 2.499e-01, 3.572e-01)); + r += mul(s1_5, M4(-2.933e-02, 6.255e-02, 5.270e-02, -6.322e-02, -6.288e-01, 3.056e-02, 2.344e-01, -5.509e-01, 4.417e-02, -1.143e-01, 1.720e-01, -3.562e-01, -1.757e-02, -7.980e-02, -5.221e-02, 1.142e-01)); + r += mul(s1_6, M4(4.704e-02, -7.956e-03, -4.388e-02, 8.044e-02, 1.961e-01, 3.794e-02, -3.145e-01, 4.592e-02, 7.526e-02, 5.595e-02, -8.909e-02, -8.451e-02, -3.933e-02, 7.445e-02, -4.443e-01, -4.812e-02)); + r += mul(s1_7, M4(-6.361e-03, -2.334e-02, 6.683e-02, 5.801e-02, 4.036e-01, 3.348e-01, -5.469e-01, -4.894e-01, 7.236e-02, -1.876e-02, 3.989e-02, -1.081e-01, 2.781e-02, -4.224e-02, 7.930e-02, 8.278e-02)); + r += mul(s1_8, M4(7.127e-03, -7.018e-02, -1.531e-02, -2.631e-02, 1.443e-01, -1.601e-01, -2.977e-01, -3.354e-01, 2.859e-02, 3.597e-02, -6.660e-02, -9.328e-02, -2.760e-03, -1.686e-01, 4.671e-03, -5.109e-02)); + r += mul(s2_0, M4(1.738e-01, 2.499e-01, 1.767e-01, -2.066e-01, -2.718e-02, 5.185e-02, 1.130e-01, 4.579e-02, -5.349e-02, -1.729e-02, 9.773e-03, 7.695e-02, -4.065e-02, -3.152e-02, -5.521e-02, -4.779e-02)); + r += mul(s2_1, M4(1.050e-01, -2.569e-02, 1.166e-01, -4.488e-02, 5.872e-03, -1.715e-01, -4.873e-02, -8.728e-02, -5.655e-02, -5.162e-02, -2.014e-02, 5.109e-02, -2.352e-01, -6.745e-02, -4.318e-02, -1.529e-01)); + r += mul(s2_2, M4(-5.747e-02, -1.001e-04, -1.659e-01, 
-1.151e-02, 1.184e-01, 1.088e-02, 9.885e-02, -4.186e-02, 2.511e-03, -4.830e-02, -9.706e-02, -2.413e-02, -1.844e-02, 3.858e-02, -7.717e-03, -6.948e-02)); + r += mul(s2_3, M4(7.010e-02, -1.670e-02, 2.139e-01, 7.957e-02, -1.501e-01, 1.199e-02, 9.177e-02, -1.012e-01, 8.116e-03, -3.847e-02, -8.662e-02, 3.384e-02, 2.598e-02, 1.091e-01, 1.689e-02, -1.996e-02)); + r += mul(s2_4, M4(5.851e-01, -4.156e-02, -2.361e-02, -1.495e-01, -4.768e-03, 2.125e-02, -5.071e-02, -2.313e-02, 1.628e-01, -1.056e-01, 4.753e-02, -5.418e-02, -1.305e-01, -1.263e-01, 3.208e-02, -7.109e-03)); + r += mul(s2_5, M4(2.752e-01, -4.555e-04, -3.288e-01, 9.494e-02, 6.333e-02, 2.145e-01, -5.950e-02, -4.529e-02, -1.564e-01, 4.102e-02, 1.456e-01, 1.577e-01, -2.017e-01, -1.197e-02, -1.137e-01, -4.620e-02)); + r += mul(s2_6, M4(3.852e-01, -3.149e-02, -2.738e-02, -1.856e-02, -2.038e-02, -1.391e-02, 1.545e-01, 4.244e-02, -7.351e-02, -1.266e-01, 6.077e-02, 2.922e-02, -1.241e-01, 5.051e-02, -1.331e-01, 1.044e-01)); + r += mul(s2_7, M4(-1.448e-01, 1.017e-01, 1.255e-01, -1.116e-01, 6.711e-02, -4.391e-02, 4.815e-03, 1.760e-01, -8.878e-02, 5.888e-03, -1.435e-01, -1.693e-01, -2.145e-01, -6.326e-02, 1.285e-02, -7.705e-02)); + r += mul(s2_8, M4(-9.547e-02, -1.173e-01, 1.793e-01, -7.501e-02, 7.400e-02, -6.034e-03, 3.186e-02, 1.514e-01, 2.215e-02, -4.265e-02, 6.466e-03, 2.240e-02, -1.609e-01, 7.133e-02, -3.133e-02, -7.962e-03)); + r += mul(s3_0, M4(1.172e-02, 2.080e-01, 1.168e-01, -5.939e-02, -6.782e-02, -4.500e-02, 9.755e-04, 3.416e-02, 2.640e-01, 9.256e-02, 3.313e-02, 6.642e-02, -5.530e-02, -4.369e-02, 9.257e-02, -6.417e-02)); + r += mul(s3_1, M4(7.388e-02, -9.949e-02, -7.112e-02, -6.696e-02, -1.935e-02, -5.532e-02, -3.353e-02, 8.160e-02, 9.737e-02, -5.728e-02, -2.573e-02, 2.480e-01, -2.191e-01, 1.059e-01, 2.171e-01, 2.034e-02)); + r += mul(s3_2, M4(7.582e-02, 1.122e-01, -1.356e-03, -2.247e-01, 8.112e-02, 5.689e-02, -1.266e-01, -1.438e-01, 1.457e-01, 3.666e-02, -3.664e-02, -9.318e-02, 3.935e-02, 1.949e-02, -7.842e-02, 
-3.551e-02)); + r += mul(s3_3, M4(-1.352e-01, -2.188e-01, -8.137e-02, 1.343e-01, -9.011e-02, 1.735e-01, -2.651e-01, -4.401e-02, -1.104e-01, -1.712e-02, -4.738e-02, 9.005e-02, 2.003e-01, 7.993e-02, 2.341e-02, -5.732e-02)); + r += mul(s3_4, M4(-3.021e-01, -1.628e-01, -9.939e-02, -5.101e-02, 1.852e-01, 4.853e-02, -2.408e-01, 1.685e-01, -1.590e-01, -1.416e-01, 2.550e-01, 8.751e-02, -1.338e-01, -7.205e-02, 7.076e-02, 1.652e-01)); + r += mul(s3_5, M4(1.865e-02, -1.665e-02, 2.272e-02, 3.661e-02, -1.933e-01, 2.391e-01, -4.422e-01, -2.826e-02, -9.839e-02, -3.878e-03, -1.376e-01, 8.164e-03, 2.325e-02, 5.991e-02, -1.257e-01, -1.250e-01)); + r += mul(s3_6, M4(2.248e-02, -1.574e-01, 1.337e-01, 2.721e-02, -7.530e-02, 6.672e-02, -1.552e-01, -3.174e-02, -6.926e-03, -8.575e-02, 6.217e-02, -7.909e-02, 3.127e-02, 2.158e-01, -1.689e-01, -8.679e-02)); + r += mul(s3_7, M4(-1.810e-02, 1.414e-01, -1.642e-01, 1.957e-01, 9.396e-02, 9.682e-02, 6.155e-02, -5.372e-02, 3.441e-03, 1.170e-01, -4.898e-04, 1.152e-01, 1.871e-01, -2.607e-01, -9.338e-02, 1.715e-01)); + r += mul(s3_8, M4(-3.406e-02, -5.075e-02, 7.496e-02, -1.226e-01, -1.136e-02, 6.224e-02, -2.319e-01, 2.711e-02, 1.313e-01, 8.848e-02, -3.355e-01, -2.899e-01, 3.552e-02, 4.240e-02, -1.280e-01, -1.896e-01)); + r += mul(s4_0, M4(2.978e-03, 4.338e-02, 2.667e-01, -3.428e-02, 4.550e-02, -5.447e-02, -3.128e-01, -5.822e-02, 7.073e-02, 9.923e-02, -1.416e-01, -8.767e-02, -6.636e-02, 1.224e-01, -4.772e-02, 2.206e-02)); + r += mul(s4_1, M4(5.822e-02, 4.371e-02, 5.661e-02, 9.083e-04, -1.675e-01, 4.282e-02, 9.959e-02, 2.148e-02, 6.614e-02, -6.577e-02, 9.507e-02, 1.344e-01, -2.506e-02, -6.194e-02, 5.761e-02, -1.130e-01)); + r += mul(s4_2, M4(-6.944e-02, -5.566e-02, 1.012e-01, -4.955e-02, -4.260e-02, 4.159e-02, -1.214e-01, -1.117e-01, -6.724e-03, -8.405e-02, -1.590e-01, 6.350e-02, 5.316e-02, -2.193e-03, -6.465e-02, 6.182e-02)); + r += mul(s4_3, M4(-7.619e-02, -6.501e-02, 2.396e-01, 2.766e-02, -9.113e-02, -2.040e-01, 1.268e-01, -5.021e-02, -5.359e-02, 
-3.492e-03, 9.936e-02, 1.327e-01, -1.176e-02, 1.324e-01, -6.895e-02, -1.033e-01)); + r += mul(s4_4, M4(-9.050e-02, -1.741e-01, 1.353e-01, -2.499e-02, 1.499e-01, 6.837e-02, 1.544e-01, -2.328e-01, -1.001e-01, -3.200e-01, 2.555e-02, 3.163e-02, 5.167e-03, 6.599e-02, 5.332e-02, -2.383e-02)); + r += mul(s4_5, M4(-1.380e-01, -6.767e-02, 7.205e-02, 7.794e-02, 3.260e-02, 8.421e-03, 9.155e-02, 2.219e-03, -2.114e-01, -5.606e-02, 9.497e-02, 1.809e-01, 3.274e-02, -3.988e-03, -9.138e-02, -8.120e-02)); + r += mul(s4_6, M4(-2.992e-02, -2.485e-03, 2.118e-02, 1.017e-02, 9.404e-02, -4.484e-02, -2.044e-01, 3.509e-02, -8.506e-02, -7.819e-02, -2.538e-02, -2.135e-02, 1.035e-01, 8.211e-02, -6.053e-02, 2.719e-02)); + r += mul(s4_7, M4(-1.954e-02, -1.303e-01, 2.458e-02, 1.259e-01, 6.521e-02, 7.827e-02, -2.962e-02, -1.487e-01, 1.335e-01, -1.090e-01, 5.524e-02, 1.814e-01, -1.546e-01, 1.165e-01, 4.443e-04, -1.573e-01)); + r += mul(s4_8, M4(-3.494e-03, 4.970e-02, 4.422e-02, 8.782e-02, -5.605e-02, 2.573e-01, -2.386e-01, -1.946e-01, 2.648e-02, -1.001e-01, -5.948e-02, 1.535e-01, 3.174e-02, 8.339e-02, -5.591e-02, 1.059e-02)); + r += mul(s5_0, M4(3.668e-02, 5.965e-02, -1.721e-02, -3.068e-02, 1.906e-02, -1.586e-01, -1.670e-01, 1.607e-01, -1.702e-01, -1.249e-01, -2.854e-01, 2.574e-02, -1.970e-01, -1.641e-02, -8.082e-02, -5.110e-02)); + r += mul(s5_1, M4(-1.574e-01, 1.272e-01, 1.492e-02, -7.330e-03, -2.324e-01, -6.950e-02, 1.762e-02, 2.972e-02, -3.110e-01, -6.619e-03, -3.372e-02, -9.828e-02, -1.360e-02, -9.453e-02, 9.109e-02, -6.697e-02)); + r += mul(s5_2, M4(3.255e-02, 1.122e-01, 3.934e-02, -1.884e-01, -2.090e-01, -6.767e-02, 6.147e-02, 1.271e-01, -9.991e-02, -8.133e-02, -9.660e-02, -4.156e-02, 2.997e-02, -9.234e-02, -6.113e-02, -1.478e-01)); + r += mul(s5_3, M4(-5.160e-02, -1.590e-02, -1.875e-01, -4.392e-02, -5.295e-02, -1.335e-01, -4.673e-02, 5.958e-02, -6.313e-02, -5.531e-02, 1.475e-01, 1.547e-02, 1.027e-01, -6.958e-02, 2.418e-02, 2.403e-02)); + r += mul(s5_4, M4(9.424e-02, -8.101e-02, -2.912e-01, 
-1.108e-01, 1.939e-01, 3.821e-02, -5.703e-02, 1.513e-02, 9.871e-02, 4.350e-02, 2.875e-02, -1.627e-01, -2.624e-02, -1.394e-01, 1.132e-01, -1.424e-02)); + r += mul(s5_5, M4(-6.870e-02, -6.601e-02, -1.372e-01, -1.179e-01, 1.986e-01, 5.603e-02, 1.706e-01, -9.338e-03, 1.029e-01, -5.792e-02, 6.861e-02, -7.211e-02, -4.139e-02, -2.509e-01, -1.461e-01, -1.405e-01)); + r += mul(s5_6, M4(7.451e-02, 2.164e-01, -2.009e-01, 2.788e-02, 1.175e-01, -9.648e-02, -1.430e-02, -7.214e-02, 9.366e-02, -6.314e-03, -1.510e-01, -6.284e-03, -1.500e-01, 2.851e-02, 5.196e-02, 1.170e-01)); + r += mul(s5_7, M4(-3.499e-04, 8.393e-02, -1.515e-01, 5.736e-02, 3.603e-02, 1.352e-02, 9.950e-02, -2.798e-02, 2.069e-01, 7.262e-02, 4.129e-02, -1.399e-01, -1.792e-01, -3.952e-01, 1.173e-01, -4.999e-02)); + r += mul(s5_8, M4(-4.190e-02, 1.134e-01, -2.901e-01, -8.789e-02, -1.495e-01, 2.644e-01, -9.540e-02, -3.225e-01, -7.771e-02, 1.823e-01, -2.192e-01, -1.170e-01, -4.472e-03, -1.040e-01, 1.290e-01, 1.096e-02)); + r += mul(s6_0, M4(-1.807e-01, -3.467e-02, -1.978e-01, 4.916e-03, 6.145e-02, 1.576e-02, 1.923e-02, -1.277e-02, 2.114e-02, -3.700e-02, -3.673e-02, -9.642e-02, 1.059e-01, 6.235e-02, 6.436e-02, 2.042e-02)); + r += mul(s6_1, M4(-9.068e-02, 5.704e-02, -8.745e-02, -6.303e-02, 2.124e-01, -1.943e-01, -1.370e-01, -1.624e-01, -1.096e-01, 4.166e-02, 6.197e-02, -1.716e-02, 1.131e-01, 4.626e-02, 8.055e-02, 6.132e-02)); + r += mul(s6_2, M4(7.138e-02, -5.989e-02, 1.507e-01, 5.818e-02, 1.495e-01, -2.149e-02, -7.632e-02, -1.873e-01, 7.602e-02, 1.672e-02, 1.305e-01, 5.485e-02, -1.106e-01, 9.418e-02, 2.097e-01, -1.107e-01)); + r += mul(s6_3, M4(-7.939e-02, -9.415e-02, -3.253e-01, 5.550e-02, -5.333e-02, -6.900e-02, 2.532e-01, -1.236e-02, 6.154e-02, 2.696e-03, -1.090e-01, 9.496e-02, -5.117e-02, 2.633e-01, 9.791e-02, -5.712e-02)); + r += mul(s6_4, M4(-1.092e-02, 5.562e-02, 2.001e-02, -8.640e-02, 2.022e-01, 1.713e-01, 7.558e-02, -2.621e-02, -2.034e-01, -7.972e-02, -9.542e-02, -7.372e-02, -2.119e-02, -9.791e-02, -4.815e-02, 
1.717e-01)); + r += mul(s6_5, M4(-6.813e-02, -1.132e-01, 9.465e-02, -2.221e-02, 8.347e-02, -2.863e-02, 1.618e-02, 1.241e-01, -1.080e-01, -4.588e-02, -1.048e-02, 8.343e-02, 2.000e-02, -7.991e-03, 3.295e-01, -7.222e-02)); + r += mul(s6_6, M4(-1.257e-01, 2.813e-02, -5.140e-02, -1.422e-01, 1.255e-01, 1.617e-01, -9.818e-02, 9.477e-03, -1.898e-03, 8.431e-02, 1.631e-01, -2.976e-02, 2.819e-01, 6.733e-02, 4.286e-02, 8.241e-02)); + r += mul(s6_7, M4(-1.021e-01, -1.888e-01, -8.428e-02, 1.146e-01, 1.510e-01, -5.692e-02, -1.291e-01, 5.237e-02, 2.558e-02, -3.653e-02, 1.264e-01, 3.667e-02, -9.369e-02, 1.182e-01, -2.564e-01, -4.997e-02)); + r += mul(s6_8, M4(4.274e-02, 9.755e-02, -9.820e-02, -5.409e-03, 9.648e-03, -3.442e-02, -1.963e-01, -9.613e-02, 8.596e-02, -2.486e-02, -7.731e-02, 1.119e-01, 3.031e-03, 1.189e-02, 7.198e-02, -2.010e-02)); + r += mul(s7_0, M4(-8.593e-02, 4.410e-02, 5.916e-02, 5.405e-02, 1.801e-02, 3.804e-02, 8.731e-02, 4.344e-02, 2.200e-01, -2.394e-02, 1.496e-01, 6.278e-02, -7.431e-02, 1.860e-01, 7.845e-02, -9.594e-02)); + r += mul(s7_1, M4(-1.069e-01, -2.582e-02, 5.910e-02, -6.761e-02, -1.506e-01, -8.254e-02, 8.958e-02, -4.935e-02, -1.770e-01, -2.038e-01, 7.928e-02, 3.037e-01, -1.013e-01, 2.359e-02, -1.057e-01, -5.003e-02)); + r += mul(s7_2, M4(6.604e-02, -8.159e-02, 9.681e-02, -9.276e-03, 4.673e-02, -1.199e-02, -2.188e-01, 4.027e-02, 1.614e-01, 4.145e-02, 2.027e-01, 4.471e-02, -5.706e-02, 8.516e-03, -2.315e-02, -5.171e-02)); + r += mul(s7_3, M4(5.693e-02, -2.617e-03, 9.846e-02, 1.033e-01, -1.299e-01, -6.217e-03, 1.042e-01, -1.079e-01, -2.066e-01, -2.701e-01, 3.056e-01, 9.355e-02, 7.681e-02, 1.426e-01, -1.712e-01, 9.973e-02)); + r += mul(s7_4, M4(1.308e-01, 5.535e-02, 1.887e-01, -7.625e-02, 4.354e-02, 4.574e-02, -7.604e-03, -1.578e-02, -1.550e-01, 1.512e-01, -3.109e-01, -3.903e-01, -1.085e-01, -1.300e-01, 8.819e-02, 3.659e-02)); + r += mul(s7_5, M4(-5.433e-02, 1.856e-04, 1.124e-01, -1.339e-01, -1.416e-01, -1.226e-02, -7.421e-02, -3.983e-02, -1.132e-01, 
1.275e-01, -4.816e-02, -1.477e-01, 1.074e-01, 7.671e-02, 1.985e-01, 1.291e-01)); + r += mul(s7_6, M4(-3.036e-02, -1.725e-02, -2.685e-02, -2.915e-03, 3.075e-02, 1.702e-01, -9.196e-02, -1.142e-01, 6.406e-02, 7.094e-03, -1.987e-02, 9.400e-02, 6.958e-02, 4.125e-02, -2.776e-02, 2.524e-02)); + r += mul(s7_7, M4(8.227e-02, -1.415e-01, 3.321e-02, 1.146e-03, -5.870e-02, -4.962e-02, 4.423e-02, -8.562e-03, -1.255e-01, -2.399e-02, -9.390e-02, 2.133e-01, 9.043e-02, -1.159e-02, -1.988e-01, 5.948e-03)); + r += mul(s7_8, M4(3.438e-02, 1.674e-01, -9.081e-02, 1.104e-01, -8.589e-02, -1.508e-01, -6.714e-03, -1.057e-02, -3.694e-02, 9.586e-02, -8.628e-03, 1.749e-01, 1.191e-02, -7.848e-02, -1.482e-01, -2.125e-01)); + r += V4(2.042e-02, -1.567e-02, 1.777e-02, 8.206e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.871e-01, -2.821e-02, -2.131e-01, -2.105e-02, 1.113e-02, -8.090e-02, -6.108e-02, -1.710e-02, -2.215e-02, -1.474e-03, 1.704e-02, 3.001e-02, -5.379e-02, 4.545e-02, -1.134e-01, -2.035e-01)); + r += mul(s0_1, M4(-9.495e-04, -1.423e-01, -2.280e-02, -2.646e-02, -4.118e-02, 1.052e-01, -6.480e-02, -3.867e-02, -1.676e-02, -2.095e-02, 7.072e-02, 1.037e-01, 1.288e-01, -1.114e-02, 1.837e-02, -2.369e-02)); + r += mul(s0_2, M4(-1.243e-02, -1.434e-01, 8.768e-03, 2.108e-01, 1.194e-01, 5.633e-02, -1.265e-02, -9.167e-02, -2.684e-02, 
-1.275e-01, -3.768e-02, -5.514e-02, -4.753e-02, 3.975e-02, 1.264e-01, 3.772e-02)); + r += mul(s0_3, M4(-5.436e-02, 1.387e-01, -4.905e-02, 1.009e-02, 2.186e-04, -9.563e-02, -7.090e-02, -1.014e-01, 2.932e-03, 1.358e-01, -2.002e-02, 3.347e-02, 7.167e-02, 1.143e-01, 1.485e-01, -9.345e-02)); + r += mul(s0_4, M4(-1.239e-01, -6.966e-02, 1.447e-01, -1.225e-01, -6.644e-03, -6.150e-02, -1.980e-01, 2.807e-02, -3.973e-03, 1.241e-02, 1.473e-01, -2.119e-01, -5.985e-02, -1.174e-02, -1.191e-01, -1.458e-01)); + r += mul(s0_5, M4(-1.331e-01, -8.315e-02, -1.300e-01, -1.763e-01, -6.882e-02, -4.095e-03, -3.071e-02, -1.962e-01, -1.584e-03, -1.034e-01, -4.404e-02, -9.630e-02, -1.096e-02, -3.780e-02, -3.443e-02, 5.438e-02)); + r += mul(s0_6, M4(-7.963e-02, -3.774e-02, 2.984e-02, 7.809e-02, -5.955e-02, -3.516e-02, -2.589e-02, -6.405e-02, 1.241e-02, -4.356e-02, 2.731e-02, -4.592e-02, -1.681e-02, -2.013e-02, 1.026e-04, 6.043e-02)); + r += mul(s0_7, M4(6.424e-02, -1.309e-01, -1.238e-01, -4.344e-02, 1.308e-03, -1.141e-01, -5.711e-02, -1.666e-01, -3.539e-03, -1.410e-02, -6.655e-02, -2.380e-02, 1.941e-02, -7.815e-02, 1.136e-01, -3.781e-02)); + r += mul(s0_8, M4(-2.276e-02, -2.375e-01, -2.214e-02, 5.372e-03, 6.425e-03, -7.252e-02, 7.044e-02, -8.419e-03, -7.296e-02, -4.230e-02, 3.871e-03, 3.827e-02, -3.834e-02, 7.722e-02, 2.097e-03, -8.225e-02)); + r += mul(s1_0, M4(4.056e-02, -3.233e-02, -6.508e-02, -6.274e-02, -1.572e-01, -1.340e-01, 6.801e-02, 8.273e-02, 8.778e-02, 8.204e-02, 1.450e-01, 1.294e-01, 6.334e-02, -2.436e-01, -2.443e-01, 1.411e-01)); + r += mul(s1_1, M4(1.442e-01, 1.466e-03, -9.557e-02, -5.947e-02, 1.829e-01, 2.630e-01, -6.553e-02, 1.282e-02, 2.104e-01, 7.472e-03, 2.137e-01, -1.518e-02, 1.268e-01, -3.356e-01, -5.628e-02, -1.184e-01)); + r += mul(s1_2, M4(1.325e-01, -6.010e-02, -8.943e-03, 1.608e-02, -4.346e-02, 2.257e-02, 2.482e-02, -1.306e-01, 1.403e-01, 1.675e-01, -3.712e-02, -3.032e-02, -1.534e-01, -2.324e-01, 9.234e-02, 6.583e-02)); + r += mul(s1_3, M4(7.828e-02, -4.908e-02, 
-3.408e-02, 4.230e-02, 2.449e-02, 3.121e-01, -8.996e-02, -8.823e-02, -1.253e-01, 1.874e-01, 1.196e-01, 1.294e-01, -1.330e-01, 2.173e-01, 6.471e-02, 2.821e-02)); + r += mul(s1_4, M4(2.224e-02, -2.149e-02, 2.666e-01, -9.329e-02, 2.452e-01, -5.926e-01, -1.972e-01, 1.340e-02, 7.994e-03, -1.694e-01, 5.613e-01, -3.168e-01, -1.292e-01, 1.472e-02, -1.207e-01, -1.759e-01)); + r += mul(s1_5, M4(-2.011e-02, 4.120e-02, 8.215e-02, -2.399e-01, -2.318e-01, -8.668e-02, 1.445e-01, 1.350e-01, -1.501e-01, -1.454e-02, -8.517e-02, -1.553e-01, 3.227e-02, 1.248e-02, 5.942e-02, 1.594e-01)); + r += mul(s1_6, M4(7.560e-03, 4.543e-02, -6.540e-02, 4.231e-02, 2.725e-02, 1.371e-02, 2.006e-01, 1.437e-01, -4.834e-02, -1.133e-02, 8.049e-02, -2.226e-02, -8.326e-02, -8.656e-02, -7.931e-02, 4.502e-02)); + r += mul(s1_7, M4(-6.518e-02, 8.934e-02, -1.219e-01, -4.372e-02, 4.623e-02, -3.027e-01, -1.326e-01, -1.512e-01, -3.234e-02, -6.440e-02, 5.738e-02, -4.798e-03, 6.094e-02, 2.635e-02, 1.314e-01, -7.782e-02)); + r += mul(s1_8, M4(6.408e-02, 4.318e-02, -1.060e-02, -6.661e-02, -2.892e-01, -5.121e-01, 2.127e-01, -2.559e-01, -1.175e-01, -1.270e-01, 8.104e-03, -6.716e-02, -5.801e-02, 1.361e-01, -2.497e-02, -3.579e-02)); + r += mul(s2_0, M4(-2.161e-01, 1.363e-01, 8.227e-02, -6.284e-02, -5.903e-02, 5.772e-03, 2.087e-02, -1.065e-01, -1.529e-02, 1.757e-01, 6.662e-02, -2.364e-02, -6.565e-02, 7.983e-02, -7.751e-02, -7.226e-02)); + r += mul(s2_1, M4(-2.017e-02, 4.055e-02, 9.606e-02, -2.980e-02, -3.927e-02, 1.147e-01, 2.286e-02, 2.563e-01, 3.504e-02, 1.352e-01, 5.012e-02, -3.308e-04, -1.470e-01, -3.391e-02, 1.755e-01, 9.478e-02)); + r += mul(s2_2, M4(-1.474e-01, -1.028e-01, 7.540e-02, -1.023e-01, -7.313e-02, 1.066e-01, 5.016e-02, -1.897e-01, -5.914e-02, 7.808e-02, -4.346e-02, -2.776e-02, 2.904e-02, 8.417e-02, -4.232e-02, 1.674e-01)); + r += mul(s2_3, M4(2.335e-01, -2.842e-02, -5.739e-02, -1.221e-01, 2.011e-02, 3.406e-02, 1.656e-01, -1.083e-01, 3.551e-02, -9.249e-02, 5.859e-02, 3.906e-02, 7.709e-02, 1.910e-01, 
2.195e-02, 7.340e-02)); + r += mul(s2_4, M4(7.809e-02, -1.515e-02, 1.817e-01, -7.771e-02, 9.634e-02, 1.301e-01, -3.744e-02, 1.008e-01, -4.234e-02, 2.875e-03, -9.835e-04, 3.331e-02, 2.310e-01, -2.396e-01, -2.066e-01, -7.600e-02)); + r += mul(s2_5, M4(2.134e-02, -4.246e-02, 7.435e-03, -9.941e-03, 2.696e-02, -1.199e-01, -6.648e-03, -6.885e-02, 6.191e-03, -4.447e-02, 1.650e-02, 9.745e-02, 1.988e-01, -1.938e-01, 2.823e-01, 1.705e-01)); + r += mul(s2_6, M4(1.660e-01, -2.599e-01, 4.331e-02, -1.877e-01, 6.713e-02, 1.880e-02, 7.058e-03, -4.739e-02, -1.476e-02, 2.851e-02, -2.477e-02, -2.204e-03, 1.588e-02, 7.779e-03, -1.553e-01, 3.717e-02)); + r += mul(s2_7, M4(1.023e-01, -1.597e-02, -6.167e-02, 6.849e-02, 4.146e-02, 7.833e-03, 4.950e-02, 1.834e-03, 2.423e-03, 1.970e-02, 2.839e-02, -5.577e-02, -1.964e-01, -1.136e-01, 7.158e-03, -4.833e-02)); + r += mul(s2_8, M4(-6.461e-02, 1.330e-01, -9.992e-03, 3.494e-02, 1.050e-02, 3.294e-02, -7.505e-02, 1.301e-03, 6.948e-02, -3.319e-03, 4.139e-02, -1.808e-02, 1.410e-01, -7.249e-02, -9.410e-02, -2.916e-03)); + r += mul(s3_0, M4(9.794e-02, -8.956e-02, 9.463e-03, -3.120e-02, -1.004e-01, -1.610e-01, -4.092e-02, -2.509e-01, -9.779e-03, -2.849e-01, 9.762e-02, 3.390e-02, -7.858e-02, -2.126e-02, -8.321e-02, -4.915e-02)); + r += mul(s3_1, M4(1.035e-03, -5.190e-02, 9.745e-02, 2.534e-01, -1.810e-01, 2.817e-01, -1.096e-01, 2.089e-01, -9.686e-02, -2.929e-01, 2.868e-02, 1.402e-01, -8.799e-02, -2.324e-02, 7.078e-02, -2.115e-03)); + r += mul(s3_2, M4(9.160e-02, 2.894e-02, -8.441e-03, -1.818e-01, -1.529e-01, 9.104e-02, 6.442e-02, -2.131e-01, -3.002e-01, -1.751e-01, -1.274e-01, 7.683e-02, 2.336e-02, 6.137e-02, -2.225e-02, 9.249e-02)); + r += mul(s3_3, M4(-9.086e-02, -1.176e-01, -2.192e-01, 2.074e-02, 9.874e-02, 7.857e-02, 5.903e-02, -1.617e-01, -1.056e-01, -2.939e-01, 1.331e-02, -2.528e-02, -9.598e-02, -1.886e-02, 1.347e-02, -2.747e-02)); + r += mul(s3_4, M4(3.116e-02, -1.008e-01, 1.747e-01, 2.508e-02, 1.102e-01, 8.567e-02, -1.714e-01, -3.481e-02, 
-2.085e-01, -3.166e-02, -9.038e-02, 1.993e-01, 1.475e-01, -1.922e-01, -9.841e-02, -2.740e-02)); + r += mul(s3_5, M4(-2.964e-02, 3.304e-02, -5.346e-02, -3.595e-02, -1.856e-02, -2.969e-01, -1.847e-01, -4.361e-02, -1.784e-01, 9.786e-02, 1.694e-01, 1.664e-01, 7.496e-02, 2.784e-02, 1.145e-01, -2.746e-02)); + r += mul(s3_6, M4(-4.400e-02, 2.339e-02, 6.149e-02, 1.891e-03, 1.015e-01, -3.804e-03, 1.096e-01, -7.637e-02, -7.288e-02, -9.137e-02, -7.551e-02, 8.777e-03, -6.516e-02, 1.604e-02, 4.501e-02, -5.608e-02)); + r += mul(s3_7, M4(3.896e-03, -7.334e-02, -3.673e-02, 2.116e-03, 1.060e-01, 2.827e-03, 5.156e-02, -1.699e-01, -3.804e-02, -1.013e-02, -2.048e-01, 1.115e-01, -5.598e-02, 1.942e-01, -3.608e-02, -9.662e-03)); + r += mul(s3_8, M4(-1.038e-01, 4.401e-02, 1.331e-02, -1.917e-02, 9.200e-02, 1.028e-01, 3.215e-02, -4.704e-02, -1.374e-01, -1.129e-01, -8.069e-02, -6.331e-02, -2.008e-02, 1.892e-02, 7.121e-03, 1.436e-02)); + r += mul(s4_0, M4(-7.756e-02, 2.062e-02, 4.399e-02, 1.107e-02, 6.661e-02, -5.046e-03, 1.393e-01, -6.560e-02, -1.426e-01, -1.762e-01, 8.760e-03, -1.763e-02, -5.790e-02, -5.544e-02, -7.877e-02, -5.698e-02)); + r += mul(s4_1, M4(-6.876e-02, -5.972e-02, 3.602e-02, 1.548e-01, -1.600e-01, 1.551e-01, -9.600e-02, -2.685e-01, -2.378e-02, -4.005e-02, -3.964e-02, 2.882e-01, -5.547e-02, 1.104e-01, 8.137e-02, 1.379e-01)); + r += mul(s4_2, M4(-9.239e-02, 7.299e-02, 4.045e-02, 5.511e-03, -1.197e-02, 2.754e-02, 2.256e-01, 1.320e-02, 2.351e-02, -1.223e-01, 5.086e-02, 7.153e-02, 5.949e-02, 2.832e-03, -5.051e-03, 1.528e-02)); + r += mul(s4_3, M4(-1.235e-01, 6.233e-02, -8.211e-02, -1.093e-01, -1.089e-01, 7.827e-03, 1.260e-01, 3.940e-02, -1.223e-01, 2.037e-02, -3.039e-03, -1.007e-02, -1.453e-01, 1.705e-01, 5.045e-02, -6.679e-02)); + r += mul(s4_4, M4(-1.850e-01, 1.124e-01, -2.744e-02, 4.633e-01, -1.888e-02, -1.497e-01, 2.819e-01, -9.535e-02, 4.777e-02, 9.229e-03, -1.721e-01, 1.564e-01, -2.240e-01, 8.776e-02, -1.616e-01, 2.031e-01)); + r += mul(s4_5, M4(2.790e-02, 6.465e-02, 
1.183e-01, 6.803e-02, 2.085e-01, 8.519e-02, 5.526e-02, 1.918e-02, 9.109e-02, 1.322e-01, 1.707e-01, 9.906e-02, 6.375e-02, 1.046e-01, 4.839e-02, -7.507e-02)); + r += mul(s4_6, M4(-5.825e-02, 1.863e-02, -2.560e-02, -1.343e-01, -6.049e-02, -8.857e-02, 6.271e-02, -8.805e-02, 4.986e-02, 1.458e-01, 1.089e-01, -1.048e-02, -3.973e-02, -8.065e-02, 3.775e-02, -3.309e-02)); + r += mul(s4_7, M4(-9.327e-02, 4.840e-02, -1.265e-01, 1.491e-01, 7.817e-02, 4.162e-02, 1.440e-01, 1.603e-01, -5.780e-02, -1.189e-01, -7.263e-03, -1.101e-01, -1.226e-02, -3.063e-02, -3.353e-02, 2.052e-01)); + r += mul(s4_8, M4(1.751e-02, -1.135e-01, 1.571e-02, -2.205e-02, -3.998e-02, -1.429e-01, -2.788e-02, -7.556e-02, -7.037e-02, 1.664e-01, 9.417e-02, 4.116e-02, 1.210e-02, -6.077e-02, -3.109e-03, -9.682e-02)); + r += mul(s5_0, M4(4.328e-02, -2.640e-03, 1.726e-02, -6.619e-02, -7.153e-02, -5.347e-03, 1.232e-01, 1.862e-03, 1.355e-04, -5.360e-02, -9.326e-03, -1.251e-02, 2.813e-02, 5.841e-02, -3.303e-02, -1.476e-01)); + r += mul(s5_1, M4(-8.970e-03, -9.200e-02, -1.182e-01, 8.614e-02, 1.028e-02, -3.642e-02, -4.635e-02, -1.499e-01, -1.331e-01, 1.114e-01, -1.823e-01, -1.963e-01, -1.437e-01, -6.405e-02, 1.130e-01, -1.615e-02)); + r += mul(s5_2, M4(3.005e-03, 1.274e-01, 2.344e-02, 1.668e-02, -5.464e-02, 5.698e-02, 4.789e-02, -9.728e-02, 1.634e-01, -2.751e-01, 7.409e-03, -6.518e-02, 4.952e-02, 1.257e-02, 4.623e-03, -9.371e-02)); + r += mul(s5_3, M4(-9.059e-02, -1.951e-02, -3.413e-02, -8.996e-02, 5.690e-02, 4.996e-02, -9.201e-02, 1.690e-01, 1.255e-01, 6.144e-02, 6.561e-02, 4.638e-02, -4.952e-02, 1.578e-01, 2.006e-02, -1.715e-01)); + r += mul(s5_4, M4(7.592e-02, 7.317e-02, -1.401e-01, 9.003e-04, 1.105e-01, 6.094e-02, 6.376e-02, -5.125e-02, 1.325e-01, 3.113e-01, -9.730e-02, -1.553e-01, -2.130e-01, 2.451e-01, 4.230e-02, 1.606e-01)); + r += mul(s5_5, M4(5.599e-02, -2.547e-02, 3.313e-02, -7.994e-02, -5.691e-02, 9.699e-02, -7.989e-02, -1.939e-01, -2.585e-02, 1.913e-01, -4.057e-02, -7.749e-02, 1.399e-01, 2.816e-01, 
9.736e-02, -1.271e-02)); + r += mul(s5_6, M4(-3.046e-02, -5.232e-02, 6.563e-02, 9.402e-03, -9.218e-02, 8.404e-03, -3.375e-02, -9.036e-02, 4.413e-02, 3.919e-02, -2.495e-02, -1.239e-03, -3.065e-02, -2.916e-02, 4.007e-03, -2.579e-01)); + r += mul(s5_7, M4(-7.098e-03, -1.860e-03, -1.707e-01, 5.445e-03, 1.281e-01, 1.628e-01, 1.225e-01, 7.790e-02, -1.567e-03, 7.209e-02, 1.182e-01, -9.290e-02, -1.202e-01, 2.262e-01, -2.065e-02, 1.571e-01)); + r += mul(s5_8, M4(9.056e-02, -1.256e-02, 5.825e-02, -7.197e-02, -5.779e-02, -1.845e-01, -1.186e-01, -1.548e-01, -1.480e-01, 1.068e-01, -1.482e-01, -5.204e-02, -2.760e-02, -8.757e-02, 1.002e-01, -1.388e-02)); + r += mul(s6_0, M4(7.393e-03, 1.422e-01, -4.407e-02, 2.375e-02, 7.300e-02, 7.536e-02, 4.936e-02, 8.964e-02, 3.491e-03, 1.315e-02, 4.104e-02, 7.586e-02, 5.862e-02, 9.637e-02, -8.634e-02, -1.815e-02)); + r += mul(s6_1, M4(5.447e-02, -4.146e-02, -1.955e-01, 3.767e-02, -8.846e-02, -2.408e-02, 7.166e-02, 9.727e-03, 8.169e-02, -2.903e-03, -8.014e-02, -2.011e-02, 7.493e-02, -1.732e-02, 1.244e-01, 5.136e-02)); + r += mul(s6_2, M4(8.524e-02, -2.271e-02, -4.364e-02, 3.690e-02, -9.154e-02, 6.250e-02, 4.295e-02, -2.384e-01, 8.008e-02, -7.720e-06, 1.023e-01, -9.540e-02, -2.173e-01, 5.006e-02, -5.404e-02, 1.215e-01)); + r += mul(s6_3, M4(5.341e-02, -1.380e-02, 4.311e-02, -6.630e-02, -3.448e-02, -6.397e-02, 2.076e-02, 1.184e-02, 2.474e-02, -1.257e-01, -7.047e-02, 6.348e-02, 1.586e-01, 1.115e-01, 3.230e-02, -1.733e-01)); + r += mul(s6_4, M4(4.991e-02, -1.141e-01, -1.378e-01, -6.951e-03, 1.873e-01, 4.493e-02, -4.137e-02, -4.430e-02, 1.579e-01, -3.852e-02, 1.640e-01, 9.429e-02, 1.355e-01, 1.617e-01, -5.072e-01, -6.408e-02)); + r += mul(s6_5, M4(6.064e-03, -6.852e-02, -1.470e-01, 8.329e-03, 7.775e-03, -9.763e-02, -5.572e-02, 1.070e-01, 9.256e-02, 1.398e-01, -9.383e-02, -1.502e-02, -6.022e-02, 1.013e-01, 1.429e-02, -1.039e-02)); + r += mul(s6_6, M4(-8.822e-02, 4.282e-03, -7.599e-02, 4.053e-02, -2.953e-02, 3.238e-02, 1.071e-01, 5.918e-03, 
-1.500e-02, 8.788e-03, -2.782e-02, 3.525e-02, 7.210e-02, -1.199e-01, 2.845e-02, -2.940e-01)); + r += mul(s6_7, M4(-8.471e-02, 1.662e-01, -6.165e-02, 3.814e-02, 7.629e-02, 2.466e-02, 7.141e-02, -1.585e-03, -1.659e-01, 4.623e-02, -9.034e-02, 3.881e-03, -2.486e-01, 1.006e-01, 3.395e-02, -1.782e-02)); + r += mul(s6_8, M4(-5.127e-03, -6.243e-03, -1.914e-01, 2.034e-02, 1.134e-01, -2.337e-01, 8.303e-02, 5.794e-02, 5.706e-02, 8.810e-02, -1.217e-02, -9.543e-02, -5.800e-02, 1.709e-02, -5.711e-02, -1.136e-02)); + r += mul(s7_0, M4(1.130e-01, 8.798e-02, 2.158e-02, -8.030e-02, 1.381e-01, 1.055e-02, -6.276e-03, -3.859e-02, 3.457e-02, -1.523e-01, 1.370e-01, 4.508e-02, -7.988e-02, 5.912e-02, -5.479e-02, -6.642e-02)); + r += mul(s7_1, M4(1.081e-01, -1.007e-03, 1.322e-02, -8.172e-02, -1.023e-02, 9.431e-02, 8.142e-02, 2.190e-02, 3.612e-01, -2.514e-01, -1.085e-01, 1.408e-01, 7.889e-02, -1.442e-02, 1.788e-01, -7.563e-03)); + r += mul(s7_2, M4(7.983e-02, 4.121e-02, 1.014e-01, 5.905e-02, 3.933e-02, -2.331e-02, 3.376e-02, -1.003e-01, -1.513e-01, -1.039e-01, -9.056e-02, -3.042e-01, 3.447e-02, -1.100e-01, -9.911e-02, -7.386e-02)); + r += mul(s7_3, M4(7.042e-02, 7.651e-03, 6.423e-02, -4.223e-02, -9.795e-02, 1.053e-01, -6.054e-02, -9.443e-02, -2.108e-01, 7.588e-02, -1.837e-01, 4.932e-02, 6.889e-02, -4.395e-02, 2.615e-02, 9.409e-03)); + r += mul(s7_4, M4(5.884e-02, -7.014e-03, 1.347e-02, -6.053e-02, 1.090e-01, 1.379e-01, -2.118e-01, -1.471e-01, -1.746e-02, 2.390e-01, 2.742e-01, 1.095e-02, 1.466e-01, 8.033e-02, -1.313e-01, -1.958e-01)); + r += mul(s7_5, M4(7.272e-03, 1.305e-02, -1.297e-01, -9.173e-02, -5.465e-02, 8.300e-02, 3.174e-02, 1.459e-01, 5.363e-02, 2.652e-01, 3.240e-02, -1.516e-01, -1.920e-02, 8.674e-02, -2.512e-02, 8.875e-02)); + r += mul(s7_6, M4(-9.080e-02, -9.108e-02, -2.072e-02, -7.774e-02, -1.248e-01, 6.572e-02, 1.437e-01, -1.314e-01, 2.497e-02, 2.202e-01, 1.100e-01, 2.945e-02, -2.824e-02, -5.907e-02, -4.124e-02, 9.261e-02)); + r += mul(s7_7, M4(-6.926e-02, 2.453e-01, 1.731e-01, 
-4.591e-03, -1.366e-01, -1.971e-02, -1.156e-01, -1.253e-03, -1.535e-01, 2.887e-02, -1.003e-01, 8.991e-02, -3.469e-02, 1.163e-01, 1.835e-01, -3.614e-02)); + r += mul(s7_8, M4(1.541e-02, -6.370e-02, -7.276e-02, 9.494e-03, -4.712e-02, 1.327e-02, 4.455e-02, -9.613e-02, 1.129e-01, -2.214e-01, -7.553e-02, 5.877e-03, -7.342e-02, 4.476e-02, 5.164e-02, -2.974e-02)); + r += V4(1.081e-02, 1.104e-03, 2.498e-03, 2.111e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-9.523e-02, 7.526e-03, -2.074e-01, 4.818e-02, -5.945e-02, -3.677e-02, -1.007e-01, 2.116e-02, 1.844e-01, -2.631e-02, 4.416e-02, 1.048e-01, 4.360e-02, -7.908e-03, -4.849e-02, -6.085e-02)); + r += mul(s0_1, M4(1.516e-01, 5.656e-02, -8.160e-02, 1.651e-02, -1.158e-01, 3.780e-02, 1.881e-01, 2.673e-02, 1.120e-01, 3.675e-02, -8.565e-02, -3.982e-02, -7.177e-02, -9.332e-02, 8.668e-02, -1.560e-01)); + r += mul(s0_2, M4(1.006e-01, -7.895e-02, -1.341e-01, 5.189e-02, 4.895e-02, 4.167e-02, -1.296e-01, -6.457e-02, 2.716e-02, 7.833e-02, 8.289e-02, -1.103e-03, -9.103e-02, 1.081e-01, 2.987e-02, 6.692e-03)); + r += mul(s0_3, M4(3.345e-02, -6.734e-02, 2.309e-02, 5.627e-02, 8.582e-02, -2.582e-02, 4.288e-02, -4.792e-02, -5.124e-03, 1.531e-02, 2.494e-02, -3.836e-04, 1.338e-01, -4.618e-02, 8.606e-02, 8.540e-02)); + r += mul(s0_4, M4(-9.091e-02, -2.635e-01, 1.093e-01, 2.452e-01, 
-6.538e-02, 7.022e-02, 4.870e-02, -1.239e-01, 1.271e-02, 4.693e-03, 1.197e-01, 5.333e-02, -6.442e-02, 6.737e-02, 6.474e-02, -1.860e-01)); + r += mul(s0_5, M4(1.886e-01, 1.379e-01, 7.388e-02, 9.951e-02, 5.822e-02, 9.415e-03, 1.483e-01, 9.551e-02, -4.265e-02, 7.647e-02, -7.110e-02, 1.301e-01, -4.740e-02, 2.135e-02, -3.256e-02, 7.272e-02)); + r += mul(s0_6, M4(1.471e-02, -2.215e-02, 1.118e-02, -4.224e-03, -9.526e-03, 1.056e-01, 6.225e-02, 6.163e-03, -1.199e-02, 3.090e-02, 1.100e-01, 4.367e-02, -6.287e-02, 2.110e-03, 4.653e-02, 2.634e-03)); + r += mul(s0_7, M4(-9.156e-02, 6.332e-02, 8.221e-02, -2.532e-02, 9.286e-02, 1.789e-01, -7.587e-02, 3.588e-03, 1.320e-01, 3.023e-02, -7.919e-02, 9.182e-03, 4.106e-02, 1.427e-02, -1.481e-01, -3.329e-02)); + r += mul(s0_8, M4(-8.374e-02, 5.366e-02, -2.426e-01, 4.147e-03, 5.590e-02, -5.014e-02, -7.904e-03, -8.100e-02, -5.368e-02, 7.723e-03, -2.304e-02, 6.514e-02, 2.844e-03, 3.521e-02, -1.951e-02, 9.246e-03)); + r += mul(s1_0, M4(3.039e-02, -1.293e-03, 1.293e-01, -3.167e-02, -1.130e-01, -6.094e-02, -1.905e-01, 9.723e-02, 6.267e-02, 4.243e-06, 2.944e-01, -8.186e-02, 1.090e-01, 1.586e-02, 1.518e-01, -8.095e-02)); + r += mul(s1_1, M4(6.135e-02, 6.516e-02, -7.528e-02, -4.670e-02, -2.456e-01, -5.538e-02, 1.169e-01, -2.047e-01, 1.181e-02, 1.002e-02, 2.542e-01, -4.795e-01, 2.283e-01, -1.144e-01, -1.208e-01, 2.578e-02)); + r += mul(s1_2, M4(5.741e-02, 8.183e-02, -8.066e-02, 9.448e-02, 7.245e-02, 4.110e-02, 7.466e-02, 1.324e-01, 1.988e-01, -2.446e-01, 2.133e-01, -2.613e-02, -2.349e-02, -1.161e-01, 6.522e-02, 1.124e-01)); + r += mul(s1_3, M4(-9.236e-02, -9.255e-02, -1.040e-01, 4.370e-02, 7.208e-02, 1.923e-01, 1.063e-02, 7.156e-02, -6.318e-02, 2.311e-01, -1.186e-01, -5.819e-02, 1.928e-01, 1.622e-01, 1.470e-01, 1.048e-01)); + r += mul(s1_4, M4(-6.268e-02, -1.375e-01, 6.604e-03, 2.802e-02, -3.740e-02, -5.399e-02, -1.374e-01, 2.602e-01, -1.768e-02, 3.827e-01, 6.387e-01, 2.928e-02, 5.753e-02, 1.206e-01, 3.682e-03, 4.942e-02)); + r += mul(s1_5, 
M4(4.952e-02, 2.523e-01, 3.339e-02, 3.963e-02, -3.530e-01, -3.700e-01, -1.063e-01, -1.958e-01, 1.963e-01, -6.659e-02, 3.456e-01, -3.005e-02, -1.509e-01, -1.022e-01, 1.169e-01, -4.150e-02)); + r += mul(s1_6, M4(-1.231e-01, 1.060e-01, 1.328e-02, -8.564e-02, 3.125e-01, 2.742e-03, -7.321e-02, -5.935e-03, -5.441e-02, 2.168e-01, 2.433e-01, -1.589e-01, 1.178e-01, -1.255e-01, 8.176e-02, -8.035e-02)); + r += mul(s1_7, M4(-8.170e-02, 1.597e-01, 1.886e-01, -7.304e-02, 3.819e-01, 7.933e-02, 1.686e-01, -2.394e-02, -7.509e-02, -2.816e-02, 3.992e-02, -3.778e-02, 3.477e-02, -5.884e-02, -7.473e-02, -1.785e-03)); + r += mul(s1_8, M4(-7.482e-02, 1.184e-02, -3.933e-03, -4.874e-02, 6.113e-02, 3.012e-01, 3.709e-02, 1.862e-01, 5.347e-04, -1.135e-01, -8.545e-02, -7.592e-02, 1.704e-02, 1.547e-02, 1.187e-01, -1.872e-02)); + r += mul(s2_0, M4(-1.486e-01, 8.789e-02, 2.623e-02, 1.376e-01, -1.004e-03, 6.926e-02, 4.750e-02, -7.964e-03, 5.269e-02, -4.034e-02, 1.346e-01, 4.845e-03, -1.978e-02, -2.621e-02, -6.480e-02, -3.674e-02)); + r += mul(s2_1, M4(1.335e-01, 2.026e-01, -1.693e-01, -1.756e-01, -1.606e-01, 1.774e-01, -9.483e-03, -2.651e-02, -1.382e-01, -2.495e-01, 5.076e-03, -1.924e-02, -1.548e-02, 3.372e-02, 6.016e-02, -4.393e-02)); + r += mul(s2_2, M4(5.384e-02, -4.307e-02, 3.152e-02, 3.141e-02, 2.630e-01, 1.072e-02, 7.661e-02, -8.139e-03, -1.391e-01, 6.246e-02, -7.113e-02, -5.639e-02, -4.102e-02, 1.321e-02, -3.045e-02, -4.878e-02)); + r += mul(s2_3, M4(-5.485e-02, -5.021e-02, -9.393e-02, 2.505e-01, 6.772e-02, 3.911e-02, -1.625e-01, 6.379e-02, -7.047e-02, -1.624e-02, 8.427e-02, -9.974e-02, -2.270e-02, -3.061e-02, 2.650e-01, -1.225e-01)); + r += mul(s2_4, M4(1.345e-01, -3.003e-01, 8.125e-02, -7.746e-02, -8.684e-02, 1.411e-01, 9.097e-02, -1.195e-01, -7.510e-02, 1.460e-01, -2.291e-01, -1.175e-01, -4.250e-02, 1.536e-01, -3.261e-02, -4.076e-01)); + r += mul(s2_5, M4(-1.581e-01, 3.753e-02, -1.418e-02, -6.127e-02, 5.118e-02, 2.163e-02, -2.048e-02, -2.313e-02, -1.666e-01, 7.394e-02, -1.726e-01, 
-1.422e-01, -1.639e-01, 2.895e-01, -4.619e-02, -6.550e-02)); + r += mul(s2_6, M4(8.274e-02, -1.434e-01, 1.279e-02, -5.499e-02, 3.480e-02, -9.788e-02, -1.559e-02, -4.615e-02, -1.374e-02, 2.087e-02, 1.207e-02, 1.965e-02, -7.874e-02, 2.656e-02, -1.384e-01, 5.413e-03)); + r += mul(s2_7, M4(-1.188e-01, -1.006e-01, -3.870e-03, 1.726e-02, -4.400e-02, -4.391e-02, 1.943e-02, -7.128e-02, -2.829e-02, -7.835e-02, -1.709e-02, 1.198e-03, -9.185e-02, 1.434e-01, -4.201e-02, 1.719e-02)); + r += mul(s2_8, M4(-2.822e-02, -3.034e-02, 8.413e-03, 9.900e-03, -5.806e-02, 5.050e-03, -2.474e-02, 2.117e-04, -6.470e-02, 2.178e-02, -2.448e-02, -3.711e-02, -1.022e-01, 7.474e-02, -8.927e-02, 3.808e-03)); + r += mul(s3_0, M4(4.649e-02, 5.995e-02, 1.237e-01, -4.439e-02, -8.079e-02, 4.739e-02, -1.330e-01, -7.111e-02, 1.249e-01, -1.390e-02, -4.347e-02, 3.089e-02, -1.378e-02, -5.054e-02, -2.371e-01, 1.214e-01)); + r += mul(s3_1, M4(-4.147e-02, 1.312e-01, 8.521e-02, 4.791e-02, -2.893e-01, -8.516e-03, 3.991e-02, 4.734e-03, 4.495e-02, -1.611e-01, -2.962e-02, 7.655e-02, 3.432e-03, -8.433e-02, 4.505e-02, -6.860e-02)); + r += mul(s3_2, M4(1.496e-01, -7.197e-03, 1.138e-01, 2.208e-02, 1.407e-01, 8.353e-02, 2.982e-02, -5.375e-02, 1.311e-01, 1.746e-02, -9.200e-02, 1.702e-01, -1.306e-01, -2.512e-02, 4.415e-03, 2.814e-02)); + r += mul(s3_3, M4(-1.853e-02, 1.384e-02, -5.118e-02, -4.419e-02, 2.638e-02, -6.450e-02, 2.930e-02, 1.899e-02, 4.168e-02, -2.827e-02, 2.166e-01, 1.744e-02, 2.126e-01, -4.719e-02, 7.233e-02, 1.602e-01)); + r += mul(s3_4, M4(-2.018e-01, -1.064e-01, 2.128e-02, -2.395e-02, -7.627e-02, 1.624e-01, -1.261e-02, -7.677e-02, -1.227e-01, 1.087e-01, -1.174e-01, -1.210e-01, 6.392e-02, 2.524e-01, 1.421e-02, 5.939e-03)); + r += mul(s3_5, M4(4.596e-02, -1.440e-01, -8.835e-02, -4.192e-02, -7.037e-02, -5.660e-02, -2.150e-02, 5.382e-02, -6.105e-02, 2.803e-02, -7.108e-02, 7.229e-02, 3.941e-02, 1.201e-01, 3.529e-02, 1.408e-01)); + r += mul(s3_6, M4(-1.068e-01, -4.231e-02, -4.821e-02, -1.200e-02, -2.450e-02, 
-9.650e-03, -2.444e-02, 6.018e-02, -1.616e-02, -2.735e-02, 7.351e-03, 3.997e-02, 1.775e-01, -4.294e-03, 8.998e-02, 4.438e-02)); + r += mul(s3_7, M4(-6.566e-02, -3.504e-02, 5.708e-02, 1.989e-02, -1.345e-02, -1.144e-01, -3.381e-02, 3.762e-03, 5.950e-02, -5.932e-02, -1.283e-01, 1.091e-01, -6.164e-02, -7.551e-02, -1.460e-01, -1.032e-01)); + r += mul(s3_8, M4(-3.855e-02, 4.966e-02, -7.798e-02, -4.355e-02, -7.390e-02, 8.112e-02, 1.543e-02, 9.386e-03, 5.591e-02, 1.782e-02, 6.727e-02, 5.322e-02, 4.568e-02, 9.473e-02, 1.551e-01, 1.565e-01)); + r += mul(s4_0, M4(1.207e-02, -4.134e-02, -8.656e-02, 1.051e-01, 5.978e-02, -5.619e-02, 8.259e-02, -3.851e-02, 5.958e-02, 3.760e-02, 3.731e-03, 9.856e-02, -4.378e-02, -2.418e-02, -1.357e-01, 4.911e-02)); + r += mul(s4_1, M4(-6.349e-02, 5.498e-03, 1.302e-02, 4.400e-02, -1.769e-01, 1.710e-02, 5.431e-02, -1.440e-01, 8.952e-02, 7.151e-02, -3.314e-02, 6.503e-02, -4.302e-02, -1.076e-02, 5.917e-02, 1.234e-01)); + r += mul(s4_2, M4(-9.742e-03, 7.909e-02, 1.068e-01, -1.500e-01, -6.214e-02, 1.684e-01, -7.083e-03, 1.061e-01, 6.843e-02, -3.233e-02, 3.288e-03, 1.763e-01, 6.327e-02, -5.016e-02, -3.696e-02, -2.885e-02)); + r += mul(s4_3, M4(7.711e-02, -9.057e-02, -6.094e-02, -3.825e-02, -2.414e-01, -4.039e-02, -1.211e-01, 1.822e-01, -4.986e-02, 8.437e-02, 6.417e-02, -3.619e-02, 6.501e-02, 3.774e-02, -1.829e-02, 8.399e-02)); + r += mul(s4_4, M4(-2.235e-01, -4.193e-02, 6.100e-02, 9.313e-02, 5.158e-02, 1.821e-02, -2.817e-01, -2.336e-02, -2.633e-02, 1.295e-01, -1.070e-01, -6.050e-02, -1.249e-01, 4.648e-02, 5.849e-02, 7.926e-02)); + r += mul(s4_5, M4(-1.228e-01, 8.444e-02, 1.364e-02, 1.304e-02, 1.845e-02, 2.311e-01, -6.098e-02, 1.697e-01, -2.870e-02, 3.777e-02, 7.205e-02, -1.859e-01, -1.704e-02, 1.537e-02, -2.451e-02, -7.949e-02)); + r += mul(s4_6, M4(5.451e-02, -3.614e-02, -1.951e-02, -5.074e-03, 5.978e-02, -1.053e-01, 9.244e-02, -2.224e-02, -3.829e-02, -6.109e-02, 9.426e-02, -4.299e-02, 3.332e-02, 4.194e-02, -7.992e-02, 2.305e-02)); + r += mul(s4_7, 
M4(-7.507e-02, 3.732e-02, 9.090e-02, 2.122e-02, -3.735e-03, -1.434e-01, 1.082e-01, 4.114e-02, -5.953e-02, -7.751e-02, -1.033e-01, -2.877e-02, -1.485e-01, -4.885e-02, 8.432e-02, 4.760e-02)); + r += mul(s4_8, M4(-1.455e-03, -9.294e-03, -8.645e-02, -2.720e-02, 4.855e-02, 3.842e-02, -5.364e-02, -5.806e-02, -2.382e-02, -2.905e-02, 8.819e-02, -1.782e-02, 2.207e-02, -1.173e-02, -2.117e-02, 4.590e-02)); + r += mul(s5_0, M4(-8.125e-03, -1.201e-01, -3.500e-02, 1.288e-01, -1.329e-02, -4.118e-02, 1.373e-01, -7.228e-02, -4.887e-02, 3.610e-02, -2.157e-02, -9.036e-02, -9.072e-02, 6.122e-02, 1.467e-01, 5.007e-02)); + r += mul(s5_1, M4(-5.586e-02, -3.752e-02, -8.767e-02, 4.407e-02, -6.290e-02, 4.581e-02, -5.733e-02, -9.619e-02, -6.788e-02, -6.172e-02, -6.335e-03, 2.470e-02, 7.034e-02, 7.370e-02, 1.988e-01, 1.529e-01)); + r += mul(s5_2, M4(-6.664e-02, 2.605e-02, 1.183e-01, -1.122e-01, -1.632e-02, 4.579e-02, -4.336e-02, -2.472e-02, 1.734e-01, -2.186e-01, -4.677e-02, 1.248e-01, -3.843e-02, 9.578e-02, 1.781e-01, 1.108e-01)); + r += mul(s5_3, M4(1.859e-02, 2.767e-02, -3.291e-02, 8.182e-02, -1.983e-01, 2.496e-02, 7.179e-02, -1.044e-02, -5.468e-02, 1.339e-02, -6.350e-02, -2.690e-02, -1.548e-01, 5.006e-02, 4.520e-02, 5.536e-02)); + r += mul(s5_4, M4(4.791e-02, -2.237e-01, 3.843e-02, 2.922e-01, 4.297e-02, -5.274e-02, -1.429e-01, -1.418e-02, 1.950e-01, -1.983e-02, 6.589e-02, -4.016e-02, -2.981e-02, 1.265e-01, -5.908e-02, 3.204e-01)); + r += mul(s5_5, M4(-8.215e-02, -8.358e-03, 5.427e-02, 1.616e-01, 2.650e-01, 2.579e-01, 1.048e-01, 1.796e-01, 1.994e-01, -1.162e-01, 3.958e-02, -4.143e-02, -1.218e-02, -2.132e-01, 3.810e-02, -1.137e-01)); + r += mul(s5_6, M4(-8.306e-02, -1.655e-02, 9.988e-02, 4.427e-02, 5.594e-02, -1.272e-01, 4.672e-02, -2.217e-02, -9.166e-02, -7.092e-03, -6.258e-02, -3.021e-02, 2.103e-02, 9.653e-04, -2.749e-01, 1.407e-02)); + r += mul(s5_7, M4(-4.161e-02, -4.165e-02, 3.618e-02, -9.598e-03, -5.146e-02, -3.152e-02, -1.016e-01, -9.250e-03, -7.838e-02, -1.316e-01, -1.805e-01, 
-4.386e-02, -1.398e-01, -2.552e-01, 2.408e-01, 9.776e-02)); + r += mul(s5_8, M4(-4.489e-02, -4.643e-02, -6.028e-02, 6.739e-02, -6.689e-02, -1.980e-01, -1.361e-02, -2.730e-02, -8.366e-02, -2.995e-02, -7.422e-02, -8.726e-02, 9.930e-02, 9.294e-02, 2.463e-01, 1.307e-01)); + r += mul(s6_0, M4(-1.925e-02, -1.450e-01, 1.278e-01, -3.691e-02, 3.996e-02, 2.192e-02, 2.472e-02, 1.158e-01, -7.544e-02, -1.339e-02, -3.826e-02, -3.638e-02, 7.539e-02, -4.897e-03, 4.112e-03, 6.074e-02)); + r += mul(s6_1, M4(7.575e-02, -7.417e-02, -3.193e-02, -4.582e-03, -1.736e-01, 1.612e-01, 1.551e-01, 2.777e-01, 1.157e-01, 5.545e-02, -5.521e-02, -1.460e-01, 1.296e-01, 5.881e-02, 1.068e-02, 2.062e-01)); + r += mul(s6_2, M4(7.444e-02, -8.126e-02, -3.648e-02, 1.020e-01, 8.436e-02, -9.793e-02, 6.097e-02, 6.469e-02, 2.770e-03, 1.760e-02, 1.403e-01, -2.313e-02, -2.332e-02, 1.045e-01, 5.235e-02, 3.027e-02)); + r += mul(s6_3, M4(2.360e-02, 5.155e-03, 2.159e-01, -4.357e-02, -1.689e-01, 2.565e-02, -1.304e-01, 8.094e-02, -8.436e-02, -5.728e-04, -4.061e-02, -1.199e-02, 2.252e-01, -1.218e-01, -4.930e-02, 4.136e-02)); + r += mul(s6_4, M4(2.147e-01, -9.630e-02, -1.378e-01, -1.134e-02, 8.536e-02, 1.351e-02, -2.152e-01, 2.169e-01, -1.158e-01, -2.056e-01, 1.360e-01, -6.122e-02, 1.519e-01, -4.620e-02, 1.102e-01, -4.739e-02)); + r += mul(s6_5, M4(1.090e-01, -6.277e-02, -8.943e-02, 2.391e-01, -2.288e-01, -4.373e-01, -1.907e-01, -8.421e-02, 1.734e-02, -1.778e-01, 3.346e-02, 2.713e-02, 1.243e-01, 2.066e-01, -1.519e-01, -1.352e-01)); + r += mul(s6_6, M4(1.169e-02, 4.047e-03, -5.656e-02, 5.895e-02, 9.585e-02, 1.338e-02, -1.428e-01, 3.144e-02, -3.333e-02, -1.187e-01, 1.244e-01, 1.784e-02, 2.415e-01, -1.440e-01, 1.009e-01, -7.098e-02)); + r += mul(s6_7, M4(7.430e-02, 2.219e-03, -9.441e-03, -3.378e-02, 2.002e-01, -3.325e-02, 1.963e-02, -4.829e-02, -7.793e-02, 5.002e-02, -6.652e-02, -1.204e-02, -6.962e-02, 3.672e-02, -7.999e-02, -3.428e-02)); + r += mul(s6_8, M4(2.586e-02, -1.384e-01, 1.233e-01, 6.076e-03, -1.840e-02, 
5.091e-02, 1.198e-01, 4.472e-02, -5.558e-02, 9.491e-02, 6.196e-03, -9.158e-02, 6.259e-02, 8.428e-02, -1.255e-01, 4.307e-02)); + r += mul(s7_0, M4(-3.222e-02, 3.138e-03, 1.957e-02, 1.499e-02, -4.444e-02, -2.183e-02, 1.365e-01, -8.917e-02, 2.109e-02, 8.503e-02, 1.513e-01, -1.106e-02, -5.235e-02, -2.710e-02, -7.498e-02, 4.757e-02)); + r += mul(s7_1, M4(5.725e-02, -2.067e-03, 3.363e-02, -6.695e-03, -4.120e-02, 1.073e-01, 7.809e-02, 1.221e-01, 1.075e-01, -4.413e-03, -2.130e-01, -6.235e-02, 1.317e-01, -5.886e-02, -1.093e-01, 1.391e-01)); + r += mul(s7_2, M4(1.989e-02, -1.940e-03, 1.291e-01, 2.503e-02, -1.334e-01, -1.339e-01, 1.157e-01, 2.046e-02, 1.673e-01, -1.449e-02, 5.462e-02, 1.793e-02, 3.130e-02, -2.726e-02, 2.497e-02, 1.924e-02)); + r += mul(s7_3, M4(-3.377e-02, 5.555e-02, 1.141e-01, -3.961e-02, -1.033e-01, 6.960e-02, -1.337e-02, 2.128e-02, -3.006e-01, 7.817e-02, -2.368e-01, -2.991e-02, -3.921e-02, 1.059e-01, 2.200e-01, -1.236e-01)); + r += mul(s7_4, M4(1.268e-01, 1.573e-01, -1.810e-01, 5.189e-02, 9.665e-02, 1.684e-01, -9.649e-02, 1.212e-01, -3.799e-01, -1.285e-01, 2.667e-01, -1.329e-01, 1.717e-01, 1.509e-01, -2.089e-01, 1.262e-02)); + r += mul(s7_5, M4(6.720e-02, 5.172e-02, -1.574e-02, 9.391e-02, -2.402e-01, -2.259e-01, -4.062e-02, -1.388e-01, -1.521e-01, -2.795e-02, -7.224e-03, 3.382e-02, -7.645e-04, 4.380e-02, 1.791e-02, 5.836e-03)); + r += mul(s7_6, M4(-5.870e-02, 1.127e-01, -2.018e-02, 1.996e-02, 2.143e-01, 6.391e-02, 2.659e-02, 5.426e-02, -4.221e-03, 2.226e-02, -2.242e-02, -6.617e-02, -1.122e-01, 9.153e-02, 5.476e-02, -9.256e-02)); + r += mul(s7_7, M4(-1.343e-01, 2.115e-02, -1.554e-01, -3.678e-02, 9.672e-02, -4.869e-02, -4.701e-02, 3.799e-02, -2.664e-01, 1.993e-01, -5.260e-02, -7.176e-02, 6.625e-02, 5.332e-03, -1.422e-01, -5.649e-02)); + r += mul(s7_8, M4(-7.533e-02, -2.946e-02, 1.724e-01, 8.496e-03, 2.760e-02, 1.191e-02, -7.346e-02, -2.989e-02, 1.546e-02, -3.697e-02, -4.831e-02, -5.810e-04, -4.130e-02, -7.427e-02, 1.663e-01, -1.218e-01)); + r += 
V4(2.410e-02, -9.396e-03, 1.051e-02, 5.262e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.541e-01, 7.445e-02, -1.031e-01, -4.227e-02, 1.194e-01, 1.177e-01, 4.318e-03, 3.377e-02, -1.042e-01, -7.155e-02, 3.097e-02, 3.587e-03, 2.435e-02, 1.159e-01, 1.653e-02, 4.330e-02)); + r += mul(s0_1, M4(9.426e-02, 1.593e-02, -1.154e-02, 2.807e-01, -1.165e-01, -5.208e-02, 1.036e-01, -3.098e-02, 1.813e-02, 2.485e-02, -7.865e-02, -2.436e-01, -4.745e-02, 1.809e-01, 2.603e-02, -3.831e-02)); + r += mul(s0_2, M4(2.777e-02, -1.637e-02, -7.339e-03, 1.470e-01, 4.922e-02, 3.150e-02, 2.567e-02, 8.265e-03, 5.751e-02, 2.533e-02, -3.575e-02, 8.372e-02, 1.036e-01, 3.502e-02, -3.228e-04, -4.643e-02)); + r += mul(s0_3, M4(4.781e-02, 2.210e-01, -5.909e-04, -4.151e-03, 1.515e-01, -1.007e-01, 6.112e-02, -5.742e-02, -1.989e-01, -8.581e-02, -7.817e-02, 1.420e-02, -8.560e-03, 5.586e-02, -6.567e-02, -4.390e-02)); + r += mul(s0_4, M4(-5.837e-02, 3.538e-02, -5.611e-02, -8.464e-02, -8.826e-02, -9.334e-02, 1.638e-01, -9.774e-02, -4.502e-02, 2.860e-02, 4.700e-02, -4.664e-02, -1.113e-01, -1.132e-01, -6.476e-02, -2.240e-02)); + r += mul(s0_5, M4(-1.893e-02, 1.075e-01, -1.443e-01, 4.658e-02, -3.577e-02, -1.106e-01, -1.077e-01, -5.052e-02, -1.149e-02, -4.290e-02, 3.050e-02, 1.243e-01, -9.772e-02, -3.819e-02, -1.082e-01, -3.553e-02)); + r += mul(s0_6, 
M4(2.176e-02, -1.894e-01, -7.480e-02, 1.118e-01, 4.854e-02, 1.391e-02, 5.312e-02, -6.790e-02, -1.328e-02, -2.371e-02, -5.487e-02, -3.335e-02, -4.248e-02, 1.991e-02, -4.070e-02, 3.184e-02)); + r += mul(s0_7, M4(-4.676e-02, 1.857e-01, 1.548e-01, -8.722e-02, 2.913e-02, -2.723e-02, -7.639e-02, -3.793e-02, 1.133e-01, -1.049e-01, -6.420e-02, -5.949e-03, -1.141e-01, -1.956e-02, 5.989e-02, -1.345e-01)); + r += mul(s0_8, M4(3.326e-02, -1.111e-01, 1.462e-01, 8.195e-02, 6.368e-02, -1.687e-03, -1.765e-02, 4.541e-02, 1.443e-01, 1.527e-01, 2.521e-02, -6.235e-02, -7.915e-02, -4.487e-02, -5.134e-02, 3.652e-02)); + r += mul(s1_0, M4(3.368e-02, 8.714e-02, 6.110e-03, -1.981e-01, 2.033e-01, -8.898e-02, 8.475e-03, 1.873e-01, -1.498e-01, 4.971e-02, 1.536e-01, 3.002e-01, -2.681e-01, -3.378e-02, 1.997e-02, 4.818e-01)); + r += mul(s1_1, M4(1.055e-01, 1.301e-01, -2.522e-02, -7.098e-02, -1.269e-01, 2.670e-02, 1.136e-01, 7.816e-02, -1.890e-01, 2.785e-01, -2.470e-01, -8.519e-02, -2.315e-02, 1.341e-01, -9.506e-02, 1.278e-01)); + r += mul(s1_2, M4(1.948e-02, -3.190e-02, -3.223e-02, 3.931e-02, 3.433e-02, 1.312e-01, -1.393e-02, -1.037e-01, 8.951e-02, 1.129e-01, 6.835e-02, -9.325e-02, 1.195e-01, -3.151e-02, -1.330e-02, 5.604e-03)); + r += mul(s1_3, M4(9.086e-02, 1.209e-01, 6.033e-02, -1.874e-02, -1.190e-01, -1.473e-01, 8.160e-02, 5.619e-02, -3.533e-01, -1.393e-01, -2.688e-01, 1.894e-01, 3.564e-02, 2.016e-01, -1.385e-01, 4.488e-02)); + r += mul(s1_4, M4(-9.440e-03, -1.706e-02, -1.748e-02, 2.662e-02, 4.748e-02, 1.932e-02, 1.696e-01, 2.769e-01, -5.026e-01, 2.010e-01, 9.951e-02, 1.637e-01, 1.957e-01, -3.575e-01, -1.192e-01, 1.617e-01)); + r += mul(s1_5, M4(-3.517e-03, -4.071e-02, -8.419e-02, 1.006e-01, -1.733e-01, 2.983e-01, -1.682e-01, 5.680e-02, -5.220e-02, -1.610e-01, -2.310e-01, -1.312e-02, 1.825e-02, -3.759e-02, -1.022e-01, -5.093e-02)); + r += mul(s1_6, M4(-8.118e-02, -5.037e-03, 1.736e-03, -3.371e-02, -1.289e-02, 8.800e-03, 9.251e-02, -1.756e-01, -1.530e-02, 2.300e-01, 5.308e-02, -1.295e-01, 
6.294e-02, 1.396e-01, -3.647e-02, 2.122e-03)); + r += mul(s1_7, M4(3.921e-02, 1.880e-04, 7.184e-02, -5.317e-03, -1.052e-01, 1.530e-01, -2.644e-01, -1.468e-02, 3.993e-02, -4.042e-03, 6.291e-02, -3.918e-02, -2.002e-01, -2.985e-02, -3.781e-02, 1.375e-02)); + r += mul(s1_8, M4(-8.215e-02, 9.509e-03, 2.562e-02, -2.062e-02, 1.324e-01, 1.454e-02, 3.688e-02, 1.111e-01, 2.343e-01, -4.873e-02, 1.626e-01, -6.006e-02, -1.030e-01, 7.297e-02, -2.219e-02, 9.851e-02)); + r += mul(s2_0, M4(5.611e-02, -3.181e-01, -4.837e-02, -6.922e-02, 2.914e-03, 5.952e-02, -4.679e-02, -2.544e-02, 2.193e-02, -1.003e-01, -9.020e-03, 2.205e-02, -1.759e-03, 3.894e-02, 1.423e-02, -8.472e-02)); + r += mul(s2_1, M4(-7.214e-02, 1.584e-02, -4.465e-02, 2.169e-01, -3.078e-02, 5.434e-02, -3.933e-02, 1.729e-03, -1.555e-02, 5.433e-02, 2.206e-02, 4.131e-02, -8.110e-02, -1.106e-01, -4.616e-02, -2.315e-02)); + r += mul(s2_2, M4(1.754e-01, -5.070e-02, -3.150e-02, 7.899e-02, 5.769e-02, -2.129e-01, -4.636e-02, 4.046e-02, -1.395e-01, 4.660e-02, 2.975e-02, 1.117e-02, -6.117e-02, -1.751e-02, -2.903e-02, 6.764e-02)); + r += mul(s2_3, M4(-6.471e-02, 2.146e-01, 5.085e-02, -5.202e-01, -1.359e-02, 5.058e-02, -7.127e-02, -1.601e-02, -6.091e-02, -1.459e-02, -7.134e-02, 1.549e-01, 1.276e-01, -9.287e-02, 1.771e-01, 1.394e-01)); + r += mul(s2_4, M4(1.596e-01, -1.380e-01, -2.423e-01, 2.340e-01, 1.000e-02, 2.824e-02, 1.236e-01, 3.541e-02, -1.289e-01, 2.382e-02, -6.086e-02, 2.541e-02, 5.685e-03, -3.143e-02, 5.591e-02, -1.216e-01)); + r += mul(s2_5, M4(-8.766e-02, -2.235e-02, -2.451e-02, 1.227e-02, 1.550e-01, -5.607e-02, 7.935e-02, 8.179e-02, -2.942e-01, 1.411e-01, -1.799e-02, -3.496e-02, -2.725e-02, 1.133e-01, 5.287e-02, 1.074e-01)); + r += mul(s2_6, M4(-1.036e-01, 1.293e-01, 5.427e-02, -3.299e-01, -6.639e-02, 2.028e-02, -6.833e-02, 4.915e-02, -4.676e-03, 4.487e-02, -5.957e-03, 7.035e-02, -7.784e-02, 5.093e-02, 1.658e-01, 1.278e-02)); + r += mul(s2_7, M4(3.254e-02, -6.731e-02, 6.136e-02, -6.006e-02, -4.211e-03, 1.189e-01, 7.107e-02, 
-5.250e-02, 1.580e-01, 1.173e-01, 7.031e-02, -2.363e-02, 4.882e-02, -6.122e-02, 3.727e-02, 1.939e-02)); + r += mul(s2_8, M4(2.800e-02, -2.528e-02, 4.430e-02, -3.989e-02, -3.861e-02, -7.836e-02, -5.991e-02, -1.907e-02, -1.333e-01, 3.260e-02, -6.408e-03, 5.615e-02, 2.487e-02, 3.189e-02, 7.691e-02, -1.030e-01)); + r += mul(s3_0, M4(-7.688e-02, 4.350e-02, -1.354e-01, -5.353e-02, 1.306e-01, 1.329e-01, -4.048e-02, -8.876e-02, 1.484e-02, -2.443e-01, 3.756e-03, 7.480e-02, -8.744e-02, 8.263e-02, -2.327e-02, 1.993e-02)); + r += mul(s3_1, M4(1.341e-02, -4.843e-02, 1.961e-02, 1.442e-01, 1.704e-01, 8.335e-02, -9.952e-02, 1.931e-02, -5.787e-02, -2.613e-01, 2.198e-02, 6.073e-02, -1.278e-02, -2.314e-01, 3.446e-02, 2.892e-02)); + r += mul(s3_2, M4(1.312e-02, -1.539e-02, -6.587e-02, 3.002e-02, 1.623e-01, -1.527e-01, -2.507e-02, 8.886e-02, -3.306e-03, 1.216e-01, -5.373e-02, -4.630e-02, 2.683e-02, 1.528e-03, 1.237e-01, -1.357e-01)); + r += mul(s3_3, M4(-3.629e-02, -1.790e-02, 8.143e-02, 3.422e-02, -7.654e-02, -1.413e-01, 1.523e-02, -1.196e-01, 5.257e-02, -1.080e-01, -3.571e-03, -1.339e-01, 1.751e-02, 4.219e-02, 9.139e-02, 1.905e-01)); + r += mul(s3_4, M4(-1.332e-01, 1.085e-01, -5.291e-02, 1.711e-02, -1.511e-01, -1.611e-02, 6.475e-02, -1.778e-01, 1.864e-02, 1.446e-03, -9.403e-03, -1.638e-01, -6.173e-02, -1.837e-01, -7.164e-02, 9.682e-02)); + r += mul(s3_5, M4(4.124e-02, -5.000e-02, -3.391e-02, -5.335e-02, 1.619e-01, 4.778e-02, 1.230e-01, -9.066e-02, 2.046e-01, 2.125e-02, -5.583e-02, -2.781e-01, 2.121e-01, 4.222e-02, 5.072e-02, -6.791e-02)); + r += mul(s3_6, M4(5.591e-02, -4.172e-02, 5.074e-03, 6.773e-02, 7.004e-02, 3.783e-03, 1.241e-02, -3.087e-02, 5.125e-02, -2.546e-02, -6.424e-02, -1.288e-01, -6.226e-02, -1.023e-01, -1.236e-01, -1.059e-01)); + r += mul(s3_7, M4(6.663e-02, 2.416e-02, -5.159e-02, -3.987e-02, -1.621e-02, 1.979e-02, 4.159e-02, -6.761e-02, -3.534e-02, -1.029e-01, -1.345e-02, -4.275e-02, -2.683e-01, 1.552e-01, -2.544e-01, 2.859e-04)); + r += mul(s3_8, M4(-1.992e-02, 
5.768e-02, -5.584e-03, 6.441e-03, 1.317e-01, 3.139e-02, 6.948e-02, 1.451e-02, 1.074e-01, -7.998e-02, -8.032e-02, -6.475e-02, 1.482e-01, -9.720e-03, -5.256e-02, -1.479e-02)); + r += mul(s4_0, M4(-4.839e-02, 1.140e-03, -3.622e-02, 7.300e-02, 4.100e-02, -1.103e-01, 3.436e-02, -2.778e-02, -9.181e-02, -6.152e-02, -4.272e-02, 1.237e-01, 1.008e-02, 6.290e-02, -8.303e-03, 9.899e-03)); + r += mul(s4_1, M4(-6.312e-02, -8.452e-02, 7.491e-03, 9.689e-02, -6.351e-02, -1.581e-01, -5.564e-02, -7.967e-03, -3.090e-02, -3.347e-02, 9.113e-02, 1.925e-01, 4.861e-02, -2.838e-02, 4.783e-02, -1.699e-01)); + r += mul(s4_2, M4(-1.478e-01, 4.556e-02, -5.336e-02, -6.732e-03, -2.761e-03, -5.113e-02, -1.607e-03, 1.340e-01, 1.012e-01, 2.428e-02, 1.503e-02, -7.795e-02, 6.548e-02, -7.697e-02, 1.112e-02, 3.049e-03)); + r += mul(s4_3, M4(-2.407e-03, -2.252e-03, -4.581e-02, 5.584e-02, 5.979e-03, -4.358e-02, 6.233e-02, 1.063e-02, -2.296e-01, -1.326e-01, -4.987e-02, 1.989e-02, 1.359e-01, 2.509e-01, 1.292e-01, 7.777e-02)); + r += mul(s4_4, M4(-2.050e-01, 1.206e-01, 3.710e-02, 1.986e-01, -1.741e-01, 7.256e-02, -3.484e-02, -3.576e-01, 2.451e-01, -1.688e-01, -1.057e-01, 9.107e-02, 8.758e-02, -2.751e-01, 2.239e-01, 2.364e-02)); + r += mul(s4_5, M4(-5.809e-02, 1.454e-02, -3.366e-02, -2.671e-02, -9.982e-02, -1.511e-02, -1.628e-01, -1.030e-01, -6.869e-02, 1.522e-01, 1.662e-01, -3.735e-02, 1.431e-01, 3.946e-03, 1.875e-01, -6.482e-02)); + r += mul(s4_6, M4(3.218e-02, -7.122e-02, -1.460e-02, 7.275e-02, 8.113e-02, -3.292e-03, -5.182e-02, 1.175e-01, 1.257e-02, 5.358e-02, -3.594e-02, 5.446e-02, 7.923e-02, 3.842e-02, 2.085e-01, 7.095e-02)); + r += mul(s4_7, M4(1.590e-02, 1.359e-01, 3.769e-03, 4.688e-02, 4.924e-02, 1.446e-01, 1.228e-01, -2.817e-02, -1.074e-01, 1.347e-01, 2.297e-02, 2.567e-02, 5.231e-02, -3.432e-03, 3.995e-01, -2.672e-02)); + r += mul(s4_8, M4(-1.036e-01, -3.753e-02, 7.380e-02, -7.441e-03, 2.090e-04, 7.063e-02, 1.665e-01, 6.673e-03, 9.839e-02, 1.731e-02, -1.267e-01, 2.546e-02, 1.349e-01, -7.099e-02, 
8.893e-03, -3.966e-02)); + r += mul(s5_0, M4(-3.046e-02, -1.849e-01, -1.706e-02, -1.224e-01, -5.873e-03, -1.910e-01, 5.894e-02, 1.852e-01, 8.535e-03, 8.984e-02, -5.644e-02, -1.735e-01, -2.286e-01, 1.058e-01, -3.983e-02, 2.355e-02)); + r += mul(s5_1, M4(2.592e-02, 6.491e-03, 1.929e-02, -7.618e-02, -1.963e-01, -1.009e-01, 6.043e-02, -1.961e-01, -1.542e-02, 5.331e-04, 8.130e-02, 1.849e-01, -2.411e-01, 1.440e-01, -7.786e-02, 3.432e-02)); + r += mul(s5_2, M4(-8.275e-03, 3.609e-03, 9.891e-03, -8.344e-02, 2.447e-02, 1.084e-01, 9.394e-02, -1.679e-02, -6.476e-02, 4.520e-02, 9.260e-02, 2.126e-01, -1.950e-01, 9.232e-02, -3.969e-02, 6.067e-02)); + r += mul(s5_3, M4(-7.232e-04, -6.282e-02, 5.756e-02, -3.110e-02, -1.114e-01, -7.194e-02, 4.142e-03, 1.695e-01, 7.497e-02, -2.395e-02, 6.252e-02, 1.579e-02, -1.255e-01, 2.525e-01, 5.575e-02, 6.153e-01)); + r += mul(s5_4, M4(-6.491e-02, 7.502e-02, 3.167e-03, -2.348e-02, -8.472e-02, 2.548e-01, -4.978e-02, -1.951e-02, -3.048e-02, -5.704e-02, 1.774e-01, -6.338e-03, -2.838e-01, -1.511e-01, -4.053e-02, 6.059e-01)); + r += mul(s5_5, M4(-2.412e-02, -1.205e-01, -6.613e-02, -5.165e-02, -8.040e-02, -4.929e-02, -1.725e-01, -3.018e-02, -1.233e-01, -9.717e-03, 5.918e-02, 1.565e-01, -1.531e-02, 5.785e-02, 1.118e-01, -3.537e-02)); + r += mul(s5_6, M4(3.586e-02, -4.831e-02, -1.700e-02, -1.002e-01, 5.432e-02, -1.102e-02, -2.840e-02, 1.737e-01, 3.152e-03, 1.527e-01, 4.713e-03, 7.891e-03, -1.070e-01, -7.131e-02, -3.445e-02, 7.807e-02)); + r += mul(s5_7, M4(-6.315e-02, 1.014e-01, 1.688e-01, -1.030e-01, 1.241e-01, 7.856e-02, 8.263e-02, 2.699e-02, -3.012e-02, 2.876e-02, 1.430e-01, -1.694e-01, -8.311e-02, -5.530e-02, -2.528e-01, 1.123e-01)); + r += mul(s5_8, M4(4.501e-02, 7.023e-02, 9.987e-02, 1.779e-02, 2.821e-02, -7.071e-02, 8.364e-02, -4.817e-02, 1.017e-01, -1.175e-01, 6.084e-02, -2.026e-02, -4.104e-02, 1.192e-01, -2.724e-01, -7.172e-02)); + r += mul(s6_0, M4(-2.403e-02, -1.430e-01, -4.463e-02, 2.115e-01, -3.311e-03, 1.294e-01, 2.786e-02, 6.036e-02, 
-3.302e-02, -3.810e-02, 1.050e-02, 9.657e-03, -6.527e-02, -3.026e-02, -1.425e-02, -2.182e-01)); + r += mul(s6_1, M4(-1.318e-01, 2.104e-02, -1.155e-01, 1.466e-01, -1.479e-01, 3.399e-01, 4.071e-02, -4.176e-02, -1.956e-02, -4.514e-02, -1.132e-03, -3.509e-04, 4.392e-02, -4.815e-03, -5.243e-02, -3.786e-02)); + r += mul(s6_2, M4(1.311e-02, -1.032e-02, -8.790e-03, 1.187e-01, -5.755e-02, -1.255e-01, -1.005e-01, -1.453e-02, -1.102e-02, -4.807e-02, 1.795e-02, -1.851e-01, -9.856e-02, 1.487e-01, -7.258e-02, -5.214e-03)); + r += mul(s6_3, M4(-4.654e-02, -3.135e-02, -1.684e-01, 3.834e-01, -2.876e-02, 1.784e-01, -1.386e-02, -4.142e-02, -8.756e-02, -2.283e-01, -9.441e-02, -4.160e-02, 2.263e-01, 2.885e-01, -3.452e-03, -2.400e-01)); + r += mul(s6_4, M4(7.687e-02, -6.810e-02, -1.320e-01, 2.525e-01, 1.797e-01, -2.430e-02, 1.885e-02, -6.292e-02, -3.197e-02, -7.428e-03, -2.033e-02, 1.679e-01, 6.859e-02, 7.989e-02, -7.397e-02, -2.282e-01)); + r += mul(s6_5, M4(4.482e-02, -7.678e-02, 1.278e-03, 2.601e-01, 4.335e-03, 1.209e-01, 3.467e-03, -2.305e-01, 1.498e-01, 8.959e-02, 3.266e-02, 1.003e-01, -1.326e-01, -3.894e-02, -6.801e-02, 5.746e-02)); + r += mul(s6_6, M4(5.336e-02, -9.724e-02, -1.431e-01, 2.915e-01, 1.202e-01, -1.028e-01, -8.011e-03, 1.251e-01, 4.663e-02, -5.798e-02, -8.256e-02, 2.405e-02, -5.388e-02, -8.614e-02, -7.355e-02, -3.901e-02)); + r += mul(s6_7, M4(-3.570e-02, 2.925e-01, -7.481e-02, 2.625e-02, 1.195e-01, 4.382e-02, -3.380e-02, 1.320e-02, -6.054e-02, 1.930e-01, 1.641e-01, -5.457e-02, -6.494e-02, 4.445e-02, 1.441e-01, 4.620e-02)); + r += mul(s6_8, M4(9.954e-02, 1.499e-01, 3.501e-02, 1.605e-02, -2.730e-01, 1.298e-01, -4.405e-02, -2.582e-02, -5.153e-03, -1.636e-02, -3.186e-02, -1.102e-01, 8.378e-02, -5.718e-02, 1.241e-02, 2.242e-02)); + r += mul(s7_0, M4(4.522e-03, 1.157e-01, 6.721e-02, -1.100e-01, -3.192e-02, 1.623e-02, -1.048e-02, 4.959e-02, -8.910e-02, -8.295e-02, 3.892e-03, 7.422e-02, 7.690e-02, 1.323e-02, 1.388e-01, 3.940e-02)); + r += mul(s7_1, M4(-6.403e-03, 9.173e-02, 
7.153e-03, -1.033e-01, -6.463e-02, 6.393e-02, -4.339e-02, -6.190e-03, -1.071e-01, 7.056e-02, 5.481e-02, 6.483e-02, -6.428e-02, 2.973e-02, 1.292e-01, 1.169e-01)); + r += mul(s7_2, M4(5.770e-02, 5.588e-02, 6.031e-02, -1.759e-01, -3.895e-02, 2.467e-03, -2.560e-02, -1.027e-01, 2.629e-02, -1.205e-01, -9.231e-02, -2.143e-01, -2.209e-02, 7.736e-02, 1.911e-02, -6.816e-02)); + r += mul(s7_3, M4(1.705e-02, 5.705e-02, -4.597e-02, -7.820e-02, 8.144e-03, 6.708e-02, -3.695e-02, 2.001e-01, -1.255e-01, -1.630e-02, -2.811e-02, -5.311e-03, 1.111e-01, 6.014e-02, 1.773e-02, 8.124e-02)); + r += mul(s7_4, M4(1.531e-02, -1.537e-02, -9.494e-02, -6.804e-02, 1.393e-01, -6.602e-02, 4.988e-02, 2.267e-01, -3.947e-02, 8.325e-02, -8.862e-02, -6.870e-02, 1.717e-01, -2.222e-01, 4.594e-02, -1.655e-01)); + r += mul(s7_5, M4(1.169e-01, 1.878e-02, 4.611e-02, 2.927e-02, 7.513e-02, 4.478e-02, 8.064e-02, 2.155e-02, 2.792e-01, -1.029e-01, 9.900e-02, -3.829e-02, -1.461e-02, -5.504e-02, -6.803e-02, -1.202e-02)); + r += mul(s7_6, M4(3.775e-02, -4.092e-02, -9.230e-02, 3.931e-02, -2.544e-02, -5.744e-02, -1.008e-01, 8.357e-03, 4.594e-03, -5.746e-03, -1.147e-01, -3.240e-02, 2.209e-02, -7.544e-02, 8.800e-02, 3.604e-02)); + r += mul(s7_7, M4(7.549e-02, -3.910e-02, -5.900e-02, -2.941e-03, -8.728e-02, -4.351e-02, -7.503e-02, 1.369e-01, -4.670e-02, 2.395e-01, 2.080e-01, -6.757e-02, 1.841e-02, 7.237e-03, -4.694e-02, 1.169e-02)); + r += mul(s7_8, M4(7.998e-02, -1.301e-02, 8.638e-02, -2.254e-01, -1.370e-01, -5.376e-02, -3.425e-02, -2.080e-03, -1.539e-01, -1.300e-01, 4.326e-02, -3.162e-02, 3.087e-03, 1.125e-01, 8.557e-02, -6.775e-02)); + r += V4(-1.190e-02, 2.995e-03, -4.483e-02, 1.950e-02); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = 
l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 
= max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, 
s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 6 +//!DESC conv5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.325e-02, 3.102e-02, -1.201e-02, 2.268e-02, 7.444e-03, 1.479e-01, -1.168e-01, -5.885e-03, 4.910e-03, -2.434e-02, 4.692e-02, -8.707e-03, 1.163e-01, -5.718e-02, 4.571e-02, -3.987e-03)); + r += mul(s0_1, M4(9.745e-02, 4.481e-02, -5.736e-03, 4.527e-02, -6.175e-02, -8.090e-02, -1.783e-01, -4.490e-02, -1.074e-01, 6.241e-03, 1.121e-01, 5.417e-02, 
-1.167e-01, -1.013e-01, -9.217e-03, -8.213e-02)); + r += mul(s0_2, M4(-1.143e-01, -2.914e-02, 1.086e-02, 5.355e-02, -5.619e-02, 7.229e-02, -3.712e-02, -2.005e-02, 1.814e-02, 3.906e-02, 9.007e-02, 4.586e-02, 4.925e-02, -4.990e-02, 3.002e-02, -5.329e-02)); + r += mul(s0_3, M4(-7.281e-02, -1.456e-01, 6.030e-02, -7.220e-03, 1.217e-02, 3.942e-02, 2.493e-03, -2.184e-02, -4.370e-02, -2.313e-02, -3.702e-03, -1.439e-02, 5.491e-02, -1.097e-01, -1.525e-02, 1.306e-02)); + r += mul(s0_4, M4(2.689e-02, 6.599e-02, 1.036e-01, -8.721e-02, -2.337e-01, 1.000e-01, 1.439e-02, 2.684e-01, 2.232e-02, 7.914e-03, -3.002e-02, 7.379e-02, -2.289e-02, -1.065e-02, -4.016e-02, -1.150e-01)); + r += mul(s0_5, M4(7.046e-02, -1.307e-03, -1.191e-02, 4.801e-02, -2.147e-02, 1.351e-01, 3.794e-02, -3.068e-02, 3.729e-02, 1.114e-02, -1.309e-02, 1.434e-02, -1.162e-01, 1.812e-02, -5.316e-02, -5.238e-02)); + r += mul(s0_6, M4(-1.274e-03, 3.140e-02, 1.683e-02, 3.169e-02, -2.787e-03, -9.452e-02, 1.432e-02, -2.380e-02, -3.839e-02, 1.362e-01, -8.840e-03, 2.950e-02, -9.202e-02, -4.772e-02, 2.859e-02, 3.891e-02)); + r += mul(s0_7, M4(5.686e-02, 2.562e-02, 3.277e-02, -3.834e-03, -4.117e-02, -1.190e-03, 8.718e-02, -3.681e-02, 2.798e-02, 1.672e-02, 8.348e-02, 1.928e-02, 2.370e-02, 6.126e-02, 5.123e-02, -7.158e-02)); + r += mul(s0_8, M4(6.307e-04, 2.505e-02, -2.436e-03, -2.344e-02, -6.344e-02, 2.815e-02, -3.121e-02, 5.070e-02, 2.422e-02, 8.561e-03, 2.690e-02, -8.370e-02, 9.301e-03, -2.347e-02, -4.862e-02, -4.908e-02)); + r += mul(s1_0, M4(-2.105e-01, 5.742e-02, 5.282e-02, 1.075e-01, 8.419e-02, -8.720e-03, 3.498e-02, 8.101e-03, -4.217e-02, -6.896e-02, 1.060e-01, 1.892e-02, 1.353e-01, -5.279e-02, 3.427e-02, -1.061e-01)); + r += mul(s1_1, M4(-5.899e-02, -8.880e-02, 4.749e-02, 5.689e-02, -1.243e-01, -1.050e-01, 1.305e-01, -1.822e-01, -1.794e-01, -2.956e-01, 2.878e-01, -5.563e-02, 1.107e-01, -9.594e-02, 1.681e-01, -4.173e-01)); + r += mul(s1_2, M4(2.088e-01, -1.263e-02, 8.239e-02, 1.372e-01, -1.523e-01, -4.600e-02, 
-6.727e-02, 1.620e-02, -1.129e-01, -1.329e-01, -2.328e-02, -8.638e-02, -8.206e-02, -5.865e-02, 7.702e-02, 8.697e-02)); + r += mul(s1_3, M4(1.856e-03, -1.151e-01, 1.269e-01, -1.246e-01, -2.480e-02, 1.943e-02, -7.583e-02, -7.204e-02, 2.768e-02, 6.190e-02, 8.628e-02, -1.388e-01, 3.124e-02, -8.930e-02, -1.392e-01, 3.038e-01)); + r += mul(s1_4, M4(1.273e-01, -4.104e-02, 5.600e-01, -1.033e-01, -2.556e-01, 8.996e-02, -2.634e-02, 1.067e-02, -2.025e-01, 1.187e-01, 3.098e-01, -1.177e-01, 2.340e-01, 9.085e-02, -7.989e-02, -3.875e-01)); + r += mul(s1_5, M4(-4.001e-01, -2.410e-01, -2.005e-01, 3.053e-01, -4.187e-02, -2.079e-02, 3.795e-02, -5.801e-01, 1.572e-01, -3.704e-02, 1.789e-01, 3.634e-02, -2.765e-03, 3.068e-01, 6.566e-02, 1.782e-01)); + r += mul(s1_6, M4(-5.601e-02, -2.663e-01, -5.452e-02, -3.342e-02, 2.845e-02, -5.523e-02, 3.937e-02, -1.254e-02, -9.803e-02, -4.092e-02, 1.070e-01, -1.884e-02, -8.056e-02, 5.957e-02, -2.268e-01, -2.883e-02)); + r += mul(s1_7, M4(3.602e-02, 3.605e-01, -1.058e-01, -1.025e-01, 7.327e-02, 1.212e-02, 3.897e-02, -1.530e-03, -2.248e-01, 2.349e-01, 7.325e-02, -6.040e-02, 2.359e-01, 5.645e-01, 1.694e-02, -4.593e-02)); + r += mul(s1_8, M4(-6.717e-02, -2.712e-01, -7.268e-02, 4.477e-02, 5.479e-02, 2.246e-02, 3.345e-02, -7.484e-02, 4.180e-02, -4.276e-02, 2.712e-02, 1.372e-02, 6.067e-02, -6.065e-02, -1.066e-01, -1.852e-02)); + r += mul(s2_0, M4(-9.897e-02, -1.366e-01, 3.089e-02, -4.977e-03, 6.995e-02, 2.994e-01, 8.808e-02, -3.481e-02, -5.962e-02, -5.574e-02, -5.478e-02, -2.862e-02, 3.514e-02, 9.242e-02, -1.727e-02, -1.140e-02)); + r += mul(s2_1, M4(-2.045e-01, 1.060e-01, 1.197e-01, -7.549e-02, 2.766e-01, -3.171e-02, 8.807e-02, -1.171e-02, 8.605e-02, -1.305e-01, -1.057e-01, -8.969e-02, -1.155e-01, 3.084e-02, -2.138e-02, -5.441e-03)); + r += mul(s2_2, M4(-5.702e-02, -1.392e-02, 2.288e-02, 9.423e-02, 1.703e-02, -8.309e-02, 8.809e-03, -1.677e-01, -1.509e-02, -4.547e-02, 4.150e-02, -3.334e-02, -1.208e-01, 9.868e-02, -8.768e-02, -1.492e-02)); + r += mul(s2_3, 
M4(-1.112e-02, 1.112e-01, -3.933e-02, 2.128e-02, -2.588e-03, 3.539e-01, 3.261e-02, -4.850e-02, 5.691e-02, -1.492e-01, 4.581e-02, 1.560e-02, 1.683e-01, -2.399e-02, 9.759e-02, -8.359e-02)); + r += mul(s2_4, M4(-9.395e-02, 3.051e-02, -2.703e-01, -5.509e-02, -7.402e-02, 1.713e-01, -1.217e-02, -1.481e-01, -1.499e-01, 9.432e-02, 1.353e-01, 1.110e-01, 7.470e-02, -1.484e-01, 2.865e-02, 1.123e-01)); + r += mul(s2_5, M4(-4.374e-02, 2.185e-01, -1.399e-02, -3.567e-02, 1.530e-01, 2.002e-01, 1.431e-02, -2.271e-01, -1.001e-01, -9.959e-03, -2.941e-02, -3.313e-02, 4.434e-02, -1.691e-02, 3.744e-02, 2.550e-01)); + r += mul(s2_6, M4(-7.543e-02, -1.643e-02, 3.870e-02, -2.994e-02, -9.913e-02, -4.311e-02, -7.535e-02, 7.141e-02, -1.986e-02, -3.983e-02, -1.601e-02, 1.515e-02, 5.399e-02, -1.822e-02, -1.145e-01, -1.189e-01)); + r += mul(s2_7, M4(5.299e-02, 5.771e-02, -1.905e-02, 4.469e-04, -8.914e-03, 1.371e-01, -3.636e-02, -1.195e-03, -3.347e-02, -3.162e-02, -3.516e-02, -1.002e-01, -1.821e-01, 1.202e-01, 8.073e-02, 1.828e-02)); + r += mul(s2_8, M4(-3.079e-02, 4.782e-02, 4.581e-02, 3.428e-02, 1.364e-02, 3.328e-02, 1.065e-01, -5.447e-02, 1.521e-02, 1.987e-03, -5.060e-02, -4.637e-02, -1.284e-01, 1.990e-02, -5.804e-02, 5.293e-02)); + r += mul(s3_0, M4(-3.279e-02, -1.138e-01, -7.613e-02, 5.656e-02, -2.444e-02, -4.692e-02, 2.756e-02, 3.238e-02, -5.189e-03, 6.887e-04, -2.633e-02, -4.697e-02, -7.190e-02, 1.854e-01, 1.255e-01, -5.488e-02)); + r += mul(s3_1, M4(8.669e-02, -3.164e-02, 8.179e-02, -2.261e-02, 9.947e-02, 7.810e-02, -2.032e-02, 9.205e-03, 1.738e-01, 2.339e-02, -3.427e-02, -4.536e-02, 2.671e-01, 1.905e-01, -1.258e-01, -1.761e-01)); + r += mul(s3_2, M4(9.081e-02, 2.051e-02, -6.132e-03, -2.126e-02, -1.383e-01, -1.578e-02, -2.328e-02, 3.190e-03, 1.567e-01, -6.032e-02, 8.671e-02, 1.130e-02, -1.476e-01, 1.344e-01, -1.120e-01, -5.130e-02)); + r += mul(s3_3, M4(-2.872e-02, 1.763e-02, -7.216e-02, -1.072e-01, -1.180e-01, -1.004e-01, -4.798e-02, -2.352e-02, 1.105e-01, -2.879e-02, 1.592e-02, 
6.813e-03, -4.098e-01, -1.040e-01, -2.934e-01, 3.858e-02)); + r += mul(s3_4, M4(8.946e-02, 9.835e-02, -2.144e-01, 5.372e-02, -2.239e-01, -2.255e-02, -1.035e-01, -1.412e-01, -2.732e-01, 1.072e-01, 3.488e-02, 1.192e-01, 3.397e-01, 2.329e-01, 4.348e-02, -1.439e-01)); + r += mul(s3_5, M4(-1.008e-01, -5.632e-02, -7.657e-03, -2.069e-02, -1.341e-01, -2.471e-02, 7.401e-02, 4.367e-03, -1.240e-02, 1.633e-01, -1.791e-03, -1.558e-01, -1.800e-01, -2.511e-01, 1.134e-01, -1.670e-02)); + r += mul(s3_6, M4(-3.769e-02, 9.544e-03, 4.993e-02, 3.764e-03, 3.193e-02, -9.158e-02, -9.055e-02, 4.805e-02, 6.795e-02, 5.456e-02, -1.025e-01, 1.730e-02, 1.168e-02, 1.310e-01, -3.507e-02, 4.382e-02)); + r += mul(s3_7, M4(-5.501e-02, -3.820e-02, 5.317e-02, -6.017e-02, 1.382e-01, 1.471e-01, -1.624e-01, 2.640e-02, 2.001e-02, 1.615e-01, -6.277e-02, -9.653e-02, -2.665e-01, 2.116e-01, -1.292e-02, -5.260e-02)); + r += mul(s3_8, M4(4.992e-02, 9.520e-02, -3.869e-02, 7.041e-02, -1.096e-01, -7.984e-02, 6.372e-02, 4.116e-02, -5.481e-02, 6.716e-02, -7.373e-02, 3.300e-02, -1.699e-01, 1.113e-01, -5.838e-02, 2.048e-02)); + r += mul(s4_0, M4(3.478e-03, 1.604e-01, 1.143e-02, 3.950e-02, 2.978e-02, -2.053e-02, 1.382e-02, -3.209e-02, -4.983e-02, -1.550e-01, 1.172e-02, -1.038e-01, 5.117e-02, -4.133e-02, 1.129e-01, 1.274e-02)); + r += mul(s4_1, M4(1.712e-01, -3.888e-02, -1.187e-01, 6.333e-02, 2.066e-02, 1.027e-01, -6.894e-02, -9.356e-02, -1.740e-01, 4.635e-02, 1.101e-01, -9.743e-02, -1.694e-01, -3.618e-02, 1.157e-01, 9.724e-03)); + r += mul(s4_2, M4(1.616e-01, -8.447e-03, -3.057e-02, -5.442e-02, -1.433e-01, -5.447e-02, -4.839e-02, 3.747e-02, -2.974e-02, -2.371e-02, 8.349e-03, -7.386e-02, 2.599e-03, 1.992e-02, 5.651e-02, -9.889e-02)); + r += mul(s4_3, M4(-1.205e-01, 4.766e-02, -1.792e-01, -7.001e-03, -1.023e-01, 2.760e-03, -1.750e-01, 1.432e-02, 2.140e-02, -1.442e-02, -1.203e-02, 5.236e-03, 2.750e-02, 5.747e-02, 4.160e-02, 1.839e-02)); + r += mul(s4_4, M4(1.175e-01, -6.460e-02, -1.678e-01, -1.293e-01, 4.098e-02, 
-1.212e-01, -9.402e-02, -1.696e-01, -1.637e-01, -2.059e-02, -3.352e-02, 1.008e-01, -2.280e-01, -1.991e-02, 1.460e-01, -5.383e-02)); + r += mul(s4_5, M4(-5.260e-02, -6.286e-02, -7.651e-02, -3.258e-01, -8.880e-02, -6.865e-02, -2.491e-02, 5.650e-02, 3.969e-03, 4.130e-05, 7.006e-02, -8.126e-02, 3.293e-04, 1.805e-02, 1.934e-01, -4.173e-02)); + r += mul(s4_6, M4(2.222e-02, 5.658e-02, -1.776e-03, 3.331e-02, 2.624e-02, 4.201e-02, -1.271e-01, 1.976e-02, -7.570e-02, -5.879e-02, 1.233e-01, -4.147e-02, 5.968e-02, -1.057e-02, -4.891e-02, -6.360e-02)); + r += mul(s4_7, M4(7.232e-02, -1.316e-01, 8.325e-02, -3.296e-02, 1.187e-01, 2.971e-02, 8.751e-02, -3.254e-02, -1.432e-01, -2.923e-02, 1.748e-01, 7.771e-02, 1.595e-01, 1.106e-01, -1.131e-01, 1.774e-02)); + r += mul(s4_8, M4(-9.671e-02, -5.734e-02, -5.968e-02, -3.762e-02, 1.582e-02, 1.257e-02, 3.137e-02, -7.635e-04, -7.236e-02, -3.294e-02, -3.009e-02, -7.280e-02, -7.171e-02, -4.889e-03, -2.667e-02, -4.693e-03)); + r += mul(s5_0, M4(9.525e-03, 2.583e-01, 1.577e-01, -8.960e-02, -1.342e-01, 5.619e-02, 7.415e-02, -5.099e-02, 1.083e-02, -1.041e-02, 7.983e-02, 5.332e-02, -7.776e-03, -6.704e-02, 1.509e-01, 4.181e-02)); + r += mul(s5_1, M4(-2.083e-01, 1.158e-01, -2.234e-02, 7.118e-02, 5.266e-02, 4.810e-02, 9.202e-02, -1.003e-01, -1.734e-01, 4.296e-02, 1.714e-01, -2.401e-03, -1.137e-01, -1.756e-01, 6.021e-02, -2.741e-02)); + r += mul(s5_2, M4(6.815e-02, 5.817e-02, -6.170e-02, -4.732e-02, 3.478e-02, -4.199e-03, -1.851e-02, -3.221e-03, 8.744e-02, 5.604e-02, 5.947e-02, -1.328e-01, 1.943e-02, -8.131e-03, 1.900e-02, -7.607e-02)); + r += mul(s5_3, M4(2.857e-01, 1.539e-01, 1.038e-01, 1.132e-02, 1.329e-02, 2.174e-01, 1.244e-02, 8.200e-02, -4.505e-02, -5.227e-02, 4.619e-04, 6.776e-02, -5.088e-02, -3.256e-02, 6.318e-02, -4.579e-02)); + r += mul(s5_4, M4(-4.660e-02, -2.435e-01, -6.781e-03, -7.809e-02, 1.874e-01, -1.164e-01, -1.782e-01, -2.800e-01, 2.698e-02, 6.071e-02, -5.110e-02, 7.245e-02, 1.245e-02, 1.899e-01, 1.096e-01, -5.294e-02)); + r += 
mul(s5_5, M4(8.890e-02, 9.728e-02, 4.997e-02, -9.887e-02, 5.615e-02, 2.021e-02, -2.259e-02, -1.068e-01, -3.950e-02, -8.120e-02, -1.665e-02, -4.129e-02, -8.374e-02, -1.258e-01, 1.182e-01, -1.213e-02)); + r += mul(s5_6, M4(-8.262e-03, 1.664e-02, -7.024e-02, -3.985e-02, -2.808e-02, 1.221e-01, -1.890e-01, -1.087e-01, 1.692e-02, -1.162e-02, 2.958e-02, 2.775e-02, -6.859e-02, -1.296e-02, 1.193e-02, 4.396e-03)); + r += mul(s5_7, M4(8.172e-02, 1.117e-01, -1.482e-02, 5.308e-03, -5.901e-03, -5.041e-02, 4.588e-02, -1.821e-02, 8.305e-02, 1.224e-01, -6.272e-03, 1.969e-02, 4.009e-02, -3.353e-02, 6.791e-02, -1.372e-02)); + r += mul(s5_8, M4(-1.109e-01, -1.571e-02, -4.466e-02, 9.630e-02, 7.569e-03, 6.390e-02, 2.270e-02, 7.346e-03, 2.162e-02, 6.602e-02, -7.577e-02, 3.169e-02, 6.660e-03, -7.800e-02, -5.085e-03, 7.740e-02)); + r += mul(s6_0, M4(-1.017e-01, -4.034e-02, 6.708e-02, 7.748e-02, -7.052e-02, -6.251e-02, 6.019e-02, -4.987e-03, 1.404e-01, 1.748e-01, -1.077e-01, 8.488e-02, -5.884e-03, -4.451e-03, 2.979e-02, -3.478e-02)); + r += mul(s6_1, M4(-2.065e-02, 2.125e-01, 1.827e-01, -1.059e-01, 5.760e-02, 7.237e-02, 6.810e-02, 8.762e-03, -8.153e-02, 8.979e-02, -9.872e-03, 7.730e-02, 9.600e-02, -1.581e-01, 9.862e-02, -8.078e-02)); + r += mul(s6_2, M4(1.839e-02, 8.069e-02, -5.765e-02, -3.410e-02, -1.856e-01, 3.381e-02, -4.610e-02, -1.308e-02, 7.696e-02, -3.619e-02, 1.082e-01, -1.086e-01, 8.181e-02, -3.576e-02, 8.041e-03, -1.460e-01)); + r += mul(s6_3, M4(-1.003e-01, 6.476e-02, 4.396e-02, -1.430e-02, -1.837e-02, 1.834e-01, 1.998e-02, 6.159e-02, -9.878e-02, 1.229e-01, -9.909e-02, -8.428e-02, 2.370e-02, -1.355e-01, 1.162e-02, -9.837e-03)); + r += mul(s6_4, M4(1.037e-01, 1.753e-01, -1.921e-01, -6.578e-02, 6.742e-02, 3.040e-02, 1.255e-01, 6.624e-02, 8.632e-02, 1.224e-02, -7.217e-03, 9.856e-02, 1.402e-01, 1.209e-01, 1.964e-01, -2.759e-02)); + r += mul(s6_5, M4(3.978e-02, 2.199e-02, -2.567e-02, -1.525e-01, -1.993e-01, 9.225e-02, -8.903e-02, -1.027e-01, 1.432e-01, 7.147e-02, 3.857e-03, 
-6.097e-02, 1.333e-01, -4.337e-02, -7.006e-02, -2.442e-01)); + r += mul(s6_6, M4(-1.520e-01, -1.692e-02, 1.714e-02, 5.022e-02, -9.391e-02, 5.515e-03, 4.718e-02, -1.328e-02, -2.968e-02, -6.351e-02, -5.077e-02, 7.981e-02, 5.259e-02, -1.407e-02, 3.420e-02, -9.187e-02)); + r += mul(s6_7, M4(4.626e-03, 7.643e-03, 7.219e-02, -4.271e-02, 7.871e-02, 8.244e-03, 2.424e-01, 9.938e-02, 1.456e-01, -4.297e-03, -7.914e-02, -4.582e-02, 1.354e-01, -1.694e-02, -8.773e-02, 1.764e-02)); + r += mul(s6_8, M4(-4.360e-02, 6.883e-02, -3.494e-02, -3.672e-02, -1.503e-01, 5.198e-02, -1.430e-02, 2.389e-02, -7.594e-02, -1.904e-02, 5.872e-02, 3.285e-02, 1.975e-01, 1.726e-01, -9.447e-02, -9.414e-02)); + r += mul(s7_0, M4(3.853e-02, -1.717e-01, 2.571e-02, -4.859e-03, 4.389e-02, -1.690e-01, 1.320e-02, -3.390e-02, -1.307e-01, 5.201e-02, 2.112e-01, -5.495e-02, -3.004e-02, 5.965e-02, -5.489e-03, 4.998e-02)); + r += mul(s7_1, M4(-1.303e-03, 5.981e-03, 4.903e-02, 7.560e-03, 1.147e-01, -3.206e-02, -7.700e-02, -9.505e-02, 6.375e-02, 1.431e-01, 2.929e-01, -1.937e-01, -4.918e-02, 3.872e-02, 1.358e-01, 4.230e-02)); + r += mul(s7_2, M4(-1.391e-01, -6.461e-02, -4.030e-02, -3.580e-02, 1.159e-03, -8.988e-02, -1.543e-02, -1.233e-01, 1.509e-01, -5.529e-02, 1.104e-01, 1.352e-01, -5.030e-02, 5.567e-02, 1.109e-01, -2.720e-03)); + r += mul(s7_3, M4(-2.481e-02, -1.004e-01, 1.421e-02, -5.236e-03, -8.489e-03, -6.281e-02, -1.650e-02, 1.920e-02, 1.945e-02, -5.418e-02, -6.112e-02, -7.400e-02, -7.176e-02, 2.431e-02, -3.867e-02, -7.052e-03)); + r += mul(s7_4, M4(-4.605e-02, -1.893e-02, -2.272e-01, -6.262e-02, 1.103e-01, -2.067e-01, 9.526e-02, 1.047e-01, -1.670e-01, -1.132e-01, 1.711e-01, 7.622e-03, -7.388e-02, 1.010e-02, -1.279e-02, 4.707e-02)); + r += mul(s7_5, M4(3.021e-02, 8.185e-02, -1.230e-01, 2.231e-01, -3.762e-02, -2.493e-01, -2.623e-02, -2.778e-01, 6.918e-02, 1.902e-02, -9.916e-02, -2.807e-01, -4.177e-02, 3.213e-02, -5.395e-02, 5.346e-02)); + r += mul(s7_6, M4(-3.677e-02, 8.425e-03, 1.583e-02, -7.588e-03, 3.335e-02, 
4.774e-02, -1.190e-02, 1.936e-02, -5.956e-02, -2.931e-02, -6.092e-02, 6.113e-02, 8.702e-03, -2.472e-02, -7.609e-03, -7.800e-02)); + r += mul(s7_7, M4(-1.198e-01, 6.918e-02, -2.854e-02, 2.875e-02, 8.723e-02, -1.937e-01, 2.986e-02, 1.310e-01, -6.265e-02, -1.409e-01, -1.194e-01, 2.090e-01, -1.643e-02, -1.501e-01, 4.103e-02, -1.420e-02)); + r += mul(s7_8, M4(2.341e-02, 8.679e-02, 1.332e-02, 7.991e-02, 1.440e-01, -8.666e-02, 5.631e-02, -8.778e-02, 1.794e-01, 4.280e-02, -1.685e-02, -1.721e-01, -4.744e-02, 4.625e-02, -3.583e-02, -2.333e-02)); + r += V4(2.939e-02, 1.703e-02, -4.544e-03, -7.752e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.320e-02, -7.718e-02, 9.506e-02, 7.016e-03, -6.464e-02, 2.906e-03, -1.199e-01, 9.856e-02, -8.741e-02, 3.326e-02, 1.542e-02, -5.129e-02, 1.470e-01, -7.230e-02, -6.427e-02, 3.123e-02)); + r += mul(s0_1, M4(-8.588e-02, -2.614e-02, 8.184e-02, 7.694e-02, -5.604e-02, 5.216e-02, 1.357e-01, 2.437e-01, -1.033e-01, 7.084e-02, 4.441e-02, 4.397e-02, -3.856e-02, -7.256e-02, 2.395e-02, 4.535e-03)); + r += mul(s0_2, M4(-4.604e-02, 1.596e-02, 1.392e-01, 3.868e-02, 5.121e-02, 2.543e-01, 7.318e-02, 7.010e-02, 3.419e-02, 3.113e-02, -1.747e-02, -2.473e-02, -2.759e-02, -9.824e-02, -7.014e-02, -6.002e-02)); + r += mul(s0_3, M4(-3.614e-02, -1.616e-01, -5.425e-02, 8.711e-02, -7.508e-02, 1.317e-01, 
3.801e-02, 3.961e-02, -2.620e-03, -1.130e-02, -2.620e-02, 5.976e-02, -7.145e-03, 1.283e-01, -7.578e-02, -7.914e-02)); + r += mul(s0_4, M4(4.839e-02, -1.199e-02, -3.252e-02, 5.731e-02, -1.909e-01, 1.175e-01, -1.192e-01, 1.086e-01, -9.094e-02, 1.143e-01, -3.654e-02, -3.028e-02, -8.647e-02, -6.826e-02, -1.610e-01, -3.038e-03)); + r += mul(s0_5, M4(-1.978e-02, -1.834e-02, -4.260e-02, -1.173e-01, 1.760e-01, 1.418e-01, 4.347e-02, -2.425e-01, 3.787e-02, -2.291e-02, 5.158e-02, 6.051e-03, 2.596e-02, 7.399e-02, 1.628e-02, 7.668e-02)); + r += mul(s0_6, M4(-1.158e-02, -3.459e-02, -7.090e-03, 6.518e-02, 1.804e-02, -4.394e-02, -1.789e-02, 5.996e-03, -6.698e-03, 4.131e-02, 1.149e-01, -8.381e-02, 7.473e-02, 1.978e-03, 9.873e-02, 8.531e-03)); + r += mul(s0_7, M4(3.496e-02, 8.744e-02, 4.371e-02, -1.088e-01, -3.036e-02, -5.697e-02, 1.940e-02, -6.022e-03, 6.581e-03, 3.124e-03, 1.142e-02, -8.218e-02, 3.751e-02, -1.171e-01, -7.349e-03, -1.270e-01)); + r += mul(s0_8, M4(4.895e-02, 2.241e-02, 5.956e-03, -3.883e-02, -7.192e-02, 1.757e-01, 4.466e-02, 4.012e-02, -3.673e-02, -1.307e-02, -2.441e-02, -9.896e-02, -2.868e-02, -1.131e-02, 4.646e-02, 9.386e-02)); + r += mul(s1_0, M4(-1.430e-03, 3.561e-02, -3.690e-01, 1.564e-01, 3.685e-02, 2.425e-03, -1.631e-01, -4.238e-02, -2.123e-01, -5.178e-02, 9.378e-02, -7.436e-02, 6.296e-02, -6.532e-02, -2.221e-01, 6.686e-01)); + r += mul(s1_1, M4(-9.776e-03, -4.756e-02, -5.781e-02, 2.344e-01, -4.454e-02, -1.328e-01, -4.029e-02, 1.455e-01, -2.053e-01, 1.001e-02, 1.591e-01, -2.491e-01, -2.265e-01, -2.036e-01, -3.604e-02, -6.287e-03)); + r += mul(s1_2, M4(1.567e-01, 1.016e-01, -1.516e-01, -7.148e-02, 2.769e-02, 5.141e-02, 2.809e-02, -5.084e-02, -1.296e-01, 3.819e-02, 3.261e-01, -2.187e-01, 1.355e-02, -2.216e-01, 4.251e-03, 2.603e-02)); + r += mul(s1_3, M4(-5.761e-02, 1.639e-01, 1.786e-01, -3.273e-01, 9.615e-02, 6.864e-02, 9.650e-02, -4.877e-02, -4.408e-02, 7.343e-03, 7.796e-02, -2.653e-01, 1.835e-01, 1.345e-01, 9.666e-02, 2.262e-01)); + r += mul(s1_4, 
M4(-3.391e-01, 4.119e-02, 2.529e-01, -2.811e-01, -2.786e-03, -1.297e-01, -8.726e-02, 7.554e-02, -3.214e-01, -4.229e-02, 6.652e-02, -3.961e-01, -5.312e-02, -1.620e-01, 1.159e-02, 2.044e-01)); + r += mul(s1_5, M4(-8.320e-02, 1.444e-01, 1.536e-01, -3.765e-01, 2.449e-02, -3.528e-02, -1.436e-01, -9.440e-02, -2.857e-01, -3.807e-01, 1.571e-01, -1.162e-01, 2.310e-02, 1.582e-01, 1.143e-01, -1.543e-02)); + r += mul(s1_6, M4(-5.282e-02, 1.744e-01, 4.193e-02, 2.067e-01, -1.906e-02, 6.041e-02, 4.649e-02, -4.463e-03, -6.232e-02, 4.702e-02, 1.993e-01, -3.678e-01, 2.422e-01, 1.648e-01, 1.056e-01, 3.540e-01)); + r += mul(s1_7, M4(-2.235e-01, 5.221e-01, 1.819e-01, 5.619e-02, -1.677e-02, -1.433e-01, 1.799e-02, 1.457e-02, -1.292e-01, 1.292e-01, 2.298e-01, -3.856e-01, 1.253e-01, -2.538e-01, -2.143e-01, -3.131e-01)); + r += mul(s1_8, M4(-1.091e-01, 6.784e-02, -2.271e-01, -5.712e-02, -8.358e-02, -2.816e-02, -1.511e-02, -9.424e-02, -4.637e-02, -1.751e-01, 1.128e-01, -1.316e-01, 3.199e-03, 3.003e-02, -2.507e-03, 3.708e-02)); + r += mul(s2_0, M4(7.022e-02, 9.826e-03, 1.264e-01, -6.655e-02, -9.567e-02, 7.824e-02, -1.140e-01, -6.869e-02, -4.578e-02, -4.380e-02, 8.145e-02, 1.228e-02, 2.101e-02, -2.336e-02, 4.873e-02, -4.936e-02)); + r += mul(s2_1, M4(9.575e-03, 5.262e-02, -9.150e-02, 4.189e-02, -7.853e-02, 2.990e-02, -4.065e-03, -1.226e-01, -3.636e-04, 1.089e-02, 9.353e-02, 1.230e-02, 6.342e-03, -4.481e-02, -3.678e-02, -3.514e-03)); + r += mul(s2_2, M4(1.176e-01, -1.579e-01, 4.680e-02, 2.423e-02, -1.349e-01, -2.077e-01, 3.620e-02, -1.201e-01, -2.024e-02, -6.755e-03, 2.079e-02, 4.859e-02, 1.734e-01, 8.149e-03, 1.137e-01, 2.593e-03)); + r += mul(s2_3, M4(1.332e-02, -1.541e-01, -5.517e-02, -2.061e-02, 7.641e-02, -4.840e-02, -1.162e-01, -3.291e-01, -1.245e-01, 3.492e-02, -2.600e-02, 1.613e-01, 9.448e-02, 1.412e-01, 2.535e-02, -1.406e-01)); + r += mul(s2_4, M4(2.151e-01, -1.365e-02, 1.180e-02, 2.581e-02, 6.572e-02, -1.372e-02, -1.722e-01, -1.926e-01, 4.826e-02, -1.078e-01, -1.183e-01, -7.610e-02, 
-7.766e-03, 4.998e-02, -1.190e-02, -1.360e-02)); + r += mul(s2_5, M4(2.103e-01, -1.463e-02, -1.856e-01, 2.490e-02, 2.971e-02, -1.128e-01, -2.542e-01, -1.356e-01, -8.917e-03, 6.293e-02, -1.829e-02, -5.433e-02, 8.434e-02, -1.365e-01, -2.178e-01, -9.137e-02)); + r += mul(s2_6, M4(3.370e-01, -2.690e-01, -1.512e-02, -1.231e-01, -5.368e-02, -6.936e-02, 5.311e-02, -3.567e-02, 7.326e-02, -7.379e-02, 3.780e-03, -4.926e-03, -5.118e-03, 4.603e-02, 1.331e-02, 7.783e-02)); + r += mul(s2_7, M4(1.102e-01, -4.355e-02, -1.842e-01, -4.951e-02, -2.790e-02, -2.210e-01, -4.385e-02, -1.147e-01, 3.262e-02, 2.523e-02, -3.362e-02, 5.086e-02, -7.375e-02, 5.865e-03, 7.601e-02, 1.326e-01)); + r += mul(s2_8, M4(2.669e-01, -2.111e-01, 7.244e-03, -7.698e-02, -2.844e-02, -4.207e-02, -3.568e-03, -6.819e-02, 3.351e-02, 2.784e-03, 2.415e-04, -2.362e-02, -2.661e-02, 2.854e-02, -2.674e-02, -6.056e-02)); + r += mul(s3_0, M4(3.955e-02, 2.142e-02, -2.566e-02, 1.080e-02, 4.278e-02, -7.498e-02, 3.332e-02, 1.456e-02, -3.917e-03, -3.820e-02, -1.247e-01, -6.262e-02, -2.027e-02, -1.745e-01, -1.151e-01, -1.639e-02)); + r += mul(s3_1, M4(5.990e-02, 4.835e-02, -1.038e-01, 7.007e-02, -2.788e-02, 2.322e-03, 1.116e-01, -9.015e-02, 4.873e-02, -7.391e-02, 1.696e-02, 4.602e-02, -2.696e-01, -1.999e-01, -1.103e-01, -1.714e-02)); + r += mul(s3_2, M4(3.210e-02, -8.543e-02, -8.054e-02, -1.211e-02, 1.079e-02, 3.980e-02, 6.774e-02, -3.586e-02, -5.813e-02, -9.956e-02, -1.907e-02, -8.042e-02, -1.100e-01, 1.353e-01, -4.048e-02, 2.285e-02)); + r += mul(s3_3, M4(-2.530e-02, -3.663e-02, 5.284e-02, -7.050e-02, -3.852e-02, 6.278e-03, -7.247e-02, -2.077e-02, 7.670e-04, 4.137e-02, 4.632e-02, 4.830e-02, 2.586e-01, 6.367e-02, -6.495e-02, -3.022e-02)); + r += mul(s3_4, M4(-2.026e-01, 1.004e-01, 2.051e-01, 1.364e-02, 1.316e-01, 9.292e-02, -5.945e-02, 1.091e-01, 1.033e-01, -1.929e-01, -1.421e-01, -5.142e-02, -1.676e-02, -2.549e-01, 2.412e-01, -3.018e-01)); + r += mul(s3_5, M4(-3.704e-02, -6.964e-02, 6.598e-03, 8.814e-02, 7.165e-02, 
-4.431e-02, -1.288e-01, -5.984e-02, 3.779e-02, -1.225e-01, -3.971e-03, 5.088e-02, -5.586e-02, -7.200e-02, 2.022e-02, -1.665e-01)); + r += mul(s3_6, M4(-1.237e-01, -1.838e-01, 1.462e-02, -7.791e-02, 1.130e-01, 6.404e-02, 5.346e-02, -4.299e-04, 1.870e-02, 4.440e-02, 3.492e-03, 1.038e-01, -6.858e-02, -2.071e-02, -2.566e-02, 4.679e-02)); + r += mul(s3_7, M4(-7.821e-02, 9.331e-02, -6.336e-02, 8.823e-02, -4.691e-03, -1.123e-02, -4.472e-02, 1.027e-01, -5.471e-02, 8.144e-02, 1.998e-02, 1.143e-01, -2.183e-01, 7.181e-02, -3.760e-02, -8.993e-02)); + r += mul(s3_8, M4(5.917e-02, -2.845e-02, -2.938e-02, -3.687e-02, -1.116e-01, -1.184e-01, 1.099e-01, 1.020e-01, 2.030e-03, 1.817e-03, -3.595e-02, 4.143e-04, -5.942e-02, 3.027e-02, -4.019e-02, 9.432e-02)); + r += mul(s4_0, M4(-4.261e-02, 1.766e-02, 9.293e-02, -5.081e-02, 9.748e-02, -6.530e-02, -1.216e-01, 1.010e-02, -2.167e-01, -7.010e-03, 2.974e-02, -4.901e-02, -2.646e-03, 1.033e-02, -6.713e-02, -1.442e-01)); + r += mul(s4_1, M4(7.373e-02, -6.465e-02, -7.503e-02, -7.127e-02, -1.255e-01, 1.340e-02, -1.245e-01, 3.496e-02, -1.546e-01, -3.533e-02, -5.367e-02, -8.579e-03, 1.241e-01, 1.731e-02, -1.479e-02, -2.206e-02)); + r += mul(s4_2, M4(5.265e-02, -7.941e-02, 5.017e-02, -2.362e-02, 2.740e-02, 6.718e-02, 1.580e-02, -4.146e-02, -5.366e-02, -3.145e-02, 1.035e-01, 2.084e-02, 1.680e-02, -9.646e-02, -1.065e-01, -1.834e-02)); + r += mul(s4_3, M4(8.164e-02, 7.382e-03, 5.676e-02, 7.940e-02, 8.327e-02, 7.134e-02, 1.251e-01, -1.103e-02, -1.549e-01, -6.383e-02, 1.144e-02, -7.174e-02, 3.976e-02, 8.730e-02, -1.003e-01, -5.632e-02)); + r += mul(s4_4, M4(1.269e-01, -1.364e-01, 2.985e-01, 4.161e-02, 3.140e-03, 9.006e-02, -1.225e-02, 2.627e-01, -3.744e-01, 4.081e-02, 9.455e-02, 3.477e-02, -9.757e-03, 8.997e-03, -2.210e-01, 2.177e-01)); + r += mul(s4_5, M4(2.005e-02, 7.934e-02, -8.341e-03, 5.906e-02, 4.050e-02, 8.305e-02, -6.599e-02, 4.685e-02, -3.776e-02, -1.510e-01, 3.013e-02, -1.222e-02, -1.127e-01, 1.073e-02, -1.633e-02, -1.180e-01)); + r += 
mul(s4_6, M4(-3.381e-02, -4.626e-02, 3.144e-02, -6.961e-02, 9.662e-02, -1.304e-02, -4.102e-02, 6.509e-02, -1.617e-01, -1.666e-02, 2.633e-02, 1.079e-01, 1.275e-01, 2.305e-01, 1.164e-01, 5.076e-02)); + r += mul(s4_7, M4(8.643e-02, -1.511e-01, -6.230e-02, -6.197e-02, -6.672e-02, 2.599e-02, -7.017e-02, 5.437e-02, -1.406e-01, 3.464e-02, -8.536e-02, -5.700e-02, -8.383e-02, 1.391e-01, 3.891e-01, 8.632e-02)); + r += mul(s4_8, M4(4.181e-02, -4.849e-02, -7.718e-02, 7.518e-02, 1.577e-03, -1.840e-02, -9.197e-03, -4.337e-02, -1.126e-01, 5.666e-02, 1.596e-03, -1.028e-01, -1.218e-01, 6.298e-02, 1.049e-01, 8.461e-02)); + r += mul(s5_0, M4(-1.387e-01, 4.819e-03, 3.064e-02, -1.743e-01, 1.076e-01, -4.823e-02, -4.510e-02, 4.724e-02, -9.852e-03, 2.309e-03, -2.033e-02, -1.944e-02, 2.860e-03, 2.271e-02, -4.729e-02, -9.371e-02)); + r += mul(s5_1, M4(-7.357e-02, -5.027e-02, -1.819e-01, -5.641e-02, 8.112e-02, -4.896e-02, -9.809e-02, 5.778e-03, 4.078e-02, -6.831e-02, -1.898e-01, -1.351e-02, 1.226e-01, 6.178e-02, -4.512e-02, -2.158e-02)); + r += mul(s5_2, M4(5.850e-02, -5.272e-02, 1.044e-01, -7.582e-02, 1.749e-02, 4.037e-02, 6.002e-02, -6.747e-03, 1.003e-01, -9.924e-02, -9.152e-02, 5.488e-03, -1.101e-02, -7.243e-02, -1.296e-01, 1.558e-02)); + r += mul(s5_3, M4(6.173e-02, 1.755e-01, -7.152e-02, -3.348e-01, 3.104e-01, -6.098e-02, 2.352e-01, -3.734e-01, 5.136e-02, 4.157e-02, 5.028e-02, -1.600e-01, -1.269e-01, 4.136e-02, -1.563e-02, -6.619e-02)); + r += mul(s5_4, M4(-1.144e-02, 2.076e-01, 6.527e-02, -1.160e-01, -1.525e-01, 8.878e-02, 2.254e-01, 1.486e-01, -1.173e-01, -2.311e-02, 3.828e-02, -3.514e-02, -1.886e-01, 1.043e-01, -1.510e-01, 7.074e-02)); + r += mul(s5_5, M4(2.749e-02, -6.254e-02, -1.639e-01, -2.591e-01, 7.234e-02, -1.603e-03, -9.239e-02, 4.716e-02, 2.100e-02, -1.405e-01, 1.463e-01, 8.715e-02, -1.764e-01, 1.204e-01, 3.718e-02, 3.813e-02)); + r += mul(s5_6, M4(2.766e-02, 1.623e-01, 1.378e-01, -1.562e-01, 1.616e-01, 8.169e-02, 5.208e-02, -1.288e-01, 6.320e-02, 9.176e-02, 1.180e-02, 
1.137e-01, 8.061e-02, -1.175e-01, 6.469e-02, -1.322e-02)); + r += mul(s5_7, M4(-1.880e-01, 2.300e-02, 1.062e-01, 1.907e-02, -9.183e-02, -8.551e-02, 9.594e-02, 1.451e-01, 1.138e-01, 1.241e-01, -7.151e-02, 9.592e-02, 5.135e-02, -8.747e-02, -8.409e-02, -6.575e-02)); + r += mul(s5_8, M4(-2.020e-01, -1.719e-01, -6.394e-02, 1.819e-02, 8.297e-02, -9.173e-02, 5.302e-02, -9.164e-02, 1.000e-01, -1.038e-01, -8.130e-02, 1.412e-01, -1.411e-01, 1.092e-01, 2.095e-02, 9.288e-02)); + r += mul(s6_0, M4(-4.180e-02, -5.188e-02, 9.579e-02, -2.742e-02, -9.921e-04, 1.066e-02, 9.985e-02, -4.637e-02, 6.303e-02, -1.439e-02, 4.576e-02, -2.195e-02, 4.854e-02, -1.255e-01, -8.582e-03, 6.013e-02)); + r += mul(s6_1, M4(-3.422e-02, 2.045e-02, -4.093e-02, -1.336e-01, -8.530e-02, 5.189e-02, 1.427e-01, -1.012e-01, 1.486e-01, -1.828e-02, -9.215e-02, 2.158e-02, 1.942e-02, -1.541e-02, -1.068e-01, -3.169e-02)); + r += mul(s6_2, M4(-3.700e-02, -1.217e-01, 1.191e-01, -4.359e-03, -2.485e-02, 1.825e-02, 1.838e-01, -3.668e-02, 2.293e-02, 3.977e-02, -8.759e-02, 4.553e-03, -6.276e-02, -2.628e-02, -3.391e-02, 1.566e-04)); + r += mul(s6_3, M4(-1.024e-02, -2.777e-02, 4.128e-02, -6.782e-02, -3.726e-03, -5.963e-02, -9.420e-03, -9.402e-02, -8.151e-02, -9.414e-02, -3.991e-02, 7.724e-02, -7.746e-02, -7.503e-02, 7.464e-02, -8.905e-02)); + r += mul(s6_4, M4(-2.709e-01, 2.423e-01, 2.496e-01, 2.597e-03, -1.579e-01, 2.001e-01, -1.406e-01, -7.105e-02, 3.040e-01, -3.878e-02, -1.402e-01, -6.004e-02, -8.634e-02, -1.154e-01, 7.337e-02, -5.099e-02)); + r += mul(s6_5, M4(8.580e-02, -1.532e-01, 5.976e-02, 2.598e-01, -1.812e-02, 2.478e-01, -1.218e-02, 1.008e-01, -3.848e-02, -3.495e-02, 1.092e-03, -1.450e-01, -9.653e-02, -9.008e-02, 4.896e-02, 7.095e-02)); + r += mul(s6_6, M4(-1.837e-01, -1.233e-01, -1.180e-02, -1.169e-02, -1.061e-01, -2.287e-02, -8.552e-03, -5.326e-02, 7.852e-02, 7.589e-03, 2.108e-02, -4.405e-03, 3.078e-02, -9.147e-02, -1.023e-01, -1.030e-02)); + r += mul(s6_7, M4(1.237e-01, -2.445e-01, -2.362e-01, -2.529e-02, 
-7.137e-02, -5.691e-04, -7.231e-02, -1.206e-01, -5.707e-02, 8.429e-02, -2.818e-02, -5.455e-03, -1.368e-01, 1.412e-01, 1.685e-01, 1.579e-01)); + r += mul(s6_8, M4(8.906e-02, -7.479e-03, -4.303e-02, 4.799e-02, -8.465e-02, 7.594e-02, -1.030e-02, -1.335e-01, -8.414e-03, -4.142e-02, -7.459e-03, 3.904e-02, -1.490e-01, -8.325e-04, 6.042e-02, 1.324e-01)); + r += mul(s7_0, M4(4.533e-02, -5.916e-02, -9.127e-02, -4.446e-02, 8.467e-02, -1.642e-02, 9.352e-02, 3.970e-03, -1.636e-01, -1.423e-01, 4.556e-03, -1.125e-01, -4.726e-02, 8.311e-02, 5.499e-02, 3.963e-02)); + r += mul(s7_1, M4(4.547e-02, 3.472e-03, -8.499e-02, 6.881e-02, 8.495e-02, -8.364e-02, -4.263e-02, 1.058e-01, -9.016e-02, -7.198e-02, -1.056e-01, -6.057e-03, -4.490e-02, 1.829e-02, 4.157e-02, -8.081e-02)); + r += mul(s7_2, M4(-8.375e-02, -1.000e-02, -1.144e-02, 5.829e-02, 5.197e-02, -1.701e-01, -2.617e-02, -3.956e-02, 2.774e-03, -6.882e-02, -1.381e-01, -2.288e-01, -6.002e-02, 3.220e-02, -1.218e-03, 6.675e-02)); + r += mul(s7_3, M4(5.483e-02, -1.547e-02, -6.716e-03, -1.510e-01, 7.996e-02, -1.777e-02, 4.348e-02, 8.002e-02, 1.035e-01, -9.894e-03, -2.000e-02, 1.043e-01, -9.879e-02, -1.135e-01, -2.837e-02, 1.051e-01)); + r += mul(s7_4, M4(-2.424e-01, 2.243e-01, -3.853e-02, 2.895e-01, 2.039e-01, -1.486e-01, -3.057e-02, -6.787e-02, 1.795e-02, -1.118e-01, -2.165e-01, -4.785e-02, 5.191e-02, 1.092e-01, -1.205e-01, 1.643e-01)); + r += mul(s7_5, M4(2.205e-01, -2.171e-02, 4.996e-02, 9.916e-02, 5.749e-02, 3.081e-02, 8.032e-02, 1.415e-01, -2.332e-01, 1.230e-02, -1.489e-02, 2.408e-02, 4.951e-02, -7.304e-02, 3.857e-02, 3.996e-03)); + r += mul(s7_6, M4(-6.612e-02, 1.535e-02, -8.325e-02, 9.137e-02, 2.110e-02, 2.828e-02, -3.476e-02, -3.593e-02, 6.483e-03, -6.664e-02, 1.464e-01, 3.244e-03, -4.637e-02, -3.400e-02, 8.721e-03, -2.291e-02)); + r += mul(s7_7, M4(1.081e-01, -8.187e-03, -5.687e-02, 5.623e-02, 9.695e-02, 9.866e-03, -3.664e-02, -7.675e-03, 2.581e-02, 7.745e-02, 5.536e-02, -7.075e-02, -1.414e-01, -5.800e-02, -1.569e-02, 4.333e-02)); 
+ r += mul(s7_8, M4(6.782e-02, 2.670e-02, -1.368e-02, -1.088e-01, -6.193e-02, -5.791e-02, 8.965e-02, -6.801e-03, 2.512e-02, -3.296e-02, 1.083e-01, 3.109e-02, 2.950e-02, 2.437e-02, -2.642e-02, 8.794e-03)); + r += V4(-1.256e-03, -1.254e-02, 2.979e-02, 1.394e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.887e-02, 3.025e-02, 1.517e-03, -3.288e-02, -1.083e-01, -4.845e-02, 2.170e-02, 4.413e-02, 2.451e-02, -6.292e-02, -2.513e-02, 5.728e-02, 3.625e-02, 7.753e-02, 7.297e-02, -5.741e-02)); + r += mul(s0_1, M4(-1.728e-02, -5.232e-02, -8.858e-02, -2.687e-02, -1.604e-02, -6.041e-02, -1.106e-01, 7.307e-02, -3.834e-02, 1.996e-02, -1.364e-02, -1.310e-02, -1.076e-01, -1.056e-01, -1.804e-02, -1.684e-02)); + r += mul(s0_2, M4(1.105e-01, -2.900e-02, 5.276e-02, -5.441e-03, -7.289e-02, 1.312e-01, 1.417e-01, -7.838e-03, -9.457e-02, 3.891e-04, 6.369e-02, -1.775e-02, 9.250e-02, 1.866e-02, -9.643e-02, 4.267e-02)); + r += mul(s0_3, M4(-1.042e-01, 4.142e-02, -1.478e-01, -4.082e-02, -2.004e-01, -4.275e-02, 5.319e-02, 3.056e-02, -9.493e-03, 9.243e-03, -8.440e-02, -1.425e-02, -2.520e-02, -5.825e-02, 5.914e-02, -2.031e-02)); + r += mul(s0_4, M4(-1.251e-01, -1.828e-01, -3.615e-02, 9.177e-02, -2.178e-01, 1.441e-01, -1.189e-01, -4.995e-02, -6.180e-02, 2.246e-02, 2.158e-02, 2.159e-03, -1.302e-01, -7.711e-02, -1.093e-01, -1.144e-02)); + r += 
mul(s0_5, M4(1.565e-02, 2.006e-02, -2.840e-02, 4.644e-02, -1.280e-01, 7.514e-02, 6.596e-02, -1.821e-01, 3.658e-02, 8.469e-03, 5.564e-02, -5.136e-02, 2.397e-02, -1.296e-02, -4.520e-02, -1.471e-02)); + r += mul(s0_6, M4(-3.172e-02, 4.281e-02, 5.391e-04, -7.864e-02, -1.542e-02, 1.393e-02, 8.824e-02, -5.775e-02, -1.202e-02, 6.912e-02, 1.232e-02, 7.675e-03, -4.605e-03, 7.013e-03, -1.183e-02, -1.676e-02)); + r += mul(s0_7, M4(-1.415e-01, -5.271e-02, -5.525e-02, 6.507e-02, -3.757e-02, 3.016e-02, -1.386e-02, -4.343e-02, 1.544e-03, 1.466e-02, -1.114e-01, 7.776e-02, -2.544e-03, 1.628e-02, 1.985e-02, 5.872e-02)); + r += mul(s0_8, M4(3.167e-02, -7.367e-02, -4.462e-02, 3.674e-02, 5.715e-02, -3.717e-02, -5.898e-02, 2.154e-02, 1.026e-01, -8.293e-03, -4.104e-02, 9.261e-04, 2.202e-02, 2.020e-02, -7.776e-03, -8.210e-03)); + r += mul(s1_0, M4(-2.467e-01, -1.944e-02, 8.368e-02, 7.498e-02, 7.499e-02, 2.208e-03, 9.049e-02, -8.912e-02, -8.240e-02, 6.174e-02, 3.425e-02, -4.272e-02, -1.418e-01, 2.797e-01, 2.659e-01, -5.863e-02)); + r += mul(s1_1, M4(6.391e-02, -6.175e-02, -1.784e-02, -4.688e-03, 1.535e-01, -3.568e-02, 3.525e-02, 2.541e-02, -1.344e-01, 3.259e-01, -3.196e-02, -6.148e-02, -3.933e-02, 1.929e-01, -1.274e-02, -2.005e-01)); + r += mul(s1_2, M4(-7.743e-02, 1.428e-01, 1.349e-01, -5.239e-02, 1.095e-01, 4.970e-02, -1.311e-02, -1.640e-03, 1.850e-02, 1.172e-01, -2.602e-02, -4.640e-02, -1.370e-01, 8.976e-02, -8.676e-02, -7.201e-02)); + r += mul(s1_3, M4(-1.673e-01, 1.323e-01, 1.950e-02, -1.240e-02, 8.475e-02, 2.113e-02, 8.506e-02, -5.274e-02, 1.806e-01, 1.457e-01, 2.915e-01, -1.440e-01, -1.149e-01, -1.061e-01, -2.980e-02, 6.842e-02)); + r += mul(s1_4, M4(-2.799e-01, 2.112e-02, -2.612e-01, -1.700e-01, 3.217e-02, 1.276e-01, -9.203e-02, 2.761e-02, -2.181e-01, 5.061e-01, 3.991e-01, -1.106e-01, -2.741e-02, -2.481e-01, 2.170e-01, -2.852e-01)); + r += mul(s1_5, M4(3.487e-01, 7.214e-02, 4.494e-02, 5.371e-02, 8.193e-02, 9.480e-02, -1.073e-01, 2.985e-02, 1.997e-02, 7.729e-02, 9.772e-02, 
-7.332e-02, -2.498e-01, 6.762e-02, 2.293e-02, -2.888e-01)); + r += mul(s1_6, M4(4.480e-02, 1.056e-01, 1.198e-01, -1.012e-01, 1.828e-01, -7.560e-02, 3.066e-02, -6.429e-02, 8.751e-02, 3.170e-02, 7.807e-02, -3.122e-02, 9.162e-02, -1.106e-02, -1.311e-02, 9.599e-02)); + r += mul(s1_7, M4(3.818e-02, -1.433e-01, 3.782e-01, -2.188e-02, 3.919e-02, -8.324e-02, -3.761e-02, 1.588e-02, -1.014e-01, 1.850e-01, 2.286e-02, 3.918e-02, 1.407e-01, -6.814e-02, 1.631e-01, 1.562e-02)); + r += mul(s1_8, M4(-4.358e-04, -9.482e-02, 2.193e-01, -2.293e-01, 2.554e-01, -9.018e-02, -8.484e-02, 5.927e-02, 7.714e-02, 1.900e-01, 3.396e-02, 1.056e-02, -7.022e-02, 1.926e-02, 4.448e-02, 4.663e-02)); + r += mul(s2_0, M4(1.784e-02, -7.199e-02, -5.824e-02, 6.873e-02, -3.191e-04, -9.134e-02, 8.370e-02, -3.068e-02, -5.419e-02, -6.458e-02, 6.291e-03, 3.031e-02, -4.128e-02, 5.380e-02, 4.977e-02, -7.324e-03)); + r += mul(s2_1, M4(1.181e-02, -1.516e-01, -9.786e-02, 7.405e-02, 1.638e-01, -4.243e-02, 1.167e-01, 5.082e-02, 1.001e-01, 1.146e-01, -1.138e-01, 3.022e-02, -4.277e-02, 1.614e-02, 9.942e-02, -7.122e-03)); + r += mul(s2_2, M4(1.158e-01, -1.022e-01, -2.080e-02, -7.057e-02, 5.919e-02, -1.602e-01, -5.761e-02, 6.987e-02, 1.531e-01, -6.507e-02, -8.643e-02, 9.661e-02, -4.889e-02, -4.945e-03, -1.143e-02, -2.502e-02)); + r += mul(s2_3, M4(-1.857e-01, 1.340e-01, -2.733e-01, 1.031e-01, -4.808e-02, 1.324e-01, 4.173e-02, 3.009e-02, -1.060e-01, -2.040e-02, 9.355e-03, 6.253e-02, 5.537e-02, 8.865e-02, 1.822e-01, -1.006e-01)); + r += mul(s2_4, M4(-9.784e-02, 2.023e-02, -1.142e-01, -2.146e-01, -3.103e-02, 1.708e-02, 1.423e-01, 3.270e-02, -1.197e-01, 2.456e-01, -1.454e-01, 3.783e-01, 1.439e-02, -4.989e-03, 8.481e-02, 5.491e-03)); + r += mul(s2_5, M4(1.557e-01, -1.260e-02, 9.447e-02, 7.943e-02, 2.710e-02, 5.271e-02, 2.650e-02, 7.770e-02, 7.340e-02, -5.755e-02, -3.267e-03, 8.076e-02, -7.777e-02, 6.201e-02, 2.384e-02, -6.711e-03)); + r += mul(s2_6, M4(-1.337e-01, 2.017e-01, 1.967e-01, 8.228e-02, -1.399e-03, -1.107e-01, 
1.128e-02, 8.017e-02, -2.114e-02, 2.236e-02, 3.746e-02, 1.154e-01, 7.151e-03, -4.103e-02, 2.060e-01, 3.782e-02)); + r += mul(s2_7, M4(1.582e-01, 4.512e-02, 1.201e-01, 3.814e-02, -8.135e-02, -5.291e-03, -5.477e-03, 9.017e-02, 1.552e-01, -3.701e-02, -7.238e-02, 1.975e-01, 4.302e-02, -3.722e-02, -7.104e-02, 1.231e-01)); + r += mul(s2_8, M4(-1.981e-02, 1.215e-01, -6.274e-02, 5.424e-02, 9.411e-02, 6.553e-02, -7.473e-02, 1.218e-01, 4.111e-03, 1.296e-02, -1.339e-02, 6.284e-02, -3.329e-02, 4.491e-03, -4.000e-02, 2.687e-02)); + r += mul(s3_0, M4(1.900e-02, 3.132e-02, -1.107e-02, 2.916e-02, -2.324e-02, -3.320e-02, -7.677e-02, -3.622e-03, -1.094e-01, 2.387e-02, 8.828e-03, 4.683e-03, 2.363e-01, 6.495e-02, 6.438e-02, -4.063e-03)); + r += mul(s3_1, M4(1.306e-02, -7.558e-03, 8.329e-04, 8.291e-02, 1.002e-01, 6.220e-02, 3.872e-02, 2.616e-02, -6.577e-02, 1.472e-01, -1.272e-01, 1.232e-02, 1.216e-01, 3.466e-02, -5.328e-02, 3.051e-02)); + r += mul(s3_2, M4(2.935e-02, -8.973e-02, 6.832e-02, -2.761e-02, 3.865e-02, 2.328e-02, -7.397e-02, -1.242e-02, 2.967e-02, 1.057e-01, -1.049e-01, 1.078e-01, -6.954e-02, 1.442e-02, 1.111e-01, -4.695e-02)); + r += mul(s3_3, M4(2.117e-02, 6.500e-02, -7.845e-02, 8.502e-04, -6.699e-02, 3.160e-02, -8.446e-02, -3.742e-02, -1.103e-01, 4.940e-02, 1.327e-01, 6.795e-02, 2.514e-01, 1.525e-02, 1.133e-01, -7.580e-02)); + r += mul(s3_4, M4(-1.365e-01, -1.377e-02, 2.066e-02, -3.622e-02, 1.422e-01, -1.854e-01, 1.101e-01, 9.160e-02, -2.666e-01, 2.378e-01, -2.834e-01, 2.074e-01, 6.809e-01, -6.729e-02, 1.868e-01, 2.246e-01)); + r += mul(s3_5, M4(7.356e-02, -9.508e-02, -1.172e-04, 5.422e-02, -1.893e-02, 6.854e-02, -1.892e-02, 6.860e-03, -2.202e-02, -9.934e-02, 2.023e-02, 1.135e-01, 8.935e-02, 1.218e-01, -6.803e-02, 5.408e-02)); + r += mul(s3_6, M4(1.426e-01, -9.283e-02, 2.520e-02, -5.103e-02, 1.056e-01, -1.709e-02, 9.450e-02, -2.310e-02, -1.719e-02, 6.322e-02, 2.448e-02, 8.671e-02, 9.691e-02, -1.051e-01, -2.729e-02, 2.363e-02)); + r += mul(s3_7, M4(-9.781e-02, -1.276e-01, 
-1.571e-01, 1.159e-02, -4.129e-03, -1.075e-01, 1.402e-01, -1.880e-02, 1.100e-01, -4.011e-02, -1.048e-02, 1.545e-01, 3.196e-01, -1.222e-01, 3.091e-02, 5.484e-02)); + r += mul(s3_8, M4(-2.461e-02, -1.011e-01, -2.695e-02, 2.886e-02, 4.451e-02, -5.603e-02, -7.350e-02, 1.154e-02, 6.965e-02, 1.972e-02, -5.793e-03, 5.401e-02, 6.273e-02, -1.393e-01, 6.810e-02, -1.290e-02)); + r += mul(s4_0, M4(-7.404e-02, 5.460e-02, -4.076e-02, -3.550e-02, 6.267e-02, 3.118e-02, 3.598e-02, -1.232e-02, 7.524e-02, -1.605e-02, -8.208e-02, 2.116e-02, 1.133e-01, -4.855e-02, -1.767e-02, 6.122e-03)); + r += mul(s4_1, M4(-2.916e-02, -3.449e-02, -1.089e-02, 8.319e-03, -4.846e-02, -1.997e-01, 2.053e-03, 5.580e-02, -6.975e-02, -8.495e-02, -4.238e-03, 4.718e-02, 3.513e-02, 6.665e-02, 6.092e-02, 5.555e-02)); + r += mul(s4_2, M4(1.128e-01, 4.959e-02, -3.970e-02, 2.896e-03, 1.092e-02, 2.103e-02, -3.434e-03, -3.586e-02, 1.436e-02, 4.328e-02, -4.943e-03, 2.167e-02, -1.454e-02, -1.050e-02, 3.731e-03, -1.429e-02)); + r += mul(s4_3, M4(5.178e-02, 2.050e-02, -1.067e-01, -1.003e-02, 1.600e-01, 1.565e-01, -1.674e-01, 1.675e-02, 1.097e-01, -8.281e-03, -4.795e-02, -6.924e-03, 1.339e-01, -7.568e-02, -8.288e-02, 9.900e-02)); + r += mul(s4_4, M4(-4.674e-02, -2.970e-02, -2.418e-01, -4.915e-04, 3.764e-03, -1.025e-01, -1.236e-01, -2.139e-01, -6.857e-02, 5.994e-03, -1.035e-01, -7.110e-02, -1.443e-01, -6.694e-02, 1.153e-01, -3.853e-02)); + r += mul(s4_5, M4(7.134e-02, 1.370e-02, -2.476e-02, 7.440e-02, 3.270e-02, -2.947e-02, 1.667e-02, 8.067e-03, 2.572e-02, 1.023e-01, -7.215e-02, -3.183e-03, -7.987e-02, -4.945e-02, -1.032e-04, 4.923e-02)); + r += mul(s4_6, M4(2.903e-02, -5.938e-03, -4.288e-02, -2.255e-02, 2.899e-02, 6.334e-03, 2.114e-02, 3.533e-02, 8.110e-03, -9.280e-02, -1.825e-02, -4.875e-02, 2.026e-01, -7.598e-02, -3.330e-03, -6.580e-02)); + r += mul(s4_7, M4(-1.462e-01, 3.489e-02, 3.617e-02, -1.989e-02, 5.885e-02, -2.127e-04, -1.736e-02, 8.010e-03, -7.144e-03, -6.051e-02, -9.249e-02, -8.694e-02, -6.210e-02, -1.074e-01, 
2.473e-01, -5.482e-02)); + r += mul(s4_8, M4(1.274e-01, 4.183e-02, 2.901e-02, 3.209e-03, -8.022e-02, 1.969e-02, 2.072e-02, -2.608e-02, 5.421e-02, -1.053e-01, 2.146e-02, -5.179e-02, 7.973e-02, -5.988e-02, 1.210e-01, -4.631e-03)); + r += mul(s5_0, M4(2.636e-02, 8.165e-03, 1.182e-01, -6.995e-02, -6.976e-02, 2.215e-03, -5.314e-02, 8.933e-02, -4.300e-02, 2.515e-02, -8.037e-02, 6.770e-03, -9.013e-02, -1.090e-02, -1.293e-02, 7.325e-02)); + r += mul(s5_1, M4(2.783e-02, 1.057e-01, 1.285e-01, -4.333e-02, -2.084e-02, -1.239e-02, 1.368e-02, 1.511e-02, -1.975e-02, 9.784e-02, 7.017e-02, -2.711e-02, 5.978e-02, -2.261e-02, 2.617e-02, 2.476e-02)); + r += mul(s5_2, M4(5.491e-02, 9.302e-02, -6.994e-02, -3.273e-02, -7.603e-02, 5.919e-02, -2.470e-02, 5.787e-02, -1.126e-01, 1.712e-01, 9.627e-03, -2.791e-02, -6.110e-02, -5.188e-02, 3.332e-03, 6.078e-02)); + r += mul(s5_3, M4(2.370e-01, -4.704e-02, 2.878e-01, 1.572e-02, 8.702e-02, 2.085e-01, -1.493e-01, 4.833e-03, 2.878e-03, 9.338e-02, -9.101e-02, -4.112e-02, 1.460e-01, 3.081e-02, -1.515e-01, 8.386e-02)); + r += mul(s5_4, M4(7.837e-02, 8.659e-03, 1.307e-01, -1.490e-01, -1.352e-01, 1.181e-01, -7.400e-03, 1.975e-02, 1.048e-02, 7.512e-02, -7.127e-02, -3.951e-02, -1.431e-01, -1.643e-01, 1.383e-01, 7.349e-02)); + r += mul(s5_5, M4(-6.518e-02, -7.774e-03, 4.944e-02, -1.617e-02, -6.328e-02, -7.094e-02, 4.030e-02, 6.984e-02, 9.349e-02, -2.611e-02, -4.126e-02, -4.661e-02, -9.324e-02, -5.326e-02, -3.313e-02, 1.342e-01)); + r += mul(s5_6, M4(4.095e-02, 6.703e-02, 1.374e-01, 5.021e-02, -8.304e-02, 7.704e-02, 3.923e-02, 7.668e-02, -1.441e-01, 6.898e-02, 8.456e-04, -3.362e-02, -5.124e-02, -5.141e-02, -1.341e-01, 7.546e-02)); + r += mul(s5_7, M4(2.028e-01, -6.965e-02, 1.672e-01, 8.417e-02, 1.001e-01, -1.704e-02, -3.863e-02, 7.112e-02, -1.513e-02, 2.499e-02, 1.131e-01, -2.397e-02, -1.813e-01, -5.494e-02, -9.805e-04, -9.176e-02)); + r += mul(s5_8, M4(1.635e-01, 6.301e-02, 3.947e-02, -2.926e-02, -1.375e-01, 4.817e-02, -2.079e-02, 4.112e-02, 2.780e-02, 
2.360e-03, 7.991e-03, 2.806e-02, -2.174e-02, -1.659e-01, 6.571e-02, 2.288e-02)); + r += mul(s6_0, M4(1.174e-02, 3.992e-02, -2.902e-02, -1.564e-02, -9.947e-03, 1.022e-01, -4.538e-02, 3.136e-02, -3.908e-02, 2.333e-02, -1.226e-02, -1.850e-02, 7.776e-02, 4.887e-02, -3.093e-02, -3.977e-02)); + r += mul(s6_1, M4(-5.709e-02, 2.241e-01, 7.495e-02, -8.553e-02, -1.715e-02, 7.678e-02, -8.276e-02, -1.398e-01, 6.393e-02, 2.037e-02, -3.649e-02, -4.055e-02, 1.133e-01, 1.523e-01, -2.925e-02, -1.990e-02)); + r += mul(s6_2, M4(-4.584e-02, 5.224e-02, -1.234e-01, -6.822e-02, 7.514e-02, -3.682e-02, -1.897e-02, -6.379e-02, 4.519e-02, 2.138e-02, -4.252e-02, 5.279e-02, 1.038e-01, 1.302e-01, -1.700e-02, 1.614e-05)); + r += mul(s6_3, M4(-1.100e-01, 6.973e-02, -5.033e-02, -4.205e-02, 1.030e-01, 9.182e-02, -5.071e-02, -1.013e-01, 9.751e-02, 4.474e-03, 3.944e-02, -2.502e-02, 4.318e-02, 3.176e-02, 7.114e-02, -9.254e-02)); + r += mul(s6_4, M4(-1.743e-02, -1.281e-01, 1.648e-01, -1.096e-01, -1.154e-01, -1.067e-01, -1.176e-01, -2.978e-02, -1.082e-01, -1.007e-01, 1.235e-01, -2.123e-01, -3.999e-02, 8.520e-02, 1.106e-01, -1.162e-01)); + r += mul(s6_5, M4(5.794e-02, -1.919e-01, 1.467e-01, -1.568e-01, -2.875e-02, -9.323e-02, 7.649e-02, -1.092e-01, 6.633e-02, 6.140e-02, 4.574e-02, 4.918e-02, 1.785e-01, 1.223e-01, 4.278e-03, -1.782e-01)); + r += mul(s6_6, M4(-7.013e-02, -4.507e-02, -7.393e-02, 2.513e-03, 1.189e-01, 1.257e-02, -4.197e-02, -3.805e-02, 7.862e-03, 7.478e-02, -1.991e-02, 1.965e-02, 1.314e-01, -1.430e-02, 2.908e-02, -6.047e-02)); + r += mul(s6_7, M4(-3.217e-02, 4.262e-02, -1.983e-01, 4.035e-02, -1.659e-02, -9.437e-02, -4.622e-02, -6.658e-02, -8.081e-02, 3.849e-02, 1.314e-01, -5.261e-02, 8.661e-02, -8.237e-03, 1.585e-01, -1.614e-01)); + r += mul(s6_8, M4(-5.737e-02, -3.302e-03, -6.742e-02, -9.583e-02, -3.381e-02, -2.261e-02, -1.857e-02, 2.350e-03, 1.091e-01, -6.425e-02, 1.871e-02, -6.960e-02, 1.333e-01, -2.407e-02, 2.956e-02, -2.104e-01)); + r += mul(s7_0, M4(-2.732e-02, -6.994e-02, -1.080e-01, 
1.307e-02, 5.425e-03, 6.067e-02, 4.972e-03, 6.205e-02, 5.202e-02, 1.135e-02, -5.078e-02, -9.209e-02, -8.480e-02, -5.938e-02, -2.936e-02, 2.958e-02)); + r += mul(s7_1, M4(-9.352e-02, 4.578e-02, -1.038e-01, 4.931e-02, 2.834e-02, 2.557e-02, 1.260e-01, 8.063e-02, -4.916e-02, -2.708e-02, 1.820e-01, -6.008e-02, 5.311e-02, -2.096e-02, -6.459e-02, -2.527e-02)); + r += mul(s7_2, M4(-1.379e-02, -6.491e-02, -3.690e-02, 8.692e-03, 6.566e-02, -2.458e-02, 7.286e-02, 1.636e-03, 3.495e-02, 9.653e-02, 4.719e-02, 4.307e-04, 3.166e-02, -2.120e-02, 1.104e-03, 2.250e-02)); + r += mul(s7_3, M4(-2.417e-02, -3.347e-02, -5.044e-03, -2.641e-03, 7.385e-02, 5.451e-02, -3.506e-02, 3.922e-02, 1.964e-01, 7.353e-02, 5.081e-02, 5.929e-02, -1.945e-01, -9.979e-03, -2.426e-01, 5.309e-02)); + r += mul(s7_4, M4(1.228e-01, 3.044e-02, -4.675e-02, -6.772e-02, -1.994e-01, -7.171e-02, -1.114e-01, 2.572e-01, -2.172e-01, -1.079e-01, 9.681e-03, -6.967e-02, 1.219e-03, -4.686e-03, -7.765e-03, 8.815e-03)); + r += mul(s7_5, M4(-6.989e-02, -1.078e-01, 8.804e-02, -1.109e-01, 1.683e-01, -4.964e-02, -2.924e-02, 1.125e-02, 2.629e-01, 1.073e-01, -5.051e-02, 1.134e-01, 3.944e-02, -2.659e-02, -3.542e-02, 2.898e-02)); + r += mul(s7_6, M4(2.536e-02, -6.420e-02, -4.203e-02, -1.582e-02, 4.380e-02, -3.555e-02, -2.422e-02, -8.902e-03, -1.086e-01, 2.797e-02, -1.048e-01, 1.116e-01, -1.139e-02, 3.176e-02, 2.587e-02, 2.958e-02)); + r += mul(s7_7, M4(5.033e-02, 1.780e-02, -9.527e-02, 3.954e-02, -6.976e-02, -1.139e-02, 4.906e-02, 7.189e-02, -2.352e-02, 5.318e-02, 5.140e-02, 4.704e-02, -4.843e-02, -3.853e-02, -7.482e-02, -8.305e-02)); + r += mul(s7_8, M4(-1.397e-01, 2.179e-02, 2.338e-02, -1.016e-01, 1.103e-01, -6.863e-02, 1.780e-02, 2.561e-02, 1.304e-01, -1.720e-02, 2.107e-02, 4.751e-03, 4.767e-02, -1.159e-01, -1.885e-01, 1.317e-02)); + r += V4(3.299e-02, 3.575e-02, 4.653e-02, -5.926e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 
s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.885e-03, -2.546e-02, 2.558e-02, -2.541e-02, 3.475e-02, 1.645e-01, -4.967e-02, 5.703e-02, -8.175e-03, 1.006e-02, -9.734e-03, 1.796e-02, 1.192e-01, -6.662e-02, 1.989e-02, 6.194e-02)); + r += mul(s0_1, M4(-1.198e-01, 3.162e-02, -3.272e-02, -3.826e-02, -3.700e-01, 1.482e-01, -2.114e-01, -1.351e-01, -8.899e-02, -4.971e-02, 1.209e-01, -8.257e-02, 1.265e-01, -1.939e-02, -6.032e-03, 1.074e-01)); + r += mul(s0_2, M4(1.562e-02, -8.563e-02, -6.263e-03, -9.005e-02, -1.248e-01, 1.228e-01, -6.548e-03, -2.062e-01, 8.436e-02, 6.663e-02, 9.394e-02, -9.600e-02, -6.184e-02, 4.953e-02, -1.026e-01, 3.131e-02)); + r += mul(s0_3, M4(-6.295e-02, -4.597e-02, 8.288e-02, 1.139e-01, -2.021e-01, 6.453e-02, 1.036e-02, -1.155e-01, 2.089e-02, -2.989e-02, -3.874e-02, 1.819e-02, -1.555e-02, -1.129e-01, 9.900e-03, -1.252e-01)); + r += mul(s0_4, M4(9.694e-02, 5.782e-02, 1.188e-02, -9.054e-02, -3.874e-01, 2.309e-01, -1.367e-01, -1.582e-01, -2.347e-01, 6.123e-03, -7.074e-02, -4.260e-02, 8.489e-02, -1.239e-01, 1.993e-02, -3.858e-02)); + r += mul(s0_5, M4(-4.685e-02, -1.415e-01, -1.177e-01, -4.488e-02, 1.536e-01, 2.300e-01, -7.466e-02, 9.373e-02, 1.940e-02, -6.754e-02, 8.615e-03, 4.944e-02, -5.749e-02, 1.053e-01, 3.029e-02, 7.247e-02)); + r += mul(s0_6, M4(-9.967e-02, -5.030e-02, 2.466e-03, 1.887e-02, -5.499e-02, 1.128e-03, -2.234e-03, 6.063e-03, 4.992e-02, -3.842e-02, -6.264e-03, 6.056e-04, -2.896e-02, -1.566e-02, -7.347e-02, -3.167e-02)); + r += 
mul(s0_7, M4(3.720e-03, -4.195e-02, -2.264e-02, 4.933e-02, 3.558e-02, 3.830e-02, -2.230e-02, 1.220e-02, 5.597e-02, -3.489e-02, -4.640e-02, 3.423e-03, -3.118e-02, -1.338e-02, -1.107e-01, -1.904e-02)); + r += mul(s0_8, M4(8.768e-02, 9.464e-02, -7.227e-02, -4.277e-03, -7.150e-02, 8.855e-02, -1.373e-02, -2.813e-02, 9.033e-03, 1.266e-02, 2.571e-02, 2.239e-02, 7.557e-02, -2.955e-02, 3.206e-02, -3.293e-02)); + r += mul(s1_0, M4(1.093e-01, 2.123e-01, 1.520e-01, -1.096e-02, 1.140e-01, -6.139e-04, 1.689e-01, 7.166e-02, 8.511e-02, -8.368e-02, -5.352e-02, -1.224e-01, 3.872e-01, -1.178e-01, 2.918e-01, 2.200e-01)); + r += mul(s1_1, M4(1.231e-01, 1.746e-01, 4.732e-02, -5.149e-02, 1.214e-01, -1.181e-01, 7.278e-02, 7.755e-02, -5.278e-02, -1.474e-01, 1.502e-02, -3.327e-02, -3.196e-03, 4.036e-01, 7.350e-01, 8.395e-02)); + r += mul(s1_2, M4(2.968e-02, 9.406e-02, -4.022e-02, -1.459e-01, -7.690e-02, -4.029e-03, -5.914e-02, -5.643e-02, -1.059e-01, -8.707e-03, 2.767e-02, -2.201e-01, -5.068e-02, 1.015e-01, 1.402e-01, 6.987e-03)); + r += mul(s1_3, M4(1.979e-02, 2.252e-01, -2.014e-01, -2.294e-01, 1.291e-01, -3.770e-02, 4.641e-02, -7.505e-02, 2.448e-01, -2.456e-01, 6.304e-02, -3.550e-02, 9.021e-02, -7.235e-02, 2.433e-01, -3.196e-02)); + r += mul(s1_4, M4(-1.443e-01, -4.827e-02, 5.127e-02, 1.965e-01, 1.573e-02, -8.680e-02, 9.137e-02, 6.669e-02, 3.524e-01, 3.636e-03, 1.260e-01, 2.576e-01, -1.165e-01, -3.268e-02, 2.371e-01, 1.671e-01)); + r += mul(s1_5, M4(1.851e-01, 3.140e-01, -3.121e-01, 2.096e-01, -1.471e-01, -6.644e-03, -7.641e-02, -4.940e-03, -1.964e-02, 4.312e-02, 1.531e-01, 5.525e-02, -7.958e-02, -4.680e-04, -3.540e-02, -1.654e-01)); + r += mul(s1_6, M4(1.756e-01, 8.053e-02, 8.184e-02, 1.962e-01, 6.900e-02, 2.487e-03, 2.113e-02, -4.635e-02, -1.309e-01, -1.730e-01, -1.307e-01, -1.163e-01, 1.018e-01, 3.996e-02, 9.020e-02, 6.544e-02)); + r += mul(s1_7, M4(-7.530e-02, 2.220e-01, 1.192e-01, -6.576e-02, 3.215e-02, -2.875e-02, 3.263e-02, 8.846e-03, 4.031e-02, -1.396e-01, -1.058e-01, -3.059e-02, 
-2.242e-01, 1.991e-01, -1.833e-02, -1.613e-02)); + r += mul(s1_8, M4(-2.439e-01, 9.792e-02, 6.240e-02, -9.079e-02, 2.016e-02, -3.977e-02, -5.885e-02, 2.288e-02, 2.199e-01, -9.182e-02, 4.601e-02, -3.255e-02, 1.085e-01, -1.052e-01, 3.403e-02, -8.799e-02)); + r += mul(s2_0, M4(-8.172e-02, -5.813e-02, -1.148e-01, -5.606e-02, 1.254e-01, -1.118e-02, 8.617e-02, 3.101e-02, -6.399e-02, 2.935e-02, -3.022e-02, -9.815e-02, 1.263e-01, 8.716e-02, 1.488e-03, 2.892e-02)); + r += mul(s2_1, M4(-2.403e-01, -6.756e-02, 3.861e-02, -1.272e-01, 1.012e-01, -9.013e-03, -3.556e-02, 4.456e-02, 3.567e-02, 8.643e-02, -6.859e-04, 7.789e-02, -9.175e-02, 8.167e-02, 9.014e-03, -3.291e-03)); + r += mul(s2_2, M4(-2.589e-02, -8.047e-02, 1.765e-02, -5.666e-02, 2.589e-02, -6.236e-02, 5.595e-02, 1.409e-01, -9.791e-02, -3.648e-02, 9.036e-03, -3.095e-02, -7.711e-02, -9.589e-02, 7.740e-02, -3.167e-02)); + r += mul(s2_3, M4(-4.075e-01, -2.607e-01, -1.351e-01, 8.469e-02, -4.617e-02, -6.523e-02, -6.272e-03, 1.454e-01, 7.918e-02, 4.549e-02, -1.289e-01, 4.559e-04, -1.578e-01, -3.195e-02, -1.310e-01, -1.988e-01)); + r += mul(s2_4, M4(-1.001e-01, 1.142e-01, -2.295e-01, -1.917e-01, 2.870e-01, -2.153e-01, 1.699e-01, 8.478e-02, 7.359e-02, -2.038e-01, 1.080e-01, 9.298e-02, 1.395e-03, -6.084e-02, 8.516e-03, 5.921e-02)); + r += mul(s2_5, M4(4.890e-02, -1.980e-01, 5.298e-02, -7.964e-02, -6.946e-02, -2.022e-01, -1.893e-03, -8.310e-02, -5.277e-02, -9.537e-03, 5.377e-02, 6.629e-02, 1.544e-02, 3.057e-03, -7.162e-02, -1.441e-01)); + r += mul(s2_6, M4(7.994e-02, -2.992e-02, -1.136e-01, 2.080e-03, -5.637e-02, 3.382e-02, 8.393e-03, -1.294e-02, 2.123e-02, 6.833e-03, 3.525e-02, 3.002e-02, 7.027e-02, 7.827e-02, 1.082e-01, 1.245e-02)); + r += mul(s2_7, M4(-2.703e-01, -1.811e-01, 1.082e-01, -1.525e-02, 1.525e-01, 9.581e-02, -4.152e-02, -1.611e-02, -1.248e-01, -1.078e-02, 1.731e-02, -4.998e-02, -1.774e-01, 5.634e-02, -1.451e-01, 6.042e-02)); + r += mul(s2_8, M4(7.234e-02, -7.818e-02, 4.948e-04, 4.252e-02, 1.517e-01, -6.549e-02, 
-9.122e-02, -1.192e-02, 4.772e-02, 4.773e-02, -2.814e-02, -4.270e-02, 2.777e-02, 7.317e-02, -1.038e-01, 2.099e-03)); + r += mul(s3_0, M4(-1.160e-03, -4.776e-02, 1.320e-01, -5.099e-02, -7.016e-02, -3.634e-02, 5.819e-03, -3.680e-02, 2.560e-02, 9.705e-03, -1.349e-01, 4.162e-02, 2.998e-01, 6.999e-02, -9.148e-02, 1.306e-01)); + r += mul(s3_1, M4(8.671e-02, -2.513e-04, 2.481e-01, -3.360e-02, 1.810e-02, 1.426e-02, -1.753e-01, -7.422e-02, -4.182e-04, 2.963e-02, -4.225e-02, 7.439e-02, -5.775e-02, 1.039e-01, -8.999e-02, 1.931e-01)); + r += mul(s3_2, M4(1.279e-01, 4.706e-02, 1.274e-01, 5.953e-02, -4.903e-02, -4.301e-02, -6.777e-02, 1.582e-02, -3.740e-02, -1.648e-03, -9.810e-05, 1.001e-01, 1.818e-01, -9.297e-02, 3.020e-04, -5.370e-02)); + r += mul(s3_3, M4(-9.060e-02, 1.185e-02, -1.783e-01, -4.235e-02, -1.380e-01, -2.946e-03, 1.148e-01, 1.939e-02, 1.450e-01, 2.229e-02, -2.045e-01, 3.980e-02, 2.243e-01, -7.762e-02, -9.742e-04, -8.569e-02)); + r += mul(s3_4, M4(-2.442e-02, 2.062e-01, -1.265e-01, 4.012e-02, 2.739e-01, 4.146e-03, 1.713e-01, 3.659e-02, 1.138e-01, -1.864e-01, -4.026e-02, 1.728e-01, -7.976e-02, 6.411e-02, 2.068e-02, 2.147e-01)); + r += mul(s3_5, M4(-1.294e-01, 2.487e-02, 1.467e-01, 8.779e-02, -1.647e-01, 4.106e-02, 3.740e-02, 1.564e-02, -1.010e-03, 2.372e-02, 3.233e-02, 1.864e-01, -1.230e-01, -7.062e-02, 4.219e-02, -1.362e-01)); + r += mul(s3_6, M4(1.222e-01, -5.516e-02, -8.402e-02, 1.478e-02, -4.594e-02, 2.981e-02, -3.904e-02, -8.650e-02, -1.487e-03, 1.269e-03, 4.843e-02, 2.894e-02, 5.032e-02, 6.047e-02, -1.473e-03, 3.292e-02)); + r += mul(s3_7, M4(9.554e-02, 2.187e-02, -1.455e-01, -8.992e-02, 1.309e-03, -3.460e-02, 1.417e-01, -3.960e-02, -1.057e-01, -3.014e-02, 1.360e-01, -6.093e-02, 2.066e-01, 2.765e-01, -1.451e-02, 2.153e-01)); + r += mul(s3_8, M4(6.974e-02, -8.067e-03, -1.227e-02, 2.595e-02, -8.348e-03, -6.840e-02, -1.754e-02, 3.177e-02, -3.672e-02, -3.918e-03, 9.301e-04, 6.000e-02, 4.009e-02, 1.286e-01, 1.005e-01, 4.752e-02)); + r += mul(s4_0, M4(-6.639e-03, 
1.205e-02, -9.161e-02, 6.168e-02, 1.450e-01, -8.361e-02, 2.304e-01, 8.059e-02, -3.226e-03, -8.089e-02, 1.486e-01, 3.589e-02, 1.554e-04, -2.149e-02, -1.098e-01, 6.157e-03)); + r += mul(s4_1, M4(-7.550e-02, 2.784e-02, -1.346e-01, 3.855e-02, 1.488e-01, -8.323e-02, 1.272e-03, -1.955e-02, 7.526e-02, -4.654e-02, 1.138e-01, -2.886e-02, 1.533e-01, -5.429e-02, 1.491e-01, 1.911e-02)); + r += mul(s4_2, M4(2.749e-02, 2.167e-03, -2.995e-02, 1.297e-01, -3.067e-02, -1.002e-01, -1.171e-01, 5.089e-02, 7.106e-02, 2.617e-02, 7.060e-02, -1.464e-02, 1.128e-01, 1.469e-01, 1.755e-01, -1.565e-02)); + r += mul(s4_3, M4(-3.147e-02, -2.933e-02, 1.108e-01, -5.021e-03, 5.973e-02, -1.508e-01, 1.813e-01, 9.161e-02, -5.187e-02, 8.315e-02, -6.392e-02, 8.132e-02, -1.111e-01, 4.777e-02, 8.955e-02, -9.181e-02)); + r += mul(s4_4, M4(7.240e-02, 5.859e-02, -9.687e-03, 9.683e-02, -2.997e-01, 5.311e-02, -7.869e-02, 1.245e-01, 4.405e-01, 2.145e-04, 1.532e-02, -1.421e-01, 1.712e-02, -3.623e-02, 2.816e-01, -9.044e-02)); + r += mul(s4_5, M4(-1.773e-01, 8.718e-02, 1.580e-01, -6.844e-02, 1.122e-01, -3.360e-04, -5.619e-02, -8.491e-02, -1.605e-01, 1.087e-01, -6.330e-03, -5.711e-02, -1.025e-01, -2.467e-03, 6.342e-02, -3.439e-02)); + r += mul(s4_6, M4(2.071e-02, -5.719e-02, 4.813e-03, 4.429e-02, 9.461e-02, -2.034e-02, 1.234e-01, 1.544e-02, -4.892e-02, 2.310e-02, 6.791e-02, 4.307e-02, -2.855e-02, -4.628e-04, 1.339e-02, -5.448e-02)); + r += mul(s4_7, M4(3.531e-02, 7.981e-02, -2.148e-01, 2.049e-02, -1.053e-02, -3.160e-02, 1.023e-01, 2.071e-02, -8.098e-02, 9.493e-02, -5.892e-02, -1.295e-02, 1.871e-01, -4.189e-02, 9.112e-02, 8.300e-02)); + r += mul(s4_8, M4(-1.699e-02, -3.385e-02, -3.076e-02, -2.967e-02, 1.298e-02, -1.494e-02, 7.397e-03, 4.782e-02, 1.178e-01, 5.643e-02, 2.861e-02, -9.439e-03, -8.228e-02, 1.107e-01, -4.137e-03, -3.320e-02)); + r += mul(s5_0, M4(1.071e-01, 8.736e-02, -5.126e-02, -3.961e-02, 1.843e-01, -2.182e-01, 1.343e-01, 1.082e-01, -6.434e-02, -9.993e-02, -4.548e-02, -3.949e-02, 3.089e-02, -4.468e-02, 
-5.958e-03, -1.703e-02)); + r += mul(s5_1, M4(5.276e-02, 2.210e-01, -1.552e-01, 1.921e-01, 2.015e-01, -9.600e-02, 4.278e-02, -1.323e-01, -6.578e-03, -1.043e-01, 5.091e-02, 4.558e-02, 4.814e-02, -2.042e-01, 1.910e-01, 3.570e-02)); + r += mul(s5_2, M4(-7.656e-02, 1.000e-01, 6.224e-02, -2.822e-02, -6.178e-02, -1.274e-01, -8.665e-02, -3.369e-02, -7.111e-02, 3.011e-02, 3.098e-02, 1.618e-02, 7.771e-02, -2.006e-03, 1.463e-01, -4.340e-02)); + r += mul(s5_3, M4(1.671e-02, -2.406e-02, 3.675e-02, -6.527e-02, 1.862e-02, -2.094e-01, 1.246e-01, -3.006e-02, -2.290e-02, 2.513e-02, -5.528e-02, -3.249e-02, 1.135e-01, -1.494e-01, -7.521e-02, -3.806e-02)); + r += mul(s5_4, M4(2.826e-01, 1.471e-01, 2.434e-01, 6.211e-01, -1.110e-01, 5.876e-02, 2.357e-02, 1.291e-01, 1.158e-01, -6.958e-02, -4.165e-02, -1.199e-02, -1.858e-01, -2.900e-01, 2.511e-03, 2.313e-02)); + r += mul(s5_5, M4(2.556e-02, 3.107e-01, 6.381e-02, 1.181e-01, 1.081e-01, -1.219e-02, -2.541e-02, -7.888e-02, -1.273e-01, -9.006e-02, -1.173e-02, -1.212e-02, -8.626e-03, 1.553e-02, 4.044e-02, -5.263e-02)); + r += mul(s5_6, M4(7.118e-02, 1.130e-01, 9.119e-02, 8.911e-03, 1.276e-01, -6.608e-02, 5.927e-02, -4.754e-02, -1.168e-01, -1.445e-03, -4.022e-02, -3.406e-02, -3.202e-02, -1.565e-01, -1.082e-01, -6.669e-02)); + r += mul(s5_7, M4(-1.636e-01, -4.448e-02, 2.813e-02, 1.441e-01, -1.376e-02, 2.730e-02, 8.901e-04, -2.648e-02, -1.212e-01, 8.716e-02, 3.431e-02, -7.011e-02, 6.266e-02, -6.659e-03, -5.885e-02, 1.105e-02)); + r += mul(s5_8, M4(-1.766e-02, 1.021e-02, 3.733e-02, 7.508e-02, 2.418e-02, 2.041e-02, -1.344e-02, -1.642e-02, 6.738e-02, 1.122e-02, 4.157e-02, -2.922e-02, -1.743e-02, 9.020e-02, 1.103e-01, -7.592e-02)); + r += mul(s6_0, M4(1.134e-01, -3.963e-02, 1.128e-01, 3.215e-02, 3.800e-02, 3.478e-02, -9.269e-02, 8.044e-02, -2.934e-01, 1.042e-02, -1.862e-01, -9.349e-02, 7.495e-02, 6.232e-02, 1.725e-01, 1.731e-02)); + r += mul(s6_1, M4(1.909e-02, 4.707e-02, -1.050e-01, 2.469e-02, -3.667e-02, 1.034e-01, -5.543e-02, -5.119e-02, 
-2.366e-01, -1.433e-01, -1.732e-01, -2.134e-01, 5.333e-02, -6.928e-02, 7.900e-02, 8.672e-02)); + r += mul(s6_2, M4(-7.238e-02, 1.367e-02, -1.049e-01, 1.265e-01, -3.939e-03, 2.224e-02, -8.017e-04, -2.103e-02, -1.096e-01, -1.776e-03, 1.391e-01, 1.414e-01, 1.216e-01, 9.376e-02, 5.524e-02, 6.758e-02)); + r += mul(s6_3, M4(-1.067e-01, 1.998e-02, -2.571e-02, -5.100e-03, -1.376e-01, -5.383e-02, -1.193e-01, -7.877e-02, -2.767e-04, 2.886e-02, 2.653e-02, 1.714e-01, -1.229e-02, -7.802e-02, 3.130e-02, -2.958e-02)); + r += mul(s6_4, M4(-7.202e-02, -6.781e-02, 7.058e-02, 1.504e-02, 1.208e-01, 9.888e-02, -4.686e-03, -1.753e-02, 1.937e-01, 7.496e-02, -1.258e-01, 1.294e-01, -1.517e-02, -1.711e-03, 2.826e-01, 2.925e-01)); + r += mul(s6_5, M4(7.727e-02, -1.526e-01, 2.852e-01, 2.777e-01, -8.854e-02, 6.094e-02, -5.698e-03, -1.644e-01, -7.879e-02, 1.572e-02, 3.822e-02, 9.991e-02, -1.252e-01, 1.024e-01, 2.398e-01, 2.683e-01)); + r += mul(s6_6, M4(-2.504e-03, -7.172e-02, -1.091e-01, 1.194e-02, 7.060e-02, -4.136e-03, 2.395e-03, 5.092e-02, -2.992e-02, -2.311e-03, -4.920e-02, -1.680e-01, 4.731e-02, 3.985e-02, 1.189e-01, 8.127e-02)); + r += mul(s6_7, M4(7.482e-02, -5.409e-02, -7.627e-02, -3.000e-02, 4.041e-02, 9.987e-02, -1.435e-02, 1.451e-01, -2.708e-02, -7.973e-02, 1.030e-01, 8.814e-02, 3.109e-03, -7.251e-02, 1.606e-01, 3.079e-02)); + r += mul(s6_8, M4(-7.090e-02, -1.135e-01, 8.462e-02, -6.155e-02, -3.090e-02, 5.601e-02, 3.941e-02, 4.744e-03, 2.282e-02, 4.505e-02, 1.192e-02, 6.843e-02, 1.088e-01, -1.489e-01, 1.263e-01, 5.793e-02)); + r += mul(s7_0, M4(2.178e-02, 1.912e-02, 4.309e-02, 1.703e-02, 3.737e-02, -7.983e-03, 2.782e-02, 1.850e-02, 1.452e-01, -1.497e-01, 4.961e-02, 5.188e-02, 1.984e-02, -2.894e-02, 1.030e-01, -3.062e-04)); + r += mul(s7_1, M4(-2.136e-01, -4.810e-02, -9.977e-02, 1.103e-01, -3.734e-02, -1.038e-01, -3.264e-02, 1.932e-02, 3.639e-01, -6.706e-02, 4.201e-01, 1.576e-02, -4.993e-02, -1.537e-02, -5.964e-02, 4.097e-02)); + r += mul(s7_2, M4(2.272e-02, -2.516e-02, -1.510e-02, 
5.791e-02, -7.611e-02, -2.916e-02, 1.016e-01, 1.075e-01, 1.209e-02, 7.300e-02, 6.268e-02, 9.154e-02, 8.169e-02, -7.432e-02, -2.736e-02, -2.039e-03)); + r += mul(s7_3, M4(-5.458e-02, -3.447e-02, -3.795e-03, -5.079e-02, -6.263e-02, -1.703e-01, 2.396e-02, -2.922e-02, 2.510e-01, 2.071e-02, -6.895e-02, 8.368e-02, -1.364e-01, 4.028e-02, -7.619e-02, -6.385e-02)); + r += mul(s7_4, M4(-1.184e-01, -7.357e-02, 2.109e-01, 9.892e-02, 1.238e-01, 3.710e-03, -6.457e-02, -9.826e-02, -3.336e-01, 2.071e-02, -2.630e-02, -4.478e-02, 6.411e-02, 4.174e-03, 9.080e-02, 4.497e-02)); + r += mul(s7_5, M4(1.538e-01, 7.003e-02, 6.026e-02, 3.331e-02, -1.284e-01, -8.834e-02, 1.376e-01, 8.211e-02, -1.947e-01, -1.464e-01, 3.603e-02, -8.266e-02, -1.739e-02, -6.261e-02, -1.003e-01, 4.533e-02)); + r += mul(s7_6, M4(-1.178e-01, -5.722e-02, 2.677e-02, -4.211e-03, -1.071e-01, -8.749e-02, 1.005e-01, 7.650e-02, -1.112e-01, 6.735e-02, -8.316e-02, -9.940e-02, 1.052e-01, -9.073e-02, -3.761e-02, 4.248e-02)); + r += mul(s7_7, M4(5.572e-02, -3.820e-02, -1.385e-01, 3.057e-02, 5.144e-02, -1.668e-02, 1.240e-01, 1.815e-01, -5.345e-02, 1.495e-01, -8.913e-02, 9.643e-02, -2.567e-03, -6.845e-02, 1.134e-01, 8.737e-02)); + r += mul(s7_8, M4(-8.663e-04, -8.466e-04, 6.296e-02, 7.255e-02, 9.676e-02, -7.858e-02, 1.671e-01, 8.251e-02, 2.800e-01, 1.228e-01, -1.019e-01, -5.512e-02, -4.996e-02, 5.146e-02, -9.201e-02, -2.324e-02)); + r += V4(1.173e-02, 1.947e-02, 2.270e-02, 7.192e-02); + return r; +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 
0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); 
+ V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); 
+ t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 7 +//!DESC conv6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.959e-02, 2.300e-02, 6.317e-03, 1.703e-02, -2.688e-03, 1.642e-02, -2.200e-02, -2.420e-02, 2.363e-02, -1.022e-01, 6.023e-02, -2.609e-02, 9.291e-02, -1.160e-01, 2.052e-02, 2.354e-01)); + r += mul(s0_1, M4(-1.879e-02, -1.916e-02, -8.637e-03, 4.526e-02, 6.088e-02, -2.138e-03, -4.232e-02, -4.008e-02, -4.421e-02, 2.282e-01, 3.115e-02, -1.581e-02, -6.951e-03, -2.424e-02, 1.346e-01, -3.285e-02)); + r += mul(s0_2, M4(-4.787e-02, -1.360e-01, 5.257e-02, -4.066e-02, 2.153e-02, 2.471e-02, 3.004e-02, -7.448e-02, -1.941e-02, 1.677e-01, -1.904e-01, -2.967e-02, -7.226e-03, 8.123e-03, 
1.559e-01, -1.188e-02)); + r += mul(s0_3, M4(-3.923e-02, -4.992e-02, 6.446e-02, 7.936e-02, -3.738e-02, -1.250e-02, 5.185e-02, 2.579e-02, 5.660e-02, -1.233e-01, 5.719e-02, 1.460e-02, 2.067e-01, -4.069e-02, 3.571e-02, 1.317e-01)); + r += mul(s0_4, M4(2.346e-02, -2.846e-01, -2.588e-02, 5.688e-02, -5.397e-03, -1.564e-02, -6.028e-02, 7.542e-02, -1.610e-01, 6.204e-02, 5.513e-02, 1.456e-02, -2.901e-01, 9.572e-02, 2.208e-02, -1.044e-01)); + r += mul(s0_5, M4(-9.583e-02, -2.630e-02, 4.636e-02, -1.747e-02, 5.710e-02, -3.728e-02, -9.599e-02, 2.775e-02, 9.363e-02, -1.286e-02, -6.437e-02, 8.923e-03, 5.191e-02, 2.725e-02, 5.111e-02, 5.944e-02)); + r += mul(s0_6, M4(3.916e-02, 1.242e-04, -2.263e-02, 1.843e-02, -2.212e-02, 1.383e-02, 4.635e-02, -8.325e-03, 8.122e-02, -1.123e-01, 1.783e-02, -7.211e-03, 5.749e-03, -1.679e-03, -5.085e-02, 6.039e-03)); + r += mul(s0_7, M4(-4.237e-02, 2.693e-02, 7.501e-03, -5.510e-02, 1.524e-02, -4.218e-02, -1.076e-02, -3.760e-02, 1.048e-01, -5.222e-02, 1.925e-02, -1.094e-02, 7.092e-03, -3.861e-02, 5.624e-02, 1.161e-02)); + r += mul(s0_8, M4(4.406e-03, 6.380e-02, 1.261e-02, -2.852e-02, 1.263e-02, -1.119e-02, -5.527e-02, -2.118e-02, 1.659e-02, 1.159e-01, -5.585e-02, -2.343e-02, 1.292e-02, -4.138e-02, 1.541e-02, 6.174e-02)); + r += mul(s1_0, M4(-1.596e-02, 1.235e-01, -1.475e-02, -1.215e-01, 4.439e-02, 2.252e-02, -1.036e-04, 2.227e-01, 2.471e-02, 1.859e-02, -1.305e-02, 2.673e-02, 1.234e-02, -7.778e-02, -6.012e-02, -3.814e-02)); + r += mul(s1_1, M4(-1.121e-01, 1.950e-01, 1.715e-01, 1.439e-02, 6.037e-02, -3.254e-01, -7.611e-02, -1.765e-01, 3.997e-02, 6.524e-02, 9.926e-02, 4.034e-02, -3.307e-02, -8.003e-02, -1.239e-02, 2.386e-02)); + r += mul(s1_2, M4(-6.216e-02, 1.513e-01, 1.518e-02, 1.468e-02, -5.238e-02, -1.184e-02, 9.673e-02, 8.954e-02, -3.878e-02, -3.462e-02, 1.696e-03, 4.314e-02, 3.306e-02, -2.129e-02, 3.722e-02, 9.545e-03)); + r += mul(s1_3, M4(-4.358e-02, 1.169e-01, -6.705e-02, 7.877e-02, 6.420e-02, 1.647e-02, -1.408e-01, -1.886e-02, -1.090e-02, 
-7.420e-02, 1.611e-02, -2.608e-02, 1.181e-02, 1.139e-02, -3.279e-02, -1.538e-01)); + r += mul(s1_4, M4(-7.269e-02, -9.624e-02, 2.104e-01, -3.678e-02, -8.138e-02, -7.530e-02, 1.403e-01, 1.101e-01, -2.562e-01, 1.725e-01, -2.215e-01, -1.774e-02, -9.527e-02, 2.512e-02, -1.014e-01, -6.340e-02)); + r += mul(s1_5, M4(9.204e-02, 1.466e-01, -8.662e-03, -7.309e-02, 1.940e-01, 1.822e-03, -1.982e-01, -4.872e-02, -9.534e-02, -3.194e-03, -2.147e-01, -5.603e-02, 4.305e-02, 4.031e-02, -1.879e-01, 1.564e-02)); + r += mul(s1_6, M4(-4.283e-02, 4.618e-02, -8.197e-02, -6.512e-02, -7.169e-02, -6.047e-02, 2.293e-02, 3.459e-02, -5.382e-02, -5.021e-02, 7.904e-02, -3.021e-02, 2.588e-03, -6.667e-02, 5.148e-02, 5.692e-02)); + r += mul(s1_7, M4(-8.603e-02, 1.255e-01, 2.339e-01, -3.309e-02, -5.634e-02, -1.763e-01, 9.410e-02, 1.203e-01, -3.287e-02, -4.261e-02, 1.248e-01, 7.539e-02, -5.734e-03, -3.458e-02, 8.259e-02, 8.584e-04)); + r += mul(s1_8, M4(9.075e-02, -4.847e-02, -4.681e-02, 1.470e-01, -1.874e-02, -1.161e-01, -1.934e-01, 3.652e-02, 4.413e-02, 4.896e-02, -1.374e-01, -6.084e-02, 8.472e-02, 6.868e-03, -5.092e-02, 2.943e-02)); + r += mul(s2_0, M4(-7.578e-02, 3.864e-02, 3.828e-02, 1.041e-01, -2.067e-02, -1.882e-02, 2.747e-03, 1.201e-02, 1.090e-02, 7.076e-04, 4.765e-03, -3.586e-02, 5.400e-02, -2.777e-02, 1.078e-02, 1.827e-02)); + r += mul(s2_1, M4(5.610e-02, -6.995e-03, 4.990e-03, 9.165e-02, -1.922e-02, 7.537e-02, 1.615e-02, -1.001e-01, -3.874e-02, -6.028e-03, 1.239e-01, 4.476e-02, -5.677e-02, 2.028e-01, 8.904e-02, -6.016e-02)); + r += mul(s2_2, M4(-3.735e-02, -1.418e-01, -7.564e-02, -2.437e-02, -1.181e-02, -3.410e-02, 7.301e-02, 8.895e-03, -3.613e-02, -3.974e-02, -3.959e-02, 1.515e-02, -1.539e-03, -2.772e-03, -3.304e-02, -3.478e-02)); + r += mul(s2_3, M4(-1.914e-02, -9.525e-02, 8.687e-02, 2.162e-02, -3.970e-02, -4.097e-02, -2.761e-02, -8.150e-02, 1.069e-02, 1.161e-02, 4.322e-02, 3.107e-02, -6.213e-02, 5.408e-04, -2.306e-03, -2.409e-02)); + r += mul(s2_4, M4(1.511e-01, 1.203e-01, -2.227e-01, 
-9.491e-02, -1.130e-01, 1.699e-01, -5.111e-02, 8.725e-02, 5.434e-02, -1.741e-01, 1.328e-02, 1.151e-01, 2.696e-02, 1.803e-01, 2.174e-02, 1.227e-01)); + r += mul(s2_5, M4(-1.622e-02, 2.244e-01, 2.659e-02, -2.045e-02, -5.316e-02, 2.989e-02, -3.311e-02, -1.226e-01, -3.029e-03, 6.464e-03, -1.315e-02, 2.236e-02, -6.947e-02, -5.856e-02, -7.343e-02, -1.114e-02)); + r += mul(s2_6, M4(-4.299e-02, 4.939e-03, 1.489e-01, 2.805e-02, 3.084e-02, 1.229e-02, -3.138e-02, 2.415e-02, 2.300e-02, -5.926e-02, 3.672e-02, -9.303e-02, -2.705e-02, -7.170e-02, 8.702e-02, 5.325e-02)); + r += mul(s2_7, M4(-1.679e-02, -1.060e-01, -6.970e-02, 6.048e-02, -1.964e-02, 2.604e-02, 1.357e-01, 2.298e-02, -1.148e-01, 5.353e-02, -8.100e-02, 2.667e-02, 1.939e-02, -3.578e-02, -1.812e-02, 3.766e-02)); + r += mul(s2_8, M4(-3.038e-04, -5.991e-03, -3.775e-02, -3.493e-02, 2.390e-02, -7.596e-02, 2.851e-02, 6.164e-02, 1.460e-02, -1.681e-03, 4.620e-02, -1.403e-02, 2.086e-02, 2.709e-02, -1.620e-02, 3.560e-02)); + r += mul(s3_0, M4(1.141e-04, 2.863e-02, 7.410e-02, 6.073e-02, -1.141e-01, 6.118e-02, -3.687e-03, 6.243e-02, -4.684e-03, 8.379e-02, 2.439e-02, -9.396e-02, 4.764e-04, -1.569e-02, 8.856e-02, 7.804e-03)); + r += mul(s3_1, M4(-2.494e-02, -2.837e-01, 5.573e-02, 1.198e-02, -4.818e-02, 2.223e-02, -9.465e-02, 5.685e-02, -2.476e-02, 2.298e-01, -5.739e-02, 1.791e-01, -4.102e-02, 9.728e-02, -5.806e-02, -3.767e-02)); + r += mul(s3_2, M4(9.027e-02, 2.405e-03, 1.261e-01, -1.430e-02, 6.552e-03, -5.556e-02, 1.538e-02, 1.255e-02, -5.140e-02, 1.569e-01, -1.936e-01, 1.306e-02, 4.347e-02, -1.850e-02, -1.557e-03, -1.791e-02)); + r += mul(s3_3, M4(9.246e-02, -1.177e-01, -7.298e-02, -1.532e-02, -1.054e-01, -5.527e-02, 5.620e-03, -8.630e-02, -1.256e-01, -7.708e-02, 6.246e-02, 2.278e-02, -1.151e-01, -2.338e-02, 4.332e-02, 4.785e-02)); + r += mul(s3_4, M4(5.673e-02, 6.712e-02, -1.401e-02, -1.203e-03, 3.747e-02, 4.443e-03, -4.435e-02, 5.732e-02, -1.563e-03, -7.244e-03, -1.148e-01, -1.453e-01, 1.674e-01, -9.521e-02, -3.138e-02, 
1.018e-01)); + r += mul(s3_5, M4(-5.533e-02, 1.502e-01, 1.289e-01, -4.958e-02, -6.971e-02, -1.172e-01, 1.874e-01, 6.877e-03, 1.016e-02, 1.560e-01, -1.155e-01, 6.263e-03, -1.972e-02, 5.096e-03, 1.872e-01, -1.115e-01)); + r += mul(s3_6, M4(-2.659e-02, -4.438e-02, 3.411e-02, -4.316e-02, 2.489e-02, -5.362e-02, 6.324e-03, 2.250e-02, 6.458e-02, -1.310e-01, 1.392e-01, 5.710e-02, -2.336e-02, 3.723e-02, 2.933e-02, -9.192e-03)); + r += mul(s3_7, M4(-3.195e-02, 1.089e-02, 3.270e-02, 7.136e-02, 6.604e-03, -6.304e-02, 4.783e-02, 5.569e-02, -9.362e-02, -4.429e-02, -6.615e-02, 8.673e-02, 1.845e-01, -7.756e-03, -1.776e-01, -1.473e-02)); + r += mul(s3_8, M4(-1.581e-02, -5.631e-02, 1.651e-02, 3.415e-02, -7.547e-02, 5.109e-02, 2.791e-02, 8.264e-03, -2.090e-02, -1.188e-01, 5.602e-02, 8.052e-02, -2.345e-02, 6.393e-02, -1.052e-02, 5.089e-02)); + r += mul(s4_0, M4(2.126e-02, 6.163e-02, 5.698e-02, -4.189e-02, -1.033e-02, -8.728e-02, -5.533e-02, -3.495e-03, 9.104e-02, -4.146e-02, -3.394e-02, -3.003e-02, 9.144e-02, -2.309e-01, 1.130e-01, -1.458e-02)); + r += mul(s4_1, M4(1.959e-02, 1.240e-01, -2.060e-01, -8.081e-02, 7.705e-03, -2.860e-02, 6.000e-02, -5.123e-02, -3.581e-02, 1.002e-01, 8.903e-02, -5.170e-02, -2.747e-02, 9.524e-03, -5.792e-02, -6.941e-01)); + r += mul(s4_2, M4(4.583e-02, -6.945e-02, 8.239e-02, -9.996e-03, 9.288e-03, -2.209e-02, 3.012e-02, -2.720e-02, -6.338e-02, -1.081e-01, -1.522e-01, 1.295e-02, -4.195e-02, -1.774e-02, 9.414e-02, -1.140e-01)); + r += mul(s4_3, M4(-1.832e-02, 1.066e-01, 1.446e-01, 8.115e-02, -3.907e-02, -9.588e-03, 4.508e-02, -2.114e-02, 7.214e-03, 5.881e-02, -9.425e-03, 1.351e-02, -7.886e-03, -1.125e-02, 8.937e-02, -1.805e-01)); + r += mul(s4_4, M4(-1.545e-01, 3.279e-02, -7.314e-02, 3.145e-02, -2.324e-01, 2.880e-02, -4.514e-02, -7.526e-02, -3.248e-02, 1.011e-01, 1.070e-01, 9.826e-02, -6.378e-01, -1.113e-01, -5.476e-03, -6.405e-01)); + r += mul(s4_5, M4(4.225e-02, 4.995e-02, -5.151e-02, -6.658e-02, -3.270e-03, 2.154e-02, 4.740e-02, 3.039e-03, -1.839e-02, 
-1.744e-01, -1.593e-01, 6.823e-02, -1.242e-01, -1.483e-02, -1.341e-01, -2.339e-01)); + r += mul(s4_6, M4(1.194e-02, -7.981e-02, 6.914e-02, 3.322e-02, 5.713e-02, -4.931e-02, -3.948e-02, -2.579e-03, -4.410e-02, 6.070e-02, 1.195e-03, -3.376e-02, 1.641e-02, -5.587e-02, -4.053e-02, -1.630e-02)); + r += mul(s4_7, M4(4.789e-02, -4.548e-02, 3.627e-02, 7.645e-02, -4.072e-02, 1.233e-01, 2.409e-02, 2.719e-02, 1.063e-01, 6.620e-03, 5.710e-02, 2.887e-02, -1.481e-02, 1.618e-02, -4.501e-02, -7.504e-02)); + r += mul(s4_8, M4(3.256e-02, -6.711e-02, -7.155e-02, 1.077e-03, 1.179e-01, 1.927e-02, -7.517e-02, -5.050e-03, -1.652e-02, 2.988e-02, -6.473e-02, -2.233e-02, 1.078e-01, 1.606e-02, 1.051e-01, 1.858e-02)); + r += mul(s5_0, M4(1.185e-02, -8.303e-02, 7.525e-02, -3.948e-02, -2.980e-02, -5.279e-03, 1.521e-02, -3.329e-02, 3.512e-02, -4.894e-03, 1.869e-03, -9.467e-02, 4.547e-02, -1.113e-01, -4.921e-04, -3.650e-02)); + r += mul(s5_1, M4(-2.533e-02, -5.457e-02, 6.664e-02, -1.109e-01, 2.634e-03, -3.898e-02, -1.115e-01, -9.777e-02, 4.398e-03, 2.218e-01, -8.076e-02, -8.298e-03, -1.466e-03, 6.349e-02, -9.325e-02, -7.098e-02)); + r += mul(s5_2, M4(-1.490e-02, -1.382e-01, 6.412e-02, 4.542e-02, 2.084e-02, 3.895e-02, -1.326e-01, -2.134e-02, 5.695e-02, 1.803e-01, -6.086e-02, -8.669e-02, 3.222e-02, -8.005e-02, 6.315e-02, -7.551e-02)); + r += mul(s5_3, M4(-1.885e-03, 9.724e-02, -8.784e-02, 1.272e-02, -5.185e-02, -1.148e-02, -4.775e-02, 1.111e-02, -3.767e-02, -4.852e-02, 1.005e-01, 3.394e-02, -2.777e-02, 1.755e-02, 2.754e-02, -5.608e-02)); + r += mul(s5_4, M4(-8.003e-02, -3.442e-02, -1.281e-03, -5.182e-03, -3.213e-01, -1.581e-02, 6.228e-02, -1.903e-01, -1.459e-01, -1.218e-01, 2.070e-01, 4.157e-01, -1.788e-01, 1.294e-01, -6.393e-02, -4.187e-02)); + r += mul(s5_5, M4(-3.751e-02, 6.109e-02, 4.843e-02, -1.268e-02, -1.034e-01, -6.947e-02, -2.445e-02, -4.195e-02, -1.025e-01, -2.431e-01, -2.601e-01, 1.780e-01, -6.830e-02, -6.775e-02, -2.807e-02, -7.518e-02)); + r += mul(s5_6, M4(6.528e-03, -7.250e-02, 
1.841e-02, 2.886e-02, 1.267e-02, 9.469e-03, -2.174e-01, -1.515e-02, -3.808e-02, 3.925e-02, -5.356e-02, -4.861e-02, 6.533e-03, 1.848e-02, 4.500e-04, -4.908e-02)); + r += mul(s5_7, M4(-4.713e-02, -4.756e-02, 3.375e-02, 4.234e-02, -2.338e-01, 1.421e-01, 4.348e-02, -1.375e-01, 7.462e-02, 7.048e-02, -7.856e-02, -1.155e-01, 3.095e-02, 4.129e-03, -2.738e-02, -2.082e-02)); + r += mul(s5_8, M4(1.326e-02, -8.204e-02, -7.037e-02, 5.335e-02, -5.314e-03, 7.717e-02, 2.884e-02, -8.406e-02, 9.963e-03, 1.188e-01, -2.833e-02, 1.061e-02, 1.649e-02, 6.136e-03, 2.692e-02, -6.118e-02)); + r += mul(s6_0, M4(-1.225e-01, -3.199e-02, -2.469e-02, -3.257e-01, 2.266e-03, 5.483e-02, -2.814e-02, 4.752e-02, 4.902e-02, 8.969e-02, -1.586e-02, 4.620e-02, 4.357e-03, 7.414e-02, -4.872e-02, -4.663e-02)); + r += mul(s6_1, M4(-4.936e-02, 1.609e-01, -1.327e-01, -1.711e-01, 2.718e-02, 8.757e-03, -4.149e-02, 4.596e-03, -2.391e-02, 1.315e-02, 1.624e-01, 1.233e-01, 1.183e-01, 1.777e-01, -2.123e-02, 4.965e-02)); + r += mul(s6_2, M4(-1.488e-02, 2.607e-02, -2.137e-01, -9.055e-02, 1.082e-01, -7.058e-03, -8.666e-02, 4.212e-02, -9.235e-03, -9.004e-02, 1.824e-01, 3.037e-02, 3.261e-02, 1.020e-01, -2.231e-02, -1.477e-02)); + r += mul(s6_3, M4(-1.582e-01, -7.736e-04, 1.119e-01, -2.861e-01, 1.220e-01, -1.340e-01, -6.330e-02, 6.531e-02, 1.245e-01, 5.890e-02, -6.131e-02, 5.569e-02, -1.808e-01, 6.471e-02, 2.407e-02, -9.163e-02)); + r += mul(s6_4, M4(5.294e-02, 2.264e-02, 9.260e-02, 2.702e-03, 6.289e-02, -1.609e-01, -1.517e-02, -9.012e-02, 9.663e-02, 8.651e-02, -7.378e-02, -2.462e-01, 7.932e-03, 5.327e-02, -6.168e-02, -2.111e-02)); + r += mul(s6_5, M4(-5.505e-02, 1.211e-01, 2.485e-02, 6.687e-05, 2.232e-02, 4.068e-02, -8.433e-02, -5.399e-02, 1.062e-01, 4.350e-02, 1.213e-01, -1.309e-03, 4.683e-02, 5.696e-02, -2.095e-01, 2.013e-02)); + r += mul(s6_6, M4(-6.952e-02, 8.277e-02, 6.191e-02, -1.102e-01, 6.552e-02, -1.717e-02, -4.441e-02, 3.744e-02, -1.211e-03, 2.891e-02, 2.197e-02, 3.364e-02, 7.535e-03, -6.770e-03, -6.397e-02, 
-1.047e-01)); + r += mul(s6_7, M4(-1.045e-02, 1.324e-01, -6.323e-03, -1.891e-01, -1.024e-01, 8.601e-03, -3.786e-02, -4.529e-02, -1.346e-01, -2.908e-02, -1.530e-03, -5.581e-02, -5.267e-02, 4.319e-02, 6.115e-04, -4.299e-02)); + r += mul(s6_8, M4(-1.878e-02, 2.048e-01, -4.330e-02, -4.895e-02, -1.007e-02, 1.237e-02, -2.172e-02, -8.858e-03, 1.403e-02, -6.982e-02, 1.142e-01, -2.795e-03, 6.235e-02, 2.970e-02, -8.211e-02, 3.042e-02)); + r += mul(s7_0, M4(-5.024e-02, -3.928e-02, 8.248e-03, 3.024e-02, 5.021e-02, -2.346e-02, 6.404e-02, 1.671e-01, 4.870e-02, 4.704e-03, -2.799e-02, -2.238e-02, 3.293e-02, -5.885e-02, 8.025e-02, 8.911e-03)); + r += mul(s7_1, M4(4.734e-02, -5.602e-02, -8.165e-02, 8.589e-02, -6.049e-02, -2.462e-01, 9.227e-03, -2.058e-01, -2.801e-02, 1.579e-01, 3.732e-02, -4.812e-02, 1.353e-01, -3.953e-02, -9.637e-02, 8.001e-02)); + r += mul(s7_2, M4(-3.973e-03, -1.591e-02, 1.366e-03, 4.087e-02, 5.343e-02, -5.346e-02, -5.055e-02, 2.863e-02, 5.103e-02, 1.944e-02, 6.019e-02, -8.776e-02, 3.405e-02, 2.922e-02, 1.362e-01, -7.645e-03)); + r += mul(s7_3, M4(-4.861e-04, -4.012e-02, 4.286e-02, -4.906e-03, 1.627e-01, -1.097e-01, -1.243e-01, -6.100e-02, 6.076e-02, -1.064e-02, 2.648e-02, 6.042e-02, -7.793e-02, -1.243e-02, 6.117e-02, -5.637e-02)); + r += mul(s7_4, M4(2.827e-02, 3.054e-02, 1.967e-02, 1.132e-01, 1.891e-01, -1.120e-01, 6.793e-02, 1.781e-01, -6.503e-02, 3.875e-02, -1.368e-01, -6.066e-02, 8.103e-02, 4.358e-02, -1.609e-01, 3.028e-02)); + r += mul(s7_5, M4(-2.243e-02, 6.646e-03, 4.602e-02, -8.031e-02, -2.034e-01, 2.752e-02, 8.253e-02, 5.569e-03, 1.200e-01, 1.976e-02, 1.460e-01, 1.773e-01, 5.920e-02, -8.765e-02, -3.917e-02, 2.605e-02)); + r += mul(s7_6, M4(3.516e-02, -1.987e-02, -3.543e-02, 5.867e-02, -3.802e-02, 5.770e-02, 1.447e-01, 2.270e-02, 1.202e-02, 6.453e-02, -6.165e-03, -1.177e-02, 9.626e-02, -5.555e-02, -9.464e-02, -6.232e-02)); + r += mul(s7_7, M4(-8.539e-04, -4.631e-02, 5.124e-02, 2.576e-02, -7.279e-02, 1.385e-01, -1.287e-01, -5.966e-02, -9.662e-03, 
5.361e-02, -5.509e-03, -2.013e-02, 6.722e-02, -7.907e-02, -6.298e-02, -6.806e-02)); + r += mul(s7_8, M4(-1.113e-02, -1.198e-02, 6.223e-03, -3.262e-04, 2.197e-02, 1.376e-01, -2.326e-02, -9.016e-02, 2.050e-02, 3.226e-02, 3.427e-02, -4.716e-02, 7.373e-02, 5.725e-02, 5.104e-02, 5.237e-02)); + r += V4(2.325e-03, -1.910e-04, -2.453e-02, -3.947e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.452e-02, -2.213e-02, 3.418e-02, -1.868e-03, 5.112e-02, 2.362e-02, 6.041e-02, 2.931e-02, 3.201e-02, -6.846e-02, 1.007e-02, -1.945e-02, -1.491e-01, -1.026e-01, -6.356e-02, 8.083e-03)); + r += mul(s0_1, M4(-3.302e-02, 1.039e-02, -7.105e-02, 4.512e-02, 1.699e-02, -4.399e-02, -1.129e-01, -3.180e-03, 4.396e-02, -2.110e-02, -2.890e-02, -1.940e-02, 1.083e-01, 1.618e-03, -4.433e-02, 2.564e-02)); + r += mul(s0_2, M4(-2.591e-02, 4.649e-03, -1.796e-04, -1.397e-04, 3.523e-03, 3.807e-03, -2.256e-02, -2.057e-02, 2.152e-02, -4.283e-02, -4.498e-03, 5.868e-02, 8.216e-02, -1.374e-02, -1.599e-02, 1.185e-01)); + r += mul(s0_3, M4(-2.138e-02, -2.510e-02, 3.635e-03, 5.926e-02, 1.948e-02, -1.106e-02, 2.721e-02, -6.293e-03, -1.257e-01, 3.189e-02, -2.102e-02, -1.052e-01, -1.488e-01, -2.395e-02, -1.034e-01, 4.175e-02)); + r += mul(s0_4, M4(3.801e-02, -5.686e-02, -2.110e-02, 8.493e-02, -4.898e-02, -1.377e-01, 2.817e-02, -1.992e-01, 2.107e-01, 2.706e-01, 
-6.196e-02, -1.218e-01, 1.192e-01, 1.155e-01, -1.004e-01, 1.263e-01)); + r += mul(s0_5, M4(6.634e-02, -6.558e-02, -5.310e-02, 8.774e-02, -1.082e-01, -1.092e-01, 9.039e-02, 4.413e-02, -1.282e-01, 4.255e-02, 2.607e-02, 3.934e-02, -3.736e-02, -3.743e-02, -7.450e-02, 5.096e-02)); + r += mul(s0_6, M4(-3.132e-02, 1.058e-01, 5.335e-02, 3.218e-02, -8.362e-03, -1.239e-02, 7.032e-02, -2.217e-02, -9.698e-02, -2.439e-02, -2.590e-02, -1.107e-01, -1.166e-02, 1.432e-03, -2.418e-02, -4.755e-02)); + r += mul(s0_7, M4(1.094e-02, -5.756e-02, -1.564e-02, -5.575e-02, -5.384e-02, 5.222e-03, -1.447e-02, 3.893e-02, -1.928e-01, 3.496e-02, 2.032e-01, 5.912e-02, 6.310e-02, 1.259e-02, -1.224e-02, 8.429e-02)); + r += mul(s0_8, M4(-6.143e-02, 8.296e-02, 3.608e-02, -6.786e-03, -8.912e-02, 7.147e-02, 3.735e-03, -1.193e-02, 6.371e-02, -1.715e-03, 6.778e-02, 4.096e-02, -2.178e-03, -2.116e-02, 3.634e-02, 2.353e-02)); + r += mul(s1_0, M4(3.726e-02, -2.700e-02, 5.479e-02, 2.802e-02, -4.521e-02, 6.023e-02, 1.441e-01, -3.327e-02, 7.922e-03, 9.608e-02, 6.132e-02, 6.058e-02, -1.181e-01, 1.375e-02, 3.535e-02, 1.659e-02)); + r += mul(s1_1, M4(9.419e-02, 4.554e-02, -1.226e-02, 3.050e-02, 1.130e-01, -5.704e-02, 4.752e-02, -1.253e-01, -5.118e-02, 4.104e-02, -6.749e-02, -5.343e-02, 5.244e-02, 6.944e-02, 1.140e-01, -9.239e-02)); + r += mul(s1_2, M4(-4.040e-02, 5.244e-02, 2.012e-02, 5.836e-02, 4.047e-02, 3.844e-02, 2.243e-01, -2.281e-02, 6.051e-02, -4.283e-02, 8.320e-02, 4.852e-02, -6.591e-03, 1.960e-02, 1.263e-01, -3.942e-02)); + r += mul(s1_3, M4(2.830e-02, 8.144e-02, -3.719e-02, -6.285e-02, -8.031e-02, -2.416e-02, -1.323e-01, -4.774e-02, 2.482e-03, 1.067e-01, 8.261e-02, 7.856e-02, -4.436e-02, 6.318e-02, -2.072e-02, 4.913e-03)); + r += mul(s1_4, M4(1.982e-02, 1.753e-01, -5.781e-02, 3.787e-01, -9.268e-02, -4.545e-01, 2.008e-01, -1.498e-02, 3.232e-02, -4.611e-02, -3.575e-01, -1.095e-01, 1.653e-02, -1.507e-02, -1.033e-03, -9.718e-02)); + r += mul(s1_5, M4(-2.268e-01, -9.784e-02, -1.059e-01, -3.203e-02, 5.056e-02, 
1.258e-01, 4.908e-02, -2.078e-01, 9.348e-02, 1.395e-01, -1.733e-01, 7.156e-02, -1.265e-01, 8.872e-03, -9.426e-02, -9.058e-02)); + r += mul(s1_6, M4(-4.677e-02, -2.115e-02, -1.070e-02, 7.677e-02, 1.224e-02, -8.576e-02, 9.551e-02, -6.854e-02, 7.423e-02, 2.429e-02, 5.618e-02, -2.685e-02, -9.624e-02, -6.545e-02, 4.498e-02, -3.158e-02)); + r += mul(s1_7, M4(-1.134e-01, 8.718e-02, -1.208e-01, -1.686e-02, 1.400e-01, -3.213e-01, 9.984e-02, 1.925e-01, 9.047e-02, -7.602e-02, -1.353e-01, 3.362e-02, 3.519e-03, -7.485e-02, -4.753e-03, 3.789e-02)); + r += mul(s1_8, M4(-1.636e-01, -5.761e-02, 8.762e-02, 3.048e-02, 8.438e-02, 3.496e-02, 8.871e-02, -1.531e-01, 2.154e-02, 2.129e-02, -1.388e-01, -1.133e-02, -9.511e-02, -4.549e-02, -1.384e-02, -5.895e-02)); + r += mul(s2_0, M4(4.882e-03, 1.283e-01, -2.763e-02, 9.043e-02, -2.804e-03, -5.500e-03, 9.404e-02, 2.569e-02, 3.186e-02, 5.725e-02, -2.883e-02, -8.733e-02, -5.914e-02, 7.541e-02, -2.237e-03, 1.062e-02)); + r += mul(s2_1, M4(2.729e-02, 1.656e-02, 4.354e-02, 6.795e-02, -6.394e-02, 6.677e-04, -9.367e-02, -7.237e-03, 1.514e-03, -6.692e-02, 5.455e-02, -8.540e-03, 4.207e-02, 1.632e-01, 5.576e-02, -2.645e-02)); + r += mul(s2_2, M4(-1.607e-02, -2.096e-02, -1.031e-01, -1.291e-02, 7.235e-02, -3.595e-04, -1.461e-03, 1.701e-02, -1.732e-03, 8.503e-02, -1.746e-03, 1.168e-03, 8.124e-02, -3.384e-02, -1.580e-02, 9.406e-02)); + r += mul(s2_3, M4(-4.748e-02, 2.919e-02, 1.506e-01, 1.201e-01, 3.544e-03, 6.126e-02, 1.094e-01, 7.174e-02, -3.791e-02, 1.865e-01, -6.470e-02, -1.531e-01, 1.565e-02, 3.920e-02, 4.791e-02, 4.874e-02)); + r += mul(s2_4, M4(-6.980e-02, -3.444e-02, -7.843e-02, -7.325e-02, 1.878e-01, -9.045e-03, -8.041e-02, -2.307e-02, -5.940e-02, -3.866e-02, 6.295e-02, 1.268e-01, 1.971e-02, 1.880e-01, -7.046e-02, 7.267e-02)); + r += mul(s2_5, M4(1.551e-01, -1.490e-01, 4.663e-02, 2.092e-03, 4.003e-02, -9.856e-03, -1.524e-01, 8.802e-02, -6.113e-02, 1.032e-01, -5.811e-02, -1.167e-01, -1.040e-01, -1.417e-02, -7.664e-02, 6.155e-02)); + r += mul(s2_6, 
M4(9.057e-02, 1.014e-01, -9.122e-03, -1.784e-02, 3.977e-02, 4.477e-02, 4.628e-02, 2.379e-02, -1.242e-02, 6.113e-02, -9.431e-03, -5.879e-02, 2.596e-02, -2.014e-02, 3.798e-02, 2.791e-02)); + r += mul(s2_7, M4(1.446e-02, -1.929e-02, 2.062e-02, -5.054e-02, 8.961e-02, -2.919e-02, -3.591e-02, -4.359e-05, 8.416e-02, -9.472e-03, -6.747e-02, -4.886e-02, 4.601e-02, 2.285e-01, -4.229e-02, -1.883e-02)); + r += mul(s2_8, M4(2.301e-02, -8.001e-03, -3.776e-02, -4.969e-02, -6.737e-02, -7.542e-02, 9.241e-02, 4.902e-05, 1.421e-02, 1.136e-01, -1.230e-01, -2.179e-02, -9.226e-02, 1.140e-01, 5.157e-03, -4.170e-02)); + r += mul(s3_0, M4(5.915e-02, 1.352e-01, -6.930e-02, -3.117e-02, 6.114e-02, -1.653e-01, 1.082e-01, 8.015e-02, 3.380e-02, 3.502e-03, 4.469e-02, 1.862e-02, 6.960e-03, 9.355e-03, 5.013e-02, 9.937e-02)); + r += mul(s3_1, M4(-1.468e-01, 4.873e-02, 1.114e-01, -5.181e-03, 5.865e-02, -9.539e-02, -8.387e-02, 5.829e-02, 6.005e-02, -1.361e-01, -8.120e-02, -8.009e-02, 1.379e-01, -1.110e-01, -3.764e-02, 7.363e-02)); + r += mul(s3_2, M4(-1.323e-02, 1.469e-01, 4.694e-02, 1.735e-02, 3.762e-02, -9.231e-02, -7.239e-02, 1.063e-02, 4.043e-02, 4.433e-02, -5.364e-02, -3.767e-02, -3.601e-02, -6.791e-02, 7.434e-03, 8.255e-02)); + r += mul(s3_3, M4(-3.209e-02, 8.216e-02, 1.743e-01, -5.094e-02, 1.137e-01, -2.192e-01, -4.207e-02, 8.447e-03, -1.389e-02, 1.533e-01, 6.047e-03, -6.254e-03, -1.571e-02, 1.528e-02, 4.149e-02, 3.927e-02)); + r += mul(s3_4, M4(1.320e-01, -2.930e-02, -1.799e-02, 2.068e-01, 4.463e-02, -1.153e-03, -1.415e-02, -1.850e-01, 2.495e-01, 4.801e-02, -2.517e-01, 1.030e-01, -3.563e-01, 2.625e-01, 3.427e-02, 5.950e-02)); + r += mul(s3_5, M4(-7.095e-02, 5.167e-02, 1.356e-02, -5.700e-02, 1.530e-01, -1.915e-01, -1.541e-02, 1.587e-01, -3.247e-02, 1.876e-01, -2.764e-01, -2.076e-01, 3.612e-02, 1.690e-01, 8.827e-02, 6.959e-02)); + r += mul(s3_6, M4(-5.147e-02, 4.759e-02, 3.091e-02, 1.571e-02, -4.751e-02, 2.807e-02, -1.546e-02, -2.250e-02, -3.121e-03, 1.554e-02, -8.659e-02, -1.952e-01, 4.762e-04, 
-3.163e-02, -8.135e-02, 6.519e-02)); + r += mul(s3_7, M4(-2.127e-02, -1.332e-02, 2.117e-02, 1.499e-02, 4.184e-03, 6.348e-03, 4.794e-02, -4.764e-03, -1.938e-01, 3.070e-02, 5.067e-02, -1.063e-01, -7.583e-02, 9.846e-02, -5.883e-02, 1.245e-02)); + r += mul(s3_8, M4(-7.546e-02, -6.857e-02, 4.869e-02, 6.144e-04, 2.149e-02, -1.723e-01, 3.948e-02, 1.033e-01, -9.994e-02, -1.799e-02, -4.577e-02, 1.503e-02, 1.917e-02, 2.287e-02, -3.749e-02, 3.704e-03)); + r += mul(s4_0, M4(2.386e-02, -6.699e-02, 1.179e-01, 1.217e-01, -3.840e-03, -9.233e-02, -5.570e-02, -4.940e-02, -3.374e-02, 1.446e-02, -3.170e-02, -2.923e-02, 6.858e-03, 1.429e-02, -1.063e-01, 3.806e-02)); + r += mul(s4_1, M4(6.160e-02, 4.078e-02, -9.278e-02, 8.813e-03, 4.014e-02, 8.360e-02, 1.578e-01, 2.356e-02, 1.554e-02, -1.546e-02, 1.870e-01, -2.622e-02, 1.007e-01, -2.018e-02, 3.210e-02, -1.491e-01)); + r += mul(s4_2, M4(4.191e-02, 8.331e-02, 7.167e-02, -2.731e-02, 2.408e-03, -9.919e-03, -1.365e-03, -9.990e-03, -6.103e-02, -6.417e-02, -4.156e-02, 4.083e-02, 1.673e-01, 1.464e-02, -5.283e-02, -1.810e-02)); + r += mul(s4_3, M4(1.847e-01, -5.065e-02, 1.175e-01, 1.171e-01, 1.287e-02, -1.970e-01, 1.837e-02, 9.569e-02, -5.157e-02, 1.203e-01, 8.410e-02, -6.370e-02, 6.671e-02, 1.668e-01, 3.538e-02, 6.047e-02)); + r += mul(s4_4, M4(-1.742e-01, 1.513e-01, 1.198e-01, 3.599e-02, -5.566e-02, 9.012e-02, -1.984e-01, -2.470e-02, 1.116e-01, 3.129e-01, -8.667e-02, 1.346e-01, 1.939e-01, 2.393e-02, -9.264e-02, -2.492e-01)); + r += mul(s4_5, M4(4.079e-02, 7.863e-02, 3.043e-02, 8.451e-02, 7.184e-02, 9.325e-02, -3.495e-02, 6.198e-02, -8.641e-02, -2.968e-02, 1.519e-01, -1.383e-01, -5.764e-02, -1.684e-01, -7.344e-02, -1.298e-01)); + r += mul(s4_6, M4(2.949e-02, 5.492e-02, -6.664e-02, -1.867e-02, -2.591e-02, -2.480e-02, 2.903e-02, 1.387e-01, 5.483e-02, -1.129e-01, 1.877e-02, 7.596e-03, -1.058e-01, -1.558e-01, 9.410e-02, 6.218e-02)); + r += mul(s4_7, M4(7.291e-02, -9.164e-02, -2.596e-02, 1.246e-01, -1.701e-02, -1.238e-01, -8.126e-02, -1.795e-02, 
-6.595e-02, 1.063e-01, 2.183e-02, 5.273e-02, 1.348e-02, -5.259e-02, 1.109e-01, -1.108e-01)); + r += mul(s4_8, M4(-8.273e-02, -2.826e-02, 5.756e-02, -6.913e-02, -9.518e-02, 3.518e-02, -4.045e-02, -1.886e-03, -5.751e-03, 1.910e-02, 8.394e-03, -5.276e-02, 5.724e-02, 9.454e-04, 3.042e-02, 1.589e-03)); + r += mul(s5_0, M4(-4.081e-02, 2.613e-02, -1.001e-02, -7.691e-02, 6.423e-03, -7.491e-03, 7.306e-02, 5.021e-02, 5.762e-02, -7.077e-02, -6.219e-02, 1.087e-01, 4.356e-02, -1.110e-02, 2.045e-02, -4.486e-02)); + r += mul(s5_1, M4(-8.378e-02, 9.047e-02, 7.844e-02, -1.880e-01, 9.810e-02, 1.244e-01, 4.427e-02, 8.744e-02, 9.155e-02, 9.292e-02, 8.227e-02, 8.444e-02, -1.160e-02, 2.979e-02, 5.415e-02, -1.836e-02)); + r += mul(s5_2, M4(9.188e-02, 4.463e-02, 3.819e-02, 2.323e-02, -8.297e-03, -5.695e-02, -3.495e-02, -6.575e-03, -2.870e-02, 3.126e-02, -7.547e-02, 4.411e-02, 5.214e-03, 4.028e-02, -2.259e-02, 3.480e-02)); + r += mul(s5_3, M4(-3.300e-02, 4.670e-02, -6.394e-02, -7.687e-03, 8.160e-02, -1.323e-01, -3.512e-02, 6.628e-02, 5.705e-03, 2.856e-03, 9.416e-02, -2.114e-02, 2.401e-02, 9.146e-03, 7.629e-03, -6.669e-02)); + r += mul(s5_4, M4(-1.094e-01, 2.638e-01, 3.507e-02, 5.298e-03, -1.671e-01, 1.657e-01, 4.612e-02, -6.475e-02, 3.626e-01, -6.742e-02, 2.930e-01, 1.344e-01, -9.630e-02, -5.689e-02, -4.070e-02, -1.976e-01)); + r += mul(s5_5, M4(5.550e-02, 7.444e-02, 6.249e-02, 9.736e-02, -9.858e-02, -3.331e-02, -3.191e-02, -1.121e-02, -2.269e-01, -2.242e-01, -6.816e-04, -3.120e-01, -1.583e-02, 8.640e-03, -3.154e-02, -1.502e-01)); + r += mul(s5_6, M4(3.199e-02, 6.824e-02, -5.314e-02, 6.801e-03, -1.462e-01, 7.690e-02, 5.163e-02, 3.599e-02, 1.236e-02, -5.421e-02, 2.565e-02, -3.847e-03, -3.625e-02, 4.862e-02, -1.461e-02, 1.887e-02)); + r += mul(s5_7, M4(1.903e-02, 1.049e-01, -9.422e-02, 7.068e-03, 7.797e-02, -2.580e-02, -2.133e-02, -8.385e-02, 2.896e-01, -7.134e-02, -3.428e-02, 3.129e-02, -8.228e-02, 6.240e-02, 1.937e-02, -5.818e-02)); + r += mul(s5_8, M4(2.743e-02, -1.725e-02, 2.877e-02, 
1.920e-02, -9.789e-02, -9.666e-02, -1.232e-02, -1.358e-01, -5.698e-02, -6.617e-02, -2.075e-01, -9.234e-02, 7.224e-02, 5.932e-02, 3.815e-02, 5.836e-03)); + r += mul(s6_0, M4(1.294e-01, -1.894e-01, 1.271e-02, -3.241e-02, 6.861e-02, 7.944e-03, -6.318e-02, 7.642e-02, -1.590e-03, 2.999e-02, -1.520e-01, -3.941e-02, 8.424e-02, 1.252e-01, 3.210e-02, 3.888e-02)); + r += mul(s6_1, M4(7.113e-03, -3.657e-01, -5.681e-02, -2.579e-02, 3.934e-03, -6.467e-02, -4.227e-02, -1.791e-02, -4.824e-02, -4.570e-02, 1.281e-01, -1.851e-01, -7.617e-02, -2.520e-02, 4.820e-02, -3.522e-02)); + r += mul(s6_2, M4(7.420e-02, -1.187e-01, 1.931e-02, -7.240e-02, 1.547e-02, 1.421e-02, -2.403e-02, 5.392e-02, -2.654e-02, -3.127e-02, 5.019e-02, -9.106e-03, -2.130e-03, 7.296e-02, -1.444e-02, 4.740e-02)); + r += mul(s6_3, M4(5.434e-02, -9.763e-02, -1.274e-01, 3.995e-02, -4.936e-03, -9.896e-02, -1.038e-01, -9.221e-03, 1.122e-02, 7.027e-02, -1.762e-01, 9.409e-03, -1.257e-02, -4.714e-02, 1.009e-01, -5.563e-02)); + r += mul(s6_4, M4(2.441e-01, -1.990e-01, 8.441e-02, 1.368e-01, 9.119e-02, 6.636e-03, -4.092e-02, 3.751e-02, 8.522e-02, 2.960e-02, -5.376e-02, -1.098e-01, 1.705e-01, -1.020e-01, -8.451e-02, 1.714e-01)); + r += mul(s6_5, M4(1.497e-01, -7.163e-02, -7.517e-02, 1.158e-01, -4.815e-02, 4.708e-02, -6.679e-03, -1.606e-02, 6.199e-02, -6.321e-02, -1.431e-02, -8.619e-02, -1.887e-01, 1.032e-01, -1.129e-01, -5.719e-02)); + r += mul(s6_6, M4(1.094e-01, -9.081e-02, -1.038e-01, -3.552e-02, -5.055e-03, -7.073e-02, -3.967e-02, 1.570e-02, -4.736e-02, -9.455e-02, 1.999e-02, 8.959e-03, 1.274e-02, 9.704e-02, 3.143e-02, -6.793e-03)); + r += mul(s6_7, M4(2.531e-01, -6.100e-02, -2.354e-01, -6.290e-02, -4.297e-02, 2.630e-03, 1.902e-02, -4.228e-02, -5.898e-02, 3.427e-02, 2.461e-02, -7.373e-02, 1.105e-01, -1.016e-01, 1.337e-02, -7.272e-02)); + r += mul(s6_8, M4(9.801e-02, -5.510e-02, -1.810e-01, -5.994e-02, 7.279e-02, -1.181e-02, 6.652e-02, 2.043e-03, 7.534e-02, -1.073e-01, 1.965e-02, -4.311e-02, -1.470e-01, 4.022e-02, 1.256e-02, 
-1.590e-01)); + r += mul(s7_0, M4(2.198e-03, 1.195e-02, 4.987e-02, 2.487e-02, -3.843e-02, 6.466e-02, 6.872e-03, -4.453e-02, -4.167e-02, 3.627e-02, 9.219e-03, -1.346e-02, 7.166e-02, 2.580e-02, -2.460e-03, 1.006e-01)); + r += mul(s7_1, M4(-1.733e-02, -1.171e-02, -9.107e-02, 7.854e-02, 5.693e-02, -1.864e-02, 1.852e-02, 5.034e-02, 7.212e-02, 3.279e-02, 7.340e-03, -1.160e-02, 1.476e-02, -1.143e-01, 5.309e-02, 8.046e-02)); + r += mul(s7_2, M4(1.709e-03, -9.215e-03, 3.640e-02, -2.015e-02, -7.813e-02, -2.006e-02, 5.107e-03, -3.323e-02, 3.283e-03, 1.799e-01, -6.751e-02, -3.501e-02, 5.822e-02, -1.271e-02, -1.269e-02, 3.323e-02)); + r += mul(s7_3, M4(-4.981e-02, 1.224e-01, 9.955e-03, 4.094e-02, -1.480e-01, -1.458e-01, -1.077e-01, 4.140e-02, 6.963e-02, -9.296e-03, -7.067e-02, 6.662e-02, -1.469e-01, -7.788e-02, 3.749e-02, -6.122e-02)); + r += mul(s7_4, M4(8.569e-02, -3.485e-03, 6.274e-02, 4.062e-02, -8.150e-02, -2.033e-02, -4.892e-02, -3.897e-02, -7.900e-02, 3.007e-02, -7.428e-02, -9.108e-02, 1.780e-01, -1.877e-01, -8.340e-02, 2.277e-01)); + r += mul(s7_5, M4(-2.498e-03, 4.742e-02, -1.618e-02, 5.209e-02, 7.432e-02, -5.888e-02, 5.732e-02, 2.196e-02, 2.111e-02, -1.725e-01, 5.155e-02, -7.693e-03, -1.316e-01, 6.684e-04, 3.329e-02, 4.710e-02)); + r += mul(s7_6, M4(-3.028e-03, -1.294e-02, 7.406e-03, 1.783e-02, 6.747e-03, 1.236e-02, 5.246e-02, 1.643e-01, -5.246e-02, 5.649e-03, -3.658e-02, -5.173e-02, -2.663e-02, -1.295e-01, 3.044e-02, -1.094e-03)); + r += mul(s7_7, M4(-1.570e-02, 1.167e-01, 2.844e-02, -6.654e-03, 1.367e-01, -2.043e-01, -9.955e-02, -1.525e-01, 6.153e-02, 7.433e-03, -1.944e-02, 1.738e-02, 6.937e-02, -2.045e-02, 1.065e-01, -1.348e-01)); + r += mul(s7_8, M4(-6.352e-03, 3.662e-02, 5.139e-02, 2.546e-02, 6.839e-02, -2.401e-02, -9.333e-02, -1.460e-01, 6.587e-02, 6.411e-02, -6.108e-02, -1.712e-02, -3.972e-02, -4.960e-02, 2.875e-02, 1.562e-02)); + r += V4(-2.817e-02, -3.278e-02, 5.487e-03, -3.455e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 
s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.094e-04, 3.656e-02, 2.530e-02, 1.321e-02, 3.186e-02, 2.905e-02, -3.188e-02, 2.978e-03, 1.053e-01, -6.066e-02, 4.463e-02, -9.486e-02, -5.617e-02, 1.871e-01, 1.274e-01, -8.033e-02)); + r += mul(s0_1, M4(3.014e-02, 1.179e-01, -4.175e-03, -4.586e-02, -6.619e-02, -1.521e-02, -1.647e-02, 9.131e-04, 2.558e-02, 4.775e-02, -5.385e-02, 4.971e-02, -8.648e-02, 7.845e-02, -3.333e-02, -3.736e-02)); + r += mul(s0_2, M4(2.093e-02, 1.867e-03, -5.568e-02, 6.354e-03, -1.903e-02, -1.263e-02, -7.210e-03, -2.144e-03, -3.158e-03, -2.480e-02, 1.009e-01, 2.064e-02, -3.645e-02, -2.622e-02, -5.732e-02, -2.596e-02)); + r += mul(s0_3, M4(-7.684e-02, 1.370e-01, -3.305e-02, -5.904e-03, -4.982e-03, -1.551e-01, 5.245e-02, -2.731e-02, -3.460e-02, 2.563e-02, -1.279e-01, 1.652e-04, -3.821e-01, 1.504e-01, -1.377e-02, -6.622e-02)); + r += mul(s0_4, M4(-1.214e-01, -8.551e-02, -1.371e-01, 3.295e-02, 7.233e-02, 9.524e-02, 1.523e-01, -1.300e-02, -9.606e-03, 1.983e-01, 2.064e-01, -8.872e-02, 4.599e-02, 1.161e-01, 1.250e-01, -1.654e-01)); + r += mul(s0_5, M4(-4.586e-02, -6.165e-02, -6.160e-02, 1.361e-02, 2.826e-02, 4.357e-02, 3.549e-02, 3.897e-02, -5.086e-02, -1.082e-01, 1.085e-01, 1.877e-01, -4.027e-02, -2.616e-02, 4.383e-02, -8.405e-02)); + r += mul(s0_6, M4(-7.785e-02, 4.741e-02, -3.778e-02, -8.051e-02, 1.224e-01, -5.179e-02, -2.464e-02, 6.087e-02, -7.910e-02, 1.355e-01, 
-6.820e-02, 5.459e-02, -7.689e-02, 7.860e-02, 7.812e-02, -2.411e-02)); + r += mul(s0_7, M4(-1.077e-01, -6.815e-02, -5.486e-02, -2.057e-02, 5.878e-02, 1.066e-01, -5.390e-02, 4.218e-02, -1.593e-01, 6.220e-02, 2.660e-02, 3.090e-02, -1.283e-01, -1.931e-02, -6.371e-02, -2.323e-02)); + r += mul(s0_8, M4(3.539e-02, 2.408e-04, 1.936e-02, -3.251e-02, -1.620e-02, -8.020e-02, 1.042e-01, 6.912e-04, -6.654e-02, 1.033e-01, 2.011e-01, -8.273e-02, -3.157e-02, 2.211e-02, 4.827e-02, 3.244e-02)); + r += mul(s1_0, M4(-2.871e-02, -2.896e-02, -4.957e-02, 9.638e-03, -6.017e-02, 4.270e-01, 1.089e-01, -1.574e-01, 5.912e-02, -8.238e-03, 6.139e-02, -4.290e-02, -6.069e-02, 1.503e-01, 1.930e-02, 3.593e-02)); + r += mul(s1_1, M4(8.551e-02, -1.489e-01, -1.031e-02, -5.748e-02, -1.455e-02, 8.760e-02, -6.351e-02, 2.076e-01, -6.131e-02, 1.324e-02, 1.597e-02, -9.106e-03, -7.278e-02, 3.747e-04, 1.872e-02, 7.650e-02)); + r += mul(s1_2, M4(1.915e-02, -7.556e-02, 3.498e-02, 6.874e-02, 3.128e-02, 2.021e-01, 4.100e-02, 4.759e-02, 2.797e-03, 7.574e-02, -2.713e-02, -6.555e-02, 4.066e-02, 3.684e-03, -6.114e-02, 9.379e-05)); + r += mul(s1_3, M4(-1.447e-02, 2.614e-03, 1.120e-01, -5.897e-02, 5.813e-02, 2.165e-01, 2.444e-01, 8.125e-02, -1.264e-03, -3.225e-02, -1.145e-01, -8.569e-02, -1.534e-01, -1.499e-01, -1.726e-01, 2.016e-01)); + r += mul(s1_4, M4(3.678e-02, -2.782e-01, 1.807e-01, 1.374e-01, 1.275e-01, 1.405e-01, -1.047e-01, 4.539e-02, -1.196e-01, -2.815e-02, 5.395e-02, 3.457e-02, 1.847e-01, -1.763e-01, -9.819e-02, 2.932e-01)); + r += mul(s1_5, M4(-2.290e-02, 5.359e-02, 1.682e-01, -4.842e-02, -2.260e-01, 1.002e-01, 1.145e-02, -2.250e-01, 6.991e-03, 1.070e-01, 1.103e-01, -1.632e-02, 8.423e-02, 1.185e-01, 4.030e-02, 4.863e-03)); + r += mul(s1_6, M4(1.285e-01, 1.951e-01, 9.006e-02, -8.574e-02, 7.778e-02, 2.669e-01, 1.092e-01, -2.875e-02, 2.324e-02, 5.815e-02, -4.646e-02, 2.474e-02, 6.236e-03, -4.835e-02, 2.692e-02, -1.556e-02)); + r += mul(s1_7, M4(6.567e-02, -1.771e-01, 1.636e-01, 1.389e-01, 1.923e-02, 
-2.261e-02, -1.336e-01, 7.837e-02, 7.038e-02, 7.848e-02, 7.020e-02, -4.635e-02, 2.443e-02, -1.174e-01, -1.740e-02, 2.764e-02)); + r += mul(s1_8, M4(4.308e-02, -1.546e-02, -3.099e-02, 3.656e-02, 1.635e-01, 3.060e-01, 9.050e-02, 1.285e-01, -1.200e-01, -7.059e-02, 7.957e-03, 4.086e-02, 9.437e-02, 5.796e-02, -4.148e-03, 3.257e-02)); + r += mul(s2_0, M4(-2.373e-02, -4.504e-02, -1.705e-03, 5.850e-02, 4.369e-02, -2.675e-02, -1.464e-02, 1.841e-03, -2.994e-02, -8.266e-03, 1.376e-03, -7.074e-02, 5.712e-03, 6.283e-02, -1.036e-02, -4.803e-02)); + r += mul(s2_1, M4(1.495e-02, 1.791e-01, 4.742e-02, -1.452e-01, -1.541e-02, 1.155e-01, 5.500e-02, -1.145e-01, -9.387e-03, 1.789e-02, 3.983e-02, 6.136e-02, -1.055e-01, 1.866e-01, -4.713e-02, -5.747e-02)); + r += mul(s2_2, M4(-1.225e-01, 2.266e-02, 1.181e-02, -7.051e-03, -6.077e-02, 9.735e-02, -7.292e-02, -8.954e-02, 1.563e-02, -2.959e-03, 2.662e-02, -2.103e-02, 8.584e-03, -9.759e-02, -7.741e-02, 3.616e-02)); + r += mul(s2_3, M4(3.635e-02, -1.645e-01, -1.969e-02, 4.328e-03, 4.810e-02, 3.235e-02, -1.383e-01, -2.948e-02, 5.382e-02, -8.886e-02, 8.926e-03, -2.520e-02, 6.535e-02, -7.581e-02, -2.856e-02, 1.286e-02)); + r += mul(s2_4, M4(5.513e-02, 7.227e-02, 2.869e-02, 2.017e-02, 8.521e-02, 2.349e-02, 2.405e-01, -1.228e-01, 2.694e-02, 1.984e-02, 6.508e-02, -5.179e-02, 1.851e-02, 1.414e-01, 4.834e-01, -7.654e-02)); + r += mul(s2_5, M4(-4.136e-02, -6.083e-02, -4.117e-02, 4.112e-03, -6.040e-02, -1.106e-01, 3.498e-02, 4.838e-02, 4.583e-02, 5.135e-02, 3.665e-02, 1.567e-01, 7.413e-02, -1.082e-01, 2.292e-01, 1.199e-01)); + r += mul(s2_6, M4(-5.964e-03, -1.046e-01, 1.439e-01, -3.429e-02, -3.755e-02, 4.449e-02, 8.300e-02, 4.855e-02, 2.292e-02, -4.491e-02, -5.796e-02, 4.857e-02, 5.673e-03, -6.138e-02, 2.884e-02, -6.701e-04)); + r += mul(s2_7, M4(2.151e-02, 1.747e-01, -1.631e-01, 9.152e-02, 5.497e-02, -6.721e-02, -1.185e-02, 6.380e-02, 1.387e-02, 1.475e-01, -2.808e-02, -9.629e-02, 7.208e-02, 1.401e-01, -7.801e-02, 6.167e-02)); + r += mul(s2_8, 
M4(-4.352e-02, 1.319e-03, 8.084e-02, 3.737e-03, 6.664e-02, 1.447e-01, -1.793e-02, -4.077e-02, -1.716e-02, -6.143e-02, 4.823e-03, 5.037e-02, -9.153e-03, 5.060e-02, 1.001e-01, 4.966e-02)); + r += mul(s3_0, M4(5.649e-02, -3.891e-02, -2.430e-02, -2.174e-02, 1.105e-01, -3.841e-02, 4.498e-02, -5.182e-02, 9.689e-03, -9.495e-02, 5.334e-03, -6.659e-02, 3.015e-02, 4.723e-02, 1.369e-02, -1.516e-02)); + r += mul(s3_1, M4(1.232e-02, -6.351e-02, -1.843e-01, -8.472e-02, 1.921e-02, -6.587e-02, -1.244e-02, -3.887e-02, 2.925e-04, 1.065e-01, 3.573e-02, 4.122e-02, 6.901e-03, -7.812e-02, 7.476e-02, 4.080e-02)); + r += mul(s3_2, M4(-6.150e-03, 3.499e-02, -5.778e-02, -7.025e-02, 2.570e-02, -2.516e-02, -5.620e-02, -2.414e-02, -8.359e-03, -2.838e-02, 8.285e-02, 1.164e-01, -8.725e-03, 2.962e-02, 5.898e-02, -1.734e-02)); + r += mul(s3_3, M4(4.642e-02, 6.787e-02, -1.394e-01, -1.438e-03, 3.154e-02, 2.288e-02, -1.279e-02, 5.279e-02, 7.700e-02, 9.242e-02, 6.436e-02, -1.958e-02, 2.512e-02, -1.689e-01, 1.819e-01, -4.532e-02)); + r += mul(s3_4, M4(-3.873e-02, -1.275e-01, -2.041e-01, -3.506e-02, 2.099e-02, 3.069e-01, 1.333e-01, -2.164e-01, 3.009e-02, -2.383e-02, 1.660e-01, 7.033e-03, -1.980e-01, 1.627e-01, -3.039e-02, -1.680e-01)); + r += mul(s3_5, M4(-1.979e-02, -3.970e-02, 1.013e-01, 1.470e-02, -8.796e-03, -1.388e-01, -2.466e-01, -1.449e-02, -4.928e-02, -1.117e-01, 6.766e-03, 2.252e-01, -8.004e-02, -1.170e-01, 1.027e-02, -4.645e-02)); + r += mul(s3_6, M4(3.630e-02, 1.871e-02, -7.153e-02, -1.659e-02, -2.352e-02, 5.419e-02, 6.725e-02, 5.810e-03, -9.791e-02, -6.364e-03, 1.147e-01, 1.143e-01, 7.626e-02, 6.141e-02, 3.847e-03, 4.719e-02)); + r += mul(s3_7, M4(2.607e-02, 1.518e-02, 4.637e-02, 2.521e-02, -5.243e-02, 2.306e-02, -4.595e-02, -5.553e-02, 2.664e-02, 1.821e-01, 1.098e-01, -1.458e-01, -2.127e-01, -1.272e-02, 6.474e-02, 6.118e-02)); + r += mul(s3_8, M4(2.075e-02, 1.177e-02, -4.928e-02, 1.140e-02, -1.305e-01, -2.648e-03, 1.769e-02, -1.216e-01, 1.441e-01, 3.274e-02, 3.489e-02, -6.273e-02, 
-7.208e-02, 1.051e-01, 1.537e-01, -1.123e-01)); + r += mul(s4_0, M4(7.110e-02, 4.850e-02, -1.157e-02, 4.185e-02, 5.356e-02, 4.014e-02, 7.733e-02, -1.246e-02, -2.777e-02, -2.533e-02, 2.298e-02, -2.722e-02, -1.042e-01, 3.940e-02, 9.855e-02, -1.004e-01)); + r += mul(s4_1, M4(2.769e-02, 4.162e-02, -1.011e-01, 1.103e-01, -4.562e-02, -2.646e-02, -3.413e-02, 4.516e-02, -5.508e-02, 1.650e-01, 6.129e-02, 1.280e-01, -1.667e-01, 6.874e-02, -1.240e-01, 1.050e-01)); + r += mul(s4_2, M4(2.617e-02, 1.081e-02, 5.938e-02, -2.068e-02, 9.066e-02, 5.606e-02, 1.225e-02, 6.061e-03, -2.554e-03, -1.691e-02, 4.391e-02, 7.897e-03, -3.455e-02, 4.846e-02, -9.722e-02, -4.085e-02)); + r += mul(s4_3, M4(1.545e-01, -1.108e-01, 2.369e-01, 3.505e-02, -6.744e-02, 8.668e-02, -6.337e-02, -2.470e-03, 1.048e-01, 3.657e-03, 6.390e-03, -7.144e-02, -2.276e-02, -1.648e-01, -2.147e-01, -1.581e-03)); + r += mul(s4_4, M4(1.460e-01, 9.660e-03, -1.189e-01, 3.128e-02, 1.497e-01, 5.374e-02, 2.867e-02, 3.194e-02, -1.442e-01, -4.870e-02, 7.521e-04, 2.193e-01, 5.682e-01, -3.875e-01, 4.589e-01, 4.727e-01)); + r += mul(s4_5, M4(2.797e-02, -5.300e-02, -2.445e-01, -5.600e-02, -6.601e-02, -2.028e-02, -3.025e-02, -3.851e-02, 2.073e-02, -2.140e-02, -2.093e-01, 2.266e-01, 8.101e-02, 1.409e-01, 8.547e-02, -8.630e-02)); + r += mul(s4_6, M4(8.292e-02, -7.876e-02, -8.856e-02, 1.907e-02, -6.368e-02, -3.828e-02, -1.131e-01, 4.408e-02, 7.031e-03, -4.064e-02, 6.172e-02, -6.402e-02, -5.491e-02, -4.418e-02, -1.405e-01, -5.717e-02)); + r += mul(s4_7, M4(6.500e-02, -7.290e-03, 2.594e-02, 4.244e-02, 9.845e-02, -1.441e-01, 1.111e-01, -1.345e-02, -4.688e-02, -1.691e-01, 9.880e-02, 4.007e-02, -1.081e-01, 2.336e-01, 1.152e-01, -7.479e-02)); + r += mul(s4_8, M4(1.162e-01, -2.099e-02, 1.145e-01, 1.166e-01, -8.507e-02, 5.079e-02, 7.288e-02, -1.387e-01, 5.778e-02, 6.205e-02, -3.556e-02, 8.295e-02, -8.924e-02, -9.102e-02, -5.964e-02, 8.599e-02)); + r += mul(s5_0, M4(-1.462e-02, 8.082e-02, 4.455e-02, -4.356e-02, 7.335e-02, 4.376e-02, 8.118e-03, 
-4.540e-03, 1.430e-02, -9.267e-03, -4.582e-02, -8.926e-04, -5.143e-02, 1.560e-02, 2.137e-02, -2.697e-02)); + r += mul(s5_1, M4(-3.726e-02, 1.228e-01, -4.132e-02, 7.470e-02, 1.000e-02, 9.297e-02, 1.480e-01, -1.979e-04, 3.006e-02, -3.010e-02, -7.384e-02, 1.877e-01, 1.923e-02, 1.162e-01, -9.008e-02, 1.230e-01)); + r += mul(s5_2, M4(-2.161e-02, 1.287e-02, -6.257e-02, 1.472e-02, 5.709e-02, -4.043e-02, 1.515e-02, 1.690e-02, 2.494e-02, -1.136e-01, -9.791e-03, -5.449e-02, -3.664e-02, -4.210e-02, -1.805e-02, 4.579e-02)); + r += mul(s5_3, M4(-5.763e-02, 3.082e-02, -1.415e-02, -5.158e-04, -9.545e-02, 4.433e-02, 1.523e-03, -3.887e-03, 8.415e-02, -1.272e-01, 1.091e-01, 2.093e-02, -4.191e-02, -3.525e-02, -1.222e-01, -5.924e-03)); + r += mul(s5_4, M4(-3.762e-02, 1.749e-02, -2.028e-01, -4.117e-02, 1.383e-01, 9.640e-02, -3.933e-02, -1.313e-01, 2.118e-01, -9.010e-02, 2.746e-01, -3.513e-02, 2.214e-01, 5.478e-03, 9.056e-02, 2.379e-01)); + r += mul(s5_5, M4(-5.022e-02, 5.552e-02, -8.665e-03, -7.388e-02, 7.463e-03, -1.085e-01, 1.008e-01, 4.463e-02, 1.378e-01, 3.763e-02, 8.676e-02, 1.877e-01, -1.080e-02, 9.133e-02, 8.161e-02, -1.125e-01)); + r += mul(s5_6, M4(-3.119e-02, 1.863e-02, 1.994e-02, -3.058e-02, -5.287e-02, 1.133e-01, -2.383e-01, 8.179e-03, 6.562e-03, -7.171e-02, -7.081e-02, 3.105e-02, -6.101e-02, 9.925e-02, -8.762e-02, -8.117e-02)); + r += mul(s5_7, M4(3.108e-02, 5.279e-02, 2.264e-02, 6.178e-02, 1.021e-01, 5.481e-02, 1.419e-01, -7.912e-03, 1.020e-02, -2.002e-01, -1.818e-01, 9.782e-02, -5.765e-03, 9.245e-02, 8.338e-02, -8.818e-03)); + r += mul(s5_8, M4(-1.147e-02, -2.966e-02, 5.288e-03, -1.074e-03, 3.207e-02, -2.794e-02, -9.356e-03, -2.497e-02, 9.674e-02, 2.462e-02, -6.490e-02, 9.148e-02, -4.516e-02, -6.141e-02, -4.324e-02, 4.126e-02)); + r += mul(s6_0, M4(1.036e-01, -2.570e-01, -1.208e-01, 8.886e-02, 7.710e-02, -2.474e-02, 2.849e-03, 1.278e-02, -1.482e-02, -1.193e-01, -1.380e-02, 1.007e-02, 1.184e-01, -1.439e-01, -1.044e-01, 2.887e-02)); + r += mul(s6_1, M4(1.209e-01, 
-3.716e-01, 1.783e-01, 1.966e-01, 1.532e-02, -5.382e-02, -8.952e-02, 8.532e-02, -7.640e-02, 1.069e-01, 1.326e-01, -1.499e-01, -1.033e-02, -2.936e-02, 1.092e-01, 8.362e-02)); + r += mul(s6_2, M4(4.508e-02, -7.873e-02, -2.155e-02, 7.153e-02, 1.640e-02, 6.660e-02, 2.897e-03, 4.817e-03, -3.263e-02, 1.306e-02, -6.786e-02, 8.870e-02, 9.132e-03, 5.999e-02, -1.520e-02, 1.487e-02)); + r += mul(s6_3, M4(1.034e-01, -3.553e-01, 1.452e-01, 8.826e-02, 2.262e-02, 3.700e-02, 8.773e-03, 2.630e-02, -2.575e-02, 8.167e-02, -2.821e-02, -3.883e-02, 1.549e-01, -1.925e-02, 2.490e-01, -6.542e-02)); + r += mul(s6_4, M4(1.189e-01, -2.134e-01, 1.221e-01, 1.048e-01, -1.359e-01, -2.367e-02, -2.231e-01, -2.973e-02, 9.188e-02, -8.502e-02, -1.946e-02, 8.132e-02, 3.279e-01, -1.637e-01, 9.216e-02, 3.257e-02)); + r += mul(s6_5, M4(1.481e-01, -2.576e-01, -3.705e-03, 1.502e-01, 1.999e-02, 1.120e-01, 6.229e-02, -4.918e-02, -4.610e-03, 1.663e-01, -6.736e-02, -2.088e-01, 4.956e-02, 4.480e-02, 2.288e-01, 1.646e-01)); + r += mul(s6_6, M4(4.327e-02, -2.827e-01, 1.014e-01, 1.575e-01, -2.061e-03, 1.758e-01, -1.348e-01, -5.756e-02, -4.617e-02, -7.614e-02, 3.244e-02, -5.471e-02, 6.149e-02, 1.591e-01, -1.890e-01, 3.956e-03)); + r += mul(s6_7, M4(-1.924e-03, -2.328e-01, 7.678e-02, 1.188e-01, -3.185e-02, -1.139e-01, 1.001e-01, 3.155e-02, 1.520e-01, 1.029e-01, -4.720e-02, 8.691e-02, 5.704e-02, -7.140e-02, -2.305e-02, 9.784e-02)); + r += mul(s6_8, M4(3.108e-02, -1.951e-01, 8.722e-02, 7.469e-02, -2.963e-02, 2.462e-02, 3.282e-02, -9.392e-02, 8.362e-02, -9.343e-02, -8.554e-02, -7.126e-03, -6.007e-02, 7.685e-02, 3.067e-02, 7.109e-02)); + r += mul(s7_0, M4(-1.303e-02, 3.881e-02, -5.598e-02, 3.519e-02, -2.655e-02, 1.206e-01, 8.129e-02, -1.125e-01, 3.243e-02, 7.205e-02, 1.959e-02, 4.478e-02, -1.042e-02, -4.264e-03, 3.322e-02, 7.399e-03)); + r += mul(s7_1, M4(2.747e-02, 1.158e-02, -6.186e-02, -1.866e-02, -1.027e-02, -2.844e-02, -7.940e-02, -2.550e-04, 8.304e-03, -8.307e-02, 3.995e-04, 1.069e-02, -4.984e-03, 6.702e-02, 
1.038e-01, 2.283e-02)); + r += mul(s7_2, M4(-2.234e-03, 1.159e-02, -5.776e-02, -2.739e-02, 6.097e-05, -3.496e-02, 4.195e-02, 2.330e-02, -2.140e-02, 1.020e-02, 3.447e-03, 1.336e-02, -5.864e-02, 2.480e-02, -2.061e-04, -5.578e-02)); + r += mul(s7_3, M4(-9.866e-03, -1.995e-02, -6.760e-02, 3.330e-02, -2.021e-01, 1.889e-01, -1.641e-01, 7.595e-02, -1.147e-02, -4.509e-02, 5.394e-02, -2.518e-02, 6.504e-02, -4.305e-02, 2.914e-02, -1.353e-01)); + r += mul(s7_4, M4(-2.741e-02, 1.660e-02, 3.526e-02, -1.491e-03, -2.191e-01, 2.912e-01, -4.188e-01, -1.748e-01, 1.531e-01, -1.138e-01, 2.163e-01, 2.023e-01, 8.601e-02, 1.334e-01, 5.447e-02, -6.781e-02)); + r += mul(s7_5, M4(-6.088e-03, -3.201e-02, 2.290e-02, -5.825e-02, -1.473e-02, -6.408e-02, -7.407e-02, 6.456e-02, 1.347e-02, 1.728e-02, 3.563e-02, -1.706e-01, 4.241e-02, 6.153e-02, 4.919e-02, -3.836e-03)); + r += mul(s7_6, M4(7.463e-03, 4.043e-02, 8.435e-02, 3.904e-02, -1.027e-01, -9.643e-02, -3.152e-02, 2.372e-02, 4.518e-02, 1.498e-02, 1.794e-02, -3.230e-02, -1.977e-02, 6.486e-02, -1.300e-01, 2.541e-03)); + r += mul(s7_7, M4(-2.846e-02, 3.876e-02, 6.175e-02, 4.732e-03, -3.786e-02, 9.850e-02, -7.317e-02, 1.009e-01, 8.281e-02, -3.916e-02, -1.173e-01, 4.051e-02, 2.851e-02, 7.268e-02, -1.119e-01, 5.271e-02)); + r += mul(s7_8, M4(-3.044e-02, 3.166e-02, 4.317e-03, -2.729e-03, -8.130e-02, -1.958e-01, 1.340e-01, -1.164e-02, -4.582e-03, -5.562e-02, 3.506e-02, -6.888e-02, -2.856e-02, -5.796e-02, -5.004e-02, -1.432e-02)); + r += V4(-3.570e-03, -4.291e-02, -1.299e-02, 1.425e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, 
V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.462e-02, -1.310e-02, 6.609e-03, 5.740e-02, -8.642e-03, 4.899e-02, -1.583e-02, 2.844e-02, 6.648e-02, 3.871e-03, -1.768e-02, -4.883e-04, -9.543e-02, -1.685e-02, 1.791e-02, -3.466e-02)); + r += mul(s0_1, M4(-6.833e-02, 5.737e-02, 8.637e-02, -7.064e-02, -2.990e-02, 3.789e-03, -1.489e-02, 3.995e-02, -2.039e-02, -6.969e-02, 1.027e-01, 1.380e-01, 7.285e-03, 2.917e-02, 7.228e-02, -1.042e-01)); + r += mul(s0_2, M4(-5.274e-02, 2.310e-02, -6.004e-02, 1.292e-02, -3.580e-02, -8.655e-02, -3.162e-02, 2.008e-02, 6.223e-02, -2.044e-01, -1.197e-01, -4.355e-02, 1.107e-02, -7.214e-02, -3.807e-02, -5.094e-02)); + r += mul(s0_3, M4(1.280e-02, -1.115e-02, 2.182e-04, -2.115e-02, -3.455e-02, -1.242e-02, 2.680e-03, -6.225e-02, -7.121e-02, 1.230e-02, 2.253e-02, -6.083e-02, 4.416e-03, 6.251e-03, -1.459e-01, -1.779e-01)); + r += mul(s0_4, M4(1.331e-01, -1.431e-01, 1.044e-01, 3.952e-02, -1.209e-01, 1.043e-02, -9.951e-02, -8.294e-02, -1.616e-01, 1.760e-01, 7.440e-02, -6.678e-02, -5.565e-02, -9.580e-02, -7.826e-02, -1.852e-01)); + r += mul(s0_5, M4(2.952e-03, 1.791e-03, -1.559e-02, -7.742e-02, -6.616e-03, -7.258e-02, -5.087e-02, -7.048e-03, -7.029e-02, 4.702e-02, 1.028e-02, -8.031e-02, 1.639e-02, 1.458e-02, 3.045e-02, -5.971e-02)); + r += mul(s0_6, M4(-4.983e-02, -3.599e-03, -8.925e-04, -3.772e-02, 1.299e-03, -2.566e-02, -6.971e-03, 9.303e-02, -5.120e-02, -4.997e-02, 1.306e-02, 6.740e-02, 3.890e-02, -1.435e-01, -9.920e-02, 4.651e-02)); + r += mul(s0_7, M4(-4.194e-02, -2.549e-02, 8.896e-03, -5.754e-02, 8.325e-03, 7.072e-02, 5.385e-02, -1.802e-02, 1.931e-02, 2.261e-01, -9.477e-02, -1.303e-01, -1.919e-02, -2.643e-02, 1.063e-01, -2.408e-02)); + r += mul(s0_8, M4(-7.756e-03, 7.950e-03, 1.119e-03, -5.365e-03, -6.542e-03, -2.240e-02, 4.740e-03, -5.067e-02, -8.639e-03, 
-5.217e-02, -1.758e-02, -5.323e-02, 3.224e-02, -1.480e-02, -4.384e-02, -3.711e-02)); + r += mul(s1_0, M4(-7.298e-02, 1.445e-02, -9.454e-03, -1.058e-02, -8.425e-02, 5.788e-02, 4.315e-02, -4.853e-02, 3.247e-02, 1.929e-02, 1.093e-02, 5.032e-02, -3.654e-02, -9.867e-02, 6.489e-02, -3.972e-03)); + r += mul(s1_1, M4(-1.560e-02, 9.919e-03, 1.003e-02, 7.083e-03, -2.534e-01, -2.473e-01, -4.555e-02, -4.074e-02, 7.089e-02, 1.078e-02, -9.212e-02, -1.034e-01, -6.360e-02, 3.824e-02, -1.433e-02, -7.397e-02)); + r += mul(s1_2, M4(6.288e-02, -1.428e-03, 2.118e-02, 2.259e-04, -2.058e-01, 6.832e-02, -6.487e-02, -8.280e-02, -2.492e-02, -3.735e-02, -7.691e-02, 4.195e-02, -5.090e-02, -2.049e-02, -2.962e-02, 2.124e-02)); + r += mul(s1_3, M4(4.037e-02, -1.183e-01, -3.306e-02, -2.481e-02, -8.280e-02, -1.402e-01, -1.123e-01, -1.309e-01, 1.840e-02, 5.922e-02, 6.474e-02, 9.333e-02, 1.322e-02, 1.328e-01, -7.711e-04, 1.101e-01)); + r += mul(s1_4, M4(2.431e-01, -1.526e-02, 3.973e-01, 6.055e-02, -2.158e-01, 1.573e-01, 1.153e-01, -2.713e-01, 1.282e-01, 1.626e-01, 2.550e-01, 9.282e-02, 1.051e-01, 1.085e-01, 7.336e-02, 2.281e-02)); + r += mul(s1_5, M4(-5.789e-02, -1.408e-01, 2.681e-01, 8.174e-02, -1.873e-01, -1.429e-01, 1.980e-02, -2.227e-02, 1.378e-01, -7.831e-02, -1.198e-01, -5.265e-02, 6.618e-02, 1.066e-01, 2.457e-02, 1.241e-02)); + r += mul(s1_6, M4(-1.809e-02, 3.453e-02, -2.993e-02, -4.992e-02, 3.207e-02, -4.045e-02, -1.958e-03, -3.240e-02, -3.410e-02, 3.105e-02, -1.371e-01, 1.361e-01, 3.359e-02, -2.561e-02, -5.956e-03, -1.423e-02)); + r += mul(s1_7, M4(-1.847e-02, 1.268e-01, 2.167e-01, -4.562e-04, 8.563e-02, -1.685e-01, -6.241e-02, -2.425e-01, 1.773e-01, 1.288e-02, 6.724e-02, 1.295e-01, -3.540e-02, -1.026e-01, 7.806e-02, 1.190e-01)); + r += mul(s1_8, M4(5.638e-02, 9.204e-02, 4.834e-02, -3.144e-02, 1.102e-01, -1.814e-02, 1.203e-01, 1.756e-02, 2.416e-03, -2.827e-02, 1.459e-01, 1.547e-01, 3.526e-02, 1.291e-02, 8.043e-02, 3.777e-02)); + r += mul(s2_0, M4(1.361e-01, 3.691e-02, 4.395e-02, 1.606e-01, 
7.802e-03, -1.292e-01, -1.353e-01, 3.487e-02, 1.666e-02, 6.090e-03, 1.957e-02, 1.009e-02, -2.801e-02, 7.073e-03, -6.974e-03, -1.360e-01)); + r += mul(s2_1, M4(4.295e-02, 1.758e-02, -1.065e-01, 1.572e-01, 3.012e-02, 6.403e-02, -6.217e-02, -2.477e-02, 5.268e-02, -4.278e-02, -3.036e-02, 3.319e-02, -3.211e-02, 6.374e-02, -8.323e-02, -5.754e-02)); + r += mul(s2_2, M4(-3.358e-03, -2.677e-02, -5.062e-03, -8.726e-02, -1.376e-01, -3.560e-02, -6.401e-02, 6.773e-02, 2.704e-02, 3.601e-02, 1.257e-02, 3.423e-02, 1.469e-01, -1.546e-01, -1.254e-01, -1.696e-02)); + r += mul(s2_3, M4(5.545e-02, 4.398e-02, -2.672e-02, 1.246e-01, 3.295e-02, 8.615e-02, -4.438e-03, 1.501e-02, -4.680e-02, 2.229e-02, 1.763e-02, 5.402e-02, 5.715e-02, -5.796e-03, -3.928e-02, 7.037e-02)); + r += mul(s2_4, M4(2.797e-02, -1.276e-01, 1.149e-01, 4.661e-02, 1.573e-01, 1.898e-01, 2.875e-01, 2.349e-01, 1.526e-01, -1.870e-01, 4.934e-02, 2.432e-01, 6.115e-02, -2.159e-03, 5.827e-02, -1.524e-01)); + r += mul(s2_5, M4(6.111e-02, -2.927e-02, -1.449e-01, 7.417e-02, 4.468e-02, 2.148e-02, 4.834e-02, -7.180e-02, 6.146e-02, 5.945e-02, 2.432e-02, 2.849e-02, 1.800e-01, 8.602e-02, 1.058e-01, -1.170e-01)); + r += mul(s2_6, M4(5.160e-02, 7.842e-03, -3.424e-02, 8.373e-02, -4.979e-02, -1.220e-01, -1.123e-01, 4.353e-03, 4.925e-02, 6.592e-02, -6.178e-04, -1.969e-02, -1.097e-03, 3.600e-03, 3.540e-02, 4.032e-03)); + r += mul(s2_7, M4(4.263e-02, 5.162e-02, -4.264e-02, 1.454e-02, -4.603e-02, -2.586e-02, -4.238e-02, 7.478e-02, -3.296e-02, -2.916e-02, -3.310e-02, 8.334e-02, -1.462e-02, 2.845e-02, -1.949e-02, -8.869e-02)); + r += mul(s2_8, M4(7.671e-03, -1.960e-02, 8.305e-03, 3.879e-03, 1.967e-02, 2.672e-02, -1.089e-02, 1.420e-02, 7.171e-03, -2.250e-02, 5.168e-02, 1.100e-01, -6.511e-02, -1.224e-02, -1.919e-01, -5.954e-02)); + r += mul(s3_0, M4(-5.489e-02, -1.202e-01, -6.728e-02, -8.068e-02, 7.251e-02, 5.867e-02, -6.234e-02, 1.155e-01, 5.125e-02, -5.765e-02, -5.751e-02, -1.635e-02, 1.864e-02, 6.609e-02, -1.105e-03, -4.950e-02)); + r += 
mul(s3_1, M4(-1.404e-01, -4.070e-02, -2.387e-01, 6.615e-03, -4.551e-02, 1.071e-01, 5.698e-02, 9.160e-02, 8.717e-02, -7.866e-02, 3.544e-02, 1.517e-01, -2.900e-02, -1.239e-02, -5.162e-02, 1.900e-02)); + r += mul(s3_2, M4(-6.847e-02, -1.020e-01, -7.745e-02, 3.094e-02, -4.354e-02, -6.276e-02, -3.621e-02, -6.242e-02, 5.462e-02, -1.308e-01, -1.210e-01, -2.065e-02, -4.381e-02, -5.895e-02, 8.102e-02, -1.266e-01)); + r += mul(s3_3, M4(-1.059e-04, -1.497e-01, -1.039e-02, -4.629e-02, 2.626e-02, 1.801e-02, -9.512e-04, 6.084e-02, 4.065e-02, 9.308e-02, -2.085e-02, -2.430e-03, 1.454e-02, -5.680e-03, -1.196e-01, -5.078e-02)); + r += mul(s3_4, M4(-8.717e-02, -1.156e-01, -4.551e-02, -1.233e-01, -3.711e-02, -9.569e-02, 7.082e-02, -9.574e-02, 3.863e-01, 1.752e-02, 7.915e-02, 4.365e-01, -6.295e-02, -1.045e-02, 4.266e-02, -2.812e-01)); + r += mul(s3_5, M4(-1.277e-03, -8.856e-02, -1.335e-02, 8.308e-02, -1.264e-01, 8.471e-02, -9.759e-02, -7.361e-02, 1.505e-01, 2.235e-01, 1.617e-01, 1.516e-02, -2.184e-02, 3.152e-02, -2.481e-01, -1.529e-01)); + r += mul(s3_6, M4(-6.069e-02, 7.997e-02, -2.999e-02, 6.667e-02, -3.571e-02, -8.018e-02, -4.923e-02, -2.775e-02, 2.642e-02, -7.642e-02, -3.110e-02, -8.301e-02, 3.597e-02, -6.190e-02, 5.578e-02, 1.650e-02)); + r += mul(s3_7, M4(1.950e-03, -5.827e-02, -1.350e-01, -3.964e-02, 2.067e-02, 8.880e-02, -5.618e-02, -9.141e-02, 6.710e-02, 1.227e-02, -1.851e-01, -1.946e-01, 4.703e-02, -4.175e-03, 1.142e-01, -2.030e-01)); + r += mul(s3_8, M4(-4.808e-02, -8.059e-02, -1.219e-01, -1.497e-01, 5.362e-02, -6.077e-02, -7.402e-02, -9.153e-02, 1.003e-01, -4.803e-03, -7.885e-02, -2.247e-02, 1.434e-02, -4.348e-02, 6.374e-02, -1.095e-01)); + r += mul(s4_0, M4(8.929e-02, 1.477e-01, 1.770e-01, 1.408e-01, 6.920e-03, 5.607e-02, -1.516e-02, -5.035e-02, 2.399e-02, -4.023e-02, -4.029e-02, -6.642e-02, -2.567e-02, -4.240e-02, -4.215e-02, -3.904e-01)); + r += mul(s4_1, M4(2.250e-02, -3.173e-03, 1.788e-02, 1.658e-01, 5.531e-02, -4.699e-02, -2.140e-02, -1.022e-01, -1.031e-02, 
-2.320e-02, 6.078e-02, -1.768e-02, 4.480e-01, -3.266e-01, -6.451e-02, -1.013e-01)); + r += mul(s4_2, M4(5.465e-02, 8.356e-02, 2.870e-04, -4.076e-02, 6.278e-02, 7.096e-03, -2.026e-02, -2.613e-02, -4.095e-03, -8.923e-02, 1.429e-02, 1.257e-02, -2.308e-02, -4.313e-02, -8.777e-02, 2.147e-02)); + r += mul(s4_3, M4(1.128e-02, -8.824e-02, 3.743e-02, 1.121e-01, 3.255e-02, 6.373e-03, 3.494e-02, 5.817e-02, -5.255e-02, 6.612e-02, -2.657e-02, 9.935e-02, -5.839e-02, 1.502e-03, 1.674e-02, -2.483e-01)); + r += mul(s4_4, M4(-3.301e-03, 1.381e-03, 2.348e-01, -7.932e-02, 7.121e-02, 1.894e-02, 5.626e-02, 1.242e-01, 3.424e-02, 2.703e-01, 9.893e-02, 9.129e-02, 3.413e-02, 3.306e-01, -6.402e-02, 1.710e-01)); + r += mul(s4_5, M4(1.190e-03, -1.362e-01, 6.616e-02, 7.430e-02, -1.908e-02, -7.131e-02, -6.680e-02, -1.412e-02, -1.783e-03, 1.154e-01, -5.414e-02, -1.866e-01, -6.085e-02, -8.559e-02, 1.169e-01, -4.633e-02)); + r += mul(s4_6, M4(-4.513e-03, 3.699e-02, 9.614e-02, 6.584e-02, -4.287e-02, -9.917e-02, -2.837e-02, -1.147e-01, 5.015e-02, 8.579e-03, 4.115e-02, 9.666e-02, 4.970e-02, 3.432e-02, 1.172e-01, -1.503e-01)); + r += mul(s4_7, M4(4.660e-02, 5.717e-02, 1.118e-02, -1.337e-02, 1.256e-02, -4.309e-02, 8.529e-02, -4.747e-02, -5.440e-02, -1.774e-02, 1.103e-01, 4.132e-02, -3.581e-02, 9.867e-02, 2.425e-02, -1.120e-02)); + r += mul(s4_8, M4(5.126e-02, 3.103e-02, 1.489e-01, 8.006e-02, -3.423e-02, -6.998e-02, 6.039e-02, -2.939e-02, -1.505e-02, 7.190e-03, -4.863e-02, -2.403e-02, -6.097e-02, -8.975e-02, -3.179e-02, -8.617e-02)); + r += mul(s5_0, M4(-8.262e-02, 2.481e-02, 4.836e-03, -4.425e-02, -2.515e-02, 5.051e-02, -6.125e-03, -5.547e-02, 6.078e-02, 5.525e-02, 9.176e-02, 1.572e-02, -1.979e-02, 5.908e-02, 9.396e-02, 5.186e-02)); + r += mul(s5_1, M4(-4.726e-02, 3.977e-02, -1.014e-01, -1.503e-02, 3.785e-02, -1.439e-02, 1.512e-02, -3.868e-02, 5.295e-02, -8.620e-02, 8.191e-02, 6.278e-02, 1.169e-01, -1.905e-01, 7.085e-02, 1.050e-01)); + r += mul(s5_2, M4(-2.736e-02, 6.220e-02, -5.548e-02, 6.147e-02, 
1.232e-01, 1.163e-02, 1.780e-02, -7.604e-02, 1.381e-01, 6.557e-03, 8.610e-03, 6.522e-03, 2.147e-02, 1.777e-02, 1.541e-02, 1.560e-02)); + r += mul(s5_3, M4(5.259e-02, -3.546e-02, 6.230e-02, 6.797e-02, 7.732e-02, 8.863e-03, -1.746e-02, -3.139e-02, -5.820e-02, 2.022e-02, -1.225e-01, -3.615e-02, -4.625e-02, -2.697e-02, -2.579e-02, 1.510e-01)); + r += mul(s5_4, M4(-2.816e-02, -9.628e-02, 7.526e-02, -5.355e-02, -1.576e-01, -2.085e-01, -3.468e-01, -2.840e-02, -1.601e-02, 1.563e-01, 4.990e-02, -7.243e-02, -1.961e-01, 1.467e-01, 7.937e-02, 4.030e-02)); + r += mul(s5_5, M4(-3.320e-03, -1.204e-01, -5.233e-02, -5.508e-02, 1.663e-01, 1.481e-01, 2.805e-02, -1.490e-01, 1.753e-01, 2.295e-01, 2.995e-01, -5.415e-02, -9.904e-02, -8.550e-02, -1.067e-01, 2.813e-02)); + r += mul(s5_6, M4(-1.444e-02, 1.265e-02, 3.572e-02, 9.024e-03, -5.852e-02, -7.202e-02, -1.086e-01, -1.721e-01, 3.204e-02, 8.423e-03, 4.847e-02, 3.032e-02, 7.523e-03, 2.191e-02, 8.512e-03, -5.825e-02)); + r += mul(s5_7, M4(4.656e-02, 4.840e-02, -3.721e-02, -5.674e-02, 3.699e-02, -7.553e-02, -2.514e-02, 3.656e-02, -3.952e-02, 4.498e-03, 1.271e-03, 2.396e-01, -2.891e-02, 3.648e-02, 3.780e-03, 8.656e-03)); + r += mul(s5_8, M4(5.193e-02, 5.043e-02, 3.167e-02, 1.211e-02, -4.968e-02, -7.751e-02, -5.472e-02, -1.230e-01, -8.041e-02, 7.367e-03, 3.397e-02, -6.051e-02, 1.718e-02, -1.775e-02, -3.377e-02, 2.839e-03)); + r += mul(s6_0, M4(2.356e-01, 2.417e-01, -7.312e-03, 2.296e-01, 1.123e-02, 2.061e-02, 1.249e-01, 1.465e-01, -8.119e-02, -8.273e-02, 2.396e-02, -5.723e-02, -1.368e-03, -8.216e-02, -7.126e-02, 1.767e-01)); + r += mul(s6_1, M4(3.064e-01, 1.425e-01, 1.339e-01, 1.613e-01, 5.711e-02, 1.105e-01, 3.318e-02, -1.383e-02, -1.848e-01, 7.377e-02, -1.934e-01, -1.845e-01, 2.550e-02, 9.450e-02, 4.248e-02, 1.391e-02)); + r += mul(s6_2, M4(2.085e-01, 1.108e-01, 1.701e-01, 2.713e-02, 4.024e-02, 1.521e-02, 7.399e-02, 4.808e-02, -6.747e-02, 5.166e-02, 5.017e-02, 6.361e-02, -2.552e-02, -6.003e-02, 4.130e-04, 5.567e-02)); + r += mul(s6_3, 
M4(2.763e-01, 2.114e-01, 4.282e-02, 1.008e-01, 2.514e-02, 6.967e-03, 7.642e-02, 7.630e-02, 2.342e-02, -5.306e-02, 1.487e-02, -1.732e-01, 3.900e-02, -1.407e-01, -1.259e-01, 1.265e-01)); + r += mul(s6_4, M4(1.623e-01, -1.235e-02, 2.044e-01, 5.010e-02, 2.684e-01, -5.874e-02, 4.729e-02, -2.150e-03, -8.674e-02, -2.515e-03, 1.375e-02, 1.261e-01, 1.919e-01, -1.287e-01, 2.525e-01, 4.040e-01)); + r += mul(s6_5, M4(1.842e-01, 1.762e-01, 1.343e-01, 1.453e-01, -4.476e-02, -1.114e-02, -2.291e-02, 2.730e-02, -1.311e-01, -1.116e-01, -1.276e-01, 1.305e-01, 6.493e-02, 8.502e-02, 9.360e-02, -1.174e-02)); + r += mul(s6_6, M4(1.881e-01, 9.549e-02, -2.598e-02, 1.182e-01, 2.119e-03, 4.513e-02, 3.322e-02, -5.448e-02, -6.380e-03, -1.790e-02, -1.076e-02, -7.405e-02, -4.231e-02, 1.375e-02, -2.372e-02, 9.198e-02)); + r += mul(s6_7, M4(8.190e-02, -4.095e-02, 3.436e-02, 2.693e-01, 9.724e-03, -8.337e-02, 4.002e-04, -1.853e-01, -4.698e-02, -4.704e-03, -1.762e-01, 1.410e-02, 5.535e-02, -4.814e-02, -2.937e-02, 1.699e-01)); + r += mul(s6_8, M4(7.594e-02, 2.500e-02, 2.054e-01, 1.359e-01, -6.950e-03, -3.872e-02, -8.553e-02, 3.117e-02, 8.458e-02, 1.037e-01, 9.023e-03, 6.317e-02, -5.330e-02, -1.181e-02, -1.154e-02, 1.801e-02)); + r += mul(s7_0, M4(3.500e-02, -1.354e-02, -6.275e-02, 2.530e-02, -1.126e-01, -2.442e-02, 6.522e-02, -4.946e-02, -3.849e-02, -1.052e-03, 7.946e-02, 8.609e-02, 8.536e-02, -8.888e-02, -1.648e-02, -9.296e-03)); + r += mul(s7_1, M4(1.536e-02, -1.095e-01, -9.734e-03, 1.057e-01, -3.581e-02, -1.352e-01, -6.353e-02, 6.288e-02, 7.990e-02, -1.541e-02, -2.616e-02, -2.092e-02, -1.386e-01, 3.706e-02, 9.455e-02, -8.811e-02)); + r += mul(s7_2, M4(-1.917e-02, 3.680e-03, -9.103e-02, 6.932e-03, 1.862e-02, 5.211e-02, 4.197e-02, -2.273e-02, -4.925e-03, 5.122e-02, 6.613e-02, 4.453e-02, -1.724e-02, -3.789e-02, -5.483e-03, -6.117e-02)); + r += mul(s7_3, M4(6.494e-02, 5.085e-02, -3.084e-02, -4.360e-02, -3.047e-04, 1.334e-01, 1.050e-01, -5.439e-02, 3.051e-02, 5.960e-02, 8.870e-02, -2.083e-02, 6.208e-02, 
-2.183e-02, -5.802e-02, 6.238e-02)); + r += mul(s7_4, M4(7.910e-02, 3.215e-02, -5.218e-02, -2.821e-02, 1.271e-01, -2.276e-01, -1.860e-01, -1.064e-01, 2.585e-02, 3.594e-02, 2.412e-01, 2.176e-01, 1.651e-01, -1.164e-01, 4.035e-01, -8.403e-02)); + r += mul(s7_5, M4(3.154e-02, -6.521e-02, -3.050e-02, 6.594e-02, 1.437e-01, 1.430e-02, -1.211e-01, -3.589e-02, 6.985e-02, 6.226e-02, 3.090e-02, 1.198e-01, -6.754e-03, -1.157e-02, -1.102e-01, -1.356e-01)); + r += mul(s7_6, M4(2.194e-02, -1.653e-02, -4.073e-02, -1.260e-02, 3.354e-02, 3.995e-02, -3.034e-02, -9.874e-02, -2.932e-02, 2.086e-02, -1.687e-02, 5.243e-02, 2.606e-02, 2.746e-02, -2.158e-02, -3.860e-02)); + r += mul(s7_7, M4(2.909e-02, -1.935e-02, -9.481e-02, -4.910e-02, -9.199e-02, 1.076e-02, 9.528e-03, 1.829e-01, 1.451e-03, 2.201e-03, -2.104e-02, 1.605e-01, -8.880e-02, -1.363e-01, -9.424e-02, -2.646e-02)); + r += mul(s7_8, M4(-1.473e-02, -5.392e-02, -7.442e-02, -2.511e-02, -1.129e-02, -6.392e-02, 1.128e-01, 6.659e-02, -1.281e-02, -1.358e-02, -2.367e-02, 7.697e-02, 2.667e-03, -4.043e-02, -8.640e-02, -6.715e-02)); + r += V4(-2.098e-02, 7.151e-03, -1.746e-02, -5.302e-02); + return r; +} + +void Pass7(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 
0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + 
V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, 
s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 8 +//!DESC conv7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.429e-02, -2.332e-02, 4.314e-02, 1.064e-02, 2.172e-03, -9.378e-03, -6.456e-02, 5.519e-02, -1.450e-01, 3.668e-02, 5.025e-02, 1.031e-01, 1.049e-02, 1.149e-02, -3.782e-02, -6.485e-02)); + r += mul(s0_1, M4(-1.216e-02, 1.289e-01, 4.855e-03, -5.725e-03, 5.489e-02, -1.701e-02, 9.362e-04, 2.121e-02, -4.344e-02, -1.216e-01, 7.830e-02, 1.137e-01, 2.323e-02, -2.009e-02, -4.112e-02, -6.864e-02)); + r += mul(s0_2, M4(-4.899e-02, -3.241e-02, -3.536e-03, 5.684e-02, -2.514e-02, -3.484e-02, -3.530e-02, 9.876e-03, 5.883e-02, 2.769e-02, 4.048e-02, -3.935e-02, -2.423e-02, -9.510e-03, 3.234e-02, 1.199e-02)); + r += mul(s0_3, M4(-1.349e-01, -1.353e-01, 6.827e-03, -5.336e-02, -6.016e-02, -1.109e-02, 9.085e-02, -5.200e-02, -1.802e-01, -2.713e-02, 4.700e-02, 4.469e-02, -7.109e-03, -5.905e-02, -7.286e-02, -1.176e-01)); + r += mul(s0_4, M4(-1.088e-01, 2.120e-01, -9.531e-02, -7.526e-02, -3.024e-02, 
-8.052e-02, 7.018e-02, -1.512e-01, -8.795e-02, -9.058e-02, 1.183e-02, 8.818e-02, -2.706e-01, 5.783e-02, -6.422e-02, -7.333e-02)); + r += mul(s0_5, M4(-1.192e-02, -8.800e-02, 2.284e-02, 4.938e-02, 2.447e-02, 1.127e-01, 2.742e-02, -1.041e-01, -6.344e-03, 5.594e-03, -1.393e-01, -1.855e-02, -1.230e-01, 2.002e-02, -9.026e-02, -7.525e-04)); + r += mul(s0_6, M4(-2.280e-02, -7.241e-02, 2.928e-02, 1.022e-02, -5.593e-02, 2.254e-02, 6.370e-02, 1.904e-02, -8.088e-02, -4.722e-02, 5.025e-02, 6.547e-02, 1.310e-01, -4.054e-02, 1.521e-02, 7.518e-04)); + r += mul(s0_7, M4(1.161e-01, -6.198e-03, 8.148e-02, 1.159e-02, 1.194e-01, -4.006e-04, 7.621e-02, 6.951e-03, 2.882e-03, -2.765e-02, 4.471e-02, 8.374e-02, -1.463e-01, 2.266e-01, -2.111e-01, 8.026e-03)); + r += mul(s0_8, M4(-5.146e-02, -5.043e-02, -4.174e-04, 8.790e-03, 7.446e-02, -3.545e-02, 1.583e-02, 9.433e-03, -5.614e-02, -2.652e-02, 1.538e-02, 6.623e-02, -2.130e-02, -1.283e-01, -1.310e-02, -3.202e-02)); + r += mul(s1_0, M4(-6.381e-02, 3.036e-02, 5.632e-02, 6.065e-03, -7.098e-02, -4.316e-02, 3.370e-02, -9.274e-02, 1.666e-03, -3.818e-03, 3.570e-02, 2.863e-02, -3.719e-03, 1.548e-02, 8.114e-04, -2.087e-02)); + r += mul(s1_1, M4(-1.125e-01, 2.095e-01, -5.373e-03, 5.728e-02, -1.684e-02, 5.683e-02, 6.455e-02, 8.118e-02, -2.147e-02, -6.171e-02, 5.331e-02, -2.312e-02, 5.723e-02, -7.003e-02, -5.061e-02, -6.937e-02)); + r += mul(s1_2, M4(-3.802e-02, -1.043e-02, 4.402e-02, 5.884e-03, -6.816e-02, 2.554e-02, -6.462e-02, 3.692e-02, 2.855e-02, -7.795e-02, -1.158e-01, 9.489e-03, -3.566e-02, -3.169e-02, 1.226e-02, -1.260e-02)); + r += mul(s1_3, M4(-7.747e-02, -1.154e-03, -1.686e-01, -6.604e-02, -6.979e-02, 1.844e-02, -1.876e-01, 1.099e-01, -1.142e-01, -2.556e-02, 4.666e-02, 5.026e-02, -1.906e-02, -4.265e-02, 4.301e-02, -8.881e-02)); + r += mul(s1_4, M4(1.986e-03, -3.458e-01, 3.094e-01, -1.274e-01, 2.375e-01, -7.486e-02, -1.912e-01, 4.443e-02, 6.306e-02, 8.115e-02, -5.890e-02, 2.617e-03, -3.320e-02, -8.362e-02, -1.985e-03, -1.438e-01)); + r += 
mul(s1_5, M4(2.581e-02, 1.221e-02, 2.112e-02, 5.072e-02, -6.900e-02, 4.629e-02, -5.685e-02, 7.048e-02, -4.532e-02, -3.919e-02, 9.927e-02, -3.342e-02, -3.581e-02, 5.119e-02, -1.537e-01, 1.891e-02)); + r += mul(s1_6, M4(5.745e-03, 4.427e-02, 5.853e-02, 3.445e-02, 1.085e-01, -1.109e-01, 5.323e-02, 1.128e-01, -5.432e-02, 7.260e-02, -1.084e-01, 8.378e-02, 8.080e-02, -6.499e-02, 1.355e-01, 4.337e-02)); + r += mul(s1_7, M4(8.341e-02, 3.424e-02, 3.290e-02, -1.363e-02, 1.580e-01, 3.006e-01, 2.149e-03, 2.191e-01, 8.049e-02, 1.118e-02, 8.779e-03, 6.471e-02, -1.069e-01, -2.980e-02, -4.201e-02, 1.076e-01)); + r += mul(s1_8, M4(-6.640e-02, -2.167e-03, -7.802e-02, -6.291e-04, -5.879e-02, -1.432e-01, -1.221e-02, 4.027e-02, 1.564e-02, 1.884e-02, -3.517e-02, 2.630e-02, 7.528e-03, -1.925e-02, 4.156e-02, -1.512e-02)); + r += mul(s2_0, M4(-5.786e-02, -3.811e-02, 7.465e-02, -1.153e-01, 9.855e-03, 1.133e-03, -9.657e-02, -5.820e-02, -4.431e-02, -3.342e-02, 3.839e-03, 1.346e-02, 3.735e-03, -6.809e-03, -1.971e-02, -7.439e-02)); + r += mul(s2_1, M4(-2.957e-02, 2.258e-02, 9.420e-02, -4.162e-02, -2.241e-02, 3.396e-02, -9.463e-03, 6.093e-02, 2.241e-02, 5.466e-03, -1.272e-02, 9.590e-02, -2.979e-02, 1.120e-01, 2.448e-02, -3.680e-02)); + r += mul(s2_2, M4(6.455e-04, 3.673e-02, 7.003e-02, -6.360e-03, 3.664e-02, -6.810e-03, 2.913e-02, 8.731e-03, 9.219e-03, 7.756e-02, -1.472e-01, 1.450e-02, -3.781e-03, 3.535e-03, -6.173e-02, 3.913e-02)); + r += mul(s2_3, M4(4.824e-03, 3.512e-02, -1.684e-01, -4.602e-02, 4.232e-02, -2.799e-02, 9.374e-02, 2.816e-02, 7.564e-02, 1.042e-01, -8.223e-02, 2.283e-02, -7.351e-02, 3.973e-02, -1.391e-01, 2.565e-03)); + r += mul(s2_4, M4(-2.335e-01, 2.684e-02, -3.716e-01, -1.481e-02, 5.490e-02, 1.224e-01, -2.276e-02, 6.176e-02, 3.403e-02, 1.614e-01, 1.994e-02, 2.240e-01, -4.081e-02, 1.772e-02, 5.466e-02, 9.059e-02)); + r += mul(s2_5, M4(5.616e-02, -2.055e-02, -1.607e-01, 2.489e-02, -7.941e-02, 1.590e-01, -2.552e-02, 2.637e-02, 3.266e-03, -1.990e-02, 1.649e-01, 1.237e-01, 
1.191e-01, -6.178e-02, 2.103e-01, -2.459e-02)); + r += mul(s2_6, M4(-4.669e-02, -3.467e-02, 2.145e-01, 6.600e-03, 9.879e-03, 2.595e-02, 1.483e-02, -3.995e-02, 1.454e-03, -4.725e-03, 4.796e-02, 6.020e-02, 5.114e-02, 7.967e-03, -8.423e-02, -1.184e-02)); + r += mul(s2_7, M4(8.979e-02, 2.301e-01, 2.273e-01, -2.155e-02, 2.495e-03, -3.534e-02, 5.817e-02, -4.511e-03, -1.832e-02, 2.070e-01, 7.383e-02, 6.175e-02, 4.585e-02, -6.402e-03, 1.827e-02, 1.981e-02)); + r += mul(s2_8, M4(1.809e-02, -7.041e-02, 3.777e-03, -9.913e-04, -8.198e-04, 6.589e-02, -9.252e-02, 4.381e-02, -2.878e-02, -3.744e-02, 1.090e-02, -6.804e-02, 1.619e-02, 1.085e-01, -1.391e-02, -4.922e-02)); + r += mul(s3_0, M4(-3.984e-02, 5.420e-03, -4.550e-02, -1.233e-01, 4.322e-02, -5.213e-02, 1.335e-02, -5.276e-03, -1.693e-02, -7.303e-03, 6.252e-03, -4.690e-02, 3.173e-02, 3.850e-02, -8.569e-02, 5.811e-02)); + r += mul(s3_1, M4(2.930e-02, 2.401e-02, -8.850e-02, -1.446e-01, -5.032e-02, 1.690e-01, 7.153e-02, 6.679e-02, -9.158e-02, 7.794e-02, -3.869e-02, 3.164e-02, -7.438e-02, 2.832e-02, 9.705e-02, -6.236e-02)); + r += mul(s3_2, M4(3.107e-02, 4.798e-02, 2.440e-02, -5.775e-02, 1.245e-02, -1.367e-01, 1.987e-01, 5.487e-02, -7.530e-02, -2.864e-02, -6.313e-02, 2.412e-02, -4.335e-02, -1.480e-02, 3.736e-02, 7.179e-02)); + r += mul(s3_3, M4(1.861e-02, 3.280e-02, -1.080e-01, -1.617e-02, 6.373e-02, -7.300e-02, 1.517e-03, -1.489e-02, 2.597e-02, -7.612e-02, 5.984e-02, -4.236e-02, -6.426e-03, -7.361e-02, 5.293e-02, -2.767e-02)); + r += mul(s3_4, M4(-1.651e-01, -9.867e-02, -5.652e-02, -5.221e-02, 8.410e-02, 1.038e-01, -7.153e-02, -2.243e-02, -2.833e-02, -4.937e-02, 2.125e-01, -5.315e-02, 5.610e-02, -1.936e-01, 4.265e-01, 2.646e-01)); + r += mul(s3_5, M4(-5.804e-02, 2.708e-02, 3.219e-02, 6.301e-02, -9.947e-02, 6.365e-02, -1.993e-01, -2.109e-02, 7.368e-03, 8.566e-02, -1.474e-02, 4.023e-02, 6.955e-02, -3.642e-02, 7.850e-02, 1.153e-02)); + r += mul(s3_6, M4(2.621e-02, 1.533e-02, 8.712e-02, -6.340e-02, 1.455e-03, -5.999e-03, 2.176e-01, 
4.310e-02, -2.057e-02, -5.172e-02, 7.316e-02, -7.311e-02, -3.093e-02, 6.763e-02, -1.499e-01, 1.656e-02)); + r += mul(s3_7, M4(6.913e-02, 4.051e-02, 6.953e-02, -6.580e-02, 7.922e-03, 4.726e-02, 1.577e-01, 2.761e-02, 4.748e-02, -6.473e-02, 7.108e-02, -1.199e-02, -1.576e-02, 4.525e-02, 9.625e-03, 6.197e-02)); + r += mul(s3_8, M4(2.181e-02, 5.856e-02, -2.680e-02, -4.358e-03, 2.145e-02, -5.667e-02, 1.037e-01, 7.757e-03, 2.428e-02, -3.369e-02, 2.964e-02, -4.941e-02, 3.692e-02, 7.243e-02, 3.112e-02, -2.451e-02)); + r += mul(s4_0, M4(5.122e-03, -1.978e-03, 7.659e-02, 8.961e-02, 3.828e-02, -4.181e-02, -7.118e-02, 1.168e-02, 7.047e-02, -3.212e-03, 1.733e-03, 5.195e-02, 4.263e-02, 3.222e-02, -7.399e-02, 1.099e-02)); + r += mul(s4_1, M4(-1.358e-03, -7.553e-02, 1.239e-01, 6.855e-02, -8.082e-02, 2.287e-02, 1.084e-01, -1.193e-01, -7.322e-02, 1.251e-01, 1.231e-02, -1.121e-02, -9.672e-02, 2.728e-02, -5.787e-02, 3.771e-02)); + r += mul(s4_2, M4(1.228e-01, -2.115e-02, -5.478e-02, -9.391e-02, -7.383e-02, -1.139e-01, -2.159e-02, 1.082e-01, -5.576e-02, -8.820e-02, 8.714e-02, -7.996e-03, -1.793e-02, 4.947e-03, -4.892e-03, 9.143e-02)); + r += mul(s4_3, M4(4.650e-02, -1.056e-01, -1.316e-01, -9.701e-03, 8.937e-02, -7.974e-02, -1.177e-02, 7.090e-02, -2.849e-02, -2.004e-02, 4.543e-02, -4.452e-02, 7.714e-02, -7.697e-02, 1.959e-02, 7.751e-02)); + r += mul(s4_4, M4(-1.665e-01, 1.863e-01, -8.215e-02, -5.029e-02, -1.530e-02, 2.796e-01, 1.253e-01, -1.444e-01, -1.062e-01, 2.850e-02, -6.918e-03, -7.752e-02, -8.895e-02, -4.825e-03, 7.103e-02, 7.231e-02)); + r += mul(s4_5, M4(1.011e-01, 8.921e-02, -8.754e-02, -8.981e-02, -3.880e-02, -2.067e-01, 8.463e-02, -8.275e-02, 7.571e-02, -1.195e-02, 1.401e-02, -8.276e-02, 2.977e-02, 7.257e-02, 1.761e-01, 9.600e-02)); + r += mul(s4_6, M4(3.094e-02, -6.186e-03, 7.496e-02, 2.487e-02, -3.221e-02, -6.985e-03, 1.212e-01, -3.637e-02, -1.775e-02, 1.375e-02, -3.809e-02, -2.806e-02, -1.726e-03, 4.752e-02, -9.841e-03, -2.093e-02)); + r += mul(s4_7, M4(-6.665e-02, 
8.968e-02, -1.947e-03, 7.916e-03, 1.457e-01, 9.335e-02, -1.535e-01, 5.746e-02, -3.287e-02, -1.573e-02, 4.369e-02, -4.585e-03, 1.116e-01, 8.898e-03, -6.204e-02, 6.182e-02)); + r += mul(s4_8, M4(5.083e-02, -3.205e-02, 4.880e-02, -4.987e-04, -2.765e-02, -7.642e-02, 7.199e-02, -1.001e-02, -4.057e-02, 2.807e-02, -1.059e-02, 3.827e-03, -6.015e-02, -4.428e-02, -1.895e-02, -2.997e-02)); + r += mul(s5_0, M4(1.595e-02, -4.492e-02, -6.387e-02, -7.062e-02, 8.614e-03, -1.412e-02, 5.415e-02, -3.994e-02, 1.212e-02, 2.353e-02, -5.643e-02, -9.880e-02, -9.901e-02, 3.936e-02, 3.183e-02, 1.666e-02)); + r += mul(s5_1, M4(2.284e-02, 9.285e-02, -7.087e-02, 1.962e-02, 1.836e-02, -9.249e-02, 7.274e-02, -3.925e-02, -1.148e-01, -3.381e-02, -1.147e-01, 1.551e-02, -1.443e-01, -1.351e-02, 2.367e-02, 7.472e-02)); + r += mul(s5_2, M4(-6.567e-02, -6.534e-04, -2.999e-02, 3.788e-02, 5.928e-02, 2.940e-02, -3.614e-02, -8.431e-02, 6.493e-03, 9.792e-02, 8.166e-02, 8.121e-03, 3.733e-03, -6.000e-02, 1.023e-02, -2.708e-02)); + r += mul(s5_3, M4(4.225e-02, -3.035e-02, -3.136e-02, -7.348e-02, -1.554e-02, 3.912e-02, -2.285e-01, -4.692e-02, 1.047e-01, -8.994e-02, 1.769e-01, 1.248e-01, -6.176e-02, 1.573e-02, -8.333e-02, 3.275e-02)); + r += mul(s5_4, M4(7.160e-02, 2.975e-01, 1.284e-01, -4.519e-02, 1.370e-01, -7.781e-02, -8.876e-02, 1.955e-02, -1.218e-01, 3.048e-01, 1.675e-01, 1.339e-01, -1.556e-01, 5.754e-02, 3.948e-02, 8.600e-02)); + r += mul(s5_5, M4(-2.356e-01, 7.179e-02, -8.412e-02, 1.549e-02, 8.241e-02, -2.647e-02, 7.431e-02, -4.565e-02, 1.053e-01, -1.426e-01, 1.563e-01, 8.870e-02, 1.115e-01, -2.496e-02, 3.625e-02, -9.573e-03)); + r += mul(s5_6, M4(2.063e-02, -1.548e-02, 9.447e-02, -9.163e-03, 4.279e-03, 7.593e-03, 4.727e-02, -1.522e-02, -3.840e-02, 8.830e-02, -7.936e-02, 3.788e-03, -9.664e-02, 6.229e-02, -1.123e-02, -2.040e-02)); + r += mul(s5_7, M4(4.598e-02, 1.724e-01, -9.667e-02, -3.743e-02, 7.593e-02, 8.019e-03, 7.071e-02, 2.795e-02, -1.156e-01, -3.966e-02, -1.742e-01, -7.907e-02, -2.258e-03, 
-8.263e-02, 8.969e-02, 9.902e-02)); + r += mul(s5_8, M4(6.340e-02, 5.018e-02, -9.251e-02, 1.755e-02, 2.021e-02, -7.445e-02, -7.248e-03, 3.071e-03, -4.418e-02, 1.510e-01, -5.434e-02, -4.775e-02, 8.281e-03, -7.794e-02, 1.005e-01, 2.240e-02)); + r += mul(s6_0, M4(-8.541e-02, -7.668e-03, 8.428e-02, -5.870e-02, -2.830e-03, 2.378e-02, 3.209e-02, 4.221e-02, -1.416e-02, -7.413e-02, -2.234e-03, 9.183e-03, 4.123e-02, 3.226e-03, 5.348e-02, 7.047e-03)); + r += mul(s6_1, M4(4.549e-02, -2.740e-02, 4.882e-02, 1.613e-02, -2.580e-02, 1.114e-02, 6.591e-02, -1.712e-02, 4.918e-02, 1.411e-01, -6.373e-03, 4.880e-02, -1.052e-01, 7.112e-02, 1.284e-01, -2.216e-02)); + r += mul(s6_2, M4(-5.023e-02, 6.376e-03, 1.275e-02, 7.544e-02, -2.007e-02, -2.550e-02, 7.617e-03, 1.943e-03, -3.676e-02, -2.099e-02, -3.072e-02, 6.026e-02, -3.776e-02, -3.612e-02, 1.224e-02, -3.338e-02)); + r += mul(s6_3, M4(-6.905e-02, -9.304e-02, -1.556e-01, -4.700e-02, 7.271e-03, 3.053e-03, -1.253e-01, 3.238e-02, 7.153e-02, 1.051e-02, -3.062e-02, 2.274e-02, -8.820e-02, 7.336e-03, -1.870e-01, -3.185e-02)); + r += mul(s6_4, M4(2.769e-01, -5.243e-02, -1.432e-01, 8.889e-02, -2.395e-03, -1.144e-01, 1.223e-01, -4.498e-02, -1.315e-01, 1.572e-01, -1.973e-01, 1.578e-01, 3.022e-01, 5.166e-02, -1.834e-01, 1.442e-03)); + r += mul(s6_5, M4(-4.584e-02, -1.975e-03, -1.124e-02, 1.870e-02, -3.355e-02, -6.602e-03, 4.581e-02, -2.536e-02, -1.210e-01, 1.289e-02, -1.664e-01, 4.967e-02, 1.213e-01, 6.296e-02, -1.414e-01, -5.444e-04)); + r += mul(s6_6, M4(-1.273e-02, -5.103e-02, 7.597e-02, -5.106e-03, 1.498e-03, -6.909e-02, -3.966e-02, -4.536e-03, -1.409e-02, 3.820e-02, 4.236e-03, 2.091e-02, 8.105e-03, -8.909e-03, -4.405e-02, 5.760e-02)); + r += mul(s6_7, M4(-8.759e-02, -8.420e-02, -1.538e-01, 1.251e-01, 8.604e-02, 8.704e-02, 2.454e-02, -5.437e-02, 1.268e-01, 1.532e-02, 4.255e-02, 4.514e-02, -7.425e-02, 5.124e-02, -1.197e-02, 1.105e-02)); + r += mul(s6_8, M4(-9.679e-02, -3.635e-02, -1.599e-01, 2.217e-02, 1.157e-02, 7.903e-02, -1.282e-02, 
2.517e-02, 3.072e-03, 1.212e-02, -5.068e-02, 1.219e-02, -5.048e-02, -4.789e-02, 6.014e-02, -1.834e-02)); + r += mul(s7_0, M4(-7.395e-02, 4.417e-02, -7.326e-02, 1.657e-04, -8.550e-02, 5.602e-03, -2.309e-02, 1.407e-02, 2.018e-02, -4.896e-02, 5.191e-02, -1.649e-02, 1.707e-02, -9.629e-03, 1.198e-01, 5.186e-02)); + r += mul(s7_1, M4(9.423e-02, -3.370e-02, -5.891e-02, -2.028e-03, -7.932e-02, -3.480e-02, 4.558e-02, 8.408e-02, 7.433e-02, 2.920e-02, -1.659e-02, -4.810e-03, -1.731e-01, 6.637e-03, 1.876e-01, -3.583e-02)); + r += mul(s7_2, M4(-3.342e-02, 5.117e-02, -6.354e-02, -2.572e-02, -8.278e-02, 4.129e-02, 3.478e-02, 5.152e-02, 1.427e-02, -4.283e-02, 3.041e-02, -3.038e-02, -5.652e-02, -1.183e-01, 3.366e-02, 4.271e-02)); + r += mul(s7_3, M4(-1.862e-01, 6.683e-02, -8.013e-02, -1.807e-01, -6.053e-02, -1.974e-02, -1.750e-01, -2.887e-02, -3.272e-03, -6.210e-02, 2.414e-02, -1.185e-02, -6.046e-02, -6.312e-02, -5.798e-02, -7.354e-03)); + r += mul(s7_4, M4(-1.143e-01, -9.726e-03, -8.392e-02, 2.117e-01, -5.687e-02, -1.317e-01, 8.600e-02, -1.780e-03, -2.099e-02, 8.315e-02, -2.715e-02, 5.463e-02, 1.539e-01, 1.487e-01, -2.683e-01, 5.775e-02)); + r += mul(s7_5, M4(-1.194e-01, -3.820e-02, 4.998e-02, -4.698e-02, -9.662e-02, 5.332e-02, 6.678e-03, 1.984e-02, -5.187e-02, 3.933e-02, -2.790e-02, 2.727e-02, -1.750e-02, -5.128e-02, 4.539e-02, 6.119e-02)); + r += mul(s7_6, M4(8.464e-02, -5.824e-02, 6.962e-02, -2.679e-02, 3.461e-03, -1.479e-02, -4.726e-02, 7.719e-02, 3.804e-02, -1.091e-02, 1.558e-01, 8.211e-03, -3.342e-02, -5.296e-02, -5.517e-02, 3.337e-02)); + r += mul(s7_7, M4(3.936e-02, -9.005e-02, -2.283e-01, 4.386e-02, 2.988e-02, -1.306e-01, 3.540e-02, -1.752e-02, 6.241e-02, -8.067e-03, 5.707e-02, 2.101e-02, -2.648e-02, -6.828e-02, 6.354e-02, 4.999e-02)); + r += mul(s7_8, M4(1.137e-01, 1.752e-02, -1.071e-01, 1.907e-02, -6.962e-02, 2.038e-02, -8.363e-02, 5.959e-02, 5.774e-02, 3.732e-03, 1.098e-01, -3.748e-03, -1.016e-01, -4.212e-02, 8.611e-02, -2.438e-03)); + r += V4(3.150e-02, 5.513e-03, 
-4.366e-02, 3.548e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(7.776e-02, -8.490e-02, -2.067e-02, 5.440e-02, 1.813e-02, 1.135e-01, 2.268e-02, 3.435e-02, -4.555e-02, -1.398e-01, -8.772e-02, -3.666e-02, -7.805e-02, 5.617e-02, -8.735e-04, -8.572e-04)); + r += mul(s0_1, M4(-3.737e-02, -1.494e-01, -3.927e-04, 3.512e-02, -1.903e-02, 3.908e-02, 2.908e-02, 9.508e-02, -1.152e-02, -1.174e-01, -1.210e-02, -8.811e-02, 1.220e-02, 1.813e-01, 5.844e-03, -1.913e-03)); + r += mul(s0_2, M4(3.612e-02, -7.414e-02, -2.258e-02, 4.371e-02, 1.355e-02, -1.887e-02, -3.716e-03, 2.872e-02, -5.748e-02, -3.006e-02, 2.186e-02, 9.390e-02, 5.408e-04, 7.620e-02, 2.978e-02, -4.244e-02)); + r += mul(s0_3, M4(4.609e-02, 4.968e-02, -8.155e-02, -8.411e-02, 6.703e-02, -1.381e-01, -6.787e-02, 1.911e-02, -4.370e-02, 5.156e-03, -2.329e-02, -1.051e-01, 3.994e-02, -2.949e-02, -3.222e-02, -7.972e-02)); + r += mul(s0_4, M4(1.459e-01, 1.109e-01, -5.734e-02, 1.614e-01, -1.681e-01, 3.539e-03, -9.288e-02, -1.257e-01, -1.014e-01, 5.845e-02, -2.211e-01, -1.245e-02, 1.704e-01, -1.163e-01, -1.202e-01, -1.136e-01)); + r += mul(s0_5, M4(-5.593e-02, 8.014e-02, 4.557e-02, 1.334e-02, 5.387e-02, -2.346e-02, 4.065e-02, -3.965e-02, -3.307e-02, 1.917e-02, 4.113e-02, -3.794e-02, -4.636e-02, -6.311e-02, -6.138e-02, 1.189e-01)); + r += mul(s0_6, M4(-2.809e-02, 2.056e-02, 
3.268e-02, -1.146e-01, 1.179e-01, 9.341e-02, -6.772e-02, -1.389e-01, -1.195e-01, 8.743e-02, 1.178e-01, -9.008e-02, -7.854e-02, 5.421e-02, -3.389e-02, 3.070e-02)); + r += mul(s0_7, M4(-6.888e-02, -3.600e-02, 9.016e-02, -3.882e-02, -1.024e-01, -4.963e-02, 6.431e-02, -2.895e-02, 6.086e-02, -1.285e-01, 1.493e-01, 9.648e-02, 2.547e-01, 1.457e-01, -1.138e-01, 5.216e-02)); + r += mul(s0_8, M4(1.205e-02, -1.702e-02, 5.149e-02, 7.429e-03, 8.371e-02, 4.814e-03, 2.502e-02, -9.301e-02, 3.953e-02, 2.052e-02, -1.253e-03, -1.018e-03, -1.161e-01, 1.265e-02, 4.431e-02, -8.109e-02)); + r += mul(s1_0, M4(1.480e-01, -1.260e-01, 4.616e-02, 4.163e-02, 4.978e-03, 1.068e-02, -1.285e-02, 2.300e-02, -1.355e-03, -2.255e-02, -6.099e-03, 5.602e-02, 1.958e-02, 4.921e-03, -2.415e-02, 5.545e-03)); + r += mul(s1_1, M4(-1.063e-01, 4.527e-02, -4.872e-02, 7.474e-02, 5.204e-02, 2.735e-02, -1.533e-02, -1.948e-02, -1.332e-01, -1.691e-02, -1.712e-03, -6.606e-02, -2.148e-02, -6.268e-02, -4.682e-02, -1.285e-02)); + r += mul(s1_2, M4(-4.936e-02, 8.319e-03, 2.232e-02, -4.762e-02, -5.726e-03, -1.410e-02, 3.006e-02, -5.014e-03, 2.384e-02, -1.393e-02, 1.334e-02, 1.541e-02, 2.470e-02, -1.395e-02, 1.095e-02, -3.093e-02)); + r += mul(s1_3, M4(9.590e-02, 7.083e-02, -1.208e-01, -4.706e-02, -1.534e-01, -5.825e-03, 2.298e-02, -1.212e-01, 6.187e-03, -4.588e-03, -8.587e-02, -3.169e-03, -1.830e-02, 1.651e-02, -1.949e-02, -1.584e-02)); + r += mul(s1_4, M4(-2.469e-01, 4.538e-02, -1.953e-01, -1.554e-01, -7.490e-02, -3.285e-01, 1.109e-01, -7.588e-03, 1.271e-01, 2.494e-03, 6.129e-02, 5.532e-02, -4.167e-02, 2.438e-01, -2.073e-01, -2.021e-01)); + r += mul(s1_5, M4(1.644e-02, 1.013e-02, 4.682e-02, 4.437e-02, 7.156e-02, -3.308e-02, -3.563e-02, 9.885e-02, -4.479e-02, 1.694e-02, -1.865e-02, -8.136e-03, 2.330e-02, 1.699e-02, -1.056e-01, 1.087e-01)); + r += mul(s1_6, M4(-6.609e-02, 2.807e-02, 3.110e-02, -8.848e-02, 2.765e-02, 8.620e-02, 3.319e-02, -5.906e-02, 8.444e-02, 2.822e-02, 1.107e-02, -3.552e-02, -9.031e-02, 1.956e-02, 
-5.127e-02, -5.593e-03)); + r += mul(s1_7, M4(1.399e-01, -4.111e-02, 2.611e-02, 4.028e-02, -1.003e-02, 1.235e-01, -1.821e-02, -1.066e-02, 3.003e-02, -9.736e-03, 1.179e-01, -3.768e-02, 4.234e-02, 5.035e-02, -2.162e-01, -4.588e-02)); + r += mul(s1_8, M4(1.795e-02, -3.291e-02, 6.307e-03, 5.110e-02, -1.168e-01, 7.285e-03, -6.830e-02, 3.189e-02, 6.169e-02, 7.242e-03, 2.605e-02, -2.164e-02, -1.964e-02, -3.577e-02, -4.418e-02, -3.246e-02)); + r += mul(s2_0, M4(-3.049e-02, 3.876e-02, 1.798e-02, 1.660e-02, 5.262e-03, -7.100e-03, -2.275e-04, -2.742e-02, -5.409e-02, 1.216e-01, 1.681e-02, -8.374e-02, 1.909e-01, -1.592e-02, -2.350e-02, 4.850e-02)); + r += mul(s2_1, M4(-3.809e-02, 7.727e-02, -1.538e-02, -7.747e-02, -1.638e-02, 3.303e-03, -4.032e-02, -1.914e-02, 6.667e-02, -1.473e-01, 9.159e-02, 2.177e-02, 7.800e-02, -2.749e-02, 2.020e-02, -7.160e-02)); + r += mul(s2_2, M4(3.598e-02, -6.197e-02, 1.842e-02, -8.793e-03, -1.018e-02, 5.654e-02, 5.311e-02, 2.156e-02, 6.909e-02, -4.899e-02, -7.643e-04, 5.588e-02, -2.846e-02, 1.155e-02, -6.372e-02, -4.291e-02)); + r += mul(s2_3, M4(3.729e-02, 1.007e-01, 2.525e-02, -1.302e-01, -1.402e-02, -3.402e-02, -2.153e-02, 9.281e-02, 1.373e-01, -5.732e-02, -3.573e-02, -3.186e-02, -4.562e-02, -1.542e-01, -2.256e-02, 2.462e-02)); + r += mul(s2_4, M4(1.297e-01, -8.224e-02, -2.092e-01, 2.684e-01, -2.339e-03, 2.797e-02, 3.087e-02, 1.398e-01, 9.239e-02, 7.381e-02, -8.860e-02, -5.791e-02, 6.191e-02, 5.237e-02, 7.781e-02, 2.119e-01)); + r += mul(s2_5, M4(-4.760e-02, 1.027e-02, 3.280e-02, -1.385e-02, 5.485e-02, 3.425e-02, -8.161e-02, 1.306e-01, -7.043e-04, -1.177e-02, 5.946e-02, -7.133e-02, -3.342e-02, -6.026e-02, 2.064e-02, -1.492e-02)); + r += mul(s2_6, M4(-9.405e-02, -4.250e-02, 1.683e-02, 7.080e-03, 2.519e-02, 1.549e-02, 3.318e-02, -1.970e-02, 8.231e-02, -8.037e-02, -7.925e-02, -6.067e-03, 3.509e-03, -4.632e-02, 5.849e-02, 7.958e-02)); + r += mul(s2_7, M4(1.424e-01, 5.223e-02, 3.317e-02, -2.217e-01, 5.145e-02, 1.991e-02, 3.337e-03, -8.977e-02, 
6.504e-02, -5.637e-02, 1.894e-01, 6.373e-02, -8.009e-02, -2.898e-02, -2.254e-02, 5.607e-02)); + r += mul(s2_8, M4(4.897e-02, -2.811e-02, 4.906e-02, 4.026e-02, -1.230e-01, -5.932e-02, 4.591e-03, 4.566e-02, -9.466e-02, -2.565e-02, 3.836e-02, 5.090e-02, 2.224e-02, 4.254e-02, -7.822e-02, -1.107e-02)); + r += mul(s3_0, M4(-1.730e-02, -3.316e-02, 3.565e-02, -2.075e-02, -1.353e-02, 7.812e-03, 9.693e-03, -2.420e-02, -1.212e-02, -9.251e-03, -1.824e-02, -5.754e-02, 4.554e-02, 6.043e-02, 2.870e-02, 3.500e-02)); + r += mul(s3_1, M4(-7.212e-02, 2.119e-02, 3.419e-02, 8.373e-02, -3.011e-02, 1.856e-02, -1.155e-01, -2.185e-03, 7.813e-02, -2.907e-02, -3.720e-02, 3.578e-02, -1.057e-01, -8.175e-03, -5.053e-02, 2.420e-02)); + r += mul(s3_2, M4(9.529e-02, 1.016e-01, 3.172e-02, -2.386e-02, -8.391e-02, 6.843e-02, -9.736e-03, -9.194e-02, 1.399e-02, -8.978e-03, 2.900e-02, -6.146e-02, -6.319e-02, -1.560e-03, -9.177e-02, -6.033e-02)); + r += mul(s3_3, M4(2.028e-02, 1.315e-02, 5.543e-02, 7.761e-02, -8.352e-02, 6.295e-03, -6.043e-03, 9.804e-02, -3.626e-02, -5.410e-02, -2.883e-02, -1.200e-02, -2.720e-03, -3.627e-02, 6.209e-02, -1.272e-02)); + r += mul(s3_4, M4(1.698e-02, -1.959e-03, -1.426e-01, -7.658e-02, -5.019e-03, 7.411e-02, 1.118e-01, 1.282e-01, 8.223e-02, 2.384e-01, -8.747e-02, -1.974e-01, 2.444e-02, -6.515e-02, 5.985e-02, -2.092e-01)); + r += mul(s3_5, M4(-5.460e-03, -2.205e-02, 8.765e-02, -6.076e-02, -5.173e-02, -5.875e-02, -8.130e-02, 2.187e-01, 1.908e-02, -2.804e-02, -4.912e-03, -1.549e-02, 3.233e-03, -2.002e-02, 4.922e-02, 1.501e-01)); + r += mul(s3_6, M4(4.957e-02, -3.018e-02, 1.195e-02, -1.627e-02, -2.495e-02, 2.673e-02, 3.557e-02, -1.834e-02, -3.139e-02, 1.945e-01, -4.287e-02, -2.590e-01, 6.648e-02, -1.035e-01, 1.978e-02, 2.497e-01)); + r += mul(s3_7, M4(4.261e-02, -4.467e-03, 9.605e-02, -1.084e-01, -4.150e-02, 8.793e-03, -4.267e-02, -6.823e-02, -4.787e-02, -1.536e-01, 1.780e-01, 1.504e-02, -8.017e-02, 6.095e-02, -1.297e-01, 1.903e-01)); + r += mul(s3_8, M4(3.615e-02, -5.108e-02, 
3.453e-02, 1.011e-01, -7.597e-02, -6.015e-03, 1.119e-02, -3.885e-02, 2.361e-02, 3.135e-02, -5.040e-02, -3.085e-02, 3.696e-02, 7.501e-02, -2.058e-02, -1.242e-01)); + r += mul(s4_0, M4(4.920e-02, -7.762e-02, -1.594e-02, 6.495e-02, -6.660e-03, 6.573e-02, 5.427e-02, -1.234e-01, 4.898e-02, 6.072e-03, 4.608e-02, 5.062e-02, 9.321e-02, -5.075e-03, -1.583e-02, 6.021e-03)); + r += mul(s4_1, M4(5.049e-02, 7.086e-02, -7.803e-02, -2.777e-03, 1.827e-01, -1.059e-01, -8.425e-02, 1.674e-01, 9.989e-03, -5.947e-02, 7.359e-03, 1.393e-01, -1.248e-01, 6.799e-02, -1.108e-01, -3.081e-02)); + r += mul(s4_2, M4(-9.983e-02, -3.100e-03, 5.232e-02, 3.045e-02, 7.090e-02, -1.493e-02, 3.596e-02, -6.723e-02, -3.310e-02, 3.425e-02, -6.720e-02, -7.788e-02, -2.674e-02, -1.370e-02, 6.114e-02, -7.593e-02)); + r += mul(s4_3, M4(-1.041e-01, 7.343e-02, 5.043e-02, -8.283e-03, -3.446e-02, -7.448e-02, 1.155e-01, 9.511e-02, -4.565e-03, -7.229e-03, -4.273e-02, 5.366e-02, -1.361e-01, -4.208e-02, 1.054e-01, 1.825e-01)); + r += mul(s4_4, M4(1.120e-01, 1.808e-02, -1.422e-01, 4.162e-02, 3.161e-01, 8.619e-02, 1.298e-01, -2.176e-01, 7.566e-02, -1.184e-01, -4.221e-02, -6.631e-03, -1.206e-02, 2.938e-02, -1.332e-01, 4.335e-02)); + r += mul(s4_5, M4(2.097e-01, 3.137e-02, 1.642e-01, -1.393e-01, -2.272e-01, -7.897e-02, -9.002e-02, 5.244e-02, -2.101e-02, -5.433e-02, 1.045e-01, -1.250e-02, 4.406e-02, 1.994e-02, -2.391e-02, 6.093e-02)); + r += mul(s4_6, M4(-6.634e-02, -6.862e-03, 8.092e-03, 3.152e-02, 9.126e-02, 3.181e-02, -2.699e-02, -2.576e-02, 2.686e-02, -5.285e-02, 1.551e-02, 1.751e-02, 1.191e-01, 8.773e-02, 1.456e-02, -1.665e-01)); + r += mul(s4_7, M4(-2.891e-02, -3.612e-02, -5.749e-02, 2.471e-02, 2.772e-01, -1.306e-01, 8.702e-02, 1.480e-01, -4.810e-02, 4.675e-02, -4.128e-03, -1.958e-02, 5.197e-02, -6.999e-02, 9.079e-02, 7.055e-02)); + r += mul(s4_8, M4(9.988e-02, 1.870e-02, 8.131e-02, -2.260e-02, -8.316e-02, 8.149e-03, 2.334e-02, -5.434e-03, 5.043e-02, 2.868e-02, -2.653e-02, 6.189e-02, -3.698e-02, 3.083e-02, -3.091e-02, 
2.374e-03)); + r += mul(s5_0, M4(5.254e-02, -7.315e-02, -2.225e-02, 2.153e-02, 5.725e-02, -4.636e-02, 1.601e-03, 4.516e-02, 8.068e-02, 2.729e-02, -3.870e-02, -7.201e-02, 6.760e-02, 4.552e-03, 4.438e-02, 4.500e-02)); + r += mul(s5_1, M4(1.685e-01, 1.509e-01, 2.948e-03, 1.776e-02, 1.675e-02, 6.696e-02, -5.363e-02, -6.286e-03, 6.024e-02, 3.023e-03, -5.779e-02, 5.292e-03, -1.165e-01, 7.991e-02, -1.851e-02, 4.630e-04)); + r += mul(s5_2, M4(-7.170e-02, 1.589e-02, -1.884e-02, -3.883e-02, 7.200e-02, -1.746e-02, -2.195e-02, 9.030e-02, 1.058e-01, -8.948e-02, -1.143e-01, 6.897e-02, -6.149e-02, 1.574e-03, 4.231e-02, 1.363e-02)); + r += mul(s5_3, M4(3.745e-02, 1.247e-01, 2.019e-02, -3.741e-02, -3.449e-03, -4.847e-03, 1.767e-03, 5.781e-03, -7.935e-02, -2.522e-01, 3.269e-02, 1.529e-01, 3.102e-01, -1.893e-01, -6.841e-02, 3.808e-02)); + r += mul(s5_4, M4(1.921e-01, -7.191e-02, 1.048e-01, 6.779e-03, 6.828e-02, 2.094e-02, 4.389e-02, 5.065e-02, -1.595e-02, -2.677e-01, -7.935e-02, -1.236e-01, 1.635e-01, -4.481e-02, 4.704e-02, 1.869e-01)); + r += mul(s5_5, M4(6.662e-03, 6.683e-02, -9.824e-02, 1.703e-01, -2.904e-02, 5.726e-02, -3.509e-02, -1.256e-01, -1.423e-01, 9.473e-02, -1.299e-01, 1.175e-01, -3.071e-02, -2.751e-02, 4.384e-02, -8.192e-02)); + r += mul(s5_6, M4(-1.580e-02, -3.752e-02, 4.568e-02, 7.775e-03, 2.727e-02, 7.825e-03, -1.738e-02, -9.195e-03, -3.736e-02, -6.091e-02, 3.713e-02, 6.820e-02, 1.742e-02, 1.099e-02, -2.703e-02, -4.711e-02)); + r += mul(s5_7, M4(-2.568e-02, -3.585e-02, -8.110e-05, -1.047e-02, 1.640e-02, -2.770e-02, 2.889e-02, 3.152e-02, 8.110e-02, -1.274e-01, 6.282e-02, 1.401e-01, 1.375e-02, -5.635e-02, 4.940e-02, 5.975e-02)); + r += mul(s5_8, M4(4.971e-02, -3.187e-02, 5.470e-02, -5.790e-02, 2.007e-02, 1.442e-02, -5.515e-02, 8.206e-02, 9.185e-03, -2.850e-02, -4.617e-02, -2.761e-02, 7.174e-02, -2.299e-03, -8.726e-03, -4.167e-02)); + r += mul(s6_0, M4(-2.564e-02, -1.334e-03, 1.618e-02, -3.692e-02, 1.668e-02, -1.014e-02, 3.184e-02, -8.247e-05, -5.784e-02, -6.551e-02, 
-1.474e-02, -1.508e-02, 1.280e-02, 1.104e-02, 1.940e-02, -2.254e-02)); + r += mul(s6_1, M4(1.281e-02, 2.432e-02, -3.916e-02, -8.733e-02, 2.112e-02, -1.650e-02, -5.389e-02, 2.289e-02, 6.510e-02, -1.273e-01, 8.516e-02, -4.619e-02, 7.458e-02, -4.286e-02, -1.097e-01, -4.328e-02)); + r += mul(s6_2, M4(2.650e-02, -1.040e-02, 7.558e-03, -1.189e-02, -6.083e-03, -3.193e-03, -9.984e-03, -3.848e-02, -2.653e-02, -7.239e-02, 2.185e-02, 1.225e-02, -3.582e-02, -2.979e-02, -6.728e-02, -6.881e-02)); + r += mul(s6_3, M4(-3.437e-02, 6.707e-02, 3.284e-02, -1.281e-01, 2.801e-02, 2.978e-03, 2.879e-02, -7.024e-03, -5.116e-02, 3.863e-02, -5.467e-02, 8.941e-02, 5.295e-02, -1.732e-01, -1.607e-01, -6.141e-03)); + r += mul(s6_4, M4(-5.183e-02, 9.150e-02, 2.158e-01, 1.855e-01, -1.774e-01, 4.494e-02, -3.685e-02, -2.582e-02, 1.537e-01, -1.493e-02, -8.099e-02, 1.548e-01, 8.523e-02, -1.556e-02, 1.480e-01, 1.893e-01)); + r += mul(s6_5, M4(1.187e-01, 5.356e-02, -6.313e-02, 9.672e-02, 2.376e-03, 1.626e-02, 6.959e-03, -9.679e-02, 5.677e-03, -3.208e-02, -9.106e-02, 1.793e-01, 1.366e-02, -1.316e-02, 6.110e-02, -5.431e-02)); + r += mul(s6_6, M4(-3.338e-03, 8.092e-02, 6.916e-02, -1.039e-01, -6.471e-02, 8.988e-03, 5.310e-02, 7.353e-02, 1.128e-02, 3.089e-02, -2.249e-02, 1.239e-02, -6.665e-02, -3.720e-02, -2.291e-02, 5.894e-02)); + r += mul(s6_7, M4(1.134e-01, 1.221e-02, -6.070e-02, 2.300e-01, -8.134e-02, 3.754e-02, 9.095e-02, -3.804e-02, 7.501e-02, -1.004e-01, 1.162e-01, 5.006e-03, 4.744e-02, 1.739e-02, -1.323e-01, 8.227e-02)); + r += mul(s6_8, M4(-1.192e-01, -2.425e-02, -9.741e-03, 1.033e-02, 1.348e-02, 4.238e-03, 5.134e-02, 2.432e-02, -3.472e-02, 2.930e-02, -1.485e-02, -6.624e-03, 2.464e-02, -6.743e-03, -5.677e-02, -1.125e-02)); + r += mul(s7_0, M4(2.961e-04, 5.695e-02, 1.692e-02, 1.641e-02, 1.896e-02, 3.423e-02, 3.964e-02, -4.835e-02, -7.153e-02, -1.021e-02, -1.489e-02, -4.491e-02, 6.248e-03, 6.234e-02, 3.034e-02, -4.721e-02)); + r += mul(s7_1, M4(4.580e-02, -4.302e-03, 5.372e-02, 1.092e-01, 9.311e-02, 
7.429e-02, -7.219e-02, 1.315e-02, -4.621e-02, -3.435e-02, 8.618e-02, 6.339e-04, -5.809e-02, -5.998e-02, -8.710e-02, -5.751e-02)); + r += mul(s7_2, M4(4.485e-02, 2.007e-02, 3.034e-02, 5.546e-02, -1.602e-02, -3.560e-02, -2.043e-02, 4.087e-02, 4.749e-02, -8.724e-04, 4.623e-02, 1.840e-02, -3.697e-02, 8.748e-03, -3.764e-03, -1.345e-01)); + r += mul(s7_3, M4(1.538e-01, -1.023e-01, -1.091e-01, -1.067e-01, 1.039e-01, 2.164e-03, -3.211e-02, 3.243e-02, -7.896e-02, 8.814e-02, -3.925e-02, -7.875e-02, 5.796e-03, -5.906e-02, -7.718e-02, 3.024e-02)); + r += mul(s7_4, M4(-1.636e-01, -7.883e-02, -2.044e-01, 3.189e-02, -2.025e-01, -3.092e-02, -2.128e-01, 3.520e-02, -5.502e-03, 7.618e-02, -1.111e-01, 4.137e-02, 2.036e-01, -2.299e-02, 2.865e-02, 1.354e-01)); + r += mul(s7_5, M4(1.363e-01, -4.054e-02, -2.335e-02, 2.584e-02, 3.570e-02, 8.628e-02, 1.420e-02, -1.352e-02, -3.313e-03, -4.462e-03, -1.690e-03, 3.997e-02, -1.273e-01, -3.589e-03, -4.635e-02, -2.387e-02)); + r += mul(s7_6, M4(8.699e-02, 7.315e-03, 4.443e-02, 1.327e-02, -7.019e-02, -1.051e-02, 4.505e-02, 4.059e-02, -5.563e-02, 3.547e-02, 8.318e-03, -7.002e-02, -3.911e-02, 1.473e-02, -2.082e-02, -4.584e-03)); + r += mul(s7_7, M4(2.100e-03, -4.188e-02, 1.642e-01, 2.146e-01, 5.512e-02, 1.341e-01, -6.786e-02, -2.989e-02, -6.685e-02, -7.217e-02, 1.433e-01, -1.345e-02, -5.557e-02, 2.869e-02, -8.787e-02, 5.125e-02)); + r += mul(s7_8, M4(9.447e-02, -1.057e-02, 9.580e-02, 3.739e-03, -6.323e-02, -3.503e-03, 1.983e-02, 7.549e-03, -5.925e-02, -3.357e-02, 2.204e-03, 2.899e-02, -6.129e-02, 1.970e-02, -2.830e-02, -5.696e-02)); + r += V4(2.020e-02, -4.553e-03, -2.459e-02, 1.937e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 
s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.077e-01, 9.312e-02, -5.188e-02, 7.384e-02, -5.968e-02, 2.927e-03, -3.236e-02, -5.140e-02, -3.015e-02, -3.844e-02, 8.994e-02, -1.180e-01, -9.557e-02, -4.035e-02, 4.019e-02, -4.095e-02)); + r += mul(s0_1, M4(-6.296e-02, -4.241e-02, -6.281e-03, -6.678e-02, 9.068e-03, 3.762e-02, 1.032e-01, -4.027e-02, 2.218e-02, 6.294e-02, -1.689e-02, 1.324e-01, -1.670e-02, -7.119e-02, -5.797e-02, -4.215e-03)); + r += mul(s0_2, M4(-6.613e-02, -1.290e-02, 1.807e-02, 4.846e-04, 4.511e-02, 4.319e-02, -1.287e-02, 1.502e-02, -3.730e-02, -1.057e-02, 2.362e-02, -5.695e-02, -5.287e-03, 1.501e-02, -1.542e-02, -1.826e-02)); + r += mul(s0_3, M4(-3.606e-02, 1.462e-01, 2.109e-01, -2.420e-02, 1.386e-03, -5.487e-02, 5.800e-03, 4.513e-02, -1.422e-01, -8.521e-02, 2.863e-01, -2.422e-01, 2.749e-02, -1.105e-01, 1.328e-03, -2.261e-01)); + r += mul(s0_4, M4(1.142e-01, 4.361e-02, 4.532e-02, 2.146e-03, -1.481e-01, -1.284e-01, -3.127e-02, -9.069e-02, -1.411e-01, -2.611e-02, -2.912e-01, 2.501e-01, 1.544e-01, -3.091e-02, -1.803e-01, -5.694e-02)); + r += mul(s0_5, M4(-5.748e-02, -8.738e-02, -1.973e-02, 4.722e-02, -3.113e-02, -7.124e-03, 8.854e-03, -1.669e-02, 6.880e-02, 2.122e-02, -5.463e-02, 1.973e-03, -6.377e-02, -5.127e-03, -3.509e-02, 5.819e-02)); + r += mul(s0_6, M4(5.161e-02, 6.178e-02, -6.580e-03, 7.210e-02, 1.012e-01, 1.839e-02, 5.339e-02, 2.248e-02, 6.152e-02, 3.338e-02, 4.869e-03, -1.636e-01, -5.381e-02, -7.649e-02, 3.155e-02, -9.452e-02)); + r += mul(s0_7, M4(3.837e-02, -2.835e-03, 9.076e-02, -8.489e-04, 6.508e-02, 2.648e-02, -6.215e-02, 8.233e-02, 2.223e-02, -6.067e-02, 5.139e-02, -4.934e-02, -9.968e-02, 6.218e-02, 2.873e-03, -6.527e-02)); + r += mul(s0_8, 
M4(5.733e-02, 1.893e-02, -6.683e-02, 4.339e-02, -4.547e-02, 2.725e-02, 1.997e-02, -1.831e-02, 3.253e-02, 5.693e-02, -3.447e-02, 3.470e-02, 6.988e-02, -1.481e-01, 4.093e-02, 4.865e-02)); + r += mul(s1_0, M4(1.218e-01, 1.391e-01, -9.917e-02, 2.681e-01, -6.814e-03, -4.386e-02, -1.731e-02, -4.896e-02, -4.263e-03, 7.133e-03, -2.855e-02, 1.162e-01, -6.505e-02, -3.310e-02, 4.968e-02, -7.657e-02)); + r += mul(s1_1, M4(-1.714e-01, -3.825e-02, 1.989e-03, -8.984e-02, 4.135e-02, 3.992e-02, -8.609e-03, -6.335e-02, -3.296e-02, -3.458e-02, 4.691e-03, 1.030e-01, 2.207e-02, -1.372e-02, -2.782e-02, 7.251e-02)); + r += mul(s1_2, M4(1.158e-02, 2.207e-02, 1.008e-02, -1.122e-02, 5.151e-02, 3.024e-02, -4.144e-02, 5.029e-02, 6.255e-02, 6.074e-03, 3.316e-02, -3.334e-02, 9.460e-04, 3.999e-02, 4.258e-02, -1.319e-02)); + r += mul(s1_3, M4(-6.833e-03, 2.981e-01, 1.190e-01, 4.172e-01, -2.082e-01, -1.010e-01, 1.663e-01, 2.756e-02, 1.126e-02, 6.412e-02, 5.292e-02, 3.232e-02, 5.397e-02, -6.604e-02, -8.024e-02, -3.327e-02)); + r += mul(s1_4, M4(1.082e-01, -7.797e-02, 3.743e-01, -2.278e-02, 1.222e-01, 3.582e-02, 3.057e-01, -1.525e-01, 3.195e-02, 3.684e-03, 1.318e-01, -1.431e-02, -1.279e-01, -1.193e-01, -9.534e-02, 1.111e-02)); + r += mul(s1_5, M4(-7.195e-02, -8.329e-03, 2.136e-02, 1.546e-02, -3.515e-02, -3.558e-02, 3.955e-02, 8.360e-02, -1.101e-01, -1.174e-01, -3.039e-02, -2.660e-02, -1.078e-02, -1.269e-02, -1.314e-01, 7.183e-02)); + r += mul(s1_6, M4(5.152e-02, 5.627e-02, -4.271e-02, 1.356e-01, 1.291e-01, -1.450e-01, -1.893e-02, 1.868e-01, 6.525e-02, 5.429e-02, 1.105e-01, -9.555e-02, -7.097e-02, 1.332e-02, 3.753e-02, -4.533e-03)); + r += mul(s1_7, M4(3.159e-02, 5.370e-02, 1.118e-01, -2.614e-02, 1.104e-01, 4.714e-03, 8.088e-02, -1.254e-01, 5.789e-02, 1.972e-02, -2.895e-02, 1.946e-02, 5.596e-02, 1.046e-01, 3.249e-02, -6.347e-02)); + r += mul(s1_8, M4(4.930e-02, 2.392e-02, -3.323e-02, 3.137e-02, -1.039e-01, -1.100e-01, 1.549e-02, 1.914e-02, 5.477e-02, 1.615e-03, 6.294e-02, -3.471e-02, 6.569e-02, 
-4.273e-02, 1.651e-02, 9.589e-03)); + r += mul(s2_0, M4(3.221e-02, 7.105e-02, 2.089e-02, 2.255e-02, -6.648e-03, -2.093e-02, 6.522e-03, -2.318e-02, -8.746e-03, 5.476e-02, -3.258e-02, -1.102e-01, 1.749e-01, 2.147e-02, -2.678e-02, -5.268e-02)); + r += mul(s2_1, M4(-1.864e-02, -4.252e-04, -8.805e-02, -4.201e-02, -8.959e-02, -2.189e-02, 2.673e-02, 4.537e-02, 2.190e-01, 1.311e-01, 5.191e-03, 4.206e-02, -2.308e-02, -9.327e-02, -3.661e-02, -1.148e-01)); + r += mul(s2_2, M4(-2.992e-02, 3.498e-02, 5.053e-02, 1.144e-02, 3.595e-02, 1.984e-02, 1.643e-02, -2.382e-02, 3.988e-02, 4.426e-02, 1.589e-02, 3.642e-02, 6.817e-03, 9.914e-03, -7.543e-02, 4.210e-02)); + r += mul(s2_3, M4(-1.351e-01, 1.527e-01, 2.414e-03, -2.222e-01, 9.301e-03, -4.186e-04, -7.728e-03, -1.186e-01, 7.279e-04, 4.599e-02, 1.162e-02, -1.156e-01, -9.149e-03, 2.266e-02, 3.615e-02, -1.548e-01)); + r += mul(s2_4, M4(1.148e-01, 1.567e-01, -2.782e-01, 3.847e-03, -2.695e-02, -1.642e-03, -8.825e-02, -1.578e-02, 1.793e-01, 2.009e-01, -2.151e-01, -1.173e-01, -1.997e-02, -3.643e-02, 1.601e-02, 3.910e-02)); + r += mul(s2_5, M4(4.336e-02, 8.253e-02, 1.158e-02, -1.455e-02, -5.475e-02, -1.940e-02, -6.271e-02, 1.003e-02, -1.544e-02, 4.810e-03, 6.860e-02, -2.462e-02, -6.332e-02, -6.534e-02, 8.899e-02, 2.976e-02)); + r += mul(s2_6, M4(-3.050e-02, -1.263e-02, -1.012e-01, -3.166e-02, -7.449e-02, -2.503e-02, -5.525e-02, 3.306e-02, -3.886e-02, 5.543e-02, 5.382e-02, -3.963e-02, 3.195e-02, 3.792e-03, -2.265e-02, -2.995e-02)); + r += mul(s2_7, M4(-9.550e-02, 8.093e-03, -6.212e-02, 1.538e-02, -4.168e-04, 4.310e-03, -1.145e-04, 1.177e-01, -2.583e-02, 1.046e-01, -5.829e-03, 1.304e-01, 3.170e-02, -6.325e-02, 3.423e-02, 6.609e-03)); + r += mul(s2_8, M4(4.738e-02, 2.854e-02, -7.887e-02, 4.673e-02, 8.907e-03, 1.670e-02, 2.104e-02, -5.520e-02, 3.884e-02, 1.973e-02, 1.202e-01, 7.833e-02, -7.837e-02, -4.010e-02, 8.031e-02, -9.000e-02)); + r += mul(s3_0, M4(2.679e-02, 2.173e-02, -1.323e-02, -1.968e-03, 1.052e-02, 2.645e-02, -4.180e-02, 3.171e-02, 
-2.261e-02, -2.448e-02, -5.866e-02, -7.578e-02, 7.423e-02, 5.530e-02, -1.174e-02, 1.807e-02)); + r += mul(s3_1, M4(-5.659e-02, -4.152e-02, -6.892e-02, -7.556e-02, -1.425e-01, -7.938e-02, 3.291e-02, -3.197e-02, 6.805e-02, 8.159e-02, -9.808e-02, -1.154e-01, -5.595e-02, -8.041e-02, -1.891e-02, -1.454e-01)); + r += mul(s3_2, M4(-1.225e-03, -7.471e-03, 4.640e-03, -2.132e-02, -1.187e-01, -3.307e-02, -3.342e-02, 5.745e-02, -4.317e-03, 6.259e-02, -9.434e-03, 8.730e-02, -2.601e-02, -3.185e-02, -5.734e-02, 6.062e-02)); + r += mul(s3_3, M4(8.051e-03, 5.024e-02, -7.025e-02, 8.818e-02, -8.869e-02, -1.515e-02, 5.252e-03, -2.860e-02, -6.840e-02, -2.742e-03, 8.916e-02, 2.543e-02, 1.025e-01, -2.034e-02, -5.761e-03, 3.742e-01)); + r += mul(s3_4, M4(-4.240e-02, 7.146e-02, -4.326e-02, 8.698e-02, -1.976e-02, -1.082e-01, -2.198e-02, -1.269e-01, 6.265e-03, 7.006e-02, -1.549e-02, -1.263e-01, -1.393e-01, -1.977e-01, 4.258e-01, 8.264e-02)); + r += mul(s3_5, M4(-4.435e-02, 2.545e-02, 1.863e-02, 2.360e-02, -3.587e-02, -8.683e-02, 8.091e-02, 1.216e-02, -1.017e-02, 2.783e-02, -2.242e-02, -4.877e-02, 3.016e-02, -4.084e-02, 2.683e-02, 3.136e-02)); + r += mul(s3_6, M4(4.558e-02, -3.461e-02, 1.245e-02, 6.048e-02, -3.341e-03, -4.436e-02, 1.241e-02, 5.533e-02, 8.177e-03, -4.103e-03, 3.365e-02, 7.956e-02, -7.129e-02, 5.349e-02, -4.337e-02, -1.018e-01)); + r += mul(s3_7, M4(-3.148e-03, -2.321e-02, 3.585e-02, -1.823e-02, -1.738e-02, 3.308e-02, 9.017e-02, 3.342e-04, -5.252e-03, -2.761e-02, 1.347e-01, 2.909e-02, -3.061e-03, 2.617e-02, -1.475e-02, -3.956e-02)); + r += mul(s3_8, M4(9.334e-03, -1.697e-02, -5.276e-02, 2.803e-02, 3.719e-03, -2.596e-03, -3.970e-02, 2.540e-02, -1.326e-01, -3.069e-02, 4.041e-02, -2.985e-02, -5.079e-02, 6.418e-02, -2.325e-02, -7.249e-02)); + r += mul(s4_0, M4(3.710e-02, 3.287e-02, 4.836e-02, -9.585e-02, 1.885e-03, 5.685e-03, -5.507e-03, 4.869e-02, 8.140e-02, 2.910e-02, -7.700e-02, 8.265e-02, -8.802e-03, 7.170e-02, -8.877e-04, -2.661e-02)); + r += mul(s4_1, M4(-1.652e-02, 
7.513e-04, 3.076e-02, 5.516e-02, -2.761e-02, 6.551e-03, 1.027e-01, -4.674e-02, -5.690e-03, 5.166e-03, -3.712e-02, -1.844e-02, -3.894e-02, -3.135e-02, -6.120e-02, 1.714e-02)); + r += mul(s4_2, M4(6.301e-02, -5.956e-03, -3.980e-03, -2.500e-02, 4.266e-02, 1.491e-01, -6.553e-02, 1.103e-01, -3.315e-02, -1.231e-02, -3.530e-02, -6.407e-03, 3.088e-02, 4.581e-02, 6.168e-02, 5.876e-02)); + r += mul(s4_3, M4(-1.942e-02, -8.815e-03, -6.423e-02, 1.349e-02, -9.786e-02, -5.757e-02, -2.002e-02, 1.992e-01, 3.463e-02, 5.042e-02, 2.547e-03, -2.162e-02, -7.019e-02, -1.184e-01, 6.112e-02, -1.267e-01)); + r += mul(s4_4, M4(2.089e-03, 1.035e-02, -1.179e-01, 6.884e-02, 1.921e-01, -1.789e-02, 1.577e-01, -2.155e-01, 3.626e-03, 1.062e-02, 1.924e-02, 9.510e-02, -9.622e-02, -1.313e-01, -3.070e-02, 1.933e-01)); + r += mul(s4_5, M4(7.287e-02, 1.362e-01, 3.051e-02, -7.536e-02, -8.768e-02, 6.265e-02, -8.248e-02, -5.200e-02, 2.649e-02, -7.968e-02, 1.080e-02, -6.782e-03, -6.966e-02, -3.352e-02, 2.188e-02, -3.046e-03)); + r += mul(s4_6, M4(-4.444e-03, -1.998e-02, -3.328e-02, 4.993e-02, 1.821e-02, -1.305e-02, -1.285e-02, 1.196e-01, -1.910e-02, 1.556e-02, -1.920e-02, 4.315e-03, 5.933e-02, 1.134e-03, 3.927e-03, -1.362e-02)); + r += mul(s4_7, M4(-3.740e-02, -4.676e-02, 6.161e-02, 7.081e-03, -1.382e-01, 1.377e-01, -5.139e-02, -1.040e-01, -7.295e-04, -2.772e-02, 2.037e-02, 3.765e-02, 2.740e-02, -3.209e-02, -1.239e-02, -3.002e-03)); + r += mul(s4_8, M4(-8.065e-02, 8.158e-03, -6.351e-02, 3.085e-02, 8.517e-02, -4.434e-02, -9.817e-02, -1.627e-03, 1.648e-02, 8.428e-03, 1.192e-02, -3.798e-02, 5.931e-02, -2.690e-02, 5.350e-02, 2.766e-02)); + r += mul(s5_0, M4(-1.622e-02, -4.403e-02, 1.023e-01, -1.139e-01, 5.014e-02, 2.008e-02, 2.745e-02, 2.676e-02, 7.143e-02, 2.604e-03, -1.732e-01, 2.093e-02, 7.864e-02, 5.946e-02, -5.021e-02, -2.883e-02)); + r += mul(s5_1, M4(3.377e-02, 3.455e-02, 1.131e-01, -1.224e-01, -1.474e-02, -1.274e-02, -7.299e-02, 2.685e-02, -1.012e-02, 8.374e-02, 1.117e-01, 1.403e-01, -2.833e-02, 
-1.101e-02, -1.028e-01, 1.124e-01)); + r += mul(s5_2, M4(-1.305e-01, -9.027e-02, -1.216e-01, 2.585e-02, 5.500e-02, 4.169e-02, 3.386e-02, -4.850e-03, 1.853e-02, -9.479e-03, 1.559e-01, -5.464e-02, -4.298e-02, -1.684e-02, 4.651e-02, 2.252e-02)); + r += mul(s5_3, M4(-2.488e-02, 1.727e-02, 3.289e-02, -1.119e-01, -7.352e-02, 6.214e-03, 5.950e-02, -5.322e-02, -2.526e-03, -2.031e-02, -1.997e-01, 2.193e-01, 1.698e-01, 9.388e-02, -1.760e-02, -2.799e-01)); + r += mul(s5_4, M4(-1.644e-01, -6.465e-02, -1.964e-01, -2.100e-01, -8.515e-03, 8.435e-04, 2.077e-02, 6.934e-02, -1.016e-02, 1.320e-01, -4.857e-01, -1.243e-01, 2.594e-02, 5.574e-02, -2.013e-01, -6.372e-02)); + r += mul(s5_5, M4(6.039e-02, 1.160e-02, -8.932e-02, -9.111e-03, -3.383e-02, 1.090e-02, 7.543e-02, -7.096e-02, -1.671e-01, -2.288e-01, -1.212e-01, 4.297e-02, -7.059e-02, 4.491e-02, 3.937e-02, 5.454e-02)); + r += mul(s5_6, M4(2.136e-02, -8.988e-03, -3.129e-02, 1.085e-02, 4.960e-02, -2.167e-03, 2.436e-03, -5.589e-02, -3.687e-02, 2.597e-02, 5.500e-02, 7.337e-02, -2.889e-02, 1.762e-01, 9.740e-02, -2.120e-01)); + r += mul(s5_7, M4(-5.648e-02, -9.203e-02, 1.907e-02, -7.378e-02, -1.303e-02, 3.863e-03, -1.161e-01, 2.245e-02, 6.718e-02, 4.304e-02, 2.344e-01, 2.262e-02, 8.896e-02, 6.554e-02, -2.300e-01, 7.788e-02)); + r += mul(s5_8, M4(-3.322e-02, 3.952e-02, -6.377e-02, 1.787e-02, 1.770e-02, -1.739e-03, 5.596e-03, 1.804e-03, 1.023e-01, 2.178e-02, -4.965e-02, -1.213e-02, 5.301e-02, -1.652e-02, 4.560e-02, 3.575e-02)); + r += mul(s6_0, M4(5.305e-02, 6.024e-03, -6.012e-03, -2.101e-02, 9.004e-04, 1.515e-02, -3.313e-02, 6.079e-02, -2.658e-02, -4.520e-02, -7.564e-03, -1.143e-01, 9.037e-02, 8.192e-03, -3.790e-02, 1.233e-01)); + r += mul(s6_1, M4(-4.037e-02, 2.179e-02, -7.962e-02, 9.745e-02, -5.554e-02, -3.406e-02, -3.068e-03, -5.949e-02, 6.269e-02, 5.716e-02, 5.162e-02, 3.283e-02, 3.814e-02, -1.724e-02, -1.610e-02, -3.306e-02)); + r += mul(s6_2, M4(2.850e-02, 1.388e-02, -1.091e-02, -1.767e-03, 4.596e-04, 7.681e-04, -4.901e-02, 
3.987e-02, -6.414e-03, -2.162e-02, 2.892e-02, -6.097e-03, -2.390e-02, -3.975e-02, -3.758e-02, 3.565e-02)); + r += mul(s6_3, M4(-1.030e-01, 5.867e-02, 6.311e-02, 4.378e-02, -7.515e-02, 7.870e-04, 2.617e-02, 1.581e-02, 2.060e-02, -2.820e-02, -6.539e-03, -2.060e-01, 1.867e-02, 7.975e-02, 7.304e-02, -1.851e-01)); + r += mul(s6_4, M4(1.318e-01, 6.028e-02, 8.039e-02, -4.800e-02, -1.777e-01, -2.444e-01, 7.054e-02, -2.082e-02, 1.185e-01, 1.637e-01, -1.121e-01, -2.441e-02, 2.106e-01, 4.146e-02, -6.769e-02, 6.979e-02)); + r += mul(s6_5, M4(1.051e-02, 2.402e-02, 4.806e-02, -3.359e-02, -6.737e-02, -3.636e-02, 4.301e-02, 1.530e-03, 1.313e-01, 1.824e-02, -8.806e-02, -7.708e-02, 9.100e-02, 3.534e-02, 2.935e-02, 3.096e-02)); + r += mul(s6_6, M4(3.856e-02, -3.014e-03, 3.394e-02, 1.071e-01, -4.078e-02, -6.640e-02, -2.350e-02, 1.959e-02, 9.128e-03, -4.047e-02, -3.454e-02, -8.068e-02, -3.010e-02, 6.882e-02, 5.409e-02, -7.349e-02)); + r += mul(s6_7, M4(6.851e-02, 8.326e-02, 1.533e-01, -6.287e-02, 1.728e-02, -1.714e-01, -5.740e-02, 4.809e-03, 2.828e-02, 3.169e-02, -6.673e-02, 3.754e-02, -2.514e-02, 3.858e-02, 6.254e-02, -7.638e-02)); + r += mul(s6_8, M4(-5.590e-03, 5.953e-02, 7.725e-02, -2.560e-03, -1.225e-01, -2.320e-02, 4.039e-02, -3.092e-02, 3.539e-02, 9.669e-03, -8.232e-02, -3.003e-02, 1.670e-02, -2.627e-02, 1.353e-02, -1.655e-02)); + r += mul(s7_0, M4(-6.490e-02, 5.115e-03, -1.528e-02, -1.145e-01, 7.564e-02, 7.405e-02, -5.443e-02, 6.745e-03, -6.547e-02, -4.020e-02, -4.225e-02, 3.839e-02, 1.049e-01, 4.631e-02, -5.378e-02, 1.743e-01)); + r += mul(s7_1, M4(3.424e-04, 1.829e-02, 9.501e-03, 5.903e-02, 1.090e-01, 1.012e-01, -6.213e-03, -4.232e-02, -3.044e-02, -3.550e-03, 6.156e-02, -1.484e-03, 6.939e-03, -8.635e-03, 2.082e-03, -3.243e-02)); + r += mul(s7_2, M4(7.190e-02, 9.364e-02, 2.862e-02, -8.397e-02, 6.550e-02, 6.722e-02, -6.371e-02, -4.278e-03, 4.129e-02, 6.908e-03, -5.093e-02, 8.225e-04, -3.228e-02, 1.587e-02, 1.322e-02, 9.009e-02)); + r += mul(s7_3, M4(-1.729e-02, 7.476e-02, 
6.151e-02, 3.103e-02, 8.877e-02, 6.905e-02, -2.906e-02, -9.292e-02, -1.081e-01, -6.498e-02, 8.411e-02, -3.789e-02, -2.383e-02, 5.354e-02, -1.833e-03, -4.389e-03)); + r += mul(s7_4, M4(8.625e-02, 5.764e-02, -2.618e-01, -3.980e-02, -6.684e-02, -1.350e-02, -2.388e-01, 1.900e-01, 6.187e-02, 2.144e-02, -3.389e-03, -9.097e-02, 1.580e-01, 1.538e-01, 2.038e-02, -6.656e-02)); + r += mul(s7_5, M4(-1.117e-01, 1.565e-02, -5.238e-02, -8.835e-02, 6.472e-03, 8.414e-02, 3.239e-02, -4.135e-02, 1.121e-02, -7.688e-03, -1.144e-01, 1.978e-02, 5.818e-02, 6.246e-02, -3.547e-02, 4.597e-02)); + r += mul(s7_6, M4(2.220e-02, 7.870e-03, 2.765e-02, 2.444e-03, -7.059e-02, 6.972e-02, -5.966e-02, 1.448e-01, 5.399e-02, -1.218e-01, -5.895e-02, 3.625e-02, -4.502e-02, 6.365e-02, 2.878e-02, 4.715e-02)); + r += mul(s7_7, M4(-7.888e-02, 1.038e-01, -3.999e-02, 2.142e-01, 7.834e-02, 1.839e-02, 9.709e-02, 2.613e-02, -3.665e-02, -1.361e-03, 3.381e-02, -3.310e-02, -3.747e-02, 6.834e-02, 3.315e-02, 8.918e-03)); + r += mul(s7_8, M4(-3.474e-02, 6.471e-02, 8.885e-02, -4.717e-02, 3.719e-02, 9.219e-02, -4.642e-02, -9.449e-03, -2.453e-02, -2.280e-02, -6.700e-02, -5.099e-03, -2.730e-02, 1.369e-02, 4.512e-02, 2.629e-02)); + r += V4(2.436e-02, 1.957e-02, -1.232e-02, -3.296e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.208e-01, -1.226e-01, 4.139e-02, 1.672e-02, 
3.747e-02, 1.641e-02, -1.400e-02, 2.603e-03, -9.280e-02, 1.165e-01, 6.712e-02, 5.747e-02, 5.795e-03, -1.884e-02, -5.806e-02, -2.103e-02)); + r += mul(s0_1, M4(-4.448e-02, -1.759e-01, 4.656e-02, -5.958e-02, -7.425e-02, 4.919e-02, -8.946e-02, 3.773e-02, 1.568e-01, 1.065e-01, -4.453e-02, -7.229e-02, 3.335e-03, -1.265e-02, -3.742e-02, -5.298e-02)); + r += mul(s0_2, M4(9.436e-03, -1.831e-02, 4.140e-03, 6.075e-02, 3.335e-02, 3.694e-02, -2.059e-02, 3.985e-02, -3.482e-02, -2.645e-02, -6.472e-02, -5.205e-03, -1.363e-02, -1.129e-01, 6.678e-03, 1.812e-02)); + r += mul(s0_3, M4(-1.682e-02, 3.792e-02, -9.146e-02, -8.048e-03, 1.158e-01, 6.700e-04, 2.752e-02, -5.915e-03, -6.619e-02, -2.281e-01, -1.466e-01, 1.194e-01, -7.796e-02, 2.379e-01, 4.111e-03, -6.715e-02)); + r += mul(s0_4, M4(-2.327e-01, 8.179e-02, -2.227e-01, -1.154e-01, -3.701e-02, 2.974e-02, 8.689e-02, -9.778e-02, 7.231e-02, -8.271e-02, -7.702e-02, -6.086e-02, -7.075e-02, 3.773e-02, 1.167e-01, 1.593e-02)); + r += mul(s0_5, M4(2.426e-02, 4.114e-02, -7.095e-02, 1.081e-01, 2.005e-03, 2.834e-02, -4.480e-02, -1.039e-03, -3.527e-02, 9.050e-02, 8.530e-02, -2.006e-02, 2.647e-02, 4.315e-02, 4.213e-02, 6.488e-02)); + r += mul(s0_6, M4(-3.340e-02, 4.409e-02, 2.409e-04, 3.593e-03, -1.914e-03, 5.568e-02, -1.601e-02, 6.657e-02, -3.951e-02, 1.302e-02, 8.837e-02, 6.599e-02, 5.774e-02, -9.646e-02, -4.925e-02, -7.544e-02)); + r += mul(s0_7, M4(-4.754e-02, 3.320e-02, -6.046e-02, -5.000e-02, 1.162e-01, -6.466e-03, 1.049e-01, -4.923e-02, 4.806e-02, -2.365e-03, 4.935e-02, -1.513e-02, 6.068e-02, -4.087e-02, -2.004e-02, 1.045e-01)); + r += mul(s0_8, M4(4.572e-02, 2.684e-02, 4.193e-02, -6.155e-03, -1.725e-02, 1.555e-02, 1.902e-02, -7.173e-02, 3.296e-02, -2.906e-02, -3.089e-02, 1.108e-01, -2.672e-02, -1.714e-03, -2.541e-02, 1.468e-02)); + r += mul(s1_0, M4(1.692e-01, -3.109e-01, 7.963e-02, 3.339e-02, 4.335e-02, -8.582e-03, 9.552e-03, -2.264e-02, -7.118e-02, 2.155e-02, 5.057e-02, 3.908e-03, -2.734e-02, 7.640e-03, -4.017e-02, 1.679e-02)); + r += 
mul(s1_1, M4(-4.408e-02, -2.848e-01, 9.724e-02, -6.205e-02, -6.897e-03, -1.005e-01, 7.328e-02, -2.755e-02, -3.623e-02, 8.837e-03, -1.560e-02, -7.135e-02, 1.371e-02, 1.046e-01, 1.830e-02, -2.500e-02)); + r += mul(s1_2, M4(-4.164e-02, -5.760e-02, -4.867e-03, -4.731e-02, -1.110e-02, -3.070e-02, 2.912e-02, 3.289e-02, 3.885e-02, -5.508e-04, -1.053e-02, 1.754e-02, 5.379e-03, -5.888e-02, 3.521e-02, -2.847e-02)); + r += mul(s1_3, M4(1.379e-02, 1.928e-01, -9.303e-03, 5.900e-02, -1.323e-01, 8.078e-02, -5.612e-02, -5.716e-02, 3.289e-02, -9.726e-02, -8.349e-02, 5.944e-02, -6.001e-02, 6.552e-03, 2.450e-03, -3.166e-02)); + r += mul(s1_4, M4(-1.411e-01, -1.940e-02, -3.377e-01, -9.430e-02, -2.508e-02, 1.420e-01, -2.662e-02, 1.240e-01, -9.893e-02, 1.008e-01, -1.202e-01, -4.663e-02, -8.621e-02, -6.152e-02, -1.013e-01, -4.004e-03)); + r += mul(s1_5, M4(1.475e-02, 5.437e-02, -1.313e-02, 8.481e-02, 6.013e-02, 1.428e-02, 2.697e-03, 7.665e-02, 2.210e-02, -4.551e-02, -6.520e-02, 3.908e-03, 2.881e-02, 2.892e-02, 9.411e-02, 1.113e-02)); + r += mul(s1_6, M4(1.295e-02, 2.119e-02, 3.595e-02, -1.883e-02, 5.922e-02, 1.354e-01, 1.468e-02, 1.992e-02, 8.332e-02, 2.735e-02, -5.336e-02, 7.069e-02, -2.745e-02, -4.790e-02, -7.620e-02, 2.050e-02)); + r += mul(s1_7, M4(-1.151e-01, 1.238e-02, -3.719e-03, -8.327e-03, -1.265e-01, 4.812e-02, 9.510e-03, 2.710e-02, 1.868e-02, -3.206e-02, -1.689e-02, 8.033e-02, 4.113e-02, 8.544e-02, -2.066e-03, 2.793e-02)); + r += mul(s1_8, M4(-2.543e-02, 2.565e-02, 5.130e-02, 3.102e-03, 5.037e-02, 4.359e-02, -5.993e-02, 1.153e-01, 4.809e-03, 4.918e-02, 3.962e-02, -4.677e-03, 1.491e-02, -5.397e-02, -2.091e-02, 1.154e-02)); + r += mul(s2_0, M4(-4.696e-02, -6.299e-02, -2.885e-02, -3.337e-02, 5.491e-02, -7.292e-02, -2.948e-02, -5.428e-02, -4.811e-02, 1.687e-01, 9.580e-03, -3.189e-02, 1.525e-01, -1.680e-03, 2.752e-02, -5.007e-04)); + r += mul(s2_1, M4(6.137e-02, 8.930e-02, 1.935e-02, -9.878e-02, 6.470e-03, -1.180e-03, 4.439e-02, -4.159e-02, 7.039e-02, 6.277e-02, -4.220e-02, 
-3.506e-02, -7.495e-02, -4.569e-02, 9.915e-02, -4.819e-02)); + r += mul(s2_2, M4(-6.690e-03, -4.709e-02, -8.617e-03, 1.128e-02, -1.234e-02, -5.296e-02, -1.959e-02, 8.562e-03, 2.983e-02, 6.469e-02, -8.118e-03, 9.698e-03, 1.594e-02, 1.233e-01, 7.226e-02, 4.709e-02)); + r += mul(s2_3, M4(1.362e-01, 9.432e-02, 6.378e-02, -2.612e-02, 4.053e-02, -3.164e-02, 2.733e-02, -2.000e-02, 1.917e-01, 3.272e-02, -4.118e-02, -2.870e-02, 1.899e-01, 1.436e-02, 6.496e-02, -2.292e-02)); + r += mul(s2_4, M4(-1.569e-02, 7.891e-03, 1.097e-01, -1.259e-01, 5.660e-03, -2.804e-02, -1.121e-01, 4.181e-02, 1.050e-01, -3.083e-02, -4.846e-02, -8.452e-02, 1.462e-02, -8.250e-02, -1.173e-01, -9.593e-03)); + r += mul(s2_5, M4(-1.959e-02, 4.876e-03, 6.666e-02, 2.711e-02, 5.521e-02, 2.238e-02, 2.469e-02, 1.929e-01, -3.808e-02, -1.214e-01, -4.719e-02, 1.663e-01, -5.399e-02, -1.247e-01, -1.763e-01, -4.044e-02)); + r += mul(s2_6, M4(1.661e-01, -7.206e-02, 4.163e-02, -7.267e-02, 3.635e-02, -3.221e-02, 3.350e-03, -6.100e-03, 2.232e-02, -4.944e-03, -3.431e-02, -1.230e-01, -5.037e-02, 7.514e-02, 2.936e-02, 3.774e-03)); + r += mul(s2_7, M4(-2.536e-03, -2.618e-02, -1.153e-02, -5.133e-02, -5.048e-02, -7.482e-03, -9.806e-03, -2.532e-02, -1.631e-02, 1.052e-03, 6.459e-02, 1.764e-02, -3.458e-03, 1.575e-02, 2.013e-02, 1.126e-01)); + r += mul(s2_8, M4(-1.950e-03, 1.616e-02, 4.400e-02, -2.765e-03, -2.407e-02, 2.714e-02, 6.637e-02, 5.078e-02, -2.704e-02, 6.811e-02, -6.171e-02, -6.063e-02, -3.555e-03, -7.526e-03, -1.073e-02, -6.472e-02)); + r += mul(s3_0, M4(-1.586e-02, -8.472e-02, 1.757e-02, -7.384e-04, 7.426e-03, -6.145e-02, 1.518e-02, -5.079e-02, 3.692e-02, -2.280e-02, 1.228e-02, 2.641e-02, -2.579e-02, -2.550e-02, -9.544e-03, 2.705e-03)); + r += mul(s3_1, M4(-3.033e-02, 1.983e-02, -6.241e-03, -6.516e-02, -4.897e-02, -1.499e-01, -2.616e-02, 5.815e-02, 7.285e-02, 1.058e-01, 8.509e-02, 5.656e-02, -7.548e-02, -1.785e-01, 3.110e-02, -1.290e-02)); + r += mul(s3_2, M4(-2.506e-03, -8.837e-02, -4.282e-02, -2.442e-02, -1.142e-02, 
-1.472e-01, 8.187e-02, -6.503e-03, 4.318e-03, 1.718e-02, 3.119e-02, 4.592e-02, 4.852e-02, 6.621e-02, 6.957e-02, 8.130e-02)); + r += mul(s3_3, M4(7.670e-03, 4.436e-02, 3.912e-02, -2.809e-02, 3.055e-02, -1.178e-01, -4.016e-02, -1.266e-02, 2.756e-02, -1.929e-02, -2.997e-02, -3.582e-02, -9.364e-02, -8.080e-02, 1.828e-02, 2.897e-02)); + r += mul(s3_4, M4(-8.633e-02, -6.935e-02, -7.481e-03, -1.387e-01, -4.836e-02, -1.039e-01, -5.183e-02, -1.802e-02, 4.349e-02, -1.398e-01, -1.713e-01, -7.101e-02, -4.267e-02, -3.547e-02, -2.141e-01, -3.583e-01)); + r += mul(s3_5, M4(-6.121e-03, 2.361e-02, -1.575e-02, 7.977e-02, 4.023e-02, 6.795e-02, -1.890e-02, 6.154e-02, 3.224e-03, -1.176e-02, 4.541e-02, 7.858e-02, -5.268e-02, -3.705e-02, -1.409e-01, 9.165e-02)); + r += mul(s3_6, M4(-2.116e-02, 4.334e-02, -2.392e-03, -3.574e-02, -6.518e-03, -3.482e-02, 6.002e-03, -2.301e-02, -7.438e-02, -2.872e-02, -6.940e-02, -2.751e-02, 1.971e-02, -1.097e-02, 6.442e-02, -5.565e-02)); + r += mul(s3_7, M4(-2.776e-02, -1.469e-03, -5.992e-02, -8.719e-02, -2.990e-02, 3.825e-02, 3.316e-03, -1.588e-02, -2.449e-04, 2.527e-02, -2.485e-02, -1.512e-01, 5.911e-02, 3.865e-02, 9.859e-03, 1.625e-01)); + r += mul(s3_8, M4(1.487e-02, -5.139e-03, 1.519e-02, 9.446e-03, -1.208e-02, -3.288e-02, 1.786e-03, 1.715e-02, 3.260e-02, -1.019e-01, -1.111e-01, -6.100e-02, -4.261e-02, -2.880e-02, 1.342e-02, -6.207e-02)); + r += mul(s4_0, M4(7.473e-02, -9.843e-02, 4.634e-02, 1.222e-02, -9.640e-02, 8.247e-02, -3.801e-02, -2.276e-02, 1.277e-02, 4.691e-03, 2.958e-02, -2.230e-02, 1.357e-02, 1.769e-02, -4.195e-02, -6.130e-02)); + r += mul(s4_1, M4(4.292e-02, 5.470e-02, 3.183e-03, 9.491e-02, -5.083e-02, -3.304e-02, -9.519e-02, 5.875e-02, -4.293e-03, -5.284e-02, 2.780e-02, 3.798e-02, 1.284e-02, -2.329e-03, 1.198e-02, -5.990e-02)); + r += mul(s4_2, M4(-6.650e-02, 5.448e-02, -4.260e-02, -2.709e-02, 8.678e-02, 3.785e-02, 7.341e-02, -4.835e-02, -1.063e-02, -7.336e-02, 2.946e-03, -6.587e-02, -3.151e-02, 3.142e-02, -3.179e-02, 3.592e-02)); + r += 
mul(s4_3, M4(1.646e-02, 1.071e-01, -8.085e-02, 9.750e-03, 2.637e-02, -5.760e-02, -2.088e-02, -1.460e-01, 7.848e-03, -5.850e-02, -1.927e-03, -1.073e-02, -7.960e-02, -1.548e-01, -2.802e-02, -8.254e-03)); + r += mul(s4_4, M4(1.430e-01, -1.003e-01, -1.136e-01, 1.973e-01, -2.317e-01, 9.932e-02, 6.234e-02, -1.388e-02, 6.027e-02, 7.720e-03, 2.609e-02, -2.970e-02, -5.399e-03, -3.966e-02, -8.715e-02, 8.388e-02)); + r += mul(s4_5, M4(-5.722e-02, 2.848e-02, 6.717e-02, -1.891e-01, 1.238e-01, 1.638e-02, -9.690e-02, 2.956e-02, 2.124e-02, -4.084e-02, 8.300e-03, -6.430e-02, -5.404e-02, -3.029e-03, -4.792e-02, 1.314e-01)); + r += mul(s4_6, M4(-5.015e-02, -9.406e-02, 2.852e-02, -5.992e-03, 8.162e-02, -4.082e-02, -1.592e-03, 4.615e-02, -7.159e-02, 2.490e-02, 1.560e-02, -4.330e-02, -4.604e-02, 6.942e-02, -1.232e-02, 1.700e-03)); + r += mul(s4_7, M4(6.375e-02, 6.614e-02, -1.595e-02, 5.002e-02, -5.885e-02, -6.411e-02, 7.694e-02, -2.888e-03, -4.576e-03, -6.373e-03, -5.908e-02, -6.341e-03, 7.994e-03, -2.970e-02, -2.836e-03, 1.464e-01)); + r += mul(s4_8, M4(-6.851e-02, -9.533e-02, 1.039e-02, -7.268e-02, 1.300e-01, -2.387e-02, -4.770e-02, 1.225e-02, 7.557e-04, 1.826e-02, 2.401e-02, 2.455e-03, 9.505e-03, 1.583e-02, -7.808e-03, -4.432e-02)); + r += mul(s5_0, M4(1.032e-02, -1.248e-01, -4.259e-02, -2.107e-02, -2.476e-02, -2.230e-02, 3.155e-03, -6.801e-03, -6.227e-02, 1.033e-01, 1.546e-02, -2.175e-03, -1.698e-02, 4.505e-02, 1.032e-01, -1.746e-02)); + r += mul(s5_1, M4(7.643e-02, 4.087e-02, -5.026e-02, 9.843e-02, 5.734e-02, -1.803e-02, -3.746e-02, -4.964e-02, 1.173e-01, 1.584e-02, -1.999e-01, -7.213e-02, 3.227e-03, -5.543e-02, -8.192e-02, -2.449e-02)); + r += mul(s5_2, M4(1.835e-02, 4.458e-03, 4.713e-02, 1.193e-02, -7.368e-03, -2.718e-02, -7.838e-02, 1.944e-02, -1.046e-02, 3.976e-02, -1.163e-01, -8.234e-02, -7.764e-02, -5.386e-03, -3.642e-02, -4.625e-03)); + r += mul(s5_3, M4(-4.249e-02, -4.110e-02, -9.450e-02, -3.841e-02, 4.038e-02, -5.472e-02, -4.634e-02, 5.570e-02, -2.153e-01, 1.431e-01, 
1.478e-01, -1.139e-01, 6.344e-02, -6.702e-02, -6.537e-02, 4.383e-02)); + r += mul(s5_4, M4(2.586e-01, -1.978e-01, 2.032e-02, 4.913e-02, -8.419e-02, 4.377e-02, 3.250e-02, -5.745e-02, -1.402e-01, 5.409e-02, 2.995e-02, 1.815e-02, 5.592e-03, -2.316e-02, 1.624e-01, 2.534e-01)); + r += mul(s5_5, M4(-3.445e-02, -4.932e-02, -1.166e-02, 7.935e-02, -2.074e-02, 1.331e-02, -6.949e-02, -9.694e-02, 7.806e-02, -4.970e-02, 8.471e-02, -3.458e-03, -5.019e-02, 4.166e-03, -9.217e-02, -5.729e-02)); + r += mul(s5_6, M4(3.876e-02, 5.201e-04, 7.537e-02, -6.016e-02, 3.884e-02, 2.412e-02, 3.531e-02, -2.031e-02, -1.272e-01, -5.110e-02, -1.301e-01, -6.391e-03, -8.345e-03, 1.014e-01, 7.550e-02, -4.753e-03)); + r += mul(s5_7, M4(9.155e-02, 8.092e-02, -4.110e-02, 2.557e-02, 6.704e-02, -4.796e-02, 9.720e-03, 6.098e-02, -1.181e-01, -1.338e-02, 1.684e-02, -3.873e-02, -3.165e-02, 3.809e-02, -8.942e-02, 1.094e-01)); + r += mul(s5_8, M4(-1.930e-02, -1.798e-02, 5.993e-02, -3.256e-02, -2.257e-02, 5.359e-03, 7.253e-04, -4.748e-02, 9.689e-02, 2.043e-02, 1.876e-02, -7.769e-02, -1.006e-02, -1.476e-02, 1.630e-02, -3.216e-03)); + r += mul(s6_0, M4(-1.942e-02, -1.636e-02, -2.003e-03, -9.176e-03, 4.595e-02, -3.816e-02, -6.196e-03, 4.540e-02, 4.486e-02, -5.883e-02, 4.952e-02, -4.961e-02, -2.278e-02, 8.348e-02, 4.060e-02, 3.106e-02)); + r += mul(s6_1, M4(5.459e-02, -7.661e-03, 3.247e-02, -2.312e-02, 1.555e-03, 2.422e-03, 2.480e-02, 8.737e-03, 8.432e-03, 3.619e-02, 4.601e-02, 7.276e-02, 4.691e-02, -2.799e-02, -3.063e-02, -5.632e-02)); + r += mul(s6_2, M4(1.771e-02, -4.439e-02, 8.003e-03, 4.068e-02, 2.212e-02, 2.882e-02, 3.817e-02, 1.550e-02, 4.477e-02, 5.303e-03, 4.320e-02, 8.337e-02, -2.148e-02, -1.042e-02, -4.141e-02, -7.424e-03)); + r += mul(s6_3, M4(-1.046e-02, 1.348e-01, -7.347e-02, 3.864e-04, -2.987e-02, 1.721e-02, -1.031e-02, 1.365e-02, 1.127e-01, -1.271e-01, -2.893e-02, -2.252e-02, 7.782e-02, 6.553e-02, 1.611e-02, 7.165e-02)); + r += mul(s6_4, M4(-1.647e-01, 8.367e-03, -1.954e-01, 3.519e-02, -6.066e-02, 
-1.751e-01, 1.987e-02, 4.552e-02, -3.052e-02, 1.324e-01, 7.788e-02, 1.411e-01, 5.785e-02, 8.841e-03, 3.008e-02, -1.193e-02)); + r += mul(s6_5, M4(4.575e-02, 2.933e-02, -3.609e-02, -1.482e-02, -4.333e-02, -2.008e-02, -3.871e-02, -5.532e-02, 9.783e-02, 6.445e-02, 1.499e-01, 1.683e-01, -2.356e-02, 6.141e-02, 2.904e-03, 2.112e-02)); + r += mul(s6_6, M4(-3.586e-02, 7.513e-03, 2.905e-02, -2.824e-02, 2.945e-02, -4.356e-02, 8.347e-04, 1.698e-03, 8.889e-02, -3.999e-02, -4.600e-02, -8.680e-03, -9.044e-03, -5.725e-03, 2.258e-03, 3.439e-02)); + r += mul(s6_7, M4(-5.036e-02, 1.410e-01, 2.360e-03, -7.009e-02, 4.161e-02, -9.975e-02, -1.061e-01, -1.230e-02, -4.415e-02, -6.260e-02, 4.748e-02, 7.399e-02, 7.400e-02, 5.209e-02, 1.495e-02, 1.961e-02)); + r += mul(s6_8, M4(3.887e-02, 2.861e-02, -5.926e-02, 3.014e-02, -4.256e-02, -3.001e-02, -5.039e-03, 1.458e-04, 6.518e-02, -2.086e-02, 5.172e-02, 3.376e-02, -2.826e-02, -1.956e-02, -6.630e-02, 2.044e-02)); + r += mul(s7_0, M4(2.581e-02, -8.798e-02, -3.776e-02, 5.633e-02, -4.144e-02, 1.434e-01, 5.310e-02, 4.405e-02, 8.046e-04, -7.999e-02, 3.022e-02, -4.280e-02, -5.826e-02, 9.874e-02, 3.675e-02, 1.498e-02)); + r += mul(s7_1, M4(4.093e-02, 1.469e-01, 3.709e-02, -2.254e-02, 5.227e-02, 1.961e-02, 2.499e-02, 4.711e-02, -4.289e-02, -3.350e-02, -1.520e-02, -1.267e-02, 1.336e-02, -1.772e-01, -7.237e-03, -6.089e-02)); + r += mul(s7_2, M4(-3.291e-03, 4.629e-03, -2.715e-02, -4.765e-03, 4.204e-03, -7.442e-03, 1.517e-02, -3.461e-03, 6.636e-02, 6.446e-02, 5.237e-02, -1.822e-02, 1.204e-02, 9.596e-03, 2.026e-02, -4.059e-02)); + r += mul(s7_3, M4(1.446e-01, 1.120e-01, 1.445e-02, 4.715e-02, -9.855e-02, 1.332e-01, 7.686e-02, 3.075e-02, 1.389e-01, -2.450e-02, -1.558e-01, 2.449e-02, -1.939e-02, -2.909e-02, 2.642e-02, 1.386e-02)); + r += mul(s7_4, M4(-3.889e-02, 1.283e-01, 1.610e-01, -1.688e-01, 3.061e-02, -1.769e-01, 2.392e-01, 8.607e-02, -5.087e-02, 9.072e-02, 3.085e-02, -4.719e-02, 7.219e-02, 1.239e-01, 8.664e-02, -4.961e-02)); + r += mul(s7_5, 
M4(7.244e-02, -2.825e-02, 7.402e-02, 3.481e-03, -3.425e-02, 4.283e-02, 7.261e-02, -8.555e-02, 1.067e-01, -8.323e-03, 1.513e-02, 7.220e-02, -5.575e-02, 1.445e-02, -1.231e-02, 8.109e-02)); + r += mul(s7_6, M4(-1.548e-02, -1.125e-01, 7.741e-02, -2.596e-02, 3.716e-02, -4.756e-02, -3.039e-02, 3.515e-02, 1.175e-02, -5.446e-03, -3.847e-02, -9.783e-03, -3.526e-02, 6.083e-03, -2.624e-02, 3.069e-02)); + r += mul(s7_7, M4(1.090e-01, -8.652e-02, 1.857e-01, 4.552e-02, -1.905e-02, 4.213e-02, -4.890e-03, -1.129e-01, -5.078e-02, -1.977e-02, 8.947e-02, 2.143e-02, 8.086e-03, -2.149e-02, 1.718e-02, -1.472e-02)); + r += mul(s7_8, M4(-1.970e-02, -4.491e-03, -2.376e-02, -7.501e-02, -1.957e-03, 6.665e-02, 1.631e-02, -9.853e-03, 6.115e-02, 5.283e-03, -1.900e-02, -1.069e-02, -2.595e-02, -2.954e-02, -1.801e-02, 1.378e-02)); + r += V4(9.662e-03, 1.842e-02, 1.390e-02, 1.935e-02); + return r; +} + +void Pass8(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); 
+ V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); 
+ s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 9 +//!DESC conv8 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, 
y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.887e-02, 3.201e-02, 3.272e-03, 3.765e-02, 5.024e-02, 2.837e-02, 8.306e-03, 2.390e-02, -1.173e-01, -2.534e-02, -1.399e-01, 8.948e-02, -5.215e-03, -1.560e-02, 8.428e-03, -2.487e-02)); + r += mul(s0_1, M4(5.189e-02, -5.796e-02, -4.014e-02, 3.733e-02, 3.837e-02, 9.472e-02, -6.379e-03, 1.350e-02, -1.656e-01, 5.211e-02, 1.362e-01, -6.236e-02, -2.758e-02, -6.192e-02, -1.576e-02, -2.115e-02)); + r += mul(s0_2, M4(5.693e-02, 4.895e-02, -5.442e-02, -3.899e-03, -2.244e-02, -3.409e-02, 2.633e-02, 2.265e-02, -9.917e-02, -2.025e-01, 2.870e-02, -5.216e-02, 5.458e-02, 5.259e-02, 7.653e-03, -7.861e-03)); + r += mul(s0_3, M4(3.048e-02, -2.534e-03, 1.112e-02, -9.310e-02, -4.621e-02, -1.901e-02, 7.749e-02, 6.284e-02, -2.183e-01, 7.311e-03, 8.476e-02, -5.162e-02, 4.214e-02, -2.302e-02, -5.391e-02, 5.794e-02)); + r += mul(s0_4, M4(-9.152e-03, -1.432e-01, 2.792e-02, 8.631e-02, 5.127e-02, 6.313e-03, 5.178e-02, -2.321e-02, -4.154e-01, -2.910e-01, 5.044e-01, 1.183e-01, -3.116e-02, 8.087e-02, 5.677e-02, -3.368e-02)); + r += mul(s0_5, M4(1.282e-02, 2.131e-02, 5.910e-03, 3.835e-02, 4.310e-02, 6.678e-02, -3.528e-03, -5.441e-03, -1.461e-01, 2.870e-01, 8.298e-02, 
-1.276e-01, 1.755e-02, 4.309e-03, 2.989e-02, -2.564e-02)); + r += mul(s0_6, M4(7.705e-02, -1.516e-02, 2.567e-02, 1.081e-01, 2.617e-02, 2.163e-02, -4.291e-02, -9.064e-03, 8.263e-02, 2.934e-03, -3.025e-02, 1.798e-02, -9.562e-02, 2.937e-02, 3.906e-02, -2.487e-02)); + r += mul(s0_7, M4(9.005e-02, 3.765e-02, -3.861e-02, 1.184e-02, -2.468e-02, -2.764e-03, 6.975e-03, 3.962e-02, 1.575e-02, 2.822e-02, -2.852e-02, 1.132e-01, 2.799e-02, -4.942e-05, 6.324e-02, -4.779e-02)); + r += mul(s0_8, M4(-9.121e-03, -2.050e-02, -5.023e-03, 3.821e-02, -2.644e-02, 1.712e-02, 4.536e-02, -2.812e-02, 1.159e-01, -1.993e-02, -1.738e-01, -4.798e-02, 1.387e-03, -1.129e-02, -4.604e-02, 3.013e-02)); + r += mul(s1_0, M4(-2.237e-02, 5.579e-02, -2.246e-02, 3.570e-02, -2.149e-02, 8.068e-02, -1.792e-02, 3.440e-02, -1.433e-02, 1.115e-02, 1.515e-02, 2.813e-02, -1.558e-02, -3.391e-02, -1.400e-02, -4.613e-02)); + r += mul(s1_1, M4(-1.341e-02, 2.247e-02, 3.174e-02, -1.056e-02, 4.711e-02, 2.507e-01, 5.819e-02, -2.294e-02, 5.427e-03, 2.027e-02, 1.049e-03, -1.555e-02, -4.490e-02, -3.285e-02, -5.561e-02, 2.131e-02)); + r += mul(s1_2, M4(3.709e-02, -1.982e-02, 4.372e-03, -3.034e-03, 3.273e-02, 3.586e-02, -3.369e-02, 9.327e-04, -3.915e-02, -2.741e-02, 3.030e-03, -9.380e-05, -5.189e-02, 2.277e-02, 4.675e-02, 5.198e-03)); + r += mul(s1_3, M4(-7.693e-02, 8.631e-05, 2.404e-02, -5.598e-02, 1.511e-03, 4.933e-02, 5.708e-02, -8.107e-02, 3.209e-02, -1.408e-02, 3.269e-03, 2.666e-02, -1.425e-01, -1.133e-01, -1.107e-02, -1.065e-01)); + r += mul(s1_4, M4(-3.071e-02, -9.263e-02, -7.140e-03, -1.791e-03, -2.620e-01, 2.204e-01, 1.028e-01, -6.621e-02, -2.582e-02, -5.728e-02, 3.437e-02, -1.372e-02, 5.936e-03, -3.954e-01, -4.912e-02, -4.338e-02)); + r += mul(s1_5, M4(5.105e-03, -1.751e-02, 2.083e-02, -5.066e-02, 1.203e-01, -2.110e-02, 2.213e-02, -5.408e-02, 4.408e-02, 1.305e-02, 5.215e-04, -2.305e-02, -1.895e-02, 2.121e-01, 7.568e-03, -1.781e-02)); + r += mul(s1_6, M4(-1.679e-02, -4.701e-02, 3.984e-03, 1.883e-02, 1.751e-02, 
-8.799e-03, -5.917e-02, -7.541e-02, 3.519e-02, 1.518e-02, -2.381e-02, -4.177e-02, -6.473e-02, -2.278e-02, -1.700e-02, -2.191e-02)); + r += mul(s1_7, M4(2.106e-02, -4.757e-02, -1.853e-02, 2.467e-02, 4.212e-02, 5.024e-02, -9.230e-02, -3.573e-02, -4.607e-03, -2.600e-02, -1.520e-03, 8.444e-02, -9.903e-02, -2.004e-01, 2.285e-01, 1.495e-02)); + r += mul(s1_8, M4(-4.115e-02, -5.467e-02, -4.223e-03, -8.214e-03, -4.322e-03, 5.092e-02, 2.288e-02, -2.948e-03, 6.988e-03, 4.056e-02, 7.262e-03, -2.476e-02, -5.517e-02, 5.640e-02, -1.188e-02, -1.767e-02)); + r += mul(s2_0, M4(-1.913e-02, 2.134e-03, 1.936e-02, -2.429e-02, -5.421e-02, -2.326e-02, -5.509e-02, -3.796e-02, -3.348e-02, 3.749e-02, 1.311e-02, 2.045e-02, 3.166e-03, 1.811e-02, -1.005e-02, 1.696e-02)); + r += mul(s2_1, M4(-9.150e-02, -1.023e-01, 7.368e-03, 2.306e-02, 7.247e-02, -8.859e-02, -2.536e-02, -6.319e-03, -2.311e-02, -1.426e-04, 4.821e-02, -6.262e-02, -1.403e-03, -1.258e-02, 2.554e-02, -1.763e-02)); + r += mul(s2_2, M4(2.814e-02, 3.237e-02, -6.889e-03, 3.033e-02, 3.767e-02, 2.573e-02, -1.026e-01, -4.388e-02, -5.742e-03, -5.933e-02, 2.558e-02, -7.563e-03, -4.052e-02, -4.367e-02, 7.445e-03, -2.313e-02)); + r += mul(s2_3, M4(1.518e-03, 5.084e-02, -2.629e-02, -7.922e-04, -7.820e-02, -4.543e-02, -3.833e-02, -3.059e-02, 2.764e-02, 2.558e-02, -1.724e-02, 1.145e-01, -1.394e-02, -7.350e-03, 2.198e-02, -1.572e-02)); + r += mul(s2_4, M4(-4.070e-02, -2.989e-02, 8.852e-02, -3.323e-02, -1.839e-01, 2.319e-01, 1.441e-01, -6.286e-02, 4.204e-02, 1.371e-01, -7.721e-02, 3.164e-02, -3.207e-02, -1.033e-01, -1.103e-02, 1.647e-02)); + r += mul(s2_5, M4(-3.430e-02, -5.789e-02, 5.859e-02, 5.950e-02, -4.161e-03, -1.968e-01, -5.103e-02, -2.885e-02, -4.215e-02, -4.254e-02, -9.712e-03, -6.761e-03, 1.942e-02, -1.867e-02, -3.283e-02, 1.285e-02)); + r += mul(s2_6, M4(9.657e-03, -2.888e-02, -2.190e-02, 4.982e-02, 1.119e-01, -1.187e-02, 2.497e-03, -2.191e-02, -4.578e-02, -1.121e-02, -3.918e-02, -5.578e-02, 3.260e-02, 1.351e-02, -1.695e-02, 
-1.870e-02)); + r += mul(s2_7, M4(-6.146e-03, -5.304e-03, -2.515e-02, 6.409e-03, -1.061e-01, -6.602e-02, 1.038e-01, 7.992e-02, -1.125e-02, -1.140e-01, 2.139e-02, 1.562e-02, 3.380e-02, -4.087e-02, -3.406e-02, 2.572e-02)); + r += mul(s2_8, M4(-4.541e-03, -1.534e-02, -1.960e-02, 4.713e-03, 3.191e-02, 3.243e-02, -3.525e-03, 8.842e-02, 2.320e-02, -5.254e-02, 5.576e-03, -6.195e-04, -1.623e-02, 9.936e-03, 2.434e-02, -1.706e-02)); + r += mul(s3_0, M4(-6.865e-02, -2.470e-03, -7.014e-02, 9.169e-02, 8.304e-03, 1.561e-02, -3.308e-02, 6.586e-03, 5.683e-02, -3.107e-02, 1.599e-02, 1.966e-02, 8.197e-03, -8.098e-02, 3.804e-02, 2.126e-03)); + r += mul(s3_1, M4(2.214e-02, 1.383e-01, 1.963e-01, 5.929e-02, 1.310e-02, 3.765e-02, 5.032e-04, 1.223e-02, 5.856e-03, -6.911e-02, -1.580e-02, -4.967e-02, 7.396e-02, 8.123e-02, -2.005e-02, 2.053e-02)); + r += mul(s3_2, M4(2.141e-02, 4.417e-02, -2.593e-02, 1.296e-02, -4.732e-03, 2.229e-02, -1.534e-02, -8.790e-03, -6.795e-03, -3.142e-02, -1.915e-02, -3.137e-03, -8.709e-02, -1.506e-01, 1.318e-01, 3.611e-02)); + r += mul(s3_3, M4(-3.602e-02, 1.065e-02, 6.388e-02, 6.761e-02, -8.897e-02, 1.601e-02, 1.340e-02, -9.509e-03, 1.011e-01, -9.129e-03, 1.162e-02, 7.398e-02, 2.940e-01, -2.547e-01, -4.550e-02, -1.246e-02)); + r += mul(s3_4, M4(3.596e-02, 3.200e-01, -1.405e-01, -2.549e-01, -2.897e-02, 5.599e-02, 1.866e-02, -4.516e-02, 1.024e-02, 2.166e-03, -1.358e-02, 1.027e-01, 2.121e-01, 1.760e-01, -2.349e-01, 5.819e-02)); + r += mul(s3_5, M4(-9.025e-02, -1.524e-01, 2.781e-01, -3.188e-02, 1.139e-02, -1.866e-02, 1.844e-02, 2.610e-02, -4.711e-03, -3.051e-03, -5.856e-02, 2.500e-02, 6.084e-02, -1.612e-02, -6.092e-02, -4.106e-02)); + r += mul(s3_6, M4(-4.519e-02, 6.426e-02, -5.022e-02, -6.991e-02, -6.298e-03, 3.535e-02, 4.076e-02, -1.325e-02, 4.969e-02, 1.608e-02, -5.519e-03, -1.369e-02, -7.312e-02, 6.747e-02, 6.830e-02, 3.065e-03)); + r += mul(s3_7, M4(-4.999e-02, -1.162e-02, 1.204e-01, 2.076e-01, 8.110e-02, 4.946e-02, -5.974e-02, -8.557e-02, 1.209e-02, 2.526e-02, 
3.859e-02, -5.364e-02, 1.788e-02, 5.056e-02, 2.686e-02, 7.050e-02)); + r += mul(s3_8, M4(7.296e-02, -7.147e-02, -1.277e-01, 1.150e-01, -4.047e-02, -2.960e-02, 3.931e-02, 1.868e-02, 5.949e-02, 3.640e-02, 8.097e-03, 1.534e-02, 2.215e-02, -2.979e-02, 3.785e-02, -4.045e-02)); + r += mul(s4_0, M4(2.419e-02, -3.334e-02, -6.239e-02, 1.849e-02, -1.288e-01, 3.855e-03, 8.238e-03, -3.097e-03, -2.360e-02, 3.227e-03, -2.656e-02, -7.296e-02, -2.975e-02, 1.910e-02, 3.925e-02, -1.792e-02)); + r += mul(s4_1, M4(-2.377e-02, 1.642e-02, -1.290e-02, 5.035e-02, -8.455e-03, -9.846e-02, 2.072e-02, -1.479e-01, 1.688e-01, 8.426e-02, 7.203e-02, 6.841e-02, -5.795e-02, 1.265e-01, 1.150e-02, 6.977e-02)); + r += mul(s4_2, M4(-1.809e-02, -9.698e-03, 1.761e-02, 1.454e-02, -8.195e-03, -6.953e-02, 4.992e-02, -4.566e-02, -7.422e-02, -1.824e-02, -4.712e-02, -5.545e-03, -2.661e-02, 4.049e-02, 2.054e-02, 3.071e-02)); + r += mul(s4_3, M4(1.207e-02, -4.860e-02, 1.842e-02, 4.550e-02, 1.145e-02, -3.759e-02, -5.589e-02, -1.074e-01, 9.698e-02, 7.776e-02, -1.022e-02, 1.271e-01, -4.734e-02, 2.670e-02, -3.132e-02, -2.664e-02)); + r += mul(s4_4, M4(5.663e-02, -4.446e-02, -6.395e-02, -6.242e-02, -6.249e-02, -1.642e-01, 2.852e-02, 1.742e-01, 1.478e-01, 3.719e-01, -7.803e-02, -3.928e-01, 1.613e-01, 2.810e-01, -6.044e-02, 3.681e-02)); + r += mul(s4_5, M4(1.708e-02, 2.751e-02, 6.601e-03, 3.119e-02, -7.969e-03, 1.282e-01, -3.515e-02, -1.187e-01, 6.335e-02, -1.502e-01, -4.561e-02, -2.970e-02, -1.501e-01, 5.000e-01, -2.194e-01, 4.952e-02)); + r += mul(s4_6, M4(8.606e-02, -5.005e-02, 2.406e-02, 1.606e-01, 4.853e-02, -2.827e-02, 6.015e-02, 9.987e-02, -1.819e-02, 4.406e-02, -9.072e-03, -8.452e-03, 4.792e-04, -1.547e-02, 2.470e-02, 3.105e-03)); + r += mul(s4_7, M4(8.391e-03, 9.081e-02, -9.720e-03, -1.825e-02, 8.768e-02, -1.014e-02, -1.528e-01, -1.106e-01, 7.172e-03, 1.264e-01, -2.313e-02, -3.496e-02, -2.137e-01, -3.030e-02, 9.430e-02, -1.309e-01)); + r += mul(s4_8, M4(-2.614e-02, 4.468e-02, 4.065e-02, -2.869e-02, -1.297e-02, 
-2.339e-02, -2.540e-03, 1.588e-02, -4.478e-02, 3.770e-02, 3.070e-02, 7.073e-03, -4.713e-02, 8.982e-02, 1.057e-01, -1.787e-01)); + r += mul(s5_0, M4(1.929e-01, -6.049e-02, 1.261e-02, -7.127e-02, -1.489e-01, 1.863e-02, 4.513e-02, -4.118e-03, -5.224e-03, -2.984e-02, -3.773e-02, 6.289e-03, 4.134e-02, 1.385e-02, 8.470e-03, 1.358e-02)); + r += mul(s5_1, M4(1.664e-02, -1.110e-03, -2.606e-01, 1.177e-01, 5.526e-03, 6.164e-02, 1.086e-01, -9.018e-02, -1.285e-01, -8.475e-02, -7.029e-03, -4.614e-02, -1.016e-02, 6.228e-03, -6.354e-03, -1.944e-02)); + r += mul(s5_2, M4(2.205e-02, 1.012e-01, -6.228e-02, -2.201e-02, -3.456e-02, -3.355e-02, 2.056e-02, -2.969e-03, -1.091e-01, -1.215e-01, 5.271e-02, 3.107e-02, -1.952e-02, -3.789e-02, -4.389e-03, 1.075e-02)); + r += mul(s5_3, M4(1.921e-01, 1.543e-01, -1.211e-01, 3.928e-01, 4.351e-02, -2.524e-02, -6.480e-02, 1.052e-01, -2.238e-02, 2.424e-02, -3.844e-02, 9.200e-03, 4.739e-03, -9.607e-03, -1.322e-02, 3.699e-02)); + r += mul(s5_4, M4(-2.783e-01, 7.867e-02, -2.074e-01, 2.666e-01, 5.335e-02, 1.131e-01, -2.982e-01, 2.570e-02, 3.721e-02, 2.780e-02, -1.026e-01, -1.936e-01, -2.599e-04, 4.792e-02, 2.084e-02, -4.745e-02)); + r += mul(s5_5, M4(1.033e-01, 7.341e-02, -2.279e-02, -5.819e-02, -8.561e-02, 8.010e-03, -3.334e-03, -2.201e-02, 2.485e-02, -2.677e-02, -7.876e-02, 4.692e-02, -1.914e-03, -2.119e-02, 2.180e-02, 5.092e-02)); + r += mul(s5_6, M4(7.233e-03, -1.247e-01, -1.209e-02, -8.267e-02, 1.057e-03, -1.752e-02, 7.966e-02, -6.080e-03, 3.715e-02, -1.670e-03, -3.067e-03, 1.106e-01, -2.165e-02, 7.407e-04, -8.410e-03, 1.055e-03)); + r += mul(s5_7, M4(-3.666e-02, 1.788e-02, 1.756e-02, 9.302e-02, 7.233e-03, -7.879e-03, -1.701e-02, 6.506e-03, 6.389e-03, -4.760e-06, -2.765e-02, -1.567e-02, -2.391e-02, -4.882e-02, -3.183e-02, -1.139e-02)); + r += mul(s5_8, M4(1.248e-01, -5.499e-02, -1.877e-01, 3.818e-02, -1.905e-02, 5.370e-03, -2.512e-02, -2.418e-03, -4.736e-02, 2.345e-02, 1.898e-02, -5.234e-02, -7.952e-02, -5.534e-03, 6.866e-02, -1.501e-02)); + r += 
mul(s6_0, M4(3.349e-02, 3.433e-02, 2.367e-03, 1.707e-02, 2.834e-04, 2.968e-02, -1.011e-02, 3.636e-02, 6.394e-02, -4.141e-02, -7.244e-02, -8.106e-03, 2.991e-02, 8.409e-03, -2.334e-02, 2.244e-02)); + r += mul(s6_1, M4(3.531e-02, 4.867e-02, 5.204e-03, -1.661e-02, 1.252e-02, 3.131e-02, 3.468e-02, -1.069e-02, 2.216e-02, -5.971e-02, -2.087e-01, -1.454e-03, -8.230e-02, 1.256e-02, 3.144e-02, -6.300e-03)); + r += mul(s6_2, M4(1.198e-02, -1.093e-01, 2.009e-03, 5.850e-03, 4.185e-02, -4.072e-04, 1.377e-02, -2.387e-03, -6.416e-02, -3.365e-02, -7.202e-02, -2.289e-02, -9.483e-03, -1.216e-02, -3.992e-02, 1.798e-02)); + r += mul(s6_3, M4(-2.601e-02, 4.478e-02, 3.726e-02, 8.723e-02, -3.169e-02, -5.242e-03, 2.795e-02, -9.956e-03, -2.930e-02, -1.820e-03, -1.177e-02, -1.033e-01, 2.332e-02, -3.233e-02, 7.997e-03, 7.938e-02)); + r += mul(s6_4, M4(1.140e-02, -2.316e-02, -3.004e-02, -2.630e-02, 8.140e-03, -1.012e-01, -2.611e-03, -5.549e-02, 1.747e-01, 8.882e-02, -1.212e-01, 3.937e-02, 5.680e-02, -1.092e-02, -5.874e-02, 1.098e-01)); + r += mul(s6_5, M4(4.590e-02, 3.616e-02, -1.270e-01, -9.277e-03, 2.422e-02, -9.688e-02, -1.732e-02, -3.310e-02, -3.165e-02, 2.455e-02, -8.957e-02, 1.277e-02, -9.996e-03, 2.412e-02, 3.442e-02, -1.002e-02)); + r += mul(s6_6, M4(-1.275e-02, 5.449e-03, -4.332e-03, -1.923e-02, 6.185e-02, 2.942e-02, -2.970e-02, 1.438e-02, -5.858e-02, 1.987e-02, 5.388e-02, 1.758e-02, 1.038e-02, -2.942e-02, -6.372e-02, -5.622e-02)); + r += mul(s6_7, M4(-4.430e-02, 3.630e-02, 8.744e-03, -5.341e-02, 1.083e-01, 3.413e-02, -1.207e-01, 1.866e-02, -1.196e-02, 6.106e-02, 1.101e-01, 1.858e-03, 1.266e-02, -1.900e-02, -2.594e-02, 1.300e-02)); + r += mul(s6_8, M4(-7.216e-02, -3.953e-03, 5.978e-02, 2.634e-02, -2.210e-02, -3.100e-02, -3.031e-02, 9.694e-03, -3.416e-02, 6.459e-03, 2.769e-02, -5.562e-02, -1.805e-02, -3.524e-02, 1.197e-02, -2.079e-03)); + r += mul(s7_0, M4(2.432e-02, -3.764e-02, 2.184e-06, -9.107e-03, -3.065e-02, -1.918e-02, -2.148e-02, -5.258e-02, -3.428e-02, 4.043e-02, -7.553e-02, 
-2.058e-02, 3.945e-02, 3.997e-02, 4.444e-02, -1.918e-02)); + r += mul(s7_1, M4(1.699e-02, -1.853e-02, -1.586e-02, -3.566e-02, 9.624e-02, 1.364e-01, -3.323e-03, 6.777e-02, 5.119e-02, -5.110e-02, 2.388e-02, -5.701e-03, -1.320e-01, -2.138e-01, 4.052e-02, -1.073e-02)); + r += mul(s7_2, M4(3.618e-02, 7.331e-02, -2.008e-02, -4.520e-02, -3.762e-02, -2.573e-02, -4.067e-02, -6.379e-03, -3.185e-02, -1.700e-02, -2.503e-02, -1.310e-02, 4.268e-02, 8.023e-02, -2.566e-02, 1.200e-02)); + r += mul(s7_3, M4(5.186e-02, -1.451e-01, 1.294e-03, -8.585e-02, 1.538e-01, -8.961e-02, 4.282e-02, 1.006e-01, 1.072e-01, -1.157e-01, -1.704e-02, 1.783e-02, 7.231e-02, 3.331e-03, 2.005e-02, -3.466e-02)); + r += mul(s7_4, M4(8.486e-02, -2.125e-01, 3.064e-02, 3.166e-01, 5.850e-02, 1.479e-01, -9.927e-02, -3.233e-01, 3.735e-01, 6.033e-03, -2.300e-01, -2.835e-01, 1.770e-02, 1.745e-01, -1.580e-01, -6.637e-02)); + r += mul(s7_5, M4(-2.507e-02, 2.838e-03, 6.493e-02, 4.726e-02, -1.672e-01, -2.349e-01, 1.630e-01, 8.437e-02, 6.116e-02, 3.310e-02, -5.197e-02, 2.226e-02, 4.672e-03, -1.372e-01, 8.634e-03, -1.429e-02)); + r += mul(s7_6, M4(-3.649e-02, -1.269e-02, -1.024e-02, 2.659e-02, -1.143e-02, 1.632e-02, -2.783e-02, -1.192e-01, 5.262e-02, 3.661e-03, -4.617e-02, -2.578e-02, 1.772e-02, -1.441e-01, -5.866e-02, -8.424e-02)); + r += mul(s7_7, M4(-8.299e-02, -9.926e-02, 5.071e-02, -3.511e-02, -2.202e-01, -2.311e-01, 2.417e-01, 3.174e-01, 6.560e-03, 3.771e-02, -6.069e-02, 8.530e-02, 3.150e-02, -1.810e-03, 1.121e-02, -3.748e-02)); + r += mul(s7_8, M4(-3.565e-02, 4.143e-02, 1.022e-02, -8.715e-02, 5.984e-02, 1.291e-01, -1.059e-02, -2.838e-01, 3.367e-02, 1.852e-02, 1.254e-02, -5.066e-02, 5.227e-03, -4.538e-03, -1.702e-02, 6.824e-03)); + r += V4(1.181e-03, 8.230e-03, -8.373e-04, -2.226e-04); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 
s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.735e-02, 7.321e-03, -9.063e-02, 2.655e-02, 5.782e-03, -1.955e-02, 4.747e-03, -1.940e-02, 4.638e-02, -4.222e-03, 1.020e-01, -2.915e-02, -6.362e-02, -1.949e-02, 8.342e-03, 1.491e-02)); + r += mul(s0_1, M4(-4.903e-02, 9.009e-02, -4.578e-02, 5.669e-02, 5.086e-02, 3.943e-02, -1.008e-01, -1.949e-03, -1.187e-01, 1.094e-01, -1.198e-01, 8.811e-02, -2.927e-02, 1.096e-02, 4.834e-02, 1.965e-02)); + r += mul(s0_2, M4(1.657e-02, 5.933e-03, -2.361e-02, 7.328e-02, 3.851e-02, -8.436e-04, 4.338e-03, 2.002e-04, -3.099e-03, -4.612e-03, 6.618e-02, -5.134e-02, 4.999e-02, -1.884e-02, -9.779e-03, 4.197e-02)); + r += mul(s0_3, M4(-2.789e-02, -1.824e-02, -3.909e-02, 4.040e-02, 1.013e-02, 8.704e-02, 1.274e-02, -3.519e-02, -2.138e-01, 2.234e-02, -2.230e-01, 5.964e-02, 1.265e-01, -1.117e-02, 3.896e-02, -2.179e-02)); + r += mul(s0_4, M4(-3.369e-02, 3.132e-02, -2.603e-02, -1.647e-01, -7.495e-02, -7.839e-02, -3.332e-02, -9.295e-02, -4.065e-02, -1.203e-01, -7.278e-03, -4.638e-01, -6.658e-02, 7.366e-04, 3.104e-02, -1.348e-01)); + r += mul(s0_5, M4(-2.310e-02, 3.044e-02, -2.977e-02, 6.350e-02, 1.043e-02, 4.835e-03, -1.005e-03, 1.369e-02, 2.835e-02, -4.378e-02, -1.284e-02, 1.836e-01, -1.514e-01, 2.900e-04, -4.419e-03, 3.467e-02)); + r += mul(s0_6, M4(-1.773e-02, 8.370e-02, -4.369e-02, 4.831e-03, 6.303e-03, -2.615e-02, -4.635e-02, 1.748e-02, 1.000e-01, 5.347e-02, -2.080e-02, 8.705e-02, -4.790e-02, -2.085e-02, 2.236e-02, -7.095e-02)); + r += mul(s0_7, M4(8.422e-03, 4.460e-02, -4.504e-02, -5.108e-02, 1.772e-02, 2.164e-02, -4.921e-02, 
9.575e-02, 2.302e-01, 6.677e-02, 9.069e-02, -2.652e-02, -6.605e-03, 2.440e-02, -6.203e-02, 7.330e-03)); + r += mul(s0_8, M4(5.511e-02, 1.542e-02, -5.059e-03, 3.171e-02, -6.442e-02, -5.679e-03, 1.145e-02, 2.220e-05, 1.217e-01, -7.622e-03, 7.413e-03, 7.513e-02, 5.875e-02, -7.754e-03, -1.073e-02, 6.121e-02)); + r += mul(s1_0, M4(2.168e-02, -4.579e-02, 4.918e-02, -1.727e-02, 4.170e-02, -1.246e-01, 5.060e-02, 8.042e-03, 2.092e-02, -7.061e-03, 2.682e-02, -4.390e-03, -5.984e-03, 2.385e-02, 3.160e-02, -2.543e-02)); + r += mul(s1_1, M4(-7.658e-02, 5.386e-02, -7.724e-03, -5.315e-02, 6.599e-02, 1.275e-01, -2.768e-02, 6.249e-02, 3.247e-02, -3.022e-03, 1.086e-02, -3.709e-03, 3.864e-02, -4.664e-02, 6.696e-02, 8.100e-03)); + r += mul(s1_2, M4(-6.044e-02, -1.413e-02, -5.048e-03, -3.623e-02, 3.267e-02, -2.345e-03, -2.573e-02, 2.255e-02, 4.310e-02, 2.143e-03, 1.195e-02, -1.254e-03, 3.588e-02, 5.696e-04, 3.540e-02, -5.437e-02)); + r += mul(s1_3, M4(5.584e-02, 2.195e-02, 1.284e-01, -2.934e-02, -1.995e-02, 1.069e-01, 1.716e-02, 3.259e-02, 2.022e-02, 6.430e-02, 1.950e-02, -1.204e-03, -8.247e-03, -1.034e-01, 4.017e-02, -1.014e-01)); + r += mul(s1_4, M4(7.738e-02, -7.787e-02, -8.559e-02, -3.832e-01, -1.601e-01, 1.303e-02, 1.333e-01, -5.981e-01, 4.252e-02, -3.160e-02, 3.228e-02, -3.681e-03, -1.285e-01, 2.940e-01, 2.559e-01, 1.676e-02)); + r += mul(s1_5, M4(-1.728e-02, -9.817e-03, 2.978e-02, -1.828e-01, 1.827e-02, -1.183e-02, -3.527e-02, -8.733e-03, 5.358e-02, 2.633e-02, 1.640e-02, -2.296e-02, 2.498e-02, -5.456e-03, 4.117e-02, 1.054e-01)); + r += mul(s1_6, M4(1.625e-02, 3.119e-02, 2.967e-02, -4.471e-02, 3.823e-02, -6.555e-02, -2.538e-02, 8.332e-02, 2.353e-02, -3.428e-03, 2.937e-02, 1.472e-02, 1.979e-01, 1.279e-01, 2.702e-02, 4.688e-02)); + r += mul(s1_7, M4(-2.596e-02, 1.838e-02, -1.123e-02, -1.558e-01, 7.660e-02, -4.095e-02, -2.568e-03, 1.184e-01, 5.229e-02, 2.339e-02, -1.010e-02, 1.139e-02, -6.036e-02, -1.079e-01, -7.627e-02, -1.255e-01)); + r += mul(s1_8, M4(2.202e-02, -6.740e-03, 
4.657e-02, -1.208e-01, 1.782e-03, 1.630e-02, 1.916e-02, -2.809e-02, 1.417e-02, -7.265e-04, -5.384e-04, 5.627e-03, -1.229e-03, 1.043e-02, -2.705e-02, 1.526e-01)); + r += mul(s2_0, M4(2.447e-02, 1.729e-02, -2.765e-02, 5.449e-02, -4.221e-02, -8.931e-02, 3.146e-02, -2.128e-02, -2.070e-02, 1.432e-03, -3.099e-02, -1.587e-02, 6.266e-03, -3.164e-02, -3.437e-02, 2.344e-02)); + r += mul(s2_1, M4(-9.059e-02, 5.509e-02, 3.072e-02, 1.440e-02, 9.820e-03, -1.885e-02, -5.084e-02, -7.885e-02, -2.189e-02, -1.145e-01, 3.476e-02, -5.418e-02, -1.010e-01, -1.096e-03, -3.446e-03, -1.668e-02)); + r += mul(s2_2, M4(-9.562e-02, 5.623e-03, -1.658e-02, 3.662e-02, -2.377e-02, -3.407e-02, -3.480e-02, 7.729e-02, -4.953e-02, 1.609e-02, 1.767e-02, -1.126e-01, -2.488e-02, -3.236e-02, -3.311e-03, -1.805e-02)); + r += mul(s2_3, M4(1.759e-02, -2.190e-02, -5.458e-02, 2.657e-02, 3.511e-02, -2.803e-01, 4.074e-02, -8.826e-02, 3.244e-02, 8.148e-02, 4.088e-02, 4.509e-02, -9.224e-02, -2.484e-04, -4.308e-02, -1.777e-03)); + r += mul(s2_4, M4(9.104e-04, 3.797e-02, -5.060e-02, 2.746e-03, 2.471e-01, 1.048e-01, 1.037e-01, 3.233e-01, 1.347e-01, 1.900e-03, 1.886e-02, -3.936e-01, -3.162e-02, 7.359e-02, 9.111e-02, -3.810e-02)); + r += mul(s2_5, M4(1.120e-02, 3.946e-02, 3.958e-03, 3.405e-02, -3.474e-02, -3.862e-02, -7.786e-03, 1.777e-01, 1.561e-02, 5.411e-03, 5.232e-02, -2.836e-01, 1.983e-02, 3.910e-03, 4.503e-02, -2.982e-02)); + r += mul(s2_6, M4(2.452e-02, 3.821e-02, -3.523e-02, 1.539e-02, -4.772e-02, 1.718e-02, -3.666e-02, -6.527e-02, 1.994e-02, -6.181e-03, 5.485e-02, -4.438e-03, 4.088e-02, 3.997e-02, 1.727e-02, 5.171e-02)); + r += mul(s2_7, M4(6.000e-02, 3.510e-02, 3.557e-02, -8.980e-02, -5.613e-02, -4.479e-02, -1.678e-01, 5.538e-02, 5.004e-02, 1.512e-02, 2.275e-02, -1.108e-01, 1.488e-01, 2.930e-03, -1.023e-02, 3.367e-02)); + r += mul(s2_8, M4(6.663e-03, -1.386e-03, -1.365e-02, -2.420e-02, -5.310e-02, 2.358e-02, -1.795e-02, -2.243e-02, 1.510e-02, 4.362e-03, 8.571e-05, -9.362e-02, 5.626e-03, -1.368e-02, -1.471e-03, 
3.142e-02)); + r += mul(s3_0, M4(-3.950e-02, -4.733e-02, 9.475e-02, -4.892e-02, 9.380e-02, -1.509e-02, 1.450e-02, -2.095e-02, -3.504e-02, 2.453e-02, -5.969e-02, 2.178e-03, 6.615e-02, 7.004e-02, 8.001e-02, -6.070e-02)); + r += mul(s3_1, M4(-1.676e-02, -1.485e-02, 2.666e-01, 8.033e-02, -4.579e-03, -3.812e-02, 3.769e-03, 3.362e-03, -9.428e-02, -3.817e-02, -3.272e-02, 1.008e-01, -3.819e-02, 7.791e-02, -7.914e-02, 2.262e-02)); + r += mul(s3_2, M4(-2.685e-02, 2.254e-02, 3.175e-02, -7.497e-02, -5.441e-03, 3.000e-03, -3.112e-03, 2.195e-02, 5.569e-03, 1.282e-02, 2.887e-02, 1.238e-02, -1.596e-01, 1.621e-02, 1.791e-02, -1.396e-01)); + r += mul(s3_3, M4(9.641e-02, -1.211e-02, 1.318e-01, -2.892e-02, 9.309e-02, -1.012e-01, 5.106e-02, -2.608e-02, 3.708e-02, 8.899e-03, -7.867e-03, 6.110e-02, -2.357e-01, 2.636e-01, -1.759e-01, 3.462e-02)); + r += mul(s3_4, M4(1.184e-01, -6.208e-02, -7.662e-02, -1.109e-01, 1.660e-01, 2.238e-02, 2.532e-02, -2.775e-02, 6.642e-02, 1.673e-01, -1.187e-02, -1.796e-01, 3.810e-02, -1.789e-01, 3.771e-02, 5.403e-01)); + r += mul(s3_5, M4(-1.026e-03, 3.177e-02, -7.086e-02, 1.776e-01, 2.638e-02, 3.480e-02, 1.754e-02, 5.252e-02, 5.514e-03, 6.224e-03, 4.988e-04, 1.556e-03, 7.209e-02, 2.107e-02, 3.573e-02, -2.720e-01)); + r += mul(s3_6, M4(-7.826e-02, -2.392e-02, 7.209e-02, -7.598e-02, -1.960e-02, -9.385e-03, 3.182e-02, -2.384e-02, -3.284e-03, 2.330e-02, 2.146e-02, 3.374e-02, -3.855e-02, -2.250e-02, -2.083e-02, 2.701e-02)); + r += mul(s3_7, M4(-1.451e-01, 4.434e-02, -8.629e-02, 1.044e-02, 1.889e-01, 2.243e-02, 3.330e-03, -2.253e-02, 7.287e-03, -1.563e-02, 2.876e-03, -1.321e-02, -3.684e-02, 8.823e-02, 2.742e-02, 4.708e-02)); + r += mul(s3_8, M4(-6.824e-02, 2.854e-02, -6.687e-03, -9.146e-02, 6.651e-02, -1.166e-02, 1.677e-02, -5.796e-02, 7.355e-03, 8.139e-03, -2.706e-02, 5.048e-02, 3.256e-02, -7.823e-03, 3.660e-03, -1.004e-01)); + r += mul(s4_0, M4(-9.431e-03, 5.889e-02, -2.126e-02, -1.830e-02, 2.106e-02, -9.595e-02, 7.357e-02, -6.164e-02, -3.097e-03, -7.961e-02, 
1.315e-01, 2.196e-02, 1.916e-02, 5.432e-02, 9.130e-03, 1.871e-02)); + r += mul(s4_1, M4(1.210e-01, 3.681e-02, 2.575e-02, 5.345e-02, -6.667e-02, 2.085e-02, -5.452e-02, -1.316e-01, 8.277e-02, 1.974e-02, -4.569e-03, 6.749e-02, 4.601e-02, -6.241e-03, 4.080e-01, 9.546e-02)); + r += mul(s4_2, M4(6.069e-02, 1.337e-02, 3.668e-03, 4.510e-02, 3.752e-02, -6.577e-02, 6.636e-02, -6.562e-02, -2.238e-02, 2.513e-03, -3.745e-02, 1.332e-01, -1.584e-01, -1.608e-02, 1.324e-01, -1.189e-01)); + r += mul(s4_3, M4(-1.071e-01, 2.279e-02, -3.308e-02, -1.314e-03, 4.047e-03, 5.535e-02, 5.889e-02, 1.296e-02, 9.058e-02, 4.993e-02, 7.142e-02, 3.842e-02, -3.450e-02, -1.056e-01, 3.612e-02, 1.292e-02)); + r += mul(s4_4, M4(1.172e-02, -9.352e-03, 5.152e-02, -5.238e-03, -3.401e-02, 9.417e-03, -2.763e-02, 5.862e-02, 2.160e-01, -1.091e-01, 2.705e-02, -1.936e-01, -5.915e-02, 1.654e-01, 5.814e-01, -7.901e-02)); + r += mul(s4_5, M4(1.396e-01, 3.896e-02, 1.335e-04, -3.864e-02, -5.279e-02, -4.169e-02, -2.619e-02, 7.096e-02, 9.102e-02, -3.263e-02, 7.145e-03, -5.895e-02, 5.079e-02, 2.301e-01, 2.891e-01, -8.593e-02)); + r += mul(s4_6, M4(1.144e-02, 2.328e-02, -7.695e-02, -1.343e-03, 5.186e-02, 6.368e-02, -2.607e-02, -2.692e-02, -5.042e-02, -4.400e-02, 1.485e-02, 2.958e-03, 4.236e-02, 5.249e-03, -1.026e-01, 2.812e-02)); + r += mul(s4_7, M4(5.092e-02, -3.809e-03, 4.445e-02, 2.612e-02, 1.142e-01, -7.595e-02, 5.381e-02, 1.712e-02, -1.065e-01, -7.024e-03, 2.167e-02, -9.190e-02, 6.058e-02, -9.443e-02, 1.459e-01, -4.769e-02)); + r += mul(s4_8, M4(3.684e-04, -1.860e-02, -1.058e-02, 1.948e-02, 2.465e-03, -5.119e-03, 2.769e-02, 1.458e-02, -2.639e-02, 8.080e-03, -1.704e-02, -2.213e-02, 5.268e-02, -5.988e-02, 3.712e-02, 1.254e-01)); + r += mul(s5_0, M4(-2.496e-02, 8.809e-02, -1.426e-01, 6.392e-02, 3.160e-02, -6.681e-02, 8.480e-02, -7.819e-02, -2.165e-02, 2.542e-02, -1.916e-02, 2.132e-02, -3.296e-02, 1.161e-02, -1.487e-02, 2.013e-02)); + r += mul(s5_1, M4(2.416e-01, 1.343e-02, 4.903e-02, 6.341e-02, -9.003e-02, -7.302e-02, 
-1.000e-01, -5.732e-03, -2.697e-02, 5.232e-02, -8.220e-02, -3.580e-02, 9.402e-03, -1.418e-03, -1.429e-02, 7.107e-03)); + r += mul(s5_2, M4(1.023e-01, 2.851e-02, -1.219e-02, 3.114e-02, 9.954e-03, -2.428e-02, 4.689e-02, 8.756e-03, -1.618e-02, 1.071e-02, -1.172e-02, -8.114e-03, -3.601e-02, 3.402e-02, -7.213e-03, -1.546e-02)); + r += mul(s5_3, M4(2.819e-02, 3.272e-01, -3.521e-02, -1.169e-01, 1.235e-01, 1.380e-01, 1.391e-01, -1.713e-02, -4.166e-02, -1.108e-01, -5.924e-02, 6.759e-02, -4.242e-02, 1.073e-02, -3.557e-02, -3.206e-02)); + r += mul(s5_4, M4(-1.297e-03, -7.965e-02, 2.005e-01, 3.798e-01, 1.215e-01, -3.609e-02, -8.034e-02, 6.808e-02, 4.566e-02, -1.028e-02, 1.626e-02, 2.790e-02, -6.773e-02, -5.215e-02, -1.234e-02, 1.084e-02)); + r += mul(s5_5, M4(1.163e-01, 6.928e-02, -7.286e-02, -7.548e-02, -7.859e-02, -3.504e-02, 1.962e-02, -1.861e-02, 6.673e-02, 5.095e-03, 4.188e-02, 3.247e-02, 5.449e-02, -3.710e-02, -4.225e-03, -6.734e-03)); + r += mul(s5_6, M4(-6.793e-02, -8.408e-02, -1.155e-01, 6.691e-02, -6.385e-03, -1.279e-02, 7.386e-02, -2.486e-02, 2.807e-02, 7.187e-02, -6.588e-02, 2.863e-02, 3.158e-02, 4.317e-03, -1.664e-02, -9.957e-03)); + r += mul(s5_7, M4(4.247e-02, 7.270e-02, 1.199e-02, -2.256e-01, 3.120e-02, 3.151e-03, -1.541e-02, -9.396e-02, 2.435e-02, 9.246e-03, 8.851e-04, -8.081e-02, 3.958e-02, 1.086e-02, -9.265e-04, -3.038e-02)); + r += mul(s5_8, M4(4.742e-02, 5.148e-02, -3.171e-02, -1.373e-01, 5.415e-02, -9.496e-03, 1.975e-02, 3.439e-03, -5.139e-02, -2.697e-02, 1.210e-02, -1.295e-02, -3.452e-02, -2.794e-03, 1.383e-02, 1.658e-02)); + r += mul(s6_0, M4(-2.507e-02, -7.946e-04, 1.110e-02, -4.932e-04, 2.526e-02, 9.996e-03, 2.226e-02, -1.076e-02, -6.662e-03, -3.381e-02, -2.353e-02, 5.043e-03, 3.321e-03, 2.826e-02, 4.069e-02, 5.503e-03)); + r += mul(s6_1, M4(3.050e-02, -2.427e-03, -1.249e-01, -4.679e-03, 3.654e-03, -4.095e-02, -3.797e-03, 5.638e-03, 4.113e-03, 3.107e-02, -3.934e-02, -6.310e-02, -3.349e-02, 7.328e-02, 3.905e-02, 5.081e-02)); + r += mul(s6_2, 
M4(2.691e-02, -3.391e-02, -1.199e-02, 4.570e-03, -6.239e-02, 2.360e-03, -3.945e-02, -6.586e-03, -2.278e-02, 2.032e-04, 7.616e-03, 2.224e-02, 1.718e-03, 1.473e-02, 1.707e-02, -1.987e-02)); + r += mul(s6_3, M4(-4.025e-03, 2.814e-02, 6.370e-02, -3.235e-02, -1.529e-02, 3.723e-02, -1.565e-03, 1.399e-02, 7.093e-02, -7.519e-02, 4.052e-02, 1.021e-02, 4.772e-02, 3.930e-02, -1.189e-01, -1.362e-02)); + r += mul(s6_4, M4(-3.540e-02, 8.062e-02, -1.039e-01, 1.217e-02, 3.187e-03, 1.265e-02, -4.382e-02, -2.559e-02, 1.570e-01, -7.089e-02, 8.914e-03, 7.153e-02, 1.327e-01, -9.643e-02, -5.999e-02, 7.664e-02)); + r += mul(s6_5, M4(-2.217e-02, 1.164e-02, -1.264e-02, 4.551e-02, -6.797e-02, 1.238e-03, 3.343e-02, 9.795e-03, 1.598e-01, 3.646e-02, 4.413e-02, -1.461e-02, 2.523e-03, -5.439e-04, -4.791e-02, 1.033e-03)); + r += mul(s6_6, M4(1.176e-02, 1.414e-02, -3.136e-03, -2.034e-03, 2.634e-02, 4.214e-02, 2.754e-02, 3.087e-02, -3.876e-02, -4.940e-02, -3.528e-02, -6.377e-03, -1.154e-02, -3.120e-02, 8.880e-02, 2.014e-02)); + r += mul(s6_7, M4(4.944e-02, 3.875e-03, 2.216e-02, -5.348e-02, 4.101e-02, 7.348e-02, -7.037e-02, 5.307e-02, -1.714e-01, 1.506e-02, -4.873e-02, -5.804e-02, -2.607e-02, 1.517e-02, 5.446e-02, -2.876e-02)); + r += mul(s6_8, M4(-7.465e-02, 7.315e-04, 9.037e-03, 1.764e-02, 3.369e-02, 5.548e-04, 5.357e-02, 2.939e-02, 4.354e-02, -1.601e-02, -3.103e-03, -7.247e-02, 5.881e-02, -7.223e-03, 2.963e-02, -1.031e-01)); + r += mul(s7_0, M4(-5.706e-02, -5.552e-03, -1.280e-01, -2.101e-02, -1.691e-02, -3.833e-02, 4.682e-02, -3.053e-03, -2.772e-02, -1.184e-01, 5.773e-02, -3.441e-02, 4.748e-02, -2.675e-02, -1.698e-01, 5.623e-02)); + r += mul(s7_1, M4(-1.115e-01, -5.093e-02, -2.015e-01, -1.037e-01, 7.328e-02, 4.829e-02, 6.089e-02, 5.624e-02, -1.812e-01, -6.170e-03, -6.281e-02, -6.716e-02, -8.051e-02, 3.991e-02, 7.369e-02, -7.423e-02)); + r += mul(s7_2, M4(8.941e-02, -1.362e-01, 4.570e-02, 5.541e-02, -1.336e-01, 3.946e-02, -1.285e-01, -6.767e-02, -3.703e-02, 2.243e-02, 1.189e-02, 2.213e-02, 
7.284e-03, 1.078e-02, -3.074e-02, 6.708e-02)); + r += mul(s7_3, M4(-9.038e-02, -5.443e-02, -2.140e-01, 2.917e-02, -8.495e-02, 1.500e-01, -1.034e-01, -6.819e-03, 1.175e-01, -2.877e-02, 3.825e-02, 1.543e-02, -6.762e-02, 6.191e-02, -3.292e-02, 1.214e-01)); + r += mul(s7_4, M4(1.351e-01, 1.929e-01, -4.921e-01, 5.014e-02, -2.351e-01, -3.016e-01, -1.121e-01, 3.097e-01, -1.658e-01, -1.272e-01, 7.207e-02, -3.743e-01, 3.631e-01, -1.412e-01, -1.938e-01, -8.993e-02)); + r += mul(s7_5, M4(-4.301e-02, 7.487e-02, 2.500e-02, 1.069e-01, -1.016e-01, 4.391e-02, 1.315e-01, 5.845e-02, 4.127e-02, 1.446e-02, -1.786e-02, -8.449e-02, -4.528e-02, -1.804e-02, -2.909e-02, -8.959e-02)); + r += mul(s7_6, M4(3.403e-02, 5.445e-02, -8.672e-03, -5.562e-02, 1.259e-01, 7.118e-02, -1.102e-03, 6.377e-03, -6.440e-03, -3.774e-02, -1.121e-02, -4.780e-02, 2.907e-03, -6.161e-02, 4.809e-02, 6.793e-02)); + r += mul(s7_7, M4(3.418e-02, -1.906e-02, -4.124e-02, -6.526e-02, -1.621e-01, -3.490e-02, -4.261e-02, -1.441e-01, -1.343e-02, 3.143e-02, 8.302e-03, 1.160e-01, -4.174e-02, 7.590e-03, 1.030e-01, -2.129e-01)); + r += mul(s7_8, M4(6.975e-03, 1.297e-02, 3.317e-02, -8.614e-02, -1.193e-01, 6.797e-02, -2.014e-02, -6.614e-02, 9.511e-02, 1.817e-02, 6.716e-03, 2.764e-02, -4.762e-03, -4.361e-03, 1.822e-02, -7.534e-02)); + r += V4(-7.831e-03, -2.140e-04, -1.201e-04, 7.332e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, 
V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.422e-02, 1.680e-02, -4.666e-02, -4.495e-02, -2.339e-03, 8.326e-03, -1.462e-02, -4.952e-02, 5.134e-02, -1.934e-02, 4.511e-02, 1.334e-01, -4.618e-03, -8.103e-03, 3.456e-02, 1.580e-02)); + r += mul(s0_1, M4(-1.842e-03, -3.105e-02, -2.950e-03, 1.885e-02, -3.407e-02, -3.604e-03, -2.389e-03, -5.071e-02, -8.016e-02, -3.803e-02, -8.330e-02, 1.628e-01, 1.962e-02, 3.725e-02, 2.377e-02, 3.499e-02)); + r += mul(s0_2, M4(-7.064e-03, 6.164e-03, -4.705e-02, -8.524e-03, -5.537e-03, 3.894e-02, -2.029e-02, -2.486e-02, -9.182e-03, 1.519e-01, -1.817e-02, 6.079e-02, -9.486e-03, -2.110e-02, 3.287e-02, -2.019e-03)); + r += mul(s0_3, M4(-1.171e-01, -5.278e-02, 7.026e-02, 4.255e-02, 1.090e-02, -5.941e-02, -3.697e-02, -3.014e-02, -1.685e-01, 2.513e-02, -4.012e-02, 8.469e-03, 1.878e-02, -1.719e-02, -8.298e-03, 1.018e-01)); + r += mul(s0_4, M4(-6.699e-02, 5.956e-04, -9.047e-02, -1.346e-01, 2.411e-02, -4.314e-02, 3.185e-02, -9.635e-02, 2.837e-01, -3.088e-02, -2.452e-02, -1.238e-01, -9.648e-02, 4.971e-02, -4.276e-02, -8.128e-02)); + r += mul(s0_5, M4(3.128e-02, -6.913e-02, -1.041e-02, -7.746e-02, -4.009e-02, -8.374e-02, 4.016e-02, 8.426e-03, 4.522e-03, 1.049e-01, 9.970e-02, -3.086e-01, -6.120e-03, -1.388e-01, -1.063e-01, -5.434e-02)); + r += mul(s0_6, M4(-3.365e-02, -3.072e-02, -9.321e-02, -1.832e-02, -5.298e-03, 7.121e-04, 2.169e-03, 4.990e-02, -8.102e-02, 4.921e-02, -5.902e-02, -2.201e-02, 1.276e-02, 3.207e-02, 3.180e-02, -1.999e-02)); + r += mul(s0_7, M4(-9.262e-02, -1.065e-01, -6.215e-02, -9.939e-02, 7.738e-02, -1.434e-02, 9.368e-03, 8.076e-02, 8.899e-03, -1.052e-01, -1.082e-01, 4.778e-02, -1.707e-01, -5.856e-02, -6.962e-02, -1.961e-02)); + r += mul(s0_8, M4(3.140e-02, 1.377e-02, 1.593e-04, -2.125e-02, 9.562e-03, 3.623e-02, -4.402e-02, -2.442e-02, -8.968e-02, 1.988e-01, 1.410e-02, 5.882e-02, 7.932e-02, 9.126e-02, 6.180e-02, -4.356e-02)); + r += mul(s1_0, M4(-1.858e-02, -9.682e-04, 1.851e-02, -2.569e-02, -1.704e-02, -5.016e-03, 
4.800e-02, -9.740e-02, -5.270e-03, -6.199e-03, -2.214e-02, -2.733e-02, 3.645e-03, 5.273e-03, -2.155e-02, 1.597e-02)); + r += mul(s1_1, M4(2.906e-02, 6.890e-03, -2.221e-02, 5.509e-02, -1.067e-01, 3.076e-01, -1.190e-01, -3.612e-04, -7.062e-03, -2.028e-02, 5.407e-03, 3.502e-02, 4.203e-02, -5.999e-03, 6.040e-02, -9.000e-03)); + r += mul(s1_2, M4(-1.598e-02, -2.484e-02, -1.963e-02, 9.807e-03, 2.597e-02, -8.287e-02, 1.311e-02, -6.579e-02, 1.673e-02, 2.149e-02, 5.862e-03, -2.455e-03, -7.638e-04, 9.753e-03, -1.168e-02, 3.686e-03)); + r += mul(s1_3, M4(-2.996e-02, 1.216e-02, 2.214e-02, 1.757e-02, -1.134e-02, 5.456e-02, -3.628e-02, 4.101e-02, -1.514e-02, -2.445e-02, 6.486e-03, -4.812e-02, 4.947e-02, 1.221e-02, 1.329e-02, 4.396e-02)); + r += mul(s1_4, M4(1.473e-02, 1.606e-01, -5.135e-02, -3.255e-02, -1.753e-01, -5.682e-01, 1.286e-01, -4.066e-02, 8.350e-02, 4.364e-02, 9.298e-03, 1.028e-01, -3.886e-02, 3.602e-02, -2.097e-01, 2.511e-03)); + r += mul(s1_5, M4(3.637e-02, -9.506e-03, 6.912e-02, -1.290e-02, -2.436e-03, 1.188e-01, 5.628e-02, 1.431e-02, -2.094e-02, -1.958e-02, -3.403e-02, 4.689e-02, -7.153e-03, -3.282e-02, -3.716e-02, 1.626e-01)); + r += mul(s1_6, M4(-2.461e-02, 5.353e-02, -3.433e-02, 3.605e-02, -6.121e-02, -3.178e-03, 5.595e-02, 1.794e-02, -6.023e-02, -3.588e-03, -3.785e-02, 3.699e-02, 1.142e-01, 2.821e-02, -5.308e-02, 6.403e-02)); + r += mul(s1_7, M4(5.947e-02, -7.719e-02, 4.145e-02, -7.409e-03, -1.055e-01, 1.610e-01, -4.151e-02, 1.995e-02, 6.130e-02, -2.186e-02, -1.589e-02, 1.102e-01, 1.114e-01, -4.216e-02, 1.647e-03, -1.100e-01)); + r += mul(s1_8, M4(7.085e-03, 2.188e-02, -6.992e-03, -2.332e-03, 1.478e-02, -6.092e-02, -4.126e-02, 3.169e-04, -1.958e-02, -4.985e-03, -1.923e-03, -3.218e-02, 1.927e-02, -8.432e-03, -4.363e-02, 1.668e-03)); + r += mul(s2_0, M4(-6.546e-03, 1.343e-02, 2.296e-02, 5.659e-02, 2.752e-02, -4.231e-02, 6.074e-02, -5.683e-03, -3.253e-02, -5.294e-02, 1.101e-02, 9.926e-03, -1.273e-02, 3.820e-04, 4.827e-02, -4.940e-04)); + r += mul(s2_1, 
M4(4.736e-02, 2.476e-02, -9.261e-03, 7.812e-02, -1.871e-02, 1.027e-02, -4.256e-02, 1.155e-02, 2.293e-02, 2.758e-02, -1.940e-02, -1.913e-02, 1.163e-02, -1.837e-02, -1.667e-02, 1.094e-02)); + r += mul(s2_2, M4(-1.668e-02, -5.029e-02, -1.440e-02, 4.319e-02, 1.579e-02, -1.302e-02, 5.899e-02, 2.061e-02, -6.245e-03, -1.449e-02, 3.008e-02, -1.664e-02, 2.230e-02, 9.839e-03, 5.975e-02, 1.928e-02)); + r += mul(s2_3, M4(-1.839e-02, -2.045e-02, -4.858e-02, 3.903e-02, 7.104e-03, 1.747e-02, -5.856e-02, -1.094e-01, 1.239e-02, 2.181e-02, -1.091e-01, -8.099e-02, -4.212e-02, 3.092e-03, 4.736e-02, -7.150e-02)); + r += mul(s2_4, M4(-7.680e-03, 2.967e-02, -2.112e-02, -1.317e-02, -2.971e-01, -2.871e-01, 3.943e-01, -2.758e-01, 8.246e-02, 1.079e-01, 4.440e-03, 1.274e-01, 1.480e-02, 1.701e-03, -8.268e-02, 1.860e-02)); + r += mul(s2_5, M4(1.855e-02, 4.260e-02, -4.680e-02, -1.964e-02, 4.660e-02, 2.995e-01, -8.861e-02, 3.235e-02, 2.948e-02, 2.388e-03, 3.504e-03, -2.162e-03, 1.332e-02, 1.133e-02, 3.025e-02, 1.443e-02)); + r += mul(s2_6, M4(7.442e-03, 8.911e-03, -6.702e-03, -4.585e-02, 1.291e-01, -4.421e-02, 1.339e-01, 3.993e-02, -1.566e-02, 2.829e-02, 3.262e-02, 1.285e-02, -7.299e-03, 2.724e-02, -3.766e-02, 1.047e-02)); + r += mul(s2_7, M4(-3.381e-02, -1.174e-02, -4.296e-02, 1.705e-03, -4.339e-03, 2.267e-01, 7.326e-02, -2.796e-03, 6.150e-02, 3.889e-02, -6.071e-04, 9.139e-02, -3.776e-02, 2.394e-02, -5.208e-02, 5.949e-02)); + r += mul(s2_8, M4(-3.450e-02, -3.383e-02, 7.836e-03, 1.100e-02, 1.184e-02, -1.977e-01, -2.924e-02, -4.890e-02, -3.583e-03, -4.512e-02, -1.497e-02, 4.827e-02, 2.246e-03, 2.053e-02, 2.313e-02, -4.874e-02)); + r += mul(s3_0, M4(5.694e-02, -5.528e-02, 5.212e-02, 2.986e-02, 1.836e-02, 3.805e-02, -2.986e-02, 5.002e-02, -2.912e-02, -2.534e-02, -3.611e-02, -2.406e-02, 6.479e-03, -5.702e-03, -1.243e-01, 3.241e-02)); + r += mul(s3_1, M4(2.800e-02, -1.076e-01, 4.683e-02, 2.143e-02, 1.158e-02, -3.525e-02, 1.007e-01, -1.794e-03, -9.832e-03, -2.611e-02, 5.176e-03, -5.500e-02, -3.821e-02, 
1.060e-02, 4.090e-02, -4.569e-02)); + r += mul(s3_2, M4(1.089e-02, -1.146e-01, -1.153e-02, -5.991e-02, 1.177e-02, 3.417e-03, -2.044e-02, -1.345e-03, 1.305e-03, 1.239e-02, 1.726e-02, -3.599e-02, 1.674e-02, -5.590e-02, -8.140e-02, -4.265e-02)); + r += mul(s3_3, M4(2.527e-02, 8.669e-02, 4.049e-02, -9.185e-03, -2.413e-02, -1.824e-03, 1.046e-01, 2.065e-02, -3.119e-02, -7.965e-02, -4.623e-02, 2.911e-02, 7.088e-02, -1.067e-01, 1.576e-01, 3.075e-02)); + r += mul(s3_4, M4(-1.465e-01, 1.556e-01, 6.490e-02, 1.404e-01, -1.715e-02, 1.477e-01, 1.859e-02, 1.233e-01, 2.244e-02, 8.846e-02, 1.742e-02, 6.853e-02, 8.496e-02, 2.641e-01, -2.901e-01, 1.538e-01)); + r += mul(s3_5, M4(5.795e-02, -5.553e-02, -1.528e-01, 2.953e-02, 4.175e-03, 3.038e-02, -7.615e-02, -2.932e-02, -3.998e-03, 3.270e-04, -5.368e-02, 4.379e-03, -3.845e-02, -8.842e-02, 1.667e-01, -1.082e-01)); + r += mul(s3_6, M4(-4.638e-02, 7.122e-02, 4.453e-03, -1.115e-01, 4.361e-02, 8.474e-03, 1.944e-02, 2.350e-02, -1.112e-02, -5.496e-02, 6.062e-03, -4.375e-03, -4.352e-02, 7.870e-02, -1.025e-01, -1.199e-01)); + r += mul(s3_7, M4(-9.071e-02, -5.780e-02, 3.592e-02, 3.266e-02, -7.675e-02, 7.431e-02, -9.768e-02, 2.174e-01, -9.449e-02, 2.945e-02, -7.159e-02, -6.166e-02, 9.235e-02, -1.781e-01, -5.495e-02, 7.006e-02)); + r += mul(s3_8, M4(-1.563e-02, 6.241e-02, 3.845e-02, -4.126e-02, 5.171e-02, -7.622e-03, -1.806e-03, 1.221e-01, 1.445e-02, -3.859e-02, -2.371e-02, 2.360e-02, -3.919e-02, 5.525e-02, 7.753e-04, -4.543e-02)); + r += mul(s4_0, M4(-7.578e-03, 2.589e-02, -3.446e-02, 9.255e-04, 1.508e-02, -4.230e-02, -3.854e-02, -8.312e-02, 5.879e-02, 2.448e-02, 3.286e-02, -8.081e-02, 2.705e-02, 5.509e-02, 1.780e-03, 9.692e-03)); + r += mul(s4_1, M4(7.212e-04, -9.108e-03, 8.121e-03, -4.260e-02, -3.512e-02, 5.898e-02, 9.525e-03, 4.439e-02, -1.030e-02, -1.913e-02, 5.020e-02, -1.238e-01, 9.665e-02, -1.807e-02, 5.449e-02, 2.058e-01)); + r += mul(s4_2, M4(1.662e-02, 3.488e-05, -4.044e-02, -5.045e-02, -2.282e-02, 7.772e-02, 5.240e-02, 1.573e-01, 
3.551e-02, -1.906e-02, -4.839e-02, 3.603e-02, -5.828e-03, 1.748e-02, -1.287e-01, 1.122e-01)); + r += mul(s4_3, M4(-4.855e-02, -6.473e-02, -3.205e-02, -1.106e-02, 6.551e-02, 1.172e-01, 3.972e-02, -1.110e-02, 1.708e-01, -7.197e-02, 5.240e-02, 6.689e-02, 4.516e-02, -6.609e-02, 1.406e-02, 7.274e-03)); + r += mul(s4_4, M4(-4.430e-02, -1.395e-01, -4.970e-02, 1.998e-02, 1.560e-01, -1.112e-01, 6.712e-02, -4.218e-04, -2.212e-01, 5.050e-03, -1.333e-01, 2.678e-01, 2.361e-01, -1.204e-01, -2.388e-01, -1.541e-01)); + r += mul(s4_5, M4(1.204e-02, -1.997e-02, -6.763e-02, 1.468e-01, -6.895e-02, 7.981e-02, 3.946e-02, -1.001e-01, -5.877e-03, 1.462e-01, 1.895e-02, 9.893e-02, -7.228e-03, -3.419e-01, 5.974e-02, -3.917e-01)); + r += mul(s4_6, M4(7.028e-03, -1.167e-01, -1.396e-02, 6.679e-02, -1.213e-01, -1.553e-01, -6.473e-02, 5.061e-02, -4.635e-02, 6.373e-02, -2.930e-02, 2.868e-02, 6.047e-02, 3.747e-03, -5.183e-03, 2.880e-02)); + r += mul(s4_7, M4(9.784e-02, 2.841e-02, -6.079e-02, 4.337e-02, 4.047e-02, 1.088e-01, 2.636e-02, 7.993e-02, 1.425e-01, -2.001e-02, 2.533e-02, -1.085e-02, 1.535e-02, 8.569e-02, 1.048e-01, 4.093e-02)); + r += mul(s4_8, M4(-2.997e-02, -2.626e-02, 5.060e-02, 2.708e-02, 1.564e-02, -4.655e-03, 1.835e-02, -4.612e-03, 1.653e-02, -6.783e-03, -4.532e-05, -8.021e-02, -1.153e-01, 1.970e-01, -1.619e-01, 1.018e-01)); + r += mul(s5_0, M4(-7.963e-02, 8.415e-02, -8.603e-02, -6.643e-02, 5.413e-02, 1.145e-02, 4.283e-03, 1.071e-02, 1.425e-02, 9.550e-03, -5.742e-03, 2.553e-03, 8.458e-03, -1.834e-03, 2.582e-02, 1.234e-02)); + r += mul(s5_1, M4(4.813e-02, -3.183e-02, -8.014e-02, -1.980e-02, -4.461e-02, 3.795e-02, 1.069e-01, -3.706e-02, -1.612e-03, 2.115e-02, 4.532e-02, 1.812e-02, 4.970e-03, -7.523e-03, 6.517e-03, 4.676e-02)); + r += mul(s5_2, M4(-3.456e-03, 8.783e-03, -3.684e-03, -3.165e-02, 5.899e-03, 4.142e-02, 4.836e-02, 8.154e-02, 2.633e-02, 5.588e-02, -6.064e-02, 5.039e-02, -1.274e-02, 5.913e-04, -7.067e-02, -3.322e-02)); + r += mul(s5_3, M4(9.003e-02, -8.612e-02, -1.561e-01, 
3.709e-02, 5.988e-02, 8.174e-02, -2.205e-02, -2.136e-02, 2.638e-02, -1.036e-01, 6.140e-02, 3.036e-02, -4.891e-02, -2.581e-02, -4.581e-02, -4.246e-02)); + r += mul(s5_4, M4(9.493e-03, -3.479e-01, -7.998e-02, -9.455e-02, 3.910e-02, -1.593e-02, 4.751e-02, 6.188e-02, 9.840e-02, 1.341e-01, -5.374e-02, -2.259e-02, 1.011e-01, 2.673e-02, -9.297e-03, -1.089e-01)); + r += mul(s5_5, M4(-3.690e-02, -3.564e-03, -7.306e-02, 6.273e-03, 5.888e-03, 1.425e-02, 1.465e-02, -4.929e-02, -1.569e-02, -5.070e-03, 6.049e-02, 8.172e-02, 7.841e-03, -3.845e-03, 9.943e-02, 2.239e-02)); + r += mul(s5_6, M4(-2.568e-01, 7.435e-02, -2.228e-02, 7.944e-02, -5.772e-02, -6.852e-02, -2.834e-02, 8.123e-02, 1.470e-01, 1.859e-02, -2.704e-02, 9.363e-03, 3.166e-02, 2.803e-02, -1.214e-03, 1.293e-02)); + r += mul(s5_7, M4(-6.304e-02, -2.063e-01, -7.649e-02, -1.351e-01, 5.625e-02, 3.581e-02, 2.647e-02, 9.335e-03, 1.043e-01, 5.061e-03, -5.448e-03, -1.123e-01, 2.964e-02, -9.273e-03, 4.073e-02, 8.731e-03)); + r += mul(s5_8, M4(-6.478e-02, -6.498e-02, -5.031e-02, 2.011e-01, 6.402e-03, 4.651e-02, 1.568e-02, 8.174e-02, -4.208e-02, -2.012e-02, 7.375e-03, -2.978e-02, 3.769e-02, -5.344e-03, -1.052e-02, 4.279e-02)); + r += mul(s6_0, M4(-5.605e-03, -1.344e-02, 3.815e-02, 6.425e-02, -4.361e-04, -5.982e-03, -2.016e-03, -6.674e-03, 1.816e-03, -6.747e-03, 9.217e-03, 7.074e-02, 1.043e-02, -9.410e-03, -2.738e-02, 2.603e-02)); + r += mul(s6_1, M4(-3.818e-02, -4.530e-02, 1.982e-03, 1.438e-02, 2.250e-02, -7.125e-03, -7.718e-05, -1.763e-02, -1.533e-02, 3.678e-02, -2.880e-02, 6.018e-02, 3.396e-02, -3.313e-02, 3.760e-03, 5.717e-02)); + r += mul(s6_2, M4(1.833e-02, 4.613e-02, -8.530e-03, -2.255e-02, -7.005e-03, -1.823e-02, 6.326e-04, -1.960e-02, 1.857e-02, 4.009e-03, 1.863e-02, 1.340e-02, 7.106e-03, 1.505e-02, -3.722e-02, 9.284e-03)); + r += mul(s6_3, M4(-1.978e-02, -2.542e-02, -5.701e-02, -3.668e-02, -3.079e-02, 1.690e-02, -4.785e-04, -4.757e-02, 2.229e-02, 3.626e-02, 7.499e-02, 7.492e-02, -8.919e-02, -1.301e-02, 3.316e-02, 
-2.238e-02)); + r += mul(s6_4, M4(5.750e-02, 1.236e-01, -3.727e-02, -9.971e-02, 5.715e-02, 3.370e-02, -5.223e-03, 6.820e-02, -3.931e-02, -2.162e-02, 4.768e-02, -3.176e-01, 1.332e-01, 8.223e-03, -6.747e-02, 1.525e-02)); + r += mul(s6_5, M4(-5.534e-02, 2.196e-02, -3.586e-02, 4.086e-02, -3.519e-02, 5.550e-02, 7.686e-03, 1.698e-02, 4.426e-02, 4.819e-02, -3.056e-02, 4.639e-03, 8.257e-03, -5.203e-03, 4.029e-02, 1.317e-02)); + r += mul(s6_6, M4(-4.034e-02, 5.021e-02, -5.809e-03, 2.783e-02, -2.373e-02, -3.029e-02, -5.745e-02, -1.953e-02, 7.143e-02, 7.924e-02, 9.374e-03, -6.179e-03, -3.599e-02, 3.946e-02, 3.311e-02, 1.562e-02)); + r += mul(s6_7, M4(4.839e-02, -6.011e-02, -1.972e-02, 6.475e-02, -1.888e-02, -4.052e-02, -5.563e-02, -6.110e-02, 7.359e-02, -2.875e-02, 4.410e-02, -6.216e-02, 3.299e-02, -2.920e-02, 1.754e-02, 4.605e-02)); + r += mul(s6_8, M4(4.073e-02, -1.848e-02, -1.961e-02, -3.710e-02, 4.170e-02, 1.913e-02, -2.446e-02, -1.048e-01, -2.286e-02, -1.650e-02, -8.772e-03, 7.160e-02, -2.420e-02, -3.058e-02, 3.382e-02, 3.350e-02)); + r += mul(s7_0, M4(-2.956e-02, -5.428e-02, 3.595e-02, 2.180e-02, -2.980e-02, -1.348e-02, -4.355e-02, 6.596e-02, 3.844e-02, 7.667e-03, 2.615e-02, 4.893e-02, -7.366e-02, 3.688e-02, -3.084e-02, -2.588e-02)); + r += mul(s7_1, M4(5.338e-02, 4.289e-02, 3.982e-02, 6.554e-02, 2.097e-03, -3.347e-02, 4.455e-02, 2.917e-02, -6.689e-03, -3.879e-02, 8.891e-02, -9.196e-02, 7.563e-03, 1.112e-01, -1.969e-01, 8.919e-02)); + r += mul(s7_2, M4(6.999e-02, 1.312e-03, 1.772e-01, 2.577e-03, -3.489e-02, -1.944e-02, -9.796e-02, -2.732e-02, 4.999e-03, -6.560e-03, -4.190e-02, 6.350e-02, -2.773e-02, -2.716e-02, 6.816e-03, -4.187e-02)); + r += mul(s7_3, M4(-2.480e-02, 1.246e-01, -2.110e-04, -5.843e-02, 5.100e-02, -5.059e-02, -5.935e-02, -1.641e-01, 5.114e-02, -6.457e-02, -3.647e-02, -4.605e-02, -2.080e-01, -9.407e-02, -4.015e-02, 8.764e-02)); + r += mul(s7_4, M4(-2.201e-01, 4.873e-01, -1.571e-01, -8.180e-02, 7.717e-02, 2.528e-01, 1.153e-02, -3.846e-02, -1.282e-01, 
3.397e-01, 1.587e-01, -2.980e-01, -1.068e-02, 8.088e-02, 3.736e-01, -1.061e-01)); + r += mul(s7_5, M4(2.394e-02, -6.763e-02, -1.783e-01, 3.603e-02, 4.565e-03, -1.765e-01, 1.102e-01, -2.667e-01, 1.554e-02, -6.939e-02, 1.764e-03, -3.768e-02, 2.578e-02, 9.643e-02, 3.601e-02, -2.208e-02)); + r += mul(s7_6, M4(2.830e-02, 8.147e-03, 6.145e-03, -4.349e-02, -1.741e-01, 2.157e-02, -1.054e-01, -1.158e-01, 4.637e-02, -1.196e-02, 1.201e-01, -8.628e-03, -2.468e-01, 3.790e-02, 5.359e-02, 2.575e-02)); + r += mul(s7_7, M4(-8.000e-02, 3.732e-02, 1.501e-02, -4.495e-03, 1.110e-01, 5.499e-02, 3.363e-02, -9.936e-02, 8.269e-02, 4.787e-02, -5.760e-02, 4.804e-03, -6.810e-02, -6.330e-02, -5.539e-02, 4.120e-02)); + r += mul(s7_8, M4(3.943e-02, 1.279e-02, -8.367e-04, 4.311e-02, -1.619e-01, 4.199e-01, -2.100e-03, -3.792e-01, -4.745e-02, -2.734e-03, -4.497e-02, -2.701e-02, -2.135e-02, -2.782e-02, 1.058e-02, 2.532e-02)); + r += V4(-7.390e-04, 1.816e-03, -2.728e-04, -1.866e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.895e-02, -3.486e-03, -1.379e-02, -1.842e-02, -3.970e-02, 3.895e-02, -3.372e-03, -4.028e-02, -8.637e-02, 1.274e-01, -1.950e-02, -1.323e-01, 2.921e-02, -8.863e-03, -3.657e-02, -3.447e-02)); + r += mul(s0_1, M4(-1.112e-02, -1.559e-02, 1.731e-02, -9.551e-02, 4.760e-02, -4.934e-02, -2.834e-02, 2.646e-02, -7.670e-02, -6.197e-02, 
-1.755e-01, -2.571e-03, -8.377e-03, 2.422e-02, 3.033e-02, 1.416e-02)); + r += mul(s0_2, M4(-2.654e-03, -9.362e-02, 3.156e-02, -3.364e-02, 5.052e-03, 7.076e-02, -7.983e-03, 2.995e-03, 2.022e-02, 3.845e-02, 1.639e-02, 2.667e-02, 2.051e-02, -5.142e-02, 4.048e-03, 9.972e-03)); + r += mul(s0_3, M4(7.535e-02, -3.695e-02, -1.487e-03, 1.763e-02, 2.683e-02, 2.572e-02, -3.221e-03, 4.724e-02, 1.230e-01, -1.008e-02, -3.410e-01, -1.780e-02, -5.457e-02, 7.070e-02, 5.849e-02, 3.655e-02)); + r += mul(s0_4, M4(-1.365e-02, 6.174e-02, 3.209e-02, 1.707e-02, 2.051e-01, 3.121e-02, 1.777e-02, -5.450e-02, 8.808e-02, 2.535e-02, -3.146e-01, -9.044e-02, 4.733e-02, -1.513e-01, -4.178e-02, 1.460e-01)); + r += mul(s0_5, M4(5.314e-03, -9.108e-02, -1.646e-02, -6.978e-02, 4.098e-02, 4.081e-02, 3.569e-02, 3.792e-02, -5.984e-02, -5.551e-02, -2.214e-01, -4.605e-02, -6.742e-02, -4.717e-03, 3.778e-02, -4.409e-02)); + r += mul(s0_6, M4(-5.846e-02, 3.342e-02, 5.078e-02, -3.740e-02, 5.761e-02, -2.075e-02, 1.671e-02, -3.571e-02, -5.007e-02, -7.200e-02, 3.912e-02, -3.255e-02, 4.228e-02, -2.367e-02, -2.202e-02, 7.920e-03)); + r += mul(s0_7, M4(6.797e-02, -1.165e-01, 5.155e-02, -3.932e-02, 1.142e-02, 3.870e-02, -4.763e-04, -1.028e-02, -1.720e-01, 7.857e-02, -1.793e-02, -1.800e-02, 5.313e-02, -9.309e-02, 3.770e-02, -1.870e-02)); + r += mul(s0_8, M4(-1.552e-02, -1.472e-02, -4.440e-02, -1.742e-02, -9.677e-03, -2.365e-03, 8.017e-03, -3.447e-03, -4.267e-02, 9.093e-02, -5.832e-03, 1.950e-02, -4.456e-03, -1.277e-01, -9.661e-02, -3.767e-02)); + r += mul(s1_0, M4(1.220e-02, 2.545e-02, -2.427e-02, -2.919e-02, 1.376e-01, -4.778e-02, -5.280e-02, 3.424e-02, -1.951e-02, 5.586e-02, -2.064e-02, -1.663e-02, 5.072e-03, -1.511e-02, 2.885e-02, 3.522e-02)); + r += mul(s1_1, M4(-4.135e-03, -4.255e-02, 1.657e-02, 1.401e-02, 2.459e-01, -1.500e-01, -1.057e-01, 8.905e-02, 2.055e-02, -3.477e-02, -4.044e-02, -3.994e-02, 8.582e-03, 8.109e-02, -2.204e-03, -1.661e-02)); + r += mul(s1_2, M4(-5.406e-04, 1.480e-02, 2.395e-02, -2.375e-02, 
-4.139e-02, 7.214e-02, -6.710e-02, 1.183e-02, 6.631e-03, 4.481e-02, -2.836e-02, 1.352e-02, 1.651e-02, 1.306e-02, -4.984e-02, 8.676e-03)); + r += mul(s1_3, M4(-5.175e-03, 4.428e-02, -5.746e-02, 4.562e-02, 1.205e-02, -7.419e-02, 1.158e-02, 6.830e-02, 1.129e-02, -2.328e-02, 2.257e-02, 2.547e-02, 9.760e-03, 5.335e-02, -6.050e-02, 1.134e-01)); + r += mul(s1_4, M4(6.943e-04, 6.939e-02, 4.084e-03, -4.701e-02, 1.362e-01, -3.526e-01, -3.718e-02, 5.053e-02, -6.910e-02, 5.202e-02, -5.544e-02, -5.834e-02, -1.604e-02, 8.243e-02, 1.663e-01, -3.396e-02)); + r += mul(s1_5, M4(3.533e-03, -4.729e-02, -3.179e-02, 3.769e-02, 4.131e-02, -1.719e-01, -1.389e-02, 2.174e-02, 4.603e-03, -2.876e-02, 6.189e-02, 4.800e-02, 3.724e-02, 2.389e-02, -5.403e-02, 1.807e-02)); + r += mul(s1_6, M4(-1.131e-01, 8.189e-02, -1.971e-02, 1.542e-02, 9.368e-02, 6.963e-02, 3.110e-02, 1.709e-02, -1.058e-02, -2.140e-03, 2.814e-02, 1.187e-02, 8.828e-02, -1.966e-03, -1.097e-01, -3.622e-02)); + r += mul(s1_7, M4(8.828e-02, -1.550e-01, 2.141e-03, -2.533e-03, 2.275e-03, -9.305e-02, 2.913e-03, -2.725e-03, -3.023e-02, 7.153e-02, -2.468e-02, -1.383e-02, 9.642e-02, 1.112e-01, -2.530e-01, 5.328e-02)); + r += mul(s1_8, M4(-4.595e-02, 8.567e-02, -1.921e-02, 1.341e-02, -4.145e-03, -6.961e-02, -2.132e-03, -1.025e-02, 1.577e-02, -2.728e-02, 8.589e-03, -1.513e-02, -2.036e-03, 3.103e-02, 2.803e-02, -9.909e-03)); + r += mul(s2_0, M4(1.872e-02, -3.554e-02, -4.315e-02, 9.349e-03, 1.951e-02, 2.302e-02, -2.166e-02, 1.058e-02, 5.752e-02, -2.872e-02, 1.595e-03, -2.577e-02, 4.223e-02, 3.590e-02, -2.997e-02, -9.733e-03)); + r += mul(s2_1, M4(-6.303e-02, 8.675e-02, -4.988e-03, -4.818e-02, 6.319e-02, -9.892e-02, 8.761e-02, 5.963e-02, -3.507e-02, -1.705e-02, 6.934e-02, 4.970e-02, 2.267e-03, -4.190e-02, -2.179e-03, -3.351e-02)); + r += mul(s2_2, M4(2.213e-02, -3.701e-02, 3.497e-02, -5.799e-02, -3.614e-02, -2.253e-02, -4.799e-02, -4.115e-02, 8.547e-03, 7.009e-02, -1.611e-03, 3.784e-02, 1.500e-03, 5.809e-02, -2.469e-02, 2.074e-02)); + r += 
mul(s2_3, M4(-2.082e-02, 5.921e-02, -5.766e-03, -6.030e-02, 7.578e-03, 1.904e-02, -1.047e-01, 1.059e-01, -7.170e-02, 5.991e-02, 4.122e-02, -4.622e-02, 3.416e-03, 1.202e-03, 1.250e-02, 2.651e-02)); + r += mul(s2_4, M4(-5.199e-02, -8.660e-03, -9.264e-02, 4.332e-02, -2.770e-01, -9.477e-02, -1.739e-01, 3.310e-02, -5.992e-02, -2.027e-01, 1.018e-01, -6.506e-02, -1.506e-01, -3.280e-02, 7.540e-02, -2.998e-02)); + r += mul(s2_5, M4(-4.084e-02, -1.771e-02, 1.499e-02, -2.934e-02, -1.790e-02, -1.188e-01, 1.644e-01, -5.213e-02, -9.989e-03, 8.711e-02, -5.337e-02, -8.002e-03, -3.434e-02, 2.052e-02, 2.082e-02, 2.510e-02)); + r += mul(s2_6, M4(1.092e-02, 2.723e-02, 7.463e-03, -1.953e-02, -9.929e-02, -3.255e-02, 7.282e-02, -8.302e-02, 4.877e-02, -2.527e-02, 2.649e-04, 2.636e-02, -3.057e-02, -2.064e-02, 3.504e-02, 1.511e-02)); + r += mul(s2_7, M4(-2.356e-02, 1.447e-02, 3.365e-02, -6.376e-03, 1.848e-02, -4.435e-02, -1.451e-01, -1.324e-01, -1.865e-02, 2.552e-02, -5.798e-02, 8.032e-03, -4.240e-02, 6.194e-02, -7.205e-02, -7.899e-03)); + r += mul(s2_8, M4(5.712e-02, -8.752e-03, 3.965e-02, -1.018e-02, 5.286e-03, -3.574e-02, 7.631e-02, -1.518e-02, -1.465e-02, 1.768e-02, 2.405e-02, 8.130e-03, 2.138e-04, -6.461e-03, -2.955e-02, 3.826e-03)); + r += mul(s3_0, M4(-4.841e-02, 2.032e-01, -3.635e-02, -2.115e-01, 1.990e-02, 4.568e-03, 2.516e-03, -1.000e-02, 5.002e-02, -4.300e-02, 9.021e-03, -5.821e-02, -7.489e-02, 1.239e-02, 1.077e-01, 1.305e-01)); + r += mul(s3_1, M4(-4.201e-02, -1.989e-01, -3.334e-02, -6.472e-03, 1.388e-02, 2.082e-02, -4.926e-02, -1.762e-02, -5.040e-02, -2.869e-03, 2.892e-02, -4.308e-02, -2.206e-02, 1.446e-02, -7.367e-02, -6.763e-02)); + r += mul(s3_2, M4(2.386e-02, -9.420e-02, 1.363e-01, 1.211e-02, -6.687e-04, -3.608e-02, 7.732e-03, 1.757e-03, 2.526e-03, -1.860e-02, 4.211e-02, 2.851e-02, 4.414e-03, 6.763e-03, 8.533e-02, 3.429e-03)); + r += mul(s3_3, M4(-2.262e-01, 4.314e-02, -9.302e-02, 1.245e-01, 1.110e-01, 3.947e-02, -1.664e-02, 3.281e-02, -4.983e-02, 1.251e-02, 4.169e-02, 
-5.546e-02, 2.145e-01, -1.460e-01, 3.327e-01, -6.628e-02)); + r += mul(s3_4, M4(-2.728e-02, -4.408e-01, -9.411e-02, -2.079e-02, -1.833e-01, 6.247e-02, -7.999e-02, 7.305e-02, -7.140e-02, -5.381e-02, 2.619e-02, 4.561e-02, -1.321e-01, 1.948e-01, 5.573e-01, 6.632e-02)); + r += mul(s3_5, M4(-1.393e-01, -9.259e-02, -7.399e-02, -1.722e-01, -1.226e-02, -2.651e-02, 2.733e-02, -3.692e-02, -8.345e-03, 3.471e-02, 1.499e-02, -5.093e-02, 4.088e-03, 1.013e-02, 4.411e-02, 2.132e-02)); + r += mul(s3_6, M4(-1.362e-01, 1.633e-02, 3.048e-03, -2.313e-02, -1.697e-03, -5.715e-03, 2.150e-02, -1.245e-02, 6.827e-02, -7.725e-02, 5.958e-02, -2.006e-03, -8.098e-02, 1.432e-02, -1.368e-01, 2.472e-02)); + r += mul(s3_7, M4(-6.272e-02, -1.959e-01, 1.151e-01, -3.563e-02, -5.832e-02, -4.127e-02, -6.041e-04, 3.039e-02, 8.441e-03, 4.542e-02, -5.644e-02, -3.768e-03, -5.552e-03, -2.609e-02, 1.490e-01, -3.899e-02)); + r += mul(s3_8, M4(-2.868e-02, 1.260e-01, 2.419e-02, -7.906e-03, -1.357e-02, 6.577e-02, -3.260e-02, -1.060e-02, -9.049e-03, -8.496e-02, 1.345e-02, 1.437e-02, -4.895e-02, 5.249e-02, -3.312e-02, -1.726e-02)); + r += mul(s4_0, M4(1.845e-02, -1.428e-02, 3.025e-02, -3.601e-02, -7.456e-02, 4.700e-02, -5.271e-03, 8.968e-02, -3.187e-02, 8.715e-03, 1.572e-03, 7.963e-03, -2.486e-02, -3.948e-02, -4.365e-02, 1.295e-02)); + r += mul(s4_1, M4(5.060e-02, -3.672e-02, 4.173e-02, 5.038e-02, -1.104e-01, 4.492e-02, -6.302e-02, 2.167e-02, 1.372e-01, -3.682e-02, -3.740e-02, 2.062e-02, -6.461e-02, -5.470e-03, -8.412e-02, -1.235e-01)); + r += mul(s4_2, M4(-1.806e-02, 1.559e-02, -2.325e-02, -4.432e-02, 3.505e-02, -4.632e-03, 4.085e-02, 1.421e-01, -4.986e-03, 4.484e-03, 6.821e-03, 5.988e-03, 1.689e-01, -7.924e-02, 2.597e-02, -3.159e-02)); + r += mul(s4_3, M4(1.002e-01, -6.215e-02, -2.484e-02, -9.233e-02, 4.768e-02, -1.974e-01, -8.080e-03, 5.666e-03, -1.172e-02, 4.765e-03, -2.743e-03, -5.749e-02, -3.136e-02, -1.183e-02, 4.700e-02, 2.595e-02)); + r += mul(s4_4, M4(-3.767e-02, 2.888e-02, 1.105e-01, 1.856e-02, 9.276e-02, 
7.851e-02, -5.774e-02, -1.489e-01, -7.548e-02, -7.316e-02, 6.093e-01, 3.221e-01, -5.959e-01, 9.822e-02, 8.992e-02, 1.061e-01)); + r += mul(s4_5, M4(-5.871e-03, 9.548e-03, -1.592e-02, 1.531e-02, 5.870e-02, -3.468e-03, 4.422e-02, 2.998e-02, 3.743e-03, -5.535e-02, 3.347e-02, 1.525e-02, 3.906e-02, 6.235e-02, -1.618e-02, 1.045e-01)); + r += mul(s4_6, M4(1.008e-03, 3.806e-03, 1.244e-02, -7.620e-02, 1.407e-02, 8.177e-03, 6.615e-02, -2.112e-02, -4.001e-02, -2.659e-02, 2.051e-03, -1.621e-02, 1.130e-01, -3.558e-02, -6.091e-02, -6.975e-03)); + r += mul(s4_7, M4(-1.033e-01, 5.164e-02, -8.706e-02, 5.892e-02, -2.319e-02, -6.319e-02, -1.004e-01, 6.903e-02, 3.794e-02, -1.284e-01, 5.790e-02, -3.345e-03, -1.194e-01, 1.352e-01, -8.862e-02, 9.251e-02)); + r += mul(s4_8, M4(6.850e-02, 2.214e-02, -2.984e-02, -6.584e-03, -6.177e-02, 1.027e-01, -1.062e-04, 1.554e-02, 2.624e-02, 2.446e-02, -2.709e-02, -6.687e-03, -8.656e-03, 6.004e-02, -8.694e-02, 2.832e-02)); + r += mul(s5_0, M4(6.199e-02, -1.131e-01, 6.170e-02, -7.534e-03, -4.255e-02, 4.538e-02, -3.063e-02, 4.212e-02, 2.331e-02, -1.634e-02, -6.050e-03, -4.349e-02, 2.814e-03, -1.989e-03, -1.116e-02, -2.933e-02)); + r += mul(s5_1, M4(-3.261e-02, 7.652e-02, 6.421e-02, -1.008e-01, 5.395e-02, 5.759e-02, -1.665e-01, 1.023e-01, 5.627e-02, -2.189e-02, 8.770e-03, 1.208e-01, -1.837e-02, -5.015e-02, 3.498e-02, 4.259e-02)); + r += mul(s5_2, M4(2.237e-02, 9.596e-03, -4.689e-02, 1.668e-02, -4.693e-02, 3.892e-02, -4.201e-02, 8.341e-03, 1.646e-02, 9.753e-02, 7.488e-04, -4.371e-02, -4.316e-02, 1.629e-02, 1.219e-02, -4.465e-02)); + r += mul(s5_3, M4(1.071e-01, 1.818e-01, 1.804e-01, -4.161e-01, -5.134e-02, -2.543e-03, 4.252e-02, -1.254e-01, 8.945e-02, -6.327e-02, -1.708e-02, -7.655e-02, -1.818e-02, 1.160e-02, 5.056e-02, -9.572e-03)); + r += mul(s5_4, M4(-7.047e-02, 5.558e-02, -2.566e-01, 2.293e-02, 1.344e-01, -5.759e-02, 1.336e-01, -1.365e-01, -1.018e-01, 1.131e-01, -2.599e-04, 5.203e-02, -4.006e-02, 5.433e-02, -1.179e-02, 4.684e-02)); + r += mul(s5_5, 
M4(9.100e-03, -5.963e-02, -4.189e-02, 2.374e-02, -1.049e-02, -1.697e-02, 3.455e-02, 7.721e-02, 4.418e-02, 9.174e-02, 1.238e-01, 6.190e-02, 7.101e-02, 7.415e-02, 8.181e-03, 5.949e-02)); + r += mul(s5_6, M4(-3.657e-02, 8.785e-02, -1.366e-01, 3.699e-02, -5.526e-02, 3.303e-02, -1.172e-02, 5.110e-02, 1.694e-02, -4.945e-02, -1.527e-02, -5.031e-02, 4.639e-03, -3.881e-03, -2.765e-02, -5.272e-03)); + r += mul(s5_7, M4(-1.075e-01, -3.386e-02, -4.008e-03, -2.277e-01, 2.691e-02, 5.804e-02, -7.960e-02, -1.331e-02, -1.555e-03, 2.882e-02, -2.749e-02, 1.986e-02, 4.661e-02, 5.051e-03, 2.789e-04, -1.044e-02)); + r += mul(s5_8, M4(3.584e-02, 5.666e-02, 2.708e-02, -7.064e-02, -1.714e-02, 3.086e-02, -6.665e-03, 1.561e-02, 3.868e-02, 1.260e-02, 3.594e-02, 1.405e-02, 4.834e-02, 1.432e-02, -1.207e-03, -3.702e-02)); + r += mul(s6_0, M4(-7.545e-03, 5.817e-03, -3.976e-03, -6.886e-02, 1.241e-03, 3.949e-02, -9.141e-03, -2.932e-02, 3.061e-02, -3.527e-03, 4.619e-02, -6.930e-03, 4.226e-03, -3.380e-02, -4.733e-03, -9.659e-02)); + r += mul(s6_1, M4(1.434e-02, -5.702e-02, 1.086e-02, 1.870e-02, -8.674e-04, -2.612e-03, -4.122e-02, -1.223e-02, 3.054e-02, 2.505e-02, 4.928e-02, -2.850e-02, -6.155e-02, -2.670e-02, -2.795e-02, -2.198e-02)); + r += mul(s6_2, M4(3.110e-02, 1.396e-02, 4.330e-02, -5.054e-03, -1.258e-03, 9.789e-04, 2.307e-03, 3.330e-03, 1.202e-02, -1.888e-03, -7.485e-03, -3.196e-02, 4.168e-03, 1.211e-02, 4.629e-02, -1.252e-02)); + r += mul(s6_3, M4(-5.995e-02, 2.188e-02, -2.808e-02, 1.556e-02, -1.819e-02, -5.827e-03, -2.431e-02, 4.258e-02, 6.412e-03, 2.012e-02, 1.066e-02, 8.169e-02, 4.143e-02, 2.179e-01, -3.265e-02, -8.631e-03)); + r += mul(s6_4, M4(1.035e-01, -1.406e-01, 1.473e-01, 7.944e-02, -3.233e-02, -1.322e-02, 1.975e-02, 4.355e-02, -6.917e-02, 6.086e-02, 1.008e-03, -5.061e-02, -1.441e-01, 6.546e-02, -2.602e-02, -1.753e-01)); + r += mul(s6_5, M4(8.260e-02, -4.606e-02, 1.443e-01, 9.276e-03, -6.593e-03, 3.684e-02, 3.764e-02, 1.085e-03, -1.626e-02, -5.051e-02, -1.110e-02, 7.292e-02, 
3.659e-02, -2.852e-02, -3.503e-02, 2.781e-02)); + r += mul(s6_6, M4(-2.993e-02, 3.806e-02, 3.685e-03, -1.483e-02, -3.055e-02, -1.343e-02, 5.796e-02, -2.555e-02, -3.573e-03, 2.753e-02, -1.052e-01, 5.268e-03, -6.556e-02, 1.491e-02, 1.841e-02, 2.626e-02)); + r += mul(s6_7, M4(6.475e-02, -1.022e-01, -1.800e-02, -6.002e-03, 1.126e-01, -1.013e-01, 1.138e-01, -5.236e-02, 5.473e-02, -4.970e-02, 7.074e-02, -2.390e-02, -2.350e-02, -2.878e-02, 2.098e-02, 7.152e-02)); + r += mul(s6_8, M4(3.902e-02, -7.221e-03, 2.533e-02, -1.634e-02, -7.790e-02, 1.895e-02, 3.363e-02, 2.468e-02, 5.341e-02, 3.363e-02, -8.903e-03, 2.315e-03, 5.390e-03, 1.790e-02, -2.082e-02, -2.614e-04)); + r += mul(s7_0, M4(4.407e-02, -4.841e-02, 8.242e-02, 2.578e-02, -2.986e-02, -2.456e-02, -3.235e-02, 2.050e-02, 1.417e-02, 3.272e-02, -5.705e-03, -5.092e-02, 3.907e-02, -7.260e-02, -9.419e-02, 1.317e-02)); + r += mul(s7_1, M4(-6.845e-03, 2.661e-02, 7.769e-03, -6.435e-02, 7.383e-03, -1.629e-02, -4.591e-02, -7.700e-02, 6.616e-02, -9.670e-03, 1.328e-02, 7.153e-02, -4.024e-02, -2.898e-02, 4.793e-02, 7.884e-02)); + r += mul(s7_2, M4(-1.220e-02, -8.752e-02, -6.199e-02, 9.084e-03, 1.325e-02, -5.939e-02, 4.166e-02, 7.510e-03, -2.051e-02, -3.170e-02, 2.921e-02, -3.657e-02, -4.016e-02, 3.860e-02, -6.299e-02, -3.094e-02)); + r += mul(s7_3, M4(1.389e-01, 8.623e-05, 2.497e-03, 6.358e-02, 2.282e-02, 5.944e-03, 9.160e-02, 6.074e-02, -4.673e-02, 8.961e-02, 4.270e-02, 1.630e-01, 5.060e-01, 5.417e-03, 1.208e-01, 3.584e-02)); + r += mul(s7_4, M4(3.425e-02, 1.627e-01, 1.463e-01, -1.593e-01, 5.820e-02, -1.009e-01, -2.588e-01, 1.984e-01, -5.505e-02, -8.789e-02, -2.568e-02, -1.750e-01, 1.750e-01, -4.991e-01, -1.489e-01, -1.970e-02)); + r += mul(s7_5, M4(-9.526e-02, -5.354e-02, 4.962e-02, 1.750e-02, -8.023e-02, 2.113e-01, -1.938e-01, -2.143e-01, 1.331e-02, 1.890e-02, 8.502e-03, 1.317e-01, 3.780e-03, -1.992e-02, 1.165e-02, 5.067e-03)); + r += mul(s7_6, M4(8.740e-04, -1.707e-02, -5.092e-03, -2.246e-02, 3.230e-02, -2.677e-02, -4.519e-02, 
1.569e-02, 2.273e-02, -7.644e-02, 5.968e-02, -5.865e-02, -7.115e-02, 2.750e-02, -2.825e-02, 5.514e-02)); + r += mul(s7_7, M4(1.093e-01, -4.552e-02, -2.246e-02, 5.209e-02, 1.481e-01, -4.852e-02, -7.095e-02, 1.554e-01, -5.972e-02, 4.680e-02, 4.096e-02, 7.233e-02, -2.237e-02, -1.442e-01, 4.287e-02, 4.532e-02)); + r += mul(s7_8, M4(1.076e-03, -9.680e-02, 6.785e-03, 3.552e-02, -9.446e-02, 1.963e-02, 1.414e-01, 1.773e-02, -2.142e-02, 3.047e-02, -3.103e-02, -2.439e-02, 1.599e-03, 8.774e-03, 1.240e-02, 2.460e-02)); + r += V4(8.583e-03, -6.438e-03, 6.338e-03, 1.485e-04); + return r; +} + +void Pass9(uint2 blockStart, uint3 tid) { /* Pass 9: evaluates l0..l3 at nine offsets each, splits every sample into a non-negative and a non-positive part (72 vectors in total), and stores f0..f3 of those vectors into t0..t3 at this thread's pixel. */ + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; /* pixel handled by this thread; Rmp8x8 presumably maps the flat thread index to a coordinate inside the block - TODO confirm */ + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { /* threads whose pixel lies outside the input size write nothing */ + return; + } + float2 pos = (gxy + 0.5) * pt; /* not referenced by name again in this function; presumably read by the l0..l3 macros defined outside this view - TODO confirm */ + + V4 s0_0 = l0(-1.0, -1.0); /* s0_0..s0_8: l0 at offsets (x, y) with x, y in {-1, 0, 1}, x varying fastest */ + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); /* s1_k = -max(-s0_k, 0), i.e. min(s0_k, 0): the non-positive part of each l0 sample */ + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); /* s0_k is now clamped in place to its non-negative part; this has to come after the s1_k lines, which read the unclamped value */ + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); /* same pattern for l1: s2_k ends up as the non-negative part, s3_k as the non-positive part */ + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = 
-max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); /* same pattern for l2: s4_k non-negative part, s5_k non-positive part */ + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); /* same pattern for l3: s6_k non-negative part, s7_k non-positive part */ + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, 
s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); /* each of f0..f3 receives the same 72 vectors in the same order; its V4 result is stored at gxy in the matching t0..t3 */ + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 10 + //!DESC out-shuffle + //!BLOCK_SIZE 16 + //!NUM_THREADS 64 + //!IN INPUT, t0, t1, t2, t3 + //!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) + #define l1(x, y) V4(O(t1, float2(x, y))) + #define l2(x, y) V4(O(t2, float2(x, y))) + #define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 
s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.288e-03, -1.321e-02, -2.106e-02, -1.756e-02, 3.676e-02, 1.095e-02, 1.181e-02, -2.697e-03, 4.596e-02, 2.905e-02, 1.476e-02, 2.231e-02, 2.701e-03, 3.902e-03, -3.031e-03, 8.976e-03)); + r += mul(s0_1, M4(3.357e-02, 4.993e-02, 2.113e-02, 2.618e-02, -5.839e-02, 4.692e-03, 2.007e-02, 1.737e-02, 1.132e-02, 4.016e-02, -2.122e-02, -2.512e-02, 2.428e-02, 2.614e-02, -5.047e-02, 1.221e-02)); + r += mul(s0_2, M4(1.581e-03, 2.179e-02, -2.680e-02, -2.460e-02, 1.697e-03, -3.966e-02, -2.177e-03, 2.177e-03, 3.197e-03, 2.340e-02, -2.558e-03, 1.068e-02, 2.225e-02, 4.831e-04, 2.057e-02, -3.325e-02)); + r += mul(s0_3, M4(-8.245e-03, 1.447e-02, 4.863e-02, 3.704e-02, 9.376e-03, -1.558e-02, 4.408e-02, 5.230e-02, -1.237e-01, 2.204e-02, -4.083e-02, -7.021e-02, -3.404e-02, 2.196e-02, -5.405e-02, 1.713e-02)); + r += mul(s0_4, M4(-1.691e-02, -5.413e-02, 1.412e-02, -4.122e-02, 3.281e-02, 1.344e-01, -9.106e-02, 5.237e-02, 7.495e-02, -2.874e-01, 1.509e-01, 9.939e-02, 6.605e-02, 1.485e-02, 2.092e-01, -3.814e-02)); + r += mul(s0_5, M4(3.302e-02, 9.497e-03, 5.188e-02, 7.025e-02, -3.038e-02, -4.289e-02, 8.231e-03, -6.615e-02, -3.845e-02, 6.910e-02, -1.664e-03, 1.457e-02, -3.180e-02, -7.829e-02, -7.862e-02, -4.652e-02)); + r += mul(s0_6, M4(9.334e-03, -2.657e-02, -6.000e-02, -5.408e-02, 3.188e-02, -1.277e-02, 1.829e-02, -6.218e-02, 1.772e-02, 7.507e-03, -1.788e-02, 9.071e-03, 8.863e-03, -9.756e-03, -8.313e-04, -1.364e-02)); + r += mul(s0_7, M4(-6.391e-02, 
1.455e-02, -4.018e-03, -1.183e-02, -1.013e-01, -3.137e-02, -4.261e-02, 2.507e-02, -5.843e-03, 5.284e-02, -7.524e-02, -2.047e-02, -6.055e-02, 2.008e-02, 1.745e-02, 6.396e-02)); + r += mul(s0_8, M4(9.589e-03, -7.702e-03, -1.385e-02, -3.273e-03, 7.148e-03, -2.582e-02, -1.582e-02, -4.480e-02, 1.874e-02, 7.253e-03, -7.904e-03, -2.832e-02, 9.305e-03, -1.280e-02, -3.018e-02, 9.239e-03)); + r += mul(s1_0, M4(-2.402e-03, -1.702e-02, -1.430e-02, -1.437e-02, 3.541e-02, 1.098e-02, 1.285e-02, -4.486e-03, -3.848e-03, 1.516e-02, 1.246e-02, 2.232e-02, -1.736e-02, -6.417e-03, -5.782e-03, 1.797e-02)); + r += mul(s1_1, M4(2.029e-02, 3.807e-02, 3.010e-02, 3.850e-02, -6.865e-02, -1.083e-02, 2.365e-02, 1.968e-02, -1.490e-02, -2.014e-02, -8.668e-03, -2.748e-02, -3.469e-04, 9.138e-02, -1.043e-01, 2.116e-02)); + r += mul(s1_2, M4(-7.515e-03, 3.076e-03, -2.357e-02, -1.654e-02, -4.330e-03, -3.571e-02, -3.355e-03, 4.346e-04, 5.423e-03, 9.114e-03, 3.344e-03, 2.062e-02, 2.949e-02, -7.357e-02, 1.904e-02, -2.418e-02)); + r += mul(s1_3, M4(-3.964e-02, 1.319e-02, 1.290e-02, 1.144e-02, 3.125e-02, -1.283e-02, 5.506e-02, 5.888e-02, -7.126e-03, 2.046e-03, 2.282e-02, -3.216e-02, 1.672e-02, 9.030e-03, -5.734e-02, 3.116e-02)); + r += mul(s1_4, M4(3.315e-01, 5.508e-02, -1.405e-01, -1.298e-01, 3.390e-02, 1.265e-01, -1.198e-01, 1.523e-02, 1.206e-01, -1.592e-02, -1.558e-02, 4.463e-03, -5.783e-02, 1.140e-03, 6.348e-01, -2.935e-01)); + r += mul(s1_5, M4(2.099e-02, 1.821e-01, 5.430e-02, -2.893e-03, -2.511e-02, -3.087e-02, 6.097e-03, -5.772e-02, -1.947e-02, 1.794e-02, -3.531e-03, -2.538e-02, -1.085e-01, 1.207e-01, -1.402e-01, -1.096e-01)); + r += mul(s1_6, M4(3.216e-02, -2.144e-02, -6.793e-02, -5.081e-02, 4.411e-02, -7.452e-03, 3.820e-02, -4.924e-02, -2.191e-02, -3.942e-03, -6.666e-02, -3.475e-03, 1.302e-02, -1.511e-02, -1.236e-02, -1.293e-02)); + r += mul(s1_7, M4(5.775e-03, 6.997e-02, -2.382e-01, -1.491e-01, -8.038e-02, -1.730e-02, -1.200e-02, 4.096e-02, -4.595e-02, -1.271e-02, 6.949e-02, 5.294e-03, -5.210e-02, 
3.667e-03, -6.985e-02, 7.442e-02)); + r += mul(s1_8, M4(2.313e-02, 2.815e-02, -3.577e-03, -8.620e-02, 8.754e-03, -1.742e-02, -1.739e-02, -2.487e-02, 4.649e-03, -1.378e-02, -1.452e-02, 2.420e-02, -6.308e-04, -1.359e-02, -5.233e-02, 1.669e-02)); + r += mul(s2_0, M4(-3.368e-02, -3.980e-03, -9.703e-04, 1.080e-02, 9.988e-03, -1.531e-02, -4.364e-03, -1.666e-02, 3.106e-03, -5.797e-03, -9.667e-03, -1.150e-02, -2.375e-02, 2.223e-02, 2.287e-02, 2.405e-02)); + r += mul(s2_1, M4(5.779e-03, -2.728e-02, -3.430e-03, -2.802e-02, 6.992e-03, 5.027e-03, -1.433e-02, -1.959e-02, -2.256e-02, 2.146e-02, -1.511e-03, 1.838e-02, -4.855e-03, -6.710e-02, 5.815e-03, -5.167e-03)); + r += mul(s2_2, M4(-1.631e-02, -4.753e-03, 6.240e-03, 2.216e-02, -7.764e-04, 2.601e-02, -1.759e-02, -1.272e-02, -9.234e-03, 1.193e-02, -1.557e-02, 9.221e-03, 3.824e-03, 1.605e-02, -1.521e-03, 4.219e-03)); + r += mul(s2_3, M4(5.929e-02, -1.351e-02, -3.640e-02, -7.495e-02, 2.995e-04, -5.310e-03, 9.308e-03, 5.831e-03, -3.071e-02, 3.279e-02, 2.226e-02, 2.718e-02, -7.876e-02, -1.108e-02, -1.091e-01, -3.100e-03)); + r += mul(s2_4, M4(2.597e-01, 2.576e-01, -1.353e-01, -3.268e-02, -1.675e-02, -1.212e-02, -1.013e-01, 7.113e-03, -1.483e-01, -6.736e-02, -1.158e-01, -5.368e-02, -2.017e-02, -1.257e-01, -7.094e-02, -2.066e-01)); + r += mul(s2_5, M4(-6.604e-03, 4.490e-02, -1.689e-02, -8.739e-02, -1.825e-02, 2.317e-02, -7.293e-03, 6.325e-02, -3.538e-02, 2.314e-01, -3.612e-03, 9.028e-02, 1.150e-02, 3.354e-02, 2.008e-02, 4.114e-02)); + r += mul(s2_6, M4(-6.085e-02, 8.006e-03, 5.597e-03, 4.375e-03, 5.102e-02, -7.769e-03, 5.785e-02, -3.648e-02, 2.499e-02, -1.236e-02, -2.100e-02, -9.887e-03, 1.139e-02, 1.272e-02, 9.081e-03, 2.087e-03)); + r += mul(s2_7, M4(-2.607e-02, -9.075e-02, 3.944e-02, 8.550e-02, 1.373e-02, 2.031e-02, 1.814e-02, -7.935e-02, -4.653e-02, 8.520e-02, -8.139e-02, 5.661e-02, 1.263e-02, 1.954e-02, 2.187e-02, 3.499e-02)); + r += mul(s2_8, M4(-1.408e-02, -2.322e-02, -2.956e-02, -5.237e-02, -3.299e-02, -6.760e-03, 3.166e-02, 
9.487e-02, 4.560e-03, 5.893e-03, -5.681e-02, 1.580e-01, 1.593e-03, 1.052e-03, 5.417e-03, 1.003e-02)); + r += mul(s3_0, M4(-1.585e-02, -6.165e-03, -6.595e-03, 6.867e-03, 2.167e-02, -1.104e-02, -5.165e-03, -1.270e-02, -1.542e-03, -9.897e-04, -9.215e-03, -7.713e-03, -3.071e-02, 2.166e-02, 2.315e-02, 2.699e-02)); + r += mul(s3_1, M4(2.748e-02, -9.194e-03, -9.189e-03, -3.307e-02, 2.411e-02, 8.146e-03, -1.984e-02, -1.898e-02, -1.813e-02, 4.340e-03, 5.052e-03, 1.439e-02, -1.342e-02, -7.745e-02, 4.404e-03, -8.122e-03)); + r += mul(s3_2, M4(-9.177e-03, 1.751e-02, 4.400e-03, 2.236e-02, 4.106e-03, -8.324e-03, -2.258e-02, -2.498e-03, 4.850e-03, -1.186e-02, -1.428e-02, -8.462e-03, 1.590e-03, 1.123e-02, -6.362e-03, 1.537e-03)); + r += mul(s3_3, M4(1.300e-02, -2.392e-02, 1.641e-02, -5.358e-02, 2.303e-02, 9.964e-04, 2.607e-02, 1.629e-02, -4.236e-02, 3.306e-02, 1.082e-02, 3.368e-02, -9.061e-02, -1.555e-02, -1.254e-01, -9.231e-03)); + r += mul(s3_4, M4(1.093e-02, 6.117e-02, -2.334e-02, 7.731e-02, -1.889e-01, -4.521e-02, -1.381e-01, 4.990e-02, -3.273e-03, -1.392e-01, -5.917e-02, -1.084e-01, -4.124e-02, -1.464e-01, -8.814e-02, -2.305e-01)); + r += mul(s3_5, M4(2.954e-03, -2.640e-02, -1.878e-02, -4.553e-02, -1.245e-01, 3.719e-01, 7.327e-02, -8.666e-02, 7.281e-02, 6.134e-02, 6.567e-02, 7.487e-03, 7.990e-03, 2.353e-02, 1.700e-02, 3.026e-02)); + r += mul(s3_6, M4(-1.941e-02, 4.529e-03, -4.042e-02, 2.598e-03, 4.582e-02, -7.800e-03, 3.671e-02, -2.995e-02, 1.575e-02, -9.918e-03, -3.626e-02, -7.104e-03, 1.194e-02, 1.250e-02, 1.155e-03, -5.683e-04)); + r += mul(s3_7, M4(1.277e-02, -1.435e-02, 1.008e-01, 3.798e-02, -1.700e-03, 2.830e-02, 2.744e-01, -1.602e-01, -3.744e-02, 9.253e-02, 9.984e-03, 2.357e-02, 7.448e-03, 1.759e-02, 1.404e-02, 2.423e-02)); + r += mul(s3_8, M4(-2.164e-02, -2.065e-02, -2.798e-02, 4.532e-03, -2.055e-02, -4.854e-02, -2.606e-02, 1.503e-02, 2.083e-02, -1.722e-02, 1.066e-02, 5.132e-02, 3.575e-04, -2.594e-03, 2.816e-03, 5.034e-03)); + r += mul(s4_0, M4(-1.426e-02, 1.257e-02, 
1.033e-02, 1.935e-02, -3.030e-02, 1.229e-02, -4.250e-03, 6.165e-03, -1.997e-03, -9.436e-03, -2.618e-02, -1.437e-02, -1.190e-02, -2.632e-02, 2.175e-02, 2.317e-02)); + r += mul(s4_1, M4(2.095e-01, -5.779e-02, 5.158e-02, 2.231e-03, -4.922e-02, -8.571e-02, -4.875e-02, 2.141e-02, 9.009e-02, -1.725e-02, -5.313e-02, -2.566e-02, 1.735e-02, 4.925e-02, 1.420e-02, 1.780e-02)); + r += mul(s4_2, M4(-9.185e-02, -5.607e-02, -8.766e-02, -1.148e-01, -3.548e-03, -2.415e-02, -2.607e-02, -2.083e-02, 1.337e-03, 5.432e-02, 6.464e-03, 3.675e-03, 5.501e-03, -3.110e-03, 1.074e-02, 2.205e-02)); + r += mul(s4_3, M4(3.402e-02, -2.865e-02, -1.933e-02, -3.812e-02, -4.599e-02, 3.587e-03, -5.386e-02, 4.163e-02, 6.190e-02, -1.407e-02, 1.190e-02, -6.332e-03, 1.268e-01, 7.919e-03, -1.616e-01, 9.103e-03)); + r += mul(s4_4, M4(-1.923e-02, 1.850e-02, 2.046e-01, -1.099e-01, 1.178e-01, 2.343e-01, 2.749e-01, -1.415e-01, -3.663e-01, 2.106e-01, 1.741e-01, 7.106e-02, 2.185e-01, 2.617e-01, -2.407e-01, -4.685e-01)); + r += mul(s4_5, M4(-7.289e-02, 1.753e-02, -6.698e-02, 1.516e-01, -2.065e-02, -4.328e-02, -4.114e-02, -1.211e-02, 4.070e-02, 5.314e-02, -3.458e-02, -2.126e-01, -3.308e-03, 6.276e-02, -2.422e-03, 5.390e-03)); + r += mul(s4_6, M4(-2.044e-02, 9.857e-03, -5.566e-03, 1.947e-02, -2.924e-03, -2.792e-02, -2.582e-02, -1.406e-02, 4.819e-02, -3.570e-02, 5.594e-02, -4.488e-02, 1.468e-02, -1.481e-02, 2.532e-02, 4.259e-03)); + r += mul(s4_7, M4(4.942e-02, -2.351e-02, 6.075e-03, -4.373e-03, -1.144e-02, 5.010e-03, -1.145e-03, 3.259e-02, -1.478e-03, -5.162e-03, 4.193e-03, -8.413e-02, -4.683e-02, 2.837e-03, -2.870e-02, -3.504e-02)); + r += mul(s4_8, M4(-9.462e-03, 3.463e-04, -3.184e-02, -2.237e-02, -5.753e-03, -8.946e-03, -1.983e-02, 1.403e-02, 3.208e-04, -6.783e-03, 2.986e-02, 9.583e-02, 9.652e-03, -1.106e-02, 2.724e-02, 4.058e-02)); + r += mul(s5_0, M4(-4.553e-03, 1.354e-02, 1.294e-02, 2.198e-02, -3.796e-02, 1.777e-03, -1.345e-02, -2.558e-03, 6.565e-03, -8.288e-03, -1.664e-02, -1.294e-02, -5.033e-03, -5.418e-03, 
3.318e-03, 9.769e-03)); + r += mul(s5_1, M4(2.888e-03, -1.481e-02, 1.988e-02, -5.457e-03, -3.176e-02, -4.046e-02, -3.968e-02, -7.777e-03, 3.050e-03, 1.551e-02, -4.423e-02, -2.049e-02, -2.785e-03, 8.283e-03, 8.081e-03, 5.535e-03)); + r += mul(s5_2, M4(-7.639e-03, 5.054e-03, -6.512e-02, -4.601e-02, -1.182e-02, -4.087e-02, -1.517e-02, -3.973e-02, 1.923e-02, -3.636e-03, 2.031e-02, -7.553e-03, 6.687e-03, 1.134e-02, 6.916e-03, 1.592e-02)); + r += mul(s5_3, M4(5.164e-02, -3.007e-02, 6.146e-04, -4.163e-02, -2.801e-02, 7.026e-03, -2.300e-02, 1.935e-02, -3.085e-03, -6.896e-03, 2.654e-02, -5.054e-03, -1.381e-02, 3.617e-03, -2.664e-02, -2.407e-03)); + r += mul(s5_4, M4(-5.210e-02, 6.646e-02, -5.044e-02, -2.083e-02, 1.804e-01, 6.674e-02, 1.508e-01, 7.434e-03, 7.070e-02, 1.608e-02, -3.052e-02, 1.301e-03, 1.424e-02, -3.447e-02, 8.955e-03, -5.847e-02)); + r += mul(s5_5, M4(2.626e-03, -2.935e-02, 9.258e-02, 9.205e-02, -2.765e-02, 1.139e-03, -2.845e-02, -1.555e-02, -5.127e-02, 1.636e-02, -3.642e-02, 4.822e-02, -1.547e-02, 1.102e-02, -5.527e-03, 7.313e-03)); + r += mul(s5_6, M4(-2.205e-02, 7.127e-03, 3.632e-03, 1.646e-02, -1.045e-02, -9.007e-03, -3.836e-02, -2.190e-02, 2.273e-02, -3.534e-03, 1.640e-02, -4.160e-02, 1.927e-02, -9.104e-03, 3.589e-02, -3.850e-03)); + r += mul(s5_7, M4(3.426e-02, -1.793e-02, 2.421e-02, 1.227e-02, -1.653e-02, 1.187e-02, 2.618e-02, 1.769e-03, -5.266e-02, -9.613e-03, 7.193e-03, 8.483e-03, -1.251e-02, 1.625e-02, -4.461e-02, -3.639e-03)); + r += mul(s5_8, M4(-5.050e-03, 1.453e-02, -2.586e-02, -2.142e-02, -5.868e-03, -6.412e-03, -1.996e-02, 1.883e-02, -3.762e-04, -2.114e-02, 1.910e-02, 1.897e-02, 7.753e-03, -1.023e-03, 1.424e-02, 4.169e-03)); + r += mul(s6_0, M4(-3.845e-03, 1.689e-02, 5.021e-03, 1.254e-02, -1.487e-03, 3.818e-02, -1.402e-02, 2.054e-02, -3.812e-03, 2.115e-02, 2.435e-02, 2.093e-02, 1.783e-02, -4.674e-03, -3.655e-03, 3.153e-03)); + r += mul(s6_1, M4(5.142e-03, -6.075e-02, 1.070e-02, -2.858e-02, 7.935e-02, -9.109e-02, 4.375e-02, 1.193e-02, 
-9.096e-02, -7.053e-02, -1.485e-02, -7.337e-03, 1.038e-01, 3.523e-02, 1.287e-02, 3.620e-02)); + r += mul(s6_2, M4(-1.055e-02, 3.804e-02, -1.936e-02, 2.036e-02, -9.637e-03, 2.061e-02, -1.709e-02, 1.762e-02, 3.860e-03, -3.284e-02, 8.486e-03, 8.479e-03, 4.988e-03, 2.713e-02, -1.273e-02, -2.067e-02)); + r += mul(s6_3, M4(2.856e-03, 1.376e-02, 1.553e-02, 1.966e-02, 6.574e-02, 1.731e-02, 9.099e-02, 4.415e-02, 8.429e-04, -9.494e-03, -2.708e-03, -2.939e-03, 1.001e-01, -7.926e-03, 3.623e-02, 6.166e-02)); + r += mul(s6_4, M4(-5.182e-02, -8.474e-03, -7.333e-02, -3.194e-02, 2.440e-01, -3.135e-01, 1.841e-01, -3.585e-01, 1.917e-01, 1.126e-01, -7.704e-02, -6.519e-02, -4.877e-01, 1.928e-01, 4.962e-02, -2.862e-01)); + r += mul(s6_5, M4(7.056e-02, -4.759e-03, 7.693e-02, -1.180e-02, -1.746e-03, 7.848e-03, 2.640e-02, 1.388e-02, -4.918e-02, -9.407e-03, -1.582e-02, -4.991e-02, 2.390e-02, 1.084e-02, -5.813e-02, 9.644e-02)); + r += mul(s6_6, M4(1.613e-02, -9.781e-03, -1.865e-02, -4.360e-03, -8.337e-03, 5.040e-03, -1.687e-02, 4.597e-04, -3.722e-02, 1.425e-02, -1.447e-02, 2.435e-02, 1.344e-02, -1.216e-02, -3.054e-04, -3.275e-02)); + r += mul(s6_7, M4(-9.062e-03, 3.863e-02, 4.212e-02, -2.035e-04, -1.452e-02, 9.739e-03, 8.968e-02, -7.169e-02, 2.987e-02, -2.008e-02, 1.045e-01, 7.690e-02, 3.761e-02, -3.235e-02, 1.704e-01, 9.325e-02)); + r += mul(s6_8, M4(-1.967e-02, -2.017e-02, -4.216e-02, 7.447e-03, -5.092e-03, 2.067e-03, -2.545e-02, -7.547e-03, -1.169e-02, -9.225e-03, -1.654e-02, 1.105e-02, 1.381e-02, 1.372e-02, 1.354e-02, -1.234e-01)); + r += mul(s7_0, M4(-3.337e-03, 8.507e-03, -3.598e-03, 5.681e-03, 1.162e-02, 1.914e-02, -2.418e-03, 8.949e-03, -5.832e-03, 1.836e-02, 2.189e-02, 1.753e-02, 2.472e-02, -1.509e-03, -8.574e-03, -5.146e-03)); + r += mul(s7_1, M4(5.323e-02, -1.037e-02, 9.631e-03, -2.247e-02, -1.439e-02, -1.350e-02, 4.510e-02, -2.726e-03, -3.537e-02, -4.546e-02, -4.999e-03, -8.271e-03, 3.596e-02, 4.316e-02, 2.662e-02, 3.942e-02)); + r += mul(s7_2, M4(-7.218e-03, -6.411e-03, 
-1.728e-02, 3.503e-02, -3.096e-03, -4.575e-03, -5.780e-03, 6.029e-03, -1.083e-02, -5.370e-03, 1.031e-02, 1.510e-02, 2.472e-03, 9.614e-03, -2.293e-04, -1.862e-02)); + r += mul(s7_3, M4(3.626e-02, 9.689e-03, 4.372e-02, 9.929e-03, -1.615e-03, 1.196e-02, 3.210e-02, 4.786e-02, -9.144e-03, 6.885e-03, -1.638e-03, -5.414e-03, 1.300e-02, 1.148e-02, -4.992e-03, 5.277e-02)); + r += mul(s7_4, M4(-2.144e-01, 8.854e-02, -5.507e-02, 9.481e-02, 9.738e-03, -1.262e-02, -9.155e-02, -4.065e-02, 2.764e-01, 8.610e-02, -2.896e-01, -1.021e-01, -8.618e-02, -3.892e-02, -3.471e-02, -9.937e-02)); + r += mul(s7_5, M4(1.219e-01, -1.227e-01, 9.791e-02, -1.922e-01, 1.401e-02, 1.540e-02, 3.960e-02, 1.178e-02, -1.002e-02, 1.329e-01, -2.955e-02, -2.166e-01, 2.488e-03, -8.343e-04, -3.448e-02, -3.006e-03)); + r += mul(s7_6, M4(1.627e-02, -1.631e-02, -8.284e-03, -1.098e-02, 4.314e-03, -3.418e-03, -1.261e-02, -1.564e-02, -4.235e-02, 2.073e-02, 2.123e-02, 1.715e-02, 1.017e-02, -4.602e-04, 1.562e-02, -2.705e-02)); + r += mul(s7_7, M4(-1.332e-02, 3.030e-02, -7.299e-02, 2.072e-03, -2.778e-02, -2.832e-03, 8.620e-03, 4.325e-03, -7.304e-04, -4.965e-02, 1.513e-01, 1.655e-01, 6.935e-05, -3.351e-02, 8.485e-02, -4.985e-03)); + r += mul(s7_8, M4(-1.102e-02, -6.604e-03, -1.332e-02, 2.311e-02, -3.667e-03, -1.546e-02, -1.896e-02, -2.554e-02, -1.369e-02, -2.191e-02, -2.209e-02, -7.603e-03, -1.588e-02, -3.563e-03, -2.569e-02, 5.890e-02)); + r += V4(4.986e-04, 3.006e-04, 5.437e-04, 6.561e-04); + return tanh(r); +} + +void Pass10(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = 
-max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = 
l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = 
float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-8x16C-NVL.hlsl b/src/Effects/CuNNy/CuNNy-8x16C-NVL.hlsl new file mode 100644 index 000000000..281154388 --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-8x16C-NVL.hlsl @@ -0,0 +1,4027 @@ +// CuNNy 8x16C BILINEAR RGB NVL - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-D16N08 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t4; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t5; + 
+//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t6; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t7; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1, t2, t3 + +#define l0(x, y) min16float((dot(float3(2.271e-01, 4.365e-01, 9.234e-02), O(INPUT, float2(x, y)).rgb) + -4.932e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(-2.811e-02, 2.484e-02, 8.152e-03, -1.120e-01) * s0_0; + r += V4(-3.057e-02, 1.311e-02, 6.150e-04, -2.159e-03) * s0_1; + r += V4(-1.407e-02, -3.651e-02, -3.011e-02, 1.178e-03) * s0_2; + r += V4(2.646e-01, -6.081e-02, -3.473e-03, 8.330e-02) * s0_3; + r += V4(-1.848e-02, 1.380e-01, -2.730e-02, -8.572e-02) * s0_4; + r += V4(2.680e-02, -1.536e-02, 1.157e-01, 1.092e-01) * s0_5; + r += V4(-3.694e-02, -1.645e-01, 9.501e-03, -1.749e-02) * s0_6; + r += V4(-3.178e-02, 5.182e-02, 1.843e-02, 1.107e-01) * s0_7; + r += V4(-4.695e-03, 5.493e-02, 1.829e-02, -4.980e-02) * s0_8; + r += V4(5.208e-02, -8.290e-03, 2.168e-03, 1.841e-03); + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(3.963e-04, -3.350e-01, -9.155e-02, -2.133e-02) * s0_0; + r += V4(1.792e-01, 3.877e-01, -1.398e-01, -8.170e-03) * s0_1; + r += V4(-1.861e-01, -6.714e-02, 7.203e-02, 3.912e-02) * s0_2; + r += V4(-4.146e-03, 1.344e-01, 1.741e-01, 7.374e-02) * s0_3; + r += V4(-2.301e-01, -1.333e-01, -2.999e-01, 8.431e-02) * s0_4; + r += V4(2.309e-01, 1.260e-02, -8.470e-02, -8.617e-02) * s0_5; + r += V4(-4.725e-03, 1.410e-02, 1.814e-01, -8.658e-02) * s0_6; + r += V4(3.358e-02, -1.292e-02, 2.253e-01, 1.051e-01) * s0_7; + r += V4(-2.100e-02, -3.521e-03, -3.845e-02, -2.268e-02) * s0_8; 
+ r += V4(-3.218e-03, 2.366e-03, -5.615e-03, -1.653e-02); + return r; +} + +V4 f2(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(1.224e-01, 3.559e-03, 8.155e-03, -7.150e-02) * s0_0; + r += V4(-3.380e-02, 2.165e-01, 2.783e-01, -1.733e-01) * s0_1; + r += V4(2.199e-03, 8.188e-02, 5.409e-02, 5.578e-02) * s0_2; + r += V4(-5.406e-02, -1.757e-02, 1.233e-01, -2.783e-01) * s0_3; + r += V4(-2.804e-02, -2.474e-01, -4.381e-01, 2.157e-01) * s0_4; + r += V4(-9.010e-04, -5.054e-02, -4.162e-02, 6.518e-02) * s0_5; + r += V4(1.605e-02, -3.841e-03, -8.540e-03, 1.233e-01) * s0_6; + r += V4(5.971e-02, 5.600e-02, -6.272e-03, 8.912e-02) * s0_7; + r += V4(6.856e-03, -2.717e-02, 2.613e-02, -2.536e-02) * s0_8; + r += V4(1.307e-03, 1.096e-02, -1.139e-02, 4.148e-05); + return r; +} + +V4 f3(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(-4.770e-03, 2.395e-02, 5.034e-02, -9.936e-03) * s0_0; + r += V4(-1.666e-02, -1.328e-02, 1.166e-01, 2.045e-02) * s0_1; + r += V4(-2.423e-02, -7.371e-03, -1.666e-01, 2.468e-02) * s0_2; + r += V4(-2.045e-02, 1.276e-02, 5.554e-02, -1.668e-02) * s0_3; + r += V4(4.601e-01, -2.658e-01, 5.586e-02, -2.036e-01) * s0_4; + r += V4(6.315e-02, 1.528e-01, -1.345e-01, 1.167e-01) * s0_5; + r += V4(-2.105e-03, -5.846e-02, -7.890e-03, 1.110e-02) * s0_6; + r += V4(-2.450e-02, -8.582e-02, 2.247e-02, 1.425e-01) * s0_7; + r += V4(-2.825e-02, 2.409e-01, 8.078e-03, -7.072e-03) * s0_8; + r += V4(-9.510e-02, 6.547e-03, 7.733e-03, 2.232e-02); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, 
-1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.485e-02, -7.793e-02, -1.391e-01, 1.353e-01, 1.305e-01, 2.742e-01, -1.513e-01, -7.074e-02, -2.830e-01, -1.284e-02, 7.441e-02, -2.870e-02, 3.071e-03, -2.819e-03, -7.844e-02, 6.126e-02)); + r += mul(s0_1, M4(2.001e-01, -3.732e-02, -5.838e-02, 1.521e-01, 2.003e-01, -2.565e-01, -2.830e-01, -8.331e-03, 1.059e-01, 1.106e-01, -9.151e-02, 1.678e-01, -6.968e-02, 1.177e-01, 7.009e-02, -2.027e-01)); + r += mul(s0_2, M4(2.422e-02, -7.541e-02, -1.327e-01, 1.145e-01, 
2.656e-02, 7.576e-02, -1.218e-01, -2.693e-02, 4.026e-02, -2.779e-02, 2.237e-02, 7.016e-03, -1.143e-01, 1.814e-01, 1.175e-01, 2.384e-02)); + r += mul(s0_3, M4(2.299e-01, -1.172e-02, -1.626e-01, 2.057e-02, 1.941e-01, -2.038e-01, 1.522e-01, -1.223e-01, 4.546e-01, -1.078e-01, -7.791e-02, 9.659e-02, -1.704e-01, -4.998e-02, 4.930e-02, 5.194e-02)); + r += mul(s0_4, M4(5.608e-01, 4.285e-02, -2.509e-02, -1.151e-01, 5.909e-02, 4.300e-02, 1.090e-01, -1.141e-01, 3.319e-02, -5.049e-01, 1.955e-01, 4.758e-02, 2.172e-01, 1.379e-01, -6.424e-02, 5.035e-02)); + r += mul(s0_5, M4(7.594e-01, -6.796e-02, 1.366e-01, -1.236e-01, 2.804e-01, -4.411e-03, 2.242e-01, -6.970e-02, -9.327e-02, -2.049e-01, 1.850e-02, -6.979e-02, 6.384e-02, 5.082e-02, 1.190e-02, -1.455e-02)); + r += mul(s0_6, M4(-3.605e-02, -3.102e-02, 8.660e-02, -1.093e-01, 1.433e-01, 3.220e-03, 8.072e-02, -2.325e-02, -4.192e-01, -1.610e-01, -7.125e-02, -4.631e-02, -5.018e-02, 2.527e-02, 8.482e-02, -8.232e-02)); + r += mul(s0_7, M4(9.651e-02, -3.884e-02, 1.191e-01, -1.640e-01, -6.699e-02, -1.876e-02, 3.375e-01, -5.633e-02, -1.872e-01, 2.687e-01, 1.714e-01, -1.203e-01, -4.437e-02, 4.755e-02, -3.764e-02, -1.596e-01)); + r += mul(s0_8, M4(3.349e-02, -1.671e-03, 1.049e-01, -5.840e-03, -1.073e-01, -7.845e-02, 2.098e-01, -6.893e-02, -1.870e-01, 6.613e-02, 6.280e-02, -6.822e-02, -1.402e-01, -2.053e-02, -1.265e-02, -1.554e-02)); + r += mul(s1_0, M4(-2.111e-01, -9.747e-02, -1.930e-01, 9.786e-02, 1.359e-01, -4.889e-02, 6.019e-02, 1.145e-02, 5.121e-02, 2.447e-02, -5.930e-02, 2.061e-01, 8.178e-02, -1.531e-02, -5.044e-03, 1.083e-01)); + r += mul(s1_1, M4(-3.074e-03, -8.969e-02, 1.186e-01, -1.392e-03, -1.821e-01, 2.694e-02, 5.397e-02, 1.006e-01, -9.086e-02, -1.056e-01, -1.693e-01, 4.910e-02, -1.256e-01, -8.187e-02, -8.469e-02, -6.965e-02)); + r += mul(s1_2, M4(-4.244e-01, -2.091e-01, 2.887e-02, 2.439e-01, -1.329e-01, -7.817e-03, 9.411e-03, 1.358e-01, 1.921e-02, 5.657e-02, 6.304e-02, 4.841e-02, 6.155e-02, 8.668e-02, -6.354e-02, 1.539e-01)); + r 
+= mul(s1_3, M4(1.953e-01, 3.159e-02, -3.004e-01, -1.409e-02, -9.977e-02, -2.672e-03, -8.568e-03, -5.583e-02, -3.060e-01, 1.161e-01, -7.316e-02, 2.468e-02, -9.539e-02, 1.675e-02, -1.193e-01, -9.390e-02)); + r += mul(s1_4, M4(-2.435e-01, 1.206e-02, -3.330e-01, 2.784e-01, -1.101e-01, -1.052e-01, -1.901e-01, -1.526e-01, -5.335e-02, 2.873e-01, -4.360e-02, -3.828e-02, -7.663e-02, -1.771e-01, 7.653e-02, 1.080e-01)); + r += mul(s1_5, M4(-4.594e-01, -1.617e-01, 1.607e-02, 2.628e-01, 5.469e-02, 4.403e-02, 1.393e-01, 8.063e-02, 9.056e-02, -1.311e-01, 6.889e-02, -1.527e-02, -1.205e-02, -5.024e-02, 3.551e-02, -1.588e-01)); + r += mul(s1_6, M4(-8.833e-02, -2.610e-01, 1.757e-01, -1.550e-01, 4.564e-02, 9.005e-02, -2.060e-01, 1.243e-01, 2.637e-01, 2.292e-01, -4.701e-02, -9.572e-02, 2.924e-01, 1.623e-01, 5.523e-02, 1.126e-02)); + r += mul(s1_7, M4(3.183e-01, -8.466e-02, 4.289e-01, -1.178e-01, 4.748e-02, -2.097e-01, 1.554e-01, -8.571e-02, -5.126e-02, -1.034e-01, 1.339e-02, -1.571e-01, 4.438e-02, -4.967e-02, 1.716e-02, 1.383e-02)); + r += mul(s1_8, M4(-3.187e-01, 2.494e-01, 2.216e-02, -6.215e-02, 2.288e-01, 2.446e-01, 9.322e-02, -3.364e-02, 5.613e-02, -6.874e-02, 9.164e-02, 7.186e-03, -9.362e-03, -5.247e-03, 3.488e-02, 2.015e-02)); + r += mul(s2_0, M4(8.030e-02, -1.244e-02, 7.993e-02, 2.598e-01, -2.316e-02, 5.534e-02, 5.730e-02, -5.297e-02, -1.069e-01, 7.793e-02, 2.488e-01, 1.935e-02, 2.498e-01, 1.815e-01, -1.495e-01, -7.393e-02)); + r += mul(s2_1, M4(-8.116e-02, 4.036e-02, 1.196e-01, -1.272e-01, 1.161e-01, 1.581e-01, -8.973e-02, 8.201e-02, -2.048e-01, 1.527e-01, 4.581e-01, -3.365e-01, 3.989e-01, 4.902e-01, -1.368e-01, 1.206e-01)); + r += mul(s2_2, M4(9.207e-02, 3.834e-01, 1.062e-01, -1.624e-01, 4.958e-02, -7.315e-02, 1.455e-02, 7.133e-04, -1.119e-01, 5.565e-02, 1.329e-01, -2.086e-01, -2.720e-01, 2.259e-02, -3.638e-02, 2.410e-01)); + r += mul(s2_3, M4(-2.493e-02, -2.591e-01, -2.875e-02, -1.175e-01, 1.286e-01, -1.532e-01, 9.002e-02, -1.420e-01, 6.273e-02, -1.023e-01, 7.325e-02, 
1.587e-01, 1.253e-01, -6.595e-01, 6.359e-02, 1.891e-01)); + r += mul(s2_4, M4(1.595e-01, -1.435e-01, -1.631e-01, -7.877e-02, 1.582e-01, 1.051e-01, -7.296e-02, 2.245e-01, -1.422e-01, -4.053e-01, 1.393e-01, -2.186e-03, 5.780e-02, -6.208e-02, -1.152e-01, -4.898e-01)); + r += mul(s2_5, M4(5.204e-02, 5.165e-02, 9.230e-03, 2.811e-02, -3.048e-02, -9.137e-02, -7.371e-02, -1.152e-01, -6.553e-02, -1.954e-01, -6.013e-01, 3.103e-02, -2.552e-01, -5.532e-01, 4.900e-04, 7.283e-02)); + r += mul(s2_6, M4(-6.250e-02, -8.190e-02, -2.849e-01, -2.471e-02, 1.289e-01, -3.091e-01, 1.231e-01, 6.081e-02, 3.088e-02, 1.494e-01, -4.576e-02, -4.224e-02, -1.199e-01, 2.171e-01, 5.012e-02, -1.173e-01)); + r += mul(s2_7, M4(-5.584e-02, -1.592e-02, 5.392e-02, 2.378e-02, -2.354e-02, 1.150e-01, -3.141e-02, -1.676e-01, 7.981e-02, -2.169e-02, 1.075e-01, -5.479e-03, 2.861e-01, -2.411e-01, 2.733e-01, -2.672e-01)); + r += mul(s2_8, M4(-4.809e-02, -2.214e-01, -3.851e-02, -1.119e-01, -1.870e-01, -4.463e-02, -1.437e-02, 1.404e-02, -6.566e-03, -2.549e-01, -8.061e-02, -2.557e-02, -2.421e-01, 4.801e-01, -1.782e-01, 1.192e-01)); + r += mul(s3_0, M4(1.082e-01, -9.349e-02, -4.882e-03, -9.671e-02, -5.846e-02, 5.447e-02, -4.925e-02, 6.508e-02, -7.234e-03, 2.136e-01, 1.191e-01, -1.146e-01, -2.104e-02, 5.780e-02, -4.717e-02, -5.266e-02)); + r += mul(s3_1, M4(8.722e-02, -4.961e-02, -4.340e-02, -1.314e-01, 2.672e-02, -1.840e-02, -4.682e-02, 2.123e-01, 7.567e-04, 1.599e-01, 3.556e-01, -6.521e-01, -8.441e-02, -9.187e-02, -9.814e-02, 1.154e-01)); + r += mul(s3_2, M4(-1.778e-01, -7.969e-02, 6.154e-02, 3.048e-02, -3.972e-02, -1.056e-01, 6.651e-02, 1.559e-01, -3.278e-03, 9.446e-02, 6.509e-02, -1.627e-01, 8.409e-02, 5.832e-02, 1.216e-01, 8.892e-02)); + r += mul(s3_3, M4(-1.267e-02, 9.277e-02, 1.067e-01, -2.599e-02, -8.945e-02, 9.880e-02, -2.422e-02, -9.366e-03, 1.762e-01, -3.082e-02, -8.064e-02, 1.689e-01, -5.425e-02, -3.865e-03, 1.076e-02, -3.944e-02)); + r += mul(s3_4, M4(-3.883e-02, 8.021e-02, 6.377e-02, 3.235e-01, 
-1.697e-01, 3.375e-02, 7.622e-03, 3.170e-01, 8.253e-02, 4.422e-02, 8.716e-03, 9.710e-02, -1.294e-01, -8.479e-02, -2.092e-01, 9.940e-03)); + r += mul(s3_5, M4(-4.877e-02, -1.744e-01, -9.427e-02, 1.363e-01, -6.597e-02, -4.598e-02, 6.530e-02, -1.739e-01, -7.951e-02, 5.267e-01, -3.922e-01, -5.767e-02, 1.514e-01, 1.597e-01, 1.178e-01, 1.291e-02)); + r += mul(s3_6, M4(1.090e-01, 8.036e-02, 7.251e-02, -1.705e-01, 7.163e-02, 7.327e-02, 2.597e-02, 1.043e-01, 9.985e-02, -5.557e-02, -5.285e-02, -3.540e-02, -2.397e-02, 5.184e-02, -7.039e-02, 1.035e-03)); + r += mul(s3_7, M4(-7.628e-02, 8.150e-03, -1.005e-01, -1.243e-01, -2.891e-01, 1.770e-01, 7.126e-02, -1.134e-01, -7.591e-02, 4.806e-02, 9.831e-03, 1.433e-01, 1.231e-01, -1.298e-01, 1.780e-01, -1.115e-01)); + r += mul(s3_8, M4(1.036e-01, 1.694e-01, -5.191e-02, 1.982e-02, -1.932e-01, 5.030e-02, 1.057e-01, -3.385e-02, 2.534e-02, -2.275e-01, -9.040e-02, 2.515e-02, -1.305e-01, -2.527e-02, 5.679e-02, 5.587e-02)); + r += mul(s4_0, M4(8.688e-02, 2.072e-01, -8.062e-02, 9.266e-02, 6.784e-02, 2.153e-02, 6.418e-02, -2.933e-02, -9.443e-02, -4.865e-02, 1.375e-01, -1.764e-01, -2.457e-02, 9.365e-02, -7.196e-02, -6.391e-02)); + r += mul(s4_1, M4(2.333e-01, 3.864e-01, -1.380e-01, 9.062e-02, 1.107e-03, 1.837e-01, -5.382e-02, -4.475e-02, -3.119e-02, 1.173e-01, 1.274e-01, -1.866e-02, 1.037e-01, 2.066e-01, -7.300e-02, -1.161e-01)); + r += mul(s4_2, M4(-1.456e-01, 2.448e-01, 1.572e-01, 1.278e-01, -1.013e-02, -5.934e-02, -2.116e-02, -2.452e-02, -1.327e-01, 2.185e-02, -1.993e-02, -3.170e-02, -8.520e-02, 1.591e-01, 3.506e-02, -9.729e-02)); + r += mul(s4_3, M4(2.335e-01, -2.123e-01, 2.018e-02, -1.073e-02, -9.702e-02, -5.556e-02, 8.472e-02, 9.629e-02, 1.970e-01, -5.520e-02, 4.945e-02, 1.202e-01, 4.341e-02, 1.853e-01, -1.267e-01, 2.928e-02)); + r += mul(s4_4, M4(2.631e-01, -6.454e-02, -1.787e-01, 2.898e-02, 1.660e-01, -7.202e-03, -9.751e-02, 5.543e-01, 5.921e-02, 3.227e-01, 1.145e-01, 6.503e-01, 1.196e-01, -1.562e-03, 9.743e-02, 3.687e-02)); + r += 
mul(s4_5, M4(1.116e-01, 1.249e-01, -1.325e-01, 1.028e-01, -8.355e-03, 1.822e-01, 1.045e-01, 1.230e-01, -2.942e-01, -3.597e-02, -2.454e-01, 6.629e-02, 1.839e-02, 3.743e-01, 5.781e-02, 5.934e-02)); + r += mul(s4_6, M4(-3.997e-02, 1.346e-01, 9.830e-02, -2.274e-02, -1.619e-01, 1.750e-01, -6.734e-02, -1.691e-02, -1.965e-01, -7.976e-02, 6.646e-02, 8.609e-02, -2.992e-02, 6.498e-02, 7.941e-03, 9.300e-02)); + r += mul(s4_7, M4(1.906e-01, 6.598e-02, 1.268e-03, -6.979e-02, -2.120e-01, -1.267e-01, -1.558e-01, -1.895e-01, -1.123e-01, -2.401e-02, -2.582e-01, 1.998e-01, 3.098e-03, 1.005e-01, 1.134e-01, 1.317e-02)); + r += mul(s4_8, M4(2.842e-02, -9.225e-02, -1.068e-01, -2.368e-01, 8.938e-02, -9.841e-02, -4.640e-02, -2.911e-02, -2.735e-02, 4.138e-02, -3.043e-01, -3.354e-02, -1.169e-01, 6.541e-02, 7.405e-02, -5.760e-02)); + r += mul(s5_0, M4(-2.697e-02, -8.209e-02, -6.014e-02, 8.016e-02, -5.242e-02, 6.674e-02, -9.365e-02, -3.331e-02, 1.423e-01, 1.208e-01, 2.431e-01, -1.652e-01, -3.686e-02, -1.841e-01, -4.516e-02, -6.155e-02)); + r += mul(s5_1, M4(-4.963e-02, -2.090e-01, 8.904e-02, 1.540e-02, 6.672e-03, -8.529e-02, 2.023e-01, -3.174e-01, -8.613e-02, 8.896e-02, 1.635e-01, 2.409e-02, -8.966e-02, 1.215e-01, -1.935e-02, -2.705e-02)); + r += mul(s5_2, M4(6.732e-02, -1.502e-02, -9.210e-02, -1.770e-01, 1.581e-02, 4.328e-02, -9.155e-02, 9.680e-02, 3.393e-02, -1.307e-01, -6.939e-02, -6.385e-02, -9.215e-03, -1.056e-01, 1.467e-01, -1.616e-01)); + r += mul(s5_3, M4(-7.139e-02, 1.397e-01, -1.181e-01, -3.929e-02, 3.273e-01, 1.138e-01, 2.840e-01, -3.111e-01, 8.805e-02, -1.499e-02, 2.261e-01, -1.184e-01, -1.775e-01, -2.000e-01, -1.358e-01, 5.634e-03)); + r += mul(s5_4, M4(-1.534e-01, -1.025e-01, 1.112e-01, 2.105e-02, 5.764e-02, -8.380e-02, 1.161e-01, -4.252e-01, -4.067e-02, -1.210e-01, -1.053e-01, 2.702e-01, -1.395e-02, -1.419e-01, 1.070e-01, -1.636e-01)); + r += mul(s5_5, M4(-3.549e-02, 7.727e-02, 1.248e-02, 5.666e-02, -1.519e-01, 5.273e-02, -1.705e-01, -1.669e-01, 1.945e-01, 1.301e-01, 
-2.281e-01, -1.808e-01, -6.349e-02, -3.592e-01, 2.363e-01, 3.038e-02)); + r += mul(s5_6, M4(-1.288e-02, -1.269e-01, -3.312e-02, -6.917e-02, -1.319e-01, -1.544e-02, 1.783e-01, 6.150e-02, -2.866e-02, 6.708e-02, -7.747e-02, -4.994e-02, 1.626e-01, 7.398e-04, 8.980e-02, 3.971e-02)); + r += mul(s5_7, M4(-9.159e-02, -5.895e-04, 8.511e-02, -3.319e-02, 7.901e-02, -7.770e-02, -4.426e-02, -2.112e-01, -1.948e-01, 2.900e-01, -2.900e-01, -6.018e-03, 1.328e-01, -2.664e-01, 1.343e-01, -7.560e-02)); + r += mul(s5_8, M4(1.040e-01, 5.183e-02, 1.105e-01, 8.969e-02, -8.304e-02, -1.443e-01, -2.251e-01, -7.725e-02, -9.840e-02, -2.817e-01, -2.504e-01, -3.580e-02, 3.837e-03, 1.300e-01, 1.364e-01, -3.900e-02)); + r += mul(s6_0, M4(5.644e-01, 2.238e-02, -3.205e-01, 2.862e-01, -4.545e-02, 9.792e-03, 2.880e-01, -2.262e-01, -7.295e-02, -6.523e-02, -3.954e-02, -3.450e-02, 8.041e-02, 6.435e-02, -1.465e-01, 6.761e-02)); + r += mul(s6_1, M4(1.684e-01, 6.494e-02, -3.871e-02, 4.333e-01, -1.253e-01, -1.290e-02, -3.312e-02, 1.780e-02, -7.196e-02, 1.142e-01, 1.516e-01, -9.619e-02, 3.103e-02, -1.658e-01, 7.592e-02, -1.505e-01)); + r += mul(s6_2, M4(1.490e-01, 1.816e-01, -2.416e-01, 2.591e-01, -3.719e-02, -1.706e-01, 7.797e-02, -3.245e-02, -5.190e-03, 2.357e-02, -3.275e-02, -6.354e-02, 1.407e-01, 6.104e-02, -3.129e-02, 8.271e-02)); + r += mul(s6_3, M4(7.564e-01, 3.245e-01, -3.065e-01, -1.192e-01, 5.567e-02, -8.577e-02, -8.148e-02, 4.286e-02, -8.816e-02, -4.774e-02, -1.396e-01, -1.188e-01, 7.332e-03, 1.308e-02, -1.512e-02, -3.359e-02)); + r += mul(s6_4, M4(-5.122e-01, 4.765e-01, 2.806e-01, -3.779e-01, 3.113e-03, 1.925e-01, 1.279e-01, -5.210e-02, 2.445e-02, -6.378e-03, 5.360e-02, 1.194e-01, 6.073e-02, -8.251e-02, 1.349e-02, 1.866e-01)); + r += mul(s6_5, M4(-6.574e-01, -5.307e-01, 4.246e-01, -1.077e-01, 1.071e-01, 3.634e-01, -9.335e-02, 5.315e-03, -2.182e-01, 1.369e-01, -2.047e-01, -3.629e-02, 6.883e-03, -1.242e-02, 8.672e-02, 3.954e-03)); + r += mul(s6_6, M4(-1.734e-01, -2.474e-01, -9.461e-02, -2.095e-01, 
4.038e-02, -1.877e-02, -1.431e-01, 6.783e-03, 2.776e-01, 5.036e-02, -1.359e-01, 1.062e-01, -3.608e-04, 2.903e-02, 5.130e-02, -3.758e-02)); + r += mul(s6_7, M4(-1.645e-01, -5.196e-01, 3.487e-02, 1.290e-01, 1.361e-01, 1.889e-02, -1.576e-01, 4.734e-02, 3.265e-02, -1.890e-01, 1.266e-01, 5.530e-02, -9.453e-02, 1.284e-02, -4.585e-02, -1.094e-01)); + r += mul(s6_8, M4(-8.783e-02, 1.852e-01, 7.476e-02, -1.801e-01, -5.164e-02, 8.656e-03, -2.029e-02, -2.596e-02, 2.435e-02, -9.673e-03, 3.363e-02, 4.865e-03, 1.859e-02, 5.536e-02, 1.141e-01, -1.484e-02)); + r += mul(s7_0, M4(-6.820e-02, -2.884e-02, -1.303e-01, 1.882e-01, 2.328e-01, 1.377e-01, 5.239e-02, -4.071e-02, -5.681e-02, -2.048e-01, -1.447e-01, 5.849e-02, -8.528e-02, -1.719e-01, -6.476e-02, -8.679e-02)); + r += mul(s7_1, M4(-5.278e-02, -1.000e-02, -1.634e-02, 2.524e-01, 2.194e-01, 2.207e-02, 1.995e-01, -1.895e-01, 6.924e-02, 2.295e-02, -6.099e-03, 1.654e-01, 3.084e-02, 4.470e-01, 1.683e-01, -4.328e-01)); + r += mul(s7_2, M4(-5.793e-02, 1.309e-01, 2.056e-02, 6.027e-02, -1.388e-01, 7.674e-02, -1.620e-01, 2.527e-03, 3.178e-02, 5.334e-02, -6.098e-02, -6.434e-02, 8.816e-02, 1.462e-01, -8.037e-02, -5.590e-02)); + r += mul(s7_3, M4(-2.568e-01, 1.857e-02, -1.087e-01, -8.671e-02, 1.852e-01, -4.477e-02, -8.057e-02, 1.461e-01, 1.091e-01, -1.958e-01, -6.820e-02, 1.089e-01, -2.141e-01, 1.391e-01, 1.000e-01, 1.991e-01)); + r += mul(s7_4, M4(-1.097e-01, -5.762e-02, -1.018e-01, -8.032e-02, 4.852e-02, -2.752e-01, -1.481e-01, 1.985e-01, -4.663e-03, 3.380e-01, 7.200e-02, 9.291e-03, 4.142e-01, 4.841e-03, 1.571e-01, 3.154e-01)); + r += mul(s7_5, M4(5.582e-03, -7.928e-02, 1.440e-01, -9.364e-02, -8.041e-02, -1.663e-01, -2.057e-01, 1.327e-01, -5.687e-02, 9.180e-02, -1.950e-01, 1.874e-01, -2.831e-02, -1.973e-01, -1.410e-01, 1.005e-01)); + r += mul(s7_6, M4(8.888e-02, -2.330e-02, 7.020e-02, -1.240e-01, -6.910e-02, 2.411e-02, 3.791e-03, 1.450e-01, -3.441e-01, -4.260e-01, 5.536e-02, -4.727e-02, -3.908e-01, 1.535e-01, -6.593e-02, 2.619e-02)); + r += 
mul(s7_7, M4(-9.912e-02, 8.917e-02, 1.139e-01, -3.587e-02, -1.437e-03, -1.247e-01, -7.667e-03, 1.110e-01, -1.200e-01, 9.394e-02, 2.830e-01, -1.033e-01, 1.121e-01, 2.775e-01, -1.737e-01, -7.406e-02)); + r += mul(s7_8, M4(-4.771e-02, 2.535e-02, -2.644e-03, -8.139e-02, -3.943e-02, -7.504e-02, -1.410e-01, -8.238e-03, 5.641e-02, 3.758e-02, 3.588e-01, -4.553e-02, -1.887e-01, -5.847e-01, 1.438e-01, -1.075e-02)); + r += V4(-1.968e-01, 2.644e-02, 3.143e-03, -1.147e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.665e-01, 1.341e-01, -3.784e-02, -7.759e-02, -7.067e-02, -1.354e-02, -3.162e-01, 9.772e-02, -7.751e-03, 1.089e-01, -7.867e-02, 7.391e-02, 1.113e-01, 2.431e-02, -1.126e-02, -1.944e-02)); + r += mul(s0_1, M4(-7.397e-02, 1.614e-01, -9.000e-02, 6.037e-02, -2.921e-01, 2.061e-01, 5.314e-04, 2.018e-01, -9.617e-02, -1.867e-02, -2.428e-02, 1.273e-01, -1.373e-01, 2.826e-02, -6.593e-02, -2.146e-01)); + r += mul(s0_2, M4(3.230e-02, 1.260e-01, 1.749e-01, -2.806e-02, -3.382e-01, 1.529e-01, -7.863e-02, -1.249e-01, 1.769e-03, 9.832e-02, 1.502e-01, 1.163e-01, 7.971e-02, -4.328e-02, -9.819e-02, -3.709e-02)); + r += mul(s0_3, M4(-7.267e-02, 9.105e-03, -2.213e-01, -7.500e-02, -9.302e-02, 2.785e-02, 1.968e-01, -7.105e-02, -6.686e-02, 3.332e-02, 7.118e-02, -1.031e-01, 2.227e-02, -1.745e-01, -5.851e-02, -4.692e-02)); + r += mul(s0_4, 
M4(-4.300e-02, 5.561e-03, -1.691e-01, 1.734e-02, -2.447e-01, 1.481e-01, 4.838e-02, -2.720e-01, 3.631e-02, -1.309e-01, 4.375e-02, 3.994e-01, -6.899e-02, 1.035e-01, 1.422e-01, 6.815e-02)); + r += mul(s0_5, M4(-5.045e-03, 9.328e-03, 6.229e-02, -2.625e-01, 2.517e-01, 1.145e-01, 1.115e-02, 6.815e-02, 5.933e-02, 3.550e-02, 1.827e-01, -2.651e-01, 5.388e-02, -1.054e-01, -5.995e-02, 3.073e-01)); + r += mul(s0_6, M4(1.986e-02, -5.852e-02, -6.029e-02, 6.415e-02, -1.902e-01, 9.542e-02, 6.488e-02, 9.892e-02, -1.327e-02, -2.054e-02, -2.209e-01, -1.727e-01, -7.929e-02, 8.246e-02, -2.585e-02, -1.415e-02)); + r += mul(s0_7, M4(1.705e-01, -1.150e-01, 5.452e-02, 1.132e-01, -2.629e-01, 1.903e-03, -1.429e-01, 5.953e-02, 2.621e-02, -1.295e-01, 6.998e-02, 1.393e-01, 1.141e-02, -1.841e-02, 4.103e-02, -7.428e-02)); + r += mul(s0_8, M4(8.787e-02, -1.161e-01, 4.994e-02, 8.147e-02, 1.031e-01, 2.212e-02, 7.269e-02, 1.561e-01, 6.051e-02, 3.214e-02, -9.364e-02, -4.011e-01, -1.217e-01, 6.956e-02, 1.112e-01, 1.290e-01)); + r += mul(s1_0, M4(6.202e-02, 4.040e-02, -2.698e-01, -1.926e-01, -3.636e-02, 1.379e-02, 8.940e-02, -3.803e-02, -1.049e-02, -5.670e-02, -3.738e-02, -1.053e-01, 1.952e-03, -4.216e-02, 1.101e-01, -1.522e-01)); + r += mul(s1_1, M4(-9.631e-02, -8.827e-03, -3.405e-01, -5.959e-02, -2.423e-01, 2.353e-01, 9.648e-03, -5.868e-02, 5.729e-02, 7.017e-03, -4.890e-02, -1.569e-01, 1.440e-01, -4.340e-02, -2.159e-02, 8.336e-02)); + r += mul(s1_2, M4(2.106e-01, -6.010e-03, 1.479e-01, -3.243e-02, 1.049e-01, -2.281e-01, 2.060e-01, -4.001e-04, 1.115e-01, 2.111e-02, -8.208e-03, -4.814e-02, 4.377e-02, -7.831e-02, 6.725e-02, 1.976e-01)); + r += mul(s1_3, M4(-2.004e-01, -6.799e-02, -5.068e-02, 2.889e-01, 7.638e-02, -1.213e-01, -1.691e-01, 6.234e-03, -3.056e-02, -4.525e-02, -2.898e-01, 1.399e-01, -3.600e-01, 1.727e-01, -1.132e-02, 9.502e-02)); + r += mul(s1_4, M4(-5.388e-01, -7.512e-03, -8.975e-02, 8.544e-02, 1.271e-01, -7.689e-02, -1.746e-02, 1.128e-01, -4.000e-02, 2.901e-02, -1.270e-01, -2.532e-01, 
1.449e-01, 6.640e-02, -2.687e-01, 8.076e-02)); + r += mul(s1_5, M4(8.166e-01, -1.339e-01, 6.942e-01, 9.886e-01, -4.053e-02, 1.141e-01, -1.294e-02, 6.103e-02, -7.290e-03, 5.853e-02, 1.198e-01, 4.576e-02, -2.898e-02, 4.735e-02, 4.603e-02, -3.222e-01)); + r += mul(s1_6, M4(-2.741e-01, -3.591e-02, 1.510e-01, 4.335e-02, 7.137e-02, 1.127e-02, 7.848e-02, 3.701e-02, -1.164e-01, 6.880e-02, 6.543e-03, 9.484e-02, 2.741e-01, 2.886e-02, 4.433e-02, 1.999e-01)); + r += mul(s1_7, M4(-1.289e-01, -5.624e-02, 6.800e-02, -1.822e-01, 8.965e-02, 2.975e-02, -1.535e-01, -1.449e-01, -5.375e-02, -2.592e-02, 1.103e-01, 6.022e-02, 1.662e-01, -1.462e-01, 1.722e-02, -3.086e-02)); + r += mul(s1_8, M4(-1.834e-01, 7.416e-02, 4.816e-01, 4.575e-01, -4.881e-02, -5.429e-03, 4.699e-02, 7.298e-02, 1.488e-01, -6.241e-02, 6.913e-02, 1.607e-01, -1.697e-01, -1.273e-01, -2.675e-04, -1.878e-01)); + r += mul(s2_0, M4(3.750e-02, -3.793e-02, 3.057e-01, 2.131e-02, -1.262e-01, 1.202e-01, 2.102e-02, 5.652e-02, -4.985e-02, -2.134e-01, 1.306e-01, -4.159e-04, -4.256e-01, 2.828e-01, 1.423e-01, 3.310e-01)); + r += mul(s2_1, M4(9.804e-02, -1.908e-01, 4.843e-02, -9.660e-02, -2.456e-01, 2.842e-03, 1.447e-03, 9.591e-02, 4.590e-02, -5.576e-01, 3.068e-01, 4.523e-02, -2.539e-01, 1.984e-01, 6.054e-02, -2.267e-01)); + r += mul(s2_2, M4(-1.325e-02, -1.171e-01, -6.820e-02, 6.680e-02, -4.834e-02, 8.046e-02, -1.157e-02, 1.409e-01, -1.438e-01, -3.252e-01, -2.903e-01, 3.376e-02, -8.853e-02, 8.772e-02, 2.176e-01, 4.031e-01)); + r += mul(s2_3, M4(2.235e-01, -1.113e-01, 2.688e-01, 2.116e-01, -1.617e-01, -1.286e-02, 2.732e-01, 6.103e-02, -1.480e-01, 4.627e-02, -1.872e-02, 6.235e-02, 4.024e-01, 8.464e-02, -4.365e-01, 8.703e-02)); + r += mul(s2_4, M4(-1.671e-01, 1.626e-01, -5.575e-02, -2.082e-02, -1.876e-01, 3.607e-01, -1.139e-01, -4.762e-02, -1.988e-02, 1.190e-01, -2.995e-01, 1.082e-01, 3.695e-01, 1.920e-02, -2.015e-01, -6.103e-01)); + r += mul(s2_5, M4(-1.472e-01, 7.181e-02, 5.143e-02, 1.519e-01, 7.109e-02, -6.342e-02, 1.723e-01, 
2.540e-01, -5.503e-01, -1.194e-01, -1.900e-01, -2.061e-01, 1.004e-01, -1.319e-01, 3.679e-01, 1.158e-01)); + r += mul(s2_6, M4(-2.509e-02, 5.266e-02, 3.585e-01, -1.288e-01, 1.018e-01, 8.921e-02, 1.694e-01, -1.240e-01, -1.423e-01, 3.175e-02, -4.048e-02, 5.739e-02, -1.162e-01, 1.158e-01, -1.085e-01, 1.682e-01)); + r += mul(s2_7, M4(2.379e-01, -3.928e-02, 1.503e-01, 8.918e-02, 2.459e-01, -1.859e-01, 4.131e-01, 5.902e-02, -2.605e-01, -8.128e-03, 3.047e-02, -1.869e-01, 5.074e-02, -1.430e-01, 1.626e-01, -1.676e-02)); + r += mul(s2_8, M4(-6.399e-02, 9.947e-02, 7.501e-02, -1.298e-01, 3.091e-01, -1.130e-01, 1.293e-01, -1.407e-01, -2.135e-01, 6.533e-02, 1.802e-02, -1.939e-01, 1.320e-01, -1.771e-01, -3.453e-02, 1.703e-01)); + r += mul(s3_0, M4(-1.142e-01, 1.466e-01, 7.215e-03, -4.026e-02, 4.854e-02, -2.248e-02, 4.894e-02, -3.407e-02, 1.147e-01, -1.795e-01, 6.157e-02, 5.692e-02, 3.947e-02, 3.094e-02, -6.908e-02, 9.956e-02)); + r += mul(s3_1, M4(-1.148e-02, 6.514e-02, 3.684e-03, -1.459e-02, 2.465e-02, -2.901e-02, -8.985e-02, -1.146e-01, 4.421e-01, -4.375e-01, 2.371e-01, -1.641e-01, -2.372e-02, -1.411e-02, 6.435e-02, 9.437e-02)); + r += mul(s3_2, M4(5.557e-02, -2.242e-01, -3.345e-02, -2.953e-02, -1.091e-02, 2.882e-03, -1.566e-01, -3.146e-02, 6.489e-02, -2.984e-01, -9.024e-04, -4.651e-02, -4.070e-04, 5.174e-02, -5.263e-02, -1.400e-01)); + r += mul(s3_3, M4(-1.358e-01, 5.689e-02, -1.724e-01, -4.284e-02, 1.669e-01, -1.199e-01, -3.149e-01, -1.468e-01, -5.475e-03, 1.038e-01, 2.023e-01, -2.987e-02, -8.255e-02, 7.491e-02, -6.692e-02, -8.698e-02)); + r += mul(s3_4, M4(2.665e-01, 1.084e-01, 3.464e-02, 1.097e-01, -1.104e-01, 2.399e-01, -3.444e-01, 1.153e-01, 1.271e-01, 1.372e-01, -2.740e-01, 1.911e-01, 3.881e-03, -7.096e-02, -9.862e-02, -1.155e-02)); + r += mul(s3_5, M4(-1.003e-02, -8.276e-02, 1.139e-01, -9.830e-02, 5.847e-02, -2.887e-01, -3.093e-01, -1.283e-01, -1.310e-01, 1.559e-02, -1.089e-01, -1.444e-01, -6.158e-02, -1.244e-01, -1.105e-01, 1.040e-01)); + r += mul(s3_6, M4(-4.486e-02, 
-8.327e-02, -2.001e-01, 1.306e-01, -2.507e-01, -1.655e-01, -3.430e-01, -5.603e-02, -1.088e-02, -1.598e-02, 5.772e-02, -2.714e-02, -6.248e-02, 2.533e-02, -3.380e-02, -4.265e-02)); + r += mul(s3_7, M4(-2.085e-01, 1.207e-01, -3.720e-02, 3.187e-02, 8.328e-02, -1.478e-01, -3.583e-01, -8.352e-02, 1.044e-01, -1.021e-02, 1.267e-01, -8.480e-02, 1.596e-01, 1.340e-02, 9.415e-03, -8.058e-02)); + r += mul(s3_8, M4(9.541e-02, 2.103e-02, -3.731e-03, 2.217e-02, -2.704e-01, -6.955e-02, -2.359e-01, -3.753e-01, 5.169e-02, 3.572e-02, 2.793e-02, -8.367e-02, 2.345e-02, -2.282e-02, 3.577e-02, -1.130e-01)); + r += mul(s4_0, M4(-1.707e-01, 3.997e-02, -1.141e-01, 1.112e-01, 1.392e-02, -1.528e-01, 6.816e-02, 1.333e-02, 1.177e-03, -5.273e-02, 1.844e-02, 1.732e-02, 1.027e-01, 9.282e-02, -8.965e-02, -3.378e-02)); + r += mul(s4_1, M4(3.575e-02, -7.195e-02, -5.859e-02, 8.818e-02, -8.622e-02, -1.008e-01, 2.017e-02, -1.139e-02, 1.806e-02, -2.028e-01, 5.585e-02, 1.500e-01, 1.127e-01, -3.169e-02, 1.197e-01, 4.792e-02)); + r += mul(s4_2, M4(-2.604e-03, 1.051e-02, 1.412e-01, -5.863e-02, -4.217e-02, 3.996e-02, -9.017e-03, 2.003e-01, 3.660e-02, 4.378e-02, -4.152e-02, -1.036e-01, 1.732e-01, -3.565e-02, 7.231e-02, 1.261e-01)); + r += mul(s4_3, M4(2.970e-02, -4.589e-02, -1.678e-02, -1.266e-02, 1.554e-01, -8.091e-02, 1.432e-01, -2.513e-02, 6.188e-02, 3.822e-02, 1.232e-01, 8.549e-02, -7.653e-02, -1.183e-02, 5.302e-02, 1.202e-01)); + r += mul(s4_4, M4(2.794e-01, -4.722e-02, -3.501e-02, 3.923e-02, -3.495e-02, 2.474e-01, -1.880e-01, 4.487e-02, 4.384e-02, 2.064e-01, -3.922e-01, 1.410e-01, 1.671e-01, -3.333e-01, 2.001e-01, -1.453e-01)); + r += mul(s4_5, M4(8.181e-02, -1.581e-02, 1.478e-01, 1.558e-01, -6.474e-02, -1.127e-01, 4.454e-02, -1.335e-01, 1.221e-01, -2.658e-01, -2.024e-02, -2.517e-02, 7.345e-02, 5.997e-02, -1.399e-02, 1.402e-01)); + r += mul(s4_6, M4(-1.348e-01, -6.138e-02, -9.543e-03, -7.792e-03, -1.458e-02, -7.404e-03, -1.125e-01, 3.650e-02, 3.389e-02, 2.502e-02, 8.436e-02, 1.033e-02, 1.219e-01, 
-4.412e-02, -5.576e-02, -3.970e-03)); + r += mul(s4_7, M4(-3.417e-02, -1.380e-01, -1.627e-01, -1.374e-01, -1.803e-01, -4.939e-02, -7.251e-02, -2.915e-01, 1.441e-01, -1.316e-01, 2.778e-01, -8.583e-02, 3.703e-01, 6.378e-02, -1.088e-01, 1.179e-02)); + r += mul(s4_8, M4(-1.187e-02, 2.075e-01, -6.312e-02, -4.515e-01, 1.636e-01, 5.593e-03, -1.043e-02, 2.057e-02, -4.680e-01, -2.561e-02, -1.386e-01, -2.197e-01, 2.314e-01, -8.280e-02, -1.032e-01, -2.514e-01)); + r += mul(s5_0, M4(-9.275e-02, -6.584e-02, 8.687e-02, -1.240e-01, 1.077e-01, -3.266e-01, 3.930e-02, -1.755e-01, 1.417e-01, 9.351e-03, 6.679e-03, 4.820e-02, -2.202e-01, -1.487e-02, 2.234e-01, 5.928e-02)); + r += mul(s5_1, M4(4.244e-03, -8.613e-02, 7.380e-02, -5.880e-02, -1.364e-01, 1.651e-04, 1.180e-01, -4.176e-02, 2.033e-01, -1.228e-01, 3.606e-02, 1.587e-01, -2.911e-01, -1.252e-01, 2.768e-02, 2.469e-02)); + r += mul(s5_2, M4(-3.755e-02, 5.802e-02, 5.191e-02, -5.098e-02, 3.585e-01, -6.661e-02, 3.846e-02, -7.954e-02, -1.309e-01, 6.603e-02, 6.997e-02, 6.127e-03, -1.656e-01, -5.526e-02, -1.928e-01, 2.505e-01)); + r += mul(s5_3, M4(4.658e-02, -1.393e-02, -8.022e-03, -6.763e-02, 1.514e-02, -4.698e-02, 8.419e-02, -5.550e-02, 2.622e-01, 1.319e-01, 2.548e-01, 1.069e-02, -1.016e-01, -8.551e-03, 2.224e-01, 2.126e-02)); + r += mul(s5_4, M4(-7.520e-02, 4.847e-02, -5.405e-02, -3.974e-02, 5.266e-02, 2.176e-01, 9.205e-02, 1.811e-01, -3.110e-01, 5.955e-01, -2.327e-01, 9.697e-02, -9.345e-02, -1.508e-01, 1.331e-01, -1.117e-01)); + r += mul(s5_5, M4(-8.692e-03, 1.264e-01, -2.997e-02, -1.821e-01, 5.133e-02, 1.825e-01, 1.131e-01, 3.357e-02, -6.751e-02, 1.866e-02, -3.785e-02, -2.172e-01, -1.564e-01, 9.422e-02, -1.388e-01, 1.427e-01)); + r += mul(s5_6, M4(-1.623e-01, -3.759e-02, 6.117e-04, -2.766e-02, 1.206e-02, 5.389e-03, 3.726e-02, 5.531e-02, -9.318e-02, -3.451e-02, -1.199e-01, 6.547e-03, -1.887e-01, -5.438e-03, 1.026e-01, 4.290e-02)); + r += mul(s5_7, M4(-5.104e-02, -5.492e-03, 5.954e-02, 6.657e-02, 1.943e-01, 3.488e-02, 1.388e-01, 
9.089e-02, -3.791e-01, -8.801e-02, 7.101e-03, -4.950e-02, 1.226e-01, 5.452e-02, -1.631e-01, -3.946e-03)); + r += mul(s5_8, M4(2.790e-01, -3.594e-02, 7.538e-03, 5.006e-01, -5.859e-02, 1.508e-01, 1.898e-02, -7.446e-02, 1.573e-01, -4.166e-02, -4.040e-02, -2.340e-01, -5.971e-02, -1.561e-01, 2.550e-02, 4.738e-01)); + r += mul(s6_0, M4(2.046e-01, -1.122e-04, 2.898e-01, 3.846e-01, 2.241e-01, -2.985e-02, -1.752e-01, -6.644e-02, -4.047e-02, 5.151e-02, 4.937e-02, -5.024e-02, -5.910e-02, -4.902e-02, -2.496e-02, -6.992e-03)); + r += mul(s6_1, M4(5.641e-02, 1.904e-01, 1.219e-01, 1.229e-01, 2.531e-01, -2.566e-01, 4.242e-02, 3.250e-02, 6.437e-02, -4.655e-02, 9.904e-02, -4.682e-02, -5.524e-02, -1.369e-01, 2.982e-02, -1.623e-01)); + r += mul(s6_2, M4(-2.628e-03, -2.270e-01, 1.512e-01, 3.369e-01, 1.355e-01, -6.808e-02, 3.351e-02, -1.966e-01, -5.281e-02, 4.194e-02, -9.085e-02, 1.204e-01, 1.040e-02, -4.112e-02, -3.086e-02, 8.178e-02)); + r += mul(s6_3, M4(1.006e-01, 2.417e-01, -8.287e-01, 4.077e-02, 3.227e-01, 7.162e-02, 2.490e-01, 1.280e-03, -2.438e-01, 8.278e-02, -1.428e-01, 6.916e-02, -6.346e-02, 9.989e-03, 1.231e-01, -1.119e-01)); + r += mul(s6_4, M4(-3.708e-01, -1.883e-01, 1.621e-01, -5.998e-01, -3.890e-02, 2.311e-01, 4.260e-02, 1.297e-01, -4.501e-02, -3.365e-02, -9.721e-02, -2.446e-01, -7.368e-02, 1.547e-01, -1.569e-01, 2.454e-01)); + r += mul(s6_5, M4(2.725e-01, 1.215e-01, 1.762e-01, 6.795e-01, -2.529e-01, -2.387e-02, -4.304e-02, -1.798e-01, -1.911e-01, -5.147e-02, -1.566e-01, -1.684e-02, 9.833e-02, 2.260e-02, 9.704e-02, -1.864e-01)); + r += mul(s6_6, M4(-5.371e-02, 7.565e-02, 2.204e-02, 7.461e-02, 1.265e-01, -8.627e-03, -3.540e-02, -1.881e-02, -2.202e-01, -6.486e-02, 2.303e-01, 5.538e-02, -4.212e-02, -4.130e-02, -5.136e-02, 6.881e-04)); + r += mul(s6_7, M4(3.574e-01, -3.207e-01, 3.057e-01, 2.824e-02, -1.310e-01, 4.847e-02, 1.734e-03, -1.464e-01, -8.705e-02, -1.006e-02, 6.186e-02, -7.267e-03, 8.074e-02, 7.032e-03, 3.469e-02, -8.714e-02)); + r += mul(s6_8, M4(-8.964e-02, 
-1.811e-02, 8.905e-02, 1.990e-01, 9.651e-03, 4.939e-02, -9.253e-02, -1.371e-01, 2.030e-01, -8.662e-02, 9.539e-02, 7.320e-02, 8.916e-02, -1.663e-04, 6.775e-02, 4.441e-02)); + r += mul(s7_0, M4(-1.150e-01, 1.302e-01, 2.498e-02, -1.342e-02, -1.705e-01, -9.536e-02, 1.426e-01, 8.821e-02, -3.133e-01, -1.155e-01, -2.864e-02, 3.672e-02, 1.816e-01, -1.225e-01, 2.499e-01, -2.514e-01)); + r += mul(s7_1, M4(-1.405e-01, 2.139e-01, -5.854e-02, 9.749e-02, -2.042e-01, -3.271e-01, -2.073e-01, -8.136e-02, 9.598e-03, -4.593e-02, 1.091e-01, -2.762e-02, -3.500e-01, -1.995e-01, 5.697e-02, -1.257e-01)); + r += mul(s7_2, M4(-3.587e-02, 8.100e-02, 2.962e-02, -7.049e-03, -9.572e-02, -1.407e-01, -9.330e-02, 3.043e-03, 1.304e-01, -1.072e-01, -1.646e-02, 4.262e-02, 1.474e-01, 3.215e-01, 1.387e-01, -1.788e-02)); + r += mul(s7_3, M4(-1.266e-01, -1.491e-02, -1.457e-01, 1.780e-03, -3.194e-01, -8.968e-02, 1.055e-01, 5.862e-02, 1.295e-01, -2.288e-02, -2.382e-01, -7.022e-02, 7.631e-02, -1.671e-01, -1.156e-01, 2.249e-01)); + r += mul(s7_4, M4(7.300e-02, -2.788e-01, 1.545e-01, 1.328e-02, -3.348e-02, 1.600e-01, -1.660e-01, 3.905e-01, 3.055e-01, 2.023e-01, -9.910e-02, 7.181e-02, 3.161e-01, 2.028e-01, 1.169e-02, 2.192e-02)); + r += mul(s7_5, M4(7.258e-02, -2.385e-02, 1.135e-01, 7.305e-02, 9.155e-02, -7.233e-02, -4.916e-03, -6.268e-02, 2.865e-01, -1.185e-01, 2.418e-01, 1.128e-01, 2.614e-01, 5.352e-03, 1.853e-01, 2.046e-01)); + r += mul(s7_6, M4(1.259e-01, 6.414e-02, -2.537e-02, 1.707e-02, -6.568e-02, -1.171e-01, 2.287e-01, 1.265e-01, 1.589e-01, -3.707e-02, -7.229e-02, -1.701e-01, -5.609e-02, -1.182e-01, -7.153e-02, 3.566e-01)); + r += mul(s7_7, M4(1.983e-01, -1.802e-01, -6.150e-02, 7.762e-02, -7.200e-02, -1.003e-01, 3.782e-02, 6.884e-02, 3.277e-01, -5.731e-02, 1.945e-01, 1.746e-01, -2.106e-01, -8.215e-02, -1.878e-01, 8.132e-02)); + r += mul(s7_8, M4(-2.486e-02, 9.845e-03, 1.111e-01, -6.140e-02, 8.605e-03, -6.981e-02, 2.888e-02, 1.531e-01, -1.288e-01, 8.149e-02, 1.744e-01, 2.852e-02, 1.347e-01, 9.315e-02, 
5.734e-03, -1.109e-01)); + r += V4(-3.879e-03, 4.492e-03, 1.006e-02, 3.190e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.655e-03, 5.556e-02, -8.105e-02, 1.458e-01, -8.150e-02, -1.538e-02, -1.082e-01, -1.079e-02, -1.527e-01, -4.165e-02, 1.901e-01, 2.564e-02, -1.333e-01, 1.045e-01, 4.315e-03, 4.170e-03)); + r += mul(s0_1, M4(-2.539e-02, 4.535e-02, 1.207e-02, 9.199e-03, 2.607e-01, 1.892e-01, -4.678e-01, -1.581e-01, -5.594e-02, -7.519e-02, -1.405e-01, -2.092e-02, -4.527e-02, 9.793e-02, -1.439e-01, 1.186e-01)); + r += mul(s0_2, M4(1.802e-02, -2.676e-03, -2.571e-02, 4.971e-02, 2.588e-01, -8.633e-02, -1.160e-01, -2.965e-03, -7.715e-02, 9.330e-02, -1.310e-01, 7.265e-02, 1.781e-01, -4.536e-02, -1.389e-01, -8.484e-02)); + r += mul(s0_3, M4(2.170e-02, 2.383e-02, -1.583e-02, -5.823e-02, 1.480e-02, 8.420e-02, -8.576e-02, -3.458e-02, -6.119e-01, 1.272e-02, 1.903e-01, 1.106e-01, 1.466e-01, -2.758e-04, 5.097e-02, 5.463e-02)); + r += mul(s0_4, M4(-7.211e-02, 6.139e-02, 9.620e-02, -5.765e-03, -3.925e-01, -2.522e-01, -5.260e-02, 1.453e-01, 7.604e-02, -7.873e-03, 1.661e-02, -3.368e-02, 7.994e-03, -1.523e-01, -1.285e-01, -9.694e-02)); + r += mul(s0_5, M4(-2.075e-01, -5.935e-02, 2.857e-01, -8.070e-03, 4.043e-02, -2.901e-02, 5.117e-02, -1.444e-01, -1.882e-01, 3.192e-01, -6.324e-02, -5.315e-02, 3.058e-01, 4.921e-02, 8.788e-02, 
1.490e-02)); + r += mul(s0_6, M4(3.021e-02, -1.996e-02, -3.307e-03, 6.202e-03, 9.278e-02, -2.484e-01, 2.414e-02, 9.733e-02, 8.946e-02, -2.138e-01, 1.501e-01, 1.242e-01, -1.088e-01, -8.307e-02, -5.870e-02, 2.474e-02)); + r += mul(s0_7, M4(-5.499e-02, -3.792e-03, -8.923e-02, 5.841e-02, -3.779e-02, -2.041e-01, 7.140e-02, -4.398e-02, 2.706e-02, -7.244e-02, 5.549e-02, 8.587e-03, -6.630e-02, 1.648e-01, 4.909e-02, 1.227e-02)); + r += mul(s0_8, M4(-1.652e-02, 1.010e-02, 3.214e-02, 6.838e-02, 3.679e-02, -2.320e-01, -1.775e-02, -4.921e-02, 1.925e-02, 1.835e-01, 3.734e-02, 3.136e-02, -7.236e-04, -3.134e-01, 9.843e-02, -3.487e-02)); + r += mul(s1_0, M4(-1.105e-01, 1.907e-01, -1.500e-01, 1.024e-01, 2.619e-02, -6.697e-02, 6.286e-02, -5.147e-03, 1.463e-01, 7.406e-03, -1.897e-01, -5.971e-01, 7.540e-03, -5.692e-02, -5.279e-02, -8.061e-02)); + r += mul(s1_1, M4(-2.672e-01, -1.255e-01, 1.946e-01, -1.145e-02, -3.182e-02, -7.422e-02, 4.570e-02, 9.251e-02, -1.189e-02, 1.085e-01, -2.353e-02, -1.135e-02, 6.594e-02, 4.029e-03, -3.751e-02, 2.063e-02)); + r += mul(s1_2, M4(-8.084e-02, -1.826e-01, -1.234e-01, 4.054e-01, -9.095e-02, -1.165e-01, 1.121e-01, -2.741e-02, 7.305e-02, -3.217e-02, 9.786e-02, -6.659e-02, 1.157e-01, -4.444e-02, 2.270e-01, 1.763e-03)); + r += mul(s1_3, M4(1.931e-01, -3.257e-02, -4.842e-01, -8.259e-02, 1.276e-02, -3.639e-02, 1.284e-01, -4.754e-02, 3.171e-01, -1.167e-01, -2.868e-01, -1.150e+00, 2.319e-01, -1.294e-01, -1.171e-01, -3.551e-02)); + r += mul(s1_4, M4(-2.500e-01, -1.085e-01, 1.923e-02, 8.237e-02, 2.004e-01, 4.469e-02, 9.436e-02, 5.312e-02, -1.068e-01, -7.486e-02, -4.478e-02, 1.552e-01, -2.460e-01, 1.511e-02, 3.162e-01, 5.281e-02)); + r += mul(s1_5, M4(7.678e-01, -3.055e-02, -4.185e-01, -6.309e-02, -9.683e-02, 1.223e-01, 5.832e-02, 4.524e-02, 1.655e-02, -1.655e-01, -1.073e-01, -1.390e-01, -4.154e-01, 7.540e-02, -1.736e-01, 1.085e-01)); + r += mul(s1_6, M4(-3.131e-01, -1.286e-01, -1.378e-01, -5.605e-02, 1.242e-02, -6.596e-02, 1.168e-01, 6.750e-02, 5.484e-02, 
9.939e-02, -2.480e-01, 2.647e-02, 4.495e-02, 1.346e-01, -4.221e-04, -7.027e-02)); + r += mul(s1_7, M4(-4.976e-01, -3.209e-01, 6.290e-01, -3.283e-02, -4.935e-02, 1.593e-01, -1.244e-01, 9.397e-03, -1.819e-01, 2.349e-01, 1.214e-01, 4.953e-02, -1.022e-01, 7.186e-02, -4.505e-02, 1.690e-02)); + r += mul(s1_8, M4(7.415e-02, -5.239e-01, -1.586e-02, 3.447e-01, -3.513e-02, 1.279e-01, -3.171e-02, -6.615e-02, -2.123e-01, -1.534e-01, 1.701e-02, 4.486e-03, 1.917e-01, 1.039e-01, 6.547e-02, 2.694e-02)); + r += mul(s2_0, M4(8.618e-02, -4.803e-02, -1.171e-01, -1.513e-02, 2.974e-02, -2.265e-02, 9.962e-02, -1.931e-02, 6.049e-02, -8.627e-02, 3.800e-02, 9.618e-02, 2.536e-01, -4.773e-01, 3.032e-02, -3.750e-02)); + r += mul(s2_1, M4(6.232e-02, 1.536e-01, 1.781e-02, -6.115e-02, -5.699e-04, -4.098e-02, 1.818e-01, 1.820e-02, -1.301e-01, -4.811e-01, 1.531e-01, 8.739e-02, 1.003e-01, -1.786e-01, 4.009e-01, 1.199e-01)); + r += mul(s2_2, M4(-1.296e-01, 6.706e-02, 3.536e-02, -5.269e-03, 9.650e-03, 6.253e-02, 9.542e-02, -2.238e-02, -8.852e-04, -2.723e-01, -1.343e-01, 6.277e-02, 2.481e-01, -3.909e-01, -2.848e-01, -2.311e-02)); + r += mul(s2_3, M4(-1.844e-01, 2.576e-01, 1.042e-01, -3.795e-02, -1.274e-02, -6.163e-02, 1.016e-02, 5.374e-02, 1.026e-01, -5.200e-02, -1.895e-01, 1.465e-02, -4.053e-01, 3.662e-01, 1.275e-01, -1.067e-01)); + r += mul(s2_4, M4(-7.130e-03, 2.468e-01, 1.063e-01, -8.518e-02, -2.571e-01, 1.458e-01, 1.766e-01, -8.033e-03, -2.254e-01, -3.894e-01, 3.050e-02, -2.920e-01, -6.229e-01, -2.740e-02, -4.316e-01, 2.365e-01)); + r += mul(s2_5, M4(1.404e-01, 1.765e-01, 3.307e-02, 4.062e-02, -5.348e-02, 1.879e-01, 1.456e-01, 1.174e-01, 2.685e-01, -1.430e-01, -8.005e-02, 1.036e-01, -1.043e-01, -1.460e-01, -1.615e-01, 3.867e-02)); + r += mul(s2_6, M4(5.364e-03, 2.301e-02, 9.842e-02, 7.951e-02, -2.416e-02, 1.013e-01, -5.920e-02, 1.695e-02, 2.026e-02, -2.394e-01, 8.882e-02, 3.650e-02, -1.144e-01, -3.474e-01, 3.686e-01, 1.380e-02)); + r += mul(s2_7, M4(-8.185e-02, 1.872e-01, 1.357e-01, 2.697e-02, 
-2.581e-01, 3.224e-01, -2.279e-02, 3.041e-03, 2.607e-02, -1.089e-01, -2.909e-02, -1.060e-01, 2.486e-01, -1.111e-01, 2.357e-01, 1.090e-02)); + r += mul(s2_8, M4(-2.989e-02, -1.133e-01, 2.163e-01, -9.537e-03, -8.489e-02, -2.383e-01, -1.081e-01, -1.091e-02, 1.138e-02, -4.123e-01, -4.767e-02, 4.099e-02, 2.298e-01, 1.293e-01, 1.773e-01, 7.660e-02)); + r += mul(s3_0, M4(3.896e-02, -3.133e-02, 3.217e-02, 4.139e-02, 5.093e-02, 2.823e-02, -8.577e-03, -3.780e-02, 2.861e-03, -1.689e-03, -7.514e-02, 6.305e-02, -2.362e-02, 4.346e-03, 4.735e-03, -1.194e-01)); + r += mul(s3_1, M4(6.590e-02, -2.471e-02, -7.886e-03, 9.714e-03, 1.811e-01, 5.951e-03, -8.243e-02, -6.037e-02, 1.833e-02, -7.481e-02, -1.821e-01, 8.750e-02, 1.107e-01, 1.240e-01, -9.746e-02, 5.775e-02)); + r += mul(s3_2, M4(-6.174e-02, -5.979e-02, -3.929e-03, 1.273e-03, 1.079e-01, 1.519e-01, 1.121e-01, -5.081e-02, -7.291e-03, 3.345e-02, -1.553e-02, -2.742e-02, 2.182e-02, -1.318e-01, 5.946e-02, -5.933e-02)); + r += mul(s3_3, M4(3.138e-02, -3.365e-01, -1.367e-02, 1.570e-01, 3.432e-02, -7.250e-02, -1.769e-01, -1.685e-02, -5.534e-02, 1.023e-01, -8.995e-02, 2.204e-02, 5.705e-03, 1.595e-01, 4.186e-02, 2.708e-02)); + r += mul(s3_4, M4(-1.008e-01, -8.172e-02, -1.077e-02, -4.102e-02, -1.583e-01, -2.770e-01, -6.289e-04, -1.642e-01, -2.202e-01, 1.332e-01, -4.903e-02, -1.783e-01, 2.032e-02, -6.282e-02, 1.980e-02, -2.676e-01)); + r += mul(s3_5, M4(1.053e-01, 1.076e-01, -3.948e-02, 4.915e-02, 2.136e-01, -2.357e-01, -1.255e-01, -1.459e-02, 9.773e-02, 1.624e-01, -2.384e-02, 2.292e-01, 1.363e-01, 5.480e-02, -6.351e-02, -1.213e-01)); + r += mul(s3_6, M4(8.917e-03, 4.804e-03, 3.635e-02, -7.499e-02, 2.543e-01, -1.210e-02, -8.202e-02, -9.351e-03, -1.585e-02, -5.763e-03, 8.240e-02, 5.701e-02, -4.596e-02, 4.842e-02, 1.877e-02, -1.446e-01)); + r += mul(s3_7, M4(-1.232e-01, -6.384e-02, -1.339e-01, -8.216e-02, 3.312e-01, 6.954e-02, 3.517e-02, 1.689e-01, -4.765e-02, 2.718e-01, -1.615e-01, -1.460e-01, -8.312e-02, 9.253e-02, -1.689e-01, -1.620e-01)); 
+ r += mul(s3_8, M4(8.432e-02, 1.952e-01, -4.068e-02, -3.398e-03, 2.903e-01, -1.703e-01, -1.062e-01, 3.111e-02, -1.865e-01, 9.773e-02, -1.087e-01, 3.428e-02, -2.037e-02, -4.488e-02, 5.985e-02, -7.044e-02)); + r += mul(s4_0, M4(2.537e-01, -1.393e-01, 3.364e-02, 5.812e-02, 1.427e-01, -1.551e-02, 1.041e-01, 1.064e-01, 4.486e-02, -3.444e-02, -5.496e-02, 3.862e-02, 9.252e-02, -1.363e-02, 6.163e-03, 2.246e-02)); + r += mul(s4_1, M4(8.846e-03, 4.557e-02, -1.059e-02, 1.340e-01, 1.547e-01, -5.603e-02, 2.197e-02, 1.421e-01, -2.348e-01, -1.163e-01, 3.919e-03, -2.110e-01, 9.481e-02, 1.260e-02, 1.355e-02, 9.323e-02)); + r += mul(s4_2, M4(-7.205e-02, -1.342e-01, 4.100e-03, 7.338e-02, 7.469e-02, -3.581e-02, 9.859e-02, 1.051e-01, -1.580e-01, 3.044e-02, -1.745e-01, -2.835e-02, -5.595e-02, 4.080e-01, 6.298e-02, -5.029e-02)); + r += mul(s4_3, M4(2.531e-01, 5.550e-02, 4.475e-02, -1.046e-01, 8.019e-02, 6.507e-02, -1.792e-01, 2.618e-02, -5.059e-02, -2.618e-01, -3.367e-02, -1.395e-01, 2.896e-01, -2.934e-02, -5.490e-02, 9.300e-02)); + r += mul(s4_4, M4(7.890e-02, 3.029e-01, 2.133e-02, -5.126e-02, -3.385e-01, -1.758e-01, -6.291e-02, -3.150e-01, -4.135e-01, 2.422e-01, 1.068e-02, -1.341e-01, 3.631e-01, 1.772e-02, -7.548e-04, 4.115e-02)); + r += mul(s4_5, M4(1.689e-01, 1.690e-01, -9.701e-02, -1.115e-01, 9.889e-02, -4.711e-02, -1.317e-01, -8.123e-03, -9.076e-02, -8.810e-02, 2.362e-01, -3.804e-02, 2.086e-02, 2.328e-01, -1.051e-01, -1.188e-01)); + r += mul(s4_6, M4(8.424e-02, -3.195e-01, 9.356e-02, 7.451e-02, 4.294e-03, 4.530e-02, -4.655e-03, 1.649e-02, -2.645e-02, -4.658e-02, -1.554e-01, -1.458e-02, 7.289e-02, 1.307e-01, 6.133e-02, 1.640e-03)); + r += mul(s4_7, M4(-7.246e-02, 1.717e-02, -2.153e-01, 3.459e-02, -2.573e-01, 2.923e-01, 1.217e-01, 8.217e-02, 3.190e-01, 3.920e-01, 1.205e-01, 1.563e-02, 3.302e-02, 1.573e-01, -2.217e-01, -6.957e-02)); + r += mul(s4_8, M4(-5.015e-01, -3.158e-02, -6.849e-02, 9.000e-02, -3.464e-01, -3.587e-04, 2.693e-02, -3.674e-02, 1.743e-01, 1.260e-01, 5.700e-02, 
1.168e-01, 1.287e-01, 4.054e-01, -1.939e-01, 9.138e-02)); + r += mul(s5_0, M4(-1.275e-01, 9.258e-02, -3.912e-02, -5.020e-02, -1.170e-01, 1.043e-01, 1.126e-01, 2.848e-02, 1.123e-02, -7.908e-02, -2.502e-02, 1.207e-01, -9.056e-02, 1.312e-01, 5.744e-02, -2.529e-03)); + r += mul(s5_1, M4(6.956e-02, 6.685e-02, 4.250e-02, 1.363e-01, -5.655e-02, 4.736e-01, 2.432e-02, 7.977e-03, 1.009e-01, -1.834e-01, 8.792e-03, -6.653e-02, -2.083e-01, 1.321e-02, 3.548e-02, 3.161e-02)); + r += mul(s5_2, M4(-1.823e-02, -9.938e-02, 1.187e-01, 1.367e-01, -5.580e-02, 1.312e-01, -1.650e-01, 5.498e-02, -1.319e-03, 4.431e-02, 1.802e-01, 1.547e-02, -6.616e-02, 1.764e-02, 2.344e-02, 4.526e-02)); + r += mul(s5_3, M4(-1.979e-01, 1.126e-02, 4.399e-02, -5.940e-02, -1.603e-01, -3.065e-01, 1.705e-01, -5.170e-02, -1.620e-02, 5.378e-03, -3.391e-02, -3.194e-02, -1.239e-01, 1.018e-01, 1.143e-02, -4.358e-03)); + r += mul(s5_4, M4(-1.316e-01, -1.408e-01, 3.692e-02, -6.235e-02, 9.756e-02, 1.965e-01, 1.613e-01, -1.826e-02, 1.529e-01, 2.658e-01, -1.722e-01, -2.159e-01, -9.866e-02, -4.192e-01, -7.198e-02, 1.045e-01)); + r += mul(s5_5, M4(-6.488e-02, -1.184e-01, -9.189e-02, 4.309e-02, -1.599e-01, -2.858e-01, 2.405e-01, 2.957e-02, 1.531e-01, -5.256e-02, -1.240e-01, 1.017e-01, -9.043e-02, -1.965e-01, 2.388e-01, -6.754e-02)); + r += mul(s5_6, M4(7.957e-02, 7.197e-02, -1.815e-01, 1.702e-01, 9.849e-02, -3.109e-01, -1.147e-01, 1.632e-01, 1.087e-01, 1.393e-02, 7.967e-02, -8.238e-02, -4.902e-02, -4.507e-02, 7.177e-03, -1.265e-02)); + r += mul(s5_7, M4(-7.930e-02, -3.751e-02, 2.822e-01, 1.702e-01, 4.284e-01, -4.733e-04, -1.522e-01, 4.036e-01, 1.503e-01, 1.747e-02, -2.079e-02, 2.941e-01, -1.312e-01, -1.859e-01, 4.046e-02, -4.338e-02)); + r += mul(s5_8, M4(2.182e-01, 1.775e-01, 4.035e-02, -1.053e-01, 2.385e-01, 1.134e-01, 2.134e-01, 6.980e-02, -1.202e-01, -2.052e-01, -1.275e-01, 6.549e-02, 3.340e-02, -2.139e-01, 1.350e-01, 1.552e-02)); + r += mul(s6_0, M4(2.772e-01, 7.592e-02, 2.153e-02, 2.310e-01, -7.911e-03, 3.401e-03, 
1.073e-01, -4.014e-02, 1.460e-02, 8.533e-02, -4.139e-02, -6.476e-02, -2.862e-02, -1.944e-02, 3.860e-01, -4.332e-02)); + r += mul(s6_1, M4(4.812e-01, 1.803e-02, 3.650e-01, 4.224e-01, -1.578e-01, -1.214e-01, 8.779e-02, -7.715e-03, 1.075e-01, 2.096e-01, 6.006e-02, 8.946e-02, 3.282e-03, -4.351e-02, -7.451e-02, 1.163e-01)); + r += mul(s6_2, M4(3.159e-01, -1.897e-01, 2.192e-01, 1.379e-01, 6.565e-02, -1.496e-01, -6.414e-04, 1.160e-01, -4.805e-02, -7.635e-02, -5.523e-02, 5.868e-02, -5.129e-02, 5.904e-02, 1.374e-02, 5.361e-02)); + r += mul(s6_3, M4(4.315e-01, 1.023e-01, 1.276e-01, 4.328e-01, 9.514e-02, -8.882e-02, -1.540e-01, 6.534e-02, -1.634e-01, 7.222e-02, -6.178e-02, 1.048e-02, 4.497e-02, -1.487e-01, 2.292e-02, -2.380e-02)); + r += mul(s6_4, M4(-2.985e-01, -7.845e-02, 8.364e-01, 1.638e+00, -2.243e-01, 1.157e-01, -1.560e-02, -3.542e-01, 1.578e-01, -1.317e-02, -1.165e-01, -4.623e-02, -1.523e-01, -4.097e-02, 7.027e-02, -1.893e-01)); + r += mul(s6_5, M4(-4.130e-02, -6.393e-01, -6.269e-02, 2.810e-01, 6.838e-02, 2.387e-01, -3.504e-02, 4.013e-02, 3.874e-02, 6.386e-03, 5.781e-02, 5.188e-02, 1.292e-01, 1.734e-01, -4.389e-02, 8.661e-02)); + r += mul(s6_6, M4(-3.234e-01, -8.243e-02, 2.946e-01, -3.210e-02, -1.900e-03, 1.524e-01, 1.520e-01, 6.625e-02, 3.115e-02, -1.955e-01, -1.206e-01, 3.781e-02, -2.400e-02, -1.820e-01, 1.845e-01, 7.588e-02)); + r += mul(s6_7, M4(-4.346e-01, 1.289e-01, 7.611e-01, 4.578e-01, 6.179e-02, 7.028e-02, -1.120e-01, -3.356e-02, 2.095e-01, -2.202e-03, -2.747e-02, -7.018e-04, -4.247e-02, 2.166e-01, 1.138e-01, 7.714e-02)); + r += mul(s6_8, M4(1.182e-01, 4.479e-01, 1.944e-01, 2.369e-01, -9.513e-02, -3.546e-02, -1.288e-01, 5.557e-02, 4.410e-02, -8.010e-02, 1.142e-01, 2.252e-02, 5.250e-02, 5.762e-02, 9.484e-02, 1.049e-02)); + r += mul(s7_0, M4(2.362e-02, -7.456e-02, -8.570e-02, 1.749e-01, 7.050e-02, -1.714e-01, -2.974e-01, -1.436e-02, -1.732e-01, 1.164e-01, 2.513e-01, -5.570e-02, 1.232e-02, 4.972e-02, -3.535e-01, 1.783e-01)); + r += mul(s7_1, M4(1.016e-01, 
-5.980e-02, -9.087e-02, 1.968e-01, 5.459e-02, -2.606e-01, -3.196e-01, 4.241e-03, -7.105e-02, 7.151e-02, 2.115e-01, 1.034e-01, -1.907e-03, 9.728e-02, -1.425e-02, 3.176e-01)); + r += mul(s7_2, M4(6.123e-02, 1.818e-02, 6.712e-02, 8.668e-02, 8.312e-02, -1.384e-01, -2.917e-02, 7.580e-03, -3.688e-02, 2.069e-01, 3.602e-02, -1.538e-02, 2.226e-03, -1.482e-01, 5.510e-02, 9.440e-02)); + r += mul(s7_3, M4(-3.627e-02, 5.019e-02, -9.160e-02, 6.557e-02, 2.114e-01, -3.279e-01, -3.847e-02, 2.284e-01, 9.347e-03, 2.755e-01, 3.278e-01, -1.176e-01, 1.993e-01, 2.487e-01, -1.174e-01, 4.235e-01)); + r += mul(s7_4, M4(8.905e-02, -4.722e-02, -1.047e-01, -6.425e-01, 6.954e-02, -2.695e-02, 2.081e-01, -2.489e-01, -1.908e-01, -2.528e-02, 1.878e-01, 3.810e-02, -2.715e-01, -2.092e-01, -1.303e-01, -9.392e-02)); + r += mul(s7_5, M4(-5.402e-03, 2.137e-02, 8.297e-02, 5.125e-03, 1.445e-02, -1.290e-01, -9.459e-02, -1.349e-02, -3.473e-02, -1.413e-03, -3.585e-02, 8.633e-02, 8.299e-02, -9.031e-02, 1.890e-02, 8.582e-02)); + r += mul(s7_6, M4(-2.498e-02, -8.174e-02, 5.545e-02, 1.061e-01, 5.317e-02, -1.445e-01, -5.075e-02, -2.452e-02, -6.244e-02, 2.842e-01, 1.286e-01, 5.565e-02, -1.222e-01, -4.650e-03, -3.429e-01, 7.063e-02)); + r += mul(s7_7, M4(-4.282e-02, 4.579e-03, 8.260e-02, 4.414e-02, 7.506e-02, -3.467e-02, -1.103e-01, -2.087e-02, -3.690e-01, -2.446e-02, 4.477e-02, -5.426e-03, -7.949e-03, 1.122e-01, -2.450e-01, 1.310e-01)); + r += mul(s7_8, M4(5.690e-02, 3.884e-02, 1.819e-02, 1.769e-01, -3.959e-02, -2.336e-01, 8.080e-02, 3.858e-02, 5.694e-03, 1.613e-01, -1.850e-01, 1.164e-01, -2.013e-01, -1.304e-01, -9.271e-02, 2.405e-02)); + r += V4(6.665e-02, -2.861e-02, -7.364e-02, -7.743e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 
s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.188e-02, 3.911e-02, -2.494e-03, -2.831e-02, -1.972e-01, 2.131e-01, 2.042e-04, -1.627e-02, 1.885e-01, 8.479e-02, 1.430e-01, -6.999e-02, -5.654e-02, 6.220e-02, 3.930e-02, -8.713e-02)); + r += mul(s0_1, M4(9.534e-04, -9.037e-02, -4.319e-02, 1.939e-01, -1.410e-01, -1.372e-01, -5.143e-01, 2.666e-03, 8.914e-02, 2.363e-01, -9.561e-02, 1.199e-01, 9.545e-02, 8.177e-02, -2.727e-02, 3.668e-02)); + r += mul(s0_2, M4(2.061e-01, 4.186e-02, 9.197e-02, 6.201e-02, 2.803e-01, 6.116e-02, 2.712e-01, 3.081e-01, 1.745e-01, -6.947e-02, 8.908e-02, -5.336e-03, -4.432e-02, 8.851e-02, 1.293e-01, 1.005e-01)); + r += mul(s0_3, M4(-1.996e-01, 4.379e-02, -1.397e-01, -1.841e-01, 1.023e-01, -2.295e-02, -1.174e-01, 3.601e-02, -2.838e-01, 9.978e-02, 1.481e-01, -1.023e-01, 3.961e-02, 6.618e-02, 9.490e-02, -2.691e-02)); + r += mul(s0_4, M4(3.418e-02, -1.448e-01, -1.010e-01, 2.336e-02, -1.532e-01, -7.991e-02, -1.969e-01, -1.017e-01, 5.245e-02, 2.387e-01, -2.388e-01, -1.195e-01, -1.677e-01, 6.502e-02, -2.937e-01, 1.324e-01)); + r += mul(s0_5, M4(1.576e-02, -4.273e-02, 8.894e-02, -5.236e-02, -2.616e-01, -1.620e-01, -5.406e-02, 3.255e-01, 3.261e-03, 1.246e-01, 1.157e-01, 1.764e-01, -1.268e-02, 3.138e-02, 1.725e-01, 5.102e-03)); + r += mul(s0_6, M4(-1.819e-01, 3.163e-02, -2.232e-01, -8.649e-02, 2.477e-01, 3.868e-02, -1.274e-01, -1.264e-01, -3.308e-02, -1.345e-01, -1.506e-01, -3.680e-02, 1.321e-01, 7.888e-03, 7.519e-02, -7.002e-02)); + r += mul(s0_7, M4(-3.134e-02, 8.152e-02, -1.868e-02, -6.641e-02, 1.172e-01, 1.674e-02, -1.663e-01, 9.303e-02, -7.864e-02, 3.147e-01, -5.638e-02, -7.548e-03, -1.552e-01, 1.281e-02, -6.565e-02, 
1.074e-01)); + r += mul(s0_8, M4(1.213e-01, -6.248e-02, 7.482e-02, 9.157e-02, -8.935e-02, 1.599e-02, 1.405e-02, 1.277e-01, -2.177e-02, -5.653e-02, 1.523e-01, 1.668e-01, -1.860e-02, 7.483e-02, 6.869e-02, -1.209e-02)); + r += mul(s1_0, M4(2.365e-01, -2.096e-02, -1.695e-01, -8.783e-02, 1.421e-02, -1.485e-02, -8.857e-02, 9.697e-03, -8.778e-02, 8.530e-02, -1.804e-02, 5.474e-02, 1.052e-01, -1.055e-02, 9.641e-02, 1.474e-02)); + r += mul(s1_1, M4(4.347e-01, 2.115e-01, 1.887e-01, 2.844e-01, 5.543e-02, 1.159e-01, 2.477e-01, -9.212e-02, -1.935e-01, -5.802e-02, -1.655e-02, 4.781e-02, -6.906e-02, -1.315e-01, -5.511e-02, -1.924e-01)); + r += mul(s1_2, M4(-6.568e-02, -2.548e-01, 3.426e-02, 8.463e-02, 1.262e-02, -1.018e-01, -1.622e-01, -8.035e-03, 1.554e-01, -3.247e-02, 8.714e-02, -5.855e-02, -2.333e-01, -1.695e-01, -1.537e-02, 1.658e-01)); + r += mul(s1_3, M4(-1.008e-01, 2.533e-01, -9.077e-02, -7.201e-02, -9.625e-02, 1.257e-02, -3.490e-02, 6.242e-02, 1.221e-01, -2.578e-01, 4.224e-02, -3.465e-02, -2.070e-02, -1.371e-01, 1.174e-01, -5.439e-02)); + r += mul(s1_4, M4(-5.705e-01, 3.135e-01, -3.738e-01, 1.494e-01, 6.415e-02, -3.706e-03, 1.545e-01, -8.638e-02, -5.674e-02, -9.102e-02, -1.021e-01, 1.764e-01, 7.511e-02, 1.061e-01, 5.827e-03, 9.464e-02)); + r += mul(s1_5, M4(6.192e-01, -5.291e-02, 8.063e-02, -1.212e-01, 2.809e-01, 6.766e-02, 2.371e-02, 8.214e-02, 8.186e-02, -5.705e-02, -4.991e-02, 9.358e-02, 3.349e-01, -7.810e-02, 5.105e-03, -1.689e-02)); + r += mul(s1_6, M4(-8.209e-03, 1.708e-01, -6.446e-02, -5.466e-02, -5.550e-02, -7.580e-02, 1.763e-02, 7.632e-02, -2.885e-02, 1.813e-02, 3.694e-02, -1.628e-01, -1.729e-01, 1.523e-01, -4.755e-02, -1.874e-02)); + r += mul(s1_7, M4(-2.842e-02, -5.038e-01, -1.441e-01, -1.192e-01, -1.994e-01, 7.440e-02, -1.168e-01, -1.152e-01, -2.554e-02, 4.832e-02, -1.121e-03, -1.208e-01, -9.660e-02, 3.061e-02, -1.091e-01, -1.257e-01)); + r += mul(s1_8, M4(-1.060e-01, 1.430e-01, 6.638e-03, -8.655e-02, 5.032e-03, 6.352e-02, -1.219e-02, -1.721e-02, -2.960e-02, 
-9.753e-03, 1.217e-01, 6.606e-02, 1.146e-01, -2.372e-02, -1.066e-01, 1.112e-01)); + r += mul(s2_0, M4(1.512e-01, -1.360e-01, 1.003e-01, -2.228e-02, -1.981e-02, 5.680e-02, -2.351e-02, -2.149e-02, -3.795e-02, -6.851e-03, 6.286e-03, 5.634e-02, 3.187e-01, -7.946e-02, -1.691e-01, -3.474e-02)); + r += mul(s2_1, M4(1.811e-01, -2.832e-02, -3.044e-02, -2.047e-01, -6.424e-02, 7.703e-02, 6.279e-02, 5.720e-02, 2.866e-01, 1.931e-01, 6.403e-02, -3.005e-02, 5.048e-02, -1.983e-02, 2.380e-01, -4.797e-02)); + r += mul(s2_2, M4(-1.311e-01, -3.905e-02, -1.600e-02, -1.287e-01, 3.135e-02, -1.035e-01, 1.382e-02, 2.109e-01, -1.353e-01, 8.187e-02, -1.933e-02, -2.058e-01, -3.192e-01, 5.833e-03, 1.582e-01, 1.055e-01)); + r += mul(s2_3, M4(-7.578e-02, -1.802e-02, 1.142e-03, -6.449e-03, 1.890e-02, 8.185e-02, -1.360e-01, -1.382e-01, 2.346e-01, -1.593e-01, 1.064e-01, 8.349e-02, 8.157e-02, -7.758e-01, -4.485e-01, 1.432e-01)); + r += mul(s2_4, M4(-1.904e-01, -1.111e-02, 6.158e-02, 1.036e-03, -1.510e-01, 1.728e-01, -2.534e-01, 1.353e-01, 3.318e-02, 3.215e-01, -3.263e-01, 1.328e-01, -4.546e-01, 1.714e-01, 1.746e-01, 3.783e-01)); + r += mul(s2_5, M4(-1.312e-01, 5.085e-02, 3.116e-02, -8.743e-02, 2.796e-02, -8.472e-02, 7.036e-02, -8.341e-02, -2.516e-01, -5.612e-02, 8.958e-02, -1.229e-01, 2.736e-01, 1.007e-01, 1.740e-01, 2.939e-01)); + r += mul(s2_6, M4(6.271e-01, -6.375e-02, 3.872e-02, 1.205e-01, 1.454e-01, -1.306e-01, -2.290e-01, -1.096e-01, 5.513e-02, -1.251e-01, 7.400e-02, 4.007e-03, 2.942e-02, -1.990e-02, -4.658e-02, -2.138e-01)); + r += mul(s2_7, M4(1.030e-01, 2.035e-01, -9.021e-02, -2.600e-01, 9.415e-02, 1.905e-02, 1.688e-01, -3.456e-02, 2.280e-02, -4.347e-03, -9.776e-03, 1.257e-01, 4.754e-02, 7.155e-02, -7.335e-02, -1.227e-01)); + r += mul(s2_8, M4(4.734e-02, 8.976e-02, -7.800e-02, -4.560e-04, -9.280e-02, 8.119e-02, -4.994e-02, 2.394e-01, -9.923e-02, 1.476e-02, -3.865e-02, -5.304e-02, 4.677e-01, 2.234e-01, -2.916e-02, -7.769e-02)); + r += mul(s3_0, M4(8.064e-02, 1.291e-01, 7.727e-02, -1.999e-02, 
1.549e-02, -4.633e-02, -3.663e-02, 1.059e-01, -6.190e-02, 1.411e-01, -6.361e-02, 9.450e-03, -1.625e-02, -1.700e-02, 1.808e-02, -1.312e-01)); + r += mul(s3_1, M4(3.164e-02, 8.257e-02, 9.946e-03, 5.608e-02, 2.040e-01, 1.026e-01, 4.893e-02, 4.471e-02, -2.647e-01, 5.232e-02, 8.367e-02, -4.396e-02, 1.204e-01, 2.318e-02, 2.416e-02, -5.404e-04)); + r += mul(s3_2, M4(-1.614e-01, -7.945e-02, -8.010e-02, 5.496e-03, 2.406e-01, 3.090e-01, 6.862e-02, 1.258e-01, -3.788e-02, 2.183e-01, 5.092e-02, -1.573e-01, 6.735e-02, -5.002e-02, 1.514e-01, 1.107e-01)); + r += mul(s3_3, M4(1.479e-01, 2.361e-02, 1.040e-01, 2.166e-02, -1.189e-01, 4.638e-03, 6.935e-02, -8.744e-03, 2.469e-01, -6.632e-02, 6.656e-02, 1.682e-01, -1.416e-01, 1.223e-01, -3.064e-01, 6.913e-03)); + r += mul(s3_4, M4(1.550e-01, 1.376e-01, -8.576e-02, 7.891e-02, 4.181e-02, -3.444e-04, -1.821e-01, 2.611e-01, -1.614e-01, 4.019e-02, -2.548e-01, 9.231e-02, -1.216e-01, -3.186e-02, -1.297e-01, -1.230e-01)); + r += mul(s3_5, M4(3.948e-02, 4.112e-02, -7.857e-03, -8.752e-02, 1.139e-01, 6.144e-02, 1.122e-01, 3.369e-02, -1.216e-01, -1.379e-01, 2.298e-01, -4.286e-01, -3.835e-02, -6.797e-02, 4.648e-02, 7.861e-02)); + r += mul(s3_6, M4(-2.087e-01, -1.544e-01, -9.079e-03, -1.595e-01, -1.079e-02, -1.745e-02, -8.393e-02, -5.607e-02, 2.371e-01, 2.013e-02, -3.454e-02, 3.150e-02, -8.327e-02, 2.099e-02, -2.286e-02, 5.170e-02)); + r += mul(s3_7, M4(-6.763e-02, -3.678e-02, 2.372e-02, 1.024e-01, -2.997e-01, 2.999e-01, 1.426e-01, -1.557e-01, 7.995e-02, -1.268e-02, -8.075e-02, 1.920e-01, 8.148e-03, -7.839e-03, -2.837e-02, 1.963e-02)); + r += mul(s3_8, M4(-5.668e-02, -1.421e-01, -5.679e-02, 1.756e-02, -7.119e-02, -2.055e-01, 9.936e-02, 1.253e-01, -1.029e-01, 2.465e-02, -4.078e-02, -2.501e-01, 7.316e-02, 1.241e-01, 1.499e-01, 6.111e-02)); + r += mul(s4_0, M4(-5.703e-02, 1.207e-01, 1.984e-02, -6.309e-02, -1.560e-02, -7.092e-02, -4.825e-02, -1.273e-01, 1.707e-02, -2.400e-02, 7.366e-02, 5.471e-03, 1.441e-01, 1.125e-01, 1.191e-01, -8.359e-02)); + r += 
mul(s4_1, M4(1.581e-01, 1.683e-01, 1.421e-02, 6.891e-02, 1.447e-02, 1.112e-01, -5.353e-02, 5.701e-02, -2.312e-01, -2.978e-02, -4.824e-03, 3.022e-01, -2.496e-01, 1.148e-01, -7.820e-02, -2.643e-01)); + r += mul(s4_2, M4(-1.181e-01, 2.555e-01, 2.130e-01, -3.359e-02, -1.143e-01, -8.525e-02, 2.284e-02, 1.125e-01, 3.372e-02, 4.638e-03, 6.635e-02, -2.298e-01, -2.304e-01, 2.675e-01, -3.765e-02, 1.185e-01)); + r += mul(s4_3, M4(-8.087e-02, -1.942e-01, -2.963e-01, -1.275e-01, 1.163e-01, -6.763e-02, 6.468e-02, 9.737e-02, 3.722e-01, -2.215e-01, 1.129e-01, 1.155e-01, -5.863e-02, -1.467e-02, -7.591e-03, 7.242e-02)); + r += mul(s4_4, M4(1.517e-01, -2.668e-02, -1.133e-01, 2.084e-02, 1.579e-01, -7.591e-02, -2.788e-01, -2.643e-02, -5.141e-01, 1.799e-01, -8.189e-03, 1.877e-01, 8.521e-02, -2.388e-01, 8.918e-02, -3.055e-01)); + r += mul(s4_5, M4(-4.801e-02, 1.488e-01, 4.658e-02, 1.361e-01, -3.208e-03, 7.001e-02, 9.136e-03, -9.821e-02, -4.169e-01, -1.277e-01, 7.576e-02, -2.144e-01, 1.671e-01, 3.049e-03, -1.878e-02, 3.510e-01)); + r += mul(s4_6, M4(-3.959e-02, -2.581e-01, 5.174e-02, -1.911e-01, 6.262e-02, 7.314e-03, 8.208e-02, -1.864e-02, -1.688e-01, -1.582e-01, 5.003e-02, -3.514e-02, 8.005e-02, 1.571e-01, 1.045e-01, -2.215e-02)); + r += mul(s4_7, M4(1.552e-01, -2.325e-01, -6.920e-02, 7.045e-02, 2.340e-02, -1.740e-01, 1.765e-01, 1.311e-01, 2.823e-01, 1.528e-01, 9.451e-02, -5.164e-02, -8.723e-02, 1.079e-01, -5.113e-02, -1.851e-01)); + r += mul(s4_8, M4(-2.172e-01, -2.458e-01, 1.762e-01, 6.071e-02, -4.242e-02, 4.728e-02, 5.004e-02, -3.509e-03, -6.302e-02, -1.150e-01, 1.248e-01, -1.306e-01, 3.278e-02, 5.849e-02, 6.113e-02, 6.530e-03)); + r += mul(s5_0, M4(2.186e-02, -8.463e-03, 2.938e-02, 2.110e-02, 1.763e-03, 4.754e-01, -1.752e-01, 4.844e-02, 1.616e-01, 8.694e-02, 1.584e-01, -5.774e-02, 4.046e-02, -1.407e-01, -1.444e-01, -6.617e-02)); + r += mul(s5_1, M4(3.854e-02, 1.206e-02, -2.397e-02, 1.277e-02, -3.590e-01, 3.185e-02, 1.971e-02, 4.395e-02, -2.453e-03, 5.720e-02, -6.919e-02, -1.315e-02, 
-1.392e-01, -2.179e-02, 5.873e-02, 1.179e-01)); + r += mul(s5_2, M4(1.175e-01, -1.706e-01, -5.034e-02, 1.384e-01, 3.400e-01, 4.653e-02, -3.204e-02, 2.707e-01, -2.241e-01, -6.196e-03, 1.705e-02, -2.575e-01, 2.917e-01, 3.558e-02, -4.881e-02, 2.312e-01)); + r += mul(s5_3, M4(-3.266e-02, 1.648e-01, -1.035e-01, -6.685e-02, -2.771e-01, 8.880e-02, 1.625e-01, 4.664e-02, 1.847e-01, 2.458e-01, 2.510e-01, -4.112e-02, 5.400e-02, -2.309e-02, -6.306e-02, 1.013e-01)); + r += mul(s5_4, M4(-1.191e-01, 7.659e-03, 1.893e-01, 8.074e-02, -4.872e-01, 2.676e-01, 6.461e-02, 1.923e-01, 2.383e-01, 1.264e-01, -4.948e-01, 1.789e-01, -4.451e-02, -1.557e-01, 3.381e-01, -4.749e-02)); + r += mul(s5_5, M4(9.079e-03, -2.406e-01, 1.096e-01, -7.782e-02, 2.432e-01, -8.577e-03, 2.335e-02, 5.146e-02, -3.574e-01, -1.064e-01, -1.162e-01, -3.525e-01, 2.478e-01, 2.060e-02, 6.505e-02, 3.386e-01)); + r += mul(s5_6, M4(4.418e-02, 2.185e-01, 1.616e-02, -4.339e-02, -1.643e-01, 2.276e-01, 2.055e-01, 1.004e-01, 1.394e-01, 2.683e-02, 1.894e-01, 1.381e-01, 2.891e-02, 3.274e-02, -1.072e-02, 4.000e-03)); + r += mul(s5_7, M4(-9.906e-02, 7.663e-02, -3.076e-02, -1.080e-02, 9.497e-03, 2.714e-01, -1.414e-01, -6.790e-02, 1.749e-01, 7.840e-02, 2.961e-01, 2.120e-01, -3.710e-01, -1.305e-03, -1.149e-01, -6.740e-02)); + r += mul(s5_8, M4(1.217e-01, 3.720e-01, -6.066e-02, -2.051e-02, 1.724e-01, 2.266e-01, 4.978e-03, 5.818e-02, -5.622e-02, -7.869e-02, 3.167e-02, 2.765e-02, -3.049e-02, -4.279e-02, 3.039e-02, 1.064e-01)); + r += mul(s6_0, M4(1.905e-01, -5.906e-01, -3.203e-01, -2.408e-01, 2.004e-01, 1.413e-01, 2.745e-01, 2.631e-02, -1.010e-01, 4.333e-02, -2.646e-01, 2.150e-02, -5.612e-02, -3.617e-03, 1.065e-01, 5.784e-02)); + r += mul(s6_1, M4(-6.313e-02, -4.270e-01, 1.188e-01, -8.757e-02, -8.934e-02, -4.415e-02, -1.320e-01, 5.785e-02, 2.906e-01, 9.523e-02, 1.325e-01, 3.187e-02, -3.050e-02, 2.539e-02, -9.284e-03, -2.695e-02)); + r += mul(s6_2, M4(3.490e-02, 6.192e-02, 7.606e-02, -6.606e-02, -6.879e-02, 6.426e-02, -2.263e-02, 
-2.917e-02, 5.522e-02, 9.662e-02, 9.221e-02, -3.130e-02, -6.562e-02, -8.454e-02, 4.558e-02, 3.649e-02)); + r += mul(s6_3, M4(-2.788e-01, -1.476e-01, -3.335e-01, -1.557e-01, 9.530e-02, -1.893e-01, 4.636e-01, -8.215e-02, -3.601e-01, 1.247e-01, -3.906e-01, 6.218e-02, 6.371e-02, 8.757e-02, 1.279e-01, -6.788e-02)); + r += mul(s6_4, M4(-3.870e-01, -3.416e-01, 6.768e-01, -1.327e-01, -4.949e-02, -2.135e-01, -3.057e-01, 5.377e-01, -1.211e-01, 1.121e-01, 7.310e-02, -5.575e-02, 1.120e-01, 9.291e-02, -3.408e-01, 1.694e-01)); + r += mul(s6_5, M4(4.870e-02, 2.610e-01, -7.851e-03, 2.329e-01, -6.136e-02, -1.045e-01, 7.367e-02, -9.340e-02, 2.040e-01, -6.531e-02, 1.480e-01, -4.656e-02, 1.423e-01, -9.352e-03, 1.209e-01, 7.090e-03)); + r += mul(s6_6, M4(-1.423e-01, -4.668e-01, -2.745e-01, -2.214e-02, -6.964e-02, -6.048e-02, 2.676e-01, -7.026e-03, -1.628e-01, 1.677e-01, -2.451e-01, -1.609e-02, -7.104e-02, 1.026e-01, 7.345e-02, -7.246e-02)); + r += mul(s6_7, M4(-2.397e-01, 5.067e-02, -2.079e-01, 2.579e-02, 2.359e-02, -3.813e-02, -3.910e-02, -2.711e-03, -4.485e-02, 6.503e-02, 1.338e-01, 1.947e-02, -5.614e-02, -1.120e-01, -6.167e-02, -7.495e-02)); + r += mul(s6_8, M4(-2.891e-02, 9.384e-02, 1.046e-01, 1.467e-01, 5.332e-02, -7.476e-03, 2.731e-02, -6.231e-02, -3.199e-02, 4.240e-02, -8.893e-04, 8.002e-02, 3.404e-02, -7.099e-03, -4.124e-02, 1.343e-01)); + r += mul(s7_0, M4(6.439e-03, -3.159e-02, -7.080e-02, -2.024e-02, -2.219e-01, -1.744e-01, -2.094e-01, 9.819e-02, -2.095e-01, 5.943e-02, -1.185e-01, 6.477e-02, 1.859e-01, 1.926e-01, 3.075e-02, -9.657e-02)); + r += mul(s7_1, M4(3.378e-02, -8.277e-02, 5.498e-03, 2.163e-02, -3.447e-01, 1.273e-02, 1.832e-01, -3.245e-01, 1.922e-01, -2.858e-01, -2.330e-02, 4.936e-02, -3.989e-02, 2.286e-01, -2.226e-01, -4.291e-02)); + r += mul(s7_2, M4(1.610e-01, -4.787e-02, 1.686e-01, -9.226e-03, -1.239e-02, 6.896e-02, 4.769e-02, -1.828e-01, -1.080e-01, 1.338e-01, 8.961e-04, -4.600e-03, 3.174e-01, 1.484e-01, 6.866e-02, 1.677e-01)); + r += mul(s7_3, M4(-2.186e-01, 
1.113e-01, -1.255e-01, -1.763e-01, 5.137e-01, -2.105e-01, 3.290e-01, 5.426e-02, 3.304e-01, 1.222e-01, -3.562e-01, -4.745e-02, -2.218e-01, -1.034e-01, 3.933e-01, -2.037e-01)); + r += mul(s7_4, M4(1.024e-01, -1.234e-01, 8.316e-02, -1.358e-02, 2.174e-01, 2.707e-01, -1.130e-01, -2.329e-01, 6.534e-02, -2.604e-01, 4.130e-03, -9.019e-02, -4.505e-01, 2.406e-01, -3.111e-01, 6.225e-03)); + r += mul(s7_5, M4(5.314e-02, -7.235e-02, 5.180e-02, 2.898e-01, -4.080e-02, -2.012e-02, 5.062e-02, 1.625e-02, -5.302e-02, 6.459e-02, 5.811e-02, 8.758e-02, 6.558e-02, -3.686e-01, 3.314e-02, -4.048e-02)); + r += mul(s7_6, M4(-1.078e-01, -3.606e-03, -4.979e-02, -1.336e-01, 3.099e-01, -1.052e-02, 1.959e-01, 4.290e-02, 2.026e-01, -2.581e-02, 2.090e-02, -2.610e-01, 1.948e-01, 2.015e-02, 3.395e-02, -1.098e-01)); + r += mul(s7_7, M4(-5.818e-02, 6.125e-02, -8.358e-02, -9.321e-02, -9.888e-02, -3.231e-02, -1.697e-01, -1.265e-01, 3.775e-01, -1.196e-01, -3.976e-02, 3.943e-01, 2.197e-01, 2.273e-01, 6.525e-02, 1.704e-01)); + r += mul(s7_8, M4(5.946e-02, 1.244e-02, 7.324e-02, 6.264e-02, 6.809e-02, 1.185e-01, -4.188e-02, 9.021e-02, 6.260e-02, -1.134e-01, -2.057e-02, 1.018e-01, -2.984e-01, -5.999e-02, 7.880e-02, 2.304e-01)); + r += V4(3.921e-03, 1.878e-02, 2.174e-02, -2.472e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); 
+ V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 
0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, 
s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.212e-01, 2.454e-01, -1.446e-01, 1.203e-01, -4.809e-02, 4.895e-03, 2.550e-02, 3.495e-02, -3.779e-02, 2.177e-02, -5.566e-02, 6.897e-02, -4.681e-02, 6.815e-03, -6.666e-02, -4.678e-02)); + r += mul(s0_1, M4(-4.755e-01, 2.214e-01, -4.025e-02, -1.050e-01, 1.359e-01, -3.377e-03, -1.989e-02, 5.404e-02, -2.671e-03, -8.268e-02, 7.647e-02, 4.717e-02, -1.557e-02, -5.494e-02, 1.315e-01, -6.446e-03)); + r += mul(s0_2, M4(-2.166e-01, -3.459e-01, 1.548e-01, -1.338e-01, -1.053e-02, 2.242e-01, -8.194e-02, 9.585e-02, -6.271e-02, 1.752e-01, -8.877e-03, 9.082e-02, 1.508e-02, -1.941e-02, 1.588e-02, 3.329e-02)); + r += mul(s0_3, M4(2.467e-01, 2.249e-01, 1.149e-02, -7.518e-02, 3.673e-03, -9.865e-02, 4.413e-02, 1.193e-01, 1.032e-01, 1.033e-01, -8.545e-02, 2.160e-01, 
-5.246e-02, -1.470e-01, 1.206e-02, -1.987e-02)); + r += mul(s0_4, M4(6.004e-02, -8.828e-02, -1.406e-01, -4.759e-03, 8.425e-02, 6.162e-02, -6.374e-02, -3.446e-02, -6.185e-02, 1.739e-02, -9.953e-02, 1.651e-01, -3.235e-02, -7.202e-02, 6.607e-02, 4.503e-02)); + r += mul(s0_5, M4(-8.011e-02, -1.428e-02, 8.263e-01, 1.851e-02, -1.110e-01, -1.280e-01, -6.627e-02, 2.972e-02, 5.936e-04, 1.779e-02, -1.123e-01, 4.516e-02, 5.523e-02, 1.052e-01, -2.980e-02, -1.024e-01)); + r += mul(s0_6, M4(-1.973e-01, -3.293e-01, 4.685e-02, -4.712e-02, 3.160e-02, -1.253e-01, 8.609e-03, -1.802e-01, 6.503e-02, -3.227e-02, 6.668e-02, -3.067e-02, -2.189e-01, 2.032e-01, -1.786e-02, -1.548e-01)); + r += mul(s0_7, M4(-7.875e-03, -2.037e-01, -2.293e-01, -1.999e-01, 1.738e-02, -1.747e-01, 7.696e-02, 1.080e-01, -1.258e-01, -1.728e-02, 5.457e-02, 2.354e-01, 1.137e-01, 8.128e-02, -8.535e-02, -1.716e-01)); + r += mul(s0_8, M4(-1.705e-01, 1.495e-01, 1.148e-01, -3.005e-02, -1.323e-01, 2.338e-01, 1.695e-02, 7.834e-02, -1.096e-01, -2.646e-02, -9.663e-02, 1.223e-02, 3.244e-01, 2.172e-01, -6.180e-02, -1.141e-01)); + r += mul(s1_0, M4(2.153e-01, 2.090e-01, 7.578e-03, 8.058e-02, -3.850e-02, -4.181e-02, 3.850e-02, -5.033e-02, -4.772e-02, 1.897e-01, 1.058e-02, 1.598e-01, -4.700e-02, 2.071e-01, 7.882e-02, 9.560e-02)); + r += mul(s1_1, M4(-1.755e-01, 5.417e-02, -1.480e-03, 6.746e-02, 1.306e-01, 2.608e-02, 1.655e-01, -2.009e-01, -6.070e-03, 1.411e-01, 1.808e-03, -1.645e-01, 2.538e-02, 6.212e-03, 1.568e-02, -1.292e-01)); + r += mul(s1_2, M4(4.012e-02, -9.499e-02, 6.889e-02, 1.433e-03, -1.366e-02, -1.862e-02, -3.913e-03, -6.391e-02, -1.184e-01, -8.437e-02, -7.939e-02, 7.770e-02, -1.310e-01, -5.947e-02, -2.605e-02, 2.056e-01)); + r += mul(s1_3, M4(-5.553e-02, -8.364e-02, -4.346e-02, -6.295e-02, -2.389e-01, -1.519e-01, 3.629e-02, -1.357e-01, 1.861e-01, 4.494e-02, -7.831e-02, -1.440e-01, 8.470e-02, 2.375e-01, -5.131e-02, 8.162e-02)); + r += mul(s1_4, M4(-1.542e-02, -2.632e-01, 5.705e-04, -8.803e-03, 3.299e-01, 2.544e-01, 
6.760e-02, 1.361e-01, -1.028e-01, 1.969e-01, -5.706e-02, -3.219e-02, -1.354e-01, -9.802e-02, 5.579e-03, -1.029e-01)); + r += mul(s1_5, M4(8.236e-03, 4.757e-02, -1.364e-01, 8.383e-02, -5.845e-02, 1.069e-02, 9.025e-02, -1.442e-01, 2.297e-01, -9.455e-03, 1.582e-01, -1.652e-01, -1.464e-01, 2.857e-02, -7.026e-03, 1.508e-02)); + r += mul(s1_6, M4(-5.483e-02, 1.512e-01, -6.832e-02, 2.700e-02, 9.996e-02, 7.300e-02, -7.276e-02, 6.971e-04, 2.286e-01, 7.757e-03, 1.104e-01, -4.327e-02, 2.467e-01, -1.336e-01, -1.043e-01, -2.068e-01)); + r += mul(s1_7, M4(1.299e-02, -4.620e-02, 5.300e-02, -7.250e-02, -1.643e-01, 6.016e-02, 3.074e-02, -1.602e-03, -1.272e-01, -1.616e-01, 3.392e-02, -1.739e-01, -1.736e-01, -3.615e-01, 9.518e-02, 3.172e-02)); + r += mul(s1_8, M4(1.366e-01, 2.474e-02, 1.869e-02, -7.502e-02, -1.707e-01, 6.866e-03, 1.730e-02, -3.564e-01, -1.743e-02, 2.640e-01, -7.773e-02, -1.156e-01, -1.503e-01, 2.721e-01, -5.628e-02, 7.231e-02)); + r += mul(s2_0, M4(5.409e-02, 2.008e-01, -1.055e-02, 2.275e-02, 5.160e-02, -3.948e-02, 8.856e-03, 6.052e-02, 1.490e-01, 1.651e-02, -5.934e-02, 1.815e-01, 2.453e-01, 6.284e-03, -7.930e-02, 1.752e-02)); + r += mul(s2_1, M4(-1.884e-01, 2.381e-01, 6.626e-02, 4.132e-02, 1.137e-01, 5.828e-02, -1.162e-01, -1.047e-01, 1.733e-01, 9.714e-02, 1.684e-02, -4.726e-03, -4.425e-02, 1.308e-01, -4.460e-02, -5.757e-02)); + r += mul(s2_2, M4(-1.542e-01, -3.185e-02, 9.634e-02, -1.251e-01, 1.490e-02, -2.816e-02, -1.006e-02, 4.482e-02, 2.626e-01, 2.221e-01, -1.039e-01, 2.020e-01, 2.064e-01, 8.088e-02, 9.808e-02, -2.205e-02)); + r += mul(s2_3, M4(-1.850e-01, -1.351e-01, -2.917e-02, -8.903e-02, 9.033e-02, 1.841e-01, -8.272e-02, 5.601e-02, 1.014e-01, -2.128e-02, 2.091e-03, 2.212e-01, 7.023e-02, 1.933e-01, -2.272e-01, -3.150e-01)); + r += mul(s2_4, M4(-1.211e-01, -2.385e-01, -6.887e-02, -6.697e-02, 7.170e-02, 1.047e-01, -2.363e-02, -9.154e-02, 9.584e-02, -3.987e-02, -2.447e-01, 1.670e-01, 4.113e-02, -2.052e-01, -4.104e-02, -4.027e-02)); + r += mul(s2_5, M4(-1.085e-01, 
-8.478e-02, 2.455e-02, -1.240e-01, 5.816e-02, -2.644e-01, 1.125e-01, 2.009e-02, -1.308e-01, -1.138e-01, -4.613e-02, -1.862e-01, -9.501e-02, -9.699e-02, 3.395e-01, 5.730e-02)); + r += mul(s2_6, M4(6.815e-02, -2.550e-01, 2.668e-02, 3.038e-01, -5.298e-02, -2.368e-01, -2.784e-02, 5.199e-02, -4.197e-02, 1.275e-01, -2.058e-02, -5.542e-02, 2.150e-01, -7.770e-02, -8.895e-03, 7.471e-02)); + r += mul(s2_7, M4(-4.084e-01, -4.059e-02, 1.784e-02, 2.544e-01, -1.274e-01, -3.351e-01, 1.204e-01, -1.889e-01, -1.438e-01, 1.072e-01, 5.287e-02, -2.757e-01, -5.043e-02, 2.986e-02, -1.980e-01, 7.566e-02)); + r += mul(s2_8, M4(-6.861e-03, 2.390e-01, 1.255e-01, 5.195e-02, -3.367e-01, -1.951e-01, -5.925e-02, 2.316e-01, -2.390e-01, 2.294e-01, -7.012e-02, -5.888e-02, -1.562e-01, -2.314e-02, 5.731e-02, 7.487e-02)); + r += mul(s3_0, M4(-4.302e-02, 2.332e-03, -8.797e-02, -7.757e-03, 6.083e-02, 2.518e-03, -1.708e-02, 4.168e-02, -8.930e-02, -1.290e-01, -1.024e-01, -5.323e-02, -2.374e-01, -8.708e-02, -8.992e-02, 6.791e-02)); + r += mul(s3_1, M4(1.011e-01, 4.688e-02, -4.851e-02, 5.773e-02, -2.069e-02, 3.742e-02, -2.873e-02, 5.670e-02, -4.502e-02, -1.294e-01, 4.502e-03, -2.117e-02, 3.284e-01, 1.058e-01, 5.544e-02, 1.677e-02)); + r += mul(s3_2, M4(-3.529e-02, 3.277e-02, 4.281e-02, -1.646e-01, -1.416e-01, 5.331e-02, 3.031e-02, -9.500e-02, -1.216e-01, -9.459e-02, 3.225e-02, 1.333e-01, 6.765e-02, 1.066e-01, -1.228e-01, 8.375e-02)); + r += mul(s3_3, M4(2.865e-02, 6.776e-02, -1.468e-01, 1.705e-02, 1.683e-01, 2.070e-01, -5.935e-03, 2.404e-01, 5.563e-03, -1.044e-01, -4.681e-02, -6.475e-02, 1.726e-01, -8.206e-02, -8.217e-02, 1.699e-01)); + r += mul(s3_4, M4(-1.561e-01, 1.354e-01, 1.323e-01, -1.370e-01, -1.199e-01, -1.138e-01, 8.551e-03, 7.738e-02, 2.900e-01, 2.553e-01, 1.056e-02, -3.161e-02, -2.244e-01, -8.941e-02, -3.090e-02, 1.149e-01)); + r += mul(s3_5, M4(1.853e-01, -1.157e-01, 3.784e-03, -1.762e-02, -2.290e-01, 2.807e-02, 4.872e-02, -1.992e-01, -2.161e-01, 2.253e-01, -1.192e-01, 1.535e-01, -9.834e-02, 
1.402e-02, -2.921e-02, 1.029e-01)); + r += mul(s3_6, M4(-6.681e-02, -2.734e-02, -7.752e-02, 1.202e-01, -6.007e-02, 1.529e-01, -1.549e-02, 3.140e-01, 5.553e-02, 5.046e-02, -3.036e-02, 1.311e-02, 1.555e-01, 1.706e-01, 1.118e-02, -9.797e-02)); + r += mul(s3_7, M4(-1.464e-01, 1.390e-01, -1.009e-01, 1.939e-01, 8.315e-02, 7.123e-02, -1.456e-01, -3.606e-02, -1.194e-01, 2.929e-02, 1.934e-01, -4.639e-02, 1.440e-01, -9.890e-02, 1.106e-01, -2.761e-01)); + r += mul(s3_8, M4(1.840e-01, -1.987e-01, 3.754e-02, 1.706e-01, 2.631e-02, -1.048e-01, -7.390e-02, -2.744e-01, 3.353e-01, -3.437e-01, -2.453e-02, -1.555e-02, 2.551e-02, -1.634e-01, -3.265e-02, -4.991e-02)); + r += mul(s4_0, M4(-1.614e-01, 3.217e-02, 2.040e-02, -5.201e-02, 1.162e-01, 7.774e-02, 5.804e-02, 5.019e-02, -1.326e-01, -8.524e-02, 4.765e-03, -3.015e-01, -3.097e-01, 6.249e-01, -4.764e-01, -1.097e-01)); + r += mul(s4_1, M4(2.482e-01, -7.808e-02, 4.798e-02, -2.799e-02, 3.943e-02, 3.450e-01, 8.065e-02, 2.050e-01, -5.876e-02, -1.702e-01, -1.998e-01, -4.602e-01, -6.030e-01, -7.888e-01, -1.939e-01, -7.929e-01)); + r += mul(s4_2, M4(5.042e-02, 3.316e-02, 1.540e-01, 7.098e-02, 7.762e-02, -1.345e-02, -6.667e-02, 7.275e-02, -2.205e-01, 5.857e-02, -4.728e-02, -3.452e-02, -2.423e-02, -2.893e-01, -5.288e-01, 4.894e-01)); + r += mul(s4_3, M4(2.116e-02, -6.328e-02, -3.709e-02, 5.392e-02, 6.060e-02, 2.983e-02, -2.665e-02, -2.008e-01, 1.694e-02, -3.005e-01, 1.903e-01, 2.144e-02, -2.694e-02, -1.083e+00, 2.387e-01, -2.326e-01)); + r += mul(s4_4, M4(-4.878e-02, -1.432e-01, 9.417e-02, 3.703e-02, -2.023e-01, -2.737e-02, 4.717e-02, -1.997e-01, 1.266e-01, -1.233e-02, -1.508e-01, -1.969e-02, 5.024e-01, -1.841e-01, -1.062e+00, -1.704e-01)); + r += mul(s4_5, M4(8.148e-02, -3.165e-02, -4.487e-02, -1.383e-02, 1.128e-01, -1.619e-01, -2.010e-02, 2.163e-02, -2.844e-02, -7.585e-02, -7.373e-02, 2.590e-02, 1.708e-01, 2.063e-01, -9.584e-01, 2.464e-01)); + r += mul(s4_6, M4(-2.254e-01, 5.652e-02, 8.183e-02, 1.288e-01, -4.934e-02, -2.113e-01, 2.622e-02, 
-1.164e-01, -3.864e-01, -2.177e-02, -3.944e-03, 1.932e-01, -4.718e-01, 1.481e-01, -3.146e-01, -7.798e-01)); + r += mul(s4_7, M4(1.397e-01, 7.781e-04, -7.422e-02, -8.363e-04, -3.861e-02, 2.361e-01, -5.971e-02, -5.968e-02, 4.388e-01, 6.492e-02, -2.985e-02, -2.851e-01, 4.776e-01, -2.561e-01, -3.868e-01, -7.250e-01)); + r += mul(s4_8, M4(8.290e-02, 1.899e-01, 8.968e-02, -8.672e-02, 1.264e-01, -1.086e-01, 3.075e-02, 1.026e-01, -3.738e-01, -1.710e-02, -1.849e-01, 3.125e-01, 1.557e-02, 1.924e-01, -5.640e-01, 4.823e-01)); + r += mul(s5_0, M4(-2.199e-01, -2.204e-01, -3.381e-02, -8.068e-02, -7.093e-02, 3.299e-02, -3.280e-02, -2.355e-02, 6.614e-02, -2.524e-02, -6.671e-03, -2.634e-02, -4.228e-02, 8.667e-02, -1.000e-02, 2.644e-02)); + r += mul(s5_1, M4(4.993e-02, 8.674e-02, 1.256e-02, 3.661e-02, -5.597e-02, -3.691e-02, 1.224e-01, -1.100e-03, -2.995e-02, -5.264e-02, 2.138e-02, -5.663e-02, 7.679e-02, -3.386e-02, 2.181e-02, -7.541e-02)); + r += mul(s5_2, M4(2.458e-02, -5.279e-01, 6.858e-02, -6.139e-02, 2.203e-01, 1.888e-01, 5.875e-02, 2.987e-02, 5.732e-02, 1.023e-01, -8.229e-03, 4.815e-02, 5.105e-02, -4.501e-05, 3.425e-02, 2.668e-02)); + r += mul(s5_3, M4(8.183e-02, -2.298e-02, -4.129e-02, 9.875e-02, 4.211e-02, -1.205e-01, 8.649e-02, -2.774e-02, 1.283e-01, -7.503e-02, 1.555e-01, -4.780e-02, -1.808e-01, -5.178e-02, 9.190e-02, -9.965e-02)); + r += mul(s5_4, M4(-4.966e-01, -7.886e-02, 5.324e-02, -1.069e-01, -1.466e-01, -1.293e-01, -1.252e-01, 5.233e-02, 1.714e-01, -5.468e-02, 9.061e-02, 5.902e-02, -6.234e-02, -6.380e-02, -7.879e-02, -1.139e-01)); + r += mul(s5_5, M4(-4.539e-01, -1.093e-01, -3.171e-01, 8.606e-02, 3.207e-02, -3.530e-01, -3.926e-02, 1.447e-01, -1.871e-01, 1.184e-01, 1.986e-01, -1.303e-01, 9.940e-02, -1.997e-02, -7.409e-02, 1.339e-01)); + r += mul(s5_6, M4(6.859e-02, 1.065e-01, -6.674e-02, -7.343e-02, -2.186e-02, -1.898e-02, 8.683e-02, -2.241e-01, -1.526e-01, -1.297e-01, 1.090e-01, 1.081e-01, -1.008e-01, -2.101e-02, -2.802e-02, -5.817e-03)); + r += mul(s5_7, 
M4(-2.935e-01, 2.552e-01, -1.772e-01, -3.008e-02, 2.061e-02, 1.602e-01, -1.067e-01, -1.088e-01, -9.616e-02, -4.000e-03, -1.740e-01, 1.931e-02, 1.135e-01, -1.029e-02, -2.060e-01, 1.138e-01)); + r += mul(s5_8, M4(2.994e-01, -1.938e-01, -3.579e-02, -3.688e-01, 1.932e-01, 7.019e-02, -5.929e-02, 5.967e-02, -6.230e-02, 2.406e-01, -1.282e-01, 4.323e-02, 5.835e-02, -9.893e-02, -2.381e-02, 1.695e-02)); + r += mul(s6_0, M4(-1.354e-01, -3.961e-02, 6.492e-02, -8.158e-02, -4.522e-02, -1.466e-01, 1.229e-01, 1.025e-01, -1.393e-01, 1.328e-01, 5.425e-02, -9.843e-02, -6.086e-02, -9.741e-02, 8.276e-02, 1.117e-01)); + r += mul(s6_1, M4(2.726e-01, -1.876e-02, -7.329e-02, 6.114e-03, 1.353e-01, 6.079e-02, -1.053e-01, 9.964e-03, 6.344e-02, 1.528e-01, -4.556e-02, -2.029e-01, -7.652e-02, -7.684e-02, 2.392e-02, -8.908e-02)); + r += mul(s6_2, M4(1.107e-01, 1.692e-01, 8.991e-02, 6.804e-03, 1.382e-01, 1.504e-01, 6.543e-02, 1.005e-02, 1.183e-01, -6.809e-02, -6.099e-03, 8.215e-02, 1.307e-01, -4.720e-02, -4.203e-02, 8.069e-02)); + r += mul(s6_3, M4(-2.380e-01, -7.450e-02, 9.634e-02, -1.225e-01, -1.447e-01, 1.746e-03, -6.290e-02, 9.689e-02, 5.720e-03, -5.844e-02, 2.926e-02, -1.172e-02, -2.900e-01, -5.345e-02, 1.743e-02, -1.469e-01)); + r += mul(s6_4, M4(1.879e-01, 2.363e-01, 5.324e-02, 5.700e-03, -1.848e-01, 4.805e-02, -2.434e-01, -1.537e-01, 1.913e-01, 1.772e-01, -1.126e-01, -1.782e-01, -2.319e-01, -2.431e-01, 2.707e-01, -9.524e-02)); + r += mul(s6_5, M4(-1.009e-01, 2.484e-01, -1.493e-02, -2.903e-02, -2.510e-01, -2.396e-01, 1.763e-02, -2.132e-02, 1.837e-01, 1.076e-01, -1.937e-01, 2.437e-01, -1.143e-01, -1.318e-01, 1.889e-01, -4.204e-02)); + r += mul(s6_6, M4(5.626e-02, 5.041e-03, -5.609e-02, -3.812e-02, 7.537e-02, 1.685e-02, 2.526e-02, 4.571e-02, -1.270e-01, 2.408e-02, 6.618e-02, -1.684e-01, 1.652e-01, -2.247e-02, 3.996e-02, 1.444e-01)); + r += mul(s6_7, M4(-4.078e-02, 4.302e-03, 2.513e-02, -1.440e-01, 2.091e-01, -1.047e-01, 7.912e-02, -5.481e-02, 1.110e-02, -8.656e-02, -1.186e-01, 5.608e-02, 
3.072e-01, 3.693e-02, 5.291e-03, -1.331e-01)); + r += mul(s6_8, M4(2.316e-01, -1.662e-01, -3.324e-02, 4.843e-02, 9.420e-02, 1.201e-01, 2.672e-01, -1.255e-01, -8.873e-02, -1.695e-01, -7.646e-02, 2.508e-01, 1.432e-02, 1.685e-01, 4.257e-03, 1.293e-02)); + r += mul(s7_0, M4(-1.156e-01, 2.078e-02, 7.249e-02, -4.292e-02, -2.089e-01, -3.016e-02, -5.166e-02, -2.298e-02, 1.414e-01, 5.890e-02, 2.651e-02, -6.890e-03, 6.678e-02, 1.153e-01, -2.381e-01, -1.701e-01)); + r += mul(s7_1, M4(-8.216e-02, -1.252e-01, -9.205e-02, -2.337e-02, 6.869e-02, 6.252e-02, 2.805e-03, 2.468e-03, -1.468e-01, -2.906e-01, 1.110e-01, 1.351e-01, -3.282e-01, -2.939e-02, -4.672e-02, 5.490e-02)); + r += mul(s7_2, M4(-3.332e-02, -5.059e-02, -6.004e-03, -1.088e-01, -1.242e-01, -1.408e-01, 1.822e-01, 7.609e-02, 1.225e-01, 5.925e-02, 6.622e-02, 1.194e-01, -4.135e-02, 2.762e-02, 2.292e-02, 3.052e-02)); + r += mul(s7_3, M4(1.758e-02, 1.008e-01, -2.348e-02, 8.004e-02, 2.981e-02, -2.197e-01, 1.005e-01, -1.421e-01, 8.495e-02, -1.553e-01, -1.481e-02, 6.916e-02, 3.988e-02, 2.724e-01, 1.441e-01, 1.326e-01)); + r += mul(s7_4, M4(1.742e-02, -7.544e-02, -3.567e-02, 1.585e-01, 3.817e-01, -6.355e-01, 7.750e-02, 6.887e-02, -1.694e-01, -7.131e-02, -2.473e-02, 2.996e-01, 1.122e-01, 2.002e-01, -1.043e-01, 1.335e-01)); + r += mul(s7_5, M4(-1.301e-01, 1.746e-01, 4.739e-02, 1.297e-01, 1.039e-01, -1.531e-01, -1.170e-03, -2.399e-01, -1.084e-01, 2.192e-02, 1.281e-01, 4.687e-01, -4.872e-02, 3.747e-02, 1.742e-02, -7.458e-02)); + r += mul(s7_6, M4(1.210e-02, 2.003e-03, 5.535e-02, -5.994e-02, -2.231e-02, 1.939e-01, 1.085e-01, -2.014e-01, -4.221e-02, 5.293e-02, 6.648e-02, 1.168e-01, 2.498e-01, 2.621e-03, -7.611e-02, 3.010e-01)); + r += mul(s7_7, M4(-1.793e-02, -1.727e-02, -9.524e-02, 1.842e-01, 8.004e-02, 6.735e-02, 2.665e-01, 6.073e-02, 1.393e-01, 2.396e-03, 7.789e-02, -6.060e-02, 6.975e-02, -2.149e-01, -6.495e-02, 2.075e-01)); + r += mul(s7_8, M4(1.462e-01, -1.483e-01, 5.319e-02, 1.039e-01, -3.684e-01, 5.993e-01, 4.945e-02, 2.637e-02, 
1.762e-01, -2.052e-01, 1.416e-02, 3.124e-02, 4.168e-02, 3.415e-02, 2.557e-02, -1.337e-01)); + r += V4(-1.099e-03, -1.032e-02, 3.082e-02, 1.035e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.907e-02, -1.632e-01, 2.621e-01, 4.495e-01, 2.450e-01, -7.969e-02, -9.695e-02, -5.663e-03, 3.399e-01, -1.211e-01, 2.122e-02, 1.645e-01, -2.607e-02, 3.828e-02, 4.675e-02, 7.656e-02)); + r += mul(s0_1, M4(-1.433e-01, 4.079e-02, -3.504e-02, 8.135e-03, -8.809e-02, 5.904e-02, -1.062e-01, -1.113e-01, 1.678e-01, 1.022e-01, 1.594e-02, 1.814e-02, -4.683e-02, -5.523e-02, -5.405e-04, 2.947e-03)); + r += mul(s0_2, M4(4.109e-01, 5.408e-01, -3.117e-02, -1.181e-01, 1.400e-02, 2.948e-02, 3.592e-02, -5.692e-02, -2.324e-02, 1.214e-01, 7.887e-02, 5.319e-02, -2.901e-02, -3.335e-02, 2.581e-02, -3.054e-02)); + r += mul(s0_3, M4(7.100e-02, 1.275e-01, -1.635e-01, 8.014e-02, -1.194e-01, 1.243e-01, -1.275e-01, 7.479e-02, 7.746e-02, 7.278e-02, -1.005e-01, 1.226e-01, 2.339e-01, -5.814e-02, 6.769e-03, 2.113e-01)); + r += mul(s0_4, M4(-1.694e-01, -4.253e-01, -8.214e-02, 4.835e-01, -2.095e-01, -6.991e-02, 2.059e-01, 3.859e-02, 7.947e-02, -3.136e-01, -2.686e-01, 2.411e-02, -1.857e-02, 1.003e-01, 1.938e-03, -7.732e-02)); + r += mul(s0_5, M4(-1.707e-01, 2.349e-02, -2.010e-01, -9.759e-02, -5.680e-02, 1.641e-02, 6.537e-02, 5.386e-02, -7.561e-02, 1.374e-01, 
4.763e-02, 4.752e-02, -1.764e-02, 4.048e-02, -9.938e-02, -8.344e-02)); + r += mul(s0_6, M4(2.419e-01, -7.465e-01, 1.612e-01, -7.485e-02, -5.591e-03, -1.207e-01, 3.387e-02, -4.188e-02, -9.178e-02, -8.270e-02, -7.268e-02, 1.242e-01, 2.741e-01, 1.636e-01, -5.916e-02, 1.017e-01)); + r += mul(s0_7, M4(-1.365e-02, 1.412e-01, -2.887e-01, -7.540e-02, 7.596e-02, 4.870e-02, 3.877e-02, 6.815e-02, -1.386e-01, -2.262e-02, -2.122e-02, 7.128e-02, -1.927e-01, 3.095e-02, -1.590e-01, 1.062e-01)); + r += mul(s0_8, M4(1.916e-01, -2.455e-01, 7.014e-02, -1.270e-02, 5.769e-02, -2.310e-02, 7.511e-02, 7.148e-02, -4.717e-02, 5.066e-02, 1.695e-01, 7.793e-02, 1.294e-01, 1.100e-01, -1.557e-01, -4.429e-02)); + r += mul(s1_0, M4(1.231e-01, 1.075e-02, 5.984e-02, 1.041e-02, -3.679e-01, 7.167e-03, -3.533e-02, -1.685e-01, 8.681e-02, 7.785e-02, -2.864e-02, 1.379e-01, -2.166e-01, -1.711e-01, -7.297e-02, 5.376e-02)); + r += mul(s1_1, M4(-8.928e-02, 1.137e-02, 1.586e-02, 6.383e-02, 9.266e-03, 1.548e-01, -1.330e-01, -6.228e-02, 5.766e-02, 1.847e-01, -3.778e-02, 2.073e-01, 1.731e-01, 1.187e-01, 9.969e-02, -2.130e-01)); + r += mul(s1_2, M4(-3.809e-02, 7.181e-02, -1.175e-01, 7.879e-03, -5.394e-02, -3.892e-02, 7.642e-02, -1.793e-01, -5.986e-02, 4.414e-02, 4.186e-03, -4.312e-02, -1.446e-02, -2.616e-02, -9.201e-02, 6.201e-02)); + r += mul(s1_3, M4(-1.076e-02, 1.282e-01, -4.642e-02, 8.175e-02, 1.753e-01, 3.502e-02, -2.682e-01, 4.726e-03, 3.721e-01, -1.364e-02, -2.574e-02, 2.671e-01, -3.350e-01, -2.237e-01, -3.944e-02, -1.978e-01)); + r += mul(s1_4, M4(-7.233e-02, -1.170e-01, 9.592e-02, 1.430e-01, 1.253e-01, 1.406e-01, -2.460e-01, 2.717e-01, 4.557e-01, 9.758e-03, -9.454e-02, -8.419e-02, 7.404e-02, 9.134e-02, -7.464e-04, -1.799e-01)); + r += mul(s1_5, M4(-4.020e-02, -5.117e-02, -3.810e-02, -5.862e-03, 1.119e-01, -3.387e-04, 5.410e-02, 1.892e-01, -2.735e-02, -2.799e-02, -1.642e-01, -4.453e-02, 3.855e-02, 2.473e-01, 1.063e-02, 5.474e-02)); + r += mul(s1_6, M4(-1.402e-02, -1.952e-01, -9.704e-03, -2.762e-04, 
-3.107e-02, 6.896e-03, 1.085e-01, -6.318e-02, 8.645e-03, 9.242e-02, 8.004e-02, 1.478e-01, -4.364e-01, -4.608e-02, -2.816e-02, -4.576e-02)); + r += mul(s1_7, M4(1.324e-02, 1.314e-01, -2.591e-02, -1.382e-01, -9.404e-02, 1.415e-01, 6.301e-02, 2.456e-01, 1.937e-01, 1.006e-01, -2.163e-01, 2.670e-02, 4.946e-01, -8.813e-03, 3.136e-01, -4.913e-03)); + r += mul(s1_8, M4(3.963e-02, -3.145e-02, 8.015e-02, 8.454e-02, 7.263e-02, 1.905e-02, -2.074e-02, 1.540e-01, 3.261e-02, -1.254e-02, -3.647e-02, -1.329e-03, -5.681e-02, 8.675e-02, 2.288e-01, 9.746e-02)); + r += mul(s2_0, M4(-2.334e-01, -2.600e-01, -2.643e-02, -1.333e-01, -1.530e-01, -1.996e-03, -4.574e-02, -5.363e-02, 9.298e-02, 6.021e-02, 3.740e-02, -5.791e-03, 2.563e-01, -7.183e-02, 1.385e-01, 2.428e-01)); + r += mul(s2_1, M4(-1.647e-01, -1.176e-01, -7.941e-02, -8.957e-03, 4.862e-02, 1.564e-01, 2.620e-02, -1.008e-01, -6.024e-02, -2.132e-01, -3.268e-03, -1.069e-01, 1.208e-01, 8.403e-02, 1.108e-01, 1.906e-01)); + r += mul(s2_2, M4(-2.604e-02, 7.566e-04, -1.394e-01, 3.329e-03, 4.651e-03, 7.781e-02, -8.141e-02, 3.367e-02, -1.990e-01, -2.031e-01, 1.658e-02, -1.671e-02, 3.172e-02, 7.532e-02, -7.900e-02, -1.872e-01)); + r += mul(s2_3, M4(-3.348e-01, -8.474e-02, -2.502e-02, -4.599e-03, -1.662e-01, -6.901e-02, -5.174e-02, 1.332e-01, -3.291e-02, 7.699e-02, 1.718e-01, 6.653e-02, 3.260e-01, 1.176e-01, 1.030e-01, 8.394e-02)); + r += mul(s2_4, M4(-1.590e-01, 5.059e-02, -1.914e-01, 3.241e-01, -1.179e-01, -6.522e-02, -1.250e-01, -2.417e-01, -1.997e-01, 5.494e-02, 1.203e-01, 3.542e-01, -2.391e-01, 1.825e-01, -1.598e-01, -4.109e-02)); + r += mul(s2_5, M4(8.463e-02, 2.909e-02, 5.116e-02, 5.316e-02, 4.028e-02, 3.305e-02, -4.200e-02, 3.466e-02, -1.374e-01, 4.134e-02, 1.360e-01, -2.347e-02, -8.626e-02, 8.375e-03, -8.745e-02, -1.782e-01)); + r += mul(s2_6, M4(-4.684e-02, -2.580e-02, 6.509e-02, -3.403e-02, -3.545e-01, -9.629e-02, 1.430e-01, -1.666e-02, -1.962e-02, 1.495e-01, 5.942e-02, 5.085e-03, -5.955e-02, 1.693e-01, -3.783e-02, -2.697e-02)); + r 
+= mul(s2_7, M4(8.830e-02, -1.563e-01, 1.951e-01, -8.618e-02, 2.719e-01, -1.812e-01, 3.895e-01, -1.152e-01, 4.660e-02, 1.743e-01, -5.543e-03, -4.107e-02, -1.726e-01, 4.877e-02, 9.327e-02, -2.110e-02)); + r += mul(s2_8, M4(2.213e-01, -7.418e-02, 3.567e-02, 5.040e-02, 3.594e-02, -3.393e-02, 5.708e-02, -5.244e-02, 1.290e-01, -4.411e-02, -1.165e-01, 7.365e-02, -4.063e-02, 2.345e-01, -2.177e-01, -9.990e-03)); + r += mul(s3_0, M4(-1.619e-01, -7.468e-02, 9.396e-03, 9.876e-03, 1.080e-01, 6.918e-02, 3.217e-02, 2.366e-02, 1.652e-01, 1.072e-01, -2.474e-02, 1.887e-02, -1.703e-01, 8.238e-02, -7.572e-02, -5.797e-03)); + r += mul(s3_1, M4(1.951e-01, 1.886e-01, 2.757e-02, 5.847e-02, -5.980e-02, -4.676e-02, -3.278e-02, 8.578e-02, -2.480e-01, -5.365e-02, -4.106e-02, -1.306e-01, 9.232e-02, -1.016e-01, 5.433e-02, -1.421e-01)); + r += mul(s3_2, M4(4.314e-02, 6.689e-02, -1.937e-01, 1.043e-01, 2.926e-02, 2.950e-02, 4.578e-02, -2.616e-02, 4.214e-02, -7.447e-02, 6.302e-02, -6.889e-02, 7.305e-03, -7.965e-02, 6.470e-02, 2.100e-02)); + r += mul(s3_3, M4(7.317e-02, 9.556e-02, 3.596e-02, -5.933e-02, 3.440e-01, 7.869e-02, 2.328e-02, 1.309e-02, 1.867e-01, 4.689e-02, -3.954e-02, -2.069e-01, -5.222e-02, -7.956e-02, 1.548e-01, 7.688e-02)); + r += mul(s3_4, M4(7.289e-02, 5.660e-03, 5.053e-02, -4.467e-02, -2.159e-01, 4.814e-02, -7.589e-02, -6.821e-02, 7.689e-02, 4.093e-02, 1.302e-02, 2.377e-01, 1.480e-01, 1.310e-01, -4.508e-02, -2.226e-01)); + r += mul(s3_5, M4(-9.152e-02, -7.684e-02, -2.148e-01, 9.026e-02, 9.941e-03, 6.244e-03, 1.108e-01, 1.261e-02, 5.612e-02, 1.847e-01, -2.409e-01, 3.319e-01, -6.235e-02, -7.327e-02, 7.389e-02, 4.743e-02)); + r += mul(s3_6, M4(2.987e-02, -1.183e-01, -6.748e-02, -2.711e-02, 2.086e-01, -6.959e-02, 1.568e-01, -5.467e-02, 5.666e-02, -1.374e-02, 1.058e-01, -1.353e-02, 4.040e-02, -4.324e-02, -1.479e-02, 1.396e-01)); + r += mul(s3_7, M4(-2.289e-01, 2.539e-01, -9.910e-02, 8.551e-02, -2.529e-01, 1.199e-01, 5.722e-01, 2.933e-01, -1.577e-01, -1.241e-01, 1.151e-01, 7.669e-02, 
-3.677e-02, 2.457e-02, 7.805e-02, 1.246e-01)); + r += mul(s3_8, M4(-2.348e-01, -7.462e-02, 3.307e-02, -1.614e-01, 1.204e-01, -1.886e-01, 6.438e-02, -1.010e-01, 4.648e-02, 1.065e-01, -5.959e-02, -4.188e-02, 2.160e-02, -7.793e-02, -7.079e-02, -1.110e-01)); + r += mul(s4_0, M4(-9.089e-02, -1.331e-02, 3.064e-02, -4.192e-02, 2.900e-01, 5.748e-02, 7.296e-02, 3.985e-02, 2.369e-01, -2.515e-01, -2.009e-01, -8.283e-02, -2.083e-01, 2.022e-01, 9.404e-02, 1.635e-01)); + r += mul(s4_1, M4(6.761e-02, 1.662e-01, -5.494e-02, 3.434e-02, -4.001e-02, -4.373e-02, 1.369e-01, -1.741e-01, 1.579e-01, -7.934e-03, 1.573e-02, -1.471e-01, -9.026e-01, -2.991e-01, 3.170e-02, -2.082e-01)); + r += mul(s4_2, M4(1.668e-01, 1.150e-01, 3.365e-02, 4.915e-02, 1.548e-01, -9.861e-02, -1.739e-03, -1.006e-01, 2.509e-01, 4.215e-02, -1.952e-01, -2.514e-01, 2.461e-01, -7.303e-01, 2.846e-01, -5.022e-02)); + r += mul(s4_3, M4(-1.248e-01, 1.479e-01, 5.223e-02, -7.043e-03, 1.892e-01, 9.435e-02, -9.993e-02, 2.667e-01, -1.804e-02, -1.080e-01, -1.120e-01, 1.346e-02, 1.712e-01, 5.516e-01, 1.766e-01, -3.375e-01)); + r += mul(s4_4, M4(2.485e-01, -2.547e-02, -1.379e-02, 1.010e-01, 2.497e-02, 5.752e-02, -3.676e-02, -1.949e-01, 2.648e-01, -2.184e-01, 1.004e-01, 3.635e-01, 2.710e-01, 4.792e-02, 1.356e+00, 7.392e-01)); + r += mul(s4_5, M4(-1.044e-01, -1.185e-01, -5.964e-02, 1.050e-01, -2.880e-01, -2.760e-01, -2.040e-02, -6.421e-02, -2.995e-01, 4.306e-01, -1.035e-01, 8.934e-03, -9.378e-02, -6.214e-01, -5.966e-02, -4.196e-01)); + r += mul(s4_6, M4(-6.191e-03, -1.208e-02, 1.564e-01, -1.394e-02, 1.507e-03, -8.000e-02, -8.325e-02, 9.059e-02, 2.129e-01, 2.970e-01, 1.032e-01, -9.473e-02, 6.199e-02, 4.550e-01, 1.335e-01, -1.861e-01)); + r += mul(s4_7, M4(-1.637e-01, -1.532e-01, 3.509e-02, -7.038e-02, -6.394e-02, -3.487e-01, -1.210e-01, 1.353e-01, -2.038e-01, -1.799e-01, -1.739e-01, 2.214e-02, -2.637e-01, -1.615e-01, -7.865e-01, 3.009e-01)); + r += mul(s4_8, M4(1.204e-02, -8.630e-02, 4.670e-02, 9.570e-03, -1.212e-01, -3.283e-02, 
2.147e-02, -6.195e-02, -7.173e-02, 1.658e-01, 1.884e-01, -6.430e-02, -3.136e-01, -2.145e-01, -5.900e-01, -1.781e-01)); + r += mul(s5_0, M4(-1.602e-01, 7.305e-03, 1.013e-01, -4.396e-02, 2.237e-01, 9.667e-02, -2.031e-02, 6.526e-02, -6.753e-02, -2.687e-02, -2.583e-02, -4.206e-02, -7.052e-02, 5.000e-02, 3.210e-02, -2.945e-02)); + r += mul(s5_1, M4(-8.484e-02, -1.646e-01, -2.160e-01, 2.718e-02, -1.412e-01, 3.326e-03, -8.358e-02, 3.792e-02, -5.224e-02, -2.208e-02, 5.928e-02, -1.132e-01, 1.771e-01, -2.547e-02, 9.220e-02, -2.003e-02)); + r += mul(s5_2, M4(1.226e-01, -1.186e-01, -3.271e-02, 1.741e-01, -1.654e-01, 5.303e-02, 9.859e-02, -1.076e-01, -1.267e-01, -2.056e-02, 1.518e-02, -3.594e-02, 1.399e-01, 5.955e-02, 2.196e-02, 3.942e-03)); + r += mul(s5_3, M4(2.204e-01, -4.739e-02, 1.469e-02, 1.821e-01, -3.276e-02, 4.900e-02, -9.718e-02, 2.035e-01, 2.070e-02, 8.194e-02, -4.337e-02, -5.462e-02, -1.732e-01, -1.340e-01, 1.204e-01, 5.843e-02)); + r += mul(s5_4, M4(1.484e-01, 4.451e-01, 1.947e-02, -5.255e-03, -1.856e-01, -5.393e-02, 6.210e-03, -1.717e-01, 1.926e-02, -9.610e-02, -7.933e-02, 9.830e-02, -8.662e-02, -9.962e-03, 5.713e-02, 4.462e-02)); + r += mul(s5_5, M4(-1.955e-01, -5.243e-01, -6.282e-03, 1.982e-01, -1.204e-01, 7.155e-02, -1.225e-02, -1.338e-01, -3.579e-02, 1.574e-01, 7.702e-02, 3.390e-03, -6.295e-02, 3.460e-02, -1.409e-01, -2.541e-02)); + r += mul(s5_6, M4(-8.898e-02, 2.620e-01, 1.795e-01, -2.550e-02, -4.822e-02, 1.556e-01, -3.864e-02, -3.345e-02, 3.608e-02, -6.921e-02, 3.265e-02, 5.650e-02, -1.416e-02, 5.370e-02, -3.530e-02, -8.904e-03)); + r += mul(s5_7, M4(-8.129e-03, -1.428e-01, 2.235e-02, -8.305e-03, 1.021e-02, 3.031e-02, 5.251e-02, -6.861e-02, 6.833e-02, 5.761e-02, -5.271e-02, -4.588e-03, 1.608e-02, -1.631e-01, -5.513e-02, 6.018e-02)); + r += mul(s5_8, M4(-1.300e-01, -2.935e-01, -8.674e-02, 1.337e-01, -8.421e-02, 1.442e-01, -1.998e-01, 6.850e-03, 1.218e-01, -2.039e-01, 5.682e-02, -1.322e-02, -7.433e-04, -8.572e-03, 2.511e-02, -4.253e-02)); + r += mul(s6_0, 
M4(4.771e-01, -1.221e-02, -6.044e-02, 2.828e-02, -3.951e-02, -3.085e-02, -2.765e-02, 1.833e-02, -2.159e-01, -4.498e-02, -1.684e-02, 1.719e-02, 1.547e-01, 2.494e-02, 8.313e-04, 2.347e-01)); + r += mul(s6_1, M4(3.691e-01, -9.233e-02, 7.315e-02, -4.366e-02, 1.347e-01, 1.336e-01, -1.657e-03, 1.456e-02, -9.590e-02, -3.634e-02, 6.165e-02, 5.148e-02, 1.741e-01, -4.495e-02, 7.363e-02, -3.365e-02)); + r += mul(s6_2, M4(4.099e-02, 7.470e-02, 5.907e-02, -2.502e-02, 1.264e-01, -9.254e-02, -1.626e-02, 4.827e-02, -6.567e-02, -7.848e-02, -7.057e-02, 1.220e-01, 1.094e-01, 5.916e-02, -3.141e-02, -6.242e-03)); + r += mul(s6_3, M4(1.557e-01, -2.321e-01, -3.681e-02, 1.202e-01, -1.754e-01, 6.722e-03, 5.256e-02, -1.179e-01, 2.371e-01, 3.811e-02, -4.509e-02, -5.125e-03, -2.686e-01, 1.801e-01, -7.930e-02, 2.836e-01)); + r += mul(s6_4, M4(4.213e-01, 3.607e-02, -3.110e-02, 1.138e-01, -1.559e-01, -2.379e-01, 6.866e-02, -8.668e-02, 2.498e-01, 4.497e-02, 3.912e-01, 3.999e-02, 2.063e-01, -4.619e-02, 1.196e-01, -3.082e-02)); + r += mul(s6_5, M4(1.284e-01, 3.436e-03, -4.267e-02, 3.899e-02, 8.482e-02, -3.773e-02, 3.275e-03, -3.904e-02, -1.216e-01, -7.397e-02, -4.438e-01, -2.248e-02, -1.356e-01, -7.888e-02, 2.795e-02, -1.139e-02)); + r += mul(s6_6, M4(1.131e-01, -2.566e-02, 4.791e-03, -1.055e-01, -9.828e-02, 5.326e-02, -5.499e-02, -3.927e-02, 1.275e-01, 6.263e-02, -4.156e-02, 9.745e-02, 1.533e-01, -3.617e-02, -6.586e-02, 3.466e-02)); + r += mul(s6_7, M4(8.247e-02, -7.658e-02, 1.527e-01, 3.525e-02, 8.408e-02, -9.757e-02, -2.895e-02, 5.078e-02, -2.114e-01, -1.057e-01, -1.833e-01, -1.000e-01, 2.378e-01, 2.511e-01, -1.157e-01, -4.118e-02)); + r += mul(s6_8, M4(1.356e-02, -8.041e-02, 4.849e-02, -1.023e-01, 7.375e-03, 6.931e-02, -2.521e-02, 1.345e-01, -1.532e-01, -1.823e-02, -4.957e-02, -8.803e-02, 1.450e-01, 7.583e-02, -3.735e-03, -8.479e-02)); + r += mul(s7_0, M4(-1.693e-02, -4.398e-03, -5.467e-02, 1.243e-01, -3.660e-02, 1.610e-01, 1.077e-01, 1.909e-01, 8.232e-03, -1.508e-01, 1.603e-02, -8.618e-02, 
3.586e-02, 1.234e-01, 7.491e-02, -1.224e-01)); + r += mul(s7_1, M4(-2.283e-01, 2.992e-01, -1.198e-01, -4.329e-03, 1.752e-01, -1.657e-01, 3.741e-02, -1.488e-01, 1.842e-01, -2.037e-01, 3.463e-02, -6.749e-02, -1.872e-01, -1.033e-01, -7.260e-02, 1.512e-01)); + r += mul(s7_2, M4(1.502e-02, -1.062e-01, -9.573e-03, -4.199e-02, -1.554e-01, -4.714e-02, 9.064e-02, 1.631e-02, 1.476e-01, -1.704e-01, -3.367e-02, 7.365e-02, 5.617e-02, 5.865e-02, -1.278e-02, -4.977e-02)); + r += mul(s7_3, M4(-1.026e-01, -1.229e-01, 4.286e-02, 1.130e-01, -2.324e-02, -4.465e-02, 4.590e-02, 6.720e-02, 8.055e-02, 2.163e-02, -4.712e-02, -1.149e-01, 1.638e-01, 5.185e-02, -6.191e-02, -2.353e-02)); + r += mul(s7_4, M4(-8.012e-02, 2.777e-01, -8.581e-02, 9.079e-03, -6.133e-03, 9.574e-02, -6.214e-02, -2.644e-01, 5.576e-02, 9.414e-02, 4.962e-02, 3.748e-01, -1.098e-01, -2.278e-01, -1.171e-01, 6.858e-02)); + r += mul(s7_5, M4(-2.794e-01, -1.326e-01, -6.401e-02, 2.166e-01, -9.729e-03, 1.545e-02, 1.940e-03, -9.707e-02, 3.446e-02, -2.200e-01, 1.705e-01, 5.518e-02, 1.778e-02, 5.802e-02, 7.104e-02, 8.698e-02)); + r += mul(s7_6, M4(-2.421e-01, 3.375e-02, -8.096e-02, 5.195e-02, -1.542e-02, -2.469e-02, 2.463e-02, -3.006e-02, 1.313e-01, -1.252e-02, -8.934e-02, 6.374e-02, -2.305e-01, 9.282e-02, -1.796e-01, 1.911e-02)); + r += mul(s7_7, M4(-1.725e-01, 1.027e-01, -7.768e-02, -5.955e-02, -3.257e-02, -2.853e-02, -3.702e-02, 2.073e-01, -3.216e-02, -3.304e-02, 5.860e-02, 1.499e-01, -5.883e-02, -1.523e-02, 3.860e-01, -9.647e-03)); + r += mul(s7_8, M4(-2.646e-02, 7.739e-02, 1.050e-01, -9.308e-02, 1.479e-01, -8.549e-02, 1.245e-01, -1.132e-01, -2.107e-01, 1.525e-02, -7.752e-02, 1.466e-01, -2.482e-02, 6.425e-02, -6.532e-02, -2.433e-02)); + r += V4(-1.497e-02, -1.151e-02, 1.197e-02, 4.981e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, 
V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.909e-01, -9.392e-02, -1.340e-01, -2.449e-01, 5.722e-02, 8.879e-02, 7.768e-02, -2.801e-02, 3.341e-02, 1.701e-01, -9.158e-02, 7.848e-02, -3.646e-02, 3.091e-02, 5.503e-02, -6.088e-02)); + r += mul(s0_1, M4(-3.235e-01, -1.921e-01, -7.293e-02, 2.558e-02, 5.880e-02, -1.773e-02, -4.682e-02, 5.679e-02, 1.830e-01, -9.583e-03, 7.674e-02, 2.682e-01, -2.694e-02, -4.065e-02, -6.315e-03, 4.434e-02)); + r += mul(s0_2, M4(-5.880e-02, -1.520e-02, -1.367e-01, -4.294e-02, 3.502e-02, 5.052e-02, 5.428e-02, -6.451e-02, 2.027e-01, -5.946e-03, 4.097e-02, -4.362e-04, -3.192e-03, -5.913e-03, 1.121e-02, 5.335e-02)); + r += mul(s0_3, M4(-2.988e-01, -1.461e-01, -1.896e-01, 1.015e-01, -9.158e-03, 9.632e-02, 1.427e-02, -1.338e-02, -1.538e-01, 1.926e-01, 1.612e-01, -1.565e-01, 1.402e-01, -1.831e-01, -1.946e-02, -1.303e-01)); + r += mul(s0_4, M4(-1.630e-01, -4.281e-01, -2.889e-01, -5.586e-01, -5.453e-03, 4.795e-02, -9.513e-03, 6.337e-02, 1.790e-02, -6.090e-02, 1.318e-01, -2.561e-01, -3.402e-02, 1.715e-01, 3.424e-01, -3.754e-02)); + r += mul(s0_5, M4(-1.246e-02, 2.287e-01, 1.475e-02, -1.082e-01, -1.570e-02, -1.497e-01, 6.988e-02, -2.614e-04, 8.724e-02, -2.052e-02, 1.028e-01, -1.492e-01, -1.553e-01, -9.396e-02, -2.078e-01, 4.730e-02)); + r += mul(s0_6, M4(-2.498e-01, -1.893e-01, 6.595e-02, -1.457e-01, -3.905e-02, -8.088e-02, 8.604e-02, -3.156e-02, 1.087e-01, 6.363e-02, -1.002e-01, -1.236e-01, 1.007e-01, -1.389e-01, -3.014e-01, 3.087e-03)); + r += mul(s0_7, M4(-1.523e-01, -6.811e-02, -1.732e-01, -1.112e-01, 8.779e-03, -2.401e-02, 
-2.397e-01, -9.294e-02, 2.584e-01, -4.145e-02, 1.146e-01, -1.261e-02, -8.641e-02, -3.330e-02, 3.006e-01, 2.466e-01)); + r += mul(s0_8, M4(2.154e-01, 7.314e-02, 5.351e-01, -2.390e-01, 1.053e-01, -5.376e-02, 8.874e-02, 2.467e-02, 1.434e-01, 1.391e-02, -1.465e-01, -3.703e-02, 4.802e-03, 6.735e-02, -1.079e-01, 1.807e-02)); + r += mul(s1_0, M4(4.027e-02, 4.182e-03, -3.632e-03, 2.185e-03, 1.107e-01, 7.939e-02, 8.598e-02, -3.633e-03, -1.802e-01, 1.115e-01, 1.924e-01, -2.745e-02, -2.830e-02, 3.712e-02, 3.799e-02, 6.096e-02)); + r += mul(s1_1, M4(1.654e-01, 7.838e-02, -7.628e-02, 1.194e-02, 7.754e-02, -6.591e-02, -5.081e-02, -2.008e-01, -2.290e-01, 1.068e-01, 9.374e-02, 8.226e-02, 9.247e-02, 8.664e-02, -2.329e-01, -1.179e-01)); + r += mul(s1_2, M4(-7.976e-02, -1.166e-02, 6.066e-02, -1.187e-02, 1.831e-02, 2.978e-02, -1.625e-01, -6.556e-02, 1.551e-02, -8.357e-03, -1.501e-02, 1.152e-01, 1.086e-01, 4.976e-02, -5.822e-02, 1.128e-01)); + r += mul(s1_3, M4(-7.695e-02, 5.394e-02, 2.071e-02, -2.995e-02, 1.951e-01, 3.876e-01, -6.261e-01, 1.248e-01, -3.093e-01, -1.460e-01, -9.240e-02, -1.225e-01, -2.002e-01, 7.938e-02, 2.530e-01, -4.350e-02)); + r += mul(s1_4, M4(1.746e-02, 1.007e-01, 1.308e-01, -5.175e-02, 2.391e-02, 2.728e-01, 5.621e-02, 2.392e-01, -2.932e-01, 2.731e-01, 1.057e-01, -1.151e-01, -9.188e-02, -5.725e-03, -1.019e-01, -3.376e-01)); + r += mul(s1_5, M4(-4.040e-02, -3.587e-02, 5.088e-03, 1.021e-01, 3.585e-02, -1.990e-02, -1.224e-01, -8.409e-04, -8.527e-02, -1.184e-01, -4.145e-02, 1.970e-01, 3.514e-02, 2.597e-02, 3.056e-01, 1.344e-01)); + r += mul(s1_6, M4(8.428e-03, 6.969e-02, -1.480e-01, -2.313e-02, -1.710e-01, 3.692e-03, -1.741e-01, 5.840e-02, -2.906e-01, -3.011e-02, -3.556e-01, 3.636e-02, -3.008e-02, -1.295e-01, 2.098e-01, 1.202e-02)); + r += mul(s1_7, M4(-5.380e-02, -5.372e-04, -1.522e-01, 4.360e-02, -1.928e-02, 9.617e-02, 9.124e-02, -2.028e-02, -8.165e-02, 1.276e-01, -2.829e-02, -2.586e-02, -3.088e-01, 4.260e-02, -5.864e-01, -1.700e-01)); + r += mul(s1_8, M4(1.006e-01, 
-5.047e-02, 2.980e-02, -9.510e-02, 4.533e-02, 1.175e-01, 1.323e-01, -1.472e-01, -7.008e-02, -1.289e-02, -1.284e-01, -2.242e-02, 2.611e-01, -7.120e-02, 2.681e-01, -9.496e-02)); + r += mul(s2_0, M4(-2.248e-02, -2.052e-01, 1.088e-01, -4.810e-04, 9.921e-02, -5.395e-02, -4.588e-02, 3.818e-02, 9.605e-02, -1.101e-01, -8.049e-03, -2.017e-02, 1.018e-01, 3.519e-02, -1.687e-01, -5.327e-02)); + r += mul(s2_1, M4(-9.805e-02, -8.320e-02, 4.706e-02, -2.388e-01, 1.018e-01, -1.821e-02, 7.596e-02, -8.032e-02, -1.246e-01, 5.725e-02, -9.168e-02, 6.510e-02, 1.830e-01, -8.114e-02, 3.057e-01, -1.710e-01)); + r += mul(s2_2, M4(-1.154e-01, -2.258e-02, -8.471e-02, -7.985e-02, 1.000e-01, 1.283e-02, -7.777e-02, -7.797e-02, 5.683e-02, 9.853e-02, -1.674e-02, -1.913e-01, -8.108e-02, -1.205e-01, -1.177e-01, 5.743e-02)); + r += mul(s2_3, M4(3.045e-02, 8.145e-02, 1.612e-01, -1.759e-02, -4.420e-02, -2.996e-03, -8.212e-04, -2.170e-02, 8.253e-03, -1.066e-02, -3.993e-02, -7.132e-02, 9.062e-02, 3.725e-01, 1.848e-01, -1.038e-01)); + r += mul(s2_4, M4(5.332e-01, -1.182e-01, 6.704e-02, 1.766e-01, 1.094e-01, -1.092e-01, -2.973e-01, -2.605e-01, -2.007e-01, 2.994e-02, -2.023e-02, 1.198e-01, 8.121e-03, -5.029e-02, -1.861e-01, 9.399e-02)); + r += mul(s2_5, M4(6.238e-02, 6.043e-02, -1.120e-01, 2.542e-01, 1.376e-01, 7.607e-02, 3.749e-02, -1.192e-01, -5.898e-02, -2.174e-02, -1.773e-01, 8.908e-02, -7.967e-02, 4.329e-02, -1.669e-01, 1.684e-01)); + r += mul(s2_6, M4(-4.076e-02, -2.539e-02, 1.274e-01, -3.387e-02, -1.354e-01, -4.052e-02, 4.505e-01, -7.179e-02, 1.687e-01, -7.329e-02, -3.303e-02, -9.880e-02, 9.480e-02, 9.589e-02, 1.248e-01, 1.260e-01)); + r += mul(s2_7, M4(2.201e-01, 2.431e-02, 1.420e-01, -4.548e-02, -2.243e-01, 9.200e-02, -4.429e-01, -1.139e-01, -3.171e-02, 2.493e-02, 2.451e-01, 1.560e-01, -2.732e-01, -5.226e-02, -3.352e-02, 3.464e-02)); + r += mul(s2_8, M4(1.893e-01, -3.349e-02, 3.033e-01, 1.017e-02, 5.704e-02, -1.901e-02, 1.118e-01, -8.800e-02, -7.282e-02, 4.033e-02, 1.738e-01, -1.165e-01, 6.738e-02, 
7.975e-02, 2.274e-01, -7.196e-04)); + r += mul(s3_0, M4(9.206e-02, -1.831e-01, -6.781e-02, 2.038e-02, -2.567e-02, 5.166e-04, -1.552e-01, -4.678e-02, 9.775e-02, 3.774e-02, -8.049e-02, -9.916e-03, -1.460e-01, -1.929e-01, 4.039e-02, -2.443e-02)); + r += mul(s3_1, M4(1.030e-01, 6.380e-02, -3.697e-03, 6.689e-02, -5.646e-03, -5.289e-02, 1.262e-01, 2.126e-01, 3.095e-02, 9.231e-02, -7.421e-02, 2.347e-02, 6.161e-02, -1.098e-01, 2.264e-03, 1.615e-01)); + r += mul(s3_2, M4(-1.076e-01, -2.051e-02, -1.032e-01, 4.689e-02, -3.618e-02, -8.719e-03, -1.070e-02, 8.687e-02, 3.851e-01, -2.398e-02, 1.685e-01, -1.628e-01, 3.233e-02, -3.770e-02, -4.527e-02, -3.666e-02)); + r += mul(s3_3, M4(3.846e-02, 1.956e-01, -9.103e-02, -5.621e-02, -2.641e-02, -8.174e-03, -2.783e-01, 2.945e-02, -1.613e-01, 1.782e-01, 1.632e-02, 6.232e-02, -1.465e-01, 5.124e-02, -1.099e-02, 6.585e-02)); + r += mul(s3_4, M4(-3.682e-02, 2.582e-01, 2.437e-02, 4.205e-02, 4.762e-02, 4.686e-03, 4.395e-01, -6.323e-02, -1.190e-02, 1.435e-01, -1.645e-01, 1.554e-02, -7.464e-02, -1.012e-02, -5.116e-02, 9.937e-02)); + r += mul(s3_5, M4(-1.994e-01, 8.514e-02, -6.310e-03, -1.008e-01, 4.595e-02, 6.921e-02, -9.169e-02, 1.508e-01, 3.123e-02, -9.426e-02, 1.300e-01, -3.395e-01, 9.113e-02, 4.549e-02, 6.148e-02, -5.932e-02)); + r += mul(s3_6, M4(9.103e-02, -8.564e-04, -9.881e-02, -4.359e-02, -3.149e-02, -2.886e-02, -2.087e-01, -1.312e-01, 6.307e-02, 2.503e-02, -2.925e-01, -1.377e-02, 6.212e-02, 1.372e-01, 2.146e-02, 9.611e-03)); + r += mul(s3_7, M4(8.192e-02, -1.447e-01, 2.783e-01, -4.569e-02, 1.534e-01, 5.361e-02, 4.747e-01, 2.498e-02, -9.529e-02, -5.268e-02, 2.216e-01, 1.535e-01, 4.301e-02, 7.891e-02, -3.746e-02, -1.234e-01)); + r += mul(s3_8, M4(-1.645e-01, -5.479e-02, -1.110e-01, 1.579e-01, -3.121e-02, 6.516e-02, -2.857e-02, -1.048e-03, -2.337e-02, -1.801e-02, 9.869e-03, 5.126e-02, -1.556e-01, 5.252e-02, -1.183e-01, 3.949e-02)); + r += mul(s4_0, M4(-6.007e-02, 8.082e-02, 2.268e-01, 2.774e-02, -1.145e-01, 9.654e-02, 6.821e-02, 
-8.230e-02, -6.243e-02, 7.611e-03, -1.037e-01, 1.656e-02, 9.589e-02, -2.637e-01, 4.317e-01, 4.253e-03)); + r += mul(s4_1, M4(-8.680e-02, 1.404e-02, 7.664e-02, -2.373e-01, -2.451e-02, 4.017e-02, 2.163e-02, 2.062e-01, -4.538e-03, 4.677e-04, -6.182e-02, 8.661e-02, -6.298e-01, 3.219e-01, -2.024e-01, 1.064e-01)); + r += mul(s4_2, M4(9.728e-02, 2.038e-02, 1.630e-01, 7.928e-02, 9.660e-02, -1.617e-02, 1.229e-01, -1.092e-01, 3.430e-03, -6.904e-03, -3.568e-01, 2.237e-01, 1.376e-02, -2.085e-01, 8.966e-03, -1.802e-01)); + r += mul(s4_3, M4(-2.242e-02, -1.785e-02, 5.024e-02, 1.283e-02, -1.134e-01, -4.483e-02, 4.119e-02, 1.140e-02, -4.267e-01, -9.236e-02, 4.341e-02, 1.355e-01, 2.437e-01, -2.002e-03, -1.510e-01, -3.528e-02)); + r += mul(s4_4, M4(-8.638e-02, 3.394e-02, -2.240e-01, 1.461e-01, 8.683e-02, 4.206e-02, -1.933e-01, -2.008e-01, 8.629e-02, 3.030e-01, 2.997e-01, -4.533e-01, -7.390e-01, 7.208e-01, 5.560e-01, -1.510e-01)); + r += mul(s4_5, M4(-2.310e-02, -2.152e-02, 1.186e-01, -1.363e-01, 8.357e-02, -3.742e-02, -8.575e-02, -1.443e-01, -6.026e-02, -2.186e-01, -4.317e-01, 2.039e-02, 3.948e-02, 6.303e-01, -9.625e-02, 1.544e-01)); + r += mul(s4_6, M4(1.165e-01, 6.348e-02, -1.213e-01, -1.745e-02, 1.120e-01, -2.659e-02, 1.632e-01, -3.124e-02, -1.503e-03, 4.689e-03, -2.001e-01, 7.933e-02, -3.229e-01, 8.996e-02, -4.774e-01, -1.837e-01)); + r += mul(s4_7, M4(2.074e-02, 3.243e-02, 5.314e-02, 8.075e-02, 5.473e-02, -3.461e-02, 2.726e-04, -1.971e-02, 3.940e-01, -3.165e-02, 2.749e-01, -1.532e-01, 1.806e-01, -3.744e-01, 7.934e-01, 5.761e-01)); + r += mul(s4_8, M4(-8.621e-02, -2.195e-02, -3.822e-02, -1.355e-01, 1.395e-02, 1.608e-02, -1.285e-01, -8.663e-02, 5.349e-02, -1.050e-01, 4.588e-02, -1.015e-01, -9.686e-02, -7.499e-02, -2.442e-01, 5.441e-03)); + r += mul(s5_0, M4(3.871e-02, 2.105e-02, 5.524e-02, -1.114e-01, -9.998e-03, 1.988e-01, 1.913e-01, 5.834e-02, -4.073e-02, 1.370e-02, -7.718e-03, -6.434e-02, 1.075e-01, 9.595e-02, 2.916e-02, -1.640e-02)); + r += mul(s5_1, M4(-2.685e-01, 6.975e-02, 
-4.860e-03, -8.839e-02, -1.550e-01, -1.042e-01, 5.083e-02, -2.413e-02, -2.945e-02, 3.974e-02, 5.426e-02, 1.020e-02, -1.489e-01, -9.121e-02, -2.901e-02, -1.348e-01)); + r += mul(s5_2, M4(1.314e-01, 1.061e-01, -3.463e-02, 1.217e-01, 5.746e-02, -1.503e-02, -6.959e-02, 2.556e-02, 1.508e-01, 2.171e-02, 3.378e-02, -1.861e-02, 1.151e-01, -5.487e-02, -1.317e-02, 7.012e-02)); + r += mul(s5_3, M4(5.046e-02, 9.001e-02, 7.535e-02, 9.960e-02, -1.300e-01, -3.197e-02, 1.296e-01, -7.338e-02, -1.165e-01, -9.652e-02, -7.622e-03, 1.548e-01, 6.067e-02, -9.001e-02, 1.764e-02, -3.334e-02)); + r += mul(s5_4, M4(-2.184e-01, -3.564e-01, -2.250e-01, 2.224e-01, 1.726e-01, -1.349e-02, -1.225e-01, 4.080e-02, -5.255e-03, 4.612e-02, -1.111e-01, -7.368e-02, -2.174e-01, 3.929e-02, 5.635e-02, -7.094e-04)); + r += mul(s5_5, M4(1.605e-01, -8.327e-02, 2.503e-01, 1.224e-01, -4.251e-02, -1.025e-01, -1.459e-01, -5.656e-03, 1.767e-02, 4.757e-02, -3.971e-03, 4.450e-03, -8.019e-03, 1.259e-02, 3.239e-02, 1.144e-01)); + r += mul(s5_6, M4(9.060e-02, -3.711e-02, -2.303e-01, 1.852e-02, 1.322e-01, 1.307e-01, -6.601e-02, 2.163e-02, 6.238e-02, -1.110e-01, 2.882e-02, -5.058e-02, 9.467e-02, 4.748e-02, -7.884e-03, -1.830e-02)); + r += mul(s5_7, M4(1.264e-01, -5.228e-03, -1.240e-01, 3.818e-02, 5.384e-02, 4.479e-02, 3.874e-03, -1.177e-02, 4.849e-02, -2.571e-02, -5.829e-02, 1.062e-01, -1.312e-01, -3.904e-02, 1.222e-01, 3.495e-02)); + r += mul(s5_8, M4(2.607e-01, -9.389e-02, -3.262e-01, -1.223e-01, 1.637e-01, 1.665e-02, -1.061e-01, 4.082e-02, -6.843e-03, 5.625e-02, -6.985e-03, -3.316e-02, -1.778e-02, -5.197e-03, -1.276e-01, -4.753e-02)); + r += mul(s6_0, M4(1.556e-02, 4.461e-02, 1.887e-01, -5.260e-02, -1.009e-01, -4.502e-02, 1.875e-01, 5.034e-02, -6.320e-02, 4.153e-02, -7.159e-02, -6.840e-02, 3.229e-01, -1.182e-01, -8.201e-02, -1.455e-01)); + r += mul(s6_1, M4(-2.783e-01, -9.929e-02, -1.127e-01, -5.826e-02, -5.076e-02, 6.958e-03, -1.807e-01, -1.226e-01, 6.811e-02, 3.467e-01, -1.360e-01, -1.994e-01, 1.270e-01, 6.603e-02, 
-1.298e-01, -3.304e-02)); + r += mul(s6_2, M4(-2.217e-01, 5.182e-02, -6.665e-03, -3.633e-02, -4.371e-02, -4.715e-02, 3.733e-02, 1.725e-02, 6.393e-02, -1.111e-01, 3.107e-01, -2.297e-01, 7.185e-02, -6.411e-02, -5.181e-02, -8.180e-02)); + r += mul(s6_3, M4(-1.247e-01, 1.267e-01, -9.271e-03, 3.259e-02, -1.425e-01, 1.140e-01, 3.898e-02, 7.075e-02, 4.735e-02, -2.949e-02, 8.820e-02, 1.113e-01, -1.436e-01, 8.337e-02, 1.733e-02, 1.245e-01)); + r += mul(s6_4, M4(-2.865e-01, 1.341e-01, -2.989e-01, 2.276e-02, 8.616e-02, -1.108e-01, -2.146e-01, -8.174e-02, -3.244e-01, 8.870e-01, -1.164e-01, 1.193e-01, 1.702e-02, 5.640e-03, 2.127e-02, -2.143e-01)); + r += mul(s6_5, M4(-2.566e-01, -1.100e-01, -2.349e-01, 5.444e-02, 1.520e-01, 6.735e-02, -3.749e-02, 1.863e-01, -3.958e-01, 1.222e-01, -1.456e-01, -3.227e-01, -1.480e-01, -4.675e-02, 3.483e-03, 1.997e-01)); + r += mul(s6_6, M4(-3.229e-01, -1.253e-01, -6.043e-02, -1.934e-02, -3.176e-02, -3.484e-02, -3.086e-02, -6.326e-02, -3.011e-02, 1.207e-01, -2.762e-02, 2.276e-02, 1.165e-01, 7.742e-02, -1.514e-01, -1.235e-01)); + r += mul(s6_7, M4(-3.719e-01, -1.963e-01, 6.235e-02, -1.683e-01, -3.227e-02, -6.104e-03, -9.616e-03, -8.380e-03, -3.989e-02, 8.398e-03, 2.611e-01, 1.075e-01, 8.220e-03, 6.441e-02, -1.289e-01, -5.594e-02)); + r += mul(s6_8, M4(-1.177e-01, -1.073e-01, 3.336e-02, 1.620e-02, 8.025e-02, 4.244e-02, 8.190e-02, 4.761e-02, -1.916e-01, 1.267e-03, -1.027e-01, 1.053e-01, -3.592e-02, 7.911e-02, -3.845e-02, 4.658e-02)); + r += mul(s7_0, M4(1.107e-01, 1.350e-02, 3.278e-02, 8.089e-02, -1.457e-02, -1.257e-01, 7.460e-02, -1.540e-01, -1.525e-01, -5.342e-02, -9.262e-02, -3.793e-02, -1.195e-01, 3.176e-01, 6.309e-02, -6.491e-02)); + r += mul(s7_1, M4(7.589e-02, -1.908e-02, -1.394e-01, 4.513e-02, 1.998e-01, -3.807e-02, -2.032e-01, 9.955e-02, 2.990e-01, -1.441e-01, -2.311e-01, -3.531e-02, -1.162e-01, -7.590e-02, 2.470e-01, -2.878e-01)); + r += mul(s7_2, M4(2.050e-02, 1.155e-01, 1.018e-01, -1.042e-01, 7.442e-03, -7.654e-03, 1.812e-01, -6.073e-02, 
4.555e-02, -4.195e-02, -1.939e-01, -8.081e-02, -2.205e-02, -2.936e-02, 2.581e-03, 7.297e-02)); + r += mul(s7_3, M4(1.223e-01, 6.592e-02, 5.053e-02, -4.009e-03, -1.685e-01, 1.338e-01, -5.836e-03, -2.778e-02, -8.277e-02, -1.650e-01, 1.238e-01, 4.748e-02, 8.084e-02, 6.089e-01, 8.423e-02, -6.395e-02)); + r += mul(s7_4, M4(1.704e-01, 1.564e-01, -1.928e-01, -6.911e-03, -1.700e-01, -2.909e-01, 4.752e-01, 1.944e-01, 1.536e-01, -1.364e-01, -1.005e-01, 1.071e-01, -1.150e-01, -1.112e-01, -3.265e-02, -4.512e-01)); + r += mul(s7_5, M4(3.312e-02, 7.618e-02, 3.051e-02, -2.499e-01, -4.351e-02, 1.448e-01, -5.512e-02, 6.151e-02, 3.365e-01, -3.036e-02, 6.411e-02, -2.202e-01, 7.528e-02, -4.650e-02, 1.802e-02, 1.700e-01)); + r += mul(s7_6, M4(8.020e-02, 1.847e-02, 5.864e-02, -1.005e-01, -2.217e-02, -7.133e-03, -3.747e-01, 5.838e-02, -1.770e-01, 2.267e-02, 4.268e-02, 2.468e-02, -1.118e-01, 1.149e-02, -6.731e-02, -7.425e-02)); + r += mul(s7_7, M4(-2.701e-02, -7.810e-02, 7.503e-02, 2.226e-02, 5.091e-01, 1.019e-01, 4.667e-01, -2.403e-02, 1.082e-01, -2.768e-02, 2.378e-01, -4.118e-02, -7.649e-02, -1.219e-01, -2.735e-01, -5.228e-02)); + r += mul(s7_8, M4(4.283e-02, 4.777e-02, 7.159e-02, -6.319e-03, -7.433e-02, -1.139e-01, -2.676e-01, 5.704e-03, 2.529e-02, 2.313e-02, 1.420e-01, -1.098e-01, 1.671e-02, 6.478e-02, 5.599e-03, 3.933e-02)); + r += V4(1.033e-02, -7.655e-03, -2.000e-03, -1.543e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 
s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.758e-03, 7.617e-02, 6.912e-02, 2.377e-01, 7.070e-02, -5.379e-02, -7.769e-02, 2.329e-02, -1.681e-01, 5.942e-02, 5.860e-02, 6.472e-02, 8.355e-02, -5.434e-02, 5.401e-03, -3.641e-02)); + r += mul(s0_1, M4(-1.019e-01, 5.684e-01, 4.270e-02, 5.252e-02, 4.030e-02, 7.683e-02, 1.746e-02, -1.488e-01, -1.782e-01, -8.970e-02, 1.105e-01, -4.931e-02, -4.062e-02, 8.742e-02, 1.987e-02, -5.478e-02)); + r += mul(s0_2, M4(3.552e-02, 4.850e-01, -1.131e-02, 1.896e-01, 8.424e-02, -4.286e-02, -2.712e-03, 2.802e-02, 2.943e-02, 8.074e-02, 4.319e-03, -1.202e-01, -1.101e-01, -2.639e-02, -2.619e-02, 7.176e-02)); + r += mul(s0_3, M4(-1.692e-01, -1.949e-01, -3.303e-02, -1.001e-02, -3.108e-02, -5.779e-02, 6.328e-02, -1.635e-01, 3.118e-01, -8.366e-02, -2.326e-01, -3.063e-01, -1.434e-02, 6.857e-02, -5.738e-02, -5.841e-03)); + r += mul(s0_4, M4(-4.082e-01, -1.538e-01, -4.761e-02, -8.800e-02, 4.440e-02, -1.050e-01, 6.496e-02, 1.187e-01, 1.208e-01, 1.621e-01, 2.869e-01, -4.994e-02, 2.745e-02, 1.254e-01, -7.371e-02, 8.563e-02)); + r += mul(s0_5, M4(-3.001e-01, 2.473e-01, 2.051e-01, -1.431e-01, -3.970e-02, -8.972e-02, 2.985e-02, 3.882e-02, 5.168e-02, 2.137e-02, 1.726e-02, -3.367e-03, -2.589e-02, 5.863e-03, -4.580e-02, 8.751e-02)); + r += mul(s0_6, M4(3.715e-02, -1.473e-01, -5.820e-02, -1.097e-03, 5.256e-02, 1.142e-01, -9.848e-02, 7.447e-02, -6.206e-03, 3.195e-02, 7.233e-02, -1.535e-01, -1.121e-01, 1.395e-01, -2.690e-02, -8.704e-02)); + r += mul(s0_7, M4(-3.731e-01, -6.268e-01, -1.841e-01, 6.942e-02, -3.763e-02, 6.455e-02, 5.035e-02, -1.305e-01, 1.451e-01, 1.066e-01, 3.048e-01, 1.745e-02, 1.996e-02, -2.749e-02, 1.726e-01, -2.763e-01)); + r += mul(s0_8, M4(2.241e-01, -1.718e-01, 6.101e-03, 2.807e-01, 2.449e-03, 4.505e-02, -8.207e-02, -2.656e-02, 5.609e-03, 2.359e-01, -1.738e-02, 1.096e-01, 5.393e-02, -1.916e-02, -8.992e-02, -1.024e-01)); + r += mul(s1_0, M4(6.518e-02, 7.482e-02, 1.297e-02, -4.784e-02, 
-1.974e-01, -5.278e-02, 6.695e-02, -1.773e-01, -1.970e-01, -9.692e-02, 5.651e-02, -6.470e-02, -6.872e-02, -5.913e-02, -3.371e-02, -2.319e-02)); + r += mul(s1_1, M4(-6.429e-02, 1.404e-02, 7.580e-02, -1.190e-01, -1.332e-01, -9.807e-02, 5.872e-02, 6.732e-02, -2.426e-01, -2.158e-01, 1.174e-01, -2.639e-02, 2.155e-01, -2.171e-01, 4.090e-02, -4.613e-02)); + r += mul(s1_2, M4(5.646e-02, 6.236e-02, 4.568e-03, -3.600e-02, 6.474e-02, -6.144e-02, 2.782e-02, 1.139e-01, 7.254e-03, -7.956e-02, -2.111e-03, -7.433e-02, -5.684e-02, 1.990e-02, 4.432e-02, -1.449e-02)); + r += mul(s1_3, M4(-2.484e-01, -9.496e-02, -5.244e-03, 4.678e-03, 3.685e-01, -1.557e-01, 1.238e-01, 5.032e-01, 3.138e-02, -1.324e-01, -9.592e-02, 4.115e-02, 2.581e-01, -2.674e-02, -3.571e-03, -1.843e-01)); + r += mul(s1_4, M4(4.006e-02, 8.431e-02, 4.414e-02, 4.780e-02, -6.018e-02, -3.496e-01, 6.532e-02, 1.016e-01, -1.647e-01, 1.719e-01, 1.165e-01, 1.253e-01, 1.866e-01, -4.797e-02, 4.293e-02, -3.443e-01)); + r += mul(s1_5, M4(-9.642e-02, -3.151e-02, 5.090e-02, 4.500e-02, -1.058e-01, -3.040e-01, 7.838e-02, 1.731e-01, -4.494e-02, -3.210e-02, 4.687e-02, -7.305e-02, -1.455e-01, -2.310e-02, 1.105e-01, 6.461e-02)); + r += mul(s1_6, M4(6.516e-02, 2.049e-01, -7.350e-03, 1.887e-01, -2.249e-01, 4.453e-02, -1.392e-01, 3.856e-02, 3.693e-02, -1.981e-02, 2.009e-01, 2.242e-02, -6.250e-03, 1.109e-01, -1.288e-01, 4.991e-02)); + r += mul(s1_7, M4(1.644e-01, -1.369e-01, -1.025e-01, 8.022e-03, -4.350e-01, 5.228e-02, 5.725e-02, 4.300e-02, -3.344e-01, -4.307e-02, 4.145e-02, -2.767e-01, -1.284e-01, 3.278e-01, -7.789e-01, 9.142e-01)); + r += mul(s1_8, M4(-2.715e-02, -2.013e-02, -6.933e-02, 4.864e-02, 6.995e-02, 2.184e-01, 3.157e-02, 1.156e-01, 6.813e-02, 5.536e-02, 3.155e-02, 1.914e-02, 1.898e-02, -1.525e-01, -1.083e-01, 8.293e-02)); + r += mul(s2_0, M4(3.057e-01, 3.366e-02, -7.672e-02, -1.528e-01, -8.638e-02, 3.335e-02, 1.581e-02, 1.377e-02, 7.964e-02, -6.812e-02, 5.726e-03, 6.136e-02, -2.493e-01, 1.237e-01, 4.327e-02, 4.104e-02)); + r += 
mul(s2_1, M4(-4.913e-03, -6.517e-02, -7.917e-02, -1.400e-01, 6.403e-02, -4.813e-02, 7.681e-03, 6.456e-02, 1.468e-01, -1.053e-03, 3.867e-03, -1.601e-01, -8.750e-02, 2.363e-01, -4.503e-02, 1.427e-01)); + r += mul(s2_2, M4(1.049e-02, -2.269e-01, -5.767e-02, 1.123e-01, 4.629e-02, -7.771e-03, 1.951e-02, -1.070e-01, 3.297e-03, 1.909e-02, -6.388e-02, -1.872e-02, 1.106e-01, -1.076e-01, -4.868e-02, -4.004e-02)); + r += mul(s2_3, M4(9.685e-02, -1.747e-01, 5.253e-02, 2.288e-02, 2.158e-01, 4.820e-02, 1.199e-01, -1.479e-01, 1.971e-01, 5.220e-02, -1.107e-01, 1.755e-02, 2.448e-01, 2.043e-01, -5.064e-02, -1.467e-01)); + r += mul(s2_4, M4(1.483e-01, -9.338e-02, 3.053e-01, 3.674e-02, -4.789e-02, -6.824e-02, 2.009e-01, -2.110e-01, 9.896e-02, 3.739e-01, -3.624e-02, -6.419e-02, -1.496e-01, 2.263e-01, 2.480e-01, -3.066e-01)); + r += mul(s2_5, M4(4.285e-02, -1.865e-01, 1.919e-01, 1.222e-01, 2.484e-02, -1.035e-01, 5.325e-02, -1.774e-02, 8.416e-02, 8.750e-03, 1.072e-01, 1.335e-01, 1.340e-01, 1.472e-01, -2.104e-02, -1.035e-01)); + r += mul(s2_6, M4(5.401e-02, -1.072e-01, -2.533e-02, 9.819e-02, -1.101e-01, -6.547e-02, -2.413e-01, 1.610e-01, 1.089e-01, -4.599e-02, -9.606e-02, -2.390e-01, 3.399e-02, -1.979e-01, 5.705e-02, -5.747e-02)); + r += mul(s2_7, M4(-8.321e-02, 7.437e-02, -3.539e-01, -1.672e-02, -1.665e-02, 2.068e-01, -7.621e-01, 6.597e-01, -2.447e-01, 2.339e-02, 9.553e-02, 2.941e-02, -2.685e-01, 1.937e-01, -2.887e-01, 2.725e-01)); + r += mul(s2_8, M4(2.926e-02, -1.894e-01, 6.963e-03, 6.560e-02, -1.028e-01, -1.773e-01, -9.563e-02, 8.999e-02, -1.051e-01, -1.072e-01, -5.523e-02, 9.691e-02, -6.205e-02, 7.601e-03, 5.785e-03, 5.219e-02)); + r += mul(s3_0, M4(1.045e-01, 6.734e-02, 4.300e-02, 1.489e-01, 6.522e-02, -6.717e-02, -6.585e-03, -2.107e-02, 4.468e-02, -1.545e-01, 4.438e-02, 8.534e-02, 1.208e-01, 1.860e-02, 5.553e-02, -2.556e-02)); + r += mul(s3_1, M4(-7.592e-02, -5.138e-02, -7.580e-02, 1.265e-01, -1.954e-01, 8.033e-02, 2.697e-02, 7.622e-03, -1.312e-01, 8.863e-02, -2.099e-02, 5.640e-02, 
2.769e-02, -5.627e-03, -1.252e-02, -1.444e-01)); + r += mul(s3_2, M4(-2.088e-02, 6.035e-02, 1.498e-02, 2.430e-02, -1.182e-02, -1.124e-01, -3.626e-02, 4.757e-02, 1.656e-01, -1.633e-01, -9.650e-02, 9.102e-02, 9.852e-04, -4.813e-02, 2.747e-02, -1.393e-01)); + r += mul(s3_3, M4(3.472e-01, 3.092e-02, -6.392e-02, 6.680e-02, 1.125e-01, 1.447e-03, -7.247e-02, -6.417e-02, -1.433e-02, 1.248e-01, 2.670e-02, 8.518e-02, -5.483e-02, 6.709e-03, -4.418e-02, 1.285e-01)); + r += mul(s3_4, M4(1.294e-01, 9.550e-02, 1.910e-01, -2.522e-01, -5.592e-03, -1.394e-01, 6.054e-02, 8.921e-02, 4.476e-01, 1.155e-01, -1.468e-01, 2.610e-01, -1.090e-01, -1.395e-02, 3.005e-02, 3.408e-03)); + r += mul(s3_5, M4(-1.316e-01, 1.761e-02, -3.773e-02, -7.906e-02, -5.174e-02, -6.989e-02, 1.748e-02, -2.507e-03, -2.583e-02, -3.823e-02, 1.149e-01, -7.926e-02, -2.495e-02, -3.870e-02, -4.058e-02, 1.254e-01)); + r += mul(s3_6, M4(7.346e-02, 4.658e-02, -1.027e-01, -9.269e-02, -1.354e-02, -3.943e-03, -2.036e-01, -1.463e-01, 5.179e-02, 1.687e-01, -5.164e-02, -2.656e-02, -5.462e-02, -5.923e-02, 1.995e-02, -6.361e-02)); + r += mul(s3_7, M4(1.919e-02, -3.371e-03, 2.938e-01, -1.992e-01, -2.979e-03, -1.093e-01, 1.858e-01, -4.176e-01, -8.299e-02, -6.490e-02, 3.646e-01, -2.355e-01, 1.422e-02, 5.154e-02, -3.259e-02, 1.654e-01)); + r += mul(s3_8, M4(-3.358e-02, 2.703e-03, -1.307e-01, -4.966e-02, -8.679e-02, 1.698e-01, -1.447e-01, 4.559e-02, -1.525e-01, 8.874e-02, -6.838e-02, -1.685e-01, 2.004e-02, -4.271e-02, -1.110e-02, 1.418e-02)); + r += mul(s4_0, M4(1.153e-01, 3.278e-02, -1.984e-02, 9.321e-03, -1.442e-01, 1.189e-01, -1.124e-02, 5.252e-02, 3.034e-01, -9.655e-02, -6.094e-02, 2.892e-01, -3.730e-03, 8.028e-01, -6.173e-02, -1.532e-01)); + r += mul(s4_1, M4(1.025e-01, -8.287e-02, 3.901e-02, -1.558e-01, -2.023e-01, -1.739e-01, 4.269e-02, 2.685e-02, -6.982e-02, -1.987e-01, -6.712e-02, 4.156e-02, 3.297e-01, 6.498e-01, 2.479e-01, -5.953e-01)); + r += mul(s4_2, M4(-1.068e-01, -2.891e-02, 8.870e-02, -1.661e-01, 1.573e-01, 5.167e-02, 
-7.041e-03, -8.596e-02, -2.175e-02, 2.233e-01, 1.118e-02, -7.196e-02, 4.037e-01, 4.387e-01, -1.536e-01, -4.341e-02)); + r += mul(s4_3, M4(-1.184e-01, -4.621e-02, 5.939e-02, -8.380e-02, 1.851e-01, -2.091e-01, -8.024e-04, 9.550e-02, -1.187e-01, -2.302e-01, 1.589e-01, 8.751e-02, -7.050e-01, -7.811e-01, 4.696e-01, 1.788e-01)); + r += mul(s4_4, M4(-2.695e-02, 4.550e-02, -1.792e-01, 1.519e-01, -1.141e-01, 2.095e-01, -8.601e-02, -9.451e-02, -4.056e-01, -3.000e-01, 2.386e-02, 1.290e-01, 2.230e-01, -3.472e-02, -9.723e-01, 2.486e-01)); + r += mul(s4_5, M4(2.915e-03, -7.673e-02, -1.262e-02, 1.916e-02, 1.816e-01, -3.453e-02, -2.695e-04, -1.356e-01, 3.496e-02, 5.267e-02, -8.667e-02, -3.235e-01, 3.861e-01, 5.660e-01, -1.828e-01, -1.704e-01)); + r += mul(s4_6, M4(1.632e-01, 2.510e-01, -9.598e-02, 3.087e-02, -5.671e-03, -1.048e-01, -4.369e-03, 1.901e-02, 2.465e-01, 8.645e-03, 1.715e-01, 2.438e-01, 5.865e-01, 4.047e-01, 5.474e-01, 6.404e-01)); + r += mul(s4_7, M4(-1.204e-01, 4.781e-02, 3.613e-02, -4.889e-02, -5.033e-03, -2.781e-01, -2.346e-02, 2.110e-02, 9.348e-02, -7.106e-02, -1.281e-01, 1.660e-01, 3.296e-01, -9.292e-01, 1.562e+00, -1.151e+00)); + r += mul(s4_8, M4(5.152e-03, 1.871e-02, -6.217e-03, 1.707e-01, 1.013e-01, 1.548e-01, 4.119e-02, -4.431e-02, -1.716e-02, 8.323e-02, -2.118e-02, -2.531e-01, 2.598e-01, 5.480e-02, -5.895e-01, -6.074e-01)); + r += mul(s5_0, M4(-1.151e-01, -8.785e-02, -7.595e-02, 2.556e-02, -1.627e-01, 9.401e-02, -6.269e-02, 5.041e-02, 3.075e-02, -1.058e-01, 2.416e-03, -8.175e-02, -2.533e-02, 1.259e-01, -1.033e-02, -1.544e-02)); + r += mul(s5_1, M4(2.719e-01, -5.704e-02, -4.706e-02, 1.435e-02, 1.781e-02, -8.804e-02, 6.679e-02, 3.407e-02, -8.725e-02, -1.454e-01, -3.964e-02, 1.686e-01, 3.078e-02, 1.527e-01, -2.336e-02, 8.638e-02)); + r += mul(s5_2, M4(-9.739e-02, -1.581e-01, 1.193e-01, -1.140e-01, 8.583e-02, 1.014e-01, -4.930e-02, -3.523e-02, -3.552e-02, 2.957e-02, 9.932e-02, -1.241e-01, 5.827e-04, 4.510e-03, 8.899e-02, 7.640e-03)); + r += mul(s5_3, 
M4(-7.002e-02, 1.106e-01, 7.769e-02, -3.625e-01, -2.617e-02, 6.854e-04, 1.240e-01, -2.399e-01, -1.245e-02, 1.096e-01, 7.265e-02, -5.656e-02, -1.526e-02, -4.186e-03, 4.619e-02, -1.449e-01)); + r += mul(s5_4, M4(2.417e-01, 1.537e-01, 4.630e-03, 5.479e-02, -1.961e-02, -5.185e-02, -1.879e-01, 1.597e-01, -5.192e-03, -1.293e-02, -6.857e-03, 4.178e-02, -1.144e-01, -1.014e-01, -1.105e-01, 2.070e-01)); + r += mul(s5_5, M4(1.026e-01, -2.085e-01, 9.310e-04, 6.208e-02, 5.632e-02, -3.012e-01, -1.693e-02, -1.228e-01, 3.079e-02, 6.654e-03, 2.042e-02, 3.115e-03, 2.767e-02, -3.941e-02, -1.027e-01, -1.193e-01)); + r += mul(s5_6, M4(-5.361e-02, 1.839e-01, -1.381e-02, -4.992e-02, 1.197e-01, -2.658e-02, 4.414e-02, 3.060e-01, 2.078e-01, -5.305e-02, -7.618e-02, -1.528e-01, 8.231e-02, 1.479e-02, -1.472e-02, -1.767e-02)); + r += mul(s5_7, M4(-1.962e-03, 1.135e-01, -8.852e-02, -1.797e-02, 2.641e-01, -1.138e-01, -1.912e-02, 2.951e-01, 6.433e-03, 7.263e-03, -6.725e-02, 1.238e-01, -1.916e-01, -1.174e-01, 3.328e-02, 1.476e-01)); + r += mul(s5_8, M4(-8.259e-02, 1.196e-01, 2.297e-02, 4.276e-02, 3.669e-02, -2.559e-02, 6.839e-02, 1.683e-01, -5.570e-02, 1.671e-01, -1.644e-02, -9.741e-02, 1.025e-01, -8.816e-02, 1.826e-02, -2.364e-02)); + r += mul(s6_0, M4(-2.535e-01, 1.111e-02, 1.390e-02, -1.202e-01, -5.318e-02, -4.885e-02, -4.511e-02, -4.861e-03, -1.718e-01, -5.259e-02, 2.545e-02, 2.374e-02, -1.528e-01, -6.412e-03, -1.480e-02, 1.350e-01)); + r += mul(s6_1, M4(-2.912e-01, -1.131e-02, 3.652e-02, -1.608e-02, 4.979e-03, -5.479e-02, 2.703e-02, -4.558e-02, -4.872e-02, 3.846e-02, 8.584e-02, 9.559e-02, 1.828e-01, -9.439e-04, -5.438e-02, 1.303e-01)); + r += mul(s6_2, M4(-5.778e-02, 5.187e-02, -2.630e-02, -7.704e-02, -3.566e-02, -5.934e-02, 1.820e-02, 5.576e-02, -2.745e-02, 3.232e-02, 8.394e-02, 3.470e-02, 5.279e-02, 4.295e-02, 5.794e-02, -4.407e-03)); + r += mul(s6_3, M4(-2.326e-02, -1.651e-02, 9.584e-02, 8.951e-02, -4.249e-02, 4.926e-02, 2.225e-02, 8.911e-02, -2.212e-01, 6.553e-03, 5.084e-02, 2.918e-02, 
1.774e-02, -1.435e-01, 4.841e-02, 4.622e-02)); + r += mul(s6_4, M4(2.492e-01, 2.205e-02, 1.578e-02, -1.140e-01, -3.456e-02, 1.953e-01, 2.578e-02, 1.255e-01, 1.047e-01, 3.143e-02, -3.481e-01, 1.965e-01, -1.062e-01, -1.112e-01, 5.874e-02, 6.422e-02)); + r += mul(s6_5, M4(-1.986e-01, 1.203e-01, -6.987e-02, -9.607e-02, -1.267e-02, 1.618e-01, -5.594e-02, 2.820e-02, -1.304e-03, 2.070e-01, -1.037e-01, 2.567e-01, -9.442e-02, 9.773e-02, -6.431e-02, 4.221e-02)); + r += mul(s6_6, M4(6.672e-02, 5.162e-02, -4.157e-02, 4.170e-02, 2.030e-02, -5.778e-02, -8.677e-03, -7.128e-02, -5.729e-02, 2.775e-03, 1.171e-01, 9.867e-02, 3.169e-01, 2.530e-01, -2.410e-03, -3.967e-03)); + r += mul(s6_7, M4(2.745e-02, -9.191e-02, -4.763e-02, -7.475e-02, 2.111e-01, -1.829e-01, -5.249e-02, -6.307e-02, -1.656e-01, -4.147e-01, 2.770e-02, -1.667e-01, 1.274e-01, -2.991e-01, -9.795e-02, -2.018e-01)); + r += mul(s6_8, M4(6.462e-02, 8.191e-02, 1.501e-02, -3.084e-01, 3.494e-02, -1.754e-02, 2.011e-02, -1.587e-01, -1.362e-01, 1.886e-01, -1.740e-01, -1.429e-01, -7.398e-02, -7.677e-02, -1.702e-02, -5.762e-02)); + r += mul(s7_0, M4(1.334e-01, 1.942e-01, -1.171e-02, 8.299e-02, -8.526e-02, -2.180e-01, 5.514e-02, -1.436e-02, 5.323e-02, 3.455e-02, 2.642e-02, 1.681e-02, 9.616e-03, 1.213e-02, 7.231e-02, -1.113e-02)); + r += mul(s7_1, M4(-1.613e-01, 5.467e-02, 3.423e-02, 1.011e-02, 1.974e-01, 1.498e-01, 1.780e-02, -6.198e-02, 2.254e-01, -6.978e-04, -2.064e-02, 6.975e-03, -7.109e-02, -6.513e-02, -3.146e-02, -8.605e-02)); + r += mul(s7_2, M4(1.097e-01, 1.303e-01, -2.511e-02, 1.708e-02, -2.600e-02, 3.643e-02, 4.636e-02, -9.953e-04, 1.382e-01, 3.000e-01, -7.425e-02, 3.608e-02, -2.558e-02, 1.338e-01, 3.733e-02, 7.616e-02)); + r += mul(s7_3, M4(1.074e-01, 6.512e-02, -4.465e-02, 6.519e-02, -1.936e-02, -1.653e-01, 1.620e-03, -8.754e-04, -1.027e-01, -1.507e-01, -2.866e-02, 1.681e-01, 2.847e-01, 2.362e-01, -2.515e-01, -3.844e-01)); + r += mul(s7_4, M4(5.933e-02, -9.733e-02, -5.048e-02, 1.283e-01, -1.207e-01, 4.300e-01, -9.009e-02, 
2.569e-01, 3.240e-01, -1.023e-01, -2.388e-01, -2.404e-01, 3.049e-02, 1.859e-01, -1.149e-02, 8.890e-02)); + r += mul(s7_5, M4(-4.837e-02, -4.022e-02, -9.985e-02, 2.189e-02, -1.578e-01, -4.311e-02, -2.034e-02, -1.535e-01, 1.856e-01, 8.270e-02, -7.737e-02, -1.922e-02, -4.536e-02, 4.765e-02, -1.193e-03, 7.205e-02)); + r += mul(s7_6, M4(1.942e-01, -1.707e-01, -4.945e-02, 7.447e-02, -4.521e-02, -4.525e-02, 5.924e-02, 1.494e-02, -1.148e-01, -2.395e-02, 9.159e-02, -1.781e-01, -8.017e-02, -1.350e-01, 1.942e-02, -1.520e-01)); + r += mul(s7_7, M4(1.286e-01, -5.202e-02, -8.071e-02, 2.529e-02, -2.521e-01, 4.532e-01, -2.095e-01, 2.114e-01, 7.191e-02, -4.856e-02, 3.030e-01, 2.091e-02, -8.676e-02, 7.358e-02, -1.278e-01, 2.998e-02)); + r += mul(s7_8, M4(9.250e-02, 2.525e-01, 2.439e-02, -7.490e-02, 8.973e-03, 2.871e-01, -1.946e-01, -3.327e-02, 2.057e-01, 3.037e-02, 3.450e-02, -1.399e-01, 3.158e-02, -4.555e-02, 4.175e-02, 1.327e-03)); + r += V4(2.171e-03, 1.231e-02, -3.483e-03, 2.026e-03); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = 
l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + 
s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + 
+//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.148e-01, -8.185e-02, -1.411e-02, 5.234e-02, 1.760e-01, -2.862e-02, 2.125e-03, -9.211e-03, 1.019e-01, 3.912e-02, 3.709e-02, 1.898e-02, 9.759e-02, 1.289e-01, 1.633e-02, -1.400e-01)); + r += mul(s0_1, M4(-1.021e-01, -2.273e-02, 9.546e-02, 2.171e-02, -5.309e-02, -7.581e-03, -9.869e-03, 1.025e-01, -5.688e-02, -9.736e-03, -3.693e-02, -9.752e-02, 3.177e-01, 1.791e-03, -4.513e-03, -3.494e-02)); + r += mul(s0_2, M4(1.081e-02, -3.946e-02, -8.423e-02, -5.189e-03, 2.352e-02, 9.578e-02, -2.665e-02, 7.931e-02, 1.164e-01, -7.756e-02, -7.551e-02, 2.317e-02, -1.850e-02, 2.503e-02, 1.020e-02, 3.127e-02)); + r += mul(s0_3, M4(-1.711e-01, -5.064e-02, 5.811e-02, 9.636e-02, -9.589e-02, -1.298e-02, -4.247e-02, -2.036e-02, -1.126e-01, -6.640e-02, -6.445e-03, 1.700e-01, 7.660e-02, -6.665e-03, -4.980e-02, -2.623e-02)); + r += mul(s0_4, M4(1.482e-01, -1.146e-01, -1.488e-01, -1.059e-01, -1.714e-01, 8.280e-02, 4.026e-02, 1.548e-01, -5.075e-02, 1.049e-01, 3.046e-02, 6.307e-02, -2.167e-02, 5.749e-02, -2.166e-01, 2.334e-01)); + r += mul(s0_5, 
M4(-6.098e-02, 3.640e-02, -1.929e-02, -4.996e-02, -1.136e-02, 1.956e-01, 1.212e-02, 1.828e-01, 1.526e-02, 5.544e-02, 2.381e-02, -3.802e-02, -1.418e-01, 4.179e-02, -1.739e-01, 1.139e-01)); + r += mul(s0_6, M4(-7.260e-02, 7.911e-02, 5.132e-03, 2.420e-02, 1.140e-03, -1.315e-01, -3.142e-02, -4.042e-02, 4.358e-02, 6.544e-02, 6.622e-03, 3.360e-02, 1.223e-01, 5.543e-02, 3.080e-02, 1.314e-01)); + r += mul(s0_7, M4(-8.027e-03, 4.926e-02, -3.521e-03, -2.338e-02, -1.126e-01, 5.741e-03, 5.360e-02, 1.018e-01, 1.403e-01, 7.060e-02, -5.804e-02, 1.384e-01, -1.067e-01, -1.198e-01, -3.449e-02, 1.961e-01)); + r += mul(s0_8, M4(-3.738e-02, -1.306e-01, 1.107e-02, -7.607e-02, 1.243e-01, -1.010e-01, 1.212e-02, 2.539e-01, -8.092e-02, -7.852e-03, 1.825e-02, -2.749e-04, 9.700e-02, 7.530e-02, -4.116e-02, -5.427e-02)); + r += mul(s1_0, M4(1.050e-01, -1.468e-02, -7.091e-02, -8.371e-02, 1.020e-02, 9.543e-02, -2.195e-02, -1.574e-04, 2.162e-01, -3.034e-01, -3.787e-02, 2.131e-01, -1.148e-02, 1.077e-02, 9.198e-02, 1.001e-01)); + r += mul(s1_1, M4(-3.883e-02, -4.188e-02, 6.138e-02, -8.807e-02, -1.576e-01, 1.075e-01, 5.166e-02, -2.216e-02, 1.049e-01, -1.876e-02, -9.194e-02, -4.131e-02, -8.374e-02, -5.167e-02, -1.251e-01, 1.607e-01)); + r += mul(s1_2, M4(-4.730e-02, 1.121e-01, 7.568e-02, 5.447e-02, 8.668e-02, 5.584e-02, -9.957e-03, -3.589e-03, 2.666e-01, 6.166e-02, 2.291e-02, 1.741e-01, 5.895e-02, 4.918e-02, -8.752e-02, 1.369e-01)); + r += mul(s1_3, M4(-1.635e-01, -4.629e-02, 5.164e-02, 8.537e-03, -8.651e-02, -1.172e-01, 2.755e-04, 1.021e-01, -1.107e-01, 1.182e-01, 2.746e-02, 2.390e-01, 1.820e-02, 8.032e-02, -7.235e-02, 7.531e-02)); + r += mul(s1_4, M4(1.067e-01, -4.792e-03, -9.543e-02, 5.090e-03, 2.230e-02, -3.161e-01, -5.231e-02, 2.124e-01, 2.475e-01, 1.983e-01, 7.380e-03, 1.579e-01, -1.399e-02, -1.190e-01, -1.073e-01, -9.188e-02)); + r += mul(s1_5, M4(7.826e-02, 1.374e-01, -9.163e-02, -3.429e-02, 2.204e-01, -3.229e-02, 5.139e-02, -9.958e-02, 7.382e-02, 2.384e-01, -2.388e-01, 3.980e-02, -8.830e-02, 
-4.230e-02, -1.036e-01, 5.042e-02)); + r += mul(s1_6, M4(2.173e-03, -6.181e-02, 3.234e-02, 1.587e-01, 7.717e-02, -2.153e-02, -2.742e-02, -9.150e-02, 7.825e-02, -2.646e-01, 1.481e-02, 2.971e-01, 2.110e-02, 1.253e-01, -7.860e-03, -8.129e-02)); + r += mul(s1_7, M4(7.583e-02, 1.063e-01, -2.099e-02, -4.610e-04, 1.094e-01, -5.212e-02, -2.047e-02, -6.110e-02, -1.883e-01, -7.224e-02, -4.196e-02, 9.632e-02, -1.147e-01, 6.630e-02, 3.887e-03, 1.138e-02)); + r += mul(s1_8, M4(3.765e-02, 1.596e-01, 5.584e-02, 1.169e-01, 5.791e-02, -4.240e-02, 1.590e-02, 2.315e-02, -3.759e-02, 1.557e-01, -2.875e-02, 8.878e-02, 4.666e-02, -4.438e-02, 4.127e-02, 7.803e-02)); + r += mul(s2_0, M4(-2.433e-02, -2.140e-02, 3.534e-02, 8.763e-02, -6.537e-02, 2.888e-03, 1.781e-02, -9.516e-02, -4.959e-02, -7.305e-04, -3.082e-03, 1.726e-01, -1.093e-01, 4.877e-04, -3.107e-02, 1.510e-01)); + r += mul(s2_1, M4(4.781e-02, 2.146e-02, -7.985e-02, 1.144e-01, 6.127e-02, -1.343e-01, -4.452e-02, 1.627e-01, 3.195e-02, -1.323e-01, -6.793e-02, -8.531e-02, 9.914e-03, 1.234e-01, 5.605e-02, 7.464e-02)); + r += mul(s2_2, M4(2.733e-02, 6.205e-02, 2.271e-02, -3.695e-02, -7.404e-02, 6.556e-02, -9.502e-02, 3.729e-02, -1.791e-01, -2.851e-02, 7.849e-02, -7.159e-02, 5.587e-02, 1.027e-01, -1.121e-01, -5.296e-02)); + r += mul(s2_3, M4(4.445e-02, 4.192e-02, -6.881e-03, 1.003e-01, 6.648e-02, 1.018e-01, -4.082e-03, 1.489e-02, -5.513e-02, 5.094e-02, -1.898e-02, -1.538e-01, -4.049e-02, -7.602e-02, -4.031e-02, 2.744e-01)); + r += mul(s2_4, M4(-4.634e-02, 1.529e-01, 4.334e-02, -7.627e-03, -2.217e-01, -1.784e-02, 1.034e-02, 7.111e-02, 4.219e-01, 3.733e-01, 5.571e-01, -3.626e-01, 2.417e-01, 5.628e-02, 6.872e-03, 6.866e-02)); + r += mul(s2_5, M4(-3.169e-02, -7.973e-02, -2.056e-01, 8.751e-02, 1.181e-02, -8.371e-02, -1.162e-01, 3.191e-02, -3.315e-02, -1.636e-01, -1.206e-01, 3.676e-02, -1.169e-01, -1.986e-01, -1.931e-01, 6.134e-02)); + r += mul(s2_6, M4(-6.188e-02, -3.657e-02, 1.201e-03, -6.201e-03, 4.759e-02, -1.386e-01, 6.573e-02, -4.832e-01, 
5.454e-02, -1.073e-01, 5.971e-03, 4.345e-02, -1.782e-01, 1.215e-01, 2.692e-02, 2.557e-01)); + r += mul(s2_7, M4(-7.797e-02, -1.084e-01, 5.175e-03, 9.329e-02, -2.287e-01, 1.999e-01, -6.601e-03, -9.554e-02, -3.143e-02, 1.081e-01, -6.461e-02, -6.680e-02, 3.113e-02, -1.750e-03, -5.845e-02, 2.402e-02)); + r += mul(s2_8, M4(2.924e-02, 1.331e-01, 7.454e-03, -3.124e-02, 1.476e-02, 2.380e-02, 9.213e-03, -7.625e-02, -5.502e-02, -2.179e-01, -3.804e-02, -4.410e-02, 4.534e-02, 1.053e-01, 5.307e-02, 5.106e-02)); + r += mul(s3_0, M4(1.826e-02, -6.125e-02, 5.636e-02, -6.383e-02, 1.119e-01, -5.472e-02, -1.497e-02, -4.192e-02, 2.173e-01, -4.268e-02, 8.650e-02, -4.834e-02, -7.294e-02, -3.110e-02, 7.425e-03, 8.361e-03)); + r += mul(s3_1, M4(-2.545e-02, -2.326e-01, -4.745e-02, 2.995e-01, -2.049e-02, -8.489e-02, 3.580e-02, 1.492e-02, 1.409e-01, -4.312e-02, -1.762e-01, -4.962e-02, 5.562e-03, 3.523e-02, 7.543e-02, -5.587e-02)); + r += mul(s3_2, M4(-4.105e-02, -2.489e-01, -6.214e-02, 8.548e-02, 2.323e-02, -2.092e-03, -8.933e-02, 6.399e-02, 4.277e-02, -7.947e-02, 1.034e-01, -2.830e-02, 7.203e-02, -1.177e-02, -5.893e-02, -1.309e-01)); + r += mul(s3_3, M4(1.762e-01, -1.742e-01, 6.785e-02, -1.550e-01, -2.695e-02, -8.647e-03, -2.921e-02, 2.048e-03, -1.097e-01, -6.132e-02, 4.258e-02, 1.509e-01, 8.423e-02, 3.367e-03, -2.041e-03, -1.195e-01)); + r += mul(s3_4, M4(1.996e-01, -1.168e-01, -9.302e-02, 3.043e-01, -3.837e-02, 2.810e-01, 5.145e-02, -1.396e-01, 3.323e-03, -9.145e-02, 1.519e-01, 3.587e-01, 2.582e-01, -3.045e-01, 5.168e-02, -1.023e-01)); + r += mul(s3_5, M4(2.237e-01, -1.517e-01, -2.061e-01, 1.276e-02, -3.531e-02, 1.714e-02, -2.996e-02, -2.775e-02, 7.813e-02, -6.512e-05, -6.621e-02, -4.317e-03, 1.628e-01, -1.701e-01, -3.663e-01, -2.800e-01)); + r += mul(s3_6, M4(-2.112e-01, -1.664e-01, -1.945e-02, -3.029e-01, 1.268e-01, 1.859e-02, 2.151e-02, 7.883e-02, -1.175e-01, -7.301e-02, -1.675e-02, 3.992e-02, 3.473e-03, 4.124e-02, -1.485e-02, -1.433e-01)); + r += mul(s3_7, M4(-1.850e-01, -6.911e-02, 
-5.602e-02, -9.484e-02, 4.773e-02, 5.576e-02, -1.829e-02, 6.391e-02, 5.080e-02, 1.449e-01, 7.632e-02, 1.613e-02, 4.483e-03, -2.650e-01, -3.473e-02, 1.705e-02)); + r += mul(s3_8, M4(-6.757e-02, -6.804e-03, 1.366e-02, -2.227e-01, 2.078e-02, -8.953e-02, -1.051e-02, 3.131e-02, 1.905e-02, -4.710e-02, -1.816e-02, 1.138e-01, -9.230e-02, -1.380e-01, -7.560e-02, -1.729e-01)); + r += mul(s4_0, M4(-7.715e-02, -4.226e-02, -1.726e-04, -3.048e-02, 1.289e-01, -1.014e-02, 6.840e-04, -3.068e-02, 1.129e-02, -2.180e-02, 2.234e-02, -3.127e-02, -4.607e-02, -2.965e-02, 4.736e-02, 1.610e-02)); + r += mul(s4_1, M4(-1.268e-02, -6.178e-02, 8.163e-02, 6.305e-02, 1.033e-01, -3.181e-02, -4.803e-02, 1.333e-02, 3.381e-03, -1.908e-02, 2.222e-02, 7.992e-02, -9.103e-02, 3.049e-02, 8.586e-02, -8.075e-02)); + r += mul(s4_2, M4(5.629e-02, 1.303e-01, 2.169e-02, -1.143e-01, -9.910e-02, 9.202e-02, -4.430e-02, -5.384e-02, 1.788e-02, 5.285e-02, -1.935e-02, 1.770e-01, 9.311e-03, 1.101e-01, 5.366e-02, 8.400e-02)); + r += mul(s4_3, M4(-1.919e-01, 4.128e-02, -6.310e-02, 1.233e-01, 5.438e-02, 4.894e-02, 3.223e-02, -1.526e-01, 9.431e-03, -1.171e-01, -2.802e-03, 4.199e-02, 3.082e-01, -2.440e-01, 8.263e-02, -4.082e-02)); + r += mul(s4_4, M4(1.600e-01, -1.433e-01, 8.529e-02, -1.535e-01, -4.229e-01, -3.177e-02, -1.559e-01, 9.507e-02, -4.231e-02, -1.538e-01, 2.965e-02, 1.360e-01, 2.269e-01, -3.023e-01, -1.756e-01, -7.350e-02)); + r += mul(s4_5, M4(-1.554e-01, 7.256e-02, 1.819e-01, -3.069e-02, -1.896e-01, 8.487e-03, 3.531e-02, 1.163e-02, 9.509e-02, 7.207e-02, 3.051e-02, 1.369e-01, -1.521e-02, -3.002e-02, -1.198e-01, 5.352e-02)); + r += mul(s4_6, M4(1.679e-01, 8.049e-03, 3.098e-02, -3.888e-03, 1.408e-01, -5.948e-02, -4.859e-02, -6.342e-02, 1.652e-01, -5.122e-02, 2.694e-02, -4.236e-03, -7.010e-02, -2.119e-01, -2.484e-02, -1.795e-01)); + r += mul(s4_7, M4(4.459e-02, 9.580e-02, -1.811e-02, 1.641e-01, 7.625e-02, -9.174e-02, -4.983e-02, -1.211e-01, -4.453e-02, -2.507e-03, -6.407e-02, 6.882e-03, -8.679e-03, -2.247e-01, 
6.515e-02, 4.370e-03)); + r += mul(s4_8, M4(-4.617e-03, -9.140e-02, 5.284e-03, -5.855e-02, 1.690e-01, -2.470e-02, -2.958e-02, -8.857e-02, 4.823e-02, 1.163e-01, -5.515e-02, 7.202e-02, -3.170e-03, -2.632e-01, -3.346e-02, 2.844e-04)); + r += mul(s5_0, M4(-1.674e-02, 4.465e-02, -1.055e-02, 7.767e-02, -2.578e-04, -9.568e-03, 5.266e-02, -1.918e-02, -1.499e-01, 2.841e-02, 2.862e-02, -7.700e-02, -1.363e-01, 7.312e-02, -2.431e-02, -6.614e-02)); + r += mul(s5_1, M4(-1.206e-01, -1.045e-02, -1.220e-01, 3.041e-02, 1.275e-01, -9.547e-02, 2.891e-02, 1.913e-02, 1.539e-01, 1.372e-01, -3.102e-02, 6.877e-04, -1.430e-01, 1.275e-01, 3.678e-02, 3.973e-02)); + r += mul(s5_2, M4(-9.990e-02, 6.500e-02, 2.164e-02, -3.470e-02, -1.054e-02, 1.669e-02, 6.746e-02, -4.130e-03, -5.044e-02, 8.461e-02, 1.025e-01, -7.755e-02, 2.416e-02, 4.338e-02, 3.518e-02, 5.068e-02)); + r += mul(s5_3, M4(-4.167e-02, 8.291e-02, -2.738e-02, -8.643e-02, -3.288e-01, -9.592e-03, 5.701e-02, 1.109e-01, -7.073e-02, -1.127e-02, 9.348e-03, 1.247e-01, 7.212e-02, -2.192e-02, -1.036e-01, 2.019e-01)); + r += mul(s5_4, M4(1.178e-01, 3.756e-01, 2.588e-01, -1.759e-01, 1.811e-01, -1.029e-01, 2.225e-01, -9.112e-02, -2.861e-01, 3.868e-02, 2.409e-02, 1.286e-01, -4.091e-02, 8.834e-02, 5.362e-01, 1.704e-01)); + r += mul(s5_5, M4(8.134e-02, -8.084e-02, -1.983e-02, 5.903e-02, -7.677e-02, -1.245e-01, -7.246e-01, 2.680e-02, -6.540e-02, 1.478e-01, 6.562e-02, -8.869e-02, -1.622e-02, 8.597e-02, -1.023e-01, 9.386e-02)); + r += mul(s5_6, M4(-7.100e-02, 1.966e-02, 3.316e-03, 1.300e-02, -1.048e-02, 9.209e-02, 1.191e-02, 1.060e-02, -9.255e-02, -1.161e-02, -1.859e-02, 3.663e-02, 3.179e-02, -2.344e-02, -4.789e-02, 5.694e-02)); + r += mul(s5_7, M4(-1.197e-01, 8.330e-02, 7.398e-03, 7.946e-02, 3.528e-02, 3.732e-02, 1.538e-01, -7.668e-02, -2.119e-01, 7.178e-03, -4.201e-03, -7.287e-02, -1.259e-02, -7.504e-02, 1.869e-02, -1.209e-02)); + r += mul(s5_8, M4(-1.196e-01, 1.395e-01, 5.809e-02, 1.249e-01, 2.554e-01, 1.052e-01, -2.597e-01, 5.365e-02, 4.474e-02, 
1.310e-01, -1.281e-02, -1.988e-01, -1.366e-03, 5.336e-02, -4.164e-02, -4.151e-02)); + r += mul(s6_0, M4(4.469e-03, 1.737e-02, -1.705e-02, 5.297e-03, 1.855e-01, -8.838e-02, 2.011e-02, -2.865e-02, 3.722e-01, 1.446e-02, 1.195e-01, -8.723e-02, 3.307e-01, 2.032e-01, 4.686e-02, -1.524e-01)); + r += mul(s6_1, M4(1.355e-01, -1.272e-01, -1.107e-02, -4.717e-02, 1.443e-01, -7.464e-02, -1.940e-01, 1.704e-01, 1.906e-01, 5.857e-02, -1.822e-01, 1.136e-01, -2.085e-01, 1.741e-01, 8.393e-02, -9.816e-02)); + r += mul(s6_2, M4(1.483e-01, 1.003e-02, -5.125e-02, 1.980e-01, -4.966e-02, 4.141e-02, 1.500e-01, -1.313e-01, -4.901e-02, 4.779e-02, 1.867e-01, -8.433e-02, -5.538e-02, 6.535e-02, -1.589e-02, 8.818e-02)); + r += mul(s6_3, M4(-2.199e-01, 1.984e-02, 5.543e-02, 2.550e-01, -6.954e-02, -1.127e-01, 1.846e-02, 1.339e-01, 3.304e-01, -7.356e-02, 8.280e-02, -2.870e-01, 1.830e-01, 1.442e-02, -6.192e-02, -4.446e-01)); + r += mul(s6_4, M4(1.546e-01, -1.441e-01, 6.545e-04, -1.750e-01, -6.359e-02, 6.407e-02, -1.872e-01, 1.754e-01, -6.559e-02, -3.563e-01, -2.623e-01, 4.802e-02, -7.486e-02, 2.299e-01, -1.259e-01, -2.384e-01)); + r += mul(s6_5, M4(-1.093e-01, -3.822e-02, -4.077e-01, 1.806e-01, 1.153e-01, -7.748e-02, 8.795e-02, -7.897e-02, -3.800e-01, 8.830e-02, 2.954e-01, 8.917e-02, 8.809e-02, 2.980e-02, 1.702e-01, 3.553e-02)); + r += mul(s6_6, M4(5.084e-02, 1.546e-01, 4.755e-02, -9.203e-02, -1.390e-03, 9.045e-02, 4.932e-02, 4.791e-02, -1.887e-02, 7.416e-03, -2.694e-02, -6.002e-02, 6.272e-02, 4.008e-02, -1.954e-02, -1.398e-01)); + r += mul(s6_7, M4(1.455e-01, 2.552e-01, 2.080e-02, -1.462e-01, 1.691e-01, -6.206e-02, -1.155e-02, -1.095e-01, 1.461e-02, -4.326e-02, 1.744e-02, -9.827e-02, -1.507e-01, 8.395e-02, 2.241e-02, -3.994e-02)); + r += mul(s6_8, M4(1.827e-01, 2.126e-01, -2.834e-02, 1.223e-01, 1.185e-01, -6.990e-02, -5.047e-02, 3.786e-02, -1.499e-01, -4.389e-02, -1.992e-02, 1.044e-01, 7.411e-02, 1.526e-01, 7.076e-02, -1.150e-01)); + r += mul(s7_0, M4(2.013e-02, 2.178e-02, 1.181e-03, -1.664e-02, 
-5.620e-02, 5.579e-02, 1.369e-02, -5.287e-02, -1.574e-01, 1.091e-01, 1.789e-02, 5.556e-02, 1.153e-01, -9.259e-02, -1.508e-02, 4.501e-02)); + r += mul(s7_1, M4(-5.281e-02, 1.956e-03, 6.077e-02, 1.593e-02, -6.373e-02, 1.553e-01, 1.510e-03, 1.487e-01, -4.906e-02, -3.564e-02, -1.032e-02, 2.023e-01, 1.741e-02, -3.038e-01, 7.891e-02, 3.233e-01)); + r += mul(s7_2, M4(-4.136e-02, 7.703e-02, 1.899e-02, 1.387e-03, -3.422e-03, 1.277e-01, -6.664e-02, -1.432e-02, 1.807e-01, -1.567e-01, -1.541e-02, -5.388e-02, 6.383e-02, 8.804e-03, 1.239e-02, 9.009e-02)); + r += mul(s7_3, M4(-3.707e-02, 1.029e-02, 2.850e-02, 3.530e-02, 9.392e-02, -1.465e-02, 4.523e-02, -1.691e-01, -1.945e-01, 3.733e-02, 1.005e-01, -1.274e-01, -1.351e-01, -1.047e-01, -9.687e-02, -1.313e-01)); + r += mul(s7_4, M4(1.389e-01, -7.439e-02, 1.753e-02, -8.637e-02, 2.850e-03, 2.085e-01, -8.887e-02, -7.542e-02, -4.613e-01, -3.791e-01, -3.039e-02, 7.370e-02, -2.159e-02, -2.957e-02, -1.411e-01, -4.683e-02)); + r += mul(s7_5, M4(1.765e-02, 9.105e-02, -2.574e-01, -9.403e-02, -5.962e-02, -1.298e-01, 2.107e-02, -8.077e-02, 1.534e-01, 4.160e-02, 1.538e-01, 1.304e-03, 8.285e-02, -5.715e-02, 1.655e-01, 1.594e-02)); + r += mul(s7_6, M4(6.028e-02, -1.492e-02, 2.226e-02, 7.234e-02, -1.236e-01, 4.281e-02, 4.460e-02, -3.847e-02, 8.586e-02, 4.478e-03, 3.610e-03, 1.477e-02, 5.615e-02, 1.035e-02, -3.046e-02, 1.156e-01)); + r += mul(s7_7, M4(-1.278e-01, -8.835e-02, -6.829e-03, 1.548e-02, -5.993e-02, -1.321e-01, 5.002e-02, -1.982e-01, -8.825e-02, -1.431e-01, -1.756e-02, -1.548e-01, -8.113e-02, -7.183e-02, 7.995e-03, 1.356e-01)); + r += mul(s7_8, M4(-2.751e-02, 6.631e-03, -1.008e-02, 5.478e-02, 5.620e-02, 6.144e-02, -2.082e-02, -2.847e-02, 9.692e-02, 7.541e-02, 2.322e-02, -7.078e-02, 4.336e-02, -9.761e-02, 1.265e-02, -1.849e-02)); + r += V4(-1.950e-03, -9.835e-03, -5.918e-03, -6.251e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 
s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.175e-02, 7.904e-03, -3.665e-03, -7.399e-02, -7.346e-02, 3.634e-02, -1.192e-01, -1.515e-01, 1.484e-01, 8.720e-03, -1.163e-01, 5.150e-02, -1.065e-02, 1.991e-02, 1.009e-01, -4.675e-03)); + r += mul(s0_1, M4(-6.935e-02, -7.346e-02, 1.201e-02, -1.113e-01, 1.152e-01, 1.159e-01, 4.594e-02, -1.963e-02, -6.518e-02, -1.182e-01, -4.926e-02, 6.986e-02, -1.866e-01, -1.810e-01, -6.166e-02, -8.193e-02)); + r += mul(s0_2, M4(4.910e-02, 1.101e-01, -6.767e-02, 2.115e-02, 7.057e-02, -1.978e-01, 1.134e-01, -7.613e-02, 7.247e-02, 1.279e-01, -5.826e-03, -2.475e-02, -1.172e-01, -2.345e-02, -4.468e-02, -3.181e-02)); + r += mul(s0_3, M4(8.155e-02, 7.672e-02, 1.507e-02, 8.836e-02, -1.557e-01, 8.005e-02, 1.360e-01, -7.389e-03, 4.173e-02, -4.035e-02, -5.395e-02, 1.547e-01, -1.499e-01, -1.392e-01, -2.050e-01, 1.376e-01)); + r += mul(s0_4, M4(2.037e-01, 3.868e-02, -4.294e-01, -1.597e-01, 5.532e-02, 1.920e-01, -2.047e-01, -1.017e-01, 4.292e-02, -9.700e-02, 1.780e-01, -2.914e-01, -1.116e-01, -1.649e-01, -6.510e-02, 3.566e-01)); + r += mul(s0_5, M4(-9.637e-02, -4.248e-02, 5.101e-02, -3.660e-02, 5.758e-02, 1.896e-01, 2.192e-01, 1.228e-01, -9.211e-02, 7.804e-02, -3.575e-02, 7.258e-02, -2.384e-01, -2.825e-01, 8.578e-02, 5.352e-02)); + r += mul(s0_6, M4(5.512e-02, -6.419e-02, -1.067e-01, -5.827e-02, 4.080e-02, -1.957e-02, -1.244e-01, -1.166e-05, 5.461e-02, 3.495e-02, -3.948e-02, 4.974e-02, -7.014e-03, -1.700e-01, -9.444e-02, -3.135e-01)); + r += mul(s0_7, 
M4(-7.096e-02, 2.266e-01, 1.377e-02, -7.295e-02, -6.385e-02, 7.439e-02, -1.597e-02, -7.386e-02, -8.307e-02, -1.431e-02, -1.351e-02, -9.143e-02, -6.140e-02, -4.966e-02, -1.690e-02, -2.453e-01)); + r += mul(s0_8, M4(6.443e-02, 1.683e-02, -9.873e-02, -5.451e-02, 7.690e-02, 1.760e-01, -1.316e-03, -2.711e-02, -5.968e-02, -5.324e-05, 8.865e-03, -6.855e-02, 1.687e-02, 3.854e-03, 3.375e-02, -3.976e-02)); + r += mul(s1_0, M4(1.615e-01, 7.675e-02, 6.130e-02, -4.256e-03, 9.372e-03, 2.417e-02, -1.267e-02, 3.520e-02, 1.206e-01, 2.362e-01, -3.508e-02, -7.321e-02, -9.689e-03, 9.016e-02, -1.529e-01, 1.355e-01)); + r += mul(s1_1, M4(-1.988e-01, -1.015e-02, 2.393e-01, -1.601e-01, 4.967e-02, -5.905e-02, 1.260e-01, -4.496e-02, -1.891e-01, -3.653e-01, 2.536e-01, 9.012e-02, 1.276e-01, 7.789e-02, -1.061e-01, 5.498e-03)); + r += mul(s1_2, M4(-2.339e-03, 4.687e-02, 1.132e-01, 3.913e-02, -4.598e-02, 4.816e-02, 2.352e-01, 2.179e-03, -1.488e-01, 7.972e-03, 1.557e-01, -6.133e-02, -9.475e-03, 6.294e-03, -5.184e-02, -1.031e-01)); + r += mul(s1_3, M4(1.105e-02, -6.888e-02, 1.064e-01, 3.589e-01, -9.730e-02, -4.418e-02, 5.653e-02, 7.384e-02, 1.762e-01, -3.605e-01, 3.389e-02, 1.859e-01, -1.296e-02, -6.121e-02, -5.948e-02, -1.777e-01)); + r += mul(s1_4, M4(7.330e-02, -8.254e-03, -3.507e-01, 5.515e-02, -1.987e-02, -7.001e-02, -2.392e-01, -2.122e-02, 2.566e-01, -3.824e-01, 2.372e-01, -1.870e-01, 8.637e-02, -4.245e-02, -9.150e-02, 1.525e-01)); + r += mul(s1_5, M4(7.944e-03, -1.674e-01, 2.152e-01, 9.441e-02, -9.459e-02, -9.693e-02, 8.168e-03, 6.529e-02, -8.503e-02, 7.804e-02, -4.030e-02, 1.310e-01, -5.279e-02, -2.527e-01, 1.448e-01, 1.888e-02)); + r += mul(s1_6, M4(-1.401e-01, -5.401e-02, 5.864e-02, 1.263e-01, 1.251e-01, 5.264e-02, -7.004e-02, 2.028e-01, -2.203e-01, 2.476e-01, 2.297e-01, 6.445e-02, -1.481e-02, -9.286e-03, 4.134e-02, -6.499e-02)); + r += mul(s1_7, M4(-2.704e-01, 6.734e-03, 3.892e-02, -6.472e-02, 4.565e-02, -1.986e-01, 6.916e-02, -3.508e-02, -1.698e-01, -8.924e-02, -5.845e-02, -1.234e-01, 
3.300e-02, 2.426e-01, -2.275e-02, 3.760e-02)); + r += mul(s1_8, M4(-1.011e-01, -6.674e-03, 6.852e-02, 3.372e-02, -1.279e-02, 1.429e-02, -1.103e-01, 5.372e-02, -4.091e-02, 2.646e-01, 6.331e-02, -2.434e-01, 2.344e-02, 1.366e-01, -1.057e-01, 3.504e-02)); + r += mul(s2_0, M4(-5.324e-02, -3.071e-02, 5.975e-02, -2.922e-02, 8.010e-02, 6.930e-02, 1.457e-01, -6.005e-02, -2.727e-01, 2.198e-03, 3.540e-02, 6.303e-04, -3.038e-02, 1.291e-01, 1.789e-01, -4.896e-02)); + r += mul(s2_1, M4(6.958e-02, 1.246e-01, -2.802e-01, -3.116e-02, 1.069e-01, 3.366e-02, -1.711e-01, -3.527e-02, 1.861e-02, 1.582e-01, -9.334e-02, -5.769e-02, -3.187e-03, -9.438e-04, 3.405e-02, -1.271e-01)); + r += mul(s2_2, M4(2.848e-02, -1.627e-01, 7.388e-02, -5.333e-03, -2.879e-02, -1.583e-01, -4.654e-02, -1.668e-02, -3.003e-02, 1.797e-04, -1.960e-01, -1.396e-01, -9.244e-02, -9.475e-02, 4.937e-02, -8.130e-02)); + r += mul(s2_3, M4(-1.044e-01, -3.147e-02, 3.702e-02, -1.435e-01, 1.565e-01, -2.193e-01, 8.940e-02, 1.489e-01, 7.603e-02, -2.348e-03, 1.507e-01, 2.641e-02, -2.228e-02, 2.442e-02, 1.193e-01, -9.107e-02)); + r += mul(s2_4, M4(-4.541e-02, -1.822e-02, 5.480e-02, 7.376e-02, -1.664e-02, 7.738e-02, 2.179e-01, -9.347e-02, -1.609e-01, -2.437e-01, -3.415e-02, 1.117e-01, -3.191e-01, 5.072e-02, -9.656e-02, -2.812e-01)); + r += mul(s2_5, M4(-7.991e-03, 1.267e-01, 8.988e-02, 7.728e-02, 1.820e-02, 2.832e-01, 1.555e-01, 1.880e-01, 5.675e-02, 1.278e-01, -9.058e-02, 3.068e-02, -1.569e-01, -3.377e-02, -5.649e-02, -2.665e-03)); + r += mul(s2_6, M4(-1.005e-01, 5.469e-02, -6.973e-03, -1.356e-02, -3.452e-02, -1.223e-01, -1.872e-01, 1.028e-01, -9.021e-02, -1.790e-01, -1.233e-01, 2.326e-01, -1.541e-03, 2.155e-01, 2.144e-02, 1.006e-02)); + r += mul(s2_7, M4(-9.650e-03, 6.253e-03, 5.875e-02, -6.425e-02, -9.983e-03, -3.266e-01, -2.351e-01, -7.764e-02, 6.405e-02, -2.074e-01, -4.355e-02, -2.418e-01, -8.434e-02, 2.632e-01, 3.880e-02, -1.288e-02)); + r += mul(s2_8, M4(-1.866e-02, -1.200e-01, 4.102e-02, -7.159e-02, -1.540e-01, -4.532e-02, 
2.856e-02, 4.375e-02, 3.164e-02, -1.313e-01, 1.993e-03, -1.613e-01, -1.341e-02, 4.682e-02, -2.229e-02, 1.230e-01)); + r += mul(s3_0, M4(1.488e-02, 9.005e-02, -1.650e-01, 1.927e-01, 4.848e-03, 1.231e-01, 7.445e-02, 5.600e-02, 1.567e-01, -1.090e-02, -8.759e-02, 9.907e-02, 3.108e-02, -1.901e-02, 1.039e-03, 9.998e-02)); + r += mul(s3_1, M4(-3.929e-02, 2.184e-01, 1.619e-01, -9.356e-02, -6.699e-02, 7.045e-02, -4.070e-02, -8.299e-03, 6.922e-02, 1.851e-01, 8.325e-02, -8.713e-03, 2.463e-02, 6.725e-02, -1.198e-01, 8.346e-02)); + r += mul(s3_2, M4(2.297e-01, 4.461e-01, -8.689e-02, 8.217e-02, -3.398e-03, -1.322e-02, 8.756e-02, -1.440e-01, -2.319e-02, 1.481e-01, -1.330e-01, -7.913e-02, -8.133e-02, -1.326e-02, -8.318e-02, 1.419e-01)); + r += mul(s3_3, M4(3.549e-01, 1.413e-01, -1.879e-01, 4.910e-02, 1.367e-02, -1.706e-01, -6.252e-02, -5.629e-03, 2.197e-01, -1.161e-02, -5.096e-02, 1.135e-01, 1.198e-01, 3.848e-02, 7.361e-02, -4.333e-02)); + r += mul(s3_4, M4(3.386e-01, 2.337e-01, -1.768e-01, -1.808e-01, 7.786e-02, -7.506e-03, 8.529e-02, 5.867e-02, -8.106e-03, 1.265e-01, -2.229e-01, 2.138e-02, -2.045e-01, 1.796e-01, -1.054e-02, -8.061e-02)); + r += mul(s3_5, M4(-9.682e-02, 8.621e-03, -1.507e-01, -6.597e-02, -5.754e-02, 5.580e-02, -1.207e-02, 6.156e-02, 4.528e-02, 1.013e-01, 2.147e-02, -1.057e-01, -1.034e-01, -9.300e-03, -6.939e-02, 1.647e-01)); + r += mul(s3_6, M4(3.209e-02, 1.255e-01, -1.737e-02, 7.589e-03, -9.270e-02, 2.263e-02, -1.341e-02, -2.919e-02, -1.460e-02, -1.164e-01, -3.115e-02, -2.339e-01, 7.292e-02, 4.512e-02, -7.044e-03, 5.427e-02)); + r += mul(s3_7, M4(2.959e-01, 2.545e-01, -3.127e-01, -4.098e-02, 9.167e-02, 2.408e-02, -1.048e-01, 2.020e-01, 1.810e-02, 7.126e-02, -3.573e-02, 3.418e-02, 1.661e-01, -1.921e-01, 1.919e-02, 2.044e-01)); + r += mul(s3_8, M4(3.867e-02, 3.393e-02, -1.677e-01, 2.496e-01, -2.441e-04, 1.317e-02, -1.696e-02, -1.264e-01, 7.009e-02, 5.613e-02, -7.472e-02, 7.745e-02, -1.621e-02, -1.794e-01, -3.901e-01, 1.637e-01)); + r += mul(s4_0, M4(-1.428e-01, 
-1.089e-01, -3.830e-02, 1.444e-02, 1.623e-02, -9.268e-02, -1.500e-01, 4.216e-02, 3.939e-02, -4.109e-02, 1.220e-02, 4.494e-03, -6.716e-02, -8.483e-02, -1.629e-01, 5.165e-03)); + r += mul(s4_1, M4(-4.999e-02, 7.906e-02, -2.173e-01, 8.715e-02, -2.991e-02, -4.797e-02, 5.975e-02, 1.296e-01, 7.575e-02, -6.238e-02, 1.654e-02, 3.190e-02, -7.018e-02, -1.173e-01, 1.215e-04, 1.444e-02)); + r += mul(s4_2, M4(-9.951e-02, -2.135e-01, -6.087e-02, 6.044e-03, 1.522e-01, 7.643e-02, 2.089e-01, 7.293e-02, 6.425e-02, -3.798e-02, 1.156e-01, 8.501e-03, -1.260e-02, 2.014e-02, 3.734e-02, 5.496e-02)); + r += mul(s4_3, M4(3.064e-02, -4.301e-02, -5.276e-02, 9.839e-02, 1.045e-01, 1.096e-01, -1.898e-01, 2.660e-02, -1.665e-01, 1.566e-02, 1.201e-01, 7.686e-02, 2.139e-01, 2.064e-02, -1.203e-01, 1.569e-01)); + r += mul(s4_4, M4(1.121e-01, 2.903e-02, -4.011e-02, -6.393e-02, 5.334e-02, 1.105e-01, 3.123e-02, 2.782e-01, 1.157e-01, -8.022e-02, -8.132e-02, -2.135e-02, -2.199e-02, 2.053e-02, 9.230e-02, -1.862e-01)); + r += mul(s4_5, M4(-8.481e-02, -1.462e-01, -1.066e-01, 7.641e-02, 5.543e-02, 2.511e-02, 1.210e-01, 7.573e-02, 7.727e-02, 5.993e-02, 1.690e-01, -3.876e-02, -7.304e-02, -1.495e-01, -1.047e-01, -8.634e-02)); + r += mul(s4_6, M4(-1.278e-01, -2.702e-02, -1.215e-01, 5.285e-02, 1.277e-01, -7.058e-02, 4.249e-02, -5.602e-02, -5.366e-02, -8.487e-02, 7.268e-03, -3.483e-02, 2.169e-01, -1.059e-01, 5.655e-02, 1.155e-01)); + r += mul(s4_7, M4(-1.284e-01, -6.037e-02, 2.295e-01, -7.331e-02, -7.732e-02, -8.565e-02, 2.233e-01, 6.804e-02, 3.330e-02, 3.514e-02, 1.514e-01, 5.941e-02, 2.931e-02, 2.716e-02, -2.180e-01, 4.854e-01)); + r += mul(s4_8, M4(-7.057e-02, -8.788e-02, 5.648e-02, -1.189e-01, 9.679e-02, -3.120e-02, -5.615e-02, 1.174e-01, -1.002e-01, -2.512e-02, 1.596e-03, 1.479e-01, -4.285e-02, -1.325e-01, -5.417e-02, 1.184e-01)); + r += mul(s5_0, M4(-7.336e-02, 4.986e-02, -6.978e-02, -1.250e-01, 1.747e-02, -1.301e-01, 8.508e-04, -6.684e-03, 5.767e-02, -2.091e-01, -7.641e-02, 8.003e-03, 1.121e-02, 1.188e-01, 
3.061e-02, 5.473e-02)); + r += mul(s5_1, M4(-4.532e-02, -8.673e-02, 1.559e-01, 4.620e-02, 4.773e-02, -1.382e-01, -2.404e-01, -3.637e-02, -1.933e-02, 1.359e-02, -4.025e-01, -5.301e-02, -5.870e-02, -1.347e-01, 9.181e-02, -6.606e-02)); + r += mul(s5_2, M4(8.472e-02, -1.045e-02, -3.147e-03, 1.595e-02, -2.099e-02, 2.361e-02, -1.277e-01, 6.447e-03, 1.178e-01, 2.549e-01, -1.025e-01, 6.712e-02, 3.116e-02, 2.829e-02, 6.850e-02, 4.284e-02)); + r += mul(s5_3, M4(1.353e-01, 1.083e-01, 7.092e-02, 8.925e-02, -1.226e-01, -1.477e-01, 1.308e-01, 7.759e-02, -8.527e-02, 8.495e-02, -3.606e-02, 7.682e-02, 1.057e-01, -4.422e-02, 1.012e-01, -3.112e-02)); + r += mul(s5_4, M4(5.194e-01, 1.127e-01, -1.789e-01, -2.902e-01, -8.033e-02, 2.070e-01, -1.377e-01, 1.797e-01, -9.848e-02, -2.584e-01, -3.094e-01, -3.850e-02, 6.486e-02, -2.379e-02, -7.151e-02, -1.400e-01)); + r += mul(s5_5, M4(1.470e-02, -5.933e-02, 3.310e-01, -2.670e-02, -6.346e-03, 3.447e-01, 6.178e-02, 1.894e-01, 2.137e-01, 4.231e-01, -1.992e-01, -7.742e-02, -1.112e-01, 5.088e-02, 7.837e-02, -1.893e-02)); + r += mul(s5_6, M4(-3.262e-02, -5.235e-02, 2.604e-02, -1.442e-03, -1.549e-01, 1.616e-01, 2.533e-02, 3.798e-02, 2.459e-02, -3.005e-02, -1.046e-01, -1.406e-01, -1.479e-01, 2.075e-01, 1.014e-02, -1.128e-01)); + r += mul(s5_7, M4(-2.366e-02, -9.877e-02, 2.663e-01, -5.407e-02, 3.387e-01, 9.756e-02, 1.765e-02, -8.855e-02, 8.803e-02, -7.577e-02, -7.924e-02, -1.757e-01, 2.251e-01, -8.353e-02, 1.433e-01, -8.220e-02)); + r += mul(s5_8, M4(-9.193e-02, 1.416e-01, -3.479e-02, -1.860e-01, 9.943e-03, -4.209e-01, 2.237e-01, -8.746e-02, 8.944e-03, -4.964e-02, 8.007e-02, -7.402e-02, 2.539e-02, -5.937e-02, -7.718e-02, 6.535e-02)); + r += mul(s6_0, M4(-1.149e-02, 1.336e-01, -2.739e-02, -3.410e-02, 2.509e-01, -5.544e-02, 4.526e-02, 2.260e-02, -9.964e-03, 1.702e-01, -2.814e-01, 1.477e-01, -9.708e-02, -1.924e-02, -1.130e-01, -9.316e-02)); + r += mul(s6_1, M4(1.282e-01, 1.518e-01, 7.988e-02, -2.483e-04, -3.388e-02, 6.106e-03, 8.260e-02, -1.189e-01, 
2.677e-01, -1.093e-02, 2.075e-01, -6.567e-03, -2.249e-01, -4.893e-02, 1.156e-01, 5.537e-02)); + r += mul(s6_2, M4(-4.985e-02, 6.145e-03, -7.943e-02, -6.762e-02, 1.689e-03, 8.528e-03, 1.398e-01, 4.994e-02, 1.496e-01, 3.995e-02, -1.541e-01, -2.495e-01, -8.654e-02, 9.015e-02, -1.485e-01, -8.601e-02)); + r += mul(s6_3, M4(-4.579e-02, 1.257e-01, -1.238e-01, -1.227e-01, 1.272e-01, 5.987e-02, 4.823e-02, 1.464e-01, 9.932e-02, 3.692e-02, -1.035e-01, 1.131e-01, -2.758e-01, 9.812e-02, 1.047e-01, 2.907e-02)); + r += mul(s6_4, M4(-2.408e-02, 2.359e-01, 1.678e-01, -2.143e-01, -1.030e-01, 9.555e-02, 9.212e-02, 1.842e-01, 5.318e-01, -1.305e-01, 8.809e-02, -1.592e-01, 8.247e-02, 2.926e-02, -1.886e-01, -1.824e-02)); + r += mul(s6_5, M4(-1.915e-01, 2.335e-02, -2.525e-02, -1.063e-02, 9.678e-02, -6.154e-02, -7.538e-02, -9.964e-02, 1.441e-01, 6.867e-02, -2.635e-01, -3.383e-02, -9.144e-02, -1.622e-01, 1.558e-01, -2.456e-01)); + r += mul(s6_6, M4(-2.307e-01, -5.792e-03, -2.040e-02, -5.944e-02, 1.412e-02, -6.434e-02, 1.781e-01, -1.091e-01, -1.130e-02, -6.017e-02, 6.683e-03, -1.664e-01, -1.574e-01, -2.125e-02, -6.781e-02, -1.266e-01)); + r += mul(s6_7, M4(1.197e-01, 5.015e-01, 6.010e-02, 8.000e-02, 8.383e-02, -8.292e-02, 1.608e-01, -6.668e-02, -6.837e-02, 7.250e-02, -2.721e-01, 2.325e-01, -1.741e-02, 3.360e-02, 2.856e-03, 4.512e-02)); + r += mul(s6_8, M4(-3.674e-02, 2.187e-01, -1.116e-02, 4.427e-02, -1.097e-01, -1.043e-01, 1.155e-01, 4.850e-02, 8.935e-02, 4.684e-02, -8.794e-02, 1.239e-02, -8.093e-02, -2.943e-02, -5.811e-02, 9.012e-02)); + r += mul(s7_0, M4(3.852e-02, -1.208e-01, 5.057e-02, -6.356e-02, -1.381e-02, -9.910e-02, -6.690e-02, -2.560e-01, -1.577e-01, -9.102e-02, 1.005e-01, 4.090e-02, 1.631e-01, -9.212e-03, -1.694e-01, -4.077e-04)); + r += mul(s7_1, M4(5.392e-02, 2.484e-02, 6.332e-02, -7.669e-03, 7.114e-03, 1.714e-01, 2.427e-01, -1.117e-01, -8.476e-02, -2.812e-01, 1.370e-02, -1.133e-02, 3.609e-02, 2.885e-01, 9.425e-02, 1.530e-01)); + r += mul(s7_2, M4(-6.421e-02, -6.706e-02, 
-3.964e-02, -4.779e-02, -1.862e-02, 1.248e-01, -2.874e-01, -3.995e-02, 7.169e-02, 1.507e-01, 1.772e-01, -2.482e-02, 4.256e-02, -5.777e-02, 4.751e-02, -1.001e-01)); + r += mul(s7_3, M4(-5.318e-02, 3.495e-06, -8.124e-02, 1.093e-01, -1.059e-01, -1.241e-01, -6.011e-02, -5.487e-02, 2.842e-02, -1.128e-01, -7.228e-03, 1.620e-01, 1.305e-01, 7.061e-02, -1.104e-02, 3.548e-02)); + r += mul(s7_4, M4(-2.013e-01, 1.019e-01, 1.301e-01, 3.976e-02, -1.030e-01, 4.746e-02, -3.342e-02, 1.966e-02, -8.156e-02, -2.459e-01, 1.101e-01, -5.803e-01, 7.450e-02, 5.037e-02, -7.837e-02, 2.862e-03)); + r += mul(s7_5, M4(-3.368e-02, -4.640e-02, 1.928e-01, 6.733e-03, 1.056e-01, -1.108e-01, 1.836e-01, -1.951e-03, -9.614e-02, 1.760e-02, 4.410e-01, -1.877e-01, 6.688e-02, -1.423e-01, 1.370e-01, 6.039e-02)); + r += mul(s7_6, M4(1.508e-02, -8.418e-02, 5.578e-02, -3.385e-02, 7.789e-02, -2.398e-02, -9.028e-02, -2.499e-01, -7.129e-03, 1.097e-02, -3.166e-02, 1.110e-01, -4.634e-02, 1.620e-02, 8.152e-02, 2.475e-02)); + r += mul(s7_7, M4(-6.457e-02, -7.338e-02, -1.095e-02, -1.514e-01, 6.464e-02, -1.237e-01, 6.907e-03, -3.423e-01, -1.199e-01, -1.148e-01, -6.688e-02, -4.312e-04, 1.324e-01, -1.295e-01, 1.495e-01, 1.148e-02)); + r += mul(s7_8, M4(-3.264e-02, -8.084e-02, 1.435e-01, 2.666e-03, -9.529e-02, 9.988e-03, -8.879e-02, 6.709e-02, -2.555e-02, -1.776e-01, 2.287e-01, 1.296e-02, -1.190e-02, -7.646e-02, 9.769e-02, 4.489e-02)); + r += V4(-5.217e-03, 3.975e-03, 1.864e-03, 6.483e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 
s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.175e-02, -1.694e-02, -8.569e-02, -5.949e-02, -6.620e-02, -1.900e-01, -1.319e-01, 4.160e-02, 4.001e-02, 3.895e-02, 2.037e-02, -1.075e-01, -8.762e-03, -1.409e-01, 1.355e-01, 5.291e-02)); + r += mul(s0_1, M4(6.718e-03, 3.007e-03, 3.151e-02, 2.068e-01, -8.581e-02, -1.870e-02, -9.235e-02, -4.185e-02, 6.026e-03, 2.650e-02, 7.126e-02, 1.114e-02, 1.412e-01, -1.346e-01, -7.167e-02, -6.741e-02)); + r += mul(s0_2, M4(-1.020e-01, 5.801e-02, -6.347e-02, 4.344e-02, 6.335e-02, -1.528e-01, -5.082e-02, -6.141e-02, -2.859e-02, -1.392e-01, 3.954e-02, -1.084e-01, 7.401e-02, 9.802e-02, 2.179e-01, -3.707e-02)); + r += mul(s0_3, M4(-5.845e-02, -6.849e-02, 1.452e-01, 1.408e-01, -7.707e-02, 1.754e-01, 1.260e-02, 1.368e-02, -7.258e-02, -7.262e-02, 1.172e-01, -9.114e-02, -2.357e-01, 2.814e-01, 3.940e-01, -6.121e-02)); + r += mul(s0_4, M4(-6.395e-02, 2.226e-01, -7.355e-02, 1.557e-01, -3.481e-03, 9.685e-02, -2.766e-01, -2.425e-01, 1.484e-01, 1.361e-01, 3.202e-02, 2.433e-03, 8.253e-03, -1.182e-01, 2.435e-01, 9.558e-02)); + r += mul(s0_5, M4(3.071e-02, -5.864e-02, -7.887e-02, 1.055e-01, -2.783e-01, -1.640e-01, -8.075e-02, -3.597e-02, 1.222e-02, -1.484e-01, -1.331e-01, 1.219e-01, 1.875e-01, -9.749e-02, 8.626e-02, 1.095e-01)); + r += mul(s0_6, M4(-5.163e-03, -1.028e-03, -8.240e-02, 4.871e-02, -8.953e-02, -9.021e-02, -1.815e-02, 1.325e-02, -1.685e-02, -9.949e-02, 7.342e-02, -1.037e-02, 2.025e-01, -9.399e-02, 2.651e-01, -8.776e-03)); + r += mul(s0_7, M4(3.271e-02, -8.339e-02, -7.430e-02, -8.341e-02, 2.406e-02, -1.733e-01, -1.173e-01, -2.147e-02, 8.118e-02, -5.234e-03, -2.090e-02, 1.759e-02, -5.028e-02, -4.672e-02, 2.634e-02, 2.789e-01)); + r += mul(s0_8, M4(-6.788e-02, 6.316e-02, 1.329e-01, 3.159e-02, 1.531e-01, -9.412e-02, 9.037e-02, 6.997e-02, 1.121e-01, 5.133e-03, 6.301e-02, 2.824e-02, 2.527e-02, 3.360e-01, 1.835e-01, 
-6.276e-02)); + r += mul(s1_0, M4(7.692e-04, -1.298e-01, -8.722e-02, -1.373e-01, -7.525e-02, 3.236e-03, -1.369e-02, -3.295e-02, 8.296e-02, -3.321e-01, 1.751e-01, -8.234e-02, -6.476e-02, -5.886e-02, -4.614e-02, 1.376e-01)); + r += mul(s1_1, M4(3.652e-02, 3.372e-02, 9.810e-02, 6.269e-03, 7.645e-02, 1.129e-01, 2.408e-02, -1.142e-02, 1.517e-01, -2.540e-01, 1.259e-01, -4.302e-02, -1.289e-01, -6.566e-02, -3.015e-01, -9.834e-02)); + r += mul(s1_2, M4(-5.508e-02, 1.215e-01, -1.047e-01, -7.241e-02, 2.695e-02, 8.539e-02, 1.089e-01, -1.456e-02, 1.320e-01, 8.422e-02, -1.003e-01, 4.924e-02, 2.336e-02, -1.863e-02, 2.462e-03, -5.588e-02)); + r += mul(s1_3, M4(-1.951e-01, -1.009e-01, 1.124e-01, -2.740e-02, 1.498e-01, -1.999e-02, 5.868e-03, -9.946e-02, 1.999e-01, -3.002e-01, 3.018e-01, -2.979e-01, -2.358e-02, 4.934e-03, -6.591e-02, -1.072e-01)); + r += mul(s1_4, M4(-3.023e-01, 2.393e-01, -6.489e-02, 1.939e-01, -5.500e-02, -8.528e-02, 4.696e-02, 9.167e-02, 1.327e-01, -2.103e-01, 3.693e-01, 2.747e-01, -4.635e-03, -9.574e-02, -1.332e-01, -2.372e-02)); + r += mul(s1_5, M4(-4.051e-02, -1.101e-01, -1.208e-02, -9.351e-02, 1.126e-01, -2.147e-01, 1.626e-01, -7.501e-02, 5.165e-02, 4.960e-02, -1.825e-01, 2.578e-01, -7.848e-03, -1.683e-02, -1.148e-01, 2.115e-01)); + r += mul(s1_6, M4(8.666e-02, -9.403e-02, 2.449e-02, -1.476e-02, -2.238e-02, 2.431e-01, 5.181e-02, -1.073e-01, 2.711e-01, 9.686e-04, 1.803e-01, -1.269e-02, 1.704e-02, -1.467e-02, 5.393e-02, 3.761e-03)); + r += mul(s1_7, M4(9.757e-02, -5.646e-02, 5.085e-02, -1.721e-01, 2.511e-02, 1.110e-01, -6.198e-02, -9.647e-03, 1.704e-01, -9.128e-02, 3.873e-01, -1.793e-01, -1.634e-02, 3.471e-02, -1.560e-01, -4.559e-02)); + r += mul(s1_8, M4(1.608e-02, -3.813e-02, 1.002e-01, -1.142e-01, 1.281e-02, 8.274e-02, 4.468e-02, 7.662e-02, 1.245e-01, 3.294e-01, 4.836e-02, 9.884e-02, -8.359e-02, -8.662e-02, 4.983e-02, 4.233e-02)); + r += mul(s2_0, M4(1.087e-01, -4.769e-02, 4.811e-02, 1.460e-02, 5.261e-02, -5.656e-02, -6.427e-02, 1.753e-02, -1.460e-02, 
2.694e-02, 1.154e-01, 1.164e-02, 3.179e-02, -6.948e-02, -5.754e-02, -5.212e-02)); + r += mul(s2_1, M4(-1.007e-01, -7.738e-04, -2.879e-02, -1.565e-02, 2.736e-01, -1.106e-01, -3.022e-01, 1.108e-01, -1.894e-01, -1.313e-01, 8.509e-02, 5.408e-02, -5.045e-03, 1.082e-02, -3.259e-03, 3.258e-02)); + r += mul(s2_2, M4(1.436e-01, 3.734e-02, 7.259e-02, -2.224e-02, 9.575e-02, -1.723e-01, -5.512e-02, -6.942e-02, 8.918e-02, 1.282e-01, 1.010e-01, -4.529e-02, 2.712e-02, -8.669e-02, 8.110e-02, 4.198e-02)); + r += mul(s2_3, M4(5.501e-02, -8.083e-02, 4.484e-02, -5.572e-02, -2.468e-02, -1.174e-01, 1.079e-01, 9.341e-02, 1.402e-01, 8.040e-02, 5.163e-02, -2.897e-01, 1.123e-01, 7.672e-04, -1.428e-02, -1.394e-02)); + r += mul(s2_4, M4(5.681e-02, 9.800e-03, -2.085e-01, 5.270e-03, -8.993e-02, 1.132e-01, 8.583e-02, 1.973e-01, 3.957e-02, 4.412e-01, -1.045e-01, -5.221e-02, -3.447e-01, -7.384e-02, -1.200e-01, -3.953e-01)); + r += mul(s2_5, M4(-1.139e-02, -8.055e-02, 4.224e-02, 2.346e-02, -7.170e-02, 6.123e-02, -1.175e-01, 1.110e-01, 3.642e-02, -4.370e-01, -1.034e-01, -2.471e-02, 1.901e-01, -1.608e-01, 1.655e-01, 7.686e-02)); + r += mul(s2_6, M4(-1.402e-02, -7.039e-02, -2.052e-02, 7.504e-04, -2.909e-01, -2.023e-01, 3.094e-01, 1.406e-01, -9.770e-02, 6.719e-02, 1.718e-01, -5.942e-02, 5.432e-03, 3.151e-03, 8.278e-04, 2.630e-02)); + r += mul(s2_7, M4(7.474e-02, -8.459e-02, 1.493e-01, 4.954e-02, -1.415e-01, -1.227e-01, 1.662e-01, -1.843e-01, 9.970e-02, -1.265e-01, -6.688e-02, 7.964e-02, -1.391e-01, 7.834e-02, -8.791e-02, 5.538e-02)); + r += mul(s2_8, M4(5.384e-02, 2.338e-01, 1.137e-01, -6.666e-02, 1.326e-01, -2.095e-01, -8.753e-02, -6.168e-02, -7.500e-02, 1.145e-02, -1.898e-02, 6.601e-02, 9.443e-02, 1.106e-01, 2.410e-03, -6.149e-02)); + r += mul(s3_0, M4(1.665e-02, 1.127e-01, -1.350e-01, -1.032e-01, -8.179e-02, 9.332e-02, 1.726e-01, 4.077e-02, -1.100e-01, 3.697e-02, 5.525e-03, 6.523e-02, 2.083e-02, 4.354e-02, -6.635e-02, -4.197e-02)); + r += mul(s3_1, M4(-2.168e-01, -2.338e-01, 3.560e-01, 1.092e-01, 
-9.485e-03, -2.303e-02, 5.368e-02, 5.366e-02, 1.152e-01, -8.287e-02, 6.032e-02, 2.436e-02, 6.182e-02, 7.975e-02, 7.848e-02, 9.542e-02)); + r += mul(s3_2, M4(-1.242e-01, 3.587e-01, -1.079e-01, 7.291e-02, -5.030e-03, 7.853e-02, -1.000e-01, 1.934e-02, 9.251e-02, -9.404e-02, -1.389e-02, -2.388e-02, 3.480e-02, -3.708e-02, 1.649e-01, -6.239e-02)); + r += mul(s3_3, M4(3.882e-03, -1.262e-02, -3.503e-01, 1.038e-01, 4.361e-02, -1.343e-02, -1.585e-01, 1.887e-02, -6.785e-02, 1.701e-01, 3.468e-02, 1.744e-01, 6.101e-02, 8.358e-02, 1.072e-01, -6.852e-03)); + r += mul(s3_4, M4(3.522e-02, 8.764e-02, 2.818e-01, 4.541e-01, -2.372e-01, 4.611e-02, -1.067e-01, -1.908e-01, -3.359e-01, 2.583e-02, -2.675e-02, 2.571e-01, -2.065e-01, -1.061e-01, -1.042e-01, -9.321e-02)); + r += mul(s3_5, M4(2.322e-01, -1.438e-01, 1.788e-01, 3.579e-02, 6.962e-02, -2.912e-01, -6.591e-02, 5.557e-03, 1.749e-01, -1.945e-02, -6.441e-02, -1.344e-01, 2.166e-01, 2.930e-02, 7.383e-02, 1.014e-01)); + r += mul(s3_6, M4(-2.261e-01, 2.485e-02, -2.500e-01, 1.323e-01, 1.172e-02, -1.858e-01, 2.890e-02, 9.360e-02, 5.834e-02, 2.847e-02, 5.543e-02, 7.950e-02, -5.224e-02, -4.632e-02, -1.439e-01, 1.235e-02)); + r += mul(s3_7, M4(-1.517e-01, 1.218e-01, -1.181e-01, 1.744e-01, -2.539e-02, 7.465e-02, -5.315e-02, -4.634e-02, 1.238e-02, -6.372e-02, -1.333e-01, 3.696e-02, -1.387e-01, 8.102e-02, 1.716e-01, 5.472e-02)); + r += mul(s3_8, M4(-6.559e-02, 2.729e-03, -1.297e-02, -1.486e-01, 6.707e-02, -3.678e-02, -3.943e-02, 4.336e-02, -1.812e-02, 2.694e-01, -3.442e-02, 3.155e-02, 7.208e-02, 7.274e-02, 2.978e-01, -3.940e-02)); + r += mul(s4_0, M4(-3.465e-02, -3.034e-02, 7.630e-02, 1.403e-01, -1.252e-02, 7.796e-02, -5.032e-02, 8.881e-02, 5.965e-03, 1.355e-01, 2.053e-02, -9.148e-02, 8.013e-02, -3.946e-03, 1.344e-01, 8.953e-02)); + r += mul(s4_1, M4(1.583e-01, 3.370e-02, 8.927e-02, 7.169e-02, 2.966e-03, 3.740e-02, 1.197e-01, -1.622e-02, -9.327e-03, -4.305e-02, 6.365e-02, 5.527e-02, 8.257e-02, 1.567e-01, 5.987e-02, 1.150e-01)); + r += mul(s4_2, 
M4(2.552e-02, 5.595e-02, 1.547e-01, -3.538e-02, -1.693e-01, -6.647e-02, -1.526e-01, -5.070e-02, -1.726e-01, 6.701e-02, -3.221e-02, 1.440e-02, -1.106e-01, 1.239e-01, -3.553e-03, -2.591e-02)); + r += mul(s4_3, M4(-8.017e-02, 4.509e-02, 5.302e-03, -2.601e-03, -1.256e-01, -2.064e-02, 1.372e-01, 1.490e-01, 1.511e-02, -1.598e-01, 8.642e-02, 1.566e-02, 9.403e-02, 1.655e-01, 1.993e-01, -5.423e-03)); + r += mul(s4_4, M4(2.370e-01, -1.393e-02, 8.122e-02, -1.765e-01, -8.387e-03, 2.716e-02, -1.179e-01, 1.011e-03, -5.295e-02, -1.455e-01, -4.660e-02, 3.690e-02, -2.313e-01, 5.659e-03, -2.144e-01, -2.195e-01)); + r += mul(s4_5, M4(-7.810e-02, 1.379e-01, 7.611e-02, -4.972e-02, 7.791e-02, 2.354e-02, 1.488e-01, -5.709e-02, 1.128e-01, 1.971e-01, -3.833e-02, -5.006e-02, -8.859e-02, -1.341e-01, 5.348e-02, 1.773e-01)); + r += mul(s4_6, M4(1.223e-01, -6.146e-02, 4.563e-02, -4.953e-02, 2.958e-02, -3.501e-03, 7.077e-02, -1.045e-01, 8.601e-03, 8.685e-02, 8.424e-02, 9.376e-02, -1.085e-02, 1.768e-01, -1.373e-01, -1.582e-02)); + r += mul(s4_7, M4(1.227e-01, -2.950e-01, -1.184e-01, -4.928e-02, 1.532e-01, -1.195e-02, -1.455e-01, -9.216e-02, -2.379e-01, 3.406e-03, -3.235e-02, 1.262e-01, -1.763e-01, 1.022e-01, 1.341e-01, -1.310e-02)); + r += mul(s4_8, M4(1.543e-02, 6.190e-02, 2.085e-01, 5.636e-02, -1.432e-01, -8.273e-02, -1.181e-01, -1.172e-01, 5.136e-02, -1.107e-01, -2.913e-02, -2.501e-02, 4.145e-03, -1.520e-01, -4.142e-02, 2.424e-01)); + r += mul(s5_0, M4(-2.120e-01, -1.650e-01, 1.832e-03, 1.610e-01, 1.080e-01, -2.009e-02, 2.099e-02, -2.894e-02, 5.138e-02, 1.296e-01, -3.023e-02, -1.079e-01, -6.958e-03, -1.421e-01, -1.021e-01, 6.042e-02)); + r += mul(s5_1, M4(-2.127e-01, -1.592e-01, -1.114e-01, 1.150e-01, 1.399e-01, 1.044e-02, -2.055e-01, -2.212e-01, 3.304e-03, -2.082e-01, 2.292e-02, -3.990e-02, 8.811e-02, 8.488e-02, 2.211e-02, 6.205e-02)); + r += mul(s5_2, M4(-1.944e-01, 1.505e-02, -5.527e-02, -4.706e-02, 4.283e-02, 2.693e-01, 6.356e-02, -6.605e-02, -1.454e-01, 1.435e-01, -9.110e-02, -1.689e-01, 
-4.769e-02, 1.216e-02, -4.311e-02, -1.373e-01)); + r += mul(s5_3, M4(1.193e-01, 7.729e-02, -1.368e-01, -1.508e-01, -1.828e-02, 2.755e-02, 1.502e-01, -1.052e-01, 2.362e-02, -9.706e-02, 1.241e-01, 2.726e-02, 2.339e-01, 5.539e-02, -2.415e-01, -9.340e-02)); + r += mul(s5_4, M4(7.527e-02, 1.521e-01, 1.750e-01, -2.241e-01, -1.489e-01, -1.223e-01, -1.495e-02, 2.460e-01, 1.867e-01, 9.202e-02, 3.072e-01, -6.468e-02, -8.080e-02, 3.779e-01, -3.739e-02, -3.178e-01)); + r += mul(s5_5, M4(5.583e-02, -1.809e-01, 1.902e-03, 3.495e-02, -1.362e-01, -2.026e-01, -6.797e-02, 7.779e-02, -6.602e-03, -2.163e-01, -1.743e-01, -2.702e-01, 1.207e-01, -2.103e-02, 6.260e-02, 8.788e-02)); + r += mul(s5_6, M4(-5.838e-02, 1.205e-02, 2.150e-04, -7.729e-02, 1.545e-01, 1.068e-02, -1.927e-01, -1.463e-01, -1.943e-01, 3.998e-03, -3.427e-02, 2.359e-01, 1.087e-01, -1.524e-01, -2.440e-02, -7.714e-02)); + r += mul(s5_7, M4(7.689e-03, -1.592e-01, -1.664e-01, 3.732e-02, -1.136e-01, 1.350e-01, -1.804e-01, 1.284e-01, -1.086e-01, 8.763e-02, 2.806e-01, -3.983e-02, -7.330e-02, -6.553e-02, 1.183e-02, -1.207e-01)); + r += mul(s5_8, M4(-7.484e-02, 9.195e-02, 9.746e-02, 1.688e-01, 2.331e-01, 3.554e-01, 9.061e-02, -6.124e-02, 1.140e-01, -2.146e-01, -3.030e-04, -2.512e-01, 2.448e-02, 4.512e-02, 1.486e-02, -6.921e-03)); + r += mul(s6_0, M4(-1.454e-01, -1.499e-01, 9.947e-02, 4.342e-02, -7.904e-02, -1.603e-02, 5.748e-02, -1.114e-01, 6.683e-02, 1.973e-01, -4.840e-02, -9.544e-02, -1.170e-01, -1.969e-01, 1.633e-02, -1.046e-01)); + r += mul(s6_1, M4(6.078e-02, 3.313e-02, -3.110e-01, -8.444e-02, 4.440e-03, -6.000e-02, -1.246e-01, -8.377e-02, -1.621e-01, -2.073e-01, -2.110e-01, -2.681e-01, -2.036e-01, 4.124e-03, 7.424e-02, -8.063e-02)); + r += mul(s6_2, M4(1.763e-01, 1.183e-01, 1.006e-01, 3.332e-02, -1.681e-01, 2.043e-01, 1.328e-01, -3.412e-02, 1.243e-01, -1.779e-02, -9.896e-02, -4.547e-02, -6.308e-02, -2.888e-01, -3.873e-02, -6.873e-02)); + r += mul(s6_3, M4(1.491e-01, -1.762e-01, -3.981e-01, -7.978e-03, -7.435e-02, -5.581e-04, 
-5.818e-02, 1.080e-01, 6.340e-02, 1.617e-01, 1.582e-02, -9.899e-02, 4.189e-02, -2.024e-01, 5.597e-02, -1.077e-01)); + r += mul(s6_4, M4(-2.062e-01, -1.078e-01, -1.528e-01, -2.963e-01, 3.182e-02, -8.465e-02, -3.658e-01, 8.401e-02, -4.927e-02, -1.083e-01, -6.493e-02, -3.408e-01, 2.211e-01, 1.792e-01, 5.270e-02, -1.743e-01)); + r += mul(s6_5, M4(7.843e-02, -6.175e-02, -2.934e-01, 2.443e-01, -1.198e-01, -1.715e-01, -7.102e-03, 2.675e-02, 6.839e-02, 2.838e-01, -6.958e-02, 4.742e-02, 2.483e-01, -8.091e-03, -6.271e-02, 5.051e-03)); + r += mul(s6_6, M4(4.133e-02, -6.723e-02, -1.961e-02, 2.190e-02, 5.627e-03, -4.316e-02, -3.063e-02, 1.018e-01, -8.854e-02, 8.450e-02, 3.357e-02, 1.446e-01, -1.041e-01, -1.390e-01, 8.766e-02, 8.002e-02)); + r += mul(s6_7, M4(-1.550e-01, 1.802e-01, -3.769e-02, -2.384e-01, 2.138e-02, -1.589e-02, 6.241e-02, 1.630e-01, -4.882e-02, 1.133e-01, -1.028e-01, -4.794e-02, -1.160e-01, 2.888e-01, 1.580e-01, -9.242e-02)); + r += mul(s6_8, M4(8.358e-02, -1.748e-01, -1.645e-01, 2.587e-02, -1.026e-02, 1.733e-01, 1.958e-01, 1.793e-02, -4.920e-02, -6.777e-03, 3.666e-02, 3.048e-02, 1.459e-02, -1.088e-01, 7.087e-02, -1.663e-01)); + r += mul(s7_0, M4(5.895e-02, -4.794e-02, 1.086e-01, -7.724e-02, -5.413e-02, 3.616e-02, -2.307e-01, -4.889e-02, -1.410e-01, -2.301e-02, 8.944e-02, -5.812e-02, 4.765e-02, 2.087e-03, 3.673e-02, -8.790e-02)); + r += mul(s7_1, M4(9.140e-02, 1.553e-01, -7.140e-02, 7.401e-02, -2.839e-01, 2.154e-02, -1.562e-01, 1.315e-01, -1.543e-01, 8.124e-02, 1.522e-01, 4.624e-02, 9.527e-02, 1.319e-01, -1.317e-01, 2.476e-02)); + r += mul(s7_2, M4(7.932e-02, -3.181e-02, 7.362e-02, 5.767e-02, -9.275e-02, -1.133e-01, -8.605e-02, 5.333e-02, 7.329e-02, -1.955e-01, 4.367e-02, -6.820e-02, 4.126e-02, 3.145e-02, -4.424e-02, -4.117e-02)); + r += mul(s7_3, M4(-2.246e-01, -9.220e-02, -7.055e-02, 7.230e-02, 7.646e-04, 6.877e-02, 1.218e-01, 2.049e-01, -2.007e-01, -9.204e-02, -6.821e-03, 2.486e-03, -2.154e-01, 6.851e-02, 2.000e-01, -1.530e-01)); + r += mul(s7_4, 
M4(-1.474e-01, 1.235e-01, 1.286e-01, -8.364e-02, 3.537e-01, -2.301e-01, 7.560e-02, -1.851e-01, 2.242e-01, -1.849e-01, 2.154e-01, 2.809e-01, 1.367e-02, 1.298e-01, -1.446e-01, 2.667e-01)); + r += mul(s7_5, M4(-6.607e-04, 1.046e-01, -2.800e-02, 1.565e-01, 7.318e-02, -1.494e-01, -6.101e-02, -1.297e-01, 5.198e-02, 1.059e-01, 1.264e-01, -2.130e-02, -1.369e-01, -1.306e-01, 1.246e-02, 1.328e-01)); + r += mul(s7_6, M4(1.957e-02, 5.465e-02, 7.663e-02, 3.571e-02, 2.113e-02, 6.583e-02, 7.021e-03, 2.210e-01, -6.460e-02, -1.017e-01, 7.844e-02, -5.866e-02, 5.942e-02, -1.024e-01, -7.892e-02, -1.215e-02)); + r += mul(s7_7, M4(6.379e-02, 2.692e-02, 3.743e-02, 1.136e-01, 7.757e-02, 2.353e-02, 5.736e-02, 1.397e-01, 1.707e-02, -3.405e-02, 1.132e-02, -3.412e-02, 1.058e-01, -2.043e-02, -1.351e-01, 8.975e-02)); + r += mul(s7_8, M4(3.588e-02, 4.297e-02, 4.129e-02, -1.757e-02, 6.987e-02, -7.156e-02, -2.011e-02, -2.481e-02, -3.291e-03, -5.477e-02, 4.980e-02, -1.382e-01, 9.755e-02, 5.748e-02, 1.447e-01, -4.677e-02)); + r += V4(-5.067e-03, 2.460e-02, -2.325e-02, 1.200e-02); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(5.937e-02, -2.293e-02, 4.463e-02, 1.063e-01, 4.986e-02, -1.206e-01, 3.787e-02, -2.935e-02, -2.059e-02, -1.142e-01, 2.708e-02, 6.391e-02, -6.658e-02, 6.910e-02, 4.432e-02, -9.706e-03)); + r += mul(s0_1, M4(8.342e-02, 9.988e-02, 
-8.872e-02, -1.077e-01, 8.941e-02, -6.181e-02, 2.197e-01, -1.389e-01, -7.792e-02, 1.116e-02, 2.648e-02, 1.142e-01, -5.875e-02, 1.048e-01, -2.993e-01, 1.287e-01)); + r += mul(s0_2, M4(8.515e-03, -6.301e-02, -3.246e-02, -7.302e-02, -4.694e-02, -5.162e-02, 3.283e-02, 1.284e-01, 3.292e-02, 3.917e-03, 4.455e-04, 3.902e-02, 1.077e-01, 7.922e-02, 7.547e-02, 1.984e-01)); + r += mul(s0_3, M4(5.984e-02, -1.123e-01, 5.092e-03, 7.493e-02, 8.852e-03, 1.073e-01, -4.862e-04, -1.227e-01, 6.467e-02, 2.370e-01, 2.201e-02, 4.920e-02, 1.976e-02, 2.423e-01, 8.258e-02, -2.042e-01)); + r += mul(s0_4, M4(1.104e-01, -1.184e-02, 4.333e-02, 1.280e-01, -9.092e-02, 4.953e-03, 5.800e-02, -2.968e-02, 1.006e-01, 2.226e-01, 6.584e-02, -3.137e-01, 3.614e-02, -1.852e-01, 8.170e-02, -3.669e-02)); + r += mul(s0_5, M4(9.177e-03, 3.203e-02, 1.440e-01, -1.248e-01, 3.477e-02, -3.008e-03, -5.599e-02, 9.376e-02, -4.932e-02, -2.787e-02, 5.835e-02, -1.218e-01, 2.021e-02, -1.881e-01, -2.538e-02, -2.028e-01)); + r += mul(s0_6, M4(1.072e-01, 2.250e-01, 5.476e-02, 6.869e-02, -6.962e-02, 7.225e-02, 5.069e-02, 5.585e-04, -1.927e-02, -1.290e-01, -3.297e-03, 7.809e-02, 1.727e-01, -1.283e-01, 6.731e-03, 1.033e-01)); + r += mul(s0_7, M4(4.805e-02, 2.181e-02, 9.102e-02, -1.635e-01, -3.383e-02, 4.860e-02, -7.892e-02, 1.356e-02, -4.804e-02, 1.178e-01, 3.489e-02, 1.040e-02, 4.216e-02, 2.385e-02, 1.314e-02, -9.646e-02)); + r += mul(s0_8, M4(4.335e-02, -2.793e-04, 1.483e-02, 3.358e-02, 3.870e-03, -7.282e-02, -4.608e-02, 1.967e-01, -9.398e-02, -1.592e-03, 5.984e-02, -5.951e-02, -6.416e-02, 4.254e-02, -1.406e-02, -6.531e-02)); + r += mul(s1_0, M4(-6.368e-02, -5.346e-02, -1.291e-01, 6.706e-02, -6.380e-03, 5.830e-02, 1.084e-01, 1.159e-01, -4.290e-02, -2.932e-01, -1.320e-01, 3.104e-01, -5.508e-02, -5.885e-02, 1.250e-01, -6.253e-02)); + r += mul(s1_1, M4(-5.418e-02, 9.444e-02, -1.881e-01, -7.574e-02, 1.423e-02, -2.660e-02, 5.467e-02, -7.591e-02, -5.805e-02, 3.760e-01, 1.647e-01, -1.093e-02, -9.242e-02, -4.123e-02, 8.769e-02, 
-4.475e-02)); + r += mul(s1_2, M4(-4.797e-02, -4.127e-02, -7.547e-02, -8.719e-02, -6.771e-02, 3.789e-02, 3.921e-02, -1.768e-02, 3.238e-02, 1.440e-01, 6.898e-02, -7.619e-02, 2.587e-02, -2.627e-02, -8.274e-02, -7.502e-02)); + r += mul(s1_3, M4(-1.377e-01, 1.457e-02, -1.122e-03, 8.004e-03, 4.097e-02, -5.827e-02, -2.435e-02, -1.067e-01, -5.614e-02, 3.039e-01, -2.157e-01, 1.790e-02, -6.650e-02, 4.609e-02, -1.470e-02, 8.873e-02)); + r += mul(s1_4, M4(3.185e-02, -8.179e-02, -3.400e-03, 6.147e-02, -5.562e-02, -2.330e-02, -1.119e-01, 3.381e-02, 1.271e-01, 7.784e-01, 8.042e-02, -7.207e-01, -1.046e-02, -9.352e-02, -7.585e-02, 8.128e-02)); + r += mul(s1_5, M4(-1.194e-01, -3.287e-03, -3.034e-02, 9.041e-03, 3.797e-02, -4.496e-02, -5.645e-02, 1.959e-01, -7.990e-02, -1.278e-01, -1.153e-01, 3.814e-02, -4.159e-02, 1.539e-01, 9.543e-02, 4.509e-03)); + r += mul(s1_6, M4(-4.600e-02, 9.363e-02, 1.716e-02, -1.629e-02, -8.715e-02, 1.116e-01, 1.500e-02, -4.391e-02, 5.248e-02, -2.408e-01, 2.500e-02, -1.324e-01, 3.408e-02, -6.700e-02, -6.231e-02, 1.441e-01)); + r += mul(s1_7, M4(6.006e-02, -5.199e-02, 7.669e-02, -3.729e-02, 9.547e-02, -1.554e-01, -8.385e-02, 3.733e-02, -7.727e-02, 3.808e-03, 8.038e-02, 1.458e-01, -7.448e-02, 2.456e-01, 8.873e-02, 2.466e-02)); + r += mul(s1_8, M4(-6.046e-02, 5.374e-02, -4.915e-02, -1.280e-02, 4.251e-02, -2.449e-02, -1.484e-02, -6.294e-02, -1.233e-01, -1.737e-01, 1.045e-01, -7.113e-02, -4.748e-02, 9.857e-02, 3.051e-02, 7.290e-02)); + r += mul(s2_0, M4(3.988e-02, 1.215e-01, 2.073e-02, 8.147e-02, -1.999e-01, -9.151e-02, 7.208e-02, 7.089e-02, -1.319e-01, -9.644e-02, -6.533e-02, -2.290e-01, -1.754e-02, 1.557e-02, 1.087e-01, -3.825e-02)); + r += mul(s2_1, M4(3.377e-02, 8.423e-02, 1.295e-01, -5.479e-02, 3.646e-03, 9.721e-03, -9.836e-02, 3.895e-02, -1.662e-01, 5.533e-02, -4.808e-01, -6.657e-02, 2.083e-02, -4.455e-02, -3.605e-02, -5.959e-02)); + r += mul(s2_2, M4(2.542e-02, -1.808e-01, -9.664e-02, 2.839e-02, -1.618e-01, -1.337e-01, -1.843e-02, 1.430e-01, 2.806e-02, 
1.548e-02, 9.068e-02, -7.111e-03, 1.574e-02, -5.401e-02, -1.237e-01, 1.971e-01)); + r += mul(s2_3, M4(5.944e-03, 2.828e-02, 4.389e-03, 1.235e-01, -8.884e-02, 1.212e-01, 2.049e-01, 1.114e-01, -6.525e-02, 1.918e-01, 2.489e-01, 2.571e-01, 2.687e-02, 1.020e-01, 1.152e-01, 6.966e-02)); + r += mul(s2_4, M4(4.975e-02, 9.571e-02, 6.714e-02, -5.031e-02, 5.163e-03, 1.960e-01, -2.494e-01, 2.110e-02, 1.925e-01, 8.133e-02, -1.473e-02, 2.836e-01, 1.063e-01, -3.164e-01, 8.172e-02, 1.662e-02)); + r += mul(s2_5, M4(-2.252e-02, -1.249e-01, -6.242e-02, 9.623e-02, -7.664e-03, -7.865e-02, -4.791e-02, 1.493e-02, -5.184e-02, -1.797e-01, -1.056e-01, 9.278e-03, 5.084e-02, 7.665e-03, -4.054e-02, 1.255e-01)); + r += mul(s2_6, M4(3.234e-02, -6.118e-02, -1.704e-02, -3.087e-02, 1.263e-01, 2.389e-01, 7.382e-02, 9.601e-02, 5.484e-02, -6.373e-02, -1.740e-02, -1.325e-01, -4.036e-02, -8.706e-02, -2.694e-02, -6.775e-03)); + r += mul(s2_7, M4(2.323e-02, -2.030e-01, 1.767e-02, -4.558e-02, -1.615e-01, 9.983e-02, 9.543e-02, 6.072e-03, 1.758e-01, -1.039e-01, -1.035e-01, 7.006e-02, -8.250e-02, -5.464e-02, 8.563e-02, -1.959e-01)); + r += mul(s2_8, M4(2.464e-02, 6.963e-02, 1.998e-02, -2.792e-02, 1.232e-02, -1.589e-01, 2.308e-01, -9.732e-02, 6.288e-02, -2.257e-01, 6.418e-02, -4.960e-02, -1.244e-01, -7.443e-03, -1.028e-01, 6.149e-02)); + r += mul(s3_0, M4(-6.824e-02, 6.319e-02, 2.871e-02, -1.192e-02, -6.657e-02, 8.899e-02, 7.591e-03, -9.148e-02, 7.225e-02, -1.377e-01, -1.220e-01, 4.634e-02, 2.185e-02, 5.536e-02, -4.606e-02, -5.407e-02)); + r += mul(s3_1, M4(-8.597e-02, 6.928e-02, 1.251e-01, 5.169e-03, 2.121e-02, 7.393e-02, -7.205e-03, -1.571e-01, -1.590e-01, -2.689e-02, 9.329e-02, 6.116e-03, -7.495e-02, -6.854e-02, -4.035e-02, -1.580e-03)); + r += mul(s3_2, M4(4.135e-02, 1.288e-01, -3.735e-01, -2.938e-01, 3.338e-03, -3.670e-02, 2.704e-02, -2.551e-02, -4.303e-03, -4.822e-02, 1.012e-01, -2.797e-02, -9.120e-03, 3.496e-02, -1.207e-01, 5.322e-02)); + r += mul(s3_3, M4(-2.273e-02, 7.031e-03, -2.290e-01, 5.618e-02, 
-3.347e-02, 7.457e-02, 1.635e-01, 7.182e-02, 2.456e-01, 1.405e-02, -1.609e-01, 4.749e-02, 4.866e-03, -6.041e-02, -5.774e-02, -3.683e-02)); + r += mul(s3_4, M4(4.012e-02, -2.310e-01, -1.690e-01, 1.429e-01, -1.200e-02, 5.024e-04, -9.359e-02, -1.916e-02, 1.757e-01, 1.968e-02, -1.216e-02, 1.541e-01, 8.348e-02, -4.863e-02, 3.573e-02, -2.066e-03)); + r += mul(s3_5, M4(-6.490e-02, 3.889e-01, 5.566e-02, 2.052e-01, 1.422e-02, -1.181e-01, -6.391e-02, 1.733e-01, -1.568e-02, 2.046e-01, -1.073e-02, -7.929e-03, 1.964e-02, -6.834e-02, 1.291e-01, -1.093e-01)); + r += mul(s3_6, M4(-7.826e-02, -6.393e-02, -4.312e-02, -5.907e-02, -1.805e-02, 3.583e-02, 4.076e-02, 4.257e-03, 1.829e-02, 4.131e-02, -4.771e-02, 4.777e-02, -1.655e-01, 4.371e-02, -2.452e-02, -3.887e-03)); + r += mul(s3_7, M4(9.980e-02, 4.313e-02, -6.232e-02, 1.371e-02, -6.087e-02, 5.576e-04, -3.771e-02, -1.848e-02, 1.194e-01, 5.277e-02, -7.765e-03, -1.474e-02, -1.705e-01, 6.577e-02, -4.634e-02, -8.420e-02)); + r += mul(s3_8, M4(7.784e-02, -4.871e-02, 3.519e-02, -1.021e-01, 9.847e-02, -7.194e-02, 7.018e-02, 2.838e-02, -1.022e-01, 7.930e-02, 9.779e-03, 9.524e-02, -5.524e-02, -1.703e-01, 1.328e-02, -4.709e-02)); + r += mul(s4_0, M4(6.635e-02, 1.713e-01, 6.315e-04, -5.086e-02, 1.460e-02, 6.128e-02, -4.817e-02, -1.842e-03, 3.093e-02, 6.817e-02, 1.114e-01, -6.893e-02, 4.283e-02, 1.032e-01, -5.799e-02, -1.508e-01)); + r += mul(s4_1, M4(3.042e-02, 1.621e-01, 2.273e-01, 1.352e-02, 2.633e-02, -1.172e-01, -1.457e-01, 5.422e-02, 6.442e-02, -2.030e-01, 2.280e-01, -7.099e-02, -1.303e-02, 7.623e-02, -2.583e-01, -1.344e-01)); + r += mul(s4_2, M4(-3.789e-02, -2.870e-02, 4.769e-02, -3.381e-02, -2.221e-02, -8.522e-02, -2.436e-01, -3.402e-02, 8.091e-02, 5.432e-02, -6.819e-02, 2.341e-02, -3.246e-02, 6.236e-02, 2.118e-02, -1.794e-01)); + r += mul(s4_3, M4(6.806e-02, -6.803e-02, -2.788e-02, 1.043e-01, 9.637e-02, -1.210e-01, -3.798e-02, -6.496e-02, -3.167e-02, 4.748e-02, 5.062e-02, 1.399e-01, 2.309e-02, -1.467e-02, -3.349e-01, -3.838e-01)); + r 
+= mul(s4_4, M4(-8.276e-02, 3.518e-02, -2.816e-02, -1.163e-01, 1.189e-01, 2.213e-01, 2.881e-01, 8.944e-02, 1.077e-01, 5.187e-02, 6.710e-02, -2.642e-02, 4.845e-03, 2.246e-01, 1.476e-01, 1.532e-01)); + r += mul(s4_5, M4(3.816e-02, -9.649e-02, 7.975e-02, -1.843e-01, -5.197e-02, -6.506e-02, 6.936e-02, -1.033e-01, 1.675e-01, 5.399e-03, 8.398e-02, 6.698e-02, 2.103e-02, -1.023e-02, -2.291e-02, 6.593e-02)); + r += mul(s4_6, M4(-2.078e-02, 4.519e-02, -4.727e-02, -4.325e-02, 8.269e-02, -1.692e-01, -1.491e-03, -1.700e-02, 3.600e-02, -6.412e-02, 8.008e-03, -7.571e-02, -3.149e-02, -1.032e-01, 1.922e-02, -1.210e-01)); + r += mul(s4_7, M4(1.219e-01, -7.195e-02, 3.661e-02, 1.487e-01, -9.657e-02, 1.968e-01, -3.820e-02, 1.134e-01, 2.622e-02, -3.342e-03, 9.060e-02, 7.066e-02, -2.124e-02, 2.729e-01, 6.725e-02, -1.748e-01)); + r += mul(s4_8, M4(6.273e-02, -1.209e-01, 7.117e-02, 2.641e-02, 2.375e-01, 1.264e-01, 2.762e-01, -2.528e-01, 2.499e-02, 6.918e-02, 1.109e-01, -5.336e-02, -3.088e-02, -1.585e-01, -1.852e-03, 9.760e-02)); + r += mul(s5_0, M4(1.189e-01, -1.113e-01, 2.007e-01, -1.077e-01, -1.777e-04, 8.058e-02, 2.036e-02, 5.560e-02, 8.056e-04, 1.197e-01, 2.565e-02, 1.867e-01, 2.823e-02, 2.924e-02, 2.840e-01, 7.127e-02)); + r += mul(s5_1, M4(-2.611e-03, 1.372e-01, -2.056e-01, -9.923e-03, -4.385e-03, -1.626e-01, -8.826e-02, 3.440e-02, -7.744e-02, 4.044e-02, 1.851e-01, 2.742e-01, 1.524e-02, 4.314e-03, 2.586e-02, 4.220e-02)); + r += mul(s5_2, M4(-5.841e-02, -3.703e-02, -1.729e-01, -1.789e-01, 5.015e-02, 3.310e-02, -5.206e-02, 8.799e-02, -5.436e-02, -2.004e-01, 6.269e-02, -1.548e-01, 5.159e-03, -3.037e-02, 3.914e-03, 1.735e-02)); + r += mul(s5_3, M4(4.642e-02, 1.776e-02, 2.240e-02, 1.530e-01, -1.358e-02, -1.500e-01, -5.521e-02, -3.159e-02, -5.773e-02, -9.288e-02, -1.132e-01, 8.716e-02, 3.616e-02, 7.593e-02, 8.513e-02, 1.642e-01)); + r += mul(s5_4, M4(-3.231e-02, 9.385e-02, 3.161e-02, -9.241e-02, 1.490e-01, 5.756e-02, -6.718e-03, -1.243e-01, -5.694e-02, -3.817e-02, -1.343e-01, 4.011e-01, 
6.475e-02, -8.970e-02, 2.720e-01, -8.860e-02)); + r += mul(s5_5, M4(9.055e-04, -1.747e-01, 7.801e-02, 3.477e-01, 7.295e-02, -7.419e-03, -2.549e-01, 2.997e-01, 2.373e-03, -1.371e-01, -1.807e-01, -1.003e-01, -6.018e-02, -8.462e-03, -3.585e-02, 8.651e-02)); + r += mul(s5_6, M4(-4.387e-02, -1.482e-01, 4.883e-02, -7.088e-02, -4.485e-02, 1.373e-01, 3.773e-02, -3.221e-02, -5.640e-02, -5.259e-02, 5.393e-03, -2.579e-03, -2.197e-01, -8.668e-02, -7.545e-03, 8.670e-02)); + r += mul(s5_7, M4(-4.584e-02, -3.105e-02, -4.795e-02, 7.210e-02, -5.926e-02, -2.145e-01, -8.820e-02, 7.103e-02, 7.993e-02, -1.930e-01, -1.305e-01, 8.054e-02, -1.110e-01, -1.978e-01, -8.814e-02, -4.387e-02)); + r += mul(s5_8, M4(1.532e-01, -6.750e-02, 2.989e-04, -2.344e-02, -9.692e-02, -6.528e-02, 2.085e-01, 1.146e-01, -1.238e-01, -8.914e-02, -7.088e-02, 7.841e-02, -2.884e-02, 1.230e-01, -4.543e-02, -1.687e-02)); + r += mul(s6_0, M4(3.081e-03, 2.668e-02, 3.257e-02, 9.723e-02, 5.252e-02, 2.582e-02, -5.727e-02, -9.103e-02, -7.886e-02, -1.317e-01, 4.829e-02, -5.289e-02, -7.629e-02, -3.883e-02, 9.692e-02, -2.103e-02)); + r += mul(s6_1, M4(2.955e-03, 6.440e-02, -2.578e-01, -1.746e-01, -4.254e-02, -1.108e-02, 1.443e-01, 1.771e-01, -1.395e-01, -3.343e-01, 6.528e-02, -1.191e-01, -9.412e-02, 1.384e-01, 8.339e-02, -1.644e-01)); + r += mul(s6_2, M4(2.409e-02, -8.311e-02, -1.283e-01, -3.643e-03, 5.327e-02, 1.244e-01, -8.937e-02, -1.428e-01, 1.809e-02, -3.257e-02, -2.078e-02, 1.377e-01, -1.504e-01, -6.163e-02, -1.606e-01, -1.277e-01)); + r += mul(s6_3, M4(-1.773e-01, -2.236e-01, 1.885e-01, 9.712e-03, -1.424e-01, 3.719e-02, -9.438e-02, -8.754e-02, -1.713e-01, -1.251e-01, -1.434e-01, 8.788e-02, -1.422e-01, -8.237e-02, 7.846e-02, -5.479e-02)); + r += mul(s6_4, M4(-3.731e-02, -1.801e-01, -1.077e-01, 1.602e-03, 3.596e-03, -1.675e-01, -1.324e-02, 1.178e-01, -7.492e-01, -5.005e-02, -1.284e-01, -2.411e-01, -1.177e-01, 4.184e-02, 1.022e-01, -1.371e-02)); + r += mul(s6_5, M4(-6.394e-02, 5.309e-02, -2.414e-02, 2.214e-01, -2.043e-02, 
9.590e-04, -3.341e-03, -8.743e-02, -2.566e-02, 1.464e-01, -1.183e-01, 1.416e-01, -1.272e-01, -8.216e-02, 1.574e-01, 1.021e-01)); + r += mul(s6_6, M4(-2.556e-01, 4.513e-01, 1.047e-01, 1.659e-01, -5.061e-02, -3.856e-02, 9.099e-03, 1.066e-01, 9.161e-03, 2.961e-02, -4.449e-02, 2.394e-02, -7.822e-02, 3.447e-02, 5.728e-02, -7.670e-02)); + r += mul(s6_7, M4(7.941e-02, 7.430e-02, 2.056e-02, -2.944e-03, -1.515e-02, 2.618e-02, 5.338e-02, 9.276e-02, -9.431e-02, 2.459e-02, 4.625e-02, -5.752e-02, -2.353e-01, -4.392e-02, 1.227e-01, -1.196e-01)); + r += mul(s6_8, M4(-1.129e-01, 1.712e-01, -6.834e-02, 3.321e-01, 4.220e-02, 8.974e-02, -2.602e-03, -5.431e-02, -1.216e-02, 1.084e-02, -8.162e-02, 1.976e-01, -4.058e-02, 1.933e-01, 6.706e-02, -7.726e-02)); + r += mul(s7_0, M4(5.551e-02, -3.296e-02, -2.882e-02, 5.526e-02, 2.951e-02, -1.036e-01, -1.112e-01, -8.518e-02, 1.286e-01, 7.652e-02, 1.076e-01, -7.867e-02, 3.029e-02, -8.501e-02, 1.274e-01, -1.497e-02)); + r += mul(s7_1, M4(5.751e-02, -1.738e-02, -7.516e-02, 1.059e-01, -6.178e-02, -1.163e-01, 1.264e-01, -1.363e-01, 1.472e-01, 9.282e-02, 4.998e-01, 2.662e-01, 4.779e-02, 5.058e-02, -5.293e-01, -4.943e-02)); + r += mul(s7_2, M4(-1.004e-02, -2.006e-02, 3.513e-02, 8.537e-02, -3.947e-02, -2.097e-02, -1.330e-01, -1.661e-01, 6.878e-02, -1.212e-01, 1.790e-01, 1.831e-01, -5.628e-02, 1.959e-03, 2.541e-01, 2.014e-02)); + r += mul(s7_3, M4(1.014e-02, 8.258e-02, -5.835e-02, 2.635e-03, -8.298e-03, -8.856e-02, -3.623e-02, -1.395e-02, 1.996e-01, 1.699e-01, 4.473e-02, -2.008e-02, 8.142e-02, 1.458e-01, 9.426e-02, -1.071e-01)); + r += mul(s7_4, M4(-3.228e-02, -1.110e-01, 1.505e-02, -6.655e-02, 7.545e-02, -1.883e-01, -1.282e-01, -3.508e-03, -2.321e-02, -3.186e-02, 2.323e-01, -1.491e-01, 2.796e-01, 6.684e-02, 3.796e-02, 1.500e-01)); + r += mul(s7_5, M4(1.068e-02, -1.265e-01, -1.353e-01, -9.899e-02, 4.629e-02, -1.067e-01, 1.070e-01, 9.997e-02, -3.634e-02, -1.548e-01, 1.127e-01, -3.369e-01, -8.821e-02, 1.451e-02, -6.364e-02, -2.183e-01)); + r += mul(s7_6, 
M4(2.495e-03, -6.878e-02, -3.258e-02, -5.085e-02, 7.895e-02, -7.889e-02, -4.088e-02, 1.297e-01, 5.189e-02, 9.643e-02, -1.326e-02, -2.523e-02, -5.041e-03, -7.283e-02, 1.669e-02, 1.419e-02)); + r += mul(s7_7, M4(1.606e-01, -1.207e-01, 5.464e-02, 2.323e-02, 6.808e-02, 1.081e-01, 4.238e-02, -6.394e-02, 4.764e-02, -7.067e-02, 4.302e-02, -5.596e-02, -6.000e-02, -2.220e-01, -3.503e-02, 9.465e-02)); + r += mul(s7_8, M4(9.846e-03, 1.402e-01, -1.452e-02, -7.531e-02, 6.204e-02, 1.329e-01, 1.072e-02, -2.937e-02, 3.163e-02, 1.031e-01, -1.080e-03, -1.216e-01, -6.041e-03, -2.592e-02, -7.239e-03, -7.482e-02)); + r += V4(2.168e-02, -1.996e-02, -1.898e-02, 1.578e-02); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = 
-max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, 
s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, 
V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.786e-02, -3.112e-02, 2.629e-02, 2.290e-02, 4.508e-02, -3.219e-02, -8.563e-02, -1.302e-01, -1.291e-02, 2.642e-02, 1.880e-01, 5.598e-02, -4.183e-02, 1.934e-02, 4.311e-02, -2.900e-02)); + r += mul(s0_1, M4(1.442e-02, 4.047e-02, -6.616e-02, 3.778e-02, -7.813e-02, -6.908e-02, -7.353e-02, -3.166e-02, -1.249e-01, 2.744e-02, -2.803e-02, 1.487e-02, 9.480e-02, -5.564e-02, -3.953e-02, -3.077e-01)); + r += mul(s0_2, M4(-1.819e-01, 4.849e-04, 8.099e-02, -8.160e-02, 2.268e-01, -5.019e-02, -2.439e-01, 1.967e-02, -1.063e-01, 9.971e-02, -2.644e-02, 6.571e-02, 1.156e-01, -1.429e-01, 1.537e-02, 1.328e-03)); + r += mul(s0_3, M4(1.714e-01, -1.112e-02, -3.325e-02, 1.182e-01, 1.532e-02, -4.505e-02, -5.494e-02, 1.101e-01, 4.073e-01, 3.473e-02, 9.761e-02, 3.505e-01, -8.731e-02, -7.174e-02, -2.050e-01, 1.128e-01)); + r += mul(s0_4, M4(4.445e-02, -3.808e-02, 5.439e-02, 3.406e-02, 5.838e-02, 3.611e-02, 6.767e-02, 7.023e-02, -4.706e-03, -3.040e-02, -1.740e-02, -6.775e-02, -2.607e-01, 6.729e-03, 8.469e-02, -1.808e-02)); + r += mul(s0_5, M4(1.413e-03, 4.123e-02, -4.701e-02, 9.418e-02, -3.279e-02, -2.479e-02, -2.107e-01, 1.633e-01, 9.501e-02, 8.430e-02, 4.720e-02, -1.022e-01, -1.958e-01, -1.459e-02, 2.013e-02, -5.798e-02)); + r += mul(s0_6, M4(3.062e-02, -7.145e-02, -7.975e-02, 6.689e-02, -2.102e-02, 4.818e-02, -3.446e-02, -2.829e-02, 1.792e-01, 7.161e-03, -1.020e-01, 
-6.972e-02, -6.039e-02, 7.220e-03, -5.098e-03, 4.861e-02)); + r += mul(s0_7, M4(6.546e-02, 1.474e-02, 9.940e-02, 3.281e-02, -4.283e-02, 7.346e-02, -3.103e-02, -1.691e-02, -5.183e-02, -1.116e-02, 1.681e-02, -1.291e-01, -4.107e-03, 6.317e-02, 1.915e-01, -2.165e-01)); + r += mul(s0_8, M4(-9.153e-02, -6.225e-02, 6.091e-02, 7.788e-03, 3.638e-03, -6.062e-04, -4.613e-02, 2.878e-02, 1.371e-02, 5.369e-02, 4.774e-02, 7.444e-03, -8.460e-02, 1.729e-02, 4.011e-02, 4.154e-03)); + r += mul(s1_0, M4(9.022e-02, -1.628e-02, 1.448e-01, -1.261e-02, 1.103e-01, -1.530e-02, 2.661e-01, 1.835e-01, -2.674e-02, 2.139e-02, 2.871e-02, 1.505e-01, -4.545e-02, 7.065e-04, -1.055e-01, -1.179e-01)); + r += mul(s1_1, M4(1.442e-01, -5.332e-02, -2.574e-01, 1.344e-02, 2.326e-01, -4.114e-02, -2.524e-01, 6.122e-02, 2.069e-01, -1.513e-01, -1.736e-01, -1.241e-02, -3.533e-03, 5.831e-02, 4.833e-03, 5.475e-02)); + r += mul(s1_2, M4(1.091e-01, -8.201e-02, 1.823e-02, -3.055e-02, -5.081e-02, -7.545e-02, -1.518e-01, 2.404e-02, 3.938e-02, -2.439e-02, -2.380e-02, 1.095e-01, -6.177e-02, 9.110e-02, 4.643e-02, 7.123e-02)); + r += mul(s1_3, M4(-1.171e-01, 5.602e-02, 1.115e-01, 4.051e-02, -6.017e-02, 9.896e-03, 6.392e-02, -2.940e-02, -3.572e-01, 7.977e-02, -8.511e-02, 9.054e-02, 7.819e-02, -8.478e-02, -6.579e-02, 8.911e-02)); + r += mul(s1_4, M4(9.697e-02, -1.022e-01, -2.703e-01, -1.566e-01, -1.255e-01, -1.421e-01, -3.423e-01, 9.287e-02, -2.351e-01, -5.762e-01, 5.198e-02, -2.300e-01, -2.060e-01, -5.823e-02, -1.229e-01, 1.165e-01)); + r += mul(s1_5, M4(1.825e-01, 7.774e-02, -1.016e-01, -2.725e-02, -7.310e-02, 6.078e-02, 2.814e-01, 1.973e-02, 9.262e-02, -1.498e-01, -2.977e-02, -1.005e-01, -4.163e-02, -6.892e-02, -4.553e-02, -1.923e-02)); + r += mul(s1_6, M4(-8.986e-02, -2.023e-02, -5.516e-03, 8.968e-02, 8.664e-02, -4.974e-02, -1.499e-01, 7.626e-02, 6.857e-02, -4.699e-02, 1.069e-01, -6.722e-02, 6.621e-02, -3.576e-02, -2.946e-02, 7.705e-02)); + r += mul(s1_7, M4(-1.015e-02, -2.559e-02, -8.076e-02, 1.014e-01, 5.824e-02, 
1.264e-01, 8.622e-03, 7.740e-02, -1.513e-01, -1.809e-01, -2.511e-02, -1.863e-01, 9.051e-02, 5.982e-02, 1.576e-01, 2.685e-03)); + r += mul(s1_8, M4(8.626e-02, 2.226e-02, 1.986e-01, 1.037e-01, 1.898e-01, 2.340e-02, 1.760e-01, 4.621e-02, 3.388e-02, -3.321e-02, -1.556e-02, 1.179e-01, -7.777e-02, -1.524e-02, 9.254e-02, 6.025e-02)); + r += mul(s2_0, M4(3.083e-02, 3.455e-02, 7.274e-02, -3.024e-01, -9.245e-02, -1.497e-02, -2.403e-02, -1.094e-01, 6.849e-03, 2.399e-02, -6.319e-02, 1.405e-01, -5.364e-02, 6.548e-02, 6.240e-02, -2.531e-03)); + r += mul(s2_1, M4(-1.212e-01, 5.625e-04, -3.083e-01, -1.114e-01, -5.217e-03, 1.887e-02, 2.076e-01, -1.734e-02, -1.215e-01, 5.826e-02, -2.346e-02, 1.103e-01, -1.194e-01, -5.841e-02, -1.227e-02, -2.174e-02)); + r += mul(s2_2, M4(-3.621e-02, -7.684e-02, -9.088e-04, 1.813e-02, -1.315e-01, -2.461e-04, 1.060e-03, 3.393e-03, -7.380e-02, 1.814e-02, -2.801e-01, 1.468e-01, 1.288e-01, -4.260e-02, -1.871e-02, -5.461e-02)); + r += mul(s2_3, M4(5.318e-02, 6.631e-03, -9.817e-02, -2.529e-01, 2.421e-02, 2.086e-02, 3.605e-02, 8.178e-02, 4.743e-02, 9.302e-02, 4.339e-02, -7.637e-02, -2.042e-01, -9.427e-03, -4.775e-02, -1.017e-01)); + r += mul(s2_4, M4(-1.250e-01, -5.397e-02, -6.126e-02, -1.112e-01, 9.591e-02, 1.181e-01, -2.184e-01, 4.689e-02, 3.767e-01, -1.545e-01, -4.446e-02, 2.062e-01, -9.482e-02, 6.146e-03, -3.114e-01, -1.503e-01)); + r += mul(s2_5, M4(-2.206e-01, -5.046e-02, -4.007e-02, -4.498e-02, -1.744e-01, -3.015e-02, -4.830e-02, 1.551e-03, 1.089e-01, 4.379e-02, -4.175e-02, -5.496e-02, -1.987e-02, -4.003e-02, 2.631e-02, 1.678e-03)); + r += mul(s2_6, M4(5.501e-02, -2.809e-03, -3.428e-02, 1.954e-02, -1.177e-02, -1.439e-02, 1.093e-01, -1.743e-01, 1.530e-01, -1.372e-01, -3.701e-01, -1.271e-01, -1.260e-02, 4.581e-02, 4.199e-02, -2.764e-02)); + r += mul(s2_7, M4(-2.162e-02, -2.844e-04, 1.154e-01, -8.801e-02, -1.666e-01, -1.124e-01, -1.474e-01, -1.242e-02, 2.482e-01, 1.539e-01, -1.349e-02, -8.576e-02, 1.625e-03, 7.734e-02, -3.190e-02, -1.014e-01)); + r += 
mul(s2_8, M4(-8.863e-02, 6.740e-02, 2.618e-02, 6.910e-02, 8.198e-03, -6.465e-02, 8.453e-02, -1.630e-01, 1.622e-01, -3.858e-02, -3.013e-02, 7.943e-02, 5.486e-02, 1.231e-02, 5.551e-02, 1.091e-01)); + r += mul(s3_0, M4(-4.926e-02, -5.191e-03, 4.592e-03, 2.580e-03, -5.176e-02, 2.099e-02, -7.537e-02, -5.804e-04, 5.594e-03, 1.473e-02, 1.102e-01, -4.678e-02, 1.494e-01, -2.218e-02, 2.460e-02, -5.700e-02)); + r += mul(s3_1, M4(-8.926e-02, -5.994e-02, -9.126e-03, 5.391e-02, 1.378e-01, 1.110e-02, -3.565e-02, -1.239e-01, -8.361e-02, 7.913e-02, -1.435e-02, -7.187e-03, -1.193e-02, 1.499e-01, -4.087e-03, 1.375e-01)); + r += mul(s3_2, M4(-1.276e-01, 4.709e-02, 4.652e-02, -3.621e-02, 6.574e-02, -9.898e-02, -7.531e-02, 1.525e-02, -6.954e-02, -3.843e-02, 5.811e-02, 1.199e-02, 8.230e-02, 2.024e-01, 3.189e-02, -4.189e-03)); + r += mul(s3_3, M4(-8.962e-02, 2.121e-02, 6.877e-03, 4.628e-02, -6.696e-02, -1.758e-02, 6.365e-02, 1.147e-01, 1.634e-01, -8.616e-02, -1.130e-01, -3.855e-02, 1.976e-01, -4.414e-02, -2.729e-02, 1.943e-01)); + r += mul(s3_4, M4(1.419e-01, -1.145e-01, 7.334e-02, 3.357e-02, 1.087e-01, 6.161e-02, -4.461e-02, -8.577e-02, 1.379e-01, 5.960e-02, 7.210e-02, 9.497e-02, 9.145e-02, 2.237e-01, -2.762e-02, 1.797e-02)); + r += mul(s3_5, M4(-1.733e-01, -1.167e-01, 5.172e-02, -8.973e-02, -3.348e-02, 1.052e-01, -5.964e-03, -2.220e-02, -1.058e-01, 4.942e-02, 2.739e-02, 2.404e-02, 1.421e-01, 1.827e-01, 1.569e-01, -1.539e-01)); + r += mul(s3_6, M4(2.506e-02, 1.950e-02, 2.321e-03, -4.077e-02, 1.772e-01, -7.419e-02, 9.891e-02, 2.252e-01, -2.654e-02, 1.410e-02, 7.146e-02, -2.953e-02, 8.560e-02, 2.365e-02, -4.493e-03, -1.049e-01)); + r += mul(s3_7, M4(-6.964e-02, 7.567e-02, 1.144e-01, -1.584e-01, 9.097e-02, 1.047e-01, 2.855e-02, 1.349e-01, -1.304e-01, 4.699e-02, 8.094e-02, 2.085e-02, 2.096e-02, -2.421e-03, 8.454e-03, -9.203e-02)); + r += mul(s3_8, M4(-3.464e-02, -4.869e-02, 3.778e-03, 1.347e-01, 3.872e-02, -2.634e-02, -1.421e-01, -7.913e-02, -1.429e-03, -1.537e-02, -8.353e-03, 4.449e-02, 
-4.023e-03, 8.170e-02, 1.893e-01, 1.227e-01)); + r += mul(s4_0, M4(4.691e-02, 6.286e-02, 6.771e-02, -1.503e-01, -1.064e-02, 5.082e-03, 9.837e-02, -9.486e-02, 1.464e-02, -2.707e-02, -6.510e-02, -2.803e-03, 6.008e-02, 9.668e-03, 7.732e-02, -2.190e-01)); + r += mul(s4_1, M4(4.686e-02, -5.499e-02, 1.427e-02, 1.013e-01, -8.526e-02, 6.392e-02, -2.287e-02, 8.515e-03, -5.218e-02, -4.427e-02, 9.739e-02, -1.362e-01, -3.744e-03, -6.457e-02, -1.491e-01, -1.083e-01)); + r += mul(s4_2, M4(1.916e-02, -6.114e-02, 5.091e-02, 5.791e-02, -1.167e-01, 2.778e-02, 3.416e-02, 5.700e-02, 8.694e-02, -2.474e-02, -6.353e-02, -4.257e-02, 2.920e-02, -8.550e-02, -2.982e-02, -8.382e-03)); + r += mul(s4_3, M4(1.841e-02, -1.143e-02, -3.103e-03, -1.753e-01, 7.652e-02, 1.885e-02, -1.299e-02, 3.879e-02, 3.357e-02, -8.420e-04, -4.729e-02, 7.755e-02, 1.938e-02, 6.921e-02, 4.764e-02, -2.407e-01)); + r += mul(s4_4, M4(3.810e-03, 1.434e-01, -4.722e-02, 1.589e-01, -3.296e-02, -6.644e-02, -2.176e-02, 8.273e-02, -4.930e-02, 5.519e-04, 3.101e-02, -6.996e-03, -2.028e-01, 1.508e-01, -1.391e-01, -9.957e-02)); + r += mul(s4_5, M4(-8.341e-03, -1.784e-01, 6.608e-02, 1.683e-02, -3.243e-02, 2.721e-02, -2.764e-02, 1.793e-01, -1.665e-01, -4.042e-02, -1.725e-01, -4.332e-02, -2.139e-02, -3.609e-02, -1.666e-01, -8.603e-03)); + r += mul(s4_6, M4(8.658e-02, 3.472e-03, 1.808e-02, 4.573e-02, 3.757e-03, -2.061e-02, -2.943e-02, 7.366e-02, -2.444e-02, -4.287e-02, 7.998e-02, -1.348e-01, 1.004e-01, -3.994e-02, 1.171e-01, 7.248e-02)); + r += mul(s4_7, M4(1.384e-02, -4.799e-03, 6.849e-02, -4.029e-02, -5.857e-02, -4.379e-02, 7.338e-02, 1.482e-02, -2.001e-01, -2.230e-02, -6.334e-02, 2.028e-01, -6.983e-02, 6.961e-02, -1.304e-01, 9.270e-02)); + r += mul(s4_8, M4(-1.824e-02, -3.640e-02, -7.442e-02, 9.155e-02, 2.034e-02, -4.918e-03, -7.245e-02, 2.484e-02, 1.855e-01, -6.453e-03, -1.036e-01, -7.459e-02, 5.481e-02, -2.091e-02, 1.651e-01, 6.029e-02)); + r += mul(s5_0, M4(-8.622e-02, 3.879e-02, 3.278e-02, -9.564e-02, 1.094e-01, 5.871e-02, 
8.887e-02, -1.808e-01, 2.255e-02, -3.330e-02, -3.161e-02, -5.811e-02, 1.091e-01, -5.190e-03, 1.022e-01, -8.137e-02)); + r += mul(s5_1, M4(7.586e-02, -6.875e-02, -6.969e-02, 2.362e-01, 2.035e-01, -8.574e-02, -1.608e-01, -2.036e-01, -1.019e-01, -4.419e-02, -4.203e-02, 7.364e-02, 2.055e-02, -2.850e-02, 1.823e-03, 3.981e-03)); + r += mul(s5_2, M4(1.079e-02, -1.865e-02, -1.433e-02, 8.257e-02, 1.680e-01, -5.562e-02, 8.500e-02, -1.274e-01, -2.049e-02, -4.915e-02, 6.383e-02, -6.182e-03, -1.959e-02, 2.753e-02, -1.664e-02, 4.208e-02)); + r += mul(s5_3, M4(-3.978e-02, -4.015e-02, -2.950e-02, -2.059e-01, 9.038e-02, 1.546e-02, -1.041e-01, -1.511e-02, 1.272e-02, 2.335e-02, -1.798e-02, 3.744e-02, 6.389e-02, 7.648e-02, -1.019e-01, -2.651e-02)); + r += mul(s5_4, M4(-6.980e-02, 2.744e-02, -4.570e-02, 1.148e-01, 1.342e-02, -4.770e-02, -8.229e-02, 6.089e-02, 3.118e-02, 2.920e-01, 3.167e-01, -1.521e-01, 5.399e-02, 1.020e-01, 1.149e-02, -8.619e-02)); + r += mul(s5_5, M4(3.253e-02, -2.127e-01, -3.972e-02, 3.569e-03, 4.084e-02, 6.521e-02, -9.020e-02, 2.094e-02, -9.394e-02, -1.544e-02, -4.392e-02, -1.579e-01, -8.755e-02, -5.974e-02, -4.310e-02, 7.993e-02)); + r += mul(s5_6, M4(-9.472e-02, 4.516e-02, -8.202e-02, -4.753e-02, 1.139e-01, -3.746e-02, 2.098e-03, -1.064e-01, 9.612e-02, -2.597e-02, 1.139e-01, 9.148e-03, 1.091e-01, -2.330e-02, -7.269e-02, 1.313e-01)); + r += mul(s5_7, M4(-1.524e-01, 3.063e-02, -1.198e-01, 1.550e-01, -3.171e-02, -5.483e-02, -4.361e-02, -6.489e-02, -1.526e-01, 6.543e-02, -6.782e-02, 1.009e-01, -1.401e-02, 1.156e-01, -2.638e-02, -2.473e-02)); + r += mul(s5_8, M4(1.049e-02, 6.141e-02, 7.195e-04, 5.764e-02, 3.226e-02, 1.319e-02, -1.031e-01, -1.945e-02, -7.885e-02, -3.438e-02, -9.568e-02, 5.990e-02, 1.148e-02, 5.140e-02, 2.240e-01, 6.594e-03)); + r += mul(s6_0, M4(1.406e-01, -2.602e-02, 1.205e-01, 2.176e-01, -1.926e-04, -5.914e-02, -5.769e-02, 1.209e-01, 6.493e-03, -3.487e-02, -3.463e-02, -7.879e-02, -2.138e-01, 1.331e-02, 4.086e-02, 4.241e-02)); + r += mul(s6_1, 
M4(2.871e-02, 3.683e-02, 7.109e-02, 1.031e-01, 5.881e-02, 6.017e-03, -2.555e-01, 6.659e-02, -4.397e-03, 8.779e-02, 2.377e-02, -9.669e-02, -1.420e-01, 1.134e-01, -1.499e-01, 1.266e-01)); + r += mul(s6_2, M4(1.369e-01, -1.154e-01, -1.666e-01, 5.584e-02, -1.002e-01, -6.546e-03, 1.571e-02, 1.530e-01, -1.969e-03, 2.545e-02, -6.619e-02, -6.586e-02, -1.282e-01, 7.008e-02, 9.567e-03, 1.252e-01)); + r += mul(s6_3, M4(-1.094e-01, 1.610e-02, 9.718e-02, 1.548e-01, 9.198e-02, -4.904e-02, -7.468e-04, 2.606e-01, 5.387e-03, 4.550e-02, 3.330e-02, -7.607e-02, 8.492e-02, -8.416e-02, -8.928e-02, 1.261e-01)); + r += mul(s6_4, M4(1.180e-01, 3.047e-02, -3.698e-01, -5.206e-02, 2.965e-02, 6.208e-02, -6.801e-02, 7.428e-02, 4.247e-02, -9.038e-03, 7.127e-03, -1.259e-01, -7.449e-02, -1.251e-01, -2.252e-01, -5.215e-02)); + r += mul(s6_5, M4(-1.766e-01, 1.002e-01, 2.354e-01, -1.658e-01, 1.370e-01, -4.224e-02, -6.316e-02, 2.053e-01, -1.863e-01, 7.560e-02, -1.606e-02, 8.289e-02, 2.111e-01, 8.929e-02, 1.141e-01, 1.075e-01)); + r += mul(s6_6, M4(6.023e-02, 5.537e-02, 8.805e-02, -1.834e-01, 8.324e-02, -9.879e-03, -3.770e-02, 1.114e-01, 2.186e-01, 3.151e-02, 2.377e-02, -9.786e-02, 5.232e-02, 2.238e-02, 1.318e-01, 4.875e-02)); + r += mul(s6_7, M4(8.652e-02, 2.481e-03, 2.506e-02, -5.165e-02, 8.771e-02, 9.404e-03, 4.192e-02, -5.653e-02, 3.218e-01, 1.354e-02, 8.510e-02, 8.392e-02, -6.334e-02, 7.593e-03, 1.150e-01, 4.606e-02)); + r += mul(s6_8, M4(-6.697e-02, -3.352e-02, -1.902e-02, -8.258e-02, -6.843e-02, -2.708e-02, 7.777e-02, 1.236e-01, 1.371e-02, 3.588e-02, -5.964e-02, 2.603e-04, 1.174e-01, 1.747e-02, -8.251e-02, 4.398e-02)); + r += mul(s7_0, M4(-2.168e-02, 7.199e-02, -5.631e-02, 1.383e-01, 1.324e-02, -4.067e-02, -1.283e-02, 6.363e-02, 2.488e-02, -6.017e-02, 3.009e-02, 1.336e-01, -2.399e-02, 5.897e-02, 8.480e-03, -4.747e-02)); + r += mul(s7_1, M4(1.093e-01, 7.825e-03, 3.643e-02, 4.922e-02, 7.785e-03, 7.618e-02, -4.316e-02, -1.097e-01, 6.528e-02, -7.597e-02, 1.538e-01, 3.378e-02, 1.036e-02, 1.740e-02, 
-1.892e-01, 2.236e-02)); + r += mul(s7_2, M4(2.499e-01, 1.734e-01, 1.939e-01, -1.016e-01, -2.700e-02, -1.218e-02, 1.330e-01, -4.832e-02, 7.202e-02, -3.737e-03, 7.867e-02, -1.434e-02, 1.819e-02, -1.655e-01, -1.782e-01, -3.900e-02)); + r += mul(s7_3, M4(5.788e-02, -1.806e-02, 4.414e-02, -6.338e-02, -4.268e-02, -3.505e-02, 5.591e-02, -2.530e-02, -4.892e-03, 2.751e-02, 1.689e-02, 1.906e-02, 2.026e-01, 5.643e-02, 1.002e-01, 1.098e-02)); + r += mul(s7_4, M4(-2.294e-02, -4.774e-02, 2.901e-01, 6.578e-02, -1.541e-01, 1.067e-01, 3.197e-01, -5.122e-02, 2.727e-02, -9.901e-02, -2.789e-02, -1.267e-01, -5.750e-02, 4.390e-02, -1.721e-01, -1.499e-01)); + r += mul(s7_5, M4(-9.402e-02, -2.030e-02, 5.222e-02, -1.743e-01, -4.580e-02, -3.926e-02, 2.109e-01, 5.317e-02, 8.321e-02, -3.145e-02, -1.214e-02, 1.857e-01, -3.654e-02, 2.986e-02, 2.944e-03, 8.246e-03)); + r += mul(s7_6, M4(-1.619e-01, 4.667e-02, -4.234e-02, -2.656e-02, -1.973e-02, 6.608e-02, 5.465e-02, -1.520e-01, 1.372e-02, -1.880e-03, 1.187e-01, 1.466e-01, 4.376e-02, 5.925e-03, 3.483e-02, -8.434e-02)); + r += mul(s7_7, M4(8.179e-02, -6.455e-02, 1.127e-01, -1.037e-01, -2.037e-01, 1.271e-01, 2.132e-01, -1.660e-01, -1.013e-01, 7.165e-03, 3.201e-03, -7.004e-02, -1.350e-01, -8.418e-02, -9.617e-04, -1.091e-02)); + r += mul(s7_8, M4(-1.502e-01, 2.698e-02, -2.027e-01, 2.748e-02, -1.763e-01, -6.399e-02, -4.648e-02, -1.599e-02, 4.186e-02, -3.419e-02, -1.255e-02, 1.214e-01, 6.660e-02, -2.269e-02, -3.588e-02, -2.921e-02)); + r += V4(-3.017e-03, -2.179e-03, -9.947e-04, -1.717e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 
s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.068e-04, -1.204e-01, 4.292e-02, -2.322e-01, 7.263e-02, -6.732e-02, -2.801e-02, 9.964e-03, -6.590e-03, 2.334e-02, 6.538e-02, -3.565e-01, 2.108e-02, -1.036e-01, 6.134e-02, 1.382e-01)); + r += mul(s0_1, M4(-5.029e-02, -7.275e-02, 5.063e-02, -1.110e-02, 4.380e-02, 4.034e-02, -1.042e-01, -1.455e-02, 1.055e-02, 3.170e-02, -1.554e-02, -1.033e-01, 2.073e-01, 3.202e-03, -8.257e-03, 2.445e-01)); + r += mul(s0_2, M4(5.688e-02, -7.895e-02, 5.165e-02, 6.176e-03, 6.183e-02, -7.351e-02, 1.285e-02, 1.244e-01, 1.361e-02, 2.905e-02, -3.437e-02, -6.790e-02, -2.269e-03, -4.321e-02, 6.246e-02, 1.473e-01)); + r += mul(s0_3, M4(-2.291e-02, -1.502e-01, -7.603e-03, 1.355e-01, -5.305e-02, 1.134e-01, -1.282e-02, 1.570e-02, -2.111e-01, -3.812e-01, -3.447e-01, 1.578e-01, 2.417e-03, 5.896e-02, -5.163e-02, -5.294e-02)); + r += mul(s0_4, M4(1.900e-01, -2.398e-01, 1.170e-01, -1.547e-02, 4.756e-03, -1.449e-01, -5.318e-02, -1.763e-01, 2.585e-01, -2.209e-01, 6.118e-02, -1.008e-01, -2.421e-01, -4.606e-02, -1.322e-01, 1.209e-02)); + r += mul(s0_5, M4(-5.750e-03, -5.512e-02, -3.397e-04, -6.929e-02, 7.312e-03, 2.912e-02, 6.804e-02, -9.701e-02, 5.363e-02, -3.913e-02, 1.144e-02, -6.841e-02, 9.510e-02, -1.010e-02, -7.571e-02, 6.174e-02)); + r += mul(s0_6, M4(3.139e-03, 1.147e-02, -1.003e-02, 2.934e-03, 1.570e-02, 3.821e-02, 1.733e-03, 1.197e-01, 1.307e-01, 1.198e-01, 5.437e-02, -1.454e-01, 7.494e-02, 8.354e-02, 4.805e-02, 8.956e-02)); + r += mul(s0_7, M4(-1.305e-02, -9.896e-03, -2.490e-02, -7.780e-02, 3.909e-02, 2.933e-02, -1.095e-02, 1.387e-03, 7.792e-02, -3.871e-02, 1.720e-01, -1.900e-01, 1.288e-02, -1.080e-01, -1.026e-01, 7.740e-03)); + r += mul(s0_8, M4(4.990e-03, 2.552e-02, 1.105e-01, 9.851e-02, -2.520e-02, -8.242e-02, -9.196e-02, -2.433e-02, -1.909e-02, 
-9.932e-04, -2.983e-02, -3.875e-02, 1.385e-02, 3.129e-02, -4.664e-03, 2.485e-02)); + r += mul(s1_0, M4(5.915e-02, 6.590e-02, 1.312e-02, -1.948e-01, -6.246e-02, 1.216e-01, 8.398e-02, -3.320e-01, 7.135e-02, -2.850e-03, 6.330e-03, 1.095e-01, 4.058e-02, -1.627e-02, -6.460e-02, -5.208e-02)); + r += mul(s1_1, M4(-2.444e-01, 1.257e-01, -3.160e-02, 6.848e-02, -1.397e-01, 1.748e-01, -3.867e-02, 1.034e-01, -6.418e-02, -2.701e-02, -3.038e-02, 2.540e-01, 1.164e-01, -4.954e-02, -3.766e-02, 5.431e-02)); + r += mul(s1_2, M4(-7.726e-02, -5.139e-02, -9.235e-02, -1.074e-01, -2.585e-02, -1.357e-01, -3.064e-02, -2.004e-01, -2.334e-02, -4.123e-02, -2.273e-02, 1.205e-02, 6.541e-02, 8.848e-03, 7.449e-02, 1.229e-01)); + r += mul(s1_3, M4(1.603e-01, 1.097e-01, -8.866e-02, 2.797e-01, -3.778e-02, -7.621e-02, -7.649e-02, 2.947e-01, -2.127e-01, -1.054e-01, 2.025e-01, 6.307e-02, -8.014e-02, 5.530e-02, -4.694e-02, -7.250e-02)); + r += mul(s1_4, M4(1.233e-01, -1.892e-01, -1.193e-02, 2.636e-01, 4.172e-02, -2.724e-01, -2.380e-02, -1.760e-01, -2.191e-01, -2.573e-02, 5.340e-01, -1.145e-01, 2.839e-02, -4.719e-02, 6.835e-02, 3.499e-02)); + r += mul(s1_5, M4(9.120e-04, -9.661e-02, -2.830e-01, -3.171e-02, 8.802e-02, -1.440e-01, 1.313e-01, -1.072e-01, -1.169e-01, 4.815e-02, 4.976e-02, 1.967e-01, 2.588e-02, 1.345e-02, 2.592e-02, -5.276e-02)); + r += mul(s1_6, M4(1.288e-01, -7.539e-02, -1.382e-01, 1.609e-02, -4.723e-02, 3.042e-02, -8.309e-02, -1.198e-01, -1.033e-01, -1.751e-01, 4.485e-03, 1.238e-01, 9.003e-02, 8.127e-02, 1.875e-02, 1.353e-02)); + r += mul(s1_7, M4(-9.747e-02, 1.855e-01, -1.925e-01, -5.547e-02, -7.088e-02, -9.803e-02, 1.539e-01, -9.433e-02, -1.762e-01, -1.183e-01, 1.326e-01, 2.236e-01, 4.235e-02, 2.056e-02, -3.245e-02, 1.732e-02)); + r += mul(s1_8, M4(8.181e-02, -5.216e-03, -8.506e-02, 8.966e-02, -2.085e-03, 1.868e-01, -3.452e-02, 1.632e-01, 1.210e-02, -1.903e-02, -6.220e-02, 5.310e-02, 4.398e-02, -3.663e-02, 3.722e-02, -4.242e-02)); + r += mul(s2_0, M4(-1.206e-01, 9.558e-02, -4.620e-02, 
3.062e-03, -4.847e-02, -1.453e-01, 4.601e-02, 3.178e-02, -9.780e-02, 1.438e-01, 1.179e-01, 1.688e-01, -3.957e-02, -1.288e-02, 1.401e-01, 5.510e-02)); + r += mul(s2_1, M4(-1.381e-01, 1.737e-01, 5.948e-02, -1.221e-01, 5.363e-02, -1.066e-01, -5.197e-03, -9.137e-02, 4.265e-03, 2.725e-01, 1.724e-01, 1.988e-01, 5.521e-03, -8.716e-02, 1.280e-01, 2.470e-02)); + r += mul(s2_2, M4(5.819e-02, -7.113e-02, 8.023e-02, -2.218e-01, 5.752e-03, -5.493e-02, 6.714e-04, 1.236e-01, 2.653e-02, 1.468e-01, 1.229e-02, -6.665e-02, -7.151e-02, 1.198e-02, 1.073e-01, 6.364e-02)); + r += mul(s2_3, M4(6.612e-02, 4.067e-02, -4.382e-02, -4.092e-01, -1.763e-01, -3.350e-02, 1.109e-01, -1.721e-01, -3.335e-01, -1.528e-01, -4.465e-02, -2.936e-01, -3.856e-02, -2.933e-02, 4.427e-02, 1.626e-01)); + r += mul(s2_4, M4(-1.707e-02, 1.707e-02, -3.251e-01, 1.643e-01, 1.466e-01, 3.248e-02, 1.353e-02, 1.580e-02, -8.672e-04, 2.900e-01, -6.717e-02, 2.236e-01, -8.835e-02, -7.532e-02, 3.308e-02, -1.505e-01)); + r += mul(s2_5, M4(-1.692e-02, 7.445e-02, 2.193e-01, 8.993e-02, 9.978e-02, -2.544e-02, 1.497e-01, -1.315e-01, -2.081e-02, -7.005e-02, 2.727e-01, 1.743e-01, -6.032e-02, -1.367e-01, -1.602e-03, -8.789e-02)); + r += mul(s2_6, M4(-1.569e-02, 7.364e-02, 2.221e-02, 1.627e-01, -7.248e-02, -1.373e-01, 5.594e-02, -8.769e-03, -1.682e-02, 1.029e-01, 1.361e-01, -3.526e-01, 1.620e-02, 4.748e-02, 2.704e-02, -1.062e-01)); + r += mul(s2_7, M4(-1.054e-01, -1.154e-02, -7.373e-02, -2.821e-01, 5.064e-02, -1.059e-03, 1.904e-01, 1.205e-01, 2.214e-01, -5.921e-04, 2.009e-01, -1.527e-01, -6.023e-02, -6.896e-02, -4.439e-02, 3.888e-02)); + r += mul(s2_8, M4(1.200e-01, 1.974e-02, -1.368e-01, -1.805e-01, -1.308e-01, -9.498e-02, 5.894e-02, -1.164e-01, -1.340e-01, 1.556e-01, -2.300e-02, 1.509e-01, 5.887e-02, 1.487e-02, 2.487e-02, 6.804e-02)); + r += mul(s3_0, M4(4.228e-02, -5.438e-02, -2.953e-02, 2.476e-02, -3.938e-03, 8.333e-02, -2.764e-02, -3.250e-02, -2.921e-02, -4.583e-02, -1.548e-02, -1.151e-02, -3.856e-03, -7.593e-02, 1.128e-02, 
6.069e-02)); + r += mul(s3_1, M4(1.559e-01, -1.065e-01, -2.049e-02, 6.581e-02, -5.467e-02, 1.221e-02, 2.313e-02, 7.192e-03, -2.114e-02, 2.589e-02, -7.534e-03, -1.009e-02, 1.563e-02, 4.367e-02, -1.770e-01, 1.473e-01)); + r += mul(s3_2, M4(9.814e-02, -3.655e-02, 5.478e-02, 4.815e-02, -2.470e-02, -6.146e-02, -7.068e-02, -6.625e-03, 4.639e-02, 1.204e-02, -2.514e-02, 1.869e-02, -3.452e-02, 6.861e-02, -3.143e-02, -2.935e-02)); + r += mul(s3_3, M4(-4.938e-02, -7.948e-02, 1.161e-02, -1.234e-02, -2.432e-02, 4.279e-02, 3.968e-02, -6.227e-02, -6.645e-02, 8.690e-02, -1.461e-01, -5.690e-03, 3.831e-02, 2.937e-01, 4.653e-02, 4.828e-02)); + r += mul(s3_4, M4(-7.999e-02, 2.704e-01, -2.645e-01, 1.795e-01, -5.116e-02, -2.315e-01, -6.567e-02, -7.200e-02, -5.511e-02, -1.013e-01, -7.740e-02, 5.260e-02, -1.919e-03, 1.028e-01, 9.671e-02, -6.519e-02)); + r += mul(s3_5, M4(-6.789e-03, -3.671e-02, 1.186e-01, -3.797e-01, -8.049e-02, 7.739e-02, 6.670e-02, 1.133e-01, -7.665e-03, 6.210e-02, 1.060e-01, -2.092e-02, 1.680e-01, 1.804e-02, -2.171e-02, 8.897e-02)); + r += mul(s3_6, M4(-5.568e-02, -1.409e-01, 1.968e-02, -3.617e-02, -3.076e-02, -3.317e-02, 1.160e-01, -5.722e-03, 6.979e-03, 6.354e-02, 1.354e-02, 7.411e-02, -4.428e-03, 9.028e-02, -1.606e-02, 5.119e-02)); + r += mul(s3_7, M4(-8.462e-02, 6.714e-02, -5.238e-03, 6.400e-03, 1.121e-01, -2.286e-02, -2.020e-02, -7.229e-04, 5.897e-04, -4.113e-02, 1.337e-01, 8.621e-03, -2.099e-01, -2.948e-02, -1.520e-02, 1.098e-01)); + r += mul(s3_8, M4(4.036e-02, -1.405e-01, 5.163e-02, -7.905e-03, 1.593e-02, -2.116e-02, -7.974e-03, 5.984e-02, 4.096e-02, 4.600e-02, 4.659e-02, 5.924e-02, 1.334e-02, 4.451e-02, -7.178e-02, 6.131e-03)); + r += mul(s4_0, M4(3.154e-02, 3.548e-02, 1.730e-02, 3.346e-02, -4.024e-02, -3.264e-02, -1.470e-01, -1.200e-01, 1.690e-02, -2.265e-02, -4.149e-02, -2.140e-02, 1.119e-01, 2.084e-01, -1.121e-01, -1.373e-01)); + r += mul(s4_1, M4(-5.462e-02, 3.954e-02, -2.348e-02, 6.568e-02, -6.036e-02, 7.795e-02, -9.620e-02, 3.154e-01, 8.862e-02, 
-7.174e-02, -9.480e-02, -3.692e-02, -1.154e-01, 3.932e-02, 1.292e-01, -2.248e-01)); + r += mul(s4_2, M4(2.872e-02, -5.408e-02, 4.336e-02, -7.111e-02, 4.963e-02, 3.748e-02, -6.812e-02, 8.766e-02, 9.474e-03, 5.083e-02, -7.391e-02, 1.053e-01, 6.506e-02, -6.320e-02, 1.222e-01, 9.191e-03)); + r += mul(s4_3, M4(-6.555e-02, 4.505e-02, -4.655e-02, -9.145e-02, 6.602e-03, 1.364e-01, 8.412e-02, -4.747e-03, -2.966e-02, 6.929e-02, 9.440e-02, -1.133e-01, 1.360e-01, 3.425e-02, -2.808e-02, -1.973e-01)); + r += mul(s4_4, M4(-9.314e-02, 6.382e-03, -1.005e-01, 8.729e-02, 5.224e-02, -9.051e-02, 2.902e-01, 1.207e-01, -5.073e-02, 2.289e-01, 8.406e-02, 6.964e-02, 1.459e-02, -2.334e-01, 1.740e-01, 1.771e-01)); + r += mul(s4_5, M4(1.237e-01, 8.729e-02, 4.959e-02, -1.984e-02, 1.293e-02, 7.650e-04, 4.499e-02, 5.544e-02, -1.017e-01, -8.729e-02, 2.581e-02, -7.212e-02, 3.828e-02, -3.534e-02, 2.333e-01, -2.448e-01)); + r += mul(s4_6, M4(-2.343e-02, -4.688e-02, 2.338e-02, 3.721e-02, 2.008e-02, 1.627e-03, 5.451e-02, 2.319e-01, 6.909e-02, -7.454e-03, -4.210e-03, 3.158e-02, -1.099e-02, 9.028e-02, -4.876e-02, 1.138e-01)); + r += mul(s4_7, M4(-6.356e-02, -1.661e-01, 2.443e-02, -5.680e-02, 1.244e-01, -1.160e-01, 5.501e-02, -1.000e-01, -1.478e-01, -8.587e-02, 6.595e-02, -1.460e-01, -1.961e-01, -1.595e-01, -1.242e-01, -1.330e-01)); + r += mul(s4_8, M4(3.144e-02, 3.457e-02, 4.639e-02, 3.490e-03, 7.927e-02, 5.451e-02, 1.042e-01, 4.356e-02, 2.098e-02, -1.338e-02, -5.927e-02, 5.882e-02, -5.585e-02, -9.631e-03, -4.868e-02, 7.457e-02)); + r += mul(s5_0, M4(-3.288e-03, 6.522e-02, -3.526e-03, -1.352e-02, 1.039e-01, 1.087e-01, -1.976e-02, -1.398e-01, -1.507e-02, 2.050e-02, 2.135e-02, 7.472e-02, 2.467e-02, 1.739e-03, -2.406e-03, 3.458e-02)); + r += mul(s5_1, M4(1.557e-01, 4.224e-02, -3.892e-02, 1.267e-01, 2.728e-02, -3.507e-02, 6.106e-02, -2.876e-02, 6.176e-02, 1.852e-02, -1.066e-01, 1.126e-01, -2.912e-02, -7.657e-02, -1.089e-01, -2.211e-02)); + r += mul(s5_2, M4(8.400e-02, -3.667e-02, 8.771e-02, -2.185e-02, 
-1.031e-02, 2.948e-02, -2.570e-02, -5.338e-02, 1.059e-01, -1.223e-02, -1.557e-01, 8.655e-02, 2.410e-02, 4.648e-02, 1.769e-02, -7.573e-02)); + r += mul(s5_3, M4(8.675e-02, -6.990e-02, 2.511e-02, -9.416e-03, 9.809e-02, 1.254e-01, -5.303e-02, -8.618e-02, -8.457e-02, 9.567e-02, -9.494e-02, -1.148e-01, -1.560e-02, 6.806e-02, 9.405e-02, 4.098e-02)); + r += mul(s5_4, M4(5.280e-02, 6.067e-02, -1.976e-01, 9.863e-02, -8.271e-02, -1.365e-02, 1.193e-01, -1.790e-01, -6.840e-03, 1.895e-01, -9.188e-02, -1.218e-01, -3.719e-03, 6.033e-02, -2.169e-02, 2.204e-01)); + r += mul(s5_5, M4(2.242e-01, -2.921e-03, -3.570e-04, -3.935e-03, -1.253e-03, -6.734e-02, 7.546e-04, -2.474e-02, -1.686e-01, 5.612e-02, 1.091e-01, -4.316e-02, 1.036e-01, 2.184e-02, 9.947e-02, -1.900e-01)); + r += mul(s5_6, M4(2.334e-02, -3.774e-02, -6.603e-02, -1.323e-01, 3.283e-02, -4.643e-02, -6.959e-02, -3.146e-03, -7.513e-03, 1.399e-03, -5.232e-02, 4.053e-02, 9.091e-03, 3.483e-02, 6.393e-03, 1.083e-02)); + r += mul(s5_7, M4(6.783e-02, -2.924e-01, -2.529e-02, -4.163e-01, 4.052e-02, -6.870e-02, -4.707e-02, -9.593e-02, -2.101e-01, -1.228e-01, -4.197e-02, 1.293e-02, -5.435e-02, -2.645e-01, 7.583e-02, -1.708e-01)); + r += mul(s5_8, M4(8.686e-03, -1.299e-01, -5.771e-02, -1.496e-01, 1.606e-02, -1.679e-02, -3.246e-03, -1.859e-01, -4.083e-02, -1.148e-01, -7.817e-03, 4.128e-02, 6.493e-02, 2.126e-02, 4.033e-03, 1.422e-01)); + r += mul(s6_0, M4(-1.077e-01, 3.526e-02, 7.391e-02, -5.982e-02, 3.658e-02, 6.680e-02, 1.467e-02, -1.150e-03, 1.662e-02, -3.063e-03, -8.420e-02, -1.484e-01, -2.569e-03, -2.318e-02, 1.749e-03, 2.355e-01)); + r += mul(s6_1, M4(-1.208e-01, 1.325e-01, 1.647e-02, 9.317e-02, 1.960e-02, 3.862e-02, 3.310e-02, 7.721e-02, 4.446e-02, -4.653e-03, -1.381e-02, 6.921e-02, -1.951e-01, 2.847e-02, -2.456e-01, -3.425e-02)); + r += mul(s6_2, M4(-4.401e-02, 1.811e-02, 4.561e-02, 3.513e-01, 1.081e-02, -3.141e-02, 2.725e-02, -2.780e-02, -6.767e-02, -1.508e-02, 8.973e-03, -2.144e-02, 1.207e-03, -2.087e-02, -1.307e-01, -1.781e-01)); 
+ r += mul(s6_3, M4(-3.827e-02, -3.661e-02, 1.011e-02, 1.545e-01, 1.581e-02, 6.656e-02, -4.645e-02, 8.442e-02, -2.433e-02, 6.781e-02, -1.275e-01, 8.078e-02, -8.402e-02, -1.286e-02, 1.922e-01, 1.349e-01)); + r += mul(s6_4, M4(1.241e-01, -6.509e-01, -5.271e-02, -2.358e-01, -8.803e-02, 1.740e-01, -2.688e-01, 1.401e-01, -5.751e-02, 2.101e-02, -9.749e-02, -1.048e-01, -6.910e-02, -4.104e-02, 3.521e-01, -1.025e-01)); + r += mul(s6_5, M4(8.778e-03, -5.586e-02, 7.706e-02, -4.578e-02, 1.213e-02, 9.870e-02, -4.086e-02, -8.745e-02, -6.358e-02, 2.182e-02, -1.452e-02, 2.654e-02, -3.300e-02, 5.861e-02, 7.919e-02, 1.088e-01)); + r += mul(s6_6, M4(-6.089e-02, -2.363e-02, -1.039e-02, -8.941e-02, 4.292e-02, -3.782e-03, 4.320e-02, 5.394e-03, -6.660e-02, 1.384e-02, -1.341e-01, -1.106e-01, -7.917e-02, 3.123e-02, 3.668e-02, 1.299e-01)); + r += mul(s6_7, M4(5.717e-02, 1.718e-01, 9.223e-02, 4.245e-01, -2.327e-03, -1.134e-01, -9.775e-03, 4.413e-02, -5.453e-02, 4.146e-01, -8.485e-02, 2.315e-01, 1.504e-03, 1.173e-01, -5.307e-02, 3.062e-02)); + r += mul(s6_8, M4(-1.238e-01, 4.796e-02, -8.737e-02, -1.498e-01, 8.415e-02, -2.357e-02, 1.694e-01, 8.595e-02, 1.155e-01, -2.768e-02, -9.003e-02, -1.101e-01, -8.561e-02, -1.678e-02, -6.089e-02, 3.184e-02)); + r += mul(s7_0, M4(-4.965e-02, -4.120e-02, 1.285e-01, 1.078e-02, -1.119e-02, -6.779e-03, -5.037e-02, 1.074e-01, 6.507e-03, -1.108e-01, 6.075e-02, -6.955e-04, 1.402e-01, 9.810e-02, -7.748e-03, 4.435e-02)); + r += mul(s7_1, M4(1.647e-02, 3.714e-02, -5.533e-02, 6.509e-03, -7.659e-02, -8.373e-02, 5.623e-02, -7.749e-03, -6.849e-02, -8.011e-02, 6.262e-02, 1.709e-01, 7.538e-02, 1.420e-01, 1.243e-01, -4.103e-02)); + r += mul(s7_2, M4(-8.281e-02, -6.420e-02, 1.808e-02, -1.000e-01, 3.493e-02, -5.424e-02, 5.403e-02, -2.961e-02, 6.306e-02, 3.705e-02, -3.130e-02, -4.529e-02, -4.749e-02, 6.859e-04, 5.235e-02, -2.872e-02)); + r += mul(s7_3, M4(1.268e-01, -1.369e-01, 8.872e-02, -1.221e-01, 1.602e-02, 5.109e-03, -1.217e-01, 1.610e-01, 1.709e-01, -7.700e-03, 
1.607e-01, 9.319e-02, 2.199e-01, -1.097e-01, -3.539e-02, -1.032e-01)); + r += mul(s7_4, M4(-4.076e-01, 1.495e-01, 6.882e-02, -5.524e-02, -4.597e-01, 4.264e-02, -2.740e-01, 1.135e-01, -6.260e-02, 8.472e-03, 3.468e-01, -1.748e-01, -9.361e-02, 8.264e-03, 5.110e-01, -1.052e-01)); + r += mul(s7_5, M4(-9.600e-02, -2.135e-02, -2.585e-02, -4.248e-02, -2.243e-01, 1.290e-01, -1.017e-01, 4.722e-02, -1.032e-02, 2.541e-02, 9.565e-02, -8.045e-02, -7.933e-02, -5.089e-02, 1.153e-01, 9.537e-03)); + r += mul(s7_6, M4(-5.367e-02, -3.114e-02, -1.703e-02, 9.851e-03, -1.690e-01, -8.414e-02, -8.379e-02, -5.830e-04, 2.686e-01, 1.545e-01, 9.429e-02, 1.295e-01, 2.207e-01, -6.419e-02, 3.829e-02, 5.205e-02)); + r += mul(s7_7, M4(7.676e-02, 1.129e-01, 3.466e-02, -2.708e-02, -9.591e-02, -1.674e-01, -8.708e-02, 7.116e-02, -2.083e-01, -6.360e-02, 1.431e-01, -1.180e-01, -6.194e-02, 9.175e-02, 3.166e-02, 2.715e-02)); + r += mul(s7_8, M4(-7.521e-02, -4.163e-02, -7.575e-02, -1.588e-01, 2.143e-02, 3.748e-02, 1.099e-02, 1.665e-01, 6.809e-02, -5.992e-02, 1.567e-01, 1.218e-01, 1.592e-02, -3.885e-02, -2.286e-02, -8.052e-02)); + r += V4(3.256e-03, 5.569e-03, -5.325e-03, 5.488e-04); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.547e-01, -1.336e-02, -9.624e-02, -1.577e-03, 3.593e-02, 3.042e-02, -8.003e-02, 2.192e-02, -4.950e-02, 7.476e-02, -6.039e-02, 
8.056e-02, -7.910e-02, -8.635e-02, -1.421e-01, 8.092e-02)); + r += mul(s0_1, M4(-1.913e-02, 4.725e-02, -1.637e-01, -1.348e-01, -9.849e-02, -1.759e-01, -4.068e-02, 2.364e-02, -1.270e-01, -1.379e-02, -1.358e-01, 1.061e-01, 1.210e-01, -3.639e-02, -8.988e-02, -1.070e-01)); + r += mul(s0_2, M4(1.796e-02, 3.540e-02, -7.519e-02, -5.215e-02, 4.199e-02, -5.199e-02, 1.456e-03, -3.571e-03, -3.944e-02, -6.634e-03, -4.133e-02, 4.924e-02, -1.425e-01, -1.294e-01, 1.045e-01, -2.157e-02)); + r += mul(s0_3, M4(-9.407e-02, 7.844e-02, -3.308e-02, 5.621e-02, 1.042e-01, -3.890e-02, 2.055e-01, -1.072e-01, 2.748e-02, 4.462e-01, -1.676e-01, 3.662e-01, 8.381e-03, 1.181e-02, 3.234e-02, 3.358e-02)); + r += mul(s0_4, M4(-1.005e-01, -2.126e-03, 5.502e-02, -6.089e-02, 2.273e-01, 4.151e-03, -1.460e-01, 1.359e-01, 9.257e-02, 2.512e-02, -1.959e-01, 3.031e-01, 1.875e-01, 9.293e-02, 4.954e-02, -9.439e-02)); + r += mul(s0_5, M4(8.254e-02, -3.787e-02, 8.115e-03, 1.379e-01, -6.686e-02, -5.386e-02, -1.296e-01, -5.086e-02, -4.210e-02, -4.231e-03, -4.891e-02, 9.751e-02, -9.846e-03, -7.811e-02, 3.148e-02, -8.734e-02)); + r += mul(s0_6, M4(-3.271e-02, 1.928e-02, 2.436e-03, -6.287e-02, -3.796e-02, 7.205e-02, -7.604e-03, -1.239e-02, -4.773e-02, 9.715e-02, -8.941e-03, 9.275e-02, -9.353e-02, 4.975e-02, -1.497e-01, -9.723e-02)); + r += mul(s0_7, M4(-6.321e-02, -3.796e-02, -5.824e-02, 1.675e-01, 1.885e-01, 4.765e-02, 1.688e-01, 1.402e-02, -2.094e-01, 7.993e-02, -6.426e-02, 3.454e-02, -1.092e-01, 4.996e-02, -7.640e-02, 6.195e-02)); + r += mul(s0_8, M4(-1.590e-02, 1.018e-02, -3.362e-02, -4.663e-02, 9.686e-02, -5.498e-03, 1.110e-01, -8.963e-02, 7.007e-02, 6.482e-02, -2.983e-03, 9.060e-02, -2.021e-02, 3.653e-02, -2.570e-02, -1.258e-01)); + r += mul(s1_0, M4(-3.619e-02, -1.744e-01, -4.347e-02, -4.267e-02, -4.923e-03, -9.286e-02, 2.422e-01, -4.670e-02, 1.320e-01, -1.984e-01, 2.182e-01, 1.958e-02, 4.612e-03, 8.703e-03, -5.831e-02, -7.535e-02)); + r += mul(s1_1, M4(-5.531e-03, -9.462e-02, 2.501e-02, 9.215e-02, -1.924e-02, 
-1.056e-01, 1.524e-01, -5.061e-02, 9.483e-02, -1.310e-01, 1.569e-01, 1.182e-01, 6.513e-02, 2.052e-02, -9.772e-02, 2.345e-02)); + r += mul(s1_2, M4(3.993e-02, 2.333e-02, 2.727e-01, -1.340e-02, 2.623e-02, 3.439e-02, 2.126e-02, 9.785e-02, 9.983e-02, -2.156e-03, 6.621e-02, 5.928e-02, 6.364e-02, -8.201e-02, -1.501e-02, -8.141e-02)); + r += mul(s1_3, M4(1.770e-02, -1.235e-02, -4.150e-04, 1.100e-01, 5.326e-02, 2.712e-02, -1.292e-01, 7.162e-02, -1.035e-01, 2.255e-01, 1.976e-02, -2.447e-01, 1.629e-02, -8.791e-02, -2.548e-02, -1.104e-01)); + r += mul(s1_4, M4(-9.864e-04, -1.118e-01, 1.131e-01, -2.565e-01, -9.560e-02, 1.174e-01, -2.686e-01, 9.302e-02, -1.345e-01, -2.369e-01, 4.699e-02, -1.489e-02, 1.831e-01, 1.548e-01, 5.183e-02, 1.790e-01)); + r += mul(s1_5, M4(1.439e-01, 4.397e-03, -3.498e-02, 1.244e-01, 1.418e-01, -2.472e-02, -2.413e-01, 1.052e-01, 1.606e-01, -8.201e-03, 6.680e-02, -8.102e-02, -2.082e-02, -3.969e-02, -7.201e-02, -4.495e-02)); + r += mul(s1_6, M4(6.272e-02, 4.990e-02, 8.887e-02, 3.249e-02, 3.269e-02, -8.323e-02, 1.116e-01, 2.040e-02, 1.995e-02, 1.057e-01, -6.512e-03, -3.846e-02, -2.073e-02, 8.689e-03, 2.337e-02, -8.896e-03)); + r += mul(s1_7, M4(-1.356e-01, 2.012e-02, 3.494e-02, -6.068e-02, 8.204e-02, -3.639e-02, 1.480e-01, 1.784e-01, -4.654e-03, -8.309e-03, 5.955e-02, -9.202e-02, 3.201e-02, 7.348e-02, 1.535e-01, 1.772e-01)); + r += mul(s1_8, M4(7.614e-02, 4.078e-03, 2.274e-01, 6.708e-02, 1.090e-01, -4.981e-03, 1.636e-01, 1.457e-01, 7.425e-02, -6.353e-02, 1.289e-02, -2.213e-02, 1.256e-02, -4.327e-02, 4.475e-02, -5.957e-02)); + r += mul(s2_0, M4(-3.046e-02, 1.478e-01, 4.846e-02, -1.134e-01, -9.139e-02, -3.482e-03, -8.726e-02, -8.179e-02, -9.263e-02, -9.555e-03, 1.925e-02, 1.085e-01, -1.840e-02, 2.115e-01, 5.158e-02, 5.294e-02)); + r += mul(s2_1, M4(1.175e-01, -7.622e-02, 1.346e-01, -2.636e-01, -8.279e-02, 2.601e-02, -1.111e-02, -1.346e-01, -1.376e-01, -1.053e-01, -4.803e-01, 1.282e-01, -1.441e-01, -6.537e-02, -1.831e-01, -7.560e-02)); + r += mul(s2_2, 
M4(-5.088e-02, 6.594e-02, 3.108e-02, 1.725e-02, 1.314e-01, -2.219e-02, -7.137e-02, 3.789e-02, 5.741e-02, -2.482e-02, -2.007e-01, 1.658e-02, -5.010e-02, -5.699e-02, 9.114e-02, -5.246e-02)); + r += mul(s2_3, M4(-8.034e-02, 3.582e-04, 4.709e-02, -1.542e-01, 1.968e-02, 5.846e-02, 1.434e-01, -5.554e-02, -3.457e-01, 8.977e-02, -3.070e-01, -8.939e-02, -9.490e-02, 1.739e-02, -1.325e-02, -6.021e-02)); + r += mul(s2_4, M4(3.695e-02, 4.282e-01, -4.779e-02, -2.542e-01, 4.170e-01, -4.199e-03, 2.635e-01, 9.272e-02, -1.018e-01, -3.018e-01, -1.229e-01, 5.847e-01, 5.349e-02, -9.264e-02, 1.590e-01, -8.636e-02)); + r += mul(s2_5, M4(2.304e-01, 8.633e-02, -8.647e-02, -1.178e-01, 6.404e-02, 2.085e-02, 5.816e-02, 6.002e-02, -2.182e-01, -6.899e-03, 1.213e-01, -2.377e-01, 1.045e-01, -6.460e-03, -6.306e-02, 9.807e-02)); + r += mul(s2_6, M4(4.065e-02, 4.113e-02, -1.966e-02, -1.033e-02, 6.379e-02, -9.661e-02, -7.923e-02, -3.830e-02, -1.658e-01, -1.424e-01, -1.789e-01, -2.376e-01, 3.041e-02, 2.283e-02, 8.950e-02, 3.774e-02)); + r += mul(s2_7, M4(4.361e-03, 4.963e-02, -4.400e-02, 3.623e-02, 5.359e-02, 3.067e-04, -6.885e-02, -1.999e-01, -1.801e-02, 3.282e-02, -1.433e-01, 4.356e-01, 1.168e-01, -9.429e-02, 7.713e-03, 2.926e-02)); + r += mul(s2_8, M4(1.808e-01, -1.784e-02, 1.194e-01, 9.667e-02, 5.927e-02, -2.430e-02, -3.530e-02, 3.124e-02, -1.911e-01, -8.520e-02, -1.675e-01, 3.291e-02, 3.590e-02, -5.538e-02, 8.543e-03, 2.209e-02)); + r += mul(s3_0, M4(-4.417e-03, 6.142e-02, 3.291e-03, 4.601e-06, 9.343e-03, 6.774e-03, 3.549e-02, 1.940e-02, -1.764e-02, 8.100e-02, 4.121e-02, 5.637e-03, 4.980e-02, -3.647e-02, 6.004e-05, 4.063e-02)); + r += mul(s3_1, M4(2.011e-02, -3.297e-02, -2.439e-01, 9.536e-02, -4.038e-02, -1.251e-02, 9.924e-02, -2.949e-02, 1.093e-01, 7.510e-02, -2.881e-02, -2.545e-02, 1.724e-01, 8.464e-02, -7.721e-02, 1.169e-01)); + r += mul(s3_2, M4(2.340e-02, 7.193e-04, -1.792e-01, -6.388e-02, 1.548e-01, 1.248e-01, 9.521e-02, 3.513e-02, 4.711e-02, 1.726e-02, -7.443e-02, 1.201e-02, 1.924e-02, 
-8.228e-02, -3.082e-02, 1.620e-01)); + r += mul(s3_3, M4(-4.869e-02, 5.506e-02, -1.423e-03, -8.865e-02, 4.620e-02, -3.023e-02, -6.991e-02, -5.063e-02, -5.804e-03, 9.471e-04, -1.476e-02, -7.117e-02, 2.921e-02, -8.528e-02, 9.544e-02, -6.845e-02)); + r += mul(s3_4, M4(-3.153e-02, 2.901e-01, 1.062e-01, -1.654e-01, 3.504e-02, -1.983e-01, 3.474e-02, 2.137e-01, 5.099e-02, 1.739e-02, 1.364e-01, 3.750e-02, -1.250e-02, -4.225e-02, -6.298e-02, 1.174e-01)); + r += mul(s3_5, M4(3.235e-02, 1.014e-01, -4.390e-02, 1.367e-02, -7.041e-02, -8.663e-02, 4.107e-02, -1.313e-01, -5.041e-02, -8.777e-03, 6.259e-02, 2.509e-02, 1.405e-01, 1.588e-01, 2.664e-02, 1.379e-01)); + r += mul(s3_6, M4(4.731e-02, 3.992e-02, 8.726e-02, 8.352e-02, -4.385e-02, 6.694e-04, 5.591e-02, -1.869e-02, 3.434e-02, -4.853e-02, -6.173e-02, -4.227e-02, 1.133e-02, 7.837e-02, -6.929e-02, 5.805e-02)); + r += mul(s3_7, M4(1.449e-01, 1.094e-02, -5.163e-02, 9.452e-02, 1.171e-01, 1.046e-03, 1.260e-01, 1.795e-01, 3.258e-03, 3.548e-03, 2.109e-02, 1.907e-02, 5.014e-02, 4.713e-02, 1.726e-02, -7.078e-02)); + r += mul(s3_8, M4(8.935e-02, -6.422e-02, 6.770e-02, -4.356e-02, -1.583e-02, -1.684e-02, -8.968e-02, 2.203e-02, -1.843e-02, -2.197e-02, -1.469e-02, 1.552e-02, 2.657e-02, -4.394e-03, 4.107e-02, -7.831e-03)); + r += mul(s4_0, M4(7.700e-02, 3.542e-02, 2.845e-02, -6.420e-02, 2.802e-02, 6.201e-02, 1.097e-01, -1.347e-01, 3.497e-02, 9.450e-02, -1.123e-02, 5.075e-02, 8.997e-02, -1.132e-01, -1.337e-02, 5.028e-02)); + r += mul(s4_1, M4(-7.537e-02, 5.639e-02, 1.085e-01, -1.155e-01, 2.222e-01, 6.309e-02, -3.337e-02, -8.426e-02, -4.272e-02, 1.215e-02, -1.528e-01, 6.993e-02, -2.884e-01, -2.062e-01, 2.984e-02, -1.550e-02)); + r += mul(s4_2, M4(4.233e-03, -1.193e-01, 4.585e-02, 1.372e-01, -3.937e-02, -6.613e-02, -4.116e-02, 2.296e-03, 7.788e-02, 3.592e-02, 1.762e-02, -2.034e-01, -8.970e-02, 2.347e-02, 2.314e-02, -5.101e-02)); + r += mul(s4_3, M4(5.040e-02, -6.901e-02, 7.410e-03, -5.935e-02, -1.089e-02, 1.786e-01, 1.902e-01, -3.943e-03, 
2.349e-03, -6.481e-02, -4.593e-02, 3.733e-02, 6.607e-02, -1.105e-01, 1.234e-01, -6.116e-02)); + r += mul(s4_4, M4(-1.231e-01, 8.899e-02, 5.861e-02, 6.610e-02, -1.799e-02, -2.101e-02, -4.797e-03, 2.038e-01, -1.418e-01, -3.988e-02, 3.584e-02, -1.958e-01, -2.224e-01, -1.563e-01, -1.337e-02, -1.558e-01)); + r += mul(s4_5, M4(2.220e-01, -1.101e-02, 2.653e-03, 7.280e-02, -3.506e-02, 7.103e-04, 1.225e-01, 3.272e-04, -4.636e-02, 1.115e-01, 4.076e-02, -1.121e-01, 4.149e-02, -4.349e-02, -1.524e-01, 3.738e-02)); + r += mul(s4_6, M4(7.249e-02, -5.538e-03, 3.810e-02, 7.547e-02, -4.347e-02, 3.399e-02, 1.992e-02, -4.980e-02, 1.005e-02, 3.027e-02, -4.584e-02, -2.648e-02, 3.174e-02, 1.282e-01, 3.011e-02, -1.462e-01)); + r += mul(s4_7, M4(1.377e-01, -3.892e-02, 1.709e-02, 1.353e-01, 3.778e-02, -8.988e-02, 1.101e-01, 7.760e-02, 6.029e-02, -1.098e-01, 1.895e-01, -9.690e-02, 2.427e-01, -9.797e-02, -8.529e-02, -5.416e-02)); + r += mul(s4_8, M4(1.343e-01, -7.242e-02, 2.388e-02, 2.345e-02, -3.886e-02, 3.248e-02, 1.910e-02, -8.438e-03, 2.481e-02, -4.870e-02, -6.791e-03, 1.061e-01, 3.995e-02, -4.022e-03, -6.221e-03, -5.024e-02)); + r += mul(s5_0, M4(5.602e-02, -4.308e-02, 9.808e-02, 6.040e-04, 6.339e-02, 5.917e-02, -4.881e-02, -9.820e-02, -4.466e-02, 7.009e-03, 2.818e-02, -3.478e-02, 1.907e-02, -1.026e-01, -2.022e-02, -9.760e-02)); + r += mul(s5_1, M4(-1.070e-01, -1.691e-03, 9.849e-02, 1.522e-01, -7.722e-03, 2.113e-02, -9.818e-02, -5.139e-02, 1.086e-01, -3.740e-02, 5.049e-02, -2.643e-02, 9.843e-03, 4.505e-02, -6.481e-02, 2.022e-02)); + r += mul(s5_2, M4(-9.071e-02, -6.706e-02, -2.061e-02, 1.538e-01, -3.540e-02, 2.135e-02, 9.540e-04, 3.653e-02, 1.507e-01, 1.192e-01, 1.021e-01, -8.306e-03, -4.461e-02, -5.049e-02, -1.124e-01, 9.884e-03)); + r += mul(s5_3, M4(6.294e-02, -1.282e-01, 3.868e-02, -1.814e-01, 4.439e-02, 7.000e-02, -1.362e-01, 7.447e-02, 2.668e-02, 4.819e-02, -5.729e-02, 8.139e-03, -5.443e-02, 9.531e-02, 1.156e-01, 4.085e-02)); + r += mul(s5_4, M4(-8.484e-03, 6.197e-03, 6.633e-02, 
1.655e-01, 6.602e-02, -7.951e-02, -1.966e-01, 1.283e-01, -1.011e-01, 4.280e-02, -5.414e-02, -2.545e-01, -1.495e-01, -6.738e-02, 1.571e-02, -1.941e-01)); + r += mul(s5_5, M4(2.349e-01, 9.500e-02, -1.113e-01, -2.583e-02, 2.032e-02, 5.190e-02, -1.364e-01, -4.065e-02, -1.637e-01, 1.754e-02, -2.211e-02, -5.600e-02, 2.849e-01, 5.747e-02, -2.978e-02, 5.666e-02)); + r += mul(s5_6, M4(-6.206e-02, 2.496e-02, 2.480e-02, 3.657e-02, -1.159e-01, 5.506e-02, -1.441e-01, -2.198e-02, 2.306e-03, 9.180e-02, 2.019e-02, -4.232e-04, 2.116e-02, -2.850e-02, -4.118e-02, -3.627e-02)); + r += mul(s5_7, M4(8.919e-02, -4.855e-02, 6.477e-02, 1.439e-01, -6.477e-02, 1.138e-01, -9.273e-02, -2.459e-02, 2.984e-02, -5.195e-02, 5.993e-02, 1.544e-02, 1.065e-01, 5.338e-02, 5.271e-02, 1.000e-01)); + r += mul(s5_8, M4(1.037e-01, -3.917e-02, 1.450e-02, -5.118e-02, -5.068e-02, 3.593e-02, -2.183e-01, -2.920e-02, -8.386e-02, -5.458e-02, 8.985e-02, 4.988e-02, 7.487e-02, 2.530e-02, 5.512e-02, -2.582e-02)); + r += mul(s6_0, M4(-1.468e-02, 3.283e-02, 2.486e-01, -2.172e-02, -3.365e-02, -7.289e-02, -1.009e-01, 3.873e-02, -5.674e-02, -2.549e-03, -4.322e-02, 7.819e-03, 4.291e-02, -9.667e-02, -7.909e-04, -1.967e-01)); + r += mul(s6_1, M4(-2.950e-02, -1.050e-01, 1.905e-01, 3.581e-01, 6.321e-02, -6.692e-02, -6.391e-03, -6.914e-04, 3.219e-02, -4.255e-02, -6.420e-02, -7.836e-02, 2.385e-01, 2.386e-02, 3.324e-02, 5.495e-02)); + r += mul(s6_2, M4(8.648e-02, 9.869e-02, 3.091e-01, -5.345e-02, 7.443e-02, 1.316e-02, -1.180e-02, 3.567e-02, -2.939e-02, 1.190e-02, -5.904e-02, -9.631e-03, 1.171e-01, 1.086e-01, -4.678e-02, 3.794e-02)); + r += mul(s6_3, M4(-4.051e-02, -8.971e-02, -3.028e-01, 1.302e-01, 4.434e-02, -2.039e-02, -9.829e-02, 1.112e-02, 1.413e-01, 2.567e-01, 3.658e-02, -6.832e-02, -1.298e-01, -1.938e-01, 7.153e-02, -1.650e-02)); + r += mul(s6_4, M4(-8.773e-02, -2.883e-01, -4.347e-01, 1.199e-01, -4.540e-02, 3.200e-02, 9.576e-02, 6.254e-03, -1.749e-01, 2.754e-01, -3.059e-01, -8.842e-02, -3.085e-01, -1.033e-01, -4.969e-02, 
-1.340e-01)); + r += mul(s6_5, M4(-7.059e-02, 3.359e-03, -2.079e-01, 4.492e-02, -7.667e-02, -1.844e-02, 1.917e-01, 1.008e-02, -9.998e-02, -9.386e-02, -1.054e-01, -4.729e-02, -2.093e-01, -9.179e-02, 1.722e-01, 3.993e-02)); + r += mul(s6_6, M4(-7.153e-02, 5.159e-02, 8.474e-02, 1.321e-01, 1.886e-02, -1.753e-02, -5.105e-02, 5.375e-02, 4.419e-02, 1.282e-01, 4.456e-02, 9.137e-02, -4.818e-02, -1.826e-02, -1.290e-01, -3.767e-02)); + r += mul(s6_7, M4(-9.777e-02, 1.020e-01, -1.329e-01, -1.994e-01, 4.367e-02, 2.940e-02, -1.080e-01, 9.001e-02, 3.387e-01, 1.146e-01, 2.638e-01, 9.648e-02, -1.823e-01, -3.080e-02, 1.478e-01, 2.614e-02)); + r += mul(s6_8, M4(-5.921e-02, -3.250e-02, 4.172e-02, -7.936e-02, -5.095e-02, 2.877e-02, -1.530e-01, -9.587e-02, 2.092e-02, 7.210e-02, 1.127e-01, 3.929e-02, -8.320e-02, -3.228e-03, 1.452e-01, 1.284e-01)); + r += mul(s7_0, M4(-5.253e-02, 2.278e-02, 6.018e-02, 9.507e-02, 3.973e-02, 6.387e-02, 6.711e-03, -4.682e-02, -1.063e-01, 4.267e-03, 2.971e-02, -4.539e-02, 1.165e-01, -7.157e-02, 4.141e-02, 1.145e-01)); + r += mul(s7_1, M4(-4.652e-02, 7.815e-02, -1.048e-01, -4.295e-02, 1.362e-01, 6.352e-02, -2.051e-04, 2.946e-02, -1.731e-01, 3.810e-02, -8.881e-02, -2.138e-02, 9.736e-02, -3.249e-02, 6.547e-02, 1.465e-02)); + r += mul(s7_2, M4(-1.197e-01, -2.140e-02, -1.381e-02, -1.613e-02, 9.536e-02, 2.392e-02, -1.832e-02, -4.791e-03, -4.896e-02, -5.699e-03, 9.083e-03, -6.092e-02, 2.628e-02, 6.739e-02, 3.363e-02, 7.787e-02)); + r += mul(s7_3, M4(2.299e-02, -1.116e-01, -1.895e-01, 6.158e-02, -2.903e-02, 1.976e-01, -5.169e-02, -6.359e-03, -1.108e-01, -2.721e-02, -4.755e-02, 1.108e-02, -5.540e-03, 5.188e-02, 4.148e-02, 2.312e-01)); + r += mul(s7_4, M4(5.459e-02, -1.114e-01, -2.955e-01, 2.604e-02, -7.562e-02, 3.799e-02, 7.620e-02, -8.782e-02, -4.982e-01, -4.289e-02, -1.030e-01, 5.294e-02, -5.541e-02, -6.984e-02, 1.854e-02, -2.165e-02)); + r += mul(s7_5, M4(-1.309e-01, 1.373e-01, 1.565e-02, -2.078e-02, -2.317e-02, 4.668e-03, 1.389e-01, -7.272e-02, -1.375e-01, 
4.768e-02, 8.811e-02, -1.680e-04, 9.234e-03, -6.011e-04, 1.287e-02, 1.274e-01)); + r += mul(s7_6, M4(-3.351e-02, -1.516e-02, 1.227e-02, 1.531e-02, -3.170e-02, 1.613e-01, 6.721e-02, 6.211e-02, 2.716e-02, -1.389e-01, 3.411e-02, 1.396e-03, -9.898e-02, 6.533e-03, -3.053e-02, -7.764e-03)); + r += mul(s7_7, M4(-5.799e-02, -7.095e-03, -7.659e-02, 1.150e-01, -2.979e-02, 5.477e-02, -1.147e-01, 7.278e-02, -1.017e-01, -9.430e-02, -4.289e-02, 1.574e-01, -8.640e-02, -4.577e-02, -1.199e-01, -1.743e-01)); + r += mul(s7_8, M4(7.061e-02, -4.189e-02, -3.871e-03, -3.726e-02, -2.013e-02, 1.605e-02, -8.725e-02, -6.317e-02, -1.159e-01, -6.920e-02, -9.147e-02, -1.023e-02, 6.410e-03, 3.396e-02, -2.014e-02, 6.373e-02)); + r += V4(5.089e-03, 6.337e-03, -1.003e-03, -8.375e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.675e-02, 5.997e-02, -3.650e-02, -9.469e-02, -1.309e-03, -1.026e-02, -4.568e-02, 1.177e-02, -2.319e-02, 3.435e-02, -4.605e-03, 8.751e-03, -1.265e-01, 7.075e-05, 1.598e-03, -7.128e-03)); + r += mul(s0_1, M4(-3.769e-02, 1.289e-01, 7.292e-02, 2.980e-02, -1.155e-01, -6.349e-02, 7.930e-02, -2.253e-02, -2.302e-02, -2.740e-02, 5.069e-03, 2.801e-02, -1.855e-01, -2.358e-01, -4.332e-02, 4.699e-02)); + r += mul(s0_2, M4(3.070e-02, 8.126e-02, -8.298e-02, 2.904e-03, 8.344e-02, -9.977e-02, -2.161e-01, -1.270e-02, 5.156e-02, -1.070e-01, 
3.527e-02, -1.041e-02, -7.620e-02, -1.792e-01, -7.605e-02, -2.338e-02)); + r += mul(s0_3, M4(8.779e-02, 1.977e-02, 8.397e-03, -6.088e-02, -4.488e-02, 3.835e-03, -3.778e-02, 3.318e-04, 2.340e-01, 1.722e-01, -1.464e-01, 2.562e-02, 1.394e-01, -1.146e-01, 3.923e-02, -3.789e-02)); + r += mul(s0_4, M4(-1.314e-01, 4.759e-02, -1.111e-01, -8.904e-02, 2.705e-01, -1.703e-01, 2.401e-01, -3.214e-01, 3.357e-01, -3.734e-02, 1.988e-01, -1.702e-01, 6.567e-02, -3.786e-01, -6.481e-02, 7.642e-02)); + r += mul(s0_5, M4(-2.133e-02, 5.389e-02, -1.925e-02, 1.168e-02, -6.679e-02, -1.289e-01, 1.040e-02, -1.619e-01, 5.680e-02, 4.877e-02, 5.648e-02, 5.265e-02, 1.574e-01, -3.657e-01, -1.724e-01, 6.129e-02)); + r += mul(s0_6, M4(6.074e-03, -7.729e-02, -1.006e-01, -1.152e-02, 9.523e-02, -6.766e-02, -2.787e-03, 9.933e-03, 6.489e-02, -5.330e-02, -8.086e-04, -9.893e-02, 4.494e-02, -1.720e-01, 7.374e-02, 7.742e-02)); + r += mul(s0_7, M4(-1.204e-01, 7.328e-02, 7.396e-02, -1.209e-01, 9.194e-02, -1.228e-01, 4.137e-02, 4.043e-02, -1.156e-01, -2.074e-02, -5.843e-02, -4.110e-03, -2.273e-01, 8.433e-02, -1.024e-02, 3.197e-03)); + r += mul(s0_8, M4(-4.821e-02, -4.134e-02, -1.442e-01, -8.328e-02, 2.852e-02, 3.135e-02, 6.543e-03, 2.640e-02, 4.538e-02, 9.523e-02, 1.585e-02, -9.692e-02, -6.346e-02, -1.823e-01, -4.534e-02, 3.876e-02)); + r += mul(s1_0, M4(2.361e-02, -8.350e-02, 1.339e-01, -7.668e-02, -7.288e-02, 5.647e-02, 2.951e-02, -8.884e-02, -2.109e-02, 6.377e-02, 3.692e-01, -5.184e-02, 6.710e-02, -1.169e-01, 4.998e-02, -1.445e-02)); + r += mul(s1_1, M4(3.144e-02, -8.041e-02, 2.650e-01, 5.868e-02, 3.754e-02, 1.225e-01, -5.726e-02, 2.163e-01, 2.362e-02, 1.440e-01, 1.214e-01, 3.473e-04, -1.772e-01, 1.047e-01, -8.946e-02, 8.762e-02)); + r += mul(s1_2, M4(-1.873e-01, 3.456e-03, -3.806e-02, 9.440e-02, 7.302e-02, 1.553e-02, -4.385e-02, -2.931e-02, -5.164e-02, 1.311e-01, 3.349e-02, -1.530e-02, 1.267e-01, 3.255e-02, 1.438e-02, 2.959e-02)); + r += mul(s1_3, M4(1.597e-01, -1.768e-01, 1.526e-01, 5.055e-02, -5.962e-03, 
-1.644e-01, -8.546e-02, -4.917e-02, -1.387e-01, -6.411e-02, 4.689e-01, -1.519e-01, 2.620e-02, -2.779e-02, -7.039e-02, -4.236e-02)); + r += mul(s1_4, M4(2.026e-02, -4.697e-01, 9.243e-02, -2.950e-01, 5.595e-02, -1.964e-01, -1.564e-01, -4.191e-01, -3.245e-01, -4.436e-02, 1.247e-01, -2.702e-01, -3.343e-03, -9.253e-03, 1.241e-01, -5.372e-02)); + r += mul(s1_5, M4(-1.612e-01, -3.095e-01, -1.355e-02, 1.140e-01, -1.342e-01, 2.531e-01, 2.424e-01, -2.119e-02, -6.644e-02, -1.501e-02, -3.428e-02, 1.780e-02, -1.017e-01, 7.337e-02, -1.332e-02, 1.211e-01)); + r += mul(s1_6, M4(1.561e-01, -6.944e-02, 4.193e-02, 8.192e-02, 1.577e-01, 1.281e-02, -1.118e-01, -5.691e-02, -2.364e-02, 1.690e-02, 1.312e-01, 8.214e-02, 7.732e-02, 1.035e-01, 3.027e-02, -1.452e-02)); + r += mul(s1_7, M4(1.421e-01, -2.347e-01, 2.118e-02, 1.315e-01, 1.566e-01, 1.144e-01, 1.285e-01, 4.122e-02, -3.384e-02, 8.157e-02, -1.205e-01, 1.959e-02, -6.363e-02, 1.399e-01, 5.847e-02, 4.064e-02)); + r += mul(s1_8, M4(1.111e-02, -8.978e-02, -5.925e-02, -7.568e-02, 1.495e-01, 2.570e-01, -5.507e-04, 2.520e-02, -1.118e-02, -9.439e-02, -6.084e-02, -5.373e-02, -1.533e-03, 1.219e-01, -1.389e-02, -1.508e-01)); + r += mul(s2_0, M4(-4.162e-02, -4.071e-02, -5.766e-02, 5.628e-03, -6.182e-02, 4.116e-02, 1.410e-02, -1.759e-02, 2.086e-01, -1.512e-01, -1.230e-01, -1.002e-01, 4.260e-02, -2.905e-02, 8.458e-02, 1.022e-02)); + r += mul(s2_1, M4(3.019e-02, -1.456e-01, -1.264e-01, -4.691e-02, 7.353e-02, 5.608e-02, -1.380e-01, -4.886e-02, -7.036e-02, 1.277e-01, -1.796e-01, -2.979e-01, -3.455e-01, 1.449e-01, 7.792e-02, -7.989e-02)); + r += mul(s2_2, M4(8.347e-02, -4.100e-03, -3.079e-02, 1.025e-01, 8.068e-02, 1.296e-01, 8.245e-02, 1.703e-02, 1.256e-01, 2.133e-02, 1.571e-01, -8.556e-02, -1.067e-01, -1.455e-01, -2.035e-02, -6.335e-02)); + r += mul(s2_3, M4(-4.614e-02, 7.387e-02, 1.166e-01, -7.077e-02, -2.393e-02, 1.081e-01, -2.396e-01, 8.131e-02, -3.251e-03, -4.704e-02, -7.487e-02, 7.985e-03, -1.166e-01, -4.941e-02, 1.027e-02, 1.565e-01)); + r += 
mul(s2_4, M4(4.681e-02, -1.264e-02, 1.491e-01, 4.652e-02, -1.520e-03, 2.008e-01, -8.360e-02, -1.407e-01, -6.069e-02, 3.109e-01, -1.926e-02, -1.985e-02, -8.434e-02, -1.484e-02, 1.761e-01, -4.318e-02)); + r += mul(s2_5, M4(-3.128e-01, -4.635e-02, 1.431e-01, 1.348e-01, 1.183e-01, 4.978e-02, 1.522e-01, -9.026e-02, 1.138e-01, 1.307e-01, -4.564e-02, -1.154e-01, -3.911e-03, 8.876e-04, -1.357e-01, -1.134e-01)); + r += mul(s2_6, M4(-2.743e-02, 3.054e-02, 6.684e-02, 9.738e-04, -2.262e-01, 1.640e-01, -3.429e-02, -1.071e-02, -6.768e-02, -2.352e-02, -2.361e-01, -3.020e-01, 1.184e-02, 7.232e-02, 3.476e-02, -9.865e-03)); + r += mul(s2_7, M4(-2.802e-01, -2.529e-01, -1.229e-01, -1.349e-01, -5.267e-02, 1.006e-01, -2.282e-02, -3.943e-02, 3.723e-01, -2.812e-01, 3.149e-01, 1.970e-02, -3.297e-02, -1.841e-01, 9.541e-02, 1.027e-01)); + r += mul(s2_8, M4(1.147e-02, -8.793e-03, 1.588e-01, -1.293e-02, -1.458e-01, 1.161e-01, -2.741e-02, -6.339e-02, 3.385e-02, 6.893e-02, -6.859e-02, -1.301e-01, 4.266e-02, 4.091e-03, 9.147e-03, -6.810e-02)); + r += mul(s3_0, M4(5.698e-02, 8.608e-02, -7.571e-02, -1.704e-02, 7.851e-02, -7.372e-02, 4.605e-02, -3.019e-02, -3.718e-02, -2.448e-02, -3.068e-02, -5.345e-02, 1.398e-01, -1.581e-02, -1.463e-01, -8.957e-02)); + r += mul(s3_1, M4(3.903e-02, 1.542e-02, -1.304e-01, 2.455e-02, 6.567e-02, -2.254e-02, 1.981e-02, 8.340e-02, -8.381e-02, 8.334e-02, -1.282e-01, -6.353e-03, -3.466e-02, 9.503e-02, 2.178e-02, 2.268e-02)); + r += mul(s3_2, M4(7.165e-02, 6.839e-02, -1.454e-02, -2.518e-02, -3.534e-02, 6.600e-02, -3.333e-02, 1.099e-02, -5.149e-02, -5.962e-02, 3.705e-02, 8.507e-03, 1.171e-01, -5.122e-02, 1.308e-01, -7.599e-02)); + r += mul(s3_3, M4(-4.663e-02, -1.340e-02, -1.379e-01, 2.231e-03, -2.979e-02, 4.621e-02, -1.211e-01, -7.539e-02, -1.957e-02, 1.509e-01, -6.523e-03, -1.156e-01, 6.592e-02, -4.868e-02, 6.305e-03, -1.144e-01)); + r += mul(s3_4, M4(2.060e-01, 2.034e-03, -7.575e-03, 1.314e-01, -8.559e-02, -9.837e-02, 5.653e-02, -1.914e-01, 1.889e-02, 3.174e-02, 
-8.599e-02, 8.587e-02, 2.706e-01, -4.214e-02, 6.817e-02, -3.622e-01)); + r += mul(s3_5, M4(-5.971e-02, 1.780e-01, 1.817e-03, 1.477e-02, 2.373e-02, -2.837e-01, 2.382e-02, -9.675e-02, 1.090e-01, 1.249e-01, 1.647e-01, -2.997e-02, 2.477e-01, 1.275e-01, -6.222e-02, -1.336e-02)); + r += mul(s3_6, M4(-1.423e-02, 6.043e-02, 4.517e-02, -2.434e-02, -2.237e-03, 1.330e-01, -2.076e-02, 5.665e-02, -1.149e-01, -8.142e-02, 1.748e-02, 4.757e-02, 7.014e-02, -4.076e-03, 1.268e-02, 2.145e-02)); + r += mul(s3_7, M4(-1.785e-01, -7.156e-02, -2.492e-02, 4.320e-02, -1.728e-02, 1.034e-02, 5.533e-02, -5.906e-02, -1.259e-03, -1.854e-02, -5.582e-02, 3.797e-03, 3.889e-02, -7.529e-02, 2.445e-02, 8.897e-02)); + r += mul(s3_8, M4(-4.278e-02, 5.844e-02, 1.486e-02, -7.576e-03, -7.155e-03, -1.057e-01, -2.649e-02, -5.033e-03, 3.644e-02, 3.797e-02, 4.688e-02, 3.603e-02, 7.478e-02, 4.200e-02, 4.086e-02, 1.633e-02)); + r += mul(s4_0, M4(2.954e-02, 6.368e-02, 1.124e-02, 1.156e-02, -7.933e-02, 6.929e-02, 1.515e-02, 3.256e-02, -3.112e-02, 1.080e-01, -1.936e-01, -3.785e-02, -9.351e-02, -1.354e-01, 2.461e-01, -5.817e-02)); + r += mul(s4_1, M4(2.363e-02, -2.653e-02, -1.473e-01, 3.505e-02, 2.167e-02, -6.371e-02, 9.751e-02, -2.605e-02, -1.820e-01, -1.218e-01, 9.630e-02, -7.706e-02, -2.210e-01, -1.898e-01, 2.049e-01, -3.112e-02)); + r += mul(s4_2, M4(4.299e-02, 1.021e-01, 8.898e-02, 3.527e-02, -5.120e-02, -2.351e-01, 2.368e-02, 3.834e-02, -4.556e-02, -1.265e-01, -6.203e-02, -2.289e-02, 9.493e-02, -1.414e-02, 6.036e-03, -5.510e-02)); + r += mul(s4_3, M4(-3.797e-02, 3.851e-02, -9.470e-02, 5.490e-02, 6.167e-02, -1.562e-02, 1.131e-01, -9.297e-02, 1.683e-03, 1.295e-02, -6.105e-02, 1.045e-02, -1.353e-01, 1.796e-01, 1.789e-01, -1.213e-01)); + r += mul(s4_4, M4(-8.445e-02, 8.678e-02, -1.243e-01, -2.097e-02, -1.798e-01, 1.405e-01, 1.165e-01, 1.772e-01, 1.905e-01, 4.926e-02, 1.375e-01, -3.092e-02, 2.535e-01, 1.857e-02, -1.840e-01, -2.340e-01)); + r += mul(s4_5, M4(1.488e-01, 1.463e-01, 5.667e-02, -1.390e-02, 3.493e-02, 
5.218e-03, 4.365e-02, 7.459e-02, -1.294e-01, -1.785e-01, -1.037e-01, -9.532e-02, -4.308e-02, -7.546e-02, 7.063e-02, -1.180e-02)); + r += mul(s4_6, M4(-5.328e-02, 1.059e-01, -4.184e-02, -2.673e-02, -1.184e-01, 7.588e-02, 7.261e-02, -9.560e-05, -1.074e-01, 3.232e-02, 2.400e-02, -6.284e-02, -1.591e-01, -5.297e-02, 1.021e-01, 1.491e-02)); + r += mul(s4_7, M4(-5.382e-02, -1.088e-01, -2.641e-03, -3.996e-03, -1.735e-01, 1.856e-01, 1.465e-01, -1.005e-01, -5.233e-02, -7.944e-03, 8.574e-02, 3.987e-02, -1.473e-01, -4.357e-01, -1.162e-02, 7.022e-02)); + r += mul(s4_8, M4(1.801e-01, 8.448e-02, 4.178e-02, -3.872e-02, 4.254e-02, -1.712e-01, 3.371e-03, -8.948e-02, 1.224e-02, -9.654e-02, -3.794e-02, 4.958e-02, 8.737e-02, 6.929e-02, -1.333e-01, -2.040e-02)); + r += mul(s5_0, M4(-4.158e-02, 2.599e-02, 7.901e-02, 8.321e-03, -1.736e-01, -6.602e-02, -6.660e-02, 8.860e-02, 1.724e-02, 1.033e-02, -8.451e-02, -2.804e-02, 3.716e-02, 3.139e-02, -1.055e-03, 2.071e-02)); + r += mul(s5_1, M4(2.268e-01, -1.047e-01, -1.858e-01, 5.456e-02, -1.151e-01, -2.588e-01, -3.722e-02, 3.701e-02, 6.879e-02, -8.261e-02, 2.911e-02, 6.549e-02, -2.712e-02, 4.789e-02, 1.068e-01, -1.197e-01)); + r += mul(s5_2, M4(-4.443e-02, -9.052e-02, -4.703e-02, 4.579e-03, -1.075e-01, -1.352e-01, -7.144e-02, -9.541e-03, 1.335e-01, -5.074e-03, 5.284e-02, 2.923e-02, 6.626e-02, 1.579e-03, 4.798e-02, -1.005e-01)); + r += mul(s5_3, M4(-1.143e-01, -1.722e-01, 2.441e-02, 1.310e-01, 3.420e-02, -5.373e-02, 7.689e-02, -4.045e-02, 1.203e-01, -9.565e-02, -7.908e-02, -2.734e-02, 1.031e-01, -3.036e-02, 9.257e-03, -9.268e-02)); + r += mul(s5_4, M4(-3.542e-02, -1.875e-01, 2.537e-02, -2.439e-02, -9.876e-02, -3.396e-02, 6.292e-02, 6.549e-02, 1.529e-01, -4.029e-02, 2.000e-01, 1.193e-01, -2.729e-02, -1.903e-02, -1.803e-01, -1.793e-02)); + r += mul(s5_5, M4(-6.030e-02, -1.222e-01, -1.170e-01, 2.348e-01, 6.808e-02, -3.617e-02, -7.631e-02, -2.720e-02, -2.292e-01, -3.099e-01, -8.020e-03, -1.038e-01, 6.242e-02, 5.190e-02, 8.095e-02, 9.514e-02)); + r += 
mul(s5_6, M4(6.978e-03, -6.634e-02, 4.558e-02, -5.632e-02, 3.886e-02, 1.042e-01, 6.360e-03, 1.132e-01, -5.098e-02, 8.961e-02, 9.910e-02, 8.475e-02, 8.224e-02, -4.271e-02, -7.546e-02, 2.300e-03)); + r += mul(s5_7, M4(6.222e-02, -3.369e-01, -6.016e-02, -2.080e-01, -1.476e-01, 1.600e-01, -9.421e-02, 1.922e-03, 6.097e-02, 1.039e-01, 6.280e-02, -4.231e-02, 3.617e-02, -2.071e-01, -2.099e-02, -7.936e-02)); + r += mul(s5_8, M4(1.362e-02, -2.364e-01, -2.881e-02, -1.893e-01, -4.383e-02, -5.227e-02, -6.600e-02, -2.155e-02, 1.962e-02, -4.751e-05, -1.398e-01, 9.889e-02, 1.032e-01, 2.337e-02, 2.391e-02, 4.492e-02)); + r += mul(s6_0, M4(3.002e-02, -2.149e-01, -5.255e-02, -1.344e-01, 5.155e-03, -4.274e-02, -1.978e-02, -2.494e-02, -7.873e-03, -4.412e-02, 2.502e-02, -4.448e-02, 2.398e-01, 1.624e-01, -9.287e-02, -6.755e-02)); + r += mul(s6_1, M4(-1.541e-02, 1.594e-01, 2.824e-02, 1.672e-01, 1.304e-01, -3.634e-02, -1.861e-02, 2.413e-02, 8.032e-02, 5.426e-02, 7.488e-02, -4.554e-02, 1.686e-01, 2.354e-01, 2.844e-01, -8.371e-02)); + r += mul(s6_2, M4(-2.267e-02, -7.544e-02, -8.038e-02, 1.154e-01, 1.699e-02, 8.058e-02, 7.575e-03, 4.182e-03, 2.275e-02, 8.964e-02, -7.309e-03, -7.329e-02, 7.492e-02, 1.014e-01, -4.596e-03, -2.045e-02)); + r += mul(s6_3, M4(2.319e-01, 3.723e-02, -1.344e-01, 1.486e-01, 4.493e-02, -4.327e-02, -8.266e-02, 3.615e-02, -1.576e-01, 2.074e-02, -5.127e-02, 3.548e-02, -5.083e-03, 2.821e-01, 1.122e-01, -7.263e-03)); + r += mul(s6_4, M4(-2.149e-03, -2.494e-01, -9.982e-02, -8.301e-01, 1.845e-01, -1.506e-01, -3.571e-03, -2.393e-02, -1.104e-01, -5.359e-02, -2.173e-02, 4.126e-02, 1.100e-01, -4.346e-02, 2.896e-01, 8.233e-03)); + r += mul(s6_5, M4(-8.545e-02, -7.732e-02, 7.867e-02, -2.736e-01, 1.708e-01, -1.131e-02, -3.363e-02, 3.079e-02, -1.554e-01, -4.239e-02, 5.462e-02, -1.106e-01, 4.246e-02, 9.369e-02, 1.026e-02, -2.496e-02)); + r += mul(s6_6, M4(-1.443e-01, 2.616e-02, 7.678e-02, -3.301e-02, -1.494e-04, -2.239e-02, -8.535e-02, 2.077e-02, -2.918e-01, 5.707e-02, 9.960e-02, 
1.440e-02, 2.924e-02, 1.643e-02, -2.174e-02, 8.667e-03)); + r += mul(s6_7, M4(2.497e-01, 3.967e-02, -7.436e-02, 4.780e-01, 1.139e-01, 6.077e-02, 3.787e-02, 3.072e-02, 4.866e-02, -3.114e-02, 2.210e-01, 5.498e-01, 2.244e-03, 1.709e-01, 1.689e-01, 1.226e-01)); + r += mul(s6_8, M4(-9.497e-02, 1.582e-01, 9.806e-02, 2.241e-01, 1.024e-01, 1.254e-01, 9.498e-02, -7.761e-03, -1.223e-02, 6.641e-02, 1.096e-01, 6.125e-03, 3.307e-02, -2.369e-02, -4.217e-02, -1.282e-01)); + r += mul(s7_0, M4(-4.745e-02, 6.797e-02, -7.921e-02, -2.684e-02, -4.855e-02, -6.523e-02, 6.867e-02, -7.047e-02, 7.915e-02, 4.574e-02, -2.760e-02, -1.326e-02, 1.022e-02, -3.943e-02, 1.583e-01, -4.302e-02)); + r += mul(s7_1, M4(-1.258e-01, -6.023e-02, -3.607e-02, 3.000e-02, -4.323e-02, 1.399e-01, -3.115e-02, -7.780e-03, 1.587e-01, -1.995e-02, -1.057e-02, -3.390e-02, -1.875e-02, -7.517e-02, 8.273e-02, -1.273e-02)); + r += mul(s7_2, M4(-9.371e-02, 1.472e-01, 7.151e-02, -2.068e-02, -6.272e-02, 1.100e-01, -3.908e-03, -3.746e-02, 1.264e-01, -2.117e-03, -5.027e-03, 9.286e-03, 1.609e-03, -6.072e-02, 7.644e-05, -7.595e-02)); + r += mul(s7_3, M4(-8.126e-02, 1.954e-02, 1.255e-01, 1.604e-02, -4.537e-02, -1.292e-01, 8.680e-02, -1.509e-02, -3.346e-04, -5.120e-03, 2.588e-02, 1.256e-03, 1.354e-01, 9.971e-03, 3.504e-02, 1.009e-02)); + r += mul(s7_4, M4(-1.952e-04, -2.518e-01, -1.969e-01, 1.067e-01, -1.505e-01, 7.530e-02, 1.211e-01, -7.975e-02, -4.654e-02, -1.149e-01, -8.681e-02, 3.955e-02, 1.141e-01, -1.718e-01, 1.252e-01, -8.142e-02)); + r += mul(s7_5, M4(-6.035e-02, 5.333e-02, -4.389e-02, -9.822e-02, -7.485e-02, 7.244e-02, 7.671e-03, -4.813e-02, 1.363e-01, -3.805e-02, -1.642e-01, -1.544e-01, -3.469e-02, 6.749e-02, -5.906e-02, -1.140e-02)); + r += mul(s7_6, M4(6.610e-02, -1.710e-02, -3.299e-02, -7.538e-02, -1.988e-01, -1.846e-02, 8.273e-02, 1.009e-01, 1.600e-01, 1.211e-02, 5.993e-02, -5.980e-02, -9.680e-02, 9.439e-02, 1.498e-01, 1.416e-01)); + r += mul(s7_7, M4(2.026e-02, -7.128e-02, -1.125e-01, -6.044e-02, -1.754e-01, 
7.820e-02, -4.586e-02, 1.090e-01, 1.076e-02, -9.473e-02, -1.519e-02, 7.060e-02, -1.306e-01, 2.420e-01, -4.872e-02, 1.552e-01)); + r += mul(s7_8, M4(-4.903e-02, -4.213e-02, 6.470e-02, -2.639e-02, -1.248e-01, -3.860e-02, 6.273e-02, 4.119e-03, 1.416e-01, 1.482e-01, -1.337e-01, -1.764e-01, -6.911e-02, 4.784e-02, 5.185e-02, 1.931e-02)); + r += V4(-1.894e-02, -4.402e-03, -8.438e-03, -1.903e-03); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 
0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, 
s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 6 +//!DESC conv5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 
s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.983e-02, -2.355e-02, 1.340e-02, -1.287e-02, 2.803e-02, -2.711e-02, 3.019e-02, -3.417e-03, 2.946e-01, 2.738e-01, -8.508e-02, 2.910e-01, -6.676e-02, -9.997e-02, 2.409e-02, -7.472e-05)); + r += mul(s0_1, M4(-1.825e-02, 8.480e-02, 4.182e-02, -8.584e-02, -1.625e-02, -2.023e-02, -6.646e-03, -3.011e-02, -5.927e-02, 1.397e-01, -7.290e-02, 1.314e-01, -1.138e-01, -8.384e-02, -8.820e-02, 8.265e-02)); + r += mul(s0_2, M4(-2.600e-02, 2.241e-02, -5.769e-02, -8.343e-02, 1.166e-02, -2.480e-02, 3.902e-02, 5.080e-02, 7.858e-02, 5.016e-02, 5.021e-02, 9.969e-03, -1.482e-01, -4.674e-03, -1.362e-01, -3.118e-03)); + r += mul(s0_3, M4(-9.168e-02, 7.943e-02, -6.831e-03, -4.480e-02, 1.472e-01, -5.284e-02, -1.962e-01, 1.886e-01, 1.174e-01, -1.334e-01, 3.624e-04, 2.994e-01, 3.111e-03, -9.108e-02, 9.959e-02, 6.819e-02)); + r += mul(s0_4, M4(1.782e-01, -3.958e-02, 5.934e-02, 3.938e-02, 5.210e-02, -1.230e-02, 3.478e-01, -3.377e-02, -7.085e-01, 1.658e-01, -3.095e-03, -9.614e-02, 1.435e-01, -2.333e-01, -5.669e-02, 4.046e-02)); + r += mul(s0_5, M4(-1.679e-03, -3.512e-02, -1.010e-01, -2.660e-02, -2.707e-02, -3.091e-02, 9.248e-02, -6.013e-02, -2.270e-03, -4.490e-02, 2.436e-01, 1.211e-01, -1.776e-01, -3.132e-02, -9.672e-02, 2.008e-01)); + r += mul(s0_6, M4(-1.127e-01, -2.387e-02, -8.957e-02, -4.969e-02, 1.061e-01, 7.138e-02, -1.068e-01, -5.554e-02, 3.265e-01, 6.205e-03, -2.548e-01, -9.893e-02, -1.051e-01, -5.722e-03, -3.117e-02, -9.438e-02)); + r += mul(s0_7, M4(-3.578e-02, -2.550e-03, -2.800e-02, 9.891e-02, -1.086e-01, 1.480e-01, -7.757e-02, 4.708e-02, -1.695e-01, 3.290e-01, -5.393e-03, -1.031e-01, -1.637e-01, 1.105e-03, 
8.458e-02, -6.238e-02)); + r += mul(s0_8, M4(-7.570e-02, -1.015e-02, 8.985e-02, 1.137e-01, 4.670e-02, 2.523e-02, -2.495e-02, 6.150e-02, 1.182e-01, -1.322e-01, 7.917e-02, -8.437e-02, -5.996e-02, -1.705e-01, -2.269e-02, 5.902e-02)); + r += mul(s1_0, M4(-3.108e-02, 2.095e-02, -5.984e-02, -3.504e-02, 6.117e-02, 5.307e-02, 6.236e-03, -9.603e-02, -2.684e-02, -2.362e-02, -2.378e-02, -2.466e-02, 1.570e-02, -3.570e-02, 1.274e-02, -6.502e-03)); + r += mul(s1_1, M4(-1.283e-03, 4.384e-02, 8.365e-02, 5.915e-02, -1.145e-02, 2.392e-02, 1.088e-01, -1.762e-03, 4.372e-02, -2.140e-02, 4.264e-02, 9.236e-03, -4.373e-03, -4.429e-02, 9.449e-02, -3.923e-02)); + r += mul(s1_2, M4(-1.221e-02, 9.479e-02, 4.346e-02, 2.486e-02, 4.054e-02, -4.108e-02, -2.336e-02, 2.922e-02, -1.015e-01, 4.149e-02, -9.165e-04, -1.976e-02, 2.947e-02, 3.191e-02, 1.560e-01, -1.711e-01)); + r += mul(s1_3, M4(6.241e-02, 6.151e-02, 1.526e-02, -1.081e-01, -7.364e-02, 9.332e-02, -1.636e-01, 2.652e-01, -4.666e-02, -4.395e-02, 2.264e-02, -4.261e-02, 2.372e-02, 2.705e-02, -1.008e-02, 7.214e-02)); + r += mul(s1_4, M4(5.696e-02, -7.886e-02, 7.643e-02, -8.079e-02, 1.814e-01, -8.703e-02, 1.432e-01, -6.011e-02, -5.641e-02, -1.646e-01, 3.240e-02, -5.445e-02, 5.017e-02, -2.545e-02, -2.747e-02, -2.010e-01)); + r += mul(s1_5, M4(1.828e-03, -6.594e-02, 1.822e-02, -4.988e-02, 7.417e-03, -6.251e-02, 4.977e-02, -7.883e-02, -3.799e-02, -6.548e-02, -9.739e-02, -6.066e-02, 7.457e-02, 7.262e-02, -1.766e-01, -8.015e-02)); + r += mul(s1_6, M4(9.728e-02, 8.458e-03, -1.198e-02, -7.956e-02, -1.125e-01, 8.212e-04, -3.966e-02, 1.108e-01, 1.952e-02, -1.352e-02, -1.405e-02, -1.585e-02, -2.641e-02, -2.669e-02, 4.201e-02, 3.363e-02)); + r += mul(s1_7, M4(6.647e-02, 3.102e-02, 1.156e-01, -3.595e-03, -3.543e-03, 6.918e-02, -1.841e-01, 2.141e-01, 3.876e-02, 1.731e-02, 8.944e-02, -6.585e-02, 1.066e-01, 4.440e-02, 1.549e-01, 1.150e-01)); + r += mul(s1_8, M4(1.277e-01, -6.115e-02, 1.196e-01, 1.267e-02, -1.131e-02, -1.013e-03, -7.094e-02, -6.453e-02, 
9.931e-02, 4.298e-02, 3.554e-02, -2.879e-02, -2.796e-02, 1.079e-01, -6.519e-02, -7.899e-05)); + r += mul(s2_0, M4(3.688e-02, -1.057e-01, 6.859e-02, 7.128e-03, -9.767e-02, -2.339e-01, -1.277e-03, -6.880e-02, 2.195e-02, 5.017e-02, 3.168e-02, -3.514e-02, 5.195e-03, -1.414e-01, 6.491e-03, -9.037e-02)); + r += mul(s2_1, M4(6.062e-02, 6.298e-02, 1.971e-02, 1.479e-02, 2.941e-02, -1.413e-01, -9.835e-02, 1.221e-01, 1.035e-01, 1.188e-01, 9.886e-02, -2.821e-02, -9.335e-02, -1.124e-01, 1.724e-01, -2.172e-01)); + r += mul(s2_2, M4(9.788e-02, -1.209e-02, 1.114e-01, 5.620e-02, -5.509e-02, -5.775e-02, -2.744e-01, -6.316e-02, -4.517e-02, -8.469e-03, 9.252e-02, -7.269e-02, -4.882e-02, -1.046e-01, 3.074e-02, -1.737e-01)); + r += mul(s2_3, M4(1.225e-02, 8.615e-03, 1.241e-01, -3.521e-02, 1.214e-01, -1.194e-01, 2.046e-01, -1.198e-01, -1.477e-01, 1.091e-01, -8.267e-02, -1.965e-02, -4.767e-02, -4.121e-02, -1.680e-02, -1.344e-01)); + r += mul(s2_4, M4(1.174e-01, -4.352e-02, -6.449e-02, -4.885e-02, 2.678e-01, -4.427e-01, 3.447e-01, -2.581e-01, -4.840e-01, 4.853e-01, -1.646e-01, 1.738e-01, -1.628e-02, 4.239e-02, 1.317e-01, -3.075e-01)); + r += mul(s2_5, M4(3.840e-02, -1.366e-01, -2.506e-02, -6.418e-02, 1.866e-01, -1.816e-01, 1.662e-01, -6.389e-02, -8.916e-02, 9.377e-03, 5.231e-02, -1.304e-01, -5.023e-02, -7.681e-02, 1.138e-02, -1.651e-01)); + r += mul(s2_6, M4(4.548e-02, 2.773e-02, 6.133e-02, -8.995e-03, 2.604e-02, 3.399e-02, 4.119e-02, -5.129e-02, -7.591e-02, -7.953e-02, -6.499e-03, 9.256e-02, 8.176e-02, -7.033e-02, -2.963e-02, -1.108e-01)); + r += mul(s2_7, M4(5.637e-02, 2.460e-02, 4.969e-03, 1.028e-02, 9.972e-02, -8.314e-03, 1.056e-01, -8.911e-02, -2.204e-01, -2.369e-02, -1.037e-01, 1.372e-01, 1.047e-01, -6.154e-02, -3.977e-02, -1.519e-01)); + r += mul(s2_8, M4(6.608e-02, 2.095e-02, -7.517e-02, -6.470e-02, 1.252e-02, 3.683e-02, -9.879e-03, -7.856e-02, -1.072e-01, -7.476e-03, 4.455e-02, 3.734e-02, 6.658e-02, -1.017e-01, -1.125e-01, -1.040e-01)); + r += mul(s3_0, M4(5.096e-02, 2.713e-02, 
-2.334e-02, 6.541e-02, 2.906e-02, 6.274e-02, -8.113e-02, 1.571e-02, -5.359e-02, 2.073e-02, 3.527e-02, -3.818e-02, -3.931e-02, 6.263e-02, -5.998e-02, -1.589e-02)); + r += mul(s3_1, M4(6.973e-02, 1.830e-01, -7.406e-02, 1.496e-01, -3.793e-02, 1.698e-01, -7.125e-02, -6.870e-03, 1.689e-02, 2.680e-02, -1.572e-02, -3.051e-03, -2.107e-02, 3.672e-02, 5.379e-03, 9.172e-02)); + r += mul(s3_2, M4(-1.318e-01, 8.081e-02, -1.517e-01, -4.654e-02, -2.576e-02, 4.760e-02, -2.348e-02, 4.061e-02, -3.277e-02, -3.141e-02, 2.763e-02, -1.285e-01, -4.142e-02, -5.539e-02, -2.869e-02, -1.739e-02)); + r += mul(s3_3, M4(1.962e-02, -2.933e-02, -3.957e-02, 9.155e-02, -1.313e-02, 1.458e-01, 9.674e-02, 9.480e-02, -1.406e-01, -2.761e-02, 3.372e-02, -3.480e-02, -4.371e-02, 3.017e-02, -7.958e-02, 3.795e-02)); + r += mul(s3_4, M4(1.400e-01, 2.186e-01, -2.147e-01, 4.190e-02, -1.207e-01, -1.876e-02, 1.263e-01, -2.033e-01, -1.781e-01, 1.664e-01, -3.067e-01, 2.183e-01, -6.203e-02, -4.359e-03, 1.373e-01, 5.447e-02)); + r += mul(s3_5, M4(-1.102e-01, -2.220e-02, -2.002e-02, -4.875e-02, 8.470e-02, -4.010e-02, 2.741e-02, -4.627e-02, 3.194e-02, -4.281e-02, 3.218e-02, -6.617e-02, -2.043e-02, 4.356e-02, -9.002e-02, 7.398e-02)); + r += mul(s3_6, M4(-1.732e-02, -4.082e-02, -9.573e-03, -1.600e-02, -1.415e-02, -3.000e-02, -9.384e-04, 2.921e-02, -1.245e-01, -4.522e-02, 1.950e-02, -3.415e-02, -4.652e-02, -4.946e-02, -9.613e-02, -5.485e-02)); + r += mul(s3_7, M4(-1.953e-02, -1.297e-02, -7.540e-02, -5.647e-02, 7.540e-02, -1.910e-02, -1.929e-02, 7.800e-03, -7.194e-02, 2.666e-02, 1.040e-02, 5.431e-02, 1.307e-01, -7.904e-02, -9.697e-02, 3.888e-02)); + r += mul(s3_8, M4(-1.090e-02, 4.749e-02, -4.702e-02, 2.379e-02, 6.314e-02, 1.123e-01, 1.127e-02, -8.094e-03, 2.719e-02, 3.356e-02, 6.262e-02, 3.288e-02, -8.982e-02, -1.419e-02, -9.496e-02, -8.593e-03)); + r += mul(s4_0, M4(5.229e-03, -1.874e-02, -1.448e-02, -1.926e-02, -4.822e-02, 5.543e-02, -2.640e-02, -2.112e-02, -3.138e-02, 4.573e-02, -1.124e-01, -1.924e-02, 2.307e-02, 
7.430e-02, -1.039e-01, -2.323e-02)); + r += mul(s4_1, M4(1.936e-02, -2.353e-02, -1.491e-02, -2.211e-02, 1.228e-02, 2.580e-02, 4.658e-02, 5.566e-02, 5.166e-02, 1.327e-01, -2.065e-01, 9.453e-02, -3.007e-02, -2.316e-01, -4.635e-02, -1.694e-01)); + r += mul(s4_2, M4(-1.102e-02, 7.086e-02, 5.652e-02, -1.799e-02, 7.982e-02, 7.910e-02, 6.309e-02, 6.261e-02, 4.725e-02, -6.972e-02, -9.501e-02, -6.557e-02, 5.831e-02, -1.474e-01, 1.304e-01, 3.218e-02)); + r += mul(s4_3, M4(-9.656e-02, -8.320e-02, 9.073e-02, -1.961e-01, -4.695e-02, -3.599e-02, 6.276e-02, -7.496e-02, 8.017e-02, -6.791e-02, -1.330e-01, 8.306e-02, 1.257e-01, 5.050e-02, 6.683e-02, 2.878e-01)); + r += mul(s4_4, M4(-4.799e-02, 1.671e-02, -2.723e-01, 3.626e-01, 1.262e-01, 9.708e-02, -3.368e-01, -2.745e-01, 2.264e-01, -9.542e-02, -3.528e-02, 8.713e-02, -1.441e-01, 2.150e-02, 1.147e-02, -1.302e-03)); + r += mul(s4_5, M4(-3.114e-02, 2.848e-02, -2.716e-02, 4.827e-02, -7.087e-03, 2.009e-01, -7.128e-03, 7.284e-02, 6.485e-02, -5.746e-02, 6.051e-02, -1.055e-01, 8.923e-02, -2.384e-01, 2.393e-01, -1.864e-02)); + r += mul(s4_6, M4(1.962e-02, 3.224e-02, 8.620e-02, 4.632e-02, -5.171e-02, 2.773e-02, 1.982e-02, -1.368e-02, 7.032e-02, 1.549e-02, 2.737e-03, -2.220e-02, 1.133e-01, -5.129e-02, -6.812e-02, 2.359e-02)); + r += mul(s4_7, M4(-4.866e-02, 6.495e-02, -2.213e-02, -5.636e-02, 4.421e-02, 2.423e-02, -2.990e-02, 1.080e-01, -8.973e-02, 6.921e-02, -1.620e-02, -9.782e-02, -1.811e-01, 7.043e-02, -3.606e-02, -2.599e-01)); + r += mul(s4_8, M4(3.589e-02, 4.545e-02, 7.728e-02, 3.590e-04, 2.634e-02, 9.737e-02, 1.026e-01, 9.209e-02, -1.511e-02, -6.577e-02, 3.313e-02, -1.148e-01, 1.270e-01, -2.888e-02, -7.045e-02, -1.277e-01)); + r += mul(s5_0, M4(-3.020e-02, -5.183e-03, 5.261e-02, -4.663e-02, -4.414e-03, 9.873e-02, -4.187e-02, -1.327e-02, 7.123e-03, -1.098e-02, -5.922e-04, 7.608e-02, -4.072e-02, -8.271e-02, 2.753e-02, 3.148e-02)); + r += mul(s5_1, M4(6.317e-03, -5.983e-02, 1.294e-01, 1.844e-02, 4.390e-02, 6.267e-02, -1.477e-02, 1.219e-01, 
2.467e-01, -2.997e-02, -2.403e-02, -2.382e-02, 8.718e-02, -7.586e-02, 2.001e-02, 5.484e-03)); + r += mul(s5_2, M4(-5.760e-02, 6.528e-02, 1.958e-02, -5.853e-02, 8.200e-02, -2.746e-02, -8.542e-02, -1.933e-02, -1.067e-01, -2.666e-02, -7.423e-03, -4.112e-05, 1.513e-01, -8.296e-02, -4.183e-02, 6.055e-02)); + r += mul(s5_3, M4(-8.693e-02, -9.612e-02, -3.179e-02, -1.840e-01, 2.673e-03, -1.627e-01, 1.450e-01, 1.381e-02, -1.577e-02, -4.878e-02, -3.985e-02, 5.845e-02, -1.944e-02, 3.944e-02, 2.018e-02, -6.841e-02)); + r += mul(s5_4, M4(3.629e-02, 7.570e-02, -2.611e-01, 3.883e-01, 2.307e-02, -7.597e-02, 5.077e-02, -9.529e-02, 7.492e-02, 5.572e-02, 3.370e-02, 5.929e-02, -8.509e-03, 1.385e-01, -1.515e-01, 5.605e-02)); + r += mul(s5_5, M4(-1.791e-02, -6.816e-02, -3.925e-02, 1.986e-03, 8.331e-02, -9.256e-03, 7.505e-03, -6.509e-05, 4.694e-03, 1.398e-02, 6.624e-02, -6.567e-02, -1.356e-01, 4.696e-02, 4.942e-02, 1.464e-01)); + r += mul(s5_6, M4(5.126e-02, 2.097e-02, -5.698e-02, 4.155e-02, 1.072e-01, -1.438e-01, -4.168e-02, -3.538e-02, 8.435e-03, -1.050e-01, 5.704e-02, -9.189e-03, -5.236e-02, -5.514e-02, 9.633e-03, -5.798e-02)); + r += mul(s5_7, M4(-1.121e-01, 3.105e-02, -9.268e-02, -2.804e-02, -3.624e-04, -2.704e-02, 2.646e-02, 5.227e-03, 1.228e-01, -7.355e-02, -1.755e-01, -1.466e-01, -6.640e-02, -5.099e-02, 1.853e-02, -8.626e-02)); + r += mul(s5_8, M4(5.962e-02, 2.513e-02, 2.069e-03, 4.396e-03, -7.240e-02, 9.301e-02, -1.609e-02, -4.861e-02, 5.102e-02, -3.210e-02, 7.308e-02, -3.986e-02, -5.460e-02, -2.136e-02, -8.392e-02, -2.622e-02)); + r += mul(s6_0, M4(3.397e-02, 1.469e-01, 1.022e-02, 3.374e-02, -6.321e-02, -3.230e-02, -5.053e-02, 2.549e-02, 3.016e-02, 5.150e-02, 9.509e-02, 2.499e-02, 7.613e-02, 1.003e-01, 5.365e-02, 1.140e-01)); + r += mul(s6_1, M4(1.483e-02, 2.225e-02, -7.262e-02, -3.496e-02, 1.585e-02, 1.828e-01, -9.560e-02, 1.770e-01, -6.260e-02, 8.682e-03, -7.969e-03, -8.573e-02, -4.427e-03, 1.680e-02, -4.615e-02, -3.744e-02)); + r += mul(s6_2, M4(-1.174e-01, 4.881e-02, 
-1.090e-01, 3.989e-02, 7.840e-02, 5.978e-02, -7.178e-02, 9.298e-02, 3.747e-02, -8.806e-02, 3.627e-02, -6.363e-02, 1.044e-01, -1.821e-02, 1.555e-01, 1.578e-01)); + r += mul(s6_3, M4(-3.267e-02, -3.088e-02, 6.056e-02, 5.069e-02, -8.374e-03, -2.427e-03, -1.422e-01, -2.646e-02, -1.065e-01, -6.290e-03, -2.208e-02, -1.302e-02, 1.189e-01, 2.485e-01, -7.258e-02, 7.887e-02)); + r += mul(s6_4, M4(8.752e-02, -8.758e-02, 3.225e-02, 4.662e-03, -5.213e-02, 3.250e-02, -7.458e-02, 1.800e-02, -8.516e-02, 1.490e-01, -4.668e-02, 2.436e-02, -2.587e-01, 3.084e-01, -3.252e-01, -1.146e-01)); + r += mul(s6_5, M4(-1.196e-01, 5.511e-02, -9.120e-02, 5.974e-02, -8.223e-02, -7.963e-03, -7.138e-02, 7.335e-02, -5.691e-02, 2.994e-01, -1.789e-01, -1.437e-01, -2.558e-02, -1.812e-01, -3.163e-02, 7.934e-04)); + r += mul(s6_6, M4(-1.534e-02, 1.289e-02, 5.497e-02, -2.597e-02, -1.122e-03, 5.876e-02, -7.101e-02, 3.765e-02, -8.104e-02, 2.093e-02, 4.369e-02, -2.960e-03, 1.265e-01, -2.050e-02, 1.893e-02, 6.858e-02)); + r += mul(s6_7, M4(2.254e-01, 2.685e-02, 7.641e-02, 6.965e-02, -9.506e-02, -4.696e-03, -7.411e-02, 1.041e-01, 7.990e-02, 2.651e-03, 1.266e-01, 1.861e-01, 1.269e-01, -8.610e-02, -3.560e-02, 5.815e-02)); + r += mul(s6_8, M4(-9.611e-02, 1.951e-03, -2.302e-02, 4.085e-02, -8.276e-02, -2.860e-02, -5.000e-02, 7.451e-02, -1.413e-01, 1.717e-02, -6.044e-02, 5.176e-02, 9.992e-02, -4.323e-02, 2.536e-02, 3.481e-02)); + r += mul(s7_0, M4(1.046e-02, 1.490e-02, -8.906e-02, -2.217e-02, -9.614e-02, 3.381e-02, -2.320e-02, -5.408e-02, 5.732e-02, -2.322e-02, 6.762e-02, 2.361e-02, -1.171e-01, -2.232e-02, -7.652e-02, -5.135e-02)); + r += mul(s7_1, M4(-2.535e-03, -3.717e-02, -2.368e-02, 7.242e-02, 2.283e-02, 1.875e-02, -7.956e-02, 2.926e-02, -1.207e-02, -6.466e-02, -5.971e-02, -4.580e-02, 7.839e-02, 4.433e-02, -1.667e-02, -3.645e-02)); + r += mul(s7_2, M4(-4.826e-02, -1.186e-02, -1.457e-02, 7.339e-02, 7.090e-03, -1.427e-02, -5.455e-02, 1.983e-02, 6.506e-02, -6.326e-02, 3.217e-02, -3.848e-02, -5.468e-02, -2.149e-02, 
-8.942e-02, -1.877e-02)); + r += mul(s7_3, M4(-7.380e-02, -6.317e-02, 9.416e-02, 8.520e-02, -5.836e-03, -8.181e-03, 1.923e-02, -5.096e-02, -9.014e-02, 1.214e-02, 1.116e-02, -6.535e-02, -1.251e-01, 1.161e-01, 7.658e-02, -9.742e-02)); + r += mul(s7_4, M4(1.341e-02, -2.221e-02, 1.310e-01, 1.086e-01, 3.843e-02, -7.049e-03, 1.505e-01, -1.371e-01, -6.582e-02, -1.022e-01, 1.110e-01, -3.664e-02, -1.304e-01, 1.503e-02, -7.246e-02, -6.176e-02)); + r += mul(s7_5, M4(-9.001e-02, -4.611e-02, 8.131e-03, 1.494e-01, 4.858e-02, -4.241e-02, 7.965e-02, 5.774e-02, 1.301e-01, 8.733e-03, 1.785e-01, 4.300e-02, 5.781e-02, -8.571e-02, -4.702e-03, -9.600e-03)); + r += mul(s7_6, M4(2.557e-03, -5.550e-02, -4.600e-02, -2.987e-02, 2.553e-02, 4.577e-02, -1.413e-02, 7.015e-02, 7.875e-03, 4.624e-02, -3.063e-02, -1.460e-02, -9.625e-02, 4.864e-02, 7.717e-02, 1.122e-02)); + r += mul(s7_7, M4(2.427e-02, 1.764e-03, -8.690e-04, -3.449e-02, 1.256e-01, -5.344e-03, -6.869e-02, 3.021e-02, 4.185e-02, -2.832e-02, 1.056e-01, -2.813e-02, -7.792e-02, -1.077e-02, 1.005e-01, 3.086e-02)); + r += mul(s7_8, M4(-3.092e-03, -6.802e-02, 6.346e-02, -2.153e-02, -7.435e-03, 1.288e-02, -2.567e-02, 4.306e-02, -2.163e-02, 4.064e-02, 1.108e-01, -4.732e-02, -2.121e-03, -4.824e-02, 2.181e-02, 3.273e-02)); + r += V4(-8.272e-03, -2.557e-04, 6.658e-03, 1.107e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r 
= 0.0; + r += mul(s0_0, M4(1.291e-03, -3.309e-02, -2.467e-01, -3.733e-02, -5.110e-02, 6.313e-02, -2.229e-02, 6.136e-03, 8.936e-02, 5.282e-03, 2.755e-01, 5.059e-01, -6.946e-02, 4.384e-02, -3.811e-02, -6.313e-02)); + r += mul(s0_1, M4(1.246e-02, -1.170e-02, 3.099e-03, -4.764e-02, -5.165e-02, -2.684e-02, -1.382e-01, 4.533e-02, -4.507e-02, -2.643e-02, -5.564e-01, 1.367e-01, 1.495e-02, 5.881e-02, -6.815e-02, 1.318e-02)); + r += mul(s0_2, M4(8.372e-02, -6.137e-03, 8.760e-02, 9.903e-02, 3.475e-02, 6.949e-02, -4.691e-02, -8.269e-02, 2.272e-01, -7.034e-02, -2.833e-01, 2.207e-02, -1.049e-01, 2.166e-02, 5.872e-02, -1.278e-01)); + r += mul(s0_3, M4(-1.501e-02, 1.160e-01, -1.171e-01, -9.724e-02, -3.094e-02, 3.841e-02, 1.923e-02, 2.305e-01, 6.338e-02, 6.327e-02, 3.315e-01, 3.623e-01, -5.208e-02, 9.519e-02, -2.070e-01, 2.181e-02)); + r += mul(s0_4, M4(-8.023e-02, 3.626e-02, -3.083e-02, 1.394e-01, -1.228e-01, -4.644e-01, -1.909e-01, -7.613e-02, -3.076e-02, -2.855e-01, 4.138e-02, -5.659e-01, -1.743e-01, 1.125e-01, -1.574e-01, 8.071e-02)); + r += mul(s0_5, M4(-3.689e-03, 5.895e-02, 1.111e-01, 2.136e-01, 5.732e-03, 8.751e-02, -7.304e-02, 1.102e-01, 4.004e-02, 1.096e-01, 2.202e-01, 1.658e-01, 8.041e-02, 9.342e-02, -2.770e-01, -8.961e-02)); + r += mul(s0_6, M4(3.158e-02, 2.862e-02, -7.400e-02, 7.428e-03, 7.282e-02, -2.117e-01, 1.888e-02, 4.067e-02, 6.130e-02, -7.229e-03, 1.495e-01, -1.791e-01, -6.976e-02, -1.617e-02, 1.593e-02, -6.792e-03)); + r += mul(s0_7, M4(6.011e-02, 1.272e-01, 9.626e-03, 3.926e-02, -1.353e-01, -2.315e-01, 4.421e-02, 7.008e-02, 9.128e-02, -4.335e-01, -2.076e-01, 3.413e-01, -5.586e-02, 6.942e-02, 1.589e-01, 2.176e-02)); + r += mul(s0_8, M4(-3.488e-02, 1.162e-01, -1.783e-01, -1.375e-01, -1.052e-02, 7.387e-03, -2.240e-02, 3.897e-02, -5.279e-02, -6.690e-02, 8.762e-03, 2.620e-01, -1.395e-02, -3.752e-02, -6.188e-03, -5.247e-03)); + r += mul(s1_0, M4(2.785e-02, 3.127e-02, 3.176e-04, -5.409e-03, -2.951e-02, -2.531e-02, -4.053e-02, 7.329e-02, 2.876e-02, -3.404e-02, 
1.595e-02, -1.918e-02, -1.012e-02, -4.033e-02, 3.814e-02, 8.836e-02)); + r += mul(s1_1, M4(-4.760e-02, 9.454e-02, -1.390e-02, 7.537e-04, -2.286e-02, -9.383e-02, -7.304e-02, 3.868e-02, 6.908e-02, 4.284e-02, -6.230e-02, -1.251e-03, -4.050e-03, -2.775e-02, -4.178e-02, -7.595e-03)); + r += mul(s1_2, M4(5.763e-02, 6.505e-03, -2.169e-02, -3.739e-02, -1.116e-01, 6.552e-02, 1.033e-01, -7.501e-02, -2.211e-02, -3.400e-02, -2.577e-01, -5.478e-02, -3.244e-03, -2.211e-01, 1.557e-01, -5.314e-03)); + r += mul(s1_3, M4(4.992e-02, -7.075e-02, -3.430e-03, -1.137e-02, -1.171e-01, 3.794e-02, 5.846e-02, 2.164e-01, 2.679e-02, -1.327e-02, -1.268e-02, -5.339e-02, 1.704e-02, 3.727e-02, 6.319e-03, -2.865e-03)); + r += mul(s1_4, M4(-1.487e-01, -1.586e-01, -9.926e-02, 4.898e-03, -8.301e-02, 3.593e-01, 2.840e-03, 3.870e-01, 1.008e-01, -8.199e-02, 1.092e-01, -3.774e-02, 6.955e-02, 4.101e-02, 1.254e-01, -8.251e-02)); + r += mul(s1_5, M4(1.882e-02, -2.076e-02, 9.574e-02, 1.333e-03, -4.918e-03, -1.920e-01, 2.487e-02, -2.706e-01, 4.526e-02, 7.234e-02, -3.801e-02, 1.536e-01, 1.891e-02, 3.662e-02, -6.406e-03, 1.036e-01)); + r += mul(s1_6, M4(1.646e-01, -7.632e-02, 6.412e-02, -3.754e-02, -5.181e-02, -2.892e-01, 1.465e-01, -7.335e-02, 9.862e-03, 2.385e-02, -1.057e-01, 3.245e-02, 3.830e-02, -1.001e-01, 1.262e-01, -7.832e-02)); + r += mul(s1_7, M4(5.726e-02, -6.863e-02, -7.450e-02, -3.658e-02, -9.617e-02, -4.115e-02, 6.957e-02, 1.118e-01, 5.442e-02, 1.292e-01, 1.318e-01, 4.223e-02, -6.430e-03, -1.448e-02, 8.713e-02, 1.552e-01)); + r += mul(s1_8, M4(-8.153e-02, 7.037e-02, -2.301e-01, -1.310e-02, -1.227e-01, 4.840e-02, 6.444e-02, -2.853e-02, -1.863e-02, 1.027e-02, -2.087e-02, 7.927e-02, 1.077e-01, -9.496e-02, 6.346e-02, -9.540e-03)); + r += mul(s2_0, M4(-7.252e-03, 3.289e-02, -1.045e-02, -6.928e-02, -3.508e-02, 3.188e-02, 6.015e-02, -1.932e-01, -3.900e-02, -8.703e-02, -3.339e-02, 4.936e-02, 8.343e-02, -4.920e-02, -1.612e-02, 1.044e-01)); + r += mul(s2_1, M4(-6.517e-02, 4.847e-03, 2.091e-02, 5.731e-03, 
-3.055e-02, 9.127e-02, 1.281e-01, -3.861e-01, -1.372e-01, 5.403e-02, 1.831e-02, 1.047e-01, 2.894e-02, -2.678e-02, 3.758e-03, 1.354e-01)); + r += mul(s2_2, M4(9.071e-02, -8.716e-02, -1.136e-01, 3.664e-02, 5.630e-02, -9.930e-02, -1.636e-02, -1.948e-01, 9.435e-03, 1.274e-02, -6.383e-02, -1.053e-01, -1.528e-02, -7.607e-02, 4.260e-02, -2.555e-02)); + r += mul(s2_3, M4(1.587e-02, -1.370e-01, 2.455e-02, -1.010e-01, 9.949e-02, -8.042e-02, -1.017e-01, -1.325e-01, -9.916e-02, 3.663e-02, -1.898e-02, 5.855e-02, -3.613e-03, 1.490e-02, -8.589e-02, 1.047e-01)); + r += mul(s2_4, M4(-3.639e-02, 2.064e-01, 1.610e-01, 1.304e-01, -2.004e-01, -6.326e-02, 6.047e-03, -2.861e-02, 4.202e-02, -5.227e-02, 6.733e-01, 5.908e-02, 8.132e-02, 1.850e-01, -1.535e-01, 5.053e-02)); + r += mul(s2_5, M4(-2.291e-01, 3.613e-02, 5.034e-02, -6.692e-02, 7.868e-02, 9.361e-02, -2.765e-02, -1.248e-01, -2.660e-01, 6.824e-02, -7.938e-03, 1.081e-01, -6.854e-02, 1.499e-01, 2.329e-01, -1.853e-02)); + r += mul(s2_6, M4(-3.399e-02, -6.068e-02, 5.757e-02, 7.576e-03, 6.779e-03, 3.723e-02, 1.359e-01, 4.284e-02, -2.183e-02, -8.148e-02, 1.455e-01, 4.367e-02, 6.087e-02, -1.987e-02, -1.896e-03, 1.028e-01)); + r += mul(s2_7, M4(-3.554e-02, -1.274e-01, 7.710e-02, 1.122e-01, -9.602e-02, 1.558e-01, 2.125e-02, -1.126e-01, -3.836e-02, 1.923e-01, 1.389e-01, -1.328e-01, 9.033e-02, 8.393e-02, -7.585e-02, 1.295e-01)); + r += mul(s2_8, M4(8.556e-02, 3.137e-02, 4.851e-03, -1.591e-02, -2.889e-02, 4.844e-02, -1.649e-01, -2.942e-03, -2.426e-01, -2.373e-02, 3.365e-02, 8.732e-02, 9.597e-02, 1.103e-02, 1.947e-01, 1.305e-01)); + r += mul(s3_0, M4(-3.671e-02, 6.794e-02, 6.830e-02, 5.933e-02, 1.526e-02, -2.671e-02, 3.779e-02, 1.246e-01, 1.483e-03, -3.035e-02, 1.430e-02, -3.135e-02, -2.275e-02, 4.349e-03, -5.725e-02, -2.111e-02)); + r += mul(s3_1, M4(-2.024e-01, 5.901e-02, 5.708e-02, 1.054e-01, 2.930e-02, -2.702e-02, -3.093e-02, -1.245e-01, -3.232e-02, -2.342e-02, 3.634e-02, 7.495e-03, -9.151e-03, 1.306e-02, 1.040e-01, -8.540e-03)); + r += 
mul(s3_2, M4(-3.154e-01, -1.633e-02, -1.445e-01, 9.304e-02, -3.271e-02, -7.434e-04, -7.387e-02, 3.626e-02, 2.005e-02, 1.719e-02, 6.899e-02, -4.114e-02, -7.850e-02, 6.077e-02, 2.796e-02, 2.726e-02)); + r += mul(s3_3, M4(-3.014e-02, -1.029e-01, -3.616e-02, 2.436e-02, -1.202e-02, 5.488e-03, -1.213e-01, 7.005e-02, 5.867e-02, 1.751e-02, -2.769e-01, 1.044e-01, -8.570e-02, 2.694e-03, -1.806e-01, 2.735e-02)); + r += mul(s3_4, M4(4.306e-03, -1.465e-01, 1.886e-02, 1.922e-01, -6.173e-02, 1.205e-01, 7.775e-02, -1.166e-01, 5.807e-02, -1.333e-01, 8.454e-02, 9.130e-02, 1.484e-03, 1.367e-01, -1.749e-01, 8.416e-02)); + r += mul(s3_5, M4(-1.530e-01, 1.928e-01, -7.508e-02, 1.057e-01, 2.565e-02, -1.256e-01, -1.364e-01, 1.981e-01, -5.027e-02, 9.177e-02, -7.396e-02, 7.215e-02, -7.897e-02, -1.941e-03, -1.516e-01, -1.162e-01)); + r += mul(s3_6, M4(-4.693e-02, 4.573e-02, 7.748e-02, -6.776e-02, -7.843e-03, 2.080e-02, 1.602e-01, 3.083e-02, 5.916e-02, -3.198e-03, 8.122e-02, -2.043e-02, -2.682e-02, -7.973e-03, -9.084e-02, -1.076e-01)); + r += mul(s3_7, M4(-1.377e-01, -5.352e-02, 2.281e-01, 5.663e-02, 2.000e-02, 8.774e-02, 8.066e-02, -1.324e-01, 1.974e-02, 8.534e-02, -9.524e-02, -7.202e-02, 7.866e-02, -3.841e-02, -1.220e-01, 1.124e-01)); + r += mul(s3_8, M4(-2.706e-02, -4.287e-02, 5.619e-02, 5.859e-02, -1.406e-02, 1.458e-02, -9.863e-02, 4.974e-02, -2.573e-02, 1.474e-01, -1.410e-01, 3.558e-02, 9.707e-02, 4.624e-03, -4.750e-02, -6.996e-02)); + r += mul(s4_0, M4(-9.143e-03, -4.070e-02, 3.796e-02, -1.124e-01, 5.682e-03, -5.952e-03, -1.083e-02, 5.149e-02, -3.908e-02, 8.725e-02, 7.219e-02, 2.210e-01, 2.867e-02, -5.124e-02, 1.472e-01, -1.298e-02)); + r += mul(s4_1, M4(8.129e-02, -5.610e-02, -1.131e-01, -7.804e-02, -6.142e-02, 4.405e-02, -1.935e-02, -7.470e-02, 3.230e-02, -1.014e-01, 5.993e-02, -6.655e-02, 1.223e-01, 5.139e-02, 7.857e-02, -5.123e-02)); + r += mul(s4_2, M4(-1.356e-02, -3.369e-02, -6.812e-02, -3.916e-02, 1.548e-02, -5.267e-02, -7.823e-02, -5.235e-02, -1.383e-01, 1.316e-02, 2.379e-01, 
1.717e-01, -1.081e-01, 2.928e-02, 9.720e-02, -1.537e-01)); + r += mul(s4_3, M4(2.175e-02, -2.189e-02, 6.430e-02, -3.381e-02, 1.401e-01, -1.494e-01, -2.787e-02, -1.198e-02, -4.582e-04, 7.047e-02, -1.555e-02, 1.178e-01, 8.088e-02, 3.321e-02, 2.701e-02, 4.858e-02)); + r += mul(s4_4, M4(5.293e-02, -1.604e-01, -1.621e-01, -9.211e-03, 2.066e-02, -1.176e-02, 6.895e-02, 2.245e-01, -1.504e-01, -1.978e-01, -1.014e-01, 2.641e-01, -1.521e-01, -7.876e-02, -1.239e-01, -2.509e-01)); + r += mul(s4_5, M4(1.149e-01, 7.713e-02, 5.451e-03, -1.498e-03, -5.268e-02, -4.282e-01, -1.254e-01, 4.745e-02, 3.676e-01, -4.390e-02, 6.719e-02, -1.942e-01, -2.018e-01, 1.954e-02, 3.925e-02, 2.230e-02)); + r += mul(s4_6, M4(1.009e-01, -8.919e-02, -1.311e-02, 1.212e-01, 5.160e-02, -5.236e-02, -1.207e-02, 3.179e-02, 5.818e-02, 3.536e-02, 5.943e-02, -1.230e-02, -9.296e-02, 1.911e-02, 3.187e-02, -6.943e-02)); + r += mul(s4_7, M4(5.399e-02, 1.614e-01, -1.053e-02, -1.500e-01, 4.298e-02, -1.918e-01, 6.961e-02, -3.480e-02, 8.393e-02, 2.346e-01, -1.064e-01, -3.027e-01, -1.800e-01, -9.699e-02, -3.132e-01, -1.252e-01)); + r += mul(s4_8, M4(-3.151e-02, -2.081e-02, 1.231e-01, 7.380e-02, 1.128e-01, -4.889e-01, 2.393e-02, 9.431e-02, -1.470e-01, 1.966e-01, 8.645e-02, 1.004e-01, -1.134e-01, -5.037e-02, -1.599e-02, 5.272e-02)); + r += mul(s5_0, M4(3.013e-03, -1.013e-01, -4.385e-02, 4.259e-02, -6.318e-03, -1.260e-02, -4.536e-02, -6.414e-03, 1.995e-02, 7.380e-02, -9.842e-02, -4.764e-03, -7.561e-03, 3.450e-02, -7.110e-02, -6.934e-02)); + r += mul(s5_1, M4(5.377e-02, -6.271e-03, -7.884e-03, -1.168e-01, -3.067e-02, 7.240e-03, 1.859e-02, -2.578e-02, 1.713e-02, -2.894e-02, 1.178e-01, 1.695e-01, -2.749e-03, 2.910e-02, 6.388e-02, 2.339e-02)); + r += mul(s5_2, M4(5.237e-02, -8.001e-02, -8.870e-02, -4.603e-02, -4.446e-02, 5.708e-02, 1.001e-01, 2.091e-01, -9.143e-02, 3.154e-02, -1.637e-01, 6.044e-02, 2.701e-02, -1.251e-02, 3.416e-02, -7.120e-02)); + r += mul(s5_3, M4(6.492e-02, 3.277e-02, -4.320e-02, -9.589e-02, 9.444e-02, 
-7.879e-02, 4.590e-02, -1.423e-01, -3.251e-02, -1.204e-01, -1.721e-02, -1.954e-02, -5.756e-02, 3.336e-02, 2.164e-02, -1.029e-02)); + r += mul(s5_4, M4(8.779e-02, -1.304e-01, 1.074e-01, 2.571e-01, -4.797e-02, 9.547e-02, 1.564e-01, -6.865e-02, 4.324e-02, 2.505e-01, 6.845e-02, -1.870e-01, 5.440e-02, -1.351e-01, -2.531e-02, 1.165e-01)); + r += mul(s5_5, M4(-1.558e-02, 1.486e-01, 1.116e-01, -1.995e-02, -4.122e-02, -3.351e-01, -7.674e-02, -1.872e-01, -1.542e-01, 8.164e-03, -1.037e-01, 5.730e-02, -8.803e-02, 3.506e-02, 1.483e-01, -8.270e-03)); + r += mul(s5_6, M4(4.533e-02, 1.590e-02, -2.368e-01, 1.415e-01, 1.681e-02, 1.097e-02, 5.881e-02, 1.226e-01, 4.352e-03, 2.580e-02, 1.681e-01, -1.145e-01, -6.715e-02, 3.188e-04, 1.465e-01, 2.430e-02)); + r += mul(s5_7, M4(-3.316e-02, -2.123e-01, -1.365e-01, 1.252e-02, 4.186e-02, 3.447e-02, 6.215e-02, -4.931e-03, -5.371e-02, -1.396e-01, -2.083e-02, 8.467e-03, -5.557e-02, 9.560e-02, 7.489e-02, 3.044e-02)); + r += mul(s5_8, M4(4.188e-02, -2.360e-02, -7.651e-04, -3.802e-02, 2.662e-02, 3.421e-02, -1.124e-01, -1.103e-01, 5.888e-02, 4.408e-02, 2.304e-02, -4.045e-02, -3.763e-02, -2.086e-01, 2.852e-02, 6.561e-02)); + r += mul(s6_0, M4(2.000e-03, -3.707e-02, 6.655e-02, -9.570e-02, -1.103e-01, 3.724e-02, 3.844e-02, -4.925e-02, -5.481e-02, -4.484e-02, -5.163e-02, -1.044e-02, 1.085e-03, -5.087e-02, 5.953e-02, -1.942e-02)); + r += mul(s6_1, M4(7.659e-02, -6.084e-02, 3.926e-02, 6.993e-02, -5.095e-02, 5.799e-02, -2.354e-01, -1.374e-02, -3.596e-02, 5.061e-02, -6.000e-02, -2.160e-02, 1.592e-02, -7.282e-02, -1.824e-02, -3.632e-02)); + r += mul(s6_2, M4(-1.971e-02, 9.811e-02, -5.877e-02, 3.986e-02, -2.436e-02, 2.289e-04, -4.199e-02, 6.084e-02, 4.541e-02, 4.178e-02, 1.270e-01, -6.550e-02, 7.945e-02, 4.713e-02, 4.875e-02, -8.960e-02)); + r += mul(s6_3, M4(7.449e-02, -1.146e-01, -3.359e-03, -1.997e-02, -2.627e-02, 5.933e-02, 1.181e-02, 3.691e-02, 7.721e-02, 1.114e-01, 1.744e-01, -2.290e-01, 1.149e-02, 1.007e-02, 2.065e-01, 1.243e-01)); + r += mul(s6_4, 
M4(1.933e-01, -1.209e-01, 3.074e-01, -1.103e-01, -1.163e-01, 1.550e-01, -3.422e-01, -4.443e-02, 2.270e-01, -8.393e-03, 6.525e-02, -2.852e-01, -6.272e-02, 5.415e-02, 4.149e-02, 1.192e-02)); + r += mul(s6_5, M4(8.516e-02, -1.365e-01, -1.232e-01, 1.896e-03, 5.019e-02, -1.542e-01, -2.283e-01, -1.782e-01, 1.130e-01, -4.613e-02, -1.606e-01, 2.634e-02, -2.544e-01, -6.516e-02, 1.625e-01, -1.134e-01)); + r += mul(s6_6, M4(-5.353e-02, -3.659e-03, -2.821e-02, 5.288e-02, 5.496e-02, 2.354e-02, -7.293e-02, 5.376e-02, -1.864e-02, -2.116e-02, 1.268e-01, 2.539e-02, 4.690e-02, -2.525e-02, -8.503e-02, -3.469e-02)); + r += mul(s6_7, M4(1.192e-01, 4.570e-02, 2.053e-01, 6.541e-02, -6.350e-02, -1.007e-01, -2.017e-01, -7.632e-02, 1.655e-01, 1.507e-01, -9.662e-03, 7.911e-02, 8.390e-02, -1.917e-01, 4.232e-02, 1.134e-01)); + r += mul(s6_8, M4(1.551e-02, 1.855e-01, -9.961e-02, -1.690e-01, 2.262e-02, -3.772e-02, -6.411e-02, -5.053e-02, 9.622e-02, -4.271e-02, 1.404e-01, 7.263e-02, 1.561e-01, 5.037e-03, -2.265e-02, -7.068e-02)); + r += mul(s7_0, M4(3.013e-02, 2.365e-02, 2.057e-01, -1.254e-02, 1.532e-03, 3.604e-02, -2.406e-03, -1.678e-02, 1.235e-02, -7.218e-03, 7.051e-02, -3.434e-02, -9.820e-03, -1.599e-05, -3.336e-02, -1.208e-01)); + r += mul(s7_1, M4(1.232e-03, 2.311e-02, 1.745e-02, -1.300e-01, 4.110e-02, 4.932e-02, 2.509e-02, 9.283e-02, -6.518e-02, -4.350e-02, 7.861e-02, 2.999e-02, -1.083e-02, 4.835e-02, -8.659e-02, 1.901e-02)); + r += mul(s7_2, M4(-1.039e-01, 5.171e-02, 9.599e-03, -5.878e-02, -5.113e-02, 4.864e-02, 9.085e-02, 4.473e-02, 8.764e-02, -6.118e-02, 1.407e-01, 1.097e-01, -4.434e-02, -4.787e-03, -1.495e-01, -2.378e-02)); + r += mul(s7_3, M4(-7.828e-02, 1.726e-02, -1.521e-01, 8.278e-02, -3.601e-02, 2.918e-03, 1.109e-01, -2.525e-02, -6.621e-02, 2.801e-02, -7.083e-02, -9.028e-02, 7.347e-03, -1.125e-02, -4.711e-02, 8.305e-02)); + r += mul(s7_4, M4(-2.757e-02, -2.611e-01, -2.337e-01, -1.758e-01, 2.819e-02, 1.947e-01, 1.002e-02, -1.656e-01, -1.142e-01, -1.469e-01, -1.394e-01, -1.329e-01, 
2.162e-02, 2.422e-01, 1.128e-01, -2.176e-01)); + r += mul(s7_5, M4(4.132e-03, -8.471e-02, -2.148e-01, -1.064e-01, -5.095e-02, 2.288e-02, -1.660e-01, -1.214e-01, 1.449e-01, -2.335e-01, -4.872e-02, -6.141e-03, -1.364e-01, 1.272e-01, -4.421e-02, 1.197e-02)); + r += mul(s7_6, M4(-2.372e-02, 3.105e-02, 1.059e-01, 1.354e-02, 3.583e-02, -3.911e-02, -1.577e-01, 8.424e-02, 7.771e-02, 1.798e-02, -5.473e-02, 1.030e-01, -4.950e-02, 6.024e-02, -1.077e-01, 3.528e-02)); + r += mul(s7_7, M4(8.236e-02, -4.677e-02, -9.013e-02, 9.874e-03, 3.090e-02, -1.172e-01, -9.468e-03, 2.097e-02, 1.017e-01, -8.313e-02, -7.684e-02, 1.709e-01, -8.242e-02, -5.089e-03, 1.613e-02, -4.114e-02)); + r += mul(s7_8, M4(-1.382e-02, 6.332e-02, -8.293e-02, -8.057e-02, 5.590e-02, -1.382e-03, 1.901e-02, -2.149e-02, 6.884e-02, 4.025e-02, 9.417e-02, 4.098e-02, -6.400e-02, 4.939e-02, 2.244e-02, -2.732e-02)); + r += V4(7.346e-03, -4.905e-03, 3.653e-03, -1.770e-02); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.541e-02, 1.657e-01, 5.189e-02, 4.130e-02, -1.890e-02, -1.867e-01, 6.755e-03, 3.576e-02, -2.349e-01, -3.670e-01, 1.333e-01, 1.520e-01, 5.807e-02, -9.119e-02, -5.037e-02, 1.544e-02)); + r += mul(s0_1, M4(-2.454e-02, 5.878e-02, -1.489e-03, -2.959e-02, 7.611e-02, -6.616e-02, -7.461e-02, -7.549e-02, 1.804e-01, 1.865e-02, -2.235e-01, 1.794e-01, 4.243e-02, 
-6.137e-02, -1.085e-01, 7.175e-02)); + r += mul(s0_2, M4(2.578e-02, -2.126e-02, -2.135e-02, 3.540e-02, -1.959e-02, 9.802e-02, 3.496e-03, 5.008e-03, -1.620e-01, -9.267e-02, 4.342e-02, 1.210e-02, 1.633e-01, -5.267e-02, -3.548e-02, 5.545e-02)); + r += mul(s0_3, M4(1.066e-01, 1.287e-02, 1.572e-01, 1.008e-01, -2.488e-01, -3.298e-01, -3.237e-02, 4.881e-02, 1.514e-01, -6.376e-01, 1.029e-01, 5.323e-03, 1.547e-01, 3.549e-02, -1.129e-01, 8.727e-02)); + r += mul(s0_4, M4(6.195e-02, 5.402e-02, -7.368e-02, 3.011e-03, -5.653e-02, -6.636e-03, 1.059e-02, -1.094e-01, 3.677e-01, -2.147e-01, 7.097e-03, 6.449e-02, -1.228e-01, 1.789e-02, -8.807e-02, 1.331e-01)); + r += mul(s0_5, M4(1.190e-01, 1.595e-02, 5.008e-02, 1.302e-02, -8.819e-02, 5.241e-02, -1.451e-02, 2.478e-02, -2.438e-02, 2.972e-01, -1.010e-01, -1.309e-02, -4.738e-02, 3.030e-02, -4.334e-02, 1.129e-01)); + r += mul(s0_6, M4(-1.379e-01, -1.129e-01, -7.080e-02, 1.190e-01, -2.756e-03, -3.091e-02, -1.385e-01, 4.875e-02, 2.495e-01, 7.972e-02, -1.587e-01, -2.658e-02, 2.143e-01, 6.147e-02, -5.138e-02, 8.032e-02)); + r += mul(s0_7, M4(-6.998e-02, -2.174e-04, 9.370e-02, 5.419e-02, 1.114e-01, -4.153e-02, -6.582e-02, 1.344e-03, -2.531e-01, -3.355e-01, -3.122e-03, 4.636e-02, 4.160e-02, 6.323e-03, -9.927e-02, 7.349e-02)); + r += mul(s0_8, M4(1.713e-02, 3.632e-02, 3.580e-02, 6.532e-02, -7.740e-02, 6.009e-02, -7.090e-02, -6.667e-02, 2.703e-01, 3.117e-01, -3.105e-01, -1.004e-01, 1.942e-01, 7.943e-02, -5.937e-02, 1.907e-04)); + r += mul(s1_0, M4(-6.210e-02, -1.937e-01, 9.403e-03, 8.441e-03, 4.634e-02, 1.566e-01, 2.479e-03, 1.181e-01, -2.323e-02, -6.377e-02, 3.282e-02, -2.346e-02, -1.981e-02, -1.853e-02, -2.167e-02, -1.601e-02)); + r += mul(s1_1, M4(-1.900e-02, 4.846e-02, -1.145e-01, -3.643e-02, 9.235e-02, -7.407e-02, 3.720e-02, -3.503e-02, -8.768e-02, 4.017e-02, -1.982e-03, 3.914e-02, -1.236e-01, 1.869e-01, 8.603e-02, -2.607e-02)); + r += mul(s1_2, M4(-1.153e-02, -4.062e-02, -2.601e-02, 2.952e-02, 6.171e-02, 2.022e-02, 3.942e-03, -4.038e-02, 
1.993e-02, -6.770e-02, -9.243e-03, -5.448e-03, 2.202e-02, -2.069e-02, -3.895e-02, 3.428e-02)); + r += mul(s1_3, M4(-3.642e-03, -3.672e-01, 8.635e-02, 4.287e-03, 1.548e-01, -1.348e-01, 4.053e-01, 9.992e-02, 1.639e-02, -1.043e-01, -1.408e-01, -8.160e-02, 1.151e-02, -6.836e-02, -1.649e-01, 9.633e-02)); + r += mul(s1_4, M4(8.913e-02, -1.053e-01, -2.394e-01, -5.300e-02, -2.605e-01, 1.961e-01, -2.876e-01, 9.147e-03, 5.131e-02, 4.177e-03, -1.395e-02, 1.537e-02, 1.370e-01, 2.980e-01, 5.745e-02, -4.793e-02)); + r += mul(s1_5, M4(3.644e-02, 3.103e-02, -6.005e-02, -6.355e-02, 1.551e-02, -1.944e-01, 7.983e-02, 4.173e-02, 1.034e-01, 1.462e-01, -7.341e-02, -7.670e-03, 7.584e-02, 8.634e-02, 8.598e-02, 4.030e-02)); + r += mul(s1_6, M4(-1.251e-01, -1.384e-01, -1.873e-01, 8.910e-02, 1.206e-01, 9.330e-02, 1.335e-01, 7.109e-02, -3.307e-02, -6.496e-02, -9.494e-02, -5.701e-03, 5.430e-02, 1.328e-02, 5.097e-02, 4.453e-02)); + r += mul(s1_7, M4(-6.189e-02, -5.578e-02, 1.039e-01, 2.118e-01, 2.626e-02, 9.107e-02, -8.293e-02, -1.334e-01, -5.788e-02, 2.230e-02, 8.005e-02, -9.271e-04, -1.776e-01, 1.208e-01, -1.381e-01, -2.869e-01)); + r += mul(s1_8, M4(1.049e-01, 9.033e-03, -9.022e-02, 8.736e-02, -5.168e-02, 1.815e-03, 2.325e-01, 3.552e-03, -5.869e-03, 1.311e-02, 5.271e-04, 8.746e-03, -1.477e-01, -1.359e-01, 1.665e-01, -1.557e-01)); + r += mul(s2_0, M4(6.086e-03, 2.242e-02, 1.090e-01, 7.514e-04, 1.319e-01, -1.659e-01, 9.627e-02, -1.201e-02, -3.408e-02, 9.862e-02, 5.550e-02, -3.620e-02, -4.180e-02, 8.080e-02, 4.050e-02, 1.541e-01)); + r += mul(s2_1, M4(-1.202e-01, -2.337e-02, 1.652e-02, 1.014e-01, 6.757e-02, -2.003e-01, -2.529e-01, 1.267e-01, -2.596e-02, 2.547e-02, -1.586e-01, -2.041e-02, -2.650e-02, 5.746e-02, -4.375e-02, 1.520e-01)); + r += mul(s2_2, M4(-1.836e-02, 3.823e-02, -2.390e-02, -1.450e-02, 2.732e-02, -1.365e-01, 2.013e-02, -3.332e-02, 4.141e-02, -4.937e-02, 5.944e-02, 3.866e-02, 8.445e-02, -8.183e-02, 4.818e-02, 1.139e-01)); + r += mul(s2_3, M4(1.174e-01, 6.777e-02, 1.279e-02, 
3.960e-02, -2.694e-02, 1.882e-01, -1.899e-02, 3.758e-02, 7.822e-02, 4.007e-01, 1.465e-01, 1.781e-01, 3.478e-02, 5.389e-02, -6.454e-04, 1.149e-01)); + r += mul(s2_4, M4(-3.450e-03, -1.621e-01, -3.191e-02, 1.023e-01, -2.158e-01, -1.803e-03, 5.940e-02, 2.468e-01, 4.695e-01, -2.193e-01, 5.147e-02, -1.365e-01, -3.636e-02, -3.669e-02, -2.557e-01, 8.754e-02)); + r += mul(s2_5, M4(4.038e-02, 1.148e-01, -5.276e-02, 4.902e-02, -3.413e-02, 4.901e-02, -5.960e-02, 3.716e-02, 3.228e-02, 1.062e-02, 1.174e-01, -3.452e-02, 1.159e-01, -1.491e-02, -4.849e-02, 5.492e-02)); + r += mul(s2_6, M4(3.433e-02, -1.013e-03, -5.464e-02, -6.439e-02, -1.780e-02, -4.620e-03, 5.071e-02, 1.898e-02, -1.033e-02, 3.266e-02, 2.992e-02, -1.400e-01, 1.042e-01, 2.634e-02, -2.643e-02, 6.895e-02)); + r += mul(s2_7, M4(6.651e-04, 1.780e-03, 3.300e-02, -5.542e-02, 3.230e-02, -4.599e-02, 9.108e-02, 4.865e-02, 1.771e-01, 1.276e-02, 2.705e-01, 1.487e-01, 6.958e-02, 3.214e-02, -1.242e-01, -1.647e-02)); + r += mul(s2_8, M4(-2.757e-02, 4.429e-03, -1.220e-02, -1.905e-02, 2.517e-02, 3.895e-02, -5.111e-02, 1.087e-01, 3.492e-03, 4.499e-02, 5.900e-02, -1.582e-02, 1.012e-01, -3.097e-02, -1.988e-01, 3.916e-02)); + r += mul(s3_0, M4(-2.900e-04, -5.983e-02, 3.252e-03, 2.125e-02, -4.191e-02, 6.632e-02, -1.033e-01, -5.799e-02, -2.119e-02, -2.728e-02, -5.169e-02, 2.993e-02, -2.283e-02, 2.173e-02, -3.010e-02, 2.619e-02)); + r += mul(s3_1, M4(2.626e-02, 2.380e-02, 7.962e-02, 3.137e-02, 5.138e-02, -4.827e-02, 8.346e-02, -1.010e-01, 4.306e-02, -1.095e-01, -2.155e-02, -8.967e-02, 2.533e-02, -1.816e-02, 2.855e-02, 2.077e-02)); + r += mul(s3_2, M4(2.225e-02, 5.791e-02, 6.161e-02, 5.779e-03, -5.488e-02, -3.480e-02, -6.262e-03, 5.816e-02, 9.009e-03, -4.501e-04, 6.241e-02, 2.831e-03, 2.386e-02, 2.153e-02, 4.770e-02, -6.131e-03)); + r += mul(s3_3, M4(8.755e-02, -7.699e-02, -5.070e-02, 1.162e-01, -1.126e-01, 1.112e-01, 3.857e-02, 1.604e-02, -4.325e-02, 1.515e-01, -3.948e-02, 1.030e-01, -5.669e-02, -6.762e-02, -2.610e-02, -4.522e-02)); + r 
+= mul(s3_4, M4(-1.948e-01, -6.880e-02, -6.947e-02, 1.657e-01, 7.452e-02, -6.016e-02, 1.256e-01, 1.398e-01, 5.748e-02, -1.528e-01, 3.831e-02, -8.106e-03, -1.903e-01, 1.807e-01, -1.225e-01, 1.406e-01)); + r += mul(s3_5, M4(-2.452e-02, 2.292e-01, 1.474e-01, 7.133e-02, -1.047e-01, 6.568e-02, -1.582e-02, -1.310e-01, -1.071e-01, 5.300e-02, -2.739e-03, -2.249e-02, 9.331e-02, -1.606e-01, 3.771e-02, -4.263e-02)); + r += mul(s3_6, M4(1.518e-01, 1.917e-02, 1.447e-01, 9.757e-02, 1.160e-02, 4.976e-02, 8.316e-02, -4.158e-02, 1.367e-03, -2.320e-02, -8.661e-02, 2.788e-02, 8.342e-02, -1.139e-02, -2.212e-02, 6.059e-02)); + r += mul(s3_7, M4(1.919e-01, 7.601e-02, 1.733e-01, 1.862e-01, 3.007e-02, -1.910e-02, 2.440e-03, -1.434e-01, 4.645e-02, -9.358e-02, 9.214e-03, 1.602e-01, -1.101e-01, 1.797e-02, -2.656e-01, -1.126e-01)); + r += mul(s3_8, M4(5.247e-02, -4.065e-02, 2.272e-02, -2.877e-02, -9.167e-02, -3.887e-02, 7.758e-03, -8.309e-02, 1.724e-02, 2.718e-02, -2.350e-02, 1.850e-02, -2.346e-02, -8.699e-03, -2.707e-02, -5.296e-04)); + r += mul(s4_0, M4(3.924e-03, 2.069e-02, -5.953e-02, -1.139e-02, -6.720e-02, -4.303e-02, 4.648e-02, 2.124e-02, -4.898e-02, -7.581e-02, -1.027e-01, 7.361e-02, 3.286e-02, -4.268e-02, 4.958e-02, 9.935e-03)); + r += mul(s4_1, M4(-5.555e-02, 2.395e-02, -9.125e-03, -1.445e-01, -9.143e-02, -3.216e-02, 1.013e-01, 5.860e-02, 1.781e-01, -1.297e-01, 1.468e-01, -9.593e-02, 1.366e-02, -2.647e-01, -1.196e-02, -9.786e-02)); + r += mul(s4_2, M4(2.921e-02, -6.674e-02, 7.710e-02, 1.678e-02, -6.434e-02, 7.683e-02, -4.142e-02, -4.854e-02, 4.405e-02, 1.401e-01, -2.295e-01, -2.968e-02, -1.859e-01, -2.424e-02, 1.021e-01, -7.019e-03)); + r += mul(s4_3, M4(1.175e-02, 2.339e-01, 1.141e-01, -6.781e-02, -6.412e-02, -4.740e-02, 8.388e-03, -9.825e-03, -6.027e-02, 1.537e-01, 4.414e-01, -3.699e-02, -3.760e-01, -3.269e-01, 5.088e-03, 3.482e-02)); + r += mul(s4_4, M4(-4.872e-02, -1.092e-01, -1.548e-01, -1.096e-01, -1.383e-01, 6.343e-02, -1.440e-01, 4.788e-02, -3.298e-01, -1.797e-03, 2.764e-01, 
-1.386e-01, 4.241e-02, 1.295e-01, -8.381e-02, 8.987e-02)); + r += mul(s4_5, M4(-8.056e-02, 4.051e-02, -3.443e-03, 9.557e-03, -6.056e-02, -6.300e-02, 5.039e-02, -3.456e-02, -1.464e-01, -1.846e-01, 4.648e-02, -1.561e-01, -2.740e-02, 2.142e-01, -1.022e-01, -2.809e-02)); + r += mul(s4_6, M4(-5.604e-02, -1.731e-03, -2.083e-02, 9.337e-03, 1.118e-02, 8.124e-03, -1.683e-01, 2.042e-02, 2.922e-03, 6.654e-03, 2.038e-02, 1.058e-01, 4.162e-02, -9.044e-02, 1.658e-01, -1.673e-02)); + r += mul(s4_7, M4(2.176e-01, 6.536e-02, 2.805e-02, 7.785e-02, -2.137e-01, -1.928e-01, -1.061e-01, 5.347e-02, 3.740e-01, -4.384e-02, 1.540e-01, -1.887e-02, 1.949e-01, 3.791e-02, 1.475e-01, 1.193e-01)); + r += mul(s4_8, M4(-7.921e-02, 3.755e-02, 8.097e-03, -3.115e-02, -1.875e-01, -7.991e-02, -5.474e-02, -1.879e-02, 9.336e-02, 1.441e-01, -2.849e-02, 3.442e-02, 8.304e-02, 1.779e-02, -1.017e-01, -8.835e-02)); + r += mul(s5_0, M4(-6.224e-02, -1.286e-01, -1.065e-01, -5.836e-02, -4.088e-02, 9.037e-02, 7.194e-02, 8.010e-03, 3.740e-02, 7.190e-02, 1.243e-01, 9.762e-02, 4.848e-02, 4.667e-03, 1.339e-01, 8.745e-02)); + r += mul(s5_1, M4(1.937e-02, 1.509e-01, -8.489e-02, -7.999e-02, 6.341e-03, -1.031e-01, -1.684e-03, 1.612e-01, -1.448e-02, -2.470e-02, -8.730e-02, -1.667e-02, -6.683e-03, 1.188e-03, 8.151e-03, 3.647e-02)); + r += mul(s5_2, M4(4.567e-02, -5.051e-02, 2.011e-02, 4.950e-02, 6.689e-03, -8.069e-02, 7.060e-02, -1.987e-01, 4.187e-02, -5.661e-02, 5.078e-02, -2.869e-02, 1.583e-02, -6.038e-02, 8.259e-02, 1.968e-02)); + r += mul(s5_3, M4(-5.837e-02, -3.722e-02, 9.666e-03, -4.201e-02, -1.323e-01, 1.443e-01, -5.002e-03, 4.363e-02, -1.992e-02, -1.277e-02, -3.254e-02, -1.540e-02, 3.748e-02, 2.348e-01, -4.119e-02, 7.178e-02)); + r += mul(s5_4, M4(-8.116e-02, 3.259e-04, -2.917e-03, -2.878e-02, 2.343e-01, 5.959e-01, -1.587e-02, 1.861e-01, 3.467e-02, -1.485e-01, 8.425e-02, 1.610e-02, -8.615e-02, -6.870e-02, -1.616e-01, 1.586e-02)); + r += mul(s5_5, M4(-3.545e-02, 7.843e-02, 6.399e-03, -1.176e-02, 2.016e-01, -2.844e-01, 
1.951e-01, -1.060e-01, -4.997e-02, -7.652e-02, 1.541e-02, 6.366e-02, -1.896e-01, -2.584e-02, 2.438e-02, 1.348e-01)); + r += mul(s5_6, M4(-1.974e-01, -8.405e-03, -2.123e-01, -8.176e-02, 9.694e-02, 5.998e-02, 1.547e-02, -2.905e-03, 8.708e-02, 8.034e-03, 3.775e-02, 1.818e-01, 5.126e-02, 3.987e-02, 1.284e-01, -1.518e-03)); + r += mul(s5_7, M4(5.220e-02, 1.340e-02, -1.743e-01, 4.127e-02, -1.978e-02, 7.040e-02, -2.645e-02, -1.323e-02, 9.182e-02, -4.373e-02, -1.061e-01, -1.235e-02, -2.161e-02, -2.467e-02, -2.966e-02, 8.084e-02)); + r += mul(s5_8, M4(-6.631e-02, -5.012e-02, -4.244e-02, -3.246e-02, 4.081e-02, -8.937e-02, 2.919e-01, 1.024e-01, -1.422e-02, 1.074e-01, -9.297e-02, 5.605e-02, 6.896e-02, -4.789e-02, -1.934e-02, 1.737e-02)); + r += mul(s6_0, M4(-9.073e-03, -1.754e-02, 4.972e-02, -6.835e-02, -3.904e-02, -1.677e-01, 8.521e-02, 3.834e-02, 2.069e-02, 4.147e-02, 1.049e-01, 3.096e-02, -2.174e-02, -8.744e-02, -1.113e-02, -7.001e-02)); + r += mul(s6_1, M4(3.434e-02, -2.274e-02, 1.253e-01, -1.063e-01, -3.540e-02, -9.340e-02, 6.120e-02, 1.239e-01, 5.530e-02, 7.165e-02, -5.067e-03, 1.067e-01, -9.119e-02, -8.561e-02, -1.101e-01, 1.067e-01)); + r += mul(s6_2, M4(-4.281e-02, -2.995e-02, -1.066e-01, 6.098e-02, 8.980e-03, -7.168e-02, -4.666e-02, 2.937e-02, 3.310e-03, 3.018e-02, 2.480e-02, 7.947e-02, -1.268e-01, 7.243e-02, -1.587e-01, 2.873e-02)); + r += mul(s6_3, M4(2.600e-02, 5.897e-02, -1.263e-03, -6.894e-02, -6.384e-02, -1.612e-01, -5.427e-02, 6.186e-02, 1.760e-01, 2.669e-01, 3.821e-02, 3.010e-01, -1.674e-01, -7.476e-02, 8.858e-02, 1.153e-01)); + r += mul(s6_4, M4(1.203e-01, 1.557e-01, 1.469e-01, -9.516e-02, 1.520e-03, -1.370e-02, -1.449e-01, 2.500e-01, -4.001e-02, 1.999e-01, 2.881e-01, -1.819e-02, -3.389e-02, 6.567e-02, 8.503e-02, 1.733e-02)); + r += mul(s6_5, M4(3.438e-02, -1.250e-01, -6.958e-02, -4.063e-02, -5.483e-02, -6.863e-02, -1.213e-01, -2.927e-02, 1.284e-01, -6.142e-03, 1.957e-02, -1.307e-02, 1.251e-01, 1.507e-01, -2.603e-01, -1.227e-02)); + r += mul(s6_6, 
M4(2.157e-02, -1.465e-02, 1.523e-01, -6.046e-02, -8.609e-02, -4.715e-02, 1.801e-02, 9.497e-02, -1.116e-01, 2.122e-01, 1.647e-01, -4.762e-02, -1.156e-02, 1.387e-01, 1.673e-02, 4.068e-02)); + r += mul(s6_7, M4(-1.946e-01, 9.404e-02, -2.110e-02, 3.399e-02, 6.190e-02, 1.982e-02, -1.791e-01, -2.514e-02, -1.027e-01, 1.270e-01, 6.977e-02, -1.515e-01, -2.467e-01, 1.036e-02, -1.526e-01, 5.823e-02)); + r += mul(s6_8, M4(-1.138e-02, -1.343e-03, 1.883e-02, 4.750e-03, -1.078e-01, -6.960e-02, 2.031e-02, -9.450e-03, -1.573e-01, 5.856e-02, 8.049e-02, 1.523e-02, -7.131e-03, 2.186e-02, -1.193e-01, -4.367e-03)); + r += mul(s7_0, M4(6.713e-02, -1.520e-01, -6.436e-02, -6.020e-02, 2.039e-02, -1.286e-01, -5.889e-02, 5.892e-03, 5.342e-02, 2.392e-02, 1.846e-02, -4.619e-02, 8.635e-02, -3.852e-02, 1.285e-02, -7.964e-02)); + r += mul(s7_1, M4(6.131e-02, -4.675e-03, -2.128e-03, -2.768e-02, -1.824e-02, 1.480e-02, 7.807e-03, 2.532e-02, 3.629e-02, 3.362e-02, 8.573e-02, 5.274e-02, 6.908e-02, 4.769e-02, 1.208e-01, -1.841e-01)); + r += mul(s7_2, M4(1.717e-03, 5.158e-02, -3.611e-02, 2.600e-02, -1.293e-02, 1.304e-03, -3.547e-02, 2.271e-02, 3.415e-02, 7.292e-02, -1.885e-02, 8.324e-02, 2.077e-02, -6.252e-02, 4.631e-03, -1.112e-03)); + r += mul(s7_3, M4(-5.372e-02, 2.620e-02, 5.227e-02, -3.020e-03, -8.740e-02, -1.964e-02, -2.730e-02, -2.839e-02, 5.457e-02, 3.909e-02, 2.537e-02, 6.894e-02, 5.165e-02, 4.257e-02, -1.101e-01, 7.219e-02)); + r += mul(s7_4, M4(3.319e-02, -2.076e-02, 6.214e-02, -2.477e-02, -1.349e-01, -4.793e-02, 5.303e-02, 2.127e-01, -8.348e-02, 2.164e-01, 1.433e-01, -2.909e-02, 1.139e-01, 1.535e-01, 1.743e-01, 1.616e-01)); + r += mul(s7_5, M4(8.549e-02, 5.226e-02, -1.042e-01, 1.041e-02, 4.782e-03, 7.801e-02, -6.359e-02, -3.618e-02, -1.527e-02, 8.395e-02, -4.284e-02, -7.498e-03, 1.422e-01, 1.298e-01, -1.283e-01, 6.424e-02)); + r += mul(s7_6, M4(3.569e-02, -6.977e-02, -8.615e-02, 2.842e-02, 7.590e-03, -1.724e-02, -7.115e-04, -7.015e-03, -1.663e-01, -8.630e-03, -2.303e-03, -4.852e-02, 
-4.141e-02, 1.975e-02, 5.583e-03, -1.889e-03)); + r += mul(s7_7, M4(-8.110e-02, 2.969e-02, 7.912e-02, 1.544e-01, -3.533e-02, 8.551e-02, -4.045e-02, -3.620e-02, -2.196e-01, -4.689e-03, -7.544e-02, 9.318e-02, 1.025e-02, 5.217e-02, 1.583e-01, -1.998e-02)); + r += mul(s7_8, M4(5.941e-02, 5.430e-02, -4.319e-02, 1.788e-01, -1.098e-01, -3.786e-02, 4.683e-02, 1.391e-02, 7.806e-02, 6.420e-02, 6.208e-03, 1.265e-01, 3.927e-02, 6.978e-03, 7.370e-02, 3.876e-02)); + r += V4(1.144e-02, 3.230e-03, 4.037e-03, -8.587e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.333e-02, 7.243e-02, -1.420e-02, -1.528e-02, 9.266e-04, -8.016e-02, 3.355e-03, 5.928e-02, 7.778e-03, 1.089e-01, -3.087e-01, -1.395e-02, -4.188e-02, 6.859e-02, -4.320e-03, 5.852e-02)); + r += mul(s0_1, M4(1.893e-03, -4.743e-02, -3.286e-03, 4.100e-02, 1.026e-01, -2.680e-02, 1.772e-02, 3.075e-02, 1.845e-01, 8.216e-02, -1.240e-01, -3.225e-02, -5.808e-02, 5.763e-02, 1.253e-02, 6.966e-02)); + r += mul(s0_2, M4(3.112e-02, 6.029e-03, 4.024e-02, 9.813e-02, -1.934e-02, -1.030e-01, -5.682e-03, -1.142e-01, 1.591e-01, -4.722e-02, -4.683e-02, -4.834e-02, 4.591e-02, 1.959e-01, 2.813e-02, 1.684e-02)); + r += mul(s0_3, M4(3.717e-03, 4.428e-02, -2.979e-02, -1.319e-01, -7.907e-02, -2.063e-01, 1.318e-01, 3.302e-01, 3.874e-01, -1.794e-01, -3.385e-01, 8.205e-02, -9.249e-02, 8.499e-02, 
5.865e-02, 7.559e-02)); + r += mul(s0_4, M4(7.178e-02, -1.156e-01, 1.608e-01, -9.580e-03, 1.050e-01, -2.917e-01, -2.364e-02, -2.877e-01, -2.610e-02, 1.579e-01, -2.220e-01, 4.093e-01, -1.190e-03, -1.960e-02, 1.909e-01, 4.487e-02)); + r += mul(s0_5, M4(-3.653e-03, 1.197e-04, -5.824e-02, 4.286e-02, -4.729e-02, -5.418e-02, 3.518e-03, -2.898e-02, 1.607e-01, -2.224e-01, -5.801e-02, -1.593e-01, 4.829e-02, 4.995e-02, -3.266e-02, 2.814e-02)); + r += mul(s0_6, M4(6.767e-02, -2.756e-03, 6.101e-02, -5.580e-02, 3.613e-02, 1.924e-02, 3.047e-03, -1.283e-01, 3.231e-01, 1.304e-01, -9.342e-02, -4.034e-02, 4.553e-02, -7.184e-03, 1.890e-02, -1.038e-02)); + r += mul(s0_7, M4(1.376e-01, -8.569e-02, -3.427e-02, 5.919e-02, 1.711e-01, -1.167e-01, 4.846e-02, 8.636e-03, -5.423e-03, -2.490e-02, -2.068e-01, 2.383e-01, -2.875e-02, -7.850e-02, -9.597e-02, 7.667e-02)); + r += mul(s0_8, M4(1.119e-02, -3.089e-02, -1.391e-01, -5.112e-02, 7.785e-02, -1.214e-01, 6.563e-02, -2.149e-02, 2.425e-01, -3.175e-02, 8.154e-02, 5.956e-02, -1.279e-03, 4.092e-02, -4.260e-03, -1.078e-02)); + r += mul(s1_0, M4(-7.396e-02, 5.582e-02, -1.548e-02, 2.137e-02, 4.709e-02, -5.059e-02, -2.153e-02, 2.702e-03, 1.671e-02, -4.083e-03, 2.723e-02, -1.240e-02, -1.083e-01, -5.522e-02, -3.805e-02, -5.451e-03)); + r += mul(s1_1, M4(-5.498e-02, -8.589e-02, -3.719e-02, 3.869e-02, -6.313e-02, -1.802e-01, -3.138e-02, -1.590e-02, 8.387e-02, -3.262e-02, -2.200e-02, -1.219e-01, 7.055e-02, -3.643e-02, -3.876e-02, 2.919e-02)); + r += mul(s1_2, M4(-5.971e-02, -9.605e-02, 1.531e-02, 2.150e-02, -1.796e-02, 1.420e-01, -2.047e-02, -7.588e-02, -1.726e-02, -5.413e-02, 2.955e-02, -6.840e-03, 1.229e-01, -5.442e-02, 1.122e-02, 9.035e-02)); + r += mul(s1_3, M4(-2.981e-02, 4.970e-02, 1.666e-02, 3.094e-02, -7.380e-02, -6.886e-03, 9.964e-02, -1.509e-01, 4.556e-03, 3.679e-02, -1.705e-02, 2.600e-02, 2.041e-02, -1.858e-02, -9.259e-02, -6.156e-02)); + r += mul(s1_4, M4(-1.437e-01, -1.724e-01, 2.822e-01, 2.717e-01, 4.589e-02, -6.308e-01, 5.793e-02, -2.265e-02, 
5.042e-02, 1.364e-01, -3.804e-02, 4.237e-03, 2.043e-01, 1.153e-02, 1.238e-01, 1.210e-02)); + r += mul(s1_5, M4(1.812e-02, -5.285e-02, 2.342e-02, 7.906e-02, -7.495e-02, 2.884e-01, 8.660e-02, 2.412e-02, 4.516e-02, -1.052e-01, -1.014e-03, -2.533e-02, 4.660e-02, 9.580e-02, -7.580e-02, -5.971e-02)); + r += mul(s1_6, M4(-9.260e-02, 6.785e-02, 1.371e-01, -1.164e-01, 1.498e-01, 1.070e-01, -6.899e-02, -1.077e-01, -5.263e-02, 1.199e-02, -1.819e-03, -3.347e-02, -1.207e-02, -5.150e-02, -7.479e-02, -8.583e-02)); + r += mul(s1_7, M4(-4.297e-02, -3.878e-02, 1.497e-01, 7.435e-02, 9.017e-02, -1.620e-02, 1.384e-01, 3.013e-02, 6.530e-02, 7.376e-02, -1.695e-02, 4.129e-02, -1.378e-01, -1.839e-01, 5.109e-02, -2.315e-02)); + r += mul(s1_8, M4(-9.330e-02, -6.025e-02, 1.083e-02, 6.513e-02, -6.527e-02, -8.349e-02, 3.801e-03, -5.190e-02, 4.935e-02, -1.226e-02, -1.807e-05, 2.047e-02, -1.693e-01, 4.893e-03, 4.495e-02, -8.611e-02)); + r += mul(s2_0, M4(-4.397e-03, -2.235e-03, 1.233e-02, 1.180e-02, -4.661e-02, 6.362e-02, 1.035e-01, 7.738e-02, -2.816e-02, 1.176e-01, -2.514e-02, 5.308e-02, 1.755e-01, 1.390e-01, -4.273e-03, 4.984e-02)); + r += mul(s2_1, M4(5.881e-02, -7.890e-02, -4.904e-02, -1.664e-01, -1.577e-01, -1.212e-01, 1.829e-01, 1.517e-01, 2.948e-02, -3.110e-02, -6.089e-02, 8.470e-02, 2.931e-01, 1.623e-02, -1.035e-02, 1.867e-02)); + r += mul(s2_2, M4(8.029e-03, -5.064e-02, 3.720e-02, 4.260e-02, -9.060e-03, 2.332e-01, 5.485e-02, -1.948e-01, -4.837e-02, -2.903e-02, -3.019e-02, -9.514e-04, 2.468e-01, -1.600e-02, 5.436e-02, 5.084e-02)); + r += mul(s2_3, M4(-1.575e-01, 2.353e-02, 3.893e-02, 5.288e-02, -1.999e-02, 1.489e-01, -6.922e-02, -9.150e-02, 1.382e-01, 6.813e-02, -3.534e-02, -6.858e-02, 1.107e-01, 9.316e-02, 1.947e-03, 1.004e-01)); + r += mul(s2_4, M4(-3.720e-01, -5.447e-02, 2.549e-02, 2.286e-01, 1.167e-01, -2.583e-02, -6.140e-02, 1.555e-01, -6.848e-03, 4.610e-01, -2.326e-02, -9.978e-02, -9.189e-02, 1.148e-02, 5.049e-02, 4.296e-02)); + r += mul(s2_5, M4(-9.210e-02, 1.150e-01, -1.262e-02, 
-3.182e-02, -6.583e-02, 3.653e-02, -1.123e-01, -3.868e-02, 4.312e-02, -2.195e-02, -2.840e-02, 9.926e-02, 1.779e-01, 4.119e-02, 7.840e-02, 1.152e-02)); + r += mul(s2_6, M4(-5.914e-02, -2.219e-02, -9.741e-02, -9.852e-03, -2.060e-01, -2.879e-02, -3.191e-02, -6.268e-03, 1.707e-01, -4.309e-02, -9.542e-02, 1.030e-02, 1.644e-01, 6.315e-02, 2.777e-02, -4.545e-02)); + r += mul(s2_7, M4(6.399e-02, 2.502e-03, -8.012e-04, 2.422e-02, 1.429e-02, -2.728e-02, -8.389e-02, 3.381e-02, 5.823e-02, 4.210e-03, -2.876e-01, 1.036e-01, 7.736e-02, 5.577e-02, 6.369e-02, -1.008e-01)); + r += mul(s2_8, M4(-5.091e-02, -5.603e-03, 2.450e-02, 3.630e-02, -2.032e-03, -2.386e-02, -7.338e-02, -9.917e-02, 8.712e-02, -2.359e-02, -1.168e-01, -4.326e-02, 1.907e-01, 9.281e-02, 8.805e-02, -1.709e-02)); + r += mul(s3_0, M4(-8.492e-03, -1.684e-01, -7.766e-02, -4.028e-02, -4.158e-02, -2.127e-02, -7.257e-03, 5.917e-03, 1.217e-02, 7.317e-02, -2.137e-02, 5.041e-02, -4.243e-02, -2.898e-02, 1.979e-02, 4.331e-02)); + r += mul(s3_1, M4(-6.358e-02, -1.387e-01, -6.814e-02, -1.817e-01, 4.827e-02, 5.557e-02, -5.562e-02, -2.517e-02, 7.520e-02, -5.369e-02, -2.957e-02, 6.842e-02, -4.920e-02, 9.637e-03, -9.558e-03, -5.943e-02)); + r += mul(s3_2, M4(-1.849e-01, -1.107e-01, 1.328e-02, 6.648e-02, -6.986e-03, -3.831e-02, 2.777e-02, -9.664e-02, -1.188e-02, -8.414e-02, -5.005e-03, -6.809e-02, -3.758e-03, 3.419e-02, 1.044e-02, 2.588e-02)); + r += mul(s3_3, M4(-3.455e-02, -6.447e-02, 4.304e-02, -3.681e-02, 1.114e-02, -2.089e-02, -1.545e-01, -1.155e-01, 7.848e-02, 3.047e-02, 7.575e-03, -1.164e-01, 4.672e-02, 7.194e-03, -8.710e-03, 1.492e-02)); + r += mul(s3_4, M4(-4.902e-01, -1.553e-01, 1.198e-02, 1.993e-01, 2.566e-01, -1.017e-02, -3.468e-01, -9.216e-02, 1.138e-01, 9.617e-02, 2.042e-01, 3.819e-02, 1.073e-02, -1.215e-01, -1.253e-01, 7.351e-02)); + r += mul(s3_5, M4(1.501e-01, 9.681e-03, -1.894e-01, 5.893e-02, 8.715e-02, 4.639e-02, -7.896e-02, -2.378e-02, -4.966e-03, -2.195e-02, 6.299e-02, 1.118e-01, -4.372e-02, 4.882e-02, 4.516e-02, 
-5.960e-02)); + r += mul(s3_6, M4(-2.788e-02, -5.976e-02, -1.486e-02, -6.390e-02, 1.490e-02, -5.229e-02, 9.739e-03, 1.628e-02, 1.290e-01, 1.911e-02, -9.108e-02, -1.047e-01, 5.630e-02, 5.946e-02, 2.895e-02, -7.990e-03)); + r += mul(s3_7, M4(1.312e-01, -7.650e-05, -8.691e-03, 2.166e-01, 6.408e-02, 2.675e-02, -5.012e-02, -1.038e-01, 2.693e-02, -5.429e-02, -5.977e-02, 2.153e-02, -2.628e-02, 7.319e-02, 1.860e-01, 1.051e-04)); + r += mul(s3_8, M4(-2.411e-02, 7.000e-03, 3.083e-02, 1.198e-01, 2.981e-02, 1.489e-03, -7.782e-03, 1.403e-02, 9.663e-02, -7.251e-02, -8.213e-02, -3.074e-02, 9.806e-04, -4.858e-03, 5.047e-02, -1.717e-02)); + r += mul(s4_0, M4(7.733e-02, -3.449e-02, 5.216e-04, 3.603e-02, 6.168e-02, -5.736e-02, 1.789e-02, 4.947e-02, 5.769e-02, -1.078e-01, 3.067e-02, 7.693e-02, -1.915e-02, -1.371e-01, -6.832e-03, -7.441e-02)); + r += mul(s4_1, M4(-2.435e-02, 6.718e-02, -2.504e-02, -1.398e-02, -4.522e-03, -1.198e-01, 8.722e-04, -6.143e-02, -1.031e-01, 1.309e-01, 8.645e-02, -1.824e-01, 9.033e-02, -6.244e-02, 6.688e-02, 8.934e-02)); + r += mul(s4_2, M4(9.768e-03, -6.388e-03, 2.846e-04, 2.673e-02, -7.163e-02, -1.729e-01, 4.732e-02, -6.461e-02, 5.416e-02, 7.461e-02, 6.755e-03, 7.088e-02, -1.494e-01, -1.939e-01, -2.334e-02, -6.639e-02)); + r += mul(s4_3, M4(4.916e-02, 7.842e-02, 8.636e-02, 4.896e-03, 2.077e-02, -7.260e-03, 6.933e-02, -1.500e-01, 3.326e-02, -6.514e-02, 4.309e-02, 5.474e-02, 2.360e-03, 3.087e-02, -1.838e-02, 3.402e-02)); + r += mul(s4_4, M4(8.343e-02, -7.392e-02, 2.044e-01, -2.791e-02, -3.864e-01, -3.340e-02, 7.701e-02, 2.842e-01, 1.702e-01, 4.962e-02, -1.480e-02, 1.600e-02, -3.186e-01, -2.030e-02, -4.357e-02, 3.020e-01)); + r += mul(s4_5, M4(-2.993e-02, -8.284e-02, -1.575e-02, 5.972e-02, 3.788e-02, -1.095e-01, 4.894e-03, -3.231e-02, 1.271e-01, 1.447e-01, -3.134e-02, 1.608e-01, 8.349e-02, -1.777e-01, -4.498e-02, 1.421e-02)); + r += mul(s4_6, M4(7.155e-02, 5.883e-04, 1.742e-02, -6.199e-02, 5.508e-03, -1.616e-02, -8.643e-02, -5.801e-02, 3.736e-02, 3.901e-03, 
-3.167e-02, -6.554e-03, 8.429e-02, 4.994e-03, 2.081e-02, 1.212e-01)); + r += mul(s4_7, M4(9.735e-02, -1.938e-03, -1.700e-02, -1.096e-03, 5.024e-02, 6.836e-02, 2.904e-02, -1.549e-01, -4.559e-02, 5.448e-02, -1.101e-01, -5.234e-02, -1.289e-01, -1.165e-03, -3.390e-01, 1.432e-01)); + r += mul(s4_8, M4(2.385e-02, -9.496e-02, 4.213e-02, 1.313e-01, 1.014e-01, -3.205e-02, -2.377e-03, 9.691e-02, 3.838e-02, 5.198e-02, -4.253e-02, -2.289e-02, 2.043e-01, 1.153e-01, 3.498e-02, 4.674e-02)); + r += mul(s5_0, M4(7.349e-02, -2.872e-02, 9.839e-03, 2.575e-02, 5.040e-02, -1.200e-02, 8.394e-03, 2.789e-02, 2.753e-02, -1.327e-02, 1.611e-02, -4.654e-02, 9.155e-02, 5.441e-03, 7.285e-03, -2.150e-02)); + r += mul(s5_1, M4(-3.458e-02, 6.279e-02, -9.407e-02, -4.962e-02, -2.365e-02, -3.668e-03, -5.242e-02, 4.234e-02, 1.004e-01, 8.471e-02, -7.121e-03, -3.339e-02, 1.435e-01, 1.598e-02, -9.651e-03, -9.784e-03)); + r += mul(s5_2, M4(5.242e-02, 4.609e-02, -5.016e-02, -8.921e-03, -3.206e-02, 9.631e-02, 2.813e-02, 1.163e-01, -7.121e-02, -2.991e-02, 4.636e-02, 1.120e-02, -9.152e-03, 1.125e-01, -2.285e-02, -2.552e-02)); + r += mul(s5_3, M4(5.592e-02, 1.196e-01, 1.682e-01, -2.464e-02, 9.764e-02, 1.534e-02, 2.007e-02, 1.612e-03, 1.054e-02, 3.811e-03, 3.054e-02, 7.187e-02, 3.507e-03, 5.375e-04, -3.795e-02, 9.252e-02)); + r += mul(s5_4, M4(3.448e-02, -1.545e-01, 2.232e-01, -1.527e-01, -2.125e-02, 2.051e-02, -1.904e-01, 2.173e-01, -9.658e-02, 1.783e-02, -1.125e-01, 5.383e-02, -2.253e-02, -2.935e-02, 7.137e-02, 2.442e-02)); + r += mul(s5_5, M4(-5.549e-02, 8.547e-04, 2.289e-02, -1.332e-02, 2.281e-02, 5.366e-01, -5.318e-02, 1.265e-01, 1.038e-01, 4.680e-02, -8.508e-03, -1.544e-03, -1.590e-01, -1.295e-01, -1.334e-02, -5.572e-02)); + r += mul(s5_6, M4(-6.454e-02, -7.916e-03, 2.528e-01, 8.998e-02, 4.818e-02, -5.405e-02, 2.007e-02, 5.993e-02, -4.428e-02, 5.162e-02, 3.843e-03, -2.584e-02, -5.946e-02, -1.401e-02, 6.041e-03, 3.848e-02)); + r += mul(s5_7, M4(1.182e-03, -9.651e-02, 9.935e-02, 4.947e-02, -1.461e-01, 
-5.850e-02, -9.432e-03, 1.211e-01, -9.445e-02, 1.133e-01, 1.787e-01, 4.793e-03, -8.293e-03, 5.160e-02, -2.377e-02, -5.676e-02)); + r += mul(s5_8, M4(-1.146e-02, -4.489e-02, 7.513e-02, 5.885e-04, -1.190e-01, -8.698e-03, -9.437e-02, -1.321e-01, 2.545e-02, -2.781e-02, 1.584e-02, -2.690e-02, 1.731e-01, 2.264e-02, 2.818e-02, 4.671e-02)); + r += mul(s6_0, M4(-5.553e-02, -3.049e-02, -2.810e-02, -1.063e-01, 1.536e-02, -2.471e-02, 1.935e-02, 4.999e-02, 6.846e-02, 1.009e-01, -2.470e-02, 1.182e-02, -1.138e-01, 9.476e-02, 5.192e-02, -6.661e-02)); + r += mul(s6_1, M4(-3.158e-02, 5.134e-02, -6.506e-03, 7.267e-02, -1.884e-02, -9.547e-02, 3.190e-02, -6.813e-02, -8.190e-03, -8.898e-02, -5.854e-03, 1.788e-02, -1.405e-01, -1.055e-01, 7.843e-02, 4.259e-02)); + r += mul(s6_2, M4(-4.024e-02, 1.837e-01, -9.659e-03, -9.674e-03, 8.621e-03, 2.509e-02, 5.523e-02, -4.426e-02, 4.017e-02, 5.583e-02, -2.628e-02, -3.894e-02, 1.962e-02, -1.237e-01, -2.311e-02, -9.066e-02)); + r += mul(s6_3, M4(-3.900e-02, -1.781e-02, -7.406e-02, 1.704e-02, -9.738e-02, -2.576e-02, 7.946e-02, 5.064e-02, 1.845e-01, 2.445e-02, -2.101e-03, -6.988e-02, -2.396e-01, -5.705e-02, 9.223e-02, 9.614e-03)); + r += mul(s6_4, M4(1.962e-01, 2.403e-01, -5.725e-02, -8.831e-02, -2.979e-01, -2.226e-01, 1.183e-02, 1.560e-02, -1.624e-01, 2.325e-01, 3.164e-02, 1.086e-01, -2.150e-01, -1.872e-02, -1.079e-03, 2.493e-01)); + r += mul(s6_5, M4(1.691e-04, -9.056e-02, 1.531e-01, -6.422e-02, -1.466e-01, 7.739e-02, 6.450e-02, -1.165e-01, 1.404e-04, 1.104e-01, 1.496e-02, -3.789e-02, -5.526e-02, 3.597e-02, 1.578e-01, -4.722e-02)); + r += mul(s6_6, M4(-9.485e-02, -2.619e-03, -2.968e-02, 2.768e-03, 9.337e-02, 1.958e-02, 2.719e-02, -8.179e-03, 7.328e-02, -2.161e-02, -4.146e-02, 5.023e-02, 1.997e-01, -3.878e-02, 4.260e-02, 1.436e-02)); + r += mul(s6_7, M4(8.732e-02, 9.822e-03, -1.201e-02, 1.027e-01, 8.020e-02, 1.386e-02, 9.703e-02, 9.239e-02, 1.340e-01, 5.484e-02, 9.953e-02, -7.304e-02, 3.625e-02, -4.690e-02, 9.176e-02, 6.110e-02)); + r += mul(s6_8, 
M4(-9.272e-02, 3.165e-02, -5.926e-02, -5.496e-02, -3.022e-02, 2.849e-02, 6.468e-02, 1.194e-02, -4.001e-02, -3.142e-02, -3.773e-02, -7.805e-02, -3.019e-02, -6.167e-02, 5.378e-02, 6.293e-02)); + r += mul(s7_0, M4(-6.682e-02, -3.100e-03, 4.288e-02, -4.757e-02, 1.168e-02, 4.485e-03, 1.499e-02, -4.155e-02, -5.604e-02, -3.052e-03, 4.053e-03, -9.959e-03, 2.347e-02, 1.329e-01, -7.440e-02, 1.943e-02)); + r += mul(s7_1, M4(-1.225e-01, 1.294e-01, -1.058e-02, -1.070e-02, -1.750e-02, 4.497e-03, 3.608e-02, 1.315e-02, -1.025e-01, -6.416e-02, 6.496e-02, 3.045e-03, 6.792e-02, 4.245e-02, -6.879e-02, 2.115e-01)); + r += mul(s7_2, M4(-4.218e-02, 5.811e-02, -4.552e-02, -4.372e-02, 1.682e-02, 3.276e-02, 1.525e-02, -1.266e-02, 6.467e-02, -6.312e-02, -2.242e-02, -2.783e-02, 2.884e-02, 6.089e-02, -2.662e-02, 2.866e-02)); + r += mul(s7_3, M4(-2.057e-02, -4.344e-02, 2.442e-02, 1.680e-01, -8.247e-02, 1.266e-02, -3.677e-02, 2.206e-02, 3.236e-02, -2.898e-02, -7.851e-02, 7.222e-02, -1.709e-02, 6.476e-02, -1.110e-01, -1.086e-01)); + r += mul(s7_4, M4(-7.845e-02, 1.909e-01, 3.045e-02, 8.544e-02, -7.163e-02, -1.065e-01, -1.011e-01, -1.703e-01, -1.102e-01, 1.369e-01, 2.257e-01, 3.306e-01, 1.614e-01, -4.870e-02, -1.226e-01, -2.679e-01)); + r += mul(s7_5, M4(9.959e-03, -9.835e-02, 2.715e-02, -6.583e-03, 4.776e-02, 2.997e-02, -1.947e-02, -4.679e-02, 2.217e-02, -4.622e-02, -7.050e-02, 1.626e-02, -2.419e-02, -7.395e-02, 1.233e-01, 6.038e-02)); + r += mul(s7_6, M4(-4.357e-02, -2.921e-03, 1.762e-02, 4.688e-03, -3.021e-02, 4.270e-03, 9.634e-02, -1.632e-02, 1.907e-02, 5.368e-02, 7.232e-02, -2.958e-02, 6.972e-02, 4.948e-02, -8.427e-02, -7.472e-03)); + r += mul(s7_7, M4(-4.863e-02, 2.256e-03, 1.555e-01, 9.200e-02, 5.525e-02, -7.310e-02, 7.447e-02, -4.961e-02, -9.250e-02, -2.349e-02, 1.928e-01, 2.023e-02, 4.853e-02, -3.875e-02, -2.442e-01, -7.941e-02)); + r += mul(s7_8, M4(-1.036e-02, -4.162e-03, -3.954e-02, 3.330e-02, -5.723e-02, -3.625e-02, 2.070e-02, 1.482e-03, 5.367e-03, -3.066e-02, -5.701e-02, -3.775e-02, 
3.520e-02, 7.684e-02, -4.128e-02, -2.718e-02)); + r += V4(4.936e-03, 2.207e-02, -4.522e-03, -1.855e-03); + return r; +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 
0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, 
s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 7 +//!DESC conv6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, 
V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.892e-02, 9.057e-04, -2.227e-02, 1.170e-01, 5.052e-02, -4.755e-02, -2.612e-02, 6.990e-02, -6.464e-02, -4.279e-02, 1.755e-02, -4.057e-02, -3.950e-02, 6.520e-03, -3.059e-02, -7.818e-03)); + r += mul(s0_1, M4(9.898e-03, 1.838e-02, -3.997e-02, 3.761e-02, -2.064e-02, -6.330e-02, 2.579e-02, -3.046e-02, -1.524e-02, 4.854e-02, -7.193e-02, -3.546e-03, 1.800e-02, 7.523e-04, -1.513e-02, 5.210e-03)); + r += mul(s0_2, M4(-5.011e-02, -7.229e-02, -1.374e-02, -5.250e-02, 5.590e-04, -1.762e-02, -3.047e-02, -5.688e-02, -5.930e-02, 2.584e-02, -1.109e-03, 8.703e-02, 1.787e-02, -1.783e-02, -4.701e-02, -2.684e-02)); + r += mul(s0_3, M4(-9.078e-02, -3.604e-03, 8.995e-02, 3.718e-02, 1.311e-01, -3.371e-02, -3.473e-02, -2.238e-01, 8.436e-02, 2.053e-02, 4.818e-02, -3.848e-02, 1.036e-01, 1.370e-01, -6.924e-02, -1.183e-01)); + r += mul(s0_4, M4(-1.547e-01, 9.445e-02, -6.589e-02, 1.662e-01, 1.747e-01, -1.342e-01, 5.341e-02, -3.193e-01, 2.132e-01, 2.009e-01, 5.479e-02, -1.624e-01, -9.093e-02, 1.332e-01, 2.477e-01, -3.623e-01)); + r += mul(s0_5, M4(2.956e-02, 5.554e-02, 2.417e-02, -5.816e-02, 1.005e-01, 8.073e-02, 2.686e-02, 3.986e-03, -1.567e-02, -4.538e-02, -7.194e-02, 8.519e-03, 5.708e-02, 7.690e-02, -5.197e-03, -7.138e-02)); + r += mul(s0_6, M4(-5.022e-02, 1.932e-02, 1.924e-03, -2.213e-02, 1.373e-02, -8.656e-02, -3.289e-03, -5.205e-02, -3.293e-02, -5.007e-02, 6.656e-02, 6.720e-03, 1.852e-01, 1.395e-01, -1.305e-01, 7.133e-02)); + r += mul(s0_7, M4(3.341e-02, 2.647e-02, -1.255e-01, 9.840e-02, 1.228e-01, 3.911e-02, -1.104e-01, 4.893e-02, 1.619e-01, -1.152e-02, 1.642e-01, -2.472e-01, 7.064e-02, 7.472e-02, -2.321e-02, 1.860e-01)); + r += mul(s0_8, M4(3.182e-03, -1.035e-02, -4.764e-03, -2.834e-02, -4.592e-02, -8.360e-02, -1.049e-01, 9.181e-02, -5.580e-03, -2.966e-02, 4.314e-02, -5.040e-02, 7.959e-03, -4.075e-02, -7.035e-02, 7.185e-03)); + r += mul(s1_0, M4(1.005e-02, 3.869e-02, -9.593e-02, -6.703e-02, 
-5.489e-03, -6.815e-02, -2.919e-02, 7.197e-02, -3.231e-02, -8.941e-02, -2.824e-03, -1.433e-01, -4.432e-02, 2.306e-02, 2.600e-02, 2.355e-02)); + r += mul(s1_1, M4(-1.337e-02, 6.519e-02, 6.904e-02, 2.077e-01, -7.160e-02, -6.044e-03, -9.011e-02, -4.576e-02, -6.759e-02, -3.018e-02, 2.219e-02, -2.280e-02, 3.455e-03, -8.716e-02, 2.434e-02, 2.113e-02)); + r += mul(s1_2, M4(-4.231e-03, 8.658e-02, -4.622e-04, -1.155e-01, -6.464e-02, -7.393e-03, 2.837e-02, -3.901e-03, -3.928e-02, -1.095e-01, -2.863e-02, -9.264e-02, -5.068e-02, -1.030e-01, -1.353e-02, -3.888e-02)); + r += mul(s1_3, M4(6.558e-02, -2.655e-02, 2.628e-02, 1.995e-02, 7.401e-02, -5.787e-02, 5.426e-02, -1.168e-01, -4.519e-02, -1.051e-01, 4.971e-02, -2.537e-01, -9.540e-02, 1.039e-01, -1.761e-02, 1.026e-02)); + r += mul(s1_4, M4(1.446e-02, 1.660e-01, -1.851e-01, 4.339e-01, 9.934e-02, -1.544e-01, -2.348e-01, 8.207e-02, -2.856e-02, 7.433e-02, 7.259e-02, -7.759e-02, -1.389e-01, -3.222e-02, 2.700e-01, -1.827e-01)); + r += mul(s1_5, M4(7.654e-02, 3.500e-02, -6.733e-02, 1.029e-01, -8.113e-03, -7.692e-02, -8.108e-02, 4.407e-02, -8.400e-02, -2.423e-01, -5.516e-02, -2.318e-01, 2.917e-02, 8.021e-02, 1.554e-01, -1.132e-01)); + r += mul(s1_6, M4(-5.160e-03, 1.790e-04, -3.341e-02, 1.601e-02, 7.012e-02, -6.386e-02, -9.156e-03, 5.564e-03, 6.886e-02, 2.401e-02, -2.487e-02, -9.339e-02, 6.921e-02, 1.156e-01, -1.217e-01, 8.888e-02)); + r += mul(s1_7, M4(-4.282e-02, 7.150e-02, 3.611e-02, 2.136e-02, -6.391e-03, 1.245e-01, 2.154e-03, 6.634e-02, 6.563e-02, 1.349e-01, 5.738e-02, -1.428e-02, 1.269e-01, 2.861e-02, -1.129e-01, -3.461e-02)); + r += mul(s1_8, M4(8.093e-02, 6.132e-02, -3.980e-02, 4.061e-03, -6.737e-03, -1.218e-01, -7.884e-02, 2.701e-02, -5.346e-02, -3.825e-02, -3.358e-03, -3.880e-02, 4.392e-02, 1.837e-02, -3.878e-02, 2.689e-02)); + r += mul(s2_0, M4(-1.140e-01, -1.977e-01, 8.833e-02, 3.602e-02, -8.492e-02, -4.644e-02, 7.422e-02, 4.735e-02, -6.120e-02, -2.640e-02, -9.553e-03, -4.967e-02, 2.549e-01, 6.415e-02, -2.526e-02, 
5.049e-02)); + r += mul(s2_1, M4(1.352e-01, 4.401e-02, -6.839e-02, 5.522e-04, -5.163e-03, -1.764e-01, 1.450e-01, 2.832e-02, -1.245e-02, -1.053e-02, -7.027e-03, 2.384e-02, 1.973e-01, 3.105e-01, -1.400e-01, -3.123e-02)); + r += mul(s2_2, M4(-5.263e-02, -3.082e-02, 2.771e-02, 7.748e-02, 7.367e-02, 5.136e-02, 9.221e-03, -9.403e-02, -3.892e-02, 2.250e-02, 3.406e-02, -3.803e-03, 1.243e-01, 1.343e-01, 4.603e-02, 9.632e-02)); + r += mul(s2_3, M4(-2.570e-02, 9.713e-02, -3.603e-02, -1.741e-01, -3.212e-01, -1.177e-01, 5.072e-02, -6.159e-02, 1.046e-02, -8.851e-03, 1.964e-03, -2.628e-02, 5.741e-02, 1.188e-01, -5.907e-02, -9.158e-02)); + r += mul(s2_4, M4(4.535e-02, 1.107e-01, -8.172e-02, -9.792e-02, -6.629e-03, 1.336e-01, 3.079e-01, 1.386e-01, 3.253e-02, -6.700e-02, 4.914e-02, -2.250e-01, 2.649e-01, 9.882e-02, 5.684e-02, -1.145e-01)); + r += mul(s2_5, M4(8.656e-04, -1.921e-02, 4.709e-02, -2.374e-02, 1.168e-02, -1.003e-01, -7.816e-03, 2.747e-02, -1.393e-02, 4.043e-02, -1.118e-02, 4.130e-02, 5.784e-02, -2.393e-02, -1.445e-01, -2.710e-02)); + r += mul(s2_6, M4(-3.388e-03, -3.829e-02, 5.542e-02, -7.072e-02, 7.206e-02, -1.228e-01, 5.047e-02, 5.735e-02, -1.856e-02, -2.545e-02, -2.100e-02, 6.857e-03, -1.250e-02, -3.396e-02, 7.393e-02, -6.130e-02)); + r += mul(s2_7, M4(-8.575e-03, -4.300e-02, -1.490e-01, 2.881e-02, -2.819e-02, -9.092e-02, 2.132e-01, -1.420e-01, 2.622e-02, -5.709e-02, 5.645e-03, 2.712e-02, 8.451e-02, 4.578e-02, 8.928e-03, 9.006e-02)); + r += mul(s2_8, M4(4.345e-02, 2.069e-02, 1.854e-02, -1.189e-02, 4.093e-02, -2.959e-02, -1.770e-02, 7.986e-02, -1.470e-02, -2.281e-02, 6.168e-03, -5.472e-03, 1.018e-01, 5.524e-02, -1.023e-01, -3.456e-02)); + r += mul(s3_0, M4(-7.354e-02, 2.029e-02, 4.034e-02, 4.410e-02, 5.180e-02, 4.118e-02, 4.462e-02, 9.580e-02, -2.305e-01, -1.860e-01, -1.725e-01, -2.209e-01, -1.102e-01, -3.227e-02, 3.321e-02, -5.393e-02)); + r += mul(s3_1, M4(3.237e-02, -1.845e-02, 7.690e-03, 2.054e-01, 2.388e-03, 2.858e-02, 2.746e-03, 6.395e-02, -2.665e-01, -2.872e-01, 
1.131e-01, -9.316e-02, 2.001e-02, 1.909e-02, 3.956e-02, 7.376e-02)); + r += mul(s3_2, M4(-3.274e-02, 4.116e-02, 9.165e-03, -6.812e-02, 1.102e-02, 5.621e-02, -3.189e-02, 6.348e-02, -2.511e-02, -6.700e-02, -3.697e-02, -1.694e-01, -5.214e-03, -2.888e-02, -4.806e-02, 6.745e-03)); + r += mul(s3_3, M4(-1.959e-01, -1.499e-01, -1.345e-01, 4.864e-02, -7.274e-02, -8.329e-02, -2.831e-03, 9.697e-02, 4.047e-01, 6.853e-02, -1.007e-01, -4.966e-02, -3.069e-02, -5.057e-02, -3.145e-02, 1.141e-01)); + r += mul(s3_4, M4(1.736e-02, 3.371e-02, -1.292e-02, 9.849e-03, -2.007e-02, 8.965e-03, 1.146e-01, 6.432e-02, -9.202e-02, 6.812e-02, 1.029e-01, -1.521e-01, -3.578e-02, 1.258e-01, 6.123e-02, -3.616e-02)); + r += mul(s3_5, M4(-1.039e-01, 2.152e-02, 5.538e-04, -1.052e-01, 2.065e-02, -6.045e-02, -4.725e-02, -7.233e-02, -2.188e-01, 1.499e-02, -1.378e-01, 6.032e-02, 2.544e-03, -2.566e-03, -8.906e-03, -4.056e-02)); + r += mul(s3_6, M4(-6.231e-02, 4.043e-02, -6.667e-02, 4.605e-02, 1.026e-01, -1.503e-02, -2.104e-02, 8.022e-02, 1.437e-01, -2.244e-01, 7.704e-02, -1.969e-01, -8.773e-02, -8.024e-02, 9.243e-03, 3.532e-02)); + r += mul(s3_7, M4(6.778e-02, 8.541e-03, -8.499e-02, 5.502e-02, -1.259e-02, -1.763e-02, 5.560e-02, -1.815e-02, -1.941e-01, -3.327e-02, 2.370e-01, -3.977e-03, 1.362e-02, 3.610e-02, -1.219e-01, 9.990e-02)); + r += mul(s3_8, M4(1.069e-01, 7.105e-02, -1.385e-02, -8.030e-03, 5.911e-02, -1.568e-02, -7.717e-03, -1.807e-02, 1.325e-01, 1.617e-03, 3.105e-02, -2.326e-01, 2.131e-02, 4.202e-02, 3.413e-03, -4.040e-02)); + r += mul(s4_0, M4(8.906e-02, 1.115e-03, 3.390e-02, -2.516e-03, 5.805e-02, -3.799e-02, 4.655e-02, 2.257e-02, 4.932e-02, 2.314e-02, -4.908e-02, 2.996e-02, 4.110e-02, -4.586e-02, -6.483e-04, -5.355e-02)); + r += mul(s4_1, M4(9.657e-02, 2.625e-02, 7.033e-02, -1.720e-02, 4.770e-02, -2.590e-02, -2.325e-02, -2.599e-02, -3.552e-02, 7.924e-02, 2.860e-02, 2.673e-02, 7.177e-02, 2.816e-02, -7.265e-03, -1.118e-01)); + r += mul(s4_2, M4(4.743e-02, -3.720e-02, 8.744e-02, -1.634e-02, 
-8.956e-02, 2.616e-02, 1.434e-02, 4.662e-03, 5.470e-02, 3.277e-02, -1.178e-03, -1.544e-02, -1.169e-01, -9.655e-02, 4.487e-02, -1.241e-02)); + r += mul(s4_3, M4(-1.359e-02, -5.988e-02, 2.693e-02, 2.846e-02, 9.406e-02, 5.142e-02, 5.821e-02, 3.332e-02, 1.010e-01, -4.253e-03, -5.963e-02, -1.029e-03, 4.573e-02, -6.321e-02, 3.603e-02, 3.449e-02)); + r += mul(s4_4, M4(1.066e-01, 7.866e-02, 1.535e-02, -1.959e-01, -1.472e-02, -1.645e-01, -4.944e-02, 8.975e-02, -5.317e-02, -7.101e-02, -1.510e-02, 3.471e-02, 1.469e-01, 4.420e-03, -2.352e-01, 2.438e-01)); + r += mul(s4_5, M4(3.382e-02, 6.457e-02, 1.342e-01, -6.023e-02, -7.056e-02, -1.374e-02, 3.042e-02, 8.520e-03, 1.263e-01, -7.204e-02, 1.232e-01, 1.482e-02, -8.120e-02, -1.545e-01, 5.136e-02, 5.783e-02)); + r += mul(s4_6, M4(-4.757e-02, 2.582e-02, -7.227e-03, -1.299e-02, -7.072e-02, 2.237e-02, 5.139e-02, 3.069e-02, -4.099e-02, -4.838e-02, -9.954e-03, 6.936e-03, -6.913e-02, -4.324e-03, 3.736e-02, 2.256e-02)); + r += mul(s4_7, M4(8.430e-02, 4.440e-02, -6.424e-02, 4.997e-02, 1.963e-02, 3.090e-02, -1.815e-02, 2.615e-02, 3.461e-02, 5.020e-02, -4.343e-02, 2.793e-02, 3.126e-03, 6.713e-03, -1.250e-02, 8.975e-02)); + r += mul(s4_8, M4(-4.362e-02, 2.579e-02, -8.012e-03, 5.232e-02, -7.003e-02, -4.135e-02, -7.833e-03, 8.398e-03, 3.151e-02, 7.675e-02, 2.003e-02, 1.107e-02, -1.208e-01, -7.307e-02, -2.827e-02, 1.377e-02)); + r += mul(s5_0, M4(-5.169e-02, 1.376e-02, 7.169e-02, 3.810e-02, 3.989e-04, 4.194e-02, -3.871e-02, -1.040e-02, 1.715e-01, -2.089e-02, -2.049e-02, -1.927e-02, -6.435e-03, -1.077e-01, -2.022e-02, -4.709e-02)); + r += mul(s5_1, M4(8.097e-02, 1.595e-01, 6.419e-02, 6.755e-02, 9.451e-03, 3.660e-03, -1.911e-03, -3.770e-02, 6.812e-02, -1.181e-02, 7.683e-02, 9.669e-02, -1.066e-02, 9.533e-03, -6.897e-02, -3.196e-02)); + r += mul(s5_2, M4(-1.290e-01, -1.846e-01, 3.082e-02, -3.248e-02, -9.121e-02, -5.633e-02, 1.155e-02, -7.674e-02, -4.658e-02, -1.558e-01, -7.535e-03, -6.349e-02, 5.137e-02, -1.630e-02, 3.026e-02, -3.777e-02)); + r += 
mul(s5_3, M4(-2.546e-02, -1.736e-02, 5.887e-02, 6.525e-02, 1.211e-01, 4.194e-02, -9.258e-02, 7.958e-02, 1.622e-02, -8.012e-02, 1.706e-02, -1.745e-01, 3.421e-02, -4.461e-02, 7.792e-02, -3.323e-02)); + r += mul(s5_4, M4(-1.058e-02, 5.965e-02, 3.412e-03, -7.582e-02, 1.101e-01, 1.057e-01, 5.295e-02, 1.138e-01, 1.188e-01, -1.380e-01, -3.663e-02, -1.563e-01, 1.298e-01, -7.933e-02, -2.861e-01, 1.125e-01)); + r += mul(s5_5, M4(-1.250e-01, 1.039e-01, 9.261e-02, -6.083e-02, -6.239e-02, -1.635e-01, 1.773e-02, -7.921e-02, 7.637e-02, -4.078e-02, 2.057e-01, -1.251e-01, -3.691e-02, -3.187e-02, 7.676e-02, 1.064e-01)); + r += mul(s5_6, M4(-1.186e-01, -9.549e-03, 4.969e-02, -3.553e-02, 3.041e-02, 4.041e-02, 2.331e-02, -1.049e-02, 9.832e-03, -3.467e-02, -3.237e-02, 5.767e-02, 3.448e-02, 3.118e-02, 1.128e-02, -2.170e-02)); + r += mul(s5_7, M4(8.589e-02, 1.011e-02, 6.850e-02, -1.698e-01, 4.856e-02, 7.526e-02, -1.202e-02, -1.346e-02, 2.155e-01, 3.896e-02, -3.379e-02, -6.320e-03, 8.241e-02, -5.493e-03, -2.139e-02, -4.874e-02)); + r += mul(s5_8, M4(8.309e-03, 1.014e-01, 1.912e-04, -1.443e-02, -9.996e-02, -5.747e-02, -1.427e-02, -3.418e-02, 1.229e-02, 4.351e-02, 3.041e-02, 4.928e-02, -2.213e-02, -3.990e-02, -1.135e-02, -3.189e-03)); + r += mul(s6_0, M4(-1.363e-03, 3.767e-02, 2.673e-02, 1.090e-01, -5.906e-02, -6.840e-02, -3.299e-03, 5.067e-02, -8.695e-02, -7.577e-03, -2.977e-02, -3.850e-02, -3.534e-02, -6.116e-02, 4.473e-02, -6.983e-02)); + r += mul(s6_1, M4(-6.196e-02, -2.664e-02, 1.405e-03, -6.275e-02, 1.508e-01, 8.178e-02, -3.065e-02, 4.872e-02, -4.752e-02, 1.714e-01, 6.112e-02, 8.994e-02, 6.323e-02, -7.808e-02, 1.595e-02, -1.112e-02)); + r += mul(s6_2, M4(7.586e-02, 5.583e-02, -2.494e-02, 6.153e-02, 5.347e-02, 3.702e-02, 3.865e-02, 8.649e-02, -1.919e-02, 2.277e-01, 4.381e-02, 9.435e-02, 7.158e-02, -8.172e-02, -3.918e-02, -1.481e-02)); + r += mul(s6_3, M4(6.842e-03, 4.130e-02, 2.458e-02, -1.275e-01, -1.430e-02, 5.328e-02, 2.890e-02, -7.957e-02, 1.503e-03, -8.867e-02, 3.256e-03, 
2.632e-02, -5.296e-02, -3.447e-02, 5.496e-02, 2.323e-02)); + r += mul(s6_4, M4(-2.043e-01, -1.940e-02, 1.762e-01, -2.170e-01, -4.904e-02, 5.043e-02, -8.911e-02, -1.098e-01, 6.870e-02, -4.414e-01, -1.441e-01, 1.978e-01, -6.663e-02, -1.210e-01, -9.031e-02, 2.019e-02)); + r += mul(s6_5, M4(4.577e-02, -4.071e-02, -3.518e-03, -1.440e-01, 5.879e-03, -6.269e-02, -2.873e-02, 7.060e-02, -1.238e-01, -1.681e-01, 5.140e-02, 1.019e-01, 1.375e-01, 1.099e-01, 1.521e-02, -7.575e-02)); + r += mul(s6_6, M4(-1.973e-03, -5.379e-02, 5.083e-02, -2.667e-02, 2.298e-01, 5.915e-02, -1.261e-01, 5.676e-02, -5.795e-02, -9.618e-03, 3.732e-02, -7.761e-03, 4.008e-02, -1.175e-02, 2.934e-02, 4.812e-03)); + r += mul(s6_7, M4(-3.987e-03, 9.212e-03, 3.804e-02, -8.632e-02, 2.043e-02, 4.381e-02, -5.177e-02, 1.427e-01, 1.331e-02, 3.319e-02, 6.874e-02, -1.323e-01, -1.051e-01, -7.225e-02, 8.682e-02, -7.441e-02)); + r += mul(s6_8, M4(2.258e-02, 6.892e-03, 9.611e-02, -7.609e-02, 1.090e-01, -4.216e-03, -5.880e-02, -4.078e-02, -9.083e-02, 2.742e-02, 6.515e-02, 1.319e-02, 6.738e-02, 3.922e-02, 9.416e-03, -4.067e-02)); + r += mul(s7_0, M4(1.031e-01, 1.012e-01, -4.783e-03, 4.571e-02, -5.356e-02, -2.030e-02, 5.415e-02, 4.546e-02, -2.107e-02, -5.140e-02, 2.113e-02, 5.384e-02, 5.068e-02, 3.218e-04, -1.839e-02, 7.545e-02)); + r += mul(s7_1, M4(-4.294e-02, 7.765e-02, 3.601e-02, -3.077e-02, 6.524e-02, -1.347e-02, -5.919e-02, 1.791e-02, 2.274e-01, 6.908e-02, -7.778e-02, 1.627e-01, 6.772e-02, 1.251e-01, -6.367e-02, 6.658e-02)); + r += mul(s7_2, M4(5.673e-02, -1.761e-02, -3.663e-03, 1.791e-02, -2.510e-02, 2.097e-02, -2.502e-02, -7.294e-04, 4.252e-03, 7.918e-02, 4.567e-02, 1.122e-01, -6.216e-02, -3.072e-03, 7.834e-03, 2.619e-02)); + r += mul(s7_3, M4(1.263e-01, 4.895e-02, -4.581e-02, -1.277e-01, -1.994e-01, 2.217e-01, 5.047e-02, 5.175e-02, -3.767e-02, -2.373e-02, -3.259e-02, -1.706e-01, 1.840e-01, -1.054e-01, -3.272e-02, 4.739e-02)); + r += mul(s7_4, M4(-1.134e-02, -7.667e-03, 2.331e-01, -3.020e-01, 1.627e-01, -3.228e-01, 
5.315e-02, -6.137e-02, 5.719e-02, -2.665e-02, 1.806e-01, -3.680e-01, 1.204e-01, -4.809e-02, -3.139e-01, 3.382e-01)); + r += mul(s7_5, M4(1.173e-01, -3.797e-02, -7.569e-02, -9.224e-02, 1.795e-01, -2.326e-02, -1.464e-02, 1.275e-01, 1.945e-02, -1.106e-01, 1.670e-01, -1.490e-01, 9.719e-02, 1.096e-01, -6.054e-02, 1.833e-01)); + r += mul(s7_6, M4(1.252e-01, -5.389e-03, 1.982e-02, -7.582e-02, 4.581e-02, 3.180e-02, -5.152e-02, 3.405e-02, -7.310e-03, -3.880e-02, -2.935e-02, -2.388e-02, -1.040e-01, 1.890e-02, 1.067e-01, -4.400e-02)); + r += mul(s7_7, M4(6.951e-02, 2.690e-02, -2.907e-02, 4.672e-03, 6.524e-02, 6.165e-02, 1.446e-02, 1.205e-01, 1.406e-01, 7.088e-02, 1.196e-03, 7.178e-02, -1.725e-01, -1.580e-01, 1.284e-01, -1.150e-01)); + r += mul(s7_8, M4(6.039e-02, 1.726e-03, 2.689e-02, 6.749e-02, 5.954e-03, -5.211e-02, 1.374e-03, 1.189e-01, -1.196e-02, 1.119e-02, 4.533e-03, 1.074e-01, -7.028e-02, -2.072e-02, -1.303e-01, -1.357e-01)); + r += V4(-1.936e-02, -1.244e-02, 5.264e-03, 1.250e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.215e-02, -4.540e-02, -6.182e-02, 9.210e-02, -1.343e-01, -6.008e-02, 1.022e-02, 6.817e-03, 6.076e-02, -3.628e-02, 9.117e-02, -3.748e-02, 4.280e-02, 2.804e-02, 7.215e-02, -4.456e-02)); + r += mul(s0_1, M4(1.086e-01, -7.696e-02, 1.310e-01, 4.790e-02, -1.208e-02, 3.135e-02, -8.876e-04, 8.954e-02, 
-4.514e-02, -4.020e-02, -4.946e-02, 9.281e-03, -5.115e-02, -3.673e-02, -2.748e-02, 2.463e-03)); + r += mul(s0_2, M4(1.640e-02, 9.478e-02, 6.299e-02, -9.613e-03, -6.623e-02, -2.458e-02, -6.038e-03, -6.066e-02, 5.517e-02, -9.301e-02, -2.742e-02, -1.356e-02, 1.062e-02, -5.368e-02, -4.682e-02, 1.174e-02)); + r += mul(s0_3, M4(-1.294e-01, 6.481e-02, 6.688e-02, 4.674e-02, -2.854e-02, 2.854e-02, 1.117e-01, -6.279e-02, -2.460e-02, -6.481e-02, 3.189e-02, 1.215e-02, 2.836e-02, 3.841e-02, -3.548e-01, 1.343e-01)); + r += mul(s0_4, M4(1.699e-01, 3.216e-02, -2.921e-01, -1.732e-02, 2.208e-02, 1.366e-02, 5.443e-02, 4.008e-02, -1.648e-01, 2.962e-01, 1.915e-01, -1.546e-01, 5.420e-02, -1.610e-01, 1.169e-01, 2.131e-01)); + r += mul(s0_5, M4(-6.936e-02, 1.159e-01, 7.268e-02, -5.801e-02, 1.414e-02, 2.981e-02, 1.797e-01, 4.795e-02, 9.202e-02, -1.694e-01, -4.349e-02, -1.511e-02, 1.350e-02, -1.415e-02, -8.296e-02, 7.228e-02)); + r += mul(s0_6, M4(-3.815e-02, 5.744e-02, -6.128e-02, 7.184e-03, -2.640e-03, 2.213e-02, -9.551e-02, -2.212e-02, -2.726e-02, -1.189e-02, 2.630e-02, 6.464e-02, 1.301e-02, 3.157e-03, 6.255e-02, -7.935e-02)); + r += mul(s0_7, M4(-1.957e-01, -2.873e-02, 1.256e-01, -3.955e-03, -3.953e-02, 7.481e-02, 1.962e-01, -1.755e-01, 3.431e-01, 1.885e-02, -8.772e-02, 3.388e-02, -3.657e-01, 1.158e-01, 2.079e-01, -1.171e-01)); + r += mul(s0_8, M4(-2.954e-02, 7.288e-02, -2.538e-02, 4.632e-03, 5.899e-02, -4.524e-02, -7.829e-02, 1.084e-01, 4.310e-02, 3.622e-02, -4.349e-03, 5.737e-03, -2.865e-02, -5.213e-02, -5.996e-02, 1.157e-01)); + r += mul(s1_0, M4(-2.228e-02, 6.939e-03, -7.273e-02, -2.658e-02, -8.725e-02, -9.682e-02, -1.142e-02, 1.096e-02, -3.200e-03, 3.983e-02, 6.569e-02, -3.135e-02, 2.560e-02, -8.801e-04, 1.067e-01, -4.188e-02)); + r += mul(s1_1, M4(-1.916e-02, -5.042e-02, 5.203e-02, 1.398e-01, -5.274e-02, -1.499e-01, 4.090e-02, -1.336e-01, -1.097e-01, -2.390e-02, -5.167e-03, 3.541e-02, -1.152e-01, 1.872e-02, -6.303e-02, 4.480e-02)); + r += mul(s1_2, M4(-3.511e-02, 6.722e-02, 
-1.625e-02, -1.987e-01, -8.731e-03, 5.991e-02, -3.776e-03, -4.745e-02, 9.009e-02, -3.480e-02, -7.581e-02, 1.785e-02, -3.545e-02, -3.787e-02, 3.436e-02, -4.523e-02)); + r += mul(s1_3, M4(-7.645e-02, 5.618e-03, -6.662e-03, -5.060e-04, -1.354e-01, 2.537e-02, 1.650e-01, -9.984e-02, -1.982e-02, 2.930e-02, 1.762e-02, 1.023e-01, 4.403e-02, -4.789e-03, -2.220e-01, 2.075e-01)); + r += mul(s1_4, M4(1.497e-01, -5.995e-02, 4.478e-02, 1.940e-01, -1.135e-01, -3.094e-01, 1.325e-02, 3.125e-01, -1.059e-01, 1.516e-01, -2.374e-02, 1.221e-01, 1.473e-01, -3.604e-01, -2.142e-01, 2.015e-01)); + r += mul(s1_5, M4(-8.634e-02, -3.871e-03, 1.723e-02, 9.041e-02, 7.761e-03, 4.224e-02, 5.476e-03, 6.053e-02, 2.046e-03, -1.409e-01, -6.809e-02, 9.993e-02, -2.364e-02, 1.081e-01, 2.678e-02, -1.273e-02)); + r += mul(s1_6, M4(1.241e-02, 3.972e-02, -1.697e-02, -7.475e-03, 2.857e-02, 9.824e-03, -6.627e-02, -2.487e-02, 2.585e-02, -1.836e-02, -1.115e-01, -3.251e-02, -2.263e-02, 2.889e-02, 7.380e-02, -1.795e-02)); + r += mul(s1_7, M4(-6.224e-02, 9.404e-02, 1.472e-01, -9.855e-02, -2.213e-02, 7.542e-02, 1.146e-01, -1.254e-01, -2.543e-02, -7.808e-02, 1.043e-01, -1.256e-01, -1.819e-01, 8.211e-02, 1.043e-01, -3.737e-02)); + r += mul(s1_8, M4(-2.377e-03, -5.938e-03, -7.043e-02, 2.431e-02, 6.489e-02, -4.866e-02, -3.791e-02, 7.250e-02, 4.674e-02, 4.103e-02, -6.961e-03, 6.558e-02, -7.019e-02, 2.432e-02, -2.384e-02, 4.730e-02)); + r += mul(s2_0, M4(-1.052e-01, 1.714e-01, 1.920e-01, 8.695e-03, 1.192e-01, 2.749e-02, 6.753e-03, -7.678e-02, 5.081e-03, 1.391e-02, 5.604e-02, -1.924e-02, -1.572e-02, -2.414e-02, -5.768e-02, -1.455e-02)); + r += mul(s2_1, M4(-1.224e-01, -9.588e-02, -9.905e-02, 7.559e-03, -9.213e-02, 2.499e-01, 1.938e-01, 9.930e-02, -1.303e-02, -3.614e-02, -5.201e-02, -5.518e-02, 1.356e-02, -1.071e-01, 1.854e-01, -4.967e-02)); + r += mul(s2_2, M4(3.354e-02, -5.809e-03, -3.624e-02, 4.022e-02, -1.559e-01, -3.162e-02, -1.018e-01, -8.528e-02, 3.540e-02, -1.690e-02, 2.697e-03, 3.760e-02, 8.552e-02, 1.354e-03, 
3.932e-02, 2.864e-02)); + r += mul(s2_3, M4(1.616e-02, 1.046e-01, 1.022e-02, -1.411e-01, -1.700e-02, 2.493e-01, 1.387e-01, -1.636e-02, 3.542e-02, -5.554e-02, -5.075e-02, -3.120e-02, -3.111e-02, 1.070e-01, 4.874e-02, -1.538e-02)); + r += mul(s2_4, M4(7.770e-02, 3.983e-02, 1.571e-01, -2.401e-02, -9.394e-03, 1.850e-01, 4.855e-01, -3.089e-01, -9.375e-03, 1.036e-02, 7.071e-02, -6.796e-02, 1.023e-01, 1.237e-03, 4.321e-01, -2.218e-01)); + r += mul(s2_5, M4(-8.584e-02, 4.444e-02, -1.155e-02, 4.413e-02, 2.076e-02, -1.746e-01, -2.055e-01, 1.220e-01, 3.257e-03, 9.627e-03, 1.818e-02, 1.360e-02, 7.268e-02, -2.056e-01, -2.268e-01, 1.396e-01)); + r += mul(s2_6, M4(1.675e-01, -1.478e-02, 4.312e-02, 8.251e-02, -8.362e-03, 2.056e-02, -5.405e-02, 1.599e-01, -3.400e-02, 1.371e-02, 3.787e-02, 1.585e-02, 8.075e-02, -9.411e-02, 1.153e-01, 9.444e-02)); + r += mul(s2_7, M4(-6.376e-02, -8.809e-02, 7.907e-03, 1.811e-02, 1.512e-02, 7.979e-02, -1.030e-01, 6.388e-02, -4.510e-02, 3.419e-02, 5.330e-03, 5.456e-02, -1.586e-01, -7.157e-02, 8.941e-02, 1.130e-01)); + r += mul(s2_8, M4(-2.671e-02, 3.746e-02, -7.254e-02, -2.724e-02, -1.712e-01, -3.450e-02, 2.189e-03, 1.064e-02, 5.478e-02, 6.619e-03, 2.169e-02, 1.149e-03, -1.430e-01, -6.315e-02, 1.675e-01, -8.328e-02)); + r += mul(s3_0, M4(-2.198e-02, 6.295e-02, -2.934e-02, -1.750e-02, -7.554e-03, 9.127e-03, -2.459e-02, -8.642e-02, -5.513e-02, 1.146e-01, -1.102e-01, 5.863e-02, 5.379e-02, 1.021e-02, 3.317e-02, -5.760e-02)); + r += mul(s3_1, M4(-1.049e-01, -8.519e-02, 7.863e-02, 1.037e-01, 4.839e-02, 4.829e-02, 6.753e-02, 1.505e-01, -2.484e-01, 2.085e-01, 4.480e-02, -2.862e-02, 4.703e-03, 1.778e-02, -2.094e-02, 1.421e-01)); + r += mul(s3_2, M4(-8.694e-03, -4.337e-02, 1.965e-03, 1.981e-03, -4.066e-02, -4.434e-02, 3.244e-02, 2.163e-02, -1.357e-01, -1.646e-01, -1.628e-01, 7.207e-02, 8.472e-03, -2.433e-02, 2.950e-02, 8.439e-02)); + r += mul(s3_3, M4(-5.182e-02, 4.552e-02, -8.238e-02, 1.243e-01, -3.660e-02, 5.286e-02, -4.190e-02, 7.928e-02, 7.770e-02, 
-8.976e-02, -3.774e-02, -2.373e-01, 3.765e-02, 5.051e-02, -2.150e-02, 1.144e-01)); + r += mul(s3_4, M4(-9.187e-02, 1.787e-01, 2.883e-01, -2.009e-01, -1.131e-01, 2.180e-01, 1.205e-01, -3.869e-02, -1.832e-01, 2.039e-01, 2.666e-01, -2.910e-01, 1.322e-01, 4.483e-02, 4.458e-02, -1.985e-01)); + r += mul(s3_5, M4(-3.202e-02, -2.526e-02, -1.951e-02, -1.423e-01, 5.709e-02, -4.307e-02, -4.011e-02, 2.025e-02, -5.903e-02, -7.382e-02, -2.092e-02, -2.200e-01, 8.407e-03, -1.098e-01, 4.387e-02, -9.421e-02)); + r += mul(s3_6, M4(-5.411e-02, 1.843e-03, 7.369e-02, -1.694e-01, -5.671e-02, -2.274e-02, -1.143e-02, -3.773e-02, 2.281e-01, 2.538e-01, -8.968e-02, 2.383e-01, -4.948e-02, -1.031e-02, -5.387e-03, 6.528e-02)); + r += mul(s3_7, M4(-1.095e-01, -7.263e-03, -1.382e-01, 1.252e-01, -5.913e-03, 4.645e-02, 5.962e-03, -7.470e-02, 1.801e-01, 2.301e-01, 1.172e-01, 2.978e-02, 3.583e-02, 1.851e-03, 7.472e-02, 7.639e-02)); + r += mul(s3_8, M4(-4.003e-02, 2.485e-02, -1.482e-02, 3.177e-02, -7.820e-02, 1.081e-02, 2.634e-03, 3.374e-02, 2.240e-01, 5.150e-02, -1.188e-01, 7.844e-02, 1.009e-01, 3.870e-02, 7.445e-02, 4.500e-02)); + r += mul(s4_0, M4(2.603e-02, 2.744e-02, 1.406e-01, -7.488e-02, 5.131e-02, 9.312e-03, 6.753e-02, -3.920e-02, 2.157e-02, -2.596e-02, -1.122e-01, -3.905e-02, -4.067e-03, -4.047e-02, 8.484e-02, -3.328e-03)); + r += mul(s4_1, M4(-5.090e-02, -7.445e-03, 1.373e-01, 4.901e-02, 6.356e-02, 5.066e-02, 4.342e-02, -3.282e-02, 6.329e-02, 7.936e-02, -2.880e-03, -7.725e-02, 5.561e-02, -1.732e-01, 3.595e-02, -9.497e-02)); + r += mul(s4_2, M4(-1.719e-01, 8.029e-02, 8.783e-02, 1.613e-02, 9.234e-02, 1.685e-02, -2.163e-03, 5.326e-03, 1.671e-02, -2.033e-02, 2.985e-02, -6.389e-02, 1.169e-01, -7.354e-02, -1.723e-01, -1.210e-01)); + r += mul(s4_3, M4(-3.432e-02, -1.843e-02, -9.934e-02, 1.439e-01, 5.726e-02, -4.894e-02, 1.138e-01, 1.118e-03, 1.394e-02, -2.013e-03, -8.812e-02, -1.739e-02, -6.373e-02, -8.443e-02, -1.352e-01, -2.226e-02)); + r += mul(s4_4, M4(1.253e-02, 7.655e-02, -4.875e-03, 
1.001e-01, -9.879e-02, -1.880e-01, -1.733e-01, 2.513e-01, 8.727e-02, -7.056e-02, 7.586e-02, 2.894e-02, 7.644e-02, -2.775e-01, -2.627e-01, 3.969e-01)); + r += mul(s4_5, M4(2.915e-02, 6.822e-02, 6.869e-02, -3.122e-02, -8.106e-03, 4.366e-02, 4.509e-02, 5.110e-02, -9.694e-02, 2.586e-02, -5.651e-02, 2.758e-02, -4.959e-02, 6.587e-02, 2.136e-01, 8.082e-04)); + r += mul(s4_6, M4(1.144e-02, -4.841e-02, 4.750e-02, -2.561e-02, -9.700e-02, -4.014e-02, 7.104e-02, -7.030e-02, 4.325e-02, 6.899e-03, -7.420e-02, 1.027e-01, -3.142e-02, 7.203e-02, -2.908e-02, -6.599e-03)); + r += mul(s4_7, M4(-5.599e-02, -4.933e-02, 1.603e-01, -1.476e-01, 3.497e-02, 4.775e-02, -1.490e-03, 2.038e-02, -6.058e-02, -6.507e-02, -5.224e-02, -8.929e-02, -1.684e-01, -2.646e-02, 9.190e-02, -4.504e-02)); + r += mul(s4_8, M4(-1.992e-02, 3.383e-02, -5.947e-02, -2.892e-02, 8.796e-02, 3.969e-03, -2.367e-02, 2.041e-02, 7.427e-03, 3.707e-02, -5.194e-02, -5.459e-02, 1.220e-02, 8.075e-03, -1.319e-02, 9.306e-02)); + r += mul(s5_0, M4(-2.438e-02, 1.312e-02, -4.178e-03, 1.038e-01, -1.106e-02, 6.018e-02, 7.408e-02, -5.235e-02, 1.112e-02, -1.607e-02, -9.098e-02, -8.538e-02, -1.336e-01, 3.770e-02, -7.036e-02, 1.547e-01)); + r += mul(s5_1, M4(-2.156e-01, 1.579e-02, -1.175e-01, 3.351e-01, 9.171e-02, 5.327e-02, 1.050e-01, -1.305e-02, -6.839e-02, 1.260e-01, 5.867e-02, 5.210e-02, 1.380e-02, -6.424e-02, 1.335e-01, -1.469e-01)); + r += mul(s5_2, M4(-1.872e-02, -2.200e-02, -7.178e-02, 1.159e-01, 8.871e-03, 1.458e-02, -7.591e-02, 4.722e-02, -1.248e-02, -4.026e-02, -6.267e-02, 1.666e-02, -4.916e-02, -4.396e-03, -2.585e-02, -8.002e-02)); + r += mul(s5_3, M4(-5.758e-03, 2.092e-04, 6.852e-02, 4.371e-02, -1.390e-04, 2.203e-02, -8.133e-02, 5.463e-02, 8.813e-03, 2.453e-02, 4.666e-02, -1.089e-01, -1.602e-02, 3.621e-03, -6.850e-02, -1.048e-02)); + r += mul(s5_4, M4(-1.057e-01, -1.195e-01, 1.295e-03, -1.879e-01, 4.307e-02, -3.808e-02, 2.598e-01, 6.048e-02, -7.240e-02, 1.263e-01, 6.689e-02, -6.630e-02, -8.146e-02, -1.077e-01, 1.230e-02, 
3.512e-02)); + r += mul(s5_5, M4(-2.763e-02, 1.045e-01, 9.368e-02, -2.891e-02, -1.262e-01, -2.279e-02, -9.170e-02, -5.480e-03, -6.894e-02, 2.340e-01, 1.316e-01, 9.275e-02, 7.497e-03, 8.773e-02, 1.450e-01, -4.171e-02)); + r += mul(s5_6, M4(2.338e-02, 2.649e-03, 1.191e-02, 1.114e-01, 8.369e-03, 2.264e-02, 5.923e-02, -8.366e-02, 2.113e-02, -1.026e-01, 4.153e-03, 1.457e-02, 8.502e-03, 8.433e-02, 1.364e-02, 9.216e-02)); + r += mul(s5_7, M4(2.501e-01, 4.072e-02, 4.366e-02, 1.529e-01, 7.336e-03, -1.041e-03, 6.438e-02, 4.095e-02, 1.695e-01, 4.772e-02, 3.436e-03, -7.710e-02, -1.302e-03, 6.680e-02, 1.340e-02, 7.820e-02)); + r += mul(s5_8, M4(1.079e-01, -2.753e-02, -8.011e-02, 5.671e-02, 2.691e-02, 1.961e-02, -5.833e-02, 7.675e-02, 4.570e-02, 1.346e-02, -1.609e-02, -1.391e-01, 1.159e-01, -2.085e-03, -4.314e-02, 1.949e-01)); + r += mul(s6_0, M4(1.760e-02, -5.478e-02, -1.305e-02, -2.815e-02, -5.564e-02, -2.450e-03, -3.298e-02, 1.908e-02, 1.709e-02, -5.423e-02, -7.570e-03, -9.201e-02, -2.826e-02, 6.207e-02, 1.535e-01, -7.470e-03)); + r += mul(s6_1, M4(-1.485e-02, 1.209e-01, 1.303e-01, -1.392e-01, 4.982e-02, -1.113e-01, -5.884e-02, 1.599e-02, 1.865e-01, 1.140e-01, 3.929e-02, 4.492e-02, -1.871e-01, -5.820e-02, -4.093e-03, -1.097e-01)); + r += mul(s6_2, M4(-6.840e-02, 6.539e-03, 7.601e-02, 1.108e-01, 6.659e-02, 1.486e-02, -7.902e-02, 3.357e-02, 2.116e-01, 2.026e-02, -7.777e-02, 1.355e-01, -9.171e-02, 1.374e-02, 4.920e-02, 3.573e-02)); + r += mul(s6_3, M4(-1.232e-02, -4.098e-02, 8.025e-02, -2.882e-02, 4.008e-03, 6.138e-02, -2.122e-01, 1.824e-01, 3.671e-02, -9.563e-02, 9.838e-02, -1.417e-02, 4.926e-02, -1.583e-02, -9.245e-02, 8.478e-02)); + r += mul(s6_4, M4(-4.521e-02, -2.417e-02, 3.265e-02, -2.099e-03, 6.462e-02, 9.975e-02, 1.865e-02, -3.928e-02, -2.705e-02, -2.945e-01, 1.402e-02, 2.707e-01, 2.218e-01, -1.392e-01, -4.710e-02, 3.750e-02)); + r += mul(s6_5, M4(8.548e-02, -3.462e-03, -4.457e-02, -8.958e-02, 6.745e-02, 3.801e-02, 3.625e-02, 2.756e-03, 3.118e-02, -7.609e-02, 1.555e-01, 
7.602e-02, 1.309e-01, 1.372e-01, -4.452e-02, -1.374e-01)); + r += mul(s6_6, M4(-8.046e-02, 2.717e-02, -1.334e-01, 3.780e-03, -2.606e-02, -8.003e-02, 1.969e-02, -1.529e-01, -2.649e-02, 7.647e-02, -3.141e-03, 6.980e-02, 2.159e-02, -1.673e-02, -1.130e-01, -1.533e-02)); + r += mul(s6_7, M4(8.020e-02, -2.917e-02, -1.353e-01, -1.353e-02, -8.177e-02, -5.781e-02, -5.298e-02, 3.476e-02, -2.233e-02, 9.911e-02, -2.051e-02, -4.530e-02, -7.769e-02, 5.231e-02, -1.739e-02, 1.335e-02)); + r += mul(s6_8, M4(1.209e-01, 2.396e-02, -2.819e-02, 5.762e-02, 1.112e-01, 2.399e-02, 7.932e-02, 1.191e-01, 1.207e-01, 1.213e-02, -4.203e-02, 3.251e-02, -9.532e-02, -1.213e-02, 5.176e-02, 7.673e-03)); + r += mul(s7_0, M4(-9.347e-02, -2.286e-02, -8.569e-02, 5.922e-02, -1.050e-02, 1.615e-02, -6.755e-02, 8.917e-02, 5.177e-02, -8.341e-02, 8.778e-02, -1.525e-01, -8.610e-02, 8.524e-03, 3.540e-02, 2.440e-01)); + r += mul(s7_1, M4(-4.814e-02, 3.439e-02, 2.050e-01, -7.712e-02, -5.364e-02, -8.788e-02, -1.331e-01, 1.023e-01, -1.185e-01, -1.326e-01, -3.383e-01, 6.328e-02, -1.424e-01, -1.064e-01, -2.044e-01, -2.384e-02)); + r += mul(s7_2, M4(-7.306e-02, -9.739e-02, -7.158e-02, -6.684e-02, 1.426e-02, 1.303e-03, 1.836e-02, 4.949e-03, 9.151e-02, 7.248e-02, 7.497e-03, 2.724e-01, -6.642e-03, 6.167e-03, 3.202e-04, -1.025e-03)); + r += mul(s7_3, M4(-1.537e-02, -9.354e-02, 4.753e-02, 2.086e-02, 2.520e-02, 2.105e-03, -4.706e-02, -4.345e-02, -4.939e-02, 2.905e-02, 9.454e-02, 1.517e-02, 1.502e-02, 4.808e-02, -1.780e-01, 2.719e-01)); + r += mul(s7_4, M4(-9.833e-02, 3.796e-02, 2.619e-04, 8.518e-02, -8.701e-03, 9.340e-02, 4.374e-02, -1.319e-02, 1.728e-01, 8.227e-02, -1.186e-01, 4.184e-01, 2.972e-01, -3.795e-01, -4.548e-01, 7.204e-02)); + r += mul(s7_5, M4(-8.860e-02, -2.519e-02, -6.967e-02, -2.032e-02, 6.664e-03, 6.918e-02, 1.030e-01, 1.352e-01, 6.802e-02, -6.259e-04, 3.941e-02, -8.391e-02, 1.389e-01, 1.509e-01, -1.392e-01, 2.699e-01)); + r += mul(s7_6, M4(3.628e-02, -4.122e-02, -1.470e-01, 2.078e-02, -3.632e-02, 
-6.245e-02, -5.451e-02, 2.176e-01, 1.773e-03, 3.551e-03, -8.369e-02, -1.283e-03, 7.591e-02, 6.184e-02, 3.148e-02, 4.768e-02)); + r += mul(s7_7, M4(-1.721e-03, -8.997e-02, -1.011e-01, -2.121e-03, 1.935e-01, 2.244e-02, 3.641e-03, 2.111e-02, -2.126e-01, -1.150e-01, 2.173e-02, -2.666e-01, -2.086e-01, 6.622e-02, 6.155e-03, 1.749e-01)); + r += mul(s7_8, M4(6.197e-02, -3.724e-02, 5.872e-02, 3.360e-02, 7.349e-02, -1.592e-02, 5.272e-03, -2.045e-02, 9.958e-02, 1.306e-02, -4.321e-02, -2.412e-02, -1.620e-01, -1.109e-01, 2.305e-02, -9.959e-02)); + r += V4(1.116e-03, 1.909e-02, 1.793e-02, -7.700e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.919e-02, -7.925e-02, 1.392e-03, 7.669e-02, 4.054e-02, -6.717e-02, -2.192e-02, 4.973e-02, -2.415e-02, 3.759e-02, -2.293e-03, 2.573e-02, -5.178e-02, -4.537e-02, -3.029e-03, 8.999e-02)); + r += mul(s0_1, M4(2.788e-02, 1.615e-01, -9.678e-02, 5.520e-02, 9.562e-02, -3.281e-02, 2.804e-02, -1.793e-03, -4.934e-02, -7.966e-02, 3.871e-02, 1.981e-02, 4.654e-03, -5.576e-02, -1.091e-01, 6.611e-02)); + r += mul(s0_2, M4(3.869e-02, 7.204e-02, -1.226e-02, 4.877e-02, -8.615e-02, -1.289e-02, -7.947e-03, 9.051e-02, -3.714e-02, 1.279e-02, 6.112e-02, -6.459e-02, -9.514e-03, 1.510e-02, 4.982e-02, -2.587e-02)); + r += mul(s0_3, M4(-2.231e-03, 2.710e-02, -4.442e-03, 3.404e-02, 1.433e-02, -3.752e-02, 
-4.109e-02, 1.475e-01, 6.381e-02, 2.638e-02, -1.825e-02, 6.887e-03, -8.052e-02, -2.248e-01, 7.873e-02, -7.883e-03)); + r += mul(s0_4, M4(3.477e-02, 1.055e-01, 4.676e-02, 4.323e-02, -1.675e-01, 3.107e-01, 1.456e-01, 9.397e-03, -2.638e-01, 2.122e-02, 1.550e-01, -1.148e-01, 1.629e-01, 2.266e-01, 4.130e-03, 1.362e-01)); + r += mul(s0_5, M4(5.124e-02, -7.977e-03, 2.135e-02, 4.652e-02, 1.013e-01, -4.334e-03, -4.985e-02, 2.150e-02, 1.560e-02, 1.660e-05, 6.906e-02, 8.347e-03, -4.262e-02, 8.572e-02, 8.665e-03, 1.076e-01)); + r += mul(s0_6, M4(-1.136e-02, 1.071e-02, 6.784e-02, 6.999e-02, -2.730e-02, -4.259e-02, 6.931e-03, 9.898e-02, -5.586e-02, -3.762e-02, -1.208e-01, -6.106e-02, -3.304e-02, -1.983e-02, 1.497e-01, -6.972e-02)); + r += mul(s0_7, M4(-4.612e-02, 1.210e-01, -1.136e-01, -1.952e-01, -1.998e-02, 1.264e-01, -1.409e-01, 1.728e-02, 9.948e-02, -2.794e-02, 2.301e-01, 6.005e-02, -9.681e-02, 2.339e-01, -1.852e-01, -3.059e-01)); + r += mul(s0_8, M4(-7.229e-03, 1.016e-02, 5.459e-02, 9.762e-02, -4.530e-02, 9.182e-02, 6.607e-02, 1.315e-01, 2.363e-02, -1.151e-02, 7.571e-03, -1.558e-01, -2.558e-02, -2.231e-02, 1.212e-01, 4.068e-03)); + r += mul(s1_0, M4(1.568e-03, -6.265e-02, -3.742e-02, 3.680e-02, 4.999e-02, 6.828e-02, 8.172e-03, 6.230e-02, -3.445e-02, 1.025e-02, 6.826e-04, 1.275e-01, -1.594e-02, 8.525e-03, -6.673e-02, 1.133e-01)); + r += mul(s1_1, M4(7.044e-02, 1.775e-01, 1.069e-01, 1.081e-01, 3.330e-02, -1.107e-01, -4.276e-02, -1.295e-01, 4.836e-02, -5.847e-02, 3.050e-02, 1.332e-01, -7.048e-02, -5.868e-02, 6.968e-02, 1.823e-02)); + r += mul(s1_2, M4(1.368e-02, 4.063e-02, 1.168e-01, 3.856e-02, 1.630e-03, 1.284e-02, 1.284e-02, 1.563e-03, -5.222e-02, 4.899e-02, -1.622e-02, -1.790e-03, -1.195e-02, 3.564e-02, 4.695e-04, 1.405e-02)); + r += mul(s1_3, M4(1.593e-03, -3.215e-02, 7.395e-02, -7.133e-02, 7.484e-03, 1.356e-01, -9.938e-02, 3.818e-02, -1.549e-02, 2.184e-02, -1.766e-01, 6.092e-02, -6.955e-02, -6.860e-02, 1.052e-02, 9.732e-02)); + r += mul(s1_4, M4(1.515e-01, 1.307e-01, 
-2.911e-01, 3.438e-02, -3.025e-02, -4.919e-02, 1.980e-01, -1.327e-01, -6.091e-02, -1.083e-01, -2.129e-01, -1.964e-01, 1.909e-01, 1.199e-01, 1.115e-01, 1.683e-01)); + r += mul(s1_5, M4(4.515e-02, 2.601e-02, -1.402e-02, 4.323e-03, 6.401e-02, -4.858e-02, 1.265e-01, -3.195e-03, 1.839e-03, -7.002e-02, -3.491e-02, 2.607e-01, -4.128e-02, -1.300e-02, -1.078e-01, 1.049e-01)); + r += mul(s1_6, M4(4.349e-03, 2.439e-02, 1.118e-01, -6.831e-02, 1.013e-01, -6.029e-02, -2.105e-02, 3.956e-02, -4.134e-02, -3.690e-02, 1.320e-02, 3.823e-02, -7.655e-02, 4.842e-02, 1.105e-01, 4.895e-02)); + r += mul(s1_7, M4(-8.619e-02, 1.346e-01, -1.204e-01, -2.886e-02, 8.199e-02, 1.387e-01, 1.203e-01, 2.969e-02, -6.761e-02, -5.815e-02, -8.640e-02, 8.694e-03, -2.326e-02, 1.259e-01, -1.610e-01, -5.322e-02)); + r += mul(s1_8, M4(-2.302e-02, -1.948e-02, 5.414e-02, 1.489e-01, -3.227e-03, 4.110e-02, 1.508e-01, -7.698e-02, -3.182e-02, 9.852e-02, 1.671e-03, -9.501e-03, -2.827e-02, -5.427e-02, 4.112e-02, 1.148e-01)); + r += mul(s2_0, M4(-2.653e-02, 1.115e-01, 8.772e-02, -7.303e-02, 7.044e-02, 1.019e-01, -1.103e-02, 4.589e-02, -9.427e-04, 5.866e-02, -1.047e-02, 3.651e-02, 8.821e-02, 3.674e-02, 1.745e-01, -2.124e-01)); + r += mul(s2_1, M4(-8.092e-03, -1.616e-01, -2.955e-02, -5.523e-02, 2.648e-02, 2.296e-01, 1.767e-01, -1.016e-01, -1.790e-02, -2.629e-02, 6.654e-03, 3.217e-03, -6.323e-02, -8.076e-02, 1.921e-02, -1.112e-01)); + r += mul(s2_2, M4(-1.211e-02, -2.244e-02, 3.096e-02, 1.203e-01, -2.039e-02, -1.193e-01, 2.880e-02, -3.508e-02, -2.365e-02, -1.497e-02, -4.153e-02, -4.644e-02, 8.814e-02, -7.433e-03, 9.270e-02, -9.398e-02)); + r += mul(s2_3, M4(-1.593e-01, -1.226e-01, 1.925e-01, -5.361e-02, -8.462e-02, 2.094e-01, 5.406e-02, -1.166e-01, -5.615e-03, 6.467e-02, -9.683e-02, 2.345e-02, -9.391e-02, -1.485e-01, 1.865e-01, -2.633e-02)); + r += mul(s2_4, M4(-1.018e-01, 1.159e-01, 1.366e-01, 1.475e-01, -1.902e-01, 5.375e-01, -1.445e-01, 1.699e-01, 5.062e-02, 3.539e-02, 5.849e-02, 2.295e-02, 4.430e-02, 5.493e-02, 
-2.390e-01, 3.035e-01)); + r += mul(s2_5, M4(2.495e-02, 2.272e-03, 3.160e-02, 3.626e-02, -1.549e-01, -3.333e-02, 2.068e-01, -7.810e-02, 3.562e-02, 2.132e-02, -5.171e-02, -2.567e-02, 3.387e-02, 6.526e-02, 3.167e-01, -5.440e-02)); + r += mul(s2_6, M4(1.138e-01, -5.602e-02, 1.855e-02, -3.468e-02, -3.070e-02, 6.960e-02, -5.956e-02, -7.770e-02, -4.455e-03, 2.730e-02, -1.527e-02, -3.028e-03, 6.344e-02, 4.287e-02, -1.154e-02, -9.056e-02)); + r += mul(s2_7, M4(2.557e-02, -7.715e-02, 1.273e-01, 2.954e-02, 8.715e-02, -7.615e-02, -6.236e-02, 1.199e-01, -5.666e-04, 3.760e-02, -1.870e-02, -5.789e-02, -4.855e-02, 1.091e-01, 8.060e-02, -2.648e-01)); + r += mul(s2_8, M4(-1.947e-02, 1.035e-02, -1.053e-01, -4.396e-02, 1.595e-02, 2.110e-03, 2.073e-02, -2.046e-01, -5.264e-04, -1.792e-03, 3.973e-02, 3.448e-02, 1.158e-01, -9.957e-02, 2.655e-01, -2.842e-01)); + r += mul(s3_0, M4(-2.472e-02, 1.291e-02, 7.998e-02, 1.255e-01, 1.699e-02, 7.149e-02, -4.039e-02, 8.269e-02, 2.302e-01, 2.576e-01, 7.557e-02, -1.364e-01, -2.455e-02, -2.370e-02, 5.928e-02, 1.647e-02)); + r += mul(s3_1, M4(9.621e-02, -7.109e-02, -2.288e-03, -9.891e-04, 4.067e-02, 1.103e-02, -7.506e-03, 2.783e-03, 1.373e-01, 1.752e-01, -7.984e-02, 1.528e-01, -4.504e-02, 4.944e-02, 9.320e-02, -2.287e-02)); + r += mul(s3_2, M4(3.217e-03, -6.484e-03, 7.505e-02, 6.081e-02, -9.189e-03, -1.712e-02, -3.935e-02, 1.990e-02, 4.870e-03, -2.051e-02, 5.300e-02, -8.601e-02, -2.530e-02, 1.760e-02, 3.964e-03, 4.869e-02)); + r += mul(s3_3, M4(1.133e-01, -4.615e-02, -6.186e-02, -1.925e-01, 5.439e-02, 4.079e-02, -4.954e-03, 7.498e-02, -1.841e-01, 1.061e-01, 1.851e-01, 2.452e-02, -1.568e-02, 1.887e-02, -7.677e-02, -2.606e-02)); + r += mul(s3_4, M4(-9.875e-02, 1.220e-01, 5.520e-03, -6.611e-03, -3.200e-02, 2.402e-01, -1.204e-01, 9.334e-02, -6.591e-02, 8.266e-02, -1.431e-01, -3.456e-02, -7.360e-03, 5.965e-02, 1.024e-01, 1.577e-01)); + r += mul(s3_5, M4(-2.872e-03, -1.813e-02, -1.353e-02, 4.279e-02, -2.792e-02, -1.463e-02, 4.871e-02, 4.340e-02, -1.887e-01, 
2.890e-02, -6.791e-02, 3.276e-03, -1.405e-02, 2.299e-02, -1.046e-01, -2.743e-02)); + r += mul(s3_6, M4(-1.520e-02, 9.810e-03, 1.150e-01, -6.546e-02, 2.204e-02, 5.595e-04, -2.268e-02, -1.272e-01, 1.290e-01, -2.221e-02, 4.320e-02, 1.093e-01, -8.626e-02, -1.002e-02, -9.400e-02, -3.952e-02)); + r += mul(s3_7, M4(-5.020e-02, -4.450e-02, -1.630e-02, 6.843e-02, 9.166e-03, 3.361e-02, -1.517e-02, -5.835e-02, -1.530e-01, 1.442e-01, 8.440e-02, 2.064e-01, 3.143e-02, -2.700e-02, 9.761e-02, 9.864e-03)); + r += mul(s3_8, M4(5.993e-03, -6.822e-02, -2.974e-02, -1.074e-01, 2.007e-02, -6.954e-03, -1.659e-03, -5.546e-02, 3.050e-02, -7.359e-02, 1.188e-01, 1.087e-01, -9.732e-03, 4.313e-02, -1.093e-01, -1.936e-02)); + r += mul(s4_0, M4(4.651e-02, 5.734e-02, -4.370e-02, 1.303e-02, -3.990e-02, -5.264e-02, -5.082e-02, -4.679e-02, 2.346e-02, -1.208e-01, 9.092e-02, 4.726e-02, 1.249e-03, -4.455e-02, 6.476e-02, -6.071e-02)); + r += mul(s4_1, M4(1.019e-01, 7.586e-02, 1.363e-02, 6.434e-04, 4.875e-02, -8.355e-03, -4.770e-02, -4.833e-02, -2.720e-02, 2.546e-02, 6.016e-02, 1.284e-01, -4.937e-02, -2.311e-02, -7.206e-02, -7.221e-02)); + r += mul(s4_2, M4(7.047e-02, 3.157e-02, -4.150e-02, -1.577e-01, -2.810e-03, 5.189e-02, 3.640e-02, 1.522e-01, 4.752e-02, -5.240e-02, 2.253e-02, 4.064e-02, -1.003e-01, -2.042e-02, 1.207e-01, -7.717e-02)); + r += mul(s4_3, M4(-2.889e-02, -1.341e-02, -4.202e-02, -7.141e-02, 1.300e-01, 1.342e-01, -1.211e-01, -8.244e-02, 1.337e-02, -7.162e-02, 1.231e-02, -3.629e-02, 1.921e-02, -6.773e-02, -7.825e-02, 9.541e-03)); + r += mul(s4_4, M4(-7.117e-02, 1.538e-01, 1.473e-01, 1.600e-03, -2.881e-01, -2.482e-01, 1.880e-01, -1.685e-01, 1.229e-01, 1.265e-01, -4.801e-02, 4.958e-02, 1.078e-01, -7.656e-02, -1.398e-01, 7.078e-02)); + r += mul(s4_5, M4(8.922e-02, 2.713e-02, -1.388e-01, -2.041e-02, 2.752e-02, 1.451e-01, 2.079e-01, 9.657e-03, 7.133e-02, 6.334e-02, -1.286e-02, -1.663e-01, -1.750e-02, 9.428e-02, 3.086e-02, -3.345e-03)); + r += mul(s4_6, M4(-1.315e-02, 4.226e-03, 1.794e-02, 
-3.194e-02, 1.228e-02, 9.752e-02, -3.953e-02, -8.082e-02, 4.687e-02, -3.622e-02, 1.084e-02, 3.371e-02, -3.397e-02, -7.870e-03, 9.080e-02, 5.172e-02)); + r += mul(s4_7, M4(-5.259e-02, 1.199e-01, -3.419e-02, -7.587e-02, 4.479e-02, -1.376e-01, 2.607e-01, 1.360e-01, -3.254e-03, -3.870e-02, -8.618e-02, 3.234e-02, -1.817e-02, -3.851e-03, -1.145e-01, -1.043e-01)); + r += mul(s4_8, M4(-5.326e-02, 1.129e-02, 3.093e-03, -1.843e-02, 1.400e-02, 7.148e-02, 2.607e-03, -6.451e-02, 6.076e-03, 3.317e-03, -2.717e-02, 5.046e-02, -8.958e-03, 7.050e-03, 4.207e-02, 8.876e-02)); + r += mul(s5_0, M4(2.586e-02, -2.349e-02, 2.932e-02, -5.166e-02, -7.897e-03, -7.043e-03, 2.976e-02, 6.619e-02, 4.217e-02, -4.423e-02, -1.386e-02, -4.745e-02, -5.444e-02, -5.169e-02, 7.885e-02, -1.084e-01)); + r += mul(s5_1, M4(8.738e-03, 7.152e-02, 7.845e-02, -2.044e-01, -8.195e-03, 5.205e-02, -4.538e-02, 3.715e-02, 9.693e-02, 7.824e-02, 2.084e-02, -2.676e-02, -4.720e-03, 1.107e-02, -1.449e-01, 1.888e-01)); + r += mul(s5_2, M4(-5.442e-02, 4.418e-02, 1.137e-01, 1.856e-02, -1.403e-02, 1.813e-02, 9.986e-02, 7.905e-02, -5.202e-02, -2.849e-02, -1.480e-02, -6.858e-02, -3.246e-02, -1.286e-02, 8.856e-02, -1.391e-01)); + r += mul(s5_3, M4(-1.563e-02, -4.548e-02, 1.092e-01, 1.109e-01, -5.316e-02, -3.517e-02, 3.365e-02, -6.571e-02, 5.587e-02, 3.823e-02, -6.547e-02, -4.291e-02, -1.775e-02, -1.006e-01, -2.846e-02, -5.472e-02)); + r += mul(s5_4, M4(-3.610e-01, -9.005e-02, -6.767e-03, -4.964e-02, 4.135e-02, 5.995e-02, -4.848e-02, -2.138e-01, -8.977e-02, 1.354e-01, 9.247e-02, -9.705e-02, -4.618e-02, -1.068e-01, -2.381e-01, -7.140e-02)); + r += mul(s5_5, M4(-7.878e-02, 3.430e-02, -1.457e-01, -2.780e-02, 2.820e-02, 1.043e-01, 2.156e-01, 1.127e-03, 1.850e-01, 9.750e-02, 4.406e-02, -1.432e-02, -9.345e-02, 1.137e-01, -1.125e-01, 6.013e-03)); + r += mul(s5_6, M4(-4.722e-02, 3.935e-02, -6.742e-02, -3.255e-02, 2.962e-02, -2.233e-02, 7.806e-02, 3.130e-03, -2.012e-02, -1.018e-02, -1.552e-01, -7.894e-03, -4.820e-03, -4.500e-04, 1.913e-02, 
5.634e-02)); + r += mul(s5_7, M4(4.246e-02, -4.113e-02, 1.959e-01, 1.608e-01, 2.533e-02, 7.140e-02, 3.818e-02, 2.888e-02, 1.182e-01, 3.901e-02, 5.552e-02, 5.373e-03, 3.537e-02, 3.370e-02, -6.001e-02, 6.745e-02)); + r += mul(s5_8, M4(-2.812e-02, -7.215e-02, -6.015e-02, -7.893e-02, -2.153e-02, 3.129e-02, 2.694e-02, 1.638e-02, -9.557e-03, 5.442e-02, -1.113e-01, -4.262e-02, 2.120e-02, 2.377e-02, 4.144e-02, 9.725e-02)); + r += mul(s6_0, M4(6.638e-02, 1.293e-01, -3.926e-02, -1.474e-02, -2.119e-02, -9.355e-02, 1.058e-02, 3.320e-02, 4.711e-03, 1.100e-02, -3.412e-02, 4.699e-02, 7.911e-03, 3.650e-02, 9.713e-03, 3.565e-03)); + r += mul(s6_1, M4(-1.812e-02, 1.561e-01, -2.708e-02, -6.048e-02, 3.477e-02, -1.129e-01, -8.815e-02, -1.316e-01, 6.414e-02, 1.344e-01, 1.704e-01, 1.308e-01, 1.101e-02, -2.568e-02, 5.194e-02, -1.636e-01)); + r += mul(s6_2, M4(2.318e-02, -1.935e-02, -8.109e-02, -1.403e-01, 6.967e-03, 6.255e-02, 8.782e-02, 4.827e-02, -4.230e-02, 4.459e-02, 1.309e-02, -1.259e-01, 1.007e-03, -9.404e-03, -3.368e-02, -5.957e-02)); + r += mul(s6_3, M4(1.722e-02, -3.367e-02, -3.620e-02, 5.109e-02, -4.373e-02, -6.842e-02, -5.616e-02, 8.769e-03, 3.662e-02, 1.232e-01, 1.555e-02, 8.083e-02, -3.390e-02, -6.567e-02, 3.499e-02, -3.386e-02)); + r += mul(s6_4, M4(3.609e-02, 2.455e-01, 1.008e-01, 1.145e-01, -2.302e-01, -8.653e-02, 1.462e-01, 2.283e-01, 8.103e-02, 2.725e-02, -1.431e-01, 1.184e-01, -1.233e-01, -1.137e-03, -4.487e-02, 8.764e-02)); + r += mul(s6_5, M4(-1.343e-01, -6.138e-02, -7.601e-02, 9.977e-02, -4.357e-02, 2.132e-01, 1.662e-02, -4.598e-03, 5.994e-02, 8.297e-02, 4.904e-03, -1.470e-01, 4.775e-02, 6.281e-02, -6.231e-02, 1.806e-02)); + r += mul(s6_6, M4(-3.251e-02, -1.148e-01, -4.861e-03, 5.381e-02, 1.793e-02, -6.575e-02, 2.040e-01, 8.277e-02, -4.758e-02, 2.284e-03, -3.811e-02, 9.341e-02, 2.977e-02, -6.401e-02, 6.955e-02, 5.232e-02)); + r += mul(s6_7, M4(2.862e-02, -7.521e-02, 1.306e-02, 7.799e-02, 5.028e-02, -7.196e-02, 8.181e-02, -2.600e-01, 9.961e-02, 2.078e-02, -1.647e-02, 
1.198e-01, -4.754e-03, -4.463e-02, 1.081e-01, 8.039e-03)); + r += mul(s6_8, M4(3.599e-02, -5.464e-02, 2.697e-02, 3.897e-02, 3.239e-02, 8.551e-02, -2.349e-02, 8.737e-02, 3.774e-02, 2.686e-02, -3.910e-02, -4.101e-02, -4.239e-02, -5.345e-03, -1.815e-02, -1.052e-02)); + r += mul(s7_0, M4(6.361e-02, 1.666e-02, 6.952e-02, -7.429e-02, 2.763e-02, -3.904e-02, -4.123e-02, -4.483e-02, 4.661e-02, 1.431e-01, -1.064e-01, 2.846e-02, -1.981e-02, -7.592e-02, -8.377e-02, 2.062e-02)); + r += mul(s7_1, M4(1.801e-02, 4.869e-02, -1.355e-01, -8.545e-02, 3.710e-02, -1.189e-01, 2.334e-02, -1.174e-01, 2.970e-01, -2.013e-01, 2.017e-01, -2.807e-01, 6.321e-03, -1.310e-01, 6.355e-02, -3.919e-02)); + r += mul(s7_2, M4(-6.371e-02, -5.002e-02, -7.557e-02, -1.677e-01, -1.730e-02, 3.528e-02, -1.888e-02, 3.005e-02, -4.489e-02, 1.365e-01, -1.371e-01, -2.878e-03, -3.842e-02, -7.498e-03, 4.025e-02, 1.424e-01)); + r += mul(s7_3, M4(-2.998e-02, -1.091e-01, 4.644e-02, -7.317e-02, -3.092e-02, 5.669e-02, -5.759e-02, 2.531e-02, -2.543e-02, 1.224e-01, -3.314e-02, 7.981e-02, -3.281e-02, -2.382e-01, -6.380e-02, 5.425e-02)); + r += mul(s7_4, M4(9.577e-02, 1.862e-01, -8.069e-02, 3.945e-02, -3.530e-01, 1.752e-01, 1.975e-01, -9.724e-02, 1.410e-01, 1.204e-01, 1.678e-01, 3.374e-01, 5.233e-02, -1.467e-01, -1.658e-01, 1.150e-01)); + r += mul(s7_5, M4(-1.004e-03, -1.027e-01, 4.311e-02, -1.368e-01, 9.128e-02, 2.698e-02, -9.869e-03, -2.861e-01, 1.421e-01, 9.835e-02, 1.079e-01, -1.416e-01, 9.540e-02, 2.884e-02, 1.069e-02, 1.558e-01)); + r += mul(s7_6, M4(7.539e-02, -7.728e-02, 2.320e-02, -2.850e-02, -4.240e-02, 6.454e-02, 6.304e-02, -1.285e-01, 2.739e-04, -2.594e-02, 7.202e-02, -3.416e-02, -2.743e-02, -1.126e-01, -3.591e-02, 1.367e-01)); + r += mul(s7_7, M4(-7.965e-03, -6.042e-02, 1.168e-02, -1.293e-01, 2.336e-02, 2.117e-01, 7.645e-02, 2.310e-02, -9.815e-02, 1.744e-04, -1.813e-01, -3.124e-02, -1.347e-01, -1.265e-01, -1.931e-01, 1.153e-01)); + r += mul(s7_8, M4(2.452e-02, 3.323e-02, -4.251e-02, -1.928e-01, 1.504e-02, 
2.942e-02, -2.489e-02, -1.146e-01, 4.450e-02, -2.973e-02, 9.084e-02, -1.068e-01, 3.174e-02, -7.544e-02, 1.781e-01, -6.189e-02)); + r += V4(-6.267e-03, 7.941e-03, 4.343e-03, 7.643e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.242e-02, -3.043e-02, 1.724e-03, -2.338e-02, -1.597e-01, -2.985e-02, -1.658e-02, 1.889e-02, 3.976e-02, 2.116e-02, 7.945e-02, -3.781e-02, 7.506e-02, -4.380e-03, 3.268e-02, 2.745e-02)); + r += mul(s0_1, M4(-1.011e-01, 6.166e-02, -3.718e-02, 1.284e-02, 7.213e-02, -2.187e-02, 1.332e-02, 8.331e-02, 1.490e-01, -2.006e-02, 3.534e-02, 3.973e-02, -5.176e-02, -1.232e-02, 8.295e-03, -4.230e-02)); + r += mul(s0_2, M4(7.887e-03, -1.568e-02, -1.862e-02, -1.451e-02, -8.544e-02, -1.506e-02, 3.749e-02, 2.105e-02, 8.908e-04, 2.095e-02, 3.198e-02, -4.995e-02, -2.329e-02, 4.272e-02, -1.463e-02, 1.879e-02)); + r += mul(s0_3, M4(1.510e-01, 6.255e-02, 1.248e-02, -1.567e-02, 1.991e-02, 1.207e-01, 7.475e-02, -5.247e-02, -3.974e-02, 1.786e-02, 6.710e-02, -2.180e-02, 3.164e-01, -1.499e-01, 3.093e-02, 4.123e-02)); + r += mul(s0_4, M4(-5.023e-02, 7.105e-02, -2.487e-01, 9.555e-02, -1.562e-01, 2.903e-01, 1.295e-01, 1.401e-02, 1.188e-01, 1.839e-01, -1.013e-01, -1.282e-02, -1.435e-02, -1.721e-01, 2.703e-01, -1.190e-01)); + r += mul(s0_5, M4(3.173e-02, -1.025e-01, 4.968e-02, 3.047e-02, 7.803e-02, -3.848e-02, 6.896e-02, 
5.594e-02, -2.399e-02, 1.367e-02, 1.396e-02, -6.260e-02, -1.713e-01, -2.355e-02, 2.674e-02, -1.170e-03)); + r += mul(s0_6, M4(-1.532e-02, -5.737e-02, 2.050e-02, 5.561e-02, -3.862e-02, -6.803e-02, -2.710e-03, -3.916e-02, 1.681e-01, -4.161e-02, 1.610e-02, 6.376e-02, -1.845e-01, 1.477e-01, -3.059e-02, -1.877e-01)); + r += mul(s0_7, M4(3.860e-03, 1.489e-01, -1.101e-01, 1.475e-02, -2.062e-02, 1.160e-01, -4.358e-02, 4.451e-02, -9.945e-02, -1.303e-01, 3.533e-02, -2.152e-01, 2.757e-01, 1.130e-01, 3.199e-03, 1.912e-01)); + r += mul(s0_8, M4(1.013e-02, -7.638e-02, 1.709e-02, 1.420e-02, -8.507e-02, -1.362e-01, -1.667e-01, 1.247e-01, -1.274e-02, -6.610e-03, 1.401e-02, -8.954e-03, -1.633e-01, 3.407e-02, -8.177e-03, -5.574e-02)); + r += mul(s1_0, M4(-1.150e-01, 2.966e-02, 4.007e-02, -7.870e-03, -1.109e-01, -1.495e-02, -2.973e-03, 4.372e-02, 6.264e-02, -1.931e-02, 5.305e-02, 3.212e-02, -1.566e-01, -8.504e-03, -3.634e-02, -9.955e-03)); + r += mul(s1_1, M4(-1.646e-01, -2.965e-02, 1.954e-02, -3.065e-03, 2.236e-02, 2.265e-02, 5.543e-02, 1.480e-01, 2.759e-02, -6.297e-02, 1.368e-02, -5.885e-03, 1.066e-02, -3.342e-02, -1.769e-01, 5.950e-02)); + r += mul(s1_2, M4(4.900e-03, -3.373e-02, -6.463e-02, 1.797e-02, 9.084e-02, -2.802e-02, 7.399e-03, -1.786e-02, -4.209e-02, 2.269e-02, 8.133e-02, 6.693e-02, -1.182e-01, -2.003e-02, -5.869e-02, 5.395e-03)); + r += mul(s1_3, M4(2.159e-01, 1.300e-01, -1.812e-02, 1.410e-02, -4.700e-02, 1.181e-01, 5.018e-02, -4.040e-02, -1.996e-02, 1.707e-02, 3.876e-02, -3.725e-02, 2.240e-01, -5.119e-02, 3.179e-02, -2.737e-02)); + r += mul(s1_4, M4(5.540e-02, -4.359e-02, -3.364e-01, 2.532e-01, 1.279e-01, 4.092e-01, 4.303e-02, -2.805e-02, -3.769e-02, -5.191e-02, -1.827e-01, -4.137e-03, -7.552e-02, 5.388e-02, 3.330e-01, -2.527e-01)); + r += mul(s1_5, M4(3.848e-02, -2.629e-02, -2.358e-01, 1.037e-01, 1.382e-01, 3.943e-02, 9.022e-02, -9.599e-02, -1.610e-01, -4.049e-02, 1.917e-02, 4.318e-02, -2.772e-02, -1.126e-01, 1.029e-01, -8.321e-02)); + r += mul(s1_6, M4(1.567e-02, 
2.873e-02, -2.628e-02, -6.763e-02, 1.361e-01, -4.261e-02, -9.321e-03, 3.030e-02, -1.324e-01, -1.161e-02, 1.182e-02, -8.724e-02, -1.285e-01, 1.195e-01, 5.453e-03, -1.037e-01)); + r += mul(s1_7, M4(1.467e-01, -3.218e-02, 1.390e-02, 9.988e-02, -5.050e-02, 3.019e-01, -1.035e-02, 1.410e-01, -9.858e-02, -8.520e-02, 1.264e-01, -1.111e-01, 6.724e-02, -1.416e-02, -1.929e-01, 1.696e-01)); + r += mul(s1_8, M4(-4.329e-02, -1.751e-02, -2.354e-02, -5.241e-02, -8.490e-03, -6.171e-02, -3.452e-02, 2.030e-02, -8.949e-02, 6.865e-03, 1.079e-01, 1.670e-02, -1.447e-01, 7.597e-03, -3.787e-02, -1.937e-02)); + r += mul(s2_0, M4(-1.686e-01, -2.961e-02, -8.625e-02, 2.167e-02, -1.101e-01, -2.648e-02, 4.794e-03, -3.042e-02, -3.449e-02, 2.160e-02, -6.026e-03, 2.033e-02, 1.714e-01, 3.822e-02, -3.474e-02, 7.751e-02)); + r += mul(s2_1, M4(6.028e-03, -4.305e-02, 2.270e-03, 3.999e-02, 2.744e-02, 1.034e-02, 1.535e-02, 1.389e-01, -2.836e-02, -1.379e-02, 4.342e-02, -2.250e-02, 7.039e-02, 9.988e-02, 1.646e-01, -3.523e-02)); + r += mul(s2_2, M4(-9.132e-02, -1.591e-02, 4.453e-02, -3.060e-02, -9.097e-02, -4.457e-02, 2.598e-02, -3.109e-02, 6.325e-02, -2.730e-02, -1.516e-02, 4.366e-03, 1.352e-01, 8.085e-03, -3.540e-02, -2.945e-02)); + r += mul(s2_3, M4(1.619e-01, 4.548e-02, -1.683e-02, -1.876e-01, 1.306e-01, 1.099e-01, 1.160e-01, 3.330e-02, 2.466e-02, -2.324e-02, 3.287e-02, -2.988e-02, 2.974e-01, 6.584e-02, 8.339e-02, -3.905e-02)); + r += mul(s2_4, M4(1.930e-02, 6.519e-02, 6.865e-02, -7.566e-02, 6.272e-02, 2.777e-01, 2.815e-01, -1.267e-01, 2.525e-02, -1.705e-02, 9.735e-02, 1.840e-02, -6.507e-03, 2.442e-02, 1.873e-01, 4.203e-02)); + r += mul(s2_5, M4(-3.090e-02, 1.316e-02, 5.238e-02, -2.696e-02, -2.512e-01, 1.003e-02, -5.793e-02, 4.695e-02, -1.130e-02, -4.208e-03, -2.160e-02, -2.077e-02, -2.198e-01, 1.261e-01, 3.839e-02, 1.232e-01)); + r += mul(s2_6, M4(2.053e-03, -3.048e-02, -2.046e-02, -7.591e-03, 1.779e-01, 8.231e-04, 1.298e-02, 4.006e-02, -1.434e-02, 2.252e-02, -2.371e-03, -2.140e-02, -1.074e-01, 
4.943e-02, 3.980e-02, 4.531e-02)); + r += mul(s2_7, M4(-1.625e-01, -5.116e-02, -1.598e-02, -7.413e-02, -5.324e-02, -1.362e-01, -1.930e-02, 3.562e-01, 5.185e-02, 2.173e-02, 1.555e-02, -1.232e-03, -4.592e-02, 1.353e-01, 6.000e-02, 1.579e-01)); + r += mul(s2_8, M4(5.657e-02, -4.500e-02, 7.839e-02, 2.178e-02, 7.723e-02, 1.228e-01, -1.940e-02, -1.431e-01, -1.076e-02, -3.345e-02, -3.213e-03, -3.475e-02, 4.939e-02, 1.538e-01, 3.908e-02, -1.416e-01)); + r += mul(s3_0, M4(-1.069e-01, -1.324e-01, 6.342e-02, 2.082e-04, 3.583e-02, -3.487e-02, 2.995e-02, -2.249e-02, -1.052e-01, -8.666e-02, -4.117e-01, 2.250e-01, -1.018e-02, 1.228e-02, 2.876e-02, -1.193e-02)); + r += mul(s3_1, M4(9.583e-03, -8.960e-02, -5.485e-02, 3.417e-02, -5.225e-02, 2.194e-02, 5.862e-02, 1.720e-02, -1.366e-01, -1.365e-01, -1.698e-01, -1.327e-01, 1.010e-01, -5.445e-03, -2.382e-02, 4.554e-03)); + r += mul(s3_2, M4(-1.024e-01, -6.450e-03, 4.188e-02, 2.180e-02, -1.434e-02, -2.351e-02, 1.839e-02, 1.035e-02, -7.543e-02, 4.912e-03, -2.013e-01, 1.131e-01, -3.632e-02, 3.108e-02, -2.070e-02, 1.023e-03)); + r += mul(s3_3, M4(-1.625e-01, -2.299e-01, -2.886e-01, 1.418e-01, -5.067e-02, 1.427e-02, -8.063e-02, -3.631e-02, 2.188e-04, 1.541e-01, -9.286e-02, -4.220e-02, 1.546e-01, -1.059e-01, 5.016e-02, 4.690e-02)); + r += mul(s3_4, M4(9.926e-02, -7.945e-02, 7.997e-02, -7.018e-02, -2.057e-02, 5.219e-02, -5.427e-02, 2.626e-03, 2.019e-01, -4.701e-02, 2.854e-01, -1.967e-01, -2.431e-01, 5.754e-02, 8.832e-02, -3.383e-02)); + r += mul(s3_5, M4(-7.371e-03, -3.424e-02, 1.090e-02, -6.494e-02, -5.179e-02, 1.272e-02, -5.241e-02, -2.506e-02, -3.050e-03, 5.860e-02, 4.395e-02, -1.695e-01, 4.741e-03, -6.107e-02, 1.171e-02, -5.669e-02)); + r += mul(s3_6, M4(2.198e-02, 7.570e-02, -3.538e-02, -1.042e-01, 8.800e-02, 4.772e-02, -1.502e-02, 2.290e-02, 2.151e-01, -1.611e-01, -1.154e-02, 8.626e-02, 7.106e-02, -1.341e-02, -1.330e-02, 5.211e-02)); + r += mul(s3_7, M4(-3.916e-02, -1.489e-01, 3.314e-02, -7.587e-03, -2.311e-02, -4.384e-02, -7.661e-03, 
3.772e-02, 1.526e-01, -2.412e-01, 7.222e-02, 2.626e-01, -2.176e-02, 3.308e-02, -1.065e-01, 7.574e-02)); + r += mul(s3_8, M4(1.587e-01, 7.943e-03, 3.881e-02, -7.019e-02, -1.458e-02, -2.718e-03, 3.520e-02, -1.612e-02, 5.317e-02, 7.138e-03, -9.154e-03, -2.594e-02, 1.313e-02, -4.044e-02, 3.335e-02, 9.018e-02)); + r += mul(s4_0, M4(-9.390e-02, -2.957e-02, 3.991e-02, -1.721e-02, -4.073e-03, -5.301e-03, 3.996e-03, -2.481e-02, 3.193e-02, -2.475e-02, 4.997e-02, 1.875e-02, 1.144e-01, 2.920e-02, 2.555e-02, 1.171e-02)); + r += mul(s4_1, M4(-1.798e-01, -5.046e-02, -5.546e-02, 9.627e-02, 1.348e-01, -7.918e-03, 2.355e-02, 7.920e-03, -1.839e-02, -5.136e-03, 1.009e-01, -7.109e-02, 1.589e-02, 6.215e-02, 1.106e-01, -8.872e-03)); + r += mul(s4_2, M4(1.272e-01, -1.213e-01, -4.277e-03, 6.587e-02, -2.318e-02, -6.901e-03, 4.194e-02, -1.869e-02, -4.644e-02, -1.502e-02, -1.545e-02, 7.140e-03, -2.498e-02, 3.137e-02, 8.960e-02, -2.048e-02)); + r += mul(s4_3, M4(7.322e-02, -1.178e-01, -2.634e-02, 8.765e-02, -3.430e-01, -1.100e-03, -2.190e-03, 2.631e-02, 1.021e-01, -3.667e-02, 3.094e-03, 4.869e-03, 1.616e-01, -6.794e-02, -2.141e-02, 3.300e-02)); + r += mul(s4_4, M4(-7.055e-02, 1.225e-01, 1.412e-02, 1.065e-02, 2.057e-01, -8.213e-02, -4.030e-02, 9.385e-02, -1.705e-01, -2.334e-02, -8.216e-02, 7.482e-02, -1.352e-01, -2.363e-02, -6.218e-02, 2.513e-01)); + r += mul(s4_5, M4(1.639e-01, -1.603e-01, -3.466e-02, -8.816e-03, 4.738e-02, 2.710e-03, -1.057e-02, 1.490e-02, -2.733e-02, -6.313e-03, 4.574e-02, 2.334e-02, 1.030e-01, 1.220e-02, 1.563e-02, 1.519e-01)); + r += mul(s4_6, M4(-6.611e-02, 4.803e-02, -3.370e-02, -2.136e-02, 1.686e-01, 3.868e-02, -2.811e-02, 1.314e-01, -6.832e-02, -6.332e-02, 3.891e-03, 1.078e-03, 1.259e-02, 1.347e-02, -1.795e-02, -1.823e-02)); + r += mul(s4_7, M4(-4.383e-02, 3.919e-02, -1.702e-02, 6.133e-02, 2.650e-02, 1.528e-01, 1.077e-02, -1.136e-01, -3.415e-02, -4.393e-02, -1.535e-02, 5.751e-03, 7.887e-02, 8.357e-02, -1.169e-01, 9.759e-02)); + r += mul(s4_8, M4(5.286e-02, -1.319e-01, 
2.557e-02, -6.891e-02, 8.569e-03, -1.494e-02, -8.862e-02, 6.980e-02, 2.563e-03, 3.462e-02, 9.554e-02, -3.793e-02, -5.212e-02, 1.955e-02, -3.071e-02, -3.454e-02)); + r += mul(s5_0, M4(-1.768e-01, 2.204e-02, 2.787e-02, -2.393e-02, -1.002e-02, 2.389e-02, 3.614e-02, -1.781e-02, -2.353e-02, -2.875e-02, 4.366e-03, 1.779e-03, 1.665e-01, -4.700e-02, -6.477e-02, 3.770e-02)); + r += mul(s5_1, M4(-2.880e-02, 1.783e-02, -4.043e-02, 6.825e-02, -1.876e-02, 5.088e-02, 1.608e-02, 1.315e-02, 3.018e-02, -1.662e-02, 4.605e-03, -5.020e-02, -1.377e-01, 5.353e-02, 8.571e-02, -1.120e-01)); + r += mul(s5_2, M4(4.789e-02, -5.843e-02, 3.200e-02, 4.193e-02, -6.985e-02, 1.890e-02, -4.449e-02, -1.330e-02, -2.758e-02, -2.345e-02, 3.961e-02, 6.886e-02, 5.490e-02, 4.636e-03, 4.952e-02, 1.662e-03)); + r += mul(s5_3, M4(2.187e-02, -7.181e-02, 7.356e-03, 3.356e-02, 4.571e-02, -4.116e-02, -4.134e-02, 3.689e-02, -5.024e-02, 2.879e-02, 4.638e-02, 1.015e-02, 2.537e-02, 4.648e-02, -8.458e-03, -4.430e-02)); + r += mul(s5_4, M4(-6.847e-02, 3.472e-01, 1.254e-01, 3.952e-02, 6.795e-02, 9.090e-02, -2.754e-02, -7.743e-02, -2.736e-02, 6.506e-02, 2.538e-02, 4.489e-02, -1.526e-02, 2.073e-02, -7.722e-02, 5.576e-02)); + r += mul(s5_5, M4(2.726e-01, -1.925e-01, 7.170e-02, -7.306e-02, -1.318e-01, 5.286e-02, -8.710e-03, -1.056e-01, 1.901e-01, -1.255e-01, 5.971e-02, -6.112e-02, 1.548e-01, -3.938e-02, 3.478e-02, 3.970e-02)); + r += mul(s5_6, M4(1.714e-01, 4.680e-03, 4.453e-02, 7.159e-02, 2.744e-02, 8.789e-02, -3.985e-03, -6.283e-02, -5.795e-02, -8.652e-02, 7.511e-03, 1.671e-02, 6.817e-02, 7.215e-02, 1.972e-02, -5.465e-03)); + r += mul(s5_7, M4(-7.153e-02, 3.829e-02, 6.513e-03, 5.031e-02, -4.022e-02, 1.541e-01, -1.352e-01, 8.070e-02, -8.733e-02, 3.275e-02, 1.219e-01, -9.393e-02, 6.974e-02, 1.334e-02, -5.241e-02, 9.083e-02)); + r += mul(s5_8, M4(7.991e-02, -1.213e-01, 2.898e-02, 2.543e-02, -1.676e-02, 8.872e-04, -1.376e-02, -1.789e-02, 5.687e-02, -1.101e-01, 1.071e-01, -1.883e-02, -4.311e-02, -2.708e-02, -3.975e-02, 
-3.926e-02)); + r += mul(s6_0, M4(-1.041e-01, -1.478e-02, 3.713e-02, -8.654e-03, -1.223e-02, -2.924e-02, -9.693e-03, 2.120e-02, 4.728e-02, -2.734e-02, 7.005e-02, 6.289e-03, -1.467e-01, -3.108e-02, 2.606e-03, 1.035e-02)); + r += mul(s6_1, M4(-8.825e-02, 6.671e-04, -2.186e-01, 4.616e-02, 7.066e-02, 2.789e-02, 7.979e-02, 5.889e-03, 1.313e-01, 7.202e-02, 1.629e-01, -1.344e-01, -8.374e-03, -5.702e-02, -6.884e-02, 9.055e-02)); + r += mul(s6_2, M4(2.092e-01, -4.789e-02, -2.173e-02, -4.804e-02, -1.003e-01, 3.567e-02, 1.570e-02, 1.596e-02, -4.941e-02, -2.362e-02, 1.388e-01, 4.125e-02, 2.788e-02, 3.079e-02, 1.342e-02, -2.604e-03)); + r += mul(s6_3, M4(-2.535e-02, 4.078e-03, 8.193e-02, 3.778e-03, 3.107e-01, 5.877e-02, -8.162e-02, -1.081e-02, -4.131e-02, -2.058e-02, -6.159e-02, -4.414e-02, 1.622e-01, -1.059e-01, 4.606e-02, -5.873e-02)); + r += mul(s6_4, M4(4.429e-02, 4.277e-02, 3.192e-02, -8.978e-02, -1.494e-01, 7.105e-02, 2.000e-02, 7.918e-02, -1.043e-01, -3.371e-01, -2.302e-01, 2.748e-01, -7.409e-02, 1.661e-02, 3.416e-02, -6.733e-02)); + r += mul(s6_5, M4(5.293e-03, -2.558e-02, 2.730e-02, -1.202e-02, 1.339e-02, 6.815e-02, 5.157e-02, 9.234e-03, 3.241e-02, -1.237e-02, -2.822e-01, 3.455e-02, 1.194e-01, 2.901e-02, 6.449e-02, -4.839e-02)); + r += mul(s6_6, M4(6.621e-02, -9.204e-02, 4.267e-02, -2.218e-02, -5.260e-02, 6.895e-02, 3.258e-02, -5.115e-02, 4.922e-02, -5.462e-03, -1.338e-02, -5.627e-02, 1.055e-02, -6.708e-02, 9.224e-03, 2.283e-02)); + r += mul(s6_7, M4(-6.565e-02, -3.862e-02, 1.656e-02, -1.221e-02, 1.148e-01, 3.873e-02, -4.293e-02, -7.591e-02, -5.572e-02, 2.340e-03, -1.108e-02, 1.175e-01, 1.409e-02, -7.847e-02, 1.175e-02, -4.514e-02)); + r += mul(s6_8, M4(-2.909e-03, 5.116e-02, 7.008e-02, -1.192e-01, -5.601e-02, 5.023e-02, -6.509e-02, 1.439e-01, 6.913e-03, -3.456e-03, 1.591e-02, -1.092e-01, 2.563e-03, -2.662e-03, -2.068e-02, -3.506e-02)); + r += mul(s7_0, M4(1.868e-01, -1.714e-02, -3.043e-02, 3.179e-02, 8.869e-02, -3.393e-02, -4.821e-02, -3.797e-02, -2.281e-01, 
1.769e-02, -4.862e-02, 3.733e-03, -1.018e-01, -7.720e-02, 8.861e-03, 6.126e-02)); + r += mul(s7_1, M4(3.822e-02, 3.344e-02, -4.502e-02, 2.524e-02, 1.802e-01, 3.652e-02, -2.133e-02, 2.703e-02, 1.495e-01, -4.717e-02, -5.456e-02, 8.936e-02, -5.424e-02, -5.804e-02, -7.314e-02, 3.841e-02)); + r += mul(s7_2, M4(1.188e-01, -9.731e-03, -3.472e-03, -1.714e-02, 5.064e-02, 3.891e-02, -1.604e-02, -5.796e-02, 1.003e-01, -8.472e-02, -3.450e-02, 1.285e-02, -3.742e-02, 1.612e-03, -3.046e-02, -1.759e-02)); + r += mul(s7_3, M4(1.873e-01, 1.052e-01, 2.580e-02, -1.778e-02, -7.024e-02, 1.224e-01, 1.048e-01, -1.203e-01, 2.369e-01, 1.081e-01, 3.423e-02, 3.875e-02, 2.634e-02, -1.966e-01, -6.384e-02, -3.763e-02)); + r += mul(s7_4, M4(4.489e-02, -9.274e-02, 1.093e-01, 1.736e-02, -9.947e-02, 2.066e-02, -1.510e-02, 2.141e-02, -2.248e-01, -8.135e-02, 1.751e-01, 7.487e-02, -5.824e-02, -3.883e-02, -5.598e-02, -2.840e-03)); + r += mul(s7_5, M4(1.493e-01, 3.032e-02, 3.953e-02, -3.044e-02, 1.585e-01, 1.179e-03, -6.519e-02, -6.421e-02, 5.900e-02, -2.839e-02, -3.216e-02, -3.307e-02, -6.550e-02, 1.080e-01, 4.691e-02, 2.331e-02)); + r += mul(s7_6, M4(-6.866e-03, -3.810e-03, 3.708e-02, -2.128e-02, 1.840e-01, 3.419e-02, 7.918e-02, 3.062e-01, -8.909e-02, -7.120e-03, 4.227e-03, -1.091e-01, -3.904e-02, -3.161e-02, 7.149e-02, 2.966e-02)); + r += mul(s7_7, M4(4.679e-02, 2.399e-02, 7.689e-02, -9.124e-02, 2.010e-01, -1.263e-01, 1.106e-01, -4.843e-02, -3.990e-02, -4.025e-02, 7.679e-02, 4.350e-02, 8.031e-02, -1.385e-01, -2.760e-02, 1.475e-01)); + r += mul(s7_8, M4(1.304e-01, 1.269e-02, 5.142e-02, -1.304e-01, 1.175e-02, -7.449e-03, -9.245e-02, 1.372e-01, 3.384e-02, 7.022e-02, 8.292e-02, -4.169e-02, -9.016e-02, 1.304e-01, -1.643e-01, -2.188e-02)); + r += V4(-3.684e-03, 7.998e-03, -4.249e-03, -9.809e-03); + return r; +} + +void Pass7(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + 
return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 
0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, 
s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 8 +//!DESC conv7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1, t2, t3 +//!OUT t4, t5, t6, t7 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(7.928e-02, 4.822e-02, 5.804e-04, -4.495e-02, -3.156e-02, -4.858e-02, 1.014e-01, 2.752e-02, 9.953e-02, 4.994e-02, -5.578e-02, -2.008e-02, 3.357e-02, 3.335e-02, 4.004e-03, 3.210e-02)); + r += mul(s0_1, M4(-5.454e-02, 1.172e-01, 
5.500e-02, 3.605e-02, -4.624e-02, -1.258e-01, -5.372e-02, 5.674e-02, -1.013e-01, -3.818e-02, 3.052e-02, 5.311e-02, -1.847e-01, 1.806e-01, 4.878e-02, 2.028e-02)); + r += mul(s0_2, M4(1.427e-01, 3.897e-02, -3.087e-02, -2.563e-02, -5.991e-02, -5.467e-02, 2.661e-02, 5.379e-02, 9.090e-02, -7.985e-02, -4.141e-02, -1.746e-02, -1.910e-02, -1.430e-02, 3.502e-02, 2.449e-02)); + r += mul(s0_3, M4(9.980e-02, 2.403e-02, 1.376e-01, -3.144e-02, 5.065e-02, 2.243e-02, -6.782e-02, -1.758e-02, 6.117e-02, -6.080e-02, -2.983e-02, -5.149e-02, 4.535e-04, -3.357e-02, -9.829e-03, 4.028e-02)); + r += mul(s0_4, M4(7.933e-02, 1.366e-01, -1.287e-01, -1.279e-01, -1.537e-01, -3.683e-01, 3.433e-01, 2.802e-01, -3.495e-02, -4.771e-02, -1.293e-01, -1.279e-01, -3.095e-01, -6.632e-02, -9.007e-02, 5.759e-02)); + r += mul(s0_5, M4(7.745e-02, 1.275e-01, 8.537e-03, -1.231e-01, -2.843e-02, -6.560e-02, 1.503e-02, 4.679e-02, 7.496e-02, -1.226e-03, 6.703e-02, -3.926e-02, 7.192e-02, 4.207e-02, -4.325e-02, 2.096e-02)); + r += mul(s0_6, M4(-3.992e-02, 1.466e-02, -1.168e-02, 9.433e-03, -4.607e-03, -3.028e-02, 8.283e-02, 7.753e-03, -3.722e-02, -4.383e-02, -7.033e-03, 3.636e-02, -2.195e-02, -4.262e-02, 3.918e-02, 3.968e-02)); + r += mul(s0_7, M4(9.177e-02, 3.323e-02, 7.248e-02, 2.168e-02, -7.738e-02, -1.661e-01, -1.904e-01, 2.629e-03, -6.617e-02, -4.853e-02, -6.812e-02, 5.823e-02, -1.688e-01, 1.137e-01, -3.627e-02, -7.605e-02)); + r += mul(s0_8, M4(-2.016e-02, 4.008e-02, -4.592e-02, -3.231e-02, 5.997e-02, -3.119e-02, 7.739e-02, 8.729e-02, -1.062e-02, -1.966e-02, -3.977e-02, -9.933e-03, 3.076e-02, 3.503e-02, -5.243e-02, -5.442e-02)); + r += mul(s1_0, M4(6.632e-03, -2.218e-02, -8.032e-02, -2.854e-02, -8.480e-02, -5.885e-02, 2.241e-02, 7.530e-03, 1.836e-01, -4.341e-02, 6.516e-02, 4.234e-02, -4.015e-02, -1.274e-02, 2.232e-03, -4.298e-03)); + r += mul(s1_1, M4(-1.793e-01, 9.310e-02, 1.594e-02, 7.977e-04, -6.755e-02, -7.043e-02, -6.245e-02, -4.357e-02, -4.572e-02, -8.671e-02, -1.630e-01, -1.412e-01, 7.826e-02, 
-6.310e-02, -9.400e-02, -6.938e-02)); + r += mul(s1_2, M4(6.265e-02, -3.672e-02, 5.506e-03, 4.493e-03, -1.238e-01, -2.876e-02, 1.421e-02, 1.911e-02, -2.735e-02, 1.067e-01, -2.680e-02, -8.603e-02, 3.457e-02, 4.358e-02, -1.305e-02, 6.127e-03)); + r += mul(s1_3, M4(-5.895e-02, -3.985e-02, 8.656e-02, -3.214e-02, 1.907e-03, 5.553e-02, -1.673e-01, -4.178e-02, 2.555e-02, 5.746e-03, 9.090e-02, 2.383e-02, 3.308e-02, 2.390e-02, 2.692e-02, 1.934e-02)); + r += mul(s1_4, M4(1.077e-01, 8.866e-02, -1.509e-01, -7.764e-02, -3.423e-01, -1.899e-01, 2.406e-01, 9.025e-02, 1.675e-01, -8.327e-02, 9.804e-02, -4.304e-01, -1.195e-01, 6.765e-02, 1.466e-01, 2.829e-02)); + r += mul(s1_5, M4(2.812e-03, -4.669e-03, -6.641e-02, -2.064e-02, 1.053e-01, -1.186e-01, -2.315e-02, 5.758e-02, 5.627e-02, -8.880e-02, -1.077e-01, -6.856e-02, -7.529e-03, 3.375e-02, -1.046e-03, -5.482e-02)); + r += mul(s1_6, M4(-1.219e-01, 3.633e-03, -7.520e-02, 2.118e-02, -8.152e-02, -2.245e-02, -2.174e-02, -2.503e-02, -5.606e-02, 6.988e-03, -4.448e-02, 5.467e-02, 2.982e-02, -4.799e-02, -1.716e-02, 2.838e-02)); + r += mul(s1_7, M4(7.319e-02, 6.005e-03, 1.300e-02, 4.553e-02, -1.912e-01, -8.610e-02, -1.208e-01, 2.906e-02, -7.969e-02, -3.817e-02, 1.276e-01, 8.493e-02, -1.141e-01, 9.560e-03, -9.636e-02, 2.154e-02)); + r += mul(s1_8, M4(-3.762e-02, -5.653e-04, -5.481e-02, -1.965e-02, 2.146e-02, -5.393e-03, 3.041e-02, 6.961e-02, 1.217e-02, 5.822e-03, -2.191e-02, -4.545e-02, 3.931e-02, -1.587e-03, -1.719e-02, 1.047e-02)); + r += mul(s2_0, M4(2.373e-02, 2.247e-02, -6.849e-04, 1.556e-02, -7.684e-02, 2.018e-02, -9.347e-02, -4.673e-02, -3.720e-03, -9.546e-02, 5.003e-02, 8.953e-04, -3.589e-02, -2.696e-02, 3.924e-03, 2.185e-02)); + r += mul(s2_1, M4(1.047e-01, 7.446e-03, 2.984e-02, -2.656e-02, 1.127e-01, -1.081e-01, -9.928e-02, -5.862e-02, 1.805e-03, -3.810e-02, -4.034e-04, 4.211e-02, 4.344e-02, -1.064e-01, 4.765e-02, -9.679e-02)); + r += mul(s2_2, M4(6.379e-02, 5.960e-02, 3.886e-02, -2.070e-03, 2.553e-02, 2.428e-02, -5.641e-02, 
-1.192e-02, -3.963e-02, -4.780e-03, -2.298e-02, -2.753e-02, 1.223e-02, -2.535e-03, 4.660e-03, -6.159e-02)); + r += mul(s2_3, M4(1.149e-01, 2.259e-02, 2.713e-03, 3.219e-03, 2.691e-03, -5.508e-02, -1.100e-02, 3.262e-02, -1.204e-01, -2.375e-02, -2.582e-02, 2.569e-02, 1.195e-01, 4.853e-02, -2.514e-01, 6.026e-02)); + r += mul(s2_4, M4(-8.884e-02, -4.142e-02, 1.428e-01, 2.254e-01, 4.768e-02, -1.388e-01, 2.105e-01, 1.763e-01, 3.213e-02, 5.192e-02, -3.433e-02, -5.249e-03, -3.116e-02, 1.466e-01, -1.701e-03, 3.028e-01)); + r += mul(s2_5, M4(3.556e-02, 2.233e-02, 8.829e-03, -9.730e-03, 6.758e-03, 3.418e-02, -7.578e-03, 7.302e-04, -3.096e-02, -2.904e-02, -4.008e-02, -4.763e-02, -8.620e-02, -1.078e-01, 7.608e-02, 9.286e-02)); + r += mul(s2_6, M4(7.097e-03, -3.942e-03, 8.524e-03, -1.820e-02, 1.128e-02, -6.837e-04, -1.842e-02, -3.144e-02, -7.628e-02, -2.179e-02, 5.451e-03, 3.382e-02, 3.599e-01, -1.985e-02, 3.210e-01, -1.446e-01)); + r += mul(s2_7, M4(2.867e-02, 3.294e-02, -4.666e-03, 3.856e-02, 1.143e-01, 4.184e-02, 5.582e-02, 8.814e-03, -7.175e-02, 2.063e-02, 2.047e-02, 2.778e-02, 3.098e-01, 4.170e-02, 2.116e-01, -9.749e-02)); + r += mul(s2_8, M4(-2.635e-02, 4.096e-02, 3.253e-03, -3.084e-03, 3.638e-02, 8.699e-03, 1.527e-02, -1.289e-03, 6.092e-02, 9.191e-03, -3.761e-03, -2.226e-02, 4.413e-02, 4.219e-02, 3.322e-02, -1.974e-02)); + r += mul(s3_0, M4(-3.994e-02, 8.130e-02, -3.198e-02, -1.669e-02, -8.029e-03, -6.796e-02, -6.583e-02, -3.012e-02, 9.142e-03, -1.086e-01, 5.108e-02, 4.155e-02, 5.000e-02, -9.506e-03, 8.482e-03, 1.235e-02)); + r += mul(s3_1, M4(-1.659e-01, 2.043e-02, 4.550e-02, 5.520e-02, 1.052e-01, -5.562e-02, -7.151e-02, -9.228e-02, -1.309e-01, -1.847e-02, 4.913e-02, 4.541e-02, -8.032e-02, 3.619e-02, -2.746e-02, 1.785e-02)); + r += mul(s3_2, M4(7.671e-02, 2.484e-02, 6.241e-02, -1.613e-02, 6.753e-02, -1.390e-02, -6.519e-03, 3.430e-02, 5.440e-02, 2.179e-02, -5.459e-02, -3.557e-02, -5.773e-02, -1.613e-02, 1.931e-02, 1.466e-02)); + r += mul(s3_3, M4(6.151e-02, 8.306e-04, 
8.367e-02, 2.388e-02, 9.813e-02, -5.005e-02, -6.053e-03, 8.916e-02, 5.964e-02, -1.682e-02, -2.338e-02, 1.274e-01, 7.861e-02, 3.454e-02, 1.468e-03, 5.767e-03)); + r += mul(s3_4, M4(9.943e-02, -9.606e-02, -6.648e-02, 1.613e-01, 1.685e-01, -2.279e-01, 1.675e-01, 2.100e-01, -3.538e-01, -2.136e-01, 1.959e-02, -2.794e-01, -1.435e-01, 1.942e-02, -3.614e-02, -7.109e-03)); + r += mul(s3_5, M4(-3.280e-02, 3.541e-02, -1.235e-02, -2.704e-02, -1.406e-01, 3.437e-02, 3.113e-02, 3.525e-02, 2.545e-01, -3.342e-02, -9.375e-02, -1.359e-01, 6.469e-03, -3.424e-02, 7.331e-03, 2.436e-02)); + r += mul(s3_6, M4(-4.128e-02, -9.223e-03, -3.515e-02, -3.574e-03, 8.755e-02, 1.804e-02, 5.316e-02, 4.868e-02, -2.996e-02, -6.481e-02, 1.053e-01, 5.608e-02, -5.896e-02, 8.228e-03, -9.137e-03, -1.011e-02)); + r += mul(s3_7, M4(3.142e-02, 3.920e-02, 5.861e-03, 7.950e-03, 1.942e-01, 1.906e-02, 2.305e-03, -1.533e-02, -5.578e-01, 4.189e-02, -2.218e-01, 1.115e-01, 5.651e-02, 2.233e-02, -1.435e-02, -2.548e-03)); + r += mul(s3_8, M4(-7.958e-02, 4.222e-02, -8.568e-03, -1.070e-02, -6.441e-03, 7.074e-03, 3.075e-03, 4.157e-02, 2.458e-01, 4.872e-02, 7.116e-02, -3.461e-02, 4.319e-02, -5.893e-03, 3.991e-02, 8.141e-03)); + r += mul(s4_0, M4(3.418e-02, -9.294e-02, -1.456e-01, -4.444e-02, 6.170e-02, 1.936e-02, 6.358e-02, 2.248e-02, 3.492e-02, -8.125e-02, 2.340e-02, 5.791e-02, 4.087e-02, 1.529e-02, -2.263e-02, 7.531e-03)); + r += mul(s4_1, M4(4.576e-02, -1.343e-01, -2.147e-01, -1.267e-01, -6.335e-02, 9.448e-02, 3.801e-02, 1.031e-02, 1.429e-02, -9.072e-04, 4.151e-03, -7.928e-03, -8.103e-02, 8.228e-02, 1.606e-02, -9.433e-03)); + r += mul(s4_2, M4(-9.938e-02, 2.235e-02, -2.479e-02, -5.497e-02, -5.530e-02, -5.077e-02, 9.537e-02, 7.495e-02, 3.749e-02, -6.759e-04, -7.712e-02, -3.873e-02, 5.519e-02, -1.195e-03, -4.125e-02, -9.629e-03)); + r += mul(s4_3, M4(-4.165e-02, 1.206e-02, -3.113e-01, 7.691e-02, 1.725e-01, 4.061e-02, 6.413e-02, -4.162e-02, 3.836e-02, -1.379e-02, 6.306e-02, 6.000e-02, 5.685e-02, 6.323e-02, 3.697e-02, 
-5.615e-02)); + r += mul(s4_4, M4(-3.059e-01, -1.780e-01, -2.509e-01, -6.061e-02, 1.590e-01, 8.690e-02, -8.801e-02, -1.086e-01, 2.244e-01, 1.915e-02, -1.168e-02, 3.451e-02, -1.122e-01, 8.629e-02, -2.466e-02, -1.351e-01)); + r += mul(s4_5, M4(8.602e-02, -9.071e-02, -8.880e-02, 1.006e-01, -1.470e-01, 2.896e-02, 8.073e-02, 5.723e-02, 5.191e-02, -3.617e-03, 4.879e-03, -3.711e-02, 9.028e-02, 3.305e-02, 1.294e-02, -3.785e-02)); + r += mul(s4_6, M4(6.079e-02, -2.926e-02, -3.995e-02, 4.857e-02, -1.966e-02, -8.598e-03, -4.296e-02, -2.273e-02, -1.563e-02, -3.625e-02, 2.455e-02, 5.626e-02, -5.617e-02, 1.546e-02, -5.664e-02, 7.118e-03)); + r += mul(s4_7, M4(6.910e-02, 5.679e-02, -1.621e-02, -1.419e-01, -4.303e-02, -3.255e-02, -3.813e-03, -1.900e-02, -1.566e-01, -1.576e-02, -3.888e-02, 1.545e-02, 1.173e-04, 1.922e-02, 1.461e-02, -3.308e-02)); + r += mul(s4_8, M4(3.567e-02, -3.772e-02, -8.326e-02, -3.235e-02, -5.440e-02, -5.952e-02, -1.039e-02, 2.460e-03, 2.784e-03, 3.484e-03, 3.995e-02, 4.645e-03, 5.301e-02, -4.191e-03, 8.651e-03, -4.718e-03)); + r += mul(s5_0, M4(7.415e-03, 7.788e-04, -5.342e-02, 1.154e-02, -9.967e-02, -9.571e-03, -2.785e-02, 1.109e-02, -1.993e-02, 5.363e-02, 3.610e-02, 4.299e-02, 4.654e-02, 5.421e-03, -5.148e-02, -1.616e-02)); + r += mul(s5_1, M4(-1.031e-01, 1.385e-02, 9.443e-02, -3.626e-02, 6.729e-02, -1.415e-02, 1.051e-01, -9.966e-03, 2.624e-01, -1.492e-02, -1.411e-01, -2.688e-02, -1.547e-03, 1.719e-02, -1.118e-01, -9.640e-02)); + r += mul(s5_2, M4(6.631e-02, -2.274e-02, -1.288e-02, 1.491e-03, 2.390e-02, -3.272e-02, -9.256e-03, -4.146e-02, -2.112e-01, 8.644e-02, 4.942e-02, -4.721e-03, -7.967e-02, -4.386e-04, -6.615e-02, -3.745e-02)); + r += mul(s5_3, M4(-3.943e-02, -2.252e-02, -8.333e-02, -1.378e-02, 8.017e-02, 2.037e-03, 8.727e-03, -2.196e-02, -1.023e-01, -3.603e-02, -2.292e-01, 1.195e-02, 7.777e-02, 8.270e-03, 1.038e-02, -4.713e-02)); + r += mul(s5_4, M4(-1.392e-01, -8.142e-03, -1.491e-01, -1.440e-01, 3.064e-01, -1.491e-01, -1.275e-01, -3.076e-01, 
3.082e-01, 3.986e-01, 4.476e-02, 3.037e-01, -3.026e-01, 9.497e-02, 9.989e-02, -2.319e-01)); + r += mul(s5_5, M4(-2.376e-02, -3.715e-03, 6.960e-02, 2.098e-02, 1.130e-01, 3.826e-02, 3.829e-03, -2.412e-02, -2.564e-01, -4.456e-04, -5.662e-02, 4.287e-02, 4.126e-02, 3.119e-04, 3.600e-02, 6.596e-03)); + r += mul(s5_6, M4(1.803e-02, 1.501e-02, -4.666e-03, 9.316e-03, 2.560e-02, -2.137e-03, 2.721e-02, 2.083e-02, -9.668e-02, -1.302e-02, -2.248e-02, 1.082e-01, -1.994e-03, -2.949e-02, -7.086e-02, -4.480e-02)); + r += mul(s5_7, M4(-5.131e-03, -2.336e-02, -1.813e-02, -3.692e-02, 2.265e-02, -3.541e-02, 5.478e-02, -8.799e-02, -4.778e-03, 1.823e-02, 4.562e-02, 1.143e-01, -9.672e-02, -7.293e-03, -4.857e-02, -6.498e-02)); + r += mul(s5_8, M4(2.416e-02, -1.683e-02, -1.683e-02, -9.997e-03, -1.394e-02, -6.069e-02, -3.461e-02, -4.660e-02, -1.422e-01, 1.486e-02, -1.097e-02, -2.757e-03, -5.731e-04, -1.561e-02, -5.748e-02, -2.341e-02)); + r += mul(s6_0, M4(-1.249e-02, 3.845e-02, 5.718e-03, 1.434e-02, 6.750e-02, -5.721e-02, -8.473e-02, 5.111e-02, -5.758e-02, 1.557e-02, -3.896e-02, -1.285e-02, 1.352e-02, -5.283e-03, 2.060e-02, 4.702e-02)); + r += mul(s6_1, M4(2.872e-02, 7.152e-02, 1.209e-02, 3.583e-02, 7.593e-02, -7.288e-03, -1.075e-01, -9.605e-02, -1.324e-01, 4.844e-02, 2.521e-04, -4.092e-02, -8.412e-02, -3.797e-02, -1.491e-01, -1.859e-01)); + r += mul(s6_2, M4(-6.098e-03, -4.373e-02, -7.894e-03, 3.482e-02, -7.495e-02, -3.480e-02, 3.454e-02, 1.662e-02, 5.705e-02, -4.046e-02, 3.114e-02, 6.800e-02, 4.427e-02, 1.664e-02, -2.560e-02, 2.520e-02)); + r += mul(s6_3, M4(-7.703e-02, 1.120e-01, 1.198e-02, -1.646e-03, -1.605e-02, 1.075e-02, 5.630e-02, -7.171e-02, -5.739e-02, -1.108e-01, -3.192e-02, 3.843e-02, -2.070e-03, -1.209e-01, 7.992e-03, -6.664e-03)); + r += mul(s6_4, M4(-1.091e-01, 1.296e-02, -2.629e-02, -4.328e-02, 1.123e-01, 2.018e-02, 1.686e-01, -1.707e-01, 1.179e-01, 1.125e-01, -1.860e-01, -1.666e-01, 1.630e-01, 5.831e-03, -4.884e-02, 1.543e-02)); + r += mul(s6_5, M4(6.148e-02, -3.357e-02, 
4.384e-02, 1.890e-02, -5.325e-02, -1.146e-02, 2.605e-02, 5.642e-02, 3.314e-02, -1.044e-03, 2.765e-02, 1.145e-01, -3.975e-02, -3.602e-02, 6.113e-02, 2.219e-02)); + r += mul(s6_6, M4(-4.629e-03, 6.955e-02, -3.905e-03, -2.236e-02, -2.049e-02, 4.068e-02, 1.674e-03, -1.239e-02, 7.260e-02, -3.747e-03, 1.437e-02, 3.210e-02, -2.625e-03, -5.696e-02, 1.303e-02, 4.426e-02)); + r += mul(s6_7, M4(4.351e-02, 3.086e-02, 2.064e-02, -2.015e-02, -5.198e-02, 4.068e-02, 5.444e-02, -1.383e-02, 2.095e-01, 3.082e-02, 9.118e-02, -5.258e-02, -1.734e-02, -2.578e-02, 3.836e-02, 5.330e-03)); + r += mul(s6_8, M4(-1.897e-02, 3.367e-03, -7.593e-02, 5.187e-02, -4.588e-02, 3.120e-04, -4.319e-03, -2.105e-02, -1.110e-01, 3.202e-02, -4.200e-03, -1.821e-02, -4.845e-02, 2.483e-02, 1.924e-02, -8.146e-03)); + r += mul(s7_0, M4(-5.626e-02, 6.364e-03, -1.765e-02, 1.029e-02, 1.255e-01, 7.597e-02, -4.705e-02, 2.156e-02, -1.286e-01, -8.622e-02, -5.481e-02, 6.097e-03, -5.013e-02, 2.833e-02, -9.929e-03, -1.467e-02)); + r += mul(s7_1, M4(5.623e-02, 2.189e-02, 2.068e-02, 2.171e-02, 1.321e-02, 2.415e-01, 2.451e-02, 1.200e-02, -2.508e-01, 5.658e-03, -5.360e-02, -2.568e-02, 1.899e-01, 4.632e-02, -2.182e-01, -1.004e-02)); + r += mul(s7_2, M4(-6.903e-02, 1.023e-02, -8.371e-03, 9.310e-03, -8.910e-02, -7.477e-02, -4.199e-03, -3.107e-02, 1.413e-01, -2.929e-02, -1.731e-02, 8.115e-02, -1.723e-01, 3.023e-02, 5.058e-02, 1.814e-02)); + r += mul(s7_3, M4(-9.143e-02, 3.089e-02, -3.683e-02, -3.701e-02, 5.979e-02, -5.476e-02, 7.731e-02, -3.719e-02, -8.051e-02, 4.463e-03, 1.348e-01, 6.575e-02, 4.512e-02, -1.238e-03, 1.535e-01, 1.107e-02)); + r += mul(s7_4, M4(2.022e-02, -5.338e-03, -3.378e-02, -5.679e-02, -6.939e-02, -2.564e-01, -2.188e-02, -1.236e-01, -3.304e-01, 2.622e-01, -3.185e-02, -4.603e-01, 3.060e-01, 2.514e-03, 7.321e-02, -4.819e-02)); + r += mul(s7_5, M4(4.261e-02, -3.187e-02, -5.567e-02, 9.319e-04, -5.670e-04, -2.831e-02, -9.990e-02, -1.360e-02, -1.421e-01, 7.289e-02, 1.848e-02, -1.327e-02, -1.597e-01, 2.097e-02, 
-1.113e-02, 3.342e-02)); + r += mul(s7_6, M4(-3.785e-02, 2.305e-02, -4.138e-02, -6.811e-03, 3.313e-03, 2.111e-02, -4.657e-02, 2.641e-02, 1.650e-02, 5.746e-03, -8.368e-03, 4.657e-03, 4.895e-02, -1.558e-02, 1.198e-02, 3.993e-02)); + r += mul(s7_7, M4(-3.501e-02, 4.877e-03, 5.460e-02, -6.116e-02, 1.592e-02, 1.768e-02, 5.892e-02, -1.304e-02, -1.658e-02, 4.944e-02, -3.337e-02, -3.778e-02, -4.636e-02, 2.160e-02, 6.470e-02, 1.165e-03)); + r += mul(s7_8, M4(1.307e-02, -2.179e-03, 3.846e-02, -1.762e-02, -5.784e-02, -3.612e-03, -4.913e-02, -1.284e-02, 1.709e-02, 4.493e-02, 3.720e-02, -4.552e-03, -8.815e-03, 4.349e-02, 3.794e-02, -3.816e-03)); + r += V4(-3.895e-03, -8.356e-03, 7.020e-03, 2.656e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.111e-01, -3.175e-04, 1.594e-02, -2.178e-02, 6.486e-02, 3.492e-02, -5.328e-02, -8.276e-02, 8.186e-03, -2.455e-02, 3.569e-02, -3.164e-03, -6.172e-02, -3.503e-02, -1.473e-02, -3.031e-02)); + r += mul(s0_1, M4(8.157e-02, -1.452e-02, -6.915e-02, 1.309e-02, -6.614e-03, -5.674e-02, -1.737e-02, 2.184e-02, 1.774e-01, 4.415e-02, -1.119e-01, -9.674e-02, -8.765e-02, 1.167e-01, 4.151e-04, -4.010e-02)); + r += mul(s0_2, M4(-1.075e-01, 1.050e-01, 9.690e-02, 4.260e-02, 2.686e-02, 4.955e-02, 3.839e-02, -1.783e-02, 4.947e-02, 1.239e-01, 4.785e-02, 4.874e-02, 5.631e-02, 5.338e-02, 1.938e-02, 
1.017e-02)); + r += mul(s0_3, M4(2.327e-04, 4.618e-02, -8.965e-02, -2.445e-02, 1.397e-02, 1.034e-01, 3.686e-02, 4.144e-02, 7.913e-02, -1.383e-01, 2.294e-02, 7.601e-02, -1.200e-01, -1.116e-01, -8.621e-02, 8.035e-02)); + r += mul(s0_4, M4(-2.977e-02, -5.948e-02, 2.419e-02, 9.913e-02, 1.271e-01, -1.856e-02, 1.742e-01, -6.541e-03, 8.201e-02, -4.481e-02, -3.520e-03, 1.960e-01, -3.483e-01, -1.156e-01, 2.198e-01, -2.455e-02)); + r += mul(s0_5, M4(1.596e-02, -9.178e-03, -5.511e-02, -5.448e-02, 6.330e-02, -5.342e-02, -3.141e-02, 3.838e-02, -1.185e-01, 2.116e-01, 4.580e-02, -4.053e-02, -1.246e-01, -1.606e-01, -1.765e-02, 4.382e-03)); + r += mul(s0_6, M4(4.333e-02, 3.341e-02, 1.798e-02, -2.895e-02, -3.774e-02, -1.775e-02, -1.273e-02, -4.692e-02, 3.420e-02, -7.636e-04, 3.939e-02, 1.415e-02, 4.825e-02, -2.230e-03, -1.590e-02, 8.207e-02)); + r += mul(s0_7, M4(2.826e-02, -2.563e-03, -6.100e-02, -7.286e-02, -3.553e-02, -1.938e-01, -3.848e-02, -5.982e-02, -8.330e-03, -1.613e-01, 1.356e-02, 1.181e-01, -5.626e-02, 2.223e-02, -4.682e-02, 2.161e-01)); + r += mul(s0_8, M4(-1.068e-02, 2.957e-02, -7.559e-02, -2.765e-03, -5.642e-02, -6.326e-02, -9.012e-02, -1.650e-01, -7.645e-02, 2.591e-02, 3.277e-02, 7.611e-02, 4.265e-02, -4.556e-02, 2.665e-02, -4.761e-03)); + r += mul(s1_0, M4(-8.369e-03, -7.104e-02, -4.053e-03, 2.696e-02, 6.111e-02, -1.225e-02, 7.894e-03, -5.733e-02, 6.659e-02, -7.197e-02, 8.736e-02, 7.836e-02, -3.087e-02, 5.446e-03, -5.538e-03, 2.015e-02)); + r += mul(s1_1, M4(8.236e-03, 3.515e-02, -7.031e-02, -4.893e-04, 7.165e-02, 2.122e-02, -6.245e-02, 5.521e-02, -2.507e-01, -7.431e-03, -6.972e-02, 1.014e-01, -9.786e-02, 4.074e-02, -1.138e-03, 1.096e-01)); + r += mul(s1_2, M4(-8.829e-02, -3.148e-02, -2.142e-02, 5.981e-02, 5.066e-03, -6.110e-02, 3.339e-03, 7.664e-03, -2.020e-02, 5.512e-03, -7.908e-02, -2.970e-03, 9.111e-03, 1.377e-02, 1.319e-02, 8.516e-03)); + r += mul(s1_3, M4(-5.001e-02, 2.919e-03, -5.871e-02, 1.821e-02, -7.493e-02, -4.757e-02, 2.643e-02, 1.199e-02, -2.783e-01, 
-1.154e-01, 1.419e-01, -7.777e-02, 3.460e-02, -2.820e-02, -4.490e-02, -4.171e-03)); + r += mul(s1_4, M4(-7.329e-02, -7.155e-03, -4.647e-02, 1.127e-01, 8.173e-02, 1.380e-01, 1.007e-01, -1.666e-01, 3.941e-01, 1.394e-01, -6.078e-02, -1.604e-01, -2.940e-01, 1.331e-01, -3.606e-02, -1.772e-01)); + r += mul(s1_5, M4(2.868e-02, -2.188e-01, -6.634e-02, -3.221e-02, 4.437e-02, -1.630e-01, 4.866e-03, 4.620e-02, -1.544e-02, -1.962e-02, -1.514e-03, 3.143e-02, 1.624e-02, 4.628e-02, 7.661e-03, 3.106e-02)); + r += mul(s1_6, M4(1.388e-02, 8.381e-04, 1.228e-02, 5.737e-02, 3.986e-02, -3.514e-02, -7.946e-02, 5.119e-02, 4.793e-02, -1.235e-02, 5.782e-02, -1.727e-02, 9.271e-02, 4.089e-03, 1.185e-02, 6.081e-02)); + r += mul(s1_7, M4(-2.851e-02, -6.290e-02, -9.474e-03, -6.267e-02, -8.284e-02, -5.416e-02, -1.131e-02, 2.230e-01, 9.527e-03, -1.781e-01, 8.715e-02, -1.448e-01, 9.062e-02, 9.679e-03, 2.458e-02, 9.532e-02)); + r += mul(s1_8, M4(-4.635e-02, -4.072e-02, -7.488e-02, 6.482e-02, -1.024e-01, 2.769e-02, -5.381e-02, -7.026e-02, -1.038e-01, -9.665e-03, 5.855e-02, 1.004e-01, -3.043e-03, -3.584e-02, 5.021e-02, -2.025e-02)); + r += mul(s2_0, M4(-3.691e-02, 4.343e-03, 1.944e-03, 5.121e-03, -6.205e-02, 1.779e-02, 1.133e-02, 8.393e-02, 6.189e-02, -5.052e-02, -1.885e-04, -2.769e-02, 8.502e-02, 6.285e-02, -1.575e-01, -9.110e-03)); + r += mul(s2_1, M4(-2.548e-02, 3.900e-02, -7.437e-02, 1.593e-02, -6.999e-02, -1.089e-01, -8.605e-02, 1.203e-01, 1.808e-02, 2.306e-02, 4.526e-02, 1.072e-02, -1.069e-01, -1.257e-01, -1.824e-01, 1.765e-01)); + r += mul(s2_2, M4(2.279e-02, 5.037e-03, 8.069e-02, -1.478e-03, -3.047e-02, -8.335e-02, -2.637e-02, 2.844e-02, -2.300e-03, -1.131e-02, -3.253e-02, -2.081e-02, -1.460e-01, -2.562e-02, -2.515e-02, -1.627e-02)); + r += mul(s2_3, M4(8.925e-02, -8.621e-05, 3.551e-02, 5.852e-02, 7.491e-02, -1.479e-01, -8.129e-02, -1.654e-01, 2.907e-02, 4.001e-03, 1.898e-02, 1.946e-02, 1.645e-01, 9.341e-02, -1.773e-02, -2.151e-01)); + r += mul(s2_4, M4(-1.025e-01, -6.147e-02, 1.116e-01, 
-2.732e-01, 4.592e-02, 9.316e-02, 2.190e-01, -1.172e-01, 8.032e-03, -3.184e-02, -3.324e-02, 4.468e-02, 3.894e-01, -4.911e-02, -2.667e-01, -6.591e-01)); + r += mul(s2_5, M4(3.605e-02, -1.431e-02, 1.066e-01, -1.271e-03, -8.170e-02, -5.271e-02, -1.263e-01, 7.878e-03, -2.190e-02, 3.071e-02, -2.802e-02, 2.736e-02, -2.978e-02, -5.201e-02, -1.224e-01, -3.254e-01)); + r += mul(s2_6, M4(-7.998e-02, -8.551e-03, -9.969e-03, -6.664e-02, 3.013e-02, -1.551e-02, -5.777e-03, 3.318e-02, 2.715e-02, 1.761e-03, 1.030e-02, 1.606e-02, -7.406e-03, 7.278e-03, -4.858e-02, 5.677e-02)); + r += mul(s2_7, M4(-4.434e-02, 3.694e-02, 2.225e-02, 5.404e-02, -5.965e-02, 1.155e-01, -1.446e-02, -1.431e-01, -1.449e-02, 3.707e-02, -8.811e-03, 2.222e-02, 3.625e-02, 4.348e-02, 1.404e-01, 1.871e-02)); + r += mul(s2_8, M4(7.695e-02, 5.765e-02, 1.118e-01, -2.032e-02, 6.812e-02, -1.101e-01, 1.491e-02, -8.794e-02, -4.007e-03, 8.639e-04, 9.093e-03, 2.268e-02, -8.533e-03, -1.890e-02, -8.417e-03, -2.010e-02)); + r += mul(s3_0, M4(-5.556e-02, 6.920e-02, 1.927e-02, -2.532e-02, -6.915e-02, -2.470e-02, -6.204e-02, 3.620e-02, 6.127e-02, -5.440e-02, -3.001e-02, -4.251e-02, 4.457e-02, -9.204e-03, 3.700e-03, -3.529e-02)); + r += mul(s3_1, M4(1.667e-01, 1.160e-01, -8.801e-02, -1.770e-01, -1.248e-01, 1.147e-02, -1.574e-01, 8.512e-02, 8.809e-02, -2.999e-03, 5.942e-02, 1.731e-02, 3.972e-02, -4.257e-02, -1.670e-02, -2.578e-02)); + r += mul(s3_2, M4(3.448e-02, 2.192e-02, -1.129e-02, -4.515e-02, -4.126e-02, -5.978e-02, -3.035e-02, 4.129e-02, -1.721e-02, 3.501e-02, 1.505e-02, 2.744e-02, 4.188e-02, -2.930e-02, 2.939e-02, -3.203e-02)); + r += mul(s3_3, M4(-2.527e-02, -6.208e-02, 2.997e-02, 8.396e-02, -1.245e-03, -1.697e-02, -9.905e-02, -9.929e-02, 1.386e-01, 5.462e-02, 1.256e-01, 1.261e-01, 4.739e-02, 2.547e-02, 1.957e-02, 2.484e-03)); + r += mul(s3_4, M4(-1.878e-01, -1.528e-01, 3.545e-02, 1.051e-02, 1.128e-02, 2.776e-01, 3.176e-01, -1.291e-01, -2.102e-01, -2.655e-01, -4.457e-02, 3.313e-01, 3.943e-02, -8.620e-02, -3.682e-02, 
8.068e-02)); + r += mul(s3_5, M4(2.688e-02, -4.420e-02, -7.753e-03, 1.846e-02, -3.534e-02, -1.075e-01, -1.408e-01, -6.160e-02, -6.013e-02, -3.317e-02, -4.455e-02, 1.931e-01, -2.555e-02, -2.839e-02, 1.649e-02, 1.812e-02)); + r += mul(s3_6, M4(-2.297e-02, -5.681e-02, -5.738e-02, -5.830e-02, -3.678e-02, 3.950e-02, 3.693e-02, -4.448e-02, -1.573e-03, 8.136e-02, 1.578e-02, -1.116e-01, 6.074e-02, 2.165e-02, 2.547e-02, -1.440e-02)); + r += mul(s3_7, M4(-8.937e-03, 2.686e-02, -1.064e-01, 4.029e-03, -3.302e-02, 7.123e-02, 2.526e-02, -1.513e-02, 9.030e-02, 9.048e-02, 4.383e-02, -7.035e-03, -3.051e-02, -6.291e-02, 1.686e-03, 4.893e-02)); + r += mul(s3_8, M4(6.268e-02, -9.602e-03, 1.679e-02, -3.474e-02, 6.772e-02, -4.073e-02, -5.241e-02, -3.012e-02, 4.198e-03, -1.029e-02, 4.477e-02, -1.357e-01, 1.368e-02, -8.683e-03, -1.462e-02, -3.328e-02)); + r += mul(s4_0, M4(-1.728e-04, -4.062e-02, -5.267e-03, 9.502e-02, -6.827e-02, 3.119e-02, -3.602e-02, -3.598e-02, 7.886e-02, -4.163e-02, -2.511e-02, -5.339e-03, 2.045e-03, -4.370e-02, -3.567e-02, -6.170e-02)); + r += mul(s4_1, M4(-1.357e-01, -5.105e-03, -1.439e-01, 1.387e-01, 3.401e-02, 7.299e-02, 1.007e-01, -8.077e-02, -2.987e-02, 5.897e-02, -5.838e-02, 1.707e-02, 4.441e-02, 7.889e-02, 3.613e-02, -4.555e-02)); + r += mul(s4_2, M4(-9.628e-02, 7.116e-03, -1.357e-01, -5.248e-02, 9.198e-02, 5.426e-02, 4.230e-02, -4.582e-02, -4.610e-02, -2.197e-02, -3.523e-02, 3.964e-02, -4.567e-02, 2.019e-02, 1.297e-02, 3.994e-02)); + r += mul(s4_3, M4(1.021e-01, 8.482e-02, -9.234e-03, 1.916e-01, -2.604e-02, 5.640e-02, -4.721e-02, 2.153e-02, -4.347e-02, 2.935e-02, 1.933e-02, 8.553e-02, -1.625e-02, 3.943e-02, 7.231e-02, -3.787e-02)); + r += mul(s4_4, M4(2.682e-01, 2.765e-01, -2.186e-01, -3.614e-01, 1.081e-02, 2.499e-02, -4.153e-02, -2.146e-02, 6.579e-02, -2.080e-02, -8.323e-03, 6.411e-02, -1.551e-01, 1.018e-01, -1.316e-01, 5.251e-02)); + r += mul(s4_5, M4(4.870e-02, 9.739e-02, -1.611e-01, 5.916e-02, -9.009e-02, 1.231e-02, 3.923e-02, -5.300e-02, -5.035e-02, 
1.460e-02, -1.402e-02, -2.836e-02, 5.409e-02, 6.433e-02, -2.114e-03, -3.348e-02)); + r += mul(s4_6, M4(4.469e-02, -1.401e-03, -8.021e-03, 7.605e-02, 1.365e-02, -2.579e-02, -6.499e-02, 2.021e-02, -1.392e-02, -4.463e-02, 3.883e-02, -8.082e-02, 5.414e-02, 3.849e-02, -3.084e-02, 5.126e-02)); + r += mul(s4_7, M4(-6.951e-02, 7.847e-02, 1.060e-01, 3.783e-01, 1.528e-03, -1.161e-03, -3.397e-02, 2.894e-02, 2.840e-02, 2.163e-02, 5.997e-02, -2.849e-02, 1.324e-02, -1.374e-02, 3.235e-02, 7.879e-02)); + r += mul(s4_8, M4(-6.662e-02, -5.941e-02, 2.370e-02, 1.409e-01, -3.225e-02, 4.043e-02, 5.481e-03, 1.394e-02, -6.735e-03, -1.254e-02, -2.157e-02, -1.412e-02, -3.787e-02, 7.276e-02, 1.049e-02, 2.077e-02)); + r += mul(s5_0, M4(4.753e-02, -4.741e-02, 4.268e-02, -2.750e-02, -1.499e-02, 4.141e-02, 4.314e-02, -2.799e-02, -4.858e-02, 4.180e-02, -5.937e-02, 7.599e-03, -1.086e-01, -3.846e-02, -4.511e-01, 2.891e-02)); + r += mul(s5_1, M4(-3.799e-02, 1.486e-02, 1.909e-02, -5.205e-02, 8.668e-03, 9.116e-02, 9.902e-02, -6.908e-05, -1.486e-01, -4.575e-02, 9.426e-02, 7.493e-02, -7.610e-02, 2.594e-02, -4.155e-01, 1.043e-01)); + r += mul(s5_2, M4(-1.578e-02, -2.317e-02, -1.873e-02, 6.506e-03, 5.834e-03, 6.624e-02, 5.604e-02, -4.697e-03, 5.603e-02, 1.003e-01, 3.492e-02, -4.066e-02, -8.498e-02, -3.709e-02, -4.663e-01, 4.506e-02)); + r += mul(s5_3, M4(1.530e-01, -1.419e-02, 4.457e-02, 1.735e-02, -1.507e-02, 1.339e-01, 8.650e-02, 4.461e-02, 7.934e-02, -6.799e-02, -8.232e-02, -8.079e-03, 2.353e-02, -1.112e-01, -3.234e-01, -1.513e-01)); + r += mul(s5_4, M4(-9.873e-02, 1.266e-02, -4.563e-02, -6.144e-02, 8.620e-03, -1.178e-01, -5.440e-02, 1.080e-01, -2.497e-01, 6.533e-02, -3.113e-02, 1.041e-01, -1.834e-01, 1.515e-01, -3.158e-01, -5.617e-02)); + r += mul(s5_5, M4(-1.181e-02, 1.017e-01, -2.301e-02, 4.789e-03, 6.093e-02, -7.036e-02, -5.387e-02, -3.476e-02, -8.936e-02, 2.091e-01, 1.606e-01, 9.772e-02, 6.605e-02, -5.859e-02, -5.548e-01, -5.306e-02)); + r += mul(s5_6, M4(3.997e-04, 2.750e-02, 1.809e-02, 
6.481e-02, -1.627e-02, 5.154e-03, -6.532e-02, -1.683e-02, -5.241e-02, -6.942e-03, 4.336e-02, 5.270e-02, 2.331e-02, 1.883e-02, -4.827e-01, 7.747e-02)); + r += mul(s5_7, M4(2.166e-02, 2.180e-02, 5.002e-02, 8.731e-02, -1.527e-02, 4.645e-03, -1.913e-02, 3.873e-02, -4.044e-03, 1.239e-01, 7.787e-02, -2.459e-02, 8.016e-03, -2.391e-02, -5.045e-01, 1.226e-01)); + r += mul(s5_8, M4(-2.626e-02, 7.217e-02, 8.186e-02, 5.629e-02, -2.961e-02, 7.589e-02, 1.711e-02, 2.898e-02, 9.636e-02, 4.845e-02, -1.364e-02, 1.790e-03, -1.044e-01, 1.256e-02, -4.494e-01, -1.234e-02)); + r += mul(s6_0, M4(1.140e-02, 9.363e-03, 4.300e-01, -1.318e-02, 2.510e-01, -7.819e-02, 8.848e-02, -9.342e-03, -1.298e-02, 7.493e-02, 5.181e-02, 2.558e-02, 1.605e-03, -5.114e-02, 2.143e-02, -4.397e-02)); + r += mul(s6_1, M4(-9.611e-04, 7.419e-02, 4.901e-01, -4.664e-02, -8.663e-03, -1.639e-01, -1.429e-01, 3.733e-02, -1.649e-02, 3.757e-02, -1.612e-03, 1.383e-02, -3.144e-01, 1.175e-01, -2.074e-01, 7.000e-02)); + r += mul(s6_2, M4(-1.122e-02, -2.168e-02, 4.276e-01, 1.530e-02, -6.548e-02, 8.041e-02, 9.651e-03, -2.591e-02, 4.500e-02, -1.279e-01, 8.215e-02, -2.642e-02, 1.282e-03, -1.513e-01, -2.194e-01, 5.308e-03)); + r += mul(s6_3, M4(-5.915e-02, 6.068e-02, 3.973e-01, 5.852e-02, -5.017e-02, -3.912e-02, 3.955e-02, -1.229e-01, 2.079e-01, 5.003e-02, -7.709e-02, 2.521e-02, 4.375e-02, -3.964e-02, -3.442e-02, -9.235e-02)); + r += mul(s6_4, M4(-1.313e-01, -2.077e-02, 1.088e-01, 1.850e-02, -1.380e-01, -9.510e-02, -4.841e-02, -3.782e-02, 4.680e-02, 9.948e-02, -2.507e-01, 1.493e-02, 5.328e-03, 4.027e-02, -8.760e-02, 4.633e-02)); + r += mul(s6_5, M4(5.972e-02, 1.194e-02, 3.225e-01, -9.068e-02, 5.225e-02, -2.503e-02, 4.713e-02, 2.473e-02, -4.251e-02, 2.470e-02, 5.847e-02, -1.685e-03, 1.264e-01, -2.465e-02, -1.440e-01, -7.791e-02)); + r += mul(s6_6, M4(-9.400e-03, 3.380e-02, 3.979e-01, -2.205e-02, 2.998e-02, 2.225e-02, 3.898e-02, -9.355e-02, -1.261e-03, -3.366e-02, 9.228e-02, -2.655e-03, -1.726e-02, -5.696e-02, 6.734e-03, -7.790e-02)); 
+ r += mul(s6_7, M4(6.232e-02, -2.392e-02, 3.940e-01, 2.002e-02, 6.472e-02, -4.588e-02, -2.079e-02, -6.893e-02, 6.273e-02, 1.513e-01, 7.043e-02, -1.354e-01, -2.504e-02, 6.867e-02, -1.033e-02, -1.156e-01)); + r += mul(s6_8, M4(1.024e-01, 1.242e-03, 3.614e-01, -7.435e-02, 3.118e-03, -8.220e-03, 2.923e-02, -1.808e-03, -2.953e-03, 1.355e-02, 3.267e-02, 1.208e-02, 9.185e-03, 4.535e-02, -5.062e-02, 3.119e-02)); + r += mul(s7_0, M4(-1.910e-02, -1.887e-02, -4.592e-02, -2.185e-02, 8.543e-03, -3.987e-02, 3.666e-02, 3.995e-02, 6.334e-02, -5.211e-02, -5.736e-02, 4.134e-02, -1.546e-01, -1.693e-02, 7.901e-02, 4.490e-02)); + r += mul(s7_1, M4(2.399e-02, 3.919e-02, 2.938e-02, -3.529e-02, 1.825e-02, 4.760e-02, -9.350e-03, -6.631e-02, 4.184e-02, 2.180e-02, 3.567e-02, 1.530e-03, -8.034e-02, 3.142e-02, -2.671e-02, 4.697e-02)); + r += mul(s7_2, M4(1.911e-02, 7.166e-03, 4.074e-02, -1.249e-03, -1.535e-01, 1.070e-01, 5.826e-02, -2.056e-02, 7.336e-02, -1.698e-01, -2.646e-02, 6.150e-02, 8.283e-03, 8.127e-02, -3.357e-02, -9.405e-02)); + r += mul(s7_3, M4(-4.339e-02, -4.750e-03, -5.452e-02, 3.019e-02, -2.252e-02, -1.077e-01, 1.085e-02, -3.993e-02, 1.242e-02, 3.231e-02, -3.038e-02, -1.896e-01, -6.426e-02, -8.185e-03, 1.091e-01, -4.529e-02)); + r += mul(s7_4, M4(-1.119e-01, 4.192e-02, -1.368e-01, 1.173e-01, -1.318e-01, -3.726e-01, 6.285e-02, 1.275e-01, -2.175e-01, 9.336e-02, -1.567e-01, 8.716e-03, -1.037e-01, 3.719e-02, 6.634e-02, -4.542e-02)); + r += mul(s7_5, M4(5.178e-02, 8.103e-03, -6.096e-02, 5.751e-02, 6.189e-02, 1.582e-01, 4.693e-02, 8.167e-03, -4.985e-02, -8.742e-02, 5.395e-02, 1.497e-03, 8.699e-02, -1.446e-02, -3.192e-03, 8.255e-02)); + r += mul(s7_6, M4(-2.826e-02, 3.149e-03, -7.046e-02, 3.496e-03, -1.670e-02, -5.437e-03, 3.555e-03, -3.939e-02, -2.075e-02, 1.419e-04, 1.303e-02, 6.702e-02, -1.347e-02, -2.402e-02, 6.611e-02, -9.259e-02)); + r += mul(s7_7, M4(-3.679e-02, 6.286e-02, -1.727e-03, 4.870e-02, 5.135e-03, -3.917e-02, -4.287e-03, -1.575e-03, 4.891e-02, 1.372e-01, -6.934e-02, 
-1.228e-03, 1.604e-02, 3.688e-02, 8.385e-02, -6.384e-02)); + r += mul(s7_8, M4(-5.084e-02, 6.363e-02, 1.538e-02, -4.257e-02, -1.282e-02, 5.665e-02, 4.601e-02, 8.042e-02, 2.497e-02, 5.432e-02, 2.507e-02, -5.704e-02, 3.731e-02, 9.438e-03, 8.206e-02, 1.792e-02)); + r += V4(4.997e-03, 6.270e-03, 8.008e-03, -5.621e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.530e-02, -1.617e-02, 2.352e-02, -1.485e-02, 4.881e-02, -2.329e-02, -4.558e-03, 8.385e-03, -1.442e-01, -4.004e-02, 2.705e-02, -9.990e-03, -1.235e-01, -1.013e-01, -1.527e-02, 3.842e-02)); + r += mul(s0_1, M4(4.684e-03, 5.035e-04, -5.050e-02, -8.947e-03, 1.573e-02, -2.615e-02, 6.428e-02, -3.808e-03, -4.715e-02, -2.329e-02, 3.442e-02, 3.093e-02, -4.718e-02, 4.890e-02, -2.897e-02, 7.459e-02)); + r += mul(s0_2, M4(7.853e-02, -1.839e-03, -3.160e-02, -3.292e-02, -1.984e-02, 6.870e-02, -4.005e-02, 3.317e-02, -3.989e-03, 1.158e-01, -2.007e-02, 9.144e-03, -5.287e-02, -1.566e-03, -5.974e-02, 5.084e-03)); + r += mul(s0_3, M4(1.097e-01, 3.125e-02, -2.672e-02, 5.035e-02, -7.439e-02, -2.495e-02, -3.683e-02, -5.713e-03, 3.229e-02, 5.931e-02, -1.243e-02, 7.279e-03, 6.230e-02, -9.399e-02, 1.939e-02, -6.977e-02)); + r += mul(s0_4, M4(4.851e-02, -8.625e-02, -7.390e-03, -1.343e-01, -1.915e-01, 3.872e-02, -5.489e-02, -5.599e-02, -4.943e-02, -1.504e-01, 1.693e-01, 3.257e-03, 
2.588e-01, 6.934e-02, -8.523e-02, 1.005e-01)); + r += mul(s0_5, M4(4.412e-02, -9.001e-02, 1.167e-01, -3.110e-02, -3.913e-02, -1.538e-01, -1.114e-01, 7.471e-03, -6.041e-02, 6.154e-02, -1.910e-01, -3.868e-02, 1.568e-02, -1.627e-01, 5.494e-02, -4.914e-02)); + r += mul(s0_6, M4(-1.899e-02, -5.654e-02, -3.378e-02, -1.689e-02, -3.330e-02, -6.734e-02, 5.689e-02, -2.113e-02, 9.719e-03, -2.733e-02, -4.758e-02, 2.752e-02, 1.083e-01, -3.866e-02, -2.688e-02, -1.729e-03)); + r += mul(s0_7, M4(-5.968e-02, -2.996e-02, 2.356e-02, -1.238e-02, 2.084e-01, 7.735e-03, -1.316e-01, -5.468e-03, 1.017e-01, 4.390e-02, 4.193e-02, -6.397e-02, 5.279e-02, -9.751e-02, -1.821e-02, -1.616e-01)); + r += mul(s0_8, M4(1.749e-02, 4.329e-03, -6.404e-02, -5.788e-02, 1.173e-02, -2.369e-02, 6.466e-03, 5.626e-03, 6.072e-02, -4.984e-02, 2.577e-03, 2.890e-03, 9.293e-02, -4.553e-02, -1.866e-02, -1.942e-02)); + r += mul(s1_0, M4(-5.736e-02, -4.241e-03, 4.091e-02, -9.481e-04, -3.571e-02, -2.591e-02, -3.441e-02, 1.974e-02, 2.702e-02, -6.178e-02, 3.701e-02, 4.750e-02, 3.939e-02, 2.841e-02, 1.887e-02, -1.122e-02)); + r += mul(s1_1, M4(5.769e-03, 7.284e-02, -9.693e-04, -1.688e-02, 5.025e-02, -3.396e-03, 3.851e-02, 3.025e-03, -1.075e-02, -1.265e-01, 1.086e-01, -1.664e-01, 6.975e-02, 2.271e-02, 2.500e-02, -4.588e-02)); + r += mul(s1_2, M4(9.697e-03, -2.097e-02, 5.419e-02, -3.256e-02, -4.287e-02, 3.847e-02, 2.259e-02, 1.297e-02, 9.146e-03, 1.455e-02, -1.044e-02, -2.461e-02, 2.637e-02, -1.331e-02, -1.739e-02, -1.716e-03)); + r += mul(s1_3, M4(7.006e-02, 9.059e-02, 1.133e-03, 1.094e-02, -7.047e-02, 5.636e-02, 4.308e-02, 3.544e-02, 1.890e-02, 1.031e-01, 1.460e-01, 9.134e-02, -1.043e-01, -6.854e-02, 2.331e-03, -5.222e-02)); + r += mul(s1_4, M4(7.126e-02, 4.843e-03, -2.516e-02, -5.130e-02, -1.754e-01, 7.127e-02, -1.121e-01, 7.451e-02, -3.895e-02, -3.175e-01, -3.025e-02, 2.512e-01, -1.846e-01, 1.422e-01, -8.243e-02, 7.740e-02)); + r += mul(s1_5, M4(1.101e-01, 5.575e-02, 1.701e-01, 1.255e-02, -3.568e-02, -2.743e-02, 
8.043e-02, -1.039e-02, 9.320e-02, 1.439e-01, 1.610e-01, -1.980e-02, -4.803e-02, -4.626e-02, 3.953e-02, -2.514e-02)); + r += mul(s1_6, M4(-1.656e-02, -2.670e-03, 9.453e-03, 7.954e-03, 3.160e-03, -2.335e-02, -8.001e-02, -1.985e-02, -7.616e-03, -1.240e-01, -3.496e-03, -2.135e-02, 1.189e-01, 2.478e-02, -6.627e-02, 1.809e-02)); + r += mul(s1_7, M4(-3.664e-02, 6.085e-02, 6.035e-02, -2.396e-02, 1.023e-01, -3.293e-02, 8.034e-02, -1.698e-03, -1.236e-01, 5.066e-02, 6.293e-02, -3.673e-02, 1.275e-01, -1.213e-01, 1.118e-01, -7.451e-03)); + r += mul(s1_8, M4(-9.346e-03, 5.860e-02, 1.267e-02, -1.551e-02, 8.045e-03, 2.738e-02, -7.751e-02, 1.061e-02, -2.387e-02, -5.766e-02, 3.055e-03, -3.948e-02, 2.559e-02, 5.561e-02, -5.830e-02, -3.382e-02)); + r += mul(s2_0, M4(-6.069e-02, 2.108e-02, 9.496e-03, -1.872e-02, 1.233e-01, 5.139e-02, 9.632e-03, -2.231e-03, -1.803e-02, 1.978e-02, 5.600e-02, 3.702e-02, 2.384e-01, 3.441e-02, -8.479e-03, 1.096e-01)); + r += mul(s2_1, M4(3.898e-03, 2.343e-02, 7.591e-03, -3.634e-02, 9.175e-02, 1.232e-02, 9.339e-02, -1.427e-02, 1.635e-02, 3.624e-02, 5.380e-03, 3.340e-02, 5.886e-02, -6.395e-02, 3.795e-02, -1.333e-01)); + r += mul(s2_2, M4(-1.586e-02, 1.043e-02, -6.155e-03, -1.137e-02, 1.116e-02, -9.468e-03, 1.350e-02, -1.687e-02, 2.759e-02, 2.010e-02, 5.078e-02, 1.777e-02, -4.099e-02, -1.848e-02, -7.351e-03, -5.972e-02)); + r += mul(s2_3, M4(4.117e-02, -3.596e-02, -3.002e-02, 5.303e-02, -1.739e-01, -2.256e-01, 2.088e-02, -4.490e-02, 6.632e-02, -1.533e-02, -5.606e-03, -8.215e-03, -1.793e-01, -2.532e-01, -1.841e-01, 1.767e-01)); + r += mul(s2_4, M4(-9.474e-03, 1.164e-02, 1.225e-01, 8.000e-02, -2.243e-01, -1.921e-02, -1.794e-01, 1.990e-01, 2.374e-02, 2.428e-02, -6.996e-02, -3.162e-02, 3.179e-01, -3.472e-01, 1.488e-01, 1.717e-01)); + r += mul(s2_5, M4(2.778e-02, -1.627e-04, 3.268e-02, -3.187e-03, -2.008e-02, -8.358e-02, 9.114e-02, -5.115e-02, 2.025e-03, 1.126e-02, -1.056e-02, -3.162e-02, 5.090e-03, -1.149e-02, 2.072e-02, 2.220e-02)); + r += mul(s2_6, 
M4(-3.461e-02, 5.308e-02, 5.725e-02, -9.231e-03, -2.192e-02, 1.419e-01, -4.842e-02, 1.155e-01, -2.410e-02, -3.985e-02, 3.458e-02, -2.689e-02, 3.019e-02, 4.427e-02, 1.272e-01, 1.810e-01)); + r += mul(s2_7, M4(-8.477e-03, -6.933e-04, -1.920e-02, -2.858e-03, -2.798e-03, -1.307e-01, -2.007e-02, -5.314e-02, -1.314e-01, 7.247e-03, -1.472e-01, -5.190e-02, 1.460e-02, -1.797e-01, -3.916e-01, 1.581e-01)); + r += mul(s2_8, M4(-8.492e-03, -1.268e-02, -4.747e-02, -1.358e-02, -9.552e-03, 2.001e-02, 2.676e-02, -3.251e-03, -1.836e-02, 1.053e-02, -3.311e-02, -1.412e-02, -8.262e-04, -2.533e-01, 4.093e-02, 4.769e-02)); + r += mul(s3_0, M4(-1.965e-01, 1.753e-01, -2.753e-02, 5.399e-02, 1.293e-01, 4.383e-02, 9.391e-02, -4.703e-03, 1.665e-02, -1.957e-02, 3.671e-02, 5.154e-02, 1.635e-02, -5.094e-02, -6.182e-04, 1.959e-02)); + r += mul(s3_1, M4(-1.850e-03, 2.078e-01, 1.834e-02, 7.289e-02, 4.926e-02, 5.169e-03, 6.260e-03, -3.841e-02, -3.082e-02, 6.279e-02, -7.108e-02, 5.052e-02, -9.217e-03, 1.371e-02, 2.197e-03, 2.179e-02)); + r += mul(s3_2, M4(-5.101e-02, 4.943e-02, -7.672e-03, -5.337e-02, 1.374e-02, -4.443e-02, 6.857e-03, -1.739e-02, -1.378e-02, 1.933e-02, -6.228e-02, -6.243e-02, -6.422e-03, 5.847e-02, 1.756e-02, 4.110e-03)); + r += mul(s3_3, M4(8.184e-02, 2.246e-02, 3.377e-02, -3.316e-02, -9.441e-02, -1.048e-01, 6.579e-02, -2.111e-02, 3.701e-01, 6.567e-02, -2.069e-01, 1.831e-01, 7.971e-02, -6.782e-03, -1.376e-02, 5.703e-02)); + r += mul(s3_4, M4(4.998e-02, 2.069e-01, -4.661e-03, 1.201e-01, -1.860e-01, 4.666e-02, -2.778e-01, 5.755e-02, 6.265e-02, 1.730e-02, 1.673e-02, -1.287e-01, 5.509e-02, -5.944e-02, 6.826e-02, -7.650e-02)); + r += mul(s3_5, M4(3.328e-03, 4.624e-02, 7.487e-02, 1.406e-02, -3.831e-02, -8.897e-02, -6.237e-03, 1.329e-02, 7.446e-02, -6.956e-02, -4.222e-02, -1.347e-01, -2.057e-02, -6.899e-02, -4.685e-02, -5.832e-03)); + r += mul(s3_6, M4(-2.618e-02, 4.749e-02, 5.444e-02, -4.484e-04, -6.652e-02, -4.663e-02, 2.784e-02, 4.574e-03, -5.196e-02, -3.805e-02, -3.152e-02, -8.233e-02, 
1.077e-02, -4.890e-02, -1.679e-02, -3.505e-02)); + r += mul(s3_7, M4(-4.617e-02, -7.254e-02, 3.598e-03, 2.800e-02, -1.499e-02, -9.795e-03, 2.029e-02, -7.912e-02, 2.532e-02, -1.790e-01, -3.266e-01, -4.014e-01, 8.254e-02, -1.687e-03, -4.668e-02, -1.094e-02)); + r += mul(s3_8, M4(1.320e-03, 4.325e-02, -5.827e-02, -4.300e-02, 4.556e-04, 2.221e-02, -2.681e-02, 7.054e-02, -2.948e-02, 2.460e-01, -2.830e-01, -1.085e-01, 4.318e-02, 3.407e-03, -3.434e-02, -1.976e-02)); + r += mul(s4_0, M4(1.460e-01, 4.754e-02, 1.399e-01, -1.620e-02, -3.993e-02, -3.515e-02, 2.831e-02, -1.404e-02, -3.676e-02, -2.850e-02, 1.493e-02, 2.536e-02, -5.705e-02, -7.211e-02, -2.649e-03, 5.163e-02)); + r += mul(s4_1, M4(6.518e-02, -6.099e-02, 4.237e-02, -8.836e-02, -7.260e-02, 2.330e-02, -1.394e-02, -1.646e-03, 3.951e-02, -1.861e-02, 6.994e-03, -2.942e-02, -5.052e-02, -3.522e-02, -8.298e-02, 3.180e-02)); + r += mul(s4_2, M4(2.823e-02, 3.593e-02, -3.280e-02, -5.109e-02, -2.827e-02, 1.401e-02, -6.206e-02, 4.539e-02, 1.991e-02, -1.716e-02, 2.243e-02, -2.297e-02, 2.695e-02, -1.056e-02, 3.035e-03, -2.991e-02)); + r += mul(s4_3, M4(2.547e-02, -5.687e-03, -2.042e-02, -9.337e-02, 3.524e-02, -1.397e-02, -3.742e-02, 1.454e-02, 7.672e-02, 3.066e-02, -3.199e-02, 2.233e-02, -5.170e-02, 5.818e-02, -2.007e-02, 4.057e-02)); + r += mul(s4_4, M4(-2.283e-01, 2.457e-01, -3.105e-01, -3.668e-02, 3.219e-02, -1.147e-01, -9.497e-02, -7.666e-02, 6.047e-02, -6.299e-03, 7.590e-02, -1.044e-01, -4.754e-04, -5.665e-02, -2.808e-02, -9.255e-02)); + r += mul(s4_5, M4(5.215e-02, 1.923e-01, 1.238e-01, -7.348e-02, -6.519e-02, -3.306e-02, -9.314e-02, 1.386e-01, 7.793e-03, 4.899e-02, 1.845e-02, -5.812e-02, 2.268e-02, -9.544e-03, -2.321e-02, -2.858e-02)); + r += mul(s4_6, M4(6.982e-03, -1.007e-01, 2.938e-02, -4.167e-02, 8.572e-02, 7.508e-03, -5.352e-02, -2.551e-02, 1.916e-02, -8.421e-02, -4.458e-03, -4.428e-02, 3.794e-02, -5.348e-02, -6.604e-02, -2.357e-02)); + r += mul(s4_7, M4(-3.960e-02, 1.965e-01, -1.140e-01, 2.182e-02, 1.082e-01, 
6.102e-02, 6.258e-02, 5.545e-02, 1.849e-02, 9.835e-03, -1.410e-02, -2.211e-02, 3.209e-02, 2.958e-02, 6.653e-03, 3.695e-02)); + r += mul(s4_8, M4(-2.179e-02, -1.401e-03, 1.264e-02, 3.296e-02, 5.442e-03, -6.132e-02, 7.208e-02, 5.353e-02, -1.661e-02, 4.773e-02, -4.191e-02, 3.883e-03, -1.001e-02, -2.729e-02, -2.561e-02, -1.567e-03)); + r += mul(s5_0, M4(1.510e-02, -4.602e-02, 6.757e-02, 1.657e-02, -1.036e-01, 9.565e-02, -3.068e-03, 2.594e-02, 1.073e-02, 7.805e-03, -3.369e-02, -9.184e-03, 2.529e-02, -3.586e-02, 8.693e-02, 7.147e-04)); + r += mul(s5_1, M4(-6.905e-02, 2.315e-02, -3.566e-02, 1.506e-02, 2.833e-02, 6.089e-02, 1.761e-02, -1.660e-03, -8.523e-02, -5.831e-02, 3.955e-02, -1.125e-01, 3.922e-02, -1.963e-02, 3.167e-03, -9.310e-02)); + r += mul(s5_2, M4(1.307e-02, -4.174e-02, -2.938e-02, -2.647e-02, -3.047e-02, -7.772e-02, -2.001e-02, -3.471e-02, -6.911e-02, 7.078e-02, -1.451e-01, 2.392e-02, 6.754e-02, 1.151e-02, 1.840e-02, -2.239e-02)); + r += mul(s5_3, M4(-5.393e-03, -8.862e-02, 3.511e-02, -4.368e-02, 1.822e-01, -3.408e-02, -3.088e-02, 7.301e-02, 1.169e-02, -3.817e-02, 4.264e-03, -7.837e-02, -3.082e-02, 2.524e-02, 2.682e-02, -1.838e-02)); + r += mul(s5_4, M4(-2.514e-02, -2.609e-02, -7.041e-02, -9.363e-02, -4.125e-02, -8.346e-03, -2.117e-01, -7.318e-02, -1.934e-01, 2.920e-01, 3.854e-02, -2.982e-01, -1.804e-01, 2.558e-02, 1.561e-03, -4.694e-02)); + r += mul(s5_5, M4(-6.437e-02, 2.093e-01, -5.823e-02, 2.168e-02, 4.195e-02, 2.039e-01, 1.491e-01, -4.014e-02, 2.835e-03, -3.487e-02, -6.809e-02, -4.941e-02, -1.088e-02, -3.818e-02, 1.212e-03, 1.173e-02)); + r += mul(s5_6, M4(5.244e-02, 5.221e-02, -3.623e-02, 1.452e-02, -3.438e-02, 2.158e-02, 6.116e-02, 1.669e-02, -3.513e-03, -2.530e-02, -2.136e-02, -2.934e-02, 9.833e-02, 8.911e-02, -7.366e-02, 2.772e-02)); + r += mul(s5_7, M4(1.158e-02, -7.909e-02, 1.344e-02, 6.564e-03, 1.432e-01, 2.081e-01, -6.528e-02, -4.952e-03, -2.150e-02, 4.520e-02, -1.310e-01, -1.048e-01, 9.828e-02, -5.999e-03, 2.218e-02, 4.271e-02)); + r += mul(s5_8, 
M4(9.263e-03, -3.699e-02, -2.887e-02, 4.434e-02, 2.098e-02, -3.306e-02, 2.391e-02, 4.590e-04, -3.066e-02, 4.501e-02, 4.998e-02, -1.248e-02, 2.164e-02, 1.734e-02, -7.385e-02, -6.176e-02)); + r += mul(s6_0, M4(-1.200e-01, -6.541e-03, 1.621e-02, -2.429e-02, 1.346e-01, -4.731e-02, -6.673e-02, 8.000e-02, 3.600e-03, 2.747e-02, -4.569e-02, -1.495e-02, -3.311e-02, 2.682e-02, -1.057e-01, 1.090e-01)); + r += mul(s6_1, M4(-7.507e-02, 7.104e-02, 9.247e-03, -4.609e-02, -4.729e-03, -1.940e-01, 1.531e-01, -1.360e-01, 7.056e-02, 8.330e-02, -1.045e-02, -2.090e-03, -4.943e-02, -2.803e-02, 6.241e-02, -3.123e-02)); + r += mul(s6_2, M4(-4.705e-03, -4.333e-02, 6.813e-02, -3.971e-02, -3.500e-02, 5.215e-03, -1.209e-01, -4.852e-02, -2.663e-02, -5.876e-02, -3.687e-02, -2.737e-02, 9.777e-03, -6.370e-02, 7.566e-02, -9.757e-03)); + r += mul(s6_3, M4(-3.344e-02, 2.399e-02, -5.442e-02, -2.422e-03, -1.279e-01, 9.236e-02, 8.630e-02, -1.518e-02, 6.253e-02, -6.975e-02, -2.661e-04, -1.602e-02, 1.586e-02, 2.887e-02, 6.045e-02, 3.503e-02)); + r += mul(s6_4, M4(4.011e-02, 2.552e-02, -3.967e-02, -7.582e-02, -2.794e-01, 1.229e-02, -4.513e-01, 2.350e-02, 1.524e-02, -1.563e-01, 4.062e-02, -3.018e-01, 1.119e-01, 1.427e-01, 1.185e-01, 3.546e-01)); + r += mul(s6_5, M4(-4.893e-02, 2.692e-02, -3.284e-02, 1.005e-01, -4.720e-02, 3.716e-02, -6.935e-03, 1.084e-01, -7.848e-03, 5.418e-02, -3.424e-02, 2.442e-02, 1.865e-02, 1.802e-01, 2.675e-02, -8.322e-03)); + r += mul(s6_6, M4(-2.785e-02, -2.571e-02, -1.216e-02, -6.150e-03, -1.060e-01, 3.524e-02, -2.086e-02, 5.807e-02, -3.918e-02, 8.613e-03, 6.152e-04, -6.108e-02, -5.372e-02, 2.802e-02, 8.226e-02, 2.973e-02)); + r += mul(s6_7, M4(-4.775e-02, 5.714e-02, 3.044e-03, 3.925e-02, -1.426e-02, 4.745e-02, -3.258e-02, 7.909e-02, -9.992e-02, 3.526e-02, -1.658e-01, -9.226e-04, -1.165e-01, 5.208e-02, -6.919e-03, 5.177e-02)); + r += mul(s6_8, M4(-2.796e-02, -7.841e-03, -2.497e-02, -1.530e-02, 1.059e-02, -4.475e-02, -4.364e-02, -2.307e-02, -4.266e-03, -4.323e-02, -5.765e-02, 
3.223e-02, -5.482e-02, 4.946e-02, 3.526e-02, 3.115e-03)); + r += mul(s7_0, M4(-2.466e-02, -1.719e-03, 3.801e-02, 1.752e-02, -3.110e-03, -4.578e-02, -4.427e-02, 1.854e-03, 1.912e-02, -5.210e-02, -8.798e-03, 1.338e-02, -5.674e-02, 4.983e-02, -1.342e-02, -2.986e-02)); + r += mul(s7_1, M4(-6.462e-02, -3.351e-02, -4.126e-02, -1.354e-02, 2.871e-02, -5.572e-02, 6.581e-02, -2.789e-02, -1.171e-02, 1.881e-01, -6.108e-02, 5.253e-02, 7.739e-02, 4.354e-04, 1.759e-02, 3.710e-02)); + r += mul(s7_2, M4(-2.664e-02, 9.902e-03, -1.423e-02, -2.955e-03, -2.286e-02, 7.444e-03, -4.907e-02, -1.944e-02, 1.294e-02, -6.049e-02, 4.284e-02, 3.838e-02, -2.826e-02, 2.839e-02, -1.008e-01, 1.964e-02)); + r += mul(s7_3, M4(-2.754e-02, 1.341e-02, -1.052e-02, 2.319e-03, -4.675e-02, 1.518e-02, 1.005e-01, -4.150e-02, 1.854e-02, -5.684e-02, -3.610e-02, -1.013e-02, -2.374e-02, 1.913e-02, 7.706e-02, 7.259e-04)); + r += mul(s7_4, M4(-1.687e-02, 2.132e-02, -9.203e-02, -1.918e-02, -1.380e-01, 1.196e-01, 8.180e-03, -4.208e-02, 2.657e-02, -2.285e-02, -8.182e-02, -2.380e-01, -2.184e-02, 7.319e-02, -1.213e-01, 6.137e-02)); + r += mul(s7_5, M4(4.043e-02, -1.381e-02, -6.284e-03, 3.607e-02, 5.081e-03, 2.861e-02, -6.638e-02, 5.004e-02, -3.729e-02, -6.326e-02, -2.281e-02, -1.070e-02, -1.072e-02, -2.321e-02, 1.644e-01, 7.287e-02)); + r += mul(s7_6, M4(3.375e-03, -2.589e-02, -2.995e-02, 1.989e-03, -5.787e-02, -1.684e-02, -4.837e-02, 1.978e-02, 1.767e-02, 4.527e-02, 1.354e-02, 7.828e-03, 5.698e-03, -2.555e-02, 1.943e-02, -8.800e-03)); + r += mul(s7_7, M4(-2.163e-02, -5.088e-05, -1.509e-03, 1.766e-02, -4.841e-02, 5.486e-02, -5.015e-02, 2.485e-02, -8.226e-02, -1.765e-01, 4.477e-04, -6.773e-02, -4.923e-02, 5.172e-02, 8.718e-03, 3.181e-02)); + r += mul(s7_8, M4(-6.163e-02, 3.111e-02, -1.483e-02, -6.322e-02, -3.006e-02, -6.286e-02, -1.226e-03, -1.081e-03, -4.795e-03, 1.847e-02, -5.359e-02, -3.331e-02, -1.364e-02, -1.835e-02, -2.644e-02, -3.180e-02)); + r += V4(-1.572e-03, -2.868e-03, -4.500e-03, -2.959e-03); + return r; +} + 
+V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(7.549e-03, 5.841e-03, 3.577e-02, -6.203e-02, 1.483e-02, -1.774e-02, -3.494e-04, 1.253e-01, 5.410e-02, 3.787e-02, -8.517e-03, -1.559e-02, -2.618e-02, 1.766e-02, 7.876e-02, -1.753e-01)); + r += mul(s0_1, M4(-3.639e-03, -6.624e-02, -1.868e-02, -7.252e-02, 4.951e-03, -2.230e-02, 5.442e-02, 4.589e-02, 2.376e-01, -2.639e-02, -3.136e-02, 1.902e-01, 9.939e-02, 7.555e-02, -3.844e-02, -1.404e-01)); + r += mul(s0_2, M4(1.508e-02, 3.310e-02, -2.113e-02, 3.406e-02, -2.882e-02, -5.043e-02, -2.153e-02, 2.604e-02, -2.178e-02, 8.144e-02, 1.316e-01, 1.140e-01, 3.774e-02, 2.014e-02, 5.501e-02, 4.149e-03)); + r += mul(s0_3, M4(5.904e-02, -4.471e-02, -1.419e-03, 5.577e-02, 2.097e-02, 2.939e-02, 4.351e-02, 7.017e-03, -1.220e-01, -2.561e-02, -7.143e-02, -6.226e-02, -2.383e-02, -8.104e-03, 1.235e-01, -5.604e-02)); + r += mul(s0_4, M4(9.289e-02, 1.875e-02, -5.693e-02, 8.472e-02, 2.723e-02, -8.247e-02, 2.351e-02, -1.478e-01, 3.210e-01, -4.399e-03, -2.030e-01, -6.583e-02, -2.577e-01, 6.970e-02, -1.362e-01, -3.662e-01)); + r += mul(s0_5, M4(6.788e-02, 1.481e-01, 1.110e-01, 8.451e-02, 1.231e-01, -5.248e-02, 4.599e-02, -7.866e-02, 6.980e-02, 1.021e-01, -1.467e-02, -2.085e-01, -7.291e-02, -7.495e-02, -6.751e-02, -7.518e-02)); + r += mul(s0_6, M4(2.776e-02, -4.853e-03, 1.438e-02, -1.960e-02, 2.142e-02, 6.762e-03, 
-1.021e-02, 1.000e-02, 1.683e-02, 1.249e-01, 1.405e-02, 3.261e-02, 3.420e-02, 9.142e-03, 9.864e-02, 1.273e-01)); + r += mul(s0_7, M4(3.249e-03, 1.042e-02, -1.446e-01, 2.937e-02, 2.885e-02, -1.655e-01, -2.498e-01, 1.184e-01, -7.952e-02, -4.997e-02, -8.814e-02, 8.800e-02, -4.362e-02, -4.499e-02, 2.033e-01, -1.495e-02)); + r += mul(s0_8, M4(-3.336e-02, -9.648e-03, -3.515e-02, -2.298e-02, 1.141e-02, 6.649e-02, 6.324e-02, 1.812e-02, -7.799e-02, 3.529e-02, 2.367e-01, 7.042e-03, 4.538e-02, 1.188e-01, 2.026e-01, 3.327e-02)); + r += mul(s1_0, M4(6.094e-03, 3.831e-02, 2.601e-02, -5.651e-02, -1.984e-02, -8.521e-02, -3.315e-02, 2.927e-02, 9.320e-02, 3.414e-02, 1.526e-02, -6.515e-02, 1.464e-02, 5.190e-02, 1.250e-02, -2.450e-02)); + r += mul(s1_1, M4(-6.664e-02, 2.427e-02, -4.062e-02, -1.023e-01, 1.142e-03, 1.598e-02, 6.027e-02, 7.189e-02, 1.110e-01, -1.270e-01, -1.238e-01, -7.045e-03, -5.579e-02, -4.261e-02, -5.511e-02, 1.607e-01)); + r += mul(s1_2, M4(-4.784e-02, 4.332e-02, 4.622e-02, -1.832e-01, 2.271e-03, -2.586e-02, -4.214e-04, -2.251e-02, 1.093e-01, 2.737e-02, 5.186e-02, -2.277e-02, 4.190e-02, -3.350e-02, 7.191e-02, 5.261e-02)); + r += mul(s1_3, M4(-7.808e-02, 6.345e-03, 2.821e-02, -6.509e-02, -4.562e-02, -7.348e-02, -2.062e-02, -1.096e-01, -6.326e-02, 1.541e-02, 1.495e-01, -1.246e-01, 4.181e-02, -3.568e-02, -5.511e-02, -9.366e-02)); + r += mul(s1_4, M4(-4.934e-02, 9.367e-02, 3.998e-02, 1.580e-02, 7.092e-02, -2.118e-02, -5.418e-02, -2.437e-01, 3.455e-01, 3.027e-01, -1.929e-01, -7.602e-02, -2.531e-01, 1.413e-01, 3.251e-02, -3.941e-01)); + r += mul(s1_5, M4(8.890e-03, 8.483e-02, 2.803e-01, 6.689e-02, -1.312e-02, 6.201e-03, 6.569e-02, -4.399e-02, -2.803e-02, -6.854e-02, 4.189e-03, -1.196e-01, -2.446e-02, -1.737e-02, -1.135e-02, -8.770e-02)); + r += mul(s1_6, M4(-1.025e-02, 7.595e-02, -3.784e-02, -1.273e-03, 1.642e-03, -2.167e-02, 1.115e-01, 1.739e-02, 1.468e-02, 4.380e-02, -7.158e-03, 6.610e-02, -1.724e-02, 6.816e-02, -3.586e-02, 7.312e-02)); + r += mul(s1_7, M4(1.537e-02, 
5.858e-02, -1.293e-02, 5.912e-03, 1.436e-02, -1.255e-01, -1.164e-01, 5.451e-02, -1.606e-02, 7.959e-02, -1.098e-01, 2.954e-02, 3.584e-02, 1.064e-01, 4.900e-02, 1.483e-01)); + r += mul(s1_8, M4(-4.516e-02, -1.848e-02, -4.929e-02, -5.298e-02, -1.838e-02, 5.770e-02, 2.665e-01, -1.645e-02, -1.324e-02, -3.306e-02, 2.753e-01, -6.406e-02, 1.989e-02, 6.731e-02, 4.207e-02, 5.124e-02)); + r += mul(s2_0, M4(8.669e-03, -4.284e-02, -5.007e-02, 1.656e-02, -5.270e-02, 3.006e-02, 6.241e-02, -6.480e-02, 4.663e-02, -2.888e-02, -5.864e-03, 2.422e-02, -5.882e-02, 4.813e-02, 4.304e-02, -8.405e-02)); + r += mul(s2_1, M4(2.559e-02, 1.371e-02, 7.386e-03, -1.152e-01, -4.218e-02, 2.676e-02, -1.053e-03, 2.170e-01, 5.879e-03, 2.691e-02, 2.323e-02, 2.206e-03, 4.784e-02, -1.199e-01, -4.400e-02, -2.469e-01)); + r += mul(s2_2, M4(1.379e-02, 4.408e-02, -5.730e-03, -5.580e-02, 2.329e-02, -1.601e-02, -9.255e-02, 9.784e-02, 3.808e-02, -2.988e-02, 1.248e-02, -1.850e-02, 1.154e-01, 1.170e-01, 4.576e-02, -1.395e-01)); + r += mul(s2_3, M4(8.206e-02, -3.295e-02, 2.331e-02, 7.162e-02, 1.918e-01, -1.477e-01, -1.075e-01, 7.347e-02, -1.932e-02, -5.538e-02, -2.213e-02, 1.610e-02, -4.277e-03, 8.670e-02, 1.180e-01, 1.101e-01)); + r += mul(s2_4, M4(-1.037e-01, 3.606e-02, -3.500e-03, -4.871e-02, 3.480e-02, 3.394e-01, 2.708e-01, -8.373e-02, 1.522e-02, -3.404e-02, 3.080e-02, -1.670e-02, 2.218e-01, 1.636e-01, 1.746e-01, 1.966e-01)); + r += mul(s2_5, M4(-5.810e-02, -1.016e-01, -1.705e-02, 4.430e-02, -1.122e-01, 4.012e-02, -7.739e-02, 1.988e-02, -7.404e-02, 1.663e-03, -4.575e-02, 6.022e-02, 2.325e-01, 2.560e-01, 1.496e-01, 1.737e-01)); + r += mul(s2_6, M4(-3.489e-02, -7.750e-03, 3.257e-02, -6.685e-02, -2.556e-02, -6.975e-02, -7.688e-02, -6.276e-02, 3.238e-02, -6.852e-03, 8.972e-04, 4.346e-02, -4.617e-02, -7.906e-02, -1.260e-01, -2.206e-01)); + r += mul(s2_7, M4(-7.169e-02, -1.108e-01, -7.025e-02, -4.756e-02, -1.275e-03, 5.717e-02, 1.381e-02, -4.280e-02, -7.935e-02, 8.469e-02, 2.669e-02, -4.783e-02, -6.415e-02, 
-7.410e-02, -5.996e-01, -7.421e-02)); + r += mul(s2_8, M4(5.732e-02, -2.094e-02, 4.487e-02, 1.684e-03, 3.583e-02, 6.555e-02, -9.093e-02, 2.819e-02, 3.207e-02, 2.379e-02, -7.058e-02, 3.650e-02, 7.129e-02, 8.081e-02, -2.055e-01, 2.068e-02)); + r += mul(s3_0, M4(-1.351e-01, 2.487e-02, 6.509e-02, -1.377e-01, -7.213e-02, 4.431e-02, 6.395e-02, -2.582e-02, 2.499e-02, 2.643e-02, -5.115e-02, -4.884e-02, 3.498e-02, -4.305e-02, -2.118e-02, 3.186e-02)); + r += mul(s3_1, M4(3.457e-02, 9.418e-02, -9.542e-02, 1.424e-02, -4.137e-02, 3.093e-02, -1.165e-01, 2.549e-01, 8.566e-02, -4.637e-02, -1.015e-01, -7.352e-02, 9.651e-02, -9.373e-03, -3.242e-02, -1.090e-01)); + r += mul(s3_2, M4(-3.211e-02, 6.423e-02, -3.871e-02, -1.077e-01, -2.926e-02, 4.379e-02, -1.110e-02, 3.742e-02, 4.095e-02, 2.133e-03, 5.670e-02, 6.495e-03, 2.460e-03, -1.015e-02, 1.909e-02, -2.733e-02)); + r += mul(s3_3, M4(1.325e-01, -4.033e-02, -7.704e-02, 1.749e-02, 3.448e-02, 8.126e-03, -1.382e-01, 7.792e-02, -1.686e-01, 1.118e-01, 9.789e-02, 8.128e-02, 1.519e-02, -3.246e-02, 3.822e-02, 1.307e-02)); + r += mul(s3_4, M4(-2.686e-01, -1.360e-01, -2.257e-01, -1.253e-01, 1.299e-01, 3.730e-01, 5.186e-03, 4.527e-02, 5.434e-02, -3.687e-01, -7.868e-02, -2.339e-01, -5.053e-02, -1.593e-02, -1.770e-02, 7.165e-03)); + r += mul(s3_5, M4(-1.215e-01, -4.649e-02, -7.364e-02, 9.947e-03, -6.909e-02, 5.499e-02, -5.504e-02, 3.746e-02, -1.890e-01, 3.576e-02, 8.347e-02, 8.117e-02, 6.363e-02, 8.396e-03, 2.059e-02, -4.964e-02)); + r += mul(s3_6, M4(-3.645e-02, -1.210e-02, 2.680e-02, -2.743e-02, 6.655e-02, -3.608e-02, -9.769e-02, -5.215e-02, -7.768e-02, 8.398e-02, 3.276e-01, 2.325e-02, 4.869e-02, 8.209e-03, -3.908e-02, 5.922e-02)); + r += mul(s3_7, M4(-1.293e-03, 2.624e-02, -1.241e-01, 4.513e-02, -6.860e-02, -1.391e-02, -1.504e-01, -9.386e-02, 6.850e-02, -1.116e-02, -2.939e-01, 1.790e-01, 2.221e-02, -3.076e-02, -1.267e-01, 3.525e-03)); + r += mul(s3_8, M4(5.885e-02, 4.716e-02, 1.394e-01, -9.032e-03, 9.534e-03, -2.841e-04, -2.279e-01, 5.041e-02, 
9.390e-02, 1.978e-01, 2.290e-01, 6.311e-02, 1.709e-02, -2.264e-02, 6.404e-03, 5.286e-02)); + r += mul(s4_0, M4(-3.989e-02, 1.043e-01, 2.167e-02, -4.671e-02, 1.772e-02, 2.149e-02, -4.354e-02, 2.864e-02, 3.404e-02, 8.198e-03, -3.285e-02, 4.617e-02, 5.805e-02, -9.201e-02, -9.894e-02, 6.796e-02)); + r += mul(s4_1, M4(-1.147e-01, 2.413e-02, -4.551e-02, -4.119e-02, -5.651e-02, 3.723e-02, 8.087e-02, -8.765e-02, 6.067e-02, 3.875e-02, -3.697e-02, -2.072e-02, 8.575e-02, -5.648e-03, 2.253e-03, -1.111e-02)); + r += mul(s4_2, M4(2.399e-02, 1.158e-02, 8.217e-03, -8.103e-03, 4.760e-04, 2.772e-02, -5.089e-02, 1.294e-02, 2.670e-02, -7.396e-03, 3.874e-02, -1.791e-03, -1.335e-02, 3.958e-03, 6.292e-02, 3.807e-02)); + r += mul(s4_3, M4(-5.314e-02, 1.613e-01, -6.274e-02, 3.254e-02, -2.636e-02, 7.294e-02, 1.556e-01, -1.084e-01, -1.058e-01, 6.138e-02, -2.246e-02, -1.212e-01, 5.899e-02, -9.672e-02, -6.812e-03, -2.475e-02)); + r += mul(s4_4, M4(7.112e-02, -5.885e-02, 3.622e-02, 1.856e-01, 2.469e-01, -1.692e-01, -3.037e-01, 2.660e-02, -1.142e-01, 8.413e-02, -5.529e-02, -6.762e-02, -5.900e-02, -7.882e-02, -3.681e-02, -5.285e-02)); + r += mul(s4_5, M4(-1.528e-01, -6.756e-03, -1.334e-01, 1.072e-01, 1.372e-01, -7.455e-04, 8.798e-02, -1.432e-01, -4.227e-02, 4.008e-02, -3.368e-03, 1.463e-03, 7.924e-02, 5.418e-02, 1.207e-01, 4.361e-02)); + r += mul(s4_6, M4(9.618e-02, -3.959e-02, -1.578e-01, 1.450e-02, 1.987e-02, -7.605e-03, -2.451e-02, 3.678e-02, 7.314e-02, 2.967e-02, 7.232e-02, 8.189e-02, 1.972e-02, -1.425e-02, -6.249e-02, 2.753e-02)); + r += mul(s4_7, M4(-7.886e-02, -7.267e-03, -4.764e-02, -1.432e-01, -6.068e-02, -1.336e-02, -7.154e-02, 2.613e-02, 4.865e-02, 8.846e-02, -5.496e-02, 3.770e-02, 1.078e-02, -4.761e-02, 4.391e-02, -3.580e-03)); + r += mul(s4_8, M4(-5.023e-02, 1.002e-01, -1.076e-01, -9.511e-02, -4.760e-02, -1.926e-03, -1.515e-02, -3.226e-02, 1.395e-02, 1.796e-02, 3.720e-02, 7.361e-03, -9.088e-03, -3.335e-02, 8.838e-03, -3.721e-02)); + r += mul(s5_0, M4(3.031e-02, 3.363e-02, 3.030e-03, 
9.107e-02, -1.014e-01, -7.719e-03, 5.239e-02, -1.150e-02, 7.454e-03, -4.267e-02, -1.852e-02, -7.081e-03, 6.605e-02, -5.212e-02, 3.289e-02, 3.480e-02)); + r += mul(s5_1, M4(-2.654e-02, 4.664e-02, -7.637e-03, -3.020e-02, -2.783e-02, 1.532e-02, -4.592e-02, -1.898e-01, -9.555e-02, -8.444e-02, -7.414e-03, -3.203e-01, 3.379e-02, -2.939e-02, 6.682e-04, -9.268e-02)); + r += mul(s5_2, M4(-1.157e-01, 1.323e-02, -4.952e-02, 3.422e-02, -9.972e-02, -1.913e-02, 3.900e-02, -1.708e-02, 1.075e-01, -8.384e-02, 7.815e-02, -5.829e-02, 5.690e-02, -8.337e-02, 7.177e-03, -2.996e-03)); + r += mul(s5_3, M4(4.825e-02, 2.533e-05, -9.939e-02, 2.553e-02, 7.429e-02, 2.843e-02, -4.936e-02, 7.694e-02, -3.963e-02, -2.134e-01, -9.395e-02, -1.557e-01, 8.082e-02, -2.030e-01, 2.238e-02, -4.166e-02)); + r += mul(s5_4, M4(1.793e-01, -1.955e-01, 4.245e-03, -3.467e-02, -1.411e-01, -8.908e-02, -2.465e-01, -1.062e-01, -1.475e-01, -1.815e-01, 2.231e-01, -2.141e-01, -8.714e-02, -1.863e-02, 8.106e-02, -1.652e-01)); + r += mul(s5_5, M4(-4.051e-02, 8.651e-02, 1.911e-02, -4.472e-02, -2.548e-02, 4.238e-03, 8.314e-02, 8.535e-04, -4.288e-02, 1.326e-02, -4.075e-02, -6.859e-02, -1.268e-02, -2.006e-03, 1.020e-01, -7.679e-03)); + r += mul(s5_6, M4(-2.600e-02, -5.874e-02, -6.383e-02, -4.344e-02, -7.475e-03, -8.672e-02, -2.676e-02, 1.682e-02, 1.675e-02, -8.346e-02, 2.572e-02, 4.356e-02, -8.300e-02, -7.561e-03, 1.339e-02, -1.199e-02)); + r += mul(s5_7, M4(-2.708e-02, -1.052e-02, 2.861e-01, 2.421e-02, -7.350e-02, 6.591e-02, -1.028e-01, 1.598e-02, -2.900e-03, -1.052e-01, 2.706e-01, 3.259e-02, -1.327e-02, 1.789e-02, -3.740e-02, 9.238e-03)); + r += mul(s5_8, M4(-4.033e-03, 5.990e-02, -4.944e-02, -1.359e-02, 3.455e-02, 4.470e-03, 8.746e-02, -2.432e-02, -4.091e-02, -7.442e-02, -4.041e-02, 4.723e-02, 3.592e-02, 3.698e-02, -1.842e-02, -6.572e-02)); + r += mul(s6_0, M4(6.685e-03, 7.485e-02, -1.710e-02, 1.088e-02, 3.259e-02, -4.319e-03, -7.286e-02, 1.912e-01, -6.803e-02, 5.241e-02, 1.160e-01, -2.947e-02, -3.404e-02, -7.214e-02, 
-9.967e-02, 5.521e-03)); + r += mul(s6_1, M4(-2.227e-02, 1.553e-01, 3.670e-02, -3.572e-02, 2.372e-01, -1.481e-01, -2.047e-01, 2.490e-01, -3.064e-03, -6.818e-03, -4.601e-03, -8.369e-03, 4.889e-02, 2.389e-02, 2.834e-02, 1.179e-01)); + r += mul(s6_2, M4(1.370e-03, 2.922e-02, -8.196e-02, 3.682e-02, -1.510e-02, 1.928e-02, 8.079e-02, 4.465e-02, 1.874e-03, -2.775e-02, -3.510e-02, 1.074e-02, 1.147e-02, -3.479e-02, -8.156e-02, 7.632e-02)); + r += mul(s6_3, M4(-2.052e-02, 5.468e-02, 8.972e-02, 1.748e-02, 1.062e-02, -5.604e-02, -2.104e-02, -1.168e-01, -2.826e-02, 8.708e-02, -3.005e-02, 4.044e-02, 2.717e-03, -8.515e-02, -2.545e-02, 1.748e-02)); + r += mul(s6_4, M4(-3.675e-02, -7.230e-02, -1.514e-01, -6.986e-02, -2.293e-01, 1.193e-01, -1.353e-01, 3.926e-02, 4.473e-02, -8.426e-02, -5.745e-02, 2.172e-01, -1.196e-01, 3.766e-02, -1.090e-01, 2.146e-02)); + r += mul(s6_5, M4(-1.926e-02, 3.526e-02, -8.507e-03, 8.632e-02, -1.409e-02, 4.581e-03, -4.015e-02, -5.444e-03, 2.476e-03, -3.332e-03, 1.063e-01, 1.064e-02, 1.944e-02, -1.168e-02, -3.969e-02, 7.619e-02)); + r += mul(s6_6, M4(7.458e-02, -8.309e-04, 5.156e-02, 3.045e-02, -2.027e-02, 4.268e-02, -9.577e-02, -2.027e-02, 1.083e-02, -2.694e-02, -9.081e-02, -3.254e-02, 1.457e-02, 2.705e-02, -4.239e-02, -1.168e-02)); + r += mul(s6_7, M4(-1.551e-02, 1.555e-02, 3.333e-02, 1.551e-02, -2.358e-02, -1.985e-02, 5.884e-02, -2.825e-02, 2.197e-02, 1.169e-01, -4.803e-02, 8.349e-03, 2.392e-03, -2.082e-02, -3.377e-02, -4.084e-02)); + r += mul(s6_8, M4(6.048e-02, -3.139e-02, -3.140e-02, 4.238e-02, -7.827e-03, -5.632e-02, -3.971e-02, -2.092e-02, -1.757e-02, 2.497e-02, 9.934e-03, -1.906e-02, -6.897e-02, -6.564e-02, 3.216e-02, -5.010e-02)); + r += mul(s7_0, M4(3.706e-02, -3.405e-02, -3.631e-02, 4.191e-02, 8.408e-02, -7.818e-02, -4.613e-02, 8.811e-02, -4.304e-02, 8.061e-03, 2.657e-03, -1.014e-01, 3.056e-02, -6.020e-02, -7.336e-02, -1.122e-02)); + r += mul(s7_1, M4(1.492e-02, 3.068e-02, 4.643e-02, -4.821e-02, 5.475e-02, -3.932e-02, -1.925e-02, 1.088e-01, 
7.003e-02, 1.456e-01, 6.545e-02, 1.387e-02, -4.953e-02, 6.606e-02, 8.140e-02, 1.567e-01)); + r += mul(s7_2, M4(3.314e-02, 2.750e-02, 6.535e-02, 2.061e-02, 5.650e-03, 7.274e-02, 6.622e-02, -3.237e-02, -2.759e-02, -3.971e-02, -9.094e-02, 6.809e-02, 1.583e-01, -4.280e-02, -1.262e-02, 2.825e-02)); + r += mul(s7_3, M4(1.734e-02, -8.997e-02, 5.895e-03, -3.471e-02, 5.080e-02, -2.903e-02, 6.595e-02, -2.016e-02, 5.886e-03, 1.329e-01, 2.321e-02, -3.734e-02, 2.672e-02, -4.128e-02, 3.937e-02, -5.398e-02)); + r += mul(s7_4, M4(-5.764e-02, -8.530e-03, -5.284e-03, 1.995e-02, -1.535e-01, -7.842e-02, 2.841e-01, 5.771e-02, -4.518e-01, -9.468e-02, -1.945e-01, -1.713e-02, -9.584e-02, -1.448e-01, 9.651e-02, -2.114e-01)); + r += mul(s7_5, M4(4.921e-02, -2.335e-02, 6.736e-03, 9.647e-02, 4.729e-02, -1.626e-01, 1.514e-02, 2.695e-02, 1.966e-02, 2.227e-03, 4.152e-02, -5.731e-02, -4.104e-02, -2.117e-01, -1.313e-01, 3.599e-02)); + r += mul(s7_6, M4(-2.224e-03, -5.215e-02, 8.148e-03, -1.054e-02, -2.011e-02, -7.110e-03, -4.378e-02, -4.554e-02, 1.837e-02, 2.168e-02, 6.596e-02, -3.392e-03, 3.671e-02, -2.368e-02, 7.803e-02, 2.117e-02)); + r += mul(s7_7, M4(5.059e-02, 5.411e-02, 1.153e-01, -2.684e-02, 2.687e-02, 2.498e-02, 1.224e-01, -5.599e-02, 1.326e-01, 5.794e-02, 1.564e-01, 6.672e-03, -2.287e-02, 7.290e-04, -5.060e-02, -1.173e-02)); + r += mul(s7_8, M4(1.739e-02, 1.692e-02, 7.444e-02, -9.690e-02, 2.404e-02, -5.695e-02, -8.741e-03, -4.248e-02, -1.166e-02, 3.177e-02, 3.432e-02, -4.448e-03, -4.017e-02, -1.342e-02, 8.129e-02, -4.351e-03)); + r += V4(3.523e-03, -8.447e-03, 2.570e-03, 6.650e-04); + return r; +} + +void Pass8(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = 
l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = 
max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t4[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t5[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t6[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, 
s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t7[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 9 +//!DESC conv8 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t4, t5, t6, t7 +//!OUT t0, t1, t2, t3 + +#define l0(x, y) V4(O(t4, float2(x, y))) +#define l1(x, y) V4(O(t5, float2(x, y))) +#define l2(x, y) V4(O(t6, float2(x, y))) +#define l3(x, y) V4(O(t7, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.163e-03, 3.758e-02, -1.293e-02, -1.812e-03, 1.047e-02, -1.221e-02, 1.272e-02, 3.990e-02, -3.764e-03, -8.226e-02, 3.142e-03, 2.449e-03, 5.394e-02, 4.200e-02, 3.250e-02, 2.179e-02)); + r += mul(s0_1, M4(-4.977e-02, 1.285e-02, 6.215e-03, 1.122e-03, 1.038e-01, 2.604e-02, -1.625e-03, 8.618e-02, 6.954e-02, -5.197e-02, -1.722e-02, -5.276e-02, -2.537e-02, -5.418e-03, -3.673e-02, -1.730e-02)); + r += mul(s0_2, M4(2.432e-02, 
-6.323e-02, 6.821e-03, -4.970e-04, 4.663e-02, 1.828e-02, 5.051e-03, -2.768e-02, 5.109e-02, 1.437e-02, -5.003e-02, 2.399e-02, 2.915e-02, 4.627e-03, -1.239e-02, 2.087e-02)); + r += mul(s0_3, M4(1.510e-02, -4.971e-02, 5.598e-03, -3.265e-04, 2.438e-02, 5.457e-02, 1.265e-02, 1.988e-02, -1.251e-02, 1.782e-02, -1.172e-02, -4.812e-02, 4.248e-02, 2.189e-02, 4.387e-02, -4.777e-02)); + r += mul(s0_4, M4(-2.666e-02, 5.312e-02, 3.600e-02, 3.440e-02, -5.164e-02, 1.049e-01, -8.496e-02, -8.078e-02, 1.077e-02, -2.606e-02, -3.434e-02, 1.633e-02, 1.834e-02, 3.657e-01, 1.811e-02, -1.689e-02)); + r += mul(s0_5, M4(1.503e-01, 5.142e-02, 3.078e-02, -2.233e-03, -3.540e-02, -7.650e-02, -5.646e-02, 6.509e-02, -5.972e-02, 9.989e-02, -7.630e-02, -4.226e-02, -1.713e-01, 1.778e-02, 2.337e-02, 5.412e-03)); + r += mul(s0_6, M4(1.487e-02, 1.589e-02, 1.909e-02, 2.397e-03, 8.638e-03, -5.430e-03, 1.950e-02, -4.088e-02, -3.951e-02, 8.155e-02, -4.656e-02, -4.532e-02, 5.799e-03, -3.091e-03, 7.556e-02, -9.244e-03)); + r += mul(s0_7, M4(-3.682e-03, 2.624e-02, 1.519e-02, 1.910e-02, -4.077e-02, -3.815e-02, 2.944e-01, -1.562e-02, 5.634e-02, -1.586e-02, -8.042e-02, 5.548e-02, 2.209e-02, 5.010e-04, -1.945e-01, -7.837e-02)); + r += mul(s0_8, M4(-4.235e-03, 7.396e-03, 3.855e-02, 2.943e-02, 3.392e-04, 2.840e-02, 4.555e-02, 3.071e-02, -2.447e-02, -2.686e-02, -1.474e-01, -1.236e-01, -4.732e-02, 2.316e-02, -1.916e-02, -2.356e-02)); + r += mul(s1_0, M4(-5.662e-02, -1.745e-02, -5.033e-03, -1.262e-02, -1.416e-02, -2.888e-02, -5.133e-03, 2.311e-02, -2.334e-02, 2.234e-02, 3.550e-02, -3.701e-02, 9.749e-03, 1.249e-02, 8.601e-03, 2.353e-02)); + r += mul(s1_1, M4(7.913e-02, 6.084e-02, -4.344e-02, 6.112e-02, 2.960e-02, 3.782e-02, -2.122e-02, 3.911e-02, 1.010e-01, -3.291e-02, -1.743e-02, 9.940e-02, -8.126e-02, -1.161e-02, -5.808e-02, -3.259e-02)); + r += mul(s1_2, M4(-1.213e-02, 1.115e-03, -3.606e-02, 1.455e-02, 1.664e-02, 1.329e-02, -2.419e-03, 8.746e-03, 2.372e-02, 9.913e-02, 1.971e-02, -4.579e-02, 9.815e-03, 4.292e-02, 
-1.999e-02, -1.893e-02)); + r += mul(s1_3, M4(-1.179e-02, -4.023e-02, -6.974e-03, 2.424e-02, 3.545e-02, 5.325e-02, -1.323e-02, 2.684e-02, -7.408e-03, 1.760e-02, 1.047e-01, -5.806e-03, 6.792e-03, -4.425e-02, -5.191e-02, -3.494e-02)); + r += mul(s1_4, M4(1.662e-01, -1.880e-01, -1.949e-02, -1.400e-01, -2.261e-02, 6.909e-02, -8.400e-02, -3.382e-02, -4.781e-02, 7.937e-02, 1.563e-01, -1.889e-01, 1.464e-01, -5.023e-02, -9.010e-02, 1.950e-01)); + r += mul(s1_5, M4(1.639e-01, -1.646e-01, -4.730e-03, 4.396e-02, -8.718e-02, -1.147e-01, -7.944e-02, 1.089e-02, 8.579e-02, 1.786e-01, -1.294e-01, 4.159e-02, -4.083e-01, -1.187e-01, 1.349e-03, 9.304e-02)); + r += mul(s1_6, M4(3.600e-03, 3.908e-02, 1.479e-01, 2.928e-02, 2.900e-02, 1.703e-02, 2.995e-02, -1.953e-02, -1.578e-02, 3.095e-03, -4.326e-03, 2.029e-02, -7.464e-04, 1.157e-02, 4.377e-02, -1.657e-02)); + r += mul(s1_7, M4(-1.218e-02, 1.004e-01, -1.666e-02, 2.707e-01, -7.272e-02, -6.441e-02, 1.782e-01, -3.960e-02, 1.067e-02, 1.358e-02, -8.120e-02, -9.335e-02, 7.539e-02, -4.229e-02, -1.822e-01, 8.759e-02)); + r += mul(s1_8, M4(-1.353e-01, 6.616e-02, -1.582e-01, -1.910e-02, 5.130e-02, 5.146e-02, 2.419e-04, 5.632e-03, 4.300e-02, -9.703e-02, -1.843e-01, -8.131e-02, -2.054e-01, 5.484e-02, 4.258e-02, 8.032e-02)); + r += mul(s2_0, M4(-1.443e-03, 1.266e-02, -4.377e-03, 1.456e-02, 9.075e-03, 3.822e-02, 1.862e-02, -8.763e-02, 3.358e-02, 3.704e-02, 5.637e-02, -1.078e-02, -5.603e-03, -8.833e-03, 6.170e-02, -3.101e-02)); + r += mul(s2_1, M4(-8.875e-03, -4.255e-02, -8.639e-03, -3.144e-02, 5.338e-02, 1.993e-02, -2.897e-02, -2.618e-02, -2.192e-02, -1.938e-02, 6.725e-02, 1.013e-02, -8.374e-02, 1.854e-01, 1.681e-01, 1.666e-01)); + r += mul(s2_2, M4(-3.852e-02, -1.331e-02, -8.824e-03, 1.680e-02, -9.953e-03, 4.866e-02, -2.119e-02, -2.544e-02, 1.733e-02, 8.523e-03, 5.700e-02, -2.003e-02, 1.287e-01, 8.605e-02, 3.843e-03, -3.362e-02)); + r += mul(s2_3, M4(-1.649e-02, -5.156e-02, 2.322e-02, 2.621e-02, -4.374e-03, -4.031e-02, 1.849e-02, -2.824e-02, 
1.557e-02, -2.727e-02, 6.803e-02, 1.272e-02, -3.532e-02, 2.136e-02, -1.791e-02, -3.532e-02)); + r += mul(s2_4, M4(-1.233e-01, 1.773e-01, -1.449e-02, -1.111e-01, 1.522e-01, 5.892e-02, -7.358e-02, -2.490e-03, -7.480e-02, -9.953e-02, 4.489e-02, 5.109e-02, 9.398e-03, 1.778e-01, 5.933e-02, -1.200e-01)); + r += mul(s2_5, M4(4.894e-02, 6.952e-02, 1.113e-03, -3.899e-02, 1.489e-01, 2.495e-02, -5.650e-02, -2.200e-02, 3.888e-02, -5.917e-02, 5.631e-02, -5.648e-03, 4.334e-02, -1.391e-01, -1.235e-01, 1.303e-01)); + r += mul(s2_6, M4(-2.882e-02, -9.277e-04, 2.776e-02, 4.659e-03, -6.010e-03, 8.332e-03, 1.627e-02, 6.162e-03, 1.398e-02, 6.050e-04, 8.861e-02, 8.111e-03, -2.020e-02, -2.625e-02, -2.498e-02, 3.772e-02)); + r += mul(s2_7, M4(-2.603e-02, -4.788e-02, 1.212e-01, 2.412e-02, 1.096e-01, 6.875e-02, -4.077e-02, -2.471e-02, 1.411e-02, 2.778e-02, 2.659e-02, -7.912e-03, 2.304e-02, 1.183e-02, -2.530e-01, -6.461e-02)); + r += mul(s2_8, M4(-1.540e-02, 1.168e-03, 1.128e-02, -1.633e-02, 2.524e-02, 3.186e-02, 5.005e-03, -7.069e-03, 5.819e-04, 8.252e-03, 1.526e-01, 1.689e-02, 3.763e-02, -1.503e-02, -6.911e-02, -1.912e-02)); + r += mul(s3_0, M4(-2.372e-03, 1.034e-02, 1.729e-03, 2.638e-02, 7.424e-03, 4.830e-02, 3.837e-02, -1.182e-01, -3.794e-02, 4.551e-02, 4.351e-02, -6.389e-02, -3.141e-02, -7.017e-02, -1.263e-02, -2.644e-02)); + r += mul(s3_1, M4(-2.983e-02, 4.225e-02, -1.368e-02, -1.883e-03, -3.442e-02, -1.005e-01, -2.421e-02, 6.129e-02, 1.177e-01, 1.918e-01, 1.059e-01, -4.087e-02, 5.202e-02, -1.259e-01, -1.377e-02, -2.708e-02)); + r += mul(s3_2, M4(2.647e-02, -7.393e-02, -2.562e-02, 2.553e-02, 1.038e-01, -2.515e-02, -1.384e-02, -5.238e-02, 3.187e-02, 4.110e-02, 4.839e-02, -3.347e-02, 4.652e-02, 2.220e-02, -2.716e-02, 2.800e-02)); + r += mul(s3_3, M4(5.081e-03, 5.175e-02, 5.579e-02, 2.651e-02, -6.240e-02, -8.506e-02, 1.635e-02, 7.569e-02, 3.674e-02, 7.343e-02, 9.829e-02, -8.204e-03, -1.582e-02, 6.919e-02, -8.234e-02, -3.219e-03)); + r += mul(s3_4, M4(-7.487e-02, 3.794e-01, 1.092e-01, 
6.132e-02, -2.458e-01, 5.238e-02, -7.515e-02, 4.969e-02, 9.227e-02, 8.565e-02, 2.066e-01, -2.635e-01, 5.388e-02, 3.474e-02, -5.871e-02, 8.452e-02)); + r += mul(s3_5, M4(1.793e-01, 3.243e-01, 6.174e-02, 3.000e-02, -1.568e-01, -2.187e-02, -6.003e-02, 7.308e-02, -1.456e-01, 1.412e-01, 1.136e-01, 1.225e-02, -2.162e-02, -2.000e-02, -1.172e-01, -7.278e-02)); + r += mul(s3_6, M4(2.557e-05, -3.526e-03, 7.598e-02, 2.888e-02, -1.007e-02, -1.121e-02, 7.371e-02, -2.658e-03, -4.684e-03, 1.377e-01, 1.506e-01, -1.538e-01, -1.258e-02, -3.806e-03, -2.926e-02, -5.665e-03)); + r += mul(s3_7, M4(-7.977e-02, 7.167e-02, 1.215e-01, 1.001e-01, 1.639e-01, 5.450e-02, -3.106e-02, 2.305e-02, 2.065e-01, 1.043e-03, 4.629e-01, 7.788e-02, 1.561e-02, -1.805e-03, -7.499e-03, -5.985e-03)); + r += mul(s3_8, M4(-3.189e-01, 8.572e-02, 5.205e-02, -5.879e-03, 3.003e-02, -3.599e-02, -9.245e-03, 6.641e-02, 9.694e-02, 1.107e-01, 1.427e-01, -1.131e-01, -2.880e-02, -4.217e-04, -7.266e-02, -4.372e-02)); + r += mul(s4_0, M4(7.380e-03, 2.803e-02, -9.678e-03, 1.445e-02, -1.174e-01, -1.182e-01, -5.408e-02, 1.256e-01, 1.203e-02, 5.464e-02, 5.106e-02, -1.193e-01, 2.103e-02, 2.005e-02, 6.424e-03, -7.796e-03)); + r += mul(s4_1, M4(1.144e-01, 9.673e-02, 4.603e-02, -1.514e-02, 5.934e-02, 3.573e-01, -2.150e-02, 4.749e-02, -1.334e-01, -4.821e-02, 7.544e-02, -2.522e-02, -7.433e-02, -1.898e-02, -6.274e-02, 9.512e-03)); + r += mul(s4_2, M4(1.210e-01, -7.077e-02, 4.060e-02, -4.624e-02, 1.264e-01, 1.023e-01, 1.113e-02, -2.238e-02, -2.822e-02, 1.062e-02, -7.220e-04, 7.964e-03, 9.206e-02, -1.255e-01, -5.626e-03, 6.488e-03)); + r += mul(s4_3, M4(-2.819e-02, -5.091e-02, -2.555e-02, 5.825e-02, 2.027e-02, -1.595e-02, 7.369e-02, 1.607e-01, 7.973e-03, -9.789e-03, 6.486e-02, -5.431e-02, 1.557e-02, 2.131e-02, -6.740e-02, -3.800e-03)); + r += mul(s4_4, M4(2.899e-02, -1.963e-01, -4.447e-03, -1.937e-01, 1.544e-02, 5.877e-01, 1.209e-01, 2.231e-01, 5.151e-02, -1.451e-01, -1.265e-01, 3.621e-01, -4.030e-02, 1.096e-02, 1.394e-01, 1.566e-01)); + 
r += mul(s4_5, M4(-1.220e-01, -1.074e-01, -4.688e-02, 1.139e-01, -1.270e-01, -1.958e-01, -6.958e-02, 5.790e-02, -2.125e-01, 2.384e-02, -1.143e-01, -2.472e-02, -4.480e-02, 1.414e-01, 2.537e-02, -1.956e-02)); + r += mul(s4_6, M4(-2.249e-03, 4.939e-02, 2.547e-02, 6.518e-03, 1.783e-02, -4.854e-02, 9.588e-02, 1.246e-01, -5.187e-02, 1.011e-02, 6.236e-02, 7.764e-02, 5.926e-02, 1.052e-03, -9.210e-02, 1.820e-02)); + r += mul(s4_7, M4(1.112e-02, -1.005e-03, 5.627e-02, -4.650e-03, -1.019e-01, -2.774e-02, -4.797e-02, -9.489e-02, 1.611e-02, 2.311e-02, 4.969e-02, 2.918e-03, 9.366e-03, 2.128e-02, -2.667e-02, 8.454e-03)); + r += mul(s4_8, M4(7.954e-02, -1.162e-02, -3.825e-02, 3.051e-02, 5.782e-02, 1.317e-02, -2.331e-02, 1.714e-01, -5.622e-02, -4.566e-03, -1.264e-01, -4.613e-02, -3.620e-03, 2.095e-02, 1.811e-01, 4.435e-02)); + r += mul(s5_0, M4(1.911e-02, 4.114e-02, -2.698e-02, 7.956e-03, 4.051e-02, 1.081e-01, -2.460e-02, 3.704e-02, -2.240e-02, -7.579e-03, 2.443e-02, -3.274e-02, -5.654e-02, -1.421e-01, 9.256e-03, 2.626e-03)); + r += mul(s5_1, M4(5.279e-02, 1.891e-01, 2.931e-03, 6.000e-02, -5.619e-02, 9.849e-03, 7.842e-03, 4.068e-02, 7.803e-02, -7.617e-03, 2.789e-02, -1.069e-01, 8.581e-02, 2.530e-02, -5.946e-02, 4.150e-02)); + r += mul(s5_2, M4(-1.838e-02, -5.041e-03, -2.386e-02, -9.564e-03, 8.040e-03, -1.063e-01, -1.715e-02, 2.375e-02, -3.968e-02, 2.704e-02, 1.302e-02, -2.781e-02, -1.921e-02, 3.051e-02, 3.794e-03, 2.820e-03)); + r += mul(s5_3, M4(-1.307e-02, -3.145e-02, -1.016e-03, 8.268e-03, 3.836e-02, -9.889e-02, 2.164e-02, 2.833e-02, 4.851e-03, -3.649e-02, -4.677e-02, -1.289e-02, -4.046e-02, 1.519e-01, -5.701e-02, 1.659e-02)); + r += mul(s5_4, M4(1.599e-02, -1.972e-01, 5.258e-02, -9.934e-02, -6.811e-02, 5.773e-03, 2.508e-02, 3.029e-02, -4.904e-02, -2.255e-02, -4.280e-02, 1.142e-01, 1.684e-01, -2.098e-01, -3.852e-02, 1.056e-01)); + r += mul(s5_5, M4(-1.391e-01, 1.769e-01, 1.968e-02, 3.184e-02, 2.109e-02, 3.398e-02, -3.454e-02, 2.694e-02, -2.308e-02, -5.924e-02, -4.630e-02, 
2.703e-03, -2.114e-01, -4.276e-02, -2.947e-02, 8.727e-02)); + r += mul(s5_6, M4(-2.128e-02, 1.915e-02, 1.806e-02, 8.424e-03, 4.411e-03, 3.856e-02, 4.922e-03, -1.195e-02, -1.828e-02, -2.112e-02, -1.254e-02, 7.101e-02, 1.845e-02, -1.504e-02, -2.836e-02, 9.746e-03)); + r += mul(s5_7, M4(2.673e-02, -7.386e-03, 5.003e-02, -5.457e-02, 9.805e-02, -1.111e-02, 1.450e-02, 5.296e-02, -1.907e-02, -3.562e-02, -5.054e-02, -4.874e-02, 1.863e-02, 1.324e-01, 3.033e-02, 1.632e-02)); + r += mul(s5_8, M4(6.026e-02, -5.803e-03, 3.790e-02, -1.785e-02, -6.598e-03, 6.772e-03, 2.008e-02, -7.773e-03, 3.404e-02, -5.681e-02, -4.015e-02, -1.481e-02, 3.457e-02, 8.365e-02, -1.311e-02, 2.835e-02)); + r += mul(s6_0, M4(1.874e-02, 2.963e-02, 8.589e-03, 9.893e-03, 1.142e-02, 5.424e-03, 9.955e-03, 9.789e-03, -4.377e-02, -2.456e-01, 6.007e-02, -3.494e-02, 9.568e-03, -1.820e-02, -1.093e-02, 1.087e-03)); + r += mul(s6_1, M4(-1.324e-01, 3.039e-02, -5.110e-03, -1.549e-03, 1.369e-02, 7.104e-02, -3.902e-03, -1.145e-02, 3.795e-01, -4.257e-01, 3.671e-02, 1.384e-02, 1.646e-02, -2.939e-02, 9.106e-03, 2.023e-02)); + r += mul(s6_2, M4(4.218e-02, -3.713e-02, -3.178e-02, 3.314e-02, 5.291e-03, 1.776e-02, 3.924e-04, -1.052e-02, -9.073e-02, -3.172e-01, -3.482e-02, -5.055e-02, -1.385e-02, 7.729e-02, 2.638e-03, -1.892e-04)); + r += mul(s6_3, M4(2.294e-02, -6.769e-02, 1.210e-02, 9.301e-03, -1.003e-02, 1.296e-02, -3.793e-02, 9.515e-03, 7.019e-03, 6.073e-02, 1.563e-01, -7.581e-03, 1.393e-02, 1.405e-02, -6.916e-02, 1.075e-02)); + r += mul(s6_4, M4(-8.817e-02, 2.766e-03, -5.930e-02, 1.035e-01, -6.339e-02, 2.813e-02, 9.339e-02, -1.344e-01, 1.301e-01, 2.397e-01, -4.362e-02, 2.980e-02, 1.294e-01, 7.722e-02, 1.689e-02, 5.884e-02)); + r += mul(s6_5, M4(1.634e-01, -1.067e-01, -3.926e-02, 1.696e-02, 2.136e-01, 1.045e-02, -8.671e-03, -6.394e-03, -2.086e-01, 6.436e-02, -8.610e-02, -3.077e-02, -1.447e-01, 1.470e-02, -2.206e-02, -8.833e-02)); + r += mul(s6_6, M4(4.121e-02, 3.653e-03, 9.934e-03, -6.902e-03, 3.086e-03, -1.948e-03, 
-3.164e-03, -6.264e-03, 1.644e-02, -4.309e-02, 1.099e-01, 1.237e-02, 4.810e-03, 3.916e-02, -9.033e-02, -1.602e-02)); + r += mul(s6_7, M4(4.115e-02, -7.165e-03, -2.393e-02, -1.639e-02, 2.697e-02, -2.052e-02, -5.419e-02, -4.974e-03, 6.181e-02, -9.166e-03, 3.334e-02, 2.322e-02, 1.120e-01, 8.432e-02, 6.979e-03, -1.082e-01)); + r += mul(s6_8, M4(1.035e-01, -2.271e-02, -2.727e-03, 1.913e-02, -3.115e-02, -2.378e-03, -1.533e-02, -3.342e-02, -1.665e-01, -3.854e-02, -7.222e-02, 4.908e-02, -1.622e-01, 2.703e-02, 1.628e-03, 2.494e-02)); + r += mul(s7_0, M4(2.206e-02, -7.713e-02, -1.060e-02, -7.856e-03, -8.739e-02, -8.483e-02, 1.705e-02, 2.507e-02, 1.730e-02, 1.474e-02, 1.893e-02, 1.194e-02, -2.405e-02, -5.729e-02, -1.529e-02, 2.111e-02)); + r += mul(s7_1, M4(4.148e-02, 4.829e-02, -9.404e-03, 2.608e-02, 6.859e-02, -8.264e-02, 4.951e-02, -2.350e-02, -4.359e-02, 2.061e-05, 4.833e-03, 4.083e-03, 2.436e-03, 3.365e-04, 1.053e-02, -4.982e-03)); + r += mul(s7_2, M4(2.788e-02, 5.720e-02, -1.417e-02, -1.855e-02, -1.366e-01, 1.563e-02, 1.777e-02, 2.607e-02, 9.303e-04, 1.798e-02, 6.918e-03, -1.112e-02, 1.452e-02, -3.567e-03, -1.566e-02, -2.902e-02)); + r += mul(s7_3, M4(4.434e-02, 1.092e-01, 6.465e-02, -1.144e-01, -1.169e-01, -7.731e-02, 4.387e-02, 4.264e-02, 1.655e-04, -1.567e-02, -3.360e-03, 2.005e-03, 7.195e-02, 1.235e-01, -5.993e-02, 6.344e-02)); + r += mul(s7_4, M4(-2.846e-01, 7.017e-01, 8.025e-02, 3.689e-01, 2.017e-01, 9.575e-02, 1.644e-01, 3.223e-02, 7.108e-02, 3.252e-02, 2.997e-02, 4.112e-02, 1.675e-01, 4.688e-01, 5.569e-02, 4.097e-01)); + r += mul(s7_5, M4(-8.491e-02, 5.125e-02, -4.314e-02, 4.776e-02, 4.496e-02, -1.387e-01, -2.997e-02, 5.575e-02, -1.348e-01, 2.937e-02, 5.464e-03, -4.188e-04, -5.936e-02, 9.907e-02, -2.264e-02, 4.540e-02)); + r += mul(s7_6, M4(4.639e-02, 2.088e-02, -5.529e-02, 5.430e-02, -2.772e-02, -9.927e-02, 7.334e-02, 5.674e-02, -5.370e-03, 1.712e-02, 1.511e-02, -9.812e-03, 7.644e-03, -4.335e-03, -9.858e-02, 1.102e-02)); + r += mul(s7_7, M4(-3.219e-02, 
3.117e-02, -6.292e-02, -2.947e-01, -5.795e-02, -4.024e-02, -8.836e-03, -7.493e-02, -3.911e-03, 1.426e-02, 8.336e-02, 5.121e-03, 9.672e-02, 2.570e-01, 3.033e-03, -7.363e-02)); + r += mul(s7_8, M4(1.485e-01, -7.624e-02, 4.307e-02, 6.218e-02, 2.407e-02, 2.447e-02, 6.895e-03, 7.684e-02, 4.433e-02, -1.227e-02, 4.716e-02, 1.408e-02, -2.840e-01, 8.665e-02, 8.153e-02, 9.530e-02)); + r += V4(3.794e-03, -6.271e-04, 2.485e-03, 1.034e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.688e-02, 1.620e-03, 2.600e-03, -1.661e-02, -1.969e-02, -4.045e-02, 2.004e-02, 1.275e-02, -2.568e-03, -1.516e-02, -1.471e-02, 3.537e-02, 1.000e-02, -4.602e-03, 3.973e-02, 4.770e-03)); + r += mul(s0_1, M4(-6.627e-02, -1.956e-03, -2.893e-02, -1.149e-02, 7.862e-02, -2.627e-02, 2.400e-02, 3.658e-02, -2.325e-02, -4.384e-02, -8.477e-03, -2.188e-02, -4.177e-02, -5.982e-03, 6.463e-03, 5.629e-04)); + r += mul(s0_2, M4(-5.602e-02, 2.109e-02, 1.306e-02, -2.271e-02, 2.987e-02, 2.223e-02, -1.418e-02, 7.313e-02, 3.753e-02, -4.956e-03, -2.664e-02, -3.003e-02, 3.356e-02, -3.528e-04, -6.530e-03, 9.253e-02)); + r += mul(s0_3, M4(-3.501e-02, 2.454e-02, 4.258e-02, -2.786e-03, -8.423e-03, -3.658e-03, 3.081e-02, -5.229e-02, -3.026e-03, -3.401e-03, -4.111e-02, 2.753e-02, 4.135e-02, -4.806e-02, -4.145e-02, -8.509e-03)); + r += mul(s0_4, M4(-6.815e-02, -1.008e-02, 
9.775e-02, -1.491e-03, 3.135e-01, 1.355e-01, 4.295e-01, 1.489e-01, -8.228e-02, 1.565e-01, -4.864e-02, -1.931e-03, 1.782e-01, 1.140e-01, -5.738e-02, -1.471e-01)); + r += mul(s0_5, M4(-6.358e-02, -3.115e-02, 1.176e-02, -1.185e-01, -5.756e-02, -1.359e-03, 2.088e-02, 1.216e-02, 2.319e-01, 8.109e-03, -1.532e-02, 1.001e-01, -4.522e-02, -3.685e-02, 8.178e-02, 1.908e-01)); + r += mul(s0_6, M4(1.539e-03, -2.277e-02, -4.300e-02, -7.362e-03, 1.287e-02, -3.248e-02, 3.525e-02, 6.688e-04, 1.585e-02, -1.905e-02, 4.454e-02, 6.523e-04, 2.880e-02, 2.347e-02, 1.953e-02, -5.062e-03)); + r += mul(s0_7, M4(-3.124e-02, -3.693e-02, 1.489e-02, 1.767e-03, 1.441e-02, -1.741e-01, -1.433e-02, 7.572e-02, -1.182e-02, 6.519e-02, 7.046e-02, 1.714e-02, -4.300e-02, -2.017e-01, -2.466e-03, 2.791e-02)); + r += mul(s0_8, M4(-7.179e-03, 4.757e-02, 1.911e-02, 3.829e-02, -1.630e-02, -9.665e-03, 4.565e-02, -5.551e-02, -9.989e-03, -1.013e-01, -4.229e-02, -7.202e-02, 1.935e-02, -4.290e-03, -1.460e-02, -1.759e-04)); + r += mul(s1_0, M4(8.306e-03, -4.549e-02, 3.787e-02, 3.353e-02, -4.052e-02, 1.061e-02, 3.073e-02, -4.544e-03, 3.342e-02, -1.539e-02, 4.609e-02, 4.170e-02, 3.968e-03, -5.158e-03, 3.410e-02, -2.230e-02)); + r += mul(s1_1, M4(-8.669e-02, 8.401e-03, -8.128e-02, -1.108e-01, 2.188e-02, 5.959e-03, -6.349e-02, 3.816e-02, 4.950e-02, 1.902e-02, -4.972e-02, 6.662e-03, -5.517e-02, 1.307e-02, -1.023e-01, 1.569e-01)); + r += mul(s1_2, M4(-4.339e-03, -9.116e-03, 3.269e-02, 6.631e-03, 3.124e-02, 1.329e-02, 3.488e-02, 5.174e-02, -1.194e-01, -5.892e-03, 1.328e-01, -7.424e-02, 3.150e-02, 9.712e-03, -1.382e-01, 1.354e-01)); + r += mul(s1_3, M4(2.349e-02, -1.110e-01, 1.577e-01, 4.993e-02, -7.000e-02, 1.363e-02, -6.165e-02, -6.123e-02, 8.230e-03, -5.006e-03, 6.500e-02, -1.023e-03, -2.211e-04, -7.154e-03, -2.293e-02, 1.220e-03)); + r += mul(s1_4, M4(7.914e-03, -1.903e-02, 1.502e-02, -2.245e-01, 3.243e-01, 1.363e-01, 2.421e-01, 1.736e-01, -1.952e-01, 1.249e-01, -1.436e-01, 8.891e-02, 5.208e-02, 5.334e-01, -4.287e-01, 
3.536e-02)); + r += mul(s1_5, M4(-6.245e-02, -2.399e-02, -5.681e-02, -1.695e-01, -7.771e-02, -4.787e-02, -2.778e-02, 7.013e-03, 3.291e-01, -5.536e-03, -1.948e-01, 1.189e-01, -1.030e-01, 7.201e-02, 6.521e-02, 3.350e-01)); + r += mul(s1_6, M4(9.517e-03, 2.329e-01, -1.247e-01, -1.798e-02, -2.311e-02, -5.092e-02, 6.913e-02, 2.517e-03, 3.602e-02, 2.896e-02, 3.974e-02, -1.510e-03, 6.330e-02, 1.061e-01, -1.001e-02, 5.840e-03)); + r += mul(s1_7, M4(-1.230e-01, -3.088e-02, 1.096e-01, -2.161e-02, 2.731e-02, -1.112e-01, -1.242e-01, 7.487e-02, 4.701e-03, 1.032e-01, 4.261e-02, -1.760e-03, -2.495e-02, -1.617e-01, 9.928e-03, 2.328e-02)); + r += mul(s1_8, M4(-7.188e-02, 3.817e-02, 4.786e-02, -4.014e-02, -7.561e-03, 1.417e-02, -6.037e-03, 9.124e-03, -2.451e-02, -7.406e-02, -5.443e-03, -5.927e-03, 3.337e-02, 6.084e-02, -3.750e-02, 1.238e-02)); + r += mul(s2_0, M4(1.813e-02, -1.772e-03, 5.043e-03, 2.941e-02, 2.342e-02, -6.805e-03, -5.400e-02, -1.601e-02, -1.271e-02, -2.814e-02, 6.646e-02, -3.602e-02, -2.938e-02, 3.830e-02, -5.756e-02, 3.625e-02)); + r += mul(s2_1, M4(-2.340e-02, 2.797e-02, -9.775e-02, -4.790e-03, -5.640e-02, -3.446e-02, 5.422e-02, -1.253e-01, 6.237e-02, 8.356e-04, -3.274e-02, 4.805e-02, -9.531e-02, 4.626e-04, 2.233e-03, 1.519e-01)); + r += mul(s2_2, M4(6.482e-03, 5.675e-03, 6.174e-02, -3.843e-02, 3.552e-02, -1.070e-02, 3.641e-02, 6.807e-02, -4.083e-02, -5.972e-03, 2.947e-02, -3.639e-02, -3.855e-02, 2.284e-02, -2.588e-02, -8.389e-02)); + r += mul(s2_3, M4(6.596e-02, 5.844e-02, -8.882e-02, 2.183e-02, 3.510e-02, -1.529e-01, 2.239e-02, 6.559e-02, -7.805e-03, 4.094e-02, -1.105e-02, 5.632e-03, 1.079e-01, -6.351e-03, 8.070e-02, 2.619e-02)); + r += mul(s2_4, M4(-1.392e-01, -3.877e-02, 1.309e-01, 6.513e-02, 3.357e-02, -3.087e-02, -4.398e-02, -1.856e-01, -3.010e-02, -7.998e-03, -1.716e-02, 4.991e-02, -2.211e-01, -2.140e-01, 3.126e-03, 4.484e-02)); + r += mul(s2_5, M4(1.858e-02, -7.505e-02, 4.180e-02, -3.502e-02, 8.565e-03, 2.029e-03, -4.294e-02, 5.614e-02, 1.200e-02, 
5.652e-02, -3.376e-02, -2.676e-02, 6.660e-02, -1.177e-02, -4.299e-02, 3.267e-02)); + r += mul(s2_6, M4(2.860e-02, -3.219e-02, -5.538e-03, -2.631e-03, -5.422e-02, 5.290e-02, 1.585e-02, -1.692e-02, -1.521e-03, 5.627e-03, 3.952e-03, -9.985e-03, 9.533e-03, 2.745e-02, -1.362e-03, 1.560e-02)); + r += mul(s2_7, M4(-2.660e-03, 3.732e-02, 8.189e-02, 2.610e-02, -2.065e-02, -7.852e-02, 5.515e-02, -6.198e-02, 1.183e-02, 2.200e-02, -8.053e-03, -1.738e-03, -3.579e-02, -2.565e-02, 3.979e-02, 9.076e-03)); + r += mul(s2_8, M4(2.500e-02, 4.805e-03, -2.546e-02, 1.595e-02, -3.212e-02, 1.141e-03, 5.625e-02, -1.051e-02, -2.014e-02, -3.098e-02, 4.631e-02, -8.121e-03, 4.655e-02, 5.989e-02, -2.590e-02, -5.759e-02)); + r += mul(s3_0, M4(2.755e-02, 1.721e-02, -1.964e-02, 1.815e-02, 2.594e-02, 5.194e-02, -9.954e-02, 7.764e-03, -3.718e-02, -8.173e-03, -2.634e-02, -3.145e-02, -1.658e-02, -2.413e-03, 1.497e-02, 2.813e-02)); + r += mul(s3_1, M4(-4.818e-02, 9.187e-03, 1.082e-01, -9.618e-03, -1.078e-03, -5.924e-02, 5.767e-02, -1.573e-01, -6.210e-02, -1.068e-01, 1.610e-01, -1.717e-01, 2.399e-02, -2.177e-02, -1.816e-01, 7.457e-02)); + r += mul(s3_2, M4(3.239e-02, -3.052e-03, -5.994e-02, 7.106e-02, -4.328e-03, 1.696e-02, -4.923e-02, 5.518e-02, 1.881e-02, -8.143e-02, 1.245e-02, 1.153e-02, 3.943e-02, -2.338e-02, 4.641e-02, -1.856e-02)); + r += mul(s3_3, M4(5.081e-02, -7.570e-03, -1.399e-01, -5.766e-03, 1.056e-01, -3.001e-02, -4.437e-02, 1.799e-02, 3.704e-02, -2.498e-01, 9.868e-02, -1.144e-02, 1.197e-01, 5.018e-02, -3.259e-02, -2.079e-02)); + r += mul(s3_4, M4(1.858e-01, -2.154e-02, 1.039e-01, 1.476e-01, 3.315e-01, 1.923e-01, -9.577e-02, -2.388e-01, -3.060e-01, 2.650e-02, -4.730e-01, -4.012e-01, -1.727e-01, -5.333e-02, 1.566e-01, -2.474e-02)); + r += mul(s3_5, M4(1.022e-02, -8.711e-02, 7.465e-02, 2.158e-03, -1.724e-02, 4.548e-02, -9.675e-02, -2.598e-02, -1.019e-01, -5.639e-02, 2.441e-01, 1.528e-01, 1.307e-01, 3.058e-02, -6.324e-02, 8.229e-02)); + r += mul(s3_6, M4(2.247e-02, -4.279e-02, -7.730e-03, 
2.664e-03, -5.508e-02, 1.184e-01, -2.194e-02, -2.835e-02, -6.838e-02, -1.008e-01, 1.036e-01, -3.276e-02, 4.933e-02, -3.893e-03, -1.141e-02, 3.247e-03)); + r += mul(s3_7, M4(6.500e-02, -2.505e-02, -1.286e-01, 6.264e-02, 2.435e-02, -1.039e-01, 2.001e-02, -5.567e-02, -3.446e-02, 2.310e-04, 1.615e-01, -5.778e-02, -1.667e-02, -2.477e-02, 5.057e-02, 3.336e-02)); + r += mul(s3_8, M4(-3.972e-02, 9.174e-02, 3.314e-03, 7.951e-02, -2.571e-02, 1.782e-02, 5.530e-02, 2.399e-02, 1.464e-02, -1.346e-01, -1.693e-02, -6.348e-02, 2.907e-02, -1.423e-02, -2.121e-02, -3.235e-04)); + r += mul(s4_0, M4(-1.179e-02, -4.655e-02, 9.348e-03, -9.519e-03, -2.300e-01, 1.102e-03, 7.401e-02, 2.538e-02, 5.815e-02, 1.980e-01, -2.843e-02, -2.603e-02, 4.451e-02, -9.192e-03, 5.257e-02, -2.093e-02)); + r += mul(s4_1, M4(-3.952e-02, -6.088e-02, 1.147e-01, 1.162e-03, -1.728e-01, 1.465e-03, -8.939e-02, -2.393e-02, 3.740e-01, 5.118e-03, -2.724e-01, -1.146e-01, 3.515e-02, -1.743e-02, -1.492e-01, 1.724e-01)); + r += mul(s4_2, M4(3.989e-02, 4.006e-02, -6.266e-02, -4.154e-02, -3.374e-02, 1.111e-02, 1.076e-01, -5.814e-02, -1.787e-01, 6.540e-02, -5.756e-02, -4.558e-02, -5.229e-02, -1.605e-02, -3.009e-02, -7.247e-02)); + r += mul(s4_3, M4(-6.962e-02, -1.067e-01, 8.455e-02, 2.799e-02, 3.317e-02, 2.349e-01, -5.347e-02, 3.982e-02, 9.300e-02, -7.524e-02, -2.305e-01, 1.330e-01, 7.355e-02, -3.701e-03, -8.832e-02, -6.464e-02)); + r += mul(s4_4, M4(-1.125e-02, 2.497e-01, -9.808e-02, -1.238e-01, 2.612e-01, -5.283e-02, 9.616e-02, -2.290e-01, 2.161e-01, 3.140e-01, 1.661e-01, 8.358e-02, -1.595e-02, 1.697e-01, -8.378e-02, 4.489e-02)); + r += mul(s4_5, M4(-7.592e-02, -6.611e-02, 3.207e-02, 6.973e-02, -9.163e-02, 2.582e-02, -4.628e-02, -7.371e-02, 6.590e-02, 1.204e-01, -5.078e-02, -3.495e-02, -4.221e-02, -6.323e-02, 5.784e-02, -1.189e-02)); + r += mul(s4_6, M4(1.349e-02, 5.168e-02, -9.459e-03, -9.435e-03, -1.527e-02, -7.353e-02, 5.567e-02, 1.555e-02, -5.917e-03, 1.431e-01, 7.490e-03, -1.136e-02, -1.985e-02, 3.699e-02, -6.590e-03, 
-5.483e-02)); + r += mul(s4_7, M4(-4.645e-02, -4.170e-02, 1.860e-02, -2.707e-02, -1.969e-02, 5.237e-02, -5.482e-02, -1.671e-02, 2.997e-02, 4.133e-02, 5.390e-02, 9.477e-03, -1.651e-02, -4.991e-02, -2.497e-02, -8.465e-03)); + r += mul(s4_8, M4(-1.339e-02, 6.872e-02, -1.390e-02, -1.499e-02, 3.492e-03, 6.004e-02, 5.847e-02, 2.786e-02, -1.177e-02, -6.444e-02, -4.751e-04, 5.780e-03, -4.505e-02, 2.415e-02, 2.309e-02, -1.891e-02)); + r += mul(s5_0, M4(-1.439e-02, 8.856e-03, -4.062e-03, -2.768e-02, 2.892e-03, 1.205e-02, 3.124e-02, -3.116e-02, 1.968e-02, -6.956e-02, 2.068e-02, 1.044e-02, 1.051e-01, 5.463e-03, -5.207e-02, 7.550e-02)); + r += mul(s5_1, M4(1.705e-02, -1.009e-02, 2.104e-01, 2.771e-02, -2.763e-02, -1.016e-02, 2.913e-02, 1.721e-03, 4.321e-02, 3.347e-02, -2.001e-01, 5.190e-02, 2.019e-02, 2.347e-02, 3.138e-03, -7.216e-03)); + r += mul(s5_2, M4(6.213e-02, 2.215e-04, -6.324e-02, 3.181e-03, -2.937e-02, 2.684e-02, -3.033e-02, -9.759e-03, 7.617e-03, 5.678e-03, 4.383e-02, 2.802e-02, -3.703e-02, -7.386e-02, -1.076e-01, -6.615e-02)); + r += mul(s5_3, M4(-1.044e-01, -5.583e-02, 4.299e-02, -4.094e-03, 4.665e-03, 9.436e-02, 3.332e-02, -2.490e-02, -5.877e-03, 1.073e-01, -1.052e-01, -7.205e-04, 1.204e-01, -1.462e-02, 4.291e-02, 4.412e-02)); + r += mul(s5_4, M4(-9.580e-03, 4.627e-02, -1.851e-01, -7.692e-03, 4.703e-02, 8.875e-02, -6.477e-02, -5.981e-02, 1.665e-01, 1.911e-01, 4.702e-02, 9.924e-02, -1.685e-01, 8.893e-02, 2.032e-02, -9.426e-02)); + r += mul(s5_5, M4(5.551e-02, -1.719e-02, 1.029e-01, 8.649e-02, 7.177e-03, 6.821e-02, -2.894e-02, 2.611e-03, 5.830e-02, -2.083e-02, -5.045e-02, 3.884e-02, -1.416e-02, 1.542e-01, 4.160e-02, 3.347e-02)); + r += mul(s5_6, M4(-4.073e-03, 3.413e-02, 2.664e-02, -1.858e-02, -9.691e-03, 8.637e-02, -3.548e-02, 1.617e-03, 7.458e-03, -3.505e-03, 2.383e-02, 1.316e-03, 5.534e-03, 2.217e-02, -4.426e-02, 2.022e-02)); + r += mul(s5_7, M4(-3.211e-02, -1.215e-01, 1.905e-02, -2.665e-02, -1.887e-02, 1.581e-02, 1.180e-02, -1.465e-02, -6.945e-03, -6.458e-02, 
2.229e-02, 2.173e-03, 5.289e-03, 1.785e-01, -2.382e-02, -7.196e-02)); + r += mul(s5_8, M4(8.361e-03, -4.776e-02, -4.062e-02, -4.979e-03, -1.914e-02, -4.516e-02, 2.518e-02, -4.994e-02, -2.544e-02, 7.889e-03, 3.192e-02, 1.479e-02, 4.122e-02, -1.223e-01, -3.672e-02, 6.956e-02)); + r += mul(s6_0, M4(2.982e-02, -2.237e-03, 3.926e-02, -1.372e-02, -2.704e-02, 1.751e-03, 2.775e-02, 1.043e-02, 1.676e-01, -2.489e-01, 7.805e-02, 4.061e-02, -6.912e-03, 6.682e-03, -3.892e-02, -4.953e-03)); + r += mul(s6_1, M4(-6.340e-02, -1.980e-02, -7.033e-02, 9.388e-03, -3.794e-02, -2.723e-02, 1.208e-01, -4.089e-02, 6.312e-01, 5.578e-02, -1.312e-01, -5.149e-02, 7.622e-02, 1.538e-02, -2.418e-02, 2.198e-02)); + r += mul(s6_2, M4(-1.989e-03, 2.276e-02, -5.157e-03, 2.582e-03, 1.265e-01, -1.245e-04, -4.509e-02, 6.542e-02, 2.313e-02, 4.024e-02, -1.322e-01, -1.653e-01, -6.536e-03, -9.204e-03, 5.613e-02, -1.824e-02)); + r += mul(s6_3, M4(1.234e-01, -4.512e-03, -4.484e-02, -4.470e-03, -8.846e-03, -8.538e-02, -8.290e-03, -4.593e-02, -3.324e-02, -7.474e-02, 3.080e-02, -6.522e-02, -4.026e-02, 1.896e-02, 1.481e-02, -1.753e-03)); + r += mul(s6_4, M4(-4.701e-02, 1.293e-02, 3.089e-02, -1.295e-01, 1.708e-02, 2.212e-02, -7.435e-02, -8.472e-03, 2.279e-01, -3.650e-02, 2.674e-01, -2.877e-01, 2.375e-01, -4.478e-02, 1.948e-01, 5.877e-02)); + r += mul(s6_5, M4(7.374e-03, 5.838e-02, 1.708e-02, -2.078e-02, 4.462e-02, -5.084e-02, 2.283e-02, 4.827e-02, -1.656e-01, 6.517e-02, -5.528e-02, -2.095e-02, 2.431e-02, 1.133e-02, -1.470e-01, 2.211e-02)); + r += mul(s6_6, M4(3.417e-02, 2.976e-02, -1.042e-02, -2.068e-03, 2.498e-02, 7.744e-02, 1.088e-02, -7.598e-03, -5.186e-02, 2.792e-02, -1.575e-02, -6.310e-03, -4.620e-02, -2.011e-02, 3.923e-02, -9.526e-03)); + r += mul(s6_7, M4(4.330e-03, -4.339e-02, 6.602e-03, -3.648e-02, -4.646e-03, -1.361e-01, 5.384e-02, -2.349e-02, 1.334e-01, -1.469e-01, 2.850e-02, -4.574e-02, 6.043e-02, -5.685e-02, -5.057e-02, -4.692e-03)); + r += mul(s6_8, M4(2.838e-03, 3.079e-02, -1.281e-02, -6.331e-02, 
4.269e-02, -2.337e-02, -4.065e-02, 5.503e-02, -1.305e-01, 1.135e-01, 1.826e-02, 1.203e-02, -2.164e-02, -3.076e-03, 2.937e-02, 4.005e-02)); + r += mul(s7_0, M4(6.965e-02, 4.338e-03, 1.194e-01, 2.811e-02, -2.574e-02, 1.949e-02, 3.243e-02, 4.437e-02, 7.250e-02, -2.422e-04, -1.091e-03, -1.469e-02, 7.245e-03, -7.447e-03, 2.299e-02, 4.998e-03)); + r += mul(s7_1, M4(-8.725e-05, -2.240e-02, -8.736e-02, -1.595e-02, 7.126e-02, 5.370e-02, -1.651e-01, 1.672e-02, 7.959e-02, 1.543e-02, -7.129e-02, 3.632e-02, 6.645e-03, -2.822e-02, -2.425e-03, 3.294e-02)); + r += mul(s7_2, M4(-2.685e-02, 3.116e-02, -1.898e-02, -4.718e-02, 5.786e-03, 1.592e-02, 8.082e-04, -1.662e-02, -2.474e-02, 1.294e-02, -5.428e-03, 1.814e-02, -3.227e-02, 3.449e-03, -7.038e-02, -3.596e-02)); + r += mul(s7_3, M4(3.813e-02, -2.099e-03, 3.946e-02, -4.075e-02, -4.265e-03, 5.593e-02, -8.187e-02, 4.551e-02, -7.894e-03, -2.068e-02, 2.511e-02, -6.803e-04, -1.260e-01, -8.680e-02, -1.011e-02, -4.539e-02)); + r += mul(s7_4, M4(-2.995e-01, 1.242e-01, 4.846e-02, 2.231e-02, 4.581e-02, 1.627e-01, -1.363e-02, -4.450e-02, -1.975e-02, -1.774e-02, 5.768e-02, -1.498e-02, 7.242e-01, 1.627e-01, 2.366e-02, 7.746e-01)); + r += mul(s7_5, M4(-2.051e-02, 9.773e-02, 1.240e-01, 8.467e-02, 3.193e-02, 3.783e-02, -7.459e-02, 3.517e-02, -1.552e-02, -2.133e-02, -2.022e-03, -1.416e-02, -1.032e-01, 2.212e-03, -8.253e-02, -2.403e-02)); + r += mul(s7_6, M4(6.575e-02, -1.535e-01, 6.263e-02, 1.718e-02, 2.965e-02, 1.927e-02, 7.256e-03, 5.714e-03, -1.267e-03, -2.908e-02, 1.035e-04, 2.608e-03, -2.016e-02, 4.315e-02, 1.622e-02, -3.870e-02)); + r += mul(s7_7, M4(-7.318e-03, -4.186e-02, -2.571e-02, -7.417e-03, 1.255e-02, -1.175e-01, -6.481e-02, 2.642e-02, -1.088e-02, 3.584e-02, 1.026e-02, 1.582e-02, 1.733e-01, -6.457e-02, -2.087e-01, 6.831e-03)); + r += mul(s7_8, M4(9.267e-02, -4.128e-02, -4.563e-02, -1.067e-01, 4.111e-02, 7.397e-02, -9.920e-03, 2.718e-02, 2.727e-03, -1.785e-02, -1.196e-03, -1.561e-03, -4.283e-03, 9.742e-02, 2.194e-02, 9.397e-02)); + r += 
V4(-1.575e-03, 9.477e-05, 3.103e-03, 2.705e-03); + return r; +} + +V4 f2(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.286e-02, -2.502e-04, -1.782e-02, -1.386e-02, -4.582e-03, 1.342e-02, -2.703e-02, -1.885e-02, -2.375e-02, -1.743e-02, -2.331e-02, -4.602e-02, 4.226e-02, 4.109e-03, 4.430e-03, -1.428e-02)); + r += mul(s0_1, M4(-3.584e-02, -5.431e-02, 2.948e-02, -1.796e-02, 3.327e-02, -7.598e-03, -2.264e-02, -5.021e-02, 2.741e-02, 1.018e-02, 2.019e-02, 1.274e-02, -3.174e-02, 3.511e-02, 7.139e-03, 1.037e-01)); + r += mul(s0_2, M4(-1.668e-02, 8.601e-03, -1.703e-02, 1.649e-02, -1.855e-02, -1.570e-02, -2.058e-03, -7.941e-03, 1.613e-02, 1.590e-02, 1.513e-02, 2.712e-02, 3.699e-02, -4.506e-02, -2.368e-03, -4.616e-02)); + r += mul(s0_3, M4(1.365e-01, 2.775e-02, 4.286e-02, 1.690e-02, -8.827e-02, 5.031e-02, -3.154e-02, 1.501e-02, 1.430e-02, -3.424e-02, -2.379e-02, 1.967e-02, 1.414e-02, 2.102e-02, 7.210e-02, 3.716e-02)); + r += mul(s0_4, M4(8.167e-02, 1.204e-01, -1.147e-01, 4.186e-02, 1.293e-02, 5.409e-02, 2.401e-02, 1.971e-01, 8.087e-02, -9.191e-02, -1.999e-02, 4.654e-02, -5.427e-02, 2.744e-01, 1.111e-01, -4.410e-02)); + r += mul(s0_5, M4(-4.244e-02, 4.083e-02, -2.514e-03, -6.810e-03, 7.438e-02, -7.925e-02, -3.725e-02, 3.084e-03, -1.768e-01, 1.198e-02, 3.945e-02, 1.902e-02, 5.694e-02, -1.233e-02, -5.633e-02, 3.284e-02)); + r += mul(s0_6, 
M4(-3.574e-02, -1.176e-02, -3.352e-02, -2.732e-02, 5.256e-02, -4.201e-02, 2.941e-02, 2.193e-02, -1.392e-01, 7.087e-03, 4.034e-02, 3.648e-02, 7.982e-02, -4.713e-02, -3.490e-02, 3.462e-02)); + r += mul(s0_7, M4(-1.267e-01, 4.749e-02, 9.924e-02, -4.168e-02, 1.007e-01, 1.167e-01, 6.051e-02, -8.266e-03, -1.707e-01, 7.560e-02, -6.771e-02, -2.184e-03, 8.028e-02, 1.047e-01, 5.680e-02, 4.491e-02)); + r += mul(s0_8, M4(4.774e-02, 3.688e-03, -1.564e-03, -8.394e-03, -8.898e-02, 6.652e-02, 1.923e-02, -2.762e-02, -4.141e-02, -3.580e-02, 2.316e-02, 6.226e-02, -1.643e-02, 5.038e-02, 4.202e-02, -8.634e-04)); + r += mul(s1_0, M4(-2.159e-02, 4.608e-02, 2.270e-02, 6.714e-02, 3.474e-02, 1.003e-02, -7.786e-03, 5.896e-03, 1.480e-02, -5.471e-02, -1.593e-03, 2.905e-02, -8.552e-03, 5.134e-02, 1.262e-02, -5.044e-02)); + r += mul(s1_1, M4(-8.847e-02, 1.202e-01, -3.535e-03, -9.698e-02, 2.149e-02, -2.206e-02, -2.659e-02, -4.115e-02, 3.202e-02, 3.496e-02, -5.574e-02, 4.296e-03, -4.881e-02, 5.946e-03, 2.066e-03, 5.490e-02)); + r += mul(s1_2, M4(-7.083e-02, -3.596e-02, -2.115e-02, 5.332e-02, 4.928e-03, -2.566e-02, -1.139e-03, 4.865e-03, 3.236e-02, 1.707e-02, 2.865e-02, 6.467e-02, 6.417e-02, -2.363e-02, -1.192e-02, -3.437e-02)); + r += mul(s1_3, M4(9.685e-02, -1.008e-01, -3.245e-02, -3.055e-02, -9.284e-02, 4.332e-02, -3.618e-02, 1.080e-02, 1.041e-02, -2.281e-02, -7.568e-02, -5.285e-02, -2.318e-02, 3.665e-02, 2.219e-02, -1.400e-02)); + r += mul(s1_4, M4(1.862e-01, 4.608e-01, -6.891e-02, 1.809e-01, -3.542e-02, 1.006e-01, 4.801e-02, 9.250e-02, 4.619e-01, 2.440e-02, 2.894e-01, 3.075e-02, -1.535e-01, -1.170e-01, 5.626e-03, -2.781e-01)); + r += mul(s1_5, M4(2.493e-02, 9.614e-02, 4.884e-02, -7.251e-02, 2.368e-02, -3.675e-02, -3.287e-02, 8.828e-03, 1.801e-01, -1.446e-01, -5.158e-02, 1.015e-01, 3.986e-02, -5.168e-02, -7.153e-02, 1.293e-02)); + r += mul(s1_6, M4(-5.589e-02, -8.774e-02, 3.786e-02, 3.986e-02, 3.144e-02, 4.375e-02, 6.141e-02, 3.453e-02, -8.236e-03, -2.857e-02, 3.869e-03, 4.619e-03, 5.677e-02, 
-1.208e-01, -6.910e-02, 2.016e-02)); + r += mul(s1_7, M4(-1.521e-01, 2.723e-01, 4.824e-02, -2.627e-01, 9.133e-02, 4.254e-02, 3.662e-02, -3.833e-02, 2.406e-01, -5.310e-02, -2.103e-01, 1.255e-01, 5.581e-01, 9.681e-02, -3.147e-02, -7.051e-02)); + r += mul(s1_8, M4(-1.136e-02, -2.118e-02, -9.550e-03, 3.598e-02, -8.392e-02, 4.512e-02, 4.338e-02, 1.109e-02, 1.403e-01, 1.283e-01, 4.961e-02, 4.027e-02, -1.252e-01, 5.900e-02, 8.668e-02, -6.595e-02)); + r += mul(s2_0, M4(-1.008e-02, -2.903e-02, -6.333e-03, -4.459e-02, 2.505e-03, 1.551e-02, 3.357e-02, 5.356e-02, -2.329e-02, 3.266e-02, -7.303e-03, 7.180e-03, 3.004e-02, -8.346e-03, -2.725e-02, 2.817e-02)); + r += mul(s2_1, M4(3.495e-03, -9.106e-03, 2.350e-02, 3.874e-02, 4.396e-03, 1.342e-01, 5.563e-02, 3.178e-02, -1.553e-03, -9.208e-02, 1.462e-02, -3.384e-02, 8.389e-02, -2.999e-01, 6.650e-02, -1.166e-01)); + r += mul(s2_2, M4(1.174e-02, -1.292e-02, -8.445e-03, 7.933e-04, 5.201e-02, -2.256e-02, -2.047e-02, 6.584e-03, -3.796e-02, 1.583e-02, 1.837e-02, 2.655e-02, 9.952e-02, 3.144e-02, -2.651e-02, 2.076e-01)); + r += mul(s2_3, M4(4.431e-02, -7.747e-02, -4.542e-02, -6.469e-02, 5.572e-02, 3.031e-03, 4.995e-02, -2.386e-02, 8.360e-03, -6.044e-02, -4.929e-03, -6.369e-02, 9.520e-02, -1.262e-01, -7.337e-03, 9.376e-03)); + r += mul(s2_4, M4(-7.080e-02, 1.169e-01, 1.829e-02, 7.257e-02, 2.309e-02, -2.177e-02, -3.192e-02, 3.540e-04, 4.603e-02, 3.443e-02, -4.078e-02, 5.128e-02, 3.726e-01, 5.696e-01, -8.425e-02, 7.807e-02)); + r += mul(s2_5, M4(-1.185e-01, 6.520e-02, 5.123e-02, -1.715e-02, -1.738e-02, 1.367e-02, 2.443e-02, 1.685e-02, 8.273e-02, 5.137e-03, 4.677e-03, -5.286e-02, 3.105e-01, -1.479e-01, 5.733e-02, -1.704e-01)); + r += mul(s2_6, M4(2.104e-02, -2.878e-02, -3.234e-02, -4.314e-02, -4.102e-02, 1.488e-02, -2.447e-02, 4.758e-02, 7.857e-03, 2.584e-02, 1.786e-03, 6.276e-03, 1.611e-02, -2.310e-02, -7.171e-03, -2.552e-02)); + r += mul(s2_7, M4(4.631e-02, 1.781e-02, 6.916e-03, 5.503e-02, -1.158e-01, 8.648e-02, 5.399e-02, -4.525e-03, 
-3.435e-02, -2.144e-02, -1.508e-02, -1.361e-02, 9.343e-02, 7.544e-02, -2.271e-02, 6.940e-02)); + r += mul(s2_8, M4(8.131e-02, 1.685e-02, 3.085e-02, -3.030e-02, -4.887e-02, 7.200e-03, 4.149e-02, 3.742e-02, -3.739e-02, -1.525e-02, -3.245e-02, -2.925e-03, -9.070e-02, -4.021e-02, -1.551e-02, -1.308e-02)); + r += mul(s3_0, M4(-2.061e-02, -4.966e-02, -2.100e-02, -6.209e-02, -2.488e-02, -5.143e-02, 2.484e-03, 1.816e-02, -1.113e-02, -2.592e-02, 6.455e-02, 8.904e-02, 6.529e-02, 4.113e-03, 2.603e-02, 2.897e-02)); + r += mul(s3_1, M4(-2.977e-02, 8.082e-02, -6.461e-03, 1.056e-01, -1.162e-02, 1.645e-02, 7.118e-02, 8.885e-04, -3.361e-02, 1.990e-01, 8.852e-03, 1.638e-01, 5.633e-02, -4.294e-02, -1.971e-02, -2.437e-02)); + r += mul(s3_2, M4(3.122e-02, -2.438e-02, -5.720e-03, -7.262e-02, 5.823e-02, -2.232e-02, -1.086e-02, 3.184e-03, 3.344e-02, 9.806e-02, 1.689e-02, 1.518e-03, 8.669e-03, -2.063e-02, 6.094e-03, 8.015e-03)); + r += mul(s3_3, M4(3.223e-02, -2.139e-02, -5.512e-02, -2.707e-02, 1.505e-01, 1.679e-02, 1.116e-01, -3.540e-02, -1.019e-01, 1.859e-01, 1.957e-02, 1.048e-02, -1.553e-01, -7.867e-02, -4.302e-02, 4.312e-03)); + r += mul(s3_4, M4(-2.339e-02, -2.018e-01, -4.587e-02, -5.147e-03, -1.460e-02, -1.873e-02, -1.039e-02, -1.318e-01, 7.216e-03, -1.232e-01, 2.339e-01, -5.143e-02, 1.457e-01, 2.479e-01, 4.917e-02, 9.243e-02)); + r += mul(s3_5, M4(-9.042e-02, -2.171e-03, 3.064e-02, -3.581e-02, -8.730e-02, -7.698e-03, -2.273e-02, -3.261e-02, -7.416e-02, 5.982e-02, 1.367e-02, 1.760e-01, 5.369e-02, -1.153e-01, -1.900e-02, -4.893e-03)); + r += mul(s3_6, M4(5.882e-02, 7.972e-03, -4.724e-02, -3.461e-02, -3.556e-02, -4.754e-02, -1.491e-01, 6.491e-02, -1.312e-01, 5.117e-02, 1.480e-01, 1.391e-01, 3.492e-02, -2.692e-02, 4.632e-03, -1.603e-02)); + r += mul(s3_7, M4(-1.485e-02, -1.118e-02, 1.468e-01, -1.223e-01, 1.117e-01, 1.987e-02, -1.356e-01, -1.361e-01, 1.004e-01, 1.534e-01, 4.730e-02, 1.054e-01, -1.001e-01, 7.442e-02, 8.265e-02, 1.123e-02)); + r += mul(s3_8, M4(-6.086e-02, 5.548e-03, 
-7.503e-02, 5.356e-02, 6.002e-02, 2.448e-02, -1.519e-02, -2.142e-02, 2.339e-02, 1.101e-01, 1.470e-01, 3.847e-02, -9.741e-02, -4.376e-03, -1.496e-02, 1.391e-02)); + r += mul(s4_0, M4(-3.301e-02, 2.267e-02, -4.043e-02, -2.886e-02, -5.309e-02, -6.401e-02, -4.860e-02, -1.382e-01, 4.547e-02, -1.195e-01, 2.847e-02, 1.183e-01, 1.525e-02, 6.961e-03, -8.481e-03, 2.313e-02)); + r += mul(s4_1, M4(-5.179e-02, 7.837e-02, 2.924e-02, -6.807e-02, 9.977e-02, -1.343e-01, 6.610e-02, -1.625e-01, 1.068e-01, -2.686e-01, 4.198e-02, 1.333e-01, 8.296e-02, -1.314e-01, 4.368e-02, -7.842e-02)); + r += mul(s4_2, M4(-1.260e-02, 3.823e-02, 6.907e-02, 3.324e-02, -3.521e-03, -7.009e-03, -9.421e-03, 1.509e-01, 1.365e-01, 1.235e-02, -1.439e-02, 4.430e-02, -6.141e-03, 9.008e-02, -3.424e-02, 2.807e-02)); + r += mul(s4_3, M4(1.348e-02, 3.553e-02, 2.777e-02, -4.292e-03, -3.161e-01, -5.409e-02, -2.340e-01, 5.708e-02, -2.639e-03, -9.781e-02, 7.296e-02, -1.164e-01, -8.418e-02, 4.056e-02, 9.247e-03, -1.973e-02)); + r += mul(s4_4, M4(1.374e-01, -1.948e-01, 3.840e-02, -3.181e-02, 3.337e-01, 5.918e-01, -4.292e-02, -4.522e-01, -1.993e-01, 1.913e-01, 5.210e-02, -2.841e-01, 1.024e-01, -1.802e-01, -2.334e-01, -6.746e-02)); + r += mul(s4_5, M4(-2.478e-02, -3.843e-03, 2.707e-02, 6.649e-02, 7.579e-02, -9.347e-02, 8.742e-02, -4.274e-02, 6.592e-02, 1.459e-02, -5.017e-02, -2.843e-02, -1.101e-01, 5.774e-02, 8.689e-02, -6.216e-03)); + r += mul(s4_6, M4(-7.471e-02, -3.950e-02, -2.088e-02, 2.672e-02, 8.373e-02, 6.904e-02, -4.597e-02, -1.743e-01, 2.439e-02, -9.662e-02, -3.455e-02, 1.243e-02, 7.625e-03, 3.214e-02, -1.285e-02, -9.281e-03)); + r += mul(s4_7, M4(2.173e-01, 8.858e-02, -1.822e-01, -2.232e-02, 6.016e-02, -1.324e-01, -4.636e-01, -6.116e-02, -1.560e-01, -8.520e-02, -2.715e-01, -2.290e-02, 1.059e-01, 2.833e-02, 1.261e-01, 2.703e-04)); + r += mul(s4_8, M4(6.084e-02, 1.447e-02, 1.216e-01, 3.908e-03, -1.112e-01, -1.095e-02, -3.872e-02, -1.106e-01, -8.092e-02, -2.069e-02, -2.778e-02, 2.449e-03, 3.380e-02, 1.123e-02, 
5.228e-03, 1.590e-02)); + r += mul(s5_0, M4(9.058e-03, 1.953e-02, -3.575e-02, 2.096e-02, -8.534e-02, 7.803e-04, -3.327e-02, -7.176e-02, -5.335e-02, 1.376e-02, -1.116e-02, -3.823e-02, 6.955e-03, -5.436e-02, 4.908e-02, 3.213e-02)); + r += mul(s5_1, M4(-9.066e-02, 3.020e-02, -2.233e-02, -4.195e-02, 9.071e-02, -4.585e-03, 2.065e-02, 5.307e-02, 1.055e-02, 6.886e-03, 5.105e-02, -1.949e-04, 2.005e-02, 8.028e-02, 1.400e-02, -4.166e-02)); + r += mul(s5_2, M4(-7.524e-02, -3.158e-02, 5.335e-02, 1.800e-02, 5.551e-02, -4.970e-03, -8.957e-03, -1.808e-03, 3.732e-02, -2.736e-02, -1.708e-03, -5.193e-03, -7.433e-02, 1.694e-03, -5.927e-03, -5.865e-02)); + r += mul(s5_3, M4(1.841e-02, 4.228e-02, 1.305e-02, -1.773e-02, 1.443e-01, -3.841e-02, -6.603e-03, -1.000e-01, 5.555e-02, -9.809e-02, -4.187e-02, -3.300e-02, -1.449e-01, -7.725e-02, -1.307e-01, 3.371e-02)); + r += mul(s5_4, M4(2.110e-01, -1.452e-01, 8.378e-02, -6.101e-02, -1.204e-02, 3.459e-02, -1.857e-02, -5.599e-02, 1.034e-01, -7.572e-02, -1.066e-01, -2.685e-02, 1.186e-01, 1.014e-01, -7.138e-03, -1.229e-01)); + r += mul(s5_5, M4(5.220e-02, 1.147e-01, -8.863e-03, 1.597e-02, -1.671e-01, 6.502e-03, 5.946e-04, 6.353e-03, -5.116e-02, -5.064e-02, -2.231e-03, -2.255e-02, 8.614e-02, -1.615e-02, -3.855e-02, 8.267e-03)); + r += mul(s5_6, M4(-6.879e-03, -5.188e-02, -1.344e-02, 4.907e-02, -1.293e-01, -1.520e-02, -6.090e-02, 8.918e-06, -1.082e-01, -4.470e-03, -9.794e-03, -7.308e-02, 7.560e-02, -1.006e-02, -3.079e-03, -2.419e-02)); + r += mul(s5_7, M4(1.966e-01, 7.878e-02, -6.765e-03, 2.455e-02, -6.439e-03, -7.800e-03, -4.959e-04, -2.479e-02, -8.629e-03, -2.413e-02, -4.563e-02, -1.130e-02, -7.494e-02, 1.117e-02, -7.940e-02, 5.176e-02)); + r += mul(s5_8, M4(2.362e-01, -3.172e-02, 1.990e-03, 2.828e-02, -6.788e-04, 9.385e-03, 1.395e-03, 4.595e-03, 3.466e-02, -2.217e-02, -1.113e-02, 2.319e-02, 4.322e-03, 7.255e-02, 8.528e-03, -8.646e-02)); + r += mul(s6_0, M4(6.236e-04, -5.599e-03, 1.662e-03, -1.610e-02, -3.268e-02, 3.736e-02, 9.799e-03, 6.862e-02, 
1.440e-01, -3.291e-02, -5.290e-02, 5.718e-02, -1.472e-02, 6.329e-03, 3.317e-04, 3.432e-03)); + r += mul(s6_1, M4(7.624e-03, -1.078e-01, 2.509e-02, -5.998e-02, 2.957e-02, 1.002e-01, 2.523e-02, -2.646e-02, 1.129e-01, 1.652e-01, 3.334e-02, 2.243e-01, 2.305e-02, -3.856e-02, -2.499e-02, -3.774e-02)); + r += mul(s6_2, M4(8.486e-03, -4.194e-02, -2.126e-03, -3.028e-03, -9.381e-03, -7.450e-05, -2.301e-03, -1.200e-02, 1.061e-01, 5.228e-02, -6.584e-02, -1.281e-01, -4.153e-03, 2.036e-02, -8.197e-03, 6.957e-04)); + r += mul(s6_3, M4(8.359e-02, -1.529e-02, 7.150e-02, 4.381e-02, 3.876e-02, 9.446e-02, 8.667e-02, 4.710e-02, -1.704e-01, 8.304e-02, 1.315e-01, 9.065e-02, -1.248e-02, 3.451e-02, -1.663e-02, -3.212e-02)); + r += mul(s6_4, M4(4.922e-02, 7.176e-02, -6.640e-02, -9.425e-02, -3.222e-02, -1.085e-01, -3.865e-02, 1.571e-01, -3.700e-01, 1.043e-01, -2.323e-02, -5.991e-02, -4.323e-02, 1.331e-02, -1.283e-02, 9.605e-02)); + r += mul(s6_5, M4(1.564e-01, -9.708e-02, -2.115e-02, 1.420e-02, 1.994e-02, -4.618e-02, 2.682e-02, 2.128e-02, -4.299e-01, 3.603e-02, -1.667e-02, 4.865e-02, 3.222e-02, -3.111e-02, 2.877e-02, 1.951e-02)); + r += mul(s6_6, M4(-5.226e-02, -3.439e-02, -1.115e-02, 1.559e-02, 1.080e-02, -4.797e-02, -5.588e-02, 3.681e-02, -1.168e-02, 8.262e-02, 3.484e-02, 3.941e-02, -3.204e-02, 5.511e-02, -2.526e-03, 2.378e-02)); + r += mul(s6_7, M4(-3.656e-02, 8.768e-03, -1.877e-02, 2.896e-02, -3.234e-02, 1.029e-01, 7.208e-02, -1.354e-02, 4.724e-02, -2.792e-02, -6.029e-02, -1.563e-01, -1.274e-01, -4.066e-02, 1.253e-01, -1.948e-03)); + r += mul(s6_8, M4(-8.171e-02, -1.723e-02, 1.593e-02, -2.655e-02, 2.431e-02, 3.102e-02, 2.482e-02, 6.723e-03, -2.363e-02, -4.291e-02, 6.611e-03, 6.381e-02, -1.239e-01, -2.593e-03, -9.796e-04, 2.874e-02)); + r += mul(s7_0, M4(1.176e-02, 5.270e-03, -1.059e-03, 1.153e-02, 3.849e-02, -5.237e-02, -1.249e-03, -3.835e-02, 5.869e-02, -2.144e-02, -2.259e-03, -4.426e-04, -8.524e-03, -1.560e-02, 6.030e-03, -2.879e-02)); + r += mul(s7_1, M4(3.799e-02, -5.750e-02, 
-4.045e-02, 2.964e-02, 2.058e-01, -7.604e-02, -6.212e-02, 3.370e-02, -1.373e-02, -1.059e-01, -9.752e-03, -3.277e-02, 3.861e-02, -4.689e-02, 2.058e-02, 3.350e-02)); + r += mul(s7_2, M4(-8.731e-04, 9.982e-03, 1.355e-02, 5.808e-03, 9.372e-03, -5.953e-02, 1.993e-02, -3.465e-02, 1.375e-02, -3.096e-02, 1.506e-02, 3.740e-02, -3.321e-03, 2.036e-02, 2.091e-03, -1.008e-02)); + r += mul(s7_3, M4(-8.942e-03, 2.918e-02, 1.244e-01, 3.212e-01, -2.777e-02, -3.158e-02, -3.150e-02, 8.206e-02, 2.294e-02, 7.014e-03, 1.237e-02, -1.788e-02, -6.398e-02, 1.291e-01, -1.265e-01, -1.873e-02)); + r += mul(s7_4, M4(-1.340e-02, -7.785e-02, 9.694e-02, -4.805e-01, -2.603e-01, 2.979e-01, 1.150e-01, -1.201e-03, 8.901e-02, 6.518e-02, -4.128e-02, -8.574e-03, -4.618e-02, -3.740e-01, 8.044e-03, -1.624e-01)); + r += mul(s7_5, M4(1.173e-01, -1.139e-01, -4.628e-02, 8.430e-02, 6.913e-02, -6.278e-02, -4.680e-02, -1.164e-01, -1.316e-02, 3.677e-02, 1.484e-02, -4.159e-03, -9.132e-02, 1.718e-02, 4.480e-02, 4.812e-03)); + r += mul(s7_6, M4(4.462e-02, 9.087e-03, -9.374e-02, -3.102e-03, 1.323e-01, -7.278e-02, -1.291e-02, -3.531e-02, -1.859e-02, 1.378e-02, -8.155e-03, -7.871e-03, -6.520e-02, 4.532e-02, -6.542e-03, 2.745e-02)); + r += mul(s7_7, M4(-1.086e-01, 3.684e-02, 1.988e-01, 2.470e-01, 2.397e-01, -2.523e-02, -1.419e-01, 4.321e-02, 1.231e-02, -1.233e-02, 2.661e-02, 7.225e-03, -8.916e-04, -2.241e-01, -9.885e-03, -6.903e-02)); + r += mul(s7_8, M4(7.242e-02, -3.178e-02, 3.298e-02, -8.243e-02, -5.032e-02, -3.423e-02, 1.194e-02, -3.255e-02, 6.636e-02, 1.658e-02, -5.523e-03, -2.126e-02, -8.023e-02, 4.985e-02, -5.195e-02, 3.102e-02)); + r += V4(2.176e-03, 6.722e-03, 4.071e-04, 1.471e-03); + return r; +} + +V4 f3(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 
s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(9.681e-03, -3.008e-02, -4.885e-02, -1.980e-02, -2.557e-02, 8.902e-03, 2.876e-02, 3.702e-02, -3.984e-03, -5.590e-02, 3.719e-02, -5.321e-02, 4.944e-03, 2.265e-02, -5.642e-02, -3.355e-02)); + r += mul(s0_1, M4(1.749e-02, 4.415e-02, -4.624e-02, -1.619e-02, -5.776e-02, -3.455e-03, 9.170e-03, -3.880e-02, -4.960e-02, -3.457e-02, 7.778e-02, 9.934e-02, 3.774e-02, 9.627e-02, -1.190e-01, -2.539e-02)); + r += mul(s0_2, M4(-2.970e-02, -1.385e-02, -1.654e-02, 1.286e-02, 1.187e-02, -1.049e-02, -5.015e-03, 3.357e-02, 9.840e-02, 3.333e-02, 3.595e-02, -6.867e-02, -1.029e-01, -9.563e-03, -3.838e-02, 1.842e-02)); + r += mul(s0_3, M4(2.468e-03, -3.105e-03, 3.139e-02, -3.924e-04, 4.153e-02, 4.857e-02, 2.048e-02, -3.807e-02, -9.976e-03, 3.462e-02, 3.193e-02, -9.280e-03, -8.785e-02, 3.561e-03, 5.118e-02, 1.315e-01)); + r += mul(s0_4, M4(1.860e-03, -2.834e-02, -3.496e-02, -1.925e-01, -1.548e-01, -2.941e-02, -1.305e-01, -1.316e-01, -3.740e-02, 2.467e-01, 2.256e-01, 5.527e-02, 2.511e-01, -1.136e-01, 3.445e-02, -2.657e-01)); + r += mul(s0_5, M4(-3.462e-02, 1.757e-02, -3.593e-03, -4.448e-03, -3.035e-02, 6.164e-02, 8.270e-03, 3.816e-02, -5.575e-02, -4.886e-02, 1.928e-02, 2.159e-02, -2.496e-01, 3.416e-02, -1.923e-02, -2.403e-02)); + r += mul(s0_6, M4(-9.527e-03, 1.884e-03, 3.132e-02, 3.839e-02, 1.489e-02, 5.617e-03, 4.702e-03, 2.492e-02, -3.944e-02, 8.726e-03, -3.927e-03, 3.775e-04, 1.182e-02, 4.459e-02, 1.514e-02, -1.748e-03)); + r += mul(s0_7, M4(-5.730e-02, -3.515e-03, 4.216e-02, 1.560e-02, -1.060e-01, 2.248e-02, 1.111e-02, 9.832e-03, 7.457e-02, 3.146e-03, -8.370e-02, -1.173e-01, -2.263e-03, -1.129e-02, 2.702e-02, 
-3.823e-02)); + r += mul(s0_8, M4(-7.514e-03, 1.146e-02, 3.054e-02, -1.619e-02, -1.817e-02, -6.031e-03, -3.696e-02, -2.777e-02, -4.793e-02, 1.139e-02, -1.696e-03, 6.132e-02, -2.482e-02, 9.432e-03, 3.504e-02, -2.315e-03)); + r += mul(s1_0, M4(1.862e-03, 3.773e-02, -8.036e-02, 1.519e-01, -3.710e-03, 2.376e-02, 3.439e-02, 2.167e-02, 2.711e-02, 5.150e-02, 3.869e-02, 2.169e-02, 1.250e-02, -1.996e-02, -1.736e-01, -8.277e-02)); + r += mul(s1_1, M4(-2.311e-02, -1.085e-01, -3.403e-02, -2.320e-01, -2.981e-02, -1.928e-02, 2.786e-02, -4.477e-02, 1.169e-01, -1.826e-01, 1.570e-01, -9.813e-02, 9.840e-02, 8.023e-02, -2.586e-01, 1.433e-01)); + r += mul(s1_2, M4(1.803e-03, 4.658e-02, -1.892e-02, 2.893e-02, -1.528e-02, 1.273e-02, 2.555e-03, 8.782e-03, -5.553e-03, 7.205e-02, 6.603e-02, 6.278e-02, -1.120e-01, -6.004e-02, -1.222e-01, 1.163e-02)); + r += mul(s1_3, M4(-1.018e-01, -4.000e-02, 2.640e-02, 2.042e-02, 5.657e-02, 4.927e-02, 3.377e-02, -2.825e-02, 1.425e-02, -2.015e-02, -8.182e-02, -2.919e-02, -7.378e-02, 2.429e-02, 1.319e-01, 7.956e-02)); + r += mul(s1_4, M4(1.068e-01, -9.681e-02, 1.092e-01, -5.858e-01, -1.574e-01, -1.582e-01, -2.159e-01, -1.526e-01, 9.058e-02, 9.935e-02, -2.312e-02, 5.522e-01, 3.799e-01, -2.598e-01, 1.344e-01, -1.568e-01)); + r += mul(s1_5, M4(-1.111e-02, -9.091e-02, -1.685e-02, -4.801e-02, -2.188e-02, 2.544e-03, -7.639e-02, 3.189e-02, -5.523e-02, 1.968e-01, 1.340e-01, -8.667e-02, -3.461e-01, 9.813e-03, 3.383e-02, 5.120e-02)); + r += mul(s1_6, M4(-3.401e-04, 4.871e-02, 1.394e-02, 8.957e-02, 2.540e-02, 3.912e-02, 3.234e-02, -1.502e-02, -1.504e-02, 5.820e-04, -1.122e-02, 7.805e-03, 4.213e-02, 1.403e-02, -7.371e-02, 5.537e-02)); + r += mul(s1_7, M4(5.145e-02, 1.466e-02, -1.231e-02, -2.232e-01, -9.556e-02, -4.633e-03, 4.627e-02, 3.679e-02, 6.171e-02, 7.696e-02, -2.454e-02, -6.367e-03, 2.456e-02, -2.865e-02, -3.183e-02, -2.806e-02)); + r += mul(s1_8, M4(-5.166e-02, 4.631e-02, 4.860e-02, 4.666e-02, 3.595e-04, -3.630e-03, 1.516e-02, -9.129e-03, -6.539e-02, 
-1.232e-02, -1.745e-02, -9.037e-02, -7.875e-02, -1.470e-02, -1.751e-02, -2.477e-02)); + r += mul(s2_0, M4(-2.081e-02, -2.560e-02, 7.354e-03, 1.161e-02, 6.217e-03, -2.986e-02, 1.154e-02, 2.615e-02, -2.085e-02, 9.902e-03, 2.071e-03, -5.216e-04, 5.957e-02, -7.398e-03, 2.064e-02, 1.406e-01)); + r += mul(s2_1, M4(3.722e-02, 5.744e-02, 1.443e-01, -2.925e-02, 1.370e-02, 1.174e-01, 1.074e-01, -3.303e-02, 8.991e-03, 3.061e-03, 3.371e-03, 1.175e-02, -5.190e-02, -6.181e-02, 1.753e-01, -1.287e-01)); + r += mul(s2_2, M4(-4.176e-02, -6.274e-04, 3.549e-02, -3.713e-02, -9.366e-02, 1.537e-02, 2.337e-02, -3.934e-02, 2.923e-02, 1.346e-02, 7.163e-03, 1.328e-02, 4.833e-03, 4.618e-02, -5.331e-02, -3.940e-02)); + r += mul(s2_3, M4(8.528e-03, -5.794e-02, 2.796e-02, 3.357e-02, -8.052e-02, -9.799e-02, -3.645e-02, 5.612e-02, -1.166e-02, -4.189e-02, -1.061e-02, 7.611e-03, 2.040e-02, 2.967e-02, -6.062e-02, 6.390e-02)); + r += mul(s2_4, M4(-4.279e-02, -5.724e-02, 4.008e-03, -5.154e-02, -1.041e-02, -1.733e-01, -1.431e-01, -3.059e-03, 1.470e-02, 2.059e-02, -5.137e-02, -9.846e-03, 4.126e-02, 2.147e-01, 2.649e-02, -1.278e-01)); + r += mul(s2_5, M4(-1.001e-01, -6.696e-02, 8.699e-03, -5.281e-02, -3.896e-02, -5.937e-02, -3.633e-02, -1.026e-02, 5.608e-02, -5.115e-02, 2.682e-03, -1.474e-02, -2.244e-01, -3.160e-02, 3.403e-02, 7.687e-02)); + r += mul(s2_6, M4(4.213e-03, -2.668e-02, -2.583e-02, 1.132e-02, 1.083e-02, 5.358e-02, 1.379e-02, -5.371e-02, -2.338e-02, 9.030e-03, 1.420e-02, -1.605e-02, 9.140e-03, -8.769e-03, -3.644e-02, -1.330e-02)); + r += mul(s2_7, M4(-4.586e-02, -4.933e-04, -9.498e-02, 1.749e-02, -1.759e-02, 4.007e-02, 6.127e-02, -5.414e-02, 9.016e-03, -2.079e-03, 3.343e-03, 1.004e-02, 7.801e-03, 1.966e-02, 4.390e-03, -2.055e-02)); + r += mul(s2_8, M4(7.917e-03, -7.674e-02, -3.768e-02, -2.986e-02, -3.577e-03, 6.074e-02, 2.197e-02, 1.776e-02, -1.944e-02, 3.794e-02, 1.098e-02, 4.473e-03, -7.910e-02, -9.479e-03, -2.516e-02, -4.335e-03)); + r += mul(s3_0, M4(-2.815e-02, -2.422e-03, 1.804e-02, 
-5.957e-03, 6.596e-02, -1.254e-01, 6.526e-02, 4.307e-02, -1.893e-02, 3.540e-02, 6.049e-02, -4.571e-02, -2.629e-03, 1.568e-02, -7.711e-03, 8.974e-03)); + r += mul(s3_1, M4(2.423e-02, 4.847e-02, 1.679e-02, -5.669e-04, -4.052e-03, 1.177e-01, 1.660e-01, 1.093e-02, -5.386e-02, -9.583e-03, -1.242e-01, 4.574e-02, -2.228e-02, -3.213e-02, 1.371e-01, 5.518e-03)); + r += mul(s3_2, M4(-8.338e-02, -1.656e-02, 2.950e-03, -3.721e-02, -3.461e-02, 3.840e-04, 4.377e-02, -1.068e-02, -4.114e-02, 4.027e-02, 4.902e-02, -7.158e-02, 2.055e-02, 6.079e-02, 3.644e-02, 4.520e-02)); + r += mul(s3_3, M4(-5.441e-03, -4.069e-02, 7.030e-02, -2.745e-02, 3.331e-02, -1.003e-01, -4.375e-01, 1.753e-01, -8.514e-02, -9.203e-04, 1.121e-01, -9.410e-02, -2.350e-02, 2.985e-02, 3.703e-02, 8.273e-02)); + r += mul(s3_4, M4(5.347e-02, -7.722e-01, 1.841e-01, -5.509e-02, 8.975e-02, -2.680e-01, -3.254e-01, -1.064e-01, 1.529e-01, -6.629e-02, 4.410e-01, 1.618e-01, 2.479e-02, 1.833e-01, 9.445e-02, -2.289e-01)); + r += mul(s3_5, M4(5.126e-02, 5.849e-02, -5.219e-03, 9.645e-02, 9.359e-02, -3.570e-02, -8.843e-02, 1.006e-02, -2.361e-01, 7.791e-02, 7.898e-02, -3.856e-02, -1.233e-01, -6.706e-04, 2.387e-02, 4.867e-02)); + r += mul(s3_6, M4(-1.548e-02, 3.366e-02, 7.603e-02, 5.374e-02, 9.097e-03, 5.063e-02, -8.491e-04, 7.755e-03, -6.799e-02, 5.193e-02, 3.761e-04, 1.009e-02, -1.239e-02, -1.220e-02, -4.171e-03, 1.343e-02)); + r += mul(s3_7, M4(2.320e-03, -9.519e-02, -4.596e-02, 3.958e-02, -2.254e-02, -3.177e-02, 7.613e-03, -5.295e-02, -3.478e-02, 5.697e-02, -7.504e-02, -1.152e-01, 5.974e-03, -3.027e-03, 6.663e-03, -1.355e-02)); + r += mul(s3_8, M4(2.738e-02, -6.541e-03, -1.734e-02, -3.262e-02, -4.650e-03, 1.989e-02, 2.403e-02, 7.109e-03, -7.896e-02, -3.010e-02, -3.856e-02, -4.335e-02, -4.571e-02, -2.111e-02, -3.043e-02, 1.725e-02)); + r += mul(s4_0, M4(-1.964e-02, -3.675e-02, 2.942e-02, -1.417e-02, -8.570e-02, 2.328e-02, -1.705e-01, -3.193e-04, 6.156e-02, -2.407e-02, -2.030e-02, -6.600e-02, 3.456e-02, -1.314e-02, 1.694e-02, 
-3.869e-02)); + r += mul(s4_1, M4(-2.846e-02, -1.182e-01, -1.167e-02, 9.350e-03, -2.006e-01, -7.553e-02, 5.109e-02, -2.008e-02, 9.335e-02, 2.042e-02, 2.823e-01, 2.446e-01, -1.102e-01, 9.506e-02, -1.160e-01, 2.432e-01)); + r += mul(s4_2, M4(4.094e-02, -2.665e-02, 1.606e-01, -5.195e-02, 9.016e-03, 6.199e-02, 5.034e-02, 2.114e-02, 1.492e-01, -3.953e-02, 3.046e-02, 1.560e-03, 4.637e-02, -3.696e-03, 2.390e-03, -1.347e-01)); + r += mul(s4_3, M4(-1.473e-02, -3.120e-02, 9.582e-02, -5.774e-02, -1.346e-02, -1.387e-01, -1.362e-01, -1.796e-01, -9.409e-02, -2.495e-01, 4.522e-02, 2.231e-01, -3.688e-02, 6.567e-02, 4.169e-02, 4.207e-02)); + r += mul(s4_4, M4(-1.090e-02, 3.370e-03, 2.411e-01, 8.764e-03, -6.157e-02, 1.821e-01, 8.114e-02, -4.756e-01, 1.711e-01, 2.427e-01, 7.778e-02, 7.863e-02, 9.319e-02, -1.679e-01, -1.381e-01, 6.603e-02)); + r += mul(s4_5, M4(-1.301e-01, 7.742e-02, 1.319e-01, 7.278e-02, -3.592e-02, 2.957e-02, 4.727e-02, 5.763e-02, 1.522e-01, -3.927e-02, -1.522e-02, -9.234e-02, -1.164e-01, -1.746e-02, -1.671e-02, 2.957e-02)); + r += mul(s4_6, M4(1.661e-02, -7.393e-03, -4.879e-02, 3.085e-03, 4.210e-02, -4.314e-02, -3.971e-02, -7.896e-02, 2.791e-02, 6.567e-02, -2.164e-02, -4.939e-03, 2.815e-02, 3.187e-02, 1.803e-02, -4.191e-03)); + r += mul(s4_7, M4(8.770e-03, -4.219e-02, -5.425e-02, -6.481e-02, -3.305e-02, 6.806e-02, 1.077e-01, 1.753e-01, -4.405e-02, 3.918e-02, -1.311e-02, -2.641e-03, 2.801e-02, 3.008e-02, 4.854e-02, -8.295e-03)); + r += mul(s4_8, M4(-9.797e-03, 1.242e-03, -2.370e-02, -2.221e-02, -6.149e-03, -1.874e-02, -1.991e-02, -2.733e-02, -3.855e-02, -2.105e-02, 2.489e-03, 2.475e-02, -5.248e-03, 4.986e-02, -4.264e-04, -2.850e-02)); + r += mul(s5_0, M4(7.117e-03, 2.173e-02, 3.156e-02, 5.267e-03, 8.027e-03, -4.454e-02, -7.556e-02, -3.434e-02, -3.034e-02, 4.286e-03, -1.329e-03, 9.390e-02, 2.082e-02, 4.265e-02, 2.317e-02, 9.256e-02)); + r += mul(s5_1, M4(-1.828e-02, -6.204e-02, -4.113e-02, -6.140e-02, -7.433e-03, -2.275e-02, -1.067e-01, -4.810e-02, 3.146e-02, 
1.470e-02, 9.895e-02, 7.080e-02, 2.503e-02, -1.323e-01, -5.634e-02, -1.158e-01)); + r += mul(s5_2, M4(2.929e-02, 7.951e-02, 6.912e-02, 1.737e-02, 4.312e-02, 1.538e-03, -5.973e-03, 2.283e-02, -9.977e-03, -7.604e-03, 1.749e-02, -8.725e-03, 2.287e-02, 1.039e-02, -5.078e-02, 3.387e-02)); + r += mul(s5_3, M4(2.977e-02, -8.352e-03, -1.156e-02, 1.297e-02, -3.331e-02, -1.692e-03, 3.981e-02, -1.112e-02, 4.806e-03, -5.421e-02, 2.894e-02, 7.254e-02, -6.607e-03, 7.883e-03, -7.508e-02, -3.811e-02)); + r += mul(s5_4, M4(-3.712e-02, -2.705e-01, -8.473e-03, 4.084e-02, 2.750e-02, -2.059e-02, 5.705e-02, 5.270e-02, 1.364e-02, -4.693e-03, -8.906e-02, -3.422e-02, -9.573e-02, 1.385e-02, 1.578e-01, 2.145e-02)); + r += mul(s5_5, M4(-5.351e-02, -1.961e-02, 4.591e-03, -3.584e-02, -1.286e-02, -3.033e-02, -3.979e-02, -1.666e-02, 4.667e-02, -3.338e-02, -4.259e-02, 5.857e-02, 1.343e-01, 4.236e-02, 1.804e-02, -1.117e-01)); + r += mul(s5_6, M4(1.295e-02, 3.869e-02, 7.838e-04, 1.346e-02, 1.874e-02, -2.082e-02, -4.106e-02, -1.802e-02, 2.252e-02, -2.069e-02, -3.166e-02, -2.319e-02, 2.788e-02, -1.365e-02, -3.017e-03, 3.420e-02)); + r += mul(s5_7, M4(-2.580e-03, -6.460e-03, 2.103e-02, -5.888e-02, 1.605e-02, 1.482e-02, 1.040e-02, -3.538e-02, -7.412e-03, -4.952e-03, -2.650e-04, 3.137e-02, -4.772e-02, 4.511e-02, -1.680e-02, 1.280e-02)); + r += mul(s5_8, M4(3.699e-02, 4.401e-02, 2.479e-02, 2.072e-02, -3.227e-03, 1.561e-02, -1.124e-02, 4.791e-03, 1.048e-02, 6.933e-03, -5.328e-04, 3.336e-02, -1.840e-02, -4.067e-02, 6.226e-03, -5.353e-02)); + r += mul(s6_0, M4(1.188e-02, -4.641e-02, -1.262e-02, -1.655e-02, 3.987e-03, 3.772e-02, 1.940e-02, -3.615e-02, -2.383e-02, -2.400e-02, 2.002e-01, -2.954e-02, 1.634e-02, 6.644e-03, -4.035e-03, 4.184e-03)); + r += mul(s6_1, M4(2.277e-02, 4.263e-03, -4.491e-02, 4.744e-02, -7.567e-02, 2.924e-02, -1.726e-02, 5.299e-02, 1.664e-01, -1.233e-01, 2.356e-03, -2.703e-01, -5.187e-02, -3.551e-02, 3.325e-02, 6.128e-02)); + r += mul(s6_2, M4(1.989e-02, 6.053e-03, 1.846e-02, 6.184e-02, 
-2.607e-02, -2.363e-02, 4.897e-02, -4.577e-02, 3.373e-01, -9.162e-02, 4.499e-02, 8.682e-02, 4.547e-02, 4.030e-03, 7.774e-04, 1.701e-03)); + r += mul(s6_3, M4(-5.472e-02, -3.444e-03, 1.608e-02, 6.295e-02, 1.196e-02, 6.397e-02, 5.240e-04, -1.021e-02, -3.050e-02, 5.217e-02, -8.370e-02, -1.127e-01, 2.977e-02, 6.301e-02, -1.641e-02, -3.698e-02)); + r += mul(s6_4, M4(7.836e-02, -1.085e-02, 6.898e-02, -1.623e-01, 5.813e-02, -1.794e-02, -7.797e-03, -6.090e-02, 7.416e-02, 1.549e-01, 9.543e-03, -1.116e-01, -1.078e-01, 1.926e-01, -5.152e-02, 1.227e-01)); + r += mul(s6_5, M4(1.059e-02, 2.364e-02, 2.401e-02, 6.629e-02, -7.339e-02, -3.505e-02, -9.375e-02, 1.857e-02, 8.049e-02, -3.784e-02, -3.831e-03, -1.878e-02, 1.048e-01, -2.087e-02, -7.422e-03, -4.039e-03)); + r += mul(s6_6, M4(-2.365e-03, -9.990e-03, -5.154e-02, 2.194e-02, 3.747e-02, 2.086e-02, -3.379e-02, 3.596e-03, -2.596e-02, 2.081e-03, 3.514e-02, -5.221e-02, 1.538e-02, 5.609e-02, 5.790e-02, -4.784e-02)); + r += mul(s6_7, M4(-1.720e-02, -1.905e-02, -2.715e-02, -2.962e-02, -1.308e-02, -1.083e-02, 2.486e-02, -3.409e-03, 2.073e-02, -2.824e-02, 8.650e-03, 2.239e-02, -1.353e-02, 1.140e-01, 2.117e-01, 8.122e-02)); + r += mul(s6_8, M4(5.500e-03, -2.479e-02, -3.083e-02, 3.443e-06, 8.760e-03, -2.614e-02, -1.346e-02, 1.243e-02, 1.106e-01, 5.146e-02, 1.032e-03, 2.408e-02, 1.198e-02, 6.767e-02, 3.023e-02, 7.031e-03)); + r += mul(s7_0, M4(-7.089e-03, -4.844e-02, -1.208e-02, -3.648e-02, 1.701e-02, 4.478e-02, -1.072e-01, 6.097e-02, -1.165e-03, -3.233e-02, -6.162e-02, 1.612e-03, -6.546e-03, -1.170e-02, -9.039e-03, -6.963e-02)); + r += mul(s7_1, M4(5.550e-02, -1.003e-02, -1.168e-01, 8.157e-02, 2.255e-02, -3.154e-02, -1.085e-01, -3.478e-02, 4.729e-02, -3.235e-02, -7.653e-02, 7.446e-02, -3.744e-02, 1.918e-02, -3.136e-02, 9.058e-02)); + r += mul(s7_2, M4(2.581e-02, -1.672e-02, 2.845e-03, 2.215e-02, 3.825e-02, -1.008e-02, 6.349e-03, 7.030e-02, 6.275e-02, 1.736e-02, -2.221e-02, 2.294e-02, 5.530e-02, -1.884e-02, -8.266e-03, -6.339e-03)); + r += 
mul(s7_3, M4(4.480e-02, 4.562e-02, -1.532e-01, 2.755e-02, -7.727e-03, -7.523e-02, -9.262e-02, 6.129e-02, -9.996e-03, 1.562e-02, 2.900e-02, -8.342e-03, -5.049e-02, 1.154e-01, 1.553e-01, -4.222e-02)); + r += mul(s7_4, M4(-2.480e-01, -4.181e-02, 8.623e-02, 2.482e-02, 9.856e-02, -7.836e-02, -1.145e-01, -2.366e-01, -6.881e-03, 5.524e-02, 1.590e-02, -9.871e-03, -5.909e-02, -6.382e-02, 2.760e-01, 1.855e-01)); + r += mul(s7_5, M4(-2.085e-01, 6.150e-02, 4.880e-02, -1.906e-02, -1.101e-01, -9.671e-02, -3.984e-02, 3.062e-02, 1.820e-02, 8.405e-03, 2.241e-02, -4.043e-02, 2.095e-01, 8.533e-02, 4.196e-02, 4.850e-02)); + r += mul(s7_6, M4(-4.355e-02, -5.505e-02, -1.267e-01, -5.453e-02, 4.345e-02, 1.526e-02, -1.739e-02, 1.774e-02, -2.131e-02, 4.405e-03, 9.372e-03, 1.876e-02, -3.428e-02, 5.894e-02, 5.885e-02, -3.260e-02)); + r += mul(s7_7, M4(8.225e-04, 4.162e-02, 1.538e-01, -4.316e-03, -2.047e-02, -8.498e-03, 7.413e-03, 9.833e-02, -1.454e-02, 1.027e-02, 1.865e-03, 1.100e-02, 4.003e-02, 2.181e-02, 1.353e-01, 1.522e-01)); + r += mul(s7_8, M4(2.791e-02, 1.638e-02, -1.935e-02, 2.872e-03, -1.592e-02, 1.637e-02, 6.777e-03, 3.492e-03, -1.286e-02, -5.990e-03, 7.047e-03, -2.066e-02, 6.342e-03, 6.939e-02, 4.972e-02, -3.984e-02)); + r += V4(-4.684e-03, 6.890e-04, -5.830e-05, -5.559e-03); + return r; +} + +void Pass9(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 
s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + 
V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t2[gxy] = f2(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + t3[gxy] = f3(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, 
s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); +} + +//!PASS 10 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0, t1, t2, t3 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) +#define l2(x, y) V4(O(t2, float2(x, y))) +#define l3(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8, V4 s4_0, V4 s4_1, V4 s4_2, V4 s4_3, V4 s4_4, V4 s4_5, V4 s4_6, V4 s4_7, V4 s4_8, V4 s5_0, V4 s5_1, V4 s5_2, V4 s5_3, V4 s5_4, V4 s5_5, V4 s5_6, V4 s5_7, V4 s5_8, V4 s6_0, V4 s6_1, V4 s6_2, V4 s6_3, V4 s6_4, V4 s6_5, V4 s6_6, V4 s6_7, V4 s6_8, V4 s7_0, V4 s7_1, V4 s7_2, V4 s7_3, V4 s7_4, V4 s7_5, V4 s7_6, V4 s7_7, V4 s7_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.033e-02, 3.884e-03, 4.972e-03, 1.856e-02, -2.252e-02, -6.567e-03, 1.834e-03, 1.889e-03, -3.430e-02, -5.868e-03, -2.753e-02, -9.852e-03, 1.149e-01, 5.219e-03, -9.645e-02, -1.800e-02)); + r += mul(s0_1, M4(-1.126e-02, 2.901e-02, 4.006e-03, -6.551e-03, 4.707e-03, -2.672e-02, -1.670e-03, 1.306e-02, 2.051e-02, -2.240e-02, 3.236e-02, 4.654e-03, 1.470e-01, -3.097e-01, 4.721e-02, 1.152e-01)); + r += mul(s0_2, M4(4.366e-03, -2.694e-03, 5.250e-03, 5.320e-03, 4.481e-03, 1.638e-02, 9.726e-03, 4.904e-03, -1.148e-03, 2.204e-02, -8.883e-03, 6.839e-05, -2.482e-02, 6.181e-02, 2.736e-03, -8.909e-03)); + r += mul(s0_3, M4(-7.610e-03, 7.788e-03, 1.847e-02, -2.180e-02, 2.830e-02, 6.135e-03, 8.182e-03, 1.302e-02, 2.205e-02, 2.265e-02, 
1.210e-02, 1.428e-02, 4.035e-02, 3.488e-02, 2.405e-02, -6.122e-04)); + r += mul(s0_4, M4(1.537e-02, 1.506e-02, -7.631e-03, 4.580e-02, -5.733e-03, 5.314e-02, -1.093e-01, -1.055e-01, -3.086e-02, -3.601e-02, -5.565e-02, -3.967e-02, 7.196e-02, -4.304e-02, -1.039e-01, 2.038e-01)); + r += mul(s0_5, M4(9.955e-04, -1.193e-02, -6.331e-03, -1.254e-02, -1.031e-02, -3.337e-02, 4.839e-03, -1.563e-03, 8.448e-04, -4.703e-03, 1.218e-02, -3.763e-03, 1.555e-02, 6.467e-02, 4.887e-03, -1.659e-02)); + r += mul(s0_6, M4(-9.537e-03, -8.011e-04, -1.289e-02, -2.877e-03, -1.477e-03, 8.749e-04, 8.176e-03, -4.993e-03, -1.018e-03, 1.302e-03, 1.075e-03, 4.430e-03, -8.630e-03, -1.426e-02, 1.284e-03, -6.897e-03)); + r += mul(s0_7, M4(-9.303e-03, -1.874e-02, -7.506e-03, -3.287e-03, 1.361e-03, -8.719e-03, 1.358e-02, 3.613e-02, 1.732e-03, 3.429e-03, 5.717e-03, -5.605e-03, -2.391e-02, -5.230e-03, -4.358e-02, -1.130e-02)); + r += mul(s0_8, M4(-4.785e-03, -2.385e-03, -1.026e-03, -1.010e-02, -6.487e-04, 6.573e-03, -1.054e-03, -7.562e-03, 1.978e-03, 1.197e-03, 2.030e-03, 9.124e-03, -3.354e-03, 3.436e-03, 2.368e-02, -4.266e-02)); + r += mul(s1_0, M4(-1.529e-01, 2.039e-01, 8.447e-03, 3.038e-02, -1.505e-02, -1.508e-02, 7.441e-03, 6.314e-03, -3.746e-02, -3.202e-02, -2.150e-02, -3.430e-02, -1.702e-02, 5.759e-03, -3.104e-02, -2.623e-03)); + r += mul(s1_1, M4(-1.082e-03, 4.792e-02, 6.862e-03, -2.338e-02, 8.142e-03, 2.402e-03, 8.938e-03, 1.018e-02, 1.801e-01, 1.204e-01, 1.753e-01, 1.470e-01, 4.248e-02, -2.265e-02, 4.831e-02, -7.421e-03)); + r += mul(s1_2, M4(3.095e-03, -2.495e-03, 5.918e-03, 7.425e-03, 1.247e-03, 1.278e-02, 7.347e-03, 1.108e-02, -2.195e-02, 3.677e-02, -3.238e-02, 3.665e-03, -6.743e-03, 7.914e-03, 3.957e-03, -1.294e-03)); + r += mul(s1_3, M4(-6.194e-02, 9.325e-02, -2.823e-01, 2.610e-01, 1.242e-01, 1.063e-02, -9.538e-02, 1.163e-02, 6.713e-03, 1.593e-02, -1.142e-02, -7.270e-06, 4.482e-02, 2.000e-02, 5.476e-02, 1.686e-02)); + r += mul(s1_4, M4(3.596e-02, -2.554e-02, 9.773e-03, 7.261e-02, 1.376e-01, 
2.608e-01, -2.402e-01, -3.362e-01, -6.171e-02, -6.656e-02, -5.303e-02, -5.042e-02, 8.435e-02, 4.511e-02, -6.856e-02, -1.688e-02)); + r += mul(s1_5, M4(-4.948e-03, -1.384e-02, -9.850e-03, -1.294e-02, -8.958e-03, -2.200e-02, 4.866e-03, -1.164e-02, -1.419e-02, -2.802e-02, 3.073e-04, -2.253e-02, 3.047e-03, 5.896e-02, 1.658e-03, 3.601e-02)); + r += mul(s1_6, M4(-1.752e-02, -1.393e-03, 1.343e-02, -6.912e-04, -1.211e-02, -1.511e-03, 1.580e-02, -5.149e-05, -6.598e-04, -1.994e-03, -3.715e-03, 1.939e-03, -8.240e-03, -1.204e-02, -1.863e-03, -5.386e-03)); + r += mul(s1_7, M4(-9.921e-03, -1.642e-02, -1.441e-04, -2.140e-02, -9.104e-03, -2.207e-02, 1.952e-02, 3.773e-02, 9.063e-03, 8.820e-03, 9.917e-03, -1.275e-03, -3.198e-02, -1.296e-02, -3.471e-02, -3.518e-04)); + r += mul(s1_8, M4(-1.870e-03, 1.285e-03, -2.583e-03, -6.924e-03, -1.586e-03, 1.603e-03, 4.076e-04, 3.254e-03, -5.009e-04, 1.443e-03, -5.724e-03, 8.396e-03, 4.439e-03, 1.328e-03, 2.810e-02, -4.291e-02)); + r += mul(s2_0, M4(-2.825e-02, 7.778e-03, 6.879e-03, 6.775e-04, 1.669e-03, -1.838e-02, -3.742e-02, -1.220e-02, 8.573e-03, -2.186e-05, 5.825e-03, 6.538e-03, -8.814e-02, 1.068e-03, -1.412e-02, -2.859e-03)); + r += mul(s2_1, M4(2.240e-02, -3.845e-02, -1.420e-02, 1.136e-02, -1.729e-01, 6.307e-03, 5.652e-02, 9.547e-02, -5.163e-02, -2.987e-02, -5.771e-03, 4.459e-03, 3.096e-03, 2.589e-02, 4.600e-03, -1.655e-02)); + r += mul(s2_2, M4(-1.673e-02, 4.319e-03, -7.842e-03, -1.910e-02, 3.317e-02, 1.533e-01, 4.235e-03, -4.861e-02, -2.429e-03, -2.997e-02, -1.245e-03, 6.226e-03, -7.891e-03, -1.584e-02, -1.856e-03, 7.774e-03)); + r += mul(s2_3, M4(-4.381e-02, 2.754e-02, -6.510e-02, 2.031e-02, -9.400e-02, -3.504e-02, -1.971e-02, 4.256e-03, -2.762e-02, -1.132e-02, 7.760e-03, -4.663e-03, -1.978e-01, 1.753e-02, -1.958e-01, 4.394e-02)); + r += mul(s2_4, M4(2.428e-01, -2.087e-01, 2.311e-01, -2.088e-01, 1.598e-01, 1.140e-01, 1.588e-01, -3.334e-01, 1.880e-01, 1.135e-01, 5.103e-02, 8.828e-02, -3.033e-02, 3.760e-01, -4.199e-02, 2.935e-01)); + r 
+= mul(s2_5, M4(9.823e-03, 7.790e-02, 1.327e-02, 7.703e-02, -5.115e-02, -8.814e-02, -7.008e-02, 1.904e-01, 1.031e-02, 7.739e-02, 2.008e-02, 6.279e-03, 2.631e-02, -1.120e-02, 1.851e-02, -2.875e-02)); + r += mul(s2_6, M4(-4.129e-03, 6.841e-04, -1.334e-02, 8.805e-03, -1.776e-02, 5.161e-03, -1.021e-02, 1.459e-03, -1.630e-02, -1.006e-03, -1.680e-02, -1.178e-02, -1.506e-02, 4.431e-03, -9.363e-02, -3.570e-03)); + r += mul(s2_7, M4(-2.595e-02, -8.209e-03, 2.180e-02, -3.919e-02, 2.377e-03, -4.880e-03, 1.113e-02, 3.670e-02, 1.886e-02, 4.059e-03, 7.378e-02, 1.729e-02, -1.076e-02, -6.551e-02, -2.519e-02, 3.988e-02)); + r += mul(s2_8, M4(-2.461e-03, -7.184e-03, -6.953e-03, 2.376e-02, 4.969e-03, 7.072e-03, -6.686e-03, 1.143e-02, -7.839e-03, -5.919e-03, -1.470e-02, 4.468e-02, -8.697e-03, -2.448e-03, -1.257e-02, -1.114e-02)); + r += mul(s3_0, M4(-1.878e-02, -2.434e-03, 8.435e-03, -5.416e-04, -5.024e-02, -4.839e-03, -2.418e-02, -6.214e-03, 3.156e-02, -6.234e-03, 1.266e-02, -2.019e-03, -3.627e-02, -1.740e-02, -1.324e-02, -1.049e-02)); + r += mul(s3_1, M4(9.307e-03, -9.192e-03, -2.072e-02, 1.029e-02, 8.159e-02, -7.533e-02, 1.932e-02, 3.406e-03, -1.063e-01, -4.969e-02, 1.397e-03, 3.384e-02, 5.695e-03, -2.487e-04, -7.244e-03, -7.036e-03)); + r += mul(s3_2, M4(-9.433e-03, 1.165e-03, -9.516e-03, -1.801e-02, 8.395e-03, 9.396e-02, 1.441e-02, 1.786e-02, 1.334e-03, -3.240e-02, -1.211e-02, -9.708e-03, -2.537e-03, -5.786e-03, 1.472e-04, 1.409e-02)); + r += mul(s3_3, M4(-3.034e-02, 4.203e-03, -5.334e-02, -3.159e-03, -3.479e-02, -2.786e-02, -3.998e-02, -1.344e-02, -8.960e-02, -2.011e-02, -5.261e-02, 2.207e-02, 4.456e-03, -2.643e-02, -3.284e-02, -2.436e-02)); + r += mul(s3_4, M4(8.252e-02, -7.086e-02, 1.282e-01, -4.984e-02, 2.513e-02, 4.663e-02, 8.703e-02, -3.284e-02, 1.492e-01, 3.702e-01, 1.355e-01, -1.026e-01, 9.108e-02, 1.341e-01, 5.950e-02, 7.397e-02)); + r += mul(s3_5, M4(1.464e-02, 1.050e-01, 2.294e-02, 8.619e-02, -1.945e-02, -5.211e-02, -3.405e-02, 7.557e-02, 1.460e-02, 3.943e-02, 
1.422e-02, 3.550e-02, 1.362e-02, 7.609e-03, 6.740e-03, -1.501e-02)); + r += mul(s3_6, M4(-9.561e-04, -4.434e-03, -3.246e-03, 1.240e-03, -9.618e-03, 5.566e-04, -1.053e-02, -9.058e-03, -2.425e-02, -2.365e-02, 2.156e-02, -2.252e-02, -2.594e-02, -4.650e-04, -3.137e-02, -1.236e-02)); + r += mul(s3_7, M4(-1.013e-02, -3.561e-03, -1.650e-02, -2.594e-02, 7.368e-03, -1.746e-03, 2.423e-02, 1.971e-02, 2.600e-02, -1.256e-02, 2.182e-02, 5.653e-02, -1.538e-02, -4.211e-02, -3.166e-03, -1.983e-02)); + r += mul(s3_8, M4(-1.749e-03, -1.131e-02, -1.036e-03, 2.914e-02, 1.159e-03, 4.383e-03, -5.259e-03, 8.035e-03, -2.202e-03, -8.608e-04, -7.083e-03, 4.011e-02, -1.159e-02, -3.661e-04, -1.470e-02, 3.702e-03)); + r += mul(s4_0, M4(1.241e-01, -2.722e-03, -1.522e-01, -2.910e-02, -7.858e-02, -1.887e-02, -1.970e-02, -8.743e-03, 3.292e-02, 1.835e-03, -1.332e-02, 1.231e-02, 1.371e-02, 1.383e-02, -1.290e-03, 2.727e-04)); + r += mul(s4_1, M4(2.705e-01, 3.412e-01, -2.322e-01, -3.342e-01, 1.073e-01, 2.674e-03, 2.774e-02, 3.236e-02, -2.611e-02, -2.326e-03, 4.171e-02, 1.519e-02, -2.056e-02, -1.122e-02, -1.940e-02, -5.084e-02)); + r += mul(s4_2, M4(1.196e-02, 4.752e-02, -2.241e-02, -7.073e-02, -9.354e-03, 2.420e-02, 2.362e-02, 2.388e-03, -9.792e-04, -6.070e-02, -1.017e-02, -1.761e-05, -2.084e-02, -2.422e-02, -2.179e-02, -3.296e-02)); + r += mul(s4_3, M4(-8.960e-03, -2.168e-02, 4.790e-03, 6.261e-03, -8.020e-02, -1.778e-02, -9.870e-02, -1.462e-02, 2.071e-02, -6.225e-03, 7.382e-03, 9.693e-03, -1.874e-02, 8.222e-03, 3.224e-02, -8.453e-03)); + r += mul(s4_4, M4(-3.010e-02, -2.755e-04, 1.521e-02, -9.902e-03, 1.081e-02, -1.996e-02, 9.496e-02, -7.462e-02, -4.012e-02, 2.716e-03, -1.808e-03, -7.398e-02, 9.465e-02, -6.016e-02, -5.529e-02, 4.329e-02)); + r += mul(s4_5, M4(7.685e-04, -1.718e-02, -2.018e-03, 2.241e-02, 1.576e-02, -2.413e-02, -2.552e-02, 1.115e-01, 2.040e-02, 2.674e-02, -2.387e-02, 2.060e-02, 1.030e-02, 1.147e-01, 1.819e-02, 6.421e-02)); + r += mul(s4_6, M4(3.099e-04, -7.753e-04, -4.291e-03, 
-2.896e-03, -2.850e-03, 2.901e-03, -2.640e-02, 2.571e-03, -1.092e-03, -7.156e-04, 5.573e-03, -1.358e-03, -9.563e-03, -7.458e-03, -8.650e-03, -2.034e-03)); + r += mul(s4_7, M4(8.655e-03, 5.108e-03, 8.159e-03, 1.230e-02, -6.498e-04, 5.563e-03, -8.667e-03, -2.757e-02, -1.389e-03, 7.668e-04, -7.603e-04, 9.307e-03, -2.008e-02, -4.393e-03, -9.184e-03, -3.818e-03)); + r += mul(s4_8, M4(4.210e-04, 4.429e-03, -2.299e-03, 3.601e-04, 9.826e-03, 9.722e-03, 4.011e-03, -2.770e-02, 2.060e-03, 1.865e-04, 2.297e-03, 1.553e-02, 1.851e-03, 3.818e-03, 3.571e-02, -2.679e-02)); + r += mul(s5_0, M4(-1.703e-02, -2.141e-02, 3.662e-03, -1.035e-02, -4.829e-02, -1.841e-02, -2.961e-02, -8.827e-03, 1.959e-02, 2.521e-02, -3.347e-02, 1.142e-02, 2.545e-02, 1.529e-02, 1.503e-02, -9.263e-03)); + r += mul(s5_1, M4(8.298e-03, 1.209e-02, 5.804e-03, 2.954e-02, 1.871e-01, -5.285e-02, 6.019e-03, 5.980e-02, 2.602e-01, -2.048e-01, -1.827e-01, 1.879e-01, -1.194e-01, 1.137e-01, -3.773e-02, -9.990e-02)); + r += mul(s5_2, M4(-3.284e-03, -1.084e-02, 1.225e-03, -1.040e-02, -9.596e-03, 3.377e-03, 1.911e-02, 1.322e-02, 5.204e-03, -1.390e-01, 1.931e-02, 2.311e-02, -1.113e-02, -4.676e-02, -1.083e-02, -1.427e-02)); + r += mul(s5_3, M4(-7.960e-04, -1.255e-02, -1.112e-02, -4.686e-03, -8.179e-02, -2.207e-02, -7.642e-02, -1.261e-02, 2.133e-02, -1.009e-02, 3.408e-02, -6.496e-04, -2.391e-03, 2.106e-02, -3.746e-02, 8.466e-03)); + r += mul(s5_4, M4(-8.877e-03, 1.617e-02, -2.680e-03, -3.528e-02, -4.871e-02, -3.554e-02, 3.489e-01, -2.440e-01, -4.708e-02, 3.845e-02, -3.043e-02, -7.832e-02, 7.207e-02, -2.007e-01, 1.665e-01, 6.463e-02)); + r += mul(s5_5, M4(-6.225e-04, -1.320e-02, -4.382e-03, 2.419e-02, 2.625e-02, 2.141e-02, -1.971e-02, 2.993e-02, 8.798e-03, 1.972e-02, -2.796e-02, 6.802e-02, 1.662e-02, 1.999e-01, 2.263e-03, -2.605e-02)); + r += mul(s5_6, M4(-1.662e-03, -9.862e-04, -1.924e-03, -1.690e-03, -5.530e-04, 9.113e-04, -2.399e-02, 1.501e-04, 4.862e-04, -4.257e-04, 8.270e-03, -4.402e-04, -7.361e-03, -8.730e-03, -9.817e-03, 
-3.118e-04)); + r += mul(s5_7, M4(5.400e-03, 2.631e-03, 9.178e-03, 1.119e-02, 1.026e-03, 1.547e-02, -3.413e-02, -1.751e-02, 9.076e-04, 1.486e-03, -9.140e-04, 1.067e-03, -6.821e-03, 8.481e-03, -3.967e-02, 5.920e-04)); + r += mul(s5_8, M4(1.815e-03, 4.437e-03, -2.263e-04, -7.537e-04, 1.101e-02, 8.392e-03, 3.906e-03, -1.950e-02, 3.161e-03, 1.800e-03, -1.970e-03, 1.948e-02, -6.366e-03, -4.956e-04, 2.726e-02, -1.865e-02)); + r += mul(s6_0, M4(7.153e-02, 1.432e-02, 5.029e-03, 1.068e-02, -2.239e-03, 5.988e-04, 3.456e-03, -1.638e-03, -1.509e-02, 3.740e-04, -6.526e-03, -8.932e-05, 1.896e-02, 7.404e-03, 5.672e-03, 8.338e-03)); + r += mul(s6_1, M4(2.280e-02, -1.824e-02, 2.243e-02, 4.961e-02, 1.042e-01, 4.623e-02, -1.059e-02, 4.627e-04, -1.596e-02, -2.779e-02, 2.384e-02, 1.553e-02, 2.837e-02, -5.811e-03, -1.298e-03, 3.311e-03)); + r += mul(s6_2, M4(-6.403e-03, -1.052e-02, -9.886e-03, 5.213e-03, 1.813e-02, 7.935e-02, 2.713e-02, 2.399e-02, 5.155e-03, 3.446e-03, 2.308e-03, 1.125e-03, -7.171e-03, -1.546e-02, 6.358e-03, -1.187e-02)); + r += mul(s6_3, M4(7.386e-02, 5.118e-02, 1.043e-01, 3.452e-02, 3.457e-02, -3.300e-03, -1.363e-02, -4.357e-03, -2.312e-02, 4.680e-03, -1.498e-02, -1.621e-03, -5.287e-03, -2.001e-02, 1.342e-02, -4.378e-03)); + r += mul(s6_4, M4(1.004e-01, -3.057e-01, 7.350e-03, -2.862e-01, -1.540e-01, -3.552e-02, 1.702e-01, 5.136e-02, 4.456e-02, -2.083e-02, -8.627e-02, -4.431e-02, -3.276e-01, 3.085e-01, -1.260e-01, 2.106e-01)); + r += mul(s6_5, M4(1.999e-03, 3.940e-02, 1.170e-02, -1.264e-02, 1.583e-02, -6.350e-02, -4.114e-02, 3.691e-02, -2.899e-02, 1.204e-03, 1.770e-02, -2.299e-02, 7.905e-03, -1.410e-02, 7.572e-03, 1.972e-02)); + r += mul(s6_6, M4(2.522e-03, 2.384e-03, 2.441e-02, 4.844e-04, 1.894e-02, 1.933e-03, -1.505e-03, 8.773e-03, -1.008e-02, -6.878e-03, 5.712e-03, -6.634e-03, -8.502e-03, -6.767e-03, -1.307e-02, -1.115e-02)); + r += mul(s6_7, M4(-1.665e-02, 2.475e-02, 2.821e-02, -7.399e-02, -3.618e-02, -1.538e-02, -1.023e-01, -8.032e-02, 2.562e-03, 1.034e-02, 
7.017e-03, 2.552e-02, -1.381e-02, 4.178e-02, -6.669e-02, 3.070e-02)); + r += mul(s6_8, M4(-1.780e-03, -5.765e-03, -1.343e-02, 3.723e-02, -2.313e-03, -1.388e-02, -1.401e-02, -2.972e-02, 1.695e-02, 2.476e-03, 2.459e-02, 1.473e-02, 3.527e-02, 2.297e-02, 2.847e-02, -1.627e-02)); + r += mul(s7_0, M4(2.734e-02, 9.389e-03, 1.899e-02, 9.436e-03, 1.607e-02, -5.627e-03, -4.848e-03, -6.998e-03, -4.030e-03, -1.852e-03, -5.729e-03, -6.533e-08, 1.703e-02, 7.887e-03, 7.217e-03, 4.398e-03)); + r += mul(s7_1, M4(5.733e-03, 1.714e-02, 2.708e-02, 4.638e-02, 9.693e-02, 7.149e-02, -3.035e-02, -1.722e-02, 9.700e-03, -7.294e-03, -1.946e-02, -1.432e-02, 6.034e-03, -1.510e-02, 2.483e-02, 3.817e-03)); + r += mul(s7_2, M4(-2.516e-03, -1.529e-02, -7.591e-03, 7.588e-03, 3.502e-02, 8.863e-02, 1.774e-02, -8.667e-04, 9.195e-03, 2.465e-02, 1.436e-03, -1.286e-02, -1.302e-02, 8.339e-04, 1.971e-03, -1.640e-02)); + r += mul(s7_3, M4(-3.061e-02, 6.541e-02, -4.342e-02, 3.258e-02, 6.861e-02, -4.957e-03, -2.240e-02, -1.054e-02, -5.297e-02, 2.006e-02, 2.643e-04, -5.721e-03, -3.480e-02, -6.939e-04, 1.051e-02, 3.157e-03)); + r += mul(s7_4, M4(3.394e-03, -1.038e-01, -1.067e-01, -8.081e-02, -3.834e-01, -9.184e-02, 3.959e-01, 1.383e-01, -2.757e-01, -2.394e-01, 1.315e-01, 9.302e-02, 3.851e-02, 6.530e-02, -5.134e-02, 2.032e-02)); + r += mul(s7_5, M4(1.928e-02, 2.826e-02, 3.062e-02, -2.727e-02, 5.898e-02, -1.647e-01, -6.162e-02, 1.155e-01, -1.409e-02, -1.304e-01, 8.152e-03, 5.409e-02, -7.279e-03, -7.746e-02, 1.113e-02, 6.624e-02)); + r += mul(s7_6, M4(4.472e-03, -7.955e-04, 3.064e-03, 9.964e-03, 1.995e-02, -4.696e-04, -6.926e-03, 8.025e-03, 3.883e-03, 2.538e-03, 1.678e-02, -1.129e-02, -1.199e-02, -3.737e-03, -3.190e-02, -2.174e-03)); + r += mul(s7_7, M4(-1.147e-02, 1.181e-02, 1.281e-02, -3.577e-02, -1.517e-02, -4.086e-03, -1.170e-01, -1.094e-01, -4.812e-02, -2.669e-02, 1.685e-01, 1.296e-01, 1.777e-03, 1.122e-02, 2.007e-02, -1.995e-02)); + r += mul(s7_8, M4(-1.510e-04, -6.915e-03, -1.230e-02, 2.398e-02, -3.685e-03, 
-1.359e-03, -3.283e-02, -3.793e-02, 1.634e-02, -9.149e-03, 1.263e-02, 8.960e-02, 2.716e-02, 2.490e-02, 1.270e-02, -2.679e-02)); + r += V4(3.226e-04, 2.004e-04, 1.483e-04, 9.562e-05); + return tanh(r); +} + +void Pass10(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 s4_0 = l2(-1.0, -1.0); + V4 s4_1 = l2(0.0, -1.0); + 
V4 s4_2 = l2(1.0, -1.0); + V4 s4_3 = l2(-1.0, 0.0); + V4 s4_4 = l2(0.0, 0.0); + V4 s4_5 = l2(1.0, 0.0); + V4 s4_6 = l2(-1.0, 1.0); + V4 s4_7 = l2(0.0, 1.0); + V4 s4_8 = l2(1.0, 1.0); + V4 s5_0 = -max(-s4_0, 0.0); + V4 s5_1 = -max(-s4_1, 0.0); + V4 s5_2 = -max(-s4_2, 0.0); + V4 s5_3 = -max(-s4_3, 0.0); + V4 s5_4 = -max(-s4_4, 0.0); + V4 s5_5 = -max(-s4_5, 0.0); + V4 s5_6 = -max(-s4_6, 0.0); + V4 s5_7 = -max(-s4_7, 0.0); + V4 s5_8 = -max(-s4_8, 0.0); + s4_0 = max(s4_0, 0.0); + s4_1 = max(s4_1, 0.0); + s4_2 = max(s4_2, 0.0); + s4_3 = max(s4_3, 0.0); + s4_4 = max(s4_4, 0.0); + s4_5 = max(s4_5, 0.0); + s4_6 = max(s4_6, 0.0); + s4_7 = max(s4_7, 0.0); + s4_8 = max(s4_8, 0.0); + + V4 s6_0 = l3(-1.0, -1.0); + V4 s6_1 = l3(0.0, -1.0); + V4 s6_2 = l3(1.0, -1.0); + V4 s6_3 = l3(-1.0, 0.0); + V4 s6_4 = l3(0.0, 0.0); + V4 s6_5 = l3(1.0, 0.0); + V4 s6_6 = l3(-1.0, 1.0); + V4 s6_7 = l3(0.0, 1.0); + V4 s6_8 = l3(1.0, 1.0); + V4 s7_0 = -max(-s6_0, 0.0); + V4 s7_1 = -max(-s6_1, 0.0); + V4 s7_2 = -max(-s6_2, 0.0); + V4 s7_3 = -max(-s6_3, 0.0); + V4 s7_4 = -max(-s6_4, 0.0); + V4 s7_5 = -max(-s6_5, 0.0); + V4 s7_6 = -max(-s6_6, 0.0); + V4 s7_7 = -max(-s6_7, 0.0); + V4 s7_8 = -max(-s6_8, 0.0); + s6_0 = max(s6_0, 0.0); + s6_1 = max(s6_1, 0.0); + s6_2 = max(s6_2, 0.0); + s6_3 = max(s6_3, 0.0); + s6_4 = max(s6_4, 0.0); + s6_5 = max(s6_5, 0.0); + s6_6 = max(s6_6, 0.0); + s6_7 = max(s6_7, 0.0); + s6_8 = max(s6_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8, s4_0, s4_1, s4_2, s4_3, s4_4, s4_5, s4_6, s4_7, s4_8, s5_0, s5_1, s5_2, s5_3, s5_4, s5_5, s5_6, s5_7, s5_8, s6_0, s6_1, s6_2, s6_3, s6_4, s6_5, s6_6, s6_7, s6_8, s7_0, s7_1, s7_2, s7_3, s7_4, s7_5, s7_6, s7_7, s7_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = 
{1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-8x4C-NVL-DN.hlsl b/src/Effects/CuNNy/CuNNy-8x4C-NVL-DN.hlsl new file mode 100644 index 000000000..69bcd3894 --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-8x4C-NVL-DN.hlsl @@ -0,0 +1,778 @@ +// CuNNy 8x4C BILINEAR RGB NVL DN - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-DN-D04N08 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0 + +#define l0(x, y) min16float((dot(float3(-1.880e-01, -3.696e-01, -8.936e-02), O(INPUT, float2(x, y)).rgb) + 5.137e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(6.049e-03, -3.524e-01, -1.308e-01, -6.691e-02) * s0_0; + r += V4(1.720e-02, -7.092e-02, -3.030e-01, 1.654e-01) * s0_1; + r += V4(-6.706e-03, 2.289e-01, 1.982e-03, -5.756e-02) * s0_2; + r += V4(-2.761e-02, 5.050e-01, -2.036e-01, 1.265e-01) * s0_3; + r += V4(-8.654e-01, -6.035e-01, -2.119e-01, 5.055e-01) * s0_4; + r += V4(-7.114e-03, 2.325e-02, 5.721e-02, 4.585e-02) * s0_5; + r += V4(2.796e-01, 1.680e-01, 1.353e-01, 1.286e-02) * s0_6; + r += V4(5.684e-01, 3.022e-01, 6.426e-01, 8.931e-02) * s0_7; + r += V4(3.723e-02, -2.036e-01, 2.732e-02, -4.101e-02) * s0_8; + r += V4(1.324e-02, -9.379e-05, 8.452e-03, 5.165e-02); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 
= l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.216e-02, 1.062e-01, -3.433e-03, -1.923e-01, 6.300e-02, -4.594e-01, 2.025e-01, 8.655e-03, -5.497e-02, 1.694e-01, -1.806e-01, 2.115e-01, -6.176e-02, 1.167e-02, -5.987e-02, 1.167e-01)); + r += mul(s0_1, M4(-1.646e-01, -5.524e-01, -1.352e-01, 1.704e-01, 3.398e-02, -2.598e-01, 1.616e-01, -1.772e-01, -5.648e-02, 2.755e-01, 2.638e-02, -2.657e-02, 3.774e-02, -6.833e-02, -1.141e-01, -2.438e-01)); + r += mul(s0_2, M4(-1.459e-01, 9.939e-02, -6.457e-04, 2.352e-02, 5.006e-02, -7.759e-01, -4.862e-02, -3.366e-02, 9.508e-02, 1.537e-01, -6.771e-02, -1.260e-01, 1.067e-01, -5.893e-02, -9.811e-02, -1.060e-02)); + r += mul(s0_3, M4(-2.901e-01, 2.907e-01, 2.178e-01, -3.877e-01, 9.034e-03, 8.718e-03, -1.213e-01, 9.252e-02, 3.286e-01, -8.247e-02, -5.573e-02, -3.852e-01, -1.371e-01, 1.877e-01, 2.337e-01, 5.324e-01)); + r += mul(s0_4, M4(-9.182e-01, 1.013e-01, 2.969e-01, 7.117e-01, -2.367e-01, -7.128e-02, 1.828e-01, 5.993e-01, -2.965e-01, 1.323e-01, 3.117e-02, -3.215e-01, -1.410e-01, 5.359e-02, -1.137e-01, -2.603e-01)); + r += mul(s0_5, M4(-1.071e-01, -8.801e-02, 9.524e-03, -2.937e-02, 7.723e-02, 1.195e-01, -9.056e-02, 6.161e-02, 1.962e-01, -2.740e-01, -9.418e-02, 1.141e-01, 6.203e-02, -1.084e-01, 2.402e-01, -2.066e-01)); + r += mul(s0_6, M4(2.226e-01, -2.259e-01, -2.499e-02, -9.184e-02, -1.499e-01, -3.737e-02, 1.576e-01, 1.084e-01, -2.221e-01, -1.080e-02, 2.643e-02, -1.023e-01, 1.068e-01, 1.193e-01, 
-2.781e-01, 3.396e-01)); + r += mul(s0_7, M4(7.520e-01, -1.043e-01, -4.535e-02, 2.775e-01, 1.577e-01, -1.526e-01, 1.796e-01, 1.085e-01, -1.012e+00, 4.333e-02, 1.270e-02, -1.692e-01, 1.127e-01, -2.847e-01, -1.784e-01, -3.956e-01)); + r += mul(s0_8, M4(2.206e-01, 1.370e-01, -7.453e-02, 1.050e-01, 8.412e-02, -1.396e-01, 1.707e-02, -1.654e-02, -2.116e-01, -7.944e-02, 1.244e-01, -6.709e-02, -5.577e-02, 1.619e-01, -2.818e-01, 1.460e-01)); + r += mul(s1_0, M4(1.180e-01, -2.345e-01, 5.406e-02, -1.102e-01, 1.559e-02, -3.865e-01, -1.077e-01, 1.442e-02, -1.405e-01, 1.578e-01, -3.338e-02, 1.157e-01, -1.676e-01, 4.656e-02, -1.507e-01, 2.590e-02)); + r += mul(s1_1, M4(-3.112e-02, -5.537e-01, -3.626e-01, -2.915e-01, 7.495e-02, 4.473e-01, -1.847e-01, -8.743e-02, -3.290e-02, 3.660e-02, 1.252e-01, 1.058e-02, 1.193e-01, 6.421e-02, -1.456e-01, -1.693e-01)); + r += mul(s1_2, M4(-1.047e-01, -4.306e-01, 6.486e-03, 1.137e-01, 2.935e-02, -3.608e-01, 5.242e-02, -2.374e-02, 1.130e-01, -4.864e-02, -7.302e-02, -2.205e-02, 8.227e-02, -8.403e-02, -9.468e-02, 8.095e-02)); + r += mul(s1_3, M4(-3.759e-02, 2.709e-01, 1.269e-01, -4.994e-01, -1.577e-02, 1.871e-01, -2.532e-01, 8.960e-02, 2.298e-01, -2.462e-01, -1.634e-02, -3.955e-01, 2.750e-02, -4.812e-02, -2.441e-01, 9.926e-01)); + r += mul(s1_4, M4(-7.288e-01, 5.644e-01, 1.042e+00, 6.160e-01, -4.271e-01, 4.419e-01, 1.437e-01, 3.840e-01, -1.220e-01, -8.627e-01, 6.664e-02, -1.220e-02, 5.260e-02, 1.505e-01, -2.182e-01, -6.116e-01)); + r += mul(s1_5, M4(1.659e-01, 2.566e-01, -5.954e-02, -9.187e-02, -8.251e-02, 1.091e-01, -1.506e-01, 1.370e-01, 3.056e-01, -3.512e-01, -4.956e-03, 7.008e-02, 1.320e-01, -3.995e-01, -8.603e-03, -3.542e-01)); + r += mul(s1_6, M4(2.549e-01, -7.946e-02, -1.755e-01, -2.902e-02, -1.912e-01, 2.349e-01, 6.770e-02, 9.683e-02, -2.690e-01, -1.715e-01, 5.692e-02, -1.064e-01, 2.998e-01, 7.619e-02, 8.040e-03, 2.706e-01)); + r += mul(s1_7, M4(7.320e-01, 1.397e-01, -5.600e-02, 9.609e-02, -1.267e-01, 6.841e-02, 2.429e-01, 3.167e-02, 
-6.816e-01, -3.313e-03, 5.622e-02, -4.727e-02, -3.420e-01, 4.283e-02, -3.250e-01, -4.118e-01)); + r += mul(s1_8, M4(1.607e-01, 1.581e-01, -6.049e-02, 9.118e-02, -1.583e-02, 2.918e-01, 1.703e-02, -1.206e-01, -2.114e-01, -1.248e-01, 6.689e-02, -2.131e-02, -7.779e-02, 1.069e-01, -1.181e-01, 2.230e-01)); + r += V4(1.959e-02, -5.807e-03, 9.415e-02, 7.247e-03); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(9.727e-02, 1.849e-01, 2.125e-02, 1.933e-01, 9.183e-02, 8.307e-03, -9.035e-02, 3.241e-02, 1.141e-01, 8.739e-02, -9.547e-02, 1.616e-01, 2.912e-02, -1.780e-02, 5.433e-02, 
2.720e-02)); + r += mul(s0_1, M4(-1.524e-01, -9.138e-02, 8.798e-02, -1.691e-01, 8.519e-03, 3.597e-02, -1.784e-02, 3.049e-02, 3.078e-02, 1.823e-01, 1.051e-02, -5.317e-02, -1.977e-01, 1.013e-01, 1.215e-01, 4.261e-02)); + r += mul(s0_2, M4(-1.992e-02, -1.191e-01, -1.365e-03, 3.976e-02, 3.452e-03, 7.503e-03, 4.850e-03, 8.970e-03, -7.652e-03, 1.166e-01, 9.888e-02, 3.423e-03, -3.354e-01, -3.335e-01, -2.226e-02, -1.509e-01)); + r += mul(s0_3, M4(-7.994e-02, 1.374e-01, -1.701e-02, -2.530e-01, 2.153e-01, -6.957e-03, -1.405e-01, -6.175e-02, 7.274e-03, 1.734e-01, -9.107e-02, -1.303e-01, -1.265e-01, 1.669e-02, 3.494e-02, -8.377e-02)); + r += mul(s0_4, M4(-1.124e+00, 1.355e-02, -1.979e-01, -4.092e-01, -1.276e-01, -1.096e-01, 5.949e-02, 1.073e-01, -4.780e-02, 1.378e-01, 1.905e-01, -9.525e-02, -5.999e-01, 1.274e-01, 8.416e-01, 2.483e-01)); + r += mul(s0_5, M4(3.312e-01, 2.036e-01, -5.231e-02, 5.357e-02, 1.666e-03, -2.102e-03, -3.213e-03, 4.747e-02, 1.130e-01, 3.492e-01, -1.263e-01, 4.100e-01, -5.859e-01, 4.875e-02, 2.227e-01, 3.127e-01)); + r += mul(s0_6, M4(-3.699e-02, 6.066e-02, 3.448e-03, -4.158e-03, -4.048e-03, -3.619e-02, -8.830e-02, -8.917e-03, 2.990e-02, 6.919e-03, 9.803e-02, 2.188e-02, 5.674e-02, -3.122e-02, -6.793e-02, 8.573e-02)); + r += mul(s0_7, M4(-1.255e-01, 1.754e-01, -1.332e-01, -1.124e-01, -2.163e-01, 1.552e-02, -7.485e-04, 4.194e-02, -1.899e-01, 1.334e-01, -1.721e-01, -3.487e-01, 3.847e-01, -3.823e-02, 1.121e-02, -7.128e-02)); + r += mul(s0_8, M4(7.152e-02, -1.631e-02, 4.810e-02, 1.435e-01, 3.881e-02, -3.596e-02, -7.544e-03, -1.071e-01, -8.509e-02, 1.110e-01, 8.542e-02, 1.980e-02, -1.134e-01, -7.967e-02, -1.586e-01, 2.511e-01)); + r += mul(s1_0, M4(2.326e-01, 4.791e-02, -1.996e-01, 1.352e-02, -9.909e-03, 1.117e-01, 2.198e-02, -6.683e-02, 1.356e-01, 2.830e-01, -8.418e-02, 2.137e-01, -1.401e-02, -7.056e-02, 5.360e-02, 6.243e-02)); + r += mul(s1_1, M4(7.739e-01, -3.172e-01, -2.031e-01, 2.054e-01, -1.263e-01, -7.571e-03, 8.090e-02, -1.372e-01, 1.053e-01, 2.982e-01, 
-6.235e-02, 1.452e-02, 1.973e-01, 9.233e-02, -1.067e-01, 1.088e-01)); + r += mul(s1_2, M4(-1.136e-01, -1.332e-01, -7.369e-02, 2.046e-01, -9.302e-02, 2.722e-02, 9.461e-02, -1.895e-01, 1.216e-02, 2.595e-01, 1.028e-01, 8.413e-02, -1.339e-01, -2.259e-01, -1.047e-01, 5.994e-02)); + r += mul(s1_3, M4(1.224e-01, -3.713e-02, -2.383e-01, -1.743e-01, -1.876e-01, 1.155e-01, 2.212e-01, -1.375e-01, 1.618e-01, 2.628e-01, -1.161e-01, -1.826e-01, 8.003e-02, -1.961e-02, -6.278e-02, -5.710e-02)); + r += mul(s1_4, M4(-2.647e-01, -1.603e-01, -7.731e-01, 1.958e-01, -4.093e-01, -1.110e-01, 3.352e-01, -3.093e-02, -6.201e-01, 3.073e-01, 3.779e-01, -2.733e-01, 4.035e-01, 1.230e-01, -1.606e-01, 9.421e-02)); + r += mul(s1_5, M4(1.981e-01, -8.801e-03, -9.874e-03, -4.003e-02, 2.686e-03, -1.346e-01, -1.813e-02, -1.003e-01, 1.561e-01, 3.252e-01, -1.189e-01, 2.014e-01, 1.343e-01, 4.088e-02, -9.918e-02, 1.025e+00)); + r += mul(s1_6, M4(-2.323e-02, 3.284e-02, -5.099e-03, -3.025e-02, -1.458e-02, -1.640e-02, 1.268e-01, -3.787e-02, 5.078e-02, 4.529e-02, 1.050e-02, -8.079e-03, -1.530e-02, -6.509e-02, -1.620e-01, 6.662e-02)); + r += mul(s1_7, M4(3.972e-02, 8.570e-02, -8.723e-02, -3.746e-02, -1.902e-01, 5.121e-02, 1.161e-01, -4.624e-02, -6.268e-02, 1.852e-01, -1.535e-01, -2.023e-01, 2.476e-01, -2.211e-02, -1.590e-01, -3.109e-02)); + r += mul(s1_8, M4(-8.025e-03, -4.798e-02, 5.162e-02, 6.616e-02, -2.416e-02, -5.815e-02, -1.334e-02, -1.029e-01, 5.381e-02, 1.539e-01, 4.511e-02, 1.426e-01, -5.511e-02, -9.311e-02, -3.072e-02, 1.572e-01)); + r += V4(3.240e-02, -1.989e-01, -2.700e-02, 6.578e-03); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); 
+ V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(9.384e-02, 1.183e-01, 5.136e-02, -4.583e-01, -1.060e-01, 6.124e-02, -1.479e-01, -2.457e-01, -5.881e-02, 4.756e-03, -2.540e-02, -5.047e-02, -1.897e-01, 4.062e-02, 1.226e-02, 1.465e-01)); + r += mul(s0_1, M4(-1.890e-01, -9.535e-02, 2.627e-01, 3.224e-01, 1.050e-01, -3.922e-02, -3.551e-01, -2.632e-01, -2.349e-01, -5.605e-02, -2.856e-01, 4.331e-01, -2.614e-02, -6.027e-02, -3.236e-02, 2.873e-01)); + r += mul(s0_2, M4(-1.702e-01, 7.462e-02, 2.168e-01, 4.212e-01, 8.150e-03, 6.671e-02, -2.781e-01, -1.322e-01, -3.933e-02, 2.698e-02, -3.420e-01, -1.116e-02, -1.788e-02, 8.701e-03, -1.044e-01, 1.264e-01)); + r += mul(s0_3, M4(3.573e-01, -4.592e-02, 4.539e-01, 2.854e-01, -6.463e-01, -1.763e-01, 6.236e-01, 7.125e-02, 4.126e-01, -1.621e-02, 1.685e-02, 2.328e-01, -5.456e-01, -2.113e-01, 1.424e-01, 1.414e-01)); + r += mul(s0_4, M4(3.838e-01, -1.008e+00, 4.023e-01, 1.302e+00, -1.503e-01, 4.245e-02, 1.496e+00, -3.479e-01, -3.763e-01, -7.877e-01, 4.081e-01, 
-2.192e-01, -2.853e-01, 2.123e-01, -3.407e-01, 2.423e-01)); + r += mul(s0_5, M4(5.073e-03, -2.123e-01, 1.851e-01, 1.482e-01, -2.814e-01, 1.262e-01, 6.890e-01, -2.317e-01, 6.427e-02, -5.801e-02, -3.684e-02, 7.526e-02, 1.309e-02, -2.125e-02, -7.760e-02, 4.795e-02)); + r += mul(s0_6, M4(1.409e-01, -1.062e-01, 1.665e-01, 5.277e-01, 6.676e-01, -1.872e-01, 1.251e+00, 1.165e-01, -2.287e-02, -5.235e-02, -2.028e-03, -3.305e-02, -1.968e-01, 1.898e-01, -9.538e-02, -1.418e-01)); + r += mul(s0_7, M4(7.353e-02, -3.073e-01, 1.789e-01, 2.137e-01, -6.435e-01, -6.052e-01, 2.259e+00, 2.884e-02, 7.105e-04, 1.247e-01, -7.393e-02, 2.539e-02, 1.194e-01, 1.870e-01, -1.126e-01, 2.444e-02)); + r += mul(s0_8, M4(3.853e-02, -2.242e-01, 1.470e-01, 1.701e-02, 4.586e-02, 2.027e-01, 7.448e-01, -4.414e-01, 9.096e-03, 1.277e-01, 4.010e-02, 1.064e-02, 2.401e-02, 1.901e-02, 1.956e-02, 8.744e-02)); + r += mul(s1_0, M4(-4.741e-02, 1.819e-03, -8.321e-02, -1.496e-01, -1.801e-02, 4.682e-02, -6.041e-02, -7.243e-02, -1.478e-01, 4.970e-02, 6.424e-02, -5.378e-02, -9.117e-02, 5.496e-02, -2.648e-02, -4.042e-02)); + r += mul(s1_1, M4(-8.815e-02, 5.938e-02, -2.433e-01, 1.737e-01, 1.095e-01, -5.108e-02, -5.729e-02, 8.334e-03, -2.763e-01, -6.431e-02, -2.454e-02, 4.055e-01, 2.113e-02, -1.298e-01, -3.908e-02, -1.780e-02)); + r += mul(s1_2, M4(-1.905e-02, 3.894e-02, -1.293e-01, 8.303e-03, -7.800e-03, -5.508e-03, 8.606e-02, -7.501e-02, 1.542e-02, 3.046e-02, -2.920e-01, -4.240e-02, -3.932e-02, -1.813e-02, -8.213e-02, 1.017e-01)); + r += mul(s1_3, M4(1.965e-01, 3.626e-02, 3.418e-02, 9.779e-02, -6.664e-02, -2.295e-02, -2.736e-02, 1.091e-01, 1.129e-01, -3.896e-02, 1.171e-02, -2.870e-02, -1.382e-01, -1.691e-01, 3.018e-01, -1.186e-01)); + r += mul(s1_4, M4(1.075e-01, -6.894e-01, 1.714e-01, 5.097e-01, 9.868e-03, 1.087e-01, 2.107e-01, -6.591e-02, -3.233e-01, -9.792e-01, -1.189e-01, -5.480e-01, -1.157e-01, 5.941e-02, -5.770e-01, -1.030e-01)); + r += mul(s1_5, M4(3.289e-02, 3.941e-02, 1.824e-01, 7.260e-04, -9.787e-03, 3.128e-02, 
-1.333e-01, 1.352e-01, 5.954e-03, -2.520e-01, -8.536e-02, -3.566e-01, 2.998e-02, -5.941e-02, -8.531e-02, -4.232e-02)); + r += mul(s1_6, M4(2.592e-02, -7.528e-02, -1.956e-02, 1.002e-01, 2.992e-02, -1.673e-01, 4.413e-02, 1.683e-01, 1.440e-02, -1.047e-02, 1.425e-02, -1.292e-01, -1.777e-01, 1.220e-01, -6.381e-02, 4.174e-02)); + r += mul(s1_7, M4(-3.107e-02, -8.612e-02, 1.248e-02, -8.544e-02, -1.161e-01, 7.718e-02, -1.150e-01, -1.699e-01, -1.392e-02, 7.590e-02, -5.195e-02, -3.599e-01, 4.872e-02, 1.381e-01, -1.143e-01, -1.473e-03)); + r += mul(s1_8, M4(1.277e-02, 3.020e-02, 4.658e-02, 8.071e-02, 6.867e-02, -2.693e-02, 7.897e-02, -1.264e-02, -1.035e-03, 1.509e-01, 4.169e-02, -1.716e-01, -4.694e-03, 1.627e-02, 7.171e-03, -4.496e-02)); + r += V4(4.014e-03, -2.020e-02, 1.560e-02, -2.352e-02); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 
+//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.174e-02, -2.020e-01, -6.843e-03, 1.049e-01, 1.680e-01, -6.387e-01, -1.541e-01, -1.952e-01, -4.586e-02, -1.580e-01, -5.507e-02, 1.065e-01, -5.257e-03, -9.464e-02, -9.788e-02, 1.221e-01)); + r += mul(s0_1, M4(1.365e-01, -4.220e-02, -4.186e-02, -1.569e-01, -5.527e-01, -1.180e-01, -2.274e-01, -2.007e-01, 2.207e-02, 1.190e-02, 3.746e-02, -1.565e-01, -2.808e-02, 1.657e-02, -5.376e-02, -1.093e-02)); + r += mul(s0_2, M4(-7.935e-02, -3.809e-02, -3.727e-02, -4.730e-02, -8.556e-02, 3.451e-04, -8.191e-02, 8.086e-02, 2.051e-02, 7.072e-03, 2.537e-02, 2.793e-02, 9.384e-04, -3.624e-02, -2.171e-02, 7.103e-02)); + r += mul(s0_3, M4(-1.261e-02, 2.716e-01, 2.739e-01, -7.349e-02, -2.130e-02, -4.131e-01, -1.851e-01, 1.065e-01, -7.827e-02, 2.868e-01, -1.500e-01, -1.442e-01, -1.842e-02, -2.983e-01, -4.232e-02, 1.395e-01)); + r += mul(s0_4, M4(2.733e-01, 4.015e-01, 4.102e-01, -2.027e-01, 4.229e-01, 2.213e-01, 3.628e-01, -1.011e-01, -4.893e-01, 1.333e-01, -4.245e-02, -8.133e-02, -1.086e-02, -1.089e-01, -8.720e-02, 1.513e-01)); + r += mul(s0_5, M4(8.521e-02, 1.460e-01, 1.589e-01, -2.075e-01, -5.391e-02, 7.449e-03, -6.763e-02, -2.352e-01, 4.055e-02, -1.812e-02, -1.413e-02, 9.240e-02, -3.070e-02, -4.975e-03, -8.972e-02, -2.225e-02)); + r += mul(s0_6, M4(1.880e-01, -1.481e-01, 1.001e-01, 6.339e-02, -6.208e-02, -2.814e-02, -5.944e-03, 1.002e-01, -7.822e-02, 1.010e-01, -2.161e-02, 9.175e-02, 1.495e-02, 1.645e-02, 8.901e-03, -3.865e-02)); + r += mul(s0_7, M4(4.449e-01, -1.089e-01, -1.249e-01, -8.911e-01, 3.096e-02, 1.724e-01, 5.605e-02, -7.605e-02, -9.644e-02, -1.191e-01, -1.332e-01, 2.544e-02, 5.659e-02, -2.706e-04, -9.886e-02, 9.218e-02)); + r += mul(s0_8, M4(7.394e-02, -2.112e-01, 1.505e-02, -1.236e-01, -1.848e-02, -2.716e-02, 
-6.663e-02, 2.764e-02, -1.120e-02, 3.440e-03, -1.443e-02, 1.745e-02, -3.847e-02, -4.228e-03, -8.888e-02, 2.134e-02)); + r += mul(s1_0, M4(6.588e-03, -6.764e-02, -2.660e-02, -3.967e-02, 6.459e-02, -6.345e-01, -5.784e-01, 9.294e-02, 2.426e-02, -9.858e-02, -9.036e-02, -9.545e-02, 2.094e-02, -1.001e-01, -1.145e-01, -6.470e-02)); + r += mul(s1_1, M4(-2.633e-03, 5.849e-02, 3.154e-02, -7.386e-02, -6.412e-01, -4.405e-01, -5.885e-01, 1.657e-01, -1.757e-01, -1.882e-02, -1.023e-01, -1.713e-01, -1.047e-01, -1.558e-01, -1.509e-01, -2.815e-01)); + r += mul(s1_2, M4(1.880e-01, -3.790e-02, 1.112e-01, 1.672e-02, -1.713e-01, 2.611e-02, -9.008e-02, 9.359e-02, -6.567e-02, 9.399e-02, 3.743e-02, 3.662e-02, 3.190e-02, -1.466e-01, -1.154e-01, 1.692e-02)); + r += mul(s1_3, M4(-1.733e-02, 1.381e-01, 8.342e-02, -5.893e-02, -1.467e-02, -4.365e-01, -3.057e-01, 1.506e-01, 7.300e-02, 6.777e-01, -5.484e-03, -3.499e-01, 1.978e-01, -6.846e-01, -2.921e-01, -1.173e-01)); + r += mul(s1_4, M4(-1.829e-01, -4.506e-01, -5.685e-02, 8.260e-01, 3.056e-01, 1.803e-01, 1.908e-01, -2.029e-01, -1.578e-01, 5.039e-01, 3.016e-01, -4.971e-01, -4.977e-01, 4.537e-01, -4.268e-01, 7.878e-01)); + r += mul(s1_5, M4(-3.251e-01, -1.229e-01, -1.447e-01, 3.290e-01, -2.134e-01, -6.542e-03, -7.109e-02, -1.004e-01, 3.887e-02, -1.008e-01, -7.490e-02, 6.126e-02, 2.757e-01, -1.980e-01, -1.792e-01, 2.722e-01)); + r += mul(s1_6, M4(4.765e-02, -5.401e-02, 4.164e-02, 1.847e-03, -3.178e-02, -4.201e-02, -2.504e-02, 1.350e-02, -1.436e-01, 1.654e-01, -1.099e-02, -3.733e-02, 1.118e-01, -2.529e-01, -1.353e-01, -9.309e-02)); + r += mul(s1_7, M4(1.684e-01, -1.978e-01, 2.645e-02, -9.582e-02, 2.618e-02, 9.350e-02, -2.281e-02, -1.901e-01, 1.176e-02, -1.571e-01, 1.491e-02, -2.105e-01, -1.685e-01, -2.459e-01, -2.166e-01, 1.082e-01)); + r += mul(s1_8, M4(2.225e-02, 7.813e-02, -4.112e-02, 6.166e-02, -4.143e-02, -2.160e-02, -7.478e-02, -2.251e-02, -1.306e-02, -6.002e-02, -7.496e-02, -2.538e-03, 7.824e-02, 9.597e-02, -3.546e-03, -1.794e-01)); + r += 
V4(-5.942e-03, -2.718e-02, -1.234e-02, 3.307e-02); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 6 +//!DESC conv5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.069e-01, 1.009e-01, -5.972e-02, -1.732e-02, -9.217e-02, 9.177e-03, -3.127e-02, -5.872e-02, -1.364e-02, -9.990e-04, 1.518e-01, 5.861e-02, -9.835e-02, -1.155e-01, 6.714e-02, -5.142e-02)); + r += mul(s0_1, M4(1.404e-02, 1.372e-01, -2.759e-01, -4.361e-02, -1.407e-01, 1.570e-01, -1.216e-01, -7.289e-02, 3.088e-01, -1.285e-01, 1.107e-01, 1.651e-01, 1.596e-01, -1.569e-01, 1.437e-02, -1.455e-01)); + r += mul(s0_2, M4(-4.001e-02, 1.772e-01, -2.761e-01, 4.916e-02, -1.489e-01, 1.680e-01, 
-5.244e-02, 1.334e-01, 1.245e-01, -2.321e-01, 5.371e-01, -2.549e-01, -9.624e-02, -1.072e-01, 2.322e-01, -2.261e-01)); + r += mul(s0_3, M4(-2.291e-01, 7.774e-04, -1.015e-02, 6.036e-02, -1.133e-01, 7.554e-02, 1.081e-01, 1.704e-01, 2.123e-01, -2.065e-01, 4.928e-02, 2.352e-03, -2.488e-01, -1.765e-01, 2.044e-01, 1.302e-02)); + r += mul(s0_4, M4(3.195e-01, -5.410e-01, -4.771e-01, -1.713e-01, 2.778e-01, -1.028e-01, 8.603e-02, 2.162e-01, 1.466e-02, 2.633e-02, -3.299e-01, -5.183e-02, -3.598e-01, -4.015e-01, 5.674e-02, -1.429e-01)); + r += mul(s0_5, M4(-1.480e-01, 2.440e-01, -2.189e-01, 1.407e-01, -3.439e-01, 2.624e-01, 4.947e-01, 7.813e-01, 1.067e-01, -6.781e-02, -5.271e-02, -1.331e-02, -2.133e-01, -1.038e-01, 4.267e-01, -4.026e-01)); + r += mul(s0_6, M4(-1.086e-01, 2.607e-01, -1.897e-01, -1.710e-01, 6.096e-02, -1.121e-01, 8.797e-02, -8.204e-02, 4.825e-02, -9.364e-02, 8.472e-02, -1.923e-02, -1.755e-01, 1.086e-01, -3.987e-02, 1.737e-02)); + r += mul(s0_7, M4(5.606e-02, 4.516e-02, -7.352e-02, 7.654e-02, -6.706e-02, 2.674e-01, -2.388e-01, -1.997e-01, 9.871e-02, -9.055e-02, 1.274e-01, 1.854e-01, -1.765e-01, -1.779e-01, 1.114e-01, -1.882e-01)); + r += mul(s0_8, M4(-4.811e-02, 2.057e-01, -2.913e-01, 1.265e-01, 1.304e-01, 1.462e-01, -4.432e-03, 4.191e-01, 6.606e-02, -1.382e-01, 1.052e-01, -3.990e-01, 9.737e-02, -9.675e-02, 6.216e-02, -2.130e-01)); + r += mul(s1_0, M4(-1.183e-01, -5.696e-02, 9.372e-02, 3.074e-03, -2.694e-02, -2.272e-02, -3.489e-02, -2.667e-02, 1.635e-01, -5.761e-04, -1.677e-03, -1.076e-01, -5.411e-02, -1.100e-02, 1.742e-02, 6.403e-02)); + r += mul(s1_1, M4(-4.462e-03, 3.912e-02, -1.208e-01, -9.360e-02, -1.260e-01, 1.602e-02, -1.047e-01, -1.252e-01, 2.940e-01, 1.068e-01, -2.602e-01, 1.692e-01, 1.120e-01, -2.613e-02, -1.083e-02, 1.754e-02)); + r += mul(s1_2, M4(2.307e-02, 1.240e-01, -2.024e-01, 1.761e-01, -2.326e-01, 3.209e-02, 5.352e-02, 3.399e-02, 1.754e-01, -3.059e-01, 4.554e-01, -2.412e-01, 4.242e-03, 3.919e-02, 7.769e-02, -1.155e-01)); + r += mul(s1_3, 
M4(-1.946e-01, -9.445e-02, 1.698e-01, 1.165e-01, -1.571e-01, 1.700e-02, 5.682e-02, 4.628e-02, 4.425e-01, -1.872e-01, 3.713e-02, 8.537e-02, 4.211e-02, -6.178e-02, 1.398e-02, 5.929e-02)); + r += mul(s1_4, M4(5.957e-01, -6.855e-01, -3.668e-01, -2.565e-01, -4.383e-02, -8.094e-02, -2.101e-02, -2.446e-01, -7.781e-02, 5.879e-01, -5.272e-01, -1.786e-01, -2.396e-01, -4.148e-01, 5.226e-02, 9.011e-02)); + r += mul(s1_5, M4(-4.655e-02, 1.107e-01, -1.109e-01, 3.601e-01, -2.103e-01, 3.712e-01, 1.666e-01, 3.972e-01, -2.227e-02, -2.115e-02, -7.054e-02, -1.216e-01, 4.739e-03, 1.201e-01, 1.335e-01, -1.775e-01)); + r += mul(s1_6, M4(-7.542e-02, 9.157e-02, 1.143e-02, -7.961e-02, -3.812e-02, 1.722e-02, 1.396e-02, -3.920e-02, -6.220e-03, -6.723e-02, 9.364e-02, -4.804e-02, -8.885e-02, 1.313e-01, -7.872e-02, 2.733e-02)); + r += mul(s1_7, M4(-3.879e-01, -2.705e-01, 3.305e-01, -1.542e-01, -1.179e-01, 9.695e-02, -1.353e-01, -2.320e-01, 1.433e-02, -2.689e-01, 2.066e-01, 3.704e-01, -5.587e-02, -6.296e-02, 6.326e-02, 1.881e-02)); + r += mul(s1_8, M4(-4.722e-02, -5.909e-02, 4.089e-02, -8.851e-02, 2.017e-01, -2.652e-02, 9.432e-02, 3.252e-01, -2.219e-01, 2.142e-02, -4.496e-02, 5.456e-02, 2.364e-02, 1.081e-01, -9.898e-02, 9.928e-02)); + r += V4(1.102e-03, 4.481e-03, 3.096e-03, -9.818e-03); + return r; +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 
= -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 7 +//!DESC conv6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-3.488e-02, 3.507e-02, 3.848e-02, -5.906e-02, 9.669e-02, 3.121e-02, -2.182e-02, 1.691e-01, -1.132e-01, -7.602e-02, -5.000e-02, -6.017e-03, 3.962e-02, 1.086e-01, -3.343e-04, 9.002e-02)); + r += mul(s0_1, M4(9.453e-02, -1.793e-01, -6.074e-02, 5.317e-03, 1.056e-01, 3.460e-01, 5.291e-02, 7.825e-02, 5.510e-02, 4.818e-02, -1.119e-02, 3.913e-02, -8.177e-02, -1.060e-01, -9.989e-03, -9.245e-02)); + r += mul(s0_2, M4(-8.190e-02, 1.375e-01, -4.322e-02, -6.721e-02, 1.645e-02, -1.392e-01, 7.103e-02, -1.950e-02, 4.302e-03, -3.213e-02, -7.517e-03, -3.406e-03, -2.132e-02, 1.333e-01, -6.553e-02, 7.300e-02)); + r += mul(s0_3, M4(-1.102e-01, 3.005e-01, -8.521e-02, 3.002e-01, 1.866e-01, 1.089e-01, -2.968e-02, 1.271e-01, -3.566e-01, 1.224e-01, -7.462e-02, -2.765e-01, 5.175e-02, 1.567e-01, 1.450e-01, -1.948e-01)); + r += mul(s0_4, M4(1.558e-01, 3.780e-02, 9.697e-02, -2.485e-01, -3.560e-01, -3.667e-01, 1.396e-01, 1.020e+00, -2.319e-01, -2.878e-01, -2.849e-01, 5.648e-01, 2.094e-01, -5.684e-01, 1.482e-01, -6.172e-01)); + r += mul(s0_5, M4(-1.276e-01, -1.685e-01, 4.271e-01, -1.489e-01, 2.154e-01, 2.661e-01, -1.093e-01, -7.859e-02, 6.618e-02, 9.795e-02, 2.778e-02, -1.286e-01, -1.527e-01, -3.586e-01, 2.523e-01, 9.196e-02)); + r += mul(s0_6, M4(-1.354e-01, -6.680e-02, 
5.541e-02, -5.314e-02, 1.639e-02, -1.639e-01, -1.856e-01, -1.863e-01, -1.519e-01, -5.459e-02, 1.027e-01, 6.492e-02, 3.482e-02, -9.074e-03, 1.861e-01, 1.393e-01)); + r += mul(s0_7, M4(1.907e-02, 1.189e-02, -5.038e-01, -8.478e-02, 3.643e-01, 1.086e-02, 3.067e-01, 1.071e-01, -6.552e-01, 1.505e-01, -7.394e-01, 1.155e-01, -1.815e-01, -1.739e-02, -2.723e-01, -1.607e-01)); + r += mul(s0_8, M4(-8.319e-02, -2.563e-02, -1.127e-01, -7.792e-02, 1.295e-01, 1.091e-01, 2.920e-02, -5.761e-02, -9.443e-02, 7.429e-03, -2.117e-01, -3.670e-02, -7.118e-02, -4.469e-02, -6.460e-02, -1.261e-02)); + r += mul(s1_0, M4(2.400e-02, -2.740e-02, -3.394e-02, 5.817e-02, -6.716e-02, -5.672e-02, -7.339e-02, -3.921e-02, -9.506e-02, -3.805e-02, -3.235e-02, -8.145e-02, 1.265e-02, 7.308e-02, -5.707e-02, 1.141e-01)); + r += mul(s1_1, M4(-1.565e-01, 1.052e-01, -8.934e-02, -6.945e-02, 3.804e-02, 2.091e-01, -1.102e-01, 2.394e-01, 6.041e-02, -9.942e-02, -6.054e-03, 4.857e-02, -7.265e-02, 1.596e-02, 9.135e-02, -8.397e-02)); + r += mul(s1_2, M4(-9.449e-02, 1.121e-01, -1.101e-01, -2.980e-02, 5.100e-02, -6.337e-02, 1.692e-01, -5.062e-02, -3.931e-02, 1.083e-01, 3.952e-03, 9.801e-04, -6.425e-02, 8.015e-02, -1.628e-01, 8.317e-02)); + r += mul(s1_3, M4(7.400e-02, 8.412e-02, 2.984e-02, 8.693e-02, -1.474e-01, -3.529e-02, -6.134e-02, -1.107e-01, -3.264e-01, 8.009e-02, -2.261e-01, -1.472e-01, -4.683e-02, -1.258e-01, 1.061e-01, -1.125e-01)); + r += mul(s1_4, M4(4.970e-01, -1.211e-01, 2.379e-01, 2.124e-01, -1.003e-01, -5.656e-01, 5.001e-02, 4.959e-01, 1.538e-01, -7.985e-01, -2.085e-01, 2.220e-01, 7.247e-02, 6.581e-02, -9.437e-02, -3.066e-01)); + r += mul(s1_5, M4(-8.611e-02, -9.199e-02, 2.518e-01, -7.482e-02, -1.208e-01, 1.015e-01, 3.428e-02, -1.354e-01, 1.038e-01, -4.497e-02, 2.744e-01, -4.281e-02, 4.090e-02, -2.726e-01, 1.839e-01, 1.138e-01)); + r += mul(s1_6, M4(-8.703e-02, -4.776e-02, -1.477e-01, -1.870e-02, -1.072e-01, 3.204e-02, -8.396e-03, 1.175e-01, 1.685e-01, -1.427e-01, 2.152e-01, -2.155e-01, 1.898e-03, 
5.924e-02, -1.089e-02, 5.197e-02)); + r += mul(s1_7, M4(-9.679e-02, 1.961e-02, 1.636e-01, -6.049e-02, -7.071e-03, 1.519e-01, -6.303e-01, 4.739e-02, -3.331e-01, 8.291e-02, -5.944e-01, -7.677e-02, -1.164e-01, -4.580e-02, 1.419e-01, 6.839e-02)); + r += mul(s1_8, M4(6.174e-02, -4.004e-02, 1.256e-02, -4.981e-02, 4.659e-03, 8.371e-02, -1.664e-01, -2.897e-02, -1.253e-01, 2.381e-02, -1.147e-01, -8.724e-02, -3.736e-02, 1.140e-02, -1.550e-01, 6.350e-03)); + r += V4(-6.918e-03, -1.945e-03, -7.751e-03, 1.645e-02); + return r; +} + +void Pass7(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 8 +//!DESC conv7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.116e-02, -3.385e-02, 
-4.697e-02, 4.650e-02, -3.488e-02, 1.006e-01, -4.538e-03, 4.637e-02, 1.288e-01, 7.769e-03, 1.150e-01, -7.930e-03, 1.045e-02, 4.849e-02, 2.767e-02, 4.909e-02)); + r += mul(s0_1, M4(-5.332e-01, -5.254e-01, -3.541e-01, -3.525e-01, 1.117e-02, 2.929e-02, 6.817e-02, 9.115e-02, 1.055e+00, 6.141e-02, 3.976e-01, 4.649e-02, 2.561e-01, -1.191e-01, 1.230e-03, -1.047e-01)); + r += mul(s0_2, M4(-1.450e-01, -2.829e-01, -6.857e-01, -4.294e-01, 3.217e-02, 2.745e-02, 5.242e-02, 3.556e-02, 1.284e-01, 4.292e-01, 7.161e-01, 2.220e-01, -1.508e-02, 1.802e-01, 1.842e-01, 9.827e-02)); + r += mul(s0_3, M4(1.381e-01, 5.690e-02, 5.107e-02, 6.625e-02, -1.173e-01, -7.448e-02, -1.152e-01, -1.808e-01, -1.470e-01, -1.833e-01, -1.653e-01, -1.217e-01, 9.096e-02, 5.579e-02, 1.128e-02, 9.791e-02)); + r += mul(s0_4, M4(5.936e-01, -2.579e-01, 5.761e-01, -7.051e-01, -7.023e-01, 2.824e-01, 2.057e-01, 3.628e-01, 1.006e-02, 3.209e-01, 6.969e-02, -3.464e-01, 4.768e-01, -3.194e-01, -4.817e-02, 3.050e-02)); + r += mul(s0_5, M4(2.145e-01, -1.899e-01, 1.446e-01, 2.497e-02, -8.750e-02, -3.154e-01, -5.060e-01, -7.413e-02, -8.542e-02, -4.198e-02, -1.528e-01, -1.812e-01, -2.597e-01, 8.374e-02, -5.592e-01, -2.557e-01)); + r += mul(s0_6, M4(5.713e-02, -4.294e-03, 2.388e-02, -7.124e-02, -2.163e-02, -3.642e-03, 3.839e-02, -6.934e-02, -9.052e-02, -1.153e-02, 1.213e-02, 7.120e-02, -3.698e-02, 4.260e-02, -7.245e-02, 7.898e-02)); + r += mul(s0_7, M4(2.780e-02, 1.944e-02, 1.415e-01, 1.216e-01, 9.163e-02, -3.069e-02, -1.829e-02, -2.182e-01, 5.815e-02, -1.923e-02, -5.934e-02, -3.487e-02, -1.082e-01, 1.362e-01, 8.120e-02, 2.621e-01)); + r += mul(s0_8, M4(9.334e-03, -1.300e-02, 4.936e-02, 1.751e-01, -1.214e-01, 1.629e-02, -1.131e-01, 7.402e-02, 1.134e-02, 1.663e-03, -5.887e-03, -8.862e-02, 1.029e-01, -5.629e-02, 9.127e-02, -6.668e-02)); + r += mul(s1_0, M4(1.165e-02, 4.389e-02, 6.299e-03, 7.939e-02, -2.769e-02, 9.353e-02, 6.239e-02, 1.341e-02, 4.713e-02, -2.731e-03, 5.256e-02, -3.515e-02, -8.911e-02, -1.425e-01, -7.889e-02, 
-1.627e-01)); + r += mul(s1_1, M4(-2.869e-01, 2.838e-02, -2.541e-02, 5.216e-02, 2.660e-01, -2.095e-01, 1.375e-01, -2.562e-02, 2.715e-01, 1.694e-01, 9.471e-02, -7.292e-03, 3.257e-01, -2.247e-01, 7.698e-03, -2.076e-01)); + r += mul(s1_2, M4(1.832e-02, -1.860e-01, -4.951e-02, -1.392e-03, 9.307e-02, 7.671e-02, 1.043e-01, -3.675e-02, 1.433e-03, 1.219e-01, 1.978e-01, 5.960e-02, 9.624e-02, 1.448e-01, 3.561e-01, 3.054e-02)); + r += mul(s1_3, M4(-4.647e-02, 4.225e-03, 3.830e-02, -3.233e-02, -1.532e-01, -6.289e-01, -3.037e-01, -4.131e-01, -1.794e-01, -4.090e-02, -9.644e-02, -4.828e-02, 7.978e-02, 6.792e-03, 5.043e-02, 4.905e-02)); + r += mul(s1_4, M4(2.967e-01, -5.750e-02, 1.168e-01, -2.681e-02, 1.232e-01, -2.481e-03, 8.164e-01, 2.468e-01, -3.721e-01, -5.041e-02, -4.796e-01, -2.778e-02, 3.623e-01, -8.387e-01, -5.229e-01, -4.492e-01)); + r += mul(s1_5, M4(-1.507e-02, 1.343e-01, 8.567e-02, 8.923e-02, -2.766e-03, -1.548e-01, -2.588e-01, -1.295e-01, 1.777e-02, -9.243e-02, -4.495e-02, -5.528e-02, -1.071e-01, -1.284e-01, -4.142e-01, -1.800e-01)); + r += mul(s1_6, M4(-4.695e-03, -9.431e-04, -1.256e-02, -4.959e-03, 1.607e-01, -8.763e-02, 2.039e-01, -1.243e-01, 3.725e-02, -5.612e-02, -3.615e-03, -2.475e-02, 4.955e-02, 4.065e-02, -1.879e-02, -1.195e-01)); + r += mul(s1_7, M4(-1.039e-02, 5.631e-02, 2.655e-02, 7.419e-02, 1.286e-01, -6.430e-02, 4.800e-02, -4.480e-02, 5.067e-03, -4.197e-02, -3.342e-02, -7.461e-02, -3.225e-02, 6.062e-03, 5.391e-02, -7.135e-02)); + r += mul(s1_8, M4(5.195e-02, 3.799e-02, 1.130e-01, -8.811e-03, -4.285e-02, 1.609e-02, -8.972e-03, 3.530e-02, -6.932e-02, -3.013e-03, -5.208e-02, 5.823e-02, -4.561e-02, -1.068e-01, -1.458e-01, -5.739e-02)); + r += V4(1.569e-02, 1.505e-02, 2.765e-02, 1.258e-02); + return r; +} + +void Pass8(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = 
l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 9 +//!DESC conv8 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.724e-03, 6.987e-03, -3.797e-03, 2.147e-02, -5.616e-03, 1.123e-02, -2.768e-02, 8.185e-03, -4.051e-03, 5.608e-05, -9.522e-02, 2.924e-02, -5.976e-03, 8.331e-03, 7.513e-02, -2.513e-02)); + r += mul(s0_1, M4(-2.224e-02, 1.093e-04, 5.901e-02, 2.350e-02, 1.167e-01, -7.837e-02, 1.939e-01, 1.987e-01, 5.530e-02, -4.759e-05, 1.221e-01, 4.764e-02, -8.813e-02, 7.695e-02, -4.577e-01, 1.671e-02)); + r += mul(s0_2, M4(-5.696e-02, 6.005e-03, -5.620e-02, -8.978e-02, 4.014e-02, -3.822e-02, 1.081e-01, -6.532e-03, 9.444e-03, 7.498e-03, -3.228e-02, 4.908e-02, -2.043e-02, 2.374e-02, 2.163e-02, -4.505e-02)); + r += mul(s0_3, M4(-8.098e-02, 1.943e-02, -5.744e-02, 3.824e-02, -2.071e-01, 1.036e-01, -6.926e-02, -2.348e-01, 2.378e-01, -1.069e-01, -5.307e-02, 1.161e-01, 1.881e-01, -5.785e-02, -6.570e-02, 2.227e-01)); + r += 
mul(s0_4, M4(7.577e-02, -4.125e-02, 1.714e-01, -6.934e-01, -2.448e-01, 1.146e-01, 2.354e-01, -4.935e-01, -2.321e-01, -8.273e-02, 5.890e-02, 5.704e-01, 4.833e-02, 2.875e-02, 1.163e-01, -1.802e-01)); + r += mul(s0_5, M4(2.287e-01, -3.461e-02, -2.542e-02, 2.882e-02, 7.142e-02, -1.556e-01, 4.055e-02, 1.534e-02, -1.647e-01, 3.087e-03, -6.811e-02, -3.896e-02, 1.334e-01, 1.188e-01, -1.847e-01, 4.293e-02)); + r += mul(s0_6, M4(-3.094e-02, 2.712e-03, 3.387e-03, 1.877e-02, 9.494e-02, -2.863e-02, -4.239e-02, -3.402e-02, 5.541e-03, -1.178e-02, 1.795e-02, -3.515e-02, -3.044e-02, -2.463e-02, -1.320e-02, 8.952e-02)); + r += mul(s0_7, M4(1.035e-01, -3.181e-02, 1.902e-02, 3.973e-03, 2.267e-01, -2.620e-01, 1.821e-01, 1.631e-01, 1.494e-02, 6.125e-02, -6.176e-02, -2.497e-02, -1.364e-02, 7.542e-02, -8.480e-02, -4.648e-02)); + r += mul(s0_8, M4(-1.466e-01, 3.028e-02, 2.798e-02, -7.887e-02, -4.370e-02, 1.408e-02, -6.161e-02, -3.034e-02, 6.567e-02, 2.071e-02, 3.126e-02, 6.993e-02, -5.556e-02, 1.507e-02, 2.991e-02, -4.924e-02)); + r += mul(s1_0, M4(1.637e-02, -2.767e-02, 8.568e-02, -4.254e-02, 3.215e-02, 1.987e-04, -3.697e-02, 3.787e-02, 2.236e-02, -6.576e-02, 7.400e-02, 1.093e-01, 3.271e-03, 1.809e-03, 1.011e-02, 1.509e-01)); + r += mul(s1_1, M4(5.538e-02, -5.865e-02, 4.351e-01, 2.494e-01, 1.101e-01, -1.484e-02, 5.176e-01, 3.999e-02, -4.782e-03, 1.155e-01, -2.099e-01, 5.012e-03, -1.919e-01, 2.292e-01, -5.378e-01, -1.223e-01)); + r += mul(s1_2, M4(-5.691e-02, 7.653e-02, -2.572e-01, -1.332e-01, -5.652e-02, -5.008e-02, 7.840e-02, -3.729e-02, 6.942e-02, 6.483e-04, -2.243e-05, 8.430e-02, -6.848e-02, -2.096e-02, -3.908e-02, -9.062e-02)); + r += mul(s1_3, M4(2.725e-01, -1.841e-01, -6.710e-03, 3.965e-01, -1.298e-01, -4.014e-03, 2.007e-01, -3.700e-01, 5.329e-01, -4.014e-01, 2.619e-02, 1.606e-01, 2.179e-01, -1.403e-01, 4.227e-02, 8.568e-02)); + r += mul(s1_4, M4(4.188e-01, -7.320e-01, -5.609e-01, -6.087e-01, -7.521e-01, 7.363e-01, -6.253e-01, -2.011e-01, -1.017e+00, 3.331e-02, -2.135e-02, 
2.084e-01, 6.074e-01, -9.824e-01, 5.154e-01, 1.748e-01)); + r += mul(s1_5, M4(1.733e-01, 5.176e-01, -7.335e-02, 1.899e-02, 1.028e-01, -6.330e-02, -1.632e-01, 9.241e-05, -1.357e-01, -1.131e-01, 9.644e-02, -1.424e-02, -1.835e-02, 7.296e-01, -3.204e-01, -2.966e-02)); + r += mul(s1_6, M4(4.798e-02, -1.047e-01, 3.646e-02, 6.703e-02, 5.371e-02, 4.759e-02, -2.975e-02, -6.945e-02, 7.985e-02, -1.101e-01, 3.034e-02, -1.472e-02, -3.827e-02, 9.839e-03, -4.922e-03, 4.307e-03)); + r += mul(s1_7, M4(-8.950e-02, -1.253e-02, -1.730e-05, 3.862e-02, 2.692e-01, -4.645e-01, 2.399e-01, 2.744e-01, -4.503e-02, 1.724e-01, -7.935e-02, -5.200e-02, -2.132e-03, -1.926e-02, 2.926e-02, -2.288e-02)); + r += mul(s1_8, M4(-1.072e-01, -1.145e-02, 6.605e-03, -1.090e-01, -7.524e-03, 8.598e-02, -7.698e-02, -6.976e-02, 5.869e-02, -5.499e-02, 3.529e-02, 7.813e-02, -1.794e-01, 4.212e-02, -4.479e-03, -7.253e-02)); + r += V4(8.112e-05, 3.290e-03, -6.342e-04, 1.340e-02); + return r; +} + +void Pass9(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, 
s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 10 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.348e-01, -9.107e-02, -4.849e-02, 4.484e-04, -3.384e-02, -6.768e-02, -9.628e-03, -1.766e-02, -9.939e-03, -2.182e-02, -1.288e-02, 8.518e-03, 2.218e-02, -1.184e-03, 1.240e-03, 1.065e-02)); + r += mul(s0_1, M4(7.301e-02, 1.014e-01, -1.363e-02, -4.850e-02, -2.842e-01, -3.060e-02, -4.154e-02, 4.057e-03, -3.458e-02, -6.335e-02, -2.660e-02, -1.335e-02, -7.944e-03, 8.560e-03, 4.588e-02, 7.580e-03)); + r += mul(s0_2, M4(-9.349e-05, -2.142e-02, -1.258e-04, -9.330e-03, -5.058e-03, 3.912e-02, -2.976e-02, 2.410e-02, -8.512e-03, 4.954e-02, -2.093e-02, -2.582e-03, 1.648e-02, 7.942e-03, 1.520e-02, 3.414e-02)); + r += mul(s0_3, M4(-1.021e-01, -1.140e-01, 2.412e-01, 1.289e-02, -1.192e-01, -1.140e-01, 2.395e-01, 6.930e-03, -2.027e-01, -4.824e-02, 1.243e-01, 3.820e-03, 9.280e-03, 2.866e-02, 2.106e-02, 3.644e-03)); + r += mul(s0_4, M4(8.727e-02, 8.162e-02, 1.478e-01, 5.348e-01, -3.115e-01, -2.605e-02, 1.510e-01, 7.249e-01, -8.110e-02, -6.698e-01, 1.080e-01, -8.090e-02, -3.492e-01, -1.891e-01, -1.877e-01, -1.319e-01)); + r += mul(s0_5, M4(-5.622e-03, 2.237e-02, -4.008e-03, -1.980e-02, -1.837e-02, -3.311e-02, 4.289e-02, 3.256e-02, -2.178e-02, 2.653e-02, -1.722e-03, 8.373e-02, -8.042e-02, -2.962e-01, -4.643e-03, -6.865e-02)); + r += mul(s0_6, M4(6.248e-03, -2.320e-02, 1.883e-03, -1.430e-02, 1.224e-02, 5.634e-03, -1.964e-02, -1.627e-02, 2.010e-02, 1.174e-02, -3.919e-02, 9.559e-04, 3.016e-02, -2.836e-03, 7.667e-02, 3.552e-02)); + r += mul(s0_7, M4(-6.141e-03, 1.380e-02, 1.024e-02, -1.210e-02, 4.548e-02, 3.626e-02, -9.142e-02, -7.666e-02, -3.241e-02, -2.296e-02, 
-3.244e-02, -2.870e-01, 4.427e-02, 8.899e-02, -1.327e-01, 4.920e-02)); + r += mul(s0_8, M4(-2.801e-03, -9.930e-04, -2.770e-03, 1.623e-02, 2.158e-03, -1.258e-02, -3.089e-02, 3.211e-02, -9.620e-03, 1.776e-02, -4.337e-03, 4.676e-02, 1.130e-02, -7.436e-03, -3.572e-02, -1.742e-01)); + r += mul(s1_0, M4(-4.306e-02, -6.039e-02, -1.642e-02, -1.966e-02, -5.996e-02, -1.743e-01, -3.128e-02, 1.714e-02, -4.357e-03, -7.720e-03, -4.532e-03, 4.571e-03, 3.988e-02, 2.067e-02, 1.548e-02, -2.964e-04)); + r += mul(s1_1, M4(-6.070e-02, -9.324e-02, 7.472e-03, 2.173e-02, -7.996e-02, -5.139e-02, -5.545e-02, -1.891e-02, -1.767e-02, -1.527e-02, -2.906e-02, 1.310e-02, 2.594e-02, 7.495e-02, -7.681e-03, -4.678e-03)); + r += mul(s1_2, M4(4.883e-02, -3.167e-02, 2.862e-02, 3.357e-02, -8.454e-03, 9.265e-03, -1.657e-02, -8.086e-03, -1.170e-02, -3.549e-02, 7.437e-03, 1.425e-02, 1.441e-02, -1.961e-02, 1.560e-02, -1.122e-02)); + r += mul(s1_3, M4(-6.156e-02, -6.763e-02, 1.987e-01, 2.459e-02, -5.710e-02, -2.009e-01, 4.581e-01, -1.181e-02, -9.054e-02, -5.658e-02, 3.432e-02, 2.004e-02, 6.965e-03, -1.655e-02, 5.178e-03, -1.236e-02)); + r += mul(s1_4, M4(-1.301e-01, -5.093e-02, 7.676e-01, 6.003e-01, -9.216e-02, -2.228e-03, 7.034e-02, 1.851e-01, -5.318e-01, -1.852e-01, -2.980e-02, 1.919e-02, -8.147e-01, -1.773e-01, -2.675e-01, 2.314e-02)); + r += mul(s1_5, M4(-9.962e-03, -1.888e-01, -1.877e-02, 1.190e-01, -2.283e-02, -1.241e-02, 2.969e-04, 3.894e-02, -5.077e-02, 2.300e-01, -6.192e-02, 1.793e-01, 5.384e-03, -4.378e-01, 2.970e-02, -1.125e-01)); + r += mul(s1_6, M4(1.376e-02, -2.891e-03, 9.292e-03, -1.288e-03, 2.615e-02, 2.656e-02, -8.111e-02, -1.779e-02, -7.512e-03, 9.174e-03, -4.553e-02, -1.139e-02, 2.259e-02, 4.351e-03, 4.963e-02, 2.002e-02)); + r += mul(s1_7, M4(9.993e-03, 1.250e-02, -2.090e-02, 6.839e-03, 3.502e-03, 2.070e-03, -5.530e-02, -2.855e-03, 1.144e-02, -4.191e-02, -8.395e-02, -2.056e-01, 6.909e-02, 7.425e-02, -2.374e-01, 8.636e-02)); + r += mul(s1_8, M4(-1.762e-02, -3.804e-04, 2.643e-02, 
4.383e-02, 1.748e-03, -1.201e-02, -8.452e-03, -1.216e-02, -1.203e-02, -3.454e-02, -1.957e-02, 2.212e-01, -1.375e-02, -1.094e-02, -1.245e-02, -2.124e-01)); + r += V4(3.107e-03, 3.655e-03, 5.416e-04, 5.397e-04); + return tanh(r); +} + +void Pass10(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, 
INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-8x4C-NVL.hlsl b/src/Effects/CuNNy/CuNNy-8x4C-NVL.hlsl new file mode 100644 index 000000000..cec1ad5a7 --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-8x4C-NVL.hlsl @@ -0,0 +1,778 @@ +// CuNNy 8x4C BILINEAR RGB NVL - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-D04N08 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0 + +#define l0(x, y) min16float((dot(float3(2.666e-01, 5.050e-01, 1.135e-01), O(INPUT, float2(x, y)).rgb) + -8.258e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(-2.544e-02, -4.130e-01, -2.634e-01, 2.417e-02) * s0_0; + r += V4(1.256e-02, -8.013e-02, 9.539e-02, -7.111e-02) * s0_1; + r += V4(1.768e-02, -2.469e-01, -1.627e-01, 8.569e-02) * s0_2; + r += V4(-1.554e-01, 3.441e-02, -1.508e-01, 2.491e-02) * s0_3; + r += V4(1.628e-01, 8.679e-01, -1.960e-02, -5.810e-01) * s0_4; + r += V4(-1.237e-02, -1.704e-01, 2.915e-01, -5.922e-01) * s0_5; + r += V4(7.925e-01, 5.570e-03, 7.074e-02, 4.442e-04) * s0_6; + r += V4(-7.910e-01, -1.530e-02, -8.229e-02, 3.149e-03) * s0_7; + r += V4(-3.973e-03, 2.262e-02, -1.213e-01, 3.843e-02) * s0_8; + r += V4(-8.495e-04, -1.121e-04, 1.842e-02, 5.844e-02); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 
= l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.254e-02, 1.997e-01, 4.636e-02, -4.800e-02, 2.043e-01, -4.096e-02, -7.212e-02, 1.408e-02, -3.916e-01, 2.630e-03, 7.016e-02, 9.613e-02, 1.773e-01, -2.723e-01, -9.458e-02, -1.890e-01)); + r += mul(s0_1, M4(2.350e-01, -8.474e-01, -4.044e-01, -9.188e-01, 9.560e-03, 5.061e-02, 1.092e-02, 1.781e-01, -2.144e-01, 3.203e-02, 6.349e-02, -8.272e-02, -3.105e-01, -3.917e-02, -1.320e-02, -1.541e-01)); + r += mul(s0_2, M4(-8.130e-01, -1.003e-01, 8.195e-02, -7.597e-01, 5.207e-02, 3.470e-02, -8.823e-03, -1.131e-01, -4.029e-02, 7.571e-02, -2.010e-01, 2.487e-01, 1.677e-01, -5.118e-02, -1.070e-01, 7.606e-02)); + r += mul(s0_3, M4(-1.158e-02, 4.898e-02, 1.202e-02, 5.012e-01, -5.343e-02, 4.756e-02, -2.438e-01, 6.399e-02, 2.822e-01, -2.863e-02, 1.996e-01, -7.099e-02, -1.323e-01, -3.797e-01, 5.385e-02, -1.014e-01)); + r += mul(s0_4, M4(2.812e-01, 7.903e-01, -1.733e-01, 6.668e-01, 4.775e-01, 5.452e-01, 7.089e-01, -1.851e-01, -2.382e-01, -5.180e-02, -3.623e-01, -3.040e-01, -4.313e-01, -1.167e-02, 1.235e-01, 1.436e-01)); + r += mul(s0_5, M4(-1.291e-01, -3.022e-02, -4.083e-01, -5.939e-02, -4.249e-01, -1.750e-01, 1.094e-01, -1.176e-01, 1.374e-02, 1.342e-01, 2.086e-01, 2.841e-01, 2.347e-01, 1.450e-01, 7.604e-02, 2.176e-01)); + r += mul(s0_6, M4(8.130e-02, -7.215e-02, -5.249e-02, 9.518e-03, -1.979e-01, -4.441e-02, -1.857e-01, -4.227e-01, 2.149e-01, -1.610e-01, 1.655e-01, -8.841e-02, 1.409e-01, -1.059e-01, 
2.037e-01, -2.744e-03)); + r += mul(s0_7, M4(-7.266e-02, 1.638e-02, -1.639e-01, 1.957e-02, -2.857e-01, 1.936e-01, -1.243e-01, -1.490e-01, 1.525e-01, -8.934e-02, 7.415e-02, -1.779e-01, 1.648e-02, -6.456e-02, 7.053e-02, -9.530e-02)); + r += mul(s0_8, M4(-6.960e-02, -8.960e-02, -1.757e-02, -1.370e-01, -5.137e-01, -1.179e-01, -4.053e-01, -1.987e-01, 7.100e-02, 2.928e-02, -9.682e-02, 2.403e-01, 1.814e-01, 2.131e-02, 5.579e-02, 5.457e-02)); + r += mul(s1_0, M4(-2.737e-02, 5.272e-02, -1.801e-02, -2.491e-01, 2.871e-01, -3.704e-02, -6.568e-02, 2.905e-02, 1.011e-01, -3.782e-01, -8.696e-02, 4.682e-01, 3.233e-01, -3.060e-01, -3.251e-02, 1.165e+00)); + r += mul(s1_1, M4(-4.994e-01, 3.049e-02, -8.802e-02, -6.179e-02, 7.133e-02, -1.957e-02, -4.465e-02, 1.130e-01, 7.255e-02, 6.956e-03, -1.204e-01, 3.699e-01, -8.844e-02, 4.624e-01, -9.881e-02, -2.512e-01)); + r += mul(s1_2, M4(-3.645e-01, 1.274e-01, 2.387e-01, -1.963e-01, -5.995e-02, -5.943e-02, 9.694e-02, -2.518e-01, -2.797e-01, 1.598e-01, -1.371e-02, 4.000e-01, 2.213e-01, 9.692e-02, -3.302e-01, 1.132e+00)); + r += mul(s1_3, M4(-8.539e-03, -6.535e-02, 5.575e-02, 1.928e-01, 1.156e-01, 5.227e-02, -3.039e-01, 4.794e-01, 1.441e-01, 1.929e-01, -4.689e-02, 2.023e-02, 1.330e-01, -1.358e+00, -5.393e-01, 7.907e-01)); + r += mul(s1_4, M4(1.701e-01, -3.479e-02, 5.404e-01, -2.491e-01, 4.564e-01, 6.659e-01, 7.009e-01, -2.288e-02, -7.696e-01, -4.959e-01, 2.881e-01, -4.322e-01, -9.013e-01, -4.765e-01, 5.556e-02, -1.805e-01)); + r += mul(s1_5, M4(-2.424e-01, 8.034e-03, -4.699e-02, -2.628e-01, -4.682e-01, 2.977e-02, 2.258e-01, -1.419e-01, 3.514e-01, 6.860e-03, 2.147e-01, 3.806e-01, 3.747e-01, 1.403e-01, 3.106e-01, 9.680e-01)); + r += mul(s1_6, M4(1.776e-01, -4.873e-02, -1.403e-01, -1.817e-02, -3.551e-01, 4.838e-04, -2.786e-01, -6.048e-01, 3.082e-01, -4.703e-01, 2.419e-01, -3.002e-01, -4.310e-01, -6.490e-01, 1.343e+00, -1.019e+00)); + r += mul(s1_7, M4(4.689e-02, -2.927e-02, -7.494e-02, -3.516e-02, -2.217e-01, -3.189e-01, 2.202e-01, -2.936e-01, 
4.772e-02, -1.609e-01, 9.853e-02, -4.214e-01, 2.780e-01, -1.073e-01, 1.102e-01, -2.033e-01)); + r += mul(s1_8, M4(-9.468e-02, 4.428e-02, 1.269e-01, -1.086e-01, -1.106e-01, -1.367e-01, -3.356e-01, 4.656e-03, 4.648e-02, -1.743e-02, -2.074e-01, -3.745e-02, 1.281e-01, -3.233e-01, 6.533e-01, 3.705e-01)); + r += V4(1.016e-03, 5.583e-03, -1.608e-02, -1.996e-04); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.810e-02, 2.379e-02, -8.471e-02, 1.305e-01, -5.897e-02, 1.263e-01, -9.639e-02, 9.150e-02, 9.002e-03, -1.763e-01, 8.275e-02, -2.357e-01, 7.181e-02, -7.360e-02, 4.629e-02, 
-8.259e-02)); + r += mul(s0_1, M4(6.774e-02, 9.108e-02, -3.750e-01, 8.014e-02, 2.890e-01, 9.986e-02, -1.262e-02, -1.285e-01, -2.789e-01, -1.145e-01, -4.982e-02, -1.101e-01, -2.051e-02, -2.271e-01, 1.343e-01, -8.643e-02)); + r += mul(s0_2, M4(-5.433e-02, 6.899e-02, -3.350e-01, -7.837e-02, -1.076e-01, 1.912e-02, -9.061e-02, 1.919e-01, 9.387e-02, -4.206e-02, 1.861e-01, -4.416e-03, -1.560e-01, -4.364e-02, 4.364e-01, 8.765e-02)); + r += mul(s0_3, M4(2.382e-01, 3.032e-01, -1.313e-01, -1.154e-01, 1.008e-01, 3.058e-01, -8.513e-02, 2.713e-01, -9.875e-02, 3.017e-01, 3.203e-02, 5.762e-01, -2.056e-03, -7.698e-02, 8.681e-02, 4.245e-02)); + r += mul(s0_4, M4(2.643e-01, 1.750e-01, 4.850e-02, 3.131e-03, 2.785e-01, 1.598e-01, 5.772e-01, -4.118e-04, -4.270e-01, -2.447e-01, 4.486e-01, 9.155e-02, -3.428e-01, -2.583e-01, -3.721e-02, 6.278e-02)); + r += mul(s0_5, M4(-1.080e-01, -5.514e-02, -3.648e-01, -2.319e-02, -2.100e-01, -4.065e-02, 1.126e-01, 3.970e-02, 9.824e-02, 1.377e-02, 1.295e-01, -2.512e-02, 1.115e-01, 7.094e-02, 3.413e-01, -5.245e-02)); + r += mul(s0_6, M4(1.991e-01, 4.710e-02, -9.305e-02, -1.471e-01, -8.221e-02, 1.134e-01, -1.718e-01, -2.606e-01, -8.167e-02, -1.462e-02, -1.094e-01, -1.569e-01, 2.133e-02, 3.374e-02, 4.583e-02, 1.228e-01)); + r += mul(s0_7, M4(-2.135e-01, 6.874e-02, -4.993e-02, 1.156e-02, -4.261e-01, 1.366e-01, 4.250e-02, -5.707e-02, -1.966e-01, -6.106e-02, 1.265e-01, -3.076e-03, 2.043e-03, -3.072e-02, 1.043e-01, 3.422e-01)); + r += mul(s0_8, M4(7.235e-02, -3.542e-04, -1.435e-02, -3.815e-02, -8.855e-02, 8.327e-02, 1.954e-01, 1.462e-01, 1.615e-01, -4.957e-02, 1.596e-02, -8.625e-02, 6.574e-02, -9.799e-02, 5.401e-03, 7.595e-02)); + r += mul(s1_0, M4(1.245e-01, -2.812e-03, 1.486e-02, 1.246e-01, -5.943e-02, 1.170e-01, -1.068e-01, 8.960e-02, 5.354e-03, -2.039e-01, 8.228e-02, -2.530e-01, -2.789e-03, -6.932e-02, -3.187e-02, -5.794e-02)); + r += mul(s1_1, M4(-2.539e-02, 4.598e-02, -1.205e-01, 1.597e-01, 2.391e-01, 1.269e-01, -1.116e-02, 1.498e-02, -2.388e-01, 
-1.548e-01, -7.389e-02, -1.083e-02, -1.181e-01, -7.069e-02, 9.383e-03, -2.018e-01)); + r += mul(s1_2, M4(-1.248e-02, 3.267e-02, -2.761e-01, -2.043e-02, -8.520e-02, 3.937e-02, -1.372e-01, 1.821e-02, 6.915e-02, -4.061e-02, 1.782e-01, -4.619e-02, 6.811e-02, -5.458e-04, 3.193e-01, 8.892e-03)); + r += mul(s1_3, M4(-1.580e-01, 7.536e-02, -6.680e-02, 1.891e-01, 1.196e-01, 3.476e-01, -6.321e-02, 1.972e-01, -9.851e-02, 4.483e-01, 9.326e-03, 5.272e-01, -1.478e-01, -4.009e-02, -3.561e-02, -2.549e-01)); + r += mul(s1_4, M4(-1.253e-01, 1.345e-01, 4.994e-01, 2.000e-01, 2.728e-01, 1.672e-01, 5.501e-01, -1.736e-02, -5.782e-01, -2.191e-01, 4.380e-01, 4.346e-02, -3.006e-01, -5.220e-02, -1.613e-01, 6.023e-02)); + r += mul(s1_5, M4(1.276e-01, -8.319e-02, -2.115e-01, 1.471e-01, -1.669e-01, -2.484e-02, 9.906e-02, 1.836e-02, 1.010e-01, 1.847e-02, 1.027e-01, -1.680e-02, -1.880e-01, 1.377e-01, 3.823e-02, -8.256e-02)); + r += mul(s1_6, M4(-3.200e-01, -7.023e-02, -1.243e-01, -2.003e-02, -7.863e-02, 6.650e-02, -1.264e-01, -1.862e-01, -9.119e-02, -4.374e-02, -1.195e-01, -6.902e-02, -1.360e-01, 3.356e-02, -3.667e-02, -1.815e-01)); + r += mul(s1_7, M4(1.462e-02, 1.001e-01, 2.453e-01, -1.298e-02, -4.372e-01, 1.509e-01, 8.011e-02, -1.323e-01, -1.980e-01, -4.785e-02, 1.733e-01, 1.100e-02, -2.153e-01, 6.711e-02, 2.595e-03, 1.213e-01)); + r += mul(s1_8, M4(-3.794e-03, 2.239e-02, -6.960e-02, 7.342e-02, -1.882e-01, 1.159e-01, 1.876e-01, 3.125e-02, 2.242e-01, -5.956e-02, 1.328e-02, -5.400e-02, 2.205e-02, -6.049e-02, -9.151e-02, -1.137e-01)); + r += V4(-1.437e-02, -2.276e-02, 2.275e-02, 6.547e-04); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = 
l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.886e-03, -1.503e-01, -6.378e-01, 4.214e-02, -1.255e-01, 1.146e-01, -1.917e-01, -6.556e-02, -3.368e-02, 6.874e-02, 2.796e-01, -2.936e-02, -3.239e-02, 3.923e-02, -6.439e-02, 1.313e-02)); + r += mul(s0_1, M4(4.357e-01, -1.067e-01, 3.330e-01, -8.295e-02, -4.004e-01, 3.113e-01, -4.222e-02, 2.290e-01, -1.861e-01, 9.039e-02, -1.132e-01, 1.077e-01, -1.603e-02, 6.296e-02, 4.907e-01, 3.396e-02)); + r += mul(s0_2, M4(-3.290e-01, -1.073e-01, 1.064e-02, -2.792e-03, -4.366e-01, 3.239e-01, -1.383e-01, 1.918e-01, 3.058e-02, 1.006e-01, -6.898e-02, -1.451e-02, -1.882e-01, 2.248e-01, 1.744e-02, -3.155e-02)); + r += mul(s0_3, M4(2.403e-02, -1.353e-01, 1.895e-01, -2.285e-01, -1.211e-01, 1.771e-01, 2.135e-01, 1.900e-01, -4.204e-03, 3.719e-02, -4.772e-01, 2.006e-01, -2.532e-03, 5.872e-02, 2.901e-01, -9.450e-02)); + r += mul(s0_4, M4(8.054e-02, 1.389e-02, -2.060e-02, -3.042e-01, -2.476e-01, 9.905e-02, -9.248e-01, 3.372e-01, -5.254e-01, 4.455e-01, 
5.707e-02, 1.057e-01, -3.525e-01, 3.349e-01, -3.414e-01, 7.090e-02)); + r += mul(s0_5, M4(-1.889e-01, -2.290e-01, -4.930e-02, -1.824e-01, -2.062e+00, 6.868e-02, 2.552e-01, 3.883e-01, 5.778e-02, 9.141e-02, 9.917e-02, -1.164e-01, 4.359e-02, 2.105e-01, -7.911e-02, -1.916e-01)); + r += mul(s0_6, M4(-2.267e-02, -6.231e-03, -9.718e-03, 3.770e-04, -6.982e-02, 4.184e-02, -2.296e-01, -9.542e-02, 5.236e-02, -5.412e-02, -1.757e-01, -1.054e-01, 1.414e-02, -7.772e-02, -1.338e-02, 3.928e-02)); + r += mul(s0_7, M4(5.776e-02, 4.703e-02, 3.914e-02, -1.617e-02, -3.606e-01, 3.037e-01, -3.096e-01, 3.562e-02, 3.108e-01, -3.684e-01, 3.725e-02, -2.050e-01, -1.494e-02, 8.741e-02, 5.992e-02, 2.655e-02)); + r += mul(s0_8, M4(3.614e-02, -1.212e-01, 2.507e-02, -5.858e-02, -1.121e-01, -3.433e-01, 6.613e-02, -6.943e-01, 2.233e-02, -5.467e-02, -6.900e-03, -2.566e-01, -1.106e-01, 2.016e-02, -3.700e-02, -2.886e-01)); + r += mul(s1_0, M4(-5.136e-02, -2.190e-01, -1.035e+00, -5.722e-02, 2.876e-02, 5.070e-02, 3.532e-01, -6.778e-03, 2.930e-04, -6.219e-02, 2.314e-01, -5.210e-02, 1.508e-02, -4.390e-02, -7.749e-02, -9.658e-03)); + r += mul(s1_1, M4(3.663e-01, -9.746e-02, -6.582e-01, -3.676e-01, -1.694e-01, 7.883e-02, -1.613e-01, 2.328e-02, 2.595e-04, -3.763e-02, -9.946e-02, -6.137e-02, 1.429e-01, -1.964e-01, 2.439e-01, 4.898e-02)); + r += mul(s1_2, M4(7.884e-02, 1.842e-01, -1.309e-01, 4.895e-02, 4.820e-02, 8.364e-02, 1.189e-02, -1.438e-02, -7.934e-02, 4.775e-02, -6.137e-02, -1.335e-02, -4.416e-02, 3.584e-02, 1.751e-04, -1.178e-02)); + r += mul(s1_3, M4(-9.861e-03, -1.277e-01, 2.389e-03, -3.232e-01, -2.782e-03, 1.115e-01, -6.485e-02, 2.093e-01, 2.056e-01, 2.527e-02, -1.772e-01, 1.863e-02, 5.983e-02, -8.103e-02, 3.076e-01, -2.027e-01)); + r += mul(s1_4, M4(1.001e-01, 3.476e-01, -1.305e-01, -1.653e-01, 8.890e-02, -4.170e-01, -1.530e-01, 7.048e-02, -5.605e-01, 1.093e-01, 2.038e-01, -2.320e-01, -1.287e-01, -2.173e-01, -1.630e-01, -9.691e-02)); + r += mul(s1_5, M4(-2.778e-01, 1.393e-01, -2.802e-02, -5.375e-02, 
-4.550e-01, -1.661e-01, 2.293e-03, -5.984e-02, -5.070e-02, -8.852e-02, 7.806e-02, 2.187e-02, 1.901e-01, -3.219e-01, -1.937e-01, -2.336e-01)); + r += mul(s1_6, M4(-8.489e-02, 1.968e-01, -7.760e-02, 1.388e-01, 4.713e-03, 1.527e-01, 8.535e-02, 1.643e-02, 1.429e-01, -1.558e-01, 2.339e-01, 2.762e-01, 1.694e-02, -4.245e-02, -2.793e-02, -3.332e-02)); + r += mul(s1_7, M4(-4.377e-02, 3.486e-01, -1.766e-01, -1.065e-01, -1.645e-01, -8.722e-04, -1.147e-01, 1.663e-01, 6.801e-02, -3.539e-01, 1.560e-02, -1.819e-01, 1.440e-02, -1.221e-02, 3.693e-02, 5.886e-03)); + r += mul(s1_8, M4(5.940e-02, 1.624e-01, 1.526e-02, 6.692e-02, 1.812e-01, -8.647e-02, 3.210e-02, -3.751e-04, 2.884e-02, -4.717e-02, 4.121e-03, 5.144e-02, -1.995e-02, -2.827e-01, 6.148e-03, 7.209e-02)); + r += V4(1.575e-02, -2.007e-01, -3.519e-03, -9.082e-03); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 
+//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.479e-02, -9.976e-02, -1.507e-01, -9.934e-02, -1.046e-02, -1.471e-01, -4.218e-02, -8.348e-04, -5.963e-02, 1.519e-03, 5.897e-03, 5.284e-02, -4.467e-01, 4.779e-01, -1.953e-02, 1.951e-01)); + r += mul(s0_1, M4(-5.276e-02, -1.201e-01, -1.160e-01, 6.076e-02, -4.798e-02, -3.491e-01, -3.055e-01, -1.607e-01, -8.989e-02, 1.221e-01, -1.561e-01, 6.227e-02, -1.598e-01, -6.666e-01, 6.029e-01, -5.466e-01)); + r += mul(s0_2, M4(-1.331e-01, -4.988e-02, -2.217e-02, 3.405e-02, 2.261e-02, 1.352e-01, 1.124e-02, 8.259e-02, -3.548e-02, 2.454e-01, 4.417e-02, 2.297e-01, 1.780e-01, -2.203e-01, 5.913e-02, -2.201e-01)); + r += mul(s0_3, M4(1.348e-01, 5.544e-01, -4.335e-01, -3.619e-01, 1.011e-01, 2.665e-01, -2.627e-01, -1.800e-01, -1.158e-01, -8.543e-02, -7.868e-03, 2.056e-01, 1.988e-01, 1.174e+00, -1.291e-01, 1.131e-01)); + r += mul(s0_4, M4(4.504e-01, 1.025e-01, -1.449e-01, -3.442e-02, -4.525e-01, -1.513e-01, -8.135e-02, -9.669e-02, -3.287e-01, 5.251e-01, -6.540e-01, 7.386e-02, 2.603e-01, -8.246e-01, -1.378e-01, 2.363e+00)); + r += mul(s0_5, M4(-7.102e-02, -5.554e-02, -3.489e-02, -6.688e-02, 2.877e-01, -6.258e-02, 8.515e-02, -2.109e-01, -2.723e-01, 1.543e-01, 1.285e-01, 9.366e-02, 3.135e-02, -3.700e-01, -4.111e-01, 1.822e+00)); + r += mul(s0_6, M4(-4.018e-02, -3.412e-01, 5.388e-02, 4.947e-01, -3.234e-02, -6.778e-02, 3.825e-02, 1.313e-01, -6.083e-02, 3.439e-02, -1.081e-01, 6.456e-02, 2.287e-02, -2.470e-01, 2.026e-02, -1.886e-02)); + r += mul(s0_7, M4(2.410e-01, 1.529e-01, -1.370e-01, -1.389e-01, 1.549e-01, 8.308e-03, 3.064e-02, 3.925e-02, -9.013e-02, 1.131e-01, -9.240e-02, 3.740e-01, -1.009e-01, -6.576e-02, -1.491e-01, -3.452e-02)); + r += mul(s0_8, M4(-1.628e-01, -2.480e-02, -6.569e-02, 3.873e-02, 
1.604e-02, 1.651e-02, -4.681e-02, -1.647e-02, -1.648e-02, 1.541e-01, 2.284e-02, 6.545e-01, 1.799e-03, 1.193e-03, -1.215e-01, 5.919e-02)); + r += mul(s1_0, M4(-1.115e-02, -5.014e-02, -1.499e-01, -7.414e-04, -6.944e-02, -4.168e-02, -1.254e-01, -6.576e-02, 2.946e-04, -2.669e-02, 4.109e-02, 1.949e-02, 1.242e-01, 1.753e-01, 9.717e-02, 1.446e-01)); + r += mul(s1_1, M4(-1.327e-02, -1.462e-01, -8.510e-02, -1.228e-02, 1.772e-01, 1.009e-01, -4.342e-02, -8.827e-02, -6.663e-02, -1.245e-01, -4.625e-02, -4.285e-02, 7.586e-02, -1.208e-01, 2.705e-01, -1.558e-01)); + r += mul(s1_2, M4(-7.024e-02, -3.045e-02, -1.916e-02, 4.979e-02, -9.145e-02, 2.285e-01, 4.612e-02, 2.217e-01, 7.690e-02, -4.332e-02, 6.032e-03, -2.370e-02, 3.802e-01, -8.124e-02, 1.982e-02, -8.310e-02)); + r += mul(s1_3, M4(1.238e-01, 5.787e-01, -5.332e-01, -2.806e-01, 1.208e-01, 6.549e-02, -2.040e-01, -2.578e-02, -5.878e-02, -1.496e-01, 1.213e-01, 1.489e-02, 9.569e-02, 1.964e-01, 6.477e-02, -2.939e-01)); + r += mul(s1_4, M4(5.825e-01, 2.257e-01, -1.943e-01, 1.101e-01, -3.240e-01, -2.967e-01, -4.203e-02, -3.636e-01, -1.062e-01, -3.799e-02, -4.444e-01, -7.607e-02, -3.056e-01, -2.926e-01, -4.582e-02, 2.795e-01)); + r += mul(s1_5, M4(-9.076e-02, -5.130e-02, -3.718e-02, -6.163e-02, 1.831e-01, -1.199e-01, 9.176e-02, -2.456e-01, 2.362e-01, -1.854e-01, -1.394e-01, 3.560e-03, 2.070e-02, -6.903e-02, -5.061e-02, 3.068e-02)); + r += mul(s1_6, M4(-4.988e-02, -3.880e-01, 3.001e-02, 3.892e-01, -2.827e-02, -2.880e-02, 4.071e-02, 2.861e-01, -4.016e-02, -1.085e-01, 9.207e-03, -7.367e-02, 9.072e-03, 8.960e-02, 5.334e-03, -6.480e-02)); + r += mul(s1_7, M4(2.900e-01, 1.450e-01, -1.401e-01, -2.809e-01, 1.218e-01, -3.153e-03, -2.544e-02, 1.898e-01, -7.197e-02, -3.721e-01, 4.042e-02, 9.918e-02, -1.132e-01, 3.578e-02, 4.000e-02, 6.991e-02)); + r += mul(s1_8, M4(-1.493e-01, -2.310e-02, -6.133e-02, 5.322e-02, -4.879e-02, -5.139e-02, -8.058e-02, 4.140e-02, 2.511e-01, 3.669e-02, -1.003e-01, -1.457e-01, 1.528e-01, 1.177e-01, 6.665e-02, 
-3.084e-02)); + r += V4(2.513e-04, -2.994e-02, -5.133e-02, -8.977e-03); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 6 +//!DESC conv5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.575e-01, 2.412e-01, 1.926e-01, 5.873e-02, 2.954e-02, -1.424e-01, 7.881e-03, 2.358e-04, -5.872e-02, -1.007e-01, -3.632e-02, 5.718e-02, 1.389e-01, -4.163e-02, -1.379e-01, 2.160e-03)); + r += mul(s0_1, M4(1.347e-01, -8.074e-01, -1.155e-01, 2.242e-01, -2.673e-01, 4.053e-01, 8.867e-02, -2.840e-02, 9.443e-02, 2.632e-01, 9.207e-02, -1.793e-02, 1.519e-01, 3.302e-03, 2.027e-01, 2.643e-02)); + r += mul(s0_2, M4(1.462e-02, -7.543e-02, -6.080e-02, 7.431e-02, -3.673e-02, 
-1.665e-01, -2.745e-01, -4.416e-02, -3.270e-01, 7.677e-01, 7.241e-01, -1.157e-01, -8.204e-03, 2.172e-02, 3.183e-01, 3.931e-02)); + r += mul(s0_3, M4(1.168e+00, -8.427e-01, -3.237e-03, 5.416e-02, 1.694e-02, -1.042e-01, -2.173e-01, -1.089e-01, -9.881e-02, -1.109e-01, -1.003e-01, -5.080e-02, -9.279e-02, -1.111e-01, -2.699e-02, -2.297e-02)); + r += mul(s0_4, M4(-4.884e-01, -4.472e-01, -9.701e-02, 8.789e-01, 1.962e-02, 5.041e-01, 3.221e-01, -4.622e-02, 9.039e-02, -2.531e-01, 6.228e-01, 1.590e-02, 1.804e-02, 7.795e-02, -8.005e-02, -6.310e-03)); + r += mul(s0_5, M4(-6.567e-02, -5.161e-02, 5.550e-02, 5.285e-02, -6.147e-02, -1.840e-01, 2.028e-01, 4.014e-01, 4.070e-01, -1.022e-01, 1.414e+00, -3.126e-01, 7.508e-03, 1.013e-01, -7.300e-02, -4.282e-01)); + r += mul(s0_6, M4(1.721e+00, 1.776e-01, -8.690e-02, -1.102e-01, -8.467e-02, -2.165e-02, 6.238e-02, 2.052e-02, 2.763e-01, -3.472e-02, -1.179e-01, 2.993e-02, -6.860e-02, 1.887e-02, 3.140e-02, -6.853e-02)); + r += mul(s0_7, M4(1.937e-01, 1.975e-01, -2.456e-01, -1.360e+00, 1.792e-01, -5.969e-02, -7.670e-02, 2.606e-01, 1.355e-01, -9.109e-03, 2.756e-01, 6.674e-02, 1.312e-02, -1.542e-02, 2.236e-02, 1.997e-01)); + r += mul(s0_8, M4(4.255e-02, -1.452e-02, -8.732e-02, -1.084e-01, 1.495e-02, 1.302e-02, -9.151e-02, -2.814e-01, 5.197e-02, 2.866e-02, 5.490e-01, 4.310e-01, 3.666e-02, -3.380e-03, -2.830e-02, -8.223e-02)); + r += mul(s1_0, M4(2.549e-02, 7.469e-02, -5.290e-02, -4.972e-02, -2.340e-01, -1.875e-01, 1.656e-01, 5.697e-02, -8.570e-02, -1.520e-01, -2.622e-02, 1.043e-02, -2.377e-01, -3.927e-02, 1.539e-01, 4.528e-02)); + r += mul(s1_1, M4(-1.188e-02, -9.781e-02, 1.606e-01, 5.138e-02, -4.165e-01, 8.262e-01, 1.709e-01, -1.063e-01, 8.393e-03, 7.300e-02, -9.347e-02, -6.226e-02, -3.633e-01, -4.453e-01, 2.190e-01, 2.415e-01)); + r += mul(s1_2, M4(-4.011e-02, 3.404e-02, 1.013e-01, 3.551e-02, 9.692e-02, -2.109e-01, 1.897e-01, -2.192e-01, -1.703e-01, 5.317e-01, 1.354e-01, -2.027e-01, -3.658e-01, -1.845e-01, -5.465e-01, 1.436e-01)); + r += 
mul(s1_3, M4(7.674e-01, 1.677e-01, -7.875e-02, 7.537e-03, -4.911e-01, -1.083e-01, 7.183e-03, -1.107e-01, -2.514e-02, -1.257e-01, -5.070e-02, -3.886e-02, 1.368e-01, -1.991e-02, -1.698e-01, -7.850e-03)); + r += mul(s1_4, M4(-5.096e-02, 7.912e-02, -2.105e-01, 1.149e-01, 9.798e-02, 2.243e-01, -3.434e-01, 3.492e-01, -1.265e-01, -1.839e-01, -1.337e-01, -6.909e-02, -8.552e-01, 1.334e-01, 8.652e-01, -3.408e-01)); + r += mul(s1_5, M4(-2.933e-02, 1.424e-01, 6.542e-02, -1.710e-01, -1.459e-01, -3.069e-02, -1.275e-01, -9.443e-02, 2.657e-01, -4.784e-04, -6.729e-03, -1.910e-01, -4.628e-01, 3.808e-02, -1.470e-01, 1.480e-01)); + r += mul(s1_6, M4(1.512e-01, -1.755e-02, -5.440e-02, 1.317e-02, -7.181e-02, -6.842e-03, -7.375e-02, -8.356e-02, 7.332e-02, -9.437e-02, -1.008e-01, -4.731e-02, -9.102e-02, -8.192e-03, 7.862e-04, 6.417e-02)); + r += mul(s1_7, M4(2.457e-01, -1.058e-01, -2.777e-02, -1.532e-03, 7.609e-02, 3.452e-02, 1.774e-01, 3.296e-01, 6.779e-02, -6.683e-02, 1.485e-01, 7.321e-02, -3.082e-02, -4.348e-02, 3.558e-03, 9.111e-03)); + r += mul(s1_8, M4(1.104e-01, 5.040e-03, 9.642e-03, -8.991e-02, -2.134e-01, 3.758e-02, -1.244e-01, -1.987e-01, -7.007e-02, 6.792e-03, 1.369e-01, 5.332e-01, -5.354e-02, -2.024e-02, -1.038e-01, -4.812e-02)); + r += V4(4.102e-03, 1.192e-03, -2.598e-03, -2.812e-03); + return r; +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = 
-max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 7 +//!DESC conv6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(6.200e-02, 5.385e-02, -5.478e-02, 3.955e-02, -1.722e-02, -1.194e-01, 8.331e-02, -9.296e-02, -2.161e-02, 8.716e-02, -5.918e-02, 1.032e-01, 4.954e-02, -3.822e-02, 8.472e-02, -2.191e-01)); + r += mul(s0_1, M4(2.503e-01, 5.635e-02, 7.355e-03, -2.025e-01, 7.104e-02, -1.324e-01, -3.051e-02, 2.246e-02, -4.480e-02, 6.693e-03, 4.467e-02, 3.388e-02, 4.262e-01, 1.488e-01, -8.809e-01, 5.350e-01)); + r += mul(s0_2, M4(-7.511e-03, 1.921e-01, -3.653e-01, 2.096e-02, 2.413e-02, 4.846e-02, -1.538e-01, 3.359e-02, 5.958e-03, -1.033e-02, 2.389e-02, 1.283e-02, -5.270e-02, 2.842e-01, 5.681e-02, -3.578e-02)); + r += mul(s0_3, M4(-2.198e-02, -1.674e-02, 3.330e-02, 3.249e-02, -4.430e-02, 9.217e-02, -3.348e-02, -3.546e-01, 1.228e-01, 3.875e-02, 7.220e-03, 6.719e-02, -8.768e-01, -1.165e-02, -3.862e-02, -2.045e-02)); + r += mul(s0_4, M4(-6.935e-01, -4.898e-01, 2.252e-01, -1.647e-01, -6.408e-02, 4.562e-01, -6.617e-01, 1.220e-01, 1.053e-02, -9.937e-02, -1.118e-02, 3.272e-01, -9.081e-02, 2.353e-02, 4.776e-01, -1.238e-01)); + r += mul(s0_5, M4(2.481e-01, -3.296e-01, -3.372e-02, -2.008e-02, 5.924e-03, 1.762e-02, 3.642e-01, -1.182e-01, -2.219e-02, -4.332e-02, -9.762e-02, 3.537e-02, 2.114e-02, -5.440e-02, 3.124e-01, 5.069e-02)); + r += mul(s0_6, 
M4(-5.465e-02, -5.352e-03, -3.419e-03, -6.733e-02, -8.079e-02, -6.569e-02, -1.494e-02, -3.462e-01, -8.125e-03, 2.572e-03, -3.894e-02, -3.246e-02, -1.566e-02, -3.004e-02, 1.145e-01, 6.794e-02)); + r += mul(s0_7, M4(4.788e-02, 7.675e-03, -7.030e-02, -2.384e-02, -3.070e-01, -7.080e-01, -2.017e-01, 9.579e-02, 1.259e-01, -1.004e-02, -1.287e-01, 3.334e-02, -9.642e-02, -8.073e-02, 2.546e-02, 5.204e-02)); + r += mul(s0_8, M4(-6.015e-02, 1.650e-01, -5.471e-02, -1.454e-01, -2.785e-02, -1.831e-01, 1.123e-01, 3.453e-02, -1.179e-02, 1.722e-02, -1.068e-02, -2.608e-02, 1.514e-04, -1.287e-02, -7.741e-03, -9.765e-03)); + r += mul(s1_0, M4(-4.922e-02, -5.675e-03, -2.161e-02, 3.164e-02, -2.003e-02, -3.890e-02, 5.198e-02, -1.811e-03, -3.385e-02, -1.510e-02, -2.289e-02, 1.009e-01, 4.427e-02, -1.763e-01, 1.255e-01, -5.073e-02)); + r += mul(s1_1, M4(1.057e-01, -8.124e-02, 1.131e-01, -1.361e-01, 4.740e-02, -6.425e-02, 8.930e-03, 5.318e-02, 5.266e-02, -6.003e-02, 1.320e-01, 4.163e-02, 1.277e-01, -2.404e-01, -1.696e-01, 2.204e-01)); + r += mul(s1_2, M4(2.723e-02, 1.918e-01, -2.822e-01, -1.877e-02, -4.599e-03, 7.591e-02, -1.128e-01, -6.519e-03, 2.311e-02, -1.684e-01, 2.293e-01, -1.042e-01, -1.882e-02, 4.970e-02, -1.309e-01, -8.894e-03)); + r += mul(s1_3, M4(4.883e-02, 2.819e-02, 4.318e-02, 3.186e-02, 7.782e-02, 1.741e-01, -8.927e-02, 4.005e-02, 5.888e-02, -1.057e-01, 9.692e-02, 8.032e-02, -1.086e-01, 6.323e-02, -8.520e-02, -1.273e-02)); + r += mul(s1_4, M4(-1.746e-01, -2.834e-02, -3.694e-02, 3.226e-01, -2.541e-01, 6.860e-01, -1.436e-01, 1.705e-01, 2.614e-01, -6.751e-02, 5.646e-02, 3.666e-01, -2.621e-02, 4.951e-01, -1.090e-01, -3.168e-01)); + r += mul(s1_5, M4(1.513e-01, 5.210e-02, 2.625e-01, -6.303e-02, -2.252e-02, -9.485e-02, 4.776e-01, -1.789e-01, -1.291e-01, -9.714e-02, -1.427e-01, -1.165e-01, 2.415e-02, 9.790e-02, 6.024e-02, -9.622e-02)); + r += mul(s1_6, M4(3.751e-02, -2.907e-02, -1.762e-02, -9.545e-02, 2.866e-01, -7.329e-02, -9.787e-03, 4.513e-03, -9.486e-02, -2.446e-02, -2.357e-02, 
-5.002e-02, 4.973e-02, 6.256e-02, -2.532e-02, -1.817e-02)); + r += mul(s1_7, M4(-6.855e-02, -6.762e-02, -6.269e-02, -6.947e-02, -1.389e-01, -1.915e-01, -4.806e-02, 1.870e-01, 1.298e-01, 6.268e-03, -5.985e-02, -5.396e-02, -3.048e-02, -5.396e-03, -9.720e-02, 3.289e-03)); + r += mul(s1_8, M4(-2.052e-02, -8.106e-02, -1.721e-02, 9.911e-03, -8.521e-02, 4.832e-02, -1.708e-01, -6.445e-02, -9.788e-02, 8.836e-02, -1.204e-01, -1.123e-01, 1.514e-02, 1.628e-02, -5.003e-02, -6.128e-03)); + r += V4(1.448e-03, -2.432e-03, -8.004e-04, 5.896e-05); + return r; +} + +void Pass7(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 8 +//!DESC conv7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0 +//!OUT t1 + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, 
M4(5.901e-02, -1.033e-01, -1.441e-01, 4.291e-02, 2.355e-02, -1.199e-01, -1.741e-01, -5.263e-03, -6.030e-03, -4.043e-02, 1.910e-01, 8.326e-03, 2.913e-02, 1.969e-02, -1.380e-01, 9.492e-02)); + r += mul(s0_1, M4(-1.616e-01, 1.649e-01, -1.133e-02, -1.037e-01, -1.060e-02, 2.299e-01, -5.302e-02, -2.329e-01, -8.540e-02, 2.232e-01, 2.647e-01, 3.922e-01, 5.387e-02, 5.841e-01, -1.264e-01, -1.440e-01)); + r += mul(s0_2, M4(-1.944e-02, -7.262e-02, 9.583e-02, 3.448e-02, 4.402e-02, 5.319e-02, -2.384e-02, 4.652e-02, 6.280e-02, -4.195e-02, 1.573e-02, 7.059e-02, 1.029e-01, -1.784e-02, -3.735e-02, -4.952e-02)); + r += mul(s0_3, M4(7.393e-02, -1.825e-01, -2.983e-01, -5.798e-02, -2.475e-01, -4.958e-02, 6.660e-01, -2.202e-01, -9.158e-02, 4.280e-04, 2.472e-01, -2.979e-01, -9.887e-02, 6.188e-02, 2.163e-01, -9.358e-03)); + r += mul(s0_4, M4(-8.664e-01, 2.357e-01, 3.390e-01, -5.275e-01, -2.213e-01, -4.992e-01, 5.479e-01, 4.245e-01, -7.542e-02, 4.854e-01, -3.525e-01, 3.950e-01, 3.619e-01, -3.968e-01, -3.447e-01, 5.089e-01)); + r += mul(s0_5, M4(-9.239e-02, -6.370e-01, -7.252e-02, -3.435e-01, -1.057e-01, 1.616e-01, -4.413e-02, 1.824e-01, 2.001e-02, -1.343e-01, -5.730e-02, 7.302e-02, -2.361e-02, -9.044e-02, -1.041e-01, 2.971e-01)); + r += mul(s0_6, M4(-2.803e-02, -8.707e-02, -1.407e-01, -2.685e-02, 1.099e-01, 1.721e-01, 1.612e-01, 6.962e-02, -1.659e-02, 7.845e-02, 2.165e-01, -7.067e-02, 1.666e-02, 7.051e-02, 6.373e-02, 4.391e-02)); + r += mul(s0_7, M4(-1.560e-01, -2.698e-02, -5.684e-01, -1.184e-01, 7.742e-01, -1.023e-03, -8.177e-02, 2.857e-01, 2.253e-02, -1.400e-02, -6.523e-02, 7.644e-02, 1.789e-01, -8.433e-03, 1.041e-01, 7.009e-02)); + r += mul(s0_8, M4(-1.491e-01, -2.037e-01, -2.499e-01, -7.730e-02, 1.051e-01, -1.718e-02, -1.762e-01, 4.808e-02, -3.068e-03, 1.737e-02, -3.772e-04, 4.732e-02, 7.205e-02, 7.901e-02, -1.759e-02, 8.476e-02)); + r += mul(s1_0, M4(4.810e-02, -1.822e-02, -1.150e-01, -1.679e-02, -5.481e-02, -7.544e-02, 2.213e-01, 2.615e-02, -2.628e-03, -1.482e-01, -5.570e-02, 
5.137e-02, -1.381e-02, -1.878e-03, -3.132e-02, -3.309e-02)); + r += mul(s1_1, M4(1.101e-01, 1.003e-01, -4.307e-01, -2.520e-02, 1.138e-02, -1.966e-01, 6.664e-02, 1.114e-01, -1.431e-01, 3.634e-01, 4.274e-02, -8.279e-02, -5.291e-02, 3.540e-01, 8.995e-02, -1.401e-01)); + r += mul(s1_2, M4(7.230e-02, 4.684e-01, -6.542e-02, -2.792e-01, 2.936e-02, 3.476e-03, -1.024e-02, 1.880e-01, 1.898e-02, 2.529e-02, 8.537e-03, -6.073e-03, 1.025e-01, -2.320e-01, -1.804e-02, 5.471e-02)); + r += mul(s1_3, M4(-9.258e-03, -7.731e-03, 4.285e-02, -4.725e-02, -3.878e-02, -1.749e-02, -1.681e-02, -1.020e-01, -3.975e-02, 1.609e-02, 8.299e-02, -1.824e-01, -2.500e-02, 3.516e-02, 8.591e-02, 1.714e-02)); + r += mul(s1_4, M4(-2.210e-01, 1.534e-01, 3.410e-01, -2.552e-01, -5.090e-02, 1.582e-02, 1.802e-01, -1.333e-01, -5.371e-01, 3.751e-01, -1.323e-01, 3.018e-01, 1.756e-01, -9.756e-02, -4.873e-01, 4.985e-01)); + r += mul(s1_5, M4(-1.073e-02, 2.919e-01, -2.025e-01, 3.240e-01, 4.318e-02, -1.972e-02, -1.612e-01, 3.528e-01, -6.472e-02, -6.212e-02, 3.146e-02, 6.391e-02, 4.950e-02, -6.270e-01, -1.985e-02, 4.680e-02)); + r += mul(s1_6, M4(-2.215e-02, 1.836e-02, 5.021e-02, -3.016e-02, -7.854e-03, 1.135e-02, 3.407e-02, -2.923e-02, -5.384e-03, 6.570e-02, 2.437e-01, -8.712e-02, 2.275e-02, -2.291e-03, -7.378e-02, 5.231e-02)); + r += mul(s1_7, M4(-4.186e-02, 6.944e-02, 8.353e-02, -1.927e-02, 3.937e-02, 2.105e-02, 7.152e-02, 5.635e-03, 1.114e-01, -3.772e-02, -1.853e-01, 6.636e-02, 4.654e-02, -1.008e-01, -1.625e-01, 7.888e-02)); + r += mul(s1_8, M4(5.288e-02, -5.516e-02, -4.014e-02, 8.854e-02, 2.434e-02, 9.192e-02, -1.203e-02, 6.813e-02, 4.626e-02, -4.892e-02, 4.700e-03, 7.578e-02, -5.040e-02, 3.497e-02, 3.176e-02, -9.741e-02)); + r += V4(2.671e-03, -5.536e-03, -4.013e-03, 4.378e-03); + return r; +} + +void Pass8(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 
pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t1[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 9 +//!DESC conv8 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t1 +//!OUT t0 + +#define l0(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.283e-02, 5.262e-02, 1.580e-02, 4.991e-02, 6.836e-02, -3.234e-02, 5.630e-02, 1.275e-01, 5.398e-03, 9.866e-04, -1.054e-02, 1.601e-02, 1.546e-02, -7.786e-02, -2.630e-02, -3.023e-02)); + r += mul(s0_1, M4(9.285e-02, 3.403e-01, -4.572e-02, 1.431e-01, 2.876e-01, -3.271e-01, -8.133e-04, 5.998e-01, 4.515e-02, 9.836e-02, 2.315e-02, 1.724e-01, -8.080e-02, -1.978e-01, -5.366e-02, -4.535e-02)); + r += mul(s0_2, M4(1.708e-02, -8.374e-02, -1.831e-02, 1.744e-02, 4.902e-02, -1.037e-02, -3.508e-02, 3.501e-02, 1.160e-01, 2.529e-01, 4.235e-02, 4.233e-02, -5.953e-03, -1.398e-01, -8.815e-03, 1.053e-02)); + r += mul(s0_3, M4(-2.836e-03, -2.496e-01, 2.703e-02, 9.490e-02, 3.985e-01, -9.458e-02, 1.355e-01, 5.917e-01, 5.597e-03, -8.963e-02, 5.238e-02, 4.360e-02, -1.070e-01, 7.593e-02, 
6.376e-02, -1.498e-01)); + r += mul(s0_4, M4(3.214e-01, -8.045e-01, 6.621e-01, -1.261e-01, -1.487e+00, 1.086e+00, 3.779e-01, -1.762e+00, 2.721e-01, -3.815e-02, -1.450e-01, 4.063e-01, 2.804e-01, 3.876e-01, 2.607e-01, 2.174e-01)); + r += mul(s0_5, M4(-3.896e-01, 3.340e-01, -2.529e-01, -6.519e-02, -1.815e-01, 5.542e-02, -1.669e-01, 1.732e-02, 2.995e-01, 4.942e-02, 6.557e-02, -1.386e-01, -1.392e-01, 2.822e-01, 2.016e-02, -1.313e-01)); + r += mul(s0_6, M4(-2.130e-02, 4.137e-02, 7.324e-02, 4.834e-03, 9.333e-02, -2.998e-01, 4.229e-01, 9.535e-02, -2.595e-02, 2.955e-02, 7.491e-02, -3.028e-02, -2.850e-02, 1.582e-02, -1.076e-01, -3.159e-02)); + r += mul(s0_7, M4(-3.601e-02, 5.993e-02, -1.190e-02, -6.800e-02, 6.894e-03, -2.095e-01, -9.548e-02, -2.539e-02, -2.390e-02, 2.947e-02, 1.581e-01, -5.305e-03, 1.029e-01, -1.456e-01, -3.526e-02, 9.251e-02)); + r += mul(s0_8, M4(-7.206e-02, 9.690e-02, -4.464e-02, -6.999e-03, 3.140e-02, -4.201e-02, -6.364e-03, 5.280e-03, -1.412e-01, 1.696e-01, -1.274e-01, -9.546e-02, 5.285e-02, -1.072e-01, 5.994e-02, 1.293e-02)); + r += mul(s1_0, M4(-1.808e-02, 1.243e-01, -6.814e-02, -4.219e-03, 1.273e-02, 2.752e-02, 3.764e-02, 3.650e-02, 7.663e-04, 6.843e-03, 1.380e-02, -3.235e-02, 5.400e-02, -5.352e-02, 1.190e-02, -1.028e-01)); + r += mul(s1_1, M4(2.568e-01, 2.764e-01, 7.740e-02, 1.273e-01, 7.059e-02, 6.668e-02, 4.211e-02, 6.293e-02, -4.164e-02, 2.210e-01, -1.293e-02, 8.369e-02, 2.046e-01, 1.238e-01, 9.491e-02, 4.614e-02)); + r += mul(s1_2, M4(-2.387e-02, 3.174e-01, 8.165e-02, -6.680e-02, -1.516e-02, 1.482e-02, -1.342e-02, 1.692e-02, -2.288e-02, -6.891e-02, -5.559e-02, 4.771e-02, 3.290e-02, 1.234e-01, 4.334e-02, -5.106e-02)); + r += mul(s1_3, M4(6.216e-02, -2.114e-01, -1.616e-01, 1.664e-01, 3.796e-02, 6.036e-02, -1.106e-01, 1.398e-01, -3.139e-02, -6.274e-02, 4.988e-02, -6.274e-02, 2.296e-02, -5.131e-02, 5.052e-02, -8.866e-02)); + r += mul(s1_4, M4(2.647e-01, -7.858e-01, 1.597e-01, -8.262e-01, -3.213e-01, 2.427e-01, 1.686e-01, -4.251e-01, 1.505e-01, 
3.244e-02, 1.023e-01, 1.962e-01, -1.116e-01, 3.525e-01, 8.848e-01, -1.945e-01)); + r += mul(s1_5, M4(-2.549e-01, -1.429e-01, -3.696e-02, 3.042e-01, -1.256e-01, 2.760e-02, -3.650e-02, 7.985e-02, -1.958e-01, 3.076e-01, -9.253e-02, -8.512e-02, -1.708e-01, -3.422e-04, -8.181e-02, 2.319e-01)); + r += mul(s1_6, M4(-3.382e-02, 6.627e-02, 1.158e-01, -3.044e-02, -7.983e-03, -7.855e-02, 1.729e-02, 3.219e-04, -1.764e-02, 4.065e-02, -1.400e-02, -2.387e-02, 2.673e-03, 5.460e-03, -4.992e-02, -1.573e-02)); + r += mul(s1_7, M4(-2.505e-02, 1.763e-01, -4.433e-01, -1.024e-01, 1.391e-01, -2.435e-01, -5.358e-02, 5.203e-02, 3.157e-02, 2.012e-02, 7.424e-03, 3.723e-02, -2.388e-02, 7.204e-02, -4.522e-01, -1.187e-02)); + r += mul(s1_8, M4(9.737e-02, 7.067e-02, 4.072e-02, 4.303e-02, 2.890e-02, -1.810e-02, 5.156e-03, -1.953e-02, -3.503e-02, 7.492e-02, 1.402e-02, -9.796e-03, 2.320e-01, -2.135e-01, 1.462e-01, 1.194e-01)); + r += V4(-5.006e-05, -2.252e-04, -1.752e-03, 4.586e-04); + return r; +} + +void Pass9(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, 
s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); +} + +//!PASS 10 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.670e-02, -1.964e-03, 2.191e-02, 3.109e-02, 1.911e-02, -2.017e-02, -2.948e-02, -2.237e-02, -3.845e-02, -7.954e-03, -3.472e-02, -2.253e-02, -1.571e-02, -6.613e-03, -1.489e-02, -2.647e-02)); + r += mul(s0_1, M4(-6.714e-02, -2.106e-02, 7.577e-03, 1.788e-02, 8.081e-02, 8.813e-02, -5.510e-02, -2.724e-02, 1.150e-01, 5.284e-02, -8.964e-02, -3.024e-02, 5.215e-02, 5.334e-02, -1.180e-02, 6.927e-03)); + r += mul(s0_2, M4(1.036e-02, 1.826e-02, -8.095e-03, -9.967e-03, 1.368e-03, 3.479e-02, -1.887e-03, -2.161e-02, -3.464e-02, -1.124e-01, -4.623e-03, -5.295e-03, -7.199e-03, -4.285e-02, 8.862e-03, -1.610e-02)); + r += mul(s0_3, M4(2.388e-01, -1.001e-03, 1.699e-01, -4.519e-02, -3.274e-01, 1.550e-01, 3.748e-02, 3.435e-02, -1.655e-01, 1.227e-02, -1.372e-01, 4.700e-02, -1.636e-01, 1.222e-02, -1.323e-01, 3.239e-02)); + r += mul(s0_4, M4(1.698e-01, 4.561e-01, -1.355e-01, 1.831e-01, -3.815e-01, -7.832e-01, 1.738e-01, 4.516e-02, 2.803e-01, -4.239e-01, 8.945e-01, -1.339e-02, -3.701e-01, -3.731e-01, 1.765e-01, -1.343e-01)); + r += mul(s0_5, M4(-4.653e-02, -8.470e-02, -1.076e-03, -7.153e-02, 1.022e-02, -2.560e-02, -1.154e-02, 2.252e-02, -1.053e-01, 4.014e-01, -1.479e-01, 3.667e-01, 9.425e-02, -8.079e-02, 5.594e-03, 4.870e-02)); + r += mul(s0_6, M4(-6.274e-02, -3.430e-02, -5.955e-02, 1.220e-02, -6.075e-02, 1.284e-02, -8.384e-02, 2.143e-01, -2.050e-02, -8.887e-03, -1.445e-02, 1.797e-02, 1.436e-01, -8.067e-04, 1.013e-01, 3.847e-03)); + r += mul(s0_7, M4(6.862e-02, -7.230e-02, -2.461e-01, -3.760e-01, 4.038e-02, -2.634e-02, -2.725e-01, -4.389e-01, 
9.088e-03, -1.873e-02, -9.497e-02, -1.860e-01, -1.038e-01, 2.502e-01, -6.194e-01, 4.470e-02)); + r += mul(s0_8, M4(-1.984e-02, 4.173e-02, 5.328e-02, 5.554e-02, 1.241e-03, -2.290e-03, 5.972e-02, 4.381e-02, -3.320e-03, -1.434e-04, -5.754e-02, -6.072e-02, -6.854e-03, 6.781e-02, 1.208e-01, -5.469e-02)); + r += mul(s1_0, M4(7.050e-02, -3.676e-02, 7.009e-03, 1.431e-02, -1.258e-02, -6.854e-03, -9.803e-04, 5.955e-03, -3.077e-03, -2.372e-02, 8.060e-03, -5.992e-02, -7.957e-02, 2.905e-02, 3.914e-04, -1.408e-02)); + r += mul(s1_1, M4(-1.068e-01, 4.589e-02, -1.399e-02, -8.157e-03, 1.811e-02, 7.241e-03, 9.447e-03, 3.242e-03, 5.152e-02, 8.667e-02, -2.512e-02, -2.978e-02, 1.382e-01, 5.481e-02, -2.199e-02, -2.739e-02)); + r += mul(s1_2, M4(3.676e-02, 1.705e-02, -4.520e-03, -6.449e-03, 1.006e-02, 9.807e-03, -6.046e-03, -1.299e-03, -5.035e-02, -4.415e-02, 9.619e-03, -1.059e-02, -6.952e-03, -1.803e-02, -4.042e-03, -1.751e-02)); + r += mul(s1_3, M4(5.123e-02, 4.500e-02, 2.099e-01, -7.254e-03, -7.977e-02, 2.822e-02, -1.546e-01, -3.748e-02, -2.378e-01, -1.836e-02, -3.508e-02, -2.147e-03, 3.371e-02, -4.720e-02, -5.574e-02, -1.592e-02)); + r += mul(s1_4, M4(-5.764e-01, 5.998e-01, -2.288e-01, 7.223e-01, -1.855e-01, -3.467e-01, 5.173e-02, -8.967e-02, 3.308e-01, -8.987e-02, 2.397e-01, 3.701e-01, -7.970e-02, -9.046e-01, 2.397e-01, -1.626e-01)); + r += mul(s1_5, M4(1.177e-02, -1.538e-01, 4.138e-02, -5.198e-02, 3.165e-03, 3.827e-02, -5.913e-03, 8.727e-03, 7.885e-02, 2.979e-01, -6.160e-02, 1.198e-01, 1.186e-02, 9.421e-02, -4.101e-02, 4.185e-03)); + r += mul(s1_6, M4(-7.690e-02, -4.820e-03, -1.106e-01, 4.040e-02, -6.883e-02, -3.284e-02, 1.259e-02, 1.509e-01, 6.378e-03, -5.293e-04, -3.690e-02, 6.274e-02, 1.401e-01, -3.801e-03, 1.489e-01, -1.044e-02)); + r += mul(s1_7, M4(1.140e-01, -1.333e-01, -1.739e-01, -1.739e-01, 4.736e-02, -1.306e-02, -3.673e-01, -6.127e-01, -3.477e-02, -6.090e-02, 2.430e-02, -2.666e-01, -6.599e-02, 2.794e-01, -1.724e-01, -2.744e-01)); + r += mul(s1_8, M4(1.045e-02, 6.106e-02, 
3.463e-02, 6.708e-02, -1.028e-02, -2.277e-02, 6.536e-02, 8.227e-02, -5.566e-02, -3.941e-02, -6.862e-03, -1.219e-02, -1.438e-02, -4.651e-02, 5.359e-02, 4.650e-02)); + r += V4(-1.731e-03, -2.098e-03, -1.131e-03, -1.644e-03); + return tanh(r); +} + +void Pass10(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, 
INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-8x8C-NVL-DN.hlsl b/src/Effects/CuNNy/CuNNy-8x8C-NVL-DN.hlsl new file mode 100644 index 000000000..d55e1e9ba --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-8x8C-NVL-DN.hlsl @@ -0,0 +1,1573 @@ +// CuNNy 8x8C BILINEAR RGB NVL DN - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-DN-D08N08 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1 + +#define l0(x, y) min16float((dot(float3(1.925e-01, 3.819e-01, 8.369e-02), O(INPUT, float2(x, y)).rgb) + -5.387e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { + V4 r = 0.0; + r += V4(2.639e-02, -1.701e-01, -3.774e-03, -1.132e-01) * s0_0; + r += V4(7.110e-02, 1.313e-01, -1.082e-02, 2.661e-02) * s0_1; + r += V4(-7.133e-02, 3.915e-02, 1.220e-02, -3.065e-02) * s0_2; + r += V4(-1.310e-01, -4.277e-01, 5.738e-01, -1.361e-01) * s0_3; + r += V4(8.785e-02, -3.337e-02, -2.604e-02, 5.665e-01) * s0_4; + r += V4(-1.597e-01, 4.606e-01, -4.603e-03, -5.048e-02) * s0_5; + r += V4(5.069e-02, 2.608e-02, -2.872e-02, -1.027e-01) * s0_6; + r += V4(1.334e-01, -3.456e-02, -1.744e-03, 7.886e-02) * s0_7; + r += V4(2.759e-02, 1.279e-02, -7.839e-03, -2.046e-01) * s0_8; + r += V4(4.846e-02, 9.244e-03, -3.897e-02, 9.766e-03); + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) 
{ + V4 r = 0.0; + r += V4(-2.867e-01, 2.141e-01, 4.437e-02, -5.342e-02) * s0_0; + r += V4(-4.092e-01, -1.685e-01, -3.220e-02, -2.083e-01) * s0_1; + r += V4(-4.212e-02, -3.446e-02, 3.496e-02, -6.681e-02) * s0_2; + r += V4(6.756e-02, 4.954e-01, -9.457e-02, 1.597e-01) * s0_3; + r += V4(7.022e-01, -4.053e-02, 3.086e-02, -3.529e-01) * s0_4; + r += V4(-1.718e-02, -4.736e-01, -1.362e-02, 2.655e-02) * s0_5; + r += V4(6.323e-02, 1.250e-02, -2.611e-01, 2.058e-02) * s0_6; + r += V4(-1.266e-01, -5.605e-03, -3.361e-01, 1.510e-01) * s0_7; + r += V4(4.938e-02, -7.117e-04, 2.257e-02, -7.702e-03) * s0_8; + r += V4(1.322e-02, 1.053e-02, 6.702e-02, 8.937e-03); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.787e-02, 1.507e-01, 1.386e-01, 1.354e-01, -1.647e-02, 2.280e-01, -6.392e-02, 
-1.182e-01, -3.002e-01, 5.059e-02, -4.223e-02, 1.296e-01, 3.034e-01, 9.575e-02, 2.396e-01, 2.604e-01)); + r += mul(s0_1, M4(1.771e-01, 1.258e-01, 9.676e-02, -5.836e-02, 2.180e-01, 9.400e-02, -1.364e-01, 1.365e-01, -1.889e-01, 6.458e-02, 2.028e-01, 1.279e-01, -2.230e-01, 4.919e-02, 3.303e-01, 2.852e-02)); + r += mul(s0_2, M4(-2.253e-02, -3.429e-02, 1.720e-01, 3.019e-02, 1.779e-01, -1.289e-01, -1.485e-02, 2.294e-01, 6.777e-02, 2.094e-01, -8.003e-02, -2.814e-01, 5.090e-02, -1.822e-02, 4.285e-02, 9.212e-03)); + r += mul(s0_3, M4(4.345e-01, -1.625e-02, 2.947e-02, 5.848e-02, -6.072e-01, -1.842e-01, -2.682e-01, -8.853e-02, -4.014e-01, 1.387e-01, -1.617e-01, 2.580e-02, 5.142e-01, 7.146e-02, -7.968e-02, -5.864e-02)); + r += mul(s0_4, M4(-3.146e-01, -1.256e-02, -1.312e-01, 1.912e-02, -3.825e-01, -7.159e-02, -8.802e-02, -2.785e-01, 1.166e+00, 3.918e-01, 1.766e-01, -9.949e-01, -3.057e-01, 3.247e-01, -5.187e-02, -3.849e-01)); + r += mul(s0_5, M4(-1.266e-01, -3.163e-02, 5.540e-02, -1.063e-04, -3.565e-01, 2.094e-02, -7.364e-02, 4.674e-02, -3.207e-01, -4.732e-02, -3.064e-02, -7.816e-01, -2.368e-01, -1.159e-01, 4.110e-02, 1.199e-01)); + r += mul(s0_6, M4(-5.285e-02, 1.116e-01, -1.387e-01, 9.422e-02, -1.251e-02, -1.096e-02, 3.989e-01, 1.370e-02, 2.573e-03, -6.050e-02, 9.572e-02, -2.029e-01, 4.006e-02, 4.752e-02, -2.514e-01, -7.318e-02)); + r += mul(s0_7, M4(1.563e-01, -6.792e-02, 7.001e-02, 1.968e-02, 3.360e-01, -1.072e-01, 1.499e-01, -4.911e-02, -4.225e-01, -7.392e-02, -9.010e-01, -1.592e-01, -7.005e-03, -3.628e-01, -2.941e-01, 6.075e-02)); + r += mul(s0_8, M4(-6.457e-02, -1.010e-01, -1.549e-01, -8.569e-02, 2.008e-01, -3.078e-02, -1.027e-01, 1.431e-01, -1.821e-01, 4.419e-02, -1.641e-01, -1.295e-01, 2.477e-03, -1.839e-01, 7.758e-02, -4.918e-02)); + r += mul(s1_0, M4(8.745e-02, 4.435e-02, -1.817e-01, -6.904e-01, -5.046e-01, -6.161e-02, -2.511e-01, 1.648e-01, 5.188e-02, 7.367e-02, 2.228e-01, -8.911e-02, 3.976e-01, 1.385e-01, 3.897e-01, 2.256e-01)); + r += mul(s1_1, M4(-2.179e-02, 
2.662e-01, -1.836e-01, 6.580e-01, -2.441e-02, 1.059e-01, 2.016e-01, -5.101e-02, 2.634e-02, 7.840e-02, 2.103e-02, 1.695e-01, -1.253e-01, -5.145e-02, 1.500e-01, 1.995e-01)); + r += mul(s1_2, M4(3.784e-02, -6.314e-02, 7.330e-02, -3.828e-01, -3.898e-03, 1.618e-01, 8.129e-02, 1.630e-01, 2.141e-02, 1.426e-01, -1.058e-02, 6.683e-02, -1.627e-02, 2.531e-03, 7.962e-02, 1.641e-01)); + r += mul(s1_3, M4(2.354e-01, -1.180e-02, 4.459e-02, -2.539e-01, -1.247e-02, 2.407e-01, 2.427e-01, -7.432e-02, -1.158e-01, 1.015e-02, -7.599e-02, -5.581e-02, 3.942e-01, -2.540e-02, -3.743e-01, 2.689e-01)); + r += mul(s1_4, M4(1.472e-01, -5.494e-01, 2.751e-02, -6.865e-01, 4.673e-01, 2.978e-01, -3.744e-01, 4.839e-02, 3.663e-01, 6.252e-03, 4.537e-02, -3.700e-02, -4.528e-01, 4.329e-01, 2.170e-01, -5.655e-02)); + r += mul(s1_5, M4(-3.136e-01, 3.527e-02, -9.977e-02, 1.680e-01, 1.746e-01, 2.170e-01, -1.577e-01, -3.133e-02, -2.571e-01, 4.381e-02, 5.043e-02, 4.327e-01, -2.274e-01, -2.338e-01, 1.237e-01, 9.396e-02)); + r += mul(s1_6, M4(-2.045e-01, -3.270e-01, 3.275e-01, 1.302e-01, 1.187e-01, -2.118e-02, 3.357e-01, 3.389e-02, 1.123e-01, 5.050e-03, -6.020e-03, 1.285e-02, 1.499e-01, 2.891e-01, -6.994e-01, 6.691e-02)); + r += mul(s1_7, M4(7.207e-02, 1.789e-01, 4.475e-01, 7.563e-02, 1.562e-01, 9.745e-02, 2.281e-01, 1.472e-01, -1.739e-01, 2.556e-02, -3.121e-01, 4.985e-02, -4.985e-02, -5.638e-01, -4.190e-01, 1.795e-01)); + r += mul(s1_8, M4(-3.225e-02, -5.762e-02, -2.681e-02, -3.269e-01, -1.883e-01, -1.365e-01, 8.040e-02, -2.820e-02, -4.708e-02, -1.241e-01, 2.407e-02, 5.468e-02, 8.962e-03, -5.522e-02, -5.487e-02, 5.633e-02)); + r += mul(s2_0, M4(-7.475e-02, 2.239e-02, 1.336e-02, -1.309e-02, -2.294e-01, 1.259e-02, -6.364e-02, 1.705e-01, -1.467e-01, 6.734e-02, -1.129e-01, 2.675e-01, -8.728e-02, -2.415e-01, -5.943e-02, 8.068e-02)); + r += mul(s2_1, M4(1.161e-01, 1.003e-01, -9.628e-03, -5.014e-02, -1.559e-01, -6.243e-02, 3.428e-01, -1.662e-01, 8.781e-02, -5.090e-02, -1.466e-01, 6.924e-02, -7.831e-02, -7.191e-02, 
-3.093e-01, -1.182e-01)); + r += mul(s2_2, M4(-1.187e-02, -1.124e-01, 1.241e-01, 3.818e-02, -3.489e-02, 1.072e-01, -3.086e-02, 8.455e-02, -3.857e-02, 1.693e-01, -1.791e-01, 1.621e-01, 5.023e-02, 6.312e-02, -1.419e-01, 3.264e-02)); + r += mul(s2_3, M4(6.541e-01, 1.355e-02, 6.582e-01, 4.098e-02, 5.969e-01, 1.744e-01, 1.758e-01, 1.798e-01, -2.511e-02, 6.407e-02, -4.659e-02, 1.100e-01, -3.252e-01, -2.088e-02, -3.156e-02, 2.349e-01)); + r += mul(s2_4, M4(-5.107e-01, 6.148e-01, -2.237e-01, -2.672e-01, 3.043e-01, 4.147e-01, -1.371e-02, -9.339e-02, -7.676e-02, -1.914e-02, 4.465e-02, -1.130e-01, 3.640e-01, -4.571e-01, 1.863e-02, 1.653e-01)); + r += mul(s2_5, M4(-1.206e-02, -6.812e-02, 1.585e-01, 1.884e-01, 2.366e-01, 3.310e-01, -1.584e-01, 4.540e-02, 6.728e-04, 3.926e-02, -1.147e-01, 1.093e-01, 2.546e-01, 1.105e-01, 1.950e-02, 1.075e-01)); + r += mul(s2_6, M4(1.302e-01, -1.658e-01, -4.834e-01, -2.295e-02, -1.106e-01, 2.600e-02, -2.262e-01, 8.762e-02, -5.511e-02, -1.613e-03, 1.347e-02, -2.278e-04, -9.678e-02, 7.248e-02, 1.184e-01, -1.321e-01)); + r += mul(s2_7, M4(-6.153e-02, -5.202e-01, -9.300e-01, 1.067e-01, 1.116e-01, -2.270e-01, 2.099e-02, 4.166e-02, 2.248e-02, 7.370e-02, 4.101e-01, 9.718e-02, 2.057e-01, -1.217e-01, 9.265e-02, -1.137e-01)); + r += mul(s2_8, M4(-1.353e-01, -2.862e-01, -6.823e-02, -2.380e-02, -6.254e-02, -8.065e-02, 1.598e-01, -1.998e-02, 5.988e-02, 1.871e-01, 8.992e-02, -7.480e-02, 2.728e-02, 1.082e-01, 3.140e-01, 1.540e-01)); + r += mul(s3_0, M4(-1.587e-01, 7.444e-02, -1.777e-02, 2.381e-02, 1.406e-01, 3.196e-01, -1.374e-03, 9.152e-02, -5.543e-01, -8.394e-03, 1.747e-01, 4.359e-01, -7.423e-02, -3.368e-01, -9.362e-02, 7.286e-02)); + r += mul(s3_1, M4(1.121e-01, 1.016e-01, 3.174e-02, -5.360e-02, -1.795e-02, 1.725e-02, -1.327e-02, 1.641e-01, -7.362e-01, -1.406e-01, -7.589e-01, 8.071e-01, -2.497e-01, -1.097e-01, -2.655e-01, -1.087e-01)); + r += mul(s3_2, M4(7.331e-03, -2.768e-02, 1.184e-01, 1.065e-01, 1.451e-01, -4.143e-02, -1.411e-01, 1.550e-01, 6.200e-01, 
1.024e+00, -7.276e-02, 8.515e-01, 1.175e-01, 1.839e-01, -2.676e-01, -1.548e-02)); + r += mul(s3_3, M4(6.424e-01, 2.821e-02, 5.957e-01, 1.530e-01, 1.276e-01, -2.298e-01, -1.210e-01, 1.548e-01, 7.219e-01, 1.262e-01, 9.240e-01, 3.466e-01, -2.720e-01, -5.621e-02, -4.165e-02, 1.123e-01)); + r += mul(s3_4, M4(-4.418e-01, 5.566e-01, 6.313e-03, -4.863e-01, -4.189e-01, -1.606e-01, 2.613e-01, -4.014e-01, -4.206e-01, -4.682e-01, -3.779e-01, 5.871e-01, 2.516e-01, -5.022e-01, 2.759e-01, 1.313e-01)); + r += mul(s3_5, M4(-3.682e-02, -2.897e-01, 2.019e-01, 3.667e-03, -1.666e-01, 2.953e-02, -1.446e-01, -6.928e-02, -1.346e-01, -6.624e-02, -1.522e-02, 8.483e-01, 3.168e-01, 1.247e-01, -6.584e-02, -8.030e-02)); + r += mul(s3_6, M4(1.670e-01, -4.091e-02, 1.881e-01, 1.276e-01, -1.441e-01, 1.636e-01, -1.322e-01, 8.716e-02, 9.290e-02, 3.344e-01, 2.737e-01, -4.509e-01, -1.560e-01, 8.292e-02, -5.494e-02, 2.625e-02)); + r += mul(s3_7, M4(6.209e-02, -8.711e-01, -2.422e-02, -1.158e-02, 1.969e-01, -4.698e-01, -1.016e-01, -1.333e-01, 5.883e-01, -9.214e-02, 1.058e+00, 2.119e-01, 3.611e-01, 2.443e-01, 1.390e-01, 3.819e-01)); + r += mul(s3_8, M4(-1.502e-01, -5.064e-01, 8.467e-02, -2.262e-02, 2.649e-01, 1.169e-01, -5.726e-02, 1.341e-01, 2.349e-01, 7.791e-01, 6.799e-02, 7.027e-02, -5.221e-02, 1.357e-01, 1.382e-01, -3.748e-02)); + r += V4(-4.129e-03, -4.378e-02, 1.747e-02, 3.267e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(7.766e-02, 4.523e-03, -1.029e-01, 3.952e-02, 1.207e-01, -3.205e-02, -1.516e-01, -4.993e-02, -2.106e-01, 5.000e-01, 2.802e-01, -1.797e-01, 1.157e-02, 4.642e-01, -1.830e-01, 9.749e-03)); + r += mul(s0_1, M4(5.684e-02, -1.916e-01, 1.998e-02, 
8.908e-02, -1.295e-01, 1.184e-01, 3.992e-02, -3.736e-01, 5.210e-01, -1.915e-01, -1.448e-02, -3.299e-01, 1.325e-01, 5.057e-01, 1.203e-01, 1.044e-01)); + r += mul(s0_2, M4(-7.759e-02, 1.978e-01, 2.185e-01, -1.370e-01, 5.627e-02, 3.739e-02, 1.866e-02, 4.120e-02, 1.212e-01, -2.967e-01, -4.022e-02, -1.058e-01, 9.193e-02, -6.720e-02, -8.270e-02, -3.294e-02)); + r += mul(s0_3, M4(-1.156e-01, -3.838e-01, -1.304e-01, 1.556e-01, 2.972e-01, -3.797e-02, -8.053e-02, 1.932e-01, -8.240e-01, 8.246e-01, 6.350e-01, -4.248e-01, 1.592e-03, -2.154e-01, 3.861e-01, -8.019e-02)); + r += mul(s0_4, M4(1.134e-01, 3.460e-01, -1.323e-01, 1.038e-01, 3.182e-01, 5.052e-02, -2.690e-01, 4.359e-01, -5.274e-01, -1.360e+00, 5.110e-01, -8.171e-01, -4.418e-02, 3.662e-01, -4.468e-01, -8.695e-02)); + r += mul(s0_5, M4(-5.947e-02, 6.545e-02, 7.860e-02, 1.032e-01, 3.691e-01, -1.867e-02, -6.669e-02, -1.166e-01, 4.599e-01, -6.066e-01, -1.696e+00, -7.313e-01, -2.449e-01, -8.068e-02, 1.883e-03, -1.603e-01)); + r += mul(s0_6, M4(1.844e-01, -8.927e-02, 2.002e-02, -4.609e-04, -2.650e-01, 7.654e-02, -1.074e-01, -6.939e-02, 3.150e-01, -2.191e-01, 4.955e-01, -1.906e-02, -1.020e-01, -4.901e-02, -1.017e-01, -2.561e-01)); + r += mul(s0_7, M4(-8.724e-02, -1.306e-01, 4.306e-02, -1.211e-01, -6.401e-02, -5.717e-02, 1.212e-01, 2.038e-01, -7.175e-02, -3.181e-01, 3.271e-01, 6.668e-02, -6.730e-02, 1.929e-01, 2.173e-01, 1.680e-01)); + r += mul(s0_8, M4(1.231e-02, 5.079e-02, 1.005e-01, 8.730e-02, 5.038e-02, 9.516e-02, 2.623e-01, 7.546e-02, -2.105e-02, 2.273e-02, -5.382e-02, 9.471e-02, 6.055e-02, 2.890e-02, 5.401e-02, 1.443e-02)); + r += mul(s1_0, M4(4.897e-01, 4.202e-01, -8.175e-01, 2.352e-01, -1.649e-01, 1.083e-01, 1.852e-01, 2.373e-02, 1.422e-02, -5.769e-02, -3.236e-02, -4.748e-02, 1.910e-01, -1.096e-01, 3.096e-02, 1.352e-01)); + r += mul(s1_1, M4(4.412e-01, -6.797e-01, -1.467e-01, -6.489e-01, -7.817e-03, 5.137e-03, 1.653e-01, 2.525e-01, 1.898e-01, -1.662e-02, -7.295e-03, 1.482e-01, 2.308e-01, -2.795e-01, -7.044e-02, 
-2.044e-01)); + r += mul(s1_2, M4(2.954e-01, 7.991e-03, -3.515e-02, 5.353e-01, 3.823e-01, 1.324e-01, -1.496e-01, 2.410e-01, 1.570e-01, -2.524e-02, -9.177e-02, 6.568e-02, 1.920e-01, 1.506e-02, -4.787e-02, -1.636e-01)); + r += mul(s1_3, M4(-2.704e-01, 3.949e-02, -1.541e-01, -3.039e-01, -5.590e-01, -4.014e-01, 3.342e-01, 6.506e-02, -1.872e-01, 1.954e-01, -1.813e-02, 5.556e-02, 8.577e-02, 1.206e-01, 9.298e-02, 4.025e-02)); + r += mul(s1_4, M4(-5.198e-01, 7.954e-01, -1.264e-01, -7.453e-01, -1.635e-01, -1.080e-01, 6.140e-01, -3.089e-01, -9.613e-02, -3.290e-01, 1.678e-01, -7.862e-02, 4.744e-02, -8.617e-02, -3.783e-01, -5.302e-02)); + r += mul(s1_5, M4(-3.316e-01, -9.648e-02, 3.075e-01, -8.622e-02, -2.702e-02, 2.573e-01, 4.803e-02, 2.886e-01, 1.101e-01, -5.590e-02, -2.187e-01, 4.012e-02, -6.776e-02, -2.531e-02, 1.187e-01, 1.914e-01)); + r += mul(s1_6, M4(1.292e-01, 7.338e-01, -1.035e+00, -3.613e-01, -1.081e-01, 5.978e-02, 1.108e-01, 1.432e-01, -7.837e-02, 7.895e-02, 1.205e-02, -5.820e-02, 9.230e-02, -2.629e-01, 1.338e-01, 1.457e-02)); + r += mul(s1_7, M4(1.799e-01, -2.778e-01, 2.910e-01, -3.160e-01, 1.320e-01, -2.639e-01, 1.126e-01, -4.365e-01, -1.801e-01, 4.184e-02, 1.418e-01, 1.234e-01, -1.337e-01, -8.716e-02, 6.319e-02, -4.482e-02)); + r += mul(s1_8, M4(-1.452e-01, 9.640e-02, 2.817e-01, 1.741e-01, -1.779e-01, 2.690e-02, 3.388e-02, 1.115e-02, -1.610e-01, -4.691e-02, -1.704e-01, -2.261e-01, 7.297e-02, 5.985e-02, 1.668e-01, 1.194e-02)); + r += mul(s2_0, M4(1.225e-01, -1.822e-01, -7.392e-02, 7.928e-02, -9.254e-02, 6.534e-03, 1.812e-01, 6.578e-02, 6.421e-02, -6.216e-02, -1.401e-01, -1.043e-01, -3.504e-01, -6.918e-02, 1.484e-01, 1.426e-01)); + r += mul(s2_1, M4(4.606e-02, -1.660e-02, -1.607e-01, -4.997e-02, 1.538e-01, 1.111e-02, 2.039e-01, -1.516e-02, -5.033e-02, -5.356e-02, 9.295e-02, -2.669e-01, -1.850e-01, 7.764e-02, 5.003e-02, -4.675e-02)); + r += mul(s2_2, M4(5.484e-02, 2.393e-02, 6.362e-03, 1.969e-01, 3.522e-01, -3.946e-02, -2.846e-01, 3.291e-01, 2.223e-01, 1.481e-01, 
-1.728e-03, 6.660e-02, -4.053e-02, -6.892e-02, 4.244e-02, 2.214e-01)); + r += mul(s2_3, M4(-3.490e-01, -9.302e-02, 2.349e-01, -2.202e-01, -4.376e-01, -1.789e-01, 1.899e-01, 5.442e-02, -1.084e-01, 3.997e-02, -1.352e-01, 1.549e-01, 8.160e-02, 2.716e-01, 2.301e-02, 1.718e-01)); + r += mul(s2_4, M4(3.561e-02, 5.771e-02, -6.185e-01, -7.735e-02, -4.608e-01, -4.666e-02, 6.230e-01, -1.867e-01, -4.888e-02, 1.459e-01, -2.126e-01, -1.254e-01, -3.141e-01, -7.960e-02, 2.873e-01, 2.510e-02)); + r += mul(s2_5, M4(1.481e-01, 1.674e-02, -1.207e-01, 2.081e-01, -1.548e-01, 1.376e-01, 4.633e-02, 2.706e-01, 2.653e-02, 8.545e-02, 1.186e-01, 1.113e-01, -5.347e-02, -7.582e-03, 1.496e-01, -1.793e-02)); + r += mul(s2_6, M4(-1.792e-01, 1.004e-01, 4.906e-02, -1.008e-01, 1.174e-01, -5.349e-02, 7.906e-02, 1.349e-01, -4.414e-02, -5.315e-02, -1.098e-03, 9.865e-02, 8.420e-02, -9.036e-02, -1.430e-01, -1.643e-01)); + r += mul(s2_7, M4(-4.867e-01, 1.544e-01, 4.995e-01, 8.678e-02, -1.243e-01, -9.942e-02, 3.613e-01, -2.650e-01, 8.803e-02, 6.569e-02, -1.275e-01, -6.514e-02, -2.140e-02, 2.578e-04, -6.209e-02, -1.538e-01)); + r += mul(s2_8, M4(-4.464e-01, 9.445e-02, -8.204e-02, -1.032e-01, 8.754e-04, 2.704e-02, 2.153e-02, 2.418e-02, 6.872e-02, -8.452e-02, -3.028e-02, 6.132e-02, 7.423e-02, -1.119e-01, -1.669e-02, 3.309e-02)); + r += mul(s3_0, M4(8.549e-02, 4.522e-02, 1.895e-02, -1.330e-01, 2.075e-01, -1.273e-01, -2.992e-02, -3.167e-02, -4.418e-01, -4.636e-01, -3.327e-01, 1.122e+00, -2.823e-01, -4.644e-01, 8.801e-02, 1.657e-01)); + r += mul(s3_1, M4(1.285e-01, -2.622e-02, -6.361e-02, -1.547e-01, 2.056e-01, 2.397e-01, -1.266e-03, -5.679e-01, 1.275e-01, -1.047e+00, 2.648e-01, 2.793e+00, -1.654e-01, 1.219e-01, 8.593e-02, -2.056e-01)); + r += mul(s3_2, M4(3.391e-02, 5.880e-02, -1.770e-03, 1.562e-01, -1.247e-04, 3.836e-03, -3.243e-02, -1.212e-01, -3.804e-01, -6.416e-01, 1.051e+00, 3.382e-01, -1.993e-01, 1.436e-01, 7.728e-02, -1.012e-01)); + r += mul(s3_3, M4(-1.693e-01, -2.344e-01, 1.774e-01, 8.301e-02, 
1.180e-01, -2.529e-01, -1.157e-01, -1.074e-01, 4.684e-02, 1.431e-01, 3.555e-01, 9.063e-01, 3.541e-02, 1.555e-01, -2.294e-01, 3.541e-01)); + r += mul(s3_4, M4(6.790e-01, 2.978e-03, -5.333e-01, -7.826e-02, 4.578e-02, -7.635e-02, -6.772e-01, -2.766e-02, -5.894e-02, 4.245e-02, -7.763e-01, -1.412e+00, -1.240e-01, -3.159e-01, 4.795e-01, 2.910e-01)); + r += mul(s3_5, M4(1.609e-01, 4.055e-02, 1.846e-02, -3.977e-02, 2.659e-01, 1.511e-02, -2.099e-01, -4.121e-01, 4.071e-01, 4.634e-01, 2.884e-01, 9.777e-01, -1.012e-01, 5.495e-02, 1.572e-01, -3.293e-01)); + r += mul(s3_6, M4(-8.705e-02, -1.337e-01, -1.727e-01, -9.255e-02, 1.578e-02, -2.723e-02, -1.492e-01, -1.372e-01, -2.315e-01, 5.413e-01, 4.250e-01, 4.866e-01, -1.362e-01, -1.312e-01, -1.257e-01, 1.292e-01)); + r += mul(s3_7, M4(-1.415e-01, -5.995e-02, 1.151e-01, 1.606e-01, -3.936e-01, 2.124e-01, -9.745e-02, 3.440e-01, -5.216e-01, -3.266e-01, -5.189e-01, -7.262e-01, 1.290e-02, 9.216e-02, 1.121e-01, 1.337e-01)); + r += mul(s3_8, M4(2.335e-02, -4.137e-02, -2.396e-02, -4.996e-02, 2.455e-01, 8.565e-02, 2.153e-01, 6.100e-02, 6.328e-01, -8.356e-01, 1.118e-01, -2.496e-01, -3.962e-02, -7.803e-02, -1.017e-01, -1.817e-01)); + r += V4(-2.496e-02, -1.212e-01, -1.419e-02, 2.482e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 
0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.773e-02, 
-4.916e-02, 1.204e-01, -2.493e-02, 6.937e-01, 2.135e-01, 1.818e-01, -3.537e-01, 1.529e-01, 6.021e-02, 1.170e-01, -1.189e-01, 1.189e-01, -9.206e-03, 1.108e-01, 3.896e-02)); + r += mul(s0_1, M4(-1.620e-01, -1.064e-01, -2.412e-01, -2.743e-01, 2.700e-01, 2.419e-01, -6.450e-02, -2.533e-01, -3.513e-01, -1.194e-01, -3.858e-01, -6.738e-01, -1.275e-03, 1.531e-02, -6.212e-02, 6.180e-02)); + r += mul(s0_2, M4(-1.407e-01, -2.679e-01, -1.344e-01, 6.411e-02, -1.541e-01, 1.592e-01, 7.251e-02, 2.381e-02, 2.503e-01, 1.899e-01, 1.577e-02, 1.255e-01, -6.473e-02, -1.242e-01, -2.175e-03, 4.059e-02)); + r += mul(s0_3, M4(-1.168e-01, 7.074e-03, -6.052e-02, -8.601e-02, -1.645e-01, 5.184e-02, 1.711e-01, 2.114e-01, 5.484e-02, -3.865e-02, 6.780e-02, 1.577e-02, -2.114e-02, 8.978e-02, 3.936e-02, 1.152e-01)); + r += mul(s0_4, M4(4.902e-01, 1.460e-01, 1.333e-01, 1.720e-01, -8.563e-02, -3.756e-01, -2.379e-01, 5.975e-02, 2.295e-01, -1.261e-01, 6.562e-02, -4.971e-01, 7.983e-02, 1.276e-01, -4.241e-02, -1.105e-01)); + r += mul(s0_5, M4(3.213e-01, -5.127e-01, -9.751e-02, -8.480e-02, 9.720e-02, 6.087e-02, 5.854e-02, -1.636e-01, -4.187e-02, 1.622e-01, 1.296e-01, 1.646e-01, -2.426e-01, -7.077e-02, -7.194e-02, -1.413e-02)); + r += mul(s0_6, M4(5.106e-02, 1.813e-02, -1.207e-01, 1.479e-01, 7.105e-02, -1.572e-02, 2.355e-01, 3.700e-02, -2.041e-01, 1.066e-01, -1.663e-02, 5.370e-02, -1.855e-01, 2.424e-03, -1.153e-02, 3.895e-02)); + r += mul(s0_7, M4(3.740e-02, 2.510e-01, 4.747e-02, -1.610e-02, -1.276e-01, 1.675e-02, -1.122e-01, -7.178e-02, -3.253e-01, -1.639e-01, -1.312e-01, 6.179e-02, 1.135e-01, 2.657e-02, -1.152e-01, -9.269e-02)); + r += mul(s0_8, M4(-3.307e-02, 1.119e-01, -1.452e-02, 1.724e-01, -5.484e-02, 2.508e-03, -3.591e-02, 6.865e-02, 1.889e-01, 1.723e-01, 1.635e-01, 1.258e-01, 1.857e-02, 1.273e-01, 1.566e-01, -1.652e-02)); + r += mul(s1_0, M4(-2.277e-01, -1.423e-01, 1.949e-01, 4.971e-02, -3.095e-01, -7.076e-02, -1.716e-01, -3.408e-01, 1.357e-01, 1.048e-01, -3.445e-02, -6.842e-02, 2.769e-01, 1.989e-02, 
8.616e-02, 2.153e-01)); + r += mul(s1_1, M4(-2.882e-01, -3.427e-01, 9.095e-02, 7.691e-01, 1.701e-01, 3.928e-01, -3.796e-01, -4.772e-01, 2.376e-01, 1.403e-01, 1.900e-01, 4.017e-02, 1.762e-01, 7.482e-02, -3.929e-03, 2.218e-03)); + r += mul(s1_2, M4(-2.240e-01, -3.791e-01, 4.275e-02, 1.327e-01, -1.085e-01, 2.639e-02, 1.546e-01, -2.289e-02, -6.099e-01, -1.290e-01, -1.267e-01, 5.642e-02, -1.649e-01, -2.882e-01, 1.917e-02, 1.338e-01)); + r += mul(s1_3, M4(2.616e-01, 8.183e-03, -1.014e-02, -1.328e-01, 3.280e-01, 1.409e-01, -3.935e-01, -5.285e-02, -1.023e-02, -7.158e-02, 4.159e-03, -4.778e-02, -5.562e-01, 3.113e-02, 4.077e-02, -2.240e-01)); + r += mul(s1_4, M4(4.202e-02, 9.718e-02, -9.851e-02, -7.479e-01, 3.624e-02, -4.618e-01, -2.312e-01, 1.559e-01, -6.908e-01, 1.833e-01, -2.083e-01, 6.335e-02, -2.836e-02, 2.194e-01, 1.387e-01, -2.445e-01)); + r += mul(s1_5, M4(1.029e-01, -3.759e-01, 2.112e-02, -3.595e-02, 4.997e-03, -8.256e-02, 1.003e-01, -1.616e-01, 2.521e-01, 7.646e-02, -1.807e-01, 8.995e-02, -4.928e-01, -4.602e-02, -1.609e-01, 8.111e-02)); + r += mul(s1_6, M4(2.841e-01, 7.997e-02, -1.299e-01, -3.545e-02, -3.290e-02, 4.075e-04, 1.653e-01, 3.074e-02, 6.276e-02, 5.983e-02, -3.728e-02, -6.306e-02, -1.742e-01, -1.482e-01, -3.579e-01, 6.636e-02)); + r += mul(s1_7, M4(1.981e-01, 1.679e-01, -6.883e-02, -2.588e-01, -2.647e-01, 8.313e-02, -5.174e-02, 1.209e-01, -2.284e-01, -1.392e-01, -1.328e-01, 3.198e-02, 1.279e-01, -4.582e-02, -2.727e-01, 6.927e-02)); + r += mul(s1_8, M4(1.525e-01, -2.406e-01, 8.315e-02, 7.265e-03, -2.063e-03, -9.131e-02, 1.110e-03, 3.074e-02, 1.791e-01, 5.909e-02, -8.521e-02, -6.441e-02, 2.074e-01, 7.446e-02, 1.452e-01, 7.400e-02)); + r += mul(s2_0, M4(1.997e-02, 2.843e-01, 8.649e-02, 9.571e-02, -1.781e-01, -9.968e-03, -2.890e-02, -7.883e-02, 2.019e-02, -1.216e-02, 1.812e-01, 2.368e-01, 6.113e-02, 3.176e-02, 7.946e-02, 1.075e-01)); + r += mul(s2_1, M4(1.032e-01, -5.896e-01, 1.941e-01, 3.837e-01, 1.188e-01, 8.780e-02, 3.861e-02, -1.744e-01, 4.878e-02, 
-7.646e-02, 2.619e-01, 1.448e-01, 5.215e-02, 4.933e-02, -6.758e-02, 8.912e-02)); + r += mul(s2_2, M4(-2.291e-01, -1.530e-01, 1.102e-01, 5.480e-02, 4.077e-01, -8.378e-03, -7.649e-02, -6.372e-02, 2.670e-01, 1.017e-01, 2.667e-01, -8.936e-03, -1.644e-02, -4.544e-02, -6.324e-02, -5.411e-02)); + r += mul(s2_3, M4(-1.113e-01, 7.794e-02, -2.500e-01, 2.054e-01, 7.857e-02, 3.525e-02, -6.270e-02, 1.187e-01, 1.222e-01, 3.236e-02, -2.597e-01, -1.654e-01, 2.588e-02, -2.900e-01, -8.871e-02, -1.734e-02)); + r += mul(s2_4, M4(7.606e-01, -5.853e-01, 1.359e-01, -1.206e-01, -2.826e-01, 2.765e-01, 2.347e-01, 3.174e-01, -2.738e-01, -6.352e-02, 1.287e-01, -2.197e-01, -4.855e-01, -1.005e-02, -1.929e-01, -3.095e-02)); + r += mul(s2_5, M4(2.727e-01, -2.414e-01, 1.192e-01, -2.436e-01, 1.855e-01, -9.626e-02, -1.515e-01, 1.784e-01, -1.328e-02, 3.473e-03, 1.846e-01, -1.311e-01, 2.775e-01, 1.262e-01, 8.677e-03, 5.321e-02)); + r += mul(s2_6, M4(3.195e-01, 1.377e-02, -1.920e-01, -9.662e-02, 2.030e-01, 1.059e-02, -1.692e-01, -1.024e-02, -3.002e-02, -7.144e-03, -1.783e-01, -5.337e-02, 1.894e-01, 5.339e-02, 2.365e-02, -9.065e-02)); + r += mul(s2_7, M4(-3.501e-01, 1.304e-01, -3.536e-01, -7.251e-02, 3.473e-02, 9.994e-02, -2.137e-01, -1.372e-01, -1.165e-02, -6.450e-02, -3.175e-01, -6.435e-02, 1.299e-01, -1.793e-01, 1.847e-01, -5.373e-02)); + r += mul(s2_8, M4(2.007e-01, -2.978e-01, 2.092e-01, -1.322e-01, -6.292e-02, 3.227e-01, -3.444e-01, 7.335e-02, 4.040e-03, -5.816e-02, -1.870e-03, -2.017e-02, -1.157e-01, 1.134e-01, -3.315e-02, 4.837e-02)); + r += mul(s3_0, M4(5.560e-02, 2.772e-02, 8.952e-02, 7.943e-02, -1.587e-01, 1.216e-02, 3.493e-03, 3.803e-02, 2.253e-01, 1.839e-01, 4.247e-02, 1.694e-01, -4.995e-02, 3.428e-01, -1.507e-01, 3.553e-01)); + r += mul(s3_1, M4(1.367e-02, -1.897e-01, -9.217e-02, 8.214e-03, 1.802e-01, 7.154e-02, -6.387e-02, 1.552e-02, -3.173e-01, -1.991e-01, 2.504e-01, 5.033e-01, -3.434e-01, 2.710e-03, 7.297e-02, 5.751e-01)); + r += mul(s3_2, M4(-1.310e-02, 2.518e-01, -8.101e-03, 
-5.719e-02, 1.436e-01, -1.783e-01, -6.461e-02, -4.487e-02, 4.401e-02, -1.240e-02, 1.103e-01, 2.310e-01, 4.486e-02, 5.592e-02, -6.506e-02, -1.333e-01)); + r += mul(s3_3, M4(9.502e-02, 5.695e-02, -1.551e-01, 1.754e-02, 2.585e-02, 3.478e-02, -1.198e-01, 1.182e-01, -1.117e-01, 1.009e-01, -2.105e-01, 1.057e-01, 2.102e-01, 1.993e-01, -3.809e-02, -8.390e-02)); + r += mul(s3_4, M4(-3.507e-01, -2.784e-01, -2.989e-02, -2.785e-01, 4.284e-02, -1.155e-01, 1.934e-01, -1.492e-01, 5.998e-01, -1.442e-01, -9.265e-02, -1.053e-01, -2.690e-01, -2.468e-01, -2.995e-01, -3.690e-02)); + r += mul(s3_5, M4(5.049e-02, 1.616e-01, 1.367e-01, -2.065e-03, -9.968e-02, -1.378e-01, -1.028e-02, 1.187e-01, -6.145e-02, -3.774e-02, 8.126e-02, -3.003e-01, 4.898e-01, -1.995e-01, -1.431e-01, -2.276e-03)); + r += mul(s3_6, M4(4.055e-01, 1.422e-01, -1.436e-01, 3.012e-02, -6.197e-02, 7.033e-02, 4.149e-02, 8.287e-02, -1.137e-01, -1.588e-01, -5.402e-02, 2.393e-02, 2.222e-01, 1.236e-01, 4.226e-02, 2.098e-01)); + r += mul(s3_7, M4(2.309e-02, -9.689e-02, -8.908e-02, -2.339e-01, 1.829e-01, -8.781e-02, 1.132e-01, -3.135e-01, -2.134e-01, 9.304e-02, -2.253e-01, -9.650e-02, -2.415e-03, -1.840e-01, 1.981e-01, -1.488e-01)); + r += mul(s3_8, M4(-1.466e-01, 1.364e-01, 1.174e-01, 5.666e-02, -1.027e-01, 1.929e-01, 8.108e-02, 5.678e-02, -1.460e-01, 8.134e-02, -1.315e-01, 6.725e-02, 1.327e-01, 1.911e-01, -2.188e-01, 7.731e-02)); + r += V4(2.298e-02, -1.923e-02, -2.658e-02, 1.368e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.130e-01, 1.214e-01, -1.108e-02, 1.352e-01, 6.377e-02, -1.713e-01, 1.967e-02, -1.919e-01, 1.150e-02, -9.545e-04, 1.293e-01, 6.316e-02, 1.290e-02, -8.948e-02, 
6.572e-02, 1.822e-02)); + r += mul(s0_1, M4(2.092e-01, 2.239e-01, -8.298e-02, 2.838e-01, -2.186e-01, 2.432e-01, -1.698e-01, 1.926e-01, -4.677e-01, -1.034e-02, -1.518e-01, 1.623e-02, -1.795e-02, 5.413e-02, -5.460e-02, 2.335e-02)); + r += mul(s0_2, M4(2.608e-01, 1.430e-01, -1.394e-02, 7.100e-02, 6.597e-02, 8.825e-02, 7.601e-02, -1.882e-02, -1.082e-01, 7.205e-02, 1.003e-01, -1.760e-01, 2.237e-01, -9.588e-02, -1.699e-02, 4.950e-02)); + r += mul(s0_3, M4(-1.373e-01, -1.715e-01, -1.798e-01, 7.263e-02, 1.256e-02, -2.401e-01, -1.070e-01, -1.565e-01, 1.507e-01, 1.169e-01, 4.934e-02, 1.995e-02, -8.259e-02, 2.094e-01, -6.138e-02, -7.254e-02)); + r += mul(s0_4, M4(-2.767e-01, -4.899e-01, -7.727e-01, -1.497e-01, -3.017e-03, -3.284e-02, 2.596e-01, -4.209e-01, -1.953e-01, 1.452e-01, 7.560e-03, -3.027e-01, -1.194e-01, 1.329e-01, -1.687e-01, 1.195e-01)); + r += mul(s0_5, M4(-1.280e-01, -3.473e-01, -9.935e-02, -1.362e-01, 7.692e-02, 1.451e-01, 1.568e-02, 6.818e-02, 6.812e-02, -3.212e-01, -6.089e-03, -2.132e-01, -1.721e-01, -1.182e-01, -5.266e-02, -1.555e-01)); + r += mul(s0_6, M4(-3.717e-02, -4.738e-02, -2.076e-01, 4.773e-02, -8.280e-02, -9.293e-02, -6.792e-02, -8.102e-02, 7.954e-02, 2.675e-02, 1.215e-02, 3.023e-02, -7.328e-03, -7.478e-02, 7.216e-02, 2.117e-01)); + r += mul(s0_7, M4(-1.612e-01, -3.211e-01, -2.805e-01, -5.244e-01, -5.702e-02, -6.611e-03, -5.634e-02, 1.504e-01, 1.134e-01, -6.816e-02, -3.059e-02, 2.929e-02, 8.199e-02, -1.340e-01, -5.846e-02, -1.619e-01)); + r += mul(s0_8, M4(-6.973e-02, -5.321e-01, 1.599e-02, -6.657e-02, 5.820e-02, 3.689e-02, 3.444e-02, -6.053e-02, 2.894e-02, -9.890e-02, 9.278e-02, -1.179e-01, -5.478e-02, -1.127e-01, -8.066e-03, 9.613e-03)); + r += mul(s1_0, M4(-4.428e-02, 6.076e-02, -4.348e-02, 9.205e-02, -3.087e-02, -2.955e-02, -1.082e-01, -1.677e-01, 6.964e-02, -6.678e-02, 1.186e-02, 4.676e-02, 1.122e-01, 2.608e-02, -7.710e-02, 1.314e-02)); + r += mul(s1_1, M4(3.823e-02, 9.601e-02, -3.015e-02, 1.601e-01, -3.331e-01, 4.881e-02, 1.877e-01, 4.381e-02, 
-4.786e-02, -6.752e-02, -1.870e-01, -1.172e-01, 3.151e-01, -1.060e-01, -2.141e-01, 1.811e-02)); + r += mul(s1_2, M4(1.288e-01, 7.224e-02, 1.647e-01, 9.426e-02, -1.369e-01, -3.410e-02, 2.160e-02, -2.060e-02, 1.585e-01, 1.380e-01, -1.537e-01, 1.532e-01, 2.734e-01, 4.569e-03, -7.428e-02, 4.233e-02)); + r += mul(s1_3, M4(1.178e-01, -2.371e-02, 1.479e-01, -2.215e-01, 1.109e-01, -1.197e-01, -1.628e-01, -4.200e-01, -1.274e-01, -3.016e-02, -2.777e-02, -7.318e-02, 1.257e-01, 3.237e-01, 2.290e-01, 1.030e-01)); + r += mul(s1_4, M4(4.789e-01, 4.644e-02, -1.997e-01, -7.073e-01, 4.108e-02, -2.901e-01, 3.818e-01, -4.996e-01, -1.879e-01, 5.111e-02, -1.602e-01, 4.706e-02, -7.003e-01, -1.329e-01, -9.032e-01, 1.350e-01)); + r += mul(s1_5, M4(4.773e-01, 1.039e-01, 8.412e-02, 1.490e-01, 3.035e-02, 1.400e-01, -6.338e-02, 8.991e-02, 4.904e-01, 3.650e-01, -3.968e-02, 3.972e-01, -2.319e-01, -4.904e-02, -5.767e-01, -2.348e-02)); + r += mul(s1_6, M4(-7.230e-02, 6.087e-02, -2.064e-02, -6.899e-03, -1.217e-01, -1.687e-01, -1.857e-01, -1.239e-01, -6.371e-02, 6.974e-02, 1.337e-02, 8.767e-02, 2.289e-01, 4.729e-01, 9.679e-02, 1.118e-01)); + r += mul(s1_7, M4(-5.391e-02, 4.743e-02, -4.716e-02, 1.767e-01, -4.998e-02, -2.546e-01, 8.780e-02, 4.767e-02, -7.362e-03, -6.664e-02, 1.749e-01, 7.709e-02, -2.150e-01, -8.501e-02, -3.625e-01, -1.570e-01)); + r += mul(s1_8, M4(1.795e-01, -1.680e-01, 5.013e-02, 8.052e-02, 4.902e-02, 8.731e-03, 1.408e-02, 3.582e-02, -8.009e-02, 3.017e-02, -2.784e-02, 8.367e-02, 1.287e-01, 8.771e-02, -3.202e-02, 2.532e-02)); + r += mul(s2_0, M4(-1.419e-02, -1.381e-01, 1.070e-02, 1.267e-01, 5.872e-02, 8.846e-02, 1.327e-01, 3.797e-02, -9.500e-02, -4.932e-02, -1.433e-02, -5.332e-02, -9.478e-02, -4.424e-02, 1.445e-02, 1.336e-01)); + r += mul(s2_1, M4(-2.619e-01, 2.202e-01, 8.033e-02, 3.468e-01, 3.048e-01, -1.002e-02, -1.701e-01, -6.405e-02, 3.158e-02, -1.062e-01, 1.464e-01, 1.837e-01, 7.958e-02, 5.367e-03, -9.772e-02, 5.937e-02)); + r += mul(s2_2, M4(4.148e-01, -9.892e-02, -1.164e-01, 
9.342e-02, -2.268e-01, 3.121e-02, -1.576e-01, -4.941e-02, -3.639e-01, -6.088e-02, -2.668e-02, -3.922e-02, 2.593e-02, 9.331e-02, 8.824e-02, 6.521e-03)); + r += mul(s2_3, M4(-2.006e-01, -6.678e-02, 3.283e-01, 4.849e-01, 1.016e-01, -1.125e-01, -1.296e-01, -1.680e-01, 2.772e-01, 1.909e-01, 3.458e-01, -1.367e-01, 5.993e-02, 2.428e-01, 1.871e-02, -1.480e-01)); + r += mul(s2_4, M4(-1.486e-02, 3.916e-01, 1.216e-01, -4.918e-01, -5.480e-02, 1.275e-01, -1.182e-01, 5.374e-01, -6.052e-02, 3.231e-01, 3.526e-02, -1.833e-01, -6.420e-02, -1.273e-03, -1.800e-01, 7.652e-02)); + r += mul(s2_5, M4(-2.349e-01, 7.653e-02, 8.108e-02, 1.898e-01, -1.317e-01, -2.513e-01, -2.790e-02, -3.601e-02, 2.290e-01, -1.461e-01, 8.279e-02, 1.899e-01, -4.883e-02, 8.442e-02, -5.347e-02, -4.900e-02)); + r += mul(s2_6, M4(-3.134e-01, 8.996e-02, 3.181e-02, -1.457e-01, -9.578e-02, 1.116e-01, -6.355e-02, -1.488e-01, -2.890e-02, 2.076e-02, 6.027e-02, 1.788e-02, 1.403e-01, 8.653e-02, 1.352e-01, -6.965e-02)); + r += mul(s2_7, M4(2.188e-01, 2.762e-01, 8.939e-02, 2.626e-01, 2.829e-01, 1.395e-02, -2.333e-02, -3.416e-02, 1.208e-02, 5.859e-02, 2.510e-01, 3.831e-01, -1.118e-01, -6.555e-03, -4.403e-02, -4.370e-02)); + r += mul(s2_8, M4(1.361e-02, 1.759e-01, -6.858e-02, 1.548e-01, -9.035e-03, -1.969e-01, -1.406e-01, 4.979e-02, 1.587e-02, -1.111e-01, -1.971e-02, 3.073e-02, 5.107e-02, 1.063e-02, -1.163e-02, 4.328e-02)); + r += mul(s3_0, M4(-3.105e-02, -3.297e-03, -1.346e-02, 3.279e-02, -6.446e-02, -8.300e-03, 1.195e-02, 1.704e-02, -2.825e-02, -7.198e-02, -1.241e-01, -5.453e-03, -1.010e-01, -2.661e-01, -1.093e-01, -4.753e-01)); + r += mul(s3_1, M4(6.416e-02, 3.290e-03, -6.664e-02, 3.536e-02, 1.340e-01, 6.209e-02, 1.751e-02, 3.663e-02, 1.215e-01, 2.067e-01, 2.157e-02, 2.474e-01, 8.811e-02, 3.345e-01, -1.319e-01, 4.883e-01)); + r += mul(s3_2, M4(-2.049e-01, 3.166e-02, 2.001e-02, 1.423e-04, 6.755e-02, 2.682e-02, -1.911e-02, 5.750e-02, 2.807e-02, -8.508e-02, 9.625e-03, -4.121e-02, 5.264e-01, -1.940e-01, 1.523e-02, 1.072e-01)); 
+ r += mul(s3_3, M4(-3.709e-02, -2.069e-02, 2.408e-01, 2.808e-02, 3.065e-02, -1.316e-01, -4.231e-02, -9.350e-02, 8.105e-02, -1.048e-01, 1.708e-01, -8.174e-02, 3.743e-01, -3.742e-02, 2.901e-01, 7.091e-01)); + r += mul(s3_4, M4(-1.409e-01, -2.233e-02, 2.452e-02, -2.700e-01, -2.514e-01, 1.795e-01, 1.342e-01, 7.064e-01, -3.364e-01, 1.200e-01, -4.678e-01, -1.900e-01, -5.496e-01, 2.325e-01, -2.933e-02, 6.220e-01)); + r += mul(s3_5, M4(-1.505e-01, 1.194e-01, -1.962e-02, 8.578e-02, -1.999e-01, -1.274e-01, -1.435e-01, -2.373e-01, 3.485e-01, -1.336e-01, 2.627e-01, 1.902e-02, -4.103e-01, -2.210e-01, 4.443e-01, -5.294e-01)); + r += mul(s3_6, M4(1.998e-01, 6.277e-02, -3.951e-02, 5.659e-02, -1.930e-02, -4.212e-02, -5.306e-02, -1.381e-01, -1.701e-01, 1.967e-01, 1.383e-02, -2.463e-01, 2.810e-01, 2.186e-01, 2.522e-01, -5.045e-01)); + r += mul(s3_7, M4(2.436e-01, 1.870e-01, 8.677e-03, -4.233e-02, 4.271e-01, 2.146e-01, 8.672e-02, -2.921e-01, 1.262e-03, -2.255e-01, 9.194e-03, 1.431e-01, 4.044e-02, 4.419e-01, -1.759e-01, 1.003e-01)); + r += mul(s3_8, M4(-3.728e-02, -2.172e-01, -1.118e-01, 7.946e-02, -4.604e-02, 2.679e-02, 2.423e-02, -5.371e-03, 1.024e-01, -7.889e-02, -3.896e-02, -8.837e-03, 9.574e-02, -3.400e-01, 1.938e-01, -4.385e-01)); + r += V4(2.654e-02, 3.773e-02, -2.198e-02, -1.098e-02); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 
0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, 
V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-9.930e-02, -8.715e-02, -9.548e-02, -1.855e-02, 1.095e-01, 2.065e-01, 1.197e-01, -1.570e-02, 1.574e-01, 1.091e-01, -6.599e-02, 4.981e-03, -1.202e-02, -1.267e-02, -3.307e-02, -4.164e-02)); + r += mul(s0_1, M4(-5.548e-02, -8.579e-02, -1.219e-01, -2.839e-03, 8.177e-02, 3.298e-02, 2.258e-02, -9.106e-02, 1.195e-01, -8.941e-03, 2.294e-02, 3.104e-02, 5.574e-02, 9.188e-02, 3.466e-02, -3.615e-02)); + r += mul(s0_2, M4(-6.549e-02, -9.322e-02, 1.286e-02, -6.585e-03, -9.047e-02, -4.453e-02, -4.657e-02, 4.226e-02, 1.639e-01, 2.342e-01, -1.120e-02, -4.821e-02, -1.004e-01, -2.352e-03, -2.815e-02, 3.063e-02)); + r += mul(s0_3, M4(2.220e-02, 5.185e-03, 1.132e-01, 1.700e-01, 1.671e-01, -1.331e-03, -4.262e-02, -5.368e-01, -2.847e-01, -6.449e-02, -1.573e-01, -1.291e-01, 1.065e-01, -3.527e-02, 2.175e-02, -9.759e-02)); + r += mul(s0_4, M4(-6.274e-02, 5.046e-02, -7.344e-02, -1.713e-02, 3.599e-02, -1.940e-01, 4.244e-02, 5.161e-02, -2.285e-01, 2.347e-01, -1.516e-01, 4.668e-02, 5.899e-02, 4.766e-01, 2.878e-02, -1.491e-01)); + r += mul(s0_5, M4(1.430e-02, -1.483e-01, 1.337e-01, 5.809e-02, -1.794e-02, 3.310e-02, -6.586e-02, -2.245e-02, 4.123e-01, -5.310e-02, 2.135e-01, 1.468e-02, -2.746e-01, 6.546e-02, -2.512e-01, 6.142e-02)); + r += mul(s0_6, M4(4.337e-02, -2.250e-02, 1.685e-01, 5.046e-02, -4.996e-01, -8.999e-02, -3.918e-01, -1.998e-02, -5.723e-02, -1.890e-01, -7.521e-02, -1.420e-02, 2.147e-01, -7.020e-02, -1.031e-01, 7.691e-02)); + r += mul(s0_7, M4(6.315e-04, -9.858e-02, -1.540e-01, 2.171e-02, 3.620e-01, -1.191e-01, 1.217e-01, 8.591e-02, 2.791e-02, 9.235e-02, -2.327e-02, -9.034e-03, 2.280e-01, 4.347e-01, -3.150e-01, 8.650e-02)); + r += mul(s0_8, M4(-2.714e-02, -2.215e-01, 1.516e-01, -3.802e-02, -1.612e-02, -5.077e-02, -1.311e-01, 2.074e-02, 2.457e-02, 7.442e-02, 1.970e-03, 1.698e-02, -2.027e-01, 6.232e-02, -2.108e-01, -1.618e-02)); + r += mul(s1_0, M4(1.029e-01, 5.622e-02, -1.224e-01, 6.814e-02, 1.126e-01, 1.469e-01, 3.430e-02, -2.006e-02, 
9.965e-03, 1.500e-01, 3.369e-01, 4.728e-02, -1.169e-01, 7.054e-02, -3.382e-02, 3.463e-02)); + r += mul(s1_1, M4(1.890e-04, 2.958e-01, -9.510e-02, 6.847e-02, 1.335e-01, 1.174e-01, -4.600e-02, -1.080e-01, 1.446e-01, 1.380e-01, -1.213e-03, 5.221e-02, -7.797e-03, -7.192e-02, -2.139e-02, -1.013e-01)); + r += mul(s1_2, M4(-1.966e-02, -7.701e-02, -1.022e-01, -2.180e-02, -1.363e-02, -4.144e-02, -1.094e-01, 1.956e-02, 4.011e-02, -1.149e-01, 9.707e-02, -5.067e-02, -1.240e-01, -4.850e-02, 5.535e-04, -7.421e-02)); + r += mul(s1_3, M4(-1.511e-01, 2.479e-01, -2.632e-02, 2.342e-01, 1.382e-01, 2.093e-01, -2.834e-01, -4.066e-01, -1.914e-01, 6.572e-02, 2.396e-03, -2.234e-01, 7.510e-02, 7.839e-02, -4.164e-02, 3.453e-02)); + r += mul(s1_4, M4(-2.032e-01, 4.775e-01, -3.155e-01, 4.759e-02, -3.128e-03, 7.992e-02, -1.580e-01, 1.370e-01, 7.606e-02, -2.428e-01, -3.431e-01, 8.765e-02, 1.948e-01, 3.910e-02, 1.793e-01, -2.253e-01)); + r += mul(s1_5, M4(-6.497e-02, 3.718e-01, -1.607e-01, 1.969e-01, 1.130e-01, 1.543e-01, -6.874e-02, 3.665e-02, -2.082e-02, -1.314e-01, 1.726e-01, -3.860e-02, -1.390e-01, -6.533e-02, 1.400e-02, -1.440e-02)); + r += mul(s1_6, M4(2.533e-02, 4.261e-01, -4.830e-02, -3.112e-02, -4.698e-01, 1.500e-01, -3.778e-01, -4.040e-02, 1.883e-01, -4.829e-02, -1.404e-01, 1.089e-01, -7.179e-02, -2.045e-01, -9.545e-02, 1.307e-01)); + r += mul(s1_7, M4(1.579e-01, 5.817e-01, -3.106e-01, 2.197e-01, 1.791e-01, -5.029e-02, -2.680e-01, 8.439e-02, -3.268e-02, 3.793e-01, 4.710e-02, -7.101e-02, -4.163e-01, -5.762e-01, 6.999e-02, -3.126e-02)); + r += mul(s1_8, M4(-2.019e-01, 2.995e-01, -1.985e-01, 3.372e-02, -1.814e-03, -8.885e-02, -1.580e-01, 5.131e-02, -3.058e-01, 1.882e-01, -1.935e-01, -9.108e-04, -1.722e-01, -1.703e-01, 2.751e-02, -1.436e-01)); + r += mul(s2_0, M4(-1.572e-01, 7.107e-02, 1.546e-01, 7.852e-02, 2.804e-01, 9.207e-02, 1.863e-01, -2.990e-02, 7.031e-02, 7.487e-03, -2.383e-01, 9.415e-02, -4.405e-02, -2.135e-01, 4.289e-02, -1.969e-02)); + r += mul(s2_1, M4(1.009e-01, 6.349e-02, 
-1.495e-03, 1.668e-02, -3.615e-02, -1.591e-01, -2.520e-01, 6.259e-03, 6.579e-02, -1.644e-01, -5.543e-02, -5.906e-02, -9.854e-02, 8.397e-02, 8.320e-02, -8.373e-02)); + r += mul(s2_2, M4(-3.516e-02, 1.545e-03, 3.351e-03, -3.178e-02, 1.191e-01, -9.185e-02, 2.679e-01, 3.951e-02, 1.385e-01, -8.283e-02, -4.421e-02, 6.449e-02, -1.340e-01, -8.220e-02, -1.149e-02, -3.884e-02)); + r += mul(s2_3, M4(-9.423e-02, -5.891e-02, 2.833e-01, 1.868e-01, 5.074e-02, 1.226e-01, -1.307e-01, -2.509e-01, -1.022e-01, 2.094e-02, 1.032e-01, 9.668e-02, 1.411e-01, -6.280e-02, 1.329e-02, 8.977e-02)); + r += mul(s2_4, M4(1.336e-01, -6.825e-02, 9.910e-02, 6.299e-03, -3.647e-01, -1.297e-01, -5.259e-02, -2.980e-01, 4.552e-01, 1.724e-01, 2.430e-01, -4.053e-01, 6.858e-02, 2.330e-01, -1.802e-01, -2.499e-01)); + r += mul(s2_5, M4(2.282e-01, -5.794e-03, 6.329e-02, -1.208e-01, 2.021e-01, -2.298e-02, 1.093e-01, 6.128e-02, -5.259e-02, 1.390e-01, -1.171e-01, 1.162e-01, -1.808e-01, 2.376e-01, -3.242e-01, -4.872e-03)); + r += mul(s2_6, M4(9.993e-03, 2.233e-01, 2.358e-01, 7.060e-02, -2.864e-02, -7.382e-02, -2.750e-01, 1.675e-01, -1.010e-01, 6.265e-02, 3.681e-02, -4.289e-02, -4.836e-03, 3.679e-02, -8.175e-02, -1.268e-02)); + r += mul(s2_7, M4(-4.403e-01, 1.954e-01, 1.783e-01, -2.415e-03, 2.802e-02, -1.300e-01, -1.612e-01, -1.006e-01, 3.618e-02, -1.846e-01, 2.350e-02, 1.274e-01, -6.638e-02, 9.782e-02, -6.339e-02, 2.921e-02)); + r += mul(s2_8, M4(-3.557e-02, 3.254e-02, -1.660e-02, -8.243e-02, 1.413e-01, 9.227e-02, -2.222e-02, 1.104e-01, 1.241e-01, 2.385e-01, -7.126e-02, 1.685e-01, -2.223e-02, 1.654e-01, -1.238e-01, 2.770e-02)); + r += mul(s3_0, M4(3.766e-02, -1.169e-01, -1.768e-01, 1.940e-02, -1.264e-01, 6.004e-02, 7.806e-02, -5.408e-02, -5.266e-02, 1.241e-01, 5.232e-02, 5.166e-02, -1.548e-01, -2.214e-02, -1.899e-02, 3.862e-02)); + r += mul(s3_1, M4(1.318e-01, -1.692e-02, -2.739e-02, 8.390e-02, -2.753e-01, 9.796e-02, -1.220e-01, 3.006e-02, 7.237e-02, -1.026e-01, -4.912e-02, -5.414e-02, 1.599e-02, 1.103e-02, 
2.587e-01, -1.008e-01)); + r += mul(s3_2, M4(-2.140e-02, -1.934e-02, 2.338e-02, 3.088e-02, 3.095e-02, -1.189e-01, 1.848e-01, -1.983e-02, 1.788e-02, -8.243e-02, -4.502e-03, -3.665e-02, 5.258e-02, -9.368e-02, 8.604e-02, -6.375e-02)); + r += mul(s3_3, M4(3.559e-02, -4.663e-01, -1.866e-01, 9.592e-02, 1.656e-01, -2.107e-02, 1.781e-01, -1.324e-01, -1.047e-01, 4.386e-02, 3.693e-02, 6.419e-02, 3.781e-02, -1.108e-01, 3.440e-01, 1.116e-01)); + r += mul(s3_4, M4(9.007e-02, -1.986e-01, -2.803e-02, 1.300e-01, -2.704e-02, 4.319e-02, 3.338e-02, -9.694e-02, 3.277e-01, -2.424e-02, 3.779e-01, -5.205e-01, 1.618e-01, 1.366e-01, 8.522e-02, -2.863e-01)); + r += mul(s3_5, M4(1.929e-01, 3.554e-02, -8.457e-03, 6.138e-02, -1.672e-02, -1.217e-01, 1.997e-01, -8.514e-02, -6.749e-02, 5.450e-02, 7.215e-03, -7.587e-02, -3.529e-02, -1.021e-02, 5.863e-02, 2.202e-02)); + r += mul(s3_6, M4(1.631e-01, -1.250e-01, 7.616e-03, 1.028e-01, -1.353e-01, -3.640e-01, -1.947e-01, -9.438e-02, -5.227e-02, -1.033e-01, -1.453e-02, -1.795e-02, 1.948e-01, -7.952e-02, 1.281e-01, -1.553e-02)); + r += mul(s3_7, M4(-4.316e-01, 6.337e-02, -6.498e-02, 7.992e-04, 1.663e-01, -1.837e-01, 1.463e-01, -8.458e-02, -9.212e-02, -6.334e-02, -1.500e-01, 1.331e-01, -8.807e-02, 1.654e-01, 7.674e-03, -3.696e-03)); + r += mul(s3_8, M4(8.960e-02, 1.206e-02, -1.317e-01, 1.738e-02, -1.972e-02, 7.104e-02, 9.549e-03, 3.120e-04, -5.724e-02, -1.955e-02, -9.568e-02, 1.298e-01, -1.468e-01, 9.693e-02, 1.818e-02, -3.696e-02)); + r += V4(-5.503e-03, -1.971e-02, 4.307e-02, 2.403e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-8.596e-02, -5.285e-02, 5.158e-02, 8.907e-02, 4.899e-02, -2.187e-02, -3.438e-02, 
-1.706e-01, -7.313e-02, -6.446e-03, -1.219e-01, 5.467e-02, 1.417e-02, 8.520e-02, 6.738e-02, -3.482e-02)); + r += mul(s0_1, M4(3.106e-02, -2.627e-03, 1.389e-02, 3.474e-02, 3.618e-02, -1.077e-01, -4.858e-02, -8.000e-02, 1.019e-01, -9.408e-02, -1.601e-01, 1.003e-01, -7.328e-03, 1.167e-01, 1.367e-01, -1.538e-01)); + r += mul(s0_2, M4(-1.290e-01, -7.244e-02, 4.047e-04, 5.332e-02, -5.096e-02, -6.015e-02, 4.490e-02, -4.033e-02, 1.587e-01, -6.248e-02, -2.208e-01, -5.304e-02, 5.434e-02, 1.597e-01, 3.131e-02, -3.238e-02)); + r += mul(s0_3, M4(-3.771e-02, -8.350e-02, -8.219e-02, 2.164e-02, -5.266e-02, 1.337e-01, 5.790e-02, -3.579e-02, 1.371e-01, 7.213e-02, 2.308e-01, 1.958e-01, -9.141e-02, 1.143e-01, 7.658e-02, -4.488e-02)); + r += mul(s0_4, M4(1.631e-02, -2.806e-02, 1.665e-01, -1.948e-02, -1.633e-01, -2.538e-03, -1.970e-01, 2.337e-02, 9.817e-02, 3.921e-01, -1.388e-01, 4.121e-02, 8.042e-03, -5.681e-02, 2.117e-01, -8.773e-02)); + r += mul(s0_5, M4(-1.510e-01, -7.396e-02, 1.387e-01, 1.461e-01, -1.826e-01, -2.693e-02, 9.142e-02, 9.284e-02, -1.825e-01, -9.509e-02, 6.374e-02, -1.388e-01, -8.189e-02, 1.223e-01, -1.750e-02, 1.406e-01)); + r += mul(s0_6, M4(1.135e-02, -9.048e-02, -1.223e-01, 2.888e-02, 1.880e-01, 1.797e-01, 4.345e-01, -8.939e-02, -1.525e-01, 5.877e-02, -9.220e-02, 6.612e-02, 2.258e-02, 1.177e-01, -8.183e-02, 8.373e-03)); + r += mul(s0_7, M4(1.696e-01, 1.752e-01, -6.929e-02, 1.645e-01, 8.546e-02, 9.341e-02, 1.696e-01, -3.056e-02, -7.792e-02, 8.699e-02, 1.695e-01, -3.911e-03, -2.366e-01, 8.806e-02, -4.111e-01, -1.648e-02)); + r += mul(s0_8, M4(5.684e-02, -8.478e-02, -1.300e-01, 9.553e-02, -1.272e-02, -9.817e-03, 5.397e-02, 8.448e-02, -1.665e-02, 1.940e-02, -1.479e-01, -1.231e-01, -1.558e-01, 4.587e-02, 9.477e-02, 1.733e-01)); + r += mul(s1_0, M4(4.928e-02, 1.630e-01, 3.530e-02, -2.609e-01, 2.080e-02, 2.517e-02, -5.341e-02, 2.393e-02, 2.070e-01, -7.999e-02, -5.407e-02, 9.927e-02, 1.005e-01, -1.452e-01, -1.293e-01, 1.179e-03)); + r += mul(s1_1, M4(4.045e-01, 1.635e-01, 
-7.885e-02, -2.496e-01, 8.646e-02, -9.838e-02, -6.499e-02, -5.604e-02, -1.479e-01, 8.245e-02, 2.084e-01, 3.130e-02, 3.350e-02, -1.592e-01, -3.301e-03, -1.571e-01)); + r += mul(s1_2, M4(1.435e-01, -1.269e-01, 4.380e-02, -9.012e-02, 5.184e-02, 3.579e-02, -8.166e-02, -2.591e-02, 2.181e-01, -2.666e-01, -1.091e-01, 1.673e-01, 6.085e-02, -2.818e-02, 2.975e-02, -1.134e-01)); + r += mul(s1_3, M4(4.309e-02, -3.624e-02, 3.632e-02, -1.492e-01, 1.492e-01, 1.318e-01, -3.694e-02, 2.850e-01, 2.685e-01, -6.430e-02, 1.423e-01, -1.073e-01, 7.645e-04, -1.715e-01, -2.463e-02, 6.346e-02)); + r += mul(s1_4, M4(7.196e-01, -1.132e-01, -4.236e-02, -4.937e-01, 1.175e-01, 4.154e-02, -3.213e-01, 1.145e-02, -2.131e-01, 2.879e-02, -2.353e-01, 1.775e-01, 4.716e-02, 4.443e-02, -1.883e-03, 1.204e-01)); + r += mul(s1_5, M4(4.435e-01, 3.854e-01, -1.692e-01, -1.702e-01, -6.404e-02, 1.060e-01, -2.896e-02, 2.275e-02, 9.162e-03, -2.673e-01, -1.698e-01, -1.770e-01, -1.354e-01, 9.096e-02, -1.099e-01, 3.785e-01)); + r += mul(s1_6, M4(2.284e-01, 1.976e-02, -1.113e-01, -2.744e-01, 1.518e-01, 8.682e-02, 3.085e-01, 1.258e-01, -3.514e-02, 9.450e-02, -4.096e-02, -2.853e-02, -8.595e-03, -3.154e-01, -3.680e-02, 2.172e-01)); + r += mul(s1_7, M4(1.785e-01, 1.978e-01, -1.906e-02, 2.771e-02, 1.501e-01, 2.575e-02, 2.389e-01, 3.010e-01, 1.457e-01, -4.462e-02, -2.300e-01, -1.614e-01, -2.747e-01, -7.497e-02, -4.463e-01, 2.659e-01)); + r += mul(s1_8, M4(3.257e-01, 1.932e-01, -5.289e-03, 3.878e-03, -1.571e-01, 4.645e-03, 7.888e-03, 1.243e-01, 5.842e-02, -2.256e-02, -6.602e-03, -3.731e-02, -7.605e-02, -3.408e-01, -1.688e-01, -5.470e-03)); + r += mul(s2_0, M4(-3.495e-02, -1.928e-02, 4.994e-02, 1.395e-01, -1.112e-01, 1.772e-01, -1.634e-01, -3.699e-02, 1.127e-01, 6.082e-03, -5.189e-02, -4.303e-02, -7.775e-02, -6.178e-02, -7.501e-02, 1.320e-01)); + r += mul(s2_1, M4(-4.850e-02, -1.289e-01, -9.325e-02, -1.643e-02, 2.239e-01, -9.011e-02, -2.566e-02, 5.301e-02, 1.855e-01, -9.348e-02, -1.585e-02, -2.001e-01, -5.371e-02, -5.272e-02, 
1.356e-01, 1.703e-01)); + r += mul(s2_2, M4(1.056e-01, -6.602e-02, -3.005e-02, 9.795e-03, 5.049e-02, -9.108e-02, 3.054e-02, -1.752e-01, -1.561e-01, 3.075e-01, 8.300e-02, -8.430e-02, 1.265e-02, -2.776e-01, -5.878e-02, 1.144e-02)); + r += mul(s2_3, M4(1.382e-01, -2.842e-01, -2.699e-02, -2.153e-01, 2.187e-01, 2.651e-01, 1.307e-01, 1.926e-01, 5.445e-02, -2.797e-01, -8.728e-02, 7.571e-02, 6.814e-02, -1.233e-01, -3.357e-02, 1.947e-01)); + r += mul(s2_4, M4(-1.063e-02, 1.618e-01, -1.176e-01, 8.081e-02, -4.808e-02, 2.455e-02, -4.511e-02, -1.988e-01, 7.006e-02, -5.232e-01, 1.190e-01, 1.209e-01, 1.514e-03, -3.750e-02, -3.545e-01, -7.031e-02)); + r += mul(s2_5, M4(2.690e-01, 6.288e-02, -8.282e-02, -5.746e-02, 2.216e-02, 1.134e-01, -2.658e-01, 1.436e-01, -2.117e-01, 4.009e-03, -8.882e-02, -1.534e-02, 7.304e-02, -1.343e-01, 1.021e-01, 1.895e-01)); + r += mul(s2_6, M4(1.037e-03, -1.050e-01, -2.224e-01, -2.244e-01, 4.082e-02, 2.096e-01, 2.538e-01, 1.739e-01, -1.823e-02, 2.410e-02, 2.399e-01, -2.812e-01, 1.423e-01, -9.570e-02, -5.401e-02, -1.056e-02)); + r += mul(s2_7, M4(1.985e-01, -1.232e-02, -2.831e-01, -9.744e-02, -2.427e-01, -1.069e-01, -9.460e-02, 1.293e-01, -9.502e-02, -2.604e-01, 1.861e-01, -1.988e-01, -5.062e-02, -2.880e-02, 2.069e-01, 4.192e-02)); + r += mul(s2_8, M4(1.906e-01, -2.576e-02, -1.644e-01, -8.586e-02, -1.002e-01, 1.869e-02, 8.363e-02, -1.062e-01, -3.330e-01, 5.037e-02, 1.923e-01, -6.374e-02, -2.618e-02, -3.867e-02, 2.036e-01, 1.382e-01)); + r += mul(s3_0, M4(-1.223e-01, 4.404e-02, 7.597e-02, 1.037e-01, 5.300e-02, -3.714e-01, 8.780e-02, 1.064e-01, 4.379e-02, 5.496e-02, -5.741e-02, -8.834e-02, 4.607e-02, 2.440e-01, -3.621e-02, -1.449e-01)); + r += mul(s3_1, M4(1.114e-02, -3.579e-03, -1.118e-01, -7.019e-02, 1.253e-01, 1.410e-01, 1.507e-01, 1.204e-01, 4.912e-02, -2.110e-01, -1.393e-01, 1.398e-02, -3.292e-02, 2.104e-01, 1.470e-01, -4.723e-01)); + r += mul(s3_2, M4(-7.613e-02, -3.437e-02, 3.357e-02, 2.053e-02, 2.749e-02, -1.624e-02, 1.085e-01, 4.885e-02, 
-8.099e-02, -1.709e-01, 1.212e-01, -1.369e-01, 8.667e-03, -2.034e-01, 1.859e-02, -2.609e-01)); + r += mul(s3_3, M4(-2.751e-01, -6.155e-02, 3.194e-02, 2.257e-01, 2.247e-01, -1.563e-01, -2.388e-01, 4.747e-01, 8.550e-02, -2.046e-01, 5.261e-02, 1.302e-02, -2.718e-01, 8.046e-02, 6.971e-03, -4.401e-02)); + r += mul(s3_4, M4(-2.097e-01, 4.087e-02, 1.145e-01, 3.885e-01, -2.739e-01, 4.656e-01, 1.839e-01, -4.552e-02, 1.422e-01, -8.299e-02, 1.631e-01, 8.810e-02, -2.652e-01, 3.779e-01, -1.959e-01, -7.052e-01)); + r += mul(s3_5, M4(2.480e-03, 2.248e-01, 9.405e-02, 1.128e-01, 1.134e-01, -7.725e-02, -7.705e-02, 4.144e-02, 1.185e-02, -1.614e-01, -6.596e-02, 1.191e-01, 1.946e-01, 6.701e-02, 4.365e-02, -2.465e-01)); + r += mul(s3_6, M4(-2.183e-01, 6.265e-03, -2.004e-02, 1.360e-01, 9.385e-02, -6.143e-03, -3.924e-02, 2.857e-01, -1.091e-01, 5.286e-02, -2.204e-03, -4.854e-02, -2.588e-01, -8.500e-02, -7.099e-02, -3.542e-02)); + r += mul(s3_7, M4(-2.584e-01, -1.560e-01, 3.706e-02, 8.145e-03, -4.860e-02, -6.664e-02, -2.890e-01, 9.694e-02, 6.597e-02, -1.415e-01, -9.524e-02, 4.655e-03, -1.705e-01, 1.622e-01, 2.766e-01, -1.033e-01)); + r += mul(s3_8, M4(1.214e-02, 6.487e-02, -1.258e-03, 1.522e-01, 8.112e-02, -1.089e-02, -2.167e-02, -1.711e-01, -1.753e-01, -7.256e-02, 8.314e-02, 6.349e-02, 1.489e-01, 4.952e-02, 1.646e-01, -7.355e-02)); + r += V4(-1.384e-02, -2.911e-02, -8.125e-03, 2.809e-03); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = 
-max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 
s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-9.839e-02, 3.062e-02, -3.104e-02, 1.230e-01, -3.336e-02, -6.990e-02, -5.725e-02, -1.051e-01, 5.154e-02, -1.304e-01, -2.518e-02, 3.975e-03, -2.458e-02, -4.391e-02, -1.022e-01, 1.249e-01)); + r += mul(s0_1, M4(1.530e-01, 1.379e-01, -4.847e-02, -8.142e-02, -1.899e-01, -9.819e-03, -2.943e-02, 8.787e-02, 8.270e-02, -7.839e-04, 4.236e-03, 2.756e-03, 1.612e-01, -1.871e-01, -1.973e-01, -8.765e-02)); + r += mul(s0_2, M4(1.230e-01, 7.002e-02, 2.676e-02, -2.607e-01, 3.729e-02, -1.075e-01, -5.825e-02, 9.552e-02, -6.701e-02, 5.642e-02, 5.425e-02, -1.487e-01, 6.427e-02, -1.606e-01, -2.290e-01, 2.280e-01)); + r += mul(s0_3, M4(-1.096e-01, 4.115e-02, -5.260e-02, 9.679e-02, 1.028e-01, -1.253e-01, -1.221e-01, -1.339e-02, 1.119e-01, -8.725e-02, 1.090e-01, -8.990e-02, 1.657e-01, -5.536e-02, 7.055e-02, -6.021e-03)); + r += mul(s0_4, M4(3.182e-01, -1.503e-01, -9.298e-02, 9.546e-02, 2.257e-01, -1.978e-01, -1.203e-01, -5.362e-02, -2.116e-02, -3.685e-02, 6.188e-02, 7.871e-02, 6.547e-01, 3.793e-01, -9.293e-03, -5.081e-01)); + r += mul(s0_5, M4(6.746e-02, -3.872e-02, -8.004e-02, 4.643e-02, -1.157e-01, -1.149e-01, -1.323e-01, -1.319e-02, -3.194e-02, -1.266e-02, -4.837e-02, -6.710e-02, 1.123e-01, -1.855e-01, -9.259e-03, -1.861e-01)); + r += mul(s0_6, M4(-7.725e-02, -4.151e-02, -7.387e-02, 1.089e-01, -5.642e-02, 7.030e-03, 7.847e-02, 1.234e-01, 9.461e-03, -4.967e-02, 1.502e-02, 3.836e-02, 1.171e-02, 3.758e-02, 3.336e-02, 5.168e-02)); + r += mul(s0_7, M4(2.541e-02, 1.428e-01, 2.426e-03, 2.451e-02, -1.436e-01, 5.267e-02, 2.402e-02, -4.748e-02, 2.185e-02, -5.397e-02, -4.221e-02, -4.758e-02, 1.504e-01, 1.818e-01, -3.334e-04, 1.985e-01)); + r += mul(s0_8, M4(5.651e-02, 1.104e-01, 1.186e-01, -4.816e-02, -1.772e-03, -3.797e-02, -1.169e-01, 9.888e-02, -7.066e-02, -2.928e-02, 1.300e-01, -9.514e-02, 8.911e-02, 1.399e-01, 8.996e-02, 7.361e-02)); + r += 
mul(s1_0, M4(-4.999e-02, 7.076e-02, 1.754e-01, 2.998e-01, -5.965e-02, 8.642e-02, 6.403e-02, 1.597e-02, -1.367e-02, -1.751e-01, -1.432e-01, -1.337e-01, -1.356e-01, -1.147e-02, 4.994e-02, 1.355e-01)); + r += mul(s1_1, M4(-1.462e-01, 2.612e-01, 1.009e-01, 6.179e-02, 1.171e-01, -2.310e-01, -1.324e-01, -1.989e-01, -7.842e-02, 2.017e-01, -2.558e-02, 1.725e-01, -1.879e-01, 2.705e-01, 9.276e-02, 2.009e-01)); + r += mul(s1_2, M4(-5.937e-02, 1.314e-02, 1.771e-01, -8.971e-02, 9.120e-02, -8.247e-03, 6.149e-02, -1.310e-02, 1.856e-01, -8.410e-02, 3.653e-02, 1.994e-02, 1.377e-02, -2.608e-02, 2.918e-02, 1.410e-01)); + r += mul(s1_3, M4(-1.270e-01, 2.306e-01, 1.223e-01, 5.712e-02, 6.862e-02, -1.441e-02, -1.116e-02, -1.195e-01, 4.816e-02, -9.698e-03, -1.642e-01, 2.214e-01, -1.748e-01, -1.574e-01, 7.939e-02, 4.879e-02)); + r += mul(s1_4, M4(-6.803e-02, -2.231e-01, -9.303e-02, 3.363e-01, 4.699e-01, -6.197e-01, -1.678e-01, 1.660e-02, -3.526e-01, -2.941e-01, 4.667e-01, -3.648e-01, -5.144e-01, 7.678e-01, 4.839e-01, -5.598e-01)); + r += mul(s1_5, M4(-7.123e-02, -2.319e-01, -3.813e-02, 1.547e-01, 2.632e-01, -2.001e-02, 4.992e-02, -1.581e-01, -4.053e-02, -2.970e-04, 5.472e-02, -6.788e-02, -2.869e-01, -9.887e-02, 4.475e-02, -1.027e-01)); + r += mul(s1_6, M4(7.336e-02, 1.784e-02, -1.559e-01, 7.106e-02, 9.975e-02, -6.405e-03, 7.519e-02, 7.405e-02, -9.000e-02, 8.590e-02, -2.557e-01, -1.597e-02, -4.081e-02, 7.353e-02, 3.324e-02, -5.861e-02)); + r += mul(s1_7, M4(2.380e-02, -1.385e-01, -1.038e-01, 2.752e-02, -4.866e-02, 6.411e-02, 3.531e-01, -1.330e-01, 2.261e-01, -7.266e-02, -5.772e-02, -6.657e-02, -1.588e-01, 1.268e-01, 1.618e-02, 9.695e-02)); + r += mul(s1_8, M4(1.300e-01, -1.683e-01, -1.481e-02, -1.442e-02, 2.942e-02, -4.528e-03, 3.715e-01, -2.056e-01, -1.070e-01, 7.589e-02, -1.326e-01, 9.583e-02, -1.052e-01, 4.535e-02, -5.072e-02, 1.023e-01)); + r += mul(s2_0, M4(4.804e-02, 4.605e-02, 1.479e-01, 2.794e-02, 1.518e-02, 1.881e-02, 5.332e-02, 6.881e-02, -9.226e-02, -1.212e-02, 5.999e-03, 
4.409e-02, -2.364e-01, -4.576e-02, 4.537e-02, 1.615e-01)); + r += mul(s2_1, M4(-1.719e-02, 2.187e-01, 1.772e-01, 1.933e-02, -3.135e-02, 4.963e-02, 1.303e-02, -2.623e-02, -1.273e-01, 1.462e-02, 7.118e-02, -1.498e-01, 1.978e-01, 7.777e-02, -1.052e-01, 1.954e-01)); + r += mul(s2_2, M4(-2.936e-02, -1.679e-02, 1.548e-01, 2.521e-02, 4.974e-02, 5.132e-02, 1.145e-02, 4.237e-02, -2.103e-01, 3.828e-03, -3.888e-02, 1.429e-01, -2.964e-02, -1.423e-02, -2.417e-01, 3.932e-01)); + r += mul(s2_3, M4(-1.881e-01, -2.086e-02, 1.815e-04, 1.164e-01, -7.054e-02, 8.297e-02, 7.999e-02, 3.579e-02, -1.240e-02, 5.415e-02, -4.885e-02, -7.243e-02, -7.780e-02, -8.180e-02, 1.529e-01, 1.616e-01)); + r += mul(s2_4, M4(-1.410e-01, 9.917e-02, 2.589e-01, -2.888e-01, -1.248e-01, 2.510e-01, 3.366e-02, 1.437e-01, 3.793e-01, 5.004e-02, 3.701e-01, 3.111e-01, -3.195e-01, -3.731e-02, 1.132e-01, 1.961e-01)); + r += mul(s2_5, M4(-8.966e-02, -5.875e-02, -1.135e-01, 8.984e-02, 5.129e-02, 7.536e-02, 2.763e-01, 1.203e-02, -2.020e-01, 1.617e-01, -1.427e-02, 6.661e-02, -1.930e-01, -7.980e-03, 8.300e-02, 1.697e-01)); + r += mul(s2_6, M4(5.726e-02, -1.777e-02, -4.372e-02, 3.170e-03, 2.430e-02, -1.211e-02, -4.311e-02, -1.215e-01, -1.066e-01, 1.810e-02, 1.013e-01, -2.315e-02, -3.001e-01, -8.701e-02, -1.455e-01, 4.155e-02)); + r += mul(s2_7, M4(1.431e-01, 2.344e-03, 1.489e-02, 4.575e-02, 7.550e-02, 3.571e-02, -2.517e-01, -5.387e-03, 4.699e-02, 5.390e-02, 2.202e-01, -7.897e-02, 6.326e-02, -1.588e-01, -1.852e-01, -3.037e-02)); + r += mul(s2_8, M4(-3.479e-02, -8.214e-02, 6.944e-02, 5.269e-02, -7.719e-03, -7.718e-02, -8.393e-02, -8.959e-02, -2.111e-01, 1.286e-01, 1.948e-01, -8.095e-02, -2.335e-02, 9.535e-02, -1.144e-01, 3.688e-02)); + r += mul(s3_0, M4(8.765e-02, -6.740e-02, 1.200e-01, 6.992e-02, 1.884e-02, -1.592e-02, -9.304e-02, 1.706e-01, 6.023e-02, 1.494e-01, -1.165e-01, 3.848e-02, -2.808e-02, 4.820e-02, -4.830e-02, 9.253e-02)); + r += mul(s3_1, M4(-1.087e-01, 2.039e-01, -8.116e-02, 1.500e-01, 1.942e-01, -2.291e-02, 
-1.321e-02, 1.630e-02, 2.216e-01, 3.536e-02, -2.321e-01, 1.143e-01, 2.093e-01, -6.126e-02, -3.792e-02, 6.886e-02)); + r += mul(s3_2, M4(1.225e-01, -6.231e-02, -1.999e-01, 9.921e-02, -4.257e-02, -2.975e-02, -4.073e-02, 7.069e-02, -5.033e-02, 2.268e-03, -1.908e-01, 9.678e-02, -4.540e-02, 4.195e-02, -1.990e-02, -9.002e-02)); + r += mul(s3_3, M4(8.620e-02, 1.856e-02, 1.201e-01, 1.343e-01, 6.518e-03, 1.274e-01, 1.006e-01, -4.803e-02, 2.593e-02, 1.861e-01, -2.316e-01, -1.882e-01, 2.021e-02, -5.658e-02, -1.339e-02, -6.240e-02)); + r += mul(s3_4, M4(3.285e-01, -2.068e-02, 4.279e-01, -7.153e-02, 1.368e-02, 2.029e-01, 3.056e-01, -2.398e-02, -2.007e-02, -6.963e-02, 4.423e-01, 2.373e-01, -3.829e-03, -8.114e-02, 1.907e-01, 6.471e-02)); + r += mul(s3_5, M4(6.460e-02, -2.118e-01, -1.407e-01, 8.041e-02, 5.512e-02, 1.740e-01, 2.092e-01, 1.569e-02, 2.071e-02, -5.675e-02, -9.011e-02, 5.301e-02, 9.519e-02, -2.758e-02, -3.116e-02, -2.141e-02)); + r += mul(s3_6, M4(-2.575e-02, 1.869e-02, 7.810e-02, -7.650e-03, 2.013e-01, 2.214e-02, -8.345e-02, 2.879e-02, 1.492e-01, -4.496e-02, -1.297e-01, 2.238e-03, 7.626e-02, -7.449e-02, -5.789e-02, 5.411e-02)); + r += mul(s3_7, M4(5.731e-02, 8.709e-02, 8.776e-02, 1.547e-01, 9.076e-02, -1.621e-01, -3.538e-01, -3.413e-02, 1.609e-01, -6.938e-03, -5.128e-02, 7.688e-02, -5.171e-03, 9.220e-02, 2.424e-02, 1.621e-02)); + r += mul(s3_8, M4(6.538e-02, -3.415e-02, 5.399e-02, -8.863e-02, -7.563e-02, -1.511e-01, -2.978e-01, 7.569e-02, -1.429e-01, 3.110e-02, -2.972e-02, -8.001e-03, -8.429e-02, -1.092e-02, -4.655e-02, 3.067e-02)); + r += V4(-3.137e-02, 6.441e-03, -1.512e-02, 2.709e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, 
M4(-3.334e-02, -7.793e-02, 7.471e-02, 4.792e-02, -9.991e-03, 1.358e-02, 7.063e-02, -7.185e-02, 1.122e-01, 1.016e-01, 1.666e-01, -4.230e-02, -2.916e-03, 1.093e-01, 3.944e-02, -6.470e-02)); + r += mul(s0_1, M4(-3.424e-02, -5.060e-03, -9.649e-03, 1.311e-01, 9.390e-02, -6.332e-03, 8.698e-02, -1.949e-03, 5.118e-02, 4.049e-02, 7.113e-02, 1.951e-01, 1.856e-01, 2.065e-01, -7.642e-02, -1.852e-02)); + r += mul(s0_2, M4(-6.507e-03, -1.195e-02, -1.476e-01, -4.432e-02, 1.839e-02, 2.119e-02, -3.374e-03, -7.739e-03, -4.710e-03, -6.300e-02, -3.058e-02, -1.966e-03, -4.888e-03, 6.550e-02, 3.826e-02, 4.369e-02)); + r += mul(s0_3, M4(-2.796e-03, 6.707e-02, 1.750e-01, 5.898e-02, -6.279e-02, 1.146e-01, 4.500e-02, -1.178e-01, 1.591e-02, 4.649e-03, 1.249e-01, 1.531e-02, 2.940e-02, -3.842e-02, 1.162e-02, 8.735e-02)); + r += mul(s0_4, M4(1.997e-01, 1.499e-01, -2.234e-01, 2.460e-01, -6.721e-02, 2.135e-01, -1.372e-01, -1.864e-01, 1.432e-01, 1.012e-01, -3.441e-02, 7.842e-02, -2.026e-01, 1.294e-01, 3.063e-01, 2.969e-01)); + r += mul(s0_5, M4(1.302e-02, -1.252e-01, -1.174e-01, 2.114e-01, -5.837e-02, -1.377e-01, 4.956e-03, -1.486e-01, -7.287e-03, -1.002e-01, 1.431e-01, -8.579e-02, -2.642e-02, -9.106e-02, -5.609e-02, 3.664e-02)); + r += mul(s0_6, M4(-3.524e-02, 6.427e-02, 5.395e-02, -6.112e-02, -3.246e-02, -3.689e-02, 5.133e-02, 6.382e-02, -7.140e-02, 8.407e-02, -1.051e-02, 1.865e-02, -1.559e-02, 8.857e-02, -5.051e-02, 1.129e-03)); + r += mul(s0_7, M4(-7.399e-02, -1.028e-01, -1.066e-01, 1.430e-01, -1.403e-01, 9.422e-02, -8.995e-02, -1.358e-02, -8.900e-02, 1.519e-01, -7.974e-02, -1.915e-01, 2.726e-02, 2.748e-01, -2.521e-02, 9.171e-02)); + r += mul(s0_8, M4(4.422e-02, -1.013e-01, -9.089e-02, 3.341e-02, -3.750e-02, 1.762e-01, 8.286e-02, -4.945e-02, -1.285e-01, 1.504e-02, -4.042e-02, -7.892e-02, 3.263e-02, -2.562e-02, -1.577e-01, 2.407e-02)); + r += mul(s1_0, M4(-9.014e-02, 9.935e-03, 4.158e-02, 6.269e-02, -3.854e-02, -6.123e-02, 5.101e-02, -1.229e-01, -5.554e-02, -4.848e-02, -5.602e-02, -2.115e-01, 
-5.692e-02, 9.588e-03, 8.485e-02, 7.810e-02)); + r += mul(s1_1, M4(-5.449e-02, -5.076e-02, 7.205e-02, -5.263e-02, -1.293e-01, 1.127e-01, -1.122e-01, -6.078e-02, 8.158e-02, -2.493e-01, 4.842e-02, 1.959e-01, 5.976e-02, -1.003e-01, -6.969e-02, 1.654e-01)); + r += mul(s1_2, M4(-2.894e-02, 1.048e-01, 3.925e-03, -1.321e-01, -4.509e-02, -1.643e-02, -1.334e-01, 9.845e-02, -1.127e-01, -1.049e-01, 1.350e-02, -1.013e-02, -1.599e-01, -6.552e-02, 3.246e-02, 3.401e-02)); + r += mul(s1_3, M4(1.324e-01, -7.379e-02, 6.628e-02, -1.243e-01, -9.905e-02, 1.141e-01, 5.375e-02, -1.293e-01, -2.330e-02, -3.277e-01, 5.023e-02, -1.837e-01, 6.815e-02, -4.447e-02, 1.480e-01, -2.077e-03)); + r += mul(s1_4, M4(3.168e-01, 3.120e-01, 4.723e-02, -1.814e-01, -4.271e-01, 3.554e-01, -2.607e-01, 3.691e-02, 1.565e-01, -4.785e-01, 1.167e-01, -4.891e-02, -1.643e-01, -2.565e-01, 6.425e-01, -9.561e-02)); + r += mul(s1_5, M4(-4.394e-02, 1.831e-01, 2.602e-02, 8.348e-02, -4.285e-01, -2.126e-01, -2.658e-01, 1.340e-01, -6.773e-02, -1.315e-01, 9.904e-02, -1.672e-01, 1.995e-01, 1.307e-01, 1.985e-01, -8.368e-02)); + r += mul(s1_6, M4(-8.468e-03, 2.030e-01, -8.770e-04, -1.421e-01, -5.327e-02, 1.042e-01, 7.595e-02, -7.467e-02, -2.326e-03, -1.496e-01, -3.598e-02, 1.430e-02, 6.833e-02, 1.257e-01, 5.333e-03, 1.382e-01)); + r += mul(s1_7, M4(-7.236e-02, 2.202e-01, 7.527e-03, -1.157e-01, -4.825e-01, -1.938e-01, -2.971e-01, -7.394e-02, -3.056e-02, 6.438e-02, -8.167e-02, -1.749e-01, 7.804e-02, 3.541e-01, 1.631e-01, 1.105e-01)); + r += mul(s1_8, M4(-3.117e-02, 1.456e-01, 1.217e-02, -9.099e-02, -2.173e-01, -5.803e-02, -1.068e-01, -6.206e-02, -1.187e-04, 9.938e-02, 4.721e-02, -3.500e-02, -7.154e-02, -4.535e-02, -1.900e-02, 6.525e-03)); + r += mul(s2_0, M4(3.437e-02, 3.957e-03, -1.536e-02, 2.984e-02, 8.057e-03, 1.446e-02, -2.851e-02, -1.680e-02, 9.630e-04, -3.550e-02, 4.249e-02, -5.379e-02, -2.274e-02, 3.657e-02, 6.309e-02, -1.412e-01)); + r += mul(s2_1, M4(-4.765e-02, -7.905e-02, -7.251e-02, -8.459e-02, -1.866e-02, 4.668e-02, 
-2.496e-02, -3.761e-02, 6.520e-02, 9.971e-02, -1.199e-02, -3.565e-02, -2.936e-02, -1.288e-02, -8.909e-03, 1.604e-01)); + r += mul(s2_2, M4(-2.684e-02, 3.571e-02, 2.920e-02, -1.400e-01, -8.129e-03, 2.701e-02, -1.237e-02, 6.292e-02, 3.752e-02, -3.476e-02, 7.166e-02, 2.922e-02, 2.153e-02, -2.079e-02, 2.298e-01, 2.646e-01)); + r += mul(s2_3, M4(5.891e-02, -3.401e-02, 1.310e-01, 1.284e-01, -3.706e-02, -6.929e-03, 5.705e-02, 2.617e-02, 1.908e-03, 1.155e-01, 4.786e-02, 1.471e-01, -8.026e-02, 6.384e-02, 3.177e-01, -6.423e-02)); + r += mul(s2_4, M4(1.941e-01, -1.938e-04, -4.664e-02, 3.134e-01, -7.269e-02, -2.396e-01, -5.068e-02, -3.354e-01, 2.428e-01, 2.566e-01, -3.293e-01, 3.857e-01, -1.129e-02, 1.540e-01, -9.700e-02, -1.510e-01)); + r += mul(s2_5, M4(4.818e-02, 1.620e-01, 1.519e-01, 2.711e-03, -1.559e-02, 1.144e-01, 7.668e-03, -1.534e-01, 3.862e-03, -4.583e-02, 5.226e-02, -6.465e-02, 6.788e-02, 5.557e-02, 6.880e-02, 9.542e-02)); + r += mul(s2_6, M4(1.471e-01, 3.141e-02, 1.711e-02, 3.585e-02, 7.758e-02, 3.487e-03, -4.199e-02, 1.119e-02, -9.641e-02, 7.175e-02, -6.454e-03, -8.769e-02, 4.452e-02, 3.013e-02, 2.107e-01, 1.447e-02)); + r += mul(s2_7, M4(1.941e-01, -1.489e-01, 1.093e-01, 1.092e-02, 1.309e-01, 1.173e-01, 3.414e-02, -1.350e-01, -2.374e-01, -1.292e-01, -1.259e-01, 1.377e-02, -5.705e-02, 1.641e-01, -3.345e-02, -1.443e-01)); + r += mul(s2_8, M4(5.172e-02, 8.306e-03, 6.669e-02, -2.207e-02, -2.737e-02, 7.314e-02, -5.039e-02, -6.565e-02, -1.124e-01, -1.920e-01, -8.356e-02, 2.342e-02, 8.945e-03, 2.739e-02, 8.816e-03, 7.638e-02)); + r += mul(s3_0, M4(-2.968e-02, 1.037e-01, -2.673e-02, 1.869e-02, 9.621e-02, -5.237e-02, 1.487e-03, -9.423e-02, 3.269e-02, -5.264e-02, -4.862e-02, 2.446e-02, 6.554e-02, -8.896e-02, 2.104e-02, -1.165e-01)); + r += mul(s3_1, M4(-9.417e-03, 6.345e-02, -7.850e-03, 2.754e-01, -2.751e-02, -1.201e-01, -5.333e-02, -1.217e-01, 8.778e-02, 1.394e-01, -1.787e-01, 1.017e-01, -3.779e-02, 6.921e-02, 2.664e-02, 1.665e-01)); + r += mul(s3_2, M4(3.141e-02, 
7.360e-02, 9.011e-02, -1.109e-02, 1.985e-02, -4.239e-02, 1.468e-01, -2.676e-02, 1.225e-01, -5.115e-02, 3.496e-02, 8.530e-02, 1.594e-02, -2.731e-02, 3.492e-02, -4.212e-02)); + r += mul(s3_3, M4(7.578e-02, 3.966e-03, -7.774e-03, 4.263e-02, 3.758e-02, -1.590e-01, 1.144e-01, -1.083e-01, 9.865e-02, 3.237e-03, -1.194e-01, 3.048e-02, -1.231e-01, 5.687e-02, 3.601e-02, -2.234e-01)); + r += mul(s3_4, M4(3.291e-01, 2.283e-01, -2.254e-01, 1.114e-01, -3.019e-01, -1.677e-01, -1.735e-01, -5.636e-01, 4.295e-01, 1.206e-01, -1.169e-01, -9.182e-02, 2.546e-01, 2.820e-01, -9.122e-02, 1.703e-01)); + r += mul(s3_5, M4(1.178e-01, 1.875e-01, 7.948e-02, -1.907e-02, -9.888e-02, 8.592e-02, 8.759e-02, -2.194e-01, -4.479e-02, 1.983e-02, 1.738e-02, -1.237e-01, -9.644e-02, -6.784e-02, -1.594e-02, 5.128e-02)); + r += mul(s3_6, M4(7.520e-02, 6.732e-02, 1.898e-02, 3.384e-02, 1.156e-01, 7.703e-02, -9.079e-02, 2.659e-02, 2.860e-02, 2.034e-01, -4.492e-02, -5.377e-02, 4.771e-02, 1.099e-01, 2.032e-02, 4.955e-02)); + r += mul(s3_7, M4(-1.573e-02, 1.987e-01, -9.155e-02, 1.445e-01, 2.224e-01, 7.036e-02, 1.126e-01, -2.900e-01, -3.571e-02, 6.868e-02, -5.302e-02, -2.629e-02, -2.798e-01, 1.832e-01, -1.850e-01, -8.079e-02)); + r += mul(s3_8, M4(-1.843e-02, -2.045e-02, -2.602e-02, -6.202e-02, 1.303e-01, 1.797e-01, 1.169e-01, -2.712e-02, -5.147e-02, -5.439e-02, 2.103e-02, -6.035e-02, 4.517e-02, 3.473e-02, 6.015e-02, -9.014e-02)); + r += V4(1.952e-02, -1.174e-02, -1.100e-02, -1.307e-02); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 
s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 6 +//!DESC conv5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, 
V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-6.394e-02, -8.671e-02, 1.570e-02, -4.415e-02, 1.044e-01, 1.031e-01, 2.257e-02, -1.991e-02, -6.187e-02, 6.388e-02, 9.901e-02, 1.943e-02, 3.702e-02, 5.943e-03, 1.052e-02, 5.888e-02)); + r += mul(s0_1, M4(8.377e-02, 1.451e-01, 1.580e-01, 3.663e-02, 1.333e-01, 3.462e-02, 2.649e-02, 7.588e-02, 2.175e-02, 1.744e-01, 1.048e-01, -1.644e-01, -5.844e-02, 1.648e-02, 1.306e-01, -5.721e-02)); + r += mul(s0_2, M4(-7.420e-02, 5.839e-02, -2.164e-02, 2.832e-03, 1.776e-02, -1.732e-01, 1.505e-02, -1.082e-02, -1.647e-02, 3.072e-01, 1.187e-01, -1.273e-02, -9.205e-02, 1.119e-01, -2.918e-02, 9.009e-02)); + r += mul(s0_3, M4(-4.099e-03, -5.330e-02, -6.372e-02, -4.979e-02, -5.943e-02, -3.211e-02, -1.558e-01, 1.997e-01, -5.598e-02, -1.344e-02, 8.655e-02, 1.490e-01, -3.303e-02, -3.863e-02, 1.987e-03, -3.842e-02)); + r += mul(s0_4, M4(9.400e-02, 1.535e-01, 8.863e-02, 1.286e-01, 4.315e-01, -1.199e-01, 1.476e-01, 2.280e-01, -3.763e-02, 1.189e-01, 2.648e-01, 1.310e-01, -2.201e-01, -6.282e-02, 2.480e-01, 3.236e-02)); + r += mul(s0_5, M4(3.101e-02, -7.681e-02, -6.753e-02, 3.084e-02, 8.940e-02, -1.872e-01, -2.982e-02, -8.335e-02, 7.990e-02, 1.128e-01, 2.703e-01, 2.021e-01, -9.396e-02, 4.201e-02, 6.420e-03, -4.602e-02)); + r += mul(s0_6, M4(4.940e-02, -6.328e-03, -1.384e-02, -5.572e-02, 3.489e-02, -3.705e-02, 8.067e-02, -4.078e-02, -7.071e-03, 2.446e-02, 1.633e-02, -1.022e-02, -4.371e-02, 2.045e-02, 5.237e-02, -1.685e-02)); + r += mul(s0_7, M4(2.613e-02, 3.379e-02, -5.088e-02, 6.559e-02, -7.908e-03, -7.828e-02, -1.304e-02, -3.928e-02, 3.253e-02, -8.783e-02, 7.669e-03, -1.617e-01, 2.507e-02, 1.387e-01, 1.676e-01, 6.068e-02)); + r += mul(s0_8, M4(-1.872e-02, 6.736e-03, -2.342e-02, 3.332e-03, 1.419e-02, -3.932e-02, -8.530e-02, -5.934e-02, 8.273e-03, 8.759e-03, 6.867e-02, -3.344e-02, 
8.183e-02, 1.472e-01, 3.134e-02, 5.993e-02)); + r += mul(s1_0, M4(6.022e-02, -1.966e-02, -1.612e-02, -6.922e-02, 7.477e-02, 1.176e-01, 1.365e-03, 8.882e-02, -4.594e-02, 7.740e-02, 6.959e-02, 8.633e-02, 3.034e-02, -8.553e-02, -6.847e-02, 2.439e-02)); + r += mul(s1_1, M4(1.861e-01, 1.628e-01, 9.891e-02, 8.686e-02, 1.521e-01, 2.777e-02, -4.557e-02, -5.964e-02, -7.298e-02, 9.138e-02, 9.796e-02, 1.437e-02, -6.912e-02, -1.500e-01, 1.108e-02, 8.537e-02)); + r += mul(s1_2, M4(1.272e-01, 1.039e-01, -1.040e-02, 5.735e-02, -9.561e-03, -7.849e-02, 2.013e-02, 6.743e-02, -1.308e-01, 2.097e-01, -2.586e-02, -2.612e-02, -1.021e-01, -2.057e-01, -3.857e-02, 1.757e-02)); + r += mul(s1_3, M4(8.053e-02, -6.616e-02, -1.770e-02, -1.982e-01, -2.444e-02, 7.095e-02, -3.175e-02, 1.033e-01, -5.762e-02, 3.046e-02, 1.113e-01, -7.795e-02, 7.733e-02, -2.446e-01, -9.412e-02, -1.653e-01)); + r += mul(s1_4, M4(3.202e-01, 3.417e-01, 2.862e-01, 1.982e-01, 2.573e-01, 8.468e-02, 1.595e-01, 4.027e-01, -1.133e-01, 1.141e-01, 3.135e-01, 8.849e-02, -3.401e-01, -4.237e-01, -9.999e-03, 1.012e-01)); + r += mul(s1_5, M4(2.600e-01, 5.980e-02, 8.030e-02, -2.103e-02, -1.986e-02, -5.828e-02, -1.421e-01, -1.034e-01, 4.901e-02, 4.389e-02, 5.034e-02, 1.400e-01, 6.483e-02, -2.524e-01, -5.724e-02, 6.231e-02)); + r += mul(s1_6, M4(1.074e-01, 3.976e-02, 3.519e-02, -4.123e-02, 6.299e-02, 5.759e-02, 6.204e-02, 4.752e-02, 5.410e-02, -4.425e-05, 4.521e-02, -8.902e-02, -5.916e-02, -2.006e-01, -1.372e-01, -1.440e-01)); + r += mul(s1_7, M4(9.065e-02, 2.786e-03, -7.057e-02, -7.517e-03, -4.347e-02, 6.086e-02, -2.635e-02, 4.724e-03, 1.253e-02, 4.647e-03, 2.887e-02, -1.678e-02, -8.265e-02, -6.195e-02, 1.822e-02, 7.331e-02)); + r += mul(s1_8, M4(4.428e-02, 6.706e-02, -4.415e-02, -1.130e-02, -1.310e-02, 4.253e-02, -9.984e-02, -1.037e-02, 3.263e-02, 8.474e-02, 7.599e-02, -3.215e-02, 4.988e-02, -1.178e-01, -1.416e-02, -2.848e-02)); + r += mul(s2_0, M4(8.826e-02, 8.038e-02, 7.920e-02, 9.084e-02, 6.145e-02, 8.284e-03, -5.767e-02, 
9.238e-03, -4.508e-03, -8.890e-02, -1.910e-02, -1.507e-01, -7.447e-02, 4.104e-02, -2.612e-02, -8.667e-02)); + r += mul(s2_1, M4(-5.439e-02, 3.984e-01, 5.645e-02, -1.242e-01, -8.338e-02, -1.595e-01, -6.036e-02, 5.165e-02, 2.823e-01, 1.645e-01, 5.408e-02, 1.663e-01, 2.206e-02, -1.240e-01, -7.492e-02, 5.293e-02)); + r += mul(s2_2, M4(1.161e-02, 1.763e-01, 3.373e-02, 7.828e-03, 1.142e-01, 1.688e-01, 1.343e-01, 7.300e-02, 8.175e-02, 1.114e-01, -6.659e-02, 4.685e-02, 2.217e-02, -3.111e-02, 1.022e-01, -5.566e-02)); + r += mul(s2_3, M4(1.668e-02, 1.160e-01, 1.852e-01, 2.139e-01, -5.002e-02, 4.529e-02, -2.762e-02, 1.528e-01, 1.020e-01, -4.068e-02, -5.659e-02, 6.207e-03, 2.146e-02, -7.677e-02, -1.098e-02, -1.738e-01)); + r += mul(s2_4, M4(-3.080e-01, -1.794e-01, 3.752e-01, -1.512e-02, 6.082e-02, -1.906e-01, 4.469e-02, -1.824e-01, 5.373e-01, 2.499e-01, -3.037e-02, 4.862e-01, 2.590e-01, 3.245e-01, 1.882e-01, -5.488e-01)); + r += mul(s2_5, M4(1.734e-01, 1.910e-01, 1.513e-02, 5.352e-02, -6.647e-02, 5.546e-02, 4.049e-02, 1.364e-01, -3.987e-02, -2.656e-01, -1.042e-01, 3.166e-01, -9.156e-02, -1.295e-01, 7.885e-02, 2.314e-02)); + r += mul(s2_6, M4(9.283e-03, 4.879e-02, 7.797e-02, 3.678e-02, 1.426e-02, -4.030e-02, -1.420e-02, 1.217e-03, 3.934e-02, 8.811e-03, -3.593e-03, -9.359e-02, -2.613e-02, -6.882e-02, -1.049e-01, -6.001e-02)); + r += mul(s2_7, M4(-1.466e-01, -2.775e-02, -5.452e-02, -3.920e-02, 3.784e-02, -2.177e-02, 1.765e-03, -5.121e-03, 7.044e-02, -2.550e-02, 4.265e-02, 1.714e-01, 1.928e-02, -1.084e-01, -1.362e-01, -6.649e-02)); + r += mul(s2_8, M4(1.515e-02, 2.673e-02, -6.883e-02, 2.487e-02, -2.320e-02, -8.358e-02, -1.656e-02, -7.216e-03, -4.509e-03, 8.023e-02, 5.342e-02, -6.369e-02, -2.025e-02, 1.406e-02, -5.119e-02, 3.383e-02)); + r += mul(s3_0, M4(-9.426e-02, 2.166e-02, -3.618e-02, 1.094e-01, 2.509e-01, -4.044e-02, 5.655e-02, -3.421e-01, 8.115e-03, -1.962e-01, -2.723e-02, -7.238e-02, -1.999e-02, 4.283e-03, 2.814e-02, -1.090e-02)); + r += mul(s3_1, M4(-1.476e-01, -9.419e-04, 
-2.845e-01, -3.093e-02, 7.671e-02, 1.561e-01, 2.363e-01, -1.520e-01, -4.572e-02, -7.799e-02, 1.145e-01, 7.141e-02, -1.601e-02, -1.268e-01, -5.096e-02, -1.117e-01)); + r += mul(s3_2, M4(1.561e-01, -1.141e-01, -4.663e-02, -4.326e-02, 9.767e-02, -8.240e-02, 1.372e-01, -3.313e-02, -2.856e-02, 2.021e-01, 4.114e-02, -1.512e-01, -4.197e-03, -1.235e-01, 5.185e-02, -9.070e-02)); + r += mul(s3_3, M4(6.872e-02, 3.742e-02, -2.118e-02, -2.090e-02, -1.240e-02, -2.806e-02, -8.335e-03, 2.746e-02, -8.620e-02, -4.219e-02, 2.922e-02, 3.157e-02, 1.153e-01, -9.716e-02, -4.720e-02, -1.023e-01)); + r += mul(s3_4, M4(-2.349e-01, -4.080e-01, -3.222e-01, -4.920e-01, 1.183e-01, -4.490e-02, 1.509e-01, -7.753e-02, -8.539e-02, 7.595e-02, -1.085e-01, 2.213e-01, 2.203e-01, 4.651e-02, 4.022e-02, -4.619e-01)); + r += mul(s3_5, M4(1.305e-01, -6.924e-02, -2.362e-02, -1.763e-01, -7.495e-02, -5.263e-02, -2.810e-03, 3.534e-03, -4.620e-02, 1.199e-03, 1.183e-01, 7.935e-02, 7.310e-03, -1.576e-01, 5.596e-02, 1.310e-01)); + r += mul(s3_6, M4(1.163e-02, 5.437e-02, 9.811e-02, 8.154e-02, 1.875e-02, -5.344e-02, 1.110e-03, -1.606e-02, 1.176e-02, 3.321e-02, 7.622e-02, -1.049e-01, -4.287e-02, -2.004e-01, -1.664e-01, -1.601e-01)); + r += mul(s3_7, M4(-5.847e-02, 3.982e-02, -9.274e-02, -3.528e-02, 1.357e-02, -8.811e-02, 2.798e-02, 1.610e-02, -8.094e-02, 1.221e-01, 1.545e-01, 1.393e-01, 1.614e-02, -6.596e-03, -9.230e-02, 7.828e-02)); + r += mul(s3_8, M4(1.433e-02, 2.302e-02, 1.756e-03, 4.419e-02, 2.216e-02, -1.352e-01, -2.377e-02, -7.755e-02, -1.540e-01, 6.615e-02, -9.143e-04, -1.092e-01, -5.908e-02, 8.977e-03, -1.693e-01, 1.599e-01)); + r += V4(1.103e-02, -1.627e-02, -1.286e-02, -1.415e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, 
V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.521e-03, -3.227e-02, -8.959e-02, -2.310e-02, -3.505e-02, -1.074e-01, 1.005e-01, -1.186e-02, -2.056e-03, -1.544e-02, 1.421e-01, -8.042e-02, 1.062e-02, -6.143e-02, -4.901e-03, -9.454e-02)); + r += mul(s0_1, M4(3.801e-02, -5.568e-02, 3.259e-02, 6.786e-02, -6.511e-02, 5.286e-02, 1.561e-02, -4.836e-02, -9.207e-02, -1.990e-01, 4.230e-01, 2.050e-02, 4.370e-02, 1.434e-01, -5.622e-02, 6.287e-02)); + r += mul(s0_2, M4(-7.980e-02, 5.040e-02, -2.226e-02, -2.332e-02, -1.077e-02, -1.836e-02, -2.621e-02, -1.471e-02, 2.554e-02, 4.099e-02, 1.355e-01, -1.841e-01, 8.085e-03, -3.333e-02, 8.552e-03, -2.047e-02)); + r += mul(s0_3, M4(6.407e-02, 1.156e-01, -1.245e-01, -4.790e-02, -1.968e-01, -5.708e-02, 5.560e-02, 3.975e-01, -3.533e-02, -1.222e-01, -1.292e-02, -6.471e-02, -4.069e-03, -4.856e-02, 2.772e-02, -2.131e-01)); + r += mul(s0_4, M4(5.784e-03, -4.214e-01, 9.652e-02, 2.624e-01, -4.463e-01, -3.606e-01, -3.593e-02, 2.150e-01, 1.693e-01, 1.568e-01, -4.593e-02, -3.257e-01, 7.479e-02, -8.113e-03, 3.176e-01, 2.823e-01)); + r += mul(s0_5, M4(-4.752e-02, 1.916e-01, -1.042e-01, -1.253e-02, -2.057e-02, -3.405e-02, -5.519e-02, -3.911e-02, 2.241e-01, -1.885e-02, 1.204e-01, 4.924e-02, 2.986e-02, 5.848e-02, -3.441e-02, -7.179e-02)); + r += mul(s0_6, M4(6.014e-02, 4.232e-02, -5.073e-02, -2.059e-02, -4.711e-02, -1.640e-01, 9.222e-02, 2.780e-01, -1.505e-03, -2.975e-02, 4.008e-02, -2.294e-02, 6.812e-02, 5.715e-03, -1.852e-01, -8.323e-02)); + r += mul(s0_7, M4(6.323e-02, -4.207e-02, 2.370e-02, 1.695e-01, -6.565e-02, 1.543e-01, -6.446e-02, -5.231e-02, 8.916e-03, -8.248e-03, 3.969e-02, 9.619e-02, 1.804e-01, -1.653e-01, 1.140e-01, 2.200e-01)); + r += mul(s0_8, M4(8.670e-03, -3.521e-02, -1.453e-01, -9.260e-02, 2.707e-02, 1.519e-01, -8.643e-02, -1.226e-01, 4.304e-02, 7.913e-03, 1.759e-02, 2.059e-01, 1.695e-02, -1.457e-02, 1.015e-01, -5.574e-02)); + r += mul(s1_0, M4(-9.047e-04, -7.526e-02, 3.870e-02, -3.091e-03, -3.185e-02, -3.210e-02, -5.562e-02, 
1.604e-01, -4.968e-02, 3.186e-02, 1.647e-01, 4.125e-02, -5.160e-02, -1.243e-02, -4.020e-02, -8.829e-02)); + r += mul(s1_1, M4(2.446e-02, -1.086e-01, 1.076e-01, 1.603e-01, -1.886e-01, 6.601e-02, 2.700e-03, -1.031e-02, -1.411e-01, -1.737e-02, -1.220e-02, 1.722e-01, -9.362e-02, 8.900e-02, 3.836e-02, 2.557e-02)); + r += mul(s1_2, M4(5.523e-02, 1.215e-01, 2.999e-02, -7.301e-02, -1.061e-01, -2.018e-02, 1.581e-03, 3.590e-02, -7.798e-02, -2.899e-02, 8.488e-02, 6.095e-02, -5.692e-03, 4.157e-02, -4.302e-02, 8.434e-02)); + r += mul(s1_3, M4(4.727e-02, -1.131e-01, 1.154e-02, -3.369e-01, -2.170e-02, -8.931e-02, 1.535e-01, 8.759e-02, 4.050e-02, -1.016e-01, 8.183e-02, -1.638e-01, 3.286e-02, -2.050e-01, -9.847e-02, 8.546e-02)); + r += mul(s1_4, M4(8.860e-02, -4.114e-01, 3.215e-01, 3.164e-01, -3.487e-01, -4.634e-01, 1.398e-01, 3.320e-01, 1.261e-01, -1.121e-01, 5.128e-02, 2.666e-01, -4.104e-04, -4.406e-01, -1.674e-01, 2.686e-01)); + r += mul(s1_5, M4(1.196e-01, 7.047e-04, 1.721e-01, 1.966e-02, -3.580e-02, 1.681e-02, -8.139e-02, 1.200e-01, 2.414e-02, 8.583e-02, 5.284e-02, -8.748e-03, -1.313e-01, 8.085e-02, 2.055e-02, -4.308e-02)); + r += mul(s1_6, M4(2.875e-02, -6.798e-02, 3.081e-02, 9.796e-02, -8.332e-03, -1.172e-01, 6.169e-02, 2.643e-02, 7.465e-03, -9.946e-02, 6.580e-02, -9.836e-02, 5.662e-02, 1.801e-02, -8.294e-02, -1.172e-01)); + r += mul(s1_7, M4(5.495e-02, -9.538e-04, -3.587e-02, 1.118e-01, -4.600e-02, 2.166e-01, 6.635e-02, 2.927e-02, 5.700e-02, -5.216e-02, 4.287e-02, -3.235e-02, 2.396e-01, -2.299e-01, -1.761e-01, -9.716e-02)); + r += mul(s1_8, M4(5.748e-02, 2.837e-02, -5.760e-02, 4.236e-02, 2.148e-02, 7.319e-02, -7.836e-02, -1.546e-01, -1.942e-02, 2.458e-02, 1.181e-01, 7.293e-02, -2.915e-02, -2.068e-02, -3.460e-02, -1.232e-01)); + r += mul(s2_0, M4(3.632e-02, -1.866e-02, 2.030e-01, -4.521e-02, 8.081e-02, 8.579e-02, 3.637e-02, -7.211e-02, -7.259e-02, -9.637e-02, -3.099e-02, 8.137e-03, -5.767e-03, 1.281e-02, 2.569e-02, 1.671e-02)); + r += mul(s2_1, M4(1.033e-01, 1.037e-02, 
3.472e-01, 3.228e-02, -1.171e-01, 2.538e-02, -5.485e-02, 2.071e-02, 4.456e-02, -1.482e-01, 4.948e-02, 1.478e-01, 1.292e-02, -3.387e-02, 1.172e-02, -5.736e-02)); + r += mul(s2_2, M4(-5.565e-03, 3.590e-02, 5.145e-02, -4.043e-02, -3.857e-02, -1.070e-01, 1.745e-01, 8.157e-02, -2.818e-02, 8.915e-02, 1.199e-01, 1.283e-03, 7.934e-02, -5.920e-02, -1.159e-02, -1.067e-01)); + r += mul(s2_3, M4(4.071e-02, -6.976e-02, 4.140e-02, 1.077e-01, 2.992e-02, 4.067e-02, -3.996e-02, 1.372e-01, 1.240e-02, -1.392e-01, 8.767e-02, -5.693e-02, -5.435e-02, -1.238e-01, 1.790e-02, -1.646e-01)); + r += mul(s2_4, M4(1.929e-01, 2.386e-01, -1.810e-01, 4.484e-01, -2.357e-02, -6.054e-02, -6.168e-02, -2.420e-01, -6.921e-02, -3.946e-01, -2.529e-01, 1.010e-01, 3.271e-01, -6.237e-02, -1.207e-02, -1.547e-01)); + r += mul(s2_5, M4(-1.669e-02, -7.439e-02, 1.247e-01, 8.483e-02, 1.398e-01, -2.795e-02, -8.120e-02, 6.695e-02, 9.151e-02, 3.799e-03, -2.224e-01, 2.380e-01, 9.462e-02, -1.340e-01, -1.219e-01, 4.606e-02)); + r += mul(s2_6, M4(-6.699e-03, 7.497e-02, 7.645e-02, 8.640e-02, -1.486e-02, -3.314e-02, 2.684e-02, -6.462e-02, 3.503e-03, -4.940e-02, 7.803e-02, -4.800e-02, -3.313e-03, -6.215e-02, -1.157e-02, 4.294e-02)); + r += mul(s2_7, M4(5.624e-02, 2.786e-01, -6.348e-02, -8.999e-02, -1.007e-01, 3.742e-03, 1.282e-02, -3.012e-02, 7.205e-02, -4.481e-03, 5.465e-02, -8.988e-02, 5.866e-02, 1.780e-01, -3.760e-01, -2.454e-01)); + r += mul(s2_8, M4(9.374e-03, -7.492e-03, -9.421e-02, -2.475e-02, -2.297e-03, 6.944e-02, 2.745e-02, 7.689e-02, 5.201e-03, 4.984e-02, 1.676e-01, 1.915e-02, 3.006e-03, -9.448e-02, 2.945e-02, 4.559e-02)); + r += mul(s3_0, M4(4.476e-02, 2.980e-01, -1.164e-01, -1.511e-01, 1.188e-01, -3.359e-01, 2.101e-01, -3.195e-01, -2.468e-03, 5.351e-02, -1.329e-01, -1.496e-01, -8.304e-02, 4.541e-02, -8.868e-03, 3.964e-03)); + r += mul(s3_1, M4(-1.505e-01, 2.723e-02, -9.004e-02, 2.151e-01, -3.771e-02, -1.941e-01, 3.681e-01, -2.844e-01, 1.730e-01, 2.429e-02, -2.395e-01, 1.791e-02, -9.777e-02, 5.106e-02, 
-6.921e-03, -9.913e-02)); + r += mul(s3_2, M4(4.079e-02, 7.965e-02, -1.219e-01, -1.476e-01, -8.922e-02, -1.848e-01, 9.943e-02, -6.997e-02, -6.066e-02, 1.087e-01, 2.364e-02, -1.383e-01, 3.358e-02, -1.140e-01, -1.239e-02, -1.002e-01)); + r += mul(s3_3, M4(-2.038e-02, -2.087e-01, -2.781e-02, 2.218e-01, -4.068e-03, -2.081e-01, -2.048e-01, -1.173e-01, 4.122e-02, 1.402e-01, -1.797e-03, 1.284e-01, -6.767e-02, -6.230e-02, -1.694e-01, 2.256e-02)); + r += mul(s3_4, M4(1.255e-01, 2.984e-01, -5.116e-01, 3.584e-01, -1.813e-01, -3.424e-01, 1.837e-01, -5.376e-03, -2.635e-01, -1.358e-01, -5.520e-01, -4.240e-02, 1.161e-01, -6.542e-02, -1.056e-01, -4.898e-02)); + r += mul(s3_5, M4(-1.399e-02, -5.929e-02, -3.799e-02, 5.016e-02, 6.597e-02, -7.250e-02, -9.893e-02, 1.723e-01, 2.748e-01, 2.064e-02, -1.016e-01, -1.385e-02, -2.072e-02, -2.790e-02, -6.064e-02, -2.727e-02)); + r += mul(s3_6, M4(-1.990e-02, -8.099e-02, 2.959e-02, -9.899e-02, -6.404e-02, -7.691e-02, 1.050e-01, 4.486e-02, 6.109e-02, -5.415e-02, 2.468e-02, -3.233e-02, -3.263e-02, 5.391e-02, -2.056e-01, -7.300e-02)); + r += mul(s3_7, M4(-6.814e-02, 1.238e-01, 3.873e-02, -2.108e-02, -1.586e-01, -2.624e-02, -1.219e-02, -1.521e-01, 1.786e-01, -8.109e-02, 2.714e-02, -8.674e-02, 2.940e-02, -4.832e-02, -1.425e-01, 4.084e-02)); + r += mul(s3_8, M4(2.428e-02, -4.195e-02, -6.436e-02, -1.113e-01, -9.072e-03, 1.545e-02, 4.824e-02, -1.465e-01, -4.214e-02, -1.313e-01, -6.468e-02, 2.691e-02, 2.508e-02, 1.355e-01, -1.529e-01, -2.560e-02)); + r += V4(3.107e-02, -3.014e-02, 6.615e-03, -1.582e-02); + return r; +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 
s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 7 +//!DESC conv6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, 
V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.404e-02, 2.997e-02, -3.284e-02, -5.164e-02, -8.224e-02, 1.916e-02, -5.887e-02, -1.108e-01, 5.058e-02, -9.101e-02, -2.799e-02, -1.015e-01, -2.853e-03, -5.750e-02, -3.651e-02, 7.911e-02)); + r += mul(s0_1, M4(7.598e-02, -4.379e-03, -1.111e-01, 1.520e-01, 6.809e-02, 2.420e-02, -1.512e-02, -8.763e-02, -5.779e-02, 2.712e-03, 2.095e-01, -1.363e-02, 6.453e-03, -8.446e-02, -4.731e-02, 2.419e-02)); + r += mul(s0_2, M4(7.891e-02, -1.816e-02, 7.892e-02, 8.296e-02, -1.422e-02, 5.360e-02, 8.078e-02, -2.628e-02, -6.979e-03, -2.700e-02, -8.674e-02, 8.292e-02, -1.558e-02, -4.723e-02, -1.121e-01, 6.815e-02)); + r += mul(s0_3, M4(2.828e-02, -4.739e-02, -2.074e-01, -3.196e-01, -3.889e-02, 8.126e-02, 1.927e-01, 2.481e-01, 1.304e-01, -2.371e-02, -3.977e-02, -9.958e-02, -1.919e-03, 3.208e-02, 4.055e-02, -1.030e-01)); + r += mul(s0_4, M4(-1.040e-01, 1.768e-01, -8.376e-02, 4.783e-01, -2.102e-01, -2.036e-01, 2.606e-01, -5.867e-02, 1.840e-01, -9.110e-02, 9.612e-02, 2.183e-01, -1.455e-01, 1.636e-01, 2.425e-01, -2.316e-01)); + r += mul(s0_5, M4(1.393e-01, -7.825e-02, -6.535e-02, 6.115e-02, 7.806e-02, -7.575e-02, 2.447e-03, -1.255e-02, -8.062e-02, -1.768e-02, 1.831e-02, 1.318e-02, -1.194e-01, 1.821e-02, -6.475e-02, 9.178e-02)); + r += mul(s0_6, M4(2.158e-02, 5.031e-02, 5.894e-02, -2.206e-02, 1.642e-02, -4.703e-02, 7.177e-03, 4.832e-02, -6.795e-03, 1.200e-01, 7.350e-02, 4.357e-02, -1.374e-02, -2.273e-02, 5.343e-02, -3.053e-02)); + r += mul(s0_7, M4(-2.053e-03, 2.418e-01, -7.675e-02, 7.563e-02, 1.879e-01, -5.891e-02, 1.363e-02, -5.479e-02, -2.454e-01, 3.717e-02, -6.543e-02, -9.897e-02, 1.506e-01, -1.457e-02, 7.962e-02, 2.634e-02)); + r += mul(s0_8, M4(-4.787e-02, 7.421e-02, 4.307e-02, -1.165e-01, 
-1.552e-01, -2.697e-02, -9.967e-02, 3.872e-02, -6.328e-02, 1.037e-01, -3.953e-02, 8.554e-02, 4.862e-02, -2.809e-02, -2.267e-02, -1.011e-02)); + r += mul(s1_0, M4(6.553e-02, -3.868e-03, -3.811e-02, 6.105e-03, 8.599e-03, 4.356e-02, 7.104e-02, -1.001e-01, -8.656e-02, -2.863e-02, 4.224e-02, -4.758e-02, 8.669e-04, 2.007e-02, 4.083e-02, 7.794e-02)); + r += mul(s1_1, M4(-2.462e-02, -1.574e-02, -4.985e-03, 1.459e-01, 1.573e-03, 8.120e-02, -1.362e-01, -1.597e-01, -2.973e-02, -2.999e-01, 1.528e-03, 4.741e-01, 9.943e-02, 7.836e-02, 1.532e-02, -4.459e-02)); + r += mul(s1_2, M4(1.434e-02, -2.119e-02, -3.105e-02, -1.281e-02, -1.791e-01, -3.910e-02, 5.519e-02, 1.343e-01, -1.333e-01, -8.837e-02, -1.076e-01, 1.067e-01, -1.577e-02, 6.211e-02, 1.116e-02, -6.399e-02)); + r += mul(s1_3, M4(-1.004e-01, 2.091e-02, -5.478e-02, -1.756e-01, -9.005e-02, 9.037e-02, 1.250e-01, 8.268e-02, 1.398e-01, -1.338e-01, -1.158e-01, -6.447e-02, 4.636e-02, 8.153e-02, 3.763e-02, -5.705e-02)); + r += mul(s1_4, M4(3.115e-01, -3.808e-01, 1.332e-01, 8.542e-02, 1.126e-01, -1.795e-01, 2.202e-01, 1.600e-01, 1.475e-01, 1.186e-01, 1.995e-01, -1.733e-01, -4.463e-01, 5.858e-01, 7.088e-02, -4.531e-01)); + r += mul(s1_5, M4(-5.391e-02, 5.806e-02, 7.177e-02, 2.734e-01, -1.093e-01, -7.192e-02, -2.031e-03, -9.404e-02, -2.599e-02, -2.688e-03, 2.064e-02, 1.819e-01, 8.417e-02, -3.743e-02, -5.253e-02, -2.005e-01)); + r += mul(s1_6, M4(1.709e-02, 5.473e-02, 1.583e-02, -1.412e-01, 3.210e-02, -5.364e-02, -1.279e-02, -4.324e-02, -2.110e-01, 6.215e-02, -7.099e-02, 1.011e-01, -3.931e-03, -3.861e-03, 2.847e-02, 9.082e-02)); + r += mul(s1_7, M4(-2.610e-01, 1.499e-01, -5.311e-02, 1.480e-02, 1.285e-02, -1.043e-01, -1.002e-01, -3.947e-02, -1.508e-01, 1.144e-01, 1.443e-01, -3.456e-02, 1.332e-01, 1.841e-01, 2.120e-01, -6.647e-03)); + r += mul(s1_8, M4(-1.090e-02, -3.561e-02, 3.492e-02, -7.376e-02, -8.500e-02, -4.378e-02, -1.281e-02, 2.313e-02, -2.974e-01, -1.025e-01, -5.699e-02, 2.294e-02, -4.649e-02, 6.585e-02, 7.421e-02, -6.087e-02)); + 
r += mul(s2_0, M4(9.719e-02, -1.495e-02, 1.597e-01, 6.545e-03, 1.616e-02, -1.136e-03, 4.194e-02, -2.144e-02, -4.504e-02, -2.040e-02, 3.672e-02, 7.161e-02, 5.032e-02, 7.780e-02, 7.739e-02, 6.251e-02)); + r += mul(s2_1, M4(1.598e-01, 7.514e-02, -3.737e-02, -4.716e-02, -1.324e-01, 1.908e-02, -2.415e-02, -6.085e-02, 9.660e-02, 5.538e-02, -6.763e-02, -8.017e-03, 4.100e-02, -4.304e-03, 9.956e-02, 3.581e-02)); + r += mul(s2_2, M4(4.912e-02, -3.518e-02, -3.550e-02, 5.028e-02, 8.666e-02, 1.122e-01, -1.027e-02, -1.043e-01, 8.323e-02, 4.898e-03, -1.090e-02, -1.912e-02, 2.538e-02, -5.575e-03, 4.678e-02, -5.409e-02)); + r += mul(s2_3, M4(8.412e-03, 1.402e-01, 2.510e-01, -2.978e-02, 2.691e-02, -7.400e-02, -1.719e-01, -2.765e-02, 7.918e-02, -8.331e-03, -2.848e-02, 1.808e-01, -5.017e-02, -1.434e-02, -5.966e-02, 4.193e-04)); + r += mul(s2_4, M4(-7.351e-02, -2.898e-01, -3.926e-01, -2.116e-01, 1.478e-01, -9.418e-02, -1.530e-01, -2.963e-01, -1.190e-01, 4.534e-01, -2.833e-01, -2.530e-01, 2.825e-01, -2.388e-02, 7.258e-02, 4.468e-02)); + r += mul(s2_5, M4(-7.228e-02, 4.241e-02, -1.818e-02, -4.262e-02, -1.139e-01, -5.711e-02, -7.106e-02, -7.861e-02, -4.809e-02, 1.474e-01, -7.394e-03, -4.692e-02, 2.199e-02, 1.038e-01, -5.060e-02, 8.474e-02)); + r += mul(s2_6, M4(1.236e-01, -7.740e-02, 4.034e-02, 2.473e-02, -1.046e-05, 4.154e-02, -5.698e-03, 2.456e-02, -5.245e-02, -1.311e-02, -1.292e-01, -5.838e-02, -4.741e-02, -1.506e-02, -5.012e-02, 2.703e-03)); + r += mul(s2_7, M4(8.191e-02, 1.102e-01, 4.677e-02, -1.667e-02, -2.850e-01, 1.548e-01, -4.856e-03, 1.074e-03, 1.559e-02, 3.353e-02, 2.302e-02, -4.748e-02, -7.978e-02, -1.328e-02, -8.056e-02, 1.167e-02)); + r += mul(s2_8, M4(6.904e-02, -2.015e-02, 7.204e-02, -3.156e-02, -1.374e-01, 5.652e-02, 5.052e-02, -4.269e-02, 3.874e-02, 1.011e-02, 6.918e-02, -5.973e-02, 1.694e-01, 6.863e-02, 8.218e-02, 4.696e-02)); + r += mul(s3_0, M4(1.036e-02, -9.171e-02, 2.046e-01, 1.921e-02, 2.592e-03, -3.041e-02, -1.027e-01, -1.301e-01, -9.272e-03, 5.752e-02, 4.755e-02, 
-1.453e-02, -3.280e-03, -3.442e-02, -1.207e-02, -2.463e-02)); + r += mul(s3_1, M4(2.136e-01, -2.675e-02, -1.771e-01, 2.694e-02, 3.292e-02, 3.093e-02, 2.271e-01, 2.195e-01, -5.501e-02, 5.664e-03, -2.300e-01, 3.389e-02, -6.249e-02, 3.345e-02, -5.372e-02, -5.311e-02)); + r += mul(s3_2, M4(3.520e-02, -1.165e-01, -7.715e-02, 3.538e-02, 2.150e-01, 1.602e-01, 1.783e-01, -6.312e-02, 1.099e-01, 1.013e-01, -6.535e-02, -1.370e-01, 2.222e-02, 8.245e-02, 3.791e-02, -5.384e-02)); + r += mul(s3_3, M4(4.063e-03, -2.201e-01, 6.289e-02, 1.063e-01, 7.458e-02, -9.281e-02, -1.792e-01, -1.193e-01, -2.276e-02, -2.212e-02, -1.820e-01, -3.791e-02, 6.566e-02, 3.899e-02, 3.693e-02, 2.944e-02)); + r += mul(s3_4, M4(-5.110e-01, -3.091e-01, -7.583e-01, 2.226e-01, -4.668e-02, -4.517e-01, -8.393e-03, -3.556e-01, -6.915e-01, 1.907e-01, -5.686e-01, -5.042e-02, 5.118e-01, 1.140e-01, 3.401e-01, -2.682e-02)); + r += mul(s3_5, M4(-7.635e-03, -9.166e-02, -2.271e-02, -5.200e-02, 1.570e-02, -5.974e-03, 1.862e-01, 7.035e-02, -2.859e-01, 4.639e-02, 4.377e-02, -4.687e-03, 1.782e-01, 1.578e-01, 3.609e-02, -2.364e-02)); + r += mul(s3_6, M4(1.410e-02, -1.417e-01, -9.227e-02, 8.895e-03, 8.245e-02, 1.675e-03, 1.763e-02, -4.726e-02, -3.146e-02, -4.661e-02, -1.085e-01, 6.988e-02, 5.238e-02, 8.178e-02, 7.645e-02, -6.760e-02)); + r += mul(s3_7, M4(2.610e-01, 1.194e-02, -1.065e-02, -9.494e-03, -1.750e-02, 1.167e-02, 2.371e-02, 3.010e-02, 7.709e-02, 2.302e-02, 9.363e-02, 2.560e-02, -1.188e-01, -6.296e-02, -2.176e-02, -1.106e-01)); + r += mul(s3_8, M4(1.396e-01, -1.282e-01, 3.914e-02, -2.227e-02, -1.906e-02, 5.945e-02, -1.260e-02, -7.651e-02, 8.742e-02, 1.698e-02, 8.794e-02, -3.933e-02, 2.223e-02, -2.413e-03, -2.701e-02, -4.654e-02)); + r += V4(1.289e-02, -6.634e-03, 3.674e-02, 1.609e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 
s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.600e-02, -1.109e-01, 7.068e-02, -1.811e-01, 1.063e-02, -6.863e-02, -6.434e-02, 1.375e-02, 2.456e-01, -4.348e-02, 5.149e-02, 2.022e-02, -3.151e-02, 1.061e-02, 1.882e-02, 3.657e-02)); + r += mul(s0_1, M4(6.848e-02, 6.202e-02, 6.749e-02, 2.417e-01, 8.964e-02, 1.551e-01, 3.107e-03, 5.573e-02, -1.239e-01, -6.104e-02, 2.686e-01, -1.557e-01, -2.527e-02, 3.832e-02, -4.138e-02, 1.948e-01)); + r += mul(s0_2, M4(-2.463e-01, -3.228e-02, 5.194e-02, -1.168e-01, -8.813e-02, -2.266e-02, -8.269e-03, 8.535e-02, 2.962e-02, 4.934e-02, 1.222e-02, -2.672e-02, 8.425e-02, 7.315e-02, -1.661e-03, 6.929e-02)); + r += mul(s0_3, M4(4.423e-02, -5.269e-02, -8.159e-02, -4.065e-02, -1.822e-01, 1.176e-01, 5.985e-02, 1.526e-01, 6.602e-02, -1.554e-01, -9.981e-02, -5.084e-02, -2.635e-02, -1.488e-01, -3.293e-02, -1.259e-01)); + r += mul(s0_4, M4(5.130e-01, 1.827e-02, -8.425e-02, -1.359e-01, 6.076e-02, -3.010e-02, -3.517e-02, -1.604e-01, 6.842e-02, 4.031e-01, -4.483e-01, 1.444e-01, 7.936e-02, 2.010e-01, 1.000e-02, 8.051e-02)); + r += mul(s0_5, M4(1.784e-02, -5.340e-02, 8.986e-03, -8.302e-02, 1.744e-01, -5.748e-02, 3.942e-02, 1.104e-02, -1.938e-01, -5.557e-02, -6.293e-03, -1.411e-01, 7.521e-03, 9.413e-02, 2.034e-02, 1.249e-01)); + r += mul(s0_6, M4(2.540e-02, 2.131e-01, 4.611e-02, 9.382e-02, 5.992e-02, -6.154e-02, -3.255e-02, -4.284e-02, -1.709e-02, 3.054e-02, -7.711e-02, -3.413e-02, 2.668e-02, 7.784e-03, 1.578e-01, -3.615e-03)); + r += mul(s0_7, M4(-6.617e-02, -1.114e-01, 3.306e-02, 2.025e-02, -2.133e-01, -2.866e-01, 1.364e-01, 2.138e-02, 2.444e-02, -1.753e-02, 3.907e-02, 6.912e-03, -3.994e-02, -2.617e-02, 1.246e-02, -3.568e-02)); + r += mul(s0_8, M4(1.790e-02, -6.453e-02, 1.921e-02, -2.243e-02, 9.646e-02, 2.163e-01, -5.867e-02, 3.342e-02, 5.237e-02, 1.622e-01, -1.256e-01, 1.834e-02, 1.350e-01, 3.223e-02, -1.533e-02, 2.442e-02)); + r += mul(s1_0, 
M4(-6.017e-02, -1.388e-01, 9.635e-02, -5.857e-02, 4.819e-02, -8.395e-02, -2.810e-02, -5.550e-02, 1.881e-01, 4.630e-02, 5.251e-02, 1.190e-02, -5.155e-02, -4.143e-02, 4.654e-02, 1.323e-01)); + r += mul(s1_1, M4(-1.534e-01, 2.133e-01, 7.453e-03, 3.310e-01, 3.330e-01, -2.181e-01, 7.039e-03, -3.036e-01, 5.444e-02, 2.537e-02, 2.069e-01, -4.402e-01, -1.355e-01, 2.013e-01, -5.209e-02, 8.730e-02)); + r += mul(s1_2, M4(-1.847e-01, -1.391e-01, 1.870e-02, 1.309e-01, 2.844e-01, 3.281e-02, 8.661e-02, -1.124e-01, -2.647e-01, -1.385e-01, 4.175e-02, -2.057e-01, -2.192e-02, -4.144e-02, -1.024e-02, 1.893e-01)); + r += mul(s1_3, M4(6.497e-02, -1.624e-01, -1.987e-01, -9.848e-02, 2.015e-02, -1.708e-01, 1.357e-02, -7.956e-03, 6.654e-02, 1.122e-01, 5.543e-02, 1.017e-01, -9.423e-02, -6.570e-02, 2.641e-02, -1.932e-02)); + r += mul(s1_4, M4(5.957e-01, -3.290e-03, -2.924e-02, -1.725e-01, 3.083e-01, -2.819e-01, -3.768e-02, -1.529e-01, -1.565e-01, 1.963e-01, -3.850e-01, -1.976e-02, 1.505e-01, -6.149e-02, 1.786e-01, -7.251e-02)); + r += mul(s1_5, M4(4.111e-01, 2.475e-01, 3.496e-03, -1.918e-02, 3.022e-01, -3.534e-02, -1.221e-01, -1.536e-02, 2.677e-03, -5.321e-02, -4.602e-03, -3.425e-02, -9.627e-02, 1.600e-01, -1.031e-02, 1.667e-01)); + r += mul(s1_6, M4(-1.234e-01, -8.749e-03, 2.104e-01, 4.344e-02, 7.180e-03, -2.356e-01, -2.194e-01, -7.217e-02, 5.686e-02, 1.327e-01, -1.764e-01, 3.995e-02, -4.611e-02, 5.706e-02, 1.884e-01, 5.417e-02)); + r += mul(s1_7, M4(-2.653e-02, -8.258e-02, -1.446e-01, -1.117e-02, 7.767e-02, -2.549e-01, 3.667e-02, -1.282e-02, -6.408e-03, -1.133e-01, -3.932e-01, -2.699e-02, -8.717e-02, -6.648e-02, 6.511e-03, -5.576e-02)); + r += mul(s1_8, M4(-1.220e-01, -7.955e-02, -6.599e-02, 1.149e-02, 9.638e-02, 1.366e-01, -2.627e-02, 8.338e-03, 1.672e-01, 1.442e-01, -1.766e-01, -1.002e-03, 7.156e-02, -8.644e-02, 6.749e-02, 4.908e-02)); + r += mul(s2_0, M4(-7.895e-02, -4.986e-02, 9.949e-02, -6.798e-03, 6.860e-02, 9.700e-04, -2.217e-02, -3.504e-02, 3.005e-02, 3.853e-02, 9.426e-02, 
-1.053e-02, -9.779e-02, -5.977e-02, 1.559e-02, 1.355e-03)); + r += mul(s2_1, M4(-4.166e-02, 2.806e-02, 3.031e-02, -1.847e-01, 5.010e-02, -2.266e-01, 6.752e-02, -8.594e-02, -1.181e-01, -3.677e-02, -4.894e-02, -3.854e-01, -9.499e-02, 8.388e-02, 2.762e-02, -6.678e-02)); + r += mul(s2_2, M4(-5.632e-02, 1.572e-02, 1.683e-02, 7.935e-02, 9.419e-02, 1.391e-01, 7.486e-03, -9.942e-02, 4.651e-02, 6.332e-03, -9.859e-04, 3.178e-02, -1.804e-01, -1.141e-01, -1.244e-02, -2.914e-01)); + r += mul(s2_3, M4(-9.909e-02, -9.091e-02, -1.990e-01, -6.067e-02, 6.513e-02, 3.841e-02, -1.060e-01, 9.766e-03, -5.435e-02, 1.849e-01, -1.739e-01, 3.043e-02, 2.413e-02, -1.910e-02, -1.332e-02, 2.362e-02)); + r += mul(s2_4, M4(2.024e-02, -1.499e-01, -2.345e-01, 6.952e-02, 1.034e-01, 8.021e-03, 5.147e-02, 4.872e-02, -5.598e-02, -1.413e-01, 1.956e-01, -1.350e-02, -8.105e-02, -4.961e-03, -4.300e-03, 5.564e-02)); + r += mul(s2_5, M4(2.284e-02, 3.897e-02, -3.260e-02, -6.460e-02, -1.718e-01, -6.209e-02, 1.432e-02, -8.207e-02, 5.573e-03, 1.411e-01, -7.778e-02, 6.243e-02, -5.605e-02, -1.684e-01, 1.622e-02, -1.153e-01)); + r += mul(s2_6, M4(-1.053e-01, 4.113e-02, 1.887e-01, 5.750e-02, -5.854e-03, 8.616e-02, -1.401e-02, -3.050e-03, 2.563e-02, -1.300e-02, 4.781e-02, -8.556e-03, 2.668e-02, 1.663e-02, 1.541e-02, 1.076e-02)); + r += mul(s2_7, M4(3.180e-02, 1.162e-01, 1.821e-01, -1.556e-02, 2.421e-02, 9.821e-03, -3.662e-02, -1.845e-02, 1.169e-01, 1.404e-01, -7.214e-02, -7.006e-03, 4.773e-02, -8.450e-02, -1.174e-01, -1.970e-02)); + r += mul(s2_8, M4(-1.512e-02, -1.228e-01, 8.843e-02, -5.915e-03, 7.202e-02, 7.004e-02, -3.259e-02, -3.609e-02, 2.087e-02, -6.377e-02, 1.081e-01, -2.104e-02, -5.860e-02, -1.030e-01, -1.644e-02, 4.554e-03)); + r += mul(s3_0, M4(-1.026e-01, -2.673e-01, 2.419e-01, 1.951e-02, 4.828e-02, -1.120e-01, 9.974e-02, -1.526e-01, 9.792e-03, -1.194e-02, 2.884e-02, -3.913e-02, -4.019e-02, 1.306e-03, -9.133e-03, 2.829e-02)); + r += mul(s3_1, M4(5.099e-02, 1.166e-01, 9.745e-02, -6.132e-01, 9.886e-02, 
-1.197e-01, 1.625e-01, 4.334e-02, 1.472e-02, -2.947e-01, 7.289e-02, -4.089e-01, 5.513e-02, -2.122e-01, 1.863e-03, -1.450e-01)); + r += mul(s3_2, M4(-8.166e-02, 7.367e-02, 6.029e-03, 6.953e-02, -2.331e-01, -1.240e-01, 5.302e-02, -2.107e-01, 1.353e-01, 7.785e-02, -4.803e-02, -4.513e-02, 1.067e-01, -3.485e-02, -8.357e-03, -2.302e-01)); + r += mul(s3_3, M4(5.918e-02, 7.926e-02, -3.945e-01, 1.734e-01, 4.372e-02, 3.205e-02, -9.344e-02, -4.708e-02, -1.211e-01, 2.742e-02, 2.032e-02, 1.747e-02, -6.007e-02, 5.451e-02, -8.460e-02, -2.765e-02)); + r += mul(s3_4, M4(6.409e-01, -1.212e-01, -1.105e-01, 5.278e-02, 5.982e-01, -3.474e-01, -1.078e-01, 2.872e-02, -1.287e-03, -2.148e-01, -1.752e-01, -8.497e-02, -3.730e-01, 8.165e-03, -5.113e-02, 1.547e-01)); + r += mul(s3_5, M4(-8.520e-02, 1.228e-01, 6.252e-03, 1.262e-01, -2.339e-01, -1.996e-01, 1.169e-01, -1.452e-01, -1.209e-02, 1.329e-01, 1.228e-01, -1.087e-01, -1.669e-01, -2.158e-01, 9.188e-02, -4.751e-02)); + r += mul(s3_6, M4(-2.664e-02, -4.621e-02, 1.539e-01, 1.262e-02, 2.615e-02, 1.102e-01, -3.000e-02, 2.546e-02, 2.668e-02, 4.050e-02, -1.118e-01, 2.701e-02, 1.231e-02, 2.743e-02, 3.393e-02, -1.372e-02)); + r += mul(s3_7, M4(5.715e-02, -1.962e-02, 8.036e-02, -3.092e-02, 1.051e-01, -8.764e-02, -1.678e-02, 3.628e-02, -1.425e-01, -1.649e-03, -2.529e-01, 7.452e-02, 8.522e-02, -5.112e-02, 1.689e-01, -2.235e-02)); + r += mul(s3_8, M4(-8.426e-02, -1.070e-01, 1.473e-01, -5.678e-02, -4.167e-02, -8.678e-02, -9.901e-02, 1.746e-02, -1.020e-01, -1.905e-01, 6.511e-02, -1.759e-02, -6.187e-02, 5.960e-03, -9.606e-03, 3.401e-02)); + r += V4(-5.133e-03, 1.777e-02, 4.068e-03, 1.012e-02); + return r; +} + +void Pass7(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 
0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 8 +//!DESC conv7 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) 
V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.631e-02, 3.701e-02, -2.434e-02, 8.471e-02, 4.263e-02, 5.692e-02, 5.410e-02, 4.454e-03, -5.624e-02, -7.308e-02, -8.138e-03, -1.520e-02, -5.507e-03, -4.765e-02, 3.131e-02, -6.383e-02)); + r += mul(s0_1, M4(1.588e-02, 1.464e-02, 2.917e-02, -3.454e-02, -1.494e-03, -1.481e-02, -9.514e-02, -4.334e-02, 3.919e-02, 3.710e-02, 4.232e-02, 1.561e-01, 2.753e-02, -9.642e-02, -2.388e-01, -4.597e-02)); + r += mul(s0_2, M4(5.193e-03, -9.561e-03, 5.468e-02, -6.118e-02, 3.707e-02, 5.846e-02, 7.340e-02, 2.255e-02, -3.022e-02, 5.151e-02, 2.252e-02, 7.787e-03, 2.760e-02, 3.242e-02, 9.113e-02, -4.307e-02)); + r += mul(s0_3, M4(-8.921e-03, -7.181e-02, -1.512e-01, 4.666e-02, -4.676e-02, -1.771e-02, 6.837e-02, 8.735e-02, 3.111e-02, 7.935e-02, -4.188e-03, -1.714e-01, 2.293e-02, 9.058e-02, -1.764e-01, 8.294e-03)); + r += mul(s0_4, M4(1.993e-01, 1.421e-01, 7.711e-02, 6.236e-02, -3.828e-02, -1.568e-01, -2.417e-01, -1.350e-01, -8.580e-02, -2.849e-01, -1.312e-01, -3.715e-01, -3.884e-02, -4.602e-01, -1.172e-01, 2.457e-01)); + r += mul(s0_5, M4(1.442e-01, 6.769e-02, 1.666e-01, -5.753e-02, -8.668e-02, -7.359e-02, 3.895e-03, 1.682e-03, 1.593e-02, 9.310e-02, -4.170e-03, 1.061e-01, 4.005e-02, 7.271e-02, -1.133e-02, -6.095e-02)); + r += mul(s0_6, M4(1.652e-02, -4.932e-02, -1.026e-01, -4.651e-03, 2.605e-02, 4.361e-02, -8.225e-02, -1.536e-02, -1.655e-02, 5.030e-03, 4.067e-02, 8.822e-02, -2.308e-02, 8.050e-02, -6.368e-02, 6.068e-03)); + r += mul(s0_7, M4(5.290e-02, -1.020e-02, 1.377e-02, -2.169e-02, 3.890e-02, -7.155e-02, 6.368e-02, -3.030e-02, -3.352e-02, -7.496e-02, -3.534e-02, 4.126e-02, -1.180e-02, -1.737e-01, 
-3.262e-02, -1.906e-01)); + r += mul(s0_8, M4(5.808e-02, 8.817e-02, 5.857e-02, -5.555e-04, 3.944e-02, -1.533e-03, -1.000e-02, -2.643e-02, 8.856e-03, -9.440e-02, -1.108e-01, -3.988e-03, -7.107e-02, -1.128e-01, -6.348e-02, -5.594e-02)); + r += mul(s1_0, M4(6.233e-02, 2.632e-03, -2.944e-02, -7.241e-03, 6.527e-02, 5.843e-02, 7.877e-02, -3.049e-03, -4.898e-02, -9.257e-02, -7.349e-02, 2.061e-02, 1.027e-02, -9.056e-03, 1.359e-02, 6.851e-03)); + r += mul(s1_1, M4(5.769e-02, -1.509e-01, -4.090e-01, -3.105e-02, 6.866e-02, 8.362e-03, -2.706e-01, -1.117e-01, -5.216e-03, 1.158e-01, 8.544e-02, 7.913e-03, -1.341e-02, -4.537e-02, -6.818e-02, -3.521e-02)); + r += mul(s1_2, M4(7.527e-02, 4.825e-02, 1.968e-01, 4.230e-02, -9.816e-03, 1.132e-01, 1.272e-01, 6.139e-02, -5.991e-02, 8.797e-02, 2.490e-02, 8.849e-02, 6.118e-03, 8.134e-02, 1.287e-01, 1.401e-02)); + r += mul(s1_3, M4(3.013e-02, -1.096e-01, -3.818e-01, -2.105e-01, -7.226e-02, 1.442e-01, 1.028e-01, 1.038e-01, 6.223e-02, 2.133e-03, 1.029e-01, 5.877e-02, 3.329e-02, -1.971e-02, -1.260e-01, -4.246e-02)); + r += mul(s1_4, M4(4.756e-01, 6.473e-02, 1.863e-01, 2.217e-01, -3.590e-01, -6.434e-01, -1.796e-01, -1.997e-02, -2.041e-01, 8.227e-02, 1.277e-01, -2.091e-01, 1.817e-02, -2.976e-01, -3.877e-01, 8.980e-02)); + r += mul(s1_5, M4(3.625e-01, -6.200e-02, -1.578e-02, -6.555e-02, -1.656e-01, 1.131e-02, 3.952e-02, -5.259e-02, 2.280e-01, 2.605e-01, -1.093e-01, 2.026e-01, -4.178e-02, 2.472e-02, 1.802e-01, 5.444e-02)); + r += mul(s1_6, M4(2.555e-02, -6.827e-02, -1.084e-01, 3.976e-02, 1.274e-02, 6.716e-02, -4.575e-02, -2.343e-02, 2.861e-02, -7.587e-04, 5.146e-02, 1.335e-01, -1.543e-02, -4.756e-02, -5.113e-02, 1.725e-02)); + r += mul(s1_7, M4(2.567e-03, -1.154e-01, 2.448e-02, 3.738e-03, 1.572e-01, -1.440e-01, 7.550e-02, -1.769e-02, 6.346e-02, 5.747e-02, 9.293e-03, -1.939e-01, 8.411e-03, -2.033e-01, -1.109e-01, -1.597e-01)); + r += mul(s1_8, M4(1.227e-02, 1.120e-01, 9.656e-02, 1.212e-02, 4.532e-02, -1.865e-02, 2.360e-02, -2.847e-02, -1.250e-01, 
-1.511e-02, -4.564e-02, -2.920e-02, -3.807e-02, -2.316e-02, 2.247e-02, 4.428e-02)); + r += mul(s2_0, M4(-1.919e-03, -4.392e-03, -1.503e-01, 2.610e-01, 3.634e-02, 1.206e-02, 3.478e-02, -7.205e-02, 4.359e-02, 5.512e-03, -2.062e-02, -7.692e-02, -5.576e-03, -1.764e-02, -8.326e-02, 1.027e-01)); + r += mul(s2_1, M4(4.291e-02, -1.704e-02, 3.850e-01, -2.389e-03, 1.603e-02, 1.502e-02, -6.005e-04, -1.089e-01, 2.820e-02, -1.144e-03, -1.296e-02, 3.798e-02, 1.740e-02, -4.183e-02, 4.017e-02, 1.070e-01)); + r += mul(s2_2, M4(-1.513e-02, -7.006e-02, -6.258e-02, -4.526e-02, 2.048e-02, 6.189e-02, -8.497e-03, 5.349e-02, 1.713e-02, -7.838e-02, -9.167e-02, 1.616e-01, 1.160e-02, -2.925e-03, 6.162e-03, 1.576e-02)); + r += mul(s2_3, M4(1.732e-02, -7.628e-03, -3.389e-01, 9.102e-02, -1.819e-02, 3.728e-02, 4.961e-02, 7.595e-02, 9.210e-03, -2.414e-02, -1.018e-01, -2.461e-02, -2.735e-02, -1.492e-01, -9.396e-02, -1.685e-01)); + r += mul(s2_4, M4(-3.447e-01, -1.770e-01, 3.567e-01, 3.690e-02, -4.678e-02, 1.335e-01, 9.902e-02, -4.134e-02, -1.305e-02, -2.474e-02, 5.640e-02, -4.424e-01, -2.472e-02, -5.142e-02, -1.626e-01, 7.690e-02)); + r += mul(s2_5, M4(9.079e-02, -2.009e-01, -1.323e-01, -3.918e-02, 9.400e-03, 8.146e-02, 5.737e-02, -1.784e-02, -1.499e-01, -5.093e-02, -1.098e-01, 3.289e-02, 1.768e-02, 1.473e-02, 6.505e-02, 3.706e-02)); + r += mul(s2_6, M4(3.560e-02, 5.268e-02, -1.088e-01, 2.118e-02, 2.692e-02, 1.300e-02, 3.629e-02, -1.314e-01, -3.040e-03, -2.379e-02, 4.084e-02, 2.610e-02, -5.023e-02, -4.076e-03, 8.663e-02, 5.673e-02)); + r += mul(s2_7, M4(6.639e-02, -1.538e-01, 1.271e-03, 9.794e-02, 2.727e-02, -4.575e-03, 1.234e-02, 3.835e-03, -1.786e-02, -2.353e-02, -8.215e-06, 3.503e-02, 1.446e-02, -8.232e-02, -9.569e-02, -2.136e-02)); + r += mul(s2_8, M4(-6.274e-02, 3.597e-02, -1.374e-02, 1.240e-01, -4.918e-03, 9.546e-02, 2.940e-02, 3.907e-02, -3.768e-02, -3.928e-04, -2.817e-02, -2.097e-03, -1.098e-01, -6.711e-02, -1.060e-02, -3.996e-02)); + r += mul(s3_0, M4(-8.705e-04, 6.913e-03, -8.508e-02, 
9.533e-02, -5.128e-02, 1.287e-01, -1.359e-01, -1.808e-01, 3.749e-02, -5.344e-02, -1.595e-01, -1.543e-01, 3.085e-02, 5.201e-02, 3.952e-02, -3.127e-02)); + r += mul(s3_1, M4(4.285e-02, 1.749e-02, 4.436e-02, 1.969e-01, 7.142e-02, 4.295e-01, 2.308e-01, -2.510e-01, 6.257e-02, -1.732e-01, 8.754e-02, -1.097e-01, -4.325e-03, -7.495e-03, -3.973e-02, 5.667e-03)); + r += mul(s3_2, M4(3.658e-02, 5.831e-03, 5.057e-02, -4.212e-02, -3.096e-01, 1.765e-01, 5.637e-02, 2.513e-01, 2.164e-01, -5.145e-02, -1.441e-02, 1.741e-02, 1.152e-02, -3.634e-03, -5.337e-02, 4.853e-03)); + r += mul(s3_3, M4(-2.759e-02, -1.018e-01, -2.492e-01, -1.179e-02, 2.017e-02, 3.613e-01, 4.407e-01, 2.802e-01, 2.645e-02, -3.572e-02, -4.548e-02, 6.994e-02, -1.924e-02, -6.029e-02, -4.268e-02, -3.740e-01)); + r += mul(s3_4, M4(-2.998e-02, -1.127e-01, 4.528e-02, -2.201e-03, -1.362e-01, -1.173e-02, 2.897e-01, 1.886e-01, 4.796e-02, -9.274e-03, -4.043e-02, -1.611e-01, 1.040e-01, 1.789e-01, 7.764e-02, 5.576e-02)); + r += mul(s3_5, M4(2.363e-02, 9.824e-02, 6.683e-02, 4.886e-02, 1.781e-01, -2.578e-01, -2.830e-01, -2.291e-01, 2.046e-01, -7.397e-02, -9.421e-03, -1.329e-01, 4.403e-02, -5.740e-02, -8.843e-02, -1.077e-01)); + r += mul(s3_6, M4(-9.686e-03, -6.426e-02, -8.451e-02, -1.543e-02, 4.212e-02, -8.461e-02, -6.925e-02, 1.326e-01, -2.320e-02, 1.063e-02, 5.104e-03, 2.349e-02, -1.832e-02, 1.542e-01, 2.940e-01, 9.564e-02)); + r += mul(s3_7, M4(-4.924e-02, 2.808e-02, 6.840e-02, -2.073e-02, 1.797e-01, -1.848e-02, 1.170e-01, -3.062e-01, 2.053e-02, -2.797e-03, -1.919e-02, -8.586e-02, 6.958e-02, 2.482e-01, 1.206e-01, -2.592e-01)); + r += mul(s3_8, M4(-1.581e-03, 1.504e-02, -6.646e-03, -2.199e-02, -2.461e-01, 2.447e-01, 2.928e-02, 6.105e-03, 3.845e-02, -8.580e-03, 2.356e-02, -3.423e-02, -5.714e-02, -1.385e-01, -1.413e-01, -1.655e-01)); + r += V4(4.493e-03, 9.879e-03, 4.742e-03, 7.609e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 
s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-7.606e-03, -1.450e-01, 7.048e-02, -5.560e-02, -2.107e-02, -8.767e-02, 4.197e-02, -3.903e-02, 2.785e-02, 4.231e-02, -8.629e-02, 7.399e-02, -4.145e-02, 2.668e-02, 4.936e-03, 2.811e-02)); + r += mul(s0_1, M4(-2.178e-02, -9.573e-02, 4.466e-02, -4.596e-02, 1.264e-01, -1.470e-02, -2.223e-02, -1.171e-01, 9.840e-02, 3.306e-02, -3.467e-02, 3.803e-02, -5.662e-02, 4.639e-03, -4.650e-02, -6.032e-02)); + r += mul(s0_2, M4(-2.858e-02, -5.296e-02, 3.520e-02, 8.093e-03, -5.357e-02, -3.564e-02, 1.554e-02, 1.925e-02, -1.468e-02, 6.853e-02, 5.027e-02, 2.541e-02, 6.782e-02, 8.630e-02, -1.517e-02, 7.121e-02)); + r += mul(s0_3, M4(3.350e-02, 7.664e-02, 1.083e-01, 4.846e-02, 1.295e-02, 2.439e-02, 6.116e-02, -6.071e-02, 6.696e-02, -6.066e-02, -1.552e-01, 1.984e-02, -1.023e-01, -1.433e-02, 2.393e-04, -3.211e-02)); + r += mul(s0_4, M4(-8.745e-02, -1.353e-01, -9.550e-02, -5.556e-02, 2.083e-01, -6.727e-02, 8.289e-02, 4.910e-01, -7.763e-02, 2.153e-01, 4.773e-01, -1.019e-01, -2.608e-01, -2.339e-01, -1.347e-01, 3.045e-02)); + r += mul(s0_5, M4(-4.816e-03, -5.705e-02, 5.751e-02, 6.914e-02, -2.323e-03, 1.954e-02, -6.234e-02, 2.904e-02, -1.618e-02, -5.947e-02, 8.672e-02, -9.847e-02, -1.798e-01, -3.539e-01, -1.188e-01, -1.909e-01)); + r += mul(s0_6, M4(-7.460e-02, -4.881e-02, 9.756e-02, 1.474e-02, -6.241e-02, -1.905e-02, 4.644e-02, 2.807e-04, 4.447e-02, 5.130e-02, -1.164e-01, -1.247e-03, -6.590e-02, -2.624e-02, -5.928e-02, -3.539e-02)); + r += mul(s0_7, M4(1.638e-01, 5.812e-02, -1.803e-01, -7.788e-02, -1.016e-01, -4.893e-02, -5.868e-02, -3.538e-03, -2.306e-01, -1.062e-01, 1.456e-01, 1.246e-01, -2.549e-01, 4.943e-02, 7.763e-02, 4.620e-02)); + r += mul(s0_8, M4(-3.654e-02, -7.453e-02, 1.955e-02, 4.617e-02, 4.147e-02, -1.313e-02, -4.974e-03, -3.849e-02, -6.676e-02, 
-2.096e-02, 2.696e-02, 4.776e-02, 2.142e-01, 3.247e-02, -5.714e-03, -8.951e-02)); + r += mul(s1_0, M4(-7.790e-02, -2.760e-01, 1.016e-01, -1.447e-01, -6.912e-02, -9.298e-02, 1.287e-02, -1.297e-02, -1.509e-02, 7.726e-02, -4.248e-02, 3.258e-02, 2.005e-02, -3.405e-03, -7.025e-03, 1.306e-02)); + r += mul(s1_1, M4(-3.396e-02, 2.075e-01, -3.389e-01, -1.892e-01, 7.255e-02, 1.213e-01, -1.733e-01, -1.080e-01, 5.638e-02, 1.893e-01, -1.445e-02, 1.323e-01, 6.380e-02, -1.103e-02, 6.004e-03, 3.340e-02)); + r += mul(s1_2, M4(9.550e-02, -7.969e-02, 1.467e-01, -1.510e-01, -6.064e-02, -1.011e-02, 9.627e-02, 5.701e-02, -1.502e-02, 5.514e-02, 1.194e-01, -8.703e-02, 1.962e-02, -2.335e-02, 1.273e-01, 3.821e-02)); + r += mul(s1_3, M4(5.118e-02, -4.244e-02, 3.218e-02, 2.105e-01, 2.683e-03, 1.639e-01, -7.032e-02, -6.492e-03, -6.695e-02, -1.580e-01, -3.038e-02, -1.280e-01, 5.194e-02, -4.606e-02, 7.701e-02, 4.344e-02)); + r += mul(s1_4, M4(-2.659e-01, -2.845e-01, -6.540e-02, 4.223e-02, -9.556e-02, 6.592e-01, -3.941e-01, 8.342e-01, 4.311e-01, -4.670e-01, 7.613e-01, 1.446e-02, -1.768e-01, 9.708e-02, -4.275e-02, -1.575e-01)); + r += mul(s1_5, M4(8.871e-02, -1.971e-01, 5.135e-02, 8.891e-02, -3.116e-01, 4.480e-02, 3.692e-02, 8.906e-02, 2.387e-01, -1.665e-01, 1.100e-01, -2.411e-01, -3.362e-01, 3.610e-03, -3.060e-02, 5.738e-02)); + r += mul(s1_6, M4(-8.271e-02, -1.283e-02, 5.737e-02, 2.650e-03, -3.927e-02, -7.014e-03, -8.666e-04, 6.659e-02, 3.380e-02, 7.843e-02, -5.220e-02, -4.679e-02, -5.453e-03, 1.609e-02, -4.788e-02, 2.133e-02)); + r += mul(s1_7, M4(2.159e-01, -8.350e-02, -1.979e-01, -4.520e-02, -1.429e-01, 4.874e-02, -9.640e-02, -6.331e-02, 1.226e-01, 1.200e-01, 2.666e-01, 3.174e-01, -2.204e-01, 1.731e-02, 3.981e-02, 5.188e-02)); + r += mul(s1_8, M4(-1.194e-01, -9.985e-02, 7.560e-02, 1.145e-01, -3.150e-04, 6.180e-02, -4.255e-02, -4.634e-02, -7.316e-02, 4.107e-02, 1.685e-02, -2.061e-03, 1.040e-01, 1.853e-02, -4.165e-02, -9.763e-02)); + r += mul(s2_0, M4(-1.972e-02, -1.625e-01, -3.716e-02, 
-6.495e-02, 3.466e-02, -3.497e-02, 4.673e-02, -1.410e-02, 1.877e-02, -7.723e-02, 2.751e-02, 1.365e-02, -8.140e-03, -1.770e-02, 1.922e-02, -1.498e-02)); + r += mul(s2_1, M4(-1.786e-02, -1.179e-01, 1.953e-01, 1.006e-01, 2.893e-02, 6.931e-03, -4.640e-03, -2.826e-02, 9.737e-02, -8.263e-02, 1.216e-01, -1.508e-01, -2.827e-03, -2.454e-02, -3.020e-02, -2.437e-02)); + r += mul(s2_2, M4(1.134e-02, -1.006e-02, 1.179e-02, 3.855e-02, 7.075e-02, -2.106e-02, 9.123e-02, -8.254e-02, 2.474e-02, 1.142e-02, -9.821e-02, 6.447e-05, -1.998e-02, -2.427e-02, -1.350e-02, 2.790e-02)); + r += mul(s2_3, M4(2.106e-01, 1.613e-01, -1.067e-01, -2.487e-02, -5.780e-02, -4.198e-03, -8.941e-02, -9.418e-03, 6.684e-02, 1.602e-02, -3.390e-02, 3.211e-02, -5.629e-02, 9.606e-02, 5.797e-02, -5.583e-03)); + r += mul(s2_4, M4(-1.209e-01, 5.758e-01, 1.261e-03, 1.741e-01, -2.419e-01, -1.311e-01, 5.495e-02, 1.059e-01, 1.174e-01, 1.139e-01, 7.558e-02, 3.146e-02, 3.048e-01, 1.029e-01, -4.014e-02, 5.124e-02)); + r += mul(s2_5, M4(2.601e-01, 1.167e-01, -1.328e-01, 6.108e-02, -2.140e-01, -1.833e-02, 7.347e-02, -1.673e-02, 8.485e-02, 7.450e-02, -4.355e-02, -2.229e-01, 2.606e-02, 5.274e-02, -3.851e-03, -4.787e-03)); + r += mul(s2_6, M4(-3.845e-02, 1.459e-02, -9.795e-04, -3.515e-03, 9.791e-02, -4.729e-03, 4.591e-02, 2.429e-02, -4.431e-02, 5.498e-04, -4.050e-02, 1.392e-02, -2.258e-01, -6.294e-02, -4.323e-02, -1.549e-02)); + r += mul(s2_7, M4(-1.184e-01, -2.039e-01, -1.069e-02, -2.209e-02, 1.933e-01, 4.867e-02, -1.127e-01, -1.196e-01, -9.386e-02, 2.226e-03, 1.569e-02, -4.063e-03, -5.044e-02, -1.930e-01, 7.287e-02, 1.237e-01)); + r += mul(s2_8, M4(-8.680e-02, -9.201e-02, 2.773e-02, -4.725e-02, 4.422e-02, -1.834e-02, 4.184e-02, 1.488e-02, -3.397e-02, 1.727e-04, 2.558e-02, 5.452e-02, -2.750e-02, 4.529e-02, -3.442e-02, 7.563e-02)); + r += mul(s3_0, M4(-7.896e-03, -1.086e-01, 5.701e-02, -3.965e-02, 8.469e-02, 1.352e-01, -6.528e-02, -1.477e-02, 2.109e-02, -8.591e-02, -4.694e-03, -3.514e-03, 5.960e-02, -4.514e-02, 1.488e-02, 
-3.246e-02)); + r += mul(s3_1, M4(5.433e-02, -5.217e-02, -1.891e-02, -3.308e-02, -1.775e-01, -3.605e-02, 1.126e-01, -2.166e-01, 4.270e-02, -4.366e-01, 1.504e-01, -2.861e-01, -1.690e-03, 1.295e-02, -9.738e-04, -6.364e-02)); + r += mul(s3_2, M4(2.461e-02, 3.878e-02, 2.036e-02, 5.172e-02, 1.882e-02, 1.194e-01, 2.614e-01, 4.576e-02, 1.069e-01, -1.662e-01, -9.787e-02, -4.312e-01, 6.190e-02, -4.314e-03, 8.764e-04, -1.972e-02)); + r += mul(s3_3, M4(1.434e-02, 1.003e-03, 1.437e-01, -4.020e-02, 1.354e-01, -1.095e-02, -4.405e-01, -2.827e-02, 4.254e-02, -1.466e-02, -3.134e-02, -3.096e-03, -8.806e-02, 5.043e-02, 4.063e-03, 5.549e-02)); + r += mul(s3_4, M4(-1.294e-01, 2.964e-02, 5.146e-02, 6.517e-02, -6.495e-01, -5.152e-02, 5.726e-01, 3.115e-01, 1.993e-01, -2.266e-01, -9.610e-02, 2.153e-01, 1.816e-02, -1.818e-01, 1.234e-01, -3.177e-01)); + r += mul(s3_5, M4(7.489e-02, -3.528e-02, 6.062e-03, -3.331e-02, 2.551e-01, 2.377e-01, -1.997e-01, -4.533e-02, -3.031e-01, -8.469e-02, 1.362e-01, 2.675e-01, -1.314e-01, -6.837e-03, -1.181e-01, -1.001e-02)); + r += mul(s3_6, M4(-6.568e-02, -2.240e-02, 4.283e-02, 1.558e-02, 6.800e-02, -5.739e-03, 6.958e-02, -2.239e-02, 2.790e-02, 5.060e-03, -7.806e-03, 2.765e-03, -1.731e-01, 1.712e-01, -3.499e-01, 6.484e-02)); + r += mul(s3_7, M4(1.238e-01, -8.504e-02, -4.829e-02, -1.088e-02, 7.962e-02, -1.782e-02, -9.934e-02, -5.628e-02, -4.412e-02, 4.612e-02, 1.926e-03, 7.206e-02, -7.886e-02, 5.169e-02, 2.761e-01, 4.394e-01)); + r += mul(s3_8, M4(-6.514e-02, -7.796e-02, -9.301e-03, 4.148e-02, 1.350e-01, -7.848e-02, 2.949e-02, -1.101e-01, 4.236e-02, 3.716e-02, 4.291e-02, 7.020e-02, 1.850e-01, -4.661e-03, -2.221e-01, -1.237e-01)); + r += V4(1.407e-02, -1.303e-02, 6.423e-03, -6.361e-03); + return r; +} + +void Pass8(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = 
l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 9 +//!DESC conv8 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN 
t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.716e-02, -2.891e-02, -2.183e-01, -7.676e-03, -1.552e-02, 1.494e-02, 2.825e-02, -3.651e-02, 4.604e-03, 5.645e-03, 8.134e-03, -1.298e-02, 2.144e-02, 1.195e-03, 1.450e-02, 1.619e-02)); + r += mul(s0_1, M4(-3.389e-02, -2.900e-01, -3.087e-01, -5.633e-02, 2.753e-02, 2.198e-02, -6.998e-02, -6.210e-02, 2.079e-02, 2.083e-02, 4.432e-02, -1.027e-02, 3.827e-02, 8.856e-04, 4.809e-03, -3.012e-02)); + r += mul(s0_2, M4(8.829e-03, -1.200e-02, -2.731e-02, 2.589e-02, -4.841e-03, -1.333e-01, -3.516e-02, -2.656e-02, -1.726e-03, -4.446e-03, -8.521e-02, -2.038e-02, 5.978e-02, 9.301e-02, 1.080e-02, 5.518e-02)); + r += mul(s0_3, M4(-8.477e-02, 6.865e-02, 1.804e-01, 1.239e-01, 2.939e-02, -5.360e-02, -1.714e-01, -4.654e-02, 5.288e-03, -1.770e-02, 1.804e-02, -1.039e-02, -1.376e-02, -4.660e-02, -1.135e-01, -8.134e-02)); + r += mul(s0_4, M4(4.002e-02, 2.814e-01, 3.318e-01, -2.789e-01, -1.151e-02, -6.062e-02, 1.340e-01, -2.157e-01, -1.594e-02, 3.660e-02, -2.938e-02, 2.064e-01, 5.313e-02, 9.726e-02, -1.831e-01, 1.773e-01)); + r += mul(s0_5, M4(1.286e-02, 5.915e-02, 9.298e-02, 7.291e-02, 2.413e-02, 1.863e-01, 4.568e-02, 1.337e-01, 1.420e-02, -1.661e-02, 8.140e-02, 2.640e-02, -4.800e-02, -3.460e-01, -3.232e-01, 8.426e-02)); + r += mul(s0_6, M4(-1.923e-02, -7.129e-02, 1.256e-01, 1.482e-02, 2.752e-02, 6.057e-02, -2.595e-02, 1.176e-02, -5.789e-03, 1.235e-02, 2.846e-02, -3.297e-02, 2.591e-02, 9.832e-03, -1.272e-02, -2.601e-02)); + r += mul(s0_7, M4(3.527e-02, 3.830e-01, 2.712e-01, 1.038e-01, 4.921e-02, -1.047e-01, -1.821e-01, 
2.358e-02, -2.067e-02, 5.368e-02, 1.773e-01, -1.114e-02, -6.454e-02, -1.991e-02, -9.674e-02, -1.353e-01)); + r += mul(s0_8, M4(2.063e-02, -5.830e-02, -7.769e-03, -2.469e-02, -2.694e-03, -5.945e-02, 7.213e-02, 3.814e-02, -1.769e-02, 9.680e-02, -5.531e-03, -8.608e-02, 5.463e-02, 1.511e-01, 2.052e-01, 5.571e-02)); + r += mul(s1_0, M4(5.597e-02, -1.255e-01, -1.365e-01, -8.874e-02, -2.838e-02, 8.409e-03, 7.349e-02, 5.397e-03, 1.360e-02, -1.040e-02, -2.247e-02, 1.490e-02, -1.303e-02, 1.433e-02, 1.603e-02, 1.108e-02)); + r += mul(s1_1, M4(-6.497e-02, -1.599e-02, 2.908e-02, -2.089e-01, 5.060e-02, -7.639e-02, -1.720e-01, -1.082e-01, 1.348e-02, 2.165e-02, -1.413e-02, 9.410e-02, 4.757e-03, -1.502e-02, -9.644e-02, 2.782e-02)); + r += mul(s1_2, M4(1.597e-02, -7.105e-02, -3.690e-02, 3.880e-02, 2.393e-04, -3.600e-02, 7.562e-02, -9.162e-02, -4.744e-02, 5.457e-02, -5.874e-02, 2.780e-02, -9.128e-03, -4.318e-02, -6.162e-02, 9.923e-02)); + r += mul(s1_3, M4(-9.415e-02, 8.443e-02, 1.850e-01, 2.272e-01, 6.203e-02, -6.275e-02, -1.687e-01, -1.142e-01, -4.309e-02, 4.878e-03, 2.693e-02, 3.195e-02, 1.393e-02, -8.775e-03, -3.935e-02, -2.571e-02)); + r += mul(s1_4, M4(-8.029e-02, 9.122e-01, 1.307e-01, -3.950e-01, -2.208e-01, 1.042e-01, 1.690e-01, -9.825e-02, -5.692e-02, 7.496e-02, -1.333e-01, 4.595e-01, 8.206e-02, 5.508e-02, 1.307e-01, -1.802e-01)); + r += mul(s1_5, M4(4.041e-02, 7.446e-02, 1.326e-02, 4.840e-02, -3.398e-03, 8.501e-02, -1.043e-02, 2.989e-02, 9.050e-02, -6.651e-02, 3.538e-03, -2.544e-01, -1.664e-02, -1.277e-02, 7.405e-03, 1.360e-01)); + r += mul(s1_6, M4(4.049e-03, 2.225e-02, -1.467e-02, 4.199e-02, -1.629e-02, -2.449e-02, 9.364e-03, 3.742e-02, 1.461e-02, 1.914e-02, -1.475e-02, 2.694e-02, 1.939e-02, 1.508e-02, -2.493e-02, 2.145e-02)); + r += mul(s1_7, M4(-1.139e-02, -2.558e-02, 2.030e-02, -1.332e-02, 9.789e-02, -1.343e-01, -1.585e-01, -2.415e-02, -3.155e-02, 5.701e-02, 9.033e-02, 1.058e-02, -6.113e-02, 3.790e-02, 5.825e-02, 1.036e-01)); + r += mul(s1_8, M4(2.258e-03, 2.830e-04, 
2.544e-03, 4.909e-02, -5.359e-02, -2.778e-02, 4.536e-02, 6.162e-03, -3.971e-03, 9.985e-02, -2.324e-02, -2.912e-02, 1.898e-02, 3.549e-02, 1.523e-02, -5.460e-02)); + r += mul(s2_0, M4(-2.683e-03, -2.313e-02, -9.765e-02, 6.443e-02, -1.147e-02, -4.148e-02, -6.236e-02, 3.064e-02, 1.535e-03, -4.514e-03, -1.590e-02, -6.948e-03, 2.944e-02, -1.097e-02, 4.388e-02, -6.514e-02)); + r += mul(s2_1, M4(-3.525e-02, 2.655e-02, 1.220e-01, 1.464e-02, 7.646e-02, -6.272e-02, -1.444e-01, -5.726e-02, -1.391e-02, -1.336e-02, -1.038e-01, 4.556e-02, 2.769e-02, 2.400e-02, 6.146e-02, -1.442e-01)); + r += mul(s2_2, M4(9.590e-03, -1.095e-02, -2.527e-02, -2.780e-02, -6.799e-03, -9.882e-02, -4.968e-02, -1.557e-02, -6.521e-03, 2.517e-02, 6.286e-03, 4.018e-02, 1.262e-02, 9.613e-03, 4.015e-02, -4.083e-02)); + r += mul(s2_3, M4(-3.107e-02, 2.050e-02, 2.190e-01, -5.231e-02, -1.644e-02, 1.728e-02, -9.647e-02, 2.000e-02, 6.885e-03, -3.772e-02, -1.800e-02, -1.167e-02, 3.840e-02, -8.962e-02, -1.389e-01, -9.490e-02)); + r += mul(s2_4, M4(5.263e-02, -1.751e-02, -9.476e-02, -5.722e-02, 1.043e+00, 2.111e-01, 1.155e-01, 2.278e-01, 2.259e-02, -1.039e-02, 7.633e-02, -5.180e-02, 2.861e-01, 2.293e-01, 2.457e-01, -1.031e+00)); + r += mul(s2_5, M4(-1.717e-02, 3.437e-02, 1.829e-02, 6.573e-02, 4.646e-02, 4.430e-01, 1.042e-01, -6.801e-02, 1.584e-02, 4.456e-02, 5.250e-02, -9.501e-02, -1.277e-02, -3.398e-02, 1.408e-02, 3.100e-02)); + r += mul(s2_6, M4(1.276e-02, -2.000e-02, -1.067e-01, 1.639e-02, -6.351e-03, -6.506e-04, 1.168e-03, -8.173e-03, -7.451e-03, 1.989e-02, 1.142e-02, -8.287e-03, 4.566e-02, 2.864e-02, -2.859e-02, 4.019e-02)); + r += mul(s2_7, M4(-3.441e-02, -1.582e-02, 3.287e-02, 7.107e-02, -4.680e-02, -8.453e-02, -8.472e-02, -1.265e-03, -9.706e-03, 4.809e-02, -3.750e-02, 9.239e-02, -3.768e-02, -1.209e-01, -1.171e-01, 2.288e-01)); + r += mul(s2_8, M4(-1.296e-03, 1.673e-02, -1.629e-02, -4.858e-02, 1.224e-02, -1.321e-01, -4.292e-02, 1.164e-01, 2.477e-02, 7.985e-02, -7.381e-02, 4.411e-02, 2.398e-02, -1.543e-02, 
3.089e-02, -2.552e-02)); + r += mul(s3_0, M4(1.648e-02, 3.567e-03, -1.052e-01, 4.280e-02, -6.192e-04, -1.966e-02, -4.740e-02, -1.942e-03, -5.032e-03, 1.183e-02, -1.079e-02, 3.852e-03, 2.288e-02, 3.801e-03, 3.902e-02, -3.914e-02)); + r += mul(s3_1, M4(-5.489e-02, -1.027e-01, -1.111e-01, 1.519e-01, -1.327e-02, 1.596e-03, -1.675e-01, 5.952e-02, -6.748e-02, 2.754e-02, -5.609e-02, 6.575e-02, 2.188e-02, 9.031e-03, 4.638e-02, -1.094e-01)); + r += mul(s3_2, M4(3.213e-03, 4.675e-02, -2.631e-02, -1.086e-02, -1.903e-02, -1.028e-01, -1.084e-01, 2.649e-02, 9.822e-03, 7.725e-03, -1.691e-02, 3.222e-02, -9.035e-03, 2.980e-02, 5.749e-02, -3.044e-02)); + r += mul(s3_3, M4(-7.619e-02, -1.622e-03, 3.324e-01, 4.752e-02, -3.685e-02, -4.685e-02, -8.527e-02, 2.294e-02, -2.439e-02, -6.236e-03, -3.104e-02, 3.233e-02, 1.789e-02, -4.432e-02, -1.392e-01, -9.780e-02)); + r += mul(s3_4, M4(2.911e-01, 3.642e-01, 3.817e-01, -9.316e-02, 4.124e-02, -3.579e-02, 4.983e-02, 1.040e-01, 5.359e-02, -3.462e-01, 4.782e-02, -2.421e-01, 2.007e-02, -2.380e-01, -1.773e-01, -6.314e-01)); + r += mul(s3_5, M4(-3.313e-03, -1.201e-02, -1.415e-02, 8.734e-03, 3.021e-02, 9.567e-02, 2.296e-02, -6.178e-02, 5.850e-02, 6.168e-02, -6.368e-02, -5.051e-02, -2.251e-02, -3.667e-02, 3.398e-02, 2.872e-02)); + r += mul(s3_6, M4(4.334e-02, -4.642e-02, -2.431e-01, -3.343e-02, -2.479e-03, -5.827e-03, 3.528e-02, 3.433e-02, -4.470e-02, -2.127e-02, -7.138e-03, 3.796e-02, 1.379e-02, 4.208e-02, 4.114e-02, 2.813e-02)); + r += mul(s3_7, M4(-1.170e-02, -4.718e-02, 2.204e-02, 5.829e-02, -5.554e-03, -1.019e-01, -2.598e-01, -1.456e-01, 1.797e-02, 1.255e-01, -5.581e-02, 1.283e-01, -3.416e-02, -8.180e-02, -5.774e-02, 9.365e-02)); + r += mul(s3_8, M4(1.354e-02, 4.724e-02, 4.335e-02, 5.845e-02, -9.950e-03, -7.794e-02, -2.257e-02, 3.610e-03, 2.669e-02, -1.255e-01, -5.754e-02, 1.600e-01, -2.851e-03, 3.557e-02, 3.722e-02, 1.257e-02)); + r += V4(1.101e-03, 2.116e-04, -1.154e-03, -3.483e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, 
V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-8.139e-03, 4.801e-02, 1.603e-02, 3.358e-02, 4.412e-02, -3.926e-03, 1.599e-02, -2.674e-02, 1.052e-02, 3.787e-03, -7.897e-03, 6.304e-03, -5.446e-03, 4.456e-03, 3.906e-03, -3.014e-02)); + r += mul(s0_1, M4(-4.183e-02, -4.484e-02, -1.794e-02, 8.394e-02, 9.001e-03, 8.132e-03, -1.436e-02, 6.112e-02, -3.553e-02, 1.347e-02, 1.129e-02, -1.392e-02, -5.328e-02, 4.777e-03, -2.372e-02, 1.061e-02)); + r += mul(s0_2, M4(-9.735e-02, 5.944e-02, 2.399e-02, -2.746e-02, -3.898e-02, 3.360e-03, -6.036e-03, -3.314e-02, 4.432e-02, -1.326e-02, -1.406e-02, 3.821e-02, -3.058e-02, -5.659e-03, 4.182e-04, -4.163e-02)); + r += mul(s0_3, M4(-1.753e-01, 8.851e-03, 9.107e-02, 6.816e-02, 5.333e-02, 1.918e-02, -3.936e-02, 1.615e-02, 5.222e-03, -9.198e-03, 2.073e-03, -2.037e-02, -3.624e-02, 5.469e-03, 3.329e-02, 7.935e-02)); + r += mul(s0_4, M4(3.188e-01, -3.351e-02, 3.946e-02, -1.936e-01, -8.892e-02, -2.977e-02, 1.599e-02, -8.213e-02, -2.080e-02, -4.108e-02, 7.681e-02, 5.185e-02, 3.339e-01, -1.861e-01, 8.615e-02, -4.358e-02)); + r += mul(s0_5, M4(-1.339e-01, 2.904e-02, -6.564e-02, 1.283e-01, -2.102e-01, -9.994e-02, 6.066e-02, 8.217e-02, 1.274e-01, 4.664e-02, 1.387e-02, -1.304e-01, -4.338e-01, 1.438e-01, -8.452e-02, 9.942e-02)); + r += mul(s0_6, M4(-3.172e-03, 1.855e-02, -1.299e-01, -5.566e-02, 1.342e-02, -3.773e-02, 1.164e-01, -4.885e-02, 2.672e-02, 2.120e-03, 6.741e-03, -1.942e-02, 1.131e-02, 1.268e-02, -6.915e-03, -3.804e-02)); + r += mul(s0_7, M4(-3.183e-02, 1.248e-01, 2.723e-01, 4.886e-02, -5.783e-02, -3.498e-02, -7.778e-02, 7.155e-02, -4.697e-02, -1.291e-02, 1.091e-02, -5.571e-02, 4.493e-02, 2.837e-03, 1.022e-01, -5.141e-02)); + r += mul(s0_8, M4(-6.313e-03, -4.400e-02, 
5.050e-04, 2.227e-02, -9.350e-02, -4.613e-03, -4.296e-02, 1.727e-03, 6.421e-02, 5.700e-02, 7.157e-03, 6.188e-02, -2.251e-01, -8.961e-03, -4.093e-02, 9.974e-02)); + r += mul(s1_0, M4(-1.039e-01, 1.880e-02, 8.841e-03, 7.631e-02, -4.356e-03, 1.133e-03, 1.746e-02, -4.062e-02, -1.458e-02, -1.400e-02, -4.845e-03, 7.824e-03, 3.870e-02, -1.011e-03, -5.752e-03, -3.031e-02)); + r += mul(s1_1, M4(6.233e-02, -2.081e-02, 2.657e-02, 9.817e-02, -5.733e-02, 5.913e-02, -4.861e-03, 5.648e-02, 1.835e-02, -1.568e-02, 6.182e-03, -1.379e-02, 6.751e-03, 4.911e-03, -1.231e-02, 3.740e-02)); + r += mul(s1_2, M4(-4.699e-02, 8.618e-02, 1.229e-02, -6.548e-03, 2.591e-02, -9.380e-02, 4.808e-02, 1.493e-03, 5.835e-02, -2.621e-03, -2.251e-02, 2.527e-03, -8.374e-02, 4.826e-03, -2.984e-03, -1.785e-02)); + r += mul(s1_3, M4(-9.938e-03, -4.651e-02, 1.316e-01, -1.821e-01, -4.344e-02, 1.548e-02, -5.539e-02, 1.124e-01, 6.993e-03, -5.261e-03, 2.396e-02, 7.078e-03, -3.761e-02, 1.679e-02, 6.152e-02, -6.838e-04)); + r += mul(s1_4, M4(-1.082e-01, 1.483e-01, -1.165e-01, -6.575e-01, 3.764e-01, -5.165e-02, 2.034e-01, -3.174e-01, 2.801e-01, -2.852e-02, -1.561e-02, 4.623e-02, 8.767e-03, -6.216e-02, 7.259e-02, -4.773e-02)); + r += mul(s1_5, M4(-4.327e-02, -5.334e-02, -1.740e-02, 8.949e-02, 2.073e-01, -9.327e-01, -5.263e-02, 8.325e-03, 2.765e-01, 7.962e-01, 6.864e-02, -1.538e-01, -1.516e-01, 2.461e-02, -7.246e-03, 5.921e-02)); + r += mul(s1_6, M4(-8.185e-03, -3.844e-02, 7.961e-02, -9.410e-03, 4.108e-04, -2.167e-03, -1.127e-01, 2.967e-02, 1.753e-03, 4.788e-03, 4.251e-02, -1.486e-02, 2.875e-02, -1.012e-02, 3.946e-02, -3.900e-02)); + r += mul(s1_7, M4(-8.943e-03, 2.983e-02, -5.527e-02, -2.334e-02, -6.379e-02, 3.683e-02, -3.037e-01, 1.132e-01, 1.233e-02, 2.120e-02, 2.496e-01, -1.707e-01, -4.890e-03, 4.398e-02, -4.966e-02, 3.483e-02)); + r += mul(s1_8, M4(-4.941e-03, 7.157e-03, -1.099e-02, 4.138e-02, -7.410e-02, -5.163e-02, 3.074e-02, -3.082e-02, -2.063e-02, 2.558e-01, -1.401e-01, 1.954e-01, 1.225e-02, -6.256e-02, 
2.581e-02, -3.332e-02)); + r += mul(s2_0, M4(3.635e-02, 5.231e-03, -4.170e-02, 4.580e-02, -4.653e-03, 9.252e-03, -2.538e-02, 5.636e-03, 5.214e-02, 6.077e-03, -1.398e-02, -2.888e-02, -4.214e-02, 1.758e-02, 2.678e-02, -2.664e-02)); + r += mul(s2_1, M4(-4.114e-02, 5.198e-02, 9.590e-03, -2.777e-02, -6.989e-02, 5.447e-03, -1.375e-02, 1.284e-01, -9.811e-04, 2.238e-02, 4.914e-03, 3.380e-02, 6.931e-02, -1.843e-02, 1.917e-02, 1.045e-02)); + r += mul(s2_2, M4(8.599e-02, -1.864e-02, -7.699e-03, 1.346e-02, 6.046e-03, 3.235e-02, 1.948e-02, -2.311e-03, -2.597e-02, 3.028e-02, 3.122e-02, -1.293e-02, -1.139e-03, 9.886e-03, 1.895e-02, 1.435e-02)); + r += mul(s2_3, M4(-9.374e-02, -4.919e-02, 9.379e-02, -7.869e-02, -1.614e-03, -1.447e-02, -1.151e-02, 1.634e-02, -4.178e-02, -4.215e-03, 1.515e-02, 3.309e-02, 1.458e-02, 4.229e-02, 1.160e-03, 3.808e-02)); + r += mul(s2_4, M4(-2.479e-02, 2.123e-03, -1.670e-02, 1.710e-01, 1.476e-01, 1.105e-01, 8.431e-03, -6.465e-01, 1.034e-01, -2.353e-02, -4.223e-02, -4.599e-02, 2.390e-01, -8.565e-02, 1.056e-01, -1.849e-02)); + r += mul(s2_5, M4(7.623e-02, 2.227e-02, 2.566e-03, -8.814e-02, -1.420e-01, -4.561e-02, -8.096e-03, 6.678e-02, 1.813e-01, -4.586e-02, -2.824e-03, 4.914e-03, -5.548e-02, 2.289e-02, -3.724e-02, -4.919e-02)); + r += mul(s2_6, M4(1.146e-02, -9.689e-03, -4.242e-02, 5.383e-02, 3.955e-03, -2.442e-02, 7.879e-02, -3.362e-02, 2.215e-02, 2.008e-03, 5.887e-03, -1.880e-02, -2.466e-02, -2.656e-02, 1.069e-01, -5.091e-02)); + r += mul(s2_7, M4(-1.438e-02, 1.372e-02, 4.616e-03, -6.651e-02, 1.724e-03, -2.181e-02, -1.028e-02, 1.685e-01, -1.914e-04, -5.789e-02, 1.803e-01, -4.568e-02, -2.648e-03, 9.449e-02, 1.811e-03, 1.118e-01)); + r += mul(s2_8, M4(5.457e-02, 7.597e-03, 3.242e-02, -1.486e-02, 8.487e-02, -4.658e-03, -5.511e-02, -2.655e-02, 1.510e-01, -1.640e-02, 2.092e-01, -2.047e-01, -3.228e-02, -5.188e-02, -3.577e-02, 6.335e-02)); + r += mul(s3_0, M4(2.751e-02, 1.164e-02, -7.354e-03, 1.738e-02, 5.841e-02, 2.209e-02, -2.322e-02, -1.156e-02, 6.615e-02, 
1.279e-02, -4.472e-03, -9.096e-03, -1.528e-02, 1.383e-02, 1.389e-02, -3.084e-02)); + r += mul(s3_1, M4(-2.044e-01, 3.340e-03, -3.237e-04, -3.528e-02, -4.652e-02, -1.142e-02, 2.224e-02, 8.739e-02, 8.402e-02, -1.391e-02, -6.910e-03, 5.285e-02, 1.661e-02, 1.119e-02, 8.985e-03, 1.291e-02)); + r += mul(s3_2, M4(1.460e-01, -8.168e-02, 3.896e-02, -1.981e-02, -5.708e-02, 4.458e-02, 5.119e-03, -2.267e-02, -6.497e-02, 7.691e-02, -1.412e-02, -1.887e-03, 3.175e-02, 4.805e-02, 5.430e-03, 1.617e-02)); + r += mul(s3_3, M4(-6.320e-02, -3.050e-02, -9.449e-02, -8.528e-02, -3.759e-02, -7.222e-03, -4.294e-03, 6.067e-02, -5.506e-03, 9.794e-03, 3.820e-02, 4.139e-02, -5.172e-02, 6.190e-02, 3.339e-02, 6.331e-02)); + r += mul(s3_4, M4(-3.370e-01, 3.061e-01, 1.962e-01, 1.538e-01, 1.176e-01, -2.454e-02, 1.303e-02, -1.997e-01, 3.462e-01, 2.586e-02, -1.343e-01, -3.291e-01, -8.535e-02, -1.103e-01, -4.880e-02, 2.446e-01)); + r += mul(s3_5, M4(1.265e-01, -1.734e-01, 2.422e-02, -1.558e-01, -2.041e-02, 4.496e-02, -1.906e-02, 8.517e-02, -3.115e-01, 2.843e-01, 2.961e-02, 7.657e-03, -2.348e-03, 6.712e-02, -4.156e-02, -5.768e-02)); + r += mul(s3_6, M4(1.086e-02, -2.221e-03, 8.270e-02, 3.185e-02, -1.962e-02, -1.782e-03, -2.368e-02, -3.431e-02, -2.859e-02, -1.832e-02, -4.540e-02, 2.570e-02, -2.138e-03, -1.952e-02, 2.247e-04, -4.177e-02)); + r += mul(s3_7, M4(-1.425e-01, 6.336e-02, -1.040e-01, 5.089e-02, -3.597e-02, 1.705e-02, -2.995e-02, 1.966e-01, 1.487e-01, -6.668e-02, 6.159e-01, -2.875e-01, -1.333e-02, 2.252e-02, -1.610e-01, 4.135e-02)); + r += mul(s3_8, M4(-5.173e-02, -2.024e-02, 7.865e-02, 1.027e-02, 3.283e-02, 1.865e-02, 2.220e-02, -6.929e-02, 8.660e-03, 3.481e-02, -3.540e-03, 2.775e-02, -7.171e-02, -2.940e-02, 2.794e-02, 3.405e-02)); + r += V4(7.644e-03, -3.720e-03, -2.028e-03, -1.031e-03); + return r; +} + +void Pass9(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) 
{ + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + 
+//!PASS 10 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0, t1 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(8.460e-03, 7.227e-04, -9.095e-03, -1.062e-03, 1.404e-03, -3.055e-03, 1.972e-02, -3.617e-03, -1.110e-02, -2.615e-03, -2.163e-02, 7.194e-03, 1.631e-02, 5.920e-03, -2.313e-02, 9.194e-03)); + r += mul(s0_1, M4(1.074e-01, 4.925e-02, 3.811e-03, -3.480e-03, -1.927e-02, -9.323e-02, 3.442e-02, -3.375e-03, 8.942e-02, 5.369e-02, 3.605e-03, -2.149e-02, 1.238e-01, 1.032e-01, -7.511e-03, -2.420e-02)); + r += mul(s0_2, M4(-4.397e-03, 6.268e-02, -1.117e-03, 5.248e-03, 6.173e-03, -4.652e-02, 2.153e-03, -1.332e-02, -2.124e-02, 6.323e-02, -6.680e-03, 2.414e-02, -2.336e-02, 3.126e-02, -1.137e-03, 8.318e-03)); + r += mul(s0_3, M4(-2.841e-02, 1.182e-02, 5.716e-03, 1.522e-02, 1.087e-01, -2.623e-03, 2.560e-02, -1.135e-03, -2.773e-02, 4.115e-02, -9.738e-03, -5.203e-03, -4.173e-02, -2.201e-02, 3.295e-02, -1.266e-02)); + r += mul(s0_4, M4(-4.117e-01, -2.906e-01, 1.740e-01, 7.056e-02, 2.449e-01, 2.290e-01, 2.660e-02, -1.887e-01, -1.606e-01, -2.341e-01, 2.386e-02, 9.567e-02, 2.140e-01, 5.510e-02, 3.705e-01, 3.566e-01)); + r += mul(s0_5, M4(9.640e-03, -1.480e-01, -1.321e-02, 9.225e-02, -4.257e-03, -3.511e-02, -7.280e-03, -7.580e-02, -1.273e-02, 7.344e-02, -1.140e-02, 8.228e-02, -6.665e-02, 2.214e-02, -2.575e-02, 5.468e-02)); + r += mul(s0_6, M4(2.629e-02, 3.221e-03, 1.147e-02, 1.322e-02, 1.657e-03, -8.292e-03, 2.597e-02, -3.207e-04, 4.843e-03, -2.782e-03, 6.102e-03, 1.200e-02, 7.469e-03, 1.380e-02, -1.182e-02, -1.349e-02)); + r += mul(s0_7, 
M4(-2.984e-02, 2.692e-02, -3.633e-02, -2.845e-02, -5.878e-02, -4.681e-02, 9.777e-02, 7.897e-03, 2.619e-02, 4.158e-02, -6.065e-02, -5.580e-02, -1.509e-03, -3.725e-03, 3.659e-02, 2.728e-02)); + r += mul(s0_8, M4(4.502e-03, 5.966e-03, -6.348e-03, -2.444e-02, -1.109e-02, -9.121e-03, 1.412e-02, -2.219e-02, 2.528e-02, 1.424e-02, -1.909e-02, 3.809e-02, 3.335e-03, -6.195e-03, -5.777e-03, -1.163e-02)); + r += mul(s1_0, M4(7.927e-02, -3.654e-02, -7.104e-02, -5.741e-03, -4.468e-02, -1.148e-02, 3.049e-02, -7.024e-03, 4.900e-02, -1.474e-02, -2.352e-02, -7.655e-03, -6.889e-03, -1.154e-04, -1.898e-02, -1.786e-03)); + r += mul(s1_1, M4(-1.474e-01, 4.527e-01, 9.350e-02, -1.063e-02, -1.159e-01, -1.820e-01, 2.065e-02, -2.246e-02, 8.028e-02, 1.627e-01, -5.292e-02, -5.642e-02, 9.400e-02, 6.621e-02, -1.423e-02, -2.229e-02)); + r += mul(s1_2, M4(-4.682e-02, 7.007e-02, 4.030e-03, -5.523e-02, -4.159e-03, -3.023e-02, 3.451e-04, -1.617e-02, 1.268e-02, 2.771e-02, -5.629e-03, 1.612e-02, -2.279e-02, 1.539e-02, -6.265e-03, 3.223e-03)); + r += mul(s1_3, M4(-2.588e-01, 5.084e-02, 2.529e-01, 3.827e-02, 2.549e-02, 6.341e-03, 9.548e-02, 2.094e-02, -2.024e-02, 2.899e-02, -3.879e-02, 7.127e-03, -3.527e-02, -2.582e-02, 3.312e-03, -1.217e-02)); + r += mul(s1_4, M4(8.045e-02, -5.762e-01, -2.616e-01, -9.298e-02, 2.315e-01, -1.745e-01, 2.468e-02, 5.254e-01, -5.182e-01, -4.752e-01, 4.877e-01, 1.722e-01, 1.480e-01, 4.491e-02, 2.757e-01, 2.529e-01)); + r += mul(s1_5, M4(1.792e-02, -1.146e-02, 2.996e-02, 1.509e-01, -2.630e-02, -1.523e-02, 2.457e-02, -5.773e-02, -2.553e-02, 3.057e-02, -3.759e-02, 1.110e-01, -6.776e-02, -6.445e-03, -3.691e-02, 4.384e-02)); + r += mul(s1_6, M4(-9.344e-03, 5.605e-02, 3.183e-02, -6.304e-03, -1.731e-02, 2.691e-03, 2.398e-02, -1.864e-02, 1.464e-02, 8.938e-04, -1.099e-02, -1.108e-03, -4.259e-03, 6.059e-03, -2.336e-02, -1.933e-02)); + r += mul(s1_7, M4(-4.542e-02, -1.644e-02, 5.247e-02, -2.091e-02, -7.788e-02, -3.454e-02, 8.288e-02, -1.948e-02, 7.775e-02, 7.495e-02, -1.214e-01, 
-8.277e-02, -2.031e-02, -2.472e-02, 2.223e-02, 2.040e-02)); + r += mul(s1_8, M4(4.182e-03, 1.227e-02, -3.306e-02, -2.026e-02, -1.007e-02, -4.687e-03, 8.388e-03, -1.988e-02, 4.379e-02, 2.888e-02, -3.609e-02, -8.212e-03, 9.212e-03, -1.536e-02, -1.724e-02, -2.522e-02)); + r += mul(s2_0, M4(1.291e-02, -7.279e-03, -1.884e-02, -1.385e-02, 5.309e-02, -1.217e-01, -5.973e-02, 3.029e-03, 4.425e-02, -1.353e-02, -1.157e-02, -2.189e-02, 2.732e-02, -1.622e-03, -2.147e-02, -6.696e-03)); + r += mul(s2_1, M4(1.174e-03, 4.834e-02, 1.280e-03, -1.357e-02, 1.357e-04, -2.637e-02, -8.604e-03, -1.080e-02, -9.358e-02, 7.446e-02, -8.482e-02, 4.900e-03, 1.160e-01, 8.796e-02, 4.314e-02, -7.086e-02)); + r += mul(s2_2, M4(-1.170e-02, -3.009e-02, -6.555e-03, 7.647e-03, -1.085e-04, -7.936e-03, 6.060e-04, -2.554e-03, 4.105e-02, -1.081e-02, 2.310e-02, -4.364e-02, -3.687e-02, -5.708e-03, 1.309e-02, 5.824e-03)); + r += mul(s2_3, M4(-8.081e-02, 4.155e-02, 4.842e-03, 1.172e-03, 3.383e-01, -2.309e-01, 4.094e-01, -3.521e-01, -2.314e-02, 4.370e-03, 3.993e-02, -1.701e-02, 6.141e-02, 2.521e-02, 7.447e-02, -2.860e-02)); + r += mul(s2_4, M4(9.761e-02, -2.114e-01, 6.705e-02, 8.186e-02, -7.398e-02, 1.014e-01, -1.438e-02, 7.350e-02, -6.589e-02, -1.153e-01, -6.204e-02, 1.552e-01, 6.467e-03, -3.129e-01, -4.580e-01, 3.175e-01)); + r += mul(s2_5, M4(-3.723e-02, 4.215e-02, -3.603e-03, -8.876e-03, -3.673e-03, 6.429e-03, -2.291e-03, 1.391e-03, 3.951e-02, 3.606e-02, 4.704e-02, -1.833e-02, 1.196e-01, -1.401e-02, 5.382e-02, -9.219e-02)); + r += mul(s2_6, M4(2.195e-02, -7.400e-03, -5.462e-03, -8.206e-05, -3.480e-02, 6.492e-03, 2.162e-02, -6.141e-02, -9.732e-03, 4.517e-04, 1.305e-02, -3.104e-03, 2.624e-03, -3.583e-03, 3.643e-02, -6.009e-04)); + r += mul(s2_7, M4(-3.295e-02, -3.172e-03, 3.492e-02, -4.382e-02, 3.715e-02, -6.948e-03, -4.113e-02, 2.448e-02, 2.546e-02, 1.106e-02, -3.410e-02, -3.827e-03, -6.474e-02, 1.733e-02, 6.169e-02, -5.725e-02)); + r += mul(s2_8, M4(5.160e-03, -9.184e-03, -8.453e-03, 1.735e-02, -9.055e-04, 
2.434e-03, -2.602e-03, -6.082e-03, 2.434e-03, 1.192e-03, 4.964e-03, 1.187e-02, -1.960e-02, -1.187e-02, 4.503e-02, 4.133e-03)); + r += mul(s3_0, M4(-3.137e-02, 2.267e-03, -2.172e-02, -6.702e-03, -5.108e-02, -1.915e-02, -5.771e-02, -1.442e-03, -5.126e-02, -3.757e-03, -2.437e-03, -2.560e-02, 8.972e-03, 5.875e-03, -2.397e-02, 6.039e-03)); + r += mul(s3_1, M4(4.555e-02, 3.433e-02, 8.142e-03, -2.053e-02, 1.649e-03, 1.561e-02, -5.300e-03, -3.121e-02, 5.693e-01, -9.351e-02, -1.264e-01, -1.818e-01, 1.213e-01, -1.382e-02, 3.586e-02, -3.418e-02)); + r += mul(s3_2, M4(-1.550e-02, 7.598e-03, -5.015e-03, 1.551e-02, -4.808e-03, -9.668e-03, -1.379e-03, -2.895e-05, -5.092e-02, -1.590e-01, -3.258e-02, 1.031e-01, -2.058e-02, -1.988e-02, 1.329e-02, 6.773e-03)); + r += mul(s3_3, M4(-1.773e-02, 6.275e-02, -1.810e-02, 3.388e-02, 4.458e-02, -5.655e-02, 4.576e-02, -1.008e-01, 2.308e-02, 2.453e-03, -1.786e-02, -2.168e-02, 3.045e-02, 4.050e-02, 4.333e-02, -6.095e-03)); + r += mul(s3_4, M4(3.527e-01, -5.137e-01, 2.419e-01, -9.781e-02, -1.164e-01, 2.269e-01, -7.007e-02, 2.617e-01, -1.806e-01, -9.152e-02, -5.043e-02, 2.164e-01, -5.430e-02, -2.751e-01, 6.120e-02, 8.343e-02)); + r += mul(s3_5, M4(-4.383e-02, 5.480e-02, -6.708e-03, 1.620e-02, 6.208e-05, -5.279e-03, 4.534e-03, -2.042e-02, 7.568e-02, 5.119e-02, 2.790e-02, -2.173e-01, 5.097e-02, -1.295e-02, 5.325e-02, -1.220e-01)); + r += mul(s3_6, M4(-1.231e-02, 1.110e-03, -2.134e-02, 2.965e-03, -1.631e-02, 1.171e-02, -8.523e-04, -2.774e-02, -2.671e-03, -5.021e-03, 1.219e-03, -1.524e-03, 3.867e-03, -2.020e-02, 4.456e-02, 6.040e-03)); + r += mul(s3_7, M4(-3.383e-02, 1.125e-02, 9.401e-02, -1.240e-01, 4.461e-02, -2.716e-02, -9.338e-03, 1.624e-03, 1.596e-02, 1.825e-02, -1.462e-02, -2.676e-02, -1.739e-02, 8.447e-03, 2.961e-02, -1.786e-02)); + r += mul(s3_8, M4(9.271e-03, -5.721e-03, -1.180e-02, 4.245e-02, -2.551e-04, 7.369e-03, -1.046e-02, 5.688e-03, -1.398e-02, 9.160e-06, 2.457e-02, 8.210e-03, -1.589e-03, -4.574e-03, 3.357e-02, 2.664e-02)); + r += 
V4(3.001e-04, 1.819e-03, -1.517e-04, 1.301e-03); + return tanh(r); +} + +void Pass10(uint2 blockStart, uint3 tid) { /* Last pass: one thread per 2x2 block of OUTPUT pixels. gxy is the block's top-left output pixel; the network is evaluated once at source texel (gxy >> 1) and its four outputs become luma offsets for the four pixels. */ + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + /* Threads whose block origin falls outside the output do nothing. */ if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + /* Center of the source texel, scaled by pt into sampling coordinates. */ float2 pos = ((gxy >> 1) + 0.5) * pt; + + /* 3x3 neighbourhood read through l0 (macro defined earlier in this pass, outside this chunk); tap k = (dy + 1) * 3 + (dx + 1). */ V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + /* s1_k = -max(-s0_k, 0) = min(s0_k, 0): the negative part of each tap. */ V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + /* s0_k is then reduced in place to its positive part, so s0_k + s1_k is the original sample. */ s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + /* Same taps and positive/negative split for the second source, read through l1. */ V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + /* f0 returns tanh(...), so each component of r is bounded to [-1, 1]. */ V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, 
s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + + /* Color matrices: the first row of rgb2yuv (0.299, 0.587, 0.114) produces the luma that r adjusts; yuv2rgb converts the result back to RGB. */ static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + /* Step back half an output step per axis from the source texel center to reach the block's first pixel. NOTE(review): this is that pixel's center only if opt == pt / 2 (2x upscale) - confirm. */ pos -= 0.5f * opt; + /* Pixel (0, 0): sample INPUT with SL, add r.x to the luma, clamp the luma to [0, 1], keep the sampled chroma, convert back and store with alpha 1. The other three pixels repeat this with their own component of r. */ float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + /* Pixel (+1, 0) takes r.y. */ ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + /* Pixel (+1, +1) takes r.w. */ ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + /* Pixel (0, +1) takes r.z. */ --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/CuNNy/CuNNy-8x8C-NVL.hlsl b/src/Effects/CuNNy/CuNNy-8x8C-NVL.hlsl new file mode 100644 index 000000000..4bf58285f --- /dev/null +++ b/src/Effects/CuNNy/CuNNy-8x8C-NVL.hlsl @@ -0,0 +1,1573 @@ +// CuNNy 8x8C BILINEAR RGB NVL - https://github.com/cunnyplapper/CuNNy + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME CuNNy-D08N08 + +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState SP; + +//!SAMPLER +//!FILTER LINEAR +SamplerState SL; + +//!COMMON +#define O(t, p) t.SampleLevel(SP, pos + p * pt, 0) +#define V4 min16float4 +#define M4 min16float4x4 + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t0; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t1; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t2; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +//!FORMAT R8G8B8A8_SNORM +Texture2D t3; + +//!PASS 1 +//!DESC in +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN INPUT +//!OUT t0, t1 + +#define l0(x, y) min16float((dot(float3(-2.295e-01, -4.396e-01, -9.400e-02), O(INPUT, float2(x, y)).rgb) + 4.020e-01)) + +V4 f0(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) { /* First-layer 3x3 filter producing the four values stored to t0: each V4 holds the four per-output weights of one tap and is scaled by that tap's scalar input; the final V4 is the bias. */ + V4 r = 0.0; + r += V4(-6.143e-02, 4.017e-02, -1.294e-02, -3.488e-02) * s0_0; + r += V4(4.633e-02, 1.525e-01, -6.941e-02, -8.333e-03) * s0_1; + r += V4(1.821e-02, -4.740e-02, 4.913e-03, 2.561e-02) * s0_2; + r += V4(2.997e-02, 7.561e-02, -1.418e-01, 1.597e-01) * s0_3; + r += V4(6.072e-01, -2.588e-01, 3.815e-01, -1.866e-02) * s0_4; + r += V4(-3.722e-01, 8.227e-02, -5.957e-02, -2.609e-01) * s0_5; + r += V4(3.887e-02, -2.027e-02, 2.253e-02, 1.248e-01) * s0_6; + r += V4(-1.460e-01, 4.258e-02, -4.664e-02, 2.242e-01) * s0_7; + r += V4(-1.609e-01, -3.864e-03, -3.072e-02, -2.202e-01) * s0_8; + r += V4(-5.020e-03, -2.675e-02, -3.797e-02, 7.620e-03); + return r; +} + +V4 f1(min16float s0_0, min16float s0_1, min16float s0_2, min16float s0_3, min16float s0_4, min16float s0_5, min16float s0_6, min16float s0_7, min16float s0_8) 
{ /* Same structure as f0 with a second weight set; Pass1 stores this result to t1. */ + V4 r = 0.0; + r += V4(-1.903e-03, 1.630e-02, -2.729e-02, -8.977e-03) * s0_0; + r += V4(1.878e-02, -3.107e-01, -4.692e-01, 2.842e-01) * s0_1; + r += V4(-9.660e-03, -8.709e-02, -1.921e-01, 1.169e-01) * s0_2; + r += V4(-4.066e-03, 2.283e-02, 2.079e-02, -7.654e-02) * s0_3; + r += V4(-1.286e-02, 5.680e-01, 2.607e-01, -3.642e-01) * s0_4; + r += V4(1.919e-02, -2.319e-01, 3.279e-01, 8.074e-02) * s0_5; + r += V4(5.403e-02, -2.921e-02, 1.082e-02, 6.616e-02) * s0_6; + r += V4(-3.475e-01, 2.298e-02, 1.890e-01, -5.933e-02) * s0_7; + r += V4(9.621e-03, 3.558e-02, -1.195e-01, -2.875e-02) * s0_8; + r += V4(1.049e-01, -1.181e-02, -1.058e-02, -1.062e-02); + return r; +} + +void Pass1(uint2 blockStart, uint3 tid) { /* One thread per input pixel: gxy is the pixel handled by this thread and pos its center scaled by pt; threads outside the input size return early. */ + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + /* l0 reduces the RGB sample at each of the 3x3 taps to one scalar (fixed dot product plus offset); tap k = (dy + 1) * 3 + (dx + 1). */ min16float s0_0 = l0(-1.0, -1.0); + min16float s0_1 = l0(0.0, -1.0); + min16float s0_2 = l0(1.0, -1.0); + min16float s0_3 = l0(-1.0, 0.0); + min16float s0_4 = l0(0.0, 0.0); + min16float s0_5 = l0(1.0, 0.0); + min16float s0_6 = l0(-1.0, 1.0); + min16float s0_7 = l0(0.0, 1.0); + min16float s0_8 = l0(1.0, 1.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8); +} + +//!PASS 2 +//!DESC conv1 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { /* 36 V4 inputs (four groups of nine taps), each multiplied by its own 4x4 matrix with mul(vector, matrix); a V4 bias is added last and the sum is returned as is. */ + V4 r = 0.0; + r += mul(s0_0, M4(-1.079e-01, -6.209e-03, -5.165e-02, 3.059e-01, -1.088e-01, 1.518e-02, -1.715e-01, 
-4.790e-01, 9.833e-03, 8.433e-03, -1.041e-01, -1.785e-01, 3.452e-02, -9.679e-03, 4.601e-02, -9.835e-02)); + r += mul(s0_1, M4(-3.255e-02, 1.189e-01, 2.081e-01, 4.694e-01, 8.376e-01, 7.644e-02, 4.126e-03, -2.069e-01, 3.555e-01, 7.616e-02, 6.639e-03, 1.045e-01, 2.174e-02, 1.988e-01, 8.994e-02, -1.614e-01)); + r += mul(s0_2, M4(-2.806e-01, -2.092e-01, 1.030e-01, -1.734e-01, 1.701e-01, 3.388e-02, -8.975e-02, -2.286e-01, 8.847e-01, 2.175e-01, -3.606e-01, -2.783e-01, 5.524e-02, 6.168e-02, -1.753e-01, 9.562e-03)); + r += mul(s0_3, M4(-5.390e-02, 1.193e-01, -2.307e-01, 7.918e-01, 1.093e-01, 1.514e-01, -1.198e-01, 1.970e-01, -6.404e-02, -3.627e-02, -3.039e-01, 3.989e-01, -4.519e-02, 2.935e-02, -7.276e-02, 1.666e-01)); + r += mul(s0_4, M4(-4.617e-01, 4.250e-01, -1.689e-01, 2.009e-01, 2.763e-01, -5.223e-01, 6.812e-02, 7.042e-01, 6.269e-01, 4.437e-01, -8.346e-01, 2.686e-01, -1.802e-02, -1.487e-01, -3.822e-02, 2.222e-01)); + r += mul(s0_5, M4(3.233e-01, -1.180e-03, -4.640e-02, 1.197e-01, 4.480e-01, -6.224e-01, -1.695e-01, -3.179e-02, 1.063e+00, -2.599e-01, -8.862e-02, -1.894e-01, -2.136e-01, 1.618e-01, 1.563e-01, 1.551e-01)); + r += mul(s0_6, M4(7.462e-02, -1.786e-01, 1.469e-01, 3.023e-01, 1.644e-01, -1.871e-01, 2.594e-01, -4.580e-01, 1.328e-01, -1.746e-01, -9.667e-02, -5.360e-02, 6.538e-02, 3.959e-02, 7.461e-02, -1.333e-01)); + r += mul(s0_7, M4(-5.538e-02, -4.836e-01, -6.220e-02, -4.814e-01, 1.075e+00, -2.161e-01, 2.104e-01, 3.135e-01, -8.273e-02, 9.291e-02, 7.861e-02, 1.998e-01, -2.965e-02, -1.596e-01, -6.912e-02, 2.828e-02)); + r += mul(s0_8, M4(3.116e-02, 2.568e-01, 1.662e-02, -1.116e-01, 9.998e-01, -5.913e-01, -1.385e-01, -6.196e-01, 1.286e-02, 2.039e-01, -1.960e-01, 4.033e-01, 1.547e-01, -9.270e-02, -2.751e-02, 1.296e-01)); + r += mul(s1_0, M4(-4.361e-02, -2.010e-02, 2.791e-02, 1.056e-02, -1.917e-01, 1.587e-02, -7.196e-02, 1.375e-01, -2.334e-03, 4.510e-02, 3.333e-02, -1.447e-02, 3.589e-02, -4.001e-02, 7.667e-02, 6.927e-02)); + r += mul(s1_1, M4(1.005e-01, 3.245e-01, 
2.143e-01, -2.198e-02, 9.360e-02, -8.266e-02, 2.428e-02, 7.946e-02, -8.792e-02, -5.322e-02, 1.436e-02, 5.331e-03, 1.547e-01, 1.074e-01, 2.043e-02, 3.151e-01)); + r += mul(s1_2, M4(-1.417e-01, -1.429e-01, 9.010e-02, -2.719e-01, -2.314e-01, -1.211e-01, -5.093e-02, 1.336e-03, -5.519e-02, -4.103e-02, -1.465e-01, 4.377e-02, 1.625e-01, 9.260e-02, -1.132e-01, 1.579e-01)); + r += mul(s1_3, M4(-4.690e-02, 3.414e-02, -3.446e-01, 5.756e-01, 4.127e-02, 5.835e-02, -7.117e-02, -1.233e-01, -1.676e-01, -6.558e-02, -1.035e-01, 2.054e-01, 3.734e-02, 1.632e-02, -1.065e-01, 1.426e-01)); + r += mul(s1_4, M4(2.777e-03, 5.095e-02, 7.937e-03, -5.967e-02, 1.193e-01, 3.256e-02, 9.556e-02, 8.563e-02, -1.386e-01, 2.129e-01, 1.093e-01, -4.675e-02, 1.955e-02, -5.246e-02, -2.056e-01, 2.337e-01)); + r += mul(s1_5, M4(3.632e-01, -1.052e-01, -4.334e-02, 7.445e-02, -1.537e-01, -5.209e-02, -7.747e-02, 9.552e-02, -1.148e-01, -2.824e-01, 1.545e-03, -7.691e-02, -7.978e-02, 2.649e-01, 1.413e-01, 9.935e-02)); + r += mul(s1_6, M4(-5.139e-02, 1.473e-01, 1.323e-01, 1.844e-01, -1.223e-01, -9.135e-02, 3.014e-01, -1.103e-01, 4.372e-02, 9.732e-02, 1.080e-01, 1.003e-01, 3.050e-02, -2.316e-02, 6.663e-02, -4.809e-02)); + r += mul(s1_7, M4(1.893e-01, -4.030e-01, 4.219e-02, -2.791e-01, 2.341e-01, 2.789e-01, -4.978e-02, -1.129e-01, 7.288e-02, -1.657e-01, 5.783e-02, -2.239e-01, -4.618e-02, -1.246e-01, -4.753e-02, -2.995e-02)); + r += mul(s1_8, M4(1.227e-01, -3.217e-02, 4.234e-02, -1.821e-01, -2.965e-01, -1.050e-01, -7.545e-03, -7.288e-02, -8.445e-02, 1.968e-01, -3.038e-02, -1.015e-02, 1.080e-01, -3.768e-02, -2.808e-02, 1.682e-01)); + r += mul(s2_0, M4(4.262e-02, 5.784e-02, 1.454e-02, 3.577e-02, -1.138e-01, 1.886e-01, 1.264e-01, 1.255e-01, -9.393e-02, -8.500e-02, -9.118e-02, -9.240e-04, -4.070e-02, -6.934e-02, 9.588e-02, -2.368e-01)); + r += mul(s2_1, M4(-3.233e-01, -1.860e-01, -1.585e-01, 9.243e-02, -6.862e-02, -2.190e-01, -1.071e-01, -3.454e-01, 4.177e-02, 4.386e-02, 2.941e-02, 5.866e-02, 2.344e-01, 2.047e-01, 
-2.278e-01, 4.611e-02)); + r += mul(s2_2, M4(8.711e-02, 2.480e-02, -9.937e-02, -3.438e-02, 9.121e-03, 3.109e-01, 7.038e-03, 2.100e-01, 1.072e-01, -1.671e-01, -1.322e-01, -6.579e-02, 2.988e-01, -2.884e-01, -1.310e-01, -2.357e-02)); + r += mul(s2_3, M4(-7.846e-02, 7.704e-02, -2.691e-01, -3.412e-01, 6.088e-02, -1.093e-01, -1.281e-01, 5.138e-01, 1.810e-02, 9.831e-02, 2.666e-01, -2.339e-01, 7.093e-02, -1.529e-01, 4.167e-02, -1.516e-01)); + r += mul(s2_4, M4(5.444e-02, 2.909e-02, -2.713e-01, -5.557e-02, -1.632e-01, 2.026e-01, -1.414e-01, -1.113e-02, -1.238e-01, -1.247e-01, 2.065e-01, 2.474e-01, 4.052e-01, -2.307e-02, 7.400e-02, -2.563e-01)); + r += mul(s2_5, M4(-8.010e-02, -2.663e-01, -1.548e-01, 5.065e-02, 1.857e-01, -4.630e-01, -9.547e-02, -1.356e-01, -1.459e-01, -1.654e-01, -1.372e-01, -1.509e-01, -2.538e-01, 8.992e-02, 6.959e-02, -4.835e-02)); + r += mul(s2_6, M4(-6.965e-02, -1.484e-02, -1.520e-01, 8.388e-03, -6.074e-01, -3.065e-02, -1.481e-01, -5.162e-01, 2.566e-02, -6.327e-04, -2.711e-01, 1.572e-01, 7.159e-02, 2.031e-01, 5.821e-02, -1.886e-01)); + r += mul(s2_7, M4(8.521e-02, 1.798e-01, -1.517e-01, 1.224e-01, -3.837e-01, -1.884e-01, -1.653e-01, -1.620e-01, -1.856e-01, 3.767e-01, -8.799e-03, 1.930e-01, 1.777e-01, -6.695e-01, 5.634e-02, -1.042e-01)); + r += mul(s2_8, M4(2.738e-02, 7.023e-02, -6.588e-02, 4.945e-02, -2.180e-01, -4.060e-01, -1.982e-01, -2.085e-01, 9.355e-02, 6.833e-03, -8.415e-02, -3.284e-02, 1.634e-01, 6.600e-01, 2.179e-02, 3.345e-01)); + r += mul(s3_0, M4(-3.002e-01, -2.938e-01, -1.650e-01, -7.428e-01, -9.888e-02, 2.103e-02, 8.913e-02, -1.157e-01, 2.042e-02, -9.108e-03, -5.994e-02, 5.539e-03, -1.426e-01, 1.501e-01, 1.070e-01, 5.497e-02)); + r += mul(s3_1, M4(-1.402e+00, -8.043e-01, -6.448e-01, -4.208e-02, 4.390e-02, 3.385e-02, -4.404e-02, -3.245e-02, 1.901e-01, -9.100e-02, -3.288e-02, -5.554e-02, -1.932e-01, -2.189e-01, -2.138e-01, -1.562e-01)); + r += mul(s3_2, M4(-2.942e+00, -1.968e-01, -2.618e-01, -9.005e-02, -3.651e-02, -3.300e-02, 4.116e-02, 
1.509e-01, 9.940e-02, -3.205e-02, -1.049e-01, -4.534e-02, 4.099e-01, 1.030e-01, -1.503e-01, 8.364e-02)); + r += mul(s3_3, M4(2.111e-01, 4.100e-01, -2.844e-01, -7.623e-01, 1.595e-02, -3.347e-01, -1.606e-01, 3.324e-01, 6.465e-02, 2.734e-01, 2.176e-01, 7.220e-03, -7.500e-03, -2.152e-01, 6.906e-02, 1.170e-02)); + r += mul(s3_4, M4(1.670e-01, 4.952e-01, -5.006e-01, 9.297e-01, -7.439e-02, 6.976e-01, 3.422e-01, 1.587e-01, 2.158e-01, -5.071e-01, 7.355e-02, -3.825e-01, -3.580e-02, -3.637e-01, 5.284e-02, -6.810e-02)); + r += mul(s3_5, M4(-5.111e-01, -2.216e-01, -1.097e-01, -6.024e-01, 7.385e-02, -1.143e-01, -3.750e-02, 2.547e-02, -4.781e-02, 3.365e-02, -1.393e-01, -7.232e-02, -3.006e-01, -8.865e-02, 9.295e-02, 1.912e-02)); + r += mul(s3_6, M4(4.437e-01, 4.625e-01, 5.880e-02, 1.085e-01, -1.008e-01, 1.997e-01, 1.448e-02, 1.818e-01, -5.865e-02, -1.208e-01, -3.001e-01, 5.187e-02, -3.114e-01, 2.439e-01, 3.744e-02, -3.896e-01)); + r += mul(s3_7, M4(2.651e-01, 3.014e-01, 3.736e-02, 8.684e-01, 1.542e-01, -5.957e-01, -1.418e-01, -4.727e-01, -3.251e-01, 1.105e-01, -8.376e-02, -2.022e-01, -3.469e-01, 5.911e-01, 4.129e-02, 2.129e-01)); + r += mul(s3_8, M4(-1.414e-01, -1.073e-01, 1.316e-01, -1.006e-01, 1.568e-01, 3.334e-01, -1.739e-01, 2.173e-02, 2.287e-01, 2.815e-01, -7.362e-02, -1.505e-02, -2.690e-02, -1.337e-01, -9.948e-04, 1.909e-01)); + r += V4(-1.907e-02, -5.779e-04, 2.647e-01, -6.629e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { /* Second output group of this pass (Pass2 stores it to t3): one 4x4 matrix per V4 input accumulated with mul(vector, matrix), then a V4 bias; the sum is returned as is. */ + V4 r = 0.0; + r += mul(s0_0, M4(-1.029e-02, -4.427e-02, -1.311e-02, -6.809e-02, -1.820e-01, 2.692e-02, -4.692e-01, 1.900e-01, -2.938e-01, -6.187e-02, 1.741e-01, -1.017e-01, 1.412e-01, 1.843e-02, 2.606e-02, 9.392e-02)); + r += mul(s0_1, M4(-4.781e-01, 
-1.942e-01, 1.552e-01, -3.645e-01, 1.029e-01, 1.159e-01, -8.964e-02, 9.631e-02, -3.120e-01, 2.637e-02, -1.032e-02, -1.627e-02, 1.046e-01, -3.700e-02, -3.136e-01, -1.138e-01)); + r += mul(s0_2, M4(-5.959e-01, 5.592e-02, -1.873e-01, -5.695e-02, 3.835e-01, 1.844e-01, 1.652e-02, 5.765e-01, -5.859e-01, 4.167e-02, -1.965e-01, -1.344e-01, 5.900e-01, 1.509e-02, 1.127e-01, -2.605e-02)); + r += mul(s0_3, M4(1.675e-01, 9.790e-02, 2.316e-01, -1.930e-01, -1.233e-01, 1.554e-01, 7.309e-01, -9.317e-02, 3.019e-01, 3.548e-02, 1.979e-01, 2.452e-01, -1.320e-01, -2.426e-03, -6.473e-02, 2.362e-01)); + r += mul(s0_4, M4(3.238e-01, -3.543e-01, 1.936e-02, -3.525e-01, 6.310e-02, 5.911e-01, 3.797e-01, -2.553e-01, 4.402e-01, -4.429e-02, 8.982e-01, 4.717e-01, -8.769e-02, 2.790e-01, -1.748e-01, 2.149e-01)); + r += mul(s0_5, M4(-1.619e-01, -8.726e-02, 9.183e-02, -2.527e-01, 2.260e-01, 3.167e-01, 1.057e-01, -6.114e-01, -1.583e-01, 1.678e-01, 2.295e-01, -3.196e-02, 4.209e-01, -6.903e-02, -1.373e-01, 2.212e-01)); + r += mul(s0_6, M4(6.322e-02, -5.028e-02, -5.582e-02, -8.759e-02, 1.828e-01, -7.608e-02, 3.712e-01, -2.018e-01, 7.812e-02, -1.612e-01, 3.925e-02, 2.454e-02, -3.222e-02, -5.491e-02, -4.835e-02, -8.672e-02)); + r += mul(s0_7, M4(2.302e-01, 3.311e-01, -1.024e-01, 1.689e-01, 1.384e-01, -6.313e-01, 2.495e-01, 1.109e+00, -1.373e-01, 4.860e-04, 3.466e-02, -4.459e-01, -8.015e-02, -1.001e-01, 1.988e-01, -7.793e-02)); + r += mul(s0_8, M4(-1.753e-01, 6.144e-02, 6.860e-03, -6.542e-02, 1.729e-01, 3.502e-02, -2.058e-01, 2.777e-01, 2.547e-01, -3.623e-01, -1.117e-01, -4.370e-01, 2.300e-01, -3.608e-02, 7.399e-02, -2.717e-02)); + r += mul(s1_0, M4(6.933e-02, 7.252e-03, -4.641e-01, 3.847e-01, -1.899e-01, -4.916e-02, -8.596e-02, 6.860e-02, -1.260e-02, -1.411e-02, -1.429e-01, -7.949e-02, 7.628e-02, -3.338e-02, 1.520e-01, -1.907e-01)); + r += mul(s1_1, M4(-1.096e-01, -1.356e-01, -5.957e-01, 1.985e-01, -1.362e-01, -3.702e-02, 5.611e-02, -1.867e-01, -1.512e-01, 7.857e-02, -5.154e-02, 5.215e-02, -8.526e-02, 
-7.589e-02, 4.015e-01, -3.616e-01)); + r += mul(s1_2, M4(-5.254e-01, 5.750e-02, -2.225e-01, 3.322e-01, -5.401e-02, 5.723e-02, -1.843e-01, 1.116e-01, -6.092e-02, -1.324e-02, 1.852e-01, 2.526e-01, 5.620e-01, -4.316e-02, 1.137e-01, -2.298e-01)); + r += mul(s1_3, M4(1.092e-01, 1.151e-01, -6.113e-01, 4.262e-01, -1.108e-02, 1.270e-01, 1.070e-01, 6.105e-02, -6.840e-02, 9.359e-03, 1.322e-02, 6.144e-02, -3.119e-02, -7.442e-02, -3.225e-02, 2.445e-01)); + r += mul(s1_4, M4(4.469e-01, -3.722e-01, 1.759e-02, 1.817e-02, -1.038e-01, 1.900e-01, -1.480e-02, -1.003e-01, 2.094e-02, -3.295e-01, 3.884e-02, -3.455e-01, -6.084e-02, 2.868e-01, -2.548e-01, 1.942e-01)); + r += mul(s1_5, M4(-2.857e-01, -4.954e-02, 1.537e-01, -3.915e-01, 1.973e-01, 1.338e-02, 1.035e-01, -5.796e-02, -3.144e-01, 9.483e-02, -1.388e-01, -1.597e-01, 2.885e-01, -5.359e-02, -1.151e-01, 2.880e-01)); + r += mul(s1_6, M4(-8.887e-03, 3.547e-03, -7.855e-02, -3.055e-01, 2.156e-01, 2.369e-02, -1.089e-01, 1.467e-02, 3.331e-02, 4.691e-02, -3.707e-02, -4.816e-02, 1.843e-02, -2.316e-02, -5.976e-02, -1.949e-01)); + r += mul(s1_7, M4(-1.638e-02, 2.342e-01, 1.319e-01, 1.744e-01, 1.130e-01, -3.627e-01, -1.653e-02, 5.432e-02, 1.635e-01, 3.669e-01, 2.821e-02, 7.278e-02, 4.355e-02, -3.466e-02, 1.538e-01, 1.175e-02)); + r += mul(s1_8, M4(-1.298e-01, 1.342e-01, 9.409e-03, -8.570e-02, 1.419e-01, -1.479e-01, -5.092e-02, 1.062e-01, -2.235e-03, -2.221e-01, -8.666e-02, 2.477e-01, 2.057e-01, -9.286e-02, 9.491e-03, 8.760e-02)); + r += mul(s2_0, M4(-3.941e-02, -5.823e-02, -8.454e-02, -6.757e-03, -4.737e-01, -2.306e-02, 6.786e-02, 1.628e-01, 3.545e-01, -1.548e-02, -4.186e-02, -1.596e-01, 3.441e-01, -5.585e-02, -1.227e-01, 4.865e-03)); + r += mul(s2_1, M4(-2.910e-02, 4.163e-02, -9.177e-02, 3.575e-02, -1.154e-01, 1.228e-01, -1.005e-01, 4.365e-01, 8.323e-02, -7.856e-04, 9.573e-02, -2.650e-01, 1.488e-01, -1.503e-02, 3.164e-02, 1.331e-01)); + r += mul(s2_2, M4(-7.101e-02, 3.065e-02, 4.069e-02, 8.709e-02, 1.813e-01, -7.222e-02, 4.975e-02, 2.198e-01, 
-1.686e-01, 7.047e-02, 9.845e-02, -2.971e-02, -1.017e-01, 3.237e-02, 8.955e-02, -4.723e-02)); + r += mul(s2_3, M4(-1.017e-01, 5.565e-02, 3.915e-02, -4.115e-02, -1.513e-01, 1.492e-02, -1.683e-01, 5.292e-01, -3.045e-01, 1.206e-02, -2.988e-02, -1.685e-01, -1.559e-01, -1.358e-02, -1.107e-02, 8.501e-02)); + r += mul(s2_4, M4(-1.248e-02, -3.360e-02, -1.182e-01, -9.052e-02, 4.120e-01, -2.568e-01, 1.347e-02, 2.203e-01, 5.053e-02, 7.868e-02, 1.555e-01, -5.636e-01, 1.146e-01, -5.034e-02, 3.738e-02, 1.086e-01)); + r += mul(s2_5, M4(-1.433e-02, 1.914e-02, 5.146e-02, 5.477e-02, -1.658e-01, 7.262e-03, -1.016e-01, 2.164e-01, -1.221e-02, 2.271e-01, 8.276e-02, -1.134e-01, -7.808e-02, 7.249e-02, -1.490e-01, 1.520e-01)); + r += mul(s2_6, M4(9.872e-02, 7.283e-02, 4.571e-03, 4.956e-03, 1.392e-01, 6.305e-02, 2.820e-01, 2.562e-01, 2.470e-01, 2.919e-01, -2.354e-01, 8.940e-02, -3.639e-01, 1.666e-02, -3.139e-01, 3.009e-01)); + r += mul(s2_7, M4(7.391e-02, -9.179e-03, 2.303e-02, -9.008e-02, -1.926e-01, 1.451e-01, -3.073e-02, 3.175e-01, 1.068e-01, 4.190e-01, -3.119e-01, -3.721e-01, 1.989e-01, 6.877e-02, -6.490e-02, 3.883e-01)); + r += mul(s2_8, M4(3.436e-02, -1.500e-01, 6.370e-02, 6.487e-02, -6.616e-02, 1.150e-01, -1.577e-02, -1.150e-02, 3.625e-02, 1.807e-01, 8.368e-02, -1.030e-01, 2.969e-01, -3.736e-01, -5.248e-01, -6.410e-02)); + r += mul(s3_0, M4(-1.904e-01, 3.793e-01, -4.094e-01, -3.506e-01, -3.057e-01, -5.702e-02, -1.412e-01, 1.145e-01, 3.071e-01, -1.208e-02, -2.916e-02, -1.679e-01, 2.943e-01, -3.413e-02, 1.985e-01, -1.143e-01)); + r += mul(s3_1, M4(-4.336e-01, 7.680e-01, -6.700e-01, -5.162e-01, 1.232e-01, 7.445e-02, -1.558e-01, 1.520e-01, 1.238e-01, -4.562e-02, 8.931e-02, 1.176e-01, -1.373e-01, 1.201e-01, 9.010e-02, 6.615e-02)); + r += mul(s3_2, M4(7.564e-02, 1.828e-02, -3.335e-01, 7.187e-01, -2.198e-01, -3.277e-02, 1.775e-01, -9.321e-02, -5.860e-02, 4.389e-02, -1.336e-02, -2.077e-03, 1.672e-01, 2.256e-02, 2.100e-01, 5.344e-02)); + r += mul(s3_3, M4(-6.569e-02, 1.159e-01, 7.211e-01, 
-2.791e-01, -1.254e-01, 9.532e-02, -1.811e-01, 3.237e-01, -4.603e-01, -7.759e-02, 9.809e-02, -5.216e-02, -3.020e-01, -5.589e-02, -8.145e-02, -1.713e-02)); + r += mul(s3_4, M4(-2.708e-01, -1.237e+00, 2.507e-01, -1.349e+00, 3.077e-01, -4.526e-01, 3.560e-01, -2.586e-01, 2.017e-01, 2.772e-01, 9.947e-02, 1.834e-01, -2.344e-01, 8.792e-02, 1.398e-01, 4.550e-02)); + r += mul(s3_5, M4(2.083e-01, 2.553e-01, 2.108e-01, 1.616e+00, 2.526e-01, 9.398e-03, -1.027e-01, -9.457e-03, 1.998e-02, 1.143e-01, 1.341e-01, 1.702e-03, -2.370e-01, 1.658e-01, -6.104e-02, 8.290e-02)); + r += mul(s3_6, M4(-8.079e-02, 1.286e-02, -2.603e-01, -3.768e-01, -1.689e-01, 1.810e-02, 4.330e-02, -1.769e-01, -3.120e-02, 3.590e-01, 2.761e-02, 1.187e-01, 1.727e-01, 7.357e-02, -1.063e-01, -2.804e-02)); + r += mul(s3_7, M4(1.037e-01, -5.252e-01, 3.481e-01, 3.128e-01, -8.170e-02, 7.354e-01, -5.280e-02, -1.332e-01, 2.176e-01, 6.305e-01, -1.500e-01, 3.806e-01, 3.882e-02, -3.656e-01, -3.983e-02, -2.263e-01)); + r += mul(s3_8, M4(-1.199e-02, 1.848e-01, -4.894e-01, -3.677e-01, 1.045e-01, -3.526e-02, -5.185e-02, 2.838e-02, 9.936e-02, 1.189e-01, -6.276e-03, 2.501e-01, 1.499e-01, -4.103e-01, -1.141e-01, 1.415e-01)); + r += V4(-1.419e-02, 1.370e-02, -1.518e-02, 1.616e-02); + return r; +} + +void Pass2(uint2 blockStart, uint3 tid) { /* One thread per pixel of the intermediate textures: reads the 3x3 neighbourhoods of t0 (through l0) and t1 (through l1), splits every sample into positive and negative parts, and stores f0 to t2 and f1 to t3. */ + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + /* Threads outside the input size do nothing. */ if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + /* Center of this thread's pixel, scaled by pt into sampling coordinates. */ float2 pos = (gxy + 0.5) * pt; + + /* Tap k = (dy + 1) * 3 + (dx + 1). */ V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + /* s1_k = -max(-s0_k, 0) = min(s0_k, 0): the negative part of each tap. */ V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = 
-max(-s0_8, 0.0); + /* s0_k is then reduced in place to its positive part. */ s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + /* Same taps and positive/negative split for t1: s2_k ends up as the positive part, s3_k as the negative part. */ V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + /* Both filters receive the same 36 inputs and differ only in their weights. */ t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 3 +//!DESC conv2 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { /* One 4x4 weight matrix per V4 input (36 in total) accumulated with mul(vector, matrix), then a V4 bias; the sum is returned as is. */ + V4 r = 0.0; + r += mul(s0_0, 
M4(2.433e-01, -3.881e-01, 1.184e-01, -6.328e-02, 2.093e-01, -1.453e-01, -1.044e-01, -1.624e-03, -1.289e-01, 7.280e-02, -2.728e-02, -3.045e-02, -2.419e-02, -1.908e-02, -1.990e-03, 6.380e-02)); + r += mul(s0_1, M4(1.241e-01, 2.907e-01, -7.679e-02, 6.609e-02, 4.055e-01, 1.399e-01, 1.488e-01, -8.759e-02, 9.889e-02, -1.191e-02, -2.143e-02, -4.582e-02, 1.655e-02, -1.023e-01, -3.922e-02, 7.866e-02)); + r += mul(s0_2, M4(1.018e-01, 5.323e-02, -1.317e-01, 1.143e-02, 3.795e-01, 1.037e-01, -5.552e-02, 9.257e-03, -1.724e-01, -1.334e-01, -1.614e-02, -1.734e-02, 3.272e-01, -1.067e-01, 1.857e-01, 1.775e-02)); + r += mul(s0_3, M4(-9.351e-02, -8.100e-02, 1.528e-01, -6.036e-02, 5.321e-01, -2.519e-01, -1.133e-01, -1.084e-01, 9.980e-02, 6.664e-02, -4.920e-02, 5.423e-02, -3.489e-02, 6.527e-03, 4.906e-02, 3.445e-02)); + r += mul(s0_4, M4(-7.289e-01, -1.403e-01, -2.049e-01, -8.240e-03, -2.363e-01, 2.203e-01, 2.280e-01, -2.894e-01, 6.564e-03, -1.410e-01, 4.797e-02, -4.672e-02, 2.459e-01, 7.236e-02, 2.042e-01, 1.049e-01)); + r += mul(s0_5, M4(-1.679e-01, -1.223e-01, 5.324e-02, -2.159e-03, -1.943e-03, 1.791e-01, 1.407e-02, 6.576e-02, 1.712e-02, 2.206e-02, 1.580e-02, 5.191e-02, -2.160e-01, -4.008e-01, 1.662e-01, -1.676e-01)); + r += mul(s0_6, M4(2.380e-02, -1.506e-01, -1.850e-01, -7.010e-02, -7.339e-02, -1.088e-01, -1.633e-02, 8.762e-02, 4.002e-02, -9.515e-03, 1.053e-01, -1.318e-03, 3.687e-02, 1.289e-03, -8.238e-02, 3.815e-02)); + r += mul(s0_7, M4(1.500e-01, -2.010e-01, 7.452e-02, -9.216e-02, -9.507e-02, -9.233e-02, -1.655e-01, 9.834e-03, 9.053e-02, 1.116e-01, -6.831e-02, 1.544e-01, -1.290e-02, 2.871e-02, 4.676e-02, 6.820e-02)); + r += mul(s0_8, M4(1.485e-01, 1.043e-02, -1.230e-01, 7.149e-02, -2.033e-01, 3.896e-02, -1.704e-01, 2.718e-02, -8.099e-02, -1.492e-02, 2.033e-02, -1.470e-01, 3.724e-01, -6.581e-02, -8.568e-02, 1.446e-02)); + r += mul(s1_0, M4(2.111e-01, 7.064e-02, 1.062e-01, -3.356e-02, -1.960e-01, 7.451e-02, -4.637e-02, 4.996e-02, 3.633e-01, -2.194e-01, -1.977e-01, -4.877e-01, 
-4.173e-02, 7.349e-02, -8.448e-02, -2.853e-02)); + r += mul(s1_1, M4(-7.255e-02, 2.476e-01, -4.985e-02, -9.240e-03, -2.809e-01, 1.161e-01, -3.182e-02, -2.748e-02, -6.600e-02, -4.941e-03, -3.764e-01, -6.918e-01, -8.408e-02, 1.003e-01, -8.359e-02, -5.399e-02)); + r += mul(s1_2, M4(2.199e-01, -5.369e-03, -6.957e-02, -4.640e-02, 1.564e-01, -1.217e-01, -1.722e-02, -1.767e-02, -6.967e-01, -7.661e-01, -3.545e-01, 5.396e-01, 1.185e-01, -1.727e-01, -4.389e-02, -1.500e-01)); + r += mul(s1_3, M4(-1.075e-01, 1.627e-01, 1.380e-01, -1.190e-02, 9.164e-02, -1.048e-01, -1.901e-01, 3.976e-02, 1.650e-01, 6.450e-01, -3.591e-01, -2.435e-01, 1.574e-02, -1.217e-02, 3.572e-02, -4.591e-02)); + r += mul(s1_4, M4(-2.832e-02, -9.011e-02, -1.354e-01, -9.056e-02, -2.266e-01, 2.679e-01, 4.178e-01, 5.121e-02, -3.697e-01, 5.771e-01, -4.159e-01, -2.154e+00, 8.549e-02, -4.675e-02, 1.880e-01, 6.462e-04)); + r += mul(s1_5, M4(-2.837e-01, -1.020e-01, 1.500e-01, -6.129e-02, 8.551e-02, 6.597e-03, 1.699e-01, 3.306e-03, -1.656e-01, -7.203e-01, -3.878e-01, 6.471e-01, 1.882e-02, -5.250e-02, -6.837e-02, -2.959e-01)); + r += mul(s1_6, M4(8.637e-02, -1.856e-01, -1.333e-01, -8.966e-02, 7.885e-02, -4.568e-02, 5.150e-02, 7.534e-02, 2.183e-01, 5.912e-02, 3.611e-01, 1.868e-01, 7.463e-02, -6.546e-02, 8.653e-03, 2.389e-03)); + r += mul(s1_7, M4(-2.107e-01, -6.405e-02, 1.977e-01, -7.070e-02, 1.735e-01, -3.623e-02, 1.193e-01, -1.876e-02, -3.815e-01, 6.116e-01, -7.696e-01, 1.309e-02, -9.006e-02, 2.588e-01, 2.957e-02, 7.022e-02)); + r += mul(s1_8, M4(1.908e-01, 6.329e-02, -2.448e-01, 2.592e-02, -4.052e-02, -5.949e-02, 8.518e-02, 1.000e-02, -7.463e-01, -5.402e-02, 1.745e-01, 1.433e-01, 2.647e-01, -4.770e-02, 4.625e-02, 1.604e-02)); + r += mul(s2_0, M4(7.971e-02, -1.099e-01, -2.069e-02, -9.078e-02, -5.069e-02, -5.648e-02, 4.399e-02, 1.920e-01, 2.338e-02, -7.457e-02, -2.665e-02, -4.715e-02, 9.422e-02, -7.926e-02, 1.338e-02, -6.313e-02)); + r += mul(s2_1, M4(-1.406e-01, -1.581e-01, -1.626e-01, -2.596e-02, 5.828e-01, 
-2.151e-01, 1.535e-01, -6.966e-01, -5.149e-02, -8.108e-02, 1.054e-02, -4.783e-02, -1.567e-01, 2.909e-01, -6.311e-02, -2.142e-01)); + r += mul(s2_2, M4(-2.106e-02, -5.202e-02, 7.175e-02, -3.027e-02, 1.773e-02, 3.321e-01, 7.717e-02, -1.694e-01, -9.209e-02, -2.248e-02, 1.867e-02, -8.038e-02, 1.468e-01, -1.793e-01, -1.160e-01, -9.110e-02)); + r += mul(s2_3, M4(-4.802e-02, -2.944e-02, -9.839e-02, -1.450e-01, 8.564e-02, -7.952e-02, 4.528e-02, -2.834e-02, 6.636e-02, -2.842e-02, -8.696e-02, -1.222e-02, -3.515e-02, -7.263e-02, -1.196e-01, -6.308e-02)); + r += mul(s2_4, M4(5.217e-02, -1.035e-01, -3.715e-01, -5.292e-01, -1.049e-01, -2.436e-01, 5.207e-01, 9.208e-02, 5.585e-02, 9.715e-02, 4.273e-02, 2.886e-01, -2.044e-01, 8.106e-01, -3.056e-01, 1.353e-01)); + r += mul(s2_5, M4(-3.279e-01, 1.316e-01, -7.622e-02, -9.912e-02, -2.721e-01, -9.490e-02, 5.605e-01, -1.344e-01, -9.724e-02, -8.388e-02, -1.390e-01, 4.640e-01, -1.161e-01, -1.181e-01, -1.599e-01, -2.190e-01)); + r += mul(s2_6, M4(1.149e-01, 8.812e-02, -1.579e-01, -1.800e-01, 4.197e-02, -1.301e-01, 4.577e-02, 5.256e-02, 7.827e-02, 5.972e-02, 1.893e-02, -6.895e-03, -2.070e-04, 1.411e-01, 9.564e-03, 7.850e-02)); + r += mul(s2_7, M4(-1.247e-01, 1.674e-01, 1.048e-01, -6.845e-03, -1.308e-01, 9.539e-02, -1.721e-02, -1.677e-02, 4.760e-03, 1.023e-01, 1.821e-01, -3.942e-02, 2.139e-01, -1.663e-02, -9.827e-03, -6.087e-02)); + r += mul(s2_8, M4(3.102e-02, 1.901e-02, -2.138e-01, 5.284e-02, 3.217e-02, -3.770e-02, -3.509e-02, 2.685e-05, 9.077e-02, -2.235e-01, 3.352e-01, 1.639e-01, -1.189e-01, -1.500e-01, -1.289e-01, 1.001e-03)); + r += mul(s3_0, M4(-4.771e-02, -2.036e-01, 2.846e-02, 2.345e-03, -4.515e-02, 5.025e-02, -7.379e-02, -4.041e-02, -1.517e-02, 3.365e-02, -1.432e-01, -5.475e-03, 1.335e-02, -2.010e-01, -6.832e-03, -3.554e-02)); + r += mul(s3_1, M4(-1.906e-01, -5.119e-02, 1.121e-02, 3.558e-02, -3.803e-01, 2.685e-01, -1.029e-01, 4.900e-02, 2.261e-01, -1.602e-01, 1.923e-02, 1.846e-01, -6.525e-02, -3.315e-02, -1.742e-02, -1.288e-01)); + 
r += mul(s3_2, M4(-1.424e-01, -1.426e-01, 3.269e-03, 2.335e-04, 1.329e-02, -1.127e-01, -6.530e-02, -4.724e-02, 6.465e-02, 9.890e-02, 7.215e-02, -1.801e-01, -5.413e-02, -2.846e-01, -5.021e-02, -2.605e-02)); + r += mul(s3_3, M4(2.748e-02, -3.440e-01, 1.746e-01, 1.563e-01, 8.409e-02, -2.266e-01, -1.234e-01, -1.325e-01, 3.038e-02, 1.448e-01, -8.858e-02, -4.470e-02, 2.247e-01, -2.876e-01, -2.567e-02, -1.657e-01)); + r += mul(s3_4, M4(1.343e-01, -4.749e-01, 3.036e-02, 1.344e-01, -7.044e-01, 9.586e-01, 2.962e-02, 3.066e-01, 3.292e-01, -4.613e-02, 9.281e-02, -6.577e-02, 3.562e-02, 4.712e-01, -3.727e-02, -2.332e-02)); + r += mul(s3_5, M4(-6.552e-02, 1.718e-01, 1.372e-02, 1.942e-01, -3.202e-02, -2.690e-01, 7.061e-02, -2.143e-01, -4.725e-01, 2.260e-01, -1.108e-01, -2.049e-02, -1.247e-01, -3.232e-01, 1.640e-02, -1.974e-01)); + r += mul(s3_6, M4(5.384e-02, -4.926e-02, 8.471e-02, -5.508e-03, 3.127e-02, -1.024e-01, -6.807e-02, 1.355e-02, 8.709e-02, 3.431e-01, -2.891e-02, -1.667e-02, 5.495e-03, -8.979e-02, 1.211e-01, 8.106e-02)); + r += mul(s3_7, M4(-1.086e-01, 3.082e-01, 3.515e-01, 1.455e-01, -1.311e-01, 1.556e-01, -9.044e-02, -6.178e-02, 1.637e-01, 9.177e-02, -1.618e-01, -7.785e-02, 4.761e-02, -1.013e-01, -9.998e-02, -1.424e-02)); + r += mul(s3_8, M4(5.134e-01, 2.088e-01, 3.485e-01, 2.017e-01, 2.709e-01, 1.314e-01, -1.887e-01, 1.701e-02, -7.644e-02, -2.681e-01, 2.699e-01, 2.012e-02, -5.078e-02, -7.899e-02, 3.061e-02, -6.543e-02)); + r += V4(-6.188e-04, 6.040e-03, 8.696e-03, 2.214e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.903e-02, -1.951e-01, 3.731e-02, 2.407e-01, -4.087e-02, -1.242e-01, 1.591e-01, 1.461e-01, 1.144e-03, 6.971e-02, 
6.376e-02, -4.822e-02, -4.229e-03, -2.948e-02, -2.453e-02, 3.279e-02)); + r += mul(s0_1, M4(-1.397e-01, -2.025e-01, -1.174e-01, -2.333e-01, -6.637e-02, 1.611e-01, -2.374e-01, 1.920e-01, -8.692e-03, 1.128e-02, -7.000e-02, 1.043e-01, -1.646e-01, -7.105e-02, -5.099e-02, -7.850e-02)); + r += mul(s0_2, M4(-3.493e-02, 1.631e-01, -1.295e-01, -3.915e-02, -8.689e-03, 2.153e-01, -1.786e-01, 8.576e-02, 3.957e-02, -1.725e-02, 6.660e-03, -9.336e-02, -1.944e-01, -2.239e-01, -1.146e-01, 6.279e-02)); + r += mul(s0_3, M4(1.048e-01, -2.091e-01, 1.755e-01, 1.312e-01, -4.776e-02, 3.309e-02, -1.652e-01, 4.016e-02, -3.431e-02, -9.573e-02, 2.284e-03, 4.407e-02, -7.497e-03, 5.382e-02, 1.636e-02, -3.983e-02)); + r += mul(s0_4, M4(2.202e-01, -1.826e-01, 1.413e-01, -8.920e-01, 2.626e-02, -6.326e-02, 1.165e-02, 2.816e-01, -1.011e-01, 1.952e-02, 1.909e-02, -1.977e-03, 1.714e-02, -1.762e-01, 1.561e-02, 1.896e-02)); + r += mul(s0_5, M4(-8.408e-02, -8.961e-02, -7.309e-02, -9.936e-03, 1.714e-01, 3.017e-01, -3.199e-02, 3.944e-02, 5.782e-02, 4.041e-02, 1.135e-01, -1.899e-02, -6.783e-01, -7.227e-01, -5.489e-02, 3.710e-01)); + r += mul(s0_6, M4(4.481e-02, -1.328e-01, 3.195e-01, 2.234e-01, 3.281e-02, -1.410e-01, -2.458e-01, -3.179e-02, -9.318e-03, -5.619e-02, -4.831e-02, 9.783e-03, 6.137e-05, -3.615e-02, -4.932e-02, 4.382e-02)); + r += mul(s0_7, M4(8.583e-02, -3.896e-01, -2.696e-01, -1.379e-01, 8.896e-04, -4.113e-02, -1.003e-01, -2.234e-01, -2.070e-02, 6.911e-02, 5.273e-02, 5.350e-02, 1.194e-01, -2.388e-01, -7.249e-02, 1.641e-01)); + r += mul(s0_8, M4(3.917e-03, 4.653e-02, 4.208e-02, -2.972e-02, 7.932e-02, 7.600e-02, 1.999e-01, 1.265e-01, 1.091e-01, -4.210e-02, -1.266e-01, -6.501e-02, -1.778e-01, -2.900e-01, -1.907e-01, -1.097e-01)); + r += mul(s1_0, M4(-1.594e-02, -1.106e-01, -6.409e-02, -8.242e-02, -6.067e-02, -1.114e-02, 1.503e-01, -2.305e-02, 9.436e-02, 5.593e-02, -8.865e-02, -3.842e-01, -1.487e-02, 6.278e-02, -9.395e-02, 1.747e-02)); + r += mul(s1_1, M4(-9.406e-02, 1.059e-01, -5.180e-02, 
8.490e-02, 2.999e-02, 7.354e-02, 9.773e-02, 6.806e-02, -1.499e-01, 3.132e-01, -1.413e-01, 3.941e-01, -1.043e-01, 8.054e-02, 7.889e-02, 6.038e-02)); + r += mul(s1_2, M4(-5.049e-02, 8.765e-02, -3.552e-02, 5.389e-02, 1.399e-02, 4.153e-02, -9.329e-02, 5.096e-02, -5.803e-03, -3.089e-02, -3.274e-01, 2.702e-01, -1.782e-01, -3.271e-02, 3.820e-02, 6.556e-02)); + r += mul(s1_3, M4(1.295e-01, -1.074e-01, 1.311e-01, 1.953e-02, -7.175e-02, -2.176e-01, -4.113e-02, -6.624e-02, -1.264e-01, -1.084e+00, -8.971e-02, -4.195e-01, -3.640e-02, 1.017e-01, 1.374e-02, -3.757e-02)); + r += mul(s1_4, M4(1.402e-01, 7.449e-02, 2.512e-01, -3.099e-01, -1.449e-01, -2.211e-01, 2.472e-01, -8.322e-02, -5.802e-01, -9.452e-01, -3.769e-01, -7.299e-01, 1.703e-02, 5.013e-02, 2.580e-02, 1.018e-01)); + r += mul(s1_5, M4(-1.122e-01, -3.917e-02, 1.255e-01, 1.104e-01, 1.611e-01, 1.474e-01, -6.121e-02, -2.863e-02, 6.412e-01, -4.582e-01, -9.288e-01, -1.703e+00, -8.581e-01, -1.186e-01, -1.852e-02, 1.764e-01)); + r += mul(s1_6, M4(-2.302e-02, -1.122e-01, -6.733e-04, 8.064e-02, 6.468e-02, -9.918e-02, 4.609e-02, -3.326e-02, -7.172e-02, -3.604e-01, 4.166e-01, 1.496e-02, 1.941e-02, -8.164e-03, -1.005e-01, 3.534e-03)); + r += mul(s1_7, M4(-4.566e-02, 1.576e-01, -8.564e-02, -1.512e-02, 2.097e-02, -1.125e-01, 2.043e-02, -3.300e-01, -1.351e-01, -6.389e-01, -3.509e-01, -1.141e-01, 9.899e-02, -7.529e-02, 1.057e-01, 2.057e-02)); + r += mul(s1_8, M4(5.309e-02, 7.249e-02, 3.752e-02, -4.934e-02, 5.045e-02, 9.157e-02, 3.824e-03, -2.342e-02, 3.056e-01, -2.377e-03, 4.013e-01, -8.652e-02, -1.557e-01, -6.980e-02, -3.395e-02, -1.831e-01)); + r += mul(s2_0, M4(-2.140e-02, 3.373e-02, -6.324e-02, 1.341e-02, -6.006e-02, -1.390e-01, 1.304e-01, -7.396e-02, 1.053e-02, 4.035e-02, -7.677e-02, 2.362e-02, 1.909e-02, 2.325e-01, 1.447e-01, 7.323e-02)); + r += mul(s2_1, M4(1.795e-02, -3.169e-04, 1.030e-01, 1.265e-01, -1.093e-01, -1.128e-01, -1.884e-02, 1.473e-01, 4.227e-02, 4.771e-02, -5.342e-03, 3.107e-02, 9.337e-02, 3.386e-01, 9.080e-02, 
-1.308e-01)); + r += mul(s2_2, M4(-3.594e-02, -3.244e-02, 9.089e-02, 6.626e-02, 8.157e-03, 1.997e-01, 3.726e-01, -1.223e-01, 7.486e-02, 9.483e-02, -7.558e-02, 3.820e-02, 3.465e-02, 9.637e-02, 1.463e-01, 1.243e-01)); + r += mul(s2_3, M4(-2.022e-02, 5.208e-02, 1.797e-01, 1.583e-02, -1.421e-01, -2.013e-01, 1.746e-01, -9.217e-02, -5.599e-02, 1.221e-01, -1.353e-01, 2.996e-03, -1.491e-02, 2.928e-02, 2.489e-02, 2.708e-02)); + r += mul(s2_4, M4(-2.417e-01, 2.697e-01, 1.544e-02, 3.408e-01, 5.464e-01, -1.619e-01, -1.491e-01, -2.986e-02, -9.208e-02, -4.481e-01, -1.674e-01, -8.227e-02, -1.887e-01, -3.818e-02, 1.558e-01, -5.454e-02)); + r += mul(s2_5, M4(2.332e-01, 1.534e-01, -1.023e-01, -1.223e-01, -4.520e-01, 2.117e-02, 4.059e-01, 1.739e-01, 9.972e-02, -3.460e-01, -2.232e-01, -9.188e-02, -3.011e-01, 9.661e-02, 2.294e-01, 4.967e-01)); + r += mul(s2_6, M4(-2.340e-02, 4.372e-01, 5.542e-02, -9.070e-02, 6.642e-03, -7.093e-02, 3.614e-02, -1.240e-02, -1.341e-03, -6.169e-02, -1.849e-01, -2.486e-02, 2.831e-02, -1.802e-01, -2.525e-02, -5.149e-02)); + r += mul(s2_7, M4(-7.215e-02, 6.276e-01, 2.174e-01, 2.019e-01, 1.082e-01, 5.354e-02, 1.379e-02, -1.519e-01, 3.392e-02, -2.885e-01, 6.024e-02, 1.196e-01, -1.217e-02, 4.156e-02, 7.591e-02, -7.825e-02)); + r += mul(s2_8, M4(4.152e-02, 3.043e-02, -9.412e-03, 1.857e-01, -5.486e-02, -1.094e-02, 1.109e-01, 3.213e-02, 9.758e-02, -3.680e-01, -8.002e-02, 1.475e-01, -1.528e-01, 2.880e-01, 3.799e-01, 7.434e-02)); + r += mul(s3_0, M4(-1.587e-02, -1.111e-01, -1.103e-01, -8.218e-03, -2.111e-02, -1.207e-03, -2.931e-02, -3.011e-02, -9.070e-03, 8.353e-02, -7.659e-02, 7.656e-03, 9.231e-03, 7.664e-02, -1.290e-02, 1.033e-01)); + r += mul(s3_1, M4(-1.663e-02, -2.158e-01, -2.743e-02, -3.814e-02, -7.382e-02, -3.598e-02, -2.445e-01, -2.235e-02, -5.268e-02, 1.668e-02, 5.700e-02, 1.989e-01, 2.021e-02, 1.277e-01, -3.154e-02, -2.325e-02)); + r += mul(s3_2, M4(2.185e-02, -7.522e-02, 9.069e-02, -6.605e-02, -7.998e-02, 3.981e-02, -5.009e-02, -1.897e-01, 1.052e-01, 
2.140e-01, -1.733e-01, -5.374e-02, -1.005e-02, -3.066e-02, -2.403e-02, 1.075e-01)); + r += mul(s3_3, M4(-7.010e-02, -2.416e-01, -9.685e-02, -1.127e-02, -1.742e-01, 6.831e-04, -1.254e-02, -1.752e-02, -7.957e-02, 5.913e-02, 1.330e-01, -3.948e-02, -3.154e-02, 3.913e-02, -1.559e-01, -2.551e-03)); + r += mul(s3_4, M4(-3.353e-01, -1.453e-01, -4.606e-01, -9.453e-02, 5.683e-01, 5.758e-02, -6.855e-01, -2.280e-02, -1.119e-01, -3.075e-01, -1.257e-01, 1.525e-01, -1.558e-01, -3.928e-02, -1.199e-01, -2.134e-01)); + r += mul(s3_5, M4(1.197e-01, -2.141e-01, -5.354e-02, -2.723e-01, -4.297e-01, -1.564e-01, -3.910e-03, 4.950e-01, 5.365e-01, 4.068e-01, 8.903e-02, -1.515e-01, -1.595e-01, -9.696e-02, 4.988e-02, 3.443e-01)); + r += mul(s3_6, M4(-4.645e-02, -1.016e-01, -4.400e-02, -1.985e-02, -1.540e-02, -7.543e-02, -6.173e-02, -4.023e-03, -1.087e-02, 9.029e-02, -2.016e-02, -8.725e-02, 2.866e-02, -1.784e-01, -1.198e-01, -4.092e-02)); + r += mul(s3_7, M4(1.047e-01, -3.071e-01, 4.047e-02, 2.940e-02, 9.727e-02, 1.644e-02, -4.037e-04, 9.290e-04, -2.380e-02, 2.100e-01, -4.936e-02, 1.706e-01, 1.528e-02, -1.440e-01, -1.978e-01, -1.937e-01)); + r += mul(s3_8, M4(-3.675e-02, -2.837e-01, -2.650e-01, -1.265e-01, -8.426e-02, 8.075e-02, -1.650e-01, 1.255e-01, 4.193e-02, -1.812e-02, 2.658e-02, -1.294e-02, -1.497e-02, 5.846e-02, 5.440e-02, -5.090e-02)); + r += V4(-6.100e-03, -5.629e-03, -2.164e-03, -5.154e-03); + return r; +} + +void Pass3(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = 
-max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 4 +//!DESC conv3 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 
s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(1.460e-01, -2.503e-03, -2.140e-02, 2.070e-01, 1.642e-01, 3.938e-02, -3.263e-02, 2.015e-01, -4.626e-02, 3.951e-02, 1.449e-02, 2.009e-01, 5.058e-02, -4.454e-02, -1.328e-02, 1.673e-01)); + r += mul(s0_1, M4(6.362e-02, -5.345e-03, -1.315e-02, 2.544e-02, 1.481e-01, 1.702e-02, 4.910e-02, 7.624e-02, -1.192e-01, 7.445e-02, -6.032e-02, 5.518e-02, -1.580e-01, 1.302e-01, 9.593e-02, -3.273e-01)); + r += mul(s0_2, M4(8.558e-02, -7.543e-02, -6.527e-02, 2.885e-02, 1.973e-01, -1.098e-01, -3.435e-02, 9.459e-02, 5.094e-02, 7.949e-03, 1.923e-02, -1.689e-01, 3.913e-02, -9.038e-02, 3.628e-02, -2.234e-01)); + r += mul(s0_3, M4(1.441e-01, -9.906e-02, 2.901e-02, -8.990e-02, 9.260e-02, 1.779e-01, -5.753e-02, 1.198e-01, -4.089e-02, -2.679e-02, 2.567e-01, -2.316e-01, 2.826e-02, 3.657e-02, -5.245e-02, -1.481e-01)); + r += mul(s0_4, M4(-6.895e-02, 1.641e-01, -2.386e-01, -2.188e-01, 1.864e-01, -2.987e-03, -9.756e-02, -2.084e-01, -6.674e-02, 5.989e-02, 2.618e-01, -1.881e-01, -2.528e-01, -7.958e-02, -1.960e-01, -8.874e-02)); + r += mul(s0_5, M4(-2.057e-01, 2.163e-01, -1.032e-01, 2.606e-02, 2.536e-01, -1.757e-01, -3.088e-02, 9.971e-02, 3.508e-02, 6.749e-02, -7.082e-02, -5.879e-02, 4.556e-01, -2.563e-01, 3.354e-01, -1.140e-01)); + r += mul(s0_6, M4(4.626e-02, -3.616e-02, 3.660e-02, -6.943e-02, -2.409e-02, -7.460e-02, 3.468e-02, -1.478e-01, 3.061e-04, 3.629e-02, 3.037e-02, -4.552e-02, -1.420e-01, -4.576e-02, -2.385e-02, 6.238e-03)); + r += mul(s0_7, M4(2.317e-02, -4.572e-02, 1.373e-01, -4.412e-02, -2.654e-01, 1.417e-01, -2.566e-02, 7.823e-02, 5.192e-02, 1.253e-02, 2.714e-02, 4.986e-02, -6.517e-02, 1.725e-01, -3.147e-01, 1.810e-01)); + r += mul(s0_8, M4(-7.655e-02, 1.568e-02, 4.386e-02, -2.880e-02, 1.294e-01, -4.386e-02, -1.892e-01, 1.013e-01, -4.186e-02, -1.205e-02, -1.252e-02, 2.672e-02, 9.946e-02, -1.116e-01, -8.791e-02, -5.216e-02)); + r += 
mul(s1_0, M4(1.416e-01, -2.957e-02, 4.620e-02, 2.061e-01, -1.485e-03, 2.902e-02, 8.762e-02, -1.343e-02, -9.476e-02, -8.555e-02, -1.367e-01, -5.685e-01, 1.643e-02, 5.774e-02, -2.975e-02, -5.926e-02)); + r += mul(s1_1, M4(-1.076e-01, 1.057e-01, 8.230e-02, 2.622e-01, -3.980e-02, 9.033e-02, 4.219e-02, 2.085e-01, -3.702e-01, 3.284e-01, -1.205e-01, 4.759e-01, -9.593e-03, 6.312e-02, -3.021e-02, -1.414e-01)); + r += mul(s1_2, M4(-7.337e-02, 1.111e-02, 2.710e-03, 2.014e-02, 4.307e-03, 5.057e-02, -2.255e-02, 1.778e-01, 3.245e-02, 5.045e-02, -2.178e-02, -4.287e-01, -8.326e-02, -4.708e-02, 3.045e-02, 2.593e-02)); + r += mul(s1_3, M4(3.675e-02, -1.743e-01, -1.996e-02, -2.225e-02, 3.629e-02, -4.751e-02, 4.292e-02, -4.781e-02, 1.730e-01, 1.333e-01, 1.877e-01, 7.252e-02, 5.021e-02, 4.278e-03, -8.554e-02, -1.798e-01)); + r += mul(s1_4, M4(-2.651e-01, 7.933e-02, -1.525e-01, -4.634e-02, -4.800e-02, 4.021e-02, 9.500e-02, 3.373e-02, -4.455e-02, 1.410e-01, 7.064e-01, 1.166e-01, -2.249e-01, 1.612e-01, -1.835e-01, -1.426e-01)); + r += mul(s1_5, M4(-2.822e-01, 1.256e-01, -6.588e-02, 5.687e-02, -1.322e-01, 6.351e-02, -1.372e-01, -4.876e-02, 6.367e-02, 1.225e-02, -3.775e-03, -3.821e-01, -8.246e-02, 2.336e-02, 5.701e-02, -9.513e-02)); + r += mul(s1_6, M4(-4.435e-02, -2.383e-02, 2.873e-02, 1.033e-01, -3.893e-02, -4.387e-03, 1.064e-01, -1.552e-03, -3.830e-02, -8.205e-03, 6.505e-02, -1.395e-01, -2.812e-02, -1.357e-01, -4.313e-02, -1.492e-01)); + r += mul(s1_7, M4(-4.363e-02, -2.478e-02, 8.346e-03, -4.201e-02, -5.849e-02, -1.863e-02, 6.421e-02, -7.445e-02, 1.853e-02, -3.674e-02, 6.083e-02, -4.624e-02, -2.120e-01, 3.527e-01, -3.120e-01, 9.880e-02)); + r += mul(s1_8, M4(-9.998e-02, 1.131e-02, 3.393e-02, 8.990e-02, -5.088e-02, -1.396e-02, 6.092e-02, -2.058e-02, 1.320e-02, -3.463e-02, 4.451e-02, -6.250e-02, 2.477e-01, -9.122e-02, 4.647e-02, 5.444e-02)); + r += mul(s2_0, M4(1.738e-02, 5.692e-05, -3.280e-02, 1.116e-01, -4.025e-02, -6.378e-03, 6.333e-02, -2.169e-02, 1.159e-03, 5.745e-02, -1.892e-02, 
-5.304e-02, -8.005e-03, 6.982e-03, 1.781e-02, -1.290e-01)); + r += mul(s2_1, M4(2.053e-02, 1.001e-01, -5.542e-02, 1.997e-01, -1.069e-01, -1.316e-01, -9.407e-03, -2.496e-01, -1.271e-01, 5.652e-02, 9.357e-02, -5.321e-02, -1.236e-02, -3.268e-02, -1.057e-01, 3.677e-01)); + r += mul(s2_2, M4(-1.572e-01, 1.607e-01, -1.469e-02, -2.933e-02, 1.361e-01, -1.904e-01, -1.410e-02, -1.019e-01, -2.218e-02, -1.175e-01, 1.221e-02, -9.969e-02, -1.374e-02, 1.115e-01, 5.505e-02, -4.364e-02)); + r += mul(s2_3, M4(2.645e-02, -1.315e-01, 5.348e-02, -6.960e-02, -8.126e-02, 2.138e-02, 4.465e-02, -1.993e-03, -2.183e-02, -1.264e-01, -7.401e-02, -1.451e-01, 1.713e-01, -9.877e-02, 1.130e-01, -1.241e-01)); + r += mul(s2_4, M4(-8.051e-01, -4.633e-02, -1.499e-01, 4.505e-02, -1.396e-01, 1.184e-01, -3.166e-02, 2.424e-01, -3.324e-02, -4.050e-02, -2.288e-01, 2.254e-02, 1.225e-01, 2.109e-01, 4.975e-01, 2.566e-02)); + r += mul(s2_5, M4(-1.325e-01, 3.603e-01, -1.102e-01, 8.552e-02, 1.978e-01, -1.439e-01, 2.342e-02, -1.104e-01, -9.703e-02, 1.217e-03, 6.674e-02, -4.943e-03, 1.047e-01, 1.100e-01, -7.425e-02, -1.426e-01)); + r += mul(s2_6, M4(6.620e-02, -5.105e-03, 5.341e-02, 6.188e-02, 1.215e-01, -4.942e-02, -2.350e-02, -2.389e-03, 7.577e-03, -1.194e-02, 7.483e-02, 2.797e-02, 8.517e-02, -1.188e-01, -9.627e-03, -3.775e-02)); + r += mul(s2_7, M4(-7.349e-02, -8.638e-02, 1.668e-01, -1.634e-01, -2.610e-02, 5.929e-02, 1.200e-01, 1.551e-01, -6.725e-02, 2.538e-02, 3.603e-02, 6.823e-02, -9.709e-02, -5.752e-03, -5.046e-02, -1.508e-01)); + r += mul(s2_8, M4(-1.805e-01, 7.995e-02, -1.702e-02, 6.758e-02, -5.362e-02, -2.128e-02, 5.766e-02, -1.263e-01, 1.730e-02, -2.622e-02, 8.422e-02, -8.742e-03, 1.736e-02, 5.187e-02, -1.939e-02, -6.008e-02)); + r += mul(s3_0, M4(2.769e-02, -6.180e-02, -1.595e-02, -5.555e-02, 7.106e-02, 1.989e-02, -8.007e-03, 3.022e-02, 8.952e-02, -8.760e-02, 3.358e-03, 5.172e-01, -5.316e-02, -1.774e-02, -1.529e-02, 3.360e-02)); + r += mul(s3_1, M4(2.083e-01, -1.959e-01, -3.492e-02, -1.062e-01, 
-1.991e-01, 1.627e-01, 4.752e-02, 1.935e-01, -4.772e-02, -4.620e-01, 6.199e-01, -5.559e-01, 5.407e-02, -1.701e-02, -9.253e-02, -2.324e-01)); + r += mul(s3_2, M4(-2.318e-02, 4.885e-02, 4.424e-02, -1.622e-01, 3.683e-02, 8.032e-03, 4.170e-03, -2.176e-01, 1.676e-01, -5.978e-01, 1.706e-01, 5.688e-02, -3.584e-02, -3.374e-02, 9.832e-02, 8.184e-02)); + r += mul(s3_3, M4(7.349e-02, -1.023e-01, 3.799e-02, -7.277e-02, -6.203e-02, -2.119e-02, 3.634e-02, -6.658e-02, -3.352e-01, 6.581e-01, 4.496e-01, -4.329e-02, 1.106e-01, -1.827e-01, 1.568e-01, 5.295e-02)); + r += mul(s3_4, M4(1.424e-01, -9.230e-01, 4.605e-02, 1.233e-02, 3.738e-02, -2.959e-02, -9.470e-02, -1.115e-02, 3.737e-01, -2.685e-01, -1.735e-02, 1.542e-01, -3.248e-01, 3.305e-01, 1.752e-01, 2.610e-02)); + r += mul(s3_5, M4(4.717e-01, -2.105e-01, 3.212e-02, 1.470e-02, 6.355e-02, 3.502e-02, -6.022e-04, 7.889e-02, -9.257e-03, -6.054e-01, 5.161e-01, 6.461e-02, 2.712e-01, -2.589e-01, 8.823e-02, 8.179e-02)); + r += mul(s3_6, M4(6.777e-02, 4.699e-02, -9.605e-03, 7.196e-02, 3.270e-02, 2.843e-02, 6.386e-02, 2.463e-02, -6.469e-02, -1.302e-01, 3.400e-02, 6.313e-01, 1.639e-01, -1.763e-01, -8.542e-02, 1.369e-01)); + r += mul(s3_7, M4(-1.241e-02, -2.214e-03, 2.025e-01, -6.725e-02, 7.727e-02, -1.428e-02, -5.117e-02, 1.164e-01, 3.355e-02, 1.263e-01, 2.701e-01, 3.418e-01, -1.227e-01, 7.170e-02, 3.163e-02, 8.146e-02)); + r += mul(s3_8, M4(-5.885e-02, -2.514e-02, -1.081e-01, 3.977e-02, -9.304e-02, 1.579e-02, 5.317e-02, -7.459e-04, -1.390e-02, -3.147e-01, 6.463e-01, 3.237e-01, 3.543e-02, 2.026e-02, -4.326e-02, 8.351e-02)); + r += V4(4.531e-03, -8.111e-04, -4.559e-03, 1.344e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += 
mul(s0_0, M4(5.109e-03, -1.013e-01, -4.476e-03, 2.371e-02, -1.188e-02, 2.613e-02, 4.354e-03, 5.263e-02, 2.261e-02, 1.565e-01, 1.185e-01, -7.047e-02, -5.470e-03, -6.023e-02, -5.102e-02, -4.978e-02)); + r += mul(s0_1, M4(3.411e-02, -6.323e-02, 2.892e-01, 5.630e-02, -9.362e-02, 2.923e-02, 3.466e-01, 1.604e-01, -1.297e-01, 9.643e-02, -8.275e-02, -6.924e-02, -6.175e-03, 7.658e-03, -1.293e-01, 2.992e-02)); + r += mul(s0_2, M4(-6.653e-02, 7.399e-02, -6.861e-02, 2.062e-02, -1.039e-01, -7.505e-02, 1.044e-03, 5.407e-02, -4.158e-02, 1.475e-02, 1.392e-02, 6.334e-02, 9.506e-02, 1.937e-02, -4.186e-02, -3.346e-02)); + r += mul(s0_3, M4(-7.411e-02, -9.890e-02, 2.862e-02, -1.133e-02, -6.961e-02, 1.554e-01, -3.630e-02, 4.619e-02, -5.479e-02, -8.802e-02, -3.359e-02, 1.322e-02, 4.674e-02, 6.430e-02, -1.762e-01, -9.326e-02)); + r += mul(s0_4, M4(3.798e-01, 3.569e-01, 3.303e-02, -3.879e-01, 1.660e-01, 3.253e-01, 5.718e-01, 4.811e-01, 2.049e-01, 2.783e-01, 1.532e-01, 2.310e-01, 1.625e-01, 2.305e-01, 1.518e-01, 1.102e-01)); + r += mul(s0_5, M4(-2.511e-02, 7.963e-02, 1.293e-01, -1.074e-01, -3.559e-01, -3.133e-02, -3.580e-01, 1.722e-01, -2.363e-01, -6.018e-02, 2.465e-02, -2.001e-02, -3.252e-01, -1.818e-01, 3.119e-01, 2.003e-01)); + r += mul(s0_6, M4(-6.486e-02, 7.469e-03, 1.003e-02, 1.480e-02, 6.965e-02, -1.185e-02, -4.961e-02, -3.236e-02, -8.740e-02, -5.041e-02, 1.229e-02, 5.909e-02, -6.895e-03, -9.904e-02, 8.634e-02, 4.633e-02)); + r += mul(s0_7, M4(6.900e-03, -1.665e-02, -1.701e-02, -9.881e-02, -7.354e-02, -1.694e-01, 1.642e-01, 9.754e-02, 6.378e-02, 1.202e-02, 1.083e-01, -2.660e-02, -3.433e-01, -3.428e-01, 4.544e-02, 4.121e-02)); + r += mul(s0_8, M4(1.150e-02, -9.370e-03, 3.042e-02, -6.306e-02, -1.127e-01, 1.582e-02, 1.082e-02, 1.499e-01, 3.286e-03, 9.324e-03, -7.105e-02, 2.163e-02, 2.231e-01, -2.537e-02, 2.663e-02, 1.020e-01)); + r += mul(s1_0, M4(-2.882e-02, -1.364e-01, -6.985e-02, -8.632e-02, -1.159e-02, 5.210e-02, -1.038e-01, -3.735e-02, 1.732e-01, 1.714e-01, 4.997e-03, -9.303e-02, 
-7.473e-02, 5.196e-02, -7.526e-02, -1.339e-02)); + r += mul(s1_1, M4(7.270e-02, -5.112e-02, 2.064e-01, -9.973e-02, 1.341e-01, -9.910e-02, 1.171e-01, -1.909e-02, 2.098e-01, 2.391e-01, 1.460e-01, -2.052e-01, -9.156e-02, 3.022e-03, -7.790e-02, -1.620e-02)); + r += mul(s1_2, M4(8.411e-02, 2.544e-02, -8.592e-02, -1.476e-01, -8.090e-02, 2.901e-02, 3.733e-03, 7.357e-02, 1.347e-01, 1.987e-02, 1.459e-01, -1.547e-02, 1.014e-01, -2.559e-02, 8.887e-02, 2.969e-02)); + r += mul(s1_3, M4(9.597e-02, -3.545e-01, -2.156e-02, -4.908e-02, -5.108e-02, -8.399e-02, 5.381e-02, 1.013e-01, 3.602e-02, 2.681e-01, 2.220e-02, 1.373e-01, 4.991e-02, 4.378e-02, -7.537e-02, -2.068e-01)); + r += mul(s1_4, M4(5.888e-01, 2.576e-01, -3.241e-01, -4.970e-01, 1.664e-01, 2.416e-01, 3.465e-01, 7.672e-02, 1.097e-01, 5.451e-01, 1.206e-01, 4.713e-01, -2.924e-03, 2.240e-01, 1.714e-01, -3.315e-04)); + r += mul(s1_5, M4(1.887e-01, 8.490e-02, -8.236e-02, -2.263e-01, -4.718e-02, 6.606e-02, -2.596e-01, -2.097e-01, -1.909e-01, -3.157e-01, 1.629e-01, 3.072e-02, 4.002e-01, 1.577e-01, 1.484e-01, 5.068e-02)); + r += mul(s1_6, M4(-9.601e-03, -5.944e-02, -6.170e-03, -5.131e-02, -6.913e-02, -1.295e-02, -2.661e-02, 8.608e-03, -1.877e-01, -9.093e-02, 4.880e-02, 7.809e-02, 2.246e-01, 1.962e-01, -1.578e-02, -1.218e-01)); + r += mul(s1_7, M4(1.245e-01, -6.743e-02, -7.707e-02, -2.127e-01, -2.682e-02, -5.999e-02, -2.738e-02, -2.108e-02, 7.802e-03, -2.028e-01, 1.086e-01, -8.522e-02, -3.510e-01, -1.243e-01, -7.196e-02, 5.012e-02)); + r += mul(s1_8, M4(1.554e-01, 1.041e-01, -8.905e-02, -1.498e-02, -2.704e-02, -4.771e-03, -8.702e-02, 6.094e-02, -7.724e-02, -1.039e-01, -4.334e-02, -6.489e-02, 1.061e-02, -1.715e-01, 3.398e-01, 1.436e-01)); + r += mul(s2_0, M4(1.898e-02, -6.796e-02, -9.681e-02, 5.317e-02, -6.710e-02, -2.401e-01, 8.957e-02, -1.326e-01, -1.285e-01, -4.354e-02, -4.742e-02, 5.456e-02, -3.094e-02, 9.700e-02, -1.397e-01, -1.736e-01)); + r += mul(s2_1, M4(-1.130e-01, -8.727e-02, -6.717e-02, 1.105e-01, 3.471e-01, 4.923e-02, 
3.048e-02, 1.696e-01, -5.733e-02, -4.620e-02, 2.674e-02, 2.742e-02, 1.125e-01, -3.257e-02, -1.607e-01, 2.017e-01)); + r += mul(s2_2, M4(-1.016e-01, 8.385e-03, -1.411e-01, -2.757e-02, -7.714e-03, 6.335e-02, 8.580e-02, -8.000e-03, 9.101e-04, -8.858e-02, -4.917e-02, -2.892e-02, -6.813e-02, -1.086e-02, 6.338e-02, -1.416e-02)); + r += mul(s2_3, M4(9.098e-02, 1.957e-01, -9.551e-02, 1.372e-02, 5.547e-02, -5.539e-02, -7.710e-02, 4.348e-02, 6.994e-02, -9.307e-03, 4.403e-02, 4.120e-02, -4.607e-02, 1.169e-02, -1.930e-01, 2.159e-02)); + r += mul(s2_4, M4(4.111e-01, 8.301e-01, -3.466e-01, -9.503e-01, 6.933e-02, -1.913e-02, 2.897e-01, 6.002e-02, 1.656e-01, -2.074e-01, -2.278e-01, -2.744e-01, 1.898e-01, 3.807e-01, 1.588e-01, 1.337e-01)); + r += mul(s2_5, M4(3.547e-02, 9.913e-02, 2.290e-01, -1.001e-01, 2.844e-01, -5.348e-02, 3.102e-01, 2.668e-01, -2.307e-02, 1.603e-02, -1.343e-01, -1.940e-01, -6.855e-02, 7.183e-03, 2.743e-01, 1.129e-01)); + r += mul(s2_6, M4(-2.187e-02, -2.622e-02, -4.526e-02, -6.321e-03, -8.359e-02, -1.268e-01, 8.651e-02, -6.939e-03, 2.957e-02, 5.836e-03, -2.792e-02, -3.501e-02, 7.605e-02, 2.245e-02, 3.698e-02, -4.302e-02)); + r += mul(s2_7, M4(1.205e-01, -8.147e-02, -2.218e-01, -2.645e-01, 7.159e-02, 2.437e-01, -2.439e-02, 1.300e-01, -3.909e-02, -1.879e-02, -1.024e-01, 1.197e-02, 4.967e-02, 1.628e-01, -2.973e-02, -5.674e-02)); + r += mul(s2_8, M4(-4.114e-02, 2.495e-03, -6.669e-02, -1.126e-01, 1.185e-03, -2.611e-01, 4.307e-02, -4.078e-02, 1.727e-02, 3.137e-02, 4.402e-02, -9.532e-03, -1.445e-01, -2.802e-02, 8.805e-02, -3.620e-02)); + r += mul(s3_0, M4(5.592e-02, -5.986e-02, 4.212e-02, -1.351e-02, -1.183e-01, -7.062e-02, -2.884e-02, -9.861e-02, 2.923e-01, -3.872e-01, -7.561e-02, -2.126e-01, -1.087e-02, 2.638e-01, -1.060e-01, -2.521e-02)); + r += mul(s3_1, M4(-2.393e-03, 1.342e-02, -2.694e-01, -3.229e-02, 9.839e-02, 1.256e-01, 1.459e-01, 5.196e-02, 6.062e-01, -5.927e-01, 5.081e-01, -8.134e-01, -5.183e-01, -9.299e-02, -4.591e-01, 4.634e-01)); + r += mul(s3_2, 
M4(-1.243e-01, -1.546e-03, -2.050e-01, -9.618e-02, 1.164e-01, 3.858e-03, 2.954e-01, 2.534e-02, 2.224e-01, 3.648e-01, -4.268e-01, -7.856e-01, -1.146e-01, -5.429e-03, -3.244e-01, 4.515e-02)); + r += mul(s3_3, M4(1.545e-02, 1.267e-01, 1.834e-01, 1.085e-01, 8.707e-02, 3.956e-02, -6.194e-02, -8.369e-02, 1.215e-01, 1.556e-01, -5.293e-01, -1.505e-01, 1.939e-01, 1.340e-01, -4.531e-01, -2.169e-01)); + r += mul(s3_4, M4(-4.717e-01, 4.645e-01, 4.459e-01, -9.550e-02, 3.874e-01, 3.174e-01, -2.450e-01, 9.813e-02, 4.528e-01, -9.403e-01, 9.279e-01, -4.499e-01, 5.985e-02, -6.119e-01, 4.211e-01, -1.085e-01)); + r += mul(s3_5, M4(-2.976e-01, -2.410e-01, 1.306e-01, 1.568e-01, 2.322e-01, 1.803e-01, 3.317e-01, 1.716e-01, 6.972e-01, 1.567e-01, -3.980e-01, -6.066e-01, 1.686e-02, -8.061e-02, -1.006e-01, 1.342e-01)); + r += mul(s3_6, M4(-8.888e-02, -1.646e-01, 7.995e-02, -2.675e-02, -4.782e-02, -7.957e-02, -5.547e-02, -2.723e-04, 2.751e-01, -3.776e-01, 1.641e-01, 3.858e-01, 1.835e-01, 7.779e-02, -5.251e-02, -1.195e-01)); + r += mul(s3_7, M4(-9.921e-02, -8.667e-02, -1.878e-02, -1.259e-01, 8.537e-02, -4.571e-02, 2.510e-01, 3.064e-02, 1.216e-01, -1.229e-01, 1.490e-01, -2.311e-01, 1.152e-02, -6.642e-02, -7.519e-02, 1.796e-02)); + r += mul(s3_8, M4(-4.726e-02, 2.303e-02, -6.555e-02, -3.746e-02, 4.466e-02, -5.962e-02, -4.969e-02, -8.936e-02, 2.212e-01, -2.673e-02, -1.224e-01, 2.754e-02, -6.249e-02, -4.974e-02, 2.964e-02, 1.651e-02)); + r += V4(7.221e-03, -6.181e-03, -9.146e-03, -1.076e-02); + return r; +} + +void Pass4(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = 
-max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 5 +//!DESC conv4 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 
s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.358e-02, 7.106e-02, -1.223e-01, -1.570e-01, -4.561e-02, 2.621e-02, -6.238e-02, -2.642e-02, -1.613e-01, -1.018e-01, -1.182e-01, 9.290e-02, 1.166e-01, -4.284e-02, 3.173e-02, 6.253e-02)); + r += mul(s0_1, M4(-1.356e-01, -2.774e-01, 4.431e-02, -2.950e-01, -3.286e-02, -2.192e-01, 5.332e-02, -8.862e-02, -7.113e-02, 1.994e-01, -1.422e-01, 3.142e-01, 7.644e-02, -5.537e-02, -1.903e-01, 1.820e-01)); + r += mul(s0_2, M4(-8.536e-02, 1.536e-01, 2.164e-02, -8.691e-02, -7.817e-02, 1.158e-01, 3.551e-02, 9.690e-02, -2.058e-01, -2.366e-01, -2.024e-01, 3.949e-02, 9.990e-02, -1.155e-01, -4.381e-02, 5.952e-02)); + r += mul(s0_3, M4(4.764e-02, -2.284e-01, 2.608e-01, -3.773e-01, 6.969e-03, -1.371e-01, 3.378e-02, -1.982e-01, -1.499e-01, 1.022e-01, 2.773e-01, -1.993e-01, 1.287e-01, -1.990e-01, -1.213e-01, 4.326e-02)); + r += mul(s0_4, M4(4.182e-01, -4.292e-01, 2.890e-01, -7.257e-02, 5.904e-02, 5.257e-01, 8.142e-02, 4.462e-01, -3.900e-01, -4.610e-02, -3.785e-01, 1.900e-01, 1.028e-01, -7.148e-02, -1.026e-01, 1.641e-01)); + r += mul(s0_5, M4(-1.978e-02, 1.734e-01, -1.815e-01, -1.499e-01, 2.661e-03, -5.529e-02, -6.137e-02, -1.076e-02, -9.873e-02, 7.319e-02, -1.055e-01, -1.914e-02, 3.545e-01, -2.098e-01, 2.723e-02, -1.403e-02)); + r += mul(s0_6, M4(2.502e-02, 4.622e-02, 1.371e-02, -5.810e-02, 6.768e-02, 6.884e-02, 7.794e-02, 4.459e-02, 1.565e-02, -5.750e-02, -1.899e-01, -3.387e-02, -7.821e-02, -3.829e-01, -2.889e-02, -6.215e-02)); + r += mul(s0_7, M4(2.837e-01, 2.153e-01, 6.464e-01, -2.259e-02, 1.787e-01, 5.850e-02, -1.661e-01, 8.230e-02, -2.558e-03, -9.202e-02, -1.143e-01, 5.795e-02, 2.725e-01, -2.512e-01, -3.417e-01, 1.418e-01)); + r += mul(s0_8, M4(-7.240e-02, 7.377e-02, 1.601e-02, -5.421e-02, 5.224e-02, 2.011e-02, 3.942e-02, -2.078e-02, 6.665e-02, 
5.385e-02, -7.067e-02, -3.002e-02, 6.076e-01, -1.559e-01, -1.351e-01, 3.973e-02)); + r += mul(s1_0, M4(5.739e-02, -2.130e-02, 6.867e-02, 8.138e-02, -8.371e-02, 7.208e-02, 9.522e-02, -1.247e-02, 3.673e-02, -1.020e-01, -1.157e-01, -4.893e-02, 4.489e-02, 6.052e-03, -1.856e-02, 1.512e-02)); + r += mul(s1_1, M4(-8.925e-02, -1.358e-02, -4.710e-02, -5.965e-02, -1.423e-01, -1.121e-01, 3.344e-01, -2.959e-01, 8.419e-02, -1.992e-01, -1.728e-01, 1.496e-01, 7.465e-02, -6.513e-02, -1.158e-01, 4.417e-02)); + r += mul(s1_2, M4(-1.561e-02, 1.173e-01, 6.393e-02, 7.635e-02, -2.358e-01, 4.513e-01, 1.043e-01, 5.412e-02, -1.062e-01, -2.702e-01, -1.967e-01, 2.292e-02, 4.481e-02, -6.181e-02, -7.755e-02, 1.785e-02)); + r += mul(s1_3, M4(-1.536e-02, 1.336e-01, -1.401e-01, 1.672e-01, -3.804e-02, 1.629e-01, 3.546e-02, -2.329e-01, -1.882e-01, -6.580e-02, 6.342e-02, -2.430e-01, -4.527e-02, 3.083e-02, -1.829e-02, 5.247e-02)); + r += mul(s1_4, M4(-2.110e-01, 9.668e-02, -5.724e-02, 2.095e-01, -2.346e-01, 8.267e-01, -2.799e-01, 3.349e-01, -1.565e-01, 6.647e-02, 1.817e-02, 1.019e-01, 3.862e-02, -1.680e-01, 1.549e-01, 7.007e-02)); + r += mul(s1_5, M4(-1.472e-01, 2.887e-02, -9.785e-02, 1.098e-01, 3.725e-02, 4.719e-01, 1.409e-01, -1.149e-01, -1.343e-01, 4.555e-03, -7.537e-02, -6.092e-02, 7.429e-02, 1.599e-01, -7.409e-02, 1.111e-01)); + r += mul(s1_6, M4(5.192e-02, 5.279e-02, 5.139e-02, 5.538e-03, 3.032e-02, 1.416e-01, -1.027e-01, -1.048e-01, -1.343e-01, -2.519e-02, 9.241e-02, -3.552e-02, -3.274e-02, -1.035e-01, -2.752e-02, -3.711e-02)); + r += mul(s1_7, M4(1.405e-01, 1.283e-01, -1.631e-01, 1.448e-01, -1.476e-01, 3.194e-01, 4.787e-02, 8.664e-02, -2.308e-02, -2.718e-02, 2.198e-01, 5.863e-02, 5.409e-02, -1.074e-01, -3.430e-01, 7.548e-02)); + r += mul(s1_8, M4(6.984e-02, 8.299e-02, -1.025e-02, 1.410e-02, -3.269e-01, 1.130e-01, -4.935e-02, -7.883e-02, 2.730e-02, 2.576e-02, 9.849e-02, -8.321e-03, 1.082e-01, 7.446e-02, -4.441e-02, 9.677e-03)); + r += mul(s2_0, M4(-3.258e-02, -1.473e-03, 1.278e-01, -6.312e-04, 
9.059e-04, -1.729e-02, 8.754e-03, -7.737e-02, -5.027e-02, 8.639e-02, -1.170e-02, 7.257e-02, 8.412e-02, 2.112e-02, -2.565e-01, 7.677e-03)); + r += mul(s2_1, M4(-7.643e-04, -2.024e-02, 1.767e-01, -7.645e-03, 6.422e-02, 2.239e-01, 1.450e-02, 1.947e-01, -1.279e-02, -1.349e-02, -1.409e-01, -1.273e-01, -9.158e-02, -1.500e-01, 7.653e-02, 2.805e-01)); + r += mul(s2_2, M4(3.071e-02, 2.711e-02, 6.236e-02, 1.223e-02, 2.077e-02, 1.323e-01, 5.334e-03, -5.884e-02, 6.902e-03, 5.473e-03, -3.114e-02, -2.389e-02, 3.126e-03, -7.234e-02, 4.478e-02, -1.808e-02)); + r += mul(s2_3, M4(-5.518e-02, 1.332e-01, -2.462e-02, -1.615e-01, -3.965e-02, -2.004e-02, 1.268e-01, -1.470e-02, -1.397e-01, -6.340e-02, 9.022e-02, -1.625e-01, -1.426e-01, -3.068e-01, -3.755e-02, -1.816e-01)); + r += mul(s2_4, M4(-2.434e-01, -3.850e-02, -2.343e-01, -1.456e-01, -3.490e-01, 1.238e-01, 4.962e-03, -6.624e-02, 2.030e-01, -1.929e-01, 1.951e-01, -3.391e-01, -9.956e-02, 2.692e-01, -3.039e-01, 1.984e-01)); + r += mul(s2_5, M4(-2.289e-02, -9.088e-03, 1.582e-02, 6.333e-02, -4.110e-01, -2.279e-01, -4.103e-02, -4.303e-02, 1.533e-02, 1.511e-01, -6.972e-02, 1.555e-01, -1.127e-01, -1.785e-01, -5.199e-02, -1.169e-01)); + r += mul(s2_6, M4(-5.538e-02, -2.687e-03, 7.738e-02, -3.888e-02, -6.798e-03, 5.840e-03, 9.394e-02, -9.093e-02, -2.274e-02, 5.489e-02, -1.046e-01, -9.950e-02, -1.291e-01, -3.497e-02, -1.789e-02, -2.169e-02)); + r += mul(s2_7, M4(1.547e-01, -3.511e-02, 6.851e-03, 4.498e-02, -4.047e-02, 1.323e-01, 2.036e-01, -3.356e-02, 2.381e-02, -2.907e-02, -3.230e-01, -5.017e-02, 7.335e-02, 8.047e-02, 2.422e-01, -7.968e-03)); + r += mul(s2_8, M4(1.402e-01, -1.278e-01, 4.702e-02, -2.929e-03, -2.774e-03, 4.017e-02, 1.142e-01, -5.435e-02, 5.163e-02, -4.898e-02, -1.112e-01, -1.679e-02, -1.153e-01, -1.449e-01, 3.081e-02, -1.046e-01)); + r += mul(s3_0, M4(8.011e-02, -1.008e-01, -3.350e-02, -5.067e-02, 3.134e-02, 1.965e-02, -6.230e-02, 6.086e-02, 1.045e-01, 3.276e-02, -1.810e-01, 6.820e-02, -1.228e-01, -8.570e-02, 2.025e-01, 
-1.825e-01)); + r += mul(s3_1, M4(1.911e-03, 2.398e-02, -6.180e-02, 8.361e-02, 1.777e-01, 9.055e-02, -2.063e-01, 1.300e-01, 8.911e-02, -1.316e-02, -1.055e-01, -1.246e-01, -3.907e-02, 1.910e-01, 1.273e-01, 1.199e-01)); + r += mul(s3_2, M4(1.061e-01, 3.932e-02, 1.409e-02, 1.843e-02, 1.023e-01, -1.627e-01, -7.256e-02, 1.090e-02, 5.967e-02, 5.053e-02, -5.669e-02, -3.334e-02, 5.388e-02, 9.133e-02, 8.598e-02, -9.190e-02)); + r += mul(s3_3, M4(1.566e-02, -8.323e-03, -2.284e-01, -1.353e-01, -1.207e-01, 2.292e-02, 1.171e-01, 9.358e-02, -4.992e-02, -4.395e-02, 1.335e-01, -1.698e-01, 5.811e-02, -1.714e-01, -1.425e-02, -2.387e-01)); + r += mul(s3_4, M4(-5.546e-01, 2.770e-01, -2.630e-01, -2.027e-01, -5.485e-02, -1.584e-01, 4.579e-01, -7.056e-02, -2.309e-01, 1.690e-01, -1.575e-01, -3.097e-01, 1.883e-01, -9.219e-03, -2.047e-01, -3.564e-01)); + r += mul(s3_5, M4(5.198e-03, 1.196e-01, -8.158e-02, -4.777e-02, -2.359e-01, -3.586e-01, 2.130e-01, 2.051e-01, 1.191e-03, 7.352e-02, -2.873e-02, 8.438e-02, 9.639e-02, -2.764e-01, -3.439e-02, -1.704e-01)); + r += mul(s3_6, M4(-9.012e-02, 9.280e-02, 8.295e-02, 8.577e-03, -3.140e-03, 1.152e-02, 5.301e-02, -5.006e-03, -5.893e-03, 3.360e-02, 3.761e-02, -3.912e-02, -8.357e-02, 8.906e-03, 2.089e-02, 1.899e-02)); + r += mul(s3_7, M4(1.114e-03, 3.887e-02, -4.030e-02, -3.748e-02, 3.333e-02, -5.122e-02, -2.400e-01, -2.969e-03, 2.300e-02, -1.357e-01, -2.767e-01, -1.186e-02, -1.660e-01, 8.940e-02, 3.164e-01, -5.897e-02)); + r += mul(s3_8, M4(3.387e-02, -8.658e-02, -3.218e-02, -4.372e-02, -1.515e-01, 5.072e-02, -1.683e-01, 4.557e-02, 1.132e-01, -6.613e-03, -1.422e-01, 1.617e-02, -1.129e-01, -1.111e-01, 3.890e-02, -6.358e-02)); + r += V4(-2.990e-03, -3.480e-03, 7.394e-03, 1.290e-02); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, 
V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(3.519e-02, 4.799e-02, -2.984e-03, -1.240e-01, -6.249e-03, 2.523e-02, 1.077e-01, 4.787e-02, 2.420e-02, 7.819e-02, -1.956e-01, -7.709e-02, -5.882e-02, -3.366e-02, -1.714e-01, 1.200e-01)); + r += mul(s0_1, M4(-2.273e-01, 8.166e-02, 8.317e-02, 8.376e-02, -5.856e-02, 4.123e-03, -2.466e-01, -3.422e-02, -5.165e-02, 6.070e-02, 7.922e-02, -9.691e-02, 2.510e-01, -1.318e-02, -1.696e-01, 1.123e-01)); + r += mul(s0_2, M4(2.300e-01, -4.649e-02, -9.124e-02, -9.431e-02, 1.141e-01, -4.892e-03, 3.389e-02, -7.123e-02, -5.376e-02, 3.972e-02, 1.306e-01, 9.184e-02, 3.181e-02, -1.301e-02, 6.289e-02, -2.389e-02)); + r += mul(s0_3, M4(-3.802e-01, -2.023e-01, -2.227e-01, -9.450e-02, 2.842e-02, -1.421e-01, -1.397e-01, -9.477e-02, 8.360e-02, 5.211e-02, -4.870e-02, -2.897e-01, 3.206e-03, -1.743e-01, -3.055e-01, -4.716e-02)); + r += mul(s0_4, M4(3.208e-01, -4.644e-01, 8.642e-01, 7.726e-01, 2.570e-01, -3.573e-02, 5.571e-02, 4.619e-01, 1.200e-01, -1.772e-01, 1.733e-01, 3.480e-02, 1.119e-01, 7.200e-02, 5.641e-02, -1.770e-01)); + r += mul(s0_5, M4(-5.690e-01, 1.045e-02, 5.379e-01, -2.482e-01, -2.435e-01, 1.075e-02, -3.207e-02, 1.476e-01, 2.284e-02, 8.934e-02, 3.712e-02, 5.994e-02, -1.533e-01, -5.881e-02, 5.651e-02, -1.526e-02)); + r += mul(s0_6, M4(2.499e-02, 9.467e-03, 5.746e-02, -8.387e-02, -5.405e-02, -1.732e-02, 7.648e-02, 2.696e-02, -1.241e-01, 2.151e-02, -6.329e-02, -7.248e-02, -1.591e-01, -2.487e-02, -1.833e-01, 2.593e-03)); + r += mul(s0_7, M4(-2.208e-01, -3.703e-01, 2.619e-01, 5.898e-02, -1.777e-01, -7.585e-02, 6.970e-02, 4.988e-02, -4.151e-02, -7.482e-03, -1.184e-03, -1.444e-03, -1.106e-01, -1.452e-01, 2.712e-01, -5.314e-02)); + r += mul(s0_8, M4(3.481e-02, -3.760e-02, -1.266e-01, 2.756e-02, -1.371e-02, 5.496e-02, -1.166e-01, 4.062e-02, 6.351e-02, 2.666e-02, -1.949e-02, 4.756e-02, 3.367e-01, -3.686e-01, -2.675e-01, 1.228e-01)); + r += mul(s1_0, M4(2.598e-03, -6.811e-02, 4.981e-02, 
-1.319e-02, 1.206e-01, 2.461e-02, -3.766e-02, 2.058e-02, 8.023e-02, -1.693e-02, 3.585e-02, -9.837e-02, 8.781e-03, -1.964e-02, -4.176e-03, -4.753e-03)); + r += mul(s1_1, M4(1.442e-01, -8.370e-03, -7.432e-02, -3.089e-02, -1.981e-01, -4.136e-02, -1.909e-01, -1.179e-01, -1.917e-01, 9.283e-03, -6.196e-03, 7.644e-02, -3.266e-02, 2.654e-03, -3.667e-02, -3.045e-03)); + r += mul(s1_2, M4(1.606e-01, -5.138e-02, -6.107e-02, -9.435e-02, 1.106e-01, 3.374e-02, 4.798e-02, -1.686e-01, -9.666e-02, 1.708e-01, 1.023e-01, 8.971e-02, -4.152e-03, 3.151e-02, -8.008e-02, -8.659e-04)); + r += mul(s1_3, M4(2.334e-01, 1.165e-01, 1.431e-01, -6.154e-02, 2.323e-01, 5.190e-02, -3.558e-01, 5.900e-03, -1.352e-01, -6.950e-02, -1.556e-01, -8.683e-02, 2.186e-03, -3.382e-02, 9.595e-02, -1.367e-02)); + r += mul(s1_4, M4(-2.079e-01, 2.013e-01, -1.003e-01, 5.965e-01, 7.281e-01, -2.555e-02, -1.042e-01, 5.471e-01, -8.816e-03, -6.272e-01, 2.632e-01, -7.918e-03, -9.514e-02, -1.219e-01, -1.023e-01, -9.444e-02)); + r += mul(s1_5, M4(-2.075e-01, -2.995e-02, -3.489e-02, 1.102e-01, -2.727e-01, -5.656e-02, 1.026e-01, -2.174e-01, 5.193e-02, 9.603e-03, -8.547e-02, 2.008e-01, 3.839e-02, 2.151e-02, -2.244e-02, 7.860e-03)); + r += mul(s1_6, M4(-2.778e-01, -3.516e-02, 1.650e-01, -7.376e-02, -8.045e-02, -9.546e-02, -3.107e-02, -6.217e-02, 1.269e-01, 1.594e-01, 4.540e-03, 3.225e-03, -4.082e-02, 6.364e-03, -6.184e-02, 3.507e-02)); + r += mul(s1_7, M4(-1.143e-01, 1.010e-01, -5.168e-02, 1.587e-01, -1.409e-01, -7.917e-03, -8.995e-02, 1.142e-01, -1.148e-01, -5.357e-02, 7.352e-02, -1.038e-01, 1.130e-02, -5.021e-02, 7.683e-02, -4.051e-02)); + r += mul(s1_8, M4(3.006e-02, 1.525e-01, -1.064e-01, 9.476e-02, -1.579e-01, 1.593e-01, -1.578e-01, -1.912e-02, 5.637e-02, -9.418e-03, -5.185e-03, 4.674e-02, 1.222e-01, -7.692e-02, -7.098e-02, 2.074e-02)); + r += mul(s2_0, M4(-1.652e-02, 2.598e-02, -1.013e-01, 6.189e-02, 8.546e-03, 4.699e-02, -2.344e-03, 1.277e-02, 5.113e-02, -1.595e-02, 1.910e-01, -7.417e-02, 3.643e-02, 4.172e-02, 
-1.039e-01, -6.160e-02)); + r += mul(s2_1, M4(-3.874e-02, -3.482e-03, 7.300e-03, -1.355e-02, 2.448e-01, 4.666e-02, -9.449e-02, 2.339e-02, -1.203e-01, 3.261e-02, -3.038e-02, 9.335e-02, 4.708e-02, 9.716e-02, -4.104e-01, 1.606e-02)); + r += mul(s2_2, M4(2.614e-02, -7.252e-02, 9.566e-03, 3.402e-02, 6.985e-03, -2.278e-02, -1.924e-01, 5.004e-03, -2.337e-02, 3.580e-02, -4.597e-02, 2.932e-02, -1.673e-01, -8.288e-03, 2.899e-02, 3.634e-02)); + r += mul(s2_3, M4(3.155e-01, 9.085e-02, 4.781e-02, -6.959e-02, 3.001e-02, -2.332e-02, -1.132e-01, 1.024e-01, 4.363e-02, -4.525e-02, 6.052e-02, -2.970e-01, -2.196e-01, 2.141e-02, -3.035e-01, 9.429e-02)); + r += mul(s2_4, M4(-6.346e-02, 2.127e-01, -4.391e-02, 2.319e-01, -2.695e-01, -7.714e-03, -1.659e-01, -3.199e-01, 1.499e-01, -3.990e-01, 3.229e-02, -4.619e-01, 1.185e-01, 4.742e-02, 7.026e-01, -5.809e-02)); + r += mul(s2_5, M4(-7.044e-02, 8.127e-02, 2.569e-01, -8.552e-02, -1.192e-01, 9.709e-02, -3.134e-01, 1.632e-01, 2.647e-01, -5.529e-02, 2.336e-01, -1.118e-01, 4.060e-02, 5.644e-02, -7.560e-02, 1.106e-01)); + r += mul(s2_6, M4(-7.387e-02, -2.698e-03, -5.179e-02, 1.077e-03, -1.361e-01, -6.428e-02, -5.398e-02, -5.008e-03, -1.555e-01, 8.817e-02, 2.712e-02, -1.140e-01, 1.123e-01, 2.629e-01, 8.762e-04, 1.070e-01)); + r += mul(s2_7, M4(4.489e-02, 1.490e-01, 1.403e-01, 5.900e-02, -1.535e-02, 9.715e-02, -1.662e-01, 1.544e-02, -2.607e-01, 1.782e-01, 5.149e-02, -1.530e-01, 4.158e-02, -1.167e-01, 2.508e-01, -3.826e-02)); + r += mul(s2_8, M4(1.833e-02, -6.535e-02, -9.799e-04, 3.795e-02, -1.240e-01, 5.356e-02, -1.377e-01, 2.680e-02, -5.193e-02, 1.129e-01, -6.200e-02, 3.753e-02, -1.182e-01, -8.965e-02, -4.491e-02, 2.043e-02)); + r += mul(s3_0, M4(-1.485e-01, -4.827e-02, -6.587e-02, 4.206e-03, 1.429e-03, -7.075e-02, 1.010e-01, -1.434e-02, 1.431e-01, 9.461e-03, 1.974e-02, 6.288e-03, -1.066e-01, 3.071e-02, -5.972e-02, 6.134e-02)); + r += mul(s3_1, M4(1.832e-01, 9.780e-02, 6.492e-03, 7.153e-02, 4.468e-02, 4.766e-02, -2.764e-02, 1.125e-01, -6.547e-02, 
4.916e-02, -2.809e-01, 2.147e-02, -4.627e-05, -1.787e-02, 1.779e-01, -7.133e-02)); + r += mul(s3_2, M4(9.684e-02, -1.149e-01, -7.433e-02, -3.439e-02, -9.873e-02, 6.903e-02, -5.451e-02, 1.204e-01, 2.747e-03, 6.107e-02, -1.211e-02, 1.021e-02, -1.020e-01, -2.111e-02, -3.111e-02, 7.253e-02)); + r += mul(s3_3, M4(8.451e-02, 1.777e-01, -6.733e-02, -3.111e-02, 5.124e-02, 6.421e-02, 2.028e-01, 3.174e-03, 5.908e-02, 3.863e-02, -1.465e-02, -3.614e-02, -2.398e-01, -1.425e-01, -1.372e-01, -5.310e-02)); + r += mul(s3_4, M4(-4.740e-02, 3.190e-01, -3.018e-01, 2.133e-01, -6.713e-02, -1.594e-01, -2.779e-01, -2.978e-01, 2.122e-01, -1.166e-01, -4.420e-01, -3.385e-01, -1.828e-01, -2.161e-01, 2.861e-01, 4.141e-02)); + r += mul(s3_5, M4(-1.008e-01, 1.219e-01, 1.655e-01, -7.892e-02, 1.575e-01, -1.729e-01, -3.729e-01, 2.347e-01, 1.649e-01, -8.625e-02, 1.831e-01, -1.423e-01, -2.275e-03, 1.461e-01, -6.245e-02, -9.654e-02)); + r += mul(s3_6, M4(4.563e-02, 1.920e-01, 1.096e-01, 1.130e-02, -4.992e-02, -2.560e-02, 9.352e-02, 4.260e-02, -1.082e-01, -1.228e-02, 4.508e-02, -5.029e-02, 2.940e-01, 7.883e-02, -1.268e-01, 9.003e-02)); + r += mul(s3_7, M4(-1.140e-01, 2.337e-01, 6.229e-02, 3.967e-02, -1.837e-02, 1.296e-01, -1.911e-01, -4.436e-02, -5.754e-02, 2.124e-01, -2.095e-02, -3.752e-02, 1.007e-01, -1.105e-01, 2.174e-01, 1.540e-02)); + r += mul(s3_8, M4(-5.357e-02, -1.439e-02, 7.558e-02, -2.317e-02, -1.318e-01, 2.876e-01, -1.304e-01, 6.115e-02, 5.664e-03, -1.197e-03, 3.258e-02, 2.902e-02, -5.115e-02, -1.880e-01, -2.181e-02, -9.175e-02)); + r += V4(-6.316e-03, -5.988e-03, -9.405e-03, -2.136e-03); + return r; +} + +void Pass5(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = 
l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 6 +//!DESC conv5 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, 
V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-8.608e-02, 2.569e-01, 1.222e-02, -2.428e-01, 2.819e-02, -2.222e-01, 8.316e-02, 1.236e-01, 7.319e-02, -2.436e-02, -3.285e-02, -4.959e-02, -1.564e-02, -4.520e-03, -2.403e-02, 1.644e-02)); + r += mul(s0_1, M4(8.699e-02, -1.096e-01, 1.068e-01, -3.740e-01, -1.080e-01, -1.857e-02, 4.223e-02, -3.891e-03, -8.229e-02, -1.946e-01, -1.551e-03, -1.387e-01, -5.238e-02, 2.071e-02, 2.358e-02, 5.090e-02)); + r += mul(s0_2, M4(9.500e-02, 1.498e-02, 3.952e-02, -2.201e-01, -5.773e-03, -4.568e-02, -1.513e-02, 1.650e-02, -3.844e-02, -1.894e-02, -6.321e-03, -1.478e-01, 6.779e-04, -1.872e-02, 3.647e-02, -1.142e-01)); + r += mul(s0_3, M4(1.158e-01, 1.180e-01, -7.400e-02, -1.253e-01, -3.329e-02, -9.601e-02, -1.655e-01, 1.764e-01, 4.556e-02, -1.037e-01, 1.404e-02, 4.284e-02, 1.355e-02, -4.159e-02, 1.028e-02, 5.967e-02)); + r += mul(s0_4, M4(3.790e-01, -1.771e-01, 4.289e-01, 3.968e-01, -6.453e-01, -3.229e-02, 6.839e-02, 4.000e-01, 7.875e-02, 3.322e-01, -9.510e-02, 6.532e-02, -2.647e-01, -1.873e-01, 1.182e-01, 1.928e-01)); + r += mul(s0_5, M4(1.861e-02, 4.475e-02, -4.634e-02, 4.190e-02, 7.957e-02, -1.773e-01, 7.434e-02, -3.605e-02, -1.383e-02, 3.314e-02, 7.457e-02, -3.542e-02, -1.410e-01, 1.218e-02, -1.978e-01, 1.382e-01)); + r += mul(s0_6, M4(-1.159e-02, 7.661e-02, -1.156e-02, 4.680e-02, 5.359e-02, -1.324e-01, -1.436e-01, -9.472e-02, -7.470e-02, -1.685e-01, 4.434e-03, -2.265e-02, -1.045e-01, 1.346e-02, -4.403e-02, 2.895e-02)); + r += mul(s0_7, M4(-8.194e-02, 2.863e-03, 2.583e-02, -6.814e-02, 4.180e-02, 1.948e-02, -5.094e-02, -8.335e-02, 1.078e-02, -9.731e-02, -7.838e-02, -7.272e-02, -3.306e-03, 8.672e-02, -1.243e-01, -1.003e-01)); + r += mul(s0_8, 
M4(-7.363e-02, 1.894e-02, -4.111e-02, 9.806e-03, 4.223e-02, -5.568e-02, 7.757e-02, 5.330e-02, 2.111e-02, -3.335e-02, 7.667e-03, -2.582e-02, 1.289e-01, -5.085e-02, 1.158e-01, 3.725e-02)); + r += mul(s1_0, M4(1.335e-02, 1.003e-01, 6.199e-02, 3.112e-03, -8.718e-02, 2.489e-02, -2.486e-02, 1.969e-01, 6.352e-02, -2.251e-01, 9.885e-03, -1.690e-01, 3.148e-02, -2.993e-02, -1.857e-03, -1.052e-01)); + r += mul(s1_1, M4(-8.664e-03, -1.227e-01, -2.031e-02, -2.300e-01, 5.105e-02, 1.422e-02, 1.043e-01, -4.809e-02, -1.366e-01, 1.288e-01, -3.921e-01, -1.088e-01, -5.613e-03, 7.480e-02, 6.498e-02, 2.244e-02)); + r += mul(s1_2, M4(1.231e-01, 4.020e-02, 4.395e-03, -1.853e-01, 4.126e-02, 3.574e-02, 3.323e-04, -9.040e-02, -2.706e-01, 5.809e-02, -2.337e-01, -3.410e-02, 6.976e-03, -6.686e-02, 1.172e-01, -5.432e-02)); + r += mul(s1_3, M4(-2.134e-01, -1.519e-01, 2.647e-02, -8.512e-02, -1.382e-01, 1.372e-01, -2.530e-02, -4.801e-02, 7.539e-02, -8.842e-02, -5.976e-02, 1.708e-01, 2.436e-01, -1.239e-01, 9.767e-02, 2.307e-02)); + r += mul(s1_4, M4(-2.123e-01, 2.003e-02, 2.607e-01, 5.098e-01, 2.415e-01, -4.337e-01, 2.104e-01, 4.542e-02, -5.372e-01, 4.096e-01, -1.995e-01, 1.650e-01, -4.643e-01, 4.214e-01, -2.897e-01, 3.722e-01)); + r += mul(s1_5, M4(4.852e-02, -4.823e-02, -9.304e-03, 6.935e-02, 3.634e-02, 4.152e-02, -2.441e-04, 8.184e-02, -3.163e-01, -1.569e-01, -1.265e-01, 2.367e-01, -2.178e-01, 1.617e-02, -3.774e-01, 5.316e-02)); + r += mul(s1_6, M4(-6.727e-03, 1.153e-01, 1.338e-02, 1.193e-01, 1.504e-01, 1.519e-01, 9.715e-03, 9.078e-03, -8.618e-02, -1.465e-01, -2.277e-02, -4.322e-03, -2.011e-01, -1.293e-01, 1.793e-02, 2.710e-03)); + r += mul(s1_7, M4(-9.969e-03, 2.211e-01, -4.299e-03, -2.202e-02, -4.619e-02, -8.966e-04, 9.219e-02, 1.181e-01, -4.056e-01, -1.222e-01, -3.333e-01, -2.313e-01, -5.614e-02, -1.141e-01, -1.407e-01, -1.764e-01)); + r += mul(s1_8, M4(-9.313e-04, 1.319e-02, 7.414e-02, 6.145e-02, -6.932e-02, 3.310e-02, -6.980e-02, -2.850e-02, -1.856e-01, -1.571e-01, -1.655e-01, -1.211e-01, 
4.781e-02, -1.536e-01, 1.794e-01, 6.533e-03)); + r += mul(s2_0, M4(2.814e-02, 6.999e-02, 1.721e-02, -6.695e-02, 7.155e-02, 1.598e-01, 4.115e-02, -2.464e-02, -6.175e-03, 5.353e-02, -4.210e-02, -6.383e-02, -5.254e-02, 3.658e-02, -6.324e-02, 4.018e-02)); + r += mul(s2_1, M4(-7.643e-02, -8.591e-02, -1.327e-01, 1.490e-01, -4.778e-02, -1.665e-03, 3.515e-02, 4.224e-02, 2.654e-02, -3.990e-03, 1.914e-02, -3.699e-02, -1.815e-01, 6.398e-02, -9.678e-02, -1.639e-01)); + r += mul(s2_2, M4(3.666e-02, 1.137e-02, -3.516e-03, -1.270e-01, -5.116e-02, -3.455e-02, -1.130e-02, -2.826e-02, -2.717e-02, 1.390e-02, 1.004e-02, 6.270e-02, 5.819e-03, 4.421e-02, 4.943e-02, -2.765e-01)); + r += mul(s2_3, M4(-1.111e-01, 2.642e-03, 2.653e-02, -1.920e-02, 1.284e-01, 6.685e-02, -6.475e-02, -2.750e-02, -8.811e-02, -3.021e-01, 1.387e-01, -8.707e-02, -9.734e-02, 3.683e-01, -6.483e-02, 1.673e-02)); + r += mul(s2_4, M4(-8.755e-02, -2.692e-01, -1.860e-01, 8.398e-02, -2.588e-01, 5.441e-01, -1.563e-02, 2.783e-02, 7.085e-02, 1.881e-01, 4.063e-02, -1.307e-01, 3.826e-01, 3.605e-02, 1.551e-01, -3.976e-02)); + r += mul(s2_5, M4(1.761e-01, 9.465e-02, 1.023e-01, 1.684e-01, 5.145e-02, -9.373e-02, 1.927e-01, -2.001e-01, -8.632e-02, 7.821e-02, 3.875e-02, -3.849e-03, -9.294e-02, 1.570e-01, -9.343e-02, 1.712e-02)); + r += mul(s2_6, M4(-6.383e-02, -5.647e-03, -2.071e-02, 3.716e-02, 7.636e-02, 2.504e-02, 4.806e-02, -1.517e-02, 8.862e-02, 8.993e-02, 2.257e-02, -1.228e-02, 1.534e-01, -1.728e-02, -3.967e-02, -3.449e-02)); + r += mul(s2_7, M4(-2.106e-01, -1.526e-01, 6.760e-02, -1.012e-02, 2.699e-02, 3.012e-03, -3.014e-02, -1.204e-02, -7.079e-03, -1.713e-01, 3.694e-02, 6.143e-02, -5.451e-02, -1.359e-01, -9.926e-02, -1.442e-01)); + r += mul(s2_8, M4(-9.338e-02, 1.171e-01, 9.731e-03, 7.911e-02, 2.203e-02, 5.235e-03, 3.585e-03, -2.420e-02, -3.234e-02, 1.790e-02, -6.909e-02, -2.004e-02, -1.564e-01, 1.182e-01, -1.297e-01, -5.639e-03)); + r += mul(s3_0, M4(-3.005e-02, -6.725e-02, 1.321e-02, 4.847e-03, 1.036e-01, 2.312e-01, 
-1.014e-01, -1.152e-01, 9.548e-02, -1.285e-01, 2.631e-02, 1.150e-01, -7.107e-02, -1.159e-01, -8.623e-02, 3.015e-03)); + r += mul(s3_1, M4(-1.199e-01, 1.896e-02, -1.750e-01, 5.793e-02, -1.067e-01, 3.761e-01, 4.887e-02, -3.395e-01, 6.265e-02, 3.455e-02, 5.806e-02, 1.456e-01, -6.617e-02, -1.070e-01, 2.967e-03, 8.557e-03)); + r += mul(s3_2, M4(-1.013e-01, -1.106e-02, -3.594e-02, 2.021e-02, 4.372e-02, -1.061e-01, 2.014e-02, -1.715e-01, -4.093e-02, -2.106e-02, -6.451e-02, 1.431e-01, 5.745e-02, -8.813e-03, 1.078e-02, -1.050e-01)); + r += mul(s3_3, M4(5.771e-03, 2.398e-01, 5.475e-03, -2.666e-02, 2.469e-01, 2.725e-01, -1.278e-01, 5.428e-02, -9.521e-02, -2.876e-01, 8.087e-02, -8.050e-02, -8.112e-02, -3.085e-02, -4.978e-02, 3.844e-02)); + r += mul(s3_4, M4(-2.283e-01, 1.275e-01, -1.267e-01, -1.878e-02, 8.566e-02, 7.676e-01, 4.032e-02, 5.152e-01, -3.872e-02, 5.059e-01, 2.330e-01, 1.399e-02, 5.226e-01, 3.846e-02, 5.762e-01, -1.439e-01)); + r += mul(s3_5, M4(-6.461e-04, 2.103e-01, 1.016e-01, 1.416e-01, 1.046e-01, 3.651e-03, 8.893e-02, -8.801e-02, 7.038e-02, -7.385e-02, 1.304e-01, -3.234e-02, -3.917e-02, -4.112e-02, 3.192e-02, -1.320e-01)); + r += mul(s3_6, M4(-5.442e-02, 1.120e-01, 1.744e-02, -4.865e-02, -1.390e-01, -2.964e-01, -7.290e-02, -1.213e-01, 9.644e-02, -8.644e-02, 4.706e-02, -1.755e-02, 6.284e-03, -7.466e-02, 5.034e-02, -1.122e-01)); + r += mul(s3_7, M4(-1.130e-01, -7.809e-02, 6.065e-03, 2.930e-03, -1.685e-01, -6.866e-02, -1.093e-01, -1.189e-01, 5.726e-02, -1.833e-01, 1.726e-02, -3.833e-02, -1.400e-01, -3.362e-01, 3.635e-02, -3.936e-02)); + r += mul(s3_8, M4(-1.190e-01, 7.088e-02, -6.045e-02, -6.491e-02, -8.954e-02, -7.689e-02, -6.903e-02, -1.069e-01, 6.519e-02, -7.405e-02, -6.924e-03, -4.911e-02, -3.081e-02, 6.495e-03, -9.590e-02, 6.956e-02)); + r += V4(-5.867e-03, 8.204e-04, 8.838e-06, -4.451e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 
s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-1.613e-01, -4.226e-02, -1.004e-01, 2.744e-02, -5.920e-02, 2.726e-02, 5.529e-02, 1.777e-02, -2.057e-01, -4.471e-02, 4.853e-03, 1.897e-02, 6.077e-02, -8.693e-02, 2.211e-02, 2.221e-02)); + r += mul(s0_1, M4(-1.790e-02, -5.945e-02, 2.220e-01, 2.415e-02, -1.960e-02, 4.714e-02, -1.908e-01, 4.480e-02, 2.509e-01, -1.789e-02, 1.996e-01, -5.241e-02, -6.129e-02, 2.228e-02, -9.568e-03, 5.522e-02)); + r += mul(s0_2, M4(6.855e-02, -1.714e-01, 3.252e-03, -7.609e-02, -6.607e-03, -1.250e-01, -3.310e-02, 1.991e-02, -3.235e-02, -3.547e-02, -6.805e-03, 5.213e-02, 1.799e-03, -9.400e-02, -4.560e-02, -4.234e-02)); + r += mul(s0_3, M4(-3.910e-01, -1.046e-01, 2.434e-01, 7.156e-02, 5.429e-02, -6.191e-02, -1.441e-01, 2.060e-01, -1.385e-01, 1.037e-01, -4.380e-02, -8.286e-02, 1.389e-01, -1.694e-02, -6.093e-02, -5.890e-02)); + r += mul(s0_4, M4(-2.968e-02, -1.900e-01, -1.226e-01, -9.187e-02, -4.346e-01, 6.699e-01, 2.262e-01, 4.149e-01, -3.449e-01, -1.810e-01, 4.280e-02, 1.218e-01, 6.111e-02, -1.049e-01, 1.369e-01, 6.181e-02)); + r += mul(s0_5, M4(-9.205e-03, 4.332e-02, -1.318e-01, 7.155e-02, 1.343e-01, -9.519e-02, 1.417e-01, 1.615e-01, -1.237e-02, 1.310e-01, 6.284e-02, 4.459e-02, -8.180e-02, -2.356e-01, -7.968e-02, -1.569e-01)); + r += mul(s0_6, M4(1.620e-02, -5.617e-02, -5.471e-02, -1.506e-02, -1.750e-02, 2.921e-02, 2.788e-02, -3.191e-02, 6.364e-02, 3.667e-02, -4.515e-02, -2.747e-02, -4.842e-02, 2.758e-02, -9.440e-02, -2.075e-02)); + r += mul(s0_7, M4(8.817e-02, -5.475e-02, -2.476e-02, 5.598e-03, -3.989e-02, -7.869e-02, 2.001e-01, 3.282e-02, -2.619e-02, -8.282e-03, 1.138e-01, 4.952e-02, -4.746e-02, 9.597e-02, 1.639e-01, 9.382e-02)); + r += mul(s0_8, M4(-1.727e-02, -3.994e-02, 5.955e-03, 1.735e-02, 3.433e-02, 2.114e-01, 3.957e-02, -3.448e-03, 4.624e-02, 4.745e-02, 3.676e-02, 
2.724e-02, 3.361e-02, -2.538e-03, 2.072e-02, -4.143e-02)); + r += mul(s1_0, M4(6.291e-02, 3.066e-02, -1.528e-01, -1.213e-01, -1.881e-01, 1.398e-01, 1.107e-02, 2.913e-02, -4.372e-02, -1.707e-01, -1.233e-01, 1.214e-02, -5.108e-02, -8.090e-02, 2.695e-03, -4.899e-03)); + r += mul(s1_1, M4(-1.290e-01, -3.723e-02, 5.548e-02, -5.796e-02, 7.544e-02, 2.218e-02, 7.075e-02, 8.778e-03, 1.830e-01, -1.635e-01, -1.380e-02, 1.337e-01, -1.343e-01, -7.167e-02, 8.180e-02, 3.701e-02)); + r += mul(s1_2, M4(3.819e-02, -2.796e-01, 7.484e-02, -6.969e-02, 3.958e-03, -3.990e-02, -4.936e-03, -1.871e-02, -8.227e-02, -2.647e-01, -5.353e-02, 2.802e-01, 5.236e-02, -1.095e-01, 3.037e-02, 1.371e-02)); + r += mul(s1_3, M4(-7.472e-02, 7.277e-02, -9.259e-02, -5.970e-02, 1.904e-01, -7.408e-02, 9.517e-03, 5.773e-02, -2.063e-01, 7.309e-02, -1.632e-01, 9.394e-02, 6.461e-02, -4.990e-02, -4.256e-02, -1.354e-01)); + r += mul(s1_4, M4(-1.989e-02, -5.893e-02, -2.870e-01, 3.377e-02, -5.899e-02, -1.292e-01, 2.142e-01, 6.521e-02, -3.446e-01, 7.705e-02, -1.953e-01, 3.493e-01, 8.550e-01, -2.645e-01, -5.125e-02, 3.279e-01)); + r += mul(s1_5, M4(-1.666e-02, 4.650e-03, -1.421e-01, 8.864e-02, 8.253e-03, -3.432e-02, -8.438e-02, -1.618e-02, -7.692e-02, 2.310e-01, 1.882e-01, 2.958e-01, -1.735e-01, -4.556e-01, -1.155e-01, -3.658e-01)); + r += mul(s1_6, M4(-9.666e-02, 1.958e-02, -3.755e-02, 1.890e-02, -1.861e-01, -2.990e-02, 5.561e-02, -2.582e-02, 7.456e-02, 9.553e-02, -1.765e-01, -1.025e-01, -6.244e-02, 1.110e-01, -1.733e-01, -1.265e-01)); + r += mul(s1_7, M4(-3.723e-03, -3.252e-02, -3.220e-02, 7.520e-02, 1.130e-01, 6.318e-02, -2.606e-01, -6.368e-02, 1.609e-01, 6.315e-02, -4.849e-02, 8.580e-02, -7.162e-02, -1.825e-01, 3.036e-01, 3.148e-02)); + r += mul(s1_8, M4(-2.751e-02, -1.125e-02, -4.807e-02, -3.768e-02, -3.989e-02, -6.228e-02, -2.268e-02, -5.394e-03, 3.481e-02, -5.196e-03, 4.520e-02, 2.328e-01, -8.159e-03, -1.147e-01, 1.142e-01, 2.181e-02)); + r += mul(s2_0, M4(-7.023e-03, -1.082e-01, -1.112e-01, 8.964e-03, 
1.738e-02, 6.085e-03, 1.381e-01, -1.118e-01, -8.075e-03, -2.861e-02, 2.852e-02, 3.504e-02, 5.891e-02, 2.802e-02, -7.550e-02, 2.339e-02)); + r += mul(s2_1, M4(-6.810e-04, 1.146e-01, -5.968e-03, -6.577e-02, 2.726e-01, 1.593e-01, 7.966e-02, 2.335e-02, -2.240e-02, -1.089e-01, -1.357e-02, -1.583e-02, 4.088e-02, -1.074e-01, -5.411e-02, 3.467e-01)); + r += mul(s2_2, M4(5.432e-02, -8.088e-02, 4.341e-02, -1.622e-03, -1.120e-01, -1.209e-01, -9.585e-02, -2.620e-02, 9.254e-03, -1.146e-02, -1.566e-02, 3.510e-02, 8.974e-02, -1.070e-01, -3.833e-03, 8.178e-02)); + r += mul(s2_3, M4(-4.635e-02, -1.177e-02, -1.033e-01, 1.648e-02, -1.173e-01, 6.739e-02, 1.362e-01, -6.356e-02, 2.983e-02, 2.365e-02, -5.309e-02, -3.019e-02, 8.626e-02, 3.954e-02, 1.349e-01, 1.739e-01)); + r += mul(s2_4, M4(-6.323e-02, -1.185e-02, 7.697e-02, 1.531e-01, 1.064e-02, -2.665e-01, 4.370e-01, -2.230e-02, 1.482e-02, 4.515e-02, -5.242e-02, 2.613e-02, 4.812e-01, -2.269e-02, 1.252e-02, 3.916e-01)); + r += mul(s2_5, M4(8.870e-02, 2.979e-01, -1.148e-01, 3.591e-02, 8.996e-02, -1.399e-02, 4.609e-02, -7.584e-02, 1.080e-02, 1.990e-02, -1.944e-02, 2.932e-02, -9.051e-02, -2.288e-01, -3.643e-01, -1.566e-01)); + r += mul(s2_6, M4(-4.092e-02, -1.432e-02, 5.140e-02, 7.572e-02, 8.516e-02, 4.077e-03, 2.068e-03, -5.832e-02, -6.287e-03, 2.525e-02, 2.263e-02, -4.400e-03, -4.119e-02, -6.518e-02, 1.449e-01, 3.534e-02)); + r += mul(s2_7, M4(-2.346e-03, -1.100e-02, -1.841e-01, -4.504e-02, 4.907e-02, -5.487e-03, -1.495e-02, -1.159e-02, 6.396e-02, 1.176e-01, -1.291e-01, -1.254e-01, 2.271e-01, 4.009e-02, 7.242e-02, -1.274e-01)); + r += mul(s2_8, M4(-4.918e-02, -1.015e-02, -5.141e-02, 4.864e-02, -1.127e-01, -6.540e-02, 1.881e-02, -8.765e-03, -2.301e-02, -3.138e-02, -2.005e-02, -9.395e-05, 1.087e-02, -7.551e-02, 1.812e-02, -5.127e-02)); + r += mul(s3_0, M4(2.168e-01, -5.872e-02, -6.033e-02, 2.508e-03, -1.379e-01, -5.535e-02, 5.969e-02, 5.551e-02, 8.818e-02, 5.425e-02, 7.221e-02, -6.894e-02, -1.672e-02, 1.812e-02, -3.621e-02, 1.131e-02)); + r 
+= mul(s3_1, M4(4.283e-02, -1.942e-04, -1.753e-01, -1.483e-02, 5.554e-02, 4.928e-02, 9.082e-02, 4.843e-02, -6.584e-02, -1.871e-01, -9.180e-02, -8.086e-02, 7.754e-02, -1.453e-02, 1.538e-01, 1.044e-01)); + r += mul(s3_2, M4(3.916e-02, 2.089e-02, -9.051e-02, 3.198e-02, -1.236e-02, -6.463e-02, 1.668e-01, 9.283e-02, 1.492e-02, 1.011e-01, 4.945e-02, 1.140e-01, 7.369e-02, -5.768e-02, 5.828e-02, 4.817e-02)); + r += mul(s3_3, M4(-1.767e-02, 1.883e-02, -1.172e-01, -4.119e-02, -2.420e-01, -7.944e-02, 2.076e-01, 1.885e-01, -2.633e-02, 9.848e-02, -7.233e-02, -1.787e-01, 5.510e-02, -6.444e-02, 1.024e-01, 8.318e-02)); + r += mul(s3_4, M4(-1.234e-02, 8.177e-02, -3.822e-02, 9.576e-02, -1.588e-01, 1.682e-01, -1.860e-01, -3.175e-02, -2.992e-01, -2.866e-02, 5.652e-02, -4.088e-02, 3.770e-01, -6.704e-02, 3.433e-01, 1.025e-01)); + r += mul(s3_5, M4(1.490e-01, 3.375e-01, -1.473e-01, 1.287e-01, -7.768e-03, 1.830e-01, -5.415e-02, 3.301e-02, 6.827e-02, 2.061e-01, 1.572e-01, 1.906e-01, -2.636e-02, -1.488e-01, 4.210e-02, -9.244e-02)); + r += mul(s3_6, M4(1.140e-03, -3.620e-02, -9.971e-03, 5.804e-02, 1.823e-01, -1.234e-02, 4.516e-02, -2.637e-02, -3.658e-02, -1.711e-02, 3.558e-02, 2.008e-02, 2.854e-02, -5.525e-02, 5.287e-02, -5.876e-02)); + r += mul(s3_7, M4(6.883e-02, 6.152e-03, -8.080e-02, 4.011e-02, 1.149e-01, -9.289e-02, 1.060e-01, 4.384e-02, -7.819e-02, 1.448e-02, 1.486e-01, -1.965e-02, 4.570e-02, -2.967e-02, -1.034e-01, -1.041e-01)); + r += mul(s3_8, M4(-3.056e-03, 2.109e-02, 2.237e-02, 9.022e-02, -5.609e-02, -8.009e-03, 8.356e-02, 3.819e-02, -1.670e-02, 4.753e-02, 9.900e-02, 2.052e-02, -7.434e-02, 1.339e-02, -1.596e-02, 3.147e-03)); + r += V4(-1.543e-03, -4.932e-03, 6.376e-03, 2.635e-03); + return r; +} + +void Pass6(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = 
l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 7 +//!DESC conv6 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + 
+#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.209e-01, -5.795e-02, -8.765e-02, -2.009e-02, 6.086e-03, -7.115e-02, 8.973e-03, -1.543e-01, -1.694e-01, -5.479e-02, 4.161e-02, 1.035e-02, 1.328e-02, 5.594e-02, 1.018e-05, 2.955e-02)); + r += mul(s0_1, M4(-3.967e-02, 1.680e-01, -2.233e-02, -8.411e-02, -1.698e-02, -2.763e-02, -1.059e-01, -2.100e-01, 3.937e-02, 3.267e-02, -8.229e-02, 3.501e-02, -2.628e-03, -4.292e-02, 2.431e-02, -3.400e-02)); + r += mul(s0_2, M4(1.509e-02, -3.018e-02, 5.379e-02, -6.856e-02, 2.673e-03, -4.967e-02, 2.877e-03, -1.595e-02, -1.026e-01, -5.521e-02, 1.200e-02, -4.588e-02, 2.993e-02, -1.958e-02, -2.616e-03, -2.579e-02)); + r += mul(s0_3, M4(4.993e-02, 1.236e-01, -1.067e-02, -5.176e-01, 2.549e-01, 4.287e-02, 7.064e-02, 1.690e-01, 9.888e-02, 1.328e-02, 4.869e-02, -5.790e-02, -1.404e-01, -6.974e-03, -4.548e-02, 2.544e-01)); + r += mul(s0_4, M4(1.472e-02, 3.096e-01, -3.115e-01, 2.413e-02, -7.457e-02, -1.535e-01, -3.765e-01, 6.845e-01, 1.154e-01, -1.489e-01, -2.606e-01, 6.946e-02, 7.796e-02, 3.208e-02, -1.062e-01, 2.085e-01)); + r += mul(s0_5, M4(1.892e-02, 7.663e-02, 7.539e-02, -1.873e-02, -8.866e-02, 4.203e-02, -3.389e-01, 1.615e-01, -7.032e-02, -1.146e-01, 9.530e-02, 6.399e-02, -5.494e-02, -8.184e-02, 1.078e-01, 2.335e-02)); + r += mul(s0_6, M4(-5.239e-03, 7.910e-02, 4.204e-03, 1.566e-02, -1.073e-01, -2.478e-02, -1.678e-02, -2.501e-02, 2.174e-02, -1.456e-02, 6.321e-02, 8.639e-02, -2.920e-02, -2.341e-01, 6.144e-02, -9.660e-02)); + r += mul(s0_7, M4(-6.268e-02, 2.019e-03, -7.203e-02, 1.529e-02, 1.932e-01, 8.225e-02, 5.777e-02, -2.183e-01, -8.861e-02, 
-1.371e-01, 2.163e-01, 1.881e-01, -4.031e-02, -2.713e-01, 3.510e-01, 1.920e-01)); + r += mul(s0_8, M4(-3.058e-02, 1.151e-01, -3.663e-02, -8.908e-03, 6.323e-02, 1.742e-01, -1.851e-01, -9.976e-02, -2.619e-02, -2.550e-02, 5.783e-02, 1.301e-02, 4.378e-02, -1.185e-01, -2.295e-02, -5.327e-02)); + r += mul(s1_0, M4(-2.656e-02, -8.664e-02, -1.887e-02, 3.009e-02, 5.418e-02, -1.342e-02, -6.789e-03, -1.552e-02, 2.416e-02, -1.698e-03, -2.975e-02, 1.135e-01, -3.067e-02, 4.174e-02, 2.610e-02, 2.015e-02)); + r += mul(s1_1, M4(7.872e-03, -4.486e-03, 9.428e-02, -1.019e-01, -8.668e-03, 3.952e-02, -1.387e-02, 4.557e-03, -6.755e-02, -1.692e-01, 3.735e-03, 1.189e-01, -3.229e-02, 3.917e-02, -9.192e-03, 2.140e-02)); + r += mul(s1_2, M4(5.343e-02, -4.132e-02, 7.494e-02, -5.720e-02, -5.339e-03, 4.912e-02, -3.645e-03, 1.655e-02, -8.378e-03, 1.803e-02, -1.056e-02, -6.085e-03, -2.281e-02, 2.981e-02, -9.818e-04, -3.084e-02)); + r += mul(s1_3, M4(7.573e-03, 4.110e-02, 3.253e-03, -1.924e-01, 3.217e-02, -9.499e-03, 2.526e-02, 6.588e-02, 6.349e-02, -3.005e-02, -3.070e-02, 1.590e-01, 6.700e-02, 4.547e-02, 2.119e-03, 9.841e-02)); + r += mul(s1_4, M4(-1.378e-01, 8.172e-02, -7.034e-02, 2.066e-01, 5.232e-03, -7.941e-02, -1.010e-01, 1.572e-01, 1.921e-01, 9.517e-02, -9.389e-01, -4.918e-02, 1.478e-01, -3.970e-01, 2.126e-01, -1.827e-02)); + r += mul(s1_5, M4(5.501e-02, -7.455e-02, 1.262e-01, 1.406e-02, -4.925e-02, -9.561e-02, 8.075e-02, 2.132e-02, -2.483e-02, 1.129e-01, -5.470e-02, 2.870e-02, -2.324e-02, -1.364e-01, 4.599e-02, 6.126e-03)); + r += mul(s1_6, M4(3.095e-02, 1.469e-02, 3.846e-02, 4.155e-03, -8.392e-03, -3.402e-02, 4.176e-02, 6.511e-03, 6.366e-02, -6.091e-02, 8.332e-02, 1.722e-02, 1.256e-01, -9.227e-03, -4.640e-02, -2.004e-02)); + r += mul(s1_7, M4(-1.292e-02, -9.372e-02, 6.924e-02, -7.925e-03, 6.668e-03, 7.838e-03, -5.530e-02, -2.253e-03, 1.959e-01, -1.509e-01, 1.427e-01, 1.159e-01, -7.810e-02, 7.828e-02, 1.069e-01, 1.240e-01)); + r += mul(s1_8, M4(-6.521e-03, 5.130e-02, -2.057e-02, -3.995e-02, 
5.384e-02, 4.676e-02, -6.988e-02, -5.235e-02, 3.864e-02, -7.028e-02, 3.601e-02, 9.088e-03, -1.106e-02, -3.942e-02, 1.250e-04, -4.433e-02)); + r += mul(s2_0, M4(2.098e-02, -2.232e-02, -1.023e-01, -2.915e-03, 6.659e-03, -8.768e-02, 1.362e-03, 1.724e-02, -2.860e-02, -1.300e-02, 1.271e-03, -7.769e-02, -1.679e-02, -9.083e-04, -5.349e-02, 2.312e-02)); + r += mul(s2_1, M4(-1.207e-01, 4.680e-03, -1.289e-02, 3.844e-02, -2.073e-02, -4.596e-02, -7.886e-02, 2.531e-02, 3.798e-02, -1.523e-02, 1.300e-01, -3.368e-02, 1.875e-02, -1.294e-02, -7.884e-02, -5.748e-02)); + r += mul(s2_2, M4(-2.657e-02, 7.694e-02, -6.276e-02, -1.069e-03, 3.325e-02, -1.413e-02, 4.789e-02, 3.326e-02, 4.902e-02, -2.639e-02, 4.732e-03, 6.768e-02, 5.051e-02, -1.277e-01, 1.042e-01, -7.689e-02)); + r += mul(s2_3, M4(1.491e-01, 2.843e-02, -6.555e-02, -1.054e-01, -1.957e-01, 2.262e-01, -5.168e-02, -2.061e-01, 3.173e-01, 6.741e-02, -2.873e-02, 2.378e-01, 2.837e-02, 5.813e-03, -2.585e-02, -5.869e-02)); + r += mul(s2_4, M4(7.518e-01, -1.003e-01, 7.786e-02, -4.277e-01, -7.067e-02, 6.606e-01, -2.301e-01, -1.439e-02, 1.334e-01, -1.780e-01, 7.624e-03, 2.551e-01, -4.422e-01, -3.284e-02, -1.636e-01, 2.846e-01)); + r += mul(s2_5, M4(6.291e-02, -3.015e-01, 1.031e-01, 1.837e-02, -5.147e-02, 1.049e-01, -1.665e-01, -3.407e-02, -1.383e-01, -1.599e-01, 9.878e-03, 5.695e-02, -3.202e-02, -8.912e-02, 3.058e-01, 9.144e-02)); + r += mul(s2_6, M4(1.529e-01, 2.058e-02, 1.225e-02, 1.418e-02, 2.851e-02, -9.414e-02, 3.406e-02, -8.937e-02, 7.244e-02, -3.096e-02, 4.924e-03, 7.146e-02, -6.292e-02, -5.805e-02, 2.246e-02, -7.245e-02)); + r += mul(s2_7, M4(-9.794e-02, -3.174e-02, -3.934e-02, 1.694e-01, 1.580e-02, -1.106e-01, 1.071e-01, 9.675e-02, -1.938e-01, -9.670e-02, 1.396e-01, 1.392e-01, 1.158e-01, 1.606e-01, -7.912e-02, -4.225e-01)); + r += mul(s2_8, M4(4.833e-02, 8.418e-02, 4.940e-02, 7.383e-02, 3.532e-02, -4.740e-02, 4.360e-02, 3.728e-02, 1.727e-02, 1.183e-02, -3.121e-02, -1.380e-01, -5.586e-02, 1.812e-01, -7.809e-02, 2.813e-03)); + r += 
mul(s3_0, M4(-3.128e-02, -8.348e-03, -3.419e-02, 1.555e-02, 2.473e-02, -1.091e-01, -2.974e-02, -7.533e-04, -3.134e-02, 4.890e-02, 4.992e-02, 5.672e-03, -2.057e-01, -6.419e-02, -5.290e-03, -9.112e-02)); + r += mul(s3_1, M4(-1.988e-01, -6.346e-02, -2.376e-02, -1.026e-01, 3.478e-02, -1.038e-01, 8.392e-02, -4.286e-02, 8.850e-02, 5.137e-02, 3.114e-02, 3.773e-02, -5.500e-02, 5.521e-02, 3.891e-02, -7.728e-02)); + r += mul(s3_2, M4(1.495e-03, 1.989e-03, 8.935e-02, -3.143e-02, -2.298e-02, 3.887e-02, -4.775e-02, 2.035e-02, 2.637e-02, 4.364e-02, -3.181e-02, 5.997e-02, 3.970e-02, -4.506e-02, 4.399e-02, -7.875e-02)); + r += mul(s3_3, M4(1.929e-01, 6.065e-02, 1.131e-02, -6.712e-02, 2.405e-01, 8.786e-02, 2.701e-02, 3.849e-01, 6.113e-01, 1.128e-01, -5.000e-02, 4.534e-02, 1.853e-01, 1.202e-01, 7.714e-02, 2.600e-01)); + r += mul(s3_4, M4(4.894e-01, 7.720e-02, -2.085e-01, 1.759e-01, -8.599e-02, 2.499e-01, 2.671e-02, 3.832e-02, -7.451e-02, -4.900e-01, 2.272e-01, 3.038e-01, -1.101e-02, 1.394e-01, -2.065e-01, -2.118e-02)); + r += mul(s3_5, M4(1.279e-02, -1.004e-02, -1.078e-01, 5.152e-02, 4.421e-02, 9.399e-02, -1.011e-01, -2.129e-02, -4.416e-02, -9.283e-02, -7.153e-03, 8.909e-02, -9.666e-02, 3.185e-02, 1.265e-01, -9.346e-03)); + r += mul(s3_6, M4(-3.006e-02, -3.389e-03, -1.226e-02, -4.016e-03, -5.528e-02, -6.337e-02, 3.931e-02, 3.432e-02, -1.850e-01, -4.101e-03, -7.500e-02, 2.523e-01, -4.059e-02, 9.910e-03, 6.657e-02, 1.195e-01)); + r += mul(s3_7, M4(-3.706e-02, -4.632e-02, 1.353e-01, -1.655e-02, -1.189e-03, -1.299e-01, 1.459e-01, 7.592e-02, -8.454e-02, -4.301e-02, 1.108e-01, 2.686e-01, -1.628e-01, -9.140e-02, 7.202e-02, -4.312e-02)); + r += mul(s3_8, M4(2.611e-02, 4.438e-02, -1.065e-03, 8.827e-02, 5.581e-03, -1.020e-01, 5.709e-02, -1.106e-02, 6.134e-02, 5.575e-02, 5.738e-02, -1.392e-01, 1.741e-02, 1.431e-01, 7.709e-02, 4.472e-02)); + r += V4(2.856e-05, 8.552e-03, -8.514e-03, -8.667e-03); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 
s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-2.060e-02, -3.573e-02, 4.912e-02, 5.192e-02, -1.701e-02, -5.571e-02, 3.012e-02, -7.803e-02, 5.806e-02, -1.605e-02, -2.480e-02, 2.160e-03, 3.288e-02, -1.962e-03, -4.509e-02, 5.333e-03)); + r += mul(s0_1, M4(-7.057e-02, -1.169e-02, 8.115e-02, 3.020e-02, -2.626e-02, -1.616e-01, -1.885e-01, 1.023e-01, 8.520e-02, -9.444e-02, -1.475e-01, -9.016e-02, 6.451e-02, -2.741e-02, 5.301e-02, -8.370e-03)); + r += mul(s0_2, M4(3.020e-02, -9.219e-02, 8.349e-02, 9.079e-03, -1.881e-02, 4.455e-02, 1.154e-01, -2.889e-02, -1.542e-02, -7.914e-03, -5.572e-02, 3.873e-02, 1.079e-03, -4.719e-02, 2.548e-02, 3.440e-02)); + r += mul(s0_3, M4(1.301e-01, -8.388e-02, -5.687e-02, 2.733e-01, 3.401e-02, -1.413e-02, 4.602e-02, -2.813e-01, 4.540e-01, -5.388e-02, 7.387e-02, -6.504e-01, 1.325e-01, -2.873e-02, -2.975e-02, -2.031e-01)); + r += mul(s0_4, M4(7.265e-03, 3.294e-01, -4.150e-01, -4.157e-01, -6.122e-01, -2.782e-01, -2.743e-02, 2.658e-01, -2.096e-01, -1.486e-01, 1.571e-01, 3.423e-01, -1.726e-04, -4.478e-01, -7.431e-02, 1.408e-01)); + r += mul(s0_5, M4(-5.815e-03, -1.492e-01, 4.425e-02, -1.350e-02, 4.150e-02, 1.274e-01, -2.494e-01, -1.405e-02, 3.992e-02, -2.511e-03, -9.956e-03, 3.636e-02, 2.708e-02, 2.007e-01, 2.176e-01, -1.301e-02)); + r += mul(s0_6, M4(-2.974e-02, 3.332e-03, -4.831e-02, 3.652e-02, -2.911e-02, -1.271e-02, 3.832e-02, -2.472e-02, 3.249e-02, -1.190e-03, -4.770e-02, -8.276e-02, 1.235e-01, 8.707e-02, 1.179e-01, 1.313e-01)); + r += mul(s0_7, M4(1.030e-01, -1.146e-01, -6.309e-02, -1.055e-01, 3.441e-02, 3.396e-03, -1.176e-01, 1.386e-01, 1.495e-01, 2.413e-02, 1.509e-01, -5.060e-02, -1.727e-01, 7.154e-01, 1.993e-01, -3.198e-02)); + r += mul(s0_8, M4(-3.894e-02, 2.181e-02, 4.581e-02, 4.944e-02, -1.211e-01, 
1.148e-01, -1.811e-01, -2.012e-02, 4.331e-02, 5.017e-02, -1.258e-02, 1.772e-02, 9.302e-02, -1.110e-02, 1.585e-02, -3.492e-02)); + r += mul(s1_0, M4(-1.173e-02, 9.897e-03, 7.565e-02, -8.842e-03, -1.854e-02, -7.564e-03, -2.706e-04, 1.776e-03, -9.569e-02, 2.963e-03, 4.260e-02, 2.437e-02, 1.912e-02, -1.021e-02, -5.637e-02, -2.496e-02)); + r += mul(s1_1, M4(-5.787e-02, -1.200e-02, 7.517e-02, 7.302e-02, 2.392e-02, 1.970e-02, -7.769e-02, 9.084e-03, 3.051e-02, 2.814e-02, -2.882e-03, -2.011e-02, 7.490e-02, 1.503e-02, 1.809e-02, -8.811e-02)); + r += mul(s1_2, M4(-1.795e-02, -1.067e-02, 1.235e-01, 5.245e-03, -6.887e-02, 4.047e-02, 3.475e-02, 6.296e-03, -3.501e-03, -3.751e-02, -9.597e-02, 2.536e-02, 1.646e-02, -2.891e-02, 1.570e-02, 1.715e-02)); + r += mul(s1_3, M4(8.565e-02, -6.574e-05, -5.121e-02, 5.086e-02, -7.024e-02, -5.662e-04, 2.205e-03, 4.116e-02, -4.728e-02, 4.219e-02, 4.900e-02, -1.208e-01, -2.718e-02, 3.903e-02, 2.372e-02, -3.676e-02)); + r += mul(s1_4, M4(-1.401e-01, 5.764e-01, -2.054e-01, -1.029e-01, -1.037e-01, -4.536e-02, 6.555e-02, 1.516e-02, -1.173e-01, 8.400e-02, 1.755e-01, 3.744e-01, -1.311e-01, -1.403e-01, 2.332e-01, 1.204e-01)); + r += mul(s1_5, M4(4.695e-02, -5.544e-02, 8.989e-02, -6.275e-02, 1.351e-01, -2.991e-02, -8.762e-03, -1.449e-02, -9.402e-03, 2.208e-02, -5.917e-02, -1.552e-02, 2.040e-02, -5.696e-04, 9.425e-02, 2.299e-02)); + r += mul(s1_6, M4(-4.948e-02, 7.441e-02, 2.896e-02, 2.147e-02, 2.291e-02, -2.075e-02, 3.712e-03, 2.035e-02, 2.247e-02, -3.774e-02, -4.785e-02, -1.356e-01, -5.951e-03, 5.747e-02, 8.200e-04, -2.847e-02)); + r += mul(s1_7, M4(1.020e-01, -2.641e-02, 1.961e-02, -1.182e-01, -8.813e-02, -1.600e-04, -1.931e-02, 1.197e-02, -3.958e-03, 1.959e-02, 4.974e-02, -1.126e-01, -1.102e-01, 1.223e-01, -4.062e-03, 6.450e-04)); + r += mul(s1_8, M4(-2.978e-02, 3.062e-02, 1.368e-02, 3.659e-03, -8.098e-02, -3.109e-02, 1.281e-02, 1.060e-03, -2.341e-02, -8.555e-03, -3.087e-02, -2.310e-02, 6.370e-02, -1.293e-02, 2.538e-02, -1.270e-03)); + r += mul(s2_0, 
M4(-7.273e-02, 2.366e-02, 4.031e-02, 6.153e-02, -7.381e-02, 2.800e-02, -1.501e-02, 1.604e-01, -5.307e-03, -4.300e-03, -7.255e-02, 1.191e-01, 2.080e-02, -2.249e-02, 2.204e-02, 5.714e-02)); + r += mul(s2_1, M4(-6.031e-02, -3.662e-02, -2.085e-01, 3.811e-02, -1.018e-01, -4.553e-03, -1.082e-01, -2.846e-03, 6.619e-02, 4.708e-02, -6.461e-02, -9.098e-02, -3.929e-03, 2.785e-02, -1.225e-03, -1.047e-01)); + r += mul(s2_2, M4(-6.829e-02, 6.371e-02, -8.082e-02, -2.556e-02, 1.878e-02, -2.257e-02, -9.831e-03, -5.339e-02, 6.919e-03, -8.110e-02, 3.922e-02, 1.624e-03, 3.858e-02, -6.258e-02, 1.776e-02, 2.512e-03)); + r += mul(s2_3, M4(-1.304e-01, -1.371e-02, -2.400e-02, 2.024e-01, 4.433e-01, -1.796e-01, -8.035e-02, -3.362e-01, 1.620e-01, 7.586e-03, -4.559e-02, 3.404e-02, 1.072e-01, -2.411e-02, 3.556e-02, -2.595e-01)); + r += mul(s2_4, M4(-9.545e-02, -6.805e-02, -6.113e-01, -3.604e-01, 1.015e-01, 1.480e-01, -5.419e-01, 7.857e-02, -1.547e-01, 9.204e-02, 1.295e-01, 2.123e-01, -6.725e-02, 1.549e-01, -5.152e-01, -1.548e-01)); + r += mul(s2_5, M4(-5.472e-03, 3.834e-02, 1.714e-01, -2.580e-03, -1.310e-02, 6.837e-02, -1.696e-01, 2.731e-02, 2.067e-02, 1.079e-01, 9.934e-02, -2.763e-02, -3.332e-02, -4.433e-02, 2.018e-01, -1.534e-02)); + r += mul(s2_6, M4(5.036e-02, 2.648e-02, -6.527e-03, -3.690e-02, -7.802e-02, 2.207e-02, 6.141e-02, 1.958e-01, -8.824e-03, 2.394e-02, -2.215e-02, 1.731e-02, 4.865e-02, -4.820e-02, 4.198e-02, -2.534e-01)); + r += mul(s2_7, M4(-5.007e-02, 4.344e-02, 4.250e-03, -4.466e-02, -4.042e-03, 1.976e-01, 1.295e-01, -7.906e-02, 1.381e-02, 4.920e-02, 1.303e-01, -1.579e-02, 3.775e-03, -1.275e-01, 7.502e-02, 2.351e-02)); + r += mul(s2_8, M4(-1.144e-01, 1.398e-01, 2.054e-01, -6.233e-02, -6.101e-02, -2.771e-02, 2.633e-02, -2.155e-02, -3.079e-02, -4.460e-02, -4.350e-03, 4.232e-02, 4.897e-02, 8.307e-02, -1.322e-01, -1.186e-02)); + r += mul(s3_0, M4(-2.531e-02, -1.739e-02, 2.189e-02, 6.500e-02, -8.803e-02, 3.329e-02, 1.011e-01, -7.934e-02, -1.538e-02, 8.243e-02, 8.416e-03, -5.044e-02, 
3.723e-03, -5.774e-02, 1.004e-02, -1.064e-01)); + r += mul(s3_1, M4(-3.050e-02, 6.282e-03, 1.975e-02, -4.380e-02, -2.323e-02, -6.742e-02, -2.779e-02, 1.099e-01, 6.210e-02, -4.539e-02, -9.253e-02, -9.297e-02, 4.207e-02, -7.108e-02, -3.079e-02, -9.736e-04)); + r += mul(s3_2, M4(3.245e-02, -7.168e-02, -1.138e-01, -1.895e-02, 1.658e-02, 5.894e-02, -5.603e-02, -6.963e-03, -2.875e-02, 1.198e-02, 5.772e-02, -1.160e-02, 9.326e-03, -3.573e-02, 4.991e-03, 1.271e-02)); + r += mul(s3_3, M4(9.787e-02, -2.036e-02, 1.718e-02, -1.622e-02, 1.043e-01, -3.425e-02, -6.481e-02, -3.099e-01, 7.831e-02, 8.317e-02, 1.363e-03, -5.345e-02, 1.314e-01, 6.020e-02, -3.589e-02, -7.305e-01)); + r += mul(s3_4, M4(-1.829e-02, 4.974e-02, -2.094e-01, -2.861e-01, 1.128e-01, 1.619e-01, -2.358e-01, 2.109e-04, -4.152e-01, 2.425e-02, 2.893e-01, 1.625e-01, -2.212e-01, 9.801e-02, 8.805e-02, -1.555e-01)); + r += mul(s3_5, M4(-1.477e-01, 1.423e-01, -8.523e-02, 4.213e-02, 2.496e-02, 1.500e-03, -6.522e-02, -2.658e-02, 7.489e-02, 3.555e-02, -5.523e-03, -3.305e-02, -1.358e-01, -1.811e-02, 2.465e-02, -5.622e-03)); + r += mul(s3_6, M4(-5.342e-03, 3.661e-03, -1.275e-03, -7.544e-02, -2.144e-02, 1.679e-02, 3.667e-02, 9.079e-02, -1.567e-01, 2.069e-02, 4.591e-03, 8.125e-03, 8.032e-02, -3.126e-02, 3.645e-02, -2.959e-01)); + r += mul(s3_7, M4(8.163e-02, 2.339e-02, -9.055e-04, 3.790e-02, -2.948e-02, 4.977e-02, 7.393e-02, -8.920e-03, 2.544e-02, 3.568e-02, 1.493e-01, -1.212e-01, 1.065e-01, -1.628e-01, 1.593e-01, -8.379e-02)); + r += mul(s3_8, M4(1.707e-02, 8.862e-02, -3.193e-02, -8.568e-02, -1.546e-02, -4.577e-02, 8.805e-02, 1.346e-02, 1.543e-02, -5.755e-02, -3.642e-02, 2.221e-02, 1.673e-03, -2.868e-03, -6.763e-02, -6.137e-03)); + r += V4(5.044e-03, 1.567e-04, -1.145e-02, 6.927e-04); + return r; +} + +void Pass7(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy 
+ 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 8 +//!DESC conv7 +//!BLOCK_SIZE 
8 +//!NUM_THREADS 64 +//!IN t0, t1 +//!OUT t2, t3 + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-8.064e-02, -3.833e-02, -3.181e-02, 1.054e-02, 9.509e-02, -6.553e-02, -5.670e-03, -3.696e-02, 2.583e-02, -1.569e-02, 1.094e-02, -2.399e-02, -3.234e-02, -1.243e-01, 5.270e-03, -6.099e-02)); + r += mul(s0_1, M4(-4.438e-02, -1.485e-01, -1.386e-02, 4.858e-02, -5.601e-02, 1.331e-01, 3.263e-02, -9.788e-02, -2.371e-02, 9.461e-02, -2.250e-02, 1.755e-01, -5.625e-02, 3.249e-01, 1.192e-01, 7.269e-03)); + r += mul(s0_2, M4(-6.456e-02, 1.150e-01, 3.409e-02, -1.190e-02, -2.178e-02, 1.632e-02, -2.622e-02, -3.145e-02, -5.530e-02, 6.932e-03, 2.125e-02, -1.092e-02, 7.237e-03, -1.095e-02, -9.447e-02, 1.069e-01)); + r += mul(s0_3, M4(3.359e-02, -7.934e-02, -1.498e-02, -1.134e-02, -1.444e-02, 1.000e-01, -3.456e-02, 9.253e-02, 4.219e-01, -1.332e-01, -2.880e-03, 3.749e-01, 8.414e-02, 5.580e-02, 7.691e-02, 9.379e-02)); + r += mul(s0_4, M4(1.168e+00, 3.189e-01, 1.974e-03, 6.336e-01, -2.977e-01, -1.237e-01, -7.846e-02, 1.225e-02, -5.781e-01, 1.313e-02, -8.957e-01, 9.973e-02, -3.968e-01, 4.271e-01, -1.687e-01, -4.163e-01)); + r += mul(s0_5, M4(2.994e-02, -3.345e-01, 5.888e-02, 3.474e-01, 1.451e-02, 5.160e-02, 1.694e-02, -1.601e-02, -6.726e-02, -8.100e-02, 3.170e-01, 1.724e-01, 5.414e-02, 5.215e-02, 1.187e-01, 6.467e-02)); + r += mul(s0_6, M4(-7.173e-02, -5.387e-02, 2.614e-03, -1.457e-02, -5.838e-02, 1.020e-02, -1.882e-02, 3.052e-02, -1.867e-01, -1.652e-02, -4.128e-02, -3.654e-02, 1.469e-02, 2.656e-02, 2.224e-02, -3.825e-02)); + r += mul(s0_7, M4(-2.378e-01, 7.857e-03, -3.584e-02, 4.349e-03, 
-9.461e-02, 4.763e-02, -6.449e-02, 7.229e-02, 9.332e-02, 3.774e-02, -1.346e-01, -1.196e-01, -2.878e-01, 9.030e-02, 1.203e-01, 3.148e-03)); + r += mul(s0_8, M4(9.453e-02, 1.255e-01, -1.324e-01, 6.370e-02, 2.242e-03, -1.721e-03, 6.953e-02, 3.019e-03, -5.313e-02, -1.384e-02, 4.697e-02, 4.914e-02, -9.657e-02, 1.233e-01, -1.883e-01, -8.477e-03)); + r += mul(s1_0, M4(-1.283e-02, 3.713e-02, 6.623e-03, 6.693e-03, -5.631e-02, 2.185e-02, -3.356e-02, 2.070e-03, 5.509e-02, -2.332e-01, 4.553e-03, -3.012e-02, -1.517e-02, 1.337e-03, -6.578e-03, -2.650e-02)); + r += mul(s1_1, M4(3.377e-02, 2.493e-03, -9.238e-03, -6.058e-02, -1.275e-01, 8.259e-02, 5.112e-02, 3.219e-02, -2.216e-02, 7.512e-02, 3.070e-02, 7.029e-02, -1.924e-02, 4.763e-02, -2.827e-02, 4.378e-03)); + r += mul(s1_2, M4(-9.929e-03, 1.398e-02, 1.972e-02, 1.740e-02, -9.611e-03, 5.041e-02, -4.188e-02, 5.115e-02, -1.306e-02, -1.754e-03, -4.642e-02, 1.170e-02, -2.771e-02, -3.207e-02, -2.397e-02, -4.773e-02)); + r += mul(s1_3, M4(-2.679e-02, -2.625e-03, 1.924e-02, 9.094e-03, -2.648e-01, -3.603e-02, 1.072e-02, -1.963e-01, 1.635e-01, -2.366e-02, 1.518e-02, 1.552e-01, 6.552e-02, -4.113e-03, 1.333e-02, -2.104e-02)); + r += mul(s1_4, M4(2.173e-01, 2.712e-01, 1.597e-02, 2.260e-01, -6.777e-01, 1.775e-01, 3.145e-01, 3.556e-02, -2.059e-01, -2.622e-02, -2.541e-01, -5.547e-03, -1.087e-01, -4.131e-02, 2.261e-02, -9.224e-02)); + r += mul(s1_5, M4(-3.171e-02, -5.821e-02, -6.229e-02, 1.392e-01, 2.494e-02, 9.668e-02, -5.215e-02, 1.746e-01, -5.280e-02, -3.489e-02, 7.232e-02, -3.858e-03, 2.566e-02, 4.005e-02, -5.404e-02, -8.660e-02)); + r += mul(s1_6, M4(7.131e-03, 2.685e-02, -2.403e-03, 1.059e-02, 1.469e-01, 1.232e-01, 5.431e-02, 1.890e-01, -1.081e-01, 3.885e-02, -3.539e-02, 5.289e-02, -1.438e-02, -3.979e-03, -6.192e-03, -2.331e-02)); + r += mul(s1_7, M4(-1.071e-01, 7.591e-04, -6.525e-02, 1.632e-02, -1.323e-01, 3.493e-02, -9.612e-02, 2.512e-02, -7.017e-02, 4.360e-02, -6.099e-02, 3.901e-04, -1.764e-02, -1.061e-02, -3.409e-02, -2.255e-02)); + r 
+= mul(s1_8, M4(-4.386e-02, -1.254e-02, 2.400e-02, 1.194e-01, 5.802e-02, 8.040e-02, -3.854e-02, 5.653e-02, -1.702e-02, -2.335e-02, 1.121e-01, 2.440e-02, -3.404e-04, 7.401e-02, -5.980e-02, -3.817e-02)); + r += mul(s2_0, M4(-9.579e-02, 4.650e-02, 8.575e-03, -5.575e-02, -8.117e-02, -7.162e-02, 6.056e-03, -3.501e-02, 9.302e-02, -1.216e-01, -6.163e-02, 6.897e-02, -3.371e-02, 1.772e-02, 3.013e-03, -5.936e-03)); + r += mul(s2_1, M4(4.708e-02, 1.280e-01, 3.359e-02, 1.003e-01, 9.448e-02, 3.132e-02, 8.137e-02, 9.316e-02, -5.728e-02, 5.457e-02, 9.155e-02, -3.774e-02, -2.305e-02, -1.017e-01, 2.738e-02, -2.373e-02)); + r += mul(s2_2, M4(8.975e-03, 8.207e-02, -6.346e-02, 8.974e-02, -2.279e-02, -9.128e-03, -4.230e-02, -8.024e-03, 5.563e-02, 9.479e-02, -9.852e-02, 8.121e-02, -1.145e-02, 6.518e-02, -2.581e-02, 2.733e-02)); + r += mul(s2_3, M4(1.479e-01, -2.068e-02, 3.906e-02, 8.235e-02, -3.507e-02, 1.935e-01, 1.466e-02, 2.432e-01, 5.253e-01, -3.897e-02, -4.775e-02, -9.807e-02, 5.332e-02, 9.679e-02, -1.589e-02, 7.846e-02)); + r += mul(s2_4, M4(1.030e-01, 3.311e-01, 1.128e-01, -1.244e-01, -1.489e-01, 2.902e-01, 2.805e-01, -1.558e-01, -1.289e-01, 3.306e-01, -1.475e-01, -6.570e-02, 3.265e-02, -1.141e-01, 6.703e-02, -1.717e-01)); + r += mul(s2_5, M4(2.855e-02, 1.184e-01, -2.904e-01, 2.258e-02, -7.943e-02, -5.343e-02, -8.054e-02, 1.255e-01, -1.108e-02, 9.190e-02, -1.725e-01, -6.260e-02, -8.194e-03, 5.840e-02, -1.704e-01, 7.028e-03)); + r += mul(s2_6, M4(1.684e-02, 1.411e-02, 8.570e-03, -1.440e-02, 3.143e-02, 5.820e-03, 1.764e-02, 5.872e-02, 7.943e-02, -5.400e-02, 8.544e-02, -3.393e-02, -7.690e-02, -2.082e-02, 2.975e-03, -5.316e-02)); + r += mul(s2_7, M4(-2.681e-02, 7.370e-04, -5.250e-02, -3.652e-02, -7.367e-02, -2.542e-02, 1.165e-01, -5.653e-02, 3.500e-02, 1.781e-02, -7.241e-02, -1.056e-03, -9.338e-02, 2.896e-03, -3.257e-02, 1.238e-01)); + r += mul(s2_8, M4(-6.529e-03, 3.786e-04, -9.472e-02, 5.894e-02, -4.906e-02, 1.019e-02, -3.093e-03, 3.903e-02, 2.610e-02, 6.776e-02, -7.901e-02, 
-1.485e-03, -9.116e-02, 1.427e-01, -6.813e-02, -1.681e-02)); + r += mul(s3_0, M4(-6.420e-03, 1.512e-02, 2.670e-02, -1.570e-01, -6.513e-03, 5.949e-02, 1.094e-02, -6.247e-02, -1.649e-02, -6.791e-02, 1.733e-03, 2.568e-02, -5.164e-02, 2.694e-02, 8.787e-03, -4.768e-02)); + r += mul(s3_1, M4(1.001e-01, 2.465e-01, 9.252e-02, -2.858e-02, 1.702e-02, -3.235e-02, 1.258e-01, -3.779e-03, 1.607e-02, 1.508e-01, 3.810e-02, -1.863e-02, -1.536e-02, -2.347e-01, -1.849e-03, 5.462e-03)); + r += mul(s3_2, M4(9.057e-03, 1.345e-01, 2.663e-03, -4.507e-02, 2.303e-02, 2.462e-02, -6.214e-02, 7.788e-02, 4.236e-02, 5.039e-02, 2.335e-02, 2.963e-02, -4.921e-02, 4.596e-02, -1.201e-02, 1.782e-02)); + r += mul(s3_3, M4(5.325e-02, 2.420e-01, 4.483e-03, 2.510e-01, -7.061e-03, -1.147e-01, -9.773e-03, 1.426e-01, 3.031e-03, 4.003e-02, -1.434e-02, 6.091e-02, 9.825e-02, 8.520e-02, 2.736e-02, 1.416e-02)); + r += mul(s3_4, M4(7.373e-02, 4.379e-01, -7.470e-02, 2.506e-01, -2.329e-01, 2.474e-01, 2.903e-01, -4.874e-01, -7.010e-02, 1.753e-01, 6.191e-02, 1.025e-01, 8.968e-02, -1.998e-01, 4.465e-05, -5.672e-01)); + r += mul(s3_5, M4(1.284e-01, 1.790e-01, -1.679e-01, 2.744e-01, -3.069e-02, -4.589e-02, -2.251e-01, -1.779e-02, 3.338e-02, 5.997e-02, 1.126e-03, 3.696e-03, -1.154e-01, -5.846e-02, -1.024e-01, 3.392e-01)); + r += mul(s3_6, M4(-2.364e-02, -9.130e-03, -3.655e-02, -1.954e-02, -9.995e-02, 2.973e-02, 4.064e-03, -4.305e-02, -2.863e-03, -8.699e-03, -1.583e-02, 4.408e-03, -6.298e-02, 2.150e-02, 1.992e-02, -2.189e-02)); + r += mul(s3_7, M4(-4.068e-03, -5.225e-02, 3.381e-02, 8.963e-03, 2.640e-02, -2.133e-02, 1.803e-01, -9.592e-02, -1.579e-01, 1.786e-02, 6.515e-02, 2.114e-01, -2.167e-01, -1.371e-01, 2.144e-03, 2.155e-02)); + r += mul(s3_8, M4(-3.349e-03, 1.488e-01, 8.888e-03, 1.001e-01, -4.780e-02, -1.406e-02, -2.940e-02, 4.185e-02, 2.758e-03, 2.820e-02, 5.186e-02, 2.930e-02, 1.372e-01, 1.122e-01, -1.140e-01, 1.343e-01)); + r += V4(-1.743e-03, -5.471e-03, -9.889e-04, 8.561e-05); + return r; +} + +V4 f1(V4 s0_0, V4 
s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-5.525e-02, -5.493e-02, -8.973e-04, 4.167e-02, -5.567e-03, 5.194e-03, -5.363e-02, 1.513e-01, 9.020e-02, -4.135e-02, -3.216e-02, 5.381e-02, 7.401e-03, 7.252e-02, -1.617e-02, -1.196e-01)); + r += mul(s0_1, M4(-5.790e-02, -1.934e-02, -1.790e-03, 5.390e-02, -6.975e-02, 4.877e-02, 1.578e-01, -6.377e-03, 1.284e-01, -4.775e-02, -6.735e-02, -9.064e-02, 1.184e-01, 9.055e-02, 9.935e-02, 2.014e-01)); + r += mul(s0_2, M4(-4.004e-02, 4.488e-02, -4.212e-02, -6.286e-02, 1.711e-02, 2.731e-02, -2.406e-02, 1.097e-02, -3.399e-02, 8.065e-03, 3.894e-02, 1.282e-02, 2.036e-01, 4.754e-02, -4.522e-02, 6.568e-03)); + r += mul(s0_3, M4(-2.372e-02, 6.962e-02, 3.557e-02, -8.888e-03, -6.028e-02, -3.183e-02, -1.608e-01, -3.044e-02, -1.429e-02, 3.901e-02, -6.904e-02, -5.157e-01, -1.547e-02, -1.362e-01, 4.108e-03, -1.772e-01)); + r += mul(s0_4, M4(2.381e-01, -3.453e-01, -2.427e-01, 7.987e-02, 1.617e-01, 1.046e-01, 6.711e-02, 4.241e-01, -5.566e-01, 2.310e-01, 1.982e-01, 1.211e-02, 4.338e-02, -1.123e+00, -2.644e-01, -5.025e-01)); + r += mul(s0_5, M4(-4.302e-01, -6.545e-02, -6.868e-03, 2.568e-01, 2.493e-02, 2.488e-02, -1.175e-01, -1.257e-02, 5.920e-02, -3.840e-03, -1.888e-01, 7.050e-02, 8.341e-02, -1.762e-01, -9.946e-02, -3.864e-02)); + r += mul(s0_6, M4(-6.090e-03, 5.118e-02, 5.784e-02, 1.236e-02, -2.339e-02, 1.262e-03, -6.952e-02, -5.645e-02, 7.682e-02, 8.910e-02, -6.863e-02, -1.345e-01, 1.935e-02, -1.587e-02, 2.932e-03, 2.945e-02)); + r += mul(s0_7, M4(1.043e-02, 3.625e-01, -1.154e-01, 7.974e-02, -1.517e-02, 1.574e-02, 8.566e-02, -8.753e-02, 8.905e-02, 1.145e-01, 7.292e-02, -9.528e-02, -6.925e-02, 4.815e-03, -4.870e-03, 1.570e-02)); + r += mul(s0_8, 
M4(2.513e-01, -1.567e-01, 3.452e-02, -2.177e-01, 4.435e-03, 2.229e-02, -5.694e-02, 3.564e-02, -9.355e-03, -3.216e-03, 3.984e-03, -2.533e-02, 5.015e-03, -4.419e-03, 5.343e-03, -5.821e-02)); + r += mul(s1_0, M4(1.183e-02, 2.180e-03, 3.312e-02, 1.897e-02, -9.420e-02, -1.052e-01, -7.984e-02, -8.329e-02, 1.925e-02, -1.752e-02, -6.860e-03, -1.326e-01, -3.338e-02, -2.484e-02, 5.283e-03, -2.789e-03)); + r += mul(s1_1, M4(-5.266e-02, -3.840e-03, -5.604e-02, 6.279e-02, 2.299e-01, 1.305e-01, 1.038e-01, -9.023e-02, -6.376e-02, 3.853e-02, 1.382e-01, -3.984e-02, 6.131e-02, 1.529e-03, -2.258e-02, 6.728e-02)); + r += mul(s1_2, M4(-2.498e-02, 1.543e-02, 6.288e-03, 1.517e-03, 3.486e-02, 3.774e-02, -2.774e-02, 7.099e-02, 2.514e-02, 9.266e-03, -7.521e-03, 6.476e-03, -6.458e-02, 9.736e-03, 1.127e-02, 7.742e-03)); + r += mul(s1_3, M4(-1.946e-02, 9.133e-04, 3.637e-02, -4.983e-02, 9.720e-02, -4.280e-01, -1.311e-01, 2.993e-02, 5.112e-02, -4.233e-02, -6.549e-02, -1.818e-01, 6.990e-03, 1.344e-02, -5.027e-02, 1.111e-01)); + r += mul(s1_4, M4(-3.156e-02, -8.842e-02, -2.152e-01, 5.452e-03, 1.343e-01, 4.848e-01, 2.658e-01, -8.968e-03, 4.181e-02, -3.102e-02, 4.729e-01, 2.922e-01, 1.338e-01, -1.812e-02, 1.293e-02, -2.395e-01)); + r += mul(s1_5, M4(8.511e-02, -2.497e-02, -6.139e-03, 6.542e-02, 2.345e-01, 4.880e-02, -1.407e-01, 5.752e-02, 5.268e-02, 1.468e-02, 1.387e-02, 3.528e-02, -1.074e-01, 1.000e-02, 8.326e-02, 6.166e-02)); + r += mul(s1_6, M4(-7.543e-03, -3.122e-02, 1.798e-02, 2.674e-02, -1.737e-02, -2.950e-01, -5.185e-02, 1.724e-01, -1.144e-02, -4.360e-02, -2.832e-02, -3.791e-02, 4.188e-03, -1.949e-02, 3.661e-03, 1.626e-02)); + r += mul(s1_7, M4(-2.500e-02, 3.936e-02, -7.625e-02, -1.774e-04, -1.588e-01, 2.302e-02, 8.201e-02, -5.263e-02, -1.705e-02, 3.713e-02, 1.013e-01, -9.792e-02, 1.231e-03, 3.596e-02, -5.049e-02, 4.461e-02)); + r += mul(s1_8, M4(-1.184e-02, -1.957e-02, -6.394e-02, -7.422e-03, 3.436e-02, -4.979e-02, -6.860e-02, 5.641e-02, -5.356e-02, -8.266e-03, 3.798e-02, 1.110e-03, 
6.696e-02, 2.150e-02, 3.553e-03, -2.353e-02)); + r += mul(s2_0, M4(-2.568e-02, 5.060e-02, 4.885e-02, -6.251e-02, -1.792e-01, -4.927e-02, -3.191e-03, -7.133e-04, 2.122e-02, 2.619e-01, -4.819e-02, -1.790e-02, 4.136e-02, 5.090e-02, 6.009e-02, 6.033e-02)); + r += mul(s2_1, M4(6.606e-02, -3.492e-02, -6.592e-02, -7.088e-02, -4.980e-03, -1.418e-01, -1.253e-01, -1.462e-02, -1.838e-02, 1.042e-01, 9.550e-02, -1.567e-01, -3.643e-02, -4.484e-02, 3.174e-03, -1.487e-01)); + r += mul(s2_2, M4(9.162e-02, 1.759e-02, -8.418e-02, -1.465e-02, -2.496e-02, -1.225e-02, -2.968e-02, -2.787e-02, 4.041e-03, -4.790e-03, -6.679e-02, 7.165e-03, -3.727e-02, 3.796e-02, 3.822e-02, 8.043e-03)); + r += mul(s2_3, M4(1.837e-03, -1.714e-01, -6.961e-02, 1.382e-01, 5.682e-02, -4.873e-01, -3.312e-02, 5.158e-01, 1.352e-01, -1.975e-01, 1.597e-02, 6.544e-02, -3.329e-02, -3.234e-02, 5.270e-03, -1.502e-01)); + r += mul(s2_4, M4(-5.789e-02, -7.730e-02, 6.364e-02, 5.596e-03, -2.386e-01, -5.458e-02, 8.180e-02, 2.441e-02, 6.474e-02, 5.300e-01, -4.721e-01, -3.701e-02, 2.024e-01, 1.321e-02, -9.498e-02, 4.516e-01)); + r += mul(s2_5, M4(2.882e-02, -2.058e-02, -1.082e-01, -6.370e-02, 1.480e-01, 1.513e-02, -1.423e-01, 2.692e-02, 2.782e-02, 7.790e-02, -1.576e-02, -8.523e-02, 2.453e-01, 6.333e-04, 8.954e-02, 2.339e-01)); + r += mul(s2_6, M4(4.058e-03, 2.871e-02, 1.125e-02, 5.663e-03, 2.601e-02, 6.732e-02, 1.797e-02, -3.068e-02, -2.809e-02, -5.919e-03, 4.540e-02, 2.821e-03, -7.565e-03, 4.519e-02, 2.327e-02, -3.882e-02)); + r += mul(s2_7, M4(-2.998e-02, -6.281e-02, 2.913e-02, -2.864e-02, -6.257e-02, 7.332e-02, 5.505e-04, 1.016e-01, -1.131e-01, -6.519e-02, -1.234e-01, 8.235e-02, -2.776e-02, -4.644e-02, -6.901e-02, 1.809e-02)); + r += mul(s2_8, M4(-2.236e-02, -9.187e-03, -1.409e-01, 4.984e-02, 2.245e-02, 1.562e-02, -1.853e-02, -5.231e-02, 3.672e-02, -2.155e-02, -5.280e-02, 3.258e-03, 2.879e-02, 7.168e-02, 4.822e-02, -5.377e-02)); + r += mul(s3_0, M4(1.415e-02, 1.026e-01, 1.554e-01, 1.670e-02, 3.504e-02, 3.557e-02, 4.013e-02, 
8.621e-02, -1.502e-02, 7.253e-03, -1.344e-02, -5.635e-02, 1.603e-02, 2.626e-02, 2.599e-02, 1.666e-03)); + r += mul(s3_1, M4(-3.214e-02, -1.573e-01, 1.001e-01, -1.588e-01, 5.839e-02, -4.139e-02, 3.952e-02, -1.764e-02, 1.964e-02, -1.310e-02, 6.175e-03, -4.100e-02, -1.181e-02, -7.760e-02, -3.020e-02, 7.317e-04)); + r += mul(s3_2, M4(-1.052e-01, -3.080e-02, 3.791e-04, -3.890e-02, 6.022e-02, -4.121e-03, -1.037e-01, -2.708e-02, 1.788e-02, -4.931e-03, -3.259e-02, -2.086e-02, -3.673e-02, 4.576e-02, 5.512e-02, 1.266e-02)); + r += mul(s3_3, M4(-1.639e-01, -2.842e-01, -8.190e-02, 3.341e-02, 4.371e-02, -3.729e-02, -1.062e-01, 5.776e-02, -9.976e-02, 1.355e-01, 3.865e-02, -2.485e-01, 3.205e-02, -9.464e-02, -3.115e-02, 7.982e-02)); + r += mul(s3_4, M4(2.411e-01, -1.693e-01, 7.569e-01, -3.759e-01, -5.648e-01, 1.042e-01, 4.819e-01, 3.628e-03, 5.672e-01, 2.590e-01, -2.252e-01, 1.947e-01, 1.242e-01, 1.440e-01, 8.207e-02, 3.523e-01)); + r += mul(s3_5, M4(3.972e-01, -5.478e-02, -2.617e-01, 1.363e-01, 6.289e-02, 8.769e-03, -1.476e-01, 3.871e-02, 1.088e-01, 2.484e-02, -4.114e-02, -4.563e-02, 1.680e-01, 8.067e-02, 1.434e-01, 3.357e-02)); + r += mul(s3_6, M4(-1.022e-02, -5.556e-02, -3.107e-02, 4.839e-02, 1.079e-02, 1.388e-01, 2.482e-02, -1.166e-01, -5.799e-02, -3.306e-02, 6.809e-02, -6.095e-02, -4.195e-03, 6.835e-03, -1.020e-03, 1.108e-01)); + r += mul(s3_7, M4(-1.105e-01, 2.145e-02, -4.958e-02, 1.012e-01, -3.125e-02, -2.931e-02, 3.629e-02, 1.651e-01, -6.673e-02, 1.906e-02, -6.683e-02, 6.445e-02, -4.213e-02, -2.649e-02, -3.545e-02, 1.384e-01)); + r += mul(s3_8, M4(-1.362e-02, -2.108e-02, -1.410e-01, 9.828e-02, -1.097e-02, 2.497e-02, -9.738e-02, 1.768e-02, 5.047e-02, 3.760e-02, -7.576e-02, 2.529e-03, 8.835e-02, -2.202e-01, -1.604e-02, 4.994e-03)); + r += V4(2.086e-03, 2.299e-03, -5.180e-03, 1.039e-03); + return r; +} + +void Pass8(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x 
|| gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t2[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t3[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, 
s3_7, s3_8); +} + +//!PASS 9 +//!DESC conv8 +//!BLOCK_SIZE 8 +//!NUM_THREADS 64 +//!IN t2, t3 +//!OUT t0, t1 + +#define l0(x, y) V4(O(t2, float2(x, y))) +#define l1(x, y) V4(O(t3, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(4.333e-02, -2.234e-02, 4.223e-03, -2.379e-02, 4.428e-02, -4.734e-03, -1.071e-03, 9.366e-03, -2.942e-01, 1.902e-01, -1.446e-01, -5.203e-02, -8.944e-02, -3.073e-02, -2.361e-02, 3.757e-02)); + r += mul(s0_1, M4(-9.616e-02, -6.887e-02, 1.052e-01, -4.074e-02, -1.591e-02, 5.493e-02, -2.139e-03, -3.894e-02, -1.097e-01, 4.568e-02, -7.025e-02, -2.882e-02, -6.000e-02, 2.235e-02, -5.432e-02, 5.809e-03)); + r += mul(s0_2, M4(-1.222e-02, 5.753e-02, 1.194e-01, -2.727e-02, -2.105e-02, 2.036e-02, -8.233e-02, -1.205e-02, -5.085e-02, -6.829e-03, -9.941e-03, -1.337e-02, 7.795e-02, -5.759e-02, 5.618e-02, -9.238e-03)); + r += mul(s0_3, M4(1.324e-01, -1.185e-01, 3.684e-03, 2.976e-02, 1.356e-01, -1.205e-01, -1.850e-02, -2.291e-02, -1.569e-01, 4.140e-01, 2.106e-01, -7.525e-01, 1.914e-02, 2.863e-02, -1.284e-02, -1.043e-01)); + r += mul(s0_4, M4(-1.022e-01, 2.592e-02, -1.560e-01, -6.939e-01, -1.890e-01, -3.180e-02, 8.099e-02, 8.758e-02, 5.179e-02, 1.440e-01, 1.557e-01, 6.581e-02, 6.784e-02, 2.236e-01, 7.923e-02, 1.808e-01)); + r += mul(s0_5, M4(-9.244e-02, -3.930e-02, 1.281e-01, 3.306e-02, -8.861e-02, 1.411e-01, 1.488e-01, 4.144e-02, 8.077e-03, 5.162e-02, 5.214e-02, 6.791e-03, 5.626e-02, -7.312e-02, -8.413e-02, 2.335e-02)); + r += mul(s0_6, M4(-3.044e-02, 1.477e-02, 1.234e-02, 1.927e-02, 4.953e-02, -7.250e-02, 4.041e-02, -3.293e-02, 1.257e-01, -6.445e-02, 5.049e-02, -1.587e-01, 6.960e-02, -1.008e-01, -1.715e-02, 1.055e-01)); + r += 
mul(s0_7, M4(1.099e-01, 4.099e-02, -4.497e-02, 1.613e-02, 1.081e-01, -5.289e-02, 7.797e-03, -8.610e-02, 1.004e-01, 2.457e-02, -9.614e-02, 2.222e-04, -1.304e-01, -6.372e-02, 1.048e-01, 7.533e-02)); + r += mul(s0_8, M4(-6.962e-02, -9.348e-03, 1.754e-03, -1.924e-03, -6.750e-02, 8.346e-02, -3.537e-02, 2.726e-02, -1.512e-02, 1.457e-03, 5.397e-04, 3.160e-03, 3.225e-03, -2.205e-02, 3.784e-03, -6.457e-02)); + r += mul(s1_0, M4(3.216e-03, -2.588e-02, -3.591e-03, 2.124e-02, 5.857e-02, -4.599e-02, 2.350e-02, -5.599e-03, -7.461e-02, 1.149e-02, -5.752e-03, -1.405e-02, -8.179e-02, -1.131e-02, -6.390e-04, -2.175e-02)); + r += mul(s1_1, M4(-4.224e-02, 1.465e-01, 2.090e-01, -7.787e-02, 3.813e-02, 6.197e-03, -2.953e-03, -5.295e-03, -1.369e-01, 1.265e-01, -8.472e-02, -1.717e-02, -8.354e-02, 6.081e-02, -1.704e-02, -2.941e-02)); + r += mul(s1_2, M4(5.532e-02, 4.211e-02, 1.382e-01, -1.554e-02, -3.309e-02, -1.236e-02, -5.637e-02, -9.740e-03, 3.499e-02, -1.187e-03, 2.183e-02, -4.328e-03, 3.071e-02, 6.290e-03, -3.430e-02, 1.809e-02)); + r += mul(s1_3, M4(3.097e-01, -9.147e-02, 5.491e-02, -2.231e-01, 3.357e-02, -1.675e-01, -3.862e-02, -3.948e-02, -9.614e-02, 2.037e-01, -8.482e-02, 1.575e-01, -2.503e-01, 4.100e-02, 4.688e-02, -5.487e-02)); + r += mul(s1_4, M4(-9.200e-01, 2.786e-01, -1.781e-01, -1.025e-01, -3.880e-01, 2.259e-01, -5.413e-02, 1.069e-01, -1.934e-01, 6.803e-02, 1.450e-01, 1.164e-02, -1.011e-01, 4.287e-01, 1.492e-02, 1.877e-01)); + r += mul(s1_5, M4(2.170e-03, -5.988e-02, 1.469e-01, 8.258e-02, 7.155e-02, 5.736e-02, 2.247e-02, 2.306e-02, -2.195e-02, 6.195e-02, -3.267e-02, 6.251e-02, 1.084e-01, -5.883e-02, 1.627e-02, -2.231e-04)); + r += mul(s1_6, M4(-2.249e-02, -2.655e-02, 5.447e-03, 1.252e-01, 2.021e-02, -3.117e-02, 1.387e-03, -3.282e-02, -3.373e-02, 1.007e-02, 8.572e-03, -4.438e-03, 3.576e-02, -2.237e-02, -8.820e-02, 7.987e-02)); + r += mul(s1_7, M4(5.065e-02, 2.282e-02, -9.641e-02, 6.878e-03, 6.660e-01, 2.497e-02, -1.353e-01, -1.281e-01, 1.927e-02, -6.053e-03, 4.760e-03, 
-1.569e-02, -8.824e-02, -1.988e-02, -5.204e-03, 9.302e-02)); + r += mul(s1_8, M4(-1.142e-01, -2.133e-02, -5.409e-02, -1.691e-02, -1.052e-01, 6.282e-02, -1.763e-01, 7.171e-03, -5.642e-04, 5.445e-03, -2.252e-02, -8.101e-03, -7.578e-02, -1.884e-02, 4.234e-02, -4.051e-02)); + r += mul(s2_0, M4(-3.340e-02, -9.286e-02, -2.409e-02, 5.065e-02, 1.146e-01, -3.051e-02, -5.897e-02, 1.339e-01, -1.820e-01, -8.258e-02, 7.841e-02, -4.576e-03, -2.235e-02, -2.630e-03, -2.450e-02, 1.445e-03)); + r += mul(s2_1, M4(3.666e-02, -1.334e-01, 1.470e-01, -2.621e-04, 1.844e-02, 2.054e-02, -1.071e-01, 1.125e-01, -9.043e-02, -3.179e-02, -1.576e-01, 4.819e-03, -1.119e-01, 3.672e-02, -1.960e-02, -2.050e-02)); + r += mul(s2_2, M4(-4.321e-02, 1.305e-03, -8.030e-03, 1.275e-02, -2.368e-02, -8.649e-02, 2.123e-01, -2.892e-02, -2.245e-02, -2.036e-02, 5.714e-02, -2.304e-02, 7.935e-02, 2.948e-02, 4.653e-02, 2.831e-02)); + r += mul(s2_3, M4(3.771e-02, 2.739e-02, -5.543e-02, -7.737e-02, 1.401e-01, -8.111e-02, -5.889e-02, -9.849e-02, 6.609e-02, -1.088e+00, -3.718e-01, 8.566e-01, -3.921e-02, -1.169e-02, 3.725e-02, 1.351e-02)); + r += mul(s2_4, M4(4.463e-02, -4.023e-01, 9.789e-02, -1.354e-02, -3.505e-01, 6.620e-02, 2.244e-01, -1.811e-01, 1.546e-01, 1.551e-01, 5.571e-01, -8.024e-02, 2.007e-01, -2.899e-02, 8.322e-02, 2.466e-02)); + r += mul(s2_5, M4(-8.966e-02, 9.537e-03, 1.072e-02, -2.888e-02, -9.071e-02, -2.265e-02, 1.060e-01, 2.784e-02, 2.326e-02, 2.158e-02, -1.252e-02, 4.109e-02, -9.991e-02, 2.266e-02, 1.081e-01, -3.239e-02)); + r += mul(s2_6, M4(7.485e-02, -5.121e-02, -7.197e-03, 2.364e-02, 2.601e-02, -3.873e-02, -3.163e-02, 6.095e-03, 1.731e-01, 1.597e-01, -2.013e-01, -1.612e-01, -3.529e-02, 4.730e-02, 3.587e-03, -4.909e-02)); + r += mul(s2_7, M4(-2.002e-01, -1.274e-01, 3.759e-02, -7.192e-02, 4.936e-02, 6.713e-03, -9.731e-02, -1.581e-02, 2.900e-01, 2.825e-03, -1.856e-03, 1.151e-01, 4.926e-02, -8.486e-02, 6.889e-02, -7.680e-02)); + r += mul(s2_8, M4(2.897e-02, 1.246e-02, 5.703e-02, 3.133e-02, -5.819e-02, 
3.870e-02, 2.343e-02, 2.062e-02, -3.449e-02, -1.748e-03, -3.243e-03, 7.889e-03, -3.119e-02, 2.583e-02, 2.955e-02, 2.239e-02)); + r += mul(s3_0, M4(-6.212e-02, -2.815e-02, 3.233e-02, 1.243e-02, -7.495e-02, 5.928e-02, -1.435e-02, -3.235e-02, -4.753e-02, -8.307e-02, -6.735e-03, 2.073e-02, 3.210e-02, -1.466e-02, -3.795e-03, -7.752e-03)); + r += mul(s3_1, M4(8.327e-02, -6.238e-02, 8.281e-02, 1.640e-02, -2.766e-02, 1.126e-01, -1.703e-01, 5.964e-02, -6.483e-02, -1.239e-01, 9.791e-02, -5.774e-02, -4.405e-02, -1.194e-01, 1.150e-01, 8.248e-03)); + r += mul(s3_2, M4(-2.417e-02, -2.032e-03, -2.130e-02, 3.247e-03, 2.558e-02, 9.621e-03, 2.894e-03, 1.745e-02, -3.345e-03, 2.472e-02, 4.561e-02, 1.091e-04, -9.694e-02, 7.448e-02, 1.977e-02, -2.252e-02)); + r += mul(s3_3, M4(4.710e-02, 1.175e-01, -5.456e-02, 2.405e-02, -5.599e-02, 5.930e-02, 1.394e-02, -1.948e-01, 9.356e-02, 4.988e-03, -3.550e-02, 2.115e-02, 3.579e-02, 2.519e-03, 3.458e-02, 1.284e-02)); + r += mul(s3_4, M4(-1.539e-01, -2.812e-01, -1.729e-01, 3.786e-02, -7.088e-02, 9.694e-02, -3.798e-02, -2.858e-01, 2.162e-01, 2.841e-01, 6.325e-02, 8.461e-02, -1.131e-02, -4.421e-01, -7.511e-02, -1.567e-01)); + r += mul(s3_5, M4(-5.525e-02, 9.872e-03, 1.520e-02, -1.043e-02, -1.261e-01, 5.960e-02, -1.696e-01, 1.084e-02, 4.916e-04, -4.675e-02, 4.176e-02, -1.421e-02, -4.200e-03, 5.236e-02, 4.528e-01, -1.819e-02)); + r += mul(s3_6, M4(5.756e-02, -5.624e-02, -1.801e-02, 1.356e-01, -2.943e-02, -3.031e-03, 2.097e-02, -2.393e-02, -1.188e-02, 3.476e-02, -8.038e-02, -2.493e-02, -7.363e-02, 9.841e-02, 2.800e-02, -4.719e-02)); + r += mul(s3_7, M4(-1.646e-01, -1.462e-01, 4.622e-02, -1.341e-01, 7.500e-02, 9.408e-03, -3.332e-02, -9.753e-02, -1.242e-01, -5.388e-02, 8.329e-06, -8.211e-02, 2.022e-02, -1.792e-01, 2.668e-01, -1.639e-01)); + r += mul(s3_8, M4(-5.759e-02, 2.412e-02, -7.751e-03, 5.953e-02, -3.042e-03, 7.494e-02, -7.015e-02, 8.802e-03, 6.651e-02, 2.193e-02, 5.734e-03, 9.387e-04, 8.776e-03, -3.649e-02, -5.357e-02, 2.643e-02)); + r += 
V4(7.492e-03, 1.996e-03, 3.514e-04, -9.678e-04); + return r; +} + +V4 f1(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(-4.857e-03, 1.681e-02, 1.594e-02, 1.186e-02, 9.703e-03, 1.481e-02, 2.224e-02, -1.211e-02, -3.463e-02, 2.229e-01, -4.089e-02, -2.937e-02, -3.930e-02, -4.065e-02, -5.603e-02, 4.334e-03)); + r += mul(s0_1, M4(1.133e-01, -8.282e-02, -1.479e-01, 2.685e-02, -5.441e-02, 5.083e-02, 1.999e-02, -9.091e-03, 1.051e-01, -2.878e-03, 3.581e-02, -1.914e-01, 6.339e-02, 4.883e-02, 8.196e-02, -1.849e-02)); + r += mul(s0_2, M4(-1.835e-02, 4.866e-03, -1.972e-02, -5.721e-02, 3.678e-02, 7.178e-03, 4.936e-02, 1.327e-02, 7.159e-03, 1.530e-02, 9.492e-03, -2.915e-02, -3.703e-02, 3.455e-02, 4.313e-02, -7.483e-03)); + r += mul(s0_3, M4(4.954e-02, -3.734e-02, 1.704e-01, -9.261e-02, 4.356e-02, 4.334e-02, 1.375e-01, -6.750e-02, 1.645e-01, 3.545e-01, -1.076e-01, -2.754e-01, -8.735e-03, 8.428e-02, -2.600e-02, 5.157e-02)); + r += mul(s0_4, M4(-1.529e-01, 6.132e-02, 5.269e-01, 2.303e-01, 3.777e-02, -3.362e-02, -1.288e-01, 3.253e-02, -2.453e-03, -1.311e-01, -1.811e-01, 5.146e-01, 1.360e-01, -1.568e-01, 3.541e-01, -4.269e-01)); + r += mul(s0_5, M4(1.203e-01, 2.272e-02, 4.602e-02, -4.652e-02, 4.838e-02, -1.423e-01, -4.015e-02, 2.329e-01, 2.563e-02, 4.549e-03, -6.325e-03, 1.033e-01, -1.185e-01, -1.229e-03, -1.781e-03, 4.882e-02)); + r += mul(s0_6, M4(-1.493e-03, -5.072e-04, -7.398e-02, -4.071e-03, 1.147e-02, 7.946e-02, 3.359e-02, -6.394e-02, 3.505e-02, 2.111e-01, 1.077e-01, -9.330e-02, 2.340e-02, 1.186e-02, 1.139e-01, -2.444e-03)); + r += mul(s0_7, M4(-5.860e-03, -1.587e-02, -7.634e-02, 6.287e-03, 7.103e-02, 1.223e-01, 1.770e-01, -6.041e-02, -3.422e-02, -8.480e-03, 6.146e-02, 
5.680e-02, 6.599e-03, -6.543e-02, -6.087e-02, -1.182e-01)); + r += mul(s0_8, M4(-3.065e-03, -2.381e-02, 1.076e-03, 4.174e-02, -1.275e-02, -8.464e-02, -1.656e-02, 1.118e-01, -5.084e-03, -1.276e-02, 1.028e-02, -2.447e-02, 2.944e-02, 3.333e-02, 1.571e-02, -4.768e-02)); + r += mul(s1_0, M4(1.033e-02, -1.755e-02, -6.591e-02, -2.916e-03, 5.268e-02, -1.639e-02, 3.173e-02, -3.870e-02, -1.196e-02, -5.798e-02, -6.512e-02, 6.685e-02, 1.077e-01, -1.033e-01, -6.763e-02, 6.647e-03)); + r += mul(s1_1, M4(4.385e-01, -5.927e-02, -3.308e-01, 1.535e-01, -5.186e-02, 9.066e-02, 5.980e-02, -5.399e-02, -1.484e-03, 1.159e-01, 1.228e-01, -4.994e-02, 4.690e-02, -5.008e-02, -4.004e-02, -1.684e-02)); + r += mul(s1_2, M4(-4.728e-02, 1.658e-02, -5.943e-02, 2.234e-03, -3.209e-02, 4.928e-03, 3.253e-03, 4.860e-04, 1.285e-02, -1.914e-02, 2.240e-02, 1.305e-02, -5.554e-02, -5.859e-04, -3.260e-03, 8.161e-03)); + r += mul(s1_3, M4(1.111e-02, -1.017e-02, 3.942e-01, -2.339e-01, -5.259e-02, -1.509e-01, 9.273e-02, 4.821e-03, 4.431e-02, -2.321e-02, 8.765e-03, 1.174e-01, -1.620e-01, -1.094e-01, -1.112e-01, 1.092e-01)); + r += mul(s1_4, M4(-1.112e-01, -3.016e-01, 5.767e-01, 2.991e-01, -2.363e-01, 1.773e-01, 4.842e-02, 1.479e-02, 4.656e-02, -6.919e-02, -1.145e-01, 3.517e-01, 1.694e-01, -1.172e-01, 1.981e-01, -3.146e-01)); + r += mul(s1_5, M4(7.860e-02, 1.250e-02, 8.584e-02, 2.115e-02, 4.508e-02, -1.057e-01, 3.477e-02, 1.894e-01, 1.702e-04, -3.281e-02, -4.114e-02, 1.520e-01, -3.739e-02, -2.410e-02, 4.224e-02, 4.905e-02)); + r += mul(s1_6, M4(-1.856e-02, -3.141e-02, -1.581e-01, -4.136e-02, -5.412e-02, 2.173e-01, -8.472e-02, 2.157e-02, -1.841e-02, 4.442e-03, -6.516e-02, -2.259e-03, -8.614e-03, -2.194e-02, 4.293e-02, 6.910e-02)); + r += mul(s1_7, M4(-6.917e-02, -1.204e-01, -1.636e-01, 1.444e-02, -1.302e-01, 1.454e-01, -1.236e-01, -1.381e-01, -2.625e-02, -1.014e-02, 3.965e-02, 5.456e-02, 1.004e-02, -7.594e-02, -2.646e-02, -8.390e-03)); + r += mul(s1_8, M4(-1.214e-02, -1.182e-02, -1.654e-02, 5.613e-02, -4.494e-02, 
-6.883e-02, -6.759e-02, 1.401e-01, 5.809e-03, -7.901e-03, 2.477e-03, 2.240e-02, 2.892e-02, 2.365e-04, 1.916e-02, -4.855e-02)); + r += mul(s2_0, M4(1.165e-02, -3.547e-02, -3.269e-02, 3.278e-02, 1.517e-01, 3.076e-01, 1.321e-01, -7.305e-02, -9.144e-02, -1.188e-01, -8.574e-02, 3.771e-02, 5.090e-02, -3.828e-03, -2.782e-03, 9.980e-03)); + r += mul(s2_1, M4(4.990e-02, -1.158e-02, -8.473e-03, -7.378e-02, 2.320e-01, 4.317e-03, -1.275e-01, -7.346e-02, 1.375e-01, -5.503e-02, 2.691e-02, 7.374e-02, -9.075e-02, -9.443e-03, -3.406e-02, -8.131e-03)); + r += mul(s2_2, M4(-2.091e-02, 2.630e-02, -1.171e-02, -8.149e-03, -6.488e-02, 3.674e-02, -1.582e-02, 2.703e-02, 1.933e-02, 9.368e-04, -2.847e-03, -6.541e-02, -8.207e-03, -2.102e-03, 5.476e-03, 2.813e-02)); + r += mul(s2_3, M4(-7.157e-02, 3.019e-01, 4.938e-02, 1.148e-02, -1.928e-02, 5.004e-02, 1.490e-01, -4.090e-02, -6.051e-01, 1.305e-01, 5.432e-02, 3.124e-01, -4.909e-03, 4.734e-03, -4.112e-02, 2.852e-02)); + r += mul(s2_4, M4(-6.570e-02, -1.792e-01, -3.370e-01, -1.151e-01, -1.800e-01, -4.234e-01, 2.290e-01, -1.410e-01, -1.107e-01, -3.994e-01, -6.890e-02, 1.616e-01, 2.725e-01, -1.534e-01, 1.134e-02, -1.122e-01)); + r += mul(s2_5, M4(2.107e-02, 7.061e-02, -3.419e-02, -7.876e-02, 1.059e-01, 2.269e-02, 3.643e-02, -9.355e-02, 2.354e-02, -2.920e-02, -1.486e-02, 6.596e-02, 1.459e-01, -7.315e-02, 1.001e-02, -1.196e-01)); + r += mul(s2_6, M4(7.558e-03, -5.529e-02, 2.896e-02, -5.776e-02, 4.176e-03, 2.669e-02, 8.191e-03, -1.173e-02, -8.356e-04, 2.705e-01, 2.322e-02, 1.236e-01, -1.824e-03, -9.138e-03, 5.993e-03, 2.423e-02)); + r += mul(s2_7, M4(2.899e-03, 4.622e-02, 1.248e-01, 5.803e-02, 2.360e-03, 1.574e-02, 9.107e-03, 2.627e-02, 2.870e-03, 1.155e-01, 2.568e-01, 1.185e-01, -1.852e-02, 7.471e-02, 1.442e-01, 4.149e-03)); + r += mul(s2_8, M4(2.587e-02, 2.002e-02, 1.244e-02, -7.156e-02, -2.962e-03, -4.371e-02, 9.750e-05, 2.554e-02, 1.524e-02, -3.555e-02, 3.165e-02, -1.252e-01, 7.889e-03, -2.808e-02, -2.502e-02, -9.080e-02)); + r += mul(s3_0, 
M4(-5.464e-02, -9.380e-02, -9.031e-02, 6.067e-02, 1.637e-02, -4.678e-02, -6.354e-02, 2.757e-02, -3.534e-02, -7.544e-02, -4.711e-02, -1.170e-02, -2.121e-02, 6.766e-02, 1.143e-02, -1.352e-02)); + r += mul(s3_1, M4(6.666e-02, -3.601e-02, -1.621e-02, 1.027e-02, -7.539e-02, 6.818e-02, 9.687e-02, 8.166e-03, 1.209e-01, -7.593e-02, -6.114e-02, -1.871e-02, -4.486e-03, -4.191e-02, -3.624e-02, -6.917e-02)); + r += mul(s3_2, M4(1.685e-03, 1.879e-02, -1.085e-02, -1.463e-02, -2.649e-02, -2.843e-02, -5.443e-02, 1.182e-01, -4.538e-02, 1.604e-02, -1.214e-02, -3.780e-02, 7.448e-02, 4.743e-02, 1.960e-02, -2.091e-02)); + r += mul(s3_3, M4(1.581e-03, 4.307e-01, 2.725e-02, -6.223e-03, -1.649e-02, -9.806e-02, -4.996e-02, 5.188e-02, -1.004e-01, 3.665e-01, 6.566e-02, -1.778e-02, 6.244e-02, -8.677e-02, 6.811e-02, -7.358e-02)); + r += mul(s3_4, M4(-1.159e-01, -2.202e-01, -4.450e-01, -1.843e-01, -2.163e-01, -5.406e-01, -1.782e-02, 3.599e-02, 4.676e-02, -2.842e-01, 2.868e-02, -3.187e-02, 5.708e-01, -4.671e-01, 2.904e-01, 7.765e-02)); + r += mul(s3_5, M4(1.722e-02, 6.820e-02, -2.501e-02, 2.062e-02, 7.785e-02, 3.089e-02, -1.011e-01, 7.250e-02, 1.777e-02, 2.478e-02, 2.426e-02, -1.451e-03, -3.446e-02, -2.670e-02, 2.924e-02, -3.201e-02)); + r += mul(s3_6, M4(1.407e-02, -6.091e-02, 9.407e-03, -4.578e-02, -1.060e-02, 4.716e-03, 1.340e-02, 9.612e-03, -4.227e-02, -1.122e-01, -5.321e-02, 5.625e-02, 2.228e-02, 6.253e-03, -6.240e-02, -3.802e-03)); + r += mul(s3_7, M4(-1.704e-02, -4.713e-03, 7.542e-02, -7.843e-03, -3.427e-02, 6.325e-03, 1.225e-02, 1.093e-02, 1.470e-02, 8.228e-02, 7.090e-02, 4.060e-02, -1.906e-02, 5.804e-02, 1.348e-01, -7.307e-02)); + r += mul(s3_8, M4(1.539e-02, 2.190e-03, -2.100e-02, -2.839e-02, 4.166e-03, -1.752e-02, -2.224e-02, 4.517e-02, -2.712e-02, -1.329e-02, -2.822e-02, -1.040e-02, 2.914e-02, -1.207e-02, -4.796e-02, -6.910e-02)); + r += V4(1.021e-04, 2.892e-03, 4.075e-03, -4.598e-04); + return r; +} + +void Pass9(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + 
uint2 gxy = Rmp8x8(tid.x) + blockStart; + uint2 size = GetInputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = (gxy + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + t0[gxy] = f0(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + t1[gxy] = f1(s0_0, s0_1, s0_2, s0_3, s0_4, s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, 
s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); +} + +//!PASS 10 +//!DESC out-shuffle +//!BLOCK_SIZE 16 +//!NUM_THREADS 64 +//!IN INPUT, t0, t1 +//!OUT OUTPUT + +#define l0(x, y) V4(O(t0, float2(x, y))) +#define l1(x, y) V4(O(t1, float2(x, y))) + +V4 f0(V4 s0_0, V4 s0_1, V4 s0_2, V4 s0_3, V4 s0_4, V4 s0_5, V4 s0_6, V4 s0_7, V4 s0_8, V4 s1_0, V4 s1_1, V4 s1_2, V4 s1_3, V4 s1_4, V4 s1_5, V4 s1_6, V4 s1_7, V4 s1_8, V4 s2_0, V4 s2_1, V4 s2_2, V4 s2_3, V4 s2_4, V4 s2_5, V4 s2_6, V4 s2_7, V4 s2_8, V4 s3_0, V4 s3_1, V4 s3_2, V4 s3_3, V4 s3_4, V4 s3_5, V4 s3_6, V4 s3_7, V4 s3_8) { + V4 r = 0.0; + r += mul(s0_0, M4(2.098e-02, -9.305e-03, -8.033e-03, 3.455e-02, 1.747e-02, -3.697e-02, -1.424e-02, 1.170e-02, -2.027e-02, -5.643e-02, -2.869e-02, -3.308e-02, -1.154e-01, -2.864e-02, -3.009e-02, -1.849e-02)); + r += mul(s0_1, M4(-2.191e-02, 7.817e-02, -1.705e-02, -2.475e-03, -2.317e-01, -3.827e-02, -4.318e-04, -7.348e-02, -3.294e-02, -1.082e-01, 2.578e-02, -3.462e-02, -7.983e-02, -2.407e-01, -8.092e-03, -3.626e-02)); + r += mul(s0_2, M4(-1.301e-02, -3.897e-02, -6.300e-03, -1.383e-02, 2.743e-02, -4.224e-02, 5.535e-03, -1.838e-02, 9.344e-03, 1.156e-02, -5.436e-03, 1.041e-02, -7.606e-02, -2.449e-02, -2.023e-02, 2.329e-02)); + r += mul(s0_3, M4(2.784e-01, 9.384e-02, 1.126e-01, -1.144e-01, 1.104e-01, -3.075e-03, 2.114e-01, -4.576e-02, 2.271e-01, 5.360e-02, -2.026e-02, 2.571e-01, 1.023e-01, -2.406e-02, 3.613e-02, 2.313e-02)); + r += mul(s0_4, M4(1.481e-01, 1.487e-01, 3.646e-02, 1.608e-01, -5.368e-01, 7.939e-01, -8.009e-01, 7.326e-01, -8.184e-02, 7.780e-02, 2.823e-02, 1.723e-02, 3.261e-02, 7.264e-02, 4.906e-01, -7.881e-02)); + r += mul(s0_5, M4(1.893e-02, 2.051e-02, -1.583e-02, -1.434e-02, -3.022e-02, -5.953e-02, 4.948e-02, -6.243e-02, 1.064e-02, 9.555e-03, 9.215e-03, 2.395e-03, -1.879e-02, -5.293e-02, -6.218e-02, 1.880e-01)); + r += mul(s0_6, M4(-1.623e-02, -9.043e-03, 8.706e-02, -2.571e-02, 4.310e-03, 1.182e-02, 2.041e-02, 
5.692e-02, -2.960e-02, -2.983e-03, 4.222e-02, -6.760e-02, -7.396e-03, 5.672e-03, -7.944e-03, 4.287e-02)); + r += mul(s0_7, M4(-2.240e-02, -1.813e-02, 5.106e-02, 3.940e-02, -2.053e-03, 4.609e-03, -7.422e-02, 1.510e-01, -1.863e-02, -1.240e-02, -4.103e-02, 4.553e-02, -1.829e-02, -7.566e-03, 1.359e-03, -3.670e-02)); + r += mul(s0_8, M4(-5.476e-03, -1.114e-02, -4.037e-03, -2.126e-03, 2.712e-02, 1.440e-02, 2.411e-02, 2.362e-02, -5.747e-03, -5.246e-03, 1.138e-02, -1.869e-02, 1.133e-03, -4.128e-03, -5.698e-03, 7.431e-04)); + r += mul(s1_0, M4(-2.053e-02, -5.708e-02, 1.294e-01, -1.091e-02, -5.203e-03, -1.339e-02, -2.457e-02, 9.190e-03, -1.666e-01, -1.923e-02, 2.214e-02, -2.020e-02, -8.115e-02, -3.013e-02, -4.038e-02, -3.056e-02)); + r += mul(s1_1, M4(-2.557e-01, 5.294e-01, -1.299e-01, -1.493e-01, -1.234e-01, -1.255e-01, 2.948e-03, -4.235e-02, 3.493e-02, -1.870e-01, -2.104e-03, -4.000e-02, -3.905e-02, -2.193e-01, -2.203e-02, -9.760e-03)); + r += mul(s1_2, M4(1.733e-02, -7.787e-02, -6.604e-03, 6.678e-03, 7.696e-03, -2.369e-02, -6.539e-04, -1.102e-02, -1.193e-03, 1.231e-02, -1.389e-03, -9.574e-03, -5.801e-02, -2.889e-02, -2.686e-03, 1.507e-02)); + r += mul(s1_3, M4(2.792e-01, 9.448e-02, 3.361e-02, -5.689e-02, 1.630e-01, 1.120e-02, 1.771e-01, -2.116e-02, 4.607e-01, -2.987e-02, -1.319e-01, 1.699e-01, 1.127e-01, -3.390e-03, 5.671e-02, 8.547e-03)); + r += mul(s1_4, M4(1.412e-01, 6.567e-02, 2.311e-01, 1.009e-01, -7.339e-02, 3.683e-01, -2.510e-01, 2.821e-01, -1.446e-01, -1.170e-01, -6.035e-02, 5.471e-01, 1.075e-01, 9.204e-02, 3.083e-01, -4.666e-02)); + r += mul(s1_5, M4(4.185e-02, 9.992e-03, -9.781e-03, -1.284e-01, -6.054e-02, -1.004e-01, 3.920e-03, -7.670e-02, 6.425e-03, 1.315e-02, 1.833e-02, -2.078e-02, -4.423e-03, 1.608e-02, -4.473e-02, 1.431e-01)); + r += mul(s1_6, M4(-2.035e-02, -1.490e-02, 7.858e-02, -1.677e-02, -1.529e-02, 1.617e-02, 1.892e-02, 6.421e-02, -5.163e-02, -3.004e-04, 4.796e-02, -6.612e-02, -7.339e-03, 3.612e-03, -3.793e-03, 3.967e-02)); + r += mul(s1_7, 
M4(-1.587e-02, 2.826e-03, 4.107e-02, 3.865e-02, 1.741e-03, -8.144e-03, 3.549e-02, 4.874e-02, -1.309e-02, 9.007e-03, -4.065e-02, 1.889e-02, -1.401e-02, -6.740e-03, -8.651e-03, -3.680e-02)); + r += mul(s1_8, M4(-3.425e-02, -1.581e-02, 2.079e-03, 9.678e-03, 1.536e-02, 1.254e-02, -6.491e-03, 1.337e-02, -6.457e-03, -4.502e-03, 7.601e-03, -2.523e-02, -8.848e-04, -4.491e-03, -1.150e-02, -1.041e-02)); + r += mul(s2_0, M4(-1.231e-02, 1.957e-02, -4.860e-03, -3.485e-02, 2.150e-02, 2.795e-02, -8.654e-03, -1.193e-02, -3.417e-03, 1.825e-02, 1.567e-02, -3.503e-02, 1.815e-01, 3.099e-02, -2.550e-02, -7.071e-02)); + r += mul(s2_1, M4(4.765e-03, 1.726e-02, -1.563e-02, -3.967e-02, 1.431e-01, 1.655e-01, 8.577e-03, -2.072e-02, -1.189e-01, -1.901e-01, 1.268e-02, -1.182e-02, 1.360e-01, -9.727e-02, 3.545e-02, -3.795e-02)); + r += mul(s2_2, M4(-1.629e-02, -2.886e-02, 6.265e-04, -4.466e-03, -9.620e-03, -6.831e-03, -1.007e-03, -1.137e-02, 1.575e-02, -1.520e-02, 3.770e-03, -1.629e-03, 9.599e-03, 1.072e-02, -8.331e-03, -2.625e-03)); + r += mul(s2_3, M4(-4.895e-02, -1.444e-02, -1.074e-02, 4.024e-02, -7.837e-02, 7.142e-03, -3.005e-02, 5.887e-02, 4.426e-02, -4.724e-02, 1.118e-01, 1.978e-01, 3.873e-01, -6.170e-02, 5.417e-01, 2.115e-01)); + r += mul(s2_4, M4(-4.664e-02, 5.652e-02, 1.062e-01, 7.868e-02, -4.444e-01, -2.378e-01, 2.168e-01, 2.783e-01, 2.544e-01, 2.526e-01, -2.176e-01, -3.524e-01, -1.380e-01, -4.658e-01, 7.754e-02, -5.062e-01)); + r += mul(s2_5, M4(3.824e-02, -7.471e-03, -3.021e-02, 2.526e-02, -9.479e-03, -8.237e-02, -1.161e-02, 7.648e-02, -1.067e-01, -1.000e-01, 2.901e-03, -3.454e-02, -2.220e-03, -3.343e-02, 4.048e-03, -1.751e-03)); + r += mul(s2_6, M4(1.680e-02, 1.039e-03, 5.104e-02, -1.713e-01, 2.749e-03, 4.469e-03, -4.546e-02, 1.381e-02, -6.014e-03, 1.247e-04, -6.902e-03, 6.958e-02, -5.066e-02, -6.274e-03, 2.876e-02, -1.837e-01)); + r += mul(s2_7, M4(1.057e-02, -2.665e-02, 8.209e-02, 2.316e-01, 3.331e-02, 3.507e-03, -1.734e-01, -1.194e-01, -9.633e-03, 4.334e-03, 4.570e-02, 
-3.032e-02, 1.151e-02, -4.714e-03, -8.234e-03, -2.184e-02)); + r += mul(s2_8, M4(2.067e-02, -3.817e-03, -2.124e-02, -3.790e-02, -1.628e-03, 1.466e-02, -1.040e-02, -4.732e-02, 8.216e-03, -3.982e-03, -2.452e-02, 3.031e-03, -3.218e-03, 5.065e-03, -2.036e-02, -4.540e-03)); + r += mul(s3_0, M4(-1.121e-02, 6.935e-03, 8.246e-03, -1.607e-02, 4.960e-03, 1.605e-02, 3.055e-03, -1.948e-02, -1.705e-02, 3.543e-02, 3.453e-02, -2.897e-03, 8.019e-02, 1.047e-01, 3.947e-03, -5.653e-02)); + r += mul(s3_1, M4(-1.800e-02, -1.445e-02, -1.291e-02, 4.545e-03, 1.890e-01, 1.792e-01, -1.108e-02, -3.760e-02, -5.770e-01, -4.943e-01, 5.098e-01, 2.238e-01, 7.838e-02, 3.965e-02, 2.951e-02, 7.738e-03)); + r += mul(s3_2, M4(-3.283e-03, -4.729e-02, -8.269e-03, 1.385e-03, -2.860e-02, -7.765e-02, 4.138e-02, 9.117e-04, -1.147e-02, -2.068e-01, -6.331e-03, 2.325e-01, 1.822e-02, 2.032e-02, -2.723e-03, -8.994e-04)); + r += mul(s3_3, M4(-6.536e-03, -3.618e-02, -7.814e-02, 6.080e-02, -6.788e-02, -5.059e-04, -2.543e-02, 5.836e-02, 7.723e-02, -2.090e-02, 1.357e-01, 1.821e-01, 2.609e-01, 1.207e-02, 2.234e-01, 2.822e-01)); + r += mul(s3_4, M4(-2.409e-01, 6.348e-01, 3.062e-01, -1.282e-01, -3.467e-01, -1.607e-01, -4.471e-03, 2.832e-01, 2.496e-01, 2.512e-01, -1.490e-01, -2.456e-01, -2.036e-01, -2.331e-01, -5.261e-02, -1.896e-01)); + r += mul(s3_5, M4(4.482e-02, -5.530e-02, -2.249e-02, 8.728e-03, 2.920e-01, -4.287e-01, -1.998e-01, 3.981e-01, -8.622e-02, -9.097e-02, -2.153e-02, -2.622e-02, 2.039e-03, -3.998e-02, 1.589e-02, -4.862e-03)); + r += mul(s3_6, M4(1.152e-02, -4.285e-02, 1.577e-01, -1.897e-01, -7.030e-03, 3.144e-03, -4.456e-02, 1.236e-02, -6.315e-03, 9.912e-04, -1.745e-02, 6.373e-02, -3.957e-02, -1.127e-02, 2.343e-02, -1.559e-01)); + r += mul(s3_7, M4(2.342e-02, -1.236e-02, -6.990e-02, 1.352e-01, 1.165e-02, 3.656e-03, -1.004e-01, -1.514e-01, -1.951e-02, -2.067e-04, 3.870e-02, -4.895e-02, 1.210e-02, -2.797e-03, -5.429e-02, 3.699e-02)); + r += mul(s3_8, M4(2.117e-02, 1.253e-02, -2.078e-02, -3.665e-03, 
-3.600e-02, 1.398e-02, -2.210e-02, -9.164e-02, 8.846e-04, 2.017e-04, -1.795e-02, 1.074e-02, -1.653e-03, 6.362e-03, -7.357e-03, -2.392e-02)); + r += V4(-1.409e-03, -1.878e-03, -5.303e-04, -8.141e-04); + return tanh(r); +} + +void Pass10(uint2 blockStart, uint3 tid) { + float2 pt = float2(GetInputPt()); + uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart; + uint2 size = GetOutputSize(); + if (gxy.x >= size.x || gxy.y >= size.y) { + return; + } + float2 pos = ((gxy >> 1) + 0.5) * pt; + + V4 s0_0 = l0(-1.0, -1.0); + V4 s0_1 = l0(0.0, -1.0); + V4 s0_2 = l0(1.0, -1.0); + V4 s0_3 = l0(-1.0, 0.0); + V4 s0_4 = l0(0.0, 0.0); + V4 s0_5 = l0(1.0, 0.0); + V4 s0_6 = l0(-1.0, 1.0); + V4 s0_7 = l0(0.0, 1.0); + V4 s0_8 = l0(1.0, 1.0); + V4 s1_0 = -max(-s0_0, 0.0); + V4 s1_1 = -max(-s0_1, 0.0); + V4 s1_2 = -max(-s0_2, 0.0); + V4 s1_3 = -max(-s0_3, 0.0); + V4 s1_4 = -max(-s0_4, 0.0); + V4 s1_5 = -max(-s0_5, 0.0); + V4 s1_6 = -max(-s0_6, 0.0); + V4 s1_7 = -max(-s0_7, 0.0); + V4 s1_8 = -max(-s0_8, 0.0); + s0_0 = max(s0_0, 0.0); + s0_1 = max(s0_1, 0.0); + s0_2 = max(s0_2, 0.0); + s0_3 = max(s0_3, 0.0); + s0_4 = max(s0_4, 0.0); + s0_5 = max(s0_5, 0.0); + s0_6 = max(s0_6, 0.0); + s0_7 = max(s0_7, 0.0); + s0_8 = max(s0_8, 0.0); + + V4 s2_0 = l1(-1.0, -1.0); + V4 s2_1 = l1(0.0, -1.0); + V4 s2_2 = l1(1.0, -1.0); + V4 s2_3 = l1(-1.0, 0.0); + V4 s2_4 = l1(0.0, 0.0); + V4 s2_5 = l1(1.0, 0.0); + V4 s2_6 = l1(-1.0, 1.0); + V4 s2_7 = l1(0.0, 1.0); + V4 s2_8 = l1(1.0, 1.0); + V4 s3_0 = -max(-s2_0, 0.0); + V4 s3_1 = -max(-s2_1, 0.0); + V4 s3_2 = -max(-s2_2, 0.0); + V4 s3_3 = -max(-s2_3, 0.0); + V4 s3_4 = -max(-s2_4, 0.0); + V4 s3_5 = -max(-s2_5, 0.0); + V4 s3_6 = -max(-s2_6, 0.0); + V4 s3_7 = -max(-s2_7, 0.0); + V4 s3_8 = -max(-s2_8, 0.0); + s2_0 = max(s2_0, 0.0); + s2_1 = max(s2_1, 0.0); + s2_2 = max(s2_2, 0.0); + s2_3 = max(s2_3, 0.0); + s2_4 = max(s2_4, 0.0); + s2_5 = max(s2_5, 0.0); + s2_6 = max(s2_6, 0.0); + s2_7 = max(s2_7, 0.0); + s2_8 = max(s2_8, 0.0); + + V4 r = f0(s0_0, s0_1, s0_2, s0_3, s0_4, 
s0_5, s0_6, s0_7, s0_8, s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s3_0, s3_1, s3_2, s3_3, s3_4, s3_5, s3_6, s3_7, s3_8); + + static const float3x3 rgb2yuv = {0.299, 0.587, 0.114, -0.169, -0.331, 0.5, 0.5, -0.419, -0.081}; + static const float3x3 yuv2rgb = {1, -0.00093, 1.401687, 1, -0.3437, -0.71417, 1, 1.77216, 0.00099}; + float2 opt = float2(GetOutputPt()); + + pos -= 0.5f * opt; + float3 yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.x), yuv.yz)), 1); + + ++gxy.x; + pos.x += opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.y), yuv.yz)), 1); + + ++gxy.y; + pos.y += opt.y; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.w), yuv.yz)), 1); + + --gxy.x; + pos.x -= opt.x; + yuv = mul(rgb2yuv, INPUT.SampleLevel(SL, pos, 0).rgb); + OUTPUT[gxy] = float4(mul(yuv2rgb, float3(saturate(yuv.r + r.z), yuv.yz)), 1); +} diff --git a/src/Effects/Deband.hlsl b/src/Effects/Deband.hlsl index a67a4d7c1..d2e7fe921 100644 --- a/src/Effects/Deband.hlsl +++ b/src/Effects/Deband.hlsl @@ -2,9 +2,7 @@ // Port from https://github.com/haasn/gentoo-conf/blob/xor/home/nand/.mpv/shaders/deband.glsl //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!PARAMETER //!LABEL Threshold @@ -54,6 +52,11 @@ float grain; //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER LINEAR SamplerState sam; @@ -66,6 +69,7 @@ SamplerState sam1; //!PASS 1 //!STYLE PS //!IN INPUT +//!OUT OUTPUT // Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post float mod289(float x) { return x - floor(x / 289.0) * 289.0; } diff --git a/src/Effects/Effects.vcxproj b/src/Effects/Effects.vcxproj index 
bbb6bf1e2..24f4929fe 100644 --- a/src/Effects/Effects.vcxproj +++ b/src/Effects/Effects.vcxproj @@ -29,36 +29,24 @@ Document - - Document - Document Document - - Document - - + Document - + Document - - Document - - Document - - Document @@ -71,20 +59,14 @@ Document - - Document Document - - - - Document @@ -160,8 +142,6 @@ Document - - Document @@ -177,8 +157,6 @@ Document - - Document @@ -197,13 +175,9 @@ Document - - Document - - Document @@ -213,56 +187,153 @@ Document - - + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + Document + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + Document - - - - - - + + Document + + + + + + + + + + + + + + + + Document + + + Document + Document + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + Document - - + + Document + Document Document - - - - Document Document - - Document Document - - Document @@ -275,8 +346,6 @@ Document - - Document @@ -286,11 +355,81 @@ Document - - Document + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + + + Document + \ No newline at end of file diff --git a/src/Effects/Effects.vcxproj.filters b/src/Effects/Effects.vcxproj.filters index 756d193e8..fd851ffae 100644 --- a/src/Effects/Effects.vcxproj.filters +++ b/src/Effects/Effects.vcxproj.filters @@ -2,11 +2,8 @@ - - - @@ -59,7 +56,7 @@ Anime4K - + Anime4K @@ -156,24 +153,159 @@ FXAA + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + 
RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + RAVU - + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + + RAVU + + RAVU RAVU - + RAVU + + NNEDI3 + + + NNEDI3 + NNEDI3 + + NNEDI3 + + + NNEDI3 + + + NNEDI3 + + + NNEDI3 + + + NNEDI3 + + + NNEDI3 + NNEDI3 + + NNEDI3 + NIS @@ -220,6 +352,75 @@ Pixel Art + + + + Anime4K + + + Anime4K + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + + CuNNy + + @@ -261,5 +462,8 @@ {0b58f073-84cb-4c38-919d-80176ae408bc} + + {9157745b-aa96-42ce-bdc6-1230dffa326b} + \ No newline at end of file diff --git a/src/Effects/FSR/FSR_EASU.hlsl b/src/Effects/FSR/FSR_EASU.hlsl index aedd94126..c6419b9eb 100644 --- a/src/Effects/FSR/FSR_EASU.hlsl +++ b/src/Effects/FSR/FSR_EASU.hlsl @@ -2,11 +2,13 @@ // 移植自 https://github.com/GPUOpen-Effects/FidelityFX-FSR/blob/master/ffx-fsr/ffx_fsr1.h //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; //!SAMPLER //!FILTER POINT @@ -15,6 +17,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -228,12 +231,13 @@ float3 FsrEasuF(uint2 pos, float4 con0, float4 con1, float4 con2, float2 con3) { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = blockStart + Rmp8x8(threadId.x); - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } uint2 inputSize = GetInputSize(); - uint2 outputSize = GetOutputSize(); float2 inputPt = GetInputPt(); float4 con0, con1, con2; @@ -271,20 +275,20 @@ void Pass1(uint2 blockStart, uint3 threadId) { con3[0] = 0; con3[1] = 
4.0f * inputPt.y; - WriteToOutput(gxy, FsrEasuF(gxy, con0, con1, con2, con3)); + OUTPUT[gxy] = float4(FsrEasuF(gxy, con0, con1, con2, con3), 1); gxy.x += 8u; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, FsrEasuF(gxy, con0, con1, con2, con3)); + if (gxy.x < outputSize.x && gxy.y < outputSize.y) { + OUTPUT[gxy] = float4(FsrEasuF(gxy, con0, con1, con2, con3), 1); } gxy.y += 8u; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, FsrEasuF(gxy, con0, con1, con2, con3)); + if (gxy.x < outputSize.x && gxy.y < outputSize.y) { + OUTPUT[gxy] = float4(FsrEasuF(gxy, con0, con1, con2, con3), 1); } gxy.x -= 8u; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, FsrEasuF(gxy, con0, con1, con2, con3)); + if (gxy.x < outputSize.x && gxy.y < outputSize.y) { + OUTPUT[gxy] = float4(FsrEasuF(gxy, con0, con1, con2, con3), 1); } } diff --git a/src/Effects/FSR/FSR_RCAS.hlsl b/src/Effects/FSR/FSR_RCAS.hlsl index 6409c7229..6750fbde9 100644 --- a/src/Effects/FSR/FSR_RCAS.hlsl +++ b/src/Effects/FSR/FSR_RCAS.hlsl @@ -2,9 +2,7 @@ // 移植自 https://github.com/GPUOpen-Effects/FidelityFX-FSR/blob/master/ffx-fsr/ffx_fsr1.h //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!PARAMETER @@ -18,12 +16,19 @@ float sharpness; //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; + //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -108,7 +113,9 @@ float3 FsrRcasF(float3 b, float3 d, float3 e, float3 f, float3 h) { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = blockStart + (Rmp8x8(threadId.x) << 1); - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -126,20 +133,20 @@ void Pass1(uint2 blockStart, uint3 threadId) { src[3][1] = INPUT.Load(int3(gxy.x + 2, gxy.y, 0)).rgb; src[3][2] = INPUT.Load(int3(gxy.x + 2, gxy.y + 1, 0)).rgb; - 
WriteToOutput(gxy, FsrRcasF(src[1][0], src[0][1], src[1][1], src[2][1], src[1][2])); + OUTPUT[gxy] = float4(FsrRcasF(src[1][0], src[0][1], src[1][1], src[2][1], src[1][2]), 1); ++gxy.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, FsrRcasF(src[2][0], src[1][1], src[2][1], src[3][1], src[2][2])); + if (gxy.x < outputSize.x && gxy.y < outputSize.y) { + OUTPUT[gxy] = float4(FsrRcasF(src[2][0], src[1][1], src[2][1], src[3][1], src[2][2]), 1); } ++gxy.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, FsrRcasF(src[2][1], src[1][2], src[2][2], src[3][2], src[2][3])); + if (gxy.x < outputSize.x && gxy.y < outputSize.y) { + OUTPUT[gxy] = float4(FsrRcasF(src[2][1], src[1][2], src[2][2], src[3][2], src[2][3]), 1); } --gxy.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, FsrRcasF(src[1][1], src[0][2], src[1][2], src[2][2], src[1][3])); + if (gxy.x < outputSize.x && gxy.y < outputSize.y) { + OUTPUT[gxy] = float4(FsrRcasF(src[1][1], src[0][2], src[1][2], src[2][2], src[1][3]), 1); } } diff --git a/src/Effects/FSRCNNX/FSRCNNX.hlsl b/src/Effects/FSRCNNX/FSRCNNX.hlsl index 4a2e33a91..7c0088e31 100644 --- a/src/Effects/FSRCNNX/FSRCNNX.hlsl +++ b/src/Effects/FSRCNNX/FSRCNNX.hlsl @@ -3,14 +3,17 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -534,6 +537,7 @@ void Pass5(uint2 blockStart, uint3 threadId) { //!PASS 6 //!DESC sub-pixel convolution, aggregation //!IN tex3, tex4, INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -551,7 +555,8 @@ const static float3x3 yuv2rgb = { void Pass6(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -609,15 +614,9 @@ 
void Pass6(uint2 blockStart, uint3 threadId) { for (uint j = 0; j <= 1; ++j) { const uint2 destPos = gxy + uint2(i, j); - if (i != 0 || j != 0) { - if (!CheckViewport(destPos)) { - continue; - } - } - float2 originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb); const uint index = i * 2 + j; - WriteToOutput(destPos, mul(yuv2rgb, float3(result[index], originUV))); + OUTPUT[destPos] = float4(mul(yuv2rgb, float3(result[index], originUV)), 1); } } } diff --git a/src/Effects/FSRCNNX/FSRCNNX_LineArt.hlsl b/src/Effects/FSRCNNX/FSRCNNX_LineArt.hlsl index fdc760c52..459ac5b8a 100644 --- a/src/Effects/FSRCNNX/FSRCNNX_LineArt.hlsl +++ b/src/Effects/FSRCNNX/FSRCNNX_LineArt.hlsl @@ -2,14 +2,17 @@ // 移植自 https://github.com/igv/FSRCNN-TensorFlow //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -531,6 +534,7 @@ void Pass5(uint2 blockStart, uint3 threadId) { //!PASS 6 //!DESC sub-pixel convolution, aggregation //!IN tex3, tex4, INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -548,7 +552,8 @@ const static float3x3 yuv2rgb = { void Pass6(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -606,15 +611,9 @@ void Pass6(uint2 blockStart, uint3 threadId) { for (uint j = 0; j <= 1; ++j) { const uint2 destPos = gxy + uint2(i, j); - if (i != 0 || j != 0) { - if (!CheckViewport(destPos)) { - continue; - } - } - float2 originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb); const uint index = i * 2 + j; - WriteToOutput(destPos, mul(yuv2rgb, float3(result[index], originUV))); + OUTPUT[destPos] = float4(mul(yuv2rgb, 
float3(result[index], originUV)), 1); } } } diff --git a/src/Effects/FXAA/FXAA_High.hlsl b/src/Effects/FXAA/FXAA_High.hlsl index 6d0257add..2cd9115a4 100644 --- a/src/Effects/FXAA/FXAA_High.hlsl +++ b/src/Effects/FXAA/FXAA_High.hlsl @@ -1,15 +1,18 @@ // 移植自 https://github.com/libretro/slang-shaders/blob/master/anti-aliasing/shaders/fxaa.slang //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME FXAA_1 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER LINEAR SamplerState sam; @@ -17,6 +20,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -26,7 +30,9 @@ SamplerState sam; void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -57,14 +63,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { [unroll] for (j = 0; j <= 1; ++j) { uint2 destPos = gxy + uint2(i, j); - - if (i != 0 || j != 0) { - if (!CheckViewport(gxy)) { - return; - } - } - - WriteToOutput(destPos, FXAA(src, i + 1, j + 1, INPUT, sam, (destPos + 0.5f) * inputPt, inputPt)); + OUTPUT[destPos] = float4(FXAA(src, i + 1, j + 1, INPUT, sam, (destPos + 0.5f) * inputPt, inputPt), 1); } } } diff --git a/src/Effects/FXAA/FXAA_Medium.hlsl b/src/Effects/FXAA/FXAA_Medium.hlsl index 5e8c4ae7d..f5c67268e 100644 --- a/src/Effects/FXAA/FXAA_Medium.hlsl +++ b/src/Effects/FXAA/FXAA_Medium.hlsl @@ -1,15 +1,18 @@ // 移植自 https://github.com/libretro/slang-shaders/blob/master/anti-aliasing/shaders/fxaa.slang //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME FXAA_0 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER 
LINEAR SamplerState sam; @@ -17,6 +20,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -26,7 +30,9 @@ SamplerState sam; void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -57,14 +63,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { [unroll] for (j = 0; j <= 1; ++j) { uint2 destPos = gxy + uint2(i, j); - - if (i != 0 || j != 0) { - if (!CheckViewport(gxy)) { - return; - } - } - - WriteToOutput(destPos, FXAA(src, i + 1, j + 1, INPUT, sam, (destPos + 0.5f) * inputPt, inputPt)); + OUTPUT[destPos] = float4(FXAA(src, i + 1, j + 1, INPUT, sam, (destPos + 0.5f) * inputPt, inputPt), 1); } } } diff --git a/src/Effects/FXAA/FXAA_Ultra.hlsl b/src/Effects/FXAA/FXAA_Ultra.hlsl index 3a748f99c..ff3c1235c 100644 --- a/src/Effects/FXAA/FXAA_Ultra.hlsl +++ b/src/Effects/FXAA/FXAA_Ultra.hlsl @@ -1,15 +1,18 @@ // 移植自 https://github.com/libretro/slang-shaders/blob/master/anti-aliasing/shaders/fxaa.slang //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME FXAA_2 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER LINEAR SamplerState sam; @@ -17,6 +20,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -26,7 +30,9 @@ SamplerState sam; void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -57,14 +63,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { [unroll] for (j = 0; j <= 1; ++j) { uint2 destPos = gxy + uint2(i, j); - - if (i != 0 || j != 0) { - if (!CheckViewport(gxy)) { - 
return; - } - } - - WriteToOutput(destPos, FXAA(src, i + 1, j + 1, INPUT, sam, (destPos + 0.5f) * inputPt, inputPt)); + OUTPUT[destPos] = float4(FXAA(src, i + 1, j + 1, INPUT, sam, (destPos + 0.5f) * inputPt, inputPt), 1); } } } diff --git a/src/Effects/ImageAdjustment.hlsl b/src/Effects/ImageAdjustment.hlsl index 91807a9fa..907e6e42a 100644 --- a/src/Effects/ImageAdjustment.hlsl +++ b/src/Effects/ImageAdjustment.hlsl @@ -1,9 +1,7 @@ // 移植自 https://github.com/libretro/slang-shaders/blob/3f67e1870dbd5be74ae2f09eaed0eeadce6abd15/misc/image-adjustment.slang //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!PARAMETER @@ -89,6 +87,11 @@ float b; //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -97,6 +100,7 @@ SamplerState sam; //!PASS 1 //!STYLE PS //!IN INPUT +//!OUT OUTPUT float3 RGBtoHSV(float3 c) { float4 K = float4(0.0, -1.0 / 3.0, 2.0 / 3.0, -1.0); diff --git a/src/Effects/Jinc.hlsl b/src/Effects/Jinc.hlsl index a4d2bfa9b..b6cf0cd0e 100644 --- a/src/Effects/Jinc.hlsl +++ b/src/Effects/Jinc.hlsl @@ -10,8 +10,7 @@ // B = 0.825 to get rid of dithering. Increase B to get a fine sharpness, though dithering returns. 
//!MAGPIE EFFECT -//!VERSION 3 -//!GENERIC_DOWNSCALER +//!VERSION 4 //!PARAMETER @@ -41,6 +40,9 @@ float ARStrength; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -48,6 +50,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 @@ -70,7 +73,9 @@ float4 resampler(float4 x, float wa, float wb) { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -126,5 +131,5 @@ void Pass1(uint2 blockStart, uint3 threadId) { color = lerp(color, clamp(color, min_sample, max_sample), ARStrength); // final sum and weight normalization - WriteToOutput(gxy, color); + OUTPUT[gxy] = float4(color, 1); } diff --git a/src/Effects/Lanczos.hlsl b/src/Effects/Lanczos.hlsl index 3a32e81fd..ed980933b 100644 --- a/src/Effects/Lanczos.hlsl +++ b/src/Effects/Lanczos.hlsl @@ -2,8 +2,7 @@ // 移植自 https://github.com/libretro/common-shaders/blob/master/windowed/shaders/lanczos6.cg //!MAGPIE EFFECT -//!VERSION 3 -//!GENERIC_DOWNSCALER +//!VERSION 4 //!PARAMETER @@ -17,6 +16,9 @@ float ARStrength; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -25,6 +27,7 @@ SamplerState sam; //!PASS 1 //!STYLE PS //!IN INPUT +//!OUT OUTPUT #define FIX(c) max(abs(c), 1e-5) #define PI 3.14159265359 diff --git a/src/Effects/NIS/NIS.hlsl b/src/Effects/NIS/NIS.hlsl index 79ef03552..957e86857 100644 --- a/src/Effects/NIS/NIS.hlsl +++ b/src/Effects/NIS/NIS.hlsl @@ -1,7 +1,7 @@ // 移植自 https://github.com/NVIDIAGameWorks/NVIDIAImageScaling/blob/main/NIS/NIS_Scaler.h //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!PARAMETER @@ -15,6 +15,9 @@ float sharpness; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!TEXTURE //!SOURCE Coef_Scale.dds //!FORMAT R16G16B16A16_FLOAT @@ -32,6 
+35,7 @@ SamplerState samplerLinearClamp; //!PASS 1 //!IN INPUT, coef_scaler, coef_usm +//!OUT OUTPUT //!BLOCK_SIZE 32,32 //!NUM_THREADS 256 @@ -431,12 +435,18 @@ void Pass1(uint2 blockStart, uint3 threadId) { // discretized phase const int fx_int = int(fx * kPhaseCount); + const uint2 outputSize = GetOutputSize(); + if (dstX >= outputSize.x) { + return; + } + for (int k = 0; k < NIS_BLOCK_WIDTH * NIS_BLOCK_HEIGHT / NIS_THREAD_GROUP_SIZE; ++k) { // y coord inside the output image const int dstY = dstBlockY + pos.y + k * (NIS_THREAD_GROUP_SIZE / NIS_BLOCK_WIDTH); - if (!CheckViewport(int2(dstX, dstY))) { + if (dstY >= outputSize.y) { return; } + // y coord inside the input image const float srcY = (0.5f + dstY) * kScaleY - 0.5f; @@ -487,13 +497,13 @@ void Pass1(uint2 blockStart, uint3 threadId) { // do bilinear tap for chroma upscaling - float3 op = INPUT.SampleLevel(samplerLinearClamp, float2((srcX + 0.5f) * kSrcNormX, (srcY + 0.5f) * kSrcNormY), 0).rgb; + float4 op = INPUT.SampleLevel(samplerLinearClamp, float2((srcX + 0.5f) * kSrcNormX, (srcY + 0.5f) * kSrcNormY), 0); const float corr = opY * (1.0f / NIS_SCALE_FLOAT) - getY(float3(op.x, op.y, op.z)); op.x += corr; op.y += corr; op.z += corr; - WriteToOutput(uint2(dstX, dstY), op); + OUTPUT[uint2(dstX, dstY)] = op; } } diff --git a/src/Effects/NIS/NVSharpen.hlsl b/src/Effects/NIS/NVSharpen.hlsl index 8ed729fea..8abd4734a 100644 --- a/src/Effects/NIS/NVSharpen.hlsl +++ b/src/Effects/NIS/NVSharpen.hlsl @@ -1,18 +1,9 @@ // 移植自 https://github.com/NVIDIAGameWorks/NVIDIAImageScaling/blob/main/NIS/NIS_Scaler.h //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 -//!TEXTURE -Texture2D INPUT; - -//!SAMPLER -//!FILTER LINEAR -SamplerState samplerLinearClamp; - //!PARAMETER //!LABEL Sharpness //!DEFAULT 0.5 @@ -21,9 +12,22 @@ SamplerState samplerLinearClamp; //!STEP 0.01 float sharpness; +//!TEXTURE +Texture2D INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT 
INPUT_HEIGHT +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState samplerLinearClamp; + //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 32, 32 //!NUM_THREADS 256 @@ -208,6 +212,8 @@ void Pass1(uint2 blockStart, uint3 threadId) { GroupMemoryBarrierWithGroupSync(); + const int2 outputSize = (int2)GetOutputSize(); + for (int k = int(threadIdx); k < NIS_BLOCK_WIDTH * NIS_BLOCK_HEIGHT; k += NIS_THREAD_GROUP_SIZE) { const int2 pos = int2(uint(k) % uint(NIS_BLOCK_WIDTH), uint(k) / uint(NIS_BLOCK_WIDTH)); @@ -215,7 +221,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { const int dstX = dstBlockX + pos.x; const int dstY = dstBlockY + pos.y; - if (!CheckViewport(int2(dstX, dstY))) { + if (dstX >= outputSize.x || dstY >= outputSize.y) { continue; } @@ -238,9 +244,9 @@ void Pass1(uint2 blockStart, uint3 threadId) { // final USM is a weighted sum filter outputs const float usmY = (dirUSM.x * w.x + dirUSM.y * w.y + dirUSM.z * w.z + dirUSM.w * w.w); - float3 op = INPUT.SampleLevel(samplerLinearClamp, float2((dstX + 0.5f) * kSrcNormX, (dstY + 0.5f) * kSrcNormY), 0).rgb; + float4 op = INPUT.SampleLevel(samplerLinearClamp, float2((dstX + 0.5f) * kSrcNormX, (dstY + 0.5f) * kSrcNormY), 0); op += usmY; - WriteToOutput(uint2(dstX, dstY), op); + OUTPUT[uint2(dstX, dstY)] = op; } } diff --git a/src/Effects/NNEDI3/NNEDI3_nns128_win8x4.hlsl b/src/Effects/NNEDI3/NNEDI3_nns128_win8x4.hlsl new file mode 100644 index 000000000..8174df620 --- /dev/null +++ b/src/Effects/NNEDI3/NNEDI3_nns128_win8x4.hlsl @@ -0,0 +1,4024 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. 
+// Generated by: nnedi3.py --nns 128 --win 8x4 --use-compute-shader --use-magpie +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME NNEDI3_128_4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 1 * 2 +//!HEIGHT INPUT_HEIGHT * 2 * 1 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!FORMAT R16_FLOAT +//!WIDTH INPUT_WIDTH * 1 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D temp; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_temp; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC NNEDI3 (double_y, nns128, win8x4) +//!IN INPUT +//!OUT temp +//!BLOCK_SIZE 32, 16 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[8]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 8; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 32.0; + float mstd1 = sumsq / 32.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * 
mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, -1106336704, -1104338237, 1041734649, -1112016931) + + W(1, -1134422686, -1108550970, -1135080854, -1117701445) + + W(2, -1097058997, 1036063029, 1053379177, -1099451832) + + W(3, -1096794255, 1068454954, 1068510503, -1092648385) + + W(4, -1096428577, 1045547931, -1114733521, -1101312270) + + W(5, -1108910446, -1102009308, -1146436468, -1118387353) + + W(6, -1110459731, -1122481515, -1107441476, -1118789516) + + W(7, -1101174078, 1017740065, -1113618698, -1107281979); + sum2 = W(0, -1107960738, 1034572820, -1105088633, -1147785184) + + W(1, 1022891960, 1003346080, -1103397515, 1035529310) + + W(2, -1112828174, -1096749055, 1056062200, -1105585021) + + W(3, -1099542471, 1067566060, -1127174872, 1034766166) + + W(4, 1028527356, -1090311143, 1048792675, -1117109292) + + W(5, -1124169384, 1028295012, -1106700271, -1116900804) + + W(6, 1003609248, -1101382023, 1033154740, -1121486356) + + W(7, 1019646072, -1102277943, 1037356874, -1114899026); + WS(-1075683047, -1087125913); + sum1 = W(0, 1042687663, 1052977914, -1087267578, 1041384601) + W(1, 1034752002, 1046022844, -1097639380, 1034807969) + + W(2, 1020877136, 1051043629, -1087388173, 1037740431) + W(3, 1043598702, 1049374510, -1095656786, 1044538321) + + W(4, 1032834596, 1044576234, -1108370107, -1113942663) + + W(5, 1049059763, -1108783446, -1101488315, 1043415765) + + W(6, 1041786741, -1105784362, -1111228387, 1011031587) + + W(7, 1050972591, -1089957133, -1108747496, 1034104676); + sum2 = W(0, -1129327146, -1112916736, -1109246006, -1168639863) + + W(1, -1149557934, -1120018795, 1012425107, -1122303521) + + W(2, 1027469999, -1105442406, 1044505132, -1114911422) + + W(3, -1116270499, 1056936993, 1054092693, -1109477794) + + W(4, -1107094547, 1036431709, 1015080154, -1122292075) + + W(5, -1114668504, 1029410453, -1102188513, 1038663029) + + W(6, -1136313139, -1126491682, -1123974759, -1124857486) + + W(7, -1115408970, 
-1112281372, -1115235380, 1004657207); + WS(1042212664, -1102203684); + sum1 = W(0, -1140902818, 1040147851, 1035472140, -1111794429) + + W(1, 1019874102, -1104962202, 1045102086, 1033944684) + + W(2, -1138465404, -1102798136, 1044852023, -1122108180) + + W(3, 1041713132, -1085739950, 1052255697, 1042129278) + W(4, 1031911135, -1090423514, 1050093153, 1026181805) + + W(5, 1032244040, -1096835716, -1114874577, 1045351411) + + W(6, -1122713961, 1015091005, -1125530488, 1045226448) + + W(7, 1032066151, -1100137137, 1025859399, 1037723887); + sum2 = W(0, 1025219442, -1109152196, 1026944460, -1136900198) + + W(1, -1119436548, -1122141349, 1040314652, -1112897189) + + W(2, 1041363772, 1032935700, -1124096755, -1144192892) + + W(3, 1026037858, -1095545125, -1132954458, 1046665648) + + W(4, -1106963956, 1029288712, 1063106013, -1112667488) + + W(5, -1148187164, 1048235476, 1044093468, -1091150222) + + W(6, -1115265228, 1017807431, -1119277141, -1098670428) + + W(7, 1027834104, -1124373883, -1115291573, -1099776205); + WS(-1106435384, 1038338229); + sum1 = W(0, 1040095954, 1057302312, -1085504617, 1038469973) + W(1, 1019822615, 1048464174, -1109673285, 1008999077) + + W(2, 1037825128, 1048602738, -1088007186, 1047759857) + W(3, 1041126536, 1046177110, -1089584127, 1042217259) + + W(4, 1042659628, -1113571175, -1112427435, -1122205838) + + W(5, 1046281616, 1010729037, -1099904421, 1042535855) + W(6, 1025627498, -1116720143, 985851280, 1020820192) + + W(7, 1052688116, -1090676327, -1109333660, 1034203799); + sum2 = W(0, 1016257202, 1059204215, 1078138204, 1045786159) + W(1, 1025056091, 1059357388, 1057901024, -1115583727) + + W(2, -1123344133, -1096239491, -1075026282, 1040463834) + + W(3, -1117490597, -1080318626, -1073900782, 1043123901) + + W(4, -1139226916, -1097775023, 1041808545, 1021989454) + + W(5, -1121712633, -1121076469, 1040778731, -1105966927) + + W(6, 1032149092, -1108589001, 1040543247, 1015040126) + + W(7, 1018740994, 1042934709, 1013253724, -1147555881); + 
WS(-1079558823, -1098708322); + sum1 = W(0, -1107155318, -1102044491, -1157836704, 1018716763) + + W(1, 1025234303, -1098872409, 1050456336, 1032647774) + W(2, 1015859078, -1087610848, 1055919296, 1043448582) + + W(3, 1036194249, -1086405173, 1051086989, 1041211303) + W(4, -1109510417, 1049149960, 1049231261, 1032229161) + + W(5, -1106597869, -1113006787, 1047976577, -1113988824) + + W(6, -1106784003, 1034815041, 1037659280, -1114873522) + + W(7, -1107821257, -1109243348, 1045770027, -1115826050); + sum2 = W(0, -1115121954, 1038073794, -1128914993, -1116577465) + + W(1, 1012713185, -1114116718, 1041575276, -1124395325) + + W(2, -1120959275, 1048906430, 1074552079, 1055266270) + W(3, 979525395, -1089285214, -1076455267, 1060162115) + + W(4, 1031308048, 1044939824, -1081125046, -1098098687) + W(5, 1023425822, 960279117, -1109266711, 1021768571) + + W(6, 1008898041, -1114246838, 1028966618, -1124862202) + + W(7, 1015643115, -1116674868, -1116558128, 1013382601); + WS(1045132600, -1120782135); + sum1 = W(0, -1105751784, 1057393815, -1105555137, -1117651397) + + W(1, -1105178898, 1048859946, -1097544977, 1030868229) + + W(2, -1104184899, 1059464001, -1093321826, -1113548192) + + W(3, -1114219853, 1046945589, -1096850414, -1114482291) + + W(4, -1110381909, -1115358437, 1045680426, -1100710867) + + W(5, 1038688878, -1101266388, 1054674458, -1101910270) + + W(6, -1129240105, 1006928425, 1040711722, -1109195062) + + W(7, 1028338913, 1001553277, 1051440502, -1106616547); + sum2 = + W(0, 1017265392, 1041015042, -1100279165, 1015388608) + W(1, -1109811903, 1031690548, -1110095441, -1118750728) + + W(2, 1023756444, 1041543835, -1121800962, 1018680528) + W(3, -1113627340, 1061785433, 1052269299, -1143924414) + + W(4, 1014378799, -1112539854, 1027655784, -1099334823) + + W(5, -1145950526, 1033767962, -1103181542, 1020584168) + + W(6, -1129429444, -1111764061, -1110834939, -1108053853) + + W(7, 1013677319, -1098545742, -1106577514, 1036964648); + WS(1052573852, -1122511817); + 
sum1 = + W(0, -1122539822, -1107197216, -1123108403, 1051517474) + W(1, 1036520122, -1104137920, 1038549849, 1023654291) + + W(2, -1150669710, -1095755763, 1032946604, 1039184399) + + W(3, -1131456914, -1089160552, 1050538028, 1041388597) + + W(4, 1027717464, -1090293588, 1052989852, -1168884669) + W(5, 1032118773, -1090056564, 1057974832, 1033029075) + + W(6, 1021054902, -1092174087, 1053464075, 1028526105) + W(7, 1030809195, -1086001910, 1057808582, 1041880934); + sum2 = W(0, -1085894681, -1108201039, 1070882268, -1081052443) + + W(1, 1038192816, -1090848333, -1093216289, -1102549623) + + W(2, -1098445483, 1052221636, 1067999858, -1087793608) + + W(3, -1102898046, 1071505414, 1073865984, -1090403840) + + W(4, 1008806191, -1098981857, 1058878464, -1095780376) + + W(5, 1050638036, -1086351349, -1083525828, 1055133376) + + W(6, -1102617523, -1096799400, -1107159037, -1096503933) + + W(7, 1057325981, -1088620184, -1083206959, 1053790412); + WS(-1073219892, -1080326418); + sum1 = W(0, -1114077580, -1101602915, 1057495924, -1151047116) + + W(1, -1142635506, -1103425331, 1053716920, -1114784094) + + W(2, -1142955026, -1090278522, 1055741332, -1111276924) + + W(3, -1123237842, -1092662145, 1051447237, -1121668701) + + W(4, 1038107057, -1091646784, 1052494629, 1000704387) + + W(5, -1103324917, -1114692451, 1049044089, -1109204280) + + W(6, 1026598572, -1101848592, 1043005848, 1010568075) + + W(7, -1095875035, 1038718196, 1047828379, -1156952092); + sum2 = + W(0, -1121663397, -1095443331, -1067370736, 1005231721) + W(1, -1116542136, 1050195534, -1071776975, 974104521) + + W(2, 1042431779, -1123573301, 1068672794, -1111985184) + W(3, 1037730633, -1100504352, 1077012119, 1047585075) + + W(4, 1010053773, 1043186658, 1070664889, 1016528922) + W(5, -1137348615, -1102861372, 1045361006, -1109020232) + + W(6, 1028087910, -1110454800, -1123945395, 1037668055) + + W(7, -1119025600, 1040229501, 1027642294, -1117277443); + WS(-1130027456, 1047744266); + sum1 = + W(0, 1030328637, 
-1103440100, 1050534604, -1130350917) + W(1, -1100312969, 1043447132, 1038395889, -1105733872) + + W(2, 1036569044, -1096543805, 1037345619, 1043114871) + W(3, 1044449278, -1093151398, -1106526388, 1022399622) + + W(4, -1106699970, 1024264999, -1115006287, 1020228332) + W(5, 1046212150, -1115860524, 1035372917, 1024836117) + + W(6, -1104762293, 1041344218, 980910400, -1123108872) + + W(7, -1124975021, 1032410566, 1044066628, -1112803412); + sum2 = + W(0, 1029880722, 1033391383, -1137819024, -1114516031) + W(1, -1098453144, 1041649400, -1137377888, 1025620716) + + W(2, 1029103498, -1095624145, 1040796761, 1035240799) + W(3, 1067660000, -1082091368, -1080643124, 1066795978) + + W(4, -1094604015, 1056223691, -1102259819, -1098376459) + + W(5, 1046293202, 1027973488, -1121978412, 1036814117) + + W(6, -1104816365, 1034193475, -1148596400, -1107858514) + + W(7, -1117927817, 1034286113, 1041009105, -1114981924); + WS(1049012636, 1037011386); + sum1 = W(0, -1108102978, -1097922903, 1049818257, 1041216474) + + W(1, 1042768739, -1089977373, 1052134241, -1106782690) + + W(2, 1036457352, -1083790961, 1059747641, -1120768141) + W(3, -1105146600, 1050515995, 1062551839, 975424142) + + W(4, -1100621772, 1053649082, -1088576433, 1042971413) + + W(5, -1100160944, 1044221701, -1093385002, 1047421135) + + W(6, -1149100234, 1020595485, -1108848982, -1142399358) + + W(7, -1116535239, -1110188826, -1147628986, 1034144007); + sum2 = W(0, -1130376652, -1109271051, -1098570547, 1029552052) + + W(1, -1109307779, 1040605589, -1097221185, -1097745138) + + W(2, 998991648, -1117670218, -1087670872, -1101762859) + + W(3, -1105231849, 1068278024, 1068017963, -1100990121) + + W(4, -1124853008, -1099193318, -1103240593, -1094879980) + + W(5, -1110904873, 1035466738, 1049832589, -1119653306) + + W(6, 1033926100, -1111928806, 1018913792, -1099498770) + + W(7, 1012787248, 1025845624, 1035573068, 1046656503); + WS(-1099128120, 1074104605); + sum1 = + W(0, 1001695642, 1031876531, -1106225286, 
1039516460) + W(1, -1114738061, 1036082771, -1127447088, 1037331996) + + W(2, -1137881464, 1049670772, -1114090766, 1048871226) + W(3, 1035838167, 1045634202, -1084335000, 1024672505) + + W(4, 1036976671, 1039176783, -1092101927, -1111706306) + W(5, 1043227732, 1044499416, -1106295064, 1040216275) + + W(6, 1024532056, -1115296143, -1108352670, 1018355325) + + W(7, 1041576607, 1036111714, -1103209586, -1129216251); + sum2 = + W(0, -1125983803, 1057121124, -1097673861, -1097055530) + W(1, 1026289226, 1048908439, -1081617292, -1105528539) + + W(2, 1003361806, 1042597022, -1083990029, -1094090339) + W(3, 1036989304, -1101738899, 1066557847, 1060807350) + + W(4, -1121376018, -1101518001, 1050089807, 1045798092) + + W(5, -1127516251, -1114490175, 1021212715, 1007168711) + W(6, 999276462, 1032222895, -1107462587, 1021142275) + + W(7, 1020382491, -1111061815, 1048167829, 1024157832); + WS(1043392312, -1114334171); + sum1 = + W(0, 1041086874, 1046571639, -1101297859, -1108043111) + W(1, 1027676176, 1051603231, -1100311555, 1031499310) + + W(2, 1029494548, -1114765098, -1090719068, -1123479337) + + W(3, 1038182418, 1063881062, -1092551191, 1047470620) + W(4, 1033375641, 1036439434, -1093343060, 1029157954) + + W(5, 1014428948, 1039261900, -1095549099, -1113522785) + W(6, 1016176348, 1049353410, -1102133676, 1032866554) + + W(7, 1036956537, 1039119196, -1097401399, -1108903402); + sum2 = W(0, 1032922039, -1110774431, 1024080221, -1111786600) + W(1, 1034144522, 1041464609, 1024001520, 1038622987) + + W(2, 1035186417, -1089347840, -1102975683, -1098326560) + + W(3, -1106146153, 1063858171, 1062496232, -1094555153) + + W(4, 995983013, -1103182741, -1104694448, -1110476474) + + W(5, 1014708981, -1105498679, 1034761930, -1107944280) + + W(6, -1118508182, 1046443094, -1113235807, 1045574278) + + W(7, -1122673776, -1107403517, -1126790312, -1119406695); + WS(1055141020, 1060902337); + sum1 = + W(0, 1034435725, 1057965143, -1123568702, -1104499136) + W(1, -1133288704, 1052483105, 
-1121795134, 1006999415) + + W(2, 1026181508, 1052696320, -1094115828, -1153162900) + + W(3, 1033934442, -1122504460, -1084201687, 1029709087) + W(4, 1030709219, 1052884304, -1093347107, 1036285529) + + W(5, -1118188821, 1049027177, -1096042081, -1155692311) + + W(6, 1027652318, 1046782257, -1098441692, 1013713298) + + W(7, 1035419593, 1045790650, -1101051195, -1111952258); + sum2 = W(0, 1043194323, -1070666261, -1098866335, -1101126135) + + W(1, 1017945729, -1075189642, -1106608294, 1010301074) + W(2, 992756489, 1068575013, 1051129947, -1107911121) + + W(3, -1117717167, 1076566585, 1049134123, 1046700061) + W(4, 1023440269, 1056539999, 1047751107, 1020103457) + + W(5, -1108161809, -1098049115, -1097073139, 1026363561) + + W(6, 1034607052, 1036076910, 1011007330, 1035420194) + + W(7, -1115924363, -1112346970, 1016205049, -1137869730); + WS(-1090579868, 1070279725); + sum1 = W(0, 1032329895, 1048430680, -1101560157, -1129281319) + + W(1, -1102328821, 1050349544, -1091335294, 1012204235) + + W(2, -1113203900, 1052980239, 1039656780, 1033674783) + + W(3, -1098061089, 1057014372, -1084573078, -1115372202) + + W(4, -1105344998, 1057962188, -1103450436, -1108117804) + + W(5, 1034205973, 1038273502, 1043587871, -1111454739) + + W(6, -1106552005, 1046276062, -1098888483, -1115265901) + + W(7, 1041435478, 1049777060, 1002064136, -1109943484); + sum2 = W(0, 987673807, 1027892228, -1103141235, 1005138532) + W(1, 1036995374, -1102736131, 1049007178, 1028221332) + + W(2, -1095509641, 1034852634, -1090760003, -1123669832) + + W(3, -1094252408, 1059241808, 1060311666, -1128511149) + + W(4, -1119095571, -1103859919, -1129798465, 1032914452) + + W(5, -1103179232, 1043446486, -1100012675, 1006285508) + + W(6, 1040985329, -1109795439, 1051746582, -1122773902) + + W(7, -1114181285, 1004807604, -1113144024, 987177167); + WS(1058381774, -1086956646); + sum1 = + W(0, 1005434106, -1096145742, 1046241393, 1026532260) + W(1, -1149835360, -1100460324, 1039571170, -1120107684) + + W(2, 
1034370117, -1096424577, 1054608457, -1107524072) + + W(3, 1036055789, 1033532617, -1110152984, -1123297125) + W(4, 1007595428, 1049990129, -1100950084, 1032832748) + + W(5, -1173712576, 1042033667, -1103984787, 1041584782) + + W(6, -1122685332, -1147483724, -1122634471, -1111048693) + + W(7, -1119060568, 1021597150, 1016207588, 1023439817); + sum2 = W(0, 1073050559, 1049113865, 1048373410, -1123382260) + W(1, 1069067455, 1035642194, -1101089925, 1035361564) + + W(2, -1096535796, -1105600932, 1050248373, -1105239942) + + W(3, -1073893027, -1114871718, -1113393188, 1000972215) + + W(4, -1077684470, -1104998692, 1015693919, -1112223483) + W(5, 1038335059, 1034875990, 1032989733, 987872957) + + W(6, 994568047, 1004288083, 1024635942, 1016564448) + W(7, -1109196141, 1019116562, 1041247426, 1019016937); + WS(1051792028, 1027146209); + sum1 = + W(0, -1173418992, -1096555104, 1048715847, 1037767898) + W(1, 1031492470, -1099279313, 1043543405, 1032372106) + + W(2, 1030866682, -1102472206, 1045948678, 1031937048) + W(3, -1111745573, -1086728512, 1048944276, 1030986133) + + W(4, 1037975597, -1098029143, 1053562136, 1035090980) + W(5, 1032231158, -1099704976, 1041023999, 1038793940) + + W(6, 1021051177, -1107704157, 1031959404, 1031982829) + W(7, 1029296729, -1088435526, 1046674105, 1043937550); + sum2 = + W(0, 1076285256, -1071495861, -1089164092, 1024798548) + W(1, 1067013205, -1080654015, 1044446905, -1130255717) + + W(2, 1054912747, -1092768710, -1089831336, -1111620633) + + W(3, -1103468782, 1053690657, 1049463752, -1129289568) + + W(4, -1082179927, 1062196550, -1116071286, 1022374674) + + W(5, -1086354611, 1059230433, 1055604285, -1147177200) + + W(6, -1086555076, 1065208788, -1102169042, 1008723412) + W(7, -1077304359, 1069826015, 1049830695, 990531682); + WS(-1116843232, 1041538044); + sum1 = + W(0, 1038718416, 1050813118, -1091780513, -1107356403) + W(1, 1041393778, 1048646402, -1107072481, 1032833522) + + W(2, 1023355030, 1043671731, -1097953353, -1126022145) + + 
W(3, 1040958951, 1037057641, -1088265503, -1185162751) + W(4, 1040537034, 1050123337, -1105750724, 1038008293) + + W(5, -1112360254, 1052799074, -1091053114, -1123581176) + + W(6, 1043346413, 1037436293, -1121572392, 1033129910) + + W(7, 1047211822, 1042212684, -1091261766, -1116284746); + sum2 = + W(0, -1127892588, -1090048491, -1076486023, 1071993665) + W(1, 1025248710, -1124324076, -1080184847, 1067215194) + + W(2, -1127740272, -1092974499, -1080442855, 1068762214) + + W(3, -1114879615, 1047668544, 1048567939, 1014309155) + W(4, -1125515678, 1042122418, 1066323214, -1079656754) + + W(5, -1135605467, 1052455522, 1068938556, -1077318729) + + W(6, 1015277154, -1102075660, 1066205534, -1083078137) + + W(7, -1118452308, 1052515578, 1058048535, -1086246531); + WS(1045082936, 1047750074); + sum1 = W(0, 1017867382, 1038967959, -1107005280, -1117189504) + + W(1, -1105827232, 1036533543, -1101431328, 1000061951) + + W(2, -1139557912, 1048524561, 1044741561, 1034808454) + + W(3, -1098507795, 1057427781, -1099209723, -1102176472) + + W(4, -1112138258, 1057555836, -1103886606, -1102736460) + + W(5, -1113781499, 1044700722, -1117912777, -1102956733) + + W(6, -1117154624, 1037485187, -1108908881, -1113728179) + + W(7, 1024003356, 1046059755, 1024367918, -1108156515); + sum2 = W(0, 1003184961, 1024857236, -1114215978, 1037587704) + W(1, 971431442, -1109323954, 1030818718, -1121823736) + + W(2, -1131894760, 1026446282, 1031262679, 1045339410) + + W(3, -1133283297, -1105665514, 1056963612, -1107079097) + + W(4, 984723076, -1116186208, 1055283667, -1090072253) + W(5, 1031831407, 1024492508, 975724297, -1102678968) + + W(6, -1125590160, -1121629964, -1116339968, -1103014356) + + W(7, 1031681833, -1148808497, 1026784592, -1123463096); + WS(1059869006, 1015916977); + sum1 = + W(0, -1111801867, -1103727218, 1051877295, -1115491581) + W(1, 1039164638, -1102716146, 1051739903, -1106752078) + + W(2, 1023424768, -1089648586, 1057614174, -1104830517) + + W(3, -1118862224, -1092420393, 
1055627629, -1112797325) + + W(4, -1114194639, 1049092536, -1096079006, 1038662167) + W(5, -1099094340, 1052180160, 1008315659, 1030088557) + + W(6, -1118811114, 1032231232, -1121275805, -1131680577) + + W(7, -1096321008, 1046058153, 1039975892, -1175801754); + sum2 = + W(0, -1113427160, -1090644207, 1034414883, 1038986783) + + W(1, -1104433368, -1091764889, -1120815960, -1135559136) + + W(2, -1104169884, -1109333336, 995253792, 1033390214) + W(3, -1117600576, 1065407690, 1059846520, -1120264784) + + W(4, -1102319880, -1098732665, 1048874179, -1104270640) + + W(5, 1039627671, -1109931470, 1008884504, 1019376128) + W(6, -1097619580, 1043714297, 1021072320, -1111461786) + + W(7, 1047721667, 1041133481, -1103318888, -1142244432); + WS(1054131356, -1077482588); + sum1 = W(0, -1100136045, -1101215881, 1045307006, 1047064142) + + W(1, 989703980, -1100083265, 1039854844, -1120855287) + + W(2, -1114161471, -1091699940, 1051542530, 1042850313) + + W(3, -1117800461, -1088812180, 1056393300, -1122501592) + + W(4, 1013262449, -1095974704, 1053327664, -1118254671) + + W(5, -1112319975, 1033042241, 1059113280, -1111473941) + + W(6, 1027862775, -1095224480, 1049428707, -1119385054) + + W(7, -1103330903, -1102789769, 1055440053, -1109190398); + sum2 = W(0, -1098032416, 1063396296, 1043414797, -1093635160) + W(1, 1055692800, 1024719031, 1032661901, 1049215120) + + W(2, -1101668743, 1065491656, 1058961916, -1090478816) + + W(3, -1100435053, 1073645059, 1063424749, 1050905898) + + W(4, -1089098280, 1060161647, -1096580753, -1135528507) + + W(5, -1094416925, -1074198531, -1098776178, 1042380471) + + W(6, -1099202751, -1081296593, -1115458644, 1032695397) + + W(7, -1104551065, -1083400329, -1092624053, 1054277265); + WS(-1082679118, -1072338335); + sum1 = + W(0, -1126832308, -1092407662, 1041636256, 1032452512) + W(1, 998650886, -1099976035, 1035459839, -1118647007) + + W(2, 1029713605, -1098757312, 1053507448, -1113994637) + W(3, 1045660229, 1049242330, -1097348797, 1019901759) + + 
W(4, -1125245511, 1052609817, -1097111418, 1026546307) + W(5, 1001090478, 1049104969, -1099322690, 1041545965) + + W(6, -1126242526, 958956607, -1116731426, -1113342195) + + W(7, -1128346278, -1170516592, 1009511091, 1027607012); + sum2 = W(0, -1069228184, -1099529380, -1112044089, -1144476451) + + W(1, -1073602631, 1043301860, 1041147606, -1112019052) + + W(2, 1021776922, 1054858487, -1119872164, 1036271953) + W(3, 1076866983, 1044685536, -1106846135, 1018899191) + + W(4, 1069809281, 1062109344, 1042271976, -1114284680) + + W(5, -1101499289, -1097491140, 1035368326, -1126672913) + + W(6, 1013617559, -1149319294, 1025827813, -1112053703) + + W(7, 1036050969, -1113127688, -1115137330, -1146921887); + WS(-1125906880, 1029353026); + sum1 = + W(0, 1052620124, -1082830235, 1044570800, 1033131793) + W(1, 1040928385, -1087911853, 1048894344, 1031982257) + + W(2, 1053619569, -1088553290, 1054767249, -1118254573) + W(3, 1034478636, -1115166792, 1052323270, 1037418321) + + W(4, -1149501032, -1104403058, -1096342700, 1045103465) + + W(5, 1027228408, 1046902211, -1092690897, 1053685367) + W(6, 1033816441, -1140598044, -1112660402, 1032758420) + + W(7, -1143379631, -1115398454, -1089842615, 1056771243); + sum2 = W(0, 1052242091, -1123374674, -1088438592, -1109756837) + + W(1, -1105901664, 1032004261, -1109896025, -1117902306) + + W(2, 1041713460, 1057464262, -1106721416, -1106560728) + + W(3, -1098081544, 1055897675, 1057898227, -1101499608) + + W(4, -1101414896, -1108453649, 1044097375, -1114554169) + + W(5, -1140318759, -1099401944, 1041656473, 1042131382) + + W(6, -1123485994, 1039811869, 1042751001, -1104786072) + + W(7, -1108472633, -1092934450, 1033994653, 1048148308); + WS(-1080878567, 1068324028); + sum1 = W(0, -1115471279, -1095139206, 1057833304, 1032811018) + + W(1, 1019310859, -1103458412, 1049561152, -1118469827) + + W(2, -1121098697, -1090917913, 1058273109, -1107876783) + + W(3, -1115193805, -1094785898, 1054221326, -1109399058) + + W(4, -1153442440, -1100131090, 
1050583410, -1121957752) + + W(5, -1104713039, -1118370765, 1048985266, -1109945243) + + W(6, 1023975060, -1102569730, 1041261354, -1125987028) + + W(7, -1094675607, 1044057356, 1045920684, -1144681671); + sum2 = W(0, -1122597644, 1052263517, 1084267296, 1037076324) + W(1, 1032420914, -1132353885, 1077171750, 1017945885) + + W(2, -1109997244, -1113437764, -1076080701, 1045994672) + + W(3, 1029919127, -1086124512, -1066836895, -1105598049) + + W(4, -1129031901, -1134522929, -1073829589, -1115453382) + + W(5, 1025876905, -1137529969, -1128728545, 1027437378) + + W(6, -1121404056, 1041285329, -1128683125, -1112127416) + + W(7, 1025649559, -1111439550, -1109934248, 1032857588); + WS(-1106960696, -1099012034); + sum1 = W(0, -1114528403, -1099071342, 1055554285, -1109121109) + + W(1, -1119077260, -1099370430, 1049964157, -1115766890) + + W(2, -1143558020, -1089278443, 1061104081, -1114965299) + + W(3, -1119877760, -1086538844, 1056629915, -1120868875) + + W(4, -1109244232, 1010994112, 1049997315, 1034547527) + + W(5, -1096650748, 1041376829, 1042073805, -1114495407) + + W(6, -1112604445, 1042854492, 1031563405, -1139584324) + + W(7, -1100402892, 1049345155, -1123516568, -1118254096); + sum2 = W(0, -1116593925, 1027806287, 1032648371, 1024756439) + + W(1, 1028908115, -1136470458, 1006654490, -1116529299) + W(2, 999050165, 1039840949, 1052059722, -1107372324) + + W(3, 1037914955, -1122048333, 1057985079, -1128153177) + + W(4, -1106109711, 1057135276, -1090440242, -1142966917) + + W(5, -1105957739, -1105275062, -1131926605, -1123338691) + + W(6, -1123834115, -1096566133, 1018741845, 1013415162) + + W(7, -1113783754, -1105721601, -1124260101, 1018182189); + WS(-1103089976, 1059868827); + sum1 = W(0, 1026631887, -1091300264, 1056895190, 1031003956) + + W(1, 1033737723, -1096562704, 1050565142, -1140469197) + + W(2, 1034000797, -1087537316, 1059766350, -1108911023) + + W(3, -1130496915, -1094386976, 1053939124, 1032926090) + + W(4, -1106694966, 1044429902, -1108577640, 
-1120892680) + + W(5, -1105866475, 1045093007, -1109922270, 1034482579) + + W(6, -1106727101, 1040636078, -1122075862, -1136817768) + + W(7, -1098705919, 1044615672, -1100982776, 1045052014); + sum2 = + W(0, -1114832682, 1044376552, 1052671036, -1084839384) + W(1, 1038307608, -1101986459, -1094811671, -1076225566) + + W(2, -1109268272, 1025272345, -1106762472, -1074779618) + + W(3, 1031259614, -1112738770, 1052706603, -1088390497) + W(4, -1136199018, 1014008179, 1040752903, 1066038403) + + W(5, -1118450441, 1042085570, -1100877636, 1071357616) + W(6, 1025376385, -1110075404, 1051973899, 1068318141) + + W(7, -1125957173, 1027010884, 1036345951, 1050587817); + WS(-1089897038, 998399462); + sum1 = W(0, -1100326508, -1114312802, 1049930251, -1141222070) + + W(1, -1099771006, -1109889234, 1047140918, -1114407115) + + W(2, -1103969415, -1092992143, 1060325312, -1106384368) + + W(3, 1030998252, -1098209637, 1062808044, 1017922927) + + W(4, -1111096702, -1116385180, -1128865717, 1023432554) + + W(5, -1129527451, -1096464604, 1048691588, -1125954964) + + W(6, -1115021151, -1111889198, -1137813989, -1121716413) + + W(7, -1124899748, -1109224641, 1043572929, 1020452535); + sum2 = + W(0, -1094975511, -1113696880, 1023473326, 1029862812) + W(1, -1089233591, 1032844980, 1036803698, -1117299445) + + W(2, -1089977002, 1066397244, -1129026618, 1025970568) + W(3, 1033778332, 1037917830, -1104280508, 1031377784) + + W(4, 1006981461, 1039504138, 1024702770, -1122944997) + W(5, -1137053176, 1032622380, -1121975322, 1022461928) + + W(6, -1125132764, 1021506052, 974010432, -1130740686) + W(7, 979684320, -1119563165, -1138205260, 1024270227); + WS(-1096120220, -1099720911); + sum1 = W(0, 1052677544, -1105594052, -1101878153, 990296621) + W(1, 1027883172, 1050583742, -1120505890, 1026529890) + + W(2, 1046184634, 1030711656, -1094174707, 1039246623) + W(3, 1036941212, 1050761182, -1087347673, 1031039363) + + W(4, 1037786724, 1044997418, -1091299260, 1038193547) + W(5, 1039372749, 
1051859318, -1092171165, 1036921611) + + W(6, 992842790, 1041579690, -1105854769, -1115076484) + + W(7, 1040590154, 1052858474, -1087724293, 1007468831); + sum2 = + W(0, 1040381409, 1023777345, -1110031849, 1022623295) + W(1, -1131846701, -1095435966, -1146822820, -1096899181) + + W(2, 1030695069, -1111411494, -1089001391, 1049343346) + + W(3, 1015259161, -1085163747, -1074171332, -1107180310) + + W(4, 1026868853, -1100600319, -1078760233, 1043071461) + W(5, -1164136976, 1052695808, 1048168469, 1020495459) + + W(6, -1121164685, 1041440525, 1074132102, 1017314953) + W(7, -1109936403, 1051545155, 1072577306, 1050115198); + WS(-1080112807, -1111145054); + sum1 = W(0, -1103297132, 1049824404, -1133975251, 1026400021) + + W(1, -1106341774, 1049617192, -1122989109, 1042898005) + + W(2, -1098278655, 1059552348, -1086286580, 1041608840) + + W(3, 1024154699, 1031972225, -1137351813, -1145221295) + + W(4, 1037514093, -1085661228, 1056970252, -1102084285) + W(5, 1034961350, -1097823136, 992708574, 1041904442) + + W(6, -1125780682, -1114326813, 1021582000, 1038246926) + + W(7, -1109009483, -1119831648, 1040424089, 1042525799); + sum2 = + W(0, 1041258750, 989910477, -1134607603, -1109794979) + W(1, -1109734283, -1100922013, 1016064722, 1041809078) + + W(2, -1123542769, -1096844113, 1041392492, -1126381362) + W(3, -1113561957, 1051742866, 1057819912, 982123162) + + W(4, -1110564237, -1100235579, 1047439582, -1119465765) + + W(5, 1026811493, -1105813951, -1113868277, 1026735929) + + W(6, -1130694514, -1151284621, -1118083589, 1019553626) + + W(7, 1024057869, -1136706867, -1114600003, -1114678252); + WS(1043409720, -1105036943); + sum1 = + W(0, 1040191749, -1096303173, -1115184605, 1041655059) + W(1, 1033970558, -1095924673, 1041748910, 1041917006) + + W(2, 1042494496, -1093807736, 1044442592, -1160531625) + W(3, 1046767380, -1092831585, 1042876074, 1039311757) + + W(4, 972615552, -1096756208, -1140612185, 1020654492) + W(5, 1032778168, -1126846855, 1018241414, 1043700571) + + 
W(6, 1032217109, -1115229280, 1027878365, 1034107952) + W(7, 1032356247, -1098580285, 1034858676, 1036396440); + sum2 = W(0, 1027808259, -1139047739, 986751832, -1095774205) + W(1, 1010119787, 1043340308, 1026247831, -1093511024) + + W(2, -1111510504, -1106790738, 1065364148, -1089533357) + + W(3, -1113418759, -1096910874, 1040723354, 1053173071) + + W(4, 1032132185, 1040059331, 1038861011, -1106558855) + + W(5, -1121052855, -1115460243, -1113647147, 1027584935) + + W(6, 1014515195, 1017581734, 1041604470, -1121443175) + + W(7, 1010692603, -1120741339, -1135975307, 1014681515); + WS(1035857520, 1029952289); + sum1 = + W(0, -1143077795, 1037051011, -1110400621, -1121149466) + W(1, -1105351783, 1029971495, -1106469910, 1043652126) + + W(2, -1113811952, 1049064844, -1094483622, 1027875988) + W(3, -1106526780, 1035603988, 1058276716, 1035104701) + + W(4, 1035289215, -1083894414, 1056346487, -1114346217) + W(5, 999420801, -1098557596, 1050633420, -1123554317) + + W(6, -1129002434, -1111645039, 1042325343, -1130799743) + + W(7, -1113970285, -1130790920, 1026109447, 1025377311); + sum2 = W(0, -1118517607, 1026484093, 1039465846, 1008581907) + + W(1, -1101077931, 1046485772, 1030900361, -1114630284) + + W(2, 1029148161, -1098019518, 1048118782, -1106945018) + + W(3, -1087996420, 1071267691, 1064954318, -1097625455) + + W(4, 1047526016, -1082674625, -1084489705, -1156379726) + + W(5, -1108756119, 1032470282, -1118487613, 1024110641) + + W(6, 1038718292, -1106613565, -1112021711, 1020048570) + + W(7, 1024464693, -1113199029, -1120012731, 1036291722); + WS(1058189134, 1034857672); + sum1 = + W(0, -1171748679, 1031241360, -1100969760, 987174985) + W(1, -1105475825, 1044072484, -1094075536, 1045893176) + + W(2, -1097172873, 1057114388, -1082247827, 1042860595) + W(3, -1108837708, 1051374554, 1060030327, 1031954257) + + W(4, 1038034707, -1083662360, 1058651386, -1120638978) + + W(5, -1131134898, -1099379283, 1046921459, -1137288661) + + W(6, -1113151849, -1121036423, 1041721673, 
1037681908) + + W(7, -1119273967, 1018101850, 1036514264, 1029972391); + sum2 = + W(0, 1003810984, 1020569783, -1128982694, 1043745048) + W(1, 1036204547, -1114489126, -1099380876, -1104004872) + + W(2, -1101998989, -1121260135, -1101918458, -1104165612) + + W(3, 1041029203, 1052640914, 1059611184, -1121067579) + W(4, -1098215637, 1059150102, 1040381797, -1100369008) + + W(5, 1044393168, -1098121773, -1094102036, 1032515174) + + W(6, -1110527635, 1037147118, 1028175839, -1114899975) + + W(7, 1027560485, -1110248773, -1113935142, 1000033398); + WS(-1107450480, 1040804833); + sum1 = W(0, 1040192716, 1040473489, -1095196747, -1124855402) + + W(1, 1022226221, 1047969017, -1098407451, 1020416041) + + W(2, 1002013395, 1044185574, -1087290490, -1160070647) + + W(3, -1117682794, 1060667554, -1089168888, 1045737613) + + W(4, 1001040843, 1057909434, -1111951852, -1101676720) + + W(5, 1000424795, 1049088596, -1119291421, -1103624738) + + W(6, -1120982404, 1038016429, 1031577591, -1104451167) + + W(7, 1025306833, 1042416864, -1112956481, -1115902938); + sum2 = W(0, 1014727333, -1124633329, 1026257754, -1111611661) + + W(1, -1117243126, -1107065949, -1111801245, -1136702760) + + W(2, 1029406572, 1041729284, -1084100906, -1103677442) + + W(3, 1027900147, -1094200388, 1052541581, 1050798725) + + W(4, -1112527674, -1104959017, 1070744988, -1105828213) + + W(5, -1114664521, 1040214496, -1111707080, -1111536822) + + W(6, -1111634336, 1005029260, -1127513012, -1106506739) + + W(7, -1107116047, 1049039902, -1107038609, -1140667375); + WS(-1113867888, 1064515135); + sum1 = W(0, -1113562537, 1048061265, 1011269086, -1105945126) + + W(1, -1107046062, 1043460993, -1098944190, -1148623547) + + W(2, -1103978495, 1060048584, -1107431310, -1111558073) + + W(3, -1098739202, 1062873548, 1045822761, -1114838028) + + W(4, -1122423580, -1119786749, 1025265092, -1101841139) + + W(5, 1019693149, 1023528257, -1112998912, -1117915947) + + W(6, -1116789880, -1106657727, -1142662356, -1106868487) + + 
W(7, 1020558520, -1137576074, -1104660585, -1122252061); + sum2 = W(0, -1145158406, 1022878354, -1111624996, 1010204019) + + W(1, 1012385731, -1129472106, 1015658802, -1123973089) + + W(2, 1016118130, 1044961455, 1040055751, -1157676569) + W(3, 1024197601, 1049544336, 1057430996, 1022031298) + + W(4, -1112179804, 1030346877, -1114498280, -1124728962) + + W(5, -1119519161, -1131221354, 1020564842, 1023003938) + + W(6, -1127326042, -1095482557, -1112131584, -1121117277) + + W(7, -1113792718, -1093377057, 1019749938, -1141925830); + WS(1051333020, -1087054195); + sum1 = + W(0, -1112042433, -1104387605, 1052246829, -1113527444) + W(1, 1019002067, -1096771599, 1048899277, -1098301717) + + W(2, -1115058903, -1102529947, 1054136106, -1095193948) + + W(3, 1046618143, -1093976551, 1044227957, 1044013586) + + W(4, -1107874438, -1089735051, 1060328029, -1107288943) + + W(5, -1148774132, -1129150690, 1046377228, 1038700845) + W(6, 1036768028, -1101876091, 1045160291, 1024370318) + + W(7, -1154881470, -1098135714, 1045432325, 1019966234); + sum2 = + W(0, -1114066012, 1041392892, 1044719218, -1097547793) + W(1, 1029900262, -1112610934, 1046575198, -1095644935) + + W(2, -1114806697, 1041587346, 1050162797, -1102972970) + + W(3, -1110038644, 1025557898, 1055420600, -1095936036) + W(4, 1036646832, 1055783298, -1099287148, 1028020818) + + W(5, -1110070018, -1102485451, -1100701871, -1113902766) + + W(6, 1020625547, 988088600, 1033323822, -1135974783) + W(7, 987867800, 1039502616, -1119304428, 1018696751); + WS(-1120103648, -1090070191); + sum1 = + W(0, -1117418940, 1049030490, -1112262737, -1114427969) + W(1, 1024592611, 1050801054, -1094705236, 1032013281) + + W(2, 1040968250, -1111205267, 1042055990, -1110419822) + + W(3, 1049918615, -1116228463, -1085341341, 1047944497) + + W(4, 1034410595, 1050512309, -1106227665, -1132367051) + W(5, 1035134604, -1102511675, 1036298997, 1025392341) + + W(6, -1119948353, 1034763283, -1138448657, -1111046724) + + W(7, -1144523437, 1039656998, 
-1110564136, -1120201127); + sum2 = + W(0, -1122606938, 1046354401, -1124762815, -1118515993) + W(1, -1141997982, 1040228902, -1102583710, 1036845124) + + W(2, 1027805544, -1143835982, 1052785838, -1106443071) + W(3, -1120462650, 1061896257, 1059397472, 1043676745) + + W(4, -1100560083, -1080731069, -1120768779, 995709275) + + W(5, -1105726471, -1098049024, -1126645315, -1128858827) + + W(6, 1025575588, 1026943708, 1011421951, -1112152255) + + W(7, -1113668318, 1032253740, -1122685018, -1127110207); + WS(1060158670, 1068766623); + sum1 = + W(0, -1116432453, 1056712636, -1087151539, 1040295867) + W(1, -1131098839, 1049784895, -1102593034, 1033948341) + + W(2, -1099753724, 1057908127, -1089291740, 1031077093) + W(3, 1034945056, 1044163070, -1094800699, 1027113355) + + W(4, -1122123599, 1028198895, 1036808107, -1132674679) + + W(5, 1040823512, -1122504371, -1120242995, -1117547229) + + W(6, 1040184285, -1123329903, -1127465369, 1040274980) + + W(7, 1050332416, -1096645803, -1132187055, -1137741501); + sum2 = + W(0, 1036379663, -1098232449, -1128107780, -1120677803) + W(1, -1108907739, 1058983203, 1035811659, -1120240179) + + W(2, 1070189048, 1016986873, 1040144345, 1029686139) + W(3, 1073213180, 1047923452, -1105399966, -1109097268) + + W(4, 1065321765, 1033582270, -1110681209, 1041308319) + W(5, -1076673488, 1031365639, 1039880527, -1109042515) + + W(6, -1079893128, 1038048757, -1116001167, 1034493417) + + W(7, -1072585926, -1107019906, 1018297481, -1131123922); + WS(-1112459888, 1031046963); + sum1 = W(0, -1112719265, -1128634346, 1043326546, -1115081924) + + W(1, -1129483154, -1120462421, -1103778738, -1114915140) + + W(2, -1103727283, -1112785958, 1050524372, -1104237825) + + W(3, -1091575740, 1056781779, 1067939282, -1096588493) + + W(4, -1104335643, -1153409228, 1048587330, -1108534894) + + W(5, -1155155940, -1108013960, -1132710977, -1114726045) + + W(6, -1114094348, -1118230989, -1109505653, 991235804) + + W(7, -1105426861, 1027667442, 1036777787, 
-1111746006); + sum2 = + W(0, -1119086167, 1037241732, -1114652243, -1131518324) + W(1, 1013020612, -1129818230, 1028260797, -1115730540) + + W(2, -1141008560, 1029187991, -1112264831, 1036245214) + + W(3, -1108719150, -1074141953, 1072760747, 1034705438) + W(4, 1023891147, 1019083994, 1042535682, -1119172530) + + W(5, -1145112744, -1124211508, -1131878972, -1133917220) + + W(6, 1018251990, -1122886966, 1037493286, -1114007324) + + W(7, -1116478859, 1027168441, -1129254230, 1018465006); + WS(-1086783566, -1086791567); + sum1 = W(0, -1115195632, -1095708330, -1123676804, 1041725033) + + W(1, -1153026662, -1093442522, 1018199561, -1121101892) + + W(2, -1134496297, -1098161001, 1037878573, 1034570216) + + W(3, -1108787789, 1044632085, 1064261167, -1103584185) + + W(4, -1119803561, -1091375169, 1057417647, -1108866704) + + W(5, -1145121243, -1107384199, 1052226946, -1122033784) + + W(6, -1131708777, -1100078210, 1046948682, -1117964907) + + W(7, -1110064394, -1104664012, 1047330733, 1022321300); + sum2 = W(0, 1031515378, -1077397139, -1134939312, -1112896962) + + W(1, -1121810184, -1079753374, -1097085111, 1035985281) + + W(2, 1010081392, 1044674665, 1052841311, -1105060263) + W(3, -1119765868, 1073378801, 1007836336, 1031953140) + + W(4, 1035546479, 1056367001, 1044809025, 1022446080) + + W(5, -1106495935, -1109376902, -1118570548, 1031289566) + W(6, 1028378344, 1035615024, 992033726, 1032016077) + + W(7, 1003402623, -1118889676, 1010750288, -1150317246); + WS(-1096711324, -1080143969); + sum1 = W(0, -1135181951, -1095339398, 1050582337, 1026786373) + + W(1, 1025399430, -1094144146, 1049483953, -1123204644) + + W(2, 1033906501, -1093853897, 1045129706, 1006467326) + W(3, 1041786535, -1089314149, 1046728732, 1031608549) + + W(4, -1133559462, -1089126233, 1045641984, -1135165794) + + W(5, 1003671004, -1110956839, 1051092970, 1017185520) + W(6, -1123695637, -1129083412, 1049638101, 996324016) + + W(7, -1105366874, -1113909272, 1057307443, 1009923752); + sum2 = + W(0, 
1023911567, -1108118447, 1044892126, -1112738860) + W(1, 1004932796, -1134375038, -1138359022, 1020475455) + + W(2, -1119124867, 1035251904, 1063133445, -1102926770) + W(3, 1022775023, 1037995724, 1074655410, 1047698054) + + W(4, 1011639102, 1050162042, 1070062472, -1113839430) + W(5, 1036397588, 1037707684, -1083132698, -1117927031) + + W(6, 999444348, -1111062570, -1073510095, 1017195567) + + W(7, -1106309084, -1101285996, -1073687480, 1041743674); + WS(-1089880270, 1068594400); + sum1 = + W(0, -1114467040, -1104513741, 1050219169, -1118490053) + W(1, 1023170830, -1091458130, 1042545281, -1120682309) + + W(2, 1036273894, 1034367029, 1048674674, 1031296832) + W(3, -1107800974, -1100997781, 1042739212, -1110241363) + + W(4, 1029867434, -1095286649, 1053624831, 1034289100) + W(5, -1106779367, 1047811540, 1034908007, -1107012174) + + W(6, 1024593089, -1096228817, 1034674708, 1021221591) + + W(7, -1110449433, -1109179032, 1049808694, -1126204757); + sum2 = W(0, 1030004067, -1133064657, 1024174065, -1119836251) + + W(1, -1117028113, -1134091777, -1113615539, 1025775761) + + W(2, -1105887481, 1045929484, 1032698350, -1118489229) + + W(3, -1066126465, 1081917443, -1097201480, 1041573100) + + W(4, -1104777348, 1043726141, 1033613382, -1124268877) + + W(5, -1122550707, 1028310407, -1107697461, 1030764051) + + W(6, -1130918117, 1016764395, 1029778305, -1116797441) + + W(7, 1023720579, -1116064873, 1018371831, 1018350967); + WS(1067475431, -1126058166); + sum1 = + W(0, -1108052732, -1097851361, 1049362324, 1040199303) + W(1, 1043040440, -1108210069, 1048929252, -1109622414) + + W(2, 1048922396, -1084429932, 1054104112, 1041203782) + W(3, -1097574342, 1055911149, 1047147106, -1104274751) + + W(4, -1119673025, 1041902328, -1085779789, 1047880876) + + W(5, -1101774749, 1035827603, 1049622021, -1116198608) + + W(6, 1019400344, -1115594513, 1019477630, -1107876353) + + W(7, 1007801026, -1107003538, 1037086373, 1036701569); + sum2 = + W(0, 1013753738, 1020731454, -1120697178, 
1009873360) + W(1, -1113150555, 1046742995, 1034035253, -1099358283) + + W(2, 1015248850, 1045156685, -1094710566, 1039470065) + W(3, 1042650722, -1104352239, -1128318417, 1046693145) + + W(4, 1040818978, -1089864705, 1045040370, 1036008061) + W(5, -1105652199, 1045331461, 1025091411, 1030627793) + + W(6, 1033430289, -1109224259, -1106535436, 1017876415) + + W(7, -1120377801, 1016214593, 1019751894, -1132110931); + WS(1060496974, -1099362699); + sum1 = W(0, 1054706808, -1109706041, -1098091909, -1129324363) + + W(1, 1036662868, 1041317773, -1123237413, 1015757258) + W(2, 1045895019, 1051554925, -1091928498, 1021341985) + + W(3, 1033303646, 1049334366, -1083798889, -1112852963) + + W(4, 1044095763, 1051166813, -1095315343, 1041804248) + + W(5, -1130190200, 1053590462, -1087007295, -1127492145) + + W(6, 1035564779, 1049285205, -1097088265, 1039262630) + + W(7, 1040746168, 1055551482, -1094550458, -1114905285); + sum2 = W(0, -1073385920, 1077926840, 1032559783, -1094451238) + + W(1, -1084887580, 1058818874, -1089669880, -1121878040) + + W(2, -1083140914, 1060159151, 1045764296, -1173171215) + + W(3, -1081800202, 1074845707, 1059775011, -1120435570) + + W(4, -1085954594, 1061906872, -1103860568, -1115590029) + + W(5, 1037413129, -1084431515, -1094536545, 1053315192) + + W(6, -1096433065, -1126824848, -1133232881, -1107838893) + + W(7, 1054210787, -1087315344, -1099186518, 1052978812); + WS(-1078369703, 1041267413); + sum1 = W(0, 1024386586, 1041630887, -1102607056, 1016693023) + + W(1, -1098405416, -1112635974, 1051293608, 1018393854) + + W(2, 1044040033, -1132435208, -1094040968, 1046099151) + + W(3, -1098286219, 1041292515, 1052507157, -1095618428) + + W(4, -1130684104, -1091497929, 1056853585, 1018959206) + + W(5, 1045750173, 1016159377, -1105775474, -1117949682) + + W(6, -1098338270, -1139597330, 1049412460, -1118540325) + + W(7, 1041584973, -1101611546, 1026100205, 1040982949); + sum2 = W(0, 1035615459, -1097925659, 1043665288, 1020457849) + W(1, 1028814353, 
1038714809, 1033337220, -1103657321) + + W(2, 1062855981, -1077657685, 1051048913, -1109823715) + + W(3, 1052250964, -1101970047, -1113114333, 1058024935) + + W(4, 1056109526, -1098468896, -1085664062, 1056999181) + + W(5, -1106019250, -1114182260, 1040975429, -1105846491) + + W(6, 1048995011, -1104046747, -1109605260, 1042595027) + + W(7, -1115640692, -1130122971, 1034778788, -1112374505); + WS(1049151900, -1114127847); + sum1 = W(0, 1050216581, 1044969414, -1106856243, -1099359923) + + W(1, 1000015204, 1047621718, -1112471205, -1148711507) + + W(2, -1136218178, 1057314961, -1091938023, -1133921207) + + W(3, 1023136961, 1055877843, -1092033343, -1112054390) + + W(4, -1130455775, 1054358666, -1099719178, 1034273823) + + W(5, -1133465094, 1052966059, -1089783713, -1122838974) + + W(6, -1160090192, 1045850836, -1099053307, 1019980258) + + W(7, 1035991733, 1052491071, -1087984664, -1112322824); + sum2 = W(0, -1105478410, 1011113448, 1068049752, -1082501551) + + W(1, 1039552519, 1050864081, -1103020148, 1059003564) + + W(2, -1131405058, -1125310098, 1066695202, -1098156272) + + W(3, -1099466970, 1043215106, 1074596924, 1039344435) + W(4, 1049027924, 1034334973, 1067601936, -1099880092) + + W(5, -1115050596, -1095883745, -1084539788, -1107282542) + + W(6, -1129231446, 1036178144, -1075514015, -1097076855) + + W(7, 1049583286, -1088635155, -1074115054, 1042018058); + WS(-1081332839, -1093454830); + sum1 = W(0, -1109709318, 1035356975, 1039420520, -1111308088) + + W(1, -1104916884, 1037116623, -1122158672, 1014687039) + + W(2, -1108177419, -1125155242, 1053028347, -1112931654) + + W(3, -1117212646, 1030327966, -1104761999, 1020526050) + + W(4, -1119705461, 1050241163, 1032136206, -1119448076) + + W(5, 1032417267, -1103907222, -1115200780, -1112611266) + + W(6, -1118748528, 1046095669, -1115609017, -1136487898) + + W(7, 987327371, 1033262333, 1029855614, -1104533373); + sum2 = W(0, 1013207527, -1118344664, -1124997045, 1036829905) + + W(1, -1127355075, 1028266047, 
-1117884424, 1016171285) + + W(2, -1103159789, 1043690369, -1116887196, 1024917624) + W(3, 1040955478, 1051132935, 1047094008, 1041601336) + + W(4, 1008876024, -1108842627, 1057384051, -1127506713) + + W(5, -1116686760, 1041402295, -1100609336, 1033786361) + + W(6, 1035764738, -1105537330, -1098518151, -1114313206) + + W(7, -1154118338, -1128237045, -1089828796, -1102385561); + WS(1063446990, 1030048893); + sum1 = W(0, -1131562670, 1043069042, -1108681470, -1108650254) + + W(1, -1099483503, 1049571969, -1106599551, -1106453175) + + W(2, -1106193120, 1061428532, -1097709592, 1008661917) + + W(3, -1102405700, 1053095529, 1046689338, -1102088711) + + W(4, 1021119810, -1115069793, 1029025472, -1123233168) + + W(5, -1113568905, -1119868742, 1006738196, -1104754348) + + W(6, 1009340592, 1027160065, -1120287391, -1136963210) + + W(7, 1030255032, 1043118346, -1111789333, -1114502129); + sum2 = W(0, 1012371361, 1017109065, -1110016822, -1103671135) + + W(1, -1149491589, -1115161102, -1109821790, -1097972516) + + W(2, 1023756477, 1012418345, 1060485172, -1094449842) + + W(3, -1165255819, -1111308586, 1031379047, 1049769129) + W(4, -1111467932, 1015942947, 1046805034, 999148403) + + W(5, 1019335917, -1106016138, -1114576028, 1022790203) + + W(6, -1140694601, 1015823021, 1024897044, 1032463998) + + W(7, 1010513313, 1026481588, -1114618076, -1146831635); + WS(1060385486, 1040268319); + sum1 = W(0, -1115333181, -1105770459, 1048230464, 1022495614) + + W(1, -1113839600, -1104706211, -1122113023, -1115044751) + + W(2, 1026945989, -1085502934, 1051932092, 997701279) + W(3, 1041834949, -1112644646, 1065603891, 1044329112) + + W(4, -1100015113, -1102865873, -1105250533, 1038196649) + + W(5, 1010458454, -1112283012, 1054415097, -1113673817) + + W(6, -1113704543, -1108020290, -1122402466, -1105082025) + + W(7, -1110169699, -1115522389, 1046973419, -1115640050); + sum2 = W(0, -1115664423, 1041813775, -1142717933, -1120925434) + + W(1, -1122287706, 1041645390, -1094011677, -1119842210) + 
+ W(2, 1044568139, -1087138046, -1124627987, -1120985148) + + W(3, -1099350461, 1067461852, 1027339570, -1113918825) + + W(4, -1102715015, -1116572542, 1045144111, -1100833175) + + W(5, 1042022920, -1109622665, 1054026149, -1101147879) + + W(6, 1031291077, -1113357449, -1122353792, -1112018489) + + W(7, 1032474264, -1110133469, 1047364150, -1109170005); + WS(1049043868, 1050086952); + sum1 = W(0, -1127858130, -1091279607, 1054362490, 1028884238) + + W(1, 1025810768, -1101354119, 1044465651, 1022883995) + W(2, 1034666762, -1089232423, 1050465752, 1032068165) + + W(3, 989092984, -1092016019, 1058690160, 1005443958) + W(4, -1115481017, -1089729669, 1053482909, 1007092039) + + W(5, -1119959429, -1104525366, 1051092259, -1149438996) + + W(6, 1012364567, -1121438163, 1019953121, -1137072337) + + W(7, -1097661520, -1160888088, 1051219071, 1017307814); + sum2 = + W(0, 1030282550, -1099759289, 1070065566, -1078832473) + W(1, 1012065197, 1050936278, 1068820850, -1076346523) + + W(2, 1049708534, 1018991152, 1068572253, -1075628240) + W(3, -1101566471, 1050588571, 1075164582, -1072955990) + + W(4, 1043525707, -1112383125, 1046261639, -1093459475) + + W(5, 1038111746, -1121285468, 1053555382, -1088795804) + W(6, 1040589253, 1039884984, 1040055978, -1099780818) + + W(7, -1106531005, -1113841779, 1058828770, -1090423367); + WS(-1083655502, 1074535575); + sum1 = W(0, 973651072, -1100106921, 1049212555, -1122472503) + W(1, 1028862937, -1096173461, 1052529147, 996549482) + + W(2, 1029517469, -1088250868, 1057957824, -1111380529) + + W(3, -1111330742, -1101351113, 1060855844, -1125410415) + + W(4, 1043404639, -1092528244, 1050267538, -1109626381) + + W(5, -1105397947, 1034877328, -1104065896, -1122247818) + + W(6, -1129726574, -1113904430, 1041335934, -1108333759) + + W(7, -1099887737, -1118418050, 1047916353, 1037233742); + sum2 = W(0, 1037582341, -1116649305, -1107051108, 1040538112) + + W(1, 1032149251, -1112916551, 1041643977, -1112067691) + + W(2, -1135487115, 1044443302, 
1030763641, 1036263743) + + W(3, -1107247151, -1117703407, 1053360182, -1115814520) + + W(4, 1053968825, 1049963138, -1087331905, 1035849297) + + W(5, -1096732175, 1042101117, -1089009482, -1104153316) + + W(6, 1034930229, 1046616621, -1110671517, -1104877620) + + W(7, -1094249638, -1127010106, 1059951262, -1116847106); + WS(-1132786560, 1056578758); + sum1 = W(0, 1031072232, 1043718831, -1098888378, -1123786893) + + W(1, 1019453021, 1050053971, -1104586611, 1030360908) + + W(2, -1112153353, 1057505465, -1090760605, -1123598824) + + W(3, -1127115205, 1049535379, -1081773448, 1035671447) + + W(4, -1153314582, 1052394474, -1106021324, 1044649456) + + W(5, -1105591705, 1045418390, -1120283669, -1105554848) + W(6, 986346381, 1044283028, 1036365944, 1033186457) + + W(7, 1024604121, 1048806335, -1103287167, -1108912601); + sum2 = + W(0, 1040658557, 1011638864, 1041865287, -1109113498) + W(1, -1106354313, 1020064744, -1103147627, 1033931950) + + W(2, 1027144364, 1049840877, 1026998340, -1120927100) + W(3, -1103628435, 1050147177, 1053587315, -1102211113) + + W(4, -1123462592, -1117174472, -1118058864, 1033328738) + + W(5, -1130705048, -1105120318, 1036775410, -1101351534) + + W(6, -1126068784, -1113970774, -1104140895, 1028877580) + + W(7, 1016031184, -1135769248, 1030416564, -1112076066); + WS(1062711758, -1109562142); + sum1 = W(0, -1107075140, -1100622401, 1053288051, 1017596397) + + W(1, -1125545776, -1104747337, 1049612355, 1007433175) + + W(2, -1121788380, -1103229831, 1059638178, -1120034657) + + W(3, -1112684886, -1083301774, 1048337697, -1098025711) + + W(4, -1128625782, 1050510715, 1052952654, -1112854653) + + W(5, -1102890883, -1097610180, 1051307580, -1108240645) + + W(6, -1129119788, -1109077454, 1041285742, 1022176121) + + W(7, -1102110160, 1030737729, 1049115768, 1020667959); + sum2 = W(0, 1022257834, 977550902, -1113927342, 1036504102) + W(1, 1033804876, -1105772859, 1042844173, -1107093105) + + W(2, 1033963400, -1098748483, -1096777339, 1028160469) + + 
W(3, -1113268759, 1063293804, 1059312088, -1089861840) + + W(4, -1123922685, -1090033704, -1134320563, -1101525036) + + W(5, 1029999681, 1051039483, 1037687330, 1017532722) + W(6, 1015463858, -1104977553, 1024487701, -1122996995) + + W(7, -1125618402, 1022914778, 1004875175, -1116068477); + WS(1046002488, -1083997249); + sum1 = W(0, -1116203964, -1099156844, 1050076828, 1025075217) + W(1, 1044431961, 1028498222, 1049400252, 1035032656) + + W(2, 1041235242, -1096363997, 1050263757, 1032516798) + W(3, 1046371486, -1079124621, 1020370202, 1050189128) + + W(4, -1106267745, -1088685662, 1048790666, 1042510329) + + W(5, -1117705943, -1103556299, 1053355933, 1039724554) + W(6, -1122870401, 999993136, 1034829570, 1027729159) + + W(7, -1109240562, -1106798659, 1037276039, 1037249299); + sum2 = W(0, -1094982381, 1041875660, 1017839086, 1045890174) + + W(1, 1024745423, -1091311273, 1044409386, -1107764806) + W(2, -1097113645, 1042706326, 974632891, 1048260200) + + W(3, 1017043870, -1148119319, 1061393923, -1118443235) + + W(4, 1052555320, -1098101314, 1043955676, -1097368628) + + W(5, -1107563793, 1010824956, -1098402228, 1041238578) + + W(6, 1049262968, -1094774489, 1042935118, -1116185663) + + W(7, -1118735987, 1030619863, -1094515595, 1042214090); + WS(-1083255246, -1075588436); + sum1 = + W(0, -1154522904, 1026375684, -1099839750, 1037990411) + W(1, -1106371438, 1036249778, -1113752075, -1119606102) + + W(2, -1105464695, 1056687627, -1092436626, 1043935324) + W(3, 1036086422, -1097055709, 1050924256, 1036476796) + + W(4, 1023783355, -1088556578, 1056650964, -1111337285) + + W(5, -1127243930, -1108055408, 1045926824, -1110940389) + + W(6, 1030176790, -1105944807, 1039385437, -1124332025) + + W(7, 1031412139, -1123388097, 1041066449, 1021333175); + sum2 = W(0, -1117330831, 1032531181, 1043756688, 1060766869) + W(1, 1023708058, -1114369361, 1031926890, 1067300065) + + W(2, -1133288218, 1017487862, -1104593512, -1105945514) + + W(3, -1122435213, 1047518334, 1043258354, 
-1075696003) + + W(4, -1139180683, -1122916014, -1096895795, -1102393264) + + W(5, 1020252429, 1027012545, 1049767697, -1106946489) + W(6, 1025188082, -1103557501, 1019544307, 1020047431) + + W(7, -1109968059, 1046855474, -1112657711, -1113285900); + WS(1056055196, 1023945849); + sum1 = + W(0, -1104545849, 1036175954, 1032040419, -1111539988) + W(1, -1139517988, -1138851412, 1009002268, 1026875367) + + W(2, -1101013315, 1048468083, 1040659763, -1104005681) + + W(3, -1136100212, -1097789539, 1059870683, -1112873620) + + W(4, -1148373856, -1096999890, 1039368714, -1135793772) + + W(5, 1013350648, -1097071058, 1053644410, -1132670698) + W(6, -1120654743, 995622088, -1122818538, 1028030253) + + W(7, -1108930702, -1128233700, 1033002810, -1133516850); + sum2 = W(0, 1033182461, -1110992702, 1011638125, -1116812221) + + W(1, -1135790885, 1041801313, 976542168, -1135785781) + + W(2, 1036058972, -1115959119, -1106760241, 1013579453) + + W(3, -1145806187, -1083556559, -1068720208, 1034110873) + + W(4, 1026470367, 1047402951, 1080067579, 1048786168) + W(5, -1125994579, 1026458945, 1043021822, -1124564059) + + W(6, -1123856921, 1021989349, -1118809257, -1115658442) + + W(7, -1152817846, 1029289545, 1027196491, -1110636256); + WS(1047050040, 1036867972); + sum1 = W(0, 1031952202, 1041031945, -1098508279, -1117663192) + + W(1, 1040033213, 1050476745, -1097704487, 1033331580) + W(2, 1049413776, 1016870010, -1091275871, 1012948685) + + W(3, 1052329098, 1054273441, -1087382286, 1043041286) + + W(4, -1134253939, 1041748879, -1095309386, -1131302504) + + W(5, 1023712847, 1035493153, -1117526273, -1125396951) + + W(6, -1129317744, 1041656495, -1112254893, -1153578470) + + W(7, 1023414180, 1041394937, -1108492595, -1112644847); + sum2 = W(0, -1120609508, -1138324119, -1110367912, 1021280635) + + W(1, -1098653081, 1042859622, 1038385587, 1026624912) + W(2, -1074671298, 1072071026, 1026263792, 1026288466) + + W(3, -1081276780, 1068070176, -1107049493, -1122098825) + + W(4, -1097107702, 
1044485226, 1024907208, 1040255596) + + W(5, -1110118090, 1040658983, -1122488627, -1152233050) + + W(6, -1137118351, -1118978815, 1038809437, -1126727249) + + W(7, -1119823807, 1035453187, -1111098449, 1006813455); + WS(1047287096, 1059538103); + sum1 = W(0, 1030804480, 1034749929, -1102474905, 1013951379) + + W(1, -1111595219, 1051408038, -1094490621, 1039425378) + + W(2, -1123153834, 1049213285, -1099609086, -1112788128) + + W(3, -1098057279, 1057344539, 1048597242, -1098419769) + + W(4, 1007980582, 1048793645, -1094890059, -1118093753) + + W(5, 1018775223, -1106759404, 1046324795, -1108341356) + + W(6, 1036089754, -1115115061, 1003815995, -1120388747) + + W(7, 1030691489, 1033654600, -1126686757, -1135294247); + sum2 = W(0, -1112864979, 1024004698, 1034996841, 1004307827) + + W(1, 1034721747, -1109416755, 1041661735, -1116757818) + W(2, -1108077379, 988806988, 1041642045, 1037366938) + + W(3, 1023366533, 1058238498, 1008174217, -1107547239) + + W(4, 1021246669, -1115279539, -1122858630, -1108868659) + + W(5, 1032508120, 1041332549, -1089151778, -1128614293) + + W(6, -1113986367, 1033457886, -1130797677, -1130732717) + + W(7, -1153257254, 1018109285, -1111548255, -1122125438); + WS(1066216871, -1084582294); + sum1 = + W(0, 1009745022, -1095963722, 1050775177, 1027534757) + W(1, 1023325836, -1096473834, 1046303345, -1126933960) + + W(2, 1034246149, -1087129751, 1057767679, -1118146623) + + W(3, -1103455511, -1142570129, 1064400323, -1103828265) + + W(4, 1025953385, -1094684497, 1038452388, 1018950609) + W(5, -1105242559, -1107829831, 992534739, 1030437560) + + W(6, 988118663, -1107520037, 1025316967, 1020674901) + W(7, -1107601837, -1117719279, 1044853309, 1033139589); + sum2 = + W(0, -1127013105, 1031626404, 1029958550, 1017794797) + W(1, 1018526115, -1117975133, -1113082476, 1023300847) + + W(2, -1123587641, 1062148372, 1035325186, -1145491637) + W(3, 1052346460, 1074624908, -1130733653, 1041942663) + + W(4, -1098654594, -1090512380, 1046581721, -1113152926) + 
+ W(5, 1029804743, -1070854231, -1113725416, -1139181075) + + W(6, -1118240305, -1095518794, 1031776819, -1120299385) + + W(7, 1030491706, 1041467716, -1115594754, -1142763637); + WS(1033725552, -1082653885); + sum1 = W(0, 1027658456, -1091894661, 1054730080, 1024810077) + W(1, 1033791775, -1094613753, 1052727904, 1020875129) + + W(2, 1038169633, -1085886368, 1060322284, -1114253244) + + W(3, -1130834725, -1094605577, 1054915405, 1021660859) + + W(4, -1105755912, 1049096200, -1115316932, 1010914050) + + W(5, -1107114253, 1038006563, -1113871235, 1032412472) + + W(6, -1106878767, 1040399888, -1114082704, 1016596422) + + W(7, -1102176852, 1038046228, -1098681477, 1044096992); + sum2 = + W(0, -1126607908, -1109006144, -1093467963, 1059727060) + W(1, -1111819446, 1050929158, 1063076656, 1073053378) + + W(2, 1032709615, 1025316173, 1048889614, 1073749745) + W(3, -1119610542, -1107176811, -1090320472, 1058093447) + + W(4, 1027594385, -1123863107, 1046933317, -1079533534) + + W(5, 1029384695, -1122531221, 1051206364, -1073154062) + W(6, -1115269200, 1026796828, 997945380, -1075936158) + + W(7, 1032209203, -1115616555, -1109796884, -1088004581); + WS(-1087442510, 1045166814); + sum1 = + W(0, -1106921670, 1032009478, 1041337656, -1110338367) + W(1, 1032907600, -1127325529, -1100890040, 1042108264) + + W(2, -1111070674, -1106321881, 1060092296, -1105899594) + + W(3, 1047421629, -1085655355, 1044767811, 1027442111) + W(4, -1094092352, 1057239638, 1040377247, -1105935775) + + W(5, 1031075349, -1098381173, 1037296628, 1037469097) + + W(6, -1136240762, -1128650952, -1131454723, 1027139781) + + W(7, -1129052086, -1107389381, 1037463686, 991481919); + sum2 = W(0, -1119426781, 1040351141, -1104019881, 1015410156) + + W(1, -1109786529, 1043540747, -1106797782, 1043466418) + + W(2, 1041819143, -1105926532, -1098594115, 1048709183) + + W(3, 1018220925, -1133247980, 1066013053, -1092013542) + + W(4, -1103055916, -1096724541, -1136157126, -1109288787) + + W(5, -1124599508, 1038612076, 
-1097021086, 1027934232) + W(6, 986571056, 1019567362, 1025702119, -1140871148) + + W(7, 1024618671, -1143357456, 1008181894, 1027080058); + WS(1059279054, 1041683061); + sum1 = W(0, 1028823114, -1096417644, 1041071884, 1040139175) + + W(1, 1033060856, -1095251776, 1051348966, -1133443258) + + W(2, -1136683218, -1096755466, -1106154521, 1027763154) + + W(3, 1041097809, -1095420880, 1062896338, 1037083176) + + W(4, -1108712682, -1087904547, 1057053635, -1109247178) + + W(5, -1099515041, -1117180600, 1050313119, -1169250436) + + W(6, -1114499693, -1113115866, 1041134484, 1007514196) + + W(7, -1105525449, -1121084634, 1043053335, 1031868218); + sum2 = + W(0, -1104031080, 1039900182, 1028023758, -1125301559) + W(1, -1113246189, 1044206765, -1122555834, 1027754708) + + W(2, -1124792744, 1043086617, 1041018096, -1129842792) + + W(3, -1107688308, 1057123143, -1100462314, 1022410763) + + W(4, -1084040655, 1066602722, -1123151815, 1034604384) + W(5, -1089958554, 1037219700, 1032289221, 1029558442) + + W(6, -1098618924, -1111210157, 1028867168, -1139711519) + + W(7, -1098033920, 1036938090, -1143651414, -1122704124); + WS(-1093886876, -1092780259); + sum1 = W(0, 1031521076, 1034696971, -1098366291, 991412379) + W(1, 1040859655, 1027092583, -1107551117, 1023411481) + + W(2, -1123954783, 1057011018, -1084481759, 1037665034) + + W(3, -1115369432, 1055452141, -1086775792, 1037775165) + + W(4, 1027325374, 1055461116, 1050583573, -1102735091) + + W(5, -1130075936, 1043845385, -1098824964, -1106471523) + + W(6, 1017430099, 1041326696, 1032795870, -1113156096) + + W(7, 1005655277, 1045628689, -1110858724, -1108701199); + sum2 = W(0, -1099549152, -1115620025, 1042476476, -1128057356) + + W(1, -1101982776, -1110046443, -1138118945, -1104607948) + + W(2, 1039636960, 1067650764, 1056117723, -1129519972) + + W(3, 1065032135, -1096892917, -1084935200, 1039009532) + + W(4, 1046191187, -1084058149, 1043334912, 1042222420) + + W(5, -1112740199, -1089814493, -1116550886, -1114781123) + + W(6, 
-1120755726, 1022618460, 1030169574, 1023299940) + + W(7, -1138571865, -1106519005, -1123579094, 1029490149); + WS(-1103384376, 1050555318); + sum1 = + W(0, 990997212, 1056616014, -1113359817, -1104278389) + W(1, -1104723373, 1049493908, -1101985253, -1112669305) + + W(2, -1102560870, 1060440249, -1095607555, -1117918862) + + W(3, -1110645588, 1063537686, -1091747296, -1124628967) + + W(4, -1114771100, 1049398171, -1098384586, -1120789735) + + W(5, 1006886950, -1123261404, -1094355067, 1020875660) + W(6, 1011892746, 1042753094, -1098733239, 1029891897) + + W(7, 1041403778, 1041297647, -1099641616, -1143354303); + sum2 = + W(0, -1097904627, 1056762738, 1033294591, -1113205081) + W(1, -1102268820, 998260703, -1102966386, -1105114179) + + W(2, -1101696600, 1049156882, 1040206209, 1028552759) + W(3, -1102083058, 1055125136, 1048772561, 1023069474) + + W(4, -1100977750, 1040009482, 1004029807, -1108173489) + W(5, 1022682138, -1099421122, 1040485303, 1040370279) + + W(6, -1105396681, -1114104451, -1118051402, -1109779069) + + W(7, 1035758392, -1112582051, 1040482786, -1135668672); + WS(1042369848, -1095650924); + sum1 = + W(0, 1032801852, 1060985291, -1088222536, -1154392525) + W(1, -1124780413, 1056066249, -1093287088, 1017158008) + + W(2, -1142443027, 1059062488, -1089766974, 1041310978) + + W(3, 1031124370, 1046815795, -1084610255, -1111535759) + + W(4, 1024489633, 1051554984, -1096293908, -1118760812) + W(5, 1037041722, 1026983110, 1035048291, -1118618480) + + W(6, -1157391942, 1037131073, -1113190096, 1032589425) + + W(7, 1048283534, 1028118439, -1107690581, -1101944856); + sum2 = W(0, 1056251107, -1084477192, -1077785336, 1053829707) + + W(1, -1118435725, 1041984407, -1086371528, -1113013425) + + W(2, -1111043713, -1093515412, -1093010832, 1046411943) + + W(3, -1090037466, 1073185178, 1074151927, -1090694669) + + W(4, -1129656805, 1051712939, 1054463958, -1095714333) + + W(5, -1093926860, 1053076395, -1094829660, -1103046632) + + W(6, 1052716525, -1096065312, 
-1090812268, 1035367276) + + W(7, -1083761792, 1066458198, 1063500158, -1087950464); + WS(-1081634407, -1072784825); + sum1 = W(0, -1102740818, 1024862651, 1056867014, 1016622683) + + W(1, -1115394775, 1046040367, 1027952685, -1112643478) + + W(2, -1101924733, -1109613465, 1058509811, -1096240139) + + W(3, -1100839185, -1115978107, 1050187219, -1101581667) + + W(4, -1123768914, 1049722038, -1098437541, 1008822020) + + W(5, -1140848016, -1098661743, 1033615035, 1023463264) + + W(6, -1118249725, -1138059466, -1143195264, -1116668435) + + W(7, -1116477425, 1025943000, -1132571500, 1036595961); + sum2 = + W(0, -1139809909, 1046126277, -1119940899, 1040903120) + W(1, -1126178115, -1111752353, 1055245806, -1104413152) + + W(2, 1032546581, 1034216546, -1088954301, 1019748625) + W(3, -1097752958, 1042101074, 1058677046, -1089277711) + + W(4, 1042272246, 1043290796, -1087007443, 1028591970) + + W(5, -1118367610, -1112722386, 1048637958, -1139620757) + + W(6, -1138081685, 1034532478, -1140548675, 1017306737) + W(7, 1017774855, 1030093406, 1029648824, 1033828758); + WS(1050645916, 1033550915); + sum1 = W(0, -1118556120, -1107219907, 1039060494, 1030703933) + + W(1, -1108262319, 1043545243, -1145250020, -1110184190) + + W(2, 1043230361, -1086762442, 1059123936, -1110311564) + + W(3, -1094947334, -1104041435, 1057758321, -1100512367) + + W(4, -1103841584, 1058293794, -1096913490, 1044416908) + + W(5, 1018079143, -1097977901, 1039599795, -1132472896) + + W(6, 1032598742, 1041130248, -1151122424, -1147952664) + + W(7, 1008731683, -1123331655, -1123961290, 1022228292); + sum2 = W(0, 1040267615, -1112791644, 1010515766, 1024308395) + W(1, 1048409433, -1110540665, 1032365064, 1041630361) + + W(2, -1107165984, -1088877638, -1090038325, 1040244783) + + W(3, -1086251247, 1053330884, 1049266570, 1037513285) + + W(4, -1113957324, 1057377190, 1028890653, -1098504745) + + W(5, -1107976602, 1057673067, -1103454662, 1039941233) + + W(6, -1122078587, 1024512949, 1041725883, -1103661415) + + 
W(7, -1111525785, 1042772014, -1143554104, 1024552651); + WS(1051978908, -1102077462); + sum1 = + W(0, -1132165009, 1045761591, 1013540534, -1106078467) + W(1, -1117187439, 1051430047, -1096198150, 1014010165) + + W(2, -1108837850, 1060286099, -1130717861, -1121999817) + + W(3, 1037183202, 1046972447, -1085387922, 1030141582) + W(4, 1028473682, -1128782822, -1105434962, 1008925446) + + W(5, 1045655273, 1028047735, -1101497054, 1018887490) + + W(6, 1025240990, -1178179968, -1110960744, -1122721428) + + W(7, 1033734537, 1035331372, -1102517015, 1026996307); + sum2 = W(0, 1023072493, 974611657, -1101792417, -1112121991) + W(1, -1125531833, 1047662948, 1051899312, 1037288067) + + W(2, 1030396885, 1035351326, 1080526024, 1042102238) + + W(3, -1118880502, -1090109833, -1069022098, -1113214479) + + W(4, 1027419877, -1110942359, -1087739606, 1032127295) + + W(5, -1161271853, 1033315833, 1036440530, -1143368203) + + W(6, 1019485964, 1010290822, -1124554651, -1132377096) + + W(7, 1028708277, -1156901930, -1114394737, -1141169667); + WS(1058455886, -1096183470); + sum1 = W(0, -1105360672, -1112611850, 1060616064, -1104075433) + + W(1, -1112035684, 1032735242, 1033545283, -1111940643) + + W(2, -1105059667, 1048808633, 1051085625, -1115570744) + + W(3, -1103063975, 1042817013, -1097442806, -1097216429) + + W(4, -1112638549, 1058906409, -1102188695, -1107200030) + + W(5, -1104182327, 1047248804, -1103229812, -1108265532) + + W(6, -1106250503, 1047193699, -1106970890, -1119499692) + + W(7, -1103319623, 1057710235, -1109193562, -1106901731); + sum2 = W(0, -1135393651, -1114274101, -1117767149, -1110403581) + + W(1, -1131009665, 1016197705, -1130986601, -1136444051) + + W(2, 1004138181, -1143366053, 1026922589, -1112183743) + + W(3, -1111055824, 1051632163, 1048764370, -1121253595) + + W(4, -1134829555, -1111579981, 1036257166, 1009689523) + + W(5, -1127256265, 1038147066, -1112945554, 989872074) + + W(6, -1113946573, 1025185617, -1143942149, -1123668797) + + W(7, 1010148467, 
-1137859715, -1123068365, -1123716033); + WS(-1103618872, 1023577831); + sum1 = W(0, 1041702149, 1057806109, -1091063799, 1022049043) + W(1, 1032268701, 997904496, -1106010126, -1102660926) + + W(2, 1021026046, 1057675340, -1090884608, 1046995709) + + W(3, 1032012647, -1129702414, -1094077941, -1100395542) + + W(4, 1023662430, 1056283152, -1094732237, 1050214494) + + W(5, -1111766616, 1039716321, -1100869456, -1096243374) + + W(6, -1112590149, 1049875952, -1105996279, 1047647816) + + W(7, 1042231991, 1054291708, -1098425069, -1100607928); + sum2 = W(0, 1073849383, -1072987051, 1040644897, 1022236877) + + W(1, 1069429154, -1080077361, -1098556311, -1128073567) + + W(2, 1070121938, -1076996297, -1124122091, -1118780260) + + W(3, 1066905653, -1086754234, -1095882058, 1027638421) + + W(4, -1087755842, 1060194671, -1096747545, 1012182550) + + W(5, -1080359597, 1067521081, 1048551041, -1129108935) + + W(6, -1073496568, 1074652354, -1092793409, -1130588931) + + W(7, -1070245916, 1075883836, 1058695504, 1001812541); + WS(1010873216, -1100304815); + sum1 = + W(0, -1154187044, 1045945805, -1102222630, -1113759340) + W(1, 1028741017, 1051026373, -1103217414, 1022415534) + + W(2, -1114247598, 1058185854, -1097780567, -1105383733) + + W(3, -1099084427, 1059904750, -1091550752, -1111624796) + + W(4, -1104966895, 1052860588, -1108718914, -1114326870) + + W(5, 994968800, 1038231664, -1110664976, -1110217221) + W(6, -1113228468, 1040557287, 1017403508, -1108875519) + + W(7, 1028263947, 1032103905, 1017865354, -1109350878); + sum2 = W(0, -1114639192, -1100331344, 1035901690, 1012858414) + + W(1, 1039210963, 1069298433, 1043940791, -1126620651) + W(2, 1047528380, 1080352854, 1030733380, 1036529177) + + W(3, -1099502544, -1067747068, -1084572959, -1099630936) + + W(4, 1041344628, -1080996124, 1003800555, 1043527822) + + W(5, -1108972702, 1032407437, 1045939813, -1119860547) + + W(6, 1021595805, -1121090437, -1121287047, -1145664907) + + W(7, -1116883371, 1016604711, -1122259179, 
1034105585); + WS(1044302648, -1104457270); + sum1 = W(0, -1115179413, 1029190781, -1108944696, -1115207128) + + W(1, -1101763748, 1045938783, -1102720380, 1013266237) + W(2, 999269803, 1034810490, 1038235218, 1025157307) + + W(3, -1106420033, 1055724168, -1101615303, -1105407492) + + W(4, 1041707045, 1053359959, -1094303566, 1045691477) + + W(5, -1128885581, -1100033022, 1043259278, -1113033137) + + W(6, -1110751840, 1037908026, 1011056201, -1114066571) + + W(7, 1029110244, 1035419021, -1123807773, -1113282394); + sum2 = + W(0, -1117238549, -1109414288, -1115125848, 1023475415) + + W(1, -1115295266, -1128441429, -1121954754, 1024122972) + + W(2, 1033754662, -1104877135, 1018153798, 1002646200) + W(3, -1095438097, 1053172955, 1038786210, -1106263750) + + W(4, -1106855439, 1056926069, -1104082080, 1034246226) + W(5, 1011060498, -1108715476, 1043675119, 1025499367) + + W(6, -1117460606, 1036225840, 1008366234, -1107090094) + + W(7, 1032653312, -1114450934, 1018894269, 1013321052); + WS(1068015911, 1043072951); + sum1 = + W(0, -1127841379, -1101991257, 1041159238, 1035382295) + W(1, 1035792844, -1091973141, 1040826438, -1098419575) + + W(2, -1155603908, -1110119406, 1052691071, 1043898844) + W(3, 1018341976, -1095315357, 1053842800, 1030324599) + + W(4, -1111053022, -1107228151, 1042506601, -1108241413) + + W(5, 1044403491, -1111437840, 1050221764, 1043481954) + + W(6, -1112313859, -1097295811, 1030249017, -1098652336) + + W(7, 1005208661, -1103670736, 1046985745, 1032196534); + sum2 = W(0, -1134315530, -1125722365, -1135384490, -1118933131) + + W(1, 1033824390, 1019923991, 998580556, 1027495017) + W(2, -1112867968, 1039464885, -1093291922, 1045077367) + + W(3, 1039862799, -1087456726, -1061395777, 1087627027) + + W(4, 1005143468, -1119378177, -1106942114, 1034637079) + + W(5, -1114969920, 1042059959, -1121281813, -1148550692) + + W(6, 1030354102, -1113780490, 1034179020, -1118611519) + + W(7, -1131720457, 1009971850, 1032010553, -1117397469); + WS(1060186318, 
-1131602669); + sum1 = W(0, 1022422466, 1050205114, -1099402401, -1112848707) + + W(1, -1102049002, 1052887138, -1102238785, -1106285089) + + W(2, 1050775273, 1052456073, -1100510367, 1039607513) + + W(3, -1097141731, 1049038746, -1088759247, -1098786589) + + W(4, 1056872997, 1040490435, -1122526498, 1048305743) + + W(5, -1099566807, 1039690728, -1099089908, -1104192622) + + W(6, 1025667942, 1043457059, -1102859210, 1032266998) + + W(7, 1032894309, 1045856748, -1106963501, -1118572779); + sum2 = + W(0, 989650422, -1120410947, 1047633630, -1081060940) + W(1, -1108234271, 1050158699, 1066272871, -1074101109) + + W(2, -1120122674, -1092804082, 1075424781, -1079387458) + W(3, 1045260201, 1024838498, 1061734347, 1050344942) + + W(4, -1114112879, -1125280065, -1091697647, 1043540897) + + W(5, 1037831837, -1119936776, 1003481288, 1039609275) + + W(6, -1117501250, -1114421818, 1011848513, -1131708367) + + W(7, 1024132690, 1035095434, -1106020023, 1026462111); + WS(1057810382, 986287880); + sum1 = W(0, -1105255365, -1110142276, 1041191503, 1050176972) + + W(1, 1027767677, -1106138450, 1031406779, -1130243957) + + W(2, 1037147968, -1092238936, 1027980992, 1041310918) + W(3, 1038550632, -1090210593, 1049547413, 1041377527) + + W(4, -1136315263, -1089134184, 1052451129, -1115128119) + + W(5, 1035749561, -1093055129, 1057536820, 1033537239) + + W(6, -1115075413, -1097497096, 1050343212, -1127231554) + + W(7, 1028257967, -1089420006, 1058108487, 1031965752); + sum2 = + W(0, -1099824579, 1070708271, 1075074245, -1097074436) + W(1, -1113793286, -1104040400, -1084124078, 1023561426) + + W(2, 1053512844, -1091831853, -1082668198, 1039380165) + + W(3, -1112943238, -1072548459, -1072503695, 1009216489) + + W(4, 1041834894, -1093778092, 1056090411, 1043969626) + W(5, -1099992002, 1057576575, 1053907302, -1107563771) + + W(6, -1120789532, 1064181862, 1058602971, 1034348623) + W(7, 1024916046, 1051972140, 1054786345, -1106961801); + WS(-1075707047, 1038147646); + sum1 = + W(0, 
1038492938, 1050722763, -1095596015, 1040870942) + W(1, -1115961531, 1026800423, -1103730686, -1122935577) + + W(2, 1036899040, 1055626176, -1091210886, 1047700685) + W(3, 1041066756, 1052178217, -1090286882, 1036539085) + + W(4, 1038870159, 1027667391, -1094135001, -1102625947) + W(5, 1045702796, 1053246416, -1095582752, 1048523811) + + W(6, 1022987667, -1100305296, -1109653838, -1105859960) + + W(7, 1044017707, 1049962981, -1093296229, -1117734303); + sum2 = + W(0, -1145353723, 1027969677, -1115030411, 1052090154) + W(1, -1113260796, -1095928859, 1031463199, -1110261785) + + W(2, -1112198364, 1052169305, -1108227013, 1041877601) + + W(3, -1113367096, 1052353113, 1051016428, -1099630700) + + W(4, 1007958125, -1129450520, 1041462653, -1087173862) + W(5, 1047146251, 1051473061, 1045242344, -1102681887) + + W(6, -1122974002, -1091037095, 1032341221, -1095485390) + + W(7, 1021064313, 1050393555, 1020102815, -1111896409); + WS(-1102302520, 1068562064); + sum1 = W(0, -1130894152, -1101486038, 1044130034, 1031789673) + + W(1, 1029216267, -1094978851, 1029251017, -1102217970) + + W(2, 1029150351, -1104755260, 1053252560, 1042516209) + W(3, 1029804895, -1093484995, 1050941559, 1040658618) + + W(4, -1108054071, -1110271975, 1036880709, -1113036417) + + W(5, 1046304488, -1106599349, 1050863202, 1041743615) + + W(6, -1107326720, -1102131284, -1128083416, -1102226940) + + W(7, 1018120580, -1102274525, 1047248574, 1032422999); + sum2 = + W(0, 1019111797, 1022527979, -1131519898, 1029130971) + W(1, -1123809812, -1111065998, 1023737355, -1117143513) + + W(2, 1006739898, 1031759809, 1045357020, -1106220344) + W(3, 1041109085, -1095097056, 1082939698, -1064938697) + + W(4, -1126211453, 1034613952, 1027342607, -1122955245) + + W(5, 1028972357, -1107360163, 1033832936, -1112280546) + + W(6, -1118372569, 1033659253, -1112248823, 1032152906) + + W(7, 1025713585, -1118642675, -1118824157, 1028082979); + WS(1066566439, -1125753148); + sum1 = W(0, 1020091828, -1096367972, 1038767583, 
1030663671) + W(1, 1041951168, -1098758994, 1048997640, 1025331919) + + W(2, -1130301450, -1093467737, -1130818632, -1108818909) + + W(3, 1052053147, -1089881409, 1064063659, 1044746920) + + W(4, -1131231944, -1092544881, -1116991019, 1022962726) + + W(5, 1033588155, -1098181717, 1042770882, 1041997189) + W(6, 1009411772, -1110070786, 1046860728, 1018371147) + + W(7, -1116682291, -1096392077, 1027361773, 1038578846); + sum2 = W(0, -1105507764, 1019815533, -1136431769, 1024962860) + + W(1, -1149004498, 1047279899, -1121679526, 1041000955) + + W(2, -1098298748, -1106660204, -1096656341, -1107413740) + + W(3, -1115729482, 1058550934, 1058580319, 1027135608) + + W(4, -1094576030, 1031191852, -1098228632, -1101441076) + + W(5, -1111478010, 1032716298, 1041490224, 1035067556) + + W(6, -1129010369, -1117054989, 1047791827, -1123460834) + + W(7, 1018600957, 1019851909, -1106522387, 1005788722); + WS(1050996380, 1066787661); + sum1 = W(0, 1042843177, -1109518091, -1098674409, -1112832565) + + W(1, -1120310187, 1043911830, -1097687209, -1133582755) + + W(2, 1033453959, 1059519229, -1096304487, -1116528532) + + W(3, -1107450543, 1061971625, -1098011863, -1105396874) + + W(4, 1017862620, 1051823058, -1095767039, 1033796902) + W(5, -1134072575, 1046601317, -1095249970, 999923683) + + W(6, 1025308393, 1042112788, -1098354285, 1030029487) + + W(7, 1033769739, 1046863997, -1098417599, -1109771063); + sum2 = + W(0, -1103321099, -1087654445, -1098379129, 1013739975) + W(1, 1033376724, -1114493691, -1096458683, 1015354012) + + W(2, -1108916223, 1049058628, 1036368268, -1121134774) + W(3, 1033194077, 1065240604, 1054396447, -1115257551) + + W(4, 1038197771, -1128335788, 1049730119, -1129699908) + + W(5, 1032881798, -1094150295, -1108472207, 1021079748) + W(6, 1017830932, 1006224046, 1031462702, -1117224382) + + W(7, -1138730935, -1112103411, -1102017203, 1038189385); + WS(1027314912, -1081149641); + sum1 = W(0, -1109480125, 1051429188, -1118844062, -1097611416) + + W(1, -1114947760, 
1053252314, -1094077252, 1034118735) + + W(2, -1101195017, 1058130398, -1113091794, -1104617542) + + W(3, -1132100201, 1061393767, -1085308129, 1035982720) + + W(4, -1103759537, 1057940398, -1102037735, -1114189097) + + W(5, 1044342469, -1104522586, -1094497965, 1036294793) + + W(6, -1111893319, 1055491172, -1103334896, -1128796810) + + W(7, 1051194426, -1120202825, -1094870250, 1030785028); + sum2 = W(0, -1131039707, -1098833779, -1147567565, 1035790053) + + W(1, 1024995350, -1124760267, 1057070390, 995065627) + + W(2, -1104486127, -1078713050, -1091843304, 1037354874) + + W(3, 1015537291, -1065942779, -1081156610, -1154356731) + + W(4, 1053996441, 1047686732, -1094140189, -1107145709) + + W(5, -1103676904, 1084085461, 1060423478, 1026796886) + W(6, 1047124046, 1064521940, 1046849692, -1108476011) + + W(7, -1111129691, -1093915430, 1018309905, 1038591472); + WS(-1081542375, 1044780323); + sum1 = W(0, 1026864081, 1046719985, -1098857847, -1114219435) + + W(1, -1100303790, 1054288460, -1108679899, -1104586877) + + W(2, 1048337215, 1054459103, -1098101851, 1041408644) + + W(3, -1098110473, 1048901488, -1093175556, -1098306531) + + W(4, 1054474587, 1041639871, -1104037973, 1046672715) + + W(5, -1099969099, 1034589376, -1107626335, -1104199322) + + W(6, 1023999910, 1043694031, -1101510966, 1022681657) + + W(7, 1028627178, 1046235480, -1106600025, -1126646775); + sum2 = + W(0, 1017985090, 1024212320, -1099849981, 1067245211) + W(1, 1027847194, -1098721130, -1081468176, 1074345814) + + W(2, 1036519222, 1050387030, -1070464929, 1070597407) + + W(3, -1113167123, -1094712479, -1096550174, -1088417301) + + W(4, 1037009826, -1100822056, 1057690620, -1104880956) + + W(5, -1113380621, 1028269032, 1042714784, -1102737214) + W(6, 1031131596, 1009055356, -1145638655, 1000105719) + + W(7, -1115360802, -1113841920, 1046010973, -1112342255); + WS(1059294542, 1020616832); + sum1 = + W(0, -1157534552, -1096856701, 1050532499, -1112831597) + W(1, 1039080142, 1044861738, 1041652423, 
1046540791) + + W(2, -1097783100, -1089052876, 1057029426, -1098369827) + + W(3, 1031217968, -1085780263, 1047119538, 1040272239) + W(4, -1164216296, 1044573672, 1039715347, 1024931118) + + W(5, -1138897989, -1097342660, 1043073721, -1103025619) + W(6, 1026686634, 1041475528, 1040308239, 1045404192) + + W(7, -1105498094, -1105558033, 1048391538, -1114523696); + sum2 = W(0, -1106291706, 1040318024, -1114208076, -1142193319) + + W(1, 1001372950, -1105705183, -1116263519, -1145917455) + + W(2, -1099590495, 1045484852, -1108179199, 1036599633) + + W(3, -1093748925, 1061316313, 1049891427, -1108356360) + + W(4, 1015225205, -1098311584, 1040668388, -1148092276) + + W(5, -1105937891, 1041085521, -1111581107, 1032743264) + + W(6, -1112612361, 1028378294, -1115787941, 1021281994) + + W(7, 995169980, 1035732349, -1113959318, 1024794158); + WS(1059376718, -1137270291); + sum1 = + W(0, -1118257199, 1043258576, -1114290826, -1112002778) + W(1, 1037392427, -1106073464, -1124279079, 1022160871) + + W(2, -1097794403, 1054680411, -1115190716, 1045843716) + W(3, 1035766677, -1099046488, 1052189312, 1011458515) + + W(4, 1026067385, -1088020070, 1054691490, -1110958220) + W(5, 1032186693, 1041392887, -1097245116, 1046761570) + + W(6, -1108679762, -1113652045, 1025610423, -1117145658) + + W(7, -1139304576, -1111666975, -1123561026, 1032305501); + sum2 = + W(0, 1025244035, 1033595807, 1016573022, 1002397687) + W(1, -1122852568, -1087687504, 1048825911, -1104089806) + + W(2, -1092220395, 1066421651, 1058107887, -1130145014) + + W(3, 1038019467, 1047700223, -1089540205, -1111655831) + W(4, 1048072683, -1084925862, 1045215493, 1032935415) + + W(5, -1113553750, 1050883425, -1098430697, 1015641098) + + W(6, -1119930901, -1105555859, 1041208433, 1001885951) + + W(7, -1139793711, -1146566911, -1124843514, -1131703250); + WS(1058596686, 1013962118); + sum1 = + W(0, 1000024554, 1052551424, -1088298614, 1039656505) + W(1, -1127551432, 1049056438, -1098656348, 1034731345) + + W(2, -1100000762, 
1057499982, -1090756927, 1019232187) + W(3, 1035333436, 1036076760, -1097067922, 1033429441) + + W(4, -1122612871, -1127972943, 1040213184, -1125045580) + + W(5, 1044766998, -1116046252, 1015197910, -1113717771) + + W(6, 1041477861, -1118015335, -1123679237, 1037833508) + + W(7, 1051562743, -1099275107, -1122278672, -1122272135); + sum2 = + W(0, -1104434141, 1048601996, 1033271157, 1004930429) + W(1, 1029025211, -1091369704, -1119340081, 1028438774) + + W(2, -1078497608, 1052827694, 1025214064, -1106316897) + + W(3, -1079110377, -1106615386, -1109539756, 1034989376) + + W(4, -1085502108, -1115334546, 1050957039, -1108631008) + + W(5, 1066279808, 1037382016, -1106594885, 1023198169) + W(6, 1062913146, -1122817088, 1036274829, -1134348613) + + W(7, 1068620036, 1048688798, -1118139306, -1139733884); + WS(1044771128, 1023341948); + sum1 = + W(0, -1156220044, -1092336191, 1051560294, 1036528391) + W(1, 1034545464, -1098819215, 1044146859, 1029114797) + + W(2, 1033488922, -1096491302, 1051910286, 1022761170) + W(3, -1111313058, -1086850728, 1045449190, 1029767950) + + W(4, 1038420969, -1102939421, 1053458817, 1041877300) + W(5, 1008511890, -1100928894, 1038883707, 1034675856) + + W(6, 1013986230, -1115524377, 1012126018, 1037411178) + W(7, 1011084871, -1089482302, 1046748951, 1043196317); + sum2 = + W(0, -1071004894, 1076503146, 1057610169, -1122913984) + W(1, -1080574884, 1066884159, -1107351326, -1130255370) + + W(2, -1089987082, 1058011283, 1054000347, 1038970611) + W(3, 1049506323, -1098882467, -1090234844, 1014162118) + + W(4, 1065282653, -1084978169, 1030898490, -1114000490) + + W(5, 1062249589, -1088626314, -1090230279, 1022492087) + + W(6, 1060165079, -1082249713, 1044380003, -1131621088) + + W(7, 1070844945, -1077734377, -1096516163, 1013009282); + WS(-1111817840, -1134998409); + sum1 = W(0, 1022731056, -1102901203, 1029699069, 1021112442) + + W(1, 1045262352, -1094315057, 1052915216, -1107194439) + + W(2, 1044888721, -1085070720, 1058770140, -1097785743) + + 
W(3, 1029818259, -1111793509, 1057078063, -1117717150) + + W(4, -1105915720, 1055206544, -1095183540, 1040740592) + + W(5, -1114660906, 1046806370, -1097451385, 1036425016) + + W(6, 1027479949, -1123039746, -1112670352, 1005586201) + + W(7, 991785104, -1136267423, -1114500629, 1016465988); + sum2 = W(0, -1129690332, -1113356803, 997943457, 1015499837) + + W(1, 1030842707, 1026318374, -1119512995, -1180561029) + + W(2, -1123486113, 1041923626, 1037525758, -1144014736) + + W(3, 1054087898, -1089332833, -1088292904, 1037328869) + + W(4, -1105176966, 1068558125, 1040210770, -1107365912) + + W(5, -1097205966, -1141184456, 1050151959, -1114055561) + + W(6, -1098021434, -1107728348, 1018682892, 1009592392) + + W(7, -1106929221, -1104288342, 1006954668, 1013301204); + WS(-1100650808, 1043653943); + sum1 = W(0, 1038392637, 1044290651, -1095199164, 1033771919) + W(1, 1032036848, 1040683515, -1115398639, 1038316223) + + W(2, -1120772452, -1106266873, -1094097345, 1042896310) + + W(3, 1043637149, -1090111931, -1097393337, 1045751664) + + W(4, 1040259489, -1133467790, -1104941765, -1159203906) + + W(5, 1047398869, 1044787930, -1101572298, 1015063331) + W(6, 1038054351, 1023591523, 1016760834, 1030908740) + + W(7, 1050576478, 1037223428, -1102689443, -1125005703); + sum2 = W(0, 1018053796, 1046306039, -1101305605, 1031448374) + W(1, 1000548496, 1043541862, 1029064982, -1112893849) + + W(2, 1049079603, 1010667960, -1116813778, 1021972628) + W(3, 1039189619, 1064529690, 1038487223, -1105518867) + + W(4, -1128574308, 1047996002, -1100258391, 1037842238) + + W(5, -1118597354, -1087406141, 1048794746, 1033382833) + + W(6, -1106984497, -1095617964, -1114392997, 1021089548) + + W(7, -1104543855, -1086946593, 1045938007, -1140458600); + WS(-1121537248, 1047151836); + sum1 = W(0, -1096325448, 1052654400, 1000450324, -1116556387) + + W(1, -1113135282, 1035132488, -1110395025, 1023519458) + + W(2, -1098188693, 1046868890, -1139200797, -1111260975) + + W(3, -1112146268, -1134150082, 
1049727010, -1110207458) + + W(4, -1108791588, -1129891280, 1041189572, -1104838938) + + W(5, -1117696601, -1098847494, 1056884317, -1098805187) + + W(6, -1123869651, 1032919412, 1041128337, -1146298440) + + W(7, -1114157115, -1107856679, 1058220805, -1103148146); + sum2 = W(0, -1142864271, 1043572739, -1125571574, -1107989855) + + W(1, 1021304865, 1026011378, 1028667063, -1140649559) + W(2, 1043366966, -1102859954, 1022196210, 1020722946) + + W(3, -1107584343, -1080136051, -1072538638, -1132367054) + + W(4, 1014758407, -1103597159, -1076945816, 1041979768) + + W(5, -1130465374, -1115654645, 1048815254, -1126224006) + + W(6, -1120740451, 1046511165, 1072551214, 1027667511) + W(7, 1000478551, 1053224660, 1074776028, 1048839210); + WS(-1086568910, 969651201); + sum1 = W(0, -1128189323, -1096064919, 1047521403, 1043461231) + + W(1, 1044154939, -1089612648, 1054908701, -1099266659) + + W(2, 1043534732, -1084798775, 1056987371, -1113308531) + + W(3, -1114366976, 1053159863, 1057685165, -1152040120) + + W(4, -1098469330, 1053431542, -1089249613, 1042761408) + + W(5, -1101372520, 1050787607, -1090701774, 1049635020) + + W(6, -1123038043, 1036747448, -1105030179, 1010635844) + + W(7, -1132910587, -1116020373, 1027268120, 1018344000); + sum2 = W(0, 1042932965, -1103892922, -1104364155, -1139842168) + + W(1, -1103428495, 1060495074, 1060109323, -1118896922) + W(2, 1039997403, 1052448567, 1064108701, 991756114) + + W(3, -1093583228, -1075640666, -1073056297, -1097481656) + + W(4, 1044856824, 1057723154, 1055665788, 1040659602) + W(5, -1121819542, 1057002090, 1060503917, -1099984054) + + W(6, -1177180368, -1112150955, -1116550688, 1034919451) + + W(7, 1028413178, -1118134166, -1110515035, 1041069777); + WS(-1084093518, -1116656412); + sum1 = W(0, 1002008836, 1058416208, -1096672513, -1112390467) + W(1, 959481663, 1026388179, -1109778819, 1023428581) + + W(2, -1104590931, 1059097973, -1088144465, -1112632704) + + W(3, 1050696243, 1042079029, -1096264126, 1040470858) + + W(4, 
-1106608235, 1041679827, -1101432288, -1102954054) + + W(5, 1047962207, 1049127990, -1100480244, 1047012782) + + W(6, 1030562773, 1031439243, -1113594772, -1112144502) + + W(7, 1015783795, 1042535660, -1093923750, -1145396437); + sum2 = W(0, 1007960967, 1027407916, -1129957403, -1132155403) + + W(1, -1110192070, 1010716935, 1018303319, -1121935910) + + W(2, 1049899326, -1097456083, 1037417999, -1137993343) + + W(3, 1056650247, -1070859565, -1094713389, -1142796285) + + W(4, 1078512141, -1068241064, -1096379657, 1039214170) + + W(5, 1081472440, -1079514242, 1043613542, -1111213656) + + W(6, 1058404171, -1094427160, 1025537830, 1032528613) + W(7, 999915485, 1030657127, -1116925932, 1024356221); + WS(1052225948, 1018668194); + sum1 = W(0, -1120453498, 1051775516, -1117579103, -1107074656) + + W(1, -1123160289, 1048325451, -1111291056, -1112742544) + + W(2, -1108654867, 1058214550, -1093397513, -1147977428) + + W(3, -1104839613, 1064670427, -1091445969, -1119335936) + + W(4, 1041378113, -1126638409, -1092047898, -1109154218) + + W(5, 1036965515, 1049967085, -1099225050, 1034167881) + + W(6, 1044186084, -1096299613, -1112850625, -1138567959) + + W(7, 1034438594, -1128237844, -1112057991, 1019177609); + sum2 = W(0, -1121001958, -1117606942, -1103097305, -1130038396) + + W(1, -1148685985, -1111847761, 1021935234, -1112487949) + + W(2, 1018483434, -1120566718, 1049895592, 1038078839) + + W(3, 1021815720, -1076982600, -1081972918, -1125812992) + + W(4, 1049361594, 1074852012, 1051964198, -1152546434) + + W(5, -1108634723, 1048076514, 1040925857, -1104919092) + + W(6, -1132118692, -1102180777, 1017568960, 1026180028) + + W(7, -1105580672, -1116866514, 1040155911, -1123068468); + WS(-1096382876, -1091051652); + sum1 = W(0, -1116126267, 1057278592, -1096000788, -1116910752) + + W(1, -1113197617, 1048826126, -1097274020, -1140807037) + + W(2, -1112424211, 1058810754, -1088739494, 1039925053) + + W(3, -1124324715, 1056304110, -1091311905, -1132679170) + + W(4, -1136579346, 
1041995897, -1114916522, -1107191661) + + W(5, 1040875430, 1016295122, 1038826452, -1119450067) + + W(6, -1113905512, 1024916389, -1128507781, 1023437062) + + W(7, 1017323784, 1042534003, 1017648027, -1098370349); + sum2 = W(0, -1120311657, 1025682064, 1036682152, -1108273289) + + W(1, 1036113080, -1107470193, -1108061877, 1012612647) + + W(2, -1109045199, 1035621932, 1051542033, -1101063214) + + W(3, 1029662296, -1118009189, -1101792442, -1076098595) + + W(4, -1122358251, 1045290388, -1093646778, -1080861029) + + W(5, -1114085873, 1009830751, 1056395710, 1039398973) + W(6, 1016737279, 1035169596, -1091719234, 1065137390) + + W(7, -1115835645, 1030264440, 1052430993, 1072502688); + WS(1040082544, -1114755812); + sum1 = W(0, 1017784372, -1110974758, 1040296296, 1024480479) + + W(1, 1037505264, -1108514902, 1041424680, -1104471944) + + W(2, 1045745417, -1095324708, 1054913780, -1106439461) + + W(3, 1019379817, 1052579502, -1106396419, -1115066496) + + W(4, -1109308706, 1058051822, -1087512533, 1040857799) + + W(5, -1124056118, 1046163210, -1093666877, 1001482384) + W(6, 1025470519, 1038459986, -1106674096, 995688529) + + W(7, 1039469090, 998280780, -1109356390, -1108404770); + sum2 = W(0, -1162396366, 1045315846, 1034748092, 1035646876) + + W(1, -1117720653, -1118054954, 1034773210, -1100298043) + + W(2, -1104226850, 1057942904, 1043388435, -1097646834) + + W(3, -1105580348, 1059476362, 1067689202, -1093789486) + + W(4, -1105643813, -1090022037, -1083232471, 1040357620) + + W(5, 1031813906, 1045409162, -1101262587, -1113573448) + + W(6, 1011045214, -1106412098, -1124577575, 1012935222) + + W(7, 1018164327, 1023977529, -1102021751, 1024454049); + WS(1036525168, -1082462584); + sum1 = W(0, -1116308971, -1085123800, 1057675329, 1019612704) + + W(1, 1037298441, -1087372070, 1051660338, 1027541711) + + W(2, 1027885589, -1086414101, 1059597873, -1109257541) + + W(3, -1122874917, -1096064460, 1059581688, 1041073820) + + W(4, -1154089797, -1099879381, 1052784510, 1028059367) 
+ + W(5, -1108384819, -1104319094, -1117339864, 1045372305) + + W(6, 1028179743, -1127380522, 1039251609, 1031554934) + + W(7, 1003463273, -1096394215, -1110971807, 1053758651); + sum2 = + W(0, 1060691160, -1081285622, -1080809247, 1052276353) + W(1, -1103295177, -1090050073, 1050684042, -1106187369) + + W(2, 1035985281, -1102851017, -1087522637, 1048638013) + + W(3, -1085964582, 1072728426, 1060852217, -1090587145) + W(4, -1105398566, 1060511611, 1058511518, 1029176048) + + W(5, -1107551093, 1033954581, 1041242888, -1094879840) + W(6, 1049030608, -1086630634, 1028348456, 1046256182) + + W(7, -1090293163, 1058069039, 1062481845, -1087039462); + WS(-1074352935, 1040600857); + sum1 = + W(0, 1042034194, 1057627204, -1090157751, 1024364622) + W(1, 1028652336, 1025049468, -1105875707, -1105990299) + + W(2, 1024054004, 1057567995, -1088931067, 1048022018) + W(3, 1030695172, 1022098295, -1095088235, -1100928004) + + W(4, 1029627411, 1052602222, -1094832321, 1049957878) + + W(5, -1117605317, 1039437212, -1104356174, -1095846092) + + W(6, -1114002447, 1049556590, -1107260676, 1047165477) + + W(7, 1041624512, 1053416947, -1101255271, -1098759166); + sum2 = W(0, -1075440350, 1073165865, -1112383192, -1148975848) + + W(1, -1079714919, 1065983198, 1048707236, -1115591486) + + W(2, -1079134350, 1068716226, -1135853868, 1038054064) + + W(3, -1081735031, 1066499710, -1098543278, -1125971282) + + W(4, 1055569671, -1095308006, 1021808504, 1040311395) + + W(5, 1066585571, -1080754250, -1108920844, -1110109816) + + W(6, 1071644179, -1074800849, 1050977418, 1035370814) + + W(7, 1075476470, -1073191304, -1090378667, -1120408347); + WS(1041022776, 1033480094); + sum1 = + W(0, -1132576057, 1026327841, -1110434490, -1119031508) + W(1, -1123321429, 1042302896, -1109312954, 1021323174) + + W(2, 1028959481, 1052457903, -1089918670, 1029242558) + W(3, -1104176578, 1063677500, 1025377120, -1118638740) + + W(4, 1046472198, -1097607903, -1090934974, 1013298461) + + W(5, -1121060138, -1123802109, 
1046523967, -1106699146) + + W(6, 1027556403, -1118671609, -1112257611, 1030036314) + + W(7, -1115611836, 1039698959, 1000531209, -1116635705); + sum2 = + W(0, -1106027239, 1049930690, -1110218398, 1034564779) + W(1, -1122286155, -1127267122, -1108798456, 1018367610) + + W(2, -1102546078, 1041115945, 1058786995, -1095860458) + W(3, 1055155948, -1091935564, 1075372513, 1043590800) + + W(4, -1096754888, 1052250618, -1071163371, -1112711414) + + W(5, 1032068992, -1103388917, -1123143101, -1119323979) + + W(6, -1121774513, 1038737216, -1121707895, 1016553114) + + W(7, 1036170969, -1108790214, 1032355550, 1017858173); + WS(1055618972, -1117202987); + sum1 = + W(0, 1030962480, 1039894129, -1101932469, 1031993659) + W(1, -1101414710, 1053882576, -1089999221, 1048656807) + + W(2, -1098100923, 1057347458, -1085784352, 1043352452) + W(3, -1111740150, 1049814740, 1051098296, 1031031542) + + W(4, 1036980413, -1090053902, 1056052448, -1104607674) + + W(5, 1041005827, -1088233382, 1051763396, -1119077705) + W(6, 1021681163, -1105970903, 1044301630, 1022017143) + + W(7, -1112834597, -1117339136, 1032801282, 1039235901); + sum2 = W(0, -1114659327, -1101541229, -1107338771, 1039378885) + + W(1, -1123577690, 1043634319, 1057974128, -1111523166) + W(2, 1042921002, 1066882360, 1050113378, 1040130625) + + W(3, -1105097716, -1077668162, -1080704784, -1104122760) + + W(4, 1042773509, 1037629509, 1051155360, 1012227066) + W(5, -1093975266, 1058638399, 1052705661, -1110420726) + + W(6, 1032061179, -1108208741, 1008396554, -1125218199) + + W(7, 1000405669, -1131856909, -1111189711, 1025207949); + WS(-1091387548, -1116324289); + sum1 = W(0, 1026642697, -1112220622, 1049897926, 1027958127) + + W(1, 1044682252, -1093973688, 1047211372, -1098544270) + + W(2, 1041967213, 1046046637, 1050365286, -1112662293) + + W(3, 1032214050, -1086442024, -1112122925, -1106755108) + + W(4, -1131062694, -1099996941, 1046223135, -1116559746) + + W(5, 1015214154, 1053177927, -1132096750, 1034258897) + + W(6, 
-1137069945, -1098783494, 1024489425, -1111108842) + + W(7, -1110158325, 1037567917, 1042021623, 1031638516); + sum2 = + W(0, -1108095393, 1011776651, -1101257730, 1019145070) + W(1, 1016776222, 1024100809, -1105403134, 1032882678) + + W(2, 977935538, -1097248934, -1101986254, -1124027994) + + W(3, -1122468710, 1057627889, 1060632600, -1103033100) + W(4, 1019959206, 1042194141, -1090719303, 1039533068) + + W(5, 1030099429, -1117226417, 1037412790, -1110127978) + + W(6, -1104930054, 1051928720, -1109292621, -1125661478) + + W(7, 1002749526, 1041814459, -1115385474, -1108291818); + WS(1057965518, -1118811194); + sum1 = + W(0, -1104352985, -1106566686, 1052732873, -1107147458) + W(1, 1038892389, -1112643723, 1003767366, -1114250980) + + W(2, -1105058276, 1026830542, 1060358287, -1096574819) + W(3, 1042490232, -1091938531, 1019887594, 1048844541) + + W(4, -1121521774, 1058056073, -1118338013, 1024329053) + + W(5, -1099594944, -1101314965, 1042464324, -1107780158) + + W(6, 1033536020, -1098044778, 1024562028, 1032778872) + + W(7, -1095161930, 1039936570, 1046000094, -1107207172); + sum2 = + W(0, -1116205334, 993944814, 1032713731, -1106373772) + W(1, 1042905425, -1113078065, -1104977559, 1039619019) + + W(2, -1109155755, 1044406596, 1048672381, -1115343154) + W(3, 1044205280, 1040105843, -1104277422, 1048332350) + + W(4, 1035597621, 1043112037, -1103087244, 999501191) + W(5, 1008636092, -1123380436, -1153474446, -1106439836) + + W(6, 1036458751, -1105268129, 1015784078, 1021528398) + + W(7, -1092853308, 1005260887, 1036094123, -1106473413); + WS(1053420700, 1049909457); + sum1 = W(0, -1115258034, -1106165874, 1050401361, -1113250117) + + W(1, 1034071238, -1089903413, 1041332866, 1023742249) + W(2, 1027276239, 1036166247, 1049384898, -1130552842) + + W(3, -1115686559, -1100423414, 1037380859, -1120158506) + + W(4, 1041096236, -1093491554, 1052388787, 1037333727) + + W(5, -1102788144, 1049835542, 1034784425, -1103514783) + + W(6, 1034068157, -1095323722, 1031952854, 
1037170040) + + W(7, -1107460367, -1110396954, 1050882791, -1114330639); + sum2 = W(0, -1112500393, 1024557142, -1136367764, 1020782048) + + W(1, 1028289272, -1165623582, 1035333264, -1122738507) + + W(2, 1049127312, -1095250820, -1114028070, 1025362120) + + W(3, 1086771603, -1062269001, -1087155359, 1034731082) + + W(4, 1048088739, -1095068995, 1022077560, -1125007838) + W(5, 1020087240, 999933935, 1034746908, -1120796207) + + W(6, 1014851096, -1135945992, -1120864651, 1026594640) + + W(7, -1146529543, 1025716258, -1127622484, -1128255426); + WS(1063806286, 1027083983); + sum1 = W(0, -1114823180, 1045480194, -1118427516, -1109705966) + + W(1, 1030313470, 1039206373, -1106496703, 1020453816) + + W(2, -1111791457, 1061885616, 1010686340, -1103432478) + + W(3, -1111977289, 1050451993, -1083830979, 1039658735) + + W(4, -1108043857, 1057912708, -1099420545, -1114670584) + + W(5, 1019729830, 1036575619, -1105267089, -1109909656) + + W(6, -1114640622, 1039765761, -1131920190, -1113587473) + + W(7, 1033983240, 1034424532, -1125840468, -1112770081); + sum2 = + W(0, 1033978022, -1122290634, 1016780797, -1164938552) + W(1, -1100194899, -1091664523, -1103111994, 1002490582) + + W(2, 1033701105, -1071413373, 1047766898, -1105595809) + W(3, 1050426550, 1075108998, -1096749849, 1051489664) + + W(4, -1104028404, 1062092502, 1027350687, -1108336227) + + W(5, 1034191613, -1103668501, -1131589290, 1021076655) + + W(6, -1141130302, 1034149895, -1131724600, -1149103768) + + W(7, 1010895851, -1140709165, 1027888251, -1131210373); + WS(1058288590, 1045994186); + sum1 = W(0, 1018135640, -1093342072, 1052167946, 1038262801) + + W(1, -1123855059, -1096797372, 1041780692, 1015346466) + + W(2, 1040223430, -1086390197, 1052872019, -1132654257) + + W(3, -1112801779, -1105293320, 1059840396, -1134794696) + + W(4, 1036317947, -1091216419, 1051866430, -1117665797) + + W(5, -1098800665, -1117054299, 1050341107, 1012611090) + + W(6, 1034707665, -1096209979, 1048652350, -1139713758) + + W(7, 
-1096517871, 1023470716, 1050625648, 1010858330); + sum2 = W(0, 1041114210, -1099008397, -1123917868, -1125418381) + + W(1, -1099284356, 1047276236, -1111523652, -1120564910) + + W(2, 1038292116, -1081084758, 1037708884, 1020505319) + + W(3, -1092208278, -1071665325, -1086778305, -1100128811) + + W(4, 1054381469, 1054365119, -1101037019, 1024474915) + + W(5, -1104534119, 1081581342, 1026238413, -1125603475) + + W(6, 1025804573, 1062322644, -1134916894, 1025011807) + + W(7, 1033400256, -1093538640, 1052085127, -1103707544); + WS(-1088887374, 1070119449); + sum1 = W(0, 1040460421, 1052058645, -1090595183, -1105506686) + + W(1, 1041554590, 1048371361, -1115426747, 1029097335) + + W(2, 1013850612, 1048732808, -1094790815, -1129317061) + + W(3, 1038262022, 1042754705, -1087902678, 1001782464) + W(4, 1042710919, 1047384127, -1106656361, 1038072126) + + W(5, -1106869091, 1054439312, -1090477876, -1131475029) + + W(6, 1044864290, 1032508990, -1122940954, 1028935005) + + W(7, 1046057615, 1042772520, -1091795237, -1119515740); + sum2 = + W(0, 1024604999, 1056461606, 1071685965, -1075212110) + W(1, -1112923336, 1030035123, 1066713447, -1080779344) + + W(2, 1027433809, 1050727550, 1068313262, -1078194918) + W(3, 1025504127, -1090534096, 1028028856, -1153515923) + + W(4, 1007245204, -1105339223, -1081367671, 1068056044) + + W(5, -1144581942, -1095507508, -1078816510, 1069903211) + + W(6, -1129047399, 1043793174, -1081156501, 1064173276) + + W(7, 1021294865, -1095638178, -1090770344, 1060536751); + WS(1040470840, 1011790950); + sum1 = W(0, 1015095158, 1043725275, -1105535856, -1108386992) + + W(1, -1120734562, 1051465082, -1108441440, 1030149375) + + W(2, -1116691398, 1058372660, -1095343471, -1112701741) + + W(3, -1118217942, 1055309890, -1081260869, 1026499745) + W(4, 1019898057, 1046636817, 1047492563, 1032201014) + + W(5, -1123629847, 1044874098, -1093096561, -1115283258) + + W(6, 1034607368, 1043010193, -1105294873, 1028457510) + + W(7, 1041822001, 1040878843, -1106259483, 
-1112399171); + sum2 = + W(0, 1022830461, -1117997331, 1037270083, 1023767946) + W(1, 1021246145, -1110014342, -1125616185, -1130403951) + + W(2, 1015115055, -1111993293, 1044158772, 1025796328) + W(3, -1114703004, 1036613835, 1073620398, 1050812248) + + W(4, -1120910531, 1023720090, -1076883688, -1112499600) + + W(5, 1015720307, -1117604953, -1087890801, 1028283916) + W(6, 983370825, -1120463052, 1025754476, -1104874229) + + W(7, -1123770073, 1036937497, -1113287589, 1033737696); + WS(1058309838, -1101980246); + sum1 = + W(0, -1118965337, -1110907254, 1038642111, 1028903966) + W(1, -1098779704, 1049643561, 1038999679, -1108240479) + + W(2, 1049071256, -1081904943, 1054751182, 1040887994) + W(3, -1113399754, 1042163763, 1056093093, -1118832415) + + W(4, 1026571427, 1052138696, -1083956910, 1046797612) + + W(5, -1110569018, -1106220306, 1050507993, -1104697276) + + W(6, -1131332475, 1025191299, -1146247429, -1123883748) + + W(7, -1128371676, -1129449041, 1030864044, 1017502086); + sum2 = W(0, -1137655511, -1111106723, 1035105104, -1112318930) + + W(1, 1043191716, 1033166752, -1112822728, 1027572041) + + W(2, -1121349336, 1033268708, -1104857525, 1035699332) + + W(3, -1097517035, 1052576386, 1055574555, -1097542777) + + W(4, 1046822141, -1107318253, -1114990789, -1110157019) + + W(5, -1129223371, -1102887232, -1121581721, 1031007843) + + W(6, -1116759551, 1040601750, -1152414880, -1157281192) + + W(7, -1124684976, 993535634, -1107957817, 1025775603); + WS(1063710542, -1111213649); + sum1 = W(0, 1039559517, 1052251350, -1101160384, -1113889808) + + W(1, -1110192145, 1049489370, -1097695264, 1031901152) + + W(2, -1107309859, 1058328276, -1093740712, -1104513406) + + W(3, -1118762901, 1061120005, -1096321197, -1097003636) + + W(4, -1108199122, 1049656673, -1097289639, -1112381384) + + W(5, 1037245627, 1033876357, -1114972772, -1176476024) + + W(6, -1139769966, 1041987905, -1102516745, 1022802380) + + W(7, 1035326852, 1045428971, -1103522251, -1115185874); + sum2 = W(0, 
1026803387, -1115186477, 999853755, -1112425236) + W(1, 1021750253, 1031459540, 1031446318, -1106593609) + + W(2, 1040812059, -1116326399, 1058720440, -1086524651) + + W(3, -1107480079, 1040651083, 1071698983, -1075753828) + + W(4, 1041131835, 1020594503, 1051128601, -1096266051) + + W(5, -1115082464, 1033768858, 1034660038, -1108942599) + + W(6, 1006187407, -1132516997, 1027131696, -1123360193) + + W(7, 1001155939, 1015478283, 1041372466, -1104453127); + WS(1039772272, -1082982873); + sum1 = W(0, 1039127452, 1049032497, -1098139952, -1155633625) + + W(1, -1106264016, 1053169593, -1088020664, 1043520647) + + W(2, -1104546353, 1060522592, -1087153807, 1040400554) + + W(3, -1145518233, 1056693961, 1038834197, -1139907193) + + W(4, 1041713673, -1095982984, 1053429837, -1104883452) + + W(5, 1044410893, -1088607916, 1048228018, -1103034636) + + W(6, 1039725795, -1099512929, 1030077856, -1134131855) + + W(7, 1007790526, -1120546088, -1130616150, 1012687882); + sum2 = + W(0, 1032987173, -1092902636, 1027660911, 1032111389) + W(1, -1102803435, 1037122437, 1039508875, -1101516234) + + W(2, -1097110538, -1083842457, -1115097563, 1001644292) + + W(3, -1101871105, 1067610414, 1067552095, -1104358647) + + W(4, -1101555589, 1033006847, -1096151174, -1145661076) + + W(5, -1123533577, 1041814167, 1026567371, -1104681426) + + W(6, -1098769512, 1045724867, -1107225085, 1020958965) + W(7, 1048796624, 999019252, -1105667947, 1028203943); + WS(-1101412664, 1073047832); + sum1 = W(0, -1113880945, 1059007377, -1091904865, -1133362719) + + W(1, -1123950537, 1047579477, -1097814305, -1131741454) + + W(2, -1109995975, 1060416389, -1087199740, 1032803377) + + W(3, -1123565085, 1056063051, -1092124214, -1113780884) + + W(4, -1123728428, 1046383171, -1099567891, -1113889390) + + W(5, 1035385983, 1040643140, 1038341800, -1126485237) + + W(6, -1115318676, 1033389702, -1119557930, 1012558775) + + W(7, 1010675502, 1041003725, 1042882861, -1096461162); + sum2 = W(0, 1013686761, 1030949359, 
-1109223248, -1132915573) + W(1, -1123503757, 1039110679, 1035635962, 996209923) + + W(2, 1035262859, -1105775253, -1106751414, 1029135163) + + W(3, -1115490710, 1027204409, 1057425426, 1070858778) + W(4, 1025861040, 1019519396, 1043312829, 1067133266) + + W(5, 980681483, -1111394964, -1098000154, -1098902818) + + W(6, -1125276038, 1024413043, 1051287795, -1083268694) + + W(7, -1129892230, -1126757058, -1103421662, -1073095921); + WS(-1103532344, -1105840701); + sum1 = + W(0, -1106756472, -1105825052, 1050195304, 1044840709) + W(1, -1106322913, 1053932942, 1007328528, -1104890465) + + W(2, 1029810954, -1086676361, 1057202097, -1144507642) + + W(3, -1096389739, -1098358914, 1054492326, -1098185309) + + W(4, -1116067305, 1054020609, -1093369370, 1032208700) + W(5, 1035916925, -1096691570, 1012599092, 1048715303) + + W(6, -1105566449, 1051257001, 1027855335, -1100267787) + + W(7, 1034505108, 1026552303, -1101687900, 1050640157); + sum2 = W(0, -1111707317, -1101137684, -1116414033, -1103752872) + + W(1, -1119262447, -1109186725, -1107841286, 1034593530) + + W(2, 1037989791, 1049211868, 1056722210, -1110917586) + + W(3, 1056036881, -1099242284, -1094543747, 1049861706) + + W(4, -1105645897, 1038198606, 1053421235, 1030171051) + + W(5, 1006194414, -1118866635, -1104714858, -1103907620) + + W(6, -1146663095, -1109483964, -1120693058, 1032413269) + + W(7, -1129479912, -1114787638, -1115357885, 1029169157); + WS(1045178680, -1115117954); + sum1 = W(0, -1102680211, 1048681528, 1019008549, 1008490315) + + W(1, -1123018420, -1113607536, 1040296293, 1012835273) + + W(2, -1116849368, -1092105355, 1035615004, 1027033246) + + W(3, 1024407336, -1093530556, 1061339424, -1128145511) + + W(4, -1107241006, -1096193779, 1057711678, -1103222073) + + W(5, 1026392201, -1102060551, 1055864515, -1110547491) + + W(6, -1107918421, -1102337510, 1045891981, -1110097884) + + W(7, -1123314455, -1095199752, 1052562824, 1020395952); + sum2 = W(0, -1108321996, -1096948503, -1112108000, 1026091852) + 
+ W(1, -1115537892, -1104693583, -1112710060, 1022753169) + + W(2, -1106228104, 1047339287, -1126080161, -1120501740) + + W(3, 998645667, 1055675007, 1051246853, -1105262859) + W(4, -1112292909, 1049114511, 1041989299, -1123292948) + + W(5, 1035781218, -1100682049, 1025978716, -1139782977) + + W(6, -1111036660, 1032758858, 1034053890, -1111667328) + + W(7, 1018530825, -1113969306, -1113712936, 1033000622); + WS(1044590904, 1058699692); + sum1 = W(0, 985175380, -1092606720, 1051210502, 1009220799) + W(1, 1034192409, -1090621088, 1050384326, -1109361592) + + W(2, 1020554347, -1093618783, 1055009987, -1117059707) + + W(3, 1024014533, -1113905855, 1062574818, -1128567457) + + W(4, -1115433194, -1096920415, 1054554325, 1015981863) + + W(5, -1128687821, -1112503613, 1031218808, 1039114097) + + W(6, 1008304190, -1100279725, -1105751509, -1127205548) + + W(7, -1115445028, -1101027255, -1144235755, 1035507338); + sum2 = W(0, 1036009101, -1108619644, 1010492213, -1131279719) + + W(1, -1117283755, 1041337919, -1100684466, 1035041542) + + W(2, 1033667347, 1012813669, 1015579759, -1124020763) + W(3, -1107239966, 1053310286, 1056064408, 1025470531) + + W(4, 1034685217, -1103814148, 1050883237, 1032198922) + + W(5, -1128406639, 1029066827, -1113069964, -1104788458) + + W(6, -1123321687, -1109499964, -1098878001, 1024066141) + + W(7, -1125402335, -1123197815, -1096911819, -1108347132); + WS(1044030776, -1089132931); + sum1 = W(0, 1034269487, -1093699058, 1045160768, 1040395197) + + W(1, 1032883970, -1097458090, 1050995424, -1135242887) + + W(2, 1042439154, -1086092656, 1057188990, 1040422743) + + W(3, 1044751430, -1078975458, -1096290982, 1048952172) + W(4, 1019707982, 1035194756, 1032938655, 1042217766) + + W(5, 1041434446, 1038040390, 1043682915, 1043586045) + W(6, -1133288904, -1131544335, 1022768458, 1029073256) + + W(7, -1128708109, -1101499019, 1043305900, 1044718778); + sum2 = W(0, 1043294282, -1107262777, -1132728616, -1112978295) + + W(1, -1127265324, -1107127922, 
-1133435576, -1101649503) + + W(2, 1035036911, 1048700262, -1115280193, -1142433873) + + W(3, 1051203976, -1114081637, 1060204569, -1138065032) + + W(4, -1092441683, 1027047222, -1096051714, 1049542158) + + W(5, 1037063919, -1087538045, 1059699424, 1027406094) + + W(6, -1095318743, 1043898666, -1091030613, 1036599707) + + W(7, -1096741875, -1114635611, 1057805441, -1117298502); + WS(-1083489614, -1078579141); + sum1 = W(0, -1113694287, -1091060438, 1048678744, 1022130309) + + W(1, 1048897616, -1104759380, 1055607032, 1045345264) + + W(2, -1104500498, -1089217572, 1025763911, -1098656951) + + W(3, 1029397720, -1096433234, 1056656214, 1045280313) + W(4, 1039983475, -1090900481, 1055730826, 1003335600) + + W(5, -1099529286, -1103761763, 1039576037, -1104729181) + + W(6, 1047818144, -1126261903, 1050842685, 1045845834) + + W(7, -1100400745, -1098862766, 1044462332, -1124798927); + sum2 = W(0, 1034867092, -1113896283, -1089397746, 1053130734) + + W(1, 1036831152, 1008734783, -1143881728, 1048547317) + + W(2, -1155378720, -1107821706, -1090933436, 1050664536) + + W(3, 1050005016, 1036439980, 1067046868, -1081990202) + + W(4, -1109019109, -1105188391, 1074023168, -1073616958) + + W(5, -1107857756, -1103852014, 1073004488, -1073650181) + + W(6, 1025727369, -1113326142, 1071596064, -1078582463) + + W(7, -1105627363, 1036502992, 1068047188, -1079035066); + WS(-1123566816, -1118788492); + sum1 = + W(0, 1034266605, -1131696345, -1102436480, 1029920799) + W(1, -1136553988, 1054570946, -1101485920, 1027694719) + + W(2, 1032767842, 1043367745, -1093672570, 1029228634) + W(3, -1119057557, 1057910725, -1081257723, 1038070160) + + W(4, 1032596498, 1051309065, -1104247863, 1041012541) + + W(5, -1138237144, 1050053561, -1097237249, -1104490509) + + W(6, 1034235800, 1044754161, -1100737384, 1036672371) + + W(7, 1038972738, 1047609962, -1118734125, -1101643609); + sum2 = W(0, 1001205015, -1108935456, -1123729961, -1151981614) + + W(1, 1006355343, 1048575251, 1037564428, -1123825361) + + 
W(2, 1030505158, -1114918170, -1096130861, 1015236500) + + W(3, -1119701641, -1088419213, -1068662368, -1095410362) + + W(4, 1039794598, 1032736312, 1076967646, 1049774729) + W(5, -1107202062, 1045905661, 1068403675, -1105335733) + + W(6, 1026836118, -1116946341, -1104305708, 1050153445) + + W(7, -1115440174, 1003805295, -1124006983, -1103176791); + WS(-1103567160, 1051728620); + sum1 = + W(0, 1022317012, 1037463598, -1094952698, 1033922872) + W(1, -1105517764, 1027268637, -1107670055, 1024776175) + + W(2, -1109296554, 1052072875, -1103880757, 1038888053) + W(3, 1030946149, -1094107172, 1052944567, 1043766492) + + W(4, 1032988986, -1087396213, 1057943536, -1110506946) + + W(5, -1117957958, -1098739651, 1052382091, -1108829569) + + W(6, 1025729823, -1104965205, 1036164793, -1132271979) + W(7, 1009731405, 1017302004, 1041821631, 1015545129); + sum2 = + W(0, 1031290011, -1127050317, -1113481669, -1080536402) + W(1, -1114551402, 1026829489, 1042490649, -1076466135) + + W(2, -1137203270, 1032334734, 1043198364, 1038797044) + W(3, 1025723227, -1098446681, -1127701979, 1074254195) + + W(4, 1020823723, 1041359246, 1057793899, 1035340712) + W(5, -1107112701, 1041376772, -1100591458, -1118262367) + + W(6, -1156747895, 1034398482, -1114292550, 1013380262) + + W(7, -1140368490, -1104694439, 1037536680, 1032104302); + WS(-1106439480, 1029090439); + sum1 = + W(0, 1034559195, 1042655458, -1102396839, 1026400220) + W(1, -1151887836, 1030625962, -1115609614, 1035325646) + + W(2, 1020849475, 1059377738, -1089162129, 1042909598) + W(3, 1042053132, -1084013087, -1083731653, 1039293086) + + W(4, 1043127920, -1107145304, 1052971191, 1029716960) + W(5, 1043814293, 1032281331, -1107154457, 1024778063) + + W(6, 1037260390, 1040913704, -1113717087, 1028666946) + W(7, 1043715864, 1033172451, 1017680531, -1130742978); + sum2 = + W(0, -1129107524, 1021999108, 1022835844, -1129901940) + W(1, -1123636938, -1148550480, 1016982596, -1140398984) + + W(2, -1116921954, -1104180688, 1016773924, 
1030858450) + W(3, 1035143287, 1058583951, 1054454385, -1117386658) + + W(4, 1036996009, -1112895549, -1097314323, -1108156993) + + W(5, 1026655402, -1116180274, -1099699990, -1118751234) + + W(6, -1117010042, -1131502740, -1132536424, 1016259828) + + W(7, -1126794884, -1138164712, -1129763396, -1117013506); + WS(1048151864, 1059242544); + sum1 = + W(0, -1122367849, -1097114939, 1043794348, 1026875087) + W(1, -1121466768, -1097911384, 1033810391, 1014185912) + + W(2, 1042165293, -1085899247, 1063564267, -1106211614) + + W(3, -1116044868, 1042506911, 1042284551, -1111561384) + + W(4, -1105437493, 1056857214, -1089674786, 1040868541) + W(5, 1037358179, -1105063934, 1049648847, 1028107682) + + W(6, -1125163356, -1123578292, 1021855394, -1111395274) + + W(7, -1111398955, -1106114902, 1045389303, 1010402126); + sum2 = W(0, -1139648101, -1110557326, -1106733442, 1039330638) + + W(1, -1108856547, 1041627874, -1106727608, 987619817) + W(2, 1015236663, 1032833232, 1048813377, 1025349119) + + W(3, -1095822659, 1054902128, 1055403310, -1095503796) + + W(4, -1137977893, -1098765424, -1097123375, 1050144719) + + W(5, 1038373686, -1121592121, 1045561320, -1108448514) + + W(6, 1015865663, -1106594443, -1106386080, 1041979856) + + W(7, 1025690243, -1117564251, 1017823319, -1105901385); + WS(1058511566, 1043187024); + sum1 = + W(0, 1042620242, 1041476871, -1106861387, 1043942651) + W(1, 1027981614, 1044640335, -1120607404, 1040914531) + + W(2, 1037513401, 1039039385, -1101062960, 1040902562) + W(3, 1050564856, -1085132912, -1089194338, 1048598306) + + W(4, 1040390932, -1095196494, 1032203084, 1019897185) + W(5, 1044188393, -1116362934, -1134658518, 1032634704) + + W(6, 1032851398, -1107832665, 1026497359, 1027555603) + + W(7, 1043046140, -1105825814, -1105488567, 1032742569); + sum2 = W(0, -1094808435, -1083260450, 1041967320, 1026111290) + + W(1, -1132385524, -1089624726, -1102801635, 1034887813) + + W(2, -1110994277, -1098133391, 1050258559, -1123786474) + + W(3, 1039748829, 
1063143581, 1037743779, 1033106845) + W(4, 1038866141, 1043371703, 1048943341, -1110363077) + + W(5, 1012861192, 1040218148, -1112998265, -1121754906) + W(6, 1030487914, 1012758152, 1031702418, 1012203560) + + W(7, -1132742376, 1052570990, -1105604919, -1111765769); + WS(-1087236686, 1061665912); + sum1 = W(0, -1115274057, 1023071688, -1107980923, -1119301463) + + W(1, -1116162424, 1030200068, -1101866270, -1128116268) + + W(2, -1104461343, 1054050807, -1092884195, -1109238785) + + W(3, -1093387666, 1067462106, 1056570668, -1095571507) + + W(4, -1113556238, 1050597447, 1044256537, -1103733247) + + W(5, -1115159031, 1031104576, -1106234576, -1111522325) + + W(6, -1126942285, -1121547516, -1128835891, -1111458030) + + W(7, -1121543315, 1042315272, -1114796114, -1109780718); + sum2 = + W(0, -1127633710, 1024134807, -1114424195, -1109129714) + W(1, 1024854732, 1020748524, -1098546148, 1025355297) + + W(2, -1123781128, -1117094752, -1079772750, -1102118069) + + W(3, 1040493279, 1065417820, 1059484680, -1114766469) + W(4, -1105896535, 1047826025, -1107504040, 1033237727) + + W(5, 1033983514, -1102101577, 1049649199, -1154145757) + + W(6, -1122459292, 1040101538, -1105983778, 1012710689) + + W(7, -1120616672, -1124130304, 1041220419, -1123999628); + WS(-1092106140, -1084163121); + sum1 = + W(0, 1036347305, 1049849489, -1101083230, -1106738110) + W(1, 1016904817, 1040213041, -1152694122, -1115216475) + + W(2, 1034414494, 1056746079, -1083169173, 1039797369) + W(3, 1040590819, 1048812489, -1107117385, 1022569263) + + W(4, 1047670567, -1084779712, 1051667767, -1112982247) + + W(5, 1030378702, 1040726752, -1109066592, -1125038036) + W(6, -1114654031, 1033236350, 1024925978, 1025136605) + + W(7, 1037868673, 1034988241, -1107106473, -1146668256); + sum2 = W(0, -1144361879, -1111008682, -1092072719, 1021883158) + + W(1, -1128843246, -1134810723, -1121102479, -1116668390) + + W(2, 1005924247, 1041181160, 1045581242, -1109114373) + + W(3, -1104940693, 1054697191, 1046555600, 
-1113715104) + + W(4, -1123234701, 1052731012, -1103970223, -1128555074) + + W(5, 1030348431, 1042931729, -1104348933, 1032936433) + + W(6, 1032437115, -1112071203, -1113690885, -1136719027) + + W(7, 1024671661, -1111460488, -1129885552, -1114960949); + WS(1058637774, -1109876822); + sum1 = + W(0, -1123254812, 1042623060, -1126486401, -1112485473) + W(1, 1041301297, -1105094644, -1111052512, 1037949088) + + W(2, -1108082435, 1059472952, -1094910408, 1016105496) + + W(3, -1113241451, 1043969956, -1094009726, 1038892497) + + W(4, -1110973875, -1113269955, 1058531737, -1093047519) + + W(5, 1027216056, 1026816394, -1100061186, 1032398170) + W(6, 1014894339, -1130602892, 1032486130, -1128303438) + + W(7, 1019588394, 1026167148, -1117291247, 1021704844); + sum2 = + W(0, 1019183838, -1103677237, 1041385745, -1115238116) + W(1, 1031923668, 1026573624, 1025625867, 1024201143) + + W(2, 1049015983, -1101357077, -1095136476, 1024442030) + + W(3, -1096362417, 1060010174, -1096248497, -1122837727) + + W(4, 1005830618, -1122620605, -1115221092, 1034369966) + W(5, 1036998273, -1106684685, 1049273019, 1031554545) + + W(6, -1119396253, 1023743335, -1112424814, 1004099770) + W(7, -1125844722, 970656667, 1011280453, 1010302645); + WS(1068382951, -1145853862); + sum1 = W(0, -1109092968, -1104283584, -1126224104, -1142984099) + + W(1, -1120835030, -1104742016, 1048835390, 1035504970) + + W(2, 1015957108, -1087137628, 1060142968, 1044651720) + W(3, 1033165317, -1082841532, 1050512116, 1036256010) + + W(4, 1015780402, 1048723479, 1050625794, 1040271692) + + W(5, -1098982451, -1119342334, 1046183055, -1112268532) + + W(6, -1106789039, 1037068029, 1033167819, -1115364877) + + W(7, -1105810191, 998682871, 1042826568, -1113754717); + sum2 = + W(0, 1042289605, -1105845074, -1105090355, 1041479887) + W(1, -1123754179, 1032442894, -1106552079, 1028813354) + + W(2, 1040865617, -1090127653, -1075116096, -1087510643) + + W(3, 1025996190, 1055606718, 1073431719, -1084272058) + W(4, -1100913998, 
-1104101067, 1067916428, 1045036562) + + W(5, -1118459936, 1026136504, 1042119939, -1131666845) + + W(6, -1114401734, 1031986833, -1112990087, 1021173748) + + W(7, -1118182115, 1045409779, 1031635830, -1131440550); + WS(-1115823328, -1115141930); + sum1 = W(0, 994860217, -1096937968, 1034643656, 1022968545) + W(1, 1043355533, -1093401724, 1045563268, -1100277972) + + W(2, 1046651158, -1082957735, 1056739037, -1106135459) + + W(3, 1031967699, 1057829358, 1053028032, -1113373017) + + W(4, -1112906011, 1054780763, -1085595451, 1036473238) + + W(5, -1139800861, 1049387203, -1100812438, 1042166401) + + W(6, 1032268536, 1029694230, -1112691098, -1109328859) + + W(7, 1021673189, 1032667294, -1163065290, 1026417494); + sum2 = + W(0, 1041009418, 1006794492, 1023178506, -1126244586) + W(1, -1106230161, 1034312638, -1120362931, 1021996758) + + W(2, -1113994011, -1108126219, -1105213924, -1131146954) + + W(3, -1100787264, 1060460225, 1054015707, 997147503) + W(4, -1102450993, -1119222967, 1053508358, -1107185372) + + W(5, -1114680808, -1097084093, -1133314452, -1126416406) + + W(6, 1031764893, -1125377404, -1141774536, -1126617106) + + W(7, -1145068408, -1105852265, -1120752408, -1131039214); + WS(1044911928, 1063248560); + sum1 = + W(0, -1117210934, -1089987573, 1050931427, -1154913199) + W(1, 1045807559, 1006551364, 1049202419, 1049427019) + + W(2, -1116245664, -1085623154, 1044690392, -1098224261) + + W(3, -1122601008, -1098705597, 1056494849, 1043742399) + W(4, 1041679280, -1089608778, 1054654497, 1009817082) + + W(5, -1102494868, -1098692501, 1047881833, -1102952115) + W(6, 1044187563, 1040126000, 1043119951, 1048685800) + + W(7, -1101059428, -1100421777, 1043071666, -1132265536); + sum2 = W(0, -1110897864, 1031814971, 1060526454, -1090228093) + + W(1, -1104703066, -1126406622, 1034850394, -1097166620) + + W(2, 1039476246, -1112535334, 1060516426, -1091926928) + + W(3, -1102025112, -1088994686, -1088701567, 1063814816) + + W(4, 1041770976, 1050082866, -1072347435, 
1074296249) + W(5, 1044782005, 1038057456, -1075073775, 1073862535) + + W(6, -1112640404, 1029663347, -1077197270, 1067261757) + + W(7, 1045848501, -1098794387, -1081012351, 1067785227); + WS(-1105546040, 1026157880); + sum1 = + W(0, 1040950836, 1040614249, -1102499357, 1036650571) + W(1, 1036866652, 1043435937, 1032157499, 1032871003) + + W(2, 1045419231, 1048836647, -1088963702, 1035867609) + W(3, 1057171433, -1092874056, -1078642173, 1040635958) + + W(4, 1039696013, 1049109967, -1094603626, 1039966133) + W(5, 1040737235, 1037560064, 999044599, 1035782037) + + W(6, 1031337694, 1039556030, -1107844556, 1034875342) + W(7, 1043417390, 1020658754, -1101965321, 1029450491); + sum2 = + W(0, 999804672, 1033414560, -1131242272, -1111239120) + W(1, 965076992, -1142972544, -1152459008, -1114613368) + + W(2, 1017997216, -1117537424, 1043660652, 1024438240) + W(3, -1113573416, -1080122522, 1068973644, 1009242816) + + W(4, 1015814944, -1116987776, -1113471168, 1026341216) + W(5, 1013150208, 1009041344, 1023678672, -1111376032) + + W(6, 1004325632, 1030258512, -1115573696, -1124670336) + + W(7, -1116017408, -1123867424, 1025739248, 1011147520); + WS(-1081027239, 1060388068); + sum1 = + W(0, 1040945153, 1049191505, -1103567931, -1142725077) + W(1, 1018532824, 1052335227, -1106690587, -1126830164) + + W(2, 1048158700, 1053224518, -1098210230, 1044049241) + W(3, 1050078256, -1089652372, -1079677805, 1037048166) + + W(4, 1043330564, 1050009062, -1090769236, 1037549983) + W(5, 1045983912, 1019623144, -1102526621, -1124021470) + + W(6, 1034126717, 1043374892, -1113976903, 1031972104) + W(7, 1042366848, 1037373643, -1106920486, 1000910717); + sum2 = W(0, 1029289565, -1119356133, -1126159585, -1119814653) + + W(1, 1023484285, 1031916514, -1111485988, -1133461586) + + W(2, 1041097307, -1120749829, -1121314077, -1112201820) + + W(3, 1044843621, 1057057740, 1024531885, 1001931237) + W(4, -1101664529, 1055633817, -1102656055, 1035253182) + + W(5, -1108647964, -1098024997, 1016983721, 
1019582313) + + W(6, -1111945130, 1038083406, -1105960100, 1011319026) + + W(7, -1129898257, -1101101054, 1022769465, -1110859200); + WS(-1121436896, -1083449266); + sum1 = + W(0, -1131437006, 1057845458, -1096100393, -1112818966) + W(1, -1126965659, 1026072602, -1106937331, 1025271436) + + W(2, -1106898372, 1058188821, -1089458543, -1120770471) + + W(3, 1049661628, 1047051462, -1093909527, 1039868950) + + W(4, -1110051320, 1036301673, -1105302990, -1101852248) + + W(5, 1048826500, 1049513187, -1097831955, 1048680544) + W(6, 1018834023, 1026266705, -1115312055, -1111779257) + + W(7, 1030351624, 1043516478, -1094725407, -1137214404); + sum2 = W(0, 1025140224, -1118806788, -1126785121, 1031495588) + W(1, 1016993788, 1030590564, -1118559458, 991152164) + + W(2, -1106156741, 1042721449, -1106137914, 1038491925) + + W(3, -1085967118, 1078987048, -1100137345, 1040984057) + + W(4, -1070014283, 1078555235, 1044823121, -1111978905) + + W(5, -1066270592, 1069325982, -1105174392, 1030579932) + + W(6, -1089828448, 1051485796, -1121082393, -1113499951) + + W(7, -1115300709, 1026006822, 1011662011, -1120641633); + WS(1060473294, -1125032523); + sum1 = W(0, -1118381578, 1043924207, 1028842287, -1103983036) + + W(1, -1131528546, 1048790648, -1094915206, 1032416951) + + W(2, -1103035725, 1057928103, 1048586166, -1131359957) + + W(3, 1044891665, -1096003632, -1089668066, 1041536420) + + W(4, 1017562024, -1104437021, 1048997587, -1119623443) + + W(5, 1038728371, -1127678449, -1106021950, 1026802338) + + W(6, 1013193724, -1138265298, 1016196772, -1112764523) + + W(7, 1017619335, 1019650740, -1112334062, 1028850580); + sum2 = W(0, -1124013145, 1023871771, 1041717973, 1027246941) + + W(1, -1154979909, -1104183315, -1104961174, -1105906795) + + W(2, -1113455493, 1024353365, -1071159846, -1114760758) + + W(3, 1042673063, -1099433949, 1075222357, 1048045634) + W(4, 1009341589, 1037029004, 1061754151, -1101645527) + + W(5, -1131887235, -1106752952, -1106459992, 1027783079) + + W(6, 
-1126494367, -1136822537, 1037905746, -1130069351) + + W(7, -1120394460, -1135604889, 1020599743, 1023779471); + WS(1063175758, 1049951270); + sum1 = W(0, 1051934199, -1084051495, 1040719576, 1028219745) + W(1, 1041620825, -1091045702, 1046456792, 1028165760) + + W(2, 1053157077, -1089200998, 1054477181, -1110307566) + + W(3, 1019501037, -1103613676, 1050464826, 1037889601) + + W(4, -1121035141, -1103906809, -1098419444, 1045652053) + + W(5, 1024394007, 1042850683, -1097805325, 1054554722) + W(6, 1034798936, -1117239270, 1011564046, 1031152711) + + W(7, 999909159, -1106705027, -1091232034, 1056053333); + sum2 = W(0, -1091471926, 1062079447, 1055700238, 1015596856) + W(1, 1042259987, -1095572989, 1041645665, 1033532620) + + W(2, -1103057728, -1089203271, 1037140407, 1035045090) + + W(3, 1049412228, -1092861102, -1093629070, 1042934527) + + W(4, 1044973062, 1034556296, -1095940003, 1032040702) + + W(5, -1116401558, 1047552828, -1106623325, -1106239816) + + W(6, 1011000463, -1110031497, -1107294450, 1033073048) + + W(7, -1130123924, 1051271629, 1053844390, -1095799213); + WS(-1085388366, -1090694979); + sum1 = W(0, -1123787314, 1044273497, -1108110651, -1115475932) + + W(1, -1119481145, -1130943626, 1041576542, -1117135031) + + W(2, 1032777214, 1047368143, -1087220302, 1040347561) + + W(3, -1112850502, 1059845268, 1027588771, -1101064470) + + W(4, 1041751591, -1095680144, 1036323946, 1037523789) + + W(5, -1125429276, 1039407200, -1111657568, -1126225621) + + W(6, -1140788444, 990533574, 1021334836, -1134357621) + + W(7, -1113892940, 1032644049, -1143449895, -1109271006); + sum2 = W(0, -1107190004, 1037736456, -1125693587, -1120404934) + + W(1, 1013881877, -1105376838, 1040619572, -1122228614) + + W(2, -1118295314, -1123416196, -1082924015, 1051908042) + + W(3, -1121103222, 1042717593, 1068021664, -1087341114) + + W(4, -1127822751, -1115549939, 1016978358, 1042189807) + + W(5, -1115317828, 1043660085, -1108863865, -1125959243) + + W(6, -1126179247, -1126402007, 
-1136349779, 1038966556) +
+           W(7, -1114801766, 1040920849, -1107267301, 1023515477);
+    WS(1065904679, -1122628785);
+
+    return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);
+}
+
+// Work-group tile of input samples filled by Pass1.
+// 429 = 39 * 11 entries, addressed as inp[x * 11 + y] (x in [0, 38], y in [0, 10]).
+shared float inp[429];
+
+#define CURRENT_PASS 1
+
+// Reduces a colour sample to one scalar by weighting its rgb channels with rgb2y
+// (rgb2y is defined outside this section; presumably luma weights - confirm).
+#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
+// Redirects GLSL-style imageStore calls: only the x component of val is written, into temp.
+#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
+void imageStoreOverride(uint2 pos, float value) { temp[pos] = (value); }
+
+#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
+static const float2 INPUT_size = float2(GetInputSize());
+static const float2 INPUT_pt = float2(GetInputPt());
+
+#define HOOKED_tex(pos) INPUT_tex(pos)
+#define HOOKED_size INPUT_size
+#define HOOKED_pt INPUT_pt
+
+// Pass 1 entry point.
+//
+// 1. The work group cooperatively loads a 39x11 tile of scalar samples into inp. The tile
+//    starts 3 texels before group_base in x and 1 texel before it in y.
+// 2. After the barrier each thread gathers an 8 (x) by 4 (y) window from the tile, runs
+//    nnedi3() on it, and picks up the unmodified sample at window offset (3, 1).
+// 3. The unmodified sample is stored at (gid.x, 2 * gid.y) and the nnedi3 result at
+//    (gid.x, 2 * gid.y + 1), where gid = gl_GlobalInvocationID.
+//
+// blockStart / threadId are only read directly by the LAST_PASS bounds check below.
+void Pass1(uint2 blockStart, uint3 threadId) {
+    ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
+    // Index of this thread's window origin inside inp (stride 11 per x step).
+    int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y);
+    // Each thread loads every (group thread count)-th entry until the tile is full.
+    for (int id = int(gl_LocalInvocationIndex); id < 429; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
+        // The divide/modulo stay unsigned, but the results are converted back to int:
+        // mixing int and uint operands promotes the whole expression to uint, so
+        // "group_base.x + x - 3" would wrap to ~4.29e9 for the first work group instead
+        // of producing the intended negative (border) coordinate.
+        int x = int((uint)id / 11), y = int((uint)id % 11);
+        inp[id] =
+            HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (3)) + 0.5, float(group_base.y + y - (1)) + 0.5)).x;
+    }
+    // All tile entries must be written before any thread reads its window.
+    barrier();
+    vec4 ret = vec4(0.0, 0.0, 0.0, 0.0);
+    vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0);
+    // samples[i][j] = tile entry at window offset (x = i, y = j).
+    vec4 samples[8];
+    samples[0][0] = inp[local_pos + 0];
+    samples[0][1] = inp[local_pos + 1];
+    samples[0][2] = inp[local_pos + 2];
+    samples[0][3] = inp[local_pos + 3];
+    samples[1][0] = inp[local_pos + 11];
+    samples[1][1] = inp[local_pos + 12];
+    samples[1][2] = inp[local_pos + 13];
+    samples[1][3] = inp[local_pos + 14];
+    samples[2][0] = inp[local_pos + 22];
+    samples[2][1] = inp[local_pos + 23];
+    samples[2][2] = inp[local_pos + 24];
+    samples[2][3] = inp[local_pos + 25];
+    samples[3][0] = inp[local_pos + 33];
+    samples[3][1] = inp[local_pos + 34];
+    samples[3][2] = inp[local_pos + 35];
+    samples[3][3] = inp[local_pos + 36];
+    samples[4][0] = inp[local_pos + 44];
+    samples[4][1] = inp[local_pos + 45];
+    samples[4][2] = inp[local_pos + 46];
+    samples[4][3] = inp[local_pos + 47];
+    samples[5][0] = inp[local_pos + 55];
+    samples[5][1] = inp[local_pos + 56];
+    samples[5][2] = inp[local_pos + 57];
+    samples[5][3] = inp[local_pos + 58];
+    samples[6][0] = inp[local_pos + 66];
+    samples[6][1] = inp[local_pos + 67];
+    samples[6][2] = inp[local_pos + 68];
+    samples[6][3] = inp[local_pos + 69];
+    samples[7][0] = inp[local_pos + 77];
+    samples[7][1] = inp[local_pos + 78];
+    samples[7][2] = inp[local_pos + 79];
+    samples[7][3] = inp[local_pos + 80];
+    ret[0] = nnedi3(samples);
+    // Offset 34 = 3 * 11 + 1: the sample at this thread's own position in the tile.
+    ret0[0] = inp[local_pos + 34];
+#if CURRENT_PASS == LAST_PASS
+    // Only compiled when this pass is the last one: skip threads whose destination
+    // lies outside the output.
+    uint2 destPos = blockStart + threadId.xy * 2;
+    uint2 outputSize = GetOutputSize();
+    if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
+        return;
+    }
+#endif
+    imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0);
+    imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret);
+}
+//!PASS 2
+//!DESC NNEDI3 (double_x, nns128, win8x4)
+//!IN INPUT, temp
+//!OUT OUTPUT
+//!BLOCK_SIZE 64, 8
+//!NUM_THREADS 32, 8
+#pragma optionNV(inline none)
+float nnedi3(vec4 samples[8]) {
+    float sum = 0.0, sumsq = 0.0;
+    [unroll] for (int i = 0; i < 8; i++) {
+        sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0));
+        sumsq += dot(samples[i], samples[i]);
+    }
+    float mstd0 = sum / 32.0;
+    float mstd1 = sumsq / 32.0 - mstd0 * mstd0;
+    float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7);
+    mstd1 *= mstd2;
+    float vsum = 0.0, wsum = 0.0, sum1, sum2;
+#define T(x) intBitsToFloat(x)
+#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3)))
+#define WS(w0, w1) \
+    sum1 = exp(sum1 * mstd2 + T(w0)); \
+    sum2 = sum2 * mstd2 + T(w1); \
+    wsum += sum1; \
+    vsum += sum1 * (sum2 / (1.0 + abs(sum2)));
+    sum1 = W(0, -1106336704, -1134422686, -1097058997, -1096794255) +
+           W(1, -1096428577, -1108910446, -1110459731, -1101174078) +
+           W(2, -1104338237, -1108550970, 1036063029, 1068454954)
+ + W(3, 1045547931, -1102009308, -1122481515, 1017740065) + + W(4, 1041734649, -1135080854, 1053379177, 1068510503) + + W(5, -1114733521, -1146436468, -1107441476, -1113618698) + + W(6, -1112016931, -1117701445, -1099451832, -1092648385) + + W(7, -1101312270, -1118387353, -1118789516, -1107281979); + sum2 = W(0, -1107960738, 1022891960, -1112828174, -1099542471) + + W(1, 1028527356, -1124169384, 1003609248, 1019646072) + W(2, 1034572820, 1003346080, -1096749055, 1067566060) + + W(3, -1090311143, 1028295012, -1101382023, -1102277943) + + W(4, -1105088633, -1103397515, 1056062200, -1127174872) + + W(5, 1048792675, -1106700271, 1033154740, 1037356874) + + W(6, -1147785184, 1035529310, -1105585021, 1034766166) + + W(7, -1117109292, -1116900804, -1121486356, -1114899026); + WS(-1075683047, -1087125913); + sum1 = W(0, 1042687663, 1034752002, 1020877136, 1043598702) + W(1, 1032834596, 1049059763, 1041786741, 1050972591) + + W(2, 1052977914, 1046022844, 1051043629, 1049374510) + + W(3, 1044576234, -1108783446, -1105784362, -1089957133) + + W(4, -1087267578, -1097639380, -1087388173, -1095656786) + + W(5, -1108370107, -1101488315, -1111228387, -1108747496) + + W(6, 1041384601, 1034807969, 1037740431, 1044538321) + W(7, -1113942663, 1043415765, 1011031587, 1034104676); + sum2 = W(0, -1129327146, -1149557934, 1027469999, -1116270499) + + W(1, -1107094547, -1114668504, -1136313139, -1115408970) + + W(2, -1112916736, -1120018795, -1105442406, 1056936993) + + W(3, 1036431709, 1029410453, -1126491682, -1112281372) + + W(4, -1109246006, 1012425107, 1044505132, 1054092693) + + W(5, 1015080154, -1102188513, -1123974759, -1115235380) + + W(6, -1168639863, -1122303521, -1114911422, -1109477794) + + W(7, -1122292075, 1038663029, -1124857486, 1004657207); + WS(1042212664, -1102203684); + sum1 = + W(0, -1140902818, 1019874102, -1138465404, 1041713132) + W(1, 1031911135, 1032244040, -1122713961, 1032066151) + + W(2, 1040147851, -1104962202, -1102798136, -1085739950) + + W(3, -1090423514, 
-1096835716, 1015091005, -1100137137) + W(4, 1035472140, 1045102086, 1044852023, 1052255697) + + W(5, 1050093153, -1114874577, -1125530488, 1025859399) + + W(6, -1111794429, 1033944684, -1122108180, 1042129278) + W(7, 1026181805, 1045351411, 1045226448, 1037723887); + sum2 = + W(0, 1025219442, -1119436548, 1041363772, 1026037858) + W(1, -1106963956, -1148187164, -1115265228, 1027834104) + + W(2, -1109152196, -1122141349, 1032935700, -1095545125) + + W(3, 1029288712, 1048235476, 1017807431, -1124373883) + W(4, 1026944460, 1040314652, -1124096755, -1132954458) + + W(5, 1063106013, 1044093468, -1119277141, -1115291573) + + W(6, -1136900198, -1112897189, -1144192892, 1046665648) + + W(7, -1112667488, -1091150222, -1098670428, -1099776205); + WS(-1106435384, 1038338229); + sum1 = W(0, 1040095954, 1019822615, 1037825128, 1041126536) + W(1, 1042659628, 1046281616, 1025627498, 1052688116) + + W(2, 1057302312, 1048464174, 1048602738, 1046177110) + + W(3, -1113571175, 1010729037, -1116720143, -1090676327) + + W(4, -1085504617, -1109673285, -1088007186, -1089584127) + + W(5, -1112427435, -1099904421, 985851280, -1109333660) + W(6, 1038469973, 1008999077, 1047759857, 1042217259) + + W(7, -1122205838, 1042535855, 1020820192, 1034203799); + sum2 = W(0, 1016257202, 1025056091, -1123344133, -1117490597) + + W(1, -1139226916, -1121712633, 1032149092, 1018740994) + + W(2, 1059204215, 1059357388, -1096239491, -1080318626) + + W(3, -1097775023, -1121076469, -1108589001, 1042934709) + + W(4, 1078138204, 1057901024, -1075026282, -1073900782) + W(5, 1041808545, 1040778731, 1040543247, 1013253724) + + W(6, 1045786159, -1115583727, 1040463834, 1043123901) + + W(7, 1021989454, -1105966927, 1015040126, -1147555881); + WS(-1079558823, -1098708322); + sum1 = + W(0, -1107155318, 1025234303, 1015859078, 1036194249) + W(1, -1109510417, -1106597869, -1106784003, -1107821257) + + W(2, -1102044491, -1098872409, -1087610848, -1086405173) + + W(3, 1049149960, -1113006787, 1034815041, -1109243348) + 
W(4, -1157836704, 1050456336, 1055919296, 1051086989) + + W(5, 1049231261, 1047976577, 1037659280, 1045770027) + W(6, 1018716763, 1032647774, 1043448582, 1041211303) + + W(7, 1032229161, -1113988824, -1114873522, -1115826050); + sum2 = W(0, -1115121954, 1012713185, -1120959275, 979525395) + W(1, 1031308048, 1023425822, 1008898041, 1015643115) + + W(2, 1038073794, -1114116718, 1048906430, -1089285214) + + W(3, 1044939824, 960279117, -1114246838, -1116674868) + + W(4, -1128914993, 1041575276, 1074552079, -1076455267) + + W(5, -1081125046, -1109266711, 1028966618, -1116558128) + + W(6, -1116577465, -1124395325, 1055266270, 1060162115) + + W(7, -1098098687, 1021768571, -1124862202, 1013382601); + WS(1045132600, -1120782135); + sum1 = W(0, -1105751784, -1105178898, -1104184899, -1114219853) + + W(1, -1110381909, 1038688878, -1129240105, 1028338913) + W(2, 1057393815, 1048859946, 1059464001, 1046945589) + + W(3, -1115358437, -1101266388, 1006928425, 1001553277) + + W(4, -1105555137, -1097544977, -1093321826, -1096850414) + + W(5, 1045680426, 1054674458, 1040711722, 1051440502) + + W(6, -1117651397, 1030868229, -1113548192, -1114482291) + + W(7, -1100710867, -1101910270, -1109195062, -1106616547); + sum2 = W(0, 1017265392, -1109811903, 1023756444, -1113627340) + + W(1, 1014378799, -1145950526, -1129429444, 1013677319) + W(2, 1041015042, 1031690548, 1041543835, 1061785433) + + W(3, -1112539854, 1033767962, -1111764061, -1098545742) + + W(4, -1100279165, -1110095441, -1121800962, 1052269299) + + W(5, 1027655784, -1103181542, -1110834939, -1106577514) + + W(6, 1015388608, -1118750728, 1018680528, -1143924414) + + W(7, -1099334823, 1020584168, -1108053853, 1036964648); + WS(1052573852, -1122511817); + sum1 = W(0, -1122539822, 1036520122, -1150669710, -1131456914) + + W(1, 1027717464, 1032118773, 1021054902, 1030809195) + + W(2, -1107197216, -1104137920, -1095755763, -1089160552) + + W(3, -1090293588, -1090056564, -1092174087, -1086001910) + + W(4, -1123108403, 1038549849, 
1032946604, 1050538028) + W(5, 1052989852, 1057974832, 1053464075, 1057808582) + + W(6, 1051517474, 1023654291, 1039184399, 1041388597) + W(7, -1168884669, 1033029075, 1028526105, 1041880934); + sum2 = W(0, -1085894681, 1038192816, -1098445483, -1102898046) + + W(1, 1008806191, 1050638036, -1102617523, 1057325981) + + W(2, -1108201039, -1090848333, 1052221636, 1071505414) + + W(3, -1098981857, -1086351349, -1096799400, -1088620184) + + W(4, 1070882268, -1093216289, 1067999858, 1073865984) + + W(5, 1058878464, -1083525828, -1107159037, -1083206959) + + W(6, -1081052443, -1102549623, -1087793608, -1090403840) + + W(7, -1095780376, 1055133376, -1096503933, 1053790412); + WS(-1073219892, -1080326418); + sum1 = W(0, -1114077580, -1142635506, -1142955026, -1123237842) + + W(1, 1038107057, -1103324917, 1026598572, -1095875035) + + W(2, -1101602915, -1103425331, -1090278522, -1092662145) + + W(3, -1091646784, -1114692451, -1101848592, 1038718196) + + W(4, 1057495924, 1053716920, 1055741332, 1051447237) + W(5, 1052494629, 1049044089, 1043005848, 1047828379) + + W(6, -1151047116, -1114784094, -1111276924, -1121668701) + + W(7, 1000704387, -1109204280, 1010568075, -1156952092); + sum2 = + W(0, -1121663397, -1116542136, 1042431779, 1037730633) + W(1, 1010053773, -1137348615, 1028087910, -1119025600) + + W(2, -1095443331, 1050195534, -1123573301, -1100504352) + + W(3, 1043186658, -1102861372, -1110454800, 1040229501) + + W(4, -1067370736, -1071776975, 1068672794, 1077012119) + W(5, 1070664889, 1045361006, -1123945395, 1027642294) + + W(6, 1005231721, 974104521, -1111985184, 1047585075) + W(7, 1016528922, -1109020232, 1037668055, -1117277443); + WS(-1130027456, 1047744266); + sum1 = W(0, 1030328637, -1100312969, 1036569044, 1044449278) + + W(1, -1106699970, 1046212150, -1104762293, -1124975021) + + W(2, -1103440100, 1043447132, -1096543805, -1093151398) + + W(3, 1024264999, -1115860524, 1041344218, 1032410566) + W(4, 1050534604, 1038395889, 1037345619, -1106526388) + + W(5, 
-1115006287, 1035372917, 980910400, 1044066628) + W(6, -1130350917, -1105733872, 1043114871, 1022399622) + + W(7, 1020228332, 1024836117, -1123108872, -1112803412); + sum2 = W(0, 1029880722, -1098453144, 1029103498, 1067660000) + + W(1, -1094604015, 1046293202, -1104816365, -1117927817) + + W(2, 1033391383, 1041649400, -1095624145, -1082091368) + W(3, 1056223691, 1027973488, 1034193475, 1034286113) + + W(4, -1137819024, -1137377888, 1040796761, -1080643124) + + W(5, -1102259819, -1121978412, -1148596400, 1041009105) + + W(6, -1114516031, 1025620716, 1035240799, 1066795978) + + W(7, -1098376459, 1036814117, -1107858514, -1114981924); + WS(1049012636, 1037011386); + sum1 = W(0, -1108102978, 1042768739, 1036457352, -1105146600) + + W(1, -1100621772, -1100160944, -1149100234, -1116535239) + + W(2, -1097922903, -1089977373, -1083790961, 1050515995) + + W(3, 1053649082, 1044221701, 1020595485, -1110188826) + W(4, 1049818257, 1052134241, 1059747641, 1062551839) + + W(5, -1088576433, -1093385002, -1108848982, -1147628986) + + W(6, 1041216474, -1106782690, -1120768141, 975424142) + + W(7, 1042971413, 1047421135, -1142399358, 1034144007); + sum2 = W(0, -1130376652, -1109307779, 998991648, -1105231849) + + W(1, -1124853008, -1110904873, 1033926100, 1012787248) + + W(2, -1109271051, 1040605589, -1117670218, 1068278024) + + W(3, -1099193318, 1035466738, -1111928806, 1025845624) + + W(4, -1098570547, -1097221185, -1087670872, 1068017963) + + W(5, -1103240593, 1049832589, 1018913792, 1035573068) + + W(6, 1029552052, -1097745138, -1101762859, -1100990121) + + W(7, -1094879980, -1119653306, -1099498770, 1046656503); + WS(-1099128120, 1074104605); + sum1 = W(0, 1001695642, -1114738061, -1137881464, 1035838167) + W(1, 1036976671, 1043227732, 1024532056, 1041576607) + + W(2, 1031876531, 1036082771, 1049670772, 1045634202) + W(3, 1039176783, 1044499416, -1115296143, 1036111714) + + W(4, -1106225286, -1127447088, -1114090766, -1084335000) + + W(5, -1092101927, -1106295064, -1108352670, 
-1103209586) + + W(6, 1039516460, 1037331996, 1048871226, 1024672505) + + W(7, -1111706306, 1040216275, 1018355325, -1129216251); + sum2 = W(0, -1125983803, 1026289226, 1003361806, 1036989304) + W(1, -1121376018, -1127516251, 999276462, 1020382491) + + W(2, 1057121124, 1048908439, 1042597022, -1101738899) + + W(3, -1101518001, -1114490175, 1032222895, -1111061815) + + W(4, -1097673861, -1081617292, -1083990029, 1066557847) + + W(5, 1050089807, 1021212715, -1107462587, 1048167829) + + W(6, -1097055530, -1105528539, -1094090339, 1060807350) + + W(7, 1045798092, 1007168711, 1021142275, 1024157832); + WS(1043392312, -1114334171); + sum1 = W(0, 1041086874, 1027676176, 1029494548, 1038182418) + W(1, 1033375641, 1014428948, 1016176348, 1036956537) + + W(2, 1046571639, 1051603231, -1114765098, 1063881062) + W(3, 1036439434, 1039261900, 1049353410, 1039119196) + + W(4, -1101297859, -1100311555, -1090719068, -1092551191) + + W(5, -1093343060, -1095549099, -1102133676, -1097401399) + + W(6, -1108043111, 1031499310, -1123479337, 1047470620) + + W(7, 1029157954, -1113522785, 1032866554, -1108903402); + sum2 = W(0, 1032922039, 1034144522, 1035186417, -1106146153) + W(1, 995983013, 1014708981, -1118508182, -1122673776) + + W(2, -1110774431, 1041464609, -1089347840, 1063858171) + + W(3, -1103182741, -1105498679, 1046443094, -1107403517) + + W(4, 1024080221, 1024001520, -1102975683, 1062496232) + + W(5, -1104694448, 1034761930, -1113235807, -1126790312) + + W(6, -1111786600, 1038622987, -1098326560, -1094555153) + + W(7, -1110476474, -1107944280, 1045574278, -1119406695); + WS(1055141020, 1060902337); + sum1 = W(0, 1034435725, -1133288704, 1026181508, 1033934442) + W(1, 1030709219, -1118188821, 1027652318, 1035419593) + + W(2, 1057965143, 1052483105, 1052696320, -1122504460) + W(3, 1052884304, 1049027177, 1046782257, 1045790650) + + W(4, -1123568702, -1121795134, -1094115828, -1084201687) + + W(5, -1093347107, -1096042081, -1098441692, -1101051195) + + W(6, -1104499136, 1006999415, 
-1153162900, 1029709087) + + W(7, 1036285529, -1155692311, 1013713298, -1111952258); + sum2 = W(0, 1043194323, 1017945729, 992756489, -1117717167) + W(1, 1023440269, -1108161809, 1034607052, -1115924363) + + W(2, -1070666261, -1075189642, 1068575013, 1076566585) + + W(3, 1056539999, -1098049115, 1036076910, -1112346970) + + W(4, -1098866335, -1106608294, 1051129947, 1049134123) + + W(5, 1047751107, -1097073139, 1011007330, 1016205049) + + W(6, -1101126135, 1010301074, -1107911121, 1046700061) + + W(7, 1020103457, 1026363561, 1035420194, -1137869730); + WS(-1090579868, 1070279725); + sum1 = W(0, 1032329895, -1102328821, -1113203900, -1098061089) + + W(1, -1105344998, 1034205973, -1106552005, 1041435478) + W(2, 1048430680, 1050349544, 1052980239, 1057014372) + + W(3, 1057962188, 1038273502, 1046276062, 1049777060) + + W(4, -1101560157, -1091335294, 1039656780, -1084573078) + + W(5, -1103450436, 1043587871, -1098888483, 1002064136) + + W(6, -1129281319, 1012204235, 1033674783, -1115372202) + + W(7, -1108117804, -1111454739, -1115265901, -1109943484); + sum2 = + W(0, 987673807, 1036995374, -1095509641, -1094252408) + W(1, -1119095571, -1103179232, 1040985329, -1114181285) + + W(2, 1027892228, -1102736131, 1034852634, 1059241808) + W(3, -1103859919, 1043446486, -1109795439, 1004807604) + + W(4, -1103141235, 1049007178, -1090760003, 1060311666) + + W(5, -1129798465, -1100012675, 1051746582, -1113144024) + + W(6, 1005138532, 1028221332, -1123669832, -1128511149) + W(7, 1032914452, 1006285508, -1122773902, 987177167); + WS(1058381774, -1086956646); + sum1 = W(0, 1005434106, -1149835360, 1034370117, 1036055789) + + W(1, 1007595428, -1173712576, -1122685332, -1119060568) + + W(2, -1096145742, -1100460324, -1096424577, 1033532617) + + W(3, 1049990129, 1042033667, -1147483724, 1021597150) + W(4, 1046241393, 1039571170, 1054608457, -1110152984) + + W(5, -1100950084, -1103984787, -1122634471, 1016207588) + + W(6, 1026532260, -1120107684, -1107524072, -1123297125) + + W(7, 
1032832748, 1041584782, -1111048693, 1023439817); + sum2 = + W(0, 1073050559, 1069067455, -1096535796, -1073893027) + W(1, -1077684470, 1038335059, 994568047, -1109196141) + + W(2, 1049113865, 1035642194, -1105600932, -1114871718) + W(3, -1104998692, 1034875990, 1004288083, 1019116562) + + W(4, 1048373410, -1101089925, 1050248373, -1113393188) + W(5, 1015693919, 1032989733, 1024635942, 1041247426) + + W(6, -1123382260, 1035361564, -1105239942, 1000972215) + W(7, -1112223483, 987872957, 1016564448, 1019016937); + WS(1051792028, 1027146209); + sum1 = W(0, -1173418992, 1031492470, 1030866682, -1111745573) + W(1, 1037975597, 1032231158, 1021051177, 1029296729) + + W(2, -1096555104, -1099279313, -1102472206, -1086728512) + + W(3, -1098029143, -1099704976, -1107704157, -1088435526) + + W(4, 1048715847, 1043543405, 1045948678, 1048944276) + W(5, 1053562136, 1041023999, 1031959404, 1046674105) + + W(6, 1037767898, 1032372106, 1031937048, 1030986133) + W(7, 1035090980, 1038793940, 1031982829, 1043937550); + sum2 = W(0, 1076285256, 1067013205, 1054912747, -1103468782) + + W(1, -1082179927, -1086354611, -1086555076, -1077304359) + + W(2, -1071495861, -1080654015, -1092768710, 1053690657) + + W(3, 1062196550, 1059230433, 1065208788, 1069826015) + W(4, -1089164092, 1044446905, -1089831336, 1049463752) + + W(5, -1116071286, 1055604285, -1102169042, 1049830695) + + W(6, 1024798548, -1130255717, -1111620633, -1129289568) + + W(7, 1022374674, -1147177200, 1008723412, 990531682); + WS(-1116843232, 1041538044); + sum1 = W(0, 1038718416, 1041393778, 1023355030, 1040958951) + W(1, 1040537034, -1112360254, 1043346413, 1047211822) + + W(2, 1050813118, 1048646402, 1043671731, 1037057641) + W(3, 1050123337, 1052799074, 1037436293, 1042212684) + + W(4, -1091780513, -1107072481, -1097953353, -1088265503) + + W(5, -1105750724, -1091053114, -1121572392, -1091261766) + + W(6, -1107356403, 1032833522, -1126022145, -1185162751) + + W(7, 1038008293, -1123581176, 1033129910, -1116284746); + sum2 = 
W(0, -1127892588, 1025248710, -1127740272, -1114879615) + + W(1, -1125515678, -1135605467, 1015277154, -1118452308) + + W(2, -1090048491, -1124324076, -1092974499, 1047668544) + + W(3, 1042122418, 1052455522, -1102075660, 1052515578) + + W(4, -1076486023, -1080184847, -1080442855, 1048567939) + + W(5, 1066323214, 1068938556, 1066205534, 1058048535) + W(6, 1071993665, 1067215194, 1068762214, 1014309155) + + W(7, -1079656754, -1077318729, -1083078137, -1086246531); + WS(1045082936, 1047750074); + sum1 = W(0, 1017867382, -1105827232, -1139557912, -1098507795) + + W(1, -1112138258, -1113781499, -1117154624, 1024003356) + + W(2, 1038967959, 1036533543, 1048524561, 1057427781) + W(3, 1057555836, 1044700722, 1037485187, 1046059755) + + W(4, -1107005280, -1101431328, 1044741561, -1099209723) + + W(5, -1103886606, -1117912777, -1108908881, 1024367918) + + W(6, -1117189504, 1000061951, 1034808454, -1102176472) + + W(7, -1102736460, -1102956733, -1113728179, -1108156515); + sum2 = W(0, 1003184961, 971431442, -1131894760, -1133283297) + W(1, 984723076, 1031831407, -1125590160, 1031681833) + + W(2, 1024857236, -1109323954, 1026446282, -1105665514) + + W(3, -1116186208, 1024492508, -1121629964, -1148808497) + + W(4, -1114215978, 1030818718, 1031262679, 1056963612) + W(5, 1055283667, 975724297, -1116339968, 1026784592) + + W(6, 1037587704, -1121823736, 1045339410, -1107079097) + + W(7, -1090072253, -1102678968, -1103014356, -1123463096); + WS(1059869006, 1015916977); + sum1 = W(0, -1111801867, 1039164638, 1023424768, -1118862224) + + W(1, -1114194639, -1099094340, -1118811114, -1096321008) + + W(2, -1103727218, -1102716146, -1089648586, -1092420393) + + W(3, 1049092536, 1052180160, 1032231232, 1046058153) + W(4, 1051877295, 1051739903, 1057614174, 1055627629) + + W(5, -1096079006, 1008315659, -1121275805, 1039975892) + + W(6, -1115491581, -1106752078, -1104830517, -1112797325) + + W(7, 1038662167, 1030088557, -1131680577, -1175801754); + sum2 = W(0, -1113427160, -1104433368, 
-1104169884, -1117600576) + + W(1, -1102319880, 1039627671, -1097619580, 1047721667) + + W(2, -1090644207, -1091764889, -1109333336, 1065407690) + + W(3, -1098732665, -1109931470, 1043714297, 1041133481) + W(4, 1034414883, -1120815960, 995253792, 1059846520) + + W(5, 1048874179, 1008884504, 1021072320, -1103318888) + + W(6, 1038986783, -1135559136, 1033390214, -1120264784) + + W(7, -1104270640, 1019376128, -1111461786, -1142244432); + WS(1054131356, -1077482588); + sum1 = + W(0, -1100136045, 989703980, -1114161471, -1117800461) + W(1, 1013262449, -1112319975, 1027862775, -1103330903) + + W(2, -1101215881, -1100083265, -1091699940, -1088812180) + + W(3, -1095974704, 1033042241, -1095224480, -1102789769) + W(4, 1045307006, 1039854844, 1051542530, 1056393300) + + W(5, 1053327664, 1059113280, 1049428707, 1055440053) + W(6, 1047064142, -1120855287, 1042850313, -1122501592) + + W(7, -1118254671, -1111473941, -1119385054, -1109190398); + sum2 = W(0, -1098032416, 1055692800, -1101668743, -1100435053) + + W(1, -1089098280, -1094416925, -1099202751, -1104551065) + + W(2, 1063396296, 1024719031, 1065491656, 1073645059) + + W(3, 1060161647, -1074198531, -1081296593, -1083400329) + + W(4, 1043414797, 1032661901, 1058961916, 1063424749) + + W(5, -1096580753, -1098776178, -1115458644, -1092624053) + + W(6, -1093635160, 1049215120, -1090478816, 1050905898) + + W(7, -1135528507, 1042380471, 1032695397, 1054277265); + WS(-1082679118, -1072338335); + sum1 = W(0, -1126832308, 998650886, 1029713605, 1045660229) + + W(1, -1125245511, 1001090478, -1126242526, -1128346278) + + W(2, -1092407662, -1099976035, -1098757312, 1049242330) + + W(3, 1052609817, 1049104969, 958956607, -1170516592) + W(4, 1041636256, 1035459839, 1053507448, -1097348797) + + W(5, -1097111418, -1099322690, -1116731426, 1009511091) + + W(6, 1032452512, -1118647007, -1113994637, 1019901759) + + W(7, 1026546307, 1041545965, -1113342195, 1027607012); + sum2 = W(0, -1069228184, -1073602631, 1021776922, 1076866983) + + W(1, 
1069809281, -1101499289, 1013617559, 1036050969) + W(2, -1099529380, 1043301860, 1054858487, 1044685536) + + W(3, 1062109344, -1097491140, -1149319294, -1113127688) + + W(4, -1112044089, 1041147606, -1119872164, -1106846135) + + W(5, 1042271976, 1035368326, 1025827813, -1115137330) + + W(6, -1144476451, -1112019052, 1036271953, 1018899191) + + W(7, -1114284680, -1126672913, -1112053703, -1146921887); + WS(-1125906880, 1029353026); + sum1 = W(0, 1052620124, 1040928385, 1053619569, 1034478636) + W(1, -1149501032, 1027228408, 1033816441, -1143379631) + + W(2, -1082830235, -1087911853, -1088553290, -1115166792) + + W(3, -1104403058, 1046902211, -1140598044, -1115398454) + + W(4, 1044570800, 1048894344, 1054767249, 1052323270) + + W(5, -1096342700, -1092690897, -1112660402, -1089842615) + + W(6, 1033131793, 1031982257, -1118254573, 1037418321) + W(7, 1045103465, 1053685367, 1032758420, 1056771243); + sum2 = W(0, 1052242091, -1105901664, 1041713460, -1098081544) + + W(1, -1101414896, -1140318759, -1123485994, -1108472633) + + W(2, -1123374674, 1032004261, 1057464262, 1055897675) + + W(3, -1108453649, -1099401944, 1039811869, -1092934450) + + W(4, -1088438592, -1109896025, -1106721416, 1057898227) + + W(5, 1044097375, 1041656473, 1042751001, 1033994653) + + W(6, -1109756837, -1117902306, -1106560728, -1101499608) + + W(7, -1114554169, 1042131382, -1104786072, 1048148308); + WS(-1080878567, 1068324028); + sum1 = W(0, -1115471279, 1019310859, -1121098697, -1115193805) + + W(1, -1153442440, -1104713039, 1023975060, -1094675607) + + W(2, -1095139206, -1103458412, -1090917913, -1094785898) + + W(3, -1100131090, -1118370765, -1102569730, 1044057356) + + W(4, 1057833304, 1049561152, 1058273109, 1054221326) + W(5, 1050583410, 1048985266, 1041261354, 1045920684) + + W(6, 1032811018, -1118469827, -1107876783, -1109399058) + + W(7, -1121957752, -1109945243, -1125987028, -1144681671); + sum2 = W(0, -1122597644, 1032420914, -1109997244, 1029919127) + + W(1, -1129031901, 1025876905, 
-1121404056, 1025649559) + + W(2, 1052263517, -1132353885, -1113437764, -1086124512) + + W(3, -1134522929, -1137529969, 1041285329, -1111439550) + + W(4, 1084267296, 1077171750, -1076080701, -1066836895) + + W(5, -1073829589, -1128728545, -1128683125, -1109934248) + + W(6, 1037076324, 1017945885, 1045994672, -1105598049) + + W(7, -1115453382, 1027437378, -1112127416, 1032857588); + WS(-1106960696, -1099012034); + sum1 = W(0, -1114528403, -1119077260, -1143558020, -1119877760) + + W(1, -1109244232, -1096650748, -1112604445, -1100402892) + + W(2, -1099071342, -1099370430, -1089278443, -1086538844) + + W(3, 1010994112, 1041376829, 1042854492, 1049345155) + W(4, 1055554285, 1049964157, 1061104081, 1056629915) + + W(5, 1049997315, 1042073805, 1031563405, -1123516568) + + W(6, -1109121109, -1115766890, -1114965299, -1120868875) + + W(7, 1034547527, -1114495407, -1139584324, -1118254096); + sum2 = + W(0, -1116593925, 1028908115, 999050165, 1037914955) + W(1, -1106109711, -1105957739, -1123834115, -1113783754) + + W(2, 1027806287, -1136470458, 1039840949, -1122048333) + + W(3, 1057135276, -1105275062, -1096566133, -1105721601) + W(4, 1032648371, 1006654490, 1052059722, 1057985079) + + W(5, -1090440242, -1131926605, 1018741845, -1124260101) + + W(6, 1024756439, -1116529299, -1107372324, -1128153177) + + W(7, -1142966917, -1123338691, 1013415162, 1018182189); + WS(-1103089976, 1059868827); + sum1 = W(0, 1026631887, 1033737723, 1034000797, -1130496915) + + W(1, -1106694966, -1105866475, -1106727101, -1098705919) + + W(2, -1091300264, -1096562704, -1087537316, -1094386976) + + W(3, 1044429902, 1045093007, 1040636078, 1044615672) + W(4, 1056895190, 1050565142, 1059766350, 1053939124) + + W(5, -1108577640, -1109922270, -1122075862, -1100982776) + + W(6, 1031003956, -1140469197, -1108911023, 1032926090) + + W(7, -1120892680, 1034482579, -1136817768, 1045052014); + sum2 = + W(0, -1114832682, 1038307608, -1109268272, 1031259614) + W(1, -1136199018, -1118450441, 1025376385, 
-1125957173) + + W(2, 1044376552, -1101986459, 1025272345, -1112738770) + W(3, 1014008179, 1042085570, -1110075404, 1027010884) + + W(4, 1052671036, -1094811671, -1106762472, 1052706603) + W(5, 1040752903, -1100877636, 1051973899, 1036345951) + + W(6, -1084839384, -1076225566, -1074779618, -1088390497) + + W(7, 1066038403, 1071357616, 1068318141, 1050587817); + WS(-1089897038, 998399462); + sum1 = W(0, -1100326508, -1099771006, -1103969415, 1030998252) + + W(1, -1111096702, -1129527451, -1115021151, -1124899748) + + W(2, -1114312802, -1109889234, -1092992143, -1098209637) + + W(3, -1116385180, -1096464604, -1111889198, -1109224641) + + W(4, 1049930251, 1047140918, 1060325312, 1062808044) + W(5, -1128865717, 1048691588, -1137813989, 1043572929) + + W(6, -1141222070, -1114407115, -1106384368, 1017922927) + + W(7, 1023432554, -1125954964, -1121716413, 1020452535); + sum2 = W(0, -1094975511, -1089233591, -1089977002, 1033778332) + + W(1, 1006981461, -1137053176, -1125132764, 979684320) + W(2, -1113696880, 1032844980, 1066397244, 1037917830) + + W(3, 1039504138, 1032622380, 1021506052, -1119563165) + + W(4, 1023473326, 1036803698, -1129026618, -1104280508) + + W(5, 1024702770, -1121975322, 974010432, -1138205260) + W(6, 1029862812, -1117299445, 1025970568, 1031377784) + + W(7, -1122944997, 1022461928, -1130740686, 1024270227); + WS(-1096120220, -1099720911); + sum1 = W(0, 1052677544, 1027883172, 1046184634, 1036941212) + W(1, 1037786724, 1039372749, 992842790, 1040590154) + + W(2, -1105594052, 1050583742, 1030711656, 1050761182) + W(3, 1044997418, 1051859318, 1041579690, 1052858474) + + W(4, -1101878153, -1120505890, -1094174707, -1087347673) + + W(5, -1091299260, -1092171165, -1105854769, -1087724293) + + W(6, 990296621, 1026529890, 1039246623, 1031039363) + W(7, 1038193547, 1036921611, -1115076484, 1007468831); + sum2 = + W(0, 1040381409, -1131846701, 1030695069, 1015259161) + W(1, 1026868853, -1164136976, -1121164685, -1109936403) + + W(2, 1023777345, -1095435966, 
-1111411494, -1085163747) + + W(3, -1100600319, 1052695808, 1041440525, 1051545155) + + W(4, -1110031849, -1146822820, -1089001391, -1074171332) + + W(5, -1078760233, 1048168469, 1074132102, 1072577306) + W(6, 1022623295, -1096899181, 1049343346, -1107180310) + + W(7, 1043071461, 1020495459, 1017314953, 1050115198); + WS(-1080112807, -1111145054); + sum1 = W(0, -1103297132, -1106341774, -1098278655, 1024154699) + + W(1, 1037514093, 1034961350, -1125780682, -1109009483) + W(2, 1049824404, 1049617192, 1059552348, 1031972225) + + W(3, -1085661228, -1097823136, -1114326813, -1119831648) + + W(4, -1133975251, -1122989109, -1086286580, -1137351813) + + W(5, 1056970252, 992708574, 1021582000, 1040424089) + W(6, 1026400021, 1042898005, 1041608840, -1145221295) + + W(7, -1102084285, 1041904442, 1038246926, 1042525799); + sum2 = W(0, 1041258750, -1109734283, -1123542769, -1113561957) + + W(1, -1110564237, 1026811493, -1130694514, 1024057869) + + W(2, 989910477, -1100922013, -1096844113, 1051742866) + + W(3, -1100235579, -1105813951, -1151284621, -1136706867) + + W(4, -1134607603, 1016064722, 1041392492, 1057819912) + + W(5, 1047439582, -1113868277, -1118083589, -1114600003) + + W(6, -1109794979, 1041809078, -1126381362, 982123162) + + W(7, -1119465765, 1026735929, 1019553626, -1114678252); + WS(1043409720, -1105036943); + sum1 = W(0, 1040191749, 1033970558, 1042494496, 1046767380) + W(1, 972615552, 1032778168, 1032217109, 1032356247) + + W(2, -1096303173, -1095924673, -1093807736, -1092831585) + + W(3, -1096756208, -1126846855, -1115229280, -1098580285) + + W(4, -1115184605, 1041748910, 1044442592, 1042876074) + W(5, -1140612185, 1018241414, 1027878365, 1034858676) + + W(6, 1041655059, 1041917006, -1160531625, 1039311757) + W(7, 1020654492, 1043700571, 1034107952, 1036396440); + sum2 = W(0, 1027808259, 1010119787, -1111510504, -1113418759) + + W(1, 1032132185, -1121052855, 1014515195, 1010692603) + + W(2, -1139047739, 1043340308, -1106790738, -1096910874) + + W(3, 
1040059331, -1115460243, 1017581734, -1120741339) + W(4, 986751832, 1026247831, 1065364148, 1040723354) + + W(5, 1038861011, -1113647147, 1041604470, -1135975307) + + W(6, -1095774205, -1093511024, -1089533357, 1053173071) + + W(7, -1106558855, 1027584935, -1121443175, 1014681515); + WS(1035857520, 1029952289); + sum1 = W(0, -1143077795, -1105351783, -1113811952, -1106526780) + + W(1, 1035289215, 999420801, -1129002434, -1113970285) + W(2, 1037051011, 1029971495, 1049064844, 1035603988) + + W(3, -1083894414, -1098557596, -1111645039, -1130790920) + + W(4, -1110400621, -1106469910, -1094483622, 1058276716) + + W(5, 1056346487, 1050633420, 1042325343, 1026109447) + W(6, -1121149466, 1043652126, 1027875988, 1035104701) + + W(7, -1114346217, -1123554317, -1130799743, 1025377311); + sum2 = W(0, -1118517607, -1101077931, 1029148161, -1087996420) + + W(1, 1047526016, -1108756119, 1038718292, 1024464693) + W(2, 1026484093, 1046485772, -1098019518, 1071267691) + + W(3, -1082674625, 1032470282, -1106613565, -1113199029) + + W(4, 1039465846, 1030900361, 1048118782, 1064954318) + + W(5, -1084489705, -1118487613, -1112021711, -1120012731) + + W(6, 1008581907, -1114630284, -1106945018, -1097625455) + + W(7, -1156379726, 1024110641, 1020048570, 1036291722); + WS(1058189134, 1034857672); + sum1 = + W(0, -1171748679, -1105475825, -1097172873, -1108837708) + + W(1, 1038034707, -1131134898, -1113151849, -1119273967) + W(2, 1031241360, 1044072484, 1057114388, 1051374554) + + W(3, -1083662360, -1099379283, -1121036423, 1018101850) + + W(4, -1100969760, -1094075536, -1082247827, 1060030327) + W(5, 1058651386, 1046921459, 1041721673, 1036514264) + + W(6, 987174985, 1045893176, 1042860595, 1031954257) + W(7, -1120638978, -1137288661, 1037681908, 1029972391); + sum2 = W(0, 1003810984, 1036204547, -1101998989, 1041029203) + + W(1, -1098215637, 1044393168, -1110527635, 1027560485) + + W(2, 1020569783, -1114489126, -1121260135, 1052640914) + + W(3, 1059150102, -1098121773, 1037147118, 
-1110248773) + + W(4, -1128982694, -1099380876, -1101918458, 1059611184) + + W(5, 1040381797, -1094102036, 1028175839, -1113935142) + + W(6, 1043745048, -1104004872, -1104165612, -1121067579) + + W(7, -1100369008, 1032515174, -1114899975, 1000033398); + WS(-1107450480, 1040804833); + sum1 = W(0, 1040192716, 1022226221, 1002013395, -1117682794) + W(1, 1001040843, 1000424795, -1120982404, 1025306833) + + W(2, 1040473489, 1047969017, 1044185574, 1060667554) + W(3, 1057909434, 1049088596, 1038016429, 1042416864) + + W(4, -1095196747, -1098407451, -1087290490, -1089168888) + + W(5, -1111951852, -1119291421, 1031577591, -1112956481) + + W(6, -1124855402, 1020416041, -1160070647, 1045737613) + + W(7, -1101676720, -1103624738, -1104451167, -1115902938); + sum2 = + W(0, 1014727333, -1117243126, 1029406572, 1027900147) + W(1, -1112527674, -1114664521, -1111634336, -1107116047) + + W(2, -1124633329, -1107065949, 1041729284, -1094200388) + + W(3, -1104959017, 1040214496, 1005029260, 1049039902) + W(4, 1026257754, -1111801245, -1084100906, 1052541581) + + W(5, 1070744988, -1111707080, -1127513012, -1107038609) + + W(6, -1111611661, -1136702760, -1103677442, 1050798725) + + W(7, -1105828213, -1111536822, -1106506739, -1140667375); + WS(-1113867888, 1064515135); + sum1 = W(0, -1113562537, -1107046062, -1103978495, -1098739202) + + W(1, -1122423580, 1019693149, -1116789880, 1020558520) + W(2, 1048061265, 1043460993, 1060048584, 1062873548) + + W(3, -1119786749, 1023528257, -1106657727, -1137576074) + + W(4, 1011269086, -1098944190, -1107431310, 1045822761) + + W(5, 1025265092, -1112998912, -1142662356, -1104660585) + + W(6, -1105945126, -1148623547, -1111558073, -1114838028) + + W(7, -1101841139, -1117915947, -1106868487, -1122252061); + sum2 = + W(0, -1145158406, 1012385731, 1016118130, 1024197601) + W(1, -1112179804, -1119519161, -1127326042, -1113792718) + + W(2, 1022878354, -1129472106, 1044961455, 1049544336) + + W(3, 1030346877, -1131221354, -1095482557, -1093377057) + + 
W(4, -1111624996, 1015658802, 1040055751, 1057430996) + W(5, -1114498280, 1020564842, -1112131584, 1019749938) + + W(6, 1010204019, -1123973089, -1157676569, 1022031298) + + W(7, -1124728962, 1023003938, -1121117277, -1141925830); + WS(1051333020, -1087054195); + sum1 = W(0, -1112042433, 1019002067, -1115058903, 1046618143) + + W(1, -1107874438, -1148774132, 1036768028, -1154881470) + + W(2, -1104387605, -1096771599, -1102529947, -1093976551) + + W(3, -1089735051, -1129150690, -1101876091, -1098135714) + + W(4, 1052246829, 1048899277, 1054136106, 1044227957) + W(5, 1060328029, 1046377228, 1045160291, 1045432325) + + W(6, -1113527444, -1098301717, -1095193948, 1044013586) + + W(7, -1107288943, 1038700845, 1024370318, 1019966234); + sum2 = W(0, -1114066012, 1029900262, -1114806697, -1110038644) + + W(1, 1036646832, -1110070018, 1020625547, 987867800) + W(2, 1041392892, -1112610934, 1041587346, 1025557898) + + W(3, 1055783298, -1102485451, 988088600, 1039502616) + W(4, 1044719218, 1046575198, 1050162797, 1055420600) + + W(5, -1099287148, -1100701871, 1033323822, -1119304428) + + W(6, -1097547793, -1095644935, -1102972970, -1095936036) + + W(7, 1028020818, -1113902766, -1135974783, 1018696751); + WS(-1120103648, -1090070191); + sum1 = + W(0, -1117418940, 1024592611, 1040968250, 1049918615) + W(1, 1034410595, 1035134604, -1119948353, -1144523437) + + W(2, 1049030490, 1050801054, -1111205267, -1116228463) + W(3, 1050512309, -1102511675, 1034763283, 1039656998) + + W(4, -1112262737, -1094705236, 1042055990, -1085341341) + + W(5, -1106227665, 1036298997, -1138448657, -1110564136) + + W(6, -1114427969, 1032013281, -1110419822, 1047944497) + + W(7, -1132367051, 1025392341, -1111046724, -1120201127); + sum2 = W(0, -1122606938, -1141997982, 1027805544, -1120462650) + + W(1, -1100560083, -1105726471, 1025575588, -1113668318) + + W(2, 1046354401, 1040228902, -1143835982, 1061896257) + + W(3, -1080731069, -1098049024, 1026943708, 1032253740) + + W(4, -1124762815, -1102583710, 
1052785838, 1059397472) + + W(5, -1120768779, -1126645315, 1011421951, -1122685018) + + W(6, -1118515993, 1036845124, -1106443071, 1043676745) + + W(7, 995709275, -1128858827, -1112152255, -1127110207); + WS(1060158670, 1068766623); + sum1 = + W(0, -1116432453, -1131098839, -1099753724, 1034945056) + W(1, -1122123599, 1040823512, 1040184285, 1050332416) + + W(2, 1056712636, 1049784895, 1057908127, 1044163070) + W(3, 1028198895, -1122504371, -1123329903, -1096645803) + + W(4, -1087151539, -1102593034, -1089291740, -1094800699) + + W(5, 1036808107, -1120242995, -1127465369, -1132187055) + W(6, 1040295867, 1033948341, 1031077093, 1027113355) + + W(7, -1132674679, -1117547229, 1040274980, -1137741501); + sum2 = W(0, 1036379663, -1108907739, 1070189048, 1073213180) + + W(1, 1065321765, -1076673488, -1079893128, -1072585926) + + W(2, -1098232449, 1058983203, 1016986873, 1047923452) + W(3, 1033582270, 1031365639, 1038048757, -1107019906) + + W(4, -1128107780, 1035811659, 1040144345, -1105399966) + + W(5, -1110681209, 1039880527, -1116001167, 1018297481) + + W(6, -1120677803, -1120240179, 1029686139, -1109097268) + + W(7, 1041308319, -1109042515, 1034493417, -1131123922); + WS(-1112459888, 1031046963); + sum1 = W(0, -1112719265, -1129483154, -1103727283, -1091575740) + + W(1, -1104335643, -1155155940, -1114094348, -1105426861) + + W(2, -1128634346, -1120462421, -1112785958, 1056781779) + + W(3, -1153409228, -1108013960, -1118230989, 1027667442) + + W(4, 1043326546, -1103778738, 1050524372, 1067939282) + + W(5, 1048587330, -1132710977, -1109505653, 1036777787) + + W(6, -1115081924, -1114915140, -1104237825, -1096588493) + + W(7, -1108534894, -1114726045, 991235804, -1111746006); + sum2 = W(0, -1119086167, 1013020612, -1141008560, -1108719150) + + W(1, 1023891147, -1145112744, 1018251990, -1116478859) + + W(2, 1037241732, -1129818230, 1029187991, -1074141953) + + W(3, 1019083994, -1124211508, -1122886966, 1027168441) + + W(4, -1114652243, 1028260797, -1112264831, 1072760747) 
+ + W(5, 1042535682, -1131878972, 1037493286, -1129254230) + + W(6, -1131518324, -1115730540, 1036245214, 1034705438) + + W(7, -1119172530, -1133917220, -1114007324, 1018465006); + WS(-1086783566, -1086791567); + sum1 = W(0, -1115195632, -1153026662, -1134496297, -1108787789) + + W(1, -1119803561, -1145121243, -1131708777, -1110064394) + + W(2, -1095708330, -1093442522, -1098161001, 1044632085) + + W(3, -1091375169, -1107384199, -1100078210, -1104664012) + + W(4, -1123676804, 1018199561, 1037878573, 1064261167) + W(5, 1057417647, 1052226946, 1046948682, 1047330733) + + W(6, 1041725033, -1121101892, 1034570216, -1103584185) + + W(7, -1108866704, -1122033784, -1117964907, 1022321300); + sum2 = W(0, 1031515378, -1121810184, 1010081392, -1119765868) + + W(1, 1035546479, -1106495935, 1028378344, 1003402623) + + W(2, -1077397139, -1079753374, 1044674665, 1073378801) + + W(3, 1056367001, -1109376902, 1035615024, -1118889676) + + W(4, -1134939312, -1097085111, 1052841311, 1007836336) + W(5, 1044809025, -1118570548, 992033726, 1010750288) + + W(6, -1112896962, 1035985281, -1105060263, 1031953140) + + W(7, 1022446080, 1031289566, 1032016077, -1150317246); + WS(-1096711324, -1080143969); + sum1 = W(0, -1135181951, 1025399430, 1033906501, 1041786535) + + W(1, -1133559462, 1003671004, -1123695637, -1105366874) + + W(2, -1095339398, -1094144146, -1093853897, -1089314149) + + W(3, -1089126233, -1110956839, -1129083412, -1113909272) + + W(4, 1050582337, 1049483953, 1045129706, 1046728732) + W(5, 1045641984, 1051092970, 1049638101, 1057307443) + + W(6, 1026786373, -1123204644, 1006467326, 1031608549) + W(7, -1135165794, 1017185520, 996324016, 1009923752); + sum2 = W(0, 1023911567, 1004932796, -1119124867, 1022775023) + W(1, 1011639102, 1036397588, 999444348, -1106309084) + + W(2, -1108118447, -1134375038, 1035251904, 1037995724) + + W(3, 1050162042, 1037707684, -1111062570, -1101285996) + + W(4, 1044892126, -1138359022, 1063133445, 1074655410) + + W(5, 1070062472, -1083132698, 
-1073510095, -1073687480) + + W(6, -1112738860, 1020475455, -1102926770, 1047698054) + + W(7, -1113839430, -1117927031, 1017195567, 1041743674); + WS(-1089880270, 1068594400); + sum1 = + W(0, -1114467040, 1023170830, 1036273894, -1107800974) + W(1, 1029867434, -1106779367, 1024593089, -1110449433) + + W(2, -1104513741, -1091458130, 1034367029, -1100997781) + + W(3, -1095286649, 1047811540, -1096228817, -1109179032) + W(4, 1050219169, 1042545281, 1048674674, 1042739212) + + W(5, 1053624831, 1034908007, 1034674708, 1049808694) + W(6, -1118490053, -1120682309, 1031296832, -1110241363) + + W(7, 1034289100, -1107012174, 1021221591, -1126204757); + sum2 = + W(0, 1030004067, -1117028113, -1105887481, -1066126465) + + W(1, -1104777348, -1122550707, -1130918117, 1023720579) + + W(2, -1133064657, -1134091777, 1045929484, 1081917443) + W(3, 1043726141, 1028310407, 1016764395, -1116064873) + + W(4, 1024174065, -1113615539, 1032698350, -1097201480) + W(5, 1033613382, -1107697461, 1029778305, 1018371831) + + W(6, -1119836251, 1025775761, -1118489229, 1041573100) + + W(7, -1124268877, 1030764051, -1116797441, 1018350967); + WS(1067475431, -1126058166); + sum1 = + W(0, -1108052732, 1043040440, 1048922396, -1097574342) + W(1, -1119673025, -1101774749, 1019400344, 1007801026) + + W(2, -1097851361, -1108210069, -1084429932, 1055911149) + + W(3, 1041902328, 1035827603, -1115594513, -1107003538) + W(4, 1049362324, 1048929252, 1054104112, 1047147106) + + W(5, -1085779789, 1049622021, 1019477630, 1037086373) + W(6, 1040199303, -1109622414, 1041203782, -1104274751) + + W(7, 1047880876, -1116198608, -1107876353, 1036701569); + sum2 = + W(0, 1013753738, -1113150555, 1015248850, 1042650722) + W(1, 1040818978, -1105652199, 1033430289, -1120377801) + + W(2, 1020731454, 1046742995, 1045156685, -1104352239) + W(3, -1089864705, 1045331461, -1109224259, 1016214593) + + W(4, -1120697178, 1034035253, -1094710566, -1128318417) + + W(5, 1045040370, 1025091411, -1106535436, 1019751894) + W(6, 
1009873360, -1099358283, 1039470065, 1046693145) + + W(7, 1036008061, 1030627793, 1017876415, -1132110931); + WS(1060496974, -1099362699); + sum1 = W(0, 1054706808, 1036662868, 1045895019, 1033303646) + W(1, 1044095763, -1130190200, 1035564779, 1040746168) + + W(2, -1109706041, 1041317773, 1051554925, 1049334366) + W(3, 1051166813, 1053590462, 1049285205, 1055551482) + + W(4, -1098091909, -1123237413, -1091928498, -1083798889) + + W(5, -1095315343, -1087007295, -1097088265, -1094550458) + + W(6, -1129324363, 1015757258, 1021341985, -1112852963) + + W(7, 1041804248, -1127492145, 1039262630, -1114905285); + sum2 = W(0, -1073385920, -1084887580, -1083140914, -1081800202) + + W(1, -1085954594, 1037413129, -1096433065, 1054210787) + W(2, 1077926840, 1058818874, 1060159151, 1074845707) + + W(3, 1061906872, -1084431515, -1126824848, -1087315344) + + W(4, 1032559783, -1089669880, 1045764296, 1059775011) + + W(5, -1103860568, -1094536545, -1133232881, -1099186518) + + W(6, -1094451238, -1121878040, -1173171215, -1120435570) + + W(7, -1115590029, 1053315192, -1107838893, 1052978812); + WS(-1078369703, 1041267413); + sum1 = W(0, 1024386586, -1098405416, 1044040033, -1098286219) + + W(1, -1130684104, 1045750173, -1098338270, 1041584973) + + W(2, 1041630887, -1112635974, -1132435208, 1041292515) + + W(3, -1091497929, 1016159377, -1139597330, -1101611546) + + W(4, -1102607056, 1051293608, -1094040968, 1052507157) + + W(5, 1056853585, -1105775474, 1049412460, 1026100205) + W(6, 1016693023, 1018393854, 1046099151, -1095618428) + + W(7, 1018959206, -1117949682, -1118540325, 1040982949); + sum2 = W(0, 1035615459, 1028814353, 1062855981, 1052250964) + W(1, 1056109526, -1106019250, 1048995011, -1115640692) + + W(2, -1097925659, 1038714809, -1077657685, -1101970047) + + W(3, -1098468896, -1114182260, -1104046747, -1130122971) + + W(4, 1043665288, 1033337220, 1051048913, -1113114333) + + W(5, -1085664062, 1040975429, -1109605260, 1034778788) + + W(6, 1020457849, -1103657321, 
-1109823715, 1058024935) + + W(7, 1056999181, -1105846491, 1042595027, -1112374505); + WS(1049151900, -1114127847); + sum1 = W(0, 1050216581, 1000015204, -1136218178, 1023136961) + + W(1, -1130455775, -1133465094, -1160090192, 1035991733) + + W(2, 1044969414, 1047621718, 1057314961, 1055877843) + W(3, 1054358666, 1052966059, 1045850836, 1052491071) + + W(4, -1106856243, -1112471205, -1091938023, -1092033343) + + W(5, -1099719178, -1089783713, -1099053307, -1087984664) + + W(6, -1099359923, -1148711507, -1133921207, -1112054390) + + W(7, 1034273823, -1122838974, 1019980258, -1112322824); + sum2 = + W(0, -1105478410, 1039552519, -1131405058, -1099466970) + W(1, 1049027924, -1115050596, -1129231446, 1049583286) + + W(2, 1011113448, 1050864081, -1125310098, 1043215106) + W(3, 1034334973, -1095883745, 1036178144, -1088635155) + + W(4, 1068049752, -1103020148, 1066695202, 1074596924) + + W(5, 1067601936, -1084539788, -1075514015, -1074115054) + + W(6, -1082501551, 1059003564, -1098156272, 1039344435) + + W(7, -1099880092, -1107282542, -1097076855, 1042018058); + WS(-1081332839, -1093454830); + sum1 = W(0, -1109709318, -1104916884, -1108177419, -1117212646) + + W(1, -1119705461, 1032417267, -1118748528, 987327371) + W(2, 1035356975, 1037116623, -1125155242, 1030327966) + + W(3, 1050241163, -1103907222, 1046095669, 1033262333) + + W(4, 1039420520, -1122158672, 1053028347, -1104761999) + + W(5, 1032136206, -1115200780, -1115609017, 1029855614) + + W(6, -1111308088, 1014687039, -1112931654, 1020526050) + + W(7, -1119448076, -1112611266, -1136487898, -1104533373); + sum2 = + W(0, 1013207527, -1127355075, -1103159789, 1040955478) + W(1, 1008876024, -1116686760, 1035764738, -1154118338) + + W(2, -1118344664, 1028266047, 1043690369, 1051132935) + + W(3, -1108842627, 1041402295, -1105537330, -1128237045) + + W(4, -1124997045, -1117884424, -1116887196, 1047094008) + + W(5, 1057384051, -1100609336, -1098518151, -1089828796) + W(6, 1036829905, 1016171285, 1024917624, 1041601336) + + 
W(7, -1127506713, 1033786361, -1114313206, -1102385561); + WS(1063446990, 1030048893); + sum1 = W(0, -1131562670, -1099483503, -1106193120, -1102405700) + + W(1, 1021119810, -1113568905, 1009340592, 1030255032) + W(2, 1043069042, 1049571969, 1061428532, 1053095529) + + W(3, -1115069793, -1119868742, 1027160065, 1043118346) + + W(4, -1108681470, -1106599551, -1097709592, 1046689338) + + W(5, 1029025472, 1006738196, -1120287391, -1111789333) + + W(6, -1108650254, -1106453175, 1008661917, -1102088711) + + W(7, -1123233168, -1104754348, -1136963210, -1114502129); + sum2 = + W(0, 1012371361, -1149491589, 1023756477, -1165255819) + W(1, -1111467932, 1019335917, -1140694601, 1010513313) + + W(2, 1017109065, -1115161102, 1012418345, -1111308586) + W(3, 1015942947, -1106016138, 1015823021, 1026481588) + + W(4, -1110016822, -1109821790, 1060485172, 1031379047) + + W(5, 1046805034, -1114576028, 1024897044, -1114618076) + + W(6, -1103671135, -1097972516, -1094449842, 1049769129) + + W(7, 999148403, 1022790203, 1032463998, -1146831635); + WS(1060385486, 1040268319); + sum1 = W(0, -1115333181, -1113839600, 1026945989, 1041834949) + + W(1, -1100015113, 1010458454, -1113704543, -1110169699) + + W(2, -1105770459, -1104706211, -1085502934, -1112644646) + + W(3, -1102865873, -1112283012, -1108020290, -1115522389) + + W(4, 1048230464, -1122113023, 1051932092, 1065603891) + + W(5, -1105250533, 1054415097, -1122402466, 1046973419) + W(6, 1022495614, -1115044751, 997701279, 1044329112) + + W(7, 1038196649, -1113673817, -1105082025, -1115640050); + sum2 = W(0, -1115664423, -1122287706, 1044568139, -1099350461) + + W(1, -1102715015, 1042022920, 1031291077, 1032474264) + W(2, 1041813775, 1041645390, -1087138046, 1067461852) + + W(3, -1116572542, -1109622665, -1113357449, -1110133469) + + W(4, -1142717933, -1094011677, -1124627987, 1027339570) + + W(5, 1045144111, 1054026149, -1122353792, 1047364150) + + W(6, -1120925434, -1119842210, -1120985148, -1113918825) + + W(7, -1100833175, 
-1101147879, -1112018489, -1109170005); + WS(1049043868, 1050086952); + sum1 = + W(0, -1127858130, 1025810768, 1034666762, 989092984) + W(1, -1115481017, -1119959429, 1012364567, -1097661520) + + W(2, -1091279607, -1101354119, -1089232423, -1092016019) + + W(3, -1089729669, -1104525366, -1121438163, -1160888088) + + W(4, 1054362490, 1044465651, 1050465752, 1058690160) + W(5, 1053482909, 1051092259, 1019953121, 1051219071) + + W(6, 1028884238, 1022883995, 1032068165, 1005443958) + W(7, 1007092039, -1149438996, -1137072337, 1017307814); + sum2 = W(0, 1030282550, 1012065197, 1049708534, -1101566471) + W(1, 1043525707, 1038111746, 1040589253, -1106531005) + + W(2, -1099759289, 1050936278, 1018991152, 1050588571) + + W(3, -1112383125, -1121285468, 1039884984, -1113841779) + + W(4, 1070065566, 1068820850, 1068572253, 1075164582) + W(5, 1046261639, 1053555382, 1040055978, 1058828770) + + W(6, -1078832473, -1076346523, -1075628240, -1072955990) + + W(7, -1093459475, -1088795804, -1099780818, -1090423367); + WS(-1083655502, 1074535575); + sum1 = + W(0, 973651072, 1028862937, 1029517469, -1111330742) + W(1, 1043404639, -1105397947, -1129726574, -1099887737) + + W(2, -1100106921, -1096173461, -1088250868, -1101351113) + + W(3, -1092528244, 1034877328, -1113904430, -1118418050) + W(4, 1049212555, 1052529147, 1057957824, 1060855844) + + W(5, 1050267538, -1104065896, 1041335934, 1047916353) + W(6, -1122472503, 996549482, -1111380529, -1125410415) + + W(7, -1109626381, -1122247818, -1108333759, 1037233742); + sum2 = W(0, 1037582341, 1032149251, -1135487115, -1107247151) + + W(1, 1053968825, -1096732175, 1034930229, -1094249638) + + W(2, -1116649305, -1112916551, 1044443302, -1117703407) + + W(3, 1049963138, 1042101117, 1046616621, -1127010106) + W(4, -1107051108, 1041643977, 1030763641, 1053360182) + + W(5, -1087331905, -1089009482, -1110671517, 1059951262) + + W(6, 1040538112, -1112067691, 1036263743, -1115814520) + + W(7, 1035849297, -1104153316, -1104877620, -1116847106); + 
WS(-1132786560, 1056578758); + sum1 = W(0, 1031072232, 1019453021, -1112153353, -1127115205) + + W(1, -1153314582, -1105591705, 986346381, 1024604121) + W(2, 1043718831, 1050053971, 1057505465, 1049535379) + + W(3, 1052394474, 1045418390, 1044283028, 1048806335) + + W(4, -1098888378, -1104586611, -1090760605, -1081773448) + + W(5, -1106021324, -1120283669, 1036365944, -1103287167) + + W(6, -1123786893, 1030360908, -1123598824, 1035671447) + + W(7, 1044649456, -1105554848, 1033186457, -1108912601); + sum2 = + W(0, 1040658557, -1106354313, 1027144364, -1103628435) + W(1, -1123462592, -1130705048, -1126068784, 1016031184) + + W(2, 1011638864, 1020064744, 1049840877, 1050147177) + + W(3, -1117174472, -1105120318, -1113970774, -1135769248) + + W(4, 1041865287, -1103147627, 1026998340, 1053587315) + W(5, -1118058864, 1036775410, -1104140895, 1030416564) + + W(6, -1109113498, 1033931950, -1120927100, -1102211113) + + W(7, 1033328738, -1101351534, 1028877580, -1112076066); + WS(1062711758, -1109562142); + sum1 = W(0, -1107075140, -1125545776, -1121788380, -1112684886) + + W(1, -1128625782, -1102890883, -1129119788, -1102110160) + + W(2, -1100622401, -1104747337, -1103229831, -1083301774) + + W(3, 1050510715, -1097610180, -1109077454, 1030737729) + W(4, 1053288051, 1049612355, 1059638178, 1048337697) + + W(5, 1052952654, 1051307580, 1041285742, 1049115768) + W(6, 1017596397, 1007433175, -1120034657, -1098025711) + + W(7, -1112854653, -1108240645, 1022176121, 1020667959); + sum2 = + W(0, 1022257834, 1033804876, 1033963400, -1113268759) + W(1, -1123922685, 1029999681, 1015463858, -1125618402) + + W(2, 977550902, -1105772859, -1098748483, 1063293804) + W(3, -1090033704, 1051039483, -1104977553, 1022914778) + + W(4, -1113927342, 1042844173, -1096777339, 1059312088) + W(5, -1134320563, 1037687330, 1024487701, 1004875175) + + W(6, 1036504102, -1107093105, 1028160469, -1089861840) + + W(7, -1101525036, 1017532722, -1122996995, -1116068477); + WS(1046002488, -1083997249); + sum1 = 
W(0, -1116203964, 1044431961, 1041235242, 1046371486) + + W(1, -1106267745, -1117705943, -1122870401, -1109240562) + + W(2, -1099156844, 1028498222, -1096363997, -1079124621) + + W(3, -1088685662, -1103556299, 999993136, -1106798659) + W(4, 1050076828, 1049400252, 1050263757, 1020370202) + + W(5, 1048790666, 1053355933, 1034829570, 1037276039) + W(6, 1025075217, 1035032656, 1032516798, 1050189128) + + W(7, 1042510329, 1039724554, 1027729159, 1037249299); + sum2 = W(0, -1094982381, 1024745423, -1097113645, 1017043870) + + W(1, 1052555320, -1107563793, 1049262968, -1118735987) + + W(2, 1041875660, -1091311273, 1042706326, -1148119319) + + W(3, -1098101314, 1010824956, -1094774489, 1030619863) + W(4, 1017839086, 1044409386, 974632891, 1061393923) + + W(5, 1043955676, -1098402228, 1042935118, -1094515595) + + W(6, 1045890174, -1107764806, 1048260200, -1118443235) + + W(7, -1097368628, 1041238578, -1116185663, 1042214090); + WS(-1083255246, -1075588436); + sum1 = W(0, -1154522904, -1106371438, -1105464695, 1036086422) + + W(1, 1023783355, -1127243930, 1030176790, 1031412139) + W(2, 1026375684, 1036249778, 1056687627, -1097055709) + + W(3, -1088556578, -1108055408, -1105944807, -1123388097) + + W(4, -1099839750, -1113752075, -1092436626, 1050924256) + + W(5, 1056650964, 1045926824, 1039385437, 1041066449) + W(6, 1037990411, -1119606102, 1043935324, 1036476796) + + W(7, -1111337285, -1110940389, -1124332025, 1021333175); + sum2 = + W(0, -1117330831, 1023708058, -1133288218, -1122435213) + W(1, -1139180683, 1020252429, 1025188082, -1109968059) + + W(2, 1032531181, -1114369361, 1017487862, 1047518334) + W(3, -1122916014, 1027012545, -1103557501, 1046855474) + + W(4, 1043756688, 1031926890, -1104593512, 1043258354) + W(5, -1096895795, 1049767697, 1019544307, -1112657711) + + W(6, 1060766869, 1067300065, -1105945514, -1075696003) + + W(7, -1102393264, -1106946489, 1020047431, -1113285900); + WS(1056055196, 1023945849); + sum1 = W(0, -1104545849, -1139517988, -1101013315, 
-1136100212) + + W(1, -1148373856, 1013350648, -1120654743, -1108930702) + + W(2, 1036175954, -1138851412, 1048468083, -1097789539) + + W(3, -1096999890, -1097071058, 995622088, -1128233700) + W(4, 1032040419, 1009002268, 1040659763, 1059870683) + + W(5, 1039368714, 1053644410, -1122818538, 1033002810) + + W(6, -1111539988, 1026875367, -1104005681, -1112873620) + + W(7, -1135793772, -1132670698, 1028030253, -1133516850); + sum2 = + W(0, 1033182461, -1135790885, 1036058972, -1145806187) + W(1, 1026470367, -1125994579, -1123856921, -1152817846) + + W(2, -1110992702, 1041801313, -1115959119, -1083556559) + W(3, 1047402951, 1026458945, 1021989349, 1029289545) + + W(4, 1011638125, 976542168, -1106760241, -1068720208) + W(5, 1080067579, 1043021822, -1118809257, 1027196491) + + W(6, -1116812221, -1135785781, 1013579453, 1034110873) + + W(7, 1048786168, -1124564059, -1115658442, -1110636256); + WS(1047050040, 1036867972); + sum1 = W(0, 1031952202, 1040033213, 1049413776, 1052329098) + W(1, -1134253939, 1023712847, -1129317744, 1023414180) + + W(2, 1041031945, 1050476745, 1016870010, 1054273441) + W(3, 1041748879, 1035493153, 1041656495, 1041394937) + + W(4, -1098508279, -1097704487, -1091275871, -1087382286) + + W(5, -1095309386, -1117526273, -1112254893, -1108492595) + + W(6, -1117663192, 1033331580, 1012948685, 1043041286) + + W(7, -1131302504, -1125396951, -1153578470, -1112644847); + sum2 = W(0, -1120609508, -1098653081, -1074671298, -1081276780) + + W(1, -1097107702, -1110118090, -1137118351, -1119823807) + + W(2, -1138324119, 1042859622, 1072071026, 1068070176) + W(3, 1044485226, 1040658983, -1118978815, 1035453187) + + W(4, -1110367912, 1038385587, 1026263792, -1107049493) + + W(5, 1024907208, -1122488627, 1038809437, -1111098449) + + W(6, 1021280635, 1026624912, 1026288466, -1122098825) + + W(7, 1040255596, -1152233050, -1126727249, 1006813455); + WS(1047287096, 1059538103); + sum1 = W(0, 1030804480, -1111595219, -1123153834, -1098057279) + + W(1, 1007980582, 
1018775223, 1036089754, 1030691489) + W(2, 1034749929, 1051408038, 1049213285, 1057344539) + + W(3, 1048793645, -1106759404, -1115115061, 1033654600) + + W(4, -1102474905, -1094490621, -1099609086, 1048597242) + + W(5, -1094890059, 1046324795, 1003815995, -1126686757) + + W(6, 1013951379, 1039425378, -1112788128, -1098419769) + + W(7, -1118093753, -1108341356, -1120388747, -1135294247); + sum2 = W(0, -1112864979, 1034721747, -1108077379, 1023366533) + + W(1, 1021246669, 1032508120, -1113986367, -1153257254) + W(2, 1024004698, -1109416755, 988806988, 1058238498) + + W(3, -1115279539, 1041332549, 1033457886, 1018109285) + W(4, 1034996841, 1041661735, 1041642045, 1008174217) + + W(5, -1122858630, -1089151778, -1130797677, -1111548255) + + W(6, 1004307827, -1116757818, 1037366938, -1107547239) + + W(7, -1108868659, -1128614293, -1130732717, -1122125438); + WS(1066216871, -1084582294); + sum1 = W(0, 1009745022, 1023325836, 1034246149, -1103455511) + W(1, 1025953385, -1105242559, 988118663, -1107601837) + + W(2, -1095963722, -1096473834, -1087129751, -1142570129) + + W(3, -1094684497, -1107829831, -1107520037, -1117719279) + + W(4, 1050775177, 1046303345, 1057767679, 1064400323) + W(5, 1038452388, 992534739, 1025316967, 1044853309) + + W(6, 1027534757, -1126933960, -1118146623, -1103828265) + + W(7, 1018950609, 1030437560, 1020674901, 1033139589); + sum2 = + W(0, -1127013105, 1018526115, -1123587641, 1052346460) + W(1, -1098654594, 1029804743, -1118240305, 1030491706) + + W(2, 1031626404, -1117975133, 1062148372, 1074624908) + + W(3, -1090512380, -1070854231, -1095518794, 1041467716) + + W(4, 1029958550, -1113082476, 1035325186, -1130733653) + + W(5, 1046581721, -1113725416, 1031776819, -1115594754) + W(6, 1017794797, 1023300847, -1145491637, 1041942663) + + W(7, -1113152926, -1139181075, -1120299385, -1142763637); + WS(1033725552, -1082653885); + sum1 = W(0, 1027658456, 1033791775, 1038169633, -1130834725) + + W(1, -1105755912, -1107114253, -1106878767, -1102176852) + + 
W(2, -1091894661, -1094613753, -1085886368, -1094605577) + + W(3, 1049096200, 1038006563, 1040399888, 1038046228) + W(4, 1054730080, 1052727904, 1060322284, 1054915405) + + W(5, -1115316932, -1113871235, -1114082704, -1098681477) + + W(6, 1024810077, 1020875129, -1114253244, 1021660859) + W(7, 1010914050, 1032412472, 1016596422, 1044096992); + sum2 = W(0, -1126607908, -1111819446, 1032709615, -1119610542) + + W(1, 1027594385, 1029384695, -1115269200, 1032209203) + + W(2, -1109006144, 1050929158, 1025316173, -1107176811) + + W(3, -1123863107, -1122531221, 1026796828, -1115616555) + + W(4, -1093467963, 1063076656, 1048889614, -1090320472) + W(5, 1046933317, 1051206364, 997945380, -1109796884) + + W(6, 1059727060, 1073053378, 1073749745, 1058093447) + + W(7, -1079533534, -1073154062, -1075936158, -1088004581); + WS(-1087442510, 1045166814); + sum1 = + W(0, -1106921670, 1032907600, -1111070674, 1047421629) + W(1, -1094092352, 1031075349, -1136240762, -1129052086) + + W(2, 1032009478, -1127325529, -1106321881, -1085655355) + + W(3, 1057239638, -1098381173, -1128650952, -1107389381) + + W(4, 1041337656, -1100890040, 1060092296, 1044767811) + W(5, 1040377247, 1037296628, -1131454723, 1037463686) + + W(6, -1110338367, 1042108264, -1105899594, 1027442111) + W(7, -1105935775, 1037469097, 1027139781, 991481919); + sum2 = + W(0, -1119426781, -1109786529, 1041819143, 1018220925) + W(1, -1103055916, -1124599508, 986571056, 1024618671) + + W(2, 1040351141, 1043540747, -1105926532, -1133247980) + + W(3, -1096724541, 1038612076, 1019567362, -1143357456) + + W(4, -1104019881, -1106797782, -1098594115, 1066013053) + + W(5, -1136157126, -1097021086, 1025702119, 1008181894) + W(6, 1015410156, 1043466418, 1048709183, -1092013542) + + W(7, -1109288787, 1027934232, -1140871148, 1027080058); + WS(1059279054, 1041683061); + sum1 = W(0, 1028823114, 1033060856, -1136683218, 1041097809) + + W(1, -1108712682, -1099515041, -1114499693, -1105525449) + + W(2, -1096417644, -1095251776, -1096755466, 
-1095420880) + + W(3, -1087904547, -1117180600, -1113115866, -1121084634) + + W(4, 1041071884, 1051348966, -1106154521, 1062896338) + W(5, 1057053635, 1050313119, 1041134484, 1043053335) + + W(6, 1040139175, -1133443258, 1027763154, 1037083176) + + W(7, -1109247178, -1169250436, 1007514196, 1031868218); + sum2 = W(0, -1104031080, -1113246189, -1124792744, -1107688308) + + W(1, -1084040655, -1089958554, -1098618924, -1098033920) + + W(2, 1039900182, 1044206765, 1043086617, 1057123143) + W(3, 1066602722, 1037219700, -1111210157, 1036938090) + + W(4, 1028023758, -1122555834, 1041018096, -1100462314) + + W(5, -1123151815, 1032289221, 1028867168, -1143651414) + + W(6, -1125301559, 1027754708, -1129842792, 1022410763) + + W(7, 1034604384, 1029558442, -1139711519, -1122704124); + WS(-1093886876, -1092780259); + sum1 = W(0, 1031521076, 1040859655, -1123954783, -1115369432) + + W(1, 1027325374, -1130075936, 1017430099, 1005655277) + W(2, 1034696971, 1027092583, 1057011018, 1055452141) + + W(3, 1055461116, 1043845385, 1041326696, 1045628689) + + W(4, -1098366291, -1107551117, -1084481759, -1086775792) + + W(5, 1050583573, -1098824964, 1032795870, -1110858724) + W(6, 991412379, 1023411481, 1037665034, 1037775165) + + W(7, -1102735091, -1106471523, -1113156096, -1108701199); + sum2 = W(0, -1099549152, -1101982776, 1039636960, 1065032135) + + W(1, 1046191187, -1112740199, -1120755726, -1138571865) + + W(2, -1115620025, -1110046443, 1067650764, -1096892917) + + W(3, -1084058149, -1089814493, 1022618460, -1106519005) + + W(4, 1042476476, -1138118945, 1056117723, -1084935200) + + W(5, 1043334912, -1116550886, 1030169574, -1123579094) + + W(6, -1128057356, -1104607948, -1129519972, 1039009532) + + W(7, 1042222420, -1114781123, 1023299940, 1029490149); + WS(-1103384376, 1050555318); + sum1 = W(0, 990997212, -1104723373, -1102560870, -1110645588) + + W(1, -1114771100, 1006886950, 1011892746, 1041403778) + W(2, 1056616014, 1049493908, 1060440249, 1063537686) + + W(3, 1049398171, 
-1123261404, 1042753094, 1041297647) + + W(4, -1113359817, -1101985253, -1095607555, -1091747296) + + W(5, -1098384586, -1094355067, -1098733239, -1099641616) + + W(6, -1104278389, -1112669305, -1117918862, -1124628967) + + W(7, -1120789735, 1020875660, 1029891897, -1143354303); + sum2 = W(0, -1097904627, -1102268820, -1101696600, -1102083058) + + W(1, -1100977750, 1022682138, -1105396681, 1035758392) + W(2, 1056762738, 998260703, 1049156882, 1055125136) + + W(3, 1040009482, -1099421122, -1114104451, -1112582051) + + W(4, 1033294591, -1102966386, 1040206209, 1048772561) + W(5, 1004029807, 1040485303, -1118051402, 1040482786) + + W(6, -1113205081, -1105114179, 1028552759, 1023069474) + + W(7, -1108173489, 1040370279, -1109779069, -1135668672); + WS(1042369848, -1095650924); + sum1 = W(0, 1032801852, -1124780413, -1142443027, 1031124370) + + W(1, 1024489633, 1037041722, -1157391942, 1048283534) + W(2, 1060985291, 1056066249, 1059062488, 1046815795) + + W(3, 1051554984, 1026983110, 1037131073, 1028118439) + + W(4, -1088222536, -1093287088, -1089766974, -1084610255) + + W(5, -1096293908, 1035048291, -1113190096, -1107690581) + + W(6, -1154392525, 1017158008, 1041310978, -1111535759) + + W(7, -1118760812, -1118618480, 1032589425, -1101944856); + sum2 = W(0, 1056251107, -1118435725, -1111043713, -1090037466) + + W(1, -1129656805, -1093926860, 1052716525, -1083761792) + + W(2, -1084477192, 1041984407, -1093515412, 1073185178) + + W(3, 1051712939, 1053076395, -1096065312, 1066458198) + + W(4, -1077785336, -1086371528, -1093010832, 1074151927) + + W(5, 1054463958, -1094829660, -1090812268, 1063500158) + + W(6, 1053829707, -1113013425, 1046411943, -1090694669) + + W(7, -1095714333, -1103046632, 1035367276, -1087950464); + WS(-1081634407, -1072784825); + sum1 = W(0, -1102740818, -1115394775, -1101924733, -1100839185) + + W(1, -1123768914, -1140848016, -1118249725, -1116477425) + + W(2, 1024862651, 1046040367, -1109613465, -1115978107) + + W(3, 1049722038, -1098661743, 
-1138059466, 1025943000) + W(4, 1056867014, 1027952685, 1058509811, 1050187219) + + W(5, -1098437541, 1033615035, -1143195264, -1132571500) + + W(6, 1016622683, -1112643478, -1096240139, -1101581667) + + W(7, 1008822020, 1023463264, -1116668435, 1036595961); + sum2 = W(0, -1139809909, -1126178115, 1032546581, -1097752958) + + W(1, 1042272246, -1118367610, -1138081685, 1017774855) + + W(2, 1046126277, -1111752353, 1034216546, 1042101074) + W(3, 1043290796, -1112722386, 1034532478, 1030093406) + + W(4, -1119940899, 1055245806, -1088954301, 1058677046) + + W(5, -1087007443, 1048637958, -1140548675, 1029648824) + + W(6, 1040903120, -1104413152, 1019748625, -1089277711) + + W(7, 1028591970, -1139620757, 1017306737, 1033828758); + WS(1050645916, 1033550915); + sum1 = + W(0, -1118556120, -1108262319, 1043230361, -1094947334) + W(1, -1103841584, 1018079143, 1032598742, 1008731683) + + W(2, -1107219907, 1043545243, -1086762442, -1104041435) + + W(3, 1058293794, -1097977901, 1041130248, -1123331655) + W(4, 1039060494, -1145250020, 1059123936, 1057758321) + + W(5, -1096913490, 1039599795, -1151122424, -1123961290) + + W(6, 1030703933, -1110184190, -1110311564, -1100512367) + + W(7, 1044416908, -1132472896, -1147952664, 1022228292); + sum2 = W(0, 1040267615, 1048409433, -1107165984, -1086251247) + + W(1, -1113957324, -1107976602, -1122078587, -1111525785) + + W(2, -1112791644, -1110540665, -1088877638, 1053330884) + + W(3, 1057377190, 1057673067, 1024512949, 1042772014) + W(4, 1010515766, 1032365064, -1090038325, 1049266570) + + W(5, 1028890653, -1103454662, 1041725883, -1143554104) + W(6, 1024308395, 1041630361, 1040244783, 1037513285) + + W(7, -1098504745, 1039941233, -1103661415, 1024552651); + WS(1051978908, -1102077462); + sum1 = W(0, -1132165009, -1117187439, -1108837850, 1037183202) + + W(1, 1028473682, 1045655273, 1025240990, 1033734537) + W(2, 1045761591, 1051430047, 1060286099, 1046972447) + + W(3, -1128782822, 1028047735, -1178179968, 1035331372) + + W(4, 
1013540534, -1096198150, -1130717861, -1085387922) + + W(5, -1105434962, -1101497054, -1110960744, -1102517015) + + W(6, -1106078467, 1014010165, -1121999817, 1030141582) + + W(7, 1008925446, 1018887490, -1122721428, 1026996307); + sum2 = W(0, 1023072493, -1125531833, 1030396885, -1118880502) + + W(1, 1027419877, -1161271853, 1019485964, 1028708277) + W(2, 974611657, 1047662948, 1035351326, -1090109833) + + W(3, -1110942359, 1033315833, 1010290822, -1156901930) + + W(4, -1101792417, 1051899312, 1080526024, -1069022098) + + W(5, -1087739606, 1036440530, -1124554651, -1114394737) + + W(6, -1112121991, 1037288067, 1042102238, -1113214479) + + W(7, 1032127295, -1143368203, -1132377096, -1141169667); + WS(1058455886, -1096183470); + sum1 = W(0, -1105360672, -1112035684, -1105059667, -1103063975) + + W(1, -1112638549, -1104182327, -1106250503, -1103319623) + + W(2, -1112611850, 1032735242, 1048808633, 1042817013) + W(3, 1058906409, 1047248804, 1047193699, 1057710235) + + W(4, 1060616064, 1033545283, 1051085625, -1097442806) + + W(5, -1102188695, -1103229812, -1106970890, -1109193562) + + W(6, -1104075433, -1111940643, -1115570744, -1097216429) + + W(7, -1107200030, -1108265532, -1119499692, -1106901731); + sum2 = W(0, -1135393651, -1131009665, 1004138181, -1111055824) + + W(1, -1134829555, -1127256265, -1113946573, 1010148467) + + W(2, -1114274101, 1016197705, -1143366053, 1051632163) + + W(3, -1111579981, 1038147066, 1025185617, -1137859715) + + W(4, -1117767149, -1130986601, 1026922589, 1048764370) + + W(5, 1036257166, -1112945554, -1143942149, -1123068365) + + W(6, -1110403581, -1136444051, -1112183743, -1121253595) + + W(7, 1009689523, 989872074, -1123668797, -1123716033); + WS(-1103618872, 1023577831); + sum1 = W(0, 1041702149, 1032268701, 1021026046, 1032012647) + W(1, 1023662430, -1111766616, -1112590149, 1042231991) + + W(2, 1057806109, 997904496, 1057675340, -1129702414) + W(3, 1056283152, 1039716321, 1049875952, 1054291708) + + W(4, -1091063799, -1106010126, 
-1090884608, -1094077941) + + W(5, -1094732237, -1100869456, -1105996279, -1098425069) + + W(6, 1022049043, -1102660926, 1046995709, -1100395542) + + W(7, 1050214494, -1096243374, 1047647816, -1100607928); + sum2 = + W(0, 1073849383, 1069429154, 1070121938, 1066905653) + W(1, -1087755842, -1080359597, -1073496568, -1070245916) + + W(2, -1072987051, -1080077361, -1076996297, -1086754234) + + W(3, 1060194671, 1067521081, 1074652354, 1075883836) + W(4, 1040644897, -1098556311, -1124122091, -1095882058) + + W(5, -1096747545, 1048551041, -1092793409, 1058695504) + + W(6, 1022236877, -1128073567, -1118780260, 1027638421) + + W(7, 1012182550, -1129108935, -1130588931, 1001812541); + WS(1010873216, -1100304815); + sum1 = W(0, -1154187044, 1028741017, -1114247598, -1099084427) + + W(1, -1104966895, 994968800, -1113228468, 1028263947) + W(2, 1045945805, 1051026373, 1058185854, 1059904750) + + W(3, 1052860588, 1038231664, 1040557287, 1032103905) + + W(4, -1102222630, -1103217414, -1097780567, -1091550752) + + W(5, -1108718914, -1110664976, 1017403508, 1017865354) + + W(6, -1113759340, 1022415534, -1105383733, -1111624796) + + W(7, -1114326870, -1110217221, -1108875519, -1109350878); + sum2 = + W(0, -1114639192, 1039210963, 1047528380, -1099502544) + W(1, 1041344628, -1108972702, 1021595805, -1116883371) + + W(2, -1100331344, 1069298433, 1080352854, -1067747068) + + W(3, -1080996124, 1032407437, -1121090437, 1016604711) + W(4, 1035901690, 1043940791, 1030733380, -1084572959) + + W(5, 1003800555, 1045939813, -1121287047, -1122259179) + + W(6, 1012858414, -1126620651, 1036529177, -1099630936) + + W(7, 1043527822, -1119860547, -1145664907, 1034105585); + WS(1044302648, -1104457270); + sum1 = W(0, -1115179413, -1101763748, 999269803, -1106420033) + + W(1, 1041707045, -1128885581, -1110751840, 1029110244) + W(2, 1029190781, 1045938783, 1034810490, 1055724168) + + W(3, 1053359959, -1100033022, 1037908026, 1035419021) + + W(4, -1108944696, -1102720380, 1038235218, -1101615303) + + 
W(5, -1094303566, 1043259278, 1011056201, -1123807773) + + W(6, -1115207128, 1013266237, 1025157307, -1105407492) + + W(7, 1045691477, -1113033137, -1114066571, -1113282394); + sum2 = + W(0, -1117238549, -1115295266, 1033754662, -1095438097) + W(1, -1106855439, 1011060498, -1117460606, 1032653312) + + W(2, -1109414288, -1128441429, -1104877135, 1053172955) + + W(3, 1056926069, -1108715476, 1036225840, -1114450934) + + W(4, -1115125848, -1121954754, 1018153798, 1038786210) + W(5, -1104082080, 1043675119, 1008366234, 1018894269) + + W(6, 1023475415, 1024122972, 1002646200, -1106263750) + W(7, 1034246226, 1025499367, -1107090094, 1013321052); + WS(1068015911, 1043072951); + sum1 = W(0, -1127841379, 1035792844, -1155603908, 1018341976) + + W(1, -1111053022, 1044403491, -1112313859, 1005208661) + + W(2, -1101991257, -1091973141, -1110119406, -1095315357) + + W(3, -1107228151, -1111437840, -1097295811, -1103670736) + + W(4, 1041159238, 1040826438, 1052691071, 1053842800) + W(5, 1042506601, 1050221764, 1030249017, 1046985745) + + W(6, 1035382295, -1098419575, 1043898844, 1030324599) + + W(7, -1108241413, 1043481954, -1098652336, 1032196534); + sum2 = + W(0, -1134315530, 1033824390, -1112867968, 1039862799) + W(1, 1005143468, -1114969920, 1030354102, -1131720457) + + W(2, -1125722365, 1019923991, 1039464885, -1087456726) + + W(3, -1119378177, 1042059959, -1113780490, 1009971850) + + W(4, -1135384490, 998580556, -1093291922, -1061395777) + + W(5, -1106942114, -1121281813, 1034179020, 1032010553) + W(6, -1118933131, 1027495017, 1045077367, 1087627027) + + W(7, 1034637079, -1148550692, -1118611519, -1117397469); + WS(1060186318, -1131602669); + sum1 = W(0, 1022422466, -1102049002, 1050775273, -1097141731) + + W(1, 1056872997, -1099566807, 1025667942, 1032894309) + W(2, 1050205114, 1052887138, 1052456073, 1049038746) + + W(3, 1040490435, 1039690728, 1043457059, 1045856748) + + W(4, -1099402401, -1102238785, -1100510367, -1088759247) + + W(5, -1122526498, -1099089908, 
-1102859210, -1106963501) + + W(6, -1112848707, -1106285089, 1039607513, -1098786589) + + W(7, 1048305743, -1104192622, 1032266998, -1118572779); + sum2 = W(0, 989650422, -1108234271, -1120122674, 1045260201) + + W(1, -1114112879, 1037831837, -1117501250, 1024132690) + + W(2, -1120410947, 1050158699, -1092804082, 1024838498) + + W(3, -1125280065, -1119936776, -1114421818, 1035095434) + + W(4, 1047633630, 1066272871, 1075424781, 1061734347) + W(5, -1091697647, 1003481288, 1011848513, -1106020023) + + W(6, -1081060940, -1074101109, -1079387458, 1050344942) + + W(7, 1043540897, 1039609275, -1131708367, 1026462111); + WS(1057810382, 986287880); + sum1 = W(0, -1105255365, 1027767677, 1037147968, 1038550632) + + W(1, -1136315263, 1035749561, -1115075413, 1028257967) + + W(2, -1110142276, -1106138450, -1092238936, -1090210593) + + W(3, -1089134184, -1093055129, -1097497096, -1089420006) + + W(4, 1041191503, 1031406779, 1027980992, 1049547413) + W(5, 1052451129, 1057536820, 1050343212, 1058108487) + + W(6, 1050176972, -1130243957, 1041310918, 1041377527) + + W(7, -1115128119, 1033537239, -1127231554, 1031965752); + sum2 = W(0, -1099824579, -1113793286, 1053512844, -1112943238) + + W(1, 1041834894, -1099992002, -1120789532, 1024916046) + + W(2, 1070708271, -1104040400, -1091831853, -1072548459) + + W(3, -1093778092, 1057576575, 1064181862, 1051972140) + + W(4, 1075074245, -1084124078, -1082668198, -1072503695) + + W(5, 1056090411, 1053907302, 1058602971, 1054786345) + W(6, -1097074436, 1023561426, 1039380165, 1009216489) + + W(7, 1043969626, -1107563771, 1034348623, -1106961801); + WS(-1075707047, 1038147646); + sum1 = W(0, 1038492938, -1115961531, 1036899040, 1041066756) + W(1, 1038870159, 1045702796, 1022987667, 1044017707) + + W(2, 1050722763, 1026800423, 1055626176, 1052178217) + W(3, 1027667391, 1053246416, -1100305296, 1049962981) + + W(4, -1095596015, -1103730686, -1091210886, -1090286882) + + W(5, -1094135001, -1095582752, -1109653838, -1093296229) + + W(6, 
1040870942, -1122935577, 1047700685, 1036539085) + + W(7, -1102625947, 1048523811, -1105859960, -1117734303); + sum2 = W(0, -1145353723, -1113260796, -1112198364, -1113367096) + + W(1, 1007958125, 1047146251, -1122974002, 1021064313) + W(2, 1027969677, -1095928859, 1052169305, 1052353113) + + W(3, -1129450520, 1051473061, -1091037095, 1050393555) + + W(4, -1115030411, 1031463199, -1108227013, 1051016428) + W(5, 1041462653, 1045242344, 1032341221, 1020102815) + + W(6, 1052090154, -1110261785, 1041877601, -1099630700) + + W(7, -1087173862, -1102681887, -1095485390, -1111896409); + WS(-1102302520, 1068562064); + sum1 = W(0, -1130894152, 1029216267, 1029150351, 1029804895) + + W(1, -1108054071, 1046304488, -1107326720, 1018120580) + + W(2, -1101486038, -1094978851, -1104755260, -1093484995) + + W(3, -1110271975, -1106599349, -1102131284, -1102274525) + + W(4, 1044130034, 1029251017, 1053252560, 1050941559) + W(5, 1036880709, 1050863202, -1128083416, 1047248574) + + W(6, 1031789673, -1102217970, 1042516209, 1040658618) + + W(7, -1113036417, 1041743615, -1102226940, 1032422999); + sum2 = + W(0, 1019111797, -1123809812, 1006739898, 1041109085) + W(1, -1126211453, 1028972357, -1118372569, 1025713585) + + W(2, 1022527979, -1111065998, 1031759809, -1095097056) + + W(3, 1034613952, -1107360163, 1033659253, -1118642675) + W(4, -1131519898, 1023737355, 1045357020, 1082939698) + + W(5, 1027342607, 1033832936, -1112248823, -1118824157) + + W(6, 1029130971, -1117143513, -1106220344, -1064938697) + + W(7, -1122955245, -1112280546, 1032152906, 1028082979); + WS(1066566439, -1125753148); + sum1 = W(0, 1020091828, 1041951168, -1130301450, 1052053147) + + W(1, -1131231944, 1033588155, 1009411772, -1116682291) + + W(2, -1096367972, -1098758994, -1093467737, -1089881409) + + W(3, -1092544881, -1098181717, -1110070786, -1096392077) + + W(4, 1038767583, 1048997640, -1130818632, 1064063659) + W(5, -1116991019, 1042770882, 1046860728, 1027361773) + + W(6, 1030663671, 1025331919, -1108818909, 
1044746920) + W(7, 1022962726, 1041997189, 1018371147, 1038578846); + sum2 = W(0, -1105507764, -1149004498, -1098298748, -1115729482) + + W(1, -1094576030, -1111478010, -1129010369, 1018600957) + + W(2, 1019815533, 1047279899, -1106660204, 1058550934) + W(3, 1031191852, 1032716298, -1117054989, 1019851909) + + W(4, -1136431769, -1121679526, -1096656341, 1058580319) + + W(5, -1098228632, 1041490224, 1047791827, -1106522387) + + W(6, 1024962860, 1041000955, -1107413740, 1027135608) + + W(7, -1101441076, 1035067556, -1123460834, 1005788722); + WS(1050996380, 1066787661); + sum1 = W(0, 1042843177, -1120310187, 1033453959, -1107450543) + + W(1, 1017862620, -1134072575, 1025308393, 1033769739) + W(2, -1109518091, 1043911830, 1059519229, 1061971625) + + W(3, 1051823058, 1046601317, 1042112788, 1046863997) + + W(4, -1098674409, -1097687209, -1096304487, -1098011863) + + W(5, -1095767039, -1095249970, -1098354285, -1098417599) + + W(6, -1112832565, -1133582755, -1116528532, -1105396874) + + W(7, 1033796902, 999923683, 1030029487, -1109771063); + sum2 = W(0, -1103321099, 1033376724, -1108916223, 1033194077) + + W(1, 1038197771, 1032881798, 1017830932, -1138730935) + + W(2, -1087654445, -1114493691, 1049058628, 1065240604) + + W(3, -1128335788, -1094150295, 1006224046, -1112103411) + + W(4, -1098379129, -1096458683, 1036368268, 1054396447) + + W(5, 1049730119, -1108472207, 1031462702, -1102017203) + + W(6, 1013739975, 1015354012, -1121134774, -1115257551) + + W(7, -1129699908, 1021079748, -1117224382, 1038189385); + WS(1027314912, -1081149641); + sum1 = W(0, -1109480125, -1114947760, -1101195017, -1132100201) + + W(1, -1103759537, 1044342469, -1111893319, 1051194426) + W(2, 1051429188, 1053252314, 1058130398, 1061393767) + + W(3, 1057940398, -1104522586, 1055491172, -1120202825) + + W(4, -1118844062, -1094077252, -1113091794, -1085308129) + + W(5, -1102037735, -1094497965, -1103334896, -1094870250) + + W(6, -1097611416, 1034118735, -1104617542, 1035982720) + + W(7, 
-1114189097, 1036294793, -1128796810, 1030785028); + sum2 = W(0, -1131039707, 1024995350, -1104486127, 1015537291) + + W(1, 1053996441, -1103676904, 1047124046, -1111129691) + + W(2, -1098833779, -1124760267, -1078713050, -1065942779) + + W(3, 1047686732, 1084085461, 1064521940, -1093915430) + + W(4, -1147567565, 1057070390, -1091843304, -1081156610) + + W(5, -1094140189, 1060423478, 1046849692, 1018309905) + W(6, 1035790053, 995065627, 1037354874, -1154356731) + + W(7, -1107145709, 1026796886, -1108476011, 1038591472); + WS(-1081542375, 1044780323); + sum1 = W(0, 1026864081, -1100303790, 1048337215, -1098110473) + + W(1, 1054474587, -1099969099, 1023999910, 1028627178) + W(2, 1046719985, 1054288460, 1054459103, 1048901488) + + W(3, 1041639871, 1034589376, 1043694031, 1046235480) + + W(4, -1098857847, -1108679899, -1098101851, -1093175556) + + W(5, -1104037973, -1107626335, -1101510966, -1106600025) + + W(6, -1114219435, -1104586877, 1041408644, -1098306531) + + W(7, 1046672715, -1104199322, 1022681657, -1126646775); + sum2 = W(0, 1017985090, 1027847194, 1036519222, -1113167123) + + W(1, 1037009826, -1113380621, 1031131596, -1115360802) + + W(2, 1024212320, -1098721130, 1050387030, -1094712479) + + W(3, -1100822056, 1028269032, 1009055356, -1113841920) + + W(4, -1099849981, -1081468176, -1070464929, -1096550174) + + W(5, 1057690620, 1042714784, -1145638655, 1046010973) + W(6, 1067245211, 1074345814, 1070597407, -1088417301) + + W(7, -1104880956, -1102737214, 1000105719, -1112342255); + WS(1059294542, 1020616832); + sum1 = W(0, -1157534552, 1039080142, -1097783100, 1031217968) + + W(1, -1164216296, -1138897989, 1026686634, -1105498094) + + W(2, -1096856701, 1044861738, -1089052876, -1085780263) + + W(3, 1044573672, -1097342660, 1041475528, -1105558033) + W(4, 1050532499, 1041652423, 1057029426, 1047119538) + + W(5, 1039715347, 1043073721, 1040308239, 1048391538) + W(6, -1112831597, 1046540791, -1098369827, 1040272239) + + W(7, 1024931118, -1103025619, 1045404192, 
-1114523696); + sum2 = W(0, -1106291706, 1001372950, -1099590495, -1093748925) + + W(1, 1015225205, -1105937891, -1112612361, 995169980) + W(2, 1040318024, -1105705183, 1045484852, 1061316313) + + W(3, -1098311584, 1041085521, 1028378294, 1035732349) + + W(4, -1114208076, -1116263519, -1108179199, 1049891427) + + W(5, 1040668388, -1111581107, -1115787941, -1113959318) + + W(6, -1142193319, -1145917455, 1036599633, -1108356360) + + W(7, -1148092276, 1032743264, 1021281994, 1024794158); + WS(1059376718, -1137270291); + sum1 = + W(0, -1118257199, 1037392427, -1097794403, 1035766677) + W(1, 1026067385, 1032186693, -1108679762, -1139304576) + + W(2, 1043258576, -1106073464, 1054680411, -1099046488) + + W(3, -1088020070, 1041392887, -1113652045, -1111666975) + + W(4, -1114290826, -1124279079, -1115190716, 1052189312) + + W(5, 1054691490, -1097245116, 1025610423, -1123561026) + W(6, -1112002778, 1022160871, 1045843716, 1011458515) + + W(7, -1110958220, 1046761570, -1117145658, 1032305501); + sum2 = + W(0, 1025244035, -1122852568, -1092220395, 1038019467) + W(1, 1048072683, -1113553750, -1119930901, -1139793711) + + W(2, 1033595807, -1087687504, 1066421651, 1047700223) + + W(3, -1084925862, 1050883425, -1105555859, -1146566911) + + W(4, 1016573022, 1048825911, 1058107887, -1089540205) + W(5, 1045215493, -1098430697, 1041208433, -1124843514) + + W(6, 1002397687, -1104089806, -1130145014, -1111655831) + + W(7, 1032935415, 1015641098, 1001885951, -1131703250); + WS(1058596686, 1013962118); + sum1 = W(0, 1000024554, -1127551432, -1100000762, 1035333436) + + W(1, -1122612871, 1044766998, 1041477861, 1051562743) + W(2, 1052551424, 1049056438, 1057499982, 1036076760) + + W(3, -1127972943, -1116046252, -1118015335, -1099275107) + + W(4, -1088298614, -1098656348, -1090756927, -1097067922) + + W(5, 1040213184, 1015197910, -1123679237, -1122278672) + W(6, 1039656505, 1034731345, 1019232187, 1033429441) + + W(7, -1125045580, -1113717771, 1037833508, -1122272135); + sum2 = + W(0, 
-1104434141, 1029025211, -1078497608, -1079110377) + W(1, -1085502108, 1066279808, 1062913146, 1068620036) + + W(2, 1048601996, -1091369704, 1052827694, -1106615386) + + W(3, -1115334546, 1037382016, -1122817088, 1048688798) + + W(4, 1033271157, -1119340081, 1025214064, -1109539756) + + W(5, 1050957039, -1106594885, 1036274829, -1118139306) + W(6, 1004930429, 1028438774, -1106316897, 1034989376) + + W(7, -1108631008, 1023198169, -1134348613, -1139733884); + WS(1044771128, 1023341948); + sum1 = W(0, -1156220044, 1034545464, 1033488922, -1111313058) + W(1, 1038420969, 1008511890, 1013986230, 1011084871) + + W(2, -1092336191, -1098819215, -1096491302, -1086850728) + + W(3, -1102939421, -1100928894, -1115524377, -1089482302) + + W(4, 1051560294, 1044146859, 1051910286, 1045449190) + W(5, 1053458817, 1038883707, 1012126018, 1046748951) + + W(6, 1036528391, 1029114797, 1022761170, 1029767950) + W(7, 1041877300, 1034675856, 1037411178, 1043196317); + sum2 = W(0, -1071004894, -1080574884, -1089987082, 1049506323) + + W(1, 1065282653, 1062249589, 1060165079, 1070844945) + W(2, 1076503146, 1066884159, 1058011283, -1098882467) + + W(3, -1084978169, -1088626314, -1082249713, -1077734377) + + W(4, 1057610169, -1107351326, 1054000347, -1090234844) + + W(5, 1030898490, -1090230279, 1044380003, -1096516163) + + W(6, -1122913984, -1130255370, 1038970611, 1014162118) + + W(7, -1114000490, 1022492087, -1131621088, 1013009282); + WS(-1111817840, -1134998409); + sum1 = W(0, 1022731056, 1045262352, 1044888721, 1029818259) + W(1, -1105915720, -1114660906, 1027479949, 991785104) + + W(2, -1102901203, -1094315057, -1085070720, -1111793509) + + W(3, 1055206544, 1046806370, -1123039746, -1136267423) + W(4, 1029699069, 1052915216, 1058770140, 1057078063) + + W(5, -1095183540, -1097451385, -1112670352, -1114500629) + + W(6, 1021112442, -1107194439, -1097785743, -1117717150) + + W(7, 1040740592, 1036425016, 1005586201, 1016465988); + sum2 = W(0, -1129690332, 1030842707, -1123486113, 1054087898) 
+ + W(1, -1105176966, -1097205966, -1098021434, -1106929221) + + W(2, -1113356803, 1026318374, 1041923626, -1089332833) + + W(3, 1068558125, -1141184456, -1107728348, -1104288342) + + W(4, 997943457, -1119512995, 1037525758, -1088292904) + W(5, 1040210770, 1050151959, 1018682892, 1006954668) + + W(6, 1015499837, -1180561029, -1144014736, 1037328869) + + W(7, -1107365912, -1114055561, 1009592392, 1013301204); + WS(-1100650808, 1043653943); + sum1 = + W(0, 1038392637, 1032036848, -1120772452, 1043637149) + W(1, 1040259489, 1047398869, 1038054351, 1050576478) + + W(2, 1044290651, 1040683515, -1106266873, -1090111931) + W(3, -1133467790, 1044787930, 1023591523, 1037223428) + + W(4, -1095199164, -1115398639, -1094097345, -1097393337) + + W(5, -1104941765, -1101572298, 1016760834, -1102689443) + W(6, 1033771919, 1038316223, 1042896310, 1045751664) + + W(7, -1159203906, 1015063331, 1030908740, -1125005703); + sum2 = + W(0, 1018053796, 1000548496, 1049079603, 1039189619) + W(1, -1128574308, -1118597354, -1106984497, -1104543855) + + W(2, 1046306039, 1043541862, 1010667960, 1064529690) + W(3, 1047996002, -1087406141, -1095617964, -1086946593) + + W(4, -1101305605, 1029064982, -1116813778, 1038487223) + + W(5, -1100258391, 1048794746, -1114392997, 1045938007) + + W(6, 1031448374, -1112893849, 1021972628, -1105518867) + + W(7, 1037842238, 1033382833, 1021089548, -1140458600); + WS(-1121537248, 1047151836); + sum1 = W(0, -1096325448, -1113135282, -1098188693, -1112146268) + + W(1, -1108791588, -1117696601, -1123869651, -1114157115) + + W(2, 1052654400, 1035132488, 1046868890, -1134150082) + + W(3, -1129891280, -1098847494, 1032919412, -1107856679) + + W(4, 1000450324, -1110395025, -1139200797, 1049727010) + W(5, 1041189572, 1056884317, 1041128337, 1058220805) + + W(6, -1116556387, 1023519458, -1111260975, -1110207458) + + W(7, -1104838938, -1098805187, -1146298440, -1103148146); + sum2 = + W(0, -1142864271, 1021304865, 1043366966, -1107584343) + W(1, 1014758407, -1130465374, 
-1120740451, 1000478551) + + W(2, 1043572739, 1026011378, -1102859954, -1080136051) + + W(3, -1103597159, -1115654645, 1046511165, 1053224660) + + W(4, -1125571574, 1028667063, 1022196210, -1072538638) + W(5, -1076945816, 1048815254, 1072551214, 1074776028) + + W(6, -1107989855, -1140649559, 1020722946, -1132367054) + + W(7, 1041979768, -1126224006, 1027667511, 1048839210); + WS(-1086568910, 969651201); + sum1 = W(0, -1128189323, 1044154939, 1043534732, -1114366976) + + W(1, -1098469330, -1101372520, -1123038043, -1132910587) + + W(2, -1096064919, -1089612648, -1084798775, 1053159863) + + W(3, 1053431542, 1050787607, 1036747448, -1116020373) + W(4, 1047521403, 1054908701, 1056987371, 1057685165) + + W(5, -1089249613, -1090701774, -1105030179, 1027268120) + + W(6, 1043461231, -1099266659, -1113308531, -1152040120) + + W(7, 1042761408, 1049635020, 1010635844, 1018344000); + sum2 = W(0, 1042932965, -1103428495, 1039997403, -1093583228) + + W(1, 1044856824, -1121819542, -1177180368, 1028413178) + + W(2, -1103892922, 1060495074, 1052448567, -1075640666) + + W(3, 1057723154, 1057002090, -1112150955, -1118134166) + + W(4, -1104364155, 1060109323, 1064108701, -1073056297) + + W(5, 1055665788, 1060503917, -1116550688, -1110515035) + + W(6, -1139842168, -1118896922, 991756114, -1097481656) + + W(7, 1040659602, -1099984054, 1034919451, 1041069777); + WS(-1084093518, -1116656412); + sum1 = W(0, 1002008836, 959481663, -1104590931, 1050696243) + W(1, -1106608235, 1047962207, 1030562773, 1015783795) + + W(2, 1058416208, 1026388179, 1059097973, 1042079029) + W(3, 1041679827, 1049127990, 1031439243, 1042535660) + + W(4, -1096672513, -1109778819, -1088144465, -1096264126) + + W(5, -1101432288, -1100480244, -1113594772, -1093923750) + + W(6, -1112390467, 1023428581, -1112632704, 1040470858) + + W(7, -1102954054, 1047012782, -1112144502, -1145396437); + sum2 = W(0, 1007960967, -1110192070, 1049899326, 1056650247) + W(1, 1078512141, 1081472440, 1058404171, 999915485) + + W(2, 
1027407916, 1010716935, -1097456083, -1070859565) + + W(3, -1068241064, -1079514242, -1094427160, 1030657127) + + W(4, -1129957403, 1018303319, 1037417999, -1094713389) + + W(5, -1096379657, 1043613542, 1025537830, -1116925932) + + W(6, -1132155403, -1121935910, -1137993343, -1142796285) + + W(7, 1039214170, -1111213656, 1032528613, 1024356221); + WS(1052225948, 1018668194); + sum1 = W(0, -1120453498, -1123160289, -1108654867, -1104839613) + + W(1, 1041378113, 1036965515, 1044186084, 1034438594) + W(2, 1051775516, 1048325451, 1058214550, 1064670427) + + W(3, -1126638409, 1049967085, -1096299613, -1128237844) + + W(4, -1117579103, -1111291056, -1093397513, -1091445969) + + W(5, -1092047898, -1099225050, -1112850625, -1112057991) + + W(6, -1107074656, -1112742544, -1147977428, -1119335936) + + W(7, -1109154218, 1034167881, -1138567959, 1019177609); + sum2 = W(0, -1121001958, -1148685985, 1018483434, 1021815720) + + W(1, 1049361594, -1108634723, -1132118692, -1105580672) + + W(2, -1117606942, -1111847761, -1120566718, -1076982600) + + W(3, 1074852012, 1048076514, -1102180777, -1116866514) + + W(4, -1103097305, 1021935234, 1049895592, -1081972918) + W(5, 1051964198, 1040925857, 1017568960, 1040155911) + + W(6, -1130038396, -1112487949, 1038078839, -1125812992) + + W(7, -1152546434, -1104919092, 1026180028, -1123068468); + WS(-1096382876, -1091051652); + sum1 = W(0, -1116126267, -1113197617, -1112424211, -1124324715) + + W(1, -1136579346, 1040875430, -1113905512, 1017323784) + W(2, 1057278592, 1048826126, 1058810754, 1056304110) + + W(3, 1041995897, 1016295122, 1024916389, 1042534003) + + W(4, -1096000788, -1097274020, -1088739494, -1091311905) + + W(5, -1114916522, 1038826452, -1128507781, 1017648027) + + W(6, -1116910752, -1140807037, 1039925053, -1132679170) + + W(7, -1107191661, -1119450067, 1023437062, -1098370349); + sum2 = W(0, -1120311657, 1036113080, -1109045199, 1029662296) + + W(1, -1122358251, -1114085873, 1016737279, -1115835645) + + W(2, 1025682064, 
-1107470193, 1035621932, -1118009189) + W(3, 1045290388, 1009830751, 1035169596, 1030264440) + + W(4, 1036682152, -1108061877, 1051542033, -1101792442) + + W(5, -1093646778, 1056395710, -1091719234, 1052430993) + + W(6, -1108273289, 1012612647, -1101063214, -1076098595) + + W(7, -1080861029, 1039398973, 1065137390, 1072502688); + WS(1040082544, -1114755812); + sum1 = W(0, 1017784372, 1037505264, 1045745417, 1019379817) + W(1, -1109308706, -1124056118, 1025470519, 1039469090) + + W(2, -1110974758, -1108514902, -1095324708, 1052579502) + W(3, 1058051822, 1046163210, 1038459986, 998280780) + + W(4, 1040296296, 1041424680, 1054913780, -1106396419) + + W(5, -1087512533, -1093666877, -1106674096, -1109356390) + + W(6, 1024480479, -1104471944, -1106439461, -1115066496) + + W(7, 1040857799, 1001482384, 995688529, -1108404770); + sum2 = W(0, -1162396366, -1117720653, -1104226850, -1105580348) + + W(1, -1105643813, 1031813906, 1011045214, 1018164327) + W(2, 1045315846, -1118054954, 1057942904, 1059476362) + + W(3, -1090022037, 1045409162, -1106412098, 1023977529) + W(4, 1034748092, 1034773210, 1043388435, 1067689202) + + W(5, -1083232471, -1101262587, -1124577575, -1102021751) + + W(6, 1035646876, -1100298043, -1097646834, -1093789486) + + W(7, 1040357620, -1113573448, 1012935222, 1024454049); + WS(1036525168, -1082462584); + sum1 = W(0, -1116308971, 1037298441, 1027885589, -1122874917) + + W(1, -1154089797, -1108384819, 1028179743, 1003463273) + + W(2, -1085123800, -1087372070, -1086414101, -1096064460) + + W(3, -1099879381, -1104319094, -1127380522, -1096394215) + + W(4, 1057675329, 1051660338, 1059597873, 1059581688) + W(5, 1052784510, -1117339864, 1039251609, -1110971807) + + W(6, 1019612704, 1027541711, -1109257541, 1041073820) + W(7, 1028059367, 1045372305, 1031554934, 1053758651); + sum2 = + W(0, 1060691160, -1103295177, 1035985281, -1085964582) + W(1, -1105398566, -1107551093, 1049030608, -1090293163) + + W(2, -1081285622, -1090050073, -1102851017, 1072728426) + + 
W(3, 1060511611, 1033954581, -1086630634, 1058069039) + W(4, -1080809247, 1050684042, -1087522637, 1060852217) + + W(5, 1058511518, 1041242888, 1028348456, 1062481845) + W(6, 1052276353, -1106187369, 1048638013, -1090587145) + + W(7, 1029176048, -1094879840, 1046256182, -1087039462); + WS(-1074352935, 1040600857); + sum1 = W(0, 1042034194, 1028652336, 1024054004, 1030695172) + W(1, 1029627411, -1117605317, -1114002447, 1041624512) + + W(2, 1057627204, 1025049468, 1057567995, 1022098295) + W(3, 1052602222, 1039437212, 1049556590, 1053416947) + + W(4, -1090157751, -1105875707, -1088931067, -1095088235) + + W(5, -1094832321, -1104356174, -1107260676, -1101255271) + + W(6, 1024364622, -1105990299, 1048022018, -1100928004) + + W(7, 1049957878, -1095846092, 1047165477, -1098759166); + sum2 = W(0, -1075440350, -1079714919, -1079134350, -1081735031) + + W(1, 1055569671, 1066585571, 1071644179, 1075476470) + W(2, 1073165865, 1065983198, 1068716226, 1066499710) + + W(3, -1095308006, -1080754250, -1074800849, -1073191304) + + W(4, -1112383192, 1048707236, -1135853868, -1098543278) + + W(5, 1021808504, -1108920844, 1050977418, -1090378667) + + W(6, -1148975848, -1115591486, 1038054064, -1125971282) + + W(7, 1040311395, -1110109816, 1035370814, -1120408347); + WS(1041022776, 1033480094); + sum1 = W(0, -1132576057, -1123321429, 1028959481, -1104176578) + + W(1, 1046472198, -1121060138, 1027556403, -1115611836) + W(2, 1026327841, 1042302896, 1052457903, 1063677500) + + W(3, -1097607903, -1123802109, -1118671609, 1039698959) + + W(4, -1110434490, -1109312954, -1089918670, 1025377120) + + W(5, -1090934974, 1046523967, -1112257611, 1000531209) + + W(6, -1119031508, 1021323174, 1029242558, -1118638740) + + W(7, 1013298461, -1106699146, 1030036314, -1116635705); + sum2 = W(0, -1106027239, -1122286155, -1102546078, 1055155948) + + W(1, -1096754888, 1032068992, -1121774513, 1036170969) + + W(2, 1049930690, -1127267122, 1041115945, -1091935564) + + W(3, 1052250618, -1103388917, 
1038737216, -1108790214) + + W(4, -1110218398, -1108798456, 1058786995, 1075372513) + + W(5, -1071163371, -1123143101, -1121707895, 1032355550) + + W(6, 1034564779, 1018367610, -1095860458, 1043590800) + + W(7, -1112711414, -1119323979, 1016553114, 1017858173); + WS(1055618972, -1117202987); + sum1 = W(0, 1030962480, -1101414710, -1098100923, -1111740150) + + W(1, 1036980413, 1041005827, 1021681163, -1112834597) + W(2, 1039894129, 1053882576, 1057347458, 1049814740) + + W(3, -1090053902, -1088233382, -1105970903, -1117339136) + + W(4, -1101932469, -1089999221, -1085784352, 1051098296) + + W(5, 1056052448, 1051763396, 1044301630, 1032801282) + W(6, 1031993659, 1048656807, 1043352452, 1031031542) + + W(7, -1104607674, -1119077705, 1022017143, 1039235901); + sum2 = + W(0, -1114659327, -1123577690, 1042921002, -1105097716) + W(1, 1042773509, -1093975266, 1032061179, 1000405669) + + W(2, -1101541229, 1043634319, 1066882360, -1077668162) + + W(3, 1037629509, 1058638399, -1108208741, -1131856909) + + W(4, -1107338771, 1057974128, 1050113378, -1080704784) + W(5, 1051155360, 1052705661, 1008396554, -1111189711) + + W(6, 1039378885, -1111523166, 1040130625, -1104122760) + + W(7, 1012227066, -1110420726, -1125218199, 1025207949); + WS(-1091387548, -1116324289); + sum1 = W(0, 1026642697, 1044682252, 1041967213, 1032214050) + + W(1, -1131062694, 1015214154, -1137069945, -1110158325) + + W(2, -1112220622, -1093973688, 1046046637, -1086442024) + + W(3, -1099996941, 1053177927, -1098783494, 1037567917) + + W(4, 1049897926, 1047211372, 1050365286, -1112122925) + W(5, 1046223135, -1132096750, 1024489425, 1042021623) + + W(6, 1027958127, -1098544270, -1112662293, -1106755108) + + W(7, -1116559746, 1034258897, -1111108842, 1031638516); + sum2 = W(0, -1108095393, 1016776222, 977935538, -1122468710) + W(1, 1019959206, 1030099429, -1104930054, 1002749526) + + W(2, 1011776651, 1024100809, -1097248934, 1057627889) + W(3, 1042194141, -1117226417, 1051928720, 1041814459) + + W(4, 
-1101257730, -1105403134, -1101986254, 1060632600) + + W(5, -1090719303, 1037412790, -1109292621, -1115385474) + + W(6, 1019145070, 1032882678, -1124027994, -1103033100) + + W(7, 1039533068, -1110127978, -1125661478, -1108291818); + WS(1057965518, -1118811194); + sum1 = W(0, -1104352985, 1038892389, -1105058276, 1042490232) + + W(1, -1121521774, -1099594944, 1033536020, -1095161930) + + W(2, -1106566686, -1112643723, 1026830542, -1091938531) + + W(3, 1058056073, -1101314965, -1098044778, 1039936570) + W(4, 1052732873, 1003767366, 1060358287, 1019887594) + + W(5, -1118338013, 1042464324, 1024562028, 1046000094) + + W(6, -1107147458, -1114250980, -1096574819, 1048844541) + + W(7, 1024329053, -1107780158, 1032778872, -1107207172); + sum2 = W(0, -1116205334, 1042905425, -1109155755, 1044205280) + + W(1, 1035597621, 1008636092, 1036458751, -1092853308) + W(2, 993944814, -1113078065, 1044406596, 1040105843) + + W(3, 1043112037, -1123380436, -1105268129, 1005260887) + + W(4, 1032713731, -1104977559, 1048672381, -1104277422) + + W(5, -1103087244, -1153474446, 1015784078, 1036094123) + + W(6, -1106373772, 1039619019, -1115343154, 1048332350) + + W(7, 999501191, -1106439836, 1021528398, -1106473413); + WS(1053420700, 1049909457); + sum1 = + W(0, -1115258034, 1034071238, 1027276239, -1115686559) + W(1, 1041096236, -1102788144, 1034068157, -1107460367) + + W(2, -1106165874, -1089903413, 1036166247, -1100423414) + + W(3, -1093491554, 1049835542, -1095323722, -1110396954) + W(4, 1050401361, 1041332866, 1049384898, 1037380859) + + W(5, 1052388787, 1034784425, 1031952854, 1050882791) + W(6, -1113250117, 1023742249, -1130552842, -1120158506) + + W(7, 1037333727, -1103514783, 1037170040, -1114330639); + sum2 = W(0, -1112500393, 1028289272, 1049127312, 1086771603) + W(1, 1048088739, 1020087240, 1014851096, -1146529543) + + W(2, 1024557142, -1165623582, -1095250820, -1062269001) + + W(3, -1095068995, 999933935, -1135945992, 1025716258) + + W(4, -1136367764, 1035333264, -1114028070, 
-1087155359) + + W(5, 1022077560, 1034746908, -1120864651, -1127622484) + + W(6, 1020782048, -1122738507, 1025362120, 1034731082) + + W(7, -1125007838, -1120796207, 1026594640, -1128255426); + WS(1063806286, 1027083983); + sum1 = W(0, -1114823180, 1030313470, -1111791457, -1111977289) + + W(1, -1108043857, 1019729830, -1114640622, 1033983240) + W(2, 1045480194, 1039206373, 1061885616, 1050451993) + + W(3, 1057912708, 1036575619, 1039765761, 1034424532) + + W(4, -1118427516, -1106496703, 1010686340, -1083830979) + + W(5, -1099420545, -1105267089, -1131920190, -1125840468) + + W(6, -1109705966, 1020453816, -1103432478, 1039658735) + + W(7, -1114670584, -1109909656, -1113587473, -1112770081); + sum2 = W(0, 1033978022, -1100194899, 1033701105, 1050426550) + + W(1, -1104028404, 1034191613, -1141130302, 1010895851) + + W(2, -1122290634, -1091664523, -1071413373, 1075108998) + + W(3, 1062092502, -1103668501, 1034149895, -1140709165) + + W(4, 1016780797, -1103111994, 1047766898, -1096749849) + + W(5, 1027350687, -1131589290, -1131724600, 1027888251) + + W(6, -1164938552, 1002490582, -1105595809, 1051489664) + + W(7, -1108336227, 1021076655, -1149103768, -1131210373); + WS(1058288590, 1045994186); + sum1 = + W(0, 1018135640, -1123855059, 1040223430, -1112801779) + W(1, 1036317947, -1098800665, 1034707665, -1096517871) + + W(2, -1093342072, -1096797372, -1086390197, -1105293320) + + W(3, -1091216419, -1117054299, -1096209979, 1023470716) + W(4, 1052167946, 1041780692, 1052872019, 1059840396) + + W(5, 1051866430, 1050341107, 1048652350, 1050625648) + W(6, 1038262801, 1015346466, -1132654257, -1134794696) + + W(7, -1117665797, 1012611090, -1139713758, 1010858330); + sum2 = W(0, 1041114210, -1099284356, 1038292116, -1092208278) + + W(1, 1054381469, -1104534119, 1025804573, 1033400256) + + W(2, -1099008397, 1047276236, -1081084758, -1071665325) + + W(3, 1054365119, 1081581342, 1062322644, -1093538640) + + W(4, -1123917868, -1111523652, 1037708884, -1086778305) + + W(5, 
-1101037019, 1026238413, -1134916894, 1052085127) + + W(6, -1125418381, -1120564910, 1020505319, -1100128811) + + W(7, 1024474915, -1125603475, 1025011807, -1103707544); + WS(-1088887374, 1070119449); + sum1 = W(0, 1040460421, 1041554590, 1013850612, 1038262022) + W(1, 1042710919, -1106869091, 1044864290, 1046057615) + + W(2, 1052058645, 1048371361, 1048732808, 1042754705) + W(3, 1047384127, 1054439312, 1032508990, 1042772520) + + W(4, -1090595183, -1115426747, -1094790815, -1087902678) + + W(5, -1106656361, -1090477876, -1122940954, -1091795237) + + W(6, -1105506686, 1029097335, -1129317061, 1001782464) + + W(7, 1038072126, -1131475029, 1028935005, -1119515740); + sum2 = + W(0, 1024604999, -1112923336, 1027433809, 1025504127) + W(1, 1007245204, -1144581942, -1129047399, 1021294865) + + W(2, 1056461606, 1030035123, 1050727550, -1090534096) + + W(3, -1105339223, -1095507508, 1043793174, -1095638178) + W(4, 1071685965, 1066713447, 1068313262, 1028028856) + + W(5, -1081367671, -1078816510, -1081156501, -1090770344) + + W(6, -1075212110, -1080779344, -1078194918, -1153515923) + + W(7, 1068056044, 1069903211, 1064173276, 1060536751); + WS(1040470840, 1011790950); + sum1 = W(0, 1015095158, -1120734562, -1116691398, -1118217942) + + W(1, 1019898057, -1123629847, 1034607368, 1041822001) + W(2, 1043725275, 1051465082, 1058372660, 1055309890) + + W(3, 1046636817, 1044874098, 1043010193, 1040878843) + + W(4, -1105535856, -1108441440, -1095343471, -1081260869) + + W(5, 1047492563, -1093096561, -1105294873, -1106259483) + + W(6, -1108386992, 1030149375, -1112701741, 1026499745) + + W(7, 1032201014, -1115283258, 1028457510, -1112399171); + sum2 = W(0, 1022830461, 1021246145, 1015115055, -1114703004) + W(1, -1120910531, 1015720307, 983370825, -1123770073) + + W(2, -1117997331, -1110014342, -1111993293, 1036613835) + + W(3, 1023720090, -1117604953, -1120463052, 1036937497) + + W(4, 1037270083, -1125616185, 1044158772, 1073620398) + + W(5, -1076883688, -1087890801, 1025754476, 
-1113287589) + + W(6, 1023767946, -1130403951, 1025796328, 1050812248) + + W(7, -1112499600, 1028283916, -1104874229, 1033737696); + WS(1058309838, -1101980246); + sum1 = W(0, -1118965337, -1098779704, 1049071256, -1113399754) + + W(1, 1026571427, -1110569018, -1131332475, -1128371676) + + W(2, -1110907254, 1049643561, -1081904943, 1042163763) + + W(3, 1052138696, -1106220306, 1025191299, -1129449041) + W(4, 1038642111, 1038999679, 1054751182, 1056093093) + + W(5, -1083956910, 1050507993, -1146247429, 1030864044) + + W(6, 1028903966, -1108240479, 1040887994, -1118832415) + + W(7, 1046797612, -1104697276, -1123883748, 1017502086); + sum2 = W(0, -1137655511, 1043191716, -1121349336, -1097517035) + + W(1, 1046822141, -1129223371, -1116759551, -1124684976) + + W(2, -1111106723, 1033166752, 1033268708, 1052576386) + W(3, -1107318253, -1102887232, 1040601750, 993535634) + + W(4, 1035105104, -1112822728, -1104857525, 1055574555) + + W(5, -1114990789, -1121581721, -1152414880, -1107957817) + + W(6, -1112318930, 1027572041, 1035699332, -1097542777) + + W(7, -1110157019, 1031007843, -1157281192, 1025775603); + WS(1063710542, -1111213649); + sum1 = W(0, 1039559517, -1110192145, -1107309859, -1118762901) + + W(1, -1108199122, 1037245627, -1139769966, 1035326852) + W(2, 1052251350, 1049489370, 1058328276, 1061120005) + + W(3, 1049656673, 1033876357, 1041987905, 1045428971) + + W(4, -1101160384, -1097695264, -1093740712, -1096321197) + + W(5, -1097289639, -1114972772, -1102516745, -1103522251) + + W(6, -1113889808, 1031901152, -1104513406, -1097003636) + + W(7, -1112381384, -1176476024, 1022802380, -1115185874); + sum2 = W(0, 1026803387, 1021750253, 1040812059, -1107480079) + W(1, 1041131835, -1115082464, 1006187407, 1001155939) + + W(2, -1115186477, 1031459540, -1116326399, 1040651083) + + W(3, 1020594503, 1033768858, -1132516997, 1015478283) + W(4, 999853755, 1031446318, 1058720440, 1071698983) + + W(5, 1051128601, 1034660038, 1027131696, 1041372466) + + W(6, -1112425236, 
-1106593609, -1086524651, -1075753828) + + W(7, -1096266051, -1108942599, -1123360193, -1104453127); + WS(1039772272, -1082982873); + sum1 = W(0, 1039127452, -1106264016, -1104546353, -1145518233) + + W(1, 1041713673, 1044410893, 1039725795, 1007790526) + W(2, 1049032497, 1053169593, 1060522592, 1056693961) + + W(3, -1095982984, -1088607916, -1099512929, -1120546088) + + W(4, -1098139952, -1088020664, -1087153807, 1038834197) + + W(5, 1053429837, 1048228018, 1030077856, -1130616150) + + W(6, -1155633625, 1043520647, 1040400554, -1139907193) + + W(7, -1104883452, -1103034636, -1134131855, 1012687882); + sum2 = W(0, 1032987173, -1102803435, -1097110538, -1101871105) + + W(1, -1101555589, -1123533577, -1098769512, 1048796624) + + W(2, -1092902636, 1037122437, -1083842457, 1067610414) + W(3, 1033006847, 1041814167, 1045724867, 999019252) + + W(4, 1027660911, 1039508875, -1115097563, 1067552095) + + W(5, -1096151174, 1026567371, -1107225085, -1105667947) + + W(6, 1032111389, -1101516234, 1001644292, -1104358647) + + W(7, -1145661076, -1104681426, 1020958965, 1028203943); + WS(-1101412664, 1073047832); + sum1 = W(0, -1113880945, -1123950537, -1109995975, -1123565085) + + W(1, -1123728428, 1035385983, -1115318676, 1010675502) + W(2, 1059007377, 1047579477, 1060416389, 1056063051) + + W(3, 1046383171, 1040643140, 1033389702, 1041003725) + + W(4, -1091904865, -1097814305, -1087199740, -1092124214) + + W(5, -1099567891, 1038341800, -1119557930, 1042882861) + + W(6, -1133362719, -1131741454, 1032803377, -1113780884) + + W(7, -1113889390, -1126485237, 1012558775, -1096461162); + sum2 = W(0, 1013686761, -1123503757, 1035262859, -1115490710) + + W(1, 1025861040, 980681483, -1125276038, -1129892230) + W(2, 1030949359, 1039110679, -1105775253, 1027204409) + + W(3, 1019519396, -1111394964, 1024413043, -1126757058) + + W(4, -1109223248, 1035635962, -1106751414, 1057425426) + + W(5, 1043312829, -1098000154, 1051287795, -1103421662) + W(6, -1132915573, 996209923, 1029135163, 
1070858778) + + W(7, 1067133266, -1098902818, -1083268694, -1073095921); + WS(-1103532344, -1105840701); + sum1 = W(0, -1106756472, -1106322913, 1029810954, -1096389739) + + W(1, -1116067305, 1035916925, -1105566449, 1034505108) + + W(2, -1105825052, 1053932942, -1086676361, -1098358914) + + W(3, 1054020609, -1096691570, 1051257001, 1026552303) + W(4, 1050195304, 1007328528, 1057202097, 1054492326) + + W(5, -1093369370, 1012599092, 1027855335, -1101687900) + + W(6, 1044840709, -1104890465, -1144507642, -1098185309) + + W(7, 1032208700, 1048715303, -1100267787, 1050640157); + sum2 = W(0, -1111707317, -1119262447, 1037989791, 1056036881) + + W(1, -1105645897, 1006194414, -1146663095, -1129479912) + + W(2, -1101137684, -1109186725, 1049211868, -1099242284) + + W(3, 1038198606, -1118866635, -1109483964, -1114787638) + + W(4, -1116414033, -1107841286, 1056722210, -1094543747) + + W(5, 1053421235, -1104714858, -1120693058, -1115357885) + + W(6, -1103752872, 1034593530, -1110917586, 1049861706) + + W(7, 1030171051, -1103907620, 1032413269, 1029169157); + WS(1045178680, -1115117954); + sum1 = W(0, -1102680211, -1123018420, -1116849368, 1024407336) + + W(1, -1107241006, 1026392201, -1107918421, -1123314455) + + W(2, 1048681528, -1113607536, -1092105355, -1093530556) + + W(3, -1096193779, -1102060551, -1102337510, -1095199752) + + W(4, 1019008549, 1040296293, 1035615004, 1061339424) + W(5, 1057711678, 1055864515, 1045891981, 1052562824) + + W(6, 1008490315, 1012835273, 1027033246, -1128145511) + + W(7, -1103222073, -1110547491, -1110097884, 1020395952); + sum2 = + W(0, -1108321996, -1115537892, -1106228104, 998645667) + W(1, -1112292909, 1035781218, -1111036660, 1018530825) + + W(2, -1096948503, -1104693583, 1047339287, 1055675007) + + W(3, 1049114511, -1100682049, 1032758858, -1113969306) + + W(4, -1112108000, -1112710060, -1126080161, 1051246853) + + W(5, 1041989299, 1025978716, 1034053890, -1113712936) + W(6, 1026091852, 1022753169, -1120501740, -1105262859) + + W(7, 
-1123292948, -1139782977, -1111667328, 1033000622); + WS(1044590904, 1058699692); + sum1 = W(0, 985175380, 1034192409, 1020554347, 1024014533) + W(1, -1115433194, -1128687821, 1008304190, -1115445028) + + W(2, -1092606720, -1090621088, -1093618783, -1113905855) + + W(3, -1096920415, -1112503613, -1100279725, -1101027255) + + W(4, 1051210502, 1050384326, 1055009987, 1062574818) + W(5, 1054554325, 1031218808, -1105751509, -1144235755) + + W(6, 1009220799, -1109361592, -1117059707, -1128567457) + + W(7, 1015981863, 1039114097, -1127205548, 1035507338); + sum2 = W(0, 1036009101, -1117283755, 1033667347, -1107239966) + + W(1, 1034685217, -1128406639, -1123321687, -1125402335) + + W(2, -1108619644, 1041337919, 1012813669, 1053310286) + + W(3, -1103814148, 1029066827, -1109499964, -1123197815) + + W(4, 1010492213, -1100684466, 1015579759, 1056064408) + + W(5, 1050883237, -1113069964, -1098878001, -1096911819) + + W(6, -1131279719, 1035041542, -1124020763, 1025470531) + + W(7, 1032198922, -1104788458, 1024066141, -1108347132); + WS(1044030776, -1089132931); + sum1 = W(0, 1034269487, 1032883970, 1042439154, 1044751430) + W(1, 1019707982, 1041434446, -1133288904, -1128708109) + + W(2, -1093699058, -1097458090, -1086092656, -1078975458) + + W(3, 1035194756, 1038040390, -1131544335, -1101499019) + + W(4, 1045160768, 1050995424, 1057188990, -1096290982) + W(5, 1032938655, 1043682915, 1022768458, 1043305900) + + W(6, 1040395197, -1135242887, 1040422743, 1048952172) + W(7, 1042217766, 1043586045, 1029073256, 1044718778); + sum2 = W(0, 1043294282, -1127265324, 1035036911, 1051203976) + + W(1, -1092441683, 1037063919, -1095318743, -1096741875) + + W(2, -1107262777, -1107127922, 1048700262, -1114081637) + + W(3, 1027047222, -1087538045, 1043898666, -1114635611) + + W(4, -1132728616, -1133435576, -1115280193, 1060204569) + + W(5, -1096051714, 1059699424, -1091030613, 1057805441) + + W(6, -1112978295, -1101649503, -1142433873, -1138065032) + + W(7, 1049542158, 1027406094, 1036599707, 
-1117298502); + WS(-1083489614, -1078579141); + sum1 = W(0, -1113694287, 1048897616, -1104500498, 1029397720) + + W(1, 1039983475, -1099529286, 1047818144, -1100400745) + + W(2, -1091060438, -1104759380, -1089217572, -1096433234) + + W(3, -1090900481, -1103761763, -1126261903, -1098862766) + + W(4, 1048678744, 1055607032, 1025763911, 1056656214) + W(5, 1055730826, 1039576037, 1050842685, 1044462332) + + W(6, 1022130309, 1045345264, -1098656951, 1045280313) + + W(7, 1003335600, -1104729181, 1045845834, -1124798927); + sum2 = W(0, 1034867092, 1036831152, -1155378720, 1050005016) + + W(1, -1109019109, -1107857756, 1025727369, -1105627363) + + W(2, -1113896283, 1008734783, -1107821706, 1036439980) + + W(3, -1105188391, -1103852014, -1113326142, 1036502992) + + W(4, -1089397746, -1143881728, -1090933436, 1067046868) + + W(5, 1074023168, 1073004488, 1071596064, 1068047188) + W(6, 1053130734, 1048547317, 1050664536, -1081990202) + + W(7, -1073616958, -1073650181, -1078582463, -1079035066); + WS(-1123566816, -1118788492); + sum1 = + W(0, 1034266605, -1136553988, 1032767842, -1119057557) + W(1, 1032596498, -1138237144, 1034235800, 1038972738) + + W(2, -1131696345, 1054570946, 1043367745, 1057910725) + W(3, 1051309065, 1050053561, 1044754161, 1047609962) + + W(4, -1102436480, -1101485920, -1093672570, -1081257723) + + W(5, -1104247863, -1097237249, -1100737384, -1118734125) + + W(6, 1029920799, 1027694719, 1029228634, 1038070160) + W(7, 1041012541, -1104490509, 1036672371, -1101643609); + sum2 = W(0, 1001205015, 1006355343, 1030505158, -1119701641) + + W(1, 1039794598, -1107202062, 1026836118, -1115440174) + + W(2, -1108935456, 1048575251, -1114918170, -1088419213) + + W(3, 1032736312, 1045905661, -1116946341, 1003805295) + + W(4, -1123729961, 1037564428, -1096130861, -1068662368) + + W(5, 1076967646, 1068403675, -1104305708, -1124006983) + + W(6, -1151981614, -1123825361, 1015236500, -1095410362) + + W(7, 1049774729, -1105335733, 1050153445, -1103176791); + WS(-1103567160, 
1051728620); + sum1 = W(0, 1022317012, -1105517764, -1109296554, 1030946149) + + W(1, 1032988986, -1117957958, 1025729823, 1009731405) + W(2, 1037463598, 1027268637, 1052072875, -1094107172) + + W(3, -1087396213, -1098739651, -1104965205, 1017302004) + + W(4, -1094952698, -1107670055, -1103880757, 1052944567) + + W(5, 1057943536, 1052382091, 1036164793, 1041821631) + W(6, 1033922872, 1024776175, 1038888053, 1043766492) + + W(7, -1110506946, -1108829569, -1132271979, 1015545129); + sum2 = + W(0, 1031290011, -1114551402, -1137203270, 1025723227) + W(1, 1020823723, -1107112701, -1156747895, -1140368490) + + W(2, -1127050317, 1026829489, 1032334734, -1098446681) + W(3, 1041359246, 1041376772, 1034398482, -1104694439) + + W(4, -1113481669, 1042490649, 1043198364, -1127701979) + + W(5, 1057793899, -1100591458, -1114292550, 1037536680) + + W(6, -1080536402, -1076466135, 1038797044, 1074254195) + + W(7, 1035340712, -1118262367, 1013380262, 1032104302); + WS(-1106439480, 1029090439); + sum1 = W(0, 1034559195, -1151887836, 1020849475, 1042053132) + W(1, 1043127920, 1043814293, 1037260390, 1043715864) + + W(2, 1042655458, 1030625962, 1059377738, -1084013087) + W(3, -1107145304, 1032281331, 1040913704, 1033172451) + + W(4, -1102396839, -1115609614, -1089162129, -1083731653) + + W(5, 1052971191, -1107154457, -1113717087, 1017680531) + W(6, 1026400220, 1035325646, 1042909598, 1039293086) + + W(7, 1029716960, 1024778063, 1028666946, -1130742978); + sum2 = W(0, -1129107524, -1123636938, -1116921954, 1035143287) + + W(1, 1036996009, 1026655402, -1117010042, -1126794884) + + W(2, 1021999108, -1148550480, -1104180688, 1058583951) + + W(3, -1112895549, -1116180274, -1131502740, -1138164712) + + W(4, 1022835844, 1016982596, 1016773924, 1054454385) + + W(5, -1097314323, -1099699990, -1132536424, -1129763396) + + W(6, -1129901940, -1140398984, 1030858450, -1117386658) + + W(7, -1108156993, -1118751234, 1016259828, -1117013506); + WS(1048151864, 1059242544); + sum1 = W(0, -1122367849, 
-1121466768, 1042165293, -1116044868) + + W(1, -1105437493, 1037358179, -1125163356, -1111398955) + + W(2, -1097114939, -1097911384, -1085899247, 1042506911) + + W(3, 1056857214, -1105063934, -1123578292, -1106114902) + + W(4, 1043794348, 1033810391, 1063564267, 1042284551) + W(5, -1089674786, 1049648847, 1021855394, 1045389303) + + W(6, 1026875087, 1014185912, -1106211614, -1111561384) + + W(7, 1040868541, 1028107682, -1111395274, 1010402126); + sum2 = W(0, -1139648101, -1108856547, 1015236663, -1095822659) + + W(1, -1137977893, 1038373686, 1015865663, 1025690243) + W(2, -1110557326, 1041627874, 1032833232, 1054902128) + + W(3, -1098765424, -1121592121, -1106594443, -1117564251) + + W(4, -1106733442, -1106727608, 1048813377, 1055403310) + + W(5, -1097123375, 1045561320, -1106386080, 1017823319) + W(6, 1039330638, 987619817, 1025349119, -1095503796) + + W(7, 1050144719, -1108448514, 1041979856, -1105901385); + WS(1058511566, 1043187024); + sum1 = W(0, 1042620242, 1027981614, 1037513401, 1050564856) + W(1, 1040390932, 1044188393, 1032851398, 1043046140) + + W(2, 1041476871, 1044640335, 1039039385, -1085132912) + + W(3, -1095196494, -1116362934, -1107832665, -1105825814) + + W(4, -1106861387, -1120607404, -1101062960, -1089194338) + + W(5, 1032203084, -1134658518, 1026497359, -1105488567) + W(6, 1043942651, 1040914531, 1040902562, 1048598306) + + W(7, 1019897185, 1032634704, 1027555603, 1032742569); + sum2 = + W(0, -1094808435, -1132385524, -1110994277, 1039748829) + W(1, 1038866141, 1012861192, 1030487914, -1132742376) + + W(2, -1083260450, -1089624726, -1098133391, 1063143581) + W(3, 1043371703, 1040218148, 1012758152, 1052570990) + + W(4, 1041967320, -1102801635, 1050258559, 1037743779) + W(5, 1048943341, -1112998265, 1031702418, -1105604919) + + W(6, 1026111290, 1034887813, -1123786474, 1033106845) + + W(7, -1110363077, -1121754906, 1012203560, -1111765769); + WS(-1087236686, 1061665912); + sum1 = W(0, -1115274057, -1116162424, -1104461343, -1093387666) + + W(1, 
-1113556238, -1115159031, -1126942285, -1121543315) + + W(2, 1023071688, 1030200068, 1054050807, 1067462106) + W(3, 1050597447, 1031104576, -1121547516, 1042315272) + + W(4, -1107980923, -1101866270, -1092884195, 1056570668) + + W(5, 1044256537, -1106234576, -1128835891, -1114796114) + + W(6, -1119301463, -1128116268, -1109238785, -1095571507) + + W(7, -1103733247, -1111522325, -1111458030, -1109780718); + sum2 = + W(0, -1127633710, 1024854732, -1123781128, 1040493279) + W(1, -1105896535, 1033983514, -1122459292, -1120616672) + + W(2, 1024134807, 1020748524, -1117094752, 1065417820) + W(3, 1047826025, -1102101577, 1040101538, -1124130304) + + W(4, -1114424195, -1098546148, -1079772750, 1059484680) + + W(5, -1107504040, 1049649199, -1105983778, 1041220419) + + W(6, -1109129714, 1025355297, -1102118069, -1114766469) + + W(7, 1033237727, -1154145757, 1012710689, -1123999628); + WS(-1092106140, -1084163121); + sum1 = W(0, 1036347305, 1016904817, 1034414494, 1040590819) + W(1, 1047670567, 1030378702, -1114654031, 1037868673) + + W(2, 1049849489, 1040213041, 1056746079, 1048812489) + W(3, -1084779712, 1040726752, 1033236350, 1034988241) + + W(4, -1101083230, -1152694122, -1083169173, -1107117385) + + W(5, 1051667767, -1109066592, 1024925978, -1107106473) + + W(6, -1106738110, -1115216475, 1039797369, 1022569263) + + W(7, -1112982247, -1125038036, 1025136605, -1146668256); + sum2 = W(0, -1144361879, -1128843246, 1005924247, -1104940693) + + W(1, -1123234701, 1030348431, 1032437115, 1024671661) + + W(2, -1111008682, -1134810723, 1041181160, 1054697191) + + W(3, 1052731012, 1042931729, -1112071203, -1111460488) + + W(4, -1092072719, -1121102479, 1045581242, 1046555600) + + W(5, -1103970223, -1104348933, -1113690885, -1129885552) + + W(6, 1021883158, -1116668390, -1109114373, -1113715104) + + W(7, -1128555074, 1032936433, -1136719027, -1114960949); + WS(1058637774, -1109876822); + sum1 = + W(0, -1123254812, 1041301297, -1108082435, -1113241451) + W(1, -1110973875, 
1027216056, 1014894339, 1019588394) + + W(2, 1042623060, -1105094644, 1059472952, 1043969956) + W(3, -1113269955, 1026816394, -1130602892, 1026167148) + + W(4, -1126486401, -1111052512, -1094910408, -1094009726) + + W(5, 1058531737, -1100061186, 1032486130, -1117291247) + W(6, -1112485473, 1037949088, 1016105496, 1038892497) + + W(7, -1093047519, 1032398170, -1128303438, 1021704844); + sum2 = + W(0, 1019183838, 1031923668, 1049015983, -1096362417) + W(1, 1005830618, 1036998273, -1119396253, -1125844722) + + W(2, -1103677237, 1026573624, -1101357077, 1060010174) + W(3, -1122620605, -1106684685, 1023743335, 970656667) + + W(4, 1041385745, 1025625867, -1095136476, -1096248497) + + W(5, -1115221092, 1049273019, -1112424814, 1011280453) + + W(6, -1115238116, 1024201143, 1024442030, -1122837727) + W(7, 1034369966, 1031554545, 1004099770, 1010302645); + WS(1068382951, -1145853862); + sum1 = W(0, -1109092968, -1120835030, 1015957108, 1033165317) + + W(1, 1015780402, -1098982451, -1106789039, -1105810191) + + W(2, -1104283584, -1104742016, -1087137628, -1082841532) + + W(3, 1048723479, -1119342334, 1037068029, 998682871) + W(4, -1126224104, 1048835390, 1060142968, 1050512116) + + W(5, 1050625794, 1046183055, 1033167819, 1042826568) + W(6, -1142984099, 1035504970, 1044651720, 1036256010) + + W(7, 1040271692, -1112268532, -1115364877, -1113754717); + sum2 = + W(0, 1042289605, -1123754179, 1040865617, 1025996190) + W(1, -1100913998, -1118459936, -1114401734, -1118182115) + + W(2, -1105845074, 1032442894, -1090127653, 1055606718) + W(3, -1104101067, 1026136504, 1031986833, 1045409779) + + W(4, -1105090355, -1106552079, -1075116096, 1073431719) + + W(5, 1067916428, 1042119939, -1112990087, 1031635830) + W(6, 1041479887, 1028813354, -1087510643, -1084272058) + + W(7, 1045036562, -1131666845, 1021173748, -1131440550); + WS(-1115823328, -1115141930); + sum1 = W(0, 994860217, 1043355533, 1046651158, 1031967699) + W(1, -1112906011, -1139800861, 1032268536, 1021673189) + + W(2, 
-1096937968, -1093401724, -1082957735, 1057829358) + + W(3, 1054780763, 1049387203, 1029694230, 1032667294) + W(4, 1034643656, 1045563268, 1056739037, 1053028032) + + W(5, -1085595451, -1100812438, -1112691098, -1163065290) + + W(6, 1022968545, -1100277972, -1106135459, -1113373017) + + W(7, 1036473238, 1042166401, -1109328859, 1026417494); + sum2 = W(0, 1041009418, -1106230161, -1113994011, -1100787264) + + W(1, -1102450993, -1114680808, 1031764893, -1145068408) + + W(2, 1006794492, 1034312638, -1108126219, 1060460225) + + W(3, -1119222967, -1097084093, -1125377404, -1105852265) + + W(4, 1023178506, -1120362931, -1105213924, 1054015707) + + W(5, 1053508358, -1133314452, -1141774536, -1120752408) + + W(6, -1126244586, 1021996758, -1131146954, 997147503) + + W(7, -1107185372, -1126416406, -1126617106, -1131039214); + WS(1044911928, 1063248560); + sum1 = + W(0, -1117210934, 1045807559, -1116245664, -1122601008) + W(1, 1041679280, -1102494868, 1044187563, -1101059428) + + W(2, -1089987573, 1006551364, -1085623154, -1098705597) + + W(3, -1089608778, -1098692501, 1040126000, -1100421777) + W(4, 1050931427, 1049202419, 1044690392, 1056494849) + + W(5, 1054654497, 1047881833, 1043119951, 1043071666) + W(6, -1154913199, 1049427019, -1098224261, 1043742399) + + W(7, 1009817082, -1102952115, 1048685800, -1132265536); + sum2 = W(0, -1110897864, -1104703066, 1039476246, -1102025112) + + W(1, 1041770976, 1044782005, -1112640404, 1045848501) + + W(2, 1031814971, -1126406622, -1112535334, -1088994686) + + W(3, 1050082866, 1038057456, 1029663347, -1098794387) + W(4, 1060526454, 1034850394, 1060516426, -1088701567) + + W(5, -1072347435, -1075073775, -1077197270, -1081012351) + + W(6, -1090228093, -1097166620, -1091926928, 1063814816) + + W(7, 1074296249, 1073862535, 1067261757, 1067785227); + WS(-1105546040, 1026157880); + sum1 = W(0, 1040950836, 1036866652, 1045419231, 1057171433) + W(1, 1039696013, 1040737235, 1031337694, 1043417390) + + W(2, 1040614249, 1043435937, 1048836647, 
-1092874056) + W(3, 1049109967, 1037560064, 1039556030, 1020658754) + + W(4, -1102499357, 1032157499, -1088963702, -1078642173) + + W(5, -1094603626, 999044599, -1107844556, -1101965321) + W(6, 1036650571, 1032871003, 1035867609, 1040635958) + + W(7, 1039966133, 1035782037, 1034875342, 1029450491); + sum2 = W(0, 999804672, 965076992, 1017997216, -1113573416) + W(1, 1015814944, 1013150208, 1004325632, -1116017408) + + W(2, 1033414560, -1142972544, -1117537424, -1080122522) + + W(3, -1116987776, 1009041344, 1030258512, -1123867424) + + W(4, -1131242272, -1152459008, 1043660652, 1068973644) + + W(5, -1113471168, 1023678672, -1115573696, 1025739248) + + W(6, -1111239120, -1114613368, 1024438240, 1009242816) + + W(7, 1026341216, -1111376032, -1124670336, 1011147520); + WS(-1081027239, 1060388068); + sum1 = W(0, 1040945153, 1018532824, 1048158700, 1050078256) + W(1, 1043330564, 1045983912, 1034126717, 1042366848) + + W(2, 1049191505, 1052335227, 1053224518, -1089652372) + W(3, 1050009062, 1019623144, 1043374892, 1037373643) + + W(4, -1103567931, -1106690587, -1098210230, -1079677805) + + W(5, -1090769236, -1102526621, -1113976903, -1106920486) + + W(6, -1142725077, -1126830164, 1044049241, 1037048166) + + W(7, 1037549983, -1124021470, 1031972104, 1000910717); + sum2 = W(0, 1029289565, 1023484285, 1041097307, 1044843621) + + W(1, -1101664529, -1108647964, -1111945130, -1129898257) + + W(2, -1119356133, 1031916514, -1120749829, 1057057740) + + W(3, 1055633817, -1098024997, 1038083406, -1101101054) + + W(4, -1126159585, -1111485988, -1121314077, 1024531885) + + W(5, -1102656055, 1016983721, -1105960100, 1022769465) + + W(6, -1119814653, -1133461586, -1112201820, 1001931237) + + W(7, 1035253182, 1019582313, 1011319026, -1110859200); + WS(-1121436896, -1083449266); + sum1 = W(0, -1131437006, -1126965659, -1106898372, 1049661628) + + W(1, -1110051320, 1048826500, 1018834023, 1030351624) + W(2, 1057845458, 1026072602, 1058188821, 1047051462) + + W(3, 1036301673, 1049513187, 
1026266705, 1043516478) + + W(4, -1096100393, -1106937331, -1089458543, -1093909527) + + W(5, -1105302990, -1097831955, -1115312055, -1094725407) + + W(6, -1112818966, 1025271436, -1120770471, 1039868950) + + W(7, -1101852248, 1048680544, -1111779257, -1137214404); + sum2 = W(0, 1025140224, 1016993788, -1106156741, -1085967118) + + W(1, -1070014283, -1066270592, -1089828448, -1115300709) + + W(2, -1118806788, 1030590564, 1042721449, 1078987048) + W(3, 1078555235, 1069325982, 1051485796, 1026006822) + + W(4, -1126785121, -1118559458, -1106137914, -1100137345) + + W(5, 1044823121, -1105174392, -1121082393, 1011662011) + W(6, 1031495588, 991152164, 1038491925, 1040984057) + + W(7, -1111978905, 1030579932, -1113499951, -1120641633); + WS(1060473294, -1125032523); + sum1 = W(0, -1118381578, -1131528546, -1103035725, 1044891665) + + W(1, 1017562024, 1038728371, 1013193724, 1017619335) + W(2, 1043924207, 1048790648, 1057928103, -1096003632) + + W(3, -1104437021, -1127678449, -1138265298, 1019650740) + + W(4, 1028842287, -1094915206, 1048586166, -1089668066) + + W(5, 1048997587, -1106021950, 1016196772, -1112334062) + + W(6, -1103983036, 1032416951, -1131359957, 1041536420) + + W(7, -1119623443, 1026802338, -1112764523, 1028850580); + sum2 = W(0, -1124013145, -1154979909, -1113455493, 1042673063) + + W(1, 1009341589, -1131887235, -1126494367, -1120394460) + + W(2, 1023871771, -1104183315, 1024353365, -1099433949) + + W(3, 1037029004, -1106752952, -1136822537, -1135604889) + + W(4, 1041717973, -1104961174, -1071159846, 1075222357) + + W(5, 1061754151, -1106459992, 1037905746, 1020599743) + + W(6, 1027246941, -1105906795, -1114760758, 1048045634) + + W(7, -1101645527, 1027783079, -1130069351, 1023779471); + WS(1063175758, 1049951270); + sum1 = W(0, 1051934199, 1041620825, 1053157077, 1019501037) + W(1, -1121035141, 1024394007, 1034798936, 999909159) + + W(2, -1084051495, -1091045702, -1089200998, -1103613676) + + W(3, -1103906809, 1042850683, -1117239270, -1106705027) + + 
W(4, 1040719576, 1046456792, 1054477181, 1050464826) + + W(5, -1098419444, -1097805325, 1011564046, -1091232034) + + W(6, 1028219745, 1028165760, -1110307566, 1037889601) + W(7, 1045652053, 1054554722, 1031152711, 1056053333); + sum2 = + W(0, -1091471926, 1042259987, -1103057728, 1049412228) + W(1, 1044973062, -1116401558, 1011000463, -1130123924) + + W(2, 1062079447, -1095572989, -1089203271, -1092861102) + + W(3, 1034556296, 1047552828, -1110031497, 1051271629) + W(4, 1055700238, 1041645665, 1037140407, -1093629070) + + W(5, -1095940003, -1106623325, -1107294450, 1053844390) + W(6, 1015596856, 1033532620, 1035045090, 1042934527) + + W(7, 1032040702, -1106239816, 1033073048, -1095799213); + WS(-1085388366, -1090694979); + sum1 = W(0, -1123787314, -1119481145, 1032777214, -1112850502) + + W(1, 1041751591, -1125429276, -1140788444, -1113892940) + + W(2, 1044273497, -1130943626, 1047368143, 1059845268) + W(3, -1095680144, 1039407200, 990533574, 1032644049) + + W(4, -1108110651, 1041576542, -1087220302, 1027588771) + + W(5, 1036323946, -1111657568, 1021334836, -1143449895) + + W(6, -1115475932, -1117135031, 1040347561, -1101064470) + + W(7, 1037523789, -1126225621, -1134357621, -1109271006); + sum2 = W(0, -1107190004, 1013881877, -1118295314, -1121103222) + + W(1, -1127822751, -1115317828, -1126179247, -1114801766) + + W(2, 1037736456, -1105376838, -1123416196, 1042717593) + + W(3, -1115549939, 1043660085, -1126402007, 1040920849) + + W(4, -1125693587, 1040619572, -1082924015, 1068021664) + + W(5, 1016978358, -1108863865, -1136349779, -1107267301) + + W(6, -1120404934, -1122228614, 1051908042, -1087341114) + + W(7, 1042189807, -1125959243, 1038966556, 1023515477); + WS(1065904679, -1122628785); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[525]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float 
value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define temp_tex(pos) (float(texture(temp, pos).x)) +static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2); +static const float2 temp_pt = float2(1.0 / (temp_size.x), 1.0 / (temp_size.y)); + +#define HOOKED_tex(pos) temp_tex(pos) +#define HOOKED_size temp_size +#define HOOKED_pt temp_pt + +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 525; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (1)) + 0.5, float(group_base.y + y - (3)) + 0.5)).x; + } + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[8]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 4]; + samples[1][1] = inp[local_pos + 5]; + samples[1][2] = inp[local_pos + 6]; + samples[1][3] = inp[local_pos + 7]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; + samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 19]; + samples[3][1] = inp[local_pos + 20]; + samples[3][2] = inp[local_pos + 21]; + samples[3][3] = inp[local_pos + 22]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; + samples[4][2] = inp[local_pos + 32]; + 
samples[4][3] = inp[local_pos + 33]; + samples[5][0] = inp[local_pos + 34]; + samples[5][1] = inp[local_pos + 35]; + samples[5][2] = inp[local_pos + 36]; + samples[5][3] = inp[local_pos + 37]; + samples[6][0] = inp[local_pos + 45]; + samples[6][1] = inp[local_pos + 46]; + samples[6][2] = inp[local_pos + 47]; + samples[6][3] = inp[local_pos + 48]; + samples[7][0] = inp[local_pos + 49]; + samples[7][1] = inp[local_pos + 50]; + samples[7][2] = inp[local_pos + 51]; + samples[7][3] = inp[local_pos + 52]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 18]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret); +} diff --git a/src/Effects/NNEDI3/NNEDI3_nns128_win8x6.hlsl b/src/Effects/NNEDI3/NNEDI3_nns128_win8x6.hlsl new file mode 100644 index 000000000..c3835c5ad --- /dev/null +++ b/src/Effects/NNEDI3/NNEDI3_nns128_win8x6.hlsl @@ -0,0 +1,5735 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: nnedi3.py --nns 128 --win 8x6 --use-compute-shader --use-magpie +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME NNEDI3_128_6 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 1 * 2 +//!HEIGHT INPUT_HEIGHT * 2 * 1 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!FORMAT R16_FLOAT +//!WIDTH INPUT_WIDTH * 1 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D temp; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_temp; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC NNEDI3 (double_y, nns128, win8x6) +//!IN INPUT +//!OUT temp +//!BLOCK_SIZE 32, 16 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[12]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 12; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 48.0; + float mstd1 = sumsq / 48.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, -1138315554, 1017020226, 1054031418, -1098719688) + + W(1, -1110814528, -1144117236, -1151849881, -1120184142) + + W(2, 1052033568, -1094289569, 1026313661, 1029415347) + + W(3, -1130100551, -1113913797, 1057266643, -1094215666) + + W(4, -1136459684, -1122295896, -1139428474, -1137890034) + + W(5, 1050615732, -1084764287, 1037955767, -1158862002) + + W(6, -1115186183, -1105019328, 1060278709, -1093503073) + + W(7, -1125342359, -1115484761, 1024054031, 1030136834) + + W(8, 1051842684, 
-1122358095, -1118592012, 1027215228) + + W(9, -1123693866, -1119041199, 1049323809, -1104408867) + + W(10, -1141919603, -1148196514, -1134121575, 1009291685) + + W(11, 1050624038, -1101060057, -1117980571, -1131405947); + sum2 = + W(0, 1011608699, -1112090344, 1035039469, 1018826733) + W(1, -1130070276, -1126912052, 1021515189, 1032910614) + + W(2, -1112298310, -1103038162, 1030536589, 1027816837) + + W(3, -1133688887, -1111235489, 1068933685, 1035541017) + + W(4, 1002846254, -1119314146, -1109181339, 1050015381) + + W(5, 1079505355, -1096237270, 1047170650, -1132054353) + W(6, 1032724543, 1041065097, -1082386736, 1051919390) + + W(7, 1012706615, 1004049262, -1120491130, -1106592726) + + W(8, -1067816278, -1101133158, -1108914015, 1017068057) + + W(9, 997621755, -1112521193, -1094139890, -1120644044) + W(10, 1011888367, 1020965593, 1020760181, 1032648390) + + W(11, -1123411157, 1029904493, -1115273665, 1017829425); + WS(1038714296, 1044827315); + sum1 = W(0, 1008773074, -1137155124, 1058392080, -1100726684) + + W(1, -1097060854, 1034472653, 1000000044, -1139148008) + + W(2, 1051571686, -1106613964, 1034039029, -1136883834) + + W(3, -1125197471, -1110040196, 1058043460, -1093356205) + + W(4, -1112523275, 1033872898, -1155069097, -1114984084) + + W(5, 1059856605, -1087655353, -1137125990, -1122772178) + + W(6, -1128457331, -1117135435, 1053821137, -1093486798) + + W(7, -1134047198, -1125370043, -1147450330, 1038803511) + + W(8, 1050763009, -1088451200, -1131894321, 1027534056) + + W(9, -1142834606, -1110360429, 1046500044, -1109846237) + + W(10, 1021083253, -1126166913, -1131672268, 1046900069) + + W(11, 1049636107, -1093848946, -1108292068, 1020331091); + sum2 = W(0, -1123255245, -1116671780, 1046375724, 1061532718) + W(1, 998291536, -1094346395, 982126402, -1111301077) + + W(2, 1053914306, 1066141278, 1041257894, -1108898352) + W(3, 1020504368, 1011773496, 1013495848, 1064263836) + + W(4, 1023442398, 1023084860, 1036976721, -1102830064) + + W(5, -1106352300, 
1055449836, -1101238723, 1037821947) + + W(6, -1113656730, 1031803093, -1103067006, -1076699072) + + W(7, -1103198508, -1123876988, -1157600449, 1045043848) + + W(8, -1094946683, -1085904680, 1048924521, 1041381351) + + W(9, -1129338356, 1026389126, 1036977493, -1099822488) + + W(10, -1109968297, -1109391389, 1032297389, 1022832316) + + W(11, -1098417421, -1092845936, 1053328593, 1023206676); + WS(-1087781687, -1134030789); + sum1 = + W(0, 1028179002, -1113846601, -1090428884, 1056064759) + W(1, -1116809458, -1123784128, 1037437606, -1129634048) + + W(2, -1098510574, 1049319247, -1105760812, 1016685513) + + W(3, -1106850597, 1045066941, -1095161561, 1048732210) + + W(4, 1007721067, -1105661778, 1040743460, -1108576055) + W(5, -1094444002, 1054487227, 1050640540, 1048222150) + + W(6, -1123964671, 1030205873, -1095134094, 1044300425) + + W(7, 1040160031, -1110013906, -1122790273, -1117192467) + + W(8, -1098192922, 1055338520, -1106372902, 1033480951) + W(9, 1036030878, 1027547489, -1101066654, 1034894336) + + W(10, -1109703824, -1112122819, 1027799500, -1110675758) + + W(11, -1095206484, 1053272331, -1138309939, 1033810456); + sum2 = W(0, 1023697539, -1119401943, 1010071413, 1025267901) + W(1, 988018625, -1122026299, -1134698244, 1013558867) + + W(2, -1112961274, 1033970227, -1116217767, 1005039212) + + W(3, 1018187921, -1119659280, 1019163177, 1048742053) + + W(4, -1103390691, -1115786374, 1024534132, 1038666215) + + W(5, -1097296472, 1082848284, -1064739318, 1042066180) + + W(6, -1145140771, -1120396841, 1033089456, 1060700209) + + W(7, -1086947071, 1017803659, -1142625275, 1035154544) + + W(8, -1105855221, -1115995862, 1042818438, -1115628759) + + W(9, -1141296989, -1131590401, -1156341795, 1035612397) + + W(10, -1109434149, -1150181080, 1007645751, 1021197862) + + W(11, -1120357988, -1123714371, 1029308077, 1011442501); + WS(1060770743, -1108197568); + sum1 = + W(0, 1016886775, 1029069588, -1095627128, 1046558478) + W(1, -1146067869, 1009558662, -1160198547, 
1038531041) + + W(2, -1096395039, 1048524254, -1097778094, 1033422755) + W(3, 1015246637, 1041390628, -1086831062, 1058409012) + + W(4, -1098986327, 1033638277, -1139668038, 1022402527) + W(5, 1057717017, 1050284011, -1110221104, 1029754521) + + W(6, 1034731136, -1103954605, 1056693263, -1083755424) + + W(7, 1042951984, -1119102757, 1019206597, -1112398730) + + W(8, 1053908426, -1092005688, 1037994808, -1130383710) + + W(9, -1123983389, -1117714338, 1043180606, -1101033824) + + W(10, 1025216609, -1114155844, -1115649126, 1039312430) + + W(11, 1038174992, -1131529899, 1000398449, -1129441697); + sum2 = + W(0, -1104188915, 1041755532, 1039915367, 1028942726) + W(1, 1030445090, -1104654601, 1026676440, -1099902739) + + W(2, 1048552390, -1105392496, -1104867005, 1033528857) + + W(3, -1122447814, 1020538503, -1105911641, -1106738713) + + W(4, -1106850996, 989961401, 1024214572, -1096783078) + W(5, 1068731351, 1066518668, -1126700225, -1133955554) + + W(6, 1036156927, -1094671115, -1096057439, -1122053390) + + W(7, 1025639958, -1153726297, 1039804037, -1096595186) + + W(8, -1091471401, -1123632409, -1113480650, 1032072745) + + W(9, -1140508998, -1108992818, 1033306823, 1032650381) + + W(10, -1115144981, -1128986161, 1023379743, 1023478108) + + W(11, -1096750685, -1109607147, 1031767054, 1023561226); + WS(-1105940700, 1068951582); + sum1 = + W(0, 1000087558, -1126610867, 1040363554, -1103531583) + W(1, -1126498674, 1033586531, 1035444041, -1119652358) + + W(2, 1048983991, -1095183320, 1041424541, 1011542187) + W(3, 1016288952, -1105308514, -1129232537, 1035540828) + + W(4, -1116171169, 1018737568, 1033866603, 1025089333) + W(5, 1043404182, -1091977028, -1108874003, 1021129404) + + W(6, 1023501510, -1101500686, 1050470779, -1098956255) + W(7, -1107291841, 1033496245, 1030903912, 1024487034) + + W(8, 1032787659, 1016827662, -1115835365, 1023594100) + W(9, -1121882082, 1036868116, 1042688239, -1107477027) + + W(10, 1038297635, 1015384473, -1139243976, -1137536646) + + W(11, 
1051055933, -1111432589, -1101253029, 1034920923); + sum2 = W(0, -1123978268, -1102554320, 1043643239, 1026913323) + + W(1, 1031431539, -1112242992, -1122059260, 1044079872) + + W(2, -1111536011, 1056944699, 1037617295, 1005844362) + + W(3, -1123592170, -1093658955, 1042723115, -1093217948) + + W(4, -1103624535, -1101249260, 1036141312, -1094337979) + + W(5, 1067907418, 1064662115, -1090550375, 1042545272) + + W(6, -1111710804, 1045282706, -1118927202, -1091166584) + + W(7, -1119161360, -1111294278, -1127043568, -1139302445) + + W(8, 1035940631, -1092135065, -1112133073, 1025042022) + W(9, 1040694361, 983065168, 1016057395, 1041761864) + + W(10, 1014979737, 1026789959, -1120657957, -1105233712) + + W(11, 1027935282, -1104693620, -1106943303, -1114223884); + WS(1061289911, -1074625008); + sum1 = + W(0, -1118986355, 1035013222, 1058027688, -1087592511) + W(1, -1107213995, 1048036382, 1042690449, -1110099132) + + W(2, 1047068971, 1013931199, 1008030621, -1106703359) + W(3, 1022020251, 1048074309, 1036127926, -1088015817) + + W(4, 1042727509, 1044183504, 1032007652, -1101544522) + W(5, 1058005973, -1099315493, -1093058723, 1029001296) + + W(6, 1040686510, -1102744618, 1048070472, -1091144488) + + W(7, 1042938237, -1106075703, -1108674269, 1043103143) + W(8, 1057593649, -1088950448, 1002612916, 1048869669) + + W(9, -1123719572, -1099009366, 1043345580, 1033106140) + + W(10, -1110941900, -1107279296, 1033202498, 1038471242) + + W(11, 1055515931, -1090129177, -1113377491, 1042195418); + sum2 = + W(0, 1034897388, 1047600068, -1092005485, 1032211690) + W(1, 1046053217, -1111289614, -1104469346, 1074182899) + + W(2, -1074957140, -1124075063, 981988140, 990179414) + W(3, -1104312342, 1080728354, -1067981267, -1100127529) + + W(4, 1036844207, -1123826485, 1033162274, 1077196082) + + W(5, -1071410100, -1093339345, 1030504779, -1109598746) + + W(6, 1001538203, 1039546617, -1101189403, 1036352485) + W(7, 1025223671, 1036519612, -1119055011, -1121476275) + + W(8, 1042351425, 
1008083901, -1101604075, -1146561467) + + W(9, 1036889309, -1113609974, -1113906708, 1032631324) + + W(10, 1034478190, 1017678167, -1122390571, -1124909535) + + W(11, 1037135096, 1002134635, -1113103836, -1132393287); + WS(-1106894556, 1030995446); + sum1 = + W(0, 1025589699, 1033805806, 1034528997, -1098701288) + W(1, -1119393787, 1027839817, -1123387156, 1025291593) + + W(2, 1054610112, -1096105008, 1037953340, -1123546765) + + W(3, 1028556318, -1127599047, 1039781117, -1091536615) + + W(4, -1120434724, 1027349522, -1139656804, -1105750398) + + W(5, 1065523109, -1090836572, -1103173850, 1022652480) + W(6, 980812294, -1104274718, 1054896451, -1097995702) + + W(7, -1111869411, -1135075504, -1128131860, -1117867836) + + W(8, 1045458038, 1039238949, -1104116474, -1125284461) + + W(9, -1137342353, 1018769427, 1040199808, -1110811780) + + W(10, -1129786002, -1152753902, 1017797929, -1114245906) + + W(11, 1047692618, 1001735913, -1106977725, -1131064048); + sum2 = W(0, 1030581609, -1096073996, -1104952058, 1036583621) + + W(1, 1006888722, 1023568502, -1110091164, -1132437108) + + W(2, 1031847146, 1040176294, -1120421639, -1124563800) + W(3, 1024868796, 1051933742, 1077840619, 1056082302) + + W(4, -1114613612, 1026684617, -1124528958, 1042406776) + + W(5, -1071401906, -1080138625, 1012484405, -1114165836) + + W(6, -1175372351, -1117745155, -1108532142, 1039833458) + + W(7, 1044393926, -1139194300, 1022252519, -1117509823) + + W(8, -1107574374, 1041695761, -1112420268, 1023135287) + + W(9, -1123141270, -1110977956, 1034777920, -1119756119) + + W(10, 1028893644, -1122696127, -1123316826, 1029056744) + + W(11, 1034383482, 977900031, 1027961212, 1034470668); + WS(-1094248046, 1034739186); + sum1 = + W(0, -1121641997, 1043421097, -1090462057, 1057979744) + W(1, 1042504017, 1019430629, -1148807918, -1114752489) + + W(2, -1097763033, 1045790578, -1105205440, -1115505835) + + W(3, -1119896808, 1040593858, -1089339622, 1054873978) + W(4, 1029920941, 1008619887, -1118546659, 
1043491504) + + W(5, -1089142502, 1054724534, -1114903193, 1038945077) + + W(6, -1117202258, 1033892386, -1089696734, 1056858543) + W(7, -1102740772, 1025474934, 1027770003, 1035924243) + + W(8, -1098379668, 1046524773, 1042806872, -1148381698) + + W(9, -1121511758, 1032482905, -1096843609, 1049846778) + + W(10, -1103488653, 1017864241, -1114510703, 1031829015) + + W(11, -1089945727, 1056083605, 1031492831, 1029503362); + sum2 = + W(0, 1032629114, -1111104155, -1111175727, 1069180600) + W(1, -1092657949, -1081878912, 1024571549, 1022922994) + + W(2, 1047799465, 1065968459, -1106624299, -1085703668) + + W(3, -1123705993, 1036988882, -1121840686, 1066031139) + + W(4, 1026540087, -1080884055, -1120833813, -1104365480) + + W(5, -1098359864, 1064223912, 1044325339, -1079207816) + W(6, 1031286487, 1038064580, 1008715668, 1062275672) + + W(7, 1048614780, -1083548865, -1113289848, -1119955010) + + W(8, -1115327032, 1060225053, -1117772151, -1085294305) + W(9, 1033808184, 1018164430, 1042276675, 1060364945) + + W(10, -1105153406, -1090372695, 1035572024, -1144675449) + + W(11, -1114533548, 1057151625, -1103516228, -1100806033); + WS(-1095016302, 1049780935); + sum1 = + W(0, -1140214663, 1050711943, 1044505338, -1092537964) + W(1, -1110639373, 1025322472, 1008440267, -1110792724) + + W(2, 1051364259, -1101785640, 1026645590, 1024485132) + W(3, -1137823987, 1036012635, 1051977632, -1091742968) + + W(4, -1130979716, 1018037601, 1035221148, -1104496021) + + W(5, 1058946996, -1090015552, -1106527650, 1032762847) + + W(6, 1007510683, -1145733059, 1056967247, -1090979415) + + W(7, -1146016549, 1010918699, 1029727784, -1109552317) + W(8, 1057598839, -1088424934, 1015167915, 1027572437) + + W(9, -1142229143, -1115585956, 1050931556, -1096995861) + + W(10, 1009720606, -1127055707, 1025812851, -1142816275) + + W(11, 1058283380, -1095074239, -1105047309, 1011865435); + sum2 = W(0, 1040969494, -1087779211, 1056604611, 1059045592) + W(1, -1085730308, 1035668628, 1036141535, 1033693595) + 
+ W(2, -1095617275, 1037950807, 1051875566, -1110274633) + + W(3, -1136101349, -1096934819, 1057610078, 1059474718) + + W(4, -1104408796, -1106464684, 1015318832, 1042126031) + + W(5, 1059711341, 1079595174, 1011257519, -1106179801) + + W(6, -1112220931, 1036377831, -1098744628, 1065421414) + + W(7, -1102027272, 1035078820, -1111621316, 1046621966) + + W(8, -1088974739, -1079659864, -1097874228, 1045400280) + + W(9, 1042180910, -1129542795, -1112239718, -1077353318) + + W(10, -1123714793, -1122902412, -1115487487, 1052735842) + + W(11, -1089093989, -1076641223, -1089345613, 1057832501); + WS(-1081166556, -1096725839); + sum1 = W(0, -1146977357, -1114510289, 1050030946, -1102646113) + + W(1, -1113153825, -1114582784, -1130642485, -1119269199) + + W(2, 1045921361, -1098664649, 992772066, 1022415207) + W(3, 1032268722, 1038356317, 1039889859, 1027769520) + + W(4, 1041426283, 1025422681, -1111253552, -1095921640) + + W(5, 1057711094, -1089074675, -1107182252, -1098886685) + + W(6, 1024786511, 1038758688, 1053078718, -1098156409) + W(7, 1043642083, 1036466489, 1021187873, -1102108905) + + W(8, 1045805544, -1106011300, -1131324475, -1129952762) + + W(9, -1115200917, -1117501552, 1042557010, -1102728052) + + W(10, -1148235644, -1133449046, 1021141494, 1024092394) + + W(11, 1052716334, -1107570834, -1153593794, 1012650491); + sum2 = + W(0, -1154116071, 1032309947, -1123962703, -1133231158) + W(1, -1128886461, 1035599863, -1120032365, 1017679770) + + W(2, 1025906643, -1134787122, -1108727038, 1039832482) + + W(3, 1033357218, -1118521489, 1007375557, -1096414395) + W(4, 1020492423, 1035524914, -1132933754, 1020082264) + + W(5, -1114291801, 1064675801, -1080568363, 1059399952) + + W(6, -1165588207, -1122971142, -1114360955, -1097709607) + + W(7, -1108104258, 1040378905, 1029048100, 1015972752) + W(8, 1031540723, 1040110606, -1112615988, 1024318985) + + W(9, -1115144648, 1021714009, -1136421576, -1121524587) + + W(10, -1111963053, 1036458899, 1020455541, -1134334294) + + W(11, 
1014310369, -1119507729, 1028363593, -1122706022); + WS(1058284215, -1100256461); + sum1 = W(0, -1136333391, 1042064203, 1048792691, -1108621703) + W(1, -1107171315, 1009105958, 977549609, 1017583666) + + W(2, 1037147366, -1098054746, 1036542212, -1118222526) + + W(3, 1023954799, -1103445177, 1061193316, -1084539629) + W(4, 1001865121, 1005641493, 1032458293, 1003134525) + + W(5, 1055543997, -1105743629, -1113492892, 1001049989) + + W(6, -1131855793, 1030077610, -1089393894, 1061657482) + + W(7, -1095280499, -1141919717, -1132003829, 1044505715) + + W(8, -1102389770, -1111653453, 1000070140, 1031530183) + + W(9, 1014651348, 1034368605, -1126089633, 1032593324) + + W(10, -1124982771, 957669220, 1007647892, -1109681210) + + W(11, 1030932473, -1134741175, 1012906393, -1123915241); + sum2 = + W(0, 1025749667, 1018718450, -1104497483, -1109821356) + W(1, 1032840840, 1009497507, -1112523495, 1033569638) + + W(2, -1111912018, 1026676021, -1114119398, 1017944258) + + W(3, 1034709000, -1099586256, -1087345020, -1092985416) + + W(4, 1018773778, 1033599708, 1021531102, -1096388774) + W(5, 1061077725, 1065372034, -1129773592, -1109427758) + + W(6, -1129693762, -1109846828, 1049253485, 1048850468) + + W(7, -1101038787, 1029395381, 1006685387, -1112379914) + + W(8, -1117456886, -1106255761, 1043058397, -1106769492) + + W(9, -1119027428, 1008781387, 1038233914, 1013968523) + + W(10, -1113511705, 1020319838, -1106459716, 1045287677) + + W(11, -1135366091, -1138431127, 1037121278, -1113647178); + WS(1048665454, 1062854025); + sum1 = + W(0, 1004974300, -1140401360, 1040482153, -1107105990) + W(1, -1113947114, 1004764812, 1032679478, -1099989608) + + W(2, 1038119146, -1094089019, 1043046180, -1132148363) + + W(3, 1041394910, -1108003052, 1050737065, -1084721726) + W(4, 1044891839, 1022372181, 1026224498, -1103518244) + + W(5, 1054936910, 1054691719, 1025601857, -1113753404) + W(6, -1128844481, 1044968661, -1084409495, 1057517320) + + W(7, 1022006333, 1033354242, -1137588617, 
1040271757) + W(8, -1099872844, 1035416977, 1017193535, -1131287582) + + W(9, -1115578718, 1031459778, -1107158134, 1033669936) + + W(10, 1025595521, -1112957780, -1128306202, -1114933822) + + W(11, 1038726400, 1032390235, 1023203548, 1012659010); + sum2 = W(0, -1120028750, -1116919239, 1026537129, 1032246372) + + W(1, 1047938064, -1102851233, -1137390044, 1034599590) + + W(2, -1105861815, -1095438265, 1022176478, -1129352893) + + W(3, 1031876652, -1110926857, -1114848017, -1093245727) + + W(4, -1093210017, 1046245506, -1112211314, 1037510574) + + W(5, 1054207178, 1062401065, -1118385188, -1106645860) + + W(6, 1041988042, -1091078601, 1059021754, -1115099719) + + W(7, -1104427658, 1043294706, -1105672202, 1052042083) + + W(8, -1104155840, -1100501338, 1013766280, 1016480352) + + W(9, 1032816067, -1102117757, 1047757772, -1112097107) + + W(10, 1027075631, -1113418727, -1122489809, 1043447553) + + W(11, -1115066040, -1102195598, 1027463789, 1030659187); + WS(1046819548, -1112403026); + sum1 = + W(0, 1043575156, -1096536373, 1028458839, 1042563315) + W(1, -1147631681, -1134760216, -1118831073, 1040283909) + + W(2, 1047473762, -1108795847, -1115473730, 1037371629) + W(3, 1028002987, 1049135339, -1083606160, 1040849058) + + W(4, 1015994849, 1036578576, 1025124390, -1115136450) + W(5, 1052252597, 1045384892, -1102431101, -1147407721) + + W(6, 1023377962, -1127238030, 1054448495, -1081137782) + W(7, 1046487189, 1029054905, 1034880715, 1042113706) + + W(8, -1106041242, 1040945110, 1026652836, -1113175874) + + W(9, -1125071779, -1124049974, 1031800519, 1026387221) + + W(10, -1096718751, 1032040846, -1144027915, 1045174012) + + W(11, -1138801364, -1122981994, 1030118403, -1121286085); + sum2 = + W(0, -1116472244, 1036928405, 1035706238, -1121318128) + W(1, -1114868876, 1033058067, 1025044698, -1122373594) + + W(2, 1040619830, -1104861162, 1034962287, -1110745183) + + W(3, -1123391126, -1111538981, 1046111644, 1038958235) + + W(4, 1034772140, -1130393948, 1003069456, 
-1111848203) + W(5, 1044878634, 1050016986, -1113883437, 1005454840) + + W(6, -1129719684, 1027481573, 1017226015, 1040124875) + W(7, -1114732111, -1105702279, -1110477199, 990411281) + + W(8, 1030695866, -1116142138, -1102609372, 1029172520) + + W(9, 1023677849, -1136822924, -1114454329, -1113301330) + + W(10, 1029458562, -1112847239, 989635105, -1136314996) + + W(11, 1007553654, -1112411005, -1109652887, -1150621057); + WS(1054158702, -1098491196); + sum1 = W(0, -1129450472, -1119237644, -1101713087, 1033589815) + W(1, 1034513750, 1023147092, 983582877, 1026235385) + + W(2, -1098493798, 1048228620, -1123359016, -1173092218) + + W(3, -1121209423, 1037197666, -1083899440, 1060260620) + + W(4, -1103037013, 1035595873, -1121678922, 1031464488) + + W(5, -1103778771, 1056756834, -1122982297, 1020058530) + + W(6, 1031290553, -1099798307, 1057005120, -1093716116) + + W(7, 1049393023, -1117980637, -1119715259, -1102325483) + + W(8, 1046824098, -1102446942, 1028221304, -1124996559) + + W(9, 1032360987, -1107157175, 1037130136, -1111054172) + + W(10, 1041637742, -1115548960, -1115265987, 1036899466) + + W(11, 1011053924, -1132207900, 992781927, 995105423); + sum2 = + W(0, 1022318336, 1013555797, -1107715656, 1023151297) + W(1, 1033284778, -1133592549, -1115650269, 1016103802) + + W(2, -1119261847, -1114491928, -1130031974, -1142562170) + + W(3, 1023537352, 1041650416, 1043696631, 1042399799) + W(4, -1108010060, -1132296546, -1109544964, 1052577828) + + W(5, -1093802466, -1088107917, 1027122983, 1012286637) + + W(6, -1104797311, -1105646271, 1070961498, 1046742027) + + W(7, -1123089325, -1129646858, 1016525394, -1089308865) + + W(8, -1101461419, 1041655381, 1023301191, -1123666701) + + W(9, 1034835508, -1093424353, -1129647390, 1027817568) + + W(10, 1010592861, 1020078242, 1033208955, -1123233885) + + W(11, -1103638866, -1123805293, 1027104379, 1017326494); + WS(-1098756718, 1025257793); + sum1 = + W(0, -1170432841, 1023803110, -1090131561, 1057699745) + W(1, -1119694405, 
1012926412, 992949730, -1138344011) + + W(2, -1107275570, 1040420307, -1117188674, -1123369303) + + W(3, 1001883457, 1026014126, -1096556254, 1054103406) + + W(4, -1115724526, -1127845816, 1015181254, -1121590721) + + W(5, -1096688477, 1044394251, 1029634610, 1035099253) + + W(6, -1113865961, -1118191492, -1094031636, 1054646511) + + W(7, -1140440641, -1114907963, 1025052052, -1131017933) + + W(8, -1105028073, 1053702338, -1105231572, 1041135600) + W(9, -1142935155, 992699702, -1106433523, 1018441892) + + W(10, 1028218227, -1110839852, 1028407518, -1125677724) + + W(11, -1094260670, 1056434389, -1115106656, 1033590433); + sum2 = + W(0, -1113857499, 1035336978, 1003212424, 1016146718) + W(1, -1124908942, 1007728744, 1017397942, 977119295) + + W(2, -1169985407, -1139136560, -1127032094, 1015257206) + W(3, 994248048, 1058657166, -1086726098, 1029663775) + + W(4, -1128987279, -1139895388, -1107301692, 1082282453) + + W(5, -1067974855, -1088717558, 1045162011, 989071903) + W(6, -1114604929, 1081592322, -1065596346, 1032721198) + + W(7, -1132432620, -1115423533, -1099171315, 1057428850) + + W(8, -1101606461, -1109521003, 1001306728, 1026551071) + + W(9, -1128156929, 1029982493, -1114706375, 1035913834) + + W(10, -1127893472, -1124574009, -1105825924, 1050573738) + + W(11, -1113547007, -1115579388, 1022139202, 999865176); + WS(1057488311, 1032489366); + sum1 = W(0, 998859696, -1107316871, -1130375162, 1045821468) + + W(1, 1009430200, -1127874328, -1126361486, 1034785118) + + W(2, -1098594694, 1031878554, -1137835434, -1124885440) + + W(3, 1028022609, -1114064513, -1105098668, 1057263754) + + W(4, -1104540630, -1121514949, -1139957040, 1048520075) + + W(5, -1099155379, -1127500588, 1033795278, 1026343507) + + W(6, 1015295094, -1112481238, -1109749909, 1049599048) + + W(7, -1105774078, 1007199100, -1122345661, -1154023155) + + W(8, -1112014022, 1025402492, 1046311438, -1119007402) + + W(9, -1126668079, -1133481329, -1114013594, 1034877470) + + W(10, -1109189150, 1026086402, 
-1129886863, -1108825451) + + W(11, -1116334683, 1046739556, -1119327433, -1138298796); + sum2 = + W(0, -1131728975, 1036764889, -1111227173, -1128881737) + W(1, 1037242625, -1122740141, 1035973522, -1115068573) + + W(2, 1022919798, 1043114423, 990889655, -1130091274) + W(3, 1044679054, -1096357732, -1128128484, -1098903856) + + W(4, 1034691185, -1126568042, 1057647035, -1081650015) + W(5, 1064645808, -1105946360, 997618289, 1036162942) + + W(6, 1029801795, 1008652352, -1094730608, -1121257166) + + W(7, -1119761613, -1145265129, -1120435503, 1020148594) + + W(8, 1034184152, -1122224678, -1123774451, 1016517254) + W(9, 1040526136, 1019038058, -1110501324, 1033879187) + + W(10, 1017411026, 1025838030, 1001204178, -1125968836) + + W(11, -1141167471, -1125677560, 1017460301, -1121815648); + WS(1059796919, -1120566973); + sum1 = + W(0, -1128913552, -1112300176, -1105877684, 1051198984) + W(1, 1031319296, -1128535084, -1121363289, 1042197939) + + W(2, -1097436160, 1044389601, -1156941767, 1024751122) + + W(3, -1114223569, 1034918960, -1084964393, 1058261945) + + W(4, -1115209492, 1025460631, -1170089852, 1019526904) + W(5, 1000497399, 1057668526, 1032065846, 1032881746) + + W(6, 1032486790, -1097594444, 1058860547, -1083919657) + W(7, 1045243539, 1010383044, 1007200827, -1109654209) + + W(8, 1031553246, -1093305914, 1041280427, 986576750) + W(9, 1031548233, -1121876014, 1038828418, -1113158982) + + W(10, 1014469175, 1034628234, 1025411673, -1114258964) + + W(11, -1106885131, 1024357474, -1128319413, 1012703988); + sum2 = W(0, -1143612781, 1030657759, -1142189493, -1116365956) + + W(1, -1108113093, -1144957229, 1024653023, -1114346787) + + W(2, -1145631357, -1122350618, -1120960689, 1013910379) + + W(3, 1031492195, -1115709916, 1041520845, -1109597137) + + W(4, -1100459142, 1028619709, -1118229715, -1122961088) + + W(5, 1058735511, 1063925227, -1102646250, 1003281301) + + W(6, 1037055061, 1023143173, -1123318690, -1097941231) + + W(7, -1097541479, 1040501280, -1103747903, 
1032102149) + + W(8, -1113230658, -1096423541, -1119600722, 1015718737) + + W(9, 1027878177, 1035049663, -1107577516, 1034348655) + + W(10, 994779835, -1116733621, -1120661119, -1115297076) + + W(11, 1029140405, -1104473650, 1048901284, -1122107736); + WS(1025862512, -1097886171); + sum1 = + W(0, 1017995501, 1039868615, -1084617440, 1048852846) + W(1, 1036255511, 1003283810, 1028206791, 1039095646) + + W(2, -1095591910, 1046495341, 1032043925, 1031081728) + W(3, -1115115241, 1044724702, -1088992852, 1053236428) + + W(4, 974144948, -1128580235, 1023900364, 1040670187) + W(5, -1089161581, 1054642003, 1024473755, 1033561578) + + W(6, 973319892, 1036683863, -1092805231, 1050422468) + W(7, -1132527589, 1012484477, 1028215869, -1105649212) + + W(8, -1107053347, 1049220624, -1120787802, -1126192084) + + W(9, 1033665516, 1040757263, -1107160205, 1039010398) + W(10, 1025750940, 1035723265, 1031994596, -1101644966) + + W(11, -1102281943, 1042256548, 1034960116, -1141688154); + sum2 = W(0, 1017309480, -1113384832, 1051665642, -1106075829) + + W(1, 1041533127, -1143916576, -1115898040, -1105498372) + + W(2, 1043260681, 1041703091, -1122221676, -1119502308) + + W(3, 1026950620, -1140340528, -1109716244, -1109231476) + + W(4, 1047102067, 1032670038, -1131938440, -1101077232) + + W(5, 1039662810, 1049668212, -1109791178, -1130795288) + + W(6, -1120377052, -1114410560, -1113565360, 1032262130) + + W(7, 1039963022, -1112068676, -1147724576, 1042402941) + + W(8, -1097796100, 1040746517, -1131969000, 1031201932) + + W(9, -1108926548, -1114542528, -1108348496, -1140600496) + + W(10, 1032294606, -1108483346, -1120540424, 1040078450) + + W(11, -1097463639, 1048026467, -1107112902, 1028500076); + WS(1059594295, -1081462343); + sum1 = W(0, 1027153262, 1043504002, 1052804111, -1087238767) + W(1, 1041162890, 1012415888, 978268098, -1116748340) + + W(2, 1047257898, -1102654539, 1038115889, -1165722369) + + W(3, 1020914066, 1039643767, 1049749519, -1086204143) + W(4, 1044063108, 1033818911, 
1037283618, 1038369681) + + W(5, 1054855273, -1088075529, 1037006730, 1032180390) + W(6, 1001487768, 1034058802, 1047397560, -1086503800) + + W(7, -1117492081, 991868273, 1024040629, 1040894970) + W(8, 1052275873, -1098202447, 1024505810, 1034352017) + + W(9, -1121199663, -1111840522, 1039603542, -1107277965) + + W(10, 995252542, -1131707508, 1022356543, 1036921959) + + W(11, 1026848211, -1098696769, 1036480704, -1136594828); + sum2 = W(0, 1028621181, -1120219585, 1035384087, 1034472183) + + W(1, -1123130413, -1120488613, -1110777341, -1127812186) + + W(2, -1126856730, -1129759266, -1106344343, -1132463093) + + W(3, 1027102605, -1121966109, 1038893967, -1123472797) + + W(4, -1121376361, -1111137329, 1030597213, -1109734497) + + W(5, 1046142043, 1054407172, -1110811845, 1013238549) + + W(6, -1106396561, -1139970197, -1102581665, -1114873213) + + W(7, -1147353578, -1113345499, 1041951037, 1005564394) + W(8, 1024169581, 1049042812, 1023037898, 1010521365) + + W(9, -1110468651, 1027913013, -1119264521, -1104920828) + + W(10, -1123538565, -1123696429, 1026771477, 1021745754) + + W(11, 1045072501, -1117275361, -1108801471, 934750635); + WS(1001790336, 1033193226); + sum1 = W(0, -1113292474, 1018580889, -1117909243, 1052553934) + + W(1, -1104113579, -1129785460, -1131356554, -1115176285) + + W(2, 1046326027, -1113855449, -1112351060, -1156866531) + + W(3, -1133969725, -1102427201, 1041639692, 1060794158) + + W(4, -1095014251, -1132017542, -1118664394, -1094752997) + + W(5, 1062953181, 999804065, -1105541862, -1130622370) + + W(6, 1027204153, -1105768635, 1031529696, -1122254720) + + W(7, -1102735326, 1022740922, -1111709325, -1119059021) + + W(8, 1040555139, -1110352445, 1031818996, -1121415144) + + W(9, 1024655285, -1121009370, -1115548569, 1026923705) + + W(10, 1008421480, 1008916224, -1127980619, -1131789226) + + W(11, 1030139737, -1100154756, 1016036429, -1122315305); + sum2 = + W(0, 999771042, 1019371878, 1035878909, -1106457448) + W(1, 1038257769, -1118330440, 
-1130012060, -1137594745) + + W(2, 991329315, 1043986431, -1108709562, -1156785507) + W(3, -1123589256, 1002965746, 1042318306, -1118271248) + + W(4, 1025444512, -1134019065, 1018901632, -1114341154) + W(5, 1053769225, 1057361082, -1113472862, 1021163648) + + W(6, -1124249760, -1115605194, 1010690665, -1102884302) + + W(7, 1015483632, -1123825168, 1017234680, 1017493044) + W(8, -1099274218, 1037230696, -1115820708, 1025071687) + + W(9, -1118367200, 1015905000, -1101115118, -1112309114) + + W(10, 1020142296, -1126466092, 1016197020, 1024661143) + + W(11, -1092869373, 1036147884, 1016005876, 1013380697); + WS(-1096043118, -1087645231); + sum1 = + W(0, -1115587665, 1029203067, 1045341850, -1106908023) + W(1, -1131991862, -1114910160, 1032694426, -1099485226) + + W(2, 1017432376, 1052615508, -1113683692, -1127306394) + + W(3, 1031441086, -1113336032, 1054321442, -1082902530) + W(4, 1050084000, 998100766, 1023546425, -1121275799) + + W(5, 1050158903, 1045376289, -1116607496, -1147820543) + + W(6, -1130229584, 1051148019, -1081266857, 1057440145) + + W(7, 1027649766, 1021215128, -1147531597, -1118364796) + W(8, 1047006846, -1118131945, -1120491559, 991663462) + + W(9, 1019733960, -1127235090, -1106232936, -1134199968) + + W(10, 1027530281, -1123554607, -1115129818, -1119188228) + + W(11, 1041137522, 1018710010, -1131853547, -1124042776); + sum2 = + W(0, -1129211209, 1023515648, 1040374834, -1110890211) + W(1, 1020265593, 1023672492, 1023861908, -1119658781) + + W(2, -1108471855, -1133136545, 1048083250, -1109425046) + + W(3, 1038235290, -1121364796, -1096619497, 1044872450) + + W(4, 1034188808, -1121333166, 1029997412, -1103304652) + + W(5, 1039720436, 1050855479, -1102257516, -1130250517) + + W(6, -1125523437, -1109858923, -1113539350, -1117035130) + + W(7, 1022698753, 1029442100, -1107362446, 1044349350) + W(8, 1038091910, -1107498682, 1024895152, -1122118916) + + W(9, -1114850415, 1032218024, -1121557752, -1130602657) + + W(10, -1162309256, 1020180841, 1013933265, 
1027861436) + + W(11, 982770184, -1128608489, -1129090321, 939323929); + WS(1062090935, 1058767985); + sum1 = + W(0, 1016379967, -1111629724, -1115487571, 1040669191) + W(1, -1124650927, -1131647467, -1127587553, 1016014666) + + W(2, 1046030390, -1131263417, -1107621674, 1039946613) + + W(3, -1124869109, 1045897412, -1089079359, 1049140321) + + W(4, -1120715392, -1197661491, 1030044278, 1039181162) + W(5, -1107063038, 1046027952, 1035258018, 1033161790) + + W(6, -1135267218, -1111908667, 1056809425, -1082917799) + + W(7, 1051125844, -1129645073, 1021197901, 1032078908) + + W(8, -1100036461, 1040408361, -1117322707, -1124734374) + + W(9, -1146487047, -1129970444, 1008766834, -1115518322) + + W(10, -1115270659, 1029976235, 1001072263, 1037221664) + + W(11, -1118068997, 1034380765, -1135017032, -1119763541); + sum2 = + W(0, 1033054007, -1111327982, -1121722910, 1038740385) + W(1, -1119336676, -1124024618, -1116547758, 1041352776) + + W(2, 1034677105, -1115132829, -1124364804, 1035204548) + + W(3, -1123189128, 1032790615, 1033549613, -1092378351) + W(4, -1102497930, 1032784783, 1033469023, 1012971008) + + W(5, 1033020303, -1108961666, -1106694042, 1032253989) + + W(6, -1122818756, -1098982462, 1051050782, -1121004928) + W(7, 1035128176, 1009754028, 1026371779, 1042838787) + + W(8, -1098306997, 1045175561, 1039643256, -1118316692) + + W(9, -1136838880, -1110172088, 1034801784, 1019964530) + + W(10, 1015821086, -1125398316, -1137766108, 1004924993) + + W(11, -1120000922, 1027487385, 1017988993, 1028774587); + WS(1065136439, 1040354336); + sum1 = W(0, -1140671753, -1108811053, 1057254373, -1099781359) + + W(1, 1009251305, 1003726785, -1120274067, -1107804932) + + W(2, 1050226223, -1098842784, 1010162395, -1112753158) + + W(3, 1022747450, -1110160675, 1059609336, -1092867554) + + W(4, 1036945681, -1120831377, -1124666160, -1105899870) + + W(5, 1031464345, 1000731409, -1114655220, -1112429520) + + W(6, -1123069063, 1025125049, -1093137163, 1058399851) + + W(7, -1109467235, 
-1119063979, -1131113682, 1033405406) + + W(8, -1111386668, 1055942809, -1101547023, -1146511789) + + W(9, -1118356003, -1129239633, -1105328992, 1044975810) + + W(10, -1113825780, -1116155918, -1126498983, 1016924742) + + W(11, -1111235284, 1051704028, -1104493413, -1117793303); + sum2 = W(0, -1139839138, 1034041544, 1034669108, -1095769692) + W(1, -1105759439, 1039138956, 998321428, 1018598993) + + W(2, -1116818777, -1114897304, -1105398904, -1145690340) + + W(3, -1134067290, 987604943, 1007335842, -1102588486) + + W(4, -1108283066, 1018314989, 1024466364, -1102834025) + + W(5, 1061492513, 1062722215, -1098749835, 1025712340) + + W(6, 1008723794, -1131349085, -1121214551, -1107691916) + + W(7, -1108418109, 1022467761, -1119932526, -1118877643) + + W(8, -1116983417, -1117176868, 1041629273, -1121476700) + + W(9, 1028368392, 1031347596, -1114752830, 1009572226) + + W(10, -1103951670, 1033232162, -1132382549, -1113750023) + + W(11, -1103912225, 1035794880, 1046707789, -1110829011); + WS(1015535328, -1084984071); + sum1 = W(0, -1114800552, -1108603860, -1099862310, 1058689409) + + W(1, -1114937124, -1119809253, 1017574081, -1120445077) + + W(2, -1098244185, 1050368026, -1113854018, -1119794874) + + W(3, -1121052395, -1120661414, -1094435959, 1059588503) + + W(4, -1120887600, -1117398367, -1121642214, 1038434434) + + W(5, -1091868509, 1053265766, 1038069402, -1124845785) + + W(6, 1031943978, 1004694578, -1100742889, 1053762387) + W(7, 1032168457, -1126829041, -1119258038, 983354644) + + W(8, -1090330210, 1049777811, 1033335786, -1124490374) + + W(9, 1015340962, -1131668588, -1106546547, 1042220181) + + W(10, -1112611163, -1122991451, -1152091148, -1117988144) + + W(11, -1096238847, 1044693386, 1033654385, 1025017029); + sum2 = W(0, -1113763234, -1123863865, -1152383745, -1153318001) + + W(1, 1032880554, -1112434267, -1113925793, -1117743388) + + W(2, 1049718598, -1115051721, -1108241407, 1017487533) + + W(3, -1117079348, 1031541130, 1054034272, -1111062529) + + W(4, 
1025815300, -1115045873, 1016562523, 1034144457) + W(5, 1045174355, 1029337732, 1040219109, -1115252534) + + W(6, -1106152769, -1108434934, 1019958281, -1107848604) + + W(7, -1122950542, -1112234054, 1021767953, 1028245712) + + W(8, 1048832333, 1036566626, 1032317223, -1144290245) + + W(9, -1110489214, -1107854239, 1037079656, -1106439039) + + W(10, -1114917041, -1111099643, -1118706081, -1131124226) + + W(11, 1041167636, -1100833945, 1023645948, -1123196065); + WS(1058837943, 1069665989); + sum1 = W(0, 962159770, -1123278397, 1059574623, -1100091288) + + W(1, -1106975399, 1026912571, -1131526763, -1133106088) + + W(2, 1052414436, -1100255560, 1034080493, 1026538575) + + W(3, -1123587520, -1111839826, 1056026369, -1090882007) + + W(4, 1026032901, -1128992103, 1016761717, -1119033369) + + W(5, 1050778647, -1089604336, 1030312842, -1142474253) + + W(6, -1116652202, -1112323118, 1057288447, -1090318286) + + W(7, -1115237492, -1126485147, 1020359383, -1123741278) + + W(8, 1051933055, -1098258189, -1123145400, 1024876312) + + W(9, -1124916236, -1111587013, 1049400292, -1098860821) + + W(10, 1023272149, -1130617057, -1120750038, 1031183749) + + W(11, 1050858601, -1113801599, -1102573630, -1147252941); + sum2 = + W(0, 1031825723, 1031898082, -1068962638, -1097279430) + W(1, -1106804175, 1033946762, -1121672345, -1123032728) + + W(2, -1070958719, 1042495248, 1004890268, 1015278793) + W(3, -1128689519, -1149677000, 1062859919, 1042236169) + + W(4, 1033261211, -1125393239, 1003655756, 1042326937) + W(5, 1079716701, -1097154173, 1047991146, -1129356209) + + W(6, -1112729716, 1016978721, 1069238165, 1043003442) + W(7, 1029430153, -1130449529, 1021714543, -1135526098) + + W(8, -1106783417, -1101596383, 1006662138, -1143028884) + + W(9, -1143241256, 1017362221, 1038638730, -1114855682) + + W(10, 1016716693, 1021676019, 1015056847, -1115756346) + + W(11, 1030095799, 1029691669, -1116617969, 1007567766); + WS(1027937136, 1051433372); + sum1 = W(0, 1024223452, 1027262836, 
-1095583884, 1036904555) + W(1, 1037383306, 1026871330, -1131494490, 1034970897) + + W(2, -1102510757, 1043230726, -1126475809, -1131579916) + + W(3, 1034208027, 1041574867, -1088128354, 1043513377) + W(4, 1038022750, 1022249046, -1113660825, 1051529138) + + W(5, -1082034838, -1088109312, 1049740992, -1121104697) + + W(6, 1026687777, 1043759333, -1107709564, 1040837474) + W(7, 1043020088, 1030621125, 1007645322, 1035492122) + + W(8, 1021392868, 1023668834, 1040579632, -1128113439) + W(9, 1019229306, 1025216066, -1116281753, 1033050768) + + W(10, 968839375, 1026071912, 1023660333, 1033175550) + W(11, 1019915650, 1041627229, 1040383308, 1024084817); + sum2 = W(0, -1122636491, 1019704225, 1049771287, -1108218455) + + W(1, 1003686724, 1007985666, -1114076695, 1037288383) + W(2, 1032297878, 1029534785, -1114871835, 1019192761) + + W(3, -1122337791, 1038646185, 1050229287, 1038576544) + + W(4, 1026526587, -1146139348, -1121313911, -1113812515) + + W(5, 1058219066, 1058416091, -1129529893, 1021492965) + + W(6, 1019616565, -1115379683, -1126947029, -1096957025) + + W(7, 1020162449, -1131201101, 1032037049, -1144905268) + + W(8, -1089173556, 1029636273, 1011141050, -1129337357) + + W(9, -1121846167, -1124810757, -1109798125, -1112204367) + + W(10, 1015768281, -1117257463, 1035628188, -1104091459) + + W(11, -1085784913, 1039375301, -1169657505, 1026702945); + WS(-1087300279, 1060388257); + sum1 = + W(0, -1129462823, 1023690092, -1095623283, -1106106692) + W(1, 1037669414, -1130365587, 1031115364, 1041026294) + + W(2, -1092905754, 1045171988, -1113959460, -1127843579) + + W(3, -1114720439, 1027269348, -1093094418, 1058204318) + + W(4, -1142625182, -1122973026, 1025637865, 1022155151) + W(5, -1092295181, 1060821043, 1044954958, 1027810288) + + W(6, -1150727557, 1032140389, -1091295733, 1047241920) + W(7, 1031904715, 1008074737, 1019407412, 1019365849) + + W(8, -1094422277, 1051757075, 1022315054, 1005685302) + W(9, 1005184420, 1035913085, -1097873082, 1047453654) + + W(10, 
-1121754201, 1011776124, -1144567588, -1142073248) + + W(11, -1089577936, 1052799910, 1027854342, 1023937224); + sum2 = W(0, -1120594034, -1166872476, 1040489823, -1102509951) + + W(1, -1086857984, 1045679794, -1141459412, 1004777140) + + W(2, -1122900789, -1110936244, -1095818924, -1104593759) + + W(3, -1136839806, -1113289360, 1021539093, 1067324619) + + W(4, -1093760272, -1110068744, 1028313984, 1013932442) + + W(5, -1095750682, 1045560576, 1065644346, -1103109579) + + W(6, -1127607807, 1019995585, 1045364618, -1096904945) + + W(7, -1123950010, 1034732462, -1122936307, -1124945717) + + W(8, 1031026044, -1100965201, 1042979309, 1018809021) + W(9, 1007613018, -1145964804, 1033830372, 1000232964) + + W(10, -1121148085, -1152320711, -1134584398, -1157638606) + + W(11, -1125456395, -1108078168, 1026162850, 980148252); + WS(-1090838638, 1050174926); + sum1 = + W(0, 1024715784, -1109115088, -1106716164, 1038102082) + W(1, -1115525408, 1016756989, 1033853293, 1049593122) + + W(2, -1120751425, 1051782073, -1136454131, 1035169257) + W(3, -1106406969, 994296459, -1094496661, 1043814476) + + W(4, -1105105482, -1108133484, 1042483832, 1045373935) + W(5, -1086184702, 1058236139, 1033520471, 1042591993) + + W(6, 1025782733, 1015694078, -1092092861, 1045378621) + + W(7, 1029286795, -1140252101, -1138965820, -1112080289) + + W(8, -1095245812, 1042079876, -1106373991, -1115290814) + + W(9, 1038488261, 1045351388, -1100690449, 1050438570) + + W(10, 1034575399, 1041627941, -1139164262, -1114987669) + + W(11, -1094326222, 1040938025, -1122199803, 1001134058); + sum2 = + W(0, 1051544715, -1087153288, -1090874159, 1047568122) + W(1, -1111412903, 1034895049, 1047565246, -1111682644) + + W(2, -1099176408, 1032035298, 1040133461, -1131191613) + + W(3, 1046232208, -1081925443, -1105576760, -1119008010) + + W(4, -1140028602, -1112144320, -1105592564, 1060283907) + + W(5, 1048906709, 1036693685, -1107739759, 1040322447) + + W(6, -1094324661, 1062548140, -1112367085, -1119206793) + + W(7, 
1044620924, -1130045765, -1105924841, -1127110112) + + W(8, 1049091960, -1158298480, -1096403637, 1011461326) + W(9, 1021200095, 1051334754, -1107585156, 1036759049) + + W(10, 1046459856, 1012649582, -1103932050, 1033462267) + + W(11, 1048824237, -1114785543, -1114351857, -1111955333); + WS(1039738296, 1044009556); + sum1 = + W(0, 1031992376, -1113116036, 1058890327, -1088719167) + W(1, 1040542090, -1107110028, -1118478209, -1126664684) + + W(2, 1042459434, -1107130859, -1129453002, 1036863629) + W(3, 1036413734, 1028656912, 1054229481, -1088309806) + + W(4, 1042227988, -1133301245, 1042319122, -1095949365) + + W(5, 1057173004, -1090115281, -1127816747, -1121635021) + + W(6, 1025321013, 1046101634, 1048940614, -1090901446) + W(7, 1048439133, -1120611033, 1019779694, -1104617834) + + W(8, 1052520321, -1096019759, -1119199044, 1022364232) + + W(9, -1126468451, -1111694091, 1047137387, -1098195015) + + W(10, 1036380619, -1108092830, 1028741314, 1029160593) + + W(11, 1057385245, -1090793697, 1040359505, -1129794251); + sum2 = + W(0, -1088996856, 1009251693, 1062110924, -1115283476) + W(1, 1030923639, 1034262626, -1089163817, -1107842867) + + W(2, 1057525018, 1042234988, 1032774718, -1123202523) + + W(3, -1081345153, -1124806950, 1064189433, -1104109471) + + W(4, -1126776930, -1130902850, -1080285142, 1052635731) + + W(5, 1067630395, -1112028150, 1034769026, 1036915346) + + W(6, -1079794071, -1106464080, 1063058829, -1113992476) + + W(7, -1106165885, -1110986466, -1089981167, -1125456570) + + W(8, 1062842970, 1045262592, -1136163037, 1035294670) + W(9, -1091438196, -1115606726, 1054193960, 1042247754) + + W(10, 1032379012, -1135519157, -1099490886, -1106662163) + + W(11, 1053790298, -1104249669, -1108359351, 1024576891); + WS(-1096394862, 1050867114); + sum1 = W(0, 975777376, 1011677768, 1050217928, -1095631255) + W(1, -1105375964, 1033079080, 1035680965, -1097606072) + + W(2, 1041761501, -1108785275, 1035556439, -1107649602) + + W(3, 1017323049, 1017467204, 1059053443, 
-1088500104) + + W(4, 1042635014, 1020084753, -1164976624, -1122057965) + + W(5, -1098739346, -1111108333, 1025006046, -1112750027) + + W(6, -1126109949, 1039142936, -1089683010, 1062642602) + + W(7, 1038036638, -1142614292, -1117859264, -1137149130) + + W(8, -1104430516, 1040456976, 1027021261, 1017623258) + W(9, 1011370603, 1023416416, -1115445691, 1016594173) + + W(10, 1041295544, -1113983747, -1122131819, -1112401517) + + W(11, 1042596520, -1120553683, 1029575724, -1124248029); + sum2 = + W(0, -1131210775, 1029829507, 1018275001, -1122984086) + W(1, -1096338526, 1033255054, 1024840523, -1108448221) + + W(2, -1117395779, -1132016729, -1114466007, -1103146109) + + W(3, 1024067225, 1046565608, 1042461338, 1059140985) + W(4, -1107995649, -1119288636, 1022860453, -1107856867) + + W(5, -1092962046, -1089805596, -1166014069, -1099911282) + + W(6, -1125865258, 1042031083, 1053212914, 1058514952) + + W(7, 1052927486, -1113255745, -1130239171, -1106481128) + + W(8, -1102214835, 1027704725, 1041713274, -1106771914) + + W(9, 1024964869, -1127390051, -1114692565, -1118917379) + + W(10, 1037010892, -1111017176, -1117571915, -1124576676) + + W(11, 991892650, 1032905776, 1032336370, 1000523989); + WS(-1112511928, 1031640207); + sum1 = + W(0, 1037301926, -1102373753, 1058068053, -1091912761) + W(1, 1030698115, -1112730073, -1130112569, 1048837234) + + W(2, 1051625169, -1095738529, 1045615952, 1045904421) + + W(3, -1119711398, -1091456672, 1054040787, -1101742756) + + W(4, -1104227307, -1098654019, 1044751592, 1045582560) + + W(5, 1044375881, -1090186767, 1051640030, -1111879892) + + W(6, -1102628100, -1119033858, 1057193534, -1090190311) + + W(7, -1111999463, -1123401908, 1042267541, -1093436941) + + W(8, 1051238780, -1110077161, 1008998972, -1101737376) + W(9, 1021159833, 1050238014, 1045936610, -1098844722) + + W(10, 1050341641, 1043894710, -1122521821, -1100593739) + + W(11, 1058222955, -1090967581, -1108465231, -1112695488); + sum2 = + W(0, 1016218439, 1024549194, 
-1121292007, 1037076368) + W(1, -1112700443, 1032984152, 1009444234, -1117974589) + + W(2, -1111737758, -1110249739, 1020335555, -1112172263) + + W(3, -1111008440, 1042315856, 1047393563, -1086831522) + W(4, 1038599637, 1060801772, 1026480303, -1106327815) + + W(5, -1089871257, -1069929292, -1111547810, 1079084175) + + W(6, 1015374231, 1040022175, -1115783084, -1085683537) + W(7, 1049254465, 1057876572, -1115006890, 1025059842) + + W(8, 1043300713, 1033002057, -1118960414, 1033693800) + W(9, 1027237715, -1110870197, -1103999090, 1033912191) + + W(10, -1118358651, -1104475663, -1122692269, 1033099318) + + W(11, 1022298131, 1032775289, -1104218510, 1045399536); + WS(-1106120924, 1033431669); + sum1 = W(0, 1016553159, 1035990878, -1101729723, 1042266392) + W(1, 1024064284, 1018305971, -1146851238, 1041817850) + + W(2, 1024013341, 1027850273, 1026394008, 1021653787) + W(3, 1024375743, 1041767260, -1116899488, 977256084) + + W(4, 990294084, 996614956, 1026515514, 1047579686) + W(5, -1085522812, -1090381288, 1048833585, -1125739905) + + W(6, 1005429062, 1031819293, -1090064383, 1048109660) + W(7, 1038017843, 1016883609, 1029812010, 1026577053) + + W(8, -1118087802, -1113190325, 1040657490, -1121801815) + + W(9, 1008887700, 1014785832, -1126747713, 1024173993) + W(10, 1020235411, 1027952623, 1021177962, 1011005202) + + W(11, 1025078601, -1127218683, 1041410670, -1132785763); + sum2 = + W(0, 1032555846, -1110721648, -1094136567, 1030929664) + W(1, -1146526593, 1018020888, 1019641904, -1116342888) + + W(2, -1089264280, -1112511004, 1032788860, -1125819008) + + W(3, 1015684488, -1102693078, -1097199465, 1046449598) + W(4, 1012812816, 1004426241, -1123545328, 1027573888) + + W(5, 1056898914, 1052843165, 1033507644, -1122415072) + W(6, -1118308408, 1043950970, 1037769558, 1024674316) + + W(7, -1106559340, 1028547840, -1117707424, 1021888896) + W(8, 1041236542, -1109778580, 1017687520, 996318210) + + W(9, -1137876352, 1007632640, 1026403708, -1123597216) + + W(10, 1028523456, 
-1149117889, -1152988290, 1023916088) + + W(11, 1046276806, -1105717038, -1122312832, -1132920416); + WS(1034973624, 1056792353); + sum1 = W(0, -1120658336, -1130821617, -1114903379, -1130597942) + + W(1, -1109039532, -1124529164, -1121055921, -1114464179) + + W(2, -1125024178, -1102939831, -1115164458, -1124681556) + + W(3, -1128295926, -1106629009, 1008426736, -1118485143) + + W(4, -1104109440, -1138254780, 1021962732, -1093463666) + + W(5, 1062068439, 1067744046, -1094141737, 993935312) + W(6, -1127490506, -1097245927, 1050479246, 1049107361) + + W(7, -1105858697, -1132704804, -1119904327, -1132912612) + + W(8, -1107542555, 1045213129, -1119937620, -1120970067) + + W(9, -1122942113, -1118132038, 1028006365, -1113875250) + + W(10, -1141854766, -1122550700, -1119710033, -1115274080) + + W(11, 1025872901, 1036565277, -1121170098, -1120520661); + sum2 = W(0, 1020401308, -1120501349, 1037264768, -1090330474) + W(1, -1105802125, 1023925581, 1017661559, 996214904) + + W(2, -1116129767, -1084580575, 1032645479, -1119976615) + + W(3, -1129375401, -1140432422, 1035443152, -1097846115) + + W(4, -1102145888, 1025111856, -1130735541, 1024553914) + + W(5, 1051779064, 1064606263, 1038402436, -1136822486) + W(6, -1146688164, 1006774606, 1039827062, 1048687051) + + W(7, 1000828364, -1121332237, -1166762145, -1124013507) + + W(8, 1025600009, 1019182043, 1025151371, 1015612208) + + W(9, -1130185789, 1033491675, -1114545658, -1117487855) + + W(10, 1028059566, -1119338761, -1137125822, -1117903931) + + W(11, 1032817572, 1037037513, -1112238814, 999461036); + WS(-1082445367, -1085006700); + sum1 = + W(0, -1131835086, 1041974800, -1081109875, 1054173212) + W(1, 1035072463, -1137431795, 1034878908, 1043293403) + + W(2, -1092276724, 1051286356, 1034209806, 1038048136) + W(3, -1106661648, 1042063924, -1086386347, 1055927462) + + W(4, -1114712644, -1110863483, 1037617118, 1045196135) + W(5, -1094524616, 1058619216, 1036390662, 1042877649) + + W(6, -1121819052, 1037212703, -1094507255, 
1050138098) + + W(7, 1015171028, -1123146704, 1022066011, -1105403448) + + W(8, -1104988703, 1028915053, -1139447658, -1114994477) + + W(9, 1036207796, 1046411795, -1105674409, 1041419787) + W(10, 1026111277, 1040360493, 1033542412, -1100599618) + + W(11, -1096619449, -1130824962, 1045523870, -1121943708); + sum2 = W(0, -1112963339, 1043137200, 1014823768, -1089890678) + + W(1, 1022229356, -1128614108, 1025676326, 1000796272) + W(2, 1031859059, -1102855534, 1031913791, 1018516204) + + W(3, -1117465202, 993751520, 1020211292, 1018575516) + + W(4, -1120060570, -1138611816, 1036266015, -1098528956) + + W(5, 1055374057, 1061198225, -1098119563, 1020738140) + W(6, -1124864900, 1032573075, 986723264, 1038055959) + + W(7, -1106568385, -1132953832, 1019242508, -1097473255) + + W(8, -1125510524, 1050136287, -1098582022, -1123833514) + + W(9, 1017632828, 1049454991, -1111044051, -1109681619) + + W(10, 1030233630, 1035860627, 1039721851, -1090503899) + + W(11, 1007293144, 1054985311, -1101215648, -1110059149); + WS(-1089141943, 1068474134); + sum1 = + W(0, 1024130588, -1121972404, 1032495505, 1036801227) + W(1, -1117579092, -1114425265, -1101143803, 1041396360) + + W(2, 1045416789, -1094238571, 1039470024, 1024684838) + W(3, 1039148539, -1105946681, -1106461946, 1057861426) + + W(4, -1113046718, -1108918837, -1124204515, 1052602058) + + W(5, -1094942270, -1098420551, 1047303536, -1127532529) + + W(6, 1026773217, -1091638882, 1059099726, 1040764762) + W(7, -1094617668, 1027200645, 1018218693, 1032280890) + + W(8, -1106993164, 1033290001, 1040300696, -1103088708) + + W(9, -1104179471, 1041946144, 1045088278, -1105244536) + + W(10, -1110195745, 1043167338, -1123573325, -1138642417) + + W(11, 1009482242, 1037819035, -1108347351, -1116739793); + sum2 = W(0, 1016644168, -1134771145, -1129326838, -1114142714) + + W(1, 1012771369, 1025799488, 998892514, -1114282184) + W(2, 1049635340, -1101404122, -1114952606, 1041138771) + + W(3, -1116049462, 1042383274, -1147070546, -1095734150) + 
+ W(4, 1057697290, -1095991584, 1033248586, 1025008594) + + W(5, -1102476512, 1060588986, -1111850310, 1027653952) + + W(6, -1110140123, -1104878488, -1090718582, 1052949499) + + W(7, -1096580248, -1127102864, 1041609896, -1101611481) + + W(8, 1046973498, -1143476482, -1114336646, 1035005082) + + W(9, -1103095555, 1048842205, -1115539252, -1110997031) + + W(10, 1031782132, 992902692, 1030835892, -1122288558) + + W(11, 1028769716, 1022411808, -1118966953, -1120028296); + WS(1055908206, 1023803300); + sum1 = + W(0, 1033033649, -1114766205, 1059767687, -1093070812) + W(1, -1106762950, -1139512861, -1128828388, 1011459483) + + W(2, 1054454099, -1094569204, 1029622486, 1016172182) + W(3, 1027156540, -1106069078, 1057986836, -1086765600) + + W(4, 991938588, 1027600816, 1024886583, -1116675477) + W(5, 1059734148, -1090099632, -1098865153, 1027525694) + + W(6, -1122498726, -1116893112, 1056528229, -1094363024) + + W(7, -1101593593, -1135964969, 1032744244, 1045609495) + + W(8, 1043829406, -1102732570, -1139103892, 1036132330) + + W(9, -1114524705, -1113403236, 1047712168, -1103438026) + + W(10, 1022039246, -1117700205, 1005980052, 1049123968) + + W(11, 1049516377, -1098493775, -1097177334, 1035501771); + sum2 = + W(0, 1025334517, 1052224655, -1085507535, -1083097675) + W(1, -1097828858, 1062578782, -1115143239, 1029199567) + + W(2, -1092254017, -1079915962, -1092877982, 1029921983) + + W(3, -1112308287, 1050599738, -1091835304, -1078367265) + + W(4, -1101001669, 1036651638, -1115511381, -1137182696) + + W(5, 1050309385, -1085955474, 1031547399, 1017725718) + W(6, 1042864237, -1101494467, 1052849573, 1077549274) + + W(7, 974314816, -1103762806, 1031444191, -1094150391) + W(8, 1058681981, 1068550083, -1100178328, -1106558945) + + W(9, 1040795754, -1116631442, -1093246396, 1054678659) + + W(10, 1046838481, 1039431488, -1111573417, -1092627938) + + W(11, 1066882777, 1059124396, -1088438520, -1098096651); + WS(-1078207964, -1098790270); + sum1 = + W(0, -1117816013, 1017293260, 
1049047675, -1100878354) + W(1, 1032784131, -1126192007, 1039615439, -1121106131) + + W(2, -1100078274, 1049964833, -1107321921, -1122801847) + + W(3, 1040159636, -1098516348, 1057717203, -1094050686) + W(4, 1040450075, 1034337038, 1032454162, -1100564368) + + W(5, 1041647209, -1119951725, -1099964381, 1014677674) + + W(6, 1029735698, -1142189601, -1090236946, 1052197359) + + W(7, -1116032848, 1028031798, 1026856980, -1115831559) + W(8, 1041839801, -1110599850, 1034137049, 1027844545) + + W(9, -1128361629, -1129405415, -1114821509, 1009705546) + + W(10, 1036993018, -1121696025, 1002339317, -1114960439) + + W(11, 1034781465, 1020602063, 993360895, -1130484229); + sum2 = + W(0, 1025029135, -1104367710, -1119205162, 1051361304) + W(1, -1097951274, 1037239658, -1115606678, -1132455965) + + W(2, 1051294613, -1090262380, 1046775583, 1027534783) + + W(3, -1098891737, 1054704406, -1118431701, -1094584814) + + W(4, -1111734067, -1137589907, -1111864983, 1035843132) + + W(5, 1040638061, 1042639742, -1115582191, -1161598998) + W(6, -1112278329, 1038310704, 1033551568, 1042180969) + + W(7, -1116801751, -1113612863, 1017169741, 1037432870) + + W(8, -1095570314, 1054301740, -1102111461, -1137659371) + + W(9, -1123958951, -1137648567, 1040693441, 1006683771) + + W(10, -1108749213, 1017443081, 1031819582, -1122384617) + + W(11, 1008777251, 1032304440, -1103720876, 1038413214); + WS(1060561207, -1110135205); + sum1 = + W(0, -1114856118, 1049197464, -1091745710, 1057921518) + W(1, 1045672489, 1021270499, -1124131449, -1108658488) + + W(2, -1095755605, 1045522218, -1100746808, -1112929307) + + W(3, -1121034566, 1041962623, -1091090769, 1055093690) + + W(4, 1019645080, -1157317541, -1112340961, 1047635080) + + W(5, -1089785676, 1056738433, -1111234056, 1036938084) + + W(6, -1112123800, 1029521634, -1088312300, 1057086969) + + W(7, -1095870350, -1148757666, 1034679769, 1045001085) + W(8, -1105390912, 1048793054, 1047514180, 1026507103) + + W(9, -1115322038, 1019494819, -1095959893, 
1049205462) + + W(10, -1099214252, 1019330670, -1107671902, 1038785126) + + W(11, -1089779917, 1057170552, 1022895484, 1029094240); + sum2 = + W(0, -1110883135, 1041894205, -1114700148, -1082032478) + W(1, 1046875162, 1064081845, -1122184691, -1115970606) + + W(2, -1118217955, -1081043792, 1049320576, 1059051295) + W(3, 1008834310, 982318965, -1097954058, -1085641610) + + W(4, 982336213, 1064706681, 1033395903, 1040255170) + W(5, -1109327580, -1095688614, -1107088758, 1066526360) + + W(6, -1110716892, -1134396672, -1107386491, -1084362127) + + W(7, 1032307660, 1060847547, 1024095030, 1031402202) + W(8, -1114148625, -1093205100, -1122373488, 1058657081) + + W(9, -1113505190, -1121857157, -1116084956, -1087863650) + + W(10, 1045820748, 1055302318, -1113542640, 1031923695) + + W(11, -1128842340, -1094939475, 1044473869, 1041403755); + WS(-1088848183, -1091270356); + sum1 = W(0, -1113444636, 1022583331, -1092936920, 1050793382) + + W(1, 1035173048, -1115063927, -1127291412, 1011517178) + + W(2, -1094397598, 1055228191, -1112816343, 1007040780) + + W(3, -1131637032, 1047190142, -1092945549, 1056457416) + + W(4, 1039723405, -1118782984, -1105736260, 1040877723) + + W(5, -1086200137, 1051083611, 1039289519, -1114815627) + + W(6, 1030983166, 1035565944, -1102025475, 1056969730) + W(7, 1015654008, 998545680, -1127378047, -1097520394) + + W(8, -1104020948, 1051399464, -1108815670, -1127143385) + + W(9, 1037357901, -1114602800, -1116833526, 1044492479) + + W(10, -1116817631, 1025703109, -1115666437, -1108205485) + + W(11, -1102830535, 1047811509, 1032741190, -1123886254); + sum2 = + W(0, 1023906737, 1067295699, -1081442287, -1097077469) + W(1, 1029195957, 1026431960, -1110646223, 1065087692) + + W(2, -1085042817, 1037915281, 1015927544, -1132623721) + + W(3, -1119411664, 1065459703, -1083251842, -1096872835) + + W(4, 1018101230, 1018762696, 1010017751, -1118952488) + W(5, 1050008558, -1110920551, 1042598936, -1124571002) + + W(6, -1108429461, -1081475488, 1064700095, 
1006981249) + + W(7, -1103158035, 1020104602, 1029589423, -1083105366) + W(8, 1064469883, 1036080906, 1041992018, -1114107558) + + W(9, -1110439418, -1095503285, 1057972141, -1106015672) + + W(10, -1108943564, 1036691137, -1125097343, -1083389324) + + W(11, 1061045473, 1051761632, -1114827780, -1119006739); + WS(1034259896, 1039623341); + sum1 = W(0, 1016878676, -1137539339, 1055868742, -1088089066) + W(1, 1043298073, 984347141, 1028460795, -1104998988) + + W(2, 1047938201, -1091378989, 1036240559, -1121755284) + + W(3, 1034359235, -1129847248, 1058114664, -1090213650) + W(4, 1043122195, 932956556, 1011385345, 1015052988) + + W(5, 1043619008, -1114958501, -1119276751, 1018241806) + + W(6, 1015855070, 1032818160, -1096687428, 1054476715) + W(7, -1110579607, 1026808723, 1016831138, 1044006969) + + W(8, -1092418872, 1049053750, -1101201217, 1034235663) + + W(9, -1128588247, 1033445320, -1100196186, 1040121848) + + W(10, -1106781444, 1010316760, 1025908283, 1043524531) + + W(11, -1095896370, 1044746961, 1018213241, -1120101215); + sum2 = + W(0, 951810592, -1108037836, -1092018666, -1130209970) + W(1, 1022643242, 1009312276, 1033659165, -1102042508) + + W(2, -1107310314, -1115641842, -1110096702, 1032067593) + + W(3, 1002259560, -1103529440, -1101738099, 1041635941) + + W(4, 1034513697, -1114108683, 1037243759, -1102191435) + W(5, 1066447662, 1067062587, -1100161415, 1041326077) + + W(6, -1112310958, -1111056455, 1032823775, -1098661250) + + W(7, 1027860693, -1120382671, 1034753865, -1119026649) + + W(8, 1031795157, -1099014553, -1096684817, 1026651225) + W(9, 992340497, -1101647251, 1040899047, -1117726225) + + W(10, -1114962434, 1038338139, -1114454490, 1024637657) + + W(11, 1046257437, -1111013301, -1112982645, -1106704138); + WS(-1096556910, 1067967914); + sum1 = W(0, 1032707155, -1122886335, -1095119782, 1055235700) + + W(1, -1120012638, 1031514684, 1024354487, 1031668012) + + W(2, -1103532312, 1048353625, -1105977207, -1126996321) + + W(3, -1131915011, 1021347526, 
-1095435756, 1044387963) + + W(4, -1105830818, -1116416117, 1043480447, 1041914807) + + W(5, -1090836352, 1053060297, -1133082414, 1043128001) + + W(6, -1118942187, 1036630543, -1091218783, 1041581564) + + W(7, 1017318739, -1110350280, -1132633269, 1034774528) + + W(8, -1104917967, 1052242886, -1121967338, 1039162573) + + W(9, 1031302952, -1125222297, -1100801113, 1033613430) + + W(10, -1130484760, -1148985233, 1031513592, -1106257250) + + W(11, -1104067043, 1050269976, -1109204860, 1034839630); + sum2 = + W(0, -1125934657, 1041393774, -1097497496, 1047775001) + W(1, -1131783044, -1122059316, -1120747465, 1032139854) + + W(2, -1114660789, -1105604596, 1018330620, -1129241452) + + W(3, -1126946517, -1107151652, -1096477783, 1048515105) + + W(4, -1096755902, -1147658728, 1019390634, 1046075673) + W(5, 1057927405, 1059160295, 1041946221, 1011994599) + + W(6, -1140291646, -1090083756, -1110210473, 1043731078) + + W(7, -1091797353, 1032850220, 1045993569, 1024325836) + W(8, -1137735680, 1039621515, 1046026631, -1126091098) + + W(9, -1110717499, -1116058677, 1033250412, -1116506490) + + W(10, -1106745231, 1009337229, 1038335355, -1097350707) + + W(11, -1127153776, 987432265, -1124643180, -1124186068); + WS(1061843767, -1077951557); + sum1 = + W(0, 1026388735, -1106366502, 1059311576, -1088217072) + W(1, 1041378862, -1102542817, -1120114128, -1131166601) + + W(2, 1043697070, -1105977039, -1130669927, 1039925540) + W(3, 1037862893, 1029755951, 1054385928, -1091419383) + + W(4, 1047467576, 989452730, 1029309653, -1091391038) + W(5, 1057568381, -1089136642, -1111164306, -1105668215) + + W(6, 1036389538, 1048316317, 1054220568, -1092475601) + + W(7, 1053509792, -1138232735, -1122000006, -1099454574) + + W(8, 1049723715, -1095416897, -1108260705, -1130981221) + + W(9, -1126306304, -1106178153, 1050234615, -1096877448) + + W(10, 1038585474, -1107228323, 1030215888, 1034019808) + + W(11, 1058416302, -1091809363, 1045917860, -1116719540); + sum2 = + W(0, 1061533858, 1041546460, 
-1081881922, -1104625991) + W(1, 1020257203, -1112819836, 1062451832, 1044663473) + + W(2, -1084977950, -1116240264, -1107478927, -1146254682) + + W(3, 1068272760, 1019905335, -1081565785, -1115490290) + W(4, 1038097782, 1018218872, 1070735043, -1103665132) + + W(5, -1079025547, -1094934042, -1129735749, -1108448519) + + W(6, 1070929067, 1027882908, -1082597652, 1002519540) + W(7, 1043320870, 1032555484, 1058522058, 1053579543) + + W(8, -1079765468, -1096683616, 997759194, -1111378699) + + W(9, 1059576348, 1030217484, -1088759123, -1103272320) + + W(10, -1110736144, 1018278443, 1052624489, 1042937819) + + W(11, -1090364693, 1039524167, 1041293151, -1120119454); + WS(-1084224055, -1088316584); + sum1 = + W(0, -1113613001, -1107673717, 1006747746, 1039555654) + W(1, 1033838821, -1159624430, 1003388692, 1028109079) + + W(2, -1098426505, 1048291572, 1024741190, -1124153911) + + W(3, -1106869627, 1012698375, -1088923256, 1059475506) + W(4, -1120760050, 1033833840, 1016662735, 1027116933) + + W(5, -1097246285, 1054502102, 1019707757, 1010228666) + W(6, 1022316973, -1094644889, 1057877820, -1093203288) + + W(7, 1048977331, -1114819581, -1136760929, 1023495179) + + W(8, 1033746004, -1098003683, 1036081339, -1110624834) + + W(9, 1028746387, -1107590588, 1041863252, -1105649846) + W(10, 1041797102, -1119454813, 984919127, 1024116594) + + W(11, 1022736621, -1114921814, -1127589064, -1122201606); + sum2 = W(0, 1015245257, -1113301472, -1126889814, -1122805777) + + W(1, 1024783162, -1125306444, -1113948506, -1150409563) + + W(2, 1027745966, -1121614859, 1024732276, -1120982285) + + W(3, -1130594159, -1113241340, 1053517180, 1035094088) + W(4, 1035927962, -1117285071, 1007533655, 988101766) + + W(5, -1115083078, -1117590123, 1033594898, -1115586407) + + W(6, -1122336092, -1114338752, -1120298083, 1041346715) + + W(7, -1136757529, -1179946326, -1131246557, 1042344950) + + W(8, 1009652067, 1010321235, -1112303516, -1122655190) + + W(9, -1121042747, 1024929700, 1023796132, 
1022595647) + + W(10, -1107129305, 1031780006, -1112003767, 1034588627) + + W(11, 1015099871, 1021357977, -1116844702, -1109817399); + WS(1056759150, 1060224665); + sum1 = W(0, -1139374311, -1132085157, -1099925456, 1041263149) + + W(1, 1033127353, -1131329248, 995010733, 1023919167) + + W(2, -1105275165, -1143162806, 1020125661, -1128574472) + + W(3, -1111414410, 1047983919, -1089721802, 1045591848) + + W(4, 1047176279, -1124813330, -1108849608, 1035204919) + + W(5, 1047380996, 1054640011, -1109178223, -1115433611) + + W(6, 1028987391, 1044747100, -1111752399, -1086265361) + W(7, 1047077327, 1014274699, 1016981889, 1009635916) + + W(8, -1106656308, 1052567064, 1025513390, -1114033239) + + W(9, -1123115291, 1026679904, -1119632759, -1119813375) + + W(10, 1020940735, -1135589654, -1129401942, -1133052010) + + W(11, -1103728616, 1036660778, 1026259564, -1154487021); + sum2 = + W(0, 1023227060, -1114834111, -1123255684, -1111609489) + W(1, 1029177075, 1009084849, 1023565277, -1123243930) + + W(2, 1036838581, -1109001470, 1046335474, -1110362282) + + W(3, 1023373012, -1110717616, -1121973236, -1106028739) + + W(4, 1040383540, 1030364025, -1127246210, -1119831250) + + W(5, -1128787384, 1068968933, -1085459757, 1038001226) + W(6, 1035525242, 995075850, -1106791563, -1084327071) + + W(7, 1055427968, 1004844681, -1115895934, -1112704991) + + W(8, 1017209933, 1049794788, -1106438120, -1109625554) + + W(9, 1032252451, -1137853837, 1025363014, -1103092626) + + W(10, 1039281515, 1026824663, -1115664904, -1177558227) + + W(11, -1117762720, 1033559138, -1108939748, 1027051057); + WS(1059198391, -1114206899); + sum1 = + W(0, -1123230572, 1047908609, -1086695351, -1111534963) + W(1, 1041894153, -1168813170, 1033309054, -1113872057) + + W(2, 1005036838, -1127426130, 1035651728, 1029018614) + + W(3, -1136657179, -1131612168, 1053830093, -1097279138) + + W(4, -1114797926, 1015674230, 1042361008, 1032838544) + W(5, 1063074340, 1032841926, 1042266564, -1115081892) + + W(6, -1127872354, 
1018582917, 1022976638, -1102974814) + W(7, 1036160183, 1022213546, 1026831155, 1025662981) + + W(8, 1048968528, -1086802103, 1043048448, -1119589535) + + W(9, 1034821095, -1122684754, 1037218004, -1094977286) + W(10, 1028546130, 1023015663, 1027013917, 1042918755) + + W(11, 1045668084, -1080167184, 1051954532, -1121567905); + sum2 = W(0, -1109294951, -1116747636, 1053044316, -1093594252) + + W(1, -1105874671, 1036768486, 1044771761, -1095594447) + + W(2, -1102224225, 1044254591, -1148207325, -1122217612) + + W(3, -1106240469, -1091993541, 1065361960, -1103264547) + + W(4, -1098859579, 1038496462, 1037653814, -1098134808) + + W(5, 1060397410, 1059446961, -1110887683, -1106764847) + + W(6, -1132138943, 1045429231, -1095097189, 1053539276) + + W(7, -1103270924, 1035231626, -1129034439, -1112675284) + + W(8, -1139768526, -1147713757, 1018090127, -1112984312) + + W(9, 1027920896, 1030264036, -1100471477, -1115698004) + + W(10, -1105560593, 1037287074, -1126749735, 1027709116) + + W(11, -1095674438, -1131922215, 1044415945, -1106415399); + WS(-1081905372, 1072993545); + sum1 = W(0, 1004609681, -1111485397, -1100618406, 1052555682) + + W(1, -1113666277, -1113131956, -1113931058, 1044870886) + + W(2, -1103601459, 1050421002, -1125853837, -1119475260) + + W(3, 1017897178, -1101161424, -1117978262, 1053397115) + + W(4, -1109207833, -1119057377, -1110350900, 1042868018) + + W(5, -1088871615, 1057434595, -1108715676, -1116054255) + + W(6, -1119842647, 1016958866, -1096217303, 1037835279) + W(7, 1041223333, 1033629886, 1029593625, 1036666966) + + W(8, -1090419947, 1054502387, -1131918060, 1019600139) + + W(9, -1114320538, 1034341084, -1105396084, 1036260396) + + W(10, 1046093262, 1034832815, 1026419555, -1123516147) + + W(11, -1100180286, 1041011339, -1108472674, 1025124935); + sum2 = + W(0, 1027207918, -1134820995, -1123507780, 1022927690) + W(1, -1148644129, -1124904791, -1131003531, 1019005665) + + W(2, 992028222, 1039006609, -1114458430, 1019252639) + W(3, -1155176792, 
1018656479, 1020777698, -1108026125) + + W(4, 985006155, 1019665619, -1144704129, 1016837667) + + W(5, -1093100477, -1075529389, -1165905039, -1148497627) + + W(6, -1120277627, -1133755465, -1106617270, -1071234400) + + W(7, 1009164120, -1154476856, 1018692534, 1007330736) + W(8, 1026210395, -1098388857, 1080134310, 1041946375) + + W(9, -1122116985, 1008190855, 1015481059, 1043410538) + W(10, 1068769301, -1120913968, 1016313728, 1024160458) + + W(11, 1009441727, -1127804151, 1038162442, -1102924087); + WS(1052536174, -1151096569); + sum1 = W(0, 991116371, -1128173243, -1103836723, 1032573088) + W(1, 999976938, 1037991630, 1033302347, -1146822742) + + W(2, 1034951935, -1120898050, -1138114279, 1035558856) + + W(3, -1116642336, 1036050892, -1087917523, 1057405287) + + W(4, -1098834165, 1042054293, -1115536654, 1034768509) + + W(5, -1106200391, -1120590821, -1118223722, 986260550) + + W(6, 1020657940, -1102372039, 1061554222, -1082912644) + + W(7, 1047922098, -1130396508, -1114833734, 1030675614) + + W(8, 1041046871, 1002819162, -1107937520, 1023135729) + + W(9, 1031490598, -1114628406, 1046795201, -1124848916) + + W(10, 1038688870, -1142052488, -1158391502, 1032401267) + + W(11, 1027714602, -1124036191, -1118805976, -1177279769); + sum2 = W(0, 1035223119, 1038869307, -1108836158, -1105867416) + + W(1, -1106282608, 1028503999, -1127800761, 1034423710) + + W(2, -1116666352, -1109342419, 1047028407, -1119414296) + + W(3, 1038890181, -1109261585, -1087753137, 1027333841) + + W(4, -1148256348, -1113209005, 1028905699, -1095895125) + + W(5, 1061176216, 1057224086, -1148022324, 1010099974) + W(6, 994981383, -1098712671, -1099737664, 1049674522) + + W(7, -1103462952, 1020393479, -1127955051, 1032845262) + + W(8, -1208820627, 1011854042, 1043242553, -1115953239) + W(9, 998786660, 1030375171, -1126035264, 1017325847) + + W(10, -1121350765, 1024565993, -1121293280, -1114148994) + + W(11, 1042226462, -1120003540, -1117125009, 1001260628); + WS(1058902967, -1081860445); + sum1 = 
W(0, -1121371387, -1116017491, -1114517185, -1144385454) + + W(1, 1031405875, -1133995267, 1029881036, -1110722656) + + W(2, -1096281750, 1046612526, 1032371736, -1118231237) + + W(3, 992316765, -1103068528, -1110556335, 1050079932) + + W(4, -1114542190, 1037331941, -1117974353, -1120254383) + + W(5, -1087192661, 1061346066, -1113719056, -1117606232) + + W(6, 1019164800, -1111729689, -1138608617, 1054224317) + + W(7, -1147587566, 1027634660, -1115308289, -1123116948) + + W(8, -1119671953, 1023689473, -1121094047, -1114416951) + + W(9, 1031905998, -1110978129, 1026716344, 1034730016) + + W(10, 1032702160, -1150819433, -1118946572, 1027465948) + + W(11, 1026550240, 990814541, 1025765741, 1002408318); + sum2 = + W(0, -1127268449, 1017520494, 1046612660, -1104246964) + W(1, -1098833757, 1019779427, 1008157504, -1110533731) + + W(2, 1053115740, -1126879265, -1103206877, -1135658469) + + W(3, 1022079949, -1110799425, 1042237616, 1071620340) + W(4, 1047260740, -1112586418, -1129508015, 1038116680) + + W(5, -1081939523, -1082767624, 1046386206, 1021781126) + + W(6, 1023435994, -1149062134, 1046590815, -1091995420) + W(7, 1034566387, 1021597755, 987488696, -1109564641) + + W(8, 1019421448, -1133233221, -1113663072, 1025921212) + + W(9, -1149203238, 1031926637, 1025089506, -1113723868) + + W(10, -1131415815, 1022181144, 1025441916, -1140288129) + + W(11, 1002014090, 1008464055, -1120607589, -1133866981); + WS(-1118927728, -1114990634); + sum1 = + W(0, -1123938375, 1033195056, 1057535917, -1087859524) + W(1, -1107191040, 1047574307, 1040612950, -1114716774) + + W(2, 1047260544, -1132924433, 1007112943, -1106834116) + W(3, 1024746402, 1048819881, 1042772673, -1087499341) + + W(4, 1043648426, 1044210486, 1022214714, -1104426015) + W(5, 1057677234, -1097320317, -1093740782, 1028537764) + + W(6, 1038811590, -1105195831, 1045869257, -1091890215) + + W(7, 1042013107, -1106862316, -1112630087, 1042575344) + W(8, 1057533694, -1089715893, 1020754614, 1047539199) + + W(9, -1118622728, 
-1098438774, 1042859777, 1029276300) + + W(10, -1107815558, -1107232885, 1031232344, 1038434900) + + W(11, 1055588422, -1091230428, -1113991309, 1041569343); + sum2 = + W(0, -1117938617, -1098776947, 1052841626, -1111689798) + W(1, -1101990052, 1034353164, 1034943831, -1074997876) + + W(2, 1071950042, 1008522816, 1023979086, 1019125592) + W(3, 1047294563, -1067348911, 1078478928, 1042633900) + + W(4, -1106116215, 1021696573, -1112881764, -1073328658) + + W(5, 1075942353, -1105497105, 1040726051, 1032192788) + + W(6, -1113196518, -1106856879, 1048374094, -1100859969) + + W(7, -1128120408, -1129459590, 1017166837, 1007574096) + + W(8, -1110198115, 1027760421, 1041816709, -1114781060) + + W(9, -1119652380, 1040326435, 1016518417, -1106280857) + W(10, 1015116440, 983174391, -1122537978, 1003760381) + + W(11, -1132261857, -1146348222, 1015700123, 1019612376); + WS(1034111416, -1106797037); + sum1 = + W(0, -1127934684, -1113852291, -1096035023, 1052464285) + W(1, 1032415443, -1113420426, -1146675705, 1035613338) + + W(2, -1102929642, 1042719307, -1114220817, 1025128440) + W(3, 1006977829, 1034804131, -1090308252, 1054858571) + + W(4, 1032020487, -1114922591, -1113401378, 1037672145) + + W(5, -1089088820, 1057164897, -1123663635, -1136503085) + + W(6, 1019549798, 1044038057, -1096624416, 1050796251) + W(7, 1035431737, 1026704599, 1016064490, -1106829090) + + W(8, -1097224209, 1048920159, -1131720964, -1109868785) + + W(9, -1139061950, 1033066250, -1104516203, 1042039981) + W(10, -1114943722, 1024811962, 998716411, 995721454) + + W(11, -1095532026, 1049209393, 1041424208, -1138798477); + sum2 = + W(0, 1018373307, 1030944977, -1120549779, -1139755202) + W(1, -1101691927, 1049483493, -1117669473, 1035483586) + + W(2, -1113169149, -1114451942, -1102665013, 1049560407) + + W(3, 1036793172, -1120221588, 1031305133, 1047991436) + + W(4, -1083948126, 1057078402, -1107254646, -1138388666) + + W(5, -1118729068, 1050438576, -1083351573, 1060878230) + + W(6, 1027210469, -1119036598, 
-1123944089, 1058194810) + W(7, -1081291530, 1058504911, 983011887, 1026801669) + + W(8, -1136088418, -1103576008, -1106933299, 1044728114) + + W(9, -1124389554, -1120993070, 1045466087, 1037255439) + W(10, -1095248620, 1047223280, 999841656, 1036234905) + + W(11, -1110162376, -1114267729, -1124771645, 1036725012); + WS(1038841272, 1052605132); + sum1 = + W(0, 1031922765, -1100459309, 1037270288, 1026910357) + W(1, 1023447506, -1104596348, -1104568331, 1046914264) + + W(2, 1041702805, -1101939446, -1114279623, 1044685961) + + W(3, 1037544250, -1120141702, -1098675705, 1040827436) + + W(4, 1048352911, -1104877098, -1109808312, -1117060814) + + W(5, 1045728636, 1037079325, -1115507167, -1117405559) + + W(6, -1115625903, 1046551204, 1017164043, -1095425143) + W(7, 1040518727, 1032200833, 1042593752, -1097718198) + + W(8, 999474559, 1046662664, 1038294495, -1104511859) + W(9, -1105481799, 1032086688, 1031450455, -1103113592) + + W(10, -1106032688, 1039045107, 1033924054, -1112841053) + + W(11, 1034794943, 1015870568, 1041969331, -1111795079); + sum2 = W(0, -1119788132, 992683382, 1018867293, 1022669528) + W(1, 1044098145, -1102594356, 1033659476, -1104347191) + + W(2, 1020892153, 1043646320, 1022003003, -1105788334) + + W(3, -1112158198, 1044081475, -1090780387, 1053457206) + + W(4, 1053806225, -1097169858, 1008462738, 1034564598) + + W(5, -1098943438, -1103925357, 1061138285, -1089462725) + + W(6, -1128623485, 1037093866, 1024702018, -1094689316) + + W(7, 1061570617, -1090351234, -1135723434, -1118342628) + + W(8, 1037085984, -1130567667, 1042502677, -1124000960) + + W(9, 1030330775, 1008221186, -1108937272, -1111881166) + + W(10, 1045330453, -1102638867, -1125624097, -1116661382) + + W(11, 1041747819, 1006874186, 1015917808, -1119261652); + WS(1059476151, -1123203418); + sum1 = W(0, -1111725774, 1035594491, -1091810916, 1057461865) + + W(1, 1016189354, -1123857235, 1032359159, 1034919857) + W(2, -1140486487, 1048140086, 1034245225, 1028750247) + + W(3, -1110577724, 
-1111760306, -1084838663, 1060577506) + + W(4, -1104347695, -1113205590, -1105544893, 1046155316) + + W(5, -1086139015, 1052038472, 1027870402, -1121460986) + + W(6, -1121304930, 1044165509, -1095808892, 1057513991) + + W(7, -1103056264, -1148349487, -1115245634, 1020829292) + + W(8, -1096673622, 1053266340, -1108853472, -1113141864) + + W(9, 1036887935, 1039852002, -1127813812, 1045123672) + + W(10, 1031021091, 1036934292, -1110517831, -1120384574) + + W(11, -1090491960, 1055106407, -1133152957, 1011178567); + sum2 = + W(0, -1114868977, 1026556603, 1004734644, 1034344926) + W(1, 1040708934, -1131620842, -1107973502, -1115461868) + + W(2, -1092079110, 1042028492, -1103991549, -1105590488) + + W(3, 1044770988, -1104823400, 1059587552, 1024895663) + W(4, 1051421807, 1029419915, -1101288212, -1105554340) + + W(5, 1045430990, 1050688149, -1095880897, -1114582112) + W(6, 1025917421, 1036199790, 1037874646, 1047334362) + + W(7, -1118101029, 1015496806, 1028043472, -1116199065) + + W(8, -1137785064, -1116948841, -1122024357, -1121536685) + + W(9, -1111158861, 1020651575, -1093651456, 1044938016) + + W(10, -1099771430, -1121478353, -1123193100, -1128075734) + + W(11, -1127404110, 1042458099, 1007412048, -1127827256); + WS(-1096078190, -1082738059); + sum1 = W(0, 1007239809, -1108860284, -1100207850, 1053165251) + + W(1, -1112013420, -1115685558, -1114850361, 1046670329) + + W(2, -1101169270, 1050939564, -1126100266, -1124400725) + + W(3, -1130257143, -1103368477, -1116530972, 1051252601) + + W(4, -1112810720, -1112871192, -1112210107, 1045976117) + + W(5, -1088435015, 1056131912, -1124484747, -1117830909) + + W(6, -1118665957, 1029494103, -1092482434, 1045571555) + W(7, 1041969097, 1031308937, 1028242343, 1036298605) + + W(8, -1090334611, 1053021861, 1032260218, -1143992478) + + W(9, -1116764258, 1029804679, -1105280439, 1035862579) + + W(10, 1044490698, 1037078646, 1022403683, -1128309634) + + W(11, -1099682433, 1038809821, -1114977769, 1016808747); + sum2 = + W(0, 
-1133820219, 1027229337, -1115513384, 1007940354) + W(1, -1156542700, 1020775992, -1147891684, 1032424255) + + W(2, -1107406978, -1125237780, 1031863482, 994071912) + W(3, -1127336652, -1137665638, 1037916056, 1044701152) + + W(4, -1113898133, -1127018949, -1127200453, 1045946307) + + W(5, -1096925824, 1067148595, 1048747828, -1115116116) + W(6, 1020736496, -1116510358, 1031285965, 1076670751) + + W(7, -1101339101, -1122694416, 1001166860, -1145031593) + + W(8, -1138759126, 1039470145, -1070343554, -1106720041) + + W(9, 1022486646, -1116471963, 1020254812, -1109537872) + + W(10, -1082365934, 1034791139, -1135690985, -1126621635) + + W(11, -1122394148, 1025436583, -1107321525, 1039293842); + WS(1056919406, -1123699093); + sum1 = W(0, -1153021483, 985933670, -1089586019, 1058308826) + + W(1, -1122912091, -1130942473, 1016658279, -1142160193) + + W(2, -1106728023, 1043832952, -1111941676, -1162334614) + + W(3, -1169964908, 1032391598, -1095447822, 1054485203) + + W(4, -1114359010, -1115887538, -1131540465, -1109380555) + + W(5, -1095937215, 1051425640, 1038539876, 1035573952) + + W(6, -1116724456, -1112807501, -1092279079, 1057168451) + + W(7, -1129000267, -1120065804, 1018808083, -1115139838) + + W(8, -1105250414, 1053123845, -1105627696, 1037216107) + W(9, 986184318, 1023499017, -1104943256, 1034402292) + + W(10, 1011544866, -1114397383, 1026972628, -1119658051) + + W(11, -1093497009, 1056025192, -1129701482, 1031803900); + sum2 = + W(0, 1034281368, -1104410341, 1018060402, -1138447737) + W(1, 994323516, -1140256393, -1126716121, 1032410612) + + W(2, 1017706532, -1115589617, 1029627322, -1133676977) + + W(3, -1121241308, -1087414339, 1061712823, 1021983592) + + W(4, -1133709215, 1012505315, 1045372780, -1068615761) + + W(5, 1078313037, -1100339818, 1015111478, -1130902073) + + W(6, 1035929285, -1067644471, 1079888634, -1123293195) + W(7, 1023885100, 1028720332, 1047600787, -1091405946) + + W(8, 1048822788, 1030149270, -1124744731, -1123320040) + + W(9, 1004087438, 
-1118166494, 1023636550, -1120625820) + + W(10, 1010097407, 1022959192, 1041718259, -1097556864) + + W(11, 1035128990, 1021734756, 1009567515, -1136405679); + WS(1057577783, -1116545908); + sum1 = + W(0, -1140559047, 1023488522, 1031631673, -1127758170) + W(1, 1037581685, -1129099776, -1119858755, 1007841757) + + W(2, 1034355893, -1102068668, 1038802963, 1023940956) + W(3, 1001413734, -1099448121, 1034820815, -1139646818) + + W(4, 1020321573, 1023108226, 1019558160, -1135636631) + + W(5, -1095265297, 1043309332, -1118918103, -1133216315) + + W(6, 1027838711, -1106014610, -1133110549, 1058446926) + + W(7, -1104027725, 1027058075, -1123065945, -1112832085) + + W(8, 976310772, 1018622064, -1110623851, -1123708581) + W(9, -1133184076, -1132115376, 1031909175, 1033516923) + + W(10, -1124259695, 1025545342, -1116764451, 1007686986) + + W(11, 1033492939, 1033353766, -1114622946, -1118432359); + sum2 = + W(0, -1139693312, -1124752720, -1098950560, 999696497) + W(1, -1111434581, 1026280044, 1036960926, -1121090060) + + W(2, -1103693150, 1034222778, -1119707404, 1014405392) + + W(3, -1143311649, -1099787410, 1033289622, -1104157082) + + W(4, 1018041096, -1121205834, 990661185, -1112556974) + W(5, 1052753541, 1058657457, -1108624084, 1023217352) + + W(6, 1015972640, -1123282486, -1106573325, 1042484509) + + W(7, -1111132399, 1005957425, 1000525745, -1104610584) + W(8, 1031872996, 1044261031, -1121815492, 1017712752) + + W(9, -1129692664, 1028371280, -1115662969, 1026599244) + + W(10, 1018446608, -1139874592, 1028832752, -1104217347) + + W(11, -1115847157, 1041506121, -1129135268, 1010211304); + WS(1063874743, -1123600943); + sum1 = + W(0, 1026726381, -1110972995, -1088392527, 1060534134) + W(1, -1131352649, 1017113381, 1005084141, 1029034162) + + W(2, -1096434085, 1052968261, -1125442370, 1023149165) + + W(3, -1121313992, 1029564141, -1088422197, 1061531727) + + W(4, -1111100967, -1121647548, 1022502021, 1019350337) + W(5, -1089197156, 1048937234, 1007055431, 1028622922) + + 
W(6, 1010678471, 1021423943, -1089730182, 1053585019) + + W(7, -1124123103, -1132301739, 1012880846, -1112342611) + + W(8, -1099324833, 1049533342, 1037169096, -1122598880) + W(9, 1016038684, 1036724585, -1105649226, 1044791660) + + W(10, -1119334386, 1017220849, 1031948511, -1096649610) + + W(11, -1102131796, 1047748258, 1048887628, -1134858375); + sum2 = + W(0, 1041516351, -1098661524, 1065773241, 1067471192) + W(1, -1095722385, 1025030832, -1114001722, -1113317539) + + W(2, 1066164852, 1062194047, 1029392016, 1015621429) + W(3, 1031962338, 1041960143, -1094243603, -1131590099) + + W(4, 1045112423, 1033273068, -1127882717, -1113172562) + + W(5, -1071023147, -1071391818, 1027735196, -1106774969) + + W(6, 1024371132, 1057245216, -1087722937, -1082288010) + + W(7, 1047570295, 1021784721, -1116893380, -1108932190) + + W(8, 1043021464, 1036914992, -1110460182, -1147670187) + W(9, -1137233618, 1040530840, 1059923771, 1056604249) + + W(10, 1008215114, -1157997580, 1035584360, -1090505328) + + W(11, 1062948018, 1065534266, -1096088381, 1025226110); + WS(-1081605212, 1037230241); + sum1 = W(0, -1129286954, -1115600951, -1139829872, 1042245092) + + W(1, -1130516890, -1191245347, -1127382348, -1127078325) + + W(2, 1037397159, -1101819637, 1025459530, -1140137724) + + W(3, -1114123448, 1045220821, -1103957592, 1057239854) + + W(4, -1098019463, -1135821456, -1119205995, 1032978341) + + W(5, 1057561887, -1089716883, 1040837167, -1153142562) + + W(6, -1116053850, 1031622828, 1057799156, -1088291628) + + W(7, 1039979404, -1123070334, -1114061454, 1039357911) + + W(8, -1103125195, -1162281827, -1123433113, -1115244132) + + W(9, 1024011222, 1020277751, 1035843369, -1111135597) + W(10, 1008116332, 975751222, -1165173963, 1026874614) + + W(11, -1159950147, -1144308089, -1115664788, -1121723210); + sum2 = W(0, -1129847377, 1027890930, 1045384717, -1131041629) + + W(1, -1118537275, -1129495121, -1132201467, -1115667911) + + W(2, -1139027758, -1086235324, 1015945835, 1022067627) + + 
W(3, -1111059277, -1109124506, 1062718876, 1071241567) + + W(4, -1090110249, -1139816838, 1032250249, -1115518668) + + W(5, -1088514518, -1104078593, 1050303031, -1109320206) + + W(6, 1003251981, -1131706033, 1043614591, -1091065468) + + W(7, -1130925231, 1035917053, -1175110606, 1033851379) + + W(8, -1096199608, 1021865867, 1030828678, -1140186118) + + W(9, -1119100754, 1008685286, 1038009999, -1113376611) + + W(10, -1128909271, 1027374356, 1007928974, -1130851063) + + W(11, -1126833063, -1120766781, -1121346672, -1130315761); + WS(1048635758, -1111558989); + sum1 = W(0, -1116715915, -1114167026, 1019884197, 1046236914) + + W(1, 1023679491, -1123924777, 1007517554, -1144951432) + + W(2, -1100411873, 1042364482, 1030303424, -1115625243) + + W(3, -1125319757, 1032434734, -1082609609, 1057088310) + + W(4, 1025882348, 1018952757, -1130676803, -1116349759) + + W(5, -1099356457, 1060062511, 1027068912, -1117219752) + + W(6, 1032051720, 1007313161, -1114189151, -1105629450) + + W(7, 1046455412, -1128015465, -1122176818, 1037795522) + + W(8, -1098216348, 1032549225, 1037080648, -1117338239) + + W(9, 1019813529, 1011561671, -1118483848, -1156935944) + + W(10, 1024380849, 1001454266, -1125995833, 1022555096) + + W(11, -1115419480, 1017135051, 1020138940, 1015509889); + sum2 = + W(0, 1017200252, 1032764106, -1100965710, -1120952053) + W(1, 1023440400, 1026555164, -1128076598, 1032749056) + + W(2, 1036328960, -1105592489, 1037836738, -1121478225) + + W(3, 1020177744, 1042154489, -1079875623, -1093732085) + + W(4, -1110726007, 1032334674, 1027061114, -1091917968) + W(5, 1069362278, 1053281179, -1110949820, 1004458190) + + W(6, -1120062015, -1115647155, 1041923814, 1053015354) + + W(7, -1097734576, 1030547292, 1007295183, -1109350747) + + W(8, -1121245377, 1023497366, 1029656316, -1115087241) + W(9, 1005463470, -1136438311, 1023734232, 1044015507) + + W(10, -1107014120, 1024950244, -1120999608, -1152745757) + + W(11, -1115720719, 1040619345, 1039138178, -1107079388); + 
WS(1057226679, -1099093504); + sum1 = W(0, -1117743115, -1106860569, 1059123761, -1116606854) + + W(1, -1117312579, -1126837697, -1111361838, -1097721634) + + W(2, 1043835217, -1113320356, 1021938430, -1108681130) + + W(3, -1142218540, 1017711998, 1054886149, -1098795498) + + W(4, 1024346219, -1128001843, -1115584496, -1098477656) + + W(5, 1036645937, 1036044245, -1113074392, -1126342351) + + W(6, -1144474236, 1017808579, -1096319852, 1058878095) + + W(7, -1113306631, -1145404316, -1112915081, -1127808199) + + W(8, -1102898152, 1054951826, -1104150268, 1022603516) + + W(9, -1118048876, -1110778360, -1108186916, 1042821748) + + W(10, -1109837016, -1113519979, -1124754680, 988685674) + + W(11, -1103594844, 1057715322, -1104945799, -1166667748); + sum2 = + W(0, 1017552406, -1115134458, -1123239700, 1040415751) + W(1, 1024802817, -1116030864, -1121365003, -1113297695) + + W(2, 1033578978, -1119377084, -1140510683, -1154029132) + + W(3, 1024315197, -1121331284, 1032104202, 1033268884) + W(4, 1031096569, -1127078294, -1126393870, 1034056828) + + W(5, -1122610502, 1042552247, 1027588477, -1123710517) + W(6, 1006096662, -1116036592, 1042239151, 1014688747) + + W(7, -1116363642, -1142638070, 1026122801, -1122098200) + + W(8, -1128246386, 1038912262, -1106292650, 1002524342) + + W(9, -1115019758, -1138309587, -1130645774, 1011328923) + + W(10, -1119712705, -1123496021, 1010820131, -1114451237) + + W(11, 1021632898, 1013103251, -1106135611, 1026182445); + WS(-1108758968, -1120615143); + sum1 = + W(0, 1007512449, 1043847294, 1041722714, -1089504654) + W(1, 1035899020, 1021562716, -1118729201, -1115009907) + + W(2, 1047642150, -1097300100, 1024857813, -1157490338) + W(3, 1033236958, 1038269372, 1052029054, -1092318719) + + W(4, 1041772976, 1033997944, -1188181540, -1103491829) + + W(5, 1056274631, -1092329088, -1115586563, -1127937658) + + W(6, 1032455763, 1026842103, 1057598894, -1088834696) + W(7, 1037648114, 1034853541, -1139132956, 1028990457) + + W(8, 1044856958, 
-1094694701, 1028932916, -1139535439) + + W(9, -1126666947, -1112992067, 1049081300, -1102944265) + W(10, 1031373190, 999073631, 1028445809, 1045480960) + + W(11, 1008203541, -1095636207, 987483733, 1033314299); + sum2 = W(0, 1022601823, -1119857460, -1099325942, -1068582801) + + W(1, 1035739756, 1015907333, 1027503424, 1016942723) + + W(2, -1112391205, -1076271776, -1104095012, 1016973548) + + W(3, -1114440342, -1113768249, 1050839307, 1067690352) + + W(4, 1026494027, -1122697579, 1031020236, 1049278985) + W(5, -1097330938, 1077639092, 1047876791, 1015148595) + + W(6, -1115479573, 1023747258, 1019565806, 1061065209) + W(7, 1016405011, -1106941761, 1028798660, 1008539869) + + W(8, -1108724816, -1120523343, -1115660355, 1032414836) + + W(9, -1137878778, 1030670036, -1143955309, 1002861982) + + W(10, 1022661839, -1126087053, 973255146, -1124354630) + + W(11, -1115374863, 1029925423, 1033007729, -1124658022); + WS(-1125070560, -1096513533); + sum1 = + W(0, -1112401838, 1032850683, -1111199410, 1046757622) + W(1, 1033951818, -1110786345, -1141896047, 1035442769) + + W(2, -1105684773, 1047296038, 1013771590, -1113414691) + + W(3, -1114813980, 1027743049, -1093508577, 1061969278) + + W(4, -1100008383, -1131095256, -1125005414, 1036941992) + + W(5, -1099350541, -1107426238, 1023589122, -1114443933) + + W(6, 1026386210, -1104378805, 1060063436, -1092774776) + W(7, -1122038496, 996299870, -1115056418, 1037833707) + + W(8, -1112618105, 1028130403, -1110298148, -1117830096) + + W(9, 1012901881, 1022681350, 1035294797, -1127381572) + + W(10, 1030080099, -1140100259, -1125623196, 1033887999) + + W(11, 988362813, 1016528878, -1106008748, 1014753608); + sum2 = + W(0, -1128089453, 1024340419, -1102023306, -1103821003) + W(1, 1043342989, -1119644228, -1110645236, 1043920397) + + W(2, -1114712453, -1097377481, -1109265540, 1026656603) + + W(3, -1130916401, -1129344777, 1028225499, 972074333) + W(4, -1100613575, 1034952995, 1026446699, 1009781987) + + W(5, 1057874897, 1059140954, 
-1103696793, 1032660433) + W(6, -1115610756, -1102796808, 1052660862, 1038144249) + + W(7, -1116201867, -1130501689, 1032679495, 1037523581) + + W(8, -1102216434, -1107293486, -1131885037, 1029362471) + + W(9, -1114433002, 1023912055, -1127455825, 1006589878) + + W(10, -1105206442, 1013212643, -1123355757, 1026431795) + + W(11, -1128264837, -1102041035, -1127587329, 1005668278); + WS(1049867118, 1034735186); + sum1 = + W(0, -1127774262, 1029333619, 1019113028, 1024964758) + W(1, 1007980364, -1121402761, -1117373036, -1123455456) + + W(2, 1040322517, 1032014380, 1019723167, -1118901061) + W(3, 1031064438, -1105261096, 1040279186, 1048588372) + + W(4, -1114399774, -1126411220, -1112922680, -1096141178) + + W(5, -1101685436, 1050135703, -1100797936, -1110911420) + + W(6, 1012648832, -1120189874, 1044944438, 1034886579) + W(7, -1113398479, 1019321608, -1132535687, 1024540628) + + W(8, -1121818673, 1040490120, -1150021355, -1114680743) + + W(9, -1120670652, -1138107141, 1035373868, 1016312768) + + W(10, -1128775361, -1121234663, -1140537009, 1000234461) + + W(11, 1032620736, 1034120602, 1008873398, -1121766352); + sum2 = + W(0, -1161289046, -1111556088, 1038429491, 1024158411) + W(1, -1123794133, 1016121349, 1020587590, -1127668568) + + W(2, -1130067243, 1018324827, 1035096674, -1127328688) + + W(3, 1029682388, -1095922552, 1050898724, -1123472030) + + W(4, -1142905112, -1126947701, 1031608572, -1092239431) + + W(5, 1058359401, -1093717514, 1042534485, -1126252937) + + W(6, -1117304231, -1165373610, 1040200303, -1118204561) + + W(7, -1137719883, -1122434117, -1129471662, -1120133906) + + W(8, 1044552317, -1112781150, -1136196985, -1137031592) + W(9, 965398246, 1007233453, -1132308256, 1017795291) + + W(10, 1027736448, -1121369248, -1134569151, 1019046119) + + W(11, 1031851372, -1120826542, -1128305748, -1162116006); + WS(1063598519, 1040709546); + sum1 = + W(0, 989424433, -1129964132, -1092181324, 1056340507) + W(1, -1140166214, 1007420926, 1025004011, 1018517694) + + 
W(2, -1096845618, 1052096915, -1124688156, 1025298052) + + W(3, -1117397173, 1036280904, -1092686069, 1055848084) + W(4, 1027380346, -1114706068, 1010227902, 1042042835) + + W(5, -1082263275, -1117189370, 1041673920, 1030527159) + + W(6, -1116859551, 1025484624, -1087946945, 1053023013) + W(7, 1025279965, 1004378644, 1035862832, -1108371497) + + W(8, -1124000752, 1053676604, 1021321051, -1130325786) + W(9, 1012428267, 1028794752, 977020321, 1044581128) + + W(10, -1125256895, 1025029349, 1028295307, -1105766207) + + W(11, -1108973663, 1048031389, 1035434166, 1015026205); + sum2 = + W(0, -1128989779, 1040099849, -1100874000, 1021993174) + W(1, -1130783901, 1011642260, -1132752301, 992269838) + + W(2, 1034251701, -1130867684, 1034483450, -1123860313) + + W(3, -1121498089, 1034828720, -1104384455, -1113539068) + + W(4, -1122004711, 1016725938, -1112228563, 1034859451) + W(5, 1044543345, 1053848304, 1016645052, -1122625223) + + W(6, 1023519830, 1034954038, 1046406891, -1104908283) + W(7, 1032774629, 1011006880, -1111737432, -1122349299) + + W(8, 1028583216, -1100179639, 1016210650, -1114528083) + + W(9, 1023675120, 1019899084, -1114022376, -1111378604) + + W(10, 1033297067, 1021084026, -1134051546, -1112348174) + + W(11, 1040182737, -1097476412, 1025418184, -1141294629); + WS(1052377710, -1116561061); + sum1 = W(0, -1122160137, 1026711393, 1034438088, 1023922180) + + W(1, -1115207840, 1014756539, -1112995181, 1046449684) + + W(2, -1103073456, -1112523427, 1036366239, -1110629817) + + W(3, 1021266631, -1094207935, 1059878180, 1031595583) + W(4, -1096200621, 1025340632, 1017022397, 1051777944) + + W(5, -1091454284, -1107259466, 1043506315, -1122333286) + + W(6, 1032809511, -1105978112, -1108523806, 1059633699) + + W(7, -1105628259, -1126028515, -1105019076, 1045042328) + + W(8, -1137740923, -1094439010, 1043730779, -1114494718) + + W(9, 1019774100, -1140860791, -1113848547, 1048809290) + + W(10, -1114298535, 1024493780, -1119221338, -1140595366) + + W(11, 1015748571, 
1019553093, -1117446555, -1121208702); + sum2 = W(0, -1125003387, 1023545558, 1001356924, 1019826271) + + W(1, 1029018751, -1114113554, -1110204990, -1122326332) + + W(2, 1049948311, -1106041547, -1122275300, 1028296019) + + W(3, 1008672566, 1039271760, -1104177316, 1040609653) + W(4, 1017003703, -1143932072, -1110854180, 997586352) + + W(5, -1099440370, 1057375034, -1099421810, -1122149120) + + W(6, 1015432805, 1043658401, -1101604747, -1098469549) + + W(7, 1052007058, -1118594118, 1007507570, -1126010948) + + W(8, -1112581048, 1041033112, 1018898909, -1129527698) + + W(9, -1129875419, -1128126180, 1034699764, -1108980340) + + W(10, -1118843549, 1031512617, -1145172348, 1018354299) + + W(11, -1118892801, 1026776081, -1123423040, -1147650596); + WS(1066846108, 1044745002); + sum1 = + W(0, 1019372186, 1049403789, 1042475733, -1091781177) + W(1, -1124410122, 1015897062, 978233216, -1107121557) + + W(2, 1051348497, -1100692098, 1039801345, 1009376476) + W(3, -1129136976, 1041504923, 1048713517, -1091573825) + + W(4, 1032893815, -1126291000, 1025331363, -1134682116) + W(5, 1054698948, -1089263023, 1040511932, 1006187856) + + W(6, -1142297744, 1039399714, 1049146911, -1091452887) + + W(7, -1146055296, 1016897132, 1015116199, -1122699462) + W(8, 1058118606, -1090043533, 1032222216, 1012743170) + + W(9, 1015645340, -1121336819, 1046729314, -1102394372) + + W(10, -1115550793, 1010428515, 1016287548, 1026221102) + + W(11, 1056609718, -1091564925, -1107840672, 1000566800); + sum2 = + W(0, -1121498998, 1040076187, 1040991236, -1097642453) + W(1, 1041191639, -1143069461, 1025173682, -1110698495) + + W(2, 1027215822, 1032193313, -1107046015, -1127878497) + + W(3, -1127952317, 1046879062, -1097422534, -1089335507) + + W(4, 1027648922, 1033858873, -1123064681, -1114452496) + + W(5, -1079622690, -1070315938, -1107556506, 1031644922) + + W(6, 1031325102, 1037259271, -1113150532, -1080614075) + + W(7, 1040688678, -1125227083, 1031547392, -1113826794) + W(8, 1059352243, 1063284099, 
1044912350, -1135729999) + + W(9, -1116804726, -1113826560, 1032134129, 1075195474) + + W(10, -1107091577, -1112123781, 1013263415, -1114840697) + + W(11, 1048220488, 1071802337, 1050907604, -1104322253); + WS(-1083170743, 1032719415); + sum1 = + W(0, 1023701883, -1115388842, -1095142736, -1131637740) + W(1, 1043889005, 1012236805, 1027723212, 1037576352) + + W(2, -1104329709, 1047136718, -1114738168, -1143133226) + + W(3, -1122560214, 1028066352, -1093573195, 1050356544) + W(4, 1032119470, -1126174356, 989186599, 1034436696) + + W(5, -1091782714, 1058921626, 1035779088, 1024911192) + W(6, 1018265472, 1046871436, -1090370147, 1057401704) + + W(7, -1119260798, 1033076018, -1118377474, 1036384970) + + W(8, -1088395265, 1046747968, 1026791695, -1122725807) + W(9, 1034616094, 1041769608, -1092608855, 1048250192) + + W(10, 1007978244, 1033024419, -1143849572, 1033711598) + + W(11, -1085234190, 1051022467, 1035213927, 1028803056); + sum2 = + W(0, 1040592912, -1091007599, -1122569720, 1061904811) + W(1, -1096410255, -1107753434, -1148866748, 1043419584) + + W(2, -1102576481, -1110256028, -1106770914, 1043044202) + + W(3, 1032765896, -1111893630, -1114969246, 1042199718) + + W(4, -1108992598, -1138614142, -1113675298, -1100951705) + + W(5, 1060370086, 1060838438, -1088153932, -1121677972) + W(6, 1035431068, 1030657616, 1018512847, 1061366749) + + W(7, -1091610254, 1049599377, -1111525482, -1123081200) + + W(8, -1112438148, -1086083341, 1039676388, -1117594428) + W(9, 1031171080, 1009914590, 1033713452, 1029235040) + + W(10, -1104419583, -1154941432, -1111672592, 1048594219) + + W(11, -1120657004, -1090315205, 1040642594, 1041324320); + WS(-1087374135, 1068569819); + sum1 = W(0, 1034522569, 1036038485, 1051494643, -1092286050) + + W(1, 1024631242, -1124293233, -1114460210, -1134485343) + + W(2, 1048850073, -1097395434, -1155697773, 1035227470) + W(3, 1040675195, 998999958, 1055860825, -1093950865) + + W(4, 1034953465, 1022306499, -1156322949, -1102898978) + + W(5, 
1058980799, -1086427635, 1036357020, -1115248568) + + W(6, -1126356143, 1032174021, 1050366134, -1097745188) + + W(7, -1115522123, 1025097211, 1038944141, 1032843110) + W(8, 1048730421, -1098221875, 1020968428, 1020281868) + + W(9, -1123272884, -1112707068, 1043911796, -1095510475) + + W(10, 1032316940, -1112095119, -1147588893, 1041667373) + + W(11, 1049367775, -1093210118, -1134843155, 1003691067); + sum2 = + W(0, 1036982689, -1101176821, 1032029724, 1031712503) + W(1, -1095262307, -1135236510, -1109420523, 1040154700) + + W(2, 1008871494, 1051308638, -1089037290, 1036178124) + W(3, 1038171016, -1103427381, 1053639476, -1103617514) + + W(4, -1077227331, 1056219784, -1114883799, 1011961296) + + W(5, -1094943648, -1090481186, -1077304111, 1063074442) + + W(6, 1032698755, -1115041464, 1046242469, 1048516585) + W(7, -1075605762, 1070767205, 1023400751, -1112162319) + + W(8, 1015257266, -1110670159, -1086532256, 1074120487) + + W(9, -1119043865, -1114657793, 1018562568, 1026474691) + + W(10, -1115849955, 1066414901, 1034471555, -1117007917) + + W(11, 1031877719, 1044794303, -1110424851, 1052188053); + WS(-1094340206, 1025238393); + sum1 = + W(0, 1025401267, 1041212259, 1051626551, -1087771683) + W(1, 1026720107, 1024873417, -1120317764, -1123694073) + + W(2, 1044638395, -1099303356, 1027277160, 1003045609) + W(3, 1029217422, 1040075594, 1051991350, -1089805402) + + W(4, 1042793877, 1025974530, 1017691154, -1128349902) + W(5, 1049159169, -1090474853, 1032035372, -1140916353) + + W(6, 1015273200, 1030937834, 1055056667, -1089928234) + W(7, 1038797403, 1026491292, 1006374863, 1030337996) + + W(8, 1045537234, -1095611655, 1028636183, -1125445197) + + W(9, -1149759301, -1112880429, 1047952828, -1103717662) + + W(10, 1034683820, 1018728702, 1023921508, 1048792866) + + W(11, -1109761292, -1097665994, 1024357062, 1024842472); + sum2 = + W(0, 1016272983, -1117564089, 1054344808, 1082928383) + W(1, 1043627742, -1113989008, -1125328223, 1007328518) + + W(2, 1047318060, 
1074423709, 1037006400, -1137080694) + W(3, 1019432157, 1038058236, -1095657702, -1074337637) + + W(4, 1040243063, 1017883877, -1136495190, -1106032359) + + W(5, -1084625245, -1066615612, -1106132294, -1126758430) + + W(6, 1024638490, 1036423350, -1124414562, -1087090886) + + W(7, -1119023275, 1040797418, -1122054341, -1129861056) + + W(8, 1036648948, 1034359725, 1004167460, -1113361096) + W(9, 1016158573, -1141592892, -1120282232, 999766988) + + W(10, 1002865564, 1023713345, -1137665190, 1032273458) + + W(11, 1033205457, -1114245215, -1113153751, 1022755221); + WS(-1098231918, 1035887052); + sum1 = W(0, 1007686525, -1117693688, 1048268202, -1105099271) + + W(1, -1110112236, 1016554315, -1112046456, -1119004197) + + W(2, 1036810732, -1097653932, -1130249512, -1115219957) + + W(3, -1136641001, -1104788790, 1055375147, -1098720398) + + W(4, -1106155488, -1142876018, 1025581173, -1092353842) + + W(5, 1069647932, 1066085926, -1091717545, -1143806946) + + W(6, -1121240420, -1104580134, 1052170270, -1093227035) + + W(7, -1110987409, -1113561092, -1165513922, -1146015939) + + W(8, 1020096363, -1123833253, -1108464746, -1123809427) + + W(9, -1114143760, -1162096818, -1125578169, -1107379122) + + W(10, -1156567122, -1113074326, -1123656209, -1117522176) + + W(11, 1031911554, -1130540403, -1106840908, -1120698772); + sum2 = W(0, -1125891647, -1121500935, 1021750179, 1017645919) + + W(1, -1130193055, -1118741587, -1139529830, 1033326245) + + W(2, -1133323886, -1167588957, 1018366499, 1021086319) + + W(3, -1127048511, -1110257678, 1040835222, -1108825416) + + W(4, -1144215516, -1113356630, -1119620679, 1033808587) + + W(5, 1061147710, 1055027509, 1035778772, -1123951235) + + W(6, 1015042367, -1152963927, -1110018076, -1105425292) + + W(7, -1105010302, -1124596863, -1133002574, 1023704513) + + W(8, 1035246842, -1090179180, 1019226535, 995355927) + W(9, 1001053676, -1125968895, -1123027527, 1035235033) + + W(10, -1113493000, -1135606974, -1136336990, -1159911790) + + W(11, 
-1144777900, -1104047270, -1137733870, -1141401292); + WS(-1081201436, -1083122818); + sum1 = + W(0, 1024396773, -1118776317, -1090317103, 1056470156) + W(1, -1120183581, -1124728406, 1040103323, -1121204906) + + W(2, -1098486564, 1048677796, -1105729174, 1010393788) + + W(3, -1104054160, 1048678635, -1094484293, 1048629197) + + W(4, 1032093347, -1105359341, 1042893967, -1101546374) + W(5, -1101075087, 1052113969, 1048228642, 1048587688) + + W(6, -1121710378, 1036673102, -1095873942, 1033707976) + + W(7, 1044099610, -1107414311, -1122190892, -1112386175) + + W(8, -1099326495, 1057451847, -1102747354, 1034642220) + W(9, 1038539251, 1022162966, -1101153544, 1027207978) + + W(10, -1114063013, -1109685144, 1020559724, -1111471555) + + W(11, -1095630377, 1053998976, -1125760751, 1033222029); + sum2 = W(0, -1130151079, 1029357099, -1141051917, -1126143047) + + W(1, -1138853383, 1016378649, -1128601535, -1139320303) + + W(2, 1027307392, -1124015138, 1026568428, -1128887123) + + W(3, 1016586775, 1016703563, 1040480663, -1090482710) + W(4, 1051692940, 1024508650, -1116462462, 1036545095) + + W(5, -1084177500, -1061542668, 1087807110, -1109325640) + + W(6, 1006316053, 1027702761, 1027514935, -1081711054) + + W(7, 1065457143, -1115450794, -1136401487, -1115282656) + + W(8, 1033882273, 1042416533, -1101742731, 1030137534) + W(9, -1134837023, 1025282364, 975470826, -1107083018) + + W(10, 1035528582, 1016609727, 1015531121, -1128863347) + + W(11, 1013565163, 1030037156, -1123914522, -1122133136); + WS(1056806766, 1019813151); + sum1 = + W(0, -1109922347, 1038862168, -1094336356, 1045176100) + W(1, 1023835087, -1117706029, 1027672232, -1098777292) + + W(2, 1025923920, 1046907893, -1097985673, -1143840774) + + W(3, -1113027385, 1057340869, -1089223672, 1028090813) + + W(4, 1044113854, -1111671123, -1106442907, -1131946518) + W(5, 1049099726, 1060367739, 1010151151, 1025756788) + + W(6, -1119504732, 1048903346, -1102142547, -1102839017) + + W(7, 1051267347, -1111600768, 
-1127683323, -1100586740) + + W(8, -1122538541, 1052174932, -1093148714, 1040843731) + W(9, 1015134384, 1027127869, -1099585557, 1024110556) + + W(10, 1029024221, -1107564881, -1122196904, -1136830686) + + W(11, -1100472159, 1047790604, -1137237669, -1131252120); + sum2 = + W(0, 1016054025, -1119367753, -1115811972, 1024987046) + W(1, 1016843613, 1007599121, -1116580030, 1033363716) + + W(2, -1119979151, -1114756690, 1025664850, -1120202841) + + W(3, -1128856269, -1107794961, 1046175546, -1100926142) + + W(4, 1028330842, -1144919563, 1024017790, -1129548157) + W(5, 1051115205, 1048732221, 1029232538, -1123931557) + + W(6, -1120027976, 1034251322, -1095340379, 1044324276) + + W(7, -1105635461, -1122027017, 1003808451, 1039517400) + + W(8, -1127431641, -1104827086, 1042504965, -1118580981) + + W(9, -1120561807, 1019337653, 976018325, -1139095561) + + W(10, -1118828309, 1016896817, 1020365697, -1174223189) + + W(11, -1148320619, -1138276477, -1117203320, 1009819353); + WS(1067549148, 1061168738); + sum1 = + W(0, -1136860560, 1041916271, 1043173595, -1099729080) + W(1, 1028345500, -1117900922, 1019396970, -1102760394) + + W(2, 1046914407, -1097589844, 1036811500, -1118556558) + + W(3, 1034693791, -1100215160, 1063664325, -1085101004) + + W(4, 1044130369, -1118557363, 1031876061, -1127917744) + + W(5, 1041579695, 1044764001, -1111608196, -1148658183) + + W(6, -1126960466, 1038708237, -1084953669, 1062969721) + W(7, -1096862061, 1028601293, 1026075051, 1042570959) + + W(8, -1099553438, 1046958554, -1104831702, 1014732126) + + W(9, -1129065354, 1024557450, -1111656016, 1031927572) + + W(10, -1121741642, 999719805, -1125694250, -1114527138) + + W(11, 1036161892, -1129394940, 1028864534, -1111432076); + sum2 = + W(0, 1037988572, -1097183834, -1096111302, -1112255513) + + W(1, 1025606252, -1131066844, -1109648154, -1096035348) + + W(2, 1046143856, 1047451704, -1109281762, 1021094187) + W(3, -1115314649, -1106718097, 1065688064, 1048785443) + + W(4, 1025033440, -1118872350, 
-1110735622, 1044362532) + + W(5, -1100541529, -1089624797, -1124926742, 1035314838) + W(6, 1029414023, 1044653749, 1022961930, 1040629892) + + W(7, 1024229044, -1132267648, -1115532881, -1118097482) + + W(8, 1041170408, -1135819985, -1109992198, 1004115214) + + W(9, -1114457629, 1036871139, -1121640520, -1109975688) + + W(10, 1007526703, -1128646118, -1123631499, 1035597348) + + W(11, -1109551474, 1030316886, 1031542857, -1117895342); + WS(-1090126519, 1034760182); + sum1 = + W(0, 1022234099, 1038732905, -1095221408, 1049091087) + W(1, -1141106060, -1130848127, 987019599, 1040082783) + + W(2, -1096833350, 1049531159, -1100657291, 1036771650) + W(3, 1021371561, 1042926010, -1090005917, 1052645704) + + W(4, -1098472859, 1035488804, -1125185183, 1019049563) + W(5, 1049172122, 1048882035, -1113910991, 1025403478) + + W(6, 1040030303, -1101231100, 1056956325, -1086324445) + W(7, 1041783487, 1007113098, 1005824514, -1108355337) + + W(8, 1052986466, -1089320760, 1033537135, -1138596020) + + W(9, 1028122555, -1115211984, 1046568014, -1101798768) + + W(10, 1037621024, -1128732298, -1120592523, 1035983309) + + W(11, 1025899358, -1123937043, -1125397784, -1127374557); + sum2 = W(0, 1016736022, 1032939170, -1122689775, -1102551566) + + W(1, -1122487238, 1026411789, 1011977196, -1102682487) + + W(2, 1060510561, 1060805847, -1122236502, -1128029215) + + W(3, -1137999416, 1037611888, 1051792579, 1057985083) + W(4, 1036808922, 1019619350, 1019963070, -1102949496) + + W(5, -1073351999, -1073393989, -1103029047, -1125796177) + + W(6, 1030437859, 1039010410, 1059164734, 1057367527) + W(7, 1039165500, 1024266957, -1122046802, -1110345340) + + W(8, 1062467108, 1062012989, -1106860549, -1119780381) + + W(9, -1142727409, -1124097437, 1031495287, -1147452225) + + W(10, -1143421321, 1024156551, 1018753414, -1113761662) + + W(11, -1101619274, -1099879364, 1015482390, 1016465010); + WS(-1089242039, -1127205581); + sum1 = + W(0, -1124452989, 1047404342, -1086849964, -1114062493) + W(1, 
1042532510, 1010360274, 1037734344, -1111320098) + + W(2, 1001557948, 998659036, 1038364441, 1032729828) + W(3, -1136469606, -1115981071, 1055753134, -1096047365) + + W(4, -1118490271, 1015888923, 1045032037, 1038279993) + W(5, 1059294849, -1107330927, 1044317529, -1118190491) + + W(6, -1131176455, 1006926037, 1006841992, -1105936042) + W(7, 1035319841, 1021147727, 1029283077, 1030509432) + + W(8, 1049404683, -1087265492, 1043122385, -1123196402) + + W(9, 1038076384, -1123275877, 1037635838, -1095492784) + W(10, 1027367204, 1027584778, 1029887056, 1043334248) + + W(11, 1044156233, -1080260267, 1053177470, -1118995315); + sum2 = + W(0, 1029897599, -1102843967, 1001547796, 1049416264) + W(1, -1120840401, -1115391670, -1114965417, 1046605828) + + W(2, 1025076963, -1100594038, 1041915682, 1020641677) + W(3, 1025776431, 1033633995, -1090433692, -1096668289) + + W(4, 1040646462, -1111492899, -1121485495, 1048791648) + W(5, 1044097140, -1117020477, 1042521794, 1034331443) + + W(6, -1140617930, -1104898739, 1042488362, -1102246327) + + W(7, -1112857889, -1115861875, -1113546368, 1031053127) + + W(8, -1118485295, 1034926551, -1109164591, -1134127978) + + W(9, 1031651703, -1108448442, 1045463546, 1047237110) + + W(10, 1015998805, -1120855845, -1117524097, -1108356905) + + W(11, 1040816680, 1044694712, -1109851905, -1124879769); + WS(-1081567068, -1085072352); + sum1 = + W(0, 1026202077, 1008391383, 1043376377, -1106037014) + W(1, -1119432853, 1015626934, 1017630556, -1102096537) + + W(2, 1035138955, -1113878797, 1041401156, -1112723700) + + W(3, 1034866076, -1105309517, 1062685095, -1091962582) + W(4, 1028045374, 995954372, -1120073907, -1113888457) + + W(5, -1088326467, -1099527071, 1036249665, -1112822301) + W(6, 990337572, 1038015146, -1087780001, 1065743931) + + W(7, -1122166240, 1036674595, -1122725732, 1010107993) + + W(8, -1095652846, 1048331161, 1030964177, -1165855255) + W(9, 1016189565, 1035371298, -1111927693, 1035819857) + + W(10, 1030882915, 1020668626, 
-1135328058, -1107104879) + + W(11, -1134788873, 1033560764, 1034792920, -1140134473); + sum2 = + W(0, -1101998430, 1025808817, 1026331301, 1041383807) + W(1, 1033833028, -1108635033, 1034880732, -1110518368) + + W(2, 1035202798, 1044537615, -1096440885, 1042833515) + + W(3, -1111573656, -1090739547, -1095903349, 1045506861) + + W(4, 1030373631, -1107072548, 1034691412, -1098171169) + W(5, 1068344140, 1069658374, -1094064924, 1037649108) + + W(6, 1017688506, 1047617175, -1085314459, -1090700374) + + W(7, -1106284721, -1117475933, 1032207084, -1112107064) + + W(8, -1108916250, -1135142667, -1113568166, 996218700) + + W(9, 1002612774, 1027687639, -1113844364, -1106371211) + W(10, 1043762008, -1115713024, 995363580, 1035291452) + + W(11, -1111293323, -1105384559, -1113615358, 1031725587); + WS(-1122270064, -1083487436); + sum1 = + W(0, 1022021054, -1111938972, -1089129474, 1061173530) + W(1, -1117042135, 990992466, 1000846209, 1031069285) + + W(2, -1092279467, 1053406092, -1112839626, 997371026) + W(3, -1120651002, 1035108132, -1089405705, 1061891890) + + W(4, -1115403968, -1123767278, -1133172181, -1122478547) + + W(5, -1088959911, 1046812368, 1035227362, -1136588785) + W(6, 1015175368, 1022898156, -1101358829, 1052509753) + + W(7, 983155781, 1014548513, 1008528664, -1109542844) + W(8, -1113192731, 1035774995, 1035355065, -1120262163) + + W(9, 1014262402, 1003755051, -1110785838, 1040815801) + + W(10, -1114859906, -1139309996, 1032206792, -1099224128) + + W(11, -1105614817, 1041852206, 1046440556, -1196238920); + sum2 = W(0, 1044179919, -1111986018, -1080384748, -1092393621) + W(1, 1048581079, 1031858735, 1022028328, 998943655) + + W(2, -1088604014, 1029389095, 1009323699, 1021777892) + + W(3, 1040258943, -1095034556, -1104000317, -1090514887) + + W(4, 1015383036, 1024626196, -1118974089, -1095846499) + + W(5, 1073459027, 1071708766, -1097133430, 1024702395) + + W(6, 1033279863, -1110448374, -1098039236, 1012336831) + + W(7, -1122793353, 1031448400, -1117110809, 
-1099083231) + + W(8, 1045892370, 1049307354, -1097759856, 1030521846) + + W(9, 1031745329, -1120681553, -1111672860, -1100485937) + + W(10, 1041920736, 1016567237, -1103290765, -1101323001) + + W(11, 1055006842, 1051814517, -1091667564, 1029007432); + WS(-1097130350, -1073679750); + sum1 = + W(0, -1154560184, -1142702368, -1095841490, 1034517635) + W(1, 1044574577, 992206103, 1025389788, 1008724804) + + W(2, -1096179481, 1053502809, -1112918112, -1143244972) + + W(3, -1145657140, 1035954194, -1095185831, 1045830013) + W(4, 1042446427, -1122388570, 1000633100, 1046084016) + + W(5, -1085973126, 1057148843, 1025817904, -1139683234) + + W(6, -1120139600, 1042008657, -1089907161, 1051926551) + W(7, 1017272605, 1018417129, 1032643785, -1106616153) + + W(8, -1096805620, 1052799826, -1123577295, 1017209342) + W(9, 1008755251, 1025929714, -1098086361, 1043469780) + + W(10, -1143679882, 1016358830, 1030414749, -1102859291) + + W(11, -1107347576, 1050172789, 1014891512, 1026775680); + sum2 = W(0, 1021941992, -1106123822, 1031547466, -1135582819) + + W(1, 1018035752, 1017098260, -1140009235, -1137197699) + + W(2, 1040410800, 1048777558, -1114989513, -1129449426) + + W(3, 1026649920, -1104678389, -1091687878, -1102399061) + + W(4, 1038853959, -1170697076, 1025391436, -1103278668) + + W(5, -1069452203, -1086654414, -1112110740, -1134437671) + + W(6, 1033557641, 1049392706, 1071493698, 1036526271) + W(7, -1155541821, 1030635558, -1110654493, 1026389826) + + W(8, 1075377385, 1051671224, -1119910153, -1127657747) + + W(9, -1117894486, 1046385471, -1093315115, 1003979070) + + W(10, -1130248744, 1010389927, 1031221238, -1107040085) + + W(11, -1101021764, 1042747507, -1119798403, 1007123883); + WS(-1110542776, 1046722292); + sum1 = + W(0, -1112464600, 1033517476, 1047470894, -1131671638) + W(1, 1005087552, -1142745264, 1016022658, -1108629467) + + W(2, -1133454244, 1032980471, -1105826521, 1026603482) + + W(3, 1027765771, -1106997347, 1053995028, -1092318054) + W(4, 1042584756, 
-1129658366, 981819586, -1113497042) + + W(5, -1100439522, 1042288212, -1109105451, 1017901338) + + W(6, 1026835189, -1116788199, -1093531706, 1061947018) + + W(7, -1098489206, -1142191304, 1025143735, -1106078850) + + W(8, 1048713001, -1121614518, -1105038597, 1040275689) + + W(9, -1111607705, 1041100086, -1106135867, 1012478420) + W(10, 977775992, 1016467704, 1024330241, -1124631714) + + W(11, -1115421707, 1041393523, -1114990145, 1020475210); + sum2 = W(0, 1031452303, -1132232893, -1109510079, 1018866101) + W(1, 1014551643, 1032688527, 992881643, -1116591675) + + W(2, 1032912109, -1116915567, 1024567259, -1114177042) + W(3, 978475180, 1016124301, -1114514404, 1055420465) + + W(4, -1097097100, 1042584708, -1121457613, -1114931492) + + W(5, 1032060799, 1059251768, -1118272021, -1109972047) + + W(6, 1034445399, -1094590847, 1051504728, -1088887374) + + W(7, 1048490818, 1024892699, -1130052481, 1040974964) + + W(8, -1087149232, 1044275534, 1035881493, -1108606213) + + W(9, 1032145701, -1110729382, 1039284419, -1114948355) + + W(10, 1032922179, -1121421993, -1116902843, 1035934107) + + W(11, 1032317221, -1104983612, 1032032409, -1133789139); + WS(1059165367, -1104520251); + sum1 = + W(0, 1007492796, -1108628644, -1117534006, 1041312427) + W(1, -1113108212, -1124001749, -1117692608, 1046236477) + + W(2, -1094176525, -1118419948, 1029454180, -1113899741) + + W(3, 1009135144, -1120534447, 1053590938, -1107420459) + + W(4, -1110779368, -1131761404, -1115310467, 1052736188) + + W(5, 1041258594, -1097199722, 1053879643, -1114949702) + W(6, 1014125520, -1118278150, 1001540436, 1055263011) + + W(7, -1098892966, 1025063448, -1113191163, 1036684740) + + W(8, -1106455985, -1100739533, 1045621277, -1105125705) + + W(9, -1129119585, 1015268651, -1112388022, 1040471133) + + W(10, -1126748266, -1126282358, -1118876992, 1008090175) + + W(11, -1114551242, 1020553106, 1026632706, -1127494856); + sum2 = + W(0, 1027505664, -1104527074, 1034957797, 1041601252) + W(1, -1108054404, 
-1122847824, -1115834059, 1048986559) + + W(2, -1098163961, -1125657446, 1041896195, 1034858548) + W(3, 1029084872, -1089420265, 1051828462, 1045110551) + + W(4, -1099714824, -1116127595, 1029257304, -1098414029) + + W(5, 1053778056, 1041501740, -1129563106, -1170669534) + + W(6, -1128264406, -1104406791, 1048509414, 1041829791) + + W(7, -1092212913, 1037831890, -1138405484, 1040673804) + + W(8, -1106881855, -1119286592, 1049994472, -1107117839) + + W(9, -1118563560, -1203758566, -1113115951, 1045914276) + + W(10, -1105182054, 1031508216, -1127212263, -1117936348) + + W(11, -1136870020, 1022365337, -1139619400, -1127498142); + WS(1064754871, 1064755352); + sum1 = + W(0, -1124996947, 1039345889, -1096693846, 1051509189) + W(1, -1110987505, 1036233792, -1126765813, 1034558935) + + W(2, -1096907004, 1048609181, -1122110878, 981607022) + W(3, -1111973122, 1025078634, -1092723587, 1055455058) + + W(4, -1103910610, 1033037628, 995438583, 1031530717) + W(5, 1042155040, 1038916133, 1036071207, 1028131846) + + W(6, -1131935965, -1102805216, 1055052309, -1085470304) + + W(7, 1036962198, -1113728073, 1011044801, -1112086641) + + W(8, 1050402072, -1096310694, 1042970795, -1155599823) + + W(9, 1017902933, -1108706152, 1045488762, -1110967031) + + W(10, 999916154, -1121653045, -1117569083, -1125705364) + + W(11, 1036105095, -1114430562, 1043170799, -1131031997); + sum2 = + W(0, -1133117125, 1035315397, 1034291601, -1093657955) + W(1, -1083656994, 1063017252, 1023646322, -1104375670) + + W(2, 1023753609, 1053589289, -1096844972, -1117431336) + + W(3, 1023723550, 1045506061, -1107539440, -1104725243) + + W(4, -1079943056, 1065218548, 1016880452, -1100884270) + + W(5, 1046403885, 1040733712, -1108767061, -1110535684) + W(6, -1128763803, 1037072250, -1123261308, 987308827) + + W(7, 1066015084, -1088643610, 1032406499, -1100571834) + W(8, 1040670310, 1047435788, 1056439411, -1092199491) + + W(9, -1112596197, 1043010850, -1109210140, -1103530057) + + W(10, 1038430454, -1140617522, 
1024272758, -1106522969) + + W(11, 1037357283, 1046389911, 1050581867, -1093142505); + WS(-1141889920, -1150694570); + sum1 = + W(0, -1121776659, -1156363954, -1110656781, -1150990554) + W(1, 1001967189, 1009891810, 1035438770, 1043244278) + + W(2, -1108191526, 1049522698, 1024784007, 1026909213) + W(3, -1107472311, 1039611642, -1092681840, 1047918398) + + W(4, -1120026675, -1115519345, 1029335145, 1044045394) + W(5, -1089342271, 1044281805, 1041259438, 1032535224) + + W(6, 1032163820, 1031781705, -1107172434, -1142656241) + + W(7, 1035091531, -1156224954, -1122959321, -1113754566) + + W(8, -1110931796, -1118178969, -1114065315, -1114416227) + + W(9, 1035788170, 1041298854, -1106653204, 1044014582) + W(10, 1037418924, 1034927023, 1006930832, -1113010266) + + W(11, -1098776751, 1035377494, 968315538, -1127266575); + sum2 = + W(0, -1096670021, 1059540854, 1045575353, -1133958494) + W(1, 1031038260, -1118270456, -1102787949, 1039622534) + + W(2, 1037899964, 1013042349, -1110193614, 988987510) + W(3, -1099441746, 1064198145, 1030855272, 1040028330) + + W(4, -1117097690, 1032105974, 1043637876, -1086802625) + + W(5, -1101655681, 1036771998, -1125265460, -1117707023) + + W(6, 1052388948, -1085971065, 1033969343, 1032851236) + + W(7, -1103569731, -1134648980, 1042101483, -1113298762) + + W(8, -1105771070, -1120619083, 1049976190, 1021389004) + W(9, -1135324433, -1098667485, 1036465297, 981463467) + + W(10, -1101523135, -1119296945, 1042670443, -1104634628) + + W(11, -1104173140, 1030229840, 1035574893, 1035007302); + WS(1060329015, -1107100438); + sum1 = + W(0, -1139109867, 1016013223, 1052529520, -1098269535) + W(1, -1120993331, 1025836257, -1126768152, -1108878752) + + W(2, 1042283693, -1103310414, 1033078264, -1113589378) + + W(3, -1154508124, -1139382101, 1051369597, -1093610355) + W(4, 1033661966, 1029380042, 1024792785, 1046477388) + + W(5, 1051787130, -1088582834, 1015463488, -1124637616) + + W(6, -1145234942, 1046417836, 1040695538, -1090008057) + + W(7, 
1035018943, -1127749832, -1121126338, 1044791733) + W(8, 1046651812, -1097854544, 1029328303, 1030094510) + + W(9, -1165337473, -1115375281, 1039020124, -1110993568) + + W(10, 980238373, -1117533511, -1128378904, 1030070044) + + W(11, 1046922680, -1105218789, -1121430349, 1016154976); + sum2 = + W(0, -1122821091, 1039646422, -1126218548, 1020520346) + W(1, 1030755819, 1003947528, 1016851522, -1156337616) + + W(2, -1123806894, -1105518185, 1031650855, 980292032) + W(3, -1106087013, -1131895450, 1035943528, 999070248) + + W(4, 1015158698, -1123933765, -1109184296, -1080883791) + + W(5, 1071805249, -1096231655, 1043321474, -1122746065) + + W(6, 1045004090, -1074990048, 1071092779, -1125969764) + + W(7, -1125796080, 1018391998, 1015529466, -1105476432) + + W(8, -1126394390, 1028426171, 1026097175, -1126690400) + W(9, 1026918939, -1104782698, 1031828962, 1030594487) + + W(10, -1140334484, 1018627618, -1115690469, 1022339242) + + W(11, -1122803425, -1127395662, -1110808154, -1149029768); + WS(1004067712, 1029538397); + sum1 = W(0, 1039684517, 1025050595, 1055781642, -1093421305) + + W(1, -1134522377, 1033902960, -1113057966, -1112884456) + + W(2, 1052279792, -1096595713, 1008080123, -1134909265) + + W(3, 1034662227, -1120136235, 1057037092, -1096171643) + W(4, 1038845557, 1000314570, 976341006, -1106127376) + + W(5, 1048957300, -1083333863, -1104830972, 1017566030) + + W(6, -1116361615, -1108532672, 1059667231, -1093970477) + + W(7, -1107387175, -1120056627, 1039793800, -1136083213) + + W(8, 1049781884, 1050503466, -1105145436, 1034175935) + + W(9, -1114202454, -1109109944, 1046150498, -1098876866) + + W(10, -1111371224, -1129568684, 1023346679, 1026195563) + + W(11, 1052752022, -1119746148, -1106375815, 1002478074); + sum2 = W(0, 996458418, 1023361218, 1020176326, -1105686968) + W(1, -1112170015, 1016740014, 1033154491, -1129026548) + + W(2, 1036155261, -1136279909, 1029838361, -1122796554) + + W(3, -1124552606, -1113149678, -1107309754, -1101182829) + + W(4, 
-1110765232, 1027141165, 983965669, -1096377075) + W(5, 1064108379, 1059185253, 1029555247, -1111484496) + + W(6, -1113544599, -1108470744, -1113642974, -1094171461) + + W(7, 1048386684, 1033692775, -1118592979, -1107366613) + + W(8, -1147140265, -1097506500, 995412594, -1108579478) + + W(9, 1024911403, 1040480632, -1123866147, 1038318571) + + W(10, 1045927904, -1126067866, -1120675710, 978762058) + + W(11, -1127049668, 1027330889, -1102738273, 1023291782); + WS(1047133404, -1079170418); + sum1 = W(0, 1009155056, -1122581460, 1042667713, 1030334583) + + W(1, -1108143194, 1027163842, -1116032717, -1127398915) + + W(2, 1041078541, -1113404572, -1123819760, -1122554086) + + W(3, -1135035740, -1111731450, 1051022508, -1111377849) + + W(4, -1112180142, 1023811915, -1132905800, -1098422622) + + W(5, 1065228285, -1104817319, -1102890728, -1133181796) + + W(6, 1006908676, -1136427723, 1042380775, -1091891141) + + W(7, -1111098047, 1001483047, -1123509635, 1019095722) + + W(8, 1050271906, -1103926112, 1023382911, -1116735535) + + W(9, -1129150906, 1037504657, -1107182030, -1121675713) + + W(10, -1136273159, 1015217807, -1147535977, 1012838771) + + W(11, 1023931745, -1123007382, -1123177070, -1130799202); + sum2 = W(0, -1151544588, 1017708852, -1147640076, 1008923057) + + W(1, 1011942449, 1018406594, -1155029688, -1121981859) + + W(2, -1129630940, 1024218233, 998377109, -1133221663) + + W(3, 1018065470, -1114670464, -1093073209, -1119653793) + + W(4, -1131988255, 1025316590, 1007239987, 1049278985) + + W(5, -1077783117, -1083801634, 1041518161, -1123603132) + + W(6, -1125453823, 1041079714, 1074285479, 1047560438) + + W(7, -1114079754, -1131599727, -1118997712, -1156221672) + + W(8, 1034878186, 1028846010, -1118519138, 1024940191) + W(9, 1007542759, -1120658712, 1016277382, 1036336794) + + W(10, -1121956184, -1131785133, -1133631258, -1113215345) + + W(11, 1033160981, 1022080564, 1027490516, -1130450701); + WS(1043550940, 1028476494); + sum1 = W(0, -1115183742, -1130981802, 
-1100607937, 1051762223) + + W(1, 1000007075, -1121847282, -1121780237, -1102302048) + + W(2, -1105099981, 1040535662, -1138550724, -1111548899) + + W(3, -1117842775, -1117998761, -1096882923, 1059510884) + + W(4, -1102853271, -1132334487, -1110838377, -1094318158) + + W(5, 1062548805, 1068023761, -1099046056, 1013492582) + W(6, 991794198, -1102037997, -1103616404, 1055882697) + + W(7, -1102330336, 1018468811, -1111061058, -1115328139) + + W(8, -1097580450, 1041348961, -1131654944, -1119471984) + + W(9, 1015980795, -1113105937, -1103768718, 1034161697) + + W(10, -1112888641, -1129682576, -1113960535, -1106426583) + + W(11, -1100373208, 1044552262, -1110284503, -1148988811); + sum2 = W(0, 1029690847, -1103820712, 998970743, 1028297515) + + W(1, -1122562191, -1133991676, -1115037203, 1043435491) + + W(2, -1096312501, 1046052979, -1115911439, 1007821180) + + W(3, -1127742454, -1100462798, 1044923803, -1099477234) + + W(4, 1030898927, -1113716427, 1036903221, 1028701483) + W(5, 1041049643, 1065575031, -1096910875, 1036974934) + + W(6, -1106227374, -1121926871, 1030485727, -1098681477) + + W(7, 1034498637, -1111652935, 1039025001, -1126383902) + + W(8, -1125321886, 1040854499, -1115709255, -1126914630) + + W(9, -1108879063, 1017141302, 1008577020, -1106467082) + + W(10, 1013474716, -1128843302, 1016988574, -1115441627) + + W(11, 1033537170, -1122382559, -1129067614, -1114648723); + WS(-1079050332, -1087647968); + sum1 = W(0, -1138480508, -1112879708, -1095708941, 1054719611) + + W(1, 1031575959, -1122888245, -1122115440, 1026750308) + + W(2, -1102419558, 1048762675, -1106236805, -1139407140) + + W(3, 1024624314, 1027382179, -1136300523, 1057423590) + + W(4, -1115045376, -1136279992, -1106223181, -1116109267) + + W(5, -1083772539, 1057166968, -1096438944, -1112079518) + + W(6, 1029330596, 1024806390, -1103579129, 1058690419) + + W(7, -1115914082, 1025354440, -1120448614, -1128868784) + + W(8, -1102450653, 1049252525, -1116410163, -1128866725) + + W(9, -1134294785, 
1022822666, -1100406055, 1046884233) + + W(10, -1127656777, 1008311211, 1004544641, -1115751616) + + W(11, -1099401009, 1051036287, 1031406769, 1030150228); + sum2 = + W(0, 1018441725, -1121226957, 1025980595, -1143973174) + W(1, -1145354702, -1118705847, 1018735508, -1128043345) + + W(2, 1035475244, 1028848366, 1010180031, 1001175670) + W(3, -1106801879, 1036012925, -1093599779, -1119821797) + + W(4, -1106432772, -1114705464, 1042307735, -1105109021) + + W(5, 1063056253, 1055391108, -1103238145, 1024485983) + + W(6, -1108191465, 1026535678, -1097885523, -1118265757) + + W(7, -1114258989, -1109715589, -1153036923, 1024075219) + + W(8, 1035122552, 1031788992, -1114303163, 1025382280) + W(9, 1023515972, -1143871918, 1028845741, -1122707515) + + W(10, 1031455538, -1125366621, -1130614305, -1115606924) + + W(11, -1124060823, -1149779899, -1117711701, -1122890135); + WS(1061221431, 1002405371); + sum1 = + W(0, -1123061017, -1123480946, -1090405194, 1054235011) + W(1, 1026563665, 1000058490, -1109190069, 1033046959) + + W(2, -1104934581, 1044209302, -1108435303, -1121369187) + + W(3, -1111043962, 1050201177, -1102475777, 1057895846) + W(4, 1037490845, 1030151784, -1094345134, 1042710035) + + W(5, -1086823335, 1052530935, -1102625680, -1111380603) + + W(6, -1146354418, 1050099666, -1097235340, 1058601910) + W(7, 1033897294, 1040756961, -1113941840, 1026923762) + + W(8, -1091215734, 1050794548, -1126650490, -1126829850) + + W(9, -1145213652, 1032794752, -1094526523, 1041457976) + + W(10, -1110275296, 1032331475, 1017803349, 1029204218) + + W(11, -1089600016, 1055600534, 1021417496, 1033488871); + sum2 = + W(0, 1049937345, -1089706816, 1050626093, 1030967529) + W(1, -1115709839, 1039088353, 1058657690, -1087536311) + + W(2, -1106625833, -1105431871, 1036734782, -1109192755) + + W(3, 1071079778, -1078069733, 1020012689, 1044296437) + W(4, -1122006247, 1031394755, 1072348129, -1078647761) + + W(5, -1094966474, -1112600090, -1122763227, -1116226089) + + W(6, 1066389113, 
-1079714039, -1098171132, 1038432439) + + W(7, -1130276743, 1022577738, 1049150259, -1090508646) + W(8, 1050328795, 1044145959, 1024733405, 1012728894) + + W(9, -1119844839, 1022376790, -1105718319, -1126694997) + + W(10, 1021875579, -1123093509, -1113785170, -1103849791) + + W(11, 1052174137, 1034822829, -1112758982, 1013882918); + WS(-1092893294, -1089220584); + sum1 = W(0, 1008699951, -1127519944, 1053766130, -1103359682) + + W(1, -1104704049, 998920846, 1010493299, -1118558929) + W(2, 1051919929, -1096189801, 1032200864, 1028526614) + + W(3, -1124403149, -1106263811, 1056071340, -1097091836) + + W(4, -1111082358, -1149496748, 1025199074, -1113928672) + + W(5, 1052175436, -1086367179, 1015736929, 1019181009) + + W(6, -1117387089, -1098638288, 1062605050, -1096005139) + + W(7, -1120379023, -1115659002, 1015178277, 1041393441) + + W(8, 1047707912, -1102452900, -1123219920, 1027645503) + + W(9, -1125915034, -1112838969, 1053546873, -1101730612) + + W(10, 1019333603, -1131717807, -1130896954, 1035998582) + + W(11, 1049473086, -1095432549, -1136688761, -1136451331); + sum2 = W(0, -1143112647, 1034689803, -1115356591, -1121270233) + + W(1, 1017968792, 1021729984, 992704062, -1109431235) + W(2, 1039097516, 1048248946, -1124530814, 1005857263) + + W(3, -1124855502, 1033324328, -1073085951, -1106299172) + + W(4, 1023517229, -1126326370, 1038449046, -1098090520) + + W(5, -1063972596, -1091325299, -1113483013, 1015994888) + + W(6, -1105916124, 1038361982, 1062319475, -1090314642) + + W(7, -1141826407, 1013997779, 1035072543, 1040866773) + W(8, 1085456662, 1056006422, 1034240002, -1134900259) + + W(9, -1114904409, 1032024575, 1062308854, 1043935118) + + W(10, -1124783482, -1135264403, 1019251456, -1164332444) + + W(11, -1102205120, -1107459099, 1032935811, 1001043447); + WS(-1104754908, -1106735671); + sum1 = + W(0, 1030490103, -1117923535, -1088908700, 1033028505) + W(1, 1045999740, -1120248133, 1032595044, 1002237924) + + W(2, -1094516030, 1029213684, -1144798556, 
1015682178) + + W(3, -1128810020, -1122216143, -1092804364, 1050263192) + + W(4, 1022746927, -1129595020, 1032319302, -1120462507) + + W(5, -1102093923, 1063837581, -1106958295, 1030078356) + W(6, 1019231224, 1025980110, -1088198897, 1058220138) + + W(7, -1107413149, 1021953856, 1025969502, -1123369072) + W(8, -1096286026, 1054477272, 995801516, -1153493672) + + W(9, 1016077744, 1034830735, -1102086589, 1045907999) + + W(10, -1115665070, 1019926661, 1024124136, -1104242198) + + W(11, -1103741447, 1049334234, 1030012489, 1022398552); + sum2 = W(0, -1116247065, 1035833889, -1087068607, -1107323417) + + W(1, -1109274147, 1021616338, 1015145430, -1107082251) + + W(2, -1079268655, -1102842161, 1016322550, -1133554677) + + W(3, 986051432, -1114579693, 1022672030, 1049680335) + W(4, -1111459111, 1010843781, -1117862601, 1043695503) + + W(5, 1067901888, 1032100715, 1026073955, 1017398706) + W(6, -1114715779, 1039299404, 1059152338, 1040660749) + + W(7, 992278036, 998891466, 1034116930, -1114493167) + W(8, 1008422125, -1115195239, -1173738319, -1139208557) + + W(9, -1144744186, -1120378069, 1020328850, 1018816198) + + W(10, 1033325633, -1138764845, 1026244953, 1019204162) + + W(11, -1111927269, 1021018582, -1123657301, 1016479082); + WS(-1092340590, -1082645376); + sum1 = + W(0, 1018128252, 1033191659, 1040446513, -1112709788) + W(1, 1031456758, -1113441282, 1018184990, -1106475329) + + W(2, 1036685563, -1105689791, 1040219053, -1127103440) + + W(3, 1028162329, -1101261661, 1061713267, -1086509296) + + W(4, 1042438664, -1123459213, -1117677972, -1102532636) + + W(5, 1042646693, 1038532783, -1111899057, 1018898486) + W(6, -1115495745, 1031605429, -1089327101, 1064314083) + + W(7, -1100451664, 1033438227, -1115409474, 1010011868) + + W(8, -1106573237, 1046976369, -1105951352, 1029058545) + + W(9, 1015642046, -1127814216, -1105713919, 1039016129) + + W(10, 1010066702, 1013299750, -1118723567, -1112449263) + + W(11, -1112423953, 1030956248, 1022848097, -1130132966); + sum2 = 
W(0, -1107775669, 1040551451, 1027302533, 1033412933) + + W(1, -1135101756, -1131050762, -1121347079, 1042459857) + + W(2, -1122087227, -1113656153, 1017950151, -1130160250) + + W(3, -1118975841, 1031184422, 1016161914, 1035641742) + + W(4, -1109563699, 1017438602, -1121125435, -1152381105) + W(5, 989383522, -1114487413, 1046120628, 987844130) + + W(6, -1123325897, -1105935240, 1045806619, 1041762467) + + W(7, -1114574520, -1117833439, 1012434826, -1115286413) + + W(8, -1125562074, 1029416951, 1035110196, -1173771715) + + W(9, -1129695564, -1124425722, -1111270899, 1028252326) + + W(10, 998528980, -1121778743, 1006663086, -1111096901) + + W(11, -1114174571, -1127668218, 997585257, -1126450718); + WS(1052284526, 1042464092); + sum1 = + W(0, 1010280196, 999914700, 1057530557, -1098111272) + W(1, -1101387838, 1027464615, -1136974408, -1110845507) + + W(2, 1046554367, -1100087104, -1110795931, -1128121686) + + W(3, 1026365340, -1109035161, 1060667985, -1090659644) + + W(4, -1102616600, 1001926272, 1020617774, -1110448219) + + W(5, 1063323154, -1094987596, -1105684762, 1025859742) + + W(6, 1023947782, -1110075532, 1043924417, -1095988958) + + W(7, -1124311688, 1017827156, -1127515216, 1038195741) + W(8, 1050039723, -1097163609, 1018127786, 1034844538) + + W(9, -1129271461, -1114945332, 1042422080, -1105136802) + + W(10, -1124435598, -1130637466, 1007810505, 1019473092) + + W(11, 1052688005, -1095692091, -1114597706, 1027019478); + sum2 = + W(0, 1025355846, -1119913429, 1034044275, -1140464466) + W(1, -1091074448, 1041357090, -1124685033, 1010871130) + + W(2, 1036346050, 1032812135, -1086522508, 1037018199) + W(3, 1029205900, 1016143206, -1128364593, 1071964190) + + W(4, -1074918519, 1033277276, 1011755468, 1033794365) + + W(5, -1133342818, 1067600837, -1089329657, -1111447139) + + W(6, -1113277680, -1140030878, 1020927778, 1039362648) + + W(7, -1106053895, -1118126225, 1035269289, 1020099653) + W(8, 1035477071, 1033733070, -1108456989, 1032003060) + + W(9, -1122502097, 
991986873, 1011575434, 1008065452) + W(10, -1146528221, -1117647411, 1013457876, 1035095720) + + W(11, 1029499077, 994468553, -1106105254, 1009180302); + WS(-1085785015, -1080175544); + sum1 = W(0, 1035373246, -1103151077, 1058377668, -1091874455) + + W(1, 1025579124, -1111695537, -1125806618, 1045216768) + + W(2, 1050584367, -1094766367, 1043611260, 1043525714) + + W(3, -1118822551, -1094452856, 1054861331, -1107649205) + + W(4, -1106387815, -1102079139, 1044894533, 1039875576) + + W(5, 1046782533, -1090132878, 1049375324, -1105407270) + + W(6, -1102223008, -1151103277, 1057861265, -1090472533) + + W(7, -1115030560, -1144672552, 1041222777, -1094574331) + + W(8, 1051791466, -1111571937, -1137989279, -1102123612) + + W(9, 1023709757, 1048044105, 1043274206, -1098390656) + + W(10, 1049548184, 1041305154, -1122183565, -1103877399) + + W(11, 1058273114, -1092114299, -1106419572, -1116205169); + sum2 = + W(0, 998449416, -1118465870, -1124628345, -1148690252) + W(1, 1037770975, -1105952484, -1121990028, 1035743758) + + W(2, 1040306990, 1001004257, -1132010249, 1037805583) + W(3, 1039055377, -1103571680, -1098483719, 1060096717) + + W(4, -1108861100, -1087084008, -1115523342, 1049274722) + W(5, 964150684, 1077176219, 1047972499, -1070507600) + + W(6, -1132069983, -1106547389, 1024621733, 1060014437) + + W(7, -1098623231, -1092504349, 1032292249, -1126693677) + + W(8, -1103504000, 1016774454, 1034159573, -1105946989) + + W(9, -1128335569, 1034009109, 1043582890, -1107960970) + W(10, 1020716885, 1041667680, 998312734, -1116877357) + + W(11, -1116867442, -1139511962, 1042264637, -1102914489); + WS(1041580764, 1025947967); + sum1 = + W(0, 1032565172, 1034662848, 1053611982, -1093835868) + W(1, -1131110976, 1036244351, -1116825273, -1104706235) + + W(2, 1050747028, -1092905173, -1131069497, -1119290642) + + W(3, 1019951750, -1115255997, 1050206724, -1095669879) + W(4, 1036897764, 1017394926, 1022134396, -1107214727) + + W(5, 1063007652, -1096926641, -1105182467, 1029927621) + 
+ W(6, -1111515158, -1098820316, 1060757006, -1089733625) + + W(7, -1098715869, -1106823604, 1034025399, 1032808736) + W(8, 1052025888, 1049974094, -1104277767, 1037476046) + + W(9, -1110133221, -1102823188, 1043880105, -1099659531) + + W(10, -1106501955, -1114565306, -1159719714, 1021100817) + + W(11, 1052817434, -1119725058, -1106408671, -1122404893); + sum2 = W(0, -1132209742, 1031078182, -1131885988, -1109418986) + W(1, 969823813, -1135041624, 986836209, 1029631070) + + W(2, 1025515900, -1129378830, 1027058278, 1012367442) + + W(3, -1121945777, -1119773491, 1029199828, -1113141662) + + W(4, 1030656895, -1116640321, -1170553346, 1020231728) + + W(5, -1120148774, 1055904551, -1099466937, 1014349374) + + W(6, -1138085578, 1023900693, -1122079840, 1057958803) + + W(7, -1089769002, 1019607807, -1127752664, -1133642976) + + W(8, 1019687015, 1044815557, -1096970616, 1019685270) + W(9, 1024317954, 1016594015, 1019770946, -1115676013) + + W(10, -1105180704, 1040370156, -1170071298, -1123929889) + + W(11, -1134784812, 1040873658, -1104818759, 1020720783); + WS(-1101021916, -1101341893); + sum1 = + W(0, 1026213188, -1104193847, -1099125811, 1055643976) + W(1, 1025577212, -1110018309, -1128959914, 1029994953) + + W(2, -1096018912, 1049220736, -1111820508, 1025253614) + + W(3, 1026395551, -1146733010, -1093634574, 1055313981) + + W(4, -1119144868, -1114303945, -1114900877, 1036038140) + + W(5, -1086773562, 1052012604, -1127551698, -1123712161) + + W(6, -1152047246, 1031817213, -1090412335, 1060627153) + + W(7, -1111302883, 1028280877, 1012420650, -1118727997) + + W(8, -1107844781, 1051291321, 1025725832, -1119269431) + W(9, 1006057769, 1021561044, -1104575744, 1049980081) + + W(10, -1112108341, 1031833384, 1026345301, -1106810537) + + W(11, -1104632714, 1047342967, 1003961523, 999050183); + sum2 = W(0, -1131988010, -1111316079, 1041465340, 1024389323) + + W(1, -1121573843, -1149117460, 1023490159, 1007442131) + + W(2, -1100903601, 1055734435, -1125698351, -1131908940) + + 
W(3, -1126411872, -1122871173, 1038743702, 1080207498) + + W(4, 1043717463, -1114017647, -1164111651, 1046145376) + + W(5, -1093182708, 1078445253, 1049726225, -1119425741) + + W(6, -1121501747, 1040330964, 1049859497, -1068399047) + + W(7, -1122596352, 1019240934, 1031886393, -1109560063) + + W(8, -1098635398, -1066503158, -1100760677, 1009702615) + + W(9, -1117332099, 1016982738, 1035352171, -1109489195) + + W(10, -1113462329, 1032687935, 1035364539, 1005935896) + + W(11, -1113093917, 1012210427, 1035980752, -1141270643); + WS(1046490332, 1041425064); + sum1 = W(0, 1024529499, -1116828595, -1093042204, 1043650965) + W(1, 1015429414, 1019425762, 1033054902, 1002731779) + + W(2, -1098109922, 1045029126, -1106337284, 995804238) + W(3, 1031945755, 1040980232, -1092614541, 1058500980) + + W(4, 1033182334, 1032419541, -1146773895, -1160896989) + + W(5, -1086741475, 1055608303, -1109695552, -1179835378) + + W(6, 1029605664, 1052617834, -1091564618, 1058257970) + W(7, -1174108025, 1044059953, 1004520717, 1015420456) + + W(8, -1090483479, 1019101890, 1029823567, -1127672093) + + W(9, 1031502108, 1044142352, -1094325635, 1041689704) + + W(10, -1123497953, 1032927635, 1024614604, 1032157630) + + W(11, -1086427940, 1049761551, 1041880275, 1040104968); + sum2 = W(0, -1122431319, 1026151548, -1122010793, 1049905644) + + W(1, -1102178930, -1124048569, -1114479591, -1106953243) + + W(2, 999999917, -1108260971, -1125976243, -1126306455) + W(3, 1024826864, 1048064546, 992235770, 1050719290) + + W(4, -1123774104, 1025075060, -1100833312, -1097347844) + + W(5, 1040448763, 1045454882, -1096948195, -1105554812) + W(6, 1043724086, 1037967160, 1044292364, 1049686405) + + W(7, 1038302972, 1034346164, -1109354990, -1109085075) + + W(8, 1032432738, -1111087807, -1108325988, -1114960687) + + W(9, -1130104331, -1110876131, -1143420941, -1098496523) + + W(10, -1118752296, 994535386, 1015245663, 1007980535) + + W(11, 1040568157, 1043523736, -1113347688, 1032072922); + WS(-1096079726, 
-1086813702); + sum1 = W(0, -1135457777, -1105268453, 1045566625, -1104980990) + + W(1, -1105625101, -1128467237, 1020632987, -1134572551) + + W(2, 1038076635, -1125909619, -1145052038, 1035122115) + + W(3, -1136898909, -1100910973, -1141635906, -1094783105) + + W(4, 1032999502, -1125440663, 1041289173, -1166801940) + W(5, 1058638213, 1057673490, 1030475041, 1035865453) + + W(6, 1012368645, 1038525034, -1104077708, -1100424110) + + W(7, -1113674176, 988204202, 1002632984, -1102504381) + + W(8, 1033251913, -1111352769, -1113040784, -1119749552) + + W(9, 1024629150, 1029902179, 1009307365, 1012100357) + W(10, 992583714, 1027550920, -1127204699, -1110256943) + + W(11, 1022942147, -1133590617, -1114381639, -1124393059); + sum2 = W(0, 1009177065, -1117003048, 1042183695, -1105766164) + + W(1, -1112946812, 1019480223, -1120740066, 1013092463) + + W(2, 1025589372, 1042721142, -1113055730, 1026018809) + + W(3, 1031366584, -1102800003, -1111208570, -1149080533) + + W(4, -1103263899, 1020058227, 1041576942, -1090661723) + W(5, 1068855162, 1040487387, 1030911277, 1007104581) + + W(6, -1100270767, 1059100650, -1085550755, 1023619821) + + W(7, -1103276630, 1029611268, 1045439690, -1097382737) + + W(8, 1044086134, -1099098175, -1114239046, -1162581429) + + W(9, -1125959183, 1022453518, 1032267243, -1126080241) + + W(10, -1107084863, 1037226018, 1033773937, -1106788899) + + W(11, 1025170288, -1106320787, 1019625936, -1116554192); + WS(1059841719, 1028884484); + sum1 = W(0, -1113543486, -1108411818, -1112074625, 1058342439) + + W(1, -1101044205, -1143395087, -1112448166, -1111506273) + + W(2, 1034453786, 1041783662, -1112657624, -1122260358) + + W(3, -1123514455, -1104706915, 1033474668, 1050338440) + + W(4, -1103529079, -1124590486, 1014814024, -1096756366) + + W(5, 1052483434, 1043430567, -1099567912, -1132902244) + + W(6, -1116573889, -1106354949, 1048710884, -1116369607) + + W(7, -1110811775, -1118491109, -1120616579, -1107115957) + + W(8, 1049505778, -1117001986, -1119641312, 
-1118809775) + + W(9, -1132301434, -1117349474, 1050151687, 1011608836) + + W(10, -1133908615, 996976763, -1127146586, -1105758103) + + W(11, 1056594572, -1106214629, -1113349351, -1114523384); + sum2 = W(0, -1131846847, -1142019487, -1119272484, 1043688817) + + W(1, -1110979463, -1128104285, 999597315, 973346488) + + W(2, -1113005256, -1112114230, -1112619479, 1003422803) + + W(3, 997751878, -1127636492, -1113445057, 1056371527) + + W(4, -1097247478, 1021279303, -1114921220, -1114646695) + + W(5, 1055277760, 1041350431, 1019660165, -1127196333) + W(6, 1031101790, -1098897499, 1032426103, 1038923617) + + W(7, -1105379211, -1138766624, -1115596944, 1027087956) + + W(8, 1036539833, -1123786644, 1026606310, -1129408714) + + W(9, 1024706196, -1111832500, -1114392819, -1130636099) + + W(10, 1012894502, -1130181692, -1120544321, 1029100436) + + W(11, 1010252474, -1123180191, -1125326592, -1164469676); + WS(-1092619630, -1089382730); + sum1 = + W(0, -1111364766, 1023648980, 1039669985, -1112475547) + W(1, -1102256285, 1036516737, 1043539187, 1020312451) + + W(2, -1096148415, 1048019422, 1027548272, -1110790259) + W(3, 984781533, -1115220152, 1055391282, -1085560118) + + W(4, 1048070471, 1016263062, 1030068626, -1101226428) + W(5, 1026945555, 1059189430, -1108016176, -1120756633) + + W(6, 1026274177, 1038621209, -1081505706, 1059392734) + W(7, 1040580240, 1007770772, -1111039089, -1105671421) + + W(8, 1045179252, -1105049488, 1037378816, 1034683832) + W(9, 1024515101, -1101912057, -1132720100, 1041558112) + + W(10, 1031071010, -1109624324, -1120853581, 1015856825) + + W(11, 1027675681, -1133761880, 1032748839, 1021933410); + sum2 = W(0, 1021752691, -1111677325, -1108952074, 1033183913) + + W(1, 1044091848, -1113755692, -1117827136, 1031932470) + + W(2, -1110048197, 1038437939, -1103930854, 1027358915) + + W(3, -1125983252, -1130612142, 1045228241, 1050756049) + + W(4, 1008734718, -1104619422, 990310070, -1104195843) + W(5, 1051763659, 1045197632, -1105931725, 1032921649) + 
+ W(6, -1109957501, -1168184730, 1028013889, -1115591069) + + W(7, -1149204891, -1117367196, 1040724727, -1105272655) + + W(8, -1136328910, -1123843648, 1020655719, -1140627498) + + W(9, -1109878975, 1035492359, -1114456338, -1120329563) + + W(10, -1120613434, 1033876328, 1031887118, -1103738768) + + W(11, -1114723198, 1035246991, -1121689275, -1121880867); + WS(1051433070, 1041054969); + sum1 = W(0, 1027474774, 1025557871, -1105663671, 1035613609) + W(1, 1041537220, 1027665948, 995089009, 1019369911) + + W(2, 1032468488, 1037418314, 1036952965, -1122283488) + W(3, 1034863641, 1049084179, -1096978722, 1039456478) + + W(4, 1041156238, 1041167849, -1109189270, 1046403907) + + W(5, -1082587707, -1080639932, 1049567823, -1112387034) + + W(6, 1039503310, 1044512531, -1102025153, 1026454114) + W(7, 1040087661, 1039611744, -1126811322, 1035909664) + + W(8, -1107200379, 1042549431, 1039105725, -1137459494) + + W(9, 1033371404, 1033234174, -1104819461, 1042229543) + + W(10, -1131034584, 1031071714, 1010666397, 1028564471) + + W(11, -1106755535, 1034360184, 1031577769, 1033297350); + sum2 = + W(0, 1039097792, -1103860174, -1106228186, 1004583985) + W(1, 1031892389, -1133334119, -1128793460, -1106733661) + + W(2, 1050938198, -1097187313, 1046933969, -1107245750) + + W(3, -1140010118, 1043000544, -1100268114, 1021007203) + + W(4, -1107049139, 1039577114, 1022871695, -1106671798) + W(5, 1039252188, 1053089515, 1049075999, -1104825734) + + W(6, -1138263433, 1046664654, -1099105674, 1048715277) + + W(7, -1103930146, 1030321621, -1112923836, -1123142296) + + W(8, -1121884665, -1114597930, 1022883749, -1117968576) + + W(9, 1026478628, 1037719970, -1100173198, 1047464456) + + W(10, -1098080196, 1036807670, -1110368657, 1042580191) + + W(11, 1034926170, -1106315502, -1108178582, 1018138445); + WS(-1097236334, -1078251511); + sum1 = + W(0, -1147379729, -1107131303, -1102832418, 1058323440) + W(1, 1023750586, -1116361496, 981774949, 1007351583) + + W(2, -1125527926, 1053289720, 
-1121375351, -1124892056) + + W(3, -1145120489, 1033603653, -1095625930, 1056660465) + + W(4, 1026746467, -1128008098, -1112510828, 1043141838) + + W(5, -1082084799, 1032726417, 1033795604, -1117973855) + W(6, 975327689, 1029606457, -1090577043, 1058345481) + + W(7, 1025947909, -1148460889, 1008364580, 938727969) + W(8, -1093823475, 1053416161, -1131151340, -1125502955) + + W(9, -1140670990, 1020032977, -1098508888, 1046390514) + + W(10, -1119905544, 999577711, 1022969831, -1108966769) + + W(11, -1096801949, 1049766564, 1037296218, 1010324189); + sum2 = + W(0, 1024894779, -1102276353, -1120020336, -1081787381) + W(1, 1039662448, 1023553607, 1020144998, 1025017931) + + W(2, -1096890621, -1080908066, 992888305, -1118655405) + + W(3, -1118791847, -1107221556, 1050182775, -1102161510) + + W(4, -1130889346, -1120230550, 1007750652, 1046635995) + W(5, 1039741552, 1072604155, 1039835396, -1126400334) + + W(6, -1137666916, -1132306598, 1043589185, 1055213170) + W(7, -1110509653, 1020541198, 1023698735, 1023586615) + + W(8, -1108361045, -1115556916, -1122324709, 1020891302) + + W(9, 1009542396, 1030880679, -1114600836, 1037616572) + + W(10, 1000980713, -1119304668, -1129126018, 1015842286) + + W(11, 1024872627, -1105259074, -1124270218, 1033066840); + WS(-1095840110, 1065907853); + sum1 = + W(0, -1149009607, -1123781617, 1057715094, -1102734645) + W(1, -1101560548, 1034276991, 1029310442, -1130903741) + + W(2, 1048478500, 1041326469, 1036129479, 1021921030) + W(3, -1122029108, -1115466131, 1058442742, -1088947208) + + W(4, -1107277134, 1036949012, -1136794300, 964920945) + W(5, 1045634572, -1083008792, -1109559039, 1024401462) + + W(6, -1118254622, -1115351608, 1052165513, -1096684061) + + W(7, 1036543086, -1114280609, -1132243967, 1043406509) + W(8, 1053323726, -1089856222, 1024712701, 1035777670) + + W(9, -1126823604, -1116119295, 1042519141, -1127763004) + + W(10, 1009014011, -1132072137, 1010554921, 1040435870) + + W(11, 1051535704, -1095672430, -1114598742, 
1024787534); + sum2 = + W(0, -1122765153, -1112086475, 1040388299, 966593378) + W(1, -1114107386, 975247631, -1119761608, -1118318168) + + W(2, 1023061820, -1102263327, -1113965021, -1137846028) + + W(3, -1115823403, -1121250504, -1128744886, -1110151426) + + W(4, 1045334069, -1111548146, -1123357438, -1119202357) + + W(5, 1057284387, 1059190541, -1118800384, 1023792011) + + W(6, -1105805209, 1024515185, -1103324019, -1106793279) + + W(7, -1164047122, -1112106446, 1031734555, -1130234658) + W(8, 1037222780, 1050053336, 1025627836, 1020007628) + + W(9, -1111986136, -1134784044, -1110952019, -1107073751) + + W(10, -1114668630, -1125560981, -1126486235, 1026675759) + + W(11, -1117543075, 1021583022, -1124432766, -1131408661); + WS(1024351088, -1094038469); + sum1 = W(0, 1016930687, 1038061004, 1052546457, -1086527812) + W(1, 1040515838, 1006605995, 1024919184, -1118029957) + + W(2, 1050276351, -1103330015, 1027568116, 1004978763) + W(3, 988693323, 1005748798, 1052906127, -1085615881) + + W(4, 1040353352, 1030413326, 1038267403, 1038925818) + W(5, 1062157201, -1097782940, -1124671689, 1031140124) + + W(6, 1001354275, 1032752795, 1055274272, -1088621706) + + W(7, -1113419105, 1009586849, -1123840710, 1034287148) + + W(8, 1050137483, -1090365828, 1017331974, 1023985459) + + W(9, -1125725377, -1110688307, 1036684158, -1108159426) + + W(10, 1018660298, -1129057942, -1120680465, 1036629872) + + W(11, -1112935826, -1098987606, 1039926041, -1120649652); + sum2 = + W(0, 1018662896, 1033363591, -1101021011, -1107389045) + W(1, 1013978472, -1120396150, -1131745232, 1031409358) + + W(2, -1109130821, 1050735765, -1122086781, 1024022584) + + W(3, -1136728576, 1036576685, -1097527529, 1043333749) + W(4, 1029525730, 1015209616, -1110788716, 1060006588) + + W(5, 1058553385, -1120731420, -1118628277, 1030135008) + + W(6, -1123530251, 1052968790, 1023922010, -1100470754) + + W(7, -1117232951, 1018906596, -1101470606, -1102632350) + + W(8, 1057288665, -1097217661, -1112638769, 
-1121049341) + + W(9, -1110519714, -1105224546, -1111209476, 1038218737) + + W(10, -1121310567, 1024891644, -1125138150, -1083015355) + + W(11, 1034932287, 1047940150, 1023530026, -1113292326); + WS(-1086599863, 1058406314); + sum1 = W(0, -1165120005, 1027409257, 1060099497, -1093079745) + + W(1, -1114693988, 1019186220, -1133002117, -1121445206) + + W(2, 1049800473, -1099695891, 1033258882, 1025328858) + + W(3, -1129945046, -1106007410, 1059141168, -1091476505) + + W(4, -1129815521, -1128567742, 1024249318, -1105883025) + + W(5, 1051534797, -1093891126, -1124179562, 1017248474) + + W(6, -1119162837, -1105279538, 1058010671, -1094225343) + + W(7, -1106482085, -1123837177, 1026085015, -1127421331) + + W(8, 1051659429, -1106347590, -1112036922, 1031899202) + + W(9, -1121003281, -1109094875, 1047889907, -1098091099) + + W(10, 1030868870, -1123224348, -1127955907, 1024561201) + + W(11, 1050835423, 999784785, -1097732400, -1141903913); + sum2 = W(0, -1116781913, 1031543998, 1082393585, 1050004941) + W(1, 1025243259, -1123250937, 1016621488, 1029140379) + + W(2, 1078957479, 1028477447, 1010603263, -1125975170) + + W(3, -1133566515, 1044191480, -1080327233, -1104837212) + + W(4, -1125439138, 1007793087, -1124282558, -1105795809) + + W(5, -1065075267, -1086493521, -1117319975, 1028403247) + + W(6, 1034216133, 1017978886, -1076777978, -1109285108) + + W(7, 988101688, -1125200922, -1128560494, -1175609568) + W(8, 1030943022, 1045155438, 1024551608, 1020903576) + + W(9, 991940908, -1120640387, -1110243052, 1033432207) + + W(10, -1129511338, -1119399555, 1017899660, 1019924506) + + W(11, -1113970478, -1130476854, 1020472787, 1028374268); + WS(-1102531292, -1104168529); + sum1 = + W(0, -1149508566, 1032023755, 1058049088, -1091020399) + W(1, -1123106485, 1018386839, -1156103398, -1123937923) + + W(2, 1044530702, -1093487559, 1028416434, 1022122127) + W(3, 1005530819, -1114986325, 1057825341, -1088576155) + + W(4, -1124442300, 1027828556, 1019304359, -1105594348) + + W(5, 
1061967565, -1113700815, -1132463754, 1023613146) + + W(6, -1111288609, -1100927253, 1057378026, -1096522221) + + W(7, -1107169312, -1118265958, -1152861762, 1042848102) + + W(8, 1043852869, -1100187457, 1030031137, 1031835352) + + W(9, -1123770879, -1131000647, 1032195229, -1097716728) + + W(10, -1127676038, -1124424850, 992160490, 1048571064) + + W(11, 1021567685, -1095320739, 1015048992, -1151822630); + sum2 = + W(0, -1112466451, 1034684305, -1101371049, -1100658772) + W(1, 1043071493, -1119405084, -1130983341, 1035765483) + + W(2, -1113050760, -1100149859, 1039321546, -1140521940) + + W(3, 1032826781, -1117553496, -1096382267, 1042208912) + + W(4, -1131759707, -1127348451, -1114093653, -1101908611) + + W(5, 1063677104, 1062452463, -1106482074, 1028341682) + W(6, 1040752192, -1105770401, -1132405995, 1047214162) + + W(7, -1105645940, 1022142418, -1154197073, -1106309648) + + W(8, -1114901667, -1095028755, 1034888818, -1118255322) + W(9, 1026229169, 984953298, 1023704972, -1109270652) + + W(10, 1035103126, -1118485150, 1032051747, -1113409477) + + W(11, -1104964374, -1107539022, 1008251057, 1025943565); + WS(-1107552696, -1110437397); + sum1 = W(0, 994440345, -1114854865, 1038159592, -1169396004) + + W(1, -1112745281, 1003079356, -1123893979, -1115498472) + + W(2, -1143015444, -1103811554, -1176837895, -1126823399) + + W(3, -1123659045, -1107252796, 1048547885, -1102945115) + + W(4, -1113210869, -1127252913, 1033249360, -1098484960) + + W(5, 1066459081, 1054059005, -1092775086, 1036761683) + + W(6, -1115012423, -1105215024, 1050116824, -1110458371) + + W(7, -1107508006, -1117476787, 1025628848, -1113844817) + + W(8, -1114931568, 1027966800, -1115481499, -1139958260) + + W(9, -1120012907, 1015022176, -1139703834, -1137430074) + + W(10, -1123286831, -1143819946, -1138236651, -1112267573) + + W(11, 1030992175, 1038350450, -1114208046, -1119805695); + sum2 = + W(0, -1128900045, 1032112026, -1122103841, 1020429393) + W(1, -1123818335, 1025087925, -1134566747, 
-1121844433) + + W(2, 1023900033, 1031965476, -1168200234, -1116320886) + W(3, 1023101249, 1024457633, -1130431017, 1010339139) + + W(4, -1111989275, 1037399444, -1119409466, 1041370918) + + W(5, 1071310745, -1073631838, -1111141033, -1131868341) + + W(6, 1014344875, -1138667107, 1015570157, 1024644617) + W(7, -1128248465, 1027301561, -1124604689, 1023011873) + + W(8, 1025187637, 1038341472, 1032459330, -1118206868) + + W(9, -1135550339, -1123574415, 1029193653, -1117036677) + + W(10, 999769253, -1124415769, 1011013307, 1018662193) + + W(11, -1118001398, 1034637136, 1020580065, -1136885571); + WS(-1106869980, -1095812171); + sum1 = W(0, 998635262, 1030538232, -1104276842, 1021368885) + W(1, 1037657452, 1012233735, -1132768195, 1032306163) + + W(2, 1027943897, 1042581143, -1130589992, -1127936403) + + W(3, 1031165051, 1040655940, -1094369165, 1043839724) + W(4, 1045125152, 1030674671, -1109012795, 1049530762) + + W(5, -1079963696, -1098205560, 1052736873, -1106402911) + + W(6, 1031921458, 1035845131, -1094529094, 1041010327) + W(7, 1041646697, 1035635524, 959153786, 1029124089) + + W(8, -1138415831, 1043286153, 1033263566, -1117746707) + + W(9, 1015400066, 1034791747, -1107836910, 1040781721) + + W(10, -1127518347, 1026900011, 1020404450, -1126776961) + + W(11, -1127281161, 1032119517, 1032079673, 1007765715); + sum2 = + W(0, -1137391706, 1016897725, -1122041158, 1032188407) + W(1, 959684203, 1004755699, -1152109415, -1123613558) + + W(2, -1117579022, 1034093919, -1123954990, -1142267571) + + W(3, 1015680093, 1031100358, 1032233585, -1107212716) + W(4, -1144448755, 1020208893, -1108578627, 1031185182) + + W(5, 1068869480, -1079207220, -1112638895, -1131161181) + + W(6, 1012334874, 1018767069, 1031129926, -1118301998) + W(7, -1145613555, 1014639674, -1126811133, 1016912269) + + W(8, -1135215962, 1010098650, 1002322227, 1010341946) + + W(9, -1146463859, -1123894542, -1172726683, 1010207290) + + W(10, -1163026381, -1128211133, 1013671002, -1129388765) + + W(11, 
1025914590, 998321459, -1185201771, 1014227898); + WS(-1106551004, 1058945856); + sum1 = W(0, -1136733265, 1027999917, 1036329486, 1021052191) + W(1, -1112842790, 996925861, 1029534949, -1110759309) + + W(2, 1039804839, -1090459221, 1042972690, -1126301053) + + W(3, 1040114552, -1101184472, 1055915689, -1088507490) + + W(4, -1116549941, 1035692146, 1023973738, -1136820001) + + W(5, 1045009467, -1102642413, 1048917175, -1116120689) + + W(6, 1008178533, -1120539979, -1099722578, 1057115504) + + W(7, -1103385510, 1032281663, -1121731675, 1050535890) + + W(8, -1092228133, -1141944531, 1042662182, 1025401864) + + W(9, -1122738710, 1036280349, -1110742697, 1036514925) + + W(10, -1123309658, -1122836256, 998670125, -1112884987) + + W(11, -1133144549, 1024140334, 1040183891, 1009223461); + sum2 = + W(0, -1166199047, 1029866216, -1143586281, 1016771040) + W(1, -1123676489, -1174784654, 1008667864, -1111274931) + + W(2, 1037662319, 1050961303, 1039205717, -1119230230) + W(3, -1137339492, 1026227362, 1052978055, 1073672156) + + W(4, 1048063806, -1117826448, -1142224481, -1119220040) + + W(5, -1079795268, -1075413433, 1028203090, 1028246046) + + W(6, -1152330850, 1031356244, -1109068899, 1029037052) + + W(7, -1113725553, -1160612324, 1020018908, -1116678638) + + W(8, 1041694498, 1026323256, -1115208722, 1024440460) + W(9, -1116618549, 1026327024, -1122243464, 1021575344) + + W(10, 1002430881, -1126523302, 1015962596, -1189397276) + + W(11, 1027749752, -1119692611, -1114291930, 1031666290); + WS(1045979868, -1116118129); + sum1 = W(0, 1019079208, -1115974509, 1054347779, -1096855346) + + W(1, -1118610636, -1120438410, -1124899402, -1114857165) + + W(2, 1046800787, -1098629066, 1029706377, 1017939836) + + W(3, -1142300538, -1110287856, 1053005207, -1093283235) + + W(4, 1028644213, -1131155654, 1011594153, -1099509298) + + W(5, 1063937495, -1097005655, 1015974258, -1116499825) + + W(6, -1134190949, -1112842949, 1058166871, -1090046872) + + W(7, 1040979430, -1119992573, 
-1132197046, -1107483943) + + W(8, 1048177899, -1104885511, -1129150352, -1120133576) + + W(9, -1116332853, -1111331677, 1047873428, -1102896822) + + W(10, 1028514810, -1118080608, -1135144470, 1018268010) + + W(11, 1056893199, -1096686406, -1118433447, -1112688517); + sum2 = + W(0, -1132220574, 1034058342, 1008450820, -1132397996) + W(1, -1105536207, 1025437040, -1115410889, -1116250603) + + W(2, -1126553639, -1097158033, -1108668742, -1118211898) + + W(3, 1017347763, 1029372102, -1105866140, 1060185134) + + W(4, -1106418349, 1034066570, -1123931551, -1101916549) + + W(5, 1053545154, 1060913993, -1104852271, -1102379864) + W(6, 1027749497, -1104316591, 1054419707, 1033775493) + + W(7, -1106993048, -1133416673, -1111299822, 1035393713) + + W(8, -1102688086, -1115290197, -1102129692, -1209279719) + + W(9, 1025854529, -1107314572, 1036131430, -1106841697) + + W(10, -1149271050, -1133826631, -1123916419, 1033668899) + + W(11, -1144517652, 990325833, -1114889391, -1129688184); + WS(1058774455, 1069380246); + sum1 = W(0, 1032626516, -1114780225, 1045065548, -1111975378) + + W(1, 1025328225, -1121960111, -1119757626, 1038823639) + + W(2, 1040221656, -1115053555, 1020222778, 1037972921) + W(3, 1033017026, -1098306133, 1021036585, 1050529784) + + W(4, -1115391173, -1112361500, 1015522813, 1034533838) + + W(5, -1082356272, -1095104409, 1045288819, -1139302935) + + W(6, -1128327131, -1103839696, 1057244812, -1133328720) + + W(7, -1116298540, 1018289699, 1036888920, -1112718001) + + W(8, 1032300191, -1111385431, 1042918995, -1110693582) + + W(9, 1032769109, -1126986782, 1043416793, -1124233691) + + W(10, 1029500622, 1027375220, -1148588495, 1012070438) + + W(11, 1045596067, -1105784401, 1037659662, -1114680879); + sum2 = + W(0, -1129789987, 1017265095, -1124616639, -1116091154) + W(1, 1033010488, 1011223606, 1019234847, -1124971531) + + W(2, -1114025560, 1022067335, -1114442452, -1117824661) + + W(3, -1128803579, 1043137983, -1132476030, -1102585265) + + W(4, 1028851843, 
1031833634, -1112792165, 1047657371) + + W(5, -1114992592, 1052022695, -1115343414, -1136738894) + + W(6, 1011967806, 1034970984, -1098689491, 1033604172) + W(7, 1002830796, 988094894, 995307959, 1010908150) + + W(8, -1115509379, 1031244615, -1121158047, 1025799099) + W(9, 1018394359, -1107223361, 1006851110, 1032350856) + + W(10, -1114060980, -1118870225, -1112288614, 1027672251) + + W(11, -1128503755, 1024636851, 1027935499, -1127411999); + WS(1065814108, -1098421406); + sum1 = + W(0, 1030554015, -1104400078, -1097091133, 1050276747) + W(1, 1045406982, -1119880969, -1131576032, 1022555209) + + W(2, -1099298246, 1048827844, -1106685173, -1132305362) + + W(3, 1020712308, -1130072911, -1094842433, 1057743463) + + W(4, 1040093248, -1122176686, 1019668842, -1118231891) + W(5, -1084181571, 1051719984, 1018405640, 1026194364) + + W(6, -1178997255, 1041059117, -1098796514, 1058357819) + + W(7, -1118903138, 1004376640, 1023970854, -1115494780) + + W(8, -1094912271, 1056339293, -1115388505, 1024072531) + W(9, 1011631425, 1035736611, -1090107010, 1051879221) + + W(10, -1113309730, 1009992363, 1023862096, -1104560304) + + W(11, -1092396649, 1057941592, -1118151090, 1029155252); + sum2 = W(0, -1104337717, -1085961218, 1065899128, 1055232204) + + W(1, -1090132108, 1023963317, 1027104118, 1036459641) + + W(2, -1117484391, -1101810882, 1052764213, 1014918739) + + W(3, -1114433477, -1092980271, 1062039746, 1049041629) + + W(4, -1092098978, 1035158266, -1122138940, -1101372003) + + W(5, 1077763913, 1071302707, -1123596042, 1027553578) + + W(6, 1048327612, -1098271754, -1088377514, -1084489503) + + W(7, 1027311380, -1115760458, 1018207462, -1115566688) + + W(8, -1079225367, -1087139921, 1042626102, 1025357528) + + W(9, 1015118770, -1101224844, -1106150830, 1036291830) + + W(10, -1105072819, 1032466828, 1054676844, -1105452167) + + W(11, -1079350028, -1090077553, 1053993534, -1115160051); + WS(-1083527351, -1082119000); + sum1 = + W(0, 1025283773, -1100516229, -1103907601, 
1054198743) + W(1, -1139734483, -1115248201, -1132057884, 1028453762) + + W(2, -1097545679, 1048327710, -1114897689, 1021031566) + + W(3, 1024000280, -1124447466, -1096840081, 1054350616) + + W(4, -1109407791, -1115380998, -1117776915, 1027818609) + + W(5, -1088298777, 1057412078, -1112548849, -1139302503) + W(6, 994019357, 1030853236, -1090997910, 1059405315) + + W(7, -1107996896, 1029448614, -1146975260, -1120656758) + + W(8, -1097629690, 1052404744, 1035903152, -1119316016) + W(9, 1021506300, 1023329542, -1102673054, 1051692752) + + W(10, -1110888465, 1026998601, 1016847260, -1111423375) + + W(11, -1097673748, 1047886934, 1034953432, -1149960989); + sum2 = W(0, 1023717076, 1034248479, -1110594365, -1113079527) + W(1, 1030579691, 999456782, 996584188, -1120794590) + + W(2, 1045315954, -1094151830, -1113958061, 1021526858) + + W(3, -1122858228, 1037241484, -1111180041, -1064957365) + + W(4, -1106054130, 1018369486, 1005163886, -1124781820) + + W(5, -1088752945, -1066051811, -1113733983, 1031457177) + + W(6, 1025356913, -1115274025, -1100621689, 1081496726) + + W(7, 1049150725, -1112407773, -1118691920, 1026099362) + W(8, 1052626156, 1084041658, 1032870315, 1022214782) + + W(9, 1028885707, -1121079636, -1114153113, 1034711477) + + W(10, 1042989109, -1108284893, -1114207359, 1010502519) + + W(11, 1038857859, -1115018893, -1106741724, 1023330420); + WS(1031453552, -1108810221); + sum1 = W(0, -1122540068, -1106571135, -1105389195, 1051764564) + + W(1, -1121040220, -1136080166, -1141490084, 1028200745) + + W(2, 1023785053, -1104087736, 1027661528, 1023137883) + + W(3, -1132153597, -1106394603, 1041720002, 1055195957) + + W(4, -1111907522, -1123745267, 1029503994, 1029495448) + + W(5, -1089108304, -1096098616, 1031937892, 1034024448) + + W(6, -1126370513, -1118016922, 1047749927, 1047204316) + + W(7, -1112974291, -1121356173, -1127727061, 1043712879) + + W(8, -1092241143, 1028938666, 1025162247, 1028910871) + W(9, -1132743443, 1035505061, 998106632, 1043190746) + + 
W(10, -1152510101, -1121641711, 1024122485, 1041979953) + + W(11, -1098695505, 1039644831, 1002213552, 1025100697); + sum2 = + W(0, 1032316109, -1117365450, 1028239450, -1106184057) + W(1, 1039709205, -1127567978, -1122133197, 1033736971) + + W(2, -1104139010, 1045998485, -1126266788, 1029398164) + + W(3, 1021061413, -1117276879, -1090538079, -1087751568) + + W(4, 1056538813, -1106793212, -1122180337, 1009179554) + W(5, 1022309577, 1065712953, -1098763613, 1016414657) + + W(6, -1115376331, 1034450077, -1125496737, -1100319643) + + W(7, 1017435581, 1032114232, -1123716599, -1100616286) + + W(8, 1059658657, -1100023584, 1022563437, -1113653553) + + W(9, 1008734994, -1130550593, 1014965154, -1115246006) + + W(10, 980175008, 1034308617, -1144451812, -1102305630) + + W(11, 1053491799, -1107982835, -1103966134, 1020870093); + WS(1060627383, -1111244297); + sum1 = + W(0, -1124372048, 1041984816, -1098607926, 1048999428) + W(1, -1110297584, 1037179887, -1124233202, 1023856878) + + W(2, -1097555068, 1050454749, -1119853981, -1124350564) + + W(3, -1115057922, 1037843884, -1092180401, 1057072683) + + W(4, -1100337986, 1037306108, -1126359548, 1013283185) + + W(5, 1035134080, 1037566628, -1117600845, -1180500637) + + W(6, -1135390133, -1106598024, 1055432750, -1088869991) + + W(7, 1045634091, -1107300041, 999175032, -1109047521) + W(8, 1050330552, -1097603878, 1040799043, -1132131255) + + W(9, -1137033126, -1108189319, 1046692025, -1106138715) + + W(10, -1129123036, -1126029544, -1120904377, -1127664786) + + W(11, 1039361415, -1115643873, 1042168959, -1132202938); + sum2 = + W(0, 1027553654, -1105419467, -1141314538, 1051401064) + W(1, 1061407816, -1086513034, -1128416633, 1035214610) + + W(2, 992160749, -1098506313, 1048701957, 1023990179) + W(3, -1120268362, -1102082588, 1039698771, 1041311911) + + W(4, 1065441970, -1083801169, 1026192109, 1034096310) + W(5, 1028842291, 1044707561, -1108357660, 1027929117) + + W(6, 1010812163, -1109637676, -1128101012, 1040877432) + + W(7, 
-1080219839, 1058347144, -1133939099, 1042926233) + + W(8, -1110346070, -1107620104, -1089748519, 1054974548) + + W(9, 1033310247, -1103557522, 1041169016, 1044637450) + W(10, -1106511250, 1012370076, 1007338654, 1039202903) + + W(11, -1150310139, -1101325278, -1094091750, 1054706164); + WS(1037089208, -1117319375); + sum1 = W(0, -1117307261, -1127081515, -1090315640, 1055484319) + + W(1, 1019801962, -1140393804, -1109658215, 1037266337) + + W(2, -1103803502, 1036834781, -1121027890, -1122236391) + + W(3, -1104716662, 1045840649, -1108965493, 1059715066) + + W(4, -1120952094, 1007521585, -1095157370, 1048384396) + + W(5, -1089739445, 1050929076, -1115632913, -1115666157) + + W(6, -1115283233, 1042005149, -1098438340, 1059599591) + + W(7, -1115851136, 1038771059, -1113906562, 1026318614) + + W(8, -1090508193, 1050537744, 1036887192, -1114842616) + + W(9, -1127634429, 1035073906, -1096110967, 1039184033) + + W(10, -1109698481, 1040209394, -1150522598, -1129730364) + + W(11, -1090097982, 1056916499, 994450682, 1023772594); + sum2 = W(0, -1096641595, 1051991389, -1107135642, -1120991144) + + W(1, -1123228782, -1119770484, -1086741683, 1059199896) + + W(2, 1044105651, 1025236133, 1032269711, 1020415755) + W(3, -1073585699, 1069582799, 1057042277, -1123268190) + + W(4, -1116194138, -1119862022, -1071753138, 1070628328) + + W(5, 1065961920, -1098135951, -1144560253, 1033682735) + + W(6, -1078410756, 1067042424, 1057516905, 1032298014) + + W(7, 1026966393, -1113740644, -1096289506, 1055331220) + + W(8, -1098611055, -1106950583, -1155415556, 1022995083) + + W(9, -1145261363, 1015112751, 1043415065, 1007537613) + W(10, 992611856, -1144895945, 997353903, 1024293693) + + W(11, -1101981251, 1025642916, 1011854094, -1130196038); + WS(-1089262391, 1050517969); + sum1 = W(0, -1112721887, 956356282, 1041136081, -1112851037) + W(1, -1118798326, 1033047322, 1040766980, 1032255106) + + W(2, -1099047838, -1136806893, 1044101949, -1110781188) + + W(3, -1116916633, -1115255965, 
1050954387, 1047658201) + + W(4, -1099177336, 1022949658, -1109323658, 1053521294) + + W(5, -1090918435, -1090498606, 1048722524, 984126311) + + W(6, 1043853249, -1109293354, 1049475859, -1111100085) + + W(7, 1044026608, -1111349460, -1102481809, 1024224759) + + W(8, 1046323061, -1101889105, -1109421665, -1151794972) + + W(9, 1025978335, 1036451442, -1129023222, -1112184834) + + W(10, 1044252857, -1140137768, -1134508350, 998852344) + + W(11, -1126452038, 1037224108, -1113020120, -1119083507); + sum2 = + W(0, -1120544497, -1104964203, 1056285145, -1109256198) + W(1, -1107706742, 1033408182, 1042905493, -1097077111) + + W(2, -1118581415, 1026029661, 1025674690, -1118988963) + + W(3, 1045262161, -1098230931, -1079914544, 1056552993) + + W(4, -1103396776, 1021102038, -1104438232, 1057982485) + + W(5, 1057557770, -1104081201, 1048351556, -1136298684) + + W(6, 1023630957, -1106247773, 1048964135, -1114436673) + + W(7, 1018818866, -1112938187, -1122372280, 1011970003) + W(8, 1020547274, 1033190701, -1103728916, 1034200267) + + W(9, 1027866949, 1026340389, -1113753028, -1107460568) + + W(10, 1046941904, -1117493164, -1121491826, 1016978228) + + W(11, 1034078959, 1034442922, -1106194166, -1172844144); + WS(1055467886, -1126093527); + sum1 = + W(0, -1115675946, 1037777366, 1043818615, -1096353162) + W(1, -1150025924, 1035905869, 1039731987, -1126460340) + + W(2, -1103677379, 1012406341, 1031349769, -1103947080) + + W(3, -1108949528, -1145838757, 1060367897, -1088026493) + W(4, 1034695514, 1034985426, 1014043817, 1049783617) + + W(5, -1102044139, -1095567535, 1044285022, -1129716323) + W(6, 1025589529, 991251120, -1109258124, 1051881528) + + W(7, 1011738461, -1107675487, -1102050167, 1042727635) + + W(8, 1049311757, -1095440907, -1115570324, 1042590402) + + W(9, 1035820699, -1123043835, -1116399889, 1032817677) + + W(10, 1040150333, -1104125432, -1114599603, 1024161077) + + W(11, 1046137806, -1116994158, -1110796915, -1140552997); + sum2 = + W(0, -1115939175, -1111463085, 
1026058510, 1015189774) + W(1, -1116961377, 1003109525, 1042939513, 1009748958) + + W(2, -1101036999, 1048391804, 1019493983, -1128500291) + + W(3, -1106984126, -1104760450, 1046231331, -1101164712) + + W(4, 1013441892, -1127287103, 1036071940, 1026404786) + W(5, 1060091622, 1059732347, -1113485706, 1036806652) + + W(6, -1133154938, 1042746589, -1109115274, -1090421446) + + W(7, -1102245299, -1110906499, -1110872047, 1018643239) + + W(8, 1042116887, -1092755101, -1104832958, 1042929180) + W(9, 1038271101, 1024219237, -1104891113, 1018386748) + + W(10, 1043357489, -1108912836, -1124388167, -1107044825) + + W(11, -1137025366, -1112446842, -1123717753, 1034520620); + WS(1062208951, 1065603754); + sum1 = + W(0, 1032466911, 1029341462, 1059167044, -1090134924) + W(1, -1133534677, 987089513, 1012820793, 1027274819) + + W(2, 1052950767, -1094303727, 1027990706, 1032570772) + W(3, 1029562264, 997585115, 1057349364, -1085543304) + + W(4, 1035990447, -1136982833, 1020285479, 1040141401) + + W(5, -1108524682, -1080037127, 1037410929, -1125543221) + W(6, 993753125, 1032063722, 1049874906, -1095616607) + + W(7, 1039846496, 1015158861, 1033268866, -1153074023) + W(8, 1048703978, -1112675170, 1023735025, -1145342854) + + W(9, -1131051261, 1015103815, 1050186639, -1111683969) + W(10, 1040888889, 1031358985, 1028903855, 1040294280) + + W(11, 1053103700, -1091636059, -1118753424, 1022936933); + sum2 = + W(0, 1035386648, 1042887622, -1086410483, -1090258329) + W(1, 1060158548, -1113981975, 1023547272, -1099060438) + + W(2, 1053956416, -1112148135, 1050315819, -1099847621) + + W(3, 1025878508, -1099619867, -1094814183, -1093316197) + + W(4, 1046080958, 1046524888, 1026832668, -1099104088) + W(5, 1069089225, 1060781490, -1103190742, -1112424275) + + W(6, 1029828312, 1048769602, 1044213412, -1091192206) + + W(7, -1105160931, 1051146364, -1098240729, -1121505868) + + W(8, 1035342184, -1105713821, -1084784918, 1019461112) + W(9, 1044150428, 1040524432, 1040447750, -1089411157) + + W(10, 
1058372334, 1028619520, -1103643200, -1115369075) + + W(11, 1049544904, 1058550641, -1080579858, 1040133188); + WS(-1078970012, -1070975772); + sum1 = + W(0, 1023443287, -1111813336, -1095332548, 1048282532) + W(1, 1036630695, 999582566, 1008154783, 1034628686) + + W(2, -1098370186, 1052880922, -1109395910, -1140663171) + + W(3, 1008498119, 1020510238, -1091304148, 1054365856) + W(4, 1031904252, -1128098380, 1032996598, -1119594955) + + W(5, -1097391342, 1065196819, -1105409007, 1030941172) + + W(6, -1130007992, 1037186855, -1088889903, 1037830473) + W(7, 1022933044, 1003729918, 1026985054, -1129455456) + + W(8, -1093273302, 1040256647, 1030320779, -1138808617) + W(9, 1001505728, 1024916861, -1097878954, 1042516535) + + W(10, -1131972409, 1018809669, 1027285064, -1118416824) + + W(11, -1097408559, 1044458956, 1037289397, 1005961310); + sum2 = + W(0, -1114009537, 1019032853, 1024640816, 1029944441) + W(1, -1146945026, -1136399129, 1035366037, -1143037026) + + W(2, -1101234014, -1105571142, 1031686749, 1018164069) + + W(3, -1111463643, -1122373240, 1068131945, 1044188353) + + W(4, -1128619453, -1125359481, -1119334122, 1056620469) + W(5, 1075463933, -1093783941, 1046426762, 999713778) + + W(6, -1132366661, -1103744316, -1075751794, 1050699227) + + W(7, -1109652943, -1123073190, 1001109746, 1002774938) + + W(8, -1072865378, -1098127223, -1131952529, 1019524751) + + W(9, 1035022899, -1102405348, 1052446989, 1028903402) + W(10, -1123943246, 983357578, -1112054465, 1046894720) + + W(11, 1019415611, -1119380340, 1017598049, 1013611461); + WS(-1137905088, -1096938393); + sum1 = + W(0, -1127951952, -1116133184, -1092624022, 1045295113) + W(1, 1023908704, 1031880695, -1139830244, 1040763803) + + W(2, -1104553656, 1046745657, -1103252327, 1040451684) + + W(3, -1108742721, 1041221580, -1090149133, 1057147883) + W(4, -1103732914, 1026434167, 1019792636, 1009683312) + + W(5, 1044078592, -1112574564, -1127338650, 1007135764) + + W(6, 1018370522, -1107181880, 1058802794, 
-1085568476) + W(7, 1040869579, 1014113348, 1020799581, 1002965001) + + W(8, 1043495791, -1103617227, 991652540, -1125537591) + W(9, 1021418002, 1022849180, 1033578226, 1032014262) + + W(10, 1013121207, 1016284007, -1148599498, 1039287108) + + W(11, -1113073030, 1022418564, 1011288682, -1116579933); + sum2 = W(0, 1036469548, -1097965159, -1116680345, 1001756270) + + W(1, 1032969412, -1131471357, 1017461301, -1101551098) + + W(2, -1125992347, -1115644020, -1124365977, 1016380025) + + W(3, 1017339130, -1094477168, 1058922542, 1035448965) + W(4, 1040541302, 1001720584, -1095997080, 1035631286) + + W(5, -1103956246, -1105032669, -1112710338, -1115585175) + + W(6, -1099654066, 1054988432, 1040393300, 1039533461) + W(7, 1017600743, 1001178742, -1105423964, 1047389393) + + W(8, 1040203374, -1122662741, -1114085496, 1001141734) + + W(9, -1113359567, 1041958533, -1108890296, -1130070269) + + W(10, -1134082186, 1010209588, -1131580992, 1043362318) + + W(11, 1024609216, 1025676040, 1015920174, -1122393401); + WS(1017525984, 1039916017); + sum1 = + W(0, -1124853567, 1018048624, -1095739637, 1045916039) + W(1, 1035770156, 1019449355, -1147061122, -1127291978) + + W(2, -1104996150, 1043181303, 1038596810, -1118699785) + + W(3, -1129462517, 1049183858, -1085471393, 1054258797) + W(4, 1048116160, 1018729693, -1119594613, 1032566282) + + W(5, -1090223247, 1054999864, 1024906308, -1125493497) + W(6, 999306018, 1036307723, -1104470177, -1108082389) + + W(7, 1035021201, -1111613764, -1156505185, -1150114695) + + W(8, -1105492077, 1055713893, -1114305018, 1013642059) + + W(9, 1012193026, -1116656134, -1109848430, -1134406033) + + W(10, 1014226964, -1117195563, 1010192066, -1111959392) + + W(11, -1107232129, 1049351719, -1132413284, 990662981); + sum2 = + W(0, 1033095635, -1113611213, -1120605005, 1010827478) + W(1, -1096460028, 992544217, -1123818427, 1038354785) + + W(2, 1043017802, 1005632845, -1090567932, 1043858115) + W(3, 1023960168, -1136308834, 1016597499, 1067061797) + + W(4, 
-1078682340, 1041505996, -1115456886, 1031365710) + + W(5, -1105318511, 1065783254, -1094955795, -1104546945) + + W(6, -1130014725, -1121052824, 1032279133, 1033387493) + + W(7, -1114814318, -1114466677, -1134763238, 1037262635) + + W(8, -1129541437, 1024758296, 994121897, -1115137122) + W(9, 1020936831, -1129095405, 1028982810, -1141733133) + + W(10, -1126300988, 1007094454, 1030713030, 1030934712) + + W(11, 969324748, 973320358, -1148320949, -1122716507); + WS(1033221560, 1063777383); + sum1 = + W(0, 1019126833, -1112638616, 1057738402, -1088696201) + W(1, 1044063732, -1130166589, 1022703059, -1107233655) + + W(2, 1049316970, -1093677291, 1018619290, -1127677637) + + W(3, 1034317754, -1107419652, 1057780266, -1091493000) + + W(4, 1042363102, -1141893947, -1141147507, 1017313945) + + W(5, 1039925560, -1105678958, -1123751101, -1141892011) + + W(6, 1023815261, 1030431247, -1101232178, 1054033952) + W(7, -1103422471, 1028589441, -1145478905, 1043218964) + + W(8, -1097249286, 1050643572, -1099574259, 1032480102) + + W(9, -1139797991, 1030899327, -1105363093, 1039373871) + + W(10, -1109462943, 1000558881, 1017044691, 1042038278) + + W(11, -1097041200, 1044902925, -1143343447, -1115509735); + sum2 = + W(0, -1130982538, -1106875761, 1056618334, 1055977286) + W(1, -1102508839, 1020425242, 1021127838, 1011019608) + + W(2, 1055137831, 1058761027, -1115387544, 1013848532) + W(3, 1013698904, 1024572751, 1048771047, -1103617496) + + W(4, -1128394642, 1016018634, -1227267307, 1039986653) + + W(5, -1073860127, -1072531400, 1043687089, -1112657573) + + W(6, -1180063220, -1113103478, -1108123535, 1052255088) + + W(7, -1126700500, 1035781641, -1130392938, -1104759550) + + W(8, 1059126644, 1063802283, -1109991236, -1125248586) + W(9, -1129014326, 1035543741, 1045481337, 1048895081) + + W(10, 1031917163, -1126407949, 1032270489, -1129087757) + + W(11, 1042704754, 1016319978, -1121934766, 1030839801); + WS(-1091503470, -1126838795); + sum1 = W(0, -1124121237, -1134568843, 1047693172, 
-1109412515) + + W(1, -1129997061, 1024589216, -1113414670, -1126036384) + + W(2, 1049117760, -1103447906, 1034526751, -1113770234) + + W(3, 1028321829, 1033159265, -1105247938, -1096437464) + + W(4, -1143270742, 1036105485, -1111356688, 1037814911) + + W(5, 1058889082, -1098797565, 1022658271, -1114645460) + + W(6, 1026686479, 1031580097, 1012762455, -1104719461) + + W(7, -1109175079, 1023714995, -1121133649, 1040854417) + + W(8, -1120516448, -1106636212, 1042645116, -1138248951) + + W(9, -1120913068, -1106928812, 1049438209, -1129886751) + + W(10, -1104071908, -1132081196, -1126384695, 1027944427) + + W(11, 1040171928, -1106734009, 1012750451, -1145462266); + sum2 = + W(0, -1121971015, 1025966975, -1127561691, 1032121603) + W(1, -1118316871, -1147835451, -1109269766, 1042653975) + + W(2, 1038662008, -1104797770, 1018661176, 1023052754) + W(3, -1096046215, 1058579369, -1095336395, 1041625968) + + W(4, -1131103951, -1129857431, -1096349825, 1055073186) + + W(5, -1156170871, -1098327149, 1035437895, -1114991229) + + W(6, -1104467071, 1043843774, 1054347264, -1090818470) + W(7, 1042639369, 1011784142, -1105457821, 1043492169) + + W(8, -1116732629, 1028261711, -1115472379, -1157273063) + + W(9, -1114558576, -1138844670, 1043300107, -1117277405) + + W(10, -1115061521, 1024244091, -1113812495, 1035958317) + + W(11, 1025184897, -1130419167, -1130646919, -1148362739); + WS(1065968028, 1020949470); + sum1 = W(0, 1032158503, 1043191673, 1050691771, -1089572661) + W(1, 983294118, 1033006978, 1018701500, 1020082981) + + W(2, 1043953282, -1139338281, 1041437600, -1139648989) + W(3, 1028916776, 999481917, 1047062836, -1091936976) + + W(4, 1042367115, 1033705422, 1031923218, 1033906881) + W(5, 1032996498, -1081715803, 1040306653, -1132542121) + + W(6, -1143490705, -1123072324, 1057457238, -1087112685) + + W(7, 1019100064, -1138694873, 1005807263, 1012896153) + + W(8, 1057445286, -1088032573, -1123572193, 1033839136) + + W(9, 1028568398, 1015001550, 1051530049, -1100585253) + 
W(10, 1039280729, 1027376729, 1026731038, 1035245559) + + W(11, 1057446979, -1095410205, -1104642968, 1015844228); + sum2 = + W(0, -1107158830, -1098284578, 1059620359, 1059101624) + W(1, -1080819081, 1041045600, -1114179967, 1050997929) + + W(2, -1120628180, -1084072333, 1055292451, 1041809552) + W(3, -1106205598, 1032837846, 1056621385, 1033838752) + + W(4, -1084530179, -1150759871, -1112529879, -1138556160) + + W(5, 1064698181, 1049137220, -1118657190, 1036987884) + W(6, 1036191348, -1103494095, 1049117565, -1100194061) + + W(7, 1035375516, 1040790470, 1045048740, 1033058370) + W(8, -1087565422, -1096659207, 1055648971, -1098721116) + + W(9, -1112261511, -1109765233, 1050635491, 1042760064) + + W(10, -1101810418, 1028926760, 1045813672, 1040601336) + + W(11, -1087363324, -1087988644, 1059720072, -1118399816); + WS(-1079370588, -1069938997); + sum1 = + W(0, -1107111607, 1024046068, 1034755057, -1106911318) + W(1, 1032333322, -1122522177, 1031876694, 1040468568) + + W(2, 1048995971, -1106422470, 1043602212, -1133546645) + + W(3, -1108546779, -1110778478, 1044693184, -1092550445) + + W(4, -1124221431, -1123999146, 1007409349, 1041967908) + + W(5, 1056322870, -1089848952, 1044063648, -1129245296) + W(6, -1119937056, 1037532265, 992363881, -1115839591) + + W(7, -1137125241, -1121719236, -1110402417, 1027170100) + + W(8, -1123931633, -1098485404, 1015794334, 1009488187) + W(9, 1034313646, 1028424065, 1041273350, 1027494176) + + W(10, 1038675968, -1146748727, -1115195974, 1030920188) + + W(11, 1034999584, -1101830589, 1024639630, -1115284904); + sum2 = + W(0, -1116010697, 1044972838, -1107525170, 1025111869) + W(1, -1115770744, 1027404867, 1039856206, -1094195694) + + W(2, 1054041965, -1105218154, 1021276689, -1122132060) + W(3, 1025692273, 1041658757, -1095593913, 1047241736) + + W(4, -1113081166, 1036997767, 1047723216, -1090167008) + + W(5, 1054432792, -1103340827, 1030984707, -1109230755) + + W(6, 1054500142, -1087657974, 1048962216, -1105033641) + W(7, 1026520309, 
1000189578, 1045538686, -1114739465) + + W(8, -1099510609, 1043141319, 1008623973, 949401483) + W(9, 1020883249, -1103808398, 1041852115, -1104502432) + + W(10, 1031142833, -1121181330, 1037455508, 1025495534) + + W(11, -1112321007, 1020729805, 1034321699, 989367833); + WS(1051966318, -1113875142); + sum1 = + W(0, 1035883793, 1040145278, 1051696565, -1091560996) + W(1, -1133003627, -1126824689, -1111332346, -1135399329) + + W(2, 1051177307, -1096861164, -1117751383, 1039745286) + W(3, 1045917727, 1017502534, 1054251573, -1091248610) + + W(4, -1178468754, 1020593891, 1006041029, -1096989371) + + W(5, 1061428120, -1087158964, -1126263439, 1008594943) + + W(6, -1129901007, 1043615094, 1037934795, -1098884432) + W(7, -1106662601, 1037849734, 1042663274, 1025318012) + + W(8, 1050890887, -1096301045, 1017933485, 1035118155) + + W(9, -1119690564, -1113005508, 1046007992, -1093706893) + + W(10, 1038615540, -1114145271, 1014471632, 1043398529) + + W(11, 1049249377, -1093100168, -1116546293, 1028529816); + sum2 = + W(0, -1120038729, 995641288, 1035481768, 1011267102) + W(1, 1042749492, 1026093812, 1032134121, 1034430176) + + W(2, -1099098436, -1099374838, 1055985885, -1113501317) + W(3, -1112337284, 1011533369, 960227969, 1055781369) + + W(4, 1067101038, -1095011910, 1014623636, 1032554184) + W(5, -1122840036, 1046041384, 1069533386, -1086401299) + + W(6, 1015804972, -1114631943, -1123247827, 1055178264) + + W(7, 1065618150, -1079450114, -1111055788, 1037430467) + + W(8, -1115890560, -1124103196, 1058921177, -1075689828) + W(9, 1025605092, 1022278663, 1022436109, 1039303021) + + W(10, -1106092627, -1082339747, -1118260382, 1020696197) + + W(11, 1010081074, -1116158530, -1118372702, -1095588850); + WS(-1087762743, -1114043214); + sum1 = + W(0, 1028903325, 1040067739, 1045936105, -1095653187) + W(1, -1122706133, 1029496147, -1111481047, 981566559) + + W(2, 1050679259, -1096161947, 1026181394, -1149217596) + W(3, 1010874850, 1032756947, 1052053924, -1090950614) + + W(4, 
1034645023, -1132850610, -1117223539, -1130847267) + + W(5, 1055533279, -1084280212, 1030922562, -1154701224) + + W(6, -1114811697, -1123313460, 1059392176, -1097768267) + + W(7, 1030340896, -1113406339, 1027957028, -1113871531) + W(8, 1053658934, 1005713692, -1102292436, 1020850850) + + W(9, -1115120063, -1126050567, 1039338221, -1120312930) + + W(10, -1107839810, 1017761874, 1025677743, -1113266015) + + W(11, 1051823712, -1118568631, -1112577800, -1117302546); + sum2 = + W(0, 1001016005, -1150454109, 1001531747, 983352173) + W(1, -1106746798, -1157019725, -1125241033, -1111980552) + + W(2, 1031575403, -1103460210, 1036252996, 1023495862) + W(3, 1018218852, 1033028659, 1016765132, -1103428024) + + W(4, -1128273041, 1033057508, -1111720092, 1037513337) + + W(5, -1085543392, -1076627395, 1046375720, -1112606370) + + W(6, 1015413875, -1109924763, 1053354837, 1074369634) + W(7, 1055587240, 1019967806, -1140722987, -1144232926) + + W(8, 1054900052, 1051272122, -1094917738, -1107285947) + + W(9, -1138333196, -1117260807, 1041131548, -1098379064) + + W(10, -1106193524, 1012881631, 1010568725, -1113652225) + + W(11, 1045881209, -1098914955, -1098562564, 1031041657); + WS(-1090517687, -1095444575); + sum1 = + W(0, 1025742446, 1032739299, 1054789967, -1095546301) + W(1, 1033023507, 1028563508, -1111454559, -1106175885) + + W(2, 1031326631, -1101847659, -1107099097, -1117676152) + + W(3, 1040962578, -1110724347, 1052551894, -1090925684) + W(4, 1036045063, 1036888990, 1035513890, -1118213794) + + W(5, 1059778428, -1089262225, 1035395209, 1007580971) + W(6, 1025779251, -1103218657, 1045793474, -1096439373) + + W(7, -1101529212, 949923192, 1041617005, 1040368314) + W(8, 1051667296, -1099734269, 1050132438, 1037985265) + + W(9, -1106922364, -1105137317, 1040121301, -1104705805) + + W(10, -1102593253, -1113513373, 1024699903, 1024907982) + + W(11, 1052247674, -1097282993, 1016797696, 1026673059); + sum2 = + W(0, 1052208474, -1095438834, 1040952518, -1101013864) + W(1, 1045777263, 
-1125550479, 1035064858, -1104348375) + + W(2, -1095663473, 1048824145, -1099869349, -1127115193) + + W(3, 1064043431, -1079843968, 1060639651, -1097227129) + W(4, 1049789703, 1024044553, 1055780907, -1084432555) + + W(5, 1054823629, -1105218176, -1118379111, 972648308) + W(6, 1048070172, -1106077690, -1098542607, 1048082838) + + W(7, -1102784042, 1021849499, 1057138231, -1086965748) + W(8, 1057295667, -1106201667, 1043327775, 1016298873) + + W(9, -1100953655, 1048708391, -1095587456, 1042233875) + + W(10, -1107731485, -1116706874, 1042120650, -1106942577) + + W(11, 1035260440, -1136871967, 1006771796, 1030896448); + WS(-1115614648, 1042425791); + sum1 = W(0, -1120207204, -1115327070, -1093340989, 1053481388) + + W(1, 1030984860, -1113874973, -1124027509, 1016926292) + + W(2, -1096731140, 1053311332, -1116407641, 1010793530) + + W(3, -1144413692, 1038910502, -1096554678, 1057006919) + + W(4, 1038600290, -1116271283, -1105701711, 1037845356) + + W(5, -1085064420, 1049566369, 1043585442, -1114795809) + + W(6, 1014568102, 1042236823, -1100005974, 1056376458) + W(7, 1018535375, 992031960, -1120774251, -1105850383) + + W(8, -1098496607, 1048579325, -1122768840, -1130485552) + + W(9, 1029691462, -1134596907, -1135406214, 1042582346) + + W(10, -1120431498, 1029193328, -1119274774, -1117126533) + + W(11, -1098152617, 1043887136, 1035562443, -1140222054); + sum2 = W(0, -1127240204, -1079712608, 1066527571, 1049757184) + + W(1, -1122509441, -1120167031, 1024332524, -1082891577) + + W(2, 1063276166, 1025750476, -1127423195, 976948665) + W(3, -1145474239, -1081879450, 1065092416, 1045572932) + + W(4, -1127801782, -1123809997, -1111478947, 1047818786) + + W(5, 1033988518, -1093152038, -1126799575, 1030959654) + + W(6, 1029335642, 1066272635, -1081647719, -1129453741) + W(7, 1033523902, 995040830, -1112394176, 1064292926) + + W(8, -1085935960, -1107360776, -1109456127, 1026103880) + + W(9, 1042612281, 1048742105, -1090025738, 1042389166) + + W(10, 1029374360, -1116538748, 
-1152251822, 1063028070) + + W(11, -1088819358, -1095668919, -1163717821, 1033003323); + WS(1005558656, -1134386388); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[507]; /* Shared tile filled and read by Pass1: 507 = 39 * 13 entries, addressed as x * 13 plus y. */ + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { temp[pos] = (value); } /* Target of the imageStore macro above: only val.x is kept and it is written to temp[pos]. */ + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { /* Pass 1: fill inp with a tile of input samples, then have each thread run nnedi3 on a 48-sample window and write two values to temp: its own input sample and the nnedi3 result. */ + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y); /* Cooperative load: each thread starts at its local invocation index and strides by the workgroup thread count until all 507 entries are written. Tile entry (x, y) is sampled at group_base offset by (x - 3, y - 2), at the texel center (0.5), scaled by HOOKED_pt. */ + for (int id = int(gl_LocalInvocationIndex); id < 507; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 13, y = (uint)id % 13; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (3)) + 0.5, float(group_base.y + y - (2)) + 0.5)).x; + } + barrier(); /* The whole tile must be written before any thread reads its window below. */ + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[12]; /* Gather 8 runs of 6 consecutive inp entries, the runs 13 entries apart (one x step each), starting at local_pos; the 48 values are packed four per vec4 in that order. */ + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 4]; + samples[1][1] = inp[local_pos + 5]; + samples[1][2] = inp[local_pos + 13]; + samples[1][3] = inp[local_pos + 14]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; + samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 26]; + samples[3][1] = inp[local_pos + 27]; + samples[3][2] = inp[local_pos + 28]; + samples[3][3] = inp[local_pos + 29]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; +
samples[4][2] = inp[local_pos + 39]; + samples[4][3] = inp[local_pos + 40]; + samples[5][0] = inp[local_pos + 41]; + samples[5][1] = inp[local_pos + 42]; + samples[5][2] = inp[local_pos + 43]; + samples[5][3] = inp[local_pos + 44]; + samples[6][0] = inp[local_pos + 52]; + samples[6][1] = inp[local_pos + 53]; + samples[6][2] = inp[local_pos + 54]; + samples[6][3] = inp[local_pos + 55]; + samples[7][0] = inp[local_pos + 56]; + samples[7][1] = inp[local_pos + 57]; + samples[7][2] = inp[local_pos + 65]; + samples[7][3] = inp[local_pos + 66]; + samples[8][0] = inp[local_pos + 67]; + samples[8][1] = inp[local_pos + 68]; + samples[8][2] = inp[local_pos + 69]; + samples[8][3] = inp[local_pos + 70]; + samples[9][0] = inp[local_pos + 78]; + samples[9][1] = inp[local_pos + 79]; + samples[9][2] = inp[local_pos + 80]; + samples[9][3] = inp[local_pos + 81]; + samples[10][0] = inp[local_pos + 82]; + samples[10][1] = inp[local_pos + 83]; + samples[10][2] = inp[local_pos + 91]; + samples[10][3] = inp[local_pos + 92]; + samples[11][0] = inp[local_pos + 93]; + samples[11][1] = inp[local_pos + 94]; + samples[11][2] = inp[local_pos + 95]; + samples[11][3] = inp[local_pos + 96]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 41]; /* Offset 41 is 3 * 13 plus 2, i.e. the tile entry for this thread's own position given the (3, 2) load offset; it is passed through unchanged. */ +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; /* NOTE(review): LAST_PASS is defined outside this chunk; this bounds check is compiled only when it equals CURRENT_PASS (1). */ + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0); /* The y coordinate is doubled: the pass-through sample goes to row 2 * y of temp and the nnedi3 result (next store) to the row directly after it. */ + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret); +} +//!PASS 2 +//!DESC NNEDI3 (double_x, nns128, win8x6) +//!IN INPUT, temp +//!OUT OUTPUT +//!BLOCK_SIZE 64, 8 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[12]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 12; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 48.0;
+ float mstd1 = sumsq / 48.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, -1138315554, -1151849881, -1130100551, -1139428474) + + W(1, -1115186183, 1024054031, -1123693866, -1134121575) + + W(2, 1017020226, -1120184142, -1113913797, -1137890034) + + W(3, -1105019328, 1030136834, -1119041199, 1009291685) + W(4, 1054031418, 1052033568, 1057266643, 1050615732) + + W(5, 1060278709, 1051842684, 1049323809, 1050624038) + + W(6, -1098719688, -1094289569, -1094215666, -1084764287) + + W(7, -1093503073, -1122358095, -1104408867, -1101060057) + + W(8, -1110814528, 1026313661, -1136459684, 1037955767) + + W(9, -1125342359, -1118592012, -1141919603, -1117980571) + + W(10, -1144117236, 1029415347, -1122295896, -1158862002) + + W(11, -1115484761, 1027215228, -1148196514, -1131405947); + sum2 = + W(0, 1011608699, 1021515189, -1133688887, -1109181339) + W(1, 1032724543, -1120491130, 997621755, 1020760181) + + W(2, -1112090344, 1032910614, -1111235489, 1050015381) + + W(3, 1041065097, -1106592726, -1112521193, 1032648390) + W(4, 1035039469, -1112298310, 1068933685, 1079505355) + + W(5, -1082386736, -1067816278, -1094139890, -1123411157) + + W(6, 1018826733, -1103038162, 1035541017, -1096237270) + + W(7, 1051919390, -1101133158, -1120644044, 1029904493) + W(8, -1130070276, 1030536589, 1002846254, 1047170650) + + W(9, 1012706615, -1108914015, 1011888367, -1115273665) + + W(10, -1126912052, 1027816837, -1119314146, -1132054353) + + W(11, 1004049262, 1017068057, 1020965593, 1017829425); + WS(1038714296, 1044827315); + sum1 = W(0, 1008773074, 1000000044, -1125197471, -1155069097) + + W(1, -1128457331, 
-1147450330, -1142834606, -1131672268) + + W(2, -1137155124, -1139148008, -1110040196, -1114984084) + + W(3, -1117135435, 1038803511, -1110360429, 1046900069) + W(4, 1058392080, 1051571686, 1058043460, 1059856605) + + W(5, 1053821137, 1050763009, 1046500044, 1049636107) + + W(6, -1100726684, -1106613964, -1093356205, -1087655353) + + W(7, -1093486798, -1088451200, -1109846237, -1093848946) + + W(8, -1097060854, 1034039029, -1112523275, -1137125990) + + W(9, -1134047198, -1131894321, 1021083253, -1108292068) + + W(10, 1034472653, -1136883834, 1033872898, -1122772178) + + W(11, -1125370043, 1027534056, -1126166913, 1020331091); + sum2 = + W(0, -1123255245, 982126402, 1020504368, 1036976721) + W(1, -1113656730, -1157600449, -1129338356, 1032297389) + + W(2, -1116671780, -1111301077, 1011773496, -1102830064) + W(3, 1031803093, 1045043848, 1026389126, 1022832316) + + W(4, 1046375724, 1053914306, 1013495848, -1106352300) + + W(5, -1103067006, -1094946683, 1036977493, -1098417421) + W(6, 1061532718, 1066141278, 1064263836, 1055449836) + + W(7, -1076699072, -1085904680, -1099822488, -1092845936) + + W(8, 998291536, 1041257894, 1023442398, -1101238723) + W(9, -1103198508, 1048924521, -1109968297, 1053328593) + + W(10, -1094346395, -1108898352, 1023084860, 1037821947) + + W(11, -1123876988, 1041381351, -1109391389, 1023206676); + WS(-1087781687, -1134030789); + sum1 = W(0, 1028179002, 1037437606, -1106850597, 1040743460) + + W(1, -1123964671, -1122790273, 1036030878, 1027799500) + + W(2, -1113846601, -1129634048, 1045066941, -1108576055) + + W(3, 1030205873, -1117192467, 1027547489, -1110675758) + + W(4, -1090428884, -1098510574, -1095161561, -1094444002) + + W(5, -1095134094, -1098192922, -1101066654, -1095206484) + + W(6, 1056064759, 1049319247, 1048732210, 1054487227) + W(7, 1044300425, 1055338520, 1034894336, 1053272331) + + W(8, -1116809458, -1105760812, 1007721067, 1050640540) + + W(9, 1040160031, -1106372902, -1109703824, -1138309939) + + W(10, -1123784128, 
1016685513, -1105661778, 1048222150) + + W(11, -1110013906, 1033480951, -1112122819, 1033810456); + sum2 = W(0, 1023697539, -1134698244, 1018187921, 1024534132) + + W(1, -1145140771, -1142625275, -1141296989, 1007645751) + + W(2, -1119401943, 1013558867, -1119659280, 1038666215) + + W(3, -1120396841, 1035154544, -1131590401, 1021197862) + + W(4, 1010071413, -1112961274, 1019163177, -1097296472) + + W(5, 1033089456, -1105855221, -1156341795, -1120357988) + + W(6, 1025267901, 1033970227, 1048742053, 1082848284) + W(7, 1060700209, -1115995862, 1035612397, -1123714371) + + W(8, 988018625, -1116217767, -1103390691, -1064739318) + + W(9, -1086947071, 1042818438, -1109434149, 1029308077) + + W(10, -1122026299, 1005039212, -1115786374, 1042066180) + + W(11, 1017803659, -1115628759, -1150181080, 1011442501); + WS(1060770743, -1108197568); + sum1 = W(0, 1016886775, -1160198547, 1015246637, -1139668038) + + W(1, 1034731136, 1019206597, -1123983389, -1115649126) + W(2, 1029069588, 1038531041, 1041390628, 1022402527) + + W(3, -1103954605, -1112398730, -1117714338, 1039312430) + + W(4, -1095627128, -1096395039, -1086831062, 1057717017) + + W(5, 1056693263, 1053908426, 1043180606, 1038174992) + W(6, 1046558478, 1048524254, 1058409012, 1050284011) + + W(7, -1083755424, -1092005688, -1101033824, -1131529899) + + W(8, -1146067869, -1097778094, -1098986327, -1110221104) + + W(9, 1042951984, 1037994808, 1025216609, 1000398449) + W(10, 1009558662, 1033422755, 1033638277, 1029754521) + + W(11, -1119102757, -1130383710, -1114155844, -1129441697); + sum2 = W(0, -1104188915, 1026676440, -1122447814, 1024214572) + + W(1, 1036156927, 1039804037, -1140508998, 1023379743) + + W(2, 1041755532, -1099902739, 1020538503, -1096783078) + + W(3, -1094671115, -1096595186, -1108992818, 1023478108) + + W(4, 1039915367, 1048552390, -1105911641, 1068731351) + + W(5, -1096057439, -1091471401, 1033306823, -1096750685) + + W(6, 1028942726, -1105392496, -1106738713, 1066518668) + + W(7, -1122053390, 
-1123632409, 1032650381, -1109607147) + + W(8, 1030445090, -1104867005, -1106850996, -1126700225) + + W(9, 1025639958, -1113480650, -1115144981, 1031767054) + + W(10, -1104654601, 1033528857, 989961401, -1133955554) + + W(11, -1153726297, 1032072745, -1128986161, 1023561226); + WS(-1105940700, 1068951582); + sum1 = + W(0, 1000087558, 1035444041, 1016288952, 1033866603) + W(1, 1023501510, 1030903912, -1121882082, -1139243976) + + W(2, -1126610867, -1119652358, -1105308514, 1025089333) + + W(3, -1101500686, 1024487034, 1036868116, -1137536646) + W(4, 1040363554, 1048983991, -1129232537, 1043404182) + + W(5, 1050470779, 1032787659, 1042688239, 1051055933) + W(6, -1103531583, -1095183320, 1035540828, -1091977028) + + W(7, -1098956255, 1016827662, -1107477027, -1111432589) + + W(8, -1126498674, 1041424541, -1116171169, -1108874003) + + W(9, -1107291841, -1115835365, 1038297635, -1101253029) + + W(10, 1033586531, 1011542187, 1018737568, 1021129404) + W(11, 1033496245, 1023594100, 1015384473, 1034920923); + sum2 = W(0, -1123978268, -1122059260, -1123592170, 1036141312) + + W(1, -1111710804, -1127043568, 1040694361, -1120657957) + + W(2, -1102554320, 1044079872, -1093658955, -1094337979) + + W(3, 1045282706, -1139302445, 983065168, -1105233712) + W(4, 1043643239, -1111536011, 1042723115, 1067907418) + + W(5, -1118927202, 1035940631, 1016057395, 1027935282) + W(6, 1026913323, 1056944699, -1093217948, 1064662115) + + W(7, -1091166584, -1092135065, 1041761864, -1104693620) + + W(8, 1031431539, 1037617295, -1103624535, -1090550375) + + W(9, -1119161360, -1112133073, 1014979737, -1106943303) + + W(10, -1112242992, 1005844362, -1101249260, 1042545272) + + W(11, -1111294278, 1025042022, 1026789959, -1114223884); + WS(1061289911, -1074625008); + sum1 = + W(0, -1118986355, 1042690449, 1022020251, 1032007652) + W(1, 1040686510, -1108674269, -1123719572, 1033202498) + + W(2, 1035013222, -1110099132, 1048074309, -1101544522) + + W(3, -1102744618, 1043103143, -1099009366, 1038471242) + 
W(4, 1058027688, 1047068971, 1036127926, 1058005973) + + W(5, 1048070472, 1057593649, 1043345580, 1055515931) + W(6, -1087592511, 1013931199, -1088015817, -1099315493) + + W(7, -1091144488, -1088950448, 1033106140, -1090129177) + + W(8, -1107213995, 1008030621, 1042727509, -1093058723) + + W(9, 1042938237, 1002612916, -1110941900, -1113377491) + + W(10, 1048036382, -1106703359, 1044183504, 1029001296) + + W(11, -1106075703, 1048869669, -1107279296, 1042195418); + sum2 = + W(0, 1034897388, -1104469346, -1104312342, 1033162274) + W(1, 1001538203, -1119055011, 1036889309, -1122390571) + + W(2, 1047600068, 1074182899, 1080728354, 1077196082) + W(3, 1039546617, -1121476275, -1113609974, -1124909535) + + W(4, -1092005485, -1074957140, -1067981267, -1071410100) + + W(5, -1101189403, 1042351425, -1113906708, 1037135096) + + W(6, 1032211690, -1124075063, -1100127529, -1093339345) + W(7, 1036352485, 1008083901, 1032631324, 1002134635) + + W(8, 1046053217, 981988140, 1036844207, 1030504779) + W(9, 1025223671, -1101604075, 1034478190, -1113103836) + + W(10, -1111289614, 990179414, -1123826485, -1109598746) + + W(11, 1036519612, -1146561467, 1017678167, -1132393287); + WS(-1106894556, 1030995446); + sum1 = W(0, 1025589699, -1123387156, 1028556318, -1139656804) + + W(1, 980812294, -1128131860, -1137342353, 1017797929) + + W(2, 1033805806, 1025291593, -1127599047, -1105750398) + + W(3, -1104274718, -1117867836, 1018769427, -1114245906) + + W(4, 1034528997, 1054610112, 1039781117, 1065523109) + W(5, 1054896451, 1045458038, 1040199808, 1047692618) + + W(6, -1098701288, -1096105008, -1091536615, -1090836572) + + W(7, -1097995702, 1039238949, -1110811780, 1001735913) + + W(8, -1119393787, 1037953340, -1120434724, -1103173850) + + W(9, -1111869411, -1104116474, -1129786002, -1106977725) + + W(10, 1027839817, -1123546765, 1027349522, 1022652480) + + W(11, -1135075504, -1125284461, -1152753902, -1131064048); + sum2 = + W(0, 1030581609, -1110091164, 1024868796, -1124528958) + W(1, 
-1175372351, 1022252519, -1123141270, -1123316826) + + W(2, -1096073996, -1132437108, 1051933742, 1042406776) + + W(3, -1117745155, -1117509823, -1110977956, 1029056744) + + W(4, -1104952058, 1031847146, 1077840619, -1071401906) + + W(5, -1108532142, -1107574374, 1034777920, 1034383482) + W(6, 1036583621, 1040176294, 1056082302, -1080138625) + + W(7, 1039833458, 1041695761, -1119756119, 977900031) + W(8, 1006888722, -1120421639, -1114613612, 1012484405) + + W(9, 1044393926, -1112420268, 1028893644, 1027961212) + + W(10, 1023568502, -1124563800, 1026684617, -1114165836) + + W(11, -1139194300, 1023135287, -1122696127, 1034470668); + WS(-1094248046, 1034739186); + sum1 = W(0, -1121641997, -1148807918, -1119896808, -1118546659) + + W(1, -1117202258, 1027770003, -1121511758, -1114510703) + + W(2, 1043421097, -1114752489, 1040593858, 1043491504) + W(3, 1033892386, 1035924243, 1032482905, 1031829015) + + W(4, -1090462057, -1097763033, -1089339622, -1089142502) + + W(5, -1089696734, -1098379668, -1096843609, -1089945727) + + W(6, 1057979744, 1045790578, 1054873978, 1054724534) + W(7, 1056858543, 1046524773, 1049846778, 1056083605) + + W(8, 1042504017, -1105205440, 1029920941, -1114903193) + + W(9, -1102740772, 1042806872, -1103488653, 1031492831) + + W(10, 1019430629, -1115505835, 1008619887, 1038945077) + + W(11, 1025474934, -1148381698, 1017864241, 1029503362); + sum2 = W(0, 1032629114, 1024571549, -1123705993, -1120833813) + + W(1, 1031286487, -1113289848, 1033808184, 1035572024) + + W(2, -1111104155, 1022922994, 1036988882, -1104365480) + + W(3, 1038064580, -1119955010, 1018164430, -1144675449) + + W(4, -1111175727, 1047799465, -1121840686, -1098359864) + + W(5, 1008715668, -1115327032, 1042276675, -1114533548) + W(6, 1069180600, 1065968459, 1066031139, 1064223912) + + W(7, 1062275672, 1060225053, 1060364945, 1057151625) + W(8, -1092657949, -1106624299, 1026540087, 1044325339) + + W(9, 1048614780, -1117772151, -1105153406, -1103516228) + + W(10, -1081878912, 
-1085703668, -1080884055, -1079207816) + + W(11, -1083548865, -1085294305, -1090372695, -1100806033); + WS(-1095016302, 1049780935); + sum1 = + W(0, -1140214663, 1008440267, -1137823987, 1035221148) + W(1, 1007510683, 1029727784, -1142229143, 1025812851) + + W(2, 1050711943, -1110792724, 1036012635, -1104496021) + + W(3, -1145733059, -1109552317, -1115585956, -1142816275) + + W(4, 1044505338, 1051364259, 1051977632, 1058946996) + W(5, 1056967247, 1057598839, 1050931556, 1058283380) + + W(6, -1092537964, -1101785640, -1091742968, -1090015552) + + W(7, -1090979415, -1088424934, -1096995861, -1095074239) + + W(8, -1110639373, 1026645590, -1130979716, -1106527650) + + W(9, -1146016549, 1015167915, 1009720606, -1105047309) + W(10, 1025322472, 1024485132, 1018037601, 1032762847) + + W(11, 1010918699, 1027572437, -1127055707, 1011865435); + sum2 = + W(0, 1040969494, 1036141535, -1136101349, 1015318832) + W(1, -1112220931, -1111621316, 1042180910, -1115487487) + + W(2, -1087779211, 1033693595, -1096934819, 1042126031) + W(3, 1036377831, 1046621966, -1129542795, 1052735842) + + W(4, 1056604611, -1095617275, 1057610078, 1059711341) + + W(5, -1098744628, -1088974739, -1112239718, -1089093989) + + W(6, 1059045592, 1037950807, 1059474718, 1079595174) + W(7, 1065421414, -1079659864, -1077353318, -1076641223) + + W(8, -1085730308, 1051875566, -1104408796, 1011257519) + + W(9, -1102027272, -1097874228, -1123714793, -1089345613) + + W(10, 1035668628, -1110274633, -1106464684, -1106179801) + + W(11, 1035078820, 1045400280, -1122902412, 1057832501); + WS(-1081166556, -1096725839); + sum1 = + W(0, -1146977357, -1130642485, 1032268722, -1111253552) + W(1, 1024786511, 1021187873, -1115200917, 1021141494) + + W(2, -1114510289, -1119269199, 1038356317, -1095921640) + + W(3, 1038758688, -1102108905, -1117501552, 1024092394) + W(4, 1050030946, 1045921361, 1039889859, 1057711094) + + W(5, 1053078718, 1045805544, 1042557010, 1052716334) + W(6, -1102646113, -1098664649, 1027769520, 
-1089074675) + + W(7, -1098156409, -1106011300, -1102728052, -1107570834) + + W(8, -1113153825, 992772066, 1041426283, -1107182252) + + W(9, 1043642083, -1131324475, -1148235644, -1153593794) + + W(10, -1114582784, 1022415207, 1025422681, -1098886685) + + W(11, 1036466489, -1129952762, -1133449046, 1012650491); + sum2 = + W(0, -1154116071, -1120032365, 1033357218, -1132933754) + W(1, -1165588207, 1029048100, -1115144648, 1020455541) + + W(2, 1032309947, 1017679770, -1118521489, 1020082264) + W(3, -1122971142, 1015972752, 1021714009, -1134334294) + + W(4, -1123962703, 1025906643, 1007375557, -1114291801) + + W(5, -1114360955, 1031540723, -1136421576, 1014310369) + + W(6, -1133231158, -1134787122, -1096414395, 1064675801) + + W(7, -1097709607, 1040110606, -1121524587, -1119507729) + + W(8, -1128886461, -1108727038, 1020492423, -1080568363) + + W(9, -1108104258, -1112615988, -1111963053, 1028363593) + + W(10, 1035599863, 1039832482, 1035524914, 1059399952) + + W(11, 1040378905, 1024318985, 1036458899, -1122706022); + WS(1058284215, -1100256461); + sum1 = W(0, -1136333391, 977549609, 1023954799, 1032458293) + W(1, -1131855793, -1132003829, 1014651348, 1007647892) + + W(2, 1042064203, 1017583666, -1103445177, 1003134525) + W(3, 1030077610, 1044505715, 1034368605, -1109681210) + + W(4, 1048792691, 1037147366, 1061193316, 1055543997) + + W(5, -1089393894, -1102389770, -1126089633, 1030932473) + + W(6, -1108621703, -1098054746, -1084539629, -1105743629) + + W(7, 1061657482, -1111653453, 1032593324, -1134741175) + + W(8, -1107171315, 1036542212, 1001865121, -1113492892) + + W(9, -1095280499, 1000070140, -1124982771, 1012906393) + + W(10, 1009105958, -1118222526, 1005641493, 1001049989) + + W(11, -1141919717, 1031530183, 957669220, -1123915241); + sum2 = W(0, 1025749667, -1112523495, 1034709000, 1021531102) + + W(1, -1129693762, 1006685387, -1119027428, -1106459716) + + W(2, 1018718450, 1033569638, -1099586256, -1096388774) + + W(3, -1109846828, -1112379914, 1008781387, 
1045287677) + + W(4, -1104497483, -1111912018, -1087345020, 1061077725) + + W(5, 1049253485, -1117456886, 1038233914, -1135366091) + + W(6, -1109821356, 1026676021, -1092985416, 1065372034) + + W(7, 1048850468, -1106255761, 1013968523, -1138431127) + + W(8, 1032840840, -1114119398, 1018773778, -1129773592) + + W(9, -1101038787, 1043058397, -1113511705, 1037121278) + + W(10, 1009497507, 1017944258, 1033599708, -1109427758) + + W(11, 1029395381, -1106769492, 1020319838, -1113647178); + WS(1048665454, 1062854025); + sum1 = + W(0, 1004974300, 1032679478, 1041394910, 1026224498) + W(1, -1128844481, -1137588617, -1115578718, -1128306202) + + W(2, -1140401360, -1099989608, -1108003052, -1103518244) + + W(3, 1044968661, 1040271757, 1031459778, -1114933822) + W(4, 1040482153, 1038119146, 1050737065, 1054936910) + + W(5, -1084409495, -1099872844, -1107158134, 1038726400) + + W(6, -1107105990, -1094089019, -1084721726, 1054691719) + W(7, 1057517320, 1035416977, 1033669936, 1032390235) + + W(8, -1113947114, 1043046180, 1044891839, 1025601857) + W(9, 1022006333, 1017193535, 1025595521, 1023203548) + + W(10, 1004764812, -1132148363, 1022372181, -1113753404) + + W(11, 1033354242, -1131287582, -1112957780, 1012659010); + sum2 = + W(0, -1120028750, -1137390044, 1031876652, -1112211314) + W(1, 1041988042, -1105672202, 1032816067, -1122489809) + + W(2, -1116919239, 1034599590, -1110926857, 1037510574) + + W(3, -1091078601, 1052042083, -1102117757, 1043447553) + + W(4, 1026537129, -1105861815, -1114848017, 1054207178) + + W(5, 1059021754, -1104155840, 1047757772, -1115066040) + + W(6, 1032246372, -1095438265, -1093245727, 1062401065) + + W(7, -1115099719, -1100501338, -1112097107, -1102195598) + + W(8, 1047938064, 1022176478, -1093210017, -1118385188) + W(9, -1104427658, 1013766280, 1027075631, 1027463789) + + W(10, -1102851233, -1129352893, 1046245506, -1106645860) + + W(11, 1043294706, 1016480352, -1113418727, 1030659187); + WS(1046819548, -1112403026); + sum1 = + W(0, 1043575156, 
-1118831073, 1028002987, 1025124390) + W(1, 1023377962, 1034880715, -1125071779, -1144027915) + + W(2, -1096536373, 1040283909, 1049135339, -1115136450) + + W(3, -1127238030, 1042113706, -1124049974, 1045174012) + W(4, 1028458839, 1047473762, -1083606160, 1052252597) + + W(5, 1054448495, -1106041242, 1031800519, -1138801364) + W(6, 1042563315, -1108795847, 1040849058, 1045384892) + + W(7, -1081137782, 1040945110, 1026387221, -1122981994) + + W(8, -1147631681, -1115473730, 1015994849, -1102431101) + + W(9, 1046487189, 1026652836, -1096718751, 1030118403) + + W(10, -1134760216, 1037371629, 1036578576, -1147407721) + + W(11, 1029054905, -1113175874, 1032040846, -1121286085); + sum2 = + W(0, -1116472244, 1025044698, -1123391126, 1003069456) + W(1, -1129719684, -1110477199, 1023677849, 989635105) + + W(2, 1036928405, -1122373594, -1111538981, -1111848203) + + W(3, 1027481573, 990411281, -1136822924, -1136314996) + W(4, 1035706238, 1040619830, 1046111644, 1044878634) + + W(5, 1017226015, 1030695866, -1114454329, 1007553654) + W(6, -1121318128, -1104861162, 1038958235, 1050016986) + + W(7, 1040124875, -1116142138, -1113301330, -1112411005) + + W(8, -1114868876, 1034962287, 1034772140, -1113883437) + + W(9, -1114732111, -1102609372, 1029458562, -1109652887) + + W(10, 1033058067, -1110745183, -1130393948, 1005454840) + + W(11, -1105702279, 1029172520, -1112847239, -1150621057); + WS(1054158702, -1098491196); + sum1 = W(0, -1129450472, 983582877, -1121209423, -1121678922) + + W(1, 1031290553, -1119715259, 1032360987, -1115265987) + + W(2, -1119237644, 1026235385, 1037197666, 1031464488) + + W(3, -1099798307, -1102325483, -1107157175, 1036899466) + + W(4, -1101713087, -1098493798, -1083899440, -1103778771) + + W(5, 1057005120, 1046824098, 1037130136, 1011053924) + W(6, 1033589815, 1048228620, 1060260620, 1056756834) + + W(7, -1093716116, -1102446942, -1111054172, -1132207900) + + W(8, 1034513750, -1123359016, -1103037013, -1122982297) + W(9, 1049393023, 1028221304, 
1041637742, 992781927) + + W(10, 1023147092, -1173092218, 1035595873, 1020058530) + + W(11, -1117980637, -1124996559, -1115548960, 995105423); + sum2 = + W(0, 1022318336, -1115650269, 1023537352, -1109544964) + W(1, -1104797311, 1016525394, 1034835508, 1033208955) + + W(2, 1013555797, 1016103802, 1041650416, 1052577828) + + W(3, -1105646271, -1089308865, -1093424353, -1123233885) + + W(4, -1107715656, -1119261847, 1043696631, -1093802466) + + W(5, 1070961498, -1101461419, -1129647390, -1103638866) + + W(6, 1023151297, -1114491928, 1042399799, -1088107917) + W(7, 1046742027, 1041655381, 1027817568, -1123805293) + + W(8, 1033284778, -1130031974, -1108010060, 1027122983) + W(9, -1123089325, 1023301191, 1010592861, 1027104379) + + W(10, -1133592549, -1142562170, -1132296546, 1012286637) + + W(11, -1129646858, -1123666701, 1020078242, 1017326494); + WS(-1098756718, 1025257793); + sum1 = W(0, -1170432841, 992949730, 1001883457, 1015181254) + W(1, -1113865961, 1025052052, -1142935155, 1028407518) + + W(2, 1023803110, -1138344011, 1026014126, -1121590721) + + W(3, -1118191492, -1131017933, 992699702, -1125677724) + + W(4, -1090131561, -1107275570, -1096556254, -1096688477) + + W(5, -1094031636, -1105028073, -1106433523, -1094260670) + + W(6, 1057699745, 1040420307, 1054103406, 1044394251) + W(7, 1054646511, 1053702338, 1018441892, 1056434389) + + W(8, -1119694405, -1117188674, -1115724526, 1029634610) + + W(9, -1140440641, -1105231572, 1028218227, -1115106656) + + W(10, 1012926412, -1123369303, -1127845816, 1035099253) + + W(11, -1114907963, 1041135600, -1110839852, 1033590433); + sum2 = + W(0, -1113857499, 1017397942, 994248048, -1107301692) + W(1, -1114604929, -1099171315, -1128156929, -1105825924) + + W(2, 1035336978, 977119295, 1058657166, 1082282453) + W(3, 1081592322, 1057428850, 1029982493, 1050573738) + + W(4, 1003212424, -1169985407, -1086726098, -1067974855) + + W(5, -1065596346, -1101606461, -1114706375, -1113547007) + + W(6, 1016146718, -1139136560, 1029663775, 
-1088717558) + + W(7, 1032721198, -1109521003, 1035913834, -1115579388) + + W(8, -1124908942, -1127032094, -1128987279, 1045162011) + + W(9, -1132432620, 1001306728, -1127893472, 1022139202) + W(10, 1007728744, 1015257206, -1139895388, 989071903) + + W(11, -1115423533, 1026551071, -1124574009, 999865176); + WS(1057488311, 1032489366); + sum1 = W(0, 998859696, -1126361486, 1028022609, -1139957040) + + W(1, 1015295094, -1122345661, -1126668079, -1129886863) + + W(2, -1107316871, 1034785118, -1114064513, 1048520075) + + W(3, -1112481238, -1154023155, -1133481329, -1108825451) + + W(4, -1130375162, -1098594694, -1105098668, -1099155379) + + W(5, -1109749909, -1112014022, -1114013594, -1116334683) + + W(6, 1045821468, 1031878554, 1057263754, -1127500588) + W(7, 1049599048, 1025402492, 1034877470, 1046739556) + + W(8, 1009430200, -1137835434, -1104540630, 1033795278) + + W(9, -1105774078, 1046311438, -1109189150, -1119327433) + + W(10, -1127874328, -1124885440, -1121514949, 1026343507) + + W(11, 1007199100, -1119007402, 1026086402, -1138298796); + sum2 = W(0, -1131728975, 1035973522, 1044679054, 1057647035) + W(1, 1029801795, -1120435503, 1040526136, 1001204178) + + W(2, 1036764889, -1115068573, -1096357732, -1081650015) + + W(3, 1008652352, 1020148594, 1019038058, -1125968836) + + W(4, -1111227173, 1022919798, -1128128484, 1064645808) + + W(5, -1094730608, 1034184152, -1110501324, -1141167471) + + W(6, -1128881737, 1043114423, -1098903856, -1105946360) + + W(7, -1121257166, -1122224678, 1033879187, -1125677560) + W(8, 1037242625, 990889655, 1034691185, 997618289) + + W(9, -1119761613, -1123774451, 1017411026, 1017460301) + + W(10, -1122740141, -1130091274, -1126568042, 1036162942) + + W(11, -1145265129, 1016517254, 1025838030, -1121815648); + WS(1059796919, -1120566973); + sum1 = + W(0, -1128913552, -1121363289, -1114223569, -1170089852) + W(1, 1032486790, 1007200827, 1031548233, 1025411673) + + W(2, -1112300176, 1042197939, 1034918960, 1019526904) + + W(3, -1097594444, 
-1109654209, -1121876014, -1114258964) + + W(4, -1105877684, -1097436160, -1084964393, 1000497399) + + W(5, 1058860547, 1031553246, 1038828418, -1106885131) + W(6, 1051198984, 1044389601, 1058261945, 1057668526) + + W(7, -1083919657, -1093305914, -1113158982, 1024357474) + + W(8, 1031319296, -1156941767, -1115209492, 1032065846) + W(9, 1045243539, 1041280427, 1014469175, -1128319413) + + W(10, -1128535084, 1024751122, 1025460631, 1032881746) + W(11, 1010383044, 986576750, 1034628234, 1012703988); + sum2 = + W(0, -1143612781, 1024653023, 1031492195, -1118229715) + W(1, 1037055061, -1103747903, 1027878177, -1120661119) + + W(2, 1030657759, -1114346787, -1115709916, -1122961088) + + W(3, 1023143173, 1032102149, 1035049663, -1115297076) + W(4, -1142189493, -1145631357, 1041520845, 1058735511) + + W(5, -1123318690, -1113230658, -1107577516, 1029140405) + + W(6, -1116365956, -1122350618, -1109597137, 1063925227) + + W(7, -1097941231, -1096423541, 1034348655, -1104473650) + + W(8, -1108113093, -1120960689, -1100459142, -1102646250) + + W(9, -1097541479, -1119600722, 994779835, 1048901284) + W(10, -1144957229, 1013910379, 1028619709, 1003281301) + + W(11, 1040501280, 1015718737, -1116733621, -1122107736); + WS(1025862512, -1097886171); + sum1 = W(0, 1017995501, 1028206791, -1115115241, 1023900364) + W(1, 973319892, 1028215869, 1033665516, 1031994596) + + W(2, 1039868615, 1039095646, 1044724702, 1040670187) + W(3, 1036683863, -1105649212, 1040757263, -1101644966) + + W(4, -1084617440, -1095591910, -1088992852, -1089161581) + + W(5, -1092805231, -1107053347, -1107160205, -1102281943) + + W(6, 1048852846, 1046495341, 1053236428, 1054642003) + W(7, 1050422468, 1049220624, 1039010398, 1042256548) + + W(8, 1036255511, 1032043925, 974144948, 1024473755) + W(9, -1132527589, -1120787802, 1025750940, 1034960116) + + W(10, 1003283810, 1031081728, -1128580235, 1033561578) + + W(11, 1012484477, -1126192084, 1035723265, -1141688154); + sum2 = + W(0, 1017309480, -1115898040, 1026950620, 
-1131938440) + + W(1, -1120377052, -1147724576, -1108926548, -1120540424) + + W(2, -1113384832, -1105498372, -1140340528, -1101077232) + + W(3, -1114410560, 1042402941, -1114542528, 1040078450) + W(4, 1051665642, 1043260681, -1109716244, 1039662810) + + W(5, -1113565360, -1097796100, -1108348496, -1097463639) + + W(6, -1106075829, 1041703091, -1109231476, 1049668212) + W(7, 1032262130, 1040746517, -1140600496, 1048026467) + + W(8, 1041533127, -1122221676, 1047102067, -1109791178) + + W(9, 1039963022, -1131969000, 1032294606, -1107112902) + + W(10, -1143916576, -1119502308, 1032670038, -1130795288) + + W(11, -1112068676, 1031201932, -1108483346, 1028500076); + WS(1059594295, -1081462343); + sum1 = W(0, 1027153262, 978268098, 1020914066, 1037283618) + W(1, 1001487768, 1024040629, -1121199663, 1022356543) + + W(2, 1043504002, -1116748340, 1039643767, 1038369681) + W(3, 1034058802, 1040894970, -1111840522, 1036921959) + + W(4, 1052804111, 1047257898, 1049749519, 1054855273) + W(5, 1047397560, 1052275873, 1039603542, 1026848211) + + W(6, -1087238767, -1102654539, -1086204143, -1088075529) + + W(7, -1086503800, -1098202447, -1107277965, -1098696769) + + W(8, 1041162890, 1038115889, 1044063108, 1037006730) + W(9, -1117492081, 1024505810, 995252542, 1036480704) + + W(10, 1012415888, -1165722369, 1033818911, 1032180390) + + W(11, 991868273, 1034352017, -1131707508, -1136594828); + sum2 = W(0, 1028621181, -1110777341, 1027102605, 1030597213) + + W(1, -1106396561, 1041951037, -1110468651, 1026771477) + + W(2, -1120219585, -1127812186, -1121966109, -1109734497) + + W(3, -1139970197, 1005564394, 1027913013, 1021745754) + W(4, 1035384087, -1126856730, 1038893967, 1046142043) + + W(5, -1102581665, 1024169581, -1119264521, 1045072501) + + W(6, 1034472183, -1129759266, -1123472797, 1054407172) + + W(7, -1114873213, 1049042812, -1104920828, -1117275361) + + W(8, -1123130413, -1106344343, -1121376361, -1110811845) + + W(9, -1147353578, 1023037898, -1123538565, -1108801471) + + W(10, 
-1120488613, -1132463093, -1111137329, 1013238549) + + W(11, -1113345499, 1010521365, -1123696429, 934750635); + WS(1001790336, 1033193226); + sum1 = W(0, -1113292474, -1131356554, -1133969725, -1118664394) + + W(1, 1027204153, -1111709325, 1024655285, -1127980619) + + W(2, 1018580889, -1115176285, -1102427201, -1094752997) + + W(3, -1105768635, -1119059021, -1121009370, -1131789226) + + W(4, -1117909243, 1046326027, 1041639692, 1062953181) + W(5, 1031529696, 1040555139, -1115548569, 1030139737) + + W(6, 1052553934, -1113855449, 1060794158, 999804065) + + W(7, -1122254720, -1110352445, 1026923705, -1100154756) + + W(8, -1104113579, -1112351060, -1095014251, -1105541862) + + W(9, -1102735326, 1031818996, 1008421480, 1016036429) + + W(10, -1129785460, -1156866531, -1132017542, -1130622370) + + W(11, 1022740922, -1121415144, 1008916224, -1122315305); + sum2 = + W(0, 999771042, -1130012060, -1123589256, 1018901632) + W(1, -1124249760, 1017234680, -1118367200, 1016197020) + + W(2, 1019371878, -1137594745, 1002965746, -1114341154) + W(3, -1115605194, 1017493044, 1015905000, 1024661143) + + W(4, 1035878909, 991329315, 1042318306, 1053769225) + W(5, 1010690665, -1099274218, -1101115118, -1092869373) + + W(6, -1106457448, 1043986431, -1118271248, 1057361082) + + W(7, -1102884302, 1037230696, -1112309114, 1036147884) + + W(8, 1038257769, -1108709562, 1025444512, -1113472862) + W(9, 1015483632, -1115820708, 1020142296, 1016005876) + + W(10, -1118330440, -1156785507, -1134019065, 1021163648) + + W(11, -1123825168, 1025071687, -1126466092, 1013380697); + WS(-1096043118, -1087645231); + sum1 = W(0, -1115587665, 1032694426, 1031441086, 1023546425) + + W(1, -1130229584, -1147531597, 1019733960, -1115129818) + + W(2, 1029203067, -1099485226, -1113336032, -1121275799) + + W(3, 1051148019, -1118364796, -1127235090, -1119188228) + + W(4, 1045341850, 1017432376, 1054321442, 1050158903) + W(5, -1081266857, 1047006846, -1106232936, 1041137522) + + W(6, -1106908023, 1052615508, 
-1082902530, 1045376289) + + W(7, 1057440145, -1118131945, -1134199968, 1018710010) + + W(8, -1131991862, -1113683692, 1050084000, -1116607496) + + W(9, 1027649766, -1120491559, 1027530281, -1131853547) + + W(10, -1114910160, -1127306394, 998100766, -1147820543) + + W(11, 1021215128, 991663462, -1123554607, -1124042776); + sum2 = + W(0, -1129211209, 1023861908, 1038235290, 1029997412) + W(1, -1125523437, -1107362446, -1114850415, 1013933265) + + W(2, 1023515648, -1119658781, -1121364796, -1103304652) + + W(3, -1109858923, 1044349350, 1032218024, 1027861436) + W(4, 1040374834, -1108471855, -1096619497, 1039720436) + + W(5, -1113539350, 1038091910, -1121557752, 982770184) + W(6, -1110890211, -1133136545, 1044872450, 1050855479) + + W(7, -1117035130, -1107498682, -1130602657, -1128608489) + + W(8, 1020265593, 1048083250, 1034188808, -1102257516) + W(9, 1022698753, 1024895152, -1162309256, -1129090321) + + W(10, 1023672492, -1109425046, -1121333166, -1130250517) + + W(11, 1029442100, -1122118916, 1020180841, 939323929); + WS(1062090935, 1058767985); + sum1 = + W(0, 1016379967, -1127587553, -1124869109, 1030044278) + W(1, -1135267218, 1021197901, -1146487047, 1001072263) + + W(2, -1111629724, 1016014666, 1045897412, 1039181162) + W(3, -1111908667, 1032078908, -1129970444, 1037221664) + + W(4, -1115487571, 1046030390, -1089079359, -1107063038) + + W(5, 1056809425, -1100036461, 1008766834, -1118068997) + W(6, 1040669191, -1131263417, 1049140321, 1046027952) + + W(7, -1082917799, 1040408361, -1115518322, 1034380765) + + W(8, -1124650927, -1107621674, -1120715392, 1035258018) + + W(9, 1051125844, -1117322707, -1115270659, -1135017032) + + W(10, -1131647467, 1039946613, -1197661491, 1033161790) + + W(11, -1129645073, -1124734374, 1029976235, -1119763541); + sum2 = + W(0, 1033054007, -1116547758, -1123189128, 1033469023) + W(1, -1122818756, 1026371779, -1136838880, -1137766108) + + W(2, -1111327982, 1041352776, 1032790615, 1012971008) + W(3, -1098982462, 1042838787, 
-1110172088, 1004924993) + + W(4, -1121722910, 1034677105, 1033549613, 1033020303) + W(5, 1051050782, -1098306997, 1034801784, -1120000922) + + W(6, 1038740385, -1115132829, -1092378351, -1108961666) + + W(7, -1121004928, 1045175561, 1019964530, 1027487385) + + W(8, -1119336676, -1124364804, -1102497930, -1106694042) + + W(9, 1035128176, 1039643256, 1015821086, 1017988993) + W(10, -1124024618, 1035204548, 1032784783, 1032253989) + + W(11, 1009754028, -1118316692, -1125398316, 1028774587); + WS(1065136439, 1040354336); + sum1 = W(0, -1140671753, -1120274067, 1022747450, -1124666160) + + W(1, -1123069063, -1131113682, -1118356003, -1126498983) + + W(2, -1108811053, -1107804932, -1110160675, -1105899870) + + W(3, 1025125049, 1033405406, -1129239633, 1016924742) + W(4, 1057254373, 1050226223, 1059609336, 1031464345) + + W(5, -1093137163, -1111386668, -1105328992, -1111235284) + + W(6, -1099781359, -1098842784, -1092867554, 1000731409) + + W(7, 1058399851, 1055942809, 1044975810, 1051704028) + W(8, 1009251305, 1010162395, 1036945681, -1114655220) + + W(9, -1109467235, -1101547023, -1113825780, -1104493413) + + W(10, 1003726785, -1112753158, -1120831377, -1112429520) + + W(11, -1119063979, -1146511789, -1116155918, -1117793303); + sum2 = W(0, -1139839138, 998321428, -1134067290, 1024466364) + + W(1, 1008723794, -1119932526, 1028368392, -1132382549) + W(2, 1034041544, 1018598993, 987604943, -1102834025) + + W(3, -1131349085, -1118877643, 1031347596, -1113750023) + + W(4, 1034669108, -1116818777, 1007335842, 1061492513) + + W(5, -1121214551, -1116983417, -1114752830, -1103912225) + + W(6, -1095769692, -1114897304, -1102588486, 1062722215) + + W(7, -1107691916, -1117176868, 1009572226, 1035794880) + + W(8, -1105759439, -1105398904, -1108283066, -1098749835) + + W(9, -1108418109, 1041629273, -1103951670, 1046707789) + + W(10, 1039138956, -1145690340, 1018314989, 1025712340) + + W(11, 1022467761, -1121476700, 1033232162, -1110829011); + WS(1015535328, -1084984071); + sum1 = 
W(0, -1114800552, 1017574081, -1121052395, -1121642214) + + W(1, 1031943978, -1119258038, 1015340962, -1152091148) + + W(2, -1108603860, -1120445077, -1120661414, 1038434434) + + W(3, 1004694578, 983354644, -1131668588, -1117988144) + + W(4, -1099862310, -1098244185, -1094435959, -1091868509) + + W(5, -1100742889, -1090330210, -1106546547, -1096238847) + + W(6, 1058689409, 1050368026, 1059588503, 1053265766) + W(7, 1053762387, 1049777811, 1042220181, 1044693386) + + W(8, -1114937124, -1113854018, -1120887600, 1038069402) + + W(9, 1032168457, 1033335786, -1112611163, 1033654385) + + W(10, -1119809253, -1119794874, -1117398367, -1124845785) + + W(11, -1126829041, -1124490374, -1122991451, 1025017029); + sum2 = W(0, -1113763234, -1113925793, -1117079348, 1016562523) + + W(1, -1106152769, 1021767953, -1110489214, -1118706081) + + W(2, -1123863865, -1117743388, 1031541130, 1034144457) + + W(3, -1108434934, 1028245712, -1107854239, -1131124226) + + W(4, -1152383745, 1049718598, 1054034272, 1045174355) + W(5, 1019958281, 1048832333, 1037079656, 1041167636) + + W(6, -1153318001, -1115051721, -1111062529, 1029337732) + + W(7, -1107848604, 1036566626, -1106439039, -1100833945) + + W(8, 1032880554, -1108241407, 1025815300, 1040219109) + + W(9, -1122950542, 1032317223, -1114917041, 1023645948) + + W(10, -1112434267, 1017487533, -1115045873, -1115252534) + + W(11, -1112234054, -1144290245, -1111099643, -1123196065); + WS(1058837943, 1069665989); + sum1 = W(0, 962159770, -1131526763, -1123587520, 1016761717) + + W(1, -1116652202, 1020359383, -1124916236, -1120750038) + + W(2, -1123278397, -1133106088, -1111839826, -1119033369) + + W(3, -1112323118, -1123741278, -1111587013, 1031183749) + + W(4, 1059574623, 1052414436, 1056026369, 1050778647) + W(5, 1057288447, 1051933055, 1049400292, 1050858601) + + W(6, -1100091288, -1100255560, -1090882007, -1089604336) + + W(7, -1090318286, -1098258189, -1098860821, -1113801599) + + W(8, -1106975399, 1034080493, 1026032901, 1030312842) + + 
W(9, -1115237492, -1123145400, 1023272149, -1102573630) + + W(10, 1026912571, 1026538575, -1128992103, -1142474253) + + W(11, -1126485147, 1024876312, -1130617057, -1147252941); + sum2 = + W(0, 1031825723, -1121672345, -1128689519, 1003655756) + W(1, -1112729716, 1021714543, -1143241256, 1015056847) + + W(2, 1031898082, -1123032728, -1149677000, 1042326937) + + W(3, 1016978721, -1135526098, 1017362221, -1115756346) + + W(4, -1068962638, -1070958719, 1062859919, 1079716701) + W(5, 1069238165, -1106783417, 1038638730, 1030095799) + + W(6, -1097279430, 1042495248, 1042236169, -1097154173) + + W(7, 1043003442, -1101596383, -1114855682, 1029691669) + W(8, -1106804175, 1004890268, 1033261211, 1047991146) + + W(9, 1029430153, 1006662138, 1016716693, -1116617969) + + W(10, 1033946762, 1015278793, -1125393239, -1129356209) + + W(11, -1130449529, -1143028884, 1021676019, 1007567766); + WS(1027937136, 1051433372); + sum1 = W(0, 1024223452, -1131494490, 1034208027, -1113660825) + W(1, 1026687777, 1007645322, 1019229306, 1023660333) + + W(2, 1027262836, 1034970897, 1041574867, 1051529138) + W(3, 1043759333, 1035492122, 1025216066, 1033175550) + + W(4, -1095583884, -1102510757, -1088128354, -1082034838) + + W(5, -1107709564, 1021392868, -1116281753, 1019915650) + + W(6, 1036904555, 1043230726, 1043513377, -1088109312) + W(7, 1040837474, 1023668834, 1033050768, 1041627229) + + W(8, 1037383306, -1126475809, 1038022750, 1049740992) + W(9, 1043020088, 1040579632, 968839375, 1040383308) + + W(10, 1026871330, -1131579916, 1022249046, -1121104697) + + W(11, 1030621125, -1128113439, 1026071912, 1024084817); + sum2 = + W(0, -1122636491, -1114076695, -1122337791, -1121313911) + W(1, 1019616565, 1032037049, -1121846167, 1035628188) + + W(2, 1019704225, 1037288383, 1038646185, -1113812515) + + W(3, -1115379683, -1144905268, -1124810757, -1104091459) + + W(4, 1049771287, 1032297878, 1050229287, 1058219066) + + W(5, -1126947029, -1089173556, -1109798125, -1085784913) + + W(6, -1108218455, 
1029534785, 1038576544, 1058416091) + W(7, -1096957025, 1029636273, -1112204367, 1039375301) + + W(8, 1003686724, -1114871835, 1026526587, -1129529893) + W(9, 1020162449, 1011141050, 1015768281, -1169657505) + + W(10, 1007985666, 1019192761, -1146139348, 1021492965) + + W(11, -1131201101, -1129337357, -1117257463, 1026702945); + WS(-1087300279, 1060388257); + sum1 = + W(0, -1129462823, 1031115364, -1114720439, 1025637865) + W(1, -1150727557, 1019407412, 1005184420, -1144567588) + + W(2, 1023690092, 1041026294, 1027269348, 1022155151) + W(3, 1032140389, 1019365849, 1035913085, -1142073248) + + W(4, -1095623283, -1092905754, -1093094418, -1092295181) + + W(5, -1091295733, -1094422277, -1097873082, -1089577936) + + W(6, -1106106692, 1045171988, 1058204318, 1060821043) + W(7, 1047241920, 1051757075, 1047453654, 1052799910) + + W(8, 1037669414, -1113959460, -1142625182, 1044954958) + W(9, 1031904715, 1022315054, -1121754201, 1027854342) + + W(10, -1130365587, -1127843579, -1122973026, 1027810288) + + W(11, 1008074737, 1005685302, 1011776124, 1023937224); + sum2 = W(0, -1120594034, -1141459412, -1136839806, 1028313984) + + W(1, -1127607807, -1122936307, 1007613018, -1134584398) + + W(2, -1166872476, 1004777140, -1113289360, 1013932442) + + W(3, 1019995585, -1124945717, -1145964804, -1157638606) + + W(4, 1040489823, -1122900789, 1021539093, -1095750682) + + W(5, 1045364618, 1031026044, 1033830372, -1125456395) + + W(6, -1102509951, -1110936244, 1067324619, 1045560576) + + W(7, -1096904945, -1100965201, 1000232964, -1108078168) + + W(8, -1086857984, -1095818924, -1093760272, 1065644346) + + W(9, -1123950010, 1042979309, -1121148085, 1026162850) + + W(10, 1045679794, -1104593759, -1110068744, -1103109579) + + W(11, 1034732462, 1018809021, -1152320711, 980148252); + WS(-1090838638, 1050174926); + sum1 = W(0, 1024715784, 1033853293, -1106406969, 1042483832) + + W(1, 1025782733, -1138965820, 1038488261, -1139164262) + W(2, -1109115088, 1049593122, 994296459, 1045373935) + + 
W(3, 1015694078, -1112080289, 1045351388, -1114987669) + + W(4, -1106716164, -1120751425, -1094496661, -1086184702) + + W(5, -1092092861, -1095245812, -1100690449, -1094326222) + + W(6, 1038102082, 1051782073, 1043814476, 1058236139) + W(7, 1045378621, 1042079876, 1050438570, 1040938025) + + W(8, -1115525408, -1136454131, -1105105482, 1033520471) + + W(9, 1029286795, -1106373991, 1034575399, -1122199803) + + W(10, 1016756989, 1035169257, -1108133484, 1042591993) + + W(11, -1140252101, -1115290814, 1041627941, 1001134058); + sum2 = + W(0, 1051544715, 1047565246, 1046232208, -1105592564) + W(1, -1094324661, -1105924841, 1021200095, -1103932050) + + W(2, -1087153288, -1111682644, -1081925443, 1060283907) + + W(3, 1062548140, -1127110112, 1051334754, 1033462267) + + W(4, -1090874159, -1099176408, -1105576760, 1048906709) + + W(5, -1112367085, 1049091960, -1107585156, 1048824237) + W(6, 1047568122, 1032035298, -1119008010, 1036693685) + + W(7, -1119206793, -1158298480, 1036759049, -1114785543) + + W(8, -1111412903, 1040133461, -1140028602, -1107739759) + + W(9, 1044620924, -1096403637, 1046459856, -1114351857) + + W(10, 1034895049, -1131191613, -1112144320, 1040322447) + + W(11, -1130045765, 1011461326, 1012649582, -1111955333); + WS(1039738296, 1044009556); + sum1 = W(0, 1031992376, -1118478209, 1036413734, 1042319122) + W(1, 1025321013, 1019779694, -1126468451, 1028741314) + + W(2, -1113116036, -1126664684, 1028656912, -1095949365) + + W(3, 1046101634, -1104617834, -1111694091, 1029160593) + W(4, 1058890327, 1042459434, 1054229481, 1057173004) + + W(5, 1048940614, 1052520321, 1047137387, 1057385245) + + W(6, -1088719167, -1107130859, -1088309806, -1090115281) + + W(7, -1090901446, -1096019759, -1098195015, -1090793697) + + W(8, 1040542090, -1129453002, 1042227988, -1127816747) + + W(9, 1048439133, -1119199044, 1036380619, 1040359505) + + W(10, -1107110028, 1036863629, -1133301245, -1121635021) + + W(11, -1120611033, 1022364232, -1108092830, -1129794251); + sum2 = W(0, 
-1088996856, -1089163817, -1081345153, -1080285142) + + W(1, -1079794071, -1089981167, -1091438196, -1099490886) + + W(2, 1009251693, -1107842867, -1124806950, 1052635731) + + W(3, -1106464080, -1125456570, -1115606726, -1106662163) + + W(4, 1062110924, 1057525018, 1064189433, 1067630395) + W(5, 1063058829, 1062842970, 1054193960, 1053790298) + + W(6, -1115283476, 1042234988, -1104109471, -1112028150) + + W(7, -1113992476, 1045262592, 1042247754, -1104249669) + + W(8, 1030923639, 1032774718, -1126776930, 1034769026) + + W(9, -1106165885, -1136163037, 1032379012, -1108359351) + + W(10, 1034262626, -1123202523, -1130902850, 1036915346) + + W(11, -1110986466, 1035294670, -1135519157, 1024576891); + WS(-1096394862, 1050867114); + sum1 = + W(0, 975777376, 1035680965, 1017323049, -1164976624) + W(1, -1126109949, -1117859264, 1011370603, -1122131819) + + W(2, 1011677768, -1097606072, 1017467204, -1122057965) + + W(3, 1039142936, -1137149130, 1023416416, -1112401517) + W(4, 1050217928, 1041761501, 1059053443, -1098739346) + + W(5, -1089683010, -1104430516, -1115445691, 1042596520) + + W(6, -1095631255, -1108785275, -1088500104, -1111108333) + + W(7, 1062642602, 1040456976, 1016594173, -1120553683) + W(8, -1105375964, 1035556439, 1042635014, 1025006046) + + W(9, 1038036638, 1027021261, 1041295544, 1029575724) + W(10, 1033079080, -1107649602, 1020084753, -1112750027) + + W(11, -1142614292, 1017623258, -1113983747, -1124248029); + sum2 = + W(0, -1131210775, 1024840523, 1024067225, 1022860453) + W(1, -1125865258, -1130239171, 1024964869, -1117571915) + + W(2, 1029829507, -1108448221, 1046565608, -1107856867) + + W(3, 1042031083, -1106481128, -1127390051, -1124576676) + + W(4, 1018275001, -1117395779, 1042461338, -1092962046) + W(5, 1053212914, -1102214835, -1114692565, 991892650) + + W(6, -1122984086, -1132016729, 1059140985, -1089805596) + + W(7, 1058514952, 1027704725, -1118917379, 1032905776) + + W(8, -1096338526, -1114466007, -1107995649, -1166014069) + + W(9, 1052927486, 
1041713274, 1037010892, 1032336370) + + W(10, 1033255054, -1103146109, -1119288636, -1099911282) + + W(11, -1113255745, -1106771914, -1111017176, 1000523989); + WS(-1112511928, 1031640207); + sum1 = + W(0, 1037301926, -1130112569, -1119711398, 1044751592) + W(1, -1102628100, 1042267541, 1021159833, -1122521821) + + W(2, -1102373753, 1048837234, -1091456672, 1045582560) + + W(3, -1119033858, -1093436941, 1050238014, -1100593739) + W(4, 1058068053, 1051625169, 1054040787, 1044375881) + + W(5, 1057193534, 1051238780, 1045936610, 1058222955) + + W(6, -1091912761, -1095738529, -1101742756, -1090186767) + + W(7, -1090190311, -1110077161, -1098844722, -1090967581) + + W(8, 1030698115, 1045615952, -1104227307, 1051640030) + W(9, -1111999463, 1008998972, 1050341641, -1108465231) + + W(10, -1112730073, 1045904421, -1098654019, -1111879892) + + W(11, -1123401908, -1101737376, 1043894710, -1112695488); + sum2 = + W(0, 1016218439, 1009444234, -1111008440, 1026480303) + W(1, 1015374231, -1115006890, 1027237715, -1122692269) + + W(2, 1024549194, -1117974589, 1042315856, -1106327815) + W(3, 1040022175, 1025059842, -1110870197, 1033099318) + + W(4, -1121292007, -1111737758, 1047393563, -1089871257) + + W(5, -1115783084, 1043300713, -1103999090, 1022298131) + + W(6, 1037076368, -1110249739, -1086831522, -1069929292) + + W(7, -1085683537, 1033002057, 1033912191, 1032775289) + W(8, -1112700443, 1020335555, 1038599637, -1111547810) + + W(9, 1049254465, -1118960414, -1118358651, -1104218510) + + W(10, 1032984152, -1112172263, 1060801772, 1079084175) + + W(11, 1057876572, 1033693800, -1104475663, 1045399536); + WS(-1106120924, 1033431669); + sum1 = W(0, 1016553159, -1146851238, 1024375743, 1026515514) + W(1, 1005429062, 1029812010, 1008887700, 1021177962) + + W(2, 1035990878, 1041817850, 1041767260, 1047579686) + W(3, 1031819293, 1026577053, 1014785832, 1011005202) + + W(4, -1101729723, 1024013341, -1116899488, -1085522812) + + W(5, -1090064383, -1118087802, -1126747713, 1025078601) + + 
W(6, 1042266392, 1027850273, 977256084, -1090381288) + W(7, 1048109660, -1113190325, 1024173993, -1127218683) + + W(8, 1024064284, 1026394008, 990294084, 1048833585) + W(9, 1038017843, 1040657490, 1020235411, 1041410670) + + W(10, 1018305971, 1021653787, 996614956, -1125739905) + + W(11, 1016883609, -1121801815, 1027952623, -1132785763); + sum2 = + W(0, 1032555846, 1019641904, 1015684488, -1123545328) + W(1, -1118308408, -1117707424, -1137876352, -1152988290) + + W(2, -1110721648, -1116342888, -1102693078, 1027573888) + W(3, 1043950970, 1021888896, 1007632640, 1023916088) + + W(4, -1094136567, -1089264280, -1097199465, 1056898914) + W(5, 1037769558, 1041236542, 1026403708, 1046276806) + + W(6, 1030929664, -1112511004, 1046449598, 1052843165) + + W(7, 1024674316, -1109778580, -1123597216, -1105717038) + + W(8, -1146526593, 1032788860, 1012812816, 1033507644) + W(9, -1106559340, 1017687520, 1028523456, -1122312832) + + W(10, 1018020888, -1125819008, 1004426241, -1122415072) + + W(11, 1028547840, 996318210, -1149117889, -1132920416); + WS(1034973624, 1056792353); + sum1 = W(0, -1120658336, -1121055921, -1128295926, 1021962732) + + W(1, -1127490506, -1119904327, -1122942113, -1119710033) + + W(2, -1130821617, -1114464179, -1106629009, -1093463666) + + W(3, -1097245927, -1132912612, -1118132038, -1115274080) + + W(4, -1114903379, -1125024178, 1008426736, 1062068439) + + W(5, 1050479246, -1107542555, 1028006365, 1025872901) + + W(6, -1130597942, -1102939831, -1118485143, 1067744046) + + W(7, 1049107361, 1045213129, -1113875250, 1036565277) + + W(8, -1109039532, -1115164458, -1104109440, -1094141737) + + W(9, -1105858697, -1119937620, -1141854766, -1121170098) + + W(10, -1124529164, -1124681556, -1138254780, 993935312) + + W(11, -1132704804, -1120970067, -1122550700, -1120520661); + sum2 = + W(0, 1020401308, 1017661559, -1129375401, -1130735541) + + W(1, -1146688164, -1166762145, -1130185789, -1137125822) + + W(2, -1120501349, 996214904, -1140432422, 1024553914) + W(3, 
1006774606, -1124013507, 1033491675, -1117903931) + + W(4, 1037264768, -1116129767, 1035443152, 1051779064) + W(5, 1039827062, 1025600009, -1114545658, 1032817572) + + W(6, -1090330474, -1084580575, -1097846115, 1064606263) + + W(7, 1048687051, 1019182043, -1117487855, 1037037513) + W(8, -1105802125, 1032645479, -1102145888, 1038402436) + + W(9, 1000828364, 1025151371, 1028059566, -1112238814) + + W(10, 1023925581, -1119976615, 1025111856, -1136822486) + + W(11, -1121332237, 1015612208, -1119338761, 999461036); + WS(-1082445367, -1085006700); + sum1 = W(0, -1131835086, 1034878908, -1106661648, 1037617118) + + W(1, -1121819052, 1022066011, 1036207796, 1033542412) + W(2, 1041974800, 1043293403, 1042063924, 1045196135) + + W(3, 1037212703, -1105403448, 1046411795, -1100599618) + + W(4, -1081109875, -1092276724, -1086386347, -1094524616) + + W(5, -1094507255, -1104988703, -1105674409, -1096619449) + + W(6, 1054173212, 1051286356, 1055927462, 1058619216) + W(7, 1050138098, 1028915053, 1041419787, -1130824962) + + W(8, 1035072463, 1034209806, -1114712644, 1036390662) + W(9, 1015171028, -1139447658, 1026111277, 1045523870) + + W(10, -1137431795, 1038048136, -1110863483, 1042877649) + + W(11, -1123146704, -1114994477, 1040360493, -1121943708); + sum2 = + W(0, -1112963339, 1025676326, -1117465202, 1036266015) + W(1, -1124864900, 1019242508, 1017632828, 1039721851) + + W(2, 1043137200, 1000796272, 993751520, -1098528956) + W(3, 1032573075, -1097473255, 1049454991, -1090503899) + + W(4, 1014823768, 1031859059, 1020211292, 1055374057) + W(5, 986723264, -1125510524, -1111044051, 1007293144) + + W(6, -1089890678, -1102855534, 1018575516, 1061198225) + W(7, 1038055959, 1050136287, -1109681619, 1054985311) + + W(8, 1022229356, 1031913791, -1120060570, -1098119563) + + W(9, -1106568385, -1098582022, 1030233630, -1101215648) + + W(10, -1128614108, 1018516204, -1138611816, 1020738140) + + W(11, -1132953832, -1123833514, 1035860627, -1110059149); + WS(-1089141943, 1068474134); + sum1 
= + W(0, 1024130588, -1101143803, 1039148539, -1124204515) + W(1, 1026773217, 1018218693, -1104179471, -1123573325) + + W(2, -1121972404, 1041396360, -1105946681, 1052602058) + + W(3, -1091638882, 1032280890, 1041946144, -1138642417) + + W(4, 1032495505, 1045416789, -1106461946, -1094942270) + W(5, 1059099726, -1106993164, 1045088278, 1009482242) + + W(6, 1036801227, -1094238571, 1057861426, -1098420551) + W(7, 1040764762, 1033290001, -1105244536, 1037819035) + + W(8, -1117579092, 1039470024, -1113046718, 1047303536) + + W(9, -1094617668, 1040300696, -1110195745, -1108347351) + + W(10, -1114425265, 1024684838, -1108918837, -1127532529) + + W(11, 1027200645, -1103088708, 1043167338, -1116739793); + sum2 = W(0, 1016644168, 998892514, -1116049462, 1033248586) + W(1, -1110140123, 1041609896, -1103095555, 1030835892) + + W(2, -1134771145, -1114282184, 1042383274, 1025008594) + + W(3, -1104878488, -1101611481, 1048842205, -1122288558) + + W(4, -1129326838, 1049635340, -1147070546, -1102476512) + + W(5, -1090718582, 1046973498, -1115539252, 1028769716) + + W(6, -1114142714, -1101404122, -1095734150, 1060588986) + + W(7, 1052949499, -1143476482, -1110997031, 1022411808) + + W(8, 1012771369, -1114952606, 1057697290, -1111850310) + + W(9, -1096580248, -1114336646, 1031782132, -1118966953) + + W(10, 1025799488, 1041138771, -1095991584, 1027653952) + + W(11, -1127102864, 1035005082, 992902692, -1120028296); + WS(1055908206, 1023803300); + sum1 = W(0, 1033033649, -1128828388, 1027156540, 1024886583) + + W(1, -1122498726, 1032744244, -1114524705, 1005980052) + + W(2, -1114766205, 1011459483, -1106069078, -1116675477) + + W(3, -1116893112, 1045609495, -1113403236, 1049123968) + W(4, 1059767687, 1054454099, 1057986836, 1059734148) + + W(5, 1056528229, 1043829406, 1047712168, 1049516377) + + W(6, -1093070812, -1094569204, -1086765600, -1090099632) + + W(7, -1094363024, -1102732570, -1103438026, -1098493775) + + W(8, -1106762950, 1029622486, 991938588, -1098865153) + + W(9, 
-1101593593, -1139103892, 1022039246, -1097177334) + + W(10, -1139512861, 1016172182, 1027600816, 1027525694) + + W(11, -1135964969, 1036132330, -1117700205, 1035501771); + sum2 = W(0, 1025334517, -1115143239, -1112308287, -1115511381) + + W(1, 1042864237, 1031444191, 1040795754, -1111573417) + W(2, 1052224655, 1029199567, 1050599738, -1137182696) + + W(3, -1101494467, -1094150391, -1116631442, -1092627938) + + W(4, -1085507535, -1092254017, -1091835304, 1050309385) + + W(5, 1052849573, 1058681981, -1093246396, 1066882777) + + W(6, -1083097675, -1079915962, -1078367265, -1085955474) + + W(7, 1077549274, 1068550083, 1054678659, 1059124396) + + W(8, -1097828858, -1092877982, -1101001669, 1031547399) + + W(9, 974314816, -1100178328, 1046838481, -1088438520) + W(10, 1062578782, 1029921983, 1036651638, 1017725718) + + W(11, -1103762806, -1106558945, 1039431488, -1098096651); + WS(-1078207964, -1098790270); + sum1 = + W(0, -1117816013, 1039615439, 1040159636, 1032454162) + W(1, 1029735698, 1026856980, -1128361629, 1002339317) + + W(2, 1017293260, -1121106131, -1098516348, -1100564368) + + W(3, -1142189601, -1115831559, -1129405415, -1114960439) + + W(4, 1049047675, -1100078274, 1057717203, 1041647209) + W(5, -1090236946, 1041839801, -1114821509, 1034781465) + + W(6, -1100878354, 1049964833, -1094050686, -1119951725) + + W(7, 1052197359, -1110599850, 1009705546, 1020602063) + W(8, 1032784131, -1107321921, 1040450075, -1099964381) + + W(9, -1116032848, 1034137049, 1036993018, 993360895) + W(10, -1126192007, -1122801847, 1034337038, 1014677674) + + W(11, 1028031798, 1027844545, -1121696025, -1130484229); + sum2 = + W(0, 1025029135, -1115606678, -1098891737, -1111864983) + W(1, -1112278329, 1017169741, -1123958951, 1031819582) + + W(2, -1104367710, -1132455965, 1054704406, 1035843132) + + W(3, 1038310704, 1037432870, -1137648567, -1122384617) + + W(4, -1119205162, 1051294613, -1118431701, 1040638061) + W(5, 1033551568, -1095570314, 1040693441, 1008777251) + + W(6, 
1051361304, -1090262380, -1094584814, 1042639742) + W(7, 1042180969, 1054301740, 1006683771, 1032304440) + + W(8, -1097951274, 1046775583, -1111734067, -1115582191) + + W(9, -1116801751, -1102111461, -1108749213, -1103720876) + + W(10, 1037239658, 1027534783, -1137589907, -1161598998) + + W(11, -1113612863, -1137659371, 1017443081, 1038413214); + WS(1060561207, -1110135205); + sum1 = W(0, -1114856118, -1124131449, -1121034566, -1112340961) + + W(1, -1112123800, 1034679769, -1115322038, -1107671902) + + W(2, 1049197464, -1108658488, 1041962623, 1047635080) + W(3, 1029521634, 1045001085, 1019494819, 1038785126) + + W(4, -1091745710, -1095755605, -1091090769, -1089785676) + + W(5, -1088312300, -1105390912, -1095959893, -1089779917) + + W(6, 1057921518, 1045522218, 1055093690, 1056738433) + W(7, 1057086969, 1048793054, 1049205462, 1057170552) + + W(8, 1045672489, -1100746808, 1019645080, -1111234056) + + W(9, -1095870350, 1047514180, -1099214252, 1022895484) + + W(10, 1021270499, -1112929307, -1157317541, 1036938084) + + W(11, -1148757666, 1026507103, 1019330670, 1029094240); + sum2 = + W(0, -1110883135, -1122184691, 1008834310, 1033395903) + W(1, -1110716892, 1024095030, -1113505190, -1113542640) + + W(2, 1041894205, -1115970606, 982318965, 1040255170) + W(3, -1134396672, 1031402202, -1121857157, 1031923695) + + W(4, -1114700148, -1118217955, -1097954058, -1109327580) + + W(5, -1107386491, -1114148625, -1116084956, -1128842340) + + W(6, -1082032478, -1081043792, -1085641610, -1095688614) + + W(7, -1084362127, -1093205100, -1087863650, -1094939475) + + W(8, 1046875162, 1049320576, 982336213, -1107088758) + W(9, 1032307660, -1122373488, 1045820748, 1044473869) + + W(10, 1064081845, 1059051295, 1064706681, 1066526360) + W(11, 1060847547, 1058657081, 1055302318, 1041403755); + WS(-1088848183, -1091270356); + sum1 = W(0, -1113444636, -1127291412, -1131637032, -1105736260) + + W(1, 1030983166, -1127378047, 1037357901, -1115666437) + W(2, 1022583331, 1011517178, 1047190142, 
1040877723) + + W(3, 1035565944, -1097520394, -1114602800, -1108205485) + + W(4, -1092936920, -1094397598, -1092945549, -1086200137) + + W(5, -1102025475, -1104020948, -1116833526, -1102830535) + + W(6, 1050793382, 1055228191, 1056457416, 1051083611) + W(7, 1056969730, 1051399464, 1044492479, 1047811509) + + W(8, 1035173048, -1112816343, 1039723405, 1039289519) + + W(9, 1015654008, -1108815670, -1116817631, 1032741190) + + W(10, -1115063927, 1007040780, -1118782984, -1114815627) + + W(11, 998545680, -1127143385, 1025703109, -1123886254); + sum2 = + W(0, 1023906737, -1110646223, -1119411664, 1010017751) + W(1, -1108429461, 1029589423, -1110439418, -1125097343) + + W(2, 1067295699, 1065087692, 1065459703, -1118952488) + + W(3, -1081475488, -1083105366, -1095503285, -1083389324) + + W(4, -1081442287, -1085042817, -1083251842, 1050008558) + W(5, 1064700095, 1064469883, 1057972141, 1061045473) + + W(6, -1097077469, 1037915281, -1096872835, -1110920551) + + W(7, 1006981249, 1036080906, -1106015672, 1051761632) + W(8, 1029195957, 1015927544, 1018101230, 1042598936) + + W(9, -1103158035, 1041992018, -1108943564, -1114827780) + + W(10, 1026431960, -1132623721, 1018762696, -1124571002) + + W(11, 1020104602, -1114107558, 1036691137, -1119006739); + WS(1034259896, 1039623341); + sum1 = W(0, 1016878676, 1028460795, 1034359235, 1011385345) + W(1, 1015855070, 1016831138, -1128588247, 1025908283) + + W(2, -1137539339, -1104998988, -1129847248, 1015052988) + + W(3, 1032818160, 1044006969, 1033445320, 1043524531) + W(4, 1055868742, 1047938201, 1058114664, 1043619008) + + W(5, -1096687428, -1092418872, -1100196186, -1095896370) + + W(6, -1088089066, -1091378989, -1090213650, -1114958501) + + W(7, 1054476715, 1049053750, 1040121848, 1044746961) + W(8, 1043298073, 1036240559, 1043122195, -1119276751) + + W(9, -1110579607, -1101201217, -1106781444, 1018213241) + + W(10, 984347141, -1121755284, 932956556, 1018241806) + + W(11, 1026808723, 1034235663, 1010316760, -1120101215); + sum2 = 
W(0, 951810592, 1033659165, 1002259560, 1037243759) + W(1, -1112310958, 1034753865, 992340497, -1114454490) + + W(2, -1108037836, -1102042508, -1103529440, -1102191435) + + W(3, -1111056455, -1119026649, -1101647251, 1024637657) + + W(4, -1092018666, -1107310314, -1101738099, 1066447662) + + W(5, 1032823775, 1031795157, 1040899047, 1046257437) + W(6, -1130209970, -1115641842, 1041635941, 1067062587) + + W(7, -1098661250, -1099014553, -1117726225, -1111013301) + + W(8, 1022643242, -1110096702, 1034513697, -1100161415) + + W(9, 1027860693, -1096684817, -1114962434, -1112982645) + + W(10, 1009312276, 1032067593, -1114108683, 1041326077) + + W(11, -1120382671, 1026651225, 1038338139, -1106704138); + WS(-1096556910, 1067967914); + sum1 = + W(0, 1032707155, 1024354487, -1131915011, 1043480447) + W(1, -1118942187, -1132633269, 1031302952, 1031513592) + + W(2, -1122886335, 1031668012, 1021347526, 1041914807) + W(3, 1036630543, 1034774528, -1125222297, -1106257250) + + W(4, -1095119782, -1103532312, -1095435756, -1090836352) + + W(5, -1091218783, -1104917967, -1100801113, -1104067043) + + W(6, 1055235700, 1048353625, 1044387963, 1053060297) + W(7, 1041581564, 1052242886, 1033613430, 1050269976) + + W(8, -1120012638, -1105977207, -1105830818, -1133082414) + + W(9, 1017318739, -1121967338, -1130484760, -1109204860) + + W(10, 1031514684, -1126996321, -1116416117, 1043128001) + + W(11, -1110350280, 1039162573, -1148985233, 1034839630); + sum2 = W(0, -1125934657, -1120747465, -1126946517, 1019390634) + + W(1, -1140291646, 1045993569, -1110717499, 1038335355) + + W(2, 1041393774, 1032139854, -1107151652, 1046075673) + + W(3, -1090083756, 1024325836, -1116058677, -1097350707) + + W(4, -1097497496, -1114660789, -1096477783, 1057927405) + + W(5, -1110210473, -1137735680, 1033250412, -1127153776) + + W(6, 1047775001, -1105604596, 1048515105, 1059160295) + W(7, 1043731078, 1039621515, -1116506490, 987432265) + + W(8, -1131783044, 1018330620, -1096755902, 1041946221) + + W(9, 
-1091797353, 1046026631, -1106745231, -1124643180) + + W(10, -1122059316, -1129241452, -1147658728, 1011994599) + + W(11, 1032850220, -1126091098, 1009337229, -1124186068); + WS(1061843767, -1077951557); + sum1 = + W(0, 1026388735, -1120114128, 1037862893, 1029309653) + W(1, 1036389538, -1122000006, -1126306304, 1030215888) + + W(2, -1106366502, -1131166601, 1029755951, -1091391038) + + W(3, 1048316317, -1099454574, -1106178153, 1034019808) + W(4, 1059311576, 1043697070, 1054385928, 1057568381) + + W(5, 1054220568, 1049723715, 1050234615, 1058416302) + + W(6, -1088217072, -1105977039, -1091419383, -1089136642) + + W(7, -1092475601, -1095416897, -1096877448, -1091809363) + + W(8, 1041378862, -1130669927, 1047467576, -1111164306) + W(9, 1053509792, -1108260705, 1038585474, 1045917860) + + W(10, -1102542817, 1039925540, 989452730, -1105668215) + + W(11, -1138232735, -1130981221, -1107228323, -1116719540); + sum2 = W(0, 1061533858, 1062451832, 1068272760, 1070735043) + W(1, 1070929067, 1058522058, 1059576348, 1052624489) + + W(2, 1041546460, 1044663473, 1019905335, -1103665132) + W(3, 1027882908, 1053579543, 1030217484, 1042937819) + + W(4, -1081881922, -1084977950, -1081565785, -1079025547) + + W(5, -1082597652, -1079765468, -1088759123, -1090364693) + + W(6, -1104625991, -1116240264, -1115490290, -1094934042) + + W(7, 1002519540, -1096683616, -1103272320, 1039524167) + + W(8, 1020257203, -1107478927, 1038097782, -1129735749) + W(9, 1043320870, 997759194, -1110736144, 1041293151) + + W(10, -1112819836, -1146254682, 1018218872, -1108448519) + + W(11, 1032555484, -1111378699, 1018278443, -1120119454); + WS(-1084224055, -1088316584); + sum1 = W(0, -1113613001, 1003388692, -1106869627, 1016662735) + W(1, 1022316973, -1136760929, 1028746387, 984919127) + + W(2, -1107673717, 1028109079, 1012698375, 1027116933) + + W(3, -1094644889, 1023495179, -1107590588, 1024116594) + + W(4, 1006747746, -1098426505, -1088923256, -1097246285) + + W(5, 1057877820, 1033746004, 1041863252, 
1022736621) + W(6, 1039555654, 1048291572, 1059475506, 1054502102) + + W(7, -1093203288, -1098003683, -1105649846, -1114921814) + + W(8, 1033838821, 1024741190, -1120760050, 1019707757) + W(9, 1048977331, 1036081339, 1041797102, -1127589064) + + W(10, -1159624430, -1124153911, 1033833840, 1010228666) + + W(11, -1114819581, -1110624834, -1119454813, -1122201606); + sum2 = + W(0, 1015245257, -1113948506, -1130594159, 1007533655) + + W(1, -1122336092, -1131246557, -1121042747, -1112003767) + + W(2, -1113301472, -1150409563, -1113241340, 988101766) + W(3, -1114338752, 1042344950, 1024929700, 1034588627) + + W(4, -1126889814, 1027745966, 1053517180, -1115083078) + W(5, -1120298083, 1009652067, 1023796132, 1015099871) + + W(6, -1122805777, -1121614859, 1035094088, -1117590123) + W(7, 1041346715, 1010321235, 1022595647, 1021357977) + + W(8, 1024783162, 1024732276, 1035927962, 1033594898) + + W(9, -1136757529, -1112303516, -1107129305, -1116844702) + + W(10, -1125306444, -1120982285, -1117285071, -1115586407) + + W(11, -1179946326, -1122655190, 1031780006, -1109817399); + WS(1056759150, 1060224665); + sum1 = + W(0, -1139374311, 995010733, -1111414410, -1108849608) + W(1, 1028987391, 1016981889, -1123115291, -1129401942) + + W(2, -1132085157, 1023919167, 1047983919, 1035204919) + W(3, 1044747100, 1009635916, 1026679904, -1133052010) + + W(4, -1099925456, -1105275165, -1089721802, 1047380996) + + W(5, -1111752399, -1106656308, -1119632759, -1103728616) + + W(6, 1041263149, -1143162806, 1045591848, 1054640011) + W(7, -1086265361, 1052567064, -1119813375, 1036660778) + + W(8, 1033127353, 1020125661, 1047176279, -1109178223) + W(9, 1047077327, 1025513390, 1020940735, 1026259564) + + W(10, -1131329248, -1128574472, -1124813330, -1115433611) + + W(11, 1014274699, -1114033239, -1135589654, -1154487021); + sum2 = + W(0, 1023227060, 1023565277, 1023373012, -1127246210) + W(1, 1035525242, -1115895934, 1032252451, -1115664904) + + W(2, -1114834111, -1123243930, -1110717616, 
-1119831250) + + W(3, 995075850, -1112704991, -1137853837, -1177558227) + + W(4, -1123255684, 1036838581, -1121973236, -1128787384) + + W(5, -1106791563, 1017209933, 1025363014, -1117762720) + + W(6, -1111609489, -1109001470, -1106028739, 1068968933) + + W(7, -1084327071, 1049794788, -1103092626, 1033559138) + W(8, 1029177075, 1046335474, 1040383540, -1085459757) + + W(9, 1055427968, -1106438120, 1039281515, -1108939748) + + W(10, 1009084849, -1110362282, 1030364025, 1038001226) + + W(11, 1004844681, -1109625554, 1026824663, 1027051057); + WS(1059198391, -1114206899); + sum1 = + W(0, -1123230572, 1033309054, -1136657179, 1042361008) + W(1, -1127872354, 1026831155, 1034821095, 1027013917) + + W(2, 1047908609, -1113872057, -1131612168, 1032838544) + W(3, 1018582917, 1025662981, -1122684754, 1042918755) + + W(4, -1086695351, 1005036838, 1053830093, 1063074340) + W(5, 1022976638, 1048968528, 1037218004, 1045668084) + + W(6, -1111534963, -1127426130, -1097279138, 1032841926) + + W(7, -1102974814, -1086802103, -1094977286, -1080167184) + + W(8, 1041894153, 1035651728, -1114797926, 1042266564) + W(9, 1036160183, 1043048448, 1028546130, 1051954532) + + W(10, -1168813170, 1029018614, 1015674230, -1115081892) + + W(11, 1022213546, -1119589535, 1023015663, -1121567905); + sum2 = W(0, -1109294951, 1044771761, -1106240469, 1037653814) + + W(1, -1132138943, -1129034439, 1027920896, -1126749735) + + W(2, -1116747636, -1095594447, -1091993541, -1098134808) + + W(3, 1045429231, -1112675284, 1030264036, 1027709116) + W(4, 1053044316, -1102224225, 1065361960, 1060397410) + + W(5, -1095097189, -1139768526, -1100471477, -1095674438) + + W(6, -1093594252, 1044254591, -1103264547, 1059446961) + + W(7, 1053539276, -1147713757, -1115698004, -1131922215) + + W(8, -1105874671, -1148207325, -1098859579, -1110887683) + + W(9, -1103270924, 1018090127, -1105560593, 1044415945) + + W(10, 1036768486, -1122217612, 1038496462, -1106764847) + + W(11, 1035231626, -1112984312, 1037287074, -1106415399); 
+ WS(-1081905372, 1072993545); + sum1 = + W(0, 1004609681, -1113931058, 1017897178, -1110350900) + W(1, -1119842647, 1029593625, -1114320538, 1026419555) + + W(2, -1111485397, 1044870886, -1101161424, 1042868018) + W(3, 1016958866, 1036666966, 1034341084, -1123516147) + + W(4, -1100618406, -1103601459, -1117978262, -1088871615) + + W(5, -1096217303, -1090419947, -1105396084, -1100180286) + + W(6, 1052555682, 1050421002, 1053397115, 1057434595) + W(7, 1037835279, 1054502387, 1036260396, 1041011339) + + W(8, -1113666277, -1125853837, -1109207833, -1108715676) + + W(9, 1041223333, -1131918060, 1046093262, -1108472674) + + W(10, -1113131956, -1119475260, -1119057377, -1116054255) + + W(11, 1033629886, 1019600139, 1034832815, 1025124935); + sum2 = W(0, 1027207918, -1131003531, -1155176792, -1144704129) + + W(1, -1120277627, 1018692534, -1122116985, 1016313728) + + W(2, -1134820995, 1019005665, 1018656479, 1016837667) + W(3, -1133755465, 1007330736, 1008190855, 1024160458) + + W(4, -1123507780, 992028222, 1020777698, -1093100477) + W(5, -1106617270, 1026210395, 1015481059, 1009441727) + + W(6, 1022927690, 1039006609, -1108026125, -1075529389) + + W(7, -1071234400, -1098388857, 1043410538, -1127804151) + + W(8, -1148644129, -1114458430, 985006155, -1165905039) + W(9, 1009164120, 1080134310, 1068769301, 1038162442) + + W(10, -1124904791, 1019252639, 1019665619, -1148497627) + + W(11, -1154476856, 1041946375, -1120913968, -1102924087); + WS(1052536174, -1151096569); + sum1 = W(0, 991116371, 1033302347, -1116642336, -1115536654) + + W(1, 1020657940, -1114833734, 1031490598, -1158391502) + + W(2, -1128173243, -1146822742, 1036050892, 1034768509) + + W(3, -1102372039, 1030675614, -1114628406, 1032401267) + + W(4, -1103836723, 1034951935, -1087917523, -1106200391) + + W(5, 1061554222, 1041046871, 1046795201, 1027714602) + W(6, 1032573088, -1120898050, 1057405287, -1120590821) + + W(7, -1082912644, 1002819162, -1124848916, -1124036191) + + W(8, 999976938, -1138114279, 
-1098834165, -1118223722) + + W(9, 1047922098, -1107937520, 1038688870, -1118805976) + W(10, 1037991630, 1035558856, 1042054293, 986260550) + + W(11, -1130396508, 1023135729, -1142052488, -1177279769); + sum2 = W(0, 1035223119, -1127800761, 1038890181, 1028905699) + W(1, 994981383, -1127955051, 998786660, -1121293280) + + W(2, 1038869307, 1034423710, -1109261585, -1095895125) + + W(3, -1098712671, 1032845262, 1030375171, -1114148994) + + W(4, -1108836158, -1116666352, -1087753137, 1061176216) + + W(5, -1099737664, -1208820627, -1126035264, 1042226462) + + W(6, -1105867416, -1109342419, 1027333841, 1057224086) + + W(7, 1049674522, 1011854042, 1017325847, -1120003540) + + W(8, -1106282608, 1047028407, -1148256348, -1148022324) + + W(9, -1103462952, 1043242553, -1121350765, -1117125009) + + W(10, 1028503999, -1119414296, -1113209005, 1010099974) + + W(11, 1020393479, -1115953239, 1024565993, 1001260628); + WS(1058902967, -1081860445); + sum1 = + W(0, -1121371387, 1029881036, 992316765, -1117974353) + W(1, 1019164800, -1115308289, 1031905998, -1118946572) + + W(2, -1116017491, -1110722656, -1103068528, -1120254383) + + W(3, -1111729689, -1123116948, -1110978129, 1027465948) + + W(4, -1114517185, -1096281750, -1110556335, -1087192661) + + W(5, -1138608617, -1119671953, 1026716344, 1026550240) + W(6, -1144385454, 1046612526, 1050079932, 1061346066) + + W(7, 1054224317, 1023689473, 1034730016, 990814541) + W(8, 1031405875, 1032371736, -1114542190, -1113719056) + + W(9, -1147587566, -1121094047, 1032702160, 1025765741) + + W(10, -1133995267, -1118231237, 1037331941, -1117606232) + + W(11, 1027634660, -1114416951, -1150819433, 1002408318); + sum2 = W(0, -1127268449, 1008157504, 1022079949, -1129508015) + W(1, 1023435994, 987488696, -1149203238, 1025441916) + + W(2, 1017520494, -1110533731, -1110799425, 1038116680) + + W(3, -1149062134, -1109564641, 1031926637, -1140288129) + + W(4, 1046612660, 1053115740, 1042237616, -1081939523) + W(5, 1046590815, 1019421448, 1025089506, 
1002014090) + + W(6, -1104246964, -1126879265, 1071620340, -1082767624) + + W(7, -1091995420, -1133233221, -1113723868, 1008464055) + + W(8, -1098833757, -1103206877, 1047260740, 1046386206) + + W(9, 1034566387, -1113663072, -1131415815, -1120607589) + + W(10, 1019779427, -1135658469, -1112586418, 1021781126) + + W(11, 1021597755, 1025921212, 1022181144, -1133866981); + WS(-1118927728, -1114990634); + sum1 = W(0, -1123938375, 1040612950, 1024746402, 1022214714) + + W(1, 1038811590, -1112630087, -1118622728, 1031232344) + + W(2, 1033195056, -1114716774, 1048819881, -1104426015) + + W(3, -1105195831, 1042575344, -1098438774, 1038434900) + W(4, 1057535917, 1047260544, 1042772673, 1057677234) + + W(5, 1045869257, 1057533694, 1042859777, 1055588422) + + W(6, -1087859524, -1132924433, -1087499341, -1097320317) + + W(7, -1091890215, -1089715893, 1029276300, -1091230428) + + W(8, -1107191040, 1007112943, 1043648426, -1093740782) + + W(9, 1042013107, 1020754614, -1107815558, -1113991309) + + W(10, 1047574307, -1106834116, 1044210486, 1028537764) + + W(11, -1106862316, 1047539199, -1107232885, 1041569343); + sum2 = W(0, -1117938617, 1034943831, 1047294563, -1112881764) + + W(1, -1113196518, 1017166837, -1119652380, -1122537978) + + W(2, -1098776947, -1074997876, -1067348911, -1073328658) + + W(3, -1106856879, 1007574096, 1040326435, 1003760381) + W(4, 1052841626, 1071950042, 1078478928, 1075942353) + + W(5, 1048374094, -1110198115, 1016518417, -1132261857) + + W(6, -1111689798, 1008522816, 1042633900, -1105497105) + + W(7, -1100859969, 1027760421, -1106280857, -1146348222) + + W(8, -1101990052, 1023979086, -1106116215, 1040726051) + + W(9, -1128120408, 1041816709, 1015116440, 1015700123) + W(10, 1034353164, 1019125592, 1021696573, 1032192788) + + W(11, -1129459590, -1114781060, 983174391, 1019612376); + WS(1034111416, -1106797037); + sum1 = W(0, -1127934684, -1146675705, 1006977829, -1113401378) + + W(1, 1019549798, 1016064490, -1139061950, 998716411) + W(2, -1113852291, 
1035613338, 1034804131, 1037672145) + + W(3, 1044038057, -1106829090, 1033066250, 995721454) + + W(4, -1096035023, -1102929642, -1090308252, -1089088820) + + W(5, -1096624416, -1097224209, -1104516203, -1095532026) + + W(6, 1052464285, 1042719307, 1054858571, 1057164897) + W(7, 1050796251, 1048920159, 1042039981, 1049209393) + + W(8, 1032415443, -1114220817, 1032020487, -1123663635) + + W(9, 1035431737, -1131720964, -1114943722, 1041424208) + + W(10, -1113420426, 1025128440, -1114922591, -1136503085) + + W(11, 1026704599, -1109868785, 1024811962, -1138798477); + sum2 = W(0, 1018373307, -1117669473, 1036793172, -1107254646) + W(1, 1027210469, 983011887, -1124389554, 999841656) + + W(2, 1030944977, 1035483586, -1120221588, -1138388666) + + W(3, -1119036598, 1026801669, -1120993070, 1036234905) + + W(4, -1120549779, -1113169149, 1031305133, -1118729068) + + W(5, -1123944089, -1136088418, 1045466087, -1110162376) + + W(6, -1139755202, -1114451942, 1047991436, 1050438576) + + W(7, 1058194810, -1103576008, 1037255439, -1114267729) + + W(8, -1101691927, -1102665013, -1083948126, -1083351573) + + W(9, -1081291530, -1106933299, -1095248620, -1124771645) + + W(10, 1049483493, 1049560407, 1057078402, 1060878230) + + W(11, 1058504911, 1044728114, 1047223280, 1036725012); + WS(1038841272, 1052605132); + sum1 = + W(0, 1031922765, -1104568331, 1037544250, -1109808312) + W(1, -1115625903, 1042593752, -1105481799, 1033924054) + + W(2, -1100459309, 1046914264, -1120141702, -1117060814) + + W(3, 1046551204, -1097718198, 1032086688, -1112841053) + W(4, 1037270288, 1041702805, -1098675705, 1045728636) + + W(5, 1017164043, 999474559, 1031450455, 1034794943) + W(6, 1026910357, -1101939446, 1040827436, 1037079325) + + W(7, -1095425143, 1046662664, -1103113592, 1015870568) + + W(8, 1023447506, -1114279623, 1048352911, -1115507167) + W(9, 1040518727, 1038294495, -1106032688, 1041969331) + + W(10, -1104596348, 1044685961, -1104877098, -1117405559) + + W(11, 1032200833, -1104511859, 
1039045107, -1111795079); + sum2 = W(0, -1119788132, 1033659476, -1112158198, 1008462738) + + W(1, -1128623485, -1135723434, 1030330775, -1125624097) + + W(2, 992683382, -1104347191, 1044081475, 1034564598) + W(3, 1037093866, -1118342628, 1008221186, -1116661382) + + W(4, 1018867293, 1020892153, -1090780387, -1098943438) + + W(5, 1024702018, 1037085984, -1108937272, 1041747819) + W(6, 1022669528, 1043646320, 1053457206, -1103925357) + + W(7, -1094689316, -1130567667, -1111881166, 1006874186) + + W(8, 1044098145, 1022003003, 1053806225, 1061138285) + W(9, 1061570617, 1042502677, 1045330453, 1015917808) + + W(10, -1102594356, -1105788334, -1097169858, -1089462725) + + W(11, -1090351234, -1124000960, -1102638867, -1119261652); + WS(1059476151, -1123203418); + sum1 = W(0, -1111725774, 1032359159, -1110577724, -1105544893) + + W(1, -1121304930, -1115245634, 1036887935, -1110517831) + + W(2, 1035594491, 1034919857, -1111760306, 1046155316) + W(3, 1044165509, 1020829292, 1039852002, -1120384574) + + W(4, -1091810916, -1140486487, -1084838663, -1086139015) + + W(5, -1095808892, -1096673622, -1127813812, -1090491960) + + W(6, 1057461865, 1048140086, 1060577506, 1052038472) + W(7, 1057513991, 1053266340, 1045123672, 1055106407) + + W(8, 1016189354, 1034245225, -1104347695, 1027870402) + + W(9, -1103056264, -1108853472, 1031021091, -1133152957) + + W(10, -1123857235, 1028750247, -1113205590, -1121460986) + + W(11, -1148349487, -1113141864, 1036934292, 1011178567); + sum2 = + W(0, -1114868977, -1107973502, 1044770988, -1101288212) + W(1, 1025917421, 1028043472, -1111158861, -1123193100) + + W(2, 1026556603, -1115461868, -1104823400, -1105554340) + + W(3, 1036199790, -1116199065, 1020651575, -1128075734) + W(4, 1004734644, -1092079110, 1059587552, 1045430990) + + W(5, 1037874646, -1137785064, -1093651456, -1127404110) + W(6, 1034344926, 1042028492, 1024895663, 1050688149) + + W(7, 1047334362, -1116948841, 1044938016, 1042458099) + W(8, 1040708934, -1103991549, 1051421807, 
-1095880897) + + W(9, -1118101029, -1122024357, -1099771430, 1007412048) + + W(10, -1131620842, -1105590488, 1029419915, -1114582112) + + W(11, 1015496806, -1121536685, -1121478353, -1127827256); + WS(-1096078190, -1082738059); + sum1 = + W(0, 1007239809, -1114850361, -1130257143, -1112210107) + W(1, -1118665957, 1028242343, -1116764258, 1022403683) + + W(2, -1108860284, 1046670329, -1103368477, 1045976117) + W(3, 1029494103, 1036298605, 1029804679, -1128309634) + + W(4, -1100207850, -1101169270, -1116530972, -1088435015) + + W(5, -1092482434, -1090334611, -1105280439, -1099682433) + + W(6, 1053165251, 1050939564, 1051252601, 1056131912) + W(7, 1045571555, 1053021861, 1035862579, 1038809821) + + W(8, -1112013420, -1126100266, -1112810720, -1124484747) + + W(9, 1041969097, 1032260218, 1044490698, -1114977769) + + W(10, -1115685558, -1124400725, -1112871192, -1117830909) + + W(11, 1031308937, -1143992478, 1037078646, 1016808747); + sum2 = W(0, -1133820219, -1147891684, -1127336652, -1127200453) + + W(1, 1020736496, 1001166860, 1022486646, -1135690985) + W(2, 1027229337, 1032424255, -1137665638, 1045946307) + + W(3, -1116510358, -1145031593, -1116471963, -1126621635) + + W(4, -1115513384, -1107406978, 1037916056, -1096925824) + + W(5, 1031285965, -1138759126, 1020254812, -1122394148) + + W(6, 1007940354, -1125237780, 1044701152, 1067148595) + W(7, 1076670751, 1039470145, -1109537872, 1025436583) + + W(8, -1156542700, 1031863482, -1113898133, 1048747828) + + W(9, -1101339101, -1070343554, -1082365934, -1107321525) + + W(10, 1020775992, 994071912, -1127018949, -1115116116) + + W(11, -1122694416, -1106720041, 1034791139, 1039293842); + WS(1056919406, -1123699093); + sum1 = W(0, -1153021483, 1016658279, -1169964908, -1131540465) + + W(1, -1116724456, 1018808083, 986184318, 1026972628) + W(2, 985933670, -1142160193, 1032391598, -1109380555) + + W(3, -1112807501, -1115139838, 1023499017, -1119658051) + + W(4, -1089586019, -1106728023, -1095447822, -1095937215) + + W(5, 
-1092279079, -1105250414, -1104943256, -1093497009) + + W(6, 1058308826, 1043832952, 1054485203, 1051425640) + W(7, 1057168451, 1053123845, 1034402292, 1056025192) + + W(8, -1122912091, -1111941676, -1114359010, 1038539876) + + W(9, -1129000267, -1105627696, 1011544866, -1129701482) + + W(10, -1130942473, -1162334614, -1115887538, 1035573952) + + W(11, -1120065804, 1037216107, -1114397383, 1031803900); + sum2 = W(0, 1034281368, -1126716121, -1121241308, 1045372780) + W(1, 1035929285, 1047600787, 1004087438, 1041718259) + + W(2, -1104410341, 1032410612, -1087414339, -1068615761) + + W(3, -1067644471, -1091405946, -1118166494, -1097556864) + + W(4, 1018060402, 1017706532, 1061712823, 1078313037) + W(5, 1079888634, 1048822788, 1023636550, 1035128990) + + W(6, -1138447737, -1115589617, 1021983592, -1100339818) + + W(7, -1123293195, 1030149270, -1120625820, 1021734756) + W(8, 994323516, 1029627322, -1133709215, 1015111478) + + W(9, 1023885100, -1124744731, 1010097407, 1009567515) + + W(10, -1140256393, -1133676977, 1012505315, -1130902073) + + W(11, 1028720332, -1123320040, 1022959192, -1136405679); + WS(1057577783, -1116545908); + sum1 = W(0, -1140559047, -1119858755, 1001413734, 1019558160) + + W(1, 1027838711, -1123065945, -1133184076, -1116764451) + + W(2, 1023488522, 1007841757, -1099448121, -1135636631) + + W(3, -1106014610, -1112832085, -1132115376, 1007686986) + + W(4, 1031631673, 1034355893, 1034820815, -1095265297) + W(5, -1133110549, 976310772, 1031909175, 1033492939) + + W(6, -1127758170, -1102068668, -1139646818, 1043309332) + + W(7, 1058446926, 1018622064, 1033516923, 1033353766) + W(8, 1037581685, 1038802963, 1020321573, -1118918103) + + W(9, -1104027725, -1110623851, -1124259695, -1114622946) + + W(10, -1129099776, 1023940956, 1023108226, -1133216315) + + W(11, 1027058075, -1123708581, 1025545342, -1118432359); + sum2 = W(0, -1139693312, 1036960926, -1143311649, 990661185) + W(1, 1015972640, 1000525745, -1129692664, 1028832752) + + W(2, -1124752720, 
-1121090060, -1099787410, -1112556974) + + W(3, -1123282486, -1104610584, 1028371280, -1104217347) + + W(4, -1098950560, -1103693150, 1033289622, 1052753541) + + W(5, -1106573325, 1031872996, -1115662969, -1115847157) + + W(6, 999696497, 1034222778, -1104157082, 1058657457) + W(7, 1042484509, 1044261031, 1026599244, 1041506121) + + W(8, -1111434581, -1119707404, 1018041096, -1108624084) + + W(9, -1111132399, -1121815492, 1018446608, -1129135268) + + W(10, 1026280044, 1014405392, -1121205834, 1023217352) + + W(11, 1005957425, 1017712752, -1139874592, 1010211304); + WS(1063874743, -1123600943); + sum1 = W(0, 1026726381, 1005084141, -1121313992, 1022502021) + W(1, 1010678471, 1012880846, 1016038684, 1031948511) + + W(2, -1110972995, 1029034162, 1029564141, 1019350337) + + W(3, 1021423943, -1112342611, 1036724585, -1096649610) + + W(4, -1088392527, -1096434085, -1088422197, -1089197156) + + W(5, -1089730182, -1099324833, -1105649226, -1102131796) + + W(6, 1060534134, 1052968261, 1061531727, 1048937234) + W(7, 1053585019, 1049533342, 1044791660, 1047748258) + + W(8, -1131352649, -1125442370, -1111100967, 1007055431) + + W(9, -1124123103, 1037169096, -1119334386, 1048887628) + + W(10, 1017113381, 1023149165, -1121647548, 1028622922) + + W(11, -1132301739, -1122598880, 1017220849, -1134858375); + sum2 = + W(0, 1041516351, -1114001722, 1031962338, -1127882717) + W(1, 1024371132, -1116893380, -1137233618, 1035584360) + + W(2, -1098661524, -1113317539, 1041960143, -1113172562) + + W(3, 1057245216, -1108932190, 1040530840, -1090505328) + + W(4, 1065773241, 1066164852, -1094243603, -1071023147) + W(5, -1087722937, 1043021464, 1059923771, 1062948018) + + W(6, 1067471192, 1062194047, -1131590099, -1071391818) + W(7, -1082288010, 1036914992, 1056604249, 1065534266) + + W(8, -1095722385, 1029392016, 1045112423, 1027735196) + W(9, 1047570295, -1110460182, 1008215114, -1096088381) + + W(10, 1025030832, 1015621429, 1033273068, -1106774969) + + W(11, 1021784721, -1147670187, 
-1157997580, 1025226110); + WS(-1081605212, 1037230241); + sum1 = W(0, -1129286954, -1127382348, -1114123448, -1119205995) + + W(1, -1116053850, -1114061454, 1024011222, -1165173963) + + W(2, -1115600951, -1127078325, 1045220821, 1032978341) + W(3, 1031622828, 1039357911, 1020277751, 1026874614) + + W(4, -1139829872, 1037397159, -1103957592, 1057561887) + + W(5, 1057799156, -1103125195, 1035843369, -1159950147) + + W(6, 1042245092, -1101819637, 1057239854, -1089716883) + + W(7, -1088291628, -1162281827, -1111135597, -1144308089) + + W(8, -1130516890, 1025459530, -1098019463, 1040837167) + + W(9, 1039979404, -1123433113, 1008116332, -1115664788) + + W(10, -1191245347, -1140137724, -1135821456, -1153142562) + + W(11, -1123070334, -1115244132, 975751222, -1121723210); + sum2 = W(0, -1129847377, -1132201467, -1111059277, 1032250249) + + W(1, 1003251981, -1175110606, -1119100754, 1007928974) + + W(2, 1027890930, -1115667911, -1109124506, -1115518668) + + W(3, -1131706033, 1033851379, 1008685286, -1130851063) + + W(4, 1045384717, -1139027758, 1062718876, -1088514518) + + W(5, 1043614591, -1096199608, 1038009999, -1126833063) + + W(6, -1131041629, -1086235324, 1071241567, -1104078593) + + W(7, -1091065468, 1021865867, -1113376611, -1120766781) + + W(8, -1118537275, 1015945835, -1090110249, 1050303031) + + W(9, -1130925231, 1030828678, -1128909271, -1121346672) + + W(10, -1129495121, 1022067627, -1139816838, -1109320206) + + W(11, 1035917053, -1140186118, 1027374356, -1130315761); + WS(1048635758, -1111558989); + sum1 = + W(0, -1116715915, 1007517554, -1125319757, -1130676803) + W(1, 1032051720, -1122176818, 1019813529, -1125995833) + + W(2, -1114167026, -1144951432, 1032434734, -1116349759) + W(3, 1007313161, 1037795522, 1011561671, 1022555096) + + W(4, 1019884197, -1100411873, -1082609609, -1099356457) + + W(5, -1114189151, -1098216348, -1118483848, -1115419480) + + W(6, 1046236914, 1042364482, 1057088310, 1060062511) + W(7, -1105629450, 1032549225, -1156935944, 
1017135051) + + W(8, 1023679491, 1030303424, 1025882348, 1027068912) + W(9, 1046455412, 1037080648, 1024380849, 1020138940) + + W(10, -1123924777, -1115625243, 1018952757, -1117219752) + + W(11, -1128015465, -1117338239, 1001454266, 1015509889); + sum2 = W(0, 1017200252, -1128076598, 1020177744, 1027061114) + + W(1, -1120062015, 1007295183, 1005463470, -1120999608) + + W(2, 1032764106, 1032749056, 1042154489, -1091917968) + + W(3, -1115647155, -1109350747, -1136438311, -1152745757) + + W(4, -1100965710, 1036328960, -1079875623, 1069362278) + + W(5, 1041923814, -1121245377, 1023734232, -1115720719) + + W(6, -1120952053, -1105592489, -1093732085, 1053281179) + + W(7, 1053015354, 1023497366, 1044015507, 1040619345) + W(8, 1023440400, 1037836738, -1110726007, -1110949820) + + W(9, -1097734576, 1029656316, -1107014120, 1039138178) + + W(10, 1026555164, -1121478225, 1032334674, 1004458190) + + W(11, 1030547292, -1115087241, 1024950244, -1107079388); + WS(1057226679, -1099093504); + sum1 = W(0, -1117743115, -1111361838, -1142218540, -1115584496) + + W(1, -1144474236, -1112915081, -1118048876, -1124754680) + + W(2, -1106860569, -1097721634, 1017711998, -1098477656) + + W(3, 1017808579, -1127808199, -1110778360, 988685674) + W(4, 1059123761, 1043835217, 1054886149, 1036645937) + + W(5, -1096319852, -1102898152, -1108186916, -1103594844) + + W(6, -1116606854, -1113320356, -1098795498, 1036044245) + + W(7, 1058878095, 1054951826, 1042821748, 1057715322) + W(8, -1117312579, 1021938430, 1024346219, -1113074392) + + W(9, -1113306631, -1104150268, -1109837016, -1104945799) + + W(10, -1126837697, -1108681130, -1128001843, -1126342351) + + W(11, -1145404316, 1022603516, -1113519979, -1166667748); + sum2 = + W(0, 1017552406, -1121365003, 1024315197, -1126393870) + W(1, 1006096662, 1026122801, -1115019758, 1010820131) + + W(2, -1115134458, -1113297695, -1121331284, 1034056828) + + W(3, -1116036592, -1122098200, -1138309587, -1114451237) + + W(4, -1123239700, 1033578978, 1032104202, 
-1122610502) + + W(5, 1042239151, -1128246386, -1130645774, 1021632898) + W(6, 1040415751, -1119377084, 1033268884, 1042552247) + + W(7, 1014688747, 1038912262, 1011328923, 1013103251) + W(8, 1024802817, -1140510683, 1031096569, 1027588477) + + W(9, -1116363642, -1106292650, -1119712705, -1106135611) + + W(10, -1116030864, -1154029132, -1127078294, -1123710517) + + W(11, -1142638070, 1002524342, -1123496021, 1026182445); + WS(-1108758968, -1120615143); + sum1 = + W(0, 1007512449, -1118729201, 1033236958, -1188181540) + W(1, 1032455763, -1139132956, -1126666947, 1028445809) + + W(2, 1043847294, -1115009907, 1038269372, -1103491829) + W(3, 1026842103, 1028990457, -1112992067, 1045480960) + + W(4, 1041722714, 1047642150, 1052029054, 1056274631) + W(5, 1057598894, 1044856958, 1049081300, 1008203541) + + W(6, -1089504654, -1097300100, -1092318719, -1092329088) + + W(7, -1088834696, -1094694701, -1102944265, -1095636207) + + W(8, 1035899020, 1024857813, 1041772976, -1115586563) + W(9, 1037648114, 1028932916, 1031373190, 987483733) + + W(10, 1021562716, -1157490338, 1033997944, -1127937658) + + W(11, 1034853541, -1139535439, 999073631, 1033314299); + sum2 = + W(0, 1022601823, 1027503424, -1114440342, 1031020236) + W(1, -1115479573, 1028798660, -1137878778, 973255146) + + W(2, -1119857460, 1016942723, -1113768249, 1049278985) + W(3, 1023747258, 1008539869, 1030670036, -1124354630) + + W(4, -1099325942, -1112391205, 1050839307, -1097330938) + + W(5, 1019565806, -1108724816, -1143955309, -1115374863) + + W(6, -1068582801, -1076271776, 1067690352, 1077639092) + W(7, 1061065209, -1120523343, 1002861982, 1029925423) + + W(8, 1035739756, -1104095012, 1026494027, 1047876791) + W(9, 1016405011, -1115660355, 1022661839, 1033007729) + + W(10, 1015907333, 1016973548, -1122697579, 1015148595) + + W(11, -1106941761, 1032414836, -1126087053, -1124658022); + WS(-1125070560, -1096513533); + sum1 = W(0, -1112401838, -1141896047, -1114813980, -1125005414) + + W(1, 1026386210, -1115056418, 
1012901881, -1125623196) + W(2, 1032850683, 1035442769, 1027743049, 1036941992) + + W(3, -1104378805, 1037833707, 1022681350, 1033887999) + + W(4, -1111199410, -1105684773, -1093508577, -1099350541) + + W(5, 1060063436, -1112618105, 1035294797, 988362813) + W(6, 1046757622, 1047296038, 1061969278, -1107426238) + + W(7, -1092774776, 1028130403, -1127381572, 1016528878) + + W(8, 1033951818, 1013771590, -1100008383, 1023589122) + + W(9, -1122038496, -1110298148, 1030080099, -1106008748) + + W(10, -1110786345, -1113414691, -1131095256, -1114443933) + + W(11, 996299870, -1117830096, -1140100259, 1014753608); + sum2 = W(0, -1128089453, -1110645236, -1130916401, 1026446699) + + W(1, -1115610756, 1032679495, -1114433002, -1123355757) + + W(2, 1024340419, 1043920397, -1129344777, 1009781987) + W(3, -1102796808, 1037523581, 1023912055, 1026431795) + + W(4, -1102023306, -1114712453, 1028225499, 1057874897) + + W(5, 1052660862, -1102216434, -1127455825, -1128264837) + + W(6, -1103821003, -1097377481, 972074333, 1059140954) + + W(7, 1038144249, -1107293486, 1006589878, -1102041035) + + W(8, 1043342989, -1109265540, -1100613575, -1103696793) + + W(9, -1116201867, -1131885037, -1105206442, -1127587329) + + W(10, -1119644228, 1026656603, 1034952995, 1032660433) + + W(11, -1130501689, 1029362471, 1013212643, 1005668278); + WS(1049867118, 1034735186); + sum1 = W(0, -1127774262, -1117373036, 1031064438, -1112922680) + + W(1, 1012648832, -1132535687, -1120670652, -1140537009) + + W(2, 1029333619, -1123455456, -1105261096, -1096141178) + + W(3, -1120189874, 1024540628, -1138107141, 1000234461) + + W(4, 1019113028, 1040322517, 1040279186, -1101685436) + W(5, 1044944438, -1121818673, 1035373868, 1032620736) + + W(6, 1024964758, 1032014380, 1048588372, 1050135703) + W(7, 1034886579, 1040490120, 1016312768, 1034120602) + + W(8, 1007980364, 1019723167, -1114399774, -1100797936) + + W(9, -1113398479, -1150021355, -1128775361, 1008873398) + + W(10, -1121402761, -1118901061, -1126411220, 
-1110911420) + + W(11, 1019321608, -1114680743, -1121234663, -1121766352); + sum2 = + W(0, -1161289046, 1020587590, 1029682388, 1031608572) + W(1, -1117304231, -1129471662, 965398246, -1134569151) + + W(2, -1111556088, -1127668568, -1095922552, -1092239431) + + W(3, -1165373610, -1120133906, 1007233453, 1019046119) + W(4, 1038429491, -1130067243, 1050898724, 1058359401) + + W(5, 1040200303, 1044552317, -1132308256, 1031851372) + W(6, 1024158411, 1018324827, -1123472030, -1093717514) + + W(7, -1118204561, -1112781150, 1017795291, -1120826542) + + W(8, -1123794133, 1035096674, -1142905112, 1042534485) + + W(9, -1137719883, -1136196985, 1027736448, -1128305748) + + W(10, 1016121349, -1127328688, -1126947701, -1126252937) + + W(11, -1122434117, -1137031592, -1121369248, -1162116006); + WS(1063598519, 1040709546); + sum1 = + W(0, 989424433, 1025004011, -1117397173, 1010227902) + W(1, -1116859551, 1035862832, 1012428267, 1028295307) + + W(2, -1129964132, 1018517694, 1036280904, 1042042835) + W(3, 1025484624, -1108371497, 1028794752, -1105766207) + + W(4, -1092181324, -1096845618, -1092686069, -1082263275) + + W(5, -1087946945, -1124000752, 977020321, -1108973663) + W(6, 1056340507, 1052096915, 1055848084, -1117189370) + + W(7, 1053023013, 1053676604, 1044581128, 1048031389) + W(8, -1140166214, -1124688156, 1027380346, 1041673920) + + W(9, 1025279965, 1021321051, -1125256895, 1035434166) + W(10, 1007420926, 1025298052, -1114706068, 1030527159) + + W(11, 1004378644, -1130325786, 1025029349, 1015026205); + sum2 = W(0, -1128989779, -1132752301, -1121498089, -1112228563) + + W(1, 1023519830, -1111737432, 1023675120, -1134051546) + W(2, 1040099849, 992269838, 1034828720, 1034859451) + + W(3, 1034954038, -1122349299, 1019899084, -1112348174) + + W(4, -1100874000, 1034251701, -1104384455, 1044543345) + + W(5, 1046406891, 1028583216, -1114022376, 1040182737) + + W(6, 1021993174, -1130867684, -1113539068, 1053848304) + + W(7, -1104908283, -1100179639, -1111378604, -1097476412) + + 
W(8, -1130783901, 1034483450, -1122004711, 1016645052) + W(9, 1032774629, 1016210650, 1033297067, 1025418184) + + W(10, 1011642260, -1123860313, 1016725938, -1122625223) + + W(11, 1011006880, -1114528083, 1021084026, -1141294629); + WS(1052377710, -1116561061); + sum1 = + W(0, -1122160137, -1112995181, 1021266631, 1017022397) + W(1, 1032809511, -1105019076, 1019774100, -1119221338) + + W(2, 1026711393, 1046449684, -1094207935, 1051777944) + + W(3, -1105978112, 1045042328, -1140860791, -1140595366) + + W(4, 1034438088, -1103073456, 1059878180, -1091454284) + + W(5, -1108523806, -1137740923, -1113848547, 1015748571) + + W(6, 1023922180, -1112523427, 1031595583, -1107259466) + W(7, 1059633699, -1094439010, 1048809290, 1019553093) + + W(8, -1115207840, 1036366239, -1096200621, 1043506315) + + W(9, -1105628259, 1043730779, -1114298535, -1117446555) + + W(10, 1014756539, -1110629817, 1025340632, -1122333286) + + W(11, -1126028515, -1114494718, 1024493780, -1121208702); + sum2 = + W(0, -1125003387, -1110204990, 1008672566, -1110854180) + W(1, 1015432805, 1007507570, -1129875419, -1145172348) + + W(2, 1023545558, -1122326332, 1039271760, 997586352) + W(3, 1043658401, -1126010948, -1128126180, 1018354299) + + W(4, 1001356924, 1049948311, -1104177316, -1099440370) + + W(5, -1101604747, -1112581048, 1034699764, -1118892801) + + W(6, 1019826271, -1106041547, 1040609653, 1057375034) + W(7, -1098469549, 1041033112, -1108980340, 1026776081) + + W(8, 1029018751, -1122275300, 1017003703, -1099421810) + + W(9, 1052007058, 1018898909, -1118843549, -1123423040) + + W(10, -1114113554, 1028296019, -1143932072, -1122149120) + + W(11, -1118594118, -1129527698, 1031512617, -1147650596); + WS(1066846108, 1044745002); + sum1 = W(0, 1019372186, 978233216, -1129136976, 1025331363) + W(1, -1142297744, 1015116199, 1015645340, 1016287548) + + W(2, 1049403789, -1107121557, 1041504923, -1134682116) + + W(3, 1039399714, -1122699462, -1121336819, 1026221102) + W(4, 1042475733, 1051348497, 1048713517, 
1054698948) + + W(5, 1049146911, 1058118606, 1046729314, 1056609718) + + W(6, -1091781177, -1100692098, -1091573825, -1089263023) + + W(7, -1091452887, -1090043533, -1102394372, -1091564925) + + W(8, -1124410122, 1039801345, 1032893815, 1040511932) + + W(9, -1146055296, 1032222216, -1115550793, -1107840672) + + W(10, 1015897062, 1009376476, -1126291000, 1006187856) + + W(11, 1016897132, 1012743170, 1010428515, 1000566800); + sum2 = + W(0, -1121498998, 1025173682, -1127952317, -1123064681) + W(1, 1031325102, 1031547392, -1116804726, 1013263415) + + W(2, 1040076187, -1110698495, 1046879062, -1114452496) + + W(3, 1037259271, -1113826794, -1113826560, -1114840697) + + W(4, 1040991236, 1027215822, -1097422534, -1079622690) + W(5, -1113150532, 1059352243, 1032134129, 1048220488) + + W(6, -1097642453, 1032193313, -1089335507, -1070315938) + + W(7, -1080614075, 1063284099, 1075195474, 1071802337) + W(8, 1041191639, -1107046015, 1027648922, -1107556506) + + W(9, 1040688678, 1044912350, -1107091577, 1050907604) + + W(10, -1143069461, -1127878497, 1033858873, 1031644922) + + W(11, -1125227083, -1135729999, -1112123781, -1104322253); + WS(-1083170743, 1032719415); + sum1 = W(0, 1023701883, 1027723212, -1122560214, 989186599) + W(1, 1018265472, -1118377474, 1034616094, -1143849572) + + W(2, -1115388842, 1037576352, 1028066352, 1034436696) + W(3, 1046871436, 1036384970, 1041769608, 1033711598) + + W(4, -1095142736, -1104329709, -1093573195, -1091782714) + + W(5, -1090370147, -1088395265, -1092608855, -1085234190) + + W(6, -1131637740, 1047136718, 1050356544, 1058921626) + W(7, 1057401704, 1046747968, 1048250192, 1051022467) + + W(8, 1043889005, -1114738168, 1032119470, 1035779088) + W(9, -1119260798, 1026791695, 1007978244, 1035213927) + + W(10, 1012236805, -1143133226, -1126174356, 1024911192) + + W(11, 1033076018, -1122725807, 1033024419, 1028803056); + sum2 = + W(0, 1040592912, -1148866748, 1032765896, -1113675298) + W(1, 1035431068, -1111525482, 1031171080, -1111672592) + + 
W(2, -1091007599, 1043419584, -1111893630, -1100951705) + + W(3, 1030657616, -1123081200, 1009914590, 1048594219) + + W(4, -1122569720, -1102576481, -1114969246, 1060370086) + + W(5, 1018512847, -1112438148, 1033713452, -1120657004) + W(6, 1061904811, -1110256028, 1042199718, 1060838438) + + W(7, 1061366749, -1086083341, 1029235040, -1090315205) + + W(8, -1096410255, -1106770914, -1108992598, -1088153932) + + W(9, -1091610254, 1039676388, -1104419583, 1040642594) + + W(10, -1107753434, 1043044202, -1138614142, -1121677972) + + W(11, 1049599377, -1117594428, -1154941432, 1041324320); + WS(-1087374135, 1068569819); + sum1 = + W(0, 1034522569, -1114460210, 1040675195, -1156322949) + W(1, -1126356143, 1038944141, -1123272884, -1147588893) + + W(2, 1036038485, -1134485343, 998999958, -1102898978) + W(3, 1032174021, 1032843110, -1112707068, 1041667373) + + W(4, 1051494643, 1048850073, 1055860825, 1058980799) + W(5, 1050366134, 1048730421, 1043911796, 1049367775) + + W(6, -1092286050, -1097395434, -1093950865, -1086427635) + + W(7, -1097745188, -1098221875, -1095510475, -1093210118) + + W(8, 1024631242, -1155697773, 1034953465, 1036357020) + W(9, -1115522123, 1020968428, 1032316940, -1134843155) + + W(10, -1124293233, 1035227470, 1022306499, -1115248568) + + W(11, 1025097211, 1020281868, -1112095119, 1003691067); + sum2 = + W(0, 1036982689, -1109420523, 1038171016, -1114883799) + W(1, 1032698755, 1023400751, -1119043865, 1034471555) + + W(2, -1101176821, 1040154700, -1103427381, 1011961296) + + W(3, -1115041464, -1112162319, -1114657793, -1117007917) + + W(4, 1032029724, 1008871494, 1053639476, -1094943648) + W(5, 1046242469, 1015257266, 1018562568, 1031877719) + + W(6, 1031712503, 1051308638, -1103617514, -1090481186) + W(7, 1048516585, -1110670159, 1026474691, 1044794303) + + W(8, -1095262307, -1089037290, -1077227331, -1077304111) + + W(9, -1075605762, -1086532256, -1115849955, -1110424851) + + W(10, -1135236510, 1036178124, 1056219784, 1063074442) + + W(11, 
1070767205, 1074120487, 1066414901, 1052188053); + WS(-1094340206, 1025238393); + sum1 = W(0, 1025401267, -1120317764, 1029217422, 1017691154) + W(1, 1015273200, 1006374863, -1149759301, 1023921508) + + W(2, 1041212259, -1123694073, 1040075594, -1128349902) + + W(3, 1030937834, 1030337996, -1112880429, 1048792866) + W(4, 1051626551, 1044638395, 1051991350, 1049159169) + + W(5, 1055056667, 1045537234, 1047952828, -1109761292) + + W(6, -1087771683, -1099303356, -1089805402, -1090474853) + + W(7, -1089928234, -1095611655, -1103717662, -1097665994) + + W(8, 1026720107, 1027277160, 1042793877, 1032035372) + W(9, 1038797403, 1028636183, 1034683820, 1024357062) + + W(10, 1024873417, 1003045609, 1025974530, -1140916353) + + W(11, 1026491292, -1125445197, 1018728702, 1024842472); + sum2 = + W(0, 1016272983, -1125328223, 1019432157, -1136495190) + W(1, 1024638490, -1122054341, 1016158573, -1137665190) + + W(2, -1117564089, 1007328518, 1038058236, -1106032359) + + W(3, 1036423350, -1129861056, -1141592892, 1032273458) + + W(4, 1054344808, 1047318060, -1095657702, -1084625245) + + W(5, -1124414562, 1036648948, -1120282232, 1033205457) + + W(6, 1082928383, 1074423709, -1074337637, -1066615612) + W(7, -1087090886, 1034359725, 999766988, -1114245215) + + W(8, 1043627742, 1037006400, 1040243063, -1106132294) + W(9, -1119023275, 1004167460, 1002865564, -1113153751) + + W(10, -1113989008, -1137080694, 1017883877, -1126758430) + + W(11, 1040797418, -1113361096, 1023713345, 1022755221); + WS(-1098231918, 1035887052); + sum1 = W(0, 1007686525, -1112046456, -1136641001, 1025581173) + + W(1, -1121240420, -1165513922, -1114143760, -1123656209) + + W(2, -1117693688, -1119004197, -1104788790, -1092353842) + + W(3, -1104580134, -1146015939, -1162096818, -1117522176) + + W(4, 1048268202, 1036810732, 1055375147, 1069647932) + W(5, 1052170270, 1020096363, -1125578169, 1031911554) + + W(6, -1105099271, -1097653932, -1098720398, 1066085926) + + W(7, -1093227035, -1123833253, -1107379122, 
-1130540403) + + W(8, -1110112236, -1130249512, -1106155488, -1091717545) + + W(9, -1110987409, -1108464746, -1156567122, -1106840908) + + W(10, 1016554315, -1115219957, -1142876018, -1143806946) + + W(11, -1113561092, -1123809427, -1113074326, -1120698772); + sum2 = W(0, -1125891647, -1139529830, -1127048511, -1119620679) + + W(1, 1015042367, -1133002574, 1001053676, -1136336990) + + W(2, -1121500935, 1033326245, -1110257678, 1033808587) + + W(3, -1152963927, 1023704513, -1125968895, -1159911790) + + W(4, 1021750179, -1133323886, 1040835222, 1061147710) + + W(5, -1110018076, 1035246842, -1123027527, -1144777900) + + W(6, 1017645919, -1167588957, -1108825416, 1055027509) + + W(7, -1105425292, -1090179180, 1035235033, -1104047270) + + W(8, -1130193055, 1018366499, -1144215516, 1035778772) + + W(9, -1105010302, 1019226535, -1113493000, -1137733870) + + W(10, -1118741587, 1021086319, -1113356630, -1123951235) + + W(11, -1124596863, 995355927, -1135606974, -1141401292); + WS(-1081201436, -1083122818); + sum1 = W(0, 1024396773, 1040103323, -1104054160, 1042893967) + + W(1, -1121710378, -1122190892, 1038539251, 1020559724) + + W(2, -1118776317, -1121204906, 1048678635, -1101546374) + + W(3, 1036673102, -1112386175, 1022162966, -1111471555) + + W(4, -1090317103, -1098486564, -1094484293, -1101075087) + + W(5, -1095873942, -1099326495, -1101153544, -1095630377) + + W(6, 1056470156, 1048677796, 1048629197, 1052113969) + W(7, 1033707976, 1057451847, 1027207978, 1053998976) + + W(8, -1120183581, -1105729174, 1032093347, 1048228642) + + W(9, 1044099610, -1102747354, -1114063013, -1125760751) + + W(10, -1124728406, 1010393788, -1105359341, 1048587688) + + W(11, -1107414311, 1034642220, -1109685144, 1033222029); + sum2 = + W(0, -1130151079, -1128601535, 1016586775, -1116462462) + W(1, 1006316053, -1136401487, -1134837023, 1015531121) + + W(2, 1029357099, -1139320303, 1016703563, 1036545095) + W(3, 1027702761, -1115282656, 1025282364, -1128863347) + + W(4, -1141051917, 
1027307392, 1040480663, -1084177500) + W(5, 1027514935, 1033882273, 975470826, 1013565163) + + W(6, -1126143047, -1124015138, -1090482710, -1061542668) + + W(7, -1081711054, 1042416533, -1107083018, 1030037156) + W(8, -1138853383, 1026568428, 1051692940, 1087807110) + + W(9, 1065457143, -1101742731, 1035528582, -1123914522) + + W(10, 1016378649, -1128887123, 1024508650, -1109325640) + + W(11, -1115450794, 1030137534, 1016609727, -1122133136); + WS(1056806766, 1019813151); + sum1 = W(0, -1109922347, 1027672232, -1113027385, -1106442907) + + W(1, -1119504732, -1127683323, 1015134384, -1122196904) + + W(2, 1038862168, -1098777292, 1057340869, -1131946518) + + W(3, 1048903346, -1100586740, 1027127869, -1136830686) + + W(4, -1094336356, 1025923920, -1089223672, 1049099726) + + W(5, -1102142547, -1122538541, -1099585557, -1100472159) + + W(6, 1045176100, 1046907893, 1028090813, 1060367739) + W(7, -1102839017, 1052174932, 1024110556, 1047790604) + + W(8, 1023835087, -1097985673, 1044113854, 1010151151) + + W(9, 1051267347, -1093148714, 1029024221, -1137237669) + + W(10, -1117706029, -1143840774, -1111671123, 1025756788) + + W(11, -1111600768, 1040843731, -1107564881, -1131252120); + sum2 = + W(0, 1016054025, -1116580030, -1128856269, 1024017790) + W(1, -1120027976, 1003808451, -1120561807, 1020365697) + + W(2, -1119367753, 1033363716, -1107794961, -1129548157) + + W(3, 1034251322, 1039517400, 1019337653, -1174223189) + W(4, -1115811972, -1119979151, 1046175546, 1051115205) + + W(5, -1095340379, -1127431641, 976018325, -1148320619) + + W(6, 1024987046, -1114756690, -1100926142, 1048732221) + + W(7, 1044324276, -1104827086, -1139095561, -1138276477) + W(8, 1016843613, 1025664850, 1028330842, 1029232538) + + W(9, -1105635461, 1042504965, -1118828309, -1117203320) + + W(10, 1007599121, -1120202841, -1144919563, -1123931557) + + W(11, -1122027017, -1118580981, 1016896817, 1009819353); + WS(1067549148, 1061168738); + sum1 = W(0, -1136860560, 1019396970, 1034693791, 1031876061) 
+ + W(1, -1126960466, 1026075051, -1129065354, -1125694250) + + W(2, 1041916271, -1102760394, -1100215160, -1127917744) + + W(3, 1038708237, 1042570959, 1024557450, -1114527138) + W(4, 1043173595, 1046914407, 1063664325, 1041579695) + + W(5, -1084953669, -1099553438, -1111656016, 1036161892) + + W(6, -1099729080, -1097589844, -1085101004, 1044764001) + + W(7, 1062969721, 1046958554, 1031927572, -1129394940) + W(8, 1028345500, 1036811500, 1044130369, -1111608196) + + W(9, -1096862061, -1104831702, -1121741642, 1028864534) + + W(10, -1117900922, -1118556558, -1118557363, -1148658183) + + W(11, 1028601293, 1014732126, 999719805, -1111432076); + sum2 = + W(0, 1037988572, -1109648154, -1115314649, -1110735622) + + W(1, 1029414023, -1115532881, -1114457629, -1123631499) + + W(2, -1097183834, -1096035348, -1106718097, 1044362532) + + W(3, 1044653749, -1118097482, 1036871139, 1035597348) + W(4, -1096111302, 1046143856, 1065688064, -1100541529) + + W(5, 1022961930, 1041170408, -1121640520, -1109551474) + + W(6, -1112255513, 1047451704, 1048785443, -1089624797) + + W(7, 1040629892, -1135819985, -1109975688, 1030316886) + + W(8, 1025606252, -1109281762, 1025033440, -1124926742) + W(9, 1024229044, -1109992198, 1007526703, 1031542857) + + W(10, -1131066844, 1021094187, -1118872350, 1035314838) + + W(11, -1132267648, 1004115214, -1128646118, -1117895342); + WS(-1090126519, 1034760182); + sum1 = + W(0, 1022234099, 987019599, 1021371561, -1125185183) + W(1, 1040030303, 1005824514, 1028122555, -1120592523) + + W(2, 1038732905, 1040082783, 1042926010, 1019049563) + W(3, -1101231100, -1108355337, -1115211984, 1035983309) + + W(4, -1095221408, -1096833350, -1090005917, 1049172122) + W(5, 1056956325, 1052986466, 1046568014, 1025899358) + + W(6, 1049091087, 1049531159, 1052645704, 1048882035) + + W(7, -1086324445, -1089320760, -1101798768, -1123937043) + + W(8, -1141106060, -1100657291, -1098472859, -1113910991) + + W(9, 1041783487, 1033537135, 1037621024, -1125397784) + W(10, 
-1130848127, 1036771650, 1035488804, 1025403478) + + W(11, 1007113098, -1138596020, -1128732298, -1127374557); + sum2 = + W(0, 1016736022, 1011977196, -1137999416, 1019963070) + W(1, 1030437859, -1122046802, -1142727409, 1018753414) + + W(2, 1032939170, -1102682487, 1037611888, -1102949496) + + W(3, 1039010410, -1110345340, -1124097437, -1113761662) + + W(4, -1122689775, 1060510561, 1051792579, -1073351999) + W(5, 1059164734, 1062467108, 1031495287, -1101619274) + + W(6, -1102551566, 1060805847, 1057985083, -1073393989) + + W(7, 1057367527, 1062012989, -1147452225, -1099879364) + + W(8, -1122487238, -1122236502, 1036808922, -1103029047) + + W(9, 1039165500, -1106860549, -1143421321, 1015482390) + + W(10, 1026411789, -1128029215, 1019619350, -1125796177) + + W(11, 1024266957, -1119780381, 1024156551, 1016465010); + WS(-1089242039, -1127205581); + sum1 = W(0, -1124452989, 1037734344, -1136469606, 1045032037) + + W(1, -1131176455, 1029283077, 1038076384, 1029887056) + + W(2, 1047404342, -1111320098, -1115981071, 1038279993) + + W(3, 1006926037, 1030509432, -1123275877, 1043334248) + W(4, -1086849964, 1001557948, 1055753134, 1059294849) + + W(5, 1006841992, 1049404683, 1037635838, 1044156233) + W(6, -1114062493, 998659036, -1096047365, -1107330927) + + W(7, -1105936042, -1087265492, -1095492784, -1080260267) + + W(8, 1042532510, 1038364441, -1118490271, 1044317529) + W(9, 1035319841, 1043122385, 1027367204, 1053177470) + + W(10, 1010360274, 1032729828, 1015888923, -1118190491) + + W(11, 1021147727, -1123196402, 1027584778, -1118995315); + sum2 = W(0, 1029897599, -1114965417, 1025776431, -1121485495) + + W(1, -1140617930, -1113546368, 1031651703, -1117524097) + + W(2, -1102843967, 1046605828, 1033633995, 1048791648) + + W(3, -1104898739, 1031053127, -1108448442, -1108356905) + + W(4, 1001547796, 1025076963, -1090433692, 1044097140) + W(5, 1042488362, -1118485295, 1045463546, 1040816680) + + W(6, 1049416264, -1100594038, -1096668289, -1117020477) + + W(7, -1102246327, 
1034926551, 1047237110, 1044694712) + W(8, -1120840401, 1041915682, 1040646462, 1042521794) + + W(9, -1112857889, -1109164591, 1015998805, -1109851905) + + W(10, -1115391670, 1020641677, -1111492899, 1034331443) + + W(11, -1115861875, -1134127978, -1120855845, -1124879769); + WS(-1081567068, -1085072352); + sum1 = W(0, 1026202077, 1017630556, 1034866076, -1120073907) + W(1, 990337572, -1122725732, 1016189565, -1135328058) + + W(2, 1008391383, -1102096537, -1105309517, -1113888457) + + W(3, 1038015146, 1010107993, 1035371298, -1107104879) + W(4, 1043376377, 1035138955, 1062685095, -1088326467) + + W(5, -1087780001, -1095652846, -1111927693, -1134788873) + + W(6, -1106037014, -1113878797, -1091962582, -1099527071) + + W(7, 1065743931, 1048331161, 1035819857, 1033560764) + W(8, -1119432853, 1041401156, 1028045374, 1036249665) + + W(9, -1122166240, 1030964177, 1030882915, 1034792920) + + W(10, 1015626934, -1112723700, 995954372, -1112822301) + + W(11, 1036674595, -1165855255, 1020668626, -1140134473); + sum2 = W(0, -1101998430, 1034880732, -1111573656, 1034691412) + W(1, 1017688506, 1032207084, 1002612774, 995363580) + + W(2, 1025808817, -1110518368, -1090739547, -1098171169) + + W(3, 1047617175, -1112107064, 1027687639, 1035291452) + W(4, 1026331301, 1035202798, -1095903349, 1068344140) + + W(5, -1085314459, -1108916250, -1113844364, -1111293323) + + W(6, 1041383807, 1044537615, 1045506861, 1069658374) + + W(7, -1090700374, -1135142667, -1106371211, -1105384559) + + W(8, 1033833028, -1096440885, 1030373631, -1094064924) + + W(9, -1106284721, -1113568166, 1043762008, -1113615358) + + W(10, -1108635033, 1042833515, -1107072548, 1037649108) + + W(11, -1117475933, 996218700, -1115713024, 1031725587); + WS(-1122270064, -1083487436); + sum1 = W(0, 1022021054, 1000846209, -1120651002, -1133172181) + W(1, 1015175368, 1008528664, 1014262402, 1032206792) + + W(2, -1111938972, 1031069285, 1035108132, -1122478547) + + W(3, 1022898156, -1109542844, 1003755051, -1099224128) + + 
W(4, -1089129474, -1092279467, -1089405705, -1088959911) + + W(5, -1101358829, -1113192731, -1110785838, -1105614817) + + W(6, 1061173530, 1053406092, 1061891890, 1046812368) + W(7, 1052509753, 1035774995, 1040815801, 1041852206) + + W(8, -1117042135, -1112839626, -1115403968, 1035227362) + + W(9, 983155781, 1035355065, -1114859906, 1046440556) + W(10, 990992466, 997371026, -1123767278, -1136588785) + + W(11, 1014548513, -1120262163, -1139309996, -1196238920); + sum2 = + W(0, 1044179919, 1022028328, 1040258943, -1118974089) + W(1, 1033279863, -1117110809, 1031745329, -1103290765) + + W(2, -1111986018, 998943655, -1095034556, -1095846499) + + W(3, -1110448374, -1099083231, -1120681553, -1101323001) + + W(4, -1080384748, -1088604014, -1104000317, 1073459027) + + W(5, -1098039236, 1045892370, -1111672860, 1055006842) + + W(6, -1092393621, 1029389095, -1090514887, 1071708766) + W(7, 1012336831, 1049307354, -1100485937, 1051814517) + + W(8, 1048581079, 1009323699, 1015383036, -1097133430) + + W(9, -1122793353, -1097759856, 1041920736, -1091667564) + + W(10, 1031858735, 1021777892, 1024626196, 1024702395) + W(11, 1031448400, 1030521846, 1016567237, 1029007432); + WS(-1097130350, -1073679750); + sum1 = + W(0, -1154560184, 1025389788, -1145657140, 1000633100) + W(1, -1120139600, 1032643785, 1008755251, 1030414749) + + W(2, -1142702368, 1008724804, 1035954194, 1046084016) + W(3, 1042008657, -1106616153, 1025929714, -1102859291) + + W(4, -1095841490, -1096179481, -1095185831, -1085973126) + + W(5, -1089907161, -1096805620, -1098086361, -1107347576) + + W(6, 1034517635, 1053502809, 1045830013, 1057148843) + W(7, 1051926551, 1052799826, 1043469780, 1050172789) + + W(8, 1044574577, -1112918112, 1042446427, 1025817904) + W(9, 1017272605, -1123577295, -1143679882, 1014891512) + + W(10, 992206103, -1143244972, -1122388570, -1139683234) + + W(11, 1018417129, 1017209342, 1016358830, 1026775680); + sum2 = + W(0, 1021941992, -1140009235, 1026649920, 1025391436) + W(1, 1033557641, 
-1110654493, -1117894486, 1031221238) + + W(2, -1106123822, -1137197699, -1104678389, -1103278668) + + W(3, 1049392706, 1026389826, 1046385471, -1107040085) + W(4, 1031547466, 1040410800, -1091687878, -1069452203) + + W(5, 1071493698, 1075377385, -1093315115, -1101021764) + + W(6, -1135582819, 1048777558, -1102399061, -1086654414) + W(7, 1036526271, 1051671224, 1003979070, 1042747507) + + W(8, 1018035752, -1114989513, 1038853959, -1112110740) + + W(9, -1155541821, -1119910153, -1130248744, -1119798403) + + W(10, 1017098260, -1129449426, -1170697076, -1134437671) + + W(11, 1030635558, -1127657747, 1010389927, 1007123883); + WS(-1110542776, 1046722292); + sum1 = W(0, -1112464600, 1016022658, 1027765771, 981819586) + W(1, 1026835189, 1025143735, -1111607705, 1024330241) + + W(2, 1033517476, -1108629467, -1106997347, -1113497042) + + W(3, -1116788199, -1106078850, 1041100086, -1124631714) + + W(4, 1047470894, -1133454244, 1053995028, -1100439522) + + W(5, -1093531706, 1048713001, -1106135867, -1115421707) + + W(6, -1131671638, 1032980471, -1092318054, 1042288212) + + W(7, 1061947018, -1121614518, 1012478420, 1041393523) + + W(8, 1005087552, -1105826521, 1042584756, -1109105451) + + W(9, -1098489206, -1105038597, 977775992, -1114990145) + + W(10, -1142745264, 1026603482, -1129658366, 1017901338) + + W(11, -1142191304, 1040275689, 1016467704, 1020475210); + sum2 = W(0, 1031452303, 992881643, 978475180, -1121457613) + W(1, 1034445399, -1130052481, 1032145701, -1116902843) + + W(2, -1132232893, -1116591675, 1016124301, -1114931492) + + W(3, -1094590847, 1040974964, -1110729382, 1035934107) + + W(4, -1109510079, 1032912109, -1114514404, 1032060799) + + W(5, 1051504728, -1087149232, 1039284419, 1032317221) + W(6, 1018866101, -1116915567, 1055420465, 1059251768) + + W(7, -1088887374, 1044275534, -1114948355, -1104983612) + + W(8, 1014551643, 1024567259, -1097097100, -1118272021) + W(9, 1048490818, 1035881493, 1032922179, 1032032409) + + W(10, 1032688527, -1114177042, 
1042584708, -1109972047) + + W(11, 1024892699, -1108606213, -1121421993, -1133789139); + WS(1059165367, -1104520251); + sum1 = + W(0, 1007492796, -1117692608, 1009135144, -1115310467) + W(1, 1014125520, -1113191163, -1129119585, -1118876992) + + W(2, -1108628644, 1046236477, -1120534447, 1052736188) + W(3, -1118278150, 1036684740, 1015268651, 1008090175) + + W(4, -1117534006, -1094176525, 1053590938, 1041258594) + + W(5, 1001540436, -1106455985, -1112388022, -1114551242) + + W(6, 1041312427, -1118419948, -1107420459, -1097199722) + + W(7, 1055263011, -1100739533, 1040471133, 1020553106) + W(8, -1113108212, 1029454180, -1110779368, 1053879643) + + W(9, -1098892966, 1045621277, -1126748266, 1026632706) + + W(10, -1124001749, -1113899741, -1131761404, -1114949702) + + W(11, 1025063448, -1105125705, -1126282358, -1127494856); + sum2 = W(0, 1027505664, -1115834059, 1029084872, 1029257304) + + W(1, -1128264406, -1138405484, -1118563560, -1127212263) + + W(2, -1104527074, 1048986559, -1089420265, -1098414029) + + W(3, -1104406791, 1040673804, -1203758566, -1117936348) + + W(4, 1034957797, -1098163961, 1051828462, 1053778056) + + W(5, 1048509414, -1106881855, -1113115951, -1136870020) + + W(6, 1041601252, -1125657446, 1045110551, 1041501740) + W(7, 1041829791, -1119286592, 1045914276, 1022365337) + + W(8, -1108054404, 1041896195, -1099714824, -1129563106) + + W(9, -1092212913, 1049994472, -1105182054, -1139619400) + + W(10, -1122847824, 1034858548, -1116127595, -1170669534) + + W(11, 1037831890, -1107117839, 1031508216, -1127498142); + WS(1064754871, 1064755352); + sum1 = W(0, -1124996947, -1126765813, -1111973122, 995438583) + + W(1, -1131935965, 1011044801, 1017902933, -1117569083) + W(2, 1039345889, 1034558935, 1025078634, 1031530717) + + W(3, -1102805216, -1112086641, -1108706152, -1125705364) + + W(4, -1096693846, -1096907004, -1092723587, 1042155040) + + W(5, 1055052309, 1050402072, 1045488762, 1036105095) + W(6, 1051509189, 1048609181, 1055455058, 1038916133) + + 
W(7, -1085470304, -1096310694, -1110967031, -1114430562) + + W(8, -1110987505, -1122110878, -1103910610, 1036071207) + W(9, 1036962198, 1042970795, 999916154, 1043170799) + + W(10, 1036233792, 981607022, 1033037628, 1028131846) + + W(11, -1113728073, -1155599823, -1121653045, -1131031997); + sum2 = + W(0, -1133117125, 1023646322, 1023723550, 1016880452) + W(1, -1128763803, 1032406499, -1112596197, 1024272758) + + W(2, 1035315397, -1104375670, 1045506061, -1100884270) + + W(3, 1037072250, -1100571834, 1043010850, -1106522969) + W(4, 1034291601, 1023753609, -1107539440, 1046403885) + + W(5, -1123261308, 1040670310, -1109210140, 1037357283) + + W(6, -1093657955, 1053589289, -1104725243, 1040733712) + W(7, 987308827, 1047435788, -1103530057, 1046389911) + + W(8, -1083656994, -1096844972, -1079943056, -1108767061) + + W(9, 1066015084, 1056439411, 1038430454, 1050581867) + W(10, 1063017252, -1117431336, 1065218548, -1110535684) + + W(11, -1088643610, -1092199491, -1140617522, -1093142505); + WS(-1141889920, -1150694570); + sum1 = + W(0, -1121776659, 1035438770, -1107472311, 1029335145) + W(1, 1032163820, -1122959321, 1035788170, 1006930832) + + W(2, -1156363954, 1043244278, 1039611642, 1044045394) + W(3, 1031781705, -1113754566, 1041298854, -1113010266) + + W(4, -1110656781, -1108191526, -1092681840, -1089342271) + + W(5, -1107172434, -1110931796, -1106653204, -1098776751) + + W(6, -1150990554, 1049522698, 1047918398, 1044281805) + W(7, -1142656241, -1118178969, 1044014582, 1035377494) + + W(8, 1001967189, 1024784007, -1120026675, 1041259438) + W(9, 1035091531, -1114065315, 1037418924, 968315538) + + W(10, 1009891810, 1026909213, -1115519345, 1032535224) + + W(11, -1156224954, -1114416227, 1034927023, -1127266575); + sum2 = W(0, -1096670021, -1102787949, -1099441746, 1043637876) + + W(1, 1052388948, 1042101483, -1135324433, 1042670443) + W(2, 1059540854, 1039622534, 1064198145, -1086802625) + + W(3, -1085971065, -1113298762, -1098667485, -1104634628) + + W(4, 1045575353, 
1037899964, 1030855272, -1101655681) + + W(5, 1033969343, -1105771070, 1036465297, -1104173140) + + W(6, -1133958494, 1013042349, 1040028330, 1036771998) + W(7, 1032851236, -1120619083, 981463467, 1030229840) + + W(8, 1031038260, -1110193614, -1117097690, -1125265460) + + W(9, -1103569731, 1049976190, -1101523135, 1035574893) + + W(10, -1118270456, 988987510, 1032105974, -1117707023) + + W(11, -1134648980, 1021389004, -1119296945, 1035007302); + WS(1060329015, -1107100438); + sum1 = W(0, -1139109867, -1126768152, -1154508124, 1024792785) + + W(1, -1145234942, -1121126338, -1165337473, -1128378904) + + W(2, 1016013223, -1108878752, -1139382101, 1046477388) + + W(3, 1046417836, 1044791733, -1115375281, 1030070044) + W(4, 1052529520, 1042283693, 1051369597, 1051787130) + + W(5, 1040695538, 1046651812, 1039020124, 1046922680) + + W(6, -1098269535, -1103310414, -1093610355, -1088582834) + + W(7, -1090008057, -1097854544, -1110993568, -1105218789) + + W(8, -1120993331, 1033078264, 1033661966, 1015463488) + W(9, 1035018943, 1029328303, 980238373, -1121430349) + + W(10, 1025836257, -1113589378, 1029380042, -1124637616) + + W(11, -1127749832, 1030094510, -1117533511, 1016154976); + sum2 = + W(0, -1122821091, 1016851522, -1106087013, -1109184296) + W(1, 1045004090, 1015529466, 1026918939, -1115690469) + + W(2, 1039646422, -1156337616, -1131895450, -1080883791) + + W(3, -1074990048, -1105476432, -1104782698, 1022339242) + + W(4, -1126218548, -1123806894, 1035943528, 1071805249) + + W(5, 1071092779, -1126394390, 1031828962, -1122803425) + W(6, 1020520346, -1105518185, 999070248, -1096231655) + + W(7, -1125969764, 1028426171, 1030594487, -1127395662) + W(8, 1030755819, 1031650855, 1015158698, 1043321474) + + W(9, -1125796080, 1026097175, -1140334484, -1110808154) + + W(10, 1003947528, 980292032, -1123933765, -1122746065) + + W(11, 1018391998, -1126690400, 1018627618, -1149029768); + WS(1004067712, 1029538397); + sum1 = W(0, 1039684517, -1113057966, 1034662227, 976341006) + W(1, 
-1116361615, 1039793800, -1114202454, 1023346679) + + W(2, 1025050595, -1112884456, -1120136235, -1106127376) + + W(3, -1108532672, -1136083213, -1109109944, 1026195563) + + W(4, 1055781642, 1052279792, 1057037092, 1048957300) + W(5, 1059667231, 1049781884, 1046150498, 1052752022) + + W(6, -1093421305, -1096595713, -1096171643, -1083333863) + + W(7, -1093970477, 1050503466, -1098876866, -1119746148) + + W(8, -1134522377, 1008080123, 1038845557, -1104830972) + + W(9, -1107387175, -1105145436, -1111371224, -1106375815) + + W(10, 1033902960, -1134909265, 1000314570, 1017566030) + + W(11, -1120056627, 1034175935, -1129568684, 1002478074); + sum2 = W(0, 996458418, 1033154491, -1124552606, 983965669) + W(1, -1113544599, -1118592979, 1024911403, -1120675710) + + W(2, 1023361218, -1129026548, -1113149678, -1096377075) + + W(3, -1108470744, -1107366613, 1040480632, 978762058) + W(4, 1020176326, 1036155261, -1107309754, 1064108379) + + W(5, -1113642974, -1147140265, -1123866147, -1127049668) + + W(6, -1105686968, -1136279909, -1101182829, 1059185253) + + W(7, -1094171461, -1097506500, 1038318571, 1027330889) + + W(8, -1112170015, 1029838361, -1110765232, 1029555247) + W(9, 1048386684, 995412594, 1045927904, -1102738273) + + W(10, 1016740014, -1122796554, 1027141165, -1111484496) + + W(11, 1033692775, -1108579478, -1126067866, 1023291782); + WS(1047133404, -1079170418); + sum1 = W(0, 1009155056, -1116032717, -1135035740, -1132905800) + + W(1, 1006908676, -1123509635, -1129150906, -1147535977) + + W(2, -1122581460, -1127398915, -1111731450, -1098422622) + + W(3, -1136427723, 1019095722, 1037504657, 1012838771) + W(4, 1042667713, 1041078541, 1051022508, 1065228285) + + W(5, 1042380775, 1050271906, -1107182030, 1023931745) + + W(6, 1030334583, -1113404572, -1111377849, -1104817319) + + W(7, -1091891141, -1103926112, -1121675713, -1123007382) + + W(8, -1108143194, -1123819760, -1112180142, -1102890728) + + W(9, -1111098047, 1023382911, -1136273159, -1123177070) + + W(10, 
1027163842, -1122554086, 1023811915, -1133181796) + + W(11, 1001483047, -1116735535, 1015217807, -1130799202); + sum2 = W(0, -1151544588, -1155029688, 1018065470, 1007239987) + + W(1, -1125453823, -1118997712, 1007542759, -1133631258) + + W(2, 1017708852, -1121981859, -1114670464, 1049278985) + + W(3, 1041079714, -1156221672, -1120658712, -1113215345) + + W(4, -1147640076, -1129630940, -1093073209, -1077783117) + + W(5, 1074285479, 1034878186, 1016277382, 1033160981) + W(6, 1008923057, 1024218233, -1119653793, -1083801634) + + W(7, 1047560438, 1028846010, 1036336794, 1022080564) + W(8, 1011942449, 998377109, -1131988255, 1041518161) + + W(9, -1114079754, -1118519138, -1121956184, 1027490516) + + W(10, 1018406594, -1133221663, 1025316590, -1123603132) + + W(11, -1131599727, 1024940191, -1131785133, -1130450701); + WS(1043550940, 1028476494); + sum1 = W(0, -1115183742, -1121780237, -1117842775, -1110838377) + + W(1, 991794198, -1111061058, 1015980795, -1113960535) + + W(2, -1130981802, -1102302048, -1117998761, -1094318158) + + W(3, -1102037997, -1115328139, -1113105937, -1106426583) + + W(4, -1100607937, -1105099981, -1096882923, 1062548805) + + W(5, -1103616404, -1097580450, -1103768718, -1100373208) + + W(6, 1051762223, 1040535662, 1059510884, 1068023761) + W(7, 1055882697, 1041348961, 1034161697, 1044552262) + + W(8, 1000007075, -1138550724, -1102853271, -1099046056) + + W(9, -1102330336, -1131654944, -1112888641, -1110284503) + + W(10, -1121847282, -1111548899, -1132334487, 1013492582) + + W(11, 1018468811, -1119471984, -1129682576, -1148988811); + sum2 = + W(0, 1029690847, -1115037203, -1127742454, 1036903221) + W(1, -1106227374, 1039025001, -1108879063, 1016988574) + + W(2, -1103820712, 1043435491, -1100462798, 1028701483) + + W(3, -1121926871, -1126383902, 1017141302, -1115441627) + W(4, 998970743, -1096312501, 1044923803, 1041049643) + + W(5, 1030485727, -1125321886, 1008577020, 1033537170) + W(6, 1028297515, 1046052979, -1099477234, 1065575031) + + W(7, 
-1098681477, 1040854499, -1106467082, -1122382559) + + W(8, -1122562191, -1115911439, 1030898927, -1096910875) + + W(9, 1034498637, -1115709255, 1013474716, -1129067614) + + W(10, -1133991676, 1007821180, -1113716427, 1036974934) + + W(11, -1111652935, -1126914630, -1128843302, -1114648723); + WS(-1079050332, -1087647968); + sum1 = W(0, -1138480508, -1122115440, 1024624314, -1106223181) + + W(1, 1029330596, -1120448614, -1134294785, 1004544641) + + W(2, -1112879708, 1026750308, 1027382179, -1116109267) + + W(3, 1024806390, -1128868784, 1022822666, -1115751616) + + W(4, -1095708941, -1102419558, -1136300523, -1083772539) + + W(5, -1103579129, -1102450653, -1100406055, -1099401009) + + W(6, 1054719611, 1048762675, 1057423590, 1057166968) + W(7, 1058690419, 1049252525, 1046884233, 1051036287) + + W(8, 1031575959, -1106236805, -1115045376, -1096438944) + + W(9, -1115914082, -1116410163, -1127656777, 1031406769) + + W(10, -1122888245, -1139407140, -1136279992, -1112079518) + + W(11, 1025354440, -1128866725, 1008311211, 1030150228); + sum2 = + W(0, 1018441725, 1018735508, -1106801879, 1042307735) + W(1, -1108191465, -1153036923, 1023515972, -1130614305) + + W(2, -1121226957, -1128043345, 1036012925, -1105109021) + + W(3, 1026535678, 1024075219, -1143871918, -1115606924) + W(4, 1025980595, 1035475244, -1093599779, 1063056253) + + W(5, -1097885523, 1035122552, 1028845741, -1124060823) + + W(6, -1143973174, 1028848366, -1119821797, 1055391108) + + W(7, -1118265757, 1031788992, -1122707515, -1149779899) + + W(8, -1145354702, 1010180031, -1106432772, -1103238145) + + W(9, -1114258989, -1114303163, 1031455538, -1117711701) + + W(10, -1118705847, 1001175670, -1114705464, 1024485983) + + W(11, -1109715589, 1025382280, -1125366621, -1122890135); + WS(1061221431, 1002405371); + sum1 = W(0, -1123061017, -1109190069, -1111043962, -1094345134) + + W(1, -1146354418, -1113941840, -1145213652, 1017803349) + + W(2, -1123480946, 1033046959, 1050201177, 1042710035) + W(3, 1050099666, 
1026923762, 1032794752, 1029204218) + + W(4, -1090405194, -1104934581, -1102475777, -1086823335) + + W(5, -1097235340, -1091215734, -1094526523, -1089600016) + + W(6, 1054235011, 1044209302, 1057895846, 1052530935) + W(7, 1058601910, 1050794548, 1041457976, 1055600534) + + W(8, 1026563665, -1108435303, 1037490845, -1102625680) + + W(9, 1033897294, -1126650490, -1110275296, 1021417496) + + W(10, 1000058490, -1121369187, 1030151784, -1111380603) + + W(11, 1040756961, -1126829850, 1032331475, 1033488871); + sum2 = W(0, 1049937345, 1058657690, 1071079778, 1072348129) + W(1, 1066389113, 1049150259, -1119844839, -1113785170) + + W(2, -1089706816, -1087536311, -1078069733, -1078647761) + + W(3, -1079714039, -1090508646, 1022376790, -1103849791) + + W(4, 1050626093, -1106625833, 1020012689, -1094966474) + + W(5, -1098171132, 1050328795, -1105718319, 1052174137) + + W(6, 1030967529, -1105431871, 1044296437, -1112600090) + + W(7, 1038432439, 1044145959, -1126694997, 1034822829) + + W(8, -1115709839, 1036734782, -1122006247, -1122763227) + + W(9, -1130276743, 1024733405, 1021875579, -1112758982) + + W(10, 1039088353, -1109192755, 1031394755, -1116226089) + + W(11, 1022577738, 1012728894, -1123093509, 1013882918); + WS(-1092893294, -1089220584); + sum1 = W(0, 1008699951, 1010493299, -1124403149, 1025199074) + + W(1, -1117387089, 1015178277, -1125915034, -1130896954) + + W(2, -1127519944, -1118558929, -1106263811, -1113928672) + + W(3, -1098638288, 1041393441, -1112838969, 1035998582) + W(4, 1053766130, 1051919929, 1056071340, 1052175436) + + W(5, 1062605050, 1047707912, 1053546873, 1049473086) + + W(6, -1103359682, -1096189801, -1097091836, -1086367179) + + W(7, -1096005139, -1102452900, -1101730612, -1095432549) + + W(8, -1104704049, 1032200864, -1111082358, 1015736929) + + W(9, -1120379023, -1123219920, 1019333603, -1136688761) + + W(10, 998920846, 1028526614, -1149496748, 1019181009) + + W(11, -1115659002, 1027645503, -1131717807, -1136451331); + sum2 = + W(0, -1143112647, 
992704062, -1124855502, 1038449046) + W(1, -1105916124, 1035072543, -1114904409, 1019251456) + + W(2, 1034689803, -1109431235, 1033324328, -1098090520) + W(3, 1038361982, 1040866773, 1032024575, -1164332444) + + W(4, -1115356591, 1039097516, -1073085951, -1063972596) + + W(5, 1062319475, 1085456662, 1062308854, -1102205120) + + W(6, -1121270233, 1048248946, -1106299172, -1091325299) + + W(7, -1090314642, 1056006422, 1043935118, -1107459099) + + W(8, 1017968792, -1124530814, 1023517229, -1113483013) + + W(9, -1141826407, 1034240002, -1124783482, 1032935811) + + W(10, 1021729984, 1005857263, -1126326370, 1015994888) + + W(11, 1013997779, -1134900259, -1135264403, 1001043447); + WS(-1104754908, -1106735671); + sum1 = W(0, 1030490103, 1032595044, -1128810020, 1032319302) + W(1, 1019231224, 1025969502, 1016077744, 1024124136) + + W(2, -1117923535, 1002237924, -1122216143, -1120462507) + + W(3, 1025980110, -1123369072, 1034830735, -1104242198) + + W(4, -1088908700, -1094516030, -1092804364, -1102093923) + + W(5, -1088198897, -1096286026, -1102086589, -1103741447) + + W(6, 1033028505, 1029213684, 1050263192, 1063837581) + W(7, 1058220138, 1054477272, 1045907999, 1049334234) + + W(8, 1045999740, -1144798556, 1022746927, -1106958295) + + W(9, -1107413149, 995801516, -1115665070, 1030012489) + + W(10, -1120248133, 1015682178, -1129595020, 1030078356) + + W(11, 1021953856, -1153493672, 1019926661, 1022398552); + sum2 = + W(0, -1116247065, 1015145430, 986051432, -1117862601) + W(1, -1114715779, 1034116930, -1144744186, 1026244953) + + W(2, 1035833889, -1107082251, -1114579693, 1043695503) + + W(3, 1039299404, -1114493167, -1120378069, 1019204162) + + W(4, -1087068607, -1079268655, 1022672030, 1067901888) + W(5, 1059152338, 1008422125, 1020328850, -1111927269) + + W(6, -1107323417, -1102842161, 1049680335, 1032100715) + W(7, 1040660749, -1115195239, 1018816198, 1021018582) + + W(8, -1109274147, 1016322550, -1111459111, 1026073955) + W(9, 992278036, -1173738319, 1033325633, 
-1123657301) + + W(10, 1021616338, -1133554677, 1010843781, 1017398706) + + W(11, 998891466, -1139208557, -1138764845, 1016479082); + WS(-1092340590, -1082645376); + sum1 = + W(0, 1018128252, 1018184990, 1028162329, -1117677972) + W(1, -1115495745, -1115409474, 1015642046, -1118723567) + + W(2, 1033191659, -1106475329, -1101261661, -1102532636) + + W(3, 1031605429, 1010011868, -1127814216, -1112449263) + W(4, 1040446513, 1036685563, 1061713267, 1042646693) + + W(5, -1089327101, -1106573237, -1105713919, -1112423953) + + W(6, -1112709788, -1105689791, -1086509296, 1038532783) + W(7, 1064314083, 1046976369, 1039016129, 1030956248) + + W(8, 1031456758, 1040219053, 1042438664, -1111899057) + W(9, -1100451664, -1105951352, 1010066702, 1022848097) + + W(10, -1113441282, -1127103440, -1123459213, 1018898486) + + W(11, 1033438227, 1029058545, 1013299750, -1130132966); + sum2 = + W(0, -1107775669, -1121347079, -1118975841, -1121125435) + + W(1, -1123325897, 1012434826, -1129695564, 1006663086) + W(2, 1040551451, 1042459857, 1031184422, -1152381105) + + W(3, -1105935240, -1115286413, -1124425722, -1111096901) + + W(4, 1027302533, -1122087227, 1016161914, 989383522) + W(5, 1045806619, -1125562074, -1111270899, -1114174571) + + W(6, 1033412933, -1113656153, 1035641742, -1114487413) + W(7, 1041762467, 1029416951, 1028252326, -1127668218) + + W(8, -1135101756, 1017950151, -1109563699, 1046120628) + W(9, -1114574520, 1035110196, 998528980, 997585257) + + W(10, -1131050762, -1130160250, 1017438602, 987844130) + + W(11, -1117833439, -1173771715, -1121778743, -1126450718); + WS(1052284526, 1042464092); + sum1 = W(0, 1010280196, -1136974408, 1026365340, 1020617774) + + W(1, 1023947782, -1127515216, -1129271461, 1007810505) + + W(2, 999914700, -1110845507, -1109035161, -1110448219) + + W(3, -1110075532, 1038195741, -1114945332, 1019473092) + W(4, 1057530557, 1046554367, 1060667985, 1063323154) + + W(5, 1043924417, 1050039723, 1042422080, 1052688005) + + W(6, -1098111272, -1100087104, 
-1090659644, -1094987596) + + W(7, -1095988958, -1097163609, -1105136802, -1095692091) + + W(8, -1101387838, -1110795931, -1102616600, -1105684762) + + W(9, -1124311688, 1018127786, -1124435598, -1114597706) + + W(10, 1027464615, -1128121686, 1001926272, 1025859742) + + W(11, 1017827156, 1034844538, -1130637466, 1027019478); + sum2 = W(0, 1025355846, -1124685033, 1029205900, 1011755468) + + W(1, -1113277680, 1035269289, -1122502097, 1013457876) + + W(2, -1119913429, 1010871130, 1016143206, 1033794365) + W(3, -1140030878, 1020099653, 991986873, 1035095720) + + W(4, 1034044275, 1036346050, -1128364593, -1133342818) + W(5, 1020927778, 1035477071, 1011575434, 1029499077) + + W(6, -1140464466, 1032812135, 1071964190, 1067600837) + W(7, 1039362648, 1033733070, 1008065452, 994468553) + + W(8, -1091074448, -1086522508, -1074918519, -1089329657) + + W(9, -1106053895, -1108456989, -1146528221, -1106105254) + + W(10, 1041357090, 1037018199, 1033277276, -1111447139) + + W(11, -1118126225, 1032003060, -1117647411, 1009180302); + WS(-1085785015, -1080175544); + sum1 = W(0, 1035373246, -1125806618, -1118822551, 1044894533) + + W(1, -1102223008, 1041222777, 1023709757, -1122183565) + + W(2, -1103151077, 1045216768, -1094452856, 1039875576) + + W(3, -1151103277, -1094574331, 1048044105, -1103877399) + + W(4, 1058377668, 1050584367, 1054861331, 1046782533) + W(5, 1057861265, 1051791466, 1043274206, 1058273114) + + W(6, -1091874455, -1094766367, -1107649205, -1090132878) + + W(7, -1090472533, -1111571937, -1098390656, -1092114299) + + W(8, 1025579124, 1043611260, -1106387815, 1049375324) + + W(9, -1115030560, -1137989279, 1049548184, -1106419572) + + W(10, -1111695537, 1043525714, -1102079139, -1105407270) + + W(11, -1144672552, -1102123612, 1041305154, -1116205169); + sum2 = + W(0, 998449416, -1121990028, 1039055377, -1115523342) + W(1, -1132069983, 1032292249, -1128335569, 998312734) + + W(2, -1118465870, 1035743758, -1103571680, 1049274722) + + W(3, -1106547389, -1126693677, 
1034009109, -1116877357) + + W(4, -1124628345, 1040306990, -1098483719, 964150684) + W(5, 1024621733, -1103504000, 1043582890, -1116867442) + + W(6, -1148690252, 1001004257, 1060096717, 1077176219) + W(7, 1060014437, 1016774454, -1107960970, -1139511962) + + W(8, 1037770975, -1132010249, -1108861100, 1047972499) + W(9, -1098623231, 1034159573, 1020716885, 1042264637) + + W(10, -1105952484, 1037805583, -1087084008, -1070507600) + + W(11, -1092504349, -1105946989, 1041667680, -1102914489); + WS(1041580764, 1025947967); + sum1 = W(0, 1032565172, -1116825273, 1019951750, 1022134396) + + W(1, -1111515158, 1034025399, -1110133221, -1159719714) + + W(2, 1034662848, -1104706235, -1115255997, -1107214727) + + W(3, -1098820316, 1032808736, -1102823188, 1021100817) + W(4, 1053611982, 1050747028, 1050206724, 1063007652) + + W(5, 1060757006, 1052025888, 1043880105, 1052817434) + + W(6, -1093835868, -1092905173, -1095669879, -1096926641) + + W(7, -1089733625, 1049974094, -1099659531, -1119725058) + + W(8, -1131110976, -1131069497, 1036897764, -1105182467) + + W(9, -1098715869, -1104277767, -1106501955, -1106408671) + + W(10, 1036244351, -1119290642, 1017394926, 1029927621) + + W(11, -1106823604, 1037476046, -1114565306, -1122404893); + sum2 = + W(0, -1132209742, 986836209, -1121945777, -1170553346) + W(1, -1138085578, -1127752664, 1024317954, -1170071298) + + W(2, 1031078182, 1029631070, -1119773491, 1020231728) + W(3, 1023900693, -1133642976, 1016594015, -1123929889) + + W(4, -1131885988, 1025515900, 1029199828, -1120148774) + + W(5, -1122079840, 1019687015, 1019770946, -1134784812) + + W(6, -1109418986, -1129378830, -1113141662, 1055904551) + + W(7, 1057958803, 1044815557, -1115676013, 1040873658) + W(8, 969823813, 1027058278, 1030656895, -1099466937) + + W(9, -1089769002, -1096970616, -1105180704, -1104818759) + + W(10, -1135041624, 1012367442, -1116640321, 1014349374) + + W(11, 1019607807, 1019685270, 1040370156, 1020720783); + WS(-1101021916, -1101341893); + sum1 = W(0, 
1026213188, -1128959914, 1026395551, -1114900877) + + W(1, -1152047246, 1012420650, 1006057769, 1026345301) + + W(2, -1104193847, 1029994953, -1146733010, 1036038140) + + W(3, 1031817213, -1118727997, 1021561044, -1106810537) + + W(4, -1099125811, -1096018912, -1093634574, -1086773562) + + W(5, -1090412335, -1107844781, -1104575744, -1104632714) + + W(6, 1055643976, 1049220736, 1055313981, 1052012604) + W(7, 1060627153, 1051291321, 1049980081, 1047342967) + + W(8, 1025577212, -1111820508, -1119144868, -1127551698) + + W(9, -1111302883, 1025725832, -1112108341, 1003961523) + + W(10, -1110018309, 1025253614, -1114303945, -1123712161) + + W(11, 1028280877, -1119269431, 1031833384, 999050183); + sum2 = + W(0, -1131988010, 1023490159, -1126411872, -1164111651) + W(1, -1121501747, 1031886393, -1117332099, 1035364539) + + W(2, -1111316079, 1007442131, -1122871173, 1046145376) + W(3, 1040330964, -1109560063, 1016982738, 1005935896) + + W(4, 1041465340, -1100903601, 1038743702, -1093182708) + + W(5, 1049859497, -1098635398, 1035352171, -1113093917) + W(6, 1024389323, 1055734435, 1080207498, 1078445253) + + W(7, -1068399047, -1066503158, -1109489195, 1012210427) + + W(8, -1121573843, -1125698351, 1043717463, 1049726225) + + W(9, -1122596352, -1100760677, -1113462329, 1035980752) + + W(10, -1149117460, -1131908940, -1114017647, -1119425741) + + W(11, 1019240934, 1009702615, 1032687935, -1141270643); + WS(1046490332, 1041425064); + sum1 = W(0, 1024529499, 1033054902, 1031945755, -1146773895) + W(1, 1029605664, 1004520717, 1031502108, 1024614604) + + W(2, -1116828595, 1002731779, 1040980232, -1160896989) + W(3, 1052617834, 1015420456, 1044142352, 1032157630) + + W(4, -1093042204, -1098109922, -1092614541, -1086741475) + + W(5, -1091564618, -1090483479, -1094325635, -1086427940) + + W(6, 1043650965, 1045029126, 1058500980, 1055608303) + W(7, 1058257970, 1019101890, 1041689704, 1049761551) + + W(8, 1015429414, -1106337284, 1033182334, -1109695552) + + W(9, -1174108025, 
1029823567, -1123497953, 1041880275) + + W(10, 1019425762, 995804238, 1032419541, -1179835378) + + W(11, 1044059953, -1127672093, 1032927635, 1040104968); + sum2 = W(0, -1122431319, -1114479591, 1024826864, -1100833312) + + W(1, 1043724086, -1109354990, -1130104331, 1015245663) + + W(2, 1026151548, -1106953243, 1048064546, -1097347844) + + W(3, 1037967160, -1109085075, -1110876131, 1007980535) + W(4, -1122010793, 999999917, 992235770, 1040448763) + + W(5, 1044292364, 1032432738, -1143420941, 1040568157) + W(6, 1049905644, -1108260971, 1050719290, 1045454882) + + W(7, 1049686405, -1111087807, -1098496523, 1043523736) + + W(8, -1102178930, -1125976243, -1123774104, -1096948195) + + W(9, 1038302972, -1108325988, -1118752296, -1113347688) + + W(10, -1124048569, -1126306455, 1025075060, -1105554812) + + W(11, 1034346164, -1114960687, 994535386, 1032072922); + WS(-1096079726, -1086813702); + sum1 = W(0, -1135457777, 1020632987, -1136898909, 1041289173) + + W(1, 1012368645, 1002632984, 1024629150, -1127204699) + + W(2, -1105268453, -1134572551, -1100910973, -1166801940) + + W(3, 1038525034, -1102504381, 1029902179, -1110256943) + + W(4, 1045566625, 1038076635, -1141635906, 1058638213) + W(5, -1104077708, 1033251913, 1009307365, 1022942147) + + W(6, -1104980990, -1125909619, -1094783105, 1057673490) + + W(7, -1100424110, -1111352769, 1012100357, -1133590617) + + W(8, -1105625101, -1145052038, 1032999502, 1030475041) + + W(9, -1113674176, -1113040784, 992583714, -1114381639) + + W(10, -1128467237, 1035122115, -1125440663, 1035865453) + + W(11, 988204202, -1119749552, 1027550920, -1124393059); + sum2 = + W(0, 1009177065, -1120740066, 1031366584, 1041576942) + W(1, -1100270767, 1045439690, -1125959183, 1033773937) + + W(2, -1117003048, 1013092463, -1102800003, -1090661723) + + W(3, 1059100650, -1097382737, 1022453518, -1106788899) + W(4, 1042183695, 1025589372, -1111208570, 1068855162) + + W(5, -1085550755, 1044086134, 1032267243, 1025170288) + W(6, -1105766164, 1042721142, 
-1149080533, 1040487387) + + W(7, 1023619821, -1099098175, -1126080241, -1106320787) + + W(8, -1112946812, -1113055730, -1103263899, 1030911277) + + W(9, -1103276630, -1114239046, -1107084863, 1019625936) + + W(10, 1019480223, 1026018809, 1020058227, 1007104581) + + W(11, 1029611268, -1162581429, 1037226018, -1116554192); + WS(1059841719, 1028884484); + sum1 = W(0, -1113543486, -1112448166, -1123514455, 1014814024) + + W(1, -1116573889, -1120616579, -1132301434, -1127146586) + + W(2, -1108411818, -1111506273, -1104706915, -1096756366) + + W(3, -1106354949, -1107115957, -1117349474, -1105758103) + + W(4, -1112074625, 1034453786, 1033474668, 1052483434) + W(5, 1048710884, 1049505778, 1050151687, 1056594572) + + W(6, 1058342439, 1041783662, 1050338440, 1043430567) + + W(7, -1116369607, -1117001986, 1011608836, -1106214629) + + W(8, -1101044205, -1112657624, -1103529079, -1099567912) + + W(9, -1110811775, -1119641312, -1133908615, -1113349351) + + W(10, -1143395087, -1122260358, -1124590486, -1132902244) + + W(11, -1118491109, -1118809775, 996976763, -1114523384); + sum2 = W(0, -1131846847, 999597315, 997751878, -1114921220) + W(1, 1031101790, -1115596944, 1024706196, -1120544321) + + W(2, -1142019487, 973346488, -1127636492, -1114646695) + + W(3, -1098897499, 1027087956, -1111832500, 1029100436) + + W(4, -1119272484, -1113005256, -1113445057, 1055277760) + + W(5, 1032426103, 1036539833, -1114392819, 1010252474) + W(6, 1043688817, -1112114230, 1056371527, 1041350431) + + W(7, 1038923617, -1123786644, -1130636099, -1123180191) + + W(8, -1110979463, -1112619479, -1097247478, 1019660165) + + W(9, -1105379211, 1026606310, 1012894502, -1125326592) + + W(10, -1128104285, 1003422803, 1021279303, -1127196333) + + W(11, -1138766624, -1129408714, -1130181692, -1164469676); + WS(-1092619630, -1089382730); + sum1 = W(0, -1111364766, 1043539187, 984781533, 1030068626) + W(1, 1026274177, -1111039089, 1024515101, -1120853581) + + W(2, 1023648980, 1020312451, -1115220152, -1101226428) 
+ + W(3, 1038621209, -1105671421, -1101912057, 1015856825) + + W(4, 1039669985, -1096148415, 1055391282, 1026945555) + + W(5, -1081505706, 1045179252, -1132720100, 1027675681) + + W(6, -1112475547, 1048019422, -1085560118, 1059189430) + + W(7, 1059392734, -1105049488, 1041558112, -1133761880) + + W(8, -1102256285, 1027548272, 1048070471, -1108016176) + W(9, 1040580240, 1037378816, 1031071010, 1032748839) + + W(10, 1036516737, -1110790259, 1016263062, -1120756633) + + W(11, 1007770772, 1034683832, -1109624324, 1021933410); + sum2 = + W(0, 1021752691, -1117827136, -1125983252, 990310070) + W(1, -1109957501, 1040724727, -1109878975, 1031887118) + + W(2, -1111677325, 1031932470, -1130612142, -1104195843) + + W(3, -1168184730, -1105272655, 1035492359, -1103738768) + + W(4, -1108952074, -1110048197, 1045228241, 1051763659) + + W(5, 1028013889, -1136328910, -1114456338, -1114723198) + W(6, 1033183913, 1038437939, 1050756049, 1045197632) + + W(7, -1115591069, -1123843648, -1120329563, 1035246991) + + W(8, 1044091848, -1103930854, 1008734718, -1105931725) + + W(9, -1149204891, 1020655719, -1120613434, -1121689275) + + W(10, -1113755692, 1027358915, -1104619422, 1032921649) + + W(11, -1117367196, -1140627498, 1033876328, -1121880867); + WS(1051433070, 1041054969); + sum1 = W(0, 1027474774, 995089009, 1034863641, -1109189270) + W(1, 1039503310, -1126811322, 1033371404, 1010666397) + + W(2, 1025557871, 1019369911, 1049084179, 1046403907) + W(3, 1044512531, 1035909664, 1033234174, 1028564471) + + W(4, -1105663671, 1032468488, -1096978722, -1082587707) + + W(5, -1102025153, -1107200379, -1104819461, -1106755535) + + W(6, 1035613609, 1037418314, 1039456478, -1080639932) + W(7, 1026454114, 1042549431, 1042229543, 1034360184) + + W(8, 1041537220, 1036952965, 1041156238, 1049567823) + W(9, 1040087661, 1039105725, -1131034584, 1031577769) + + W(10, 1027665948, -1122283488, 1041167849, -1112387034) + + W(11, 1039611744, -1137459494, 1031071714, 1033297350); + sum2 = + W(0, 1039097792, 
-1128793460, -1140010118, 1022871695) + W(1, -1138263433, -1112923836, 1026478628, -1110368657) + + W(2, -1103860174, -1106733661, 1043000544, -1106671798) + + W(3, 1046664654, -1123142296, 1037719970, 1042580191) + W(4, -1106228186, 1050938198, -1100268114, 1039252188) + + W(5, -1099105674, -1121884665, -1100173198, 1034926170) + + W(6, 1004583985, -1097187313, 1021007203, 1053089515) + W(7, 1048715277, -1114597930, 1047464456, -1106315502) + + W(8, 1031892389, 1046933969, -1107049139, 1049075999) + + W(9, -1103930146, 1022883749, -1098080196, -1108178582) + + W(10, -1133334119, -1107245750, 1039577114, -1104825734) + + W(11, 1030321621, -1117968576, 1036807670, 1018138445); + WS(-1097236334, -1078251511); + sum1 = W(0, -1147379729, 981774949, -1145120489, -1112510828) + W(1, 975327689, 1008364580, -1140670990, 1022969831) + + W(2, -1107131303, 1007351583, 1033603653, 1043141838) + W(3, 1029606457, 938727969, 1020032977, -1108966769) + + W(4, -1102832418, -1125527926, -1095625930, -1082084799) + + W(5, -1090577043, -1093823475, -1098508888, -1096801949) + + W(6, 1058323440, 1053289720, 1056660465, 1032726417) + W(7, 1058345481, 1053416161, 1046390514, 1049766564) + + W(8, 1023750586, -1121375351, 1026746467, 1033795604) + + W(9, 1025947909, -1131151340, -1119905544, 1037296218) + + W(10, -1116361496, -1124892056, -1128008098, -1117973855) + + W(11, -1148460889, -1125502955, 999577711, 1010324189); + sum2 = + W(0, 1024894779, 1020144998, -1118791847, 1007750652) + W(1, -1137666916, 1023698735, 1009542396, -1129126018) + + W(2, -1102276353, 1025017931, -1107221556, 1046635995) + W(3, -1132306598, 1023586615, 1030880679, 1015842286) + + W(4, -1120020336, -1096890621, 1050182775, 1039741552) + + W(5, 1043589185, -1108361045, -1114600836, 1024872627) + + W(6, -1081787381, -1080908066, -1102161510, 1072604155) + + W(7, 1055213170, -1115556916, 1037616572, -1105259074) + W(8, 1039662448, 992888305, -1130889346, 1039835396) + + W(9, -1110509653, -1122324709, 1000980713, 
-1124270218) + + W(10, 1023553607, -1118655405, -1120230550, -1126400334) + + W(11, 1020541198, 1020891302, -1119304668, 1033066840); + WS(-1095840110, 1065907853); + sum1 = W(0, -1149009607, 1029310442, -1122029108, -1136794300) + + W(1, -1118254622, -1132243967, -1126823604, 1010554921) + + W(2, -1123781617, -1130903741, -1115466131, 964920945) + + W(3, -1115351608, 1043406509, -1116119295, 1040435870) + W(4, 1057715094, 1048478500, 1058442742, 1045634572) + + W(5, 1052165513, 1053323726, 1042519141, 1051535704) + + W(6, -1102734645, 1041326469, -1088947208, -1083008792) + + W(7, -1096684061, -1089856222, -1127763004, -1095672430) + + W(8, -1101560548, 1036129479, -1107277134, -1109559039) + + W(9, 1036543086, 1024712701, 1009014011, -1114598742) + W(10, 1034276991, 1021921030, 1036949012, 1024401462) + + W(11, -1114280609, 1035777670, -1132072137, 1024787534); + sum2 = + W(0, -1122765153, -1119761608, -1115823403, -1123357438) + + W(1, -1105805209, 1031734555, -1111986136, -1126486235) + + W(2, -1112086475, -1118318168, -1121250504, -1119202357) + + W(3, 1024515185, -1130234658, -1134784044, 1026675759) + W(4, 1040388299, 1023061820, -1128744886, 1057284387) + + W(5, -1103324019, 1037222780, -1110952019, -1117543075) + + W(6, 966593378, -1102263327, -1110151426, 1059190541) + W(7, -1106793279, 1050053336, -1107073751, 1021583022) + + W(8, -1114107386, -1113965021, 1045334069, -1118800384) + + W(9, -1164047122, 1025627836, -1114668630, -1124432766) + + W(10, 975247631, -1137846028, -1111548146, 1023792011) + + W(11, -1112106446, 1020007628, -1125560981, -1131408661); + WS(1024351088, -1094038469); + sum1 = W(0, 1016930687, 1024919184, 988693323, 1038267403) + W(1, 1001354275, -1123840710, -1125725377, -1120680465) + + W(2, 1038061004, -1118029957, 1005748798, 1038925818) + W(3, 1032752795, 1034287148, -1110688307, 1036629872) + + W(4, 1052546457, 1050276351, 1052906127, 1062157201) + W(5, 1055274272, 1050137483, 1036684158, -1112935826) + + W(6, -1086527812, 
-1103330015, -1085615881, -1097782940) + + W(7, -1088621706, -1090365828, -1108159426, -1098987606) + + W(8, 1040515838, 1027568116, 1040353352, -1124671689) + W(9, -1113419105, 1017331974, 1018660298, 1039926041) + + W(10, 1006605995, 1004978763, 1030413326, 1031140124) + + W(11, 1009586849, 1023985459, -1129057942, -1120649652); + sum2 = + W(0, 1018662896, -1131745232, -1136728576, -1110788716) + + W(1, -1123530251, -1101470606, -1110519714, -1125138150) + + W(2, 1033363591, 1031409358, 1036576685, 1060006588) + W(3, 1052968790, -1102632350, -1105224546, -1083015355) + + W(4, -1101021011, -1109130821, -1097527529, 1058553385) + + W(5, 1023922010, 1057288665, -1111209476, 1034932287) + W(6, -1107389045, 1050735765, 1043333749, -1120731420) + + W(7, -1100470754, -1097217661, 1038218737, 1047940150) + + W(8, 1013978472, -1122086781, 1029525730, -1118628277) + + W(9, -1117232951, -1112638769, -1121310567, 1023530026) + + W(10, -1120396150, 1024022584, 1015209616, 1030135008) + + W(11, 1018906596, -1121049341, 1024891644, -1113292326); + WS(-1086599863, 1058406314); + sum1 = W(0, -1165120005, -1133002117, -1129945046, 1024249318) + + W(1, -1119162837, 1026085015, -1121003281, -1127955907) + + W(2, 1027409257, -1121445206, -1106007410, -1105883025) + + W(3, -1105279538, -1127421331, -1109094875, 1024561201) + + W(4, 1060099497, 1049800473, 1059141168, 1051534797) + W(5, 1058010671, 1051659429, 1047889907, 1050835423) + + W(6, -1093079745, -1099695891, -1091476505, -1093891126) + + W(7, -1094225343, -1106347590, -1098091099, 999784785) + + W(8, -1114693988, 1033258882, -1129815521, -1124179562) + + W(9, -1106482085, -1112036922, 1030868870, -1097732400) + + W(10, 1019186220, 1025328858, -1128567742, 1017248474) + + W(11, -1123837177, 1031899202, -1123224348, -1141903913); + sum2 = W(0, -1116781913, 1016621488, -1133566515, -1124282558) + + W(1, 1034216133, -1128560494, 991940908, 1017899660) + W(2, 1031543998, 1029140379, 1044191480, -1105795809) + + W(3, 1017978886, 
-1175609568, -1120640387, 1019924506) + + W(4, 1082393585, 1078957479, -1080327233, -1065075267) + + W(5, -1076777978, 1030943022, -1110243052, -1113970478) + + W(6, 1050004941, 1028477447, -1104837212, -1086493521) + + W(7, -1109285108, 1045155438, 1033432207, -1130476854) + + W(8, 1025243259, 1010603263, -1125439138, -1117319975) + W(9, 988101688, 1024551608, -1129511338, 1020472787) + + W(10, -1123250937, -1125975170, 1007793087, 1028403247) + + W(11, -1125200922, 1020903576, -1119399555, 1028374268); + WS(-1102531292, -1104168529); + sum1 = + W(0, -1149508566, -1156103398, 1005530819, 1019304359) + W(1, -1111288609, -1152861762, -1123770879, 992160490) + + W(2, 1032023755, -1123937923, -1114986325, -1105594348) + + W(3, -1100927253, 1042848102, -1131000647, 1048571064) + W(4, 1058049088, 1044530702, 1057825341, 1061967565) + + W(5, 1057378026, 1043852869, 1032195229, 1021567685) + + W(6, -1091020399, -1093487559, -1088576155, -1113700815) + + W(7, -1096522221, -1100187457, -1097716728, -1095320739) + + W(8, -1123106485, 1028416434, -1124442300, -1132463754) + + W(9, -1107169312, 1030031137, -1127676038, 1015048992) + W(10, 1018386839, 1022122127, 1027828556, 1023613146) + + W(11, -1118265958, 1031835352, -1124424850, -1151822630); + sum2 = + W(0, -1112466451, -1130983341, 1032826781, -1114093653) + W(1, 1040752192, -1154197073, 1026229169, 1032051747) + + W(2, 1034684305, 1035765483, -1117553496, -1101908611) + + W(3, -1105770401, -1106309648, 984953298, -1113409477) + + W(4, -1101371049, -1113050760, -1096382267, 1063677104) + + W(5, -1132405995, -1114901667, 1023704972, -1104964374) + + W(6, -1100658772, -1100149859, 1042208912, 1062452463) + + W(7, 1047214162, -1095028755, -1109270652, -1107539022) + + W(8, 1043071493, 1039321546, -1131759707, -1106482074) + W(9, -1105645940, 1034888818, 1035103126, 1008251057) + + W(10, -1119405084, -1140521940, -1127348451, 1028341682) + + W(11, 1022142418, -1118255322, -1118485150, 1025943565); + WS(-1107552696, 
-1110437397); + sum1 = + W(0, 994440345, -1123893979, -1123659045, 1033249360) + W(1, -1115012423, 1025628848, -1120012907, -1138236651) + + W(2, -1114854865, -1115498472, -1107252796, -1098484960) + + W(3, -1105215024, -1113844817, 1015022176, -1112267573) + + W(4, 1038159592, -1143015444, 1048547885, 1066459081) + W(5, 1050116824, -1114931568, -1139703834, 1030992175) + + W(6, -1169396004, -1103811554, -1102945115, 1054059005) + + W(7, -1110458371, 1027966800, -1137430074, 1038350450) + + W(8, -1112745281, -1176837895, -1113210869, -1092775086) + + W(9, -1107508006, -1115481499, -1123286831, -1114208046) + + W(10, 1003079356, -1126823399, -1127252913, 1036761683) + + W(11, -1117476787, -1139958260, -1143819946, -1119805695); + sum2 = + W(0, -1128900045, -1134566747, 1023101249, -1119409466) + W(1, 1014344875, -1124604689, -1135550339, 1011013307) + + W(2, 1032112026, -1121844433, 1024457633, 1041370918) + W(3, -1138667107, 1023011873, -1123574415, 1018662193) + + W(4, -1122103841, 1023900033, -1130431017, 1071310745) + W(5, 1015570157, 1025187637, 1029193653, -1118001398) + + W(6, 1020429393, 1031965476, 1010339139, -1073631838) + W(7, 1024644617, 1038341472, -1117036677, 1034637136) + + W(8, -1123818335, -1168200234, -1111989275, -1111141033) + + W(9, -1128248465, 1032459330, 999769253, 1020580065) + W(10, 1025087925, -1116320886, 1037399444, -1131868341) + + W(11, 1027301561, -1118206868, -1124415769, -1136885571); + WS(-1106869980, -1095812171); + sum1 = W(0, 998635262, -1132768195, 1031165051, -1109012795) + W(1, 1031921458, 959153786, 1015400066, 1020404450) + + W(2, 1030538232, 1032306163, 1040655940, 1049530762) + W(3, 1035845131, 1029124089, 1034791747, -1126776961) + + W(4, -1104276842, 1027943897, -1094369165, -1079963696) + + W(5, -1094529094, -1138415831, -1107836910, -1127281161) + + W(6, 1021368885, 1042581143, 1043839724, -1098205560) + W(7, 1041010327, 1043286153, 1040781721, 1032119517) + + W(8, 1037657452, -1130589992, 1045125152, 1052736873) + 
W(9, 1041646697, 1033263566, -1127518347, 1032079673) + + W(10, 1012233735, -1127936403, 1030674671, -1106402911) + + W(11, 1035635524, -1117746707, 1026900011, 1007765715); + sum2 = + W(0, -1137391706, -1152109415, 1015680093, -1108578627) + W(1, 1012334874, -1126811133, -1146463859, 1013671002) + + W(2, 1016897725, -1123613558, 1031100358, 1031185182) + W(3, 1018767069, 1016912269, -1123894542, -1129388765) + + W(4, -1122041158, -1117579022, 1032233585, 1068869480) + + W(5, 1031129926, -1135215962, -1172726683, 1025914590) + + W(6, 1032188407, 1034093919, -1107212716, -1079207220) + W(7, -1118301998, 1010098650, 1010207290, 998321459) + + W(8, 959684203, -1123954990, -1144448755, -1112638895) + + W(9, -1145613555, 1002322227, -1163026381, -1185201771) + + W(10, 1004755699, -1142267571, 1020208893, -1131161181) + + W(11, 1014639674, 1010341946, -1128211133, 1014227898); + WS(-1106551004, 1058945856); + sum1 = W(0, -1136733265, 1029534949, 1040114552, 1023973738) + W(1, 1008178533, -1121731675, -1122738710, 998670125) + + W(2, 1027999917, -1110759309, -1101184472, -1136820001) + + W(3, -1120539979, 1050535890, 1036280349, -1112884987) + W(4, 1036329486, 1039804839, 1055915689, 1045009467) + + W(5, -1099722578, -1092228133, -1110742697, -1133144549) + + W(6, 1021052191, -1090459221, -1088507490, -1102642413) + + W(7, 1057115504, -1141944531, 1036514925, 1024140334) + + W(8, -1112842790, 1042972690, -1116549941, 1048917175) + + W(9, -1103385510, 1042662182, -1123309658, 1040183891) + + W(10, 996925861, -1126301053, 1035692146, -1116120689) + + W(11, 1032281663, 1025401864, -1122836256, 1009223461); + sum2 = + W(0, -1166199047, 1008667864, -1137339492, -1142224481) + W(1, -1152330850, 1020018908, -1116618549, 1015962596) + + W(2, 1029866216, -1111274931, 1026227362, -1119220040) + + W(3, 1031356244, -1116678638, 1026327024, -1189397276) + + W(4, -1143586281, 1037662319, 1052978055, -1079795268) + + W(5, -1109068899, 1041694498, -1122243464, 1027749752) + W(6, 
1016771040, 1050961303, 1073672156, -1075413433) + + W(7, 1029037052, 1026323256, 1021575344, -1119692611) + W(8, -1123676489, 1039205717, 1048063806, 1028203090) + + W(9, -1113725553, -1115208722, 1002430881, -1114291930) + + W(10, -1174784654, -1119230230, -1117826448, 1028246046) + + W(11, -1160612324, 1024440460, -1126523302, 1031666290); + WS(1045979868, -1116118129); + sum1 = W(0, 1019079208, -1124899402, -1142300538, 1011594153) + + W(1, -1134190949, -1132197046, -1116332853, -1135144470) + + W(2, -1115974509, -1114857165, -1110287856, -1099509298) + + W(3, -1112842949, -1107483943, -1111331677, 1018268010) + + W(4, 1054347779, 1046800787, 1053005207, 1063937495) + W(5, 1058166871, 1048177899, 1047873428, 1056893199) + + W(6, -1096855346, -1098629066, -1093283235, -1097005655) + + W(7, -1090046872, -1104885511, -1102896822, -1096686406) + + W(8, -1118610636, 1029706377, 1028644213, 1015974258) + + W(9, 1040979430, -1129150352, 1028514810, -1118433447) + + W(10, -1120438410, 1017939836, -1131155654, -1116499825) + + W(11, -1119992573, -1120133576, -1118080608, -1112688517); + sum2 = + W(0, -1132220574, -1115410889, 1017347763, -1123931551) + W(1, 1027749497, -1111299822, 1025854529, -1123916419) + + W(2, 1034058342, -1116250603, 1029372102, -1101916549) + + W(3, -1104316591, 1035393713, -1107314572, 1033668899) + + W(4, 1008450820, -1126553639, -1105866140, 1053545154) + + W(5, 1054419707, -1102688086, 1036131430, -1144517652) + + W(6, -1132397996, -1097158033, 1060185134, 1060913993) + W(7, 1033775493, -1115290197, -1106841697, 990325833) + + W(8, -1105536207, -1108668742, -1106418349, -1104852271) + + W(9, -1106993048, -1102129692, -1149271050, -1114889391) + + W(10, 1025437040, -1118211898, 1034066570, -1102379864) + + W(11, -1133416673, -1209279719, -1133826631, -1129688184); + WS(1058774455, 1069380246); + sum1 = W(0, 1032626516, -1119757626, 1033017026, 1015522813) + + W(1, -1128327131, 1036888920, 1032769109, -1148588495) + + W(2, -1114780225, 
1038823639, -1098306133, 1034533838) + + W(3, -1103839696, -1112718001, -1126986782, 1012070438) + + W(4, 1045065548, 1040221656, 1021036585, -1082356272) + W(5, 1057244812, 1032300191, 1043416793, 1045596067) + + W(6, -1111975378, -1115053555, 1050529784, -1095104409) + + W(7, -1133328720, -1111385431, -1124233691, -1105784401) + + W(8, 1025328225, 1020222778, -1115391173, 1045288819) + W(9, -1116298540, 1042918995, 1029500622, 1037659662) + + W(10, -1121960111, 1037972921, -1112361500, -1139302935) + + W(11, 1018289699, -1110693582, 1027375220, -1114680879); + sum2 = W(0, -1129789987, 1019234847, -1128803579, -1112792165) + + W(1, 1011967806, 995307959, 1018394359, -1112288614) + W(2, 1017265095, -1124971531, 1043137983, 1047657371) + + W(3, 1034970984, 1010908150, -1107223361, 1027672251) + + W(4, -1124616639, -1114025560, -1132476030, -1114992592) + + W(5, -1098689491, -1115509379, 1006851110, -1128503755) + + W(6, -1116091154, 1022067335, -1102585265, 1052022695) + W(7, 1033604172, 1031244615, 1032350856, 1024636851) + + W(8, 1033010488, -1114442452, 1028851843, -1115343414) + + W(9, 1002830796, -1121158047, -1114060980, 1027935499) + + W(10, 1011223606, -1117824661, 1031833634, -1136738894) + + W(11, 988094894, 1025799099, -1118870225, -1127411999); + WS(1065814108, -1098421406); + sum1 = W(0, 1030554015, -1131576032, 1020712308, 1019668842) + W(1, -1178997255, 1023970854, 1011631425, 1023862096) + + W(2, -1104400078, 1022555209, -1130072911, -1118231891) + + W(3, 1041059117, -1115494780, 1035736611, -1104560304) + + W(4, -1097091133, -1099298246, -1094842433, -1084181571) + + W(5, -1098796514, -1094912271, -1090107010, -1092396649) + + W(6, 1050276747, 1048827844, 1057743463, 1051719984) + W(7, 1058357819, 1056339293, 1051879221, 1057941592) + + W(8, 1045406982, -1106685173, 1040093248, 1018405640) + + W(9, -1118903138, -1115388505, -1113309730, -1118151090) + + W(10, -1119880969, -1132305362, -1122176686, 1026194364) + + W(11, 1004376640, 1024072531, 
1009992363, 1029155252); + sum2 = W(0, -1104337717, 1027104118, -1114433477, -1122138940) + + W(1, 1048327612, 1018207462, 1015118770, 1054676844) + + W(2, -1085961218, 1036459641, -1092980271, -1101372003) + + W(3, -1098271754, -1115566688, -1101224844, -1105452167) + + W(4, 1065899128, -1117484391, 1062039746, 1077763913) + + W(5, -1088377514, -1079225367, -1106150830, -1079350028) + + W(6, 1055232204, -1101810882, 1049041629, 1071302707) + + W(7, -1084489503, -1087139921, 1036291830, -1090077553) + + W(8, -1090132108, 1052764213, -1092098978, -1123596042) + + W(9, 1027311380, 1042626102, -1105072819, 1053993534) + W(10, 1023963317, 1014918739, 1035158266, 1027553578) + + W(11, -1115760458, 1025357528, 1032466828, -1115160051); + WS(-1083527351, -1082119000); + sum1 = W(0, 1025283773, -1132057884, 1024000280, -1117776915) + W(1, 994019357, -1146975260, 1021506300, 1016847260) + + W(2, -1100516229, 1028453762, -1124447466, 1027818609) + + W(3, 1030853236, -1120656758, 1023329542, -1111423375) + + W(4, -1103907601, -1097545679, -1096840081, -1088298777) + + W(5, -1090997910, -1097629690, -1102673054, -1097673748) + + W(6, 1054198743, 1048327710, 1054350616, 1057412078) + W(7, 1059405315, 1052404744, 1051692752, 1047886934) + + W(8, -1139734483, -1114897689, -1109407791, -1112548849) + + W(9, -1107996896, 1035903152, -1110888465, 1034953432) + + W(10, -1115248201, 1021031566, -1115380998, -1139302503) + + W(11, 1029448614, -1119316016, 1026998601, -1149960989); + sum2 = W(0, 1023717076, 996584188, -1122858228, 1005163886) + W(1, 1025356913, -1118691920, 1028885707, -1114207359) + + W(2, 1034248479, -1120794590, 1037241484, -1124781820) + + W(3, -1115274025, 1026099362, -1121079636, 1010502519) + + W(4, -1110594365, 1045315954, -1111180041, -1088752945) + + W(5, -1100621689, 1052626156, -1114153113, 1038857859) + + W(6, -1113079527, -1094151830, -1064957365, -1066051811) + + W(7, 1081496726, 1084041658, 1034711477, -1115018893) + + W(8, 1030579691, -1113958061, 
-1106054130, -1113733983) + + W(9, 1049150725, 1032870315, 1042989109, -1106741724) + W(10, 999456782, 1021526858, 1018369486, 1031457177) + + W(11, -1112407773, 1022214782, -1108284893, 1023330420); + WS(1031453552, -1108810221); + sum1 = + W(0, -1122540068, -1141490084, -1132153597, 1029503994) + + W(1, -1126370513, -1127727061, -1132743443, 1024122485) + + W(2, -1106571135, 1028200745, -1106394603, 1029495448) + W(3, -1118016922, 1043712879, 1035505061, 1041979953) + + W(4, -1105389195, 1023785053, 1041720002, -1089108304) + W(5, 1047749927, -1092241143, 998106632, -1098695505) + + W(6, 1051764564, -1104087736, 1055195957, -1096098616) + W(7, 1047204316, 1028938666, 1043190746, 1039644831) + + W(8, -1121040220, 1027661528, -1111907522, 1031937892) + + W(9, -1112974291, 1025162247, -1152510101, 1002213552) + + W(10, -1136080166, 1023137883, -1123745267, 1034024448) + + W(11, -1121356173, 1028910871, -1121641711, 1025100697); + sum2 = + W(0, 1032316109, -1122133197, 1021061413, -1122180337) + W(1, -1115376331, -1123716599, 1008734994, -1144451812) + + W(2, -1117365450, 1033736971, -1117276879, 1009179554) + + W(3, 1034450077, -1100616286, -1130550593, -1102305630) + + W(4, 1028239450, -1104139010, -1090538079, 1022309577) + W(5, -1125496737, 1059658657, 1014965154, 1053491799) + + W(6, -1106184057, 1045998485, -1087751568, 1065712953) + + W(7, -1100319643, -1100023584, -1115246006, -1107982835) + + W(8, 1039709205, -1126266788, 1056538813, -1098763613) + W(9, 1017435581, 1022563437, 980175008, -1103966134) + + W(10, -1127567978, 1029398164, -1106793212, 1016414657) + + W(11, 1032114232, -1113653553, 1034308617, 1020870093); + WS(1060627383, -1111244297); + sum1 = W(0, -1124372048, -1124233202, -1115057922, -1126359548) + + W(1, -1135390133, 999175032, -1137033126, -1120904377) + W(2, 1041984816, 1023856878, 1037843884, 1013283185) + + W(3, -1106598024, -1109047521, -1108189319, -1127664786) + + W(4, -1098607926, -1097555068, -1092180401, 1035134080) + + W(5, 
1055432750, 1050330552, 1046692025, 1039361415) + W(6, 1048999428, 1050454749, 1057072683, 1037566628) + + W(7, -1088869991, -1097603878, -1106138715, -1115643873) + + W(8, -1110297584, -1119853981, -1100337986, -1117600845) + + W(9, 1045634091, 1040799043, -1129123036, 1042168959) + + W(10, 1037179887, -1124350564, 1037306108, -1180500637) + + W(11, -1107300041, -1132131255, -1126029544, -1132202938); + sum2 = + W(0, 1027553654, -1128416633, -1120268362, 1026192109) + W(1, 1010812163, -1133939099, 1033310247, 1007338654) + + W(2, -1105419467, 1035214610, -1102082588, 1034096310) + + W(3, -1109637676, 1042926233, -1103557522, 1039202903) + W(4, -1141314538, 992160749, 1039698771, 1028842291) + + W(5, -1128101012, -1110346070, 1041169016, -1150310139) + + W(6, 1051401064, -1098506313, 1041311911, 1044707561) + W(7, 1040877432, -1107620104, 1044637450, -1101325278) + + W(8, 1061407816, 1048701957, 1065441970, -1108357660) + + W(9, -1080219839, -1089748519, -1106511250, -1094091750) + + W(10, -1086513034, 1023990179, -1083801169, 1027929117) + + W(11, 1058347144, 1054974548, 1012370076, 1054706164); + WS(1037089208, -1117319375); + sum1 = W(0, -1117307261, -1109658215, -1104716662, -1095157370) + + W(1, -1115283233, -1113906562, -1127634429, -1150522598) + + W(2, -1127081515, 1037266337, 1045840649, 1048384396) + W(3, 1042005149, 1026318614, 1035073906, -1129730364) + + W(4, -1090315640, -1103803502, -1108965493, -1089739445) + + W(5, -1098438340, -1090508193, -1096110967, -1090097982) + + W(6, 1055484319, 1036834781, 1059715066, 1050929076) + W(7, 1059599591, 1050537744, 1039184033, 1056916499) + + W(8, 1019801962, -1121027890, -1120952094, -1115632913) + + W(9, -1115851136, 1036887192, -1109698481, 994450682) + + W(10, -1140393804, -1122236391, 1007521585, -1115666157) + + W(11, 1038771059, -1114842616, 1040209394, 1023772594); + sum2 = W(0, -1096641595, -1086741683, -1073585699, -1071753138) + + W(1, -1078410756, -1096289506, -1145261363, 997353903) + W(2, 
1051991389, 1059199896, 1069582799, 1070628328) + + W(3, 1067042424, 1055331220, 1015112751, 1024293693) + W(4, -1107135642, 1044105651, 1057042277, 1065961920) + + W(5, 1057516905, -1098611055, 1043415065, -1101981251) + + W(6, -1120991144, 1025236133, -1123268190, -1098135951) + + W(7, 1032298014, -1106950583, 1007537613, 1025642916) + + W(8, -1123228782, 1032269711, -1116194138, -1144560253) + + W(9, 1026966393, -1155415556, 992611856, 1011854094) + + W(10, -1119770484, 1020415755, -1119862022, 1033682735) + + W(11, -1113740644, 1022995083, -1144895945, -1130196038); + WS(-1089262391, 1050517969); + sum1 = W(0, -1112721887, 1040766980, -1116916633, -1109323658) + + W(1, 1043853249, -1102481809, 1025978335, -1134508350) + W(2, 956356282, 1032255106, -1115255965, 1053521294) + + W(3, -1109293354, 1024224759, 1036451442, 998852344) + W(4, 1041136081, -1099047838, 1050954387, -1090918435) + + W(5, 1049475859, 1046323061, -1129023222, -1126452038) + + W(6, -1112851037, -1136806893, 1047658201, -1090498606) + + W(7, -1111100085, -1101889105, -1112184834, 1037224108) + + W(8, -1118798326, 1044101949, -1099177336, 1048722524) + + W(9, 1044026608, -1109421665, 1044252857, -1113020120) + + W(10, 1033047322, -1110781188, 1022949658, 984126311) + + W(11, -1111349460, -1151794972, -1140137768, -1119083507); + sum2 = + W(0, -1120544497, 1042905493, 1045262161, -1104438232) + W(1, 1023630957, -1122372280, 1027866949, -1121491826) + + W(2, -1104964203, -1097077111, -1098230931, 1057982485) + + W(3, -1106247773, 1011970003, 1026340389, 1016978228) + W(4, 1056285145, -1118581415, -1079914544, 1057557770) + + W(5, 1048964135, 1020547274, -1113753028, 1034078959) + W(6, -1109256198, 1026029661, 1056552993, -1104081201) + + W(7, -1114436673, 1033190701, -1107460568, 1034442922) + + W(8, -1107706742, 1025674690, -1103396776, 1048351556) + + W(9, 1018818866, -1103728916, 1046941904, -1106194166) + + W(10, 1033408182, -1118988963, 1021102038, -1136298684) + + W(11, -1112938187, 
1034200267, -1117493164, -1172844144); + WS(1055467886, -1126093527); + sum1 = + W(0, -1115675946, 1039731987, -1108949528, 1014043817) + W(1, 1025589529, -1102050167, 1035820699, -1114599603) + + W(2, 1037777366, -1126460340, -1145838757, 1049783617) + W(3, 991251120, 1042727635, -1123043835, 1024161077) + + W(4, 1043818615, -1103677379, 1060367897, -1102044139) + + W(5, -1109258124, 1049311757, -1116399889, 1046137806) + + W(6, -1096353162, 1012406341, -1088026493, -1095567535) + + W(7, 1051881528, -1095440907, 1032817677, -1116994158) + W(8, -1150025924, 1031349769, 1034695514, 1044285022) + + W(9, 1011738461, -1115570324, 1040150333, -1110796915) + + W(10, 1035905869, -1103947080, 1034985426, -1129716323) + + W(11, -1107675487, 1042590402, -1104125432, -1140552997); + sum2 = W(0, -1115939175, 1042939513, -1106984126, 1036071940) + + W(1, -1133154938, -1110872047, 1038271101, -1124388167) + + W(2, -1111463085, 1009748958, -1104760450, 1026404786) + + W(3, 1042746589, 1018643239, 1024219237, -1107044825) + W(4, 1026058510, -1101036999, 1046231331, 1060091622) + + W(5, -1109115274, 1042116887, -1104891113, -1137025366) + + W(6, 1015189774, 1048391804, -1101164712, 1059732347) + + W(7, -1090421446, -1092755101, 1018386748, -1112446842) + + W(8, -1116961377, 1019493983, 1013441892, -1113485706) + + W(9, -1102245299, -1104832958, 1043357489, -1123717753) + + W(10, 1003109525, -1128500291, -1127287103, 1036806652) + + W(11, -1110906499, 1042929180, -1108912836, 1034520620); + WS(1062208951, 1065603754); + sum1 = W(0, 1032466911, 1012820793, 1029562264, 1020285479) + W(1, 993753125, 1033268866, -1131051261, 1028903855) + + W(2, 1029341462, 1027274819, 997585115, 1040141401) + W(3, 1032063722, -1153074023, 1015103815, 1040294280) + + W(4, 1059167044, 1052950767, 1057349364, -1108524682) + W(5, 1049874906, 1048703978, 1050186639, 1053103700) + + W(6, -1090134924, -1094303727, -1085543304, -1080037127) + + W(7, -1095616607, -1112675170, -1111683969, -1091636059) + + W(8, 
-1133534677, 1027990706, 1035990447, 1037410929) + W(9, 1039846496, 1023735025, 1040888889, -1118753424) + + W(10, 987089513, 1032570772, -1136982833, -1125543221) + + W(11, 1015158861, -1145342854, 1031358985, 1022936933); + sum2 = W(0, 1035386648, 1023547272, 1025878508, 1026832668) + W(1, 1029828312, -1098240729, 1044150428, -1103643200) + + W(2, 1042887622, -1099060438, -1099619867, -1099104088) + + W(3, 1048769602, -1121505868, 1040524432, -1115369075) + + W(4, -1086410483, 1053956416, -1094814183, 1069089225) + W(5, 1044213412, 1035342184, 1040447750, 1049544904) + + W(6, -1090258329, -1112148135, -1093316197, 1060781490) + + W(7, -1091192206, -1105713821, -1089411157, 1058550641) + + W(8, 1060158548, 1050315819, 1046080958, -1103190742) + + W(9, -1105160931, -1084784918, 1058372334, -1080579858) + + W(10, -1113981975, -1099847621, 1046524888, -1112424275) + + W(11, 1051146364, 1019461112, 1028619520, 1040133188); + WS(-1078970012, -1070975772); + sum1 = W(0, 1023443287, 1008154783, 1008498119, 1032996598) + W(1, -1130007992, 1026985054, 1001505728, 1027285064) + + W(2, -1111813336, 1034628686, 1020510238, -1119594955) + + W(3, 1037186855, -1129455456, 1024916861, -1118416824) + + W(4, -1095332548, -1098370186, -1091304148, -1097391342) + + W(5, -1088889903, -1093273302, -1097878954, -1097408559) + + W(6, 1048282532, 1052880922, 1054365856, 1065196819) + W(7, 1037830473, 1040256647, 1042516535, 1044458956) + + W(8, 1036630695, -1109395910, 1031904252, -1105409007) + + W(9, 1022933044, 1030320779, -1131972409, 1037289397) + + W(10, 999582566, -1140663171, -1128098380, 1030941172) + + W(11, 1003729918, -1138808617, 1018809669, 1005961310); + sum2 = + W(0, -1114009537, 1035366037, -1111463643, -1119334122) + W(1, -1132366661, 1001109746, 1035022899, -1112054465) + + W(2, 1019032853, -1143037026, -1122373240, 1056620469) + + W(3, -1103744316, 1002774938, -1102405348, 1046894720) + W(4, 1024640816, -1101234014, 1068131945, 1075463933) + + W(5, -1075751794, 
-1072865378, 1052446989, 1019415611) + + W(6, 1029944441, -1105571142, 1044188353, -1093783941) + + W(7, 1050699227, -1098127223, 1028903402, -1119380340) + + W(8, -1146945026, 1031686749, -1128619453, 1046426762) + + W(9, -1109652943, -1131952529, -1123943246, 1017598049) + + W(10, -1136399129, 1018164069, -1125359481, 999713778) + + W(11, -1123073190, 1019524751, 983357578, 1013611461); + WS(-1137905088, -1096938393); + sum1 = W(0, -1127951952, -1139830244, -1108742721, 1019792636) + + W(1, 1018370522, 1020799581, 1021418002, -1148599498) + W(2, -1116133184, 1040763803, 1041221580, 1009683312) + + W(3, -1107181880, 1002965001, 1022849180, 1039287108) + + W(4, -1092624022, -1104553656, -1090149133, 1044078592) + + W(5, 1058802794, 1043495791, 1033578226, -1113073030) + W(6, 1045295113, 1046745657, 1057147883, -1112574564) + + W(7, -1085568476, -1103617227, 1032014262, 1022418564) + + W(8, 1023908704, -1103252327, -1103732914, -1127338650) + W(9, 1040869579, 991652540, 1013121207, 1011288682) + + W(10, 1031880695, 1040451684, 1026434167, 1007135764) + + W(11, 1014113348, -1125537591, 1016284007, -1116579933); + sum2 = + W(0, 1036469548, 1017461301, 1017339130, -1095997080) + W(1, -1099654066, -1105423964, -1113359567, -1131580992) + + W(2, -1097965159, -1101551098, -1094477168, 1035631286) + W(3, 1054988432, 1047389393, 1041958533, 1043362318) + + W(4, -1116680345, -1125992347, 1058922542, -1103956246) + + W(5, 1040393300, 1040203374, -1108890296, 1024609216) + W(6, 1001756270, -1115644020, 1035448965, -1105032669) + + W(7, 1039533461, -1122662741, -1130070269, 1025676040) + + W(8, 1032969412, -1124365977, 1040541302, -1112710338) + + W(9, 1017600743, -1114085496, -1134082186, 1015920174) + + W(10, -1131471357, 1016380025, 1001720584, -1115585175) + + W(11, 1001178742, 1001141734, 1010209588, -1122393401); + WS(1017525984, 1039916017); + sum1 = W(0, -1124853567, -1147061122, -1129462517, -1119594613) + + W(1, 999306018, -1156505185, 1012193026, 1010192066) + W(2, 
1018048624, -1127291978, 1049183858, 1032566282) + + W(3, 1036307723, -1150114695, -1116656134, -1111959392) + + W(4, -1095739637, -1104996150, -1085471393, -1090223247) + + W(5, -1104470177, -1105492077, -1109848430, -1107232129) + + W(6, 1045916039, 1043181303, 1054258797, 1054999864) + W(7, -1108082389, 1055713893, -1134406033, 1049351719) + + W(8, 1035770156, 1038596810, 1048116160, 1024906308) + W(9, 1035021201, -1114305018, 1014226964, -1132413284) + + W(10, 1019449355, -1118699785, 1018729693, -1125493497) + + W(11, -1111613764, 1013642059, -1117195563, 990662981); + sum2 = + W(0, 1033095635, -1123818427, 1023960168, -1115456886) + W(1, -1130014725, -1134763238, 1020936831, 1030713030) + + W(2, -1113611213, 1038354785, -1136308834, 1031365710) + + W(3, -1121052824, 1037262635, -1129095405, 1030934712) + + W(4, -1120605005, 1043017802, 1016597499, -1105318511) + W(5, 1032279133, -1129541437, 1028982810, 969324748) + + W(6, 1010827478, 1005632845, 1067061797, 1065783254) + W(7, 1033387493, 1024758296, -1141733133, 973320358) + + W(8, -1096460028, -1090567932, -1078682340, -1094955795) + + W(9, -1114814318, 994121897, -1126300988, -1148320949) + W(10, 992544217, 1043858115, 1041505996, -1104546945) + + W(11, -1114466677, -1115137122, 1007094454, -1122716507); + WS(1033221560, 1063777383); + sum1 = W(0, 1019126833, 1022703059, 1034317754, -1141147507) + + W(1, 1023815261, -1145478905, -1139797991, 1017044691) + + W(2, -1112638616, -1107233655, -1107419652, 1017313945) + + W(3, 1030431247, 1043218964, 1030899327, 1042038278) + W(4, 1057738402, 1049316970, 1057780266, 1039925560) + + W(5, -1101232178, -1097249286, -1105363093, -1097041200) + + W(6, -1088696201, -1093677291, -1091493000, -1105678958) + + W(7, 1054033952, 1050643572, 1039373871, 1044902925) + W(8, 1044063732, 1018619290, 1042363102, -1123751101) + + W(9, -1103422471, -1099574259, -1109462943, -1143343447) + + W(10, -1130166589, -1127677637, -1141893947, -1141892011) + + W(11, 1028589441, 1032480102, 
1000558881, -1115509735); + sum2 = W(0, -1130982538, 1021127838, 1013698904, -1227267307) + + W(1, -1180063220, -1130392938, -1129014326, 1032270489) + + W(2, -1106875761, 1011019608, 1024572751, 1039986653) + + W(3, -1113103478, -1104759550, 1035543741, -1129087757) + + W(4, 1056618334, 1055137831, 1048771047, -1073860127) + W(5, -1108123535, 1059126644, 1045481337, 1042704754) + + W(6, 1055977286, 1058761027, -1103617496, -1072531400) + W(7, 1052255088, 1063802283, 1048895081, 1016319978) + + W(8, -1102508839, -1115387544, -1128394642, 1043687089) + + W(9, -1126700500, -1109991236, 1031917163, -1121934766) + + W(10, 1020425242, 1013848532, 1016018634, -1112657573) + + W(11, 1035781641, -1125248586, -1126407949, 1030839801); + WS(-1091503470, -1126838795); + sum1 = W(0, -1124121237, -1113414670, 1028321829, -1111356688) + + W(1, 1026686479, -1121133649, -1120913068, -1126384695) + + W(2, -1134568843, -1126036384, 1033159265, 1037814911) + + W(3, 1031580097, 1040854417, -1106928812, 1027944427) + W(4, 1047693172, 1049117760, -1105247938, 1058889082) + + W(5, 1012762455, -1120516448, 1049438209, 1040171928) + + W(6, -1109412515, -1103447906, -1096437464, -1098797565) + + W(7, -1104719461, -1106636212, -1129886751, -1106734009) + + W(8, -1129997061, 1034526751, -1143270742, 1022658271) + + W(9, -1109175079, 1042645116, -1104071908, 1012750451) + + W(10, 1024589216, -1113770234, 1036105485, -1114645460) + + W(11, 1023714995, -1138248951, -1132081196, -1145462266); + sum2 = W(0, -1121971015, -1109269766, -1096046215, -1096349825) + + W(1, -1104467071, -1105457821, -1114558576, -1113812495) + + W(2, 1025966975, 1042653975, 1058579369, 1055073186) + W(3, 1043843774, 1043492169, -1138844670, 1035958317) + + W(4, -1127561691, 1038662008, -1095336395, -1156170871) + + W(5, 1054347264, -1116732629, 1043300107, 1025184897) + + W(6, 1032121603, -1104797770, 1041625968, -1098327149) + + W(7, -1090818470, 1028261711, -1117277405, -1130419167) + + W(8, -1118316871, 1018661176, 
-1131103951, 1035437895) + + W(9, 1042639369, -1115472379, -1115061521, -1130646919) + + W(10, -1147835451, 1023052754, -1129857431, -1114991229) + + W(11, 1011784142, -1157273063, 1024244091, -1148362739); + WS(1065968028, 1020949470); + sum1 = W(0, 1032158503, 1018701500, 1028916776, 1031923218) + W(1, -1143490705, 1005807263, 1028568398, 1026731038) + + W(2, 1043191673, 1020082981, 999481917, 1033906881) + W(3, -1123072324, 1012896153, 1015001550, 1035245559) + + W(4, 1050691771, 1043953282, 1047062836, 1032996498) + W(5, 1057457238, 1057445286, 1051530049, 1057446979) + + W(6, -1089572661, -1139338281, -1091936976, -1081715803) + + W(7, -1087112685, -1088032573, -1100585253, -1095410205) + + W(8, 983294118, 1041437600, 1042367115, 1040306653) + W(9, 1019100064, -1123572193, 1039280729, -1104642968) + + W(10, 1033006978, -1139648989, 1033705422, -1132542121) + + W(11, -1138694873, 1033839136, 1027376729, 1015844228); + sum2 = + W(0, -1107158830, -1114179967, -1106205598, -1112529879) + W(1, 1036191348, 1045048740, -1112261511, 1045813672) + + W(2, -1098284578, 1050997929, 1032837846, -1138556160) + + W(3, -1103494095, 1033058370, -1109765233, 1040601336) + W(4, 1059620359, -1120628180, 1056621385, 1064698181) + + W(5, 1049117565, -1087565422, 1050635491, -1087363324) + W(6, 1059101624, -1084072333, 1033838752, 1049137220) + + W(7, -1100194061, -1096659207, 1042760064, -1087988644) + + W(8, -1080819081, 1055292451, -1084530179, -1118657190) + + W(9, 1035375516, 1055648971, -1101810418, 1059720072) + W(10, 1041045600, 1041809552, -1150759871, 1036987884) + + W(11, 1040790470, -1098721116, 1028926760, -1118399816); + WS(-1079370588, -1069938997); + sum1 = W(0, -1107111607, 1031876694, -1108546779, 1007409349) + + W(1, -1119937056, -1110402417, 1034313646, -1115195974) + + W(2, 1024046068, 1040468568, -1110778478, 1041967908) + W(3, 1037532265, 1027170100, 1028424065, 1030920188) + + W(4, 1034755057, 1048995971, 1044693184, 1056322870) + W(5, 992363881, -1123931633, 
1041273350, 1034999584) + + W(6, -1106911318, -1106422470, -1092550445, -1089848952) + + W(7, -1115839591, -1098485404, 1027494176, -1101830589) + + W(8, 1032333322, 1043602212, -1124221431, 1044063648) + W(9, -1137125241, 1015794334, 1038675968, 1024639630) + + W(10, -1122522177, -1133546645, -1123999146, -1129245296) + + W(11, -1121719236, 1009488187, -1146748727, -1115284904); + sum2 = W(0, -1116010697, 1039856206, 1025692273, 1047723216) + W(1, 1054500142, 1045538686, 1020883249, 1037455508) + + W(2, 1044972838, -1094195694, 1041658757, -1090167008) + + W(3, -1087657974, -1114739465, -1103808398, 1025495534) + + W(4, -1107525170, 1054041965, -1095593913, 1054432792) + + W(5, 1048962216, -1099510609, 1041852115, -1112321007) + + W(6, 1025111869, -1105218154, 1047241736, -1103340827) + + W(7, -1105033641, 1043141319, -1104502432, 1020729805) + + W(8, -1115770744, 1021276689, -1113081166, 1030984707) + W(9, 1026520309, 1008623973, 1031142833, 1034321699) + + W(10, 1027404867, -1122132060, 1036997767, -1109230755) + + W(11, 1000189578, 949401483, -1121181330, 989367833); + WS(1051966318, -1113875142); + sum1 = + W(0, 1035883793, -1111332346, 1045917727, 1006041029) + W(1, -1129901007, 1042663274, -1119690564, 1014471632) + + W(2, 1040145278, -1135399329, 1017502534, -1096989371) + W(3, 1043615094, 1025318012, -1113005508, 1043398529) + + W(4, 1051696565, 1051177307, 1054251573, 1061428120) + W(5, 1037934795, 1050890887, 1046007992, 1049249377) + + W(6, -1091560996, -1096861164, -1091248610, -1087158964) + + W(7, -1098884432, -1096301045, -1093706893, -1093100168) + + W(8, -1133003627, -1117751383, -1178468754, -1126263439) + + W(9, -1106662601, 1017933485, 1038615540, -1116546293) + + W(10, -1126824689, 1039745286, 1020593891, 1008594943) + + W(11, 1037849734, 1035118155, -1114145271, 1028529816); + sum2 = + W(0, -1120038729, 1032134121, -1112337284, 1014623636) + W(1, 1015804972, -1111055788, 1025605092, -1118260382) + + W(2, 995641288, 1034430176, 1011533369, 
1032554184) + W(3, -1114631943, 1037430467, 1022278663, 1020696197) + + W(4, 1035481768, -1099098436, 960227969, -1122840036) + W(5, -1123247827, -1115890560, 1022436109, 1010081074) + + W(6, 1011267102, -1099374838, 1055781369, 1046041384) + W(7, 1055178264, -1124103196, 1039303021, -1116158530) + + W(8, 1042749492, 1055985885, 1067101038, 1069533386) + W(9, 1065618150, 1058921177, -1106092627, -1118372702) + + W(10, 1026093812, -1113501317, -1095011910, -1086401299) + + W(11, -1079450114, -1075689828, -1082339747, -1095588850); + WS(-1087762743, -1114043214); + sum1 = W(0, 1028903325, -1111481047, 1010874850, -1117223539) + + W(1, -1114811697, 1027957028, -1115120063, 1025677743) + W(2, 1040067739, 981566559, 1032756947, -1130847267) + + W(3, -1123313460, -1113871531, -1126050567, -1113266015) + + W(4, 1045936105, 1050679259, 1052053924, 1055533279) + W(5, 1059392176, 1053658934, 1039338221, 1051823712) + + W(6, -1095653187, -1096161947, -1090950614, -1084280212) + + W(7, -1097768267, 1005713692, -1120312930, -1118568631) + + W(8, -1122706133, 1026181394, 1034645023, 1030922562) + + W(9, 1030340896, -1102292436, -1107839810, -1112577800) + + W(10, 1029496147, -1149217596, -1132850610, -1154701224) + + W(11, -1113406339, 1020850850, 1017761874, -1117302546); + sum2 = W(0, 1001016005, -1125241033, 1018218852, -1111720092) + + W(1, 1015413875, -1140722987, -1138333196, 1010568725) + + W(2, -1150454109, -1111980552, 1033028659, 1037513337) + + W(3, -1109924763, -1144232926, -1117260807, -1113652225) + + W(4, 1001531747, 1031575403, 1016765132, -1085543392) + W(5, 1053354837, 1054900052, 1041131548, 1045881209) + + W(6, 983352173, -1103460210, -1103428024, -1076627395) + + W(7, 1074369634, 1051272122, -1098379064, -1098914955) + + W(8, -1106746798, 1036252996, -1128273041, 1046375720) + + W(9, 1055587240, -1094917738, -1106193524, -1098562564) + + W(10, -1157019725, 1023495862, 1033057508, -1112606370) + + W(11, 1019967806, -1107285947, 1012881631, 1031041657); + 
WS(-1090517687, -1095444575); + sum1 = W(0, 1025742446, -1111454559, 1040962578, 1035513890) + W(1, 1025779251, 1041617005, -1106922364, 1024699903) + + W(2, 1032739299, -1106175885, -1110724347, -1118213794) + + W(3, -1103218657, 1040368314, -1105137317, 1024907982) + W(4, 1054789967, 1031326631, 1052551894, 1059778428) + + W(5, 1045793474, 1051667296, 1040121301, 1052247674) + + W(6, -1095546301, -1101847659, -1090925684, -1089262225) + + W(7, -1096439373, -1099734269, -1104705805, -1097282993) + + W(8, 1033023507, -1107099097, 1036045063, 1035395209) + + W(9, -1101529212, 1050132438, -1102593253, 1016797696) + + W(10, 1028563508, -1117676152, 1036888990, 1007580971) + + W(11, 949923192, 1037985265, -1113513373, 1026673059); + sum2 = W(0, 1052208474, 1035064858, 1064043431, 1055780907) + W(1, 1048070172, 1057138231, -1100953655, 1042120650) + + W(2, -1095438834, -1104348375, -1079843968, -1084432555) + + W(3, -1106077690, -1086965748, 1048708391, -1106942577) + + W(4, 1040952518, -1095663473, 1060639651, 1054823629) + + W(5, -1098542607, 1057295667, -1095587456, 1035260440) + + W(6, -1101013864, 1048824145, -1097227129, -1105218176) + + W(7, 1048082838, -1106201667, 1042233875, -1136871967) + + W(8, 1045777263, -1099869349, 1049789703, -1118379111) + + W(9, -1102784042, 1043327775, -1107731485, 1006771796) + + W(10, -1125550479, -1127115193, 1024044553, 972648308) + + W(11, 1021849499, 1016298873, -1116706874, 1030896448); + WS(-1115614648, 1042425791); + sum1 = + W(0, -1120207204, -1124027509, -1144413692, -1105701711) + + W(1, 1014568102, -1120774251, 1029691462, -1119274774) + W(2, -1115327070, 1016926292, 1038910502, 1037845356) + + W(3, 1042236823, -1105850383, -1134596907, -1117126533) + + W(4, -1093340989, -1096731140, -1096554678, -1085064420) + + W(5, -1100005974, -1098496607, -1135406214, -1098152617) + + W(6, 1053481388, 1053311332, 1057006919, 1049566369) + W(7, 1056376458, 1048579325, 1042582346, 1043887136) + + W(8, 1030984860, -1116407641, 
1038600290, 1043585442) + W(9, 1018535375, -1122768840, -1120431498, 1035562443) + + W(10, -1113874973, 1010793530, -1116271283, -1114795809) + + W(11, 992031960, -1130485552, 1029193328, -1140222054); + sum2 = W(0, -1127240204, 1024332524, -1145474239, -1111478947) + + W(1, 1029335642, -1112394176, 1042612281, -1152251822) + + W(2, -1079712608, -1082891577, -1081879450, 1047818786) + + W(3, 1066272635, 1064292926, 1048742105, 1063028070) + W(4, 1066527571, 1063276166, 1065092416, 1033988518) + + W(5, -1081647719, -1085935960, -1090025738, -1088819358) + + W(6, 1049757184, 1025750476, 1045572932, -1093152038) + + W(7, -1129453741, -1107360776, 1042389166, -1095668919) + + W(8, -1122509441, -1127423195, -1127801782, -1126799575) + + W(9, 1033523902, -1109456127, 1029374360, -1163717821) + + W(10, -1120167031, 976948665, -1123809997, 1030959654) + + W(11, 995040830, 1026103880, -1116538748, 1033003323); + WS(1005558656, -1134386388); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[555]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define temp_tex(pos) (float(texture(temp, pos).x)) +static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2); +static const float2 temp_pt = float2(1.0 / (temp_size.x), 1.0 / (temp_size.y)); + +#define HOOKED_tex(pos) temp_tex(pos) +#define HOOKED_size temp_size +#define HOOKED_pt temp_pt + +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * 
ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 555; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (2)) + 0.5, float(group_base.y + y - (3)) + 0.5)).x; + } + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[12]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 4]; + samples[1][1] = inp[local_pos + 5]; + samples[1][2] = inp[local_pos + 6]; + samples[1][3] = inp[local_pos + 7]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; + samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 19]; + samples[3][1] = inp[local_pos + 20]; + samples[3][2] = inp[local_pos + 21]; + samples[3][3] = inp[local_pos + 22]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; + samples[4][2] = inp[local_pos + 32]; + samples[4][3] = inp[local_pos + 33]; + samples[5][0] = inp[local_pos + 34]; + samples[5][1] = inp[local_pos + 35]; + samples[5][2] = inp[local_pos + 36]; + samples[5][3] = inp[local_pos + 37]; + samples[6][0] = inp[local_pos + 45]; + samples[6][1] = inp[local_pos + 46]; + samples[6][2] = inp[local_pos + 47]; + samples[6][3] = inp[local_pos + 48]; + samples[7][0] = inp[local_pos + 49]; + samples[7][1] = inp[local_pos + 50]; + samples[7][2] = inp[local_pos + 51]; + samples[7][3] = inp[local_pos + 52]; + samples[8][0] = inp[local_pos + 60]; + samples[8][1] = inp[local_pos + 61]; + samples[8][2] = inp[local_pos + 62]; + samples[8][3] = inp[local_pos + 63]; + samples[9][0] = inp[local_pos + 64]; + samples[9][1] = inp[local_pos + 65]; + samples[9][2] = 
inp[local_pos + 66]; + samples[9][3] = inp[local_pos + 67]; + samples[10][0] = inp[local_pos + 75]; + samples[10][1] = inp[local_pos + 76]; + samples[10][2] = inp[local_pos + 77]; + samples[10][3] = inp[local_pos + 78]; + samples[11][0] = inp[local_pos + 79]; + samples[11][1] = inp[local_pos + 80]; + samples[11][2] = inp[local_pos + 81]; + samples[11][3] = inp[local_pos + 82]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 33]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret); +} diff --git a/src/Effects/NNEDI3/NNEDI3_nns16_win8x4.hlsl b/src/Effects/NNEDI3/NNEDI3_nns16_win8x4.hlsl index 3414e5493..6566bffff 100644 --- a/src/Effects/NNEDI3/NNEDI3_nns16_win8x4.hlsl +++ b/src/Effects/NNEDI3/NNEDI3_nns16_win8x4.hlsl @@ -1,206 +1,732 @@ -// nnedi3-nns16-win8x4 -// 移植自 https://github.com/bjin/mpv-prescalers/blob/cc02ed95c1fe05b72bc21d41257c4c085e6e409b/compute/nnedi3-nns16-win8x4.hook -// 有半像素的偏移 +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: nnedi3.py --nns 16 --win 8x4 --use-compute-shader --use-magpie +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 - +//!VERSION 4 +//!SORT_NAME NNEDI3_016_4 //!TEXTURE Texture2D INPUT; //!SAMPLER //!FILTER POINT -SamplerState sam; +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 1 * 2 +//!HEIGHT INPUT_HEIGHT * 2 * 1 +Texture2D OUTPUT; //!SAMPLER //!FILTER LINEAR -SamplerState sam1; +SamplerState sam_INPUT_LINEAR; //!TEXTURE -//!WIDTH INPUT_WIDTH -//!HEIGHT INPUT_HEIGHT * 2 //!FORMAT R16_FLOAT -Texture2D tex1; +//!WIDTH INPUT_WIDTH * 1 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D temp; -//!COMMON +//!SAMPLER +//!FILTER POINT +SamplerState sam_temp; -#define T(x) asfloat(x) -#define W(i,w0,w1,w2,w3) dot(samples[i],float4(T(w0),T(w1),T(w2),T(w3))) -#define WS(w0,w1) sum1 = exp(sum1 * mstd2 + T(w0)); sum2 = sum2 * mstd2 + T(w1); wsum += sum1; vsum += sum1*(sum2/(1.0+abs(sum2))) +//!COMMON +#include "prescalers.hlsli" +#define LAST_PASS 2 //!PASS 1 -//!DESC double_y +//!DESC NNEDI3 (double_y, nns16, win8x4) //!IN INPUT -//!OUT tex1 -//!BLOCK_SIZE 32,16 -//!NUM_THREADS 32,8 - - -float nnedi3(float4 samples[8]) { +//!OUT temp +//!BLOCK_SIZE 32, 16 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[8]) { float sum = 0.0, sumsq = 0.0; - [unroll] - for (int i = 0; i < 8; i++) { - sum += dot(samples[i], 1.0f); + [unroll] for (int i = 0; i < 8; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); sumsq += dot(samples[i], samples[i]); } - float mstd0 = sum / 32.0; float mstd1 = sumsq / 32.0 - mstd0 * mstd0; - // 不能使用 lerp,否则结果可能为 nan - float mstd2 = mstd1 >= 1.192092896e-7 ? 
rsqrt(mstd1) : 0.0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); mstd1 *= mstd2; - float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, -1123354974, -1112248839, 1046299686, -1143613552) + + W(1, -1118620174, 1024662558, 1028038478, -1129268360) + + W(2, 1016130204, -1087068557, 1063313277, -1103342192) + + W(3, -1103968288, 1048182784, 1047279381, -1115088511) + + W(4, -1101453425, 1059583965, -1088182320, 1003350800) + + W(5, -1117908518, -1119323982, 1034186247, -1134684248) + + W(6, -1122284590, 1027638054, -1124394588, -1111377363) + + W(7, -1122818124, -1137723992, 978245507, 1028117438); + sum2 = W(0, -1162931039, -1131063526, 1029801649, -1117642655) + + W(1, -1136248556, -1131086728, 1031011705, -1128864654) + + W(2, -1115594515, -1128443230, 1042762789, -1107118398) + + W(3, -1119907402, 1044675527, 1050674207, -1113986381) + + W(4, 1022791334, -1107588397, 1009001220, -1186206458) + + W(5, 1017500018, -1111169922, -1112569685, 1017255694) + + W(6, -1156766128, -1125594766, -1148613464, 993928432) + + W(7, 1014782692, -1135599628, -1114139175, 1007622876); + WS(1038828992, 1041685264); + sum1 = W(0, -1114329248, 1049950910, -1097681183, 1028668144) + W(1, 995958527, 1027336960, -1107326552, 1025858258) + + W(2, -1117673776, 1060640651, -1085831405, 1033402064) + + W(3, 1034401008, 1045782072, -1105157973, -1122828000) + + W(4, 1038612842, -1098159517, 1053136924, -1110558370) + + W(5, 1035088196, -1106507532, 1032016120, -1113173980) + + W(6, 1008781376, -1124000392, 1023707152, 1012109856) + + W(7, 1029875310, -1105439902, 1034119968, -1114749520); + sum2 = + W(0, 1031315360, -1099468189, -1112139926, 1036663822) + W(1, -1131767489, -1140834082, 1024287080, 
-1122285462) + + W(2, 1023637252, -1100127579, -1117241706, 1038018354) + + W(3, -1107869385, 1052854494, 1052996200, -1112496415) + + W(4, -1107666272, 1034036134, 1027811452, -1110479054) + + W(5, -1117110288, 1024451620, 1027157968, -1112615559) + W(6, -1124350185, 1003450083, -1131082337, 998992195) + + W(7, -1110538107, 1041131277, 1035032776, -1106762474); + WS(-1086074680, 1053637716); + sum1 = W(0, -1121345387, 1042002951, -1113042450, -1121398619) + + W(1, -1148805338, -1165378922, -1115297518, 991217235) + + W(2, -1136570733, 1052460699, -1107443934, -1117268427) + + W(3, 1049266593, -1094571489, -1098765182, 1036113926) + + W(4, 1027081787, -1124281856, 1043313411, -1136658365) + + W(5, -1133439181, 1040734807, 1006695533, -1112513138) + + W(6, -1158465386, -1121708851, 1016359031, 1021173351) + + W(7, -1120818857, 1035650578, 1027853163, -1106476275); + sum2 = W(0, 1026517575, -1170492850, -1138816415, -1143472678) + + W(1, 1017334370, 1003954710, -1132363566, 998846550) + + W(2, 1051558711, -1096673587, -1136175651, -1124275402) + + W(3, 1071692777, -1077357700, -1098960792, 1018703670) + + W(4, 1049822619, -1098179385, -1116986501, 1007812651) + W(5, 1020207734, 996694924, 1003290486, 1007766851) + + W(6, 1022251878, -1122577241, -1141894102, 1009415395) + + W(7, 1019995718, 1015494226, -1126828734, -1163222937); + WS(1051521136, 1027207116); + sum1 = + W(0, -1122694020, 1010830545, -1124291704, 1018062184) + W(1, -1121133108, -1124202632, 1037913146, -1116091286) + + W(2, -1102175837, 1057246783, -1093542759, 1041281977) + W(3, -1116351908, 1026322980, 982577970, -1125394504) + + W(4, 1045518980, -1089509425, 1055793637, 1008755233) + W(5, 1009393969, 1025178484, -1118947636, -1127575032) + + W(6, 1008379217, -1117338572, 1001093793, 1015898776) + W(7, 1015772516, 1009646833, 1001810977, -1121163492); + sum2 = W(0, -1137495011, -1135527491, 1027730022, -1118108263) + + W(1, 1013616911, -1123650952, 1024465134, -1128775579) + + W(2, -1135578111, 
1013443151, 1049128967, -1098008683) + + W(3, 1029346938, -1114797945, 1068130737, -1080443718) + + W(4, 1017473747, -1122100892, 1046423571, -1101482344) + + W(5, 1012413655, -1128721387, -1143058109, -1137148015) + + W(6, -1133405571, -1166794345, 1020545683, -1128178767) + + W(7, 1008139351, -1156685818, -1126785325, 991435034); + WS(1057767608, -1132080751); + sum1 = W(0, 1026028453, 1025766741, 1035118319, 1012106581) + W(1, 1026017621, -1135552917, 1040474693, -1138611630) + + W(2, -1117947285, 1051769667, -1111744027, 1030333189) + + W(3, 1048679017, -1083959172, -1084413328, 1045191121) + + W(4, 1025261389, -1120826122, 1049618505, -1122181545) + + W(5, 1011196341, 1045191525, -1110336171, 1030480605) + W(6, 1015828970, 1028389741, 1028257397, 1027514349) + + W(7, 1025013027, 1039505775, -1123719333, 1020294666); + sum2 = W(0, 1017587161, -1101123140, 1040188371, 988296658) + W(1, 1028118553, -1103020887, 1022642341, 1010063898) + + W(2, 1008167722, -1099714612, 1039093756, 1026403646) + W(3, 1005112948, 1049070164, 1046164698, 1033545355) + + W(4, -1125344655, 1032013714, -1111525569, 1002132020) + + W(5, 1015776789, 1022049457, -1098832696, 1037334715) + + W(6, -1148301500, 1009340114, -1115917000, -1139728254) + + W(7, -1138850406, -1167693540, -1103378287, 1035581889); + WS(-1099372256, -1088618788); + sum1 = W(0, -1112538182, 1048693927, -1112344546, -1109099742) + + W(1, -1113349022, 1033711782, -1129092599, -1110127398) + + W(2, -1103996671, 1064716592, -1086749016, 1032699126) + + W(3, 1024020908, -1143605597, 1044926535, -1121424940) + + W(4, 1046614908, -1085173359, 1062252083, -1130166943) + + W(5, -1111225386, 1004694493, 1040479887, -1106709441) + + W(6, -1110537326, -1108087402, 1034104622, -1120726228) + + W(7, -1114146165, -1138402062, 1042110371, -1106064827); + sum2 = W(0, 987083788, 1013472954, -1120418118, 979955865) + + W(1, -1144106823, -1131186779, -1122269098, -1163904780) + + W(2, -1120467381, -1139561796, 1038342084, -1115615181) + 
+ W(3, -1121977305, 1044091298, 1042996066, -1127292875) + + W(4, -1118651341, 1038343490, -1118476220, -1123141745) + + W(5, -1162389292, -1115306287, -1128689408, 1014320394) + + W(6, -1152635694, -1155962630, -1132569906, -1135582470) + + W(7, 964510307, -1117365756, -1141833923, 1008840046); + WS(1041282784, 1044242623); + sum1 = W(0, -1119885764, -1171512555, 1003864029, 1025494836) + + W(1, -1119816052, -1121861252, 1040963149, -1113504879) + + W(2, -1100880653, 1057266723, -1094412795, 1043843337) + + W(3, -1113812594, 1010135439, -1118004569, -1125989575) + + W(4, 1046531310, -1089952515, 1056310444, -1156936827) + + W(5, 1015358999, 1031135156, -1114099002, -1122714492) + + W(6, 1005085853, -1115226950, 1015234855, 1003362397) + W(7, 1021011107, 1003139037, 992693307, -1120612644); + sum2 = + W(0, 1005317381, -1142619324, -1126266146, 1026462555) + W(1, -1143827754, 1012902153, -1128784654, 1020893616) + + W(2, 1019060164, -1114788024, -1094218173, 1054132458) + + W(3, 1009279342, -1098688460, -1078812823, 1070492026) + + W(4, 1014092605, -1120377499, -1099532818, 1048935725) + W(5, -1131000233, 1017453102, 1007638067, 1011358224) + + W(6, 1012779564, -1139793504, -1130333980, 1015734963) + + W(7, -1137528453, -1147729078, 1018177647, 987943782); + WS(1046635232, 1024078131); + sum1 = W(0, 1002735212, 1035063871, -1097977761, 1040314319) + W(1, 1025138813, 1034039879, -1105608655, 1035664624) + + W(2, 1017042555, 1044122447, -1094991056, 1038536855) + + W(3, -1132524982, -1110416695, 1051547730, -1114843703) + + W(4, 1031803657, -1092481954, 1050188814, 1003107468) + W(5, 1033606155, -1094320024, 1047410847, 1019470987) + + W(6, 1021596219, -1107502027, 1031346589, 1021345835) + + W(7, 1015508823, -1103391009, 1046101811, -1136683190); + sum2 = + W(0, -1096475926, 1044036812, 1052862983, -1106234474) + W(1, -1112281069, -1112231286, 1024115789, -1121785528) + + W(2, -1116645717, -1111398905, 1051331710, -1130292776) + + W(3, 1041647377, -1096068583, 
1038036111, 1037359643) + W(4, -1113263240, 1026411348, 1042458641, -1111704128) + + W(5, 1023473494, -1114320784, 1028002558, -1123406807) + + W(6, -1117017643, -1138574198, 1037890580, -1109714921) + + W(7, 1039764966, -1104710548, -1106844581, 1041123403); + WS(-1088554040, -1076674880); + sum1 = + W(0, 1026292820, -1132973070, -1144171612, -1130131975) + W(1, 1016736263, 1034501898, -1110973538, 1028857234) + + W(2, 1042339025, -1089525132, 1052671191, -1108906970) + + W(3, -1110236986, 1037427962, -1123890785, -1112145786) + + W(4, -1103961368, 1056478885, -1092344862, 1002874044) + W(5, 1016313655, -1118983748, 1041641985, 1025897228) + + W(6, -1151588920, 1038469390, 1010979982, -1130905399) + + W(7, 1014755782, -1123320716, 1017396903, 1033705562); + sum2 = W(0, 1013915195, -1133182691, -1127318198, 1020584890) + + W(1, 1007730851, 1024414743, -1121307593, 1005058566) + W(2, 981970521, -1111248658, 1035588225, -1124411850) + + W(3, 1028189234, 1040952978, 1057294107, 1029625115) + + W(4, -1121038101, -1109339192, -1107404728, 1026110889) + + W(5, -1142484934, -1094377458, 1024397525, 1023925523) + + W(6, -1146368902, -1116592821, -1118541421, -1140327971) + + W(7, 1010322539, -1112421528, 1019759378, -1199698720); + WS(1063581112, 1015292283); + sum1 = + W(0, -1123806598, -1125096044, 1046804719, -1117498166) + W(1, -1124445804, 1037634467, 1028314614, 1006823135) + + W(2, 1036776315, -1083793455, 1064148787, -1106689849) + + W(3, -1112186771, -1098422117, 1034155462, 1004978479) + + W(4, -1102837698, 1058965073, -1089226130, 1033810693) + + W(5, -1117642958, -1106625757, 1037373467, 1029436414) + W(6, -1137018200, 1036181095, 994321759, -1119765454) + + W(7, 1010580432, -1127761788, 1021285644, 1034713459); + sum2 = W(0, -1127012521, -1110373665, -1121983257, 1021812843) + + W(1, -1129458054, -1122115974, -1121551577, 1015201109) + + W(2, -1134632819, -1118435057, -1107711610, 1039413537) + + W(3, -1113739078, 1041258512, 1043546644, -1127386873) + + W(4, 
-1106078947, 1025961773, 1048226293, -1110385416) + + W(5, -1115241196, 1041055451, -1131486243, -1135801459) + + W(6, -1122814807, 1025056413, -1139476701, -1132245806) + + W(7, -1119046895, 1029845331, 1018415015, -1140149017); + WS(-1109010880, -1087548956); + sum1 = W(0, 1034947768, -1095012676, 1046023882, 1029737824) + W(1, 1034343312, -1102610188, 1039446704, 1025692706) + + W(2, 1016751552, -1096454908, 1042564604, 1038373096) + + W(3, 1019661856, -1091443170, -1105694067, 1039271048) + + W(4, -1126501287, -1131030249, 1044246468, 1012879825) + + W(5, 1017025648, 1042942296, -1103700296, 1041317114) + W(6, 1030724160, 1019936112, -1141422594, 1029263800) + + W(7, -1140792121, 1024647464, -1107855416, 1041193844); + sum2 = + W(0, 1034034732, -1107522705, -1105460279, 1021740679) + W(1, -1113997103, -1121503695, 1038975878, -1112744336) + + W(2, 1028771217, -1114143244, 1032873918, -1121564954) + W(3, 1025456143, -1105773446, 1059420344, 1024971971) + + W(4, 1035315492, -1109746606, 1040681265, -1122379806) + + W(5, -1102403849, -1106040358, 1046039582, -1106873869) + + W(6, 1018212015, -1106459627, 1026290649, -1130313815) + + W(7, -1099438501, 1039219872, 1046943722, -1105420350); + WS(-1086299832, -1077288694); + sum1 = W(0, 1021716686, -1099039878, -1111509136, 1039618828) + + W(1, -1132921948, -1108540692, 1021468846, -1131678690) + + W(2, -1113901292, -1158126306, -1096197083, 1041516082) + + W(3, -1108835908, 1055092577, 1062013047, -1118733319) + + W(4, 1023078294, -1089051407, 1050708993, -1122936235) + W(5, 965138311, -1113759276, 1022391342, 1015065790) + + W(6, 998651320, -1107695832, -1133490396, 997649137) + W(7, -1130194922, -1113503632, 991635057, 1023538631); + sum2 = W(0, -1133976495, 1035891239, -1130801609, -1113698362) + + W(1, 1027343155, 1030599513, -1108453664, 1016406968) + + W(2, -1149877867, 1037590422, 1012747883, -1108226898) + + W(3, -1119506980, 1054189655, -1119322812, -1120928356) + + W(4, -1126385541, 1041308688, -1107379808, 
1016225738) + + W(5, 1016526837, -1112736561, -1119223720, 988482485) + W(6, 994153115, 1004824957, -1116360142, 1018050885) + + W(7, -1140785051, -1120347934, -1129452107, -1117792638); + WS(-1113279936, 1066223903); + sum1 = W(0, -1128171420, 1040261344, -1112013315, -1123695998) + + W(1, -1141738481, -1140107833, -1116929726, -1154978689) + + W(2, -1138940153, 1050703688, -1108200895, -1123177006) + + W(3, 1044160156, -1100167260, -1100730273, 1034288823) + + W(4, 1020686276, -1130335589, 1040782300, -1141423761) + + W(5, -1129655596, 1035637471, 1024316286, -1114187043) + W(6, 964173357, -1124525100, 1014134393, 1013984857) + + W(7, -1123239900, 1032644739, 1029624526, -1108229911); + sum2 = W(0, -1115606620, 1021458196, 1009639320, -1131253088) + + W(1, -1125272644, 1017345212, 1016051020, -1143902384) + + W(2, -1099614716, 1047257730, -1120838650, 1020803060) + + W(3, -1080575150, 1068148121, -1113655261, 1032085971) + + W(4, -1102155153, 1044966894, -1132238288, 1016311348) + + W(5, -1122847678, 1026244022, -1130782536, -1137376840) + + W(6, -1123394906, 1017049220, 967940860, -1137115752) + + W(7, -1129056732, 1010161976, 1004223696, -1136984808); + WS(1060545080, -1126581603); + sum1 = + W(0, 1032630360, -1112268976, 1045186906, -1125010622) + W(1, 1037657648, -1128752350, 1032285712, 1029508223) + + W(2, 1043836232, -1090205186, 1053340438, -1108078856) + W(3, 1037448680, 1048595306, -1094666759, 1041691860) + + W(4, 976149203, 1057651571, -1082657749, 1042698525) + W(5, 1031833596, 1035187792, -1092127852, 1040118132) + + W(6, 1031675647, 1034806588, -1104761760, 1033087420) + W(7, 1025282125, 1043419290, -1096441814, 1034587656); + sum2 = + W(0, -1123698886, 1034075649, 998149095, -1113635181) + W(1, -1126365381, 1026991402, -1118780236, -1168196508) + + W(2, -1135914762, 1019253181, 1023543366, -1114469118) + + W(3, -1121651762, 1047572688, 1038479879, -1145545780) + W(4, -1118625490, 1035108181, -1114677625, 992781287) + + W(5, -1122087574, 
-1115886918, 1011684618, -1139655050) + + W(6, -1147908244, 1016718341, -1132109957, -1142844852) + + W(7, -1134045690, -1117034488, -1137057610, 1007905050); + WS(-1083899832, -1105526146); + sum1 = W(0, 1026357515, -1119744955, -1117075907, -1111407198) + + W(1, -1139718894, -1125720471, -1106102943, -1152407445) + + W(2, 1044187583, -1092285679, 1048719011, -1107209883) + + W(3, -1105573131, 1062437883, 1052836221, -1107292779) + + W(4, -1104526300, 1058460257, -1089717563, -1122559055) + + W(5, -1119529939, 1022150135, -1123085499, -1119739267) + + W(6, -1125768375, 1033366698, -1114009838, -1119196243) + + W(7, -1132776678, 1009731342, -1112611206, -1129505495); + sum2 = W(0, -1110807022, 1025172792, 1033543849, -1123816828) + + W(1, -1129400032, -1117035240, 999654946, -1144812946) + + W(2, -1105612607, 1035443403, 1039345667, -1120747576) + + W(3, -1123619892, -1135427545, 1053020794, -1113498942) + + W(4, -1131262448, -1111010692, 1047843748, -1113301822) + + W(5, 1016529300, -1115955576, -1135856481, -1146605522) + + W(6, -1129444600, -1117326476, 1022819536, -1119691028) + + W(7, -1136239801, -1121250556, 998047364, -1135792457); + WS(-1107513792, 1064663354); + sum1 = W(0, 1030862455, -1113532308, 1032378968, -1123071015) + + W(1, -1161118946, 1021510766, -1127591630, 1009770420) + + W(2, 1040244826, -1091621085, 1051734861, -1107582956) + + W(3, -1104300038, 1046262406, 1034822530, -1108820108) + + W(4, -1102940181, 1054782000, -1095483267, -1125175670) + + W(5, -1135077628, 1019068110, 1031948820, 1025488559) + + W(6, -1135539484, 1036941280, -1172984259, -1126076542) + + W(7, 1011863892, -1128724830, -1120336759, 1036426604); + sum2 = + W(0, -1135206239, -1140752647, 1022777359, 974924014) + W(1, -1139065871, -1123380440, 1021581075, -1133276463) + + W(2, 1026230428, 988696695, -1122295168, 1029689087) + W(3, 1025917606, -1092786651, -1085937537, -1140169471) + + W(4, 1027050280, 1049996339, 1032573953, -1135329695) + W(5, 1013849783, 1057784826, 
-1130048007, -1124883951) + + W(6, 1016077019, 1033822297, 1032545188, 1011238415) + W(7, -1127829351, 1034470972, -1137094527, 1001568686); + WS(1058918200, -1121082995); - sum1 = W(0, -1123354974, -1112248839, 1046299686, -1143613552) + W(1, -1118620174, 1024662558, 1028038478, -1129268360) + W(2, 1016130204, -1087068557, 1063313277, -1103342192) + W(3, -1103968288, 1048182784, 1047279381, -1115088511) + W(4, -1101453425, 1059583965, -1088182320, 1003350800) + W(5, -1117908518, -1119323982, 1034186247, -1134684248) + W(6, -1122284590, 1027638054, -1124394588, -1111377363) + W(7, -1122818124, -1137723992, 978245507, 1028117438); sum2 = W(0, -1162931039, -1131063526, 1029801649, -1117642655) + W(1, -1136248556, -1131086728, 1031011705, -1128864654) + W(2, -1115594515, -1128443230, 1042762789, -1107118398) + W(3, -1119907402, 1044675527, 1050674207, -1113986381) + W(4, 1022791334, -1107588397, 1009001220, -1186206458) + W(5, 1017500018, -1111169922, -1112569685, 1017255694) + W(6, -1156766128, -1125594766, -1148613464, 993928432) + W(7, 1014782692, -1135599628, -1114139175, 1007622876); WS(1038828992, 1041685264); - sum1 = W(0, -1114329248, 1049950910, -1097681183, 1028668144) + W(1, 995958527, 1027336960, -1107326552, 1025858258) + W(2, -1117673776, 1060640651, -1085831405, 1033402064) + W(3, 1034401008, 1045782072, -1105157973, -1122828000) + W(4, 1038612842, -1098159517, 1053136924, -1110558370) + W(5, 1035088196, -1106507532, 1032016120, -1113173980) + W(6, 1008781376, -1124000392, 1023707152, 1012109856) + W(7, 1029875310, -1105439902, 1034119968, -1114749520); sum2 = W(0, 1031315360, -1099468189, -1112139926, 1036663822) + W(1, -1131767489, -1140834082, 1024287080, -1122285462) + W(2, 1023637252, -1100127579, -1117241706, 1038018354) + W(3, -1107869385, 1052854494, 1052996200, -1112496415) + W(4, -1107666272, 1034036134, 1027811452, -1110479054) + W(5, -1117110288, 1024451620, 1027157968, -1112615559) + W(6, -1124350185, 1003450083, -1131082337, 998992195) + 
W(7, -1110538107, 1041131277, 1035032776, -1106762474); WS(-1086074680, 1053637716); - sum1 = W(0, -1121345387, 1042002951, -1113042450, -1121398619) + W(1, -1148805338, -1165378922, -1115297518, 991217235) + W(2, -1136570733, 1052460699, -1107443934, -1117268427) + W(3, 1049266593, -1094571489, -1098765182, 1036113926) + W(4, 1027081787, -1124281856, 1043313411, -1136658365) + W(5, -1133439181, 1040734807, 1006695533, -1112513138) + W(6, -1158465386, -1121708851, 1016359031, 1021173351) + W(7, -1120818857, 1035650578, 1027853163, -1106476275); sum2 = W(0, 1026517575, -1170492850, -1138816415, -1143472678) + W(1, 1017334370, 1003954710, -1132363566, 998846550) + W(2, 1051558711, -1096673587, -1136175651, -1124275402) + W(3, 1071692777, -1077357700, -1098960792, 1018703670) + W(4, 1049822619, -1098179385, -1116986501, 1007812651) + W(5, 1020207734, 996694924, 1003290486, 1007766851) + W(6, 1022251878, -1122577241, -1141894102, 1009415395) + W(7, 1019995718, 1015494226, -1126828734, -1163222937); WS(1051521136, 1027207116); - sum1 = W(0, -1122694020, 1010830545, -1124291704, 1018062184) + W(1, -1121133108, -1124202632, 1037913146, -1116091286) + W(2, -1102175837, 1057246783, -1093542759, 1041281977) + W(3, -1116351908, 1026322980, 982577970, -1125394504) + W(4, 1045518980, -1089509425, 1055793637, 1008755233) + W(5, 1009393969, 1025178484, -1118947636, -1127575032) + W(6, 1008379217, -1117338572, 1001093793, 1015898776) + W(7, 1015772516, 1009646833, 1001810977, -1121163492); sum2 = W(0, -1137495011, -1135527491, 1027730022, -1118108263) + W(1, 1013616911, -1123650952, 1024465134, -1128775579) + W(2, -1135578111, 1013443151, 1049128967, -1098008683) + W(3, 1029346938, -1114797945, 1068130737, -1080443718) + W(4, 1017473747, -1122100892, 1046423571, -1101482344) + W(5, 1012413655, -1128721387, -1143058109, -1137148015) + W(6, -1133405571, -1166794345, 1020545683, -1128178767) + W(7, 1008139351, -1156685818, -1126785325, 991435034); WS(1057767608, -1132080751); - sum1 
= W(0, 1026028453, 1025766741, 1035118319, 1012106581) + W(1, 1026017621, -1135552917, 1040474693, -1138611630) + W(2, -1117947285, 1051769667, -1111744027, 1030333189) + W(3, 1048679017, -1083959172, -1084413328, 1045191121) + W(4, 1025261389, -1120826122, 1049618505, -1122181545) + W(5, 1011196341, 1045191525, -1110336171, 1030480605) + W(6, 1015828970, 1028389741, 1028257397, 1027514349) + W(7, 1025013027, 1039505775, -1123719333, 1020294666); sum2 = W(0, 1017587161, -1101123140, 1040188371, 988296658) + W(1, 1028118553, -1103020887, 1022642341, 1010063898) + W(2, 1008167722, -1099714612, 1039093756, 1026403646) + W(3, 1005112948, 1049070164, 1046164698, 1033545355) + W(4, -1125344655, 1032013714, -1111525569, 1002132020) + W(5, 1015776789, 1022049457, -1098832696, 1037334715) + W(6, -1148301500, 1009340114, -1115917000, -1139728254) + W(7, -1138850406, -1167693540, -1103378287, 1035581889); WS(-1099372256, -1088618788); - sum1 = W(0, -1112538182, 1048693927, -1112344546, -1109099742) + W(1, -1113349022, 1033711782, -1129092599, -1110127398) + W(2, -1103996671, 1064716592, -1086749016, 1032699126) + W(3, 1024020908, -1143605597, 1044926535, -1121424940) + W(4, 1046614908, -1085173359, 1062252083, -1130166943) + W(5, -1111225386, 1004694493, 1040479887, -1106709441) + W(6, -1110537326, -1108087402, 1034104622, -1120726228) + W(7, -1114146165, -1138402062, 1042110371, -1106064827); sum2 = W(0, 987083788, 1013472954, -1120418118, 979955865) + W(1, -1144106823, -1131186779, -1122269098, -1163904780) + W(2, -1120467381, -1139561796, 1038342084, -1115615181) + W(3, -1121977305, 1044091298, 1042996066, -1127292875) + W(4, -1118651341, 1038343490, -1118476220, -1123141745) + W(5, -1162389292, -1115306287, -1128689408, 1014320394) + W(6, -1152635694, -1155962630, -1132569906, -1135582470) + W(7, 964510307, -1117365756, -1141833923, 1008840046); WS(1041282784, 1044242623); - sum1 = W(0, -1119885764, -1171512555, 1003864029, 1025494836) + W(1, -1119816052, -1121861252, 
1040963149, -1113504879) + W(2, -1100880653, 1057266723, -1094412795, 1043843337) + W(3, -1113812594, 1010135439, -1118004569, -1125989575) + W(4, 1046531310, -1089952515, 1056310444, -1156936827) + W(5, 1015358999, 1031135156, -1114099002, -1122714492) + W(6, 1005085853, -1115226950, 1015234855, 1003362397) + W(7, 1021011107, 1003139037, 992693307, -1120612644); sum2 = W(0, 1005317381, -1142619324, -1126266146, 1026462555) + W(1, -1143827754, 1012902153, -1128784654, 1020893616) + W(2, 1019060164, -1114788024, -1094218173, 1054132458) + W(3, 1009279342, -1098688460, -1078812823, 1070492026) + W(4, 1014092605, -1120377499, -1099532818, 1048935725) + W(5, -1131000233, 1017453102, 1007638067, 1011358224) + W(6, 1012779564, -1139793504, -1130333980, 1015734963) + W(7, -1137528453, -1147729078, 1018177647, 987943782); WS(1046635232, 1024078131); - sum1 = W(0, 1002735212, 1035063871, -1097977761, 1040314319) + W(1, 1025138813, 1034039879, -1105608655, 1035664624) + W(2, 1017042555, 1044122447, -1094991056, 1038536855) + W(3, -1132524982, -1110416695, 1051547730, -1114843703) + W(4, 1031803657, -1092481954, 1050188814, 1003107468) + W(5, 1033606155, -1094320024, 1047410847, 1019470987) + W(6, 1021596219, -1107502027, 1031346589, 1021345835) + W(7, 1015508823, -1103391009, 1046101811, -1136683190); sum2 = W(0, -1096475926, 1044036812, 1052862983, -1106234474) + W(1, -1112281069, -1112231286, 1024115789, -1121785528) + W(2, -1116645717, -1111398905, 1051331710, -1130292776) + W(3, 1041647377, -1096068583, 1038036111, 1037359643) + W(4, -1113263240, 1026411348, 1042458641, -1111704128) + W(5, 1023473494, -1114320784, 1028002558, -1123406807) + W(6, -1117017643, -1138574198, 1037890580, -1109714921) + W(7, 1039764966, -1104710548, -1106844581, 1041123403); WS(-1088554040, -1076674880); - sum1 = W(0, 1026292820, -1132973070, -1144171612, -1130131975) + W(1, 1016736263, 1034501898, -1110973538, 1028857234) + W(2, 1042339025, -1089525132, 1052671191, -1108906970) + W(3, 
-1110236986, 1037427962, -1123890785, -1112145786) + W(4, -1103961368, 1056478885, -1092344862, 1002874044) + W(5, 1016313655, -1118983748, 1041641985, 1025897228) + W(6, -1151588920, 1038469390, 1010979982, -1130905399) + W(7, 1014755782, -1123320716, 1017396903, 1033705562); sum2 = W(0, 1013915195, -1133182691, -1127318198, 1020584890) + W(1, 1007730851, 1024414743, -1121307593, 1005058566) + W(2, 981970521, -1111248658, 1035588225, -1124411850) + W(3, 1028189234, 1040952978, 1057294107, 1029625115) + W(4, -1121038101, -1109339192, -1107404728, 1026110889) + W(5, -1142484934, -1094377458, 1024397525, 1023925523) + W(6, -1146368902, -1116592821, -1118541421, -1140327971) + W(7, 1010322539, -1112421528, 1019759378, -1199698720); WS(1063581112, 1015292283); - sum1 = W(0, -1123806598, -1125096044, 1046804719, -1117498166) + W(1, -1124445804, 1037634467, 1028314614, 1006823135) + W(2, 1036776315, -1083793455, 1064148787, -1106689849) + W(3, -1112186771, -1098422117, 1034155462, 1004978479) + W(4, -1102837698, 1058965073, -1089226130, 1033810693) + W(5, -1117642958, -1106625757, 1037373467, 1029436414) + W(6, -1137018200, 1036181095, 994321759, -1119765454) + W(7, 1010580432, -1127761788, 1021285644, 1034713459); sum2 = W(0, -1127012521, -1110373665, -1121983257, 1021812843) + W(1, -1129458054, -1122115974, -1121551577, 1015201109) + W(2, -1134632819, -1118435057, -1107711610, 1039413537) + W(3, -1113739078, 1041258512, 1043546644, -1127386873) + W(4, -1106078947, 1025961773, 1048226293, -1110385416) + W(5, -1115241196, 1041055451, -1131486243, -1135801459) + W(6, -1122814807, 1025056413, -1139476701, -1132245806) + W(7, -1119046895, 1029845331, 1018415015, -1140149017); WS(-1109010880, -1087548956); - sum1 = W(0, 1034947768, -1095012676, 1046023882, 1029737824) + W(1, 1034343312, -1102610188, 1039446704, 1025692706) + W(2, 1016751552, -1096454908, 1042564604, 1038373096) + W(3, 1019661856, -1091443170, -1105694067, 1039271048) + W(4, -1126501287, -1131030249, 
1044246468, 1012879825) + W(5, 1017025648, 1042942296, -1103700296, 1041317114) + W(6, 1030724160, 1019936112, -1141422594, 1029263800) + W(7, -1140792121, 1024647464, -1107855416, 1041193844); sum2 = W(0, 1034034732, -1107522705, -1105460279, 1021740679) + W(1, -1113997103, -1121503695, 1038975878, -1112744336) + W(2, 1028771217, -1114143244, 1032873918, -1121564954) + W(3, 1025456143, -1105773446, 1059420344, 1024971971) + W(4, 1035315492, -1109746606, 1040681265, -1122379806) + W(5, -1102403849, -1106040358, 1046039582, -1106873869) + W(6, 1018212015, -1106459627, 1026290649, -1130313815) + W(7, -1099438501, 1039219872, 1046943722, -1105420350); WS(-1086299832, -1077288694); - sum1 = W(0, 1021716686, -1099039878, -1111509136, 1039618828) + W(1, -1132921948, -1108540692, 1021468846, -1131678690) + W(2, -1113901292, -1158126306, -1096197083, 1041516082) + W(3, -1108835908, 1055092577, 1062013047, -1118733319) + W(4, 1023078294, -1089051407, 1050708993, -1122936235) + W(5, 965138311, -1113759276, 1022391342, 1015065790) + W(6, 998651320, -1107695832, -1133490396, 997649137) + W(7, -1130194922, -1113503632, 991635057, 1023538631); sum2 = W(0, -1133976495, 1035891239, -1130801609, -1113698362) + W(1, 1027343155, 1030599513, -1108453664, 1016406968) + W(2, -1149877867, 1037590422, 1012747883, -1108226898) + W(3, -1119506980, 1054189655, -1119322812, -1120928356) + W(4, -1126385541, 1041308688, -1107379808, 1016225738) + W(5, 1016526837, -1112736561, -1119223720, 988482485) + W(6, 994153115, 1004824957, -1116360142, 1018050885) + W(7, -1140785051, -1120347934, -1129452107, -1117792638); WS(-1113279936, 1066223903); - sum1 = W(0, -1128171420, 1040261344, -1112013315, -1123695998) + W(1, -1141738481, -1140107833, -1116929726, -1154978689) + W(2, -1138940153, 1050703688, -1108200895, -1123177006) + W(3, 1044160156, -1100167260, -1100730273, 1034288823) + W(4, 1020686276, -1130335589, 1040782300, -1141423761) + W(5, -1129655596, 1035637471, 1024316286, -1114187043) + W(6, 
964173357, -1124525100, 1014134393, 1013984857) + W(7, -1123239900, 1032644739, 1029624526, -1108229911); sum2 = W(0, -1115606620, 1021458196, 1009639320, -1131253088) + W(1, -1125272644, 1017345212, 1016051020, -1143902384) + W(2, -1099614716, 1047257730, -1120838650, 1020803060) + W(3, -1080575150, 1068148121, -1113655261, 1032085971) + W(4, -1102155153, 1044966894, -1132238288, 1016311348) + W(5, -1122847678, 1026244022, -1130782536, -1137376840) + W(6, -1123394906, 1017049220, 967940860, -1137115752) + W(7, -1129056732, 1010161976, 1004223696, -1136984808); WS(1060545080, -1126581603); - sum1 = W(0, 1032630360, -1112268976, 1045186906, -1125010622) + W(1, 1037657648, -1128752350, 1032285712, 1029508223) + W(2, 1043836232, -1090205186, 1053340438, -1108078856) + W(3, 1037448680, 1048595306, -1094666759, 1041691860) + W(4, 976149203, 1057651571, -1082657749, 1042698525) + W(5, 1031833596, 1035187792, -1092127852, 1040118132) + W(6, 1031675647, 1034806588, -1104761760, 1033087420) + W(7, 1025282125, 1043419290, -1096441814, 1034587656); sum2 = W(0, -1123698886, 1034075649, 998149095, -1113635181) + W(1, -1126365381, 1026991402, -1118780236, -1168196508) + W(2, -1135914762, 1019253181, 1023543366, -1114469118) + W(3, -1121651762, 1047572688, 1038479879, -1145545780) + W(4, -1118625490, 1035108181, -1114677625, 992781287) + W(5, -1122087574, -1115886918, 1011684618, -1139655050) + W(6, -1147908244, 1016718341, -1132109957, -1142844852) + W(7, -1134045690, -1117034488, -1137057610, 1007905050); WS(-1083899832, -1105526146); - sum1 = W(0, 1026357515, -1119744955, -1117075907, -1111407198) + W(1, -1139718894, -1125720471, -1106102943, -1152407445) + W(2, 1044187583, -1092285679, 1048719011, -1107209883) + W(3, -1105573131, 1062437883, 1052836221, -1107292779) + W(4, -1104526300, 1058460257, -1089717563, -1122559055) + W(5, -1119529939, 1022150135, -1123085499, -1119739267) + W(6, -1125768375, 1033366698, -1114009838, -1119196243) + W(7, -1132776678, 1009731342, 
-1112611206, -1129505495); sum2 = W(0, -1110807022, 1025172792, 1033543849, -1123816828) + W(1, -1129400032, -1117035240, 999654946, -1144812946) + W(2, -1105612607, 1035443403, 1039345667, -1120747576) + W(3, -1123619892, -1135427545, 1053020794, -1113498942) + W(4, -1131262448, -1111010692, 1047843748, -1113301822) + W(5, 1016529300, -1115955576, -1135856481, -1146605522) + W(6, -1129444600, -1117326476, 1022819536, -1119691028) + W(7, -1136239801, -1121250556, 998047364, -1135792457); WS(-1107513792, 1064663354); - sum1 = W(0, 1030862455, -1113532308, 1032378968, -1123071015) + W(1, -1161118946, 1021510766, -1127591630, 1009770420) + W(2, 1040244826, -1091621085, 1051734861, -1107582956) + W(3, -1104300038, 1046262406, 1034822530, -1108820108) + W(4, -1102940181, 1054782000, -1095483267, -1125175670) + W(5, -1135077628, 1019068110, 1031948820, 1025488559) + W(6, -1135539484, 1036941280, -1172984259, -1126076542) + W(7, 1011863892, -1128724830, -1120336759, 1036426604); sum2 = W(0, -1135206239, -1140752647, 1022777359, 974924014) + W(1, -1139065871, -1123380440, 1021581075, -1133276463) + W(2, 1026230428, 988696695, -1122295168, 1029689087) + W(3, 1025917606, -1092786651, -1085937537, -1140169471) + W(4, 1027050280, 1049996339, 1032573953, -1135329695) + W(5, 1013849783, 1057784826, -1130048007, -1124883951) + W(6, 1016077019, 1033822297, 1032545188, 1011238415) + W(7, -1127829351, 1034470972, -1137094527, 1001568686); WS(1058918200, -1121082995); return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); } -float GetLuma(float3 color) { - return dot(float3(0.299f, 0.587f, 0.114f), color); -} +shared float inp[429]; -groupshared float inp[429]; +#define CURRENT_PASS 1 -void Pass1(uint2 blockStart, uint3 threadId) { - const float2 inputPt = GetInputPt(); +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { temp[pos] = (value); } - const uint2 group_base = 
uint2(blockStart.x, blockStart.y >> 1); - for (int id = threadId.x * MP_NUM_THREADS_Y + threadId.y; id < 429; id += MP_NUM_THREADS_X * MP_NUM_THREADS_Y) { +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 429; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { uint x = (uint)id / 11, y = (uint)id % 11; - inp[id] = GetLuma(INPUT.SampleLevel(sam, inputPt * float2(group_base.x + x - 3 + 0.5, group_base.y + y - 1 + 0.5), 0).rgb); + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (3)) + 0.5, float(group_base.y + y - (1)) + 0.5)).x; } - - GroupMemoryBarrierWithGroupSync(); - - float4 ret = 0.0; - float4 ret0 = 0.0; - float4 samples[8]; - const uint local_pos = threadId.x * 11 + threadId.y; - [unroll] - for (int i = 0; i < 8; ++i) { - [unroll] - for (int j = 0; j < 4; ++j) { - samples[i][j] = inp[local_pos + i * 11 + j]; - } + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[8]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 11]; + samples[1][1] = inp[local_pos + 12]; + samples[1][2] = inp[local_pos + 13]; + samples[1][3] = inp[local_pos + 14]; + samples[2][0] = inp[local_pos + 22]; + samples[2][1] = inp[local_pos + 23]; + samples[2][2] = inp[local_pos + 24]; + samples[2][3] = inp[local_pos + 25]; + samples[3][0] = inp[local_pos + 33]; + samples[3][1] = inp[local_pos + 34]; + samples[3][2] = 
inp[local_pos + 35]; + samples[3][3] = inp[local_pos + 36]; + samples[4][0] = inp[local_pos + 44]; + samples[4][1] = inp[local_pos + 45]; + samples[4][2] = inp[local_pos + 46]; + samples[4][3] = inp[local_pos + 47]; + samples[5][0] = inp[local_pos + 55]; + samples[5][1] = inp[local_pos + 56]; + samples[5][2] = inp[local_pos + 57]; + samples[5][3] = inp[local_pos + 58]; + samples[6][0] = inp[local_pos + 66]; + samples[6][1] = inp[local_pos + 67]; + samples[6][2] = inp[local_pos + 68]; + samples[6][3] = inp[local_pos + 69]; + samples[7][0] = inp[local_pos + 77]; + samples[7][1] = inp[local_pos + 78]; + samples[7][2] = inp[local_pos + 79]; + samples[7][3] = inp[local_pos + 80]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 34]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; } - - const uint2 destPos = blockStart + uint2(threadId.x, threadId.y * 2); - tex1[destPos] = samples[3][1]; - tex1[destPos + uint2(0, 1)] = nnedi3(samples); +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret); } - - //!PASS 2 -//!DESC double_x -//!IN tex1, INPUT -//!BLOCK_SIZE 64,8 -//!NUM_THREADS 32,8 - -float nnedi3(float4 samples[8]) { +//!DESC NNEDI3 (double_x, nns16, win8x4) +//!IN INPUT, temp +//!OUT OUTPUT +//!BLOCK_SIZE 64, 8 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[8]) { float sum = 0.0, sumsq = 0.0; - [unroll] - for (int i = 0; i < 8; i++) { - sum += dot(samples[i], 1.0f); + [unroll] for (int i = 0; i < 8; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); sumsq += dot(samples[i], samples[i]); } - float mstd0 = sum / 32.0; float mstd1 = sumsq / 32.0 - mstd0 * mstd0; - // 不能使用 lerp,否则结果可能为 nan - float mstd2 = mstd1 >= 1.192092896e-7 ? 
rsqrt(mstd1) : 0.0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); mstd1 *= mstd2; - float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, -1123354974, -1118620174, 1016130204, -1103968288) + + W(1, -1101453425, -1117908518, -1122284590, -1122818124) + + W(2, -1112248839, 1024662558, -1087068557, 1048182784) + + W(3, 1059583965, -1119323982, 1027638054, -1137723992) + W(4, 1046299686, 1028038478, 1063313277, 1047279381) + + W(5, -1088182320, 1034186247, -1124394588, 978245507) + + W(6, -1143613552, -1129268360, -1103342192, -1115088511) + + W(7, 1003350800, -1134684248, -1111377363, 1028117438); + sum2 = + W(0, -1162931039, -1136248556, -1115594515, -1119907402) + W(1, 1022791334, 1017500018, -1156766128, 1014782692) + + W(2, -1131063526, -1131086728, -1128443230, 1044675527) + + W(3, -1107588397, -1111169922, -1125594766, -1135599628) + + W(4, 1029801649, 1031011705, 1042762789, 1050674207) + W(5, 1009001220, -1112569685, -1148613464, -1114139175) + + W(6, -1117642655, -1128864654, -1107118398, -1113986381) + + W(7, -1186206458, 1017255694, 993928432, 1007622876); + WS(1038828992, 1041685264); + sum1 = W(0, -1114329248, 995958527, -1117673776, 1034401008) + W(1, 1038612842, 1035088196, 1008781376, 1029875310) + + W(2, 1049950910, 1027336960, 1060640651, 1045782072) + + W(3, -1098159517, -1106507532, -1124000392, -1105439902) + + W(4, -1097681183, -1107326552, -1085831405, -1105157973) + + W(5, 1053136924, 1032016120, 1023707152, 1034119968) + W(6, 1028668144, 1025858258, 1033402064, -1122828000) + + W(7, -1110558370, -1113173980, 1012109856, -1114749520); + sum2 = W(0, 1031315360, -1131767489, 1023637252, -1107869385) + + W(1, -1107666272, -1117110288, -1124350185, 
-1110538107) + + W(2, -1099468189, -1140834082, -1100127579, 1052854494) + + W(3, 1034036134, 1024451620, 1003450083, 1041131277) + W(4, -1112139926, 1024287080, -1117241706, 1052996200) + + W(5, 1027811452, 1027157968, -1131082337, 1035032776) + + W(6, 1036663822, -1122285462, 1038018354, -1112496415) + + W(7, -1110479054, -1112615559, 998992195, -1106762474); + WS(-1086074680, 1053637716); + sum1 = W(0, -1121345387, -1148805338, -1136570733, 1049266593) + + W(1, 1027081787, -1133439181, -1158465386, -1120818857) + + W(2, 1042002951, -1165378922, 1052460699, -1094571489) + + W(3, -1124281856, 1040734807, -1121708851, 1035650578) + + W(4, -1113042450, -1115297518, -1107443934, -1098765182) + + W(5, 1043313411, 1006695533, 1016359031, 1027853163) + W(6, -1121398619, 991217235, -1117268427, 1036113926) + + W(7, -1136658365, -1112513138, 1021173351, -1106476275); + sum2 = W(0, 1026517575, 1017334370, 1051558711, 1071692777) + W(1, 1049822619, 1020207734, 1022251878, 1019995718) + + W(2, -1170492850, 1003954710, -1096673587, -1077357700) + + W(3, -1098179385, 996694924, -1122577241, 1015494226) + + W(4, -1138816415, -1132363566, -1136175651, -1098960792) + + W(5, -1116986501, 1003290486, -1141894102, -1126828734) + + W(6, -1143472678, 998846550, -1124275402, 1018703670) + + W(7, 1007812651, 1007766851, 1009415395, -1163222937); + WS(1051521136, 1027207116); + sum1 = W(0, -1122694020, -1121133108, -1102175837, -1116351908) + + W(1, 1045518980, 1009393969, 1008379217, 1015772516) + W(2, 1010830545, -1124202632, 1057246783, 1026322980) + + W(3, -1089509425, 1025178484, -1117338572, 1009646833) + + W(4, -1124291704, 1037913146, -1093542759, 982577970) + W(5, 1055793637, -1118947636, 1001093793, 1001810977) + + W(6, 1018062184, -1116091286, 1041281977, -1125394504) + + W(7, 1008755233, -1127575032, 1015898776, -1121163492); + sum2 = W(0, -1137495011, 1013616911, -1135578111, 1029346938) + + W(1, 1017473747, 1012413655, -1133405571, 1008139351) + + W(2, -1135527491, 
-1123650952, 1013443151, -1114797945) + + W(3, -1122100892, -1128721387, -1166794345, -1156685818) + + W(4, 1027730022, 1024465134, 1049128967, 1068130737) + W(5, 1046423571, -1143058109, 1020545683, -1126785325) + + W(6, -1118108263, -1128775579, -1098008683, -1080443718) + + W(7, -1101482344, -1137148015, -1128178767, 991435034); + WS(1057767608, -1132080751); + sum1 = + W(0, 1026028453, 1026017621, -1117947285, 1048679017) + W(1, 1025261389, 1011196341, 1015828970, 1025013027) + + W(2, 1025766741, -1135552917, 1051769667, -1083959172) + W(3, -1120826122, 1045191525, 1028389741, 1039505775) + + W(4, 1035118319, 1040474693, -1111744027, -1084413328) + + W(5, 1049618505, -1110336171, 1028257397, -1123719333) + W(6, 1012106581, -1138611630, 1030333189, 1045191121) + + W(7, -1122181545, 1030480605, 1027514349, 1020294666); + sum2 = W(0, 1017587161, 1028118553, 1008167722, 1005112948) + + W(1, -1125344655, 1015776789, -1148301500, -1138850406) + + W(2, -1101123140, -1103020887, -1099714612, 1049070164) + + W(3, 1032013714, 1022049457, 1009340114, -1167693540) + W(4, 1040188371, 1022642341, 1039093756, 1046164698) + + W(5, -1111525569, -1098832696, -1115917000, -1103378287) + + W(6, 988296658, 1010063898, 1026403646, 1033545355) + W(7, 1002132020, 1037334715, -1139728254, 1035581889); + WS(-1099372256, -1088618788); + sum1 = W(0, -1112538182, -1113349022, -1103996671, 1024020908) + + W(1, 1046614908, -1111225386, -1110537326, -1114146165) + + W(2, 1048693927, 1033711782, 1064716592, -1143605597) + + W(3, -1085173359, 1004694493, -1108087402, -1138402062) + + W(4, -1112344546, -1129092599, -1086749016, 1044926535) + + W(5, 1062252083, 1040479887, 1034104622, 1042110371) + + W(6, -1109099742, -1110127398, 1032699126, -1121424940) + + W(7, -1130166943, -1106709441, -1120726228, -1106064827); + sum2 = W(0, 987083788, -1144106823, -1120467381, -1121977305) + + W(1, -1118651341, -1162389292, -1152635694, 964510307) + + W(2, 1013472954, -1131186779, -1139561796, 1044091298) + 
+ W(3, 1038343490, -1115306287, -1155962630, -1117365756) + + W(4, -1120418118, -1122269098, 1038342084, 1042996066) + + W(5, -1118476220, -1128689408, -1132569906, -1141833923) + + W(6, 979955865, -1163904780, -1115615181, -1127292875) + + W(7, -1123141745, 1014320394, -1135582470, 1008840046); + WS(1041282784, 1044242623); + sum1 = W(0, -1119885764, -1119816052, -1100880653, -1113812594) + + W(1, 1046531310, 1015358999, 1005085853, 1021011107) + W(2, -1171512555, -1121861252, 1057266723, 1010135439) + + W(3, -1089952515, 1031135156, -1115226950, 1003139037) + + W(4, 1003864029, 1040963149, -1094412795, -1118004569) + W(5, 1056310444, -1114099002, 1015234855, 992693307) + + W(6, 1025494836, -1113504879, 1043843337, -1125989575) + + W(7, -1156936827, -1122714492, 1003362397, -1120612644); + sum2 = W(0, 1005317381, -1143827754, 1019060164, 1009279342) + + W(1, 1014092605, -1131000233, 1012779564, -1137528453) + + W(2, -1142619324, 1012902153, -1114788024, -1098688460) + + W(3, -1120377499, 1017453102, -1139793504, -1147729078) + + W(4, -1126266146, -1128784654, -1094218173, -1078812823) + + W(5, -1099532818, 1007638067, -1130333980, 1018177647) + W(6, 1026462555, 1020893616, 1054132458, 1070492026) + + W(7, 1048935725, 1011358224, 1015734963, 987943782); + WS(1046635232, 1024078131); + sum1 = W(0, 1002735212, 1025138813, 1017042555, -1132524982) + W(1, 1031803657, 1033606155, 1021596219, 1015508823) + + W(2, 1035063871, 1034039879, 1044122447, -1110416695) + + W(3, -1092481954, -1094320024, -1107502027, -1103391009) + + W(4, -1097977761, -1105608655, -1094991056, 1051547730) + + W(5, 1050188814, 1047410847, 1031346589, 1046101811) + W(6, 1040314319, 1035664624, 1038536855, -1114843703) + + W(7, 1003107468, 1019470987, 1021345835, -1136683190); + sum2 = W(0, -1096475926, -1112281069, -1116645717, 1041647377) + + W(1, -1113263240, 1023473494, -1117017643, 1039764966) + + W(2, 1044036812, -1112231286, -1111398905, -1096068583) + + W(3, 1026411348, -1114320784, 
-1138574198, -1104710548) + + W(4, 1052862983, 1024115789, 1051331710, 1038036111) + W(5, 1042458641, 1028002558, 1037890580, -1106844581) + + W(6, -1106234474, -1121785528, -1130292776, 1037359643) + + W(7, -1111704128, -1123406807, -1109714921, 1041123403); + WS(-1088554040, -1076674880); + sum1 = W(0, 1026292820, 1016736263, 1042339025, -1110236986) + + W(1, -1103961368, 1016313655, -1151588920, 1014755782) + + W(2, -1132973070, 1034501898, -1089525132, 1037427962) + + W(3, 1056478885, -1118983748, 1038469390, -1123320716) + + W(4, -1144171612, -1110973538, 1052671191, -1123890785) + + W(5, -1092344862, 1041641985, 1010979982, 1017396903) + + W(6, -1130131975, 1028857234, -1108906970, -1112145786) + + W(7, 1002874044, 1025897228, -1130905399, 1033705562); + sum2 = W(0, 1013915195, 1007730851, 981970521, 1028189234) + W(1, -1121038101, -1142484934, -1146368902, 1010322539) + + W(2, -1133182691, 1024414743, -1111248658, 1040952978) + + W(3, -1109339192, -1094377458, -1116592821, -1112421528) + + W(4, -1127318198, -1121307593, 1035588225, 1057294107) + + W(5, -1107404728, 1024397525, -1118541421, 1019759378) + + W(6, 1020584890, 1005058566, -1124411850, 1029625115) + + W(7, 1026110889, 1023925523, -1140327971, -1199698720); + WS(1063581112, 1015292283); + sum1 = W(0, -1123806598, -1124445804, 1036776315, -1112186771) + + W(1, -1102837698, -1117642958, -1137018200, 1010580432) + + W(2, -1125096044, 1037634467, -1083793455, -1098422117) + + W(3, 1058965073, -1106625757, 1036181095, -1127761788) + W(4, 1046804719, 1028314614, 1064148787, 1034155462) + + W(5, -1089226130, 1037373467, 994321759, 1021285644) + W(6, -1117498166, 1006823135, -1106689849, 1004978479) + + W(7, 1033810693, 1029436414, -1119765454, 1034713459); + sum2 = + W(0, -1127012521, -1129458054, -1134632819, -1113739078) + + W(1, -1106078947, -1115241196, -1122814807, -1119046895) + + W(2, -1110373665, -1122115974, -1118435057, 1041258512) + W(3, 1025961773, 1041055451, 1025056413, 1029845331) + + W(4, 
-1121983257, -1121551577, -1107711610, 1043546644) + + W(5, 1048226293, -1131486243, -1139476701, 1018415015) + W(6, 1021812843, 1015201109, 1039413537, -1127386873) + + W(7, -1110385416, -1135801459, -1132245806, -1140149017); + WS(-1109010880, -1087548956); + sum1 = W(0, 1034947768, 1034343312, 1016751552, 1019661856) + W(1, -1126501287, 1017025648, 1030724160, -1140792121) + + W(2, -1095012676, -1102610188, -1096454908, -1091443170) + + W(3, -1131030249, 1042942296, 1019936112, 1024647464) + W(4, 1046023882, 1039446704, 1042564604, -1105694067) + + W(5, 1044246468, -1103700296, -1141422594, -1107855416) + + W(6, 1029737824, 1025692706, 1038373096, 1039271048) + W(7, 1012879825, 1041317114, 1029263800, 1041193844); + sum2 = W(0, 1034034732, -1113997103, 1028771217, 1025456143) + + W(1, 1035315492, -1102403849, 1018212015, -1099438501) + + W(2, -1107522705, -1121503695, -1114143244, -1105773446) + + W(3, -1109746606, -1106040358, -1106459627, 1039219872) + + W(4, -1105460279, 1038975878, 1032873918, 1059420344) + W(5, 1040681265, 1046039582, 1026290649, 1046943722) + + W(6, 1021740679, -1112744336, -1121564954, 1024971971) + + W(7, -1122379806, -1106873869, -1130313815, -1105420350); + WS(-1086299832, -1077288694); + sum1 = W(0, 1021716686, -1132921948, -1113901292, -1108835908) + W(1, 1023078294, 965138311, 998651320, -1130194922) + + W(2, -1099039878, -1108540692, -1158126306, 1055092577) + + W(3, -1089051407, -1113759276, -1107695832, -1113503632) + + W(4, -1111509136, 1021468846, -1096197083, 1062013047) + W(5, 1050708993, 1022391342, -1133490396, 991635057) + + W(6, 1039618828, -1131678690, 1041516082, -1118733319) + + W(7, -1122936235, 1015065790, 997649137, 1023538631); + sum2 = W(0, -1133976495, 1027343155, -1149877867, -1119506980) + + W(1, -1126385541, 1016526837, 994153115, -1140785051) + W(2, 1035891239, 1030599513, 1037590422, 1054189655) + + W(3, 1041308688, -1112736561, 1004824957, -1120347934) + + W(4, -1130801609, -1108453664, 1012747883, 
-1119322812) + + W(5, -1107379808, -1119223720, -1116360142, -1129452107) + + W(6, -1113698362, 1016406968, -1108226898, -1120928356) + + W(7, 1016225738, 988482485, 1018050885, -1117792638); + WS(-1113279936, 1066223903); + sum1 = + W(0, -1128171420, -1141738481, -1138940153, 1044160156) + W(1, 1020686276, -1129655596, 964173357, -1123239900) + + W(2, 1040261344, -1140107833, 1050703688, -1100167260) + + W(3, -1130335589, 1035637471, -1124525100, 1032644739) + + W(4, -1112013315, -1116929726, -1108200895, -1100730273) + + W(5, 1040782300, 1024316286, 1014134393, 1029624526) + W(6, -1123695998, -1154978689, -1123177006, 1034288823) + + W(7, -1141423761, -1114187043, 1013984857, -1108229911); + sum2 = W(0, -1115606620, -1125272644, -1099614716, -1080575150) + + W(1, -1102155153, -1122847678, -1123394906, -1129056732) + + W(2, 1021458196, 1017345212, 1047257730, 1068148121) + W(3, 1044966894, 1026244022, 1017049220, 1010161976) + + W(4, 1009639320, 1016051020, -1120838650, -1113655261) + + W(5, -1132238288, -1130782536, 967940860, 1004223696) + + W(6, -1131253088, -1143902384, 1020803060, 1032085971) + + W(7, 1016311348, -1137376840, -1137115752, -1136984808); + WS(1060545080, -1126581603); + sum1 = W(0, 1032630360, 1037657648, 1043836232, 1037448680) + W(1, 976149203, 1031833596, 1031675647, 1025282125) + + W(2, -1112268976, -1128752350, -1090205186, 1048595306) + + W(3, 1057651571, 1035187792, 1034806588, 1043419290) + W(4, 1045186906, 1032285712, 1053340438, -1094666759) + + W(5, -1082657749, -1092127852, -1104761760, -1096441814) + + W(6, -1125010622, 1029508223, -1108078856, 1041691860) + + W(7, 1042698525, 1040118132, 1033087420, 1034587656); + sum2 = W(0, -1123698886, -1126365381, -1135914762, -1121651762) + + W(1, -1118625490, -1122087574, -1147908244, -1134045690) + + W(2, 1034075649, 1026991402, 1019253181, 1047572688) + W(3, 1035108181, -1115886918, 1016718341, -1117034488) + + W(4, 998149095, -1118780236, 1023543366, 1038479879) + + W(5, -1114677625, 
1011684618, -1132109957, -1137057610) + + W(6, -1113635181, -1168196508, -1114469118, -1145545780) + + W(7, 992781287, -1139655050, -1142844852, 1007905050); + WS(-1083899832, -1105526146); + sum1 = W(0, 1026357515, -1139718894, 1044187583, -1105573131) + + W(1, -1104526300, -1119529939, -1125768375, -1132776678) + + W(2, -1119744955, -1125720471, -1092285679, 1062437883) + + W(3, 1058460257, 1022150135, 1033366698, 1009731342) + W(4, -1117075907, -1106102943, 1048719011, 1052836221) + + W(5, -1089717563, -1123085499, -1114009838, -1112611206) + + W(6, -1111407198, -1152407445, -1107209883, -1107292779) + + W(7, -1122559055, -1119739267, -1119196243, -1129505495); + sum2 = W(0, -1110807022, -1129400032, -1105612607, -1123619892) + + W(1, -1131262448, 1016529300, -1129444600, -1136239801) + + W(2, 1025172792, -1117035240, 1035443403, -1135427545) + + W(3, -1111010692, -1115955576, -1117326476, -1121250556) + + W(4, 1033543849, 999654946, 1039345667, 1053020794) + W(5, 1047843748, -1135856481, 1022819536, 998047364) + + W(6, -1123816828, -1144812946, -1120747576, -1113498942) + + W(7, -1113301822, -1146605522, -1119691028, -1135792457); + WS(-1107513792, 1064663354); + sum1 = W(0, 1030862455, -1161118946, 1040244826, -1104300038) + + W(1, -1102940181, -1135077628, -1135539484, 1011863892) + + W(2, -1113532308, 1021510766, -1091621085, 1046262406) + + W(3, 1054782000, 1019068110, 1036941280, -1128724830) + W(4, 1032378968, -1127591630, 1051734861, 1034822530) + + W(5, -1095483267, 1031948820, -1172984259, -1120336759) + + W(6, -1123071015, 1009770420, -1107582956, -1108820108) + + W(7, -1125175670, 1025488559, -1126076542, 1036426604); + sum2 = + W(0, -1135206239, -1139065871, 1026230428, 1025917606) + W(1, 1027050280, 1013849783, 1016077019, -1127829351) + + W(2, -1140752647, -1123380440, 988696695, -1092786651) + W(3, 1049996339, 1057784826, 1033822297, 1034470972) + + W(4, 1022777359, 1021581075, -1122295168, -1085937537) + + W(5, 1032573953, -1130048007, 
1032545188, -1137094527) + W(6, 974924014, -1133276463, 1029689087, -1140169471) + + W(7, -1135329695, -1124883951, 1011238415, 1001568686); + WS(1058918200, -1121082995); - sum1 = W(0, -1123354974, -1118620174, 1016130204, -1103968288) + W(1, -1101453425, -1117908518, -1122284590, -1122818124) + W(2, -1112248839, 1024662558, -1087068557, 1048182784) + W(3, 1059583965, -1119323982, 1027638054, -1137723992) + W(4, 1046299686, 1028038478, 1063313277, 1047279381) + W(5, -1088182320, 1034186247, -1124394588, 978245507) + W(6, -1143613552, -1129268360, -1103342192, -1115088511) + W(7, 1003350800, -1134684248, -1111377363, 1028117438); sum2 = W(0, -1162931039, -1136248556, -1115594515, -1119907402) + W(1, 1022791334, 1017500018, -1156766128, 1014782692) + W(2, -1131063526, -1131086728, -1128443230, 1044675527) + W(3, -1107588397, -1111169922, -1125594766, -1135599628) + W(4, 1029801649, 1031011705, 1042762789, 1050674207) + W(5, 1009001220, -1112569685, -1148613464, -1114139175) + W(6, -1117642655, -1128864654, -1107118398, -1113986381) + W(7, -1186206458, 1017255694, 993928432, 1007622876); WS(1038828992, 1041685264); - sum1 = W(0, -1114329248, 995958527, -1117673776, 1034401008) + W(1, 1038612842, 1035088196, 1008781376, 1029875310) + W(2, 1049950910, 1027336960, 1060640651, 1045782072) + W(3, -1098159517, -1106507532, -1124000392, -1105439902) + W(4, -1097681183, -1107326552, -1085831405, -1105157973) + W(5, 1053136924, 1032016120, 1023707152, 1034119968) + W(6, 1028668144, 1025858258, 1033402064, -1122828000) + W(7, -1110558370, -1113173980, 1012109856, -1114749520); sum2 = W(0, 1031315360, -1131767489, 1023637252, -1107869385) + W(1, -1107666272, -1117110288, -1124350185, -1110538107) + W(2, -1099468189, -1140834082, -1100127579, 1052854494) + W(3, 1034036134, 1024451620, 1003450083, 1041131277) + W(4, -1112139926, 1024287080, -1117241706, 1052996200) + W(5, 1027811452, 1027157968, -1131082337, 1035032776) + W(6, 1036663822, -1122285462, 1038018354, -1112496415) + 
W(7, -1110479054, -1112615559, 998992195, -1106762474); WS(-1086074680, 1053637716); - sum1 = W(0, -1121345387, -1148805338, -1136570733, 1049266593) + W(1, 1027081787, -1133439181, -1158465386, -1120818857) + W(2, 1042002951, -1165378922, 1052460699, -1094571489) + W(3, -1124281856, 1040734807, -1121708851, 1035650578) + W(4, -1113042450, -1115297518, -1107443934, -1098765182) + W(5, 1043313411, 1006695533, 1016359031, 1027853163) + W(6, -1121398619, 991217235, -1117268427, 1036113926) + W(7, -1136658365, -1112513138, 1021173351, -1106476275); sum2 = W(0, 1026517575, 1017334370, 1051558711, 1071692777) + W(1, 1049822619, 1020207734, 1022251878, 1019995718) + W(2, -1170492850, 1003954710, -1096673587, -1077357700) + W(3, -1098179385, 996694924, -1122577241, 1015494226) + W(4, -1138816415, -1132363566, -1136175651, -1098960792) + W(5, -1116986501, 1003290486, -1141894102, -1126828734) + W(6, -1143472678, 998846550, -1124275402, 1018703670) + W(7, 1007812651, 1007766851, 1009415395, -1163222937); WS(1051521136, 1027207116); - sum1 = W(0, -1122694020, -1121133108, -1102175837, -1116351908) + W(1, 1045518980, 1009393969, 1008379217, 1015772516) + W(2, 1010830545, -1124202632, 1057246783, 1026322980) + W(3, -1089509425, 1025178484, -1117338572, 1009646833) + W(4, -1124291704, 1037913146, -1093542759, 982577970) + W(5, 1055793637, -1118947636, 1001093793, 1001810977) + W(6, 1018062184, -1116091286, 1041281977, -1125394504) + W(7, 1008755233, -1127575032, 1015898776, -1121163492); sum2 = W(0, -1137495011, 1013616911, -1135578111, 1029346938) + W(1, 1017473747, 1012413655, -1133405571, 1008139351) + W(2, -1135527491, -1123650952, 1013443151, -1114797945) + W(3, -1122100892, -1128721387, -1166794345, -1156685818) + W(4, 1027730022, 1024465134, 1049128967, 1068130737) + W(5, 1046423571, -1143058109, 1020545683, -1126785325) + W(6, -1118108263, -1128775579, -1098008683, -1080443718) + W(7, -1101482344, -1137148015, -1128178767, 991435034); WS(1057767608, -1132080751); - sum1 
= W(0, 1026028453, 1026017621, -1117947285, 1048679017) + W(1, 1025261389, 1011196341, 1015828970, 1025013027) + W(2, 1025766741, -1135552917, 1051769667, -1083959172) + W(3, -1120826122, 1045191525, 1028389741, 1039505775) + W(4, 1035118319, 1040474693, -1111744027, -1084413328) + W(5, 1049618505, -1110336171, 1028257397, -1123719333) + W(6, 1012106581, -1138611630, 1030333189, 1045191121) + W(7, -1122181545, 1030480605, 1027514349, 1020294666); sum2 = W(0, 1017587161, 1028118553, 1008167722, 1005112948) + W(1, -1125344655, 1015776789, -1148301500, -1138850406) + W(2, -1101123140, -1103020887, -1099714612, 1049070164) + W(3, 1032013714, 1022049457, 1009340114, -1167693540) + W(4, 1040188371, 1022642341, 1039093756, 1046164698) + W(5, -1111525569, -1098832696, -1115917000, -1103378287) + W(6, 988296658, 1010063898, 1026403646, 1033545355) + W(7, 1002132020, 1037334715, -1139728254, 1035581889); WS(-1099372256, -1088618788); - sum1 = W(0, -1112538182, -1113349022, -1103996671, 1024020908) + W(1, 1046614908, -1111225386, -1110537326, -1114146165) + W(2, 1048693927, 1033711782, 1064716592, -1143605597) + W(3, -1085173359, 1004694493, -1108087402, -1138402062) + W(4, -1112344546, -1129092599, -1086749016, 1044926535) + W(5, 1062252083, 1040479887, 1034104622, 1042110371) + W(6, -1109099742, -1110127398, 1032699126, -1121424940) + W(7, -1130166943, -1106709441, -1120726228, -1106064827); sum2 = W(0, 987083788, -1144106823, -1120467381, -1121977305) + W(1, -1118651341, -1162389292, -1152635694, 964510307) + W(2, 1013472954, -1131186779, -1139561796, 1044091298) + W(3, 1038343490, -1115306287, -1155962630, -1117365756) + W(4, -1120418118, -1122269098, 1038342084, 1042996066) + W(5, -1118476220, -1128689408, -1132569906, -1141833923) + W(6, 979955865, -1163904780, -1115615181, -1127292875) + W(7, -1123141745, 1014320394, -1135582470, 1008840046); WS(1041282784, 1044242623); - sum1 = W(0, -1119885764, -1119816052, -1100880653, -1113812594) + W(1, 1046531310, 1015358999, 
1005085853, 1021011107) + W(2, -1171512555, -1121861252, 1057266723, 1010135439) + W(3, -1089952515, 1031135156, -1115226950, 1003139037) + W(4, 1003864029, 1040963149, -1094412795, -1118004569) + W(5, 1056310444, -1114099002, 1015234855, 992693307) + W(6, 1025494836, -1113504879, 1043843337, -1125989575) + W(7, -1156936827, -1122714492, 1003362397, -1120612644); sum2 = W(0, 1005317381, -1143827754, 1019060164, 1009279342) + W(1, 1014092605, -1131000233, 1012779564, -1137528453) + W(2, -1142619324, 1012902153, -1114788024, -1098688460) + W(3, -1120377499, 1017453102, -1139793504, -1147729078) + W(4, -1126266146, -1128784654, -1094218173, -1078812823) + W(5, -1099532818, 1007638067, -1130333980, 1018177647) + W(6, 1026462555, 1020893616, 1054132458, 1070492026) + W(7, 1048935725, 1011358224, 1015734963, 987943782); WS(1046635232, 1024078131); - sum1 = W(0, 1002735212, 1025138813, 1017042555, -1132524982) + W(1, 1031803657, 1033606155, 1021596219, 1015508823) + W(2, 1035063871, 1034039879, 1044122447, -1110416695) + W(3, -1092481954, -1094320024, -1107502027, -1103391009) + W(4, -1097977761, -1105608655, -1094991056, 1051547730) + W(5, 1050188814, 1047410847, 1031346589, 1046101811) + W(6, 1040314319, 1035664624, 1038536855, -1114843703) + W(7, 1003107468, 1019470987, 1021345835, -1136683190); sum2 = W(0, -1096475926, -1112281069, -1116645717, 1041647377) + W(1, -1113263240, 1023473494, -1117017643, 1039764966) + W(2, 1044036812, -1112231286, -1111398905, -1096068583) + W(3, 1026411348, -1114320784, -1138574198, -1104710548) + W(4, 1052862983, 1024115789, 1051331710, 1038036111) + W(5, 1042458641, 1028002558, 1037890580, -1106844581) + W(6, -1106234474, -1121785528, -1130292776, 1037359643) + W(7, -1111704128, -1123406807, -1109714921, 1041123403); WS(-1088554040, -1076674880); - sum1 = W(0, 1026292820, 1016736263, 1042339025, -1110236986) + W(1, -1103961368, 1016313655, -1151588920, 1014755782) + W(2, -1132973070, 1034501898, -1089525132, 1037427962) + W(3, 
1056478885, -1118983748, 1038469390, -1123320716) + W(4, -1144171612, -1110973538, 1052671191, -1123890785) + W(5, -1092344862, 1041641985, 1010979982, 1017396903) + W(6, -1130131975, 1028857234, -1108906970, -1112145786) + W(7, 1002874044, 1025897228, -1130905399, 1033705562); sum2 = W(0, 1013915195, 1007730851, 981970521, 1028189234) + W(1, -1121038101, -1142484934, -1146368902, 1010322539) + W(2, -1133182691, 1024414743, -1111248658, 1040952978) + W(3, -1109339192, -1094377458, -1116592821, -1112421528) + W(4, -1127318198, -1121307593, 1035588225, 1057294107) + W(5, -1107404728, 1024397525, -1118541421, 1019759378) + W(6, 1020584890, 1005058566, -1124411850, 1029625115) + W(7, 1026110889, 1023925523, -1140327971, -1199698720); WS(1063581112, 1015292283); - sum1 = W(0, -1123806598, -1124445804, 1036776315, -1112186771) + W(1, -1102837698, -1117642958, -1137018200, 1010580432) + W(2, -1125096044, 1037634467, -1083793455, -1098422117) + W(3, 1058965073, -1106625757, 1036181095, -1127761788) + W(4, 1046804719, 1028314614, 1064148787, 1034155462) + W(5, -1089226130, 1037373467, 994321759, 1021285644) + W(6, -1117498166, 1006823135, -1106689849, 1004978479) + W(7, 1033810693, 1029436414, -1119765454, 1034713459); sum2 = W(0, -1127012521, -1129458054, -1134632819, -1113739078) + W(1, -1106078947, -1115241196, -1122814807, -1119046895) + W(2, -1110373665, -1122115974, -1118435057, 1041258512) + W(3, 1025961773, 1041055451, 1025056413, 1029845331) + W(4, -1121983257, -1121551577, -1107711610, 1043546644) + W(5, 1048226293, -1131486243, -1139476701, 1018415015) + W(6, 1021812843, 1015201109, 1039413537, -1127386873) + W(7, -1110385416, -1135801459, -1132245806, -1140149017); WS(-1109010880, -1087548956); - sum1 = W(0, 1034947768, 1034343312, 1016751552, 1019661856) + W(1, -1126501287, 1017025648, 1030724160, -1140792121) + W(2, -1095012676, -1102610188, -1096454908, -1091443170) + W(3, -1131030249, 1042942296, 1019936112, 1024647464) + W(4, 1046023882, 1039446704, 
1042564604, -1105694067) + W(5, 1044246468, -1103700296, -1141422594, -1107855416) + W(6, 1029737824, 1025692706, 1038373096, 1039271048) + W(7, 1012879825, 1041317114, 1029263800, 1041193844); sum2 = W(0, 1034034732, -1113997103, 1028771217, 1025456143) + W(1, 1035315492, -1102403849, 1018212015, -1099438501) + W(2, -1107522705, -1121503695, -1114143244, -1105773446) + W(3, -1109746606, -1106040358, -1106459627, 1039219872) + W(4, -1105460279, 1038975878, 1032873918, 1059420344) + W(5, 1040681265, 1046039582, 1026290649, 1046943722) + W(6, 1021740679, -1112744336, -1121564954, 1024971971) + W(7, -1122379806, -1106873869, -1130313815, -1105420350); WS(-1086299832, -1077288694); - sum1 = W(0, 1021716686, -1132921948, -1113901292, -1108835908) + W(1, 1023078294, 965138311, 998651320, -1130194922) + W(2, -1099039878, -1108540692, -1158126306, 1055092577) + W(3, -1089051407, -1113759276, -1107695832, -1113503632) + W(4, -1111509136, 1021468846, -1096197083, 1062013047) + W(5, 1050708993, 1022391342, -1133490396, 991635057) + W(6, 1039618828, -1131678690, 1041516082, -1118733319) + W(7, -1122936235, 1015065790, 997649137, 1023538631); sum2 = W(0, -1133976495, 1027343155, -1149877867, -1119506980) + W(1, -1126385541, 1016526837, 994153115, -1140785051) + W(2, 1035891239, 1030599513, 1037590422, 1054189655) + W(3, 1041308688, -1112736561, 1004824957, -1120347934) + W(4, -1130801609, -1108453664, 1012747883, -1119322812) + W(5, -1107379808, -1119223720, -1116360142, -1129452107) + W(6, -1113698362, 1016406968, -1108226898, -1120928356) + W(7, 1016225738, 988482485, 1018050885, -1117792638); WS(-1113279936, 1066223903); - sum1 = W(0, -1128171420, -1141738481, -1138940153, 1044160156) + W(1, 1020686276, -1129655596, 964173357, -1123239900) + W(2, 1040261344, -1140107833, 1050703688, -1100167260) + W(3, -1130335589, 1035637471, -1124525100, 1032644739) + W(4, -1112013315, -1116929726, -1108200895, -1100730273) + W(5, 1040782300, 1024316286, 1014134393, 1029624526) + W(6, 
-1123695998, -1154978689, -1123177006, 1034288823) + W(7, -1141423761, -1114187043, 1013984857, -1108229911); sum2 = W(0, -1115606620, -1125272644, -1099614716, -1080575150) + W(1, -1102155153, -1122847678, -1123394906, -1129056732) + W(2, 1021458196, 1017345212, 1047257730, 1068148121) + W(3, 1044966894, 1026244022, 1017049220, 1010161976) + W(4, 1009639320, 1016051020, -1120838650, -1113655261) + W(5, -1132238288, -1130782536, 967940860, 1004223696) + W(6, -1131253088, -1143902384, 1020803060, 1032085971) + W(7, 1016311348, -1137376840, -1137115752, -1136984808); WS(1060545080, -1126581603); - sum1 = W(0, 1032630360, 1037657648, 1043836232, 1037448680) + W(1, 976149203, 1031833596, 1031675647, 1025282125) + W(2, -1112268976, -1128752350, -1090205186, 1048595306) + W(3, 1057651571, 1035187792, 1034806588, 1043419290) + W(4, 1045186906, 1032285712, 1053340438, -1094666759) + W(5, -1082657749, -1092127852, -1104761760, -1096441814) + W(6, -1125010622, 1029508223, -1108078856, 1041691860) + W(7, 1042698525, 1040118132, 1033087420, 1034587656); sum2 = W(0, -1123698886, -1126365381, -1135914762, -1121651762) + W(1, -1118625490, -1122087574, -1147908244, -1134045690) + W(2, 1034075649, 1026991402, 1019253181, 1047572688) + W(3, 1035108181, -1115886918, 1016718341, -1117034488) + W(4, 998149095, -1118780236, 1023543366, 1038479879) + W(5, -1114677625, 1011684618, -1132109957, -1137057610) + W(6, -1113635181, -1168196508, -1114469118, -1145545780) + W(7, 992781287, -1139655050, -1142844852, 1007905050); WS(-1083899832, -1105526146); - sum1 = W(0, 1026357515, -1139718894, 1044187583, -1105573131) + W(1, -1104526300, -1119529939, -1125768375, -1132776678) + W(2, -1119744955, -1125720471, -1092285679, 1062437883) + W(3, 1058460257, 1022150135, 1033366698, 1009731342) + W(4, -1117075907, -1106102943, 1048719011, 1052836221) + W(5, -1089717563, -1123085499, -1114009838, -1112611206) + W(6, -1111407198, -1152407445, -1107209883, -1107292779) + W(7, -1122559055, -1119739267, 
-1119196243, -1129505495); sum2 = W(0, -1110807022, -1129400032, -1105612607, -1123619892) + W(1, -1131262448, 1016529300, -1129444600, -1136239801) + W(2, 1025172792, -1117035240, 1035443403, -1135427545) + W(3, -1111010692, -1115955576, -1117326476, -1121250556) + W(4, 1033543849, 999654946, 1039345667, 1053020794) + W(5, 1047843748, -1135856481, 1022819536, 998047364) + W(6, -1123816828, -1144812946, -1120747576, -1113498942) + W(7, -1113301822, -1146605522, -1119691028, -1135792457); WS(-1107513792, 1064663354); - sum1 = W(0, 1030862455, -1161118946, 1040244826, -1104300038) + W(1, -1102940181, -1135077628, -1135539484, 1011863892) + W(2, -1113532308, 1021510766, -1091621085, 1046262406) + W(3, 1054782000, 1019068110, 1036941280, -1128724830) + W(4, 1032378968, -1127591630, 1051734861, 1034822530) + W(5, -1095483267, 1031948820, -1172984259, -1120336759) + W(6, -1123071015, 1009770420, -1107582956, -1108820108) + W(7, -1125175670, 1025488559, -1126076542, 1036426604); sum2 = W(0, -1135206239, -1139065871, 1026230428, 1025917606) + W(1, 1027050280, 1013849783, 1016077019, -1127829351) + W(2, -1140752647, -1123380440, 988696695, -1092786651) + W(3, 1049996339, 1057784826, 1033822297, 1034470972) + W(4, 1022777359, 1021581075, -1122295168, -1085937537) + W(5, 1032573953, -1130048007, 1032545188, -1137094527) + W(6, 974924014, -1133276463, 1029689087, -1140169471) + W(7, -1135329695, -1124883951, 1011238415, 1001568686); WS(1058918200, -1121082995); return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); } -const static float2x3 rgb2uv = { - -0.169, -0.331, 0.5, - 0.5, -0.419, -0.081 -}; +shared float inp[525]; -const static float3x3 yuv2rgb = { - 1, -0.00093, 1.401687, - 1, -0.3437, -0.71417, - 1, 1.77216, 0.00099 -}; +#define CURRENT_PASS 2 -groupshared float inp[525]; +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, 
INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} -void Pass2(uint2 blockStart, uint3 threadId) { - const float2 inputPt = GetInputPt(); - const float2 outputPt = GetOutputPt(); +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); - const uint2 group_base = uint2(blockStart.x >> 1, blockStart.y); - for (int id = threadId.x * MP_NUM_THREADS_Y + threadId.y; id < 525; id += MP_NUM_THREADS_X * MP_NUM_THREADS_Y) { - uint x = (uint)id / 15, y = (uint)id % 15; - inp[id] = tex1.SampleLevel(sam, inputPt * float2(group_base.x + x - 1 + 0.5, (group_base.y + y - 3 + 0.5) * 0.5), 0).r; - } +#define temp_tex(pos) (float(texture(temp, pos).x)) +static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2); +static const float2 temp_pt = float2(1.0 / (temp_size.x), 1.0 / (temp_size.y)); - GroupMemoryBarrierWithGroupSync(); +#define HOOKED_tex(pos) temp_tex(pos) +#define HOOKED_size temp_size +#define HOOKED_pt temp_pt - uint2 destPos = blockStart + uint2(threadId.x * 2, threadId.y); - if (!CheckViewport(destPos)) { - return; - } - - float4 ret = 0.0; - float4 ret0 = 0.0; - float4 samples[8]; - const uint local_pos = threadId.x * 15 + threadId.y; - [unroll] - for (int i = 0; i < 8; ++i) { - [unroll] - for (int j = 0; j < 4; ++j) { - samples[i][j] = inp[local_pos + (i / 2) * 15 + (i % 2) * 4 + j]; - } +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 525; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (1)) + 0.5, float(group_base.y + y - (3)) + 
0.5)).x; } - - float2 originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb); - WriteToOutput(destPos, mul(yuv2rgb, float3(samples[2][3], originUV))); - - ++destPos.x; - if (!CheckViewport(destPos)) { + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[8]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 4]; + samples[1][1] = inp[local_pos + 5]; + samples[1][2] = inp[local_pos + 6]; + samples[1][3] = inp[local_pos + 7]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; + samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 19]; + samples[3][1] = inp[local_pos + 20]; + samples[3][2] = inp[local_pos + 21]; + samples[3][3] = inp[local_pos + 22]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; + samples[4][2] = inp[local_pos + 32]; + samples[4][3] = inp[local_pos + 33]; + samples[5][0] = inp[local_pos + 34]; + samples[5][1] = inp[local_pos + 35]; + samples[5][2] = inp[local_pos + 36]; + samples[5][3] = inp[local_pos + 37]; + samples[6][0] = inp[local_pos + 45]; + samples[6][1] = inp[local_pos + 46]; + samples[6][2] = inp[local_pos + 47]; + samples[6][3] = inp[local_pos + 48]; + samples[7][0] = inp[local_pos + 49]; + samples[7][1] = inp[local_pos + 50]; + samples[7][2] = inp[local_pos + 51]; + samples[7][3] = inp[local_pos + 52]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 18]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { return; } - - originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb); - WriteToOutput(destPos, mul(yuv2rgb, float3(nnedi3(samples), originUV))); 
+#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret); } diff --git a/src/Effects/NNEDI3/NNEDI3_nns16_win8x6.hlsl b/src/Effects/NNEDI3/NNEDI3_nns16_win8x6.hlsl new file mode 100644 index 000000000..a52b77a07 --- /dev/null +++ b/src/Effects/NNEDI3/NNEDI3_nns16_win8x6.hlsl @@ -0,0 +1,953 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: nnedi3.py --nns 16 --win 8x6 --use-compute-shader --use-magpie +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME NNEDI3_016_6 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 1 * 2 +//!HEIGHT INPUT_HEIGHT * 2 * 1 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!FORMAT R16_FLOAT +//!WIDTH INPUT_WIDTH * 1 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D temp; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_temp; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC NNEDI3 (double_y, nns16, win8x6) +//!IN INPUT +//!OUT temp +//!BLOCK_SIZE 32, 16 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[12]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 12; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 48.0; + float mstd1 = sumsq / 48.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = + W(0, -1126897990, 1027745880, 1024250604, 1024642508) + W(1, -1121959908, -1149906049, -1130469888, -1121396864) + + W(2, 1039079928, -1107295041, -1147395201, -1126556538) + + W(3, -1113607518, 1041026790, 1022159130, 1044630722) + W(4, -1107136294, 1005058137, -1116173177, 1042195560) + + W(5, -1098313415, 999141354, 1019497054, 1007702352) + W(6, 1015526727, 1018714920, 1042189511, -1106681307) + + W(7, 1035880216, -1121374916, -1133977224, 1026239260) + + W(8, -1106606352, 1038936227, -1124106064, 1025050132) + W(9, 990390561, -1131068140, 1013770942, -1122507740) + + W(10, -1136584888, -1135809122, 
-1122292152, 1015308851) + + W(11, -1122039043, 1031978820, -1116330759, 1018900008); + sum2 = W(0, 1017133506, 1011515348, -1139818306, -1123730089) + W(1, 996184056, -1138856554, 1023321012, 1029416248) + + W(2, -1115999672, 1020129658, 1015618084, 1007066512) + + W(3, -1119553894, 1057587887, -1090489276, -1109933138) + + W(4, 1016266760, -1145378916, -1112177411, 1071604647) + + W(5, -1079392139, -1097028615, 1028448562, 1008681896) + + W(6, -1165256880, 1051025857, -1098617840, -1105405946) + + W(7, -1155286464, 1000343320, -1133574805, 1035052104) + + W(8, -1139515542, -1135392452, -1138601606, 991053648) + + W(9, 1020043526, 1010374724, -1121583660, -1142174380) + + W(10, 997185888, -1155288808, -1135761830, 1018728192) + + W(11, 1024878156, 1002597928, -1131188096, -1132781834); + WS(1018288640, 1027735986); + sum1 = + W(0, 1012158232, -1178449286, 1044498160, -1128542910) + W(1, -1115962871, 1002517720, 1006778572, -1114624234) + + W(2, 1032943202, 1027108853, 1017365062, 964628492) + W(3, 1025063871, -1104570115, 1059928494, -1088743921) + + W(4, 1032615126, -1134936888, -1156175041, 1028919475) + + W(5, -1097612337, -1106124541, 1026836706, -1146238776) + + W(6, 1010747802, 1034856692, -1085331503, 1059914122) + + W(7, -1114177498, 1020458158, -1140348884, -1127457566) + + W(8, 1031833306, 1032056909, -1122073627, 1016604174) + W(9, 1020162890, -1122825993, -1119592595, 1033999672) + + W(10, 1022377282, 998219705, -1172026051, -1115773453) + + W(11, 1038136595, 1027508251, -1129465364, 1023799671); + sum2 = + W(0, -1126840972, -1130460798, 1019075916, 1017322604) + W(1, -1131054760, -1131047996, -1145399745, 985194115) + + W(2, -1120812206, -1129997452, 1006903064, -1143360737) + + W(3, -1139273136, -1112997847, -1139625904, 1042717692) + + W(4, -1114175000, -1130986946, 991527106, -1120456092) + W(5, 1043975251, 1051048254, -1113881740, 1007107280) + + W(6, -1135317632, 1001121889, -1150833602, -1121880440) + + W(7, 978663174, -1143215153, -1139461992, 
1017866680) + + W(8, -1128878392, -1112673669, 1026044394, -1125685806) + + W(9, -1129486378, 1006765920, -1133504840, -1126929736) + + W(10, 1014584312, -1144361281, 995542402, 1000306721) + + W(11, -1142139489, -1114488494, 1007041936, -1134951296); + WS(1042433344, -1111851638); + sum1 = W(0, -1128612156, -1112658226, -1119638967, 1043958886) + + W(1, -1120465263, -1128976934, -1139940268, -1123380939) + + W(2, -1126908022, 1033805831, -1115346894, -1142120768) + + W(3, -1122042583, -1128727592, -1097703246, 1057665642) + + W(4, -1104545545, 1005565040, 984858240, -1107767030) + + W(5, 1052387104, 1046318672, -1108167869, -1148354296) + + W(6, 999630836, -1114896432, 1054789077, -1095395475) + + W(7, 1029397739, -1133849404, -1146630760, -1115281716) + + W(8, 1030603948, -1117224401, -1163176544, -1117808895) + + W(9, -1126512698, -1129996802, 1028419819, -1123618471) + + W(10, -1117439993, 1013349902, 996431920, -1123547845) + + W(11, 1026334318, -1113258842, -1134051464, -1120421311); + sum2 = + W(0, 1022431497, -1109389142, 1004613154, 1028727631) + W(1, 1029503922, -1132574761, -1132240188, -1119299282) + + W(2, -1139248009, -1129989652, -1140046689, -1114039002) + + W(3, 1024165374, -1107432916, 1041447926, 1047487962) + W(4, 1017218352, -1135952741, -1114822837, 1044244351) + + W(5, -1108646182, -1100679909, 1040665470, -1123756570) + + W(6, -1120729932, 1031006195, 1047688354, -1126089152) + + W(7, -1120804126, -1148002498, -1124855948, 983982854) + W(8, 1009435309, 1033956847, -1107003694, 1028342876) + + W(9, -1126342960, -1158996358, -1122846542, -1123334894) + + W(10, -1140927562, -1117057946, -1128289576, -1121099750) + + W(11, 1036127241, 1039673953, -1102421772, 1026336008); + WS(1015433728, 1058400049); + sum1 = + W(0, -1139873791, 1031161269, -1113693508, 1033801204) + W(1, -1119172737, -1143910182, -1133909491, 1032977294) + + W(2, -1112917766, -1131731326, 989007258, 1019358132) + W(3, 1023506921, -1116372870, -1116140698, 1045725159) + + W(4, 
-1122523445, 1008313039, -1230944644, 1035249566) + + W(5, -1103376612, -1102794347, 1044071755, -1115540344) + + W(6, -1118840528, -1120831281, 1044830734, -1116748777) + + W(7, 1030473357, -1126204226, 1028378783, -1114963068) + + W(8, -1141442286, 1032646513, 1018738506, -1118552369) + + W(9, -1121050287, 1032892305, 1023234585, -1112562780) + + W(10, 1021910870, 1016154651, 1033465034, -1105610222) + + W(11, 1034039600, 1030129285, -1122899972, -1124368226); + sum2 = W(0, -1138428449, -1158711528, -1124467432, -1140697417) + + W(1, 1030243467, 1012442941, 992976916, 1013039401) + W(2, -1130455464, -1123518198, 1033499227, 975746961) + + W(3, -1142924106, -1128734961, -1113146735, -1099387353) + + W(4, 1051222006, -1122081826, 976851025, 1036130613) + + W(5, -1097860430, -1077268149, 1072898808, -1117904739) + + W(6, 989093448, 1010050489, -1108810723, -1091225653) + W(7, 1056060393, -1131990027, 997652548, -1137359275) + + W(8, -1122996798, 1032494444, 1025590581, 951236744) + W(9, -1153131756, 990210276, -1140348735, -1115493835) + + W(10, 1025171621, 1006284898, -1134977059, -1138876101) + + W(11, -1127238416, 1018469149, 1026307569, -1146863422); + WS(-1143089152, 1030017260); + sum1 = + W(0, 1012276081, -1116644609, 1019444907, -1124688427) + W(1, 1029853709, -1130860131, 1001605962, -1127223379) + + W(2, -1119160665, 1035777366, -1136557285, -1130309965) + + W(3, 1024406997, -1109637089, 1048989101, -1098625404) + + W(4, 1038057505, -1130883561, -1155861797, -1115433381) + + W(5, 1044433671, 1006101820, -1111190908, 1009046005) + W(6, -1155627981, 1036571679, -1098184025, 1048780603) + + W(7, -1112291813, 1025361773, -1122534699, 1028189701) + + W(8, 1039597237, -1104960796, -1130076067, 1018788475) + + W(9, 1018348791, -1126280255, -1117935161, 1029641477) + + W(10, 1012573277, -1125993892, -1120990241, 1036379833) + + W(11, -1136463217, -1111599465, -1154886405, 1020397819); + sum2 = W(0, -1153319600, 1008405084, -1118973116, -1140784820) + + W(1, 
1012585128, 1010769460, -1147284080, 985822624) + W(2, 1010505984, -1129308604, 1021293048, 1001814848) + + W(3, 1008968960, -1142311064, -1101248908, 1037448945) + W(4, 1024969278, -1160749952, 995456320, 1022276922) + + W(5, -1089187936, 1057794596, 1033366347, -1123619202) + + W(6, -1140178660, -1140411728, -1109859050, 1029773785) + + W(7, 1024400778, -1136545168, -1146954776, 1005012008) + W(8, 1017518401, 1015531414, 1007802556, 1000322872) + + W(9, -1142030464, 1003782736, 982409184, 974134143) + W(10, 1003482728, -1152799248, -1170856127, 1006946188) + + W(11, 995727232, 960534268, 1009923956, 985284128); + WS(1064472528, -1121594920); + sum1 = + W(0, -1142654991, 1027230343, -1112807213, 1027061019) + W(1, -1128825126, -1164359388, -1143599223, 1032290711) + + W(2, -1113392623, 1016010466, 991342574, 1014490160) + W(3, 1014568428, -1136037408, -1115590690, 1034098395) + + W(4, 1008695068, -1148094031, 1010500896, 1002050167) + + W(5, -1113734161, -1112872467, 1027642302, -1127829894) + + W(6, -1124387333, -1122938499, 1038834309, -1130883382) + + W(7, 1013984188, -1138058188, 1020884834, -1120250507) + W(8, 1029912912, 1015162858, 1015817710, -1124941766) + + W(9, -1131205634, 1025589157, 1019867389, -1123484555) + + W(10, 1015459258, 1008886302, 1026841191, -1110863224) + + W(11, 1031947569, 1019435182, -1129521612, -1130075526); + sum2 = + W(0, 1003807591, -1154115373, 1000124719, 1017182228) + W(1, -1126980607, -1130234859, -1147429191, -1139843175) + + W(2, 1001833687, 1024488826, -1116401990, 987658746) + W(3, 1002635095, 1018649088, 1008095031, 1040714709) + + W(4, -1105844805, 1013729967, -1132089351, 1016729308) + + W(5, -1105992985, 1063780536, -1085442794, 1024604622) + W(6, -1147602519, 1024344696, 1014141127, 1047200342) + + W(7, -1101306502, 995366957, -1151072125, -1155997437) + + W(8, -1132427785, 1020609216, -1122913939, -1147894927) + W(9, 964968041, 1001714367, -1141957575, 1023684454) + + W(10, -1125194898, -1146690231, 1011860423, 
-1141691791) + + W(11, -1139390003, 1017456200, -1128761080, -1146063807); + WS(1061878800, -1131153991); + sum1 = + W(0, -1123872727, 1015115512, -1099302516, 1041224340) + W(1, -1144166978, -1171049230, 1018625288, 1031144036) + + W(2, -1102371221, 1009910425, 1014687697, 1022902338) + W(3, -1127640224, 1036357847, -1085394744, 1052022073) + + W(4, -1115552350, -1132534141, 1026350045, -1108974562) + + W(5, 1059569738, 1058525661, -1125187302, 1016189168) + W(6, 1013916191, -1107191102, 1050617832, -1088226291) + + W(7, 1037730450, -1123531112, 1018183052, 1006433282) + W(8, 1032504563, -1097316565, 1040234099, -1127405808) + + W(9, -1145362866, 1014427177, 1031877738, -1109508096) + W(10, 1015825508, 1018548825, 1016048056, 1026198990) + + W(11, 1033421596, -1098228398, 1035235966, -1137247201); + sum2 = W(0, -1131301730, 1031269327, -1127010401, -1109842974) + + W(1, -1181736700, -1180777340, 973798558, -1131640108) + + W(2, 1028981651, -1125259759, -1167651134, -1160957999) + + W(3, -1127780866, 1013454096, -1149526184, -1113692773) + + W(4, -1123287814, 993986728, 1013478572, -1109509101) + W(5, 1051779317, 1047088883, -1109788940, 1020962386) + + W(6, -1160424319, -1117315078, 1028380081, -1134194124) + + W(7, -1115287133, -1136947718, -1135840779, -1131160392) + + W(8, -1137527992, 1028175261, -1121515979, -1138138790) + + W(9, -1164912671, -1145619912, 998238336, 1018886164) + + W(10, -1125209194, -1152989064, -1138738786, -1127332243) + + W(11, -1148504424, 1027237057, -1142455024, -1123011340); + WS(-1146021888, 1053974589); + sum1 = + W(0, 1029642476, -1119368753, 1042969521, -1095098901) + W(1, 1046685039, 984849429, 1013890275, -1134074211) + + W(2, 1042359026, -1107285127, 1031018217, -1135393367) + + W(3, -1176939092, 1007708103, 1045769551, -1096985546) + W(4, 1036262392, -1139413615, 1022266947, 1017736689) + + W(5, -1101301107, 1034918881, 1003810877, 1024875117) + W(6, -1146466657, 1027345005, -1094644679, 1050538529) + + W(7, -1120828825, 
-1172526890, 1004183253, 1032510570) + W(8, -1091538585, 1051699648, 1011534979, 1017671961) + + W(9, -1160650069, 1019378973, -1107179580, 1036824506) + + W(10, -1133351451, -1160823333, -1127783457, 1031489314) + + W(11, -1095508207, 1048776768, 1035618600, 1006585957); + sum2 = W(0, 1031363252, -1091101506, 1048232756, 1057852755) + W(1, -1095952784, 1016290300, 1030774484, 1001500224) + + W(2, -1110436898, -1132290932, -1131305343, -1126601761) + + W(3, 1015165558, -1110787951, 1016237906, 1043794074) + W(4, -1113356328, 1003743696, 1007437656, 965388167) + + W(5, 1014973676, 1047525730, -1152923833, 1022650220) + W(6, 1020087968, 1003188992, -1123006886, 1011818344) + + W(7, -1111245491, 1021501454, -1158035650, 1041338676) + + W(8, -1105090874, -1129296549, -1131940021, 1017537464) + + W(9, -1137051446, -1134903850, -1123217223, 1034851396) + + W(10, -1117639196, -1133259176, 1018262350, 1033269727) + + W(11, -1104724635, -1106365430, 1024945328, 1019937714); + WS(-1077057896, -1083600334); + sum1 = W(0, 1017420011, 1011471785, 1029223422, -1116040414) + W(1, 1017123181, 1016511669, 1014201033, 1019976613) + + W(2, -1126437509, 1015478313, 1024110818, -1167731667) + + W(3, 1017846781, -1138042285, 1049638570, -1103217262) + W(4, 1023111893, 1009386661, 999765850, 1040273597) + + W(5, -1090770241, -1087230893, 1030676769, 1023090125) + + W(6, -1162024122, 1016487629, 1029091694, 1046437488) + W(7, -1112046985, 1020460717, 985808522, 1027730222) + + W(8, 1037672698, 1024768280, -1120839802, 1025489318) + W(9, 1019153993, 1010855969, 1027546578, 1028909230) + + W(10, 1023955584, -1134545259, 1011766057, 1025127228) + + W(11, 1025680213, 1017109109, -1128064723, 1027741830); + sum2 = + W(0, 1023774756, -1107003878, 1020767940, -1118294055) + W(1, -1113997093, 1021408408, -1152708847, 1013240776) + + W(2, -1108605887, -1128830540, -1139588328, -1119578529) + + W(3, 1005727232, -1108761818, 1050907301, -1097736561) + + W(4, 1032528025, -1135972104, -1128030280, 
1032847770) + W(5, 1058054639, 1008347200, 1039669350, -1131826954) + + W(6, 1004577664, 1024878510, -1106188814, 1049418167) + W(7, -1108856812, 999382680, -1116453887, -1129071264) + + W(8, 1040942692, -1105809360, -1104688291, 1019392776) + + W(9, 1020705336, -1124253692, -1115446820, 1014050712) + + W(10, 1018266740, -1117167612, -1127775332, -1114566712) + + W(11, 1042743894, -1132221182, -1103534695, 1022204104); + WS(1034686080, -1080904524); + sum1 = + W(0, -1139332721, 1025190657, -1143163562, 1041601261) + W(1, 1024768205, -1137907141, -1156631187, 1024127465) + + W(2, 1040892278, 1028605547, -1129308018, 1012089369) + W(3, 1023562901, 1006799241, -1104914606, 1052908885) + + W(4, -1117860929, 1019594656, 1011454089, -1145135178) + + W(5, -1089193318, -1091833281, 1036300940, -1143330794) + + W(6, 1009225011, -1129417722, 1043909393, -1103073573) + W(7, 1040987970, 992909011, 1012327853, 1017495114) + + W(8, -1119873834, 1025246703, 1033652713, -1123933213) + + W(9, 1010687981, 1027561839, -1136185891, -1124345098) + W(10, 1024209623, 1018355139, 1010798725, 1010795083) + + W(11, -1118482716, 1032670633, 1027144528, -1123266333); + sum2 = + W(0, 998154484, -1124228589, -1132108902, -1115676434) + W(1, -1123985162, 1004957466, -1136847690, 1028193069) + + W(2, -1123281782, -1123302060, -1132306691, 1011392625) + + W(3, -1120010648, 1043298286, -1097765474, 1027211577) + + W(4, -1114822183, -1127542967, -1145824866, -1115567961) + + W(5, 1059221182, 1034703777, -1131429597, 1022587458) + W(6, 1015307650, -1106126812, 1048600788, -1099334080) + + W(7, 1029215805, -1127163397, 994166396, -1111174068) + W(8, -1130476352, 1015056080, 1023836215, -1122559367) + + W(9, 1000606426, -1128437454, 1026255089, -1137618020) + + W(10, -1127893362, -1171736302, 1010815409, -1110538383) + + W(11, -1118584150, 1028199647, 1025007180, -1124423270); + WS(-1097173920, -1100403112); + sum1 = W(0, -1133792968, -1126599342, 1026626987, -1109988694) + + W(1, -1128510918, 
-1124691470, -1124511038, -1134319356) + + W(2, -1112479512, -1122054529, -1138055228, -1131431128) + + W(3, -1133667884, -1113753548, 1051379210, -1097159959) + + W(4, 1031366423, -1128464692, -1126404688, -1113718896) + + W(5, 1058852431, 1058630415, -1108453759, -1122909907) + + W(6, -1129657589, 1034489098, -1097104011, 1049904553) + + W(7, -1111244112, 1006087192, -1123548289, 1017816566) + + W(8, 1007326848, -1104990865, -1129654222, -1138955724) + + W(9, -1134226372, -1122628437, -1112737379, 983139170) + + W(10, -1143321192, -1123473736, -1120375479, 1029275393) + + W(11, -1116837058, -1110311540, -1132471000, -1149064600); + sum2 = W(0, -1133003813, -1145103116, -1105221269, 1033080040) + + W(1, 1016862101, -1129731365, -1170659932, 1024883426) + + W(2, -1117429423, 1028547885, -1128891234, -1147341896) + + W(3, 1006656308, -1122208183, -1098340061, 1042272545) + + W(4, -1121562483, -1121650606, 1031055883, -1101651786) + + W(5, 1055658740, 1058321046, -1100689547, 1031708925) + + W(6, -1122785076, -1107240567, 1035604404, -1112738821) + + W(7, -1115182870, -1123396988, -1138148825, -1137951645) + + W(8, -1131811521, 1003752088, 1026865631, -1133076983) + + W(9, -1134424500, -1131665157, -1130287800, 1015669581) + + W(10, -1129373191, -1131162259, -1131089901, -1116779622) + + W(11, -1123356625, 1033205575, -1134576021, -1127933595); + WS(1049422752, 1064394145); + sum1 = W(0, 1016583527, -1106085006, 995307718, 1042273115) + W(1, -1113049442, 1025810280, 997641734, -1123841888) + + W(2, 1031369872, 1021597381, -1122854832, 1006187755) + + W(3, -1129211865, 1041111742, -1088517333, 1058826428) + + W(4, -1113933244, 1019889767, -1131677043, 1032245856) + + W(5, -1098988005, -1105331685, 1032610296, -1131685097) + + W(6, 1021172552, -1110939130, 1058612208, -1090507155) + + W(7, 1037338632, -1155049030, 1021691141, -1105269375) + + W(8, 1030057089, 1043687978, -1122591528, -1134096210) + + W(9, -1133007562, -1137128282, 1036830720, -1120823228) + + W(10, 
-1116248270, 1025994697, 1026669144, -1106745812) + + W(11, 1034516890, 1038691348, -1117945591, -1126546729); + sum2 = W(0, 1015668141, -1138201662, -1111996311, -1127284815) + + W(1, -1125087482, 1020174885, -1124041461, -1140877219) + + W(2, -1116450062, -1123578506, 1024732308, -1139064970) + + W(3, 1005775275, 1027346708, -1125910350, -1106280325) + + W(4, 1034158307, -1133423524, 1015274173, 1016303395) + + W(5, -1108948194, 1052974100, 1032925063, -1161498797) + + W(6, -1138139200, -1106503093, -1104963655, 1053021197) + + W(7, -1107449032, -1134898868, 992639399, -1117618841) + W(8, 1031763952, 957951850, 994113735, 1013272790) + + W(9, -1132053353, -1115775134, 1015724405, 1016609913) + + W(10, -1132927280, -1132485274, -1129319398, -1122071744) + + W(11, 1034411590, -1140595900, -1140186580, -1164791981); + WS(-1101497152, -1084603877); + sum1 = + W(0, -1136425045, 1016522037, 967194407, 1019848413) + W(1, -1129523533, -1142614610, -1140218249, -1157845066) + + W(2, 1029505522, -1119357636, -1140249161, -1135395837) + + W(3, -1121565262, 1035402982, 1022903246, 1027088345) + + W(4, -1121932442, -1148904362, -1122160667, 1027884002) + + W(5, -1107598171, 1024422013, -1127296803, 1002411186) + W(6, 1006883159, 1025282390, 1025270942, -1117602990) + + W(7, 1030372258, -1130529549, -1132497425, 1022271101) + + W(8, -1120772739, 1030415880, -1129818261, 1018540973) + + W(9, 1004502690, -1138792353, -1154700189, -1171556244) + + W(10, -1138666305, -1138856043, -1128604789, 995143101) + + W(11, -1128284203, 1025955498, -1121511513, 1011955033); + sum2 = W(0, -1126668299, -1131366283, 1024971228, 1000957181) + + W(1, -1151515419, 1005199725, -1137964827, -1117612139) + + W(2, 1034620123, -1119890411, -1145021381, -1136862175) + + W(3, 1015963121, -1097765254, 1049249869, 1026062254) + W(4, 1001872029, 1007955643, 1030757650, -1083955387) + + W(5, 1064229708, -1107214224, 1026637176, -1125717658) + + W(6, -1137547503, -1103492737, 1047078464, -1122275403) + + W(7, 
1027173860, -1169614250, 997720155, -1118797430) + W(8, 1017921725, 1016072153, -1135832789, 923654805) + + W(9, -1132279825, -1131387718, 1024786888, -1133941049) + + W(10, -1148432117, 1002011725, -1152589275, -1140632131) + + W(11, -1144191965, 996433547, -1140699475, 1005736109); + WS(1059552336, -1136539026); + sum1 = + W(0, 990367896, 1041343484, -1096612504, 1033353841) + W(1, -1125599349, 1028944863, 1010957914, 1036710283) + + W(2, -1107358947, 1029016441, -1132821402, 1024290996) + + W(3, -1154541352, 1045269292, -1087221074, 1042554433) + + W(4, -1154580200, 1023892422, 1017372383, -1112141659) + W(5, 1058232297, 1029783110, -1114120867, 1023410731) + + W(6, 1026284586, -1116984235, 1051438086, -1087458720) + W(7, 1033522371, -1144215764, 1015461809, 1018013925) + + W(8, 1047713030, -1095293300, 1032365167, -1144750420) + W(9, 1014364322, 1006339428, 1032067931, -1114380761) + + W(10, 1004597796, 1001346936, 1021777309, 1032228520) + + W(11, 1045851190, -1099415088, 1030006574, -1130073781); + sum2 = W(0, -1153914788, -1101809160, 1052877341, 1046574229) + + W(1, -1095334336, 1023520281, -1126180245, -1115520194) + + W(2, 1022007580, 1000424166, -1113807813, 1021218858) + W(3, 995844276, -1114410922, 1055965696, 1034680258) + + W(4, -1109583292, 1008634443, -1141303142, 1033573989) + + W(5, -1098900400, -1098051352, 1033797491, -1115608949) + W(6, 1026951758, 998799030, 1023481081, 1045079279) + + W(7, 1032986287, 1032307290, 990856044, -1110191966) + W(8, 1023185808, -1106708743, 1025876178, -1128938562) + + W(9, 1004850742, -1129252703, 1031073312, 984863273) + W(10, -1137844345, 1017335440, 1015235936, 1016759632) + + W(11, -1104219784, -1103050031, 1038371038, 1020607644); + WS(-1080660584, -1085825159); + sum1 = + W(0, 1013708199, -1123370319, -1145658646, -1118786339) + W(1, 1028171867, -1144908790, 998525366, -1131079022) + + W(2, -1111041043, 1035331132, 1017605134, -1131113128) + + W(3, 1026247587, -1110742584, 1047524760, -1095527502) + + W(4, 
1042485668, -1130744068, 1009982783, -1113918027) + W(5, 1038280501, 1041941518, -1110999603, 992723116) + + W(6, -1136883881, 1032009669, -1096311074, 1051037928) + + W(7, -1106204846, 1025830203, -1128223794, 1025751155) + + W(8, 1042402294, -1106649743, -1132447358, 1017749654) + W(9, 999596614, -1126831290, -1118872454, 1032615945) + + W(10, 1002160934, -1127230527, -1126850910, 1033490448) + + W(11, 1023947050, -1111971999, 971034337, 1018668086); + sum2 = W(0, 988660617, 1017543700, 1015794522, -1133704409) + W(1, 1003471274, -1140119133, -1145776834, 1002138986) + + W(2, 1001599498, 1024621822, -1135257421, -1136500105) + + W(3, -1133422913, 1031822055, 1041494739, -1102581932) + + W(4, 970658596, -1163479081, -1126488793, 1032911160) + W(5, 1056510750, -1089051586, 1026713544, 1009057465) + + W(6, 999416722, 1018658069, 1023998101, -1111744235) + W(7, 945757471, 1000517690, 999055930, 1007351961) + + W(8, -1138508317, 1009295285, 998080468, -1137960905) + W(9, 987033481, -1162261577, 991201876, -1140892226) + + W(10, -1156050276, -1186683976, -1179419172, 999395634) + + W(11, -1141702058, -1147317506, 1007988669, -1146609818); + WS(1064784784, -1120346387); + sum1 = W(0, -1150678408, 1015721531, 1049255678, -1099108228) + + W(1, -1149551256, -1136953142, 1000581420, -1110077251) + + W(2, 1043607805, -1107416484, 1017163947, -1140022794) + + W(3, 1006062348, -1107299655, 1059242626, -1089544734) + + W(4, 1023526494, -1139533474, 1015088861, -1132691862) + + W(5, -1123916922, -1130977491, 1022505321, 1012221798) + + W(6, -1136518116, -1148196556, -1096371932, 1057929313) + + W(7, -1104456865, 1014035238, -1126533711, 1013224070) + + W(8, -1100407642, 1048500643, -1111675367, 1026165050) + + W(9, 1012432222, -1124886999, -1132580564, 1035479729) + + W(10, -1127245287, -1136458552, -1122704190, 1014270588) + + W(11, -1102354822, 1044504531, 1007459698, 1017479699); + sum2 = W(0, -1140771860, 1031694512, -1104948969, -1115570202) + + W(1, 1040745971, 
-1127298441, -1125513054, -1122230843) + W(2, 993388690, 1042093481, -1111499166, 995262946) + + W(3, -1131667695, 979286214, 1026183534, 1042830623) + W(4, -1119680402, 1002124441, -1131288705, 1025077104) + + W(5, -1111209187, -1112764939, 982469091, -1123012516) + W(6, 978159878, -1108853537, 1041617383, 1043422569) + + W(7, -1120447085, -1129740789, 1012596136, -1102087836) + + W(8, 1045410736, 1034771561, -1109907689, -1125016939) + + W(9, 1011933560, -1117751010, 1030126174, 1014235016) + + W(10, -1127258987, 1004566649, -1121534607, -1113389694) + + W(11, 1044425994, 1025820984, -1115100280, -1119639931); + WS(-1088649680, 1067112300); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[507]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { temp[pos] = (value); } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 507; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 13, y = (uint)id % 13; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (3)) + 0.5, float(group_base.y + y - (2)) + 0.5)).x; + } + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[12]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = 
inp[local_pos + 4]; + samples[1][1] = inp[local_pos + 5]; + samples[1][2] = inp[local_pos + 13]; + samples[1][3] = inp[local_pos + 14]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; + samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 26]; + samples[3][1] = inp[local_pos + 27]; + samples[3][2] = inp[local_pos + 28]; + samples[3][3] = inp[local_pos + 29]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; + samples[4][2] = inp[local_pos + 39]; + samples[4][3] = inp[local_pos + 40]; + samples[5][0] = inp[local_pos + 41]; + samples[5][1] = inp[local_pos + 42]; + samples[5][2] = inp[local_pos + 43]; + samples[5][3] = inp[local_pos + 44]; + samples[6][0] = inp[local_pos + 52]; + samples[6][1] = inp[local_pos + 53]; + samples[6][2] = inp[local_pos + 54]; + samples[6][3] = inp[local_pos + 55]; + samples[7][0] = inp[local_pos + 56]; + samples[7][1] = inp[local_pos + 57]; + samples[7][2] = inp[local_pos + 65]; + samples[7][3] = inp[local_pos + 66]; + samples[8][0] = inp[local_pos + 67]; + samples[8][1] = inp[local_pos + 68]; + samples[8][2] = inp[local_pos + 69]; + samples[8][3] = inp[local_pos + 70]; + samples[9][0] = inp[local_pos + 78]; + samples[9][1] = inp[local_pos + 79]; + samples[9][2] = inp[local_pos + 80]; + samples[9][3] = inp[local_pos + 81]; + samples[10][0] = inp[local_pos + 82]; + samples[10][1] = inp[local_pos + 83]; + samples[10][2] = inp[local_pos + 91]; + samples[10][3] = inp[local_pos + 92]; + samples[11][0] = inp[local_pos + 93]; + samples[11][1] = inp[local_pos + 94]; + samples[11][2] = inp[local_pos + 95]; + samples[11][3] = inp[local_pos + 96]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 41]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, 
ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret); +} +//!PASS 2 +//!DESC NNEDI3 (double_x, nns16, win8x6) +//!IN INPUT, temp +//!OUT OUTPUT +//!BLOCK_SIZE 64, 8 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[12]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 12; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 48.0; + float mstd1 = sumsq / 48.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, -1126897990, -1130469888, -1113607518, -1116173177) + + W(1, 1015526727, -1133977224, 990390561, -1122292152) + W(2, 1027745880, -1121396864, 1041026790, 1042195560) + + W(3, 1018714920, 1026239260, -1131068140, 1015308851) + W(4, 1024250604, 1039079928, 1022159130, -1098313415) + + W(5, 1042189511, -1106606352, 1013770942, -1122039043) + W(6, 1024642508, -1107295041, 1044630722, 999141354) + + W(7, -1106681307, 1038936227, -1122507740, 1031978820) + + W(8, -1121959908, -1147395201, -1107136294, 1019497054) + + W(9, 1035880216, -1124106064, -1136584888, -1116330759) + + W(10, -1149906049, -1126556538, 1005058137, 1007702352) + + W(11, -1121374916, 1025050132, -1135809122, 1018900008); + sum2 = W(0, 1017133506, 1023321012, -1119553894, -1112177411) + + W(1, -1165256880, -1133574805, 1020043526, -1135761830) + + W(2, 1011515348, 1029416248, 1057587887, 1071604647) + W(3, 1051025857, 1035052104, 1010374724, 1018728192) + + W(4, -1139818306, -1115999672, -1090489276, -1079392139) + + W(5, 
-1098617840, -1139515542, -1121583660, 1024878156) + + W(6, -1123730089, 1020129658, -1109933138, -1097028615) + + W(7, -1105405946, -1135392452, -1142174380, 1002597928) + W(8, 996184056, 1015618084, 1016266760, 1028448562) + + W(9, -1155286464, -1138601606, 997185888, -1131188096) + + W(10, -1138856554, 1007066512, -1145378916, 1008681896) + + W(11, 1000343320, 991053648, -1155288808, -1132781834); + WS(1018288640, 1027735986); + sum1 = + W(0, 1012158232, 1006778572, 1025063871, -1156175041) + W(1, 1010747802, -1140348884, 1020162890, -1172026051) + + W(2, -1178449286, -1114624234, -1104570115, 1028919475) + + W(3, 1034856692, -1127457566, -1122825993, -1115773453) + + W(4, 1044498160, 1032943202, 1059928494, -1097612337) + W(5, -1085331503, 1031833306, -1119592595, 1038136595) + + W(6, -1128542910, 1027108853, -1088743921, -1106124541) + W(7, 1059914122, 1032056909, 1033999672, 1027508251) + + W(8, -1115962871, 1017365062, 1032615126, 1026836706) + + W(9, -1114177498, -1122073627, 1022377282, -1129465364) + + W(10, 1002517720, 964628492, -1134936888, -1146238776) + W(11, 1020458158, 1016604174, 998219705, 1023799671); + sum2 = W(0, -1126840972, -1145399745, -1139273136, 991527106) + + W(1, -1135317632, -1139461992, -1129486378, 995542402) + + W(2, -1130460798, 985194115, -1112997847, -1120456092) + W(3, 1001121889, 1017866680, 1006765920, 1000306721) + + W(4, 1019075916, -1120812206, -1139625904, 1043975251) + + W(5, -1150833602, -1128878392, -1133504840, -1142139489) + + W(6, 1017322604, -1129997452, 1042717692, 1051048254) + + W(7, -1121880440, -1112673669, -1126929736, -1114488494) + + W(8, -1131054760, 1006903064, -1114175000, -1113881740) + W(9, 978663174, 1026044394, 1014584312, 1007041936) + + W(10, -1131047996, -1143360737, -1130986946, 1007107280) + + W(11, -1143215153, -1125685806, -1144361281, -1134951296); + WS(1042433344, -1111851638); + sum1 = W(0, -1128612156, -1139940268, -1122042583, 984858240) + W(1, 999630836, -1146630760, -1126512698, 
996431920) + + W(2, -1112658226, -1123380939, -1128727592, -1107767030) + + W(3, -1114896432, -1115281716, -1129996802, -1123547845) + + W(4, -1119638967, -1126908022, -1097703246, 1052387104) + + W(5, 1054789077, 1030603948, 1028419819, 1026334318) + W(6, 1043958886, 1033805831, 1057665642, 1046318672) + + W(7, -1095395475, -1117224401, -1123618471, -1113258842) + + W(8, -1120465263, -1115346894, -1104545545, -1108167869) + + W(9, 1029397739, -1163176544, -1117439993, -1134051464) + + W(10, -1128976934, -1142120768, 1005565040, -1148354296) + + W(11, -1133849404, -1117808895, 1013349902, -1120421311); + sum2 = + W(0, 1022431497, -1132240188, 1024165374, -1114822837) + + W(1, -1120729932, -1124855948, -1126342960, -1128289576) + + W(2, -1109389142, -1119299282, -1107432916, 1044244351) + + W(3, 1031006195, 983982854, -1158996358, -1121099750) + W(4, 1004613154, -1139248009, 1041447926, -1108646182) + + W(5, 1047688354, 1009435309, -1122846542, 1036127241) + W(6, 1028727631, -1129989652, 1047487962, -1100679909) + + W(7, -1126089152, 1033956847, -1123334894, 1039673953) + W(8, 1029503922, -1140046689, 1017218352, 1040665470) + + W(9, -1120804126, -1107003694, -1140927562, -1102421772) + + W(10, -1132574761, -1114039002, -1135952741, -1123756570) + + W(11, -1148002498, 1028342876, -1117057946, 1026336008); + WS(1015433728, 1058400049); + sum1 = + W(0, -1139873791, -1133909491, 1023506921, -1230944644) + W(1, -1118840528, 1028378783, -1121050287, 1033465034) + + W(2, 1031161269, 1032977294, -1116372870, 1035249566) + + W(3, -1120831281, -1114963068, 1032892305, -1105610222) + + W(4, -1113693508, -1112917766, -1116140698, -1103376612) + + W(5, 1044830734, -1141442286, 1023234585, 1034039600) + W(6, 1033801204, -1131731326, 1045725159, -1102794347) + + W(7, -1116748777, 1032646513, -1112562780, 1030129285) + W(8, -1119172737, 989007258, -1122523445, 1044071755) + + W(9, 1030473357, 1018738506, 1021910870, -1122899972) + + W(10, -1143910182, 1019358132, 1008313039, 
-1115540344) + + W(11, -1126204226, -1118552369, 1016154651, -1124368226); + sum2 = W(0, -1138428449, 992976916, -1142924106, 976851025) + W(1, 989093448, 997652548, -1153131756, -1134977059) + + W(2, -1158711528, 1013039401, -1128734961, 1036130613) + + W(3, 1010050489, -1137359275, 990210276, -1138876101) + + W(4, -1124467432, -1130455464, -1113146735, -1097860430) + + W(5, -1108810723, -1122996798, -1140348735, -1127238416) + + W(6, -1140697417, -1123518198, -1099387353, -1077268149) + + W(7, -1091225653, 1032494444, -1115493835, 1018469149) + W(8, 1030243467, 1033499227, 1051222006, 1072898808) + + W(9, 1056060393, 1025590581, 1025171621, 1026307569) + W(10, 1012442941, 975746961, -1122081826, -1117904739) + + W(11, -1131990027, 951236744, 1006284898, -1146863422); + WS(-1143089152, 1030017260); + sum1 = W(0, 1012276081, 1001605962, 1024406997, -1155861797) + + W(1, -1155627981, -1122534699, 1018348791, -1120990241) + + W(2, -1116644609, -1127223379, -1109637089, -1115433381) + + W(3, 1036571679, 1028189701, -1126280255, 1036379833) + W(4, 1019444907, -1119160665, 1048989101, 1044433671) + + W(5, -1098184025, 1039597237, -1117935161, -1136463217) + + W(6, -1124688427, 1035777366, -1098625404, 1006101820) + + W(7, 1048780603, -1104960796, 1029641477, -1111599465) + + W(8, 1029853709, -1136557285, 1038057505, -1111190908) + + W(9, -1112291813, -1130076067, 1012573277, -1154886405) + + W(10, -1130860131, -1130309965, -1130883561, 1009046005) + + W(11, 1025361773, 1018788475, -1125993892, 1020397819); + sum2 = W(0, -1153319600, -1147284080, 1008968960, 995456320) + + W(1, -1140178660, -1146954776, -1142030464, -1170856127) + + W(2, 1008405084, 985822624, -1142311064, 1022276922) + W(3, -1140411728, 1005012008, 1003782736, 1006946188) + + W(4, -1118973116, 1010505984, -1101248908, -1089187936) + W(5, -1109859050, 1017518401, 982409184, 995727232) + + W(6, -1140784820, -1129308604, 1037448945, 1057794596) + W(7, 1029773785, 1015531414, 974134143, 960534268) + + W(8, 
1012585128, 1021293048, 1024969278, 1033366347) + W(9, 1024400778, 1007802556, 1003482728, 1009923956) + + W(10, 1010769460, 1001814848, -1160749952, -1123619202) + + W(11, -1136545168, 1000322872, -1152799248, 985284128); + WS(1064472528, -1121594920); + sum1 = W(0, -1142654991, -1143599223, 1014568428, 1010500896) + + W(1, -1124387333, 1020884834, -1131205634, 1026841191) + + W(2, 1027230343, 1032290711, -1136037408, 1002050167) + + W(3, -1122938499, -1120250507, 1025589157, -1110863224) + + W(4, -1112807213, -1113392623, -1115590690, -1113734161) + + W(5, 1038834309, 1029912912, 1019867389, 1031947569) + W(6, 1027061019, 1016010466, 1034098395, -1112872467) + + W(7, -1130883382, 1015162858, -1123484555, 1019435182) + W(8, -1128825126, 991342574, 1008695068, 1027642302) + + W(9, 1013984188, 1015817710, 1015459258, -1129521612) + + W(10, -1164359388, 1014490160, -1148094031, -1127829894) + + W(11, -1138058188, -1124941766, 1008886302, -1130075526); + sum2 = + W(0, 1003807591, -1147429191, 1002635095, -1132089351) + W(1, -1147602519, -1151072125, 964968041, 1011860423) + + W(2, -1154115373, -1139843175, 1018649088, 1016729308) + + W(3, 1024344696, -1155997437, 1001714367, -1141691791) + W(4, 1000124719, 1001833687, 1008095031, -1105992985) + + W(5, 1014141127, -1132427785, -1141957575, -1139390003) + W(6, 1017182228, 1024488826, 1040714709, 1063780536) + + W(7, 1047200342, 1020609216, 1023684454, 1017456200) + + W(8, -1126980607, -1116401990, -1105844805, -1085442794) + + W(9, -1101306502, -1122913939, -1125194898, -1128761080) + + W(10, -1130234859, 987658746, 1013729967, 1024604622) + + W(11, 995366957, -1147894927, -1146690231, -1146063807); + WS(1061878800, -1131153991); + sum1 = + W(0, -1123872727, 1018625288, -1127640224, 1026350045) + W(1, 1013916191, 1018183052, -1145362866, 1016048056) + + W(2, 1015115512, 1031144036, 1036357847, -1108974562) + W(3, -1107191102, 1006433282, 1014427177, 1026198990) + + W(4, -1099302516, -1102371221, -1085394744, 1059569738) 
+ W(5, 1050617832, 1032504563, 1031877738, 1033421596) + + W(6, 1041224340, 1009910425, 1052022073, 1058525661) + + W(7, -1088226291, -1097316565, -1109508096, -1098228398) + + W(8, -1144166978, 1014687697, -1115552350, -1125187302) + W(9, 1037730450, 1040234099, 1015825508, 1035235966) + + W(10, -1171049230, 1022902338, -1132534141, 1016189168) + + W(11, -1123531112, -1127405808, 1018548825, -1137247201); + sum2 = + W(0, -1131301730, 973798558, -1127780866, 1013478572) + W(1, -1160424319, -1135840779, -1164912671, -1138738786) + + W(2, 1031269327, -1131640108, 1013454096, -1109509101) + + W(3, -1117315078, -1131160392, -1145619912, -1127332243) + + W(4, -1127010401, 1028981651, -1149526184, 1051779317) + W(5, 1028380081, -1137527992, 998238336, -1148504424) + + W(6, -1109842974, -1125259759, -1113692773, 1047088883) + + W(7, -1134194124, 1028175261, 1018886164, 1027237057) + + W(8, -1181736700, -1167651134, -1123287814, -1109788940) + + W(9, -1115287133, -1121515979, -1125209194, -1142455024) + + W(10, -1180777340, -1160957999, 993986728, 1020962386) + + W(11, -1136947718, -1138138790, -1152989064, -1123011340); + WS(-1146021888, 1053974589); + sum1 = + W(0, 1029642476, 1013890275, -1176939092, 1022266947) + W(1, -1146466657, 1004183253, -1160650069, -1127783457) + + W(2, -1119368753, -1134074211, 1007708103, 1017736689) + W(3, 1027345005, 1032510570, 1019378973, 1031489314) + + W(4, 1042969521, 1042359026, 1045769551, -1101301107) + + W(5, -1094644679, -1091538585, -1107179580, -1095508207) + + W(6, -1095098901, -1107285127, -1096985546, 1034918881) + W(7, 1050538529, 1051699648, 1036824506, 1048776768) + + W(8, 1046685039, 1031018217, 1036262392, 1003810877) + W(9, -1120828825, 1011534979, -1133351451, 1035618600) + + W(10, 984849429, -1135393367, -1139413615, 1024875117) + + W(11, -1172526890, 1017671961, -1160823333, 1006585957); + sum2 = W(0, 1031363252, 1030774484, 1015165558, 1007437656) + W(1, 1020087968, -1158035650, -1137051446, 1018262350) + + W(2, 
-1091101506, 1001500224, -1110787951, 965388167) + W(3, 1003188992, 1041338676, -1134903850, 1033269727) + + W(4, 1048232756, -1110436898, 1016237906, 1014973676) + + W(5, -1123006886, -1105090874, -1123217223, -1104724635) + + W(6, 1057852755, -1132290932, 1043794074, 1047525730) + + W(7, 1011818344, -1129296549, 1034851396, -1106365430) + + W(8, -1095952784, -1131305343, -1113356328, -1152923833) + + W(9, -1111245491, -1131940021, -1117639196, 1024945328) + + W(10, 1016290300, -1126601761, 1003743696, 1022650220) + + W(11, 1021501454, 1017537464, -1133259176, 1019937714); + WS(-1077057896, -1083600334); + sum1 = W(0, 1017420011, 1014201033, 1017846781, 999765850) + W(1, -1162024122, 985808522, 1019153993, 1011766057) + + W(2, 1011471785, 1019976613, -1138042285, 1040273597) + W(3, 1016487629, 1027730222, 1010855969, 1025127228) + + W(4, 1029223422, -1126437509, 1049638570, -1090770241) + W(5, 1029091694, 1037672698, 1027546578, 1025680213) + + W(6, -1116040414, 1015478313, -1103217262, -1087230893) + + W(7, 1046437488, 1024768280, 1028909230, 1017109109) + W(8, 1017123181, 1024110818, 1023111893, 1030676769) + + W(9, -1112046985, -1120839802, 1023955584, -1128064723) + + W(10, 1016511669, -1167731667, 1009386661, 1023090125) + + W(11, 1020460717, 1025489318, -1134545259, 1027741830); + sum2 = + W(0, 1023774756, -1152708847, 1005727232, -1128030280) + W(1, 1004577664, -1116453887, 1020705336, -1127775332) + + W(2, -1107003878, 1013240776, -1108761818, 1032847770) + + W(3, 1024878510, -1129071264, -1124253692, -1114566712) + + W(4, 1020767940, -1108605887, 1050907301, 1058054639) + W(5, -1106188814, 1040942692, -1115446820, 1042743894) + + W(6, -1118294055, -1128830540, -1097736561, 1008347200) + + W(7, 1049418167, -1105809360, 1014050712, -1132221182) + + W(8, -1113997093, -1139588328, 1032528025, 1039669350) + + W(9, -1108856812, -1104688291, 1018266740, -1103534695) + + W(10, 1021408408, -1119578529, -1135972104, -1131826954) + + W(11, 999382680, 1019392776, 
-1117167612, 1022204104); + WS(1034686080, -1080904524); + sum1 = W(0, -1139332721, -1156631187, 1023562901, 1011454089) + W(1, 1009225011, 1012327853, 1010687981, 1010798725) + + W(2, 1025190657, 1024127465, 1006799241, -1145135178) + W(3, -1129417722, 1017495114, 1027561839, 1010795083) + + W(4, -1143163562, 1040892278, -1104914606, -1089193318) + + W(5, 1043909393, -1119873834, -1136185891, -1118482716) + + W(6, 1041601261, 1028605547, 1052908885, -1091833281) + + W(7, -1103073573, 1025246703, -1124345098, 1032670633) + + W(8, 1024768205, -1129308018, -1117860929, 1036300940) + W(9, 1040987970, 1033652713, 1024209623, 1027144528) + + W(10, -1137907141, 1012089369, 1019594656, -1143330794) + + W(11, 992909011, -1123933213, 1018355139, -1123266333); + sum2 = W(0, 998154484, -1136847690, -1120010648, -1145824866) + W(1, 1015307650, 994166396, 1000606426, 1010815409) + + W(2, -1124228589, 1028193069, 1043298286, -1115567961) + + W(3, -1106126812, -1111174068, -1128437454, -1110538383) + + W(4, -1132108902, -1123281782, -1097765474, 1059221182) + + W(5, 1048600788, -1130476352, 1026255089, -1118584150) + + W(6, -1115676434, -1123302060, 1027211577, 1034703777) + + W(7, -1099334080, 1015056080, -1137618020, 1028199647) + + W(8, -1123985162, -1132306691, -1114822183, -1131429597) + + W(9, 1029215805, 1023836215, -1127893362, 1025007180) + + W(10, 1004957466, 1011392625, -1127542967, 1022587458) + + W(11, -1127163397, -1122559367, -1171736302, -1124423270); + WS(-1097173920, -1100403112); + sum1 = W(0, -1133792968, -1124511038, -1133667884, -1126404688) + + W(1, -1129657589, -1123548289, -1134226372, -1120375479) + + W(2, -1126599342, -1134319356, -1113753548, -1113718896) + + W(3, 1034489098, 1017816566, -1122628437, 1029275393) + W(4, 1026626987, -1112479512, 1051379210, 1058852431) + + W(5, -1097104011, 1007326848, -1112737379, -1116837058) + + W(6, -1109988694, -1122054529, -1097159959, 1058630415) + + W(7, 1049904553, -1104990865, 983139170, -1110311540) + + W(8, 
-1128510918, -1138055228, 1031366423, -1108453759) + + W(9, -1111244112, -1129654222, -1143321192, -1132471000) + + W(10, -1124691470, -1131431128, -1128464692, -1122909907) + + W(11, 1006087192, -1138955724, -1123473736, -1149064600); + sum2 = W(0, -1133003813, -1170659932, 1006656308, 1031055883) + + W(1, -1122785076, -1138148825, -1134424500, -1131089901) + + W(2, -1145103116, 1024883426, -1122208183, -1101651786) + + W(3, -1107240567, -1137951645, -1131665157, -1116779622) + + W(4, -1105221269, -1117429423, -1098340061, 1055658740) + + W(5, 1035604404, -1131811521, -1130287800, -1123356625) + + W(6, 1033080040, 1028547885, 1042272545, 1058321046) + W(7, -1112738821, 1003752088, 1015669581, 1033205575) + + W(8, 1016862101, -1128891234, -1121562483, -1100689547) + + W(9, -1115182870, 1026865631, -1129373191, -1134576021) + + W(10, -1129731365, -1147341896, -1121650606, 1031708925) + + W(11, -1123396988, -1133076983, -1131162259, -1127933595); + WS(1049422752, 1064394145); + sum1 = W(0, 1016583527, 997641734, -1129211865, -1131677043) + W(1, 1021172552, 1021691141, -1133007562, 1026669144) + + W(2, -1106085006, -1123841888, 1041111742, 1032245856) + + W(3, -1110939130, -1105269375, -1137128282, -1106745812) + + W(4, 995307718, 1031369872, -1088517333, -1098988005) + W(5, 1058612208, 1030057089, 1036830720, 1034516890) + + W(6, 1042273115, 1021597381, 1058826428, -1105331685) + + W(7, -1090507155, 1043687978, -1120823228, 1038691348) + + W(8, -1113049442, -1122854832, -1113933244, 1032610296) + + W(9, 1037338632, -1122591528, -1116248270, -1117945591) + + W(10, 1025810280, 1006187755, 1019889767, -1131685097) + + W(11, -1155049030, -1134096210, 1025994697, -1126546729); + sum2 = + W(0, 1015668141, -1124041461, 1005775275, 1015274173) + W(1, -1138139200, 992639399, -1132053353, -1129319398) + + W(2, -1138201662, -1140877219, 1027346708, 1016303395) + + W(3, -1106503093, -1117618841, -1115775134, -1122071744) + + W(4, -1111996311, -1116450062, -1125910350, 
-1108948194) + + W(5, -1104963655, 1031763952, 1015724405, 1034411590) + + W(6, -1127284815, -1123578506, -1106280325, 1052974100) + W(7, 1053021197, 957951850, 1016609913, -1140595900) + + W(8, -1125087482, 1024732308, 1034158307, 1032925063) + W(9, -1107449032, 994113735, -1132927280, -1140186580) + + W(10, 1020174885, -1139064970, -1133423524, -1161498797) + + W(11, -1134898868, 1013272790, -1132485274, -1164791981); + WS(-1101497152, -1084603877); + sum1 = + W(0, -1136425045, -1140218249, -1121565262, -1122160667) + + W(1, 1006883159, -1132497425, 1004502690, -1128604789) + W(2, 1016522037, -1157845066, 1035402982, 1027884002) + + W(3, 1025282390, 1022271101, -1138792353, 995143101) + W(4, 967194407, 1029505522, 1022903246, -1107598171) + + W(5, 1025270942, -1120772739, -1154700189, -1128284203) + + W(6, 1019848413, -1119357636, 1027088345, 1024422013) + W(7, -1117602990, 1030415880, -1171556244, 1025955498) + + W(8, -1129523533, -1140249161, -1121932442, -1127296803) + + W(9, 1030372258, -1129818261, -1138666305, -1121511513) + + W(10, -1142614610, -1135395837, -1148904362, 1002411186) + + W(11, -1130529549, 1018540973, -1138856043, 1011955033); + sum2 = + W(0, -1126668299, -1137964827, 1015963121, 1030757650) + W(1, -1137547503, 997720155, -1132279825, -1152589275) + + W(2, -1131366283, -1117612139, -1097765254, -1083955387) + + W(3, -1103492737, -1118797430, -1131387718, -1140632131) + + W(4, 1024971228, 1034620123, 1049249869, 1064229708) + W(5, 1047078464, 1017921725, 1024786888, -1144191965) + + W(6, 1000957181, -1119890411, 1026062254, -1107214224) + W(7, -1122275403, 1016072153, -1133941049, 996433547) + + W(8, -1151515419, -1145021381, 1001872029, 1026637176) + + W(9, 1027173860, -1135832789, -1148432117, -1140699475) + + W(10, 1005199725, -1136862175, 1007955643, -1125717658) + + W(11, -1169614250, 923654805, 1002011725, 1005736109); + WS(1059552336, -1136539026); + sum1 = W(0, 990367896, 1010957914, -1154541352, 1017372383) + W(1, 1026284586, 
1015461809, 1014364322, 1021777309) + + W(2, 1041343484, 1036710283, 1045269292, -1112141659) + W(3, -1116984235, 1018013925, 1006339428, 1032228520) + + W(4, -1096612504, -1107358947, -1087221074, 1058232297) + + W(5, 1051438086, 1047713030, 1032067931, 1045851190) + W(6, 1033353841, 1029016441, 1042554433, 1029783110) + + W(7, -1087458720, -1095293300, -1114380761, -1099415088) + + W(8, -1125599349, -1132821402, -1154580200, -1114120867) + + W(9, 1033522371, 1032365167, 1004597796, 1030006574) + W(10, 1028944863, 1024290996, 1023892422, 1023410731) + + W(11, -1144215764, -1144750420, 1001346936, -1130073781); + sum2 = W(0, -1153914788, -1126180245, 995844276, -1141303142) + W(1, 1026951758, 990856044, 1004850742, 1015235936) + + W(2, -1101809160, -1115520194, -1114410922, 1033573989) + + W(3, 998799030, -1110191966, -1129252703, 1016759632) + W(4, 1052877341, 1022007580, 1055965696, -1098900400) + + W(5, 1023481081, 1023185808, 1031073312, -1104219784) + W(6, 1046574229, 1000424166, 1034680258, -1098051352) + + W(7, 1045079279, -1106708743, 984863273, -1103050031) + + W(8, -1095334336, -1113807813, -1109583292, 1033797491) + + W(9, 1032986287, 1025876178, -1137844345, 1038371038) + + W(10, 1023520281, 1021218858, 1008634443, -1115608949) + + W(11, 1032307290, -1128938562, 1017335440, 1020607644); + WS(-1080660584, -1085825159); + sum1 = W(0, 1013708199, 998525366, 1026247587, 1009982783) + W(1, -1136883881, -1128223794, 999596614, -1126850910) + + W(2, -1123370319, -1131079022, -1110742584, -1113918027) + + W(3, 1032009669, 1025751155, -1126831290, 1033490448) + + W(4, -1145658646, -1111041043, 1047524760, 1038280501) + + W(5, -1096311074, 1042402294, -1118872454, 1023947050) + + W(6, -1118786339, 1035331132, -1095527502, 1041941518) + + W(7, 1051037928, -1106649743, 1032615945, -1111971999) + + W(8, 1028171867, 1017605134, 1042485668, -1110999603) + W(9, -1106204846, -1132447358, 1002160934, 971034337) + + W(10, -1144908790, -1131113128, -1130744068, 992723116) 
+ + W(11, 1025830203, 1017749654, -1127230527, 1018668086); + sum2 = W(0, 988660617, -1145776834, -1133422913, -1126488793) + W(1, 999416722, 999055930, 987033481, -1179419172) + + W(2, 1017543700, 1002138986, 1031822055, 1032911160) + W(3, 1018658069, 1007351961, -1162261577, 999395634) + + W(4, 1015794522, 1001599498, 1041494739, 1056510750) + W(5, 1023998101, -1138508317, 991201876, -1141702058) + + W(6, -1133704409, 1024621822, -1102581932, -1089051586) + + W(7, -1111744235, 1009295285, -1140892226, -1147317506) + + W(8, 1003471274, -1135257421, 970658596, 1026713544) + W(9, 945757471, 998080468, -1156050276, 1007988669) + + W(10, -1140119133, -1136500105, -1163479081, 1009057465) + + W(11, 1000517690, -1137960905, -1186683976, -1146609818); + WS(1064784784, -1120346387); + sum1 = + W(0, -1150678408, 1000581420, 1006062348, 1015088861) + W(1, -1136518116, -1126533711, 1012432222, -1122704190) + + W(2, 1015721531, -1110077251, -1107299655, -1132691862) + + W(3, -1148196556, 1013224070, -1124886999, 1014270588) + W(4, 1049255678, 1043607805, 1059242626, -1123916922) + + W(5, -1096371932, -1100407642, -1132580564, -1102354822) + + W(6, -1099108228, -1107416484, -1089544734, -1130977491) + + W(7, 1057929313, 1048500643, 1035479729, 1044504531) + W(8, -1149551256, 1017163947, 1023526494, 1022505321) + + W(9, -1104456865, -1111675367, -1127245287, 1007459698) + + W(10, -1136953142, -1140022794, -1139533474, 1012221798) + + W(11, 1014035238, 1026165050, -1136458552, 1017479699); + sum2 = W(0, -1140771860, -1125513054, -1131667695, -1131288705) + + W(1, 978159878, 1012596136, 1011933560, -1121534607) + W(2, 1031694512, -1122230843, 979286214, 1025077104) + + W(3, -1108853537, -1102087836, -1117751010, -1113389694) + + W(4, -1104948969, 993388690, 1026183534, -1111209187) + W(5, 1041617383, 1045410736, 1030126174, 1044425994) + + W(6, -1115570202, 1042093481, 1042830623, -1112764939) + W(7, 1043422569, 1034771561, 1014235016, 1025820984) + + W(8, 1040745971, 
-1111499166, -1119680402, 982469091) + + W(9, -1120447085, -1109907689, -1127258987, -1115100280) + + W(10, -1127298441, 995262946, 1002124441, -1123012516) + + W(11, -1129740789, -1125016939, 1004566649, -1119639931); + WS(-1088649680, 1067112300); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[555]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define temp_tex(pos) (float(texture(temp, pos).x)) +static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2); +static const float2 temp_pt = float2(1.0 / (temp_size.x), 1.0 / (temp_size.y)); + +#define HOOKED_tex(pos) temp_tex(pos) +#define HOOKED_size temp_size +#define HOOKED_pt temp_pt + +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 555; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (2)) + 0.5, float(group_base.y + y - (3)) + 0.5)).x; + } + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[12]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 4]; + samples[1][1] = inp[local_pos 
+ 5]; + samples[1][2] = inp[local_pos + 6]; + samples[1][3] = inp[local_pos + 7]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; + samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 19]; + samples[3][1] = inp[local_pos + 20]; + samples[3][2] = inp[local_pos + 21]; + samples[3][3] = inp[local_pos + 22]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; + samples[4][2] = inp[local_pos + 32]; + samples[4][3] = inp[local_pos + 33]; + samples[5][0] = inp[local_pos + 34]; + samples[5][1] = inp[local_pos + 35]; + samples[5][2] = inp[local_pos + 36]; + samples[5][3] = inp[local_pos + 37]; + samples[6][0] = inp[local_pos + 45]; + samples[6][1] = inp[local_pos + 46]; + samples[6][2] = inp[local_pos + 47]; + samples[6][3] = inp[local_pos + 48]; + samples[7][0] = inp[local_pos + 49]; + samples[7][1] = inp[local_pos + 50]; + samples[7][2] = inp[local_pos + 51]; + samples[7][3] = inp[local_pos + 52]; + samples[8][0] = inp[local_pos + 60]; + samples[8][1] = inp[local_pos + 61]; + samples[8][2] = inp[local_pos + 62]; + samples[8][3] = inp[local_pos + 63]; + samples[9][0] = inp[local_pos + 64]; + samples[9][1] = inp[local_pos + 65]; + samples[9][2] = inp[local_pos + 66]; + samples[9][3] = inp[local_pos + 67]; + samples[10][0] = inp[local_pos + 75]; + samples[10][1] = inp[local_pos + 76]; + samples[10][2] = inp[local_pos + 77]; + samples[10][3] = inp[local_pos + 78]; + samples[11][0] = inp[local_pos + 79]; + samples[11][1] = inp[local_pos + 80]; + samples[11][2] = inp[local_pos + 81]; + samples[11][3] = inp[local_pos + 82]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 33]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0); + 
imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret); +} diff --git a/src/Effects/NNEDI3/NNEDI3_nns256_win8x4.hlsl b/src/Effects/NNEDI3/NNEDI3_nns256_win8x4.hlsl new file mode 100644 index 000000000..ca90b7a87 --- /dev/null +++ b/src/Effects/NNEDI3/NNEDI3_nns256_win8x4.hlsl @@ -0,0 +1,7880 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: nnedi3.py --nns 256 --win 8x4 --use-compute-shader --use-magpie +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME NNEDI3_256_4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 1 * 2 +//!HEIGHT INPUT_HEIGHT * 2 * 1 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!FORMAT R16_FLOAT +//!WIDTH INPUT_WIDTH * 1 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D temp; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_temp; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC NNEDI3 (double_y, nns256, win8x4) +//!IN INPUT +//!OUT temp +//!BLOCK_SIZE 32, 16 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[8]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 8; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 32.0; + float mstd1 = sumsq / 32.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, 1024871952, -1091961907, 1049937838, 1023328805) + W(1, 1026430476, -1103990249, 1052012307, 994719130) + + W(2, 1041094851, -1086639391, 1057524239, -1110763811) + + W(3, 992987016, -1085365048, 1063527595, -1102193240) + W(4, 1034372511, -1087585419, 1061081007, 1046466539) + + W(5, 1007997243, -1093414958, 1057188600, -1131716095) + + W(6, 1038248733, -1100793414, -1104832955, 1042167685) + + W(7, 1009484684, -1099588502, -1114088341, 1037858632); + sum2 = W(0, -1122170202, -1115075619, -1113824159, 1016746290) + + W(1, -1117362461, 1050294809, 1022334174, 1016781702) + + W(2, -1125932452, 1041506545, 
-1098709995, 1031697879) + + W(3, -1100385246, -1100233118, -1104291570, 1033455014) + + W(4, 1020071642, -1100362988, 1072797337, 1050444141) + W(5, 1025043913, 1047005177, 1053652830, -1106922881) + + W(6, 1017078274, 1012082941, -1100425628, -1081909907) + + W(7, 1018043750, -1112302377, -1155649364, -1086585813); + WS(-1096195455, 1061976972); + sum1 = + W(0, -1107583849, -1094469481, 1057261454, 1032308806) + W(1, 1045016510, -1104061589, 1051975353, 1046697387) + + W(2, -1102048399, -1082981042, 1058403546, -1093320254) + + W(3, 1050507709, -1086991635, 1061386858, 1051876218) + + W(4, -1138600128, -1087760032, 1052980216, -1106246183) + + W(5, 1004076226, -1088999402, 1052828202, -1112945129) + W(6, 1049677870, -1111975782, 1055128500, 1045880594) + + W(7, -1106918697, -1089081125, 1048525839, -1116677906); + sum2 = + W(0, 976892690, -1095110912, -1080663089, 1075827337) + W(1, 1043646796, -1114450783, -1074060607, 1075290035) + + W(2, -1131052969, 1052886335, -1068434194, 1070007941) + W(3, 1047930742, -1113296119, 1048011859, 1046007784) + + W(4, -1099144563, -1088067101, 1042920440, -1090506241) + + W(5, 1035354913, 1040578653, 1058047003, -1099371585) + + W(6, -1115191227, -1102673565, 1057110709, -1096675048) + + W(7, -1111106463, 1051335047, 1038060178, -1096378099); + WS(-1095930303, -1128843438); + sum1 = W(0, -1113225514, 1000563038, 1038763268, -1104281110) + + W(1, 1030842449, -1103461662, 1052682210, -1105449143) + + W(2, -1120882464, -1098779087, 1055587560, -1106219642) + + W(3, -1111398934, -1156245815, 1046999855, -1105879965) + + W(4, -1098488134, 1057689814, -1090605975, 1039743648) + + W(5, -1098728413, 1058114717, -1106232348, 1030902068) + + W(6, -1111237296, 1041725787, -1106563184, -1111582036) + + W(7, -1104159890, 1046074787, -1119168797, 1033984854); + sum2 = W(0, -1109947141, -1078961084, 1053833634, 1049299145) + + W(1, -1087141194, -1081851502, -1102894339, 1040634467) + + W(2, 1007133897, 1057831088, 1046573470, 1035108031) + 
W(3, -1099413759, 1067440534, 1058568743, -1114324751) + + W(4, 1026690396, -1095618356, 1058025266, -1094447449) + + W(5, 1045509921, 1022398658, -1115002147, 1023241606) + W(6, -1117657274, 1029712906, -1109832065, 987652806) + + W(7, 1051942463, 1049871690, -1099467737, -1104714655); + WS(1018627052, -1077328537); + sum1 = W(0, -1107592280, -1098360952, 1054425801, -1121256842) + + W(1, 1011043872, -1103889731, 1045838144, 1016847523) + + W(2, -1138466445, -1088654431, 1063939041, -1114759963) + + W(3, 1041422472, -1079526160, 1057394281, -1109966789) + + W(4, -1112294435, -1109020730, 1061143403, -1115564074) + + W(5, 1012707679, -1095389295, 1057286118, -1117689671) + + W(6, -1150731309, -1102056896, 1040465365, -1133534770) + + W(7, -1118891707, -1101414784, 1042544156, 1028492087); + sum2 = + W(0, -1161717665, 1033867626, 1028201267, -1124228530) + W(1, -1138679732, -1115218580, -1096378423, 1029766795) + + W(2, -1121504639, 1030621655, 1071708903, -1128256742) + W(3, 1050854432, -1096361601, 1079482342, 1035298280) + + W(4, -1106511096, 1044499063, -1064184100, 1035336882) + + W(5, 1030373815, -1104067382, -1089118805, -1105361644) + W(6, 980628162, 1040061476, 1048273255, -1119150405) + + W(7, 1031200951, -1122098343, -1109436715, 1035049634); + WS(1045693694, 1057374369); + sum1 = W(0, 1048369598, 1034312519, -1131023893, -1099588978) + + W(1, -1104752283, 1045853799, -1098373234, 1019902402) + + W(2, 1031950009, 1057046586, -1095062180, -1105510198) + + W(3, -1123958928, 1065585622, -1089164628, 1033283356) + + W(4, -1103831441, 1061666460, -1092432965, -1115031649) + + W(5, -1106189273, 1060939321, -1087345809, -1122647270) + + W(6, -1103850789, 1050689810, -1092965100, -1126666680) + + W(7, -1110300085, 1060417841, -1091010256, -1103574093); + sum2 = W(0, -1087427905, -1098709872, 1072153031, -1089481634) + + W(1, 1058683530, -1099141199, -1088885778, 1056590706) + + W(2, -1081734921, 1032637312, 1061509476, -1092495010) + W(3, 1050776004, 1072181481, 
1071890032, 1048814690) + + W(4, 1042479954, 1036599206, 1052304550, -1095810808) + + W(5, 1025544982, -1080357543, -1080881103, -1097308566) + + W(6, -1105492903, -1113642582, 1050862866, -1094462248) + + W(7, 1058740427, -1086527647, -1078556244, 1039671439); + WS(-1081549664, 1060153996); + sum1 = W(0, 992286043, 1063391104, -1087586274, -1105647870) + + W(1, -1115318586, 1055271797, -1095899708, 1025423024) + + W(2, -1105782456, 1062019397, -1085498120, 1033921889) + + W(3, 1026471870, 1061503832, -1089250259, -1113405924) + + W(4, -1114468402, 1058772501, -1090055577, -1113674825) + + W(5, 1037910210, 1045594046, -1102496824, -1106446561) + + W(6, -1130210466, 1049908468, -1103786988, -1118412923) + + W(7, 1043803882, 1040755007, -1104619012, -1096097592); + sum2 = W(0, 1053790549, -1080860351, -1068185653, -1082483044) + + W(1, -1110257416, -1097530578, -1077346152, -1088866791) + + W(2, 1033688102, 1041061421, 1052310674, 1052658871) + W(3, -1102619530, 1064966059, 1072556372, -1104178634) + + W(4, -1117483359, 1050344847, 1075004369, -1108410079) + + W(5, -1098152061, 1040479494, 1064749402, 1040931499) + + W(6, -1105743952, 1051151965, -1110156824, -1137891932) + + W(7, -1083553679, 1065962098, 1045213317, 1053124980); + WS(-1079020096, -1097079011); + sum1 = W(0, -1109501731, -1096742621, 1057943754, 1016000225) + + W(1, 1033910555, -1092191510, 1055662529, 1020967408) + + W(2, 1015347636, -1089091160, 1059647341, -1102027816) + + W(3, -1108027795, -1087072713, 1068092504, -1109882648) + + W(4, 1042979875, -1087558450, 1059824411, -1106889398) + + W(5, 1019929124, -1089417126, -1117499817, -1117366414) + + W(6, 1040369234, -1104831498, 1043480574, -1104827345) + + W(7, -1134559750, -1091818621, 1050143023, 1049289276); + sum2 = W(0, 1037631964, -1097479797, 1051023588, 1045348435) + + W(1, -1105851814, 1051380185, -1097950410, 1028003104) + + W(2, 1036652885, 1028410699, 1034374804, -1113269608) + W(3, -1110757790, 1044594400, 1053237761, 1041832368) + + 
W(4, 1024508007, 1041068006, -1132209182, 1033918380) + + W(5, -1139201564, 1046036650, -1088293598, -1089693526) + + W(6, -1113225064, 1054960427, -1081781977, -1099925151) + + W(7, -1104657620, -1105334641, 1065714224, -1106359824); + WS(-1087384991, 1052875812); + sum1 = W(0, 1041733252, -1109905397, 1041497057, -1113613058) + + W(1, -1138647561, -1104994285, 1032882549, -1117825756) + + W(2, 1034391503, -1107522048, 1050093354, -1108789794) + + W(3, -1106477875, 1057597757, -1091247383, 1028766931) + + W(4, -1100509868, 1062613266, -1087942224, 1042722079) + + W(5, -1098694249, 1055828187, -1090201627, 1013355333) + + W(6, -1110959830, 1052485297, -1098203032, 1034047139) + + W(7, -1120730421, 1044647290, -1100016695, 1038812681); + sum2 = W(0, 1019350107, 1041831066, 1051650697, -1103332568) + + W(1, -1111428867, -1130655130, -1112274143, -1077765094) + + W(2, 1017374087, -1138336400, -1092155273, -1073481211) + + W(3, -1112005123, 1035402260, -1114034096, -1087514353) + + W(4, 1028700445, -1116945124, 1045953881, 1069095754) + W(5, -1112772903, 1041636268, 1041190902, 1074086485) + + W(6, 1027823157, -1107238338, 1049171133, 1057277153) + W(7, 999439052, 1033528252, 1027833254, -1107727987); + WS(1050457727, 1041689780); + sum1 = + W(0, -1131312139, 992358355, -1092744895, 1048008788) + W(1, -1116681402, 1014710691, 1027381214, 1036450832) + + W(2, 1030961105, 1040368912, -1089348299, 1053966377) + W(3, -1140959985, -1105027369, 1050737138, 933247243) + + W(4, 1046496193, -1089290871, 1038810352, 1043432004) + W(5, 1048638526, -1090315138, 1055368676, -1119062181) + + W(6, 1007270019, -1098770776, 1040416748, 1023583738) + W(7, 1049181764, -1086147588, 1051301702, 1037141075); + sum2 = + W(0, -1107193278, 1057496780, 1072935738, -1080570956) + W(1, 1019433289, 1031729137, -1080020989, 1054527180) + + W(2, 1039188525, 1037283932, -1078950448, -1096917141) + + W(3, -1123892633, -1086728254, -1087453268, 1050123895) + + W(4, 1039851328, -1087309268, 1051352061, 
1053948249) + W(5, 1032631611, -1092570464, 1059438018, 1042769096) + + W(6, 1044641782, 1035754242, 1051006721, 1029707984) + W(7, -1097026795, 1063903066, 1056300653, -1106545458); + WS(-1084263519, -1088250567); + sum1 = + W(0, -1107101246, 1048954180, -1118116730, -1111017296) + W(1, 1049480649, -1096698757, 1027018719, 1043604572) + + W(2, -1097447514, 1059331526, -1108150329, -1098833704) + + W(3, 1049647488, -1119095878, -1090501921, 1050343697) + + W(4, -1099235545, 1040553109, 1044929028, -1096264890) + W(5, 1018612505, 1041330418, -1094468036, 1044030041) + + W(6, -1103944776, 1040723088, 1047463532, -1121471547) + + W(7, 1032447885, 1023387386, -1106285556, -1175819022); + sum2 = W(0, 991719940, -1157381748, -1100952734, 1029794653) + W(1, 1034086314, -1121095386, 1027786887, 1016292894) + + W(2, 1055828179, -1103829652, -1085350121, 1067178121) + + W(3, 1034571486, -1100672658, -1118546758, -1098007098) + + W(4, 1049088345, -1090417467, 1049271738, 1008607005) + + W(5, -1098363481, 1050795943, -1140682323, -1110134488) + + W(6, -1111430799, -1113801830, 1035813474, -1115483467) + + W(7, -1111365517, 1037803588, -1164121191, -1133015817); + WS(1066054400, -1125664425); + sum1 = W(0, 1015260777, 1054574294, -1095241461, -1127045294) + + W(1, -1108545200, 1053284635, -1094415406, -1111343263) + + W(2, 1023624464, 1059735702, -1088764634, 1041542629) + + W(3, -1109354663, 1058150961, -1078845388, -1130514988) + + W(4, 1023465234, 1058102002, -1090401103, 1035892576) + + W(5, -1108921531, 1063397581, -1105651068, -1136829776) + + W(6, -1116445024, 1046571067, -1116790272, 1030749305) + + W(7, -1128865616, 1046728251, -1104354311, -1106930027); + sum2 = W(0, 1041565398, -1104697837, -1115419200, -1158883614) + + W(1, -1103469615, 1029373537, 1031260861, 1035036640) + W(2, 1029690269, 1057447179, 1026403525, 1034486240) + + W(3, 1035794320, 1080145714, 1043319440, 1041655252) + W(4, 1035452008, -1103623095, 1050811672, -1114789346) + + W(5, -1107146771, 
-1066097425, -1103531154, -1114794272) + + W(6, -1124704946, -1097535851, -1090176537, 1036709484) + + W(7, 1040850628, -1103109309, 1055196268, -1102936938); + WS(-1087901375, 1071177135); + sum1 = + W(0, -1121594272, -1110224028, 1030219348, 994425384) + W(1, -1122618691, -1109965218, 1047787782, 1027034090) + + W(2, -1114434254, -1120206908, -1114914192, -1106192913) + + W(3, 1044010486, 1010935165, -1113546820, 1040356168) + + W(4, -1113833999, -1108541318, 1046030645, -1112701798) + + W(5, -1108159437, 1041417229, 1035224125, -1130438655) + W(6, 997526195, -1109330685, 1017890711, -1151173965) + + W(7, -1123472883, 1039497451, 1022061139, -1125745551); + sum2 = W(0, 1042217679, -1116367239, 1026421701, -1140328609) + + W(1, -1108814485, 1042859710, 1027395369, -1114630403) + + W(2, 1058334461, -1078638996, 1028343990, -1105295199) + + W(3, -1080767453, 1073929687, -1097310248, 1028261520) + + W(4, 1050991857, -1092080339, 1042808122, -1122297114) + + W(5, 1030602271, -1116721722, 1051491707, -1105219597) + W(6, 1034906705, -1106162437, 1024141024, 997359482) + + W(7, -1127683613, 1035114111, -1112071525, 1017171678); + WS(1066545696, -1154623394); + sum1 = + W(0, -1105442505, -1114054203, 1057182265, -1123432545) + W(1, -1115505461, -1098488296, 1049322687, 1010507424) + + W(2, -1102287846, -1092352928, 1062190966, -1097309632) + + W(3, 1023914903, -1089108436, 1059635472, 1045294808) + W(4, 1021900696, -1087887754, 1057132007, -1098559779) + + W(5, 1016559928, -1096061654, 1043675118, 1042399967) + + W(6, -1142506442, -1097827287, 1048482034, -1110816589) + + W(7, -1129232035, -1095543703, 1055016957, 1026256390); + sum2 = + W(0, -1094410020, 1040526244, 1038890507, -1120085021) + W(1, -1073520568, -1119660436, -1105577163, 1014274624) + + W(2, -1076806727, 1072065972, 1038803075, -1119386280) + W(3, 1007932024, 1074284701, -1096715819, 1032586275) + + W(4, -1129053184, 1045336233, 1048625414, -1148623280) + + W(5, -1126774165, -1114285571, -1106066396, 
1033069015) + + W(6, 1016297320, 1027498382, 1035667090, -1113847206) + + W(7, 1026440100, -1109537500, -1128514892, 1028560484); + WS(1055250687, -1106970626); + sum1 = W(0, 1033119688, 1042190962, -1096546392, -1117586055) + + W(1, -1105149829, 1055784922, -1103230410, -1125652468) + + W(2, 1043588463, 1039447718, -1093610964, 1035816565) + + W(3, -1098690473, 1060328969, -1094495495, -1103963220) + + W(4, 1051357302, -1105877415, -1104321127, 1047255951) + + W(5, -1111820283, 1050469874, -1102978667, -1108600424) + + W(6, 1036109552, -1123255849, 1034127854, -1157028943) + + W(7, -1119401059, 1044861546, -1097968467, 1018776031); + sum2 = W(0, 1038757267, 1030211853, 1033894636, -1133782171) + + W(1, -1111076574, -1147844455, 1039985174, -1117005539) + + W(2, 1036628998, 1002666567, -1092985165, -1150866734) + + W(3, -1107631850, -1080902130, -1084645521, 1049641986) + + W(4, 1045867530, -1089131777, 1069738325, 1051354211) + + W(5, -1126199158, 1067727291, -1096580382, -1100550183) + + W(6, 1034641438, 1050167793, -1100096489, -1104598763) + + W(7, 1014361489, -1114432338, -1100261485, 1033251244); + WS(1063662431, 1022143153); + sum1 = + W(0, -1101665898, 1028101088, 1045825111, -1107336877) + W(1, 1044567949, -1100200043, -1112112432, 1049135824) + + W(2, -1113912242, -1110575390, 1011332621, -1117269540) + + W(3, 1054985008, -1090770105, 1036354680, 1043762234) + W(4, -1089448239, 1042239925, 1053092900, -1090665684) + + W(5, 1031890901, 1034205190, 1020582213, 1045380738) + W(6, -1106608329, 1041117720, -1122250931, -1108114694) + + W(7, 1016341751, 1032506575, 1037368233, 1027635960); + sum2 = + W(0, -1109621945, 995625223, 1033690799, -1127273987) + W(1, 1044140338, -1125871763, -1131300267, 1031533647) + + W(2, 1011973298, 1059194369, 1019603073, -1100093028) + W(3, 1054192939, -1085775903, -1105557074, 1049649037) + + W(4, -1083519810, -1085690186, 1066625604, -1099380904) + + W(5, 1050669158, 1055305067, -1093331249, 1041330252) + W(6, 1015542539, 
-1115586758, 1025625789, -1109873113) + + W(7, 1007706050, 1024423563, -1163078190, 1021669909); + WS(1065395904, 1046290614); + sum1 = W(0, 996799560, -1125542553, 1038203972, -1122586727) + W(1, 1015596034, 1043656950, -1105153011, 1036793959) + + W(2, -1100668872, 1056396433, -1085640859, 1049715632) + + W(3, -1100927819, 1054862894, -1101762943, 1035554923) + + W(4, 1043886221, -1088911438, 1057024083, -1115152096) + + W(5, 1025322396, -1099487682, 1036429768, -1122121748) + + W(6, 1035443240, -1105531411, 1021072448, 1039652277) + + W(7, -1114934548, -1117673547, 1035195023, 1024299964); + sum2 = W(0, 1029237671, 1017509749, -1104176053, 1032328729) + + W(1, -1133651707, -1140172751, 1038596873, 1037378915) + + W(2, -1114304490, 1030543605, 1050264307, -1104642832) + + W(3, -1114162505, -1092617066, -1093179067, 1049979232) + + W(4, -1125875827, 1051105475, 1071302855, -1110244360) + + W(5, -1112015576, 1033357967, -1090684841, -1085540383) + + W(6, 1015895733, 1011455075, -1100815852, -1135016203) + + W(7, 1035088799, -1105982664, 1003487558, 1015899853); + WS(1058730335, 1038095077); + sum1 = W(0, 1050009523, -1090412329, -1116071407, -1120619710) + + W(1, -1156396202, 1022785342, -1112028282, 1033123659) + W(2, 1045232946, 990514519, 1027669243, -1138222105) + + W(3, -1123440788, 1051849503, 1025171300, 1042856679) + W(4, 994853272, 1050259497, -1091444451, 1043400402) + + W(5, 1029074928, 1048747736, -1091116387, 1046148791) + + W(6, -1116532263, 1040337077, -1095024325, 1031722640) + + W(7, 1016043038, 1047238893, -1085293672, 1050565349); + sum2 = W(0, 1046272294, 1056160584, -1069609928, -1086350256) + + W(1, -1119023958, 1047372879, -1100807597, 1012350161) + + W(2, -1106071503, 1066704996, 1063602974, -1093103672) + + W(3, -1097641458, 1062851774, 1071319304, -1165704932) + + W(4, -1099339001, -1127558738, -1123458905, -1099266547) + + W(5, -1122591395, -1095719050, 1049925865, 1039176837) + + W(6, -1120322169, 1012197993, 1037702687, -1104052164) + + 
W(7, -1112624405, -1094334166, 1046841945, 1043315186); + WS(-1102208382, 1082454872); + sum1 = W(0, -1121527792, -1097489215, 1044710709, 1024559218) + + W(1, 1019782131, -1109352802, 1043005718, 1025756170) + + W(2, -1111677734, -1089475261, 1059136208, -1098383546) + + W(3, 1042764822, -1094323164, -1113184692, 1034230961) + + W(4, -1098291428, 1057233791, -1102862820, -1121795355) + + W(5, -1109244559, 1055571354, -1113821821, 1034138424) + + W(6, -1122839755, 1036609181, 1016630678, -1148370726) + + W(7, 1023050790, 1023873367, -1122283881, 1021065311); + sum2 = W(0, 1012975921, -1113722375, -1109752074, 1009270385) + + W(1, 1033684352, -1113902527, -1108511603, 1034062718) + + W(2, -1119837718, 1034379158, 1047596894, -1096952633) + W(3, 1048900749, 1065557700, 1065261424, 1042274647) + + W(4, -1088385914, 1033601684, -1110433219, -1110816087) + + W(5, -1106132930, -1078602486, -1113936114, 1038223706) + + W(6, -1107812043, 1048231286, -1106600777, 1037668398) + + W(7, 1047279942, -1117907500, -1110420048, -1111923151); + WS(1055139903, 1066543323); + sum1 = W(0, -1113216706, 1025777755, 1062476912, -1094834883) + + W(1, 1034160554, -1106690282, 1047449287, -1115394087) + + W(2, -1110997976, 1025857819, 1040533578, -1100641176) + + W(3, -1104052311, -1108040417, -1088276389, -1109281747) + + W(4, -1130057219, 1042814916, 1043316038, -1111790927) + + W(5, -1100344024, 1058801014, -1096553156, -1131081926) + + W(6, -1109060119, 1040801319, 1003175286, -1127882001) + + W(7, -1102181302, 1061190298, -1109923206, -1116502963); + sum2 = W(0, 1032900543, -1093611286, -1093105706, -1103368978) + + W(1, 1026043047, -1143136704, -1104881087, 996537094) + + W(2, 1011969739, -1129502699, 1056641171, -1097512177) + W(3, 1005946567, 1065137024, 1062580590, 1038772124) + + W(4, -1104378201, 1034934846, -1137732946, -1108835309) + + W(5, -1106573410, 1044311979, -1102271464, 1035638256) + + W(6, -1149079854, -1128312106, -1106196955, 1028654910) + + W(7, -1098913457, 
-1107526541, -1097055308, -1144299515); + WS(1037662203, -1095462961); + sum1 = + W(0, 1051208476, 1054332722, -1088537607, 1009988859) + W(1, 1007151360, 1048962332, -1096098779, -1120684294) + + W(2, 1047867778, 1056131053, -1089718811, 1043868050) + W(3, -1120692693, 1060311124, -1086623130, 1008468855) + + W(4, -1102286423, 1053179033, -1088149909, 1024446024) + W(5, 1029685138, 1057524976, -1093380227, 1041242535) + + W(6, -1117174983, 1045179237, -1094989189, -1121112609) + + W(7, 998066935, 1053688986, -1094538289, 1022948968); + sum2 = W(0, -1075642964, 1064397427, 1042055631, 1049259770) + + W(1, -1078284587, 1070403917, 1050515956, -1100572061) + + W(2, -1077677369, 1072178178, -1094922849, 1038272909) + + W(3, -1086975712, 1071930150, -1096417404, -1110188613) + + W(4, 1050575143, -1093947328, -1094640520, 1033007627) + + W(5, -1109971278, -1119680819, 1045435393, -1109186235) + + W(6, 1043550667, -1134729809, -1109099138, 1040318497) + + W(7, 1046375822, -1094009133, -1106076462, 1038207101); + WS(-1091974591, 1028524890); + sum1 = + W(0, -1119527298, 1050666155, -1104886813, -1119869997) + W(1, 1034997099, -1101120961, -1100807815, 1044100139) + + W(2, -1099802012, 1057798952, -1099838668, -1103774309) + + W(3, 1033463662, -1146674958, 1021707158, -1112176434) + + W(4, -1090658116, 1048486316, 1057612011, -1094617942) + W(5, 1044419888, 1054164791, -1093871840, 1036492177) + + W(6, -1099353751, 1033437822, 1029138830, -1113521820) + + W(7, -1110855131, 1053708795, -1104286795, 1028051362); + sum2 = + W(0, 1029285075, -1114745613, 1024121756, -1127985632) + W(1, -1113245689, 1037297246, 1034295695, -1103172653) + + W(2, 1046869988, -1113507597, -1081105309, -1097344586) + + W(3, -1099734421, 1057852752, 1063764387, -1092930743) + W(4, 1045806866, 1031538739, -1093953622, 1039548141) + + W(5, -1099048447, 1019950674, 1060684479, -1113791115) + W(6, 1040440310, 1039903515, -1099720743, 1024790472) + + W(7, -1129472848, -1115367833, 1051222485, 
-1106398082); + WS(1058893599, -1078409713); + sum1 = + W(0, -1115244150, -1102829117, 1039336887, -1115623287) + W(1, 1036724056, -1100240992, 1053376406, -1114512019) + + W(2, -1098555735, -1094663483, 1053054383, 1041993169) + W(3, 1049246955, -1084866106, 1065100884, 1018575674) + + W(4, 1045222582, -1085064800, 1054590883, -1114852275) + + W(5, -1138283737, -1093243506, 1056764679, -1108377677) + + W(6, 1044966206, -1093950047, 1053059489, -1118180448) + + W(7, -1115412967, -1095580366, 1050610273, -1122449691); + sum2 = W(0, -1130929736, -1101060123, 1045192083, -1105822172) + + W(1, 1009552185, -1090337753, 1061442923, -1103557108) + + W(2, -1084383778, 1072779570, 1050903982, -1106762492) + + W(3, 1050909748, 1053567468, -1089269841, -1117595750) + + W(4, 1049209322, -1079161361, 1029505799, -1111371347) + + W(5, -1096700837, 1057662166, -1094014597, 1025228376) + + W(6, 1051369678, -1097661058, 1046387683, -1138825972) + + W(7, -1129474226, -1112521942, -1105984421, 1030887459); + WS(-1109369595, 1048740969); + sum1 = + W(0, 1040995480, 1051718495, -1091711555, -1125552391) + W(1, 1030414773, 1050319297, -1095167863, 1032958437) + + W(2, -1118646413, 1059003715, -1085356599, 1042985722) + W(3, 1043594790, 1046504696, -1083562713, 1045509569) + + W(4, -1129436350, -1135436979, -1096690237, 1023899916) + + W(5, 1037121811, 1043825845, -1098326935, 1019546753) + W(6, 1030207412, 1049475887, -1120665100, -1130219052) + + W(7, 1043560049, 1058279379, -1098718953, -1105013544); + sum2 = W(0, -1113177829, -1117552581, -1100446453, 1041583669) + + W(1, 1043809265, -1109343151, 1039662186, 1009024316) + + W(2, -1095416462, 1052574118, -1114145263, -1118793923) + + W(3, 1034071291, 1063553374, 1053677817, 1018689733) + W(4, -1120341831, 1070912061, -1101063801, 1031195477) + + W(5, 1043900974, -1112203792, 1042882834, -1105538343) + + W(6, 1020015593, -1091338378, -1094258913, -1114748986) + + W(7, -1119209873, -1072794661, 1055602011, -1109237633); + 
WS(-1093437503, 1065718162); + sum1 = + W(0, 1037550096, 1049488055, -1103061781, -1114711965) + W(1, -1101135841, 1042594425, 1046266118, -1097873069) + + W(2, 1044878747, 1049041945, -1087470164, 1046063170) + + W(3, -1107014286, 1066142676, -1106438298, -1091362312) + + W(4, 1042111087, -1100146203, -1111606545, 1037494709) + W(5, 1037051057, 1045825417, -1091131027, 1040258368) + + W(6, -1099961615, 1040663590, -1133427370, -1109172298) + + W(7, 1041500301, -1119554198, -1103590594, 1033770775); + sum2 = + W(0, -1106488910, -1123106758, 1050172947, -1124712996) + W(1, 1026201604, -1113590739, -1092748399, 1043391307) + + W(2, 1035711566, -1084666863, -1097703384, -1098783459) + + W(3, 1060425407, 1061776657, -1099741350, 1048423650) + W(4, -1098764525, 1055047321, -1120322586, 1041036039) + + W(5, -1096618367, -1119175762, 1050324229, -1099280352) + + W(6, -1182808622, -1103500958, 1025620288, 1048645442) + + W(7, -1127820908, 1041613778, -1105222946, -1118590758); + WS(1056660607, -1113798601); + sum1 = W(0, -1115305597, 1041913228, 1012656821, 1026504084) + W(1, -1101671294, 1008348404, 998498191, -1151777337) + + W(2, 1016801995, 1031963795, -1102006873, 1032505286) + + W(3, 1039904137, -1093680051, 1057450908, -1108358078) + + W(4, 1037148876, -1089471492, 1049874551, 1031920291) + W(5, 1040453028, 1034024758, -1111681498, 1043455038) + + W(6, -1110559226, -1111400057, -1109457443, -1129849213) + + W(7, -1154296528, 1031051312, 1036430914, -1125050675); + sum2 = W(0, 1042939003, 1024638457, -1123927190, 998901155) + W(1, -1114796094, -1101207597, 1042777565, 1022659417) + + W(2, 1044529628, 1027593065, -1098782283, 1017512357) + + W(3, -1124781020, -1097759574, 1059165645, -1104939928) + + W(4, -1106748864, -1130307786, 1062760553, -1098668376) + + W(5, 1016881133, 1033669310, 1017094385, -1089791762) + + W(6, -1108227590, 1022409637, -1160993898, -1106217568) + + W(7, 1027959261, 1019735589, -1125388747, -1115680109); + WS(1066155712, 1004288134); + sum1 = 
W(0, 1047132567, 1049267618, -1089062279, -1138510971) + + W(1, 1029697589, 1048612844, -1092380195, 1046969501) + W(2, 1014062303, 1055750611, -1088453014, 1024451359) + + W(3, 1041631965, 1059093756, -1084647474, 1049129552) + W(4, 991515041, 1057479944, -1091963789, 1040560290) + + W(5, -1123217244, 1056993158, -1088742789, 1043007167) + + W(6, -1154741365, 1047138005, -1098495965, -1122630737) + + W(7, 1030265807, 1055767146, -1086886676, -1122583780); + sum2 = W(0, 1011892235, 1040858440, -1127819845, 1010964983) + + W(1, -1111382628, -1103755803, -1109875610, -1106716388) + + W(2, 1037768402, -1125277089, -1096621612, 1032593498) + + W(3, 1032283234, -1084511950, -1071556311, 1040462902) + + W(4, -1114728106, -1090323786, -1071973667, -1100404332) + + W(5, 1009460031, -1159433014, -1099343032, 1040394500) + + W(6, -1112526532, 1053584755, 1076043190, 1007208771) + W(7, 1030203700, 1052886875, 1079690007, 1051792925); + WS(-1086041375, 1049976369); + sum1 = + W(0, -1125608345, -1096855193, 1037693680, 1048607039) + W(1, 1025261033, -1097190986, 1051176741, 1030637974) + + W(2, 1032012444, -1086237883, 1059434222, 1026121224) + W(3, 1046732297, -1083643033, 1062665585, -1103554484) + + W(4, -1114869141, -1093631755, 1052924527, -1110078044) + + W(5, 1036081274, -1095059844, 1052562329, -1109004102) + W(6, 1036645660, -1132519191, 1041010310, 1035782593) + + W(7, -1119140736, -1095180224, 1048775023, -1118200972); + sum2 = + W(0, 1029601330, -1115199196, -1096390757, -1079547524) + + W(1, -1144560396, 1026958814, -1100298556, -1081990625) + W(2, 1032763954, 1032369083, 1076866217, 1041174181) + + W(3, -1115106220, -1106065675, -1108884178, 1041059708) + + W(4, -1111454114, 1042189754, -1116766618, 1024602232) + + W(5, -1121335223, 1029124674, 1025985538, -1127158528) + + W(6, -1109520646, 1022674491, -1115268590, 1007020790) + W(7, 1021645795, 1026253022, 1011072742, 1003377428); + WS(-1111980027, 1060626277); + sum1 = W(0, 1023621139, -1102882983, 1044706422, 
1028429765) + + W(1, 1032179266, -1092055397, 1043529367, -1113080214) + + W(2, 1030954414, -1089823274, 1057090380, -1101761443) + + W(3, 1037158526, -1095093232, 1064981515, -1098683411) + + W(4, 1036765152, -1090725010, 1047303429, 1033439719) + W(5, 1036680203, -1139402199, 1042557255, 1030304674) + + W(6, -1146606447, -1116432156, 1028925358, -1104492078) + + W(7, -1119312598, -1108873773, 1041638915, -1112219839); + sum2 = + W(0, -1107703976, 1045439570, 1032849260, -1122224669) + W(1, 1046455264, -1090239140, -1096329612, 1035962913) + + W(2, -1093472442, 1028879562, 1030082819, -1098806672) + W(3, 1048924762, 1064195571, 1036056807, 1041148911) + + W(4, -1097201227, 1016778259, 1059898488, -1091828205) + W(5, 1049435664, 1027385222, -1091675747, 1041687668) + + W(6, -1106239872, 1028121257, 1053658286, -1098243254) + + W(7, -1119402461, -1105208479, -1117889585, -1104565052); + WS(1062681599, 1040842201); + sum1 = + W(0, -1108789463, -1104300209, 1042117608, -1118282400) + + W(1, -1115361429, 1050445194, -1097788769, -1114475276) + + W(2, 1048981817, -1089127203, 1032428200, 1049574381) + W(3, -1092926921, 1063952639, 1049970971, -1090427073) + + W(4, 1054602509, 1043197406, -1088418847, 1045778638) + W(5, 1029602904, -1106041808, 1049956321, -1112080775) + + W(6, -1117967305, 1039191171, -1112931212, -1097758566) + + W(7, 1025443329, -1114238594, -1123051255, 1033393295); + sum2 = W(0, 1038735267, -1126315713, -1111714958, -1111656758) + + W(1, -1100048068, 1043050536, -1115313021, -1115683900) + + W(2, -1115453090, 1053683464, 1046662525, -1093358619) + + W(3, -1110884051, 1058830244, 1061563017, -1105966973) + + W(4, -1090743949, -1100540415, 1052621807, 1037413595) + + W(5, -1153955669, 1037942519, -1104363663, -1101879362) + + W(6, -1112598570, -1098889179, -1118760183, 1044482581) + + W(7, -1103797208, 1040830228, 1007755797, -1098877089); + WS(1062776447, 1069975051); + sum1 = W(0, -1130308944, 1036893844, -1104137265, -1108240801) + + W(1, 
-1112835078, -1114635034, -1098478453, 1029922778) + + W(2, -1113144126, 1050058840, -1104852259, -1111877761) + + W(3, -1094805377, 1071110128, 1041368142, -1088740684) + + W(4, -1098008538, 1061455014, -1095764665, -1112520550) + + W(5, -1114270939, 1044746969, -1109080309, -1119589855) + + W(6, -1117364667, 1018226642, -1103882015, -1121527653) + + W(7, -1117827459, 1049407081, -1120810969, -1103102129); + sum2 = W(0, 1028834725, -1113117380, 1037052904, -1113210551) + W(1, -1110297977, 1026197871, 996515330, 1032917698) + + W(2, 1035091068, -1099362318, 1043940575, -1106692355) + + W(3, 1034973002, 1077416661, -1069860026, -1103487680) + W(4, -1110025415, 1050456944, 969317772, 1034055293) + + W(5, -1154986018, -1112814852, 1021906202, 1006658352) + + W(6, -1120031976, 1027999489, -1115577362, 1018186436) + + W(7, 1030684381, 1016280910, 1013156184, -1124181989); + WS(-1082599007, -1084302409); + sum1 = + W(0, -1127095446, 1056665680, -1087212124, 1035858193) + W(1, 1011517278, 1052281350, -1096260148, -1128228554) + + W(2, -1110892475, 1058821842, -1085877656, 1043137767) + W(3, 1024933540, 1061132051, -1087957451, 1023709101) + + W(4, -1111586789, 1054434479, -1089270826, 1037286635) + + W(5, 1039136849, 1052720805, -1102718609, -1111842501) + + W(6, -1108207315, 1043783817, -1106422582, -1130056576) + + W(7, 1049617952, -1159632952, -1106709957, -1115207149); + sum2 = W(0, 1048459811, -1095160239, 1026509768, -1123792544) + + W(1, 1071226724, 1065850492, 1043007163, -1127547903) + W(2, 1075494724, -1121381976, 1043168485, 1027169380) + + W(3, 1074401127, -1099027008, -1093896652, -1107011198) + + W(4, 1044864951, 1031151008, 1012395574, 1037067951) + W(5, -1077687291, 1057399482, 1019701051, 1021690579) + + W(6, -1072311776, 1045213405, -1109946848, -1124089088) + + W(7, -1066953112, -1095971461, 1027278540, -1166810856); + WS(-1118215158, -1130722305); + sum1 = W(0, 1039784439, 1045520358, -1094865274, 966095017) + + W(1, -1119983743, 1040649785, 
-1098235299, -1166742058) + + W(2, 1027132090, -1126797149, -1085881139, 1023429181) + + W(3, -1101267710, 1070303194, -1106608186, -1127173569) + + W(4, -1106141431, 1059129261, -1088850304, -1112024885) + + W(5, -1119840977, 1056727436, -1093134907, 1030946302) + + W(6, -1131926580, 1040938982, -1098615277, -1160489077) + + W(7, -1158357571, 1050582583, -1104503154, -1106263629); + sum2 = W(0, -1122933952, 1043676402, -1122578476, 1034421913) + + W(1, 1029904221, -1092335308, -1096734640, -1121365342) + + W(2, -1104768174, 1050667883, -1067932755, -1103315560) + + W(3, 1044174015, -1106663265, 1078656606, 1049347404) + W(4, -1122786276, 1035773679, 1056344301, 1027752905) + + W(5, 1032455763, 1033986578, -1106732625, -1114333025) + + W(6, -1118952582, -1123822916, 1033374173, 1025409455) + + W(7, -1120585334, 1030027555, -1116084330, -1129253837); + WS(-1112296443, -1083191171); + sum1 = + W(0, 1030599873, 1049894985, -1100141489, -1124642344) + W(1, -1102688301, 1052276159, -1095734081, -1114229790) + + W(2, 1038739494, 1055519004, 1044438853, 1039505712) + W(3, -1091052013, 1061107959, -1078809864, -1108354137) + + W(4, -1120252367, 1059731464, -1100785926, 1037196526) + + W(5, -1118018759, 1052059899, -1095185489, -1129006466) + + W(6, -1105737735, 1050382861, -1092887220, 1034647259) + + W(7, 1036799090, 1050879021, -1113977245, -1123394945); + sum2 = + W(0, -1134366274, 1032198029, -1102569648, -1113629492) + W(1, 1040286055, -1093721101, 1059013351, -1099320981) + + W(2, -1103185345, 1064064153, -1076328416, 1062572765) + + W(3, -1089934078, 1038410574, 1075980540, -1079072621) + + W(4, -1104699884, 1053501328, -1081344728, 1057269325) + + W(5, -1135474394, 1053898256, -1093863519, 1039501040) + + W(6, 1039368980, -1088730122, 1061819985, -1089994189) + W(7, 984214720, 1045555557, -1089334779, 1050815727); + WS(1051639487, -1077445112); + sum1 = W(0, 1023584208, -1087680633, 1058381481, 1019116206) + + W(1, 1019039018, -1097242582, 1049175941, -1110819225) 
+ + W(2, 1046978951, -1087958558, 1059595066, 1041321753) + + W(3, -1114024277, -1084584703, 1058705186, -1110784681) + + W(4, 1042440780, -1089578188, 1060807824, -1120949436) + + W(5, 1036430651, -1090497932, 1057261083, -1111841304) + + W(6, 1019910405, -1100840226, 1047705823, -1108976675) + + W(7, 1017051223, -1089989738, 1052092149, 1037750032); + sum2 = W(0, -1103483317, 1043027417, 1043672886, -1126025102) + + W(1, -1103949071, 1024633771, -1105891820, -1101612494) + + W(2, 1036613850, 1050158855, -1113242263, 1051748473) + + W(3, -1096200772, -1096718424, 1053817256, -1144187697) + + W(4, -1094761445, 1057240079, 1043447632, -1129269697) + + W(5, -1090478844, 1037855330, 1053166295, -1114569970) + + W(6, -1091384835, 1036780676, 1043680942, -1102484885) + + W(7, 1024429003, -1103545669, 1034776200, 1044262941); + WS(-1102141694, 1066754929); + sum1 = + W(0, 1024767465, -1097150806, -1121670572, 1047316983) + W(1, 1028037205, -1105574095, 1021251426, -1123186307) + + W(2, -1148374821, -1092607882, 1053426631, 1027553675) + W(3, 996156698, -1086231302, 1064662112, 1037457517) + + W(4, 1034886379, -1085570014, 1060264850, -1103359865) + W(5, 1011245536, -1090150659, 1058905527, 1031219256) + + W(6, 1017269224, -1093685286, 1047037217, -1108923303) + + W(7, 1023537404, -1100403588, 1048736520, 1039125668); + sum2 = W(0, -1127568330, 1005444026, -1099850392, -1069105992) + + W(1, 1031019669, -1121000217, 1052911126, -1075891845) + + W(2, 1013355589, -1135062981, 1040255711, 1052106082) + + W(3, -1123247753, -1099801823, -1120914990, 1078854269) + + W(4, -1131101666, 1044314830, 1055431083, 1061745892) + + W(5, -1126559289, 1052295626, -1112663055, -1097592921) + + W(6, -1128009624, -1112001042, 1029993103, 1033712114) + + W(7, -1122774763, 1018744598, -1105142519, 1037317546); + WS(-1092827839, 1049601702); + sum1 = + W(0, -1102748075, -1114726817, 1043768535, 1001409895) + W(1, 1048973336, -1100496851, 1037939345, 1025584388) + + W(2, -1101661842, 1056141590, 
-1092531310, 1043222609) + W(3, 1035794226, -1092935501, 1048278549, 1028885900) + + W(4, 1035295380, -1090189951, 1059458263, -1097823129) + W(5, 951002293, -1127762491, -1107678146, 1046084402) + + W(6, 1028700102, -1107070641, 1040739973, 1018269854) + + W(7, 1019995195, -1116434785, -1103464157, 1033486885); + sum2 = W(0, 1047648616, -1099866854, 1040911688, -1104812914) + + W(1, 1049954711, -1087014785, -1127547935, 1032774583) + + W(2, -1086499077, 1059941103, 1006897087, -1118719608) + + W(3, 1036131868, 1059724034, -1147690453, -1112213150) + + W(4, 1021844703, -1104253425, -1094365093, 1020986307) + + W(5, -1144052469, -1116739540, 1046092804, 1037506252) + + W(6, 1036766656, -1123734224, -1117733622, 1020773629) + + W(7, -1114852932, -1119117496, 1045874226, -1120467144); + WS(1062974879, -1132215613); + sum1 = W(0, -1181157822, -1096960767, 1047884458, 1046591191) + + W(1, 1033235448, -1103300066, 1024586364, 1028472621) + + W(2, -1120645311, -1098273063, 1040665968, 1027902609) + + W(3, 1032693326, -1084817024, 1065496294, -1105457745) + + W(4, 1014937094, -1086871647, 1061273423, -1099580452) + + W(5, 1012846527, -1093312183, 1057013666, -1121668195) + + W(6, 1029466479, -1094276984, 1048826162, -1123904424) + + W(7, 1027727527, -1099477807, 1049029078, 1033797162); + sum2 = W(0, -1120861164, -1114727130, 1051943787, 1073213517) + + W(1, 1023555157, -1127207629, 1021361633, 1067585393) + + W(2, -1114715157, 1049207316, -1100379929, -1095215585) + + W(3, 1039733942, -1100512081, 1049204344, -1072102567) + + W(4, -1111876903, 1044392032, 1029680046, -1085077901) + + W(5, -1121732899, -1113639142, 1040523175, 1017790060) + + W(6, 1028106223, -1107243417, -1113800818, -1115100698) + + W(7, -1136150442, 1035475508, 1017564481, -1111101803); + WS(1038961915, -1122010239); + sum1 = W(0, 1004620105, 1048595660, -1099376311, -1116246236) + + W(1, -1113106351, 1052250558, -1096833644, 1027857155) + + W(2, -1112775210, 1058549636, -1088891129, 1036452944) + + 
W(3, -1125534724, 1061573929, -1086812373, 1040782707) + + W(4, -1137271196, 1057522353, -1093214133, 1018281129) + + W(5, -1126589322, 1048822826, -1095914356, -1113857108) + + W(6, 1026657556, 1040718305, -1105329661, 1010557432) + + W(7, -1119328748, 1048641524, -1103878453, -1106455448); + sum2 = + W(0, -1116580406, 1027562258, 1055923683, 1077914250) + W(1, 1024927837, -1162615720, -1098456431, 1064911462) + + W(2, -1129828931, 1047581344, -1107918233, -1076359256) + + W(3, -1115420496, -1109513493, -1093462217, -1072190840) + + W(4, 1011185933, 1038795182, 1043574176, 1023726209) + W(5, -1123989928, -1118168090, 1042713369, -1114062645) + + W(6, -1146345634, 1030671240, -1118222006, -1119388396) + + W(7, -1132229127, 1038841827, 1029869485, -1111766417); + WS(1058520191, -1103814144); + sum1 = + W(0, 1017038583, -1136595532, -1118053743, 1027978638) + W(1, 1027734728, 1016280509, -1128563239, -1097122909) + + W(2, 1045254802, -1109207324, 1044218638, -1104843131) + + W(3, -1107262661, 1062837304, -1096672636, 1033499055) + + W(4, -1105442246, 1058887264, -1085861660, 1025985831) + W(5, 1031430877, 1040464732, -1103880229, 1030848699) + + W(6, -1126950445, 1030731221, -1107144426, -1131329888) + + W(7, 1024536063, 1029953906, -1145128486, -1109411553); + sum2 = W(0, 1034702970, -1176551015, 1007931783, -1110263367) + + W(1, -1125598974, -1168898884, -1090251816, -1089294892) + + W(2, -1114553387, 1041527709, 1067159008, -1085860022) + + W(3, 1027536136, -1101298022, 1043575777, 1061443693) + + W(4, -1115924578, 1036785061, 1028484706, -1099982080) + + W(5, 1035052048, -1106074654, -1104273626, 1042454777) + + W(6, -1132148431, 1030578400, -1148595850, 1027614804) + + W(7, 1025333585, 1024211757, -1114791455, -1123742626); + WS(1061983711, -1098811342); + sum1 = + W(0, 1012577103, 1051270689, -1108510228, -1107438075) + W(1, -1126897281, 1046420622, -1106195699, -1107699151) + + W(2, -1105225996, 1058683087, -1088411008, 1036711743) + W(3, 1024253613, 
1058369439, -1089548804, 1026749110) + + W(4, 1053413654, -1097514912, -1118840551, -1116819519) + + W(5, -1137207095, 1052214131, -1096732498, 1024709491) + + W(6, 1042674340, -1105013139, -1109015536, -1140694819) + + W(7, -1109172700, 1050302029, -1103000311, -1154413112); + sum2 = + W(0, 1018267361, 1036503366, -1126101987, -1099489904) + W(1, -1110181012, 1041828239, -1116783782, -1102883314) + + W(2, -1146613180, 1050199247, 1071387060, 1052019906) + W(3, 1038715850, -1085432172, -1080125372, 1047113133) + + W(4, 1024296823, 1038941240, -1093021050, 1035497971) + + W(5, 1036970119, -1123003584, -1114155200, -1112911441) + + W(6, 1022003098, 1013422114, -1149124484, -1119585859) + + W(7, -1124702635, 1021003447, -1143100940, -1131838862); + WS(1061194367, -1092415693); + sum1 = W(0, 1036390333, 1051201914, -1104881972, -1115149537) + + W(1, -1126263470, 1053058480, -1172862130, 1030237668) + + W(2, 1025725222, 1054554894, -1093268199, 1021350042) + W(3, 1043857157, 1047231542, -1076465248, 1036659934) + + W(4, 1035642972, 1057662314, -1087389459, 1039247491) + W(5, 1044002978, 1050872286, -1093150623, 1026362746) + + W(6, 998479342, 1045022613, -1111846627, 1017611295) + W(7, 1045699251, 1049392206, -1095910796, 1018350484); + sum2 = W(0, -1129487679, 1018376667, -1097777178, -1117386048) + + W(1, 1042193002, -1087595144, -1098446206, -1106566736) + + W(2, -1108339354, 1054016721, -1106280983, -1108034123) + + W(3, -1095819059, 1063707892, 1068295659, -1104582586) + + W(4, 1041127864, 1036978867, 1052165187, -1102127104) + + W(5, -1103499785, -1103064253, -1110264861, 1036459063) + + W(6, 1034257999, -1103241982, -1135489703, -1104779766) + + W(7, -1118637794, -1112191100, -1107248782, 996655259); + WS(-1094699455, 1066131816); + sum1 = + W(0, 1049793485, -1081848423, 1059460685, 1042128105) + W(1, 1029004921, -1088299640, 1048637514, -1139594888) + + W(2, 1050253240, -1084057276, 1060796209, 1026023615) + W(3, 1043166228, -1084821317, 1059873140, 1020508084) 
+ + W(4, 1029407446, -1084563631, 1055943200, -1125172935) + W(5, 1041612884, -1104203619, 1047596309, 1048504406) + + W(6, 1035949114, -1099802604, 1050483936, -1115227886) + + W(7, 1012128942, -1096241030, -1105809528, 1051471881); + sum2 = + W(0, 1060779432, 1051802488, -1086062785, -1114203174) + W(1, -1118034101, -1097493973, 998397725, -1112558489) + + W(2, -1104519356, 1057101737, -1092333837, -1099984065) + + W(3, -1119327164, 1060027297, 1066349798, -1091352583) + W(4, -1101970936, 1044004337, 979464170, -1094257268) + + W(5, -1106905521, 1057962293, -1133317807, -1102635709) + + W(6, 1002331293, -1098665363, 1046794021, -1094504499) + + W(7, -1082144605, -1086721729, 1070824403, -1114333706); + WS(-1074268304, 1079667699); + sum1 = + W(0, -1107189325, -1101508903, 1054199089, 1035260131) + W(1, 1027760991, -1090199334, 1054303683, -1107632246) + + W(2, -1136873402, -1090363950, 1062422475, 1005391451) + + W(3, -1113641470, -1087996124, 1067750847, -1101800081) + + W(4, 1031162561, -1083961527, -1113534839, 1034855959) + W(5, 1033891144, -1096484091, 1052689284, 1046337921) + + W(6, -1126712892, -1098094611, 1031942551, -1150999418) + + W(7, 1035917755, -1093567651, 1045977323, 1042992013); + sum2 = + W(0, -1109104193, -1100757902, 1040683096, -1125509919) + W(1, -1121805095, 1045040397, -1125556022, 1026278254) + + W(2, 1045986101, 1080470972, 1049174250, -1112163985) + W(3, 1048674700, -1080397282, -1095181876, 1050380898) + + W(4, -1091901802, -1070026310, -1122013099, -1131668233) + + W(5, 1048446997, 1054845861, -1110936398, 1033243862) + + W(6, -1113936425, -1109557127, 1038355211, -1144282357) + + W(7, 1027039508, -1113850952, -1103996340, 1036778147); + WS(-1085238047, -1082053459); + sum1 = W(0, -1106787663, 1039996894, 1048926451, -1104330662) + + W(1, -1138285833, 1010512412, -1119213276, -1128637723) + + W(2, -1108632776, 1053399812, -1130975030, -1108017347) + + W(3, -1094657608, 1045540841, 1010216242, -1096910157) + + W(4, -1101713608, 
1051732998, 1034301867, -1101323868) + W(5, 1037328759, 1044326914, 1025236158, 1015829447) + + W(6, -1111697288, 976543556, -1128978334, -1111437732) + + W(7, -1109379868, 1057105320, -1109745879, -1108664266); + sum2 = + W(0, -1124623913, 1025825838, 1042257288, 1021162350) + W(1, 1032956443, -1119700813, -1135835500, -1114452863) + + W(2, 1027185909, 1032253087, 1053804277, -1109212782) + W(3, -1101336864, 1046009611, 1048892371, -1102267892) + + W(4, -1094263361, 1060071017, 1036906607, -1105958464) + + W(5, 1029736763, -1108021530, -1101254223, 1037505372) + + W(6, -1112090109, -1111711122, -1115069322, 996911332) + + W(7, 1039412150, -1092338318, -1114145526, -1125671849); + WS(1060867039, -1136390908); + sum1 = + W(0, -1127355032, -1085412379, 1049179798, 1047055468) + W(1, 1034805910, -1094311080, 1049208039, -1108149971) + + W(2, 1035163860, -1093352008, 1060583566, 1036232649) + W(3, 1029669941, -1084691216, 1063283264, -1103012671) + + W(4, 1042066222, -1088428116, 1058522472, 1024178471) + W(5, 1021674692, -1088125293, 1055147879, 1026233836) + + W(6, 1040577786, -1096644095, 1050065089, -1126656770) + + W(7, 1021261983, -1091872390, 1041474713, 1049799631); + sum2 = W(0, 1035672114, -1063752230, -1093487673, 1013645866) + + W(1, -1098827199, -1085747691, 1041803367, 1034418818) + + W(2, 1049155846, 1077059336, 1041184099, -1106855503) + W(3, 1035178389, 1075347264, -1096808218, 1049524211) + + W(4, -1107113032, 1056308660, -1129382302, -1122362105) + + W(5, -1144855824, 1034654874, -1119639669, 1030856901) + + W(6, 1035201737, -1121418857, -1122522987, 1027103888) + + W(7, -1127184278, 1014935340, 1011408026, -1115004359); + WS(-1090674303, -1100343233); + sum1 = W(0, -1120232797, -1091881416, 1051419109, 1036288004) + + W(1, 1040422273, -1094093465, 1052500896, 1023553551) + + W(2, 1026412864, -1085938046, 1061270771, -1112170192) + + W(3, -1138867958, -1089254334, 1067891162, -1115349187) + + W(4, 1033879777, -1089797193, 1060067146, -1112030904) + 
W(5, 1033565615, -1085407367, 1016645247, 998486818) + + W(6, 1019476032, -1095206984, 1041715172, 1027103050) + W(7, 988102399, -1105032111, 1036835111, 1040345019); + sum2 = W(0, 1040618154, -1101425729, 1012698939, -1121139419) + + W(1, -1114130867, 1043012943, -1135257643, 1033653009) + + W(2, -1122148687, 1049717343, 1047429512, 1038119907) + W(3, 1044229537, 1078734719, 1020747990, -1131387750) + + W(4, -1110193301, 1058416067, 1045573328, -1162837210) + + W(5, -1097134960, -1066172228, -1105653505, -1124290574) + + W(6, 1037070268, -1087270128, -1098650023, -1142454518) + + W(7, 1037459780, 1051160418, 1029335953, -1119987003); + WS(-1083751903, -1076119444); + sum1 = W(0, -1115245576, 1056842307, -1093345814, -1108216057) + + W(1, -1106029427, 1054460828, -1096614035, -1118873130) + + W(2, -1102084379, 1062160243, -1087844584, 1032187759) + + W(3, -1113929101, 1061549380, -1088621915, -1118044066) + + W(4, -1115339007, 1051628466, -1095344475, -1125935063) + + W(5, 1029734646, 1034628298, 1039622221, -1114025640) + + W(6, -1110488366, 1040780998, 1023285463, -1103117229) + + W(7, -1147960477, 1027250487, 1052549785, -1101242061); + sum2 = W(0, -1142266798, 1041896729, -1109207785, -1116438034) + + W(1, -1105635988, -1133541327, -1119640286, -1143926222) + + W(2, -1108468488, 1052249127, -1132044399, -1116983273) + + W(3, -1104742183, 1059101684, 1057719514, -1107716245) + + W(4, 1004562934, -1116644500, 1049160976, 1036001553) + + W(5, -1111519795, 1022626513, 1040987637, -1093640310) + + W(6, 1009402627, 1036478091, -1103527129, -1115139384) + + W(7, 1007186587, -1114250343, -1094402391, -1102449905); + WS(1032624635, 1033521535); + sum1 = W(0, -1131553039, -1098906814, 1051615627, 1045470610) + + W(1, -1117491096, -1100587359, 1047570260, 1025984961) + + W(2, -1112834618, -1081513385, 1060987922, 1036113314) + + W(3, 1049918570, -1078593782, 1043587024, 1040182861) + W(4, 1045795365, -1105649745, 1056209202, 1012071435) + + W(5, 1016822387, -1092053629, 
1059398075, 1032631435) + W(6, 1035361729, -1096260814, 1049440024, 1023888652) + + W(7, -1113168565, -1095243062, 1051600524, 1034850724); + sum2 = W(0, -1111826236, -1092384767, -1095745094, 1034661269) + + W(1, -1099208280, 1027875109, -1100947515, 1048636172) + + W(2, 1026789677, 1064661507, -1112642295, -1098451609) + + W(3, -1106561040, 1062273543, 1068550638, -1099981129) + + W(4, -1093448876, -1087164405, -1104229197, -1118651857) + + W(5, 1046407154, -1108191451, -1106740917, 1049739863) + + W(6, -1102666308, 1044380270, -1103404028, -1095995165) + + W(7, 1042570396, -1104593420, -1105841536, 1048025970); + WS(-1085606847, 1061985400); + sum1 = + W(0, -1108997980, -1089905075, 1057688104, 1040581229) + W(1, -1117164584, -1114895517, 1056544190, -1114811305) + + W(2, 1031768707, -1095264598, 1062390314, -1107526493) + + W(3, -1103105924, -1077801587, 1055481056, -1110202349) + + W(4, 1041900981, -1097956992, 1062127874, -1118208771) + + W(5, -1110420181, -1088999487, 1051900674, -1108027100) + + W(6, 1037113665, -1098379486, 1048102065, 1024450541) + W(7, 1014163116, -1096151518, 1057425872, 1041210774); + sum2 = W(0, 1025785873, 1048760593, 1035955606, -1126760968) + + W(1, 1048912334, -1090084783, -1119218892, 1008603006) + + W(2, -1096464036, -1114768213, 1007739746, -1094704139) + + W(3, 1056133856, 1066763200, 1064934573, -1080670875) + + W(4, -1104154812, -1086306359, -1089946445, 1031309677) + + W(5, -1121968309, 1058187354, 1050702067, -1125625514) + + W(6, 1033410161, -1113492694, 1025685973, -1109240237) + + W(7, 1015120151, -1128581905, -1102128709, 1039335694); + WS(-1090966143, -1078936607); + sum1 = W(0, -1116393227, -1086961670, 1058706816, -1116819366) + + W(1, -1108278025, -1108945587, 1041859343, 1035952359) + + W(2, 1051625062, -1086695325, 1060094467, 1041797992) + + W(3, -1105522375, -1081986687, 1059190367, -1104043025) + + W(4, 1058727904, -1089148367, 1061098519, 1053094890) + + W(5, -1091195697, -1086453117, 1055062080, -1093101306) 
+ + W(6, 1052016408, -1092689140, 1056115606, 1015960496) + + W(7, -1109728971, -1091172115, 1058930855, 1034498693); + sum2 = + W(0, 1023528661, -1118025227, 1075422545, -1073269716) + W(1, -1119417071, 1050920275, 1065042273, -1077701631) + + W(2, -1105978590, -1112399565, 1069638520, -1078616397) + + W(3, 1009798795, -1098493686, 1067662601, -1083676116) + W(4, 1040834561, 1036263434, -1093932676, 1044144047) + + W(5, -1111364082, -1124489419, -1079439326, 1070390370) + + W(6, 1041397121, 1048582480, -1078082886, 1068400636) + + W(7, 1033368796, -1089345484, -1070108577, 1077727906); + WS(-1079921856, 1049308945); + sum1 = W(0, 1030068962, -1165865787, -1187780005, 1025440517) + + W(1, -1133949626, -1118082277, -1108803021, -1111921110) + + W(2, 1050882798, 1053008345, 1038634079, -1120535059) + + W(3, 1018623990, -1094507016, -1106873963, -1104818073) + + W(4, -1106924392, 1052481994, -1087778738, 1046575523) + + W(5, -1146454049, 1043504665, -1101591143, 1041667424) + + W(6, -1116987117, 1047635255, -1101174683, 1031519031) + + W(7, 1040905769, 1026870489, -1110197734, 1031554658); + sum2 = + W(0, -1116556974, 1059176005, -1096412953, -1114011261) + W(1, -1095066961, 1038856572, 1050790436, -1173649795) + + W(2, -1100395256, -1076691076, 1058179896, 1024138419) + W(3, 1050308919, 1055715442, -1097771226, 1040413046) + + W(4, 1040792410, 1046498927, 1039711359, -1106323994) + W(5, 1032406727, -1123959066, -1106160519, 1033840078) + + W(6, 1011901446, 1028959950, 1018212767, -1127468978) + + W(7, -1141998384, 1032868606, 1038360912, -1107910389); + WS(1060842367, -1123947436); + sum1 = + W(0, 1024730985, 1048950736, -1097448124, -1122548355) + W(1, 1023159627, 1047784339, -1098772880, 1026072115) + + W(2, -1105241169, 1057388094, -1087773444, 1037590622) + W(3, 1017803651, 1060112037, -1085451727, 1037652837) + + W(4, 1037913699, 1062491953, -1088044111, 1034810634) + W(5, 1035962309, 1057656339, -1089872870, 1033785172) + + W(6, 1028045772, -1114528913, 
-1102522557, 1008900343) + + W(7, 1032728670, 997860858, -1103124285, -1114288296); + sum2 = W(0, 1026131421, 1001103237, -1133854939, -1114822986) + + W(1, -1112740722, -1121600279, 1038637096, -1108393699) + + W(2, 1046858645, -1108221277, 1037424280, 1029862733) + + W(3, 1032928238, -1097923247, -1103185020, -1101308237) + + W(4, 1042166107, 1069212508, -1120290433, 1036213344) + W(5, 1040344552, 1057336110, 1019342369, -1145606165) + + W(6, -1082879617, -1095628134, 1039461314, -1111520861) + + W(7, -1091819295, -1107037379, -1122429445, 1034167562); + WS(1038606587, 1058047160); + sum1 = + W(0, 1026865207, -1095018974, 1044423619, -1136300775) + W(1, 1042118304, -1093024127, 1048945010, -1117800769) + + W(2, 1051307036, -1082724754, 1054092644, 1005722288) + W(3, 1033639368, -1100730470, -1097872036, 1044405134) + + W(4, 1046742309, 1036658562, -1091674216, 1047528601) + W(5, -1109598961, 1040951763, 1033100505, 1044812414) + + W(6, 1028927539, -1113143704, 1030352835, 1032954286) + W(7, 1015037525, -1098528366, 1047837568, 1044220906); + sum2 = W(0, 1050657429, 1045860939, -1111592920, -1103122208) + + W(1, -1103301852, 1052242374, 1013554208, -1106328874) + + W(2, 1060573163, -1100974944, -1111727364, 1015168980) + + W(3, -1091436731, 1066745360, 1055083328, -1113723980) + + W(4, -1104427284, -1079344782, -1098647652, 1051192034) + + W(5, -1097267636, -1113016512, 1034990326, 1034643320) + + W(6, 1038196249, -1119497008, -1110499846, -1122594772) + + W(7, -1107511544, -1113648424, 1042188938, -1140986496); + WS(-1095257599, 1052962039); + sum1 = + W(0, -1106746181, -1097695335, 1053309708, 1036635571) + W(1, 1031300369, -1111219488, 1042046133, 1032102291) + + W(2, 1046667354, -1083536631, 1060606673, -1110849586) + + W(3, 1037361867, -1088861218, 1053570633, -1103814096) + + W(4, 1040829967, -1107201338, -1101583222, -1130665360) + + W(5, -1128966365, -1115618056, 1033575580, 1023220920) + W(6, 1034540700, -1117058422, 1029127944, 1009452933) + + W(7, 
-1115420664, -1138753100, 1041019129, 1041468947); + sum2 = W(0, -1137205193, -1089705070, -1116057142, -1106134583) + + W(1, -1121249932, 1025195792, 1014073009, -1131583764) + + W(2, -1137006201, 1049206515, 1040872535, 1023734245) + W(3, -1101149788, 1054529493, 1044729510, 1030258377) + + W(4, 1034796937, 1039074115, 1024555690, 1045578999) + W(5, -1115985020, 1050074620, -1108168105, 1041497164) + + W(6, -1140489533, -1104546681, -1105229574, -1131316168) + + W(7, 1026538132, -1103055048, -1093226929, 1036128894); + WS(1058062751, -1100017341); + sum1 = + W(0, -1129599006, 1055064995, -1094744478, -1110864465) + W(1, 1020928695, 1054737968, -1108714620, 1035506764) + + W(2, -1098371705, 1061839865, -1089137095, -1103907470) + + W(3, -1111175932, 1057524464, -1081607655, -1115102560) + + W(4, -1108928286, 1062722401, -1111400813, 1038105413) + + W(5, -1108487635, 1039434185, -1089739654, -1103382940) + W(6, 1028685770, 1053936411, 1037008853, 1038153674) + + W(7, -1134533769, 1054765971, -1094983833, -1101904010); + sum2 = + W(0, 1046625968, 1034733544, -1147999170, 1026537136) + W(1, -1114205611, -1100522159, -1093312378, -1103988998) + + W(2, -1097342682, 1034693600, 1057288295, 1049229753) + W(3, -1079666171, 1066071967, 1059803421, -1114956972) + + W(4, 1041595755, -1087077400, -1088579916, -1104980683) + W(5, 977545170, 1062799024, 1057064650, 1050850828) + + W(6, -1106070846, -1097179722, -1086854504, 997711860) + + W(7, -1145403458, 1015933409, 1055355590, -1115357129); + WS(1033636603, -1075190676); + sum1 = W(0, 1030520392, 1051253486, -1096251285, -1117233131) + + W(1, -1125697208, 1043969999, -1100430004, -1122877346) + + W(2, -1114269946, 1058583983, -1085953223, 1036037681) + + W(3, -1109653337, 1064791929, -1093290764, -1109909101) + + W(4, -1102211630, 1057642082, -1088005167, -1123582788) + + W(5, 1032435172, 1055881859, -1098086422, 1029223000) + + W(6, -1120127764, 1033241689, -1102333361, -1125435099) + + W(7, 996293820, 1053053914, 
-1119839106, -1103406294); + sum2 = + W(0, -1120200303, -1113787644, -1109535466, 1041916719) + W(1, 1024626781, 1041562727, -1106166077, 1028143351) + + W(2, 1013431061, -1114475538, 1029531851, -1097688999) + + W(3, -1124733431, -1112371618, 1051805575, 1068535590) + W(4, 1027814217, 1032748777, 1061661699, 1074776935) + + W(5, 999438554, -1098117165, -1114245048, 1029048009) + W(6, -1116912567, 1042886038, 1039886302, -1076933565) + + W(7, 1007930813, -1140542509, -1093997696, -1072079702); + WS(1051996799, 1040628126); + sum1 = + W(0, -1106876926, -1096999331, 1064227732, 1032518006) + W(1, 1042171100, -1106536569, 1056769919, -1132737450) + + W(2, 1014013938, -1087433914, 1057736892, -1121739327) + W(3, 1040804174, -1079279688, 1052514885, 1035728257) + + W(4, 1038633599, -1085040172, 1059334684, -1115112214) + + W(5, 1024260165, -1090750082, 1058371760, -1126170017) + + W(6, 1032103150, -1096245875, 1051111478, -1129820331) + + W(7, -1111941370, -1091794355, 1050507101, 1036168684); + sum2 = W(0, -1097197656, -1105938617, -1063141508, 1047663602) + + W(1, 1040517176, -1119182790, -1072247475, -1106333570) + + W(2, -1107105879, 1048788314, 1073986693, 1017343300) + W(3, 1050425804, 1048225474, 1082212097, 1014706728) + + W(4, -1108102513, 1050055380, 1063649950, 1023560998) + + W(5, 1044838414, -1097162220, -1108483496, -1105252179) + + W(6, 1026907274, 1028263310, 1040597170, 1016727612) + W(7, 978890108, 1043193320, -1096883524, 1025416282); + WS(-1079662656, 1075711984); + sum1 = W(0, -1136749190, 1054381451, -1099895821, -1111920714) + + W(1, -1108214827, 1049467004, -1104318006, -1124862997) + + W(2, -1112256994, 1060357369, -1094055613, -1099490085) + + W(3, -1102094902, 1064079536, -1088977984, -1093653775) + + W(4, 1019461885, 1058102849, -1094018397, 1047054555) + + W(5, -1113236619, 1050217386, -1093377643, 1026036560) + + W(6, -1115387518, 1046694646, -1102869707, -1122648846) + + W(7, 1015637672, 1053154499, -1097489705, -1117787456); + sum2 = + 
W(0, 1021596406, -1120307380, 1037868882, -1094559059) + W(1, 1027700788, 1049653321, 1038499992, -1099826066) + + W(2, 1031902674, -1100871897, 1074972319, -1070411479) + W(3, 1035450862, 1054975194, 1073269037, -1076192259) + + W(4, -1146279384, -1106853601, 992333684, 1024067187) + W(5, 1036068426, 1042197470, 1032586692, -1107184127) + + W(6, -1108044351, 1034125536, -1129244749, 1032061183) + + W(7, 1030087008, -1133477273, 1010404410, -1104237818); + WS(-1121360374, -1077155152); + sum1 = W(0, -1105583845, -1092428892, 1053987635, 1036329976) + + W(1, 1042009047, -1091586093, 1057449542, 1039912928) + + W(2, -1135496901, -1087526962, 1037542524, -1114441548) + + W(3, 1057317824, -1088356100, 1067018703, 1035631092) + W(4, 1037107234, -1084718223, 1053143538, 1024846769) + + W(5, 1028773828, -1086275726, 1051794540, 1020991498) + W(6, 1042777544, -1096882940, 1049337758, 1032759069) + + W(7, -1116736729, -1086839898, 1050512766, 1041911145); + sum2 = W(0, -1122349048, -1108922684, -1102817435, 1024154790) + + W(1, 1027223630, 1050237475, 1047134367, -1105600910) + + W(2, -1123059666, -1093984844, -1086242189, 1041276041) + + W(3, -1097496437, 1061603028, 1069845393, -1090292072) + + W(4, -1101147358, 1054912466, -1091188340, -1122818012) + + W(5, 1008041239, -1136134543, -1107987422, -1109079234) + + W(6, -1113710228, 1050990810, -1126348732, 1047206075) + + W(7, -1103013196, 1037079763, -1097181289, -1109791787); + WS(-1087087711, 1067831143); + sum1 = + W(0, 1034272753, -1110686621, 1034360523, 1006467012) + W(1, 1043354114, -1094089577, 1051348739, -1097882829) + + W(2, 1046815315, -1090322161, 1056352764, -1098171061) + W(3, 1041026663, 1019000899, 1049291409, -1099191041) + + W(4, -1116058688, 1048378971, -1098372646, 1036956791) + + W(5, -1131457456, 1054466495, -1090870248, 1036945505) + + W(6, -1115558139, 1045446607, -1103803571, -1124941614) + + W(7, 1032034947, 1043401342, -1098529593, 1026141794); + sum2 = W(0, 1046033632, -1147554302, 1021728539, 
1034466187) + + W(1, -1114282503, 1065651536, 1061103542, -1117467216) + W(2, 1044655964, 1052996561, 1047372498, 1040544990) + + W(3, -1106001859, -1076523823, -1081697027, -1105745303) + + W(4, -1104436397, -1095873418, -1094800655, -1119054372) + + W(5, -1101664028, 1059971501, 1058036758, -1110996319) + + W(6, 1015764662, 1037179306, -1113369666, -1120190386) + + W(7, 1023732858, -1119149512, 1033040332, 1028498617); + WS(1040537598, -1164677141); + sum1 = W(0, 1044447583, 1058921623, -1079854668, 1051360885) + W(1, 987031402, 1052904912, -1089009751, 1009017491) + + W(2, -1121775302, 1059494147, -1082135643, 1049103971) + + W(3, 1042738997, 1059765434, -1085199868, 1042102879) + W(4, 1038434732, 1053249925, -1087718171, 1037325507) + + W(5, 1049696256, 1045203303, -1094593271, 1033340813) + W(6, 1024826006, 1049549231, -1098502833, 1040792940) + + W(7, 1044766000, 1032179929, -1111725730, -1103499809); + sum2 = W(0, -1105450770, -1076904032, 1034745372, 1064235564) + + W(1, -1099930216, -1110661913, -1104228342, -1133206564) + + W(2, -1087670985, 1054929191, 1055270313, -1159337186) + + W(3, -1097641768, 1067640588, 1058849806, -1095527236) + + W(4, -1093727900, 1043325457, 1055379319, -1097371942) + + W(5, 1034849198, 1041628489, 1047258529, -1123329564) + + W(6, -1089823109, 1024934811, -1091022953, 1053074558) + + W(7, 1026936119, 1071055811, -1093861556, -1079977593); + WS(-1072228928, 1076959210); + sum1 = W(0, 1049440860, -1102211417, 1052420434, 1046258910) + + W(1, -1100954468, -1090494943, 1049042513, -1094165323) + + W(2, 1040518102, -1089397123, 1057628335, -1113742940) + + W(3, 1055014377, -1085929645, 1059399711, -1115570152) + + W(4, -1102413547, -1092416799, 1055649007, -1095616949) + + W(5, 1056933930, -1103710088, 1056457065, 1054320058) + + W(6, -1100924366, -1099959722, 1043274798, -1096288332) + + W(7, 1025160513, -1097219837, 1043339918, -1113497047); + sum2 = + W(0, 1037249746, -1091193116, 1058952598, -1099278743) + W(1, -1106259401, 
1057292562, -1089499743, 1035766214) + + W(2, -1118512137, 1045032769, 990544085, -1120311473) + W(3, 1033553146, -1101074553, 1052938455, -1107284433) + + W(4, -1118437421, 1047290760, -1096914685, 1032001159) + + W(5, 1049532353, -1085599381, 1060306657, -1105441959) + + W(6, -1112295163, 1045403080, -1095081062, 1037527897) + + W(7, -1112769969, 1036239865, 1036770528, -1117837555); + WS(-1095248895, -1088416713); + sum1 = W(0, 991086878, -1112623265, 1049658160, 1025280780) + W(1, 1041397006, 1023033498, -1129854724, 1018002536) + + W(2, -1100414321, -1095527100, 1056723856, -1101036339) + + W(3, -1106522737, -1085500137, 1061179113, -1098081679) + + W(4, 1032194452, -1103431285, 1050067968, -1121954805) + + W(5, 1040515607, -1093888960, 1055178889, 1040437430) + + W(6, -1113785099, 1025813699, 1027165141, -1107604207) + + W(7, -1119710941, -1112974208, 1044420741, -1122583614); + sum2 = W(0, -1115281709, 1049351198, -1101546004, -1132663474) + + W(1, 1038308115, -1105447726, 1044927538, 1036918381) + + W(2, -1086190321, 1061131477, -1105825160, -1104633062) + + W(3, -1079410563, 1070791012, -1124003323, 1035199903) + + W(4, 1055352031, -1080774986, 1054621998, -1119052371) + + W(5, -1106071111, 1050939464, -1100497740, 1038597174) + + W(6, 1042421121, -1096576742, 1042910101, -1126972585) + + W(7, 999244165, 1021655353, -1123561791, -1141223925); + WS(1057802399, -1081431823); + sum1 = W(0, -1122831751, -1093480916, 1050038833, 1031929377) + + W(1, -1131892376, -1097592621, 1050095280, 1003466633) + + W(2, 1041203877, -1087992005, 1058335651, -1150646211) + + W(3, 1034931072, -1086955360, 1062569267, -1108251076) + + W(4, 1020249162, -1094219074, 1052617634, -1123920403) + + W(5, 1023514439, -1099624791, 1053253320, -1110979235) + + W(6, -1183605377, -1104094712, 1037479577, 1022403704) + + W(7, -1102408215, -1103351679, 1047799764, 1031403703); + sum2 = W(0, -1104748298, 1032567526, 1030425414, 1028805714) + W(1, 1035677532, 1015597088, -1106402955, 
1010146816) + + W(2, -1097271446, 1042180726, 1036072793, -1122721452) + + W(3, -1071563129, -1131466492, 1026492053, -1139412188) + + W(4, -1073203825, -1088097679, 1021201077, -1121571686) + + W(5, -1105574565, 1049167232, 1046304011, 1013464552) + + W(6, 1074066983, -1104411749, -1108010622, -1115684312) + + W(7, 1076846054, 1054919113, 1040423027, -1159010626); + WS(1057159391, -1098185256); + sum1 = + W(0, -1115997840, 1062337777, -1099145229, -1108105457) + W(1, -1139848194, 1059107990, -1096884471, 1026934458) + + W(2, -1110900998, 1058844902, -1083284577, -1128707950) + + W(3, 1040272534, 1060188982, -1081840204, 1050661700) + W(4, -1124007614, 1050305613, -1086268415, 1025671020) + + W(5, 1022311680, 1055455830, -1088086212, 1018441343) + W(6, -1122049771, 1052112283, -1093503993, 1032949738) + + W(7, 1025640163, 1058488623, -1095187909, -1115150240); + sum2 = + W(0, 1056249112, -1063663228, -1090194820, -1098679170) + W(1, -1087901783, -1065703489, 1049313091, 1049727089) + + W(2, 1050240307, 1050922399, 1059446328, -1118968312) + W(3, -1096100666, 1074103439, -1091782161, 1043947681) + + W(4, 1050011361, 1078895155, 1051412884, -1103451878) + W(5, 1046757851, 1075028289, 999915799, 1046896549) + + W(6, -1111613200, 1011769164, -1097222567, 1036070334) + + W(7, 1031802636, 1041242105, 1040180956, -1120669738); + WS(-1076050352, 1072169512); + sum1 = + W(0, -1112010473, -1127017300, 1032805651, -1127012698) + W(1, -1110549812, -1096217239, 1055039585, 1017026201) + + W(2, 1041474168, -1101316183, -1107675851, 1027743346) + + W(3, -1102358403, -1098236010, 1058693100, -1103052317) + + W(4, 1035256808, -1097462672, 1050718238, 1025923518) + W(5, -1103941101, 1015309803, 1047069339, -1108585145) + + W(6, -1126807163, -1109990262, 1040782175, 1029935559) + + W(7, -1127729580, -1124828306, 1033573870, 1030091394); + sum2 = + W(0, -1115222730, 1013406324, 1048651312, -1105744683) + W(1, -1103808603, 1044516259, -1091636530, 1036643390) + + W(2, -1133607278, 
1056099238, -1082257366, 1056116581) + + W(3, 1035011293, -1093760521, 1065398243, -1087652578) + + W(4, 1045426354, -1097063741, 1052543946, -1106429625) + W(5, 1005150868, 1046105407, -1105190986, 1031045850) + + W(6, 1038138903, -1106358807, -1143243436, -1129675875) + + W(7, 1014354189, 1034790704, -1124610775, 1033997397); + WS(1065781680, 1039008007); + sum1 = + W(0, 1041247860, 1052873269, -1087848896, -1122380132) + W(1, -1132228043, 1052030887, -1090939837, 1043969975) + + W(2, -1116017645, 1057769989, -1086321043, -1167945460) + + W(3, 1029031978, 1065817909, -1090130600, 1043124941) + W(4, -1114552992, 1059292430, -1093123180, 1037414446) + + W(5, 1018467582, 1052683781, -1089813139, 1037105136) + W(6, -1119698444, 1045381867, -1093950448, 1016187596) + + W(7, 1039500994, 1042767258, -1088918334, -1139723011); + sum2 = W(0, 1038177586, -1105284792, -1114385091, 1023745983) + + W(1, 1024852101, -1131141686, -1105200928, 1044997454) + + W(2, 1031995320, -1123455081, 1057384109, -1102654886) + W(3, 1016571646, 999573799, 1073904332, 1045931554) + + W(4, 1033741319, -1107419527, 1075963065, -1113399825) + + W(5, -1108656521, 1052574782, -1130292726, 1041285787) + + W(6, 1035134998, -1099142208, -1072886122, -1101763500) + + W(7, -1120710323, -1103717942, -1070457563, 1044467987); + WS(-1090516543, -1087531312); + sum1 = W(0, -1105337757, 1040358473, 1026287071, -1112133319) + + W(1, -1135375282, 1025750638, -1109155234, 1043065477) + + W(2, -1113059195, 1034124454, -1111070925, 1049735832) + + W(3, -1111479074, -1082649657, -1113637434, 1048811350) + + W(4, 1044282546, -1098312091, 1056333811, -1104740686) + + W(5, 1036163039, -1114570479, 1044131649, -1111303053) + + W(6, -1123738614, -1112260786, 1049914271, -1128712224) + + W(7, -1110047807, 1044483760, 1039433994, -1136462731); + sum2 = W(0, -1122757257, 1027164873, 1002742314, -1111953049) + + W(1, 1009904581, -1112829049, -1115430893, -1116391321) + + W(2, -1113467627, 1020364514, -1098221748, 
1025748661) + + W(3, -1099518373, 1064179702, 1059311413, -1098741962) + + W(4, 1032421699, -1092934445, -1139891301, 1036551469) + + W(5, -1095677862, 1039050377, 1041708988, -1119491053) + + W(6, 1020800146, -1106885375, -1124766450, 1025848261) + + W(7, -1112743661, 1022655162, 1032689603, -1122814953); + WS(1059528063, 1057564569); + sum1 = W(0, 993737087, -1096201122, 1052073475, 1036364765) + W(1, 1023855952, -1112481934, 1044928820, -1119101474) + + W(2, 1051519336, -1089271861, 1057871347, 1026047377) + + W(3, -1100822612, -1101379550, -1090210244, -1095066725) + + W(4, 1027584566, -1129312776, 1027452795, 1043887545) + + W(5, -1109486813, -1101173547, 1051680014, 1038809676) + + W(6, -1127107174, 1031993577, 1038521522, -1152581582) + + W(7, 1035631854, -1097162722, 1050823828, 1037595092); + sum2 = + W(0, -1106514375, -1115270098, 1025588625, -1109616345) + W(1, 1034094594, 1029678117, -1103891997, 1044342701) + + W(2, -1112949179, -1095879627, 1029083209, -1107320484) + + W(3, 1009317700, 1052560633, 1062070608, -1103446924) + + W(4, -1126355148, -1128108776, -1100067512, -1131810302) + + W(5, 1021633582, 1034084538, 1035054672, -1098743078) + W(6, 1010987268, 1040297892, -1117785257, -1123491293) + + W(7, -1106252693, 1049223604, 990783407, -1102218346); + WS(1061977215, -1122204685); + sum1 = + W(0, -1124031333, 1053794732, -1090279300, 1017895407) + W(1, -1109554099, 1053305789, -1096176108, 1027213295) + + W(2, -1105250724, 1058003360, -1093816822, 1024566052) + W(3, 1036027519, 1059361796, -1081723655, 1033714163) + + W(4, 959521497, 1055117810, -1090510080, -1125004641) + W(5, 1055353398, 1051669497, -1097892492, 1006220452) + + W(6, 1038275624, 1037705519, -1101480172, -1112881733) + + W(7, -1136942561, 1047158647, -1098784634, -1145512357); + sum2 = W(0, -1113155109, 1028856727, 993552399, 1036996819) + W(1, 1032430477, 1021325630, -1128331832, -1139715524) + + W(2, -1105712018, 1050891972, 1042508384, -1111526035) + W(3, 1048044904, 1070862297, 
1042132916, 1040898786) + + W(4, -1081128362, 1074403261, 1044319918, 1025048067) + + W(5, -1070110178, 1035516985, -1115800491, 1038841221) + + W(6, -1092435520, -1108990845, 1033559089, -1119663830) + + W(7, -1092804751, -1122256510, -1109033129, 1007165668); + WS(-1096328959, 1070879408); + sum1 = W(0, 1047483149, 1056909741, -1084253114, 1043610432) + + W(1, -1131726569, 1050717891, -1097194674, -1120381614) + + W(2, 1017405984, 1049879450, -1084428714, 1029856307) + W(3, 1016192218, 1064784543, -1088019878, 1046790989) + + W(4, -1134185032, 1054175424, -1087922381, 1044095416) + + W(5, -1130210629, 1055866677, -1096076442, -1106891777) + + W(6, 1039283496, 1052923070, -1098682228, 1045347898) + + W(7, 1022798351, 1050331269, -1089646420, -1103492015); + sum2 = + W(0, -1096731787, -1092572599, -1072213302, 1074300082) + W(1, 1041094096, -1083918511, -1081783724, 1061218424) + + W(2, 1030029833, -1081752188, -1088492050, 1062246408) + + W(3, -1134738767, 1050600846, -1098909280, 1055931246) + W(4, -1112066554, 1060518769, 1054813136, 1030722387) + + W(5, 1049526473, 1056806956, 1066457366, -1080408758) + W(6, -1103571503, 1052566197, 1061557517, -1082314750) + + W(7, 1038503381, 1040255570, 1074348460, -1074863778); + WS(-1093955647, 1022010191); + sum1 = + W(0, 1027472455, 1051129150, -1126583382, -1110971745) + W(1, -1118029508, 1040858863, -1110637352, -1129027011) + + W(2, 1017520774, 1058284788, -1083829535, 1043546956) + W(3, -1100976252, 1064764899, -1092969620, 1032062845) + + W(4, 1048038797, -1094249656, -1093269900, 1049944248) + + W(5, 1019653215, 1054989120, -1098669306, -1114080636) + + W(6, 1015004427, -1137681219, -1101321193, 1039113958) + + W(7, 1006149335, 1047833420, -1092663640, -1120661351); + sum2 = + W(0, -1125700855, -1109340305, 1041739108, -1138307987) + W(1, 1010923567, -1099241242, -1121076246, 1010348245) + + W(2, -1097524591, -1097784845, -1122648580, 1043843262) + + W(3, -1086770251, 1061340682, 1059475190, -1104554020) + + W(4, 
-1119687254, -1091236020, -1097965354, 1042867821) + + W(5, -1136610695, 1059369820, 1048517727, -1105357691) + W(6, 1034141799, -1103210716, 1020884025, 1021404807) + + W(7, -1146172877, 1042204527, -1117941982, 1026766467); + WS(1057685119, 1067243116); + sum1 = + W(0, -1129459555, -1098308652, 1053516928, 1029422892) + W(1, 1016923756, -1100618635, 1049587364, -1111775882) + + W(2, 1004671913, -1090418529, 1057639529, -1114348966) + + W(3, -1095493364, -1088191051, 1065512192, -1112549294) + + W(4, -1098633785, -1089905135, 1058720438, -1103551521) + + W(5, -1112175056, -1101363479, 1052631981, -1111153757) + W(6, 1046138501, -1106099897, 1043712557, 995391206) + + W(7, -1131471418, -1097291652, 1052947846, 1020473821); + sum2 = W(0, -1131225411, -1140292462, 1018110051, 1018006973) + + W(1, -1130956898, 1023877693, -1137158572, -1128428036) + + W(2, -1103681442, 1042914798, -1224504659, 1020723155) + + W(3, -1077027280, 1069127322, 1043315619, -1147115303) + + W(4, -1076028808, 1071847872, -1120948958, 1020554514) + + W(5, 1031803188, -1103125793, 1043270022, -1133954299) + + W(6, -1101294062, 1043076855, -1113817590, 990350747) + W(7, 949706042, -1132087796, 1015417250, 1025989218); + WS(-1097928959, -1082976358); + sum1 = + W(0, 1027664520, 1057835343, -1089326063, -1108038941) + W(1, -1123678010, 1050550727, -1096462854, 1032858337) + + W(2, -1106569905, 1057764410, -1083721351, 1021803093) + W(3, 1052451146, 1062299789, -1087127986, 1042469806) + + W(4, -1103556709, 1049034640, -1097735566, -1104920125) + + W(5, 1038567635, 1057826487, -1094118206, 1031016631) + + W(6, -1108799690, 1041175724, -1100805265, -1129264698) + + W(7, 1040222651, 1060823717, -1089053734, -1107162784); + sum2 = W(0, 1059738223, -1087082495, -1092378156, 1034470847) + + W(1, 1048150071, -1095900368, 1046247330, 1025055065) + W(2, 1052511773, -1108343021, 1033545850, 1045395587) + + W(3, -1080186607, 1063110279, -1090059649, -1094453880) + + W(4, -1078381471, 1074904245, -1107095292, 
1053230483) + + W(5, -1072132845, 1071639576, 1039712953, -1103894182) + + W(6, -1071450961, 1076364217, -1113577164, 1032308634) + + W(7, -1071397334, 1077341373, 1049211915, -1120241798); + WS(-1093482751, 1040242403); + sum1 = W(0, -1101135214, 1051279307, -1160860306, -1112732162) + + W(1, -1123956286, 1045008905, -1106067054, -1135242283) + + W(2, -1107524598, 1049489177, -1101021318, 1029341500) + + W(3, -1138012647, -1132973569, -1113266773, -1121978915) + + W(4, 1034129455, -1096848358, 1049504725, -1098727290) + + W(5, 1042720691, -1098383795, 1059206814, -1102892456) + + W(6, -1130324184, -1104810279, 1049756222, -1104163113) + + W(7, 998898338, -1118268600, 1052043149, -1099073308); + sum2 = W(0, 1050603120, 1055811875, -1086548128, 1034695967) + + W(1, -1101078445, -1122671355, -1087609936, -1108269543) + + W(2, 1048531671, 1038805789, -1088336312, -1089139562) + + W(3, -1105220793, 1066111209, 1071017234, -1104320030) + + W(4, -1091713187, 1026095418, -1115602704, -1100901763) + + W(5, 1034851673, -1130092003, -1089969509, 1049804877) + + W(6, -1122031169, -1137910742, -1121274059, -1103744367) + + W(7, 1020293663, -1094690648, 1053616941, 1054827086); + WS(-1112146683, -1077736475); + sum1 = W(0, -1128590341, -1099064529, 1049253908, 1039097715) + + W(1, -1130792414, -1090490649, 1028859267, 1017948777) + + W(2, -1147973299, -1086623314, 1040872136, 1025770233) + + W(3, -1117834531, -1101063739, 1068918685, -1115616742) + + W(4, 1007065251, -1084449173, 1059458719, -1104116690) + + W(5, 1033724609, -1094725676, 1057886934, -1132647657) + W(6, 1024083798, -1097837640, 1044653558, 998082042) + + W(7, -1122914526, -1097461352, 1048946351, 1030833130); + sum2 = + W(0, 1038545045, 1044965772, 1038508364, 1032278560) + W(1, -1104316542, -1073797636, -1092377302, -1115941315) + + W(2, -1098913527, -1069601753, 1050236814, -1109127211) + W(3, 1054073276, 1079919017, 1044931722, 1042591612) + + W(4, 990615436, 1065620582, 1034922622, 997021068) + W(5, 
-1111960943, -1103381017, 1033398315, 1042220416) + + W(6, -1127436605, 1039512539, -1113162193, -1138197651) + + W(7, -1131799749, -1105195767, 1039327537, -1121388557); + WS(-1090505151, -1074550453); + sum1 = W(0, -1112838599, -1106878250, 1057615003, -1131143051) + + W(1, 1028498739, -1103427629, 1042960070, 1023192930) + + W(2, 1023419227, -1088602192, 1063645564, -1096958907) + + W(3, -1117182312, -1088941275, 1053033883, 1048242260) + + W(4, 1018948122, -1087514842, 1058641540, -1098164287) + + W(5, -1126145258, -1096920942, 1043052986, 1045189988) + + W(6, 1024711581, -1096615233, 1050742275, -1105551137) + + W(7, -1125422420, -1095073209, 1053972531, 1026824959); + sum2 = W(0, 1051549752, 1021553919, -1106404197, 1031994963) + + W(1, 1081443218, -1111116920, 1044837835, -1113046369) + + W(2, 1072492444, -1073777225, -1113676512, 1032624041) + + W(3, 1048388769, -1067599847, -1089379201, 1025956540) + + W(4, 1035162847, -1096587672, -1109546151, 1027498280) + + W(5, -1139079606, 1029232562, 1030543674, -1132168419) + + W(6, -1121149576, -1115288719, -1122606689, 1034044071) + + W(7, -1123800689, 1042218632, -1125599605, -1115044631); + WS(1051119487, 1041244378); + sum1 = W(0, -1124974044, -1137238551, -1116593698, -1121323393) + + W(1, 1028120435, -1113671305, -1103177170, 1033078132) + + W(2, 1048876359, 1036964902, -1099611151, -1156053771) + + W(3, 1037118035, 1061504071, -1104164974, -1129960608) + + W(4, -1131477080, 1047204942, -1081804989, 1048068278) + + W(5, -1118427428, 1050660570, -1114906180, 1011539382) + + W(6, -1120234265, 1030267847, -1098831475, 1035846233) + + W(7, -1125985859, 1049717597, -1103929310, -1115091743); + sum2 = W(0, 1018090488, 1027537220, 1019973834, -1117503915) + + W(1, -1104952272, 1048078363, -1095697091, 1045164128) + + W(2, -1105226064, 1055192952, -1101823824, -1106818318) + + W(3, -1128161694, -1102612387, 1074095781, -1087428157) + + W(4, -1134153828, -1102761937, -1074759173, 1061969021) + + W(5, 1024328280, 
-1101688104, 1063478353, -1092430375) + + W(6, -1118410723, 1035228556, -1092594125, 1050476000) + + W(7, -1117225997, 1016223417, 1041310640, -1110796930); + WS(1062303263, 1031082743); + sum1 = + W(0, -1113216195, 1050954912, -1095964628, -1121789260) + W(1, -1099215391, 1012178225, -1095180416, 1027015379) + + W(2, 1052151297, 1065129152, -1088785601, 1041954507) + W(3, 1026092610, 1047492794, -1089641431, -1107401421) + + W(4, 1051942565, 1063700475, -1089295544, 1049634587) + + W(5, -1098039588, 1049517720, -1091476104, -1112964196) + + W(6, -1101893697, 1028060738, -1102410709, 1035520455) + + W(7, 1008063218, 1054989200, -1096414030, -1114231491); + sum2 = W(0, 1033145987, -1175661064, 1021827842, -1165845202) + + W(1, -1111827051, -1152013649, -1104895942, -1187919119) + + W(2, -1103256184, 1050523165, 1034611383, -1125020091) + + W(3, -1063595607, 1084357731, -1094652398, 1042933311) + + W(4, -1101985731, 1048919805, 1029250458, -1108910008) + + W(5, -1141099147, 1025442742, -1102082303, 1040766920) + + W(6, -1136421288, -1145624409, 1021034188, -1126731425) + + W(7, 1030655939, -1133270551, -1121842648, 1017996705); + WS(1057958943, -1104691893); + sum1 = + W(0, -1111014615, -1105424217, 1044401084, -1118336456) + W(1, -1128240917, 1046008396, -1100901945, 1038452148) + + W(2, -1146251723, -1090082375, 1057785284, -1119525902) + + W(3, -1095236657, 1054329938, 1040293502, -1093887914) + W(4, 1047913107, 1003615948, -1099597392, 1045607070) + + W(5, -1103203951, 1013130820, 1051440085, -1107674167) + + W(6, 1033356080, 1044802714, -1102604718, -1113361665) + + W(7, 1023941889, -1101796865, 1036253756, 1037874639); + sum2 = W(0, 1010987018, 1041119120, -1110188142, -1127404253) + + W(1, -1105807463, -1111572952, 1045029856, 1029477872) + + W(2, -1102396693, -1117220749, 1057260834, 1019931869) + + W(3, 1054217500, -1087890507, -1087358521, 1056265374) + + W(4, -1123211893, 1070205457, -1081682663, -1091128546) + + W(5, 1029628166, -1101117061, 1051101181, 
-1124058781) + + W(6, -1106898625, -1113015450, 1042553654, 1025549544) + + W(7, 1028744914, -1121743361, -1135525482, 992401866); + WS(1063883327, -1171419961); + sum1 = + W(0, -1131801844, 1049833398, -1102183011, -1101850371) + W(1, 1021098616, -1123871229, -1097479398, 1031224740) + + W(2, 1022023532, 1009731296, 1056730033, -1123343997) + W(3, -1121742227, 1044941350, 1041013425, -1127214008) + + W(4, -1090925859, 1060653970, -1095529687, -1107029405) + + W(5, 1024490200, 1043051923, -1097251239, 1042080318) + + W(6, -1102607801, 1048967870, -1104750961, -1107657120) + + W(7, -1123544461, 1050859794, -1097963532, 1009310536); + sum2 = W(0, -1106082604, 1047581965, -1097195715, -1113941985) + + W(1, 1041248381, -1108094251, -1105642610, -1131232309) + + W(2, -1132935321, -1111186257, 1042983438, 1050509566) + + W(3, -1106587714, 1050591326, 1048334662, -1103031697) + + W(4, -1104172881, 1039028256, 1046273630, -1127691207) + + W(5, -1118244665, 1025922877, -1128737701, -1115007353) + + W(6, -1115368488, -1111866155, 1044426238, -1112751356) + + W(7, -1139264801, -1104769894, -1122157893, 1031938310); + WS(1064944927, 1037595256); + sum1 = W(0, -1116864018, -1089228760, 1063040538, -1107296590) + + W(1, 1023806679, -1095304482, 1057549004, -1106875789) + + W(2, 1017115436, -1087165700, 1060715233, -1103245163) + + W(3, 1009660557, -1086682215, 1064233001, -1106392275) + + W(4, 1028149000, -1088874517, 1056983460, -1132085516) + + W(5, -1109610248, -1099761351, 1052917089, -1107160974) + + W(6, -1148661421, -1116139780, 1041500021, -1129006079) + + W(7, -1102752831, -1131587330, 1045020525, -1152394658); + sum2 = + W(0, 1045832950, -1084008560, -1084775302, 1058474163) + W(1, -1110716731, 1049995423, -1093173621, -1102301410) + + W(2, 1025147898, -1089165996, 1054478967, -1128684365) + W(3, -1098226948, 1066542501, 1068786857, 1031931484) + + W(4, -1099749141, 1049362032, 1047038722, -1091779956) + + W(5, -1098250414, 1055260506, -1126019654, -1105326436) + + 
W(6, 1042544569, -1086997229, -1127724076, 1053449115) + + W(7, -1089530617, 1067285885, -1088213452, -1090548395); + WS(-1082101344, -1089856493); + sum1 = + W(0, 1050831902, -1105691402, -1101015969, 1029275441) + W(1, -1112019073, 1030717682, -1098775681, -1120813832) + + W(2, 1030303546, 1055521064, -1097748442, -1139280937) + W(3, -1129997454, 1064695661, -1085520515, 965412887) + + W(4, -1110327174, 1061962758, -1086646154, 1041358014) + + W(5, -1117704076, 1058754169, -1089610799, -1122362798) + + W(6, -1112251093, 1047731210, -1093958751, 1028938947) + + W(7, -1131596868, 1056158989, -1094349907, 1016935004); + sum2 = W(0, -1065128139, -1101130808, 1034565987, -1119435550) + + W(1, -1076824095, 1050826865, -1109355365, 1023890205) + + W(2, 1067439244, 1046966330, -1109830496, 1012867020) + W(3, 1079260085, 1049869634, -1112614302, 1015248786) + + W(4, 1056740148, 1051236860, -1123646153, -1122661198) + + W(5, -1103928218, -1114183173, 1051726295, -1136992532) + + W(6, -1135208116, 1033256035, 1035002893, -1119472799) + + W(7, 1039164015, -1104627926, -1109091637, -1136910724); + WS(-1090711679, 1035967541); + sum1 = + W(0, -1111795270, -1104454427, 1050513973, 1030482278) + W(1, 1015715144, 1029360119, 1050454265, -1138269301) + + W(2, -1118064815, -1084526809, 1063535365, -1132452908) + + W(3, -1124564760, -1083278737, 1060834888, 1007072853) + W(4, 1033380937, -1083950349, 1050327592, 1028355007) + + W(5, 1041725555, -1091233609, 1054941014, 1029566323) + W(6, 1039158191, -1102432923, 1040417584, -1140656289) + + W(7, 1036708611, -1097260861, 1042099839, 1035976950); + sum2 = W(0, 1030795338, -1099705557, -1100315818, -1124401732) + + W(1, -1104698754, -1088930785, -1129318753, 1047822852) + + W(2, -1086482027, -1087114188, -1088371087, 1041847465) + + W(3, -1086674278, 1063363970, 1059258270, 1007827043) + W(4, 1048689193, 1066258058, 1052412907, -1114489829) + + W(5, -1101252049, 1055138353, 1035784301, 1039056173) + + W(6, -1127107620, -1113111809, 
-1107499570, 1036879119) + + W(7, -1097010897, 1050900036, 1046424313, -1112035485); + WS(-1098505599, -1075865372); + sum1 = W(0, -1119631078, -1131715643, -1121509433, -1113178620) + + W(1, 1024188415, 1032178901, -1128987132, 1024883696) + + W(2, -1125460739, 1056750314, -1131145214, -1114020928) + + W(3, -1113150415, 1055366257, -1096447994, -1102601578) + + W(4, -1098117352, 1063530169, -1086227497, -1111953177) + + W(5, 1028789822, 1038512989, -1117796833, -1101470933) + + W(6, -1115205937, 1036256496, 1016180195, -1141768534) + + W(7, -1134520549, 1042179631, 1032402616, -1114797153); + sum2 = W(0, -1098762498, 1039073687, 1035541163, 1030962008) + W(1, -1112826554, 1047439072, 1032611741, 981436096) + + W(2, -1099883844, 1024348972, 1050751610, 1048715774) + + W(3, -1090598021, 1068429254, 1066295533, -1089341195) + + W(4, 1060690639, -1078855206, -1081223881, -1094541311) + + W(5, -1101866176, 1052294530, -1097770507, 1041458546) + + W(6, 1051055486, -1095975199, -1126362074, 1044471119) + + W(7, -1111565268, 1034134076, -1121819739, 1024624177); + WS(1057165023, -1106509195); + sum1 = W(0, -1110663097, -1090373040, 1057389257, 1036322023) + + W(1, 1023353031, -1094296089, 1051297995, -1110472134) + + W(2, 1009641338, -1087820187, 1064196683, -1103525103) + + W(3, -1116674588, -1090764772, 1064337073, -1108191858) + + W(4, -1121179137, -1101809276, 1058239248, -1105116828) + + W(5, -1127866586, -1095695571, 1053143796, -1130262917) + + W(6, 1019708412, -1097119704, 1049182394, -1117889432) + + W(7, -1105922998, -1088811352, 1042633923, 1047174317); + sum2 = W(0, -1112942365, -1106822390, -1102090209, 1043730009) + + W(1, 1048482322, -1115190843, 1042851055, 1035412483) + + W(2, -1100072975, 1034045428, -1103979279, -1105576965) + + W(3, -1097118067, 1068163863, 1056641862, 1028572696) + W(4, 991349250, 1063618710, -1105663689, -1113934073) + + W(5, -1108096389, -1107942623, 1049912171, -1105853831) + + W(6, 1053525633, -1090492700, -1130278628, 1039916278) 
+ + W(7, 1000921969, -1076676559, -1111476815, -1106999228); + WS(-1086493375, -1079336981); + sum1 = W(0, 977772865, -1102983835, 1026410857, 1015269519) + + W(1, -1123924042, -1108602044, -1098146918, 1049374456) + + W(2, -1109993624, 1041059645, -1106936850, 1036994068) + + W(3, -1107908519, -1094902379, 1061697492, 1041174790) + + W(4, 1036636329, -1083260999, 1061962303, -1103125248) + + W(5, 1038934102, -1084987377, 1057432285, -1125738921) + + W(6, 1034769240, -1102484540, 1051050960, -1130650164) + + W(7, -1099048778, -1108629750, 1050989872, 1040252563); + sum2 = + W(0, -1104970914, 1042634099, 1052960430, -1112577728) + W(1, 1018133697, -1097448347, -1108255972, -1096543501) + + W(2, 1040842491, 1055662701, 1062139198, -1090800341) + W(3, -1093587851, 1074010545, 1064732329, -1093194079) + + W(4, -1093237537, -1089724369, -1083390778, 1015865539) + + W(5, -1102375178, -1099392752, 1025575413, 991452982) + + W(6, -1099804326, 1049771835, -1104347883, -1126119433) + + W(7, 1054809337, -1092159031, -1096888027, 1028159711); + WS(-1088469887, 1068090411); + sum1 = + W(0, -1113244581, 1001301201, -1115331947, -1124374314) + W(1, -1123900274, 1023861926, 1035790701, -1105829923) + + W(2, 1049496346, -1082959002, 1057295251, -1096521435) + + W(3, -1110903326, 1050701400, 1048296635, -1119292644) + + W(4, -1119953152, 1060902689, -1081663133, 1048590454) + W(5, 1031904934, 1043052181, -1113327662, 1027826066) + + W(6, 1034227191, 1043817559, -1110964279, -1106468266) + + W(7, 1025939062, 1038313117, 1034951606, -1117293966); + sum2 = W(0, 1046922708, -1106472578, 1033443280, -1111979165) + + W(1, 1033420508, -1114867820, -1109208012, 1050643686) + + W(2, -1112865289, -1087802072, 1020845388, -1101832932) + + W(3, -1096930035, 1058113813, 1048730658, -1129026525) + + W(4, -1101802092, 1050476308, 1059721445, -1104558996) + + W(5, 1017341032, -1103417477, -1107242470, -1135034393) + + W(6, 1036773342, -1105807138, -1136542442, 1037376476) + + W(7, -1107450994, 
1033379085, -1104249476, 1023773731); + WS(1052578175, -1092643724); + sum1 = W(0, -1106584961, -1102735475, 1053304083, 993436516) + W(1, 1030893452, -1100779014, 1049737317, 1037510015) + + W(2, 1033955624, -1082492039, 1060101900, -1101687426) + + W(3, 1037358070, -1086815439, 1065824329, -1103135573) + + W(4, 1036710537, -1088009424, 1041943502, 1029815263) + + W(5, -1128189432, -1095075429, 1057719155, 1040440469) + + W(6, -1118327615, -1106908643, 1051335460, -1106074638) + + W(7, -1129060558, -1102841939, 1036237087, 1027836379); + sum2 = + W(0, -1115440098, 1015710974, 1041402597, -1130229431) + W(1, 1029771501, 1033337736, -1114856806, 1034787000) + + W(2, 1018065220, -1094728975, 1037520917, -1082769104) + W(3, 999202537, 1039453548, 1064530169, -1074584982) + + W(4, 1036811205, 1050012979, -1094040689, -1090370130) + W(5, -1111720654, 1032576025, 1073761042, 1032162938) + + W(6, -1125064265, -1103861773, 1063409170, -1105633547) + + W(7, 1019498859, 1040788513, -1090935047, 1034133532); + WS(-1104397694, 1058392920); + sum1 = W(0, 1012876448, 1051523414, -1094911217, 1023486907) + + W(1, -1126111401, 1050625047, -1106828476, 1033392977) + + W(2, -1142522206, 1043902243, -1089440467, -1119682235) + + W(3, -1115125816, 1063861817, -1093833780, 1019625524) + + W(4, -1103048369, 1056801600, -1087921625, -1096529922) + + W(5, 999529969, 1053396405, -1107310585, 1015514383) + W(6, 1016404519, 1042081404, -1104958721, 1039287934) + + W(7, -1117545451, 1054067798, -1097217844, -1113687714); + sum2 = W(0, -1130009672, 1013515790, 1013756514, -1153587550) + + W(1, 1027809153, -1106180134, 1023886448, 1024050371) + + W(2, -1106384796, 1042067106, 1052609397, -1094102229) + + W(3, 1051460883, -1094561503, 1078005003, -1070026280) + + W(4, -1104521112, 1032257325, 1072194837, -1075632186) + + W(5, 1033821004, -1124940473, -1094906083, 1053282219) + + W(6, -1112259960, -1168427353, 1046140857, -1098407638) + + W(7, 1028508475, -1120291522, -1140922271, 1033718481); + 
WS(1063327007, -1121248448); + sum1 = + W(0, 1017612078, -1113236667, 1042967269, 1035231217) + W(1, 1043030576, -1096763312, 1049218219, -1115130868) + + W(2, 1039059525, -1091103118, 1058517677, -1107367965) + W(3, -1115811128, 1053297652, 1033504868, 1009543939) + + W(4, -1095615210, 1063340055, -1082785422, 1047061435) + + W(5, -1099887459, 1054079124, -1088587000, 1042859328) + + W(6, -1140707177, 1041492110, -1101731443, -1120288724) + + W(7, -1120397682, -1126620524, 1029718569, -1104957341); + sum2 = W(0, 1037909727, -1119813454, -1099212072, 1032936033) + + W(1, -1098298527, 1048025725, 1045330821, -1097673782) + + W(2, -1126569464, 1041835288, -1106229244, -1109675885) + + W(3, -1106955476, 1067026849, 1067954224, -1092590328) + + W(4, -1096969233, -1085288934, -1095495837, 1004812688) + + W(5, -1118592354, 1042571631, -1108169766, -1096486003) + + W(6, 1042882430, -1117602980, -1164085849, 1034064234) + + W(7, 1037216424, -1116313332, -1100305071, 1045459772); + WS(-1114652667, 1051933605); + sum1 = + W(0, 1026700110, -1102428549, 1043087092, -1109604752) + W(1, 1037508767, -1105027032, 1048715843, -1113263092) + + W(2, -1128033780, -1106431647, 1062796090, -1098797270) + + W(3, 1034155035, 1043250735, -1089603768, 1023780992) + + W(4, -1102462575, 1059213449, -1090890112, -1112475685) + + W(5, 1024543544, 1038435705, -1106007014, -1121524254) + W(6, -1147042251, 1038748492, -1119096106, 986589794) + + W(7, 1032374633, 1016229610, -1104514210, 1007891224); + sum2 = W(0, -1129141996, 1043866561, 1029019378, 1050935854) + + W(1, -1106880495, -1096921120, -1092013580, -1111274547) + + W(2, 1031191254, -1123077406, -1086673510, -1096956427) + + W(3, -1102244397, 1064276171, 1064706574, -1115781076) + + W(4, -1101040753, 1061828176, -1113110329, -1106057540) + + W(5, 1046080919, -1095750750, -1091369762, 1029696990) + + W(6, -1112883155, 1042185570, 1042119352, -1113619581) + + W(7, 1031234782, -1128566730, -1105256367, 1012473722); + WS(1059465279, 
-1101420399); + sum1 = + W(0, 1033088680, 1043444330, -1101682761, -1124073107) + W(1, -1124176798, 1000916382, -1126261266, -1118729178) + + W(2, 1014063089, 1057398923, -1089602004, 1032573720) + W(3, 1035053989, 1034190184, 1039770557, -1114558729) + + W(4, 1045820204, -1087528660, 1057859064, -1096323421) + W(5, 1038292302, 1036787650, -1106381771, 1041172764) + + W(6, -1115007182, 1027988010, -1104773247, 1024683157) + + W(7, -1131477704, 1042732770, -1102530758, 1029511616); + sum2 = W(0, -1093261173, -1081648717, -1107343332, 1020947921) + + W(1, -1100338012, -1096055698, 1016623505, -1121572837) + + W(2, 1016590929, 1049309077, 1058913944, 1035038069) + W(3, -1160318928, 1060463741, 1061699617, -1121635630) + + W(4, -1111261089, -1123677731, -1095509190, -1116667427) + + W(5, 1032510827, 1042247124, 988275856, -1130213341) + W(6, 1037987285, 1035681279, -1111758125, 1033159595) + + W(7, -1106728517, 1035240533, 979544481, -1112486763); + WS(1060076127, 1072958059); + sum1 = + W(0, -1103118654, -1090537599, 1060753617, -1129584897) + W(1, 1043365506, -1097065432, 1046496158, 1025963976) + + W(2, -1115420333, -1086314390, 1060760746, -1106610556) + + W(3, 1031523031, -1088162978, 1055888777, 1040410096) + W(4, 1033412302, -1089193875, 1052645932, -1138875303) + + W(5, 1032254402, -1091959462, 1054239287, 1032279415) + W(6, 1031863013, -1111327644, 1044639698, 1032182482) + + W(7, -1127560355, -1086013855, 1056703661, 1044337291); + sum2 = + W(0, 1034238418, -1122429687, 1038471498, -1105287037) + W(1, 1026722605, -1113650971, 1047703041, -1106809975) + + W(2, -1123083006, -1101416602, 1050567163, -1099784158) + + W(3, 1044566349, -1095747837, 1073798913, -1077245954) + + W(4, -1114043742, 1044375513, 1074668356, -1072322090) + W(5, 1029836970, 1032414162, 1076147895, -1070521946) + + W(6, -1124457715, -1132025334, 1076524889, -1071353549) + + W(7, -1112207598, -1112893824, 1071535287, -1076180558); + WS(-1114470395, -1114976351); + sum1 = W(0, 1017446070, 
991904560, 1034584479, -1110976571) + W(1, 1040990896, 1041627584, -1107221597, -1099251857) + + W(2, -1129681934, 1047992216, -1115356259, -1111571481) + + W(3, -1120446036, 1061349282, -1083497834, 1039733326) + + W(4, -1097641691, 1061559014, -1089507216, 1036247843) + + W(5, -1110115727, 1055187519, -1098995485, -1114440077) + + W(6, -1110184487, 1045114861, -1109668803, -1115117640) + + W(7, 1033457695, 1047040436, -1107186297, -1120899067); + sum2 = W(0, -1093678598, -1084434022, 1064533419, 1042743345) + + W(1, 1002676269, 1057422860, 1067683118, -1108978134) + + W(2, 1055748356, 1066493380, -1078621547, -1117769694) + + W(3, 1044340647, -1079318791, -1079939901, 1015093293) + + W(4, -1118111722, -1116089022, 1054002901, 1049699290) + W(5, -1120084888, 1044705221, 1039926847, 986467602) + + W(6, 1001595675, -1112694927, -1112070298, -1107171922) + + W(7, -1123642691, 1031220790, 1023903081, 1037662763); + WS(1048785023, 1039045299); + sum1 = + W(0, -1104130562, -1090140327, 1061197562, -1133671882) + W(1, 1043168659, -1100442863, 1040570004, 1032880081) + + W(2, -1117502116, -1085428843, 1062309944, -1102819715) + + W(3, 1029857553, -1093347651, 1050495728, 1038995485) + W(4, 1034838603, -1089406196, 1050259320, -1127149063) + + W(5, 1027207566, -1093521697, 1054560318, 1026681976) + W(6, 1024393315, -1120775857, 1039464338, 1035433414) + + W(7, -1130533431, -1086838999, 1056208688, 1041402190); + sum2 = + W(0, -1118465298, -1142302474, -1114395216, 1037219807) + W(1, 1011712973, 1025023956, -1104707163, 1039062755) + + W(2, -1128433260, 1041730218, -1095905892, 1046574176) + + W(3, 1029767219, -1097771962, -1080533844, 1069124140) + + W(4, 1021354231, -1094677866, -1075595300, 1073795377) + W(5, 982009542, -1122047812, -1072373863, 1075522683) + + W(6, 1014788797, 1028525757, -1072719492, 1074436097) + W(7, 1033540398, 1020814500, -1078361330, 1068741752); + WS(1040594174, 1002085105); + sum1 = + W(0, 1041861603, -1113198522, -1137236930, 1041085614) + W(1, 
-1097746587, 1055071640, -1100468781, -1121524679) + + W(2, 1032164980, -1088672026, 1047493466, -1109849488) + W(3, 1031577622, 1050744420, -1105375081, 1048884604) + + W(4, -1102982955, 1057599999, -1087672398, -1113508732) + + W(5, 1044101411, -1103363988, 1050064611, -1098107838) + W(6, 1034678964, 1036351259, -1102178050, 1043931332) + + W(7, 1026039088, 1034848362, 1033863059, -1106488144); + sum2 = W(0, -1099743490, 1011906269, 1037961444, -1108436276) + W(1, 1035577370, 1026663055, -1152297667, 991169347) + + W(2, -1115293072, 1057096352, -1095995741, 1045881501) + + W(3, -1102421679, 979639118, 1050233559, -1097952901) + W(4, 1038726510, 1044440375, 1022991474, -1122343653) + + W(5, 1032210717, 1032214892, -1139108605, 1026937641) + + W(6, -1107407193, 1028754425, -1115369483, -1105994898) + + W(7, -1120037205, -1123040177, -1104470915, 1022469878); + WS(1066613200, -1123971367); + sum1 = + W(0, 1021072883, -1096708150, 1051074184, 1026036124) + W(1, -1125712504, -1090955664, 1051398158, -1108624754) + + W(2, 1041158633, -1159875842, 1058934814, 1040756057) + + W(3, -1106077125, -1079911082, 1061682240, -1090753503) + + W(4, 1036847484, -1093258685, 1055629981, -1114271443) + W(5, 1032267227, -1103560179, 1054180539, 1025501945) + + W(6, 1016782186, -1089925031, 1052861897, -1105228376) + + W(7, 1031886911, -1106725252, 1047227273, 1043536294); + sum2 = W(0, -1120579723, -1102098813, -1103583809, 1039910130) + + W(1, -1104595791, 1059079365, 1035649855, -1110832147) + + W(2, 1060943463, -1078775742, 1055196914, -1115424415) + + W(3, -1081375340, 1070420847, 1036446407, -1092581761) + + W(4, 1048074442, -1106373275, 1050043459, -1101688097) + + W(5, 1049316438, -1084015554, 1050724789, -1115969635) + + W(6, -1088959333, 1065759760, -1088425301, 1051846236) + + W(7, 1054216959, -1087995034, 1052608338, -1103333947); + WS(1015488492, -1082266482); + sum1 = W(0, -1115294884, -1096364772, 1054950795, -1109722877) + + W(1, 1041881447, 1036768790, 1051075280, 
1018280484) + + W(2, -1116482020, -1083421327, 1062057653, -1105734603) + + W(3, 1028536641, -1079950554, 1057564011, -1124656723) + + W(4, 1044897017, -1088856963, 1054170231, -1129161159) + + W(5, -1108243791, -1096513911, 1056273768, -1113027740) + + W(6, 1033207234, -1104622888, 1056919206, -1132093050) + + W(7, 1015505847, -1096156044, 1054606378, 1025701004); + sum2 = W(0, -1103335141, -1123809016, 1056172663, -1093357581) + + W(1, -1087173612, -1080484881, 1047550056, -1112078382) + + W(2, 1045744538, 1068142929, 1051886373, -1102262951) + + W(3, -1106751239, 1065979347, 1064817240, -1097769156) + + W(4, -1089877767, 1048007586, 1065470183, -1095892093) + + W(5, 1054342226, -1086806497, -1098340366, 1033891425) + + W(6, 1047940388, -1098712592, -1102735881, -1105000175) + + W(7, -1120076732, -1082944418, -1088981287, 1059622670); + WS(-1089551423, -1072840444); + sum1 = + W(0, 1041019406, 1055991358, -1086576149, 1027645528) + W(1, 1022173115, 1057526040, -1100702364, -1141651861) + + W(2, -1113297010, 1036785718, -1093552675, 1016689751) + W(3, 1015463395, 1059475300, -1089572736, 1031793146) + + W(4, -1103473726, 1056836657, -1091007140, -1125097480) + + W(5, 1040814976, 1050051043, -1090411145, 1041821422) + W(6, 1007312016, 1050846086, -1098618125, 1015074267) + + W(7, 1029778436, 1053584058, -1097362556, -1104623366); + sum2 = + W(0, 1069423067, -1076703167, 1041694716, 1032293264) + W(1, 1072187327, -1073855626, -1115348910, 1008297781) + + W(2, 1072064387, -1079134619, -1107764634, -1120515458) + + W(3, 1072314518, -1077864031, -1089838794, 1037018177) + + W(4, 1066704589, -1081021850, 1036250076, -1121141704) + W(5, 1063804587, -1081933355, 1028352388, 1025803257) + + W(6, 1053139391, -1094622730, -1114321992, -1123317222) + + W(7, 1042659879, -1099654949, 1035224325, 1027779018); + WS(1053893247, -1106302313); + sum1 = + W(0, 1031892188, 1044357278, -1093335703, -1124627014) + W(1, -1110506435, 1051962248, -1098499127, -1121076939) + + W(2, 
-1127073177, 1061666784, -1085429924, 1038775118) + W(3, 1047997302, 1064449402, -1082436051, 1042378233) + + W(4, 1033243951, 1041901099, -1118050391, -1100759903) + + W(5, -1107646187, 1053582704, -1104422127, -1107823800) + + W(6, -1112186286, 1037482077, -1118004236, -1134911574) + + W(7, -1112631434, 1050110011, -1103845867, -1111029158); + sum2 = W(0, 1045534781, -1106914963, -1093884354, 1048929407) + + W(1, -1136883904, -1105365809, 1058094665, -1099297773) + + W(2, 1027794169, -1087950271, -1076257974, 1057853479) + + W(3, -1091599460, 1049402813, 1070891205, -1101014716) + + W(4, -1105954111, 1059435817, 1057695328, -1088483015) + + W(5, -1110353364, 1056244223, -1096007662, -1116753268) + + W(6, 1026306116, -1106437513, 1042940020, -1107558514) + + W(7, -1105327762, 1049633694, 1019026622, -1111740866); + WS(1043803134, 1031905225); + sum1 = W(0, -1113675573, 1045344728, -1093561052, -1125193272) + + W(1, -1137620096, 1015450714, 1039772705, -1142985628) + + W(2, 1045527725, 1036603287, -1087612823, 1043570724) + W(3, 1047471777, 1057861093, -1131204708, 1032608022) + + W(4, 1025442024, 1007995990, -1109713128, -1125123818) + + W(5, -1101766076, 1048683192, -1098589181, 1017746514) + + W(6, 1041363873, -1103914745, -1142310312, -1137687994) + + W(7, -1104559247, 1053784172, -1096789826, -1120823545); + sum2 = + W(0, -1102611800, 1052608919, -1092971881, 1033368250) + W(1, 1042572795, -1112317124, 1050084573, -1099740129) + + W(2, -1143715563, 1036000712, -1099050864, 1046340985) + W(3, 1047906835, 1058925768, 1062269077, -1106615878) + + W(4, 1034463821, -1089628524, -1100033854, -1143169701) + + W(5, -1097650393, -1097284017, -1155008883, 1034459058) + + W(6, 1044772028, -1090965565, 1043620595, -1102441858) + + W(7, -1105069125, 1053414419, -1099181286, -1145018737); + WS(1065606800, 1041895077); + sum1 = + W(0, 1041785011, 1041397665, -1108226929, -1113822832) + W(1, -1113293285, 1050236482, -1089967688, -1105474774) + + W(2, 1031552121, 1063682744, 
-1092426598, 1045393909) + + W(3, -1114204019, 1055253222, -1085544687, -1106251932) + + W(4, 1014586031, 1057927896, -1092878524, 1018782386) + W(5, 1032452466, 1054837340, -1094102329, -1135035573) + + W(6, -1103346054, 1043307490, -1097746379, -1114747425) + + W(7, 1025447425, 1058487624, -1098262983, -1119448485); + sum2 = + W(0, 1038489223, -1094743304, -1079775068, 1069957645) + W(1, 1032193624, -1106563024, -1075431018, 1074314813) + + W(2, -1137158564, 1029053577, -1075905408, 1067854461) + + W(3, 1048862753, -1103121169, 1061439417, -1090496129) + + W(4, -1107218869, 1018988618, 1061842524, -1083145579) + + W(5, -1112577633, 1015522148, 1055667456, -1093776696) + W(6, 1034028859, -1107822714, 1032002642, 1038458963) + + W(7, -1110240510, 1017646246, 1047594841, -1100108785); + WS(1050857279, 1035401177); + sum1 = + W(0, 1029370009, 1049972466, -1100844155, -1114359850) + W(1, -1121232094, 1045749679, -1097541146, -1119608387) + + W(2, -1118971861, 1054737744, -1089471141, 1032044074) + + W(3, -1116554354, 1064433520, -1088203652, 1039513726) + + W(4, -1102118870, 1057059356, -1089223685, 1028806533) + W(5, 1035949272, 1055161550, -1095333099, 1039358524) + + W(6, -1118591397, 1035470985, -1098430605, -1114373271) + + W(7, -1155732684, 1050840570, -1105548665, -1113669781); + sum2 = + W(0, 1034282103, 1040723224, 1028955705, -1104697256) + W(1, -1111920318, -1118965841, 1031483971, -1123254751) + + W(2, 1023538768, -1128437695, 1043259383, 1034554870) + + W(3, -1133376386, 1038672881, -1148076972, -1078609243) + + W(4, -1119100907, 993338648, -1084940093, -1075024478) + W(5, 1014776166, 1050613235, 1050550535, -1100774866) + + W(6, -1117007635, -1108740075, -1098463376, 1069274341) + + W(7, -1121555310, 1040276790, 1054296012, 1072481706); + WS(1059068159, -1095269543); + sum1 = + W(0, 1022243158, 1058751902, -1083015108, 1040310153) + W(1, -1126585055, 1052633434, -1095802212, 1009201364) + + W(2, -1113503115, 1061437964, -1083734386, 1040208083) + + W(3, 
1034732162, 1062365313, -1087136899, -1116554445) + W(4, 1029695369, 1054753811, -1089074173, 1044292534) + + W(5, 1045709362, 1047783152, -1093733821, -1141695574) + W(6, 1005846771, 1043895001, -1096742887, 1042140272) + + W(7, 1049870212, -1119990530, -1106170901, -1106848091); + sum2 = + W(0, -1110453349, 1048158098, 1064329884, -1096499673) + W(1, -1118716929, -1117235638, -1122504669, 1023790996) + + W(2, -1111084235, 1037169053, 1037379653, 1035642078) + W(3, 1027079271, 1044450691, -1106426334, -1103100998) + + W(4, 1058163522, -1086959067, -1081956377, 1059064502) + + W(5, 1060433363, -1096141012, -1081432626, -1109003581) + + W(6, 1058751114, -1102281652, -1086790317, 1048822766) + + W(7, -1077532089, 1061563184, 1073667932, -1089552462); + WS(-1079238176, -1098575359); + sum1 = + W(0, -1122061272, 1040856756, -1106993315, -1113276757) + W(1, 1012614613, -1098659523, 1049053518, -1110111537) + + W(2, 1049213476, 1050219649, -1088887964, 1048270483) + W(3, -1091368434, 1046432350, 1061247706, -1098550683) + + W(4, 1050034455, -1092787824, -1113029435, 1053252708) + + W(5, -1115399887, 1045840143, -1096901418, -1120030007) + + W(6, -1105000541, -1102913028, 1045569657, -1144293604) + + W(7, 1040609063, -1104981396, -1106428832, 1037272948); + sum2 = W(0, 1035271628, -1096995906, -1097624644, 1051183735) + + W(1, 1033447544, 1032410306, 1048740225, -1095652467) + + W(2, -1113618952, 1053850685, -1109777102, 1037584786) + + W(3, -1104023060, 1054515359, 1058355960, -1116173683) + + W(4, 1022009109, -1114408860, -1139294461, -1096371453) + + W(5, -1104458884, 1030897990, -1116975083, 1030595307) + + W(6, 1044684178, -1115683354, -1099524388, -1117706649) + + W(7, -1104406640, -1106540716, 1036671037, -1108415342); + WS(1064155455, 1041078114); + sum1 = + W(0, -1114884791, -1133388468, 1031759625, -1116707016) + W(1, 1038026665, -1103926126, 1051074474, -1135572861) + + W(2, 1040569148, -1088265213, 1049866386, -1145284192) + + W(3, -1123095214, -1088521037, 
1060913876, -1110455500) + + W(4, -1113346810, -1094483240, 1044072016, -1116167073) + + W(5, -1098688777, 1052099081, 1034732568, 1029730485) + W(6, -1106297864, 1038941559, -1138713347, 1017022612) + + W(7, 1033022404, 1027852876, 1019301565, 1028095637); + sum2 = W(0, 1027468398, -1120484835, 1033713911, 1037828351) + + W(1, -1111732215, -1115316427, -1113615213, -1102210431) + + W(2, -1149402831, 1042551085, 1015721118, 1027887653) + + W(3, -1136133524, -1101059653, 1005178376, -1105049969) + + W(4, 1058132410, 1067179636, -1112798365, 1033627060) + + W(5, -1084703700, -1093846517, 1036573120, -1112780439) + + W(6, -1103172541, 1015696434, 1035924289, 1025169534) + + W(7, 1011948492, -1119207903, -1106241198, 1034168377); + WS(1056869759, -1097268032); + sum1 = W(0, 1018133293, 1036411431, 1039633317, -1111689725) + + W(1, -1105532941, 1007771751, 1034605623, -1107937040) + + W(2, 1027243330, -1116024394, 1050783724, -1105991139) + + W(3, -1097428310, 1032024955, 1010531964, -1109581452) + + W(4, -1099204213, -1130187397, 1045069064, -1103861656) + + W(5, -1133851766, 1034973653, 1044277428, 1044990678) + + W(6, -1138393927, -1114469768, 1046520720, -1106353867) + + W(7, -1118550618, -1118964077, 1036629184, -1155661924); + sum2 = + W(0, 1031819015, 1040620760, -1130428603, -1130628701) + W(1, -1113612713, 1045122152, -1113774176, -1118722775) + + W(2, 1040659447, -1122637452, -1093905874, 1015363301) + + W(3, -1113225510, -1080510307, -1073670430, -1129641903) + + W(4, 1023788280, -1103769905, 1040925011, 1045479736) + W(5, -1115035325, 1050277803, 1074164392, 997650792) + + W(6, 1009487418, 1041168723, 1056579340, 1043373785) + W(7, 1023947578, -1112728330, 1044922942, -1122436477); + WS(1064095487, 1025785067); + sum1 = + W(0, 1026371284, -1106851535, -1128892272, 1040514533) + W(1, -1097495743, 1042476562, 1049973404, -1103190740) + + W(2, 1050378694, -1104849584, -1115675316, 1040022019) + W(3, 1008396404, -1085976658, 1058008769, 1040395090) + + W(4, 
1042871873, -1097042470, -1098054097, 1050097558) + + W(5, -1103138663, -1109264236, 1052615848, -1102548473) + + W(6, -1136206974, 1035073279, 1037088776, -1157051795) + + W(7, 1034556564, -1094929447, 1045135411, 1023679636); + sum2 = + W(0, -1124722923, 1033420628, -1105442746, 1029168594) + W(1, -1114618025, -1100145939, 1047309531, 1048079041) + + W(2, 1043439029, -1115804833, -1084852425, -1101613108) + + W(3, 1036363299, 1057589449, -1089136554, 1042227257) + W(4, -1096133271, 1053618149, 1028964970, 1045151454) + + W(5, 1049502378, -1091358477, 1056312267, 1041752672) + W(6, -1106999272, 1030436134, -1100909906, 1036382707) + + W(7, 1013819494, -1119548384, 1043379377, -1113267324); + WS(1060109055, 1023402244); + sum1 = W(0, 1039025762, 1051909299, -1098089231, -1107880995) + + W(1, -1109278395, 1054481389, -1093104548, 1018380380) + + W(2, -1125210976, 1061074499, -1087624466, -1167447218) + + W(3, -1107337262, 1069080830, -1092709234, -1119879878) + + W(4, -1119314274, 991275263, -1085745760, 1030906608) + W(5, 1038278743, 1049054227, -1093068092, 1028271716) + + W(6, -1120902815, 1043342663, -1102438460, -1122586985) + + W(7, 1024999351, 1051802331, -1098889074, -1107737008); + sum2 = + W(0, 995982518, 1035578813, -1118279893, -1115504751) + W(1, 1033194341, -1121421677, -1114972659, -1108849613) + + W(2, 1038373562, 1046888388, 1069469835, 1041670894) + W(3, 1040171687, -1103362299, 1078927590, 1050450381) + + W(4, -1108098969, 1050255581, -1065023683, -1097423961) + + W(5, 1027047733, -1096418856, -1082214759, -1114805949) + W(6, -1147696971, 1042040695, 1045909115, 964966831) + + W(7, 1033796110, -1116244525, -1112457727, 1025540676); + WS(-1092049407, -1078216845); + sum1 = W(0, 1019093687, 1057591089, -1089127853, -1109784091) + + W(1, 1030002432, 1050969332, -1096074010, 1043174565) + + W(2, -1100550858, 1056027116, -1082843219, -1126191805) + + W(3, 1055647027, 1063070242, -1088645600, 1049441628) + + W(4, -1103872766, 1039049940, -1097545633, 
-1097650089) + + W(5, 1041444210, 1057957242, -1095349663, 1024042875) + + W(6, -1115845919, 1034942714, -1103665506, -1123963793) + + W(7, 1042339236, 1059965491, -1089209262, -1105758607); + sum2 = + W(0, -1086426610, 1062347208, 1053593730, -1108806990) + W(1, -1096143108, 1055082976, -1101252044, -1138780047) + + W(2, -1089951657, 1045157118, -1111940928, -1099705126) + W(3, 1062855195, 1048769854, 1037201995, 1058343455) + + W(4, 1068897447, -1073459143, -1091098547, -1094950430) + + W(5, 1074016711, -1078468086, -1119407664, 1046274438) + + W(6, 1075026151, -1071781830, 1031118734, -1128975824) + + W(7, 1074160921, -1072043968, -1095931446, 1035271906); + WS(-1092377983, 1032564911); + sum1 = + W(0, 1026968586, 1051240140, -1140824311, -1108814006) + W(1, 1001025293, -1135919542, -1114206145, 1031981930) + + W(2, -1101464865, 1047585891, 1035412731, -1105914151) + + W(3, -1099757657, 1046337282, -1093593858, -1111723206) + + W(4, -1099131534, 1042168181, 1042300945, -1114245051) + W(5, 1021990836, 1042765483, -1105010841, 1047084274) + + W(6, -1124857771, 1039475666, -1129901419, -1110153104) + + W(7, 1016095931, 1031087233, -1150842169, 1021006644); + sum2 = W(0, -1152594313, -1103837493, -1119502913, 1035550709) + + W(1, -1114711758, -1102219265, 1049238470, -1111809635) + + W(2, -1122222477, 1021928584, 1051353268, 1038283603) + + W(3, -1119994373, 1043085394, 1069806319, -1113383091) + + W(4, -1117872135, -1098150014, 1040245198, 1038911980) + + W(5, -1136114154, -1114380427, -1084770114, -1106271331) + + W(6, -1118889611, -1103586212, -1097500544, -1109409577) + + W(7, -1120239593, 999997765, 1032457879, -1130668925); + WS(1066805616, -1146277627); + sum1 = + W(0, -1135230785, -1097679853, 1046488473, 1036204713) + W(1, -1149155717, -1106566573, 1047676826, -1098488427) + + W(2, 1034010150, -1092037473, 1047117051, -1103697017) + W(3, 1042337650, -1085421897, 1062693860, 1057957971) + + W(4, -1127867356, -1091217395, 1049537116, -1114566390) + + W(5, 
1036367663, -1094061036, 1056189429, 1034870184) + W(6, 1011804270, -1108406741, 1017502967, -1103604141) + + W(7, 998521116, -1096997053, 1049188263, 1029261884); + sum2 = W(0, -1122819855, 1035645255, -1107161682, 1042480496) + + W(1, 1029215207, -1111163705, 1019166485, -1107159841) + + W(2, -1109976389, 1026839491, -1129473215, -1115971786) + + W(3, 1044451524, -1095838714, 1082201538, -1067229991) + + W(4, -1130065951, -1147512571, 1052773623, -1091650896) + + W(5, 1027965847, -1156411358, 1033233466, -1142132409) + + W(6, -1119702735, -1110332882, 1034307583, -1107215185) + + W(7, 1036873692, -1114357123, 1001850219, 1025815163); + WS(1040205182, 1040987841); + sum1 = + W(0, 1032709552, 1050828331, -1098789018, -1130845956) + W(1, 1028035406, 1054299632, -1102534446, -1131566290) + + W(2, -1152624047, 1000765841, -1087146945, 1024089890) + W(3, 1047238622, 1064828620, -1084918512, 1044521480) + + W(4, -1110798127, 1059217829, -1080798168, 1041701723) + + W(5, -1160283945, 1057730984, -1094152707, -1105870556) + + W(6, -1129145589, 1050797759, -1154982570, -1121779940) + + W(7, 1035307633, 1040620053, -1104995442, -1110323595); + sum2 = W(0, -1104834685, 1039834326, 1040649104, -1113573633) + + W(1, -1098615503, 1050496819, 1034170750, -1119108596) + + W(2, -1090508283, 1045914856, -1118657109, -1128160390) + + W(3, -1080862372, 1070974917, -1102559609, -1095002000) + + W(4, 1048874616, -1085801376, 1041220807, 1053594264) + W(5, 1063492499, -1081343708, 1047362948, 1022825140) + + W(6, 1035824034, 1033985362, -1099586035, 1041895669) + + W(7, 1062512173, -1089072248, -1130222490, -1115149866); + WS(-1087487423, 1060332710); + sum1 = W(0, -1105878689, -1095746139, 1053913286, 1037671952) + + W(1, 1042437466, -1089004262, 1055197099, -1117440135) + + W(2, 1040585322, -1081828961, 1059867107, -1114610404) + + W(3, -1121111477, -1099866351, 1061690846, 1028605502) + + W(4, -1112195269, -1115422148, -1104536871, 1040822242) + + W(5, -1112892785, 1039314504, 
-1102951638, 1041433743) + + W(6, -1122925576, -1117471766, -1107184778, 1048192776) + + W(7, 1012646976, -1108492170, -1130420108, 1029799624); + sum2 = W(0, 1017331955, -1097747777, -1097037835, -1162485367) + + W(1, -1104677463, 1040900608, 1016355419, -1104406873) + + W(2, -1117673238, -1081336501, -1089023490, -1117660642) + + W(3, -1094338757, 1073518733, 1068423959, -1096388411) + + W(4, -1119646366, -1103573743, 1048344480, -1093216188) + + W(5, -1104710153, -1106204508, 1057220074, 1028492960) + + W(6, -1107625181, 1041186571, 1033091565, -1092192530) + + W(7, 1022869095, -1097636016, 1053959289, 1049060345); + WS(-1096756863, 1051174382); + sum1 = + W(0, 1025372546, -1115668125, 1000611497, 1034391118) + W(1, -1106905723, 1039995714, -1105014230, 1041916975) + + W(2, -1114360751, 1043101563, -1117013085, 1027806076) + + W(3, 1025624474, -1089490746, 1058174613, -1111998130) + W(4, 1037998160, -1092891318, 1045000731, 1008039732) + + W(5, 1032794399, -1107085565, 1035643438, 1030397185) + W(6, 1034992970, -1114405501, -1121180749, 1018926829) + + W(7, -1114244794, -1168885266, -1120880459, 1029203969); + sum2 = + W(0, -1089960886, -1095856011, 1007316785, 1036775569) + W(1, -1092452707, -1106157919, 1041476280, -1107942220) + + W(2, 1049541326, 1074170001, 1050706137, 1021239050) + W(3, 1032119458, -1085150802, -1083810164, -1132331444) + + W(4, 1032929374, 1046048366, 1043182557, 1036225521) + + W(5, -1110883627, -1112079347, -1129593193, -1113579728) + + W(6, -1123073435, 1027735021, -1214669080, 1009639193) + + W(7, 1026522309, -1111935169, 1009058861, 1022066674); + WS(1063524863, 1010978751); + sum1 = W(0, 1033376264, -1090913225, 1054337940, 1034437913) + W(1, 1035248130, -1094935586, 1045510498, 1040021853) + + W(2, 1029138555, -1084407830, 1057961148, -1141033844) + + W(3, 1040606570, -1085174115, 1062831778, 1032741433) + + W(4, -1146340206, -1080254739, 1060252175, -1127614262) + + W(5, -1122169062, -1094320024, 1058700983, -1136275261) + + W(6, 
-1113033206, 1033710501, 1045946247, 1007782611) + W(7, -1115451138, 955403507, 1033261261, 1030604567); + sum2 = W(0, -1096400044, 1053621644, 1017015195, 1037111777) + W(1, 1043917783, 1009306318, 1005897821, -1106811629) + + W(2, -1123678571, 1051339656, -1141989437, 1047789047) + + W(3, -1090244436, 1062093709, 1065539532, -1093438727) + + W(4, 1046901313, -1080576226, -1083332175, 1049955112) + + W(5, 1048870815, -1106342004, -1097183812, 1043219374) + + W(6, 1033884063, -1098524562, -1097464341, 1050393105) + + W(7, 1053211427, -1124418811, -1092155120, 1044056917); + WS(-1085369887, -1080302329); + sum1 = W(0, 1021216124, -1112260523, 1035888345, -1110274640) + + W(1, -1121762938, -1119192737, 1049037307, -1111476420) + + W(2, -1103047566, 1050211017, 1061443675, -1097892613) + + W(3, -1094342263, 1058411732, 1044828010, -1097171068) + + W(4, -1105630871, -1139271463, -1119204121, -1112160677) + + W(5, -1107174593, 1042276856, -1100915542, 1026261917) + + W(6, 1025950537, -1113852108, -1174284878, -1110403297) + + W(7, -1146340105, -1119668597, -1119790119, -1132483222); + sum2 = W(0, -1115512925, 1035074177, -1191992884, -1115656893) + + W(1, -1146353705, 1040461813, 1043381354, -1137020948) + W(2, 1036066439, 1042528734, 1059049535, 1021899784) + + W(3, -1134998124, 1058355837, 1058956516, -1112812041) + + W(4, -1093678923, -1081621864, -1095612272, 1012903452) + + W(5, -1122018103, -1089980594, 1050844919, -1123282157) + + W(6, -1110317749, 1049357806, -1100731037, -1122890057) + + W(7, 1002801625, -1101025299, 1036998024, 1023124620); + WS(-1106521214, -1077666555); + sum1 = W(0, 1003321829, 1052142545, -1102912320, -1108686032) + + W(1, -1120901013, 1052094618, -1105733097, -1118608535) + + W(2, -1105222370, 1060439250, -1085669204, 1045702647) + + W(3, -1107011833, 1059969903, -1113880763, -1106161276) + + W(4, 1029823211, -1095361591, 1052882623, -1103122454) + + W(5, 1000924822, 1039904896, -1107186882, -1106172071) + + W(6, -1127794128, 1000548911, 
-1118745688, -1113781993) + + W(7, 1027793952, 1046166153, -1096551831, -1132637458); + sum2 = W(0, 1034079401, 1025264110, -1123371956, -1116991392) + + W(1, -1123608512, -1119278092, -1104246380, -1156408861) + + W(2, 999851902, 1031056700, 1051208451, -1098806852) + W(3, -1128074608, 1059405503, 1063218900, 1038716729) + + W(4, -1102099502, -1123208440, -1110007228, -1136590423) + + W(5, -1109589984, -1081290516, -1131612640, 1034885175) + + W(6, -1127758712, 1015173988, -1124651080, 1009682031) + + W(7, 1032460032, -1121242732, 1025045004, -1115963068); + WS(1061862175, -1081928168); + sum1 = W(0, -1105584175, -1105871745, 1043886056, 1027658214) + W(1, 1024897880, -1093982770, 1052657028, 992865878) + + W(2, 1038516044, -1092069365, 1055074797, -1121064957) + + W(3, 1040792761, -1083616743, 1065026281, -1106374835) + + W(4, 1041867802, -1089303929, 1056987882, 1015027694) + + W(5, -1146323132, -1089274268, 1054263413, -1132520748) + + W(6, 1033649803, -1106858177, 1043742846, -1137844354) + + W(7, -1113939311, -1096909360, 1049053899, 1017619030); + sum2 = W(0, 1078367032, 1027322757, 1023449836, -1115583846) + W(1, 1056220347, 1044756153, -1148770222, 993742973) + + W(2, -1078889751, -1096547764, 1043960988, -1130380138) + + W(3, -1073546091, -1101560565, -1126862726, -1147412718) + + W(4, -1104234997, 1042443775, 1029550531, -1115461788) + + W(5, -1129365336, 1048890957, -1133306383, 1019814478) + + W(6, -1132392998, -1104280017, -1130422948, -1143007558) + + W(7, -1105729449, 1028418751, 1042854212, -1123464959); + WS(1052938943, -1103606318); + sum1 = W(0, -1114719934, -1087051802, 1058073278, -1114447856) + + W(1, -1110318513, -1108424920, 1044445625, 1040769086) + + W(2, 1051495307, -1085940948, 1060893495, 1037779432) + + W(3, -1100729633, -1082198177, 1058839039, -1103818980) + + W(4, 1058926789, -1090825863, 1060585177, 1053070735) + + W(5, -1090569589, -1088355266, 1053564817, -1091337376) + + W(6, 1050591187, -1096343413, 1054456219, -1131842819) + 
+ W(7, -1120060704, -1090847836, 1059253187, 1026807692); + sum2 = W(0, 1015621136, 1033006755, -1072583370, 1074691924) + + W(1, 1032703681, -1098289077, -1084108607, 1067219112) + + W(2, 1048700747, -1098808895, -1081224226, 1064576163) + + W(3, -1098355550, 1046984196, -1081041246, 1066863963) + + W(4, 1047054427, -1083721832, 1069053051, -1084686190) + + W(5, -1097901072, 1061110777, 1046858869, -1086075738) + + W(6, -1126056922, -1109410901, 1068664236, -1079374171) + + W(7, -1165737020, 1047097145, 1076043754, -1071293986); + WS(-1083248351, 1041375270); + sum1 = + W(0, 998616699, -1087439008, 1054645196, 1045294540) + W(1, -1142901727, -1092067298, 1052452449, -1128264947) + + W(2, 1043282847, -1097778587, 1050602866, 1042620067) + + W(3, -1128981712, -1084179155, 1058025241, -1146688137) + + W(4, 1049050406, -1090979219, 1061357806, -1127282486) + + W(5, -1110272759, -1086455104, 1053431193, -1125997182) + W(6, 1045114289, -1095775369, 1052134295, 981342481) + + W(7, -1170526381, -1087021556, 1053157267, 1046767364); + sum2 = + W(0, -1076239116, 1068235144, 1054062613, -1114529265) + W(1, -1079299620, 1068687973, 1019509447, -1116897962) + + W(2, -1079042334, 1068481951, 1057377046, 1041823371) + W(3, -1083365485, 1066921525, -1100834259, 1032806429) + + W(4, 1049726715, -1106704213, -1095788298, -1104989857) + + W(5, 1066758659, -1080829299, -1107253929, -1107915189) + + W(6, 1069675355, -1077473679, -1112126912, 1021289065) + + W(7, 1075688460, -1073149491, -1090199615, 1034867372); + WS(-1089965567, 1041739713); + sum1 = W(0, 1014059625, 1050606464, -1097348868, 1016608410) + + W(1, -1109541748, 1046384755, -1098095776, -1110057610) + + W(2, -1120181702, 1052952467, -1098227712, -1165355548) + + W(3, -1113826001, 1062759283, -1097909490, -1103909923) + + W(4, -1099389050, 1061539071, -1089710746, 1018410859) + + W(5, -1107441048, 1056969105, -1097990985, -1097884829) + + W(6, -1110918155, 1043585729, -1112431723, -1109883584) + + W(7, -1129336123, 
1054439728, -1101291373, -1118732474); + sum2 = + W(0, 1035083591, -1113211599, 1032203968, -1112771443) + W(1, -1112359157, 1037267536, -1129901649, 999833884) + + W(2, 1037205500, -1110383577, 1050641640, -1113848439) + W(3, -1119560270, 1053336900, 1062183610, 1048127168) + + W(4, -1130432545, 1036846600, 1052549520, -1087091182) + W(5, 1033928454, 1029308948, 1037218786, -1080175934) + + W(6, -1111608107, 1035214305, -1098666781, -1106600446) + + W(7, 1027763767, -1137000642, 1041216407, -1105808372); + WS(1040803966, -1079223548); + sum1 = W(0, -1103550802, -1108233168, -1115946235, 1021078213) + + W(1, -1113401739, -1097729981, 1016715200, -1123835458) + + W(2, -1105545077, -1112466052, 1051122342, -1103483180) + + W(3, -1095164520, 1056711305, 1072346450, -1092700173) + + W(4, -1102562113, 1033129898, 1046866637, -1104392485) + + W(5, -1106149592, -1100704991, 1041579689, 1033532004) + + W(6, -1123094707, -1106323272, 1017795558, -1128003929) + + W(7, -1105989383, -1109921151, 1019070562, -1109398092); + sum2 = W(0, -1132003761, -1097131135, 1033952970, -1114013558) + + W(1, -1126076753, -1098724323, -1102131199, 1027012993) + + W(2, 1028715469, -1094337061, 1043946979, -1109613274) + + W(3, -1094940877, 1075670208, 1037045260, 1026138301) + + W(4, -1096054213, -1092647285, 1052392413, -1107006741) + + W(5, 1050064171, -1119763725, -1101691459, 1032910718) + + W(6, -1095828963, -1104609741, -1104710657, -1127911473) + + W(7, 1024912709, -1103386045, 1042676905, -1109320626); + WS(-1077711088, -1080462700); + sum1 = + W(0, 1016976667, -1095463505, 1048382760, 1033669226) + W(1, -1143635214, -1094028818, 1043759401, 1023995271) + + W(2, 1050435883, -1084473735, 1060944248, 1038367764) + W(3, 1025648038, -1081613943, 1061061657, -1104130812) + + W(4, 1033190198, -1092817267, 1059085444, 1035230572) + + W(5, -1119077035, -1089963906, 1058923652, -1115548134) + + W(6, -1127874241, -1112556483, 1024757337, 998948881) + + W(7, -1124293633, -1096263708, 1052630131, 
1035279350); + sum2 = + W(0, -1118714905, 1038737245, 1045939428, -1084863446) + W(1, -1128906105, 1033599435, -1124872105, -1120736313) + + W(2, -1104617441, 1047074065, 1073600723, 1072627800) + + W(3, -1102497202, -1081197801, -1069501955, 1076529178) + + W(4, 1052988680, 1051590837, -1078764847, 1066278053) + W(5, 1025452854, 1036478618, -1078626366, -1115729701) + + W(6, -1147569173, 1034730428, -1094352359, -1119746081) + + W(7, 1045309195, -1096538670, -1097394746, 1001372341); + WS(-1085605823, 1039908372); + sum1 = W(0, -1110519177, -1101464226, 1050281295, -1136474287) + + W(1, 998302542, -1097280531, 1040574050, 1017687606) + W(2, -1105109851, 1050325520, -1112501642, 1027599892) + + W(3, 1051114493, -1093718398, -1095999987, 1038451002) + + W(4, 1047758692, -1109283231, 1052111506, -1112873934) + + W(5, 1047411644, -1103804777, 1013903280, 1026946257) + + W(6, 1008966514, -1107759461, -1131548857, 1007362713) + + W(7, 1027414133, -1099881946, 1037344125, 1030395745); + sum2 = W(0, -1115357326, -1098998597, 1033193484, -1119934044) + + W(1, -1129481498, -1104582263, 1041990652, -1109942748) + + W(2, 1037423788, -1098625511, 1052524895, 1028176618) + W(3, -1149109833, 1036322812, 1029624616, 1032249206) + + W(4, -1107237807, 1050926614, -1102468922, -1117055607) + + W(5, -1098320683, 1045914812, 1016354632, 1026298641) + + W(6, -1118016391, 1029306918, 1029333966, -1116685452) + + W(7, -1111060805, 1035368549, 1036818044, -1113790705); + WS(1066221936, -1112686252); + sum1 = + W(0, -1111426931, 1042737360, 1039181581, -1110677245) + W(1, 1049363148, -1108750629, -1105687720, 1035534704) + + W(2, -1090113310, 1053136109, 1051677110, -1101864162) + W(3, 1038796336, -1095853365, 1031102479, 1019901392) + + W(4, -1102958241, -1108890233, 1058267293, -1107302241) + + W(5, 1041899238, 1023718298, -1089545128, 1048841797) + W(6, -1104001476, 1033191264, 1047293124, -1106653499) + + W(7, -1112879133, -1165972390, 1010166848, 989728967); + sum2 = + W(0, 
1019579309, -1121628281, -1104132268, -1133616314) + W(1, -1115216952, 1041466962, -1110659898, 1046938970) + + W(2, -1101014764, 1044350744, -1092586315, -1089581193) + + W(3, 1035949455, -1096356807, 1067161402, -1097777539) + W(4, -1125343389, 1019949697, 1044980625, 1025080038) + + W(5, 1020295703, 1011899626, 1028945397, 1042942659) + W(6, -1107189524, 1020414781, 1024223321, 1000298789) + + W(7, 1032252124, -1108620092, -1106879950, 1034429670); + WS(1065647552, 1042223795); + sum1 = + W(0, -1118213891, -1090185691, 1054600851, 1042956769) + W(1, 1047859212, -1099254654, 1056675965, 1045938459) + + W(2, -1102035956, -1085546115, 1052764224, -1095542592) + + W(3, 1049817209, -1090115457, 1060918524, 1048397149) + + W(4, -1136727619, -1086839140, 1050058698, -1107166488) + + W(5, 1018314812, -1089622500, 1053194074, -1115710386) + W(6, 1048950515, -1103143659, 1055030644, 1046421002) + + W(7, -1111834851, -1088857895, 1046059111, 1019098869); + sum2 = + W(0, 1038047795, 1045715537, 1062099960, -1074982428) + W(1, -1100534663, -1109612723, 1074456323, -1071944747) + + W(2, 1043709757, -1109559353, 1074047524, -1086420775) + + W(3, -1098906327, -1088589586, 1061406543, -1087600981) + + W(4, 1054998296, 1046750383, -1109444720, 1058024333) + W(5, -1097979318, 1030039799, -1095255331, 1040647828) + + W(6, 1042447686, -1112135838, -1106577876, 1042372587) + + W(7, -1135615252, -1108474833, -1098188594, 1051479186); + WS(-1099261566, -1102058551); + sum1 = W(0, 1037460750, -1099740733, 1035868896, 1046250530) + W(1, -1118752449, 1038344912, 1028635221, 1040547388) + + W(2, 1048855473, -1097823268, -1135561696, 1049218244) + + W(3, 1046116085, -1077722267, -1089090112, 1053686776) + + W(4, 1041532007, -1086845489, 1054063629, 1041462458) + W(5, 1043443965, -1109540512, 1036245196, 1041323791) + + W(6, 1035797482, -1112472539, 1039520068, -1116084210) + + W(7, 1048656673, 1006300060, -1115462467, 1050214602); + sum2 = + W(0, -1107023704, -1099880087, 1047853298, 
-1106650230) + W(1, -1103130345, 1049640504, -1100868353, 1035820630) + + W(2, 1025081245, -1099241173, 1050410790, -1111373979) + W(3, 1007138702, 1060797185, -1096656068, 1048085306) + + W(4, 1020633935, -1096100738, 1053061345, -1098540289) + W(5, 1042694232, 1034621436, -1092716332, 1045275377) + + W(6, -1116958023, -1100840753, 1057695342, -1095110662) + + W(7, -1117764771, 1051814059, -1092707384, 1026583068); + WS(-1081437504, -1086037448); + sum1 = W(0, 1002898847, 1048903167, -1098037791, -1124004134) + + W(1, 1034161364, 1048827465, -1110924302, 1038960439) + + W(2, -1097339820, 1058952469, -1089826679, -1109622563) + + W(3, -1102052735, 1062493730, -1082951605, 1046141833) + + W(4, -1122224578, 1056452878, -1095802941, 1037494008) + + W(5, -1116847174, 1052097743, -1093057352, -1142221339) + + W(6, -1124219870, 1046287775, -1115921615, 1018754897) + + W(7, -1114337484, 1051622635, -1097214606, -1115514650); + sum2 = + W(0, 1026296217, -1098267531, 1056065541, -1107249906) + W(1, 1046658552, 1054123387, -1080723543, 1049528827) + + W(2, -1092482457, 1031912087, 1068312252, -1094476247) + + W(3, 1048926974, -1103258874, -1095327126, -1114829012) + + W(4, -1093986590, 1064081451, -1101319294, -1110151549) + + W(5, 1051240634, -1081783631, 1054428583, -1109929036) + W(6, 1031417427, 1052301313, -1095108256, 1023898518) + + W(7, -1116201775, -1136084293, 1032992382, -1131461985); + WS(1059931039, 1045326059); + sum1 = + W(0, -1105339195, 1027626820, 1035394960, 1020852317) + W(1, -1116497429, -1102542884, 1045819641, -1110241387) + + W(2, -1104905094, 1042432944, 1050447189, -1120454715) + W(3, 1048600749, -1087368655, 1050459387, 1028063406) + + W(4, 1017781140, -1090841955, 1058872902, -1135766366) + W(5, 1028510504, -1098837680, 1038310627, 1046575867) + + W(6, -1104151378, -1135818261, 1009721182, -1121800177) + + W(7, 1006123003, -1102238241, 1039487803, -1138241103); + sum2 = + W(0, -1108715925, 1033521028, -1130002309, 1043372642) + W(1, -1125695753, 
-1107049318, 1043711791, -1096562690) + + W(2, -1106861101, -1126359337, 1069432650, 1055907536) + W(3, 1037467647, 1024736242, 1069236178, -1077602923) + + W(4, 1049447879, -1096919459, -1089672176, -1079492159) + + W(5, -1108013135, 1030099711, -1104043463, 1048192702) + W(6, 1016159100, 1038861546, -1102967963, 1041544949) + + W(7, 1025122350, 1027296385, -1115278753, 1038199238); + WS(1065273279, -1155200022); + sum1 = + W(0, 1032043190, -1110724474, 1048813356, -1107914275) + W(1, 1042748400, -1113644224, -1113329098, 1017813644) + + W(2, 1044363154, -1103694886, -1138873982, -1119126921) + + W(3, -1101712885, 1049530899, 1013956115, 1030506334) + W(4, 1020290996, 1032343156, -1088512539, 1047677002) + + W(5, 1042545917, -1119655327, -1118975351, 1035449518) + + W(6, 1023679047, -1113927803, -1112426340, 1040385292) + W(7, 969146311, 1014066252, -1102619473, 1037002750); + sum2 = + W(0, -1105379539, 1040024171, 1054804566, 1020126178) + W(1, -1114326988, 1033596237, -1100858724, 1058095820) + + W(2, -1128505094, 1027874005, -1108932212, -1095228668) + + W(3, -1127815582, -1090164335, 1015111122, -1114002666) + + W(4, -1144273240, 1026082941, 1057491035, -1109234902) + + W(5, 1011114412, -1098155637, -1123558275, -1117423595) + + W(6, 1010998384, 1010514392, 1041639386, -1148517800) + W(7, 1008109660, -1110656550, 1025837089, 1028462436); + WS(1064822335, -1095840736); + sum1 = + W(0, 1049019197, 1056391992, -1082098174, 1047279757) + W(1, -1114924672, 1052717533, -1097259572, -1158101509) + + W(2, 1006539024, 1053226600, -1083318869, 1028150783) + W(3, 1027273471, 1063307800, -1085358981, 1048033194) + + W(4, 1024001604, 1051465786, -1093212548, 1040827508) + W(5, 1023329786, 1056189595, -1096912972, -1122795750) + + W(6, 1034076369, 1051405604, -1097995110, 1040247311) + + W(7, 1032078653, 1049885284, -1090164049, -1102713147); + sum2 = W(0, 1044291516, 1057945825, 1075245425, -1075047571) + + W(1, -1105536143, 1059138923, 1063262829, -1084742426) + + W(2, 
-1123279074, 1064153610, 1058517731, -1087050661) + + W(3, -1102729189, -1117658388, 1055818216, -1094508761) + + W(4, 1019015608, -1082824416, -1095331241, -1107921801) + + W(5, -1098099014, -1088923796, -1082492094, 1066223948) + + W(6, 1041233257, -1114762777, -1084810008, 1065694420) + + W(7, -1116414073, -1093119165, -1076437009, 1070896480); + WS(-1090760447, 1052695066); + sum1 = W(0, 1032827391, -1086160295, 1058956361, -1122589400) + + W(1, 1004836815, -1100507696, 1050270371, 1011775197) + + W(2, 1040820154, -1085279840, 1061054293, -1126907788) + + W(3, 1033221771, -1085951050, 1060000376, -1109718672) + + W(4, -1142824410, -1090476887, 1053225609, 1031459881) + + W(5, 1019263487, -1096291378, 1052048107, 1014310644) + W(6, 1033909728, -1104244158, 1044967298, 1002236306) + + W(7, 1030288497, -1096098979, -1123372967, 1050105184); + sum2 = W(0, -1115548553, -1119133376, 1034230379, -1106450540) + + W(1, 1010795990, 1054475337, 1049648601, -1130742967) + W(2, 1025233265, -1126101085, 1031862009, 1074765700) + + W(3, -1108773712, -1096206384, 1056453743, 1076253468) + + W(4, 1028577219, 1025243447, -1092511031, 1067142167) + + W(5, -1123367926, -1123699076, 1040231624, -1082850789) + + W(6, 1027045545, -1116348436, 1052333940, -1074864896) + + W(7, 1016487959, 1026289255, -1098871930, -1066441953); + WS(-1098442559, 1026469881); + sum1 = W(0, -1149983818, 1050517915, -1114000872, -1106643136) + + W(1, -1119522441, 1049663660, -1102209381, -1107005098) + + W(2, -1111872220, 1057681670, -1097110004, -1107624656) + + W(3, -1148708523, 1054270423, -1080449641, 1044397088) + + W(4, 1033974589, 1065584902, -1089390067, 1040137492) + W(5, 1034687077, 1060962908, -1088971425, 1011262833) + + W(6, -1121497336, 1043551013, -1096207069, -1152197144) + + W(7, 1040191316, 1037520938, -1098201593, -1111636393); + sum2 = W(0, -1093666199, 1067471948, 1044138823, -1087858714) + + W(1, 1049995354, -1083836095, -1089657247, 1040673003) + + W(2, -1104373549, 1067519925, 
1068116530, -1086283540) + + W(3, -1108191449, 1075025493, 1070698122, -1107890740) + + W(4, -1086862321, -1079149522, -1077256496, 1052510667) + + W(5, -1109961150, -1092122258, -1092830464, 1044801958) + + W(6, -1105617480, -1118106953, -1104600439, -1113062539) + + W(7, 1057348983, -1090446878, -1095892285, 1040940184); + WS(-1089367999, -1080592817); + sum1 = + W(0, 1050901372, 1020565079, -1098563453, -1123752726) + W(1, -1142600967, 1046045566, -1094642094, 1032349343) + + W(2, 1043029392, 1051791856, -1092783656, 1017360893) + W(3, 1042662963, 1059303201, -1082254536, 1040706037) + + W(4, -1131715556, 1060541458, -1088235102, 1038984656) + W(5, 1028335344, 1057651322, -1083519131, 1042767141) + + W(6, 1009439676, 1050310918, -1088554180, 1039511244) + W(7, 1040529906, 1061742083, -1084895582, 1037250730); + sum2 = + W(0, -1086948904, 1072921984, -1102597600, -1081008410) + W(1, 1044777691, -1096214776, -1086612116, 1053678014) + + W(2, -1085906527, 1069427540, 1055370552, -1094318103) + W(3, -1107333159, 1069014322, 1068187895, 1040782350) + + W(4, -1094251850, 1068759557, 1049876941, -1097053445) + + W(5, -1101622483, -1095651744, -1098339283, 1052418274) + + W(6, 1054103540, -1080635176, -1098368915, -1116179967) + + W(7, -1095334551, -1071443034, -1119710681, 1057760894); + WS(-1072329816, 1074376722); + sum1 = + W(0, -1119826815, -1097135645, 1051061560, 1001332238) + W(1, 1034466519, -1100415357, 1050212013, 1009264740) + + W(2, -1121811611, -1123247055, 1057003368, -1119531350) + + W(3, 1040071236, -1078161261, 1050094223, 1047608180) + W(4, 1034412551, -1086793716, 1060987117, -1139666027) + + W(5, -1125783811, -1092987820, 1053182609, 1053959834) + + W(6, 1025497578, -1117997032, 1030369595, -1130537576) + W(7, 989295697, -1097292445, 1040352158, 1003451511); + sum2 = + W(0, 1010850411, 1033963983, 1041326744, -1110591524) + W(1, -1119692159, 1024673399, -1126975570, 1039643589) + + W(2, -1110113403, -1107122545, 1049177676, -1098235771) + W(3, 
1041465994, 1041917938, 1075051781, 1061676083) + + W(4, 1019542894, 1031918417, 1057547658, -1072022345) + W(5, 1041361680, 1016152214, -1100210466, -1080793678) + + W(6, -1128076954, -1119110501, -1101468559, 1038326573) + + W(7, 1035827461, -1108745692, -1111890885, -1115279667); + WS(-1089734463, 1065567745); + sum1 = W(0, -1115310942, -1089748010, 1053493076, 1041536229) + + W(1, 1048676446, 1033541431, 1047890913, -1111760907) + W(2, 1051913151, -1091844976, 1060749357, 1031051700) + + W(3, 1041943745, -1078026925, 1057682191, 1033727319) + + W(4, 1032260949, -1083863979, 1060855949, -1117552023) + + W(5, -1123414147, -1088695462, 1057895644, -1123512523) + + W(6, 1025799735, -1097284573, 1049355215, -1134918146) + + W(7, -1118314271, -1090712276, 1052101531, 1037093278); + sum2 = W(0, -1087048499, 1067110585, 1062551012, -1088705293) + + W(1, -1089227831, -1073947481, -1107011493, 1063056164) + + W(2, -1081828818, -1100054441, 1044700801, -1091908804) + + W(3, -1106022394, 1057992187, 1053279159, 1049660447) + W(4, 1057638724, -1096117153, 1047077354, 1041467137) + + W(5, 1053589051, 1049824990, -1098858111, -1112699600) + W(6, 1030725267, 1008667589, 1055551992, 991046018) + + W(7, 1050363813, 1047082764, -1093863007, -1118599281); + WS(-1079109040, -1085312521); + sum1 = + W(0, 1023769582, 1037365911, -1113299189, -1127522065) + W(1, -1140282322, -1117579705, 1042794748, -1098682508) + + W(2, -1105105824, 1052645482, -1086336671, 1041518392) + W(3, 1041551140, 1061549365, -1087637436, 1049784787) + + W(4, 1030694836, -1092653220, 1051894263, -1104535548) + + W(5, -1111084358, 1050958145, -1111723928, -1112540051) + W(6, 955351653, -1113919416, 1039072513, 997865134) + + W(7, -1133450021, 1047664699, -1111528112, -1121426302); + sum2 = W(0, -1112127727, -1182735741, 1027967369, -1099828018) + + W(1, 1037755147, -1116775242, -1108084106, -1106208929) + + W(2, -1117076760, -1110833901, 1047419656, -1106621688) + + W(3, -1096944427, 1050960349, 1057406540, 
-1093670465) + W(4, 920706880, 1057824022, -1106635563, 1046907280) + + W(5, -1126463908, 1025802869, 1030718323, -1104727129) + + W(6, 1025583781, -1117624689, -1129307942, 1024731263) + + W(7, 1016301114, -1106817101, -1146210056, -1133403952); + WS(1066445424, -1114782683); + sum1 = W(0, -1105169880, -1110498524, 1042140161, 1033176686) + + W(1, 1037849343, -1092096849, 1045982494, 1027992139) + + W(2, -1111719878, -1089773556, 1059787797, -1104315937) + + W(3, 1053025475, -1093199580, 1058443885, 1034263873) + + W(4, -1106443352, -1085224723, 1058982105, -1099213011) + + W(5, -1118185334, 1017432168, 1051015495, 1015617473) + + W(6, -1115180998, -1097477655, 1049335790, -1134277975) + + W(7, -1104379365, -1120659192, 1047806371, -1124174113); + sum2 = + W(0, 1035156885, -1104443938, 1020765936, 1034215267) + W(1, 1010861092, -1111353322, -1131256669, -1119467871) + + W(2, 1053142713, -1092519845, 1043066295, 1043572605) + W(3, 1050643378, 1033492110, -1093594884, -1120345789) + + W(4, -1093995815, 1061622434, -1090891830, 1035132204) + + W(5, -1100307988, 1045835461, 1051701313, -1120193257) + + W(6, -1094190457, 1054276307, -1115303200, -1107758441) + + W(7, -1095534688, -1118030035, 1044764145, 1010759898); + WS(1053797695, 1034928741); + sum1 = + W(0, -1113709609, 1027306577, -1089618673, 1039738506) + W(1, -1101985872, 1036410350, -1094568298, 1041675949) + + W(2, -1098832221, 1048222316, -1089721378, 1049870789) + W(3, -1106547453, 1049396519, 1057932790, 1032602030) + + W(4, 1032341932, -1093507345, 1051551069, 1041034929) + W(5, 1038712045, -1098128846, 1042310981, 1020376402) + + W(6, 1020651977, -1102358117, 1032076306, 1038606871) + W(7, 1037110646, -1095739626, 1049811664, 1036847389); + sum2 = + W(0, -1107569978, -1096371488, -1091206842, -1113379832) + + W(1, 1048834253, -1100415470, -1140716449, -1098639187) + + W(2, 1032955728, -1097656368, -1099982688, -1102236471) + + W(3, 1047116738, 1057366407, 1067804624, -1112454303) + W(4, -1095875849, 
1049724515, -1107732848, 1037256860) + + W(5, 1024141754, 1047568587, -1101898886, 1039735469) + W(6, -1117854930, 1025784853, -1104360749, 1034482735) + + W(7, 1031286951, 1040928209, -1099991629, -1122662653); + WS(-1100599294, -1113486107); + sum1 = W(0, 1041832895, 1058015130, -1095482492, -1123207408) + + W(1, -1113994869, 1043005476, -1095369841, -1120610968) + + W(2, -1101582062, 1065539217, -1088297536, 1040056719) + + W(3, -1100675730, 1054749782, -1085935041, -1111595960) + + W(4, 1015862616, 1046936430, 1054414015, -1110844045) + W(5, 1033573462, 1043027144, -1096194081, 1041950395) + + W(6, -1123901241, -1122192647, -1118572185, 1007333963) + + W(7, -1147102474, 1047923809, -1106262926, -1097939280); + sum2 = W(0, 1033496750, -1105017139, -1118941046, 1012703263) + + W(1, -1118187780, 1043450683, -1142467441, 1009095593) + + W(2, 1041080086, -1106458832, -1096734279, 1025973118) + + W(3, -1102935461, 1057933296, 1063076098, -1089439993) + + W(4, 1023898015, -1098239652, -1119844388, -1124882200) + + W(5, -1122438380, -1111698692, 1048742138, 1004297609) + + W(6, 1007185073, 1023645924, -1096898561, 1046154393) + W(7, 999989929, 1034431733, -1090457759, 1048791679); + WS(1058454143, -1086058342); + sum1 = W(0, -1115074879, 1051341170, -1123276713, 1040408770) + + W(1, -1099743710, 1054019099, -1112675511, 1042487060) + + W(2, -1103933280, 1057240637, -1095853513, 1044345774) + + W(3, -1113530407, -1085932301, 1048593300, 1034191179) + + W(4, -1124606630, -1090466976, 1059272419, -1105205160) + + W(5, -1115568198, -1097279225, 1052135974, -1111240730) + + W(6, -1123572016, -1108988935, 1039376368, -1135917288) + + W(7, -1118699368, -1095706385, 1041536544, 1043157563); + sum2 = W(0, 1047642666, -1106684338, -1097565895, 1038757740) + + W(1, -1094360366, -1104496052, -1137000462, -1120606628) + + W(2, -1111839157, -1101578828, 1017360469, 1043520604) + + W(3, -1101124559, 1062389037, 1056361687, -1110716481) + + W(4, -1119401007, -1099504899, 1057357121, 
-1121032787) + + W(5, -1114275291, -1102577927, -1111580608, -1174578992) + + W(6, 1010413358, -1121638606, 1047557438, 1020758339) + + W(7, 1018914035, -1100286365, -1108198790, 1031842399); + WS(-1116191222, -1087222261); + sum1 = W(0, 1051378220, -1092140039, -1098282527, -1115126509) + + W(1, 1026061480, 1041212802, -1108686318, 1018050395) + W(2, 1051132746, 1028855632, -1114615088, 1028919646) + + W(3, -1131139769, 1054776996, -1096808426, 1031880915) + + W(4, 1029438530, 1052461054, -1090181940, 1047407636) + W(5, 1010253799, 1056191968, -1089672580, 1041055121) + + W(6, -1140691900, 1041996679, -1095415146, 1025713945) + + W(7, 1032111196, 1051463563, -1084069643, 1049760258); + sum2 = W(0, -1120688670, 1075791226, 1072772368, 1033088733) + + W(1, 988781159, -1079951996, -1096143661, -1126178338) + + W(2, 1023531678, -1080919050, -1087274515, 1039982901) + + W(3, 1034244658, -1078043585, -1074830486, -1153355920) + + W(4, 1038669521, 1061514147, -1138433181, 1047086021) + + W(5, -1125227753, 1050383220, -1091737459, -1117938706) + + W(6, -1122152897, 1043495776, 1043710967, 1036258954) + + W(7, -1123518214, 1056154705, 1064633897, -1090859740); + WS(-1086114623, -1084816591); + sum1 = + W(0, -1119067590, 1036163639, -1106221778, 1015833809) + W(1, 1041053515, -1112478813, -1114565991, 1041839788) + + W(2, -1123050946, 1045609440, -1102274148, 1038604869) + W(3, 1050414506, -1091176571, 1041957568, 1049671879) + + W(4, 1017204584, -1096458464, -1140250052, 1016035267) + W(5, 1033253648, -1092456237, 1042499428, 1004159150) + + W(6, -1109025761, 1039781304, 1024592194, 1019624502) + + W(7, -1142909644, 1019336538, -1110741472, 1038938427); + sum2 = + W(0, 1006657945, -1115603798, 1043167348, 1032033598) + W(1, -1145756082, -1129023472, 1029036192, -1122908214) + + W(2, -1102183527, 1024575987, -1134024937, 1013148193) + + W(3, 1043063832, -1070476153, -1079118960, -1117007948) + W(4, 1048257970, 1078068971, 1058188063, 1026806223) + + W(5, 1030027167, 
1034687231, 1026633068, -1106350665) + W(6, -1129422896, 1014471457, -1138449465, 1037397710) + + W(7, -1120006792, 998969122, 1035752331, -1119066852); + WS(1058895967, -1115291633); + sum1 = W(0, -1125313920, -1118075736, -1101537045, -1102743433) + + W(1, -1110715998, 1028840739, -1106860038, -1113756805) + + W(2, -1110540830, 1050429809, -1109809347, -1112821659) + + W(3, -1093243535, 1056177832, 1062536899, -1094426092) + + W(4, -1098773368, 1050921353, 1050429425, -1112140166) + + W(5, -1114664694, 1021103494, 1042267175, -1105504016) + + W(6, -1108229374, 1039051533, -1104944731, -1123143221) + + W(7, -1128878150, 1048765371, 1019165331, -1113092552); + sum2 = W(0, -1118991740, 1049660250, -1072802862, -1093107290) + + W(1, 1035161218, -1092533458, -1084844108, -1131472334) + + W(2, 1031409558, 1052404039, 1059153934, -1099814906) + W(3, 1025873763, 1059098325, 1066715964, 1050189246) + + W(4, -1130385854, 1035329255, 1054800035, 1033113369) + + W(5, -1109675818, 1033133876, -1121630224, 1032032260) + W(6, 1035221069, -1100849576, 1017402854, 999987753) + + W(7, -1106515599, 1041079456, 1051036492, -1109656332); + WS(-1088275071, -1079832501); + sum1 = + W(0, -1121609972, -1140631216, 1031902612, -1114449795) + W(1, 1054795207, 1001216956, 1053928586, 1049117898) + + W(2, -1091175970, -1109051587, -1100362195, -1089874193) + + W(3, 1036185063, -1108520810, -1118566269, 1024185910) + W(4, 1036253081, -1112588624, 1050418310, 1033419200) + + W(5, -1095852779, -1111932116, -1106845532, -1098704820) + + W(6, 1056993046, 1018114011, 1057036918, 1052334541) + + W(7, -1099614386, -1114447781, -1106898567, -1103629998); + sum2 = W(0, 1034198694, -1099753021, 1052074243, -1101897553) + W(1, 981839325, 1036479145, 1033443852, -1125560160) + + W(2, 1050051875, -1087878645, 1061175239, -1090657055) + + W(3, 1043520387, -1099082069, 1044393022, -1126528836) + + W(4, -1099547613, 1049181233, 1036533430, -1113239720) + + W(5, 1020237946, -1106552091, -1125692060, 
-1111189720) + + W(6, -1112528204, 1051666481, -1100952955, 1038783930) + + W(7, 1032634213, -1105193718, 1038429221, -1110455840); + WS(1055684799, 1057467177); + sum1 = + W(0, -1115635180, -1084593518, 1057349085, 1044528285) + W(1, 1040983065, -1097583958, 1048845932, -1108137881) + + W(2, 1033664034, -1088354001, 1059080519, 1042415046) + W(3, 1047016048, -1083875169, 1057461395, -1130549589) + + W(4, 1040303666, -1089538899, 1055847004, 1025308788) + W(5, 1031194256, -1090088564, 1052697432, 1040121956) + + W(6, 1036112338, -1096939300, 1050363555, -1124884732) + + W(7, 1031542812, -1092334596, 1025617138, 1051059157); + sum2 = W(0, 1039076253, 1084724252, 1055999642, -1111202323) + W(1, 1043377438, 1057352437, -1108204793, 982595482) + + W(2, -1111770710, -1070809771, -1115649819, 1026864091) + + W(3, -1129064764, -1070013447, -1084654208, 1008953953) + + W(4, 1041506513, 1024963427, 1027889169, 1041107952) + + W(5, -1117340191, -1115045950, 1023558847, -1112891517) + + W(6, -1113661124, 1037860995, -1108774739, -1161342746) + + W(7, 1006037802, -1120758055, 1034794526, 1036271073); + WS(-1089965247, 1033154456); + sum1 = + W(0, -1118330000, 1037068672, 1043453383, -1112803845) + W(1, 1043361746, -1091726905, 1027410848, 1030795409) + + W(2, 1028536899, -1106295439, 1047994631, -1111747193) + + W(3, -1106541856, -1087991002, 1053447482, -1109712917) + + W(4, -1103233833, -1094017778, 1058853977, -1110327970) + + W(5, 1037178020, 1026207675, 1049286760, -1111739348) + W(6, -1122355891, -1102222128, 1044913013, 1037236782) + + W(7, -1148310999, -1106998176, 1056026770, -1104668925); + sum2 = W(0, -1104081637, -1104712414, 1038273275, -1102746442) + + W(1, -1115350296, 1065017407, 1027805677, 1038124336) + + W(2, -1102551062, -1101193417, -1097697554, 1025981021) + + W(3, 1033477476, 1054909386, 1069044481, -1090398660) + W(4, 1048930937, -1096226206, 1051154993, 1049254524) + + W(5, -1092653578, -1094950793, -1097937335, -1111378843) + + W(6, 1049862570, 
1042131871, -1115393002, -1100983150) + + W(7, -1095610208, -1100838677, -1090656582, 1038627466); + WS(1053522367, -1088249107); + sum1 = W(0, 1041083642, 1049354980, -1104528935, -1116245577) + + W(1, -1109062519, 1050121221, -1089361286, -1108913352) + + W(2, 1041255149, 1062047573, -1094323010, 1044478835) + + W(3, -1129341567, 1050256849, -1085946707, -1106612546) + + W(4, -1122816004, 1058306025, -1092704114, 1010650184) + + W(5, 1034422596, 1054029415, -1096738148, -1127765836) + + W(6, -1105678140, 1043380368, -1095183726, 1004046811) + + W(7, 1002201935, 1057986114, -1098664466, -1116617606); + sum2 = W(0, -1115691041, 1047105417, 1066880006, -1078420733) + + W(1, -1122196578, 1040512713, 1072356775, -1074089832) + + W(2, -1133382325, -1115639944, 1070833917, -1079250237) + + W(3, -1119943136, -1093984910, -1106032356, 1053171659) + + W(4, 1035619338, -1128164365, -1084317659, 1063302165) + + W(5, 1026693101, -1112542694, -1103058628, 1050228647) + + W(6, -1121646682, 1040561229, -1106694088, -1111273903) + + W(7, 1035287249, -1122951759, -1109474775, 1041535222); + WS(1054980735, -1118400611); + sum1 = + W(0, 1023857233, 1056052581, -1108899938, -1098911041) + W(1, -1136534172, 1054619866, -1084632317, 1014993130) + + W(2, -1109579886, 1062631892, -1087670245, 1019079234) + W(3, 1024279465, 1063340895, -1087714282, 984908360) + + W(4, 1031638446, 1054891904, -1089981519, 1040973935) + W(5, 1042081899, 1032051762, -1091518889, 1034243544) + + W(6, 1025101580, 1036582853, -1100292439, 1038263499) + W(7, 1037741818, 1040187076, -1096282711, 1035333164); + sum2 = W(0, 1048663857, -1103126769, -1090522781, -1135969141) + + W(1, -1115607290, -1089284711, -1135630263, -1096474341) + + W(2, 1050959246, -1106476140, -1116194786, -1110761886) + + W(3, -1094885185, 1065597134, 1068281966, -1097679100) + + W(4, 1032263390, -1114774312, -1114594327, 1030603925) + + W(5, -1094433235, 1044112203, 1034641663, 1025157613) + + W(6, 1034671855, -1110680696, -1120338125, 
-1121778209) + + W(7, -1100445080, 1044150663, 1039802827, -1101519530); + WS(-1099426814, 1028666567); + sum1 = + W(0, 1017347599, 1050515008, -1097546199, -1122852442) + W(1, -1114817850, 1054909169, -1091252816, 1034424401) + + W(2, -1102332157, 1058789976, -1088740593, 1040697053) + W(3, 1038414608, 1049870885, 1026332624, -1119196265) + + W(4, 1040863441, -1096181150, 1050979384, -1096850298) + + W(5, 1043586729, -1094822147, 1046083753, -1101867410) + + W(6, 1033560385, -1110297504, 1043042893, -1112505198) + + W(7, 1024225691, 1035801215, -1106319780, 1022200238); + sum2 = W(0, 1032296335, -1113854030, -1123218166, 1034439881) + + W(1, -1113414658, 1056314261, 1057288090, -1106401889) + W(2, 1032045591, 1056282685, 1048444709, 1027355020) + + W(3, -1096252543, -1076472908, -1076465599, -1099548772) + + W(4, 1039836782, 1041975936, 1054239999, 1041344971) + W(5, -1130462124, 1060789196, 1064414385, -1137373456) + + W(6, 1032505935, -1113045826, -1101415652, 1031341874) + + W(7, 1017080436, -1114544525, 1017354944, -1131418272); + WS(1048405758, 1021439377); + sum1 = W(0, -1095783591, 1056714988, -1095028931, -1099026446) + + W(1, 1004365570, 1053912029, -1097931508, 1040281084) + + W(2, -1111296393, 1060368018, -1093355890, 1042667324) + + W(3, -1090472608, 1058906144, -1084869763, -1099317986) + + W(4, 1053249340, 1059710205, -1111710658, 1058667863) + + W(5, -1096289768, 1050004859, -1089344390, -1094050929) + + W(6, -1116930177, 1050104788, -1098648430, 1032145973) + + W(7, 1031327257, 1057263364, -1095692170, -1127343131); + sum2 = + W(0, -1137650289, -1102128439, 1065729447, -1085103011) + W(1, -1108324664, 1038586891, 1042582877, -1096095433) + + W(2, -1107068637, -1102631398, 1055966210, -1096657292) + + W(3, 1053004242, -1096144010, 1066736444, -1084794119) + + W(4, -1096186406, 1065778432, -1077760412, 1061684194) + + W(5, 1044011702, -1087564484, -1109340585, 1053274132) + W(6, 1031462490, 1041037240, -1088233524, 1060428365) + + W(7, 1016473473, 
-1095964556, -1079487884, 1070082531); + WS(-1120621558, -1109747932); + sum1 = W(0, 1041405257, 1056552380, -1086075216, 1021959803) + W(1, 1034046808, 1057052863, -1102283433, 1009707220) + + W(2, -1118447615, 1034826244, -1089852749, 1002695151) + + W(3, 1031113092, 1061462741, -1087896594, 1030648525) + + W(4, -1118172841, 1057041023, -1089300195, 1017359341) + + W(5, 1039149197, 1051800560, -1089266110, 1034838445) + W(6, 1025628006, 1051243933, -1098077639, 1033512688) + + W(7, 1023987646, 1054503948, -1098034299, -1099231069); + sum2 = + W(0, -1072120352, 1076686179, -1096491690, -1109988374) + W(1, -1071453180, 1075980741, 1055107632, -1125676035) + + W(2, -1069998225, 1075876198, -1100816518, 1033428305) + W(3, -1071241469, 1076219984, 1048961210, 1018908197) + + W(4, -1073669228, 1072219376, -1123192163, 1040773271) + + W(5, -1079882002, 1071235859, -1097056656, -1129279043) + + W(6, -1086087106, 1056170781, 1049622318, 1030403985) + + W(7, -1096258120, 1057784547, -1098632303, -1116156171); + WS(1014286296, 1057122707); + sum1 = W(0, -1100574608, 1045128364, 1045637171, -1113867461) + + W(1, -1134927261, -1112841149, 1041785732, 1021982785) + + W(2, -1096946922, 1023155519, 1051848326, -1106336253) + + W(3, -1115033128, -1094208427, 1066937726, -1099783438) + + W(4, -1104533485, -1099169249, 1058599142, -1105189673) + + W(5, -1121259095, -1097752701, 1049921902, -1131816685) + + W(6, -1131995140, -1111007544, -1118760834, -1114789302) + + W(7, -1118839498, -1093400894, 1043420320, -1116552190); + sum2 = W(0, 1049653051, -1095117458, 1052542945, 1018695699) + + W(1, -1094509070, 1054995367, 1021265631, -1101605993) + W(2, 1050525055, 1042728281, 1046926465, 1044635344) + + W(3, -1103662151, 1058124872, 1063131010, -1104882803) + W(4, 1043452805, 1049859565, 1029162962, 1042944443) + + W(5, -1102028885, -1097129382, -1116537562, -1110047725) + + W(6, -1109754017, -1098619851, -1092457406, -1107003327) + + W(7, -1111709279, 1051259496, -1078653130, 
-1114867517); + WS(-1089617919, -1078924764); + sum1 = + W(0, 1038586191, 1054651431, -1087542485, 1032587193) + W(1, -1112517470, 1043170193, -1101086846, -1121720345) + + W(2, 1021690773, 1058605998, -1088163691, 1034793549) + + W(3, -1148410045, 1062049174, -1087004528, -1139319682) + + W(4, -1123773533, 1056236357, -1089020247, 1040971627) + W(5, 1037589846, 1058401044, -1089711896, 1052009144) + + W(6, -1114886949, 1034515388, -1095006441, 1017461287) + + W(7, 1015916580, 1056139353, -1089662071, -1138694970); + sum2 = + W(0, 1041183203, -1104791361, -1113641717, 1040266660) + W(1, -1109901203, -1103309504, 1018299463, -1124518657) + + W(2, 1047303939, -1109225788, 993644855, 1039912380) + W(3, -1146453442, 1050748307, 1050538753, -1107008276) + + W(4, -1103370030, 1032462009, 1024125183, -1094411631) + W(5, 1035308463, 1055570013, 1058013570, -1087158984) + + W(6, -1107592095, -1104507532, 1044744090, -1091454514) + + W(7, -1129723226, 1050955773, -1110543421, -1122733934); + WS(-1102088830, 1068463311); + sum1 = W(0, 1007817678, 1054791531, -1088166471, 1034628887) + + W(1, -1114291787, 1051084317, -1097682087, -1130503387) + + W(2, -1106493040, 1059023407, -1087032011, 1042839161) + + W(3, -1121377148, 1060487316, -1087032009, 1025313797) + + W(4, -1120569345, 1053262386, -1090195409, 1040112167) + + W(5, 1037434082, 1049438849, -1104445288, -1112558866) + + W(6, -1121570543, 1043319351, -1110294871, -1130274577) + + W(7, 1048479966, 1037267389, -1112047510, -1107488086); + sum2 = + W(0, -1092702026, 1046572938, -1113806693, 1023924734) + W(1, -1078842502, -1092761370, 1010298098, -1134107544) + + W(2, -1073165872, 1048469719, -1106037347, -1114438827) + + W(3, -1075365553, 1050413401, 1041503306, 1017902665) + W(4, -1101579557, -1110447061, 1037192623, 1011674154) + + W(5, 1066341075, -1104166524, 1033398310, -1109271682) + W(6, 1071992070, -1123716833, 1031702363, 1028552632) + + W(7, 1076187261, 1056626852, -1110743783, -1123830383); + WS(1050299903, 
-1120086405); + sum1 = W(0, 1001761330, 1015037916, 1045981578, 1032160884) + W(1, 1051426245, 1042953374, -1094503950, -1131440192) + + W(2, -1097473350, -1103780307, 1055197076, -1100940622) + + W(3, 1050955122, -1082933123, -1099126616, 1056645367) + + W(4, -1098718343, 1055455844, -1118831961, -1095742542) + + W(5, -1119699408, -1145769035, 1049761702, 1031622998) + + W(6, 1038117517, -1132909758, -1105955296, 1041481643) + + W(7, -1107622229, 1039587926, 1047033256, -1103124921); + sum2 = + W(0, 1033493706, -1100025935, -1117747708, -1131691420) + W(1, 1004282338, 1036668146, -1097774825, 1042472719) + + W(2, 1032395114, 1043333590, -1106149887, -1122758767) + + W(3, 1035721710, 1058018229, -1110236537, -1106477628) + + W(4, -1138557465, 1035348312, -1105971349, 1046834331) + + W(5, -1094630455, 1040910383, 1017216296, -1101697584) + W(6, 1050323990, 1034465026, -1110352462, 1040100106) + + W(7, -1102984389, 1044530527, -1108832665, -1099114129); + WS(1059761855, -1093333930); + sum1 = W(0, -1114738580, 1026403632, -1105736123, -1114933837) + + W(1, -1114617627, 1045397583, -1109519752, 1044731740) + + W(2, -1102727321, 1028768699, -1092041414, -1111636685) + + W(3, 1053910378, 1067649611, -1090720496, 1058839312) + + W(4, -1094270689, -1104323849, -1112030179, -1095377736) + + W(5, -1107452471, 1042776267, -1103704317, 1034877772) + + W(6, -1130455238, 1036450873, -1131764742, 1031929286) + + W(7, -1108958070, 1044075076, -1106063752, -1113359528); + sum2 = W(0, 1033753751, -1111931472, 1037689465, 1023732592) + + W(1, -1102724392, 1032416705, -1105246922, -1126463180) + + W(2, -1104466296, 1029992729, 1052475163, -1118149766) + + W(3, -1083668043, 1069568682, -1090236396, -1106129378) + + W(4, 1048180527, -1114422988, -1122305941, 1038071226) + + W(5, -1130490520, -1133345747, -1123788932, -1124715334) + + W(6, -1103926430, 1038116955, -1127253836, -1117123194) + + W(7, 1029490699, -1129834714, -1122171294, 1029091180); + WS(1058247519, 1058950523); + sum1 = 
W(0, -1149972914, -1088063023, 1055543086, 1037155715) + + W(1, 1038957612, -1093535488, 1052729022, 1026693813) + W(2, 1039970419, -1086741707, 1058924163, 1030719286) + + W(3, 1008158544, -1077128706, 1057693884, 1032127358) + W(4, 1041515382, -1087332234, 1060656868, 1000958261) + + W(5, 1036144305, 1032313207, 1056531259, -1112386573) + + W(6, 1048236362, -1113848378, 1049522521, -1144612480) + + W(7, -1101573740, -1089306475, 1052006205, 1039554949); + sum2 = + W(0, 1037957789, 1050553491, -1089722328, 1050994656) + W(1, -1128637884, 1004874497, 1058234547, -1095571828) + + W(2, -1110859440, -1106433025, -1098594890, 1041665953) + + W(3, 1052981065, 1058989771, 1052545864, -1129708900) + W(4, 1040710261, 1058251934, -1106724054, -1111634443) + + W(5, -1090346288, -1076253449, 1032950959, 1015279680) + + W(6, -1107045102, -1079150504, 1055215751, 1053553553) + + W(7, -1092015484, 1073146781, 993278274, -1088813426); + WS(-1079689312, 1054955487); + sum1 = + W(0, -1112827293, -1094091800, 1053343716, 1046408387) + W(1, 1038847520, -1099397203, 1057879171, -1099233409) + + W(2, -1106748654, -1102151838, 1023820222, -1154890390) + + W(3, 1038138418, -1086663035, 1057249493, 1012875058) + W(4, 1035397268, -1087965823, 1057985827, -1109124523) + + W(5, 1020226162, -1095805620, 1053821069, -1126965269) + W(6, 1030022886, -1098762192, 1051466543, 1036334304) + + W(7, -1108673687, -1096846716, 1049463311, 1027304762); + sum2 = W(0, 1029906557, -1118908362, -1075330978, 1069116571) + + W(1, -1110945783, -1110004947, -1081100797, 1067844910) + + W(2, 1046588193, -1089040350, -1089891752, 1066162938) + + W(3, -1121400966, -1106443118, -1103643831, 1048795077) + + W(4, -1106274145, 1054044230, -1095873523, 1049522444) + + W(5, 1040948990, -1115139995, -1120742988, 1042365723) + + W(6, -1114149785, -1112260837, -1107023588, 1011222152) + + W(7, 1030737726, 1025987216, 1017833328, 1031208089); + WS(1045323518, -1099573370); + sum1 = + W(0, 1043494028, 1054678847, 
-1087400961, 1041221965) + W(1, -1110296205, 1050638833, -1098183847, 1038272854) + + W(2, -1129034139, 1058657792, -1085969705, 1035507984) + W(3, 1040205415, 1062545342, -1082320511, 1038672050) + + W(4, 1019451328, 1053087270, -1092792902, 1018934299) + W(5, 1037480760, 1054717567, -1090230188, 1028598065) + + W(6, -1144942569, 1044080591, -1094581114, 1032398451) + + W(7, 1019441962, 1056118375, -1090342307, -1135273190); + sum2 = W(0, -1115726367, -1100272494, 1026681555, -1095030133) + + W(1, 1028786141, -1113941052, -1107020345, 1043410282) + + W(2, 1044450180, -1104186275, 1044693748, -1120312046) + + W(3, -1097529180, 1042859033, 1067055887, -1093369648) + + W(4, 1047479398, -1097402721, 1064288021, -1083945294) + + W(5, -1105649332, 1051025911, 1073649944, -1073003592) + + W(6, -1127420023, 1025042835, 1067916626, -1079526678) + + W(7, 1035799966, -1139194229, 1054550763, -1086831079); + WS(-1086660959, -1095040438); + sum1 = W(0, -1137772688, -1101831491, 1047762270, 1026497863) + + W(1, -1103872010, -1108733164, 1048734598, -1112526127) + + W(2, 1033664792, -1099393082, 1053294440, 1036444877) + + W(3, -1114303830, -1091611870, 1032897582, -1105948909) + + W(4, 1043631948, -1111668992, 1054700448, 1045765800) + + W(5, -1110960013, -1092153253, 1051790266, -1102627253) + + W(6, -1112201312, -1104258339, 1038982672, 1030853145) + + W(7, 1004207675, -1111896549, 1042342295, 1024477884); + sum2 = + W(0, 1047134358, -1100773581, 1038575217, -1121016162) + W(1, 1032860761, 1019143260, -1097448906, 1027754828) + + W(2, 1058187056, -1086969962, 1050436365, -1118814167) + + W(3, -1089859690, 1060822499, -1092585260, 1047705700) + W(4, -1073724300, 1074524802, -1097001013, 992280012) + + W(5, -1113017653, -1127402252, 1052022456, -1115523910) + + W(6, -1123513506, 986153015, -1101541136, 1026625966) + W(7, 1005834992, 1026902445, 1040058169, -1115254226); + WS(1065625968, 1033455989); + sum1 = W(0, 1037701789, -1151616802, -1102944307, 1035789757) + + W(1, 
-1137040283, -1099349487, 1053320242, -1106274637) + + W(2, 1000406064, 1047711372, -1098462560, 1048627484) + + W(3, -1094784938, -1115632781, 1053161728, -1098374228) + + W(4, 1050863995, -1088133648, 1049613697, -1120581030) + + W(5, -1103592251, 1049293870, -1128175786, -1114622845) + + W(6, 1032127959, -1118922740, 1017489163, 1038993832) + + W(7, 1038622703, -1120882929, -1119810116, -1165953346); + sum2 = W(0, 1035866397, -1099238508, 1052038184, -1107491561) + + W(1, -1103116216, 1047962584, -1105248240, 1027069673) + + W(2, 1059558286, -1082734190, 1057669233, -1098068789) + + W(3, 1058384307, -1088122523, -1091086098, 1053762892) + + W(4, -1096953271, 1056069525, -1083280561, 1064349048) + + W(5, 1042132369, -1106560803, 1032403631, -1107462772) + + W(6, -1111426470, 1038442279, -1104452063, 1040922744) + + W(7, -1128202268, 1028886015, -1150402298, 1012204557); + WS(1066439152, -1108830929); + sum1 = W(0, -1102916748, 1055608939, -1103898045, 1013732302) + + W(1, -1108788820, 1051866141, -1097938907, 1035488645) + + W(2, -1097607254, 1058768962, -1093577324, 1043113957) + + W(3, -1099017414, 1030462560, -1125603603, 1027216291) + + W(4, -1168846782, -1113005110, 1040477611, -1143171172) + + W(5, -1109193920, -1109987210, 1050862423, -1118580993) + + W(6, -1128476473, -1109670205, 1039038758, -1107320747) + + W(7, -1106724277, -1105116142, 1053895221, -1107171108); + sum2 = + W(0, 1050905005, 1051409035, -1123874142, -1099079861) + W(1, 1051922636, -1107944278, -1131839644, 1035659117) + + W(2, 1052154527, 1040207956, -1100926997, -1109355656) + W(3, 1005525738, 1060123319, 1046139234, 1029590720) + + W(4, 1024591478, 1032045878, -1113252250, 1032543174) + W(5, -1094205878, -1096175996, 1039675350, 1044310065) + + W(6, -1094330307, -1086096303, -1128729660, -1129395200) + + W(7, -1105421569, -1081193369, 1062516858, 1042520685); + WS(-1094347903, 1040885342); + sum1 = + W(0, 1027015666, -1119901274, -1109404651, -1145327523) + W(1, -1097151811, 
1047615033, 1051228067, -1096635113) + + W(2, 1050782150, -1115052947, -1096976391, 1048638245) + + W(3, -1106442815, 1051587896, -1096200649, -1105180361) + + W(4, 1051184308, -1105704339, -1126487153, 1051518582) + + W(5, -1091810963, 1046407197, 1040585575, -1092825760) + W(6, 1034433919, 1009768300, -1132445914, 1038762547) + + W(7, 1042242605, -1122889273, -1108464678, 1040013733); + sum2 = W(0, -1121205117, -1124056141, -1106581817, 1026565438) + + W(1, 1031829824, -1109805410, 1007706034, 1029970286) + W(2, -1105324964, 1045427632, 1046732003, 1047283254) + + W(3, -1090390675, 1059020251, 1057752640, -1089850616) + + W(4, -1110420773, -1107061510, -1101115525, 1031976810) + + W(5, 1018005966, -1118368537, -1103994134, 1036394258) + + W(6, -1113281242, -1115675784, 1036673543, -1112565336) + + W(7, 1032068637, -1113920834, 1042522746, -1145818031); + WS(1066366016, -1121083386); + sum1 = + W(0, -1127500850, 1026624689, 1013565235, -1127181143) + W(1, -1122305754, -1114017662, 1052467062, 1035730049) + + W(2, 1025706181, -1088657475, -1121688536, 1045310349) + + W(3, -1104671484, -1098749979, 1066224034, 1026340635) + + W(4, 1043832110, -1085705014, -1112016562, -1121566277) + + W(5, -1111488580, 1037547214, 1041900405, -1127673329) + W(6, -1123271764, -1112264441, 1043675185, 998683632) + + W(7, 1015742169, -1105147876, 1022047173, -1132045794); + sum2 = W(0, -1102951634, 1043023116, -1104735391, 1035018995) + + W(1, 1044058166, -1093545812, 1058451408, -1112978123) + + W(2, 1037657608, -1093647750, -1104370519, 1043811519) + + W(3, -1091261805, 1070997171, -1105620254, 1052846459) + + W(4, 1057746121, -1082044166, -1091103100, -1097428497) + + W(5, -1106029228, 1051593249, -1096225251, -1127092852) + + W(6, 1006392595, -1100798567, 1052307063, -1107453736) + + W(7, -1119100020, 1013819138, -1121077487, 1041369362); + WS(1059191103, 1030618557); + sum1 = + W(0, 1034200101, 1032351793, -1100440271, -1148363237) + W(1, 1053678608, 1049973094, -1098031908, 
1026486548) + + W(2, -1114145283, 1058794603, -1087452589, -1194991971) + + W(3, 1047695489, 1053292689, -1078695803, 1043453347) + W(4, -1114663316, 1055818163, -1125225546, 1033675947) + + W(5, -1137634576, 1053263444, -1094543429, 966472909) + + W(6, -1125278731, 1041016873, -1116533557, -1115520783) + + W(7, -1138534517, 1043320696, -1104858229, -1119664758); + sum2 = + W(0, 1021496216, -1095812325, -1116672596, 1031503244) + W(1, -1072453458, -1094113031, -1112299630, 1023122856) + + W(2, -1079530578, 1068683999, -1135088113, 1036078566) + W(3, 1063841069, 1074699170, -1126165044, 1036242384) + + W(4, -1095958945, 1043477830, -1107446259, -1100441243) + + W(5, 1042172731, 1031880682, -1119142347, 1009915385) + W(6, 1015153576, -1121439437, 1037216194, -1114450875) + + W(7, -1132334880, -1115200049, 1034903618, 1017712520); + WS(-1096433855, 1052342409); + sum1 = W(0, -1117693364, 1042163431, 1006023611, -1113444755) + + W(1, 1041070378, -1095385709, 1034697390, 1049542464) + + W(2, -1105417036, 1060080198, -1109633387, -1112382959) + + W(3, 1052133240, -1090036408, -1085473397, 1048891757) + + W(4, 1042648272, -1090327769, 1057448056, -1111238376) + + W(5, -1123398616, 1043184694, -1105658040, -1106505885) + + W(6, 1044512510, -1111854479, -1104977148, 1049508979) + + W(7, -1105041790, 1043408999, 1041051171, -1110665802); + sum2 = + W(0, 1039259027, 1043188759, -1165551167, -1113326246) + W(1, -1119373866, -1082984200, -1105050294, 1045229872) + + W(2, -1086821333, -1077733706, -1103802686, -1109909290) + + W(3, 1052826002, 1067327309, 1068967257, 1052700624) + W(4, -1117342490, -1149070344, 1054626023, -1098764713) + + W(5, 1036052293, 1052358305, -1101813629, -1099183932) + + W(6, 1037381955, -1102382047, -1103461210, 1052205497) + + W(7, -1101694007, 1050777563, -1106555317, -1096821402); + WS(1046655614, 1069864308); + sum1 = W(0, 1035633391, -1106200785, -1105260712, 1050507575) + W(1, 1040943483, 1031319761, 1026979270, 1034433460) + + W(2, 1045417382, 
-1120751363, -1106053451, 1045765948) + + W(3, 1043913370, -1093886310, -1121044950, 1041581532) + + W(4, 1050212129, -1098821847, 1042106970, 1029812249) + W(5, 1047844019, -1088130094, 1049054225, 1017929306) + + W(6, 1039058852, -1092291401, 1041620790, 1028938706) + + W(7, 1043891960, -1082722808, 1042598038, 1034869942); + sum2 = W(0, -1098805029, -1082465618, 1017798412, 1045866690) + + W(1, -1101136548, -1084472150, -1108050411, 1039888035) + + W(2, -1094351375, -1085764460, 1031330790, 1027046574) + + W(3, -1097728468, -1140347496, 1053843229, 1038111290) + + W(4, 1040878916, -1100091144, 1043199898, -1100094208) + W(5, 1049713965, 1049328541, 986797508, -1104239104) + + W(6, 1037060075, 1062808960, -1119994130, -1098800085) + + W(7, 1058810464, 1071144827, -1097822096, 1017716576); + WS(-1077527440, 1065234224); + sum1 = + W(0, 1031059492, -1088676934, 1055802445, -1159469258) + W(1, 1014606833, -1098001853, 1051162632, 1001612013) + + W(2, 1041495444, -1087241405, 1059501583, -1109463457) + + W(3, 1031584061, -1083911392, 1061276187, -1105280809) + + W(4, -1143158030, -1095088526, 1050088278, 1038556792) + W(5, -1140389945, -1096536807, 1049527800, 986827883) + + W(6, 1035424519, -1104484775, 1042937212, 1030875577) + W(7, 1013235228, -1100223239, 1037087943, 1047418837); + sum2 = + W(0, 1026928347, 1019217071, 1027288437, -1112677997) + W(1, 1006719462, -1098369589, -1099640354, -1146387796) + + W(2, -1103317527, 1041304779, 1050941648, -1072403340) + + W(3, 1043604137, -1134457784, -1112266437, -1071428003) + + W(4, -1106627965, 1048134757, 1027462477, -1082446250) + W(5, 1034526167, -1118427047, 1040843010, 1059493547) + + W(6, -1117999031, 1029994729, -1104465031, 1072461191) + + W(7, -1134258967, -1113504087, 1054764710, 1077848621); + WS(1034219259, -1130863201); + sum1 = W(0, -1122470601, 1036468120, 1033355302, -1104067775) + + W(1, -1125729883, 1031973022, -1125640547, -1112351746) + + W(2, -1119868741, 1045421587, -1122038588, -1106594245) + + 
W(3, -1092963050, 1062931954, 1044707408, -1094830635) + + W(4, -1102984950, 1062738826, -1104269382, -1127079049) + + W(5, 1017182198, 1050208654, -1106320445, -1109997276) + + W(6, -1107264969, 990945063, -1110744610, -1112571199) + + W(7, -1116589116, 1023027040, -1098846428, -1117279521); + sum2 = W(0, -1120041672, 1041156572, -1103868400, 1036471018) + W(1, 999703935, 1029144962, 1036670980, -1107170022) + + W(2, -1109032994, 1037796455, 1043292537, -1115113192) + W(3, 1041627711, 1046556501, 1059414852, 1029249656) + + W(4, -1111815622, 1060385029, 1048639871, -1126242312) + + W(5, 1029813110, 1040466231, -1105513272, 1043143286) + + W(6, -1110751572, -1103822492, -1100107078, -1101226736) + + W(7, -1114401610, -1076222058, 1043280503, 1034327293); + WS(-1097041087, -1081891922); + sum1 = W(0, 1045201037, -1097075376, -1126257330, -1115394372) + + W(1, 1042240134, -1091271089, 1049467348, -1097795722) + + W(2, 1049780286, -1094847921, 1057610047, -1097045891) + + W(3, 1039927583, 1059654550, -1141428591, -1103383438) + + W(4, -1122695635, 1056430510, -1089703504, 1036098616) + + W(5, -1110484942, 1057233913, -1087958744, 1033721138) + + W(6, -1117417409, 1041827328, -1096746586, 1035539581) + + W(7, 1039840796, 1050554535, -1096159486, -1131932502); + sum2 = + W(0, 1012982255, 1052214657, -1136022111, 1032353073) + W(1, -1101224821, 1042508745, -1093748656, 1020863779) + + W(2, -1109376974, 1042044290, 1045672838, -1113516472) + W(3, -1116588919, 1071547407, 1065241977, 1040242686) + + W(4, -1096183361, -1089469015, 1049249478, -1120964187) + + W(5, -1092858264, -1090264503, 1047260540, -1103746594) + + W(6, -1090462581, 1039700797, 1046964814, -1100885683) + + W(7, -1119652925, -1083767147, -1107087862, 1028764718); + WS(-1088621983, 1079497913); + sum1 = W(0, 1027161409, 1052712773, -1102058289, -1100368317) + + W(1, -1102075462, 1051824794, -1098602113, 1021667349) + + W(2, -1120075900, 1057942154, -1096292664, -1106147487) + + W(3, -1109063205, 
1060662493, -1082645434, 1039175801) + + W(4, -1104005172, 1062931830, -1090788424, -1129475640) + + W(5, -1136555883, 1057800198, -1090185155, 1036809656) + + W(6, -1112015962, 1047971474, -1098101583, 1009161622) + + W(7, 1033292370, 1059753504, -1088742454, -1143403778); + sum2 = W(0, -1150684740, -1095235693, 1044985054, 1035589955) + + W(1, -1114056841, -1115238312, -1127339724, -1108218072) + + W(2, -1164208415, -1080604054, -1106549807, 1022130402) + + W(3, 1034698244, -1071093346, -1080554397, -1106288272) + + W(4, -1134574697, -1083926997, -1104923511, 1024268734) + + W(5, 1043895716, 1068501149, 1052487431, 1036117000) + W(6, -1101980686, 1079699126, 1054997905, -1130803558) + + W(7, 1050191679, 1056576712, 1029700329, 1031345667); + WS(-1087955103, 1023517655); + sum1 = + W(0, -1106663590, -1090071677, 1062902614, 1013889298) + W(1, 1037773274, -1103425461, 1055773525, -1100022495) + + W(2, -1116651838, -1089596734, 1059014377, -1101161913) + + W(3, 1022742037, -1082614473, 1060267482, 1030094516) + W(4, 1040438284, -1088542372, 1057943128, -1120043085) + + W(5, -1117813952, -1092404165, 1054561254, -1116951793) + + W(6, 1032561623, -1100716355, 1051603940, 1035114019) + + W(7, -1102318801, -1098396937, 1049819671, 1026546470); + sum2 = W(0, -1107653444, -1094858135, 1084595456, -1065438414) + + W(1, 1032757863, 1061839668, 1074196920, -1069856939) + + W(2, -1150202006, 1049735190, 1067688119, -1073375528) + + W(3, 1037010009, -1123088190, 1070729644, -1082206819) + + W(4, 1026258576, -1089824138, 1050587980, -1086758222) + + W(5, 1041748319, -1098358767, 1057889952, -1097734258) + + W(6, -1131622502, 1049699018, -1127981435, -1106404220) + + W(7, -1112646994, -1103776938, 1048602984, -1098587943); + WS(-1089607615, 1063931357); + sum1 = W(0, -1156148665, -1089422721, 1059281720, 1041177357) + + W(1, 1033042784, -1099513696, 1053472432, -1107985659) + + W(2, 1027235975, -1086976804, 1060428066, -1122980350) + + W(3, -1117005684, -1083936660, 1061258678, 
-1103260375) + + W(4, 1045226606, -1089259880, 1055780369, -1111710595) + + W(5, -1112863441, -1092850238, 1057981025, -1109580750) + + W(6, 1040283212, -1096473171, 1051704489, -1125267169) + + W(7, -1102751384, -1096124078, 1054893029, 1035448166); + sum2 = W(0, 1024298597, 1074606150, 1080820206, 1030125639) + W(1, 1037699935, 1048614297, 1047271097, -1121834510) + + W(2, 1030404477, -1085515118, -1079656120, -1166463196) + + W(3, -1094400051, -1071672142, -1066721483, -1101580999) + + W(4, 1052543759, -1091215560, -1078287609, 1028051862) + + W(5, 1024136176, -1095087497, -1096732136, 1009399200) + + W(6, -1159281410, 1050441058, 1057126689, -1117118368) + + W(7, -1102232478, 1071366890, 1074685383, -1113696211); + WS(-1083901183, 1060981851); + sum1 = + W(0, -1106299749, 1048978486, 1016862419, -1121286832) + W(1, 1039492049, -1107896957, -1115539879, 1049662006) + + W(2, -1113140273, -1119192025, -1098481927, -1106196840) + + W(3, 1055856482, -1090009641, 1058119768, -1149152444) + + W(4, -1103186821, -1088174932, 1052019064, -1101797848) + + W(5, -1108369378, 1034759564, 1048477348, -1146708651) + W(6, -1115937041, 1008970428, 1034559233, 1024756171) + + W(7, -1127954738, 1027686380, 1044214632, -1111878699); + sum2 = W(0, 1030335348, -1130369060, -1106797797, 1006653296) + + W(1, -1110488253, 1056034410, 1040331307, -1094310491) + + W(2, -1096373435, -1097486091, 1063611375, -1113782011) + + W(3, 1042224202, -1094958491, -1106971469, 1042512890) + + W(4, 1038378538, 1057631708, -1093142933, -1140559356) + + W(5, -1117770266, -1114209587, 1037075531, -1114686161) + + W(6, 1036930446, -1105273765, -1115233817, -1115373149) + + W(7, -1168276161, -1115537765, 1025218793, 1026640233); + WS(1063762143, -1098158381); + sum1 = W(0, 1027555010, -1106988831, 1031921883, 1033567284) + + W(1, -1113176415, 1049655577, 1032851437, -1099137927) + + W(2, 1044835884, -1089572014, -1106152127, 1052671923) + + W(3, -1094480963, 1041189880, 1057311541, -1101762329) + + W(4, 
1042205032, 1035602487, -1100163979, -1106014811) + + W(5, 1038186408, -1095655321, 1046700975, 1044902280) + W(6, -1112247611, 995617211, 1050763922, -1094051821) + + W(7, 1041241393, -1102946366, -1119959180, 1044480349); + sum2 = + W(0, 1010916279, 1040615985, -1103581164, -1126424725) + W(1, -1108274646, 1037755544, 1053142923, -1119616718) + + W(2, -1108043460, -1107213139, -1091384808, 1062997316) + + W(3, 1049469526, -1085933104, -1093682556, 1050086980) + + W(4, 1061284555, -1098201753, -1095835641, 1044548392) + + W(5, -1097521196, 1027381238, 1044225612, -1099920539) + W(6, 1047936551, 1007266289, -1101597200, 1043928239) + + W(7, -1106103943, 1039340208, 1011948943, -1120667237); + WS(1060336095, -1119657045); + sum1 = + W(0, 1023788715, 1048279897, -1106238784, -1110161279) + W(1, -1117126579, 1044311660, -1096869700, -1136931731) + + W(2, -1108875843, 1062204714, 1019770348, -1115905233) + W(3, 1022815367, 1060724749, -1081389759, 1034904806) + + W(4, -1100562722, 1063043033, -1084472996, -1124661292) + + W(5, 1043213137, 1050929706, -1097593661, 1017832853) + + W(6, -1111482971, 1037241746, -1106654286, -1109716592) + + W(7, -1142747249, 1051192212, -1102133781, -1109353889); + sum2 = + W(0, 1036664563, 1009247708, 1013790140, 1035577258) + W(1, -1103288529, -1126560341, -1102369678, 1008412166) + + W(2, -1102134383, -1067246446, 1047852878, 1016094574) + W(3, 1057556566, 1067728497, -1093152080, 1045655865) + + W(4, -1099512639, 1078180911, -1110504691, -1123477495) + + W(5, 1031668335, -1085231458, 1042704555, 1023608957) + + W(6, -1103860103, 1047956293, -1132621098, -1112760821) + + W(7, 1028018661, -1114058193, 1030531898, -1120624893); + WS(1035518203, 1045613832); + sum1 = + W(0, 1044215468, 1057279232, -1097478568, 1042423606) + W(1, -1096085823, 1043513429, -1096451901, -1098498663) + + W(2, -1102751483, 1057361220, -1091716958, 1012832112) + W(3, 1007706274, 1061539183, -1085528932, 1054368634) + + W(4, -1098118842, 1057962612, -1093017477, 
-1103819295) + + W(5, 1051305356, 1058710773, -1091718961, 1057808613) + + W(6, -1096250919, 1037420261, -1100741095, -1100568821) + + W(7, -1106455273, 1053125757, -1097230063, -1111733504); + sum2 = W(0, -1105709018, 1058997495, -1086433444, 1022365788) + + W(1, 1050542737, -1084246791, 1065459234, -1101303092) + + W(2, -1106108137, -1103685622, 1055198365, 1040995532) + + W(3, 1027983028, -1106510960, -1102412567, 1044759128) + + W(4, 1053295819, -1089515541, 1057925132, -1104466949) + + W(5, -1091042532, 1066680974, -1080963495, 1043286350) + + W(6, 1049054259, -1088412186, 1056230430, -1096360229) + + W(7, -1108416827, 1037143386, 1049505110, -1106131860); + WS(-1103921662, 1072713673); + sum1 = W(0, 1042712209, -1111681022, 1029885560, -1123292759) + + W(1, -1130675027, -1104528338, 1032835743, -1124533373) + + W(2, 1041149044, -1103688784, 1053127408, -1101763282) + + W(3, -1108819823, 1055222237, -1089836355, -1143773237) + + W(4, -1099465033, 1063848575, -1089832742, 1044926036) + + W(5, -1102885798, 1053207587, -1090841456, 1032528402) + + W(6, -1108891552, 1051374172, -1098121506, 1040294582) + + W(7, -1122152216, 1044600823, -1101098252, 1018003689); + sum2 = W(0, -1116466906, 1017580326, -1100144980, 1018640494) + W(1, 1032691193, 1031996320, 1045143645, 1067424527) + + W(2, -1110682367, 1040673767, 1055366708, 1071822180) + + W(3, -1134651946, -1121084452, -1112456349, 1057195246) + + W(4, 1019799157, 1012141662, 1037079065, -1078565795) + + W(5, 1020309206, -1123645954, 1036705771, -1073530916) + + W(6, 1028255175, -1125118807, -1112096331, -1087156462) + + W(7, -1118897648, 1005021204, 1021253254, 1033125682); + WS(1054959295, 1011151216); + sum1 = W(0, -1124554365, 1043778462, -1140499785, -1103855992) + + W(1, 1028540695, -1104732576, -1111020475, -1113072532) + + W(2, -1107828893, 1063706911, -1098481604, -1121738665) + + W(3, -1135601758, 1052343180, -1081762405, 1048654720) + + W(4, -1135515507, 1050410381, 1053053447, -1096321673) + + W(5, 
1032466990, -1128754020, -1112972157, 1012954617) + + W(6, -1110097877, 1034222620, 1023071124, -1110289546) + + W(7, 950109203, 1043960229, -1120112616, -1114439871); + sum2 = W(0, -1104841432, 1047456971, 1024424907, 991739097) + W(1, 1047690020, -1094867354, 1033457856, -1129013032) + + W(2, -1084375561, 1068880529, 1046341732, -1103560341) + + W(3, -1086552004, 1065412328, -1087761187, 1059009805) + + W(4, 1046416693, -1080289298, 1062384539, -1098849357) + + W(5, 1025710892, -1110380578, -1103331799, 1045332030) + + W(6, -1130856953, -1105039836, 1024509174, -1104241974) + + W(7, -1113936449, 1016290713, -1126452938, 1019994493); + WS(1061669311, 1066543312); + sum1 = + W(0, -1120030840, -1097490380, 1038501126, 1026361650) + W(1, -1111872182, 1051889621, -1110528784, -1112085217) + + W(2, 1046388965, -1085299564, 1050043436, 1019064063) + W(3, -1109640224, 1043197314, 1054292822, -1116217609) + + W(4, 1051640445, -1099578605, -1095700775, 1045441021) + + W(5, -1104742312, -1114905437, 1056051928, -1097066005) + + W(6, 1040612129, 1033952272, -1108619674, -1146667493) + + W(7, 1018742870, -1095793020, 1044337364, 1039394788); + sum2 = + W(0, 1041249103, -1098300344, 1041141085, -1123648820) + W(1, 1040922446, -1147221163, 1014590689, -1115292810) + + W(2, 1047169006, -1097103704, -1094689158, 1021735499) + W(3, -1133026152, 1055827658, 1056443342, 1023898168) + + W(4, 1045305983, -1082349433, 1051958562, -1153021086) + + W(5, -1121038685, -1105225178, 1035590360, 1038130213) + W(6, 1032067159, 1036618956, -1098250629, 1035045284) + + W(7, 986169209, -1102095468, -1145857885, 1030074795); + WS(1066007616, 1040865170); + sum1 = W(0, 1016703369, -1154003525, -1104662792, -1111826720) + + W(1, -1106398093, 1038441464, 1031083397, -1146451379) + + W(2, -1114560988, -1112021256, 1052887594, -1102003526) + + W(3, -1096325697, 1050813825, 1061249791, -1102172436) + + W(4, -1094335779, 1059033474, -1126656183, -1104413998) + + W(5, 1023023221, -1108543232, -1106571274, 
-1109007926) + + W(6, -1113231876, 1040892233, -1121820472, -1112600993) + + W(7, 1022983872, 1034570898, -1115038347, -1123919488); + sum2 = W(0, -1117895883, -1109314178, 1043196819, 1029963629) + + W(1, 1036640719, -1103775527, 1047073701, -1117061975) + + W(2, -1092695525, -1079308608, 1032498235, 1053147047) + + W(3, -1109277526, -1088245301, 1071134194, -1113344034) + + W(4, 1054484712, 1032194306, -1098274857, 1027577620) + + W(5, -1102073467, 1058556412, -1102222667, -1104158962) + + W(6, 1047716642, -1097777200, 1028797614, 1019489703) + + W(7, -1115714917, 1049638324, -1109885254, -1106961770); + WS(-1112959995, -1090797387); + sum1 = + W(0, 998546749, -1107113888, 1042251615, 1034524319) + W(1, -1102515009, -1157171375, 1043372984, -1109045169) + + W(2, 1018649191, -1095395659, 1057847906, 1025715818) + W(3, 1024148768, -1088418302, 1049085946, -1107024218) + + W(4, 1036601843, -1108499994, 1054314562, 1036766314) + W(5, 991750171, -1092700079, 1052242560, -1102346050) + + W(6, -1134314668, -1100812608, 1010001228, 1041369791) + + W(7, 1024106779, -1106951576, 1043704436, -1136260253); + sum2 = W(0, -1097076796, 1045091130, 1028367411, 1016501386) + W(1, 1000463738, 1024736081, 1040206702, -1122385083) + + W(2, -1083533587, 1064625577, -1112850496, -1104146558) + + W(3, 1064100942, -1096201033, -1086258418, 1050125885) + + W(4, 1074997490, -1071550159, 1052069491, -1105686824) + + W(5, 1043707697, -1106033160, -1098690685, 1041978517) + + W(6, 1028507309, -1118548401, 1050202273, -1109567340) + + W(7, -1152835380, -1111712908, -1113053149, 1032203802); + WS(1065286463, -1155116140); + sum1 = W(0, -1127743664, -1099417962, 1047837241, 1035108393) + + W(1, 1036265535, -1091060822, 1038987540, -1121911642) + + W(2, -1118137852, -1088882712, 1061626690, -1119901843) + + W(3, -1121018025, -1089265061, 1069311516, -1100290432) + + W(4, 1024160014, -1088513194, 1053957977, -1101311755) + W(5, 1028265374, -1090887512, 1051214117, 980641778) + + W(6, 1031461691, 
-1097685517, 1050330231, 1030945477) + + W(7, -1116217572, -1096201501, 1045642342, 1036102670); + sum2 = W(0, 1023964675, -1104287950, -1095527688, -1107378538) + + W(1, 1054178647, -1098771138, -1106164282, -1107123343) + + W(2, -1104607531, 1030951671, 1049293814, -1102317974) + + W(3, -1088748842, 1062304002, 1069791349, -1093097006) + + W(4, 1034720649, 1056993385, -1106129898, -1107155572) + + W(5, 1027793575, -1095484784, -1093744890, -1104614583) + + W(6, 1026748348, -1103591056, 1040804508, -1111048704) + + W(7, 1048440611, -1115722179, -1102843948, 1036145149); + WS(-1107864827, -1106183398); + sum1 = W(0, -1106667222, 1028246417, 1062610107, -1094330385) + + W(1, 1032383543, -1105288567, 1048732230, -1113355593) + + W(2, -1104095901, 1024277645, 1033194034, -1100412503) + + W(3, -1105126732, 1031807228, -1097352540, -1110531197) + + W(4, -1119082647, 1024663749, 1042072353, -1106817153) + + W(5, -1099092218, 1059042594, -1097841594, -1119219065) + + W(6, -1106477481, 1038514313, -1132497297, -1116226478) + + W(7, -1100466619, 1061247795, -1116357250, -1112377140); + sum2 = W(0, -1124057659, 1049202878, 1055877653, 1043135164) + + W(1, -1107748661, 1029365940, 1040631102, -1120955446) + + W(2, -1106641450, -1100669682, -1121942624, 1048773070) + + W(3, -1106278570, -1090514146, -1097531540, -1096084011) + + W(4, -1115427744, 1050790854, -1095195500, 1040842880) + + W(5, 1045306633, -1101749265, 1044511371, -1101432694) + + W(6, -1107251222, 1044664667, -1123742683, -1108344435) + + W(7, 1044220643, 1051259000, 1035140356, -1156279482); + WS(-1107710971, -1103335008); + sum1 = W(0, 1020774038, 1057965326, -1096736580, -1108312647) + + W(1, -1097452822, 1052789750, -1097416740, 1007270548) + + W(2, -1103681302, 1061528673, -1090723228, 1008435167) + + W(3, -1142400543, 1059882105, -1082070814, 1032878150) + + W(4, -1130541650, 1058381533, -1096313647, 1024150455) + + W(5, 1035627390, 1052101646, -1092903493, 1029066731) + W(6, 1026485526, 1047827121, 
-1095953623, 1024752368) + + W(7, 1026933710, 1048909172, -1094072619, -1129872006); + sum2 = W(0, -1091198687, 1051358993, 1026242284, 1036983987) + + W(1, -1096592719, 1056674994, -1102658445, -1112372364) + + W(2, -1088722291, 1050253089, 1039492851, -1109827680) + W(3, 1017819408, 1057806550, 1048044980, 1033031265) + + W(4, -1104841225, -1098514434, 1047848116, -1106004699) + + W(5, 1047280108, -1095745001, -1114927114, -1128936304) + + W(6, -1133964792, -1121773836, 1009730080, 1044703642) + + W(7, 1046210234, -1101840089, -1115459009, -1121859563); + WS(1046617982, -1079283690); + sum1 = W(0, -1112235521, -1103103206, 1050398983, 1031782004) + + W(1, -1112371935, 1005568807, 1053710959, -1112406582) + + W(2, 1046025067, -1082068590, 1059470764, -1109731202) + + W(3, -1114138877, 1053438832, 1056782757, -1120370051) + + W(4, -1100106092, 1050481934, -1084354719, 1047048563) + + W(5, -1106636112, -1113851306, 1044662919, -1114617505) + + W(6, 1006684852, -1101804759, 1028720557, -1122334215) + + W(7, -1134743124, -1101795702, 1040488963, 1035089325); + sum2 = + W(0, -1123583696, -1114190125, -1123424510, 1035265901) + W(1, 1041752924, -1107057915, -1108427412, 1011193218) + + W(2, -1096033091, 1054169171, -1101097272, -1097576107) + W(3, 1045050816, 1066165326, 1060681517, 1037309084) + + W(4, -1111810372, -1090934807, -1093738003, -1094306097) + + W(5, 1044170848, -1094482563, -1094742214, 1047478278) + W(6, 1035342189, 1030376713, -1122528198, 999245333) + + W(7, -1107148363, -1140242036, 1046835784, -1118953940); + WS(1057107647, -1115492411); + sum1 = + W(0, 1043432615, 1037909322, -1092726398, 1042270260) + W(1, 1017716083, 1044219192, -1094557923, 1036944691) + + W(2, -1115224236, 1059480955, -1089724826, 1049175094) + W(3, 1034158091, 1055275264, -1079486955, 1036017407) + + W(4, 1031606075, 1062186253, -1094016652, -1112299996) + W(5, 1040917102, 1051479740, -1090199321, 1012181234) + + W(6, -1119339928, 1036604907, -1111430132, 1026270626) + + W(7, 
1006053250, 1049031914, -1124163213, -1106538704); + sum2 = W(0, 1018646782, 1037675363, 1056993548, -1095820604) + + W(1, -1107779390, 1041358579, -1114299594, -1106961783) + + W(2, 1048577488, -1103110037, -1108103869, -1113810892) + + W(3, -1094766117, 1063077105, 1069120744, -1094104293) + + W(4, 1029078775, -1094239344, 1019393926, -1100654825) + + W(5, -1115207772, -1097294293, -1090842684, 1030620215) + + W(6, 1041354677, -1106609062, -1103795001, 1015746998) + + W(7, -1134027235, 1005749079, -1111892981, 1031888221); + WS(1038408187, -1104646224); + sum1 = + W(0, -1100666444, 1031841147, 1045835257, -1121385297) + W(1, 1032659338, -1095811512, 1046356758, -1115316924) + + W(2, -1096791479, 1046629869, 1055310447, -1109718991) + W(3, 1050254507, -1086582314, 1049546009, 1043876936) + + W(4, -1125163769, -1092033141, 1059064223, -1115882186) + + W(5, 1034716030, -1095590162, 1032941357, 1051195252) + W(6, -1104041563, 1012740704, 1016606100, -1110386378) + + W(7, 1024265746, -1097384267, 1043234370, -1121465454); + sum2 = + W(0, 1033146053, 1033178697, -1110558215, -1107600367) + W(1, 1034495435, 1013274853, -1103797816, 1046600596) + + W(2, 1033891153, 1044565157, -1074394842, -1096372667) + + W(3, 1015618236, -1083473003, -1075035519, 1071496075) + W(4, -1102272365, 1047899954, 1070896015, 1070598351) + + W(5, 1026402836, 1033829854, -1107786181, -1103057086) + + W(6, -1126192412, 1026659107, 1039466503, -1099451771) + + W(7, -1130670616, -1129244632, 1033633210, -1106841766); + WS(1060540543, -1145107984); + sum1 = W(0, 1016949312, -1111708420, 999025948, -1110878953) + W(1, 1033700674, -1115158282, 1037057540, 1034636061) + + W(2, 1008485935, -1112773425, -1111113150, 1006219749) + + W(3, 1050394829, -1105627788, -1105528438, 1047777444) + + W(4, 1025975118, -1166436080, -1092533512, 1034379342) + + W(5, 1027755062, 1049090616, -1097657169, 1023310559) + + W(6, -1166944971, -1131929874, -1125616567, 1042471825) + + W(7, 1036625882, 1027903943, 
-1104970374, 1029991232); + sum2 = W(0, -1112727972, 1028528033, 1050127712, 1039139414) + + W(1, 1041061913, 1047913039, -1120606988, -1090975722) + + W(2, 1030171918, 1058299937, -1075312291, -1096898908) + + W(3, 1049192765, -1088725965, 1051042420, 1056682084) + W(4, -1097932217, 1006741474, 1061621025, 1043329296) + + W(5, -1108991614, 1028842331, -1120436768, -1136718330) + + W(6, -1128255593, -1119404040, 1041035795, 1019658885) + + W(7, -1128975721, 1019613109, 1038145345, 1017746289); + WS(1058512095, 1047466767); + sum1 = W(0, 1025672397, -1102753041, 1037445664, 1037433000) + W(1, 1032743763, 1015664161, 1032570339, 1033220516) + + W(2, 1046625694, -1102686879, -1091026739, 1035056566) + + W(3, 1040236986, -1099087483, -1089271035, 1024398415) + + W(4, 1041791363, -1097904327, -1100626131, 1043847942) + + W(5, 1042422310, -1097334084, 1051389809, 1042353123) + W(6, 1024317081, -1123206279, 1041773046, 1037697617) + + W(7, 1034505416, -1096407969, 1050962859, 1047698739); + sum2 = W(0, -1111694157, -1097429792, 1052756319, -1115035673) + + W(1, -1113457231, 1040318944, -1131566802, -1108486657) + + W(2, 1007020677, -1109123041, 1066240251, 1057627634) + + W(3, -1110494513, -1087577139, 1071158756, 1052796029) + + W(4, 1036239980, -1115766281, 1055974927, 1037312144) + W(5, 1042151096, 1033854974, -1085216854, 1031946717) + + W(6, 1014804229, 1056323633, -1076147337, -1107413695) + + W(7, 1038268008, 1058473731, -1078122726, -1088208012); + WS(-1087119871, 1051442968); + sum1 = + W(0, -1106951233, -1101188351, 1055191809, -1118371512) + W(1, 1026199466, -1102133666, 1046522759, 1024298399) + + W(2, -1111827529, -1096477343, 1061746551, -1102462991) + + W(3, 1034911407, -1081083296, 1062137809, -1118895495) + + W(4, -1110066258, -1093686286, 1056808794, -1105243164) + + W(5, 1036906330, -1089794286, 1061657374, 1028468933) + W(6, -1121489041, -1102503648, 1025414769, 1001909715) + + W(7, -1123695161, -1096524016, 1043754020, 1039027467); + sum2 = W(0, 
1036012528, -1111568121, -1125130883, 1019317279) + + W(1, -1136414534, 1023613585, 1037328619, -1141870971) + + W(2, 1028357901, 1046290638, -1077265349, -1105362146) + + W(3, 1018170307, -1086518431, -1068062671, 1041918725) + W(4, 996800246, 1032866656, 1083861052, 1040576139) + + W(5, -1115554829, 1040198779, 1057186482, -1110577355) + + W(6, 1018408279, -1115393969, -1101393956, 1032099345) + + W(7, -1128152611, 1031067220, 1024816461, -1109411807); + WS(1041081598, -1101063046); + sum1 = W(0, 1038662072, -1123374599, 1035193857, -1104056554) + + W(1, -1105628103, 1048894326, -1098151405, 1009505700) + + W(2, 1046838359, 1044003395, -1109376817, -1107771451) + + W(3, -1112508084, 1062900743, -1082115704, 1039692648) + + W(4, -1112561351, 1058339600, -1087104856, 1049374543) + + W(5, 1047812813, 1041428913, -1097118497, -1126547869) + + W(6, -1106996345, 1044355834, -1098603955, 1037690530) + + W(7, 1045311157, 1013224727, -1110120844, -1121969199); + sum2 = W(0, 1034938637, -1102665080, -1083931487, -1096043986) + + W(1, -1110454828, 1066739686, 1057500561, 1050111041) + W(2, 1044300205, 1063593500, 1069787057, 1057117236) + + W(3, -1094841314, -1072571644, -1073076170, -1094917255) + + W(4, 1049263704, 1050585763, 1057940110, 992791419) + W(5, -1123904781, 1033188794, 1049962317, -1119131193) + + W(6, 1032882093, 1040935597, -1103057884, -1129197337) + + W(7, 1018550156, 992411043, 1032437745, -1139242755); + WS(1047493374, 1019974383); + sum1 = W(0, 1039818779, 1024992878, -1099980259, -1119405261) + + W(1, 1022499782, 1056719767, -1093967742, 1017537921) + + W(2, -1111466513, 1056992349, -1089692488, -1134657044) + + W(3, -1128872350, 1066523241, -1081677329, 1032226588) + + W(4, -1102282821, 1057279594, -1104336096, -1110606353) + + W(5, 1027318580, 1050310409, -1095763174, 1029746358) + + W(6, -1112367361, 1043633508, -1147598863, -1105211995) + + W(7, -1121442618, 1050332018, -1103201160, -1109613299); + sum2 = W(0, -1109381660, -1099927212, 1032613200, 
1013674604) + + W(1, 1024006001, 1048105027, 1031703418, -1113331152) + W(2, 1048332575, 1083214625, 1051352300, 1010546556) + + W(3, 1030946168, -1068785948, -1082836395, -1137462284) + + W(4, -1111460550, -1080184440, 1047472657, 1033485356) + + W(5, -1115660792, -1125303858, 1027323348, 1034637753) + + W(6, 1027780517, -1129923962, 1006215943, -1112671380) + + W(7, -1125113546, 972536438, -1123338357, 1033990282); + WS(1049191295, 1026054180); + sum1 = + W(0, 1015431080, -1104530451, -1120791464, 1041795830) + W(1, -1114491642, 1045418990, -1105318659, -1107859847) + + W(2, -1106844240, -1091806904, 1059085476, 1035107546) + + W(3, 1052544606, -1092275830, -1096734436, 1050727987) + + W(4, -1101337172, 1032037152, 1055073816, -1098252244) + W(5, 1006821496, -1091613549, 1054997266, 1024892180) + + W(6, 1048579163, -1136631349, -1117752159, -1114835508) + + W(7, -1107367092, -1104046587, 1054027444, -1107321874); + sum2 = W(0, 1026733028, 1045033945, 1006278578, 1045459634) + W(1, -1099170713, 1052123052, -1100795787, 1045732660) + + W(2, -1116840904, 1011374557, -1137517037, 1036547859) + + W(3, -1119731112, 1034116255, -1093051584, 1049735621) + + W(4, -1105853851, 1042679995, 1032967726, -1115758581) + + W(5, 1023766170, -1103542762, 1030006514, 1012804929) + + W(6, -1109160117, 1002727514, 1010971477, -1107202197) + + W(7, -1110446049, -1116987471, 1032206215, -1108882618); + WS(1061349183, 1052960956); + sum1 = W(0, -1129777715, -1091852750, 1051531432, 1028688348) + + W(1, -1138324169, -1098630008, 1050117038, 1001458973) + + W(2, 1040354025, -1086948121, 1058780391, 1026112190) + + W(3, -1113069161, -1087783598, 1063142264, -1101543047) + + W(4, 1026875928, -1090578505, 1054898923, 1001807913) + + W(5, -1148679486, -1098392878, 1055753851, -1107707133) + + W(6, 1026350741, -1105317245, 1035239971, 1009551482) + + W(7, -1099366619, -1113156712, 1049214940, 1035358529); + sum2 = W(0, 1030464194, 1016786912, -1124271380, -1112870762) + + W(1, -1114101658, 
-1111442214, 1036708968, 1035127660) + + W(2, 1031167290, 1040962599, -1145350210, -1119263136) + + W(3, 1076423743, 1052649879, -1106763968, 1030450749) + W(4, 1075592137, 1059016603, 1036666460, -1114823676) + + W(5, -1122765276, -1109148248, -1102467968, 1030453841) + + W(6, -1073082101, 1031660172, 1040647493, -1120712732) + + W(7, -1067763317, -1093716078, -1123536642, 1004437586); + WS(1049240575, 1032641532); + sum1 = W(0, 1013730783, 1058635434, -1096215015, -1112522509) + + W(1, -1105480706, 1033988870, -1101342144, -1114702617) + + W(2, -1114036922, 1060052265, -1095577996, 1026538642) + + W(3, -1113879072, 1060507082, -1090178495, -1118644393) + + W(4, -1104229785, 1060417175, -1088104859, 1026113715) + + W(5, -1108053747, 1056801557, -1096180920, -1115402364) + + W(6, -1114764169, 1045744608, -1096622362, -1165415829) + + W(7, -1146082265, 1050162924, 1035143657, -1096614130); + sum2 = W(0, -1122276311, -1070701037, -1102500792, -1123484857) + + W(1, 1035291415, -1088124412, -1135890037, -1122878839) + + W(2, -1120194245, 1066415245, 1041496354, 1004310705) + W(3, 1040446858, 1074084237, -1104713726, 1043449641) + + W(4, -1120530587, 1049852139, -1121031703, 1033156545) + + W(5, -1138075205, -1122804719, -1103825640, 1027648711) + + W(6, 1025217366, 1023048268, -1123990667, 1022832518) + + W(7, -1118638617, -1139513797, 1028358694, -1136916673); + WS(1058199967, 1050173679); + sum1 = + W(0, 1035142580, 1049466778, -1090097411, -1180331449) + W(1, 1027370573, 1058691576, -1101732588, 1040649373) + + W(2, -1130262537, 1039593430, -1086371009, -1120260556) + + W(3, 1035504777, 1068404719, -1088745136, 1040680775) + W(4, -1114438555, 1055172389, -1085029652, 1044382793) + + W(5, -1117883815, 1049428736, -1087836907, -1124005336) + + W(6, 1019391016, 1057860609, -1099893249, 1042216563) + + W(7, 1002653538, 1038434456, -1090680108, -1112700144); + sum2 = + W(0, -1116884059, -1098591960, 1049382153, -1103314039) + W(1, 1053572077, 1057696884, 1027480579, 
-1088718218) + + W(2, -1098664530, -1094622297, -1090176467, -1083506692) + + W(3, 1052736946, 1068603564, 1060239128, -1093271444) + + W(4, -1109898534, -1110319390, 1035982822, -1100206230) + + W(5, -1100793752, -1092295148, -1110105636, 1032439700) + W(6, 1049155169, 1060435963, 1048950002, 1034453968) + + W(7, -1110972354, -1093610507, 1038370396, 1006875670); + WS(-1089372991, 1071972514); + sum1 = + W(0, -1138920887, 1036533027, -1102318256, 994211079) + W(1, -1111770809, -1104793556, 1048489074, -1105180974) + + W(2, -1102811010, 1055175791, -1087761408, 1048668925) + + W(3, -1115715559, -1099167419, 1059252133, 1002658941) + + W(4, 1039086134, -1087507272, 1053162297, -1118760057) + + W(5, -1097310634, 1049070430, -1114433380, 1015366427) + + W(6, 1031148825, -1119324807, -1118494745, 1042346841) + W(7, 1025838913, 1003453989, 1042268638, 1035401151); + sum2 = W(0, 1030112837, 1042265630, 1026971571, -1110663804) + + W(1, -1120976510, 1036120703, -1114486026, 1036038820) + W(2, 990074782, 1034066763, 1060265655, -1093575269) + + W(3, -1093456890, 1054439327, 1048349987, -1097582212) + + W(4, 1037465699, -1093016348, -1099844302, 1034062294) + W(5, 1026987255, 1018637880, 1021593632, 1015927322) + + W(6, -1141406991, -1135344000, -1122669452, -1131991172) + + W(7, -1139369648, 1039303764, -1105461723, -1112258092); + WS(1064707295, -1106068023); + sum1 = + W(0, -1099232505, -1122722466, 1050522762, -1163785122) + W(1, 1035233224, -1096714286, 1050648196, 1029641572) + + W(2, 1008917721, -1092366070, 1057836626, -1106158547) + + W(3, -1113461090, -1085444145, 1065526616, -1112344246) + + W(4, 1030936392, -1087864449, 1058128499, -1120989814) + W(5, 1004172005, -1089654074, 1053303385, 1023496586) + + W(6, 1027313190, -1109977746, 1046037453, -1132212895) + + W(7, -1115756158, -1096643772, 1045431365, 1026679026); + sum2 = + W(0, -1064765544, -1093639717, 1043466147, -1118670633) + W(1, -1081497441, 1052279998, -1106897145, 1026058867) + + W(2, 1073825520, 
1049786220, -1111313590, -1120615525) + W(3, 1076205171, 1062543003, 1028519541, 1005098917) + + W(4, -1119211273, -1112239696, -1110796836, 1002459349) + + W(5, 1027623494, -1095053933, 1038884306, 1024155429) + W(6, 1025169073, 1041086677, -1138045203, -1139354443) + + W(7, 1044157267, -1111173396, -1103601885, 1031190784); + WS(-1111449083, -1157616163); + sum1 = W(0, -1140902742, 1050785866, -1096435822, -1106580347) + + W(1, -1108906886, 1041550344, -1095149714, -1111739842) + + W(2, -1107746707, 1063519767, -1097214657, -1112949561) + + W(3, -1091492130, 1070223306, 1059314470, -1090511327) + + W(4, -1106259270, 1056548545, -1091358630, -1115482851) + + W(5, -1123578095, 1033679887, -1102873285, -1102694659) + + W(6, -1112828291, 1033485706, -1098893397, -1119291392) + + W(7, -1129218172, 1037279241, -1105253969, -1105806553); + sum2 = + W(0, 1036877913, -1106537131, 1030920508, -1107276407) + W(1, -1107073590, 1029358898, -1148982309, -1158332371) + + W(2, -1107699229, 1059152956, -1101290720, -1104066278) + W(3, 1043855187, 1059494373, 1057939604, 1057817844) + + W(4, -1112562117, -1100478450, 1054005616, -1089434898) + + W(5, 1015957137, 1042289801, -1078154452, 1041374005) + + W(6, -1137092650, -1117856531, 1052159854, -1109759735) + + W(7, -1113777301, 1034651775, -1095096566, -1112996199); + WS(-1078462192, -1081042006); + sum1 = W(0, 1031777492, -1099264287, 1046006998, 987183826) + W(1, 1032754472, 1043123090, -1098614104, 1036343655) + + W(2, 1041664052, -1089905073, -1129274543, 1009990171) + + W(3, -1097597536, 1061087074, -1106235787, -1097811109) + + W(4, -1104816664, 1058804947, -1084543524, 1031138063) + + W(5, -1113883983, 1049386855, 1054195792, -1097275448) + + W(6, 1034668408, 1033759807, -1107840264, 1026038601) + + W(7, -1123431127, 1048603595, 1024992909, -1120606380); + sum2 = W(0, 1045113399, 1042319052, -1096815184, -1116743882) + + W(1, -1114091637, -1104675582, -1111655631, -1101972302) + + W(2, -1107885511, -1091877193, 
-1112520325, 1050965596) + + W(3, -1138476145, 1051152873, 1061790396, 1049921296) + + W(4, -1105540689, -1091823953, 1058061302, -1092103725) + + W(5, 1035866831, 1034466874, -1097657024, 1042351236) + + W(6, -1106333432, 1039480371, 1040922298, -1102843808) + + W(7, 1027850472, 1012452041, -1158709515, -1125733657); + WS(1056694143, -1116016311); + sum1 = W(0, -1113789237, 1051083644, -1097139864, -1123205479) + + W(1, -1100431022, -1126988186, -1094809972, 1034447326) + + W(2, 1050877939, 1064972735, -1089357240, 1036660918) + + W(3, 1018856325, 1040063173, -1094444318, -1108178982) + + W(4, 1050520537, 1063261729, -1089641983, 1047968461) + + W(5, -1097203011, 1049408537, -1094346349, -1113292277) + + W(6, -1103919120, 1026641091, -1100978845, 1038849023) + + W(7, -1146363818, 1053332672, -1097363714, -1114302469); + sum2 = W(0, -1117908102, 1019359999, -1120905960, 1017197102) + + W(1, 1013765983, -1145413682, 1028148573, 1027705717) + + W(2, 1045942570, -1095749402, 1032861566, -1127537989) + + W(3, 1086231745, -1062836828, -1087097136, 1031521084) + + W(4, 1048993826, -1094656357, -1145044098, 1027867224) + + W(5, -1110131683, 1037693606, 1040390747, -1114691836) + + W(6, 1005670802, -1125761673, -1112982831, 1031905995) + + W(7, -1126497913, 1018172516, 1016402027, -1122299754); + WS(1058898623, -1129627348); + sum1 = + W(0, -1115961291, 1024824451, 1015893437, -1111982511) + W(1, -1114910804, 1028416398, -1107386562, 1040362701) + + W(2, -1099814536, 1053775806, -1081677098, 1049488325) + W(3, 1014875535, 1049045373, 1054905786, 1016424474) + + W(4, 1042946610, -1081844071, 1057687981, -1105282974) + W(5, 1008636276, 1031972351, 1045923489, -1118478155) + + W(6, -1106406626, 1007262993, 1043411542, 1018226304) + W(7, -1120594878, 1044145240, 1016529769, 1008933435); + sum2 = W(0, -1133545882, 1024635319, -1123675276, 1049399700) + + W(1, 1048887568, -1104650552, 1009836758, -1109494815) + + W(2, -1092588040, -1106613727, -1090053582, -1105276085) + + W(3, 
1040041167, 1061771033, 1063348295, -1099749985) + + W(4, -1098849827, 1059347143, -1095509129, -1094328117) + + W(5, 1009986974, 1020453405, -1091467004, 1040581639) + W(6, 1044263399, -1100016935, 1034750124, 1041257354) + + W(7, -1120224962, 1003801667, 1013849670, -1116729410); + WS(1051954047, 1053754534); + sum1 = W(0, -1115617616, 1038078984, 1012663597, -1106689047) + + W(1, 1024010640, 1036755054, -1111840872, 1027644834) + + W(2, -1105864485, 1063482792, -1094956956, 1000291764) + + W(3, -1104806436, 1054877383, -1088048944, -1099610254) + + W(4, -1109064955, 1061183100, -1092794954, 1030290339) + + W(5, -1116880702, 1032370277, -1106496725, -1110075188) + + W(6, -1125878182, 1043596932, -1106933961, -1134690355) + + W(7, 1017392662, 1041423460, 1027390113, -1125796047); + sum2 = W(0, 1032585330, 1041153226, -1098242715, 1045110917) + + W(1, -1120362784, 1038083675, -1097998955, 1041971655) + + W(2, 1053495069, -1085822417, -1097864806, -1094674500) + + W(3, -1103911013, 1062089523, 1056336567, -1090235355) + + W(4, -1152525762, -1088542584, 1061326318, -1100885631) + + W(5, -1128147020, 1044421032, 1046053205, -1141849761) + + W(6, 1012657824, -1113855086, 1042938789, -1114518568) + + W(7, -1104983105, 1041652917, -1160964996, 1032164357); + WS(1061027871, -1104546242); + sum1 = W(0, -1105616597, 999947493, 1020340785, 1044086916) + W(1, 1044372474, -1093243621, 1054887248, -1111771647) + + W(2, 1021564129, -1093936864, 1050165368, 1042456335) + + W(3, 1044930783, -1082760108, 1059757863, -1109427891) + + W(4, 1046184033, -1089254915, 1059995997, -1109416824) + + W(5, 1039863327, -1087020314, 1055330116, -1114227250) + + W(6, 1037242932, -1095491356, 1049586618, -1119355674) + + W(7, 1036593932, -1089177610, 1053764638, 1025046581); + sum2 = W(0, -1071093827, 1060306884, 1049935328, -1107356858) + + W(1, 1026041138, -1087262504, -1137957530, 1030953599) + + W(2, 1068370917, -1080863241, 1024099582, 1026067688) + + W(3, 1068180608, -1104665609, 
-1097098353, -1149593287) + + W(4, -1099034443, 1065726420, -1115701817, 1031805059) + + W(5, -1126398467, 1036496204, 1043337171, -1111590832) + + W(6, -1114641099, 1052019191, -1155572887, -1110320626) + + W(7, 1032038973, -1112342652, 1039831002, -1165279566); + WS(-1085156031, 1045210454); + sum1 = + W(0, 1032928297, 1062377574, -1090582815, -1097791665) + W(1, -1111119484, 1050029461, -1097825399, 1025846477) + + W(2, 1023710638, 1062121112, -1087364538, 1032229097) + W(3, 1012189550, 1061895670, -1084850381, 1038427330) + + W(4, -1119437633, 1058328143, -1086721078, 1040335420) + W(5, 1039427612, 1050106073, -1087870448, 1036234257) + + W(6, -1114651991, 1051812528, -1095972450, 1026163704) + + W(7, 1039591187, 1050020631, -1089842983, -1146480404); + sum2 = W(0, -1123647935, 1061817512, 1083879676, 1056516802) + W(1, 1039341132, 1048143772, 1075567701, 1037735150) + + W(2, 1035983028, -1106274437, -1081957201, 1028242764) + + W(3, -1100132407, -1080635557, -1069811820, -1096421593) + + W(4, -1107056231, -1101127944, -1080342730, 1035329144) + + W(5, 1035406254, -1113472410, -1083251742, 1030744652) + + W(6, -1116224056, 1025367216, -1113852980, -1094803270) + + W(7, 1039408558, -1101636583, -1087422720, 1049804166); + WS(-1079692512, 1054562755); + sum1 = + W(0, -1109749038, 1042438683, -1092016161, 1050333996) + W(1, -1107273417, 1049923207, -1091034933, 1042543591) + + W(2, -1102577952, 1044388569, -1089719139, 1055304278) + W(3, 1030213964, -1113189609, 1050309800, 1034159515) + + W(4, 1036563817, -1089184118, 1048990040, 1040366472) + W(5, 1046944740, -1087748930, 1053009554, -1122229091) + + W(6, 1031914983, -1096053014, 1048709856, 1006842402) + W(7, 1050658577, -1091524869, 1049517424, 1034826102); + sum2 = W(0, 1041125733, -1094982607, 1055140379, 1061130686) + W(1, -1097603156, 1045744428, 1049918932, 990353951) + + W(2, -1103003109, -1102781952, -1106862916, 1058588396) + + W(3, -1107000988, 1063877853, 1066829095, -1096233798) + + W(4, 1032085636, 
-1096755169, -1090474230, -1095838692) + + W(5, 1048646468, 1041101455, -1081598978, -1089860215) + + W(6, -1094740103, 1038162032, 1049572267, -1096717844) + + W(7, 1056003520, -1106723297, -1085369603, 1044183141); + WS(-1080283264, 1053171958); + sum1 = + W(0, 1045885175, 1054395509, -1083475508, 1028512496) + W(1, -1113658461, 1050045174, -1095192933, 1035446876) + + W(2, 1034538909, 1055879386, -1085810962, 1026195253) + W(3, 974135701, 1060600957, -1084797135, 1044250612) + + W(4, -1122513036, 1061640838, -1086057732, 1037521197) + + W(5, -1118011958, 1060753746, -1087204563, -1117222317) + W(6, 998696774, 1054788217, -1092652351, 1038575473) + + W(7, -1108239136, 1061053402, -1108529436, -1101819472); + sum2 = + W(0, 1049760823, 1082611215, -1114287788, -1130959534) + W(1, -1132622893, 1077078383, 1055018182, 1035090608) + + W(2, 1033001558, 1066257283, -1101461080, -1112584386) + + W(3, -1136534149, 986681348, -1078537691, -1090394759) + W(4, 1043696998, -1084657320, 1054805894, 1058031741) + + W(5, -1092886300, -1069070549, 1065047159, 1038822180) + + W(6, -1098156850, -1072372844, 1049854091, 1041633511) + + W(7, 1058228696, -1074505611, -1082348593, -1115499033); + WS(-1073398920, 1057727890); + sum1 = + W(0, 1041979362, 1052994159, -1092671491, -1110295050) + W(1, -1106188342, 1056847849, -1087976920, 1033778006) + + W(2, -1108985859, 1060197326, -1085676269, 1040328063) + + W(3, 1040300287, 1059196498, -1108460501, -1114468666) + + W(4, 1044683952, -1094535618, 1051731585, -1102804911) + + W(5, 1046284093, -1094105983, 1046385743, -1098429184) + + W(6, 1044621499, -1102914751, 1031818215, -1104640754) + W(7, 990388050, 1043962286, -1104493131, 1033981777); + sum2 = W(0, 1021160276, -1089430647, -1102895535, 1031984432) + + W(1, -1093219342, -1135654176, 1044152573, -1096279754) + + W(2, -1089562294, -1079796815, -1095163331, 1011763192) + + W(3, -1106388958, 1074674993, 1074136497, -1101895278) + + W(4, -1099093932, -1098010580, -1095203310, 
-1117345690) + + W(5, -1180890998, 1052929457, -1099885509, -1095468231) + + W(6, -1090965228, 1040584224, 1041387674, -1107912603) + + W(7, 1057228486, 1047748991, -1131585606, 1045144673); + WS(-1087643711, 1073414034); + sum1 = + W(0, 1043421796, 1043019738, -1091862260, 1028452722) + W(1, -1116796288, 1056957030, -1093379129, -1112484637) + + W(2, 1042590607, 1048028053, -1089120888, 1043084437) + + W(3, -1098636497, 1065994910, -1081315117, -1120978641) + + W(4, 1008405302, 1061262410, -1088787029, 1046109207) + W(5, 1005024220, 1059267719, -1093593189, -1105879924) + + W(6, -1106384411, 1045820026, -1108751805, 1024805700) + + W(7, 1029633907, 1054563645, -1106230054, -1095994501); + sum2 = W(0, 1015257573, -1101361376, -1151605286, -1114431290) + + W(1, -1117575950, 1051853927, -1126719224, -1122150057) + + W(2, 1040590395, 1007636882, -1099060090, -1123376079) + + W(3, 1040031817, -1085239439, -1069257368, -1122292554) + + W(4, -1104875608, 1029484438, 1078894141, 1045790053) + + W(5, -1111928738, 1055085469, 1065488662, -1105709439) + + W(6, -1124331304, -1104410806, -1097362053, 1046590167) + + W(7, -1104400845, 1055334030, -1099504604, -1096100911); + WS(-1089052703, 1050218486); + sum1 = W(0, 1024924308, 1032977094, -1127622480, 1016061078) + + W(1, -1124263665, 1048865735, -1103037451, 1027347950) + + W(2, -1114112032, 1032370348, -1093324152, 1047024376) + + W(3, -1101857739, 1057017435, -1130658874, -1114101947) + + W(4, 1004177297, -1096925880, 1049797356, -1136753151) + + W(5, -1132101549, -1106597310, -1147023991, -1136950735) + + W(6, 1037546120, -1101706323, -1148374781, 1016234194) + + W(7, -1127618735, -1138126252, 1041185327, -1135392976); + sum2 = + W(0, -1103127935, 1033687244, 1044282132, -1115309191) + W(1, 1034551493, -1111330163, -1132159854, -1101537844) + + W(2, -1131095837, -1111327049, 1043675418, 989659429) + W(3, 1056852609, -1097897810, 1038437355, -1126658673) + + W(4, 1041291951, -1092598078, -1085166716, -1101159602) + + W(5, 
1031312786, 1027703970, -1122838335, 1065573620) + W(6, -1119687326, -1119650108, 1030316614, 1035948285) + + W(7, -1125922018, 1037109557, -1115941571, 1021015548); + WS(1065854560, -1114586365); + sum1 = W(0, 1025132845, -1093129789, 1039292161, 1052478106) + + W(1, -1132208108, -1097600366, 1047014713, -1114318540) + + W(2, 1043375962, -1094779500, 1054220253, 1045472794) + W(3, 1036127555, -1085251325, 1059484531, 1040640636) + + W(4, 1042813603, -1085614646, 1058902029, 1026772633) + W(5, 1042302785, -1083792532, 1058554289, 1030110605) + + W(6, 1041475414, -1088034874, 1049492791, -1123458066) + + W(7, 1044294516, -1081301837, 1057966909, 1048041627); + sum2 = + W(0, -1082315074, -1085719643, 1072912386, -1097858732) + W(1, 1044643809, 1026206592, -1098026838, -1097387905) + + W(2, -1098370418, 1060376399, 1068488422, -1097681385) + W(3, 1032322202, 1066420950, 1072201946, -1095903963) + + W(4, -1088636595, 1042910065, 1055106268, -1092707091) + + W(5, 1057109638, 1025783712, -1087763572, -1104219349) + + W(6, -1093539506, -1117795056, -1084317601, -1087895305) + + W(7, 1061431798, -1120476284, -1077472196, -1105350768); + WS(-1071370880, 1080498273); + sum1 = + W(0, -1117491793, -1117320376, 1025681812, -1112958490) + W(1, 1039364315, -1100646881, 1049740511, -1103504723) + + W(2, 1035846886, -1094786920, 1058631773, -1105396320) + W(3, 1023528601, 1045848824, -1094741601, 1032469201) + + W(4, -1131889533, 1057069771, -1088346742, 1034336534) + + W(5, -1106314247, 1055228236, -1102894479, -1144513794) + + W(6, 1028375004, 1020389492, -1138962992, -1116565942) + W(7, 1013100112, -1127017775, 995411618, 1002008579); + sum2 = W(0, -1097850747, -1108946242, -1129761151, 1024320851) + + W(1, -1107398437, -1114277717, -1118690429, 1012636527) + + W(2, -1085613590, 1063125687, 1040789135, 1029278628) + + W(3, 1042292809, -1084555354, -1101999238, 1026191463) + + W(4, 1054106205, 1059764079, 1051725304, -1119736004) + + W(5, -1106648125, 1041901467, -1128317719, 
-1127460399) + + W(6, 1035361236, -1103098774, 1022272136, -1115360404) + + W(7, -1122554235, -1148617751, 1025957669, 997652141); + WS(1058528159, 1053906024); + sum1 = W(0, -1112917396, -1104256080, 1023681564, 1039659283) + + W(1, 1035675727, -1092229473, 1058833811, 1021887717) + + W(2, -1104938189, -1097223269, 1051100098, 1033492802) + + W(3, 1040468760, -1082484606, 1061928249, -1119337706) + + W(4, -1105254898, -1103704557, 1057519273, -1099217437) + + W(5, 1020887917, -1100912091, 1047691241, -1127325712) + + W(6, -1113141838, -1118771074, 1047329733, -1114402703) + + W(7, -1149793215, -1099449368, 1044776397, 1024214504); + sum2 = W(0, -1129442596, 1040644144, -1106302200, -1105488042) + + W(1, 1023778714, 1033307012, 1056981996, 1014815489) + W(2, -1113953277, 1056896617, 1082625405, 1048992482) + + W(3, 1026809294, -1080168883, -1064674592, 1043950232) + + W(4, 1035644478, 1030172798, -1098216535, -1120732434) + + W(5, 1034718426, 1046899220, -1107268878, -1121557578) + + W(6, 1016827280, -1114672577, -1138831353, 1026207834) + + W(7, 1038084720, -1138817753, -1118740750, -1122753582); + WS(1050028863, 1057904824); + sum1 = W(0, 1016998719, -1115527732, 1049960370, 1033316845) + + W(1, 1031484924, -1114471481, -1097014217, 1045307870) + + W(2, -1113918453, -1092713340, 1059124695, -1113659631) + + W(3, 1035763396, -1086973069, 1055821255, -1113580281) + + W(4, -1100192683, 1048884339, 1045748179, -1104759887) + + W(5, 1046082071, -1093669960, 1046233018, 1024760783) + W(6, 1023767163, -1109475380, 972758359, 1028768132) + + W(7, -1109585801, -1102684193, 1047341446, -1118597711); + sum2 = W(0, 1042577042, 1058174637, -1093234196, 1036828005) + W(1, 1035985609, 1022451597, 1028288917, 1036599280) + + W(2, -1132583692, -1083350192, -1089654696, 1043707159) + + W(3, -1108557005, -1102051996, 1066585170, -1099096459) + + W(4, 1045882832, -1096734805, -1112976831, -1117191962) + + W(5, 1049978360, 1040525485, -1101233023, 1038823523) + + W(6, -1123148614, 
-1111555351, 1016281700, 1010883764) + + W(7, 1037998467, 1012738322, 1035922998, -1106116243); + WS(1064158815, 1035299335); + sum1 = + W(0, -1112803360, 1016610641, 1043480489, 1023721675) + W(1, 987363362, -1111853690, -1116662313, -1114846141) + + W(2, 1040202935, -1090418272, 1043761841, 1040437218) + W(3, -1148543050, -1086418793, 1062267936, 1044974085) + + W(4, -1119103150, -1096774480, 1026731899, -1112576238) + + W(5, 1016866897, -1106302329, 1046949595, 1035797256) + + W(6, -1120253212, -1125874772, 1044534239, -1118929285) + + W(7, -1136961231, -1111706212, 1038415821, 1017229376); + sum2 = W(0, 982347958, -1130481715, 1039697927, 1029826582) + + W(1, 1032509555, -1105443060, -1102232537, -1106323503) + + W(2, -1111294086, 1041532435, 1059036310, -1087181565) + + W(3, 1034453505, -1090026061, 1065332554, -1095157490) + + W(4, -1107492941, -1115327745, 1049776291, 1032022369) + + W(5, 1047671755, -1099067440, 1035057967, -1113855935) + + W(6, -1114281099, 1007189687, -1116755236, -1137758215) + + W(7, 1041657921, -1106888056, 1039394631, -1116441678); + WS(1064590463, 1056702913); + sum1 = W(0, 1029043770, -1118119137, 1044521535, -1108622774) + W(1, 1053372231, 1049750478, 1040212560, 1055292354) + + W(2, -1096751755, -1090400466, 1048069253, -1086796959) + + W(3, 1042753121, -1089893441, -1098259842, 1028130891) + + W(4, -1132040847, 1035942347, 1046519145, 1043794349) + + W(5, -1098166823, -1098407696, -1126503113, -1097599727) + + W(6, 1054745035, 1057009004, 1029470901, 1057963224) + + W(7, -1101919647, -1105867448, -1137574860, -1099480944); + sum2 = W(0, -1112469736, 1047380321, -1107309008, -1130012001) + + W(1, -1109993908, 1048112351, -1096267194, 1044766357) + + W(2, -1113626244, 1039961237, -1149218892, 1024449363) + + W(3, -1101975228, 1053720344, 1041967405, -1105185961) + + W(4, -1113395603, 1033059361, -1105421186, 1034928166) + + W(5, -1115391866, -1117206471, 1041082150, -1103730760) + + W(6, -1111764318, 1050486894, -1097114178, 
1041700935) + + W(7, -1114050909, 1029031616, -1116211284, -1133808950); + WS(1049282175, -1081654589); + sum1 = W(0, 1026879882, 1056840376, -1089715922, 1023576205) + + W(1, -1106636470, 1051340514, -1103588481, 1019713064) + + W(2, 1025593670, 1059046350, -1087728227, 1018583793) + + W(3, -1120434947, 1060994015, -1081081863, -1116831948) + + W(4, -1109057316, 1062254571, -1089160278, 1040563949) + + W(5, -1103814142, 1054415372, -1110084674, -1148705741) + + W(6, -1108405355, 1044610849, -1097611347, -1130543166) + + W(7, 1025046173, 1057281100, -1102762237, -1108038889); + sum2 = W(0, 1021240677, -1112389783, 1025718250, -1104779102) + + W(1, -1117500355, -1154883848, -1096682611, 1051908652) + + W(2, -1104166105, -1102583420, 1048911763, -1093040032) + + W(3, -1110155670, 1061056453, 1065471868, 1051220588) + + W(4, -1101525323, -1123615532, -1149445264, -1113750073) + + W(5, -1099065181, 1038434309, -1090281103, -1098156535) + + W(6, -1135933351, 1041908272, 1042017461, 1046751376) + + W(7, -1100288377, -1120914319, -1131934595, -1100861861); + WS(1044733566, -1111466942); + sum1 = W(0, 1033634167, 1060020808, -1092913556, -1112663104) + + W(1, -1105078967, 1041929508, -1103349137, -1134753630) + + W(2, -1104340430, 1062221635, -1092289734, -1126194825) + + W(3, -1106426842, 1060048443, -1090785767, -1110397240) + + W(4, -1101553619, 1059584252, -1094348262, -1114529117) + + W(5, -1110993602, 1054717575, -1097827057, -1113238887) + + W(6, -1113465444, 1044713794, -1098824243, -1140846474) + + W(7, 1020428386, 1047352089, 1041060918, -1094820883); + sum2 = W(0, 1040248415, 1084299283, 1054267917, 1002599056) + W(1, -1113076108, 1066146464, 1032173778, 1029289900) + + W(2, 1043824330, -1073408210, -1102778436, 1027082476) + + W(3, -1105221470, -1067326117, -1084477148, -1116688572) + + W(4, 1036602392, -1097425816, -1149511007, 1024010398) + + W(5, -1134849104, 1016852200, 1040914456, -1121212984) + + W(6, -1163695359, -1119512248, 1026379762, 1007530464) + + 
W(7, 1011808464, -1118870508, 1011171408, 1020903456); + WS(1041204862, 1020998748); + sum1 = + W(0, 1041320410, 1040787048, -1129649268, -1136864165) + W(1, -1111643799, 1044400490, -1096018340, -1123167632) + + W(2, 1033147201, -1112000574, 1041936758, 1014392453) + W(3, 1033305824, 1057857240, -1090212595, 1043382402) + + W(4, -1110534482, 1058317173, -1083609992, 1046901861) + W(5, 1048653459, 1013490791, -1108280561, 1040774794) + + W(6, -1106102028, 1040475768, -1098465902, -1121200123) + + W(7, -1129433652, 1032979455, -1104103124, -1123801518); + sum2 = W(0, 1033367466, -1105739926, 1035166617, -1121294834) + + W(1, 1036408153, 1041467605, -1116824529, 1020242785) + + W(2, 1049195658, -1097896497, -1127250349, 1024987646) + + W(3, -1116950481, 1069287141, -1103787577, -1113169844) + + W(4, -1081345874, 1045676499, -1104285546, -1109755223) + + W(5, 1035726191, -1097322898, -1110450029, -1162599282) + + W(6, -1109751565, 1034595972, 1041775082, -1128984227) + + W(7, 1034565507, 1022861151, -1111377560, 1015705141); + WS(1061587071, -1140717261); + sum1 = W(0, -1112565188, -1082585266, 1063001355, 1029148274) + + W(1, 1035683412, -1091946062, 1057075562, -1133150565) + + W(2, 1034588141, -1086181493, 1061257307, -1121255338) + + W(3, -1105710858, -1084260507, 1062016535, 1031972073) + + W(4, 1024277837, -1087521200, 1059484653, -1140527211) + + W(5, -1104681461, -1094344584, 1051726365, 1015752222) + + W(6, 1033519565, -1109665588, 1053582679, -1140031633) + + W(7, -1097426768, -1092361421, 1050023351, 1054184694); + sum2 = W(0, 1052095029, -1068831962, -1076286348, 1061242602) + + W(1, 1040595366, -1076957703, -1081349117, 1046002483) + + W(2, -1098987175, -1081538085, -1088771362, 1040290526) + + W(3, -1084153168, 1037971476, 1057908839, -1093902656) + + W(4, -1091290481, 1070110671, -1114213425, -1094367529) + + W(5, -1096720628, 1069986978, 1063483875, -1094489561) + + W(6, -1101102486, 1074264190, 1068552571, -1099236623) + + W(7, 1044734380, 1070997731, 
1075267150, -1081182731); + WS(-1071242520, -1095264341); + sum1 = + W(0, -1097248122, 1057257956, -1095464807, -1098637431) + W(1, 1017326143, 1050464317, -1098731544, 1039614697) + + W(2, -1105336241, 1059871125, -1096805078, 1034135507) + + W(3, -1091856432, 1058219580, -1088025845, -1098503641) + W(4, 1051920949, 1056167546, 1032394345, 1057766069) + + W(5, -1097210280, 1051126389, -1089070234, -1095933097) + + W(6, -1122345698, 1047883744, -1102431837, 1030493340) + + W(7, 999615444, 1055359408, -1098523426, -1112287963); + sum2 = W(0, -1132961311, 1038597560, -1089057302, 1055074030) + + W(1, 1034954689, -1114842936, -1113751042, 1048893008) + + W(2, -1131131128, 1039456110, -1095192990, 1046429121) + + W(3, -1105685093, -1104727669, -1097910004, 1057064004) + + W(4, 1044104138, -1085849936, 1064526735, -1091871118) + + W(5, -1118659572, 1053272959, 1045257331, -1096323208) + + W(6, -1117347443, -1117746499, 1054461586, -1090448877) + + W(7, -1140373421, 1049759548, 1062278408, -1081803378); + WS(1055746431, 1040288248); + sum1 = W(0, -1154016945, 1057716289, -1093759650, -1122750430) + + W(1, -1117613130, 1055590360, -1094487986, -1117149477) + + W(2, -1108247483, 1059644857, -1089867895, 1031955359) + + W(3, -1114072538, 1067207252, -1085229664, 1016534394) + + W(4, -1103530014, 1062212336, -1089860607, 1044969554) + + W(5, -1134724696, -1104299500, -1088563377, -1137442922) + + W(6, -1102903742, 1049302591, -1097577389, 1035178328) + + W(7, 1046496384, 1054319858, -1094962569, -1108831509); + sum2 = W(0, 1041968914, 1051752153, -1123800846, -1117011966) + + W(1, 1026703145, -1115260470, 1035127537, 1031245794) + W(2, 1033601916, 1038625472, 1035458769, -1109678904) + + W(3, 1035338047, 1052354182, 1035609386, 1050054059) + + W(4, -1106207855, -1118891350, 1057116581, -1098815779) + + W(5, -1094112140, -1081023345, -1104080857, 1038926653) + + W(6, -1100978937, -1084581041, 1055976243, -1108176195) + + W(7, -1106593629, 1066756564, -1101750238, 
-1107244818); + WS(-1089881759, 1037957184); + sum1 = + W(0, 1000251530, -1098029886, -1105974859, 1041712355) + W(1, -1117869170, -1095515176, 1060224425, 1040628361) + + W(2, 1029072469, -1089055515, 1060210489, 1042449912) + W(3, 1031851625, -1079829205, 1061092856, -1119516507) + + W(4, 1024508279, -1094362449, 1060581840, -1102207965) + + W(5, -1105582638, -1105281331, 1056017638, -1107063314) + + W(6, -1121710869, -1112643769, 1040568609, -1112932479) + + W(7, -1105403363, -1102959900, 1050032516, -1124065669); + sum2 = W(0, 1051114274, -1102532150, -1093533301, 1042271071) + + W(1, -1105565679, -1114476854, 1038543175, -1112348507) + + W(2, 1055378846, -1082969540, -1075201485, -1087360189) + + W(3, -1098769061, 1064670328, 1077554590, -1100903801) + + W(4, -1106275827, -1102001010, 1048871628, -1115312306) + + W(5, -1101598109, 1044770567, 1040464379, 1031777408) + + W(6, -1109849433, -1114021751, -1111404687, -1125205988) + + W(7, -1104426610, 1049459491, 1049461049, -1104546886); + WS(-1089609215, -1090633405); + sum1 = W(0, -1095985139, 1060468726, 1029992257, -1106189340) + + W(1, -1131730646, -1114989785, 1035463235, -1121474072) + + W(2, -1106836118, -1111340153, 1053834481, -1109753904) + + W(3, -1120898567, -1083229835, 1057093887, -1170836447) + + W(4, -1122196780, -1097505195, 1060127651, -1099640750) + + W(5, -1108220230, -1091862496, 1056095953, -1106303115) + + W(6, -1113602180, -1118278055, 1053644551, -1102165833) + + W(7, -1111282801, -1094974325, 1060964715, -1114233933); + sum2 = W(0, -1102755055, 1028184991, 1014375557, 1018724927) + + W(1, -1123071369, 1018963304, 1034540123, -1154538439) + + W(2, -1101642458, 1042446539, -1110958917, 1030328833) + + W(3, -1121616776, 1048904011, 1055634924, -1104340897) + + W(4, -1123659230, 1022095962, -1122273513, -1123674010) + + W(5, 1034971308, -1102092583, 1047229966, -1103087688) + + W(6, -1114444448, -1115812270, 1006913192, -1118183380) + + W(7, 992820811, -1115600403, 1032293939, -1116477720); 
+ WS(-1089497119, -1083970920); + sum1 = + W(0, 1049400081, 1017038202, -1096866095, -1129668560) + W(1, -1112985379, 1054102421, -1092440080, 1023489999) + + W(2, 1032734776, 1052273107, -1090492445, 1039999326) + W(3, 1046931016, 1050677101, -1077775563, 1047866250) + + W(4, 1032966898, 1056472798, -1095500286, 1031139121) + W(5, 1026703569, 1054012759, -1141821439, -1128248842) + + W(6, 1023567673, 1046736000, -1103091876, 1037611037) + + W(7, 1020804657, 1054561791, -1096856494, -1102316761); + sum2 = W(0, -1119432539, -1113150368, 1049552369, -1123950204) + + W(1, -1114702364, -1107904829, -1109676099, 1034167140) + + W(2, -1131454616, 1051795717, 1049548364, 1018171432) + W(3, 1045807900, 1065572002, 1052256292, 1024318084) + + W(4, -1107556037, -1104213250, 1058452060, -1104886632) + + W(5, -1138592655, -1096394963, -1077576443, -1103410805) + + W(6, -1120197723, 991774812, -1102019177, -1103678409) + + W(7, -1116354551, 991245404, 1049782966, -1123412946); + WS(-1092399743, 1070790531); + sum1 = + W(0, 1031557241, -1110110152, -1086332927, 1040307665) + W(1, 1024013147, -1123928476, -1104003948, 1029330819) + + W(2, 1045977841, -1100428029, -1091187884, 1054647477) + + W(3, 1049827334, -1093314535, -1110202001, 1048337215) + W(4, 1044712430, -1098211617, 1031718940, 1045074033) + + W(5, 1042539942, -1120318828, 1031171411, 1042256462) + W(6, 1022243624, -1110797385, -1118825252, 1034920639) + + W(7, 1042631871, -1103475236, 1047659449, 1041342974); + sum2 = W(0, 1035080118, 1049091174, 1068767241, 1056655092) + W(1, -1111978522, 1049791001, 1068692590, -1120502660) + + W(2, 1043189101, 1038894820, 1041587068, 1054889999) + + W(3, -1107203795, -1088677682, -1079138335, -1094007639) + + W(4, 1020874649, -1074681252, -1088844169, -1106617291) + + W(5, 1040978901, 1032440616, 1033618204, 1037089622) + + W(6, -1103963851, -1139760881, 1036797334, -1115116202) + + W(7, 1014904337, -1132242473, 1040720920, 1029103732); + WS(-1076899872, -1090262268); + sum1 = W(0, 
-1111775583, -1105212520, 1053805936, -1130865048) + + W(1, -1105515965, -1109220950, 1044900104, -1124779047) + + W(2, 1031134981, -1105276619, 1061229259, -1108606366) + + W(3, -1152866339, -1081197371, 1056451270, 1012899053) + + W(4, 1022185487, -1085735312, 1061180827, -1111684786) + + W(5, 1031461535, -1090100176, 1061600458, 1038272628) + W(6, 991487050, -1106571081, 1040836429, -1123743158) + + W(7, 1013082739, -1094472404, -1114201801, 1043636133); + sum2 = W(0, -1092758077, 1054372816, 1025999909, 1025560437) + W(1, -1099940688, 1036990346, 1044094629, 1015649794) + + W(2, -1096959860, -1102824961, -1093700170, -1134776466) + + W(3, -1093749782, 1072234824, 1074465248, -1118772240) + + W(4, 1047974360, -1091574668, -1086059593, -1097328702) + + W(5, -1120633714, -1098403340, -1083936129, 1035273338) + + W(6, 1036248767, -1097220109, 1045585843, -1098634116) + + W(7, -1113388587, 1024848209, -1093741871, 1051989609); + WS(-1097318719, -1106686758); + sum1 = + W(0, -1138034233, 1031803579, -1107190316, -1118470609) + W(1, 1022220430, -1105675680, -1110575515, 1013564641) + + W(2, -1115259277, 1060345356, -1091489993, 1028754310) + W(3, 1037878284, 1040095311, -1082557626, 1043380040) + + W(4, -1109043646, 1057210483, 1054616755, -1126688788) + + W(5, -1118564628, 1018511203, -1113047197, -1103180903) + + W(6, -1132126739, 1038086620, 1037558661, -1105313401) + + W(7, -1123355602, 1040876381, 1040895458, -1110366427); + sum2 = + W(0, -1116780480, 1011610145, -1126532250, 1031867194) + W(1, 1034654933, -1109786563, 1049246354, -1117436106) + + W(2, 1033980194, 1034290987, -1098441369, 1050453777) + W(3, -1106740681, 1058839838, 1055488745, -1098876833) + + W(4, 1037910476, -1112504437, -1081564854, -1101879397) + + W(5, -1110588743, 1048688059, -1109313096, -1105182695) + + W(6, 1033369701, -1100404580, 1046210019, 1033744368) + W(7, 1006481529, 1024874106, -1122526519, 1042001428); + WS(1061957727, 1058150789); + sum1 = + W(0, 1009728708, -1099288198, 
1046412713, 1036440691) + W(1, 1031914819, -1097230757, 1055587995, -1109853493) + + W(2, 1043097483, -1087266183, 1056802398, -1106212891) + W(3, -1106863550, 1043273168, 1049133325, 1023154886) + + W(4, -1097372952, 1055416522, -1086687979, 1047286155) + + W(5, -1108191069, 1050026271, -1105368103, 1036106272) + + W(6, 1032793786, -1110590381, -1107437206, 1024106902) + + W(7, -1129266281, -1108247452, 1042988739, -1120124353); + sum2 = W(0, 1021676194, 1026925373, -1130821928, 1011207236) + + W(1, -1104415808, 1041631623, 1047340087, -1118374396) + W(2, 1041444786, 1049347642, 1055108729, 1032289888) + + W(3, -1104915444, -1075706495, -1078646873, -1097824639) + + W(4, 1042839409, 1068941883, 1058328878, 1041964936) + W(5, 1025561656, 1049874161, 1054741696, -1122940124) + + W(6, -1111488206, -1096918949, -1108156193, 1033049167) + + W(7, -1119893126, 1030080935, -1107876167, -1131526532); + WS(1058678303, 1013994144); + sum1 = + W(0, -1118673511, 1050426208, -1111022430, -1110204509) + W(1, -1145988558, 1032382213, 1019043839, -1116037137) + + W(2, -1107600786, 1059103555, -1086017695, 1043824660) + + W(3, -1097799452, 1061451838, -1090188381, -1111434373) + + W(4, 1050901309, -1098106458, 1050664020, -1123560275) + W(5, 1025019066, 1032049821, -1093177128, 1043538778) + + W(6, 1034380585, 1038038624, -1133222475, -1104489131) + + W(7, -1117490811, 1043957152, -1113568728, -1110765932); + sum2 = + W(0, -1105859137, 1052524883, -1116369180, 1025172812) + W(1, 1047608842, -1096529292, 1033298597, 1035180205) + + W(2, -1101390223, -1103682938, -1095014501, -1138831970) + + W(3, 1011548907, 1018027622, 1063707518, -1096368530) + W(4, -1098146078, 1039330573, 1056731707, -1094263126) + + W(5, 1047719826, -1101134187, -1086128756, 1061932077) + + W(6, -1099968186, 1054816538, 1037701277, -1095466983) + + W(7, 1028887915, -1105962981, -1150004284, 1016476277); + WS(1063038079, 1033861047); + sum1 = + W(0, -1111914898, 1016779545, -1107257338, -1114919788) + W(1, 
1040884807, 1014390112, 1045397811, -1162045399) + + W(2, -1102681756, -1109833994, -1096677860, -1101855164) + + W(3, 1061986003, -1095449911, 1065846455, 1049909751) + + W(4, -1095429920, -1097838100, 1029008656, -1097193708) + + W(5, -1109559865, 1019923216, 1039008680, -1113534130) + W(6, 1034752240, -1148961393, 1033037520, 1007645095) + + W(7, -1104233906, 1009274926, 1029851870, -1103320918); + sum2 = W(0, -1112457849, -1117177515, 1031942991, -1106095669) + + W(1, -1106002020, 1017665294, 1033022206, 1041685053) + + W(2, -1103752624, -1098231154, -1133545004, -1095196752) + + W(3, -1114732591, 1062239338, 1061426536, 1046863079) + + W(4, -1102536580, -1097906884, -1109528685, -1105989872) + + W(5, 1029961226, -1101585142, 1032626487, -1120505375) + + W(6, -1114728817, 1037597321, -1114069131, 1028074076) + + W(7, 1038070251, -1112453861, 1032988844, -1124888510); + WS(1060691839, -1121680521); + sum1 = W(0, -1112633460, -1105323108, 1045243383, -1113361469) + + W(1, 1034282296, -1097943963, -1117038833, -1109479713) + + W(2, 1016470792, -1091137362, 1060606770, -1110311799) + + W(3, -1124091490, -1083253895, 1065759511, -1102522287) + + W(4, 1036530779, -1090987401, 1062311590, -1114082482) + + W(5, -1110508885, -1090137469, 1056765570, -1104777690) + + W(6, 1036991882, -1098291034, 1054157412, 1016563642) + + W(7, -1133939630, -1093525810, 1057599172, -1167556095); + sum2 = + W(0, -1097189010, 1057901747, -1086723052, -1086497310) + W(1, 1041454563, 1039698409, -1087530727, -1090377488) + + W(2, -1110771302, -1122301550, -1094374456, -1110538585) + + W(3, 1043926258, -1125642190, 1064818137, 1061014784) + W(4, -1109373654, -1108285138, 1036021291, 1034378960) + + W(5, -1109768355, 1020817735, -1109979249, 1044647130) + W(6, -1114304258, 1040279727, 1055924705, 1013465199) + + W(7, 1043669875, -1101494695, 1040712355, 1050381101); + WS(-1087684831, -1094525449); + sum1 = W(0, 1038107891, -1093934649, 1052573184, 1038198313) + + W(1, -1121734275, 
-1096975274, 1049163549, -1113930844) + + W(2, 1035814365, -1093954738, 1040479417, 1035537105) + + W(3, -1107604308, -1090018791, 1066601083, -1102439448) + + W(4, 1038052214, -1088075930, 1058232216, -1113118893) + + W(5, -1101928869, -1090123804, 1057618495, -1148589172) + + W(6, -1104767443, -1106254226, 1040796439, -1146582942) + + W(7, -1127021124, -1100235009, 1051666987, 1033628463); + sum2 = + W(0, -1102391770, 1038892506, -1109920182, 1034045130) + W(1, 1032495868, 1032348345, 1035913564, -1111661823) + + W(2, -1140349122, 1055934176, 1043439866, -1148365864) + W(3, 1051494072, 1066287204, 1049172795, 1035742233) + + W(4, -1080481573, 1070533091, 1048700512, 1024670194) + W(5, -1072283498, 1056179234, -1122911277, 1043482108) + + W(6, -1085993190, -1098172645, 1042548330, -1117726050) + + W(7, -1091499210, 1038613315, -1103404378, 1024343165); + WS(-1095407551, -1073547033); + sum1 = W(0, -1103915126, 1043640456, -1106314263, -1099126911) + + W(1, -1101976843, 1041775803, -1098551608, 1023142916) + + W(2, 1024335748, 1061599658, -1096990689, -1110220795) + + W(3, -1108044768, 1057524528, -1095708126, -1106562202) + + W(4, 1050893513, 1060862745, -1102020840, 1056016670) + + W(5, -1095324014, 1028413803, -1092797676, -1099653918) + + W(6, -1107189281, 1044362232, -1110398586, 1031215069) + + W(7, -1114975198, 1051352312, -1097969466, -1138223071); + sum2 = W(0, -1112168075, 1034632723, 1034960798, -1112860057) + + W(1, -1111493200, -1122920764, 1040740621, -1108202591) + + W(2, -1123529566, 1009053647, -1105159128, -1115675116) + + W(3, 1022286331, 1049936313, 1057592446, -1112708946) + + W(4, -1103466952, 1051909492, -1126339526, -1146770753) + + W(5, 1035493883, -1099948229, 1041517424, -1116950762) + + W(6, -1113566520, 1029419308, 1016655048, -1115594520) + + W(7, -1106265315, -1107051081, 1024151142, -1103163741); + WS(1063407871, 1051042354); + sum1 = W(0, 1005154604, 1049807310, -1113122997, -1107282310) + + W(1, -1120529264, 1052429035, 
-1100831547, 1042685852) + + W(2, -1106161584, 1057743574, -1088929544, -1113923042) + + W(3, -1130858780, 1063894449, -1086654500, -1129272494) + + W(4, -1114354154, 1058090310, -1089886688, -1164896285) + + W(5, -1127255711, 1052514789, -1092092086, -1131567786) + + W(6, 1023693521, 1044231916, -1106897472, -1131959253) + + W(7, -1115504868, 1049931918, -1097639884, -1115987013); + sum2 = W(0, -1113031572, 1043434467, -1088941888, -1067187434) + + W(1, 1030565628, -1108784254, 1056568736, -1079041185) + + W(2, -1127702936, -1111949688, 1036016830, 1074040966) + W(3, 992883874, 1036810375, 1060861120, 1075709893) + + W(4, 1014952720, -1111107284, 1023742716, -1099161149) + W(5, 990495554, 1042918247, -1098281783, 992814626) + + W(6, 1018238740, -1120169364, 1022041004, 1039733673) + + W(7, 1023316920, -1112993936, -1112517088, 1028892682); + WS(1053166591, -1107822593); + sum1 = W(0, 1033874440, -1120335240, -1105709561, 1042509273) + + W(1, 1022746982, 1031831786, -1108379508, 1008295657) + W(2, 1035703758, 1052124034, -1093762507, 1044631286) + + W(3, 1019810386, 1048792350, 1046826502, -1124953572) + + W(4, 1044260196, -1095582904, 1053612659, -1112928126) + + W(5, 1042374543, -1088629918, 1045805451, -1113071488) + + W(6, 1040749291, -1097950065, 1037463342, -1120529605) + + W(7, 1041374685, -1085974448, 1042845918, -1132054272); + sum2 = W(0, 1010990056, -1096561981, 1040308359, -1119485380) + + W(1, 1039106059, -1097601992, 1052717590, -1115258493) + + W(2, -1102786881, 1031463124, 1058176412, -1101721469) + + W(3, -1114641521, 1058004743, 1045709336, -1138490792) + + W(4, -1133768253, 1042505523, 1034355291, -1101570252) + + W(5, -1101476912, 1041182740, -1096768246, 1029590734) + + W(6, -1103607882, -1105959219, 1033275384, 1037679123) + + W(7, -1113060394, -1099775079, -1105804625, 1036565969); + WS(1028916214, 1067075549); + sum1 = + W(0, 1039584518, -1148995491, -1098323934, 1026160863) + W(1, -1119264713, -1109885152, -1096386895, 1022848653) + + W(2, 
1016298993, 1055889168, -1096985498, -1150434165) + + W(3, -1105861885, 1067421167, -1085982162, 1034469007) + + W(4, -1113980813, 1059632007, -1087532161, 1044152506) + W(5, 1020015061, 1055091716, -1088331431, 1024761132) + + W(6, -1134777055, 1045130509, -1095878225, 1032543553) + + W(7, 1032661512, 1052375930, -1104015130, -1104676378); + sum2 = W(0, -1120016029, -1101422768, -1104653664, -1122344899) + + W(1, -1096222017, -1103630239, -1105545071, 1042131820) + + W(2, -1119382227, 1040689805, -1120927281, -1120071349) + + W(3, -1092715875, 1064582511, 1058022283, -1101606830) + + W(4, -1110818689, 1043406218, 1060808344, -1096987934) + + W(5, 1049876716, -1089499537, -1107109410, 1051590178) + + W(6, -1103484103, 1029271710, -1107008053, -1099435532) + + W(7, 1032841990, -1117522613, -1105419897, 1050342947); + WS(1047020030, 1040511430); + sum1 = + W(0, -1108637223, -1092384755, 1057603229, 1031927268) + W(1, 1033110649, -1090443530, 1053549631, -1122999800) + + W(2, 1044090139, -1085982557, 1061618217, 1026366340) + W(3, 1043424750, -1080046633, 1047924234, 1032840448) + + W(4, 1036624046, -1083957593, 1053999549, 1015929108) + W(5, -1120308481, -1096279270, 1057048888, 1020572028) + + W(6, 1033721201, -1149642107, 1057845673, 994286645) + W(7, -1106849700, -1100324533, 1052833857, 1039624063); + sum2 = W(0, 1035208568, -1124082321, -1093736918, 1048736861) + + W(1, 1043030610, 1008382386, 1050323039, -1118377912) + + W(2, -1107699159, -1107061123, -1114950423, -1106119918) + + W(3, 1044796248, 1049576136, 1074526989, -1111443567) + W(4, 1034655700, 1051573865, 1073198167, -1132389881) + + W(5, -1103668262, 1048541752, -1080621030, 1034733436) + + W(6, 1045164876, -1093669891, -1074078238, 1034120076) + + W(7, -1095424772, -1134934914, -1079139349, 1029501600); + WS(-1080108544, 1072234904); + sum1 = + W(0, -1115070415, 1027588151, 1031137458, 1018404227) + W(1, -1130610603, 1029085890, -1104252646, 1044293186) + + W(2, -1098588213, 1032775046, 1056995037, 
1028038521) + W(3, 1057466749, -1089931562, -1083673968, 1050777268) + + W(4, -1104374087, -1123371019, 1052314097, -1093125043) + + W(5, 1029557602, -1094030554, 1050628415, 1046325824) + W(6, 1018553236, 1034779836, 1033643941, -1108340394) + + W(7, 1001112594, 999211181, 1036988180, 1020739741); + sum2 = W(0, -1115557063, 1043108929, -1138826120, -1108049897) + + W(1, -1118678156, 1033401680, -1093193803, -1109597407) + + W(2, 1045099863, 1036186051, -1079584214, 1013268663) + + W(3, -1102856438, -1124450766, 1068262877, 1049634970) + + W(4, 1035209289, -1112637977, -1121148137, 1046161245) + + W(5, 1035167397, -1114646959, 1041792105, -1111895568) + + W(6, -1117208818, 1012643139, -1138286224, -1127251156) + + W(7, -1120387027, -1132473655, 1032348301, -1116707942); + WS(1061770399, 1033097145); + sum1 = W(0, -1127803310, -1100493560, 1049484359, -1117956049) + + W(1, -1108446526, -1122290538, 1040952192, -1101349332) + + W(2, 1046619869, -1090144134, 1049567736, 1015706547) + + W(3, -1102069216, -1124504076, 1060283011, 1051006353) + + W(4, -1115166002, -1096124978, 1036170309, 1027971186) + + W(5, -1107462472, -1113245389, 1049188012, -1105896346) + + W(6, -1113123881, -1115437168, 1004781969, 1011773295) + + W(7, -1109525661, -1114380655, 1043620172, -1118021614); + sum2 = W(0, -1115090363, 1035607607, 1036976780, -1102631851) + + W(1, -1146121477, 1032878473, 1008730983, -1105868946) + + W(2, 1029869322, 1020296178, 1050702114, -1106613659) + W(3, 1039239074, -1094567498, 1060438996, 1056742871) + + W(4, -1123366919, -1133024223, -1092544833, 1056526109) + + W(5, -1123569769, -1111505684, -1088372125, -1103439045) + + W(6, -1115297931, 1036037468, -1119747923, -1114092237) + + W(7, 1022224092, 1025152664, 1015952550, -1109382916); + WS(1063262431, 1041246684); + sum1 = + W(0, -1095604910, -1094141452, 1054301899, 1050361575) + W(1, 1035931377, -1103734011, 1050116188, 1015464930) + + W(2, -1116408259, -1088213584, 1059695011, -1130477940) + + W(3, 
-1113418944, -1084116356, 1063045487, 1005018697) + + W(4, 1035543059, -1088057339, 1060587189, -1114430457) + + W(5, -1109446615, -1084926720, 1062081288, -1112900435) + W(6, 1041501409, -1095142868, 1055163174, 968128030) + + W(7, -1111045021, -1085134126, 1061043357, 1012274924); + sum2 = W(0, -1088652568, 1079171932, 1068650823, -1089634291) + + W(1, -1101936220, 1073764649, 1062997529, -1106083041) + + W(2, -1091191468, 1074855165, 1053027607, -1102140198) + + W(3, -1102444501, -1113430470, 1039600983, -1123235056) + + W(4, -1123800532, -1095064501, -1105207914, 1028735871) + + W(5, -1111069004, -1078839186, -1082346847, 1056073450) + + W(6, -1088813372, -1078450334, -1088354544, -1114344542) + + W(7, 1050243792, -1071519758, -1081653443, 1060600412); + WS(-1071343712, -1080188504); + sum1 = W(0, 1048814204, 1024112567, -1101821021, -1155239198) + + W(1, -1112944093, 992179242, -1105846854, -1115614373) + + W(2, 1038698214, 1035991718, -1110088095, -1117961066) + + W(3, -1110388460, 1065540811, -1086620536, 1022833410) + + W(4, -1102353903, 1062800155, -1085891530, 1037955557) + + W(5, -1113346827, 1057270438, -1092189183, -1130491710) + + W(6, -1115354285, 1047814390, -1094362793, 1026576596) + + W(7, -1172877482, 1055244391, -1096652125, 1021561214); + sum2 = W(0, 1077488778, 1057844910, -1110901413, -1157420388) + + W(1, 1067869852, 1017329883, -1123912240, 1016773013) + + W(2, -1080283979, -1112258962, 1048616910, -1127671945) + + W(3, -1070445594, 1038200220, -1103189448, 1027002057) + + W(4, -1085461181, 1045877800, 1033584054, -1139116556) + + W(5, -1113067149, 1016515143, -1113680120, -1108492850) + + W(6, -1135625536, 1029029881, 1006246833, 1033508697) + + W(7, -1105631704, 1026840239, 1028070058, -1122607860); + WS(-1114300667, -1123436789); + sum1 = + W(0, 1048745214, 1024457460, -1084727981, -1124831645) + W(1, -1106612992, 1049280824, -1099711469, -1120312678) + + W(2, 1040619730, 1061265254, -1091611130, 991636461) + W(3, -1101020989, 
1066754197, -1090437274, -1115033343) + + W(4, -1120049178, 1060475010, -1087975654, 1037659252) + + W(5, -1129452308, 1056957204, -1087165059, 1036876438) + + W(6, -1112058252, 1052291799, -1095257281, 1024723233) + + W(7, 1040777596, 1051532558, -1091858685, -1128167880); + sum2 = + W(0, -1102675226, -1096018291, -1064870457, 1050758265) + W(1, 1045758589, 1033992894, -1078407710, -1091427154) + + W(2, -1106608364, 1053924217, 1074354229, 1044800412) + W(3, 1037856863, 1023354017, 1077566439, -1122690967) + + W(4, -1121700495, 1045521037, 1055978728, 1037435194) + + W(5, 1038509645, -1112080608, -1100523264, -1101170582) + W(6, 1041300787, 1019757091, 1032114149, 1048475585) + + W(7, 1013024414, -1111416466, -1096094745, 1031501486); + WS(-1081349952, -1073531246); + sum1 = + W(0, -1130396178, -1092471918, 1052933181, -1120779255) + W(1, -1123247001, -1094799967, 1040552427, 1032899791) + + W(2, 1049234666, -1087091570, 1062313536, 1048808240) + W(3, -1123595694, -1083589055, 1056428784, 1047688662) + + W(4, 1044575749, -1090515766, 1060166366, 1025076592) + + W(5, -1118849787, -1088301188, 1052569014, -1104574961) + + W(6, 1027091408, -1100895532, 956727337, -1114767697) + W(7, 1015989072, -1095518837, 1057005798, 1028829344); + sum2 = + W(0, 1036727690, -1101780564, 1047063866, -1095151348) + W(1, 1038605852, 1036224480, 1003722492, -1101329253) + + W(2, 1000461252, 1043313873, 1071364196, -1071443435) + W(3, -1108807283, 1058417373, 1075919102, -1072909475) + + W(4, -1118764382, -1110550387, 1031984760, -1137970650) + + W(5, 1040753555, 1041024731, 1052825370, -1094681513) + W(6, 1025474323, 1032765623, -1104266299, -1130737363) + + W(7, 1026410269, -1114514162, 1049040777, -1100563228); + WS(-1087415039, 1075227720); + sum1 = W(0, 1014202565, 1030200505, 1042588557, 1032642210) + W(1, -1121306305, -1093044977, 1045750086, 1039690838) + + W(2, -1115466670, 1052765481, -1099185023, -1099716881) + + W(3, 1055732347, -1086016434, -1102841730, 1054066516) + + W(4, 
-1088701572, -1108244135, 1060432915, -1089156724) + + W(5, 1045428619, 1044242725, 1001272287, 1045967960) + W(6, -1106855619, -1102530383, 1047811955, 994787170) + + W(7, -1113272762, 1042147022, 1043051755, -1114980876); + sum2 = W(0, 1034817173, 1033088277, -1105460751, -1119967238) + + W(1, -1106843150, -1115275500, 1042281307, -1122100142) + + W(2, 1035283571, -1108330169, -1117933953, -1118763227) + + W(3, -1117324330, 1059951304, 1055263105, 1033190213) + + W(4, -1106783723, -1114610359, 1036736577, -1099584511) + + W(5, 1033652183, -1130129964, 1034893511, -1097591398) + + W(6, -1105003316, -1100855920, 1040893249, 1043864443) + + W(7, -1109219720, 1017452836, 1026860110, -1097701018); + WS(1061885343, -1093433134); + sum1 = W(0, -1119074171, -1116612499, 1043937396, 1026065358) + + W(1, 1040591342, -1106037336, 1035985530, 1035233146) + W(2, 1024365415, -1090085717, 1054530437, 1023445318) + + W(3, -1112185530, -1110885309, -1105284753, 1041032751) + + W(4, -1102861510, 1058148680, -1085809524, 1043298226) + + W(5, 1047951017, -1098920364, 1042740717, 1028431483) + + W(6, 1033079635, -1104145445, -1143903027, 1030877520) + + W(7, 1030794670, -1102965524, 1048819605, -1110782149); + sum2 = W(0, -1120425303, -1115506247, -1089905659, -1106986872) + + W(1, 1041777983, -1109249837, -1082926483, -1093100415) + + W(2, 1008642595, 1051302633, 976609774, -1100742938) + W(3, 1038578170, 1062460419, 1062992305, -1121360027) + + W(4, -1103931105, 1046700430, 1016533657, 1026171953) + + W(5, -1144970974, -1110379832, 1046952726, -1123505732) + + W(6, -1139607395, -1121481149, -1128426413, 1047068010) + + W(7, -1105571910, -1125592805, 1044012895, -1108759193); + WS(1057387711, 1071476886); + sum1 = W(0, 1029774760, -1088145158, 1058722665, 1026644842) + + W(1, 1046876472, -1090879061, 1046940920, -1124024690) + + W(2, 1049447728, -1087750555, 1062289140, 1034805113) + + W(3, -1104814472, -1081209882, 1060559271, -1101227374) + + W(4, 1047761369, -1102041597, 
1057163193, 1041428045) + + W(5, -1107998911, -1088704948, 1046238393, -1112057972) + + W(6, 1035672969, -1101378696, 1049034574, -1123650077) + + W(7, 1025453339, -1090772220, 1051154538, 1045125730); + sum2 = + W(0, -1079574021, 1066168402, -1106633910, 1033041398) + W(1, -1076424748, 1072246945, 1039775524, 1025560740) + + W(2, -1084651933, 1055648588, -1122282375, 1042767844) + + W(3, -1079383446, 1069722149, 1050613020, -1122422479) + + W(4, -1094010079, -1105729737, -1112550614, 1029478726) + + W(5, -1105513599, 1052366950, 1046757765, 1029747528) + W(6, -1094989946, 1046266897, 1041413657, -1132333344) + + W(7, -1083946107, 1064021500, -1117589808, -1110924549); + WS(-1088291167, -1076464582); + sum1 = W(0, 1038365083, 1048475643, -1115380053, -1113517956) + + W(1, 1013092187, 1039614653, -1115741870, -1118168192) + + W(2, 1049005176, -1105893787, -1115211424, 1038779144) + + W(3, -1096083994, -1090164821, -1106465144, -1103584918) + + W(4, 1046666973, 1048630844, -1092124773, 1051487172) + W(5, 1029385568, 1052597003, -1097273638, 1036949051) + + W(6, -1130846211, 1030762351, -1104199584, 1029432699) + + W(7, 1035921309, 1049653618, -1098780292, 1041505439); + sum2 = W(0, -1102442432, -1119046775, 1038119755, -1116688635) + + W(1, 1042447103, -1103497340, -1117033259, -1109692823) + + W(2, 1009200726, -1108371687, -1111757198, -1136365839) + + W(3, -1140140535, 1063726871, 1053430514, -1103585210) + + W(4, 1018235219, 1023996549, -1097582390, 1035998106) + + W(5, 1038566242, -1100556235, -1130738637, -1122627505) + + W(6, -1105836948, -1134905751, 1027482933, -1113769016) + + W(7, -1119461411, -1115426548, 1038939472, -1131103707); + WS(1063535295, -1106643391); + sum1 = W(0, 998897947, 1049160779, -1095715331, 1022592892) + W(1, -1114069468, 1050551697, -1114097198, 1035333911) + + W(2, 1030590255, 1037435628, -1091439068, -1105875754) + + W(3, -1120649212, 1059662980, -1098500149, 1036951763) + + W(4, -1112623985, 1051258674, -1093374928, -1097348874) + + 
W(5, -1135800742, 1051146291, -1112852607, 1021555078) + + W(6, -1126057998, 1042046535, -1104302276, 1032877804) + + W(7, -1118923687, 1051522915, -1098602138, -1118947123); + sum2 = + W(0, 1020791040, 1023004440, -1131522740, -1130592194) + W(1, -1119848331, 1035188360, 1010552055, -1114014595) + + W(2, 1028098574, 1036866165, -1087402238, 1057774258) + W(3, 1024075228, -1081188844, -1063807577, 1085749988) + + W(4, 1036335573, 1036267331, -1074297638, 1072260443) + W(5, -1114214294, 1020492762, 1044255896, -1101071935) + + W(6, 1028067710, 1026996488, -1109527927, 1041502854) + + W(7, -1123998545, 1028180222, -1125487816, -1121770211); + WS(1065370528, -1140007425); + sum1 = W(0, -1119191691, -1089109360, 1051697742, 1048704645) + + W(1, -1123119348, -1093467562, 1051845168, -1120571445) + + W(2, 1043193134, -1104035088, 1049231373, 1047054964) + + W(3, -1144777506, -1082641845, 1058881533, -1128753754) + + W(4, 1048874737, -1090665335, 1060727795, -1122636375) + + W(5, -1114692372, -1085597050, 1053776045, -1135014444) + + W(6, 1047204584, -1097312497, 1050811579, 962936920) + W(7, 1028382351, -1084438004, 1054559607, 1047044271); + sum2 = W(0, 1072219394, -1078126224, -1093298188, 1011861728) + + W(1, 1068804680, -1079031251, -1110022070, 1040923692) + + W(2, 1068134846, -1078951161, -1084926454, -1105857084) + + W(3, 1066171639, -1087436811, -1107767815, -1121892273) + + W(4, -1095987744, 1039512383, 1055586357, 1026590003) + W(5, -1080918851, 1067164113, 1031793821, 1040751494) + + W(6, -1077110574, 1069504242, 1041030661, -1123439499) + + W(7, -1071545857, 1075315134, 1055897321, -1109554506); + WS(-1089696543, -1143233957); + sum1 = + W(0, -1115482013, -1129127938, 1041381507, -1122467892) + W(1, 1045323653, -1098483714, -1110243009, 1049605572) + + W(2, -1096516931, 1045383939, 1059582003, -1090424421) + + W(3, 1035013745, -1091150045, -1106321908, 1043667267) + + W(4, -1162367211, 1043411549, 1034020454, -1109629626) + W(5, 1054106403, -1086835476, 
1026676964, 1048548650) + + W(6, -1107204469, 1048447553, 1034191370, -1105600748) + + W(7, 1036179510, -1103773800, -1159158363, 1035568493); + sum2 = W(0, -1134008573, 1032064725, -1117795542, -1113496357) + + W(1, -1125596513, -1121806942, 1035355315, 998755498) + W(2, 1011008945, 1045441656, 1024667789, -1119910472) + + W(3, 1051842604, 1065460002, -1089961367, 1041632626) + + W(4, -1125276577, -1114964420, -1081892598, 1035390555) + + W(5, -1118729604, -1130705389, 1026896379, 1032979221) + + W(6, -1111959837, -1121883022, 1020053498, -1129745921) + + W(7, -1117720498, 1034360078, 1023648883, -1115672512); + WS(1065278079, -1120402802); + sum1 = + W(0, 1031924074, 1052390115, -1097070063, -1116671142) + W(1, -1115548942, 1049303261, -1099757447, -1118597165) + + W(2, -1106896516, 1045251552, 1040981136, 1041336272) + W(3, -1112658654, 1060948388, -1081623390, 1043338841) + + W(4, -1103991244, 1062622505, -1085365853, -1145407604) + + W(5, -1121670257, 1054395391, -1096062708, 1032773481) + + W(6, -1115583119, 1045812503, -1101077740, -1123616408) + + W(7, 1031270934, 1048618059, -1099232531, -1127437558); + sum2 = + W(0, -1114021356, -1110767736, -1100377718, 1034493514) + W(1, 1032565152, -1122384971, 1041858293, 1019779384) + + W(2, 1056835748, -1085457975, -1085697040, 1016590225) + + W(3, -1085160508, 1068814369, 1060746614, -1097525969) + W(4, 1058685385, -1085213012, 1057245366, 1043875459) + + W(5, -1086618535, 1063574785, -1094120608, -1097974912) + + W(6, 1047242231, -1144776155, -1093482365, 1054776673) + + W(7, 1032715041, -1097434793, 1056356490, -1096653037); + WS(1055688959, -1109584743); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[429]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { temp[pos] = (value); } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, 
pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 429; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 11, y = (uint)id % 11; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (3)) + 0.5, float(group_base.y + y - (1)) + 0.5)).x; + } + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[8]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 11]; + samples[1][1] = inp[local_pos + 12]; + samples[1][2] = inp[local_pos + 13]; + samples[1][3] = inp[local_pos + 14]; + samples[2][0] = inp[local_pos + 22]; + samples[2][1] = inp[local_pos + 23]; + samples[2][2] = inp[local_pos + 24]; + samples[2][3] = inp[local_pos + 25]; + samples[3][0] = inp[local_pos + 33]; + samples[3][1] = inp[local_pos + 34]; + samples[3][2] = inp[local_pos + 35]; + samples[3][3] = inp[local_pos + 36]; + samples[4][0] = inp[local_pos + 44]; + samples[4][1] = inp[local_pos + 45]; + samples[4][2] = inp[local_pos + 46]; + samples[4][3] = inp[local_pos + 47]; + samples[5][0] = inp[local_pos + 55]; + samples[5][1] = inp[local_pos + 56]; + samples[5][2] = inp[local_pos + 57]; + samples[5][3] = inp[local_pos + 58]; + samples[6][0] = inp[local_pos + 66]; + samples[6][1] = inp[local_pos + 67]; + samples[6][2] = inp[local_pos + 68]; + samples[6][3] = inp[local_pos + 69]; + samples[7][0] = inp[local_pos + 77]; + samples[7][1] = inp[local_pos + 78]; + samples[7][2] = 
inp[local_pos + 79]; + samples[7][3] = inp[local_pos + 80]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 34]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret); +} +//!PASS 2 +//!DESC NNEDI3 (double_x, nns256, win8x4) +//!IN INPUT, temp +//!OUT OUTPUT +//!BLOCK_SIZE 64, 8 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[8]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 8; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 32.0; + float mstd1 = sumsq / 32.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, 1024871952, 1026430476, 1041094851, 992987016) + W(1, 1034372511, 1007997243, 1038248733, 1009484684) + + W(2, -1091961907, -1103990249, -1086639391, -1085365048) + + W(3, -1087585419, -1093414958, -1100793414, -1099588502) + + W(4, 1049937838, 1052012307, 1057524239, 1063527595) + W(5, 1061081007, 1057188600, -1104832955, -1114088341) + + W(6, 1023328805, 994719130, -1110763811, -1102193240) + + W(7, 1046466539, -1131716095, 1042167685, 1037858632); + sum2 = W(0, -1122170202, -1117362461, -1125932452, -1100385246) + + W(1, 1020071642, 1025043913, 1017078274, 1018043750) + W(2, -1115075619, 1050294809, 1041506545, -1100233118) + + W(3, -1100362988, 
1047005177, 1012082941, -1112302377) + + W(4, -1113824159, 1022334174, -1098709995, -1104291570) + + W(5, 1072797337, 1053652830, -1100425628, -1155649364) + W(6, 1016746290, 1016781702, 1031697879, 1033455014) + + W(7, 1050444141, -1106922881, -1081909907, -1086585813); + WS(-1096195455, 1061976972); + sum1 = W(0, -1107583849, 1045016510, -1102048399, 1050507709) + + W(1, -1138600128, 1004076226, 1049677870, -1106918697) + + W(2, -1094469481, -1104061589, -1082981042, -1086991635) + + W(3, -1087760032, -1088999402, -1111975782, -1089081125) + + W(4, 1057261454, 1051975353, 1058403546, 1061386858) + W(5, 1052980216, 1052828202, 1055128500, 1048525839) + + W(6, 1032308806, 1046697387, -1093320254, 1051876218) + + W(7, -1106246183, -1112945129, 1045880594, -1116677906); + sum2 = W(0, 976892690, 1043646796, -1131052969, 1047930742) + + W(1, -1099144563, 1035354913, -1115191227, -1111106463) + + W(2, -1095110912, -1114450783, 1052886335, -1113296119) + + W(3, -1088067101, 1040578653, -1102673565, 1051335047) + + W(4, -1080663089, -1074060607, -1068434194, 1048011859) + + W(5, 1042920440, 1058047003, 1057110709, 1038060178) + W(6, 1075827337, 1075290035, 1070007941, 1046007784) + + W(7, -1090506241, -1099371585, -1096675048, -1096378099); + WS(-1095930303, -1128843438); + sum1 = W(0, -1113225514, 1030842449, -1120882464, -1111398934) + + W(1, -1098488134, -1098728413, -1111237296, -1104159890) + + W(2, 1000563038, -1103461662, -1098779087, -1156245815) + + W(3, 1057689814, 1058114717, 1041725787, 1046074787) + W(4, 1038763268, 1052682210, 1055587560, 1046999855) + + W(5, -1090605975, -1106232348, -1106563184, -1119168797) + + W(6, -1104281110, -1105449143, -1106219642, -1105879965) + + W(7, 1039743648, 1030902068, -1111582036, 1033984854); + sum2 = + W(0, -1109947141, -1087141194, 1007133897, -1099413759) + W(1, 1026690396, 1045509921, -1117657274, 1051942463) + + W(2, -1078961084, -1081851502, 1057831088, 1067440534) + W(3, -1095618356, 1022398658, 1029712906, 
1049871690) + + W(4, 1053833634, -1102894339, 1046573470, 1058568743) + + W(5, 1058025266, -1115002147, -1109832065, -1099467737) + + W(6, 1049299145, 1040634467, 1035108031, -1114324751) + W(7, -1094447449, 1023241606, 987652806, -1104714655); + WS(1018627052, -1077328537); + sum1 = W(0, -1107592280, 1011043872, -1138466445, 1041422472) + + W(1, -1112294435, 1012707679, -1150731309, -1118891707) + + W(2, -1098360952, -1103889731, -1088654431, -1079526160) + + W(3, -1109020730, -1095389295, -1102056896, -1101414784) + + W(4, 1054425801, 1045838144, 1063939041, 1057394281) + W(5, 1061143403, 1057286118, 1040465365, 1042544156) + + W(6, -1121256842, 1016847523, -1114759963, -1109966789) + + W(7, -1115564074, -1117689671, -1133534770, 1028492087); + sum2 = W(0, -1161717665, -1138679732, -1121504639, 1050854432) + + W(1, -1106511096, 1030373815, 980628162, 1031200951) + W(2, 1033867626, -1115218580, 1030621655, -1096361601) + + W(3, 1044499063, -1104067382, 1040061476, -1122098343) + + W(4, 1028201267, -1096378423, 1071708903, 1079482342) + + W(5, -1064184100, -1089118805, 1048273255, -1109436715) + + W(6, -1124228530, 1029766795, -1128256742, 1035298280) + + W(7, 1035336882, -1105361644, -1119150405, 1035049634); + WS(1045693694, 1057374369); + sum1 = W(0, 1048369598, -1104752283, 1031950009, -1123958928) + + W(1, -1103831441, -1106189273, -1103850789, -1110300085) + + W(2, 1034312519, 1045853799, 1057046586, 1065585622) + W(3, 1061666460, 1060939321, 1050689810, 1060417841) + + W(4, -1131023893, -1098373234, -1095062180, -1089164628) + + W(5, -1092432965, -1087345809, -1092965100, -1091010256) + + W(6, -1099588978, 1019902402, -1105510198, 1033283356) + + W(7, -1115031649, -1122647270, -1126666680, -1103574093); + sum2 = + W(0, -1087427905, 1058683530, -1081734921, 1050776004) + W(1, 1042479954, 1025544982, -1105492903, 1058740427) + + W(2, -1098709872, -1099141199, 1032637312, 1072181481) + + W(3, 1036599206, -1080357543, -1113642582, -1086527647) + + W(4, 
1072153031, -1088885778, 1061509476, 1071890032) + W(5, 1052304550, -1080881103, 1050862866, -1078556244) + + W(6, -1089481634, 1056590706, -1092495010, 1048814690) + + W(7, -1095810808, -1097308566, -1094462248, 1039671439); + WS(-1081549664, 1060153996); + sum1 = W(0, 992286043, -1115318586, -1105782456, 1026471870) + + W(1, -1114468402, 1037910210, -1130210466, 1043803882) + W(2, 1063391104, 1055271797, 1062019397, 1061503832) + + W(3, 1058772501, 1045594046, 1049908468, 1040755007) + + W(4, -1087586274, -1095899708, -1085498120, -1089250259) + + W(5, -1090055577, -1102496824, -1103786988, -1104619012) + + W(6, -1105647870, 1025423024, 1033921889, -1113405924) + + W(7, -1113674825, -1106446561, -1118412923, -1096097592); + sum2 = W(0, 1053790549, -1110257416, 1033688102, -1102619530) + + W(1, -1117483359, -1098152061, -1105743952, -1083553679) + + W(2, -1080860351, -1097530578, 1041061421, 1064966059) + W(3, 1050344847, 1040479494, 1051151965, 1065962098) + + W(4, -1068185653, -1077346152, 1052310674, 1072556372) + + W(5, 1075004369, 1064749402, -1110156824, 1045213317) + + W(6, -1082483044, -1088866791, 1052658871, -1104178634) + + W(7, -1108410079, 1040931499, -1137891932, 1053124980); + WS(-1079020096, -1097079011); + sum1 = W(0, -1109501731, 1033910555, 1015347636, -1108027795) + + W(1, 1042979875, 1019929124, 1040369234, -1134559750) + + W(2, -1096742621, -1092191510, -1089091160, -1087072713) + + W(3, -1087558450, -1089417126, -1104831498, -1091818621) + + W(4, 1057943754, 1055662529, 1059647341, 1068092504) + W(5, 1059824411, -1117499817, 1043480574, 1050143023) + + W(6, 1016000225, 1020967408, -1102027816, -1109882648) + + W(7, -1106889398, -1117366414, -1104827345, 1049289276); + sum2 = W(0, 1037631964, -1105851814, 1036652885, -1110757790) + + W(1, 1024508007, -1139201564, -1113225064, -1104657620) + + W(2, -1097479797, 1051380185, 1028410699, 1044594400) + W(3, 1041068006, 1046036650, 1054960427, -1105334641) + + W(4, 1051023588, -1097950410, 
1034374804, 1053237761) + + W(5, -1132209182, -1088293598, -1081781977, 1065714224) + + W(6, 1045348435, 1028003104, -1113269608, 1041832368) + + W(7, 1033918380, -1089693526, -1099925151, -1106359824); + WS(-1087384991, 1052875812); + sum1 = W(0, 1041733252, -1138647561, 1034391503, -1106477875) + + W(1, -1100509868, -1098694249, -1110959830, -1120730421) + + W(2, -1109905397, -1104994285, -1107522048, 1057597757) + + W(3, 1062613266, 1055828187, 1052485297, 1044647290) + W(4, 1041497057, 1032882549, 1050093354, -1091247383) + + W(5, -1087942224, -1090201627, -1098203032, -1100016695) + + W(6, -1113613058, -1117825756, -1108789794, 1028766931) + + W(7, 1042722079, 1013355333, 1034047139, 1038812681); + sum2 = W(0, 1019350107, -1111428867, 1017374087, -1112005123) + W(1, 1028700445, -1112772903, 1027823157, 999439052) + + W(2, 1041831066, -1130655130, -1138336400, 1035402260) + + W(3, -1116945124, 1041636268, -1107238338, 1033528252) + + W(4, 1051650697, -1112274143, -1092155273, -1114034096) + + W(5, 1045953881, 1041190902, 1049171133, 1027833254) + + W(6, -1103332568, -1077765094, -1073481211, -1087514353) + + W(7, 1069095754, 1074086485, 1057277153, -1107727987); + WS(1050457727, 1041689780); + sum1 = W(0, -1131312139, -1116681402, 1030961105, -1140959985) + + W(1, 1046496193, 1048638526, 1007270019, 1049181764) + W(2, 992358355, 1014710691, 1040368912, -1105027369) + + W(3, -1089290871, -1090315138, -1098770776, -1086147588) + + W(4, -1092744895, 1027381214, -1089348299, 1050737138) + W(5, 1038810352, 1055368676, 1040416748, 1051301702) + + W(6, 1048008788, 1036450832, 1053966377, 933247243) + W(7, 1043432004, -1119062181, 1023583738, 1037141075); + sum2 = W(0, -1107193278, 1019433289, 1039188525, -1123892633) + + W(1, 1039851328, 1032631611, 1044641782, -1097026795) + W(2, 1057496780, 1031729137, 1037283932, -1086728254) + + W(3, -1087309268, -1092570464, 1035754242, 1063903066) + + W(4, 1072935738, -1080020989, -1078950448, -1087453268) + + W(5, 1051352061, 
1059438018, 1051006721, 1056300653) + W(6, -1080570956, 1054527180, -1096917141, 1050123895) + + W(7, 1053948249, 1042769096, 1029707984, -1106545458); + WS(-1084263519, -1088250567); + sum1 = W(0, -1107101246, 1049480649, -1097447514, 1049647488) + + W(1, -1099235545, 1018612505, -1103944776, 1032447885) + + W(2, 1048954180, -1096698757, 1059331526, -1119095878) + W(3, 1040553109, 1041330418, 1040723088, 1023387386) + + W(4, -1118116730, 1027018719, -1108150329, -1090501921) + + W(5, 1044929028, -1094468036, 1047463532, -1106285556) + + W(6, -1111017296, 1043604572, -1098833704, 1050343697) + + W(7, -1096264890, 1044030041, -1121471547, -1175819022); + sum2 = W(0, 991719940, 1034086314, 1055828179, 1034571486) + W(1, 1049088345, -1098363481, -1111430799, -1111365517) + + W(2, -1157381748, -1121095386, -1103829652, -1100672658) + + W(3, -1090417467, 1050795943, -1113801830, 1037803588) + + W(4, -1100952734, 1027786887, -1085350121, -1118546758) + + W(5, 1049271738, -1140682323, 1035813474, -1164121191) + + W(6, 1029794653, 1016292894, 1067178121, -1098007098) + + W(7, 1008607005, -1110134488, -1115483467, -1133015817); + WS(1066054400, -1125664425); + sum1 = W(0, 1015260777, -1108545200, 1023624464, -1109354663) + + W(1, 1023465234, -1108921531, -1116445024, -1128865616) + + W(2, 1054574294, 1053284635, 1059735702, 1058150961) + W(3, 1058102002, 1063397581, 1046571067, 1046728251) + + W(4, -1095241461, -1094415406, -1088764634, -1078845388) + + W(5, -1090401103, -1105651068, -1116790272, -1104354311) + + W(6, -1127045294, -1111343263, 1041542629, -1130514988) + + W(7, 1035892576, -1136829776, 1030749305, -1106930027); + sum2 = + W(0, 1041565398, -1103469615, 1029690269, 1035794320) + W(1, 1035452008, -1107146771, -1124704946, 1040850628) + + W(2, -1104697837, 1029373537, 1057447179, 1080145714) + + W(3, -1103623095, -1066097425, -1097535851, -1103109309) + + W(4, -1115419200, 1031260861, 1026403525, 1043319440) + W(5, 1050811672, -1103531154, -1090176537, 
1055196268) + + W(6, -1158883614, 1035036640, 1034486240, 1041655252) + + W(7, -1114789346, -1114794272, 1036709484, -1102936938); + WS(-1087901375, 1071177135); + sum1 = W(0, -1121594272, -1122618691, -1114434254, 1044010486) + + W(1, -1113833999, -1108159437, 997526195, -1123472883) + + W(2, -1110224028, -1109965218, -1120206908, 1010935165) + + W(3, -1108541318, 1041417229, -1109330685, 1039497451) + + W(4, 1030219348, 1047787782, -1114914192, -1113546820) + W(5, 1046030645, 1035224125, 1017890711, 1022061139) + + W(6, 994425384, 1027034090, -1106192913, 1040356168) + + W(7, -1112701798, -1130438655, -1151173965, -1125745551); + sum2 = W(0, 1042217679, -1108814485, 1058334461, -1080767453) + + W(1, 1050991857, 1030602271, 1034906705, -1127683613) + + W(2, -1116367239, 1042859710, -1078638996, 1073929687) + + W(3, -1092080339, -1116721722, -1106162437, 1035114111) + + W(4, 1026421701, 1027395369, 1028343990, -1097310248) + W(5, 1042808122, 1051491707, 1024141024, -1112071525) + + W(6, -1140328609, -1114630403, -1105295199, 1028261520) + + W(7, -1122297114, -1105219597, 997359482, 1017171678); + WS(1066545696, -1154623394); + sum1 = W(0, -1105442505, -1115505461, -1102287846, 1023914903) + + W(1, 1021900696, 1016559928, -1142506442, -1129232035) + + W(2, -1114054203, -1098488296, -1092352928, -1089108436) + + W(3, -1087887754, -1096061654, -1097827287, -1095543703) + + W(4, 1057182265, 1049322687, 1062190966, 1059635472) + W(5, 1057132007, 1043675118, 1048482034, 1055016957) + + W(6, -1123432545, 1010507424, -1097309632, 1045294808) + + W(7, -1098559779, 1042399967, -1110816589, 1026256390); + sum2 = + W(0, -1094410020, -1073520568, -1076806727, 1007932024) + W(1, -1129053184, -1126774165, 1016297320, 1026440100) + + W(2, 1040526244, -1119660436, 1072065972, 1074284701) + W(3, 1045336233, -1114285571, 1027498382, -1109537500) + + W(4, 1038890507, -1105577163, 1038803075, -1096715819) + + W(5, 1048625414, -1106066396, 1035667090, -1128514892) + + W(6, -1120085021, 
1014274624, -1119386280, 1032586275) + + W(7, -1148623280, 1033069015, -1113847206, 1028560484); + WS(1055250687, -1106970626); + sum1 = W(0, 1033119688, -1105149829, 1043588463, -1098690473) + + W(1, 1051357302, -1111820283, 1036109552, -1119401059) + W(2, 1042190962, 1055784922, 1039447718, 1060328969) + + W(3, -1105877415, 1050469874, -1123255849, 1044861546) + + W(4, -1096546392, -1103230410, -1093610964, -1094495495) + + W(5, -1104321127, -1102978667, 1034127854, -1097968467) + + W(6, -1117586055, -1125652468, 1035816565, -1103963220) + + W(7, 1047255951, -1108600424, -1157028943, 1018776031); + sum2 = W(0, 1038757267, -1111076574, 1036628998, -1107631850) + + W(1, 1045867530, -1126199158, 1034641438, 1014361489) + + W(2, 1030211853, -1147844455, 1002666567, -1080902130) + + W(3, -1089131777, 1067727291, 1050167793, -1114432338) + + W(4, 1033894636, 1039985174, -1092985165, -1084645521) + + W(5, 1069738325, -1096580382, -1100096489, -1100261485) + + W(6, -1133782171, -1117005539, -1150866734, 1049641986) + + W(7, 1051354211, -1100550183, -1104598763, 1033251244); + WS(1063662431, 1022143153); + sum1 = + W(0, -1101665898, 1044567949, -1113912242, 1054985008) + W(1, -1089448239, 1031890901, -1106608329, 1016341751) + + W(2, 1028101088, -1100200043, -1110575390, -1090770105) + W(3, 1042239925, 1034205190, 1041117720, 1032506575) + + W(4, 1045825111, -1112112432, 1011332621, 1036354680) + W(5, 1053092900, 1020582213, -1122250931, 1037368233) + + W(6, -1107336877, 1049135824, -1117269540, 1043762234) + + W(7, -1090665684, 1045380738, -1108114694, 1027635960); + sum2 = W(0, -1109621945, 1044140338, 1011973298, 1054192939) + W(1, -1083519810, 1050669158, 1015542539, 1007706050) + + W(2, 995625223, -1125871763, 1059194369, -1085775903) + + W(3, -1085690186, 1055305067, -1115586758, 1024423563) + + W(4, 1033690799, -1131300267, 1019603073, -1105557074) + + W(5, 1066625604, -1093331249, 1025625789, -1163078190) + + W(6, -1127273987, 1031533647, -1100093028, 1049649037) 
+ + W(7, -1099380904, 1041330252, -1109873113, 1021669909); + WS(1065395904, 1046290614); + sum1 = W(0, 996799560, 1015596034, -1100668872, -1100927819) + W(1, 1043886221, 1025322396, 1035443240, -1114934548) + + W(2, -1125542553, 1043656950, 1056396433, 1054862894) + + W(3, -1088911438, -1099487682, -1105531411, -1117673547) + + W(4, 1038203972, -1105153011, -1085640859, -1101762943) + + W(5, 1057024083, 1036429768, 1021072448, 1035195023) + W(6, -1122586727, 1036793959, 1049715632, 1035554923) + + W(7, -1115152096, -1122121748, 1039652277, 1024299964); + sum2 = + W(0, 1029237671, -1133651707, -1114304490, -1114162505) + W(1, -1125875827, -1112015576, 1015895733, 1035088799) + + W(2, 1017509749, -1140172751, 1030543605, -1092617066) + W(3, 1051105475, 1033357967, 1011455075, -1105982664) + + W(4, -1104176053, 1038596873, 1050264307, -1093179067) + + W(5, 1071302855, -1090684841, -1100815852, 1003487558) + W(6, 1032328729, 1037378915, -1104642832, 1049979232) + + W(7, -1110244360, -1085540383, -1135016203, 1015899853); + WS(1058730335, 1038095077); + sum1 = W(0, 1050009523, -1156396202, 1045232946, -1123440788) + W(1, 994853272, 1029074928, -1116532263, 1016043038) + + W(2, -1090412329, 1022785342, 990514519, 1051849503) + W(3, 1050259497, 1048747736, 1040337077, 1047238893) + + W(4, -1116071407, -1112028282, 1027669243, 1025171300) + + W(5, -1091444451, -1091116387, -1095024325, -1085293672) + + W(6, -1120619710, 1033123659, -1138222105, 1042856679) + + W(7, 1043400402, 1046148791, 1031722640, 1050565349); + sum2 = + W(0, 1046272294, -1119023958, -1106071503, -1097641458) + + W(1, -1099339001, -1122591395, -1120322169, -1112624405) + + W(2, 1056160584, 1047372879, 1066704996, 1062851774) + W(3, -1127558738, -1095719050, 1012197993, -1094334166) + + W(4, -1069609928, -1100807597, 1063602974, 1071319304) + W(5, -1123458905, 1049925865, 1037702687, 1046841945) + + W(6, -1086350256, 1012350161, -1093103672, -1165704932) + + W(7, -1099266547, 1039176837, -1104052164, 
1043315186); + WS(-1102208382, 1082454872); + sum1 = W(0, -1121527792, 1019782131, -1111677734, 1042764822) + + W(1, -1098291428, -1109244559, -1122839755, 1023050790) + + W(2, -1097489215, -1109352802, -1089475261, -1094323164) + + W(3, 1057233791, 1055571354, 1036609181, 1023873367) + W(4, 1044710709, 1043005718, 1059136208, -1113184692) + + W(5, -1102862820, -1113821821, 1016630678, -1122283881) + + W(6, 1024559218, 1025756170, -1098383546, 1034230961) + + W(7, -1121795355, 1034138424, -1148370726, 1021065311); + sum2 = W(0, 1012975921, 1033684352, -1119837718, 1048900749) + + W(1, -1088385914, -1106132930, -1107812043, 1047279942) + + W(2, -1113722375, -1113902527, 1034379158, 1065557700) + + W(3, 1033601684, -1078602486, 1048231286, -1117907500) + + W(4, -1109752074, -1108511603, 1047596894, 1065261424) + + W(5, -1110433219, -1113936114, -1106600777, -1110420048) + + W(6, 1009270385, 1034062718, -1096952633, 1042274647) + + W(7, -1110816087, 1038223706, 1037668398, -1111923151); + WS(1055139903, 1066543323); + sum1 = W(0, -1113216706, 1034160554, -1110997976, -1104052311) + + W(1, -1130057219, -1100344024, -1109060119, -1102181302) + + W(2, 1025777755, -1106690282, 1025857819, -1108040417) + W(3, 1042814916, 1058801014, 1040801319, 1061190298) + + W(4, 1062476912, 1047449287, 1040533578, -1088276389) + + W(5, 1043316038, -1096553156, 1003175286, -1109923206) + + W(6, -1094834883, -1115394087, -1100641176, -1109281747) + + W(7, -1111790927, -1131081926, -1127882001, -1116502963); + sum2 = W(0, 1032900543, 1026043047, 1011969739, 1005946567) + + W(1, -1104378201, -1106573410, -1149079854, -1098913457) + + W(2, -1093611286, -1143136704, -1129502699, 1065137024) + + W(3, 1034934846, 1044311979, -1128312106, -1107526541) + + W(4, -1093105706, -1104881087, 1056641171, 1062580590) + + W(5, -1137732946, -1102271464, -1106196955, -1097055308) + + W(6, -1103368978, 996537094, -1097512177, 1038772124) + + W(7, -1108835309, 1035638256, 1028654910, -1144299515); + 
WS(1037662203, -1095462961); + sum1 = W(0, 1051208476, 1007151360, 1047867778, -1120692693) + W(1, -1102286423, 1029685138, -1117174983, 998066935) + + W(2, 1054332722, 1048962332, 1056131053, 1060311124) + W(3, 1053179033, 1057524976, 1045179237, 1053688986) + + W(4, -1088537607, -1096098779, -1089718811, -1086623130) + + W(5, -1088149909, -1093380227, -1094989189, -1094538289) + + W(6, 1009988859, -1120684294, 1043868050, 1008468855) + + W(7, 1024446024, 1041242535, -1121112609, 1022948968); + sum2 = W(0, -1075642964, -1078284587, -1077677369, -1086975712) + + W(1, 1050575143, -1109971278, 1043550667, 1046375822) + W(2, 1064397427, 1070403917, 1072178178, 1071930150) + + W(3, -1093947328, -1119680819, -1134729809, -1094009133) + + W(4, 1042055631, 1050515956, -1094922849, -1096417404) + + W(5, -1094640520, 1045435393, -1109099138, -1106076462) + + W(6, 1049259770, -1100572061, 1038272909, -1110188613) + + W(7, 1033007627, -1109186235, 1040318497, 1038207101); + WS(-1091974591, 1028524890); + sum1 = W(0, -1119527298, 1034997099, -1099802012, 1033463662) + + W(1, -1090658116, 1044419888, -1099353751, -1110855131) + + W(2, 1050666155, -1101120961, 1057798952, -1146674958) + W(3, 1048486316, 1054164791, 1033437822, 1053708795) + + W(4, -1104886813, -1100807815, -1099838668, 1021707158) + + W(5, 1057612011, -1093871840, 1029138830, -1104286795) + + W(6, -1119869997, 1044100139, -1103774309, -1112176434) + + W(7, -1094617942, 1036492177, -1113521820, 1028051362); + sum2 = + W(0, 1029285075, -1113245689, 1046869988, -1099734421) + W(1, 1045806866, -1099048447, 1040440310, -1129472848) + + W(2, -1114745613, 1037297246, -1113507597, 1057852752) + W(3, 1031538739, 1019950674, 1039903515, -1115367833) + + W(4, 1024121756, 1034295695, -1081105309, 1063764387) + W(5, -1093953622, 1060684479, -1099720743, 1051222485) + + W(6, -1127985632, -1103172653, -1097344586, -1092930743) + + W(7, 1039548141, -1113791115, 1024790472, -1106398082); + WS(1058893599, -1078409713); + sum1 = 
W(0, -1115244150, 1036724056, -1098555735, 1049246955) + + W(1, 1045222582, -1138283737, 1044966206, -1115412967) + + W(2, -1102829117, -1100240992, -1094663483, -1084866106) + + W(3, -1085064800, -1093243506, -1093950047, -1095580366) + + W(4, 1039336887, 1053376406, 1053054383, 1065100884) + W(5, 1054590883, 1056764679, 1053059489, 1050610273) + + W(6, -1115623287, -1114512019, 1041993169, 1018575674) + + W(7, -1114852275, -1108377677, -1118180448, -1122449691); + sum2 = + W(0, -1130929736, 1009552185, -1084383778, 1050909748) + W(1, 1049209322, -1096700837, 1051369678, -1129474226) + + W(2, -1101060123, -1090337753, 1072779570, 1053567468) + + W(3, -1079161361, 1057662166, -1097661058, -1112521942) + + W(4, 1045192083, 1061442923, 1050903982, -1089269841) + W(5, 1029505799, -1094014597, 1046387683, -1105984421) + + W(6, -1105822172, -1103557108, -1106762492, -1117595750) + + W(7, -1111371347, 1025228376, -1138825972, 1030887459); + WS(-1109369595, 1048740969); + sum1 = W(0, 1040995480, 1030414773, -1118646413, 1043594790) + W(1, -1129436350, 1037121811, 1030207412, 1043560049) + + W(2, 1051718495, 1050319297, 1059003715, 1046504696) + W(3, -1135436979, 1043825845, 1049475887, 1058279379) + + W(4, -1091711555, -1095167863, -1085356599, -1083562713) + + W(5, -1096690237, -1098326935, -1120665100, -1098718953) + + W(6, -1125552391, 1032958437, 1042985722, 1045509569) + + W(7, 1023899916, 1019546753, -1130219052, -1105013544); + sum2 = + W(0, -1113177829, 1043809265, -1095416462, 1034071291) + W(1, -1120341831, 1043900974, 1020015593, -1119209873) + + W(2, -1117552581, -1109343151, 1052574118, 1063553374) + + W(3, 1070912061, -1112203792, -1091338378, -1072794661) + + W(4, -1100446453, 1039662186, -1114145263, 1053677817) + + W(5, -1101063801, 1042882834, -1094258913, 1055602011) + W(6, 1041583669, 1009024316, -1118793923, 1018689733) + + W(7, 1031195477, -1105538343, -1114748986, -1109237633); + WS(-1093437503, 1065718162); + sum1 = W(0, 1037550096, -1101135841, 
1044878747, -1107014286) + + W(1, 1042111087, 1037051057, -1099961615, 1041500301) + W(2, 1049488055, 1042594425, 1049041945, 1066142676) + + W(3, -1100146203, 1045825417, 1040663590, -1119554198) + + W(4, -1103061781, 1046266118, -1087470164, -1106438298) + + W(5, -1111606545, -1091131027, -1133427370, -1103590594) + + W(6, -1114711965, -1097873069, 1046063170, -1091362312) + + W(7, 1037494709, 1040258368, -1109172298, 1033770775); + sum2 = W(0, -1106488910, 1026201604, 1035711566, 1060425407) + + W(1, -1098764525, -1096618367, -1182808622, -1127820908) + + W(2, -1123106758, -1113590739, -1084666863, 1061776657) + + W(3, 1055047321, -1119175762, -1103500958, 1041613778) + + W(4, 1050172947, -1092748399, -1097703384, -1099741350) + + W(5, -1120322586, 1050324229, 1025620288, -1105222946) + + W(6, -1124712996, 1043391307, -1098783459, 1048423650) + + W(7, 1041036039, -1099280352, 1048645442, -1118590758); + WS(1056660607, -1113798601); + sum1 = + W(0, -1115305597, -1101671294, 1016801995, 1039904137) + W(1, 1037148876, 1040453028, -1110559226, -1154296528) + + W(2, 1041913228, 1008348404, 1031963795, -1093680051) + W(3, -1089471492, 1034024758, -1111400057, 1031051312) + + W(4, 1012656821, 998498191, -1102006873, 1057450908) + W(5, 1049874551, -1111681498, -1109457443, 1036430914) + + W(6, 1026504084, -1151777337, 1032505286, -1108358078) + + W(7, 1031920291, 1043455038, -1129849213, -1125050675); + sum2 = + W(0, 1042939003, -1114796094, 1044529628, -1124781020) + W(1, -1106748864, 1016881133, -1108227590, 1027959261) + + W(2, 1024638457, -1101207597, 1027593065, -1097759574) + W(3, -1130307786, 1033669310, 1022409637, 1019735589) + + W(4, -1123927190, 1042777565, -1098782283, 1059165645) + + W(5, 1062760553, 1017094385, -1160993898, -1125388747) + W(6, 998901155, 1022659417, 1017512357, -1104939928) + + W(7, -1098668376, -1089791762, -1106217568, -1115680109); + WS(1066155712, 1004288134); + sum1 = W(0, 1047132567, 1029697589, 1014062303, 1041631965) + W(1, 
991515041, -1123217244, -1154741365, 1030265807) + + W(2, 1049267618, 1048612844, 1055750611, 1059093756) + W(3, 1057479944, 1056993158, 1047138005, 1055767146) + + W(4, -1089062279, -1092380195, -1088453014, -1084647474) + + W(5, -1091963789, -1088742789, -1098495965, -1086886676) + + W(6, -1138510971, 1046969501, 1024451359, 1049129552) + + W(7, 1040560290, 1043007167, -1122630737, -1122583780); + sum2 = + W(0, 1011892235, -1111382628, 1037768402, 1032283234) + W(1, -1114728106, 1009460031, -1112526532, 1030203700) + + W(2, 1040858440, -1103755803, -1125277089, -1084511950) + + W(3, -1090323786, -1159433014, 1053584755, 1052886875) + + W(4, -1127819845, -1109875610, -1096621612, -1071556311) + + W(5, -1071973667, -1099343032, 1076043190, 1079690007) + W(6, 1010964983, -1106716388, 1032593498, 1040462902) + + W(7, -1100404332, 1040394500, 1007208771, 1051792925); + WS(-1086041375, 1049976369); + sum1 = W(0, -1125608345, 1025261033, 1032012444, 1046732297) + + W(1, -1114869141, 1036081274, 1036645660, -1119140736) + + W(2, -1096855193, -1097190986, -1086237883, -1083643033) + + W(3, -1093631755, -1095059844, -1132519191, -1095180224) + + W(4, 1037693680, 1051176741, 1059434222, 1062665585) + W(5, 1052924527, 1052562329, 1041010310, 1048775023) + + W(6, 1048607039, 1030637974, 1026121224, -1103554484) + + W(7, -1110078044, -1109004102, 1035782593, -1118200972); + sum2 = W(0, 1029601330, -1144560396, 1032763954, -1115106220) + + W(1, -1111454114, -1121335223, -1109520646, 1021645795) + + W(2, -1115199196, 1026958814, 1032369083, -1106065675) + W(3, 1042189754, 1029124674, 1022674491, 1026253022) + + W(4, -1096390757, -1100298556, 1076866217, -1108884178) + + W(5, -1116766618, 1025985538, -1115268590, 1011072742) + + W(6, -1079547524, -1081990625, 1041174181, 1041059708) + + W(7, 1024602232, -1127158528, 1007020790, 1003377428); + WS(-1111980027, 1060626277); + sum1 = W(0, 1023621139, 1032179266, 1030954414, 1037158526) + W(1, 1036765152, 1036680203, -1146606447, 
-1119312598) + + W(2, -1102882983, -1092055397, -1089823274, -1095093232) + + W(3, -1090725010, -1139402199, -1116432156, -1108873773) + + W(4, 1044706422, 1043529367, 1057090380, 1064981515) + W(5, 1047303429, 1042557255, 1028925358, 1041638915) + + W(6, 1028429765, -1113080214, -1101761443, -1098683411) + + W(7, 1033439719, 1030304674, -1104492078, -1112219839); + sum2 = + W(0, -1107703976, 1046455264, -1093472442, 1048924762) + W(1, -1097201227, 1049435664, -1106239872, -1119402461) + + W(2, 1045439570, -1090239140, 1028879562, 1064195571) + W(3, 1016778259, 1027385222, 1028121257, -1105208479) + + W(4, 1032849260, -1096329612, 1030082819, 1036056807) + W(5, 1059898488, -1091675747, 1053658286, -1117889585) + + W(6, -1122224669, 1035962913, -1098806672, 1041148911) + + W(7, -1091828205, 1041687668, -1098243254, -1104565052); + WS(1062681599, 1040842201); + sum1 = + W(0, -1108789463, -1115361429, 1048981817, -1092926921) + W(1, 1054602509, 1029602904, -1117967305, 1025443329) + + W(2, -1104300209, 1050445194, -1089127203, 1063952639) + + W(3, 1043197406, -1106041808, 1039191171, -1114238594) + W(4, 1042117608, -1097788769, 1032428200, 1049970971) + + W(5, -1088418847, 1049956321, -1112931212, -1123051255) + + W(6, -1118282400, -1114475276, 1049574381, -1090427073) + + W(7, 1045778638, -1112080775, -1097758566, 1033393295); + sum2 = W(0, 1038735267, -1100048068, -1115453090, -1110884051) + + W(1, -1090743949, -1153955669, -1112598570, -1103797208) + + W(2, -1126315713, 1043050536, 1053683464, 1058830244) + + W(3, -1100540415, 1037942519, -1098889179, 1040830228) + + W(4, -1111714958, -1115313021, 1046662525, 1061563017) + + W(5, 1052621807, -1104363663, -1118760183, 1007755797) + + W(6, -1111656758, -1115683900, -1093358619, -1105966973) + + W(7, 1037413595, -1101879362, 1044482581, -1098877089); + WS(1062776447, 1069975051); + sum1 = W(0, -1130308944, -1112835078, -1113144126, -1094805377) + + W(1, -1098008538, -1114270939, -1117364667, -1117827459) + + W(2, 
1036893844, -1114635034, 1050058840, 1071110128) + W(3, 1061455014, 1044746969, 1018226642, 1049407081) + + W(4, -1104137265, -1098478453, -1104852259, 1041368142) + + W(5, -1095764665, -1109080309, -1103882015, -1120810969) + + W(6, -1108240801, 1029922778, -1111877761, -1088740684) + + W(7, -1112520550, -1119589855, -1121527653, -1103102129); + sum2 = + W(0, 1028834725, -1110297977, 1035091068, 1034973002) + W(1, -1110025415, -1154986018, -1120031976, 1030684381) + + W(2, -1113117380, 1026197871, -1099362318, 1077416661) + W(3, 1050456944, -1112814852, 1027999489, 1016280910) + + W(4, 1037052904, 996515330, 1043940575, -1069860026) + W(5, 969317772, 1021906202, -1115577362, 1013156184) + + W(6, -1113210551, 1032917698, -1106692355, -1103487680) + + W(7, 1034055293, 1006658352, 1018186436, -1124181989); + WS(-1082599007, -1084302409); + sum1 = W(0, -1127095446, 1011517278, -1110892475, 1024933540) + + W(1, -1111586789, 1039136849, -1108207315, 1049617952) + W(2, 1056665680, 1052281350, 1058821842, 1061132051) + + W(3, 1054434479, 1052720805, 1043783817, -1159632952) + + W(4, -1087212124, -1096260148, -1085877656, -1087957451) + + W(5, -1089270826, -1102718609, -1106422582, -1106709957) + + W(6, 1035858193, -1128228554, 1043137767, 1023709101) + + W(7, 1037286635, -1111842501, -1130056576, -1115207149); + sum2 = W(0, 1048459811, 1071226724, 1075494724, 1074401127) + + W(1, 1044864951, -1077687291, -1072311776, -1066953112) + + W(2, -1095160239, 1065850492, -1121381976, -1099027008) + + W(3, 1031151008, 1057399482, 1045213405, -1095971461) + W(4, 1026509768, 1043007163, 1043168485, -1093896652) + + W(5, 1012395574, 1019701051, -1109946848, 1027278540) + + W(6, -1123792544, -1127547903, 1027169380, -1107011198) + + W(7, 1037067951, 1021690579, -1124089088, -1166810856); + WS(-1118215158, -1130722305); + sum1 = W(0, 1039784439, -1119983743, 1027132090, -1101267710) + + W(1, -1106141431, -1119840977, -1131926580, -1158357571) + + W(2, 1045520358, 1040649785, 
-1126797149, 1070303194) + W(3, 1059129261, 1056727436, 1040938982, 1050582583) + + W(4, -1094865274, -1098235299, -1085881139, -1106608186) + + W(5, -1088850304, -1093134907, -1098615277, -1104503154) + + W(6, 966095017, -1166742058, 1023429181, -1127173569) + + W(7, -1112024885, 1030946302, -1160489077, -1106263629); + sum2 = + W(0, -1122933952, 1029904221, -1104768174, 1044174015) + W(1, -1122786276, 1032455763, -1118952582, -1120585334) + + W(2, 1043676402, -1092335308, 1050667883, -1106663265) + W(3, 1035773679, 1033986578, -1123822916, 1030027555) + + W(4, -1122578476, -1096734640, -1067932755, 1078656606) + + W(5, 1056344301, -1106732625, 1033374173, -1116084330) + + W(6, 1034421913, -1121365342, -1103315560, 1049347404) + + W(7, 1027752905, -1114333025, 1025409455, -1129253837); + WS(-1112296443, -1083191171); + sum1 = W(0, 1030599873, -1102688301, 1038739494, -1091052013) + + W(1, -1120252367, -1118018759, -1105737735, 1036799090) + + W(2, 1049894985, 1052276159, 1055519004, 1061107959) + W(3, 1059731464, 1052059899, 1050382861, 1050879021) + + W(4, -1100141489, -1095734081, 1044438853, -1078809864) + + W(5, -1100785926, -1095185489, -1092887220, -1113977245) + + W(6, -1124642344, -1114229790, 1039505712, -1108354137) + + W(7, 1037196526, -1129006466, 1034647259, -1123394945); + sum2 = W(0, -1134366274, 1040286055, -1103185345, -1089934078) + + W(1, -1104699884, -1135474394, 1039368980, 984214720) + W(2, 1032198029, -1093721101, 1064064153, 1038410574) + + W(3, 1053501328, 1053898256, -1088730122, 1045555557) + + W(4, -1102569648, 1059013351, -1076328416, 1075980540) + + W(5, -1081344728, -1093863519, 1061819985, -1089334779) + + W(6, -1113629492, -1099320981, 1062572765, -1079072621) + + W(7, 1057269325, 1039501040, -1089994189, 1050815727); + WS(1051639487, -1077445112); + sum1 = W(0, 1023584208, 1019039018, 1046978951, -1114024277) + W(1, 1042440780, 1036430651, 1019910405, 1017051223) + + W(2, -1087680633, -1097242582, -1087958558, -1084584703) + + 
W(3, -1089578188, -1090497932, -1100840226, -1089989738) + + W(4, 1058381481, 1049175941, 1059595066, 1058705186) + W(5, 1060807824, 1057261083, 1047705823, 1052092149) + + W(6, 1019116206, -1110819225, 1041321753, -1110784681) + + W(7, -1120949436, -1111841304, -1108976675, 1037750032); + sum2 = W(0, -1103483317, -1103949071, 1036613850, -1096200772) + + W(1, -1094761445, -1090478844, -1091384835, 1024429003) + + W(2, 1043027417, 1024633771, 1050158855, -1096718424) + W(3, 1057240079, 1037855330, 1036780676, -1103545669) + + W(4, 1043672886, -1105891820, -1113242263, 1053817256) + W(5, 1043447632, 1053166295, 1043680942, 1034776200) + + W(6, -1126025102, -1101612494, 1051748473, -1144187697) + + W(7, -1129269697, -1114569970, -1102484885, 1044262941); + WS(-1102141694, 1066754929); + sum1 = W(0, 1024767465, 1028037205, -1148374821, 996156698) + W(1, 1034886379, 1011245536, 1017269224, 1023537404) + + W(2, -1097150806, -1105574095, -1092607882, -1086231302) + + W(3, -1085570014, -1090150659, -1093685286, -1100403588) + + W(4, -1121670572, 1021251426, 1053426631, 1064662112) + W(5, 1060264850, 1058905527, 1047037217, 1048736520) + + W(6, 1047316983, -1123186307, 1027553675, 1037457517) + + W(7, -1103359865, 1031219256, -1108923303, 1039125668); + sum2 = W(0, -1127568330, 1031019669, 1013355589, -1123247753) + + W(1, -1131101666, -1126559289, -1128009624, -1122774763) + + W(2, 1005444026, -1121000217, -1135062981, -1099801823) + + W(3, 1044314830, 1052295626, -1112001042, 1018744598) + + W(4, -1099850392, 1052911126, 1040255711, -1120914990) + + W(5, 1055431083, -1112663055, 1029993103, -1105142519) + + W(6, -1069105992, -1075891845, 1052106082, 1078854269) + + W(7, 1061745892, -1097592921, 1033712114, 1037317546); + WS(-1092827839, 1049601702); + sum1 = W(0, -1102748075, 1048973336, -1101661842, 1035794226) + W(1, 1035295380, 951002293, 1028700102, 1019995195) + + W(2, -1114726817, -1100496851, 1056141590, -1092935501) + + W(3, -1090189951, -1127762491, -1107070641, 
-1116434785) + + W(4, 1043768535, 1037939345, -1092531310, 1048278549) + + W(5, 1059458263, -1107678146, 1040739973, -1103464157) + W(6, 1001409895, 1025584388, 1043222609, 1028885900) + + W(7, -1097823129, 1046084402, 1018269854, 1033486885); + sum2 = W(0, 1047648616, 1049954711, -1086499077, 1036131868) + + W(1, 1021844703, -1144052469, 1036766656, -1114852932) + + W(2, -1099866854, -1087014785, 1059941103, 1059724034) + + W(3, -1104253425, -1116739540, -1123734224, -1119117496) + + W(4, 1040911688, -1127547935, 1006897087, -1147690453) + + W(5, -1094365093, 1046092804, -1117733622, 1045874226) + + W(6, -1104812914, 1032774583, -1118719608, -1112213150) + + W(7, 1020986307, 1037506252, 1020773629, -1120467144); + WS(1062974879, -1132215613); + sum1 = W(0, -1181157822, 1033235448, -1120645311, 1032693326) + W(1, 1014937094, 1012846527, 1029466479, 1027727527) + + W(2, -1096960767, -1103300066, -1098273063, -1084817024) + + W(3, -1086871647, -1093312183, -1094276984, -1099477807) + + W(4, 1047884458, 1024586364, 1040665968, 1065496294) + W(5, 1061273423, 1057013666, 1048826162, 1049029078) + + W(6, 1046591191, 1028472621, 1027902609, -1105457745) + + W(7, -1099580452, -1121668195, -1123904424, 1033797162); + sum2 = + W(0, -1120861164, 1023555157, -1114715157, 1039733942) + W(1, -1111876903, -1121732899, 1028106223, -1136150442) + + W(2, -1114727130, -1127207629, 1049207316, -1100512081) + + W(3, 1044392032, -1113639142, -1107243417, 1035475508) + W(4, 1051943787, 1021361633, -1100379929, 1049204344) + + W(5, 1029680046, 1040523175, -1113800818, 1017564481) + W(6, 1073213517, 1067585393, -1095215585, -1072102567) + + W(7, -1085077901, 1017790060, -1115100698, -1111101803); + WS(1038961915, -1122010239); + sum1 = W(0, 1004620105, -1113106351, -1112775210, -1125534724) + + W(1, -1137271196, -1126589322, 1026657556, -1119328748) + + W(2, 1048595660, 1052250558, 1058549636, 1061573929) + W(3, 1057522353, 1048822826, 1040718305, 1048641524) + + W(4, -1099376311, 
-1096833644, -1088891129, -1086812373) + + W(5, -1093214133, -1095914356, -1105329661, -1103878453) + + W(6, -1116246236, 1027857155, 1036452944, 1040782707) + + W(7, 1018281129, -1113857108, 1010557432, -1106455448); + sum2 = + W(0, -1116580406, 1024927837, -1129828931, -1115420496) + + W(1, 1011185933, -1123989928, -1146345634, -1132229127) + + W(2, 1027562258, -1162615720, 1047581344, -1109513493) + W(3, 1038795182, -1118168090, 1030671240, 1038841827) + + W(4, 1055923683, -1098456431, -1107918233, -1093462217) + + W(5, 1043574176, 1042713369, -1118222006, 1029869485) + W(6, 1077914250, 1064911462, -1076359256, -1072190840) + + W(7, 1023726209, -1114062645, -1119388396, -1111766417); + WS(1058520191, -1103814144); + sum1 = W(0, 1017038583, 1027734728, 1045254802, -1107262661) + + W(1, -1105442246, 1031430877, -1126950445, 1024536063) + + W(2, -1136595532, 1016280509, -1109207324, 1062837304) + W(3, 1058887264, 1040464732, 1030731221, 1029953906) + + W(4, -1118053743, -1128563239, 1044218638, -1096672636) + + W(5, -1085861660, -1103880229, -1107144426, -1145128486) + + W(6, 1027978638, -1097122909, -1104843131, 1033499055) + + W(7, 1025985831, 1030848699, -1131329888, -1109411553); + sum2 = W(0, 1034702970, -1125598974, -1114553387, 1027536136) + + W(1, -1115924578, 1035052048, -1132148431, 1025333585) + + W(2, -1176551015, -1168898884, 1041527709, -1101298022) + + W(3, 1036785061, -1106074654, 1030578400, 1024211757) + W(4, 1007931783, -1090251816, 1067159008, 1043575777) + + W(5, 1028484706, -1104273626, -1148595850, -1114791455) + + W(6, -1110263367, -1089294892, -1085860022, 1061443693) + + W(7, -1099982080, 1042454777, 1027614804, -1123742626); + WS(1061983711, -1098811342); + sum1 = W(0, 1012577103, -1126897281, -1105225996, 1024253613) + + W(1, 1053413654, -1137207095, 1042674340, -1109172700) + W(2, 1051270689, 1046420622, 1058683087, 1058369439) + + W(3, -1097514912, 1052214131, -1105013139, 1050302029) + + W(4, -1108510228, -1106195699, -1088411008, 
-1089548804) + + W(5, -1118840551, -1096732498, -1109015536, -1103000311) + + W(6, -1107438075, -1107699151, 1036711743, 1026749110) + + W(7, -1116819519, 1024709491, -1140694819, -1154413112); + sum2 = W(0, 1018267361, -1110181012, -1146613180, 1038715850) + + W(1, 1024296823, 1036970119, 1022003098, -1124702635) + W(2, 1036503366, 1041828239, 1050199247, -1085432172) + + W(3, 1038941240, -1123003584, 1013422114, 1021003447) + + W(4, -1126101987, -1116783782, 1071387060, -1080125372) + + W(5, -1093021050, -1114155200, -1149124484, -1143100940) + + W(6, -1099489904, -1102883314, 1052019906, 1047113133) + + W(7, 1035497971, -1112911441, -1119585859, -1131838862); + WS(1061194367, -1092415693); + sum1 = W(0, 1036390333, -1126263470, 1025725222, 1043857157) + W(1, 1035642972, 1044002978, 998479342, 1045699251) + + W(2, 1051201914, 1053058480, 1054554894, 1047231542) + W(3, 1057662314, 1050872286, 1045022613, 1049392206) + + W(4, -1104881972, -1172862130, -1093268199, -1076465248) + + W(5, -1087389459, -1093150623, -1111846627, -1095910796) + + W(6, -1115149537, 1030237668, 1021350042, 1036659934) + W(7, 1039247491, 1026362746, 1017611295, 1018350484); + sum2 = W(0, -1129487679, 1042193002, -1108339354, -1095819059) + + W(1, 1041127864, -1103499785, 1034257999, -1118637794) + + W(2, 1018376667, -1087595144, 1054016721, 1063707892) + + W(3, 1036978867, -1103064253, -1103241982, -1112191100) + + W(4, -1097777178, -1098446206, -1106280983, 1068295659) + + W(5, 1052165187, -1110264861, -1135489703, -1107248782) + + W(6, -1117386048, -1106566736, -1108034123, -1104582586) + + W(7, -1102127104, 1036459063, -1104779766, 996655259); + WS(-1094699455, 1066131816); + sum1 = W(0, 1049793485, 1029004921, 1050253240, 1043166228) + W(1, 1029407446, 1041612884, 1035949114, 1012128942) + + W(2, -1081848423, -1088299640, -1084057276, -1084821317) + + W(3, -1084563631, -1104203619, -1099802604, -1096241030) + + W(4, 1059460685, 1048637514, 1060796209, 1059873140) + W(5, 1055943200, 
1047596309, 1050483936, -1105809528) + + W(6, 1042128105, -1139594888, 1026023615, 1020508084) + + W(7, -1125172935, 1048504406, -1115227886, 1051471881); + sum2 = W(0, 1060779432, -1118034101, -1104519356, -1119327164) + + W(1, -1101970936, -1106905521, 1002331293, -1082144605) + + W(2, 1051802488, -1097493973, 1057101737, 1060027297) + + W(3, 1044004337, 1057962293, -1098665363, -1086721729) + + W(4, -1086062785, 998397725, -1092333837, 1066349798) + W(5, 979464170, -1133317807, 1046794021, 1070824403) + + W(6, -1114203174, -1112558489, -1099984065, -1091352583) + + W(7, -1094257268, -1102635709, -1094504499, -1114333706); + WS(-1074268304, 1079667699); + sum1 = W(0, -1107189325, 1027760991, -1136873402, -1113641470) + + W(1, 1031162561, 1033891144, -1126712892, 1035917755) + + W(2, -1101508903, -1090199334, -1090363950, -1087996124) + + W(3, -1083961527, -1096484091, -1098094611, -1093567651) + + W(4, 1054199089, 1054303683, 1062422475, 1067750847) + W(5, -1113534839, 1052689284, 1031942551, 1045977323) + + W(6, 1035260131, -1107632246, 1005391451, -1101800081) + + W(7, 1034855959, 1046337921, -1150999418, 1042992013); + sum2 = W(0, -1109104193, -1121805095, 1045986101, 1048674700) + + W(1, -1091901802, 1048446997, -1113936425, 1027039508) + + W(2, -1100757902, 1045040397, 1080470972, -1080397282) + + W(3, -1070026310, 1054845861, -1109557127, -1113850952) + + W(4, 1040683096, -1125556022, 1049174250, -1095181876) + + W(5, -1122013099, -1110936398, 1038355211, -1103996340) + + W(6, -1125509919, 1026278254, -1112163985, 1050380898) + + W(7, -1131668233, 1033243862, -1144282357, 1036778147); + WS(-1085238047, -1082053459); + sum1 = W(0, -1106787663, -1138285833, -1108632776, -1094657608) + + W(1, -1101713608, 1037328759, -1111697288, -1109379868) + + W(2, 1039996894, 1010512412, 1053399812, 1045540841) + W(3, 1051732998, 1044326914, 976543556, 1057105320) + + W(4, 1048926451, -1119213276, -1130975030, 1010216242) + + W(5, 1034301867, 1025236158, -1128978334, 
-1109745879) + + W(6, -1104330662, -1128637723, -1108017347, -1096910157) + + W(7, -1101323868, 1015829447, -1111437732, -1108664266); + sum2 = W(0, -1124623913, 1032956443, 1027185909, -1101336864) + + W(1, -1094263361, 1029736763, -1112090109, 1039412150) + + W(2, 1025825838, -1119700813, 1032253087, 1046009611) + + W(3, 1060071017, -1108021530, -1111711122, -1092338318) + + W(4, 1042257288, -1135835500, 1053804277, 1048892371) + + W(5, 1036906607, -1101254223, -1115069322, -1114145526) + + W(6, 1021162350, -1114452863, -1109212782, -1102267892) + + W(7, -1105958464, 1037505372, 996911332, -1125671849); + WS(1060867039, -1136390908); + sum1 = W(0, -1127355032, 1034805910, 1035163860, 1029669941) + W(1, 1042066222, 1021674692, 1040577786, 1021261983) + + W(2, -1085412379, -1094311080, -1093352008, -1084691216) + + W(3, -1088428116, -1088125293, -1096644095, -1091872390) + + W(4, 1049179798, 1049208039, 1060583566, 1063283264) + W(5, 1058522472, 1055147879, 1050065089, 1041474713) + + W(6, 1047055468, -1108149971, 1036232649, -1103012671) + + W(7, 1024178471, 1026233836, -1126656770, 1049799631); + sum2 = + W(0, 1035672114, -1098827199, 1049155846, 1035178389) + W(1, -1107113032, -1144855824, 1035201737, -1127184278) + + W(2, -1063752230, -1085747691, 1077059336, 1075347264) + W(3, 1056308660, 1034654874, -1121418857, 1014935340) + + W(4, -1093487673, 1041803367, 1041184099, -1096808218) + + W(5, -1129382302, -1119639669, -1122522987, 1011408026) + + W(6, 1013645866, 1034418818, -1106855503, 1049524211) + + W(7, -1122362105, 1030856901, 1027103888, -1115004359); + WS(-1090674303, -1100343233); + sum1 = W(0, -1120232797, 1040422273, 1026412864, -1138867958) + W(1, 1033879777, 1033565615, 1019476032, 988102399) + + W(2, -1091881416, -1094093465, -1085938046, -1089254334) + + W(3, -1089797193, -1085407367, -1095206984, -1105032111) + + W(4, 1051419109, 1052500896, 1061270771, 1067891162) + W(5, 1060067146, 1016645247, 1041715172, 1036835111) + + W(6, 1036288004, 
1023553551, -1112170192, -1115349187) + + W(7, -1112030904, 998486818, 1027103050, 1040345019); + sum2 = + W(0, 1040618154, -1114130867, -1122148687, 1044229537) + W(1, -1110193301, -1097134960, 1037070268, 1037459780) + + W(2, -1101425729, 1043012943, 1049717343, 1078734719) + W(3, 1058416067, -1066172228, -1087270128, 1051160418) + + W(4, 1012698939, -1135257643, 1047429512, 1020747990) + W(5, 1045573328, -1105653505, -1098650023, 1029335953) + + W(6, -1121139419, 1033653009, 1038119907, -1131387750) + + W(7, -1162837210, -1124290574, -1142454518, -1119987003); + WS(-1083751903, -1076119444); + sum1 = W(0, -1115245576, -1106029427, -1102084379, -1113929101) + + W(1, -1115339007, 1029734646, -1110488366, -1147960477) + + W(2, 1056842307, 1054460828, 1062160243, 1061549380) + W(3, 1051628466, 1034628298, 1040780998, 1027250487) + + W(4, -1093345814, -1096614035, -1087844584, -1088621915) + + W(5, -1095344475, 1039622221, 1023285463, 1052549785) + + W(6, -1108216057, -1118873130, 1032187759, -1118044066) + + W(7, -1125935063, -1114025640, -1103117229, -1101242061); + sum2 = W(0, -1142266798, -1105635988, -1108468488, -1104742183) + + W(1, 1004562934, -1111519795, 1009402627, 1007186587) + W(2, 1041896729, -1133541327, 1052249127, 1059101684) + + W(3, -1116644500, 1022626513, 1036478091, -1114250343) + + W(4, -1109207785, -1119640286, -1132044399, 1057719514) + + W(5, 1049160976, 1040987637, -1103527129, -1094402391) + + W(6, -1116438034, -1143926222, -1116983273, -1107716245) + + W(7, 1036001553, -1093640310, -1115139384, -1102449905); + WS(1032624635, 1033521535); + sum1 = W(0, -1131553039, -1117491096, -1112834618, 1049918570) + + W(1, 1045795365, 1016822387, 1035361729, -1113168565) + + W(2, -1098906814, -1100587359, -1081513385, -1078593782) + + W(3, -1105649745, -1092053629, -1096260814, -1095243062) + + W(4, 1051615627, 1047570260, 1060987922, 1043587024) + W(5, 1056209202, 1059398075, 1049440024, 1051600524) + + W(6, 1045470610, 1025984961, 1036113314, 
1040182861) + W(7, 1012071435, 1032631435, 1023888652, 1034850724); + sum2 = W(0, -1111826236, -1099208280, 1026789677, -1106561040) + + W(1, -1093448876, 1046407154, -1102666308, 1042570396) + + W(2, -1092384767, 1027875109, 1064661507, 1062273543) + + W(3, -1087164405, -1108191451, 1044380270, -1104593420) + + W(4, -1095745094, -1100947515, -1112642295, 1068550638) + + W(5, -1104229197, -1106740917, -1103404028, -1105841536) + + W(6, 1034661269, 1048636172, -1098451609, -1099981129) + + W(7, -1118651857, 1049739863, -1095995165, 1048025970); + WS(-1085606847, 1061985400); + sum1 = W(0, -1108997980, -1117164584, 1031768707, -1103105924) + + W(1, 1041900981, -1110420181, 1037113665, 1014163116) + + W(2, -1089905075, -1114895517, -1095264598, -1077801587) + + W(3, -1097956992, -1088999487, -1098379486, -1096151518) + + W(4, 1057688104, 1056544190, 1062390314, 1055481056) + W(5, 1062127874, 1051900674, 1048102065, 1057425872) + + W(6, 1040581229, -1114811305, -1107526493, -1110202349) + + W(7, -1118208771, -1108027100, 1024450541, 1041210774); + sum2 = + W(0, 1025785873, 1048912334, -1096464036, 1056133856) + W(1, -1104154812, -1121968309, 1033410161, 1015120151) + + W(2, 1048760593, -1090084783, -1114768213, 1066763200) + + W(3, -1086306359, 1058187354, -1113492694, -1128581905) + + W(4, 1035955606, -1119218892, 1007739746, 1064934573) + W(5, -1089946445, 1050702067, 1025685973, -1102128709) + + W(6, -1126760968, 1008603006, -1094704139, -1080670875) + + W(7, 1031309677, -1125625514, -1109240237, 1039335694); + WS(-1090966143, -1078936607); + sum1 = W(0, -1116393227, -1108278025, 1051625062, -1105522375) + + W(1, 1058727904, -1091195697, 1052016408, -1109728971) + + W(2, -1086961670, -1108945587, -1086695325, -1081986687) + + W(3, -1089148367, -1086453117, -1092689140, -1091172115) + + W(4, 1058706816, 1041859343, 1060094467, 1059190367) + W(5, 1061098519, 1055062080, 1056115606, 1058930855) + + W(6, -1116819366, 1035952359, 1041797992, -1104043025) + + W(7, 
1053094890, -1093101306, 1015960496, 1034498693); + sum2 = W(0, 1023528661, -1119417071, -1105978590, 1009798795) + + W(1, 1040834561, -1111364082, 1041397121, 1033368796) + + W(2, -1118025227, 1050920275, -1112399565, -1098493686) + + W(3, 1036263434, -1124489419, 1048582480, -1089345484) + W(4, 1075422545, 1065042273, 1069638520, 1067662601) + + W(5, -1093932676, -1079439326, -1078082886, -1070108577) + + W(6, -1073269716, -1077701631, -1078616397, -1083676116) + + W(7, 1044144047, 1070390370, 1068400636, 1077727906); + WS(-1079921856, 1049308945); + sum1 = + W(0, 1030068962, -1133949626, 1050882798, 1018623990) + W(1, -1106924392, -1146454049, -1116987117, 1040905769) + + W(2, -1165865787, -1118082277, 1053008345, -1094507016) + W(3, 1052481994, 1043504665, 1047635255, 1026870489) + + W(4, -1187780005, -1108803021, 1038634079, -1106873963) + + W(5, -1087778738, -1101591143, -1101174683, -1110197734) + + W(6, 1025440517, -1111921110, -1120535059, -1104818073) + + W(7, 1046575523, 1041667424, 1031519031, 1031554658); + sum2 = + W(0, -1116556974, -1095066961, -1100395256, 1050308919) + W(1, 1040792410, 1032406727, 1011901446, -1141998384) + + W(2, 1059176005, 1038856572, -1076691076, 1055715442) + W(3, 1046498927, -1123959066, 1028959950, 1032868606) + + W(4, -1096412953, 1050790436, 1058179896, -1097771226) + W(5, 1039711359, -1106160519, 1018212767, 1038360912) + + W(6, -1114011261, -1173649795, 1024138419, 1040413046) + + W(7, -1106323994, 1033840078, -1127468978, -1107910389); + WS(1060842367, -1123947436); + sum1 = W(0, 1024730985, 1023159627, -1105241169, 1017803651) + W(1, 1037913699, 1035962309, 1028045772, 1032728670) + + W(2, 1048950736, 1047784339, 1057388094, 1060112037) + W(3, 1062491953, 1057656339, -1114528913, 997860858) + + W(4, -1097448124, -1098772880, -1087773444, -1085451727) + + W(5, -1088044111, -1089872870, -1102522557, -1103124285) + + W(6, -1122548355, 1026072115, 1037590622, 1037652837) + + W(7, 1034810634, 1033785172, 1008900343, 
-1114288296); + sum2 = W(0, 1026131421, -1112740722, 1046858645, 1032928238) + + W(1, 1042166107, 1040344552, -1082879617, -1091819295) + + W(2, 1001103237, -1121600279, -1108221277, -1097923247) + + W(3, 1069212508, 1057336110, -1095628134, -1107037379) + + W(4, -1133854939, 1038637096, 1037424280, -1103185020) + + W(5, -1120290433, 1019342369, 1039461314, -1122429445) + + W(6, -1114822986, -1108393699, 1029862733, -1101308237) + + W(7, 1036213344, -1145606165, -1111520861, 1034167562); + WS(1038606587, 1058047160); + sum1 = W(0, 1026865207, 1042118304, 1051307036, 1033639368) + W(1, 1046742309, -1109598961, 1028927539, 1015037525) + + W(2, -1095018974, -1093024127, -1082724754, -1100730470) + + W(3, 1036658562, 1040951763, -1113143704, -1098528366) + + W(4, 1044423619, 1048945010, 1054092644, -1097872036) + W(5, -1091674216, 1033100505, 1030352835, 1047837568) + + W(6, -1136300775, -1117800769, 1005722288, 1044405134) + + W(7, 1047528601, 1044812414, 1032954286, 1044220906); + sum2 = W(0, 1050657429, -1103301852, 1060573163, -1091436731) + + W(1, -1104427284, -1097267636, 1038196249, -1107511544) + + W(2, 1045860939, 1052242374, -1100974944, 1066745360) + + W(3, -1079344782, -1113016512, -1119497008, -1113648424) + + W(4, -1111592920, 1013554208, -1111727364, 1055083328) + + W(5, -1098647652, 1034990326, -1110499846, 1042188938) + + W(6, -1103122208, -1106328874, 1015168980, -1113723980) + + W(7, 1051192034, 1034643320, -1122594772, -1140986496); + WS(-1095257599, 1052962039); + sum1 = W(0, -1106746181, 1031300369, 1046667354, 1037361867) + + W(1, 1040829967, -1128966365, 1034540700, -1115420664) + + W(2, -1097695335, -1111219488, -1083536631, -1088861218) + + W(3, -1107201338, -1115618056, -1117058422, -1138753100) + + W(4, 1053309708, 1042046133, 1060606673, 1053570633) + W(5, -1101583222, 1033575580, 1029127944, 1041019129) + + W(6, 1036635571, 1032102291, -1110849586, -1103814096) + + W(7, -1130665360, 1023220920, 1009452933, 1041468947); + sum2 = + W(0, 
-1137205193, -1121249932, -1137006201, -1101149788) + + W(1, 1034796937, -1115985020, -1140489533, 1026538132) + W(2, -1089705070, 1025195792, 1049206515, 1054529493) + + W(3, 1039074115, 1050074620, -1104546681, -1103055048) + W(4, -1116057142, 1014073009, 1040872535, 1044729510) + + W(5, 1024555690, -1108168105, -1105229574, -1093226929) + + W(6, -1106134583, -1131583764, 1023734245, 1030258377) + + W(7, 1045578999, 1041497164, -1131316168, 1036128894); + WS(1058062751, -1100017341); + sum1 = W(0, -1129599006, 1020928695, -1098371705, -1111175932) + + W(1, -1108928286, -1108487635, 1028685770, -1134533769) + + W(2, 1055064995, 1054737968, 1061839865, 1057524464) + W(3, 1062722401, 1039434185, 1053936411, 1054765971) + + W(4, -1094744478, -1108714620, -1089137095, -1081607655) + + W(5, -1111400813, -1089739654, 1037008853, -1094983833) + + W(6, -1110864465, 1035506764, -1103907470, -1115102560) + + W(7, 1038105413, -1103382940, 1038153674, -1101904010); + sum2 = W(0, 1046625968, -1114205611, -1097342682, -1079666171) + + W(1, 1041595755, 977545170, -1106070846, -1145403458) + W(2, 1034733544, -1100522159, 1034693600, 1066071967) + + W(3, -1087077400, 1062799024, -1097179722, 1015933409) + + W(4, -1147999170, -1093312378, 1057288295, 1059803421) + + W(5, -1088579916, 1057064650, -1086854504, 1055355590) + + W(6, 1026537136, -1103988998, 1049229753, -1114956972) + + W(7, -1104980683, 1050850828, 997711860, -1115357129); + WS(1033636603, -1075190676); + sum1 = W(0, 1030520392, -1125697208, -1114269946, -1109653337) + + W(1, -1102211630, 1032435172, -1120127764, 996293820) + W(2, 1051253486, 1043969999, 1058583983, 1064791929) + + W(3, 1057642082, 1055881859, 1033241689, 1053053914) + + W(4, -1096251285, -1100430004, -1085953223, -1093290764) + + W(5, -1088005167, -1098086422, -1102333361, -1119839106) + + W(6, -1117233131, -1122877346, 1036037681, -1109909101) + + W(7, -1123582788, 1029223000, -1125435099, -1103406294); + sum2 = W(0, -1120200303, 1024626781, 
1013431061, -1124733431) + W(1, 1027814217, 999438554, -1116912567, 1007930813) + + W(2, -1113787644, 1041562727, -1114475538, -1112371618) + + W(3, 1032748777, -1098117165, 1042886038, -1140542509) + + W(4, -1109535466, -1106166077, 1029531851, 1051805575) + + W(5, 1061661699, -1114245048, 1039886302, -1093997696) + + W(6, 1041916719, 1028143351, -1097688999, 1068535590) + + W(7, 1074776935, 1029048009, -1076933565, -1072079702); + WS(1051996799, 1040628126); + sum1 = W(0, -1106876926, 1042171100, 1014013938, 1040804174) + W(1, 1038633599, 1024260165, 1032103150, -1111941370) + + W(2, -1096999331, -1106536569, -1087433914, -1079279688) + + W(3, -1085040172, -1090750082, -1096245875, -1091794355) + + W(4, 1064227732, 1056769919, 1057736892, 1052514885) + W(5, 1059334684, 1058371760, 1051111478, 1050507101) + + W(6, 1032518006, -1132737450, -1121739327, 1035728257) + + W(7, -1115112214, -1126170017, -1129820331, 1036168684); + sum2 = + W(0, -1097197656, 1040517176, -1107105879, 1050425804) + W(1, -1108102513, 1044838414, 1026907274, 978890108) + + W(2, -1105938617, -1119182790, 1048788314, 1048225474) + W(3, 1050055380, -1097162220, 1028263310, 1043193320) + + W(4, -1063141508, -1072247475, 1073986693, 1082212097) + + W(5, 1063649950, -1108483496, 1040597170, -1096883524) + W(6, 1047663602, -1106333570, 1017343300, 1014706728) + + W(7, 1023560998, -1105252179, 1016727612, 1025416282); + WS(-1079662656, 1075711984); + sum1 = W(0, -1136749190, -1108214827, -1112256994, -1102094902) + + W(1, 1019461885, -1113236619, -1115387518, 1015637672) + W(2, 1054381451, 1049467004, 1060357369, 1064079536) + + W(3, 1058102849, 1050217386, 1046694646, 1053154499) + + W(4, -1099895821, -1104318006, -1094055613, -1088977984) + + W(5, -1094018397, -1093377643, -1102869707, -1097489705) + + W(6, -1111920714, -1124862997, -1099490085, -1093653775) + + W(7, 1047054555, 1026036560, -1122648846, -1117787456); + sum2 = W(0, 1021596406, 1027700788, 1031902674, 1035450862) + W(1, -1146279384, 
1036068426, -1108044351, 1030087008) + + W(2, -1120307380, 1049653321, -1100871897, 1054975194) + + W(3, -1106853601, 1042197470, 1034125536, -1133477273) + W(4, 1037868882, 1038499992, 1074972319, 1073269037) + + W(5, 992333684, 1032586692, -1129244749, 1010404410) + + W(6, -1094559059, -1099826066, -1070411479, -1076192259) + + W(7, 1024067187, -1107184127, 1032061183, -1104237818); + WS(-1121360374, -1077155152); + sum1 = W(0, -1105583845, 1042009047, -1135496901, 1057317824) + + W(1, 1037107234, 1028773828, 1042777544, -1116736729) + + W(2, -1092428892, -1091586093, -1087526962, -1088356100) + + W(3, -1084718223, -1086275726, -1096882940, -1086839898) + + W(4, 1053987635, 1057449542, 1037542524, 1067018703) + W(5, 1053143538, 1051794540, 1049337758, 1050512766) + + W(6, 1036329976, 1039912928, -1114441548, 1035631092) + W(7, 1024846769, 1020991498, 1032759069, 1041911145); + sum2 = W(0, -1122349048, 1027223630, -1123059666, -1097496437) + + W(1, -1101147358, 1008041239, -1113710228, -1103013196) + + W(2, -1108922684, 1050237475, -1093984844, 1061603028) + + W(3, 1054912466, -1136134543, 1050990810, 1037079763) + + W(4, -1102817435, 1047134367, -1086242189, 1069845393) + + W(5, -1091188340, -1107987422, -1126348732, -1097181289) + + W(6, 1024154790, -1105600910, 1041276041, -1090292072) + + W(7, -1122818012, -1109079234, 1047206075, -1109791787); + WS(-1087087711, 1067831143); + sum1 = W(0, 1034272753, 1043354114, 1046815315, 1041026663) + + W(1, -1116058688, -1131457456, -1115558139, 1032034947) + + W(2, -1110686621, -1094089577, -1090322161, 1019000899) + + W(3, 1048378971, 1054466495, 1045446607, 1043401342) + W(4, 1034360523, 1051348739, 1056352764, 1049291409) + + W(5, -1098372646, -1090870248, -1103803571, -1098529593) + + W(6, 1006467012, -1097882829, -1098171061, -1099191041) + + W(7, 1036956791, 1036945505, -1124941614, 1026141794); + sum2 = + W(0, 1046033632, -1114282503, 1044655964, -1106001859) + W(1, -1104436397, -1101664028, 1015764662, 1023732858) 
+ + W(2, -1147554302, 1065651536, 1052996561, -1076523823) + + W(3, -1095873418, 1059971501, 1037179306, -1119149512) + W(4, 1021728539, 1061103542, 1047372498, -1081697027) + + W(5, -1094800655, 1058036758, -1113369666, 1033040332) + + W(6, 1034466187, -1117467216, 1040544990, -1105745303) + + W(7, -1119054372, -1110996319, -1120190386, 1028498617); + WS(1040537598, -1164677141); + sum1 = W(0, 1044447583, 987031402, -1121775302, 1042738997) + W(1, 1038434732, 1049696256, 1024826006, 1044766000) + + W(2, 1058921623, 1052904912, 1059494147, 1059765434) + W(3, 1053249925, 1045203303, 1049549231, 1032179929) + + W(4, -1079854668, -1089009751, -1082135643, -1085199868) + + W(5, -1087718171, -1094593271, -1098502833, -1111725730) + + W(6, 1051360885, 1009017491, 1049103971, 1042102879) + W(7, 1037325507, 1033340813, 1040792940, -1103499809); + sum2 = W(0, -1105450770, -1099930216, -1087670985, -1097641768) + + W(1, -1093727900, 1034849198, -1089823109, 1026936119) + + W(2, -1076904032, -1110661913, 1054929191, 1067640588) + W(3, 1043325457, 1041628489, 1024934811, 1071055811) + + W(4, 1034745372, -1104228342, 1055270313, 1058849806) + + W(5, 1055379319, 1047258529, -1091022953, -1093861556) + + W(6, 1064235564, -1133206564, -1159337186, -1095527236) + + W(7, -1097371942, -1123329564, 1053074558, -1079977593); + WS(-1072228928, 1076959210); + sum1 = W(0, 1049440860, -1100954468, 1040518102, 1055014377) + + W(1, -1102413547, 1056933930, -1100924366, 1025160513) + + W(2, -1102211417, -1090494943, -1089397123, -1085929645) + + W(3, -1092416799, -1103710088, -1099959722, -1097219837) + + W(4, 1052420434, 1049042513, 1057628335, 1059399711) + W(5, 1055649007, 1056457065, 1043274798, 1043339918) + + W(6, 1046258910, -1094165323, -1113742940, -1115570152) + + W(7, -1095616949, 1054320058, -1096288332, -1113497047); + sum2 = + W(0, 1037249746, -1106259401, -1118512137, 1033553146) + W(1, -1118437421, 1049532353, -1112295163, -1112769969) + + W(2, -1091193116, 1057292562, 
1045032769, -1101074553) + W(3, 1047290760, -1085599381, 1045403080, 1036239865) + + W(4, 1058952598, -1089499743, 990544085, 1052938455) + W(5, -1096914685, 1060306657, -1095081062, 1036770528) + + W(6, -1099278743, 1035766214, -1120311473, -1107284433) + + W(7, 1032001159, -1105441959, 1037527897, -1117837555); + WS(-1095248895, -1088416713); + sum1 = W(0, 991086878, 1041397006, -1100414321, -1106522737) + + W(1, 1032194452, 1040515607, -1113785099, -1119710941) + + W(2, -1112623265, 1023033498, -1095527100, -1085500137) + + W(3, -1103431285, -1093888960, 1025813699, -1112974208) + + W(4, 1049658160, -1129854724, 1056723856, 1061179113) + W(5, 1050067968, 1055178889, 1027165141, 1044420741) + + W(6, 1025280780, 1018002536, -1101036339, -1098081679) + + W(7, -1121954805, 1040437430, -1107604207, -1122583614); + sum2 = W(0, -1115281709, 1038308115, -1086190321, -1079410563) + + W(1, 1055352031, -1106071111, 1042421121, 999244165) + W(2, 1049351198, -1105447726, 1061131477, 1070791012) + + W(3, -1080774986, 1050939464, -1096576742, 1021655353) + + W(4, -1101546004, 1044927538, -1105825160, -1124003323) + + W(5, 1054621998, -1100497740, 1042910101, -1123561791) + + W(6, -1132663474, 1036918381, -1104633062, 1035199903) + + W(7, -1119052371, 1038597174, -1126972585, -1141223925); + WS(1057802399, -1081431823); + sum1 = W(0, -1122831751, -1131892376, 1041203877, 1034931072) + + W(1, 1020249162, 1023514439, -1183605377, -1102408215) + + W(2, -1093480916, -1097592621, -1087992005, -1086955360) + + W(3, -1094219074, -1099624791, -1104094712, -1103351679) + + W(4, 1050038833, 1050095280, 1058335651, 1062569267) + W(5, 1052617634, 1053253320, 1037479577, 1047799764) + + W(6, 1031929377, 1003466633, -1150646211, -1108251076) + + W(7, -1123920403, -1110979235, 1022403704, 1031403703); + sum2 = + W(0, -1104748298, 1035677532, -1097271446, -1071563129) + W(1, -1073203825, -1105574565, 1074066983, 1076846054) + + W(2, 1032567526, 1015597088, 1042180726, -1131466492) + W(3, 
-1088097679, 1049167232, -1104411749, 1054919113) + + W(4, 1030425414, -1106402955, 1036072793, 1026492053) + W(5, 1021201077, 1046304011, -1108010622, 1040423027) + + W(6, 1028805714, 1010146816, -1122721452, -1139412188) + + W(7, -1121571686, 1013464552, -1115684312, -1159010626); + WS(1057159391, -1098185256); + sum1 = W(0, -1115997840, -1139848194, -1110900998, 1040272534) + + W(1, -1124007614, 1022311680, -1122049771, 1025640163) + W(2, 1062337777, 1059107990, 1058844902, 1060188982) + + W(3, 1050305613, 1055455830, 1052112283, 1058488623) + + W(4, -1099145229, -1096884471, -1083284577, -1081840204) + + W(5, -1086268415, -1088086212, -1093503993, -1095187909) + + W(6, -1108105457, 1026934458, -1128707950, 1050661700) + + W(7, 1025671020, 1018441343, 1032949738, -1115150240); + sum2 = W(0, 1056249112, -1087901783, 1050240307, -1096100666) + + W(1, 1050011361, 1046757851, -1111613200, 1031802636) + + W(2, -1063663228, -1065703489, 1050922399, 1074103439) + W(3, 1078895155, 1075028289, 1011769164, 1041242105) + + W(4, -1090194820, 1049313091, 1059446328, -1091782161) + W(5, 1051412884, 999915799, -1097222567, 1040180956) + + W(6, -1098679170, 1049727089, -1118968312, 1043947681) + + W(7, -1103451878, 1046896549, 1036070334, -1120669738); + WS(-1076050352, 1072169512); + sum1 = W(0, -1112010473, -1110549812, 1041474168, -1102358403) + + W(1, 1035256808, -1103941101, -1126807163, -1127729580) + + W(2, -1127017300, -1096217239, -1101316183, -1098236010) + + W(3, -1097462672, 1015309803, -1109990262, -1124828306) + + W(4, 1032805651, 1055039585, -1107675851, 1058693100) + W(5, 1050718238, 1047069339, 1040782175, 1033573870) + + W(6, -1127012698, 1017026201, 1027743346, -1103052317) + + W(7, 1025923518, -1108585145, 1029935559, 1030091394); + sum2 = W(0, -1115222730, -1103808603, -1133607278, 1035011293) + + W(1, 1045426354, 1005150868, 1038138903, 1014354189) + W(2, 1013406324, 1044516259, 1056099238, -1093760521) + + W(3, -1097063741, 1046105407, -1106358807, 
1034790704) + + W(4, 1048651312, -1091636530, -1082257366, 1065398243) + + W(5, 1052543946, -1105190986, -1143243436, -1124610775) + + W(6, -1105744683, 1036643390, 1056116581, -1087652578) + + W(7, -1106429625, 1031045850, -1129675875, 1033997397); + WS(1065781680, 1039008007); + sum1 = W(0, 1041247860, -1132228043, -1116017645, 1029031978) + + W(1, -1114552992, 1018467582, -1119698444, 1039500994) + W(2, 1052873269, 1052030887, 1057769989, 1065817909) + + W(3, 1059292430, 1052683781, 1045381867, 1042767258) + + W(4, -1087848896, -1090939837, -1086321043, -1090130600) + + W(5, -1093123180, -1089813139, -1093950448, -1088918334) + + W(6, -1122380132, 1043969975, -1167945460, 1043124941) + + W(7, 1037414446, 1037105136, 1016187596, -1139723011); + sum2 = W(0, 1038177586, 1024852101, 1031995320, 1016571646) + W(1, 1033741319, -1108656521, 1035134998, -1120710323) + + W(2, -1105284792, -1131141686, -1123455081, 999573799) + + W(3, -1107419527, 1052574782, -1099142208, -1103717942) + + W(4, -1114385091, -1105200928, 1057384109, 1073904332) + + W(5, 1075963065, -1130292726, -1072886122, -1070457563) + + W(6, 1023745983, 1044997454, -1102654886, 1045931554) + + W(7, -1113399825, 1041285787, -1101763500, 1044467987); + WS(-1090516543, -1087531312); + sum1 = W(0, -1105337757, -1135375282, -1113059195, -1111479074) + + W(1, 1044282546, 1036163039, -1123738614, -1110047807) + + W(2, 1040358473, 1025750638, 1034124454, -1082649657) + + W(3, -1098312091, -1114570479, -1112260786, 1044483760) + + W(4, 1026287071, -1109155234, -1111070925, -1113637434) + + W(5, 1056333811, 1044131649, 1049914271, 1039433994) + W(6, -1112133319, 1043065477, 1049735832, 1048811350) + + W(7, -1104740686, -1111303053, -1128712224, -1136462731); + sum2 = + W(0, -1122757257, 1009904581, -1113467627, -1099518373) + W(1, 1032421699, -1095677862, 1020800146, -1112743661) + + W(2, 1027164873, -1112829049, 1020364514, 1064179702) + W(3, -1092934445, 1039050377, -1106885375, 1022655162) + + W(4, 1002742314, 
-1115430893, -1098221748, 1059311413) + + W(5, -1139891301, 1041708988, -1124766450, 1032689603) + + W(6, -1111953049, -1116391321, 1025748661, -1098741962) + + W(7, 1036551469, -1119491053, 1025848261, -1122814953); + WS(1059528063, 1057564569); + sum1 = W(0, 993737087, 1023855952, 1051519336, -1100822612) + W(1, 1027584566, -1109486813, -1127107174, 1035631854) + + W(2, -1096201122, -1112481934, -1089271861, -1101379550) + + W(3, -1129312776, -1101173547, 1031993577, -1097162722) + + W(4, 1052073475, 1044928820, 1057871347, -1090210244) + W(5, 1027452795, 1051680014, 1038521522, 1050823828) + + W(6, 1036364765, -1119101474, 1026047377, -1095066725) + + W(7, 1043887545, 1038809676, -1152581582, 1037595092); + sum2 = + W(0, -1106514375, 1034094594, -1112949179, 1009317700) + W(1, -1126355148, 1021633582, 1010987268, -1106252693) + + W(2, -1115270098, 1029678117, -1095879627, 1052560633) + W(3, -1128108776, 1034084538, 1040297892, 1049223604) + + W(4, 1025588625, -1103891997, 1029083209, 1062070608) + W(5, -1100067512, 1035054672, -1117785257, 990783407) + + W(6, -1109616345, 1044342701, -1107320484, -1103446924) + + W(7, -1131810302, -1098743078, -1123491293, -1102218346); + WS(1061977215, -1122204685); + sum1 = W(0, -1124031333, -1109554099, -1105250724, 1036027519) + + W(1, 959521497, 1055353398, 1038275624, -1136942561) + W(2, 1053794732, 1053305789, 1058003360, 1059361796) + + W(3, 1055117810, 1051669497, 1037705519, 1047158647) + + W(4, -1090279300, -1096176108, -1093816822, -1081723655) + + W(5, -1090510080, -1097892492, -1101480172, -1098784634) + + W(6, 1017895407, 1027213295, 1024566052, 1033714163) + + W(7, -1125004641, 1006220452, -1112881733, -1145512357); + sum2 = W(0, -1113155109, 1032430477, -1105712018, 1048044904) + + W(1, -1081128362, -1070110178, -1092435520, -1092804751) + + W(2, 1028856727, 1021325630, 1050891972, 1070862297) + W(3, 1074403261, 1035516985, -1108990845, -1122256510) + + W(4, 993552399, -1128331832, 1042508384, 1042132916) + W(5, 
1044319918, -1115800491, 1033559089, -1109033129) + + W(6, 1036996819, -1139715524, -1111526035, 1040898786) + + W(7, 1025048067, 1038841221, -1119663830, 1007165668); + WS(-1096328959, 1070879408); + sum1 = W(0, 1047483149, -1131726569, 1017405984, 1016192218) + + W(1, -1134185032, -1130210629, 1039283496, 1022798351) + W(2, 1056909741, 1050717891, 1049879450, 1064784543) + + W(3, 1054175424, 1055866677, 1052923070, 1050331269) + + W(4, -1084253114, -1097194674, -1084428714, -1088019878) + + W(5, -1087922381, -1096076442, -1098682228, -1089646420) + + W(6, 1043610432, -1120381614, 1029856307, 1046790989) + + W(7, 1044095416, -1106891777, 1045347898, -1103492015); + sum2 = + W(0, -1096731787, 1041094096, 1030029833, -1134738767) + W(1, -1112066554, 1049526473, -1103571503, 1038503381) + + W(2, -1092572599, -1083918511, -1081752188, 1050600846) + W(3, 1060518769, 1056806956, 1052566197, 1040255570) + + W(4, -1072213302, -1081783724, -1088492050, -1098909280) + + W(5, 1054813136, 1066457366, 1061557517, 1074348460) + W(6, 1074300082, 1061218424, 1062246408, 1055931246) + + W(7, 1030722387, -1080408758, -1082314750, -1074863778); + WS(-1093955647, 1022010191); + sum1 = W(0, 1027472455, -1118029508, 1017520774, -1100976252) + W(1, 1048038797, 1019653215, 1015004427, 1006149335) + + W(2, 1051129150, 1040858863, 1058284788, 1064764899) + W(3, -1094249656, 1054989120, -1137681219, 1047833420) + + W(4, -1126583382, -1110637352, -1083829535, -1092969620) + + W(5, -1093269900, -1098669306, -1101321193, -1092663640) + + W(6, -1110971745, -1129027011, 1043546956, 1032062845) + + W(7, 1049944248, -1114080636, 1039113958, -1120661351); + sum2 = W(0, -1125700855, 1010923567, -1097524591, -1086770251) + + W(1, -1119687254, -1136610695, 1034141799, -1146172877) + + W(2, -1109340305, -1099241242, -1097784845, 1061340682) + + W(3, -1091236020, 1059369820, -1103210716, 1042204527) + + W(4, 1041739108, -1121076246, -1122648580, 1059475190) + + W(5, -1097965354, 1048517727, 1020884025, 
-1117941982) + + W(6, -1138307987, 1010348245, 1043843262, -1104554020) + + W(7, 1042867821, -1105357691, 1021404807, 1026766467); + WS(1057685119, 1067243116); + sum1 = W(0, -1129459555, 1016923756, 1004671913, -1095493364) + + W(1, -1098633785, -1112175056, 1046138501, -1131471418) + + W(2, -1098308652, -1100618635, -1090418529, -1088191051) + + W(3, -1089905135, -1101363479, -1106099897, -1097291652) + + W(4, 1053516928, 1049587364, 1057639529, 1065512192) + W(5, 1058720438, 1052631981, 1043712557, 1052947846) + + W(6, 1029422892, -1111775882, -1114348966, -1112549294) + + W(7, -1103551521, -1111153757, 995391206, 1020473821); + sum2 = + W(0, -1131225411, -1130956898, -1103681442, -1077027280) + W(1, -1076028808, 1031803188, -1101294062, 949706042) + + W(2, -1140292462, 1023877693, 1042914798, 1069127322) + W(3, 1071847872, -1103125793, 1043076855, -1132087796) + + W(4, 1018110051, -1137158572, -1224504659, 1043315619) + + W(5, -1120948958, 1043270022, -1113817590, 1015417250) + + W(6, 1018006973, -1128428036, 1020723155, -1147115303) + W(7, 1020554514, -1133954299, 990350747, 1025989218); + WS(-1097928959, -1082976358); + sum1 = W(0, 1027664520, -1123678010, -1106569905, 1052451146) + + W(1, -1103556709, 1038567635, -1108799690, 1040222651) + W(2, 1057835343, 1050550727, 1057764410, 1062299789) + + W(3, 1049034640, 1057826487, 1041175724, 1060823717) + + W(4, -1089326063, -1096462854, -1083721351, -1087127986) + + W(5, -1097735566, -1094118206, -1100805265, -1089053734) + + W(6, -1108038941, 1032858337, 1021803093, 1042469806) + + W(7, -1104920125, 1031016631, -1129264698, -1107162784); + sum2 = + W(0, 1059738223, 1048150071, 1052511773, -1080186607) + W(1, -1078381471, -1072132845, -1071450961, -1071397334) + + W(2, -1087082495, -1095900368, -1108343021, 1063110279) + W(3, 1074904245, 1071639576, 1076364217, 1077341373) + + W(4, -1092378156, 1046247330, 1033545850, -1090059649) + + W(5, -1107095292, 1039712953, -1113577164, 1049211915) + W(6, 1034470847, 
1025055065, 1045395587, -1094453880) + + W(7, 1053230483, -1103894182, 1032308634, -1120241798); + WS(-1093482751, 1040242403); + sum1 = W(0, -1101135214, -1123956286, -1107524598, -1138012647) + + W(1, 1034129455, 1042720691, -1130324184, 998898338) + W(2, 1051279307, 1045008905, 1049489177, -1132973569) + + W(3, -1096848358, -1098383795, -1104810279, -1118268600) + + W(4, -1160860306, -1106067054, -1101021318, -1113266773) + + W(5, 1049504725, 1059206814, 1049756222, 1052043149) + + W(6, -1112732162, -1135242283, 1029341500, -1121978915) + + W(7, -1098727290, -1102892456, -1104163113, -1099073308); + sum2 = W(0, 1050603120, -1101078445, 1048531671, -1105220793) + + W(1, -1091713187, 1034851673, -1122031169, 1020293663) + + W(2, 1055811875, -1122671355, 1038805789, 1066111209) + + W(3, 1026095418, -1130092003, -1137910742, -1094690648) + + W(4, -1086548128, -1087609936, -1088336312, 1071017234) + + W(5, -1115602704, -1089969509, -1121274059, 1053616941) + + W(6, 1034695967, -1108269543, -1089139562, -1104320030) + + W(7, -1100901763, 1049804877, -1103744367, 1054827086); + WS(-1112146683, -1077736475); + sum1 = + W(0, -1128590341, -1130792414, -1147973299, -1117834531) + W(1, 1007065251, 1033724609, 1024083798, -1122914526) + + W(2, -1099064529, -1090490649, -1086623314, -1101063739) + + W(3, -1084449173, -1094725676, -1097837640, -1097461352) + + W(4, 1049253908, 1028859267, 1040872136, 1068918685) + W(5, 1059458719, 1057886934, 1044653558, 1048946351) + + W(6, 1039097715, 1017948777, 1025770233, -1115616742) + W(7, -1104116690, -1132647657, 998082042, 1030833130); + sum2 = + W(0, 1038545045, -1104316542, -1098913527, 1054073276) + W(1, 990615436, -1111960943, -1127436605, -1131799749) + + W(2, 1044965772, -1073797636, -1069601753, 1079919017) + + W(3, 1065620582, -1103381017, 1039512539, -1105195767) + W(4, 1038508364, -1092377302, 1050236814, 1044931722) + + W(5, 1034922622, 1033398315, -1113162193, 1039327537) + W(6, 1032278560, -1115941315, -1109127211, 
1042591612) + + W(7, 997021068, 1042220416, -1138197651, -1121388557); + WS(-1090505151, -1074550453); + sum1 = W(0, -1112838599, 1028498739, 1023419227, -1117182312) + + W(1, 1018948122, -1126145258, 1024711581, -1125422420) + + W(2, -1106878250, -1103427629, -1088602192, -1088941275) + + W(3, -1087514842, -1096920942, -1096615233, -1095073209) + + W(4, 1057615003, 1042960070, 1063645564, 1053033883) + W(5, 1058641540, 1043052986, 1050742275, 1053972531) + + W(6, -1131143051, 1023192930, -1096958907, 1048242260) + + W(7, -1098164287, 1045189988, -1105551137, 1026824959); + sum2 = W(0, 1051549752, 1081443218, 1072492444, 1048388769) + + W(1, 1035162847, -1139079606, -1121149576, -1123800689) + + W(2, 1021553919, -1111116920, -1073777225, -1067599847) + + W(3, -1096587672, 1029232562, -1115288719, 1042218632) + + W(4, -1106404197, 1044837835, -1113676512, -1089379201) + + W(5, -1109546151, 1030543674, -1122606689, -1125599605) + + W(6, 1031994963, -1113046369, 1032624041, 1025956540) + + W(7, 1027498280, -1132168419, 1034044071, -1115044631); + WS(1051119487, 1041244378); + sum1 = W(0, -1124974044, 1028120435, 1048876359, 1037118035) + + W(1, -1131477080, -1118427428, -1120234265, -1125985859) + + W(2, -1137238551, -1113671305, 1036964902, 1061504071) + W(3, 1047204942, 1050660570, 1030267847, 1049717597) + + W(4, -1116593698, -1103177170, -1099611151, -1104164974) + + W(5, -1081804989, -1114906180, -1098831475, -1103929310) + + W(6, -1121323393, 1033078132, -1156053771, -1129960608) + + W(7, 1048068278, 1011539382, 1035846233, -1115091743); + sum2 = W(0, 1018090488, -1104952272, -1105226064, -1128161694) + + W(1, -1134153828, 1024328280, -1118410723, -1117225997) + + W(2, 1027537220, 1048078363, 1055192952, -1102612387) + + W(3, -1102761937, -1101688104, 1035228556, 1016223417) + + W(4, 1019973834, -1095697091, -1101823824, 1074095781) + + W(5, -1074759173, 1063478353, -1092594125, 1041310640) + + W(6, -1117503915, 1045164128, -1106818318, -1087428157) + + W(7, 
1061969021, -1092430375, 1050476000, -1110796930); + WS(1062303263, 1031082743); + sum1 = W(0, -1113216195, -1099215391, 1052151297, 1026092610) + + W(1, 1051942565, -1098039588, -1101893697, 1008063218) + W(2, 1050954912, 1012178225, 1065129152, 1047492794) + + W(3, 1063700475, 1049517720, 1028060738, 1054989200) + + W(4, -1095964628, -1095180416, -1088785601, -1089641431) + + W(5, -1089295544, -1091476104, -1102410709, -1096414030) + + W(6, -1121789260, 1027015379, 1041954507, -1107401421) + + W(7, 1049634587, -1112964196, 1035520455, -1114231491); + sum2 = W(0, 1033145987, -1111827051, -1103256184, -1063595607) + + W(1, -1101985731, -1141099147, -1136421288, 1030655939) + + W(2, -1175661064, -1152013649, 1050523165, 1084357731) + + W(3, 1048919805, 1025442742, -1145624409, -1133270551) + + W(4, 1021827842, -1104895942, 1034611383, -1094652398) + + W(5, 1029250458, -1102082303, 1021034188, -1121842648) + + W(6, -1165845202, -1187919119, -1125020091, 1042933311) + + W(7, -1108910008, 1040766920, -1126731425, 1017996705); + WS(1057958943, -1104691893); + sum1 = + W(0, -1111014615, -1128240917, -1146251723, -1095236657) + W(1, 1047913107, -1103203951, 1033356080, 1023941889) + + W(2, -1105424217, 1046008396, -1090082375, 1054329938) + W(3, 1003615948, 1013130820, 1044802714, -1101796865) + + W(4, 1044401084, -1100901945, 1057785284, 1040293502) + W(5, -1099597392, 1051440085, -1102604718, 1036253756) + + W(6, -1118336456, 1038452148, -1119525902, -1093887914) + + W(7, 1045607070, -1107674167, -1113361665, 1037874639); + sum2 = + W(0, 1010987018, -1105807463, -1102396693, 1054217500) + W(1, -1123211893, 1029628166, -1106898625, 1028744914) + + W(2, 1041119120, -1111572952, -1117220749, -1087890507) + + W(3, 1070205457, -1101117061, -1113015450, -1121743361) + + W(4, -1110188142, 1045029856, 1057260834, -1087358521) + + W(5, -1081682663, 1051101181, 1042553654, -1135525482) + W(6, -1127404253, 1029477872, 1019931869, 1056265374) + + W(7, -1091128546, -1124058781, 
1025549544, 992401866); + WS(1063883327, -1171419961); + sum1 = W(0, -1131801844, 1021098616, 1022023532, -1121742227) + + W(1, -1090925859, 1024490200, -1102607801, -1123544461) + + W(2, 1049833398, -1123871229, 1009731296, 1044941350) + W(3, 1060653970, 1043051923, 1048967870, 1050859794) + + W(4, -1102183011, -1097479398, 1056730033, 1041013425) + + W(5, -1095529687, -1097251239, -1104750961, -1097963532) + + W(6, -1101850371, 1031224740, -1123343997, -1127214008) + + W(7, -1107029405, 1042080318, -1107657120, 1009310536); + sum2 = W(0, -1106082604, 1041248381, -1132935321, -1106587714) + + W(1, -1104172881, -1118244665, -1115368488, -1139264801) + + W(2, 1047581965, -1108094251, -1111186257, 1050591326) + + W(3, 1039028256, 1025922877, -1111866155, -1104769894) + + W(4, -1097195715, -1105642610, 1042983438, 1048334662) + + W(5, 1046273630, -1128737701, 1044426238, -1122157893) + + W(6, -1113941985, -1131232309, 1050509566, -1103031697) + + W(7, -1127691207, -1115007353, -1112751356, 1031938310); + WS(1064944927, 1037595256); + sum1 = W(0, -1116864018, 1023806679, 1017115436, 1009660557) + + W(1, 1028149000, -1109610248, -1148661421, -1102752831) + + W(2, -1089228760, -1095304482, -1087165700, -1086682215) + + W(3, -1088874517, -1099761351, -1116139780, -1131587330) + + W(4, 1063040538, 1057549004, 1060715233, 1064233001) + W(5, 1056983460, 1052917089, 1041500021, 1045020525) + + W(6, -1107296590, -1106875789, -1103245163, -1106392275) + + W(7, -1132085516, -1107160974, -1129006079, -1152394658); + sum2 = + W(0, 1045832950, -1110716731, 1025147898, -1098226948) + W(1, -1099749141, -1098250414, 1042544569, -1089530617) + + W(2, -1084008560, 1049995423, -1089165996, 1066542501) + W(3, 1049362032, 1055260506, -1086997229, 1067285885) + + W(4, -1084775302, -1093173621, 1054478967, 1068786857) + + W(5, 1047038722, -1126019654, -1127724076, -1088213452) + + W(6, 1058474163, -1102301410, -1128684365, 1031931484) + + W(7, -1091779956, -1105326436, 1053449115, 
-1090548395); + WS(-1082101344, -1089856493); + sum1 = W(0, 1050831902, -1112019073, 1030303546, -1129997454) + + W(1, -1110327174, -1117704076, -1112251093, -1131596868) + + W(2, -1105691402, 1030717682, 1055521064, 1064695661) + W(3, 1061962758, 1058754169, 1047731210, 1056158989) + + W(4, -1101015969, -1098775681, -1097748442, -1085520515) + + W(5, -1086646154, -1089610799, -1093958751, -1094349907) + + W(6, 1029275441, -1120813832, -1139280937, 965412887) + + W(7, 1041358014, -1122362798, 1028938947, 1016935004); + sum2 = + W(0, -1065128139, -1076824095, 1067439244, 1079260085) + W(1, 1056740148, -1103928218, -1135208116, 1039164015) + + W(2, -1101130808, 1050826865, 1046966330, 1049869634) + W(3, 1051236860, -1114183173, 1033256035, -1104627926) + + W(4, 1034565987, -1109355365, -1109830496, -1112614302) + + W(5, -1123646153, 1051726295, 1035002893, -1109091637) + W(6, -1119435550, 1023890205, 1012867020, 1015248786) + + W(7, -1122661198, -1136992532, -1119472799, -1136910724); + WS(-1090711679, 1035967541); + sum1 = W(0, -1111795270, 1015715144, -1118064815, -1124564760) + + W(1, 1033380937, 1041725555, 1039158191, 1036708611) + + W(2, -1104454427, 1029360119, -1084526809, -1083278737) + + W(3, -1083950349, -1091233609, -1102432923, -1097260861) + + W(4, 1050513973, 1050454265, 1063535365, 1060834888) + W(5, 1050327592, 1054941014, 1040417584, 1042099839) + + W(6, 1030482278, -1138269301, -1132452908, 1007072853) + + W(7, 1028355007, 1029566323, -1140656289, 1035976950); + sum2 = W(0, 1030795338, -1104698754, -1086482027, -1086674278) + + W(1, 1048689193, -1101252049, -1127107620, -1097010897) + + W(2, -1099705557, -1088930785, -1087114188, 1063363970) + + W(3, 1066258058, 1055138353, -1113111809, 1050900036) + + W(4, -1100315818, -1129318753, -1088371087, 1059258270) + + W(5, 1052412907, 1035784301, -1107499570, 1046424313) + W(6, -1124401732, 1047822852, 1041847465, 1007827043) + + W(7, -1114489829, 1039056173, 1036879119, -1112035485); + WS(-1098505599, 
-1075865372); + sum1 = W(0, -1119631078, 1024188415, -1125460739, -1113150415) + + W(1, -1098117352, 1028789822, -1115205937, -1134520549) + + W(2, -1131715643, 1032178901, 1056750314, 1055366257) + W(3, 1063530169, 1038512989, 1036256496, 1042179631) + + W(4, -1121509433, -1128987132, -1131145214, -1096447994) + + W(5, -1086227497, -1117796833, 1016180195, 1032402616) + + W(6, -1113178620, 1024883696, -1114020928, -1102601578) + + W(7, -1111953177, -1101470933, -1141768534, -1114797153); + sum2 = W(0, -1098762498, -1112826554, -1099883844, -1090598021) + + W(1, 1060690639, -1101866176, 1051055486, -1111565268) + W(2, 1039073687, 1047439072, 1024348972, 1068429254) + + W(3, -1078855206, 1052294530, -1095975199, 1034134076) + W(4, 1035541163, 1032611741, 1050751610, 1066295533) + + W(5, -1081223881, -1097770507, -1126362074, -1121819739) + + W(6, 1030962008, 981436096, 1048715774, -1089341195) + W(7, -1094541311, 1041458546, 1044471119, 1024624177); + WS(1057165023, -1106509195); + sum1 = W(0, -1110663097, 1023353031, 1009641338, -1116674588) + + W(1, -1121179137, -1127866586, 1019708412, -1105922998) + + W(2, -1090373040, -1094296089, -1087820187, -1090764772) + + W(3, -1101809276, -1095695571, -1097119704, -1088811352) + + W(4, 1057389257, 1051297995, 1064196683, 1064337073) + W(5, 1058239248, 1053143796, 1049182394, 1042633923) + + W(6, 1036322023, -1110472134, -1103525103, -1108191858) + + W(7, -1105116828, -1130262917, -1117889432, 1047174317); + sum2 = W(0, -1112942365, 1048482322, -1100072975, -1097118067) + + W(1, 991349250, -1108096389, 1053525633, 1000921969) + W(2, -1106822390, -1115190843, 1034045428, 1068163863) + + W(3, 1063618710, -1107942623, -1090492700, -1076676559) + + W(4, -1102090209, 1042851055, -1103979279, 1056641862) + + W(5, -1105663689, 1049912171, -1130278628, -1111476815) + + W(6, 1043730009, 1035412483, -1105576965, 1028572696) + + W(7, -1113934073, -1105853831, 1039916278, -1106999228); + WS(-1086493375, -1079336981); + sum1 = W(0, 
977772865, -1123924042, -1109993624, -1107908519) + + W(1, 1036636329, 1038934102, 1034769240, -1099048778) + + W(2, -1102983835, -1108602044, 1041059645, -1094902379) + + W(3, -1083260999, -1084987377, -1102484540, -1108629750) + + W(4, 1026410857, -1098146918, -1106936850, 1061697492) + W(5, 1061962303, 1057432285, 1051050960, 1050989872) + + W(6, 1015269519, 1049374456, 1036994068, 1041174790) + + W(7, -1103125248, -1125738921, -1130650164, 1040252563); + sum2 = W(0, -1104970914, 1018133697, 1040842491, -1093587851) + + W(1, -1093237537, -1102375178, -1099804326, 1054809337) + + W(2, 1042634099, -1097448347, 1055662701, 1074010545) + + W(3, -1089724369, -1099392752, 1049771835, -1092159031) + + W(4, 1052960430, -1108255972, 1062139198, 1064732329) + + W(5, -1083390778, 1025575413, -1104347883, -1096888027) + + W(6, -1112577728, -1096543501, -1090800341, -1093194079) + + W(7, 1015865539, 991452982, -1126119433, 1028159711); + WS(-1088469887, 1068090411); + sum1 = W(0, -1113244581, -1123900274, 1049496346, -1110903326) + + W(1, -1119953152, 1031904934, 1034227191, 1025939062) + W(2, 1001301201, 1023861926, -1082959002, 1050701400) + + W(3, 1060902689, 1043052181, 1043817559, 1038313117) + W(4, -1115331947, 1035790701, 1057295251, 1048296635) + + W(5, -1081663133, -1113327662, -1110964279, 1034951606) + + W(6, -1124374314, -1105829923, -1096521435, -1119292644) + + W(7, 1048590454, 1027826066, -1106468266, -1117293966); + sum2 = + W(0, 1046922708, 1033420508, -1112865289, -1096930035) + W(1, -1101802092, 1017341032, 1036773342, -1107450994) + + W(2, -1106472578, -1114867820, -1087802072, 1058113813) + + W(3, 1050476308, -1103417477, -1105807138, 1033379085) + W(4, 1033443280, -1109208012, 1020845388, 1048730658) + + W(5, 1059721445, -1107242470, -1136542442, -1104249476) + + W(6, -1111979165, 1050643686, -1101832932, -1129026525) + + W(7, -1104558996, -1135034393, 1037376476, 1023773731); + WS(1052578175, -1092643724); + sum1 = + W(0, -1106584961, 1030893452, 
1033955624, 1037358070) + W(1, 1036710537, -1128189432, -1118327615, -1129060558) + + W(2, -1102735475, -1100779014, -1082492039, -1086815439) + + W(3, -1088009424, -1095075429, -1106908643, -1102841939) + + W(4, 1053304083, 1049737317, 1060101900, 1065824329) + W(5, 1041943502, 1057719155, 1051335460, 1036237087) + + W(6, 993436516, 1037510015, -1101687426, -1103135573) + W(7, 1029815263, 1040440469, -1106074638, 1027836379); + sum2 = W(0, -1115440098, 1029771501, 1018065220, 999202537) + W(1, 1036811205, -1111720654, -1125064265, 1019498859) + + W(2, 1015710974, 1033337736, -1094728975, 1039453548) + W(3, 1050012979, 1032576025, -1103861773, 1040788513) + + W(4, 1041402597, -1114856806, 1037520917, 1064530169) + + W(5, -1094040689, 1073761042, 1063409170, -1090935047) + + W(6, -1130229431, 1034787000, -1082769104, -1074584982) + + W(7, -1090370130, 1032162938, -1105633547, 1034133532); + WS(-1104397694, 1058392920); + sum1 = W(0, 1012876448, -1126111401, -1142522206, -1115125816) + + W(1, -1103048369, 999529969, 1016404519, -1117545451) + W(2, 1051523414, 1050625047, 1043902243, 1063861817) + + W(3, 1056801600, 1053396405, 1042081404, 1054067798) + + W(4, -1094911217, -1106828476, -1089440467, -1093833780) + + W(5, -1087921625, -1107310585, -1104958721, -1097217844) + + W(6, 1023486907, 1033392977, -1119682235, 1019625524) + + W(7, -1096529922, 1015514383, 1039287934, -1113687714); + sum2 = W(0, -1130009672, 1027809153, -1106384796, 1051460883) + + W(1, -1104521112, 1033821004, -1112259960, 1028508475) + + W(2, 1013515790, -1106180134, 1042067106, -1094561503) + + W(3, 1032257325, -1124940473, -1168427353, -1120291522) + + W(4, 1013756514, 1023886448, 1052609397, 1078005003) + W(5, 1072194837, -1094906083, 1046140857, -1140922271) + + W(6, -1153587550, 1024050371, -1094102229, -1070026280) + + W(7, -1075632186, 1053282219, -1098407638, 1033718481); + WS(1063327007, -1121248448); + sum1 = W(0, 1017612078, 1043030576, 1039059525, -1115811128) + + W(1, -1095615210, 
-1099887459, -1140707177, -1120397682) + + W(2, -1113236667, -1096763312, -1091103118, 1053297652) + + W(3, 1063340055, 1054079124, 1041492110, -1126620524) + W(4, 1042967269, 1049218219, 1058517677, 1033504868) + + W(5, -1082785422, -1088587000, -1101731443, 1029718569) + + W(6, 1035231217, -1115130868, -1107367965, 1009543939) + + W(7, 1047061435, 1042859328, -1120288724, -1104957341); + sum2 = W(0, 1037909727, -1098298527, -1126569464, -1106955476) + + W(1, -1096969233, -1118592354, 1042882430, 1037216424) + + W(2, -1119813454, 1048025725, 1041835288, 1067026849) + + W(3, -1085288934, 1042571631, -1117602980, -1116313332) + + W(4, -1099212072, 1045330821, -1106229244, 1067954224) + + W(5, -1095495837, -1108169766, -1164085849, -1100305071) + + W(6, 1032936033, -1097673782, -1109675885, -1092590328) + + W(7, 1004812688, -1096486003, 1034064234, 1045459772); + WS(-1114652667, 1051933605); + sum1 = W(0, 1026700110, 1037508767, -1128033780, 1034155035) + + W(1, -1102462575, 1024543544, -1147042251, 1032374633) + + W(2, -1102428549, -1105027032, -1106431647, 1043250735) + + W(3, 1059213449, 1038435705, 1038748492, 1016229610) + W(4, 1043087092, 1048715843, 1062796090, -1089603768) + + W(5, -1090890112, -1106007014, -1119096106, -1104514210) + + W(6, -1109604752, -1113263092, -1098797270, 1023780992) + + W(7, -1112475685, -1121524254, 986589794, 1007891224); + sum2 = W(0, -1129141996, -1106880495, 1031191254, -1102244397) + + W(1, -1101040753, 1046080919, -1112883155, 1031234782) + + W(2, 1043866561, -1096921120, -1123077406, 1064276171) + + W(3, 1061828176, -1095750750, 1042185570, -1128566730) + + W(4, 1029019378, -1092013580, -1086673510, 1064706574) + + W(5, -1113110329, -1091369762, 1042119352, -1105256367) + + W(6, 1050935854, -1111274547, -1096956427, -1115781076) + + W(7, -1106057540, 1029696990, -1113619581, 1012473722); + WS(1059465279, -1101420399); + sum1 = W(0, 1033088680, -1124176798, 1014063089, 1035053989) + + W(1, 1045820204, 1038292302, -1115007182, 
-1131477704) + W(2, 1043444330, 1000916382, 1057398923, 1034190184) + + W(3, -1087528660, 1036787650, 1027988010, 1042732770) + + W(4, -1101682761, -1126261266, -1089602004, 1039770557) + + W(5, 1057859064, -1106381771, -1104773247, -1102530758) + + W(6, -1124073107, -1118729178, 1032573720, -1114558729) + + W(7, -1096323421, 1041172764, 1024683157, 1029511616); + sum2 = W(0, -1093261173, -1100338012, 1016590929, -1160318928) + + W(1, -1111261089, 1032510827, 1037987285, -1106728517) + + W(2, -1081648717, -1096055698, 1049309077, 1060463741) + + W(3, -1123677731, 1042247124, 1035681279, 1035240533) + W(4, -1107343332, 1016623505, 1058913944, 1061699617) + + W(5, -1095509190, 988275856, -1111758125, 979544481) + W(6, 1020947921, -1121572837, 1035038069, -1121635630) + + W(7, -1116667427, -1130213341, 1033159595, -1112486763); + WS(1060076127, 1072958059); + sum1 = W(0, -1103118654, 1043365506, -1115420333, 1031523031) + + W(1, 1033412302, 1032254402, 1031863013, -1127560355) + + W(2, -1090537599, -1097065432, -1086314390, -1088162978) + + W(3, -1089193875, -1091959462, -1111327644, -1086013855) + + W(4, 1060753617, 1046496158, 1060760746, 1055888777) + W(5, 1052645932, 1054239287, 1044639698, 1056703661) + + W(6, -1129584897, 1025963976, -1106610556, 1040410096) + + W(7, -1138875303, 1032279415, 1032182482, 1044337291); + sum2 = W(0, 1034238418, 1026722605, -1123083006, 1044566349) + + W(1, -1114043742, 1029836970, -1124457715, -1112207598) + + W(2, -1122429687, -1113650971, -1101416602, -1095747837) + + W(3, 1044375513, 1032414162, -1132025334, -1112893824) + W(4, 1038471498, 1047703041, 1050567163, 1073798913) + + W(5, 1074668356, 1076147895, 1076524889, 1071535287) + + W(6, -1105287037, -1106809975, -1099784158, -1077245954) + + W(7, -1072322090, -1070521946, -1071353549, -1076180558); + WS(-1114470395, -1114976351); + sum1 = W(0, 1017446070, 1040990896, -1129681934, -1120446036) + + W(1, -1097641691, -1110115727, -1110184487, 1033457695) + W(2, 991904560, 
1041627584, 1047992216, 1061349282) + + W(3, 1061559014, 1055187519, 1045114861, 1047040436) + + W(4, 1034584479, -1107221597, -1115356259, -1083497834) + + W(5, -1089507216, -1098995485, -1109668803, -1107186297) + + W(6, -1110976571, -1099251857, -1111571481, 1039733326) + + W(7, 1036247843, -1114440077, -1115117640, -1120899067); + sum2 = + W(0, -1093678598, 1002676269, 1055748356, 1044340647) + W(1, -1118111722, -1120084888, 1001595675, -1123642691) + + W(2, -1084434022, 1057422860, 1066493380, -1079318791) + + W(3, -1116089022, 1044705221, -1112694927, 1031220790) + + W(4, 1064533419, 1067683118, -1078621547, -1079939901) + W(5, 1054002901, 1039926847, -1112070298, 1023903081) + + W(6, 1042743345, -1108978134, -1117769694, 1015093293) + W(7, 1049699290, 986467602, -1107171922, 1037662763); + WS(1048785023, 1039045299); + sum1 = W(0, -1104130562, 1043168659, -1117502116, 1029857553) + + W(1, 1034838603, 1027207566, 1024393315, -1130533431) + + W(2, -1090140327, -1100442863, -1085428843, -1093347651) + + W(3, -1089406196, -1093521697, -1120775857, -1086838999) + + W(4, 1061197562, 1040570004, 1062309944, 1050495728) + W(5, 1050259320, 1054560318, 1039464338, 1056208688) + + W(6, -1133671882, 1032880081, -1102819715, 1038995485) + + W(7, -1127149063, 1026681976, 1035433414, 1041402190); + sum2 = W(0, -1118465298, 1011712973, -1128433260, 1029767219) + W(1, 1021354231, 982009542, 1014788797, 1033540398) + + W(2, -1142302474, 1025023956, 1041730218, -1097771962) + + W(3, -1094677866, -1122047812, 1028525757, 1020814500) + + W(4, -1114395216, -1104707163, -1095905892, -1080533844) + + W(5, -1075595300, -1072373863, -1072719492, -1078361330) + + W(6, 1037219807, 1039062755, 1046574176, 1069124140) + W(7, 1073795377, 1075522683, 1074436097, 1068741752); + WS(1040594174, 1002085105); + sum1 = W(0, 1041861603, -1097746587, 1032164980, 1031577622) + W(1, -1102982955, 1044101411, 1034678964, 1026039088) + + W(2, -1113198522, 1055071640, -1088672026, 1050744420) + + W(3, 
1057599999, -1103363988, 1036351259, 1034848362) + + W(4, -1137236930, -1100468781, 1047493466, -1105375081) + + W(5, -1087672398, 1050064611, -1102178050, 1033863059) + + W(6, 1041085614, -1121524679, -1109849488, 1048884604) + + W(7, -1113508732, -1098107838, 1043931332, -1106488144); + sum2 = W(0, -1099743490, 1035577370, -1115293072, -1102421679) + + W(1, 1038726510, 1032210717, -1107407193, -1120037205) + W(2, 1011906269, 1026663055, 1057096352, 979639118) + + W(3, 1044440375, 1032214892, 1028754425, -1123040177) + + W(4, 1037961444, -1152297667, -1095995741, 1050233559) + + W(5, 1022991474, -1139108605, -1115369483, -1104470915) + + W(6, -1108436276, 991169347, 1045881501, -1097952901) + + W(7, -1122343653, 1026937641, -1105994898, 1022469878); + WS(1066613200, -1123971367); + sum1 = W(0, 1021072883, -1125712504, 1041158633, -1106077125) + W(1, 1036847484, 1032267227, 1016782186, 1031886911) + + W(2, -1096708150, -1090955664, -1159875842, -1079911082) + + W(3, -1093258685, -1103560179, -1089925031, -1106725252) + + W(4, 1051074184, 1051398158, 1058934814, 1061682240) + W(5, 1055629981, 1054180539, 1052861897, 1047227273) + + W(6, 1026036124, -1108624754, 1040756057, -1090753503) + + W(7, -1114271443, 1025501945, -1105228376, 1043536294); + sum2 = W(0, -1120579723, -1104595791, 1060943463, -1081375340) + + W(1, 1048074442, 1049316438, -1088959333, 1054216959) + + W(2, -1102098813, 1059079365, -1078775742, 1070420847) + + W(3, -1106373275, -1084015554, 1065759760, -1087995034) + + W(4, -1103583809, 1035649855, 1055196914, 1036446407) + W(5, 1050043459, 1050724789, -1088425301, 1052608338) + + W(6, 1039910130, -1110832147, -1115424415, -1092581761) + + W(7, -1101688097, -1115969635, 1051846236, -1103333947); + WS(1015488492, -1082266482); + sum1 = W(0, -1115294884, 1041881447, -1116482020, 1028536641) + + W(1, 1044897017, -1108243791, 1033207234, 1015505847) + + W(2, -1096364772, 1036768790, -1083421327, -1079950554) + + W(3, -1088856963, -1096513911, 
-1104622888, -1096156044) + + W(4, 1054950795, 1051075280, 1062057653, 1057564011) + W(5, 1054170231, 1056273768, 1056919206, 1054606378) + + W(6, -1109722877, 1018280484, -1105734603, -1124656723) + + W(7, -1129161159, -1113027740, -1132093050, 1025701004); + sum2 = + W(0, -1103335141, -1087173612, 1045744538, -1106751239) + W(1, -1089877767, 1054342226, 1047940388, -1120076732) + + W(2, -1123809016, -1080484881, 1068142929, 1065979347) + + W(3, 1048007586, -1086806497, -1098712592, -1082944418) + W(4, 1056172663, 1047550056, 1051886373, 1064817240) + + W(5, 1065470183, -1098340366, -1102735881, -1088981287) + + W(6, -1093357581, -1112078382, -1102262951, -1097769156) + + W(7, -1095892093, 1033891425, -1105000175, 1059622670); + WS(-1089551423, -1072840444); + sum1 = W(0, 1041019406, 1022173115, -1113297010, 1015463395) + W(1, -1103473726, 1040814976, 1007312016, 1029778436) + + W(2, 1055991358, 1057526040, 1036785718, 1059475300) + W(3, 1056836657, 1050051043, 1050846086, 1053584058) + + W(4, -1086576149, -1100702364, -1093552675, -1089572736) + + W(5, -1091007140, -1090411145, -1098618125, -1097362556) + + W(6, 1027645528, -1141651861, 1016689751, 1031793146) + + W(7, -1125097480, 1041821422, 1015074267, -1104623366); + sum2 = W(0, 1069423067, 1072187327, 1072064387, 1072314518) + W(1, 1066704589, 1063804587, 1053139391, 1042659879) + + W(2, -1076703167, -1073855626, -1079134619, -1077864031) + + W(3, -1081021850, -1081933355, -1094622730, -1099654949) + + W(4, 1041694716, -1115348910, -1107764634, -1089838794) + + W(5, 1036250076, 1028352388, -1114321992, 1035224325) + W(6, 1032293264, 1008297781, -1120515458, 1037018177) + + W(7, -1121141704, 1025803257, -1123317222, 1027779018); + WS(1053893247, -1106302313); + sum1 = W(0, 1031892188, -1110506435, -1127073177, 1047997302) + + W(1, 1033243951, -1107646187, -1112186286, -1112631434) + + W(2, 1044357278, 1051962248, 1061666784, 1064449402) + W(3, 1041901099, 1053582704, 1037482077, 1050110011) + + W(4, 
-1093335703, -1098499127, -1085429924, -1082436051) + + W(5, -1118050391, -1104422127, -1118004236, -1103845867) + + W(6, -1124627014, -1121076939, 1038775118, 1042378233) + + W(7, -1100759903, -1107823800, -1134911574, -1111029158); + sum2 = + W(0, 1045534781, -1136883904, 1027794169, -1091599460) + W(1, -1105954111, -1110353364, 1026306116, -1105327762) + + W(2, -1106914963, -1105365809, -1087950271, 1049402813) + + W(3, 1059435817, 1056244223, -1106437513, 1049633694) + W(4, -1093884354, 1058094665, -1076257974, 1070891205) + + W(5, 1057695328, -1096007662, 1042940020, 1019026622) + W(6, 1048929407, -1099297773, 1057853479, -1101014716) + + W(7, -1088483015, -1116753268, -1107558514, -1111740866); + WS(1043803134, 1031905225); + sum1 = W(0, -1113675573, -1137620096, 1045527725, 1047471777) + + W(1, 1025442024, -1101766076, 1041363873, -1104559247) + W(2, 1045344728, 1015450714, 1036603287, 1057861093) + + W(3, 1007995990, 1048683192, -1103914745, 1053784172) + + W(4, -1093561052, 1039772705, -1087612823, -1131204708) + + W(5, -1109713128, -1098589181, -1142310312, -1096789826) + + W(6, -1125193272, -1142985628, 1043570724, 1032608022) + + W(7, -1125123818, 1017746514, -1137687994, -1120823545); + sum2 = W(0, -1102611800, 1042572795, -1143715563, 1047906835) + + W(1, 1034463821, -1097650393, 1044772028, -1105069125) + + W(2, 1052608919, -1112317124, 1036000712, 1058925768) + + W(3, -1089628524, -1097284017, -1090965565, 1053414419) + + W(4, -1092971881, 1050084573, -1099050864, 1062269077) + + W(5, -1100033854, -1155008883, 1043620595, -1099181286) + + W(6, 1033368250, -1099740129, 1046340985, -1106615878) + + W(7, -1143169701, 1034459058, -1102441858, -1145018737); + WS(1065606800, 1041895077); + sum1 = W(0, 1041785011, -1113293285, 1031552121, -1114204019) + + W(1, 1014586031, 1032452466, -1103346054, 1025447425) + W(2, 1041397665, 1050236482, 1063682744, 1055253222) + + W(3, 1057927896, 1054837340, 1043307490, 1058487624) + + W(4, -1108226929, -1089967688, 
-1092426598, -1085544687) + + W(5, -1092878524, -1094102329, -1097746379, -1098262983) + + W(6, -1113822832, -1105474774, 1045393909, -1106251932) + + W(7, 1018782386, -1135035573, -1114747425, -1119448485); + sum2 = W(0, 1038489223, 1032193624, -1137158564, 1048862753) + + W(1, -1107218869, -1112577633, 1034028859, -1110240510) + + W(2, -1094743304, -1106563024, 1029053577, -1103121169) + + W(3, 1018988618, 1015522148, -1107822714, 1017646246) + + W(4, -1079775068, -1075431018, -1075905408, 1061439417) + + W(5, 1061842524, 1055667456, 1032002642, 1047594841) + W(6, 1069957645, 1074314813, 1067854461, -1090496129) + + W(7, -1083145579, -1093776696, 1038458963, -1100108785); + WS(1050857279, 1035401177); + sum1 = W(0, 1029370009, -1121232094, -1118971861, -1116554354) + + W(1, -1102118870, 1035949272, -1118591397, -1155732684) + + W(2, 1049972466, 1045749679, 1054737744, 1064433520) + W(3, 1057059356, 1055161550, 1035470985, 1050840570) + + W(4, -1100844155, -1097541146, -1089471141, -1088203652) + + W(5, -1089223685, -1095333099, -1098430605, -1105548665) + + W(6, -1114359850, -1119608387, 1032044074, 1039513726) + + W(7, 1028806533, 1039358524, -1114373271, -1113669781); + sum2 = + W(0, 1034282103, -1111920318, 1023538768, -1133376386) + W(1, -1119100907, 1014776166, -1117007635, -1121555310) + + W(2, 1040723224, -1118965841, -1128437695, 1038672881) + W(3, 993338648, 1050613235, -1108740075, 1040276790) + + W(4, 1028955705, 1031483971, 1043259383, -1148076972) + W(5, -1084940093, 1050550535, -1098463376, 1054296012) + + W(6, -1104697256, -1123254751, 1034554870, -1078609243) + + W(7, -1075024478, -1100774866, 1069274341, 1072481706); + WS(1059068159, -1095269543); + sum1 = W(0, 1022243158, -1126585055, -1113503115, 1034732162) + W(1, 1029695369, 1045709362, 1005846771, 1049870212) + + W(2, 1058751902, 1052633434, 1061437964, 1062365313) + W(3, 1054753811, 1047783152, 1043895001, -1119990530) + + W(4, -1083015108, -1095802212, -1083734386, -1087136899) + + W(5, 
-1089074173, -1093733821, -1096742887, -1106170901) + + W(6, 1040310153, 1009201364, 1040208083, -1116554445) + + W(7, 1044292534, -1141695574, 1042140272, -1106848091); + sum2 = W(0, -1110453349, -1118716929, -1111084235, 1027079271) + + W(1, 1058163522, 1060433363, 1058751114, -1077532089) + W(2, 1048158098, -1117235638, 1037169053, 1044450691) + + W(3, -1086959067, -1096141012, -1102281652, 1061563184) + + W(4, 1064329884, -1122504669, 1037379653, -1106426334) + + W(5, -1081956377, -1081432626, -1086790317, 1073667932) + + W(6, -1096499673, 1023790996, 1035642078, -1103100998) + + W(7, 1059064502, -1109003581, 1048822766, -1089552462); + WS(-1079238176, -1098575359); + sum1 = W(0, -1122061272, 1012614613, 1049213476, -1091368434) + + W(1, 1050034455, -1115399887, -1105000541, 1040609063) + + W(2, 1040856756, -1098659523, 1050219649, 1046432350) + + W(3, -1092787824, 1045840143, -1102913028, -1104981396) + + W(4, -1106993315, 1049053518, -1088887964, 1061247706) + + W(5, -1113029435, -1096901418, 1045569657, -1106428832) + + W(6, -1113276757, -1110111537, 1048270483, -1098550683) + + W(7, 1053252708, -1120030007, -1144293604, 1037272948); + sum2 = W(0, 1035271628, 1033447544, -1113618952, -1104023060) + + W(1, 1022009109, -1104458884, 1044684178, -1104406640) + + W(2, -1096995906, 1032410306, 1053850685, 1054515359) + + W(3, -1114408860, 1030897990, -1115683354, -1106540716) + + W(4, -1097624644, 1048740225, -1109777102, 1058355960) + + W(5, -1139294461, -1116975083, -1099524388, 1036671037) + + W(6, 1051183735, -1095652467, 1037584786, -1116173683) + + W(7, -1096371453, 1030595307, -1117706649, -1108415342); + WS(1064155455, 1041078114); + sum1 = W(0, -1114884791, 1038026665, 1040569148, -1123095214) + + W(1, -1113346810, -1098688777, -1106297864, 1033022404) + + W(2, -1133388468, -1103926126, -1088265213, -1088521037) + + W(3, -1094483240, 1052099081, 1038941559, 1027852876) + W(4, 1031759625, 1051074474, 1049866386, 1060913876) + + W(5, 1044072016, 1034732568, 
-1138713347, 1019301565) + + W(6, -1116707016, -1135572861, -1145284192, -1110455500) + + W(7, -1116167073, 1029730485, 1017022612, 1028095637); + sum2 = + W(0, 1027468398, -1111732215, -1149402831, -1136133524) + W(1, 1058132410, -1084703700, -1103172541, 1011948492) + + W(2, -1120484835, -1115316427, 1042551085, -1101059653) + + W(3, 1067179636, -1093846517, 1015696434, -1119207903) + W(4, 1033713911, -1113615213, 1015721118, 1005178376) + + W(5, -1112798365, 1036573120, 1035924289, -1106241198) + + W(6, 1037828351, -1102210431, 1027887653, -1105049969) + + W(7, 1033627060, -1112780439, 1025169534, 1034168377); + WS(1056869759, -1097268032); + sum1 = W(0, 1018133293, -1105532941, 1027243330, -1097428310) + + W(1, -1099204213, -1133851766, -1138393927, -1118550618) + + W(2, 1036411431, 1007771751, -1116024394, 1032024955) + + W(3, -1130187397, 1034973653, -1114469768, -1118964077) + + W(4, 1039633317, 1034605623, 1050783724, 1010531964) + W(5, 1045069064, 1044277428, 1046520720, 1036629184) + + W(6, -1111689725, -1107937040, -1105991139, -1109581452) + + W(7, -1103861656, 1044990678, -1106353867, -1155661924); + sum2 = + W(0, 1031819015, -1113612713, 1040659447, -1113225510) + W(1, 1023788280, -1115035325, 1009487418, 1023947578) + + W(2, 1040620760, 1045122152, -1122637452, -1080510307) + + W(3, -1103769905, 1050277803, 1041168723, -1112728330) + + W(4, -1130428603, -1113774176, -1093905874, -1073670430) + + W(5, 1040925011, 1074164392, 1056579340, 1044922942) + W(6, -1130628701, -1118722775, 1015363301, -1129641903) + + W(7, 1045479736, 997650792, 1043373785, -1122436477); + WS(1064095487, 1025785067); + sum1 = W(0, 1026371284, -1097495743, 1050378694, 1008396404) + + W(1, 1042871873, -1103138663, -1136206974, 1034556564) + + W(2, -1106851535, 1042476562, -1104849584, -1085976658) + + W(3, -1097042470, -1109264236, 1035073279, -1094929447) + + W(4, -1128892272, 1049973404, -1115675316, 1058008769) + + W(5, -1098054097, 1052615848, 1037088776, 1045135411) + W(6, 
1040514533, -1103190740, 1040022019, 1040395090) + + W(7, 1050097558, -1102548473, -1157051795, 1023679636); + sum2 = W(0, -1124722923, -1114618025, 1043439029, 1036363299) + + W(1, -1096133271, 1049502378, -1106999272, 1013819494) + + W(2, 1033420628, -1100145939, -1115804833, 1057589449) + + W(3, 1053618149, -1091358477, 1030436134, -1119548384) + + W(4, -1105442746, 1047309531, -1084852425, -1089136554) + + W(5, 1028964970, 1056312267, -1100909906, 1043379377) + W(6, 1029168594, 1048079041, -1101613108, 1042227257) + + W(7, 1045151454, 1041752672, 1036382707, -1113267324); + WS(1060109055, 1023402244); + sum1 = W(0, 1039025762, -1109278395, -1125210976, -1107337262) + + W(1, -1119314274, 1038278743, -1120902815, 1024999351) + W(2, 1051909299, 1054481389, 1061074499, 1069080830) + + W(3, 991275263, 1049054227, 1043342663, 1051802331) + + W(4, -1098089231, -1093104548, -1087624466, -1092709234) + + W(5, -1085745760, -1093068092, -1102438460, -1098889074) + + W(6, -1107880995, 1018380380, -1167447218, -1119879878) + + W(7, 1030906608, 1028271716, -1122586985, -1107737008); + sum2 = W(0, 995982518, 1033194341, 1038373562, 1040171687) + W(1, -1108098969, 1027047733, -1147696971, 1033796110) + + W(2, 1035578813, -1121421677, 1046888388, -1103362299) + + W(3, 1050255581, -1096418856, 1042040695, -1116244525) + + W(4, -1118279893, -1114972659, 1069469835, 1078927590) + + W(5, -1065023683, -1082214759, 1045909115, -1112457727) + + W(6, -1115504751, -1108849613, 1041670894, 1050450381) + + W(7, -1097423961, -1114805949, 964966831, 1025540676); + WS(-1092049407, -1078216845); + sum1 = W(0, 1019093687, 1030002432, -1100550858, 1055647027) + + W(1, -1103872766, 1041444210, -1115845919, 1042339236) + W(2, 1057591089, 1050969332, 1056027116, 1063070242) + + W(3, 1039049940, 1057957242, 1034942714, 1059965491) + + W(4, -1089127853, -1096074010, -1082843219, -1088645600) + + W(5, -1097545633, -1095349663, -1103665506, -1089209262) + + W(6, -1109784091, 1043174565, -1126191805, 
1049441628) + + W(7, -1097650089, 1024042875, -1123963793, -1105758607); + sum2 = W(0, -1086426610, -1096143108, -1089951657, 1062855195) + + W(1, 1068897447, 1074016711, 1075026151, 1074160921) + W(2, 1062347208, 1055082976, 1045157118, 1048769854) + + W(3, -1073459143, -1078468086, -1071781830, -1072043968) + + W(4, 1053593730, -1101252044, -1111940928, 1037201995) + + W(5, -1091098547, -1119407664, 1031118734, -1095931446) + + W(6, -1108806990, -1138780047, -1099705126, 1058343455) + + W(7, -1094950430, 1046274438, -1128975824, 1035271906); + WS(-1092377983, 1032564911); + sum1 = W(0, 1026968586, 1001025293, -1101464865, -1099757657) + + W(1, -1099131534, 1021990836, -1124857771, 1016095931) + + W(2, 1051240140, -1135919542, 1047585891, 1046337282) + W(3, 1042168181, 1042765483, 1039475666, 1031087233) + + W(4, -1140824311, -1114206145, 1035412731, -1093593858) + + W(5, 1042300945, -1105010841, -1129901419, -1150842169) + + W(6, -1108814006, 1031981930, -1105914151, -1111723206) + + W(7, -1114245051, 1047084274, -1110153104, 1021006644); + sum2 = W(0, -1152594313, -1114711758, -1122222477, -1119994373) + + W(1, -1117872135, -1136114154, -1118889611, -1120239593) + + W(2, -1103837493, -1102219265, 1021928584, 1043085394) + + W(3, -1098150014, -1114380427, -1103586212, 999997765) + + W(4, -1119502913, 1049238470, 1051353268, 1069806319) + + W(5, 1040245198, -1084770114, -1097500544, 1032457879) + + W(6, 1035550709, -1111809635, 1038283603, -1113383091) + + W(7, 1038911980, -1106271331, -1109409577, -1130668925); + WS(1066805616, -1146277627); + sum1 = W(0, -1135230785, -1149155717, 1034010150, 1042337650) + W(1, -1127867356, 1036367663, 1011804270, 998521116) + + W(2, -1097679853, -1106566573, -1092037473, -1085421897) + + W(3, -1091217395, -1094061036, -1108406741, -1096997053) + + W(4, 1046488473, 1047676826, 1047117051, 1062693860) + W(5, 1049537116, 1056189429, 1017502967, 1049188263) + + W(6, 1036204713, -1098488427, -1103697017, 1057957971) + + W(7, 
-1114566390, 1034870184, -1103604141, 1029261884); + sum2 = W(0, -1122819855, 1029215207, -1109976389, 1044451524) + + W(1, -1130065951, 1027965847, -1119702735, 1036873692) + + W(2, 1035645255, -1111163705, 1026839491, -1095838714) + + W(3, -1147512571, -1156411358, -1110332882, -1114357123) + + W(4, -1107161682, 1019166485, -1129473215, 1082201538) + W(5, 1052773623, 1033233466, 1034307583, 1001850219) + + W(6, 1042480496, -1107159841, -1115971786, -1067229991) + + W(7, -1091650896, -1142132409, -1107215185, 1025815163); + WS(1040205182, 1040987841); + sum1 = W(0, 1032709552, 1028035406, -1152624047, 1047238622) + + W(1, -1110798127, -1160283945, -1129145589, 1035307633) + + W(2, 1050828331, 1054299632, 1000765841, 1064828620) + W(3, 1059217829, 1057730984, 1050797759, 1040620053) + + W(4, -1098789018, -1102534446, -1087146945, -1084918512) + + W(5, -1080798168, -1094152707, -1154982570, -1104995442) + + W(6, -1130845956, -1131566290, 1024089890, 1044521480) + + W(7, 1041701723, -1105870556, -1121779940, -1110323595); + sum2 = W(0, -1104834685, -1098615503, -1090508283, -1080862372) + + W(1, 1048874616, 1063492499, 1035824034, 1062512173) + W(2, 1039834326, 1050496819, 1045914856, 1070974917) + + W(3, -1085801376, -1081343708, 1033985362, -1089072248) + + W(4, 1040649104, 1034170750, -1118657109, -1102559609) + + W(5, 1041220807, 1047362948, -1099586035, -1130222490) + + W(6, -1113573633, -1119108596, -1128160390, -1095002000) + + W(7, 1053594264, 1022825140, 1041895669, -1115149866); + WS(-1087487423, 1060332710); + sum1 = + W(0, -1105878689, 1042437466, 1040585322, -1121111477) + W(1, -1112195269, -1112892785, -1122925576, 1012646976) + + W(2, -1095746139, -1089004262, -1081828961, -1099866351) + + W(3, -1115422148, 1039314504, -1117471766, -1108492170) + W(4, 1053913286, 1055197099, 1059867107, 1061690846) + + W(5, -1104536871, -1102951638, -1107184778, -1130420108) + + W(6, 1037671952, -1117440135, -1114610404, 1028605502) + W(7, 1040822242, 1041433743, 
1048192776, 1029799624); + sum2 = W(0, 1017331955, -1104677463, -1117673238, -1094338757) + + W(1, -1119646366, -1104710153, -1107625181, 1022869095) + + W(2, -1097747777, 1040900608, -1081336501, 1073518733) + + W(3, -1103573743, -1106204508, 1041186571, -1097636016) + + W(4, -1097037835, 1016355419, -1089023490, 1068423959) + W(5, 1048344480, 1057220074, 1033091565, 1053959289) + + W(6, -1162485367, -1104406873, -1117660642, -1096388411) + + W(7, -1093216188, 1028492960, -1092192530, 1049060345); + WS(-1096756863, 1051174382); + sum1 = W(0, 1025372546, -1106905723, -1114360751, 1025624474) + + W(1, 1037998160, 1032794399, 1034992970, -1114244794) + + W(2, -1115668125, 1039995714, 1043101563, -1089490746) + + W(3, -1092891318, -1107085565, -1114405501, -1168885266) + + W(4, 1000611497, -1105014230, -1117013085, 1058174613) + + W(5, 1045000731, 1035643438, -1121180749, -1120880459) + + W(6, 1034391118, 1041916975, 1027806076, -1111998130) + W(7, 1008039732, 1030397185, 1018926829, 1029203969); + sum2 = + W(0, -1089960886, -1092452707, 1049541326, 1032119458) + W(1, 1032929374, -1110883627, -1123073435, 1026522309) + + W(2, -1095856011, -1106157919, 1074170001, -1085150802) + + W(3, 1046048366, -1112079347, 1027735021, -1111935169) + W(4, 1007316785, 1041476280, 1050706137, -1083810164) + + W(5, 1043182557, -1129593193, -1214669080, 1009058861) + + W(6, 1036775569, -1107942220, 1021239050, -1132331444) + + W(7, 1036225521, -1113579728, 1009639193, 1022066674); + WS(1063524863, 1010978751); + sum1 = W(0, 1033376264, 1035248130, 1029138555, 1040606570) + + W(1, -1146340206, -1122169062, -1113033206, -1115451138) + + W(2, -1090913225, -1094935586, -1084407830, -1085174115) + + W(3, -1080254739, -1094320024, 1033710501, 955403507) + W(4, 1054337940, 1045510498, 1057961148, 1062831778) + + W(5, 1060252175, 1058700983, 1045946247, 1033261261) + W(6, 1034437913, 1040021853, -1141033844, 1032741433) + + W(7, -1127614262, -1136275261, 1007782611, 1030604567); + sum2 = W(0, 
-1096400044, 1043917783, -1123678571, -1090244436) + + W(1, 1046901313, 1048870815, 1033884063, 1053211427) + W(2, 1053621644, 1009306318, 1051339656, 1062093709) + + W(3, -1080576226, -1106342004, -1098524562, -1124418811) + + W(4, 1017015195, 1005897821, -1141989437, 1065539532) + + W(5, -1083332175, -1097183812, -1097464341, -1092155120) + + W(6, 1037111777, -1106811629, 1047789047, -1093438727) + + W(7, 1049955112, 1043219374, 1050393105, 1044056917); + WS(-1085369887, -1080302329); + sum1 = W(0, 1021216124, -1121762938, -1103047566, -1094342263) + + W(1, -1105630871, -1107174593, 1025950537, -1146340105) + + W(2, -1112260523, -1119192737, 1050211017, 1058411732) + + W(3, -1139271463, 1042276856, -1113852108, -1119668597) + + W(4, 1035888345, 1049037307, 1061443675, 1044828010) + + W(5, -1119204121, -1100915542, -1174284878, -1119790119) + + W(6, -1110274640, -1111476420, -1097892613, -1097171068) + + W(7, -1112160677, 1026261917, -1110403297, -1132483222); + sum2 = + W(0, -1115512925, -1146353705, 1036066439, -1134998124) + + W(1, -1093678923, -1122018103, -1110317749, 1002801625) + W(2, 1035074177, 1040461813, 1042528734, 1058355837) + + W(3, -1081621864, -1089980594, 1049357806, -1101025299) + + W(4, -1191992884, 1043381354, 1059049535, 1058956516) + W(5, -1095612272, 1050844919, -1100731037, 1036998024) + + W(6, -1115656893, -1137020948, 1021899784, -1112812041) + + W(7, 1012903452, -1123282157, -1122890057, 1023124620); + WS(-1106521214, -1077666555); + sum1 = W(0, 1003321829, -1120901013, -1105222370, -1107011833) + + W(1, 1029823211, 1000924822, -1127794128, 1027793952) + W(2, 1052142545, 1052094618, 1060439250, 1059969903) + + W(3, -1095361591, 1039904896, 1000548911, 1046166153) + + W(4, -1102912320, -1105733097, -1085669204, -1113880763) + + W(5, 1052882623, -1107186882, -1118745688, -1096551831) + + W(6, -1108686032, -1118608535, 1045702647, -1106161276) + + W(7, -1103122454, -1106172071, -1113781993, -1132637458); + sum2 = W(0, 1034079401, 
-1123608512, 999851902, -1128074608) + + W(1, -1102099502, -1109589984, -1127758712, 1032460032) + + W(2, 1025264110, -1119278092, 1031056700, 1059405503) + + W(3, -1123208440, -1081290516, 1015173988, -1121242732) + + W(4, -1123371956, -1104246380, 1051208451, 1063218900) + + W(5, -1110007228, -1131612640, -1124651080, 1025045004) + + W(6, -1116991392, -1156408861, -1098806852, 1038716729) + + W(7, -1136590423, 1034885175, 1009682031, -1115963068); + WS(1061862175, -1081928168); + sum1 = W(0, -1105584175, 1024897880, 1038516044, 1040792761) + + W(1, 1041867802, -1146323132, 1033649803, -1113939311) + + W(2, -1105871745, -1093982770, -1092069365, -1083616743) + + W(3, -1089303929, -1089274268, -1106858177, -1096909360) + + W(4, 1043886056, 1052657028, 1055074797, 1065026281) + W(5, 1056987882, 1054263413, 1043742846, 1049053899) + + W(6, 1027658214, 992865878, -1121064957, -1106374835) + + W(7, 1015027694, -1132520748, -1137844354, 1017619030); + sum2 = W(0, 1078367032, 1056220347, -1078889751, -1073546091) + + W(1, -1104234997, -1129365336, -1132392998, -1105729449) + + W(2, 1027322757, 1044756153, -1096547764, -1101560565) + + W(3, 1042443775, 1048890957, -1104280017, 1028418751) + + W(4, 1023449836, -1148770222, 1043960988, -1126862726) + + W(5, 1029550531, -1133306383, -1130422948, 1042854212) + + W(6, -1115583846, 993742973, -1130380138, -1147412718) + + W(7, -1115461788, 1019814478, -1143007558, -1123464959); + WS(1052938943, -1103606318); + sum1 = W(0, -1114719934, -1110318513, 1051495307, -1100729633) + + W(1, 1058926789, -1090569589, 1050591187, -1120060704) + + W(2, -1087051802, -1108424920, -1085940948, -1082198177) + + W(3, -1090825863, -1088355266, -1096343413, -1090847836) + + W(4, 1058073278, 1044445625, 1060893495, 1058839039) + W(5, 1060585177, 1053564817, 1054456219, 1059253187) + + W(6, -1114447856, 1040769086, 1037779432, -1103818980) + + W(7, 1053070735, -1091337376, -1131842819, 1026807692); + sum2 = W(0, 1015621136, 1032703681, 1048700747, 
-1098355550) + + W(1, 1047054427, -1097901072, -1126056922, -1165737020) + + W(2, 1033006755, -1098289077, -1098808895, 1046984196) + + W(3, -1083721832, 1061110777, -1109410901, 1047097145) + + W(4, -1072583370, -1084108607, -1081224226, -1081041246) + + W(5, 1069053051, 1046858869, 1068664236, 1076043754) + W(6, 1074691924, 1067219112, 1064576163, 1066863963) + + W(7, -1084686190, -1086075738, -1079374171, -1071293986); + WS(-1083248351, 1041375270); + sum1 = W(0, 998616699, -1142901727, 1043282847, -1128981712) + + W(1, 1049050406, -1110272759, 1045114289, -1170526381) + + W(2, -1087439008, -1092067298, -1097778587, -1084179155) + + W(3, -1090979219, -1086455104, -1095775369, -1087021556) + + W(4, 1054645196, 1052452449, 1050602866, 1058025241) + W(5, 1061357806, 1053431193, 1052134295, 1053157267) + + W(6, 1045294540, -1128264947, 1042620067, -1146688137) + + W(7, -1127282486, -1125997182, 981342481, 1046767364); + sum2 = W(0, -1076239116, -1079299620, -1079042334, -1083365485) + + W(1, 1049726715, 1066758659, 1069675355, 1075688460) + W(2, 1068235144, 1068687973, 1068481951, 1066921525) + + W(3, -1106704213, -1080829299, -1077473679, -1073149491) + + W(4, 1054062613, 1019509447, 1057377046, -1100834259) + + W(5, -1095788298, -1107253929, -1112126912, -1090199615) + + W(6, -1114529265, -1116897962, 1041823371, 1032806429) + + W(7, -1104989857, -1107915189, 1021289065, 1034867372); + WS(-1089965567, 1041739713); + sum1 = W(0, 1014059625, -1109541748, -1120181702, -1113826001) + + W(1, -1099389050, -1107441048, -1110918155, -1129336123) + + W(2, 1050606464, 1046384755, 1052952467, 1062759283) + W(3, 1061539071, 1056969105, 1043585729, 1054439728) + + W(4, -1097348868, -1098095776, -1098227712, -1097909490) + + W(5, -1089710746, -1097990985, -1112431723, -1101291373) + + W(6, 1016608410, -1110057610, -1165355548, -1103909923) + + W(7, 1018410859, -1097884829, -1109883584, -1118732474); + sum2 = W(0, 1035083591, -1112359157, 1037205500, -1119560270) + + W(1, 
-1130432545, 1033928454, -1111608107, 1027763767) + + W(2, -1113211599, 1037267536, -1110383577, 1053336900) + + W(3, 1036846600, 1029308948, 1035214305, -1137000642) + W(4, 1032203968, -1129901649, 1050641640, 1062183610) + + W(5, 1052549520, 1037218786, -1098666781, 1041216407) + W(6, -1112771443, 999833884, -1113848439, 1048127168) + + W(7, -1087091182, -1080175934, -1106600446, -1105808372); + WS(1040803966, -1079223548); + sum1 = W(0, -1103550802, -1113401739, -1105545077, -1095164520) + + W(1, -1102562113, -1106149592, -1123094707, -1105989383) + + W(2, -1108233168, -1097729981, -1112466052, 1056711305) + + W(3, 1033129898, -1100704991, -1106323272, -1109921151) + + W(4, -1115946235, 1016715200, 1051122342, 1072346450) + W(5, 1046866637, 1041579689, 1017795558, 1019070562) + + W(6, 1021078213, -1123835458, -1103483180, -1092700173) + + W(7, -1104392485, 1033532004, -1128003929, -1109398092); + sum2 = + W(0, -1132003761, -1126076753, 1028715469, -1094940877) + W(1, -1096054213, 1050064171, -1095828963, 1024912709) + + W(2, -1097131135, -1098724323, -1094337061, 1075670208) + + W(3, -1092647285, -1119763725, -1104609741, -1103386045) + + W(4, 1033952970, -1102131199, 1043946979, 1037045260) + W(5, 1052392413, -1101691459, -1104710657, 1042676905) + + W(6, -1114013558, 1027012993, -1109613274, 1026138301) + + W(7, -1107006741, 1032910718, -1127911473, -1109320626); + WS(-1077711088, -1080462700); + sum1 = W(0, 1016976667, -1143635214, 1050435883, 1025648038) + + W(1, 1033190198, -1119077035, -1127874241, -1124293633) + + W(2, -1095463505, -1094028818, -1084473735, -1081613943) + + W(3, -1092817267, -1089963906, -1112556483, -1096263708) + + W(4, 1048382760, 1043759401, 1060944248, 1061061657) + W(5, 1059085444, 1058923652, 1024757337, 1052630131) + + W(6, 1033669226, 1023995271, 1038367764, -1104130812) + W(7, 1035230572, -1115548134, 998948881, 1035279350); + sum2 = W(0, -1118714905, -1128906105, -1104617441, -1102497202) + + W(1, 1052988680, 1025452854, 
-1147569173, 1045309195) + W(2, 1038737245, 1033599435, 1047074065, -1081197801) + + W(3, 1051590837, 1036478618, 1034730428, -1096538670) + + W(4, 1045939428, -1124872105, 1073600723, -1069501955) + + W(5, -1078764847, -1078626366, -1094352359, -1097394746) + + W(6, -1084863446, -1120736313, 1072627800, 1076529178) + + W(7, 1066278053, -1115729701, -1119746081, 1001372341); + WS(-1085605823, 1039908372); + sum1 = W(0, -1110519177, 998302542, -1105109851, 1051114493) + W(1, 1047758692, 1047411644, 1008966514, 1027414133) + + W(2, -1101464226, -1097280531, 1050325520, -1093718398) + + W(3, -1109283231, -1103804777, -1107759461, -1099881946) + + W(4, 1050281295, 1040574050, -1112501642, -1095999987) + + W(5, 1052111506, 1013903280, -1131548857, 1037344125) + W(6, -1136474287, 1017687606, 1027599892, 1038451002) + + W(7, -1112873934, 1026946257, 1007362713, 1030395745); + sum2 = W(0, -1115357326, -1129481498, 1037423788, -1149109833) + + W(1, -1107237807, -1098320683, -1118016391, -1111060805) + + W(2, -1098998597, -1104582263, -1098625511, 1036322812) + + W(3, 1050926614, 1045914812, 1029306918, 1035368549) + W(4, 1033193484, 1041990652, 1052524895, 1029624616) + + W(5, -1102468922, 1016354632, 1029333966, 1036818044) + + W(6, -1119934044, -1109942748, 1028176618, 1032249206) + + W(7, -1117055607, 1026298641, -1116685452, -1113790705); + WS(1066221936, -1112686252); + sum1 = + W(0, -1111426931, 1049363148, -1090113310, 1038796336) + W(1, -1102958241, 1041899238, -1104001476, -1112879133) + + W(2, 1042737360, -1108750629, 1053136109, -1095853365) + + W(3, -1108890233, 1023718298, 1033191264, -1165972390) + W(4, 1039181581, -1105687720, 1051677110, 1031102479) + + W(5, 1058267293, -1089545128, 1047293124, 1010166848) + W(6, -1110677245, 1035534704, -1101864162, 1019901392) + + W(7, -1107302241, 1048841797, -1106653499, 989728967); + sum2 = + W(0, 1019579309, -1115216952, -1101014764, 1035949455) + W(1, -1125343389, 1020295703, -1107189524, 1032252124) + + W(2, 
-1121628281, 1041466962, 1044350744, -1096356807) + W(3, 1019949697, 1011899626, 1020414781, -1108620092) + + W(4, -1104132268, -1110659898, -1092586315, 1067161402) + + W(5, 1044980625, 1028945397, 1024223321, -1106879950) + + W(6, -1133616314, 1046938970, -1089581193, -1097777539) + + W(7, 1025080038, 1042942659, 1000298789, 1034429670); + WS(1065647552, 1042223795); + sum1 = W(0, -1118213891, 1047859212, -1102035956, 1049817209) + + W(1, -1136727619, 1018314812, 1048950515, -1111834851) + + W(2, -1090185691, -1099254654, -1085546115, -1090115457) + + W(3, -1086839140, -1089622500, -1103143659, -1088857895) + + W(4, 1054600851, 1056675965, 1052764224, 1060918524) + W(5, 1050058698, 1053194074, 1055030644, 1046059111) + + W(6, 1042956769, 1045938459, -1095542592, 1048397149) + + W(7, -1107166488, -1115710386, 1046421002, 1019098869); + sum2 = W(0, 1038047795, -1100534663, 1043709757, -1098906327) + + W(1, 1054998296, -1097979318, 1042447686, -1135615252) + + W(2, 1045715537, -1109612723, -1109559353, -1088589586) + + W(3, 1046750383, 1030039799, -1112135838, -1108474833) + W(4, 1062099960, 1074456323, 1074047524, 1061406543) + + W(5, -1109444720, -1095255331, -1106577876, -1098188594) + + W(6, -1074982428, -1071944747, -1086420775, -1087600981) + + W(7, 1058024333, 1040647828, 1042372587, 1051479186); + WS(-1099261566, -1102058551); + sum1 = W(0, 1037460750, -1118752449, 1048855473, 1046116085) + W(1, 1041532007, 1043443965, 1035797482, 1048656673) + + W(2, -1099740733, 1038344912, -1097823268, -1077722267) + + W(3, -1086845489, -1109540512, -1112472539, 1006300060) + + W(4, 1035868896, 1028635221, -1135561696, -1089090112) + + W(5, 1054063629, 1036245196, 1039520068, -1115462467) + W(6, 1046250530, 1040547388, 1049218244, 1053686776) + + W(7, 1041462458, 1041323791, -1116084210, 1050214602); + sum2 = W(0, -1107023704, -1103130345, 1025081245, 1007138702) + + W(1, 1020633935, 1042694232, -1116958023, -1117764771) + + W(2, -1099880087, 1049640504, -1099241173, 
1060797185) + + W(3, -1096100738, 1034621436, -1100840753, 1051814059) + + W(4, 1047853298, -1100868353, 1050410790, -1096656068) + + W(5, 1053061345, -1092716332, 1057695342, -1092707384) + + W(6, -1106650230, 1035820630, -1111373979, 1048085306) + + W(7, -1098540289, 1045275377, -1095110662, 1026583068); + WS(-1081437504, -1086037448); + sum1 = W(0, 1002898847, 1034161364, -1097339820, -1102052735) + + W(1, -1122224578, -1116847174, -1124219870, -1114337484) + + W(2, 1048903167, 1048827465, 1058952469, 1062493730) + W(3, 1056452878, 1052097743, 1046287775, 1051622635) + + W(4, -1098037791, -1110924302, -1089826679, -1082951605) + + W(5, -1095802941, -1093057352, -1115921615, -1097214606) + + W(6, -1124004134, 1038960439, -1109622563, 1046141833) + + W(7, 1037494008, -1142221339, 1018754897, -1115514650); + sum2 = W(0, 1026296217, 1046658552, -1092482457, 1048926974) + + W(1, -1093986590, 1051240634, 1031417427, -1116201775) + + W(2, -1098267531, 1054123387, 1031912087, -1103258874) + + W(3, 1064081451, -1081783631, 1052301313, -1136084293) + + W(4, 1056065541, -1080723543, 1068312252, -1095327126) + + W(5, -1101319294, 1054428583, -1095108256, 1032992382) + + W(6, -1107249906, 1049528827, -1094476247, -1114829012) + + W(7, -1110151549, -1109929036, 1023898518, -1131461985); + WS(1059931039, 1045326059); + sum1 = W(0, -1105339195, -1116497429, -1104905094, 1048600749) + + W(1, 1017781140, 1028510504, -1104151378, 1006123003) + + W(2, 1027626820, -1102542884, 1042432944, -1087368655) + + W(3, -1090841955, -1098837680, -1135818261, -1102238241) + + W(4, 1035394960, 1045819641, 1050447189, 1050459387) + W(5, 1058872902, 1038310627, 1009721182, 1039487803) + + W(6, 1020852317, -1110241387, -1120454715, 1028063406) + + W(7, -1135766366, 1046575867, -1121800177, -1138241103); + sum2 = W(0, -1108715925, -1125695753, -1106861101, 1037467647) + + W(1, 1049447879, -1108013135, 1016159100, 1025122350) + + W(2, 1033521028, -1107049318, -1126359337, 1024736242) + + W(3, 
-1096919459, 1030099711, 1038861546, 1027296385) + W(4, -1130002309, 1043711791, 1069432650, 1069236178) + + W(5, -1089672176, -1104043463, -1102967963, -1115278753) + + W(6, 1043372642, -1096562690, 1055907536, -1077602923) + + W(7, -1079492159, 1048192702, 1041544949, 1038199238); + WS(1065273279, -1155200022); + sum1 = W(0, 1032043190, 1042748400, 1044363154, -1101712885) + W(1, 1020290996, 1042545917, 1023679047, 969146311) + + W(2, -1110724474, -1113644224, -1103694886, 1049530899) + + W(3, 1032343156, -1119655327, -1113927803, 1014066252) + + W(4, 1048813356, -1113329098, -1138873982, 1013956115) + + W(5, -1088512539, -1118975351, -1112426340, -1102619473) + + W(6, -1107914275, 1017813644, -1119126921, 1030506334) + + W(7, 1047677002, 1035449518, 1040385292, 1037002750); + sum2 = + W(0, -1105379539, -1114326988, -1128505094, -1127815582) + W(1, -1144273240, 1011114412, 1010998384, 1008109660) + + W(2, 1040024171, 1033596237, 1027874005, -1090164335) + W(3, 1026082941, -1098155637, 1010514392, -1110656550) + + W(4, 1054804566, -1100858724, -1108932212, 1015111122) + W(5, 1057491035, -1123558275, 1041639386, 1025837089) + + W(6, 1020126178, 1058095820, -1095228668, -1114002666) + + W(7, -1109234902, -1117423595, -1148517800, 1028462436); + WS(1064822335, -1095840736); + sum1 = W(0, 1049019197, -1114924672, 1006539024, 1027273471) + W(1, 1024001604, 1023329786, 1034076369, 1032078653) + + W(2, 1056391992, 1052717533, 1053226600, 1063307800) + W(3, 1051465786, 1056189595, 1051405604, 1049885284) + + W(4, -1082098174, -1097259572, -1083318869, -1085358981) + + W(5, -1093212548, -1096912972, -1097995110, -1090164049) + + W(6, 1047279757, -1158101509, 1028150783, 1048033194) + + W(7, 1040827508, -1122795750, 1040247311, -1102713147); + sum2 = W(0, 1044291516, -1105536143, -1123279074, -1102729189) + + W(1, 1019015608, -1098099014, 1041233257, -1116414073) + + W(2, 1057945825, 1059138923, 1064153610, -1117658388) + + W(3, -1082824416, -1088923796, -1114762777, 
-1093119165) + + W(4, 1075245425, 1063262829, 1058517731, 1055818216) + + W(5, -1095331241, -1082492094, -1084810008, -1076437009) + + W(6, -1075047571, -1084742426, -1087050661, -1094508761) + + W(7, -1107921801, 1066223948, 1065694420, 1070896480); + WS(-1090760447, 1052695066); + sum1 = W(0, 1032827391, 1004836815, 1040820154, 1033221771) + W(1, -1142824410, 1019263487, 1033909728, 1030288497) + + W(2, -1086160295, -1100507696, -1085279840, -1085951050) + + W(3, -1090476887, -1096291378, -1104244158, -1096098979) + + W(4, 1058956361, 1050270371, 1061054293, 1060000376) + W(5, 1053225609, 1052048107, 1044967298, -1123372967) + + W(6, -1122589400, 1011775197, -1126907788, -1109718672) + + W(7, 1031459881, 1014310644, 1002236306, 1050105184); + sum2 = W(0, -1115548553, 1010795990, 1025233265, -1108773712) + + W(1, 1028577219, -1123367926, 1027045545, 1016487959) + + W(2, -1119133376, 1054475337, -1126101085, -1096206384) + + W(3, 1025243447, -1123699076, -1116348436, 1026289255) + W(4, 1034230379, 1049648601, 1031862009, 1056453743) + + W(5, -1092511031, 1040231624, 1052333940, -1098871930) + + W(6, -1106450540, -1130742967, 1074765700, 1076253468) + + W(7, 1067142167, -1082850789, -1074864896, -1066441953); + WS(-1098442559, 1026469881); + sum1 = W(0, -1149983818, -1119522441, -1111872220, -1148708523) + + W(1, 1033974589, 1034687077, -1121497336, 1040191316) + W(2, 1050517915, 1049663660, 1057681670, 1054270423) + + W(3, 1065584902, 1060962908, 1043551013, 1037520938) + + W(4, -1114000872, -1102209381, -1097110004, -1080449641) + + W(5, -1089390067, -1088971425, -1096207069, -1098201593) + + W(6, -1106643136, -1107005098, -1107624656, 1044397088) + + W(7, 1040137492, 1011262833, -1152197144, -1111636393); + sum2 = W(0, -1093666199, 1049995354, -1104373549, -1108191449) + + W(1, -1086862321, -1109961150, -1105617480, 1057348983) + + W(2, 1067471948, -1083836095, 1067519925, 1075025493) + + W(3, -1079149522, -1092122258, -1118106953, -1090446878) + + W(4, 
1044138823, -1089657247, 1068116530, 1070698122) + + W(5, -1077256496, -1092830464, -1104600439, -1095892285) + + W(6, -1087858714, 1040673003, -1086283540, -1107890740) + + W(7, 1052510667, 1044801958, -1113062539, 1040940184); + WS(-1089367999, -1080592817); + sum1 = W(0, 1050901372, -1142600967, 1043029392, 1042662963) + W(1, -1131715556, 1028335344, 1009439676, 1040529906) + + W(2, 1020565079, 1046045566, 1051791856, 1059303201) + W(3, 1060541458, 1057651322, 1050310918, 1061742083) + + W(4, -1098563453, -1094642094, -1092783656, -1082254536) + + W(5, -1088235102, -1083519131, -1088554180, -1084895582) + + W(6, -1123752726, 1032349343, 1017360893, 1040706037) + W(7, 1038984656, 1042767141, 1039511244, 1037250730); + sum2 = W(0, -1086948904, 1044777691, -1085906527, -1107333159) + + W(1, -1094251850, -1101622483, 1054103540, -1095334551) + + W(2, 1072921984, -1096214776, 1069427540, 1069014322) + + W(3, 1068759557, -1095651744, -1080635176, -1071443034) + + W(4, -1102597600, -1086612116, 1055370552, 1068187895) + + W(5, 1049876941, -1098339283, -1098368915, -1119710681) + + W(6, -1081008410, 1053678014, -1094318103, 1040782350) + + W(7, -1097053445, 1052418274, -1116179967, 1057760894); + WS(-1072329816, 1074376722); + sum1 = W(0, -1119826815, 1034466519, -1121811611, 1040071236) + W(1, 1034412551, -1125783811, 1025497578, 989295697) + + W(2, -1097135645, -1100415357, -1123247055, -1078161261) + + W(3, -1086793716, -1092987820, -1117997032, -1097292445) + + W(4, 1051061560, 1050212013, 1057003368, 1050094223) + W(5, 1060987117, 1053182609, 1030369595, 1040352158) + + W(6, 1001332238, 1009264740, -1119531350, 1047608180) + + W(7, -1139666027, 1053959834, -1130537576, 1003451511); + sum2 = W(0, 1010850411, -1119692159, -1110113403, 1041465994) + + W(1, 1019542894, 1041361680, -1128076954, 1035827461) + W(2, 1033963983, 1024673399, -1107122545, 1041917938) + + W(3, 1031918417, 1016152214, -1119110501, -1108745692) + + W(4, 1041326744, -1126975570, 1049177676, 
1075051781) + + W(5, 1057547658, -1100210466, -1101468559, -1111890885) + + W(6, -1110591524, 1039643589, -1098235771, 1061676083) + + W(7, -1072022345, -1080793678, 1038326573, -1115279667); + WS(-1089734463, 1065567745); + sum1 = W(0, -1115310942, 1048676446, 1051913151, 1041943745) + + W(1, 1032260949, -1123414147, 1025799735, -1118314271) + + W(2, -1089748010, 1033541431, -1091844976, -1078026925) + + W(3, -1083863979, -1088695462, -1097284573, -1090712276) + + W(4, 1053493076, 1047890913, 1060749357, 1057682191) + W(5, 1060855949, 1057895644, 1049355215, 1052101531) + + W(6, 1041536229, -1111760907, 1031051700, 1033727319) + + W(7, -1117552023, -1123512523, -1134918146, 1037093278); + sum2 = W(0, -1087048499, -1089227831, -1081828818, -1106022394) + + W(1, 1057638724, 1053589051, 1030725267, 1050363813) + W(2, 1067110585, -1073947481, -1100054441, 1057992187) + + W(3, -1096117153, 1049824990, 1008667589, 1047082764) + W(4, 1062551012, -1107011493, 1044700801, 1053279159) + + W(5, 1047077354, -1098858111, 1055551992, -1093863007) + + W(6, -1088705293, 1063056164, -1091908804, 1049660447) + + W(7, 1041467137, -1112699600, 991046018, -1118599281); + WS(-1079109040, -1085312521); + sum1 = W(0, 1023769582, -1140282322, -1105105824, 1041551140) + + W(1, 1030694836, -1111084358, 955351653, -1133450021) + W(2, 1037365911, -1117579705, 1052645482, 1061549365) + + W(3, -1092653220, 1050958145, -1113919416, 1047664699) + + W(4, -1113299189, 1042794748, -1086336671, -1087637436) + + W(5, 1051894263, -1111723928, 1039072513, -1111528112) + + W(6, -1127522065, -1098682508, 1041518392, 1049784787) + + W(7, -1104535548, -1112540051, 997865134, -1121426302); + sum2 = + W(0, -1112127727, 1037755147, -1117076760, -1096944427) + W(1, 920706880, -1126463908, 1025583781, 1016301114) + + W(2, -1182735741, -1116775242, -1110833901, 1050960349) + + W(3, 1057824022, 1025802869, -1117624689, -1106817101) + W(4, 1027967369, -1108084106, 1047419656, 1057406540) + + W(5, -1106635563, 
1030718323, -1129307942, -1146210056) + + W(6, -1099828018, -1106208929, -1106621688, -1093670465) + + W(7, 1046907280, -1104727129, 1024731263, -1133403952); + WS(1066445424, -1114782683); + sum1 = W(0, -1105169880, 1037849343, -1111719878, 1053025475) + + W(1, -1106443352, -1118185334, -1115180998, -1104379365) + + W(2, -1110498524, -1092096849, -1089773556, -1093199580) + + W(3, -1085224723, 1017432168, -1097477655, -1120659192) + + W(4, 1042140161, 1045982494, 1059787797, 1058443885) + W(5, 1058982105, 1051015495, 1049335790, 1047806371) + + W(6, 1033176686, 1027992139, -1104315937, 1034263873) + + W(7, -1099213011, 1015617473, -1134277975, -1124174113); + sum2 = + W(0, 1035156885, 1010861092, 1053142713, 1050643378) + W(1, -1093995815, -1100307988, -1094190457, -1095534688) + + W(2, -1104443938, -1111353322, -1092519845, 1033492110) + + W(3, 1061622434, 1045835461, 1054276307, -1118030035) + W(4, 1020765936, -1131256669, 1043066295, -1093594884) + + W(5, -1090891830, 1051701313, -1115303200, 1044764145) + + W(6, 1034215267, -1119467871, 1043572605, -1120345789) + + W(7, 1035132204, -1120193257, -1107758441, 1010759898); + WS(1053797695, 1034928741); + sum1 = W(0, -1113709609, -1101985872, -1098832221, -1106547453) + + W(1, 1032341932, 1038712045, 1020651977, 1037110646) + W(2, 1027306577, 1036410350, 1048222316, 1049396519) + + W(3, -1093507345, -1098128846, -1102358117, -1095739626) + + W(4, -1089618673, -1094568298, -1089721378, 1057932790) + + W(5, 1051551069, 1042310981, 1032076306, 1049811664) + W(6, 1039738506, 1041675949, 1049870789, 1032602030) + + W(7, 1041034929, 1020376402, 1038606871, 1036847389); + sum2 = + W(0, -1107569978, 1048834253, 1032955728, 1047116738) + W(1, -1095875849, 1024141754, -1117854930, 1031286951) + + W(2, -1096371488, -1100415470, -1097656368, 1057366407) + W(3, 1049724515, 1047568587, 1025784853, 1040928209) + + W(4, -1091206842, -1140716449, -1099982688, 1067804624) + + W(5, -1107732848, -1101898886, -1104360749, -1099991629) 
+ + W(6, -1113379832, -1098639187, -1102236471, -1112454303) + + W(7, 1037256860, 1039735469, 1034482735, -1122662653); + WS(-1100599294, -1113486107); + sum1 = W(0, 1041832895, -1113994869, -1101582062, -1100675730) + + W(1, 1015862616, 1033573462, -1123901241, -1147102474) + W(2, 1058015130, 1043005476, 1065539217, 1054749782) + + W(3, 1046936430, 1043027144, -1122192647, 1047923809) + + W(4, -1095482492, -1095369841, -1088297536, -1085935041) + + W(5, 1054414015, -1096194081, -1118572185, -1106262926) + + W(6, -1123207408, -1120610968, 1040056719, -1111595960) + + W(7, -1110844045, 1041950395, 1007333963, -1097939280); + sum2 = W(0, 1033496750, -1118187780, 1041080086, -1102935461) + W(1, 1023898015, -1122438380, 1007185073, 999989929) + + W(2, -1105017139, 1043450683, -1106458832, 1057933296) + + W(3, -1098239652, -1111698692, 1023645924, 1034431733) + + W(4, -1118941046, -1142467441, -1096734279, 1063076098) + + W(5, -1119844388, 1048742138, -1096898561, -1090457759) + + W(6, 1012703263, 1009095593, 1025973118, -1089439993) + + W(7, -1124882200, 1004297609, 1046154393, 1048791679); + WS(1058454143, -1086058342); + sum1 = W(0, -1115074879, -1099743710, -1103933280, -1113530407) + + W(1, -1124606630, -1115568198, -1123572016, -1118699368) + + W(2, 1051341170, 1054019099, 1057240637, -1085932301) + + W(3, -1090466976, -1097279225, -1108988935, -1095706385) + + W(4, -1123276713, -1112675511, -1095853513, 1048593300) + + W(5, 1059272419, 1052135974, 1039376368, 1041536544) + W(6, 1040408770, 1042487060, 1044345774, 1034191179) + + W(7, -1105205160, -1111240730, -1135917288, 1043157563); + sum2 = W(0, 1047642666, -1094360366, -1111839157, -1101124559) + + W(1, -1119401007, -1114275291, 1010413358, 1018914035) + + W(2, -1106684338, -1104496052, -1101578828, 1062389037) + + W(3, -1099504899, -1102577927, -1121638606, -1100286365) + + W(4, -1097565895, -1137000462, 1017360469, 1056361687) + + W(5, 1057357121, -1111580608, 1047557438, -1108198790) + + W(6, 1038757740, 
-1120606628, 1043520604, -1110716481) + + W(7, -1121032787, -1174578992, 1020758339, 1031842399); + WS(-1116191222, -1087222261); + sum1 = W(0, 1051378220, 1026061480, 1051132746, -1131139769) + W(1, 1029438530, 1010253799, -1140691900, 1032111196) + + W(2, -1092140039, 1041212802, 1028855632, 1054776996) + W(3, 1052461054, 1056191968, 1041996679, 1051463563) + + W(4, -1098282527, -1108686318, -1114615088, -1096808426) + + W(5, -1090181940, -1089672580, -1095415146, -1084069643) + + W(6, -1115126509, 1018050395, 1028919646, 1031880915) + W(7, 1047407636, 1041055121, 1025713945, 1049760258); + sum2 = + W(0, -1120688670, 988781159, 1023531678, 1034244658) + W(1, 1038669521, -1125227753, -1122152897, -1123518214) + + W(2, 1075791226, -1079951996, -1080919050, -1078043585) + W(3, 1061514147, 1050383220, 1043495776, 1056154705) + + W(4, 1072772368, -1096143661, -1087274515, -1074830486) + + W(5, -1138433181, -1091737459, 1043710967, 1064633897) + + W(6, 1033088733, -1126178338, 1039982901, -1153355920) + + W(7, 1047086021, -1117938706, 1036258954, -1090859740); + WS(-1086114623, -1084816591); + sum1 = W(0, -1119067590, 1041053515, -1123050946, 1050414506) + + W(1, 1017204584, 1033253648, -1109025761, -1142909644) + + W(2, 1036163639, -1112478813, 1045609440, -1091176571) + + W(3, -1096458464, -1092456237, 1039781304, 1019336538) + + W(4, -1106221778, -1114565991, -1102274148, 1041957568) + + W(5, -1140250052, 1042499428, 1024592194, -1110741472) + W(6, 1015833809, 1041839788, 1038604869, 1049671879) + + W(7, 1016035267, 1004159150, 1019624502, 1038938427); + sum2 = + W(0, 1006657945, -1145756082, -1102183527, 1043063832) + W(1, 1048257970, 1030027167, -1129422896, -1120006792) + + W(2, -1115603798, -1129023472, 1024575987, -1070476153) + W(3, 1078068971, 1034687231, 1014471457, 998969122) + + W(4, 1043167348, 1029036192, -1134024937, -1079118960) + W(5, 1058188063, 1026633068, -1138449465, 1035752331) + + W(6, 1032033598, -1122908214, 1013148193, -1117007948) + + W(7, 
1026806223, -1106350665, 1037397710, -1119066852); + WS(1058895967, -1115291633); + sum1 = W(0, -1125313920, -1110715998, -1110540830, -1093243535) + + W(1, -1098773368, -1114664694, -1108229374, -1128878150) + + W(2, -1118075736, 1028840739, 1050429809, 1056177832) + W(3, 1050921353, 1021103494, 1039051533, 1048765371) + + W(4, -1101537045, -1106860038, -1109809347, 1062536899) + + W(5, 1050429425, 1042267175, -1104944731, 1019165331) + + W(6, -1102743433, -1113756805, -1112821659, -1094426092) + + W(7, -1112140166, -1105504016, -1123143221, -1113092552); + sum2 = + W(0, -1118991740, 1035161218, 1031409558, 1025873763) + W(1, -1130385854, -1109675818, 1035221069, -1106515599) + + W(2, 1049660250, -1092533458, 1052404039, 1059098325) + W(3, 1035329255, 1033133876, -1100849576, 1041079456) + + W(4, -1072802862, -1084844108, 1059153934, 1066715964) + W(5, 1054800035, -1121630224, 1017402854, 1051036492) + + W(6, -1093107290, -1131472334, -1099814906, 1050189246) + + W(7, 1033113369, 1032032260, 999987753, -1109656332); + WS(-1088275071, -1079832501); + sum1 = W(0, -1121609972, 1054795207, -1091175970, 1036185063) + + W(1, 1036253081, -1095852779, 1056993046, -1099614386) + + W(2, -1140631216, 1001216956, -1109051587, -1108520810) + + W(3, -1112588624, -1111932116, 1018114011, -1114447781) + + W(4, 1031902612, 1053928586, -1100362195, -1118566269) + + W(5, 1050418310, -1106845532, 1057036918, -1106898567) + + W(6, -1114449795, 1049117898, -1089874193, 1024185910) + + W(7, 1033419200, -1098704820, 1052334541, -1103629998); + sum2 = W(0, 1034198694, 981839325, 1050051875, 1043520387) + W(1, -1099547613, 1020237946, -1112528204, 1032634213) + + W(2, -1099753021, 1036479145, -1087878645, -1099082069) + + W(3, 1049181233, -1106552091, 1051666481, -1105193718) + W(4, 1052074243, 1033443852, 1061175239, 1044393022) + + W(5, 1036533430, -1125692060, -1100952955, 1038429221) + + W(6, -1101897553, -1125560160, -1090657055, -1126528836) + + W(7, -1113239720, -1111189720, 
1038783930, -1110455840); + WS(1055684799, 1057467177); + sum1 = W(0, -1115635180, 1040983065, 1033664034, 1047016048) + W(1, 1040303666, 1031194256, 1036112338, 1031542812) + + W(2, -1084593518, -1097583958, -1088354001, -1083875169) + + W(3, -1089538899, -1090088564, -1096939300, -1092334596) + + W(4, 1057349085, 1048845932, 1059080519, 1057461395) + W(5, 1055847004, 1052697432, 1050363555, 1025617138) + + W(6, 1044528285, -1108137881, 1042415046, -1130549589) + + W(7, 1025308788, 1040121956, -1124884732, 1051059157); + sum2 = W(0, 1039076253, 1043377438, -1111770710, -1129064764) + + W(1, 1041506513, -1117340191, -1113661124, 1006037802) + + W(2, 1084724252, 1057352437, -1070809771, -1070013447) + + W(3, 1024963427, -1115045950, 1037860995, -1120758055) + + W(4, 1055999642, -1108204793, -1115649819, -1084654208) + + W(5, 1027889169, 1023558847, -1108774739, 1034794526) + W(6, -1111202323, 982595482, 1026864091, 1008953953) + + W(7, 1041107952, -1112891517, -1161342746, 1036271073); + WS(-1089965247, 1033154456); + sum1 = + W(0, -1118330000, 1043361746, 1028536899, -1106541856) + W(1, -1103233833, 1037178020, -1122355891, -1148310999) + + W(2, 1037068672, -1091726905, -1106295439, -1087991002) + + W(3, -1094017778, 1026207675, -1102222128, -1106998176) + W(4, 1043453383, 1027410848, 1047994631, 1053447482) + + W(5, 1058853977, 1049286760, 1044913013, 1056026770) + W(6, -1112803845, 1030795409, -1111747193, -1109712917) + + W(7, -1110327970, -1111739348, 1037236782, -1104668925); + sum2 = W(0, -1104081637, -1115350296, -1102551062, 1033477476) + + W(1, 1048930937, -1092653578, 1049862570, -1095610208) + + W(2, -1104712414, 1065017407, -1101193417, 1054909386) + + W(3, -1096226206, -1094950793, 1042131871, -1100838677) + + W(4, 1038273275, 1027805677, -1097697554, 1069044481) + + W(5, 1051154993, -1097937335, -1115393002, -1090656582) + + W(6, -1102746442, 1038124336, 1025981021, -1090398660) + + W(7, 1049254524, -1111378843, -1100983150, 1038627466); + 
WS(1053522367, -1088249107); + sum1 = W(0, 1041083642, -1109062519, 1041255149, -1129341567) + + W(1, -1122816004, 1034422596, -1105678140, 1002201935) + W(2, 1049354980, 1050121221, 1062047573, 1050256849) + + W(3, 1058306025, 1054029415, 1043380368, 1057986114) + + W(4, -1104528935, -1089361286, -1094323010, -1085946707) + + W(5, -1092704114, -1096738148, -1095183726, -1098664466) + + W(6, -1116245577, -1108913352, 1044478835, -1106612546) + + W(7, 1010650184, -1127765836, 1004046811, -1116617606); + sum2 = W(0, -1115691041, -1122196578, -1133382325, -1119943136) + + W(1, 1035619338, 1026693101, -1121646682, 1035287249) + + W(2, 1047105417, 1040512713, -1115639944, -1093984910) + + W(3, -1128164365, -1112542694, 1040561229, -1122951759) + + W(4, 1066880006, 1072356775, 1070833917, -1106032356) + + W(5, -1084317659, -1103058628, -1106694088, -1109474775) + + W(6, -1078420733, -1074089832, -1079250237, 1053171659) + + W(7, 1063302165, 1050228647, -1111273903, 1041535222); + WS(1054980735, -1118400611); + sum1 = W(0, 1023857233, -1136534172, -1109579886, 1024279465) + W(1, 1031638446, 1042081899, 1025101580, 1037741818) + + W(2, 1056052581, 1054619866, 1062631892, 1063340895) + W(3, 1054891904, 1032051762, 1036582853, 1040187076) + + W(4, -1108899938, -1084632317, -1087670245, -1087714282) + + W(5, -1089981519, -1091518889, -1100292439, -1096282711) + + W(6, -1098911041, 1014993130, 1019079234, 984908360) + W(7, 1040973935, 1034243544, 1038263499, 1035333164); + sum2 = W(0, 1048663857, -1115607290, 1050959246, -1094885185) + + W(1, 1032263390, -1094433235, 1034671855, -1100445080) + + W(2, -1103126769, -1089284711, -1106476140, 1065597134) + + W(3, -1114774312, 1044112203, -1110680696, 1044150663) + + W(4, -1090522781, -1135630263, -1116194786, 1068281966) + + W(5, -1114594327, 1034641663, -1120338125, 1039802827) + + W(6, -1135969141, -1096474341, -1110761886, -1097679100) + + W(7, 1030603925, 1025157613, -1121778209, -1101519530); + WS(-1099426814, 1028666567); + 
sum1 = + W(0, 1017347599, -1114817850, -1102332157, 1038414608) + W(1, 1040863441, 1043586729, 1033560385, 1024225691) + + W(2, 1050515008, 1054909169, 1058789976, 1049870885) + W(3, -1096181150, -1094822147, -1110297504, 1035801215) + + W(4, -1097546199, -1091252816, -1088740593, 1026332624) + + W(5, 1050979384, 1046083753, 1043042893, -1106319780) + W(6, -1122852442, 1034424401, 1040697053, -1119196265) + + W(7, -1096850298, -1101867410, -1112505198, 1022200238); + sum2 = + W(0, 1032296335, -1113414658, 1032045591, -1096252543) + W(1, 1039836782, -1130462124, 1032505935, 1017080436) + + W(2, -1113854030, 1056314261, 1056282685, -1076472908) + + W(3, 1041975936, 1060789196, -1113045826, -1114544525) + + W(4, -1123218166, 1057288090, 1048444709, -1076465599) + W(5, 1054239999, 1064414385, -1101415652, 1017354944) + + W(6, 1034439881, -1106401889, 1027355020, -1099548772) + + W(7, 1041344971, -1137373456, 1031341874, -1131418272); + WS(1048405758, 1021439377); + sum1 = W(0, -1095783591, 1004365570, -1111296393, -1090472608) + + W(1, 1053249340, -1096289768, -1116930177, 1031327257) + W(2, 1056714988, 1053912029, 1060368018, 1058906144) + + W(3, 1059710205, 1050004859, 1050104788, 1057263364) + + W(4, -1095028931, -1097931508, -1093355890, -1084869763) + + W(5, -1111710658, -1089344390, -1098648430, -1095692170) + + W(6, -1099026446, 1040281084, 1042667324, -1099317986) + + W(7, 1058667863, -1094050929, 1032145973, -1127343131); + sum2 = W(0, -1137650289, -1108324664, -1107068637, 1053004242) + + W(1, -1096186406, 1044011702, 1031462490, 1016473473) + + W(2, -1102128439, 1038586891, -1102631398, -1096144010) + + W(3, 1065778432, -1087564484, 1041037240, -1095964556) + W(4, 1065729447, 1042582877, 1055966210, 1066736444) + + W(5, -1077760412, -1109340585, -1088233524, -1079487884) + + W(6, -1085103011, -1096095433, -1096657292, -1084794119) + + W(7, 1061684194, 1053274132, 1060428365, 1070082531); + WS(-1120621558, -1109747932); + sum1 = W(0, 1041405257, 1034046808, 
-1118447615, 1031113092) + W(1, -1118172841, 1039149197, 1025628006, 1023987646) + + W(2, 1056552380, 1057052863, 1034826244, 1061462741) + W(3, 1057041023, 1051800560, 1051243933, 1054503948) + + W(4, -1086075216, -1102283433, -1089852749, -1087896594) + + W(5, -1089300195, -1089266110, -1098077639, -1098034299) + + W(6, 1021959803, 1009707220, 1002695151, 1030648525) + W(7, 1017359341, 1034838445, 1033512688, -1099231069); + sum2 = W(0, -1072120352, -1071453180, -1069998225, -1071241469) + + W(1, -1073669228, -1079882002, -1086087106, -1096258120) + + W(2, 1076686179, 1075980741, 1075876198, 1076219984) + W(3, 1072219376, 1071235859, 1056170781, 1057784547) + + W(4, -1096491690, 1055107632, -1100816518, 1048961210) + + W(5, -1123192163, -1097056656, 1049622318, -1098632303) + + W(6, -1109988374, -1125676035, 1033428305, 1018908197) + + W(7, 1040773271, -1129279043, 1030403985, -1116156171); + WS(1014286296, 1057122707); + sum1 = W(0, -1100574608, -1134927261, -1096946922, -1115033128) + + W(1, -1104533485, -1121259095, -1131995140, -1118839498) + + W(2, 1045128364, -1112841149, 1023155519, -1094208427) + + W(3, -1099169249, -1097752701, -1111007544, -1093400894) + + W(4, 1045637171, 1041785732, 1051848326, 1066937726) + W(5, 1058599142, 1049921902, -1118760834, 1043420320) + + W(6, -1113867461, 1021982785, -1106336253, -1099783438) + + W(7, -1105189673, -1131816685, -1114789302, -1116552190); + sum2 = + W(0, 1049653051, -1094509070, 1050525055, -1103662151) + W(1, 1043452805, -1102028885, -1109754017, -1111709279) + + W(2, -1095117458, 1054995367, 1042728281, 1058124872) + W(3, 1049859565, -1097129382, -1098619851, 1051259496) + + W(4, 1052542945, 1021265631, 1046926465, 1063131010) + W(5, 1029162962, -1116537562, -1092457406, -1078653130) + + W(6, 1018695699, -1101605993, 1044635344, -1104882803) + + W(7, 1042944443, -1110047725, -1107003327, -1114867517); + WS(-1089617919, -1078924764); + sum1 = W(0, 1038586191, -1112517470, 1021690773, -1148410045) + + W(1, 
-1123773533, 1037589846, -1114886949, 1015916580) + W(2, 1054651431, 1043170193, 1058605998, 1062049174) + + W(3, 1056236357, 1058401044, 1034515388, 1056139353) + + W(4, -1087542485, -1101086846, -1088163691, -1087004528) + + W(5, -1089020247, -1089711896, -1095006441, -1089662071) + + W(6, 1032587193, -1121720345, 1034793549, -1139319682) + + W(7, 1040971627, 1052009144, 1017461287, -1138694970); + sum2 = + W(0, 1041183203, -1109901203, 1047303939, -1146453442) + W(1, -1103370030, 1035308463, -1107592095, -1129723226) + + W(2, -1104791361, -1103309504, -1109225788, 1050748307) + + W(3, 1032462009, 1055570013, -1104507532, 1050955773) + W(4, -1113641717, 1018299463, 993644855, 1050538753) + + W(5, 1024125183, 1058013570, 1044744090, -1110543421) + W(6, 1040266660, -1124518657, 1039912380, -1107008276) + + W(7, -1094411631, -1087158984, -1091454514, -1122733934); + WS(-1102088830, 1068463311); + sum1 = W(0, 1007817678, -1114291787, -1106493040, -1121377148) + + W(1, -1120569345, 1037434082, -1121570543, 1048479966) + W(2, 1054791531, 1051084317, 1059023407, 1060487316) + + W(3, 1053262386, 1049438849, 1043319351, 1037267389) + + W(4, -1088166471, -1097682087, -1087032011, -1087032009) + + W(5, -1090195409, -1104445288, -1110294871, -1112047510) + + W(6, 1034628887, -1130503387, 1042839161, 1025313797) + + W(7, 1040112167, -1112558866, -1130274577, -1107488086); + sum2 = W(0, -1092702026, -1078842502, -1073165872, -1075365553) + + W(1, -1101579557, 1066341075, 1071992070, 1076187261) + W(2, 1046572938, -1092761370, 1048469719, 1050413401) + + W(3, -1110447061, -1104166524, -1123716833, 1056626852) + + W(4, -1113806693, 1010298098, -1106037347, 1041503306) + + W(5, 1037192623, 1033398310, 1031702363, -1110743783) + + W(6, 1023924734, -1134107544, -1114438827, 1017902665) + + W(7, 1011674154, -1109271682, 1028552632, -1123830383); + WS(1050299903, -1120086405); + sum1 = W(0, 1001761330, 1051426245, -1097473350, 1050955122) + + W(1, -1098718343, -1119699408, 
1038117517, -1107622229) + + W(2, 1015037916, 1042953374, -1103780307, -1082933123) + + W(3, 1055455844, -1145769035, -1132909758, 1039587926) + + W(4, 1045981578, -1094503950, 1055197076, -1099126616) + + W(5, -1118831961, 1049761702, -1105955296, 1047033256) + + W(6, 1032160884, -1131440192, -1100940622, 1056645367) + + W(7, -1095742542, 1031622998, 1041481643, -1103124921); + sum2 = W(0, 1033493706, 1004282338, 1032395114, 1035721710) + + W(1, -1138557465, -1094630455, 1050323990, -1102984389) + + W(2, -1100025935, 1036668146, 1043333590, 1058018229) + W(3, 1035348312, 1040910383, 1034465026, 1044530527) + + W(4, -1117747708, -1097774825, -1106149887, -1110236537) + + W(5, -1105971349, 1017216296, -1110352462, -1108832665) + + W(6, -1131691420, 1042472719, -1122758767, -1106477628) + + W(7, 1046834331, -1101697584, 1040100106, -1099114129); + WS(1059761855, -1093333930); + sum1 = W(0, -1114738580, -1114617627, -1102727321, 1053910378) + + W(1, -1094270689, -1107452471, -1130455238, -1108958070) + + W(2, 1026403632, 1045397583, 1028768699, 1067649611) + W(3, -1104323849, 1042776267, 1036450873, 1044075076) + + W(4, -1105736123, -1109519752, -1092041414, -1090720496) + + W(5, -1112030179, -1103704317, -1131764742, -1106063752) + + W(6, -1114933837, 1044731740, -1111636685, 1058839312) + + W(7, -1095377736, 1034877772, 1031929286, -1113359528); + sum2 = W(0, 1033753751, -1102724392, -1104466296, -1083668043) + + W(1, 1048180527, -1130490520, -1103926430, 1029490699) + + W(2, -1111931472, 1032416705, 1029992729, 1069568682) + + W(3, -1114422988, -1133345747, 1038116955, -1129834714) + + W(4, 1037689465, -1105246922, 1052475163, -1090236396) + + W(5, -1122305941, -1123788932, -1127253836, -1122171294) + + W(6, 1023732592, -1126463180, -1118149766, -1106129378) + + W(7, 1038071226, -1124715334, -1117123194, 1029091180); + WS(1058247519, 1058950523); + sum1 = W(0, -1149972914, 1038957612, 1039970419, 1008158544) + W(1, 1041515382, 1036144305, 1048236362, -1101573740) + 
+ W(2, -1088063023, -1093535488, -1086741707, -1077128706) + + W(3, -1087332234, 1032313207, -1113848378, -1089306475) + + W(4, 1055543086, 1052729022, 1058924163, 1057693884) + W(5, 1060656868, 1056531259, 1049522521, 1052006205) + + W(6, 1037155715, 1026693813, 1030719286, 1032127358) + + W(7, 1000958261, -1112386573, -1144612480, 1039554949); + sum2 = + W(0, 1037957789, -1128637884, -1110859440, 1052981065) + W(1, 1040710261, -1090346288, -1107045102, -1092015484) + + W(2, 1050553491, 1004874497, -1106433025, 1058989771) + W(3, 1058251934, -1076253449, -1079150504, 1073146781) + + W(4, -1089722328, 1058234547, -1098594890, 1052545864) + W(5, -1106724054, 1032950959, 1055215751, 993278274) + + W(6, 1050994656, -1095571828, 1041665953, -1129708900) + + W(7, -1111634443, 1015279680, 1053553553, -1088813426); + WS(-1079689312, 1054955487); + sum1 = W(0, -1112827293, 1038847520, -1106748654, 1038138418) + + W(1, 1035397268, 1020226162, 1030022886, -1108673687) + + W(2, -1094091800, -1099397203, -1102151838, -1086663035) + + W(3, -1087965823, -1095805620, -1098762192, -1096846716) + + W(4, 1053343716, 1057879171, 1023820222, 1057249493) + W(5, 1057985827, 1053821069, 1051466543, 1049463311) + + W(6, 1046408387, -1099233409, -1154890390, 1012875058) + + W(7, -1109124523, -1126965269, 1036334304, 1027304762); + sum2 = W(0, 1029906557, -1110945783, 1046588193, -1121400966) + + W(1, -1106274145, 1040948990, -1114149785, 1030737726) + + W(2, -1118908362, -1110004947, -1089040350, -1106443118) + + W(3, 1054044230, -1115139995, -1112260837, 1025987216) + + W(4, -1075330978, -1081100797, -1089891752, -1103643831) + + W(5, -1095873523, -1120742988, -1107023588, 1017833328) + + W(6, 1069116571, 1067844910, 1066162938, 1048795077) + W(7, 1049522444, 1042365723, 1011222152, 1031208089); + WS(1045323518, -1099573370); + sum1 = W(0, 1043494028, -1110296205, -1129034139, 1040205415) + + W(1, 1019451328, 1037480760, -1144942569, 1019441962) + W(2, 1054678847, 1050638833, 1058657792, 
1062545342) + + W(3, 1053087270, 1054717567, 1044080591, 1056118375) + + W(4, -1087400961, -1098183847, -1085969705, -1082320511) + + W(5, -1092792902, -1090230188, -1094581114, -1090342307) + + W(6, 1041221965, 1038272854, 1035507984, 1038672050) + W(7, 1018934299, 1028598065, 1032398451, -1135273190); + sum2 = + W(0, -1115726367, 1028786141, 1044450180, -1097529180) + W(1, 1047479398, -1105649332, -1127420023, 1035799966) + + W(2, -1100272494, -1113941052, -1104186275, 1042859033) + + W(3, -1097402721, 1051025911, 1025042835, -1139194229) + W(4, 1026681555, -1107020345, 1044693748, 1067055887) + + W(5, 1064288021, 1073649944, 1067916626, 1054550763) + W(6, -1095030133, 1043410282, -1120312046, -1093369648) + + W(7, -1083945294, -1073003592, -1079526678, -1086831079); + WS(-1086660959, -1095040438); + sum1 = W(0, -1137772688, -1103872010, 1033664792, -1114303830) + + W(1, 1043631948, -1110960013, -1112201312, 1004207675) + + W(2, -1101831491, -1108733164, -1099393082, -1091611870) + + W(3, -1111668992, -1092153253, -1104258339, -1111896549) + + W(4, 1047762270, 1048734598, 1053294440, 1032897582) + W(5, 1054700448, 1051790266, 1038982672, 1042342295) + + W(6, 1026497863, -1112526127, 1036444877, -1105948909) + + W(7, 1045765800, -1102627253, 1030853145, 1024477884); + sum2 = W(0, 1047134358, 1032860761, 1058187056, -1089859690) + + W(1, -1073724300, -1113017653, -1123513506, 1005834992) + + W(2, -1100773581, 1019143260, -1086969962, 1060822499) + W(3, 1074524802, -1127402252, 986153015, 1026902445) + + W(4, 1038575217, -1097448906, 1050436365, -1092585260) + + W(5, -1097001013, 1052022456, -1101541136, 1040058169) + + W(6, -1121016162, 1027754828, -1118814167, 1047705700) + + W(7, 992280012, -1115523910, 1026625966, -1115254226); + WS(1065625968, 1033455989); + sum1 = W(0, 1037701789, -1137040283, 1000406064, -1094784938) + + W(1, 1050863995, -1103592251, 1032127959, 1038622703) + + W(2, -1151616802, -1099349487, 1047711372, -1115632781) + + W(3, -1088133648, 
1049293870, -1118922740, -1120882929) + + W(4, -1102944307, 1053320242, -1098462560, 1053161728) + + W(5, 1049613697, -1128175786, 1017489163, -1119810116) + + W(6, 1035789757, -1106274637, 1048627484, -1098374228) + + W(7, -1120581030, -1114622845, 1038993832, -1165953346); + sum2 = + W(0, 1035866397, -1103116216, 1059558286, 1058384307) + W(1, -1096953271, 1042132369, -1111426470, -1128202268) + + W(2, -1099238508, 1047962584, -1082734190, -1088122523) + + W(3, 1056069525, -1106560803, 1038442279, 1028886015) + W(4, 1052038184, -1105248240, 1057669233, -1091086098) + + W(5, -1083280561, 1032403631, -1104452063, -1150402298) + + W(6, -1107491561, 1027069673, -1098068789, 1053762892) + + W(7, 1064349048, -1107462772, 1040922744, 1012204557); + WS(1066439152, -1108830929); + sum1 = W(0, -1102916748, -1108788820, -1097607254, -1099017414) + + W(1, -1168846782, -1109193920, -1128476473, -1106724277) + + W(2, 1055608939, 1051866141, 1058768962, 1030462560) + + W(3, -1113005110, -1109987210, -1109670205, -1105116142) + + W(4, -1103898045, -1097938907, -1093577324, -1125603603) + + W(5, 1040477611, 1050862423, 1039038758, 1053895221) + W(6, 1013732302, 1035488645, 1043113957, 1027216291) + + W(7, -1143171172, -1118580993, -1107320747, -1107171108); + sum2 = W(0, 1050905005, 1051922636, 1052154527, 1005525738) + + W(1, 1024591478, -1094205878, -1094330307, -1105421569) + + W(2, 1051409035, -1107944278, 1040207956, 1060123319) + + W(3, 1032045878, -1096175996, -1086096303, -1081193369) + + W(4, -1123874142, -1131839644, -1100926997, 1046139234) + + W(5, -1113252250, 1039675350, -1128729660, 1062516858) + + W(6, -1099079861, 1035659117, -1109355656, 1029590720) + + W(7, 1032543174, 1044310065, -1129395200, 1042520685); + WS(-1094347903, 1040885342); + sum1 = W(0, 1027015666, -1097151811, 1050782150, -1106442815) + + W(1, 1051184308, -1091810963, 1034433919, 1042242605) + + W(2, -1119901274, 1047615033, -1115052947, 1051587896) + + W(3, -1105704339, 1046407197, 1009768300, 
-1122889273) + + W(4, -1109404651, 1051228067, -1096976391, -1096200649) + + W(5, -1126487153, 1040585575, -1132445914, -1108464678) + + W(6, -1145327523, -1096635113, 1048638245, -1105180361) + + W(7, 1051518582, -1092825760, 1038762547, 1040013733); + sum2 = + W(0, -1121205117, 1031829824, -1105324964, -1090390675) + W(1, -1110420773, 1018005966, -1113281242, 1032068637) + + W(2, -1124056141, -1109805410, 1045427632, 1059020251) + + W(3, -1107061510, -1118368537, -1115675784, -1113920834) + + W(4, -1106581817, 1007706034, 1046732003, 1057752640) + W(5, -1101115525, -1103994134, 1036673543, 1042522746) + + W(6, 1026565438, 1029970286, 1047283254, -1089850616) + + W(7, 1031976810, 1036394258, -1112565336, -1145818031); + WS(1066366016, -1121083386); + sum1 = W(0, -1127500850, -1122305754, 1025706181, -1104671484) + + W(1, 1043832110, -1111488580, -1123271764, 1015742169) + + W(2, 1026624689, -1114017662, -1088657475, -1098749979) + + W(3, -1085705014, 1037547214, -1112264441, -1105147876) + + W(4, 1013565235, 1052467062, -1121688536, 1066224034) + W(5, -1112016562, 1041900405, 1043675185, 1022047173) + + W(6, -1127181143, 1035730049, 1045310349, 1026340635) + + W(7, -1121566277, -1127673329, 998683632, -1132045794); + sum2 = W(0, -1102951634, 1044058166, 1037657608, -1091261805) + + W(1, 1057746121, -1106029228, 1006392595, -1119100020) + + W(2, 1043023116, -1093545812, -1093647750, 1070997171) + + W(3, -1082044166, 1051593249, -1100798567, 1013819138) + + W(4, -1104735391, 1058451408, -1104370519, -1105620254) + + W(5, -1091103100, -1096225251, 1052307063, -1121077487) + + W(6, 1035018995, -1112978123, 1043811519, 1052846459) + + W(7, -1097428497, -1127092852, -1107453736, 1041369362); + WS(1059191103, 1030618557); + sum1 = W(0, 1034200101, 1053678608, -1114145283, 1047695489) + + W(1, -1114663316, -1137634576, -1125278731, -1138534517) + + W(2, 1032351793, 1049973094, 1058794603, 1053292689) + W(3, 1055818163, 1053263444, 1041016873, 1043320696) + + W(4, 
-1100440271, -1098031908, -1087452589, -1078695803) + + W(5, -1125225546, -1094543429, -1116533557, -1104858229) + + W(6, -1148363237, 1026486548, -1194991971, 1043453347) + + W(7, 1033675947, 966472909, -1115520783, -1119664758); + sum2 = W(0, 1021496216, -1072453458, -1079530578, 1063841069) + + W(1, -1095958945, 1042172731, 1015153576, -1132334880) + + W(2, -1095812325, -1094113031, 1068683999, 1074699170) + + W(3, 1043477830, 1031880682, -1121439437, -1115200049) + + W(4, -1116672596, -1112299630, -1135088113, -1126165044) + + W(5, -1107446259, -1119142347, 1037216194, 1034903618) + W(6, 1031503244, 1023122856, 1036078566, 1036242384) + + W(7, -1100441243, 1009915385, -1114450875, 1017712520); + WS(-1096433855, 1052342409); + sum1 = W(0, -1117693364, 1041070378, -1105417036, 1052133240) + + W(1, 1042648272, -1123398616, 1044512510, -1105041790) + + W(2, 1042163431, -1095385709, 1060080198, -1090036408) + + W(3, -1090327769, 1043184694, -1111854479, 1043408999) + + W(4, 1006023611, 1034697390, -1109633387, -1085473397) + + W(5, 1057448056, -1105658040, -1104977148, 1041051171) + + W(6, -1113444755, 1049542464, -1112382959, 1048891757) + + W(7, -1111238376, -1106505885, 1049508979, -1110665802); + sum2 = W(0, 1039259027, -1119373866, -1086821333, 1052826002) + + W(1, -1117342490, 1036052293, 1037381955, -1101694007) + + W(2, 1043188759, -1082984200, -1077733706, 1067327309) + + W(3, -1149070344, 1052358305, -1102382047, 1050777563) + + W(4, -1165551167, -1105050294, -1103802686, 1068967257) + + W(5, 1054626023, -1101813629, -1103461210, -1106555317) + + W(6, -1113326246, 1045229872, -1109909290, 1052700624) + + W(7, -1098764713, -1099183932, 1052205497, -1096821402); + WS(1046655614, 1069864308); + sum1 = W(0, 1035633391, 1040943483, 1045417382, 1043913370) + W(1, 1050212129, 1047844019, 1039058852, 1043891960) + + W(2, -1106200785, 1031319761, -1120751363, -1093886310) + + W(3, -1098821847, -1088130094, -1092291401, -1082722808) + + W(4, -1105260712, 1026979270, 
-1106053451, -1121044950) + + W(5, 1042106970, 1049054225, 1041620790, 1042598038) + W(6, 1050507575, 1034433460, 1045765948, 1041581532) + + W(7, 1029812249, 1017929306, 1028938706, 1034869942); + sum2 = W(0, -1098805029, -1101136548, -1094351375, -1097728468) + + W(1, 1040878916, 1049713965, 1037060075, 1058810464) + + W(2, -1082465618, -1084472150, -1085764460, -1140347496) + + W(3, -1100091144, 1049328541, 1062808960, 1071144827) + W(4, 1017798412, -1108050411, 1031330790, 1053843229) + + W(5, 1043199898, 986797508, -1119994130, -1097822096) + W(6, 1045866690, 1039888035, 1027046574, 1038111290) + + W(7, -1100094208, -1104239104, -1098800085, 1017716576); + WS(-1077527440, 1065234224); + sum1 = W(0, 1031059492, 1014606833, 1041495444, 1031584061) + W(1, -1143158030, -1140389945, 1035424519, 1013235228) + + W(2, -1088676934, -1098001853, -1087241405, -1083911392) + + W(3, -1095088526, -1096536807, -1104484775, -1100223239) + + W(4, 1055802445, 1051162632, 1059501583, 1061276187) + W(5, 1050088278, 1049527800, 1042937212, 1037087943) + + W(6, -1159469258, 1001612013, -1109463457, -1105280809) + + W(7, 1038556792, 986827883, 1030875577, 1047418837); + sum2 = + W(0, 1026928347, 1006719462, -1103317527, 1043604137) + W(1, -1106627965, 1034526167, -1117999031, -1134258967) + + W(2, 1019217071, -1098369589, 1041304779, -1134457784) + + W(3, 1048134757, -1118427047, 1029994729, -1113504087) + + W(4, 1027288437, -1099640354, 1050941648, -1112266437) + W(5, 1027462477, 1040843010, -1104465031, 1054764710) + + W(6, -1112677997, -1146387796, -1072403340, -1071428003) + + W(7, -1082446250, 1059493547, 1072461191, 1077848621); + WS(1034219259, -1130863201); + sum1 = W(0, -1122470601, -1125729883, -1119868741, -1092963050) + + W(1, -1102984950, 1017182198, -1107264969, -1116589116) + + W(2, 1036468120, 1031973022, 1045421587, 1062931954) + W(3, 1062738826, 1050208654, 990945063, 1023027040) + + W(4, 1033355302, -1125640547, -1122038588, 1044707408) + + W(5, -1104269382, 
-1106320445, -1110744610, -1098846428) + + W(6, -1104067775, -1112351746, -1106594245, -1094830635) + + W(7, -1127079049, -1109997276, -1112571199, -1117279521); + sum2 = + W(0, -1120041672, 999703935, -1109032994, 1041627711) + W(1, -1111815622, 1029813110, -1110751572, -1114401610) + + W(2, 1041156572, 1029144962, 1037796455, 1046556501) + W(3, 1060385029, 1040466231, -1103822492, -1076222058) + + W(4, -1103868400, 1036670980, 1043292537, 1059414852) + W(5, 1048639871, -1105513272, -1100107078, 1043280503) + + W(6, 1036471018, -1107170022, -1115113192, 1029249656) + + W(7, -1126242312, 1043143286, -1101226736, 1034327293); + WS(-1097041087, -1081891922); + sum1 = W(0, 1045201037, 1042240134, 1049780286, 1039927583) + + W(1, -1122695635, -1110484942, -1117417409, 1039840796) + + W(2, -1097075376, -1091271089, -1094847921, 1059654550) + + W(3, 1056430510, 1057233913, 1041827328, 1050554535) + W(4, -1126257330, 1049467348, 1057610047, -1141428591) + + W(5, -1089703504, -1087958744, -1096746586, -1096159486) + + W(6, -1115394372, -1097795722, -1097045891, -1103383438) + + W(7, 1036098616, 1033721138, 1035539581, -1131932502); + sum2 = W(0, 1012982255, -1101224821, -1109376974, -1116588919) + + W(1, -1096183361, -1092858264, -1090462581, -1119652925) + + W(2, 1052214657, 1042508745, 1042044290, 1071547407) + + W(3, -1089469015, -1090264503, 1039700797, -1083767147) + + W(4, -1136022111, -1093748656, 1045672838, 1065241977) + + W(5, 1049249478, 1047260540, 1046964814, -1107087862) + W(6, 1032353073, 1020863779, -1113516472, 1040242686) + + W(7, -1120964187, -1103746594, -1100885683, 1028764718); + WS(-1088621983, 1079497913); + sum1 = W(0, 1027161409, -1102075462, -1120075900, -1109063205) + + W(1, -1104005172, -1136555883, -1112015962, 1033292370) + + W(2, 1052712773, 1051824794, 1057942154, 1060662493) + W(3, 1062931830, 1057800198, 1047971474, 1059753504) + + W(4, -1102058289, -1098602113, -1096292664, -1082645434) + + W(5, -1090788424, -1090185155, -1098101583, 
-1088742454) + + W(6, -1100368317, 1021667349, -1106147487, 1039175801) + + W(7, -1129475640, 1036809656, 1009161622, -1143403778); + sum2 = + W(0, -1150684740, -1114056841, -1164208415, 1034698244) + W(1, -1134574697, 1043895716, -1101980686, 1050191679) + + W(2, -1095235693, -1115238312, -1080604054, -1071093346) + + W(3, -1083926997, 1068501149, 1079699126, 1056576712) + + W(4, 1044985054, -1127339724, -1106549807, -1080554397) + + W(5, -1104923511, 1052487431, 1054997905, 1029700329) + W(6, 1035589955, -1108218072, 1022130402, -1106288272) + + W(7, 1024268734, 1036117000, -1130803558, 1031345667); + WS(-1087955103, 1023517655); + sum1 = W(0, -1106663590, 1037773274, -1116651838, 1022742037) + + W(1, 1040438284, -1117813952, 1032561623, -1102318801) + + W(2, -1090071677, -1103425461, -1089596734, -1082614473) + + W(3, -1088542372, -1092404165, -1100716355, -1098396937) + + W(4, 1062902614, 1055773525, 1059014377, 1060267482) + W(5, 1057943128, 1054561254, 1051603940, 1049819671) + + W(6, 1013889298, -1100022495, -1101161913, 1030094516) + + W(7, -1120043085, -1116951793, 1035114019, 1026546470); + sum2 = W(0, -1107653444, 1032757863, -1150202006, 1037010009) + + W(1, 1026258576, 1041748319, -1131622502, -1112646994) + + W(2, -1094858135, 1061839668, 1049735190, -1123088190) + + W(3, -1089824138, -1098358767, 1049699018, -1103776938) + + W(4, 1084595456, 1074196920, 1067688119, 1070729644) + W(5, 1050587980, 1057889952, -1127981435, 1048602984) + + W(6, -1065438414, -1069856939, -1073375528, -1082206819) + + W(7, -1086758222, -1097734258, -1106404220, -1098587943); + WS(-1089607615, 1063931357); + sum1 = W(0, -1156148665, 1033042784, 1027235975, -1117005684) + + W(1, 1045226606, -1112863441, 1040283212, -1102751384) + + W(2, -1089422721, -1099513696, -1086976804, -1083936660) + + W(3, -1089259880, -1092850238, -1096473171, -1096124078) + + W(4, 1059281720, 1053472432, 1060428066, 1061258678) + W(5, 1055780369, 1057981025, 1051704489, 1054893029) + + W(6, 
1041177357, -1107985659, -1122980350, -1103260375) + + W(7, -1111710595, -1109580750, -1125267169, 1035448166); + sum2 = W(0, 1024298597, 1037699935, 1030404477, -1094400051) + + W(1, 1052543759, 1024136176, -1159281410, -1102232478) + + W(2, 1074606150, 1048614297, -1085515118, -1071672142) + + W(3, -1091215560, -1095087497, 1050441058, 1071366890) + + W(4, 1080820206, 1047271097, -1079656120, -1066721483) + + W(5, -1078287609, -1096732136, 1057126689, 1074685383) + + W(6, 1030125639, -1121834510, -1166463196, -1101580999) + + W(7, 1028051862, 1009399200, -1117118368, -1113696211); + WS(-1083901183, 1060981851); + sum1 = W(0, -1106299749, 1039492049, -1113140273, 1055856482) + + W(1, -1103186821, -1108369378, -1115937041, -1127954738) + + W(2, 1048978486, -1107896957, -1119192025, -1090009641) + + W(3, -1088174932, 1034759564, 1008970428, 1027686380) + + W(4, 1016862419, -1115539879, -1098481927, 1058119768) + W(5, 1052019064, 1048477348, 1034559233, 1044214632) + + W(6, -1121286832, 1049662006, -1106196840, -1149152444) + + W(7, -1101797848, -1146708651, 1024756171, -1111878699); + sum2 = W(0, 1030335348, -1110488253, -1096373435, 1042224202) + + W(1, 1038378538, -1117770266, 1036930446, -1168276161) + + W(2, -1130369060, 1056034410, -1097486091, -1094958491) + + W(3, 1057631708, -1114209587, -1105273765, -1115537765) + + W(4, -1106797797, 1040331307, 1063611375, -1106971469) + + W(5, -1093142933, 1037075531, -1115233817, 1025218793) + + W(6, 1006653296, -1094310491, -1113782011, 1042512890) + + W(7, -1140559356, -1114686161, -1115373149, 1026640233); + WS(1063762143, -1098158381); + sum1 = + W(0, 1027555010, -1113176415, 1044835884, -1094480963) + W(1, 1042205032, 1038186408, -1112247611, 1041241393) + + W(2, -1106988831, 1049655577, -1089572014, 1041189880) + W(3, 1035602487, -1095655321, 995617211, -1102946366) + + W(4, 1031921883, 1032851437, -1106152127, 1057311541) + W(5, -1100163979, 1046700975, 1050763922, -1119959180) + + W(6, 1033567284, -1099137927, 
1052671923, -1101762329) + + W(7, -1106014811, 1044902280, -1094051821, 1044480349); + sum2 = + W(0, 1010916279, -1108274646, -1108043460, 1049469526) + W(1, 1061284555, -1097521196, 1047936551, -1106103943) + + W(2, 1040615985, 1037755544, -1107213139, -1085933104) + W(3, -1098201753, 1027381238, 1007266289, 1039340208) + + W(4, -1103581164, 1053142923, -1091384808, -1093682556) + + W(5, -1095835641, 1044225612, -1101597200, 1011948943) + + W(6, -1126424725, -1119616718, 1062997316, 1050086980) + + W(7, 1044548392, -1099920539, 1043928239, -1120667237); + WS(1060336095, -1119657045); + sum1 = W(0, 1023788715, -1117126579, -1108875843, 1022815367) + + W(1, -1100562722, 1043213137, -1111482971, -1142747249) + + W(2, 1048279897, 1044311660, 1062204714, 1060724749) + W(3, 1063043033, 1050929706, 1037241746, 1051192212) + + W(4, -1106238784, -1096869700, 1019770348, -1081389759) + + W(5, -1084472996, -1097593661, -1106654286, -1102133781) + + W(6, -1110161279, -1136931731, -1115905233, 1034904806) + + W(7, -1124661292, 1017832853, -1109716592, -1109353889); + sum2 = W(0, 1036664563, -1103288529, -1102134383, 1057556566) + + W(1, -1099512639, 1031668335, -1103860103, 1028018661) + + W(2, 1009247708, -1126560341, -1067246446, 1067728497) + + W(3, 1078180911, -1085231458, 1047956293, -1114058193) + + W(4, 1013790140, -1102369678, 1047852878, -1093152080) + + W(5, -1110504691, 1042704555, -1132621098, 1030531898) + W(6, 1035577258, 1008412166, 1016094574, 1045655865) + + W(7, -1123477495, 1023608957, -1112760821, -1120624893); + WS(1035518203, 1045613832); + sum1 = W(0, 1044215468, -1096085823, -1102751483, 1007706274) + + W(1, -1098118842, 1051305356, -1096250919, -1106455273) + + W(2, 1057279232, 1043513429, 1057361220, 1061539183) + W(3, 1057962612, 1058710773, 1037420261, 1053125757) + + W(4, -1097478568, -1096451901, -1091716958, -1085528932) + + W(5, -1093017477, -1091718961, -1100741095, -1097230063) + + W(6, 1042423606, -1098498663, 1012832112, 1054368634) + + W(7, 
-1103819295, 1057808613, -1100568821, -1111733504); + sum2 = W(0, -1105709018, 1050542737, -1106108137, 1027983028) + + W(1, 1053295819, -1091042532, 1049054259, -1108416827) + + W(2, 1058997495, -1084246791, -1103685622, -1106510960) + + W(3, -1089515541, 1066680974, -1088412186, 1037143386) + + W(4, -1086433444, 1065459234, 1055198365, -1102412567) + + W(5, 1057925132, -1080963495, 1056230430, 1049505110) + W(6, 1022365788, -1101303092, 1040995532, 1044759128) + + W(7, -1104466949, 1043286350, -1096360229, -1106131860); + WS(-1103921662, 1072713673); + sum1 = W(0, 1042712209, -1130675027, 1041149044, -1108819823) + + W(1, -1099465033, -1102885798, -1108891552, -1122152216) + + W(2, -1111681022, -1104528338, -1103688784, 1055222237) + + W(3, 1063848575, 1053207587, 1051374172, 1044600823) + W(4, 1029885560, 1032835743, 1053127408, -1089836355) + + W(5, -1089832742, -1090841456, -1098121506, -1101098252) + + W(6, -1123292759, -1124533373, -1101763282, -1143773237) + + W(7, 1044926036, 1032528402, 1040294582, 1018003689); + sum2 = W(0, -1116466906, 1032691193, -1110682367, -1134651946) + + W(1, 1019799157, 1020309206, 1028255175, -1118897648) + W(2, 1017580326, 1031996320, 1040673767, -1121084452) + + W(3, 1012141662, -1123645954, -1125118807, 1005021204) + + W(4, -1100144980, 1045143645, 1055366708, -1112456349) + + W(5, 1037079065, 1036705771, -1112096331, 1021253254) + W(6, 1018640494, 1067424527, 1071822180, 1057195246) + + W(7, -1078565795, -1073530916, -1087156462, 1033125682); + WS(1054959295, 1011151216); + sum1 = W(0, -1124554365, 1028540695, -1107828893, -1135601758) + + W(1, -1135515507, 1032466990, -1110097877, 950109203) + W(2, 1043778462, -1104732576, 1063706911, 1052343180) + + W(3, 1050410381, -1128754020, 1034222620, 1043960229) + + W(4, -1140499785, -1111020475, -1098481604, -1081762405) + + W(5, 1053053447, -1112972157, 1023071124, -1120112616) + + W(6, -1103855992, -1113072532, -1121738665, 1048654720) + + W(7, -1096321673, 1012954617, 
-1110289546, -1114439871); + sum2 = + W(0, -1104841432, 1047690020, -1084375561, -1086552004) + W(1, 1046416693, 1025710892, -1130856953, -1113936449) + + W(2, 1047456971, -1094867354, 1068880529, 1065412328) + + W(3, -1080289298, -1110380578, -1105039836, 1016290713) + + W(4, 1024424907, 1033457856, 1046341732, -1087761187) + W(5, 1062384539, -1103331799, 1024509174, -1126452938) + + W(6, 991739097, -1129013032, -1103560341, 1059009805) + + W(7, -1098849357, 1045332030, -1104241974, 1019994493); + WS(1061669311, 1066543312); + sum1 = + W(0, -1120030840, -1111872182, 1046388965, -1109640224) + W(1, 1051640445, -1104742312, 1040612129, 1018742870) + + W(2, -1097490380, 1051889621, -1085299564, 1043197314) + + W(3, -1099578605, -1114905437, 1033952272, -1095793020) + + W(4, 1038501126, -1110528784, 1050043436, 1054292822) + W(5, -1095700775, 1056051928, -1108619674, 1044337364) + + W(6, 1026361650, -1112085217, 1019064063, -1116217609) + + W(7, 1045441021, -1097066005, -1146667493, 1039394788); + sum2 = W(0, 1041249103, 1040922446, 1047169006, -1133026152) + W(1, 1045305983, -1121038685, 1032067159, 986169209) + + W(2, -1098300344, -1147221163, -1097103704, 1055827658) + + W(3, -1082349433, -1105225178, 1036618956, -1102095468) + + W(4, 1041141085, 1014590689, -1094689158, 1056443342) + + W(5, 1051958562, 1035590360, -1098250629, -1145857885) + + W(6, -1123648820, -1115292810, 1021735499, 1023898168) + + W(7, -1153021086, 1038130213, 1035045284, 1030074795); + WS(1066007616, 1040865170); + sum1 = W(0, 1016703369, -1106398093, -1114560988, -1096325697) + + W(1, -1094335779, 1023023221, -1113231876, 1022983872) + + W(2, -1154003525, 1038441464, -1112021256, 1050813825) + + W(3, 1059033474, -1108543232, 1040892233, 1034570898) + W(4, -1104662792, 1031083397, 1052887594, 1061249791) + + W(5, -1126656183, -1106571274, -1121820472, -1115038347) + + W(6, -1111826720, -1146451379, -1102003526, -1102172436) + + W(7, -1104413998, -1109007926, -1112600993, -1123919488); + sum2 
= W(0, -1117895883, 1036640719, -1092695525, -1109277526) + + W(1, 1054484712, -1102073467, 1047716642, -1115714917) + + W(2, -1109314178, -1103775527, -1079308608, -1088245301) + + W(3, 1032194306, 1058556412, -1097777200, 1049638324) + W(4, 1043196819, 1047073701, 1032498235, 1071134194) + + W(5, -1098274857, -1102222667, 1028797614, -1109885254) + + W(6, 1029963629, -1117061975, 1053147047, -1113344034) + + W(7, 1027577620, -1104158962, 1019489703, -1106961770); + WS(-1112959995, -1090797387); + sum1 = W(0, 998546749, -1102515009, 1018649191, 1024148768) + W(1, 1036601843, 991750171, -1134314668, 1024106779) + + W(2, -1107113888, -1157171375, -1095395659, -1088418302) + + W(3, -1108499994, -1092700079, -1100812608, -1106951576) + + W(4, 1042251615, 1043372984, 1057847906, 1049085946) + W(5, 1054314562, 1052242560, 1010001228, 1043704436) + + W(6, 1034524319, -1109045169, 1025715818, -1107024218) + + W(7, 1036766314, -1102346050, 1041369791, -1136260253); + sum2 = W(0, -1097076796, 1000463738, -1083533587, 1064100942) + + W(1, 1074997490, 1043707697, 1028507309, -1152835380) + W(2, 1045091130, 1024736081, 1064625577, -1096201033) + + W(3, -1071550159, -1106033160, -1118548401, -1111712908) + + W(4, 1028367411, 1040206702, -1112850496, -1086258418) + + W(5, 1052069491, -1098690685, 1050202273, -1113053149) + + W(6, 1016501386, -1122385083, -1104146558, 1050125885) + + W(7, -1105686824, 1041978517, -1109567340, 1032203802); + WS(1065286463, -1155116140); + sum1 = W(0, -1127743664, 1036265535, -1118137852, -1121018025) + + W(1, 1024160014, 1028265374, 1031461691, -1116217572) + + W(2, -1099417962, -1091060822, -1088882712, -1089265061) + + W(3, -1088513194, -1090887512, -1097685517, -1096201501) + + W(4, 1047837241, 1038987540, 1061626690, 1069311516) + W(5, 1053957977, 1051214117, 1050330231, 1045642342) + + W(6, 1035108393, -1121911642, -1119901843, -1100290432) + + W(7, -1101311755, 980641778, 1030945477, 1036102670); + sum2 = W(0, 1023964675, 1054178647, 
-1104607531, -1088748842) + W(1, 1034720649, 1027793575, 1026748348, 1048440611) + + W(2, -1104287950, -1098771138, 1030951671, 1062304002) + + W(3, 1056993385, -1095484784, -1103591056, -1115722179) + + W(4, -1095527688, -1106164282, 1049293814, 1069791349) + + W(5, -1106129898, -1093744890, 1040804508, -1102843948) + + W(6, -1107378538, -1107123343, -1102317974, -1093097006) + + W(7, -1107155572, -1104614583, -1111048704, 1036145149); + WS(-1107864827, -1106183398); + sum1 = W(0, -1106667222, 1032383543, -1104095901, -1105126732) + + W(1, -1119082647, -1099092218, -1106477481, -1100466619) + + W(2, 1028246417, -1105288567, 1024277645, 1031807228) + W(3, 1024663749, 1059042594, 1038514313, 1061247795) + + W(4, 1062610107, 1048732230, 1033194034, -1097352540) + + W(5, 1042072353, -1097841594, -1132497297, -1116357250) + + W(6, -1094330385, -1113355593, -1100412503, -1110531197) + + W(7, -1106817153, -1119219065, -1116226478, -1112377140); + sum2 = W(0, -1124057659, -1107748661, -1106641450, -1106278570) + + W(1, -1115427744, 1045306633, -1107251222, 1044220643) + + W(2, 1049202878, 1029365940, -1100669682, -1090514146) + + W(3, 1050790854, -1101749265, 1044664667, 1051259000) + + W(4, 1055877653, 1040631102, -1121942624, -1097531540) + + W(5, -1095195500, 1044511371, -1123742683, 1035140356) + + W(6, 1043135164, -1120955446, 1048773070, -1096084011) + + W(7, 1040842880, -1101432694, -1108344435, -1156279482); + WS(-1107710971, -1103335008); + sum1 = + W(0, 1020774038, -1097452822, -1103681302, -1142400543) + W(1, -1130541650, 1035627390, 1026485526, 1026933710) + + W(2, 1057965326, 1052789750, 1061528673, 1059882105) + W(3, 1058381533, 1052101646, 1047827121, 1048909172) + + W(4, -1096736580, -1097416740, -1090723228, -1082070814) + + W(5, -1096313647, -1092903493, -1095953623, -1094072619) + + W(6, -1108312647, 1007270548, 1008435167, 1032878150) + W(7, 1024150455, 1029066731, 1024752368, -1129872006); + sum2 = W(0, -1091198687, -1096592719, -1088722291, 
1017819408) + + W(1, -1104841225, 1047280108, -1133964792, 1046210234) + W(2, 1051358993, 1056674994, 1050253089, 1057806550) + + W(3, -1098514434, -1095745001, -1121773836, -1101840089) + + W(4, 1026242284, -1102658445, 1039492851, 1048044980) + + W(5, 1047848116, -1114927114, 1009730080, -1115459009) + + W(6, 1036983987, -1112372364, -1109827680, 1033031265) + + W(7, -1106004699, -1128936304, 1044703642, -1121859563); + WS(1046617982, -1079283690); + sum1 = W(0, -1112235521, -1112371935, 1046025067, -1114138877) + + W(1, -1100106092, -1106636112, 1006684852, -1134743124) + + W(2, -1103103206, 1005568807, -1082068590, 1053438832) + + W(3, 1050481934, -1113851306, -1101804759, -1101795702) + + W(4, 1050398983, 1053710959, 1059470764, 1056782757) + W(5, -1084354719, 1044662919, 1028720557, 1040488963) + + W(6, 1031782004, -1112406582, -1109731202, -1120370051) + + W(7, 1047048563, -1114617505, -1122334215, 1035089325); + sum2 = + W(0, -1123583696, 1041752924, -1096033091, 1045050816) + W(1, -1111810372, 1044170848, 1035342189, -1107148363) + + W(2, -1114190125, -1107057915, 1054169171, 1066165326) + + W(3, -1090934807, -1094482563, 1030376713, -1140242036) + + W(4, -1123424510, -1108427412, -1101097272, 1060681517) + + W(5, -1093738003, -1094742214, -1122528198, 1046835784) + + W(6, 1035265901, 1011193218, -1097576107, 1037309084) + W(7, -1094306097, 1047478278, 999245333, -1118953940); + WS(1057107647, -1115492411); + sum1 = W(0, 1043432615, 1017716083, -1115224236, 1034158091) + W(1, 1031606075, 1040917102, -1119339928, 1006053250) + + W(2, 1037909322, 1044219192, 1059480955, 1055275264) + W(3, 1062186253, 1051479740, 1036604907, 1049031914) + + W(4, -1092726398, -1094557923, -1089724826, -1079486955) + + W(5, -1094016652, -1090199321, -1111430132, -1124163213) + + W(6, 1042270260, 1036944691, 1049175094, 1036017407) + + W(7, -1112299996, 1012181234, 1026270626, -1106538704); + sum2 = W(0, 1018646782, -1107779390, 1048577488, -1094766117) + + W(1, 1029078775, 
-1115207772, 1041354677, -1134027235) + + W(2, 1037675363, 1041358579, -1103110037, 1063077105) + + W(3, -1094239344, -1097294293, -1106609062, 1005749079) + + W(4, 1056993548, -1114299594, -1108103869, 1069120744) + + W(5, 1019393926, -1090842684, -1103795001, -1111892981) + + W(6, -1095820604, -1106961783, -1113810892, -1094104293) + + W(7, -1100654825, 1030620215, 1015746998, 1031888221); + WS(1038408187, -1104646224); + sum1 = + W(0, -1100666444, 1032659338, -1096791479, 1050254507) + W(1, -1125163769, 1034716030, -1104041563, 1024265746) + + W(2, 1031841147, -1095811512, 1046629869, -1086582314) + + W(3, -1092033141, -1095590162, 1012740704, -1097384267) + W(4, 1045835257, 1046356758, 1055310447, 1049546009) + + W(5, 1059064223, 1032941357, 1016606100, 1043234370) + W(6, -1121385297, -1115316924, -1109718991, 1043876936) + + W(7, -1115882186, 1051195252, -1110386378, -1121465454); + sum2 = + W(0, 1033146053, 1034495435, 1033891153, 1015618236) + W(1, -1102272365, 1026402836, -1126192412, -1130670616) + + W(2, 1033178697, 1013274853, 1044565157, -1083473003) + W(3, 1047899954, 1033829854, 1026659107, -1129244632) + + W(4, -1110558215, -1103797816, -1074394842, -1075035519) + + W(5, 1070896015, -1107786181, 1039466503, 1033633210) + W(6, -1107600367, 1046600596, -1096372667, 1071496075) + + W(7, 1070598351, -1103057086, -1099451771, -1106841766); + WS(1060540543, -1145107984); + sum1 = W(0, 1016949312, 1033700674, 1008485935, 1050394829) + W(1, 1025975118, 1027755062, -1166944971, 1036625882) + + W(2, -1111708420, -1115158282, -1112773425, -1105627788) + + W(3, -1166436080, 1049090616, -1131929874, 1027903943) + + W(4, 999025948, 1037057540, -1111113150, -1105528438) + + W(5, -1092533512, -1097657169, -1125616567, -1104970374) + + W(6, -1110878953, 1034636061, 1006219749, 1047777444) + W(7, 1034379342, 1023310559, 1042471825, 1029991232); + sum2 = + W(0, -1112727972, 1041061913, 1030171918, 1049192765) + W(1, -1097932217, -1108991614, -1128255593, -1128975721) + 
+ W(2, 1028528033, 1047913039, 1058299937, -1088725965) + W(3, 1006741474, 1028842331, -1119404040, 1019613109) + + W(4, 1050127712, -1120606988, -1075312291, 1051042420) + W(5, 1061621025, -1120436768, 1041035795, 1038145345) + + W(6, 1039139414, -1090975722, -1096898908, 1056682084) + + W(7, 1043329296, -1136718330, 1019658885, 1017746289); + WS(1058512095, 1047466767); + sum1 = W(0, 1025672397, 1032743763, 1046625694, 1040236986) + W(1, 1041791363, 1042422310, 1024317081, 1034505416) + + W(2, -1102753041, 1015664161, -1102686879, -1099087483) + + W(3, -1097904327, -1097334084, -1123206279, -1096407969) + + W(4, 1037445664, 1032570339, -1091026739, -1089271035) + + W(5, -1100626131, 1051389809, 1041773046, 1050962859) + W(6, 1037433000, 1033220516, 1035056566, 1024398415) + + W(7, 1043847942, 1042353123, 1037697617, 1047698739); + sum2 = W(0, -1111694157, -1113457231, 1007020677, -1110494513) + + W(1, 1036239980, 1042151096, 1014804229, 1038268008) + + W(2, -1097429792, 1040318944, -1109123041, -1087577139) + + W(3, -1115766281, 1033854974, 1056323633, 1058473731) + W(4, 1052756319, -1131566802, 1066240251, 1071158756) + + W(5, 1055974927, -1085216854, -1076147337, -1078122726) + + W(6, -1115035673, -1108486657, 1057627634, 1052796029) + + W(7, 1037312144, 1031946717, -1107413695, -1088208012); + WS(-1087119871, 1051442968); + sum1 = W(0, -1106951233, 1026199466, -1111827529, 1034911407) + + W(1, -1110066258, 1036906330, -1121489041, -1123695161) + + W(2, -1101188351, -1102133666, -1096477343, -1081083296) + + W(3, -1093686286, -1089794286, -1102503648, -1096524016) + + W(4, 1055191809, 1046522759, 1061746551, 1062137809) + W(5, 1056808794, 1061657374, 1025414769, 1043754020) + + W(6, -1118371512, 1024298399, -1102462991, -1118895495) + + W(7, -1105243164, 1028468933, 1001909715, 1039027467); + sum2 = + W(0, 1036012528, -1136414534, 1028357901, 1018170307) + W(1, 996800246, -1115554829, 1018408279, -1128152611) + + W(2, -1111568121, 1023613585, 1046290638, 
-1086518431) + W(3, 1032866656, 1040198779, -1115393969, 1031067220) + + W(4, -1125130883, 1037328619, -1077265349, -1068062671) + + W(5, 1083861052, 1057186482, -1101393956, 1024816461) + W(6, 1019317279, -1141870971, -1105362146, 1041918725) + + W(7, 1040576139, -1110577355, 1032099345, -1109411807); + WS(1041081598, -1101063046); + sum1 = W(0, 1038662072, -1105628103, 1046838359, -1112508084) + + W(1, -1112561351, 1047812813, -1106996345, 1045311157) + + W(2, -1123374599, 1048894326, 1044003395, 1062900743) + W(3, 1058339600, 1041428913, 1044355834, 1013224727) + + W(4, 1035193857, -1098151405, -1109376817, -1082115704) + + W(5, -1087104856, -1097118497, -1098603955, -1110120844) + + W(6, -1104056554, 1009505700, -1107771451, 1039692648) + + W(7, 1049374543, -1126547869, 1037690530, -1121969199); + sum2 = + W(0, 1034938637, -1110454828, 1044300205, -1094841314) + W(1, 1049263704, -1123904781, 1032882093, 1018550156) + + W(2, -1102665080, 1066739686, 1063593500, -1072571644) + W(3, 1050585763, 1033188794, 1040935597, 992411043) + + W(4, -1083931487, 1057500561, 1069787057, -1073076170) + W(5, 1057940110, 1049962317, -1103057884, 1032437745) + + W(6, -1096043986, 1050111041, 1057117236, -1094917255) + + W(7, 992791419, -1119131193, -1129197337, -1139242755); + WS(1047493374, 1019974383); + sum1 = W(0, 1039818779, 1022499782, -1111466513, -1128872350) + + W(1, -1102282821, 1027318580, -1112367361, -1121442618) + + W(2, 1024992878, 1056719767, 1056992349, 1066523241) + W(3, 1057279594, 1050310409, 1043633508, 1050332018) + + W(4, -1099980259, -1093967742, -1089692488, -1081677329) + + W(5, -1104336096, -1095763174, -1147598863, -1103201160) + + W(6, -1119405261, 1017537921, -1134657044, 1032226588) + + W(7, -1110606353, 1029746358, -1105211995, -1109613299); + sum2 = + W(0, -1109381660, 1024006001, 1048332575, 1030946168) + W(1, -1111460550, -1115660792, 1027780517, -1125113546) + + W(2, -1099927212, 1048105027, 1083214625, -1068785948) + + W(3, -1080184440, 
-1125303858, -1129923962, 972536438) + W(4, 1032613200, 1031703418, 1051352300, -1082836395) + + W(5, 1047472657, 1027323348, 1006215943, -1123338357) + W(6, 1013674604, -1113331152, 1010546556, -1137462284) + + W(7, 1033485356, 1034637753, -1112671380, 1033990282); + WS(1049191295, 1026054180); + sum1 = W(0, 1015431080, -1114491642, -1106844240, 1052544606) + + W(1, -1101337172, 1006821496, 1048579163, -1107367092) + + W(2, -1104530451, 1045418990, -1091806904, -1092275830) + + W(3, 1032037152, -1091613549, -1136631349, -1104046587) + + W(4, -1120791464, -1105318659, 1059085476, -1096734436) + + W(5, 1055073816, 1054997266, -1117752159, 1054027444) + W(6, 1041795830, -1107859847, 1035107546, 1050727987) + + W(7, -1098252244, 1024892180, -1114835508, -1107321874); + sum2 = W(0, 1026733028, -1099170713, -1116840904, -1119731112) + + W(1, -1105853851, 1023766170, -1109160117, -1110446049) + + W(2, 1045033945, 1052123052, 1011374557, 1034116255) + W(3, 1042679995, -1103542762, 1002727514, -1116987471) + + W(4, 1006278578, -1100795787, -1137517037, -1093051584) + + W(5, 1032967726, 1030006514, 1010971477, 1032206215) + W(6, 1045459634, 1045732660, 1036547859, 1049735621) + + W(7, -1115758581, 1012804929, -1107202197, -1108882618); + WS(1061349183, 1052960956); + sum1 = + W(0, -1129777715, -1138324169, 1040354025, -1113069161) + W(1, 1026875928, -1148679486, 1026350741, -1099366619) + + W(2, -1091852750, -1098630008, -1086948121, -1087783598) + + W(3, -1090578505, -1098392878, -1105317245, -1113156712) + + W(4, 1051531432, 1050117038, 1058780391, 1063142264) + W(5, 1054898923, 1055753851, 1035239971, 1049214940) + + W(6, 1028688348, 1001458973, 1026112190, -1101543047) + W(7, 1001807913, -1107707133, 1009551482, 1035358529); + sum2 = + W(0, 1030464194, -1114101658, 1031167290, 1076423743) + W(1, 1075592137, -1122765276, -1073082101, -1067763317) + + W(2, 1016786912, -1111442214, 1040962599, 1052649879) + W(3, 1059016603, -1109148248, 1031660172, -1093716078) + + W(4, 
-1124271380, 1036708968, -1145350210, -1106763968) + + W(5, 1036666460, -1102467968, 1040647493, -1123536642) + + W(6, -1112870762, 1035127660, -1119263136, 1030450749) + + W(7, -1114823676, 1030453841, -1120712732, 1004437586); + WS(1049240575, 1032641532); + sum1 = W(0, 1013730783, -1105480706, -1114036922, -1113879072) + + W(1, -1104229785, -1108053747, -1114764169, -1146082265) + + W(2, 1058635434, 1033988870, 1060052265, 1060507082) + W(3, 1060417175, 1056801557, 1045744608, 1050162924) + + W(4, -1096215015, -1101342144, -1095577996, -1090178495) + + W(5, -1088104859, -1096180920, -1096622362, 1035143657) + + W(6, -1112522509, -1114702617, 1026538642, -1118644393) + + W(7, 1026113715, -1115402364, -1165415829, -1096614130); + sum2 = W(0, -1122276311, 1035291415, -1120194245, 1040446858) + + W(1, -1120530587, -1138075205, 1025217366, -1118638617) + + W(2, -1070701037, -1088124412, 1066415245, 1074084237) + + W(3, 1049852139, -1122804719, 1023048268, -1139513797) + + W(4, -1102500792, -1135890037, 1041496354, -1104713726) + + W(5, -1121031703, -1103825640, -1123990667, 1028358694) + + W(6, -1123484857, -1122878839, 1004310705, 1043449641) + + W(7, 1033156545, 1027648711, 1022832518, -1136916673); + WS(1058199967, 1050173679); + sum1 = W(0, 1035142580, 1027370573, -1130262537, 1035504777) + + W(1, -1114438555, -1117883815, 1019391016, 1002653538) + W(2, 1049466778, 1058691576, 1039593430, 1068404719) + + W(3, 1055172389, 1049428736, 1057860609, 1038434456) + + W(4, -1090097411, -1101732588, -1086371009, -1088745136) + + W(5, -1085029652, -1087836907, -1099893249, -1090680108) + + W(6, -1180331449, 1040649373, -1120260556, 1040680775) + + W(7, 1044382793, -1124005336, 1042216563, -1112700144); + sum2 = W(0, -1116884059, 1053572077, -1098664530, 1052736946) + + W(1, -1109898534, -1100793752, 1049155169, -1110972354) + + W(2, -1098591960, 1057696884, -1094622297, 1068603564) + + W(3, -1110319390, -1092295148, 1060435963, -1093610507) + + W(4, 1049382153, 1027480579, 
-1090176467, 1060239128) + W(5, 1035982822, -1110105636, 1048950002, 1038370396) + + W(6, -1103314039, -1088718218, -1083506692, -1093271444) + + W(7, -1100206230, 1032439700, 1034453968, 1006875670); + WS(-1089372991, 1071972514); + sum1 = W(0, -1138920887, -1111770809, -1102811010, -1115715559) + + W(1, 1039086134, -1097310634, 1031148825, 1025838913) + + W(2, 1036533027, -1104793556, 1055175791, -1099167419) + + W(3, -1087507272, 1049070430, -1119324807, 1003453989) + + W(4, -1102318256, 1048489074, -1087761408, 1059252133) + + W(5, 1053162297, -1114433380, -1118494745, 1042268638) + W(6, 994211079, -1105180974, 1048668925, 1002658941) + + W(7, -1118760057, 1015366427, 1042346841, 1035401151); + sum2 = W(0, 1030112837, -1120976510, 990074782, -1093456890) + + W(1, 1037465699, 1026987255, -1141406991, -1139369648) + W(2, 1042265630, 1036120703, 1034066763, 1054439327) + + W(3, -1093016348, 1018637880, -1135344000, 1039303764) + + W(4, 1026971571, -1114486026, 1060265655, 1048349987) + + W(5, -1099844302, 1021593632, -1122669452, -1105461723) + + W(6, -1110663804, 1036038820, -1093575269, -1097582212) + + W(7, 1034062294, 1015927322, -1131991172, -1112258092); + WS(1064707295, -1106068023); + sum1 = W(0, -1099232505, 1035233224, 1008917721, -1113461090) + + W(1, 1030936392, 1004172005, 1027313190, -1115756158) + + W(2, -1122722466, -1096714286, -1092366070, -1085444145) + + W(3, -1087864449, -1089654074, -1109977746, -1096643772) + + W(4, 1050522762, 1050648196, 1057836626, 1065526616) + W(5, 1058128499, 1053303385, 1046037453, 1045431365) + + W(6, -1163785122, 1029641572, -1106158547, -1112344246) + + W(7, -1120989814, 1023496586, -1132212895, 1026679026); + sum2 = W(0, -1064765544, -1081497441, 1073825520, 1076205171) + + W(1, -1119211273, 1027623494, 1025169073, 1044157267) + W(2, -1093639717, 1052279998, 1049786220, 1062543003) + + W(3, -1112239696, -1095053933, 1041086677, -1111173396) + + W(4, 1043466147, -1106897145, -1111313590, 1028519541) + + W(5, 
-1110796836, 1038884306, -1138045203, -1103601885) + + W(6, -1118670633, 1026058867, -1120615525, 1005098917) + + W(7, 1002459349, 1024155429, -1139354443, 1031190784); + WS(-1111449083, -1157616163); + sum1 = W(0, -1140902742, -1108906886, -1107746707, -1091492130) + + W(1, -1106259270, -1123578095, -1112828291, -1129218172) + + W(2, 1050785866, 1041550344, 1063519767, 1070223306) + W(3, 1056548545, 1033679887, 1033485706, 1037279241) + + W(4, -1096435822, -1095149714, -1097214657, 1059314470) + + W(5, -1091358630, -1102873285, -1098893397, -1105253969) + + W(6, -1106580347, -1111739842, -1112949561, -1090511327) + + W(7, -1115482851, -1102694659, -1119291392, -1105806553); + sum2 = + W(0, 1036877913, -1107073590, -1107699229, 1043855187) + W(1, -1112562117, 1015957137, -1137092650, -1113777301) + + W(2, -1106537131, 1029358898, 1059152956, 1059494373) + W(3, -1100478450, 1042289801, -1117856531, 1034651775) + + W(4, 1030920508, -1148982309, -1101290720, 1057939604) + + W(5, 1054005616, -1078154452, 1052159854, -1095096566) + + W(6, -1107276407, -1158332371, -1104066278, 1057817844) + + W(7, -1089434898, 1041374005, -1109759735, -1112996199); + WS(-1078462192, -1081042006); + sum1 = W(0, 1031777492, 1032754472, 1041664052, -1097597536) + + W(1, -1104816664, -1113883983, 1034668408, -1123431127) + + W(2, -1099264287, 1043123090, -1089905073, 1061087074) + W(3, 1058804947, 1049386855, 1033759807, 1048603595) + + W(4, 1046006998, -1098614104, -1129274543, -1106235787) + + W(5, -1084543524, 1054195792, -1107840264, 1024992909) + W(6, 987183826, 1036343655, 1009990171, -1097811109) + + W(7, 1031138063, -1097275448, 1026038601, -1120606380); + sum2 = + W(0, 1045113399, -1114091637, -1107885511, -1138476145) + W(1, -1105540689, 1035866831, -1106333432, 1027850472) + + W(2, 1042319052, -1104675582, -1091877193, 1051152873) + W(3, -1091823953, 1034466874, 1039480371, 1012452041) + + W(4, -1096815184, -1111655631, -1112520325, 1061790396) + + W(5, 1058061302, -1097657024, 
1040922298, -1158709515) + + W(6, -1116743882, -1101972302, 1050965596, 1049921296) + + W(7, -1092103725, 1042351236, -1102843808, -1125733657); + WS(1056694143, -1116016311); + sum1 = W(0, -1113789237, -1100431022, 1050877939, 1018856325) + + W(1, 1050520537, -1097203011, -1103919120, -1146363818) + + W(2, 1051083644, -1126988186, 1064972735, 1040063173) + W(3, 1063261729, 1049408537, 1026641091, 1053332672) + + W(4, -1097139864, -1094809972, -1089357240, -1094444318) + + W(5, -1089641983, -1094346349, -1100978845, -1097363714) + + W(6, -1123205479, 1034447326, 1036660918, -1108178982) + + W(7, 1047968461, -1113292277, 1038849023, -1114302469); + sum2 = + W(0, -1117908102, 1013765983, 1045942570, 1086231745) + W(1, 1048993826, -1110131683, 1005670802, -1126497913) + + W(2, 1019359999, -1145413682, -1095749402, -1062836828) + + W(3, -1094656357, 1037693606, -1125761673, 1018172516) + + W(4, -1120905960, 1028148573, 1032861566, -1087097136) + + W(5, -1145044098, 1040390747, -1112982831, 1016402027) + W(6, 1017197102, 1027705717, -1127537989, 1031521084) + + W(7, 1027867224, -1114691836, 1031905995, -1122299754); + WS(1058898623, -1129627348); + sum1 = W(0, -1115961291, -1114910804, -1099814536, 1014875535) + + W(1, 1042946610, 1008636276, -1106406626, -1120594878) + W(2, 1024824451, 1028416398, 1053775806, 1049045373) + + W(3, -1081844071, 1031972351, 1007262993, 1044145240) + + W(4, 1015893437, -1107386562, -1081677098, 1054905786) + W(5, 1057687981, 1045923489, 1043411542, 1016529769) + + W(6, -1111982511, 1040362701, 1049488325, 1016424474) + + W(7, -1105282974, -1118478155, 1018226304, 1008933435); + sum2 = + W(0, -1133545882, 1048887568, -1092588040, 1040041167) + W(1, -1098849827, 1009986974, 1044263399, -1120224962) + + W(2, 1024635319, -1104650552, -1106613727, 1061771033) + W(3, 1059347143, 1020453405, -1100016935, 1003801667) + + W(4, -1123675276, 1009836758, -1090053582, 1063348295) + + W(5, -1095509129, -1091467004, 1034750124, 1013849670) + + W(6, 
1049399700, -1109494815, -1105276085, -1099749985) + + W(7, -1094328117, 1040581639, 1041257354, -1116729410); + WS(1051954047, 1053754534); + sum1 = W(0, -1115617616, 1024010640, -1105864485, -1104806436) + + W(1, -1109064955, -1116880702, -1125878182, 1017392662) + + W(2, 1038078984, 1036755054, 1063482792, 1054877383) + W(3, 1061183100, 1032370277, 1043596932, 1041423460) + + W(4, 1012663597, -1111840872, -1094956956, -1088048944) + + W(5, -1092794954, -1106496725, -1106933961, 1027390113) + + W(6, -1106689047, 1027644834, 1000291764, -1099610254) + + W(7, 1030290339, -1110075188, -1134690355, -1125796047); + sum2 = + W(0, 1032585330, -1120362784, 1053495069, -1103911013) + W(1, -1152525762, -1128147020, 1012657824, -1104983105) + + W(2, 1041153226, 1038083675, -1085822417, 1062089523) + W(3, -1088542584, 1044421032, -1113855086, 1041652917) + + W(4, -1098242715, -1097998955, -1097864806, 1056336567) + + W(5, 1061326318, 1046053205, 1042938789, -1160964996) + W(6, 1045110917, 1041971655, -1094674500, -1090235355) + + W(7, -1100885631, -1141849761, -1114518568, 1032164357); + WS(1061027871, -1104546242); + sum1 = W(0, -1105616597, 1044372474, 1021564129, 1044930783) + W(1, 1046184033, 1039863327, 1037242932, 1036593932) + + W(2, 999947493, -1093243621, -1093936864, -1082760108) + + W(3, -1089254915, -1087020314, -1095491356, -1089177610) + + W(4, 1020340785, 1054887248, 1050165368, 1059757863) + W(5, 1059995997, 1055330116, 1049586618, 1053764638) + + W(6, 1044086916, -1111771647, 1042456335, -1109427891) + + W(7, -1109416824, -1114227250, -1119355674, 1025046581); + sum2 = + W(0, -1071093827, 1026041138, 1068370917, 1068180608) + W(1, -1099034443, -1126398467, -1114641099, 1032038973) + + W(2, 1060306884, -1087262504, -1080863241, -1104665609) + + W(3, 1065726420, 1036496204, 1052019191, -1112342652) + W(4, 1049935328, -1137957530, 1024099582, -1097098353) + + W(5, -1115701817, 1043337171, -1155572887, 1039831002) + + W(6, -1107356858, 1030953599, 1026067688, 
-1149593287) + + W(7, 1031805059, -1111590832, -1110320626, -1165279566); + WS(-1085156031, 1045210454); + sum1 = + W(0, 1032928297, -1111119484, 1023710638, 1012189550) + W(1, -1119437633, 1039427612, -1114651991, 1039591187) + + W(2, 1062377574, 1050029461, 1062121112, 1061895670) + W(3, 1058328143, 1050106073, 1051812528, 1050020631) + + W(4, -1090582815, -1097825399, -1087364538, -1084850381) + + W(5, -1086721078, -1087870448, -1095972450, -1089842983) + + W(6, -1097791665, 1025846477, 1032229097, 1038427330) + W(7, 1040335420, 1036234257, 1026163704, -1146480404); + sum2 = + W(0, -1123647935, 1039341132, 1035983028, -1100132407) + W(1, -1107056231, 1035406254, -1116224056, 1039408558) + + W(2, 1061817512, 1048143772, -1106274437, -1080635557) + + W(3, -1101127944, -1113472410, 1025367216, -1101636583) + + W(4, 1083879676, 1075567701, -1081957201, -1069811820) + + W(5, -1080342730, -1083251742, -1113852980, -1087422720) + + W(6, 1056516802, 1037735150, 1028242764, -1096421593) + W(7, 1035329144, 1030744652, -1094803270, 1049804166); + WS(-1079692512, 1054562755); + sum1 = W(0, -1109749038, -1107273417, -1102577952, 1030213964) + + W(1, 1036563817, 1046944740, 1031914983, 1050658577) + W(2, 1042438683, 1049923207, 1044388569, -1113189609) + + W(3, -1089184118, -1087748930, -1096053014, -1091524869) + + W(4, -1092016161, -1091034933, -1089719139, 1050309800) + + W(5, 1048990040, 1053009554, 1048709856, 1049517424) + W(6, 1050333996, 1042543591, 1055304278, 1034159515) + + W(7, 1040366472, -1122229091, 1006842402, 1034826102); + sum2 = + W(0, 1041125733, -1097603156, -1103003109, -1107000988) + W(1, 1032085636, 1048646468, -1094740103, 1056003520) + + W(2, -1094982607, 1045744428, -1102781952, 1063877853) + + W(3, -1096755169, 1041101455, 1038162032, -1106723297) + W(4, 1055140379, 1049918932, -1106862916, 1066829095) + + W(5, -1090474230, -1081598978, 1049572267, -1085369603) + W(6, 1061130686, 990353951, 1058588396, -1096233798) + + W(7, -1095838692, 
-1089860215, -1096717844, 1044183141); + WS(-1080283264, 1053171958); + sum1 = W(0, 1045885175, -1113658461, 1034538909, 974135701) + W(1, -1122513036, -1118011958, 998696774, -1108239136) + + W(2, 1054395509, 1050045174, 1055879386, 1060600957) + W(3, 1061640838, 1060753746, 1054788217, 1061053402) + + W(4, -1083475508, -1095192933, -1085810962, -1084797135) + + W(5, -1086057732, -1087204563, -1092652351, -1108529436) + + W(6, 1028512496, 1035446876, 1026195253, 1044250612) + + W(7, 1037521197, -1117222317, 1038575473, -1101819472); + sum2 = W(0, 1049760823, -1132622893, 1033001558, -1136534149) + + W(1, 1043696998, -1092886300, -1098156850, 1058228696) + W(2, 1082611215, 1077078383, 1066257283, 986681348) + + W(3, -1084657320, -1069070549, -1072372844, -1074505611) + + W(4, -1114287788, 1055018182, -1101461080, -1078537691) + + W(5, 1054805894, 1065047159, 1049854091, -1082348593) + + W(6, -1130959534, 1035090608, -1112584386, -1090394759) + + W(7, 1058031741, 1038822180, 1041633511, -1115499033); + WS(-1073398920, 1057727890); + sum1 = + W(0, 1041979362, -1106188342, -1108985859, 1040300287) + W(1, 1044683952, 1046284093, 1044621499, 990388050) + + W(2, 1052994159, 1056847849, 1060197326, 1059196498) + W(3, -1094535618, -1094105983, -1102914751, 1043962286) + + W(4, -1092671491, -1087976920, -1085676269, -1108460501) + + W(5, 1051731585, 1046385743, 1031818215, -1104493131) + W(6, -1110295050, 1033778006, 1040328063, -1114468666) + + W(7, -1102804911, -1098429184, -1104640754, 1033981777); + sum2 = W(0, 1021160276, -1093219342, -1089562294, -1106388958) + + W(1, -1099093932, -1180890998, -1090965228, 1057228486) + + W(2, -1089430647, -1135654176, -1079796815, 1074674993) + + W(3, -1098010580, 1052929457, 1040584224, 1047748991) + + W(4, -1102895535, 1044152573, -1095163331, 1074136497) + + W(5, -1095203310, -1099885509, 1041387674, -1131585606) + + W(6, 1031984432, -1096279754, 1011763192, -1101895278) + + W(7, -1117345690, -1095468231, -1107912603, 1045144673); 
+ WS(-1087643711, 1073414034); + sum1 = W(0, 1043421796, -1116796288, 1042590607, -1098636497) + + W(1, 1008405302, 1005024220, -1106384411, 1029633907) + W(2, 1043019738, 1056957030, 1048028053, 1065994910) + + W(3, 1061262410, 1059267719, 1045820026, 1054563645) + + W(4, -1091862260, -1093379129, -1089120888, -1081315117) + + W(5, -1088787029, -1093593189, -1108751805, -1106230054) + + W(6, 1028452722, -1112484637, 1043084437, -1120978641) + + W(7, 1046109207, -1105879924, 1024805700, -1095994501); + sum2 = + W(0, 1015257573, -1117575950, 1040590395, 1040031817) + W(1, -1104875608, -1111928738, -1124331304, -1104400845) + + W(2, -1101361376, 1051853927, 1007636882, -1085239439) + W(3, 1029484438, 1055085469, -1104410806, 1055334030) + + W(4, -1151605286, -1126719224, -1099060090, -1069257368) + + W(5, 1078894141, 1065488662, -1097362053, -1099504604) + + W(6, -1114431290, -1122150057, -1123376079, -1122292554) + + W(7, 1045790053, -1105709439, 1046590167, -1096100911); + WS(-1089052703, 1050218486); + sum1 = W(0, 1024924308, -1124263665, -1114112032, -1101857739) + + W(1, 1004177297, -1132101549, 1037546120, -1127618735) + W(2, 1032977094, 1048865735, 1032370348, 1057017435) + + W(3, -1096925880, -1106597310, -1101706323, -1138126252) + + W(4, -1127622480, -1103037451, -1093324152, -1130658874) + + W(5, 1049797356, -1147023991, -1148374781, 1041185327) + + W(6, 1016061078, 1027347950, 1047024376, -1114101947) + + W(7, -1136753151, -1136950735, 1016234194, -1135392976); + sum2 = + W(0, -1103127935, 1034551493, -1131095837, 1056852609) + W(1, 1041291951, 1031312786, -1119687326, -1125922018) + + W(2, 1033687244, -1111330163, -1111327049, -1097897810) + + W(3, -1092598078, 1027703970, -1119650108, 1037109557) + W(4, 1044282132, -1132159854, 1043675418, 1038437355) + + W(5, -1085166716, -1122838335, 1030316614, -1115941571) + + W(6, -1115309191, -1101537844, 989659429, -1126658673) + + W(7, -1101159602, 1065573620, 1035948285, 1021015548); + WS(1065854560, 
-1114586365); + sum1 = W(0, 1025132845, -1132208108, 1043375962, 1036127555) + W(1, 1042813603, 1042302785, 1041475414, 1044294516) + + W(2, -1093129789, -1097600366, -1094779500, -1085251325) + + W(3, -1085614646, -1083792532, -1088034874, -1081301837) + + W(4, 1039292161, 1047014713, 1054220253, 1059484531) + W(5, 1058902029, 1058554289, 1049492791, 1057966909) + + W(6, 1052478106, -1114318540, 1045472794, 1040640636) + + W(7, 1026772633, 1030110605, -1123458066, 1048041627); + sum2 = + W(0, -1082315074, 1044643809, -1098370418, 1032322202) + W(1, -1088636595, 1057109638, -1093539506, 1061431798) + + W(2, -1085719643, 1026206592, 1060376399, 1066420950) + W(3, 1042910065, 1025783712, -1117795056, -1120476284) + + W(4, 1072912386, -1098026838, 1068488422, 1072201946) + + W(5, 1055106268, -1087763572, -1084317601, -1077472196) + + W(6, -1097858732, -1097387905, -1097681385, -1095903963) + + W(7, -1092707091, -1104219349, -1087895305, -1105350768); + WS(-1071370880, 1080498273); + sum1 = W(0, -1117491793, 1039364315, 1035846886, 1023528601) + + W(1, -1131889533, -1106314247, 1028375004, 1013100112) + + W(2, -1117320376, -1100646881, -1094786920, 1045848824) + + W(3, 1057069771, 1055228236, 1020389492, -1127017775) + W(4, 1025681812, 1049740511, 1058631773, -1094741601) + + W(5, -1088346742, -1102894479, -1138962992, 995411618) + + W(6, -1112958490, -1103504723, -1105396320, 1032469201) + + W(7, 1034336534, -1144513794, -1116565942, 1002008579); + sum2 = W(0, -1097850747, -1107398437, -1085613590, 1042292809) + + W(1, 1054106205, -1106648125, 1035361236, -1122554235) + + W(2, -1108946242, -1114277717, 1063125687, -1084555354) + + W(3, 1059764079, 1041901467, -1103098774, -1148617751) + + W(4, -1129761151, -1118690429, 1040789135, -1101999238) + + W(5, 1051725304, -1128317719, 1022272136, 1025957669) + W(6, 1024320851, 1012636527, 1029278628, 1026191463) + + W(7, -1119736004, -1127460399, -1115360404, 997652141); + WS(1058528159, 1053906024); + sum1 = W(0, 
-1112917396, 1035675727, -1104938189, 1040468760) + + W(1, -1105254898, 1020887917, -1113141838, -1149793215) + + W(2, -1104256080, -1092229473, -1097223269, -1082484606) + + W(3, -1103704557, -1100912091, -1118771074, -1099449368) + + W(4, 1023681564, 1058833811, 1051100098, 1061928249) + W(5, 1057519273, 1047691241, 1047329733, 1044776397) + + W(6, 1039659283, 1021887717, 1033492802, -1119337706) + + W(7, -1099217437, -1127325712, -1114402703, 1024214504); + sum2 = W(0, -1129442596, 1023778714, -1113953277, 1026809294) + W(1, 1035644478, 1034718426, 1016827280, 1038084720) + + W(2, 1040644144, 1033307012, 1056896617, -1080168883) + + W(3, 1030172798, 1046899220, -1114672577, -1138817753) + + W(4, -1106302200, 1056981996, 1082625405, -1064674592) + + W(5, -1098216535, -1107268878, -1138831353, -1118740750) + + W(6, -1105488042, 1014815489, 1048992482, 1043950232) + + W(7, -1120732434, -1121557578, 1026207834, -1122753582); + WS(1050028863, 1057904824); + sum1 = W(0, 1016998719, 1031484924, -1113918453, 1035763396) + + W(1, -1100192683, 1046082071, 1023767163, -1109585801) + + W(2, -1115527732, -1114471481, -1092713340, -1086973069) + + W(3, 1048884339, -1093669960, -1109475380, -1102684193) + + W(4, 1049960370, -1097014217, 1059124695, 1055821255) + W(5, 1045748179, 1046233018, 972758359, 1047341446) + + W(6, 1033316845, 1045307870, -1113659631, -1113580281) + + W(7, -1104759887, 1024760783, 1028768132, -1118597711); + sum2 = + W(0, 1042577042, 1035985609, -1132583692, -1108557005) + W(1, 1045882832, 1049978360, -1123148614, 1037998467) + + W(2, 1058174637, 1022451597, -1083350192, -1102051996) + + W(3, -1096734805, 1040525485, -1111555351, 1012738322) + + W(4, -1093234196, 1028288917, -1089654696, 1066585170) + + W(5, -1112976831, -1101233023, 1016281700, 1035922998) + W(6, 1036828005, 1036599280, 1043707159, -1099096459) + + W(7, -1117191962, 1038823523, 1010883764, -1106116243); + WS(1064158815, 1035299335); + sum1 = W(0, -1112803360, 987363362, 1040202935, 
-1148543050) + + W(1, -1119103150, 1016866897, -1120253212, -1136961231) + + W(2, 1016610641, -1111853690, -1090418272, -1086418793) + + W(3, -1096774480, -1106302329, -1125874772, -1111706212) + + W(4, 1043480489, -1116662313, 1043761841, 1062267936) + W(5, 1026731899, 1046949595, 1044534239, 1038415821) + + W(6, 1023721675, -1114846141, 1040437218, 1044974085) + + W(7, -1112576238, 1035797256, -1118929285, 1017229376); + sum2 = W(0, 982347958, 1032509555, -1111294086, 1034453505) + W(1, -1107492941, 1047671755, -1114281099, 1041657921) + + W(2, -1130481715, -1105443060, 1041532435, -1090026061) + + W(3, -1115327745, -1099067440, 1007189687, -1106888056) + + W(4, 1039697927, -1102232537, 1059036310, 1065332554) + W(5, 1049776291, 1035057967, -1116755236, 1039394631) + + W(6, 1029826582, -1106323503, -1087181565, -1095157490) + + W(7, 1032022369, -1113855935, -1137758215, -1116441678); + WS(1064590463, 1056702913); + sum1 = + W(0, 1029043770, 1053372231, -1096751755, 1042753121) + W(1, -1132040847, -1098166823, 1054745035, -1101919647) + + W(2, -1118119137, 1049750478, -1090400466, -1089893441) + + W(3, 1035942347, -1098407696, 1057009004, -1105867448) + W(4, 1044521535, 1040212560, 1048069253, -1098259842) + + W(5, 1046519145, -1126503113, 1029470901, -1137574860) + + W(6, -1108622774, 1055292354, -1086796959, 1028130891) + + W(7, 1043794349, -1097599727, 1057963224, -1099480944); + sum2 = W(0, -1112469736, -1109993908, -1113626244, -1101975228) + + W(1, -1113395603, -1115391866, -1111764318, -1114050909) + + W(2, 1047380321, 1048112351, 1039961237, 1053720344) + W(3, 1033059361, -1117206471, 1050486894, 1029031616) + + W(4, -1107309008, -1096267194, -1149218892, 1041967405) + + W(5, -1105421186, 1041082150, -1097114178, -1116211284) + + W(6, -1130012001, 1044766357, 1024449363, -1105185961) + + W(7, 1034928166, -1103730760, 1041700935, -1133808950); + WS(1049282175, -1081654589); + sum1 = W(0, 1026879882, -1106636470, 1025593670, -1120434947) + + W(1, 
-1109057316, -1103814142, -1108405355, 1025046173) + + W(2, 1056840376, 1051340514, 1059046350, 1060994015) + W(3, 1062254571, 1054415372, 1044610849, 1057281100) + + W(4, -1089715922, -1103588481, -1087728227, -1081081863) + + W(5, -1089160278, -1110084674, -1097611347, -1102762237) + + W(6, 1023576205, 1019713064, 1018583793, -1116831948) + + W(7, 1040563949, -1148705741, -1130543166, -1108038889); + sum2 = W(0, 1021240677, -1117500355, -1104166105, -1110155670) + + W(1, -1101525323, -1099065181, -1135933351, -1100288377) + + W(2, -1112389783, -1154883848, -1102583420, 1061056453) + + W(3, -1123615532, 1038434309, 1041908272, -1120914319) + + W(4, 1025718250, -1096682611, 1048911763, 1065471868) + + W(5, -1149445264, -1090281103, 1042017461, -1131934595) + + W(6, -1104779102, 1051908652, -1093040032, 1051220588) + + W(7, -1113750073, -1098156535, 1046751376, -1100861861); + WS(1044733566, -1111466942); + sum1 = W(0, 1033634167, -1105078967, -1104340430, -1106426842) + + W(1, -1101553619, -1110993602, -1113465444, 1020428386) + + W(2, 1060020808, 1041929508, 1062221635, 1060048443) + W(3, 1059584252, 1054717575, 1044713794, 1047352089) + + W(4, -1092913556, -1103349137, -1092289734, -1090785767) + + W(5, -1094348262, -1097827057, -1098824243, 1041060918) + + W(6, -1112663104, -1134753630, -1126194825, -1110397240) + + W(7, -1114529117, -1113238887, -1140846474, -1094820883); + sum2 = + W(0, 1040248415, -1113076108, 1043824330, -1105221470) + W(1, 1036602392, -1134849104, -1163695359, 1011808464) + + W(2, 1084299283, 1066146464, -1073408210, -1067326117) + + W(3, -1097425816, 1016852200, -1119512248, -1118870508) + + W(4, 1054267917, 1032173778, -1102778436, -1084477148) + W(5, -1149511007, 1040914456, 1026379762, 1011171408) + + W(6, 1002599056, 1029289900, 1027082476, -1116688572) + W(7, 1024010398, -1121212984, 1007530464, 1020903456); + WS(1041204862, 1020998748); + sum1 = W(0, 1041320410, -1111643799, 1033147201, 1033305824) + + W(1, -1110534482, 1048653459, 
-1106102028, -1129433652) + + W(2, 1040787048, 1044400490, -1112000574, 1057857240) + W(3, 1058317173, 1013490791, 1040475768, 1032979455) + + W(4, -1129649268, -1096018340, 1041936758, -1090212595) + + W(5, -1083609992, -1108280561, -1098465902, -1104103124) + + W(6, -1136864165, -1123167632, 1014392453, 1043382402) + + W(7, 1046901861, 1040774794, -1121200123, -1123801518); + sum2 = + W(0, 1033367466, 1036408153, 1049195658, -1116950481) + W(1, -1081345874, 1035726191, -1109751565, 1034565507) + + W(2, -1105739926, 1041467605, -1097896497, 1069287141) + W(3, 1045676499, -1097322898, 1034595972, 1022861151) + + W(4, 1035166617, -1116824529, -1127250349, -1103787577) + + W(5, -1104285546, -1110450029, 1041775082, -1111377560) + + W(6, -1121294834, 1020242785, 1024987646, -1113169844) + + W(7, -1109755223, -1162599282, -1128984227, 1015705141); + WS(1061587071, -1140717261); + sum1 = W(0, -1112565188, 1035683412, 1034588141, -1105710858) + + W(1, 1024277837, -1104681461, 1033519565, -1097426768) + + W(2, -1082585266, -1091946062, -1086181493, -1084260507) + + W(3, -1087521200, -1094344584, -1109665588, -1092361421) + + W(4, 1063001355, 1057075562, 1061257307, 1062016535) + W(5, 1059484653, 1051726365, 1053582679, 1050023351) + + W(6, 1029148274, -1133150565, -1121255338, 1031972073) + + W(7, -1140527211, 1015752222, -1140031633, 1054184694); + sum2 = + W(0, 1052095029, 1040595366, -1098987175, -1084153168) + W(1, -1091290481, -1096720628, -1101102486, 1044734380) + + W(2, -1068831962, -1076957703, -1081538085, 1037971476) + W(3, 1070110671, 1069986978, 1074264190, 1070997731) + + W(4, -1076286348, -1081349117, -1088771362, 1057908839) + + W(5, -1114213425, 1063483875, 1068552571, 1075267150) + W(6, 1061242602, 1046002483, 1040290526, -1093902656) + + W(7, -1094367529, -1094489561, -1099236623, -1081182731); + WS(-1071242520, -1095264341); + sum1 = W(0, -1097248122, 1017326143, -1105336241, -1091856432) + + W(1, 1051920949, -1097210280, -1122345698, 999615444) + W(2, 
1057257956, 1050464317, 1059871125, 1058219580) + + W(3, 1056167546, 1051126389, 1047883744, 1055359408) + + W(4, -1095464807, -1098731544, -1096805078, -1088025845) + + W(5, 1032394345, -1089070234, -1102431837, -1098523426) + + W(6, -1098637431, 1039614697, 1034135507, -1098503641) + + W(7, 1057766069, -1095933097, 1030493340, -1112287963); + sum2 = W(0, -1132961311, 1034954689, -1131131128, -1105685093) + + W(1, 1044104138, -1118659572, -1117347443, -1140373421) + + W(2, 1038597560, -1114842936, 1039456110, -1104727669) + + W(3, -1085849936, 1053272959, -1117746499, 1049759548) + + W(4, -1089057302, -1113751042, -1095192990, -1097910004) + + W(5, 1064526735, 1045257331, 1054461586, 1062278408) + W(6, 1055074030, 1048893008, 1046429121, 1057064004) + + W(7, -1091871118, -1096323208, -1090448877, -1081803378); + WS(1055746431, 1040288248); + sum1 = W(0, -1154016945, -1117613130, -1108247483, -1114072538) + + W(1, -1103530014, -1134724696, -1102903742, 1046496384) + + W(2, 1057716289, 1055590360, 1059644857, 1067207252) + W(3, 1062212336, -1104299500, 1049302591, 1054319858) + + W(4, -1093759650, -1094487986, -1089867895, -1085229664) + + W(5, -1089860607, -1088563377, -1097577389, -1094962569) + + W(6, -1122750430, -1117149477, 1031955359, 1016534394) + + W(7, 1044969554, -1137442922, 1035178328, -1108831509); + sum2 = + W(0, 1041968914, 1026703145, 1033601916, 1035338047) + W(1, -1106207855, -1094112140, -1100978937, -1106593629) + + W(2, 1051752153, -1115260470, 1038625472, 1052354182) + + W(3, -1118891350, -1081023345, -1084581041, 1066756564) + + W(4, -1123800846, 1035127537, 1035458769, 1035609386) + W(5, 1057116581, -1104080857, 1055976243, -1101750238) + + W(6, -1117011966, 1031245794, -1109678904, 1050054059) + + W(7, -1098815779, 1038926653, -1108176195, -1107244818); + WS(-1089881759, 1037957184); + sum1 = W(0, 1000251530, -1117869170, 1029072469, 1031851625) + + W(1, 1024508279, -1105582638, -1121710869, -1105403363) + + W(2, -1098029886, -1095515176, 
-1089055515, -1079829205) + + W(3, -1094362449, -1105281331, -1112643769, -1102959900) + + W(4, -1105974859, 1060224425, 1060210489, 1061092856) + W(5, 1060581840, 1056017638, 1040568609, 1050032516) + + W(6, 1041712355, 1040628361, 1042449912, -1119516507) + + W(7, -1102207965, -1107063314, -1112932479, -1124065669); + sum2 = W(0, 1051114274, -1105565679, 1055378846, -1098769061) + + W(1, -1106275827, -1101598109, -1109849433, -1104426610) + + W(2, -1102532150, -1114476854, -1082969540, 1064670328) + + W(3, -1102001010, 1044770567, -1114021751, 1049459491) + + W(4, -1093533301, 1038543175, -1075201485, 1077554590) + + W(5, 1048871628, 1040464379, -1111404687, 1049461049) + + W(6, 1042271071, -1112348507, -1087360189, -1100903801) + + W(7, -1115312306, 1031777408, -1125205988, -1104546886); + WS(-1089609215, -1090633405); + sum1 = W(0, -1095985139, -1131730646, -1106836118, -1120898567) + + W(1, -1122196780, -1108220230, -1113602180, -1111282801) + + W(2, 1060468726, -1114989785, -1111340153, -1083229835) + + W(3, -1097505195, -1091862496, -1118278055, -1094974325) + + W(4, 1029992257, 1035463235, 1053834481, 1057093887) + W(5, 1060127651, 1056095953, 1053644551, 1060964715) + + W(6, -1106189340, -1121474072, -1109753904, -1170836447) + + W(7, -1099640750, -1106303115, -1102165833, -1114233933); + sum2 = W(0, -1102755055, -1123071369, -1101642458, -1121616776) + + W(1, -1123659230, 1034971308, -1114444448, 992820811) + W(2, 1028184991, 1018963304, 1042446539, 1048904011) + + W(3, 1022095962, -1102092583, -1115812270, -1115600403) + + W(4, 1014375557, 1034540123, -1110958917, 1055634924) + W(5, -1122273513, 1047229966, 1006913192, 1032293939) + + W(6, 1018724927, -1154538439, 1030328833, -1104340897) + + W(7, -1123674010, -1103087688, -1118183380, -1116477720); + WS(-1089497119, -1083970920); + sum1 = W(0, 1049400081, -1112985379, 1032734776, 1046931016) + W(1, 1032966898, 1026703569, 1023567673, 1020804657) + + W(2, 1017038202, 1054102421, 1052273107, 1050677101) + 
W(3, 1056472798, 1054012759, 1046736000, 1054561791) + + W(4, -1096866095, -1092440080, -1090492445, -1077775563) + + W(5, -1095500286, -1141821439, -1103091876, -1096856494) + + W(6, -1129668560, 1023489999, 1039999326, 1047866250) + + W(7, 1031139121, -1128248842, 1037611037, -1102316761); + sum2 = W(0, -1119432539, -1114702364, -1131454616, 1045807900) + + W(1, -1107556037, -1138592655, -1120197723, -1116354551) + + W(2, -1113150368, -1107904829, 1051795717, 1065572002) + W(3, -1104213250, -1096394963, 991774812, 991245404) + + W(4, 1049552369, -1109676099, 1049548364, 1052256292) + + W(5, 1058452060, -1077576443, -1102019177, 1049782966) + + W(6, -1123950204, 1034167140, 1018171432, 1024318084) + + W(7, -1104886632, -1103410805, -1103678409, -1123412946); + WS(-1092399743, 1070790531); + sum1 = W(0, 1031557241, 1024013147, 1045977841, 1049827334) + W(1, 1044712430, 1042539942, 1022243624, 1042631871) + + W(2, -1110110152, -1123928476, -1100428029, -1093314535) + + W(3, -1098211617, -1120318828, -1110797385, -1103475236) + + W(4, -1086332927, -1104003948, -1091187884, -1110202001) + + W(5, 1031718940, 1031171411, -1118825252, 1047659449) + W(6, 1040307665, 1029330819, 1054647477, 1048337215) + + W(7, 1045074033, 1042256462, 1034920639, 1041342974); + sum2 = W(0, 1035080118, -1111978522, 1043189101, -1107203795) + + W(1, 1020874649, 1040978901, -1103963851, 1014904337) + W(2, 1049091174, 1049791001, 1038894820, -1088677682) + + W(3, -1074681252, 1032440616, -1139760881, -1132242473) + + W(4, 1068767241, 1068692590, 1041587068, -1079138335) + W(5, -1088844169, 1033618204, 1036797334, 1040720920) + + W(6, 1056655092, -1120502660, 1054889999, -1094007639) + + W(7, -1106617291, 1037089622, -1115116202, 1029103732); + WS(-1076899872, -1090262268); + sum1 = W(0, -1111775583, -1105515965, 1031134981, -1152866339) + W(1, 1022185487, 1031461535, 991487050, 1013082739) + + W(2, -1105212520, -1109220950, -1105276619, -1081197371) + + W(3, -1085735312, -1090100176, 
-1106571081, -1094472404) + + W(4, 1053805936, 1044900104, 1061229259, 1056451270) + W(5, 1061180827, 1061600458, 1040836429, -1114201801) + + W(6, -1130865048, -1124779047, -1108606366, 1012899053) + + W(7, -1111684786, 1038272628, -1123743158, 1043636133); + sum2 = W(0, -1092758077, -1099940688, -1096959860, -1093749782) + + W(1, 1047974360, -1120633714, 1036248767, -1113388587) + + W(2, 1054372816, 1036990346, -1102824961, 1072234824) + + W(3, -1091574668, -1098403340, -1097220109, 1024848209) + + W(4, 1025999909, 1044094629, -1093700170, 1074465248) + + W(5, -1086059593, -1083936129, 1045585843, -1093741871) + + W(6, 1025560437, 1015649794, -1134776466, -1118772240) + + W(7, -1097328702, 1035273338, -1098634116, 1051989609); + WS(-1097318719, -1106686758); + sum1 = W(0, -1138034233, 1022220430, -1115259277, 1037878284) + + W(1, -1109043646, -1118564628, -1132126739, -1123355602) + + W(2, 1031803579, -1105675680, 1060345356, 1040095311) + W(3, 1057210483, 1018511203, 1038086620, 1040876381) + + W(4, -1107190316, -1110575515, -1091489993, -1082557626) + + W(5, 1054616755, -1113047197, 1037558661, 1040895458) + W(6, -1118470609, 1013564641, 1028754310, 1043380040) + + W(7, -1126688788, -1103180903, -1105313401, -1110366427); + sum2 = W(0, -1116780480, 1034654933, 1033980194, -1106740681) + + W(1, 1037910476, -1110588743, 1033369701, 1006481529) + W(2, 1011610145, -1109786563, 1034290987, 1058839838) + + W(3, -1112504437, 1048688059, -1100404580, 1024874106) + + W(4, -1126532250, 1049246354, -1098441369, 1055488745) + + W(5, -1081564854, -1109313096, 1046210019, -1122526519) + + W(6, 1031867194, -1117436106, 1050453777, -1098876833) + + W(7, -1101879397, -1105182695, 1033744368, 1042001428); + WS(1061957727, 1058150789); + sum1 = W(0, 1009728708, 1031914819, 1043097483, -1106863550) + + W(1, -1097372952, -1108191069, 1032793786, -1129266281) + + W(2, -1099288198, -1097230757, -1087266183, 1043273168) + + W(3, 1055416522, 1050026271, -1110590381, -1108247452) + W(4, 
1046412713, 1055587995, 1056802398, 1049133325) + + W(5, -1086687979, -1105368103, -1107437206, 1042988739) + + W(6, 1036440691, -1109853493, -1106212891, 1023154886) + + W(7, 1047286155, 1036106272, 1024106902, -1120124353); + sum2 = W(0, 1021676194, -1104415808, 1041444786, -1104915444) + + W(1, 1042839409, 1025561656, -1111488206, -1119893126) + + W(2, 1026925373, 1041631623, 1049347642, -1075706495) + W(3, 1068941883, 1049874161, -1096918949, 1030080935) + + W(4, -1130821928, 1047340087, 1055108729, -1078646873) + + W(5, 1058328878, 1054741696, -1108156193, -1107876167) + + W(6, 1011207236, -1118374396, 1032289888, -1097824639) + + W(7, 1041964936, -1122940124, 1033049167, -1131526532); + WS(1058678303, 1013994144); + sum1 = W(0, -1118673511, -1145988558, -1107600786, -1097799452) + + W(1, 1050901309, 1025019066, 1034380585, -1117490811) + W(2, 1050426208, 1032382213, 1059103555, 1061451838) + + W(3, -1098106458, 1032049821, 1038038624, 1043957152) + + W(4, -1111022430, 1019043839, -1086017695, -1090188381) + + W(5, 1050664020, -1093177128, -1133222475, -1113568728) + + W(6, -1110204509, -1116037137, 1043824660, -1111434373) + + W(7, -1123560275, 1043538778, -1104489131, -1110765932); + sum2 = W(0, -1105859137, 1047608842, -1101390223, 1011548907) + + W(1, -1098146078, 1047719826, -1099968186, 1028887915) + + W(2, 1052524883, -1096529292, -1103682938, 1018027622) + + W(3, 1039330573, -1101134187, 1054816538, -1105962981) + + W(4, -1116369180, 1033298597, -1095014501, 1063707518) + + W(5, 1056731707, -1086128756, 1037701277, -1150004284) + + W(6, 1025172812, 1035180205, -1138831970, -1096368530) + + W(7, -1094263126, 1061932077, -1095466983, 1016476277); + WS(1063038079, 1033861047); + sum1 = W(0, -1111914898, 1040884807, -1102681756, 1061986003) + + W(1, -1095429920, -1109559865, 1034752240, -1104233906) + + W(2, 1016779545, 1014390112, -1109833994, -1095449911) + + W(3, -1097838100, 1019923216, -1148961393, 1009274926) + + W(4, -1107257338, 1045397811, 
-1096677860, 1065846455) + W(5, 1029008656, 1039008680, 1033037520, 1029851870) + + W(6, -1114919788, -1162045399, -1101855164, 1049909751) + + W(7, -1097193708, -1113534130, 1007645095, -1103320918); + sum2 = W(0, -1112457849, -1106002020, -1103752624, -1114732591) + + W(1, -1102536580, 1029961226, -1114728817, 1038070251) + + W(2, -1117177515, 1017665294, -1098231154, 1062239338) + + W(3, -1097906884, -1101585142, 1037597321, -1112453861) + + W(4, 1031942991, 1033022206, -1133545004, 1061426536) + + W(5, -1109528685, 1032626487, -1114069131, 1032988844) + + W(6, -1106095669, 1041685053, -1095196752, 1046863079) + + W(7, -1105989872, -1120505375, 1028074076, -1124888510); + WS(1060691839, -1121680521); + sum1 = W(0, -1112633460, 1034282296, 1016470792, -1124091490) + + W(1, 1036530779, -1110508885, 1036991882, -1133939630) + + W(2, -1105323108, -1097943963, -1091137362, -1083253895) + + W(3, -1090987401, -1090137469, -1098291034, -1093525810) + + W(4, 1045243383, -1117038833, 1060606770, 1065759511) + W(5, 1062311590, 1056765570, 1054157412, 1057599172) + + W(6, -1113361469, -1109479713, -1110311799, -1102522287) + + W(7, -1114082482, -1104777690, 1016563642, -1167556095); + sum2 = W(0, -1097189010, 1041454563, -1110771302, 1043926258) + + W(1, -1109373654, -1109768355, -1114304258, 1043669875) + + W(2, 1057901747, 1039698409, -1122301550, -1125642190) + + W(3, -1108285138, 1020817735, 1040279727, -1101494695) + + W(4, -1086723052, -1087530727, -1094374456, 1064818137) + + W(5, 1036021291, -1109979249, 1055924705, 1040712355) + + W(6, -1086497310, -1090377488, -1110538585, 1061014784) + + W(7, 1034378960, 1044647130, 1013465199, 1050381101); + WS(-1087684831, -1094525449); + sum1 = W(0, 1038107891, -1121734275, 1035814365, -1107604308) + + W(1, 1038052214, -1101928869, -1104767443, -1127021124) + + W(2, -1093934649, -1096975274, -1093954738, -1090018791) + + W(3, -1088075930, -1090123804, -1106254226, -1100235009) + + W(4, 1052573184, 1049163549, 1040479417, 
1066601083) + W(5, 1058232216, 1057618495, 1040796439, 1051666987) + + W(6, 1038198313, -1113930844, 1035537105, -1102439448) + + W(7, -1113118893, -1148589172, -1146582942, 1033628463); + sum2 = W(0, -1102391770, 1032495868, -1140349122, 1051494072) + + W(1, -1080481573, -1072283498, -1085993190, -1091499210) + + W(2, 1038892506, 1032348345, 1055934176, 1066287204) + W(3, 1070533091, 1056179234, -1098172645, 1038613315) + + W(4, -1109920182, 1035913564, 1043439866, 1049172795) + + W(5, 1048700512, -1122911277, 1042548330, -1103404378) + + W(6, 1034045130, -1111661823, -1148365864, 1035742233) + + W(7, 1024670194, 1043482108, -1117726050, 1024343165); + WS(-1095407551, -1073547033); + sum1 = W(0, -1103915126, -1101976843, 1024335748, -1108044768) + + W(1, 1050893513, -1095324014, -1107189281, -1114975198) + + W(2, 1043640456, 1041775803, 1061599658, 1057524528) + W(3, 1060862745, 1028413803, 1044362232, 1051352312) + + W(4, -1106314263, -1098551608, -1096990689, -1095708126) + + W(5, -1102020840, -1092797676, -1110398586, -1097969466) + + W(6, -1099126911, 1023142916, -1110220795, -1106562202) + + W(7, 1056016670, -1099653918, 1031215069, -1138223071); + sum2 = W(0, -1112168075, -1111493200, -1123529566, 1022286331) + + W(1, -1103466952, 1035493883, -1113566520, -1106265315) + + W(2, 1034632723, -1122920764, 1009053647, 1049936313) + + W(3, 1051909492, -1099948229, 1029419308, -1107051081) + + W(4, 1034960798, 1040740621, -1105159128, 1057592446) + W(5, -1126339526, 1041517424, 1016655048, 1024151142) + + W(6, -1112860057, -1108202591, -1115675116, -1112708946) + + W(7, -1146770753, -1116950762, -1115594520, -1103163741); + WS(1063407871, 1051042354); + sum1 = W(0, 1005154604, -1120529264, -1106161584, -1130858780) + + W(1, -1114354154, -1127255711, 1023693521, -1115504868) + + W(2, 1049807310, 1052429035, 1057743574, 1063894449) + W(3, 1058090310, 1052514789, 1044231916, 1049931918) + + W(4, -1113122997, -1100831547, -1088929544, -1086654500) + + W(5, -1089886688, 
-1092092086, -1106897472, -1097639884) + + W(6, -1107282310, 1042685852, -1113923042, -1129272494) + + W(7, -1164896285, -1131567786, -1131959253, -1115987013); + sum2 = + W(0, -1113031572, 1030565628, -1127702936, 992883874) + W(1, 1014952720, 990495554, 1018238740, 1023316920) + + W(2, 1043434467, -1108784254, -1111949688, 1036810375) + + W(3, -1111107284, 1042918247, -1120169364, -1112993936) + + W(4, -1088941888, 1056568736, 1036016830, 1060861120) + W(5, 1023742716, -1098281783, 1022041004, -1112517088) + + W(6, -1067187434, -1079041185, 1074040966, 1075709893) + W(7, -1099161149, 992814626, 1039733673, 1028892682); + WS(1053166591, -1107822593); + sum1 = W(0, 1033874440, 1022746982, 1035703758, 1019810386) + W(1, 1044260196, 1042374543, 1040749291, 1041374685) + + W(2, -1120335240, 1031831786, 1052124034, 1048792350) + + W(3, -1095582904, -1088629918, -1097950065, -1085974448) + + W(4, -1105709561, -1108379508, -1093762507, 1046826502) + + W(5, 1053612659, 1045805451, 1037463342, 1042845918) + W(6, 1042509273, 1008295657, 1044631286, -1124953572) + + W(7, -1112928126, -1113071488, -1120529605, -1132054272); + sum2 = W(0, 1010990056, 1039106059, -1102786881, -1114641521) + + W(1, -1133768253, -1101476912, -1103607882, -1113060394) + + W(2, -1096561981, -1097601992, 1031463124, 1058004743) + + W(3, 1042505523, 1041182740, -1105959219, -1099775079) + W(4, 1040308359, 1052717590, 1058176412, 1045709336) + + W(5, 1034355291, -1096768246, 1033275384, -1105804625) + + W(6, -1119485380, -1115258493, -1101721469, -1138490792) + + W(7, -1101570252, 1029590734, 1037679123, 1036565969); + WS(1028916214, 1067075549); + sum1 = + W(0, 1039584518, -1119264713, 1016298993, -1105861885) + W(1, -1113980813, 1020015061, -1134777055, 1032661512) + + W(2, -1148995491, -1109885152, 1055889168, 1067421167) + W(3, 1059632007, 1055091716, 1045130509, 1052375930) + + W(4, -1098323934, -1096386895, -1096985498, -1085982162) + + W(5, -1087532161, -1088331431, -1095878225, -1104015130) + 
+ W(6, 1026160863, 1022848653, -1150434165, 1034469007) + W(7, 1044152506, 1024761132, 1032543553, -1104676378); + sum2 = W(0, -1120016029, -1096222017, -1119382227, -1092715875) + + W(1, -1110818689, 1049876716, -1103484103, 1032841990) + + W(2, -1101422768, -1103630239, 1040689805, 1064582511) + + W(3, 1043406218, -1089499537, 1029271710, -1117522613) + + W(4, -1104653664, -1105545071, -1120927281, 1058022283) + + W(5, 1060808344, -1107109410, -1107008053, -1105419897) + + W(6, -1122344899, 1042131820, -1120071349, -1101606830) + + W(7, -1096987934, 1051590178, -1099435532, 1050342947); + WS(1047020030, 1040511430); + sum1 = W(0, -1108637223, 1033110649, 1044090139, 1043424750) + + W(1, 1036624046, -1120308481, 1033721201, -1106849700) + + W(2, -1092384755, -1090443530, -1085982557, -1080046633) + + W(3, -1083957593, -1096279270, -1149642107, -1100324533) + + W(4, 1057603229, 1053549631, 1061618217, 1047924234) + W(5, 1053999549, 1057048888, 1057845673, 1052833857) + + W(6, 1031927268, -1122999800, 1026366340, 1032840448) + W(7, 1015929108, 1020572028, 994286645, 1039624063); + sum2 = W(0, 1035208568, 1043030610, -1107699159, 1044796248) + + W(1, 1034655700, -1103668262, 1045164876, -1095424772) + + W(2, -1124082321, 1008382386, -1107061123, 1049576136) + + W(3, 1051573865, 1048541752, -1093669891, -1134934914) + + W(4, -1093736918, 1050323039, -1114950423, 1074526989) + + W(5, 1073198167, -1080621030, -1074078238, -1079139349) + + W(6, 1048736861, -1118377912, -1106119918, -1111443567) + + W(7, -1132389881, 1034733436, 1034120076, 1029501600); + WS(-1080108544, 1072234904); + sum1 = + W(0, -1115070415, -1130610603, -1098588213, 1057466749) + W(1, -1104374087, 1029557602, 1018553236, 1001112594) + + W(2, 1027588151, 1029085890, 1032775046, -1089931562) + W(3, -1123371019, -1094030554, 1034779836, 999211181) + + W(4, 1031137458, -1104252646, 1056995037, -1083673968) + W(5, 1052314097, 1050628415, 1033643941, 1036988180) + + W(6, 1018404227, 1044293186, 1028038521, 
1050777268) + W(7, -1093125043, 1046325824, -1108340394, 1020739741); + sum2 = W(0, -1115557063, -1118678156, 1045099863, -1102856438) + + W(1, 1035209289, 1035167397, -1117208818, -1120387027) + + W(2, 1043108929, 1033401680, 1036186051, -1124450766) + + W(3, -1112637977, -1114646959, 1012643139, -1132473655) + + W(4, -1138826120, -1093193803, -1079584214, 1068262877) + + W(5, -1121148137, 1041792105, -1138286224, 1032348301) + + W(6, -1108049897, -1109597407, 1013268663, 1049634970) + + W(7, 1046161245, -1111895568, -1127251156, -1116707942); + WS(1061770399, 1033097145); + sum1 = W(0, -1127803310, -1108446526, 1046619869, -1102069216) + + W(1, -1115166002, -1107462472, -1113123881, -1109525661) + + W(2, -1100493560, -1122290538, -1090144134, -1124504076) + + W(3, -1096124978, -1113245389, -1115437168, -1114380655) + + W(4, 1049484359, 1040952192, 1049567736, 1060283011) + W(5, 1036170309, 1049188012, 1004781969, 1043620172) + + W(6, -1117956049, -1101349332, 1015706547, 1051006353) + + W(7, 1027971186, -1105896346, 1011773295, -1118021614); + sum2 = + W(0, -1115090363, -1146121477, 1029869322, 1039239074) + W(1, -1123366919, -1123569769, -1115297931, 1022224092) + + W(2, 1035607607, 1032878473, 1020296178, -1094567498) + W(3, -1133024223, -1111505684, 1036037468, 1025152664) + + W(4, 1036976780, 1008730983, 1050702114, 1060438996) + W(5, -1092544833, -1088372125, -1119747923, 1015952550) + + W(6, -1102631851, -1105868946, -1106613659, 1056742871) + + W(7, 1056526109, -1103439045, -1114092237, -1109382916); + WS(1063262431, 1041246684); + sum1 = + W(0, -1095604910, 1035931377, -1116408259, -1113418944) + W(1, 1035543059, -1109446615, 1041501409, -1111045021) + + W(2, -1094141452, -1103734011, -1088213584, -1084116356) + + W(3, -1088057339, -1084926720, -1095142868, -1085134126) + + W(4, 1054301899, 1050116188, 1059695011, 1063045487) + W(5, 1060587189, 1062081288, 1055163174, 1061043357) + + W(6, 1050361575, 1015464930, -1130477940, 1005018697) + W(7, 
-1114430457, -1112900435, 968128030, 1012274924); + sum2 = W(0, -1088652568, -1101936220, -1091191468, -1102444501) + + W(1, -1123800532, -1111069004, -1088813372, 1050243792) + + W(2, 1079171932, 1073764649, 1074855165, -1113430470) + + W(3, -1095064501, -1078839186, -1078450334, -1071519758) + + W(4, 1068650823, 1062997529, 1053027607, 1039600983) + + W(5, -1105207914, -1082346847, -1088354544, -1081653443) + + W(6, -1089634291, -1106083041, -1102140198, -1123235056) + + W(7, 1028735871, 1056073450, -1114344542, 1060600412); + WS(-1071343712, -1080188504); + sum1 = W(0, 1048814204, -1112944093, 1038698214, -1110388460) + + W(1, -1102353903, -1113346827, -1115354285, -1172877482) + + W(2, 1024112567, 992179242, 1035991718, 1065540811) + W(3, 1062800155, 1057270438, 1047814390, 1055244391) + + W(4, -1101821021, -1105846854, -1110088095, -1086620536) + + W(5, -1085891530, -1092189183, -1094362793, -1096652125) + + W(6, -1155239198, -1115614373, -1117961066, 1022833410) + + W(7, 1037955557, -1130491710, 1026576596, 1021561214); + sum2 = W(0, 1077488778, 1067869852, -1080283979, -1070445594) + + W(1, -1085461181, -1113067149, -1135625536, -1105631704) + + W(2, 1057844910, 1017329883, -1112258962, 1038200220) + W(3, 1045877800, 1016515143, 1029029881, 1026840239) + + W(4, -1110901413, -1123912240, 1048616910, -1103189448) + + W(5, 1033584054, -1113680120, 1006246833, 1028070058) + + W(6, -1157420388, 1016773013, -1127671945, 1027002057) + + W(7, -1139116556, -1108492850, 1033508697, -1122607860); + WS(-1114300667, -1123436789); + sum1 = W(0, 1048745214, -1106612992, 1040619730, -1101020989) + + W(1, -1120049178, -1129452308, -1112058252, 1040777596) + + W(2, 1024457460, 1049280824, 1061265254, 1066754197) + W(3, 1060475010, 1056957204, 1052291799, 1051532558) + + W(4, -1084727981, -1099711469, -1091611130, -1090437274) + + W(5, -1087975654, -1087165059, -1095257281, -1091858685) + + W(6, -1124831645, -1120312678, 991636461, -1115033343) + + W(7, 1037659252, 1036876438, 
1024723233, -1128167880); + sum2 = W(0, -1102675226, 1045758589, -1106608364, 1037856863) + + W(1, -1121700495, 1038509645, 1041300787, 1013024414) + W(2, -1096018291, 1033992894, 1053924217, 1023354017) + + W(3, 1045521037, -1112080608, 1019757091, -1111416466) + + W(4, -1064870457, -1078407710, 1074354229, 1077566439) + + W(5, 1055978728, -1100523264, 1032114149, -1096094745) + + W(6, 1050758265, -1091427154, 1044800412, -1122690967) + + W(7, 1037435194, -1101170582, 1048475585, 1031501486); + WS(-1081349952, -1073531246); + sum1 = W(0, -1130396178, -1123247001, 1049234666, -1123595694) + + W(1, 1044575749, -1118849787, 1027091408, 1015989072) + + W(2, -1092471918, -1094799967, -1087091570, -1083589055) + + W(3, -1090515766, -1088301188, -1100895532, -1095518837) + + W(4, 1052933181, 1040552427, 1062313536, 1056428784) + W(5, 1060166366, 1052569014, 956727337, 1057005798) + + W(6, -1120779255, 1032899791, 1048808240, 1047688662) + + W(7, 1025076592, -1104574961, -1114767697, 1028829344); + sum2 = W(0, 1036727690, 1038605852, 1000461252, -1108807283) + W(1, -1118764382, 1040753555, 1025474323, 1026410269) + + W(2, -1101780564, 1036224480, 1043313873, 1058417373) + + W(3, -1110550387, 1041024731, 1032765623, -1114514162) + W(4, 1047063866, 1003722492, 1071364196, 1075919102) + + W(5, 1031984760, 1052825370, -1104266299, 1049040777) + + W(6, -1095151348, -1101329253, -1071443435, -1072909475) + + W(7, -1137970650, -1094681513, -1130737363, -1100563228); + WS(-1087415039, 1075227720); + sum1 = + W(0, 1014202565, -1121306305, -1115466670, 1055732347) + W(1, -1088701572, 1045428619, -1106855619, -1113272762) + + W(2, 1030200505, -1093044977, 1052765481, -1086016434) + + W(3, -1108244135, 1044242725, -1102530383, 1042147022) + + W(4, 1042588557, 1045750086, -1099185023, -1102841730) + W(5, 1060432915, 1001272287, 1047811955, 1043051755) + + W(6, 1032642210, 1039690838, -1099716881, 1054066516) + W(7, -1089156724, 1045967960, 994787170, -1114980876); + sum2 = W(0, 
1034817173, -1106843150, 1035283571, -1117324330) + + W(1, -1106783723, 1033652183, -1105003316, -1109219720) + + W(2, 1033088277, -1115275500, -1108330169, 1059951304) + + W(3, -1114610359, -1130129964, -1100855920, 1017452836) + + W(4, -1105460751, 1042281307, -1117933953, 1055263105) + W(5, 1036736577, 1034893511, 1040893249, 1026860110) + + W(6, -1119967238, -1122100142, -1118763227, 1033190213) + + W(7, -1099584511, -1097591398, 1043864443, -1097701018); + WS(1061885343, -1093433134); + sum1 = + W(0, -1119074171, 1040591342, 1024365415, -1112185530) + W(1, -1102861510, 1047951017, 1033079635, 1030794670) + + W(2, -1116612499, -1106037336, -1090085717, -1110885309) + + W(3, 1058148680, -1098920364, -1104145445, -1102965524) + + W(4, 1043937396, 1035985530, 1054530437, -1105284753) + W(5, -1085809524, 1042740717, -1143903027, 1048819605) + + W(6, 1026065358, 1035233146, 1023445318, 1041032751) + W(7, 1043298226, 1028431483, 1030877520, -1110782149); + sum2 = W(0, -1120425303, 1041777983, 1008642595, 1038578170) + + W(1, -1103931105, -1144970974, -1139607395, -1105571910) + + W(2, -1115506247, -1109249837, 1051302633, 1062460419) + + W(3, 1046700430, -1110379832, -1121481149, -1125592805) + + W(4, -1089905659, -1082926483, 976609774, 1062992305) + W(5, 1016533657, 1046952726, -1128426413, 1044012895) + + W(6, -1106986872, -1093100415, -1100742938, -1121360027) + + W(7, 1026171953, -1123505732, 1047068010, -1108759193); + WS(1057387711, 1071476886); + sum1 = W(0, 1029774760, 1046876472, 1049447728, -1104814472) + W(1, 1047761369, -1107998911, 1035672969, 1025453339) + + W(2, -1088145158, -1090879061, -1087750555, -1081209882) + + W(3, -1102041597, -1088704948, -1101378696, -1090772220) + + W(4, 1058722665, 1046940920, 1062289140, 1060559271) + W(5, 1057163193, 1046238393, 1049034574, 1051154538) + + W(6, 1026644842, -1124024690, 1034805113, -1101227374) + + W(7, 1041428045, -1112057972, -1123650077, 1045125730); + sum2 = W(0, -1079574021, -1076424748, -1084651933, 
-1079383446) + + W(1, -1094010079, -1105513599, -1094989946, -1083946107) + + W(2, 1066168402, 1072246945, 1055648588, 1069722149) + W(3, -1105729737, 1052366950, 1046266897, 1064021500) + + W(4, -1106633910, 1039775524, -1122282375, 1050613020) + + W(5, -1112550614, 1046757765, 1041413657, -1117589808) + + W(6, 1033041398, 1025560740, 1042767844, -1122422479) + + W(7, 1029478726, 1029747528, -1132333344, -1110924549); + WS(-1088291167, -1076464582); + sum1 = W(0, 1038365083, 1013092187, 1049005176, -1096083994) + W(1, 1046666973, 1029385568, -1130846211, 1035921309) + + W(2, 1048475643, 1039614653, -1105893787, -1090164821) + W(3, 1048630844, 1052597003, 1030762351, 1049653618) + + W(4, -1115380053, -1115741870, -1115211424, -1106465144) + + W(5, -1092124773, -1097273638, -1104199584, -1098780292) + + W(6, -1113517956, -1118168192, 1038779144, -1103584918) + + W(7, 1051487172, 1036949051, 1029432699, 1041505439); + sum2 = W(0, -1102442432, 1042447103, 1009200726, -1140140535) + + W(1, 1018235219, 1038566242, -1105836948, -1119461411) + + W(2, -1119046775, -1103497340, -1108371687, 1063726871) + + W(3, 1023996549, -1100556235, -1134905751, -1115426548) + + W(4, 1038119755, -1117033259, -1111757198, 1053430514) + + W(5, -1097582390, -1130738637, 1027482933, 1038939472) + + W(6, -1116688635, -1109692823, -1136365839, -1103585210) + + W(7, 1035998106, -1122627505, -1113769016, -1131103707); + WS(1063535295, -1106643391); + sum1 = W(0, 998897947, -1114069468, 1030590255, -1120649212) + + W(1, -1112623985, -1135800742, -1126057998, -1118923687) + + W(2, 1049160779, 1050551697, 1037435628, 1059662980) + W(3, 1051258674, 1051146291, 1042046535, 1051522915) + + W(4, -1095715331, -1114097198, -1091439068, -1098500149) + + W(5, -1093374928, -1112852607, -1104302276, -1098602138) + + W(6, 1022592892, 1035333911, -1105875754, 1036951763) + + W(7, -1097348874, 1021555078, 1032877804, -1118947123); + sum2 = W(0, 1020791040, -1119848331, 1028098574, 1024075228) + + W(1, 
1036335573, -1114214294, 1028067710, -1123998545) + + W(2, 1023004440, 1035188360, 1036866165, -1081188844) + W(3, 1036267331, 1020492762, 1026996488, 1028180222) + + W(4, -1131522740, 1010552055, -1087402238, -1063807577) + + W(5, -1074297638, 1044255896, -1109527927, -1125487816) + + W(6, -1130592194, -1114014595, 1057774258, 1085749988) + + W(7, 1072260443, -1101071935, 1041502854, -1121770211); + WS(1065370528, -1140007425); + sum1 = W(0, -1119191691, -1123119348, 1043193134, -1144777506) + + W(1, 1048874737, -1114692372, 1047204584, 1028382351) + + W(2, -1089109360, -1093467562, -1104035088, -1082641845) + + W(3, -1090665335, -1085597050, -1097312497, -1084438004) + + W(4, 1051697742, 1051845168, 1049231373, 1058881533) + W(5, 1060727795, 1053776045, 1050811579, 1054559607) + + W(6, 1048704645, -1120571445, 1047054964, -1128753754) + + W(7, -1122636375, -1135014444, 962936920, 1047044271); + sum2 = W(0, 1072219394, 1068804680, 1068134846, 1066171639) + + W(1, -1095987744, -1080918851, -1077110574, -1071545857) + + W(2, -1078126224, -1079031251, -1078951161, -1087436811) + + W(3, 1039512383, 1067164113, 1069504242, 1075315134) + + W(4, -1093298188, -1110022070, -1084926454, -1107767815) + + W(5, 1055586357, 1031793821, 1041030661, 1055897321) + W(6, 1011861728, 1040923692, -1105857084, -1121892273) + + W(7, 1026590003, 1040751494, -1123439499, -1109554506); + WS(-1089696543, -1143233957); + sum1 = + W(0, -1115482013, 1045323653, -1096516931, 1035013745) + W(1, -1162367211, 1054106403, -1107204469, 1036179510) + + W(2, -1129127938, -1098483714, 1045383939, -1091150045) + + W(3, 1043411549, -1086835476, 1048447553, -1103773800) + + W(4, 1041381507, -1110243009, 1059582003, -1106321908) + W(5, 1034020454, 1026676964, 1034191370, -1159158363) + + W(6, -1122467892, 1049605572, -1090424421, 1043667267) + + W(7, -1109629626, 1048548650, -1105600748, 1035568493); + sum2 = W(0, -1134008573, -1125596513, 1011008945, 1051842604) + + W(1, -1125276577, -1118729604, 
-1111959837, -1117720498) + + W(2, 1032064725, -1121806942, 1045441656, 1065460002) + + W(3, -1114964420, -1130705389, -1121883022, 1034360078) + + W(4, -1117795542, 1035355315, 1024667789, -1089961367) + + W(5, -1081892598, 1026896379, 1020053498, 1023648883) + W(6, -1113496357, 998755498, -1119910472, 1041632626) + + W(7, 1035390555, 1032979221, -1129745921, -1115672512); + WS(1065278079, -1120402802); + sum1 = W(0, 1031924074, -1115548942, -1106896516, -1112658654) + + W(1, -1103991244, -1121670257, -1115583119, 1031270934) + + W(2, 1052390115, 1049303261, 1045251552, 1060948388) + W(3, 1062622505, 1054395391, 1045812503, 1048618059) + + W(4, -1097070063, -1099757447, 1040981136, -1081623390) + + W(5, -1085365853, -1096062708, -1101077740, -1099232531) + + W(6, -1116671142, -1118597165, 1041336272, 1043338841) + + W(7, -1145407604, 1032773481, -1123616408, -1127437558); + sum2 = + W(0, -1114021356, 1032565152, 1056835748, -1085160508) + W(1, 1058685385, -1086618535, 1047242231, 1032715041) + + W(2, -1110767736, -1122384971, -1085457975, 1068814369) + + W(3, -1085213012, 1063574785, -1144776155, -1097434793) + + W(4, -1100377718, 1041858293, -1085697040, 1060746614) + + W(5, 1057245366, -1094120608, -1093482365, 1056356490) + W(6, 1034493514, 1019779384, 1016590225, -1097525969) + + W(7, 1043875459, -1097974912, 1054776673, -1096653037); + WS(1055688959, -1109584743); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[525]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = 
float2(GetInputPt()); + +#define temp_tex(pos) (float(texture(temp, pos).x)) +static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2); +static const float2 temp_pt = float2(1.0 / (temp_size.x), 1.0 / (temp_size.y)); + +#define HOOKED_tex(pos) temp_tex(pos) +#define HOOKED_size temp_size +#define HOOKED_pt temp_pt + +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 525; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (1)) + 0.5, float(group_base.y + y - (3)) + 0.5)).x; + } + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[8]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 4]; + samples[1][1] = inp[local_pos + 5]; + samples[1][2] = inp[local_pos + 6]; + samples[1][3] = inp[local_pos + 7]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; + samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 19]; + samples[3][1] = inp[local_pos + 20]; + samples[3][2] = inp[local_pos + 21]; + samples[3][3] = inp[local_pos + 22]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; + samples[4][2] = inp[local_pos + 32]; + samples[4][3] = inp[local_pos + 33]; + samples[5][0] = inp[local_pos + 34]; + samples[5][1] = inp[local_pos + 35]; + samples[5][2] = inp[local_pos + 36]; + samples[5][3] = inp[local_pos + 37]; + samples[6][0] = inp[local_pos + 45]; + samples[6][1] = inp[local_pos + 46]; + samples[6][2] = inp[local_pos + 47]; + samples[6][3] = 
inp[local_pos + 48]; + samples[7][0] = inp[local_pos + 49]; + samples[7][1] = inp[local_pos + 50]; + samples[7][2] = inp[local_pos + 51]; + samples[7][3] = inp[local_pos + 52]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 18]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret); +} diff --git a/src/Effects/NNEDI3/NNEDI3_nns256_win8x6.hlsl b/src/Effects/NNEDI3/NNEDI3_nns256_win8x6.hlsl new file mode 100644 index 000000000..cd7248910 --- /dev/null +++ b/src/Effects/NNEDI3/NNEDI3_nns256_win8x6.hlsl @@ -0,0 +1,11232 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: nnedi3.py --nns 256 --win 8x6 --use-compute-shader --use-magpie +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME NNEDI3_256_6 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 1 * 2 +//!HEIGHT INPUT_HEIGHT * 2 * 1 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!FORMAT R16_FLOAT +//!WIDTH INPUT_WIDTH * 1 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D temp; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_temp; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC NNEDI3 (double_y, nns256, win8x6) +//!IN INPUT +//!OUT temp +//!BLOCK_SIZE 32, 16 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[12]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 12; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 48.0; + float mstd1 = sumsq / 48.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, -1118812211, -1118354985, -1108702207, -1107177196) + + W(1, -1116025261, -1113369587, -1126504964, -1117760744) + + W(2, -1127312137, 1040658850, -1105559165, 1024463101) + + W(3, -1120674615, -1117458482, -1124477727, 1041726453) + + W(4, -1102924251, -1120327001, 1037346316, -1097807929) + + W(5, 1051925971, 1061593601, -1098269094, 1015141868) + + W(6, -1120777358, -1114916026, 1042061230, 1053108884) + + W(7, -1109857889, -1129934237, -1137814218, -1101147334) + + W(8, 1032645830, 1029100907, -1121960158, -1114359336) + W(9, -1128256674, 978731342, 1026597474, 1023094846) + + W(10, -1133406593, 
1018102856, -1118580651, -1112914505) + + W(11, 1040041685, 1010634196, -1137769344, -1124738995); + sum2 = + W(0, 1012167794, 1034293816, 1040949842, -1073125242) + W(1, -1098861131, 1034317546, 975711915, -1103749327) + + W(2, -1105000392, -1097051670, 1026609280, -1107080190) + + W(3, 1007876102, 1042152488, -1113573827, 1048636347) + W(4, -1103172582, 1037196612, -1112492311, 1032768248) + + W(5, 1065119806, 1064777150, 1028248806, -1106667446) + W(6, 1037438381, 1026775674, 1049707133, 1035198208) + + W(7, 1042916747, 1024046979, -1104922990, -1112883791) + + W(8, -1118855570, 1049268872, -1108803486, -1112062411) + + W(9, 1035896848, 1009005510, 1019305156, -1110039318) + W(10, 1037762592, 1031133421, -1129478003, 1029694272) + + W(11, -1105233202, 1045598091, 1002108025, -1116379320); + WS(1002542528, -1081598301); + sum1 = + W(0, -1123106403, 1018532849, 1058293919, -1096563748) + W(1, 1033211315, -1104454821, 1003194766, 1031358712) + + W(2, 1048723343, -1102943230, 1043311017, 1033034550) + + W(3, -1115291860, -1100442248, 1060312385, -1084853826) + + W(4, -1122032382, -1105126162, -1120609928, -1102109750) + + W(5, 1065191364, -1082116255, 1048832547, -1093430837) + + W(6, 1033033375, -1124327881, 1057579241, -1090047916) + W(7, 1045565001, 1033480943, -1104418325, 1022719310) + + W(8, 1055391647, -1090520747, 1034840623, -1104668395) + W(9, 1031657633, 1042404749, 1048926634, -1115672903) + + W(10, 1040065984, 1041617645, -1140694509, -1173350542) + + W(11, 1057234642, -1088607906, 1034960904, -1100859732); + sum2 = + W(0, -1118979564, 1038135851, 1040555036, 1046730291) + W(1, -1098841046, 1010901456, -1136455954, -1098903542) + + W(2, -1113356687, -1091503884, 1033064908, -1105119573) + W(3, 1031647880, 1043936541, 1041172918, 1053446821) + + W(4, -1105844234, 1050988870, -1098733088, -1095471090) + + W(5, 1058853402, 1057969905, -1107613268, -1096003667) + + W(6, 1032618663, -1114646115, -1111675314, 1026777488) + + W(7, -1104403193, 1040941647, 
-1115252794, 1050331000) + + W(8, 1042547849, 1032649026, -1109177822, -1135105484) + + W(9, -1105564107, -1107943513, -1102521857, -1097022326) + + W(10, -1106932655, -1111654963, 1028338874, 1030299514) + + W(11, 1048036907, 1045220138, -1104926499, 1015146572); + WS(-1089089900, 1057806976); + sum1 = + W(0, -1107295812, 1034138799, -1095422036, 1050726826) + W(1, 1041715821, -1107540610, -1127595840, 1025412102) + + W(2, -1098433268, -1189029283, -1122343505, -1114195981) + + W(3, -1123024597, 1036945929, -1092642698, 1055732365) + + W(4, -1122551116, 1004508092, -1107414907, 1043589025) + + W(5, -1086058047, 1066834483, 1012137296, -1123010964) + W(6, 1016079705, 1048626936, -1087422851, 1060391966) + + W(7, -1112593208, 1021997657, -1114511976, 1026901142) + + W(8, -1090938821, 1050840209, 1032081267, -1116540066) + + W(9, -1121404144, -1122208804, -1099844512, 1046023279) + + W(10, 1025525231, -1117735535, -1120167114, -1132433569) + + W(11, -1092394401, 1054523402, 1034856335, -1140325311); + sum2 = + W(0, -1100068589, 1045053907, 1035618626, 1049401249) + W(1, 1036897142, -1103084156, 1040406565, -1112537807) + + W(2, -1097933278, -1091711434, 1040848725, 1039746012) + + W(3, -1105732125, 1034765996, -1099579155, 1056573344) + + W(4, -1089717374, 1030295740, -1129255625, -1096936254) + + W(5, 1047111871, 1055190008, 1050298277, -1110783667) + W(6, -1110986904, -1098576495, 1056019034, 1058665437) + + W(7, -1089302244, 1041764164, 1041854514, -1119869595) + + W(8, -1119227686, -1092612461, 1058580142, -1100808273) + + W(9, -1108063740, -1108123574, 1029490732, -1121089271) + + W(10, -1127874601, 1022690203, 1040297872, -1106886121) + + W(11, 996712344, -1113107096, -1110437244, 1034569294); + WS(1049963735, -1124746024); + sum1 = W(0, 1006389467, 1034213253, 1058411598, -1089111694) + W(1, -1120169982, 1033778855, 991131306, -1110639240) + + W(2, 1049374726, -1111102680, 1039576302, -1134195605) + + W(3, -1124991168, -1124116580, 1060442825, -1086690096) + + 
W(4, 1018630958, -1122058050, 1036315168, -1106688436) + + W(5, 1064243752, -1080857842, 1032153615, 1024033695) + + W(6, -1114080443, 1016950458, 1058630778, -1086343400) + W(7, 1034713638, 1016006830, 1034164414, 1034354600) + + W(8, 1057235431, -1086990857, 1036089759, 1017381585) + + W(9, -1168983685, -1118357326, 1051622657, -1099687954) + + W(10, 1035150121, 1027377121, 1034869786, 1046100667) + + W(11, 1028310995, -1090257402, -1126482082, -1130216101); + sum2 = W(0, -1114775505, -1111353121, 1051023520, 1077280281) + + W(1, 1042396970, -1100155636, 1022693528, 1040828458) + W(2, 1041444385, 1083159907, 1039028690, 1038899070) + + W(3, 1033686534, -1108626677, -1101109690, -1090827263) + + W(4, -1129576724, -1108327399, -1159675846, -1179590680) + + W(5, -1089609004, -1064966677, -1146898193, -1128357112) + + W(6, -1112709023, 1041114289, -1101828769, -1073695373) + + W(7, -1117377551, 1026559236, 1033838002, 1028829848) + W(8, 1037546580, -1090323484, 1046069066, 1017563688) + + W(9, -1125624388, -1108121837, 1033655640, 1042738654) + + W(10, -1098820901, -1149039617, 1032684170, 1037026782) + + W(11, -1104771455, -1104201752, 1049907200, -1119733667); + WS(-1082925548, -1131273240); + sum1 = + W(0, -1130676005, 1028984921, -1090453298, 1056020338) + W(1, 1036092472, 1031745797, 1024417890, 1026031368) + + W(2, -1094705109, 1048479050, 1026370227, 1023514525) + W(3, -1127912110, 1046418962, -1088411833, 1054709085) + + W(4, 1040943828, -1139610597, -1123681912, 1048544370) + W(5, -1080625999, 1056570469, 1016526011, 1039178347) + + W(6, -1128802908, 1032747842, -1078821070, 1060414079) + + W(7, -1134933227, -1115579031, 1009745483, -1115143804) + + W(8, -1093720869, 1058018725, -1146727344, 1033865376) + W(9, 1030244593, 1035809011, -1115224705, 1048613352) + + W(10, -1126383553, -1123856496, -1124372951, 1012870858) + + W(11, -1099806226, 1053689379, 1034696072, 1032018765); + sum2 = W(0, -1117505397, -1114926763, 1048798551, -1102904639) + + W(1, 
1025448640, -1112830121, -1126832117, -1140231915) + + W(2, -1146496262, -1113101086, -1129185935, -1125809392) + + W(3, -1116692107, -1125548157, 1045683415, -1125191934) + + W(4, 1034411663, -1121525964, -1107644007, -1125354742) + + W(5, 1053833339, 1000279686, -1120274227, -1113082203) + + W(6, -1117458583, 1042669208, 1057710126, -1104919217) + + W(7, 1041220979, -1111650191, -1123105101, 1027773818) + + W(8, 1032098567, -1107525999, 1022009255, 1035574395) + + W(9, -1117032217, -1115355410, -1100115369, 1032507221) + + W(10, 1021108475, -1112638630, 1004815486, -1119603373) + + W(11, -1110288078, -1103622420, 1039592007, -1117835672); + WS(-1089340204, 1064822548); + sum1 = + W(0, 1024560260, 1040632746, 1042654740, -1106019185) + W(1, -1121721479, 1037194524, 1017532199, -1123651609) + + W(2, 1053882699, -1094337163, 1033472482, 1028050126) + W(3, 1036465700, 998268921, 1058440180, -1083627654) + + W(4, -1112650528, 993325743, 1032538389, -1115892569) + W(5, 1062655475, -1081203240, 1042941034, 1013010462) + + W(6, 1031720480, -1109887627, 1057084090, -1087921069) + W(7, 1046201917, 1035154240, 1035008482, 1033415588) + + W(8, 1054084900, -1086686700, 1038013786, 1021897635) + W(9, 1020865894, -1112010572, 1046711885, -1098821294) + + W(10, 1039144481, 1027972046, 1041385023, 1043778521) + + W(11, 1050203279, -1083224116, 1043447032, -1131974615); + sum2 = W(0, -1108565409, 1034353453, -1088688588, 1053209723) + + W(1, 1049996954, -1107721619, 1041490776, 1018486577) + + W(2, -1105841420, -1092734414, 1039814215, -1122233136) + + W(3, -1103680498, -1113519263, 1037914109, -1094792328) + + W(4, 1034343810, -1116717915, -1113858207, 1052082200) + W(5, 1062919581, 1049986423, 1012724150, 1040029087) + + W(6, 1018958125, -1097140178, -1095558118, -1107726366) + + W(7, -1098318745, 1007719490, -1139146344, 1046768714) + + W(8, 1049328132, 1044527939, 1024888668, -1100531711) + W(9, 1024460115, -1098207895, 1041960027, 1046336394) + + W(10, -1106186917, 
1037888927, -1111959213, -1128703602) + + W(11, 1035231667, -1110040773, 1036710347, -1102670680); + WS(-1083458668, -1076208878); + sum1 = W(0, 1028803476, 1040683401, 1046406904, -1098733730) + + W(1, 1004468631, 1030855367, -1119221604, -1097616301) + + W(2, 1051879501, -1093169213, -1110693690, -1111140815) + + W(3, -1136427363, 1042493937, 1058517387, -1088547874) + + W(4, 1024040880, -1127640694, 1033962333, -1106128994) + + W(5, 1062475632, -1086787145, 1045297574, -1108587948) + + W(6, -1108948664, -1096558041, 1061490456, -1088927588) + + W(7, 1028806548, -1104411844, 1041264181, 1044433886) + W(8, 1053313566, 1035939910, 1038834422, 1040762412) + + W(9, -1108358441, -1097860853, 1049919352, -1096390160) + + W(10, -1095128350, -1118015030, 1032628870, -1123751924) + + W(11, 1054299289, -1130402734, -1106314264, -1165494119); + sum2 = W(0, 1016611708, 1016478676, -1110195557, -1101004152) + + W(1, 1040169462, -1128480930, 1037888148, 1037533502) + W(2, 1036667983, 1029018929, 1051795874, -1116498505) + + W(3, -1105282980, 1012928145, -1126800264, -1092116144) + + W(4, 1041335879, -1105668811, 1016374797, 1042320298) + + W(5, -1123884642, 1048907311, 1052196876, -1110762465) + + W(6, -1146676672, -1119445205, -1097480948, 1063932318) + + W(7, -1105880602, -1096424000, -1118267731, -1133007186) + + W(8, -1111241984, 1045206632, -1088285853, 1016609657) + + W(9, -1130451858, -1123652878, 1039970164, 1051255124) + + W(10, -1095104183, 1029046774, 1032410610, -1114217777) + + W(11, 1039354964, 1032089604, -1090379026, 1046489649); + WS(-1104670894, -1131977046); + sum1 = + W(0, -1112648188, -1127207577, 1054983097, -1088875575) + W(1, 1031188251, -1103988635, -1122872327, 1044660591) + + W(2, 1050049705, -1112061830, 1037435556, 1027063042) + W(3, 1034658686, -1159198685, 1055452918, -1088327234) + + W(4, 1047857485, -1117747167, -1105196647, -1091967838) + + W(5, 1062208630, -1080890076, 1027642075, -1097581231) + W(6, 1040705957, 1050515731, 1055187309, 
-1103107959) + + W(7, 1054501223, 1026629396, -1105551222, -1105182655) + + W(8, 1051606290, -1088400827, -1109004816, -1115427963) + + W(9, -1121117111, -1127322710, 1047450666, -1103254713) + + W(10, 1042878098, -1119280070, 1017793890, 1049557794) + + W(11, 1052382814, -1097595959, 1041414585, -1121163375); + sum2 = + W(0, 1016354951, 1027513536, -1143898815, -1120711631) + W(1, -1121835937, 1011202413, -1117974191, 1035918257) + + W(2, 1029824032, 1038928177, -1110582057, 1027020738) + W(3, 1039379806, 1037237024, -1103985849, -1101088822) + + W(4, 1009730353, -1113735870, 1055799129, -1144611727) + + W(5, -1097216592, -1097673825, 1040607236, 1044957698) + + W(6, 1043516685, 1028380863, -1106760625, -1124050651) + + W(7, -1115312251, -1114960631, 1037116388, 1042440009) + + W(8, -1103814691, -1105579212, 1038947942, 1027787831) + + W(9, 1033575170, -1106882882, -1123395355, 1026359109) + + W(10, -1135830635, -1133116095, -1131729016, 1025801393) + + W(11, -1136138683, 1011024469, -1112603972, 1008198659); + WS(-1096559831, 1037484900); + sum1 = + W(0, 1028249855, 1038422322, 1057397273, -1091437192) + W(1, -1101563608, 1033785689, -1134487991, 1027879764) + + W(2, 1048869250, -1098773026, 1029769503, 1031362249) + W(3, 1034313250, 1029515489, 1056640275, -1086850776) + + W(4, -1127965876, -1173649736, 1016173128, -1112460521) + + W(5, 1065757625, -1083502823, -1131269971, -1128247556) + + W(6, 1027417370, -1107825326, 1058322735, -1083394976) + W(7, 1044386388, 1035484541, 1028347039, -1123304579) + + W(8, 1054334819, -1089694535, -1107441417, -1132373103) + + W(9, 1000575210, 1030471249, 1052511918, -1098313541) + W(10, 1031752252, 1034873120, 1035635068, -1120703806) + + W(11, 1052376319, -1102551488, -1098473097, -1142130892); + sum2 = W(0, -1112511339, -1073555081, 1076402541, -1104998932) + + W(1, -1101584674, 1037626737, 1040888051, -1070793862) + + W(2, 1075658117, 1054215981, -1115233903, 1015165170) + W(3, 1033770332, -1069228230, 1075655149, 
1042820552) + + W(4, 1010151672, 1023929473, 1027239578, -1071158503) + W(5, 1077820889, 1046227202, 1035248614, 1033175695) + + W(6, -1109060021, -1080567434, 1045918702, -1095422364) + + W(7, 1038949494, 1025525751, -1135961020, 1047598893) + + W(8, -1118850699, -1101978428, 1049027220, 1032251645) + W(9, -1113507810, 948656059, 1035044244, 1045389579) + + W(10, 1028467783, -1117328038, -1110941727, -1129804625) + + W(11, 1026016583, -1103164541, -1118124613, 1042587545); + WS(-1081931126, 1073966570); + sum1 = + W(0, -1121849711, 1035175459, -1121951547, 1027761070) + W(1, 1025905706, -1114686833, -1146361331, -1097410109) + + W(2, 1026127169, 1045225025, 1037511592, -1113648733) + W(3, 1017322599, 1043585907, -1144746721, -1106255910) + + W(4, 1043091875, 1017686828, -1106692309, -1120101763) + + W(5, -1091000962, 1065386286, -1111228857, -1108319001) + + W(6, 1025148299, 1050307440, -1079735571, 1061827739) + W(7, -1120837551, 1018597782, -1106210884, 1038219466) + + W(8, -1091953433, 1056380469, -1102991437, -1147707682) + + W(9, 1006422511, 1023773090, -1096168868, 1049682211) + + W(10, 1003336935, -1110127893, -1115138804, -1132131774) + + W(11, -1102445962, 1046599772, 1040799505, 1028136822); + sum2 = + W(0, -1106398945, 1035173169, -1101335301, 1053305068) + W(1, -1121291886, -1109509581, 1034732755, -1090565718) + + W(2, 1056357842, 1050400788, -1115544374, -1107192051) + + W(3, -1136096426, 1037348813, -1122254044, 1036743473) + + W(4, -1105444760, 1016457143, -1116038033, -1089313148) + + W(5, 1066071262, 1057700465, -1086616818, 1022772267) + W(6, 1044672515, 1037418911, -1095175639, -1084949846) + + W(7, 1050477957, 1033704361, -1100200943, 1045738761) + W(8, -1132458755, 1003355997, -1102270714, 1035291569) + + W(9, 1037208365, -1130864309, 1007521414, 1030076164) + + W(10, 1045722667, -1111299866, 1025870712, -1117649125) + + W(11, 1025322164, -1106413087, 999303933, 1024551876); + WS(1052508503, 1065718149); + sum1 = W(0, 1042149474, 962035242, 
1048663227, -1097510801) + W(1, 1028239019, 1038442493, -1109127212, -1098223900) + + W(2, 1043423478, -1096660204, -1104342914, -1109357577) + + W(3, 1041796883, 1027445007, 1055339346, -1087758861) + W(4, 1027971236, 1040109559, 1033122295, -1108112926) + + W(5, 1061167739, -1089406218, 1037128774, 1025450507) + + W(6, -1120120612, -1098175173, 1054196775, -1094841886) + + W(7, -1105773058, -1123303012, 1045736850, 1043181559) + + W(8, 1055761732, -1105769888, 1042761157, 1044237597) + + W(9, -1110374755, -1100319159, 1041284046, -1103555357) + + W(10, -1101757237, -1111907879, 1038133568, -1102543586) + + W(11, 1051299336, -1120191499, -1105847257, 1039590240); + sum2 = + W(0, -1109097964, 1037136687, 1031673749, -1125393555) + W(1, 1019467600, 1025995330, 1021868522, -1116667451) + + W(2, -1094712046, 1041352693, -1104433838, 1008084051) + W(3, 1007318978, -1107845433, 1038331125, 1034962829) + + W(4, -1122570447, 1039892459, 1032613655, -1109068920) + + W(5, 1040720319, -1112580519, -1121851072, 1025598855) + + W(6, 1002517286, 1016069779, -1111158812, -1106409067) + + W(7, -1113430896, 1045465420, -1126879582, -1103760436) + + W(8, 1056111085, -1106729155, 1049773824, -1110468674) + + W(9, 1031113174, 1030143702, -1106358629, -1106975050) + + W(10, 1036295980, 1020538422, -1114937810, -1118059752) + + W(11, -1121843086, 1047380650, -1122235886, -1113447212); + WS(1056997868, 1008833773); + sum1 = + W(0, -1118496039, 1006806901, 1039826714, -1123143270) + W(1, 1006039247, 1017005492, 1036420187, -1130107576) + + W(2, -1123687353, -1104570591, 1032886196, -1125702370) + + W(3, -1113594582, -1115019369, 1053157409, -1093231896) + + W(4, -1105180689, 1028719950, 1034344230, 1045363911) + W(5, 1059756929, -1095316624, -1102708199, 1009228202) + + W(6, 1038393121, -1090717022, 1058230383, -1095564524) + + W(7, 1035814292, -1119964195, -1109888100, 1037930126) + + W(8, 1045739824, -1098051412, -1129186168, 1033820442) + + W(9, 1032434307, -1110427933, 1042446790, 
-1101398261) + + W(10, 1025625861, 1028568173, -1131937728, -1112104883) + + W(11, 1052782212, -1104229594, -1112516090, -1136905401); + sum2 = + W(0, 1034320453, -1118604595, 1025235375, 1028595208) + W(1, 1002638229, -1116609216, -1112051735, -1109468006) + + W(2, -1107232926, 1046387359, -1105038970, 1005965131) + W(3, 1043966073, -1106879339, 1049289065, 1044354198) + + W(4, 1045499601, -1111658272, 1033608293, -1088213587) + + W(5, 1067079268, -1087106166, 1035867996, -1132447732) + + W(6, -1092579307, 1064370547, -1088486363, -1108961766) + + W(7, -1096457250, 1042442572, 1051236380, -1095838484) + + W(8, 1044420987, -1104528882, 1028839698, -1134350406) + + W(9, -1102939053, 1043763107, -1107783352, 1035658672) + + W(10, -1122532695, -1131375963, 1027336318, 1016664388) + + W(11, -1116658227, 1023955051, -1126291119, -1135660575); + WS(1064849644, -1092967401); + sum1 = + W(0, 1041078624, 1033930742, 1053270962, -1091277968) + W(1, -1115974645, -1124489961, -1103081652, 1034184171) + + W(2, 1058076017, -1091900963, -1101902009, 1046377811) + + W(3, 1045204674, -1098756264, 1059044453, -1097334921) + + W(4, 1038364767, -1096742869, -1126422222, 1038407355) + W(5, 1061713418, -1078564449, 1031099397, 1034335299) + + W(6, -1107755791, -1103118957, 1064682577, -1099822850) + + W(7, 1025967234, 1015202107, 1049230600, -1103625882) + W(8, 1047988615, -1091654943, 1010917525, -1098744549) + + W(9, -1101861336, 1035198826, 1055582710, -1096942230) + + W(10, 1021900159, 1047801952, 1041473799, -1102710964) + + W(11, 1058355518, -1098047297, -1106099948, -1101582418); + sum2 = + W(0, 1034970587, 1039267337, -1109640195, -1085205368) + W(1, 1065312771, -1111782462, -1105865147, 1026053125) + + W(2, 1027743203, -1076908464, 1068235631, 1029167111) + W(3, 1024111791, 1040193826, 1042644379, -1072741915) + + W(4, 1074530244, 1035172075, 1018431373, 1037956157) + W(5, -1088412806, -1079896887, 1069229068, 1048206286) + + W(6, -1107243650, -1106797747, -1104275404, 
1057561219) + + W(7, -1110308581, -1092973955, 1038385161, 1031902437) + W(8, 1031274183, 1052902509, 1045193510, -1105822229) + + W(9, -1114574492, -1114137980, -1115379021, -1103620900) + + W(10, -1167188013, -1102796809, -1129893537, -1128932411) + + W(11, 1040558376, 1054347378, -1099424233, -1106619819); + WS(-1085552428, 1044647857); + sum1 = + W(0, -1130568573, -1120131932, -1097293585, 1052681344) + W(1, -1115182247, 1016796643, -1111247047, 1046596325) + + W(2, -1095715202, 1050957098, -1125685565, -1123522377) + + W(3, 1001536536, 1026932885, -1090361246, 1059711152) + W(4, -1097304503, 1028362085, -1104704953, 1049575094) + + W(5, -1085535724, 1060788917, -1122898434, -1112833505) + + W(6, 1029690778, 1042872064, -1105723034, 1048249845) + + W(7, -1115360055, 1008149481, -1104108900, -1127841478) + + W(8, -1113844876, -1106926654, 1050188162, -1111804152) + + W(9, 1012447989, -1114289465, -1127895515, 1034645781) + + W(10, -1110595670, 1032897312, -1107166286, -1110288351) + + W(11, 1028149942, 1043016176, 1041406167, -1119471349); + sum2 = + W(0, 1049638564, -1122573304, -1097365817, -1120650912) + W(1, 1039129637, -1114566411, 1012096208, -1169058693) + + W(2, -1118822370, 1032693677, 1036528332, -1119534724) + + W(3, 1055008508, -1092917400, -1124083512, -1127709292) + + W(4, -1113518885, 1015256750, 1058022553, -1080721407) + W(5, 1056959548, 1044260765, -1122124802, 1042163670) + + W(6, 1059601221, -1081684685, 1056508637, -1099806388) + + W(7, 1019567420, -1110975736, 1065544846, -1080284825) + W(8, 1051359763, 1042746095, -1101554026, 1032382474) + + W(9, 1035938200, -1096574070, 1044088441, 1033735876) + + W(10, 1041548497, -1110809702, 1043980555, -1098850345) + + W(11, 1031681402, -1121369890, -1108142320, 1022534793); + WS(1049904727, -1085461498); + sum1 = + W(0, -1114441151, 1047758995, -1092338425, 1054893264) + W(1, -1095327506, 1042581180, 1032673159, -1122472531) + + W(2, -1101190388, 1042608805, -1108884127, -1119554504) + + W(3, 
1027012354, 1048177385, -1096531431, 1050188989) + W(4, -1104987150, 1041262473, 1041600521, -1116753522) + + W(5, 1059669870, -1088410395, -1123782065, -1120686401) + + W(6, 1035856141, -1105032264, 1063099682, -1084283112) + + W(7, 1042263123, -1117364890, 1033107055, -1109908945) + + W(8, 1058897901, -1088007491, 1042892731, -1107384925) + + W(9, 1031163069, -1104848436, 1049115478, -1096369309) + + W(10, 1029697804, -1138499405, -1139856090, -1111353615) + + W(11, 1055941567, -1096366496, 1047202227, -1100444021); + sum2 = + W(0, -1101934313, 1068410613, 1053392914, 1045964051) + W(1, -1109754454, -1118726053, -1127864237, 1066297935) + + W(2, 1047339448, -1107136628, 1023924243, 1029832629) + W(3, -1112463889, 1052441146, -1111508378, 1035674775) + + W(4, -1121555471, -1111696115, 1047304216, -1077079694) + + W(5, 1044634164, 1046050296, -1111827987, 1036477638) + + W(6, -1097499627, -1082290320, -1086326188, -1113442140) + + W(7, 1006763480, -1119582249, 1041678537, -1095668928) + W(8, 1053023843, -1135364198, 1034831467, 1024856185) + + W(9, -1128505773, -1097722710, 1050833435, 1028981469) + + W(10, -1128234727, -1112331027, 1039199998, -1096167330) + + W(11, -1112581958, 1028034745, 1032814306, 1017921306); + WS(-1087326956, 1033622098); + sum1 = W(0, -1113899675, 1045896801, 1041409216, -1108955706) + + W(1, -1104543128, 1041055226, 1035193378, -1161971013) + + W(2, -1108871427, 1048600721, -1114333163, -1106692192) + + W(3, 1030557818, 1045303135, -1104804773, -1094955846) + + W(4, 1045266364, 1039063182, -1119324954, -1095480777) + + W(5, 1041468886, 1051943793, -1096626659, -1112593372) + + W(6, 1042470612, -1110770003, -1098059200, 1025944442) + + W(7, 1048775867, -1107180444, -1117399605, -1109576477) + + W(8, 1052050234, -1097508910, 1043056710, 1041486173) + W(9, 1021883592, -1100233714, 1041862393, 1030612641) + + W(10, -1122051827, -1114362202, 1035419591, -1122013269) + + W(11, 1017834329, -1106071902, 1042839849, -1121041260); + sum2 = + W(0, 
1008283263, -1111634453, -1112224655, -1121896666) + W(1, 1031996897, -1106176579, -1114415273, 1032259795) + + W(2, 1046234660, 1020399084, -1105304684, 1027107696) + + W(3, -1114655921, -1123951654, 1039099048, -1139717487) + + W(4, 1017260660, -1106922258, 1036167174, -1116061542) + W(5, 1049934438, 1050902690, -1102142181, 1022104088) + + W(6, -1111159985, 1039368345, 1003410015, 1039007446) + W(7, 1003551903, -1122048418, 1033204129, -1102899145) + + W(8, -1113540553, 1039510238, -1114854307, -1135188919) + + W(9, -1122340118, 1024476440, -1117681522, 1032062517) + + W(10, 992233789, -1128403972, -1127458940, -1118949310) + + W(11, -1122317014, 1033150391, -1113963477, -1125862176); + WS(1068173014, -1111006676); + sum1 = + W(0, 1031049568, -1103201566, 1048004603, -1100468360) + W(1, 1046039773, 1012485196, -1115395480, -1112513259) + + W(2, -1117860850, -1173290307, -1123264224, -1127850837) + + W(3, 1019703498, -1112438249, -1104427535, 1042677725) + W(4, 1030284012, 1029876039, -1113567107, 1042819653) + + W(5, -1083606526, 1064316383, -1122621123, 1036820754) + W(6, 1015808760, 1048780036, -1083053360, 1063571221) + + W(7, -1094902185, 1035184606, -1109811912, 1039150546) + + W(8, -1090564527, 1060600586, -1103353426, 1024997805) + W(9, 1027899359, 1034094633, -1092330682, 1050639762) + + W(10, -1106486660, -1149057999, -1103105894, 1040353932) + + W(11, -1095220995, 1054475004, 1034806311, -1123061719); + sum2 = + W(0, 1014853092, -1122580470, -1120994367, 1047459793) + W(1, 1064875701, -1132455031, -1120466254, -1114547761) + + W(2, 1038991680, 1047680543, 1054395623, 1011283964) + W(3, 1027909890, 1030745116, 1034511499, -1098401240) + + W(4, 1045153300, -1108441710, -1117512854, -1119656010) + + W(5, 1041066172, -1121088174, -1085223893, 1018552602) + W(6, 1026011110, 1035574738, -1109149586, 1045982327) + + W(7, -1084582209, -1106692167, -1110873488, -1120221385) + + W(8, 1024235728, 1024310650, -1099510820, 1026658994) + W(9, 1036041188, 1020780404, 
-1112980367, -1122000913) + + W(10, 1029465320, 1023023932, -1115497806, -1122615179) + + W(11, 1020521688, 1042402937, -1105747852, 1004613671); + WS(-1095915991, 979918868); + sum1 = + W(0, -1115463727, 1032823819, 1026707497, -1119898332) + W(1, 1026568544, -1121143952, -1137886897, 1037523161) + + W(2, -1131549790, 1035412665, -1108324895, -1129452238) + + W(3, 1011021190, 1029800132, -1108429845, -1136100063) + + W(4, -1138504034, -1120493188, 1011860489, 1043477300) + + W(5, -1100874368, -1112838682, 1047390560, -1147670274) + + W(6, -1129491953, -1105888052, 1047336484, -1098267600) + + W(7, 1036689246, 1028096953, 1007409140, -1114264129) + W(8, 1049886943, -1097768883, 1037432366, -1134502280) + + W(9, -1124741815, 1021218335, 1042964547, -1106237874) + + W(10, 996441389, -1157333673, -1122228982, 1030459197) + + W(11, 1038061098, -1108915952, -1121991146, -1177453050); + sum2 = W(0, -1121902268, 1020062213, 1034601005, -1096503308) + + W(1, 1050858947, -1108630019, -1128394666, -1143339877) + + W(2, -1111450467, 1052764693, -1099720262, -1130142915) + + W(3, 1039974898, -1102502778, 1034366199, -1095299550) + + W(4, 1055940597, -1105816149, -1114856664, 1034423857) + + W(5, -1093114037, 1060522961, -1087523263, 1037139152) + W(6, 1032116923, 1030950002, 1027196884, 1057088106) + + W(7, -1085850393, 1054601799, -1115163450, 1038088024) + + W(8, 1036157347, -1104371255, -1107617878, 1043807995) + + W(9, -1112859101, 1036537682, -1130356560, 1031719642) + + W(10, -1119144319, -1122652165, -1128925353, 1019282123) + + W(11, 1032717961, -1110891365, 1017512699, 1025497552); + WS(1066530838, 1025703389); + sum1 = + W(0, -1120765920, -1132051981, -1129191897, 1042371592) + W(1, 1007708012, -1116932051, 1009297086, -1139418709) + + W(2, 1032587897, -1118415861, -1122811552, -1134432681) + + W(3, -1122351141, -1109474502, 1057153612, -1120712699) + + W(4, -1105657359, -1127344014, 1015487755, -1098776251) + + W(5, 1049522345, -1099055070, -1099127683, 1026629028) 
+ W(6, 1015844344, -1109199593, 1041525985, 1049700744) + + W(7, -1111888320, -1119906716, -1127613121, -1123764613) + + W(8, -1109817213, 1050593755, -1113004743, -1136900213) + + W(9, -1133586812, -1121408403, 1010121533, 1036084166) + + W(10, 984437698, -1118848634, -1124468594, -1145136640) + + W(11, -1107128582, 1038803505, 1036904923, -1123829719); + sum2 = W(0, -1117430114, 1041261247, 1033806419, -1126022952) + + W(1, -1155381873, -1125967712, 1039120940, -1107749198) + + W(2, -1097831330, 1041137317, -1127685702, -1116954634) + + W(3, -1106133189, -1099018179, 1064523288, 1057416214) + + W(4, -1099691831, 1032736142, 1026507473, -1131506304) + + W(5, 1045711847, -1092069746, 1026250214, -1120045450) + + W(6, 1031550625, -1123617030, -1088977338, 1059767697) + + W(7, -1098651966, 1021746164, -1142215401, -1123190888) + + W(8, 1052831473, -1088761837, -1118397760, 1028302350) + + W(9, 1026312489, -1112954810, -1133526188, 1033531722) + + W(10, -1130823120, 990513889, -1139250704, -1137414048) + + W(11, -1122137188, -1110686692, 1029280576, -1115066264); + WS(1066680246, 1040921440); + sum1 = W(0, -1119730500, -1110280861, -1092083138, 1053565584) + + W(1, -1110918026, 1029997623, 1034101258, 1011417444) + + W(2, 1031601302, 1054599907, -1143895330, -1113777127) + + W(3, -1132236433, 1027753539, -1090607216, 1058261942) + + W(4, -1111808624, 1039976245, -1104735450, 1038200908) + + W(5, -1078665075, 1061430978, -1099071584, -1112447940) + + W(6, 1035235184, 1035268043, -1096046953, 1061610294) + + W(7, -1106320677, 1023744576, -1111591250, -1121734057) + + W(8, -1093429316, 1056142824, -1105323662, -1145165385) + + W(9, 1024518355, 1032771355, -1094352558, 1053165350) + + W(10, -1138833863, -1113453535, -1121802685, -1126371080) + + W(11, -1102572591, 1055282545, 1035738553, 1031263347); + sum2 = W(0, 1033717276, 1028210443, 1046091050, -1119519632) + + W(1, 1050909876, -1117678961, -1129082210, -1130739278) + + W(2, -1085374293, 1026401017, -1110182528, 
1042909501) + + W(3, -1111396517, -1117565460, 1033058760, -1106338078) + + W(4, -1149282827, -1099518378, 1053521042, -1113906791) + + W(5, 1062822204, 1066908993, -1081393742, 1044082334) + + W(6, -1096499765, -1106119849, -1089123399, 1034907371) + + W(7, -1094008056, -1097984366, 1041638909, 1044690885) + W(8, 1003636411, 1054286387, 1035142168, 1040282994) + + W(9, -1115028859, 1000486481, 1048601259, -1096894542) + + W(10, 1020535023, -1109925208, -1143837439, 1015998178) + + W(11, -1103883056, 1041460877, 1028271260, 1021313035); + WS(-1111691100, -1070453585); + sum1 = + W(0, 1040350706, 1032181048, 1054597571, -1090063653) + W(1, -1105977411, -1114266718, -1107702492, 1006352113) + + W(2, 1056547603, -1111618639, 1011306277, 1034245578) + W(3, 1045915011, -1101176752, 1059824936, -1087007388) + + W(4, 1037450066, -1130610528, -1103582444, -1098104500) + + W(5, 1062411659, -1079908657, -1113612402, -1111998342) + + W(6, 1036820082, -1111995209, 1062112057, -1098160558) + W(7, 1043358554, 1021846218, 1030081641, -1105752467) + + W(8, 1042786985, -1094958736, -1108376090, 1026051990) + + W(9, -1112926972, -1111976126, 1050598527, -1104521261) + + W(10, 1024420791, 1032139296, 1034445457, 1025879721) + + W(11, 1058122296, -1107753094, -1098378691, 1018599200); + sum2 = W(0, -1102183736, 1044690091, 1025447267, -1103679886) + + W(1, 1034791609, -1120655593, 1045283790, -1117083441) + + W(2, 1036724432, -1096666126, 1043810354, -1109094473) + + W(3, -1099060082, -1109435037, -1100626978, 935783211) + + W(4, -1097803022, 1029730663, 1029589699, 1005932683) + W(5, 1066859805, 1061824774, 1034749891, 1023292171) + + W(6, -1103326288, -1109954065, -1114206859, -1098480165) + + W(7, -1094988176, -1109570093, -1146696619, -1130369043) + + W(8, 1024539831, -1113161303, 1050252440, 1012313206) + + W(9, -1123727405, -1115538567, -1123732285, -1109149493) + + W(10, 1029732207, -1106431920, -1106785078, 1039027144) + + W(11, 1040901138, -1129688427, -1105144948, 
1015686163); + WS(-1105638574, -1077878805); + sum1 = W(0, -1112101313, 1043313362, -1104398291, 1034529536) + + W(1, -1120033234, 1017824488, 1025565072, 1041884710) + + W(2, -1093904634, 1043314021, -1106696920, 1028280843) + + W(3, -1118212870, 1045058176, -1086702118, 1058479092) + + W(4, -1128822905, 1038132788, 1034991091, -1099609869) + + W(5, 1055637093, 1049084915, -1126325576, -1136921361) + + W(6, 1037618766, -1100255017, 1061981800, -1082434966) + + W(7, 1050170340, -1125947450, 1033034081, -1097107341) + + W(8, 1053455331, -1087005060, 1046293466, -1124008918) + + W(9, 1023523536, -1104696333, 1048950646, -1098585534) + + W(10, 1042248226, -1126565485, -1143864013, -1128917761) + + W(11, 1037339676, -1105402705, 1032623999, -1114937252); + sum2 = + W(0, 1040832216, -1125419559, -1166596642, -1076839498) + W(1, -1085872442, 1038631745, 1042032808, -1103788280) + + W(2, 1045241846, -1091833274, -1105795498, 1037000995) + W(3, 1039550307, 1043848052, 1051795538, 1035730071) + + W(4, -1094587067, -1115278352, 1029252447, -1099123277) + + W(5, 1068949095, 1070654393, 1007431650, -1103746979) + W(6, 1040592387, 1047689572, -1094163017, -1096947569) + + W(7, -1089767553, 1041143367, -1111709033, -1102650535) + + W(8, -1111700665, 1050259859, 1041510738, 1025070825) + W(9, 1050208698, -1105352144, -1117027793, 1051072204) + + W(10, -1100476212, -1116523862, -1091595987, 1033340873) + + W(11, 1027304955, 1048679723, 1057584085, -1093749689); + WS(-1095944791, 1081861902); + sum1 = + W(0, -1129669743, 1034635115, 1057433596, -1090131839) + W(1, -1120322428, 1008560041, -1116664839, -1122706061) + + W(2, 1048908041, -1093164666, 1030509816, 1023557506) + W(3, -1148272609, 1033259121, 1058166921, -1087001587) + + W(4, 1035303897, 1013701241, -1121494877, -1124251544) + + W(5, 1058746094, -1083481847, 1044900399, -1117995899) + + W(6, -1115602986, -1126517523, 1055593874, -1085306808) + + W(7, 1041073282, -1116643091, 1028329496, -1128061368) + W(8, 1057488289, 
-1091485686, 1023593721, 1023656437) + + W(9, -1123378150, -1130313237, 1049860117, -1102755929) + + W(10, 1034451003, -1164007844, 1027525168, -1119991479) + + W(11, 1061071735, -1105495231, -1102595129, 1024551167); + sum2 = W(0, 1029670841, 999172620, 1042251423, -1105031588) + W(1, 1027957701, 1026093121, -1115575058, -1099435099) + + W(2, 1041110640, -1120449287, 1035596553, -1156632055) + + W(3, 1042858197, -1139359478, 1058527193, 1010020318) + + W(4, 1019832195, -1126960339, -1101600217, 1027943101) + + W(5, 1075961547, -1109667225, 1011086126, 1010028142) + W(6, -1117748556, 1048715384, 1073424302, 1057314889) + + W(7, 1033526397, 1037810339, 1035438169, -1098854617) + W(8, -1086643829, 1037113027, 975610078, -1115077778) + + W(9, -1114535210, 1036336651, -1080060710, -1123725073) + + W(10, 1012557694, -1133987070, 1032334939, 1041563473) + + W(11, -1067239867, -1100548924, -1095465488, 1036754195); + WS(-1085172076, 1069147958); + sum1 = W(0, 1025703205, 1027233863, 1026640114, 1029190628) + + W(1, -1111659152, -1139375968, -1141293989, -1106615031) + + W(2, 1042354526, -1106775167, -1115892869, 1028377583) + + W(3, 1041286826, -1115436126, 1053740861, -1106293072) + + W(4, 1035182992, -1156734143, -1119005195, -1112858253) + + W(5, 1054852793, -1089581852, 1027435128, -1121669673) + + W(6, 1026450561, 1042609938, -1119314307, -1098775605) + + W(7, -1137028117, 1034668065, -1117134469, -1122683754) + + W(8, 1026745790, -1108063055, 1038436612, -1119291218) + + W(9, -1120895076, 1044686038, -1108709290, 1015550627) + + W(10, -1112739478, 1027259179, 1025414366, -1117169125) + + W(11, 1023400633, -1125318352, 1031194923, -1118201212); + sum2 = W(0, -1154464437, 1027039233, 1008089899, -1117609897) + + W(1, -1118717800, 1027401421, 1004660066, -1115032564) + + W(2, 1041471507, 1033488023, 1033628240, -1112728150) + W(3, 1011666487, -1147080182, 1053281755, 1082321679) + + W(4, 1046901100, 1012824907, 1016373458, 975139636) + W(5, -1081313755, -1065846603, 
1050032460, -1108305112) + + W(6, -1117096132, 1025013868, -1118862913, -1122974098) + + W(7, -1106934499, 1038621352, 1027534293, -1117310194) + + W(8, 1040077608, -1130639872, -1137731251, -1137500513) + + W(9, -1118720211, 1025607183, -1134922624, -1143672974) + + W(10, -1177023016, -1129758225, 1024315262, -1145025250) + + W(11, 993302909, -1114202318, 1017914834, 1025570630); + WS(1065820150, -1112682631); + sum1 = + W(0, -1105640672, -1140735040, -1096706537, 1051064512) + + W(1, -1114855500, -1128129890, -1136938293, 1045425461) + + W(2, -1095619178, 1046810501, 1041816072, -1108472292) + W(3, 1007936778, 1012508339, -1101065539, 1057299292) + + W(4, 1015987827, 1036938130, -1099447769, 1052020966) + W(5, -1083835034, 1054321083, 1049485320, -1101761488) + + W(6, 1033964109, -1110876672, -1127256032, 1044289505) + + W(7, -1106506396, 1035708141, -1110772895, 1038536521) + + W(8, -1101414038, 1042038046, 1031650401, -1113134674) + + W(9, -1107676126, 1034428376, -1102412811, 1050440565) + W(10, -1098866309, 1031811610, 948858222, 1041035251) + + W(11, -1104437005, -1126355329, 1044647659, -1119496853); + sum2 = + W(0, -1116689599, -1104060794, 1023887631, 1047625619) + W(1, -1104647113, 1032108768, 1033662400, 1047714485) + + W(2, -1101000300, -1113896185, 1049921725, -1114038586) + + W(3, -1110264521, -1106889746, -1123902349, 1058674488) + + W(4, -1100964722, -1134500667, -1113832562, 1055059659) + + W(5, 1047334183, -1105663288, 1060005487, -1105023935) + + W(6, 1034493072, -1089308922, 1060541265, -1079751447) + W(7, -1088726381, 1046968990, 1024644211, 1027816870) + + W(8, -1145174230, 1040502911, 1026206299, -1116742649) + + W(9, -1105470304, 1032487381, -1099777158, 1059286709) + + W(10, -1093081927, 1042150309, 1030330543, 1043118546) + + W(11, -1100795602, -1110882655, 1033716355, -1127164330); + WS(1051219671, 1032040132); + sum1 = + W(0, 1024246061, -1110293770, -1098745559, 1035978230) + W(1, 1050231066, -1111943280, 1029213629, -1111062337) + + 
W(2, -1104763504, 1041701281, -1106571859, 1016965359) + + W(3, -1142580623, 1026027124, -1097349705, 1051781481) + W(4, 1040668770, 1026054164, 1007533386, 1035026513) + + W(5, -1082429923, 1065410260, -1111350032, 1035026168) + + W(6, -1132388626, 1036422600, -1083869189, 1060856625) + W(7, -1113287631, 987392292, 1016678619, 1011246152) + + W(8, -1095610025, 1057796630, -1122887388, 1019565189) + W(9, 998523489, -1173422083, -1095350964, 1045145830) + + W(10, -1109374678, -1110518465, 1023830470, -1130424090) + + W(11, -1094623957, 1055180598, 1007169387, 1026555204); + sum2 = + W(0, -1123481859, -1131096266, -1107023625, 1040776673) + W(1, -1073406322, 1074128739, 1024186334, -1121050215) + + W(2, 1040345453, -1129271163, -1080765061, 1066657253) + + W(3, -1181265672, 1023491721, -1110082893, 1044851152) + + W(4, -1077100552, 1068015119, -1136581772, -1115413373) + + W(5, -1116173932, -1097567007, -1085034176, 1066670217) + W(6, 1007273042, 1036272276, 1041634800, 1051516690) + + W(7, -1104398720, -1111857710, -1130526339, -1110187157) + + W(8, 1028205855, -1106625929, 1019889879, 1026394385) + W(9, -1134191606, -1123342647, 1036476425, 1035231370) + + W(10, -1115096299, 990445793, 1035801320, -1125507188) + + W(11, -1124495703, 1007786534, -1105128420, 1030299973); + WS(-1124324720, 1038677119); + sum1 = W(0, 1008341069, 1038389179, 1056361124, -1081451931) + W(1, 1047683325, -1130646284, 1031967089, 993181316) + + W(2, 1050863173, -1096246661, 1035916906, -1134539381) + + W(3, 1023486232, -1145165264, 1054230472, -1086637900) + W(4, 1027831167, 986748732, 1037528348, 991007864) + + W(5, 1063082151, -1088130503, 1026128198, 1003368116) + + W(6, -1122921519, 1024190949, 1050043692, -1092787999) + + W(7, 1020947750, -1115252008, 1029849459, 1044031254) + W(8, 1049660054, -1099337955, 1025351359, 1036720064) + + W(9, -1114006089, -1123515526, 1043779649, -1095996120) + + W(10, 1037985981, -1116396881, 1035528454, 1052524910) + + W(11, -1097977265, -1102904005, 
-1114005272, 1029521402); + sum2 = W(0, 1042203848, -1089098687, 1035967917, 1068788369) + + W(1, -1095505730, 1025788011, -1133163156, -1098025067) + + W(2, 1052777785, -1096461215, -1097973178, -1111191248) + + W(3, -1108707706, 1036383110, 1043356562, -1090569807) + + W(4, -1127034898, 1023876171, 1027923803, 1058285406) + W(5, 1046924897, -1088764217, 1056967609, 1046203397) + + W(6, -1098359820, 1062333205, -1080913711, -1095261121) + + W(7, 1040836988, -1107189985, 1043843260, 1048839159) + + W(8, -1088408400, -1098396864, 1046078649, -1113219498) + + W(9, 1021888231, 1058743586, -1097507024, -1093746709) + + W(10, -1111389506, -1110869404, 1023965418, -1078465930) + + W(11, 1067915456, 1064748817, -1099047850, 1040158004); + WS(-1080347734, -1094728165); + sum1 = + W(0, -1115633753, 1036436624, -1113632799, 1018734050) + W(1, -1104273977, 1022550473, -1128052610, 1031912915) + + W(2, -1104119121, -1104173746, -1115038313, -1128796792) + + W(3, 996364134, 1038598142, -1095896866, 1042956529) + W(4, -1098511458, 1027191321, 1027254679, -1114408211) + + W(5, 1064358521, 1051156401, 1006576423, -1123707765) + W(6, 1027401421, -1097652239, 1061684826, -1087880029) + + W(7, 1037804713, -1125317723, -1125749028, -1109729611) + + W(8, 1051089073, -1093838037, 1039011098, -1116369959) + + W(9, -1143663749, -1111208674, 1044518984, -1100689848) + + W(10, -1157246715, -1130384675, 1017291157, -1118442418) + + W(11, 1048577269, -1104163629, -1145301883, -1113863292); + sum2 = W(0, -1137561210, 1000810292, -1109671087, -1098257975) + + W(1, -1113334223, 1010643898, 1008161562, 1017087401) + + W(2, -1143663956, -1088410023, 1033424863, -1122425914) + + W(3, -1138249722, -1104015266, -1100429296, -1110483119) + + W(4, -1113376685, -1120544050, 1032804591, 1045940061) + W(5, 1064636422, 1065774501, 1041401738, 1015219657) + + W(6, -1118405314, -1112933693, 1041071232, -1111449957) + + W(7, -1107119710, 1028103016, -1149548391, 1040845572) + + W(8, -1095622064, -1114533177, 
1041680128, -1125778629) + + W(9, -1140666666, -1131915309, -1113635957, -1105461804) + + W(10, -1114424843, -1127495397, -1121723310, -1125770861) + + W(11, -1156294183, -1154727079, -1143304660, -1128411685); + WS(1062796012, -1081433938); + sum1 = + W(0, -1140258862, 1047697940, 1041493798, -1090901033) + W(1, 1032453887, 1008978681, 1019305782, -1137205890) + + W(2, 1051824452, -1095124244, 1034552166, 1031420374) + W(3, 1017521696, 1040574503, 1048941321, -1090555023) + + W(4, 1032035723, -1124609380, -1128637410, -1131514496) + + W(5, 1061551411, -1084025976, 1036871279, 1025475023) + W(6, 1030042599, -1115750349, 1058760695, -1089279069) + + W(7, 1041251643, 1016399695, -1140240046, -1114177400) + W(8, 1056942277, -1089170928, 1032547975, 1024887097) + + W(9, 1023786378, -1119424318, 1045023263, -1099538126) + + W(10, -1117686145, 1002611913, 1027083972, 1025144619) + + W(11, 1056967544, -1087239225, 1027338481, 1012109898); + sum2 = W(0, 1004838046, 1036867068, 1023348885, -1120560082) + W(1, 1032448279, 1025411331, 1009005547, -1112484311) + + W(2, -1113333873, 1019969949, -1115333043, -1123665230) + + W(3, 1023830295, 1034701810, -1115269491, -1089366478) + + W(4, 1028612787, 1024216939, -1121204348, -1131856601) + + W(5, -1084820204, -1067115412, -1132671203, 1034346460) + + W(6, 1026709605, 1008859907, -1095888786, -1071017431) + + W(7, -1112864526, -1116027280, -1145957446, -1129570235) + + W(8, 1052224270, 1066362408, 1045423025, -1133553563) + W(9, 1024434367, -1120209470, 1043852487, 1084005284) + + W(10, 1025066805, 1032270816, -1122812157, 1023686531) + + W(11, 1047863595, 1063525936, 1043035962, -1105823812); + WS(-1103681710, -1114530079); + sum1 = + W(0, 1025861737, -1130912909, -1110124091, 1040844938) + W(1, -1105267215, 1021540535, 1029506744, -1104283663) + + W(2, -1117715067, 1046218351, -1121870249, 1032871129) + + W(3, 1032236919, -1132593150, 1039686571, -1098764800) + + W(4, 1038793978, -1120294159, 1035998951, -1102933559) + + W(5, 
-1097542514, 1059317136, -1096018272, 1036773558) + + W(6, -1111043394, 1040041573, -1088677703, 1057765873) + W(7, -1105386706, -1111111618, 1007410083, 992747222) + + W(8, -1112367617, 1054923388, -1101946581, 1031187710) + + W(9, -1126534927, 1034020670, -1105036246, 1029185429) + + W(10, -1122605111, -1138057273, -1123346113, -1119406504) + + W(11, 1035634224, 1040434219, -1143951794, 1023823648); + sum2 = + W(0, -1133865733, 1041378526, -1102114760, 1040042063) + W(1, 1036263014, -1111718847, -1114357788, 1025611699) + + W(2, 1041852494, -1102827642, 1023561916, -1109687464) + + W(3, -1113774565, 1057780422, -1090351620, 1041105879) + + W(4, -1107817782, 1041802453, -1094909862, -1134317911) + + W(5, 1058263821, -1094128428, 1049906607, -1106696300) + + W(6, 1041934157, -1084534604, 1059786982, -1121888905) + + W(7, -1106476521, 1023851848, 1048238594, -1105040821) + W(8, -1114163954, 1038597269, 1034361268, 1032032810) + + W(9, 1041091101, -1096429323, 1034035361, -1139774900) + + W(10, 1016844568, -1111010993, 1042608690, -1128928913) + + W(11, -1102786592, 1035391793, 1014428216, 1031652841); + WS(1059372396, -1131086843); + sum1 = W(0, -1116770958, 1038224730, -1145642197, 1033085351) + + W(1, -1114157744, -1129272066, -1133338231, -1110641250) + + W(2, 1044958408, -1100693257, 1030339791, 1026076824) + W(3, -1109423369, 1048611339, 1054215533, 1024247225) + + W(4, -1106016522, -1113286331, -1142676381, 1026922691) + + W(5, 1059821143, -1085100606, 1039898225, -1126911267) + + W(6, -1119417706, 1012241985, 1063416476, -1082741615) + + W(7, 1046979692, -1119319754, 1016864285, 1033593306) + + W(8, -1109435540, -1105005730, -1157274234, -1131274519) + + W(9, -1134266292, -1108924145, 1051611888, -1096553463) + + W(10, 1033932042, 1018258200, 1026073927, 1029871326) + + W(11, 1046037553, -1091104802, 1022970360, -1118999381); + sum2 = W(0, -1117639690, 1032542252, 1049036492, -1106428174) + + W(1, -1100598545, -1124515281, -1126294727, -1105983103) + + W(2, 
1053585046, -1090074420, -1101677356, 1042350020) + + W(3, -1135696947, -1105990346, 1063294224, 1074893281) + + W(4, -1088449090, -1128187082, 996342293, -1101410582) + + W(5, -1084067012, -1121077701, 1052666358, -1118105649) + + W(6, 1025603183, 1031145839, 1030876063, -1079475296) + W(7, 1049420880, 1033189178, -1120264553, 1029472991) + + W(8, -1090564487, 1059480413, -1099961793, 1012743453) + + W(9, 1034416033, -1125334020, 1047874805, -1099339963) + + W(10, 1047783255, 1016211057, 1029227505, 1016716795) + + W(11, -1103677712, -1097572114, -1106211131, 1026065323); + WS(998487488, 1024735998); + sum1 = + W(0, 1034787125, -1096711418, -1093645783, 1058035879) + W(1, -1113381667, 1020339515, 1033987197, 1049342972) + + W(2, -1089249828, 1055719968, 1033680357, 1037116250) + + W(3, -1118966639, -1099717199, -1085775960, 1060189357) + + W(4, -1108537011, -1109989161, -1138173503, 1051641300) + + W(5, -1083720757, 1061848612, -1102783527, 1046511483) + W(6, 1022076300, 1049184050, -1086133195, 1062875388) + + W(7, -1129404117, 1032957443, -1112157509, 1032130968) + + W(8, -1087092580, 1007190411, -1110331987, 1011591327) + W(9, 1034213589, 1048086279, -1098046771, 1051240059) + + W(10, 1030325168, 1039628088, -1125240838, 1039987293) + + W(11, -1087792286, 1039984791, 1051616838, -1135236550); + sum2 = W(0, 1001577162, 1048277958, -1094388043, -1079852425) + W(1, 1055450674, 1040994203, 1041063142, 1048804575) + + W(2, 1061039504, -1095199392, 1054780006, 1035580564) + + W(3, -1112011211, 1037383696, 1055288082, -1077655189) + + W(4, 1058201467, -1111912036, 1039747614, 1042988518) + + W(5, -1084834029, -1103423898, 1071463850, -1100510960) + + W(6, -1137882877, -1097597287, -1087507005, 1064308251) + + W(7, 1058560116, -1097442490, -1105672858, -1099442373) + + W(8, 1044806256, 1065240337, -1080251704, -1090250140) + + W(9, 1031849538, -1108394523, -1093629181, 1071708239) + + W(10, -1090257183, 1028605291, -1108158369, -1155656404) + + W(11, 1059993792, 
1061107030, -1073879137, -1106111844); + WS(-1077135094, 1050907162); + sum1 = + W(0, -1132921461, -1117094275, -1098008869, 1043699095) + + W(1, -1112620946, -1122422352, -1127648729, 1033699545) + + W(2, -1094355462, 1055483630, 1042990903, 1034045812) + W(3, -1124984131, 1030394994, -1089194394, 1053279460) + + W(4, 1047326318, 1011377167, -1118007170, 1045491604) + W(5, -1081313730, 1065160347, -1121246728, 1042128328) + + W(6, -1117294457, 1039295895, -1088142214, 1056280352) + + W(7, -1110840279, -1103632934, -1126452986, 1040542752) + + W(8, -1094044967, 1058021117, -1106681402, 1039870541) + + W(9, -1124706520, 1018438944, -1098093746, 1041063570) + + W(10, -1128181366, -1109127577, -1119126518, 1035722382) + + W(11, -1094059515, 1050783700, 1018820533, 1037083890); + sum2 = W(0, 1003021837, -1110495495, 1016902499, -1116948709) + + W(1, 1029108513, -1113320609, 1029225785, 1032075003) + W(2, 993748010, 1041772340, -1097222635, 1042534143) + + W(3, -1123823650, -1122423961, 1028513681, 1083263801) + + W(4, -1063916786, 1039031010, 1023947681, 1043723635) + + W(5, -1096435650, 1074788880, -1074844879, -1104645285) + + W(6, -1119691195, -1138212418, -1120166015, 1038793960) + + W(7, -1115182489, -1124877446, 1024608678, 1017994477) + + W(8, -1119557273, 1017310535, 1021770517, -1131419477) + + W(9, -1125446899, -1115371525, 1032720188, 1032034162) + + W(10, 1014563122, -1119401015, 1034042378, 1002413729) + + W(11, -1115060009, -1118011761, 1027576529, 999290841); + WS(1048452526, 1015184705); + sum1 = W(0, -1138645804, -1110581160, -1106270342, 1048623137) + + W(1, 1006760678, -1113210911, 1032256237, 1049910352) + W(2, -1113826958, 1036222192, 1042522323, 1029873877) + + W(3, -1114005032, -1103860667, -1097372109, 1057099626) + + W(4, -1101854594, -1111141177, 1026512560, -1098570768) + + W(5, -1087095027, 1061337654, -1106501979, 1024448974) + + W(6, -1140244890, -1119844941, -1095003728, 1054485156) + + W(7, -1115808507, -1125584845, -1131077462, 
1040825100) + + W(8, -1095439444, 1049129464, 1027903818, -1111469876) + + W(9, 1024452554, 1017183931, -1128526324, 1044366494) + + W(10, -1153767820, 1027351533, -1122466948, -1119780203) + + W(11, -1106181921, 1040163591, 1034550428, -1117633442); + sum2 = + W(0, -1117856953, 1034706164, 999139177, -1115089108) + W(1, 970725323, 1017499348, 1002437005, -1114697961) + + W(2, 1016679180, 1009230224, 1019562884, -1131336757) + W(3, -1122373390, -1110151254, 1034965747, 1007700984) + + W(4, -1131774725, 1031482891, 1023823647, -1078943314) + + W(5, 1069645782, -1105149666, 1038179083, -1120925641) + + W(6, -1131092262, -1111296752, 1026082346, 1033262730) + W(7, 1014219446, 1015408436, -1119824143, 1008147760) + + W(8, 1005900489, -1142960238, -1130072184, 1001625665) + + W(9, -1129026789, 1015792571, -1134452913, 1015281188) + + W(10, 1012295392, -1137039581, -1118651901, 1029244251) + + W(11, -1121319377, 1026230788, -1122304449, 1019807852); + WS(1061396972, -1097419696); + sum1 = + W(0, 1040610541, -1110826049, -1087862644, -1108888122) + W(1, 1051684947, -1117156776, -1116833412, 1042368898) + + W(2, 1000165941, 1058527541, -1102910221, -1135746300) + + W(3, 1015657075, -1113808270, -1108964145, 1056284429) + W(4, 1041698836, 1026275932, -1126977972, 1036447253) + + W(5, -1085843362, 1053927177, -1107922836, 1041042462) + W(6, 1033993320, 1043284110, -1088558114, 1062174380) + + W(7, 991752527, -1137078595, -1125945514, 1048078310) + W(8, -1086794197, 1055390198, -1112253878, 1035139905) + + W(9, 1028019732, 1028739112, -1087573386, 1042115560) + + W(10, 1039363202, -1118906322, -1120964750, 1052576556) + + W(11, -1079810761, 1051035743, 1047360574, 1038370422); + sum2 = W(0, -1104315228, -1103350409, -1094844926, 1062539510) + + W(1, -1096027281, 1014020953, -1117475550, 1024254038) + + W(2, -1091235931, -1112967686, 1042482062, -1119186298) + + W(3, 1019617669, -1116633366, -1089312569, 1056743222) + + W(4, -1129394013, 1028745390, -1096755788, 1042486592) + 
+ W(5, -1104902685, 1052266072, 1051055108, -1093268483) + W(6, 1043056626, 1038440719, 1045881224, 1037341407) + + W(7, 1044017054, 1054988226, -1097959989, -1097184876) + + W(8, -1101957876, 1012499609, -1101861122, -1102938164) + + W(9, 1038305467, -1107570984, 1058820589, 1048266302) + + W(10, -1105903455, 1026503526, -1121707278, -1092525975) + + W(11, 1058872796, -1099676541, 1041661910, -1135013257); + WS(-1084431788, -1073850193); + sum1 = W(0, -1121456839, -1130755883, -1131651239, 1035028387) + + W(1, 1035837265, -1139076328, 1023326765, -1119027356) + + W(2, -1113039213, 1052115068, 1031790055, -1110649673) + + W(3, -1111055334, 1007276024, 1041028837, -1087869490) + + W(4, 1043959725, -1126605404, -1131954777, 1032958718) + + W(5, -1121014635, 1031197854, -1151139494, 1037205893) + + W(6, -1127100870, 1051511197, -1080291009, 1064028192) + + W(7, -1097929447, 1022476742, -1115233108, 1038479638) + + W(8, -1101242168, 1046762496, 1016490449, 1029406204) + W(9, 1032290155, 1033362061, -1095204295, 1048797598) + + W(10, 974160200, -1117985767, -1113154576, -1118671783) + + W(11, -1109590335, 1048241682, 1033898418, 1016018525); + sum2 = W(0, 975878233, 1035912472, 1033266404, -1098777501) + W(1, -1115369638, 1024165887, 1027199947, 1026239811) + + W(2, -1101163205, -1120465270, 1047867282, -1114558090) + + W(3, 1032202892, -1109490583, -1092678923, 1026269275) + + W(4, 1048647954, -1127793863, 1011577542, -1099491895) + + W(5, 1052721602, 1058060839, 1026065727, -1110013405) + W(6, 989447596, 1027909011, -1115079323, 1022663675) + + W(7, -1107120056, -1115589183, -1126785583, -1142596491) + + W(8, 1042994318, -1109244590, -1139333782, 1028177183) + + W(9, -1112180928, 1032601180, -1117880304, -1139999774) + + W(10, -1113232790, -1119188249, 1027448187, 1030066347) + + W(11, 999387083, -1113199846, -1128307819, 1010561254); + WS(1060891500, 1063998119); + sum1 = + W(0, 1009319059, 1034817251, 1060685055, -1082111981) + W(1, -1146641530, 1021753903, 
1021371624, -1123888698) + + W(2, 1050963407, -1089953728, 1042831681, -1158245846) + W(3, 1020060242, 1007070174, 1059880676, -1083685732) + + W(4, 1044491976, 1024323659, 1034805925, 1031377177) + W(5, 1058492133, -1080402290, 1049293617, 974645755) + + W(6, -1119463744, 1035958587, 1057972187, -1087669949) + W(7, 1044279073, 1017781825, 1029917861, 1040845210) + + W(8, 1045390927, -1092052092, 1030870792, 1029410098) + W(9, 1022228167, -1119265271, 1050698442, -1101265316) + + W(10, 1040824939, 1009963113, 1028919037, 1041595485) + + W(11, 1042995863, -1093474029, -1117179330, 1033823734); + sum2 = + W(0, 1043252027, 1031085111, -1089976691, 1015926083) + W(1, 1075090314, -1090475122, 1039685669, -1103588836) + + W(2, -1136546874, 1047286193, 1055918010, 1041438201) + W(3, 1021402919, -1094345084, -1106220008, 1049855126) + + W(4, -1100279282, -1097421747, 1043798699, -1097680472) + + W(5, 1053086422, -1093227067, -1086978538, 1055157984) + + W(6, -1106121381, -1098140010, 1041831351, 1030378489) + + W(7, -1081018505, 1047503463, -1107286980, -1117022930) + + W(8, 1053158416, 1047431201, -1090494906, 1040447738) + + W(9, -1116219530, -1111854401, 1043898913, -1088912308) + + W(10, 1049023531, 1013873926, -1105216890, -1165267820) + + W(11, 1065796563, 1047204695, -1076607098, 1060337257); + WS(-1071907259, -1110912590); + sum1 = W(0, -1125193009, 1047025832, -1082520498, 1060301769) + + W(1, 1034040338, 1027334603, -1128862543, 1037187527) + W(2, -1091004805, 1051495910, 999732421, 1007930762) + + W(3, -1134196340, 1045585380, -1082717575, 1059488668) + W(4, 1034460834, 1012148988, 1030754750, 1046461539) + + W(5, -1081098270, 1059029683, 1039737374, -1153534395) + + W(6, -1115532794, 1034859053, -1085641231, 1058265227) + W(7, 1026869576, 1008981841, 1035978988, 1044478211) + + W(8, -1094770022, 1047385124, 1035505465, -1141674924) + + W(9, -1122396199, 1038023446, -1100806498, 1051401113) + + W(10, -1108119003, -1131397284, 1030566929, -1115297724) + + W(11, 
-1096364930, 1030610907, 1049444580, 1004145403); + sum2 = W(0, -1094095783, 1071341959, -1106266089, -1079612356) + + W(1, -1096220802, 1053270877, 1046265071, 1041114835) + + W(2, -1087794787, -1090342602, -1119797941, -1103321092) + + W(3, -1105766507, -1093757522, 1036032273, -1097206613) + + W(4, -1100480279, 1044676561, 1052972269, -1096593198) + + W(5, 1065807667, 1072329002, -1091841958, -1113258696) + + W(6, -1118583019, -1091593516, -1120668367, 1052074546) + + W(7, -1107977625, 1022355226, 1048411623, -1091154406) + + W(8, -1098744738, 1035089459, 1049018868, -1099949977) + + W(9, -1102184915, 1057911473, -1101075731, 1053464489) + + W(10, -1101117382, 1048742227, 1063609884, -1073590228) + + W(11, 1041493985, 1066861510, -1098841365, -1096064815); + WS(-1073583387, -1097642521); + sum1 = W(0, -1113793830, 1040986278, -1123577624, 1026212810) + + W(1, 1031641306, -1117606078, 1025630043, 1018743169) + W(2, 1051202676, -1107655120, 1015332695, 1026666645) + + W(3, 999171711, 1043520539, -1085714085, 1058537807) + W(4, -1102186706, 999394704, -1113970392, -1126196031) + + W(5, -1137921726, -1102760649, 1042214156, -1098706922) + + W(6, 1041107242, -1117788829, 1058609607, -1084066799) + + W(7, 1050734989, 1028174279, -1126372055, 1023908016) + + W(8, -1120709796, 1038867522, -1104026556, -1105230460) + + W(9, 1019259188, 999617264, 1049515695, -1105494638) + W(10, 1038371877, -1131430066, -1141619744, 997975216) + + W(11, 1048163249, -1113662887, -1118473257, -1117999154); + sum2 = + W(0, 1020465344, -1114187457, 1042661620, -1108558660) + W(1, 1022323488, 1024517966, -1122047285, 1035498840) + + W(2, -1112200610, 990338200, 1037081003, -1114108748) + W(3, -1132946638, 990134284, 1050532943, -1098817485) + + W(4, -1116108394, 1033071181, 1029560286, -1105572872) + W(5, 1053825241, 1046181371, -1112231866, 1024966834) + + W(6, -1137208000, -1125836673, 1035380426, 1052951690) + + W(7, -1098433232, 1018142458, 1032274109, -1107008827) + + W(8, 1041934457, 
-1099854916, -1115059895, -1114933005) + + W(9, 996568996, -1143550666, -1132156656, -1101729420) + + W(10, -1106305341, 1025004711, -1139618609, -1133886920) + + W(11, -1115627477, -1120597401, -1145950712, -1129654551); + WS(1066138518, -1093674260); + sum1 = + W(0, 1029114768, -1096639483, 1057895286, -1095967018) + W(1, 1030209323, -1115838467, 1016625343, -1107076945) + + W(2, 1046373881, -1098130813, 1027610352, 1036809869) + W(3, 1023256234, -1097327813, 1060592282, -1090692519) + + W(4, 1010824623, -1116997111, -1135602902, -1112968866) + + W(5, 1048700903, -1096019117, -1147523994, 1034515722) + + W(6, -1154917052, 1039946769, -1102042589, 1052511498) + + W(7, -1107963320, 1025259308, -1126509217, -1145778087) + + W(8, -1102212410, 1050590801, -1106421308, 1031910367) + + W(9, -1119361423, 1040195377, -1110743776, 1037092318) + + W(10, -1114290095, 1015369596, -1120895465, 1035798312) + + W(11, -1096085673, 1054517602, -1103975991, 1032370884); + sum2 = + W(0, 1020680083, -1098235374, 1040052819, 1039185699) + W(1, -1105651771, 999965284, 1026207298, -1129289080) + + W(2, 1000882372, 1041518935, -1108930987, 1025606660) + W(3, -1114338038, 1035819795, -1104532064, 1060121453) + + W(4, -1132043948, -1127194998, 1034412140, 1034843888) + + W(5, -1096717996, -1098039050, 1059664416, -1123799241) + + W(6, -1119552802, 1030579314, 1025849568, -1085068698) + W(7, 1064006582, 1020099573, -1132183796, 1031435954) + + W(8, -1126927262, -1105751584, -1108966794, -1130436386) + + W(9, 1014438782, -1118921036, 1034356940, 1039772175) + + W(10, -1090790505, 1040432622, 1026463590, -1110299389) + + W(11, 1043556407, 1060018227, -1083675102, -1098649260); + WS(-1098394199, 1021768394); + sum1 = + W(0, -1106976773, 1047476460, 1035645360, -1107202165) + W(1, -1121988294, 1008308247, 1030592418, -1106379994) + + W(2, 1042879449, -1121922344, 986778214, -1140215924) + W(3, -1109895628, 1041629170, 1052123789, -1091385002) + + W(4, 1042582852, -1120193402, 1031813276, 
-1108680996) + + W(5, 1053022447, -1090269731, 1042227034, -1127396653) + + W(6, -1114022359, 1028771909, -1107385030, -1107044710) + + W(7, 1044112989, 973525940, -1124672038, -1123109432) + + W(8, 1044862779, -1127679127, -1115617842, -1139398721) + W(9, 980431986, 1035760915, 1023833154, -1107740288) + + W(10, 1030134009, 1014367554, -1118216524, 1039049769) + + W(11, 1040198899, -1099192133, 1025832513, -1116507498); + sum2 = W(0, -1098169790, 1055345266, -1106123518, 1039932359) + W(1, -1111072353, 1012808524, 968544056, 1027898732) + + W(2, 1021773530, 1036378047, -1123343911, -1132858861) + + W(3, -1162122654, 1053687424, -1089212318, 1035540057) + + W(4, -1111121590, 1037852727, 1056997121, -1077105818) + + W(5, 1068095045, -1098361429, 1044527270, -1103837325) + + W(6, 1029158812, 1039110353, -1090252350, 1000425352) + + W(7, -1115281069, 1036594354, -1112228820, 1037144043) + + W(8, -1124556656, 1050038066, -1114307158, -1134065606) + + W(9, 1026696161, 1022799116, -1130232224, -1118055459) + + W(10, -1116705210, 1032061348, -1112296199, 1042942743) + + W(11, -1104975561, 1040445489, 1023167430, -1114916168); + WS(1064984812, 1025677564); + sum1 = + W(0, 1043884023, -1096329284, -1084712758, 1057726586) + W(1, 1043356655, 1012739123, 1030924649, 1033742216) + + W(2, -1103101243, 1046959260, 1018532660, -1131015624) + + W(3, 1038396303, -1101734196, -1087209573, 1057188432) + W(4, 1046520365, 1002161670, 1036152448, -1105624932) + + W(5, -1086845720, 1061206275, -1114153679, 1044816791) + W(6, 1017544939, 1017123640, -1084903475, 1060371326) + + W(7, -1111584053, 1024962678, 1042241831, -1108991823) + W(8, -1087299343, 1055419738, 1028245989, 1043667629) + + W(9, -1121467129, 1037154774, -1108220133, 1041931586) + + W(10, 994164350, -1114955616, 1047065797, -1096026219) + + W(11, -1087523701, 1055087388, 1051664559, 1040331724); + sum2 = + W(0, 1043159502, -1104158106, -1085409392, 1067096435) + W(1, -1090972778, -1098664617, -1105699647, 1050222709) + + 
W(2, -1090385012, -1106627676, -1102151020, 1050757442) + + W(3, 1038273264, 1025130152, -1106188919, 1052171093) + W(4, -1086083449, 1039140273, 1032147900, 1032288328) + + W(5, 1061149199, 1075677902, -1070862987, 1030992941) + W(6, 1048938986, -1103450106, 1050752016, 1075039778) + + W(7, -1070168511, 1040257314, -1106879197, -1110143950) + + W(8, 1049612036, 1073503496, -1072257998, 1041175752) + W(9, -1128156680, -1098742781, 1057886527, 1075355126) + + W(10, -1070483864, 1030488248, 1049056829, -1091104362) + + W(11, -1095713225, 1082426924, -1070643407, -1098188298); + WS(-1072457547, -1104871915); + sum1 = W(0, 1018087532, -1138749297, -1129333765, 983826087) + W(1, 1026100589, 1007352484, 1014551830, 1036550950) + + W(2, -1131367952, 1050222745, -1118152278, -1123482688) + + W(3, -1143311830, 1001512248, -1083142279, 1055877542) + + W(4, -1106883828, 1034580622, 1031881454, -1098112129) + + W(5, 1014989673, 1020808474, 1016760626, -1109491581) + + W(6, 1033102921, -1104222042, 1059974718, -1086706094) + + W(7, 1045231050, -1122062610, 1020957612, -1104597767) + + W(8, 1057969217, -1096646942, 1040528910, -1114916930) + + W(9, -1158925470, -1125431314, 1048954301, -1105443081) + + W(10, 1027306868, -1147978606, 1015585054, 1042534317) + + W(11, 1043242379, -1106542638, -1120756612, -1122581117); + sum2 = + W(0, -1120473304, 1034530006, -1151412702, -1124099243) + W(1, 1026058392, -1131120105, -1112539216, 1032795060) + + W(2, 1009244103, -1115907580, -1114805763, 1025202928) + W(3, -1132295147, 1046686841, 1037608248, 1020085733) + + W(4, -1120554932, 998829429, -1099708049, 1054832981) + W(5, 1050403530, 1033863239, -1154011810, -1131382687) + + W(6, -1120301365, -1109933894, 1051005406, -1107650300) + + W(7, 1036477609, 1020773937, -1106531527, -1089566811) + + W(8, -1106362502, 1030750625, -1122049677, -1143352615) + + W(9, -1119309605, 1004612697, -1114169666, -1130967605) + + W(10, 1028389062, -1142800679, -1165429700, -1105741108) + + W(11, 
1026755208, -1146151019, 1026996282, 1016610669); + WS(1057548396, -1118860492); + sum1 = W(0, 1024314991, 1024521628, 1019907062, -1109277847) + W(1, 1042186208, -1127185475, 1015464700, 1035643506) + + W(2, 1035362560, 1033181378, 1019936830, 1015346289) + W(3, 1033090170, 1022826491, -1102305323, 1038789717) + + W(4, 1047455161, 1015056043, 1024068436, 1044765945) + W(5, -1077384746, -1093016121, 1050623422, 1035221881) + + W(6, 1023937802, 1040253828, -1095947197, 1047039772) + W(7, 1039235079, 1019339797, 1007695803, 1032416458) + + W(8, -1110302756, 1032628147, 1012944520, 1030260028) + W(9, 1026830221, 1031527745, -1110046895, 1032291149) + + W(10, 1025595072, -1124358344, -1142835770, 1041535598) + + W(11, -1106773417, 1038601945, 1031404576, 1039118899); + sum2 = + W(0, 1028948662, -1119520873, 1040692087, -1099276378) + W(1, 1042585669, -1108959122, -1108748842, 1035535229) + + W(2, -1128105382, -1125068566, -1119570187, -1105777938) + + W(3, 1035054379, 1034598190, -1105801492, 1046560183) + W(4, -1104135640, 1040780747, -1114955281, 1040844095) + + W(5, 1058969109, 1035247795, -1104363062, -1115696651) + + W(6, -1123628391, 1017632867, -1140061979, 1037021603) + + W(7, -1119904127, -1122498951, 1033670708, 1028317650) + + W(8, -1098716294, 1041621971, -1109815486, 1039115990) + + W(9, -1113438145, -1152158269, -1100355456, 1042440154) + + W(10, 1033417138, -1102421790, 1009774019, 1033306906) + + W(11, -1097137530, 1045186907, -1096381982, 1043903068); + WS(-1111617372, -1089239798); + sum1 = + W(0, -1120946745, -1109363280, 1036859267, -1107333692) + W(1, -1131126919, 1028471724, 1043411666, -1105933140) + + W(2, -1100282243, 1036658471, 1046725686, -1102252305) + + W(3, -1105344783, -1122482989, 1033736099, -1096910672) + + W(4, -1138971218, 1048148056, 1034980559, -1097138762) + + W(5, 1040067055, 1063625005, -1100591479, -1115573502) + W(6, 1035032490, 1028445691, -1085774908, 1059841631) + + W(7, -1101046354, 1018704810, -1102176538, 1038047807) + 
W(8, 985367106, 1050659758, -1099856795, 1038410359) + + W(9, 1042691462, -1104930291, -1095561837, 1056206133) + + W(10, -1114722211, -1104725204, -1106084756, 1036625252) + + W(11, -1105930796, 1039222942, 1033845467, 1032806392); + sum2 = + W(0, 1032901751, -1102980188, -1097455677, 1019096213) + W(1, 1032175116, -1112661460, -1107914664, 1034775565) + + W(2, -1079544271, -1123759676, -1101940532, 1048109574) + + W(3, 1029069579, -1107463876, -1107584024, 1039965269) + W(4, 1028816201, -1102645998, 987500422, -1115674720) + + W(5, 1066142773, 1060530292, 1036298034, 1048822254) + W(6, -1107918292, -1122769714, 1049477386, 1039765158) + + W(7, -1100641824, -1106440053, 1041931397, 1028640927) + + W(8, -1110827944, -1133849713, 1041944181, -1113520406) + + W(9, -1106970712, -1133982193, 1040943686, -1110406142) + + W(10, -1113302080, 1032469881, 1026585516, -1112107626) + + W(11, 1033119735, -1129682772, 1007346649, -1114426196); + WS(1055684951, -1075449937); + sum1 = W(0, 1025203701, 1021145317, -1089325052, 1050955910) + W(1, 1044060206, 1023449767, -1109225282, 1041132226) + + W(2, -1098148265, 1048074946, -1122722677, -1117792831) + + W(3, -1105594014, 1045336591, -1089314051, 1059061644) + + W(4, -1138312640, 1024771419, -1102495686, 1048290762) + + W(5, -1085251962, 1063401377, -1135340574, 1034487612) + + W(6, -1115277256, 1046212734, -1086898788, 1058388246) + + W(7, -1108206354, -1122363216, -1131312361, 1033816821) + + W(8, -1089905605, 1057949871, 1012872015, 1023079171) + W(9, 1003392922, 1042625602, -1090587428, 1049257380) + + W(10, 1013091209, -1119440554, -1119401549, 1032304674) + + W(11, -1086086859, 1056447675, 1030899626, 1029355212); + sum2 = + W(0, 1061277558, -1075680192, 1064394375, 1041412946) + W(1, -1097187807, 1043650757, 1070915797, -1078675502) + + W(2, -1099840736, 1036217426, 1041322393, -1128025338) + + W(3, 1077465728, -1072669743, -1080083122, 1053075748) + + W(4, -1098777830, -1113957037, 1077005703, -1071891757) + + W(5, 
-1110541808, 1035413026, -1105806062, 1028555409) + + W(6, 1067272551, -1079654744, 1034295344, -1098463344) + W(7, 1042040059, 1035679275, 1035834095, -1094224921) + + W(8, 1034313258, 1050839735, -1099205009, -1113787473) + + W(9, -1140563054, -1124876273, 1044374977, 1012713199) + + W(10, 1043077130, 1015942004, -1101597023, -1106967769) + + W(11, 1056268760, 1033733408, -1097742853, 1036121231); + WS(-1084991020, -1093321377); + sum1 = + W(0, -1124603198, 1041431403, 1058092855, -1087194206) + W(1, 1031898965, 1027465426, 1020905181, -1106261797) + + W(2, 1050746614, -1093664550, 1020373118, 1019387090) + W(3, 1034567442, 1012534679, 1060858482, -1086211050) + + W(4, 1037199077, 942151344, 1033810210, -1100435893) + W(5, 1063338984, -1081371861, 1028138434, -1130539845) + + W(6, 1026468027, -1113177704, 1057782107, -1091610836) + W(7, 1041359812, 1019123574, 1034346016, 1029015460) + + W(8, 1050350895, -1090363161, 1026545296, 1032894586) + W(9, 1012576759, -1109083497, 1047981412, -1099126963) + + W(10, 1017607178, 1018549369, 1040898454, 1045957208) + + W(11, 1051078083, -1088769118, -1114891854, 1024075622); + sum2 = + W(0, 1049096145, -1096421724, -1104462777, 1019605429) + W(1, -1107039596, 1039363330, 1050639335, -1100344181) + + W(2, 1044471716, 1054069324, -1112798845, -1113551308) + + W(3, 1060252300, -1085369111, -1101216805, -1124780213) + W(4, 1041817400, 999612981, 1068716746, -1076762820) + + W(5, 1050347336, 1025609425, -1099054195, 1018249637) + W(6, 1070836883, -1073687271, 1052234046, -1096692411) + + W(7, 1029953841, 1028531297, 1071114171, -1075542147) + W(8, 1009296899, 1043747806, 1030832137, -1109453871) + + W(9, 1069477538, -1076870984, 1047758574, 1026561745) + + W(10, -1135279155, 1027426027, 1068968491, -1078719402) + + W(11, -1114781860, 1035243068, -1140937749, -1125031723); + WS(-1082530796, 1061926473); + sum1 = W(0, -1111745736, -1107298323, 1048819916, 1033356744) + + W(1, 1034157517, -1119982339, 1030277984, -1127531019) + + 
W(2, 1039611697, -1113172467, -1127895905, 1009757802) + + W(3, -1123296661, -1109831840, 1058045501, 1035918242) + + W(4, -1146238814, -1125062126, 1039021467, -1118701125) + + W(5, -1099048749, -1088898044, -1099659681, 1039341706) + + W(6, -1130604760, -1101933527, 1052265202, -1125679275) + + W(7, 1032846000, -1111731430, 1012520514, -1114851579) + W(8, 1034215157, 1029808585, 1005542493, 1016627083) + + W(9, -1112767704, 1016124742, 1037161440, -1110952702) + + W(10, 1024904046, -1137100912, 1017586258, -1131932512) + + W(11, 1010862879, 1040689749, -1114167271, -1130467695); + sum2 = W(0, -1123675689, 1027790609, -1107220732, -1138688370) + + W(1, -1114685917, -1131813881, 1025856461, -1115257125) + + W(2, -1103597907, 1036222096, -1104487390, -1138958066) + + W(3, -1129839981, 1041635489, -1083432255, -1099581339) + + W(4, -1113019592, 1015980625, 1039506778, -1087170514) + + W(5, 1074899174, 1052206445, -1103205528, 1015820001) + + W(6, -1111122039, 1054414527, -1087814585, 1033877978) + + W(7, -1114160943, 1023501105, -1108253250, 1040911707) + + W(8, -1119836905, 1035305654, -1128600489, -1135082354) + + W(9, 1042547871, -1107819629, -1108795814, 1040418607) + + W(10, -1113593105, 1028405761, 1023517501, -1119501461) + + W(11, -1120345851, 974999585, -1136105250, 1023691837); + WS(1064809580, 1026007555); + sum1 = W(0, -1117346201, -1139810490, 1018638922, 1046259126) + + W(1, -1110265860, -1107316166, 1012678303, -1117193870) + + W(2, 1041509643, -1116046893, -1107022182, 1042684025) + + W(3, -1112794575, 1032058104, -1089321581, 1058768682) + + W(4, 1049529906, -1107194416, 1024755966, -1127734554) + + W(5, -1106321711, 1057441571, -1097674775, -1115653262) + + W(6, -1119597108, 1040786347, 1042250009, -1088983804) + + W(7, 1052962187, -1115453338, -1109792204, -1106953519) + + W(8, -1136225478, 1052663642, -1105006210, -1104239151) + + W(9, 1039580268, -1120656619, -1140389406, 1033398978) + + W(10, -1138416249, 1026892022, -1109340249, -1123630216) + 
+ W(11, 1031754384, 1030125211, -1114619763, -1113265368); + sum2 = W(0, 1002173025, -1126476274, 993251281, 1035289805) + W(1, 1026092616, -1119256919, 1022454821, -1112202702) + + W(2, 1027055467, -1110248161, -1106697397, 1035602798) + + W(3, -1114052781, 1040267060, -1106967376, 1050537246) + + W(4, 1051631259, -1119463370, 1026488668, -1105276944) + + W(5, 1037292044, -1103163514, -1119693560, -1109176262) + + W(6, 1024785122, 1041996701, 1032247852, 1036995069) + W(7, 1026699348, -1123291766, -1114947733, 1024974628) + + W(8, -1103384730, 1044720036, -1113586574, -1103668424) + + W(9, 1041552522, -1106672600, 1024347316, -1136575544) + + W(10, 1012871948, -1104716490, -1114686018, 1014734543) + + W(11, 1038837578, 1011597497, -1113050536, -1124038001); + WS(1065652022, 1032044813); + sum1 = + W(0, 1008146233, 1024151812, -1109382253, 1046726755) + W(1, -1118504831, -1145994760, -1138253430, 1033154018) + + W(2, -1111423088, -1114751985, 1030270670, 1011776865) + + W(3, -1111490857, 1037025524, -1089787453, 1060185520) + + W(4, -1108385813, 1031075253, 1034343482, -1098142828) + + W(5, 1056996567, -1102691401, 1034117937, -1115595081) + + W(6, 1027957290, -1093711910, 1064926206, -1084615165) + W(7, 1042611828, -1141498448, 1037589943, 1019326054) + + W(8, 1020462798, -1096112169, 999245356, 1030480467) + W(9, 1035138662, -1137243780, 1028825917, -1123985530) + + W(10, -1106962907, 1020535720, -1130048270, 1033030167) + + W(11, 1017317515, -1105819364, -1131349303, -1161641125); + sum2 = W(0, 1006780909, 1018874621, -1118402441, -1125742470) + + W(1, 1022906649, 1002124531, 1032029355, -1106542042) + + W(2, -1123089851, 1042324218, 1032351097, -1127943559) + + W(3, 928839112, -1114343998, -1111882995, -1150379126) + + W(4, 1030201786, -1112138839, -1118040285, -1106692361) + + W(5, 1057618964, 1059562898, -1096401122, 1036092239) + + W(6, -1112805367, 1043467647, -1114692462, -1124030112) + + W(7, -1101041619, -1119061832, -1105940221, 1041450718) + + W(8, 
1035656455, -1096899429, -1105376866, 1038704957) + + W(9, 1025941308, 1036583459, -1122657152, -1115452253) + + W(10, 1025207396, -1115257437, 1031169874, -1106475204) + + W(11, -1181112366, -1108009053, -1117532473, 1040262599); + WS(1060219372, -1086892801); + sum1 = + W(0, -1103145773, 1039153858, 1051640744, -1098525574) + W(1, -1104573181, 1018968139, 1041685441, -1139059104) + + W(2, -1111094658, -1114399172, 1034492410, -1114173214) + + W(3, -1114046300, -1139626983, 1060139625, -1105262039) + + W(4, -1112320535, -1114750605, -1105288038, 1051743245) + + W(5, 1048845947, -1084251893, 1048366229, -1119683574) + W(6, 1036049319, 1005204005, 1056234019, -1132035760) + + W(7, 1045757805, -1113866359, -1105447101, -1096900831) + + W(8, 1058595229, -1097183901, -1102598303, -1132035040) + + W(9, -1153064218, 1006778518, 1030114274, -1095492788) + + W(10, 1044243314, 1021887633, 1037687846, -1097346218) + + W(11, 1052830405, -1105878061, -1114955456, -1107291810); + sum2 = + W(0, -1116149857, 1025241726, -1099180395, 1043244905) + W(1, -1108018378, 1036684654, -1115656995, 1004099725) + + W(2, -1094029030, 1042198638, 1030698655, -1113978987) + W(3, 1030898684, 1025926274, 1029208805, 1050305603) + + W(4, -1105752531, 1030311314, -1101937257, 1029996270) + + W(5, -1093188603, 1051892024, 1032179392, -1143836228) + W(6, -1172884346, 1015423640, 1043486640, 1010118001) + + W(7, 1042453668, -1107174775, -1108962642, -1098527410) + + W(8, 1057927618, -1111697287, -1110311603, 1032971351) + + W(9, -1122887710, -1102965987, 1033678913, -1106879588) + + W(10, 1043859461, -1114814873, 1035380843, -1099100919) + + W(11, 1054675670, -1115267941, -1124505249, -1114563702); + WS(1054285911, 1050558006); + sum1 = + W(0, -1123510162, 1038924416, -1103298920, 1049975562) + W(1, -1107117058, -1132690963, -1131523010, 1041880394) + + W(2, -1093800836, 1050884477, -1103773453, 1013663344) + + W(3, -1131464621, 1042834130, -1095165898, 1056805171) + W(4, -1101616339, 1008580322, 
1019709806, 1022473868) + + W(5, 1057683379, -1101108530, -1121085405, -1112183179) + + W(6, 1029482925, -1100861061, 1065493943, -1084744157) + + W(7, 1044641696, -1107880231, 1019519396, -1099456110) + + W(8, 1059660975, -1084827161, 1037612741, -1118057053) + + W(9, 1008876867, -1103795177, 1052993120, -1096422344) + W(10, 1041661496, -1120002940, 990268585, 1023983666) + + W(11, 1049686449, -1102767109, -1112995616, -1125681410); + sum2 = W(0, -1122499415, -1115660402, 1019721643, 1052675919) + + W(1, -1134010714, -1107636671, -1135750526, -1103885919) + + W(2, -1163427536, -1111228057, -1097257542, 1043515230) + + W(3, 1025808744, -1107570715, 1050733408, 1050715774) + + W(4, -1097200131, -1113330231, 1004729468, -1109043837) + + W(5, 1068391994, 1068640266, -1098019427, 1035061875) + + W(6, 1038371609, -1101018403, -1088555463, -1109791041) + + W(7, -1104872484, 1015805682, 1025309219, -1106866200) + + W(8, 1029079067, -1082318320, -1103711089, -1134559274) + + W(9, 1037233345, -1100735475, 1034253450, 1048468038) + + W(10, -1096395820, 1043601518, -1107252608, 1049544712) + + W(11, -1147071004, -1092391889, 1032346912, -1139706642); + WS(-1090663639, -1077388844); + sum1 = + W(0, 1018174482, -1118693311, -1093711755, 1049109025) + W(1, 1040639418, 1010315758, -1160081154, 1029100670) + + W(2, -1098783931, 1046071575, 1021012498, -1115518041) + + W(3, -1125628059, -1120731528, -1088582516, 1052420297) + + W(4, -1106387497, -1181480889, -1132569784, -1106887239) + + W(5, -1095941442, 1068562223, 986381179, 1034527238) + W(6, -1117916196, -1106415899, -1087125152, 1064382514) + + W(7, -1096200966, 974332196, -1117849353, -1097169074) + + W(8, -1094690684, 1058209900, -1116837948, 1015836076) + + W(9, 1021610031, -1146324700, -1097735927, 1048904111) + + W(10, 1025702951, -1123641014, -1122253105, -1168966176) + + W(11, -1093052224, 1056110513, -1121509238, 1025570188); + sum2 = W(0, 1034436941, -1094381136, 1032181918, 1038416019) + + W(1, -1157362266, 
1040127243, -1113720588, 1042206203) + + W(2, 998633097, -1112936962, 1041189037, -1108577062) + W(3, 1024698569, -1096498964, 1057336919, 1044225483) + + W(4, -1114037642, -1122557473, -1106229774, -1082651758) + + W(5, 1076874404, 1035551973, 1029476436, 1033287923) + W(6, 1036635696, -1065300211, 1081990850, 1041430251) + + W(7, 1035032611, 1024588020, 1046234567, -1070759890) + W(8, 1049370102, 1045207615, 1036601034, 1015731519) + + W(9, 1052080385, -1090746117, -1114040834, 1038285588) + + W(10, -1115422836, -1155116306, 1034351318, -1089089289) + + W(11, 1035723536, -1119691531, -1130950394, 1031958369); + WS(-1080363926, -1071486509); + sum1 = W(0, 1035667824, -1117755862, 1058850627, -1089812638) + + W(1, -1119781738, 1033333673, -1106562756, -1102851061) + + W(2, 1044522508, -1098855847, -1124105919, -1122920906) + + W(3, 1048836261, 1026770536, 1060900423, -1094273901) + + W(4, -1136163052, 1034167779, -1099668353, -1096598034) + + W(5, 1062774732, -1081351512, 1019768789, -1100720585) + + W(6, 1041756943, -1111989629, 1060314417, -1091724133) + + W(7, 1036217472, 1023569595, 1034350288, -1117155588) + W(8, 1058954340, -1097309107, 1034476769, 1037864032) + + W(9, -1111757455, -1106686285, 1047827972, -1089979637) + + W(10, 1034808863, -1108224543, 1036008540, -1110305168) + + W(11, 1058323473, -1097705308, -1112569431, 997120104); + sum2 = + W(0, -1130221439, 1026473921, -1098631230, 1059210683) + W(1, 991949852, -1088875775, 1039521143, 1040027805) + + W(2, 1049962270, 1037922688, -1128609979, 1043008084) + + W(3, -1106623296, -1103850305, -1102633929, 1032319554) + + W(4, 1032322677, -1081254616, 1040812416, 1044422007) + W(5, 1053913409, 1075309200, -1093367457, -1078110278) + + W(6, -1108121630, 1033782288, 1041179127, 1051007822) + + W(7, -1128820939, -1087823621, 1011405671, -1112409330) + + W(8, -1118313372, -1114991064, 1042490244, -1085620101) + W(9, 1024610419, 1042204829, 1033589003, 1064519547) + + W(10, -1108707286, 1038944538, -1120568012, 
1007920671) + + W(11, -1105557487, 1041664007, -1108726636, -1097283302); + WS(-1085146860, -1078432897); + sum1 = + W(0, 1000247468, -1149696569, 1046594056, -1105608437) + W(1, 1029982264, -1128935441, 1037132754, -1105005939) + + W(2, 1049101493, -1092242085, 1035020227, -1114749360) + W(3, -1107087079, 1047641075, 1036712721, 1034057408) + + W(4, 1000135432, 1041253327, 1043827999, -1094624073) + + W(5, 1056540903, -1091288646, -1097712673, -1098691603) + + W(6, -1105171356, 1038438012, 1056427897, -1092984875) + W(7, 1048401189, 1031740017, 1034542459, -1098392071) + + W(8, 1052522438, -1099696227, -1113847620, -1124493285) + + W(9, -1139389265, 1037791108, 1039493090, -1123704450) + + W(10, 1041969240, 1035876258, 1019250012, -1111893867) + + W(11, 1053002294, -1096292923, -1109486882, -1117863020); + sum2 = + W(0, 981650725, 1022634928, -1122722467, -1134931805) + W(1, -1114801830, 1035155349, -1123188651, -1141995321) + + W(2, 1043118137, 1038372119, -1108690895, -1113558499) + + W(3, -1132394336, -1159373413, -1111962066, -1088039805) + + W(4, 1059960197, -1123558064, 1019312288, 1040593594) + + W(5, -1085264893, -1062981933, 1086226578, -1122982849) + + W(6, 1007032529, -1125839148, 1035586791, -1088420516) + W(7, 1052041256, 1041973847, 1019035800, -1144628769) + + W(8, 1030456162, 1033594923, -1121903247, -1112866008) + + W(9, -1139057577, -1130558040, 1025167346, -1132235322) + W(10, -1127116104, 999416881, 994405618, 996479090) + + W(11, -1121727683, -1141102409, 1010205301, 1015327588); + WS(1065314092, 1028007882); + sum1 = + W(0, 1031315399, -1118910333, -1110071882, 1027958726) + W(1, -1132049297, 1030345978, 1009229913, 1044274103) + + W(2, -1091088835, 1049048403, -1098774521, 1015648546) + + W(3, -1129846167, 1040482617, -1081937298, 1058342964) + W(4, -1097400260, 1033820695, 1030512684, 1028502523) + + W(5, 1057125791, 1041882615, -1128387300, -1123750794) + + W(6, 1027211107, -1098008831, 1063505604, -1081007746) + + W(7, 1042307230, 
-1119357859, 1024491495, -1114626726) + + W(8, 1056913060, -1094994912, 1046624506, -1108219417) + + W(9, -1133667543, 1030034106, 1045615294, -1103714125) + W(10, 1018202602, 1021380405, 1020076159, 1035646611) + + W(11, 1046315392, -1120947018, -1116178789, -1113806211); + sum2 = + W(0, -1105494348, 1051114204, -1106549278, -1106413411) + W(1, 1026636954, -1101805730, 1042860341, -1095282678) + + W(2, -1095274416, -1115620988, 1032482885, -1123651604) + + W(3, 1034523914, -1099570184, -1097084890, -1100406176) + + W(4, -1103782738, 1033985372, 1032509803, -1092355888) + W(5, 1068351366, 1061966336, -1111829172, 1041181693) + + W(6, 1037500440, -1093487707, 1045336627, 1064976080) + + W(7, -1099957245, -1105149847, -1136462181, -1101634066) + + W(8, -1093371043, -1095768559, 1011878641, 1041978329) + W(9, -1115571726, 1038218158, 1025607496, 1051011462) + + W(10, -1122142689, -1106959075, 1030953968, -1144657738) + + W(11, -1098135458, -1103066203, 1046841453, 1034424520); + WS(-1100053422, 1042143034); + sum1 = + W(0, -1121193300, 1038907933, 1037214192, -1104157285) + W(1, 1003265009, 927491180, 1027006179, -1101648146) + + W(2, 1052804861, -1116788520, -1105088109, 1036085839) + + W(3, -1116784408, 1051061595, -1092953078, -1115727182) + + W(4, 1044603252, -1112320669, -1124403186, -1096312949) + + W(5, 1063691363, -1094770533, -1110178331, -1132796507) + + W(6, 1032959000, 1021329739, 1057610578, -1085255924) + W(7, 1042273837, -1129217311, 1036096698, -1104405590) + + W(8, 1046101531, 1032460934, -1099699080, 1029210945) + W(9, -1106874909, 1032316755, 1033832353, -1101777344) + + W(10, 1042741769, -1111566277, -1140087089, 1026338531) + + W(11, 1044117027, -1123976854, -1110400285, 1023324661); + sum2 = + W(0, -1141740532, -1127364827, 1016858659, 1019476887) + W(1, 1027610232, -1114104878, -1141563308, -1115378106) + + W(2, 1014774982, 1033910688, -1098709711, 1038463125) + + W(3, -1146742484, -1097895309, 1061549017, -1098904537) + + W(4, 1048121474, 
1009723338, 1037326381, -1089066292) + + W(5, -1086943216, -1087533486, 1044386108, -1128378528) + + W(6, -1100634945, 1074712949, -1081904953, 1051894188) + + W(7, 1035948687, -1114075451, -1112991637, 1036278807) + W(8, 1016249563, -1105464222, 1034738839, 1023041183) + + W(9, 998071241, 1023404119, -1104500239, 1039001405) + W(10, -1111082062, 1028418264, 1028661660, 1032269699) + + W(11, -1118089334, 1002948788, 1032376859, -1116656881); + WS(1064553004, -1122811923); + sum1 = + W(0, -1129171987, 1031404000, 1045766677, -1102182210) + W(1, 1024429036, -1144455651, 1032276908, -1105326008) + + W(2, 1049463355, -1094389820, 1026361994, -1121595412) + W(3, -1113233767, 1049333812, 1036005443, 974120215) + + W(4, 1035186995, 1034062296, 1041762428, -1095271692) + + W(5, 1059787495, -1088583546, -1095666629, -1100003363) + + W(6, -1110071667, 1021654775, 1057980603, -1092394330) + W(7, 1048809333, 1024774373, 1027123610, -1101813124) + + W(8, 1052117804, -1097358280, -1132328890, -1125809241) + + W(9, -1123823416, 1034262350, 1041561742, -1111517243) + + W(10, 1043990330, 1035275874, 1026480898, -1114787413) + + W(11, 1051913520, -1097569834, -1107060706, -1116630377); + sum2 = + W(0, 1027128731, -1118374135, 1034513610, -1123174378) + W(1, 1026835315, -1114420605, -1153474964, 1014938475) + + W(2, -1107487108, -1123472496, 1026726549, 1026647445) + W(3, 1032141903, -1106631533, 1042187602, 1054134699) + + W(4, -1092438474, 1008281595, -1114194268, 1049333862) + + W(5, -1091768288, 1082046784, -1066065893, 1032174407) + W(6, 1010972171, -1117272233, 1029258819, 1048672371) + + W(7, -1108402828, -1107223380, -1128303982, 1026850075) + + W(8, -1112584656, 1015595275, -1129520676, 1020663133) + + W(9, 1020076945, -1130317298, 1019094573, -1143083862) + + W(10, 1012988295, 1016020771, -1125252319, 1023253865) + + W(11, 1030375545, -1112246350, 1033146096, -1120189048); + WS(1066855734, -1119441794); + sum1 = + W(0, -1127562022, 1039804281, -1090674092, 1057948125) + 
W(1, -1113289318, 1040603681, -1123770274, -1106380690) + + W(2, -1096623933, 1041520634, -1102744925, -1104646487) + + W(3, -1115364812, 1049486942, -1088928155, 1058254390) + + W(4, -1126493725, 1030330554, -1108591004, 1052921753) + + W(5, -1086271011, 1066610309, -1125400685, 1047144630) + + W(6, -1109630903, 1002857256, -1088126629, 1047590014) + + W(7, -1097419497, -1118729135, 1012450637, 1049080999) + W(8, -1098448030, 1058670697, 1027654745, 1040353710) + + W(9, -1111453929, -1123146086, -1095526376, 1020669183) + + W(10, -1103558716, -1110153949, -1112819014, 1045715313) + + W(11, -1090620137, 1057080301, -1122765265, 1038000005); + sum2 = + W(0, 1029773001, 1045848843, -1100354950, 1042255280) + W(1, 1031741567, -1106457511, -1098400772, -1098127734) + + W(2, 985414438, -1114969170, -1101857955, -1121665039) + W(3, 1049403575, 1040542896, -1094506611, 1044461531) + + W(4, 1033605854, -1106685198, -1106957817, -1113384065) + + W(5, 1065317808, 1063282493, -1099888837, -1090380876) + + W(6, 1040180418, -1098054809, -1102424534, -1111953320) + + W(7, -1148110913, -1097784009, -1125940042, 1051521525) + + W(8, 1038107076, 1052337139, -1125555808, 1010307405) + + W(9, -1121768997, -1098090743, 1030043463, -1098581045) + + W(10, 1034608926, -1105861523, 1035657430, 1035815778) + + W(11, -1104826760, 1052586351, -1105779304, -1121200133); + WS(1024108216, 1053619151); + sum1 = + W(0, -1110237276, -1117217830, 1029541828, -1116974327) + W(1, 1028000106, 997960357, 1036125123, -1104025987) + + W(2, 1035829669, 1029841740, -1125285423, -1115997186) + + W(3, -1105593599, 1053313098, 1040682969, -1093167724) + + W(4, 1031701731, 1020700860, -1116690717, -1107833238) + W(5, 1058976450, 1049118258, -1094097902, 1017239158) + + W(6, -1138403916, -1112296045, -1104638900, -1102593951) + + W(7, 1055269059, -1122615091, -1118424632, -1108171422) + + W(8, 1053496066, -1123704112, -1100830255, 1029941429) + W(9, -1118568038, 1025019530, 997886569, -1107943218) + + W(10, 
1036129823, -1111217181, -1117554448, -1113883668) + + W(11, 1047703644, -1108512765, -1109656827, 1020926429); + sum2 = W(0, -1119993039, -1109450011, 1040401784, -1106582025) + + W(1, 1033614749, -1147493015, 1038893624, -1096262037) + + W(2, 1053717996, -1117988753, 1000703247, -1114388777) + + W(3, -1105602869, 1055566572, 1043380434, -1092886002) + + W(4, 1034388956, 1043473668, -1106354535, -1158461197) + + W(5, -1105416337, 1072933379, -1081255560, 1043342555) + + W(6, 1032998629, -1117939535, -1087999087, -1083385663) + + W(7, 1066426555, -1094544057, 998835515, -1108137959) + W(8, 1027244587, 1045359897, -1093783038, 1045623937) + + W(9, -1119543359, 1037096144, -1117843291, -1101171319) + + W(10, 1051461624, -1102892249, 1026155597, -1114868339) + + W(11, 1031605777, 1022787365, -1100938466, 1041102940); + WS(1062840044, 1036517115); + sum1 = + W(0, 1027882709, 1024218619, -1088734638, 1057788973) + W(1, 1032695796, -1105941698, -1102360361, -1119309706) + + W(2, 1031157223, 1045291130, -1107257205, 1032367183) + W(3, 1037386637, 1051294017, -1082181941, 1057574554) + + W(4, 1039133128, 1015412087, 1026327034, -1105387538) + W(5, -1087289600, 1064725383, 1024151288, -1102302982) + + W(6, -1102333276, 1045232110, -1089919726, 1056789371) + W(7, -1131902346, 1046746652, 1042539613, 1042423170) + + W(8, -1088061047, 1052153154, 1045762482, -1110282969) + + W(9, -1113170154, -1107171938, -1126992520, -1124580994) + + W(10, -1130270357, -1116552186, -1117867630, 1042893141) + + W(11, -1098512042, -1109192172, 1047580087, 1032060742); + sum2 = W(0, -1128425377, -1106124336, -1106789449, -1116881359) + + W(1, -1108203604, -1154187588, 1027402769, -1123408727) + + W(2, 1046361983, 1040877836, 1041799035, 1037669599) + W(3, -1122701285, 1035952055, 1033758045, -1092736894) + + W(4, -1110885432, -1106801609, 1029460017, -1106291502) + + W(5, -1092942672, -1089281400, 1058715791, 1044648045) + + W(6, -1128656941, 1034330078, 1022825967, 1067937621) + + W(7, 
1060265099, -1104725901, 1015114546, -1137130753) + + W(8, 1043050798, 1057367901, -1090127289, -1099584998) + + W(9, -1131912907, -1151273092, 1026393205, -1100743964) + + W(10, -1100147508, -1114467924, 1012677721, -1141059698) + + W(11, -1114857397, -1116376233, -1085394823, 1048436512); + WS(-1090906199, 1032077706); + sum1 = + W(0, -1133027020, -1114971356, 1042474992, -1109782777) + W(1, 1040233361, -1113511763, -1121893314, 1044790618) + + W(2, -1113284386, 1033127972, -1105262125, 1025873725) + + W(3, 1023318639, -1123234602, 1041326284, -1090554926) + W(4, 1034823031, 1011115622, 1029445080, 1040694938) + + W(5, -1105269270, 1053731809, -1109230339, -1129412204) + + W(6, 1001128051, 1035215242, -1090300845, 1054234875) + W(7, -1098565414, 1038160135, 1034316233, -1102125462) + + W(8, 1049843289, -1098556565, 1042417542, 1018477632) + + W(9, -1103958128, 1043268297, -1131346756, -1121911775) + + W(10, 1026215817, 1009497794, 1035906358, -1122145747) + + W(11, 1025511402, -1113919963, -1122686272, 998364239); + sum2 = W(0, 1010874613, 1030375188, -1115235366, 1049471257) + + W(1, -1100352469, 1040126787, 1031993168, -1098558104) + + W(2, 1051363302, -1094785947, 1040320218, -1131978284) + + W(3, -1110586713, -1116546039, 1056517150, 1031272431) + + W(4, -1111763071, -1124433226, 1035954309, -1089737673) + + W(5, 1057138262, -1102658594, 1043402933, 1027656919) + + W(6, 1034349301, -1094899703, -1098486858, -1112892354) + + W(7, 1057577914, -1092308561, -1122071401, 1053685859) + + W(8, -1093546729, 1046184196, 1039716432, -1116345243) + + W(9, -1140127141, -1108774266, 1034072089, -1109252950) + + W(10, -1122970680, 1016915650, 1029284667, -1124583674) + + W(11, -1122424073, 1037904141, 958822899, -1140678181); + WS(1065971990, -1117349785); + sum1 = + W(0, 1041698888, -1103591502, -1130045402, 1049080916) + W(1, 1009208654, 1021475879, -1112126136, 1043874709) + + W(2, 1044992637, 1047240445, -1121869537, -1134810027) + + W(3, 1034826515, -1101299034, 
-1083999976, 1060703412) + + W(4, -1123043996, -1144695867, -1129568652, -1186542860) + + W(5, -1081978560, 1061790267, 1028920692, 1017436104) + W(6, -1109843230, 1041410157, -1085461041, 1058214583) + + W(7, -1105683576, -1131209785, 1024013025, -1159955630) + + W(8, -1090063736, 1059365054, 1011681824, -1139574520) + W(9, 1009653886, 1030113747, -1097631850, 1049303674) + + W(10, -1125202109, 1000157667, -1137279078, -1120752640) + + W(11, -1094129699, 1052607100, 1021258909, 1022839599); + sum2 = W(0, -1117014919, -1073720352, -1104300581, 1055286111) + + W(1, -1105567211, -1108683793, -1130364155, -1086725672) + + W(2, -1091483554, -1109695767, 1032269150, 1033653176) + + W(3, -1106662873, 1069712606, 1061376514, -1114136197) + + W(4, -1138351682, -1115525434, -1114007102, 1062079613) + + W(5, 1060429625, 1052661025, 1018497014, 1043366723) + + W(6, 1049348603, -1102930718, -1098979910, -1131131469) + + W(7, 1032920333, -1107942477, -1111037227, 1020707077) + + W(8, -1091915359, 1050631696, -1117367449, -1139459182) + + W(9, 1016435713, -1114426132, 1043583392, -1112682766) + + W(10, -1122101547, -1122885189, 1035610526, 1034066723) + + W(11, -1091474650, -1112601210, 1039950258, 1026792554); + WS(-1087438700, -1079683283); + sum1 = + W(0, 1017584527, -1111085935, -1097724786, 1052950249) + W(1, -1112145065, 1032156829, -1126645293, -1110290014) + + W(2, -1098381907, 1053593812, -1106211842, -1129163939) + + W(3, 1024510860, 1033919541, -1083731542, 1063649365) + W(4, 1003633557, -1118260846, -1126938201, 1049726143) + + W(5, -1081500983, 1059087538, 1023563130, -1120786763) + W(6, 1025818403, 1042036038, -1097215121, 1052548297) + + W(7, 1019377518, -1116189667, -1163276521, -1114444262) + + W(8, -1091706404, 1057898452, -1104008328, 1023503580) + W(9, 1040771729, 1029908410, -1097070911, 1045403069) + + W(10, -1115067299, -1112674412, -1160528892, 1035428627) + + W(11, -1097940968, 1052829673, 1032435766, 1015355644); + sum2 = + W(0, 1027460995, -1139353737, 
-1116062036, -1105887481) + + W(1, 1043084390, -1112909236, -1126036613, -1172793556) + + W(2, 1041340702, 1031879973, -1108443334, -1144584219) + W(3, 1029366028, 1021322407, 1050063273, -1101108606) + + W(4, 1031078409, -1116288208, -1129309981, -1097919394) + + W(5, 1055277098, 1009470709, 1041616760, -1099143154) + W(6, 1020875062, -1121912528, -1106551211, 1047064096) + + W(7, -1107655532, -1100042276, 988377354, 1034370211) + W(8, 1041415105, 1034815104, 1018545561, -1108610892) + + W(9, -1109676838, -1137199485, -1128558209, 1034836242) + + W(10, 1014245687, -1111876576, 1030688918, -1107628664) + + W(11, -1134721053, 1049845283, -1104117922, -1123449114); + WS(1044595630, -1081949232); + sum1 = + W(0, 1039675598, -1112185757, -1088266627, -1103352647) + W(1, 1051863744, -1115854735, -1114551286, 1041771076) + + W(2, -1144202605, 1058269731, -1102436935, -1130126796) + + W(3, 1008233858, -1113654239, -1106609306, 1055957016) + W(4, 1041931682, 1017898629, -1121252161, 1036138408) + + W(5, -1087961804, 1060473075, -1105147684, 1041818984) + W(6, 1033942172, 1043328887, -1087619944, 1062639593) + + W(7, 1002098271, -1130413359, -1122512668, 1047911808) + + W(8, -1086197995, 1055667963, -1111264410, 1032948860) + W(9, 1026205083, 1028532559, -1087181441, 1042300482) + + W(10, 1038991809, -1115140949, -1118876117, 1051948425) + + W(11, -1079365891, 1051816684, 1047648830, 1035539513); + sum2 = + W(0, 1050752765, -1087877559, -1089125923, 1063798323) + W(1, 1013490077, -1098786735, -1102559074, 1042152886) + + W(2, 1035361880, 1052128007, -1090156566, 1029952535) + W(3, 1004621594, -1095393630, 1058416370, 1062325475) + + W(4, -1094287538, -1116454071, -1106905815, -1106434273) + + W(5, 1058355794, -1120474523, -1101206197, 1025568531) + W(6, 1038134564, -1099723068, 1056388755, 1051681161) + + W(7, -1100293430, 1009033997, -1114858390, 1036662936) + W(8, 1040360076, -1096401119, -1104699402, 997069429) + + W(9, 999530010, -1101776331, -1106345832, -1106373450) + 
+ W(10, 1048788631, -1117506479, -1104442024, 1054232067) + + W(11, 1034476952, -1089943559, -1113690608, 1019208527); + WS(-1082323244, 1081334754); + sum1 = + W(0, 1017652669, -1114671162, 1045686283, -1100397239) + W(1, 1040334760, -1120290385, 1026074030, -1101582293) + + W(2, 1051997878, -1097273854, 1036251335, 1010893986) + W(3, 1026308835, -1102225205, 1056290627, -1091487775) + + W(4, 1044247257, -1114193219, -1139023631, -1128122150) + + W(5, -1109179969, 1033600923, 1034603313, 1015477828) + W(6, -1124784756, 1047028384, -1088067773, 1058585755) + + W(7, -1098471302, 1027532986, -1157466090, 1041367009) + + W(8, -1090935975, 1058457225, -1104671877, 1028455628) + + W(9, -1151223253, 1037350457, -1100014755, 1047302995) + + W(10, -1113240670, 1023216600, -1123067319, 1018518877) + + W(11, -1100605637, 1040259816, 1007040336, -1167442276); + sum2 = W(0, 1021161720, 1034310052, 1028060302, -1111691904) + + W(1, -1117456161, -1120681063, -1130912565, -1120070653) + + W(2, 1057895756, 1060473449, -1103940220, 1030806761) + W(3, -1130074229, 1013066141, 1050008660, 1048707954) + + W(4, 1041027109, -1131293055, -1117040706, -1097167140) + + W(5, -1077198236, -1071981659, -1101172514, 1009545637) + + W(6, 1025588007, -1126989997, 1036249174, 1055589119) + W(7, 1014637039, 1035622377, 1012731209, 1035986228) + + W(8, 1061480822, 1068029984, 1014223793, -1130249456) + W(9, -1117264939, 979500482, 1036420478, 1025639810) + + W(10, 1027808652, -1120874755, 1035821810, -1131150309) + + W(11, -1114248001, -1104930835, -1128414380, 1036288471); + WS(1053726551, 1012659382); + sum1 = W(0, 1032083097, -1106638998, -1098702088, 1047871676) + W(1, 998886703, 1022035144, -1115995457, 1034541187) + + W(2, -1098027112, 1032148771, 1042659740, -1112198366) + + W(3, 1025061558, -1115280977, -1088652495, 1060231518) + + W(4, -1106688851, 1034668492, -1122031383, -1104507249) + + W(5, -1092902123, 1065996118, -1104094145, 1024148149) + + W(6, 1038196358, -1114143426, -1085882325, 
1058126194) + + W(7, -1102012910, 1010989620, -1115071689, 1029764374) + + W(8, -1097451113, 1059563219, -1121352461, 1012420282) + + W(9, -1130575936, 1031225348, -1098237300, 1035470703) + + W(10, -1128714910, -1123786037, -1113099877, -1102798468) + + W(11, -1111014420, 1054581342, 1000597622, 1024134712); + sum2 = W(0, -1124504165, 1043258069, -1119272862, -1120765402) + + W(1, -1132584009, -1130103221, 1044199209, -1105860909) + + W(2, -1096303760, 1027333462, -1112573193, 1030769098) + + W(3, -1105768285, -1149580326, 1030851358, -1110802573) + + W(4, 1028533186, -1111917317, 1030597508, 1054859448) + W(5, 1057797022, 1040041196, 1035436142, 1033418125) + + W(6, -1097181409, 1065166573, 1062380695, 1017534493) + + W(7, -1141430739, -1140263257, 1040834484, 1012576825) + + W(8, -1116955554, -1108797661, 1032147286, -1126074797) + + W(9, -1106395293, -1091772000, -1113754765, 1042765191) + + W(10, -1113752657, 1020067757, 1038991009, -1077787203) + + W(11, -1098893101, -1110880129, -1120959090, -1136443945); + WS(-1091585367, -1096979755); + sum1 = + W(0, -1125210148, -1104771642, -1104387016, 1039960904) + W(1, 1043768271, -1127914889, -1121704545, 1041680018) + + W(2, -1093500244, 1056326757, -1114046313, -1113011982) + + W(3, -1120024086, 1043362647, -1085845307, 1060708396) + + W(4, 1022927067, -1140249314, -1106223422, 1048656073) + + W(5, -1086139981, 1060053831, -1105827043, -1132509525) + W(6, 1027649645, 1038801018, 971788283, -1115241635) + + W(7, -1120254754, 1027284814, -1106879908, -1103521718) + + W(8, -1124812374, 1040958787, 1038747748, 1026834103) + W(9, 1033259600, -1106174833, 1011346436, 995843043) + + W(10, -1118768800, -1115807705, -1108105765, 1031226898) + + W(11, -1119913673, 1040930453, 1040461604, 1029803545); + sum2 = W(0, 1007158722, 1004004293, -1123076619, -1109929145) + + W(1, -1104848259, 1045822378, -1117465525, 1030693613) + + W(2, 1034532408, -1107544976, -1095482533, -1130708853) + + W(3, 1020157105, 1007550482, -1101572236, 
-1110988335) + + W(4, -1085485924, 1036333634, -1115575087, 1032793866) + + W(5, 1059720067, 1067665797, -1090269083, -1111111456) + + W(6, 1029731993, -1104867994, -1112732963, 1048427228) + + W(7, -1135504754, -1105802165, -1115951245, -1111503366) + + W(8, -1098900155, 1058832170, 1037004208, -1115747431) + + W(9, 1042169362, -1101768383, -1132362761, 1029991341) + + W(10, -1108400441, -1132122165, -1111020797, 1042858280) + + W(11, -1101011815, 1043396912, 1041064016, -1126685993); + WS(1050761175, 1067771859); + sum1 = + W(0, 1007679755, -1103008542, -1117542415, 1040506400) + W(1, -1107587308, -1123566389, 1016344868, 1051881455) + + W(2, -1099208581, 1050438683, -1138473483, 1035136294) + + W(3, -1111212068, -1113694143, -1094852820, 1036449755) + + W(4, -1102372368, -1110750261, 1035369236, 1057165115) + W(5, -1086714990, 1067282489, 1045850761, 1047098321) + + W(6, -1114152399, 1030154052, -1087862176, 1049754336) + + W(7, -1112794889, -1108921199, 1027679203, -1119705863) + + W(8, -1098820035, 1030273979, -1111030523, -1158006009) + + W(9, 1024513697, 1037582923, -1101037788, 1048082428) + + W(10, -1114576136, 1025557440, 1023429051, -1106945041) + + W(11, -1098056967, 1020854026, 1000524418, 1029171882); + sum2 = + W(0, -1108377721, 1042939389, -1118508723, -1101760284) + W(1, 1037932369, -1111488356, 1043256577, -1092756549) + + W(2, -1136988785, 1050996294, -1104406221, 1040203060) + + W(3, -1098471532, -1140040329, -1099870420, -1098704018) + + W(4, 1015984185, -1103619810, 1048495965, -1094992475) + W(5, 1064902851, 1062859454, -1098855018, 1045571731) + + W(6, -1103576309, 1017276157, 1045749581, -1089477514) + W(7, 1025174042, 1025902034, 1036658009, -1104050853) + + W(8, 1045838269, -1157061765, -1121418697, -1107737652) + + W(9, -1107738492, 1032200565, -1110226959, 1046491511) + + W(10, -1106492592, 1044444185, 1030436882, -1104348148) + + W(11, 1042649061, -1106567852, 1027657606, -1105647758); + WS(1057448172, 1069108917); + sum1 = + W(0, 
1025232554, -1116799928, -1100727837, 1055215485) + + W(1, -1106508548, -1139419762, -1128422002, -1123655391) + + W(2, 1037047614, -1104723136, 1040448906, 1006660815) + W(3, 1032607415, -1111298160, -1102160962, 1056783106) + + W(4, -1103004521, 1016236833, -1106129908, -1119092894) + + W(5, 1036246919, -1165535249, 1040745899, -1123853933) + + W(6, -1141856298, 1044964179, -1088316592, 1049606186) + + W(7, -1132962087, 1016605067, 1015221011, -1128617181) + + W(8, -1116622467, 1044617768, -1103456342, -1160895193) + + W(9, 1010040610, -1116425884, 1027658969, -1132120236) + W(10, 1037741920, 1018376479, 1026592994, 1015937075) + + W(11, -1102536008, 1042573835, -1118303623, -1120915345); + sum2 = + W(0, -1128749003, 1042697161, -1101835107, 1038607216) + W(1, -1108371617, 1031384482, -1091479855, 1061865996) + + W(2, -1097573200, 1026943422, -1114744392, 1026017402) + + W(3, -1124909389, 1067634595, -1083343720, -1110364223) + + W(4, 1035072621, 1036186174, 1030767422, -1111988197) + + W(5, -1090130192, -1089515454, 1054823097, -1104718688) + + W(6, 1028475536, -1102439466, 1055493018, -1090714326) + + W(7, 1059932372, 1024781950, -1118686225, -1117920963) + + W(8, 1027292464, -1101900071, 1031097460, -1110105890) + W(9, -1133507353, 1022976741, 1030733458, 1033968573) + + W(10, -1132241300, 1040128898, 1003197653, 1008285339) + + W(11, 1002427213, 1033064671, -1101772178, 1026666744); + WS(1065065708, -1125796377); + sum1 = + W(0, -1112144555, -1106943759, 1017193539, 1048634147) + W(1, -1127916040, -1132404112, 1029634469, 1025116330) + + W(2, -1112668180, 1027133034, 1026809557, 1004217594) + + W(3, -1112840237, -1105630541, -1112954809, 1055950575) + + W(4, -1112907992, -1111964638, 1034269757, -1106751530) + + W(5, -1096457447, 1060800975, -1093799807, 1038388364) + + W(6, -1122801969, 1023923175, -1094266089, 1051425450) + + W(7, -1103940357, -1132895812, -1133240789, 1041797158) + + W(8, -1096385878, 1041695411, 1039766276, 1013909417) + W(9, 1016754760, 
-1113089188, -1106190977, 1044103184) + + W(10, -1130590086, 1032062573, -1119164014, -1130226634) + + W(11, -1114305030, 1036967948, 1008751883, 992673732); + sum2 = W(0, 1043431555, -1098806978, 1024222598, -1105154239) + W(1, 1031236307, 953732840, -1109692118, 1046643695) + + W(2, 1045861043, 1054740901, 1035789437, -1115663935) + + W(3, 1036275465, -1097975189, -1101643320, -1083291725) + + W(4, 1007109846, 1042301531, -1111890182, 1048664613) + W(5, 1058544270, 1068703829, -1081353417, 1035678115) + + W(6, -1102288351, 1049750262, -1098883379, -1080748028) + + W(7, 1031821902, -1120166561, 1045724738, -1098934938) + + W(8, 1049188212, 1058055770, -1100833476, 1047454611) + + W(9, -1109691788, 1032869755, -1107029975, -1100454574) + + W(10, 1051695731, -1096856793, 1007408138, -1111259526) + + W(11, 1048185689, -1148294292, -1114785167, 1041528279); + WS(1061652844, 1044003957); + sum1 = + W(0, -1118715405, 1022617806, 1044034950, -1101088515) + W(1, 1019688911, -1131961856, 1029075868, 1045689305) + + W(2, 1030904095, -1103346936, 1045834099, -1110934572) + + W(3, -1115471369, -1094261862, 1059625182, -1096538237) + + W(4, -1102841294, 1036052922, 1032954233, 1027529537) + W(5, 1057470859, -1086691558, 1041319556, -1148682371) + + W(6, 998963525, -1099266912, 1062131070, -1094363880) + W(7, -1166277444, 1017830866, -1111723799, 1045678528) + + W(8, -1098407445, -1105378993, 1028040508, -1109411965) + + W(9, 1036011862, -1119248992, 1047156095, -1123294521) + + W(10, -1135804811, 1030886475, -1152687195, -1108410990) + + W(11, 1037164834, 1016045215, -1106724595, -1123601124); + sum2 = W(0, -1123087716, -1115107680, 1039737456, 1045652727) + + W(1, -1106100707, -1128513463, -1122327585, -1105390433) + + W(2, -1106584843, -1139899009, -1102201417, -1114704327) + + W(3, 1034911826, 1034185422, 987338697, 1060423005) + W(4, -1111505321, 1009548801, -1115300382, 1042081129) + + W(5, -1094697399, -1103292099, -1094374885, 1026056400) + + W(6, -1147444258, 
1036682846, 1048628369, -1079464406) + + W(7, 1066067653, -1112497116, 1031538244, 1043195731) + + W(8, -1100913302, -1102260713, 1062702469, -1107096307) + + W(9, -1122824244, -1112377777, 1041194171, -1101961861) + + W(10, 1013733313, -1125635761, 1026384688, 1024479380) + + W(11, 1036768370, -1106123677, 1034543612, 1037241080); + WS(1061983340, -1091535279); + sum1 = W(0, -1154828523, -1133965299, 1043906007, -1118138894) + + W(1, -1109842717, 1025343169, 1027504414, -1128653831) + + W(2, 1027190112, -1097538828, 1037523079, -1121887129) + + W(3, -1131271078, -1114200663, 1061948198, 1027560685) + + W(4, -1106988467, 1023298667, -1120408436, -1129743238) + + W(5, 1054216086, -1081158944, 1048594270, -1108007945) + + W(6, -1131629652, -1117839504, 1062537731, -1089869577) + + W(7, -1121464863, 1034422373, -1122568783, 1035797658) + + W(8, 1028696552, -1097887897, 1028427349, -1120763036) + + W(9, -1165314478, -1115205249, 1045640431, -1111951485) + + W(10, -1121440284, 1008081777, 995201141, 1019503421) + + W(11, 1044774081, -1105384996, -1119142300, -1122356070); + sum2 = W(0, 995087446, 1036608468, 965233164, 1033021468) + W(1, -1114419070, 1033025046, 1030495116, -1099055008) + + W(2, -1104807397, -1107407110, 1033869233, -1119280070) + + W(3, 1007114937, -1115487063, -1070882050, 1038417178) + + W(4, -1103406865, 1028798922, -1121939088, 1057848194) + + W(5, 1075270016, -1094462735, 1051754977, -1120762821) + + W(6, -1109334595, -1106051731, 1059573542, 1039375650) + + W(7, -1104596666, 1018124826, 1012755199, 1015526929) + W(8, -1102771000, -1124510890, 1028405316, 976567206) + + W(9, 1031009559, -1118519790, 1023903156, 1016192349) + + W(10, -1111745826, 1015800189, -1123906524, 1024955281) + + W(11, -1159976683, 1023974018, 955743793, -1129936862); + WS(1062927532, 1035014202); + sum1 = + W(0, 1039522007, -1114736246, 1037103646, -1104897563) + W(1, 1044380938, -1121785378, 1034505951, -1109500450) + + W(2, 1048942600, -1092956200, 1044086236, 1027254073) 
+ W(3, 1040732909, 1011271190, 1053278895, -1091146839) + + W(4, 1044239087, -1145709740, 1002357921, 1044028441) + W(5, -1097469770, -1093973369, 1042916452, 981579331) + + W(6, 1016035605, 1050061010, -1085392865, 1051214559) + W(7, -1112456671, 1035761861, -1122944943, 1042353405) + + W(8, -1094832633, 1050975197, -1111813313, 1033165606) + W(9, 999726956, 1036129025, -1103977959, 1043068615) + + W(10, -1112116510, 1025151372, -1140151454, 1041525964) + + W(11, -1111797875, -1126246883, -1110444494, 1033358643); + sum2 = + W(0, 1038528863, -1084044031, -1082440456, 1050939225) + W(1, 1053868195, -1113963371, 1036214533, -1091834288) + + W(2, -1077502553, -1095085693, 1028074081, -1114781385) + + W(3, 1049223347, -1110305241, 1049812579, 1050231565) + W(4, 1046376147, 1035810200, -1113699843, 1042795689) + + W(5, 1068178608, 1060273714, -1132635925, -1160679206) + + W(6, 1041716852, 1050774414, -1092073446, -1115682722) + + W(7, -1098688235, 1036026944, -1100571883, 1043118649) + W(8, 1047476754, -1105112612, 1017697398, 1016345266) + + W(9, 1017295180, 1047177880, 1034666293, 1050691511) + W(10, -1092800689, 1040315316, -1101978327, 1050263507) + + W(11, 1054410728, -1101197699, -1104788145, -1112282035); + WS(-1086906028, 1058773265); + sum1 = W(0, -1132629757, 1044191380, 1032367334, -1096691584) + + W(1, -1152318558, -1109034905, 997844200, 1043197109) + W(2, 1050808506, -1095732027, 1042781664, 1031594505) + + W(3, 965362145, -1125429564, 1058285756, -1086608026) + + W(4, 1034636741, -1119832870, -1106996409, 1033453658) + + W(5, 1062016570, -1081591954, 1043388193, -1106441585) + + W(6, -1153797548, -1118095697, 1044397873, 1041881517) + + W(7, 1045470808, -1149103576, -1120050655, -1121403972) + + W(8, 1043993638, -1096430215, -1115428219, -1113585524) + + W(9, -1122322046, -1119804562, 1042486572, 1002316636) + + W(10, 1018594522, -1135705587, 1015091632, 1026125167) + + W(11, 1047754366, -1099777504, 1024196285, -1122333654); + sum2 = + W(0, 1048104076, 
-1096548381, -1107021820, 1018456960) + W(1, -1136070657, -1112321878, 1038532068, -1086001730) + + W(2, -1131935456, 1007620609, 1040453646, 1033875386) + W(3, -1129915388, -1083843015, 1060056432, 1033171254) + + W(4, -1111662792, -1117768272, -1101156186, 1035944594) + + W(5, 1066609131, 1050949241, -1110531416, 1032538050) + + W(6, -1128179824, -1102068111, 1027352212, -1107874139) + + W(7, -1104445548, -1114035189, -1128045264, 1044127780) + + W(8, 1048571914, -1104179329, -1127646596, 1032855260) + + W(9, -1131782160, -1114990278, -1120175088, 1038875644) + + W(10, -1110249452, -1144805954, -1109473444, 1041203218) + + W(11, 1038985844, -1112054433, 1034344442, -1130080864); + WS(1046014126, 1065769758); + sum1 = + W(0, 1012096182, -1121314108, -1091239207, 1052142286) + W(1, 1042130570, 1019346753, -1112076254, 1039437825) + + W(2, -1094624040, 1048611696, -1109865911, -1152131961) + + W(3, -1100987245, 1043473803, -1091961473, 1060125676) + + W(4, -1128757714, -1123465492, -1106802021, 1045721640) + + W(5, -1088552177, 1062377804, -1119711185, 1039063160) + + W(6, -1106286530, 1049347084, -1089539427, 1053897896) + + W(7, -1128945819, -1114302645, -1129503003, -1109380841) + + W(8, -1091526873, 1057150062, -1119080717, 1015225093) + W(9, 1021132153, 1049245244, -1091375076, 1048631067) + + W(10, 1034078686, -1135518836, -1112401299, -1120677869) + + W(11, -1088888106, 1057709495, -1122337355, 1014030866); + sum2 = + W(0, -1089976447, 1064913799, -1094573985, -1109226965) + W(1, 1030445629, -1115506338, -1078304891, 1068350548) + + W(2, 1019506715, -1120561974, 1021062218, 1018259030) + W(3, -1071851632, 1070575185, 1065710309, 1002672995) + + W(4, -1127826384, 1012290398, -1070679577, 1072673707) + W(5, 1065504629, -1098180006, 1031139735, 995896746) + + W(6, -1081022179, 1063744071, 1056839493, -1117214353) + + W(7, -1112765526, 1010152224, -1113339548, -1128451504) + + W(8, -1124929726, -1111255228, 1037321868, -1118014213) + + W(9, 1011136331, 
1040942179, -1106886716, 1028658493) + W(10, -1127063938, 967860273, 1035458322, -1115281660) + + W(11, -1111465381, 1033265670, 1001938923, -1119523230); + WS(-1096468055, 1053785380); + sum1 = W(0, -1110473361, -1116957735, 1034879511, 1017136705) + + W(1, -1116669644, -1118632404, 1020813409, -1114360891) + + W(2, -1114475674, -1111337627, -1135792006, -1120549867) + + W(3, -1111899059, -1123200592, -1109008383, 1028379180) + + W(4, -1105125143, -1162426624, 1027946631, -1090968927) + + W(5, 1066746382, 1051825655, -1094808947, -1173354793) + + W(6, 1003556839, -1107232681, 1064704330, -1094442401) + + W(7, -1116312257, -1119514205, -1143096715, -1117882342) + + W(8, 1046069775, -1129057075, -1106172505, -1113957025) + + W(9, 996386562, -1118523124, 1042611044, -1114700263) + + W(10, -1135525501, -1130080208, -1123588698, -1105979736) + + W(11, 1040361862, 1022269000, -1106276184, -1123525720); + sum2 = + W(0, -1127419095, -1122531649, -1111543417, 1043407189) + W(1, 1013519927, -1134679399, -1140125367, 1025350612) + + W(2, -1108347637, 1028851928, -1117946771, 991440379) + W(3, 1028088432, -1123116186, -1101718819, 1048774619) + + W(4, 1041147594, -1127107007, -1126099139, 1020760995) + + W(5, 1056423339, -1086358248, -1112971994, -1115422093) + + W(6, -1114986450, 1019570567, 1073762237, -1076137651) + W(7, -1118107520, 1037345920, 1024738032, 1039021380) + + W(8, -1100544782, 1044261707, 1015761083, -1103403994) + + W(9, -1114843176, 1018690059, 1049061203, -1099227611) + + W(10, -1111772147, 1042824177, 1023949256, -1119198021) + + W(11, 1026425060, -1123270518, 1027419728, -1129333383); + WS(-1089097708, -1091261619); + sum1 = W(0, -1127316247, 1003986316, -1111263852, 1044425283) + + W(1, 1033605983, -1116984894, -1113526029, -1120011361) + + W(2, -1112997295, 1047767320, -1118942860, -1121673366) + + W(3, -1118797136, 1046355762, -1081822066, 1061869832) + + W(4, -1110810403, 1027944882, -1123552881, -1103137072) + + W(5, -1100403388, 1058200050, 
1009152314, -1124559262) + + W(6, -1123795687, -1103576251, 1053114278, -1092855018) + + W(7, 1046189751, -1131347855, 1015138359, 1032220600) + + W(8, -1138618770, -1160534136, 1031283151, -1118591413) + + W(9, -1129675572, -1120446878, 1007314666, 1034662460) + + W(10, 1016196275, -1131488026, -1126584008, 1027028747) + + W(11, -1123471427, 1018063772, -1128724178, -1122517738); + sum2 = + W(0, -1151923233, 1040701127, -1110923755, -1103532415) + W(1, 1041909162, -1121169096, 1026311687, -1109901381) + + W(2, 1024679085, 1016651554, 1036375055, -1135341024) + W(3, 1025121279, 1034378939, -1080291839, -1082840291) + + W(4, -1110170843, 1041541680, -1123775834, -1092342947) + + W(5, 1071256357, 1066054209, -1090243220, -1128413506) + W(6, 1033596173, -1096202739, 1042190483, 1056834874) + + W(7, -1097275958, 1043004954, -1105542037, 1051958537) + + W(8, -1103925783, -1199035157, 1038401459, -1097944284) + + W(9, 1035290939, -1098166743, 1046386099, 1035522294) + + W(10, -1113079432, 1042529081, -1108107550, 1047977685) + + W(11, -1114731953, 1030555165, 1015019440, -1099750845); + WS(1061142188, 1045552914); + sum1 = + W(0, 1022384747, 1020461367, 1058732728, -1095805239) + W(1, -1101590238, 1033080372, 1019603337, -1114371061) + + W(2, 1054684529, -1098135841, -1123665301, 1031766591) + W(3, 992843677, -1137489184, 1060634392, -1085459239) + + W(4, 1027392977, -1123755612, 1025369499, -1128616851) + + W(5, 1056292747, -1081060320, -1124699057, -1116775033) + + W(6, -1149984013, -1114859644, 1061516204, -1083596540) + + W(7, 1038698931, 1034158665, 1030794181, -1134916072) + + W(8, 1058073341, -1097701630, -1106352164, -1134014242) + + W(9, -1123167236, -1114858116, 1052882077, 1026645922) + W(10, 1006025241, 1037240581, 1034453383, 1009189588) + + W(11, 1050605022, -1099032542, -1100750969, -1122655396); + sum2 = W(0, 1041010686, 1045355628, -1091459780, -1092997038) + + W(1, 1025085151, 1019331790, -1100296712, -1110840017) + + W(2, -1109934927, -1109499465, 
-1110570114, -1115246837) + + W(3, 1042377248, 1028443719, -1120459763, 1023984711) + + W(4, -1120816159, 1025130703, -1124701214, -1097271182) + + W(5, 1068946970, 1068602839, -1090645327, 1036287199) + W(6, 1002573079, -1108318217, 1048538630, 1049236461) + + W(7, 1036483715, 1028224647, 1026372231, 1034868187) + + W(8, -1095496747, -1112347607, -1110453128, -1106754238) + + W(9, -1107309152, 1032249619, -1112114883, -1088682480) + + W(10, -1121128467, 1040581836, 1022594646, 1034940919) + + W(11, -1096153349, 1036467963, -1105549173, -1106253388); + WS(-1090464684, 1058848194); + sum1 = W(0, 1038070654, 1035807069, 1059156269, -1091590511) + + W(1, 1049775661, -1111666744, -1104784595, -1106022588) + + W(2, 1027584334, -1104101108, -1100086527, -1128618637) + + W(3, -1140651524, -1107342224, 1060192817, -1084437581) + + W(4, 1050662239, -1128690758, 1048797564, -1112865880) + + W(5, 1062566262, -1088742755, 1037690576, -1097281200) + + W(6, -1115845272, -1102596230, 1060113046, -1087393870) + + W(7, 1050391262, 1024450616, -1115304422, -1117573217) + + W(8, 1043041013, -1095911543, -1117869728, -1151911514) + + W(9, 1022142646, 1011767229, 1054886405, -1102839322) + + W(10, 1042443855, -1131868338, 1026901973, -1105151431) + + W(11, 1057383999, -1088896680, 1039830925, -1104457275); + sum2 = + W(0, -1106952896, -1173421195, -1113206734, -1136957987) + + W(1, -1152575873, -1112584615, 1050458431, -1100970530) + + W(2, 1037511626, 1029901269, -1116273053, 1045345833) + + W(3, -1084179073, -1117103305, 1063723378, -1106205415) + + W(4, 1039247706, 1031320371, -1068828969, 1049093414) + W(5, 1078393261, -1105390455, 1034474614, -1099252776) + + W(6, -1082769727, 1015202727, 1065452472, -1128426057) + W(7, 1032907306, 1035573879, 1054525389, -1093960872) + + W(8, -1106048166, 993579659, -1100143257, 1040309898) + W(9, -1098029126, 1043872404, 1031385535, -1114758982) + + W(10, -1155923695, -1124055116, 1038352192, -1107157710) + + W(11, 1030855073, 1034755506, 
1034533524, -1124561432); + WS(-1106782638, -1120193880); + sum1 = W(0, 1020741911, -1116040418, 1053259451, -1112821605) + + W(1, -1101282812, -1111424287, 1021367862, -1121939617) + + W(2, 1050945756, -1100446872, 1049499553, -1129092267) + + W(3, 1038644232, -1109539853, 1055524943, -1091353929) + + W(4, -1100504250, 1016236516, -1126436322, -1098608828) + + W(5, 1065255335, -1085596156, -1101475552, 1033532871) + + W(6, 1043522250, -1101880327, 1061719830, -1096552606) + W(7, 1025835692, 1022700239, -1112066734, 993980163) + + W(8, 1048874997, -1090468372, -1114054607, -1114668904) + + W(9, 1018349205, -1115252655, 1051405991, -1106280862) + + W(10, 1027947689, -1147278953, 1033129938, -1133833389) + + W(11, 1042923217, -1102788916, -1104899626, 1028803059); + sum2 = + W(0, -1113068388, -1115580494, 1039529972, -1091718951) + W(1, -1076395174, 1046645532, 1036673453, 1024834555) + + W(2, -1098466798, 1052150318, -1080701891, -1104586573) + + W(3, -1118880240, -1115483686, 1043501593, 1043613567) + W(4, 1068474066, 1009076111, 1028813378, -1117546792) + + W(5, -1115055672, 1062484975, 1066750300, -1093753132) + + W(6, -1107832014, 1043490929, -1117282076, 1045088246) + + W(7, -1097140180, 1036864955, 1027909744, -1105092918) + W(8, 1042504898, -1094456673, 1049333443, 1022104546) + + W(9, -1122240512, 1040415904, -1107365158, 1030923561) + + W(10, -1109881554, 1033514298, 1011592783, -1119502452) + + W(11, -1112704202, -1123953380, 1043999999, -1115391680); + WS(-1093231703, 1042706757); + sum1 = + W(0, 1032826937, 1040624182, 1058499572, -1084803183) + W(1, -1104136529, 1041579305, 1026075841, 1000558899) + + W(2, 1052553144, -1099119122, 1025447538, 1031885534) + W(3, 1032961819, -1110755278, 1061343357, -1083389465) + + W(4, -1114092422, 1032264780, 1036684171, -1127770315) + + W(5, 1062021249, -1081334022, -1112317122, 1018739433) + + W(6, 1034757510, -1106938311, 1060752603, -1087386813) + W(7, -1132554758, 1031902455, 1036416025, 1019575916) + + W(8, 
1057695045, -1090036539, -1109030926, 1037874311) + + W(9, 1015758976, -1114624210, 1051786740, -1098198604) + W(10, 1012060669, 1027119072, 1040004261, 1042264396) + + W(11, 1058906011, -1088285503, -1096739300, 1035404188); + sum2 = + W(0, -1134545280, 1057942802, -1072425931, -1067197419) + W(1, 1049446840, 1074141781, -1101426979, 1047102485) + + W(2, -1084592242, -1071603316, 1042726179, 1060445087) + + W(3, 1012752443, 1055001204, -1079302223, -1077093049) + W(4, 1050812050, 1060645469, 1036306510, -1096203167) + + W(5, -1123894686, -1098326846, -1129334697, 1037955576) + + W(6, 1034320105, -1114902087, 1062181631, 1072541263) + + W(7, -1097319760, -1094473234, -1111393843, -1096420171) + + W(8, 1062084093, 1067609499, -1093274589, -1087350702) + W(9, 1046638158, -1101073471, 1064536824, 1073214169) + + W(10, 1001153868, -1085693573, -1108867307, -1091559913) + + W(11, 1073505700, 1078744623, -1094526709, -1077663090); + WS(-1075107862, 1049521772); + sum1 = W(0, 1027861131, 1045206278, -1086495609, 1051717721) + W(1, 1033503081, 1031434621, 987763517, 1041173528) + + W(2, -1097093508, 1049572700, -1116084523, -1144572069) + + W(3, -1129668766, 1032319275, -1086275954, 1055584810) + + W(4, 1009361560, -1140119493, 1033675992, 1040210256) + W(5, -1081944455, 1064397290, 995768539, 1027980366) + + W(6, 1016072268, 1041485791, -1082045908, 1059363461) + W(7, -1112276757, -1129633421, 992091051, 1044520444) + + W(8, -1096923684, 1055988184, -1112468587, 1024276072) + + W(9, 1026084697, 1042655839, -1093981119, 1051323143) + + W(10, 1005879363, 1000619058, -1120420163, -1112688453) + + W(11, -1094413520, 1050124978, 1033578820, 1025196738); + sum2 = + W(0, 1033523983, -1091543751, 1048034710, 1051542012) + W(1, 1024239879, 1041022978, -1107297920, -1125142534) + + W(2, 1031510683, -1103344470, -1117305947, -1121264057) + + W(3, -1109353447, -1100365589, 995045613, -1120961303) + + W(4, -1099173366, -1115033235, -1097553497, -1121112319) + + W(5, 1061520608, 
1061214811, 1044020024, 1053722991) + W(6, -1102290887, -1119424675, 1042981818, -1094266425) + + W(7, -1093433218, -1097363289, 987526362, -1094244068) + W(8, -1103162896, 1043830132, 1046119446, 1047286054) + + W(9, 1022298470, 1033606211, 1052169844, 1041952964) + W(10, 1043499670, -1106943326, 944749371, -1100627225) + + W(11, -1098189213, 1041818078, -1096785580, 1031561067); + WS(-1097146583, -1102489480); + sum1 = W(0, -1121239074, -1102624204, 1041928668, 1043173384) + + W(1, -1118777253, 1016656554, 1035066361, 1033907292) + W(2, -1096659727, 1051162503, -1108685857, 995611334) + + W(3, -1110655283, -1104909116, -1123609439, 1057278053) + + W(4, -1102566959, 1009112868, 1032257412, -1109600459) + + W(5, -1091326393, 1050132159, -1100309385, 1037330963) + + W(6, 1020382076, -1113387282, -1105975127, 1049365769) + + W(7, -1101876333, -1111738159, 1014217354, -1114213060) + + W(8, -1098203983, 1049080395, 1040792061, 1033561904) + W(9, 1032256145, -1114763275, 1045101428, 1033542535) + + W(10, -1121020400, -1149295615, -1120529307, -1123950514) + + W(11, -1093952115, 1055699093, 1035959564, -1122689753); + sum2 = + W(0, 1030001048, 1039849292, -1106927121, 1046837828) + W(1, -1131959957, -1116147800, -1107119695, -1106305989) + + W(2, 1060202814, -1095733546, 1012779430, -1124391765) + W(3, 1023713698, 1019090837, -1104587624, 1043085465) + + W(4, 1026454649, 1028873647, -1115026335, 1035696383) + W(5, 1057308499, 1053508746, 1038120705, -1104339358) + + W(6, 1024715164, 1036481868, 1047684818, 1034862818) + W(7, 1037173687, 1045294658, -1112691697, 1023388499) + + W(8, -1112160637, 1046794790, -1101425448, -1119391248) + + W(9, 1024758511, -1146519572, -1093257675, -1094465043) + + W(10, 1036335613, -1156574008, -1147275412, -1112564365) + + W(11, 1041456307, -1083960309, -1094791365, 1012709310); + WS(1057303084, -1084740383); + sum1 = + W(0, 1025949163, -1105987664, 1034968156, -1097863693) + W(1, 1046214631, -1130932027, 1023904890, -1104417576) + + W(2, 
1048292239, -1093717272, 1042713619, -1126920426) + + W(3, 1033327896, -1097440039, 1057303604, -1088500414) + W(4, 1043920086, 1012093510, 1022060320, -1106403568) + + W(5, -1134851644, 1058205452, 1041892896, 1040636140) + W(6, -1131905805, 1043670267, -1082452830, 1059877080) + + W(7, -1101986947, 1028551783, -1114170862, 1041763972) + + W(8, -1089247692, 1058731430, -1107457363, 1041063330) + W(9, 1021513971, 1037885311, -1093401571, 1046703382) + + W(10, -1110573172, -1143718184, -1109381346, 1019385429) + + W(11, -1097845612, 1049842950, 1031126152, 1016340900); + sum2 = + W(0, -1097593869, -1100989715, 1026566131, 1061153405) + W(1, 1050695083, -1095509343, 1040446621, 1036240143) + + W(2, -1100851860, 1047874592, -1096668864, 1040237386) + + W(3, -1107347821, -1105186205, 1046892012, -1103469514) + + W(4, -1104864053, -1114614656, 1036994425, -1119554977) + + W(5, 1069594194, 1074408805, -1103368475, 1040210470) + W(6, 989701019, 1048367016, -1107809286, -1110319007) + + W(7, -1100341899, -1105386934, 1043906150, -1095197585) + + W(8, 1031148367, -1079941721, -1084422687, 1031716423) + W(9, 1040271244, -1102913875, 1038853877, 1042791078) + + W(10, -1093040701, 1036206107, 1016581730, 1041272891) + + W(11, -1105136265, -1086296266, -1099178193, 1042594729); + WS(-1085311468, 1078025451); + sum1 = + W(0, -1114246985, 1033236492, -1126048573, 1051596674) + W(1, -1110129580, -1109388771, -1123523234, 1042717314) + + W(2, -1100167817, 1050764301, -1101525701, 1021849401) + + W(3, -1123774423, 1046675224, -1090923842, 1057423711) + W(4, -1102550347, 1031509177, 1017942127, 1030027079) + + W(5, 1002547123, -1138337956, -1117387765, -1126623147) + + W(6, 1018695644, -1107516674, 1061445386, -1085962671) + + W(7, 1045061525, -1118825262, -1134274761, -1105985470) + + W(8, 1051174149, -1101362115, 1037275627, -1107617348) + W(9, 973249598, -1118366597, 1032724353, -1098415672) + + W(10, 1033244340, -1124427728, 1032511109, -1108369993) + + W(11, 1047097650, 
-1105953621, 1025818592, -1115961017); + sum2 = + W(0, -1123353384, -1131031713, 1029311455, 1047725718) + W(1, 1051601031, -1113968956, 1030359787, -1114557735) + + W(2, -1123787418, 1042470619, -1121374979, 1045218010) + + W(3, 1024787123, -1125418898, -1099438470, -1106551206) + + W(4, 1049816965, 1044805763, 1015567503, -1132430472) + W(5, 1049323738, -1097880179, -1088308991, 1039658119) + + W(6, -1115408082, 1029672716, 1042547290, -1089568094) + + W(7, -1098255621, 1042126156, 1026077135, -1118070691) + + W(8, 1050505809, -1166364565, 1026888229, -1115384233) + + W(9, -1137211259, -1125125186, -1110579463, -1115324497) + + W(10, -1112338809, 1043221816, 1015951538, -1135395683) + + W(11, 1040847724, 1028809582, -1102185033, 1014731468); + WS(1060142060, 1042832150); + sum1 = + W(0, 1031397804, 1036648197, 1052129567, -1093104885) + W(1, -1125587404, 1030014814, -1121488955, -1142445829) + + W(2, 1051097024, -1094159296, -1138833071, 1024131190) + + W(3, 1033996248, -1117943062, 1054892021, -1086111346) + W(4, 1028704007, 1024270930, 1018827426, -1111782928) + + W(5, 1065668620, -1081757070, 1032794495, -1123778927) + + W(6, -1131717262, -1108508447, 1059881804, -1087336873) + + W(7, 1047817147, 1022586319, 1033609702, -1106903604) + + W(8, 1058770516, -1090845580, -1104469332, -1125662417) + + W(9, 1010765225, 1015606653, 1050176059, -1100487494) + W(10, 1038701961, 1032196289, 1029196868, -1115637854) + + W(11, 1054801908, -1126749404, -1091639031, 1019613157); + sum2 = + W(0, 1016709191, 1043282989, -1105914972, -1104055195) + W(1, -1113893232, 1031619176, 1013621039, -1105412866) + + W(2, 1048947993, 1038685325, -1143937773, -1128320153) + + W(3, -1134054043, 1034830675, -1104630078, -1084033248) + + W(4, -1103493551, 990218298, 1033829499, -1123744731) + + W(5, -1088942993, -1072142874, -1098435347, 1026126526) + + W(6, -1133247239, -1107183956, -1093767876, 1073971039) + + W(7, 1052470850, 1042783259, 1014723479, -1110083630) + W(8, 1055362845, 
1076386430, -1104225446, -1103699955) + + W(9, 1025542948, -1115844306, -1111303066, -1098251720) + + W(10, 1051939911, -1123473137, -1113297030, -1097083153) + + W(11, 1064727508, -1090433741, -1091328171, 1025388740); + WS(-1086199532, 1054743650); + sum1 = W(0, 1034444474, 1032011277, 1048862696, -1097849454) + W(1, 1006964268, 1027368614, 1027498242, -1095519909) + + W(2, 1053262121, -1090625833, 1040908045, -1113781807) + + W(3, 1040848158, -1107222607, 1061389071, -1084999869) + + W(4, 1038795271, -1118338586, 1023783640, -1152811127) + + W(5, 1030342167, -1113365765, 1038235355, -1108967739) + W(6, 999869046, 1044283945, -1084989084, 1060145155) + + W(7, -1104327823, 1006059384, -1115850147, 1044573214) + + W(8, -1095010778, 1054594461, -1101807134, 1028815960) + + W(9, 1025498402, 1036275699, -1106712265, 1045643928) + + W(10, -1122717633, -1124545277, -1134397103, -1123084051) + + W(11, 1032418005, 1033574715, 1015954881, -1135640905); + sum2 = + W(0, -1098769930, 1042608106, -1101852353, -1093087039) + W(1, -1139600545, 1036192806, 1041383757, -1089209738) + + W(2, -1121408570, 1038811718, -1093098827, -1121276186) + + W(3, 1031909130, -1102323575, -1108783974, -1089123880) + + W(4, -1121335963, 1045867083, 1040055926, -1105226719) + W(5, 1065973699, 1069194195, -1098519743, 1023817866) + + W(6, -1112037460, -1124786133, 1049738412, 1026907074) + + W(7, -1089198426, 1042688929, 1040275414, -1117623756) + + W(8, 1057999228, -1106112227, -1101165651, -1122635193) + + W(9, -1111784639, -1131302943, -1111524780, 1020629449) + + W(10, -1109243346, 1041146873, -1113621249, 1048083051) + + W(11, 1025491254, -1107530612, 1048264427, -1095495518); + WS(-1090983255, -1089207473); + sum1 = + W(0, 1011536056, -1126565928, 1055178415, -1101680475) + W(1, -1102945718, 1026884961, 1031895551, -1113602338) + + W(2, 1054038520, -1097154360, -1121652462, 990918528) + + W(3, -1113368985, -1111099810, 1055473117, -1089575247) + W(4, 994604183, 1022227241, 1041335941, 
-1099647257) + + W(5, 1064470249, -1084973552, -1121284913, -1151944468) + + W(6, -1112820133, -1102367072, 1063942234, -1086493203) + + W(7, 1041044307, -1119623498, 1033755607, 1030939267) + W(8, 1050197537, -1097159533, -1111487017, 1029397803) + + W(9, -1120463213, -1111909392, 1058279198, -1096914715) + + W(10, 1017418960, -1127714823, 1029129223, 1034973975) + + W(11, 1042393881, -1096995926, -1111098786, 1024652821); + sum2 = W(0, 1008868714, -1138668162, 988655570, -1108603395) + W(1, 1033342078, 1018452757, 1000134020, 1023930211) + + W(2, -1099344177, 1041622837, 1025638720, -1115444075) + + W(3, -1131179173, -1108628571, -1064898888, -1131013053) + + W(4, -1133763810, 1023801183, 1026383992, -1114490385) + + W(5, -1066369231, -1087427617, 1033730604, -1118168395) + + W(6, -1108980541, 1047990867, 1075540559, -1095163420) + + W(7, -1117896039, 1033324797, 1033178726, 1030525898) + W(8, 1086324754, 1057379348, 1031531103, -1116567631) + + W(9, -1113100227, -1131668265, 1049763634, 1040828448) + + W(10, -1119632759, 1028372534, 1025874080, -1138114226) + + W(11, -1100090533, -1109206595, 1025251971, -1128577333); + WS(-1109283164, -1114463829); + sum1 = W(0, 1026307305, 1048505142, 1056046207, -1087851802) + W(1, -1097387850, 1043738183, 1010065376, 1006853751) + + W(2, 1051849183, -1096622464, 1034233556, 1033008793) + + W(3, 1036887575, -1122796048, 1060373686, -1086026376) + + W(4, -1106692609, 1030721871, 1032762791, -1121781501) + + W(5, 1062361681, -1080983673, -1116217316, 1031320492) + + W(6, 1038442146, -1106474773, 1060935143, -1086759863) + + W(7, -1136008963, 1024808165, 1036926348, -1140109366) + + W(8, 1058739944, -1088322398, -1111740214, 1037732587) + + W(9, 983730417, -1113784606, 1052729243, -1098544579) + W(10, 973353381, 1028455576, 1042339968, 1033209828) + + W(11, 1060958338, -1085562739, -1098996574, 1032429188); + sum2 = W(0, 1033103311, -1088826877, 1076551262, 1082157528) + + W(1, -1088761642, -1073488400, 1036675590, -1093870598) 
+ + W(2, 1064863709, 1075036637, 1044533603, -1083381518) + W(3, 1035316039, -1096260803, 1066303868, 1074566779) + + W(4, -1088656095, -1087052440, -1139597621, 1038770367) + + W(5, 1054257493, 1054121358, -1096732938, 1034260530) + + W(6, -1104037187, -1120383141, -1086745184, -1075712309) + + W(7, 1026461691, 1055295113, 1036924272, 1054470917) + W(8, -1080791393, -1075955022, 1058516172, 1062460380) + + W(9, -1111374359, 1048854624, -1081541832, -1073360354) + + W(10, 1036676016, 1061463967, 1033604811, 1052968175) + + W(11, -1074269835, -1068055688, 1036957577, 1072405502); + WS(-1073690779, -1087724268); + sum1 = W(0, 1020006630, 1040645890, 1037569760, 1040537741) + W(1, -1102207443, 1011752087, 944113971, 1024788392) + + W(2, 1033313762, 1022063376, -1117962707, 1010250996) + W(3, 1028328440, 1048604230, 1026748704, 1037819609) + + W(4, 1018608989, 1035208397, 1026519064, -1124203663) + + W(5, -1097395091, -1081034849, 1030158086, -1107990331) + + W(6, 1023877839, 1026558293, 1058432566, -1089181115) + W(7, 1048090929, 1030700463, 1026744019, -1108217132) + + W(8, 1052406658, -1095782133, 1028902900, -1115106384) + + W(9, -1116930962, 1024907750, 1046357449, -1103915032) + + W(10, 1040820950, 1006665970, 1037411798, -1124509669) + + W(11, 1052615509, -1095058991, 1037074644, -1120315546); + sum2 = + W(0, 1036147454, 991782552, -1100036784, -1102354425) + W(1, -1132125723, 1037581214, -1115074830, 1025479068) + + W(2, -1096384442, 1031211776, -1114551496, -1107514244) + + W(3, -1143660011, -1111873890, 1042154750, -1098733186) + + W(4, -1120425773, 1036528495, -1109526737, 1054093522) + W(5, 1036720443, 1061883500, 1042458762, -1113752408) + + W(6, -1113309480, 1016478163, 1015496998, 1040999099) + W(7, -1118319659, -1131440878, 1006285882, 1035621794) + + W(8, -1111840809, -1099055155, 1021928735, 1009305230) + + W(9, -1115649745, 1027286665, 1028637267, -1117281017) + + W(10, -1113807339, -1115311515, 1026243607, -1108696429) + + W(11, 1024623035, 
-1120753705, 1033776741, -1117838632); + WS(1053167575, 1034582410); + sum1 = + W(0, 1023109013, -1135820801, -1107146278, 1042183330) + W(1, -1106326923, 1038375967, -1105499654, 1041430288) + + W(2, 1045138183, 1015167422, 1048594337, -1103059717) + W(3, 1047073940, -1099071824, 1006706653, -1110119411) + + W(4, -1101264321, 1036901240, -1103829756, 1051022760) + + W(5, -1098544246, -1114407921, 1042439875, 1019390619) + + W(6, -1134567319, -1103780951, 1035133548, 1029794991) + + W(7, -1115149997, -1122256838, 1029509866, 1038026111) + + W(8, -1101742889, 1025810371, -1124355817, 1039002364) + + W(9, -1107075490, -1144576488, 1040882462, 1020673517) + + W(10, -1133994400, -1113322768, 1028951523, 1034415069) + + W(11, -1110494113, -1144644710, 1017843520, 1025775953); + sum2 = W(0, 1007469645, -1127684796, 1026289720, -1134400734) + + W(1, -1121586613, 1012304630, 1024932078, -1122451669) + + W(2, 1029479215, -1114161855, 1018968072, 1018258842) + W(3, -1123529450, 934668744, 1048216767, -1092503803) + + W(4, 1055026065, -1105610792, -1106895484, 1046133075) + + W(5, -1101979386, -1118826374, 1057475450, -1091642753) + + W(6, -1154152567, 1050482770, -1086100405, 1054881826) + + W(7, 1057991283, -1094138317, 1026396887, -1100501045) + + W(8, -1104578417, 1054831843, 1037697257, -1097381938) + + W(9, 1034932657, -1112663810, -1110628430, 1037309396) + + W(10, 1041220013, -1108645190, -1118589447, -1145059796) + + W(11, -1120799353, 1034243151, 1035239909, -1109776821); + WS(1065106092, 1009251236); + sum1 = W(0, -1127594260, -1119086128, -1106710454, 1041658930) + W(1, 1028936149, 999711717, 1015119794, 1041681344) + + W(2, -1108480942, 1053952044, -1098626929, -1128362792) + + W(3, -1105935716, 1038033220, -1085761952, 1059673337) + + W(4, -1106493133, 1030156593, 1030756271, -1103164984) + + W(5, 1048660750, 1035410928, 1042075767, -1105711219) + + W(6, 1015463627, -1097628571, 1062385221, -1083076739) + + W(7, 1048795361, -1109139763, 1033714469, -1096661826) 
+ + W(8, 1057312254, -1097132616, 1042769246, -1109546740) + + W(9, -1121831017, -1162082196, 1045270286, -1100422942) + + W(10, 1033263484, -1128821631, 1032817313, 1037645501) + + W(11, 1038003425, -1134248242, -1109564844, -1113038632); + sum2 = + W(0, 1026504012, -1111982251, -1115575890, 1025515301) + W(1, 1038047283, -1121134093, -1115537349, -1109318355) + + W(2, 1037274256, 1036029355, -1140512157, -1118959675) + + W(3, 1004448402, -1126991046, 1032461625, -1105715099) + W(4, 1031934837, 966777949, -1120289237, -1114485349) + + W(5, 1046084064, 1035923056, -1127252256, 1001443794) + W(6, -1116193727, 1031030421, -1104386736, 1039615788) + + W(7, 1008094609, -1126229172, 1023273402, 1036795295) + W(8, 1033981186, 1034803763, -1108137243, 999509622) + + W(9, -1106169890, 1038895038, 1034486696, -1111316562) + + W(10, 1013721630, -1115420717, -1116296115, 1037382021) + + W(11, 1014573341, -1126611440, -1109430807, -1121268263); + WS(1058235500, 1054922309); + sum1 = W(0, -1126843838, -1110452882, -1096439037, 1054965252) + + W(1, -1126708475, -1113819273, -1115091447, 1026999796) + + W(2, -1109530936, 1035486938, 1040994960, -1112039092) + + W(3, 1037063794, 1027596138, -1126567064, 1052521111) + W(4, 1048136923, 1029145158, -1097782353, 1043595317) + + W(5, -1080972858, 1054616871, -1135709192, -1103625129) + + W(6, 1017345397, 1026915457, -1087153329, 1059777872) + + W(7, -1117730271, -1113712109, -1106496120, 1051692426) + + W(8, -1092439359, 1055199219, 1025849689, -1133876016) + + W(9, -1124168315, 1033470169, -1096481402, 1049045985) + + W(10, -1109862614, -1120397392, -1111050604, 1047889704) + + W(11, -1101856358, 1042302776, 1048178521, 1018059916); + sum2 = + W(0, 1032658614, -1104671284, 1033357456, -1107649550) + W(1, -1107785448, 1014985523, 1026047005, 1026757765) + + W(2, -1146410551, -1093450531, 1045006287, -1108588814) + + W(3, 1016793814, -1106565422, -1129670058, 1029536101) + W(4, 1032863350, -1114405495, 979115576, 1040092458) + + W(5, 
1049738304, 1048902638, 1031141497, 1032866808) + W(6, 1025048255, -1118382609, 1047130549, 1043845633) + + W(7, 1024835039, -1128051766, -1120340424, 1024555245) + W(8, -1099524683, 1031379649, 1016575718, 1011865867) + + W(9, -1124528004, -1115048442, -1114147283, 1015256822) + + W(10, -1115840886, 1016407002, -1113479651, -1102456829) + + W(11, -1143577495, 1032535412, -1140327483, -1121997281); + WS(1050344663, 1040331465); + sum1 = + W(0, -1119034750, -1123846924, 1038870821, 1016005498) + W(1, -1113185550, -1180899430, 1027008397, 1032620906) + + W(2, -1145305726, -1104369525, 1047408652, -1117862216) + + W(3, -1123135107, -1107606109, 1045963311, -1101093248) + + W(4, -1106188838, 1031141157, 1034001074, -1113228665) + W(5, 1055978044, -1091899011, 1019406818, 1025902913) + + W(6, -1165785906, -1104523524, 1061494191, -1089807144) + + W(7, -1133495032, 995937181, -1109811775, 1044228977) + W(8, 994921841, -1100340494, 1039673315, -1126170822) + + W(9, 1012082163, -1136436803, 1043858443, -1113047285) + + W(10, 1003314783, -1155810234, 1021442477, 1015486907) + + W(11, 1032566715, -1121306753, -1110918342, 1012720916); + sum2 = W(0, 1017323653, 1006857268, 1027026905, -1096858156) + W(1, 1057582691, -1108630874, 983063098, 1027629321) + + W(2, 1041250623, -1096058068, 1069736593, -1103954988) + + W(3, -1124332548, 1040636195, -1098379711, -1081757810) + + W(4, -1098564102, 1039138402, -1136855038, 1050922570) + + W(5, -1095854749, 1046692173, -1092586095, 1040290673) + + W(6, 1038193266, -1107256285, -1124475180, 1055388353) + + W(7, -1101296593, 1026430908, -1115428487, 1024268384) + + W(8, -1107079153, 1044471504, -1099842491, -1135051275) + + W(9, 1037749325, -1105738834, 1035306909, 1018283467) + + W(10, -1130236866, -1127494735, -1162541650, -1112368096) + + W(11, 1041041721, 1034757871, -1116072515, -1128054004); + WS(1064629676, -1098084920); + sum1 = + W(0, 1033302207, 1017516913, 1037844037, 1004101612) + W(1, -1100831258, 1023773169, 1017176603, 
986968687) + + W(2, 1048067236, -1094217842, 1032944847, 1036346276) + W(3, 1043155505, -1093133598, 1059745345, -1107275372) + + W(4, -1118901210, -1109611315, 1036523218, -1136867787) + + W(5, 1054752956, -1083111259, -1111571193, 1036061697) + + W(6, -1135635048, -1099849236, 1054803234, 1036740290) + + W(7, -1104247276, -1130856180, 1036902751, -1112073298) + + W(8, 1049105955, -1089373007, 1044914424, -1178541510) + W(9, -1154473427, 1015173986, 1045303547, 1007182507) + + W(10, 1032262742, 1028817260, 1031815708, -1108314079) + + W(11, 1049045791, -1096296960, 1028425028, -1121119813); + sum2 = W(0, 1031116538, -1106561538, 1041324255, -1097318271) + W(1, 1038093550, 1003684433, 1018758860, 1041892626) + + W(2, -1122918288, 1050772655, -1113042451, 1005733025) + + W(3, -1093507837, 1060901401, -1090730175, -1089173269) + + W(4, -1103574570, 1036702490, -1142239225, -1094547881) + + W(5, 1059998586, 1050575163, -1096117806, 1007559836) + + W(6, 1008547208, 1048344650, -1105692057, -1105982083) + + W(7, 1045224094, -1111284039, -1115133937, -1106548880) + + W(8, -1107738784, 1062167006, -1093777588, 1032209739) + + W(9, 1037272918, 1031802380, -1109731964, -1115007264) + + W(10, 1040225760, -1119496571, -1116833334, 1032935937) + + W(11, -1105354579, 1051274636, -1097190188, 1039213738); + WS(1059570988, -1104849743); + sum1 = W(0, 1015935639, -1105217736, -1100062838, 1055580097) + + W(1, -1114680436, 1027695934, -1114327981, -1117057793) + + W(2, -1095539196, 1051203348, -1104422643, -1122995670) + + W(3, 1032097032, 1045549308, -1097913193, 1057098153) + W(4, 1035503236, 1026067698, -1100671580, 994695167) + + W(5, -1081463016, 1065307314, -1100709080, -1115820943) + + W(6, -1118415719, 1019924475, -1088685699, 1060408535) + + W(7, -1100882497, -1121911751, -1146837726, 1044124420) + + W(8, -1103538521, 1060044676, -1127513406, 1040976712) + + W(9, -1128662677, -1133264506, -1090424260, 1048658511) + + W(10, -1106885414, -1109730008, -1106017873, 1026902699) 
+ + W(11, -1096642336, 1058793797, -1112552037, 1032897105); + sum2 = + W(0, -1101738300, -1102635520, 1049960021, -1107186076) + W(1, 1031463651, -1109595133, 1026251680, 1042860418) + + W(2, 1045813894, 1037904592, 1043360780, 1032162510) + W(3, -1096487254, -1118004862, 1012147620, -1101520598) + + W(4, -1099464580, -1144663840, -1097132336, -1097577131) + + W(5, 1062244087, 1050699569, 1042220045, 1017587638) + W(6, -1103125896, 1034939290, 1030928730, -1118944274) + + W(7, -1129575892, -1122088198, -1098301296, -1107323885) + + W(8, 1018590462, 996952385, -1143727552, -1112243029) + W(9, -1145491968, 1046219299, 1042676852, -1132230456) + + W(10, 1038679603, 1028948530, -1102991820, -1101571742) + + W(11, 1048890187, -1120981264, 1022002534, -1108091801); + WS(-1095386967, -1111219145); + sum1 = + W(0, 989878260, 1046105185, 1042696371, -1089927208) + W(1, -1127509006, 1030962098, -1130549149, -1114353482) + + W(2, 1044731284, -1092958347, 1029760895, 1015099027) + W(3, 1027494660, -1126247292, 1059356463, -1090136954) + + W(4, -1123276603, 1002268463, 1027516453, -1101798869) + + W(5, 1066759339, -1086407791, -1134143337, 1008759050) + + W(6, -1138329195, -1109199416, 1060963410, -1086208082) + + W(7, 1037957160, 1030926587, 1029536895, -1149170463) + W(8, 1056388995, -1087382795, 1028489291, 1024487352) + + W(9, 1016226972, -1110945897, 1052477254, -1099705531) + W(10, 1033794703, 1027177352, 1031794584, 1041043200) + + W(11, 1048385630, -1090430272, -1116367629, -1155852515); + sum2 = W(0, 1037152773, -1107393101, -1107499267, -1071982339) + + W(1, 1050177526, 1034063245, -1155239869, -1118272906) + + W(2, -1123365743, -1066777138, -1099174901, -1111285661) + + W(3, -1110337901, -1121008946, 1053226346, 1065602467) + + W(4, -1114100047, -1111810642, 1031799544, 1040803769) + + W(5, -1091451910, 1078719649, 1051522302, 1032887971) + W(6, -1115579003, 1040826838, 1036591998, 1072207876) + + W(7, -1112448128, -1105619991, 1027082675, -1129277077) + W(8, 
984589146, 1012365693, 1036247818, 1027337883) + + W(9, -1125050611, 1044810576, -1111221822, -1114663567) + + W(10, 1024897040, -1128259763, 1027786160, -1128372019) + + W(11, -1115106721, 1027240586, 1010421237, -1110086921); + WS(-1090330604, -1095913405); + sum1 = + W(0, 1020138108, -1155846064, -1089580553, 1060107168) + W(1, 1026904097, -1129005794, -1123891981, -1158857313) + + W(2, -1099259748, 1041623909, 1034391623, 1027969480) + W(3, 1027835331, 1042832881, -1086143680, 1061453552) + + W(4, -1115844409, -1126805065, -1104251929, 1026174194) + + W(5, -1083565678, 1065434151, -1108664129, 1015124739) + W(6, 1036891437, 1041459232, -1087459168, 1058328348) + + W(7, -1103857816, -1133565543, -1115340623, -1107002233) + + W(8, -1094838563, 1058061687, -1108388469, 1018898230) + W(9, 1034060158, 1043482052, -1096454718, 1046178603) + + W(10, -1124051644, -1128371111, -1111402939, -1101325435) + + W(11, -1097944271, 1056232524, -1124315511, 1016813002); + sum2 = W(0, -1123501879, 1032645553, 1031823713, 1073269342) + W(1, 1039470942, -1134706270, 994818552, -1106534525) + + W(2, 1055123028, 1086127434, -1106004733, -1108902493) + + W(3, 1024190209, 1041404870, 1003420836, -1097366650) + + W(4, 1048036108, -1131894299, 1026428551, -1104982451) + + W(5, -1080276792, -1064048639, 1029604730, 1040747994) + + W(6, -1113685733, 1042593674, -1127504591, -1073275019) + + W(7, -1102930579, -1114192145, 1033345966, -1115833227) + + W(8, 1051480126, -1090368136, -1127043711, 1028687353) + + W(9, -1113104927, 1034301013, -1116624943, 1039074558) + + W(10, 1016207563, -1110363669, 1032770548, 1019409377) + + W(11, 1017007809, -1104405577, 1023925896, 1028902826); + WS(-1090401964, -1152005757); + sum1 = W(0, -1107975723, -1110378885, -1108756369, 1036253290) + + W(1, -1114963960, -1113670180, 1027213860, -1114833045) + + W(2, -1095679285, -1126602324, 1031108853, -1122691487) + + W(3, -1098681011, -1110030634, -1106296032, 1055690689) + + W(4, -1100045596, -1107558750, 
1041527967, -1088517567) + + W(5, 1070380432, 1071039479, -1089232274, 1041737494) + + W(6, -1104614908, -1108968043, -1108442736, 1052643167) + + W(7, -1099409217, -1114805320, -1117487349, -1117626874) + + W(8, -1098750614, 1027420045, -1149243844, -1114809497) + + W(9, -1139447119, 1013150495, -1103582017, 1037720897) + + W(10, -1112117145, -1129003237, -1114124023, -1107640642) + + W(11, -1109258534, 1028231256, 1004205268, -1115685308); + sum2 = + W(0, 1032241318, -1104603794, -1110024851, 1040884292) + W(1, -1102517567, 1041491703, -1099535875, 1042476400) + + W(2, 1036274242, -1098717699, 1017815860, -1106264274) + + W(3, 1055385702, -1090035137, -1088795135, 1054345172) + + W(4, -1094964358, 1050847172, -1088862448, 1058560544) + W(5, 1070638870, 1049287225, 1049908300, -1097728340) + + W(6, 1049303281, -1088861159, -1093985609, 1045145516) + + W(7, -1104972980, -1120926078, -1109469239, 1052772694) + + W(8, -1106832655, 1028821082, -1121780800, 1040141480) + + W(9, -1107728961, -1098593341, 1040915158, -1095210561) + + W(10, 1046695263, -1097969166, -1128383244, 1040732759) + + W(11, -1094544594, 1053404675, -1097835418, 1042680505); + WS(-1070853435, -1079594702); + sum1 = W(0, 1031368074, 1015638464, -1114370347, -1115839497) + + W(1, -1115223191, 1032466078, -1129832370, 1041167744) + + W(2, -1117069532, -1102391672, 1027397066, 1017764904) + + W(3, 1034753191, 1035847262, -1123068615, -1103157041) + + W(4, -1112317379, 1032880216, 1015854174, -1103546680) + + W(5, 1064504443, -1091637800, -1115479934, -1129698458) + + W(6, 1034404179, -1113675090, 1061831318, -1080200908) + W(7, 1034003429, 1015162251, 1016952500, 1024462894) + + W(8, 1053421228, -1096184888, 1024063661, -1140051844) + + W(9, -1121119193, 1024493276, 1039689052, -1127920108) + + W(10, 1021986479, -1133040549, 1030231874, -1128491667) + + W(11, 1047845552, -1114780422, -1108195108, -1121925385); + sum2 = + W(0, -1106137478, -1112484873, 1057355655, 1035747177) + W(1, -1121112608, 
-1111886750, 1043473407, -1102332364) + + W(2, -1095817987, -1115086293, -1132505032, 1028632460) + + W(3, -1106937584, -1092913054, 1065744344, 1023360745) + + W(4, -1113186427, -1106034045, 1026389244, -1107744995) + + W(5, 1065853704, 1072123343, -1092294539, 1034294011) + + W(6, -1112665680, -1097742336, -1081387458, -1079419356) + + W(7, -1121542644, 1024287965, 1037682857, 1043622402) + W(8, -1127502696, -1106562006, 1037780507, 1025382780) + + W(9, -1122471729, 1020098037, -1107102958, -1106082519) + + W(10, 1042075138, 1002478149, -1132137458, 1022975237) + + W(11, -1141866869, -1126279403, 1036152561, -1184105122); + WS(1049779927, -1112776705); + sum1 = + W(0, 1028071903, -1118237477, -1102803275, 1029391902) + W(1, 1034486619, -1146402448, -1114011991, 1043483133) + + W(2, -1098367878, -1104983021, 1030141823, 1024272361) + W(3, 1029458468, 1007633232, -1088820906, 1052808064) + + W(4, 1041927295, -1136262168, -1119065733, 1035764138) + + W(5, -1094340312, 1066199650, 1028311666, -1116603171) + + W(6, -1132325110, 1035328577, -1080604425, 1053987355) + W(7, -1120802495, 1022508086, 1021654911, 1034435922) + + W(8, -1090240908, 1056392483, 1033716813, -1120600656) + + W(9, -1140102008, -1138188744, -1148292210, 1036299814) + + W(10, -1137351220, -1178578023, -1114327724, -1122000063) + + W(11, -1113705707, 1048826612, 1019304377, 1020743116); + sum2 = + W(0, 1032570045, 1038214390, -1105258373, 1049712324) + W(1, -1104412018, -1123221706, -1105084280, 1026252154) + + W(2, 1013797397, -1092505394, -1106628914, 1042796766) + W(3, 1038138439, 1040403525, -1089116471, 1060934721) + + W(4, -1095609080, 997194667, -1106537061, -1094267982) + W(5, 1076181184, 1059180753, -1100140938, 1031304969) + + W(6, 1040196930, -1111441511, -1077048432, -1094660144) + + W(7, -1105239307, 1025898916, -1115643975, -1113353460) + + W(8, 1025617857, -1116351798, 1035183993, -1113508292) + + W(9, 1030892929, 1041615042, -1099622062, -1112051433) + + W(10, -1108612586, 
1036578837, -1115802712, 1045634969) + + W(11, 1030853641, 1036834683, -1126459673, -1114596985); + WS(1046079918, -1101410372); + sum1 = + W(0, -1119247786, -1128322399, 1042510685, 1040172212) + W(1, -1102544268, -1115138862, 1028420729, 1041896248) + + W(2, 1041276767, 1029165630, 1023180436, 1035447066) + W(3, -1112243383, -1105916230, 1052547749, -1093769957) + + W(4, -1094151845, -1116787216, 992275696, 1048641796) + W(5, 1049660162, -1086983984, 1051181214, -1126725978) + + W(6, -1122025413, -1106854731, 1058242473, -1090223605) + + W(7, 1034028133, 1020616971, -1106451742, 1031954663) + W(8, 1042746961, -1101840568, 1035662026, -1106918309) + + W(9, 1031954516, 1008473970, 1045913078, -1131429676) + + W(10, -1136064862, 1031591591, -1132155589, -1111364501) + + W(11, 1038481040, 1034663574, -1123376855, -1113758304); + sum2 = W(0, -1141998719, -1109775790, 1044487768, -1123003936) + + W(1, -1128896096, -1116651188, -1145824639, -1114337335) + + W(2, 1032184652, -1105393491, -1105870753, 1028260552) + + W(3, 1031031156, 1035421278, -1118695686, 1043275908) + + W(4, -1123077850, -1117301622, 1020781312, -1094152732) + + W(5, 1023745864, 1060925239, -1116720242, -1118185274) + W(6, 1022361784, 1019796096, 1018742928, 1035441608) + + W(7, 1005384511, -1111756371, 1035727822, -1109500163) + + W(8, 1044822196, -1129786704, -1106753069, 1028382728) + + W(9, -1106658345, -1126048860, 1018234504, -1154747838) + + W(10, -1109895735, -1112885074, 1018984000, -1133379360) + + W(11, 1040264262, -1114483694, -1115363912, 1013789936); + WS(1065850966, 1065849900); + sum1 = + W(0, -1148044715, 1023026535, -1096615656, 1051437895) + W(1, 1037565454, 1009078433, 1016251913, 1037817733) + + W(2, -1090259708, 1044690723, 1031445334, -1134993406) + + W(3, -1121600570, 1025496281, -1086496425, 1060547800) + + W(4, -1123828273, -1122338716, -1122606683, 1030608138) + + W(5, -1089114938, 1067874188, -1103278290, 961099943) + W(6, -1119487441, 1035148220, -1087399018, 1061971613) + 
+ W(7, -1103477215, -1115158089, -1123929445, 1038603949) + + W(8, -1089721837, 1051923078, 1017932657, -1152593000) + W(9, 1022387720, 1040789578, -1091666965, 1020371139) + + W(10, 1017045638, 1017681054, -1113618935, 1029056085) + + W(11, -1090055623, 1050691807, 1041207209, 1032486406); + sum2 = W(0, 1035054009, 1029203106, -1095822387, -1096883389) + + W(1, -1106861506, 1003858895, -1100085534, 1055497340) + + W(2, -1094806083, 1022251406, 1025821768, -1141198631) + + W(3, 1050236106, -1099101530, -1112580467, -1105263591) + + W(4, -1099047212, 1038164225, -1106312974, -1091493710) + + W(5, 1065636371, 1066985993, -1090221894, 1007864647) + W(6, 1040394388, 1035688516, 1057422694, 1050729384) + + W(7, 1040303734, -1131632326, -1111006317, -1100130836) + + W(8, 1038056034, -1113669663, -1095697855, 1034223408) + + W(9, -1122528690, 1051271785, -1090043318, -1109365057) + + W(10, -1132012078, 1022279531, -1120327060, 1037370697) + + W(11, -1101753600, -1098907474, 1042447412, -1111449439); + WS(-1097113303, -1098536273); + sum1 = + W(0, -1128711506, 1026190055, -1089449919, 1051085133) + W(1, 1039600836, 1017357112, 1001100740, -1129782332) + + W(2, -1090119792, 1054274400, 1006971045, -1151099700) + + W(3, -1116245678, 1036959597, -1089233276, 1059231684) + W(4, 1035061810, 1008174399, -1123204686, 1010531062) + + W(5, -1088111001, 1068549146, -1106500579, 1027830380) + + W(6, -1122390725, 1040533155, -1083999320, 1060528017) + W(7, -1110173729, -1118330949, 973480529, 1032146359) + + W(8, -1100399617, 1051645044, -1118570949, -1138008953) + + W(9, -1135175840, 1016950072, -1096337083, 1021720363) + + W(10, -1105366950, -1113983621, -1112078369, -1115082484) + + W(11, -1098983064, 1056811397, -1163217111, -1114515356); + sum2 = W(0, -1126544110, 1057867675, -1084782762, -1101472514) + + W(1, 1046930113, 1047938716, -1106788080, 1045040387) + + W(2, -1103936041, -1095964817, 1060484868, -1116398073) + + W(3, -1123763395, -1116688865, -1096694543, 1033632512) + + 
W(4, -1098897900, 1033301960, 1031886252, 1008770068) + W(5, 1065118928, 1070094219, 1055777327, -1112910333) + + W(6, 1019879781, -1111977015, -1120781351, -1093909441) + + W(7, -1105350592, -1132175940, -1106896864, -1104133845) + + W(8, -1094413904, -1091348732, -1098592917, 1028498658) + + W(9, 1025236903, -1123575137, 1055216743, -1087869152) + + W(10, 1032159773, -1111429301, -1103198659, -1095297610) + + W(11, 1063595180, -1101379604, -1090856164, -1148399072); + WS(-1088872108, 1052196610); + sum1 = W(0, -1115772244, -1114837443, 1040729928, 999143576) + + W(1, 1035543595, -1109636323, 1026268067, -1111544357) + + W(2, -1129315660, 1051137549, -1115388138, -1138847608) + + W(3, 1022683057, -1104977249, 1050293045, 1050616762) + + W(4, -1110255664, -1148093909, -1119255225, -1110507588) + + W(5, -1115663356, -1123488747, -1102766242, -1113579532) + + W(6, -1126845407, -1124752108, 1055556797, -1107532492) + + W(7, -1115121626, 1031428336, 1018606696, -1106979958) + + W(8, 1038400791, -1115698508, -1111362474, -1123058305) + + W(9, -1122729971, -1118779367, 1043464385, -1106851872) + + W(10, -1114761165, 1024891874, -1119390253, 1032957370) + + W(11, 1010467373, 1013947482, -1127308771, -1117161611); + sum2 = + W(0, 1021571414, -1136485425, 1033792089, 1044650880) + W(1, -1111938281, -1111133361, 1005045849, -1108696907) + + W(2, 1026572719, 1068805535, 1042295210, 1015874538) + W(3, 1032761087, 1015124882, -1097148617, 1072674603) + + W(4, 1053858128, -1115667125, -1131955840, -1109877489) + + W(5, -1072841573, -1073632622, -1114078156, 1025743417) + W(6, 1002892889, 1050593712, 1051731235, 1040667946) + + W(7, 986622244, 1029743517, -1113636159, -1105061066) + + W(8, 1040799594, -1114163718, -1105514756, -1150165522) + W(9, 1017257782, 1027289415, 1025178361, 1033059623) + + W(10, -1131411966, 1028606721, -1116947723, 1035889263) + + W(11, 1033387155, -1113785952, 1024784261, -1131685126); + WS(1059574956, -1147706177); + sum1 = + W(0, -1126851274, 
-1107330821, 1039522745, -1101979721) + W(1, 1039509326, -1113513772, 1032992154, -1114850125) + + W(2, 1036915515, -1103811902, 1037543219, -1122830319) + W(3, 990365335, -1111585801, 1058476188, -1091594576) + + W(4, 1044635632, -1115799180, 1021792788, 1025219833) + W(5, 1057257569, -1102147871, 1015035920, 999411009) + + W(6, 1014654722, 1042448198, -1130368142, 1049911159) + W(7, -1098033363, 1008106627, 1023708133, 1041111834) + + W(8, -1087441640, 1046832311, -1103709614, 1030688938) + W(9, 1021433863, 1028566264, -1103664757, 1042912388) + + W(10, -1112635561, 998865060, 1024270494, 1025900485) + + W(11, -1101532519, 1034347764, -1106561201, 1033251158); + sum2 = W(0, 1030668111, -1100054904, -1106823977, 1038847176) + + W(1, 1045749759, -1139922314, -1138525905, -1127093272) + + W(2, -1118630721, 1023505668, -1100949461, -1137789612) + + W(3, -1118095530, -1104427366, -1104404781, 1055088734) + + W(4, -1106737563, 1025350194, 1046358820, -1094333916) + + W(5, 1066330506, 1059364463, -1115593085, -1118124817) + + W(6, 1026007575, -1098434999, 1032956568, -1109156363) + + W(7, -1105827255, 1044080198, 1036395993, -1114547879) + + W(8, -1089680445, -1102029714, 1014767972, -1111415597) + + W(9, 1044323398, -1098255510, -1118009606, 1019564502) + + W(10, -1114455410, 1034872747, -1102154889, 1043811931) + + W(11, -1104253130, 1042217401, 1033201092, -1116871429); + WS(1056332375, 1037816258); + sum1 = W(0, -1121656782, -1103168373, -1119737691, 1044637987) + + W(1, 1028439953, -1147698697, -1121854558, -1112652978) + + W(2, -1116096096, 1045017865, -1108613751, 1015231409) + W(3, 988019291, 1032694229, -1099471067, 1051843657) + + W(4, -1124711667, -1126623722, 1027294260, 1032971073) + + W(5, -1088930875, 1058468850, -1105155884, 1025720722) + + W(6, -1141230927, 1036051832, -1088040995, 1059943427) + + W(7, -1110648631, -1125643860, -1146196851, 1029047507) + + W(8, -1094038173, 1054278049, 1023373473, 1025459107) + + W(9, -1124586778, -1138159876, 
-1105369135, 1040438911) + + W(10, -1109090662, -1140770656, -1119789479, -1116156558) + + W(11, -1100717841, 1052440082, -1121068380, 1033302707); + sum2 = + W(0, 1037403217, -1101035659, 1032316963, -1116665707) + W(1, -1159849898, 1025618366, 1056969468, -1090106390) + + W(2, 1038083410, -1119459300, 1009385423, -1128266716) + W(3, 1057593955, -1080354518, 1060881997, 1028006921) + + W(4, 1038355181, -1107076145, 1065979451, -1077736273) + + W(5, 1057826659, -1093367622, -1108823981, 1040978108) + W(6, 1052219437, -1095104394, 1040943565, 1050471411) + + W(7, 1028403474, -1107763897, 1048107842, -1110113632) + + W(8, -1093495590, -1098889696, 1041580846, 1031803963) + W(9, 1012201018, 1035332836, 1023246776, 1037291810) + + W(10, -1108503511, 1028340263, 1034400525, 1036232380) + + W(11, -1105354500, -1172133380, 1023204972, -1124125618); + WS(1057728492, -1114894260); + sum1 = + W(0, -1113553553, -1137973853, 1030725486, 1027451936) + W(1, 1024408310, -1122023828, -1129994721, 1032124939) + + W(2, 1023297510, 1039175515, -1104644784, 1015759447) + W(3, -1116323110, 1038491142, 1042012404, -1096381204) + + W(4, -1098358292, -1129065232, 1032700067, 1042214629) + W(5, 1063322480, 1002639809, -1097199096, 1025211173) + + W(6, 1022553594, -1102850457, 1049455560, -1095884801) + + W(7, 1038368473, -1119912573, -1174372438, -1103814977) + + W(8, 1038337085, -1099447670, 1041557913, -1120935138) + + W(9, 1025649872, -1108835354, 1036180712, -1107257155) + + W(10, -1143485305, 1022194670, -1204938694, -1117610625) + + W(11, 1041170774, -1110131186, -1139346043, -1131327617); + sum2 = W(0, -1132724746, -1136277690, -1115335750, -1142062564) + + W(1, -1111737990, 1021138001, -1125894137, 1023794749) + + W(2, 1019650853, -1102896527, 1036465851, -1117376889) + + W(3, 1008401986, 1034921985, -1111489002, -1095970428) + + W(4, -1093339450, 1036367329, -1123753553, -1135255538) + + W(5, 1065003137, 1052274635, -1130345777, -1114642052) + + W(6, 1028568703, -1095862669, 
1042205114, 1050282098) + W(7, 961646223, 983274770, -1129573217, -1103833757) + + W(8, -1104966989, 1039075981, -1123958953, -1121876761) + + W(9, -1115138968, 989975657, 1028245440, -1124502561) + + W(10, 975314596, -1131382405, -1129237393, -1117578737) + + W(11, -1137003202, 1029353869, 981021860, -1129104973); + WS(1064957100, -1084259623); + sum1 = + W(0, -1147443767, 1048172674, 1050085746, -1087174820) + W(1, -1110620477, 1041967934, 1002237105, 1022388856) + + W(2, 1049301459, -1096483251, -1130736462, 1032395752) + + W(3, 1042679235, -1111985505, 1061123934, -1084863818) + W(4, 1036594273, 1029284270, 1025036588, -1105536623) + + W(5, 1059891343, -1085360136, -1096717087, 1027822410) + + W(6, 1039980881, -1111204540, 1063082922, -1087496217) + W(7, 1049051417, 1038157169, 1039876787, -1118973740) + + W(8, 1059164422, -1085023864, -1098175193, 1033526682) + + W(9, -1122119878, -1115201293, 1050147791, -1099749261) + + W(10, 1038851310, 1035926782, 1046679440, 1036937851) + + W(11, 1052477312, -1093770425, -1094492881, 1040430103); + sum2 = + W(0, 1043574668, -1079200674, 1070660923, 1052128984) + W(1, -1102512316, -1114631788, -1110691764, -1088609636) + + W(2, 1057860954, 1049301959, -1119415890, -1111966430) + + W(3, -1103083598, -1104512784, -1107138551, -1106877192) + + W(4, -1099577256, 1025811761, -1104224219, 1072315297) + + W(5, -1092267131, -1082025998, 1048136354, -1120067756) + + W(6, -1111324648, 1071717488, -1073631870, -1098154139) + + W(7, -1101281480, -1097839567, 1045365848, 1074062948) + W(8, -1072626825, 1060924493, 1049098296, 1046934152) + + W(9, -1123650350, 1072927288, -1076777698, 1055782129) + + W(10, 1053482338, -1142912019, 1040472402, 1075518858) + + W(11, -1068851440, -1085268633, 1045375219, -1113956244); + WS(-1075237718, 1059347685); + sum1 = + W(0, 1021242911, 1049968737, -1123217820, -1100043508) + W(1, -1123750750, 1039650209, 1033512399, -1159856274) + + W(2, 1052611492, -1096977477, 1035058824, -1164938034) + W(3, 
1029870036, 1035044013, 1055398719, -1086974821) + + W(4, -1111424357, 1031585078, 1022930282, -1122614884) + + W(5, 1061884279, -1081627821, 1035852049, -1146263191) + + W(6, 1032776022, -1113136009, 1059035017, -1085989237) + W(7, 1046594056, 1040828171, 1018371807, -1134430929) + + W(8, 1058365139, -1083853344, 1040835864, -1113397210) + + W(9, 1019954077, -1114025505, 1054270736, -1094951367) + W(10, 1036293471, 1041271429, 1033340372, 1040742208) + + W(11, 1057072208, -1082038839, 1043435581, -1112152131); + sum2 = + W(0, -1109418256, -1082007392, 1074622373, -1087167323) + W(1, -1079889153, 1054843717, 1045686709, -1092895857) + + W(2, 1045059925, -1090791673, 1052961109, -1111299754) + W(3, 1025535098, -1097230803, 1022527077, 1061750517) + + W(4, -1094791227, -1097908774, 1041368995, -1087580936) + + W(5, 1072979117, 1066822104, -1121141584, 1039933817) + W(6, 1025255906, -1090709237, 1059630355, -1104688853) + + W(7, -1114952578, -1109459004, 1045341483, 1030380818) + + W(8, -1082271661, -1098772696, 1048788253, 1032974905) + + W(9, -1131647237, -1098811123, -1098333373, -1097425000) + + W(10, -1109808864, 1041274025, 1050563248, -1122444474) + + W(11, -1080249300, 1035536875, 1062091865, -1101634861); + WS(-1073405707, 1071333561); + sum1 = + W(0, 1032445482, -1107154225, -1086616986, 1053035442) + W(1, 1037191452, 1005358651, -1114759602, 1043648434) + + W(2, -1097373989, 1049497533, -1113096234, -1140703610) + + W(3, 1009365348, 1025826187, -1088696011, 1058380737) + W(4, 1042579156, -1147371482, -1116673978, 1050217769) + + W(5, -1084271170, 1057270871, 1027100008, 1008260703) + W(6, -1140494528, 1045274198, -1086550017, 1060414079) + + W(7, -1122447285, -1128134433, -1138343378, 1041826370) + + W(8, -1088202787, 1054213138, 1033632004, -1146407565) + + W(9, -1164102249, 1037855412, -1096484208, 1049814032) + + W(10, -1117413058, -1132301012, -1128324122, 1036590066) + + W(11, -1090407429, 1044502774, 1049279047, 1030979244); + sum2 = + W(0, 
-1104849756, 1044778147, 1087011564, 1051692028) + W(1, -1115275477, 1024288807, -1130309268, 1042344095) + + W(2, 1065552014, 1050304642, 1012814276, -1122813731) + + W(3, 1032168017, -1115874267, -1071484991, -1095624232) + W(4, 1034711867, 1018071134, 1006608216, 1016899230) + + W(5, -1064792716, -1080996320, 1012706900, -1113948677) + + W(6, 1025939811, 1015318926, -1108487910, 1038353709) + W(7, 1031018877, 1039145567, -1116205504, 1017541078) + + W(8, -1111002668, 1015541566, 1017981722, -1121567210) + + W(9, -1134020588, 1025032291, 1026184365, -1115175630) + + W(10, 1016059306, -1161414849, 1015836550, -1134707560) + + W(11, -1105554232, 1035428553, 1030487759, 957459976); + WS(-1093094231, 1019545057); + sum1 = + W(0, -1147370279, 1024662611, 1045730364, -1100871081) + W(1, -1113292876, -1115285964, -1111661690, 1043371102) + + W(2, 1007712219, -1103400446, -1125293706, 1035921815) + + W(3, 1036370005, -1098598749, 1063151515, -1086049630) + + W(4, 1038700136, -1106696483, -1130128916, -1121967929) + + W(5, 1048771569, -1094208743, 1042591246, 1031896756) + W(6, -1138886402, 1034324947, -1102866212, 1057052051) + + W(7, -1109292474, -1117085921, -1114353490, 1030255498) + + W(8, 1043747359, -1103168823, 1040829042, -1117982022) + + W(9, -1139052495, 1020719663, 1042970997, -1114366281) + W(10, -1113681601, 986496271, -1123566046, 983686292) + + W(11, 1044423271, -1105757546, -1119014482, -1116127556); + sum2 = + W(0, 1017737448, -1113949677, -1109074925, -1106456753) + W(1, 1033894674, 1002006465, -1112488549, 1043174240) + + W(2, -1099690943, 1036040708, -1136025401, -1127095326) + W(3, 1041909593, -1095542187, 1055357640, 990810979) + + W(4, 1036060652, -1106928479, -1119128646, -1123930650) + + W(5, 1063813292, 1064786485, -1095508468, 1041202855) + W(6, 995222883, 1011641777, 1029075282, -1080550900) + + W(7, -1096849813, 1028541984, -1138532089, 1043431269) + + W(8, -1111808055, -1099317256, 1042059844, 1013308785) + + W(9, -1122958336, -1133447353, 
-1121958092, -1111251400) + + W(10, 1031456264, -1130037494, 1002378369, 1011720393) + + W(11, -1111615901, -1122017034, 1041840135, 1011740497); + WS(1065464534, 1066200435); + sum1 = W(0, -1120929867, -1114034816, -1093428139, 1053507241) + + W(1, 1039630850, 1039737375, 1037468205, -1098649416) + + W(2, -1095417420, 1053719708, -1117479019, -1104296205) + + W(3, -1104046500, 1044270480, -1095455089, 1059517274) + + W(4, -1116571174, 1034668025, -1118049328, 1030873567) + + W(5, -1078956988, 1062996894, 1037682987, 1027940302) + W(6, 1026495526, 1027018036, -1097112752, 1062358792) + + W(7, -1100052716, -1105628906, -1102710553, 1029661039) + + W(8, -1090364038, 1051059848, -1134398359, 1047874257) + W(9, 1047356358, 992085460, -1098761015, 1053033759) + + W(10, -1124964177, -1105711476, -1104379061, 1014898857) + + W(11, -1089997432, 1058137077, -1119205176, 1036556777); + sum2 = + W(0, 1052570920, -1079960756, 1059860803, 1024102895) + W(1, -1109439044, 1028585912, -1096757231, -1072214843) + + W(2, 1077140634, 1045135233, -1135125103, 1026412089) + W(3, 1032706704, -1067716640, 1078085184, 1007395147) + + W(4, 1039504993, 999691338, 1048916325, -1071149720) + W(5, 1074719494, 1048879105, -1119420973, 1031274943) + + W(6, 1032567852, -1113086642, -1126012658, -1105291454) + + W(7, 1042487387, -1115876939, 1027784571, 1026889523) + W(8, 1023965646, -1125605772, -1107726802, 1031655696) + + W(9, -1123035498, -1102782368, 1033915049, 1036400237) + + W(10, 1040624471, -1118021143, 1032789056, -1113248449) + + W(11, -1131155204, -1120868355, -1123377002, 1033633154); + WS(-1088460652, -1077196042); + sum1 = + W(0, -1106714405, -1126618358, -1100538888, 1061836401) + W(1, -1105561066, 1017778831, 1012745873, -1104692362) + + W(2, -1105672234, 1049398341, -1107347643, -1107815087) + + W(3, -1106817926, -1128320455, -1096420642, 1063373516) + + W(4, -1095103574, 1021813111, 1014233321, -1113350277) + + W(5, 1032760733, 1062672955, -1104500106, -1130991024) + W(6, 
-1139695810, -1104636978, 980853696, 1040237156) + + W(7, -1136108756, 1015562593, -1112340330, -1111992333) + + W(8, -1098221750, 1049303442, -1098848505, -1124042178) + + W(9, -1112616426, -1112151140, 1032069058, -1115352833) + + W(10, 1028357771, -1117430379, 1014140598, -1103199781) + + W(11, -1124202648, 1047738902, -1108236605, 1024298444); + sum2 = + W(0, 1042942941, -1095373315, 1047404354, 1041600542) + W(1, 1029337611, -1116792463, -1103739979, 1042549384) + + W(2, -1104635751, 1049880149, -1113660916, 1033409028) + + W(3, 1021675082, -1100136800, -1156628070, 1042313351) + + W(4, -1103511888, -1100575516, 1031504716, 1032277385) + W(5, 1056265483, -1095000083, 1052284225, 1042607543) + + W(6, -1103784377, 1051314558, -1086627236, 1058671506) + + W(7, -1098025422, -1102539848, 1021140017, -1097536524) + + W(8, 1056610233, -1087499329, 1053296214, -1123811627) + W(9, 1032312275, 1033748340, -1097748456, 1053303874) + + W(10, -1097031052, 1034071897, -1113021145, 1006926454) + + W(11, 1032415346, -1106742186, 1040224097, -1116904421); + WS(-1090190636, 1052333694); + sum1 = W(0, -1118008162, 1046164882, -1087764995, 1050774996) + W(1, -1132114980, 992812356, 1017370722, 1025512151) + + W(2, -1107299674, -1113611419, -1133738119, -1143374172) + + W(3, 1028313190, 1039176172, -1107276920, 1042980645) + W(4, 1016759930, 1031741320, 1032465571, -1140639831) + + W(5, 1054861041, -1104478910, 1039228938, 991966796) + W(6, 1021024634, -1114523818, 1048774281, -1116066086) + + W(7, 1029146129, -1131726971, 1022145795, -1123803590) + + W(8, 1052546887, -1088290914, 1046828701, -1135053738) + + W(9, 974396430, -1114987978, 1033108808, -1101829203) + + W(10, 1025239231, -1135839572, 1023704762, -1150325779) + + W(11, 1052242867, -1082760046, 1050382392, -1117633184); + sum2 = W(0, -1112927492, -1126311221, -1124129707, -1089259297) + + W(1, -1121960162, -1127721129, 1000278501, -1112124316) + + W(2, 1030656727, -1136644755, -1119195008, -1143597573) + + W(3, 
-1115114722, -1096516945, 1062718459, -1089907054) + + W(4, -1113085858, 1010259715, 1038568093, -1095439368) + + W(5, 1069168233, 1066488720, -1108783853, 1016497897) + + W(6, -1107752992, 1035770391, -1092856910, 1057602472) + + W(7, -1101947316, -1142073349, 1036864823, -1173738153) + + W(8, -1097729376, -1119016562, -1105922885, 1034450025) + + W(9, -1111435217, -1127749525, -1108466839, 1028769031) + + W(10, -1119106133, -1113345821, 999311589, -1118042174) + + W(11, -1106807928, -1101412503, -1119408565, -1121786357); + WS(1050584535, 1068150000); + sum1 = + W(0, -1115260532, 1034938395, 1038987194, -1117589351) + W(1, -1115913835, 1013034091, 1010165469, -1115625742) + + W(2, 1047833470, -1104527215, -1116485512, 1026352434) + + W(3, -1112895478, 1043729220, -1102791097, 1041775364) + W(4, 1035117570, -1123123343, 1003680362, 1046586119) + + W(5, 1053057826, -1092950416, 1032942189, -1126776150) + + W(6, 1038151522, -1096382429, 1060286702, -1085160955) + + W(7, 1051158381, -1118268740, -1113655284, 1036369142) + W(8, 1031810112, -1099697720, -1109351189, 992197073) + + W(9, -1132334157, -1110737789, 1033665067, -1112334730) + + W(10, 1035094123, 1007592182, -1116596385, 1020311663) + + W(11, 1045919272, -1112800213, -1113211075, -1128843070); + sum2 = W(0, 1027450901, -1130117632, -1112751331, 1023748039) + W(1, -1129067048, 1020055214, 999052617, 1037992777) + + W(2, 1031860941, -1148181737, 1031574877, -1123191999) + + W(3, 998486633, 1035832253, -1100828230, -1107268336) + + W(4, -1108371420, -1176849298, -1098069390, 1058826704) + + W(5, 1066377631, 1031601953, -1110435251, 992240434) + W(6, 1050271656, -1075559944, 1059983898, -1105105514) + + W(7, -1111123861, 1022099846, -1144500281, 1051052067) + + W(8, -1089703828, 1006070601, -1135053221, -1122685646) + + W(9, 1034115809, -1104035684, 1037142243, 1031181077) + + W(10, 1013422629, -1154287506, -1111215245, 1041076323) + + W(11, -1116488683, -1123703892, 994427506, 1025316569); + WS(1065940278, 
1034622363); + sum1 = + W(0, -1130882111, 1042781009, 1048563851, -1106282856) + W(1, 991352096, -1132163876, 1018945629, -1105035701) + + W(2, 1039562393, -1093168999, -1128564863, -1146137419) + + W(3, 1038546130, 1037583679, 1050832116, -1106056839) + W(4, 1040441019, 1034240611, -1109081570, -1089961211) + + W(5, 1065150616, -1081267407, -1125259845, -1103163033) + + W(6, -1116833467, -1106689964, 1063013875, -1093571617) + W(7, 1037655800, 1014989293, 1027338207, 1025705412) + + W(8, 1053064399, -1098826451, 1032573560, -1132178841) + + W(9, -1118439678, -1110899849, 1043578884, -1098971955) + + W(10, -1126802554, -1140385876, 1023675896, 1000818681) + + W(11, 1052844651, -1103816344, -1174556270, -1124029942); + sum2 = W(0, 1042045232, 1025081894, -1120251562, -1098032480) + + W(1, -1122897683, -1131598407, -1100582068, 1047796551) + + W(2, -1130142714, 1051286467, -1114446743, 1028094206) + + W(3, 1051353130, -1135649915, -1091982339, -1088973774) + + W(4, 1048662352, -1114986235, -1098661297, -1083821047) + + W(5, 1066411068, 1066192686, -1098476628, 1035402858) + + W(6, 1033093932, -1113123121, -1108263063, -1096817778) + + W(7, 1043244089, -1113670703, 1041505758, -1102707738) + + W(8, -1116829613, -1124748705, -1105512115, 1040252082) + + W(9, -1111611431, 1042774566, 1029093314, -1115318079) + + W(10, 1039955893, -1113507172, 1036803556, -1119704488) + + W(11, -1105374597, 1039210409, -1115075522, -1136180467); + WS(1056991468, -1087030746); + sum1 = + W(0, -1123211458, -1104816506, -1105628499, 1049039296) + + W(1, -1110865210, -1119944116, 1040392564, -1123900599) + W(2, 1035944426, 1031104918, 1038992913, 1019968535) + + W(3, -1107179074, -1097679367, -1092912076, 1058030045) + + W(4, -1092536332, -1113705364, 1032323379, 1050060447) + W(5, 1043269081, 1044634502, 1052766173, 1024115141) + + W(6, -1112698351, -1113372906, -1098944114, 1032247835) + + W(7, -1101773536, 1016666555, -1113154609, -1118981766) + + W(8, -1111852653, 1043769326, 
-1111088882, -1113641643) + W(9, 1035059984, 1036804874, 1030188977, 1035537687) + + W(10, 1017337855, 1034739876, -1117349658, -1117759553) + + W(11, -1099907470, 1048904265, -1120093295, -1119368937); + sum2 = W(0, 1038207100, -1100024892, 1019998746, -1116235844) + + W(1, 1028590129, -1132429552, 1055370704, -1091918952) + + W(2, -1108168198, 1051323790, 1017416940, -1131674494) + + W(3, 1017023961, -1082116017, 1065911065, -1096201210) + + W(4, 1032160655, -1107738152, -1102268135, -1125190319) + + W(5, 1060923636, -1094794141, 1048662690, -1115273448) + + W(6, -1122475964, 1048656820, -1095962080, 1023749917) + + W(7, 1005611185, -1120617509, 1023693826, -1118701724) + + W(8, 1025593786, -1112111831, -1121410776, 1032669658) + W(9, 1033230771, 1009208157, 1022143062, 1028403793) + + W(10, -1131420642, 1007544955, -1120372849, 1017593165) + + W(11, -1123799386, -1116942925, 1018425448, 990755743); + WS(1065408790, 1036624080); + sum1 = + W(0, 1031689276, -1095782030, 1040908418, 1044865119) + W(1, 1010781241, -1124793181, -1118902603, 1036811736) + + W(2, -1099523235, 1048657234, -1114118270, -1132122507) + + W(3, -1130392064, -1106389326, -1100633526, 1056569877) + + W(4, -1105362262, 1010247305, -1130349122, -1116251760) + + W(5, -1088986706, 1059819669, -1109527143, 1021657598) + + W(6, -1124855342, 1016267643, -1097124896, 1057017186) + + W(7, -1101014271, -1147394236, -1120117724, -1116551776) + + W(8, -1093812931, 1059687187, -1105256235, 1019294354) + W(9, 1032115325, 1029730958, -1102530619, 1046000250) + + W(10, 1028862634, -1131137795, -1114562026, -1110739566) + + W(11, -1093706094, 1058243684, -1118348593, 1010280523); + sum2 = W(0, 1008018405, 1028827850, 1048442625, 1022982076) + W(1, 1015261982, 1023643398, -1131603511, -1115409151) + + W(2, -1128423353, 976919119, -1110305290, -1142661114) + + W(3, 988821927, 1044469435, -1144038842, -1106419532) + + W(4, 1025710382, 1024565032, -1126539767, -1105522597) + + W(5, -1080807551, -1069309764, 
-1128896061, -1156968388) + + W(6, 1029044494, 1031515550, -1100063920, -1077869698) + + W(7, 1034212953, -1108906576, -1154431236, 1029512486) + W(8, 1047735839, 1057375362, 1047372093, 1040986767) + + W(9, -1144594818, -1118759177, 1049631559, 1074645104) + + W(10, -1121013958, -1109343217, -1134541747, 1017115388) + + W(11, 1040817807, 1074183528, 1028992794, 1013497529); + WS(-1134496480, -1135409208); + sum1 = + W(0, 1012816882, -1118561391, -1172095542, 1042727039) + W(1, -1101252924, 1038725048, -1131032216, 1023949458) + + W(2, -1135187513, -1111514505, 940438235, -1118209095) + W(3, 1015795842, 1025102291, -1122643706, 1044113108) + + W(4, -1099247502, 1039141810, 1017081284, -1098855254) + + W(5, 1062211658, -1088869665, 1037442288, -1110868352) + + W(6, 1032335660, -1109465919, 1053298610, -1090758606) + + W(7, 1036453998, -1124785329, -1114842158, 1018180354) + + W(8, 1055550179, -1097017583, 1038283500, -1123999226) + + W(9, -1121133019, 1039697400, -1104417019, 1024023145) + + W(10, -1127849669, 984058180, 1020892138, -1121622748) + + W(11, 1042088653, -1131309822, -1111728213, 985363057); + sum2 = + W(0, 1027844464, -1123756397, -1109401613, -1123223808) + W(1, 1035039619, -1128717516, -1120185020, 1023474305) + + W(2, 1009596088, 1048386893, -1119575836, -1197272208) + + W(3, 1033301171, -1120807818, -1105464515, -1117881742) + + W(4, 1029989604, -1129559846, -1142892826, 1045181163) + + W(5, -1069578385, -1079085438, 1032607110, 1013531724) + W(6, -1130897478, 1050514681, 1080182479, 1057107198) + + W(7, -1113328530, 1001587842, -1112219415, -1148494098) + + W(8, 1033916695, -1155326684, -1143648562, -1126181730) + + W(9, 1008317678, -1124897908, -1111316356, 1015620530) + + W(10, 1010222764, 1011089790, -1123809910, -1112292483) + + W(11, 1038900121, -1138311341, -1144599918, -1129634008); + WS(1063953772, 1006246735); + sum1 = W(0, -1129265445, 1023929568, 1050480356, -1094701786) + + W(1, 1037312131, 1034664643, -1103221880, -1113408580) + + 
W(2, 1053092565, -1092563365, -1114954585, 1012249970) + + W(3, -1129858606, -1152025615, 1054804888, -1099755687) + + W(4, 1041973195, 1007014628, -1113052252, -1124775629) + + W(5, 1058637767, -1081976315, 1043978961, 1034276907) + W(6, 970168057, -1116248388, 1061520345, -1085039957) + + W(7, -1109284733, -1121545966, 1035503657, -1112901548) + + W(8, 1052655373, -1106127939, 1037644971, 1031841028) + + W(9, -1114702297, -1114551939, 1043614208, -1104465584) + + W(10, 1036504960, -1122618723, 1026538155, 1012918985) + + W(11, 1053852345, -1094975651, -1153236991, 1015394179); + sum2 = W(0, -1108119214, -1098605940, 1035846227, 1036002502) + + W(1, -1110269476, -1115932080, -1096454441, 1047215192) + + W(2, -1114546522, 1051355228, 1034086715, 1019597651) + + W(3, -1110533412, -1103381544, 1042040175, -1102267339) + + W(4, -1102892813, -1130886478, -1119694402, -1128298432) + + W(5, 1054173861, 1055154953, 1033662351, -1108620470) + + W(6, -1127729168, -1115663656, -1103681801, 1052945690) + + W(7, 1018137856, 1033484697, -1110958880, -1114162078) + + W(8, 1035812038, -1097546689, -1135158408, -1106956606) + + W(9, 1023031785, 1023974294, 1020649501, 1033369183) + W(10, 998437331, 1029836288, -1130979192, -1102917557) + + W(11, 1043843622, 1033695483, -1106336801, 1026224246); + WS(1049960663, -1098656338); + sum1 = + W(0, -1129270334, -1097905355, -1106458043, 1053746044) + W(1, -1153859324, -1120671826, 1010743226, 1038429850) + + W(2, -1094238830, 1050579558, 1008020477, 1036488405) + W(3, -1116543140, 1026505899, -1084542826, 1060156652) + + W(4, -1101873487, 1033306211, 1036165346, -1111354490) + + W(5, -1093117384, 1065413702, -1122984978, 1035776465) + + W(6, 1026370235, 1012909508, -1096055524, -1110641506) + W(7, -1132001470, 1015184758, 1026933359, 1034045586) + + W(8, -1093158415, -1140159326, 1048347588, -1115507932) + + W(9, 1009750007, -1139624135, -1113672231, -1153271990) + + W(10, 1027525597, 1024719161, -1164389112, 1033987231) + + W(11, 
-1098353419, 1046556600, -1124973593, 1037016731); + sum2 = + W(0, 1007202997, -1107138382, -1101985157, -1105521455) + W(1, 998286218, -1135144653, 1038605814, -1092409649) + + W(2, -1111080585, 1031799434, -1127152184, -1138707909) + + W(3, -1111184485, -1086536233, 1058778058, -1102287591) + + W(4, 1043783051, 1018462794, -1097778206, 1033842165) + W(5, 1062592323, 1054124413, -1107786643, -1134709509) + + W(6, -1107862779, 1050500010, -1115904936, -1121603522) + + W(7, -1104929125, 1016336685, -1126800416, -1107791267) + + W(8, 1050870644, 1039372702, -1103666737, 1035379787) + W(9, -1122928880, 1035028069, -1115500703, 1030823202) + + W(10, -1123649878, -1120475406, -1117103670, 1024236113) + + W(11, -1137706913, 1012044041, 1042413442, -1119073178); + WS(1045571246, -1101396513); + sum1 = + W(0, 1027156266, -1105879340, -1095381975, 1052608133) + W(1, 1036979366, -1116386461, -1135540454, 1031079263) + + W(2, -1094890856, 1052732944, -1110160581, -1124519785) + + W(3, -1123739311, 1036462891, -1086841044, 1059410250) + + W(4, 1033495803, -1113888968, -1123826896, 1048275928) + W(5, -1081913239, 1057453824, 986236332, 1015417435) + + W(6, -1110505155, 1040289247, -1087831711, 1053535332) + + W(7, -1121128186, -1106215130, -1138173481, 1033674439) + + W(8, -1095054872, 1059217778, -1117509614, 1025380519) + W(9, 1019384210, 1035302207, -1100616678, 1056369955) + + W(10, 1002559041, -1132265864, -1138775820, -1109766005) + + W(11, -1096527684, 1057086003, 1031959012, 1009437253); + sum2 = + W(0, 1023702857, 1025579445, -1107828603, 1030949885) + W(1, 1030056541, -1138881396, -1116086512, 1041684936) + + W(2, -1100190580, -1127701406, -1101693823, 1031874044) + W(3, 1027064225, 991232624, -1127488618, 1063977916) + + W(4, 1026840857, -1115029594, 1024261913, 1049263730) + W(5, -1121382305, 1081524863, 1032271344, -1124133978) + + W(6, -1115739014, 994389040, 1049530396, 1074557390) + W(7, 1041139218, 1015337722, 1023117658, -1121855219) + + W(8, 1041567276, 
-1078514947, 1024600809, -1113125346) + + W(9, 1011341860, -1121905805, -1097196557, -1067412604) + + W(10, -1110332083, -1157524144, 1016675050, -1114246345) + + W(11, -1125328530, -1072747255, 1045882874, 1009384836); + WS(-1091844311, 1066789497); + sum1 = W(0, -1126657090, -1121934839, 1053346558, -1094816004) + + W(1, -1134360567, -1196074864, -1118280816, -1104655157) + + W(2, -1150447404, -1096277599, 1026981217, 1030410603) + + W(3, 1035276339, 1036892560, 1061301148, -1095276499) + W(4, 1035239934, -1117233544, 1034822275, 1041291861) + + W(5, 1054539137, -1085328499, -1132053288, 1032377106) + + W(6, -1122398524, 1016354085, 1061579109, -1094948141) + + W(7, 1041572288, -1113536398, 1027512241, -1100932977) + + W(8, 1052329175, -1090319102, -1138204721, 1028691858) + + W(9, -1105911320, -1113195754, 1010492736, -1109759879) + + W(10, 1026669711, 1016723124, 1036073133, -1137558275) + + W(11, 1052092547, -1096198988, -1122862318, 1004385343); + sum2 = + W(0, -1130924015, 1022730742, -1156842543, 1039023607) + W(1, -1147026848, 1024455841, 976327856, -1107671502) + + W(2, 1030703882, -1100688592, -1113451109, 1015244616) + W(3, 1006816288, -1102494949, 1050591037, 1039287442) + + W(4, 1030233046, -1121583866, 1032746467, -1063244855) + W(5, 1084548157, -1091568120, 1034384724, 1032201434) + + W(6, -1109752024, -1117144683, 1041535529, 1042495948) + + W(7, 1024850879, -1112832588, 1030725620, -1118200508) + W(8, 1030607420, -1100624377, 1037336210, 984232072) + + W(9, -1118656485, 1023153636, -1117378226, 1033109796) + + W(10, -1123040622, 1003511712, 1013150272, 1019358625) + + W(11, 1000111712, -1135441064, 1025293966, -1124806470); + WS(1062375148, 982896070); + sum1 = + W(0, 1041356906, -1092346958, -1098857839, 1052252533) + W(1, 1050501706, -1165233042, -1141741512, 1042371688) + + W(2, -1100631199, 1048733184, -1126563284, -1134397903) + + W(3, -1123251246, -1107640316, -1093348349, 1057292943) + + W(4, 1043133649, 1008063554, -1121772047, 1037798623) 
+ W(5, -1081710855, 1063732968, -1111246422, 1032268279) + + W(6, -1138991637, 1035668286, -1084667272, 1061645578) + W(7, -1112114769, 988108978, -1156317853, 1026956462) + + W(8, -1088318752, 1059891330, -1106214611, 1037368483) + + W(9, -1161666833, 1016120313, -1093940123, 1051947731) + + W(10, -1109426316, -1138661789, 1011726474, -1115727625) + + W(11, -1084607638, 1059995535, -1190508359, 1040598420); + sum2 = W(0, -1087149149, -1081699534, 1075074133, 1074122230) + + W(1, 972078363, -1097056662, -1111774062, 1035582541) + W(2, 1078981756, 1067224402, -1092574897, 1045566397) + + W(3, -1122195827, -1087576154, 1080077903, 1066418532) + + W(4, -1094572008, 1049096521, 1039123010, -1082404199) + + W(5, -1086433828, -1082120864, -1101495601, -1109136155) + + W(6, 1035630599, -1111170675, -1073636555, -1078722695) + + W(7, -1124013413, 1034655424, 1055099903, -1119441337) + + W(8, -1080599553, -1083892454, 1051613332, 1023040696) + + W(9, 1042757145, -1097950160, -1080969229, 1038753292) + + W(10, 1020974546, 1027077203, 1060125526, -1098859385) + + W(11, -1075310961, -1085615144, 1056506110, -1129411423); + WS(-1073733835, -1077508759); + sum1 = + W(0, 1004911573, 1043185540, 1056421033, -1084678800) + W(1, 1025064353, 1009545653, 999004239, -1120942224) + + W(2, 1054442442, -1094509130, 1020812405, 1022285001) + W(3, 1008908284, -1136904037, 1059517190, -1087084867) + + W(4, -1149079216, -1118599969, 1032559088, -1114184455) + + W(5, 1066321721, -1089134469, -1127746091, 1007063543) + + W(6, -1123346838, -1112686066, 1063474337, -1088382556) + + W(7, 1027145139, -1149726053, 1021646261, 1032555200) + W(8, 1052962698, -1089853747, -1126936380, 1015641511) + + W(9, -1114632613, 1005792618, 1044159722, -1092765869) + W(10, 1027348136, 1023282088, 1036339578, 1040940927) + + W(11, 992460573, -1093589176, -1126618160, 981480241); + sum2 = + W(0, 1016441223, 1040812131, -1091551076, -1083270432) + W(1, 1054198423, 1001473573, -1123853806, -1136381199) + + W(2, 
1020713637, -1123741604, -1108424657, -1110846033) + + W(3, 1034027700, -1121560396, -1092123318, 1057043619) + + W(4, -1115262538, 1008724259, 1016212045, -1100123577) + + W(5, 1069362941, 1065772322, -1092584961, -1120589670) + W(6, 1025170155, -1100550013, 1050653373, 1058297195) + + W(7, -1121642970, 1040111989, 1030169991, -1105283667) + + W(8, 1020763615, -1091989226, 1029919048, -1104422780) + + W(9, 1024326153, 1029487024, -1121149602, -1092470882) + + W(10, 1039520200, 1027795061, 1020004331, -1102325825) + + W(11, -1094990109, -1106605421, 1026718342, -1115881428); + WS(-1083424620, -1083774644); + sum1 = + W(0, 1025997707, -1107578509, -1116799831, 1051882641) + W(1, -1114213177, -1140244728, 995558823, -1112668224) + + W(2, -1098861996, 1045883512, -1127946135, -1116793621) + + W(3, -1132669247, -1109754213, -1099316274, 1060321715) + + W(4, -1106758402, -1131243108, 1015677409, 1048694360) + + W(5, -1079768279, 1059777970, 1040276008, -1129876893) + + W(6, -1113673299, -1099923633, -1093459346, 1064776049) + + W(7, -1108196294, -1167573435, -1146244698, 1044345284) + + W(8, -1098475085, 1036566882, 1008462282, 1020492609) + W(9, 1028038186, -1117921566, -1113981277, 1042756750) + + W(10, -1117078569, 1015090721, -1119640804, 1030760459) + + W(11, -1105585281, 1033934183, -1130472177, -1176961296); + sum2 = + W(0, -1120400279, -1120118931, -1114073896, 1044457775) + W(1, 1036833596, -1137956564, 1006239848, -1112851025) + + W(2, 1016448074, -1095953339, -1112980685, -1122147671) + + W(3, 1033967902, -1101370370, 1047917593, 1065044078) + W(4, -1119396143, 987310366, -1138878316, 1037781128) + + W(5, -1113781647, 1069276419, -1110041421, -1187381491) + + W(6, 1012842788, -1100258045, 1045546113, -1075186216) + + W(7, -1132903556, -1131003226, -1135736068, 1002346920) + + W(8, -1114666278, 1035717470, -1120766465, 1029533357) + + W(9, 1028356101, -1112182778, 1002645288, -1097061266) + W(10, 1015304410, 1003776904, -1120658117, 995892591) + + W(11, 
-1115876966, 1049663208, -1123504973, -1150885167); + WS(1055938007, -1113494123); + sum1 = + W(0, 1002908945, 1028210664, -1133494457, -1135464580) + W(1, 1008466835, -1131269290, 1005413771, -1117675639) + + W(2, 1035958810, -1095959351, 1040264713, 1035203030) + W(3, 1025040167, 1040926954, 1041989700, -1087581816) + + W(4, 1048292991, -1116685065, 1016082831, -1118443670) + + W(5, -1101702418, -1121242873, 1034336763, -1117662885) + + W(6, -1114102015, 1040854289, -1098047787, 1048632765) + W(7, 1043249233, -1132796663, 1024456642, 1046508410) + + W(8, -1093641223, 1052220147, -1183964258, -1144182458) + + W(9, -1121270129, 1019764906, -1111985289, 1025236686) + + W(10, 1036425152, -1137681823, 1006080470, 1029150738) + + W(11, -1102768646, 1042463785, -1131127167, 1031884341); + sum2 = + W(0, -1148833389, -1103329032, 1039527980, 1036545465) + W(1, -1140514638, -1111163586, 1034952858, -1114547380) + + W(2, 1019372679, -1111710878, 1042452640, 1025974311) + W(3, -1131645803, -1099667252, 1017530647, 1040867364) + + W(4, 1048394746, -1102843950, 1044582101, 1046239383) + W(5, -1095619156, 1074171367, 1046568773, 1030125627) + + W(6, -1100126968, 1038848953, 1053527025, -1072583834) + W(7, -1104419698, 1045672947, 1041029558, 1023994452) + + W(8, -1122648352, -1089175055, -1095650184, 1033990232) + + W(9, -1101188496, 1040593566, 1048348234, -1110512168) + + W(10, -1113024814, 1039972451, 1038559400, -1104909140) + + W(11, 1034383711, 1048723535, -1105559093, -1123744674); + WS(1054254551, 1014537956); + sum1 = + W(0, -1121039367, 1035774671, -1112598570, -1132150029) + W(1, -1109088445, 1040615567, 1018675757, -1130008697) + + W(2, 1008273253, -1122251932, -1109769252, 1003469451) + W(3, 1022410264, 1030747950, -1093495169, 1041075666) + + W(4, -1103842329, 1040565356, 1028321069, -1107714718) + + W(5, 1061840455, -1100441032, -1105690119, -1126038165) + + W(6, 1032078897, -1097508911, 1062686448, -1084814013) + + W(7, 1049125597, -1115375938, 1035879540, 
-1100889147) + + W(8, 1058139578, -1090418276, 1045096257, -1105705223) + W(9, 998902321, -1111836109, 1050284599, -1098262366) + + W(10, 1018266794, 1030169589, 1035017184, -1112194220) + + W(11, 1050365160, -1104639554, -1137878196, -1123088958); + sum2 = + W(0, 1011204314, -1130282463, -1119643322, 1040567914) + W(1, 1058038651, -1092395975, -1151906447, 1008292026) + + W(2, 1036989282, -1093705400, 1048788001, 1041384381) + W(3, -1132982086, -1113509623, 1023195387, 1046529995) + + W(4, 1069827352, -1082188469, 1015334447, -1118416042) + W(5, 1031871454, 1053486166, 1061661513, -1087135929) + + W(6, -1126682819, 1010705513, 1036745108, 1053627848) + W(7, -1076895592, 1050130047, 1031867988, 1015562377) + + W(8, -1105750465, -1097180463, -1083563182, 1062283738) + + W(9, -1118412630, 1033032763, 1024841875, 1042871577) + + W(10, -1102862368, -1108358221, 1019402007, -1171211484) + + W(11, -1132045005, -1098551422, -1093245544, 1058279550); + WS(1057163500, -1105993220); + sum1 = + W(0, -1117201821, -1127293622, 1046510122, -1119279260) + W(1, -1111026820, 1018908832, -1121502764, 1045431725) + + W(2, -1097880614, 1032698105, 1044535287, -1110256660) + + W(3, 1035599249, -1100979748, 1061106926, -1097882833) + + W(4, -1095331735, 1024080836, -1105708961, 1049617722) + + W(5, -1099817760, -1088567265, 1053288920, -1117593824) + + W(6, 1039647740, -1110465655, -1119635500, 1058361641) + + W(7, -1096979757, 1027464313, -1100922079, 1042176840) + + W(8, -1109566363, -1097567811, 1050620014, -1113789929) + + W(9, 1009912816, 1018827936, -1147860066, 1036626725) + W(10, 999121112, -1159432152, -1136735831, 1001114727) + + W(11, 1037588842, -1111863248, 1037098289, 1012647057); + sum2 = W(0, 1032576295, -1107895050, 1016729470, 1038099246) + + W(1, -1122727069, -1114110583, -1123342042, 1039805910) + + W(2, -1102939450, -1120156902, -1128970625, -1124467804) + + W(3, -1102694716, 1052615154, 1050355566, -1100691281) + + W(4, 1046407453, -1110408836, -1132748357, 
1027679479) + W(5, 1057040101, -1098318717, 997134844, 1035923921) + + W(6, -1114775905, -1183264704, 1041423933, 1028185223) + + W(7, -1094682594, 1040728790, 1033817148, -1127944484) + + W(8, -1098800812, 1043085543, 1023808656, -1104134547) + W(9, 942234367, 1017960690, 1027447559, -1113846578) + + W(10, 1033077879, -1111084310, -1109637447, -1117652574) + + W(11, -1113353192, 1025700351, -1136470782, 1024627457); + WS(1069430838, -1077807532); + sum1 = W(0, -1123171840, 926704291, 1028287308, 1024652287) + + W(1, -1121358347, -1124249574, -1123870031, 1035074891) + + W(2, 1030377682, -1104443381, -1137933441, -1123723658) + + W(3, 1012978707, -1102713036, 1060355037, 1049114747) + + W(4, -1102473935, -1124350158, 1026708965, -1094582899) + + W(5, 1064667974, 1049334299, -1095444677, -1155502135) + + W(6, 1032241614, -1104095024, 1044883747, -1097738289) + + W(7, -1112982200, 996719039, -1110089696, -1131766218) + + W(8, 1023923417, -1112895132, -1115022670, -1114119663) + + W(9, 1013657340, -1123449267, 1023074654, -1133494194) + + W(10, -1123735342, -1129702439, -1120091289, -1126388352) + + W(11, -1134219998, -1105084689, -1125652635, -1115595936); + sum2 = + W(0, -1176390445, -1120278537, -1122813389, 1041053822) + W(1, 1022193201, -1120846771, 1032791968, -1112747779) + + W(2, -1119517685, 1042596904, -1132233097, -1150821318) + + W(3, -1107216374, 1039675898, 1061000499, 1032712030) + W(4, 1037891533, -1120324549, 1039748018, 1025598156) + + W(5, 1059349510, 1055783944, 1009116037, 1006903493) + W(6, -1104895086, 1049256042, -1087513873, -1099074048) + + W(7, -1107732985, -1117395279, 1038133354, -1105015824) + + W(8, 1030867610, -1084050573, -1101549778, 1034659541) + + W(9, -1123666927, 1024448666, -1108606159, 1040697602) + + W(10, 1022775251, -1115429715, 1013658503, -1110290749) + + W(11, 1029186614, -1106116496, -1106656496, 1028618234); + WS(-1107817820, -1083770194); + sum1 = + W(0, 1033900161, -1103638846, -1091016130, 1053216608) + W(1, 
1041711845, 1032252891, -1129878691, 1019165999) + + W(2, -1105602789, 1056291523, -1114138529, 1005244003) + + W(3, -1129439246, 1023785569, -1097747004, 1057499003) + + W(4, -1107402783, -1129556050, -1131338575, 1041664750) + + W(5, -1076846267, 1059566574, 1043057225, 1037921123) + W(6, 1015365308, 1007653393, -1089110318, 1057822998) + + W(7, -1097881621, -1126092988, -1128788741, 1026305479) + + W(8, -1101714279, 1057215924, -1104471663, 1041959808) + W(9, 1034359265, 1029035509, -1095981534, 1051239662) + + W(10, 1029122288, 1025745172, 1024049691, -1111132977) + + W(11, -1089968180, 1055184137, 1032118072, 1027268745); + sum2 = W(0, -1108694073, 1017327589, 1040440313, 1037365040) + + W(1, -1097181356, -1123414724, 1045019344, 1034893158) + + W(2, 1020539438, 1043169414, 1044490295, -1142407924) + + W(3, -1102469381, -1100848825, -1091152854, -1179472005) + + W(4, -1098856738, -1124070306, 1037993179, -1146521528) + + W(5, 1062010427, 1059039676, -1105305386, -1103656828) + + W(6, -1102369829, -1136622056, 1017850914, 1038217042) + + W(7, -1109601321, 1025666621, 1036817135, -1122158468) + + W(8, -1095264144, -1119445175, -1098482637, -1105116469) + + W(9, -1116848662, 1036214504, 1045324437, 1048192551) + + W(10, 1034613852, 1027457629, -1123721670, -1109074807) + + W(11, 1047380031, -1115655698, -1105047780, -1121369495); + WS(-1121449656, -1084978473); + sum1 = W(0, 1045775831, -1099605930, -1086366198, 1054843763) + W(1, 1044622847, 1025802254, 1021114964, 1028648016) + + W(2, -1100836955, 1049277222, -1121703995, -1116658981) + + W(3, 1036495664, -1113195701, -1085601027, 1059006957) + + W(4, 1039106531, -1138285574, 1032769888, -1106810756) + + W(5, -1088747404, 1062277595, -1122091582, 1043922200) + + W(6, -1134400474, 1033523886, -1083936243, 1060137529) + + W(7, -1102614275, 1020367213, 1043049231, -1109837568) + + W(8, -1087596695, 1057384673, 1011169881, 1042132230) + + W(9, -1112792957, 1039225431, -1102874546, 1045407794) + + W(10, -1118753961, 
-1132370673, 1043659613, -1097941829) + + W(11, -1088234269, 1055835664, 1048366450, 1039326553); + sum2 = + W(0, 1027272459, -1105747151, 1058588727, 1050147388) + W(1, -1089616072, 1040922163, -1122536322, -1103046200) + + W(2, 1025881493, 1054947781, -1098050255, -1157391810) + W(3, -1115599503, 1043039962, 1025318337, 1058930692) + + W(4, 1020160348, -1097372724, -1109953333, 1029006014) + + W(5, -1079698237, -1072349877, 1078384354, 1029236731) + + W(6, -1101108438, 1049577028, -1116019666, -1069493839) + + W(7, 1078006681, -1111844849, -1160853572, 1035298821) + + W(8, 1050247454, -1072151832, 1075828182, -1095907204) + + W(9, -1125199072, 1043042004, -1104971590, -1072978341) + + W(10, 1074567753, 1046502148, -1105305527, 1053260045) + + W(11, -1094505007, -1072202568, 1072013431, 1037030073); + WS(-1076413686, 1050217089); + sum1 = W(0, 1037264211, -1103937453, 1032015257, 1043075881) + + W(1, -1122840185, -1144808135, -1140703701, 981259974) + + W(2, 1015092336, -1110472573, -1115654547, -1126940256) + + W(3, 1010880426, -1126374874, -1117441203, -1140207123) + + W(4, -1121072785, -1127053849, -1119658251, 1042459879) + + W(5, -1095655209, 1058975156, 1016890098, 1040888602) + + W(6, -1116247362, -1115463620, -1089060935, 1059127610) + + W(7, -1095200233, 1021095812, 1031154598, 1040532067) + W(8, -1097693400, 1049753500, 1024224509, 990976051) + + W(9, -1125273098, -1110800684, -1116339093, 1026525586) + + W(10, -1116764924, -1118091276, -1131976189, 1020764716) + + W(11, -1115082479, 1038259412, 1027865572, 1016317274); + sum2 = W(0, 1057721492, -1087030854, -1098169441, 976966634) + W(1, 1023517212, 1010046217, 1028643349, -1118716844) + + W(2, 1050324940, 1023362490, -1141117066, 1027499437) + + W(3, 1060288138, -1080168014, 1016748144, -1115171624) + + W(4, 1023617033, -1115470708, 1059252612, -1080273709) + + W(5, -1095791676, 1047291739, -1099550108, 1034407868) + + W(6, -1088865108, 1065839894, 1025332663, -1136639192) + + W(7, 1041241265, 
-1116997486, -1086775589, 1065802380) + + W(8, 1048744751, -1124716700, 1003901823, -1138422172) + + W(9, -1123025940, 1052992874, -1103675095, 1045176631) + + W(10, -1105682362, 1028187249, -1093527126, 1051057083) + + W(11, 1047968677, -1122660154, 1020688386, -1121332252); + WS(1066004950, -1130673709); + sum1 = + W(0, -1116101174, 1031654568, -1088044468, 1052873383) + W(1, 1041536990, -1136500130, -1143212072, -1115365365) + + W(2, -1112634148, 1046315731, 1018103788, -1125315968) + + W(3, -1110468285, 1037471350, -1095270314, 1053790292) + + W(4, 1044233532, -1143909091, -1115338446, 1044807147) + + W(5, -1086079158, 1057379349, -1114923292, 1020745202) + + W(6, -1108483849, 1036857292, -1084922887, 1062574892) + W(7, -1111082699, 1016815907, 1016794668, 1049500535) + + W(8, -1089120237, 1059566963, -1112535089, -1120057699) + + W(9, -1129241385, -1116011560, -1096691029, 1043154809) + + W(10, 1013224078, 1009076749, 1013476922, 1018991691) + + W(11, -1092328502, 1052540519, 1045681849, -1111113254); + sum2 = W(0, 1049010278, 1059657103, -1081123115, -1096253478) + + W(1, 1022187211, 1042763848, 1037489391, -1120407882) + + W(2, -1091075041, 1044030983, -1110198086, -1123055347) + + W(3, -1120558834, 1065561208, -1094693777, -1088412254) + + W(4, 1026065263, 1010460099, -1115441721, 1065354919) + W(5, -1086070082, -1088804298, 979808314, 1037997843) + + W(6, -1107156556, 1033902725, 1048072918, 1059822273) + W(7, 1015305348, -1106407542, 992841734, -1090673211) + + W(8, 1058489027, 1048768363, -1103664360, -1127526979) + + W(9, 1011333881, -1082019922, 1067731839, -1110539158) + + W(10, -1102578162, 1046102351, 1042878013, -1076779646) + + W(11, 1065573397, 1056386736, 1041735451, -1098151359); + WS(1047691950, 1051513987); + sum1 = + W(0, 1022367675, -1106171493, 1047705594, -1115678670) + W(1, -1110486184, -1119968336, 1023115866, -1148261013) + + W(2, -1138130365, 1039875161, 1038987295, -1146539325) + + W(3, 1025923810, -1110614526, -1138856554, 
-1098498099) + + W(4, -1111291916, 1031736900, -1121417283, 1048684730) + + W(5, -1092459648, 1059839847, 1037143648, -1113038256) + + W(6, -1135269348, 1043654816, -1088007377, 1055365231) + W(7, -1116325726, 999460043, -1123344254, 1037977761) + + W(8, -1117423671, -1110502987, 1038170008, -1124410005) + + W(9, -1153116613, 1018209505, -1145599589, -1108540005) + + W(10, 1024662707, -1170140361, -1140725337, 1012206837) + + W(11, -1130685354, 1013205632, -1106962771, 1022378313); + sum2 = + W(0, -1153456458, 1034638423, 1026219462, -1118314715) + W(1, -1119545213, 1020158000, -1134949160, -1132037434) + + W(2, -1110235102, 1026400860, 1033998497, -1129681397) + W(3, 976205336, 1047224115, 1041432515, -1098555575) + + W(4, 1038320636, 1034767622, -1107040555, -1111241686) + + W(5, -1098519781, 1048945857, 1042089138, -1100590807) + W(6, 1031938263, -1102088763, 1016119332, 1066956313) + + W(7, -1089428981, -1131704894, -1148458760, -1109506803) + + W(8, 1045200456, 1038098069, -1088147582, 1033722488) + W(9, -1128058621, -1119972538, 1026438298, 1020127420) + + W(10, -1103340362, 1036980701, 1018102832, -1122339703) + + W(11, 1012163112, 1026446934, -1114149729, -1126638758); + WS(1065730166, 1060778308); + sum1 = W(0, -1109508860, -1157499671, 1041425921, 999270323) + + W(1, 1009856488, -1122732458, -1155017835, -1122491724) + + W(2, 1039607429, -1108915110, 1019270959, 1019165315) + + W(3, -1113193022, -1101845990, 1059840031, -1093896302) + + W(4, 1048952862, -1112299503, 1018445113, -1123710636) + + W(5, -1103201623, 1034241095, -1124851701, 1031136123) + + W(6, -1116526739, 1037579925, -1086536234, 1065443257) + + W(7, -1100984082, -1130156262, 1027367671, -1109701838) + + W(8, -1119532176, -1115473720, 1042597895, -1112137490) + + W(9, -1113248331, 1024052458, -1131587533, 1023021127) + + W(10, 988759974, 1035869970, -1116098736, -1155438706) + + W(11, -1135484603, -1169161851, -1109972554, 1026435614); + sum2 = W(0, -1117967445, 1023936292, 1048994422, 
1036573845) + W(1, 1025878262, -1136777476, 1040648348, 1027011316) + + W(2, -1081925843, -1090561506, -1121546197, 1000734832) + + W(3, -1112630617, -1104815876, 1069895227, 1061477130) + + W(4, 1006994724, -1120512530, 1042642774, 1032485416) + + W(5, -1079286264, -1076993331, 1017402756, 1043800887) + + W(6, -1134880336, -1097739621, 1067803238, 1074358700) + + W(7, -1094759274, -1115175264, -1108734545, 1008252288) + + W(8, -1093635474, -1082750513, -1124517798, -1106635673) + + W(9, 1042723069, 1027781874, 1039731885, -1152591489) + + W(10, -1128233335, 1050798200, -1110919553, 1017816544) + + W(11, -1113045602, 1040942069, 1015619872, -1104563368); + WS(1051081815, 1004177576); + sum1 = W(0, -1127006616, 1047378036, 1040477302, -1095193921) + W(1, 984028115, 1015654525, 1009183853, -1123222753) + + W(2, -1133548407, -1099314072, 1033152150, 1000220336) + + W(3, 1030806032, 1030530563, 1061394914, -1084986498) + + W(4, -1134209438, 1016832613, 1035550201, -1112167248) + + W(5, 1067589976, -1091645900, -1104155199, 1007541309) + + W(6, -1148492092, -1102096100, 1061693295, -1092420382) + + W(7, -1126214664, 1020442767, 1031890286, -1119963793) + + W(8, 1056018877, -1084371172, -1117645071, 1016050244) + + W(9, -1122950699, -1117414047, 1051876647, -1100397982) + + W(10, -1115478541, 1015156587, 1033051522, -1118879820) + + W(11, 1054656608, -1096248194, -1098736399, -1120116135); + sum2 = + W(0, 1038737431, -1086393652, -1122890851, 1062117952) + W(1, 1031555324, -1105710539, -1107492679, 1061143042) + + W(2, -1087802869, -1089713201, 1047951996, 1021198999) + W(3, 1048033211, -1086071985, 1050554685, 1059347527) + + W(4, 1026779363, -1103176732, -1120313369, 1050160290) + W(5, 1074235084, 1068761695, -1098359769, 1024031768) + + W(6, 1015799927, 1036302581, -1097102610, 1051722018) + + W(7, 1043634021, -1105088271, -1127256174, -1118303985) + + W(8, -1080828834, -1090432670, -1093586557, 1023611974) + + W(9, -1108431753, 1030572627, 1051308486, -1096060997) + 
+ W(10, -1093678452, -1113357802, 1025558434, 1049210573) + + W(11, -1086660156, -1089293346, -1099341241, 1042868230); + WS(-1081907798, -1114644056); + sum1 = W(0, 1023775725, -1099395677, 1055907631, -1100928459) + + W(1, -1119324179, -1109439504, -1170980595, -1134709766) + + W(2, 1050593936, -1112754945, 1032148262, 1017060589) + + W(3, 1026005873, -1099241994, 1058604967, -1087793848) + + W(4, 1009969504, -1128826918, -1126334628, -1120114113) + + W(5, 1054687330, -1087665246, 1038203192, -1138112959) + + W(6, -1135823392, 1034531271, 1038229186, -1104299004) + + W(7, 1028440637, 1038462084, -1129163353, -1108351113) + W(8, 1034921283, 1040795209, -1104169898, 999099597) + + W(9, -1151688884, 1019473342, 1018134455, 1040889398) + + W(10, -1106565731, 1007377992, -1129940170, -1110039318) + + W(11, -1120708196, 1054318790, -1107725509, 1027667523); + sum2 = W(0, -1119441358, -1109792567, -1127769135, -1107196821) + + W(1, 1030789742, -1113410550, -1129970231, -1120391232) + W(2, 1050978947, 992017798, 1040616080, 1030775978) + + W(3, 1013923693, -1103233485, 1033489482, -1095646457) + + W(4, 1038693473, -1115392393, -1135011770, -1103316489) + + W(5, 1054134565, 1049668759, -1124339641, -1108882435) + + W(6, 1015830944, -1104701758, 1049300223, 1056508485) + W(7, 1043974386, -1145057435, -1115083307, 961266788) + + W(8, -1110832497, 1047186396, -1086696217, -1118394614) + + W(9, 1009913072, -1120471806, 1049274561, -1110580647) + + W(10, -1110322035, 1032794842, -1117182320, 1032096200) + + W(11, -1124368769, -1109467751, -1103392519, 1029776437); + WS(1054547415, -1080700728); + sum1 = W(0, -1118959255, -1106813137, -1136179854, 1049136707) + + W(1, -1121699838, -1110798213, 1000886579, 1020745112) + + W(2, 1042266506, -1113159542, -1135369949, -1133353861) + + W(3, -1108938574, 1000629071, -1114102394, -1117329322) + + W(4, -1098171678, -1132246652, 1035589374, -1089948699) + + W(5, 1058763908, 1057279043, -1102488395, -1116069530) + + W(6, -1115953331, 
-1104374042, 1043996152, 1058534976) + + W(7, -1118338951, 1023843187, -1120209015, -1115354302) + + W(8, 1034434840, 1042941412, -1123101368, -1110056358) + + W(9, -1130255148, -1115551813, 1030595334, 1024662339) + + W(10, -1111680098, -1125104281, -1114831416, -1107411355) + + W(11, 1037557447, -1115931546, -1111356186, -1113468791); + sum2 = + W(0, -1104438281, 1010047952, 1041416137, -1126834256) + W(1, -1137643420, -1141914640, 1006975884, 1033813267) + + W(2, -1103312430, -1117623020, -1119875938, -1142859168) + + W(3, -1114845259, -1104617278, 1059878226, -1091497753) + + W(4, 1032827109, -1115736166, -1121830816, 1016682467) + W(5, 1030331467, 1058017528, -1098691452, 1030249718) + + W(6, 1035922248, -1119230330, 1042445877, 1016098448) + W(7, 1038105687, -1111253212, -1109021584, 1016925125) + + W(8, -1127060444, 1041560361, -1110812698, -1153808400) + + W(9, 1024387710, 1031893944, -1106783001, -1112957506) + + W(10, 1032692112, -1114320479, -1116080890, 1028042677) + + W(11, -1113418625, 1032681483, -1112120334, -1122393388); + WS(1052913623, 1049378679); + sum1 = + W(0, -1122838827, 1042960148, -1086626395, 1057292387) + W(1, 1037967453, -1155224026, -1140317417, -1131398976) + + W(2, -1096052024, 1051976144, -1115050310, 1021209962) + + W(3, -1125024735, 1047376245, -1085621156, 1057250406) + + W(4, 1033197251, -1137632582, -1119624720, -1142164473) + + W(5, -1082562971, 1065554348, -1105683981, 1041116332) + + W(6, -1114511683, 1052155164, -1087926293, 1052193006) + W(7, 1022767638, -1114068531, 1000517975, 1026436976) + + W(8, -1091398264, 1054127476, 1031945184, 1019309136) + W(9, 980913767, 1040745327, -1096441475, 1052669955) + + W(10, -1117629257, -1119607996, -1118732996, 1037554661) + + W(11, -1088065070, 1051850318, 1041425290, 1029050594); + sum2 = + W(0, -1128729473, -1113890565, 1030681183, -1116238263) + W(1, 1054381777, -1088819879, 1026695603, -1113695617) + + W(2, 1046246583, 1032966985, 1060417287, -1094452995) + W(3, -1122814467, 
1012493154, -1119541727, 1063042504) + + W(4, 1065151040, -1073258706, 1038041001, 1007226362) + W(5, -1105895610, 1066720275, 1069419683, -1071852139) + + W(6, -1114364242, 1022932917, 1013035938, 1061802664) + W(7, 1070184946, -1071972143, 1029245131, -1121347205) + + W(8, -1109256305, 1054635319, 1067716376, -1076852924) + W(9, -1114309631, 1028217359, 1037707015, 1034508407) + + W(10, 1063333907, -1084913408, -1131614889, 1005475605) + + W(11, 1018572933, -1114817272, 1057796391, -1086618913); + WS(-1080205366, 1054586731); + sum1 = + W(0, 1028285119, -1104231365, -1105384067, 1018315643) + W(1, 1050517144, -1107795033, 1034516295, -1118125300) + + W(2, -1098317298, 1050987674, -1103331232, 1030553289) + + W(3, -1114270114, -1122896269, -1102251495, 1045079495) + + W(4, 1044797187, -1134423957, 1028158052, 1031422323) + W(5, -1084181862, 1065887519, -1106919576, 1039594145) + + W(6, -1122816794, 1042423964, -1083878719, 1058366107) + + W(7, -1136969466, -1127143669, 1015275709, -1113275778) + + W(8, -1102645126, 1055927247, -1127822612, 1007528111) + W(9, 1008209269, 1035567225, -1094749758, 1042520123) + + W(10, -1111681950, -1112108944, 1026827799, -1113291743) + + W(11, -1095155767, 1054820368, 1023800268, 1010334934); + sum2 = W(0, 1018519238, 999332143, 1041061518, 1033756160) + W(1, 1071473984, -1074851124, 1015687849, 1021267259) + + W(2, -1105547391, 1042746528, 1066765865, -1079967238) + + W(3, -1123243110, 1021268373, 1034089943, 1026544409) + + W(4, 1067795051, -1079279124, -1154321191, 1001850235) + + W(5, -1097359239, 1057524548, 1059938647, -1081222132) + + W(6, -1147705571, -1122367554, 1024181853, -1128118669) + + W(7, -1126584943, 1036308208, -1137574530, -1149097003) + W(8, 964634475, 1033087855, -1109247299, 995399503) + + W(9, 1025499637, 1028798873, -1139074510, -1129467393) + + W(10, 1002507759, 1006964374, -1123293958, -1120881342) + + W(11, 1038775557, -1131901697, -1137941026, -1131371017); + WS(1032292188, -1145187004); + sum1 = + W(0, 
-1121091224, 1026282854, -1110430066, 1041424886) + W(1, -1163709786, -1130362518, -1124591439, 1032165735) + + W(2, -1099744666, 1053335502, -1097458647, 1026340962) + + W(3, -1121198813, 1048823572, -1088767505, 1045555550) + W(4, 1040806261, 1020242430, 1009701366, -1113803654) + + W(5, -1109489438, 1044961340, 1038271382, -1136150382) + + W(6, 1007646671, -1119045850, 1041537816, -1098280566) + + W(7, 1040783354, -1117600916, 1026658136, -1092992668) + W(8, 1047050636, 1043075732, 1037260307, 997023189) + + W(9, 1020346030, 1019935126, -1121506375, -1107270401) + + W(10, 1000329008, -1124456788, -1122546321, -1119672872) + + W(11, -1110555528, 1047792295, -1120965766, 1026784415); + sum2 = + W(0, -1120115102, 1027036707, 1028744443, 1037973272) + W(1, -1111409649, -1114092190, 1024423113, -1119221834) + + W(2, -1122478763, 1049287438, -1096539764, 1034186497) + W(3, -1131404107, 1034928052, 1034534080, 1047707985) + + W(4, 1032305459, -1111870688, -1162462122, -1115806512) + + W(5, -1117650486, 1065549495, 1059270295, -1104163224) + + W(6, -1116017912, -1133321326, -1108495610, -1079218451) + + W(7, 1053073262, -1116841516, 1035047655, -1101382306) + + W(8, -1105332910, -1101676179, -1108544028, 1038206879) + + W(9, -1146105278, 1029706393, 1038914207, -1109811124) + + W(10, -1125217018, 1004549098, 1011265421, 1023154260) + + W(11, -1113944011, -1116861489, 1023662974, 1021319568); + WS(1065238444, 1029810764); + sum1 = W(0, -1117664959, 1035631050, -1105662947, 1044376095) + + W(1, -1180059800, -1144996235, 1033625479, 998132025) + W(2, -1110409480, 1041425605, 1029921727, 1002652741) + + W(3, -1121511107, 1046740798, -1115600622, -1117226857) + W(4, 1047596134, 986508455, 1026353828, 1049143094) + + W(5, -1117398499, -1088462543, 1050498082, 1031133314) + + W(6, -1154871713, 1044265580, -1101622768, -1090821428) + + W(7, 1032067983, -1119889178, 980940712, 1037075730) + + W(8, -1109487884, -1099167518, -1134957959, 1038687768) + + W(9, 1018182811, 
1028486865, -1112099063, 1039936213) + + W(10, 1021107193, -1122286680, 1025470327, -1128624253) + + W(11, -1120941494, -1117520833, 1019194744, 1032539083); + sum2 = + W(0, 1027136813, -1109237812, -1131106290, 1028116649) + W(1, -1115313320, -1132573667, -1126103938, 1029641913) + + W(2, 1047599733, 1041211299, -1119521049, 1022404146) + + W(3, -1134438995, -1108335880, -1097134429, -1092284431) + + W(4, -1100904528, 1016921434, 1023841561, 1016569578) + W(5, -1076957808, -1103461794, 1040752059, 1027441157) + + W(6, 1003327910, 1032415828, 1044824498, 1073063564) + W(7, 1045381268, -1106952976, -1130954514, 999116870) + + W(8, 1037851390, -1111597872, -1112786376, 1036890340) + + W(9, -1135210803, 1003972614, -1123469057, 1033926584) + + W(10, 1034574746, -1110049884, 994333964, -1138234067) + + W(11, 1043898273, -1115472968, -1137298819, -1139674515); + WS(-1111543132, 1053084187); + sum1 = W(0, 1033211657, 1029336727, 1056808676, -1089574549) + + W(1, -1112225370, 1035575202, -1116548448, -1100473306) + + W(2, 1042013257, -1097689349, 999357322, -1114691470) + W(3, 1043994373, 1036113196, 1061342772, -1087296749) + + W(4, 1042186215, 1040681774, -1112421659, -1100606697) + + W(5, 1060655799, -1082434496, -1100755553, 1045312421) + + W(6, 1033458902, -1117467379, 1059165410, -1087667391) + W(7, 1042539448, 1039236406, 1042588164, 1024205686) + + W(8, 1057924951, -1091727169, 1041805473, 1042949980) + + W(9, -1106715184, -1098518156, 1042727104, -1095816456) + + W(10, -1105873704, -1113675407, 1042106754, 1010302211) + + W(11, 1057649845, -1093601226, -1114265639, 1035739628); + sum2 = + W(0, 1036389819, 1006854517, 1049279774, -1104547241) + W(1, -1140479293, 1032391465, -1106855677, -1114095075) + + W(2, -1106885840, -1120692701, -1096936463, -1098591621) + + W(3, 1031620401, 1044374087, 1052305677, 1045612807) + W(4, 1051540981, -1115708783, -1109245550, -1105124479) + + W(5, 1054861276, 1058901209, -1092919117, -1081363743) + W(6, 997072274, -1105277948, 
1057655059, 1054428932) + + W(7, -1129490106, -1090444858, -1132619277, 1043212463) + W(8, 1046093411, 1034709627, 1034565381, 1017473602) + + W(9, -1096737767, -1104242691, -1094755169, 1054770354) + + W(10, -1092959620, -1098730750, 1049991282, -1113287926) + + W(11, 1057224822, -1094426804, 1030915645, 1044114915); + WS(-1084020140, 1068126260); + sum1 = W(0, 1006295077, -1156074282, 1049019906, -1097931528) + + W(1, -1117241095, -1123621559, -1137936208, -1103771606) + + W(2, 1049395923, -1091558419, 1040511881, 1023627215) + + W(3, 1034316741, -1104490636, 1061237576, -1086810430) + + W(4, 1040083512, -1108467318, -1149620895, 1038480604) + + W(5, 1033598516, -1099102215, 1044377854, -1115694024) + + W(6, -1141477121, 1018703789, -1094713154, 1059440745) + + W(7, 1044961993, -1120119968, -1110713826, 1041233006) + + W(8, -1098857908, 1018408058, -1146014215, -1145494414) + + W(9, -1119964867, 1031629367, 1023717620, -1122483505) + + W(10, 1038059949, -1122997930, -1159113641, 1005339683) + + W(11, 1036793310, -1108866660, -1129764324, 1007729326); + sum2 = + W(0, 1015277664, 1025130698, 1027591084, -1117062871) + W(1, -1101670769, 1019605180, -1114453597, -1134749664) + + W(2, 1033917561, -1115087054, -1114105716, -1131991228) + + W(3, 1036987011, 1032879133, -1126832950, 1055956188) + + W(4, -1097729634, -1104354242, -1112547401, -1134874844) + + W(5, -1114337945, -1108159193, -1123425664, -1097147867) + + W(6, 1038859445, -1112115766, 1026538526, 1052262112) + W(7, 1058230624, -1101321392, -1116674795, 1018163604) + + W(8, -1114930086, 1032023389, 1047994902, -1100180330) + W(9, 983557058, -1128408082, 968847366, -1105383385) + + W(10, 1046456588, -1106875572, -1127718668, 1003243264) + + W(11, -1132712652, -1120777676, 1042953566, 1009596296); + WS(1044178094, -1112419455); + sum1 = + W(0, -1131480425, 1028145051, 1050373320, -1113746394) + W(1, -1097767254, 1005496122, 1014469647, -1106904202) + + W(2, 1050878345, -1101407218, 1036986154, 1009224698) + 
W(3, 1033460669, -1114114798, 1059597501, -1089975999) + + W(4, -1106579916, 1010606701, -1111970637, -1103921210) + + W(5, 1061375910, -1083033439, 1025899544, -1155279741) + + W(6, 1018155424, -1104772954, 1062331213, -1089702156) + W(7, 1034746546, 1027233072, 1028778773, -1105642670) + + W(8, 1054878354, -1100724747, 1032512190, 1015772371) + + W(9, -1120733619, -1122369255, 1050980308, -1095744476) + + W(10, -1113254377, -1131981388, 1036392279, -1105836421) + + W(11, 1057064179, -1096031599, -1120232930, -1140419566); + sum2 = + W(0, 1044058702, -1120335698, 1041124235, 1021344157) + W(1, -1087803383, 1057976488, -1105410550, 1016883675) + + W(2, 1035757794, -1104385836, -1085495757, 1058928386) + W(3, 1033398047, 1032351899, -1105578788, 1068114101) + + W(4, -1081571635, 1051244378, -1119512188, -1103364509) + + W(5, -1105098443, 1052569394, -1086748155, 1048853947) + + W(6, -1121578506, -1123961512, 1037191880, 1027075889) + + W(7, 1031589750, -1130402783, -1129503263, 1043612533) + + W(8, 1025665274, -1095293941, 1048618044, -1124003088) + + W(9, 1005093795, -1110855577, 1024086018, -1113226379) + + W(10, -1121765158, 1010599720, -1116781712, 1044044613) + + W(11, 1023695645, -1106316439, 1039073287, -1120128288); + WS(-1095246679, -1087513362); + sum1 = W(0, 1027634667, 1005582218, 1054139855, -1093682933) + W(1, 1002440559, -1142612208, 998672596, 1034119552) + + W(2, 1049059024, -1099832192, 1040529483, 1024339285) + + W(3, -1149130110, -1098084154, 1052649123, -1089047862) + + W(4, -1122861959, 1025618962, 1026870673, -1141537276) + + W(5, 1065515488, -1088853800, -1126585773, 1014930618) + + W(6, 1006905115, -1098155038, 1064339388, -1084571535) + + W(7, -1099945809, -1117714471, 1027369115, -1115874224) + + W(8, 1053781857, -1097071228, -1105399392, -1136815962) + + W(9, -1156874400, -1130796284, 1050040635, -1107001190) + + W(10, 1023818537, 1018962754, 1027010724, -1110211381) + + W(11, 1056042372, -1095503614, -1117200441, 1028298602); + sum2 = + 
W(0, 1021849900, 1041438934, -1128850825, 1035803991) + W(1, -1100551265, 1028120411, -1131412357, -1119369172) + + W(2, 1021256857, -1111021944, 1042971908, -1109090883) + W(3, -1118206650, 1040592619, 1046750596, 1038923952) + + W(4, -1109478151, 1026074630, -1125196517, -1114002177) + + W(5, 1025252531, 1072474004, -1083937830, -1100770381) + W(6, 1035111533, 1032710077, 1048692472, 1077302109) + + W(7, -1067064389, 1049109423, -1144348946, 1044882286) + + W(8, -1128830461, -1095554578, -1084038225, 1041329596) + + W(9, 1021086832, -1106944856, 1032666924, 1044371493) + W(10, -1106119426, 1033664959, 1020034266, 1034465258) + + W(11, 1034167215, -1126088605, -1103479472, -1114595648); + WS(-1086863724, -1072185677); + sum1 = + W(0, 1028385114, 999422396, 1054099261, -1096116227) + W(1, -1144842721, -1139590961, -1117432553, -1108936247) + + W(2, 1053618221, -1097427086, 1026659130, 1021035463) + W(3, 1039695343, -1104614655, 1060257868, -1085349189) + + W(4, 1037424170, -1123772160, -1109809663, -1104994067) + + W(5, 1062802560, -1083398775, 1038629270, -1103266023) + + W(6, 1021420394, -1154652341, 1056152893, -1099176000) + + W(7, 1048207210, 1017905387, -1123921882, -1146863602) + + W(8, 1045520765, -1110026089, -1118540449, -1111374457) + + W(9, -1130768465, -1114930982, 1044268562, -1104661441) + + W(10, -1129199023, 997564786, 1022384484, 1034006493) + + W(11, 1050379178, -1115290030, -1101129750, -1108736604); + sum2 = + W(0, -1122660132, 1040406414, 1007232002, -1096101684) + W(1, -1114861255, 1041847394, 1028330563, 1017997140) + + W(2, 1025921746, 1022732374, 1041150821, -1105481885) + W(3, -1115262781, 1025264396, 1043786284, -1102241564) + + W(4, -1094431556, 1055232297, 1045687787, -1103384157) + W(5, 1040909479, 1055010544, -1078483413, 1061241514) + + W(6, -1108704409, -1151694751, -1108714653, 1057419270) + W(7, -1079156700, 1063042984, 1029063313, 986929807) + + W(8, 1043690826, -1126901852, -1078269109, 1067726336) + + W(9, -1116092784, 
1026201848, -1119932664, 1047767958) + + W(10, -1092610189, 1054314043, 1013546756, -1115691336) + + W(11, 1038266954, -1115096031, -1098859524, 1052159629); + WS(-1100931758, -1079897221); + sum1 = + W(0, 1002503979, -1105750735, -1109125113, 1042140489) + W(1, -1126416416, -1119888216, 1034941050, 1038153885) + + W(2, 1017797614, 1051948423, 1024057510, 1035399096) + W(3, -1106318298, -1105238364, -1095922988, 1034322060) + + W(4, -1103862864, -1104877795, 1040768819, 1049149819) + W(5, -1090093369, 1059094088, 1040510958, 1042879258) + + W(6, -1115730241, 1028324215, -1091054939, 1049017573) + + W(7, -1112163155, -1120272065, -1141069099, -1106412131) + + W(8, -1102230944, 1050624300, -1106256757, -1112985689) + + W(9, 1027555620, 1039911116, -1127686569, 1045992036) + + W(10, 1017980086, 1033961137, -1123362142, -1114353739) + + W(11, -1097766172, 1043140938, 1018029782, -1117933580); + sum2 = + W(0, -1118098210, 1039272612, 1033865352, -1107428188) + W(1, 1041731770, -1138511840, -1106915273, -1120265428) + + W(2, 1043187452, -1111026913, -1108614463, -1129236656) + + W(3, 1038839398, 1047577900, -1107536005, 1034998667) + + W(4, -1104254290, 1049639209, -1115506453, -1112696018) + + W(5, -1090204449, 1068488571, -1073443109, 1065503173) + W(6, 1032442192, -1123012045, 982996928, -1096446249) + + W(7, 1040126834, 1035312463, 1016491198, -1123570623) + W(8, 1042935567, 1013219076, 1041050172, -1104804103) + + W(9, 996494424, -1121836120, -1120793710, -1101384300) + W(10, 998034192, 1040382819, -1145352236, 1026228225) + + W(11, 1030161309, 1023915500, 1038049010, -1106858818); + WS(1060652716, -1122012062); + sum1 = + W(0, -1136700086, -1113850590, -1093156519, 1056384406) + W(1, -1101587139, 1021600449, -1120727741, 1022556549) + + W(2, -1098011208, 1052398904, -1106430355, -1114714908) + + W(3, -1136846536, 1030968440, -1100004083, 1062519968) + + W(4, -1132405458, 1016310696, -1102129381, 1036351121) + + W(5, -1079676696, 1061355096, -1107897312, 
-1113036024) + + W(6, 1035874533, 1049961616, -1097573322, 1059521337) + W(7, 1048756772, 1022545811, -1111246103, -1119797202) + + W(8, -1087704492, 1052440101, -1109566980, 956768943) + W(9, 1019584793, 1037896960, -1096988084, 1048869443) + + W(10, 1020055223, -1121983087, -1111739075, 1033695743) + + W(11, -1092447605, 1048091420, 1043876621, 1018273385); + sum2 = W(0, -1134928360, 1033535752, -1108867200, 1056652859) + + W(1, -1094974673, 1034782854, 1028726521, -1102458559) + + W(2, 1031241917, 1048810536, -1100509489, -1123876292) + + W(3, -1102649789, -1114678181, 1028753942, 1051021341) + + W(4, -1100441695, -1100025719, 1041988393, 1050124192) + W(5, 1056497468, 1036208495, 1034323322, 1037384204) + + W(6, -1105797881, -1102215651, -1090602802, -1099717915) + + W(7, -1115767916, -1098766868, -1125378124, 1032107719) + + W(8, 1057082230, -1114740583, 1041259997, 1024245049) + W(9, 1034345705, 1014416128, 1023046526, 1002570703) + + W(10, -1122784508, -1114560124, -1134413224, -1119150822) + + W(11, 1025484112, -1101875417, 1037850051, -1112409616); + WS(-1104650926, -1097620835); + sum1 = + W(0, -1121602225, 1025795527, 1046345311, -1105175020) + W(1, -1160491701, 1000929055, 1021304961, -1105437010) + + W(2, -1108985665, 1045711217, -1117138161, -1145522137) + + W(3, -1132438763, 1039870965, 1041625463, -1093274991) + + W(4, 1040158758, -1117941464, 1033606955, -1097752993) + + W(5, -1144407721, 1062570208, 1049985633, -1122762228) + + W(6, -1119908749, 1037504305, -1100934531, -1099088029) + + W(7, 1037586881, -1109694278, 1032325913, -1109081755) + W(8, 1047145475, -1097698093, 1004919651, 1026835862) + + W(9, -1112547088, -1121171172, -1119544288, -1136783989) + + W(10, -1124949278, 1024727664, 1011690407, -1116159153) + + W(11, 1036418740, -1106563213, 999804916, -1126831120); + sum2 = + W(0, -1103930431, -1122873970, 1048581640, -1139346648) + W(1, 1030984702, -1115391660, 1043997541, -1100601593) + + W(2, -1108032519, 1042591516, -1123657132, 
-1107477718) + + W(3, -1102154605, -1174042309, 1029742536, 1045200151) + + W(4, -1106767363, 1032437784, 1046282803, -1084779372) + W(5, 1068068442, 1052027066, -1086671314, 1025856678) + + W(6, -1106269056, -1114322527, 1051594943, -1098651742) + + W(7, 1024653780, 1036550683, -1116052960, 1046060831) + + W(8, -1119470820, -1115540046, -1127107345, -1121355040) + + W(9, 993204113, -1115511338, -1138871916, 1021777728) + + W(10, -1163609763, -1115373797, -1113135778, 1041954938) + + W(11, 1020181836, -1107159950, -1134598872, -1122730046); + WS(1066180726, 1071088208); + sum1 = + W(0, 1019239309, -1125280121, -1096761986, 1046613601) + W(1, -1107071425, 1022045682, -1139409238, 1001471451) + + W(2, 1012641888, -1137946220, 1047813898, 1006921793) + + W(3, 1016498838, -1110062156, -1103040193, -1089511315) + + W(4, 1045045936, 1010038708, -1117575980, 1036666525) + W(5, -1098591157, 1064065326, 1024305865, 1025055882) + + W(6, 1000209679, 1049354787, -1079760364, 1058946251) + W(7, -1110782371, 990559266, -1113394675, 1048739493) + + W(8, -1090655988, 1056455200, 1013890551, 1028621651) + W(9, 1030691589, -1119525817, -1102803453, 1048597558) + + W(10, 981956228, -1118324476, -1133365628, -1103045922) + + W(11, -1099636767, 1050574049, 1043476657, -1146495466); + sum2 = W(0, -1132513434, 1043448685, -1099364915, -1092624350) + + W(1, 1056566992, -1119372495, -1110784932, -1133524329) + + W(2, 1037755510, 1047633966, -1103089496, -1105722911) + + W(3, 1015163300, -1107748756, -1102119678, -1120785461) + + W(4, 1032253510, -1123661455, 1015068960, 1038994810) + + W(5, 1041992963, 1052941265, -1122308723, -1115405106) + + W(6, 1033174530, -1104237963, 1059473601, -1112171819) + + W(7, 1028984470, -1126727948, -1135254713, -1120882016) + + W(8, 1015880446, -1149514732, -1121912217, 1038410412) + + W(9, 1032888151, -1105704866, 1034086136, -1106883398) + + W(10, 1037486360, -1113287020, -1132281738, 1042278645) + + W(11, -1087682569, 1032085587, -1121651695, 
1025242322); + WS(1036385628, 1044378228); + sum1 = + W(0, -1125923878, -1119989961, -1113894352, 1034400041) + W(1, 1040468885, -1119159494, -1122454178, 1024777096) + + W(2, -1109744594, 1050462805, -1118152051, -1136375156) + + W(3, -1106272614, 1045742974, -1081686591, 1057689729) + + W(4, -1112362495, 1016953331, 1034604284, -1106270482) + W(5, 1049041391, 1056885773, -1111838475, 1033159371) + + W(6, -1112814039, -1097223397, 1058546239, -1082103097) + + W(7, 1051410218, -1127987422, 1030894675, -1112273196) + W(8, 1043400251, -1111542214, 1042722054, 1008300000) + + W(9, -1123079474, -1131680651, 1024933634, 1032256832) + + W(10, -1104716944, 1020481857, -1130721815, 974203052) + + W(11, -1127331477, 1030325078, 1028514056, -1122308858); + sum2 = W(0, -1126857538, -1124098806, 1017023528, -1105780084) + + W(1, -1128747572, -1133097491, -1118763531, -1105376950) + + W(2, -1096176503, -1094874635, -1140350635, 1036694112) + + W(3, 1031801448, 1037674477, 1067983244, 1071605005) + + W(4, -1109792866, -1111308161, 1032593429, -1110580495) + + W(5, -1078904043, -1075832971, -1097243929, 1036064001) + + W(6, 1004108563, -1116861031, 1060638644, 1065456263) + W(7, 1040323331, 1039332997, -1119835024, 1039467599) + + W(8, -1111109918, -1112838992, -1114824160, -1103055030) + + W(9, 1016299757, -1109178305, -1098624444, -1104548941) + + W(10, 1033955221, 1027541135, -1169697445, 1033383777) + + W(11, 1040587540, 1006549790, -1127435882, 1022453269); + WS(1044586414, 987793058); + sum1 = W(0, 981244734, -1094962922, 1056192743, -1106851755) + + W(1, -1119312576, -1109764848, 1029861564, -1105742952) + + W(2, 1054313512, -1099782448, 1027895469, 1032524411) + + W(3, -1127551587, -1103252055, 1058042871, -1098041547) + + W(4, 1019737521, -1110892330, 1008852553, -1106380176) + + W(5, 1048369556, -1096188128, -1121596880, 1024237680) + + W(6, -1110264932, 1039776063, -1104749720, 1052995119) + + W(7, -1106765091, -1120426409, -1120415085, 1036900841) + + W(8, 
-1097596121, 1058305414, -1098125906, 1026360866) + + W(9, -1128191927, 1025145152, -1105335335, 1045503990) + + W(10, -1106717007, -1151171624, -1111219811, 1028803677) + + W(11, -1103570473, 1055249646, -1101993823, 1031597860); + sum2 = + W(0, -1170894487, 1038321486, 1053283699, -1093894288) + W(1, -1100030896, 1046510222, -1116947191, 1032730216) + + W(2, -1102463031, -1109920837, 1047091130, -1098101478) + + W(3, 1031282910, 1034140662, 1044178467, -1089191295) + + W(4, -1097946325, 1046381950, -1112734885, -1104751545) + + W(5, 1054507486, 1065532097, -1095028025, -1113780206) + + W(6, 1042944653, -1102906173, -1128523954, -1115680584) + + W(7, -1094781909, 1020709100, -1112368696, -1115060896) + + W(8, 1038784522, 1029167633, 1050920778, -1114933696) + W(9, 1023647443, -1124833342, -1109068581, 1021288403) + + W(10, -1113980416, -1114621382, -1149951158, -1103764076) + + W(11, -1105151256, 1048070264, 1052479172, -1120673083); + WS(-1114317660, -1079530866); + sum1 = + W(0, 1005492722, -1115918380, -1124407701, -1116105277) + W(1, 1041270838, -1125596386, 1045601427, -1100871485) + + W(2, 1040297908, -1115200394, 1034785295, 1024917256) + + W(3, -1125684614, -1122621547, -1098312611, -1103806788) + + W(4, 1049040249, 1026343653, 1040691817, -1094477942) + W(5, 1054673340, -1097950923, -1103823208, 1033886401) + + W(6, -1138701159, 1044150368, 1041833209, -1088150296) + + W(7, -1129519613, 1039772509, -1122106750, 1041157065) + + W(8, -1120050519, 1040435582, -1121264402, 1028146611) + W(9, 1010492340, 1039210328, -1116922669, 1025767275) + + W(10, -1133614768, 1030520276, -1132247342, 1037992735) + + W(11, 1025359158, 1032362656, -1107139719, 1032249339); + sum2 = W(0, -1155531695, 1035594747, -1111112193, -1101564588) + + W(1, -1116834132, -1130931838, -1116868484, -1130123766) + + W(2, 1034328851, -1113096514, -1115452418, -1123313694) + + W(3, -1135508412, -1121397012, 1033705026, 1029679690) + + W(4, -1152418495, 1018386998, -1106991844, -1115576312) + 
+ W(5, 1050004218, 1048453590, 1029930774, -1127714868) + + W(6, -1135674828, -1126221532, -1117911568, 1050922209) + + W(7, 1007646710, -1121358676, 1001487940, -1107263445) + + W(8, -1123887534, 1043692754, -1114385718, 1023927325) + + W(9, -1119877490, -1121744498, 1017446183, 1034884616) + + W(10, -1122169416, -1115969900, -1131775200, -1125291430) + + W(11, -1110635219, 1032206024, 1016330315, -1122013064); + WS(1066254326, -1103165682); + sum1 = + W(0, 1032736195, -1107990998, 1040177417, -1102376817) + W(1, 1034252186, 1019040113, 1027879829, -1114107453) + + W(2, -1121888407, 1024426522, -1134855959, 1012754494) + + W(3, 1031762634, -1103287009, 1058340070, -1086180914) + + W(4, 1048880356, -1128188760, -1123062443, 1041209406) + + W(5, -1097968365, 1045901207, -1117801777, -1116847305) + + W(6, 1004281096, 1039171204, -1082040497, 1060563077) + + W(7, -1108119503, -1149008906, -1148973229, -1113506869) + + W(8, 1042461256, 1045252895, 1021862138, -1140572507) + W(9, -1119568276, 1029657432, -1110264442, 1035749465) + + W(10, 1017288487, 1021245074, -1130151676, 1042306865) + + W(11, -1106785311, 1042341554, -1125959462, 1010461490); + sum2 = W(0, 1021177914, -1111267114, 1038432054, -1105822865) + + W(1, 1037666445, -1122267795, -1124089023, 1044764288) + + W(2, -1106310853, -1100663157, 1039738660, -1121288215) + + W(3, 1026640236, 1026571855, -1115065858, -1088005063) + + W(4, -1097556837, 1045614354, 1019818775, -1103902938) + + W(5, 1063205014, 1066152263, -1094681119, -1112191805) + + W(6, 1019369915, -1139759728, 1054784388, -1093573252) + + W(7, -1095778937, 1041592438, -1106530127, 1046869720) + + W(8, -1096246655, -1098236584, 1051664274, -1110480242) + + W(9, 1038187852, -1116925721, -1115197568, 1033917272) + + W(10, -1104072801, 1015983837, 1030498751, -1107211067) + + W(11, 1035488589, -1107121631, 1038730556, 1022742082); + WS(1062838508, -1098141683); + sum1 = + W(0, -1134697126, -1129876167, -1116304310, -1099161722) + W(1, 1038618718, 
1030514076, 1033720508, 1021336509) + + W(2, -1104597903, 1019102333, 1045312786, -1118743028) + + W(3, -1121504814, 1014720225, -1101916102, 1049624810) + + W(4, 1036131058, -1134396481, -1119656591, 1046652247) + W(5, -1082370571, 1058084844, 1029119981, 1009920435) + + W(6, 950602286, 1047400131, -1091801796, 1053105145) + W(7, -1119759594, -1132133195, -1114868293, 1033072079) + + W(8, -1096699752, 1050650780, -1119884597, -1133963803) + + W(9, 1034808260, -1139606129, -1121911577, 1034036747) + W(10, 1024218735, 990998386, -1113093265, 1026913289) + + W(11, -1098146380, 1048726310, 1017329950, -1121758433); + sum2 = + W(0, -1120814392, 1026947049, -1124609851, 1029294693) + W(1, -1089366560, 1054044776, 1018600110, -1114533760) + + W(2, 1043306429, -1096357662, -1078857162, -1109773692) + + W(3, 1027325255, -1112504706, 1030055965, 1075131584) + + W(4, -1106088729, -1105806639, -1124538308, -1107191892) + + W(5, -1090928488, -1118772404, 1060040329, -1102685120) + + W(6, -1127260613, 1023329910, 1046523168, -1092017575) + + W(7, 1045316672, -1119137158, 1007599328, -1124222597) + W(8, 1015958190, -1113584525, 1047822624, 1032716843) + + W(9, -1128821133, 1024046209, 1022187138, -1109468012) + + W(10, 1021757630, -1120065319, -1125977712, 1019591350) + + W(11, -1115445424, -1105922548, 1038940063, 1016647840); + WS(1054286935, 1054427377); + sum1 = + W(0, -1139016422, 1031868724, -1103470657, 1027450488) + W(1, 1036611766, -1113202838, 1015966225, 1024526894) + + W(2, -1100637795, 1046711412, -1105992614, 1036202402) + + W(3, 1034481375, -1107528289, 1043922306, -1098573729) + + W(4, 1045477269, -1121078688, 1029692191, -1123790766) + + W(5, -1096987004, 1036148808, -1138151730, 1006910195) + + W(6, -1118014037, 1033739398, -1103397016, 1053311986) + + W(7, -1103945702, 1033532379, 1037138557, -1101429240) + W(8, 1045510949, 1016118627, 1013880803, -1135069562) + + W(9, -1107878452, 1035472806, -1113746932, -1127637067) + + W(10, 1041047838, -1112788047, 
996243233, -1114181077) + + W(11, 998962565, 1041113135, -1118755335, 1020627009); + sum2 = W(0, 998302909, -1114977231, -1111023634, 1036510752) + W(1, 1004185429, 1033017445, 1020343611, 1031676937) + + W(2, -1115901635, -1117993552, 1041465226, -1113757911) + + W(3, 1029585253, -1113383024, -1091081120, -1099268355) + + W(4, -1113451820, 1021941233, 1014080934, 1053648356) + + W(5, -1095412525, -1083342245, -1115471327, -1161911507) + + W(6, -1135780688, 1044096370, 1066870285, 1034064765) + + W(7, -1118193886, -1120710156, -1115911996, 1042686855) + + W(8, -1137657706, 1037488276, 1023464409, 1018751107) + W(9, 1018594371, -1108781434, 1043350243, 1014772326) + + W(10, -1138084208, -1148724493, -1117868456, 1037265024) + + W(11, -1126645268, -1113946823, 1032826620, 967002060); + WS(1069042774, 1023813606); + sum1 = + W(0, -1120569815, 1029846837, -1087916882, 1052864835) + W(1, 1037940281, -1130288148, 982627530, -1113747494) + + W(2, -1109565588, 1048856613, 1011966045, -1136915076) + + W(3, -1113058670, 1033076468, -1095056850, 1056781822) + + W(4, 1041407837, -1140629239, -1122018428, 1039673254) + + W(5, -1085384902, 1059308179, -1107102153, 1027021082) + + W(6, -1110295833, 1038190732, -1083727323, 1063669175) + W(7, -1111254596, 1007651659, 1021479086, 1046432919) + + W(8, -1089999723, 1059014405, -1110936865, -1128736648) + + W(9, 1001627822, -1134594983, -1094559921, 1044534497) + + W(10, 1025983877, -1139332738, 1013049407, 1016231853) + + W(11, -1094610970, 1048044230, 1046858383, -1114487747); + sum2 = + W(0, -1097118790, -1088323667, 1067177084, 1049654149) + W(1, -1120538318, -1108580354, -1115645027, 1030684990) + + W(2, 1056476304, -1101283859, 1040556965, -1124995735) + + W(3, -1109249259, -1081609065, 1058423359, 1057270411) + + W(4, -1122455315, 1017763415, 1042765123, -1082143899) + + W(5, 1061996602, 1055747315, -1120372038, -1114753052) + + W(6, 1015795842, -1102809807, 1006287482, -1087207348) + W(7, -1106540179, 1040354664, 1028792702, 
1059694558) + + W(8, -1085597397, -1106003210, 1042932361, 1033427208) + + W(9, -1116071919, 1064574838, -1080103447, 1023804763) + + W(10, 1048665861, -1098395984, -1105083736, 1072110122) + + W(11, -1081832849, -1090423898, -1101912272, 1049644025); + WS(1046279854, -1100682627); + sum1 = W(0, -1155754074, -1109896894, -1107576521, 1035511522) + + W(1, 1002805634, -1116898232, -1110611178, -1114421037) + + W(2, 1030120983, -1124410677, -1114543731, 1008443967) + + W(3, 1022309140, -1131162933, -1094043528, 1038811574) + + W(4, 1039227081, -1122237996, -1118509483, 1027993210) + + W(5, 1062355587, 1059809630, -1113128332, -1141634631) + + W(6, -1109973457, 1040243222, -1094900881, 1038778317) + + W(7, -1105636701, 1021837827, 1022155595, -1117693450) + + W(8, -1110047247, 1046487839, -1109843880, -1132048229) + + W(9, -1107208537, 1019138289, -1110631994, -1127423844) + + W(10, 1004796827, -1123472091, -1117715774, 996768085) + + W(11, -1107567326, 1039784548, -1112311954, -1128081977); + sum2 = W(0, -1155805362, -1107499970, 1022847832, 1035480223) + + W(1, -1120962569, -1110725720, -1111094572, -1112866732) + + W(2, 1044264749, -1148903065, -1114470280, 1021130376) + + W(3, 1028160711, -1106540686, -1155674450, 1040150233) + + W(4, 1010774604, -1120260095, -1123681391, -1140638716) + + W(5, 1057429467, 1045538131, 1016428102, 1009992044) + W(6, -1116453985, 1031083122, -1106158641, 1031696457) + + W(7, -1120179561, 1026515918, -1119301235, -1139408516) + + W(8, 1025824831, 1041846065, -1118124541, -1114275912) + + W(9, -1108559318, 1034592379, -1111164410, -1120456085) + + W(10, 999266665, 1011855672, -1115385578, -1116565509) + + W(11, -1121519923, -1129601606, -1119709653, -1115407958); + WS(1068834358, -1130516755); + sum1 = + W(0, -1110020716, 1034114243, 1048068617, -1102038774) + W(1, 1031998965, -1110620517, 1043574522, -1095778974) + + W(2, 1048936405, 1035735542, -1100504766, 1038840817) + W(3, -1110718798, 1047457188, 1038642250, -1105529591) + + W(4, 
1047377413, -1109142655, -1129582842, -1108460854) + + W(5, -1114164062, -1096174934, -1130108598, 1023990363) + + W(6, -1114402010, 1054126843, -1110583455, -1112126358) + + W(7, 1044906670, -1122044916, 1034180284, -1096959660) + W(8, 1049433806, 1043709316, -1095231947, 1040978881) + + W(9, 1025857751, 1033414773, -1136840220, -1123546692) + + W(10, -1130058411, -1131191403, -1111247176, 1039130852) + + W(11, -1119539023, -1112879245, 1035429307, -1115530151); + sum2 = W(0, 970017593, -1108165952, 1047729556, -1102798485) + + W(1, -1142827268, 1029593938, -1125442959, 1040753488) + + W(2, 1000227776, -1088611471, 1049645408, -1108133201) + + W(3, -1107196630, 1033984686, 1038319755, -1087549565) + + W(4, -1127043239, -1114349374, -1108273260, 1051322703) + + W(5, 1055231112, 1062180091, -1103837458, 1035145460) + + W(6, -1130992285, -1113564209, -1094436986, 1046553690) + + W(7, -1107399737, -1114342182, -1132402709, 1041587650) + + W(8, 986706110, -1113546876, 1048231400, -1146643074) + + W(9, 1017358256, -1116630320, -1124437274, -1120036494) + + W(10, -1124561928, -1126967424, 988670622, -1122802752) + + W(11, -1132214419, 1032902155, -1145381770, 1014303584); + WS(1067517750, 1033639701); + sum1 = + W(0, 1025509678, -1102897085, 1023511162, 1037697110) + W(1, 1039047450, -1138182274, -1130341242, -1132678155) + + W(2, -1096924480, 1040728225, -1108434135, -1138791135) + + W(3, -1120272434, -1151042791, -1094474081, 1056422164) + W(4, 1032921189, 1010850641, 1018375590, 1013105800) + + W(5, -1086033191, 1062857884, -1122589009, 1034477848) + + W(6, -1116961737, 1041000592, -1085852953, 1058773976) + + W(7, -1100255843, -1112868181, 1026047982, 1032243033) + + W(8, -1093123217, 1063905370, -1106044526, 1026848855) + W(9, 1025041660, 1034904976, -1093479580, 1050274619) + + W(10, -1112977566, -1122948276, -1113724179, -1123793645) + + W(11, -1091335173, 1057695471, -1136654701, 1007925107); + sum2 = + W(0, -1101843642, -1101718380, 1046322966, 1051125372) + 
W(1, -1084762721, 1050651985, 1047092496, -1110599661) + + W(2, 1053918698, -1092942861, 1058235489, -1101055683) + + W(3, 1032746990, -1127672356, -1100340247, 1040369276) + + W(4, -1090389206, 1050412863, -1098074183, -1096713677) + + W(5, 1069438643, 1074732797, -1088693676, -1103557505) + W(6, 1053786699, -1104014372, 1025809994, 1050011098) + + W(7, 1053232821, 1052371166, -1096910060, 1035860070) + W(8, -1083706115, -1080423025, 999981171, -1099152238) + + W(9, 1043846267, -1097849415, 1048729218, -1117530220) + + W(10, -1097505133, 1041618808, 1023876031, 1051096852) + + W(11, -1081917128, -1091623712, 1050919626, -1123531943); + WS(-1098378327, -1087753140); + sum1 = + W(0, -1149815841, -1123262056, -1110243207, 1053738584) + W(1, -1096838314, 1031741109, 1013838405, -1131552366) + + W(2, 997589309, 1025605911, 967065877, -1135340592) + W(3, 1021155853, -1114416842, -1104972312, 1049179692) + + W(4, -1108406847, 1029594857, 1025974631, -1103162646) + + W(5, 1042062186, -1094476095, -1130981226, -1145022506) + + W(6, 1017740987, -1099404299, 1057054155, -1093068705) + W(7, 1023051300, 1005012974, 1020943940, -1100518162) + + W(8, 1059648114, -1101042271, 1010424693, -1112923496) + + W(9, -1137239299, -1111541722, 1047878460, -1111532695) + + W(10, -1117167136, 1030823984, 1031952605, -1098489580) + + W(11, 1061110492, -1100382705, 989087216, -1113821120); + sum2 = + W(0, 1034695587, 1032463913, -1087773887, 1051055536) + W(1, -1103774157, 1028563957, -1111830177, 1031752431) + + W(2, -1104893727, 1010301739, 1003908630, -1111301343) + + W(3, 1017885202, -1103205904, -1114987001, 1045597173) + + W(4, -1107230114, 1042437558, -1116805918, 1045285117) + W(5, 1061301409, 1065451004, 999416278, -1106181364) + + W(6, 1039973913, -1092035876, 1053784665, 1036331211) + W(7, -1118216068, 1014136811, -1113376464, 1044034906) + + W(8, -1096259933, -1101789830, 1047293827, -1127455968) + + W(9, 1024357631, -1097056010, 1015259894, -1105822235) + + W(10, -1119063058, 
-1137717779, -1109344122, 1034392181) + + W(11, -1095057905, -1098597360, -1139528651, 1017174230); + WS(1051991511, -1090129628); + sum1 = + W(0, -1119709994, 1035475128, 1044364174, -1099369674) + W(1, 1024914253, -1122358045, 1028979258, -1103260128) + + W(2, 1048862488, -1107028176, 1037758213, -1142000036) + + W(3, 1025569135, -1099928080, 1054464119, -1092954947) + W(4, 1045752341, -1110227337, 1025156752, 1041023848) + + W(5, -1103260308, 1048877896, 992983000, 1023847918) + W(6, -1113846787, 1030722332, -1090331224, 1036105176) + + W(7, -1115078629, 1018698636, 1032797781, 1040845476) + W(8, 1039529634, -1113372113, 1027244574, -1139349278) + + W(9, -1137003429, 1015665914, 1015668019, -1104520406) + + W(10, 1039091960, -1131269376, -1126457798, 1032471277) + + W(11, 1036006161, -1111431094, -1113568276, 1004297100); + sum2 = + W(0, 1043798657, -1098470129, -1126555274, 1029962999) + W(1, 1028782608, -1131048284, -1117698064, -1096590117) + + W(2, 1047971790, 1037727050, -1161955740, 1022180969) + W(3, -1102458928, -1084690772, 1067678357, 1008201251) + + W(4, -1103096568, -1120783295, -1101130902, 1065791971) + + W(5, -1099131342, -1083609361, 1051059538, 1014843769) + W(6, 1025730631, -1123809426, 1036453804, 1031198846) + + W(7, -1106261335, 1020274026, 1026623851, 1019910088) + + W(8, -1117295572, -1113754273, 1041141064, -1111232000) + W(9, 995294154, 990258218, -1116170672, 1030505142) + + W(10, 1028328942, -1127235720, 1012466631, -1124630920) + + W(11, 1010360391, -1136369090, -1119361560, 1001879503); + WS(1062559660, 1025273829); + sum1 = + W(0, 1017067222, -1098241838, -1088364567, 1060515311) + W(1, -1113064427, 1036248943, 1004255955, 1041017424) + + W(2, -1088941672, 1050253504, -1115329590, -1133026900) + + W(3, 1019590495, -1136327879, -1087512143, 1064814036) + + W(4, -1113817187, 1008140615, -1109975080, 1032386976) + + W(5, -1083706517, 1061681108, -1146803505, 1028690158) + W(6, 1028071367, 1031451349, -1089169701, 1061080357) + + W(7, 
-1112935650, 1010963867, 1004648665, -1112687016) + W(8, -1092690193, 1052988124, 1031379878, 1030651112) + + W(9, -1129853588, 1040197151, -1100415991, 1048643203) + + W(10, -1112391860, -1133141907, 999516190, -1105242363) + + W(11, -1093365052, 1050105214, 1051227835, -1139571464); + sum2 = + W(0, 1058977722, -1088438618, -1070888183, -1080154822) + W(1, 1057028315, 1020668753, -1123128465, -1084402469) + + W(2, -1068917363, -1109667848, -1133954517, -1117456198) + + W(3, 1051128320, -1097793743, -1079680807, -1080832517) + + W(4, 1057005952, 1031920962, 1025585093, -1092491618) + W(5, 1078112044, 1058335327, -1089499329, 1039195999) + + W(6, 1043020026, -1090336452, 1082393215, 1067339759) + W(7, 1038175360, 1046190750, -1090331128, -1094761348) + + W(8, 1070728174, 1057746923, -1096630558, -1139309423) + + W(9, 1037612985, 1032815200, -1090225897, -1122043266) + + W(10, 1051816122, -1116527206, -1101815127, -1080227574) + + W(11, 1066541275, 1067785018, -1086032969, 1046599598); + WS(-1077531606, -1068627295); + sum1 = + W(0, -1124767318, -1113367136, -1095446127, 1055377197) + W(1, -1111814927, -1126382267, 1034866077, 1048375560) + + W(2, -1099058463, 1047513184, 1045191539, 1025894069) + + W(3, -1101653116, -1104403272, -1090535733, 1058086815) + + W(4, -1094831865, -1117513042, -1130761376, 1049955828) + + W(5, -1087242437, 1061174628, 1048150628, 1008100281) + W(6, -1106412978, 1036710310, -1088900579, 1057172304) + + W(7, -1103308083, 999030326, -1121310187, -1108970480) + + W(8, -1096168584, 1056094244, -1102502166, -1115104524) + + W(9, 1040285922, 1049541454, -1097928011, 1047121222) + + W(10, 1043510842, 1032672660, -1107982930, -1107579101) + + W(11, -1095682732, 1054731422, -1103869114, 1025118882); + sum2 = + W(0, -1118325221, 1021086937, 1043003051, -1131106753) + W(1, -1122810381, 1007248641, -1109912068, -1119472758) + + W(2, 1026424546, 1030452362, -1109242129, 1007569065) + W(3, 1055626857, 1051951524, -1092928023, -1103671831) + + W(4, 
1034708821, -1112697430, 1073210842, 1028908234) + W(5, -1081037490, -1079815727, 1035695099, 1034566233) + + W(6, 1050117205, 1044779683, -1093398148, -1105002867) + + W(7, 1037167425, -1131640977, -1108710534, 1039597247) + W(8, 1044106321, 1032627417, -1112762482, 1020894429) + + W(9, 1007734945, -1130127373, -1119182686, -1108845723) + + W(10, -1120996683, -1144614067, 1005945059, 1026547602) + + W(11, 1019051617, 1023992222, 999154803, 1009135009); + WS(1027136184, 1037475189); + sum1 = + W(0, 1032095571, -1116112165, -1115063802, 1049018533) + W(1, -1119314641, -1107767341, -1100957374, 1049686375) + + W(2, 1038845436, -1106083983, -1120251015, 1047825580) + W(3, 1042581547, 1028374302, -1088748902, 1046087792) + + W(4, 1034473457, -1104556803, 1008293424, -1096656387) + W(5, 1053200569, 1044615055, -1099043646, 1041068763) + + W(6, -1125080965, 1033692244, 1024446429, -1090279185) + W(7, 1029265287, 1035958465, 1046313162, -1105885187) + + W(8, -1103807360, 1052904956, 1033985068, -1106489925) + + W(9, -1104233593, -1122471385, 1048125173, -1106111045) + + W(10, -1106885357, 1037737330, 1032243376, -1122778646) + + W(11, -1114896431, -1113201013, 1045413959, -1123269230); + sum2 = W(0, -1109957285, 1028061230, 1045893852, -1110812851) + + W(1, -1108048047, 1027149256, 1046942588, -1090820835) + + W(2, 1047577084, 1048459161, -1114577527, -1106934834) + + W(3, -1108558913, -1168618571, -1096957845, 1058947879) + + W(4, -1104353646, -1104794282, 1035311457, 1053631926) + + W(5, -1097789717, 1028951753, 1041621301, -1139749789) + + W(6, -1103268017, -1140171429, 1058472778, -1086199154) + + W(7, 1047742834, -1105608688, 1020819182, 1024395745) + W(8, 1010859305, 1018177816, -1101676043, 1051639728) + + W(9, 1017489970, 1034074787, -1118924631, -1119421881) + + W(10, 1030918112, -1105627073, -1119040125, -1111153127) + + W(11, -1134098557, 1029986143, -1110045381, 1035525353); + WS(1059085676, -1120419895); + sum1 = + W(0, -1123531970, 1010567522, -1140828475, 
1031892193) + W(1, -1126417758, 1013434325, 1015920803, 1041225150) + + W(2, 1042370229, -1105839574, 1021017477, -1117671948) + + W(3, -1129458830, 1048968044, -1081896608, 1052692461) + + W(4, -1106306215, 1033141013, 1003238718, -1107069702) + + W(5, 1061545382, -1113798029, 1033163859, -1110196995) + + W(6, 1024388894, -1107554082, 1057779808, -1080962214) + W(7, 1051334024, 998455538, 1019022757, -1131681870) + + W(8, 1048429719, 1023517883, 974463598, -1104852314) + W(9, -1122285937, -1110859547, 1046207852, -1107360247) + + W(10, -1137698561, -1130795454, 1029064795, 1018058500) + + W(11, 1043188126, -1109838114, -1182091254, -1115208005); + sum2 = W(0, -1115293356, 1051048471, -1116254969, -1108089569) + + W(1, -1110389222, 1032305896, -1104270657, 1028093784) + + W(2, 1055158904, -1104160418, 1040482382, -1109483288) + + W(3, 995357221, -1111420185, -1108447426, -1113016550) + + W(4, -1105273681, 1033267880, -1140125409, -1087783718) + + W(5, 1057721666, 1060085251, -1106428297, -1106001572) + + W(6, 1041981586, 1031459460, -1086769840, 1041174723) + W(7, 1046452617, -1115354420, 1016190873, 1035133120) + + W(8, -1119040585, -1119561801, 1045849559, -1127947625) + + W(9, -1140513641, 1028088400, -1121069799, -1114345654) + + W(10, 1044224673, -1119205024, -1115228320, 1019234873) + + W(11, 1043934745, -1097229960, -1114369945, 1033437588); + WS(1063842732, 1069263660); + sum1 = W(0, -1134238373, -1122877239, 1055968823, -1099884346) + + W(1, -1101209335, 1027783644, 1031532473, -1122091454) + + W(2, 1052972391, -1097152888, -1119905385, 1025876211) + + W(3, -1118861678, -1114405937, 1057777668, -1088480739) + + W(4, 1025124405, -1133551424, 1031846503, -1102409375) + + W(5, 1062381841, -1081966868, -1166909210, -1124858713) + + W(6, -1118980549, -1105793535, 1065218890, -1086950263) + + W(7, 1045788186, -1146513705, 1032122901, -1113900631) + + W(8, 1051224528, -1107218856, -1105943487, -1136389690) + + W(9, -1119645672, -1129606328, 1055096307, 
-1097976619) + + W(10, 1011470685, -1153551023, 1033668289, -1133229476) + + W(11, 1043160456, -1106065147, -1107257190, 1024085520); + sum2 = + W(0, 1008982555, 1017497319, -1114050188, 1044555710) + W(1, -1115112254, -1121509319, -1122047761, -1119731325) + + W(2, 1048705804, -1104624717, -1113283108, 1042591944) + + W(3, -1134583037, -1110768789, 1082656561, 1041907901) + + W(4, 1041138312, -1111081434, -1137222855, 1050149080) + W(5, 1079097716, -1088882238, 1033378952, 1018241317) + + W(6, -1140752305, 1041298195, -1069364548, 1050137131) + + W(7, 1048604087, -1128224049, 1029699842, -1094516021) + + W(8, -1064571216, -1099205492, -1105508475, 1008906239) + + W(9, -1114302140, 1037679865, -1105564781, -1109039739) + + W(10, 1026612451, -1118064246, 1026101716, 1029205125) + + W(11, -1121859551, 1040352044, -1131493091, 1024193619); + WS(-1129102704, 1046511454); + sum1 = + W(0, -1120255189, 1003871004, -1098175836, 1060542285) + W(1, -1104614392, -1115237700, 992833099, 1029362986) + + W(2, -1113473140, 1049212165, -1118651222, 1012659838) + W(3, -1134582632, 988783847, -1095364566, 1059935407) + + W(4, -1098721330, 1026034940, -1138794987, 1011030567) + + W(5, -1091886143, 1049733902, -1109648580, -1116104473) + + W(6, -1157138191, -1118838023, 1040190185, -1115543549) + + W(7, -1121023765, 1036784617, -1129425099, -1111313565) + + W(8, -1149570791, 1041064643, -1105481630, -1112251695) + + W(9, 1016407976, -1115457860, 1037653026, -1119854466) + + W(10, 1031662787, 1023835049, -1133147960, -1106432157) + + W(11, 1038304902, 1029519512, -1111089569, -1170197274); + sum2 = W(0, 1039353726, 1022956598, -1093991321, -1088722171) + + W(1, 1039854738, -1125147089, -1104263814, 1036699032) + + W(2, -1154678850, -1088770012, -1104468959, -1123571799) + + W(3, 1027240563, -1120983737, -1089878099, -1101846939) + + W(4, -1096446657, 1036003207, 1028285409, -1106986979) + + W(5, 1067214284, 1070132546, 1029156699, -1117321918) + W(6, 1032871415, -1095425364, 
1047639440, 1045480826) + + W(7, -1098508409, 1027342659, -1151417050, -1136397826) + + W(8, 1051464555, -1099095249, 1024325599, -1150420266) + + W(9, 1027668005, -1108376211, -1104038071, 1043597107) + + W(10, -1148394393, -1138411071, 1023556565, -1098325150) + + W(11, 1061372327, -1086577064, 1034968116, 1031516803); + WS(1056411607, -1109579684); + sum1 = + W(0, -1129654332, 1040494825, 1053570965, -1089114662) + W(1, -1099475130, 1041782836, 1027988397, 1031736403) + + W(2, 1051564806, -1091392278, 1018661052, 1032323860) + W(3, 1041758889, -1118623100, 1061336939, -1087917688) + + W(4, 1034322317, 1035039650, 1027974595, -1104250813) + W(5, 1057393436, -1081469355, -1094207710, 1010041732) + + W(6, 1038643645, 1023561152, 1065029703, -1091410052) + W(7, 1052271760, 1041287822, 1038422303, -1115303377) + + W(8, 1052932495, -1085011849, -1096091282, 1020820966) + + W(9, -1115801276, -1111435064, 1051403778, -1108233957) + + W(10, 1036087894, 1036272138, 1047822542, 1037517815) + + W(11, 1056446507, -1089621406, -1095410877, 1043391417); + sum2 = + W(0, -1114696922, 1063327246, -1085101471, -1089121793) + W(1, 1049274948, 1041765085, 1043151870, 1054945342) + + W(2, -1091433742, -1108711949, 1036153834, -1114908726) + + W(3, -1098855398, -1111180476, 1044429796, -1096795753) + + W(4, 1047012469, 1017938665, 1045725515, -1078968963) + W(5, 1071264665, 1049748951, 1043607695, -1113887753) + + W(6, 1041130784, -1075182266, 1068966947, 1041099170) + W(7, 1008700501, 1052357470, -1093166728, -1073598210) + + W(8, 1074573142, -1107925652, -1106854972, -1097732836) + + W(9, 1048010869, -1072729035, 1072575447, -1087601324) + + W(10, -1100708600, 1035577978, -1097485062, -1069127579) + + W(11, 1082548410, 1057234494, -1095686167, -1117196119); + WS(-1075355670, -1094395357); + sum1 = + W(0, -1145694380, 1013276974, 1022872131, -1102394069) + W(1, -1132601278, 1032373060, 1031927794, 1019602069) + + W(2, 1012968093, -1105184299, -1113047243, -1123577887) + + W(3, 
-1126751284, 1034056645, 1064151904, -1089968245) + + W(4, -1112578984, 1012023536, -1107635165, 1046251906) + + W(5, 1059501808, -1079531190, 1045395638, -1121043161) + W(6, 1035883256, -1107984424, 1052823861, 1040654683) + + W(7, 1035604635, -1136193989, -1117999477, -1101824103) + + W(8, 1052884390, -1097950850, -1102570614, -1124087697) + + W(9, 1025403748, -1117853800, 1040309473, 1021798787) + + W(10, 1016601958, 1014648876, -1135056568, -1113050587) + + W(11, 1051481338, -1099617206, -1117559395, 1015049529); + sum2 = W(0, 1040559153, 1024400373, -1112035522, -1102969122) + + W(1, 1035694118, 1008704054, -1099966915, 1050797480) + + W(2, -1095559069, 1048535090, -1109162397, 1035803260) + + W(3, 1051040838, -1098907063, -1091507667, -1081423354) + + W(4, 1049424016, 1032276403, -1122400566, -1088192853) + + W(5, 1063399780, 1065515124, -1098794388, -1104133833) + + W(6, -1103745056, -1132746540, 1064106398, -1104928579) + + W(7, -1097345189, 1042270874, -1127876766, 1040989775) + + W(8, -1101527709, 1047332946, -1105732065, -1109516912) + + W(9, -1118336076, 999475557, 1025976147, -1171911780) + + W(10, -1107661001, 1033064723, -1122216569, 1024449813) + + W(11, -1118033876, 1047796018, 1036172042, -1106418589); + WS(1045480366, -1089018411); + sum1 = W(0, 956510844, -1147321796, -1098659472, 1046172476) + W(1, -1117032824, 1012429717, 998930446, -1119371847) + + W(2, -1114996930, 1016456206, 1032232105, 992745416) + W(3, -1129843310, 1033743797, -1084286683, 1060852743) + + W(4, 1034928007, -1120189081, -1113698796, 1047477674) + + W(5, -1080535200, 1065762350, 1036591131, -1130833259) + W(6, 1001814128, 993727556, -1100569285, 1055209670) + + W(7, 1034876769, -1112309127, -1126854725, 1019312750) + + W(8, -1095107249, 1054147605, -1103835661, -1162991986) + + W(9, 1033942209, 991522899, -1106428344, 1040201800) + + W(10, -1110790848, 1019870324, -1127622934, -1124727396) + + W(11, -1102902186, 1049320503, -1117790432, -1166423644); + sum2 = + W(0, 
-1135134951, 1034251754, -1112699779, 1028246411) + W(1, 1024730233, -1141862453, -1138029383, -1112450391) + + W(2, 1048844919, -1097747276, 1036047504, -1116303285) + W(3, 1039247354, 1048129179, -1081251706, 1030988939) + + W(4, 1032907358, 1032058431, -1105815405, 1013788103) + + W(5, -1102377508, -1085286825, -1098604264, 1026446373) + + W(6, 1043479193, -1107279344, 1074756442, 1066704374) + + W(7, -1121203556, -1115422697, -1108579602, -1098532570) + + W(8, -1079912447, 1048531717, -1097926469, 1017067577) + W(9, 1036464698, -1125069222, 1051417558, 1025432715) + + W(10, 1037541288, -1110631868, -1114044136, -1106884027) + + W(11, -1113485350, 1037742006, -1114746618, 1026757097); + WS(-1129198960, -1098545020); + sum1 = + W(0, 1016631370, -1103774667, -1097986678, 1055978916) + W(1, 998946812, -1113370572, -1109829274, 1046260982) + + W(2, -1103302894, 1038066845, 1013747092, 1026014767) + W(3, 1027654249, 1041444949, -1085753764, 1057520791) + + W(4, 1033950432, -1133701934, -1135763497, 1029491033) + + W(5, -1087937321, 1064242364, -1108378356, -1124979408) + + W(6, 1022371969, 1033440347, -1100503343, 1043702710) + W(7, -1149232679, 1034434352, -1109147378, 1047095266) + + W(8, -1092825640, -1106434743, 1039795292, -1123636138) + + W(9, -1124465415, -1135425502, -1114972810, 1042256612) + + W(10, -1134578155, -1133096682, 1000105708, 1031236725) + + W(11, -1092048676, 1048160666, 1047273267, -1110866582); + sum2 = + W(0, 1008784866, -1101060946, 1045200185, 1046672822) + W(1, -1118279822, 994298612, -1111810524, 1041574978) + + W(2, -1109901275, -1123470329, -1113194466, -1124039764) + + W(3, -1123287968, -1113715041, -1111729623, 1052382391) + + W(4, -1105523541, 1016737108, 1035375858, 1009783259) + W(5, -1099551228, 1057574679, -1090683445, 1025528776) + + W(6, -1131156064, -1117592464, 1028611861, 1037781521) + W(7, -1089242363, 1044936991, 1014562100, 1037249766) + + W(8, -1126723736, -1088911756, 1057541056, 1038965019) + + W(9, 1013976797, 
-1112331585, 1048583824, -1105025107) + + W(10, 1046019340, -1100984465, 1025684805, -1116058888) + + W(11, -1123970907, -1098796399, 1058673062, -1111874598); + WS(1055927127, 1032414456); + sum1 = W(0, 1026777470, -1105922759, 1039415013, -1114467391) + + W(1, 1040445212, -1109775801, -1114645447, 1038629978) + + W(2, -1109541160, 1049649890, -1118657663, 1029983880) + + W(3, -1107741345, -1118870909, -1121580331, -1099187900) + + W(4, 1040802710, -1103039897, 1034928754, 1048976902) + W(5, 1035267569, -1110167275, 1050521172, 1032803169) + + W(6, -1106080726, 1031310464, -1092363639, 1044706191) + + W(7, -1122260014, -1106266495, 1036990351, -1116578243) + + W(8, 1052806443, -1098780639, 1045812872, -1117906236) + + W(9, -1132545269, -1121324945, -1105847083, 1042096283) + + W(10, -1112784326, 1031828590, -1115024108, 1028926430) + + W(11, -1107491323, 1026721601, 1028974308, -1113700371); + sum2 = + W(0, 1046339838, -1095477634, 1041080449, -1106007640) + W(1, 1022253579, -1124447256, -1116647554, 1046850624) + + W(2, -1097525459, 1046513622, -1109703351, 1043170037) + W(3, 1008329020, 1046890022, -1087064144, 1055899106) + + W(4, 1048733947, -1100004355, 1019769686, -1090363791) + + W(5, 1051375999, -1097018549, 1049672544, -1097807314) + + W(6, -1108633040, 1015468535, 1042338708, -1122727333) + + W(7, 1051464963, -1129042543, 1035617664, -1092874283) + + W(8, 1061754259, -1100515622, 1025134369, -1112390214) + + W(9, 1033551944, -1110478242, -1118600243, 1043707610) + + W(10, -1099218588, 1040819075, -1127469148, 1032940188) + + W(11, -1108757958, 1042447169, -1108633308, -1120359161); + WS(1063732396, 1030954530); + sum1 = + W(0, 1032126752, 1036299263, 1040042182, -1085959612) + W(1, 1040218780, 1017251428, 976782235, -1151769923) + + W(2, -1126287138, -1100277765, 1040546448, -1135394341) + + W(3, 1026279044, 1034233519, 1024363996, -1112294345) + W(4, 1044348191, -1164354746, -1118035684, 1048463661) + + W(5, -1092651532, -1099766629, 1048686290, 
1013744650) + W(6, 1028715099, 1036782808, -1113174091, 1035866674) + + W(7, 1009750781, -1137751148, -1121995199, 1036166050) + W(8, -1102126950, 1049742039, -1118854702, 972654113) + + W(9, 1032435871, 1020132772, -1118937966, -1116277291) + + W(10, 1040779899, -1123833085, -1124714096, 1046021200) + + W(11, -1096484767, 1041160144, 1026126509, 1032085003); + sum2 = + W(0, 1023560328, -1114346246, 1055635447, 1080093571) + W(1, 1053265582, -1109086292, 1005726416, 1027252420) + + W(2, 1058379047, 1066361916, 1041698127, 1032891532) + + W(3, -1116687692, -1128267680, -1093219742, -1079490865) + + W(4, 1044641580, -1142986448, -1115408842, -1102003721) + + W(5, -1075825777, -1069273193, -1103412699, 1040229437) + + W(6, 1041728877, 1038357336, -1122468504, 1048667285) + W(7, -1104322529, 1008854368, -1105589301, 1012806192) + + W(8, 1043596623, 1040586029, -1123753164, -1116993440) + W(9, 1038558105, 1025579970, -1110943172, 1032482622) + + W(10, -1134749344, -1125346072, -1113100068, 1032847335) + + W(11, 1037773232, -1123027612, 1003892288, 1027065900); + WS(-1091386327, 1040820769); + sum1 = W(0, -1133727678, -1113530145, 1024243708, 1033777591) + + W(1, 1009482248, -1122301794, 1015097725, -1145488419) + + W(2, -1111308865, 1043618337, -1113425624, 1016257779) + + W(3, -1138479056, 1016526204, -1103657483, 1057905859) + + W(4, -1104459527, 1026855702, -1114059050, 1038143634) + + W(5, -1091368514, 1056300329, -1103801765, -1136938187) + + W(6, 1024901707, -1118547201, -1102986721, 1019703993) + + W(7, 1043076580, -1118577003, -1122041679, 1031893970) + + W(8, -1104061090, 1024793576, 1039830345, -1111999542) + + W(9, 1013224377, -1120867010, 1020963773, -1114932462) + + W(10, 1042855834, -1119434421, -1123564392, 1007161468) + + W(11, -1118947879, 1024563890, -1129826522, 1018984621); + sum2 = + W(0, -1118014300, -1107425211, 988341556, -1122432623) + W(1, 1031639360, -1122715046, -1121259996, 1034914627) + + W(2, 1058886039, 1043257251, -1110426684, 
1030027284) + W(3, -1118480989, 1057124947, 1080471640, 1049657622) + + W(4, 981320073, 1022014335, -1120519112, 1041680563) + W(5, -1067455811, -1080970083, -1105451271, 1012008135) + + W(6, 1034237400, -1110358856, -1105716188, -1091987171) + + W(7, 1044181951, -1121988626, 1031820662, -1114315301) + + W(8, -1113532913, 1051213388, -1110623014, 1009779611) + + W(9, -1116062192, 1010762999, 1033223294, -1109593637) + W(10, 1038375349, -1113750985, 1029964608, 974005161) + + W(11, -1106855886, 1037720533, 999181553, 1028099986); + WS(1067317974, -1128063738); + sum1 = + W(0, -1112292702, 1047000343, -1088068429, 1048838463) + W(1, -1121751190, 1027999791, 1030230219, 1008892739) + + W(2, -1130941083, -1108932456, 1013016624, 1017454988) + W(3, 1023947008, 1038318737, -1122235065, 1034700890) + + W(4, -1165199081, 1031463650, 1037678225, -1110797816) + W(5, 1055157388, -1096039434, 1039874719, 1025959551) + + W(6, 1016840879, -1118959774, 1048428783, -1113377160) + W(7, 1031603132, 1018062255, 1032095758, -1114918032) + + W(8, 1052878187, -1088738347, 1045859362, -1143147336) + + W(9, 1018620213, -1114992535, 1036886165, -1100769370) + W(10, 1028131502, 1018649043, 1023530868, 1004226268) + + W(11, 1052393190, -1082056652, 1050791676, -1115371169); + sum2 = W(0, 1017164622, -1091622621, 1072454217, 1067262657) + W(1, -1098954881, 1035940089, 1004731920, 1038964407) + + W(2, -1113390241, -1112910469, 1038887363, -1108193245) + + W(3, 1046790460, 1049069382, -1080520963, 1044434118) + W(4, 1033204278, 1032575148, -1106036842, 1047078409) + + W(5, -1071223185, -1073367678, -1106847320, -1127548642) + + W(6, 1036535757, -1107481011, 1059476479, -1099641871) + + W(7, 967303234, 1015223408, -1103678216, -1098544714) + W(8, 1061994978, 1029437169, 1030771421, -1109007913) + + W(9, 1025471738, 1040369699, 1053016271, 1051233508) + + W(10, -1123160801, 1042599225, -1122761780, -1107095641) + + W(11, 1054597152, 1062276141, -1113404397, -1116480773); + WS(-1095745367, 
-1102532016); + sum1 = + W(0, -1117700567, -1131139906, -1104729417, 1031266390) + + W(1, 1034962037, -1123836106, -1125050061, -1115476726) + + W(2, 1049785973, 1034862742, -1108925228, 1021444802) + W(3, -1104292247, 1044141479, -1096094522, 1043929817) + + W(4, -1121682994, -1112024639, 1032821374, 1043020835) + W(5, -1103984511, 1025371182, 1003529681, 1036065165) + + W(6, -1106685309, 1033588284, -1135965844, 1048857569) + + W(7, -1108409524, -1114922921, 1026094807, 1027591436) + W(8, -1092452582, 1049600882, 1029191534, 1028793076) + + W(9, -1109617979, 1031476842, 1035377859, -1127826823) + + W(10, -1105178749, 1025559229, 1025256272, -1109574827) + + W(11, -1108767399, 1045422790, -1147768356, 1027631502); + sum2 = + W(0, -1102917712, 1050493331, -1104683957, 1038417192) + W(1, 1032679083, -1124032472, -1090566997, 1058641835) + + W(2, -1106617401, -1131983160, -1117134944, -1142618034) + + W(3, -1083236080, 1072277942, -1083529216, 1026745412) + W(4, 1030312114, 1033461689, -1084121809, 1066117726) + + W(5, -1110147426, -1097349120, 1045086676, -1107466492) + + W(6, -1092542987, 1055792221, 1053260074, -1092834676) + + W(7, -1115504874, 1040055828, -1098664696, 1016159325) + + W(8, 1047496584, 1048447149, -1103742512, -1111688997) + + W(9, -1115432352, -1123932559, 1040670728, -1110071086) + + W(10, 1018575153, 1025562022, -1109069600, -1107091755) + + W(11, 1041712051, 1019306140, 1023956348, 995416610); + WS(1064673964, 1027541745); + sum1 = + W(0, 1040862122, 1042078126, 1060076351, -1092017101) + W(1, 1050924251, -1113513485, -1105951646, -1104573695) + + W(2, 1032906140, -1098380055, -1103234546, -1122608113) + + W(3, -1159900529, -1104241265, 1059885441, -1084208856) + + W(4, 1045405901, -1114509959, 1047048323, -1111978190) + + W(5, 1064315273, -1089015252, 1048739529, -1097330869) + + W(6, -1123048515, -1096920009, 1059865934, -1085419935) + + W(7, 1043557114, -1130206982, -1115789415, 1032018046) + W(8, 1050382000, -1095947620, 1032498465, 
1009712656) + + W(9, 1032172675, 1022298419, 1053685923, -1104053605) + W(10, 1042167691, 1011282770, 1028373376, -1099040790) + + W(11, 1058530774, -1087017843, 1037400788, -1099616447); + sum2 = W(0, 1039738446, -1137481092, 1036746190, 998253152) + W(1, 1035100433, 1029249908, -1096514001, 1050849818) + + W(2, -1109747727, -1102987618, -1144728624, -1103405950) + + W(3, 1067583917, 1017170018, -1078331016, 1044897487) + + W(4, -1108804343, -1113657787, 1083413745, 1041988969) + + W(5, -1066189939, -1088427847, 1030429056, 1049519441) + + W(6, 1067818743, -1115429835, -1077977055, -1125950758) + + W(7, -1124211336, -1105416150, -1089267428, 1054896719) + + W(8, 1050059376, -1108032139, 1045807183, -1104699523) + + W(9, 1050540062, -1108022123, -1119460484, 1038480975) + + W(10, -1164441214, 1036658641, -1102208699, 1042362507) + + W(11, -1140654200, -1098634922, -1138500556, -1135144148); + WS(-1084384556, -1100810808); + sum1 = + W(0, 1032390692, -1101205955, -1095424763, 1056723089) + W(1, -1103994552, -1113631133, 1033258828, -1114756638) + + W(2, -1115332589, 1044941511, -1103092466, 1038921097) + W(3, 1036208454, 1034225747, -1089988048, 1059213081) + + W(4, 1016698694, 1041433148, -1153131217, -1123281148) + + W(5, -1080799329, 1058219330, -1102604175, 1029458059) + W(6, 1035878930, 1052764669, -1086451675, 1065718984) + + W(7, 1010056863, 1046701849, -1113170436, -1110681734) + + W(8, -1089641270, 1051546784, -1105901266, -1114617106) + + W(9, 1034716867, -1132636906, -1111992667, 1048907447) + + W(10, -1104305197, 1022756491, 1008810370, 1010363102) + + W(11, -1089944572, 1051751177, 1025009925, 1042675492); + sum2 = + W(0, 1003892755, -1106845972, -1130802613, 1042856976) + W(1, -1090921483, 1035865477, -1123544386, 1040443741) + + W(2, -1099129814, 1046395618, -1117156960, -1128022157) + W(3, 1040199158, 1041556166, 1034864555, 1049252005) + + W(4, -1101642940, 1029291710, -1099919704, -1098641213) + + W(5, 1044973210, 1051151809, -1094482604, 
1033022953) + W(6, 1046124074, 1043027552, 1046986152, -1120255910) + + W(7, 1049495257, 1028643166, -1114201767, -1102894487) + + W(8, 1040886200, -1098031952, -1113386876, 1031856421) + + W(9, -1137485313, 1031291554, -1097152858, -1103612985) + + W(10, -1132183417, -1106468948, 1018916205, 1029095170) + + W(11, 1035815457, 1034278467, 1026205590, 1020718685); + WS(-1097545175, -1081485407); + sum1 = W(0, 1036015463, 1034150438, 1057909318, -1085934785) + W(1, 1035924004, 1033963168, 1036906638, -1107749409) + + W(2, 1041348600, -1091465804, -1120240750, -1115496908) + + W(3, 1033438601, 1025404544, 1058891216, -1088212159) + + W(4, 1040930578, 1023737332, -1148467606, -1130068931) + + W(5, 1062920275, -1085185870, 1044931644, 1034996968) + + W(6, -1116883369, -1110996648, 1058446394, -1083820335) + + W(7, -1110014855, -1109644005, 1043481795, -1129433572) + + W(8, 1057530498, -1097522985, 1046628778, 1044774695) + + W(9, -1107248831, -1112307200, 1041812911, -1098817647) + + W(10, -1112689630, -1109394217, 1035852867, -1133343797) + + W(11, 1058924580, -1088845017, 1031062731, 991839047); + sum2 = + W(0, -1085422352, -1096165509, 1057773529, -1090487063) + W(1, 1044955346, 1048850208, -1073537081, -1097506962) + + W(2, 1050305732, 1042351306, -1096519547, -1098077313) + W(3, -1090145600, 1035648589, 1070820271, 1042246902) + + W(4, 1012707558, 1040847724, -1103264022, 1037776879) + W(5, 1064009122, 1057815879, 1035447241, 1039355809) + + W(6, -1098060014, -1106454260, -1102687355, 1056492080) + + W(7, -1098251612, -1128314591, 1039561049, -1132410675) + + W(8, 1037201191, -1132940238, 1055262754, 1034334977) + + W(9, -1111561066, -1095706003, -1106323076, 1047053654) + + W(10, -1098685023, -1107078913, -1097648243, -1093619532) + + W(11, 1065891070, -1104795343, 1017091667, 1044351754); + WS(-1079771574, 1075069839); + sum1 = W(0, 1032166989, -1118529175, 1055702741, -1092970539) + + W(1, -1117950828, -1113551430, -1111167476, 1021133405) + + W(2, 1048648437, 
-1092445782, 1033539229, -1142239002) + + W(3, -1123178206, -1093562003, 1062276827, -1088433116) + + W(4, -1104828877, -1110397888, 1032734815, -1104246611) + + W(5, 1071187580, -1118830448, -1114529494, -1119974813) + + W(6, 1029165557, -1098100446, 1061220800, -1097110780) + + W(7, -1112849365, -1139975764, -1112820125, -1114628655) + + W(8, 1049405702, -1093425874, 1023590922, -1111568211) + + W(9, -1135576864, -1133453360, 1042497282, -1109854691) + + W(10, 992807281, 974859957, -1114861753, -1119446905) + + W(11, 1051379002, -1094782027, -1104282614, -1107804608); + sum2 = + W(0, -1108373663, 1058265898, -1105791278, -1097633487) + W(1, 1040059628, 1041333883, 1008476139, -1092753943) + + W(2, -1098184263, 1036219103, -1096394805, -1106791026) + + W(3, -1104454680, 1052155727, -1106622092, 1039768739) + W(4, 1043022076, 1034264586, 1048684757, -1094584251) + + W(5, 1051207789, 1059813061, -1089567461, 1043988712) + W(6, -1093428939, 1044002260, 1040647411, 1037804238) + + W(7, 1053976656, -1094438691, 1053127551, 1027921055) + W(8, -1092246791, 1058749571, -1109099791, 1046191998) + + W(9, -1096653504, -1109762731, 1028193495, -1091622541) + + W(10, -1128546349, -1105323978, 1052320010, -1093142139) + + W(11, -1102532444, 1056925595, -1105143398, 1027895571); + WS(-1080085654, 1070612946); + sum1 = + W(0, 1024243697, -1111019203, -1099863023, 1051294614) + W(1, 1018553499, 1001929499, -1118763904, 1017624971) + + W(2, -1093242824, 1049086721, -1110032568, -1129474933) + + W(3, -1129448298, 998267115, -1096657110, 1054244159) + + W(4, -1107816470, -1103030358, -1125372076, 1049904509) + + W(5, -1083526619, 1063525996, 1050014740, 1034265690) + W(6, -1130529432, 1044325341, -1084273056, 1060516258) + + W(7, -1120428313, 1028809164, -1113325424, 1036368923) + + W(8, -1089131829, 1052725165, 1042939778, -1137068591) + W(9, 1028582463, 1022422215, -1100657096, 1045179095) + + W(10, -1105723916, 995816851, -1115811081, 1029887376) + + W(11, -1095229037, 1047234173, 
1034815359, 1023513072); + sum2 = + W(0, 1027629060, -1114994936, 1044032624, -1120685804) + W(1, 1028515824, -1128032940, 1025235210, 1042086314) + + W(2, -1110089750, 1041443307, -1109301707, 1028557604) + + W(3, -1125103522, -1109536432, 1027377576, 1041551160) + + W(4, 1038947979, -1095260923, -1130467636, 1043514454) + W(5, 1040857173, 1078385077, -1069674168, 1041445431) + + W(6, 1000499297, -1116030212, 1049094807, 1071531230) + W(7, -1072009528, 1044521776, 1020465580, 1034381305) + + W(8, -1102199059, -1097624207, 1042363648, -1111317304) + + W(9, -1138182160, -1133867384, 1040209410, 1042950964) + + W(10, -1100125727, 1026231984, 1014590712, 1032991925) + + W(11, -1107097337, -1110894461, 1020171064, -1115643044); + WS(-1094677847, 1071331518); + sum1 = + W(0, -1108370254, 1050522446, -1090412394, 1059909490) + W(1, -1131659964, 1045668787, 1027561514, -1102155388) + + W(2, -1096900614, 1033926552, -1108396174, -1099365497) + + W(3, -1098732332, 1051883284, -1088235704, 1060113893) + + W(4, -1104366446, 1036562753, -1118400576, 1052444946) + W(5, -1081990504, 1065971548, 1016076284, 1045429432) + + W(6, -1098654343, 1048632224, -1083209683, 1057796766) + W(7, -1094058291, 1027649399, 1020701942, 1036179296) + + W(8, -1095695623, 1055081266, 1044926426, -1115499084) + + W(9, -1114608461, 1043748547, -1096819473, 1048355474) + + W(10, -1102235562, 1020598162, -1102267173, 1046936641) + + W(11, -1088405429, 1057781222, -1122894705, 1041162909); + sum2 = W(0, -1102306900, 1042298222, 1021581591, -1097774924) + W(1, 1051289508, 1044651623, 1039340517, 1011988276) + + W(2, -1091773202, -1083547573, 1050176620, 1060043835) + + W(3, 1036459319, -1104503945, -1086047943, -1076320925) + + W(4, 1066124397, 1072230275, 1032447827, -1099734245) + + W(5, -1071833762, -1070847208, 1069182349, 1080482397) + + W(6, -1099327251, 1032726509, -1087097517, -1079468544) + + W(7, 1058492703, 1067612874, 1052797014, 1032685073) + W(8, -1094401898, -1084551630, 1061745990, 
1060905071) + + W(9, -1122082449, 1045894449, -1089730964, -1096261152) + + W(10, 1029975482, 1035766217, -1098452307, 1031495568) + + W(11, -1094627244, -1097740674, 1062784611, 1036170953); + WS(-1075403638, -1098484659); + sum1 = + W(0, 1008218969, -1141902179, 1045612697, -1098198307) + W(1, -1115216112, 1014117412, 1031048294, 1016567255) + + W(2, 1051272109, -1097302541, 1045061466, 1032424528) + + W(3, -1123131291, -1102849342, 1045999461, -1089514713) + + W(4, -1110301017, -1117285565, 1045188911, 1018204181) + W(5, 1067242878, -1083074125, 1055219615, 1039741805) + + W(6, -1123984690, -1097928441, 1057827361, -1090995143) + + W(7, 1042127630, -1138318152, 1016373997, -1102752375) + W(8, 1039307800, -1089521325, 1018247819, 990571063) + + W(9, 1038284987, -1119205801, 1052680217, -1111274305) + W(10, 1043301703, 1035406958, 993498863, -1111653947) + + W(11, 1048974634, -1089001074, -1112012330, 991913685); + sum2 = W(0, 1032878555, -1136655860, 1031615749, -1113247036) + + W(1, -1102324444, 1033821831, -1123900432, 1023477569) + + W(2, 1032172309, 1040892014, 1041677780, -1122117887) + W(3, 1029453781, 993352402, -1101515073, -1093800370) + + W(4, -1099560811, -1111754290, -1120535472, 1007422372) + + W(5, 1063516373, 1059067488, -1101931537, -1118366060) + + W(6, 1040510410, -1110556919, -1114866186, 1049733424) + + W(7, -1091691569, -1131546954, -1109544472, -1138628828) + + W(8, -1101753271, -1112478460, -1137342172, 1043560141) + + W(9, 1036988619, -1137184004, 1046060209, -1128679382) + + W(10, -1112686045, -1110783375, -1113659102, 1035524869) + + W(11, -1110546008, -1106958218, 999359657, 1026284797); + WS(1018938736, 1060529869); + sum1 = + W(0, -1114076530, 1037607357, 1045714156, -1166977277) + W(1, -1102825464, 1003359422, 1008598737, -1120766810) + + W(2, 1044442542, 1024586298, 985048227, 1019680094) + W(3, 1029696041, -1113865353, 1057228923, -1084710912) + + W(4, 1048654858, -1122235660, -1118730378, 1019917938) + W(5, 1049628033, 1048848151, 
-1109196652, 1032134772) + + W(6, 1036159584, 1041300082, -1081803320, 1052825757) + W(7, -1101625253, 1023446241, 1012443924, -1119787914) + + W(8, 1042784239, -1110703311, 1021290536, 1025035938) + W(9, 1034257674, -1102751430, 1016494120, -1108049641) + + W(10, 1039349129, -1122160707, -1133449206, 1018668169) + + W(11, 1031639516, -1111061268, 1018121541, 1028394476); + sum2 = W(0, -1146778654, 1033337426, -1101837401, 1023259685) + + W(1, -1165315756, 1017616818, 1009999826, -1117028555) + + W(2, -1107426057, -1113617305, 1042598704, -1119841713) + + W(3, 1042445717, -1092512747, -1106280442, 1063964069) + + W(4, -1101414619, 1033400884, 1027431889, -1100415596) + + W(5, 1063146362, 1058459732, -1136881638, -1110929163) + + W(6, -1118789096, -1100913756, -1086144449, -1101061454) + + W(7, -1099212326, 1043348599, 1034933139, 1031653672) + + W(8, -1121957589, -1139753565, 1035181957, -1110261015) + + W(9, -1102516465, 1049736808, -1104136456, 1034026738) + + W(10, 1031514566, -1119239229, 1039233552, -1123203031) + + W(11, -1123128270, -1143324392, -1103321885, 1032933914); + WS(1057790316, -1113005641); + sum1 = W(0, -1123468520, 1034968039, -1107161453, 1035442035) + + W(1, -1107573447, 1040623159, 1016842734, -1180462681) + + W(2, -1109372445, -1126387193, -1110219956, 1001956750) + + W(3, 1017823704, -1141354132, -1097660717, 1045333479) + + W(4, -1111689634, 1034883047, 1031334797, -1111663246) + + W(5, 1060769665, -1097006056, 1016331253, -1121213099) + + W(6, 1029517379, -1096962432, 1062816796, -1085875335) + W(7, 1042106418, 999707420, 1032276500, -1102337515) + + W(8, 1057344992, -1087615930, 1042154624, -1110655190) + + W(9, 1024531558, -1112383277, 1050585827, -1099229077) + + W(10, 1021430571, 1027213229, 1032934320, -1115337892) + + W(11, 1048852876, -1104188731, 1007994773, -1125102461); + sum2 = + W(0, 1027505374, 1006384241, 1038217397, -1102596332) + W(1, -1088198084, 1054112069, -1119743709, 1032385395) + + W(2, -1124784904, 1055108824, 
-1101781911, -1110486748) + + W(3, 1022357321, -1110226975, 1036163691, -1095998103) + + W(4, -1077861124, 1063443275, -1129702556, 1009992786) + + W(5, 1038388564, -1096068823, -1082211809, 1060625206) + + W(6, 1025908250, -1117540770, -1107551412, 1027585837) + + W(7, 1067639721, -1095993202, -1116658238, 1015280579) + W(8, 1043662427, 1052401764, 1063419774, -1084462256) + + W(9, 1007287620, 1013929491, -1110875284, -1112465838) + + W(10, 1041946916, 1040943353, -1132286080, -1123806249) + + W(11, 1032548500, 1049275421, 1054953848, -1088060635); + WS(1057314092, 1031126097); + sum1 = W(0, -1113264897, 1047286031, 1051515170, -1091122512) + + W(1, 1035529026, -1125355113, 1042921300, 1038748523) + W(2, 1044373061, -1119263830, 1016709539, 1029610636) + + W(3, -1118216326, 1040626286, 1057172805, -1081529962) + W(4, 1043973191, 1029771624, 1036778483, 1048990343) + + W(5, -1111196723, -1076778621, 1049229393, -1112976162) + + W(6, 1035020911, 1036098930, 1053560288, -1085296972) + W(7, 1046938632, 1041831703, -1108928881, 1042428894) + + W(8, 1053915059, -1112351005, -1123285785, -1103970660) + + W(9, 1036290288, 1016422504, 1044112316, -1100809115) + W(10, 1035877806, 1044125996, 1012907241, 1041504752) + + W(11, 1048512334, -1107245185, -1123727583, -1123672296); + sum2 = + W(0, -1110279027, -1116888772, 1033554595, -1108669327) + W(1, 1015808739, 1027314221, 1044898087, -1115916604) + + W(2, -1111117443, 1045218355, -1106102431, -1116221606) + + W(3, -1096945908, -1107202311, 1035778723, -1103416606) + W(4, 1041666953, 1024714919, 1039092196, 1045884385) + + W(5, -1113256321, 1065794034, -1107247823, -1098987808) + + W(6, -1113241071, -1103988696, -1115920774, -1090569433) + + W(7, 1049716916, 1049758114, -1099008684, 1049112986) + + W(8, -1139004677, 1055599565, -1106016659, -1091770239) + + W(9, 1044106833, -1123386226, -1112080343, -1099737884) + + W(10, 1031391070, 1053158291, -1103116396, -1134049769) + + W(11, -1114347803, 1044073709, -1108799013, 
-1105054228); + WS(-1078536214, 1004530797); + sum1 = W(0, 1027375859, -1101920227, 1025777344, 1044389645) + W(1, -1110651587, 1018050380, 1017444741, 1031432968) + + W(2, -1103056081, 1048716617, -1137653198, -1125959547) + + W(3, -1120029560, -1106564803, -1107107772, 1051154454) + + W(4, -1107166356, 1019570356, 1000019140, 1048371434) + + W(5, -1082038651, 1066595410, -1130046381, -1125449965) + + W(6, -1114543207, -1118108517, -1087843395, 1055824714) + + W(7, -1105265945, 1033235496, 1013557197, 1042631950) + + W(8, -1090887738, 1054676989, 1030116559, -1126792065) + + W(9, -1122136338, 1018155212, -1103973545, 1041035048) + + W(10, 1023828021, -1122035417, -1148145247, 1016114854) + + W(11, -1102912795, 1016296812, 1040191506, -1142050258); + sum2 = + W(0, 1027035378, 1026113022, -1111907604, -1117578886) + W(1, -1140538653, 1018825264, -1132701065, 1034348094) + + W(2, 1044772326, 987582920, -1136739637, 1015195718) + W(3, -1114259972, 1019537869, -1115459713, -1091701771) + + W(4, -1106970806, 1029368182, 1024730711, -1139897757) + + W(5, -1083470341, -1064559027, 1035917626, -1129456736) + W(6, 986229224, -1140805741, 1024256637, 1083026821) + + W(7, 1051507274, 1025912688, 1013886009, -1118691470) + W(8, 1050967575, 1062853848, 1035548789, -1107265712) + + W(9, -1129435432, 1009541569, 1013091981, -1097816209) + + W(10, 1030786995, 1031335734, -1141056234, -1129312440) + + W(11, 1028089297, 1036032976, -1102804079, -1115863996); + WS(1053759831, 1032374114); + sum1 = + W(0, 972229904, -1121671930, 1054724605, -1092393035) + W(1, 1018308729, -1129466650, -1109228149, -1112583528) + + W(2, -1112974870, -1101934908, 1027590122, 1029939172) + W(3, 1034978575, 1034149296, 1061211455, -1092299422) + + W(4, 1034030567, -1113423424, 1041049216, 1029443406) + W(5, 1048871720, -1093922438, -1111847647, 1038901743) + + W(6, -1120680758, 1029497788, 1059772533, -1094240300) + + W(7, 1041914031, -1106909549, 1029299866, -1098177536) + + W(8, 1052523382, -1091878482, 
-1113968603, 1040006220) + + W(9, -1104501302, -1147362406, -1143596105, -1112394307) + + W(10, 1028043992, -1140269215, 1034604409, -1129221556) + + W(11, 1050895314, -1097371836, -1126204095, 998375305); + sum2 = + W(0, -1150066816, -1150466984, 1023864952, -1110066210) + W(1, 1023825317, -1124858867, -1122485893, 1032358015) + + W(2, -1120268940, 1042589181, 1027888694, -1130877326) + + W(3, -1114773240, 1050624105, -1092046440, 1028033997) + + W(4, -1151325172, -1135440218, 1024800634, 1086980496) + + W(5, -1062089081, -1086968029, 1038366470, -1140173368) + W(6, 996682648, 1049846439, -1092228228, 1024724796) + + W(7, -1121152696, 1022098295, -1123997738, -1116390866) + + W(8, 1022750294, 1041929871, -1118357792, 1017805328) + W(9, 1019128023, 1016213345, 1015347232, -1114433107) + + W(10, 1031614438, -1119866592, -1127165963, -1127449775) + + W(11, 1000268865, -1139900476, -1136800730, 1025894920); + WS(1063005484, 1009613411); + sum1 = W(0, 1023879031, -1099686359, 1052137768, 1046007565) + + W(1, -1104853008, 1005501772, -1116658758, -1121736524) + + W(2, 1047373394, -1096622041, 1032552475, -1119708225) + + W(3, 1016433014, -1101192360, 1060630207, -1088767007) + + W(4, -1107092245, -1118541917, 1020845368, -1114633573) + + W(5, 1066103327, -1084203279, 1041296218, -1116191290) + + W(6, 1014732617, -1095872969, 1062822060, -1090088910) + + W(7, 1027255591, 1022596005, -1133265115, -1106126199) + + W(8, 1059882369, -1090037515, -1131360500, -1115075910) + + W(9, -1123204757, -1114430117, 1050777829, -1103091289) + + W(10, -1122457924, 1022253604, 1010953149, -1135381594) + + W(11, 1058652475, -1092699671, -1108476770, -1114779071); + sum2 = + W(0, 1018912922, 1027578843, 1033809184, -1094114890) + W(1, -1100264522, -1107937173, -1115513506, -1122957333) + + W(2, -1107632498, 1001948858, 1024979575, 996744179) + W(3, 1035927400, 1036987406, -1102289594, 1056054406) + + W(4, -1110397259, -1110541201, -1104590179, -1100032664) + + W(5, 1055409420, 1048553428, 
-1121465059, -1114033403) + W(6, 1040736265, -1110179624, 1055289786, 1051787669) + + W(7, -1147035242, 1041331760, -1107721632, -1125426330) + + W(8, -1112721987, -1094744691, 1045301422, -1113161109) + + W(9, 1025188211, -1103038285, 1036015348, 1032878358) + + W(10, -1131004626, -1108457170, -1112944230, 1043904940) + + W(11, -1110109848, -1118996080, -1124556646, 1028913131); + WS(-1088267692, 1058858468); + sum1 = W(0, -1112379634, 1030761249, 1050799110, -1099191401) + + W(1, 1033839199, -1111715307, 1029162995, 1004146884) + W(2, 1045209037, -1095201623, 981108966, 1002675052) + + W(3, -1140270752, -1122297718, 1060358148, -1091143388) + + W(4, 1043211072, -1120236170, -1141342278, 1040340278) + + W(5, 1057423488, -1083233902, 1032118859, -1118812007) + + W(6, -1155361775, 1036240851, 1058630580, -1098879351) + + W(7, 1032700466, 1017441470, -1113272402, -1129581456) + + W(8, 1044230767, -1096101987, 1030417559, -1148456452) + + W(9, -1140210569, 1042224566, -1104611845, -1122560784) + + W(10, -1140846885, -1124250818, 1015213304, 994119730) + + W(11, -1109863885, -1109785875, 1009738084, -1131671893); + sum2 = + W(0, -1113443438, 1021821461, -1122246023, -1146734500) + W(1, -1128559783, -1125128801, 1023812788, 1044300514) + + W(2, -1104002403, 1024116650, -1115812447, -1134793082) + + W(3, -1131593767, -1104994806, 1038382254, 1049088332) + + W(4, -1105739887, -1131329759, -1112801638, 1060658961) + + W(5, -1099826888, -1089766136, 1025686404, 1034755166) + W(6, -1105757720, 1044687616, 1066144433, 1042354562) + + W(7, -1117250635, -1114066084, -1107427633, -1087213560) + + W(8, 1057721748, -1129092349, 1035306954, 1031417186) + + W(9, -1107137147, -1093064102, -1097329958, 1044181933) + + W(10, -1106719387, -1117543346, 1049954034, -1093773442) + + W(11, 1039897892, -1105235988, 1040800736, -1128306605); + WS(1042978478, 1051058289); + sum1 = + W(0, -1105382966, 1032357083, -1099095273, 1056839899) + W(1, -1122236410, 1024938700, 1035278676, 1023966158) + 
+ W(2, -1110890773, 1050844654, 1019140850, -1122653091) + + W(3, -1102112151, -1113794846, -1087097191, 1060126757) + + W(4, -1107056589, -1122892351, -1102711504, 1052488610) + + W(5, -1081382120, 1063120271, -1150461374, -1119829084) + + W(6, -1113404858, 1046698475, -1086454106, 1060142827) + + W(7, -1103751812, 1007453138, -1111458068, 1025992701) + + W(8, -1097860134, 1054395618, -1120625160, -1106500335) + + W(9, 1038103836, 1042579843, -1105233954, 1048420817) + W(10, 1025690583, 1032216339, -1101215220, 1026938905) + + W(11, -1091436978, 1056719627, -1112415123, 1010325087); + sum2 = + W(0, 986675002, -1098687074, 995475005, -1115454695) + W(1, -1110655697, 1018060844, -1112567440, 1041166801) + + W(2, -1097111455, -1132868679, -1114152579, -1101427932) + + W(3, 1033516168, -1099338776, 1062728692, -1120015952) + W(4, 1023628514, 1045275251, -1097585273, 1019263552) + + W(5, 1066654492, 1054175336, -1097107880, -1098419362) + W(6, 1031672616, -1108963639, 1034345380, 1038275134) + + W(7, -1118909665, 1049612882, 1029453516, -1104454378) + + W(8, -1124466258, -1096244103, 1049130538, -1100742770) + + W(9, -1106591291, -1109440188, -1092068145, 1039104546) + + W(10, -1093717692, 1000911598, 981423732, 1024870634) + + W(11, 1048117247, -1119846616, 1042494451, -1140045927); + WS(-1097071959, 1072623846); + sum1 = + W(0, 1026094363, 1010541764, -1087379998, 1028105114) + W(1, 1046169702, -1135291959, -1114366565, 1023927408) + + W(2, -1100421531, 1051865478, -1105971271, -1125008010) + + W(3, 1026372640, 1034504814, -1091057687, 1057630379) + W(4, 1041980470, 1012391943, -1127140609, 1038480008) + + W(5, -1086095524, 1064048287, -1104375598, 1011497593) + W(6, 1020533357, 1046400295, -1083615449, 1061319210) + + W(7, -1113258549, -1126436590, -1164090721, 1040252248) + + W(8, -1089701071, 1056141130, -1149976804, 1008261592) + + W(9, -1143600974, 1040642843, -1093225865, 1049626241) + + W(10, -1120849689, -1126913777, 993477434, 1032065949) + + W(11, 
-1090227881, 1047950199, 1048635489, 1026040178); + sum2 = + W(0, 1012042818, 1045767812, -1063490706, -1101778611) + W(1, -1112785819, 1012780879, 1021360395, -1098807230) + + W(2, -1081789373, -1114664520, 1033999610, 1021391529) + W(3, -1107209227, 1044720012, 1074390906, 1049922132) + + W(4, -1110057121, -1130294258, 1027378250, 1042281858) + W(5, 1079115935, -1094869567, 1045861970, 1031106226) + + W(6, -1105921842, 1016599978, 1058048856, -1118592797) + + W(7, 1034050759, -1122317121, 1041772128, -1127223374) + + W(8, -1108782236, -1121088549, -1129798306, 1025125464) + + W(9, -1127367716, -1115783877, -1128008216, -1137516645) + + W(10, 1033434514, -1149825053, -1127019644, 1027816090) + + W(11, 1032186898, -1111496361, -1136389257, 1031888403); + WS(-1103312814, -1092017335); + sum1 = W(0, -1127703091, -1101144796, 1054306333, -1126801713) + + W(1, 1023075088, 989243788, 1018146514, -1115460311) + W(2, 1039997174, 1026063214, 1027296474, -1122138660) + + W(3, -1121799134, -1106383343, 1024945271, -1131110905) + + W(4, -1132250026, 1032388672, 1017072510, -1133230391) + + W(5, -1088288960, 1059941407, -1113425894, 982272924) + + W(6, -1131539818, 1034901054, -1088630417, 1061737047) + + W(7, -1097522374, 1021266411, -1112727125, 1027622014) + + W(8, -1096972398, 1057455253, -1097018828, -1135487483) + + W(9, 1017229265, 1018005017, -1101045202, 1048616129) + + W(10, -1112216732, -1117699788, -1109754009, 1034197373) + + W(11, -1098488314, 1051714580, -1142469689, 995701513); + sum2 = W(0, 1015133861, -1105189044, -1093732269, -1094162786) + + W(1, -1109540616, 1030009493, 1037113072, -1115931822) + + W(2, -1102686481, -1113599737, 1040400825, -1114813207) + + W(3, -1114822552, -1098364789, 1041653115, 1041635421) + + W(4, -1118060506, 1027616765, 1028491873, -1113527921) + + W(5, 1065997875, 1064813540, -1123044799, -1116517245) + + W(6, -1105557810, 1024139573, -1130473293, 1037786550) + + W(7, -1105450383, 1021543889, 1038591890, -1112247318) + + W(8, 
-1098512761, 1036196676, -1106060901, 1005718790) + + W(9, -1105949948, 1007564387, -1125828389, -1114108758) + + W(10, -1118151575, -1122858549, 1032708826, -1120342494) + + W(11, -1125733097, -1132225813, 1010062515, 1029912177); + WS(1054094679, 1053313313); + sum1 = + W(0, -1133457438, -1123263126, 1046955525, -1124016519) + + W(1, 1039131745, -1120644986, -1120986711, -1130558777) + + W(2, 1034791245, 1044418743, -1102726978, 1007447622) + W(3, 1034987238, -1097984725, 1059693200, -1102062065) + + W(4, -1136653836, -1119688622, 1028444909, -1096198619) + + W(5, 1056744813, -1098120268, -1097784815, -1150742701) + + W(6, 1035642130, 1007426149, -1111414009, -1111514230) + W(7, -1121981368, 1028941155, 1018062863, 1002886242) + + W(8, 1030506913, -1104691680, 1038384226, -1124505481) + + W(9, 1034710931, -1123562906, -1113706235, 1011520719) + + W(10, -1114621003, 1028268649, -1149170946, 1018181775) + + W(11, 1033357311, -1107703301, 1012043028, 1016894461); + sum2 = + W(0, 988916677, 1035831303, -1110571087, 1034464782) + W(1, -1106828560, -1178538002, -1137939637, -1140903881) + + W(2, 1038296707, -1090860425, 1044389789, 1041756630) + + W(3, -1123566910, -1103092742, -1107155945, 1056968489) + + W(4, -1079651989, 1029915845, 1028209713, -1131746560) + + W(5, -1117714466, 1064742714, 1050942503, -1106236653) + + W(6, -1123858844, -1101579019, 1038803820, 1040557782) + W(7, 1043224862, -1110415601, 1036944358, 1038814302) + + W(8, -1103151768, -1120375726, -1130430804, 1019097405) + + W(9, -1119538750, -1130948272, 1032643961, -1111036663) + + W(10, 1041889175, -1132733633, -1142606817, 1038676356) + + W(11, -1111989951, -1122089622, 1014857001, -1140337549); + WS(1060301740, -1107262085); + sum1 = W(0, 980901136, 1034782831, -1105836176, 1036556474) + W(1, -1123655719, -1123449511, 1027361860, 1047173818) + + W(2, -1090641081, 1050207945, -1104237805, 1032390687) + + W(3, -1140723427, 1039236098, -1089836727, 1053103024) + + W(4, -1097134658, 1023266998, 
1026764890, -1167399456) + + W(5, 1057068694, 1041462691, -1168491986, -1128069487) + + W(6, 1040866884, -1096504911, 1061579759, -1084387656) + + W(7, 1041789973, -1111341496, 1025665684, -1107799148) + + W(8, 1058017849, -1086651203, 1041962562, -1121577073) + + W(9, 1033047081, -1104388354, 1050545524, -1099258584) + + W(10, 1042093191, -1128080260, -1120424813, 1042507977) + + W(11, 1043225643, -1106352190, -1107448273, -1118302731); + sum2 = W(0, -1120672932, 1046006360, -1112452525, -1103760793) + + W(1, -1135937657, 1037670355, 1020434122, -1097339905) + + W(2, 1059308780, 1054394959, 1038148088, -1114765629) + W(3, 1029602171, -1114812458, 1040180724, 1058139034) + + W(4, -1103543044, -1120562291, 986372169, -1107596062) + + W(5, -1073685765, -1075031098, -1098429000, -1131755817) + + W(6, 1039121856, 1036462865, 1068230783, 1055263295) + W(7, 1051067274, 1034635900, -1113202257, 1042494496) + + W(8, 1060720662, 1065727212, -1106499347, 1010504894) + + W(9, -1139183469, -1097212089, -1116372590, -1106882983) + + W(10, -1127997619, -1127537321, 1010867474, -1111658779) + + W(11, -1096232759, -1102585430, 998537328, -1141626877); + WS(-1096709719, -1109932402); + sum1 = W(0, 1015250582, -1143431239, 1041217222, 1009444904) + + W(1, -1127867147, -1115442975, -1109965037, 1015070528) + + W(2, 1044707911, -1099789672, 1028928427, 1033219242) + W(3, 1037158888, -1094706154, 1032459556, 1049449359) + + W(4, 1015515213, -1119289842, -1115453048, 1051852512) + + W(5, -1103734188, -1091866852, 1049692098, -1140261177) + + W(6, 1032345875, -1107272167, -1116721265, 1042112892) + + W(7, -1094357813, 1041543818, -1113173429, 1039101648) + + W(8, -1100636937, 1051337808, 1036490203, -1107580668) + W(9, 1033146133, -1112986991, 980311674, 1011811037) + + W(10, -1118660177, 1027230184, -1109546082, 1034073405) + + W(11, 1017051268, 1035868647, 1019523665, -1121586498); + sum2 = W(0, -1129366500, -1111869912, -1114572049, 1047691494) + + W(1, 1024096486, -1112777200, 
1029756683, 1029022998) + W(2, 1041454921, 1051011830, -1094989992, 1033382685) + + W(3, -1113040482, 1046043365, 1063827282, -1075590857) + + W(4, -1094007689, 1046068934, 1041050942, 1028636589) + W(5, -1088878874, 1052419847, 1049374228, 1022134325) + + W(6, 1019865458, -1100561355, -1120053198, 1053136256) + + W(7, 1051405216, -1104418826, -1130192114, -1118786886) + + W(8, 1037601458, -1142379802, -1105102628, 1039472389) + + W(9, 1021094310, -1136660469, -1121128994, -1115896886) + + W(10, 1030093181, -1173694926, 1011878989, -1113939693) + + W(11, 1024511741, 1033422229, 1038213156, -1122209396); + WS(1065853238, 1014077745); + sum1 = + W(0, -1141216588, -1117433015, -1094201799, 1050233869) + W(1, 1044254087, -1138617969, 1026623353, -1129058045) + + W(2, -1096460535, 1049952264, 1008011942, 1022594791) + W(3, -1128763228, 1039902504, -1083943142, 1051933059) + + W(4, 1048823509, -1141149825, 1020370461, 1043759351) + W(5, -1077503527, 1055071872, 1025148605, 1032777398) + + W(6, 1026101893, 1050608829, -1089716396, 1055823041) + W(7, 1040921250, -1117303781, -1118113002, 1042454284) + + W(8, -1098731144, 1057515106, -1120507502, 1030831063) + W(9, 1032019108, 1027449020, -1095714819, 1050055875) + + W(10, -1149099066, -1131034510, -1113706417, 965645460) + + W(11, -1094767340, 1054942629, 1040832262, 1023411184); + sum2 = W(0, 1024492456, -1107369605, -1114418679, -1102696165) + + W(1, -1133142225, -1122518916, -1126703609, 1040389587) + + W(2, 1038677226, -1115181151, -1114014523, 1026327736) + + W(3, 1029353704, -1109233879, 1043636997, -1104069114) + + W(4, -1122395696, 1035228514, 1036516578, -1095300447) + + W(5, 1063429366, 1063594124, -1091978649, 1023044321) + + W(6, 1032071462, -1104963978, -1101740776, -1098164193) + + W(7, 1053626887, -1116601496, -1107363712, -1106122650) + + W(8, -1097459892, 1040620147, -1097146692, -1130578705) + + W(9, 1044306067, -1143085570, 1035238854, -1106357294) + + W(10, 1040976159, 1026795860, -1105180142, 
1043865859) + + W(11, -1116890590, -1127904241, -1120794976, -1127926993); + WS(-1088917996, 1063906509); + sum1 = + W(0, -1110920777, 1044506738, 1047786311, -1095080042) + W(1, -1134941681, 1033765698, 1042151912, -1103723194) + + W(2, 1032125650, -1113114611, 1025189266, -1104090263) + + W(3, -1101936374, 1033303784, 1061302810, -1088574755) + W(4, -1115794445, 1041151454, 1005195703, 1049511025) + + W(5, 1034065915, -1080208865, 1048946939, -1113407178) + W(6, 1032265367, -1113286290, 1048810800, 1049193686) + + W(7, -1126346531, -1109067703, -1103459368, 1043019742) + + W(8, 1054491533, -1121283466, -1103486336, 1044875416) + + W(9, 1041691572, -1128222025, -1112859072, 1024774526) + + W(10, 1015955599, -1121559327, -1129700693, -1114181825) + + W(11, 1049358361, -1147499681, -1097425800, 1029016425); + sum2 = + W(0, -1103461552, -1127991186, -1120190757, -1108916090) + W(1, 1012492564, 1029245469, 1050674327, -1116051994) + + W(2, -1113530372, -1124692174, 1042343963, -1115385529) + + W(3, -1104590141, -1131508502, 1055990720, -1121470095) + W(4, 1032226081, 1026603353, 1015549042, 1050173609) + + W(5, 1072931527, 1046026095, 1048814953, 1024686773) + W(6, 1036919049, -1105608002, -1095048932, 1050729023) + + W(7, -1098673970, -1108452621, -1098769683, 1039941075) + + W(8, -1078020039, -1084802111, -1101605152, 1046452703) + + W(9, 1046313441, -1101603735, -1101033642, 1047911325) + + W(10, -1113534935, -1112756717, -1112862464, 1031505597) + + W(11, 1010804308, 1034713295, 1017021378, -1119772659); + WS(-1128039792, 1065254279); + sum1 = W(0, 949814928, -1114341117, -1094482850, 1052398616) + W(1, 1027872723, -1155305313, 1027171091, 1031806764) + + W(2, -1095186291, 1051880795, -1139824782, 1022544667) + + W(3, -1129242384, 1033646238, -1085842430, 1058904806) + + W(4, 1010706555, -1121892394, 1031101353, 1018879791) + + W(5, -1092086861, 1068407459, -1098849364, 1040052675) + + W(6, -1120975327, 1042055949, -1084252337, 1055432619) + + W(7, -1121195299, 
-1127339326, 1003634843, 1033681236) + W(8, -1089087466, 1036562814, 1023772639, 993927654) + + W(9, -1144006416, 1023909046, -1099056317, 1039311896) + + W(10, -1126801378, -1123991661, -1123053628, 1027677731) + + W(11, -1095496311, 1050962337, 1038931216, 1005742098); + sum2 = + W(0, -1120833032, -1124763415, -1118777010, 1030229073) + W(1, 1028911958, -1123843510, 1034915193, -1113475142) + + W(2, 1010824003, -1105412742, -1133618203, 1030755234) + W(3, -1115307960, 1025322201, 1064953874, 1042572184) + + W(4, 1022077590, -1135426163, 967112657, 1059271888) + W(5, 1082807588, -1098383492, 1048790378, -1113939962) + + W(6, 1023925021, -1102912552, -1080316189, 1054443836) + + W(7, -1103923621, 1038571089, -1107476032, -1118552210) + + W(8, -1064631277, -1098098835, 1029953956, -1109596012) + + W(9, 1033449936, -1103660545, -1110193476, -1121091158) + + W(10, -1106218101, 1035405992, -1132191993, 1033898849) + + W(11, 1039205719, 1028111985, 1036034024, -1126376787); + WS(-1100484014, -1082813103); + sum1 = + W(0, -1123138350, 1037916588, -1092840871, 1048819422) + W(1, 1042234192, -1212876940, -1124738939, -1123209507) + + W(2, -1095165544, 1053647432, -1117914254, 1018206961) + W(3, 991988525, 1049386547, -1086369653, 1054472273) + + W(4, 1039202605, -1120915541, -1115403050, 1038702225) + + W(5, -1082690349, 1063735148, -1121098737, 1024686130) + W(6, 1032429349, 1052139678, -1080932794, 1057734851) + + W(7, -1124146173, 1015928020, -1113715255, -1114440425) + + W(8, -1104541586, 1057909982, -1120991811, -1138122766) + + W(9, 1024280877, 1033284897, -1091044852, 1048946363) + + W(10, -1116174764, 1018911880, 1024090791, -1105213156) + + W(11, -1098590565, 1057027323, -1140316975, 1023234987); + sum2 = + W(0, 1017320792, -1151813795, -1114719136, -1127138406) + W(1, 1024237362, 1038691322, -1115193567, -1117132961) + + W(2, -1104426322, 1035777956, -1122656452, -1114810881) + + W(3, 1033153774, 1023133248, -1094043595, 1044557822) + + W(4, 1043828952, 
-1134215421, -1109723345, -1096979800) + + W(5, -1067432541, -1081748159, -1105451925, 1025666138) + + W(6, 1038092476, 1051436512, -1094705611, -1094662263) + + W(7, -1118217110, 1023161212, -1111785287, -1136600857) + + W(8, 1084038773, 1059186895, -1118750811, -1141123106) + W(9, -1119671739, 1043953938, 1051561136, 1023890070) + + W(10, 1026839182, 1009024009, -1109901657, 1038440342) + + W(11, -1097692319, 1021470996, -1114305884, -1122422926); + WS(-1090600151, 1059961608); + sum1 = + W(0, 1026132748, 1003856660, -1086859502, 1051724689) + W(1, 1051693682, 1032934656, -1119499740, 1023957057) + + W(2, -1105095800, 1035359281, 1030063117, -1123043606) + W(3, 1009974340, 1043713207, -1089372619, 1059175635) + + W(4, 1040699442, 1034542394, -1113885561, 977342510) + W(5, -1080432315, 1059908410, -1106762114, 1018557983) + + W(6, 1031101992, 1051525550, -1084623068, 1060487798) + W(7, -1114853678, 1019132432, -1111559238, 1025363409) + + W(8, -1086083942, 1059822844, -1116041416, 1031213982) + W(9, 1038517558, 1031878320, -1090836983, 1051599439) + + W(10, -1130086580, -1150140386, -1109274083, -1114083846) + + W(11, -1097114792, 1057278953, 1040974422, -1138998830); + sum2 = + W(0, 992923986, -1099561897, -1105536701, 1079165891) + W(1, -1067147152, -1122782084, -1162949315, -1123994385) + + W(2, -1114799541, 1071631223, -1073660425, -1122543983) + + W(3, 1026898877, -1116860796, 1049791100, 1068780303) + + W(4, -1081838471, -1095462698, -1114052913, 1033030662) + + W(5, 1035377948, 1075632919, -1083671230, -1097530520) + + W(6, -1129262506, 1024241717, -1088177074, -1093777203) + + W(7, 1057542893, -1107750254, 1033577774, 1027801571) + W(8, 1049717575, -1083766160, 1057183721, 1032284006) + + W(9, -1129225578, 1041458479, 1043978030, 1029517847) + W(10, -1103299573, 1025982179, 1036380420, 1050404867) + + W(11, -1091169113, -1083354335, 1061807320, 1016984746); + WS(-1077101366, 1062204989); + sum1 = + W(0, 1030791972, 1034919675, -1119759728, -1099266697) + 
W(1, 1044609922, -1131275309, 1033145401, -1109323772) + + W(2, 1033209467, 1032826652, -1108657922, -1142019948) + + W(3, 1041963978, -1109601924, 1053457281, -1084468746) + W(4, 1049423472, 1005679697, 974201395, -1098302775) + + W(5, 1054965265, -1116769210, 1034832195, -1125973710) + + W(6, -1122207423, 1049858413, -1085314770, 1055247733) + W(7, -1104833109, 1038354000, 987140211, -1163737350) + + W(8, -1111164798, 1051122507, -1113738245, 1030630601) + + W(9, 1038549201, -1104961646, -1103072597, 1043488164) + + W(10, -1121147841, -1132893044, -1110991006, 1022472166) + + W(11, -1115893275, 1036206931, -1131086146, 1033991117); + sum2 = + W(0, -1128313262, 1013333880, 1041733323, 1023499747) + W(1, -1111312052, 1026149469, -1111995834, 1035550861) + + W(2, 1024909109, -1107108555, 1015145454, -1188980193) + W(3, -1111474816, 1047569009, 1049082846, 1042800306) + + W(4, 1017141780, -1123639612, -1108547758, 1036006103) + + W(5, -1093864056, -1137016284, 1013541080, -1132476264) + + W(6, -1123519406, -1104835579, 1049905793, 1032249988) + W(7, 1034012446, 987971164, -1107039325, -1119199538) + + W(8, 1005924363, -1146750815, -1119513572, -1118671040) + + W(9, -1122872424, -1113807123, -1107099901, 1024429363) + + W(10, 1036805530, -1119021692, 1034342977, -1111926114) + + W(11, -1117079308, 1029021112, 1021769214, 1010774610); + WS(1062330988, -1108933484); + sum1 = W(0, -1130422235, -1127744279, -1103583502, -1112967222) + + W(1, 1030687296, 1027221408, 1014897395, 1022571909) + W(2, -1095189640, 1057599524, 1033146447, -1115561596) + + W(3, 1027643186, -1103670303, -1097916197, 1050533350) + + W(4, 1045948242, 1024802772, -1143835797, -1110385841) + + W(5, -1083565633, 1066390108, -1104991248, 1027456689) + + W(6, 1008148004, 1042533684, -1085814590, 1059916677) + + W(7, -1098839314, -1125614226, -1128962907, -1104774725) + + W(8, -1117452294, 1050496965, -1114792086, 1027191883) + + W(9, 1015454424, -1116892960, -1099231943, 1049304867) + + W(10, 
-1127702342, -1115468561, -1140840472, -1113171223) + + W(11, -1103031657, 1049771574, 1032045679, 1026854932); + sum2 = + W(0, 1032895988, 1032830726, -1115406541, -1088204952) + W(1, -1092446016, 1031989945, -1117467777, -1111744013) + + W(2, 1060942494, 1054289941, -1123665301, -1104702594) + W(3, 1028545567, 1005059811, 1047355779, 1071613570) + + W(4, 1060958044, 1046230999, -1111764526, 984631841) + W(5, -1080183554, -1077634808, 1046926373, -1100189171) + + W(6, 1041090350, 1049595248, -1134532596, -1093147012) + + W(7, -1113920155, 1034621337, -1115412116, -1107906030) + + W(8, 1035977506, 1042083474, -1097187857, -1132922134) + + W(9, -1125773241, -1128022386, 1020817981, 1037668146) + + W(10, -1142970826, -1117116166, 1023304410, 1041765137) + + W(11, 998580490, 1032116542, -1125184917, 1022449461); + WS(-1109331804, 1042368707); + sum1 = + W(0, -1127022377, 1017844485, 1059385318, -1100795701) + W(1, -1111987554, 1016900750, 1024616740, -1131144083) + + W(2, 1050963589, -1097056965, 1029893237, 1023074109) + + W(3, -1123466393, -1107976561, 1060165431, -1085607547) + + W(4, 1029216560, -1112887422, -1123208698, -1113085828) + W(5, 1059271336, -1083778221, 1039937652, 998408382) + + W(6, -1123129456, -1114039899, 1060839292, -1087428756) + + W(7, 1032072458, -1132276000, 1019270569, -1111910285) + + W(8, 1056907678, -1094957746, -1114977786, 1030127674) + W(9, 981412042, -1118232839, 1049849056, -1097540376) + + W(10, 1024925794, 1027413302, -1140654132, 1022571624) + + W(11, 1053487153, -1103251894, -1099334072, 1016237783); + sum2 = + W(0, 1032849952, 1026055449, -1062565523, -1095655110) + W(1, -1117249787, 1033345650, -1124884727, -1115329827) + + W(2, -1069645041, 1045607629, -1117682124, -1126204236) + + W(3, -1113876395, 1027815845, 1071954840, 1038549538) + W(4, 1041120073, -1111681252, -1129227720, 1037903820) + + W(5, 1082661778, -1099195860, 1049745271, 1027383795) + W(6, -1120368220, 1033732250, 1074466947, 1038448988) + + W(7, -1117133911, 
1003632367, 1017198470, -1135699996) + + W(8, 1029601901, -1098438651, 1028175479, -1122462785) + W(9, -1139825528, 999495607, 1010255452, -1108984513) + + W(10, 1021673198, 1028818665, -1146042855, -1129930944) + + W(11, 1032971494, 1036170224, -1119888169, 1008778484); + WS(-1115348316, 1054745777); + sum1 = + W(0, -1137232980, 1036166838, 1049991788, -1091675395) + W(1, 1017816583, 1029575560, 1010567825, 1015838740) + + W(2, 1049110157, -1096298109, 1034100235, 1027659359) + W(3, 1027515738, -1156212270, 1058008076, -1087264777) + + W(4, 1034455816, 1022248439, -1129778165, -1111605669) + W(5, 1064241460, -1084812083, 1015442096, 1022178737) + + W(6, 1031078129, -1105267814, 1060214194, -1090012802) + + W(7, 1038960299, 1027280241, -1163675813, -1129353779) + W(8, 1054590870, -1089909917, 1024747424, 1032358751) + + W(9, 1021335208, -1109102196, 1041116785, -1099321920) + + W(10, -1128935590, -1140454872, 1015357019, 1044530957) + + W(11, 1051902330, -1087098097, 1001621141, 1023518060); + sum2 = + W(0, -1165172934, 1029679520, -1126280148, -1133513625) + W(1, 1007295249, -1112190701, 1024206556, 1020162972) + + W(2, -1122180110, 1032751166, -1123729058, 1031911051) + W(3, -1116697030, 1034059275, 997668227, 1058808820) + + W(4, -1106696521, -1130859572, 1027518769, 1040171419) + + W(5, -1097026624, 1078058683, 1051253388, -1110596693) + W(6, -1114129087, 1027996817, 1034726751, 1074823993) + + W(7, -1129919188, 1030310076, 1038375004, -1110434123) + + W(8, 1040744343, -1082346775, -1130957332, 1020514684) + + W(9, -1112102389, 1034335827, -1102815825, -1066028315) + + W(10, -1106067749, -1120038362, 1033334280, -1129105068) + + W(11, -1107441727, -1081349721, 1046963831, -1140190249); + WS(-1106429870, -1094013011); + sum1 = + W(0, -1180060253, 1032914805, 1051659804, -1083977393) + W(1, 1044666432, -1143253052, 1010730896, -1122572746) + + W(2, 1041447962, -1096070845, 1042077981, -1112889935) + W(3, 1021824818, 1007289698, 1049003466, -1090427610) + + W(4, 
1040702780, 1019043703, -1126648308, 1032028260) + W(5, 1055000463, -1110426966, 1039434940, -1121648024) + + W(6, -1129917792, 1028442598, 1028832887, -1154202266) + + W(7, -1143541158, -1121855216, -1130361130, 1044381070) + + W(8, -1108621431, 1047276227, -1126798128, 1025638626) + + W(9, -1120274467, 997655159, -1127588999, -1106909952) + + W(10, 1034262401, -1112957869, -1140710069, 1048974915) + + W(11, -1092428073, 1037115308, -1116914978, 1027653084); + sum2 = W(0, -1113021585, 1044056205, -1096040530, -1099894114) + + W(1, -1101136816, -1129192316, 1027309129, 1042644451) + + W(2, -1098848264, -1100852865, 1051352325, -1127314210) + + W(3, 1040557912, -1130917417, -1086782093, 1055980834) + + W(4, -1094431966, 1001033976, -1105312463, -1097839840) + + W(5, 1066533545, 1060955207, -1105883820, 1028457481) + W(6, 1045635393, -1093830389, 1047866369, 1049480207) + + W(7, -1100511063, -1109915970, -1102513438, -1101732547) + + W(8, 1049511378, -1136131482, -1095199310, 1041484506) + + W(9, 1027399099, -1116545436, 1054444603, -1099150452) + + W(10, 1057783348, -1111537389, -1114789238, 1035602021) + + W(11, 1045074348, -1089010378, -1105061215, 1038428609); + WS(1043891118, 1068855714); + sum1 = W(0, 1034864935, -1116934668, -1092449819, 1054927906) + + W(1, 1034556804, -1137767170, -1115119790, -1132968331) + + W(2, -1094480060, 1036212657, -1105343195, -1110944332) + + W(3, 1021640868, 1038582262, -1087769325, 1059302628) + W(4, 1039390277, 1007166673, -1116826163, 1007221512) + + W(5, -1085866861, 1065404307, -1118670257, 1000130184) + + W(6, -1131811026, 1042862352, -1087019235, 1060871017) + + W(7, -1097654326, -1149095244, 1023733166, 1036137865) + + W(8, -1090795981, 1058788692, -1110180471, 1038210141) + + W(9, -1143217636, -1140157826, -1096174840, 1054585007) + + W(10, -1103987387, -1123846591, -1132040205, -1132089051) + + W(11, -1088777568, 1058320673, 1039274926, 1033742658); + sum2 = + W(0, -1109312558, 1043524956, -1128190925, -1098627702) + 
W(1, 1046310591, -1109532722, 1016877071, 1022447527) + + W(2, -1112207078, -1106898612, 1038513036, 1007593597) + + W(3, -1119267190, 1041115214, -1098728904, -1125515089) + + W(4, 1034661588, -1120394532, 1012463989, -1113751024) + + W(5, -1096567460, 1069702771, -1085518745, -1104054586) + + W(6, -1126846961, 1044302697, -1103900959, 1067943590) + + W(7, -1080776989, -1120974914, -1130793473, 1017381631) + + W(8, 1042180588, 1068665203, -1076263106, 1042247803) + W(9, 1028514185, -1118188819, -1098746809, 1066385707) + + W(10, -1082060374, 1009589981, 987754923, 1030898373) + + W(11, 1045698067, 1053793920, -1093259035, -1114854661); + WS(-1087649964, 1019782291); + sum1 = + W(0, -1120416323, 1030168161, 1040415828, -1105270513) + W(1, 1024428446, -1138699736, -1143853715, -1112895868) + + W(2, 1040272853, 1031966316, -1115504066, -1138905653) + W(3, 997344379, 1041161399, -1108639376, -1088122706) + + W(4, 1050439898, 986611002, -1117444008, -1109107148) + + W(5, 1066099001, -1107599501, -1101667648, -1107645603) + + W(6, 1021907901, 1047711365, 1016783474, -1089416651) + W(7, 1046888529, 998160367, -1121005123, -1121138472) + + W(8, 1034911086, -1111272734, 1033038819, -1138080961) + W(9, 1024251314, 1000308143, 1025062450, -1107120316) + + W(10, -1142414472, 1016114481, -1131127160, 1016698342) + + W(11, 1039033917, -1102333873, 1024070769, -1128756052); + sum2 = W(0, -1107127477, 1040784536, -1117663793, -1112675945) + + W(1, 1039537683, 1025325994, 1033768126, -1100939724) + W(2, 1017594321, 1057262150, -1092150404, 1019337948) + + W(3, 1023416069, -1105075327, -1107862291, -1072566694) + + W(4, 1066290117, -1099590626, -1125558334, 1031863699) + + W(5, -1098589794, 1078638992, -1077578123, 1051733383) + + W(6, 1043030557, -1103416126, 1051532765, -1086195181) + + W(7, 1053259006, -1119314417, -1118774605, -1104742149) + + W(8, 1047503953, -1109470897, 1050584179, -1100419866) + + W(9, -1151915405, 1042707477, -1106456825, 1041105499) + + W(10, -1103867215, 
1038050610, 1000627574, -1121719211) + + W(11, 1014542959, -1106760943, 1033290657, 1002291590); + WS(1061891500, 1029739432); + sum1 = W(0, -1120639769, -1106216192, -1113834067, 1044238166) + + W(1, -1114217922, -1131739160, -1135804195, 1006283635) + + W(2, -1103570763, 991333090, -1139025510, -1118882723) + + W(3, -1111424495, -1105829118, -1096510251, 1059958553) + + W(4, -1105864021, -1144807936, 1029738303, -1090566706) + + W(5, 1051808376, 1070740438, -1094764771, 1034612526) + + W(6, -1117498485, -1116262763, -1092264601, 1055146526) + + W(7, -1102695282, -1134494805, -1118615961, 1034396863) + + W(8, -1100309453, 1034609097, -1115698885, 1017541191) + + W(9, -1125527056, 1019261400, -1103214946, 1035571491) + + W(10, -1137185181, -1114134173, -1118385394, -1120846617) + + W(11, -1103884676, 1043753671, -1118241703, 1003240643); + sum2 = W(0, -1121600381, 1026550907, 1038640235, -1118247849) + + W(1, -1129506938, -1121040584, 1006302792, -1138054916) + + W(2, -1115693397, 1032667651, -1150824496, 1022873234) + + W(3, -1119262688, 1026195583, 1049140618, -1131891756) + + W(4, 1032667233, -1116879102, 1002488024, -1109251816) + + W(5, -1070322023, 1075955836, 1037796079, -1123674716) + W(6, 1029037035, 989985520, 1050244555, -1093462820) + + W(7, 1041966968, 1023067742, 1026568605, -1119850715) + + W(8, -1111337441, 1039467523, -1119306593, -1115823182) + + W(9, -1136033516, 1023268674, 1034795019, -1123631905) + + W(10, 1011205316, 1030765775, 1023468893, -1113487368) + + W(11, 1032357843, -1111383976, -1140765060, -1117038411); + WS(-1084563692, -1094583093); + sum1 = + W(0, 1022703229, 1012279186, 1016172182, -1118740542) + W(1, 1030161881, -1118949361, -1162852794, -1124105959) + + W(2, 1043937588, 1029178707, -1113029519, 1029291849) + W(3, -1132568844, 1040618577, -1096721723, 1047241030) + + W(4, -1110742685, 1002005802, 1021772616, -1104495965) + W(5, 975997096, 1058138797, -1095587800, 1026228226) + + W(6, 1024085850, -1108936808, -1105257082, 
-1098942649) + + W(7, 1044167852, -1121437018, 996545514, -1134036365) + W(8, -1119752408, 1033384955, 1030217107, -1128298663) + + W(9, 1026223997, -1117471057, 1035927703, -1119041259) + + W(10, 1033396614, -1127843472, -1146052282, -1121618798) + + W(11, 1030206390, -1135741956, -1114495566, 1024441958); + sum2 = W(0, 1025430445, -1154040649, -1116838887, 1050769250) + + W(1, -1101164186, 1025324860, 1022296570, 1041313314) + + W(2, -1089338077, -1110563158, 1049390351, -1110459458) + + W(3, 1036981025, -1095051054, -1080092115, 1047738904) + + W(4, -1105225557, -1123670947, -1113204042, 1045521896) + + W(5, 1068042633, -1105725339, 1058198082, -1102419914) + + W(6, 1015494893, -1140159290, -1123762249, 1046459196) + + W(7, -1097909404, 1036400862, -1113488118, 1036274902) + + W(8, 1036460009, -1099991734, 1042800199, -1121611661) + + W(9, -1127432409, 1033705522, -1115504399, 1035671600) + + W(10, -1112380024, 1029427990, -1122316989, 1014652738) + + W(11, 1034428960, -1109634740, 1019626217, -1122111247); + WS(1070570422, -1118483066); + sum1 = + W(0, 1018165832, -1110069429, -1099959964, 1060331110) + W(1, -1162935118, -1139421786, -1132852684, 1031499417) + + W(2, -1099020957, 1050128623, 1032275410, 1009522871) + W(3, -1139725729, 1040072889, -1084885570, 1059366093) + + W(4, 1019497086, -1140197674, -1109730565, 1042906054) + W(5, -1081528768, 1062372980, 996066926, -1117746023) + + W(6, 1028156415, 1042919301, -1083523477, 1058454089) + + W(7, -1118811556, -1128372287, -1118139972, -1168228894) + + W(8, -1091258848, 1058887102, -1112630512, 1014662756) + W(9, 1024357779, 1039947243, -1095225642, 1048717802) + + W(10, -1117991970, -1119179525, -1122455036, -1103975241) + + W(11, -1094692227, 1057283318, 998336395, 1028666043); + sum2 = + W(0, 1041833519, -1105638218, -1090712556, -1068172350) + W(1, 1053466473, 1031038629, -1108615966, 1028972955) + + W(2, 1051586707, -1062545743, -1089644697, -1109024185) + + W(3, -1125372171, 1029956673, 1040982790, 
1058445980) + W(4, 1044082250, -1115541422, -1161988394, 1048735800) + + W(5, -1113315600, 1081890452, 1021891615, -1110157560) + W(6, -1113215633, 1034458457, 1025381303, 1078990875) + + W(7, 1047100697, 1034093689, 1034531971, 1025753183) + W(8, -1116357686, 1057946443, -1138523317, -1121128958) + + W(9, -1118383435, 1036634769, -1096961046, -1129266047) + + W(10, -1118643722, -1164400298, 1028261725, 1000882603) + + W(11, 1047278155, 1020805991, 1003167691, 1023430939); + WS(-1086946092, 1067745931); + sum1 = + W(0, -1129718754, 1043160110, 1057051220, -1086793423) + W(1, 1029510552, 1019782728, 1029399808, -1107060738) + + W(2, 1052187432, -1093519641, 1041235060, 1028876859) + W(3, 1034551451, 1035261538, 1056679450, -1086890615) + + W(4, 1027399596, -1121687640, 1031195606, -1095383669) + + W(5, 1065908691, -1082115172, 1028581668, -1151056709) + + W(6, 1040134022, -1133919947, 1053194351, -1091092163) + W(7, 1039541441, 985161115, 1033121193, -1122710934) + + W(8, 1055695833, -1087806651, 1022886169, 1032179179) + W(9, 1023151712, -1107305618, 1050664148, -1106762967) + + W(10, 1035840335, -1137650955, 1042343314, 1042006276) + + W(11, 1051480109, -1086763775, -1123269599, 1023858088); + sum2 = + W(0, -1092038506, 1049598436, 1043882093, 1042320103) + W(1, -1131832297, -1121740952, -1100127293, 1048639033) + + W(2, 1037661877, -1103044471, 1002123811, 1029198842) + W(3, -1076970891, 1064718435, 1059313512, 1034035103) + + W(4, -1103733932, -1115216799, -1074419095, 1072396257) + + W(5, 1057486689, -1104719409, 1041582287, 1010136401) + W(6, -1072137471, 1071503744, 1051417643, -1111075093) + + W(7, -1115120295, -1128000009, -1071882845, 1074270589) + + W(8, 1058040444, 1044428409, -1139439905, 1024386386) + W(9, -1075499422, 1071966648, 1034550731, -1105998171) + + W(10, 1008560441, -1120184167, -1077760287, 1067893220) + + W(11, -1109701446, 1040427981, -1112073274, 1030441594); + WS(-1081254198, -1103496580); + sum1 = W(0, -1115414444, -1146587219, 
-1093427101, 1048836291) + + W(1, 1038644661, 1040942563, 1041339642, -1098606506) + + W(2, -1097683981, 1057498447, -1144521152, -1102869832) + + W(3, -1098407452, 1050459360, -1091994231, 1058816906) + + W(4, -1117884339, 1036627839, -1130637434, 1042214179) + + W(5, -1078168847, 1061921355, 1040740783, -1165859686) + + W(6, 1020760807, -1117423418, -1095228700, 1063391143) + + W(7, -1099277734, -1107645994, -1101995566, 1034598934) + + W(8, -1092752622, 1049596874, -1125383814, 1047950513) + + W(9, 1047565230, -1122745995, -1097351785, 1055225025) + + W(10, -1125503451, -1104517417, -1100079212, 1024450296) + + W(11, -1094816186, 1057483350, -1112883619, 1038686359); + sum2 = + W(0, -1101463653, 1057761413, -1092703936, -1109493923) + W(1, 1041760680, 1014720459, 1049429767, 1067192921) + + W(2, -1078098525, -1139460835, 1028945771, -1117756514) + + W(3, -1128244554, 1075421514, -1071536845, 1039603395) + + W(4, -1114414573, 1016362862, -1113723320, 1075836777) + + W(5, -1077415283, -1087182285, 1045554670, -1118637934) + + W(6, -1100253783, -1088665878, 1059548308, 1034610843) + + W(7, -1106673179, -1125369728, -1100739127, 1016894214) + + W(8, 1051692516, -1117875445, 1032645955, 1027339291) + W(9, 1041910404, -1116933337, -1101242161, 1013931579) + + W(10, -1122014712, 1013909643, -1105778187, -1111337535) + + W(11, 1048285068, 1038460759, -1113595007, -1124961818); + WS(-1089093868, 1047195432); + sum1 = W(0, -1139038025, -1108646892, 1042227448, -1116853265) + + W(1, 1032512066, 1015754506, -1146634508, -1102493258) + + W(2, 1050027260, -1113152371, 1037772585, -1111624526) + + W(3, -1107247574, -1102675263, 1055627170, -1102292967) + + W(4, 1040310103, -1122052409, -1111039036, 1027299238) + + W(5, -1094097439, 1059178599, -1120135685, -1123630421) + + W(6, -1111569431, 1043334313, -1086063146, 1064850945) + + W(7, -1097054397, 1035574535, -1131070116, 1035038251) + + W(8, -1091453053, 1055758905, -1107670363, -1126001639) + + W(9, -1123633457, 
-1130835530, -1104540112, 1047069456) + + W(10, -1109953256, 1015067608, -1125658710, 1006218639) + + W(11, -1100496626, 1028822677, 1033685440, 906381938); + sum2 = W(0, -1103632983, 1051248459, 1050224986, -1112111441) + + W(1, -1114055425, -1111599379, 1043805370, -1102825598) + + W(2, -1106748849, -1107415409, -1113209339, 1037386114) + + W(3, -1100282419, -1110573499, -1142028319, 1047715488) + + W(4, -1106125891, -1107474855, 1043204826, 1027245233) + W(5, 1068200484, 1064575785, 1034756297, 1042201726) + + W(6, -1107284068, -1115422824, -1110893073, -1116519441) + + W(7, -1121811913, -1115818225, 1032596944, -1088944566) + + W(8, -1082511144, -1166081973, 1036646608, 1036836551) + + W(9, -1115324019, -1119815045, -1101546175, 1033478296) + + W(10, -1139772871, -1107371535, 1038389706, -1103855212) + + W(11, -1099926119, -1112155627, 1042197713, 1026011708); + WS(-1121781432, -1075996235); + sum1 = + W(0, -1139185884, -1104344139, 1055178280, 1023675446) + W(1, -1115352921, -1116105386, 1017932812, -1106763371) + + W(2, 1050633065, -1102467779, -1113788396, -1134059780) + + W(3, 1031934942, -1097501556, 1061458986, -1098857564) + W(4, 987917899, -1113912527, 986151814, -1122902443) + + W(5, 1056941803, -1087567683, 1038547421, -1136836585) + + W(6, -1164665422, -1153685131, 1057254916, -1091469811) + + W(7, -1108760371, 1007170123, 1000199096, -1127990679) + + W(8, 1045616484, -1104204108, 1011665321, -1128021832) + W(9, 1007298263, 1001735501, 1030315686, -1129857478) + + W(10, -1121847517, -1154396360, -1140438078, -1113474827) + + W(11, 1046908556, -1103515070, -1118109260, -1121041198); + sum2 = + W(0, -1168649543, -1097479461, 1049098480, 1042706476) + W(1, 1041988609, -1106980555, -1129109975, -1104465921) + + W(2, 1049155382, -1102354715, -1112820525, -1122042062) + + W(3, 1016893178, -1103938775, 1057022497, 1043506300) + + W(4, 1047765539, -1105649317, -1132018102, -1118749694) + + W(5, -1098731951, -1097087336, 1046909100, 1031531696) + + W(6, 
-1117453381, -1115486085, -1119719509, 1057524747) + + W(7, -1100642605, -1114531186, 1023462612, 976518647) + W(8, -1106384437, 1036026738, -1121071990, 1040948740) + + W(9, -1106957470, 1025546665, -1098317799, 1045328977) + + W(10, -1106523641, -1113359258, -1115823387, 1034975399) + + W(11, -1105263623, 1035074429, 1003192501, 1033488186); + WS(1058868844, 1053114578); + sum1 = + W(0, 1019520880, 1047026923, 1040716289, -1095841007) + W(1, 1020793813, 1026453035, -1122446279, -1120049453) + + W(2, 1048040834, -1093651986, 1039005404, -1117418946) + W(3, 1037061470, 994701546, 1059285152, -1089525095) + + W(4, 1046415125, 1019785277, 1025396381, -1104790763) + W(5, 1063349990, -1081113852, 1040363303, -1114838686) + + W(6, 1029402067, -1104820950, 1062179110, -1086213616) + W(7, 1045703966, 1030676884, 1031841884, 1021084025) + + W(8, 1056787146, -1087503095, 1041196477, 1024749253) + + W(9, -1120186199, -1105237507, 1046529242, -1091401861) + W(10, 999605293, 1004229292, 1033549944, 1009185118) + + W(11, 1057448647, -1088312360, 1036113571, 1022861598); + sum2 = + W(0, 1050427341, -1089070569, 1053219089, 1067490860) + W(1, -1080930470, 1058659628, -1106859908, -1111523090) + + W(2, 1052246278, -1093125710, -1078811541, 1066232343) + W(3, 1035861035, 1043538575, 1059060280, -1078649576) + + W(4, -1074777053, 1073277874, -1112488680, -1097196972) + + W(5, -1088179143, -1087268097, -1075643449, 1065823003) + W(6, 1032409135, 1036654054, 1017346040, 1059384755) + + W(7, -1124572738, 1062269032, -1122621450, -1133854060) + W(8, 1043005089, 1062938540, 1049929252, 1043425453) + + W(9, -1104767713, -1130574088, -1092034159, -1093321870) + + W(10, -1112625720, -1092663388, 1046587865, -1093068011) + + W(11, 1052143300, 1059539225, 1059287102, -1098215774); + WS(-1074642550, 1059001037); + sum1 = + W(0, 1033877849, -1105864948, 1040506928, -1096349630) + W(1, 1046552293, -1110243859, -1114814273, -1133959838) + + W(2, -1108476642, 1023714759, -1108357401, -1127057913) 
+ + W(3, 1017602926, -1106724665, -1098100854, 1055335876) + + W(4, 1032280783, -1147216585, -1122785432, 1042832010) + + W(5, -1084427489, 1065396984, -1132282059, 1033899323) + + W(6, -1119942606, 1046400581, -1083890617, 1062559324) + + W(7, -1113686945, 1032718711, -1121541554, 1036804396) + + W(8, -1088670813, 1061684023, -1104721044, 1025273824) + W(9, 1004629641, 1032924291, -1090515058, 1051195620) + + W(10, -1110306392, -1122497752, -1103073299, 1039804561) + + W(11, -1095458045, 1054342111, 1031943308, -1121848889); + sum2 = + W(0, 1024273115, 1021686194, 1043888107, -1108335658) + W(1, -1077891718, -1110849641, -1131612410, 1025348247) + + W(2, -1101286924, -1122239076, -1091711743, -1111108183) + + W(3, -1107407950, -1105777061, 1048538591, 1046097166) + W(4, -1097181377, 1033944270, 1041256788, 1042238427) + + W(5, -1090332406, 1036931728, 1065426853, -1111979954) + + W(6, -1117121277, -1126779625, 1053208564, 1040874893) + W(7, 1063431633, 1039617071, 1031505312, -1118046720) + + W(8, 1043322093, 1040745205, 1040529954, -1105370761) + + W(9, -1110072407, -1118330488, -1135252559, 1050743072) + + W(10, -1097546746, -1122096807, 1016793154, 1027338756) + + W(11, -1116082242, -1096125238, 1050690616, -1122718137); + WS(-1088674604, -1128666815); + sum1 = + W(0, -1123914807, -1112902532, 1031043843, 1051063687) + W(1, -1094561666, 1018478931, 1016031116, -1154837176) + + W(2, 1033184452, -1131397783, 1025159063, -1134237698) + W(3, 1014718188, -1114821559, 1015481863, 1027540319) + + W(4, -1102063788, 1024172061, 1024027612, -1098901213) + + W(5, 1054517603, -1102428675, -1104802482, -1126359028) + + W(6, -1145356432, -1103324234, 1051928805, -1094753613) + + W(7, 1021380397, -1133861065, 1024193015, -1099251759) + + W(8, 1059978813, -1100081596, -1111732847, -1114031158) + + W(9, -1124389866, -1119376425, 1047372601, -1110958225) + + W(10, -1154253637, 1028470484, 1032770888, -1101826270) + + W(11, 1062901239, -1101878062, -1113334654, -1114369951); + 
sum2 = + W(0, -1125911137, -1108931889, 1017628143, 1050970701) + W(1, 1035307535, -1132880011, 1012595671, -1114639139) + + W(2, 1032292317, -1128469007, -1112594616, 1026392505) + W(3, 999266341, 1007405169, -1114156858, -1110785518) + + W(4, 1028698628, -1126668834, 1044288444, -1110292957) + + W(5, -1067836662, -1074929312, -1112320536, 1035975658) + + W(6, -1101165077, 1050109344, -1072172899, -1105301985) + + W(7, 1032862316, -1115294669, 1041147790, 1041510968) + W(8, 1037860217, 1044362436, -1133865234, -1122721591) + + W(9, -1113177198, 1029583838, 1074693740, 1048694785) + W(10, -1121233078, 1027549527, 1025832748, 1046353900) + + W(11, 1081403750, 1057358913, 1015908463, -1114367420); + WS(-1092184279, -1114774309); + sum1 = + W(0, -1112548452, 1035183681, -1114927434, 1044352162) + W(1, 979364527, -1115173597, -1120800919, 1038305887) + + W(2, -1101125062, 1039166894, -1131609361, -1182492945) + + W(3, -1114639366, 1043708422, -1087724142, 1063427286) + W(4, -1093459700, 1021495134, 1008341430, 1037535198) + + W(5, -1103312811, 1034792975, 1048665209, -1113251558) + + W(6, 1009522319, -1098020135, 1062809460, -1088121787) + + W(7, 1043037714, -1113757330, -1120506536, 1029108045) + + W(8, -1118967585, -1115071968, 1023205859, -1113409875) + + W(9, 1034801619, -1128765878, 1002077113, -1114196826) + + W(10, 1033949151, -1123554950, 1022706306, -1106461058) + + W(11, 1043448960, -1129024277, -1126884683, -1115469664); + sum2 = + W(0, -1130731300, -1113218164, 1024876637, -1112264614) + W(1, -1099018413, 1026854381, -1146462793, 1028824397) + + W(2, -1129593633, -1113338336, -1111675537, -1102804304) + + W(3, -1114036842, 1013989596, 1044891209, 1058622224) + W(4, -1088855347, -1102462733, 1017173194, 1034818358) + + W(5, -1101530532, -1105838439, 1059579881, -1108694945) + + W(6, 1027612469, -1104368005, 1049606516, 1050794194) + W(7, 1042736448, -1114432119, -1121343755, 1040334859) + + W(8, -1097048862, -1102338893, 1043492825, 1019755962) + W(9, 
1027895615, -1123082454, 1042764886, 1040822444) + + W(10, -1121501222, -1122392540, -1154268081, -1123775916) + + W(11, -1113704648, -1122592353, 1034905054, -1119817837); + WS(1061153836, -1122742154); + sum1 = W(0, 1027853554, -1109089440, -1102650256, 1051082942) + + W(1, 1027843188, -1131011401, -1115291137, 1043116226) + + W(2, -1115210274, 1047414994, -1112215273, -1119605372) + + W(3, -1171221910, -1138258466, -1112757431, 1055597733) + + W(4, 1034632359, -1126942769, -1112977164, 1050791197) + + W(5, -1077890273, 1055540065, 1042514461, -1126698730) + + W(6, -1121554637, 1034471002, -1082648980, 1061271560) + W(7, -1131618020, 998349933, 1022714404, 1041098678) + + W(8, -1093251577, 1053866013, 1046515591, 1025132479) + + W(9, -1157881362, -1134239506, -1097764996, 1044732088) + + W(10, 1036747322, -1120958927, -1123187178, 1034237593) + + W(11, -1092621429, 1032996986, 1043087924, 984532249); + sum2 = + W(0, 1031101174, -1102026314, 1042272104, -1126868316) + W(1, 1028240358, 1023923290, -1132731961, 1038074007) + + W(2, -1090146581, 1042331882, 1036936531, -1128194324) + W(3, 1022986308, -1092900468, 1028046198, 1039425711) + + W(4, -1122656394, -1114483483, 1017601076, 1030590838) + W(5, 1054179894, 1068886648, 1039233571, -1108833787) + + W(6, -1126674740, 1001758673, -1096188072, 1049555295) + W(7, -1086618714, 1026905222, 1022398052, 1001713969) + + W(8, 1035923987, -1096410864, -1090080092, 1029729918) + + W(9, 1013813081, -1140283033, -1106500726, 1036573795) + + W(10, -1110072380, 1035325123, 1010235161, -1138091097) + + W(11, 1036670019, -1105222854, -1134061337, 1035525391); + WS(-1098883799, 1037141561); + sum1 = W(0, -1114839991, 1039141586, -1110021469, 1020247259) + W(1, 1030453625, 1040211480, 1029657918, 1034970850) + + W(2, -1090767256, 1029681923, -1119339247, -1126798193) + + W(3, -1116594483, 1048999812, -1089859629, 1058872566) + + W(4, -1104869706, 1041448405, 1034543274, 1026227619) + + W(5, 1007604554, -1100094273, -1106749124, 
-1123771570) + + W(6, 1026816074, -1103366664, 1063309440, -1090400131) + + W(7, 1040832320, 1006844457, -1140770618, -1115755155) + + W(8, 1050338841, -1090199858, 1023759014, -1131945671) + + W(9, 1035386535, -1109617409, 1049028980, -1104209655) + + W(10, 1040157646, -1115455274, 1007197468, -1117505278) + + W(11, 1050252341, -1101914812, 1034144506, -1106217213); + sum2 = W(0, 1003211606, -1120528483, -1156174412, -1122088448) + + W(1, 1034591096, 1008615371, -1137674963, -1120827669) + + W(2, -1165578649, 1021421814, 1046263423, -1109822394) + + W(3, -1151639180, -1113012012, 1045965141, -1108742643) + + W(4, 1043768035, -1107512724, -1126381316, -1125167698) + + W(5, -1143882022, 1025314463, 1048434617, -1102660599) + + W(6, -1125497686, -1133545211, -1157195276, 1048741047) + + W(7, -1100558843, -1103528899, -1120928387, 1042115577) + + W(8, -1139515595, 1042307479, -1111490489, -1116374767) + + W(9, 1024650751, -1111755343, 1024020315, 1028526161) + + W(10, -1106766859, 1031989982, -1121709003, 1034768106) + + W(11, -1128894336, 1033940928, -1105393784, -1146160726); + WS(1056068055, 1054753321); + sum1 = W(0, -1151415881, 1039080395, 1057552055, -1089028090) + + W(1, -1115248008, 1035025164, -1148496799, -1110003853) + + W(2, 1043835198, 1042243518, 1050869997, 1014222010) + W(3, 1010657956, 1010453293, 1059020393, -1089501818) + + W(4, 1038857744, 1019596694, 1024019466, 1036235820) + W(5, 1056043521, -1077848098, 1040563886, -1121658024) + + W(6, -1130137728, 1020552269, 1059247988, -1085601017) + + W(7, 1031869090, -1123056704, 1008503186, 1036573197) + W(8, 1054798584, -1088101382, 1014726271, 1035081577) + + W(9, -1132133255, -1115513116, 1050516586, -1102383812) + + W(10, 1037313809, -1137744617, 1035499631, 1040242409) + + W(11, 1047345767, -1092379525, -1116861746, 1008326318); + sum2 = + W(0, 1028355794, -1096807405, 1040605863, 1067941768) + W(1, -1077104108, 1052817152, -1106782132, 1053669968) + + W(2, -1138391825, -1075220438, -1095467240, 
-1103015841) + + W(3, 1035193429, -1113415499, 1054131819, -1088459045) + W(4, -1094157882, 1049259157, 1019490884, 1045145961) + + W(5, -1117197986, 1059082808, 1063065831, -1103440804) + W(6, -1103311494, 1033457423, 1014178849, 1050105229) + + W(7, 1061761756, -1095094287, 1051099266, -1105194567) + + W(8, 1043284247, -1121773074, 1042603721, -1122898552) + + W(9, -1107701072, -1111393110, 1045435457, -1120483817) + + W(10, -1116379229, -1109769641, 1017055868, 995616995) + + W(11, -1110630971, 1023829774, 1044996767, 1018464332); + WS(-1081368566, -1084412265); + sum1 = + W(0, 1027068306, -1116002144, 1048181720, -1097381330) + W(1, -1110979184, 1021378954, 1015688387, -1139602460) + + W(2, 1048418201, -1109028275, 1018111780, 1036715310) + W(3, 1033201483, -1116483849, 1059204754, -1086309497) + + W(4, -1126061147, -1118145977, 1018141039, -1108067395) + + W(5, 1059054739, -1080843359, 1023153354, 1024269909) + W(6, 1032237687, -1114781410, 1060863016, 1035809139) + + W(7, 1043786804, 1014428471, 1022250212, -1101031910) + W(8, 1054995541, -1090938969, -1102361409, 1015803518) + + W(9, 1019999841, -1110135922, 1048891912, -1097995920) + + W(10, 1017536656, 1027639288, 1019305877, -1115253585) + + W(11, 1054003721, -1098655035, -1110370680, 1026153225); + sum2 = + W(0, -1105480981, 1050460434, -1098849709, 1048076097) + W(1, 1035320005, -1105501338, 1035973965, -1096496755) + + W(2, 1051612828, -1090117310, 1047120319, -1108725217) + W(3, 1037165381, 1049140757, -1090898849, 1053634690) + + W(4, 1021130333, 1039969785, -1106311589, 1006710678) + W(5, 1040931379, 1066845841, -1083953982, 1046728781) + + W(6, 1052000177, -1091756558, 1058463605, -1077185739) + W(7, 1057459457, 1038345201, -1104974154, 1044207757) + + W(8, -1090778289, 1058534342, -1091684787, -1141278071) + + W(9, -1120302417, 1032159893, 1039812947, -1102438650) + + W(10, 1041032826, 1040363142, 1040627526, -1100748342) + + W(11, 1045155050, -1096398368, 1045504944, -1109297232); + 
WS(1040286894, 1035975353); + sum1 = W(0, -1171293815, 1034319831, 1059664710, -1091927835) + + W(1, -1116625676, 1013574450, 1023182418, -1127987806) + + W(2, 1048633701, -1097464070, 1029070026, 1012611215) + W(3, 996039142, -1102399280, 1062256902, -1087363556) + + W(4, 1016696459, -1115123712, -1137235964, -1105596260) + + W(5, 1059153168, -1086480503, 1000027879, 1006568169) + + W(6, -1135192533, -1103790599, 1061115073, -1089057879) + + W(7, 1028964506, -1134032598, 1025821242, -1108335719) + + W(8, 1056014846, -1099300213, -1108586113, 1032814547) + + W(9, -1128818097, -1114500598, 1050028641, -1097571161) + + W(10, 1032749254, 1020632239, 1020463141, -1129600787) + + W(11, 1052737098, -1112421262, -1096430155, 1016193698); + sum2 = W(0, -1115556642, 1037372428, 1085969060, 1051814466) + + W(1, 1011471282, -1114698318, -1130243023, 1030557532) + W(2, 1077581449, 1043810263, 1030366783, 1034280644) + + W(3, 1032034792, 1049080310, -1074326256, -1103611514) + + W(4, -1109496698, -1140286158, -1126476135, -1112452010) + + W(5, -1064530882, -1081646826, -1129814679, 1022645135) + + W(6, 1032996496, -1108874582, -1074476109, 1033029539) + + W(7, 1035377868, -1126128243, -1138592966, -1125709403) + + W(8, -1104386166, 993676602, 1007963430, 1013431542) + W(9, -1121911846, -1147299853, 1029772508, 1034388196) + + W(10, 1005762565, -1114714998, 1029413614, -1125845903) + + W(11, -1109306490, 1000946837, 1024214725, 1024966722); + WS(-1128144240, -1105727419); + sum1 = W(0, 1008177348, 1038442911, 1034971295, 1037111480) + W(1, -1119737674, 1019898055, 1008158465, 1030921926) + + W(2, -1108323937, 1020025483, 1015575522, 1036155997) + W(3, 1025380533, -1116236765, 1041262911, 1039711206) + + W(4, -1096558626, 1023258159, 1041196118, -1091904121) + + W(5, -1116818069, -1090328900, -1106145913, 1038413850) + + W(6, -1123971020, -1110541401, -1118195565, 1053066642) + + W(7, -1107602136, -1123851725, 1038005880, -1155490648) + + W(8, 1050488689, -1108534613, 1033678004, 
1027658998) + + W(9, -1110790464, -1118346788, 1005836543, 1032777199) + + W(10, 1034582855, -1118860082, 1022532691, -1132141153) + + W(11, 1035288564, 1043217853, -1110557919, -1161170171); + sum2 = + W(0, -1128930637, -1132037027, 1018701611, -1110976033) + W(1, -1129623857, 1029665439, 1013105493, -1110061572) + + W(2, 1028677045, -1113315034, -1136221161, -1120215489) + + W(3, 1027020519, 1023565141, -1097696705, -1136487341) + + W(4, 1045236380, 1025952555, -1111106512, -1118411413) + + W(5, 1051172043, 1064921249, -1100340880, -1120959951) + + W(6, 1033707008, -1104848450, 1036460110, -1097426277) + W(7, 1026225447, 1006240491, -1108465338, 1001195043) + + W(8, -1134254189, 1028959819, 1033145987, -1118442072) + + W(9, -1130609759, -1113096913, 1035127174, -1113952185) + + W(10, -1105387359, -1137489821, -1112986298, -1123693871) + + W(11, 1021784635, -1139702593, 1025786651, -1119362634); + WS(1066786198, 1018352061); + sum1 = + W(0, 1037807432, 1042991174, 1056116676, -1089303485) + W(1, -1115182496, 1044944769, -1147124480, -1118935590) + + W(2, 1053008404, -1093203077, 1032850856, 1019595829) + W(3, 1032919115, -1102401423, 1058826232, -1091158043) + + W(4, -1118679276, 1030098999, 1043153405, -1111703874) + W(5, 1052869895, -1077398372, -1112497438, 990101824) + + W(6, 1024885356, -1105747115, 1063596395, -1088790919) + W(7, 1041854800, 1037575334, 1040525108, -1109427003) + + W(8, 1052412861, -1096537905, -1158252416, 1022825908) + W(9, 1034093744, 1036876386, 1055303296, -1092801624) + + W(10, 1029739551, 1042956152, 1041644314, -1113217832) + + W(11, 1055747689, -1095292522, -1097656684, 1027108728); + sum2 = W(0, 1036031230, -1102747382, 1040389106, 1027629590) + + W(1, 1033840740, -1113736580, -1120880520, 1035081854) + W(2, 999021742, 1048871361, 1037408166, 1036561850) + + W(3, -1118383086, -1081312733, 1046511529, -1102228261) + + W(4, -1111365044, -1105995296, -1112120068, -1097589576) + + W(5, 1061513306, 1059108854, 1017984327, 1037901466) + 
W(6, 1044452135, -1136989239, -1098672489, 1030279668) + + W(7, 1032578550, 1024281094, -1113902667, -1101386787) + + W(8, 1028645652, -1090327880, -1098256610, 1003868670) + W(9, 1036248196, 1038098664, 1037598188, 1046091275) + + W(10, 1055272900, 1035521712, -1175016923, -1106083613) + + W(11, 1027306194, -1110606753, -1100293962, -1114802202); + WS(-1088826540, -1072617048); + sum1 = + W(0, 986338164, 1037920241, 1051799989, -1096657083) + W(1, -1131268440, -1122826702, 1023136598, -1108211448) + + W(2, 1051897846, -1097067830, -1134063855, 1022858827) + + W(3, -1127817408, -1117329516, 1060040519, -1085267217) + + W(4, 1034398076, 1016624516, 1038855104, -1098615294) + W(5, 1068445460, -1091986338, -1104903024, 1025833567) + + W(6, -1112014844, -1156351446, 1041659186, -1085224562) + + W(7, 1041222927, -1148095931, 1032100206, -1125030464) + + W(8, 1049473503, -1095210946, -1123500579, 1018816617) + + W(9, -1114071293, 1025960366, 1046320643, -1103740786) + + W(10, 1021522163, 1004333185, 1031252724, -1128205111) + + W(11, 1053193450, -1098483636, -1110505437, -1135786218); + sum2 = + W(0, 1025188267, -1139275200, 1040515214, -1131755519) + W(1, -1105181237, 1030566925, -1121532154, 1033600964) + + W(2, -1111129732, -1105820182, 1041295374, -1137053008) + + W(3, 1037371296, -1123953755, 1035395064, 1067277095) + + W(4, -1104867399, -1131759093, -1111771837, 1046721727) + + W(5, -1119362505, 1075696256, 1058324165, 1026363312) + W(6, 1040037716, -1098628025, 1046868281, -1070220409) + + W(7, -1095490474, -1118056117, -1106548534, 1040338433) + + W(8, -1095083293, -1079865944, 1030948623, 1018203389) + W(9, 1039102739, -1109104289, 1045077862, 1049591033) + + W(10, -1107729189, -1128931569, -1109851481, 1042987802) + + W(11, -1121821072, -1113640498, 1039251361, -1123620784); + WS(-1126600048, -1081356549); + sum1 = W(0, -1127841038, -1133929050, -1098403026, 1044657313) + + W(1, -1112364357, -1126110067, -1121393942, 1032965854) + + W(2, -1092154099, 
1056638003, 1040562850, 1036225512) + + W(3, -1127713943, 1033968271, -1092503722, 1049224359) + + W(4, 1040769245, -1129561307, -1112866678, 1043820719) + + W(5, -1081570623, 1066854747, -1105175211, 1047740947) + + W(6, -1130256197, 1042942538, -1086843241, 1055316562) + + W(7, -1123699911, -1102743497, -1113126004, 1043379215) + + W(8, -1092479684, 1058087872, -1108064855, 1041161205) + + W(9, -1135790871, 1027795642, -1097678271, 1032418298) + + W(10, -1138101411, -1106598351, -1114110304, 1034600139) + + W(11, -1094287464, 1051778533, 999504676, 1039116783); + sum2 = + W(0, 1002695390, 1041236493, -1107205869, -1110230321) + W(1, 1025661924, 1013006047, -1122258341, 1024135109) + + W(2, -1167041196, -1120190201, 1051920007, -1101224590) + + W(3, -1122292123, 1018577975, 1048984334, -1063745707) + W(4, 1083223141, 1033459361, -1120933435, 1034742596) + + W(5, -1085659967, -1070580481, 1079255156, 1037086181) + W(6, 1032169765, -1148289286, 1041640777, 1042707591) + + W(7, -1110361303, -1105447820, -1117894001, -1155671947) + + W(8, -1108030745, 1049556353, -1097500712, 1029970779) + + W(9, 1003179774, 1027950533, -1123201389, -1106095214) + + W(10, 1030518681, 1006793819, -1138289315, -1116290259) + + W(11, -1139220243, 1050855401, -1105964729, -1115669085); + WS(1044741550, -1111040261); + sum1 = + W(0, -1123328853, -1129171790, -1119860449, 1045626213) + W(1, -1104602741, 1022127887, -1102049737, 1051367713) + + W(2, -1091471693, 1046305018, 1044417333, -1107859535) + W(3, 1040528310, -1097130129, 1035652177, 1049556145) + + W(4, -1091825946, 1033350174, -1104112191, 1051950321) + + W(5, -1092522751, 1053940037, 1049099055, -1124872112) + W(6, 1024305776, 1039437043, -1095677118, 1055561708) + + W(7, -1104689389, -1115808049, -1101315365, 1045396986) + + W(8, -1130673884, -1095468986, 1055485165, -1115536169) + + W(9, 1021709031, -1108128612, -1112716514, 1044018368) + + W(10, -1103796617, 1029190802, -1111623399, -1165073551) + + W(11, -1121220876, 
1026385047, 1036376309, -1122289340); + sum2 = + W(0, -1104033148, 1039328248, 1040757730, 1023457753) + W(1, -1105261590, 1019934334, -1105436527, 1038128967) + + W(2, 1010939044, -1118329847, 1011822306, 998732076) + W(3, -1109964889, 1010051700, 1051778220, -1098103909) + + W(4, 1019752394, -1151355224, -1097126651, 1057153562) + + W(5, 1035481326, -1094975289, 1037637839, -1112777586) + + W(6, -1098141643, 1057745438, -1094952202, 1048659070) + W(7, 1023965510, 1012075272, -1090250993, 1060383381) + + W(8, -1102222701, -1098057265, 1049957743, -1134551782) + + W(9, -1116302593, 1035812245, -1133841678, -1123621243) + + W(10, -1104798795, 1031321862, -1118233874, 1044677482) + + W(11, -1105877607, -1160321672, 1036758948, -1124061226); + WS(1065794902, 1053657215); + sum1 = + W(0, -1121908120, 1038333639, 1027457764, -1123855831) + W(1, -1123864939, 1004753494, 1018314063, 1031009517) + + W(2, -1106391546, 1034446294, -1121443224, 1015085764) + W(3, 1015310777, 1039118671, -1097318831, 1039330595) + + W(4, 1026117491, -1152877546, -1124873339, 1034311430) + + W(5, -1141167585, -1090069831, 1040632671, -1119728987) + + W(6, 1031572020, 1041290230, 1054548019, -1096859758) + + W(7, 1028976296, -1155937596, -1147984576, -1120195837) + + W(8, 1053489270, -1090834490, 1043686014, -1132243578) + + W(9, 1024640407, -1119856235, 1041008530, -1105884449) + W(10, 1007855863, 999100652, 1026486289, -1114392412) + + W(11, 1038700424, -1105777031, 1023881277, -1148206328); + sum2 = + W(0, -1111593455, 1035072504, 1043803228, 1033408259) + W(1, -1104275307, 998670368, 1026283313, 1028902500) + + W(2, -1108682745, -1102620143, 1005020832, 1032181798) + W(3, -1106910582, 1047373071, 1040261276, 1048908013) + + W(4, -1100960524, 1023771046, -1123416451, 1053329938) + W(5, 1067600226, -1090850305, 1050480903, 1032569956) + + W(6, 1050577288, -1092564654, -1072392505, 1062297118) + + W(7, -1138210428, -1112013629, -1128641646, -1100521328) + + W(8, -1113219589, 1017836657, 
-1168415746, 1023611526) + W(9, 1047088729, -1096962366, 1049469167, 1033530157) + + W(10, 1025688819, -1112880371, -1113858977, 1017970524) + + W(11, 1037790473, -1125952722, -1131011166, 1021270423); + WS(1062391596, 1012575503); + sum1 = + W(0, 1031343933, -1118754907, -1124966418, -1110681406) + W(1, 1044511208, -1105353116, -1104891955, 1040079801) + + W(2, 1034244551, -1097175282, 1039695694, 1041572194) + W(3, 1032025473, -1098309187, -1115351741, 1051954479) + + W(4, 1024690888, -1106768761, 1018981700, 1047266492) + W(5, -1082667844, 1046130725, 1049723012, -1111177056) + + W(6, -1107154694, 1041578200, 1034155267, 1051147223) + W(7, -1103478091, 1041859398, 1037332142, -1102249736) + + W(8, -1116669206, 1052085167, 1031631968, -1100878141) + + W(9, -1113508494, 1040475388, -1108675062, 1003822408) + + W(10, 1001356052, 1037747241, -1120166325, -1099931343) + + W(11, 1037232762, 1045069376, -1120670778, -1115735812); + sum2 = W(0, 1033523600, 984378304, 1029410276, 1013286288) + W(1, -1149511552, -1114072340, -1111554425, 1035520350) + + W(2, 1027721316, -1092144316, 1040549613, 1027894332) + + W(3, -1129275556, -1139357832, -1120989222, 1051778774) + + W(4, -1101613332, -1138380800, -1122087690, 1042336361) + + W(5, 1063527919, 1062226267, 1044728263, -1113445964) + + W(6, -1124036306, -1098127108, -1086537602, -1106529787) + + W(7, -1102568312, 1042436209, 1037981702, -1108770624) + + W(8, -1087308230, 1023869712, 1043627655, -1103441850) + + W(9, -1119955572, 1031985496, 1038170956, -1106223749) + + W(10, -1109791564, 1038090706, -1122349214, 1030270440) + + W(11, 1040810725, -1113965912, 1025118636, -1120264936); + WS(1061969900, 1065420380); + sum1 = W(0, 1002358108, 1041410250, 1005238718, -1098164717) + + W(1, -1127598604, 1013481849, -1132400117, 1024825568) + + W(2, 1055004288, -1093641048, 1037205414, -1128588267) + + W(3, 1036273784, -1111416381, 1058195604, -1089452644) + + W(4, -1122200151, 995531041, -1121891281, -1112279056) + + W(5, 
1065721300, -1081575699, 1041630292, -1118185583) + + W(6, 1031471239, -1100377015, 1058645356, -1095897092) + + W(7, -1115380038, 1031870564, 1014037550, -1110955448) + + W(8, 1050103969, -1095327860, 1033148051, -1125213521) + + W(9, 1015146068, -1140842745, 1047622119, -1122283864) + + W(10, -1112032609, 1031659065, 1021231496, -1109508230) + + W(11, 1050895308, -1101391147, -1107652840, -1167265945); + sum2 = W(0, 1037378832, -1097408481, -1089422359, -1134521568) + + W(1, 1024014288, -1156999910, -1105869799, 1040337956) + + W(2, 1064628576, 1048269110, -1113808990, -1121898726) + W(3, 1034348880, 1050274911, 1078490638, 1055313899) + + W(4, 1025241947, 1025759300, 1010791317, -1124897054) + + W(5, -1067190452, -1081189141, -1100728789, 1010618498) + + W(6, 1020451237, -1105197585, 1026583828, 1051135592) + W(7, 1047164907, 981635389, 1025476671, -1107003535) + + W(8, 1034175619, 1043712009, -1112306567, -1142752483) + + W(9, -1121382126, -1131733180, 1030289482, -1114815155) + + W(10, 1025919494, -1158764013, -1144944739, -1128932040) + + W(11, 1035014695, -1125325948, -1116208218, 1029088384); + WS(1048733783, 1027643848); + sum1 = + W(0, -1133845785, 1036515120, -1088559253, 1057953132) + W(1, 1034283684, -1114935677, -1133175894, 1023561851) + + W(2, -1093510507, 1049915371, -1130587597, 1012777537) + + W(3, -1135183403, 1042717327, -1088396394, 1060634166) + + W(4, -1127730531, -1122593837, -1116789548, 1030082171) + + W(5, -1080791005, 1063183646, -1114509469, 1031486794) + + W(6, -1148511502, 1048693393, -1089662985, 1058925305) + + W(7, -1138682841, -1116148770, -1134260761, 1034425221) + + W(8, -1090323862, 1044704560, 1038705162, -1166921432) + W(9, 1018609610, 1039337356, -1100061261, 1042697693) + + W(10, 1015444552, -1126426832, -1123833247, 1027869798) + + W(11, -1089888600, 1047384498, 1049640472, 1023971661); + sum2 = + W(0, 994989105, 1037891939, -1173716290, -1100065758) + W(1, -1120127409, 1036146011, -1144072856, -1103317800) + + W(2, 
1052666546, -1122814128, -1094230404, 1054652553) + + W(3, -1122509885, 1037436931, 1001371192, -1129250144) + + W(4, -1083084807, 1061875701, 1026939205, -1127785128) + W(5, 1040815920, 1058553182, -1075758811, 1067689129) + + W(6, 1028398855, -1103788534, -1100616833, 1055924851) + + W(7, -1074914963, 1068744910, -1120878413, 1034163445) + W(8, 1020001090, 1053725018, -1075891465, 1067320464) + + W(9, 1025208177, -1127272012, -1134790452, 1036740791) + + W(10, -1081737398, 1065190583, -1113678678, 1026044831) + + W(11, 1043708121, -1123998093, -1084193900, 1062426867); + WS(-1084457324, 1066101859); + sum1 = W(0, -1146837948, -1111022815, -1115296098, 1042035070) + + W(1, 1030912362, -1121095658, 1016102797, -1125864729) + + W(2, 1038645029, 1047347075, -1106508822, -1128702979) + + W(3, -1115430590, 1047559389, -1083544605, 1059438663) + + W(4, -1104927285, 1022709564, -1115904029, 1034715654) + + W(5, 1037951214, 1052960194, -1167432352, -1129984885) + + W(6, -1139281484, -1103064096, 1057869073, -1083491782) + + W(7, 1045723406, -1123913107, -1117556663, -1108958078) + + W(8, -1109939240, 1046468604, 1012598283, -1123056725) + + W(9, 1034492603, -1131712724, -1123954086, -1121536740) + + W(10, 1019381994, 1026960983, -1124011056, -1131623214) + + W(11, -1123535023, 1043250206, -1110102674, -1128824061); + sum2 = + W(0, -1125705477, -1127395739, -1105227463, 1036906007) + W(1, 1026846896, 989272222, -1127756609, 1044805550) + + W(2, 1015549291, -1109261657, 1037043129, -1106679057) + W(3, 993770127, -1097909373, 1049252886, -1102191244) + + W(4, -1097396928, 1042524140, 1034569455, -1177736375) + W(5, 1058826852, 1061852121, -1116961215, 1013800282) + + W(6, 1010458600, -1116962018, -1119951878, -1086841826) + + W(7, -1103314123, 1033157194, -1119844207, 1043096893) + + W(8, -1094678765, -1128420796, 1038664018, -1128652243) + + W(9, -1122090273, 1035619005, -1119199398, 1021651597) + + W(10, 1035622840, -1124905499, 1019369801, -1112228363) + + W(11, 
-1109645851, 1038741216, 1019677755, -1126258736); + WS(1067036470, -1087655195); + sum1 = + W(0, -1143851962, 1018554747, 1050955184, -1110423387) + W(1, -1106360251, 1025799530, 1026890311, -1120379564) + + W(2, 1053786273, -1098337218, 1040602294, -1118031105) + + W(3, -1106604063, -1114993347, 1042552729, -1090196611) + + W(4, -1134551499, -1119796702, 1016195769, 1022258327) + + W(5, 1063572966, -1083965137, 1045568787, -1110210301) + W(6, -1138731339, 981655329, 1058367344, -1084059678) + + W(7, 1046272662, -1132188582, 1037080924, 1045778247) + W(8, 1055564522, -1090231591, 1028646847, -1117810092) + + W(9, -1121217270, 1025121994, 1047575674, -1096786628) + W(10, 1026932869, 1021012754, 1029907824, 1023696087) + + W(11, 1048845230, -1095677575, 1027665749, -1118338222); + sum2 = + W(0, -1113854255, -1108417433, 1042382895, 1025435556) + W(1, -1108154666, 1042045079, 1028862084, 1033605964) + + W(2, 1032534216, 1033584957, 1046901615, -1105415159) + W(3, -1104900936, -1109151867, 1017793091, 1033281558) + + W(4, -1111336587, 1039313065, -1104116954, -1102487521) + + W(5, 1067799294, -1099286824, 1036137805, 1005958605) + W(6, -1114719178, -1071728118, 1077900204, 1056033422) + + W(7, 1024983956, 1020191839, 1040474278, -1073652757) + W(8, 1051278318, -1113833767, 1034492621, -1114752489) + + W(9, 1048817634, -1090221130, 1048299521, 1011833167) + + W(10, 1033542750, 1024757388, -1122519186, -1102125815) + + W(11, -1105402373, -1107682623, -1127962013, 1032711916); + WS(-1087701164, 1071308133); + sum1 = + W(0, 1040776942, -1100025248, 1040244657, 1047131214) + W(1, -1116520012, -1152028354, -1112915908, 1039729431) + + W(2, -1108101926, -1104971284, -1123736282, -1120800451) + + W(3, 1032135583, -1111499300, -1107965963, 1036283892) + + W(4, -1105297773, 1024489952, -1109010607, 1047911115) + W(5, -1095250332, 1057920197, 1040789767, 1038925278) + + W(6, -1125896234, -1096854047, -1097276733, 1057601759) + + W(7, -1089523085, 1034463851, 1033070381, 
1047586960) + W(8, -1104060437, 1046835436, 1046328829, -1130324381) + + W(9, 1001383873, -1100475875, -1129328465, 1017446731) + + W(10, -1107686492, -1112186573, -1127540900, 1025720505) + + W(11, -1115131352, 1041767855, 1031878575, 1015088161); + sum2 = + W(0, -1089646175, 1060062242, 1050551269, 1017421696) + W(1, -1124178965, 1023971829, -1112379036, 1018139178) + + W(2, -1096611550, 1040955729, -1115297526, -1112174927) + + W(3, -1087427677, 1066898400, -1116330565, 1035645225) + + W(4, -1111832492, 1035838465, -1088789602, 1066001716) + W(5, 1056235229, 1040223965, 1027731712, -1128027042) + + W(6, 1059010203, -1079880526, -1118197619, -1114176344) + + W(7, -1106279788, 1015547306, 1060985431, -1081253059) + + W(8, -1102574794, 1044677785, -1130074994, 1033729011) + + W(9, -1139584980, -1095878540, 1039647840, -1100382074) + + W(10, 1040262609, -1110899873, 1054225000, -1095013927) + + W(11, -1099698701, 1035015292, -1124507789, 1032866060); + WS(1063980716, -1121784884); + sum1 = + W(0, 1027357276, 1046480497, -1094692773, 1050884987) + W(1, 1046796692, 1025153935, -1104528667, -1121849329) + + W(2, -1090457523, 1048987838, -1098583576, -1107151594) + + W(3, -1122859745, 1049050976, -1090130146, 1057733925) + W(4, 1037378828, 1016110982, -1107033792, 1046903620) + + W(5, -1084796508, 1061180023, 1034725880, 1023598816) + + W(6, -1105809438, -1128776604, -1089370535, 1054317580) + + W(7, -1096435840, -1114135692, 1033196814, 1050194992) + W(8, -1105629197, 1055134019, 1049629536, 1040596522) + + W(9, -1111338526, -1112079704, -1093813713, 1046804578) + + W(10, -1097780651, -1113598601, -1111105883, 1037688238) + + W(11, -1094775914, 1057027842, 1038664949, -1127372967); + sum2 = + W(0, 1041589168, -1098133521, 1031854878, -1105720063) + W(1, 1035016760, -1121214780, 1024315846, -1106371173) + + W(2, 1051327081, 1023684981, -1128261352, 1011090873) + + W(3, 1043663909, -1106740015, -1108849860, -1124767392) + + W(4, 1035542785, 1029549000, 1040621380, 
-1107177437) + + W(5, -1106409587, -1127108094, -1111599720, 1034668152) + + W(6, 1040212968, -1111131552, -1120392794, -1102248051) + + W(7, 1051247500, -1136161665, 1033384296, 1040430896) + W(8, -1093765703, 1050935529, -1102687331, 1018485311) + + W(9, 1014426993, 1032409853, -1107120761, 1033359350) + W(10, 1037002974, -1126387868, 1022694293, 1003633577) + + W(11, -1104283048, -1128561530, 1026574958, 998242826); + WS(1047153326, -1105960410); + sum1 = W(0, 1032318440, -1105871782, -1111485966, 1043448020) + + W(1, -1110710801, 1019497606, -1133035902, 1024269605) + + W(2, -1098984649, 1044051929, 1028564080, 1019882933) + W(3, 1023033002, -1110369667, 1050315790, 1059563250) + + W(4, -1113831750, -1118335287, -1111741083, 1047130673) + + W(5, -1080272088, 1053459949, 1024144723, 1018957863) + W(6, 1009988015, 1043587009, -1086889743, 1061038806) + + W(7, -1117616080, -1122250988, -1126171361, -1113373146) + + W(8, -1098309317, 1044456898, -1145766181, -1124691083) + + W(9, -1183551252, 1035984354, -1105786176, 1048086975) + + W(10, -1114741783, -1123242346, -1118067977, -1111622952) + + W(11, -1106267538, 1037611273, 1040212689, -1150984053); + sum2 = W(0, -1132142402, -1109262360, 1044069954, -1111364284) + + W(1, 1031679549, 1018817977, 1019405455, 1034679642) + + W(2, -1104355470, -1108806456, -1097044621, 1034241557) + + W(3, 1022592575, -1099677890, -1119884477, -1066728208) + W(4, 1043056057, 988348592, 1002713172, 1048612806) + + W(5, -1094376769, 1079847520, 1057366851, -1114461524) + W(6, 993592760, 1036415285, -1110824904, 1060715429) + + W(7, -1104696735, -1107038514, -1120241477, -1118742739) + + W(8, 1031998111, -1103911380, 993276152, 1036085605) + W(9, 1024283135, -1125545553, -1128336378, 1028262793) + + W(10, 1012785110, -1128734189, -1139068036, -1129249399) + + W(11, 1030577347, -1127589036, -1133646774, -1182233281); + WS(1056964652, 1042968547); + sum1 = + W(0, 1026864302, 1044356666, -1111481597, -1091567350) + W(1, 1041608192, 
1008927875, -1136392949, 1026146499) + + W(2, 1051097712, -1098882624, 1029687022, -1130908784) + W(3, 1024389106, 1030265493, 1040695131, -1090350559) + + W(4, 1042991030, -1127664937, -1126999334, 1038248593) + + W(5, 1052760426, -1081953599, 1043333208, -1121268145) + + W(6, 1026043785, -1126651679, 1056669112, -1095456258) + W(7, 1044232931, 952128323, -1130628252, -1146035092) + + W(8, 1038264227, -1102468122, 1038520784, -1146879000) + W(9, 1000688741, 986691602, 1027446877, -1109487565) + + W(10, 1033386798, -1137895230, 1002882868, 1033146167) + + W(11, 1040116499, -1110909592, 1040618297, 1023066122); + sum2 = + W(0, -1155037894, -1109445776, -1106399184, 1052866656) + W(1, 1046069094, 1012439553, -1117766868, -1132308185) + + W(2, 1041934367, -1111172948, 1042904039, -1108172404) + + W(3, 1042293027, -1101307898, -1137539889, 1058738313) + W(4, 1045435531, -1110297962, 1004126723, 995397702) + + W(5, 1035303811, 1064268129, 1051607279, -1110625272) + W(6, -1123150924, 1014206993, -1091518060, 1052685347) + + W(7, -1098813712, -1107185126, 1015608713, 1046458351) + + W(8, -1124849601, -1090299347, -1150133894, 1045339946) + + W(9, -1135323377, -1120597884, 1043824666, -1087905074) + + W(10, 1037110006, -1122419536, 1030487760, 1026431488) + + W(11, 1061366644, -1075431372, -1089430426, 1049588119); + WS(-1096061015, 1045980634); + sum1 = + W(0, -1136709393, 1047345176, -1089944925, -1113534082) + W(1, 1020871950, 1032683291, 1028828435, 1016418418) + + W(2, -1096285771, 1049565603, 1021274185, 1018404508) + W(3, 997001355, 1049157941, -1094897728, -1113216951) + + W(4, 1042276288, 1027484139, 1022855123, 1032879612) + W(5, -1097979037, -1094660427, 1039512084, -1119890439) + + W(6, -1128346047, 1042754713, 1014307589, 1031601097) + W(7, 1028680388, 1030972704, 1031977874, 1032419332) + + W(8, 1040846151, -1095562472, 1044068336, -1125789619) + W(9, 984865373, -1124110958, 1042092045, -1109834744) + + W(10, -1140756488, 1031142826, 1040707047, 1040469689) + 
+ W(11, 1041011167, -1093905225, 1048548616, -1123584299); + sum2 = + W(0, -1114682679, 1055059251, 1064739409, -1095220576) + W(1, -1100660553, 1038444345, -1115477579, 1024869090) + + W(2, 1031266096, -1113806527, -1105830101, -1132048253) + + W(3, -1100789101, 1055732865, 1030176534, 1040530242) + + W(4, -1096186716, 1040636888, -1113658391, -1122542034) + + W(5, 1067742275, 1036337974, -1103152959, 1015520805) + + W(6, -1110942733, -1120682294, 1062911121, -1087169731) + + W(7, 1035448471, 1032588114, 1044868728, -1107016239) + W(8, -1081471397, 1042181045, 1052603278, -1102136445) + + W(9, 1028313432, -1152832742, -1080677577, 1042749891) + + W(10, 1033558186, -1134141481, 1046472362, -1087243594) + + W(11, -1077241450, 1067213860, 1052345796, -1112398105); + WS(-1086603372, 1061480206); + sum1 = + W(0, 1025472273, 1022341006, 1006839158, -1102342404) + W(1, 1042514621, -1114893026, -1143591172, 1027469190) + + W(2, -1128153798, -1124925900, 1043263108, -1114252238) + + W(3, -1124123051, -1141108008, 1051787090, -1093885235) + + W(4, 1027880941, -1121794488, -1110008795, 1042054049) + + W(5, -1090097115, 1058414043, -1105821275, 1039892842) + + W(6, -1124248793, 1051331968, -1081435553, 1063214861) + + W(7, -1094479791, 1028544109, -1127491490, 1044174205) + + W(8, -1089913687, 1057428893, -1099759469, 1041669328) + W(9, 1014214528, 1044005681, -1092574472, 1052102264) + + W(10, -1112069887, 977928239, -1120184170, -1155225358) + + W(11, -1096696337, 1048742922, 1035771431, 1039400116); + sum2 = + W(0, 1037224808, 1025638319, -1093017633, -1143762368) + W(1, -1119609796, 995865588, -1109009263, 1049599085) + + W(2, 1039705537, -1112746506, -1119534458, 1021427773) + + W(3, -1120793535, -1142324554, 1040559103, -1099136162) + + W(4, -1123476586, 988834049, -1110258597, 1028269513) + W(5, 1053657173, 1056581815, -1104556193, -1122952792) + + W(6, -1112240446, -1132053081, -1120149375, -1118127758) + + W(7, 1044376501, -1114993482, -1151274598, -1119573384) + + 
W(8, 1043501160, -1147801478, -1107739102, 1029987722) + + W(9, -1120536233, 1010517574, -1121251494, -1130373692) + + W(10, 1033895801, -1106681926, -1103902982, 1008384356) + + W(11, 1035850418, 1032229723, -1122521676, -1116132116); + WS(1037058908, -1087106898); + sum1 = W(0, 1032385376, 1026637522, -1098495714, 1041048768) + W(1, 1039729665, 1017191910, 1028077583, -1129390954) + + W(2, -1107088868, 1041268325, -1126519617, -1132138900) + + W(3, 1014613011, 1048467894, -1108180359, 1045140885) + W(4, 1040649531, 1035348883, -1151370839, 1048758199) + + W(5, -1093334653, -1080472255, -1114665834, 997380151) + + W(6, -1139761263, 1043852440, -1108393155, 1045570315) + + W(7, -1125126080, 1031307292, 1027279360, 1034990442) + W(8, -1097414435, 1045361342, 1035082182, 1030937245) + + W(9, 1022565566, 1017011558, -1111470776, 1028923180) + + W(10, 1023808326, -1138594135, 1031834108, 1036138504) + + W(11, -1104527470, 1036330497, 1027790590, 1036519916); + sum2 = W(0, -1111516802, -1131222514, -1097346809, 1049467820) + + W(1, -1118099733, -1111393075, 1036231024, -1098228691) + + W(2, -1128682274, 1020406578, -1102087439, 1039222222) + + W(3, -1130951686, 1034905648, -1113861539, 1024259929) + + W(4, 1049541587, -1113826501, 1031916934, -1090487171) + + W(5, 1052765954, 1061096371, -1108988196, 1025340241) + + W(6, -1130989326, 1032129266, -1108538807, -1104129123) + + W(7, 1033200380, 1029417921, -1129631002, 1035677128) + + W(8, -1122282521, -1108918961, 1042713844, -1111528334) + + W(9, -1123615369, -1103168970, 1038239484, -1114584358) + + W(10, -1097598443, 1034012496, -1119469958, 1049196148) + + W(11, -1116187122, -1129232626, 1018354634, 1029857705); + WS(1054906071, -1091145324); + sum1 = W(0, 1029398237, -1120406171, 1051536197, -1115196942) + + W(1, -1136290815, -1126429728, -1140775857, -1106052861) + + W(2, 1046952684, -1109008729, 1034481647, 1032555988) + + W(3, -1128656451, -1099422950, 1057886666, -1097962977) + + W(4, -1114344740, 1015919623, 
1004160018, 1032725545) + + W(5, -1085942396, -1130426984, 1050651652, -1108426832) + + W(6, -1130732913, -1097841652, -1098611557, 1048146402) + + W(7, -1102043591, 1040987294, 1029686973, 1037934865) + + W(8, 1042440654, 1034532491, -1133864753, -1107032303) + + W(9, 1021855224, 1036573848, 1035129734, -1122054089) + + W(10, -1124817654, 1044439941, -1128798955, -1157619404) + + W(11, 1032622058, 1027356128, -1126847669, -1115113714); + sum2 = W(0, -1113802973, 1027581315, -1110955855, 1047020911) + + W(1, -1105727831, 1022816774, -1116219822, -1112877584) + + W(2, -1127685514, 1021259086, -1118276653, -1129541322) + + W(3, 1020553430, -1110295429, -1097495000, 1057623683) + + W(4, -1110729082, 997980814, -1109755343, 1027296115) + W(5, 1025841055, 1053623791, -1104267868, 1016281450) + + W(6, -1131329246, -1126007358, -1104102640, 1057318729) + + W(7, -1106795812, -1105649969, -1112950186, 1035099341) + + W(8, -1110724235, 1040083835, -1119773745, 1031563375) + + W(9, -1113313093, -1130784978, -1165405916, 1048620928) + + W(10, -1128480210, -1111917112, -1116367256, 1035863861) + + W(11, -1097352168, 1039940909, -1110646461, 1032754595); + WS(1063926444, -1083200389); + sum1 = + W(0, -1110900111, 1049653262, -1089993663, 1052688460) + W(1, -1098055945, 1042131972, 1015785179, -1116391059) + + W(2, -1106495964, 1038385529, -1108169455, -1121748392) + + W(3, -1132926346, 1048988121, -1104923071, 1045396422) + + W(4, -1110654965, 1039495970, 1034778082, -1111931241) + + W(5, 1061854120, -1089207439, -1111176153, -1117097761) + + W(6, 1029162938, 1002973694, 1062129309, -1085048198) + W(7, 1042255343, -1120523578, 1024169214, -1107911937) + + W(8, 1059656227, -1087799869, 1043917388, -1106362715) + + W(9, 1031086749, -1104277444, 1047872627, -1096506545) + + W(10, 1012240783, 1011366638, -1116484151, -1114837807) + + W(11, 1055218483, -1096830980, 1049045741, -1098874335); + sum2 = W(0, 1031260311, -1079593190, -1121246507, -1113337443) + + W(1, 985022717, 
1032096214, 1008713252, -1080879443) + W(2, 1035854140, 1043424734, -1123981203, -1130846658) + + W(3, -1110413739, -1099919153, 1040572163, -1112705599) + + W(4, 1028350097, -1119780465, -1108083749, 1067198745) + + W(5, -1111993270, -1102589461, 1035118406, 1007072124) + W(6, 1024382525, 1061544601, 1060638292, 1044789771) + + W(7, -1119559917, -1132931022, -1107516795, 1037733964) + + W(8, -1122943484, 1040341398, -1123835438, -1131952682) + + W(9, 1032742989, 1034769578, -1098923291, -1116338100) + + W(10, 1021760610, 1028149723, -1107429215, 1037726418) + + W(11, 1034609836, 1030676197, -1114867192, -1115680064); + WS(-1091090263, 1040227933); + sum1 = + W(0, -1123880467, -1113261184, 1033284496, 1044923332) + W(1, -1105999782, -1135767575, -1114808049, 1027485320) + + W(2, 1019377225, -1098653061, 1042536309, -1127683258) + + W(3, 1014154590, -1101480756, -1102787036, 1059927492) + W(4, -1095990282, 1037484605, 1032220316, 1043855170) + + W(5, -1094753319, 1040011797, 1044858269, -1120454550) + + W(6, 1018131933, -1088775234, 1065098078, -1099661886) + + W(7, -1091964998, 1029352975, -1121223347, 1021676847) + + W(8, -1107869654, -1151140018, 1045062557, -1109078132) + + W(9, -1128795491, 1031227206, 1045897152, -1113799968) + + W(10, -1109510984, 1027724204, 1008528046, 1026364723) + + W(11, 1025551690, -1125551659, 1023695008, 1006311503); + sum2 = + W(0, -1123304381, -1135928985, 1039889929, -1113522433) + W(1, -1108136941, 1038356153, 1011470018, -1131363804) + + W(2, 1045400660, -1100907702, 1038781261, -1139124254) + + W(3, -1116901758, 1044779190, -1100997480, -1104972678) + + W(4, 1052809383, -1105005165, 1029761957, -1101932196) + W(5, 1055172736, 1058841413, -1093286229, 1040780481) + + W(6, -1135722528, -1097334030, -1087140500, 1039800739) + + W(7, 1034962749, -1105451984, -1111769341, 994986369) + W(8, 1054550160, -1122473050, -1105965260, 1042887374) + + W(9, -1107033568, 1042173475, -1110305303, -1109069053) + + W(10, 1038676003, -1114167820, 
1032808415, 1025113569) + + W(11, -1115908038, 1032761157, -1155418681, -1152192529); + WS(1063982124, 1031276218); + sum1 = + W(0, 1041442545, 1031361481, 1056936890, -1090353464) + W(1, -1122143798, 1007942615, -1103090149, 1026538936) + + W(2, 1055470587, -1090262005, -1101092681, 1040474995) + + W(3, 1042807746, -1101400314, 1060963845, -1096383728) + W(4, 1031723710, -1103896066, 1024468200, 1034898060) + + W(5, 1061353486, -1078114170, -1122232454, 1027975174) + + W(6, -1106882536, -1103132501, 1063510555, -1106902698) + + W(7, 1043143621, 1030475455, 1049721332, -1112916420) + + W(8, 1048946651, -1088793180, -1172995470, -1101011290) + + W(9, -1101888878, -1139074135, 1054541302, -1098771630) + + W(10, 1032268200, 1044176219, 1042454319, -1110788513) + + W(11, 1058321972, -1093460753, -1104729748, -1108198577); + sum2 = + W(0, 1015674059, -1109357907, 1031332447, 1066992274) + W(1, -1074609058, 1048304191, 1034996578, 1024785272) + + W(2, 1048659656, 1076858856, -1070866381, 1030261153) + W(3, 1026624942, -1129998179, -1108514478, 1078632808) + + W(4, -1068251573, -1098508369, -1128049054, -1120348186) + + W(5, 1052415888, 1073772904, -1076608801, -1108782845) + W(6, 999367802, 1048014132, -1105319294, -1105884909) + + W(7, -1103939197, 1048310856, 1021382702, -1105300006) + + W(8, -1117149322, 1038594495, -1109221944, 1031934850) + W(9, -1124407536, 1046166047, 1010482847, 1038514249) + + W(10, -1098022157, 1046147873, 1029480964, -1116382211) + + W(11, -1154249511, -1098756758, -1125235504, 1038709270); + WS(-1084008748, -1076428689); + sum1 = + W(0, 995293936, -1119372516, -1088304948, 1059786207) + W(1, -1137964103, 1013109895, -1162911689, 1036299874) + + W(2, -1097059736, 1051547992, -1125552990, 1006687204) + + W(3, -1138016718, 1017340780, -1085861109, 1060565598) + + W(4, -1116363658, 1002349741, -1132274729, 1041960361) + + W(5, -1081600491, 1064118968, -1116908178, 1007519945) + + W(6, -1145295444, 1044385332, -1083799507, 1058261310) + W(7, 
-1119006543, 994300076, -1131268254, 1034368866) + + W(8, -1090459925, 1050466917, 1026422690, -1119097260) + + W(9, -1201649403, 1043013720, -1104403080, 1049921019) + + W(10, -1131911306, -1123993357, -1135278623, -1103541652) + + W(11, -1103180426, 1042671596, 1051353751, 1023041036); + sum2 = + W(0, 1033856632, -1090933982, 1068958128, 1067844920) + W(1, -1095515318, -1124923033, 1041148647, -1094617784) + + W(2, 1066199400, -1102124443, -1122244574, 1042779533) + + W(3, -1094518814, -1122277735, 1055138982, 1051248434) + W(4, 1036900546, -1103088355, 1045521814, 1062250596) + + W(5, -1070105649, -1078683071, 1046136146, -1114411292) + + W(6, -1128266986, 1027053741, -1070096660, -1079091570) + + W(7, 1021983978, -1114099181, 1033117136, 1050179515) + W(8, -1082392713, -1089309992, 1045719314, 1023274434) + + W(9, -1173165821, 1033890948, 1067349236, 1047411220) + + W(10, 1050937215, -1124200313, 1041230368, -1086799133) + + W(11, 1076979077, 1070525583, -1091587052, 1042022635); + WS(-1081223670, 1021288719); + sum1 = + W(0, 1011016529, 1035837139, 1042651768, -1098433890) + W(1, -1129696110, 995272815, -1123721507, 1032041909) + + W(2, 1036928499, -1100608474, 1024607124, 1027549342) + W(3, 1017381041, -1108640782, 1055254255, -1094722148) + + W(4, -1131079071, -1116321147, 1017648243, 1024313608) + W(5, 1061777248, -1083909329, 1047474926, 1017435439) + + W(6, 1028006000, -1105590117, 1061429156, -1087617676) + + W(7, 1036831026, -1122084432, -1122128468, -1108791100) + + W(8, 1052534382, -1091388554, 1033276873, -1139273509) + + W(9, 1009660876, -1128783630, 1049990152, -1104773376) + + W(10, -1150770463, 1024807730, 1007983923, -1116825572) + + W(11, 1053483289, -1093454083, 1005912461, -1125665142); + sum2 = + W(0, 1057380038, -1088994983, 1016685185, 998662038) + W(1, 1040844632, 1027712021, 1057597982, -1085734595) + + W(2, 983594263, 1031174835, -1116219635, -1109851055) + W(3, 1069621581, -1072283812, 1066773393, -1099503122) + + W(4, -1134907857, 
1037507283, 1060801069, -1079782207) + + W(5, 1060848632, -1097910780, 1027931069, -1116080368) + W(6, 1060050844, -1081640783, 1043641192, 1038323247) + + W(7, -1111695032, -1123552119, 1041938366, 1046270010) + W(8, -1105013658, 1026094487, 1047921746, 1028446195) + + W(9, -1122435905, -1113922524, -1104981345, 1036576275) + + W(10, -1114519002, -1108250726, 1024344515, 1043446590) + + W(11, -1105295106, -1145544806, 1032973176, 1031771441); + WS(-1120405176, 1052288640); + sum1 = + W(0, -1136485047, 1040656325, 1051431647, -1114886102) + W(1, -1113721480, -1115542032, -1120969533, 1013423883) + + W(2, 1042796768, -1092939904, 1039390695, -1165530871) + + W(3, 1032711321, -1108004548, 1062809491, -1083705303) + W(4, 1026161897, 1020434857, 1027237267, -1107763377) + + W(5, 1060861813, -1089040632, -1116341945, 1024821617) + W(6, 1021305874, 1022040031, 1027813500, 1039332409) + + W(7, -1108229347, 1031183209, -1146847086, 1040265357) + + W(8, -1107918083, -1098943929, 1035322940, 1019272038) + + W(9, -1133175986, 1031093856, 1039657966, -1112315213) + + W(10, -1115465454, 1024346473, 1039673939, -1100629595) + + W(11, 1043412169, -1104541036, -1114748782, 1012723731); + sum2 = W(0, 1032873736, -1128467212, -1098460251, 1024553592) + + W(1, 1047433570, -1128334191, -1106233681, -1131275971) + + W(2, 1035809416, -1114616785, 1026776904, -1107845863) + + W(3, 1036788040, -1113770947, -1078975662, -1092555656) + + W(4, -1132131619, 1043173677, 1009987037, -1088748320) + + W(5, 1062703231, 1067981395, -1104531393, -1111351589) + + W(6, -1117147579, -1097209973, 1059662245, 1058574054) + W(7, -1106248497, 991598885, 1018109057, 1040210251) + + W(8, -1110493560, -1103422592, -1125640736, -1107997815) + + W(9, -1114508674, -1099376356, 1052516052, 1038056552) + + W(10, -1107638185, 1023829740, -1106556966, 1050939532) + + W(11, 992650581, 1026754378, -1107493388, -1106870741); + WS(1050656983, 1072047790); + sum1 = + W(0, -1128713579, 1048792739, 1050333168, -1097949975) 
+ W(1, -1101689274, -1127716817, 1026709637, 1034799517) + + W(2, 1050171449, -1104033571, 1036522731, 1037025856) + W(3, 1023201012, 1014670385, 1053203577, -1085647978) + + W(4, -1115538280, -1116963676, 1030859291, -1116111658) + + W(5, 1064066450, -1080119690, 1043957990, -1112813084) + W(6, 1041237492, 1025355287, 1060032700, -1088145917) + + W(7, 1037919995, 1036783049, -1152997210, -1112906461) + + W(8, 1050694506, -1088560507, -1120212113, -1113565993) + + W(9, -1155686725, 1036463444, 1053095469, -1105136840) + W(10, 1035379762, 1035776666, 1040488948, 1002270506) + + W(11, 1046913707, -1093840327, -1108210918, -1122514141); + sum2 = W(0, 1032121925, 1067037293, -1074821338, -1094551116) + + W(1, 1045030197, -1115959604, -1105031648, 1072414374) + + W(2, -1081225596, 1050387420, 1035683027, -1117669800) + + W(3, -1130820039, 1072723545, -1069168235, 1050306407) + + W(4, 1054464199, -1109697271, -1105961083, 1076922042) + + W(5, -1068908234, -1080768789, -1129129027, -1106200850) + + W(6, 1040237041, 1072846596, 1068735095, -1110799161) + + W(7, -1109242003, 1041008693, -1091675607, -1079373087) + + W(8, 1050158994, 1048692149, -1102086715, -1107790175) + W(9, 1052906990, 1048404715, 1043049709, 1039557097) + + W(10, 1050068036, 1019897614, -1119953618, -1090018360) + + W(11, 1036354441, -1114340558, -1106557101, 1034398799); + WS(-1081468886, 1041730784); + sum1 = + W(0, -1114247594, 1021500053, 1028761377, 1044129269) + W(1, -1131183895, -1121682823, 999910687, 1042624634) + + W(2, 1005300019, 1053438789, -1096850135, 1029411807) + W(3, -1112649805, 1043096639, -1100727404, 1055682820) + + W(4, -1106832988, 1022343799, 1022168820, 1034747044) + W(5, 1035906940, -1086991443, 1038040260, -1113521452) + + W(6, 1009392321, -1103886814, 1062898916, -1084075064) + + W(7, 1044548897, -1110992968, 1025791509, -1101453262) + + W(8, 1054732110, -1098009154, 1037861846, -1110478453) + + W(9, -1115410306, 1026735815, 1042621601, -1097832968) + + W(10, 1022910341, 
-1125653940, 1020540891, 1034256626) + + W(11, 1041116931, -1117544535, -1121491970, -1112646442); + sum2 = W(0, -1109856908, 1025315462, -1111613060, -1108093549) + + W(1, 1041489120, -1114414815, 995541592, 1040254152) + W(2, 997512408, -1085520225, -1094645099, 1040714423) + + W(3, -1107331629, 1035675473, -1115826672, -1103361463) + + W(4, -1102705141, -1125997775, 1043476326, 1026228130) + + W(5, 1067108106, 1059978555, -1121534122, 1032256197) + + W(6, -1104942022, -1101803200, 1053887552, -1094292172) + + W(7, 1039802977, -1130615251, 1039471517, -1123010638) + + W(8, -1104388416, -1120534147, 1017265579, -1129091643) + + W(9, -1114856041, 1040268905, -1120685867, -1108094032) + + W(10, 1040793483, -1108206349, 1022399907, 1028920354) + + W(11, -1103458923, 1040596394, -1107135380, 1027593934); + WS(1054796503, -1111794816); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[507]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { temp[pos] = (value); } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 507; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 13, y = (uint)id % 13; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (3)) + 0.5, float(group_base.y + y - (2)) + 0.5)).x; + } + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 
0.0); + vec4 samples[12]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 4]; + samples[1][1] = inp[local_pos + 5]; + samples[1][2] = inp[local_pos + 13]; + samples[1][3] = inp[local_pos + 14]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; + samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 26]; + samples[3][1] = inp[local_pos + 27]; + samples[3][2] = inp[local_pos + 28]; + samples[3][3] = inp[local_pos + 29]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; + samples[4][2] = inp[local_pos + 39]; + samples[4][3] = inp[local_pos + 40]; + samples[5][0] = inp[local_pos + 41]; + samples[5][1] = inp[local_pos + 42]; + samples[5][2] = inp[local_pos + 43]; + samples[5][3] = inp[local_pos + 44]; + samples[6][0] = inp[local_pos + 52]; + samples[6][1] = inp[local_pos + 53]; + samples[6][2] = inp[local_pos + 54]; + samples[6][3] = inp[local_pos + 55]; + samples[7][0] = inp[local_pos + 56]; + samples[7][1] = inp[local_pos + 57]; + samples[7][2] = inp[local_pos + 65]; + samples[7][3] = inp[local_pos + 66]; + samples[8][0] = inp[local_pos + 67]; + samples[8][1] = inp[local_pos + 68]; + samples[8][2] = inp[local_pos + 69]; + samples[8][3] = inp[local_pos + 70]; + samples[9][0] = inp[local_pos + 78]; + samples[9][1] = inp[local_pos + 79]; + samples[9][2] = inp[local_pos + 80]; + samples[9][3] = inp[local_pos + 81]; + samples[10][0] = inp[local_pos + 82]; + samples[10][1] = inp[local_pos + 83]; + samples[10][2] = inp[local_pos + 91]; + samples[10][3] = inp[local_pos + 92]; + samples[11][0] = inp[local_pos + 93]; + samples[11][1] = inp[local_pos + 94]; + samples[11][2] = inp[local_pos + 95]; + samples[11][3] = inp[local_pos + 96]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 41]; +#if CURRENT_PASS == LAST_PASS + uint2 
destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret); +} +//!PASS 2 +//!DESC NNEDI3 (double_x, nns256, win8x6) +//!IN INPUT, temp +//!OUT OUTPUT +//!BLOCK_SIZE 64, 8 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[12]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 12; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 48.0; + float mstd1 = sumsq / 48.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, -1118812211, -1126504964, -1120674615, 1037346316) + + W(1, -1120777358, -1137814218, -1128256674, -1118580651) + + W(2, -1118354985, -1117760744, -1117458482, -1097807929) + + W(3, -1114916026, -1101147334, 978731342, -1112914505) + + W(4, -1108702207, -1127312137, -1124477727, 1051925971) + + W(5, 1042061230, 1032645830, 1026597474, 1040041685) + W(6, -1107177196, 1040658850, 1041726453, 1061593601) + + W(7, 1053108884, 1029100907, 1023094846, 1010634196) + + W(8, -1116025261, -1105559165, -1102924251, -1098269094) + + W(9, -1109857889, -1121960158, -1133406593, -1137769344) + + W(10, -1113369587, 1024463101, -1120327001, 1015141868) + + W(11, -1129934237, -1114359336, 1018102856, -1124738995); + sum2 = + W(0, 1012167794, 975711915, 1007876102, -1112492311) + W(1, 1037438381, -1104922990, 1035896848, 
-1129478003) + + W(2, 1034293816, -1103749327, 1042152488, 1032768248) + W(3, 1026775674, -1112883791, 1009005510, 1029694272) + + W(4, 1040949842, -1105000392, -1113573827, 1065119806) + + W(5, 1049707133, -1118855570, 1019305156, -1105233202) + + W(6, -1073125242, -1097051670, 1048636347, 1064777150) + W(7, 1035198208, 1049268872, -1110039318, 1045598091) + + W(8, -1098861131, 1026609280, -1103172582, 1028248806) + W(9, 1042916747, -1108803486, 1037762592, 1002108025) + + W(10, 1034317546, -1107080190, 1037196612, -1106667446) + + W(11, 1024046979, -1112062411, 1031133421, -1116379320); + WS(1002542528, -1081598301); + sum1 = W(0, -1123106403, 1003194766, -1115291860, -1120609928) + + W(1, 1033033375, -1104418325, 1031657633, -1140694509) + + W(2, 1018532849, 1031358712, -1100442248, -1102109750) + + W(3, -1124327881, 1022719310, 1042404749, -1173350542) + W(4, 1058293919, 1048723343, 1060312385, 1065191364) + + W(5, 1057579241, 1055391647, 1048926634, 1057234642) + + W(6, -1096563748, -1102943230, -1084853826, -1082116255) + + W(7, -1090047916, -1090520747, -1115672903, -1088607906) + + W(8, 1033211315, 1043311017, -1122032382, 1048832547) + W(9, 1045565001, 1034840623, 1040065984, 1034960904) + + W(10, -1104454821, 1033034550, -1105126162, -1093430837) + + W(11, 1033480943, -1104668395, 1041617645, -1100859732); + sum2 = + W(0, -1118979564, -1136455954, 1031647880, -1098733088) + W(1, 1032618663, -1115252794, -1105564107, 1028338874) + + W(2, 1038135851, -1098903542, 1043936541, -1095471090) + + W(3, -1114646115, 1050331000, -1107943513, 1030299514) + W(4, 1040555036, -1113356687, 1041172918, 1058853402) + + W(5, -1111675314, 1042547849, -1102521857, 1048036907) + W(6, 1046730291, -1091503884, 1053446821, 1057969905) + + W(7, 1026777488, 1032649026, -1097022326, 1045220138) + + W(8, -1098841046, 1033064908, -1105844234, -1107613268) + + W(9, -1104403193, -1109177822, -1106932655, -1104926499) + + W(10, 1010901456, -1105119573, 1050988870, -1096003667) + + W(11, 
1040941647, -1135105484, -1111654963, 1015146572); + WS(-1089089900, 1057806976); + sum1 = W(0, -1107295812, -1127595840, -1123024597, -1107414907) + + W(1, 1016079705, -1114511976, -1121404144, -1120167114) + + W(2, 1034138799, 1025412102, 1036945929, 1043589025) + W(3, 1048626936, 1026901142, -1122208804, -1132433569) + + W(4, -1095422036, -1098433268, -1092642698, -1086058047) + + W(5, -1087422851, -1090938821, -1099844512, -1092394401) + + W(6, 1050726826, -1189029283, 1055732365, 1066834483) + W(7, 1060391966, 1050840209, 1046023279, 1054523402) + + W(8, 1041715821, -1122343505, -1122551116, 1012137296) + + W(9, -1112593208, 1032081267, 1025525231, 1034856335) + + W(10, -1107540610, -1114195981, 1004508092, -1123010964) + + W(11, 1021997657, -1116540066, -1117735535, -1140325311); + sum2 = W(0, -1100068589, 1040406565, -1105732125, -1129255625) + + W(1, -1110986904, 1041854514, -1108063740, 1040297872) + + W(2, 1045053907, -1112537807, 1034765996, -1096936254) + + W(3, -1098576495, -1119869595, -1108123574, -1106886121) + + W(4, 1035618626, -1097933278, -1099579155, 1047111871) + W(5, 1056019034, -1119227686, 1029490732, 996712344) + + W(6, 1049401249, -1091711434, 1056573344, 1055190008) + + W(7, 1058665437, -1092612461, -1121089271, -1113107096) + + W(8, 1036897142, 1040848725, -1089717374, 1050298277) + + W(9, -1089302244, 1058580142, -1127874601, -1110437244) + + W(10, -1103084156, 1039746012, 1030295740, -1110783667) + + W(11, 1041764164, -1100808273, 1022690203, 1034569294); + WS(1049963735, -1124746024); + sum1 = W(0, 1006389467, 991131306, -1124991168, 1036315168) + W(1, -1114080443, 1034164414, -1168983685, 1034869786) + + W(2, 1034213253, -1110639240, -1124116580, -1106688436) + + W(3, 1016950458, 1034354600, -1118357326, 1046100667) + W(4, 1058411598, 1049374726, 1060442825, 1064243752) + + W(5, 1058630778, 1057235431, 1051622657, 1028310995) + + W(6, -1089111694, -1111102680, -1086690096, -1080857842) + + W(7, -1086343400, -1086990857, -1099687954, 
-1090257402) + + W(8, -1120169982, 1039576302, 1018630958, 1032153615) + W(9, 1034713638, 1036089759, 1035150121, -1126482082) + + W(10, 1033778855, -1134195605, -1122058050, 1024033695) + + W(11, 1016006830, 1017381585, 1027377121, -1130216101); + sum2 = + W(0, -1114775505, 1022693528, 1033686534, -1159675846) + W(1, -1112709023, 1033838002, -1125624388, 1032684170) + + W(2, -1111353121, 1040828458, -1108626677, -1179590680) + + W(3, 1041114289, 1028829848, -1108121837, 1037026782) + W(4, 1051023520, 1041444385, -1101109690, -1089609004) + + W(5, -1101828769, 1037546580, 1033655640, -1104771455) + + W(6, 1077280281, 1083159907, -1090827263, -1064966677) + + W(7, -1073695373, -1090323484, 1042738654, -1104201752) + + W(8, 1042396970, 1039028690, -1129576724, -1146898193) + + W(9, -1117377551, 1046069066, -1098820901, 1049907200) + + W(10, -1100155636, 1038899070, -1108327399, -1128357112) + + W(11, 1026559236, 1017563688, -1149039617, -1119733667); + WS(-1082925548, -1131273240); + sum1 = + W(0, -1130676005, 1024417890, -1127912110, -1123681912) + W(1, -1128802908, 1009745483, 1030244593, -1124372951) + + W(2, 1028984921, 1026031368, 1046418962, 1048544370) + W(3, 1032747842, -1115143804, 1035809011, 1012870858) + + W(4, -1090453298, -1094705109, -1088411833, -1080625999) + + W(5, -1078821070, -1093720869, -1115224705, -1099806226) + + W(6, 1056020338, 1048479050, 1054709085, 1056570469) + W(7, 1060414079, 1058018725, 1048613352, 1053689379) + + W(8, 1036092472, 1026370227, 1040943828, 1016526011) + W(9, -1134933227, -1146727344, -1126383553, 1034696072) + + W(10, 1031745797, 1023514525, -1139610597, 1039178347) + + W(11, -1115579031, 1033865376, -1123856496, 1032018765); + sum2 = W(0, -1117505397, -1126832117, -1116692107, -1107644007) + + W(1, -1117458583, -1123105101, -1117032217, 1004815486) + + W(2, -1114926763, -1140231915, -1125548157, -1125354742) + + W(3, 1042669208, 1027773818, -1115355410, -1119603373) + + W(4, 1048798551, -1146496262, 1045683415, 
1053833339) + + W(5, 1057710126, 1032098567, -1100115369, -1110288078) + + W(6, -1102904639, -1113101086, -1125191934, 1000279686) + + W(7, -1104919217, -1107525999, 1032507221, -1103622420) + + W(8, 1025448640, -1129185935, 1034411663, -1120274227) + W(9, 1041220979, 1022009255, 1021108475, 1039592007) + + W(10, -1112830121, -1125809392, -1121525964, -1113082203) + + W(11, -1111650191, 1035574395, -1112638630, -1117835672); + WS(-1089340204, 1064822548); + sum1 = + W(0, 1024560260, 1017532199, 1036465700, 1032538389) + W(1, 1031720480, 1035008482, 1020865894, 1041385023) + + W(2, 1040632746, -1123651609, 998268921, -1115892569) + W(3, -1109887627, 1033415588, -1112010572, 1043778521) + + W(4, 1042654740, 1053882699, 1058440180, 1062655475) + W(5, 1057084090, 1054084900, 1046711885, 1050203279) + + W(6, -1106019185, -1094337163, -1083627654, -1081203240) + + W(7, -1087921069, -1086686700, -1098821294, -1083224116) + + W(8, -1121721479, 1033472482, -1112650528, 1042941034) + W(9, 1046201917, 1038013786, 1039144481, 1043447032) + + W(10, 1037194524, 1028050126, 993325743, 1013010462) + W(11, 1035154240, 1021897635, 1027972046, -1131974615); + sum2 = + W(0, -1108565409, 1041490776, -1103680498, -1113858207) + W(1, 1018958125, -1139146344, 1024460115, -1111959213) + + W(2, 1034353453, 1018486577, -1113519263, 1052082200) + + W(3, -1097140178, 1046768714, -1098207895, -1128703602) + + W(4, -1088688588, -1105841420, 1037914109, 1062919581) + W(5, -1095558118, 1049328132, 1041960027, 1035231667) + + W(6, 1053209723, -1092734414, -1094792328, 1049986423) + + W(7, -1107726366, 1044527939, 1046336394, -1110040773) + W(8, 1049996954, 1039814215, 1034343810, 1012724150) + + W(9, -1098318745, 1024888668, -1106186917, 1036710347) + + W(10, -1107721619, -1122233136, -1116717915, 1040029087) + + W(11, 1007719490, -1100531711, 1037888927, -1102670680); + WS(-1083458668, -1076208878); + sum1 = + W(0, 1028803476, -1119221604, -1136427363, 1033962333) + W(1, -1108948664, 1041264181, 
-1108358441, 1032628870) + + W(2, 1040683401, -1097616301, 1042493937, -1106128994) + + W(3, -1096558041, 1044433886, -1097860853, -1123751924) + W(4, 1046406904, 1051879501, 1058517387, 1062475632) + + W(5, 1061490456, 1053313566, 1049919352, 1054299289) + + W(6, -1098733730, -1093169213, -1088547874, -1086787145) + + W(7, -1088927588, 1035939910, -1096390160, -1130402734) + + W(8, 1004468631, -1110693690, 1024040880, 1045297574) + W(9, 1028806548, 1038834422, -1095128350, -1106314264) + + W(10, 1030855367, -1111140815, -1127640694, -1108587948) + + W(11, -1104411844, 1040762412, -1118015030, -1165494119); + sum2 = W(0, 1016611708, 1037888148, -1105282980, 1016374797) + + W(1, -1146676672, -1118267731, -1130451858, 1032410610) + + W(2, 1016478676, 1037533502, 1012928145, 1042320298) + + W(3, -1119445205, -1133007186, -1123652878, -1114217777) + + W(4, -1110195557, 1036667983, -1126800264, -1123884642) + + W(5, -1097480948, -1111241984, 1039970164, 1039354964) + + W(6, -1101004152, 1029018929, -1092116144, 1048907311) + W(7, 1063932318, 1045206632, 1051255124, 1032089604) + + W(8, 1040169462, 1051795874, 1041335879, 1052196876) + + W(9, -1105880602, -1088285853, -1095104183, -1090379026) + + W(10, -1128480930, -1116498505, -1105668811, -1110762465) + + W(11, -1096424000, 1016609657, 1029046774, 1046489649); + WS(-1104670894, -1131977046); + sum1 = W(0, -1112648188, -1122872327, 1034658686, -1105196647) + + W(1, 1040705957, -1105551222, -1121117111, 1017793890) + + W(2, -1127207577, 1044660591, -1159198685, -1091967838) + + W(3, 1050515731, -1105182655, -1127322710, 1049557794) + W(4, 1054983097, 1050049705, 1055452918, 1062208630) + + W(5, 1055187309, 1051606290, 1047450666, 1052382814) + + W(6, -1088875575, -1112061830, -1088327234, -1080890076) + + W(7, -1103107959, -1088400827, -1103254713, -1097595959) + + W(8, 1031188251, 1037435556, 1047857485, 1027642075) + W(9, 1054501223, -1109004816, 1042878098, 1041414585) + + W(10, -1103988635, 1027063042, -1117747167, 
-1097581231) + + W(11, 1026629396, -1115427963, -1119280070, -1121163375); + sum2 = W(0, 1016354951, -1117974191, 1039379806, 1055799129) + W(1, 1043516685, 1037116388, 1033575170, -1131729016) + + W(2, 1027513536, 1035918257, 1037237024, -1144611727) + W(3, 1028380863, 1042440009, -1106882882, 1025801393) + + W(4, -1143898815, 1029824032, -1103985849, -1097216592) + + W(5, -1106760625, -1103814691, -1123395355, -1136138683) + + W(6, -1120711631, 1038928177, -1101088822, -1097673825) + + W(7, -1124050651, -1105579212, 1026359109, 1011024469) + + W(8, -1121835937, -1110582057, 1009730353, 1040607236) + + W(9, -1115312251, 1038947942, -1135830635, -1112603972) + + W(10, 1011202413, 1027020738, -1113735870, 1044957698) + + W(11, -1114960631, 1027787831, -1133116095, 1008198659); + WS(-1096559831, 1037484900); + sum1 = W(0, 1028249855, -1134487991, 1034313250, 1016173128) + W(1, 1027417370, 1028347039, 1000575210, 1035635068) + + W(2, 1038422322, 1027879764, 1029515489, -1112460521) + + W(3, -1107825326, -1123304579, 1030471249, -1120703806) + + W(4, 1057397273, 1048869250, 1056640275, 1065757625) + W(5, 1058322735, 1054334819, 1052511918, 1052376319) + + W(6, -1091437192, -1098773026, -1086850776, -1083502823) + + W(7, -1083394976, -1089694535, -1098313541, -1102551488) + + W(8, -1101563608, 1029769503, -1127965876, -1131269971) + + W(9, 1044386388, -1107441417, 1031752252, -1098473097) + + W(10, 1033785689, 1031362249, -1173649736, -1128247556) + + W(11, 1035484541, -1132373103, 1034873120, -1142130892); + sum2 = W(0, -1112511339, 1040888051, 1033770332, 1027239578) + + W(1, -1109060021, -1135961020, -1113507810, -1110941727) + + W(2, -1073555081, -1070793862, -1069228230, -1071158503) + + W(3, -1080567434, 1047598893, 948656059, -1129804625) + W(4, 1076402541, 1075658117, 1075655149, 1077820889) + + W(5, 1045918702, -1118850699, 1035044244, 1026016583) + W(6, -1104998932, 1054215981, 1042820552, 1046227202) + + W(7, -1095422364, -1101978428, 1045389579, -1103164541) 
+ + W(8, -1101584674, -1115233903, 1010151672, 1035248614) + + W(9, 1038949494, 1049027220, 1028467783, -1118124613) + W(10, 1037626737, 1015165170, 1023929473, 1033175695) + + W(11, 1025525751, 1032251645, -1117328038, 1042587545); + WS(-1081931126, 1073966570); + sum1 = + W(0, -1121849711, -1146361331, 1017322599, -1106692309) + W(1, 1025148299, -1106210884, 1006422511, -1115138804) + + W(2, 1035175459, -1097410109, 1043585907, -1120101763) + W(3, 1050307440, 1038219466, 1023773090, -1132131774) + + W(4, -1121951547, 1026127169, -1144746721, -1091000962) + + W(5, -1079735571, -1091953433, -1096168868, -1102445962) + + W(6, 1027761070, 1045225025, -1106255910, 1065386286) + W(7, 1061827739, 1056380469, 1049682211, 1046599772) + + W(8, 1025905706, 1037511592, 1043091875, -1111228857) + W(9, -1120837551, -1102991437, 1003336935, 1040799505) + + W(10, -1114686833, -1113648733, 1017686828, -1108319001) + + W(11, 1018597782, -1147707682, -1110127893, 1028136822); + sum2 = + W(0, -1106398945, 1034732755, -1136096426, -1116038033) + W(1, 1044672515, -1100200943, 1037208365, 1025870712) + + W(2, 1035173169, -1090565718, 1037348813, -1089313148) + + W(3, 1037418911, 1045738761, -1130864309, -1117649125) + + W(4, -1101335301, 1056357842, -1122254044, 1066071262) + + W(5, -1095175639, -1132458755, 1007521414, 1025322164) + W(6, 1053305068, 1050400788, 1036743473, 1057700465) + + W(7, -1084949846, 1003355997, 1030076164, -1106413087) + + W(8, -1121291886, -1115544374, -1105444760, -1086616818) + + W(9, 1050477957, -1102270714, 1045722667, 999303933) + W(10, -1109509581, -1107192051, 1016457143, 1022772267) + + W(11, 1033704361, 1035291569, -1111299866, 1024551876); + WS(1052508503, 1065718149); + sum1 = W(0, 1042149474, -1109127212, 1041796883, 1033122295) + + W(1, -1120120612, 1045736850, -1110374755, 1038133568) + + W(2, 962035242, -1098223900, 1027445007, -1108112926) + + W(3, -1098175173, 1043181559, -1100319159, -1102543586) + + W(4, 1048663227, 1043423478, 1055339346, 
1061167739) + W(5, 1054196775, 1055761732, 1041284046, 1051299336) + + W(6, -1097510801, -1096660204, -1087758861, -1089406218) + + W(7, -1094841886, -1105769888, -1103555357, -1120191499) + + W(8, 1028239019, -1104342914, 1027971236, 1037128774) + + W(9, -1105773058, 1042761157, -1101757237, -1105847257) + + W(10, 1038442493, -1109357577, 1040109559, 1025450507) + + W(11, -1123303012, 1044237597, -1111907879, 1039590240); + sum2 = + W(0, -1109097964, 1021868522, 1007318978, 1032613655) + W(1, 1002517286, -1126879582, 1031113174, -1114937810) + + W(2, 1037136687, -1116667451, -1107845433, -1109068920) + + W(3, 1016069779, -1103760436, 1030143702, -1118059752) + W(4, 1031673749, -1094712046, 1038331125, 1040720319) + + W(5, -1111158812, 1056111085, -1106358629, -1121843086) + + W(6, -1125393555, 1041352693, 1034962829, -1112580519) + + W(7, -1106409067, -1106729155, -1106975050, 1047380650) + + W(8, 1019467600, -1104433838, -1122570447, -1121851072) + + W(9, -1113430896, 1049773824, 1036295980, -1122235886) + W(10, 1025995330, 1008084051, 1039892459, 1025598855) + + W(11, 1045465420, -1110468674, 1020538422, -1113447212); + WS(1056997868, 1008833773); + sum1 = W(0, -1118496039, 1036420187, -1113594582, 1034344230) + + W(1, 1038393121, -1109888100, 1032434307, -1131937728) + + W(2, 1006806901, -1130107576, -1115019369, 1045363911) + + W(3, -1090717022, 1037930126, -1110427933, -1112104883) + + W(4, 1039826714, -1123687353, 1053157409, 1059756929) + W(5, 1058230383, 1045739824, 1042446790, 1052782212) + + W(6, -1123143270, -1104570591, -1093231896, -1095316624) + + W(7, -1095564524, -1098051412, -1101398261, -1104229594) + + W(8, 1006039247, 1032886196, -1105180689, -1102708199) + + W(9, 1035814292, -1129186168, 1025625861, -1112516090) + + W(10, 1017005492, -1125702370, 1028719950, 1009228202) + + W(11, -1119964195, 1033820442, 1028568173, -1136905401); + sum2 = + W(0, 1034320453, -1112051735, 1043966073, 1033608293) + W(1, -1092579307, 1051236380, -1102939053, 
1027336318) + + W(2, -1118604595, -1109468006, -1106879339, -1088213587) + + W(3, 1064370547, -1095838484, 1043763107, 1016664388) + W(4, 1025235375, -1107232926, 1049289065, 1067079268) + + W(5, -1088486363, 1044420987, -1107783352, -1116658227) + + W(6, 1028595208, 1046387359, 1044354198, -1087106166) + W(7, -1108961766, -1104528882, 1035658672, 1023955051) + + W(8, 1002638229, -1105038970, 1045499601, 1035867996) + + W(9, -1096457250, 1028839698, -1122532695, -1126291119) + + W(10, -1116609216, 1005965131, -1111658272, -1132447732) + + W(11, 1042442572, -1134350406, -1131375963, -1135660575); + WS(1064849644, -1092967401); + sum1 = + W(0, 1041078624, -1103081652, 1045204674, -1126422222) + W(1, -1107755791, 1049230600, -1101861336, 1041473799) + + W(2, 1033930742, 1034184171, -1098756264, 1038407355) + + W(3, -1103118957, -1103625882, 1035198826, -1102710964) + W(4, 1053270962, 1058076017, 1059044453, 1061713418) + + W(5, 1064682577, 1047988615, 1055582710, 1058355518) + + W(6, -1091277968, -1091900963, -1097334921, -1078564449) + + W(7, -1099822850, -1091654943, -1096942230, -1098047297) + + W(8, -1115974645, -1101902009, 1038364767, 1031099397) + W(9, 1025967234, 1010917525, 1021900159, -1106099948) + + W(10, -1124489961, 1046377811, -1096742869, 1034335299) + + W(11, 1015202107, -1098744549, 1047801952, -1101582418); + sum2 = + W(0, 1034970587, -1105865147, 1024111791, 1018431373) + W(1, -1107243650, 1038385161, -1114574492, -1129893537) + + W(2, 1039267337, 1026053125, 1040193826, 1037956157) + W(3, -1106797747, 1031902437, -1114137980, -1128932411) + + W(4, -1109640195, 1027743203, 1042644379, -1088412806) + + W(5, -1104275404, 1031274183, -1115379021, 1040558376) + + W(6, -1085205368, -1076908464, -1072741915, -1079896887) + + W(7, 1057561219, 1052902509, -1103620900, 1054347378) + W(8, 1065312771, 1068235631, 1074530244, 1069229068) + + W(9, -1110308581, 1045193510, -1167188013, -1099424233) + + W(10, -1111782462, 1029167111, 1035172075, 1048206286) + + 
W(11, -1092973955, -1105822229, -1102796809, -1106619819); + WS(-1085552428, 1044647857); + sum1 = W(0, -1130568573, -1111247047, 1001536536, -1104704953) + + W(1, 1029690778, -1104108900, 1012447989, -1107166286) + + W(2, -1120131932, 1046596325, 1026932885, 1049575094) + + W(3, 1042872064, -1127841478, -1114289465, -1110288351) + + W(4, -1097293585, -1095715202, -1090361246, -1085535724) + + W(5, -1105723034, -1113844876, -1127895515, 1028149942) + + W(6, 1052681344, 1050957098, 1059711152, 1060788917) + W(7, 1048249845, -1106926654, 1034645781, 1043016176) + + W(8, -1115182247, -1125685565, -1097304503, -1122898434) + + W(9, -1115360055, 1050188162, -1110595670, 1041406167) + + W(10, 1016796643, -1123522377, 1028362085, -1112833505) + + W(11, 1008149481, -1111804152, 1032897312, -1119471349); + sum2 = W(0, 1049638564, 1012096208, 1055008508, 1058022553) + W(1, 1059601221, 1065544846, 1035938200, 1043980555) + + W(2, -1122573304, -1169058693, -1092917400, -1080721407) + + W(3, -1081684685, -1080284825, -1096574070, -1098850345) + + W(4, -1097365817, -1118822370, -1124083512, 1056959548) + + W(5, 1056508637, 1051359763, 1044088441, 1031681402) + W(6, -1120650912, 1032693677, -1127709292, 1044260765) + + W(7, -1099806388, 1042746095, 1033735876, -1121369890) + + W(8, 1039129637, 1036528332, -1113518885, -1122124802) + + W(9, 1019567420, -1101554026, 1041548497, -1108142320) + + W(10, -1114566411, -1119534724, 1015256750, 1042163670) + + W(11, -1110975736, 1032382474, -1110809702, 1022534793); + WS(1049904727, -1085461498); + sum1 = W(0, -1114441151, 1032673159, 1027012354, 1041600521) + W(1, 1035856141, 1033107055, 1031163069, -1139856090) + + W(2, 1047758995, -1122472531, 1048177385, -1116753522) + + W(3, -1105032264, -1109908945, -1104848436, -1111353615) + + W(4, -1092338425, -1101190388, -1096531431, 1059669870) + + W(5, 1063099682, 1058897901, 1049115478, 1055941567) + W(6, 1054893264, 1042608805, 1050188989, -1088410395) + + W(7, -1084283112, -1088007491, 
-1096369309, -1096366496) + + W(8, -1095327506, -1108884127, -1104987150, -1123782065) + + W(9, 1042263123, 1042892731, 1029697804, 1047202227) + + W(10, 1042581180, -1119554504, 1041262473, -1120686401) + + W(11, -1117364890, -1107384925, -1138499405, -1100444021); + sum2 = + W(0, -1101934313, -1127864237, -1112463889, 1047304216) + W(1, -1097499627, 1041678537, -1128505773, 1039199998) + + W(2, 1068410613, 1066297935, 1052441146, -1077079694) + + W(3, -1082290320, -1095668928, -1097722710, -1096167330) + + W(4, 1053392914, 1047339448, -1111508378, 1044634164) + W(5, -1086326188, 1053023843, 1050833435, -1112581958) + + W(6, 1045964051, -1107136628, 1035674775, 1046050296) + W(7, -1113442140, -1135364198, 1028981469, 1028034745) + + W(8, -1109754454, 1023924243, -1121555471, -1111827987) + + W(9, 1006763480, 1034831467, -1128234727, 1032814306) + + W(10, -1118726053, 1029832629, -1111696115, 1036477638) + + W(11, -1119582249, 1024856185, -1112331027, 1017921306); + WS(-1087326956, 1033622098); + sum1 = + W(0, -1113899675, 1035193378, 1030557818, -1119324954) + W(1, 1042470612, -1117399605, 1021883592, 1035419591) + + W(2, 1045896801, -1161971013, 1045303135, -1095480777) + + W(3, -1110770003, -1109576477, -1100233714, -1122013269) + + W(4, 1041409216, -1108871427, -1104804773, 1041468886) + W(5, -1098059200, 1052050234, 1041862393, 1017834329) + + W(6, -1108955706, 1048600721, -1094955846, 1051943793) + + W(7, 1025944442, -1097508910, 1030612641, -1106071902) + + W(8, -1104543128, -1114333163, 1045266364, -1096626659) + + W(9, 1048775867, 1043056710, -1122051827, 1042839849) + + W(10, 1041055226, -1106692192, 1039063182, -1112593372) + + W(11, -1107180444, 1041486173, -1114362202, -1121041260); + sum2 = + W(0, 1008283263, -1114415273, -1114655921, 1036167174) + W(1, -1111159985, 1033204129, -1122340118, -1127458940) + + W(2, -1111634453, 1032259795, -1123951654, -1116061542) + + W(3, 1039368345, -1102899145, 1024476440, -1118949310) + W(4, -1112224655, 1046234660, 
1039099048, 1049934438) + + W(5, 1003410015, -1113540553, -1117681522, -1122317014) + + W(6, -1121896666, 1020399084, -1139717487, 1050902690) + W(7, 1039007446, 1039510238, 1032062517, 1033150391) + + W(8, 1031996897, -1105304684, 1017260660, -1102142181) + W(9, 1003551903, -1114854307, 992233789, -1113963477) + + W(10, -1106176579, 1027107696, -1106922258, 1022104088) + + W(11, -1122048418, -1135188919, -1128403972, -1125862176); + WS(1068173014, -1111006676); + sum1 = + W(0, 1031049568, -1115395480, 1019703498, -1113567107) + W(1, 1015808760, -1109811912, 1027899359, -1103105894) + + W(2, -1103201566, -1112513259, -1112438249, 1042819653) + W(3, 1048780036, 1039150546, 1034094633, 1040353932) + + W(4, 1048004603, -1117860850, -1104427535, -1083606526) + + W(5, -1083053360, -1090564527, -1092330682, -1095220995) + + W(6, -1100468360, -1173290307, 1042677725, 1064316383) + W(7, 1063571221, 1060600586, 1050639762, 1054475004) + + W(8, 1046039773, -1123264224, 1030284012, -1122621123) + + W(9, -1094902185, -1103353426, -1106486660, 1034806311) + + W(10, 1012485196, -1127850837, 1029876039, 1036820754) + + W(11, 1035184606, 1024997805, -1149057999, -1123061719); + sum2 = + W(0, 1014853092, -1120466254, 1027909890, -1117512854) + W(1, 1026011110, -1110873488, 1036041188, -1115497806) + + W(2, -1122580470, -1114547761, 1030745116, -1119656010) + + W(3, 1035574738, -1120221385, 1020780404, -1122615179) + W(4, -1120994367, 1038991680, 1034511499, 1041066172) + + W(5, -1109149586, 1024235728, -1112980367, 1020521688) + + W(6, 1047459793, 1047680543, -1098401240, -1121088174) + W(7, 1045982327, 1024310650, -1122000913, 1042402937) + + W(8, 1064875701, 1054395623, 1045153300, -1085223893) + + W(9, -1084582209, -1099510820, 1029465320, -1105747852) + + W(10, -1132455031, 1011283964, -1108441710, 1018552602) + + W(11, -1106692167, 1026658994, 1023023932, 1004613671); + WS(-1095915991, 979918868); + sum1 = + W(0, -1115463727, -1137886897, 1011021190, 1011860489) + W(1, 
-1129491953, 1007409140, -1124741815, -1122228982) + + W(2, 1032823819, 1037523161, 1029800132, 1043477300) + W(3, -1105888052, -1114264129, 1021218335, 1030459197) + + W(4, 1026707497, -1131549790, -1108429845, -1100874368) + W(5, 1047336484, 1049886943, 1042964547, 1038061098) + + W(6, -1119898332, 1035412665, -1136100063, -1112838682) + + W(7, -1098267600, -1097768883, -1106237874, -1108915952) + + W(8, 1026568544, -1108324895, -1138504034, 1047390560) + W(9, 1036689246, 1037432366, 996441389, -1121991146) + + W(10, -1121143952, -1129452238, -1120493188, -1147670274) + + W(11, 1028096953, -1134502280, -1157333673, -1177453050); + sum2 = W(0, -1121902268, -1128394666, 1039974898, -1114856664) + + W(1, 1032116923, -1115163450, -1112859101, -1128925353) + + W(2, 1020062213, -1143339877, -1102502778, 1034423857) + W(3, 1030950002, 1038088024, 1036537682, 1019282123) + + W(4, 1034601005, -1111450467, 1034366199, -1093114037) + + W(5, 1027196884, 1036157347, -1130356560, 1032717961) + + W(6, -1096503308, 1052764693, -1095299550, 1060522961) + + W(7, 1057088106, -1104371255, 1031719642, -1110891365) + + W(8, 1050858947, -1099720262, 1055940597, -1087523263) + + W(9, -1085850393, -1107617878, -1119144319, 1017512699) + + W(10, -1108630019, -1130142915, -1105816149, 1037139152) + + W(11, 1054601799, 1043807995, -1122652165, 1025497552); + WS(1066530838, 1025703389); + sum1 = + W(0, -1120765920, 1009297086, -1122351141, 1015487755) + W(1, 1015844344, -1127613121, -1133586812, -1124468594) + + W(2, -1132051981, -1139418709, -1109474502, -1098776251) + + W(3, -1109199593, -1123764613, -1121408403, -1145136640) + + W(4, -1129191897, 1032587897, 1057153612, 1049522345) + W(5, 1041525985, -1109817213, 1010121533, -1107128582) + + W(6, 1042371592, -1118415861, -1120712699, -1099055070) + W(7, 1049700744, 1050593755, 1036084166, 1038803505) + + W(8, 1007708012, -1122811552, -1105657359, -1099127683) + + W(9, -1111888320, -1113004743, 984437698, 1036904923) + + W(10, -1116932051, 
-1134432681, -1127344014, 1026629028) + + W(11, -1119906716, -1136900213, -1118848634, -1123829719); + sum2 = W(0, -1117430114, 1039120940, -1106133189, 1026507473) + + W(1, 1031550625, -1142215401, 1026312489, -1139250704) + + W(2, 1041261247, -1107749198, -1099018179, -1131506304) + + W(3, -1123617030, -1123190888, -1112954810, -1137414048) + + W(4, 1033806419, -1097831330, 1064523288, 1045711847) + + W(5, -1088977338, 1052831473, -1133526188, -1122137188) + + W(6, -1126022952, 1041137317, 1057416214, -1092069746) + + W(7, 1059767697, -1088761837, 1033531722, -1110686692) + + W(8, -1155381873, -1127685702, -1099691831, 1026250214) + + W(9, -1098651966, -1118397760, -1130823120, 1029280576) + + W(10, -1125967712, -1116954634, 1032736142, -1120045450) + + W(11, 1021746164, 1028302350, 990513889, -1115066264); + WS(1066680246, 1040921440); + sum1 = + W(0, -1119730500, 1034101258, -1132236433, -1104735450) + W(1, 1035235184, -1111591250, 1024518355, -1121802685) + + W(2, -1110280861, 1011417444, 1027753539, 1038200908) + W(3, 1035268043, -1121734057, 1032771355, -1126371080) + + W(4, -1092083138, 1031601302, -1090607216, -1078665075) + + W(5, -1096046953, -1093429316, -1094352558, -1102572591) + + W(6, 1053565584, 1054599907, 1058261942, 1061430978) + W(7, 1061610294, 1056142824, 1053165350, 1055282545) + + W(8, -1110918026, -1143895330, -1111808624, -1099071584) + + W(9, -1106320677, -1105323662, -1138833863, 1035738553) + + W(10, 1029997623, -1113777127, 1039976245, -1112447940) + + W(11, 1023744576, -1145165385, -1113453535, 1031263347); + sum2 = + W(0, 1033717276, -1129082210, -1111396517, 1053521042) + W(1, -1096499765, 1041638909, -1115028859, -1143837439) + + W(2, 1028210443, -1130739278, -1117565460, -1113906791) + + W(3, -1106119849, 1044690885, 1000486481, 1015998178) + W(4, 1046091050, -1085374293, 1033058760, 1062822204) + + W(5, -1089123399, 1003636411, 1048601259, -1103883056) + + W(6, -1119519632, 1026401017, -1106338078, 1066908993) + W(7, 1034907371, 
1054286387, -1096894542, 1041460877) + + W(8, 1050909876, -1110182528, -1149282827, -1081393742) + + W(9, -1094008056, 1035142168, 1020535023, 1028271260) + + W(10, -1117678961, 1042909501, -1099518378, 1044082334) + + W(11, -1097984366, 1040282994, -1109925208, 1021313035); + WS(-1111691100, -1070453585); + sum1 = W(0, 1040350706, -1107702492, 1045915011, -1103582444) + + W(1, 1036820082, 1030081641, -1112926972, 1034445457) + + W(2, 1032181048, 1006352113, -1101176752, -1098104500) + + W(3, -1111995209, -1105752467, -1111976126, 1025879721) + + W(4, 1054597571, 1056547603, 1059824936, 1062411659) + W(5, 1062112057, 1042786985, 1050598527, 1058122296) + + W(6, -1090063653, -1111618639, -1087007388, -1079908657) + + W(7, -1098160558, -1094958736, -1104521261, -1107753094) + + W(8, -1105977411, 1011306277, 1037450066, -1113612402) + + W(9, 1043358554, -1108376090, 1024420791, -1098378691) + + W(10, -1114266718, 1034245578, -1130610528, -1111998342) + + W(11, 1021846218, 1026051990, 1032139296, 1018599200); + sum2 = + W(0, -1102183736, 1045283790, -1099060082, 1029589699) + + W(1, -1103326288, -1146696619, -1123727405, -1106785078) + + W(2, 1044690091, -1117083441, -1109435037, 1005932683) + + W(3, -1109954065, -1130369043, -1115538567, 1039027144) + + W(4, 1025447267, 1036724432, -1100626978, 1066859805) + W(5, -1114206859, 1024539831, -1123732285, 1040901138) + + W(6, -1103679886, -1096666126, 935783211, 1061824774) + + W(7, -1098480165, -1113161303, -1109149493, -1129688427) + + W(8, 1034791609, 1043810354, -1097803022, 1034749891) + W(9, -1094988176, 1050252440, 1029732207, -1105144948) + + W(10, -1120655593, -1109094473, 1029730663, 1023292171) + + W(11, -1109570093, 1012313206, -1106431920, 1015686163); + WS(-1105638574, -1077878805); + sum1 = W(0, -1112101313, 1025565072, -1118212870, 1034991091) + + W(1, 1037618766, 1033034081, 1023523536, -1143864013) + W(2, 1043313362, 1041884710, 1045058176, -1099609869) + + W(3, -1100255017, -1097107341, -1104696333, 
-1128917761) + + W(4, -1104398291, -1093904634, -1086702118, 1055637093) + + W(5, 1061981800, 1053455331, 1048950646, 1037339676) + W(6, 1034529536, 1043314021, 1058479092, 1049084915) + + W(7, -1082434966, -1087005060, -1098585534, -1105402705) + + W(8, -1120033234, -1106696920, -1128822905, -1126325576) + + W(9, 1050170340, 1046293466, 1042248226, 1032623999) + W(10, 1017824488, 1028280843, 1038132788, -1136921361) + + W(11, -1125947450, -1124008918, -1126565485, -1114937252); + sum2 = + W(0, 1040832216, 1042032808, 1039550307, 1029252447) + W(1, 1040592387, -1111709033, 1050208698, -1091595987) + + W(2, -1125419559, -1103788280, 1043848052, -1099123277) + + W(3, 1047689572, -1102650535, -1105352144, 1033340873) + W(4, -1166596642, 1045241846, 1051795538, 1068949095) + + W(5, -1094163017, -1111700665, -1117027793, 1027304955) + + W(6, -1076839498, -1091833274, 1035730071, 1070654393) + W(7, -1096947569, 1050259859, 1051072204, 1048679723) + + W(8, -1085872442, -1105795498, -1094587067, 1007431650) + + W(9, -1089767553, 1041510738, -1100476212, 1057584085) + + W(10, 1038631745, 1037000995, -1115278352, -1103746979) + + W(11, 1041143367, 1025070825, -1116523862, -1093749689); + WS(-1095944791, 1081861902); + sum1 = W(0, -1129669743, -1116664839, -1148272609, -1121494877) + + W(1, -1115602986, 1028329496, -1123378150, 1027525168) + + W(2, 1034635115, -1122706061, 1033259121, -1124251544) + + W(3, -1126517523, -1128061368, -1130313237, -1119991479) + + W(4, 1057433596, 1048908041, 1058166921, 1058746094) + W(5, 1055593874, 1057488289, 1049860117, 1061071735) + + W(6, -1090131839, -1093164666, -1087001587, -1083481847) + + W(7, -1085306808, -1091485686, -1102755929, -1105495231) + + W(8, -1120322428, 1030509816, 1035303897, 1044900399) + W(9, 1041073282, 1023593721, 1034451003, -1102595129) + + W(10, 1008560041, 1023557506, 1013701241, -1117995899) + + W(11, -1116643091, 1023656437, -1164007844, 1024551167); + sum2 = + W(0, 1029670841, -1115575058, 1042858197, 
-1101600217) + W(1, -1117748556, 1035438169, -1114535210, 1032334939) + + W(2, 999172620, -1099435099, -1139359478, 1027943101) + W(3, 1048715384, -1098854617, 1036336651, 1041563473) + + W(4, 1042251423, 1041110640, 1058527193, 1075961547) + W(5, 1073424302, -1086643829, -1080060710, -1067239867) + + W(6, -1105031588, -1120449287, 1010020318, -1109667225) + + W(7, 1057314889, 1037113027, -1123725073, -1100548924) + W(8, 1027957701, 1035596553, 1019832195, 1011086126) + + W(9, 1033526397, 975610078, 1012557694, -1095465488) + W(10, 1026093121, -1156632055, -1126960339, 1010028142) + + W(11, 1037810339, -1115077778, -1133987070, 1036754195); + WS(-1085172076, 1069147958); + sum1 = W(0, 1025703205, -1141293989, 1041286826, -1119005195) + + W(1, 1026450561, -1117134469, -1120895076, 1025414366) + + W(2, 1027233863, -1106615031, -1115436126, -1112858253) + + W(3, 1042609938, -1122683754, 1044686038, -1117169125) + W(4, 1026640114, 1042354526, 1053740861, 1054852793) + + W(5, -1119314307, 1026745790, -1108709290, 1023400633) + + W(6, 1029190628, -1106775167, -1106293072, -1089581852) + + W(7, -1098775605, -1108063055, 1015550627, -1125318352) + + W(8, -1111659152, -1115892869, 1035182992, 1027435128) + + W(9, -1137028117, 1038436612, -1112739478, 1031194923) + + W(10, -1139375968, 1028377583, -1156734143, -1121669673) + + W(11, 1034668065, -1119291218, 1027259179, -1118201212); + sum2 = + W(0, -1154464437, 1004660066, 1011666487, 1016373458) + W(1, -1117096132, 1027534293, -1118720211, 1024315262) + + W(2, 1027039233, -1115032564, -1147080182, 975139636) + W(3, 1025013868, -1117310194, 1025607183, -1145025250) + + W(4, 1008089899, 1041471507, 1053281755, -1081313755) + W(5, -1118862913, 1040077608, -1134922624, 993302909) + + W(6, -1117609897, 1033488023, 1082321679, -1065846603) + + W(7, -1122974098, -1130639872, -1143672974, -1114202318) + + W(8, -1118717800, 1033628240, 1046901100, 1050032460) + + W(9, -1106934499, -1137731251, -1177023016, 1017914834) + + W(10, 
1027401421, -1112728150, 1012824907, -1108305112) + + W(11, 1038621352, -1137500513, -1129758225, 1025570630); + WS(1065820150, -1112682631); + sum1 = + W(0, -1105640672, -1136938293, 1007936778, -1099447769) + W(1, 1033964109, -1110772895, -1107676126, 948858222) + + W(2, -1140735040, 1045425461, 1012508339, 1052020966) + W(3, -1110876672, 1038536521, 1034428376, 1041035251) + + W(4, -1096706537, -1095619178, -1101065539, -1083835034) + + W(5, -1127256032, -1101414038, -1102412811, -1104437005) + + W(6, 1051064512, 1046810501, 1057299292, 1054321083) + W(7, 1044289505, 1042038046, 1050440565, -1126355329) + + W(8, -1114855500, 1041816072, 1015987827, 1049485320) + W(9, -1106506396, 1031650401, -1098866309, 1044647659) + + W(10, -1128129890, -1108472292, 1036938130, -1101761488) + + W(11, 1035708141, -1113134674, 1031811610, -1119496853); + sum2 = + W(0, -1116689599, 1033662400, -1110264521, -1113832562) + W(1, 1034493072, 1024644211, -1105470304, 1030330543) + + W(2, -1104060794, 1047714485, -1106889746, 1055059659) + W(3, -1089308922, 1027816870, 1032487381, 1043118546) + + W(4, 1023887631, -1101000300, -1123902349, 1047334183) + + W(5, 1060541265, -1145174230, -1099777158, -1100795602) + + W(6, 1047625619, -1113896185, 1058674488, -1105663288) + + W(7, -1079751447, 1040502911, 1059286709, -1110882655) + + W(8, -1104647113, 1049921725, -1100964722, 1060005487) + + W(9, -1088726381, 1026206299, -1093081927, 1033716355) + + W(10, 1032108768, -1114038586, -1134500667, -1105023935) + + W(11, 1046968990, -1116742649, 1042150309, -1127164330); + WS(1051219671, 1032040132); + sum1 = W(0, 1024246061, 1029213629, -1142580623, 1007533386) + W(1, -1132388626, 1016678619, 998523489, 1023830470) + + W(2, -1110293770, -1111062337, 1026027124, 1035026513) + + W(3, 1036422600, 1011246152, -1173422083, -1130424090) + + W(4, -1098745559, -1104763504, -1097349705, -1082429923) + + W(5, -1083869189, -1095610025, -1095350964, -1094623957) + + W(6, 1035978230, 1041701281, 1051781481, 
1065410260) + W(7, 1060856625, 1057796630, 1045145830, 1055180598) + + W(8, 1050231066, -1106571859, 1040668770, -1111350032) + + W(9, -1113287631, -1122887388, -1109374678, 1007169387) + + W(10, -1111943280, 1016965359, 1026054164, 1035026168) + + W(11, 987392292, 1019565189, -1110518465, 1026555204); + sum2 = + W(0, -1123481859, 1024186334, -1181265672, -1136581772) + W(1, 1007273042, -1130526339, -1134191606, 1035801320) + + W(2, -1131096266, -1121050215, 1023491721, -1115413373) + + W(3, 1036272276, -1110187157, -1123342647, -1125507188) + + W(4, -1107023625, 1040345453, -1110082893, -1116173932) + + W(5, 1041634800, 1028205855, 1036476425, -1124495703) + W(6, 1040776673, -1129271163, 1044851152, -1097567007) + + W(7, 1051516690, -1106625929, 1035231370, 1007786534) + + W(8, -1073406322, -1080765061, -1077100552, -1085034176) + + W(9, -1104398720, 1019889879, -1115096299, -1105128420) + + W(10, 1074128739, 1066657253, 1068015119, 1066670217) + W(11, -1111857710, 1026394385, 990445793, 1030299973); + WS(-1124324720, 1038677119); + sum1 = W(0, 1008341069, 1031967089, 1023486232, 1037528348) + W(1, -1122921519, 1029849459, -1114006089, 1035528454) + + W(2, 1038389179, 993181316, -1145165264, 991007864) + W(3, 1024190949, 1044031254, -1123515526, 1052524910) + + W(4, 1056361124, 1050863173, 1054230472, 1063082151) + W(5, 1050043692, 1049660054, 1043779649, -1097977265) + + W(6, -1081451931, -1096246661, -1086637900, -1088130503) + + W(7, -1092787999, -1099337955, -1095996120, -1102904005) + + W(8, 1047683325, 1035916906, 1027831167, 1026128198) + W(9, 1020947750, 1025351359, 1037985981, -1114005272) + + W(10, -1130646284, -1134539381, 986748732, 1003368116) + + W(11, -1115252008, 1036720064, -1116396881, 1029521402); + sum2 = + W(0, 1042203848, -1133163156, -1108707706, 1027923803) + W(1, -1098359820, 1043843260, 1021888231, 1023965418) + + W(2, -1089098687, -1098025067, 1036383110, 1058285406) + W(3, 1062333205, 1048839159, 1058743586, -1078465930) + + W(4, 
1035967917, 1052777785, 1043356562, 1046924897) + W(5, -1080913711, -1088408400, -1097507024, 1067915456) + + W(6, 1068788369, -1096461215, -1090569807, -1088764217) + + W(7, -1095261121, -1098396864, -1093746709, 1064748817) + + W(8, -1095505730, -1097973178, -1127034898, 1056967609) + + W(9, 1040836988, 1046078649, -1111389506, -1099047850) + + W(10, 1025788011, -1111191248, 1023876171, 1046203397) + + W(11, -1107189985, -1113219498, -1110869404, 1040158004); + WS(-1080347734, -1094728165); + sum1 = W(0, -1115633753, -1128052610, 996364134, 1027254679) + + W(1, 1027401421, -1125749028, -1143663749, 1017291157) + + W(2, 1036436624, 1031912915, 1038598142, -1114408211) + + W(3, -1097652239, -1109729611, -1111208674, -1118442418) + + W(4, -1113632799, -1104119121, -1095896866, 1064358521) + + W(5, 1061684826, 1051089073, 1044518984, 1048577269) + W(6, 1018734050, -1104173746, 1042956529, 1051156401) + + W(7, -1087880029, -1093838037, -1100689848, -1104163629) + + W(8, -1104273977, -1115038313, -1098511458, 1006576423) + + W(9, 1037804713, 1039011098, -1157246715, -1145301883) + + W(10, 1022550473, -1128796792, 1027191321, -1123707765) + + W(11, -1125317723, -1116369959, -1130384675, -1113863292); + sum2 = W(0, -1137561210, 1008161562, -1138249722, 1032804591) + + W(1, -1118405314, -1149548391, -1140666666, -1121723310) + + W(2, 1000810292, 1017087401, -1104015266, 1045940061) + + W(3, -1112933693, 1040845572, -1131915309, -1125770861) + + W(4, -1109671087, -1143663956, -1100429296, 1064636422) + + W(5, 1041071232, -1095622064, -1113635957, -1156294183) + + W(6, -1098257975, -1088410023, -1110483119, 1065774501) + + W(7, -1111449957, -1114533177, -1105461804, -1154727079) + + W(8, -1113334223, 1033424863, -1113376685, 1041401738) + + W(9, -1107119710, 1041680128, -1114424843, -1143304660) + + W(10, 1010643898, -1122425914, -1120544050, 1015219657) + + W(11, 1028103016, -1125778629, -1127495397, -1128411685); + WS(1062796012, -1081433938); + sum1 = W(0, -1140258862, 
1019305782, 1017521696, -1128637410) + + W(1, 1030042599, -1140240046, 1023786378, 1027083972) + + W(2, 1047697940, -1137205890, 1040574503, -1131514496) + + W(3, -1115750349, -1114177400, -1119424318, 1025144619) + + W(4, 1041493798, 1051824452, 1048941321, 1061551411) + W(5, 1058760695, 1056942277, 1045023263, 1056967544) + + W(6, -1090901033, -1095124244, -1090555023, -1084025976) + + W(7, -1089279069, -1089170928, -1099538126, -1087239225) + + W(8, 1032453887, 1034552166, 1032035723, 1036871279) + W(9, 1041251643, 1032547975, -1117686145, 1027338481) + + W(10, 1008978681, 1031420374, -1124609380, 1025475023) + + W(11, 1016399695, 1024887097, 1002611913, 1012109898); + sum2 = + W(0, 1004838046, 1009005547, 1023830295, -1121204348) + W(1, 1026709605, -1145957446, 1024434367, -1122812157) + + W(2, 1036867068, -1112484311, 1034701810, -1131856601) + + W(3, 1008859907, -1129570235, -1120209470, 1023686531) + + W(4, 1023348885, -1113333873, -1115269491, -1084820204) + + W(5, -1095888786, 1052224270, 1043852487, 1047863595) + + W(6, -1120560082, 1019969949, -1089366478, -1067115412) + + W(7, -1071017431, 1066362408, 1084005284, 1063525936) + W(8, 1032448279, -1115333043, 1028612787, -1132671203) + + W(9, -1112864526, 1045423025, 1025066805, 1043035962) + W(10, 1025411331, -1123665230, 1024216939, 1034346460) + + W(11, -1116027280, -1133553563, 1032270816, -1105823812); + WS(-1103681710, -1114530079); + sum1 = + W(0, 1025861737, 1029506744, 1032236919, 1035998951) + W(1, -1111043394, 1007410083, -1126534927, -1123346113) + + W(2, -1130912909, -1104283663, -1132593150, -1102933559) + + W(3, 1040041573, 992747222, 1034020670, -1119406504) + W(4, -1110124091, -1117715067, 1039686571, -1097542514) + + W(5, -1088677703, -1112367617, -1105036246, 1035634224) + + W(6, 1040844938, 1046218351, -1098764800, 1059317136) + W(7, 1057765873, 1054923388, 1029185429, 1040434219) + + W(8, -1105267215, -1121870249, 1038793978, -1096018272) + + W(9, -1105386706, -1101946581, -1122605111, 
-1143951794) + + W(10, 1021540535, 1032871129, -1120294159, 1036773558) + + W(11, -1111111618, 1031187710, -1138057273, 1023823648); + sum2 = W(0, -1133865733, -1114357788, -1113774565, -1094909862) + + W(1, 1041934157, 1048238594, 1041091101, 1042608690) + W(2, 1041378526, 1025611699, 1057780422, -1134317911) + + W(3, -1084534604, -1105040821, -1096429323, -1128928913) + + W(4, -1102114760, 1041852494, -1090351620, 1058263821) + + W(5, 1059786982, -1114163954, 1034035361, -1102786592) + + W(6, 1040042063, -1102827642, 1041105879, -1094128428) + + W(7, -1121888905, 1038597269, -1139774900, 1035391793) + + W(8, 1036263014, 1023561916, -1107817782, 1049906607) + W(9, -1106476521, 1034361268, 1016844568, 1014428216) + + W(10, -1111718847, -1109687464, 1041802453, -1106696300) + + W(11, 1023851848, 1032032810, -1111010993, 1031652841); + WS(1059372396, -1131086843); + sum1 = + W(0, -1116770958, -1133338231, -1109423369, -1142676381) + + W(1, -1119417706, 1016864285, -1134266292, 1026073927) + W(2, 1038224730, -1110641250, 1048611339, 1026922691) + + W(3, 1012241985, 1033593306, -1108924145, 1029871326) + W(4, -1145642197, 1044958408, 1054215533, 1059821143) + + W(5, 1063416476, -1109435540, 1051611888, 1046037553) + W(6, 1033085351, -1100693257, 1024247225, -1085100606) + + W(7, -1082741615, -1105005730, -1096553463, -1091104802) + + W(8, -1114157744, 1030339791, -1106016522, 1039898225) + W(9, 1046979692, -1157274234, 1033932042, 1022970360) + + W(10, -1129272066, 1026076824, -1113286331, -1126911267) + + W(11, -1119319754, -1131274519, 1018258200, -1118999381); + sum2 = W(0, -1117639690, -1126294727, -1135696947, 996342293) + + W(1, 1025603183, -1120264553, 1034416033, 1029227505) + + W(2, 1032542252, -1105983103, -1105990346, -1101410582) + + W(3, 1031145839, 1029472991, -1125334020, 1016716795) + W(4, 1049036492, 1053585046, 1063294224, -1084067012) + + W(5, 1030876063, -1090564487, 1047874805, -1103677712) + + W(6, -1106428174, -1090074420, 1074893281, 
-1121077701) + + W(7, -1079475296, 1059480413, -1099339963, -1097572114) + + W(8, -1100598545, -1101677356, -1088449090, 1052666358) + + W(9, 1049420880, -1099961793, 1047783255, -1106211131) + + W(10, -1124515281, 1042350020, -1128187082, -1118105649) + + W(11, 1033189178, 1012743453, 1016211057, 1026065323); + WS(998487488, 1024735998); + sum1 = W(0, 1034787125, 1033987197, -1118966639, -1138173503) + + W(1, 1022076300, -1112157509, 1034213589, -1125240838) + + W(2, -1096711418, 1049342972, -1099717199, 1051641300) + W(3, 1049184050, 1032130968, 1048086279, 1039987293) + + W(4, -1093645783, -1089249828, -1085775960, -1083720757) + + W(5, -1086133195, -1087092580, -1098046771, -1087792286) + + W(6, 1058035879, 1055719968, 1060189357, 1061848612) + W(7, 1062875388, 1007190411, 1051240059, 1039984791) + + W(8, -1113381667, 1033680357, -1108537011, -1102783527) + + W(9, -1129404117, -1110331987, 1030325168, 1051616838) + + W(10, 1020339515, 1037116250, -1109989161, 1046511483) + + W(11, 1032957443, 1011591327, 1039628088, -1135236550); + sum2 = W(0, 1001577162, 1041063142, -1112011211, 1039747614) + + W(1, -1137882877, -1105672858, 1031849538, -1108158369) + + W(2, 1048277958, 1048804575, 1037383696, 1042988518) + + W(3, -1097597287, -1099442373, -1108394523, -1155656404) + + W(4, -1094388043, 1061039504, 1055288082, -1084834029) + + W(5, -1087507005, 1044806256, -1093629181, 1059993792) + + W(6, -1079852425, -1095199392, -1077655189, -1103423898) + + W(7, 1064308251, 1065240337, 1071708239, 1061107030) + W(8, 1055450674, 1054780006, 1058201467, 1071463850) + + W(9, 1058560116, -1080251704, -1090257183, -1073879137) + + W(10, 1040994203, 1035580564, -1111912036, -1100510960) + + W(11, -1097442490, -1090250140, 1028605291, -1106111844); + WS(-1077135094, 1050907162); + sum1 = W(0, -1132921461, -1127648729, -1124984131, -1118007170) + + W(1, -1117294457, -1126452986, -1124706520, -1119126518) + + W(2, -1117094275, 1033699545, 1030394994, 1045491604) + W(3, 1039295895, 
1040542752, 1018438944, 1035722382) + + W(4, -1098008869, -1094355462, -1089194394, -1081313730) + + W(5, -1088142214, -1094044967, -1098093746, -1094059515) + + W(6, 1043699095, 1055483630, 1053279460, 1065160347) + W(7, 1056280352, 1058021117, 1041063570, 1050783700) + + W(8, -1112620946, 1042990903, 1047326318, -1121246728) + + W(9, -1110840279, -1106681402, -1128181366, 1018820533) + + W(10, -1122422352, 1034045812, 1011377167, 1042128328) + + W(11, -1103632934, 1039870541, -1109127577, 1037083890); + sum2 = W(0, 1003021837, 1029225785, -1123823650, 1023947681) + + W(1, -1119691195, 1024608678, -1125446899, 1034042378) + + W(2, -1110495495, 1032075003, -1122423961, 1043723635) + + W(3, -1138212418, 1017994477, -1115371525, 1002413729) + W(4, 1016902499, 993748010, 1028513681, -1096435650) + + W(5, -1120166015, -1119557273, 1032720188, -1115060009) + + W(6, -1116948709, 1041772340, 1083263801, 1074788880) + W(7, 1038793960, 1017310535, 1032034162, -1118011761) + + W(8, 1029108513, -1097222635, -1063916786, -1074844879) + + W(9, -1115182489, 1021770517, 1014563122, 1027576529) + + W(10, -1113320609, 1042534143, 1039031010, -1104645285) + + W(11, -1124877446, -1131419477, -1119401015, 999290841); + WS(1048452526, 1015184705); + sum1 = W(0, -1138645804, 1032256237, -1114005032, 1026512560) + + W(1, -1140244890, -1131077462, 1024452554, -1122466948) + + W(2, -1110581160, 1049910352, -1103860667, -1098570768) + + W(3, -1119844941, 1040825100, 1017183931, -1119780203) + + W(4, -1106270342, -1113826958, -1097372109, -1087095027) + + W(5, -1095003728, -1095439444, -1128526324, -1106181921) + + W(6, 1048623137, 1036222192, 1057099626, 1061337654) + W(7, 1054485156, 1049129464, 1044366494, 1040163591) + + W(8, 1006760678, 1042522323, -1101854594, -1106501979) + + W(9, -1115808507, 1027903818, -1153767820, 1034550428) + + W(10, -1113210911, 1029873877, -1111141177, 1024448974) + + W(11, -1125584845, -1111469876, 1027351533, -1117633442); + sum2 = W(0, -1117856953, 
1002437005, -1122373390, 1023823647) + + W(1, -1131092262, -1119824143, -1129026789, -1118651901) + + W(2, 1034706164, -1114697961, -1110151254, -1078943314) + + W(3, -1111296752, 1008147760, 1015792571, 1029244251) + W(4, 999139177, 1016679180, 1034965747, 1069645782) + + W(5, 1026082346, 1005900489, -1134452913, -1121319377) + + W(6, -1115089108, 1009230224, 1007700984, -1105149666) + + W(7, 1033262730, -1142960238, 1015281188, 1026230788) + W(8, 970725323, 1019562884, -1131774725, 1038179083) + + W(9, 1014219446, -1130072184, 1012295392, -1122304449) + + W(10, 1017499348, -1131336757, 1031482891, -1120925641) + + W(11, 1015408436, 1001625665, -1137039581, 1019807852); + WS(1061396972, -1097419696); + sum1 = W(0, 1040610541, -1116833412, 1015657075, -1126977972) + + W(1, 1033993320, -1125945514, 1028019732, -1120964750) + + W(2, -1110826049, 1042368898, -1113808270, 1036447253) + W(3, 1043284110, 1048078310, 1028739112, 1052576556) + + W(4, -1087862644, 1000165941, -1108964145, -1085843362) + + W(5, -1088558114, -1086794197, -1087573386, -1079810761) + + W(6, -1108888122, 1058527541, 1056284429, 1053927177) + W(7, 1062174380, 1055390198, 1042115560, 1051035743) + + W(8, 1051684947, -1102910221, 1041698836, -1107922836) + W(9, 991752527, -1112253878, 1039363202, 1047360574) + + W(10, -1117156776, -1135746300, 1026275932, 1041042462) + + W(11, -1137078595, 1035139905, -1118906322, 1038370422); + sum2 = + W(0, -1104315228, -1117475550, 1019617669, -1096755788) + W(1, 1043056626, -1097959989, 1038305467, -1121707278) + + W(2, -1103350409, 1024254038, -1116633366, 1042486592) + + W(3, 1038440719, -1097184876, -1107570984, -1092525975) + + W(4, -1094844926, -1091235931, -1089312569, -1104902685) + + W(5, 1045881224, -1101957876, 1058820589, 1058872796) + W(6, 1062539510, -1112967686, 1056743222, 1052266072) + + W(7, 1037341407, 1012499609, 1048266302, -1099676541) + W(8, -1096027281, 1042482062, -1129394013, 1051055108) + + W(9, 1044017054, -1101861122, -1105903455, 
1041661910) + + W(10, 1014020953, -1119186298, 1028745390, -1093268483) + + W(11, 1054988226, -1102938164, 1026503526, -1135013257); + WS(-1084431788, -1073850193); + sum1 = W(0, -1121456839, 1023326765, -1111055334, -1131954777) + + W(1, -1127100870, -1115233108, 1032290155, -1113154576) + + W(2, -1130755883, -1119027356, 1007276024, 1032958718) + + W(3, 1051511197, 1038479638, 1033362061, -1118671783) + + W(4, -1131651239, -1113039213, 1041028837, -1121014635) + + W(5, -1080291009, -1101242168, -1095204295, -1109590335) + + W(6, 1035028387, 1052115068, -1087869490, 1031197854) + W(7, 1064028192, 1046762496, 1048797598, 1048241682) + + W(8, 1035837265, 1031790055, 1043959725, -1151139494) + W(9, -1097929447, 1016490449, 974160200, 1033898418) + + W(10, -1139076328, -1110649673, -1126605404, 1037205893) + + W(11, 1022476742, 1029406204, -1117985767, 1016018525); + sum2 = + W(0, 975878233, 1027199947, 1032202892, 1011577542) + W(1, 989447596, -1126785583, -1112180928, 1027448187) + + W(2, 1035912472, 1026239811, -1109490583, -1099491895) + W(3, 1027909011, -1142596491, 1032601180, 1030066347) + + W(4, 1033266404, -1101163205, -1092678923, 1052721602) + W(5, -1115079323, 1042994318, -1117880304, 999387083) + + W(6, -1098777501, -1120465270, 1026269275, 1058060839) + + W(7, 1022663675, -1109244590, -1139999774, -1113199846) + + W(8, -1115369638, 1047867282, 1048647954, 1026065727) + + W(9, -1107120056, -1139333782, -1113232790, -1128307819) + + W(10, 1024165887, -1114558090, -1127793863, -1110013405) + + W(11, -1115589183, 1028177183, -1119188249, 1010561254); + WS(1060891500, 1063998119); + sum1 = W(0, 1009319059, 1021371624, 1020060242, 1034805925) + W(1, -1119463744, 1029917861, 1022228167, 1028919037) + + W(2, 1034817251, -1123888698, 1007070174, 1031377177) + W(3, 1035958587, 1040845210, -1119265271, 1041595485) + + W(4, 1060685055, 1050963407, 1059880676, 1058492133) + W(5, 1057972187, 1045390927, 1050698442, 1042995863) + + W(6, -1082111981, -1089953728, 
-1083685732, -1080402290) + + W(7, -1087669949, -1092052092, -1101265316, -1093474029) + + W(8, -1146641530, 1042831681, 1044491976, 1049293617) + W(9, 1044279073, 1030870792, 1040824939, -1117179330) + + W(10, 1021753903, -1158245846, 1024323659, 974645755) + + W(11, 1017781825, 1029410098, 1009963113, 1033823734); + sum2 = + W(0, 1043252027, 1039685669, 1021402919, 1043798699) + W(1, -1106121381, -1107286980, -1116219530, -1105216890) + + W(2, 1031085111, -1103588836, -1094345084, -1097680472) + + W(3, -1098140010, -1117022930, -1111854401, -1165267820) + + W(4, -1089976691, -1136546874, -1106220008, 1053086422) + W(5, 1041831351, 1053158416, 1043898913, 1065796563) + + W(6, 1015926083, 1047286193, 1049855126, -1093227067) + W(7, 1030378489, 1047431201, -1088912308, 1047204695) + + W(8, 1075090314, 1055918010, -1100279282, -1086978538) + + W(9, -1081018505, -1090494906, 1049023531, -1076607098) + + W(10, -1090475122, 1041438201, -1097421747, 1055157984) + + W(11, 1047503463, 1040447738, 1013873926, 1060337257); + WS(-1071907259, -1110912590); + sum1 = W(0, -1125193009, -1128862543, -1134196340, 1030754750) + + W(1, -1115532794, 1035978988, -1122396199, 1030566929) + W(2, 1047025832, 1037187527, 1045585380, 1046461539) + + W(3, 1034859053, 1044478211, 1038023446, -1115297724) + + W(4, -1082520498, -1091004805, -1082717575, -1081098270) + + W(5, -1085641231, -1094770022, -1100806498, -1096364930) + + W(6, 1060301769, 1051495910, 1059488668, 1059029683) + W(7, 1058265227, 1047385124, 1051401113, 1030610907) + + W(8, 1034040338, 999732421, 1034460834, 1039737374) + W(9, 1026869576, 1035505465, -1108119003, 1049444580) + + W(10, 1027334603, 1007930762, 1012148988, -1153534395) + + W(11, 1008981841, -1141674924, -1131397284, 1004145403); + sum2 = + W(0, -1094095783, 1046265071, -1105766507, 1052972269) + W(1, -1118583019, 1048411623, -1102184915, 1063609884) + + W(2, 1071341959, 1041114835, -1093757522, -1096593198) + + W(3, -1091593516, -1091154406, 1057911473, 
-1073590228) + + W(4, -1106266089, -1087794787, 1036032273, 1065807667) + + W(5, -1120668367, -1098744738, -1101075731, 1041493985) + + W(6, -1079612356, -1090342602, -1097206613, 1072329002) + W(7, 1052074546, 1035089459, 1053464489, 1066861510) + + W(8, -1096220802, -1119797941, -1100480279, -1091841958) + + W(9, -1107977625, 1049018868, -1101117382, -1098841365) + + W(10, 1053270877, -1103321092, 1044676561, -1113258696) + + W(11, 1022355226, -1099949977, 1048742227, -1096064815); + WS(-1073583387, -1097642521); + sum1 = + W(0, -1113793830, 1025630043, 999171711, -1113970392) + W(1, 1041107242, -1126372055, 1019259188, -1141619744) + + W(2, 1040986278, 1018743169, 1043520539, -1126196031) + W(3, -1117788829, 1023908016, 999617264, 997975216) + + W(4, -1123577624, 1051202676, -1085714085, -1137921726) + + W(5, 1058609607, -1120709796, 1049515695, 1048163249) + W(6, 1026212810, -1107655120, 1058537807, -1102760649) + + W(7, -1084066799, 1038867522, -1105494638, -1113662887) + + W(8, 1031641306, 1015332695, -1102186706, 1042214156) + W(9, 1050734989, -1104026556, 1038371877, -1118473257) + + W(10, -1117606078, 1026666645, 999394704, -1098706922) + + W(11, 1028174279, -1105230460, -1131430066, -1117999154); + sum2 = W(0, 1020465344, -1122047285, -1132946638, 1029560286) + + W(1, -1137208000, 1032274109, 996568996, -1139618609) + W(2, -1114187457, 1035498840, 990134284, -1105572872) + + W(3, -1125836673, -1107008827, -1143550666, -1133886920) + + W(4, 1042661620, -1112200610, 1050532943, 1053825241) + + W(5, 1035380426, 1041934457, -1132156656, -1115627477) + + W(6, -1108558660, 990338200, -1098817485, 1046181371) + + W(7, 1052951690, -1099854916, -1101729420, -1120597401) + + W(8, 1022323488, 1037081003, -1116108394, -1112231866) + + W(9, -1098433232, -1115059895, -1106305341, -1145950712) + + W(10, 1024517966, -1114108748, 1033071181, 1024966834) + + W(11, 1018142458, -1114933005, 1025004711, -1129654551); + WS(1066138518, -1093674260); + sum1 = W(0, 1029114768, 
1016625343, 1023256234, -1135602902) + + W(1, -1154917052, -1126509217, -1119361423, -1120895465) + + W(2, -1096639483, -1107076945, -1097327813, -1112968866) + + W(3, 1039946769, -1145778087, 1040195377, 1035798312) + W(4, 1057895286, 1046373881, 1060592282, 1048700903) + + W(5, -1102042589, -1102212410, -1110743776, -1096085673) + + W(6, -1095967018, -1098130813, -1090692519, -1096019117) + + W(7, 1052511498, 1050590801, 1037092318, 1054517602) + W(8, 1030209323, 1027610352, 1010824623, -1147523994) + + W(9, -1107963320, -1106421308, -1114290095, -1103975991) + + W(10, -1115838467, 1036809869, -1116997111, 1034515722) + + W(11, 1025259308, 1031910367, 1015369596, 1032370884); + sum2 = + W(0, 1020680083, 1026207298, -1114338038, 1034412140) + W(1, -1119552802, -1132183796, 1014438782, 1026463590) + + W(2, -1098235374, -1129289080, 1035819795, 1034843888) + + W(3, 1030579314, 1031435954, -1118921036, -1110299389) + + W(4, 1040052819, 1000882372, -1104532064, -1096717996) + W(5, 1025849568, -1126927262, 1034356940, 1043556407) + + W(6, 1039185699, 1041518935, 1060121453, -1098039050) + W(7, -1085068698, -1105751584, 1039772175, 1060018227) + + W(8, -1105651771, -1108930987, -1132043948, 1059664416) + + W(9, 1064006582, -1108966794, -1090790505, -1083675102) + + W(10, 999965284, 1025606660, -1127194998, -1123799241) + + W(11, 1020099573, -1130436386, 1040432622, -1098649260); + WS(-1098394199, 1021768394); + sum1 = + W(0, -1106976773, 1030592418, -1109895628, 1031813276) + W(1, -1114022359, -1124672038, 980431986, -1118216524) + + W(2, 1047476460, -1106379994, 1041629170, -1108680996) + W(3, 1028771909, -1123109432, 1035760915, 1039049769) + + W(4, 1035645360, 1042879449, 1052123789, 1053022447) + W(5, -1107385030, 1044862779, 1023833154, 1040198899) + + W(6, -1107202165, -1121922344, -1091385002, -1090269731) + + W(7, -1107044710, -1127679127, -1107740288, -1099192133) + + W(8, -1121988294, 986778214, 1042582852, 1042227034) + W(9, 1044112989, -1115617842, 
1030134009, 1025832513) + + W(10, 1008308247, -1140215924, -1120193402, -1127396653) + + W(11, 973525940, -1139398721, 1014367554, -1116507498); + sum2 = W(0, -1098169790, 968544056, -1162122654, 1056997121) + + W(1, 1029158812, -1112228820, 1026696161, -1112296199) + + W(2, 1055345266, 1027898732, 1053687424, -1077105818) + W(3, 1039110353, 1037144043, 1022799116, 1042942743) + + W(4, -1106123518, 1021773530, -1089212318, 1068095045) + + W(5, -1090252350, -1124556656, -1130232224, -1104975561) + + W(6, 1039932359, 1036378047, 1035540057, -1098361429) + W(7, 1000425352, 1050038066, -1118055459, 1040445489) + + W(8, -1111072353, -1123343911, -1111121590, 1044527270) + + W(9, -1115281069, -1114307158, -1116705210, 1023167430) + + W(10, 1012808524, -1132858861, 1037852727, -1103837325) + + W(11, 1036594354, -1134065606, 1032061348, -1114916168); + WS(1064984812, 1025677564); + sum1 = W(0, 1043884023, 1030924649, 1038396303, 1036152448) + W(1, 1017544939, 1042241831, -1121467129, 1047065797) + + W(2, -1096329284, 1033742216, -1101734196, -1105624932) + + W(3, 1017123640, -1108991823, 1037154774, -1096026219) + + W(4, -1084712758, -1103101243, -1087209573, -1086845720) + + W(5, -1084903475, -1087299343, -1108220133, -1087523701) + + W(6, 1057726586, 1046959260, 1057188432, 1061206275) + W(7, 1060371326, 1055419738, 1041931586, 1055087388) + + W(8, 1043356655, 1018532660, 1046520365, -1114153679) + W(9, -1111584053, 1028245989, 994164350, 1051664559) + + W(10, 1012739123, -1131015624, 1002161670, 1044816791) + + W(11, 1024962678, 1043667629, -1114955616, 1040331724); + sum2 = W(0, 1043159502, -1105699647, 1038273264, 1032147900) + + W(1, 1048938986, -1106879197, -1128156680, 1049056829) + + W(2, -1104158106, 1050222709, 1025130152, 1032288328) + + W(3, -1103450106, -1110143950, -1098742781, -1091104362) + + W(4, -1085409392, -1090385012, -1106188919, 1061149199) + + W(5, 1050752016, 1049612036, 1057886527, -1095713225) + W(6, 1067096435, -1106627676, 1052171093, 
1075677902) + + W(7, 1075039778, 1073503496, 1075355126, 1082426924) + + W(8, -1090972778, -1102151020, -1086083449, -1070862987) + + W(9, -1070168511, -1072257998, -1070483864, -1070643407) + + W(10, -1098664617, 1050757442, 1039140273, 1030992941) + + W(11, 1040257314, 1041175752, 1030488248, -1098188298); + WS(-1072457547, -1104871915); + sum1 = + W(0, 1018087532, 1014551830, -1143311830, 1031881454) + W(1, 1033102921, 1020957612, -1158925470, 1015585054) + + W(2, -1138749297, 1036550950, 1001512248, -1098112129) + + W(3, -1104222042, -1104597767, -1125431314, 1042534317) + + W(4, -1129333765, -1131367952, -1083142279, 1014989673) + W(5, 1059974718, 1057969217, 1048954301, 1043242379) + + W(6, 983826087, 1050222745, 1055877542, 1020808474) + W(7, -1086706094, -1096646942, -1105443081, -1106542638) + + W(8, 1026100589, -1118152278, -1106883828, 1016760626) + W(9, 1045231050, 1040528910, 1027306868, -1120756612) + + W(10, 1007352484, -1123482688, 1034580622, -1109491581) + + W(11, -1122062610, -1114916930, -1147978606, -1122581117); + sum2 = + W(0, -1120473304, -1112539216, -1132295147, -1099708049) + + W(1, -1120301365, -1106531527, -1119309605, -1165429700) + + W(2, 1034530006, 1032795060, 1046686841, 1054832981) + W(3, -1109933894, -1089566811, 1004612697, -1105741108) + + W(4, -1151412702, 1009244103, 1037608248, 1050403530) + W(5, 1051005406, -1106362502, -1114169666, 1026755208) + + W(6, -1124099243, -1115907580, 1020085733, 1033863239) + + W(7, -1107650300, 1030750625, -1130967605, -1146151019) + + W(8, 1026058392, -1114805763, -1120554932, -1154011810) + + W(9, 1036477609, -1122049677, 1028389062, 1026996282) + W(10, -1131120105, 1025202928, 998829429, -1131382687) + + W(11, 1020773937, -1143352615, -1142800679, 1016610669); + WS(1057548396, -1118860492); + sum1 = W(0, 1024314991, 1015464700, 1033090170, 1024068436) + W(1, 1023937802, 1007695803, 1026830221, -1142835770) + + W(2, 1024521628, 1035643506, 1022826491, 1044765945) + W(3, 1040253828, 
1032416458, 1031527745, 1041535598) + + W(4, 1019907062, 1035362560, -1102305323, -1077384746) + + W(5, -1095947197, -1110302756, -1110046895, -1106773417) + + W(6, -1109277847, 1033181378, 1038789717, -1093016121) + W(7, 1047039772, 1032628147, 1032291149, 1038601945) + + W(8, 1042186208, 1019936830, 1047455161, 1050623422) + W(9, 1039235079, 1012944520, 1025595072, 1031404576) + + W(10, -1127185475, 1015346289, 1015056043, 1035221881) + + W(11, 1019339797, 1030260028, -1124358344, 1039118899); + sum2 = W(0, 1028948662, -1108748842, 1035054379, -1114955281) + + W(1, -1123628391, 1033670708, -1113438145, 1009774019) + + W(2, -1119520873, 1035535229, 1034598190, 1040844095) + W(3, 1017632867, 1028317650, -1152158269, 1033306906) + + W(4, 1040692087, -1128105382, -1105801492, 1058969109) + + W(5, -1140061979, -1098716294, -1100355456, -1097137530) + + W(6, -1099276378, -1125068566, 1046560183, 1035247795) + W(7, 1037021603, 1041621971, 1042440154, 1045186907) + + W(8, 1042585669, -1119570187, -1104135640, -1104363062) + + W(9, -1119904127, -1109815486, 1033417138, -1096381982) + + W(10, -1108959122, -1105777938, 1040780747, -1115696651) + + W(11, -1122498951, 1039115990, -1102421790, 1043903068); + WS(-1111617372, -1089239798); + sum1 = W(0, -1120946745, 1043411666, -1105344783, 1034980559) + + W(1, 1035032490, -1102176538, 1042691462, -1106084756) + + W(2, -1109363280, -1105933140, -1122482989, -1097138762) + + W(3, 1028445691, 1038047807, -1104930291, 1036625252) + W(4, 1036859267, -1100282243, 1033736099, 1040067055) + + W(5, -1085774908, 985367106, -1095561837, -1105930796) + + W(6, -1107333692, 1036658471, -1096910672, 1063625005) + W(7, 1059841631, 1050659758, 1056206133, 1039222942) + + W(8, -1131126919, 1046725686, -1138971218, -1100591479) + + W(9, -1101046354, -1099856795, -1114722211, 1033845467) + + W(10, 1028471724, -1102252305, 1048148056, -1115573502) + + W(11, 1018704810, 1038410359, -1104725204, 1032806392); + sum2 = W(0, 1032901751, -1107914664, 
1029069579, 987500422) + W(1, -1107918292, 1041931397, -1106970712, 1026585516) + + W(2, -1102980188, 1034775565, -1107463876, -1115674720) + + W(3, -1122769714, 1028640927, -1133982193, -1112107626) + + W(4, -1097455677, -1079544271, -1107584024, 1066142773) + + W(5, 1049477386, -1110827944, 1040943686, 1033119735) + W(6, 1019096213, -1123759676, 1039965269, 1060530292) + + W(7, 1039765158, -1133849713, -1110406142, -1129682772) + + W(8, 1032175116, -1101940532, 1028816201, 1036298034) + + W(9, -1100641824, 1041944181, -1113302080, 1007346649) + + W(10, -1112661460, 1048109574, -1102645998, 1048822254) + + W(11, -1106440053, -1113520406, 1032469881, -1114426196); + WS(1055684951, -1075449937); + sum1 = W(0, 1025203701, -1109225282, -1105594014, -1102495686) + + W(1, -1115277256, -1131312361, 1003392922, -1119401549) + + W(2, 1021145317, 1041132226, 1045336591, 1048290762) + W(3, 1046212734, 1033816821, 1042625602, 1032304674) + + W(4, -1089325052, -1098148265, -1089314051, -1085251962) + + W(5, -1086898788, -1089905605, -1090587428, -1086086859) + + W(6, 1050955910, 1048074946, 1059061644, 1063401377) + W(7, 1058388246, 1057949871, 1049257380, 1056447675) + + W(8, 1044060206, -1122722677, -1138312640, -1135340574) + + W(9, -1108206354, 1012872015, 1013091209, 1030899626) + + W(10, 1023449767, -1117792831, 1024771419, 1034487612) + + W(11, -1122363216, 1023079171, -1119440554, 1029355212); + sum2 = W(0, 1061277558, 1070915797, 1077465728, 1077005703) + W(1, 1067272551, 1035834095, -1140563054, -1101597023) + + W(2, -1075680192, -1078675502, -1072669743, -1071891757) + + W(3, -1079654744, -1094224921, -1124876273, -1106967769) + + W(4, 1064394375, -1099840736, -1080083122, -1110541808) + + W(5, 1034295344, 1034313258, 1044374977, 1056268760) + W(6, 1041412946, 1036217426, 1053075748, 1035413026) + + W(7, -1098463344, 1050839735, 1012713199, 1033733408) + + W(8, -1097187807, 1041322393, -1098777830, -1105806062) + + W(9, 1042040059, -1099205009, 1043077130, 
-1097742853) + + W(10, 1043650757, -1128025338, -1113957037, 1028555409) + + W(11, 1035679275, -1113787473, 1015942004, 1036121231); + WS(-1084991020, -1093321377); + sum1 = W(0, -1124603198, 1020905181, 1034567442, 1033810210) + W(1, 1026468027, 1034346016, 1012576759, 1040898454) + + W(2, 1041431403, -1106261797, 1012534679, -1100435893) + + W(3, -1113177704, 1029015460, -1109083497, 1045957208) + W(4, 1058092855, 1050746614, 1060858482, 1063338984) + + W(5, 1057782107, 1050350895, 1047981412, 1051078083) + + W(6, -1087194206, -1093664550, -1086211050, -1081371861) + + W(7, -1091610836, -1090363161, -1099126963, -1088769118) + + W(8, 1031898965, 1020373118, 1037199077, 1028138434) + W(9, 1041359812, 1026545296, 1017607178, -1114891854) + + W(10, 1027465426, 1019387090, 942151344, -1130539845) + + W(11, 1019123574, 1032894586, 1018549369, 1024075622); + sum2 = + W(0, 1049096145, 1050639335, 1060252300, 1068716746) + W(1, 1070836883, 1071114171, 1069477538, 1068968491) + + W(2, -1096421724, -1100344181, -1085369111, -1076762820) + + W(3, -1073687271, -1075542147, -1076870984, -1078719402) + + W(4, -1104462777, 1044471716, -1101216805, 1050347336) + W(5, 1052234046, 1009296899, 1047758574, -1114781860) + + W(6, 1019605429, 1054069324, -1124780213, 1025609425) + W(7, -1096692411, 1043747806, 1026561745, 1035243068) + + W(8, -1107039596, -1112798845, 1041817400, -1099054195) + + W(9, 1029953841, 1030832137, -1135279155, -1140937749) + W(10, 1039363330, -1113551308, 999612981, 1018249637) + + W(11, 1028531297, -1109453871, 1027426027, -1125031723); + WS(-1082530796, 1061926473); + sum1 = W(0, -1111745736, 1030277984, -1123296661, 1039021467) + + W(1, -1130604760, 1012520514, -1112767704, 1017586258) + + W(2, -1107298323, -1127531019, -1109831840, -1118701125) + + W(3, -1101933527, -1114851579, 1016124742, -1131932512) + + W(4, 1048819916, 1039611697, 1058045501, -1099048749) + W(5, 1052265202, 1034215157, 1037161440, 1010862879) + + W(6, 1033356744, -1113172467, 
1035918242, -1088898044) + + W(7, -1125679275, 1029808585, -1110952702, 1040689749) + + W(8, 1034157517, -1127895905, -1146238814, -1099659681) + + W(9, 1032846000, 1005542493, 1024904046, -1114167271) + + W(10, -1119982339, 1009757802, -1125062126, 1039341706) + + W(11, -1111731430, 1016627083, -1137100912, -1130467695); + sum2 = W(0, -1123675689, 1025856461, -1129839981, 1039506778) + + W(1, -1111122039, -1108253250, 1042547871, 1023517501) + + W(2, 1027790609, -1115257125, 1041635489, -1087170514) + + W(3, 1054414527, 1040911707, -1107819629, -1119501461) + + W(4, -1107220732, -1103597907, -1083432255, 1074899174) + + W(5, -1087814585, -1119836905, -1108795814, -1120345851) + + W(6, -1138688370, 1036222096, -1099581339, 1052206445) + W(7, 1033877978, 1035305654, 1040418607, 974999585) + + W(8, -1114685917, -1104487390, -1113019592, -1103205528) + + W(9, -1114160943, -1128600489, -1113593105, -1136105250) + + W(10, -1131813881, -1138958066, 1015980625, 1015820001) + + W(11, 1023501105, -1135082354, 1028405761, 1023691837); + WS(1064809580, 1026007555); + sum1 = W(0, -1117346201, 1012678303, -1112794575, 1024755966) + + W(1, -1119597108, -1109792204, 1039580268, -1109340249) + + W(2, -1139810490, -1117193870, 1032058104, -1127734554) + + W(3, 1040786347, -1106953519, -1120656619, -1123630216) + + W(4, 1018638922, 1041509643, -1089321581, -1106321711) + + W(5, 1042250009, -1136225478, -1140389406, 1031754384) + + W(6, 1046259126, -1116046893, 1058768682, 1057441571) + W(7, -1088983804, 1052663642, 1033398978, 1030125211) + + W(8, -1110265860, -1107022182, 1049529906, -1097674775) + + W(9, 1052962187, -1105006210, -1138416249, -1114619763) + + W(10, -1107316166, 1042684025, -1107194416, -1115653262) + + W(11, -1115453338, -1104239151, 1026892022, -1113265368); + sum2 = + W(0, 1002173025, 1022454821, -1114052781, 1026488668) + W(1, 1024785122, -1114947733, 1041552522, -1114686018) + + W(2, -1126476274, -1112202702, 1040267060, -1105276944) + + W(3, 1041996701, 
1024974628, -1106672600, 1014734543) + W(4, 993251281, 1027055467, -1106967376, 1037292044) + + W(5, 1032247852, -1103384730, 1024347316, 1038837578) + W(6, 1035289805, -1110248161, 1050537246, -1103163514) + + W(7, 1036995069, 1044720036, -1136575544, 1011597497) + W(8, 1026092616, -1106697397, 1051631259, -1119693560) + + W(9, 1026699348, -1113586574, 1012871948, -1113050536) + + W(10, -1119256919, 1035602798, -1119463370, -1109176262) + + W(11, -1123291766, -1103668424, -1104716490, -1124038001); + WS(1065652022, 1032044813); + sum1 = + W(0, 1008146233, -1138253430, -1111490857, 1034343482) + W(1, 1027957290, 1037589943, 1035138662, -1130048270) + + W(2, 1024151812, 1033154018, 1037025524, -1098142828) + W(3, -1093711910, 1019326054, -1137243780, 1033030167) + + W(4, -1109382253, -1111423088, -1089787453, 1056996567) + W(5, 1064926206, 1020462798, 1028825917, 1017317515) + + W(6, 1046726755, -1114751985, 1060185520, -1102691401) + + W(7, -1084615165, -1096112169, -1123985530, -1105819364) + + W(8, -1118504831, 1030270670, -1108385813, 1034117937) + W(9, 1042611828, 999245356, -1106962907, -1131349303) + + W(10, -1145994760, 1011776865, 1031075253, -1115595081) + + W(11, -1141498448, 1030480467, 1020535720, -1161641125); + sum2 = W(0, 1006780909, 1032029355, 928839112, -1118040285) + W(1, -1112805367, -1105940221, 1025941308, 1031169874) + + W(2, 1018874621, -1106542042, -1114343998, -1106692361) + + W(3, 1043467647, 1041450718, 1036583459, -1106475204) + + W(4, -1118402441, -1123089851, -1111882995, 1057618964) + + W(5, -1114692462, 1035656455, -1122657152, -1181112366) + + W(6, -1125742470, 1042324218, -1150379126, 1059562898) + + W(7, -1124030112, -1096899429, -1115452253, -1108009053) + + W(8, 1022906649, 1032351097, 1030201786, -1096401122) + + W(9, -1101041619, -1105376866, 1025207396, -1117532473) + + W(10, 1002124531, -1127943559, -1112138839, 1036092239) + + W(11, -1119061832, 1038704957, -1115257437, 1040262599); + WS(1060219372, -1086892801); + sum1 = 
W(0, -1103145773, 1041685441, -1114046300, -1105288038) + + W(1, 1036049319, -1105447101, -1153064218, 1037687846) + + W(2, 1039153858, -1139059104, -1139626983, 1051743245) + + W(3, 1005204005, -1096900831, 1006778518, -1097346218) + + W(4, 1051640744, -1111094658, 1060139625, 1048845947) + W(5, 1056234019, 1058595229, 1030114274, 1052830405) + + W(6, -1098525574, -1114399172, -1105262039, -1084251893) + + W(7, -1132035760, -1097183901, -1095492788, -1105878061) + + W(8, -1104573181, 1034492410, -1112320535, 1048366229) + + W(9, 1045757805, -1102598303, 1044243314, -1114955456) + + W(10, 1018968139, -1114173214, -1114750605, -1119683574) + + W(11, -1113866359, -1132035040, 1021887633, -1107291810); + sum2 = + W(0, -1116149857, -1115656995, 1030898684, -1101937257) + + W(1, -1172884346, -1108962642, -1122887710, 1035380843) + W(2, 1025241726, 1004099725, 1025926274, 1029996270) + + W(3, 1015423640, -1098527410, -1102965987, -1099100919) + + W(4, -1099180395, -1094029030, 1029208805, -1093188603) + W(5, 1043486640, 1057927618, 1033678913, 1054675670) + + W(6, 1043244905, 1042198638, 1050305603, 1051892024) + W(7, 1010118001, -1111697287, -1106879588, -1115267941) + + W(8, -1108018378, 1030698655, -1105752531, 1032179392) + + W(9, 1042453668, -1110311603, 1043859461, -1124505249) + + W(10, 1036684654, -1113978987, 1030311314, -1143836228) + + W(11, -1107174775, 1032971351, -1114814873, -1114563702); + WS(1054285911, 1050558006); + sum1 = W(0, -1123510162, -1131523010, -1131464621, 1019709806) + W(1, 1029482925, 1019519396, 1008876867, 990268585) + + W(2, 1038924416, 1041880394, 1042834130, 1022473868) + + W(3, -1100861061, -1099456110, -1103795177, 1023983666) + + W(4, -1103298920, -1093800836, -1095165898, 1057683379) + + W(5, 1065493943, 1059660975, 1052993120, 1049686449) + W(6, 1049975562, 1050884477, 1056805171, -1101108530) + + W(7, -1084744157, -1084827161, -1096422344, -1102767109) + + W(8, -1107117058, -1103773453, -1101616339, -1121085405) + + W(9, 
1044641696, 1037612741, 1041661496, -1112995616) + + W(10, -1132690963, 1013663344, 1008580322, -1112183179) + + W(11, -1107880231, -1118057053, -1120002940, -1125681410); + sum2 = + W(0, -1122499415, -1135750526, 1025808744, 1004729468) + W(1, 1038371609, 1025309219, 1037233345, -1107252608) + + W(2, -1115660402, -1103885919, -1107570715, -1109043837) + + W(3, -1101018403, -1106866200, -1100735475, 1049544712) + + W(4, 1019721643, -1163427536, 1050733408, 1068391994) + W(5, -1088555463, 1029079067, 1034253450, -1147071004) + + W(6, 1052675919, -1111228057, 1050715774, 1068640266) + + W(7, -1109791041, -1082318320, 1048468038, -1092391889) + + W(8, -1134010714, -1097257542, -1097200131, -1098019427) + + W(9, -1104872484, -1103711089, -1096395820, 1032346912) + + W(10, -1107636671, 1043515230, -1113330231, 1035061875) + + W(11, 1015805682, -1134559274, 1043601518, -1139706642); + WS(-1090663639, -1077388844); + sum1 = W(0, 1018174482, -1160081154, -1125628059, -1132569784) + + W(1, -1117916196, -1117849353, 1021610031, -1122253105) + + W(2, -1118693311, 1029100670, -1120731528, -1106887239) + + W(3, -1106415899, -1097169074, -1146324700, -1168966176) + + W(4, -1093711755, -1098783931, -1088582516, -1095941442) + + W(5, -1087125152, -1094690684, -1097735927, -1093052224) + + W(6, 1049109025, 1046071575, 1052420297, 1068562223) + W(7, 1064382514, 1058209900, 1048904111, 1056110513) + + W(8, 1040639418, 1021012498, -1106387497, 986381179) + + W(9, -1096200966, -1116837948, 1025702951, -1121509238) + + W(10, 1010315758, -1115518041, -1181480889, 1034527238) + + W(11, 974332196, 1015836076, -1123641014, 1025570188); + sum2 = W(0, 1034436941, -1113720588, 1024698569, -1106229774) + W(1, 1036635696, 1046234567, 1052080385, 1034351318) + + W(2, -1094381136, 1042206203, -1096498964, -1082651758) + + W(3, -1065300211, -1070759890, -1090746117, -1089089289) + + W(4, 1032181918, 998633097, 1057336919, 1076874404) + W(5, 1081990850, 1049370102, -1114040834, 1035723536) + + W(6, 
1038416019, -1112936962, 1044225483, 1035551973) + W(7, 1041430251, 1045207615, 1038285588, -1119691531) + + W(8, -1157362266, 1041189037, -1114037642, 1029476436) + + W(9, 1035032611, 1036601034, -1115422836, -1130950394) + + W(10, 1040127243, -1108577062, -1122557473, 1033287923) + + W(11, 1024588020, 1015731519, -1155116306, 1031958369); + WS(-1080363926, -1071486509); + sum1 = W(0, 1035667824, -1106562756, 1048836261, -1099668353) + + W(1, 1041756943, 1034350288, -1111757455, 1036008540) + + W(2, -1117755862, -1102851061, 1026770536, -1096598034) + + W(3, -1111989629, -1117155588, -1106686285, -1110305168) + + W(4, 1058850627, 1044522508, 1060900423, 1062774732) + W(5, 1060314417, 1058954340, 1047827972, 1058323473) + + W(6, -1089812638, -1098855847, -1094273901, -1081351512) + + W(7, -1091724133, -1097309107, -1089979637, -1097705308) + + W(8, -1119781738, -1124105919, -1136163052, 1019768789) + + W(9, 1036217472, 1034476769, 1034808863, -1112569431) + + W(10, 1033333673, -1122920906, 1034167779, -1100720585) + + W(11, 1023569595, 1037864032, -1108224543, 997120104); + sum2 = W(0, -1130221439, 1039521143, -1106623296, 1040812416) + + W(1, -1108121630, 1011405671, 1024610419, -1120568012) + + W(2, 1026473921, 1040027805, -1103850305, 1044422007) + W(3, 1033782288, -1112409330, 1042204829, 1007920671) + + W(4, -1098631230, 1049962270, -1102633929, 1053913409) + + W(5, 1041179127, -1118313372, 1033589003, -1105557487) + W(6, 1059210683, 1037922688, 1032319554, 1075309200) + + W(7, 1051007822, -1114991064, 1064519547, 1041664007) + W(8, 991949852, -1128609979, 1032322677, -1093367457) + + W(9, -1128820939, 1042490244, -1108707286, -1108726636) + + W(10, -1088875775, 1043008084, -1081254616, -1078110278) + + W(11, -1087823621, -1085620101, 1038944538, -1097283302); + WS(-1085146860, -1078432897); + sum1 = + W(0, 1000247468, 1037132754, -1107087079, 1043827999) + W(1, -1105171356, 1034542459, -1139389265, 1019250012) + + W(2, -1149696569, -1105005939, 1047641075, 
-1094624073) + + W(3, 1038438012, -1098392071, 1037791108, -1111893867) + W(4, 1046594056, 1049101493, 1036712721, 1056540903) + + W(5, 1056427897, 1052522438, 1039493090, 1053002294) + W(6, -1105608437, -1092242085, 1034057408, -1091288646) + + W(7, -1092984875, -1099696227, -1123704450, -1096292923) + + W(8, 1029982264, 1035020227, 1000135432, -1097712673) + W(9, 1048401189, -1113847620, 1041969240, -1109486882) + + W(10, -1128935441, -1114749360, 1041253327, -1098691603) + + W(11, 1031740017, -1124493285, 1035876258, -1117863020); + sum2 = W(0, 981650725, -1123188651, -1132394336, 1019312288) + W(1, 1007032529, 1019035800, -1139057577, 994405618) + + W(2, 1022634928, -1141995321, -1159373413, 1040593594) + + W(3, -1125839148, -1144628769, -1130558040, 996479090) + + W(4, -1122722467, 1043118137, -1111962066, -1085264893) + + W(5, 1035586791, 1030456162, 1025167346, -1121727683) + + W(6, -1134931805, 1038372119, -1088039805, -1062981933) + + W(7, -1088420516, 1033594923, -1132235322, -1141102409) + + W(8, -1114801830, -1108690895, 1059960197, 1086226578) + + W(9, 1052041256, -1121903247, -1127116104, 1010205301) + + W(10, 1035155349, -1113558499, -1123558064, -1122982849) + + W(11, 1041973847, -1112866008, 999416881, 1015327588); + WS(1065314092, 1028007882); + sum1 = + W(0, 1031315399, 1009229913, -1129846167, 1030512684) + W(1, 1027211107, 1024491495, -1133667543, 1020076159) + + W(2, -1118910333, 1044274103, 1040482617, 1028502523) + W(3, -1098008831, -1114626726, 1030034106, 1035646611) + + W(4, -1110071882, -1091088835, -1081937298, 1057125791) + W(5, 1063505604, 1056913060, 1045615294, 1046315392) + + W(6, 1027958726, 1049048403, 1058342964, 1041882615) + + W(7, -1081007746, -1094994912, -1103714125, -1120947018) + + W(8, -1132049297, -1098774521, -1097400260, -1128387300) + + W(9, 1042307230, 1046624506, 1018202602, -1116178789) + W(10, 1030345978, 1015648546, 1033820695, -1123750794) + + W(11, -1119357859, -1108219417, 1021380405, -1113806211); + sum2 = 
W(0, -1105494348, 1042860341, 1034523914, 1032509803) + + W(1, 1037500440, -1136462181, -1115571726, 1030953968) + + W(2, 1051114204, -1095282678, -1099570184, -1092355888) + + W(3, -1093487707, -1101634066, 1038218158, -1144657738) + + W(4, -1106549278, -1095274416, -1097084890, 1068351366) + + W(5, 1045336627, -1093371043, 1025607496, -1098135458) + + W(6, -1106413411, -1115620988, -1100406176, 1061966336) + + W(7, 1064976080, -1095768559, 1051011462, -1103066203) + + W(8, 1026636954, 1032482885, -1103782738, -1111829172) + + W(9, -1099957245, 1011878641, -1122142689, 1046841453) + + W(10, -1101805730, -1123651604, 1033985372, 1041181693) + + W(11, -1105149847, 1041978329, -1106959075, 1034424520); + WS(-1100053422, 1042143034); + sum1 = + W(0, -1121193300, 1027006179, -1116784408, -1124403186) + W(1, 1032959000, 1036096698, -1106874909, -1140087089) + + W(2, 1038907933, -1101648146, 1051061595, -1096312949) + W(3, 1021329739, -1104405590, 1032316755, 1026338531) + + W(4, 1037214192, 1052804861, -1092953078, 1063691363) + W(5, 1057610578, 1046101531, 1033832353, 1044117027) + + W(6, -1104157285, -1116788520, -1115727182, -1094770533) + + W(7, -1085255924, 1032460934, -1101777344, -1123976854) + + W(8, 1003265009, -1105088109, 1044603252, -1110178331) + + W(9, 1042273837, -1099699080, 1042741769, -1110400285) + + W(10, 927491180, 1036085839, -1112320669, -1132796507) + + W(11, -1129217311, 1029210945, -1111566277, 1023324661); + sum2 = + W(0, -1141740532, -1141563308, -1146742484, 1037326381) + W(1, -1100634945, -1112991637, 998071241, 1028661660) + + W(2, -1127364827, -1115378106, -1097895309, -1089066292) + + W(3, 1074712949, 1036278807, 1023404119, 1032269699) + W(4, 1016858659, 1014774982, 1061549017, -1086943216) + + W(5, -1081904953, 1016249563, -1104500239, -1118089334) + + W(6, 1019476887, 1033910688, -1098904537, -1087533486) + W(7, 1051894188, -1105464222, 1039001405, 1002948788) + + W(8, 1027610232, -1098709711, 1048121474, 1044386108) + W(9, 
1035948687, 1034738839, -1111082062, 1032376859) + + W(10, -1114104878, 1038463125, 1009723338, -1128378528) + + W(11, -1114075451, 1023041183, 1028418264, -1116656881); + WS(1064553004, -1122811923); + sum1 = + W(0, -1129171987, 1032276908, -1113233767, 1041762428) + W(1, -1110071667, 1027123610, -1123823416, 1026480898) + + W(2, 1031404000, -1105326008, 1049333812, -1095271692) + + W(3, 1021654775, -1101813124, 1034262350, -1114787413) + W(4, 1045766677, 1049463355, 1036005443, 1059787495) + + W(5, 1057980603, 1052117804, 1041561742, 1051913520) + W(6, -1102182210, -1094389820, 974120215, -1088583546) + + W(7, -1092394330, -1097358280, -1111517243, -1097569834) + + W(8, 1024429036, 1026361994, 1035186995, -1095666629) + W(9, 1048809333, -1132328890, 1043990330, -1107060706) + + W(10, -1144455651, -1121595412, 1034062296, -1100003363) + + W(11, 1024774373, -1125809241, 1035275874, -1116630377); + sum2 = + W(0, 1027128731, -1153474964, 1032141903, -1114194268) + W(1, 1010972171, -1128303982, 1020076945, -1125252319) + + W(2, -1118374135, 1014938475, -1106631533, 1049333862) + + W(3, -1117272233, 1026850075, -1130317298, 1023253865) + + W(4, 1034513610, -1107487108, 1042187602, -1091768288) + W(5, 1029258819, -1112584656, 1019094573, 1030375545) + + W(6, -1123174378, -1123472496, 1054134699, 1082046784) + + W(7, 1048672371, 1015595275, -1143083862, -1112246350) + + W(8, 1026835315, 1026726549, -1092438474, -1066065893) + + W(9, -1108402828, -1129520676, 1012988295, 1033146096) + + W(10, -1114420605, 1026647445, 1008281595, 1032174407) + + W(11, -1107223380, 1020663133, 1016020771, -1120189048); + WS(1066855734, -1119441794); + sum1 = W(0, -1127562022, -1123770274, -1115364812, -1108591004) + + W(1, -1109630903, 1012450637, -1111453929, -1112819014) + + W(2, 1039804281, -1106380690, 1049486942, 1052921753) + W(3, 1002857256, 1049080999, -1123146086, 1045715313) + + W(4, -1090674092, -1096623933, -1088928155, -1086271011) + + W(5, -1088126629, -1098448030, 
-1095526376, -1090620137) + + W(6, 1057948125, 1041520634, 1058254390, 1066610309) + W(7, 1047590014, 1058670697, 1020669183, 1057080301) + + W(8, -1113289318, -1102744925, -1126493725, -1125400685) + + W(9, -1097419497, 1027654745, -1103558716, -1122765265) + + W(10, 1040603681, -1104646487, 1030330554, 1047144630) + + W(11, -1118729135, 1040353710, -1110153949, 1038000005); + sum2 = + W(0, 1029773001, -1098400772, 1049403575, -1106957817) + W(1, 1040180418, -1125940042, -1121768997, 1035657430) + + W(2, 1045848843, -1098127734, 1040542896, -1113384065) + + W(3, -1098054809, 1051521525, -1098090743, 1035815778) + W(4, -1100354950, 985414438, -1094506611, 1065317808) + + W(5, -1102424534, 1038107076, 1030043463, -1104826760) + W(6, 1042255280, -1114969170, 1044461531, 1063282493) + + W(7, -1111953320, 1052337139, -1098581045, 1052586351) + + W(8, 1031741567, -1101857955, 1033605854, -1099888837) + + W(9, -1148110913, -1125555808, 1034608926, -1105779304) + + W(10, -1106457511, -1121665039, -1106685198, -1090380876) + + W(11, -1097784009, 1010307405, -1105861523, -1121200133); + WS(1024108216, 1053619151); + sum1 = + W(0, -1110237276, 1036125123, -1105593599, -1116690717) + + W(1, -1138403916, -1118424632, -1118568038, -1117554448) + + W(2, -1117217830, -1104025987, 1053313098, -1107833238) + + W(3, -1112296045, -1108171422, 1025019530, -1113883668) + W(4, 1029541828, 1035829669, 1040682969, 1058976450) + + W(5, -1104638900, 1053496066, 997886569, 1047703644) + W(6, -1116974327, 1029841740, -1093167724, 1049118258) + + W(7, -1102593951, -1123704112, -1107943218, -1108512765) + + W(8, 1028000106, -1125285423, 1031701731, -1094097902) + + W(9, 1055269059, -1100830255, 1036129823, -1109656827) + W(10, 997960357, -1115997186, 1020700860, 1017239158) + + W(11, -1122615091, 1029941429, -1111217181, 1020926429); + sum2 = + W(0, -1119993039, 1038893624, -1105602869, -1106354535) + W(1, 1032998629, 998835515, -1119543359, 1026155597) + + W(2, -1109450011, -1096262037, 
1055566572, -1158461197) + + W(3, -1117939535, -1108137959, 1037096144, -1114868339) + + W(4, 1040401784, 1053717996, 1043380434, -1105416337) + W(5, -1087999087, 1027244587, -1117843291, 1031605777) + + W(6, -1106582025, -1117988753, -1092886002, 1072933379) + + W(7, -1083385663, 1045359897, -1101171319, 1022787365) + W(8, 1033614749, 1000703247, 1034388956, -1081255560) + + W(9, 1066426555, -1093783038, 1051461624, -1100938466) + + W(10, -1147493015, -1114388777, 1043473668, 1043342555) + + W(11, -1094544057, 1045623937, -1102892249, 1041102940); + WS(1062840044, 1036517115); + sum1 = + W(0, 1027882709, -1102360361, 1037386637, 1026327034) + W(1, -1102333276, 1042539613, -1113170154, -1117867630) + + W(2, 1024218619, -1119309706, 1051294017, -1105387538) + W(3, 1045232110, 1042423170, -1107171938, 1042893141) + + W(4, -1088734638, 1031157223, -1082181941, -1087289600) + + W(5, -1089919726, -1088061047, -1126992520, -1098512042) + + W(6, 1057788973, 1045291130, 1057574554, 1064725383) + W(7, 1056789371, 1052153154, -1124580994, -1109192172) + + W(8, 1032695796, -1107257205, 1039133128, 1024151288) + W(9, -1131902346, 1045762482, -1130270357, 1047580087) + + W(10, -1105941698, 1032367183, 1015412087, -1102302982) + + W(11, 1046746652, -1110282969, -1116552186, 1032060742); + sum2 = + W(0, -1128425377, 1027402769, -1122701285, 1029460017) + W(1, -1128656941, 1015114546, -1131912907, 1012677721) + + W(2, -1106124336, -1123408727, 1035952055, -1106291502) + + W(3, 1034330078, -1137130753, -1151273092, -1141059698) + + W(4, -1106789449, 1046361983, 1033758045, -1092942672) + W(5, 1022825967, 1043050798, 1026393205, -1114857397) + + W(6, -1116881359, 1040877836, -1092736894, -1089281400) + + W(7, 1067937621, 1057367901, -1100743964, -1116376233) + + W(8, -1108203604, 1041799035, -1110885432, 1058715791) + + W(9, 1060265099, -1090127289, -1100147508, -1085394823) + + W(10, -1154187588, 1037669599, -1106801609, 1044648045) + + W(11, -1104725901, -1099584998, -1114467924, 
1048436512); + WS(-1090906199, 1032077706); + sum1 = W(0, -1133027020, -1121893314, 1023318639, 1029445080) + + W(1, 1001128051, 1034316233, -1103958128, 1035906358) + + W(2, -1114971356, 1044790618, -1123234602, 1040694938) + + W(3, 1035215242, -1102125462, 1043268297, -1122145747) + + W(4, 1042474992, -1113284386, 1041326284, -1105269270) + + W(5, -1090300845, 1049843289, -1131346756, 1025511402) + + W(6, -1109782777, 1033127972, -1090554926, 1053731809) + + W(7, 1054234875, -1098556565, -1121911775, -1113919963) + + W(8, 1040233361, -1105262125, 1034823031, -1109230339) + + W(9, -1098565414, 1042417542, 1026215817, -1122686272) + + W(10, -1113511763, 1025873725, 1011115622, -1129412204) + + W(11, 1038160135, 1018477632, 1009497794, 998364239); + sum2 = W(0, 1010874613, 1031993168, -1110586713, 1035954309) + + W(1, 1034349301, -1122071401, -1140127141, 1029284667) + + W(2, 1030375188, -1098558104, -1116546039, -1089737673) + + W(3, -1094899703, 1053685859, -1108774266, -1124583674) + + W(4, -1115235366, 1051363302, 1056517150, 1057138262) + + W(5, -1098486858, -1093546729, 1034072089, -1122424073) + + W(6, 1049471257, -1094785947, 1031272431, -1102658594) + + W(7, -1112892354, 1046184196, -1109252950, 1037904141) + + W(8, -1100352469, 1040320218, -1111763071, 1043402933) + W(9, 1057577914, 1039716432, -1122970680, 958822899) + + W(10, 1040126787, -1131978284, -1124433226, 1027656919) + + W(11, -1092308561, -1116345243, 1016915650, -1140678181); + WS(1065971990, -1117349785); + sum1 = W(0, 1041698888, -1112126136, 1034826515, -1129568652) + + W(1, -1109843230, 1024013025, 1009653886, -1137279078) + + W(2, -1103591502, 1043874709, -1101299034, -1186542860) + + W(3, 1041410157, -1159955630, 1030113747, -1120752640) + + W(4, -1130045402, 1044992637, -1083999976, -1081978560) + + W(5, -1085461041, -1090063736, -1097631850, -1094129699) + + W(6, 1049080916, 1047240445, 1060703412, 1061790267) + W(7, 1058214583, 1059365054, 1049303674, 1052607100) + + W(8, 1009208654, 
-1121869537, -1123043996, 1028920692) + + W(9, -1105683576, 1011681824, -1125202109, 1021258909) + + W(10, 1021475879, -1134810027, -1144695867, 1017436104) + + W(11, -1131209785, -1139574520, 1000157667, 1022839599); + sum2 = W(0, -1117014919, -1130364155, -1106662873, -1114007102) + + W(1, 1049348603, -1111037227, 1016435713, 1035610526) + + W(2, -1073720352, -1086725672, 1069712606, 1062079613) + + W(3, -1102930718, 1020707077, -1114426132, 1034066723) + + W(4, -1104300581, -1091483554, 1061376514, 1060429625) + + W(5, -1098979910, -1091915359, 1043583392, -1091474650) + + W(6, 1055286111, -1109695767, -1114136197, 1052661025) + + W(7, -1131131469, 1050631696, -1112682766, -1112601210) + + W(8, -1105567211, 1032269150, -1138351682, 1018497014) + + W(9, 1032920333, -1117367449, -1122101547, 1039950258) + + W(10, -1108683793, 1033653176, -1115525434, 1043366723) + + W(11, -1107942477, -1139459182, -1122885189, 1026792554); + WS(-1087438700, -1079683283); + sum1 = + W(0, 1017584527, -1126645293, 1024510860, -1126938201) + W(1, 1025818403, -1163276521, 1040771729, -1160528892) + + W(2, -1111085935, -1110290014, 1033919541, 1049726143) + W(3, 1042036038, -1114444262, 1029908410, 1035428627) + + W(4, -1097724786, -1098381907, -1083731542, -1081500983) + + W(5, -1097215121, -1091706404, -1097070911, -1097940968) + + W(6, 1052950249, 1053593812, 1063649365, 1059087538) + W(7, 1052548297, 1057898452, 1045403069, 1052829673) + + W(8, -1112145065, -1106211842, 1003633557, 1023563130) + + W(9, 1019377518, -1104008328, -1115067299, 1032435766) + + W(10, 1032156829, -1129163939, -1118260846, -1120786763) + + W(11, -1116189667, 1023503580, -1112674412, 1015355644); + sum2 = W(0, 1027460995, -1126036613, 1029366028, -1129309981) + W(1, 1020875062, 988377354, -1109676838, 1030688918) + + W(2, -1139353737, -1172793556, 1021322407, -1097919394) + + W(3, -1121912528, 1034370211, -1137199485, -1107628664) + + W(4, -1116062036, 1041340702, 1050063273, 1055277098) + + W(5, 
-1106551211, 1041415105, -1128558209, -1134721053) + + W(6, -1105887481, 1031879973, -1101108606, 1009470709) + W(7, 1047064096, 1034815104, 1034836242, 1049845283) + + W(8, 1043084390, -1108443334, 1031078409, 1041616760) + + W(9, -1107655532, 1018545561, 1014245687, -1104117922) + + W(10, -1112909236, -1144584219, -1116288208, -1099143154) + + W(11, -1100042276, -1108610892, -1111876576, -1123449114); + WS(1044595630, -1081949232); + sum1 = + W(0, 1039675598, -1114551286, 1008233858, -1121252161) + W(1, 1033942172, -1122512668, 1026205083, -1118876117) + + W(2, -1112185757, 1041771076, -1113654239, 1036138408) + W(3, 1043328887, 1047911808, 1028532559, 1051948425) + + W(4, -1088266627, -1144202605, -1106609306, -1087961804) + + W(5, -1087619944, -1086197995, -1087181441, -1079365891) + + W(6, -1103352647, 1058269731, 1055957016, 1060473075) + W(7, 1062639593, 1055667963, 1042300482, 1051816684) + + W(8, 1051863744, -1102436935, 1041931682, -1105147684) + W(9, 1002098271, -1111264410, 1038991809, 1047648830) + + W(10, -1115854735, -1130126796, 1017898629, 1041818984) + + W(11, -1130413359, 1032948860, -1115140949, 1035539513); + sum2 = + W(0, 1050752765, -1102559074, 1004621594, -1106905815) + W(1, 1038134564, -1114858390, 999530010, -1104442024) + + W(2, -1087877559, 1042152886, -1095393630, -1106434273) + + W(3, -1099723068, 1036662936, -1101776331, 1054232067) + W(4, -1089125923, 1035361880, 1058416370, 1058355794) + + W(5, 1056388755, 1040360076, -1106345832, 1034476952) + W(6, 1063798323, 1052128007, 1062325475, -1120474523) + + W(7, 1051681161, -1096401119, -1106373450, -1089943559) + + W(8, 1013490077, -1090156566, -1094287538, -1101206197) + + W(9, -1100293430, -1104699402, 1048788631, -1113690608) + + W(10, -1098786735, 1029952535, -1116454071, 1025568531) + + W(11, 1009033997, 997069429, -1117506479, 1019208527); + WS(-1082323244, 1081334754); + sum1 = + W(0, 1017652669, 1026074030, 1026308835, -1139023631) + W(1, -1124784756, -1157466090, -1151223253, 
-1123067319) + + W(2, -1114671162, -1101582293, -1102225205, -1128122150) + + W(3, 1047028384, 1041367009, 1037350457, 1018518877) + W(4, 1045686283, 1051997878, 1056290627, -1109179969) + + W(5, -1088067773, -1090935975, -1100014755, -1100605637) + + W(6, -1100397239, -1097273854, -1091487775, 1033600923) + W(7, 1058585755, 1058457225, 1047302995, 1040259816) + + W(8, 1040334760, 1036251335, 1044247257, 1034603313) + W(9, -1098471302, -1104671877, -1113240670, 1007040336) + + W(10, -1120290385, 1010893986, -1114193219, 1015477828) + + W(11, 1027532986, 1028455628, 1023216600, -1167442276); + sum2 = + W(0, 1021161720, -1130912565, -1130074229, -1117040706) + W(1, 1025588007, 1012731209, -1117264939, 1035821810) + + W(2, 1034310052, -1120070653, 1013066141, -1097167140) + W(3, -1126989997, 1035986228, 979500482, -1131150309) + + W(4, 1028060302, 1057895756, 1050008660, -1077198236) + W(5, 1036249174, 1061480822, 1036420478, -1114248001) + + W(6, -1111691904, 1060473449, 1048707954, -1071981659) + W(7, 1055589119, 1068029984, 1025639810, -1104930835) + + W(8, -1117456161, -1103940220, 1041027109, -1101172514) + + W(9, 1014637039, 1014223793, 1027808652, -1128414380) + + W(10, -1120681063, 1030806761, -1131293055, 1009545637) + + W(11, 1035622377, -1130249456, -1120874755, 1036288471); + WS(1053726551, 1012659382); + sum1 = W(0, 1032083097, -1115995457, 1025061558, -1122031383) + + W(1, 1038196358, -1115071689, -1130575936, -1113099877) + + W(2, -1106638998, 1034541187, -1115280977, -1104507249) + + W(3, -1114143426, 1029764374, 1031225348, -1102798468) + + W(4, -1098702088, -1098027112, -1088652495, -1092902123) + + W(5, -1085882325, -1097451113, -1098237300, -1111014420) + + W(6, 1047871676, 1032148771, 1060231518, 1065996118) + W(7, 1058126194, 1059563219, 1035470703, 1054581342) + + W(8, 998886703, 1042659740, -1106688851, -1104094145) + + W(9, -1102012910, -1121352461, -1128714910, 1000597622) + + W(10, 1022035144, -1112198366, 1034668492, 1024148149) + + W(11, 
1010989620, 1012420282, -1123786037, 1024134712); + sum2 = W(0, -1124504165, 1044199209, -1105768285, 1030597508) + + W(1, -1097181409, 1040834484, -1106395293, 1038991009) + + W(2, 1043258069, -1105860909, -1149580326, 1054859448) + + W(3, 1065166573, 1012576825, -1091772000, -1077787203) + + W(4, -1119272862, -1096303760, 1030851358, 1057797022) + + W(5, 1062380695, -1116955554, -1113754765, -1098893101) + + W(6, -1120765402, 1027333462, -1110802573, 1040041196) + + W(7, 1017534493, -1108797661, 1042765191, -1110880129) + + W(8, -1132584009, -1112573193, 1028533186, 1035436142) + + W(9, -1141430739, 1032147286, -1113752657, -1120959090) + + W(10, -1130103221, 1030769098, -1111917317, 1033418125) + + W(11, -1140263257, -1126074797, 1020067757, -1136443945); + WS(-1091585367, -1096979755); + sum1 = W(0, -1125210148, -1121704545, -1120024086, -1106223422) + + W(1, 1027649645, -1106879908, 1033259600, -1108105765) + + W(2, -1104771642, 1041680018, 1043362647, 1048656073) + + W(3, 1038801018, -1103521718, -1106174833, 1031226898) + + W(4, -1104387016, -1093500244, -1085845307, -1086139981) + + W(5, 971788283, -1124812374, 1011346436, -1119913673) + W(6, 1039960904, 1056326757, 1060708396, 1060053831) + + W(7, -1115241635, 1040958787, 995843043, 1040930453) + W(8, 1043768271, -1114046313, 1022927067, -1105827043) + + W(9, -1120254754, 1038747748, -1118768800, 1040461604) + + W(10, -1127914889, -1113011982, -1140249314, -1132509525) + + W(11, 1027284814, 1026834103, -1115807705, 1029803545); + sum2 = + W(0, 1007158722, -1117465525, 1020157105, -1115575087) + W(1, 1029731993, -1115951245, 1042169362, -1111020797) + + W(2, 1004004293, 1030693613, 1007550482, 1032793866) + W(3, -1104867994, -1111503366, -1101768383, 1042858280) + + W(4, -1123076619, 1034532408, -1101572236, 1059720067) + + W(5, -1112732963, -1098900155, -1132362761, -1101011815) + + W(6, -1109929145, -1107544976, -1110988335, 1067665797) + W(7, 1048427228, 1058832170, 1029991341, 1043396912) + + W(8, 
-1104848259, -1095482533, -1085485924, -1090269083) + + W(9, -1135504754, 1037004208, -1108400441, 1041064016) + + W(10, 1045822378, -1130708853, 1036333634, -1111111456) + + W(11, -1105802165, -1115747431, -1132122165, -1126685993); + WS(1050761175, 1067771859); + sum1 = W(0, 1007679755, 1016344868, -1111212068, 1035369236) + W(1, -1114152399, 1027679203, 1024513697, 1023429051) + + W(2, -1103008542, 1051881455, -1113694143, 1057165115) + + W(3, 1030154052, -1119705863, 1037582923, -1106945041) + + W(4, -1117542415, -1099208581, -1094852820, -1086714990) + + W(5, -1087862176, -1098820035, -1101037788, -1098056967) + + W(6, 1040506400, 1050438683, 1036449755, 1067282489) + W(7, 1049754336, 1030273979, 1048082428, 1020854026) + + W(8, -1107587308, -1138473483, -1102372368, 1045850761) + + W(9, -1112794889, -1111030523, -1114576136, 1000524418) + + W(10, -1123566389, 1035136294, -1110750261, 1047098321) + + W(11, -1108921199, -1158006009, 1025557440, 1029171882); + sum2 = + W(0, -1108377721, 1043256577, -1098471532, 1048495965) + W(1, -1103576309, 1036658009, -1107738492, 1030436882) + + W(2, 1042939389, -1092756549, -1140040329, -1094992475) + + W(3, 1017276157, -1104050853, 1032200565, -1104348148) + + W(4, -1118508723, -1136988785, -1099870420, 1064902851) + + W(5, 1045749581, 1045838269, -1110226959, 1042649061) + W(6, -1101760284, 1050996294, -1098704018, 1062859454) + + W(7, -1089477514, -1157061765, 1046491511, -1106567852) + + W(8, 1037932369, -1104406221, 1015984185, -1098855018) + + W(9, 1025174042, -1121418697, -1106492592, 1027657606) + + W(10, -1111488356, 1040203060, -1103619810, 1045571731) + + W(11, 1025902034, -1107737652, 1044444185, -1105647758); + WS(1057448172, 1069108917); + sum1 = + W(0, 1025232554, -1128422002, 1032607415, -1106129908) + W(1, -1141856298, 1015221011, 1010040610, 1026592994) + + W(2, -1116799928, -1123655391, -1111298160, -1119092894) + + W(3, 1044964179, -1128617181, -1116425884, 1015937075) + + W(4, -1100727837, 1037047614, 
-1102160962, 1036246919) + + W(5, -1088316592, -1116622467, 1027658969, -1102536008) + + W(6, 1055215485, -1104723136, 1056783106, -1165535249) + W(7, 1049606186, 1044617768, -1132120236, 1042573835) + + W(8, -1106508548, 1040448906, -1103004521, 1040745899) + + W(9, -1132962087, -1103456342, 1037741920, -1118303623) + + W(10, -1139419762, 1006660815, 1016236833, -1123853933) + + W(11, 1016605067, -1160895193, 1018376479, -1120915345); + sum2 = + W(0, -1128749003, -1091479855, -1124909389, 1030767422) + W(1, 1028475536, -1118686225, -1133507353, 1003197653) + + W(2, 1042697161, 1061865996, 1067634595, -1111988197) + W(3, -1102439466, -1117920963, 1022976741, 1008285339) + + W(4, -1101835107, -1097573200, -1083343720, -1090130192) + + W(5, 1055493018, 1027292464, 1030733458, 1002427213) + W(6, 1038607216, 1026943422, -1110364223, -1089515454) + + W(7, -1090714326, -1101900071, 1033968573, 1033064671) + + W(8, -1108371617, -1114744392, 1035072621, 1054823097) + + W(9, 1059932372, 1031097460, -1132241300, -1101772178) + + W(10, 1031384482, 1026017402, 1036186174, -1104718688) + + W(11, 1024781950, -1110105890, 1040128898, 1026666744); + WS(1065065708, -1125796377); + sum1 = W(0, -1112144555, 1029634469, -1112840237, 1034269757) + + W(1, -1122801969, -1133240789, 1016754760, -1119164014) + + W(2, -1106943759, 1025116330, -1105630541, -1106751530) + + W(3, 1023923175, 1041797158, -1113089188, -1130226634) + + W(4, 1017193539, -1112668180, -1112954809, -1096457447) + + W(5, -1094266089, -1096385878, -1106190977, -1114305030) + + W(6, 1048634147, 1027133034, 1055950575, 1060800975) + W(7, 1051425450, 1041695411, 1044103184, 1036967948) + + W(8, -1127916040, 1026809557, -1112907992, -1093799807) + + W(9, -1103940357, 1039766276, -1130590086, 1008751883) + + W(10, -1132404112, 1004217594, -1111964638, 1038388364) + + W(11, -1132895812, 1013909417, 1032062573, 992673732); + sum2 = + W(0, 1043431555, -1109692118, 1036275465, -1111890182) + W(1, -1102288351, 1045724738, 
-1109691788, 1007408138) + + W(2, -1098806978, 1046643695, -1097975189, 1048664613) + + W(3, 1049750262, -1098934938, 1032869755, -1111259526) + W(4, 1024222598, 1045861043, -1101643320, 1058544270) + + W(5, -1098883379, 1049188212, -1107029975, 1048185689) + + W(6, -1105154239, 1054740901, -1083291725, 1068703829) + + W(7, -1080748028, 1058055770, -1100454574, -1148294292) + + W(8, 1031236307, 1035789437, 1007109846, -1081353417) + W(9, 1031821902, -1100833476, 1051695731, -1114785167) + + W(10, 953732840, -1115663935, 1042301531, 1035678115) + + W(11, -1120166561, 1047454611, -1096856793, 1041528279); + WS(1061652844, 1044003957); + sum1 = W(0, -1118715405, 1029075868, -1115471369, 1032954233) + + W(1, 998963525, -1111723799, 1036011862, -1152687195) + W(2, 1022617806, 1045689305, -1094261862, 1027529537) + + W(3, -1099266912, 1045678528, -1119248992, -1108410990) + + W(4, 1044034950, 1030904095, 1059625182, 1057470859) + W(5, 1062131070, -1098407445, 1047156095, 1037164834) + + W(6, -1101088515, -1103346936, -1096538237, -1086691558) + + W(7, -1094363880, -1105378993, -1123294521, 1016045215) + + W(8, 1019688911, 1045834099, -1102841294, 1041319556) + + W(9, -1166277444, 1028040508, -1135804811, -1106724595) + + W(10, -1131961856, -1110934572, 1036052922, -1148682371) + + W(11, 1017830866, -1109411965, 1030886475, -1123601124); + sum2 = + W(0, -1123087716, -1122327585, 1034911826, -1115300382) + W(1, -1147444258, 1031538244, -1122824244, 1026384688) + + W(2, -1115107680, -1105390433, 1034185422, 1042081129) + W(3, 1036682846, 1043195731, -1112377777, 1024479380) + + W(4, 1039737456, -1106584843, 987338697, -1094697399) + W(5, 1048628369, -1100913302, 1041194171, 1036768370) + + W(6, 1045652727, -1139899009, 1060423005, -1103292099) + + W(7, -1079464406, -1102260713, -1101961861, -1106123677) + + W(8, -1106100707, -1102201417, -1111505321, -1094374885) + + W(9, 1066067653, 1062702469, 1013733313, 1034543612) + W(10, -1128513463, -1114704327, 1009548801, 
1026056400) + + W(11, -1112497116, -1107096307, -1125635761, 1037241080); + WS(1061983340, -1091535279); + sum1 = + W(0, -1154828523, 1027504414, -1131271078, -1120408436) + W(1, -1131629652, -1122568783, -1165314478, 995201141) + + W(2, -1133965299, -1128653831, -1114200663, -1129743238) + + W(3, -1117839504, 1035797658, -1115205249, 1019503421) + W(4, 1043906007, 1027190112, 1061948198, 1054216086) + + W(5, 1062537731, 1028696552, 1045640431, 1044774081) + W(6, -1118138894, -1097538828, 1027560685, -1081158944) + + W(7, -1089869577, -1097887897, -1111951485, -1105384996) + + W(8, -1109842717, 1037523079, -1106988467, 1048594270) + + W(9, -1121464863, 1028427349, -1121440284, -1119142300) + + W(10, 1025343169, -1121887129, 1023298667, -1108007945) + + W(11, 1034422373, -1120763036, 1008081777, -1122356070); + sum2 = + W(0, 995087446, 1030495116, 1007114937, -1121939088) + W(1, -1109334595, 1012755199, 1031009559, -1123906524) + + W(2, 1036608468, -1099055008, -1115487063, 1057848194) + + W(3, -1106051731, 1015526929, -1118519790, 1024955281) + W(4, 965233164, -1104807397, -1070882050, 1075270016) + + W(5, 1059573542, -1102771000, 1023903156, -1159976683) + + W(6, 1033021468, -1107407110, 1038417178, -1094462735) + W(7, 1039375650, -1124510890, 1016192349, 1023974018) + + W(8, -1114419070, 1033869233, -1103406865, 1051754977) + W(9, -1104596666, 1028405316, -1111745826, 955743793) + + W(10, 1033025046, -1119280070, 1028798922, -1120762821) + + W(11, 1018124826, 976567206, 1015800189, -1129936862); + WS(1062927532, 1035014202); + sum1 = W(0, 1039522007, 1034505951, 1040732909, 1002357921) + W(1, 1016035605, -1122944943, 999726956, -1140151454) + + W(2, -1114736246, -1109500450, 1011271190, 1044028441) + W(3, 1050061010, 1042353405, 1036129025, 1041525964) + + W(4, 1037103646, 1048942600, 1053278895, -1097469770) + + W(5, -1085392865, -1094832633, -1103977959, -1111797875) + + W(6, -1104897563, -1092956200, -1091146839, -1093973369) + + W(7, 1051214559, 1050975197, 
1043068615, -1126246883) + W(8, 1044380938, 1044086236, 1044239087, 1042916452) + + W(9, -1112456671, -1111813313, -1112116510, -1110444494) + + W(10, -1121785378, 1027254073, -1145709740, 981579331) + + W(11, 1035761861, 1033165606, 1025151372, 1033358643); + sum2 = W(0, 1038528863, 1036214533, 1049223347, -1113699843) + + W(1, 1041716852, -1100571883, 1017295180, -1101978327) + + W(2, -1084044031, -1091834288, -1110305241, 1042795689) + + W(3, 1050774414, 1043118649, 1047177880, 1050263507) + W(4, -1082440456, -1077502553, 1049812579, 1068178608) + + W(5, -1092073446, 1047476754, 1034666293, 1054410728) + W(6, 1050939225, -1095085693, 1050231565, 1060273714) + + W(7, -1115682722, -1105112612, 1050691511, -1101197699) + + W(8, 1053868195, 1028074081, 1046376147, -1132635925) + + W(9, -1098688235, 1017697398, -1092800689, -1104788145) + + W(10, -1113963371, -1114781385, 1035810200, -1160679206) + + W(11, 1036026944, 1016345266, 1040315316, -1112282035); + WS(-1086906028, 1058773265); + sum1 = W(0, -1132629757, 997844200, 965362145, -1106996409) + + W(1, -1153797548, -1120050655, -1122322046, 1015091632) + + W(2, 1044191380, 1043197109, -1125429564, 1033453658) + + W(3, -1118095697, -1121403972, -1119804562, 1026125167) + + W(4, 1032367334, 1050808506, 1058285756, 1062016570) + W(5, 1044397873, 1043993638, 1042486572, 1047754366) + + W(6, -1096691584, -1095732027, -1086608026, -1081591954) + + W(7, 1041881517, -1096430215, 1002316636, -1099777504) + + W(8, -1152318558, 1042781664, 1034636741, 1043388193) + W(9, 1045470808, -1115428219, 1018594522, 1024196285) + + W(10, -1109034905, 1031594505, -1119832870, -1106441585) + + W(11, -1149103576, -1113585524, -1135705587, -1122333654); + sum2 = W(0, 1048104076, 1038532068, -1129915388, -1101156186) + + W(1, -1128179824, -1128045264, -1131782160, -1109473444) + + W(2, -1096548381, -1086001730, -1083843015, 1035944594) + + W(3, -1102068111, 1044127780, -1114990278, 1041203218) + + W(4, -1107021820, -1131935456, 1060056432, 
1066609131) + + W(5, 1027352212, 1048571914, -1120175088, 1038985844) + W(6, 1018456960, 1007620609, 1033171254, 1050949241) + + W(7, -1107874139, -1104179329, 1038875644, -1112054433) + + W(8, -1136070657, 1040453646, -1111662792, -1110531416) + + W(9, -1104445548, -1127646596, -1110249452, 1034344442) + + W(10, -1112321878, 1033875386, -1117768272, 1032538050) + + W(11, -1114035189, 1032855260, -1144805954, -1130080864); + WS(1046014126, 1065769758); + sum1 = W(0, 1012096182, -1112076254, -1100987245, -1106802021) + + W(1, -1106286530, -1129503003, 1021132153, -1112401299) + + W(2, -1121314108, 1039437825, 1043473803, 1045721640) + + W(3, 1049347084, -1109380841, 1049245244, -1120677869) + + W(4, -1091239207, -1094624040, -1091961473, -1088552177) + + W(5, -1089539427, -1091526873, -1091375076, -1088888106) + + W(6, 1052142286, 1048611696, 1060125676, 1062377804) + W(7, 1053897896, 1057150062, 1048631067, 1057709495) + + W(8, 1042130570, -1109865911, -1128757714, -1119711185) + + W(9, -1128945819, -1119080717, 1034078686, -1122337355) + + W(10, 1019346753, -1152131961, -1123465492, 1039063160) + + W(11, -1114302645, 1015225093, -1135518836, 1014030866); + sum2 = + W(0, -1089976447, -1078304891, -1071851632, -1070679577) + + W(1, -1081022179, -1113339548, 1011136331, 1035458322) + W(2, 1064913799, 1068350548, 1070575185, 1072673707) + + W(3, 1063744071, -1128451504, 1040942179, -1115281660) + W(4, -1094573985, 1019506715, 1065710309, 1065504629) + + W(5, 1056839493, -1124929726, -1106886716, -1111465381) + + W(6, -1109226965, -1120561974, 1002672995, -1098180006) + + W(7, -1117214353, -1111255228, 1028658493, 1033265670) + W(8, 1030445629, 1021062218, -1127826384, 1031139735) + + W(9, -1112765526, 1037321868, -1127063938, 1001938923) + W(10, -1115506338, 1018259030, 1012290398, 995896746) + + W(11, 1010152224, -1118014213, 967860273, -1119523230); + WS(-1096468055, 1053785380); + sum1 = W(0, -1110473361, 1020813409, -1111899059, 1027946631) + + W(1, 1003556839, 
-1143096715, 996386562, -1123588698) + + W(2, -1116957735, -1114360891, -1123200592, -1090968927) + + W(3, -1107232681, -1117882342, -1118523124, -1105979736) + + W(4, 1034879511, -1114475674, -1109008383, 1066746382) + W(5, 1064704330, 1046069775, 1042611044, 1040361862) + + W(6, 1017136705, -1111337627, 1028379180, 1051825655) + + W(7, -1094442401, -1129057075, -1114700263, 1022269000) + + W(8, -1116669644, -1135792006, -1105125143, -1094808947) + + W(9, -1116312257, -1106172505, -1135525501, -1106276184) + + W(10, -1118632404, -1120549867, -1162426624, -1173354793) + + W(11, -1119514205, -1113957025, -1130080208, -1123525720); + sum2 = + W(0, -1127419095, -1140125367, 1028088432, -1126099139) + W(1, -1114986450, 1024738032, -1114843176, 1023949256) + + W(2, -1122531649, 1025350612, -1123116186, 1020760995) + W(3, 1019570567, 1039021380, 1018690059, -1119198021) + + W(4, -1111543417, -1108347637, -1101718819, 1056423339) + + W(5, 1073762237, -1100544782, 1049061203, 1026425060) + W(6, 1043407189, 1028851928, 1048774619, -1086358248) + + W(7, -1076137651, 1044261707, -1099227611, -1123270518) + + W(8, 1013519927, -1117946771, 1041147594, -1112971994) + + W(9, -1118107520, 1015761083, -1111772147, 1027419728) + + W(10, -1134679399, 991440379, -1127107007, -1115422093) + + W(11, 1037345920, -1103403994, 1042824177, -1129333383); + WS(-1089097708, -1091261619); + sum1 = W(0, -1127316247, -1113526029, -1118797136, -1123552881) + + W(1, -1123795687, 1015138359, -1129675572, -1126584008) + + W(2, 1003986316, -1120011361, 1046355762, -1103137072) + + W(3, -1103576251, 1032220600, -1120446878, 1027028747) + + W(4, -1111263852, -1112997295, -1081822066, -1100403388) + + W(5, 1053114278, -1138618770, 1007314666, -1123471427) + W(6, 1044425283, 1047767320, 1061869832, 1058200050) + + W(7, -1092855018, -1160534136, 1034662460, 1018063772) + + W(8, 1033605983, -1118942860, -1110810403, 1009152314) + + W(9, 1046189751, 1031283151, 1016196275, -1128724178) + + W(10, -1116984894, 
-1121673366, 1027944882, -1124559262) + + W(11, -1131347855, -1118591413, -1131488026, -1122517738); + sum2 = + W(0, -1151923233, 1026311687, 1025121279, -1123775834) + W(1, 1033596173, -1105542037, 1035290939, -1108107550) + + W(2, 1040701127, -1109901381, 1034378939, -1092342947) + + W(3, -1096202739, 1051958537, -1098166743, 1047977685) + + W(4, -1110923755, 1024679085, -1080291839, 1071256357) + + W(5, 1042190483, -1103925783, 1046386099, -1114731953) + + W(6, -1103532415, 1016651554, -1082840291, 1066054209) + W(7, 1056834874, -1199035157, 1035522294, 1030555165) + + W(8, 1041909162, 1036375055, -1110170843, -1090243220) + + W(9, -1097275958, 1038401459, -1113079432, 1015019440) + + W(10, -1121169096, -1135341024, 1041541680, -1128413506) + + W(11, 1043004954, -1097944284, 1042529081, -1099750845); + WS(1061142188, 1045552914); + sum1 = W(0, 1022384747, 1019603337, 992843677, 1025369499) + W(1, -1149984013, 1030794181, -1123167236, 1034453383) + + W(2, 1020461367, -1114371061, -1137489184, -1128616851) + + W(3, -1114859644, -1134916072, -1114858116, 1009189588) + + W(4, 1058732728, 1054684529, 1060634392, 1056292747) + W(5, 1061516204, 1058073341, 1052882077, 1050605022) + + W(6, -1095805239, -1098135841, -1085459239, -1081060320) + + W(7, -1083596540, -1097701630, 1026645922, -1099032542) + + W(8, -1101590238, -1123665301, 1027392977, -1124699057) + + W(9, 1038698931, -1106352164, 1006025241, -1100750969) + + W(10, 1033080372, 1031766591, -1123755612, -1116775033) + + W(11, 1034158665, -1134014242, 1037240581, -1122655396); + sum2 = + W(0, 1041010686, -1100296712, 1042377248, -1124701214) + W(1, 1002573079, 1026372231, -1107309152, 1022594646) + + W(2, 1045355628, -1110840017, 1028443719, -1097271182) + W(3, -1108318217, 1034868187, 1032249619, 1034940919) + + W(4, -1091459780, -1109934927, -1120459763, 1068946970) + + W(5, 1048538630, -1095496747, -1112114883, -1096153349) + + W(6, -1092997038, -1109499465, 1023984711, 1068602839) + + W(7, 1049236461, 
-1112347607, -1088682480, 1036467963) + + W(8, 1025085151, -1110570114, -1120816159, -1090645327) + + W(9, 1036483715, -1110453128, -1121128467, -1105549173) + + W(10, 1019331790, -1115246837, 1025130703, 1036287199) + + W(11, 1028224647, -1106754238, 1040581836, -1106253388); + WS(-1090464684, 1058848194); + sum1 = W(0, 1038070654, -1104784595, -1140651524, 1048797564) + + W(1, -1115845272, -1115304422, 1022142646, 1026901973) + + W(2, 1035807069, -1106022588, -1107342224, -1112865880) + + W(3, -1102596230, -1117573217, 1011767229, -1105151431) + + W(4, 1059156269, 1027584334, 1060192817, 1062566262) + W(5, 1060113046, 1043041013, 1054886405, 1057383999) + + W(6, -1091590511, -1104101108, -1084437581, -1088742755) + + W(7, -1087393870, -1095911543, -1102839322, -1088896680) + + W(8, 1049775661, -1100086527, 1050662239, 1037690576) + W(9, 1050391262, -1117869728, 1042443855, 1039830925) + + W(10, -1111666744, -1128618637, -1128690758, -1097281200) + + W(11, 1024450616, -1151911514, -1131868338, -1104457275); + sum2 = + W(0, -1106952896, 1050458431, -1084179073, -1068828969) + W(1, -1082769727, 1054525389, -1098029126, 1038352192) + + W(2, -1173421195, -1100970530, -1117103305, 1049093414) + + W(3, 1015202727, -1093960872, 1043872404, -1107157710) + W(4, -1113206734, 1037511626, 1063723378, 1078393261) + + W(5, 1065452472, -1106048166, 1031385535, 1030855073) + + W(6, -1136957987, 1029901269, -1106205415, -1105390455) + + W(7, -1128426057, 993579659, -1114758982, 1034755506) + W(8, -1152575873, -1116273053, 1039247706, 1034474614) + + W(9, 1032907306, -1100143257, -1155923695, 1034533524) + + W(10, -1112584615, 1045345833, 1031320371, -1099252776) + + W(11, 1035573879, 1040309898, -1124055116, -1124561432); + WS(-1106782638, -1120193880); + sum1 = W(0, 1020741911, 1021367862, 1038644232, -1126436322) + W(1, 1043522250, -1112066734, 1018349205, 1033129938) + + W(2, -1116040418, -1121939617, -1109539853, -1098608828) + + W(3, -1101880327, 993980163, -1115252655, 
-1133833389) + W(4, 1053259451, 1050945756, 1055524943, 1065255335) + + W(5, 1061719830, 1048874997, 1051405991, 1042923217) + + W(6, -1112821605, -1100446872, -1091353929, -1085596156) + + W(7, -1096552606, -1090468372, -1106280862, -1102788916) + + W(8, -1101282812, 1049499553, -1100504250, -1101475552) + + W(9, 1025835692, -1114054607, 1027947689, -1104899626) + + W(10, -1111424287, -1129092267, 1016236516, 1033532871) + + W(11, 1022700239, -1114668904, -1147278953, 1028803059); + sum2 = + W(0, -1113068388, 1036673453, -1118880240, 1028813378) + W(1, -1107832014, 1027909744, -1122240512, 1011592783) + + W(2, -1115580494, 1024834555, -1115483686, -1117546792) + + W(3, 1043490929, -1105092918, 1040415904, -1119502452) + + W(4, 1039529972, -1098466798, 1043501593, -1115055672) + + W(5, -1117282076, 1042504898, -1107365158, -1112704202) + + W(6, -1091718951, 1052150318, 1043613567, 1062484975) + W(7, 1045088246, -1094456673, 1030923561, -1123953380) + + W(8, -1076395174, -1080701891, 1068474066, 1066750300) + + W(9, -1097140180, 1049333443, -1109881554, 1043999999) + + W(10, 1046645532, -1104586573, 1009076111, -1093753132) + + W(11, 1036864955, 1022104546, 1033514298, -1115391680); + WS(-1093231703, 1042706757); + sum1 = W(0, 1032826937, 1026075841, 1032961819, 1036684171) + W(1, 1034757510, 1036416025, 1015758976, 1040004261) + + W(2, 1040624182, 1000558899, -1110755278, -1127770315) + + W(3, -1106938311, 1019575916, -1114624210, 1042264396) + W(4, 1058499572, 1052553144, 1061343357, 1062021249) + + W(5, 1060752603, 1057695045, 1051786740, 1058906011) + + W(6, -1084803183, -1099119122, -1083389465, -1081334022) + + W(7, -1087386813, -1090036539, -1098198604, -1088285503) + + W(8, -1104136529, 1025447538, -1114092422, -1112317122) + + W(9, -1132554758, -1109030926, 1012060669, -1096739300) + + W(10, 1041579305, 1031885534, 1032264780, 1018739433) + + W(11, 1031902455, 1037874311, 1027119072, 1035404188); + sum2 = W(0, -1134545280, -1101426979, 1012752443, 
1036306510) + + W(1, 1034320105, -1111393843, 1046638158, -1108867307) + + W(2, 1057942802, 1047102485, 1055001204, -1096203167) + + W(3, -1114902087, -1096420171, -1101073471, -1091559913) + + W(4, -1072425931, -1084592242, -1079302223, -1123894686) + + W(5, 1062181631, 1062084093, 1064536824, 1073505700) + + W(6, -1067197419, -1071603316, -1077093049, -1098326846) + + W(7, 1072541263, 1067609499, 1073214169, 1078744623) + W(8, 1049446840, 1042726179, 1050812050, -1129334697) + + W(9, -1097319760, -1093274589, 1001153868, -1094526709) + + W(10, 1074141781, 1060445087, 1060645469, 1037955576) + + W(11, -1094473234, -1087350702, -1085693573, -1077663090); + WS(-1075107862, 1049521772); + sum1 = W(0, 1027861131, 987763517, -1129668766, 1033675992) + W(1, 1016072268, 992091051, 1026084697, -1120420163) + + W(2, 1045206278, 1041173528, 1032319275, 1040210256) + W(3, 1041485791, 1044520444, 1042655839, -1112688453) + + W(4, -1086495609, -1097093508, -1086275954, -1081944455) + + W(5, -1082045908, -1096923684, -1093981119, -1094413520) + + W(6, 1051717721, 1049572700, 1055584810, 1064397290) + W(7, 1059363461, 1055988184, 1051323143, 1050124978) + + W(8, 1033503081, -1116084523, 1009361560, 995768539) + W(9, -1112276757, -1112468587, 1005879363, 1033578820) + + W(10, 1031434621, -1144572069, -1140119493, 1027980366) + + W(11, -1129633421, 1024276072, 1000619058, 1025196738); + sum2 = W(0, 1033523983, -1107297920, -1109353447, -1097553497) + W(1, -1102290887, 987526362, 1022298470, 944749371) + + W(2, -1091543751, -1125142534, -1100365589, -1121112319) + + W(3, -1119424675, -1094244068, 1033606211, -1100627225) + W(4, 1048034710, 1031510683, 995045613, 1061520608) + + W(5, 1042981818, -1103162896, 1052169844, -1098189213) + + W(6, 1051542012, -1103344470, -1120961303, 1061214811) + + W(7, -1094266425, 1043830132, 1041952964, 1041818078) + + W(8, 1024239879, -1117305947, -1099173366, 1044020024) + + W(9, -1093433218, 1046119446, 1043499670, -1096785580) + + W(10, 
1041022978, -1121264057, -1115033235, 1053722991) + + W(11, -1097363289, 1047286054, -1106943326, 1031561067); + WS(-1097146583, -1102489480); + sum1 = W(0, -1121239074, 1035066361, -1110655283, 1032257412) + + W(1, 1020382076, 1014217354, 1032256145, -1120529307) + + W(2, -1102624204, 1033907292, -1104909116, -1109600459) + + W(3, -1113387282, -1114213060, -1114763275, -1123950514) + + W(4, 1041928668, -1096659727, -1123609439, -1091326393) + + W(5, -1105975127, -1098203983, 1045101428, -1093952115) + + W(6, 1043173384, 1051162503, 1057278053, 1050132159) + W(7, 1049365769, 1049080395, 1033542535, 1055699093) + + W(8, -1118777253, -1108685857, -1102566959, -1100309385) + + W(9, -1101876333, 1040792061, -1121020400, 1035959564) + W(10, 1016656554, 995611334, 1009112868, 1037330963) + + W(11, -1111738159, 1033561904, -1149295615, -1122689753); + sum2 = + W(0, 1030001048, -1107119695, 1023713698, -1115026335) + W(1, 1024715164, -1112691697, 1024758511, -1147275412) + + W(2, 1039849292, -1106305989, 1019090837, 1035696383) + W(3, 1036481868, 1023388499, -1146519572, -1112564365) + + W(4, -1106927121, 1060202814, -1104587624, 1057308499) + + W(5, 1047684818, -1112160637, -1093257675, 1041456307) + W(6, 1046837828, -1095733546, 1043085465, 1053508746) + + W(7, 1034862818, 1046794790, -1094465043, -1083960309) + W(8, -1131959957, 1012779430, 1026454649, 1038120705) + + W(9, 1037173687, -1101425448, 1036335613, -1094791365) + + W(10, -1116147800, -1124391765, 1028873647, -1104339358) + + W(11, 1045294658, -1119391248, -1156574008, 1012709310); + WS(1057303084, -1084740383); + sum1 = + W(0, 1025949163, 1023904890, 1033327896, 1022060320) + W(1, -1131905805, -1114170862, 1021513971, -1109381346) + + W(2, -1105987664, -1104417576, -1097440039, -1106403568) + + W(3, 1043670267, 1041763972, 1037885311, 1019385429) + W(4, 1034968156, 1048292239, 1057303604, -1134851644) + + W(5, -1082452830, -1089247692, -1093401571, -1097845612) + + W(6, -1097863693, -1093717272, -1088500414, 
1058205452) + W(7, 1059877080, 1058731430, 1046703382, 1049842950) + + W(8, 1046214631, 1042713619, 1043920086, 1041892896) + W(9, -1101986947, -1107457363, -1110573172, 1031126152) + + W(10, -1130932027, -1126920426, 1012093510, 1040636140) + + W(11, 1028551783, 1041063330, -1143718184, 1016340900); + sum2 = + W(0, -1097593869, 1040446621, -1107347821, 1036994425) + W(1, 989701019, 1043906150, 1040271244, 1016581730) + + W(2, -1100989715, 1036240143, -1105186205, -1119554977) + + W(3, 1048367016, -1095197585, -1102913875, 1041272891) + W(4, 1026566131, -1100851860, 1046892012, 1069594194) + + W(5, -1107809286, 1031148367, 1038853877, -1105136265) + W(6, 1061153405, 1047874592, -1103469514, 1074408805) + + W(7, -1110319007, -1079941721, 1042791078, -1086296266) + + W(8, 1050695083, -1096668864, -1104864053, -1103368475) + + W(9, -1100341899, -1084422687, -1093040701, -1099178193) + + W(10, -1095509343, 1040237386, -1114614656, 1040210470) + + W(11, -1105386934, 1031716423, 1036206107, 1042594729); + WS(-1085311468, 1078025451); + sum1 = W(0, -1114246985, -1123523234, -1123774423, 1017942127) + + W(1, 1018695644, -1134274761, 973249598, 1032511109) + W(2, 1033236492, 1042717314, 1046675224, 1030027079) + + W(3, -1107516674, -1105985470, -1118366597, -1108369993) + + W(4, -1126048573, -1100167817, -1090923842, 1002547123) + + W(5, 1061445386, 1051174149, 1032724353, 1047097650) + W(6, 1051596674, 1050764301, 1057423711, -1138337956) + + W(7, -1085962671, -1101362115, -1098415672, -1105953621) + + W(8, -1110129580, -1101525701, -1102550347, -1117387765) + + W(9, 1045061525, 1037275627, 1033244340, 1025818592) + + W(10, -1109388771, 1021849401, 1031509177, -1126623147) + + W(11, -1118825262, -1107617348, -1124427728, -1115961017); + sum2 = + W(0, -1123353384, 1030359787, 1024787123, 1015567503) + W(1, -1115408082, 1026077135, -1137211259, 1015951538) + + W(2, -1131031713, -1114557735, -1125418898, -1132430472) + + W(3, 1029672716, -1118070691, -1125125186, -1135395683) 
+ + W(4, 1029311455, -1123787418, -1099438470, 1049323738) + W(5, 1042547290, 1050505809, -1110579463, 1040847724) + + W(6, 1047725718, 1042470619, -1106551206, -1097880179) + + W(7, -1089568094, -1166364565, -1115324497, 1028809582) + + W(8, 1051601031, -1121374979, 1049816965, -1088308991) + + W(9, -1098255621, 1026888229, -1112338809, -1102185033) + + W(10, -1113968956, 1045218010, 1044805763, 1039658119) + + W(11, 1042126156, -1115384233, 1043221816, 1014731468); + WS(1060142060, 1042832150); + sum1 = W(0, 1031397804, -1121488955, 1033996248, 1018827426) + W(1, -1131717262, 1033609702, 1010765225, 1029196868) + + W(2, 1036648197, -1142445829, -1117943062, -1111782928) + + W(3, -1108508447, -1106903604, 1015606653, -1115637854) + + W(4, 1052129567, 1051097024, 1054892021, 1065668620) + W(5, 1059881804, 1058770516, 1050176059, 1054801908) + + W(6, -1093104885, -1094159296, -1086111346, -1081757070) + + W(7, -1087336873, -1090845580, -1100487494, -1126749404) + + W(8, -1125587404, -1138833071, 1028704007, 1032794495) + + W(9, 1047817147, -1104469332, 1038701961, -1091639031) + + W(10, 1030014814, 1024131190, 1024270930, -1123778927) + + W(11, 1022586319, -1125662417, 1032196289, 1019613157); + sum2 = + W(0, 1016709191, 1013621039, -1134054043, 1033829499) + W(1, -1133247239, 1014723479, 1025542948, -1113297030) + + W(2, 1043282989, -1105412866, 1034830675, -1123744731) + + W(3, -1107183956, -1110083630, -1115844306, -1097083153) + + W(4, -1105914972, 1048947993, -1104630078, -1088942993) + + W(5, -1093767876, 1055362845, -1111303066, 1064727508) + + W(6, -1104055195, 1038685325, -1084033248, -1072142874) + + W(7, 1073971039, 1076386430, -1098251720, -1090433741) + + W(8, -1113893232, -1143937773, -1103493551, -1098435347) + + W(9, 1052470850, -1104225446, 1051939911, -1091328171) + W(10, 1031619176, -1128320153, 990218298, 1026126526) + + W(11, 1042783259, -1103699955, -1123473137, 1025388740); + WS(-1086199532, 1054743650); + sum1 = W(0, 1034444474, 1027498242, 
1040848158, 1023783640) + W(1, 999869046, -1115850147, 1025498402, -1134397103) + + W(2, 1032011277, -1095519909, -1107222607, -1152811127) + + W(3, 1044283945, 1044573214, 1036275699, -1123084051) + W(4, 1048862696, 1053262121, 1061389071, 1030342167) + + W(5, -1084989084, -1095010778, -1106712265, 1032418005) + + W(6, -1097849454, -1090625833, -1084999869, -1113365765) + + W(7, 1060145155, 1054594461, 1045643928, 1033574715) + W(8, 1006964268, 1040908045, 1038795271, 1038235355) + + W(9, -1104327823, -1101807134, -1122717633, 1015954881) + + W(10, 1027368614, -1113781807, -1118338586, -1108967739) + + W(11, 1006059384, 1028815960, -1124545277, -1135640905); + sum2 = + W(0, -1098769930, 1041383757, 1031909130, 1040055926) + W(1, -1112037460, 1040275414, -1111784639, -1113621249) + + W(2, 1042608106, -1089209738, -1102323575, -1105226719) + + W(3, -1124786133, -1117623756, -1131302943, 1048083051) + + W(4, -1101852353, -1121408570, -1108783974, 1065973699) + + W(5, 1049738412, 1057999228, -1111524780, 1025491254) + W(6, -1093087039, 1038811718, -1089123880, 1069194195) + + W(7, 1026907074, -1106112227, 1020629449, -1107530612) + + W(8, -1139600545, -1093098827, -1121335963, -1098519743) + + W(9, -1089198426, -1101165651, -1109243346, 1048264427) + + W(10, 1036192806, -1121276186, 1045867083, 1023817866) + + W(11, 1042688929, -1122635193, 1041146873, -1095495518); + WS(-1090983255, -1089207473); + sum1 = + W(0, 1011536056, 1031895551, -1113368985, 1041335941) + W(1, -1112820133, 1033755607, -1120463213, 1029129223) + + W(2, -1126565928, -1113602338, -1111099810, -1099647257) + + W(3, -1102367072, 1030939267, -1111909392, 1034973975) + W(4, 1055178415, 1054038520, 1055473117, 1064470249) + + W(5, 1063942234, 1050197537, 1058279198, 1042393881) + + W(6, -1101680475, -1097154360, -1089575247, -1084973552) + + W(7, -1086493203, -1097159533, -1096914715, -1096995926) + + W(8, -1102945718, -1121652462, 994604183, -1121284913) + + W(9, 1041044307, -1111487017, 1017418960, 
-1111098786) + W(10, 1026884961, 990918528, 1022227241, -1151944468) + + W(11, -1119623498, 1029397803, -1127714823, 1024652821); + sum2 = + W(0, 1008868714, 1000134020, -1131179173, 1026383992) + W(1, -1108980541, 1033178726, -1113100227, 1025874080) + + W(2, -1138668162, 1023930211, -1108628571, -1114490385) + + W(3, 1047990867, 1030525898, -1131668265, -1138114226) + + W(4, 988655570, -1099344177, -1064898888, -1066369231) + W(5, 1075540559, 1086324754, 1049763634, -1100090533) + + W(6, -1108603395, 1041622837, -1131013053, -1087427617) + + W(7, -1095163420, 1057379348, 1040828448, -1109206595) + W(8, 1033342078, 1025638720, -1133763810, 1033730604) + + W(9, -1117896039, 1031531103, -1119632759, 1025251971) + + W(10, 1018452757, -1115444075, 1023801183, -1118168395) + + W(11, 1033324797, -1116567631, 1028372534, -1128577333); + WS(-1109283164, -1114463829); + sum1 = W(0, 1026307305, 1010065376, 1036887575, 1032762791) + W(1, 1038442146, 1036926348, 983730417, 1042339968) + + W(2, 1048505142, 1006853751, -1122796048, -1121781501) + + W(3, -1106474773, -1140109366, -1113784606, 1033209828) + + W(4, 1056046207, 1051849183, 1060373686, 1062361681) + W(5, 1060935143, 1058739944, 1052729243, 1060958338) + + W(6, -1087851802, -1096622464, -1086026376, -1080983673) + + W(7, -1086759863, -1088322398, -1098544579, -1085562739) + + W(8, -1097387850, 1034233556, -1106692609, -1116217316) + + W(9, -1136008963, -1111740214, 973353381, -1098996574) + + W(10, 1043738183, 1033008793, 1030721871, 1031320492) + + W(11, 1024808165, 1037732587, 1028455576, 1032429188); + sum2 = + W(0, 1033103311, 1036675590, 1035316039, -1139597621) + W(1, -1104037187, 1036924272, -1111374359, 1033604811) + + W(2, -1088826877, -1093870598, -1096260803, 1038770367) + + W(3, -1120383141, 1054470917, 1048854624, 1052968175) + W(4, 1076551262, 1064863709, 1066303868, 1054257493) + + W(5, -1086745184, -1080791393, -1081541832, -1074269835) + + W(6, 1082157528, 1075036637, 1074566779, 1054121358) + + W(7, 
-1075712309, -1075955022, -1073360354, -1068055688) + + W(8, -1088761642, 1044533603, -1088656095, -1096732938) + W(9, 1026461691, 1058516172, 1036676016, 1036957577) + + W(10, -1073488400, -1083381518, -1087052440, 1034260530) + + W(11, 1055295113, 1062460380, 1061463967, 1072405502); + WS(-1073690779, -1087724268); + sum1 = W(0, 1020006630, 944113971, 1028328440, 1026519064) + W(1, 1023877839, 1026744019, -1116930962, 1037411798) + + W(2, 1040645890, 1024788392, 1048604230, -1124203663) + + W(3, 1026558293, -1108217132, 1024907750, -1124509669) + + W(4, 1037569760, 1033313762, 1026748704, -1097395091) + W(5, 1058432566, 1052406658, 1046357449, 1052615509) + + W(6, 1040537741, 1022063376, 1037819609, -1081034849) + + W(7, -1089181115, -1095782133, -1103915032, -1095058991) + + W(8, -1102207443, -1117962707, 1018608989, 1030158086) + W(9, 1048090929, 1028902900, 1040820950, 1037074644) + + W(10, 1011752087, 1010250996, 1035208397, -1107990331) + + W(11, 1030700463, -1115106384, 1006665970, -1120315546); + sum2 = + W(0, 1036147454, -1115074830, -1143660011, -1109526737) + W(1, -1113309480, 1006285882, -1115649745, 1026243607) + + W(2, 991782552, 1025479068, -1111873890, 1054093522) + W(3, 1016478163, 1035621794, 1027286665, -1108696429) + + W(4, -1100036784, -1096384442, 1042154750, 1036720443) + W(5, 1015496998, -1111840809, 1028637267, 1024623035) + + W(6, -1102354425, 1031211776, -1098733186, 1061883500) + + W(7, 1040999099, -1099055155, -1117281017, -1120753705) + + W(8, -1132125723, -1114551496, -1120425773, 1042458762) + + W(9, -1118319659, 1021928735, -1113807339, 1033776741) + + W(10, 1037581214, -1107514244, 1036528495, -1113752408) + + W(11, -1131440878, 1009305230, -1115311515, -1117838632); + WS(1053167575, 1034582410); + sum1 = + W(0, 1023109013, -1105499654, 1047073940, -1103829756) + W(1, -1134567319, 1029509866, -1107075490, 1028951523) + + W(2, -1135820801, 1041430288, -1099071824, 1051022760) + + W(3, -1103780951, 1038026111, -1144576488, 
1034415069) + + W(4, -1107146278, 1045138183, 1006706653, -1098544246) + + W(5, 1035133548, -1101742889, 1040882462, -1110494113) + + W(6, 1042183330, 1015167422, -1110119411, -1114407921) + W(7, 1029794991, 1025810371, 1020673517, -1144644710) + + W(8, -1106326923, 1048594337, -1101264321, 1042439875) + + W(9, -1115149997, -1124355817, -1133994400, 1017843520) + + W(10, 1038375967, -1103059717, 1036901240, 1019390619) + + W(11, -1122256838, 1039002364, -1113322768, 1025775953); + sum2 = + W(0, 1007469645, 1024932078, -1123529450, -1106895484) + W(1, -1154152567, 1026396887, 1034932657, -1118589447) + + W(2, -1127684796, -1122451669, 934668744, 1046133075) + + W(3, 1050482770, -1100501045, -1112663810, -1145059796) + + W(4, 1026289720, 1029479215, 1048216767, -1101979386) + + W(5, -1086100405, -1104578417, -1110628430, -1120799353) + + W(6, -1134400734, -1114161855, -1092503803, -1118826374) + + W(7, 1054881826, 1054831843, 1037309396, 1034243151) + W(8, -1121586613, 1018968072, 1055026065, 1057475450) + + W(9, 1057991283, 1037697257, 1041220013, 1035239909) + W(10, 1012304630, 1018258842, -1105610792, -1091642753) + + W(11, -1094138317, -1097381938, -1108645190, -1109776821); + WS(1065106092, 1009251236); + sum1 = + W(0, -1127594260, 1015119794, -1105935716, 1030756271) + W(1, 1015463627, 1033714469, -1121831017, 1032817313) + + W(2, -1119086128, 1041681344, 1038033220, -1103164984) + + W(3, -1097628571, -1096661826, -1162082196, 1037645501) + + W(4, -1106710454, -1108480942, -1085761952, 1048660750) + W(5, 1062385221, 1057312254, 1045270286, 1038003425) + + W(6, 1041658930, 1053952044, 1059673337, 1035410928) + + W(7, -1083076739, -1097132616, -1100422942, -1134248242) + + W(8, 1028936149, -1098626929, -1106493133, 1042075767) + W(9, 1048795361, 1042769246, 1033263484, -1109564844) + + W(10, 999711717, -1128362792, 1030156593, -1105711219) + + W(11, -1109139763, -1109546740, -1128821631, -1113038632); + sum2 = W(0, 1026504012, -1115537349, 1004448402, 
-1120289237) + + W(1, -1116193727, 1023273402, -1106169890, -1116296115) + + W(2, -1111982251, -1109318355, -1126991046, -1114485349) + + W(3, 1031030421, 1036795295, 1038895038, 1037382021) + W(4, -1115575890, 1037274256, 1032461625, 1046084064) + + W(5, -1104386736, 1033981186, 1034486696, 1014573341) + W(6, 1025515301, 1036029355, -1105715099, 1035923056) + + W(7, 1039615788, 1034803763, -1111316562, -1126611440) + + W(8, 1038047283, -1140512157, 1031934837, -1127252256) + + W(9, 1008094609, -1108137243, 1013721630, -1109430807) + + W(10, -1121134093, -1118959675, 966777949, 1001443794) + + W(11, -1126229172, 999509622, -1115420717, -1121268263); + WS(1058235500, 1054922309); + sum1 = W(0, -1126843838, -1115091447, 1037063794, -1097782353) + + W(1, 1017345397, -1106496120, -1124168315, -1111050604) + + W(2, -1110452882, 1026999796, 1027596138, 1043595317) + W(3, 1026915457, 1051692426, 1033470169, 1047889704) + + W(4, -1096439037, -1109530936, -1126567064, -1080972858) + + W(5, -1087153329, -1092439359, -1096481402, -1101856358) + + W(6, 1054965252, 1035486938, 1052521111, 1054616871) + W(7, 1059777872, 1055199219, 1049045985, 1042302776) + + W(8, -1126708475, 1040994960, 1048136923, -1135709192) + + W(9, -1117730271, 1025849689, -1109862614, 1048178521) + + W(10, -1113819273, -1112039092, 1029145158, -1103625129) + + W(11, -1113712109, -1133876016, -1120397392, 1018059916); + sum2 = W(0, 1032658614, 1026047005, 1016793814, 979115576) + W(1, 1025048255, -1120340424, -1124528004, -1113479651) + + W(2, -1104671284, 1026757765, -1106565422, 1040092458) + + W(3, -1118382609, 1024555245, -1115048442, -1102456829) + + W(4, 1033357456, -1146410551, -1129670058, 1049738304) + + W(5, 1047130549, -1099524683, -1114147283, -1143577495) + + W(6, -1107649550, -1093450531, 1029536101, 1048902638) + W(7, 1043845633, 1031379649, 1015256822, 1032535412) + + W(8, -1107785448, 1045006287, 1032863350, 1031141497) + + W(9, 1024835039, 1016575718, -1115840886, -1140327483) + + W(10, 
1014985523, -1108588814, -1114405495, 1032866808) + + W(11, -1128051766, 1011865867, 1016407002, -1121997281); + WS(1050344663, 1040331465); + sum1 = + W(0, -1119034750, 1027008397, -1123135107, 1034001074) + W(1, -1165785906, -1109811775, 1012082163, 1021442477) + + W(2, -1123846924, 1032620906, -1107606109, -1113228665) + + W(3, -1104523524, 1044228977, -1136436803, 1015486907) + W(4, 1038870821, -1145305726, 1045963311, 1055978044) + + W(5, 1061494191, 994921841, 1043858443, 1032566715) + W(6, 1016005498, -1104369525, -1101093248, -1091899011) + + W(7, -1089807144, -1100340494, -1113047285, -1121306753) + + W(8, -1113185550, 1047408652, -1106188838, 1019406818) + + W(9, -1133495032, 1039673315, 1003314783, -1110918342) + + W(10, -1180899430, -1117862216, 1031141157, 1025902913) + + W(11, 995937181, -1126170822, -1155810234, 1012720916); + sum2 = W(0, 1017323653, 983063098, -1124332548, -1136855038) + + W(1, 1038193266, -1115428487, 1037749325, -1162541650) + W(2, 1006857268, 1027629321, 1040636195, 1050922570) + + W(3, -1107256285, 1024268384, -1105738834, -1112368096) + + W(4, 1027026905, 1041250623, -1098379711, -1095854749) + + W(5, -1124475180, -1107079153, 1035306909, 1041041721) + + W(6, -1096858156, -1096058068, -1081757810, 1046692173) + + W(7, 1055388353, 1044471504, 1018283467, 1034757871) + W(8, 1057582691, 1069736593, -1098564102, -1092586095) + + W(9, -1101296593, -1099842491, -1130236866, -1116072515) + + W(10, -1108630874, -1103954988, 1039138402, 1040290673) + + W(11, 1026430908, -1135051275, -1127494735, -1128054004); + WS(1064629676, -1098084920); + sum1 = + W(0, 1033302207, 1017176603, 1043155505, 1036523218) + W(1, -1135635048, 1036902751, -1154473427, 1031815708) + + W(2, 1017516913, 986968687, -1093133598, -1136867787) + + W(3, -1099849236, -1112073298, 1015173986, -1108314079) + W(4, 1037844037, 1048067236, 1059745345, 1054752956) + + W(5, 1054803234, 1049105955, 1045303547, 1049045791) + W(6, 1004101612, -1094217842, -1107275372, 
-1083111259) + + W(7, 1036740290, -1089373007, 1007182507, -1096296960) + + W(8, -1100831258, 1032944847, -1118901210, -1111571193) + + W(9, -1104247276, 1044914424, 1032262742, 1028425028) + W(10, 1023773169, 1036346276, -1109611315, 1036061697) + + W(11, -1130856180, -1178541510, 1028817260, -1121119813); + sum2 = + W(0, 1031116538, 1018758860, -1093507837, -1142239225) + W(1, 1008547208, -1115133937, 1037272918, -1116833334) + + W(2, -1106561538, 1041892626, 1060901401, -1094547881) + W(3, 1048344650, -1106548880, 1031802380, 1032935937) + + W(4, 1041324255, -1122918288, -1090730175, 1059998586) + + W(5, -1105692057, -1107738784, -1109731964, -1105354579) + + W(6, -1097318271, 1050772655, -1089173269, 1050575163) + + W(7, -1105982083, 1062167006, -1115007264, 1051274636) + + W(8, 1038093550, -1113042451, -1103574570, -1096117806) + + W(9, 1045224094, -1093777588, 1040225760, -1097190188) + W(10, 1003684433, 1005733025, 1036702490, 1007559836) + + W(11, -1111284039, 1032209739, -1119496571, 1039213738); + WS(1059570988, -1104849743); + sum1 = W(0, 1015935639, -1114327981, 1032097032, -1100671580) + + W(1, -1118415719, -1146837726, -1128662677, -1106017873) + + W(2, -1105217736, -1117057793, 1045549308, 994695167) + W(3, 1019924475, 1044124420, -1133264506, 1026902699) + + W(4, -1100062838, -1095539196, -1097913193, -1081463016) + + W(5, -1088685699, -1103538521, -1090424260, -1096642336) + + W(6, 1055580097, 1051203348, 1057098153, 1065307314) + W(7, 1060408535, 1060044676, 1048658511, 1058793797) + + W(8, -1114680436, -1104422643, 1035503236, -1100709080) + + W(9, -1100882497, -1127513406, -1106885414, -1112552037) + + W(10, 1027695934, -1122995670, 1026067698, -1115820943) + + W(11, -1121911751, 1040976712, -1109730008, 1032897105); + sum2 = W(0, -1101738300, 1026251680, -1096487254, -1097132336) + + W(1, -1103125896, -1098301296, -1145491968, -1102991820) + + W(2, -1102635520, 1042860418, -1118004862, -1097577131) + + W(3, 1034939290, -1107323885, 1046219299, 
-1101571742) + W(4, 1049960021, 1045813894, 1012147620, 1062244087) + + W(5, 1030928730, 1018590462, 1042676852, 1048890187) + W(6, -1107186076, 1037904592, -1101520598, 1050699569) + + W(7, -1118944274, 996952385, -1132230456, -1120981264) + + W(8, 1031463651, 1043360780, -1099464580, 1042220045) + + W(9, -1129575892, -1143727552, 1038679603, 1022002534) + + W(10, -1109595133, 1032162510, -1144663840, 1017587638) + + W(11, -1122088198, -1112243029, 1028948530, -1108091801); + WS(-1095386967, -1111219145); + sum1 = W(0, 989878260, -1130549149, 1027494660, 1027516453) + W(1, -1138329195, 1029536895, 1016226972, 1031794584) + + W(2, 1046105185, -1114353482, -1126247292, -1101798869) + + W(3, -1109199416, -1149170463, -1110945897, 1041043200) + + W(4, 1042696371, 1044731284, 1059356463, 1066759339) + W(5, 1060963410, 1056388995, 1052477254, 1048385630) + + W(6, -1089927208, -1092958347, -1090136954, -1086407791) + + W(7, -1086208082, -1087382795, -1099705531, -1090430272) + + W(8, -1127509006, 1029760895, -1123276603, -1134143337) + + W(9, 1037957160, 1028489291, 1033794703, -1116367629) + W(10, 1030962098, 1015099027, 1002268463, 1008759050) + + W(11, 1030926587, 1024487352, 1027177352, -1155852515); + sum2 = + W(0, 1037152773, -1155239869, -1110337901, 1031799544) + W(1, -1115579003, 1027082675, -1125050611, 1027786160) + + W(2, -1107393101, -1118272906, -1121008946, 1040803769) + + W(3, 1040826838, -1129277077, 1044810576, -1128372019) + + W(4, -1107499267, -1123365743, 1053226346, -1091451910) + + W(5, 1036591998, 984589146, -1111221822, -1115106721) + W(6, -1071982339, -1066777138, 1065602467, 1078719649) + + W(7, 1072207876, 1012365693, -1114663567, 1027240586) + W(8, 1050177526, -1099174901, -1114100047, 1051522302) + + W(9, -1112448128, 1036247818, 1024897040, 1010421237) + + W(10, 1034063245, -1111285661, -1111810642, 1032887971) + + W(11, -1105619991, 1027337883, -1128259763, -1110086921); + WS(-1090330604, -1095913405); + sum1 = W(0, 1020138108, 
-1123891981, 1027835331, -1104251929) + + W(1, 1036891437, -1115340623, 1034060158, -1111402939) + + W(2, -1155846064, -1158857313, 1042832881, 1026174194) + + W(3, 1041459232, -1107002233, 1043482052, -1101325435) + + W(4, -1089580553, -1099259748, -1086143680, -1083565678) + + W(5, -1087459168, -1094838563, -1096454718, -1097944271) + + W(6, 1060107168, 1041623909, 1061453552, 1065434151) + W(7, 1058328348, 1058061687, 1046178603, 1056232524) + + W(8, 1026904097, 1034391623, -1115844409, -1108664129) + + W(9, -1103857816, -1108388469, -1124051644, -1124315511) + + W(10, -1129005794, 1027969480, -1126805065, 1015124739) + + W(11, -1133565543, 1018898230, -1128371111, 1016813002); + sum2 = + W(0, -1123501879, 994818552, 1024190209, 1026428551) + W(1, -1113685733, 1033345966, -1113104927, 1032770548) + + W(2, 1032645553, -1106534525, 1041404870, -1104982451) + W(3, 1042593674, -1115833227, 1034301013, 1019409377) + + W(4, 1031823713, 1055123028, 1003420836, -1080276792) + W(5, -1127504591, 1051480126, -1116624943, 1017007809) + + W(6, 1073269342, 1086127434, -1097366650, -1064048639) + + W(7, -1073275019, -1090368136, 1039074558, -1104405577) + + W(8, 1039470942, -1106004733, 1048036108, 1029604730) + W(9, -1102930579, -1127043711, 1016207563, 1023925896) + + W(10, -1134706270, -1108902493, -1131894299, 1040747994) + + W(11, -1114192145, 1028687353, -1110363669, 1028902826); + WS(-1090401964, -1152005757); + sum1 = W(0, -1107975723, 1027213860, -1098681011, 1041527967) + + W(1, -1104614908, -1117487349, -1139447119, -1114124023) + + W(2, -1110378885, -1114833045, -1110030634, -1088517567) + + W(3, -1108968043, -1117626874, 1013150495, -1107640642) + + W(4, -1108756369, -1095679285, -1106296032, 1070380432) + + W(5, -1108442736, -1098750614, -1103582017, -1109258534) + + W(6, 1036253290, -1126602324, 1055690689, 1071039479) + W(7, 1052643167, 1027420045, 1037720897, 1028231256) + + W(8, -1114963960, 1031108853, -1100045596, -1089232274) + + W(9, -1099409217, 
-1149243844, -1112117145, 1004205268) + + W(10, -1113670180, -1122691487, -1107558750, 1041737494) + + W(11, -1114805320, -1114809497, -1129003237, -1115685308); + sum2 = W(0, 1032241318, -1099535875, 1055385702, -1088862448) + + W(1, 1049303281, -1109469239, -1107728961, -1128383244) + + W(2, -1104603794, 1042476400, -1090035137, 1058560544) + + W(3, -1088861159, 1052772694, -1098593341, 1040732759) + + W(4, -1110024851, 1036274242, -1088795135, 1070638870) + + W(5, -1093985609, -1106832655, 1040915158, -1094544594) + + W(6, 1040884292, -1098717699, 1054345172, 1049287225) + W(7, 1045145516, 1028821082, -1095210561, 1053404675) + + W(8, -1102517567, 1017815860, -1094964358, 1049908300) + + W(9, -1104972980, -1121780800, 1046695263, -1097835418) + + W(10, 1041491703, -1106264274, 1050847172, -1097728340) + + W(11, -1120926078, 1040141480, -1097969166, 1042680505); + WS(-1070853435, -1079594702); + sum1 = + W(0, 1031368074, -1129832370, 1034753191, 1015854174) + W(1, 1034404179, 1016952500, -1121119193, 1030231874) + + W(2, 1015638464, 1041167744, 1035847262, -1103546680) + W(3, -1113675090, 1024462894, 1024493276, -1128491667) + + W(4, -1114370347, -1117069532, -1123068615, 1064504443) + W(5, 1061831318, 1053421228, 1039689052, 1047845552) + + W(6, -1115839497, -1102391672, -1103157041, -1091637800) + + W(7, -1080200908, -1096184888, -1127920108, -1114780422) + + W(8, -1115223191, 1027397066, -1112317379, -1115479934) + + W(9, 1034003429, 1024063661, 1021986479, -1108195108) + W(10, 1032466078, 1017764904, 1032880216, -1129698458) + + W(11, 1015162251, -1140051844, -1133040549, -1121925385); + sum2 = W(0, -1106137478, 1043473407, -1106937584, 1026389244) + + W(1, -1112665680, 1037682857, -1122471729, -1132137458) + + W(2, -1112484873, -1102332364, -1092913054, -1107744995) + + W(3, -1097742336, 1043622402, 1020098037, 1022975237) + W(4, 1057355655, -1095817987, 1065744344, 1065853704) + + W(5, -1081387458, -1127502696, -1107102958, -1141866869) + + W(6, 1035747177, 
-1115086293, 1023360745, 1072123343) + + W(7, -1079419356, -1106562006, -1106082519, -1126279403) + + W(8, -1121112608, -1132505032, -1113186427, -1092294539) + + W(9, -1121542644, 1037780507, 1042075138, 1036152561) + + W(10, -1111886750, 1028632460, -1106034045, 1034294011) + + W(11, 1024287965, 1025382780, 1002478149, -1184105122); + WS(1049779927, -1112776705); + sum1 = + W(0, 1028071903, -1114011991, 1029458468, -1119065733) + W(1, -1132325110, 1021654911, -1140102008, -1114327724) + + W(2, -1118237477, 1043483133, 1007633232, 1035764138) + W(3, 1035328577, 1034435922, -1138188744, -1122000063) + + W(4, -1102803275, -1098367878, -1088820906, -1094340312) + + W(5, -1080604425, -1090240908, -1148292210, -1113705707) + + W(6, 1029391902, -1104983021, 1052808064, 1066199650) + W(7, 1053987355, 1056392483, 1036299814, 1048826612) + + W(8, 1034486619, 1030141823, 1041927295, 1028311666) + W(9, -1120802495, 1033716813, -1137351220, 1019304377) + + W(10, -1146402448, 1024272361, -1136262168, -1116603171) + + W(11, 1022508086, -1120600656, -1178578023, 1020743116); + sum2 = + W(0, 1032570045, -1105084280, 1038138439, -1106537061) + W(1, 1040196930, -1115643975, 1030892929, -1115802712) + + W(2, 1038214390, 1026252154, 1040403525, -1094267982) + W(3, -1111441511, -1113353460, 1041615042, 1045634969) + + W(4, -1105258373, 1013797397, -1089116471, 1076181184) + + W(5, -1077048432, 1025617857, -1099622062, 1030853641) + W(6, 1049712324, -1092505394, 1060934721, 1059180753) + + W(7, -1094660144, -1116351798, -1112051433, 1036834683) + + W(8, -1104412018, -1106628914, -1095609080, -1100140938) + + W(9, -1105239307, 1035183993, -1108612586, -1126459673) + + W(10, -1123221706, 1042796766, 997194667, 1031304969) + + W(11, 1025898916, -1113508292, 1036578837, -1114596985); + WS(1046079918, -1101410372); + sum1 = W(0, -1119247786, 1028420729, -1112243383, 992275696) + + W(1, -1122025413, -1106451742, 1031954516, -1132155589) + + W(2, -1128322399, 1041896248, -1105916230, 
1048641796) + + W(3, -1106854731, 1031954663, 1008473970, -1111364501) + W(4, 1042510685, 1041276767, 1052547749, 1049660162) + + W(5, 1058242473, 1042746961, 1045913078, 1038481040) + W(6, 1040172212, 1029165630, -1093769957, -1086983984) + + W(7, -1090223605, -1101840568, -1131429676, 1034663574) + + W(8, -1102544268, 1023180436, -1094151845, 1051181214) + + W(9, 1034028133, 1035662026, -1136064862, -1123376855) + + W(10, -1115138862, 1035447066, -1116787216, -1126725978) + + W(11, 1020616971, -1106918309, 1031591591, -1113758304); + sum2 = W(0, -1141998719, -1145824639, 1031031156, 1020781312) + + W(1, 1022361784, 1035727822, -1106658345, 1018984000) + + W(2, -1109775790, -1114337335, 1035421278, -1094152732) + + W(3, 1019796096, -1109500163, -1126048860, -1133379360) + + W(4, 1044487768, 1032184652, -1118695686, 1023745864) + W(5, 1018742928, 1044822196, 1018234504, 1040264262) + + W(6, -1123003936, -1105393491, 1043275908, 1060925239) + + W(7, 1035441608, -1129786704, -1154747838, -1114483694) + + W(8, -1128896096, -1105870753, -1123077850, -1116720242) + + W(9, 1005384511, -1106753069, -1109895735, -1115363912) + + W(10, -1116651188, 1028260552, -1117301622, -1118185274) + + W(11, -1111756371, 1028382728, -1112885074, 1013789936); + WS(1065850966, 1065849900); + sum1 = W(0, -1148044715, 1016251913, -1121600570, -1122606683) + + W(1, -1119487441, -1123929445, 1022387720, -1113618935) + + W(2, 1023026535, 1037817733, 1025496281, 1030608138) + W(3, 1035148220, 1038603949, 1040789578, 1029056085) + + W(4, -1096615656, -1090259708, -1086496425, -1089114938) + + W(5, -1087399018, -1089721837, -1091666965, -1090055623) + + W(6, 1051437895, 1044690723, 1060547800, 1067874188) + W(7, 1061971613, 1051923078, 1020371139, 1050691807) + + W(8, 1037565454, 1031445334, -1123828273, -1103278290) + + W(9, -1103477215, 1017932657, 1017045638, 1041207209) + + W(10, 1009078433, -1134993406, -1122338716, 961099943) + + W(11, -1115158089, -1152593000, 1017681054, 1032486406); + 
sum2 = + W(0, 1035054009, -1100085534, 1050236106, -1106312974) + W(1, 1040394388, -1111006317, -1122528690, -1120327060) + + W(2, 1029203106, 1055497340, -1099101530, -1091493710) + W(3, 1035688516, -1100130836, 1051271785, 1037370697) + + W(4, -1095822387, -1094806083, -1112580467, 1065636371) + + W(5, 1057422694, 1038056034, -1090043318, -1101753600) + + W(6, -1096883389, 1022251406, -1105263591, 1066985993) + + W(7, 1050729384, -1113669663, -1109365057, -1098907474) + + W(8, -1106861506, 1025821768, -1099047212, -1090221894) + + W(9, 1040303734, -1095697855, -1132012078, 1042447412) + + W(10, 1003858895, -1141198631, 1038164225, 1007864647) + + W(11, -1131632326, 1034223408, 1022279531, -1111449439); + WS(-1097113303, -1098536273); + sum1 = W(0, -1128711506, 1001100740, -1116245678, -1123204686) + + W(1, -1122390725, 973480529, -1135175840, -1112078369) + + W(2, 1026190055, -1129782332, 1036959597, 1010531062) + W(3, 1040533155, 1032146359, 1016950072, -1115082484) + + W(4, -1089449919, -1090119792, -1089233276, -1088111001) + + W(5, -1083999320, -1100399617, -1096337083, -1098983064) + + W(6, 1051085133, 1054274400, 1059231684, 1068549146) + W(7, 1060528017, 1051645044, 1021720363, 1056811397) + + W(8, 1039600836, 1006971045, 1035061810, -1106500579) + + W(9, -1110173729, -1118570949, -1105366950, -1163217111) + + W(10, 1017357112, -1151099700, 1008174399, 1027830380) + + W(11, -1118330949, -1138008953, -1113983621, -1114515356); + sum2 = W(0, -1126544110, -1106788080, -1123763395, 1031886252) + + W(1, 1019879781, -1106896864, 1025236903, -1103198659) + + W(2, 1057867675, 1045040387, -1116688865, 1008770068) + + W(3, -1111977015, -1104133845, -1123575137, -1095297610) + + W(4, -1084782762, -1103936041, -1096694543, 1065118928) + + W(5, -1120781351, -1094413904, 1055216743, 1063595180) + + W(6, -1101472514, -1095964817, 1033632512, 1070094219) + + W(7, -1093909441, -1091348732, -1087869152, -1101379604) + + W(8, 1046930113, 1060484868, -1098897900, 1055777327) 
+ + W(9, -1105350592, -1098592917, 1032159773, -1090856164) + + W(10, 1047938716, -1116398073, 1033301960, -1112910333) + + W(11, -1132175940, 1028498658, -1111429301, -1148399072); + WS(-1088872108, 1052196610); + sum1 = + W(0, -1115772244, 1026268067, 1022683057, -1119255225) + W(1, -1126845407, 1018606696, -1122729971, -1119390253) + + W(2, -1114837443, -1111544357, -1104977249, -1110507588) + + W(3, -1124752108, -1106979958, -1118779367, 1032957370) + + W(4, 1040729928, -1129315660, 1050293045, -1115663356) + W(5, 1055556797, 1038400791, 1043464385, 1010467373) + + W(6, 999143576, 1051137549, 1050616762, -1123488747) + W(7, -1107532492, -1115698508, -1106851872, 1013947482) + + W(8, 1035543595, -1115388138, -1110255664, -1102766242) + + W(9, -1115121626, -1111362474, -1114761165, -1127308771) + + W(10, -1109636323, -1138847608, -1148093909, -1113579532) + + W(11, 1031428336, -1123058305, 1024891874, -1117161611); + sum2 = + W(0, 1021571414, 1005045849, 1032761087, -1131955840) + W(1, 1002892889, -1113636159, 1017257782, -1116947723) + + W(2, -1136485425, -1108696907, 1015124882, -1109877489) + + W(3, 1050593712, -1105061066, 1027289415, 1035889263) + W(4, 1033792089, 1026572719, -1097148617, -1072841573) + + W(5, 1051731235, 1040799594, 1025178361, 1033387155) + W(6, 1044650880, 1068805535, 1072674603, -1073632622) + + W(7, 1040667946, -1114163718, 1033059623, -1113785952) + + W(8, -1111938281, 1042295210, 1053858128, -1114078156) + W(9, 986622244, -1105514756, -1131411966, 1024784261) + + W(10, -1111133361, 1015874538, -1115667125, 1025743417) + + W(11, 1029743517, -1150165522, 1028606721, -1131685126); + WS(1059574956, -1147706177); + sum1 = W(0, -1126851274, 1032992154, 990365335, 1021792788) + W(1, 1014654722, 1023708133, 1021433863, 1024270494) + + W(2, -1107330821, -1114850125, -1111585801, 1025219833) + + W(3, 1042448198, 1041111834, 1028566264, 1025900485) + W(4, 1039522745, 1036915515, 1058476188, 1057257569) + + W(5, -1130368142, -1087441640, 
-1103664757, -1101532519) + + W(6, -1101979721, -1103811902, -1091594576, -1102147871) + + W(7, 1049911159, 1046832311, 1042912388, 1034347764) + W(8, 1039509326, 1037543219, 1044635632, 1015035920) + + W(9, -1098033363, -1103709614, -1112635561, -1106561201) + + W(10, -1113513772, -1122830319, -1115799180, 999411009) + + W(11, 1008106627, 1030688938, 998865060, 1033251158); + sum2 = W(0, 1030668111, -1138525905, -1118095530, 1046358820) + + W(1, 1026007575, 1036395993, 1044323398, -1102154889) + + W(2, -1100054904, -1127093272, -1104427366, -1094333916) + + W(3, -1098434999, -1114547879, -1098255510, 1043811931) + + W(4, -1106823977, -1118630721, -1104404781, 1066330506) + + W(5, 1032956568, -1089680445, -1118009606, -1104253130) + + W(6, 1038847176, 1023505668, 1055088734, 1059364463) + W(7, -1109156363, -1102029714, 1019564502, 1042217401) + + W(8, 1045749759, -1100949461, -1106737563, -1115593085) + + W(9, -1105827255, 1014767972, -1114455410, 1033201092) + + W(10, -1139922314, -1137789612, 1025350194, -1118124817) + + W(11, 1044080198, -1111415597, 1034872747, -1116871429); + WS(1056332375, 1037816258); + sum1 = W(0, -1121656782, -1121854558, 988019291, 1027294260) + + W(1, -1141230927, -1146196851, -1124586778, -1119789479) + + W(2, -1103168373, -1112652978, 1032694229, 1032971073) + + W(3, 1036051832, 1029047507, -1138159876, -1116156558) + + W(4, -1119737691, -1116096096, -1099471067, -1088930875) + + W(5, -1088040995, -1094038173, -1105369135, -1100717841) + + W(6, 1044637987, 1045017865, 1051843657, 1058468850) + W(7, 1059943427, 1054278049, 1040438911, 1052440082) + + W(8, 1028439953, -1108613751, -1124711667, -1105155884) + + W(9, -1110648631, 1023373473, -1109090662, -1121068380) + + W(10, -1147698697, 1015231409, -1126623722, 1025720722) + + W(11, -1125643860, 1025459107, -1140770656, 1033302707); + sum2 = W(0, 1037403217, 1056969468, 1057593955, 1065979451) + W(1, 1052219437, 1048107842, 1012201018, 1034400525) + + W(2, -1101035659, -1090106390, 
-1080354518, -1077736273) + + W(3, -1095104394, -1110113632, 1035332836, 1036232380) + W(4, 1032316963, 1038083410, 1060881997, 1057826659) + + W(5, 1040943565, -1093495590, 1023246776, -1105354500) + + W(6, -1116665707, -1119459300, 1028006921, -1093367622) + + W(7, 1050471411, -1098889696, 1037291810, -1172133380) + + W(8, -1159849898, 1009385423, 1038355181, -1108823981) + + W(9, 1028403474, 1041580846, -1108503511, 1023204972) + + W(10, 1025618366, -1128266716, -1107076145, 1040978108) + + W(11, -1107763897, 1031803963, 1028340263, -1124125618); + WS(1057728492, -1114894260); + sum1 = W(0, -1113553553, -1129994721, -1116323110, 1032700067) + + W(1, 1022553594, -1174372438, 1025649872, -1204938694) + + W(2, -1137973853, 1032124939, 1038491142, 1042214629) + + W(3, -1102850457, -1103814977, -1108835354, -1117610625) + + W(4, 1030725486, 1023297510, 1042012404, 1063322480) + W(5, 1049455560, 1038337085, 1036180712, 1041170774) + + W(6, 1027451936, 1039175515, -1096381204, 1002639809) + + W(7, -1095884801, -1099447670, -1107257155, -1110131186) + + W(8, 1024408310, -1104644784, -1098358292, -1097199096) + + W(9, 1038368473, 1041557913, -1143485305, -1139346043) + + W(10, -1122023828, 1015759447, -1129065232, 1025211173) + + W(11, -1119912573, -1120935138, 1022194670, -1131327617); + sum2 = W(0, -1132724746, -1125894137, 1008401986, -1123753553) + + W(1, 1028568703, -1129573217, -1115138968, -1129237393) + + W(2, -1136277690, 1023794749, 1034921985, -1135255538) + + W(3, -1095862669, -1103833757, 989975657, -1117578737) + + W(4, -1115335750, 1019650853, -1111489002, 1065003137) + + W(5, 1042205114, -1104966989, 1028245440, -1137003202) + + W(6, -1142062564, -1102896527, -1095970428, 1052274635) + + W(7, 1050282098, 1039075981, -1124502561, 1029353869) + + W(8, -1111737990, 1036465851, -1093339450, -1130345777) + W(9, 961646223, -1123958953, 975314596, 981021860) + + W(10, 1021138001, -1117376889, 1036367329, -1114642052) + + W(11, 983274770, -1121876761, 
-1131382405, -1129104973); + WS(1064957100, -1084259623); + sum1 = W(0, -1147443767, 1002237105, 1042679235, 1025036588) + W(1, 1039980881, 1039876787, -1122119878, 1046679440) + + W(2, 1048172674, 1022388856, -1111985505, -1105536623) + + W(3, -1111204540, -1118973740, -1115201293, 1036937851) + + W(4, 1050085746, 1049301459, 1061123934, 1059891343) + W(5, 1063082922, 1059164422, 1050147791, 1052477312) + + W(6, -1087174820, -1096483251, -1084863818, -1085360136) + + W(7, -1087496217, -1085023864, -1099749261, -1093770425) + + W(8, -1110620477, -1130736462, 1036594273, -1096717087) + + W(9, 1049051417, -1098175193, 1038851310, -1094492881) + + W(10, 1041967934, 1032395752, 1029284270, 1027822410) + + W(11, 1038157169, 1033526682, 1035926782, 1040430103); + sum2 = W(0, 1043574668, -1110691764, -1103083598, -1104224219) + + W(1, -1111324648, 1045365848, -1123650350, 1040472402) + + W(2, -1079200674, -1088609636, -1104512784, 1072315297) + + W(3, 1071717488, 1074062948, 1072927288, 1075518858) + W(4, 1070660923, 1057860954, -1107138551, -1092267131) + + W(5, -1073631870, -1072626825, -1076777698, -1068851440) + + W(6, 1052128984, 1049301959, -1106877192, -1082025998) + + W(7, -1098154139, 1060924493, 1055782129, -1085268633) + + W(8, -1102512316, -1119415890, -1099577256, 1048136354) + + W(9, -1101281480, 1049098296, 1053482338, 1045375219) + + W(10, -1114631788, -1111966430, 1025811761, -1120067756) + + W(11, -1097839567, 1046934152, -1142912019, -1113956244); + WS(-1075237718, 1059347685); + sum1 = W(0, 1021242911, 1033512399, 1029870036, 1022930282) + W(1, 1032776022, 1018371807, 1019954077, 1033340372) + + W(2, 1049968737, -1159856274, 1035044013, -1122614884) + + W(3, -1113136009, -1134430929, -1114025505, 1040742208) + + W(4, -1123217820, 1052611492, 1055398719, 1061884279) + W(5, 1059035017, 1058365139, 1054270736, 1057072208) + + W(6, -1100043508, -1096977477, -1086974821, -1081627821) + + W(7, -1085989237, -1083853344, -1094951367, -1082038839) + + W(8, 
-1123750750, 1035058824, -1111424357, 1035852049) + W(9, 1046594056, 1040835864, 1036293471, 1043435581) + + W(10, 1039650209, -1164938034, 1031585078, -1146263191) + + W(11, 1040828171, -1113397210, 1041271429, -1112152131); + sum2 = W(0, -1109418256, 1045686709, 1025535098, 1041368995) + W(1, 1025255906, 1045341483, -1131647237, 1050563248) + + W(2, -1082007392, -1092895857, -1097230803, -1087580936) + + W(3, -1090709237, 1030380818, -1098811123, -1122444474) + + W(4, 1074622373, 1045059925, 1022527077, 1072979117) + + W(5, 1059630355, -1082271661, -1098333373, -1080249300) + + W(6, -1087167323, -1090791673, 1061750517, 1066822104) + + W(7, -1104688853, -1098772696, -1097425000, 1035536875) + + W(8, -1079889153, 1052961109, -1094791227, -1121141584) + + W(9, -1114952578, 1048788253, -1109808864, 1062091865) + + W(10, 1054843717, -1111299754, -1097908774, 1039933817) + + W(11, -1109459004, 1032974905, 1041274025, -1101634861); + WS(-1073405707, 1071333561); + sum1 = W(0, 1032445482, -1114759602, 1009365348, -1116673978) + + W(1, -1140494528, -1138343378, -1164102249, -1128324122) + + W(2, -1107154225, 1043648434, 1025826187, 1050217769) + W(3, 1045274198, 1041826370, 1037855412, 1036590066) + + W(4, -1086616986, -1097373989, -1088696011, -1084271170) + + W(5, -1086550017, -1088202787, -1096484208, -1090407429) + + W(6, 1053035442, 1049497533, 1058380737, 1057270871) + W(7, 1060414079, 1054213138, 1049814032, 1044502774) + + W(8, 1037191452, -1113096234, 1042579156, 1027100008) + + W(9, -1122447285, 1033632004, -1117413058, 1049279047) + + W(10, 1005358651, -1140703610, -1147371482, 1008260703) + + W(11, -1128134433, -1146407565, -1132301012, 1030979244); + sum2 = + W(0, -1104849756, -1130309268, 1032168017, 1006608216) + W(1, 1025939811, -1116205504, -1134020588, 1015836550) + + W(2, 1044778147, 1042344095, -1115874267, 1016899230) + W(3, 1015318926, 1017541078, 1025032291, -1134707560) + + W(4, 1087011564, 1065552014, -1071484991, -1064792716) + + W(5, 
-1108487910, -1111002668, 1026184365, -1105554232) + + W(6, 1051692028, 1050304642, -1095624232, -1080996320) + W(7, 1038353709, 1015541566, -1115175630, 1035428553) + + W(8, -1115275477, 1012814276, 1034711867, 1012706900) + W(9, 1031018877, 1017981722, 1016059306, 1030487759) + + W(10, 1024288807, -1122813731, 1018071134, -1113948677) + + W(11, 1039145567, -1121567210, -1161414849, 957459976); + WS(-1093094231, 1019545057); + sum1 = W(0, -1147370279, -1111661690, 1036370005, -1130128916) + + W(1, -1138886402, -1114353490, -1139052495, -1123566046) + + W(2, 1024662611, 1043371102, -1098598749, -1121967929) + W(3, 1034324947, 1030255498, 1020719663, 983686292) + + W(4, 1045730364, 1007712219, 1063151515, 1048771569) + W(5, -1102866212, 1043747359, 1042970997, 1044423271) + + W(6, -1100871081, -1103400446, -1086049630, -1094208743) + + W(7, 1057052051, -1103168823, -1114366281, -1105757546) + + W(8, -1113292876, -1125293706, 1038700136, 1042591246) + + W(9, -1109292474, 1040829042, -1113681601, -1119014482) + + W(10, -1115285964, 1035921815, -1106696483, 1031896756) + + W(11, -1117085921, -1117982022, 986496271, -1116127556); + sum2 = + W(0, 1017737448, -1112488549, 1041909593, -1119128646) + W(1, 995222883, -1138532089, -1122958336, 1002378369) + + W(2, -1113949677, 1043174240, -1095542187, -1123930650) + + W(3, 1011641777, 1043431269, -1133447353, 1011720393) + W(4, -1109074925, -1099690943, 1055357640, 1063813292) + + W(5, 1029075282, -1111808055, -1121958092, -1111615901) + W(6, -1106456753, 1036040708, 990810979, 1064786485) + + W(7, -1080550900, -1099317256, -1111251400, -1122017034) + + W(8, 1033894674, -1136025401, 1036060652, -1095508468) + W(9, -1096849813, 1042059844, 1031456264, 1041840135) + + W(10, 1002006465, -1127095326, -1106928479, 1041202855) + + W(11, 1028541984, 1013308785, -1130037494, 1011740497); + WS(1065464534, 1066200435); + sum1 = W(0, -1120929867, 1037468205, -1104046500, -1118049328) + + W(1, 1026495526, -1102710553, 1047356358, 
-1104379061) + + W(2, -1114034816, -1098649416, 1044270480, 1030873567) + W(3, 1027018036, 1029661039, 992085460, 1014898857) + + W(4, -1093428139, -1095417420, -1095455089, -1078956988) + + W(5, -1097112752, -1090364038, -1098761015, -1089997432) + + W(6, 1053507241, 1053719708, 1059517274, 1062996894) + W(7, 1062358792, 1051059848, 1053033759, 1058137077) + + W(8, 1039630850, -1117479019, -1116571174, 1037682987) + + W(9, -1100052716, -1134398359, -1124964177, -1119205176) + + W(10, 1039737375, -1104296205, 1034668025, 1027940302) + + W(11, -1105628906, 1047874257, -1105711476, 1036556777); + sum2 = W(0, 1052570920, -1096757231, 1032706704, 1048916325) + W(1, 1032567852, 1027784571, -1123035498, 1032789056) + + W(2, -1079960756, -1072214843, -1067716640, -1071149720) + + W(3, -1113086642, 1026889523, -1102782368, -1113248449) + + W(4, 1059860803, 1077140634, 1078085184, 1074719494) + W(5, -1126012658, 1023965646, 1033915049, -1131155204) + + W(6, 1024102895, 1045135233, 1007395147, 1048879105) + + W(7, -1105291454, -1125605772, 1036400237, -1120868355) + + W(8, -1109439044, -1135125103, 1039504993, -1119420973) + + W(9, 1042487387, -1107726802, 1040624471, -1123377002) + W(10, 1028585912, 1026412089, 999691338, 1031274943) + + W(11, -1115876939, 1031655696, -1118021143, 1033633154); + WS(-1088460652, -1077196042); + sum1 = W(0, -1106714405, 1012745873, -1106817926, 1014233321) + + W(1, -1139695810, -1112340330, -1112616426, 1014140598) + + W(2, -1126618358, -1104692362, -1128320455, -1113350277) + + W(3, -1104636978, -1111992333, -1112151140, -1103199781) + + W(4, -1100538888, -1105672234, -1096420642, 1032760733) + + W(5, 980853696, -1098221750, 1032069058, -1124202648) + W(6, 1061836401, 1049398341, 1063373516, 1062672955) + + W(7, 1040237156, 1049303442, -1115352833, 1047738902) + + W(8, -1105561066, -1107347643, -1095103574, -1104500106) + + W(9, -1136108756, -1098848505, 1028357771, -1108236605) + + W(10, 1017778831, -1107815087, 1021813111, -1130991024) + + 
W(11, 1015562593, -1124042178, -1117430379, 1024298444); + sum2 = + W(0, 1042942941, -1103739979, 1021675082, 1031504716) + W(1, -1103784377, 1021140017, 1032312275, -1113021145) + + W(2, -1095373315, 1042549384, -1100136800, 1032277385) + W(3, 1051314558, -1097536524, 1033748340, 1006926454) + + W(4, 1047404354, -1104635751, -1156628070, 1056265483) + + W(5, -1086627236, 1056610233, -1097748456, 1032415346) + W(6, 1041600542, 1049880149, 1042313351, -1095000083) + + W(7, 1058671506, -1087499329, 1053303874, -1106742186) + + W(8, 1029337611, -1113660916, -1103511888, 1052284225) + + W(9, -1098025422, 1053296214, -1097031052, 1040224097) + + W(10, -1116792463, 1033409028, -1100575516, 1042607543) + + W(11, -1102539848, -1123811627, 1034071897, -1116904421); + WS(-1090190636, 1052333694); + sum1 = W(0, -1118008162, 1017370722, 1028313190, 1032465571) + W(1, 1021024634, 1022145795, 974396430, 1023704762) + + W(2, 1046164882, 1025512151, 1039176172, -1140639831) + + W(3, -1114523818, -1123803590, -1114987978, -1150325779) + + W(4, -1087764995, -1107299674, -1107276920, 1054861041) + + W(5, 1048774281, 1052546887, 1033108808, 1052242867) + W(6, 1050774996, -1113611419, 1042980645, -1104478910) + + W(7, -1116066086, -1088290914, -1101829203, -1082760046) + + W(8, -1132114980, -1133738119, 1016759930, 1039228938) + W(9, 1029146129, 1046828701, 1025239231, 1050382392) + + W(10, 992812356, -1143374172, 1031741320, 991966796) + + W(11, -1131726971, -1135053738, -1135839572, -1117633184); + sum2 = W(0, -1112927492, 1000278501, -1115114722, 1038568093) + + W(1, -1107752992, 1036864823, -1111435217, 999311589) + + W(2, -1126311221, -1112124316, -1096516945, -1095439368) + + W(3, 1035770391, -1173738153, -1127749525, -1118042174) + + W(4, -1124129707, 1030656727, 1062718459, 1069168233) + + W(5, -1092856910, -1097729376, -1108466839, -1106807928) + + W(6, -1089259297, -1136644755, -1089907054, 1066488720) + + W(7, 1057602472, -1119016562, 1028769031, -1101412503) + + W(8, 
-1121960162, -1119195008, -1113085858, -1108783853) + + W(9, -1101947316, -1105922885, -1119106133, -1119408565) + + W(10, -1127721129, -1143597573, 1010259715, 1016497897) + + W(11, -1142073349, 1034450025, -1113345821, -1121786357); + WS(1050584535, 1068150000); + sum1 = + W(0, -1115260532, 1010165469, -1112895478, 1003680362) + W(1, 1038151522, -1113655284, -1132334157, -1116596385) + + W(2, 1034938395, -1115625742, 1043729220, 1046586119) + W(3, -1096382429, 1036369142, -1110737789, 1020311663) + + W(4, 1038987194, 1047833470, -1102791097, 1053057826) + W(5, 1060286702, 1031810112, 1033665067, 1045919272) + + W(6, -1117589351, -1104527215, 1041775364, -1092950416) + + W(7, -1085160955, -1099697720, -1112334730, -1112800213) + + W(8, -1115913835, -1116485512, 1035117570, 1032942189) + + W(9, 1051158381, -1109351189, 1035094123, -1113211075) + + W(10, 1013034091, 1026352434, -1123123343, -1126776150) + + W(11, -1118268740, 992197073, 1007592182, -1128843070); + sum2 = + W(0, 1027450901, 999052617, 998486633, -1098069390) + W(1, 1050271656, -1144500281, 1034115809, -1111215245) + + W(2, -1130117632, 1037992777, 1035832253, 1058826704) + W(3, -1075559944, 1051052067, -1104035684, 1041076323) + + W(4, -1112751331, 1031860941, -1100828230, 1066377631) + + W(5, 1059983898, -1089703828, 1037142243, -1116488683) + + W(6, 1023748039, -1148181737, -1107268336, 1031601953) + + W(7, -1105105514, 1006070601, 1031181077, -1123703892) + + W(8, -1129067048, 1031574877, -1108371420, -1110435251) + + W(9, -1111123861, -1135053221, 1013422629, 994427506) + W(10, 1020055214, -1123191999, -1176849298, 992240434) + + W(11, 1022099846, -1122685646, -1154287506, 1025316569); + WS(1065940278, 1034622363); + sum1 = + W(0, -1130882111, 1018945629, 1038546130, -1109081570) + W(1, -1116833467, 1027338207, -1118439678, 1023675896) + + W(2, 1042781009, -1105035701, 1037583679, -1089961211) + + W(3, -1106689964, 1025705412, -1110899849, 1000818681) + W(4, 1048563851, 1039562393, 1050832116, 
1065150616) + + W(5, 1063013875, 1053064399, 1043578884, 1052844651) + + W(6, -1106282856, -1093168999, -1106056839, -1081267407) + + W(7, -1093571617, -1098826451, -1098971955, -1103816344) + + W(8, 991352096, -1128564863, 1040441019, -1125259845) + W(9, 1037655800, 1032573560, -1126802554, -1174556270) + + W(10, -1132163876, -1146137419, 1034240611, -1103163033) + + W(11, 1014989293, -1132178841, -1140385876, -1124029942); + sum2 = W(0, 1042045232, -1100582068, 1051353130, -1098661297) + + W(1, 1033093932, 1041505758, -1111611431, 1036803556) + + W(2, 1025081894, 1047796551, -1135649915, -1083821047) + + W(3, -1113123121, -1102707738, 1042774566, -1119704488) + + W(4, -1120251562, -1130142714, -1091982339, 1066411068) + + W(5, -1108263063, -1116829613, 1029093314, -1105374597) + + W(6, -1098032480, 1051286467, -1088973774, 1066192686) + + W(7, -1096817778, -1124748705, -1115318079, 1039210409) + + W(8, -1122897683, -1114446743, 1048662352, -1098476628) + + W(9, 1043244089, -1105512115, 1039955893, -1115075522) + + W(10, -1131598407, 1028094206, -1114986235, 1035402858) + + W(11, -1113670703, 1040252082, -1113507172, -1136180467); + WS(1056991468, -1087030746); + sum1 = + W(0, -1123211458, 1040392564, -1107179074, 1032323379) + W(1, -1112698351, -1113154609, 1035059984, -1117349658) + + W(2, -1104816506, -1123900599, -1097679367, 1050060447) + + W(3, -1113372906, -1118981766, 1036804874, -1117759553) + + W(4, -1105628499, 1035944426, -1092912076, 1043269081) + + W(5, -1098944114, -1111852653, 1030188977, -1099907470) + W(6, 1049039296, 1031104918, 1058030045, 1044634502) + + W(7, 1032247835, 1043769326, 1035537687, 1048904265) + W(8, -1110865210, 1038992913, -1092536332, 1052766173) + + W(9, -1101773536, -1111088882, 1017337855, -1120093295) + + W(10, -1119944116, 1019968535, -1113705364, 1024115141) + + W(11, 1016666555, -1113641643, 1034739876, -1119368937); + sum2 = W(0, 1038207100, 1055370704, 1017023961, -1102268135) + + W(1, -1122475964, 1023693826, 
1033230771, -1120372849) + + W(2, -1100024892, -1091918952, -1082116017, -1125190319) + + W(3, 1048656820, -1118701724, 1009208157, 1017593165) + W(4, 1019998746, -1108168198, 1065911065, 1060923636) + + W(5, -1095962080, 1025593786, 1022143062, -1123799386) + + W(6, -1116235844, 1051323790, -1096201210, -1094794141) + + W(7, 1023749917, -1112111831, 1028403793, -1116942925) + W(8, 1028590129, 1017416940, 1032160655, 1048662690) + + W(9, 1005611185, -1121410776, -1131420642, 1018425448) + + W(10, -1132429552, -1131674494, -1107738152, -1115273448) + + W(11, -1120617509, 1032669658, 1007544955, 990755743); + WS(1065408790, 1036624080); + sum1 = W(0, 1031689276, -1118902603, -1130392064, -1130349122) + + W(1, -1124855342, -1120117724, 1032115325, -1114562026) + + W(2, -1095782030, 1036811736, -1106389326, -1116251760) + + W(3, 1016267643, -1116551776, 1029730958, -1110739566) + + W(4, 1040908418, -1099523235, -1100633526, -1088986706) + + W(5, -1097124896, -1093812931, -1102530619, -1093706094) + + W(6, 1044865119, 1048657234, 1056569877, 1059819669) + W(7, 1057017186, 1059687187, 1046000250, 1058243684) + + W(8, 1010781241, -1114118270, -1105362262, -1109527143) + + W(9, -1101014271, -1105256235, 1028862634, -1118348593) + + W(10, -1124793181, -1132122507, 1010247305, 1021657598) + + W(11, -1147394236, 1019294354, -1131137795, 1010280523); + sum2 = + W(0, 1008018405, -1131603511, 988821927, -1126539767) + W(1, 1029044494, -1154431236, -1144594818, -1134541747) + + W(2, 1028827850, -1115409151, 1044469435, -1105522597) + W(3, 1031515550, 1029512486, -1118759177, 1017115388) + + W(4, 1048442625, -1128423353, -1144038842, -1080807551) + + W(5, -1100063920, 1047735839, 1049631559, 1040817807) + W(6, 1022982076, 976919119, -1106419532, -1069309764) + + W(7, -1077869698, 1057375362, 1074645104, 1074183528) + W(8, 1015261982, -1110305290, 1025710382, -1128896061) + + W(9, 1034212953, 1047372093, -1121013958, 1028992794) + + W(10, 1023643398, -1142661114, 1024565032, 
-1156968388) + + W(11, -1108906576, 1040986767, -1109343217, 1013497529); + WS(-1134496480, -1135409208); + sum1 = + W(0, 1012816882, -1131032216, 1015795842, 1017081284) + W(1, 1032335660, -1114842158, -1121133019, 1020892138) + + W(2, -1118561391, 1023949458, 1025102291, -1098855254) + + W(3, -1109465919, 1018180354, 1039697400, -1121622748) + + W(4, -1172095542, -1135187513, -1122643706, 1062211658) + + W(5, 1053298610, 1055550179, -1104417019, 1042088653) + W(6, 1042727039, -1111514505, 1044113108, -1088869665) + + W(7, -1090758606, -1097017583, 1024023145, -1131309822) + + W(8, -1101252924, 940438235, -1099247502, 1037442288) + W(9, 1036453998, 1038283500, -1127849669, -1111728213) + + W(10, 1038725048, -1118209095, 1039141810, -1110868352) + + W(11, -1124785329, -1123999226, 984058180, 985363057); + sum2 = + W(0, 1027844464, -1120185020, 1033301171, -1142892826) + W(1, -1130897478, -1112219415, 1008317678, -1123809910) + + W(2, -1123756397, 1023474305, -1120807818, 1045181163) + + W(3, 1050514681, -1148494098, -1124897908, -1112292483) + + W(4, -1109401613, 1009596088, -1105464515, -1069578385) + + W(5, 1080182479, 1033916695, -1111316356, 1038900121) + + W(6, -1123223808, 1048386893, -1117881742, -1079085438) + + W(7, 1057107198, -1155326684, 1015620530, -1138311341) + W(8, 1035039619, -1119575836, 1029989604, 1032607110) + + W(9, -1113328530, -1143648562, 1010222764, -1144599918) + + W(10, -1128717516, -1197272208, -1129559846, 1013531724) + + W(11, 1001587842, -1126181730, 1011089790, -1129634008); + WS(1063953772, 1006246735); + sum1 = + W(0, -1129265445, -1103221880, -1129858606, -1113052252) + W(1, 970168057, 1035503657, -1114702297, 1026538155) + + W(2, 1023929568, -1113408580, -1152025615, -1124775629) + + W(3, -1116248388, -1112901548, -1114551939, 1012918985) + W(4, 1050480356, 1053092565, 1054804888, 1058637767) + + W(5, 1061520345, 1052655373, 1043614208, 1053852345) + + W(6, -1094701786, -1092563365, -1099755687, -1081976315) + + W(7, 
-1085039957, -1106127939, -1104465584, -1094975651) + + W(8, 1037312131, -1114954585, 1041973195, 1043978961) + W(9, -1109284733, 1037644971, 1036504960, -1153236991) + + W(10, 1034664643, 1012249970, 1007014628, 1034276907) + + W(11, -1121545966, 1031841028, -1122618723, 1015394179); + sum2 = W(0, -1108119214, -1096454441, -1110533412, -1119694402) + + W(1, -1127729168, -1110958880, 1023031785, -1130979192) + + W(2, -1098605940, 1047215192, -1103381544, -1128298432) + + W(3, -1115663656, -1114162078, 1023974294, -1102917557) + + W(4, 1035846227, -1114546522, 1042040175, 1054173861) + W(5, -1103681801, 1035812038, 1020649501, 1043843622) + + W(6, 1036002502, 1051355228, -1102267339, 1055154953) + W(7, 1052945690, -1097546689, 1033369183, 1033695483) + + W(8, -1110269476, 1034086715, -1102892813, 1033662351) + + W(9, 1018137856, -1135158408, 998437331, -1106336801) + + W(10, -1115932080, 1019597651, -1130886478, -1108620470) + + W(11, 1033484697, -1106956606, 1029836288, 1026224246); + WS(1049960663, -1098656338); + sum1 = + W(0, -1129270334, 1010743226, -1116543140, 1036165346) + W(1, 1026370235, 1026933359, 1009750007, -1164389112) + + W(2, -1097905355, 1038429850, 1026505899, -1111354490) + W(3, 1012909508, 1034045586, -1139624135, 1033987231) + + W(4, -1106458043, -1094238830, -1084542826, -1093117384) + + W(5, -1096055524, -1093158415, -1113672231, -1098353419) + + W(6, 1053746044, 1050579558, 1060156652, 1065413702) + W(7, -1110641506, -1140159326, -1153271990, 1046556600) + + W(8, -1153859324, 1008020477, -1101873487, -1122984978) + + W(9, -1132001470, 1048347588, 1027525597, -1124973593) + + W(10, -1120671826, 1036488405, 1033306211, 1035776465) + + W(11, 1015184758, -1115507932, 1024719161, 1037016731); + sum2 = W(0, 1007202997, 1038605814, -1111184485, -1097778206) + + W(1, -1107862779, -1126800416, -1122928880, -1117103670) + + W(2, -1107138382, -1092409649, -1086536233, 1033842165) + + W(3, 1050500010, -1107791267, 1035028069, 1024236113) + + W(4, 
-1101985157, -1111080585, 1058778058, 1062592323) + + W(5, -1115904936, 1050870644, -1115500703, -1137706913) + + W(6, -1105521455, 1031799434, -1102287591, 1054124413) + + W(7, -1121603522, 1039372702, 1030823202, 1012044041) + W(8, 998286218, -1127152184, 1043783051, -1107786643) + + W(9, -1104929125, -1103666737, -1123649878, 1042413442) + + W(10, -1135144653, -1138707909, 1018462794, -1134709509) + + W(11, 1016336685, 1035379787, -1120475406, -1119073178); + WS(1045571246, -1101396513); + sum1 = W(0, 1027156266, -1135540454, -1123739311, -1123826896) + + W(1, -1110505155, -1138173481, 1019384210, -1138775820) + + W(2, -1105879340, 1031079263, 1036462891, 1048275928) + W(3, 1040289247, 1033674439, 1035302207, -1109766005) + + W(4, -1095381975, -1094890856, -1086841044, -1081913239) + + W(5, -1087831711, -1095054872, -1100616678, -1096527684) + + W(6, 1052608133, 1052732944, 1059410250, 1057453824) + W(7, 1053535332, 1059217778, 1056369955, 1057086003) + + W(8, 1036979366, -1110160581, 1033495803, 986236332) + W(9, -1121128186, -1117509614, 1002559041, 1031959012) + + W(10, -1116386461, -1124519785, -1113888968, 1015417435) + + W(11, -1106215130, 1025380519, -1132265864, 1009437253); + sum2 = W(0, 1023702857, -1116086512, 1027064225, 1024261913) + W(1, -1115739014, 1023117658, 1011341860, 1016675050) + + W(2, 1025579445, 1041684936, 991232624, 1049263730) + W(3, 994389040, -1121855219, -1121905805, -1114246345) + + W(4, -1107828603, -1100190580, -1127488618, -1121382305) + + W(5, 1049530396, 1041567276, -1097196557, -1125328530) + + W(6, 1030949885, -1127701406, 1063977916, 1081524863) + + W(7, 1074557390, -1078514947, -1067412604, -1072747255) + + W(8, 1030056541, -1101693823, 1026840857, 1032271344) + W(9, 1041139218, 1024600809, -1110332083, 1045882874) + + W(10, -1138881396, 1031874044, -1115029594, -1124133978) + + W(11, 1015337722, -1113125346, -1157524144, 1009384836); + WS(-1091844311, 1066789497); + sum1 = W(0, -1126657090, -1118280816, 1035276339, 
1034822275) + + W(1, -1122398524, 1027512241, -1105911320, 1036073133) + + W(2, -1121934839, -1104655157, 1036892560, 1041291861) + + W(3, 1016354085, -1100932977, -1113195754, -1137558275) + + W(4, 1053346558, -1150447404, 1061301148, 1054539137) + W(5, 1061579109, 1052329175, 1010492736, 1052092547) + + W(6, -1094816004, -1096277599, -1095276499, -1085328499) + + W(7, -1094948141, -1090319102, -1109759879, -1096198988) + + W(8, -1134360567, 1026981217, 1035239934, -1132053288) + + W(9, 1041572288, -1138204721, 1026669711, -1122862318) + + W(10, -1196074864, 1030410603, -1117233544, 1032377106) + + W(11, -1113536398, 1028691858, 1016723124, 1004385343); + sum2 = + W(0, -1130924015, 976327856, 1006816288, 1032746467) + W(1, -1109752024, 1030725620, -1118656485, 1013150272) + + W(2, 1022730742, -1107671502, -1102494949, -1063244855) + + W(3, -1117144683, -1118200508, 1023153636, 1019358625) + W(4, -1156842543, 1030703882, 1050591037, 1084548157) + + W(5, 1041535529, 1030607420, -1117378226, 1000111712) + W(6, 1039023607, -1100688592, 1039287442, -1091568120) + + W(7, 1042495948, -1100624377, 1033109796, -1135441064) + + W(8, -1147026848, -1113451109, 1030233046, 1034384724) + W(9, 1024850879, 1037336210, -1123040622, 1025293966) + + W(10, 1024455841, 1015244616, -1121583866, 1032201434) + + W(11, -1112832588, 984232072, 1003511712, -1124806470); + WS(1062375148, 982896070); + sum1 = W(0, 1041356906, -1141741512, -1123251246, -1121772047) + + W(1, -1138991637, -1156317853, -1161666833, 1011726474) + + W(2, -1092346958, 1042371688, -1107640316, 1037798623) + + W(3, 1035668286, 1026956462, 1016120313, -1115727625) + + W(4, -1098857839, -1100631199, -1093348349, -1081710855) + + W(5, -1084667272, -1088318752, -1093940123, -1084607638) + + W(6, 1052252533, 1048733184, 1057292943, 1063732968) + W(7, 1061645578, 1059891330, 1051947731, 1059995535) + + W(8, 1050501706, -1126563284, 1043133649, -1111246422) + + W(9, -1112114769, -1106214611, -1109426316, -1190508359) + + 
W(10, -1165233042, -1134397903, 1008063554, 1032268279) + + W(11, 988108978, 1037368483, -1138661789, 1040598420); + sum2 = + W(0, -1087149149, -1111774062, -1122195827, 1039123010) + W(1, 1035630599, 1055099903, 1042757145, 1060125526) + + W(2, -1081699534, 1035582541, -1087576154, -1082404199) + + W(3, -1111170675, -1119441337, -1097950160, -1098859385) + + W(4, 1075074133, 1078981756, 1080077903, -1086433828) + + W(5, -1073636555, -1080599553, -1080969229, -1075310961) + + W(6, 1074122230, 1067224402, 1066418532, -1082120864) + + W(7, -1078722695, -1083892454, 1038753292, -1085615144) + + W(8, 972078363, -1092574897, -1094572008, -1101495601) + W(9, -1124013413, 1051613332, 1020974546, 1056506110) + + W(10, -1097056662, 1045566397, 1049096521, -1109136155) + + W(11, 1034655424, 1023040696, 1027077203, -1129411423); + WS(-1073733835, -1077508759); + sum1 = W(0, 1004911573, 999004239, 1008908284, 1032559088) + W(1, -1123346838, 1021646261, -1114632613, 1036339578) + + W(2, 1043185540, -1120942224, -1136904037, -1114184455) + + W(3, -1112686066, 1032555200, 1005792618, 1040940927) + W(4, 1056421033, 1054442442, 1059517190, 1066321721) + + W(5, 1063474337, 1052962698, 1044159722, 992460573) + + W(6, -1084678800, -1094509130, -1087084867, -1089134469) + + W(7, -1088382556, -1089853747, -1092765869, -1093589176) + + W(8, 1025064353, 1020812405, -1149079216, -1127746091) + + W(9, 1027145139, -1126936380, 1027348136, -1126618160) + + W(10, 1009545653, 1022285001, -1118599969, 1007063543) + + W(11, -1149726053, 1015641511, 1023282088, 981480241); + sum2 = W(0, 1016441223, -1123853806, 1034027700, 1016212045) + W(1, 1025170155, 1030169991, 1024326153, 1020004331) + + W(2, 1040812131, -1136381199, -1121560396, -1100123577) + + W(3, -1100550013, -1105283667, 1029487024, -1102325825) + + W(4, -1091551076, 1020713637, -1092123318, 1069362941) + + W(5, 1050653373, 1020763615, -1121149602, -1094990109) + + W(6, -1083270432, -1123741604, 1057043619, 1065772322) + + W(7, 
1058297195, -1091989226, -1092470882, -1106605421) + + W(8, 1054198423, -1108424657, -1115262538, -1092584961) + + W(9, -1121642970, 1029919048, 1039520200, 1026718342) + + W(10, 1001473573, -1110846033, 1008724259, -1120589670) + + W(11, 1040111989, -1104422780, 1027795061, -1115881428); + WS(-1083424620, -1083774644); + sum1 = W(0, 1025997707, 995558823, -1132669247, 1015677409) + + W(1, -1113673299, -1146244698, 1028038186, -1119640804) + + W(2, -1107578509, -1112668224, -1109754213, 1048694360) + + W(3, -1099923633, 1044345284, -1117921566, 1030760459) + + W(4, -1116799831, -1098861996, -1099316274, -1079768279) + + W(5, -1093459346, -1098475085, -1113981277, -1105585281) + + W(6, 1051882641, 1045883512, 1060321715, 1059777970) + W(7, 1064776049, 1036566882, 1042756750, 1033934183) + + W(8, -1114213177, -1127946135, -1106758402, 1040276008) + + W(9, -1108196294, 1008462282, -1117078569, -1130472177) + + W(10, -1140244728, -1116793621, -1131243108, -1129876893) + + W(11, -1167573435, 1020492609, 1015090721, -1176961296); + sum2 = + W(0, -1120400279, 1006239848, 1033967902, -1138878316) + W(1, 1012842788, -1135736068, 1028356101, -1120658117) + + W(2, -1120118931, -1112851025, -1101370370, 1037781128) + + W(3, -1100258045, 1002346920, -1112182778, 995892591) + W(4, -1114073896, 1016448074, 1047917593, -1113781647) + + W(5, 1045546113, -1114666278, 1002645288, -1115876966) + W(6, 1044457775, -1095953339, 1065044078, 1069276419) + + W(7, -1075186216, 1035717470, -1097061266, 1049663208) + + W(8, 1036833596, -1112980685, -1119396143, -1110041421) + + W(9, -1132903556, -1120766465, 1015304410, -1123504973) + + W(10, -1137956564, -1122147671, 987310366, -1187381491) + + W(11, -1131003226, 1029533357, 1003776904, -1150885167); + WS(1055938007, -1113494123); + sum1 = W(0, 1002908945, 1005413771, 1025040167, 1016082831) + W(1, -1114102015, 1024456642, -1121270129, 1006080470) + + W(2, 1028210664, -1117675639, 1040926954, -1118443670) + W(3, 1040854289, 1046508410, 
1019764906, 1029150738) + + W(4, -1133494457, 1035958810, 1041989700, -1101702418) + + W(5, -1098047787, -1093641223, -1111985289, -1102768646) + + W(6, -1135464580, -1095959351, -1087581816, -1121242873) + + W(7, 1048632765, 1052220147, 1025236686, 1042463785) + W(8, 1008466835, 1040264713, 1048292991, 1034336763) + + W(9, 1043249233, -1183964258, 1036425152, -1131127167) + + W(10, -1131269290, 1035203030, -1116685065, -1117662885) + + W(11, -1132796663, -1144182458, -1137681823, 1031884341); + sum2 = W(0, -1148833389, 1034952858, -1131645803, 1044582101) + + W(1, -1100126968, 1041029558, -1101188496, 1038559400) + + W(2, -1103329032, -1114547380, -1099667252, 1046239383) + + W(3, 1038848953, 1023994452, 1040593566, -1104909140) + W(4, 1039527980, 1019372679, 1017530647, -1095619156) + + W(5, 1053527025, -1122648352, 1048348234, 1034383711) + W(6, 1036545465, -1111710878, 1040867364, 1074171367) + + W(7, -1072583834, -1089175055, -1110512168, 1048723535) + + W(8, -1140514638, 1042452640, 1048394746, 1046568773) + + W(9, -1104419698, -1095650184, -1113024814, -1105559093) + + W(10, -1111163586, 1025974311, -1102843950, 1030125627) + + W(11, 1045672947, 1033990232, 1039972451, -1123744674); + WS(1054254551, 1014537956); + sum1 = W(0, -1121039367, 1018675757, 1022410264, 1028321069) + W(1, 1032078897, 1035879540, 998902321, 1035017184) + + W(2, 1035774671, -1130008697, 1030747950, -1107714718) + + W(3, -1097508911, -1100889147, -1111836109, -1112194220) + + W(4, -1112598570, 1008273253, -1093495169, 1061840455) + W(5, 1062686448, 1058139578, 1050284599, 1050365160) + + W(6, -1132150029, -1122251932, 1041075666, -1100441032) + + W(7, -1084814013, -1090418276, -1098262366, -1104639554) + + W(8, -1109088445, -1109769252, -1103842329, -1105690119) + + W(9, 1049125597, 1045096257, 1018266794, -1137878196) + + W(10, 1040615567, 1003469451, 1040565356, -1126038165) + + W(11, -1115375938, -1105705223, 1030169589, -1123088958); + sum2 = + W(0, 1011204314, -1151906447, 
-1132982086, 1015334447) + W(1, -1126682819, 1031867988, -1118412630, 1019402007) + + W(2, -1130282463, 1008292026, -1113509623, -1118416042) + + W(3, 1010705513, 1015562377, 1033032763, -1171211484) + W(4, -1119643322, 1036989282, 1023195387, 1031871454) + + W(5, 1036745108, -1105750465, 1024841875, -1132045005) + W(6, 1040567914, -1093705400, 1046529995, 1053486166) + + W(7, 1053627848, -1097180463, 1042871577, -1098551422) + W(8, 1058038651, 1048788001, 1069827352, 1061661513) + + W(9, -1076895592, -1083563182, -1102862368, -1093245544) + + W(10, -1092395975, 1041384381, -1082188469, -1087135929) + + W(11, 1050130047, 1062283738, -1108358221, 1058279550); + WS(1057163500, -1105993220); + sum1 = + W(0, -1117201821, -1121502764, 1035599249, -1105708961) + W(1, 1039647740, -1100922079, 1009912816, -1136735831) + + W(2, -1127293622, 1045431725, -1100979748, 1049617722) + W(3, -1110465655, 1042176840, 1018827936, 1001114727) + + W(4, 1046510122, -1097880614, 1061106926, -1099817760) + + W(5, -1119635500, -1109566363, -1147860066, 1037588842) + + W(6, -1119279260, 1032698105, -1097882833, -1088567265) + + W(7, 1058361641, -1097567811, 1036626725, -1111863248) + + W(8, -1111026820, 1044535287, -1095331735, 1053288920) + W(9, -1096979757, 1050620014, 999121112, 1037098289) + + W(10, 1018908832, -1110256660, 1024080836, -1117593824) + + W(11, 1027464313, -1113789929, -1159432152, 1012647057); + sum2 = W(0, 1032576295, -1123342042, -1102694716, -1132748357) + + W(1, -1114775905, 1033817148, 942234367, -1109637447) + W(2, -1107895050, 1039805910, 1052615154, 1027679479) + + W(3, -1183264704, -1127944484, 1017960690, -1117652574) + + W(4, 1016729470, -1102939450, 1050355566, 1057040101) + + W(5, 1041423933, -1098800812, 1027447559, -1113353192) + + W(6, 1038099246, -1120156902, -1100691281, -1098318717) + + W(7, 1028185223, 1043085543, -1113846578, 1025700351) + W(8, -1122727069, -1128970625, 1046407453, 997134844) + + W(9, -1094682594, 1023808656, 1033077879, -1136470782) 
+ + W(10, -1114110583, -1124467804, -1110408836, 1035923921) + + W(11, 1040728790, -1104134547, -1111084310, 1024627457); + WS(1069430838, -1077807532); + sum1 = W(0, -1123171840, -1123870031, 1012978707, 1026708965) + + W(1, 1032241614, -1110089696, 1013657340, -1120091289) + + W(2, 926704291, 1035074891, -1102713036, -1094582899) + + W(3, -1104095024, -1131766218, -1123449267, -1126388352) + + W(4, 1028287308, 1030377682, 1060355037, 1064667974) + W(5, 1044883747, 1023923417, 1023074654, -1134219998) + + W(6, 1024652287, -1104443381, 1049114747, 1049334299) + + W(7, -1097738289, -1112895132, -1133494194, -1105084689) + + W(8, -1121358347, -1137933441, -1102473935, -1095444677) + + W(9, -1112982200, -1115022670, -1123735342, -1125652635) + + W(10, -1124249574, -1123723658, -1124350158, -1155502135) + + W(11, 996719039, -1114119663, -1129702439, -1115595936); + sum2 = W(0, -1176390445, 1032791968, -1107216374, 1039748018) + + W(1, -1104895086, 1038133354, -1123666927, 1013658503) + + W(2, -1120278537, -1112747779, 1039675898, 1025598156) + + W(3, 1049256042, -1105015824, 1024448666, -1110290749) + + W(4, -1122813389, -1119517685, 1061000499, 1059349510) + + W(5, -1087513873, 1030867610, -1108606159, 1029186614) + W(6, 1041053822, 1042596904, 1032712030, 1055783944) + + W(7, -1099074048, -1084050573, 1040697602, -1106116496) + + W(8, 1022193201, -1132233097, 1037891533, 1009116037) + + W(9, -1107732985, -1101549778, 1022775251, -1106656496) + + W(10, -1120846771, -1150821318, -1120324549, 1006903493) + + W(11, -1117395279, 1034659541, -1115429715, 1028618234); + WS(-1107817820, -1083770194); + sum1 = W(0, 1033900161, -1129878691, -1129439246, -1131338575) + + W(1, 1015365308, -1128788741, 1034359265, 1024049691) + W(2, -1103638846, 1019165999, 1023785569, 1041664750) + + W(3, 1007653393, 1026305479, 1029035509, -1111132977) + + W(4, -1091016130, -1105602789, -1097747004, -1076846267) + + W(5, -1089110318, -1101714279, -1095981534, -1089968180) + + W(6, 1053216608, 
1056291523, 1057499003, 1059566574) + W(7, 1057822998, 1057215924, 1051239662, 1055184137) + + W(8, 1041711845, -1114138529, -1107402783, 1043057225) + + W(9, -1097881621, -1104471663, 1029122288, 1032118072) + + W(10, 1032252891, 1005244003, -1129556050, 1037921123) + + W(11, -1126092988, 1041959808, 1025745172, 1027268745); + sum2 = + W(0, -1108694073, 1045019344, -1102469381, 1037993179) + W(1, -1102369829, 1036817135, -1116848662, -1123721670) + + W(2, 1017327589, 1034893158, -1100848825, -1146521528) + + W(3, -1136622056, -1122158468, 1036214504, -1109074807) + + W(4, 1040440313, 1020539438, -1091152854, 1062010427) + W(5, 1017850914, -1095264144, 1045324437, 1047380031) + + W(6, 1037365040, 1043169414, -1179472005, 1059039676) + W(7, 1038217042, -1119445175, 1048192551, -1115655698) + + W(8, -1097181356, 1044490295, -1098856738, -1105305386) + + W(9, -1109601321, -1098482637, 1034613852, -1105047780) + + W(10, -1123414724, -1142407924, -1124070306, -1103656828) + + W(11, 1025666621, -1105116469, 1027457629, -1121369495); + WS(-1121449656, -1084978473); + sum1 = W(0, 1045775831, 1021114964, 1036495664, 1032769888) + W(1, -1134400474, 1043049231, -1112792957, 1043659613) + + W(2, -1099605930, 1028648016, -1113195701, -1106810756) + + W(3, 1033523886, -1109837568, 1039225431, -1097941829) + + W(4, -1086366198, -1100836955, -1085601027, -1088747404) + + W(5, -1083936243, -1087596695, -1102874546, -1088234269) + + W(6, 1054843763, 1049277222, 1059006957, 1062277595) + W(7, 1060137529, 1057384673, 1045407794, 1055835664) + + W(8, 1044622847, -1121703995, 1039106531, -1122091582) + + W(9, -1102614275, 1011169881, -1118753961, 1048366450) + + W(10, 1025802254, -1116658981, -1138285574, 1043922200) + + W(11, 1020367213, 1042132230, -1132370673, 1039326553); + sum2 = W(0, 1027272459, -1122536322, -1115599503, -1109953333) + + W(1, -1101108438, -1160853572, -1125199072, -1105305527) + + W(2, -1105747151, -1103046200, 1043039962, 1029006014) + W(3, 1049577028, 
1035298821, 1043042004, 1053260045) + + W(4, 1058588727, 1025881493, 1025318337, -1079698237) + + W(5, -1116019666, 1050247454, -1104971590, -1094505007) + + W(6, 1050147388, 1054947781, 1058930692, -1072349877) + + W(7, -1069493839, -1072151832, -1072978341, -1072202568) + + W(8, -1089616072, -1098050255, 1020160348, 1078384354) + W(9, 1078006681, 1075828182, 1074567753, 1072013431) + + W(10, 1040922163, -1157391810, -1097372724, 1029236731) + + W(11, -1111844849, -1095907204, 1046502148, 1037030073); + WS(-1076413686, 1050217089); + sum1 = + W(0, 1037264211, -1140703701, 1010880426, -1119658251) + W(1, -1116247362, 1031154598, -1125273098, -1131976189) + + W(2, -1103937453, 981259974, -1126374874, 1042459879) + W(3, -1115463620, 1040532067, -1110800684, 1020764716) + + W(4, 1032015257, 1015092336, -1117441203, -1095655209) + + W(5, -1089060935, -1097693400, -1116339093, -1115082479) + + W(6, 1043075881, -1110472573, -1140207123, 1058975156) + W(7, 1059127610, 1049753500, 1026525586, 1038259412) + + W(8, -1122840185, -1115654547, -1121072785, 1016890098) + + W(9, -1095200233, 1024224509, -1116764924, 1027865572) + + W(10, -1144808135, -1126940256, -1127053849, 1040888602) + + W(11, 1021095812, 990976051, -1118091276, 1016317274); + sum2 = + W(0, 1057721492, 1028643349, 1060288138, 1059252612) + W(1, -1088865108, -1086775589, -1123025940, -1093527126) + + W(2, -1087030854, -1118716844, -1080168014, -1080273709) + + W(3, 1065839894, 1065802380, 1052992874, 1051057083) + W(4, -1098169441, 1050324940, 1016748144, -1095791676) + + W(5, 1025332663, 1048744751, -1103675095, 1047968677) + W(6, 976966634, 1023362490, -1115171624, 1047291739) + + W(7, -1136639192, -1124716700, 1045176631, -1122660154) + + W(8, 1023517212, -1141117066, 1023617033, -1099550108) + W(9, 1041241265, 1003901823, -1105682362, 1020688386) + + W(10, 1010046217, 1027499437, -1115470708, 1034407868) + + W(11, -1116997486, -1138422172, 1028187249, -1121332252); + WS(1066004950, -1130673709); + sum1 = 
W(0, -1116101174, -1143212072, -1110468285, -1115338446) + + W(1, -1108483849, 1016794668, -1129241385, 1013476922) + + W(2, 1031654568, -1115365365, 1037471350, 1044807147) + W(3, 1036857292, 1049500535, -1116011560, 1018991691) + + W(4, -1088044468, -1112634148, -1095270314, -1086079158) + + W(5, -1084922887, -1089120237, -1096691029, -1092328502) + + W(6, 1052873383, 1046315731, 1053790292, 1057379349) + W(7, 1062574892, 1059566963, 1043154809, 1052540519) + + W(8, 1041536990, 1018103788, 1044233532, -1114923292) + + W(9, -1111082699, -1112535089, 1013224078, 1045681849) + + W(10, -1136500130, -1125315968, -1143909091, 1020745202) + + W(11, 1016815907, -1120057699, 1009076749, -1111113254); + sum2 = W(0, 1049010278, 1037489391, -1120558834, -1115441721) + W(1, -1107156556, 992841734, 1011333881, 1042878013) + + W(2, 1059657103, -1120407882, 1065561208, 1065354919) + + W(3, 1033902725, -1090673211, -1082019922, -1076779646) + + W(4, -1081123115, -1091075041, -1094693777, -1086070082) + + W(5, 1048072918, 1058489027, 1067731839, 1065573397) + + W(6, -1096253478, 1044030983, -1088412254, -1088804298) + + W(7, 1059822273, 1048768363, -1110539158, 1056386736) + W(8, 1022187211, -1110198086, 1026065263, 979808314) + + W(9, 1015305348, -1103664360, -1102578162, 1041735451) + + W(10, 1042763848, -1123055347, 1010460099, 1037997843) + + W(11, -1106407542, -1127526979, 1046102351, -1098151359); + WS(1047691950, 1051513987); + sum1 = + W(0, 1022367675, 1023115866, 1025923810, -1121417283) + W(1, -1135269348, -1123344254, -1153116613, -1140725337) + + W(2, -1106171493, -1148261013, -1110614526, 1048684730) + W(3, 1043654816, 1037977761, 1018209505, 1012206837) + + W(4, 1047705594, -1138130365, -1138856554, -1092459648) + + W(5, -1088007377, -1117423671, -1145599589, -1130685354) + + W(6, -1115678670, 1039875161, -1098498099, 1059839847) + + W(7, 1055365231, -1110502987, -1108540005, 1013205632) + + W(8, -1110486184, 1038987295, -1111291916, 1037143648) + + W(9, -1116325726, 
1038170008, 1024662707, -1106962771) + + W(10, -1119968336, -1146539325, 1031736900, -1113038256) + + W(11, 999460043, -1124410005, -1170140361, 1022378313); + sum2 = W(0, -1153456458, -1134949160, 976205336, -1107040555) + + W(1, 1031938263, -1148458760, -1128058621, 1018102832) + + W(2, 1034638423, -1132037434, 1047224115, -1111241686) + + W(3, -1102088763, -1109506803, -1119972538, -1122339703) + + W(4, 1026219462, -1110235102, 1041432515, -1098519781) + W(5, 1016119332, 1045200456, 1026438298, 1012163112) + + W(6, -1118314715, 1026400860, -1098555575, 1048945857) + W(7, 1066956313, 1038098069, 1020127420, 1026446934) + + W(8, -1119545213, 1033998497, 1038320636, 1042089138) + + W(9, -1089428981, -1088147582, -1103340362, -1114149729) + + W(10, 1020158000, -1129681397, 1034767622, -1100590807) + + W(11, -1131704894, 1033722488, 1036980701, -1126638758); + WS(1065730166, 1060778308); + sum1 = + W(0, -1109508860, -1155017835, -1113193022, 1018445113) + + W(1, -1116526739, 1027367671, -1113248331, -1116098736) + + W(2, -1157499671, -1122491724, -1101845990, -1123710636) + + W(3, 1037579925, -1109701838, 1024052458, -1155438706) + W(4, 1041425921, 1039607429, 1059840031, -1103201623) + + W(5, -1086536234, -1119532176, -1131587533, -1135484603) + + W(6, 999270323, -1108915110, -1093896302, 1034241095) + W(7, 1065443257, -1115473720, 1023021127, -1169161851) + + W(8, 1009856488, 1019270959, 1048952862, -1124851701) + W(9, -1100984082, 1042597895, 988759974, -1109972554) + + W(10, -1122732458, 1019165315, -1112299503, 1031136123) + + W(11, -1130156262, -1112137490, 1035869970, 1026435614); + sum2 = + W(0, -1117967445, 1040648348, -1112630617, 1042642774) + W(1, -1134880336, -1108734545, 1042723069, -1110919553) + + W(2, 1023936292, 1027011316, -1104815876, 1032485416) + W(3, -1097739621, 1008252288, 1027781874, 1017816544) + + W(4, 1048994422, -1081925843, 1069895227, -1079286264) + + W(5, 1067803238, -1093635474, 1039731885, -1113045602) + + W(6, 1036573845, 
-1090561506, 1061477130, -1076993331) + + W(7, 1074358700, -1082750513, -1152591489, 1040942069) + W(8, 1025878262, -1121546197, 1006994724, 1017402756) + + W(9, -1094759274, -1124517798, -1128233335, 1015619872) + + W(10, -1136777476, 1000734832, -1120512530, 1043800887) + + W(11, -1115175264, -1106635673, 1050798200, -1104563368); + WS(1051081815, 1004177576); + sum1 = W(0, -1127006616, 1009183853, 1030806032, 1035550201) + + W(1, -1148492092, 1031890286, -1122950699, 1033051522) + + W(2, 1047378036, -1123222753, 1030530563, -1112167248) + + W(3, -1102096100, -1119963793, -1117414047, -1118879820) + + W(4, 1040477302, -1133548407, 1061394914, 1067589976) + W(5, 1061693295, 1056018877, 1051876647, 1054656608) + + W(6, -1095193921, -1099314072, -1084986498, -1091645900) + + W(7, -1092420382, -1084371172, -1100397982, -1096248194) + + W(8, 984028115, 1033152150, -1134209438, -1104155199) + + W(9, -1126214664, -1117645071, -1115478541, -1098736399) + + W(10, 1015654525, 1000220336, 1016832613, 1007541309) + + W(11, 1020442767, 1016050244, 1015156587, -1120116135); + sum2 = + W(0, 1038737431, -1107492679, 1048033211, -1120313369) + W(1, 1015799927, -1127256174, -1108431753, 1025558434) + + W(2, -1086393652, 1061143042, -1086071985, 1050160290) + W(3, 1036302581, -1118303985, 1030572627, 1049210573) + + W(4, -1122890851, -1087802869, 1050554685, 1074235084) + + W(5, -1097102610, -1080828834, 1051308486, -1086660156) + + W(6, 1062117952, -1089713201, 1059347527, 1068761695) + + W(7, 1051722018, -1090432670, -1096060997, -1089293346) + + W(8, 1031555324, 1047951996, 1026779363, -1098359769) + + W(9, 1043634021, -1093586557, -1093678452, -1099341241) + + W(10, -1105710539, 1021198999, -1103176732, 1024031768) + + W(11, -1105088271, 1023611974, -1113357802, 1042868230); + WS(-1081907798, -1114644056); + sum1 = W(0, 1023775725, -1170980595, 1026005873, -1126334628) + + W(1, -1135823392, -1129163353, -1151688884, -1129940170) + + W(2, -1099395677, -1134709766, -1099241994, 
-1120114113) + + W(3, 1034531271, -1108351113, 1019473342, -1110039318) + W(4, 1055907631, 1050593936, 1058604967, 1054687330) + + W(5, 1038229186, 1034921283, 1018134455, -1120708196) + + W(6, -1100928459, -1112754945, -1087793848, -1087665246) + + W(7, -1104299004, 1040795209, 1040889398, 1054318790) + W(8, -1119324179, 1032148262, 1009969504, 1038203192) + + W(9, 1028440637, -1104169898, -1106565731, -1107725509) + + W(10, -1109439504, 1017060589, -1128826918, -1138112959) + + W(11, 1038462084, 999099597, 1007377992, 1027667523); + sum2 = + W(0, -1119441358, -1129970231, 1013923693, -1135011770) + W(1, 1015830944, -1115083307, 1009913072, -1117182320) + + W(2, -1109792567, -1120391232, -1103233485, -1103316489) + + W(3, -1104701758, 961266788, -1120471806, 1032096200) + W(4, -1127769135, 1050978947, 1033489482, 1054134565) + + W(5, 1049300223, -1110832497, 1049274561, -1124368769) + W(6, -1107196821, 992017798, -1095646457, 1049668759) + + W(7, 1056508485, 1047186396, -1110580647, -1109467751) + W(8, 1030789742, 1040616080, 1038693473, -1124339641) + + W(9, 1043974386, -1086696217, -1110322035, -1103392519) + + W(10, -1113410550, 1030775978, -1115392393, -1108882435) + + W(11, -1145057435, -1118394614, 1032794842, 1029776437); + WS(1054547415, -1080700728); + sum1 = W(0, -1118959255, 1000886579, -1108938574, 1035589374) + + W(1, -1115953331, -1120209015, -1130255148, -1114831416) + + W(2, -1106813137, 1020745112, 1000629071, -1089948699) + + W(3, -1104374042, -1115354302, -1115551813, -1107411355) + + W(4, -1136179854, 1042266506, -1114102394, 1058763908) + W(5, 1043996152, 1034434840, 1030595334, 1037557447) + + W(6, 1049136707, -1113159542, -1117329322, 1057279043) + + W(7, 1058534976, 1042941412, 1024662339, -1115931546) + + W(8, -1121699838, -1135369949, -1098171678, -1102488395) + + W(9, -1118338951, -1123101368, -1111680098, -1111356186) + + W(10, -1110798213, -1133353861, -1132246652, -1116069530) + + W(11, 1023843187, -1110056358, -1125104281, 
-1113468791); + sum2 = W(0, -1104438281, 1006975884, -1114845259, -1121830816) + + W(1, 1035922248, -1109021584, 1024387710, -1116080890) + + W(2, 1010047952, 1033813267, -1104617278, 1016682467) + W(3, -1119230330, 1016925125, 1031893944, 1028042677) + + W(4, 1041416137, -1103312430, 1059878226, 1030331467) + + W(5, 1042445877, -1127060444, -1106783001, -1113418625) + + W(6, -1126834256, -1117623020, -1091497753, 1058017528) + + W(7, 1016098448, 1041560361, -1112957506, 1032681483) + + W(8, -1137643420, -1119875938, 1032827109, -1098691452) + + W(9, 1038105687, -1110812698, 1032692112, -1112120334) + + W(10, -1141914640, -1142859168, -1115736166, 1030249718) + + W(11, -1111253212, -1153808400, -1114320479, -1122393388); + WS(1052913623, 1049378679); + sum1 = + W(0, -1122838827, -1140317417, -1125024735, -1119624720) + W(1, -1114511683, 1000517975, 980913767, -1118732996) + + W(2, 1042960148, -1131398976, 1047376245, -1142164473) + W(3, 1052155164, 1026436976, 1040745327, 1037554661) + + W(4, -1086626395, -1096052024, -1085621156, -1082562971) + + W(5, -1087926293, -1091398264, -1096441475, -1088065070) + + W(6, 1057292387, 1051976144, 1057250406, 1065554348) + W(7, 1052193006, 1054127476, 1052669955, 1051850318) + + W(8, 1037967453, -1115050310, 1033197251, -1105683981) + W(9, 1022767638, 1031945184, -1117629257, 1041425290) + + W(10, -1155224026, 1021209962, -1137632582, 1041116332) + + W(11, -1114068531, 1019309136, -1119607996, 1029050594); + sum2 = + W(0, -1128729473, 1026695603, -1122814467, 1038041001) + W(1, -1114364242, 1029245131, -1114309631, -1131614889) + + W(2, -1113890565, -1113695617, 1012493154, 1007226362) + W(3, 1022932917, -1121347205, 1028217359, 1005475605) + + W(4, 1030681183, 1046246583, -1119541727, -1105895610) + W(5, 1013035938, -1109256305, 1037707015, 1018572933) + + W(6, -1116238263, 1032966985, 1063042504, 1066720275) + W(7, 1061802664, 1054635319, 1034508407, -1114817272) + + W(8, 1054381777, 1060417287, 1065151040, 1069419683) + 
W(9, 1070184946, 1067716376, 1063333907, 1057796391) + + W(10, -1088819879, -1094452995, -1073258706, -1071852139) + + W(11, -1071972143, -1076852924, -1084913408, -1086618913); + WS(-1080205366, 1054586731); + sum1 = W(0, 1028285119, 1034516295, -1114270114, 1028158052) + W(1, -1122816794, 1015275709, 1008209269, 1026827799) + + W(2, -1104231365, -1118125300, -1122896269, 1031422323) + + W(3, 1042423964, -1113275778, 1035567225, -1113291743) + + W(4, -1105384067, -1098317298, -1102251495, -1084181862) + + W(5, -1083878719, -1102645126, -1094749758, -1095155767) + + W(6, 1018315643, 1050987674, 1045079495, 1065887519) + W(7, 1058366107, 1055927247, 1042520123, 1054820368) + + W(8, 1050517144, -1103331232, 1044797187, -1106919576) + + W(9, -1136969466, -1127822612, -1111681950, 1023800268) + + W(10, -1107795033, 1030553289, -1134423957, 1039594145) + + W(11, -1127143669, 1007528111, -1112108944, 1010334934); + sum2 = + W(0, 1018519238, 1015687849, -1123243110, -1154321191) + W(1, -1147705571, -1137574530, 1025499637, -1123293958) + + W(2, 999332143, 1021267259, 1021268373, 1001850235) + W(3, -1122367554, -1149097003, 1028798873, -1120881342) + + W(4, 1041061518, -1105547391, 1034089943, -1097359239) + W(5, 1024181853, 964634475, -1139074510, 1038775557) + + W(6, 1033756160, 1042746528, 1026544409, 1057524548) + W(7, -1128118669, 1033087855, -1129467393, -1131901697) + + W(8, 1071473984, 1066765865, 1067795051, 1059938647) + W(9, -1126584943, -1109247299, 1002507759, -1137941026) + + W(10, -1074851124, -1079967238, -1079279124, -1081222132) + + W(11, 1036308208, 995399503, 1006964374, -1131371017); + WS(1032292188, -1145187004); + sum1 = W(0, -1121091224, -1124591439, -1121198813, 1009701366) + + W(1, 1007646671, 1026658136, 1020346030, -1122546321) + W(2, 1026282854, 1032165735, 1048823572, -1113803654) + + W(3, -1119045850, -1092992668, 1019935126, -1119672872) + + W(4, -1110430066, -1099744666, -1088767505, -1109489438) + + W(5, 1041537816, 1047050636, 
-1121506375, -1110555528) + W(6, 1041424886, 1053335502, 1045555550, 1044961340) + + W(7, -1098280566, 1043075732, -1107270401, 1047792295) + + W(8, -1163709786, -1097458647, 1040806261, 1038271382) + + W(9, 1040783354, 1037260307, 1000329008, -1120965766) + + W(10, -1130362518, 1026340962, 1020242430, -1136150382) + + W(11, -1117600916, 997023189, -1124456788, 1026784415); + sum2 = + W(0, -1120115102, 1024423113, -1131404107, -1162462122) + W(1, -1116017912, 1035047655, -1146105278, 1011265421) + + W(2, 1027036707, -1119221834, 1034928052, -1115806512) + + W(3, -1133321326, -1101382306, 1029706393, 1023154260) + + W(4, 1028744443, -1122478763, 1034534080, -1117650486) + + W(5, -1108495610, -1105332910, 1038914207, -1113944011) + W(6, 1037973272, 1049287438, 1047707985, 1065549495) + + W(7, -1079218451, -1101676179, -1109811124, -1116861489) + + W(8, -1111409649, -1096539764, 1032305459, 1059270295) + + W(9, 1053073262, -1108544028, -1125217018, 1023662974) + + W(10, -1114092190, 1034186497, -1111870688, -1104163224) + + W(11, -1116841516, 1038206879, 1004549098, 1021319568); + WS(1065238444, 1029810764); + sum1 = W(0, -1117664959, 1033625479, -1121511107, 1026353828) + W(1, -1154871713, 980940712, 1018182811, 1025470327) + + W(2, 1035631050, 998132025, 1046740798, 1049143094) + W(3, 1044265580, 1037075730, 1028486865, -1128624253) + + W(4, -1105662947, -1110409480, -1115600622, -1117398499) + + W(5, -1101622768, -1109487884, -1112099063, -1120941494) + + W(6, 1044376095, 1041425605, -1117226857, -1088462543) + + W(7, -1090821428, -1099167518, 1039936213, -1117520833) + + W(8, -1180059800, 1029921727, 1047596134, 1050498082) + W(9, 1032067983, -1134957959, 1021107193, 1019194744) + + W(10, -1144996235, 1002652741, 986508455, 1031133314) + + W(11, -1119889178, 1038687768, -1122286680, 1032539083); + sum2 = + W(0, 1027136813, -1126103938, -1134438995, 1023841561) + W(1, 1003327910, -1130954514, -1135210803, 994333964) + + W(2, -1109237812, 1029641913, -1108335880, 
1016569578) + W(3, 1032415828, 999116870, 1003972614, -1138234067) + + W(4, -1131106290, 1047599733, -1097134429, -1076957808) + + W(5, 1044824498, 1037851390, -1123469057, 1043898273) + W(6, 1028116649, 1041211299, -1092284431, -1103461794) + + W(7, 1073063564, -1111597872, 1033926584, -1115472968) + + W(8, -1115313320, -1119521049, -1100904528, 1040752059) + + W(9, 1045381268, -1112786376, 1034574746, -1137298819) + + W(10, -1132573667, 1022404146, 1016921434, 1027441157) + + W(11, -1106952976, 1036890340, -1110049884, -1139674515); + WS(-1111543132, 1053084187); + sum1 = + W(0, 1033211657, -1116548448, 1043994373, -1112421659) + W(1, 1033458902, 1042588164, -1106715184, 1042106754) + + W(2, 1029336727, -1100473306, 1036113196, -1100606697) + + W(3, -1117467379, 1024205686, -1098518156, 1010302211) + W(4, 1056808676, 1042013257, 1061342772, 1060655799) + + W(5, 1059165410, 1057924951, 1042727104, 1057649845) + + W(6, -1089574549, -1097689349, -1087296749, -1082434496) + + W(7, -1087667391, -1091727169, -1095816456, -1093601226) + + W(8, -1112225370, 999357322, 1042186215, -1100755553) + W(9, 1042539448, 1041805473, -1105873704, -1114265639) + + W(10, 1035575202, -1114691470, 1040681774, 1045312421) + + W(11, 1039236406, 1042949980, -1113675407, 1035739628); + sum2 = + W(0, 1036389819, -1106855677, 1031620401, -1109245550) + W(1, 997072274, -1132619277, -1096737767, 1049991282) + + W(2, 1006854517, -1114095075, 1044374087, -1105124479) + + W(3, -1105277948, 1043212463, -1104242691, -1113287926) + + W(4, 1049279774, -1106885840, 1052305677, 1054861276) + W(5, 1057655059, 1046093411, -1094755169, 1057224822) + + W(6, -1104547241, -1120692701, 1045612807, 1058901209) + W(7, 1054428932, 1034709627, 1054770354, -1094426804) + + W(8, -1140479293, -1096936463, 1051540981, -1092919117) + + W(9, -1129490106, 1034565381, -1092959620, 1030915645) + + W(10, 1032391465, -1098591621, -1115708783, -1081363743) + + W(11, -1090444858, 1017473602, -1098730750, 1044114915); + 
WS(-1084020140, 1068126260); + sum1 = + W(0, 1006295077, -1137936208, 1034316741, -1149620895) + + W(1, -1141477121, -1110713826, -1119964867, -1159113641) + + W(2, -1156074282, -1103771606, -1104490636, 1038480604) + W(3, 1018703789, 1041233006, 1031629367, 1005339683) + + W(4, 1049019906, 1049395923, 1061237576, 1033598516) + W(5, -1094713154, -1098857908, 1023717620, 1036793310) + + W(6, -1097931528, -1091558419, -1086810430, -1099102215) + + W(7, 1059440745, 1018408058, -1122483505, -1108866660) + W(8, -1117241095, 1040511881, 1040083512, 1044377854) + + W(9, 1044961993, -1146014215, 1038059949, -1129764324) + + W(10, -1123621559, 1023627215, -1108467318, -1115694024) + + W(11, -1120119968, -1145494414, -1122997930, 1007729326); + sum2 = + W(0, 1015277664, -1114453597, 1036987011, -1112547401) + W(1, 1038859445, -1116674795, 983557058, -1127718668) + + W(2, 1025130698, -1134749664, 1032879133, -1134874844) + + W(3, -1112115766, 1018163604, -1128408082, 1003243264) + + W(4, 1027591084, 1033917561, -1126832950, -1114337945) + W(5, 1026538526, -1114930086, 968847366, -1132712652) + + W(6, -1117062871, -1115087054, 1055956188, -1108159193) + + W(7, 1052262112, 1032023389, -1105383385, -1120777676) + + W(8, -1101670769, -1114105716, -1097729634, -1123425664) + + W(9, 1058230624, 1047994902, 1046456588, 1042953566) + + W(10, 1019605180, -1131991228, -1104354242, -1097147867) + + W(11, -1101321392, -1100180330, -1106875572, 1009596296); + WS(1044178094, -1112419455); + sum1 = W(0, -1131480425, 1014469647, 1033460669, -1111970637) + + W(1, 1018155424, 1028778773, -1120733619, 1036392279) + + W(2, 1028145051, -1106904202, -1114114798, -1103921210) + + W(3, -1104772954, -1105642670, -1122369255, -1105836421) + + W(4, 1050373320, 1050878345, 1059597501, 1061375910) + W(5, 1062331213, 1054878354, 1050980308, 1057064179) + + W(6, -1113746394, -1101407218, -1089975999, -1083033439) + + W(7, -1089702156, -1100724747, -1095744476, -1096031599) + + W(8, -1097767254, 1036986154, 
-1106579916, 1025899544) + + W(9, 1034746546, 1032512190, -1113254377, -1120232930) + + W(10, 1005496122, 1009224698, 1010606701, -1155279741) + + W(11, 1027233072, 1015772371, -1131981388, -1140419566); + sum2 = W(0, 1044058702, -1105410550, 1033398047, -1119512188) + + W(1, -1121578506, -1129503263, 1005093795, -1116781712) + + W(2, -1120335698, 1016883675, 1032351899, -1103364509) + + W(3, -1123961512, 1043612533, -1110855577, 1044044613) + + W(4, 1041124235, 1035757794, -1105578788, -1105098443) + W(5, 1037191880, 1025665274, 1024086018, 1023695645) + + W(6, 1021344157, -1104385836, 1068114101, 1052569394) + + W(7, 1027075889, -1095293941, -1113226379, -1106316439) + + W(8, -1087803383, -1085495757, -1081571635, -1086748155) + + W(9, 1031589750, 1048618044, -1121765158, 1039073287) + W(10, 1057976488, 1058928386, 1051244378, 1048853947) + + W(11, -1130402783, -1124003088, 1010599720, -1120128288); + WS(-1095246679, -1087513362); + sum1 = W(0, 1027634667, 998672596, -1149130110, 1026870673) + W(1, 1006905115, 1027369115, -1156874400, 1027010724) + + W(2, 1005582218, 1034119552, -1098084154, -1141537276) + + W(3, -1098155038, -1115874224, -1130796284, -1110211381) + + W(4, 1054139855, 1049059024, 1052649123, 1065515488) + W(5, 1064339388, 1053781857, 1050040635, 1056042372) + + W(6, -1093682933, -1099832192, -1089047862, -1088853800) + + W(7, -1084571535, -1097071228, -1107001190, -1095503614) + + W(8, 1002440559, 1040529483, -1122861959, -1126585773) + + W(9, -1099945809, -1105399392, 1023818537, -1117200441) + + W(10, -1142612208, 1024339285, 1025618962, 1014930618) + + W(11, -1117714471, -1136815962, 1018962754, 1028298602); + sum2 = + W(0, 1021849900, -1131412357, -1118206650, -1125196517) + W(1, 1035111533, -1144348946, 1021086832, 1020034266) + + W(2, 1041438934, -1119369172, 1040592619, -1114002177) + W(3, 1032710077, 1044882286, -1106944856, 1034465258) + + W(4, -1128850825, 1021256857, 1046750596, 1025252531) + W(5, 1048692472, -1128830461, 1032666924, 
1034167215) + + W(6, 1035803991, -1111021944, 1038923952, 1072474004) + W(7, 1077302109, -1095554578, 1044371493, -1126088605) + + W(8, -1100551265, 1042971908, -1109478151, -1083937830) + + W(9, -1067064389, -1084038225, -1106119426, -1103479472) + + W(10, 1028120411, -1109090883, 1026074630, -1100770381) + + W(11, 1049109423, 1041329596, 1033664959, -1114595648); + WS(-1086863724, -1072185677); + sum1 = W(0, 1028385114, -1117432553, 1039695343, -1109809663) + + W(1, 1021420394, -1123921882, -1130768465, 1022384484) + + W(2, 999422396, -1108936247, -1104614655, -1104994067) + + W(3, -1154652341, -1146863602, -1114930982, 1034006493) + + W(4, 1054099261, 1053618221, 1060257868, 1062802560) + W(5, 1056152893, 1045520765, 1044268562, 1050379178) + + W(6, -1096116227, -1097427086, -1085349189, -1083398775) + + W(7, -1099176000, -1110026089, -1104661441, -1115290030) + + W(8, -1144842721, 1026659130, 1037424170, 1038629270) + + W(9, 1048207210, -1118540449, -1129199023, -1101129750) + + W(10, -1139590961, 1021035463, -1123772160, -1103266023) + + W(11, 1017905387, -1111374457, 997564786, -1108736604); + sum2 = W(0, -1122660132, 1028330563, -1115262781, 1045687787) + + W(1, -1108704409, 1029063313, -1116092784, 1013546756) + + W(2, 1040406414, 1017997140, 1025264396, -1103384157) + W(3, -1151694751, 986929807, 1026201848, -1115691336) + + W(4, 1007232002, 1025921746, 1043786284, 1040909479) + W(5, -1108714653, 1043690826, -1119932664, 1038266954) + + W(6, -1096101684, 1022732374, -1102241564, 1055010544) + + W(7, 1057419270, -1126901852, 1047767958, -1115096031) + + W(8, -1114861255, 1041150821, -1094431556, -1078483413) + + W(9, -1079156700, -1078269109, -1092610189, -1098859524) + + W(10, 1041847394, -1105481885, 1055232297, 1061241514) + + W(11, 1063042984, 1067726336, 1054314043, 1052159629); + WS(-1100931758, -1079897221); + sum1 = W(0, 1002503979, 1034941050, -1106318298, 1040768819) + + W(1, -1115730241, -1141069099, 1027555620, -1123362142) + + W(2, -1105750735, 
1038153885, -1105238364, 1049149819) + + W(3, 1028324215, -1106412131, 1039911116, -1114353739) + + W(4, -1109125113, 1017797614, -1095922988, -1090093369) + + W(5, -1091054939, -1102230944, -1127686569, -1097766172) + + W(6, 1042140489, 1051948423, 1034322060, 1059094088) + W(7, 1049017573, 1050624300, 1045992036, 1043140938) + + W(8, -1126416416, 1024057510, -1103862864, 1040510958) + + W(9, -1112163155, -1106256757, 1017980086, 1018029782) + + W(10, -1119888216, 1035399096, -1104877795, 1042879258) + + W(11, -1120272065, -1112985689, 1033961137, -1117933580); + sum2 = W(0, -1118098210, -1106915273, 1038839398, -1115506453) + + W(1, 1032442192, 1016491198, 996494424, -1145352236) + W(2, 1039272612, -1120265428, 1047577900, -1112696018) + + W(3, -1123012045, -1123570623, -1121836120, 1026228225) + + W(4, 1033865352, 1043187452, -1107536005, -1090204449) + W(5, 982996928, 1042935567, -1120793710, 1030161309) + + W(6, -1107428188, -1111026913, 1034998667, 1068488571) + + W(7, -1096446249, 1013219076, -1101384300, 1023915500) + + W(8, 1041731770, -1108614463, -1104254290, -1073443109) + W(9, 1040126834, 1041050172, 998034192, 1038049010) + + W(10, -1138511840, -1129236656, 1049639209, 1065503173) + + W(11, 1035312463, -1104804103, 1040382819, -1106858818); + WS(1060652716, -1122012062); + sum1 = W(0, -1136700086, -1120727741, -1136846536, -1102129381) + + W(1, 1035874533, -1111246103, 1019584793, -1111739075) + + W(2, -1113850590, 1022556549, 1030968440, 1036351121) + W(3, 1049961616, -1119797202, 1037896960, 1033695743) + + W(4, -1093156519, -1098011208, -1100004083, -1079676696) + + W(5, -1097573322, -1087704492, -1096988084, -1092447605) + + W(6, 1056384406, 1052398904, 1062519968, 1061355096) + W(7, 1059521337, 1052440101, 1048869443, 1048091420) + + W(8, -1101587139, -1106430355, -1132405458, -1107897312) + + W(9, 1048756772, -1109566980, 1020055223, 1043876621) + + W(10, 1021600449, -1114714908, 1016310696, -1113036024) + + W(11, 1022545811, 956768943, 
-1121983087, 1018273385); + sum2 = + W(0, -1134928360, 1028726521, -1102649789, 1041988393) + W(1, -1105797881, -1125378124, 1034345705, -1134413224) + + W(2, 1033535752, -1102458559, -1114678181, 1050124192) + + W(3, -1102215651, 1032107719, 1014416128, -1119150822) + W(4, -1108867200, 1031241917, 1028753942, 1056497468) + + W(5, -1090602802, 1057082230, 1023046526, 1025484112) + W(6, 1056652859, 1048810536, 1051021341, 1036208495) + + W(7, -1099717915, -1114740583, 1002570703, -1101875417) + + W(8, -1094974673, -1100509489, -1100441695, 1034323322) + + W(9, -1115767916, 1041259997, -1122784508, 1037850051) + + W(10, 1034782854, -1123876292, -1100025719, 1037384204) + + W(11, -1098766868, 1024245049, -1114560124, -1112409616); + WS(-1104650926, -1097620835); + sum1 = W(0, -1121602225, 1021304961, -1132438763, 1033606955) + + W(1, -1119908749, 1032325913, -1112547088, 1011690407) + + W(2, 1025795527, -1105437010, 1039870965, -1097752993) + + W(3, 1037504305, -1109081755, -1121171172, -1116159153) + + W(4, 1046345311, -1108985665, 1041625463, -1144407721) + + W(5, -1100934531, 1047145475, -1119544288, 1036418740) + + W(6, -1105175020, 1045711217, -1093274991, 1062570208) + + W(7, -1099088029, -1097698093, -1136783989, -1106563213) + + W(8, -1160491701, -1117138161, 1040158758, 1049985633) + W(9, 1037586881, 1004919651, -1124949278, 999804916) + + W(10, 1000929055, -1145522137, -1117941464, -1122762228) + + W(11, -1109694278, 1026835862, 1024727664, -1126831120); + sum2 = + W(0, -1103930431, 1043997541, -1102154605, 1046282803) + W(1, -1106269056, -1116052960, 993204113, -1113135778) + + W(2, -1122873970, -1100601593, -1174042309, -1084779372) + + W(3, -1114322527, 1046060831, -1115511338, 1041954938) + W(4, 1048581640, -1108032519, 1029742536, 1068068442) + + W(5, 1051594943, -1119470820, -1138871916, 1020181836) + W(6, -1139346648, 1042591516, 1045200151, 1052027066) + + W(7, -1098651742, -1115540046, 1021777728, -1107159950) + + W(8, 1030984702, -1123657132, 
-1106767363, -1086671314) + + W(9, 1024653780, -1127107345, -1163609763, -1134598872) + + W(10, -1115391660, -1107477718, 1032437784, 1025856678) + + W(11, 1036550683, -1121355040, -1115373797, -1122730046); + WS(1066180726, 1071088208); + sum1 = W(0, 1019239309, -1139409238, 1016498838, -1117575980) + + W(1, 1000209679, -1113394675, 1030691589, -1133365628) + + W(2, -1125280121, 1001471451, -1110062156, 1036666525) + + W(3, 1049354787, 1048739493, -1119525817, -1103045922) + + W(4, -1096761986, 1012641888, -1103040193, -1098591157) + + W(5, -1079760364, -1090655988, -1102803453, -1099636767) + + W(6, 1046613601, -1137946220, -1089511315, 1064065326) + W(7, 1058946251, 1056455200, 1048597558, 1050574049) + + W(8, -1107071425, 1047813898, 1045045936, 1024305865) + W(9, -1110782371, 1013890551, 981956228, 1043476657) + + W(10, 1022045682, 1006921793, 1010038708, 1025055882) + + W(11, 990559266, 1028621651, -1118324476, -1146495466); + sum2 = + W(0, -1132513434, -1110784932, 1015163300, 1015068960) + W(1, 1033174530, -1135254713, 1032888151, -1132281738) + + W(2, 1043448685, -1133524329, -1107748756, 1038994810) + + W(3, -1104237963, -1120882016, -1105704866, 1042278645) + + W(4, -1099364915, 1037755510, -1102119678, 1041992963) + W(5, 1059473601, 1015880446, 1034086136, -1087682569) + + W(6, -1092624350, 1047633966, -1120785461, 1052941265) + + W(7, -1112171819, -1149514732, -1106883398, 1032085587) + + W(8, 1056566992, -1103089496, 1032253510, -1122308723) + + W(9, 1028984470, -1121912217, 1037486360, -1121651695) + + W(10, -1119372495, -1105722911, -1123661455, -1115405106) + + W(11, -1126727948, 1038410412, -1113287020, 1025242322); + WS(1036385628, 1044378228); + sum1 = W(0, -1125923878, -1122454178, -1106272614, 1034604284) + + W(1, -1112814039, 1030894675, -1123079474, -1130721815) + + W(2, -1119989961, 1024777096, 1045742974, -1106270482) + + W(3, -1097223397, -1112273196, -1131680651, 974203052) + + W(4, -1113894352, -1109744594, -1081686591, 1049041391) + + 
W(5, 1058546239, 1043400251, 1024933634, -1127331477) + W(6, 1034400041, 1050462805, 1057689729, 1056885773) + + W(7, -1082103097, -1111542214, 1032256832, 1030325078) + + W(8, 1040468885, -1118152051, -1112362495, -1111838475) + + W(9, 1051410218, 1042722054, -1104716944, 1028514056) + + W(10, -1119159494, -1136375156, 1016953331, 1033159371) + + W(11, -1127987422, 1008300000, 1020481857, -1122308858); + sum2 = W(0, -1126857538, -1118763531, 1031801448, 1032593429) + + W(1, 1004108563, -1119835024, 1016299757, -1169697445) + + W(2, -1124098806, -1105376950, 1037674477, -1110580495) + + W(3, -1116861031, 1039467599, -1109178305, 1033383777) + + W(4, 1017023528, -1096176503, 1067983244, -1078904043) + + W(5, 1060638644, -1111109918, -1098624444, 1040587540) + + W(6, -1105780084, -1094874635, 1071605005, -1075832971) + + W(7, 1065456263, -1112838992, -1104548941, 1006549790) + + W(8, -1128747572, -1140350635, -1109792866, -1097243929) + + W(9, 1040323331, -1114824160, 1033955221, -1127435882) + + W(10, -1133097491, 1036694112, -1111308161, 1036064001) + + W(11, 1039332997, -1103055030, 1027541135, 1022453269); + WS(1044586414, 987793058); + sum1 = W(0, 981244734, 1029861564, -1127551587, 1008852553) + + W(1, -1110264932, -1120415085, -1128191927, -1111219811) + + W(2, -1094962922, -1105742952, -1103252055, -1106380176) + + W(3, 1039776063, 1036900841, 1025145152, 1028803677) + W(4, 1056192743, 1054313512, 1058042871, 1048369556) + + W(5, -1104749720, -1097596121, -1105335335, -1103570473) + + W(6, -1106851755, -1099782448, -1098041547, -1096188128) + + W(7, 1052995119, 1058305414, 1045503990, 1055249646) + W(8, -1119312576, 1027895469, 1019737521, -1121596880) + + W(9, -1106765091, -1098125906, -1106717007, -1101993823) + + W(10, -1109764848, 1032524411, -1110892330, 1024237680) + + W(11, -1120426409, 1026360866, -1151171624, 1031597860); + sum2 = W(0, -1170894487, -1116947191, 1031282910, -1112734885) + + W(1, 1042944653, -1112368696, 1023647443, -1149951158) + + 
W(2, 1038321486, 1032730216, 1034140662, -1104751545) + + W(3, -1102906173, -1115060896, -1124833342, -1103764076) + + W(4, 1053283699, -1102463031, 1044178467, 1054507486) + + W(5, -1128523954, 1038784522, -1109068581, -1105151256) + + W(6, -1093894288, -1109920837, -1089191295, 1065532097) + + W(7, -1115680584, 1029167633, 1021288403, 1048070264) + + W(8, -1100030896, 1047091130, -1097946325, -1095028025) + + W(9, -1094781909, 1050920778, -1113980416, 1052479172) + + W(10, 1046510222, -1098101478, 1046381950, -1113780206) + + W(11, 1020709100, -1114933696, -1114621382, -1120673083); + WS(-1114317660, -1079530866); + sum1 = W(0, 1005492722, 1045601427, -1125684614, 1040691817) + + W(1, -1138701159, -1122106750, 1010492340, -1132247342) + + W(2, -1115918380, -1100871485, -1122621547, -1094477942) + + W(3, 1044150368, 1041157065, 1039210328, 1037992735) + W(4, -1124407701, 1040297908, -1098312611, 1054673340) + + W(5, 1041833209, -1120050519, -1116922669, 1025359158) + + W(6, -1116105277, -1115200394, -1103806788, -1097950923) + + W(7, -1088150296, 1040435582, 1025767275, 1032362656) + W(8, 1041270838, 1034785295, 1049040249, -1103823208) + + W(9, -1129519613, -1121264402, -1133614768, -1107139719) + + W(10, -1125596386, 1024917256, 1026343653, 1033886401) + + W(11, 1039772509, 1028146611, 1030520276, 1032249339); + sum2 = W(0, -1155531695, -1116868484, -1135508412, -1106991844) + + W(1, -1135674828, 1001487940, -1119877490, -1131775200) + + W(2, 1035594747, -1130123766, -1121397012, -1115576312) + + W(3, -1126221532, -1107263445, -1121744498, -1125291430) + + W(4, -1111112193, 1034328851, 1033705026, 1050004218) + + W(5, -1117911568, -1123887534, 1017446183, -1110635219) + + W(6, -1101564588, -1113096514, 1029679690, 1048453590) + W(7, 1050922209, 1043692754, 1034884616, 1032206024) + + W(8, -1116834132, -1115452418, -1152418495, 1029930774) + + W(9, 1007646710, -1114385718, -1122169416, 1016330315) + + W(10, -1130931838, -1123313694, 1018386998, -1127714868) + + 
W(11, -1121358676, 1023927325, -1115969900, -1122013064); + WS(1066254326, -1103165682); + sum1 = + W(0, 1032736195, 1027879829, 1031762634, -1123062443) + W(1, 1004281096, -1148973229, -1119568276, -1130151676) + + W(2, -1107990998, -1114107453, -1103287009, 1041209406) + + W(3, 1039171204, -1113506869, 1029657432, 1042306865) + W(4, 1040177417, -1121888407, 1058340070, -1097968365) + + W(5, -1082040497, 1042461256, -1110264442, -1106785311) + + W(6, -1102376817, 1024426522, -1086180914, 1045901207) + W(7, 1060563077, 1045252895, 1035749465, 1042341554) + + W(8, 1034252186, -1134855959, 1048880356, -1117801777) + + W(9, -1108119503, 1021862138, 1017288487, -1125959462) + + W(10, 1019040113, 1012754494, -1128188760, -1116847305) + + W(11, -1149008906, -1140572507, 1021245074, 1010461490); + sum2 = W(0, 1021177914, -1124089023, 1026640236, 1019818775) + W(1, 1019369915, -1106530127, 1038187852, 1030498751) + + W(2, -1111267114, 1044764288, 1026571855, -1103902938) + + W(3, -1139759728, 1046869720, -1116925721, -1107211067) + + W(4, 1038432054, -1106310853, -1115065858, 1063205014) + + W(5, 1054784388, -1096246655, -1115197568, 1035488589) + + W(6, -1105822865, -1100663157, -1088005063, 1066152263) + + W(7, -1093573252, -1098236584, 1033917272, -1107121631) + + W(8, 1037666445, 1039738660, -1097556837, -1094681119) + + W(9, -1095778937, 1051664274, -1104072801, 1038730556) + + W(10, -1122267795, -1121288215, 1045614354, -1112191805) + + W(11, 1041592438, -1110480242, 1015983837, 1022742082); + WS(1062838508, -1098141683); + sum1 = W(0, -1134697126, 1033720508, -1121504814, -1119656591) + + W(1, 950602286, -1114868293, 1034808260, -1113093265) + W(2, -1129876167, 1021336509, 1014720225, 1046652247) + + W(3, 1047400131, 1033072079, -1139606129, 1026913289) + + W(4, -1116304310, -1104597903, -1101916102, -1082370571) + + W(5, -1091801796, -1096699752, -1121911577, -1098146380) + + W(6, -1099161722, 1019102333, 1049624810, 1058084844) + W(7, 1053105145, 1050650780, 
1034036747, 1048726310) + + W(8, 1038618718, 1045312786, 1036131058, 1029119981) + W(9, -1119759594, -1119884597, 1024218735, 1017329950) + + W(10, 1030514076, -1118743028, -1134396481, 1009920435) + + W(11, -1132133195, -1133963803, 990998386, -1121758433); + sum2 = + W(0, -1120814392, 1018600110, 1027325255, -1124538308) + W(1, -1127260613, 1007599328, -1128821133, -1125977712) + + W(2, 1026947049, -1114533760, -1112504706, -1107191892) + + W(3, 1023329910, -1124222597, 1024046209, 1019591350) + W(4, -1124609851, 1043306429, 1030055965, -1090928488) + + W(5, 1046523168, 1015958190, 1022187138, -1115445424) + W(6, 1029294693, -1096357662, 1075131584, -1118772404) + + W(7, -1092017575, -1113584525, -1109468012, -1105922548) + + W(8, -1089366560, -1078857162, -1106088729, 1060040329) + W(9, 1045316672, 1047822624, 1021757630, 1038940063) + + W(10, 1054044776, -1109773692, -1105806639, -1102685120) + + W(11, -1119137158, 1032716843, -1120065319, 1016647840); + WS(1054286935, 1054427377); + sum1 = W(0, -1139016422, 1015966225, 1034481375, 1029692191) + W(1, -1118014037, 1037138557, -1107878452, 996243233) + + W(2, 1031868724, 1024526894, -1107528289, -1123790766) + + W(3, 1033739398, -1101429240, 1035472806, -1114181077) + + W(4, -1103470657, -1100637795, 1043922306, -1096987004) + + W(5, -1103397016, 1045510949, -1113746932, 998962565) + W(6, 1027450488, 1046711412, -1098573729, 1036148808) + + W(7, 1053311986, 1016118627, -1127637067, 1041113135) + + W(8, 1036611766, -1105992614, 1045477269, -1138151730) + + W(9, -1103945702, 1013880803, 1041047838, -1118755335) + + W(10, -1113202838, 1036202402, -1121078688, 1006910195) + + W(11, 1033532379, -1135069562, -1112788047, 1020627009); + sum2 = + W(0, 998302909, 1020343611, 1029585253, 1014080934) + W(1, -1135780688, -1115911996, 1018594371, -1117868456) + + W(2, -1114977231, 1031676937, -1113383024, 1053648356) + W(3, 1044096370, 1042686855, -1108781434, 1037265024) + + W(4, -1111023634, -1115901635, -1091081120, 
-1095412525) + + W(5, 1066870285, -1137657706, 1043350243, -1126645268) + + W(6, 1036510752, -1117993552, -1099268355, -1083342245) + + W(7, 1034064765, 1037488276, 1014772326, -1113946823) + W(8, 1004185429, 1041465226, -1113451820, -1115471327) + + W(9, -1118193886, 1023464409, -1138084208, 1032826620) + + W(10, 1033017445, -1113757911, 1021941233, -1161911507) + + W(11, -1120710156, 1018751107, -1148724493, 967002060); + WS(1069042774, 1023813606); + sum1 = + W(0, -1120569815, 982627530, -1113058670, -1122018428) + W(1, -1110295833, 1021479086, 1001627822, 1013049407) + + W(2, 1029846837, -1113747494, 1033076468, 1039673254) + W(3, 1038190732, 1046432919, -1134594983, 1016231853) + + W(4, -1087916882, -1109565588, -1095056850, -1085384902) + + W(5, -1083727323, -1089999723, -1094559921, -1094610970) + + W(6, 1052864835, 1048856613, 1056781822, 1059308179) + W(7, 1063669175, 1059014405, 1044534497, 1048044230) + + W(8, 1037940281, 1011966045, 1041407837, -1107102153) + W(9, -1111254596, -1110936865, 1025983877, 1046858383) + + W(10, -1130288148, -1136915076, -1140629239, 1027021082) + + W(11, 1007651659, -1128736648, -1139332738, -1114487747); + sum2 = W(0, -1097118790, -1115645027, -1109249259, 1042765123) + + W(1, 1015795842, 1028792702, -1116071919, -1105083736) + + W(2, -1088323667, 1030684990, -1081609065, -1082143899) + + W(3, -1102809807, 1059694558, 1064574838, 1072110122) + W(4, 1067177084, 1056476304, 1058423359, 1061996602) + + W(5, 1006287482, -1085597397, -1080103447, -1081832849) + + W(6, 1049654149, -1101283859, 1057270411, 1055747315) + + W(7, -1087207348, -1106003210, 1023804763, -1090423898) + + W(8, -1120538318, 1040556965, -1122455315, -1120372038) + + W(9, -1106540179, 1042932361, 1048665861, -1101912272) + + W(10, -1108580354, -1124995735, 1017763415, -1114753052) + + W(11, 1040354664, 1033427208, -1098395984, 1049644025); + WS(1046279854, -1100682627); + sum1 = W(0, -1155754074, -1110611178, 1022309140, -1118509483) + + W(1, -1109973457, 
1022155595, -1107208537, -1117715774) + + W(2, -1109896894, -1114421037, -1131162933, 1027993210) + + W(3, 1040243222, -1117693450, 1019138289, 996768085) + W(4, -1107576521, 1030120983, -1094043528, 1062355587) + + W(5, -1094900881, -1110047247, -1110631994, -1107567326) + + W(6, 1035511522, -1124410677, 1038811574, 1059809630) + W(7, 1038778317, 1046487839, -1127423844, 1039784548) + + W(8, 1002805634, -1114543731, 1039227081, -1113128332) + + W(9, -1105636701, -1109843880, 1004796827, -1112311954) + + W(10, -1116898232, 1008443967, -1122237996, -1141634631) + + W(11, 1021837827, -1132048229, -1123472091, -1128081977); + sum2 = + W(0, -1155805362, -1111094572, 1028160711, -1123681391) + + W(1, -1116453985, -1119301235, -1108559318, -1115385578) + + W(2, -1107499970, -1112866732, -1106540686, -1140638716) + + W(3, 1031083122, -1139408516, 1034592379, -1116565509) + W(4, 1022847832, 1044264749, -1155674450, 1057429467) + + W(5, -1106158641, 1025824831, -1111164410, -1121519923) + + W(6, 1035480223, -1148903065, 1040150233, 1045538131) + W(7, 1031696457, 1041846065, -1120456085, -1129601606) + + W(8, -1120962569, -1114470280, 1010774604, 1016428102) + + W(9, -1120179561, -1118124541, 999266665, -1119709653) + + W(10, -1110725720, 1021130376, -1120260095, 1009992044) + + W(11, 1026515918, -1114275912, 1011855672, -1115407958); + WS(1068834358, -1130516755); + sum1 = W(0, -1110020716, 1043574522, -1110718798, -1129582842) + + W(1, -1114402010, 1034180284, 1025857751, -1111247176) + + W(2, 1034114243, -1095778974, 1047457188, -1108460854) + + W(3, 1054126843, -1096959660, 1033414773, 1039130852) + W(4, 1048068617, 1048936405, 1038642250, -1114164062) + + W(5, -1110583455, 1049433806, -1136840220, -1119539023) + + W(6, -1102038774, 1035735542, -1105529591, -1096174934) + + W(7, -1112126358, 1043709316, -1123546692, -1112879245) + + W(8, 1031998965, -1100504766, 1047377413, -1130108598) + + W(9, 1044906670, -1095231947, -1130058411, 1035429307) + + W(10, -1110620517, 
1038840817, -1109142655, 1023990363) + + W(11, -1122044916, 1040978881, -1131191403, -1115530151); + sum2 = W(0, 970017593, -1125442959, -1107196630, -1108273260) + + W(1, -1130992285, -1132402709, 1017358256, 988670622) + W(2, -1108165952, 1040753488, 1033984686, 1051322703) + + W(3, -1113564209, 1041587650, -1116630320, -1122802752) + + W(4, 1047729556, 1000227776, 1038319755, 1055231112) + W(5, -1094436986, 986706110, -1124437274, -1132214419) + + W(6, -1102798485, -1088611471, -1087549565, 1062180091) + + W(7, 1046553690, -1113546876, -1120036494, 1032902155) + + W(8, -1142827268, 1049645408, -1127043239, -1103837458) + + W(9, -1107399737, 1048231400, -1124561928, -1145381770) + + W(10, 1029593938, -1108133201, -1114349374, 1035145460) + + W(11, -1114342182, -1146643074, -1126967424, 1014303584); + WS(1067517750, 1033639701); + sum1 = W(0, 1025509678, -1130341242, -1120272434, 1018375590) + + W(1, -1116961737, 1026047982, 1025041660, -1113724179) + + W(2, -1102897085, -1132678155, -1151042791, 1013105800) + + W(3, 1041000592, 1032243033, 1034904976, -1123793645) + + W(4, 1023511162, -1096924480, -1094474081, -1086033191) + + W(5, -1085852953, -1093123217, -1093479580, -1091335173) + + W(6, 1037697110, 1040728225, 1056422164, 1062857884) + W(7, 1058773976, 1063905370, 1050274619, 1057695471) + + W(8, 1039047450, -1108434135, 1032921189, -1122589009) + + W(9, -1100255843, -1106044526, -1112977566, -1136654701) + + W(10, -1138182274, -1138791135, 1010850641, 1034477848) + + W(11, -1112868181, 1026848855, -1122948276, 1007925107); + sum2 = + W(0, -1101843642, 1047092496, 1032746990, -1098074183) + W(1, 1053786699, -1096910060, 1043846267, 1023876031) + + W(2, -1101718380, -1110599661, -1127672356, -1096713677) + + W(3, -1104014372, 1035860070, -1097849415, 1051096852) + W(4, 1046322966, 1053918698, -1100340247, 1069438643) + + W(5, 1025809994, -1083706115, 1048729218, -1081917128) + W(6, 1051125372, -1092942861, 1040369276, 1074732797) + + W(7, 1050011098, 
-1080423025, -1117530220, -1091623712) + + W(8, -1084762721, 1058235489, -1090389206, -1088693676) + W(9, 1053232821, 999981171, -1097505133, 1050919626) + + W(10, 1050651985, -1101055683, 1050412863, -1103557505) + + W(11, 1052371166, -1099152238, 1041618808, -1123531943); + WS(-1098378327, -1087753140); + sum1 = W(0, -1149815841, 1013838405, 1021155853, 1025974631) + W(1, 1017740987, 1020943940, -1137239299, 1031952605) + + W(2, -1123262056, -1131552366, -1114416842, -1103162646) + + W(3, -1099404299, -1100518162, -1111541722, -1098489580) + + W(4, -1110243207, 997589309, -1104972312, 1042062186) + W(5, 1057054155, 1059648114, 1047878460, 1061110492) + + W(6, 1053738584, 1025605911, 1049179692, -1094476095) + + W(7, -1093068705, -1101042271, -1111532695, -1100382705) + + W(8, -1096838314, 967065877, -1108406847, -1130981226) + W(9, 1023051300, 1010424693, -1117167136, 989087216) + + W(10, 1031741109, -1135340592, 1029594857, -1145022506) + + W(11, 1005012974, -1112923496, 1030823984, -1113821120); + sum2 = + W(0, 1034695587, -1111830177, 1017885202, -1116805918) + W(1, 1039973913, -1113376464, 1024357631, -1109344122) + + W(2, 1032463913, 1031752431, -1103205904, 1045285117) + W(3, -1092035876, 1044034906, -1097056010, 1034392181) + + W(4, -1087773887, -1104893727, -1114987001, 1061301409) + + W(5, 1053784665, -1096259933, 1015259894, -1095057905) + W(6, 1051055536, 1010301739, 1045597173, 1065451004) + + W(7, 1036331211, -1101789830, -1105822235, -1098597360) + + W(8, -1103774157, 1003908630, -1107230114, 999416278) + + W(9, -1118216068, 1047293827, -1119063058, -1139528651) + + W(10, 1028563957, -1111301343, 1042437558, -1106181364) + + W(11, 1014136811, -1127455968, -1137717779, 1017174230); + WS(1051991511, -1090129628); + sum1 = W(0, -1119709994, 1028979258, 1025569135, 1025156752) + + W(1, -1113846787, 1032797781, -1137003429, -1126457798) + + W(2, 1035475128, -1103260128, -1099928080, 1041023848) + W(3, 1030722332, 1040845476, 1015665914, 1032471277) + + 
W(4, 1044364174, 1048862488, 1054464119, -1103260308) + W(5, -1090331224, 1039529634, 1015668019, 1036006161) + + W(6, -1099369674, -1107028176, -1092954947, 1048877896) + + W(7, 1036105176, -1113372113, -1104520406, -1111431094) + W(8, 1024914253, 1037758213, 1045752341, 992983000) + + W(9, -1115078629, 1027244574, 1039091960, -1113568276) + + W(10, -1122358045, -1142000036, -1110227337, 1023847918) + + W(11, 1018698636, -1139349278, -1131269376, 1004297100); + sum2 = + W(0, 1043798657, -1117698064, -1102458928, -1101130902) + W(1, 1025730631, 1026623851, 995294154, 1012466631) + + W(2, -1098470129, -1096590117, -1084690772, 1065791971) + + W(3, -1123809426, 1019910088, 990258218, -1124630920) + W(4, -1126555274, 1047971790, 1067678357, -1099131342) + + W(5, 1036453804, -1117295572, -1116170672, 1010360391) + W(6, 1029962999, 1037727050, 1008201251, -1083609361) + + W(7, 1031198846, -1113754273, 1030505142, -1136369090) + + W(8, 1028782608, -1161955740, -1103096568, 1051059538) + + W(9, -1106261335, 1041141064, 1028328942, -1119361560) + + W(10, -1131048284, 1022180969, -1120783295, 1014843769) + + W(11, 1020274026, -1111232000, -1127235720, 1001879503); + WS(1062559660, 1025273829); + sum1 = W(0, 1017067222, 1004255955, 1019590495, -1109975080) + W(1, 1028071367, 1004648665, -1129853588, 999516190) + + W(2, -1098241838, 1041017424, -1136327879, 1032386976) + + W(3, 1031451349, -1112687016, 1040197151, -1105242363) + + W(4, -1088364567, -1088941672, -1087512143, -1083706517) + + W(5, -1089169701, -1092690193, -1100415991, -1093365052) + + W(6, 1060515311, 1050253504, 1064814036, 1061681108) + W(7, 1061080357, 1052988124, 1048643203, 1050105214) + + W(8, -1113064427, -1115329590, -1113817187, -1146803505) + + W(9, -1112935650, 1031379878, -1112391860, 1051227835) + + W(10, 1036248943, -1133026900, 1008140615, 1028690158) + + W(11, 1010963867, 1030651112, -1133141907, -1139571464); + sum2 = + W(0, 1058977722, -1123128465, 1051128320, 1025585093) + W(1, 1043020026, 
-1090331128, 1037612985, -1101815127) + + W(2, -1088438618, -1084402469, -1097793743, -1092491618) + + W(3, -1090336452, -1094761348, 1032815200, -1080227574) + + W(4, -1070888183, -1068917363, -1079680807, 1078112044) + + W(5, 1082393215, 1070728174, -1090225897, 1066541275) + + W(6, -1080154822, -1109667848, -1080832517, 1058335327) + + W(7, 1067339759, 1057746923, -1122043266, 1067785018) + W(8, 1057028315, -1133954517, 1057005952, -1089499329) + + W(9, 1038175360, -1096630558, 1051816122, -1086032969) + + W(10, 1020668753, -1117456198, 1031920962, 1039195999) + + W(11, 1046190750, -1139309423, -1116527206, 1046599598); + WS(-1077531606, -1068627295); + sum1 = W(0, -1124767318, 1034866077, -1101653116, -1130761376) + + W(1, -1106412978, -1121310187, 1040285922, -1107982930) + + W(2, -1113367136, 1048375560, -1104403272, 1049955828) + + W(3, 1036710310, -1108970480, 1049541454, -1107579101) + + W(4, -1095446127, -1099058463, -1090535733, -1087242437) + + W(5, -1088900579, -1096168584, -1097928011, -1095682732) + + W(6, 1055377197, 1047513184, 1058086815, 1061174628) + W(7, 1057172304, 1056094244, 1047121222, 1054731422) + + W(8, -1111814927, 1045191539, -1094831865, 1048150628) + + W(9, -1103308083, -1102502166, 1043510842, -1103869114) + + W(10, -1126382267, 1025894069, -1117513042, 1008100281) + + W(11, 999030326, -1115104524, 1032672660, 1025118882); + sum2 = + W(0, -1118325221, -1109912068, 1055626857, 1073210842) + W(1, 1050117205, -1108710534, 1007734945, 1005945059) + + W(2, 1021086937, -1119472758, 1051951524, 1028908234) + W(3, 1044779683, 1039597247, -1130127373, 1026547602) + + W(4, 1043003051, 1026424546, -1092928023, -1081037490) + + W(5, -1093398148, 1044106321, -1119182686, 1019051617) + + W(6, -1131106753, 1030452362, -1103671831, -1079815727) + + W(7, -1105002867, 1032627417, -1108845723, 1023992222) + + W(8, -1122810381, -1109242129, 1034708821, 1035695099) + W(9, 1037167425, -1112762482, -1120996683, 999154803) + + W(10, 1007248641, 1007569065, 
-1112697430, 1034566233) + + W(11, -1131640977, 1020894429, -1144614067, 1009135009); + WS(1027136184, 1037475189); + sum1 = + W(0, 1032095571, -1100957374, 1042581547, 1008293424) + W(1, -1125080965, 1046313162, -1104233593, 1032243376) + + W(2, -1116112165, 1049686375, 1028374302, -1096656387) + + W(3, 1033692244, -1105885187, -1122471385, -1122778646) + + W(4, -1115063802, 1038845436, -1088748902, 1053200569) + + W(5, 1024446429, -1103807360, 1048125173, -1114896431) + W(6, 1049018533, -1106083983, 1046087792, 1044615055) + + W(7, -1090279185, 1052904956, -1106111045, -1113201013) + + W(8, -1119314641, -1120251015, 1034473457, -1099043646) + + W(9, 1029265287, 1033985068, -1106885357, 1045413959) + + W(10, -1107767341, 1047825580, -1104556803, 1041068763) + + W(11, 1035958465, -1106489925, 1037737330, -1123269230); + sum2 = + W(0, -1109957285, 1046942588, -1108558913, 1035311457) + W(1, -1103268017, 1020819182, 1017489970, -1119040125) + + W(2, 1028061230, -1090820835, -1168618571, 1053631926) + + W(3, -1140171429, 1024395745, 1034074787, -1111153127) + + W(4, 1045893852, 1047577084, -1096957845, -1097789717) + + W(5, 1058472778, 1010859305, -1118924631, -1134098557) + W(6, -1110812851, 1048459161, 1058947879, 1028951753) + + W(7, -1086199154, 1018177816, -1119421881, 1029986143) + + W(8, -1108048047, -1114577527, -1104353646, 1041621301) + + W(9, 1047742834, -1101676043, 1030918112, -1110045381) + + W(10, 1027149256, -1106934834, -1104794282, -1139749789) + + W(11, -1105608688, 1051639728, -1105627073, 1035525353); + WS(1059085676, -1120419895); + sum1 = W(0, -1123531970, 1015920803, -1129458830, 1003238718) + + W(1, 1024388894, 1019022757, -1122285937, 1029064795) + W(2, 1010567522, 1041225150, 1048968044, -1107069702) + + W(3, -1107554082, -1131681870, -1110859547, 1018058500) + + W(4, -1140828475, 1042370229, -1081896608, 1061545382) + W(5, 1057779808, 1048429719, 1046207852, 1043188126) + + W(6, 1031892193, -1105839574, 1052692461, -1113798029) + + W(7, 
-1080962214, 1023517883, -1107360247, -1109838114) + + W(8, -1126417758, 1021017477, -1106306215, 1033163859) + + W(9, 1051334024, 974463598, -1137698561, -1182091254) + + W(10, 1013434325, -1117671948, 1033141013, -1110196995) + + W(11, 998455538, -1104852314, -1130795454, -1115208005); + sum2 = W(0, -1115293356, -1104270657, 995357221, -1140125409) + + W(1, 1041981586, 1016190873, -1140513641, -1115228320) + + W(2, 1051048471, 1028093784, -1111420185, -1087783718) + W(3, 1031459460, 1035133120, 1028088400, 1019234873) + + W(4, -1116254969, 1055158904, -1108447426, 1057721666) + + W(5, -1086769840, -1119040585, -1121069799, 1043934745) + + W(6, -1108089569, -1104160418, -1113016550, 1060085251) + + W(7, 1041174723, -1119561801, -1114345654, -1097229960) + + W(8, -1110389222, 1040482382, -1105273681, -1106428297) + + W(9, 1046452617, 1045849559, 1044224673, -1114369945) + + W(10, 1032305896, -1109483288, 1033267880, -1106001572) + + W(11, -1115354420, -1127947625, -1119205024, 1033437588); + WS(1063842732, 1069263660); + sum1 = W(0, -1134238373, 1031532473, -1118861678, 1031846503) + + W(1, -1118980549, 1032122901, -1119645672, 1033668289) + + W(2, -1122877239, -1122091454, -1114405937, -1102409375) + + W(3, -1105793535, -1113900631, -1129606328, -1133229476) + + W(4, 1055968823, 1052972391, 1057777668, 1062381841) + W(5, 1065218890, 1051224528, 1055096307, 1043160456) + + W(6, -1099884346, -1097152888, -1088480739, -1081966868) + + W(7, -1086950263, -1107218856, -1097976619, -1106065147) + + W(8, -1101209335, -1119905385, 1025124405, -1166909210) + + W(9, 1045788186, -1105943487, 1011470685, -1107257190) + + W(10, 1027783644, 1025876211, -1133551424, -1124858713) + + W(11, -1146513705, -1136389690, -1153551023, 1024085520); + sum2 = W(0, 1008982555, -1122047761, -1134583037, -1137222855) + + W(1, -1140752305, 1029699842, -1114302140, 1026101716) + + W(2, 1017497319, -1119731325, -1110768789, 1050149080) + + W(3, 1041298195, -1094516021, 1037679865, 1029205125) + 
W(4, -1114050188, 1048705804, 1082656561, 1079097716) + + W(5, -1069364548, -1064571216, -1105564781, -1121859551) + + W(6, 1044555710, -1104624717, 1041907901, -1088882238) + + W(7, 1050137131, -1099205492, -1109039739, 1040352044) + + W(8, -1115112254, -1113283108, 1041138312, 1033378952) + + W(9, 1048604087, -1105508475, 1026612451, -1131493091) + + W(10, -1121509319, 1042591944, -1111081434, 1018241317) + + W(11, -1128224049, 1008906239, -1118064246, 1024193619); + WS(-1129102704, 1046511454); + sum1 = W(0, -1120255189, 992833099, -1134582632, -1138794987) + + W(1, -1157138191, -1129425099, 1016407976, -1133147960) + W(2, 1003871004, 1029362986, 988783847, 1011030567) + + W(3, -1118838023, -1111313565, -1115457860, -1106432157) + + W(4, -1098175836, -1113473140, -1095364566, -1091886143) + + W(5, 1040190185, -1149570791, 1037653026, 1038304902) + W(6, 1060542285, 1049212165, 1059935407, 1049733902) + + W(7, -1115543549, 1041064643, -1119854466, 1029519512) + + W(8, -1104614392, -1118651222, -1098721330, -1109648580) + + W(9, -1121023765, -1105481630, 1031662787, -1111089569) + + W(10, -1115237700, 1012659838, 1026034940, -1116104473) + + W(11, 1036784617, -1112251695, 1023835049, -1170197274); + sum2 = W(0, 1039353726, -1104263814, 1027240563, 1028285409) + W(1, 1032871415, -1151417050, 1027668005, 1023556565) + + W(2, 1022956598, 1036699032, -1120983737, -1106986979) + + W(3, -1095425364, -1136397826, -1108376211, -1098325150) + + W(4, -1093991321, -1154678850, -1089878099, 1067214284) + + W(5, 1047639440, 1051464555, -1104038071, 1061372327) + + W(6, -1088722171, -1088770012, -1101846939, 1070132546) + + W(7, 1045480826, -1099095249, 1043597107, -1086577064) + + W(8, 1039854738, -1104468959, -1096446657, 1029156699) + + W(9, -1098508409, 1024325599, -1148394393, 1034968116) + + W(10, -1125147089, -1123571799, 1036003207, -1117321918) + + W(11, 1027342659, -1150420266, -1138411071, 1031516803); + WS(1056411607, -1109579684); + sum1 = W(0, -1129654332, 
1027988397, 1041758889, 1027974595) + W(1, 1038643645, 1038422303, -1115801276, 1047822542) + + W(2, 1040494825, 1031736403, -1118623100, -1104250813) + + W(3, 1023561152, -1115303377, -1111435064, 1037517815) + W(4, 1053570965, 1051564806, 1061336939, 1057393436) + + W(5, 1065029703, 1052932495, 1051403778, 1056446507) + + W(6, -1089114662, -1091392278, -1087917688, -1081469355) + + W(7, -1091410052, -1085011849, -1108233957, -1089621406) + + W(8, -1099475130, 1018661052, 1034322317, -1094207710) + + W(9, 1052271760, -1096091282, 1036087894, -1095410877) + + W(10, 1041782836, 1032323860, 1035039650, 1010041732) + + W(11, 1041287822, 1020820966, 1036272138, 1043391417); + sum2 = W(0, -1114696922, 1043151870, -1098855398, 1045725515) + + W(1, 1041130784, -1093166728, 1048010869, -1097485062) + + W(2, 1063327246, 1054945342, -1111180476, -1078968963) + + W(3, -1075182266, -1073598210, -1072729035, -1069127579) + + W(4, -1085101471, -1091433742, 1044429796, 1071264665) + W(5, 1068966947, 1074573142, 1072575447, 1082548410) + + W(6, -1089121793, -1108711949, -1096795753, 1049748951) + + W(7, 1041099170, -1107925652, -1087601324, 1057234494) + W(8, 1049274948, 1036153834, 1047012469, 1043607695) + + W(9, 1008700501, -1106854972, -1100708600, -1095686167) + + W(10, 1041765085, -1114908726, 1017938665, -1113887753) + + W(11, 1052357470, -1097732836, 1035577978, -1117196119); + WS(-1075355670, -1094395357); + sum1 = W(0, -1145694380, 1031927794, -1126751284, -1107635165) + + W(1, 1035883256, -1117999477, 1025403748, -1135056568) + W(2, 1013276974, 1019602069, 1034056645, 1046251906) + + W(3, -1107984424, -1101824103, -1117853800, -1113050587) + + W(4, 1022872131, 1012968093, 1064151904, 1059501808) + W(5, 1052823861, 1052884390, 1040309473, 1051481338) + + W(6, -1102394069, -1105184299, -1089968245, -1079531190) + + W(7, 1040654683, -1097950850, 1021798787, -1099617206) + + W(8, -1132601278, -1113047243, -1112578984, 1045395638) + + W(9, 1035604635, -1102570614, 
1016601958, -1117559395) + + W(10, 1032373060, -1123577887, 1012023536, -1121043161) + + W(11, -1136193989, -1124087697, 1014648876, 1015049529); + sum2 = W(0, 1040559153, -1099966915, 1051040838, -1122400566) + + W(1, -1103745056, -1127876766, -1118336076, -1122216569) + + W(2, 1024400373, 1050797480, -1098907063, -1088192853) + W(3, -1132746540, 1040989775, 999475557, 1024449813) + + W(4, -1112035522, -1095559069, -1091507667, 1063399780) + + W(5, 1064106398, -1101527709, 1025976147, -1118033876) + + W(6, -1102969122, 1048535090, -1081423354, 1065515124) + + W(7, -1104928579, 1047332946, -1171911780, 1047796018) + + W(8, 1035694118, -1109162397, 1049424016, -1098794388) + + W(9, -1097345189, -1105732065, -1107661001, 1036172042) + + W(10, 1008704054, 1035803260, 1032276403, -1104133833) + + W(11, 1042270874, -1109516912, 1033064723, -1106418589); + WS(1045480366, -1089018411); + sum1 = W(0, 956510844, 998930446, -1129843310, -1113698796) + W(1, 1001814128, -1126854725, 1033942209, -1127622934) + + W(2, -1147321796, -1119371847, 1033743797, 1047477674) + W(3, 993727556, 1019312750, 991522899, -1124727396) + + W(4, -1098659472, -1114996930, -1084286683, -1080535200) + + W(5, -1100569285, -1095107249, -1106428344, -1102902186) + + W(6, 1046172476, 1016456206, 1060852743, 1065762350) + W(7, 1055209670, 1054147605, 1040201800, 1049320503) + + W(8, -1117032824, 1032232105, 1034928007, 1036591131) + + W(9, 1034876769, -1103835661, -1110790848, -1117790432) + + W(10, 1012429717, 992745416, -1120189081, -1130833259) + + W(11, -1112309127, -1162991986, 1019870324, -1166423644); + sum2 = W(0, -1135134951, -1138029383, 1039247354, -1105815405) + + W(1, 1043479193, -1108579602, 1036464698, -1114044136) + + W(2, 1034251754, -1112450391, 1048129179, 1013788103) + + W(3, -1107279344, -1098532570, -1125069222, -1106884027) + + W(4, -1112699779, 1048844919, -1081251706, -1102377508) + + W(5, 1074756442, -1079912447, 1051417558, -1113485350) + + W(6, 1028246411, -1097747276, 
1030988939, -1085286825) + W(7, 1066704374, 1048531717, 1025432715, 1037742006) + + W(8, 1024730233, 1036047504, 1032907358, -1098604264) + + W(9, -1121203556, -1097926469, 1037541288, -1114746618) + + W(10, -1141862453, -1116303285, 1032058431, 1026446373) + + W(11, -1115422697, 1017067577, -1110631868, 1026757097); + WS(-1129198960, -1098545020); + sum1 = W(0, 1016631370, -1109829274, 1027654249, -1135763497) + + W(1, 1022371969, -1109147378, -1124465415, 1000105708) + + W(2, -1103774667, 1046260982, 1041444949, 1029491033) + W(3, 1033440347, 1047095266, -1135425502, 1031236725) + + W(4, -1097986678, -1103302894, -1085753764, -1087937321) + + W(5, -1100503343, -1092825640, -1114972810, -1092048676) + + W(6, 1055978916, 1038066845, 1057520791, 1064242364) + W(7, 1043702710, -1106434743, 1042256612, 1048160666) + + W(8, 998946812, 1013747092, 1033950432, -1108378356) + W(9, -1149232679, 1039795292, -1134578155, 1047273267) + + W(10, -1113370572, 1026014767, -1133701934, -1124979408) + + W(11, 1034434352, -1123636138, -1133096682, -1110866582); + sum2 = + W(0, 1008784866, -1111810524, -1123287968, 1035375858) + W(1, -1131156064, 1014562100, 1013976797, 1025684805) + + W(2, -1101060946, 1041574978, -1113715041, 1009783259) + + W(3, -1117592464, 1037249766, -1112331585, -1116058888) + + W(4, 1045200185, -1109901275, -1111729623, -1099551228) + + W(5, 1028611861, -1126723736, 1048583824, -1123970907) + W(6, 1046672822, -1123470329, 1052382391, 1057574679) + + W(7, 1037781521, -1088911756, -1105025107, -1098796399) + + W(8, -1118279822, -1113194466, -1105523541, -1090683445) + + W(9, -1089242363, 1057541056, 1046019340, 1058673062) + W(10, 994298612, -1124039764, 1016737108, 1025528776) + + W(11, 1044936991, 1038965019, -1100984465, -1111874598); + WS(1055927127, 1032414456); + sum1 = W(0, 1026777470, -1114645447, -1107741345, 1034928754) + + W(1, -1106080726, 1036990351, -1132545269, -1115024108) + + W(2, -1105922759, 1038629978, -1118870909, 1048976902) + + W(3, 
1031310464, -1116578243, -1121324945, 1028926430) + + W(4, 1039415013, -1109541160, -1121580331, 1035267569) + + W(5, -1092363639, 1052806443, -1105847083, -1107491323) + + W(6, -1114467391, 1049649890, -1099187900, -1110167275) + + W(7, 1044706191, -1098780639, 1042096283, 1026721601) + W(8, 1040445212, -1118657663, 1040802710, 1050521172) + + W(9, -1122260014, 1045812872, -1112784326, 1028974308) + + W(10, -1109775801, 1029983880, -1103039897, 1032803169) + + W(11, -1106266495, -1117906236, 1031828590, -1113700371); + sum2 = + W(0, 1046339838, -1116647554, 1008329020, 1019769686) + W(1, -1108633040, 1035617664, 1033551944, -1127469148) + + W(2, -1095477634, 1046850624, 1046890022, -1090363791) + + W(3, 1015468535, -1092874283, -1110478242, 1032940188) + + W(4, 1041080449, -1097525459, -1087064144, 1051375999) + + W(5, 1042338708, 1061754259, -1118600243, -1108757958) + + W(6, -1106007640, 1046513622, 1055899106, -1097018549) + + W(7, -1122727333, -1100515622, 1043707610, 1042447169) + W(8, 1022253579, -1109703351, 1048733947, 1049672544) + + W(9, 1051464963, 1025134369, -1099218588, -1108633308) + + W(10, -1124447256, 1043170037, -1100004355, -1097807314) + + W(11, -1129042543, -1112390214, 1040819075, -1120359161); + WS(1063732396, 1030954530); + sum1 = W(0, 1032126752, 976782235, 1026279044, -1118035684) + W(1, 1028715099, -1121995199, 1032435871, -1124714096) + + W(2, 1036299263, -1151769923, 1034233519, 1048463661) + W(3, 1036782808, 1036166050, 1020132772, 1046021200) + + W(4, 1040042182, -1126287138, 1024363996, -1092651532) + + W(5, -1113174091, -1102126950, -1118937966, -1096484767) + + W(6, -1085959612, -1100277765, -1112294345, -1099766629) + + W(7, 1035866674, 1049742039, -1116277291, 1041160144) + W(8, 1040218780, 1040546448, 1044348191, 1048686290) + + W(9, 1009750781, -1118854702, 1040779899, 1026126509) + + W(10, 1017251428, -1135394341, -1164354746, 1013744650) + + W(11, -1137751148, 972654113, -1123833085, 1032085003); + sum2 = W(0, 1023560328, 
1005726416, -1116687692, -1115408842) + + W(1, 1041728877, -1105589301, 1038558105, -1113100068) + + W(2, -1114346246, 1027252420, -1128267680, -1102003721) + + W(3, 1038357336, 1012806192, 1025579970, 1032847335) + W(4, 1055635447, 1058379047, -1093219742, -1075825777) + + W(5, -1122468504, 1043596623, -1110943172, 1037773232) + + W(6, 1080093571, 1066361916, -1079490865, -1069273193) + + W(7, 1048667285, 1040586029, 1032482622, -1123027612) + W(8, 1053265582, 1041698127, 1044641580, -1103412699) + + W(9, -1104322529, -1123753164, -1134749344, 1003892288) + + W(10, -1109086292, 1032891532, -1142986448, 1040229437) + + W(11, 1008854368, -1116993440, -1125346072, 1027065900); + WS(-1091386327, 1040820769); + sum1 = W(0, -1133727678, 1015097725, -1138479056, -1114059050) + + W(1, 1024901707, -1122041679, 1013224377, -1123564392) + + W(2, -1113530145, -1145488419, 1016526204, 1038143634) + + W(3, -1118547201, 1031893970, -1120867010, 1007161468) + + W(4, 1024243708, -1111308865, -1103657483, -1091368514) + + W(5, -1102986721, -1104061090, 1020963773, -1118947879) + + W(6, 1033777591, 1043618337, 1057905859, 1056300329) + W(7, 1019703993, 1024793576, -1114932462, 1024563890) + + W(8, 1009482248, -1113425624, -1104459527, -1103801765) + + W(9, 1043076580, 1039830345, 1042855834, -1129826522) + + W(10, -1122301794, 1016257779, 1026855702, -1136938187) + + W(11, -1118577003, -1111999542, -1119434421, 1018984621); + sum2 = + W(0, -1118014300, -1121259996, -1118480989, -1120519112) + W(1, 1034237400, 1031820662, -1116062192, 1029964608) + + W(2, -1107425211, 1034914627, 1057124947, 1041680563) + W(3, -1110358856, -1114315301, 1010762999, 974005161) + + W(4, 988341556, 1058886039, 1080471640, -1067455811) + W(5, -1105716188, -1113532913, 1033223294, -1106855886) + + W(6, -1122432623, 1043257251, 1049657622, -1080970083) + + W(7, -1091987171, 1051213388, -1109593637, 1037720533) + W(8, 1031639360, -1110426684, 981320073, -1105451271) + + W(9, 1044181951, -1110623014, 
1038375349, 999181553) + W(10, -1122715046, 1030027284, 1022014335, 1012008135) + + W(11, -1121988626, 1009779611, -1113750985, 1028099986); + WS(1067317974, -1128063738); + sum1 = W(0, -1112292702, 1030230219, 1023947008, 1037678225) + W(1, 1016840879, 1032095758, 1018620213, 1023530868) + + W(2, 1047000343, 1008892739, 1038318737, -1110797816) + + W(3, -1118959774, -1114918032, -1114992535, 1004226268) + + W(4, -1088068429, -1130941083, -1122235065, 1055157388) + + W(5, 1048428783, 1052878187, 1036886165, 1052393190) + W(6, 1048838463, -1108932456, 1034700890, -1096039434) + + W(7, -1113377160, -1088738347, -1100769370, -1082056652) + + W(8, -1121751190, 1013016624, -1165199081, 1039874719) + W(9, 1031603132, 1045859362, 1028131502, 1050791676) + + W(10, 1027999791, 1017454988, 1031463650, 1025959551) + + W(11, 1018062255, -1143147336, 1018649043, -1115371169); + sum2 = + W(0, 1017164622, 1004731920, 1046790460, -1106036842) + W(1, 1036535757, -1103678216, 1025471738, -1122761780) + + W(2, -1091622621, 1038964407, 1049069382, 1047078409) + + W(3, -1107481011, -1098544714, 1040369699, -1107095641) + + W(4, 1072454217, -1113390241, -1080520963, -1071223185) + W(5, 1059476479, 1061994978, 1053016271, 1054597152) + + W(6, 1067262657, -1112910469, 1044434118, -1073367678) + W(7, -1099641871, 1029437169, 1051233508, 1062276141) + + W(8, -1098954881, 1038887363, 1033204278, -1106847320) + W(9, 967303234, 1030771421, -1123160801, -1113404397) + + W(10, 1035940089, -1108193245, 1032575148, -1127548642) + + W(11, 1015223408, -1109007913, 1042599225, -1116480773); + WS(-1095745367, -1102532016); + sum1 = + W(0, -1117700567, -1125050061, -1104292247, 1032821374) + W(1, -1106685309, 1026094807, -1109617979, 1025256272) + + W(2, -1131139906, -1115476726, 1044141479, 1043020835) + W(3, 1033588284, 1027591436, 1031476842, -1109574827) + + W(4, -1104729417, 1049785973, -1096094522, -1103984511) + + W(5, -1135965844, -1092452582, 1035377859, -1108767399) + W(6, 1031266390, 
1034862742, 1043929817, 1025371182) + + W(7, 1048857569, 1049600882, -1127826823, 1045422790) + W(8, 1034962037, -1108925228, -1121682994, 1003529681) + + W(9, -1108409524, 1029191534, -1105178749, -1147768356) + + W(10, -1123836106, 1021444802, -1112024639, 1036065165) + + W(11, -1114922921, 1028793076, 1025559229, 1027631502); + sum2 = W(0, -1102917712, -1090566997, -1083236080, -1084121809) + + W(1, -1092542987, -1098664696, -1115432352, -1109069600) + + W(2, 1050493331, 1058641835, 1072277942, 1066117726) + W(3, 1055792221, 1016159325, -1123932559, -1107091755) + + W(4, -1104683957, -1106617401, -1083529216, -1110147426) + + W(5, 1053260074, 1047496584, 1040670728, 1041712051) + W(6, 1038417192, -1131983160, 1026745412, -1097349120) + + W(7, -1092834676, 1048447149, -1110071086, 1019306140) + + W(8, 1032679083, -1117134944, 1030312114, 1045086676) + + W(9, -1115504874, -1103742512, 1018575153, 1023956348) + + W(10, -1124032472, -1142618034, 1033461689, -1107466492) + + W(11, 1040055828, -1111688997, 1025562022, 995416610); + WS(1064673964, 1027541745); + sum1 = W(0, 1040862122, -1105951646, -1159900529, 1047048323) + + W(1, -1123048515, -1115789415, 1032172675, 1028373376) + + W(2, 1042078126, -1104573695, -1104241265, -1111978190) + + W(3, -1096920009, 1032018046, 1022298419, -1099040790) + W(4, 1060076351, 1032906140, 1059885441, 1064315273) + + W(5, 1059865934, 1050382000, 1053685923, 1058530774) + + W(6, -1092017101, -1098380055, -1084208856, -1089015252) + + W(7, -1085419935, -1095947620, -1104053605, -1087017843) + + W(8, 1050924251, -1103234546, 1045405901, 1048739529) + W(9, 1043557114, 1032498465, 1042167691, 1037400788) + + W(10, -1113513485, -1122608113, -1114509959, -1097330869) + + W(11, -1130206982, 1009712656, 1011282770, -1099616447); + sum2 = + W(0, 1039738446, -1096514001, 1067583917, 1083413745) + W(1, 1067818743, -1089267428, 1050540062, -1102208699) + + W(2, -1137481092, 1050849818, 1017170018, 1041988969) + W(3, -1115429835, 1054896719, 
-1108022123, 1042362507) + + W(4, 1036746190, -1109747727, -1078331016, -1066189939) + + W(5, -1077977055, 1050059376, -1119460484, -1140654200) + + W(6, 998253152, -1102987618, 1044897487, -1088427847) + + W(7, -1125950758, -1108032139, 1038480975, -1098634922) + + W(8, 1035100433, -1144728624, -1108804343, 1030429056) + + W(9, -1124211336, 1045807183, -1164441214, -1138500556) + + W(10, 1029249908, -1103405950, -1113657787, 1049519441) + + W(11, -1105416150, -1104699523, 1036658641, -1135144148); + WS(-1084384556, -1100810808); + sum1 = W(0, 1032390692, 1033258828, 1036208454, -1153131217) + W(1, 1035878930, -1113170436, 1034716867, 1008810370) + + W(2, -1101205955, -1114756638, 1034225747, -1123281148) + + W(3, 1052764669, -1110681734, -1132636906, 1010363102) + + W(4, -1095424763, -1115332589, -1089988048, -1080799329) + + W(5, -1086451675, -1089641270, -1111992667, -1089944572) + + W(6, 1056723089, 1044941511, 1059213081, 1058219330) + W(7, 1065718984, 1051546784, 1048907447, 1051751177) + + W(8, -1103994552, -1103092466, 1016698694, -1102604175) + + W(9, 1010056863, -1105901266, -1104305197, 1025009925) + + W(10, -1113631133, 1038921097, 1041433148, 1029458059) + + W(11, 1046701849, -1114617106, 1022756491, 1042675492); + sum2 = + W(0, 1003892755, -1123544386, 1040199158, -1099919704) + W(1, 1046124074, -1114201767, -1137485313, 1018916205) + + W(2, -1106845972, 1040443741, 1041556166, -1098641213) + W(3, 1043027552, -1102894487, 1031291554, 1029095170) + + W(4, -1130802613, -1099129814, 1034864555, 1044973210) + W(5, 1046986152, 1040886200, -1097152858, 1035815457) + + W(6, 1042856976, 1046395618, 1049252005, 1051151809) + W(7, -1120255910, -1098031952, -1103612985, 1034278467) + + W(8, -1090921483, -1117156960, -1101642940, -1094482604) + + W(9, 1049495257, -1113386876, -1132183417, 1026205590) + + W(10, 1035865477, -1128022157, 1029291710, 1033022953) + + W(11, 1028643166, 1031856421, -1106468948, 1020718685); + WS(-1097545175, -1081485407); + sum1 = + 
W(0, 1036015463, 1036906638, 1033438601, -1148467606) + W(1, -1116883369, 1043481795, -1107248831, 1035852867) + + W(2, 1034150438, -1107749409, 1025404544, -1130068931) + + W(3, -1110996648, -1129433572, -1112307200, -1133343797) + + W(4, 1057909318, 1041348600, 1058891216, 1062920275) + W(5, 1058446394, 1057530498, 1041812911, 1058924580) + + W(6, -1085934785, -1091465804, -1088212159, -1085185870) + + W(7, -1083820335, -1097522985, -1098817647, -1088845017) + + W(8, 1035924004, -1120240750, 1040930578, 1044931644) + W(9, -1110014855, 1046628778, -1112689630, 1031062731) + + W(10, 1033963168, -1115496908, 1023737332, 1034996968) + + W(11, -1109644005, 1044774695, -1109394217, 991839047); + sum2 = + W(0, -1085422352, -1073537081, -1090145600, -1103264022) + + W(1, -1098060014, 1039561049, -1111561066, -1097648243) + + W(2, -1096165509, -1097506962, 1035648589, 1037776879) + + W(3, -1106454260, -1132410675, -1095706003, -1093619532) + + W(4, 1057773529, 1050305732, 1070820271, 1064009122) + W(5, -1102687355, 1037201191, -1106323076, 1065891070) + + W(6, -1090487063, 1042351306, 1042246902, 1057815879) + W(7, 1056492080, -1132940238, 1047053654, -1104795343) + + W(8, 1044955346, -1096519547, 1012707558, 1035447241) + W(9, -1098251612, 1055262754, -1098685023, 1017091667) + + W(10, 1048850208, -1098077313, 1040847724, 1039355809) + + W(11, -1128314591, 1034334977, -1107078913, 1044351754); + WS(-1079771574, 1075069839); + sum1 = W(0, 1032166989, -1111167476, -1123178206, 1032734815) + + W(1, 1029165557, -1112820125, -1135576864, -1114861753) + + W(2, -1118529175, 1021133405, -1093562003, -1104246611) + + W(3, -1098100446, -1114628655, -1133453360, -1119446905) + + W(4, 1055702741, 1048648437, 1062276827, 1071187580) + W(5, 1061220800, 1049405702, 1042497282, 1051379002) + + W(6, -1092970539, -1092445782, -1088433116, -1118830448) + + W(7, -1097110780, -1093425874, -1109854691, -1094782027) + + W(8, -1117950828, 1033539229, -1104828877, -1114529494) + + W(9, 
-1112849365, 1023590922, 992807281, -1104282614) + + W(10, -1113551430, -1142239002, -1110397888, -1119974813) + + W(11, -1139975764, -1111568211, 974859957, -1107804608); + sum2 = + W(0, -1108373663, 1008476139, -1104454680, 1048684757) + W(1, -1093428939, 1053127551, -1096653504, 1052320010) + + W(2, 1058265898, -1092753943, 1052155727, -1094584251) + + W(3, 1044002260, 1027921055, -1109762731, -1093142139) + + W(4, -1105791278, -1098184263, -1106622092, 1051207789) + + W(5, 1040647411, -1092246791, 1028193495, -1102532444) + W(6, -1097633487, 1036219103, 1039768739, 1059813061) + + W(7, 1037804238, 1058749571, -1091622541, 1056925595) + W(8, 1040059628, -1096394805, 1043022076, -1089567461) + + W(9, 1053976656, -1109099791, -1128546349, -1105143398) + + W(10, 1041333883, -1106791026, 1034264586, 1043988712) + + W(11, -1094438691, 1046191998, -1105323978, 1027895571); + WS(-1080085654, 1070612946); + sum1 = W(0, 1024243697, -1118763904, -1129448298, -1125372076) + + W(1, -1130529432, -1113325424, 1028582463, -1115811081) + + W(2, -1111019203, 1017624971, 998267115, 1049904509) + W(3, 1044325341, 1036368923, 1022422215, 1029887376) + + W(4, -1099863023, -1093242824, -1096657110, -1083526619) + + W(5, -1084273056, -1089131829, -1100657096, -1095229037) + + W(6, 1051294614, 1049086721, 1054244159, 1063525996) + W(7, 1060516258, 1052725165, 1045179095, 1047234173) + + W(8, 1018553499, -1110032568, -1107816470, 1050014740) + + W(9, -1120428313, 1042939778, -1105723916, 1034815359) + + W(10, 1001929499, -1129474933, -1103030358, 1034265690) + + W(11, 1028809164, -1137068591, 995816851, 1023513072); + sum2 = + W(0, 1027629060, 1025235210, -1125103522, -1130467636) + W(1, 1000499297, 1020465580, -1138182160, 1014590712) + + W(2, -1114994936, 1042086314, -1109536432, 1043514454) + + W(3, -1116030212, 1034381305, -1133867384, 1032991925) + W(4, 1044032624, -1110089750, 1027377576, 1040857173) + + W(5, 1049094807, -1102199059, 1040209410, -1107097337) + W(6, -1120685804, 
1041443307, 1041551160, 1078385077) + + W(7, 1071531230, -1097624207, 1042950964, -1110894461) + + W(8, 1028515824, -1109301707, 1038947979, -1069674168) + + W(9, -1072009528, 1042363648, -1100125727, 1020171064) + + W(10, -1128032940, 1028557604, -1095260923, 1041445431) + + W(11, 1044521776, -1111317304, 1026231984, -1115643044); + WS(-1094677847, 1071331518); + sum1 = W(0, -1108370254, 1027561514, -1098732332, -1118400576) + + W(1, -1098654343, 1020701942, -1114608461, -1102267173) + + W(2, 1050522446, -1102155388, 1051883284, 1052444946) + W(3, 1048632224, 1036179296, 1043748547, 1046936641) + + W(4, -1090412394, -1096900614, -1088235704, -1081990504) + + W(5, -1083209683, -1095695623, -1096819473, -1088405429) + + W(6, 1059909490, 1033926552, 1060113893, 1065971548) + W(7, 1057796766, 1055081266, 1048355474, 1057781222) + + W(8, -1131659964, -1108396174, -1104366446, 1016076284) + + W(9, -1094058291, 1044926426, -1102235562, -1122894705) + + W(10, 1045668787, -1099365497, 1036562753, 1045429432) + + W(11, 1027649399, -1115499084, 1020598162, 1041162909); + sum2 = + W(0, -1102306900, 1039340517, 1036459319, 1032447827) + W(1, -1099327251, 1052797014, -1122082449, -1098452307) + + W(2, 1042298222, 1011988276, -1104503945, -1099734245) + W(3, 1032726509, 1032685073, 1045894449, 1031495568) + + W(4, 1021581591, -1091773202, -1086047943, -1071833762) + + W(5, -1087097517, -1094401898, -1089730964, -1094627244) + + W(6, -1097774924, -1083547573, -1076320925, -1070847208) + + W(7, -1079468544, -1084551630, -1096261152, -1097740674) + + W(8, 1051289508, 1050176620, 1066124397, 1069182349) + W(9, 1058492703, 1061745990, 1029975482, 1062784611) + + W(10, 1044651623, 1060043835, 1072230275, 1080482397) + W(11, 1067612874, 1060905071, 1035766217, 1036170953); + WS(-1075403638, -1098484659); + sum1 = + W(0, 1008218969, 1031048294, -1123131291, 1045188911) + W(1, -1123984690, 1016373997, 1038284987, 993498863) + + W(2, -1141902179, 1016567255, -1102849342, 1018204181) + + 
W(3, -1097928441, -1102752375, -1119205801, -1111653947) + + W(4, 1045612697, 1051272109, 1045999461, 1067242878) + W(5, 1057827361, 1039307800, 1052680217, 1048974634) + + W(6, -1098198307, -1097302541, -1089514713, -1083074125) + + W(7, -1090995143, -1089521325, -1111274305, -1089001074) + + W(8, -1115216112, 1045061466, -1110301017, 1055219615) + W(9, 1042127630, 1018247819, 1043301703, -1112012330) + + W(10, 1014117412, 1032424528, -1117285565, 1039741805) + W(11, -1138318152, 990571063, 1035406958, 991913685); + sum2 = W(0, 1032878555, -1123900432, 1029453781, -1120535472) + + W(1, 1040510410, -1109544472, 1036988619, -1113659102) + W(2, -1136655860, 1023477569, 993352402, 1007422372) + + W(3, -1110556919, -1138628828, -1137184004, 1035524869) + + W(4, 1031615749, 1032172309, -1101515073, 1063516373) + + W(5, -1114866186, -1101753271, 1046060209, -1110546008) + + W(6, -1113247036, 1040892014, -1093800370, 1059067488) + + W(7, 1049733424, -1112478460, -1128679382, -1106958218) + + W(8, -1102324444, 1041677780, -1099560811, -1101931537) + + W(9, -1091691569, -1137342172, -1112686045, 999359657) + + W(10, 1033821831, -1122117887, -1111754290, -1118366060) + + W(11, -1131546954, 1043560141, -1110783375, 1026284797); + WS(1018938736, 1060529869); + sum1 = + W(0, -1114076530, 1008598737, 1029696041, -1118730378) + W(1, 1036159584, 1012443924, 1034257674, -1133449206) + + W(2, 1037607357, -1120766810, -1113865353, 1019917938) + + W(3, 1041300082, -1119787914, -1102751430, 1018668169) + W(4, 1045714156, 1044442542, 1057228923, 1049628033) + + W(5, -1081803320, 1042784239, 1016494120, 1031639516) + W(6, -1166977277, 1024586298, -1084710912, 1048848151) + + W(7, 1052825757, -1110703311, -1108049641, -1111061268) + + W(8, -1102825464, 985048227, 1048654858, -1109196652) + W(9, -1101625253, 1021290536, 1039349129, 1018121541) + + W(10, 1003359422, 1019680094, -1122235660, 1032134772) + + W(11, 1023446241, 1025035938, -1122160707, 1028394476); + sum2 = W(0, -1146778654, 
1009999826, 1042445717, 1027431889) + + W(1, -1118789096, 1034933139, -1102516465, 1039233552) + + W(2, 1033337426, -1117028555, -1092512747, -1100415596) + + W(3, -1100913756, 1031653672, 1049736808, -1123203031) + + W(4, -1101837401, -1107426057, -1106280442, 1063146362) + + W(5, -1086144449, -1121957589, -1104136456, -1123128270) + + W(6, 1023259685, -1113617305, 1063964069, 1058459732) + + W(7, -1101061454, -1139753565, 1034026738, -1143324392) + + W(8, -1165315756, 1042598704, -1101414619, -1136881638) + + W(9, -1099212326, 1035181957, 1031514566, -1103321885) + + W(10, 1017616818, -1119841713, 1033400884, -1110929163) + + W(11, 1043348599, -1110261015, -1119239229, 1032933914); + WS(1057790316, -1113005641); + sum1 = W(0, -1123468520, 1016842734, 1017823704, 1031334797) + W(1, 1029517379, 1032276500, 1024531558, 1032934320) + + W(2, 1034968039, -1180462681, -1141354132, -1111663246) + + W(3, -1096962432, -1102337515, -1112383277, -1115337892) + + W(4, -1107161453, -1109372445, -1097660717, 1060769665) + + W(5, 1062816796, 1057344992, 1050585827, 1048852876) + W(6, 1035442035, -1126387193, 1045333479, -1097006056) + + W(7, -1085875335, -1087615930, -1099229077, -1104188731) + + W(8, -1107573447, -1110219956, -1111689634, 1016331253) + + W(9, 1042106418, 1042154624, 1021430571, 1007994773) + W(10, 1040623159, 1001956750, 1034883047, -1121213099) + + W(11, 999707420, -1110655190, 1027213229, -1125102461); + sum2 = + W(0, 1027505374, -1119743709, 1022357321, -1129702556) + W(1, 1025908250, -1116658238, 1007287620, -1132286080) + + W(2, 1006384241, 1032385395, -1110226975, 1009992786) + W(3, -1117540770, 1015280579, 1013929491, -1123806249) + + W(4, 1038217397, -1124784904, 1036163691, 1038388564) + W(5, -1107551412, 1043662427, -1110875284, 1032548500) + + W(6, -1102596332, 1055108824, -1095998103, -1096068823) + + W(7, 1027585837, 1052401764, -1112465838, 1049275421) + + W(8, -1088198084, -1101781911, -1077861124, -1082211809) + + W(9, 1067639721, 1063419774, 
1041946916, 1054953848) + W(10, 1054112069, -1110486748, 1063443275, 1060625206) + + W(11, -1095993202, -1084462256, 1040943353, -1088060635); + WS(1057314092, 1031126097); + sum1 = W(0, -1113264897, 1042921300, -1118216326, 1036778483) + + W(1, 1035020911, -1108928881, 1036290288, 1012907241) + W(2, 1047286031, 1038748523, 1040626286, 1048990343) + + W(3, 1036098930, 1042428894, 1016422504, 1041504752) + W(4, 1051515170, 1044373061, 1057172805, -1111196723) + + W(5, 1053560288, 1053915059, 1044112316, 1048512334) + + W(6, -1091122512, -1119263830, -1081529962, -1076778621) + + W(7, -1085296972, -1112351005, -1100809115, -1107245185) + + W(8, 1035529026, 1016709539, 1043973191, 1049229393) + W(9, 1046938632, -1123285785, 1035877806, -1123727583) + + W(10, -1125355113, 1029610636, 1029771624, -1112976162) + + W(11, 1041831703, -1103970660, 1044125996, -1123672296); + sum2 = W(0, -1110279027, 1044898087, -1096945908, 1039092196) + + W(1, -1113241071, -1099008684, 1044106833, -1103116396) + + W(2, -1116888772, -1115916604, -1107202311, 1045884385) + + W(3, -1103988696, 1049112986, -1123386226, -1134049769) + + W(4, 1033554595, -1111117443, 1035778723, -1113256321) + + W(5, -1115920774, -1139004677, -1112080343, -1114347803) + + W(6, -1108669327, 1045218355, -1103416606, 1065794034) + + W(7, -1090569433, 1055599565, -1099737884, 1044073709) + + W(8, 1015808739, -1106102431, 1041666953, -1107247823) + + W(9, 1049716916, -1106016659, 1031391070, -1108799013) + + W(10, 1027314221, -1116221606, 1024714919, -1098987808) + + W(11, 1049758114, -1091770239, 1053158291, -1105054228); + WS(-1078536214, 1004530797); + sum1 = + W(0, 1027375859, 1017444741, -1120029560, 1000019140) + W(1, -1114543207, 1013557197, -1122136338, -1148145247) + + W(2, -1101920227, 1031432968, -1106564803, 1048371434) + W(3, -1118108517, 1042631950, 1018155212, 1016114854) + + W(4, 1025777344, -1103056081, -1107107772, -1082038651) + + W(5, -1087843395, -1090887738, -1103973545, -1102912795) + + W(6, 
1044389645, 1048716617, 1051154454, 1066595410) + W(7, 1055824714, 1054676989, 1041035048, 1016296812) + + W(8, -1110651587, -1137653198, -1107166356, -1130046381) + + W(9, -1105265945, 1030116559, 1023828021, 1040191506) + + W(10, 1018050380, -1125959547, 1019570356, -1125449965) + + W(11, 1033235496, -1126792065, -1122035417, -1142050258); + sum2 = + W(0, 1027035378, -1132701065, -1114259972, 1024730711) + W(1, 986229224, 1013886009, -1129435432, -1141056234) + + W(2, 1026113022, 1034348094, 1019537869, -1139897757) + + W(3, -1140805741, -1118691470, 1009541569, -1129312440) + + W(4, -1111907604, 1044772326, -1115459713, -1083470341) + W(5, 1024256637, 1050967575, 1013091981, 1028089297) + + W(6, -1117578886, 987582920, -1091701771, -1064559027) + W(7, 1083026821, 1062853848, -1097816209, 1036032976) + + W(8, -1140538653, -1136739637, -1106970806, 1035917626) + + W(9, 1051507274, 1035548789, 1030786995, -1102804079) + W(10, 1018825264, 1015195718, 1029368182, -1129456736) + + W(11, 1025912688, -1107265712, 1031335734, -1115863996); + WS(1053759831, 1032374114); + sum1 = W(0, 972229904, -1109228149, 1034978575, 1041049216) + W(1, -1120680758, 1029299866, -1104501302, 1034604409) + + W(2, -1121671930, -1112583528, 1034149296, 1029443406) + + W(3, 1029497788, -1098177536, -1147362406, -1129221556) + + W(4, 1054724605, -1112974870, 1061211455, 1048871720) + W(5, 1059772533, 1052523382, -1143596105, 1050895314) + + W(6, -1092393035, -1101934908, -1092299422, -1093922438) + + W(7, -1094240300, -1091878482, -1112394307, -1097371836) + + W(8, 1018308729, 1027590122, 1034030567, -1111847647) + + W(9, 1041914031, -1113968603, 1028043992, -1126204095) + + W(10, -1129466650, 1029939172, -1113423424, 1038901743) + + W(11, -1106909549, 1040006220, -1140269215, 998375305); + sum2 = + W(0, -1150066816, -1122485893, -1114773240, 1024800634) + W(1, 996682648, -1123997738, 1019128023, -1127165963) + + W(2, -1150466984, 1032358015, 1050624105, 1086980496) + W(3, 1049846439, 
-1116390866, 1016213345, -1127449775) + + W(4, 1023864952, -1120268940, -1092046440, -1062089081) + + W(5, -1092228228, 1022750294, 1015347232, 1000268865) + W(6, -1110066210, 1042589181, 1028033997, -1086968029) + + W(7, 1024724796, 1041929871, -1114433107, -1139900476) + W(8, 1023825317, 1027888694, -1151325172, 1038366470) + + W(9, -1121152696, -1118357792, 1031614438, -1136800730) + + W(10, -1124858867, -1130877326, -1135440218, -1140173368) + + W(11, 1022098295, 1017805328, -1119866592, 1025894920); + WS(1063005484, 1009613411); + sum1 = W(0, 1023879031, -1116658758, 1016433014, 1020845368) + + W(1, 1014732617, -1133265115, -1123204757, 1010953149) + + W(2, -1099686359, -1121736524, -1101192360, -1114633573) + + W(3, -1095872969, -1106126199, -1114430117, -1135381594) + + W(4, 1052137768, 1047373394, 1060630207, 1066103327) + W(5, 1062822060, 1059882369, 1050777829, 1058652475) + + W(6, 1046007565, -1096622041, -1088767007, -1084203279) + + W(7, -1090088910, -1090037515, -1103091289, -1092699671) + + W(8, -1104853008, 1032552475, -1107092245, 1041296218) + + W(9, 1027255591, -1131360500, -1122457924, -1108476770) + + W(10, 1005501772, -1119708225, -1118541917, -1116191290) + + W(11, 1022596005, -1115075910, 1022253604, -1114779071); + sum2 = + W(0, 1018912922, -1115513506, 1035927400, -1104590179) + W(1, 1040736265, -1107721632, 1025188211, -1112944230) + + W(2, 1027578843, -1122957333, 1036987406, -1100032664) + + W(3, -1110179624, -1125426330, -1103038285, 1043904940) + + W(4, 1033809184, -1107632498, -1102289594, 1055409420) + + W(5, 1055289786, -1112721987, 1036015348, -1110109848) + W(6, -1094114890, 1001948858, 1056054406, 1048553428) + + W(7, 1051787669, -1094744691, 1032878358, -1118996080) + + W(8, -1100264522, 1024979575, -1110397259, -1121465059) + + W(9, -1147035242, 1045301422, -1131004626, -1124556646) + + W(10, -1107937173, 996744179, -1110541201, -1114033403) + + W(11, 1041331760, -1113161109, -1108457170, 1028913131); + WS(-1088267692, 
1058858468); + sum1 = W(0, -1112379634, 1029162995, -1140270752, -1141342278) + + W(1, -1155361775, -1113272402, -1140210569, 1015213304) + + W(2, 1030761249, 1004146884, -1122297718, 1040340278) + W(3, 1036240851, -1129581456, 1042224566, 994119730) + + W(4, 1050799110, 1045209037, 1060358148, 1057423488) + W(5, 1058630580, 1044230767, -1104611845, -1109863885) + + W(6, -1099191401, -1095201623, -1091143388, -1083233902) + + W(7, -1098879351, -1096101987, -1122560784, -1109785875) + + W(8, 1033839199, 981108966, 1043211072, 1032118859) + W(9, 1032700466, 1030417559, -1140846885, 1009738084) + + W(10, -1111715307, 1002675052, -1120236170, -1118812007) + + W(11, 1017441470, -1148456452, -1124250818, -1131671893); + sum2 = W(0, -1113443438, 1023812788, -1131593767, -1112801638) + + W(1, -1105757720, -1107427633, -1107137147, 1049954034) + + W(2, 1021821461, 1044300514, -1104994806, 1060658961) + + W(3, 1044687616, -1087213560, -1093064102, -1093773442) + + W(4, -1122246023, -1104002403, 1038382254, -1099826888) + + W(5, 1066144433, 1057721748, -1097329958, 1039897892) + + W(6, -1146734500, 1024116650, 1049088332, -1089766136) + + W(7, 1042354562, -1129092349, 1044181933, -1105235988) + + W(8, -1128559783, -1115812447, -1105739887, 1025686404) + + W(9, -1117250635, 1035306954, -1106719387, 1040800736) + + W(10, -1125128801, -1134793082, -1131329759, 1034755166) + + W(11, -1114066084, 1031417186, -1117543346, -1128306605); + WS(1042978478, 1051058289); + sum1 = W(0, -1105382966, 1035278676, -1102112151, -1102711504) + + W(1, -1113404858, -1111458068, 1038103836, -1101215220) + + W(2, 1032357083, 1023966158, -1113794846, 1052488610) + W(3, 1046698475, 1025992701, 1042579843, 1026938905) + + W(4, -1099095273, -1110890773, -1087097191, -1081382120) + + W(5, -1086454106, -1097860134, -1105233954, -1091436978) + + W(6, 1056839899, 1050844654, 1060126757, 1063120271) + W(7, 1060142827, 1054395618, 1048420817, 1056719627) + + W(8, -1122236410, 1019140850, -1107056589, 
-1150461374) + + W(9, -1103751812, -1120625160, 1025690583, -1112415123) + + W(10, 1024938700, -1122653091, -1122892351, -1119829084) + + W(11, 1007453138, -1106500335, 1032216339, 1010325087); + sum2 = W(0, 986675002, -1112567440, 1033516168, -1097585273) + W(1, 1031672616, 1029453516, -1106591291, 981423732) + + W(2, -1098687074, 1041166801, -1099338776, 1019263552) + + W(3, -1108963639, -1104454378, -1109440188, 1024870634) + + W(4, 995475005, -1097111455, 1062728692, 1066654492) + W(5, 1034345380, -1124466258, -1092068145, 1048117247) + + W(6, -1115454695, -1132868679, -1120015952, 1054175336) + + W(7, 1038275134, -1096244103, 1039104546, -1119846616) + + W(8, -1110655697, -1114152579, 1023628514, -1097107880) + + W(9, -1118909665, 1049130538, -1093717692, 1042494451) + + W(10, 1018060844, -1101427932, 1045275251, -1098419362) + + W(11, 1049612882, -1100742770, 1000911598, -1140045927); + WS(-1097071959, 1072623846); + sum1 = W(0, 1026094363, -1114366565, 1026372640, -1127140609) + + W(1, 1020533357, -1164090721, -1143600974, 993477434) + W(2, 1010541764, 1023927408, 1034504814, 1038480008) + + W(3, 1046400295, 1040252248, 1040642843, 1032065949) + + W(4, -1087379998, -1100421531, -1091057687, -1086095524) + + W(5, -1083615449, -1089701071, -1093225865, -1090227881) + + W(6, 1028105114, 1051865478, 1057630379, 1064048287) + W(7, 1061319210, 1056141130, 1049626241, 1047950199) + + W(8, 1046169702, -1105971271, 1041980470, -1104375598) + + W(9, -1113258549, -1149976804, -1120849689, 1048635489) + + W(10, -1135291959, -1125008010, 1012391943, 1011497593) + + W(11, -1126436590, 1008261592, -1126913777, 1026040178); + sum2 = + W(0, 1012042818, 1021360395, -1107209227, 1027378250) + W(1, -1105921842, 1041772128, -1127367716, -1127019644) + + W(2, 1045767812, -1098807230, 1044720012, 1042281858) + W(3, 1016599978, -1127223374, -1115783877, 1027816090) + + W(4, -1063490706, -1081789373, 1074390906, 1079115935) + + W(5, 1058048856, -1108782236, -1128008216, 1032186898) 
+ + W(6, -1101778611, -1114664520, 1049922132, -1094869567) + + W(7, -1118592797, -1121088549, -1137516645, -1111496361) + + W(8, -1112785819, 1033999610, -1110057121, 1045861970) + + W(9, 1034050759, -1129798306, 1033434514, -1136389257) + + W(10, 1012780879, 1021391529, -1130294258, 1031106226) + + W(11, -1122317121, 1025125464, -1149825053, 1031888403); + WS(-1103312814, -1092017335); + sum1 = + W(0, -1127703091, 1018146514, -1121799134, 1017072510) + W(1, -1131539818, -1112727125, 1017229265, -1109754009) + + W(2, -1101144796, -1115460311, -1106383343, -1133230391) + + W(3, 1034901054, 1027622014, 1018005017, 1034197373) + W(4, 1054306333, 1039997174, 1024945271, -1088288960) + + W(5, -1088630417, -1096972398, -1101045202, -1098488314) + + W(6, -1126801713, 1026063214, -1131110905, 1059941407) + W(7, 1061737047, 1057455253, 1048616129, 1051714580) + + W(8, 1023075088, 1027296474, -1132250026, -1113425894) + + W(9, -1097522374, -1097018828, -1112216732, -1142469689) + + W(10, 989243788, -1122138660, 1032388672, 982272924) + W(11, 1021266411, -1135487483, -1117699788, 995701513); + sum2 = W(0, 1015133861, 1037113072, -1114822552, 1028491873) + + W(1, -1105557810, 1038591890, -1105949948, 1032708826) + + W(2, -1105189044, -1115931822, -1098364789, -1113527921) + + W(3, 1024139573, -1112247318, 1007564387, -1120342494) + + W(4, -1093732269, -1102686481, 1041653115, 1065997875) + + W(5, -1130473293, -1098512761, -1125828389, -1125733097) + + W(6, -1094162786, -1113599737, 1041635421, 1064813540) + + W(7, 1037786550, 1036196676, -1114108758, -1132225813) + + W(8, -1109540616, 1040400825, -1118060506, -1123044799) + + W(9, -1105450383, -1106060901, -1118151575, 1010062515) + + W(10, 1030009493, -1114813207, 1027616765, -1116517245) + + W(11, 1021543889, 1005718790, -1122858549, 1029912177); + WS(1054094679, 1053313313); + sum1 = W(0, -1133457438, -1120986711, 1034987238, 1028444909) + + W(1, 1035642130, 1018062863, 1034710931, -1149170946) + + W(2, -1123263126, 
-1130558777, -1097984725, -1096198619) + + W(3, 1007426149, 1002886242, -1123562906, 1018181775) + W(4, 1046955525, 1034791245, 1059693200, 1056744813) + + W(5, -1111414009, 1030506913, -1113706235, 1033357311) + + W(6, -1124016519, 1044418743, -1102062065, -1098120268) + + W(7, -1111514230, -1104691680, 1011520719, -1107703301) + + W(8, 1039131745, -1102726978, -1136653836, -1097784815) + + W(9, -1121981368, 1038384226, -1114621003, 1012043028) + + W(10, -1120644986, 1007447622, -1119688622, -1150742701) + + W(11, 1028941155, -1124505481, 1028268649, 1016894461); + sum2 = + W(0, 988916677, -1137939637, -1123566910, 1028209713) + W(1, -1123858844, 1036944358, -1119538750, -1142606817) + + W(2, 1035831303, -1140903881, -1103092742, -1131746560) + + W(3, -1101579019, 1038814302, -1130948272, 1038676356) + + W(4, -1110571087, 1038296707, -1107155945, -1117714466) + + W(5, 1038803820, -1103151768, 1032643961, -1111989951) + W(6, 1034464782, -1090860425, 1056968489, 1064742714) + + W(7, 1040557782, -1120375726, -1111036663, -1122089622) + + W(8, -1106828560, 1044389789, -1079651989, 1050942503) + W(9, 1043224862, -1130430804, 1041889175, 1014857001) + + W(10, -1178538002, 1041756630, 1029915845, -1106236653) + + W(11, -1110415601, 1019097405, -1132733633, -1140337549); + WS(1060301740, -1107262085); + sum1 = W(0, 980901136, 1027361860, -1140723427, 1026764890) + W(1, 1040866884, 1025665684, 1033047081, -1120424813) + + W(2, 1034782831, 1047173818, 1039236098, -1167399456) + + W(3, -1096504911, -1107799148, -1104388354, 1042507977) + + W(4, -1105836176, -1090641081, -1089836727, 1057068694) + + W(5, 1061579759, 1058017849, 1050545524, 1043225643) + W(6, 1036556474, 1050207945, 1053103024, 1041462691) + + W(7, -1084387656, -1086651203, -1099258584, -1106352190) + + W(8, -1123655719, -1104237805, -1097134658, -1168491986) + + W(9, 1041789973, 1041962562, 1042093191, -1107448273) + + W(10, -1123449511, 1032390687, 1023266998, -1128069487) + + W(11, -1111341496, -1121577073, 
-1128080260, -1118302731); + sum2 = W(0, -1120672932, 1020434122, 1029602171, 986372169) + W(1, 1039121856, -1113202257, -1139183469, 1010867474) + + W(2, 1046006360, -1097339905, -1114812458, -1107596062) + + W(3, 1036462865, 1042494496, -1097212089, -1111658779) + + W(4, -1112452525, 1059308780, 1040180724, -1073685765) + + W(5, 1068230783, 1060720662, -1116372590, -1096232759) + + W(6, -1103760793, 1054394959, 1058139034, -1075031098) + + W(7, 1055263295, 1065727212, -1106882983, -1102585430) + + W(8, -1135937657, 1038148088, -1103543044, -1098429000) + + W(9, 1051067274, -1106499347, -1127997619, 998537328) + + W(10, 1037670355, -1114765629, -1120562291, -1131755817) + + W(11, 1034635900, 1010504894, -1127537321, -1141626877); + WS(-1096709719, -1109932402); + sum1 = + W(0, 1015250582, -1109965037, 1037158888, -1115453048) + W(1, 1032345875, -1113173429, 1033146133, -1109546082) + + W(2, -1143431239, 1015070528, -1094706154, 1051852512) + + W(3, -1107272167, 1039101648, -1112986991, 1034073405) + W(4, 1041217222, 1044707911, 1032459556, -1103734188) + + W(5, -1116721265, -1100636937, 980311674, 1017051268) + W(6, 1009444904, -1099789672, 1049449359, -1091866852) + + W(7, 1042112892, 1051337808, 1011811037, 1035868647) + W(8, -1127867147, 1028928427, 1015515213, 1049692098) + + W(9, -1094357813, 1036490203, -1118660177, 1019523665) + + W(10, -1115442975, 1033219242, -1119289842, -1140261177) + + W(11, 1041543818, -1107580668, 1027230184, -1121586498); + sum2 = + W(0, -1129366500, 1029756683, -1113040482, 1041050942) + W(1, 1019865458, -1130192114, 1021094310, 1011878989) + + W(2, -1111869912, 1029022998, 1046043365, 1028636589) + + W(3, -1100561355, -1118786886, -1136660469, -1113939693) + + W(4, -1114572049, 1041454921, 1063827282, -1088878874) + + W(5, -1120053198, 1037601458, -1121128994, 1024511741) + W(6, 1047691494, 1051011830, -1075590857, 1052419847) + + W(7, 1053136256, -1142379802, -1115896886, 1033422229) + + W(8, 1024096486, -1094989992, -1094007689, 
1049374228) + W(9, 1051405216, -1105102628, 1030093181, 1038213156) + + W(10, -1112777200, 1033382685, 1046068934, 1022134325) + + W(11, -1104418826, 1039472389, -1173694926, -1122209396); + WS(1065853238, 1014077745); + sum1 = W(0, -1141216588, 1026623353, -1128763228, 1020370461) + + W(1, 1026101893, -1118113002, 1032019108, -1113706417) + + W(2, -1117433015, -1129058045, 1039902504, 1043759351) + W(3, 1050608829, 1042454284, 1027449020, 965645460) + + W(4, -1094201799, -1096460535, -1083943142, -1077503527) + + W(5, -1089716396, -1098731144, -1095714819, -1094767340) + + W(6, 1050233869, 1049952264, 1051933059, 1055071872) + W(7, 1055823041, 1057515106, 1050055875, 1054942629) + + W(8, 1044254087, 1008011942, 1048823509, 1025148605) + W(9, 1040921250, -1120507502, -1149099066, 1040832262) + + W(10, -1138617969, 1022594791, -1141149825, 1032777398) + + W(11, -1117303781, 1030831063, -1131034510, 1023411184); + sum2 = W(0, 1024492456, -1126703609, 1029353704, 1036516578) + + W(1, 1032071462, -1107363712, 1044306067, -1105180142) + + W(2, -1107369605, 1040389587, -1109233879, -1095300447) + + W(3, -1104963978, -1106122650, -1143085570, 1043865859) + + W(4, -1114418679, 1038677226, 1043636997, 1063429366) + + W(5, -1101740776, -1097459892, 1035238854, -1116890590) + + W(6, -1102696165, -1115181151, -1104069114, 1063594124) + + W(7, -1098164193, 1040620147, -1106357294, -1127904241) + + W(8, -1133142225, -1114014523, -1122395696, -1091978649) + + W(9, 1053626887, -1097146692, 1040976159, -1120794976) + + W(10, -1122518916, 1026327736, 1035228514, 1023044321) + + W(11, -1116601496, -1130578705, 1026795860, -1127926993); + WS(-1088917996, 1063906509); + sum1 = W(0, -1110920777, 1042151912, -1101936374, 1005195703) + + W(1, 1032265367, -1103459368, 1041691572, -1129700693) + + W(2, 1044506738, -1103723194, 1033303784, 1049511025) + + W(3, -1113286290, 1043019742, -1128222025, -1114181825) + + W(4, 1047786311, 1032125650, 1061302810, 1034065915) + W(5, 1048810800, 
1054491533, -1112859072, 1049358361) + + W(6, -1095080042, -1113114611, -1088574755, -1080208865) + + W(7, 1049193686, -1121283466, 1024774526, -1147499681) + + W(8, -1134941681, 1025189266, -1115794445, 1048946939) + + W(9, -1126346531, -1103486336, 1015955599, -1097425800) + + W(10, 1033765698, -1104090263, 1041151454, -1113407178) + + W(11, -1109067703, 1044875416, -1121559327, 1029016425); + sum2 = W(0, -1103461552, 1050674327, -1104590141, 1015549042) + + W(1, 1036919049, -1098769683, 1046313441, -1112862464) + + W(2, -1127991186, -1116051994, -1131508502, 1050173609) + + W(3, -1105608002, 1039941075, -1101603735, 1031505597) + + W(4, -1120190757, -1113530372, 1055990720, 1072931527) + + W(5, -1095048932, -1078020039, -1101033642, 1010804308) + + W(6, -1108916090, -1124692174, -1121470095, 1046026095) + + W(7, 1050729023, -1084802111, 1047911325, 1034713295) + W(8, 1012492564, 1042343963, 1032226081, 1048814953) + + W(9, -1098673970, -1101605152, -1113534935, 1017021378) + + W(10, 1029245469, -1115385529, 1026603353, 1024686773) + + W(11, -1108452621, 1046452703, -1112756717, -1119772659); + WS(-1128039792, 1065254279); + sum1 = W(0, 949814928, 1027171091, -1129242384, 1031101353) + + W(1, -1120975327, 1003634843, -1144006416, -1123053628) + + W(2, -1114341117, 1031806764, 1033646238, 1018879791) + W(3, 1042055949, 1033681236, 1023909046, 1027677731) + + W(4, -1094482850, -1095186291, -1085842430, -1092086861) + + W(5, -1084252337, -1089087466, -1099056317, -1095496311) + + W(6, 1052398616, 1051880795, 1058904806, 1068407459) + W(7, 1055432619, 1036562814, 1039311896, 1050962337) + + W(8, 1027872723, -1139824782, 1010706555, -1098849364) + + W(9, -1121195299, 1023772639, -1126801378, 1038931216) + + W(10, -1155305313, 1022544667, -1121892394, 1040052675) + + W(11, -1127339326, 993927654, -1123991661, 1005742098); + sum2 = + W(0, -1120833032, 1034915193, -1115307960, 967112657) + W(1, 1023925021, -1107476032, 1033449936, -1132191993) + + W(2, -1124763415, 
-1113475142, 1025322201, 1059271888) + + W(3, -1102912552, -1118552210, -1103660545, 1033898849) + + W(4, -1118777010, 1010824003, 1064953874, 1082807588) + + W(5, -1080316189, -1064631277, -1110193476, 1039205719) + + W(6, 1030229073, -1105412742, 1042572184, -1098383492) + + W(7, 1054443836, -1098098835, -1121091158, 1028111985) + W(8, 1028911958, -1133618203, 1022077590, 1048790378) + + W(9, -1103923621, 1029953956, -1106218101, 1036034024) + + W(10, -1123843510, 1030755234, -1135426163, -1113939962) + + W(11, 1038571089, -1109596012, 1035405992, -1126376787); + WS(-1100484014, -1082813103); + sum1 = W(0, -1123138350, -1124738939, 991988525, -1115403050) + + W(1, 1032429349, -1113715255, 1024280877, 1024090791) + W(2, 1037916588, -1123209507, 1049386547, 1038702225) + + W(3, 1052139678, -1114440425, 1033284897, -1105213156) + + W(4, -1092840871, -1095165544, -1086369653, -1082690349) + + W(5, -1080932794, -1104541586, -1091044852, -1098590565) + + W(6, 1048819422, 1053647432, 1054472273, 1063735148) + W(7, 1057734851, 1057909982, 1048946363, 1057027323) + + W(8, 1042234192, -1117914254, 1039202605, -1121098737) + + W(9, -1124146173, -1120991811, -1116174764, -1140316975) + + W(10, -1212876940, 1018206961, -1120915541, 1024686130) + + W(11, 1015928020, -1138122766, 1018911880, 1023234987); + sum2 = + W(0, 1017320792, -1115193567, 1033153774, -1109723345) + W(1, 1038092476, -1111785287, -1119671739, -1109901657) + + W(2, -1151813795, -1117132961, 1023133248, -1096979800) + + W(3, 1051436512, -1136600857, 1043953938, 1038440342) + + W(4, -1114719136, -1104426322, -1094043595, -1067432541) + + W(5, -1094705611, 1084038773, 1051561136, -1097692319) + + W(6, -1127138406, 1035777956, 1044557822, -1081748159) + W(7, -1094662263, 1059186895, 1023890070, 1021470996) + + W(8, 1024237362, -1122656452, 1043828952, -1105451925) + + W(9, -1118217110, -1118750811, 1026839182, -1114305884) + + W(10, 1038691322, -1114810881, -1134215421, 1025666138) + + W(11, 1023161212, 
-1141123106, 1009024009, -1122422926); + WS(-1090600151, 1059961608); + sum1 = W(0, 1026132748, -1119499740, 1009974340, -1113885561) + + W(1, 1031101992, -1111559238, 1038517558, -1109274083) + W(2, 1003856660, 1023957057, 1043713207, 977342510) + + W(3, 1051525550, 1025363409, 1031878320, -1114083846) + + W(4, -1086859502, -1105095800, -1089372619, -1080432315) + + W(5, -1084623068, -1086083942, -1090836983, -1097114792) + + W(6, 1051724689, 1035359281, 1059175635, 1059908410) + W(7, 1060487798, 1059822844, 1051599439, 1057278953) + + W(8, 1051693682, 1030063117, 1040699442, -1106762114) + + W(9, -1114853678, -1116041416, -1130086580, 1040974422) + + W(10, 1032934656, -1123043606, 1034542394, 1018557983) + + W(11, 1019132432, 1031213982, -1150140386, -1138998830); + sum2 = W(0, 992923986, -1162949315, 1026898877, -1114052913) + + W(1, -1129262506, 1033577774, -1129225578, 1036380420) + + W(2, -1099561897, -1123994385, -1116860796, 1033030662) + + W(3, 1024241717, 1027801571, 1041458479, 1050404867) + W(4, -1105536701, -1114799541, 1049791100, 1035377948) + + W(5, -1088177074, 1049717575, 1043978030, -1091169113) + W(6, 1079165891, 1071631223, 1068780303, 1075632919) + + W(7, -1093777203, -1083766160, 1029517847, -1083354335) + + W(8, -1067147152, -1073660425, -1081838471, -1083671230) + + W(9, 1057542893, 1057183721, -1103299573, 1061807320) + + W(10, -1122782084, -1122543983, -1095462698, -1097530520) + + W(11, -1107750254, 1032284006, 1025982179, 1016984746); + WS(-1077101366, 1062204989); + sum1 = W(0, 1030791972, 1033145401, 1041963978, 974201395) + W(1, -1122207423, 987140211, 1038549201, -1110991006) + + W(2, 1034919675, -1109323772, -1109601924, -1098302775) + + W(3, 1049858413, -1163737350, -1104961646, 1022472166) + + W(4, -1119759728, 1033209467, 1053457281, 1054965265) + + W(5, -1085314770, -1111164798, -1103072597, -1115893275) + + W(6, -1099266697, 1032826652, -1084468746, -1116769210) + + W(7, 1055247733, 1051122507, 1043488164, 1036206931) + W(8, 
1044609922, -1108657922, 1049423472, 1034832195) + + W(9, -1104833109, -1113738245, -1121147841, -1131086146) + + W(10, -1131275309, -1142019948, 1005679697, -1125973710) + + W(11, 1038354000, 1030630601, -1132893044, 1033991117); + sum2 = + W(0, -1128313262, -1111995834, -1111474816, -1108547758) + + W(1, -1123519406, -1107039325, -1122872424, 1034342977) + W(2, 1013333880, 1035550861, 1047569009, 1036006103) + + W(3, -1104835579, -1119199538, -1113807123, -1111926114) + + W(4, 1041733323, 1024909109, 1049082846, -1093864056) + W(5, 1049905793, 1005924363, -1107099901, -1117079308) + + W(6, 1023499747, -1107108555, 1042800306, -1137016284) + W(7, 1032249988, -1146750815, 1024429363, 1029021112) + + W(8, -1111312052, 1015145454, 1017141780, 1013541080) + W(9, 1034012446, -1119513572, 1036805530, 1021769214) + + W(10, 1026149469, -1188980193, -1123639612, -1132476264) + + W(11, 987971164, -1118671040, -1119021692, 1010774610); + WS(1062330988, -1108933484); + sum1 = W(0, -1130422235, 1014897395, 1027643186, -1143835797) + + W(1, 1008148004, -1128962907, 1015454424, -1140840472) + + W(2, -1127744279, 1022571909, -1103670303, -1110385841) + + W(3, 1042533684, -1104774725, -1116892960, -1113171223) + + W(4, -1103583502, -1095189640, -1097916197, -1083565633) + + W(5, -1085814590, -1117452294, -1099231943, -1103031657) + + W(6, -1112967222, 1057599524, 1050533350, 1066390108) + W(7, 1059916677, 1050496965, 1049304867, 1049771574) + + W(8, 1030687296, 1033146447, 1045948242, -1104991248) + + W(9, -1098839314, -1114792086, -1127702342, 1032045679) + + W(10, 1027221408, -1115561596, 1024802772, 1027456689) + + W(11, -1125614226, 1027191883, -1115468561, 1026854932); + sum2 = + W(0, 1032895988, -1117467777, 1028545567, -1111764526) + W(1, 1041090350, -1115412116, -1125773241, 1023304410) + + W(2, 1032830726, -1111744013, 1005059811, 984631841) + W(3, 1049595248, -1107906030, -1128022386, 1041765137) + + W(4, -1115406541, 1060942494, 1047355779, -1080183554) + W(5, 
-1134532596, 1035977506, 1020817981, 998580490) + + W(6, -1088204952, 1054289941, 1071613570, -1077634808) + W(7, -1093147012, 1042083474, 1037668146, 1032116542) + + W(8, -1092446016, -1123665301, 1060958044, 1046926373) + + W(9, -1113920155, -1097187857, -1142970826, -1125184917) + + W(10, 1031989945, -1104702594, 1046230999, -1100189171) + + W(11, 1034621337, -1132922134, -1117116166, 1022449461); + WS(-1109331804, 1042368707); + sum1 = + W(0, -1127022377, 1024616740, -1123466393, -1123208698) + W(1, -1123129456, 1019270569, 981412042, -1140654132) + + W(2, 1017844485, -1131144083, -1107976561, -1113085828) + + W(3, -1114039899, -1111910285, -1118232839, 1022571624) + W(4, 1059385318, 1050963589, 1060165431, 1059271336) + + W(5, 1060839292, 1056907678, 1049849056, 1053487153) + + W(6, -1100795701, -1097056965, -1085607547, -1083778221) + + W(7, -1087428756, -1094957746, -1097540376, -1103251894) + + W(8, -1111987554, 1029893237, 1029216560, 1039937652) + W(9, 1032072458, -1114977786, 1024925794, -1099334072) + + W(10, 1016900750, 1023074109, -1112887422, 998408382) + + W(11, -1132276000, 1030127674, 1027413302, 1016237783); + sum2 = W(0, 1032849952, -1124884727, -1113876395, -1129227720) + + W(1, -1120368220, 1017198470, -1139825528, -1146042855) + + W(2, 1026055449, -1115329827, 1027815845, 1037903820) + W(3, 1033732250, -1135699996, 999495607, -1129930944) + + W(4, -1062565523, -1069645041, 1071954840, 1082661778) + W(5, 1074466947, 1029601901, 1010255452, 1032971494) + + W(6, -1095655110, 1045607629, 1038549538, -1099195860) + + W(7, 1038448988, -1098438651, -1108984513, 1036170224) + + W(8, -1117249787, -1117682124, 1041120073, 1049745271) + + W(9, -1117133911, 1028175479, 1021673198, -1119888169) + + W(10, 1033345650, -1126204236, -1111681252, 1027383795) + + W(11, 1003632367, -1122462785, 1028818665, 1008778484); + WS(-1115348316, 1054745777); + sum1 = W(0, -1137232980, 1010567825, 1027515738, -1129778165) + + W(1, 1031078129, -1163675813, 1021335208, 
1015357019) + + W(2, 1036166838, 1015838740, -1156212270, -1111605669) + + W(3, -1105267814, -1129353779, -1109102196, 1044530957) + + W(4, 1049991788, 1049110157, 1058008076, 1064241460) + W(5, 1060214194, 1054590870, 1041116785, 1051902330) + + W(6, -1091675395, -1096298109, -1087264777, -1084812083) + + W(7, -1090012802, -1089909917, -1099321920, -1087098097) + + W(8, 1017816583, 1034100235, 1034455816, 1015442096) + W(9, 1038960299, 1024747424, -1128935590, 1001621141) + + W(10, 1029575560, 1027659359, 1022248439, 1022178737) + + W(11, 1027280241, 1032358751, -1140454872, 1023518060); + sum2 = + W(0, -1165172934, 1024206556, -1116697030, 1027518769) + W(1, -1114129087, 1038375004, -1112102389, 1033334280) + + W(2, 1029679520, 1020162972, 1034059275, 1040171419) + W(3, 1027996817, -1110434123, 1034335827, -1129105068) + + W(4, -1126280148, -1122180110, 997668227, -1097026624) + + W(5, 1034726751, 1040744343, -1102815825, -1107441727) + W(6, -1133513625, 1032751166, 1058808820, 1078058683) + + W(7, 1074823993, -1082346775, -1066028315, -1081349721) + + W(8, 1007295249, -1123729058, -1106696521, 1051253388) + + W(9, -1129919188, -1130957332, -1106067749, 1046963831) + + W(10, -1112190701, 1031911051, -1130859572, -1110596693) + + W(11, 1030310076, 1020514684, -1120038362, -1140190249); + WS(-1106429870, -1094013011); + sum1 = W(0, -1180060253, 1010730896, 1021824818, -1126648308) + + W(1, -1129917792, -1130361130, -1120274467, -1140710069) + + W(2, 1032914805, -1122572746, 1007289698, 1032028260) + W(3, 1028442598, 1044381070, 997655159, 1048974915) + + W(4, 1051659804, 1041447962, 1049003466, 1055000463) + + W(5, 1028832887, -1108621431, -1127588999, -1092428073) + + W(6, -1083977393, -1096070845, -1090427610, -1110426966) + + W(7, -1154202266, 1047276227, -1106909952, 1037115308) + W(8, 1044666432, 1042077981, 1040702780, 1039434940) + + W(9, -1143541158, -1126798128, 1034262401, -1116914978) + + W(10, -1143253052, -1112889935, 1019043703, -1121648024) + + W(11, 
-1121855216, 1025638626, -1112957869, 1027653084); + sum2 = W(0, -1113021585, 1027309129, 1040557912, -1105312463) + + W(1, 1045635393, -1102513438, 1027399099, -1114789238) + + W(2, 1044056205, 1042644451, -1130917417, -1097839840) + + W(3, -1093830389, -1101732547, -1116545436, 1035602021) + + W(4, -1096040530, -1098848264, -1086782093, 1066533545) + + W(5, 1047866369, 1049511378, 1054444603, 1045074348) + W(6, -1099894114, -1100852865, 1055980834, 1060955207) + + W(7, 1049480207, -1136131482, -1099150452, -1089010378) + + W(8, -1101136816, 1051352325, -1094431966, -1105883820) + + W(9, -1100511063, -1095199310, 1057783348, -1105061215) + + W(10, -1129192316, -1127314210, 1001033976, 1028457481) + + W(11, -1109915970, 1041484506, -1111537389, 1038428609); + WS(1043891118, 1068855714); + sum1 = W(0, 1034864935, -1115119790, 1021640868, -1116826163) + + W(1, -1131811026, 1023733166, -1143217636, -1132040205) + + W(2, -1116934668, -1132968331, 1038582262, 1007221512) + + W(3, 1042862352, 1036137865, -1140157826, -1132089051) + + W(4, -1092449819, -1094480060, -1087769325, -1085866861) + + W(5, -1087019235, -1090795981, -1096174840, -1088777568) + + W(6, 1054927906, 1036212657, 1059302628, 1065404307) + W(7, 1060871017, 1058788692, 1054585007, 1058320673) + + W(8, 1034556804, -1105343195, 1039390277, -1118670257) + + W(9, -1097654326, -1110180471, -1103987387, 1039274926) + + W(10, -1137767170, -1110944332, 1007166673, 1000130184) + + W(11, -1149095244, 1038210141, -1123846591, 1033742658); + sum2 = W(0, -1109312558, 1016877071, -1119267190, 1012463989) + + W(1, -1126846961, -1130793473, 1028514185, 987754923) + W(2, 1043524956, 1022447527, 1041115214, -1113751024) + + W(3, 1044302697, 1017381631, -1118188819, 1030898373) + + W(4, -1128190925, -1112207078, -1098728904, -1096567460) + + W(5, -1103900959, 1042180588, -1098746809, 1045698067) + + W(6, -1098627702, -1106898612, -1125515089, 1069702771) + + W(7, 1067943590, 1068665203, 1066385707, 1053793920) + W(8, 
1046310591, 1038513036, 1034661588, -1085518745) + + W(9, -1080776989, -1076263106, -1082060374, -1093259035) + + W(10, -1109532722, 1007593597, -1120394532, -1104054586) + + W(11, -1120974914, 1042247803, 1009589981, -1114854661); + WS(-1087649964, 1019782291); + sum1 = + W(0, -1120416323, -1143853715, 997344379, -1117444008) + W(1, 1021907901, -1121005123, 1024251314, -1131127160) + + W(2, 1030168161, -1112895868, 1041161399, -1109107148) + W(3, 1047711365, -1121138472, 1000308143, 1016698342) + + W(4, 1040415828, 1040272853, -1108639376, 1066099001) + W(5, 1016783474, 1034911086, 1025062450, 1039033917) + + W(6, -1105270513, 1031966316, -1088122706, -1107599501) + + W(7, -1089416651, -1111272734, -1107120316, -1102333873) + + W(8, 1024428446, -1115504066, 1050439898, -1101667648) + W(9, 1046888529, 1033038819, -1142414472, 1024070769) + + W(10, -1138699736, -1138905653, 986611002, -1107645603) + + W(11, 998160367, -1138080961, 1016114481, -1128756052); + sum2 = + W(0, -1107127477, 1033768126, 1023416069, -1125558334) + W(1, 1043030557, -1118774605, -1151915405, 1000627574) + + W(2, 1040784536, -1100939724, -1105075327, 1031863699) + + W(3, -1103416126, -1104742149, 1042707477, -1121719211) + + W(4, -1117663793, 1017594321, -1107862291, -1098589794) + + W(5, 1051532765, 1047503953, -1106456825, 1014542959) + W(6, -1112675945, 1057262150, -1072566694, 1078638992) + + W(7, -1086195181, -1109470897, 1041105499, -1106760943) + + W(8, 1039537683, -1092150404, 1066290117, -1077578123) + W(9, 1053259006, 1050584179, -1103867215, 1033290657) + + W(10, 1025325994, 1019337948, -1099590626, 1051733383) + + W(11, -1119314417, -1100419866, 1038050610, 1002291590); + WS(1061891500, 1029739432); + sum1 = W(0, -1120639769, -1135804195, -1111424495, 1029738303) + + W(1, -1117498485, -1118615961, -1125527056, -1118385394) + + W(2, -1106216192, 1006283635, -1105829118, -1090566706) + + W(3, -1116262763, 1034396863, 1019261400, -1120846617) + + W(4, -1113834067, -1103570763, 
-1096510251, 1051808376) + + W(5, -1092264601, -1100309453, -1103214946, -1103884676) + + W(6, 1044238166, 991333090, 1059958553, 1070740438) + W(7, 1055146526, 1034609097, 1035571491, 1043753671) + + W(8, -1114217922, -1139025510, -1105864021, -1094764771) + + W(9, -1102695282, -1115698885, -1137185181, -1118241703) + + W(10, -1131739160, -1118882723, -1144807936, 1034612526) + + W(11, -1134494805, 1017541191, -1114134173, 1003240643); + sum2 = + W(0, -1121600381, 1006302792, -1119262688, 1002488024) + W(1, 1029037035, 1026568605, -1136033516, 1023468893) + + W(2, 1026550907, -1138054916, 1026195583, -1109251816) + W(3, 989985520, -1119850715, 1023268674, -1113487368) + + W(4, 1038640235, -1115693397, 1049140618, -1070322023) + W(5, 1050244555, -1111337441, 1034795019, 1032357843) + + W(6, -1118247849, 1032667651, -1131891756, 1075955836) + + W(7, -1093462820, 1039467523, -1123631905, -1111383976) + + W(8, -1129506938, -1150824496, 1032667233, 1037796079) + + W(9, 1041966968, -1119306593, 1011205316, -1140765060) + + W(10, -1121040584, 1022873234, -1116879102, -1123674716) + + W(11, 1023067742, -1115823182, 1030765775, -1117038411); + WS(-1084563692, -1094583093); + sum1 = W(0, 1022703229, -1162852794, -1132568844, 1021772616) + W(1, 1024085850, 996545514, 1026223997, -1146052282) + + W(2, 1012279186, -1124105959, 1040618577, -1104495965) + + W(3, -1108936808, -1134036365, -1117471057, -1121618798) + + W(4, 1016172182, 1043937588, -1096721723, 975997096) + W(5, -1105257082, -1119752408, 1035927703, 1030206390) + + W(6, -1118740542, 1029178707, 1047241030, 1058138797) + + W(7, -1098942649, 1033384955, -1119041259, -1135741956) + + W(8, 1030161881, -1113029519, -1110742685, -1095587800) + + W(9, 1044167852, 1030217107, 1033396614, -1114495566) + + W(10, -1118949361, 1029291849, 1002005802, 1026228226) + + W(11, -1121437018, -1128298663, -1127843472, 1024441958); + sum2 = + W(0, 1025430445, 1022296570, 1036981025, -1113204042) + W(1, 1015494893, -1113488118, 
-1127432409, -1122316989) + + W(2, -1154040649, 1041313314, -1095051054, 1045521896) + W(3, -1140159290, 1036274902, 1033705522, 1014652738) + + W(4, -1116838887, -1089338077, -1080092115, 1068042633) + + W(5, -1123762249, 1036460009, -1115504399, 1034428960) + + W(6, 1050769250, -1110563158, 1047738904, -1105725339) + + W(7, 1046459196, -1099991734, 1035671600, -1109634740) + + W(8, -1101164186, 1049390351, -1105225557, 1058198082) + + W(9, -1097909404, 1042800199, -1112380024, 1019626217) + + W(10, 1025324860, -1110459458, -1123670947, -1102419914) + + W(11, 1036400862, -1121611661, 1029427990, -1122111247); + WS(1070570422, -1118483066); + sum1 = + W(0, 1018165832, -1132852684, -1139725729, -1109730565) + W(1, 1028156415, -1118139972, 1024357779, -1122455036) + + W(2, -1110069429, 1031499417, 1040072889, 1042906054) + W(3, 1042919301, -1168228894, 1039947243, -1103975241) + + W(4, -1099959964, -1099020957, -1084885570, -1081528768) + + W(5, -1083523477, -1091258848, -1095225642, -1094692227) + + W(6, 1060331110, 1050128623, 1059366093, 1062372980) + W(7, 1058454089, 1058887102, 1048717802, 1057283318) + + W(8, -1162935118, 1032275410, 1019497086, 996066926) + W(9, -1118811556, -1112630512, -1117991970, 998336395) + + W(10, -1139421786, 1009522871, -1140197674, -1117746023) + + W(11, -1128372287, 1014662756, -1119179525, 1028666043); + sum2 = + W(0, 1041833519, -1108615966, -1125372171, -1161988394) + W(1, -1113215633, 1034531971, -1118383435, 1028261725) + + W(2, -1105638218, 1028972955, 1029956673, 1048735800) + W(3, 1034458457, 1025753183, 1036634769, 1000882603) + + W(4, -1090712556, 1051586707, 1040982790, -1113315600) + + W(5, 1025381303, -1116357686, -1096961046, 1047278155) + + W(6, -1068172350, -1062545743, 1058445980, 1081890452) + W(7, 1078990875, 1057946443, -1129266047, 1020805991) + + W(8, 1053466473, -1089644697, 1044082250, 1021891615) + W(9, 1047100697, -1138523317, -1118643722, 1003167691) + + W(10, 1031038629, -1109024185, -1115541422, 
-1110157560) + + W(11, 1034093689, -1121128958, -1164400298, 1023430939); + WS(-1086946092, 1067745931); + sum1 = W(0, -1129718754, 1029399808, 1034551451, 1031195606) + W(1, 1040134022, 1033121193, 1023151712, 1042343314) + + W(2, 1043160110, -1107060738, 1035261538, -1095383669) + + W(3, -1133919947, -1122710934, -1107305618, 1042006276) + + W(4, 1057051220, 1052187432, 1056679450, 1065908691) + W(5, 1053194351, 1055695833, 1050664148, 1051480109) + + W(6, -1086793423, -1093519641, -1086890615, -1082115172) + + W(7, -1091092163, -1087806651, -1106762967, -1086763775) + + W(8, 1029510552, 1041235060, 1027399596, 1028581668) + W(9, 1039541441, 1022886169, 1035840335, -1123269599) + + W(10, 1019782728, 1028876859, -1121687640, -1151056709) + + W(11, 985161115, 1032179179, -1137650955, 1023858088); + sum2 = W(0, -1092038506, -1100127293, -1076970891, -1074419095) + + W(1, -1072137471, -1071882845, -1075499422, -1077760287) + + W(2, 1049598436, 1048639033, 1064718435, 1072396257) + W(3, 1071503744, 1074270589, 1071966648, 1067893220) + + W(4, 1043882093, 1037661877, 1059313512, 1057486689) + W(5, 1051417643, 1058040444, 1034550731, -1109701446) + + W(6, 1042320103, -1103044471, 1034035103, -1104719409) + + W(7, -1111075093, 1044428409, -1105998171, 1040427981) + + W(8, -1131832297, 1002123811, -1103733932, 1041582287) + + W(9, -1115120295, -1139439905, 1008560441, -1112073274) + + W(10, -1121740952, 1029198842, -1115216799, 1010136401) + + W(11, -1128000009, 1024386386, -1120184167, 1030441594); + WS(-1081254198, -1103496580); + sum1 = W(0, -1115414444, 1041339642, -1098407452, -1130637434) + + W(1, 1020760807, -1101995566, 1047565230, -1100079212) + + W(2, -1146587219, -1098606506, 1050459360, 1042214179) + + W(3, -1117423418, 1034598934, -1122745995, 1024450296) + + W(4, -1093427101, -1097683981, -1091994231, -1078168847) + + W(5, -1095228700, -1092752622, -1097351785, -1094816186) + + W(6, 1048836291, 1057498447, 1058816906, 1061921355) + W(7, 1063391143, 
1049596874, 1055225025, 1057483350) + + W(8, 1038644661, -1144521152, -1117884339, 1040740783) + + W(9, -1099277734, -1125383814, -1125503451, -1112883619) + + W(10, 1040942563, -1102869832, 1036627839, -1165859686) + + W(11, -1107645994, 1047950513, -1104517417, 1038686359); + sum2 = W(0, -1101463653, 1049429767, -1128244554, -1113723320) + + W(1, -1100253783, -1100739127, 1041910404, -1105778187) + + W(2, 1057761413, 1067192921, 1075421514, 1075836777) + + W(3, -1088665878, 1016894214, -1116933337, -1111337535) + + W(4, -1092703936, -1078098525, -1071536845, -1077415283) + + W(5, 1059548308, 1051692516, -1101242161, 1048285068) + + W(6, -1109493923, -1139460835, 1039603395, -1087182285) + + W(7, 1034610843, -1117875445, 1013931579, 1038460759) + W(8, 1041760680, 1028945771, -1114414573, 1045554670) + + W(9, -1106673179, 1032645955, -1122014712, -1113595007) + + W(10, 1014720459, -1117756514, 1016362862, -1118637934) + + W(11, -1125369728, 1027339291, 1013909643, -1124961818); + WS(-1089093868, 1047195432); + sum1 = W(0, -1139038025, -1146634508, -1107247574, -1111039036) + + W(1, -1111569431, -1131070116, -1123633457, -1125658710) + + W(2, -1108646892, -1102493258, -1102675263, 1027299238) + + W(3, 1043334313, 1035038251, -1130835530, 1006218639) + W(4, 1042227448, 1050027260, 1055627170, -1094097439) + + W(5, -1086063146, -1091453053, -1104540112, -1100496626) + + W(6, -1116853265, -1113152371, -1102292967, 1059178599) + + W(7, 1064850945, 1055758905, 1047069456, 1028822677) + W(8, 1032512066, 1037772585, 1040310103, -1120135685) + + W(9, -1097054397, -1107670363, -1109953256, 1033685440) + + W(10, 1015754506, -1111624526, -1122052409, -1123630421) + + W(11, 1035574535, -1126001639, 1015067608, 906381938); + sum2 = W(0, -1103632983, 1043805370, -1100282419, 1043204826) + + W(1, -1107284068, 1032596944, -1115324019, 1038389706) + + W(2, 1051248459, -1102825598, -1110573499, 1027245233) + + W(3, -1115422824, -1088944566, -1119815045, -1103855212) + + W(4, 
1050224986, -1106748849, -1142028319, 1068200484) + + W(5, -1110893073, -1082511144, -1101546175, -1099926119) + + W(6, -1112111441, -1107415409, 1047715488, 1064575785) + + W(7, -1116519441, -1166081973, 1033478296, -1112155627) + + W(8, -1114055425, -1113209339, -1106125891, 1034756297) + + W(9, -1121811913, 1036646608, -1139772871, 1042197713) + + W(10, -1111599379, 1037386114, -1107474855, 1042201726) + + W(11, -1115818225, 1036836551, -1107371535, 1026011708); + WS(-1121781432, -1075996235); + sum1 = W(0, -1139185884, 1017932812, 1031934942, 986151814) + W(1, -1164665422, 1000199096, 1007298263, -1140438078) + + W(2, -1104344139, -1106763371, -1097501556, -1122902443) + + W(3, -1153685131, -1127990679, 1001735501, -1113474827) + + W(4, 1055178280, 1050633065, 1061458986, 1056941803) + W(5, 1057254916, 1045616484, 1030315686, 1046908556) + + W(6, 1023675446, -1102467779, -1098857564, -1087567683) + + W(7, -1091469811, -1104204108, -1129857478, -1103515070) + + W(8, -1115352921, -1113788396, 987917899, 1038547421) + + W(9, -1108760371, 1011665321, -1121847517, -1118109260) + + W(10, -1116105386, -1134059780, -1113912527, -1136836585) + + W(11, 1007170123, -1128021832, -1154396360, -1121041198); + sum2 = W(0, -1168649543, -1129109975, 1016893178, -1132018102) + + W(1, -1117453381, 1023462612, -1106957470, -1115823387) + + W(2, -1097479461, -1104465921, -1103938775, -1118749694) + + W(3, -1115486085, 976518647, 1025546665, 1034975399) + W(4, 1049098480, 1049155382, 1057022497, -1098731951) + + W(5, -1119719509, -1106384437, -1098317799, -1105263623) + + W(6, 1042706476, -1102354715, 1043506300, -1097087336) + W(7, 1057524747, 1036026738, 1045328977, 1035074429) + + W(8, 1041988609, -1112820525, 1047765539, 1046909100) + + W(9, -1100642605, -1121071990, -1106523641, 1003192501) + + W(10, -1106980555, -1122042062, -1105649317, 1031531696) + + W(11, -1114531186, 1040948740, -1113359258, 1033488186); + WS(1058868844, 1053114578); + sum1 = W(0, 1019520880, -1122446279, 
1037061470, 1025396381) + W(1, 1029402067, 1031841884, -1120186199, 1033549944) + + W(2, 1047026923, -1120049453, 994701546, -1104790763) + + W(3, -1104820950, 1021084025, -1105237507, 1009185118) + W(4, 1040716289, 1048040834, 1059285152, 1063349990) + + W(5, 1062179110, 1056787146, 1046529242, 1057448647) + + W(6, -1095841007, -1093651986, -1089525095, -1081113852) + + W(7, -1086213616, -1087503095, -1091401861, -1088312360) + + W(8, 1020793813, 1039005404, 1046415125, 1040363303) + W(9, 1045703966, 1041196477, 999605293, 1036113571) + + W(10, 1026453035, -1117418946, 1019785277, -1114838686) + + W(11, 1030676884, 1024749253, 1004229292, 1022861598); + sum2 = + W(0, 1050427341, -1106859908, 1035861035, -1112488680) + W(1, 1032409135, -1122621450, -1104767713, 1046587865) + + W(2, -1089070569, -1111523090, 1043538575, -1097196972) + + W(3, 1036654054, -1133854060, -1130574088, -1093068011) + + W(4, 1053219089, 1052246278, 1059060280, -1088179143) + W(5, 1017346040, 1043005089, -1092034159, 1052143300) + + W(6, 1067490860, -1093125710, -1078649576, -1087268097) + + W(7, 1059384755, 1062938540, -1093321870, 1059539225) + + W(8, -1080930470, -1078811541, -1074777053, -1075643449) + + W(9, -1124572738, 1049929252, -1112625720, 1059287102) + W(10, 1058659628, 1066232343, 1073277874, 1065823003) + + W(11, 1062269032, 1043425453, -1092663388, -1098215774); + WS(-1074642550, 1059001037); + sum1 = + W(0, 1033877849, -1114814273, 1017602926, -1122785432) + W(1, -1119942606, -1121541554, 1004629641, -1103073299) + + W(2, -1105864948, -1133959838, -1106724665, 1042832010) + W(3, 1046400581, 1036804396, 1032924291, 1039804561) + + W(4, 1040506928, -1108476642, -1098100854, -1084427489) + + W(5, -1083890617, -1088670813, -1090515058, -1095458045) + + W(6, -1096349630, 1023714759, 1055335876, 1065396984) + W(7, 1062559324, 1061684023, 1051195620, 1054342111) + + W(8, 1046552293, -1108357401, 1032280783, -1132282059) + + W(9, -1113686945, -1104721044, -1110306392, 1031943308) + + 
W(10, -1110243859, -1127057913, -1147216585, 1033899323) + + W(11, 1032718711, 1025273824, -1122497752, -1121848889); + sum2 = + W(0, 1024273115, -1131612410, -1107407950, 1041256788) + W(1, -1117121277, 1031505312, -1110072407, 1016793154) + + W(2, 1021686194, 1025348247, -1105777061, 1042238427) + + W(3, -1126779625, -1118046720, -1118330488, 1027338756) + + W(4, 1043888107, -1101286924, 1048538591, -1090332406) + + W(5, 1053208564, 1043322093, -1135252559, -1116082242) + + W(6, -1108335658, -1122239076, 1046097166, 1036931728) + W(7, 1040874893, 1040745205, 1050743072, -1096125238) + + W(8, -1077891718, -1091711743, -1097181377, 1065426853) + + W(9, 1063431633, 1040529954, -1097546746, 1050690616) + + W(10, -1110849641, -1111108183, 1033944270, -1111979954) + + W(11, 1039617071, -1105370761, -1122096807, -1122718137); + WS(-1088674604, -1128666815); + sum1 = W(0, -1123914807, 1016031116, 1014718188, 1024027612) + + W(1, -1145356432, 1024193015, -1124389866, 1032770888) + + W(2, -1112902532, -1154837176, -1114821559, -1098901213) + + W(3, -1103324234, -1099251759, -1119376425, -1101826270) + + W(4, 1031043843, 1033184452, 1015481863, 1054517603) + W(5, 1051928805, 1059978813, 1047372601, 1062901239) + + W(6, 1051063687, -1131397783, 1027540319, -1102428675) + + W(7, -1094753613, -1100081596, -1110958225, -1101878062) + + W(8, -1094561666, 1025159063, -1102063788, -1104802482) + + W(9, 1021380397, -1111732847, -1154253637, -1113334654) + + W(10, 1018478931, -1134237698, 1024172061, -1126359028) + + W(11, -1133861065, -1114031158, 1028470484, -1114369951); + sum2 = + W(0, -1125911137, 1012595671, 999266341, 1044288444) + W(1, -1101165077, 1041147790, -1113177198, 1025832748) + + W(2, -1108931889, -1114639139, 1007405169, -1110292957) + W(3, 1050109344, 1041510968, 1029583838, 1046353900) + + W(4, 1017628143, 1032292317, -1114156858, -1067836662) + W(5, -1072172899, 1037860217, 1074693740, 1081403750) + + W(6, 1050970701, -1128469007, -1110785518, -1074929312) + + 
W(7, -1105301985, 1044362436, 1048694785, 1057358913) + W(8, 1035307535, -1112594616, 1028698628, -1112320536) + + W(9, 1032862316, -1133865234, -1121233078, 1015908463) + + W(10, -1132880011, 1026392505, -1126668834, 1035975658) + + W(11, -1115294669, -1122721591, 1027549527, -1114367420); + WS(-1092184279, -1114774309); + sum1 = W(0, -1112548452, -1120800919, -1114639366, 1008341430) + + W(1, 1009522319, -1120506536, 1034801619, 1022706306) + W(2, 1035183681, 1038305887, 1043708422, 1037535198) + + W(3, -1098020135, 1029108045, -1128765878, -1106461058) + + W(4, -1114927434, -1101125062, -1087724142, -1103312811) + + W(5, 1062809460, -1118967585, 1002077113, 1043448960) + W(6, 1044352162, 1039166894, 1063427286, 1034792975) + + W(7, -1088121787, -1115071968, -1114196826, -1129024277) + + W(8, 979364527, -1131609361, -1093459700, 1048665209) + W(9, 1043037714, 1023205859, 1033949151, -1126884683) + + W(10, -1115173597, -1182492945, 1021495134, -1113251558) + + W(11, -1113757330, -1113409875, -1123554950, -1115469664); + sum2 = W(0, -1130731300, -1146462793, -1114036842, 1017173194) + + W(1, 1027612469, -1121343755, 1027895615, -1154268081) + + W(2, -1113218164, 1028824397, 1013989596, 1034818358) + + W(3, -1104368005, 1040334859, -1123082454, -1123775916) + + W(4, 1024876637, -1129593633, 1044891209, -1101530532) + + W(5, 1049606516, -1097048862, 1042764886, -1113704648) + + W(6, -1112264614, -1113338336, 1058622224, -1105838439) + + W(7, 1050794194, -1102338893, 1040822444, -1122592353) + + W(8, -1099018413, -1111675537, -1088855347, 1059579881) + + W(9, 1042736448, 1043492825, -1121501222, 1034905054) + + W(10, 1026854381, -1102804304, -1102462733, -1108694945) + + W(11, -1114432119, 1019755962, -1122392540, -1119817837); + WS(1061153836, -1122742154); + sum1 = W(0, 1027853554, -1115291137, -1171221910, -1112977164) + + W(1, -1121554637, 1022714404, -1157881362, -1123187178) + + W(2, -1109089440, 1043116226, -1138258466, 1050791197) + + W(3, 1034471002, 
1041098678, -1134239506, 1034237593) + + W(4, -1102650256, -1115210274, -1112757431, -1077890273) + + W(5, -1082648980, -1093251577, -1097764996, -1092621429) + + W(6, 1051082942, 1047414994, 1055597733, 1055540065) + W(7, 1061271560, 1053866013, 1044732088, 1032996986) + + W(8, 1027843188, -1112215273, 1034632359, 1042514461) + W(9, -1131618020, 1046515591, 1036747322, 1043087924) + + W(10, -1131011401, -1119605372, -1126942769, -1126698730) + + W(11, 998349933, 1025132479, -1120958927, 984532249); + sum2 = + W(0, 1031101174, -1132731961, 1022986308, 1017601076) + W(1, -1126674740, 1022398052, 1013813081, 1010235161) + + W(2, -1102026314, 1038074007, -1092900468, 1030590838) + + W(3, 1001758673, 1001713969, -1140283033, -1138091097) + W(4, 1042272104, -1090146581, 1028046198, 1054179894) + + W(5, -1096188072, 1035923987, -1106500726, 1036670019) + W(6, -1126868316, 1042331882, 1039425711, 1068886648) + + W(7, 1049555295, -1096410864, 1036573795, -1105222854) + W(8, 1028240358, 1036936531, -1122656394, 1039233571) + + W(9, -1086618714, -1090080092, -1110072380, -1134061337) + + W(10, 1023923290, -1128194324, -1114483483, -1108833787) + + W(11, 1026905222, 1029729918, 1035325123, 1035525391); + WS(-1098883799, 1037141561); + sum1 = W(0, -1114839991, 1029657918, -1116594483, 1034543274) + + W(1, 1026816074, -1140770618, 1035386535, 1007197468) + W(2, 1039141586, 1034970850, 1048999812, 1026227619) + + W(3, -1103366664, -1115755155, -1109617409, -1117505278) + + W(4, -1110021469, -1090767256, -1089859629, 1007604554) + + W(5, 1063309440, 1050338841, 1049028980, 1050252341) + W(6, 1020247259, 1029681923, 1058872566, -1100094273) + + W(7, -1090400131, -1090199858, -1104209655, -1101914812) + + W(8, 1030453625, -1119339247, -1104869706, -1106749124) + + W(9, 1040832320, 1023759014, 1040157646, 1034144506) + + W(10, 1040211480, -1126798193, 1041448405, -1123771570) + + W(11, 1006844457, -1131945671, -1115455274, -1106217213); + sum2 = W(0, 1003211606, -1137674963, 
-1151639180, -1126381316) + + W(1, -1125497686, -1120928387, 1024650751, -1121709003) + + W(2, -1120528483, -1120827669, -1113012012, -1125167698) + + W(3, -1133545211, 1042115577, -1111755343, 1034768106) + + W(4, -1156174412, -1165578649, 1045965141, -1143882022) + + W(5, -1157195276, -1139515595, 1024020315, -1128894336) + + W(6, -1122088448, 1021421814, -1108742643, 1025314463) + W(7, 1048741047, 1042307479, 1028526161, 1033940928) + + W(8, 1034591096, 1046263423, 1043768035, 1048434617) + + W(9, -1100558843, -1111490489, -1106766859, -1105393784) + + W(10, 1008615371, -1109822394, -1107512724, -1102660599) + + W(11, -1103528899, -1116374767, 1031989982, -1146160726); + WS(1056068055, 1054753321); + sum1 = W(0, -1151415881, -1148496799, 1010657956, 1024019466) + + W(1, -1130137728, 1008503186, -1132133255, 1035499631) + + W(2, 1039080395, -1110003853, 1010453293, 1036235820) + W(3, 1020552269, 1036573197, -1115513116, 1040242409) + + W(4, 1057552055, 1043835198, 1059020393, 1056043521) + W(5, 1059247988, 1054798584, 1050516586, 1047345767) + + W(6, -1089028090, 1042243518, -1089501818, -1077848098) + + W(7, -1085601017, -1088101382, -1102383812, -1092379525) + + W(8, -1115248008, 1050869997, 1038857744, 1040563886) + W(9, 1031869090, 1014726271, 1037313809, -1116861746) + + W(10, 1035025164, 1014222010, 1019596694, -1121658024) + + W(11, -1123056704, 1035081577, -1137744617, 1008326318); + sum2 = + W(0, 1028355794, -1106782132, 1035193429, 1019490884) + W(1, -1103311494, 1051099266, -1107701072, 1017055868) + + W(2, -1096807405, 1053669968, -1113415499, 1045145961) + W(3, 1033457423, -1105194567, -1111393110, 995616995) + + W(4, 1040605863, -1138391825, 1054131819, -1117197986) + W(5, 1014178849, 1043284247, 1045435457, -1110630971) + + W(6, 1067941768, -1075220438, -1088459045, 1059082808) + + W(7, 1050105229, -1121773074, -1120483817, 1023829774) + + W(8, -1077104108, -1095467240, -1094157882, 1063065831) + + W(9, 1061761756, 1042603721, -1116379229, 
1044996767) + + W(10, 1052817152, -1103015841, 1049259157, -1103440804) + + W(11, -1095094287, -1122898552, -1109769641, 1018464332); + WS(-1081368566, -1084412265); + sum1 = W(0, 1027068306, 1015688387, 1033201483, 1018141039) + W(1, 1032237687, 1022250212, 1019999841, 1019305877) + + W(2, -1116002144, -1139602460, -1116483849, -1108067395) + + W(3, -1114781410, -1101031910, -1110135922, -1115253585) + + W(4, 1048181720, 1048418201, 1059204754, 1059054739) + W(5, 1060863016, 1054995541, 1048891912, 1054003721) + + W(6, -1097381330, -1109028275, -1086309497, -1080843359) + + W(7, 1035809139, -1090938969, -1097995920, -1098655035) + + W(8, -1110979184, 1018111780, -1126061147, 1023153354) + + W(9, 1043786804, -1102361409, 1017536656, -1110370680) + + W(10, 1021378954, 1036715310, -1118145977, 1024269909) + + W(11, 1014428471, 1015803518, 1027639288, 1026153225); + sum2 = + W(0, -1105480981, 1035973965, 1037165381, -1106311589) + W(1, 1052000177, -1104974154, -1120302417, 1040627526) + + W(2, 1050460434, -1096496755, 1049140757, 1006710678) + W(3, -1091756558, 1044207757, 1032159893, -1100748342) + + W(4, -1098849709, 1051612828, -1090898849, 1040931379) + W(5, 1058463605, -1090778289, 1039812947, 1045155050) + + W(6, 1048076097, -1090117310, 1053634690, 1066845841) + + W(7, -1077185739, 1058534342, -1102438650, -1096398368) + + W(8, 1035320005, 1047120319, 1021130333, -1083953982) + W(9, 1057459457, -1091684787, 1041032826, 1045504944) + + W(10, -1105501338, -1108725217, 1039969785, 1046728781) + + W(11, 1038345201, -1141278071, 1040363142, -1109297232); + WS(1040286894, 1035975353); + sum1 = + W(0, -1171293815, 1023182418, 996039142, -1137235964) + W(1, -1135192533, 1025821242, -1128818097, 1020463141) + + W(2, 1034319831, -1127987806, -1102399280, -1105596260) + + W(3, -1103790599, -1108335719, -1114500598, -1129600787) + + W(4, 1059664710, 1048633701, 1062256902, 1059153168) + W(5, 1061115073, 1056014846, 1050028641, 1052737098) + + W(6, -1091927835, -1097464070, 
-1087363556, -1086480503) + + W(7, -1089057879, -1099300213, -1097571161, -1112421262) + + W(8, -1116625676, 1029070026, 1016696459, 1000027879) + W(9, 1028964506, -1108586113, 1032749254, -1096430155) + + W(10, 1013574450, 1012611215, -1115123712, 1006568169) + + W(11, -1134032598, 1032814547, 1020632239, 1016193698); + sum2 = W(0, -1115556642, -1130243023, 1032034792, -1126476135) + + W(1, 1032996496, -1138592966, -1121911846, 1029413614) + + W(2, 1037372428, 1030557532, 1049080310, -1112452010) + + W(3, -1108874582, -1125709403, -1147299853, -1125845903) + + W(4, 1085969060, 1077581449, -1074326256, -1064530882) + + W(5, -1074476109, -1104386166, 1029772508, -1109306490) + + W(6, 1051814466, 1043810263, -1103611514, -1081646826) + W(7, 1033029539, 993676602, 1034388196, 1000946837) + + W(8, 1011471282, 1030366783, -1109496698, -1129814679) + W(9, 1035377868, 1007963430, 1005762565, 1024214725) + + W(10, -1114698318, 1034280644, -1140286158, 1022645135) + + W(11, -1126128243, 1013431542, -1114714998, 1024966722); + WS(-1128144240, -1105727419); + sum1 = + W(0, 1008177348, 1008158465, 1025380533, 1041196118) + W(1, -1123971020, 1038005880, -1110790464, 1022532691) + + W(2, 1038442911, 1030921926, -1116236765, -1091904121) + + W(3, -1110541401, -1155490648, -1118346788, -1132141153) + + W(4, 1034971295, -1108323937, 1041262911, -1116818069) + W(5, -1118195565, 1050488689, 1005836543, 1035288564) + + W(6, 1037111480, 1020025483, 1039711206, -1090328900) + W(7, 1053066642, -1108534613, 1032777199, 1043217853) + + W(8, -1119737674, 1015575522, -1096558626, -1106145913) + + W(9, -1107602136, 1033678004, 1034582855, -1110557919) + W(10, 1019898055, 1036155997, 1023258159, 1038413850) + + W(11, -1123851725, 1027658998, -1118860082, -1161170171); + sum2 = W(0, -1128930637, 1013105493, 1027020519, -1111106512) + + W(1, 1033707008, -1108465338, -1130609759, -1112986298) + + W(2, -1132037027, -1110061572, 1023565141, -1118411413) + + W(3, -1104848450, 1001195043, 
-1113096913, -1123693871) + + W(4, 1018701611, 1028677045, -1097696705, 1051172043) + W(5, 1036460110, -1134254189, 1035127174, 1021784635) + + W(6, -1110976033, -1113315034, -1136487341, 1064921249) + + W(7, -1097426277, 1028959819, -1113952185, -1139702593) + + W(8, -1129623857, -1136221161, 1045236380, -1100340880) + + W(9, 1026225447, 1033145987, -1105387359, 1025786651) + + W(10, 1029665439, -1120215489, 1025952555, -1120959951) + + W(11, 1006240491, -1118442072, -1137489821, -1119362634); + WS(1066786198, 1018352061); + sum1 = W(0, 1037807432, -1147124480, 1032919115, 1043153405) + W(1, 1024885356, 1040525108, 1034093744, 1041644314) + + W(2, 1042991174, -1118935590, -1102401423, -1111703874) + + W(3, -1105747115, -1109427003, 1036876386, -1113217832) + + W(4, 1056116676, 1053008404, 1058826232, 1052869895) + W(5, 1063596395, 1052412861, 1055303296, 1055747689) + + W(6, -1089303485, -1093203077, -1091158043, -1077398372) + + W(7, -1088790919, -1096537905, -1092801624, -1095292522) + + W(8, -1115182496, 1032850856, -1118679276, -1112497438) + + W(9, 1041854800, -1158252416, 1029739551, -1097656684) + W(10, 1044944769, 1019595829, 1030098999, 990101824) + + W(11, 1037575334, 1022825908, 1042956152, 1027108728); + sum2 = + W(0, 1036031230, -1120880520, -1118383086, -1112120068) + W(1, 1044452135, -1113902667, 1036248196, -1175016923) + + W(2, -1102747382, 1035081854, -1081312733, -1097589576) + + W(3, -1136989239, -1101386787, 1038098664, -1106083613) + W(4, 1040389106, 999021742, 1046511529, 1061513306) + + W(5, -1098672489, 1028645652, 1037598188, 1027306194) + W(6, 1027629590, 1048871361, -1102228261, 1059108854) + + W(7, 1030279668, -1090327880, 1046091275, -1110606753) + W(8, 1033840740, 1037408166, -1111365044, 1017984327) + + W(9, 1032578550, -1098256610, 1055272900, -1100293962) + + W(10, -1113736580, 1036561850, -1105995296, 1037901466) + + W(11, 1024281094, 1003868670, 1035521712, -1114802202); + WS(-1088826540, -1072617048); + sum1 = W(0, 986338164, 
1023136598, -1127817408, 1038855104) + W(1, -1112014844, 1032100206, -1114071293, 1031252724) + + W(2, 1037920241, -1108211448, -1117329516, -1098615294) + + W(3, -1156351446, -1125030464, 1025960366, -1128205111) + + W(4, 1051799989, 1051897846, 1060040519, 1068445460) + W(5, 1041659186, 1049473503, 1046320643, 1053193450) + + W(6, -1096657083, -1097067830, -1085267217, -1091986338) + + W(7, -1085224562, -1095210946, -1103740786, -1098483636) + + W(8, -1131268440, -1134063855, 1034398076, -1104903024) + + W(9, 1041222927, -1123500579, 1021522163, -1110505437) + + W(10, -1122826702, 1022858827, 1016624516, 1025833567) + + W(11, -1148095931, 1018816617, 1004333185, -1135786218); + sum2 = W(0, 1025188267, -1121532154, 1037371296, -1111771837) + + W(1, 1040037716, -1106548534, 1039102739, -1109851481) + + W(2, -1139275200, 1033600964, -1123953755, 1046721727) + + W(3, -1098628025, 1040338433, -1109104289, 1042987802) + + W(4, 1040515214, -1111129732, 1035395064, -1119362505) + + W(5, 1046868281, -1095083293, 1045077862, -1121821072) + + W(6, -1131755519, -1105820182, 1067277095, 1075696256) + + W(7, -1070220409, -1079865944, 1049591033, -1113640498) + + W(8, -1105181237, 1041295374, -1104867399, 1058324165) + + W(9, -1095490474, 1030948623, -1107729189, 1039251361) + + W(10, 1030566925, -1137053008, -1131759093, 1026363312) + + W(11, -1118056117, 1018203389, -1128931569, -1123620784); + WS(-1126600048, -1081356549); + sum1 = W(0, -1127841038, -1121393942, -1127713943, -1112866678) + + W(1, -1130256197, -1113126004, -1135790871, -1114110304) + + W(2, -1133929050, 1032965854, 1033968271, 1043820719) + W(3, 1042942538, 1043379215, 1027795642, 1034600139) + + W(4, -1098403026, -1092154099, -1092503722, -1081570623) + + W(5, -1086843241, -1092479684, -1097678271, -1094287464) + + W(6, 1044657313, 1056638003, 1049224359, 1066854747) + W(7, 1055316562, 1058087872, 1032418298, 1051778533) + + W(8, -1112364357, 1040562850, 1040769245, -1105175211) + + W(9, -1123699911, 
-1108064855, -1138101411, 999504676) + + W(10, -1126110067, 1036225512, -1129561307, 1047740947) + + W(11, -1102743497, 1041161205, -1106598351, 1039116783); + sum2 = W(0, 1002695390, -1122258341, -1122292123, -1120933435) + + W(1, 1032169765, -1117894001, 1003179774, -1138289315) + W(2, 1041236493, 1024135109, 1018577975, 1034742596) + + W(3, -1148289286, -1155671947, 1027950533, -1116290259) + + W(4, -1107205869, -1167041196, 1048984334, -1085659967) + + W(5, 1041640777, -1108030745, -1123201389, -1139220243) + + W(6, -1110230321, -1120190201, -1063745707, -1070580481) + + W(7, 1042707591, 1049556353, -1106095214, 1050855401) + W(8, 1025661924, 1051920007, 1083223141, 1079255156) + + W(9, -1110361303, -1097500712, 1030518681, -1105964729) + + W(10, 1013006047, -1101224590, 1033459361, 1037086181) + + W(11, -1105447820, 1029970779, 1006793819, -1115669085); + WS(1044741550, -1111040261); + sum1 = W(0, -1123328853, -1102049737, 1040528310, -1104112191) + + W(1, 1024305776, -1101315365, 1021709031, -1111623399) + + W(2, -1129171790, 1051367713, -1097130129, 1051950321) + + W(3, 1039437043, 1045396986, -1108128612, -1165073551) + + W(4, -1119860449, -1091471693, 1035652177, -1092522751) + + W(5, -1095677118, -1130673884, -1112716514, -1121220876) + + W(6, 1045626213, 1046305018, 1049556145, 1053940037) + W(7, 1055561708, -1095468986, 1044018368, 1026385047) + + W(8, -1104602741, 1044417333, -1091825946, 1049099055) + + W(9, -1104689389, 1055485165, -1103796617, 1036376309) + + W(10, 1022127887, -1107859535, 1033350174, -1124872112) + + W(11, -1115808049, -1115536169, 1029190802, -1122289340); + sum2 = W(0, -1104033148, -1105436527, -1109964889, -1097126651) + + W(1, -1098141643, -1090250993, -1116302593, -1118233874) + + W(2, 1039328248, 1038128967, 1010051700, 1057153562) + W(3, 1057745438, 1060383381, 1035812245, 1044677482) + + W(4, 1040757730, 1010939044, 1051778220, 1035481326) + + W(5, -1094952202, -1102222701, -1133841678, -1105877607) + + W(6, 1023457753, 
-1118329847, -1098103909, -1094975289) + + W(7, 1048659070, -1098057265, -1123621243, -1160321672) + + W(8, -1105261590, 1011822306, 1019752394, 1037637839) + W(9, 1023965510, 1049957743, -1104798795, 1036758948) + + W(10, 1019934334, 998732076, -1151355224, -1112777586) + + W(11, 1012075272, -1134551782, 1031321862, -1124061226); + WS(1065794902, 1053657215); + sum1 = W(0, -1121908120, 1018314063, 1015310777, -1124873339) + + W(1, 1031572020, -1147984576, 1024640407, 1026486289) + W(2, 1038333639, 1031009517, 1039118671, 1034311430) + + W(3, 1041290230, -1120195837, -1119856235, -1114392412) + + W(4, 1027457764, -1106391546, -1097318831, -1141167585) + + W(5, 1054548019, 1053489270, 1041008530, 1038700424) + W(6, -1123855831, 1034446294, 1039330595, -1090069831) + + W(7, -1096859758, -1090834490, -1105884449, -1105777031) + + W(8, -1123864939, -1121443224, 1026117491, 1040632671) + W(9, 1028976296, 1043686014, 1007855863, 1023881277) + + W(10, 1004753494, 1015085764, -1152877546, -1119728987) + + W(11, -1155937596, -1132243578, 999100652, -1148206328); + sum2 = + W(0, -1111593455, 1026283313, -1106910582, -1123416451) + W(1, 1050577288, -1128641646, 1047088729, -1113858977) + + W(2, 1035072504, 1028902500, 1047373071, 1053329938) + W(3, -1092564654, -1100521328, -1096962366, 1017970524) + + W(4, 1043803228, -1108682745, 1040261276, 1067600226) + W(5, -1072392505, -1113219589, 1049469167, 1037790473) + + W(6, 1033408259, -1102620143, 1048908013, -1090850305) + W(7, 1062297118, 1017836657, 1033530157, -1125952722) + + W(8, -1104275307, 1005020832, -1100960524, 1050480903) + + W(9, -1138210428, -1168415746, 1025688819, -1131011166) + W(10, 998670368, 1032181798, 1023771046, 1032569956) + + W(11, -1112013629, 1023611526, -1112880371, 1021270423); + WS(1062391596, 1012575503); + sum1 = W(0, 1031343933, -1104891955, 1032025473, 1018981700) + + W(1, -1107154694, 1037332142, -1113508494, -1120166325) + + W(2, -1118754907, 1040079801, -1098309187, 1047266492) + + W(3, 
1041578200, -1102249736, 1040475388, -1099931343) + + W(4, -1124966418, 1034244551, -1115351741, -1082667844) + + W(5, 1034155267, -1116669206, -1108675062, 1037232762) + + W(6, -1110681406, -1097175282, 1051954479, 1046130725) + W(7, 1051147223, 1052085167, 1003822408, 1045069376) + + W(8, 1044511208, 1039695694, 1024690888, 1049723012) + W(9, -1103478091, 1031631968, 1001356052, -1120670778) + + W(10, -1105353116, 1041572194, -1106768761, -1111177056) + + W(11, 1041859398, -1100878141, 1037747241, -1115735812); + sum2 = + W(0, 1033523600, -1111554425, -1129275556, -1122087690) + + W(1, -1124036306, 1037981702, -1119955572, -1122349214) + W(2, 984378304, 1035520350, -1139357832, 1042336361) + + W(3, -1098127108, -1108770624, 1031985496, 1030270440) + W(4, 1029410276, 1027721316, -1120989222, 1063527919) + + W(5, -1086537602, -1087308230, 1038170956, 1040810725) + W(6, 1013286288, -1092144316, 1051778774, 1062226267) + + W(7, -1106529787, 1023869712, -1106223749, -1113965912) + + W(8, -1149511552, 1040549613, -1101613332, 1044728263) + + W(9, -1102568312, 1043627655, -1109791564, 1025118636) + + W(10, -1114072340, 1027894332, -1138380800, -1113445964) + + W(11, 1042436209, -1103441850, 1038090706, -1120264936); + WS(1061969900, 1065420380); + sum1 = W(0, 1002358108, -1132400117, 1036273784, -1121891281) + W(1, 1031471239, 1014037550, 1015146068, 1021231496) + + W(2, 1041410250, 1024825568, -1111416381, -1112279056) + + W(3, -1100377015, -1110955448, -1140842745, -1109508230) + + W(4, 1005238718, 1055004288, 1058195604, 1065721300) + W(5, 1058645356, 1050103969, 1047622119, 1050895308) + + W(6, -1098164717, -1093641048, -1089452644, -1081575699) + + W(7, -1095897092, -1095327860, -1122283864, -1101391147) + + W(8, -1127598604, 1037205414, -1122200151, 1041630292) + + W(9, -1115380038, 1033148051, -1112032609, -1107652840) + + W(10, 1013481849, -1128588267, 995531041, -1118185583) + + W(11, 1031870564, -1125213521, 1031659065, -1167265945); + sum2 = W(0, 1037378832, 
-1105869799, 1034348880, 1010791317) + + W(1, 1020451237, 1025476671, -1121382126, -1144944739) + + W(2, -1097408481, 1040337956, 1050274911, -1124897054) + + W(3, -1105197585, -1107003535, -1131733180, -1128932040) + + W(4, -1089422359, 1064628576, 1078490638, -1067190452) + W(5, 1026583828, 1034175619, 1030289482, 1035014695) + + W(6, -1134521568, 1048269110, 1055313899, -1081189141) + + W(7, 1051135592, 1043712009, -1114815155, -1125325948) + + W(8, 1024014288, -1113808990, 1025241947, -1100728789) + + W(9, 1047164907, -1112306567, 1025919494, -1116208218) + + W(10, -1156999910, -1121898726, 1025759300, 1010618498) + + W(11, 981635389, -1142752483, -1158764013, 1029088384); + WS(1048733783, 1027643848); + sum1 = W(0, -1133845785, -1133175894, -1135183403, -1116789548) + + W(1, -1148511502, -1134260761, 1018609610, -1123833247) + + W(2, 1036515120, 1023561851, 1042717327, 1030082171) + W(3, 1048693393, 1034425221, 1039337356, 1027869798) + + W(4, -1088559253, -1093510507, -1088396394, -1080791005) + + W(5, -1089662985, -1090323862, -1100061261, -1089888600) + + W(6, 1057953132, 1049915371, 1060634166, 1063183646) + W(7, 1058925305, 1044704560, 1042697693, 1047384498) + + W(8, 1034283684, -1130587597, -1127730531, -1114509469) + + W(9, -1138682841, 1038705162, 1015444552, 1049640472) + + W(10, -1114935677, 1012777537, -1122593837, 1031486794) + + W(11, -1116148770, -1166921432, -1126426832, 1023971661); + sum2 = + W(0, 994989105, -1144072856, -1122509885, 1026939205) + W(1, 1028398855, -1120878413, 1025208177, -1113678678) + + W(2, 1037891939, -1103317800, 1037436931, -1127785128) + + W(3, -1103788534, 1034163445, -1127272012, 1026044831) + W(4, -1173716290, 1052666546, 1001371192, 1040815920) + + W(5, -1100616833, 1020001090, -1134790452, 1043708121) + + W(6, -1100065758, -1122814128, -1129250144, 1058553182) + + W(7, 1055924851, 1053725018, 1036740791, -1123998093) + + W(8, -1120127409, -1094230404, -1083084807, -1075758811) + + W(9, -1074914963, -1075891465, 
-1081737398, -1084193900) + + W(10, 1036146011, 1054652553, 1061875701, 1067689129) + W(11, 1068744910, 1067320464, 1065190583, 1062426867); + WS(-1084457324, 1066101859); + sum1 = W(0, -1146837948, 1016102797, -1115430590, -1115904029) + + W(1, -1139281484, -1117556663, 1034492603, -1124011056) + + W(2, -1111022815, -1125864729, 1047559389, 1034715654) + + W(3, -1103064096, -1108958078, -1131712724, -1131623214) + + W(4, -1115296098, 1038645029, -1083544605, 1037951214) + + W(5, 1057869073, -1109939240, -1123954086, -1123535023) + + W(6, 1042035070, 1047347075, 1059438663, 1052960194) + W(7, -1083491782, 1046468604, -1121536740, 1043250206) + + W(8, 1030912362, -1106508822, -1104927285, -1167432352) + + W(9, 1045723406, 1012598283, 1019381994, -1110102674) + + W(10, -1121095658, -1128702979, 1022709564, -1129984885) + + W(11, -1123913107, -1123056725, 1026960983, -1128824061); + sum2 = + W(0, -1125705477, -1127756609, 993770127, 1034569455) + W(1, 1010458600, -1119844207, -1122090273, 1019369801) + + W(2, -1127395739, 1044805550, -1097909373, -1177736375) + + W(3, -1116962018, 1043096893, 1035619005, -1112228363) + W(4, -1105227463, 1015549291, 1049252886, 1058826852) + + W(5, -1119951878, -1094678765, -1119199398, -1109645851) + + W(6, 1036906007, -1109261657, -1102191244, 1061852121) + + W(7, -1086841826, -1128420796, 1021651597, 1038741216) + + W(8, 1026846896, 1037043129, -1097396928, -1116961215) + W(9, -1103314123, 1038664018, 1035622840, 1019677755) + + W(10, 989272222, -1106679057, 1042524140, 1013800282) + + W(11, 1033157194, -1128652243, -1124905499, -1126258736); + WS(1067036470, -1087655195); + sum1 = W(0, -1143851962, 1026890311, -1106604063, 1016195769) + + W(1, -1138731339, 1037080924, -1121217270, 1029907824) + + W(2, 1018554747, -1120379564, -1114993347, 1022258327) + W(3, 981655329, 1045778247, 1025121994, 1023696087) + + W(4, 1050955184, 1053786273, 1042552729, 1063572966) + W(5, 1058367344, 1055564522, 1047575674, 1048845230) + + W(6, 
-1110423387, -1098337218, -1090196611, -1083965137) + + W(7, -1084059678, -1090231591, -1096786628, -1095677575) + + W(8, -1106360251, 1040602294, -1134551499, 1045568787) + W(9, 1046272662, 1028646847, 1026932869, 1027665749) + + W(10, 1025799530, -1118031105, -1119796702, -1110210301) + + W(11, -1132188582, -1117810092, 1021012754, -1118338222); + sum2 = + W(0, -1113854255, 1028862084, -1104900936, -1104116954) + W(1, -1114719178, 1040474278, 1048817634, -1122519186) + + W(2, -1108417433, 1033605964, -1109151867, -1102487521) + + W(3, -1071728118, -1073652757, -1090221130, -1102125815) + + W(4, 1042382895, 1032534216, 1017793091, 1067799294) + W(5, 1077900204, 1051278318, 1048299521, -1105402373) + + W(6, 1025435556, 1033584957, 1033281558, -1099286824) + W(7, 1056033422, -1113833767, 1011833167, -1107682623) + + W(8, -1108154666, 1046901615, -1111336587, 1036137805) + W(9, 1024983956, 1034492621, 1033542750, -1127962013) + + W(10, 1042045079, -1105415159, 1039313065, 1005958605) + + W(11, 1020191839, -1114752489, 1024757388, 1032711916); + WS(-1087701164, 1071308133); + sum1 = W(0, 1040776942, -1112915908, 1032135583, -1109010607) + + W(1, -1125896234, 1033070381, 1001383873, -1127540900) + + W(2, -1100025248, 1039729431, -1111499300, 1047911115) + + W(3, -1096854047, 1047586960, -1100475875, 1025720505) + + W(4, 1040244657, -1108101926, -1107965963, -1095250332) + + W(5, -1097276733, -1104060437, -1129328465, -1115131352) + + W(6, 1047131214, -1104971284, 1036283892, 1057920197) + W(7, 1057601759, 1046835436, 1017446731, 1041767855) + + W(8, -1116520012, -1123736282, -1105297773, 1040789767) + + W(9, -1089523085, 1046328829, -1107686492, 1031878575) + + W(10, -1152028354, -1120800451, 1024489952, 1038925278) + + W(11, 1034463851, -1130324381, -1112186573, 1015088161); + sum2 = W(0, -1089646175, -1112379036, -1087427677, -1088789602) + + W(1, 1059010203, 1060985431, -1139584980, 1054225000) + W(2, 1060062242, 1018139178, 1066898400, 1066001716) + + W(3, 
-1079880526, -1081253059, -1095878540, -1095013927) + + W(4, 1050551269, -1096611550, -1116330565, 1056235229) + + W(5, -1118197619, -1102574794, 1039647840, -1099698701) + + W(6, 1017421696, 1040955729, 1035645225, 1040223965) + W(7, -1114176344, 1044677785, -1100382074, 1035015292) + + W(8, -1124178965, -1115297526, -1111832492, 1027731712) + + W(9, -1106279788, -1130074994, 1040262609, -1124507789) + + W(10, 1023971829, -1112174927, 1035838465, -1128027042) + + W(11, 1015547306, 1033729011, -1110899873, 1032866060); + WS(1063980716, -1121784884); + sum1 = + W(0, 1027357276, -1104528667, -1122859745, -1107033792) + + W(1, -1105809438, 1033196814, -1111338526, -1111105883) + + W(2, 1046480497, -1121849329, 1049050976, 1046903620) + W(3, -1128776604, 1050194992, -1112079704, 1037688238) + + W(4, -1094692773, -1090457523, -1090130146, -1084796508) + + W(5, -1089370535, -1105629197, -1093813713, -1094775914) + + W(6, 1050884987, 1048987838, 1057733925, 1061180023) + W(7, 1054317580, 1055134019, 1046804578, 1057027842) + + W(8, 1046796692, -1098583576, 1037378828, 1034725880) + W(9, -1096435840, 1049629536, -1097780651, 1038664949) + + W(10, 1025153935, -1107151594, 1016110982, 1023598816) + + W(11, -1114135692, 1040596522, -1113598601, -1127372967); + sum2 = + W(0, 1041589168, 1024315846, 1043663909, 1040621380) + W(1, 1040212968, 1033384296, 1014426993, 1022694293) + + W(2, -1098133521, -1106371173, -1106740015, -1107177437) + + W(3, -1111131552, 1040430896, 1032409853, 1003633577) + W(4, 1031854878, 1051327081, -1108849860, -1106409587) + + W(5, -1120392794, -1093765703, -1107120761, -1104283048) + + W(6, -1105720063, 1023684981, -1124767392, -1127108094) + + W(7, -1102248051, 1050935529, 1033359350, -1128561530) + + W(8, 1035016760, -1128261352, 1035542785, -1111599720) + W(9, 1051247500, -1102687331, 1037002974, 1026574958) + + W(10, -1121214780, 1011090873, 1029549000, 1034668152) + + W(11, -1136161665, 1018485311, -1126387868, 998242826); + WS(1047153326, 
-1105960410); + sum1 = W(0, 1032318440, -1133035902, 1023033002, -1111741083) + + W(1, 1009988015, -1126171361, -1183551252, -1118067977) + + W(2, -1105871782, 1024269605, -1110369667, 1047130673) + + W(3, 1043587009, -1113373146, 1035984354, -1111622952) + + W(4, -1111485966, -1098984649, 1050315790, -1080272088) + + W(5, -1086889743, -1098309317, -1105786176, -1106267538) + + W(6, 1043448020, 1044051929, 1059563250, 1053459949) + W(7, 1061038806, 1044456898, 1048086975, 1037611273) + + W(8, -1110710801, 1028564080, -1113831750, 1024144723) + + W(9, -1117616080, -1145766181, -1114741783, 1040212689) + + W(10, 1019497606, 1019882933, -1118335287, 1018957863) + + W(11, -1122250988, -1124691083, -1123242346, -1150984053); + sum2 = W(0, -1132142402, 1019405455, 1022592575, 1002713172) + W(1, 993592760, -1120241477, 1024283135, -1139068036) + + W(2, -1109262360, 1034679642, -1099677890, 1048612806) + + W(3, 1036415285, -1118742739, -1125545553, -1129249399) + + W(4, 1044069954, -1104355470, -1119884477, -1094376769) + + W(5, -1110824904, 1031998111, -1128336378, 1030577347) + + W(6, -1111364284, -1108806456, -1066728208, 1079847520) + + W(7, 1060715429, -1103911380, 1028262793, -1127589036) + + W(8, 1031679549, -1097044621, 1043056057, 1057366851) + W(9, -1104696735, 993276152, 1012785110, -1133646774) + + W(10, 1018817977, 1034241557, 988348592, -1114461524) + + W(11, -1107038514, 1036085605, -1128734189, -1182233281); + WS(1056964652, 1042968547); + sum1 = W(0, 1026864302, -1136392949, 1024389106, -1126999334) + + W(1, 1026043785, -1130628252, 1000688741, 1002882868) + W(2, 1044356666, 1026146499, 1030265493, 1038248593) + + W(3, -1126651679, -1146035092, 986691602, 1033146167) + W(4, -1111481597, 1051097712, 1040695131, 1052760426) + + W(5, 1056669112, 1038264227, 1027446877, 1040116499) + + W(6, -1091567350, -1098882624, -1090350559, -1081953599) + + W(7, -1095456258, -1102468122, -1109487565, -1110909592) + + W(8, 1041608192, 1029687022, 1042991030, 1043333208) + 
W(9, 1044232931, 1038520784, 1033386798, 1040618297) + + W(10, 1008927875, -1130908784, -1127664937, -1121268145) + + W(11, 952128323, -1146879000, -1137895230, 1023066122); + sum2 = + W(0, -1155037894, -1117766868, 1042293027, 1004126723) + W(1, -1123150924, 1015608713, -1135323377, 1030487760) + + W(2, -1109445776, -1132308185, -1101307898, 995397702) + W(3, 1014206993, 1046458351, -1120597884, 1026431488) + + W(4, -1106399184, 1041934367, -1137539889, 1035303811) + + W(5, -1091518060, -1124849601, 1043824666, 1061366644) + W(6, 1052866656, -1111172948, 1058738313, 1064268129) + + W(7, 1052685347, -1090299347, -1087905074, -1075431372) + W(8, 1046069094, 1042904039, 1045435531, 1051607279) + + W(9, -1098813712, -1150133894, 1037110006, -1089430426) + + W(10, 1012439553, -1108172404, -1110297962, -1110625272) + + W(11, -1107185126, 1045339946, -1122419536, 1049588119); + WS(-1096061015, 1045980634); + sum1 = + W(0, -1136709393, 1028828435, 997001355, 1022855123) + W(1, -1128346047, 1031977874, 984865373, 1040707047) + + W(2, 1047345176, 1016418418, 1049157941, 1032879612) + W(3, 1042754713, 1032419332, -1124110958, 1040469689) + + W(4, -1089944925, -1096285771, -1094897728, -1097979037) + + W(5, 1014307589, 1040846151, 1042092045, 1041011167) + W(6, -1113534082, 1049565603, -1113216951, -1094660427) + + W(7, 1031601097, -1095562472, -1109834744, -1093905225) + W(8, 1020871950, 1021274185, 1042276288, 1039512084) + + W(9, 1028680388, 1044068336, -1140756488, 1048548616) + W(10, 1032683291, 1018404508, 1027484139, -1119890439) + + W(11, 1030972704, -1125789619, 1031142826, -1123584299); + sum2 = + W(0, -1114682679, -1115477579, -1100789101, -1113658391) + W(1, -1110942733, 1044868728, 1028313432, 1046472362) + + W(2, 1055059251, 1024869090, 1055732865, -1122542034) + + W(3, -1120682294, -1107016239, -1152832742, -1087243594) + + W(4, 1064739409, 1031266096, 1030176534, 1067742275) + W(5, 1062911121, -1081471397, -1080677577, -1077241450) + + W(6, -1095220576, 
-1113806527, 1040530242, 1036337974) + W(7, -1087169731, 1042181045, 1042749891, 1067213860) + + W(8, -1100660553, -1105830101, -1096186716, -1103152959) + + W(9, 1035448471, 1052603278, 1033558186, 1052345796) + W(10, 1038444345, -1132048253, 1040636888, 1015520805) + + W(11, 1032588114, -1102136445, -1134141481, -1112398105); + WS(-1086603372, 1061480206); + sum1 = W(0, 1025472273, -1143591172, -1124123051, -1110008795) + + W(1, -1124248793, -1127491490, 1014214528, -1120184170) + + W(2, 1022341006, 1027469190, -1141108008, 1042054049) + W(3, 1051331968, 1044174205, 1044005681, -1155225358) + + W(4, 1006839158, -1128153798, 1051787090, -1090097115) + + W(5, -1081435553, -1089913687, -1092574472, -1096696337) + + W(6, -1102342404, -1124925900, -1093885235, 1058414043) + + W(7, 1063214861, 1057428893, 1052102264, 1048742922) + W(8, 1042514621, 1043263108, 1027880941, -1105821275) + + W(9, -1094479791, -1099759469, -1112069887, 1035771431) + + W(10, -1114893026, -1114252238, -1121794488, 1039892842) + + W(11, 1028544109, 1041669328, 977928239, 1039400116); + sum2 = + W(0, 1037224808, -1109009263, -1120793535, -1110258597) + + W(1, -1112240446, -1151274598, -1120536233, -1103902982) + + W(2, 1025638319, 1049599085, -1142324554, 1028269513) + W(3, -1132053081, -1119573384, 1010517574, 1008384356) + + W(4, -1093017633, 1039705537, 1040559103, 1053657173) + W(5, -1120149375, 1043501160, -1121251494, 1035850418) + + W(6, -1143762368, -1112746506, -1099136162, 1056581815) + + W(7, -1118127758, -1147801478, -1130373692, 1032229723) + + W(8, -1119609796, -1119534458, -1123476586, -1104556193) + + W(9, 1044376501, -1107739102, 1033895801, -1122521676) + W(10, 995865588, 1021427773, 988834049, -1122952792) + + W(11, -1114993482, 1029987722, -1106681926, -1116132116); + WS(1037058908, -1087106898); + sum1 = W(0, 1032385376, 1028077583, 1014613011, -1151370839) + W(1, -1139761263, 1027279360, 1022565566, 1031834108) + + W(2, 1026637522, -1129390954, 1048467894, 1048758199) + 
W(3, 1043852440, 1034990442, 1017011558, 1036138504) + + W(4, -1098495714, -1107088868, -1108180359, -1093334653) + + W(5, -1108393155, -1097414435, -1111470776, -1104527470) + + W(6, 1041048768, 1041268325, 1045140885, -1080472255) + W(7, 1045570315, 1045361342, 1028923180, 1036330497) + + W(8, 1039729665, -1126519617, 1040649531, -1114665834) + + W(9, -1125126080, 1035082182, 1023808326, 1027790590) + W(10, 1017191910, -1132138900, 1035348883, 997380151) + + W(11, 1031307292, 1030937245, -1138594135, 1036519916); + sum2 = W(0, -1111516802, 1036231024, -1130951686, 1031916934) + + W(1, -1130989326, -1129631002, -1123615369, -1119469958) + + W(2, -1131222514, -1098228691, 1034905648, -1090487171) + + W(3, 1032129266, 1035677128, -1103168970, 1049196148) + + W(4, -1097346809, -1128682274, -1113861539, 1052765954) + + W(5, -1108538807, -1122282521, 1038239484, -1116187122) + + W(6, 1049467820, 1020406578, 1024259929, 1061096371) + + W(7, -1104129123, -1108918961, -1114584358, -1129232626) + + W(8, -1118099733, -1102087439, 1049541587, -1108988196) + + W(9, 1033200380, 1042713844, -1097598443, 1018354634) + + W(10, -1111393075, 1039222222, -1113826501, 1025340241) + + W(11, 1029417921, -1111528334, 1034012496, 1029857705); + WS(1054906071, -1091145324); + sum1 = + W(0, 1029398237, -1140775857, -1128656451, 1004160018) + W(1, -1130732913, 1029686973, 1021855224, -1128798955) + + W(2, -1120406171, -1106052861, -1099422950, 1032725545) + + W(3, -1097841652, 1037934865, 1036573848, -1157619404) + W(4, 1051536197, 1046952684, 1057886666, -1085942396) + + W(5, -1098611557, 1042440654, 1035129734, 1032622058) + + W(6, -1115196942, -1109008729, -1097962977, -1130426984) + + W(7, 1048146402, 1034532491, -1122054089, 1027356128) + W(8, -1136290815, 1034481647, -1114344740, 1050651652) + + W(9, -1102043591, -1133864753, -1124817654, -1126847669) + + W(10, -1126429728, 1032555988, 1015919623, -1108426832) + + W(11, 1040987294, -1107032303, 1044439941, -1115113714); + sum2 = W(0, 
-1113802973, -1116219822, 1020553430, -1109755343) + + W(1, -1131329246, -1112950186, -1113313093, -1116367256) + + W(2, 1027581315, -1112877584, -1110295429, 1027296115) + + W(3, -1126007358, 1035099341, -1130784978, 1035863861) + + W(4, -1110955855, -1127685514, -1097495000, 1025841055) + + W(5, -1104102640, -1110724235, -1165405916, -1097352168) + + W(6, 1047020911, 1021259086, 1057623683, 1053623791) + W(7, 1057318729, 1040083835, 1048620928, 1039940909) + + W(8, -1105727831, -1118276653, -1110729082, -1104267868) + + W(9, -1106795812, -1119773745, -1128480210, -1110646461) + + W(10, 1022816774, -1129541322, 997980814, 1016281450) + + W(11, -1105649969, 1031563375, -1111917112, 1032754595); + WS(1063926444, -1083200389); + sum1 = + W(0, -1110900111, 1015785179, -1132926346, 1034778082) + W(1, 1029162938, 1024169214, 1031086749, -1116484151) + + W(2, 1049653262, -1116391059, 1048988121, -1111931241) + + W(3, 1002973694, -1107911937, -1104277444, -1114837807) + + W(4, -1089993663, -1106495964, -1104923071, 1061854120) + W(5, 1062129309, 1059656227, 1047872627, 1055218483) + + W(6, 1052688460, 1038385529, 1045396422, -1089207439) + + W(7, -1085048198, -1087799869, -1096506545, -1096830980) + + W(8, -1098055945, -1108169455, -1110654965, -1111176153) + + W(9, 1042255343, 1043917388, 1012240783, 1049045741) + W(10, 1042131972, -1121748392, 1039495970, -1117097761) + + W(11, -1120523578, -1106362715, 1011366638, -1098874335); + sum2 = W(0, 1031260311, 1008713252, -1110413739, -1108083749) + + W(1, 1024382525, -1107516795, 1032742989, -1107429215) + + W(2, -1079593190, -1080879443, -1099919153, 1067198745) + + W(3, 1061544601, 1037733964, 1034769578, 1037726418) + W(4, -1121246507, 1035854140, 1040572163, -1111993270) + + W(5, 1060638292, -1122943484, -1098923291, 1034609836) + + W(6, -1113337443, 1043424734, -1112705599, -1102589461) + + W(7, 1044789771, 1040341398, -1116338100, 1030676197) + W(8, 985022717, -1123981203, 1028350097, 1035118406) + + W(9, -1119559917, 
-1123835438, 1021760610, -1114867192) + + W(10, 1032096214, -1130846658, -1119780465, 1007072124) + + W(11, -1132931022, -1131952682, 1028149723, -1115680064); + WS(-1091090263, 1040227933); + sum1 = + W(0, -1123880467, -1114808049, 1014154590, 1032220316) + W(1, 1018131933, -1121223347, -1128795491, 1008528046) + + W(2, -1113261184, 1027485320, -1101480756, 1043855170) + W(3, -1088775234, 1021676847, 1031227206, 1026364723) + + W(4, 1033284496, 1019377225, -1102787036, -1094753319) + W(5, 1065098078, -1107869654, 1045897152, 1025551690) + + W(6, 1044923332, -1098653061, 1059927492, 1040011797) + + W(7, -1099661886, -1151140018, -1113799968, -1125551659) + + W(8, -1105999782, 1042536309, -1095990282, 1044858269) + + W(9, -1091964998, 1045062557, -1109510984, 1023695008) + + W(10, -1135767575, -1127683258, 1037484605, -1120454550) + + W(11, 1029352975, -1109078132, 1027724204, 1006311503); + sum2 = W(0, -1123304381, 1011470018, -1116901758, 1029761957) + + W(1, -1135722528, -1111769341, -1107033568, 1032808415) + + W(2, -1135928985, -1131363804, 1044779190, -1101932196) + + W(3, -1097334030, 994986369, 1042173475, 1025113569) + W(4, 1039889929, 1045400660, -1100997480, 1055172736) + + W(5, -1087140500, 1054550160, -1110305303, -1115908038) + + W(6, -1113522433, -1100907702, -1104972678, 1058841413) + + W(7, 1039800739, -1122473050, -1109069053, 1032761157) + + W(8, -1108136941, 1038781261, 1052809383, -1093286229) + + W(9, 1034962749, -1105965260, 1038676003, -1155418681) + + W(10, 1038356153, -1139124254, -1105005165, 1040780481) + + W(11, -1105451984, 1042887374, -1114167820, -1152192529); + WS(1063982124, 1031276218); + sum1 = W(0, 1041442545, -1103090149, 1042807746, 1024468200) + + W(1, -1106882536, 1049721332, -1101888878, 1042454319) + + W(2, 1031361481, 1026538936, -1101400314, 1034898060) + + W(3, -1103132501, -1112916420, -1139074135, -1110788513) + + W(4, 1056936890, 1055470587, 1060963845, 1061353486) + W(5, 1063510555, 1048946651, 1054541302, 
1058321972) + + W(6, -1090353464, -1090262005, -1096383728, -1078114170) + + W(7, -1106902698, -1088793180, -1098771630, -1093460753) + + W(8, -1122143798, -1101092681, 1031723710, -1122232454) + + W(9, 1043143621, -1172995470, 1032268200, -1104729748) + + W(10, 1007942615, 1040474995, -1103896066, 1027975174) + + W(11, 1030475455, -1101011290, 1044176219, -1108198577); + sum2 = W(0, 1015674059, 1034996578, 1026624942, -1128049054) + W(1, 999367802, 1021382702, -1124407536, 1029480964) + + W(2, -1109357907, 1024785272, -1129998179, -1120348186) + + W(3, 1048014132, -1105300006, 1046166047, -1116382211) + + W(4, 1031332447, 1048659656, -1108514478, 1052415888) + + W(5, -1105319294, -1117149322, 1010482847, -1154249511) + + W(6, 1066992274, 1076858856, 1078632808, 1073772904) + W(7, -1105884909, 1038594495, 1038514249, -1098756758) + + W(8, -1074609058, -1070866381, -1068251573, -1076608801) + + W(9, -1103939197, -1109221944, -1098022157, -1125235504) + + W(10, 1048304191, 1030261153, -1098508369, -1108782845) + + W(11, 1048310856, 1031934850, 1046147873, 1038709270); + WS(-1084008748, -1076428689); + sum1 = W(0, 995293936, -1162911689, -1138016718, -1132274729) + + W(1, -1145295444, -1131268254, -1201649403, -1135278623) + + W(2, -1119372516, 1036299874, 1017340780, 1041960361) + W(3, 1044385332, 1034368866, 1043013720, -1103541652) + + W(4, -1088304948, -1097059736, -1085861109, -1081600491) + + W(5, -1083799507, -1090459925, -1104403080, -1103180426) + + W(6, 1059786207, 1051547992, 1060565598, 1064118968) + W(7, 1058261310, 1050466917, 1049921019, 1042671596) + + W(8, -1137964103, -1125552990, -1116363658, -1116908178) + + W(9, -1119006543, 1026422690, -1131911306, 1051353751) + + W(10, 1013109895, 1006687204, 1002349741, 1007519945) + + W(11, 994300076, -1119097260, -1123993357, 1023041036); + sum2 = + W(0, 1033856632, 1041148647, -1094518814, 1045521814) + W(1, -1128266986, 1033117136, -1173165821, 1041230368) + + W(2, -1090933982, -1094617784, -1122277735, 
1062250596) + + W(3, 1027053741, 1050179515, 1033890948, -1086799133) + W(4, 1068958128, 1066199400, 1055138982, -1070105649) + + W(5, -1070096660, -1082392713, 1067349236, 1076979077) + + W(6, 1067844920, -1102124443, 1051248434, -1078683071) + + W(7, -1079091570, -1089309992, 1047411220, 1070525583) + + W(8, -1095515318, -1122244574, 1036900546, 1046136146) + W(9, 1021983978, 1045719314, 1050937215, -1091587052) + + W(10, -1124923033, 1042779533, -1103088355, -1114411292) + + W(11, -1114099181, 1023274434, -1124200313, 1042022635); + WS(-1081223670, 1021288719); + sum1 = W(0, 1011016529, -1123721507, 1017381041, 1017648243) + W(1, 1028006000, -1122128468, 1009660876, 1007983923) + + W(2, 1035837139, 1032041909, -1108640782, 1024313608) + + W(3, -1105590117, -1108791100, -1128783630, -1116825572) + + W(4, 1042651768, 1036928499, 1055254255, 1061777248) + W(5, 1061429156, 1052534382, 1049990152, 1053483289) + + W(6, -1098433890, -1100608474, -1094722148, -1083909329) + + W(7, -1087617676, -1091388554, -1104773376, -1093454083) + + W(8, -1129696110, 1024607124, -1131079071, 1047474926) + + W(9, 1036831026, 1033276873, -1150770463, 1005912461) + W(10, 995272815, 1027549342, -1116321147, 1017435439) + + W(11, -1122084432, -1139273509, 1024807730, -1125665142); + sum2 = W(0, 1057380038, 1057597982, 1069621581, 1060801069) + W(1, 1060050844, 1041938366, -1122435905, 1024344515) + + W(2, -1088994983, -1085734595, -1072283812, -1079782207) + + W(3, -1081640783, 1046270010, -1113922524, 1043446590) + W(4, 1016685185, 983594263, 1066773393, 1060848632) + + W(5, 1043641192, -1105013658, -1104981345, -1105295106) + + W(6, 998662038, 1031174835, -1099503122, -1097910780) + W(7, 1038323247, 1026094487, 1036576275, -1145544806) + + W(8, 1040844632, -1116219635, -1134907857, 1027931069) + + W(9, -1111695032, 1047921746, -1114519002, 1032973176) + + W(10, 1027712021, -1109851055, 1037507283, -1116080368) + + W(11, -1123552119, 1028446195, -1108250726, 1031771441); + 
WS(-1120405176, 1052288640); + sum1 = + W(0, -1136485047, -1120969533, 1032711321, 1027237267) + W(1, 1021305874, -1146847086, -1133175986, 1039673939) + + W(2, 1040656325, 1013423883, -1108004548, -1107763377) + W(3, 1022040031, 1040265357, 1031093856, -1100629595) + + W(4, 1051431647, 1042796768, 1062809491, 1060861813) + W(5, 1027813500, -1107918083, 1039657966, 1043412169) + + W(6, -1114886102, -1092939904, -1083705303, -1089040632) + + W(7, 1039332409, -1098943929, -1112315213, -1104541036) + + W(8, -1113721480, 1039390695, 1026161897, -1116341945) + + W(9, -1108229347, 1035322940, -1115465454, -1114748782) + + W(10, -1115542032, -1165530871, 1020434857, 1024821617) + + W(11, 1031183209, 1019272038, 1024346473, 1012723731); + sum2 = + W(0, 1032873736, -1106233681, 1036788040, 1009987037) + W(1, -1117147579, 1018109057, -1114508674, -1106556966) + + W(2, -1128467212, -1131275971, -1113770947, -1088748320) + + W(3, -1097209973, 1040210251, -1099376356, 1050939532) + + W(4, -1098460251, 1035809416, -1078975662, 1062703231) + W(5, 1059662245, -1110493560, 1052516052, 992650581) + + W(6, 1024553592, -1114616785, -1092555656, 1067981395) + W(7, 1058574054, -1103422592, 1038056552, 1026754378) + + W(8, 1047433570, 1026776904, -1132131619, -1104531393) + + W(9, -1106248497, -1125640736, -1107638185, -1107493388) + + W(10, -1128334191, -1107845863, 1043173677, -1111351589) + + W(11, 991598885, -1107997815, 1023829740, -1106870741); + WS(1050656983, 1072047790); + sum1 = W(0, -1128713579, 1026709637, 1023201012, 1030859291) + + W(1, 1041237492, -1152997210, -1155686725, 1040488948) + + W(2, 1048792739, 1034799517, 1014670385, -1116111658) + W(3, 1025355287, -1112906461, 1036463444, 1002270506) + + W(4, 1050333168, 1050171449, 1053203577, 1064066450) + W(5, 1060032700, 1050694506, 1053095469, 1046913707) + + W(6, -1097949975, -1104033571, -1085647978, -1080119690) + + W(7, -1088145917, -1088560507, -1105136840, -1093840327) + + W(8, -1101689274, 1036522731, -1115538280, 
1043957990) + + W(9, 1037919995, -1120212113, 1035379762, -1108210918) + + W(10, -1127716817, 1037025856, -1116963676, -1112813084) + + W(11, 1036783049, -1113565993, 1035776666, -1122514141); + sum2 = + W(0, 1032121925, -1105031648, -1130820039, -1105961083) + W(1, 1040237041, -1091675607, 1052906990, -1119953618) + + W(2, 1067037293, 1072414374, 1072723545, 1076922042) + W(3, 1072846596, -1079373087, 1048404715, -1090018360) + + W(4, -1074821338, -1081225596, -1069168235, -1068908234) + + W(5, 1068735095, 1050158994, 1043049709, 1036354441) + W(6, -1094551116, 1050387420, 1050306407, -1080768789) + + W(7, -1110799161, 1048692149, 1039557097, -1114340558) + W(8, 1045030197, 1035683027, 1054464199, -1129129027) + + W(9, -1109242003, -1102086715, 1050068036, -1106557101) + + W(10, -1115959604, -1117669800, -1109697271, -1106200850) + + W(11, 1041008693, -1107790175, 1019897614, 1034398799); + WS(-1081468886, 1041730784); + sum1 = W(0, -1114247594, 999910687, -1112649805, 1022168820) + W(1, 1009392321, 1025791509, -1115410306, 1020540891) + + W(2, 1021500053, 1042624634, 1043096639, 1034747044) + W(3, -1103886814, -1101453262, 1026735815, 1034256626) + + W(4, 1028761377, 1005300019, -1100727404, 1035906940) + W(5, 1062898916, 1054732110, 1042621601, 1041116931) + + W(6, 1044129269, 1053438789, 1055682820, -1086991443) + + W(7, -1084075064, -1098009154, -1097832968, -1117544535) + + W(8, -1131183895, -1096850135, -1106832988, 1038040260) + + W(9, 1044548897, 1037861846, 1022910341, -1121491970) + + W(10, -1121682823, 1029411807, 1022343799, -1113521452) + + W(11, -1110992968, -1110478453, -1125653940, -1112646442); + sum2 = W(0, -1109856908, 995541592, -1107331629, 1043476326) + + W(1, -1104942022, 1039471517, -1114856041, 1022399907) + W(2, 1025315462, 1040254152, 1035675473, 1026228130) + + W(3, -1101803200, -1123010638, 1040268905, 1028920354) + + W(4, -1111613060, 997512408, -1115826672, 1067108106) + + W(5, 1053887552, -1104388416, -1120685867, -1103458923) + + 
W(6, -1108093549, -1085520225, -1103361463, 1059978555) + + W(7, -1094292172, -1120534147, -1108094032, 1040596394) + + W(8, 1041489120, -1094645099, -1102705141, -1121534122) + + W(9, 1039802977, 1017265579, 1040793483, -1107135380) + + W(10, -1114414815, 1040714423, -1125997775, 1032256197) + + W(11, -1130615251, -1129091643, -1108206349, 1027593934); + WS(1054796503, -1111794816); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[555]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define temp_tex(pos) (float(texture(temp, pos).x)) +static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2); +static const float2 temp_pt = float2(1.0 / (temp_size.x), 1.0 / (temp_size.y)); + +#define HOOKED_tex(pos) temp_tex(pos) +#define HOOKED_size temp_size +#define HOOKED_pt temp_pt + +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 555; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (2)) + 0.5, float(group_base.y + y - (3)) + 0.5)).x; + } + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[12]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + 
samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 4]; + samples[1][1] = inp[local_pos + 5]; + samples[1][2] = inp[local_pos + 6]; + samples[1][3] = inp[local_pos + 7]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; + samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 19]; + samples[3][1] = inp[local_pos + 20]; + samples[3][2] = inp[local_pos + 21]; + samples[3][3] = inp[local_pos + 22]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; + samples[4][2] = inp[local_pos + 32]; + samples[4][3] = inp[local_pos + 33]; + samples[5][0] = inp[local_pos + 34]; + samples[5][1] = inp[local_pos + 35]; + samples[5][2] = inp[local_pos + 36]; + samples[5][3] = inp[local_pos + 37]; + samples[6][0] = inp[local_pos + 45]; + samples[6][1] = inp[local_pos + 46]; + samples[6][2] = inp[local_pos + 47]; + samples[6][3] = inp[local_pos + 48]; + samples[7][0] = inp[local_pos + 49]; + samples[7][1] = inp[local_pos + 50]; + samples[7][2] = inp[local_pos + 51]; + samples[7][3] = inp[local_pos + 52]; + samples[8][0] = inp[local_pos + 60]; + samples[8][1] = inp[local_pos + 61]; + samples[8][2] = inp[local_pos + 62]; + samples[8][3] = inp[local_pos + 63]; + samples[9][0] = inp[local_pos + 64]; + samples[9][1] = inp[local_pos + 65]; + samples[9][2] = inp[local_pos + 66]; + samples[9][3] = inp[local_pos + 67]; + samples[10][0] = inp[local_pos + 75]; + samples[10][1] = inp[local_pos + 76]; + samples[10][2] = inp[local_pos + 77]; + samples[10][3] = inp[local_pos + 78]; + samples[11][0] = inp[local_pos + 79]; + samples[11][1] = inp[local_pos + 80]; + samples[11][2] = inp[local_pos + 81]; + samples[11][3] = inp[local_pos + 82]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 33]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= 
outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret); +} diff --git a/src/Effects/NNEDI3/NNEDI3_nns32_win8x4.hlsl b/src/Effects/NNEDI3/NNEDI3_nns32_win8x4.hlsl new file mode 100644 index 000000000..56b26f7a5 --- /dev/null +++ b/src/Effects/NNEDI3/NNEDI3_nns32_win8x4.hlsl @@ -0,0 +1,1193 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: nnedi3.py --nns 32 --win 8x4 --use-compute-shader --use-magpie +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME NNEDI3_032_4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 1 * 2 +//!HEIGHT INPUT_HEIGHT * 2 * 1 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!FORMAT R16_FLOAT +//!WIDTH INPUT_WIDTH * 1 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D temp; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_temp; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC NNEDI3 (double_y, nns32, win8x4) +//!IN INPUT +//!OUT temp +//!BLOCK_SIZE 32, 16 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[8]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 8; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 32.0; + float mstd1 = sumsq / 32.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, 1036208225, -1091571194, 1048590727, 1032174663) + W(1, 1039469975, -1095740683, 1028885330, 1026075554) + + W(2, 1027560868, -1097328754, 1039928979, 1023201972) + W(3, 1039536921, 1025116802, 1017576356, 1042058704) + + W(4, -1144450173, 1043923973, -1098962113, 1033278580) + + W(5, 1035622988, 1049700439, -1090255529, 1046104373) + W(6, 1024461970, 1024545936, -1100301043, 1035224848) + + W(7, 1029776426, 1047463785, -1090407950, 1043393326); + sum2 = + W(0, -1098253927, 1055965966, 1006679350, -1096709168) + W(1, -1166389754, -1101782718, 983512221, -1105895383) + + W(2, -1106582410, 1045236553, 
-1097283179, 1040768379) + + W(3, 1034460863, -1132225959, -1090237017, 1030302550) + W(4, 1027738130, 1057568628, 1036184228, -1111760224) + + W(5, -1113669665, 1055278331, -1133967067, 1033275358) + + W(6, 1016925525, -1127810369, 1022264595, -1104949250) + + W(7, -1129223232, -1108092808, 1009711002, 1045297871); + WS(-1080723616, -1076327864); + sum1 = + W(0, 1017315691, -1106293281, 1041272315, -1113397025) + W(1, -1140602116, 1024505865, -1129176587, 1022464939) + + W(2, 1037257964, -1101486193, 1040932726, 1024865293) + W(3, -1107192473, 1027220277, -1098639185, 1043879330) + + W(4, 1022003974, 1032001763, 1032224091, -1118929755) + W(5, 1022805527, -1106692027, 1038889695, -1123579783) + + W(6, 1021604747, 1020788295, 1019749115, -1117432531) + W(7, 1014959990, -1109388172, 1036681163, 998707020); + sum2 = W(0, 1015755803, -1127131649, -1137718334, 1033341381) + + W(1, -1140061902, 1016927815, -1129880851, 1024947773) + + W(2, -1129835189, -1112279061, -1089663810, 1058036499) + + W(3, 1032555441, -1095938931, -1069364926, 1079454606) + + W(4, -1138877514, -1116204914, -1092530653, 1053318688) + + W(5, -1123339814, 1009679614, -1148409052, 1022404175) + + W(6, 1016646759, -1153656135, -1122312216, 1028043253) + + W(7, -1165772366, -1146249364, 1018619187, 1025789685); + WS(1058954560, 1004956589); + sum1 = + W(0, -1128607008, -1102342440, 1027139080, 1038455278) + W(1, 1038458494, -1105082889, -1147341058, -1156235011) + + W(2, 1047693245, -1085974686, 1057887999, -1103441674) + W(3, 1026348672, 1047296084, -1100046248, 1029167261) + + W(4, -1108459679, 1058722958, -1087115665, 1033885827) + + W(5, -1116524774, -1135229473, 1043502422, 1027001826) + W(6, 1024592800, 1019546620, 1020243248, -1124163292) + + W(7, -1117551096, -1122816147, 1043995308, 1023459588); + sum2 = + W(0, 1039897191, 1032715073, -1104821098, -1103487249) + W(1, -1116902056, 1035295215, -1120385367, -1118716257) + + W(2, -1115617116, 1051413477, 1060018690, -1091510775) + + W(3, 
-1116419565, -1089611288, -1132652148, 1048354743) + + W(4, -1121428266, 1039274043, 1008252604, 1040404198) + W(5, 1003022729, -1102315300, -1117381948, 1037629937) + + W(6, 1032782009, 1012620980, -1113601670, 1035102099) + + W(7, -1161229156, 1023483611, -1116040249, -1132320852); + WS(-1088253760, -1127575790); + sum1 = W(0, 1038728366, -1156102863, 1036225546, 1011512228) + W(1, 1033404580, 1042126700, -1111684630, 1043566527) + + W(2, 1036121471, -1099951467, 1050604224, 1032168346) + + W(3, 1045370723, -1083618166, -1088174091, 1050156122) + W(4, 994877095, 1040753147, -1098202972, 1037663513) + + W(5, 1036217881, -1099186053, 1039701042, 1034991255) + W(6, 1012008372, 1033329669, 1015138122, 1030253051) + + W(7, 1040247041, -1099351704, 1032444126, 1041638884); + sum2 = + W(0, 1030438042, -1147399085, -1093920639, -1109485523) + W(1, 1009286759, 1025260806, -1086940227, -1143426189) + + W(2, 1027607240, 1032031341, -1090745109, -1114490725) + W(3, 1019667875, 1065802217, 1049694895, -1135657207) + + W(4, -1113200383, 1046766533, -1117099650, 1032826151) + W(5, 1032828373, -1110418147, 1041550488, 1024844082) + + W(6, -1125602091, -1137630919, -1114583845, 993352218) + + W(7, 1008172775, -1119451994, 1034586333, -1137428855); + WS(-1079364256, 1057874888); + sum1 = W(0, 1038744583, 1023431711, -1097930673, 1027252187) + W(1, 1028964378, 1034807569, -1105253352, 1030066159) + + W(2, 1038770168, 1043817591, -1095573950, 1037737491) + + W(3, 1044691424, -1093522810, -1085514994, 1045101529) + + W(4, 1039465822, 1033837359, -1111615463, 1019830473) + W(5, 1031872786, 1026052487, 1038514603, 1014931844) + + W(6, 1031706975, 1024735357, 1029114647, 1030853165) + W(7, 1026510455, 1032349913, 1036441879, 1025856707); + sum2 = W(0, -1110129969, 1023276596, 1028080466, -1115008651) + + W(1, -1122608874, 1020476676, -1106237621, -1145788944) + + W(2, -1109784187, 1055608478, -1110801543, -1135649000) + + W(3, 1036903789, 1056514262, -1099727798, 1034305529) + W(4, 
1016388156, -1122102418, 1033517697, 1019861916) + + W(5, -1134147112, 1032271709, -1114242709, -1104837577) + + W(6, 1025386706, -1109100200, -1110189468, 1027443202) + + W(7, -1114085729, 1030585586, -1128034084, -1105665752); + WS(-1091483264, -1079194671); + sum1 = W(0, -1131351604, -1116837930, -1139989065, 1024497830) + + W(1, -1113984995, 1032278999, 987288903, -1120582522) + + W(2, 1021186040, -1099425098, 1055014512, -1105371085) + + W(3, -1116327562, 1045060919, 1046630795, -1115581427) + + W(4, -1132080655, 1054575181, -1092379987, 1026057008) + + W(5, -1206829265, -1113261897, 1027226210, -1110689206) + + W(6, 1023600234, -1130307600, -1122295330, -1130576880) + + W(7, -1126861572, -1115285054, 1003385298, -1136751161); + sum2 = W(0, -1129629424, 1004745569, 1012138921, 1021845392) + W(1, 1028468886, -1114381934, 1032075412, 1000184001) + + W(2, -1120636360, 1032177433, -1114041388, 1017905952) + + W(3, -1136732761, 1051604488, -1122709320, -1141636449) + + W(4, 1036262769, -1115452378, -1105925279, -1138562729) + + W(5, 1022425868, 1026786204, -1102674451, 1006971361) + + W(6, 1019955588, -1123076816, 1002766561, -1146151969) + + W(7, -1123045172, -1139760465, -1135173233, -1127921184); + WS(1067689632, -1150117831); + sum1 = + W(0, -1109849008, 1047555836, 1035630764, -1099893915) + W(1, -1130553174, 1029999475, -1114674616, -1113991840) + + W(2, -1123520321, 1050420518, 1040630545, -1107722684) + + W(3, 1036413160, -1116179871, -1098283742, -1150636351) + W(4, 1029117869, 1035246488, 1031697439, 1041501593) + + W(5, -1127374955, -1104890550, -1116392999, 1019029347) + + W(6, -1140464318, 1020611803, -1125473743, 1031847583) + + W(7, 1027561031, -1113066609, -1114557056, 1006720846); + sum2 = W(0, 1032302578, 1032483043, 1055539881, -1089061083) + + W(1, -1114467512, 1037446674, 1053856893, -1095432519) + + W(2, 1035589071, -1114962225, 1060021172, -1087688027) + + W(3, -1140727686, -1107033941, 1044678306, -1119624965) + + W(4, 1028178655, 
1017647091, -1086216329, 1060967891) + + W(5, -1123814512, -1102520792, -1096729370, 1056004093) + + W(6, -1131958732, 1028700703, -1098594526, 1046082925) + + W(7, -1114865396, -1106445462, -1103816765, 1049601918); + WS(1053174400, -1151490459); + sum1 = + W(0, -1122069115, 1032070534, -1106451229, 1016988943) + W(1, -1147738936, -1116405775, 1015337751, -1131178679) + + W(2, 1027718109, 1043517586, -1118269791, 1030116243) + W(3, 1032697654, -1108806494, -1117317899, 1014470144) + + W(4, 1025368761, 1041596435, 1044803791, -1112007328) + W(5, -1112696969, 1030488179, -1098708424, 1036691591) + + W(6, 1007532846, -1145227340, 1024445699, -1121635481) + + W(7, -1121758051, 1032005679, -1105947015, 1025294511); + sum2 = W(0, 1030975148, -1130068082, -1128045540, 1009416079) + + W(1, 1021517080, -1126516206, 1010660367, 1014043007) + + W(2, 1055956931, -1091517508, -1115164896, -1124472846) + + W(3, 1077778659, -1071480833, -1096786527, 1028586964) + + W(4, 1057214094, -1089646275, -1109767888, 1004868686) + + W(5, 1029844584, -1129174056, 1010452735, -1122799750) + + W(6, 1029913248, -1116251741, -1130073888, 1023899390) + + W(7, 1021753404, 998387790, -1161269496, -1126055830); + WS(1064255296, 1017910760); + sum1 = W(0, 1034554881, 1054734130, -1098111935, -1127407003) + + W(1, -1112041607, 1053450179, 1024002066, -1117294562) + + W(2, -1108750194, 1051103338, -1095577677, 1016696883) + + W(3, -1098306542, 1044871248, 1046694116, -1097130662) + + W(4, -1140839485, -1103441076, -1111281057, 983357671) + + W(5, -1117661336, 1027829882, 1048629833, -1104687573) + + W(6, -1142577806, -1122634224, 1032310597, -1118108640) + + W(7, -1131295699, 1024857124, 1048835924, -1098281839); + sum2 = W(0, -1120767464, 1053966016, 1035968374, -1147532546) + + W(1, 1032914397, 1073793292, 1040357442, -1129131123) + + W(2, -1109760249, 1060302091, -1122023462, -1130362887) + + W(3, 1040094052, -1072585870, -1086238928, -1116834009) + + W(4, 1016200567, -1094220304, -1172011891, 
1036030945) + + W(5, 1009241688, -1113143087, 1043210576, 1015923256) + W(6, -1122384418, 1028063795, 1025380203, 1017132829) + + W(7, 1032959032, -1104349617, -1119727737, 1019714285); + WS(-1096343168, -1126236522); + sum1 = W(0, 1030703603, -1108689250, 1038321302, -1121378303) + + W(1, 1007810299, -1119306439, 1031818658, -1127608870) + + W(2, 1040041675, -1096469888, 1053153171, -1126859158) + + W(3, 1047119465, -1085509191, -1089344732, 1045457184) + + W(4, -1113575360, 1056154112, 1045446045, -1144694086) + W(5, -1132527909, 1020253526, 999324634, 1029954693) + + W(6, -1134236941, 1041829816, -1115315934, 1019451938) + + W(7, 1009418349, 1033231243, -1107897834, 1026209383); + sum2 = W(0, 983284602, 1020456556, -1131636264, 1015204808) + W(1, 1016178696, -1144975071, -1131007184, 1022946788) + + W(2, 1029794920, -1114118472, 997214653, 998054973) + W(3, 1029986296, 1041996066, 1053737341, 1023304596) + + W(4, -1109420788, -1139464095, -1097949071, 1029400914) + + W(5, -1131549160, -1114876306, -1113647896, 1024576522) + + W(6, -1118046352, 1015095008, -1115559594, -1124860928) + + W(7, -1117716768, 983229178, -1141234271, -1131778280); + WS(1054415488, 1031748714); + sum1 = W(0, 1032036500, 1016814449, -1098109636, 1018567785) + + W(1, -1106849745, 1048190511, -1110288412, -1119790601) + + W(2, 1032905605, 1025341687, -1090213441, 1041909072) + W(3, 998976710, 1051815981, 1042721902, 1029877301) + + W(4, 1035586875, -1102465150, -1117518441, 1033626399) + + W(5, 1024707071, -1113968464, 1046144796, -1106536335) + + W(6, -1137244643, -1127337837, -1117570737, 1018599181) + + W(7, 1040319748, -1115415046, -1135047459, -1122513053); + sum2 = + W(0, 990440998, -1103416430, -1120602807, 1034530622) + W(1, -1138393335, -1121991569, 1037302636, -1117267403) + + W(2, -1115155620, -1128966950, -1098451061, 1032576586) + + W(3, 1002996807, -1100463136, 1060125031, -1114490937) + + W(4, -1122715509, -1095499559, 1057202890, -1120765719) + + W(5, -1107496177, 
1010012987, 1038403096, -1126507430) + W(6, -1109700727, 1000571127, 1011003397, 1027555951) + + W(7, -1100731818, 1048400711, 1041385707, -1105804206); + WS(1057399616, 1074070393); + sum1 = + W(0, -1116946950, 1049855515, -1097294408, 1045229301) + W(1, -1167692925, 1048650971, -1103643414, 1041758026) + + W(2, 1025124136, 1035712138, -1094845877, 1045455640) + W(3, 1027460150, -1096264273, -1106323062, 1036926493) + + W(4, 1033233658, -1093603523, 1045414146, 1036007790) + W(5, 1043057687, -1089515076, 1057610273, -1123310040) + + W(6, 1031827147, -1102565613, 1043872372, 1012034194) + W(7, 1036180887, -1089497938, 1048651645, 1034964477); + sum2 = W(0, -1099019906, -1135117191, 1053230727, -1102919364) + + W(1, 1016280180, -1104821294, -1108236474, 1021862852) + + W(2, -1104956478, 1031344902, 1044240449, -1102746088) + + W(3, -1160553787, 1057285857, 1060881655, -1121198562) + + W(4, -1115190705, -1108143797, -1141578351, -1107568696) + + W(5, 1040551613, -1096822242, -1110487530, 1027740054) + + W(6, -1110901592, -1121444630, 1033167055, -1115478973) + + W(7, 1043284947, -1108844527, -1097266933, 1039050835); + WS(-1079272096, -1088198283); + sum1 = W(0, -1150549691, -1112293630, 1038263474, -1117531256) + + W(1, -1136809437, -1147892734, -1131332343, 1024124492) + + W(2, 1035238887, -1101200910, 1042949876, 1022854919) + + W(3, -1114730926, -1153686203, -1109365886, 1038223988) + + W(4, 1013948517, 1033312670, -1123815596, -1137128325) + + W(5, 1024493634, -1111552818, 1033656538, -1131368443) + W(6, 1014747375, 993656219, 999554974, -1123012682) + + W(7, 1007675727, -1115285591, 1031561484, 983503863); + sum2 = + W(0, -1130912341, -1131869901, 1021624533, -1116201618) + W(1, 933663296, 973137042, 1031913949, -1114826138) + + W(2, 1024898014, 1022547165, 1049231168, -1097351564) + W(3, 1026863962, -1103034755, 1074345895, -1074065991) + + W(4, 1006981209, 1032168251, 1046910286, -1101411998) + W(5, 1015250909, -1122671508, 1033396431, -1116799454) + + W(6, 
-1128875465, 1002623282, 1015118461, -1122428830) + + W(7, -1163156681, -1124284861, 987851849, -1131138133); + WS(1066898592, -1135257599); + sum1 = W(0, -1110180874, 1039457202, -1099722419, 1033603984) + + W(1, -1104421608, 1036077428, -1109956850, 1025966004) + + W(2, -1108282395, 1042328969, -1108344348, 1027866348) + + W(3, -1109212258, 1047234565, 1050666829, -1127852161) + + W(4, -1112266449, -1102031401, 1052651555, -1117920074) + + W(5, -1104815307, -1101771699, 1056795268, -1103088922) + + W(6, -1117810340, -1107974833, 1044347095, -1114797145) + + W(7, -1107536862, -1105991798, 1052879183, -1109126692); + sum2 = W(0, -1109573955, 1053252716, -1111963093, -1104024699) + + W(1, -1130272686, -1099761318, 1023415227, -1113318439) + + W(2, -1112628352, 1053124236, 1036225124, -1113101390) + + W(3, -1135365682, 1015950468, -1098416341, 989675628) + W(4, -1127976571, 1055173972, 984548524, -1113626094) + + W(5, -1109041460, 1056516595, -1099834172, 1021900236) + + W(6, -1141484375, -1111992284, -1115296758, -1121224929) + + W(7, -1124075328, -1111067004, -1123095583, 1035158074); + WS(-1080514464, 1071098312); + sum1 = + W(0, 1029423638, -1104998922, 1034143047, -1121480226) + W(1, 1032590577, -1109637973, 1035675859, -1116754806) + + W(2, 1026820152, -1100165641, 1041959039, -1119258414) + W(3, 1032314563, 1052963835, 1044386890, -1124675269) + + W(4, -1121041060, 1049180397, -1094089003, -1141724766) + + W(5, -1118573868, 1051364482, -1091683740, 1026232616) + + W(6, -1120807462, 1040988017, -1104568584, -1142663074) + + W(7, -1138006521, 1046351030, -1100071438, -1128189972); + sum2 = W(0, -1103917515, 1047440331, 1045127249, -1101094893) + + W(1, 1015651007, -1161898742, -1107946310, 1034127906) + + W(2, -1110033797, 1037971494, 1032097498, -1105034150) + + W(3, -1102760582, 1064882055, 1062501861, -1105985747) + + W(4, -1111112749, -1094052244, -1097807648, 952358760) + + W(5, 1038662254, -1095531648, -1103513552, 1029080884) + + W(6, -1114159185, 
1028288648, 1000704926, -1115269879) + + W(7, 1037944898, -1103088589, -1103412727, 1036625418); + WS(1048356096, 1025975827); + sum1 = W(0, -1127785256, 1042988217, -1143337570, -1113788528) + + W(1, -1122099232, 1042373466, -1109964850, 985728647) + W(2, -1101696686, 1057753530, -1091154007, 981326605) + + W(3, 1026577988, 1024353684, 1027827940, 1025248769) + W(4, 1038799529, -1089144738, 1052285203, 1031883373) + + W(5, 1035389109, -1100403603, -1125657656, 1042938314) + + W(6, 1026796836, -1117105634, -1109515886, 1041478413) + + W(7, 1026531900, -1118224663, -1110632314, 1024598728); + sum2 = W(0, 1012220951, -1127410563, -1133847817, -1150652997) + + W(1, -1116521159, -1105350472, 1023881724, -1121646209) + + W(2, -1112142878, 1052174871, 1044599155, -1123898264) + + W(3, 1029633040, -1097063130, -1106094805, 1019540966) + + W(4, -1134401830, -1157154125, 1059986787, -1101447493) + + W(5, -1123437332, 1029744370, -1111846084, -1113752268) + + W(6, -1127274631, -1133421881, 1028497368, -1115689394) + + W(7, -1127142758, 1031896001, -1111750394, -1144497399); + WS(1047538944, -1094881626); + sum1 = + W(0, -1131292909, 1026526343, -1137367995, -1137761291) + W(1, -1116924690, 1015768341, 1040643394, -1105713526) + + W(2, -1112297452, 1061821276, -1088680894, 1031500559) + + W(3, 1021777101, 1006944475, -1090839121, -1122499811) + + W(4, 1043477817, -1091685625, 1059324523, -1107169411) + + W(5, -1116873677, 1042550748, -1162815447, -1119456201) + + W(6, -1148100662, -1112344306, 1036475455, 998501030) + W(7, -1142312694, 1025260793, 1009991259, 1032218389); + sum2 = W(0, 1018368946, -1112001321, -1120361628, -1136144308) + + W(1, 1027383442, -1114299623, -1113414147, 995591994) + + W(2, 1026347909, -1159080892, -1114902877, -1130300135) + + W(3, -1120907217, 1048017951, 1047229175, -1113455406) + + W(4, -1113638165, 1043117803, 1034197636, -1104824263) + + W(5, 1015876554, -1117678872, -1153448320, -1135559770) + + W(6, -1121952605, 1016312397, 1030999359, 
-1116260909) + + W(7, -1157376536, -1122492527, -1146986041, 1024817261); + WS(1059019584, -1093542352); + sum1 = + W(0, -1118590060, 1030085820, 1044414089, -1139295455) + W(1, 1036739212, -1117660736, 1037660574, 1017695255) + + W(2, -1117950862, 1053611807, 1035930184, -1132182919) + + W(3, 1044992135, -1088498407, -1087395836, 1041445842) + + W(4, 1031932973, -1106481599, 1044296603, -1162575911) + + W(5, -1157350427, 1041316489, -1101366685, 1028651518) + W(6, 1018600951, 1033416637, -1113146202, 1029799946) + + W(7, -1135788111, 1042914684, -1105147443, 1024137952); + sum2 = W(0, -1139124704, -1098733365, -1134878408, 1017748092) + + W(1, -1129149604, -1096002059, -1139933424, -1120672660) + + W(2, -1112674264, -1099326395, 1006823136, 1007868832) + W(3, 1041796064, 1042574015, 1054885382, 1038924927) + + W(4, 1023328190, 993486752, 1040771663, -1118903044) + W(5, 983636161, 1013026008, 1012616072, -1117420588) + + W(6, -1131439188, -1121650464, -1119789204, 1032346590) + + W(7, 968306692, 1025526661, 1007743024, 1016029412); + WS(1034201600, 1032755867); + sum1 = W(0, -1153568391, -1096012775, 1054286804, -1151613767) + + W(1, 1027758260, -1105013167, 1040809774, -1113241540) + + W(2, -1120836106, -1096428191, 1052819554, -1109787424) + + W(3, -1106713574, 1052356477, 1045456334, -1103823871) + + W(4, -1110497539, 1040996254, -1104715738, -1140335480) + + W(5, -1113115007, 1046162344, -1106901340, -1136669162) + + W(6, -1129181937, 1026611286, -1119554608, -1129950605) + + W(7, -1106649828, 1046578147, -1111616164, 1018423993); + sum2 = + W(0, -1116195452, 1046553581, 1060291180, -1133268119) + W(1, -1114916062, 1044137813, 1067697792, 1023665145) + + W(2, 1015539387, 1039450853, 1056432615, 1037512606) + W(3, 1031939547, -1081686100, -1075409426, 1033868463) + + W(4, 1004500534, -1106570618, -1100622921, 1015733077) + W(5, 1002967134, 1023858175, 1018449047, -1120650920) + + W(6, -1116253852, 1032662783, 1002080710, 1016550645) + W(7, 1037899637, 
1018521363, -1106597825, 1008358731); + WS(-1092032128, -1114982082); + sum1 = W(0, 1022785838, -1092594901, 1053421570, -1131318086) + + W(1, 1030256707, -1096516325, 1038780564, -1117764855) + + W(2, 1019093490, -1091986421, 1047196191, -1114799192) + + W(3, -1129380878, 1046024022, 1055287349, -1136363407) + + W(4, -1136259347, -1122621159, -1127922606, -1144800614) + + W(5, -1112386231, 1047446468, -1103792760, 1031400745) + + W(6, -1119087591, 983613607, -1127804734, -1141783466) + + W(7, -1105547264, 1044817638, -1107803612, 1035179430); + sum2 = W(0, 1041280609, -1101864626, -1094055741, 1033694830) + + W(1, -1122718101, -1105911709, -1103824921, -1107222411) + + W(2, 1032208518, -1098031958, -1096453130, -1124818562) + + W(3, -1105883428, 1058683727, 1064792422, -1108861898) + + W(4, 1032385022, -1120084233, 1036982526, -1126293810) + + W(5, -1105712369, 1049511237, 1028614917, -1108778587) + + W(6, 1026423073, -1101782173, 1024411853, 1022233154) + + W(7, -1100029914, 1048360263, 1045846387, -1104838815); + WS(-1109129728, 1010433912); + sum1 = W(0, -1117250164, -1096823624, 1040646763, 1019407999) + + W(1, 1006908285, -1105899410, 1019351431, -1137630799) + + W(2, -1122855062, -1092542200, 1052048931, -1109418342) + + W(3, -1107144297, 1057728383, 1064635860, -1105008732) + + W(4, -1106683568, 1037050434, -1117087348, -1114857797) + + W(5, -1123315850, 1026000812, -1107858814, 1029923270) + + W(6, -1130212999, -1117617698, -1107016783, -1137382471) + + W(7, -1125685047, -1122267030, -1100117973, 1034575732); + sum2 = W(0, -1153548328, -1138461074, -1109953492, 970383811) + + W(1, -1146428548, 1040260969, -1109276610, 1016706951) + + W(2, -1122720334, 1050908092, -1119113122, -1113772244) + + W(3, -1131960189, 1057542522, -1109440842, -1149783528) + + W(4, -1105417557, 1046314824, 1008998438, -1111645386) + + W(5, -1140382642, -1105004761, -1140136790, -1114961145) + + W(6, -1130524097, 1024281381, -1119699464, -1128062487) + + W(7, 1025793768, 
-1110366356, -1135544654, -1113390177); + WS(1028043776, 1066748487); + sum1 = + W(0, 1020869188, 1042153037, -1114798157, -1111756535) + W(1, 1029235758, -1142684082, -1109061593, 1025123714) + + W(2, -1130366216, 1055996350, -1084468040, 1039957533) + W(3, 1035504777, 1053860476, 1054127718, 1020063133) + + W(4, 1043701316, -1085695847, 1050138268, 1020852591) + W(5, 1028701624, -1103910983, -1111428405, 1027291444) + + W(6, -1140377801, -1112356264, -1142788434, 1023500800) + + W(7, -1122144394, -1125763849, -1122964802, -1137256281); + sum2 = + W(0, -1124029698, -1129530165, -1125964837, 1021970837) + W(1, 924100096, -1119644642, 1007880106, -1133743274) + + W(2, -1114278109, -1119677450, 1030663674, -1118803866) + + W(3, -1110737625, 1050522694, 1051750384, -1114387111) + W(4, -1113250169, 1031715210, -1120664914, 999385235) + + W(5, -1121059730, -1121636434, -1117515026, 1025411170) + + W(6, 998717971, -1145009875, -1122454562, 1019195093) + + W(7, -1125884533, -1119348450, 1010693450, -1127692741); + WS(1060837696, -1133947077); + sum1 = + W(0, -1111653883, 1043511426, 1035524535, -1102491073) + W(1, 1028161858, -1107876293, 1015032078, -1120891991) + + W(2, 1026751705, 1032976162, 1051427819, -1107214884) + + W(3, 1035200691, -1100826286, -1104551490, -1125941943) + W(4, 1000443428, 1046154066, 1014718172, 1038586510) + + W(5, -1124636570, -1109841015, -1105048272, 1029969865) + + W(6, -1153569903, 1028652293, -1150803951, 1023653077) + + W(7, 1010162044, -1123706233, -1122173975, 990755503); + sum2 = + W(0, -1109148006, -1124907300, -1092637531, 1058387135) + W(1, 1027230711, -1130116966, -1092832201, 1052271642) + + W(2, -1141349815, -1099589874, -1085994041, 1062146597) + + W(3, -1115012616, 1033368851, 1040685020, 1019043320) + W(4, 1025861327, -1097709653, 1063174823, -1085821967) + + W(5, -1129789352, 1048790586, 1056989663, -1089224415) + + W(6, 1018453676, -1111271581, 1049450817, -1100831168) + + W(7, 1031045509, 1042673858, 1044673285, 
-1096709646); + WS(1052991104, 1024635730); + sum1 = + W(0, 1035832004, 1054985227, -1101014640, -1119430356) + W(1, -1113667962, 1053766219, 1027437528, -1121298600) + + W(2, -1115665493, 1046292645, -1097461637, 1029858016) + + W(3, -1099876558, -1128304655, -1148766910, -1098528437) + + W(4, -1124670698, -1115760280, -1103841730, 1026860541) + + W(5, -1123144798, 1032906080, 1048476772, -1107490221) + + W(6, -1141396670, 1011324759, 1034147624, -1114702739) + W(7, 991927035, 1032830229, 1048946541, -1098098175); + sum2 = W(0, 1023525971, -1101540780, -1109590196, -1124206084) + + W(1, -1123155104, -1081244300, -1107189914, 989815561) + W(2, 1037524511, -1094597742, 1026840221, 979200786) + + W(3, -1119336486, 1068940029, 1038949441, 1039597181) + + W(4, -1120201684, 1051860638, 1014639773, -1126284432) + + W(5, -1140931614, 998725346, -1110977051, -1128575576) + + W(6, 1025256785, 1002156586, -1117345859, -1123250336) + + W(7, -1116496860, 1038812729, 1033886084, -1117282785); + WS(-1106197760, -1107941957); + sum1 = + W(0, -1126741205, 958134964, 1042417753, -1131232109) + W(1, 992223587, -1096404880, -1114006205, -1156891819) + + W(2, -1103296147, 1050569383, 1053212362, -1104768673) + + W(3, -1096167611, -1089676235, -1101748223, -1105076618) + + W(4, -1103227950, 1032016157, 1057274419, -1101041056) + W(5, 1041994672, 1020956357, 1042030605, -1124618361) + + W(6, -1124973941, -1180953690, 1049652990, -1114495652) + + W(7, 1027666259, 1043412107, 1052224033, -1117167007); + sum2 = + W(0, -1114619118, 1033666378, 1015708744, 1027002418) + W(1, -1149001791, 1045400432, -1103940412, -1125963088) + + W(2, -1110825372, 924614016, -1107881456, -1103385448) + + W(3, -1090736686, 1063026008, 1059931802, -1091159350) + + W(4, -1106216772, -1114127208, -1107266078, -1139192735) + + W(5, 1027272732, -1108719036, 1024735270, -1125128312) + W(6, 1033544093, 1032301903, -1134995311, 1019689496) + + W(7, -1120615480, -1114760302, 1034226031, 1032462491); + WS(-1076602784, 
-1079939509); + sum1 = W(0, 1033209209, -1103738752, 1025390185, 1002177000) + + W(1, -1124683769, -1119780485, 1041533536, -1112424745) + + W(2, 1015980198, -1096323000, 1052087227, -1104846498) + + W(3, -1141354248, 1042233324, 1039917313, -1113895617) + + W(4, -1106007132, 1054450248, -1105614956, -1119444855) + + W(5, -1114123684, 1052850909, -1098158311, -1112376802) + + W(6, -1121157953, 1043529432, -1107965437, 1021969694) + + W(7, 1019464370, 1045060053, -1103437636, -1109951699); + sum2 = + W(0, 1032197744, -1106233686, -1119285117, 1036373860) + W(1, -1119329897, -1117977933, 1035649904, -1113988579) + + W(2, 1026978731, -1119744119, 1046616172, -1150690390) + + W(3, 1042368013, -1074309068, -1080074847, -1148379867) + W(4, 1034747948, 1068607081, 1045039448, 1031972703) + + W(5, -1109569271, 1064708176, 1043980108, -1113275705) + W(6, 1037031349, -1119111329, 1029320825, 1015848847) + + W(7, -1111213511, 1041980892, 1028259198, 1008019670); + WS(-1093673600, -1131421273); + sum1 = + W(0, -1121371719, -1114120908, 1035660544, 1009001293) + W(1, -1126420325, -1110871822, 1021798446, 1008740253) + + W(2, 1033366837, -1087025669, 1058317703, -1120291359) + W(3, 1021303790, -1118558279, 1042294182, 1022689127) + + W(4, -1117842165, 1056872571, -1088792724, 1031810222) + + W(5, 1011797877, 1032969664, -1114500968, -1148303562) + + W(6, 1021502382, 1037611499, -1111697864, -1137856917) + + W(7, -1131840478, 1040800018, 1017988942, -1121301883); + sum2 = W(0, -1145039688, -1114206314, -1116540509, 1020565626) + + W(1, -1129835426, -1119983153, -1113040628, 1000979816) + + W(2, -1119915157, -1121785409, -1118504757, 989230112) + + W(3, -1110668832, 1051729955, 1051989643, -1112437186) + + W(4, -1111519800, 1012089860, 1039755978, -1112214518) + + W(5, 1002833896, -1118223897, 1025479957, 1015165050) + + W(6, -1127947266, -1130809610, -1158783904, -1128941978) + + W(7, 1006052904, -1123764449, -1127132522, 1025956493); + WS(1065682080, -1111828541); + sum1 = + 
W(0, 1015869752, -1145885026, 1022264616, -1126764880) + W(1, 1017604463, -1115077208, 1021224792, 1032942526) + + W(2, -1112250175, 1052074397, -1089954536, 1040282413) + W(3, 1024786188, -1121697500, 1047440331, 1035803452) + + W(4, 1036568133, -1086889687, 1049268572, -1125683619) + W(5, 1002255762, 1045248549, -1109225970, 1031850711) + + W(6, -1131800664, -1130857684, -1123514660, -1141787602) + + W(7, 982208647, 1037568127, -1113425822, 1017131424); + sum2 = W(0, -1122978131, 1016858600, 1028741871, 993636865) + W(1, 1016040732, 968811648, 1021399769, -1144040913) + + W(2, -1114060649, 1029169324, 1032336595, -1118019503) + + W(3, -1125004872, 1051074882, 1031746308, -1109484363) + + W(4, 1029971540, 1006053609, -1103346234, -1124468225) + + W(5, 1026938249, -1154135582, -1113140779, 1013417282) + + W(6, 1017189437, -1115290499, -1141169189, -1169769501) + + W(7, -1124460348, -1140773838, -1122277912, -1139883250); + WS(1068575136, 1057679145); + sum1 = + W(0, -1106541599, 1049880682, -1093397815, 1052783569) + W(1, 1008215334, 1049251178, -1111487207, 1042509671) + + W(2, 989927934, 1026486412, -1092992340, 1051194971) + W(3, 1042310507, -1095539136, -1103138323, 1040579493) + + W(4, 1018512183, -1094672658, 1043830943, 1038856114) + W(5, 1040070410, -1089191083, 1057944056, -1114567880) + + W(6, 1014261304, -1100618939, 1044894993, 1025292508) + W(7, 1041475113, -1087252724, 1049811244, 1038361721); + sum2 = + W(0, -1114445084, 1053593458, 1057823779, -1150175899) + W(1, -1136765855, 1055007475, 1054334485, -1141017037) + + W(2, 1039574032, -1105638736, -1096181786, 1042094581) + + W(3, -1113238946, -1078800745, -1078443122, -1134493359) + + W(4, 1030357662, -1104375874, 1049857835, 1020483059) + W(5, -1106803932, 1059224194, 1058193036, -1099695416) + + W(6, 964162478, 1050059734, 1043677394, -1118569338) + W(7, -1113028634, 1035501186, 1029634285, -1110238952); + WS(-1069843280, 1043992756); + sum1 = + W(0, -1124948267, 1027695014, -1108770183, 
1008956039) + W(1, -1135429253, -1119099178, 1017935435, -1129507307) + + W(2, 1025550072, 1044103833, -1112757821, 1030807982) + W(3, 1025838726, -1113789227, -1130965835, 1014849865) + + W(4, 1019242118, 1039477491, 1042243413, -1114625158) + W(5, -1116233196, 1024839950, -1101790135, 1035190516) + + W(6, 998797102, -1142719774, 1012540023, -1127852007) + + W(7, -1130610171, 1028120504, -1106167085, 1023573770); + sum2 = + W(0, -1114838118, 1026300352, 1004928481, -1130791968) + W(1, -1121247640, 1028491168, -1130258048, -1127851536) + + W(2, -1095359738, 1053083344, 1024432884, 1025789396) + W(3, -1072953064, 1074653957, -1102013621, 1030571860) + + W(4, -1093135708, 1056568596, 1030571276, 984097412) + W(5, -1115141896, 1033233928, -1116002270, 1025870428) + + W(6, -1116140314, 1030713448, 1015669456, -1120604784) + + W(7, -1127503160, 1023653964, -1129660856, 1016702232); + WS(1066802848, -1129257078); + sum1 = W(0, 1015630667, 1033473243, -1104672775, -1114728473) + + W(1, -1118978065, -1155453851, -1122431286, -1121467910) + + W(2, -1113784260, 1035395194, -1115117829, -1105320751) + + W(3, -1101616805, 1065736411, 1061324843, -1095556296) + + W(4, -1108037466, 1045542397, -1100789945, -1104703871) + + W(5, -1114908350, 1040181775, -1114134583, -1109993148) + + W(6, -1114956027, -1177594330, -1124148059, -1111474306) + + W(7, -1111947911, 1036229652, -1138173719, -1106051686); + sum2 = W(0, -1109560646, -1155123513, 1035199782, -1115008786) + + W(1, -1134031950, -1108713026, -1107664082, 1019412439) + + W(2, -1107125821, 1038996664, 1043157052, -1123597732) + W(3, 1017238679, 1026897780, 1051508948, 1016515095) + + W(4, 1024343484, -1107646918, 1055083262, -1106563795) + + W(5, -1127256871, -1107869542, 998755997, -1116358964) + + W(6, 1004378333, -1112902766, -1159444594, -1117600772) + + W(7, -1129735047, -1145442397, 1012570830, -1110777694); + WS(-1093453440, 1062530498); + sum1 = W(0, -1124930739, -1115495349, 1025637074, 1023868966) + + W(1, 
1018166794, -1109318263, 1039727605, 1023563962) + W(2, -1109529502, 1024699176, 931786146, -1131885027) + + W(3, -1114300581, 1040214918, 1052285453, -1104984446) + + W(4, 1025096231, -1105577322, 1009762919, -1110072888) + + W(5, 1023247327, 1043329716, -1105534248, -1137647791) + W(6, 998443854, -1123543752, 988548151, -1123805240) + + W(7, -1125424515, 1034740398, -1116356474, 1018092315); + sum2 = + W(0, -1116378324, 1018118074, 1027066059, -1110082211) + W(1, -1110334365, -1135013993, 1002595554, -1111471528) + + W(2, -1139723269, -1117515615, 1053899571, -1114430561) + + W(3, -1134041225, -1097345050, 1043336959, 1040454031) + W(4, -1147091114, 1039717826, 1040668505, 1033656482) + + W(5, -1112237914, -1107830137, 1010598309, -1117546906) + + W(6, -1123080609, 1032288465, 1016807738, -1115822981) + + W(7, -1131603942, 978891919, 1026333875, -1119917138); + WS(1067735712, -1080534052); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[429]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { temp[pos] = (value); } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 429; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 11, y = (uint)id % 11; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (3)) + 0.5, float(group_base.y + y - (1)) + 0.5)).x; + } + barrier(); + vec4 ret = 
vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[8]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 11]; + samples[1][1] = inp[local_pos + 12]; + samples[1][2] = inp[local_pos + 13]; + samples[1][3] = inp[local_pos + 14]; + samples[2][0] = inp[local_pos + 22]; + samples[2][1] = inp[local_pos + 23]; + samples[2][2] = inp[local_pos + 24]; + samples[2][3] = inp[local_pos + 25]; + samples[3][0] = inp[local_pos + 33]; + samples[3][1] = inp[local_pos + 34]; + samples[3][2] = inp[local_pos + 35]; + samples[3][3] = inp[local_pos + 36]; + samples[4][0] = inp[local_pos + 44]; + samples[4][1] = inp[local_pos + 45]; + samples[4][2] = inp[local_pos + 46]; + samples[4][3] = inp[local_pos + 47]; + samples[5][0] = inp[local_pos + 55]; + samples[5][1] = inp[local_pos + 56]; + samples[5][2] = inp[local_pos + 57]; + samples[5][3] = inp[local_pos + 58]; + samples[6][0] = inp[local_pos + 66]; + samples[6][1] = inp[local_pos + 67]; + samples[6][2] = inp[local_pos + 68]; + samples[6][3] = inp[local_pos + 69]; + samples[7][0] = inp[local_pos + 77]; + samples[7][1] = inp[local_pos + 78]; + samples[7][2] = inp[local_pos + 79]; + samples[7][3] = inp[local_pos + 80]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 34]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret); +} +//!PASS 2 +//!DESC NNEDI3 (double_x, nns32, win8x4) +//!IN INPUT, temp +//!OUT OUTPUT +//!BLOCK_SIZE 64, 8 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[8]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for 
(int i = 0; i < 8; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 32.0; + float mstd1 = sumsq / 32.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, 1036208225, 1039469975, 1027560868, 1039536921) + W(1, -1144450173, 1035622988, 1024461970, 1029776426) + + W(2, -1091571194, -1095740683, -1097328754, 1025116802) + + W(3, 1043923973, 1049700439, 1024545936, 1047463785) + W(4, 1048590727, 1028885330, 1039928979, 1017576356) + + W(5, -1098962113, -1090255529, -1100301043, -1090407950) + + W(6, 1032174663, 1026075554, 1023201972, 1042058704) + W(7, 1033278580, 1046104373, 1035224848, 1043393326); + sum2 = + W(0, -1098253927, -1166389754, -1106582410, 1034460863) + W(1, 1027738130, -1113669665, 1016925525, -1129223232) + + W(2, 1055965966, -1101782718, 1045236553, -1132225959) + + W(3, 1057568628, 1055278331, -1127810369, -1108092808) + W(4, 1006679350, 983512221, -1097283179, -1090237017) + + W(5, 1036184228, -1133967067, 1022264595, 1009711002) + W(6, -1096709168, -1105895383, 1040768379, 1030302550) + + W(7, -1111760224, 1033275358, -1104949250, 1045297871); + WS(-1080723616, -1076327864); + sum1 = W(0, 1017315691, -1140602116, 1037257964, -1107192473) + W(1, 1022003974, 1022805527, 1021604747, 1014959990) + + W(2, -1106293281, 1024505865, -1101486193, 1027220277) + + W(3, 1032001763, -1106692027, 1020788295, -1109388172) + + W(4, 1041272315, -1129176587, 1040932726, -1098639185) + W(5, 1032224091, 1038889695, 1019749115, 1036681163) + + W(6, -1113397025, 1022464939, 1024865293, 1043879330) + + W(7, -1118929755, 
-1123579783, -1117432531, 998707020); + sum2 = W(0, 1015755803, -1140061902, -1129835189, 1032555441) + + W(1, -1138877514, -1123339814, 1016646759, -1165772366) + + W(2, -1127131649, 1016927815, -1112279061, -1095938931) + + W(3, -1116204914, 1009679614, -1153656135, -1146249364) + + W(4, -1137718334, -1129880851, -1089663810, -1069364926) + + W(5, -1092530653, -1148409052, -1122312216, 1018619187) + + W(6, 1033341381, 1024947773, 1058036499, 1079454606) + W(7, 1053318688, 1022404175, 1028043253, 1025789685); + WS(1058954560, 1004956589); + sum1 = + W(0, -1128607008, 1038458494, 1047693245, 1026348672) + W(1, -1108459679, -1116524774, 1024592800, -1117551096) + + W(2, -1102342440, -1105082889, -1085974686, 1047296084) + + W(3, 1058722958, -1135229473, 1019546620, -1122816147) + + W(4, 1027139080, -1147341058, 1057887999, -1100046248) + W(5, -1087115665, 1043502422, 1020243248, 1043995308) + + W(6, 1038455278, -1156235011, -1103441674, 1029167261) + + W(7, 1033885827, 1027001826, -1124163292, 1023459588); + sum2 = W(0, 1039897191, -1116902056, -1115617116, -1116419565) + + W(1, -1121428266, 1003022729, 1032782009, -1161229156) + + W(2, 1032715073, 1035295215, 1051413477, -1089611288) + W(3, 1039274043, -1102315300, 1012620980, 1023483611) + + W(4, -1104821098, -1120385367, 1060018690, -1132652148) + + W(5, 1008252604, -1117381948, -1113601670, -1116040249) + + W(6, -1103487249, -1118716257, -1091510775, 1048354743) + + W(7, 1040404198, 1037629937, 1035102099, -1132320852); + WS(-1088253760, -1127575790); + sum1 = W(0, 1038728366, 1033404580, 1036121471, 1045370723) + W(1, 994877095, 1036217881, 1012008372, 1040247041) + + W(2, -1156102863, 1042126700, -1099951467, -1083618166) + + W(3, 1040753147, -1099186053, 1033329669, -1099351704) + + W(4, 1036225546, -1111684630, 1050604224, -1088174091) + + W(5, -1098202972, 1039701042, 1015138122, 1032444126) + W(6, 1011512228, 1043566527, 1032168346, 1050156122) + + W(7, 1037663513, 1034991255, 1030253051, 1041638884); + 
sum2 = W(0, 1030438042, 1009286759, 1027607240, 1019667875) + W(1, -1113200383, 1032828373, -1125602091, 1008172775) + + W(2, -1147399085, 1025260806, 1032031341, 1065802217) + + W(3, 1046766533, -1110418147, -1137630919, -1119451994) + + W(4, -1093920639, -1086940227, -1090745109, 1049694895) + + W(5, -1117099650, 1041550488, -1114583845, 1034586333) + + W(6, -1109485523, -1143426189, -1114490725, -1135657207) + + W(7, 1032826151, 1024844082, 993352218, -1137428855); + WS(-1079364256, 1057874888); + sum1 = W(0, 1038744583, 1028964378, 1038770168, 1044691424) + W(1, 1039465822, 1031872786, 1031706975, 1026510455) + + W(2, 1023431711, 1034807569, 1043817591, -1093522810) + W(3, 1033837359, 1026052487, 1024735357, 1032349913) + + W(4, -1097930673, -1105253352, -1095573950, -1085514994) + + W(5, -1111615463, 1038514603, 1029114647, 1036441879) + W(6, 1027252187, 1030066159, 1037737491, 1045101529) + + W(7, 1019830473, 1014931844, 1030853165, 1025856707); + sum2 = W(0, -1110129969, -1122608874, -1109784187, 1036903789) + + W(1, 1016388156, -1134147112, 1025386706, -1114085729) + W(2, 1023276596, 1020476676, 1055608478, 1056514262) + + W(3, -1122102418, 1032271709, -1109100200, 1030585586) + + W(4, 1028080466, -1106237621, -1110801543, -1099727798) + + W(5, 1033517697, -1114242709, -1110189468, -1128034084) + + W(6, -1115008651, -1145788944, -1135649000, 1034305529) + + W(7, 1019861916, -1104837577, 1027443202, -1105665752); + WS(-1091483264, -1079194671); + sum1 = W(0, -1131351604, -1113984995, 1021186040, -1116327562) + + W(1, -1132080655, -1206829265, 1023600234, -1126861572) + + W(2, -1116837930, 1032278999, -1099425098, 1045060919) + + W(3, 1054575181, -1113261897, -1130307600, -1115285054) + + W(4, -1139989065, 987288903, 1055014512, 1046630795) + W(5, -1092379987, 1027226210, -1122295330, 1003385298) + + W(6, 1024497830, -1120582522, -1105371085, -1115581427) + + W(7, 1026057008, -1110689206, -1130576880, -1136751161); + sum2 = W(0, -1129629424, 1028468886, 
-1120636360, -1136732761) + + W(1, 1036262769, 1022425868, 1019955588, -1123045172) + W(2, 1004745569, -1114381934, 1032177433, 1051604488) + + W(3, -1115452378, 1026786204, -1123076816, -1139760465) + + W(4, 1012138921, 1032075412, -1114041388, -1122709320) + + W(5, -1105925279, -1102674451, 1002766561, -1135173233) + + W(6, 1021845392, 1000184001, 1017905952, -1141636449) + + W(7, -1138562729, 1006971361, -1146151969, -1127921184); + WS(1067689632, -1150117831); + sum1 = + W(0, -1109849008, -1130553174, -1123520321, 1036413160) + W(1, 1029117869, -1127374955, -1140464318, 1027561031) + + W(2, 1047555836, 1029999475, 1050420518, -1116179871) + W(3, 1035246488, -1104890550, 1020611803, -1113066609) + + W(4, 1035630764, -1114674616, 1040630545, -1098283742) + + W(5, 1031697439, -1116392999, -1125473743, -1114557056) + + W(6, -1099893915, -1113991840, -1107722684, -1150636351) + + W(7, 1041501593, 1019029347, 1031847583, 1006720846); + sum2 = W(0, 1032302578, -1114467512, 1035589071, -1140727686) + + W(1, 1028178655, -1123814512, -1131958732, -1114865396) + + W(2, 1032483043, 1037446674, -1114962225, -1107033941) + + W(3, 1017647091, -1102520792, 1028700703, -1106445462) + W(4, 1055539881, 1053856893, 1060021172, 1044678306) + + W(5, -1086216329, -1096729370, -1098594526, -1103816765) + + W(6, -1089061083, -1095432519, -1087688027, -1119624965) + + W(7, 1060967891, 1056004093, 1046082925, 1049601918); + WS(1053174400, -1151490459); + sum1 = + W(0, -1122069115, -1147738936, 1027718109, 1032697654) + W(1, 1025368761, -1112696969, 1007532846, -1121758051) + + W(2, 1032070534, -1116405775, 1043517586, -1108806494) + W(3, 1041596435, 1030488179, -1145227340, 1032005679) + + W(4, -1106451229, 1015337751, -1118269791, -1117317899) + + W(5, 1044803791, -1098708424, 1024445699, -1105947015) + W(6, 1016988943, -1131178679, 1030116243, 1014470144) + + W(7, -1112007328, 1036691591, -1121635481, 1025294511); + sum2 = W(0, 1030975148, 1021517080, 1055956931, 1077778659) + W(1, 
1057214094, 1029844584, 1029913248, 1021753404) + + W(2, -1130068082, -1126516206, -1091517508, -1071480833) + + W(3, -1089646275, -1129174056, -1116251741, 998387790) + + W(4, -1128045540, 1010660367, -1115164896, -1096786527) + + W(5, -1109767888, 1010452735, -1130073888, -1161269496) + + W(6, 1009416079, 1014043007, -1124472846, 1028586964) + + W(7, 1004868686, -1122799750, 1023899390, -1126055830); + WS(1064255296, 1017910760); + sum1 = W(0, 1034554881, -1112041607, -1108750194, -1098306542) + + W(1, -1140839485, -1117661336, -1142577806, -1131295699) + + W(2, 1054734130, 1053450179, 1051103338, 1044871248) + W(3, -1103441076, 1027829882, -1122634224, 1024857124) + + W(4, -1098111935, 1024002066, -1095577677, 1046694116) + + W(5, -1111281057, 1048629833, 1032310597, 1048835924) + + W(6, -1127407003, -1117294562, 1016696883, -1097130662) + + W(7, 983357671, -1104687573, -1118108640, -1098281839); + sum2 = W(0, -1120767464, 1032914397, -1109760249, 1040094052) + + W(1, 1016200567, 1009241688, -1122384418, 1032959032) + W(2, 1053966016, 1073793292, 1060302091, -1072585870) + + W(3, -1094220304, -1113143087, 1028063795, -1104349617) + + W(4, 1035968374, 1040357442, -1122023462, -1086238928) + + W(5, -1172011891, 1043210576, 1025380203, -1119727737) + + W(6, -1147532546, -1129131123, -1130362887, -1116834009) + + W(7, 1036030945, 1015923256, 1017132829, 1019714285); + WS(-1096343168, -1126236522); + sum1 = W(0, 1030703603, 1007810299, 1040041675, 1047119465) + + W(1, -1113575360, -1132527909, -1134236941, 1009418349) + + W(2, -1108689250, -1119306439, -1096469888, -1085509191) + + W(3, 1056154112, 1020253526, 1041829816, 1033231243) + W(4, 1038321302, 1031818658, 1053153171, -1089344732) + + W(5, 1045446045, 999324634, -1115315934, -1107897834) + + W(6, -1121378303, -1127608870, -1126859158, 1045457184) + + W(7, -1144694086, 1029954693, 1019451938, 1026209383); + sum2 = W(0, 983284602, 1016178696, 1029794920, 1029986296) + + W(1, -1109420788, -1131549160, 
-1118046352, -1117716768) + + W(2, 1020456556, -1144975071, -1114118472, 1041996066) + + W(3, -1139464095, -1114876306, 1015095008, 983229178) + W(4, -1131636264, -1131007184, 997214653, 1053737341) + + W(5, -1097949071, -1113647896, -1115559594, -1141234271) + + W(6, 1015204808, 1022946788, 998054973, 1023304596) + W(7, 1029400914, 1024576522, -1124860928, -1131778280); + WS(1054415488, 1031748714); + sum1 = W(0, 1032036500, -1106849745, 1032905605, 998976710) + W(1, 1035586875, 1024707071, -1137244643, 1040319748) + + W(2, 1016814449, 1048190511, 1025341687, 1051815981) + + W(3, -1102465150, -1113968464, -1127337837, -1115415046) + + W(4, -1098109636, -1110288412, -1090213441, 1042721902) + + W(5, -1117518441, 1046144796, -1117570737, -1135047459) + + W(6, 1018567785, -1119790601, 1041909072, 1029877301) + + W(7, 1033626399, -1106536335, 1018599181, -1122513053); + sum2 = + W(0, 990440998, -1138393335, -1115155620, 1002996807) + W(1, -1122715509, -1107496177, -1109700727, -1100731818) + + W(2, -1103416430, -1121991569, -1128966950, -1100463136) + + W(3, -1095499559, 1010012987, 1000571127, 1048400711) + W(4, -1120602807, 1037302636, -1098451061, 1060125031) + + W(5, 1057202890, 1038403096, 1011003397, 1041385707) + W(6, 1034530622, -1117267403, 1032576586, -1114490937) + + W(7, -1120765719, -1126507430, 1027555951, -1105804206); + WS(1057399616, 1074070393); + sum1 = W(0, -1116946950, -1167692925, 1025124136, 1027460150) + W(1, 1033233658, 1043057687, 1031827147, 1036180887) + + W(2, 1049855515, 1048650971, 1035712138, -1096264273) + + W(3, -1093603523, -1089515076, -1102565613, -1089497938) + + W(4, -1097294408, -1103643414, -1094845877, -1106323062) + + W(5, 1045414146, 1057610273, 1043872372, 1048651645) + W(6, 1045229301, 1041758026, 1045455640, 1036926493) + + W(7, 1036007790, -1123310040, 1012034194, 1034964477); + sum2 = W(0, -1099019906, 1016280180, -1104956478, -1160553787) + + W(1, -1115190705, 1040551613, -1110901592, 1043284947) + + W(2, -1135117191, 
-1104821294, 1031344902, 1057285857) + + W(3, -1108143797, -1096822242, -1121444630, -1108844527) + + W(4, 1053230727, -1108236474, 1044240449, 1060881655) + + W(5, -1141578351, -1110487530, 1033167055, -1097266933) + + W(6, -1102919364, 1021862852, -1102746088, -1121198562) + + W(7, -1107568696, 1027740054, -1115478973, 1039050835); + WS(-1079272096, -1088198283); + sum1 = + W(0, -1150549691, -1136809437, 1035238887, -1114730926) + W(1, 1013948517, 1024493634, 1014747375, 1007675727) + + W(2, -1112293630, -1147892734, -1101200910, -1153686203) + + W(3, 1033312670, -1111552818, 993656219, -1115285591) + W(4, 1038263474, -1131332343, 1042949876, -1109365886) + + W(5, -1123815596, 1033656538, 999554974, 1031561484) + W(6, -1117531256, 1024124492, 1022854919, 1038223988) + + W(7, -1137128325, -1131368443, -1123012682, 983503863); + sum2 = W(0, -1130912341, 933663296, 1024898014, 1026863962) + W(1, 1006981209, 1015250909, -1128875465, -1163156681) + + W(2, -1131869901, 973137042, 1022547165, -1103034755) + + W(3, 1032168251, -1122671508, 1002623282, -1124284861) + W(4, 1021624533, 1031913949, 1049231168, 1074345895) + + W(5, 1046910286, 1033396431, 1015118461, 987851849) + + W(6, -1116201618, -1114826138, -1097351564, -1074065991) + + W(7, -1101411998, -1116799454, -1122428830, -1131138133); + WS(1066898592, -1135257599); + sum1 = W(0, -1110180874, -1104421608, -1108282395, -1109212258) + + W(1, -1112266449, -1104815307, -1117810340, -1107536862) + + W(2, 1039457202, 1036077428, 1042328969, 1047234565) + + W(3, -1102031401, -1101771699, -1107974833, -1105991798) + + W(4, -1099722419, -1109956850, -1108344348, 1050666829) + + W(5, 1052651555, 1056795268, 1044347095, 1052879183) + W(6, 1033603984, 1025966004, 1027866348, -1127852161) + + W(7, -1117920074, -1103088922, -1114797145, -1109126692); + sum2 = W(0, -1109573955, -1130272686, -1112628352, -1135365682) + + W(1, -1127976571, -1109041460, -1141484375, -1124075328) + + W(2, 1053252716, -1099761318, 1053124236, 
1015950468) + + W(3, 1055173972, 1056516595, -1111992284, -1111067004) + + W(4, -1111963093, 1023415227, 1036225124, -1098416341) + + W(5, 984548524, -1099834172, -1115296758, -1123095583) + + W(6, -1104024699, -1113318439, -1113101390, 989675628) + + W(7, -1113626094, 1021900236, -1121224929, 1035158074); + WS(-1080514464, 1071098312); + sum1 = W(0, 1029423638, 1032590577, 1026820152, 1032314563) + + W(1, -1121041060, -1118573868, -1120807462, -1138006521) + + W(2, -1104998922, -1109637973, -1100165641, 1052963835) + + W(3, 1049180397, 1051364482, 1040988017, 1046351030) + W(4, 1034143047, 1035675859, 1041959039, 1044386890) + + W(5, -1094089003, -1091683740, -1104568584, -1100071438) + + W(6, -1121480226, -1116754806, -1119258414, -1124675269) + + W(7, -1141724766, 1026232616, -1142663074, -1128189972); + sum2 = W(0, -1103917515, 1015651007, -1110033797, -1102760582) + + W(1, -1111112749, 1038662254, -1114159185, 1037944898) + + W(2, 1047440331, -1161898742, 1037971494, 1064882055) + + W(3, -1094052244, -1095531648, 1028288648, -1103088589) + + W(4, 1045127249, -1107946310, 1032097498, 1062501861) + + W(5, -1097807648, -1103513552, 1000704926, -1103412727) + + W(6, -1101094893, 1034127906, -1105034150, -1105985747) + + W(7, 952358760, 1029080884, -1115269879, 1036625418); + WS(1048356096, 1025975827); + sum1 = W(0, -1127785256, -1122099232, -1101696686, 1026577988) + + W(1, 1038799529, 1035389109, 1026796836, 1026531900) + W(2, 1042988217, 1042373466, 1057753530, 1024353684) + + W(3, -1089144738, -1100403603, -1117105634, -1118224663) + + W(4, -1143337570, -1109964850, -1091154007, 1027827940) + + W(5, 1052285203, -1125657656, -1109515886, -1110632314) + W(6, -1113788528, 985728647, 981326605, 1025248769) + + W(7, 1031883373, 1042938314, 1041478413, 1024598728); + sum2 = W(0, 1012220951, -1116521159, -1112142878, 1029633040) + + W(1, -1134401830, -1123437332, -1127274631, -1127142758) + + W(2, -1127410563, -1105350472, 1052174871, -1097063130) + + W(3, 
-1157154125, 1029744370, -1133421881, 1031896001) + + W(4, -1133847817, 1023881724, 1044599155, -1106094805) + + W(5, 1059986787, -1111846084, 1028497368, -1111750394) + + W(6, -1150652997, -1121646209, -1123898264, 1019540966) + + W(7, -1101447493, -1113752268, -1115689394, -1144497399); + WS(1047538944, -1094881626); + sum1 = W(0, -1131292909, -1116924690, -1112297452, 1021777101) + + W(1, 1043477817, -1116873677, -1148100662, -1142312694) + + W(2, 1026526343, 1015768341, 1061821276, 1006944475) + W(3, -1091685625, 1042550748, -1112344306, 1025260793) + + W(4, -1137367995, 1040643394, -1088680894, -1090839121) + + W(5, 1059324523, -1162815447, 1036475455, 1009991259) + + W(6, -1137761291, -1105713526, 1031500559, -1122499811) + + W(7, -1107169411, -1119456201, 998501030, 1032218389); + sum2 = W(0, 1018368946, 1027383442, 1026347909, -1120907217) + + W(1, -1113638165, 1015876554, -1121952605, -1157376536) + + W(2, -1112001321, -1114299623, -1159080892, 1048017951) + + W(3, 1043117803, -1117678872, 1016312397, -1122492527) + + W(4, -1120361628, -1113414147, -1114902877, 1047229175) + + W(5, 1034197636, -1153448320, 1030999359, -1146986041) + + W(6, -1136144308, 995591994, -1130300135, -1113455406) + + W(7, -1104824263, -1135559770, -1116260909, 1024817261); + WS(1059019584, -1093542352); + sum1 = W(0, -1118590060, 1036739212, -1117950862, 1044992135) + + W(1, 1031932973, -1157350427, 1018600951, -1135788111) + + W(2, 1030085820, -1117660736, 1053611807, -1088498407) + + W(3, -1106481599, 1041316489, 1033416637, 1042914684) + W(4, 1044414089, 1037660574, 1035930184, -1087395836) + + W(5, 1044296603, -1101366685, -1113146202, -1105147443) + + W(6, -1139295455, 1017695255, -1132182919, 1041445842) + + W(7, -1162575911, 1028651518, 1029799946, 1024137952); + sum2 = W(0, -1139124704, -1129149604, -1112674264, 1041796064) + W(1, 1023328190, 983636161, -1131439188, 968306692) + + W(2, -1098733365, -1096002059, -1099326395, 1042574015) + + W(3, 993486752, 1013026008, 
-1121650464, 1025526661) + W(4, -1134878408, -1139933424, 1006823136, 1054885382) + + W(5, 1040771663, 1012616072, -1119789204, 1007743024) + W(6, 1017748092, -1120672660, 1007868832, 1038924927) + + W(7, -1118903044, -1117420588, 1032346590, 1016029412); + WS(1034201600, 1032755867); + sum1 = W(0, -1153568391, 1027758260, -1120836106, -1106713574) + + W(1, -1110497539, -1113115007, -1129181937, -1106649828) + + W(2, -1096012775, -1105013167, -1096428191, 1052356477) + + W(3, 1040996254, 1046162344, 1026611286, 1046578147) + W(4, 1054286804, 1040809774, 1052819554, 1045456334) + + W(5, -1104715738, -1106901340, -1119554608, -1111616164) + + W(6, -1151613767, -1113241540, -1109787424, -1103823871) + + W(7, -1140335480, -1136669162, -1129950605, 1018423993); + sum2 = + W(0, -1116195452, -1114916062, 1015539387, 1031939547) + W(1, 1004500534, 1002967134, -1116253852, 1037899637) + + W(2, 1046553581, 1044137813, 1039450853, -1081686100) + W(3, -1106570618, 1023858175, 1032662783, 1018521363) + + W(4, 1060291180, 1067697792, 1056432615, -1075409426) + W(5, -1100622921, 1018449047, 1002080710, -1106597825) + + W(6, -1133268119, 1023665145, 1037512606, 1033868463) + W(7, 1015733077, -1120650920, 1016550645, 1008358731); + WS(-1092032128, -1114982082); + sum1 = W(0, 1022785838, 1030256707, 1019093490, -1129380878) + + W(1, -1136259347, -1112386231, -1119087591, -1105547264) + + W(2, -1092594901, -1096516325, -1091986421, 1046024022) + + W(3, -1122621159, 1047446468, 983613607, 1044817638) + W(4, 1053421570, 1038780564, 1047196191, 1055287349) + + W(5, -1127922606, -1103792760, -1127804734, -1107803612) + + W(6, -1131318086, -1117764855, -1114799192, -1136363407) + + W(7, -1144800614, 1031400745, -1141783466, 1035179430); + sum2 = + W(0, 1041280609, -1122718101, 1032208518, -1105883428) + W(1, 1032385022, -1105712369, 1026423073, -1100029914) + + W(2, -1101864626, -1105911709, -1098031958, 1058683727) + + W(3, -1120084233, 1049511237, -1101782173, 1048360263) + + W(4, 
-1094055741, -1103824921, -1096453130, 1064792422) + W(5, 1036982526, 1028614917, 1024411853, 1045846387) + + W(6, 1033694830, -1107222411, -1124818562, -1108861898) + + W(7, -1126293810, -1108778587, 1022233154, -1104838815); + WS(-1109129728, 1010433912); + sum1 = W(0, -1117250164, 1006908285, -1122855062, -1107144297) + + W(1, -1106683568, -1123315850, -1130212999, -1125685047) + + W(2, -1096823624, -1105899410, -1092542200, 1057728383) + + W(3, 1037050434, 1026000812, -1117617698, -1122267030) + W(4, 1040646763, 1019351431, 1052048931, 1064635860) + + W(5, -1117087348, -1107858814, -1107016783, -1100117973) + + W(6, 1019407999, -1137630799, -1109418342, -1105008732) + + W(7, -1114857797, 1029923270, -1137382471, 1034575732); + sum2 = W(0, -1153548328, -1146428548, -1122720334, -1131960189) + + W(1, -1105417557, -1140382642, -1130524097, 1025793768) + + W(2, -1138461074, 1040260969, 1050908092, 1057542522) + + W(3, 1046314824, -1105004761, 1024281381, -1110366356) + + W(4, -1109953492, -1109276610, -1119113122, -1109440842) + + W(5, 1008998438, -1140136790, -1119699464, -1135544654) + + W(6, 970383811, 1016706951, -1113772244, -1149783528) + + W(7, -1111645386, -1114961145, -1128062487, -1113390177); + WS(1028043776, 1066748487); + sum1 = + W(0, 1020869188, 1029235758, -1130366216, 1035504777) + W(1, 1043701316, 1028701624, -1140377801, -1122144394) + + W(2, 1042153037, -1142684082, 1055996350, 1053860476) + + W(3, -1085695847, -1103910983, -1112356264, -1125763849) + + W(4, -1114798157, -1109061593, -1084468040, 1054127718) + + W(5, 1050138268, -1111428405, -1142788434, -1122964802) + + W(6, -1111756535, 1025123714, 1039957533, 1020063133) + W(7, 1020852591, 1027291444, 1023500800, -1137256281); + sum2 = W(0, -1124029698, 924100096, -1114278109, -1110737625) + + W(1, -1113250169, -1121059730, 998717971, -1125884533) + + W(2, -1129530165, -1119644642, -1119677450, 1050522694) + + W(3, 1031715210, -1121636434, -1145009875, -1119348450) + + W(4, -1125964837, 
1007880106, 1030663674, 1051750384) + + W(5, -1120664914, -1117515026, -1122454562, 1010693450) + + W(6, 1021970837, -1133743274, -1118803866, -1114387111) + + W(7, 999385235, 1025411170, 1019195093, -1127692741); + WS(1060837696, -1133947077); + sum1 = + W(0, -1111653883, 1028161858, 1026751705, 1035200691) + W(1, 1000443428, -1124636570, -1153569903, 1010162044) + + W(2, 1043511426, -1107876293, 1032976162, -1100826286) + + W(3, 1046154066, -1109841015, 1028652293, -1123706233) + W(4, 1035524535, 1015032078, 1051427819, -1104551490) + + W(5, 1014718172, -1105048272, -1150803951, -1122173975) + + W(6, -1102491073, -1120891991, -1107214884, -1125941943) + + W(7, 1038586510, 1029969865, 1023653077, 990755503); + sum2 = W(0, -1109148006, 1027230711, -1141349815, -1115012616) + + W(1, 1025861327, -1129789352, 1018453676, 1031045509) + + W(2, -1124907300, -1130116966, -1099589874, 1033368851) + + W(3, -1097709653, 1048790586, -1111271581, 1042673858) + + W(4, -1092637531, -1092832201, -1085994041, 1040685020) + + W(5, 1063174823, 1056989663, 1049450817, 1044673285) + W(6, 1058387135, 1052271642, 1062146597, 1019043320) + + W(7, -1085821967, -1089224415, -1100831168, -1096709646); + WS(1052991104, 1024635730); + sum1 = W(0, 1035832004, -1113667962, -1115665493, -1099876558) + + W(1, -1124670698, -1123144798, -1141396670, 991927035) + + W(2, 1054985227, 1053766219, 1046292645, -1128304655) + W(3, -1115760280, 1032906080, 1011324759, 1032830229) + + W(4, -1101014640, 1027437528, -1097461637, -1148766910) + + W(5, -1103841730, 1048476772, 1034147624, 1048946541) + + W(6, -1119430356, -1121298600, 1029858016, -1098528437) + + W(7, 1026860541, -1107490221, -1114702739, -1098098175); + sum2 = W(0, 1023525971, -1123155104, 1037524511, -1119336486) + + W(1, -1120201684, -1140931614, 1025256785, -1116496860) + + W(2, -1101540780, -1081244300, -1094597742, 1068940029) + W(3, 1051860638, 998725346, 1002156586, 1038812729) + + W(4, -1109590196, -1107189914, 1026840221, 1038949441) 
+ + W(5, 1014639773, -1110977051, -1117345859, 1033886084) + W(6, -1124206084, 989815561, 979200786, 1039597181) + + W(7, -1126284432, -1128575576, -1123250336, -1117282785); + WS(-1106197760, -1107941957); + sum1 = W(0, -1126741205, 992223587, -1103296147, -1096167611) + + W(1, -1103227950, 1041994672, -1124973941, 1027666259) + + W(2, 958134964, -1096404880, 1050569383, -1089676235) + W(3, 1032016157, 1020956357, -1180953690, 1043412107) + + W(4, 1042417753, -1114006205, 1053212362, -1101748223) + W(5, 1057274419, 1042030605, 1049652990, 1052224033) + + W(6, -1131232109, -1156891819, -1104768673, -1105076618) + + W(7, -1101041056, -1124618361, -1114495652, -1117167007); + sum2 = W(0, -1114619118, -1149001791, -1110825372, -1090736686) + + W(1, -1106216772, 1027272732, 1033544093, -1120615480) + W(2, 1033666378, 1045400432, 924614016, 1063026008) + + W(3, -1114127208, -1108719036, 1032301903, -1114760302) + + W(4, 1015708744, -1103940412, -1107881456, 1059931802) + + W(5, -1107266078, 1024735270, -1134995311, 1034226031) + + W(6, 1027002418, -1125963088, -1103385448, -1091159350) + + W(7, -1139192735, -1125128312, 1019689496, 1032462491); + WS(-1076602784, -1079939509); + sum1 = W(0, 1033209209, -1124683769, 1015980198, -1141354248) + + W(1, -1106007132, -1114123684, -1121157953, 1019464370) + + W(2, -1103738752, -1119780485, -1096323000, 1042233324) + + W(3, 1054450248, 1052850909, 1043529432, 1045060053) + W(4, 1025390185, 1041533536, 1052087227, 1039917313) + + W(5, -1105614956, -1098158311, -1107965437, -1103437636) + + W(6, 1002177000, -1112424745, -1104846498, -1113895617) + + W(7, -1119444855, -1112376802, 1021969694, -1109951699); + sum2 = + W(0, 1032197744, -1119329897, 1026978731, 1042368013) + W(1, 1034747948, -1109569271, 1037031349, -1111213511) + + W(2, -1106233686, -1117977933, -1119744119, -1074309068) + + W(3, 1068607081, 1064708176, -1119111329, 1041980892) + W(4, -1119285117, 1035649904, 1046616172, -1080074847) + + W(5, 1045039448, 1043980108, 
1029320825, 1028259198) + W(6, 1036373860, -1113988579, -1150690390, -1148379867) + + W(7, 1031972703, -1113275705, 1015848847, 1008019670); + WS(-1093673600, -1131421273); + sum1 = W(0, -1121371719, -1126420325, 1033366837, 1021303790) + + W(1, -1117842165, 1011797877, 1021502382, -1131840478) + + W(2, -1114120908, -1110871822, -1087025669, -1118558279) + + W(3, 1056872571, 1032969664, 1037611499, 1040800018) + W(4, 1035660544, 1021798446, 1058317703, 1042294182) + + W(5, -1088792724, -1114500968, -1111697864, 1017988942) + + W(6, 1009001293, 1008740253, -1120291359, 1022689127) + + W(7, 1031810222, -1148303562, -1137856917, -1121301883); + sum2 = W(0, -1145039688, -1129835426, -1119915157, -1110668832) + + W(1, -1111519800, 1002833896, -1127947266, 1006052904) + + W(2, -1114206314, -1119983153, -1121785409, 1051729955) + + W(3, 1012089860, -1118223897, -1130809610, -1123764449) + + W(4, -1116540509, -1113040628, -1118504757, 1051989643) + + W(5, 1039755978, 1025479957, -1158783904, -1127132522) + W(6, 1020565626, 1000979816, 989230112, -1112437186) + + W(7, -1112214518, 1015165050, -1128941978, 1025956493); + WS(1065682080, -1111828541); + sum1 = W(0, 1015869752, 1017604463, -1112250175, 1024786188) + W(1, 1036568133, 1002255762, -1131800664, 982208647) + + W(2, -1145885026, -1115077208, 1052074397, -1121697500) + + W(3, -1086889687, 1045248549, -1130857684, 1037568127) + + W(4, 1022264616, 1021224792, -1089954536, 1047440331) + + W(5, 1049268572, -1109225970, -1123514660, -1113425822) + + W(6, -1126764880, 1032942526, 1040282413, 1035803452) + + W(7, -1125683619, 1031850711, -1141787602, 1017131424); + sum2 = W(0, -1122978131, 1016040732, -1114060649, -1125004872) + + W(1, 1029971540, 1026938249, 1017189437, -1124460348) + W(2, 1016858600, 968811648, 1029169324, 1051074882) + + W(3, 1006053609, -1154135582, -1115290499, -1140773838) + + W(4, 1028741871, 1021399769, 1032336595, 1031746308) + + W(5, -1103346234, -1113140779, -1141169189, -1122277912) + + W(6, 
993636865, -1144040913, -1118019503, -1109484363) + + W(7, -1124468225, 1013417282, -1169769501, -1139883250); + WS(1068575136, 1057679145); + sum1 = W(0, -1106541599, 1008215334, 989927934, 1042310507) + W(1, 1018512183, 1040070410, 1014261304, 1041475113) + + W(2, 1049880682, 1049251178, 1026486412, -1095539136) + + W(3, -1094672658, -1089191083, -1100618939, -1087252724) + + W(4, -1093397815, -1111487207, -1092992340, -1103138323) + + W(5, 1043830943, 1057944056, 1044894993, 1049811244) + W(6, 1052783569, 1042509671, 1051194971, 1040579493) + + W(7, 1038856114, -1114567880, 1025292508, 1038361721); + sum2 = + W(0, -1114445084, -1136765855, 1039574032, -1113238946) + W(1, 1030357662, -1106803932, 964162478, -1113028634) + + W(2, 1053593458, 1055007475, -1105638736, -1078800745) + W(3, -1104375874, 1059224194, 1050059734, 1035501186) + + W(4, 1057823779, 1054334485, -1096181786, -1078443122) + W(5, 1049857835, 1058193036, 1043677394, 1029634285) + + W(6, -1150175899, -1141017037, 1042094581, -1134493359) + + W(7, 1020483059, -1099695416, -1118569338, -1110238952); + WS(-1069843280, 1043992756); + sum1 = + W(0, -1124948267, -1135429253, 1025550072, 1025838726) + W(1, 1019242118, -1116233196, 998797102, -1130610171) + + W(2, 1027695014, -1119099178, 1044103833, -1113789227) + W(3, 1039477491, 1024839950, -1142719774, 1028120504) + + W(4, -1108770183, 1017935435, -1112757821, -1130965835) + + W(5, 1042243413, -1101790135, 1012540023, -1106167085) + W(6, 1008956039, -1129507307, 1030807982, 1014849865) + + W(7, -1114625158, 1035190516, -1127852007, 1023573770); + sum2 = W(0, -1114838118, -1121247640, -1095359738, -1072953064) + + W(1, -1093135708, -1115141896, -1116140314, -1127503160) + + W(2, 1026300352, 1028491168, 1053083344, 1074653957) + W(3, 1056568596, 1033233928, 1030713448, 1023653964) + + W(4, 1004928481, -1130258048, 1024432884, -1102013621) + + W(5, 1030571276, -1116002270, 1015669456, -1129660856) + + W(6, -1130791968, -1127851536, 1025789396, 
1030571860) + + W(7, 984097412, 1025870428, -1120604784, 1016702232); + WS(1066802848, -1129257078); + sum1 = W(0, 1015630667, -1118978065, -1113784260, -1101616805) + + W(1, -1108037466, -1114908350, -1114956027, -1111947911) + + W(2, 1033473243, -1155453851, 1035395194, 1065736411) + W(3, 1045542397, 1040181775, -1177594330, 1036229652) + + W(4, -1104672775, -1122431286, -1115117829, 1061324843) + + W(5, -1100789945, -1114134583, -1124148059, -1138173719) + + W(6, -1114728473, -1121467910, -1105320751, -1095556296) + + W(7, -1104703871, -1109993148, -1111474306, -1106051686); + sum2 = W(0, -1109560646, -1134031950, -1107125821, 1017238679) + + W(1, 1024343484, -1127256871, 1004378333, -1129735047) + + W(2, -1155123513, -1108713026, 1038996664, 1026897780) + + W(3, -1107646918, -1107869542, -1112902766, -1145442397) + + W(4, 1035199782, -1107664082, 1043157052, 1051508948) + W(5, 1055083262, 998755997, -1159444594, 1012570830) + + W(6, -1115008786, 1019412439, -1123597732, 1016515095) + + W(7, -1106563795, -1116358964, -1117600772, -1110777694); + WS(-1093453440, 1062530498); + sum1 = W(0, -1124930739, 1018166794, -1109529502, -1114300581) + + W(1, 1025096231, 1023247327, 998443854, -1125424515) + W(2, -1115495349, -1109318263, 1024699176, 1040214918) + + W(3, -1105577322, 1043329716, -1123543752, 1034740398) + W(4, 1025637074, 1039727605, 931786146, 1052285453) + + W(5, 1009762919, -1105534248, 988548151, -1116356474) + + W(6, 1023868966, 1023563962, -1131885027, -1104984446) + + W(7, -1110072888, -1137647791, -1123805240, 1018092315); + sum2 = W(0, -1116378324, -1110334365, -1139723269, -1134041225) + + W(1, -1147091114, -1112237914, -1123080609, -1131603942) + + W(2, 1018118074, -1135013993, -1117515615, -1097345050) + + W(3, 1039717826, -1107830137, 1032288465, 978891919) + W(4, 1027066059, 1002595554, 1053899571, 1043336959) + + W(5, 1040668505, 1010598309, 1016807738, 1026333875) + + W(6, -1110082211, -1111471528, -1114430561, 1040454031) + + W(7, 
1033656482, -1117546906, -1115822981, -1119917138); + WS(1067735712, -1080534052); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} +/* Staging tile filled and then read by Pass2: 525 floats addressed as x * 15 + y (x in 0..34, y in 0..14). */ +shared float inp[525]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { /* Writes one output pixel. `value` becomes the first component of a YUV triple; the other two come from a linear sample of INPUT at HOOKED_map(pos) transformed by rgb2uv. The triple is converted with yuv2rgb and stored in OUTPUT[pos] with alpha 1.0. The imageStore macro above routes every imageStore(...) call here and forwards only val.x. rgb2uv, yuv2rgb and HOOKED_map are defined outside this excerpt. */ + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define temp_tex(pos) (float(texture(temp, pos).x)) +static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2); +static const float2 temp_pt = float2(1.0 / (temp_size.x), 1.0 / (temp_size.y)); + +#define HOOKED_tex(pos) temp_tex(pos) +#define HOOKED_size temp_size +#define HOOKED_pt temp_pt +/* Pass 2: each thread stores two horizontally adjacent values through imageStore - an existing temp sample at (2 * gx, gy) and an nnedi3() prediction at (2 * gx + 1, gy), where (gx, gy) is gl_GlobalInvocationID. The gl_* names, barrier() and the vec/ivec types are defined outside this excerpt and are assumed to carry their usual GLSL meanings. */ +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); /* this thread's base index into inp (x-major, stride 15) */ + for (int id = int(gl_LocalInvocationIndex); id < 525; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { /* threads share the fill of inp, each stepping by gl_WorkGroupSize.x * gl_WorkGroupSize.y */ + uint x = (uint)id / 15, y = (uint)id % 15; /* tile column and row of entry id. NOTE(review): x and y are unsigned while group_base is signed - confirm group_base.x + x - (1) cannot wrap when group_base is 0 */ + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (1)) + 0.5, float(group_base.y + y - (3)) + 0.5)).x; /* samples temp at the centre of texel group_base + (x - 1, y - 3); HOOKED_pt is 1 / temp_size */ + } + barrier(); /* presumably a work-group barrier so that inp is completely written before any thread reads it - confirm against prescalers.hlsli */ + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[8]; /* 32 taps: 8 consecutive inp entries starting at each of the offsets 0, 15, 30 and 45 from local_pos */ + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 4]; + samples[1][1] = inp[local_pos + 5]; + samples[1][2] = inp[local_pos + 6]; + samples[1][3] = inp[local_pos + 7]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; +
samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 19]; + samples[3][1] = inp[local_pos + 20]; + samples[3][2] = inp[local_pos + 21]; + samples[3][3] = inp[local_pos + 22]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; + samples[4][2] = inp[local_pos + 32]; + samples[4][3] = inp[local_pos + 33]; + samples[5][0] = inp[local_pos + 34]; + samples[5][1] = inp[local_pos + 35]; + samples[5][2] = inp[local_pos + 36]; + samples[5][3] = inp[local_pos + 37]; + samples[6][0] = inp[local_pos + 45]; + samples[6][1] = inp[local_pos + 46]; + samples[6][2] = inp[local_pos + 47]; + samples[6][3] = inp[local_pos + 48]; + samples[7][0] = inp[local_pos + 49]; + samples[7][1] = inp[local_pos + 50]; + samples[7][2] = inp[local_pos + 51]; + samples[7][3] = inp[local_pos + 52]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 18]; /* tile entry (local x + 1, local y + 3); with the (-1, -3) fetch offset above this is the temp texel at this thread's own coordinate */ +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; /* guard is compiled only when LAST_PASS (defined outside this excerpt) equals CURRENT_PASS, which is 2 here. NOTE(review): destPos scales both axes by 2 while the stores below scale by (2, 1) - confirm the y test is intended */ + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret); +} diff --git a/src/Effects/NNEDI3/NNEDI3_nns32_win8x6.hlsl b/src/Effects/NNEDI3/NNEDI3_nns32_win8x6.hlsl new file mode 100644 index 000000000..ccfc7baf6 --- /dev/null +++ b/src/Effects/NNEDI3/NNEDI3_nns32_win8x6.hlsl @@ -0,0 +1,1643 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly.
+// Generated by: nnedi3.py --nns 32 --win 8x6 --use-compute-shader --use-magpie +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME NNEDI3_032_6 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 1 * 2 +//!HEIGHT INPUT_HEIGHT * 2 * 1 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!FORMAT R16_FLOAT +//!WIDTH INPUT_WIDTH * 1 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D temp; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_temp; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC NNEDI3 (double_y, nns32, win8x6) +//!IN INPUT +//!OUT temp +//!BLOCK_SIZE 32, 16 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[12]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 12; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 48.0; + float mstd1 = sumsq / 48.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * 
mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, -1120567290, 994153301, -1139074345, 1021572063) + + W(1, -1117468940, -1119615974, -1132374181, 1016607238) + + W(2, 992947237, -1137843051, -1125619459, 1000210819) + + W(3, -1132591953, -1122316900, 1053527378, -1096186409) + + W(4, -1144213939, -1121684346, -1125130389, -1146864427) + + W(5, 1046512564, 1056444330, -1138699933, -1129869365) + + W(6, -1117911182, 1031756856, -1093298645, 1051252577) + W(7, 1023905988, 979248533, -1119806663, 1024055745) + + W(8, -1112059294, -1109040755, 1033651817, -1119537890) + + W(9, -1121091407, 1003125923, -1120338466, -1148456803) + + W(10, -1133630411, -1141776739, -1126690796, 1029429758) + + W(11, -1110173889, -1113641375, 1032546877, -1127458445); + sum2 = + W(0, -1126549409, 1022573468, -1140490164, -1114833146) + W(1, 1021805248, 1003196031, -1119263575, -1114028928) + + W(2, -1119708173, 1024108770, -1146392599, -1113990054) + + W(3, 1019494412, -1116443736, 1032492771, 1037187519) + W(4, 1026983682, 1006796332, -1124834267, -1099458226) + + W(5, 1055169273, 1055452063, -1095922228, -1126677517) + W(6, 1007192676, 1017309436, 1044169825, -1106256028) + + W(7, -1122158651, 1022777176, -1122284082, -1118444706) + + W(8, 1039787167, -1112035826, -1118290710, -1130573089) + + W(9, -1127647730, -1121042004, -1145097543, -1125070906) + + W(10, -1129575364, -1123455912, 1010488320, -1109973330) + + W(11, 1044065299, -1121131548, -1121253833, 1019346120); + WS(1060265584, 1063714812); + sum1 = W(0, 1021450242, -1128788280, -1111111528, 1039397963) + + W(1, -1131322533, 1011253684, -1123065753, 1022311347) + W(2, 999728711, -1108349889, 1022720848, 974355898) + + W(3, -1137050988, 1028928107, -1119265827, 1026074151) + + W(4, 1019796282, -1136338948, 1021294882, -1140362288) + + W(5, -1137639751, -1102959280, 1043907075, 1025863841) + + W(6, -1118036885, 1024570351, -1101623708, 1041760911) + + W(7, -1151514983, -1136816500, 
1009531192, 1035626828) + + W(8, -1106406289, 1036057270, -1132552740, 1027651305) + + W(9, -1118665549, 1025121435, -1128138578, 1017790822) + + W(10, -1124857459, -1139070668, 992320459, 1029327365) + + W(11, -1106167682, 1037788386, -1134343810, 1024478285); + sum2 = W(0, 1023758242, -1130878336, -1120671625, 1027498838) + + W(1, -1144315403, -1131041230, -1124053764, 1017938673) + + W(2, -1145138723, 1027873020, -1122179044, -1127336921) + + W(3, 1000956747, 1028583936, 1005174931, 1053869854) + W(4, -1093535895, 1009416925, -1122894214, 1031015106) + + W(5, -1104963408, 1073183850, -1074622268, 1033668418) + W(6, 999883691, 994031846, 1001047899, 1053107786) + + W(7, -1094361386, 990806038, 1004424067, 999096115) + W(8, -1126082311, 1035733703, -1114468554, 992895974) + + W(9, -1150041590, -1134395125, 1002728051, 1022225293) + + W(10, -1123867271, -1131337021, 1009546537, 1005722235) + + W(11, -1135618057, 1023923178, -1126165581, -1138321549); + WS(1067199072, 981674447); + sum1 = + W(0, -1113726457, -1105288827, 1040785760, 1032552483) + W(1, -1124907206, -1139898486, 1018794771, -1113372949) + + W(2, -1099180638, 1040483381, -1114167068, -1127069135) + + W(3, -1115871564, -1095136821, 1050678235, 1054762461) + + W(4, -1107257499, -1118671794, 1008735750, -1102105679) + + W(5, 1052734745, 1065065389, -1097429016, 1035770473) + W(6, -1125817867, -1103744656, 1044281884, 1044820043) + + W(7, -1104994292, -1122788302, -1125149117, -1108798620) + + W(8, 1033282170, -1097094388, 1027211226, -1125646899) + + W(9, 1001830321, -1137689902, -1121192922, 1010644366) + + W(10, -1114303237, -1123221796, 1016463803, -1099284685) + + W(11, 1032013917, -1124668651, 1015961570, -1114483455); + sum2 = + W(0, -1124538541, 1021627528, -1127440395, 1027300307) + W(1, 1020622605, -1130074937, -1139313841, 989560618) + + W(2, 1028695072, 1011050585, 999023778, 1015498074) + W(3, -1119374292, 1014774531, -1102978428, 1052523334) + + W(4, -1119874204, -1126569197, 1010830983, 
1026431506) + + W(5, -1087762234, 1053318756, 1038687085, -1128231773) + + W(6, 1020827626, -1118520012, 1027251929, -1113197028) + W(7, 1029469191, 1018969905, -1125616841, 1018739126) + + W(8, -1109480314, 1034265115, -1125820221, -1123965604) + + W(9, 1008596323, 1014458179, 1023848922, -1134832277) + + W(10, -1159659466, 1024095771, -1136528497, -1129452021) + + W(11, -1132845253, 1022117249, -1150305989, -1130502215); + WS(-1082327904, -1101742629); + sum1 = + W(0, -1128942355, 994965062, -1116531225, 1027052286) + W(1, -1127603958, 1021969515, 1007179909, -1124051394) + + W(2, 1022855203, -1119991818, 1015495233, -1129777663) + + W(3, -1172666584, 1029361427, 1044526075, -1110346311) + W(4, 1026992677, 1017142675, -1115398989, 1047686461) + + W(5, -1095192109, -1106285700, 1037065627, -1113404469) + + W(6, -1145393643, 1028067431, -1123739865, 1043850022) + + W(7, -1119388383, -1130059467, -1126910957, 1025118144) + + W(8, -1116394763, 1030498357, 1013662917, 1016457699) + + W(9, -1153678686, -1121058723, -1123838961, 1032721886) + + W(10, -1119902562, -1131255799, -1127439210, 1018201755) + + W(11, -1120454737, 1034890279, 1021249778, 1017348651); + sum2 = + W(0, 1014279427, -1130890464, -1129394348, 1014973315) + W(1, -1152876382, 1004833335, -1131491186, -1137235679) + + W(2, 1024729347, -1130557096, -1150339726, -1141673719) + + W(3, 1026571161, -1097554196, 1048952496, 1014221291) + W(4, 1016692910, -1159625756, 1042930837, -1073378768) + + W(5, 1073588633, -1105375875, 1035354953, -1124590261) + W(6, 1025279833, -1092458255, 1053816664, 1016092170) + + W(7, -1129170378, 989537980, 993445038, -1130245298) + W(8, 1029166823, -1119905200, 1027382919, -1138326931) + + W(9, -1133457551, -1120075373, 1030737081, -1147013927) + + W(10, -1124802440, 1017200998, 1014707171, -1135266971) + + W(11, -1151431678, -1161525308, 979085751, 1009604643); + WS(1067891072, -1132872541); + sum1 = + W(0, 1025237873, -1122834636, -1117007553, -1114151507) + W(1, 1031366707, 
984935338, 1014334757, 1020114356) + + W(2, -1118569017, 976917621, 1039451348, -1152616053) + W(3, 1030238795, -1105197551, 1049984500, -1088419628) + + W(4, 1049092386, -1123762161, 1029025473, -1121917372) + + W(5, -1114461425, 1051488823, 1008484089, -1145922699) + W(6, 1007260181, 1038150728, -1084792932, 1053317187) + + W(7, -1122288663, 1012164149, -1121222990, 1024596920) + W(8, 1039495098, 987986858, -1131993235, 1000937675) + + W(9, 988616634, 1024170379, -1124531763, 1026119523) + W(10, 1022258228, 1011615629, -1123834925, 1015271403) + + W(11, 1027254009, 993078101, 999667975, 1004174827); + sum2 = W(0, -1144599930, -1116760996, 1032730282, 1032901632) + + W(1, 985378521, -1129170670, -1136232721, 1019759527) + + W(2, -1125055029, 1027655943, 1005941018, -1131155477) + + W(3, -1137221455, -1111064134, 1047170030, -1115058011) + + W(4, -1129460922, -1129924333, -1128645029, 1041961890) + + W(5, 1050767412, -1130649513, 1019394239, 1022276751) + + W(6, -1151231836, 1036336113, -1093680614, -1116185015) + + W(7, -1115704458, -1148456898, -1156748756, 1033905549) + + W(8, -1120368706, -1105887613, 1024734935, 1015791177) + + W(9, -1160295481, 1022489781, -1115425401, 1012694293) + + W(10, -1147415538, -1140100033, 1020235585, -1122711401) + + W(11, -1132924343, -1122854992, 1018590227, 1012472897); + WS(1059709408, 1024973287); + sum1 = + W(0, -1126012593, 996494647, -1119189544, 1028092109) + W(1, -1124577204, 1023428420, 1004611460, -1122880609) + + W(2, 1030391200, -1114579692, 1016065247, -1127475101) + + W(3, -1134152138, 1020165829, 1046947367, -1111968183) + W(4, 1002304420, 1021721777, -1114032340, 1049657976) + + W(5, -1094041084, -1106474499, 1040268033, -1114564220) + + W(6, -1131432537, 1014735450, 1018380112, 1044458845) + + W(7, -1114168306, -1128430825, -1126017195, 1032225804) + + W(8, -1112475533, 1030371012, 1023640576, 1019190017) + W(9, 991561039, -1115613273, -1123292160, 1033263557) + + W(10, -1119951545, -1127717781, -1124178504, 
1022764985) + + W(11, -1122300512, 1034006830, 1017080432, 1017178441); + sum2 = + W(0, -1125800395, 1024962708, 1023886172, -1122338700) + W(1, -1171853367, 973576887, 1011909323, -1143341847) + + W(2, -1144213999, 1018807292, 1000330119, 984068764) + W(3, -1116511680, 1052819438, -1097320166, -1115186882) + + W(4, -1140479659, 1001426319, -1103527255, 1076690943) + + W(5, -1072687587, -1098249874, 1023038428, 1007326739) + + W(6, -1120293374, 1057154431, -1091630497, -1117298851) + + W(7, 1029135274, -1135413483, -1148349175, 990717342) + W(8, 1012280135, 1010582795, -1122270538, 1008233815) + + W(9, 1015270352, 1025045202, -1118805022, 990164302) + W(10, 1020995472, -1129534965, -1125483347, 1023577182) + + W(11, 1016249338, -1125194137, 1003973863, -1140102471); + WS(1066253200, 1009489111); + sum1 = + W(0, -1121073890, -1102592607, 1041243549, 1033075644) + W(1, -1123366116, -1130535196, 1024085178, -1107707469) + + W(2, 1026388586, -1107162241, 1004872905, -1145989825) + + W(3, -1130104696, -1106598971, -1121496677, 1050587318) + + W(4, -1113976447, -1120961954, 1023275236, -1116780301) + + W(5, 1049328440, 1057262167, -1116780079, 1023066500) + W(6, -1128214764, -1111680448, 1018012708, 1027096831) + + W(7, -1113850464, -1121521086, -1135799996, 1032050668) + + W(8, -1096674093, 1040843700, 1015552212, 1008378152) + + W(9, -1141685621, -1131519352, -1122519370, 1028580820) + + W(10, -1123000331, -1124335656, -1121776782, 1028096678) + + W(11, -1101864235, -1117086438, 1040749648, -1135644600); + sum2 = + W(0, 1013104936, -1106333857, 1041445982, -1143812784) + + W(1, -1111423698, -1153458272, -1117172679, -1121647283) + + W(2, 1046293466, -1098323478, 1025469214, -1123006845) + W(3, -1127525498, 1031691189, 1054977467, 1032669672) + + W(4, -1110723238, 1018478818, -1135146700, 1030458550) + W(5, 1051999059, 1016608418, 1041509878, -1111394764) + + W(6, -1122718205, -1111791784, 1044523088, -1093831562) + + W(7, 1024902469, -1127833490, 1027187072, 
1009508096) + + W(8, -1099681845, 1044831040, -1110045970, -1140396324) + + W(9, -1119344027, -1127767754, -1185033988, -1117227205) + + W(10, -1129401382, -1129041118, -1128814570, 1037784205) + + W(11, -1107269301, -1118967703, -1121987113, 1021194206); + WS(1055988095, 1068562120); + sum1 = W(0, 1025054956, 1041692938, 1040251631, -1141880069) + + W(1, -1113980711, 1011769695, -1130996743, 1036197410) + W(2, 1032351834, 1043619883, 1002392085, 1020511427) + + W(3, 1024941510, 1042434277, -1100356972, 1026325313) + W(4, 1040781794, -1124414423, 1026193148, 1039706891) + + W(5, -1093055992, -1087193164, 1032223740, -1163338101) + + W(6, 1035977616, 1036807045, 1047058383, -1087410536) + W(7, 1047196369, 1015241743, 1028839961, 1029783554) + + W(8, 1043491547, -1097265155, 1027612260, -1129538215) + + W(9, 1016648336, 1019008523, 1044781893, -1104034167) + W(10, 1036828166, 1018634427, 1025665319, 1026886128) + + W(11, 1048991774, -1099060796, -1113322870, -1180507092); + sum2 = W(0, -1109623534, -1098330057, 1051762285, -1105318233) + + W(1, -1120494292, 1036873688, -1119308996, -1121926087) + + W(2, 1045894429, -1111281216, 1002875661, -1152598618) + + W(3, -1108667787, 1031129408, 1029096148, -1116694436) + + W(4, 1042501069, -1117793653, -1107757858, -1111457821) + + W(5, 1060518836, -1103134861, -1104320409, 1023528108) + + W(6, -1126585969, -1091241820, 1065480819, 1017373783) + + W(7, 1041202422, -1117516356, -1144042093, -1092220222) + + W(8, -1120268316, 1034184394, -1131983911, -1123122827) + + W(9, 1004201965, -1112242725, 1035423856, -1129342463) + + W(10, 1023586732, -1131565835, 1024608852, -1104308532) + + W(11, -1105721536, -1134873838, 1017105251, 1015347387); + WS(-1073915832, -1098883962); + sum1 = W(0, 1030932360, 1024626915, -1098288699, -1130380922) + + W(1, 1033018229, 1021355752, -1131602112, 1032898976) + + W(2, -1117814224, -1103847418, 1040490540, -1120290302) + + W(3, -1125999684, -1122370130, 1056014501, -1096397881) + + W(4, 
1031863906, -1131922448, 1018154928, 1011274315) + + W(5, -1106175819, -1105327485, 1044317365, -1139369119) + + W(6, -1122265980, -1113904235, -1097956311, 1059753579) + + W(7, -1098424101, -1121102372, -1122315861, 1029902191) + + W(8, -1115232133, 1040451325, -1122968848, 1033247800) + + W(9, 1018577993, -1115348785, -1126206816, 1039068637) + + W(10, -1136273729, -1121007738, -1120663220, -1111387910) + + W(11, 1017122848, 1042824131, 1021433455, -1143567007); + sum2 = + W(0, -1118315643, 1046909717, -1100980148, -1129801877) + W(1, -1122172234, 1010610423, -1126395193, 1025514844) + + W(2, -1112473945, 1030594060, -1111527676, -1135186433) + + W(3, 1014636591, 1026986728, 1040985677, -1101816290) + W(4, 1023795758, 1030475382, -1138413531, -1112879553) + + W(5, 1027746798, -1094641835, 1027030278, -1121095046) + W(6, 1013622247, -1122377333, 1051150943, 1041324898) + + W(7, -1113671456, 1015864126, -1129252552, -1114813410) + + W(8, 1057006779, -1106485598, -1133454616, 999966259) + + W(9, -1122713894, -1144089687, -1138866101, -1115757412) + + W(10, 1001347783, -1127643580, -1131632742, -1140190295) + + W(11, 1035155819, 1016695560, -1139893847, -1124292419); + WS(1058139200, 1067005003); + sum1 = W(0, -1131277347, -1103206155, 1015263811, 1048238354) + + W(1, -1106607621, -1122297405, -1128484819, -1112832909) + + W(2, -1111332237, 1044526364, -1104751670, -1124214159) + + W(3, 1014594990, -1108301118, 1004409248, 1056580635) + + W(4, -1107635783, 1017238947, -1112913457, -1111811582) + + W(5, 1031432468, -1114379264, -1107787999, -1110162105) + + W(6, 1028891109, -1113179152, 1057408386, 1025802539) + + W(7, 1020725062, 1002560364, -1118434446, -1100757638) + + W(8, 1053177162, -1104236495, -1114959053, -1113724157) + + W(9, -1125459884, -1116265771, 1035803645, -1124751623) + + W(10, -1129296244, -1123244163, -1151139539, -1119227733) + + W(11, 1052437131, -1121289813, -1120069105, -1115210511); + sum2 = + W(0, -1121057595, 1023292260, -1111891368, 
1035766063) + W(1, 1032208606, -1122161823, 1016432775, -1124268922) + + W(2, 1029388268, -1118389960, -1122510207, 1028529468) + + W(3, -1112629457, -1116199054, -1123843875, 1033023394) + + W(4, -1113138713, -1118001871, 1031151061, -1110795130) + W(5, 1054055516, 1054507013, 1019595258, 1026811027) + + W(6, -1126819952, -1103006120, 1020478730, -1104664187) + + W(7, -1114598707, -1116707921, 1021228732, -1134369449) + + W(8, -1135412321, -1121918429, 1023542227, 1018790016) + W(9, 1009585899, -1120667087, 993043492, 982538191) + + W(10, 993709924, -1146185904, -1127402608, 1016333862) + + W(11, 996548072, -1108029597, -1129671373, -1152045912); + WS(-1091835967, 1038509097); + sum1 = W(0, 1002958335, 1041562959, -1124369824, -1093861855) + + W(1, 1039801017, -1132073944, 1031236696, 1038000620) + W(2, 1029288008, -1100002304, 1038306915, 1032364667) + + W(3, -1122124314, 1042029266, -1104824881, -1087002164) + + W(4, 1037725118, -1122032128, 1034973544, 1034473413) + W(5, 1049571184, 1052741396, 1041619283, 1036587856) + + W(6, -1120202292, 1040514695, -1089024373, 1044570703) + + W(7, 1031782758, -1138629744, 1015820538, 1042048878) + W(8, -1089426268, 1042398171, 1038671876, 1010685600) + + W(9, 1033636218, 1029242778, -1107643796, 1023923250) + + W(10, 1025678032, 1027566834, -1121967836, 1042898689) + + W(11, -1091407169, 1030369604, 1043274836, -1153060031); + sum2 = W(0, 1016791821, -1110538641, -1114409767, 1045544748) + + W(1, -1111278622, -1127655665, 1013192458, 1023566599) + + W(2, -1113846927, 1029704719, -1125617069, 1012727082) + + W(3, -1123883287, 1000152052, -1109610416, 1034122997) + + W(4, -1110039103, -1131212885, 1015528749, -1112636904) + + W(5, 1054155729, 1054056541, -1108588248, 1000573476) + + W(6, -1131825297, -1116367667, -1109117303, 1030522539) + + W(7, -1132906850, -1123699129, -1132736674, -1130121037) + + W(8, 1016394661, -1116026974, -1135451026, -1126704253) + + W(9, 1019880949, -1114305335, 1030290683, 992430312) + W(10, 
-1134728538, 988228497, -1117749640, 1033496981) + + W(11, -1157815185, -1107824982, 1016739853, -1123545619); + WS(-1089794384, 1053743764); + sum1 = + W(0, 977181200, -1124062435, 1045792382, -1096122984) + W(1, 1035356461, -1146772642, 1022034304, -1123589329) + + W(2, 1045687772, -1100185218, 1029423919, 1022712508) + W(3, 999195154, 1008939465, 1043214811, -1089534650) + + W(4, 1037071922, 986801416, -1173982736, 967031199) + W(5, 1051096635, -1128424615, -1130937578, 1009586465) + + W(6, -1131079992, 1031201254, -1106608558, 1022835035) + W(7, 1033635179, 1012941585, -1117753925, 1038421055) + + W(8, -1104815387, 1033659882, -1112564552, 1008485953) + W(9, 1011212627, 1023184516, 975115280, 1025453978) + + W(10, -1148354182, 1023442138, -1131210280, 1042888267) + + W(11, -1113606040, -1114474998, -1115626236, 1020831720); + sum2 = + W(0, 1025756371, 1038582018, -1095235720, -1097242729) + W(1, 1048188946, -1145714840, -1152866061, -1136865390) + + W(2, 1027392563, -1108446917, 1009460859, 1022755591) + W(3, 1022411083, 1028991375, -1094985700, -1092586393) + + W(4, 1013992713, 1033471029, 1021941035, -1102180290) + W(5, 1062263751, 1055770556, -1098658877, 1018658879) + + W(6, 1018807869, -1127151005, 1023141397, 1024429444) + + W(7, -1162587434, 1032105067, -1162131898, -1103213990) + + W(8, 1032099771, 1045256871, -1104388978, -1135748387) + + W(9, 1030478695, -1148467374, -1139645537, -1115314196) + + W(10, 1033707193, 1008760659, -1117519259, -1101731450) + + W(11, 1050734432, 1044538365, -1097902791, 1019361371); + WS(-1092258911, -1089815334); + sum1 = W(0, -1139634407, 1045821685, -1119357002, 1034361159) + + W(1, -1137425069, -1130196683, -1147044493, 1034203057) + + W(2, 1015718019, 1051668019, -1115826689, 1023465264) + W(3, 1017866535, 1044613801, -1104936487, 1032636456) + + W(4, 1035725147, 1015877619, -1130890003, 1035729090) + + W(5, -1090722234, -1086460492, 1010439579, -1117806874) + + W(6, 1027130446, 1029847032, 1052628837, -1094763749) + 
W(7, 1043916211, 1025832214, 1016637069, -1102281888) + + W(8, 1057948020, -1095562645, -1114240161, -1123952546) + + W(9, 1016500860, -1131657511, 1044884520, -1106362539) + + W(10, 1035145793, 1029577644, 1026913637, -1121093118) + + W(11, 1053191568, -1098187032, -1102258214, 1011300119); + sum2 = + W(0, -1128703433, -1113670137, 1038503213, 1040684472) + W(1, -1099552212, 993905065, 1012757410, 1024378351) + + W(2, -1102109659, -1100043210, 1026887817, 1003273444) + W(3, 1007787634, -1111552612, 1041322248, 1041989415) + + W(4, -1104965070, -1144755940, 1023396449, -1131966925) + + W(5, 1061982310, 1066879681, -1107386978, 1019447753) + + W(6, 1017560605, -1114649634, -1104769108, -1101238360) + + W(7, -1104448348, 1019550873, 1012244530, 1040107467) + W(8, -1098175974, -1089047102, 1016563929, 1024790159) + + W(9, -1143125556, -1122758108, 1026757617, 1023979343) + + W(10, -1107880053, -1138003290, 1024938353, 1032092801) + + W(11, -1105089779, -1100231891, 1036400101, 1014825186); + WS(-1086002512, -1084928660); + sum1 = + W(0, -1131929465, -1115340892, 1050776738, -1103148388) + + W(1, -1143173743, -1130358353, 1017039401, -1113717074) + + W(2, 1042912513, 1010821235, -1128281895, -1122997746) + + W(3, 1020173741, -1102662114, 1053707114, -1104889757) + + W(4, -1111952748, -1139775729, -1134499377, -1097984585) + + W(5, 1055825787, 1049797700, -1098746549, -1117617116) + W(6, -1129468257, 999020755, -1108349590, 1042555580) + + W(7, -1106170922, 1014563169, -1138890277, -1114393442) + + W(8, 1033511359, 1041826234, -1103706929, 1010736529) + W(9, -1132813939, 1028151894, -1108075378, 1032548919) + + W(10, -1127274842, -1137959865, -1143488833, -1132982017) + + W(11, -1138015505, 1041074464, -1110410462, -1123442568); + sum2 = + W(0, 1000102260, -1118205558, 1051204856, 1041999613) + W(1, -1135930364, 1019347314, -1126633986, 1029900308) + + W(2, 1063980475, 1037162912, -1134402620, -1137379540) + + W(3, -1144291760, -1140439712, 1066576503, -1167793729) + + 
W(4, 1020225891, 1012525172, -1119827470, 1041420529) + + W(5, -1075398805, -1085093765, -1133429672, -1123883092) + + W(6, 1016232493, 979974721, -1098954833, 1017117707) + W(7, 1028764817, -1139078108, 1023762781, 1006017080) + + W(8, -1114660078, 1033481679, 1018587503, 990744512) + W(9, -1141218312, -1137204884, 1021034547, -1137323924) + + W(10, -1132685152, -1137103964, 1025293286, 1002898232) + + W(11, -1117648930, 1002077480, 1026769102, 1022792204); + WS(-1084377664, 1027078466); + sum1 = + W(0, 1028609881, 1023875764, -1113303893, 1031700430) + W(1, -1138486380, 1025866053, -1129675619, 1032239673) + + W(2, -1114746833, 1026119482, -1122216064, 1016759071) + W(3, 1022038247, 1043647401, -1084547118, 1051850662) + + W(4, -1132199979, 1032834541, 1010908070, 1024809124) + W(5, -1156932707, -1104988345, 1033025459, 1029398137) + + W(6, 1026611885, -1131519215, 1050946557, -1083734174) + W(7, 1043104191, 1007914454, 1030113110, -1128260416) + + W(8, 1047159719, 1027361605, 1018468419, 1021351811) + W(9, -1132403564, 1026951667, 1020650755, -1111325382) + + W(10, 1014159432, 1014779182, 1031201461, -1115829469) + + W(11, 1037768177, 1042797655, -1108759836, 1018395723); + sum2 = W(0, -1123315648, 1034590473, 1026150549, 1030314237) + + W(1, -1113579452, 1014056813, -1139192721, -1139997869) + + W(2, 969235347, -1112534256, -1158492453, -1132289868) + + W(3, -1123875414, -1115134908, 1042477897, 1037120039) + + W(4, -1098184684, 1018097190, 1030816333, 1008365317) + W(5, 1046059603, 1027803789, 1032895251, 1032146261) + + W(6, -1123326315, -1110803030, -1155992274, -1102080708) + + W(7, -1126452160, -1114996104, 1016532474, 1028786615) + + W(8, -1122978967, -1135183209, 1047700729, 1011346781) + + W(9, -1130303816, 1004595129, -1119026545, -1120008811) + + W(10, 999072441, -1115756663, -1131093304, 1015409534) + + W(11, -1111529432, -1109468455, 1047312851, -1122411639); + WS(1043014014, -1087744322); + sum1 = + W(0, 1027952711, -1123526530, 1008337244, 
1035059438) + W(1, -1116610857, 1026995059, 1020523582, -1138321214) + + W(2, 1022379422, -1120526792, 1037330895, -1156487121) + + W(3, 1002118409, -1110182129, 1057818171, -1086364127) + W(4, 1040407761, 1009757724, 1021435438, 1032339433) + + W(5, -1110004232, -1118604170, 1040846837, -1153254001) + + W(6, -1138414444, 1035386835, -1083267015, 1057062103) + + W(7, -1107059287, 1021783366, 1003104545, -1137297015) + W(8, 1007303460, 1036507098, -1128420830, 1034169880) + + W(9, 1021955143, -1119534945, -1115695919, 1038869533) + + W(10, 1017909759, -1138765796, 1013303243, -1108462406) + + W(11, 1036022088, 1036993862, -1142792885, 1027514579); + sum2 = + W(0, -1129567658, -1117738234, 1034622765, 1030957051) + W(1, 1029443337, -1132249217, -1156092221, -1131091573) + + W(2, -1128107850, -1118479197, 1005860107, 969559787) + W(3, -1123579911, -1115037144, 1017778197, 1041246295) + + W(4, -1110926783, -1132354551, 1008715269, -1115010115) + + W(5, 1050631618, 1054490255, -1157251077, 1024782459) + + W(6, -1125160256, -1101087826, 1045062017, -1117275788) + W(7, -1108959270, 998415619, 1026524337, 1015586863) + + W(8, -1095615964, 1025836507, 1020928095, 1012990901) + W(9, -1137738801, -1113332277, 1020345131, 1027688491) + + W(10, -1122972204, -1161032699, 1017463077, 1019720419) + + W(11, -1101223712, -1120912313, 983922363, 1011300789); + WS(1054959103, -1083836992); + sum1 = + W(0, -1144415487, 1023853723, -1123173616, -1121737045) + + W(1, -1124541667, 1017937944, -1132171248, -1118146184) + + W(2, 1018672592, -1123041056, -1120856711, -1131928876) + + W(3, -1133053416, 1039313221, -1090265426, -1161038260) + W(4, 1034290206, 1027902624, 980216825, -1107434917) + + W(5, 1057912142, 1053211380, -1109871174, -1129566080) + W(6, 1018609320, 1027331294, 1044087220, -1089253699) + + W(7, 1040602782, -1121689404, 1017404666, -1111685883) + W(8, 1046306952, 1036559366, -1113560292, 1010241568) + + W(9, -1127349353, -1129432820, -1133031712, -1110305609) + + W(10, 
-1148202051, -1124796708, -1154920122, -1126004808) + + W(11, 1039769364, 1035028498, -1109624712, -1127304152); + sum2 = W(0, 1018338338, -1110884586, -1136808221, 1026570078) + + W(1, -1136194357, -1120452457, -1139755941, -1129406227) + + W(2, -1118232763, 1021052542, 1020321772, 1004896115) + + W(3, 1025801614, -1108237236, -1115176435, -1114736489) + + W(4, 1048907220, -1119060361, -1122001203, 1037408540) + + W(5, -1113269344, 1041519115, 1046652366, -1107310106) + + W(6, 1010171613, 1040327756, -1105808410, 1047951095) + + W(7, 1035402835, 1033649944, -1132321439, -1123391437) + + W(8, -1110685464, -1115014721, -1111631696, -1126977687) + + W(9, 1016232007, -1129086463, -1168436052, -1113536502) + + W(10, 1026410761, 1026056519, 1013259501, -1132299663) + + W(11, 1021765462, -1123780081, -1102273277, 1027695361); + WS(1057903600, 1067139767); + sum1 = W(0, -1130009103, -1107061793, 1043136838, -1102099951) + + W(1, -1115498357, -1123992244, 1012511358, -1111029392) + + W(2, 1028125200, 992918145, -1117969679, -1145160172) + + W(3, -1137761926, -1104154592, 1051222763, -1106189879) + + W(4, -1104467697, -1125356943, -1124300975, -1103367501) + + W(5, 1060113955, 1050079197, -1108729546, -1117739880) + W(6, 1008012526, 1009980758, 1024983063, 1048697267) + + W(7, -1111781863, -1165450354, -1117250069, -1113394087) + + W(8, 1016245011, 1034440698, -1104780916, -1114615608) + + W(9, -1162310274, 1026647906, -1117690736, 1041096574) + + W(10, -1117169584, -1139440358, -1115259538, -1114684710) + + W(11, 1025498192, 1041574755, -1120787335, -1118506516); + sum2 = W(0, 1011962608, 1038855085, -1122245118, -1126451804) + + W(1, -1106947014, 1026864717, -1120699406, -1131692032) + + W(2, -1112627842, -1110442893, 1008646548, -1123527836) + + W(3, 1007214128, 1038467393, -1122221782, 1042136464) + W(4, -1113116412, 999106843, 1026799984, -1112343444) + + W(5, -1106597578, 1054959792, -1109079281, 1011333214) + + W(6, -1145867047, -1113849522, 1026356645, 1031831390) + 
+ W(7, -1109881757, -1117815384, 1029224075, -1138735380) + + W(8, -1114988845, 1046305460, -1109725169, 1032922020) + + W(9, -1116434980, -1120001434, 1020413259, 1008343524) + + W(10, -1123390380, -1119479700, 1022659583, -1118784264) + + W(11, -1123194990, 1047204017, -1114571313, 1000599735); + WS(-1101058110, 1058505899); + sum1 = W(0, 1008909245, -1123852394, 1037579744, 1024016996) + + W(1, -1121096497, 1022516663, 1015324303, -1112299044) + + W(2, 992056181, -1124723744, -1131134189, -1130344395) + + W(3, -1151491221, -1128516275, -1098678002, 1056678303) + + W(4, -1109419958, 1021039583, -1139701245, -1102217815) + + W(5, 1045087960, 1021619781, -1098726544, -1132041247) + + W(6, 1009250957, -1104833467, 1058072040, -1095085849) + + W(7, -1109643932, -1140161581, 1031005836, -1113648843) + + W(8, 1036986521, 1041104615, -1121889031, 1026848535) + + W(9, -1131857944, -1119585353, 1024521495, -1129179587) + + W(10, -1127292144, -1125244995, 1009310828, -1121962739) + + W(11, 1032839164, 1025292595, 1017031326, 1005622747); + sum2 = W(0, 992821683, 1023078017, 1023704638, -1112719649) + W(1, 1027637582, 1021903376, 1017011277, -1140746852) + + W(2, -1139829185, 1034266306, -1119668356, 1016759855) + W(3, 1011814063, 959773689, -1107874815, 1032357888) + + W(4, -1113625790, 1034398452, 999390943, -1112338116) + + W(5, -1113377165, 1057654961, -1086342498, 1051594315) + + W(6, -1121326865, 1035095219, -1108229047, 1012152366) + + W(7, -1101720659, 1035110699, -1128713872, 1024063493) + + W(8, 1033653422, -1122811174, -1136544465, 1031049449) + + W(9, -1144442308, 1023948090, -1133039950, 1020168485) + + W(10, -1121720102, 1018271190, -1138094133, 1032024680) + + W(11, -1140161389, -1113617432, 1019370668, 1021410443); + WS(-1098338111, 1023498750); + sum1 = + W(0, -1138701849, -1121510465, 1040383254, 1010706669) + W(1, -1149170878, 1021863397, 1022617405, -1112416999) + + W(2, 1041395124, -1110248019, 1020223099, 1016227961) + + W(3, -1168316434, -1180399652, 
-1097293461, 1058209828) + W(4, -1106291545, 1018489101, 989875557, 1037491593) + + W(5, -1094988553, -1095385553, 984962665, 1021609037) + W(6, 1014043241, -1102495433, 1059563452, -1097437690) + + W(7, -1119747464, -1145092370, 1028694643, -1121884733) + + W(8, -1118903112, 1046085501, -1122110350, 1026243438) + + W(9, 1012653195, -1122132512, 1027507886, -1130301425) + + W(10, -1123237039, 992349061, -1123740578, 1036415429) + + W(11, -1126796061, -1128488725, 989067449, -1164474953); + sum2 = + W(0, -1164068108, 1029420636, -1112077059, -1110964317) + W(1, 1028276360, -1148255859, -1120898035, 1036981221) + + W(2, -1106651210, -1141413971, 1028557670, -1129246253) + + W(3, 1013829433, 1035658823, -1106432341, 1035230257) + W(4, 1017251513, -1131530425, 972583599, 1039879187) + + W(5, -1108370344, 1044568046, 1026800348, -1126980980) + W(6, 1016837545, -1102894380, 1027609202, 1041920264) + + W(7, -1124056839, -1121785123, -1128855117, -1128781606) + + W(8, 1035930993, -1110600858, 1023713154, -1124361549) + + W(9, -1120505887, 1013125305, 1026372950, -1125837008) + + W(10, -1132458450, 1009116645, -1130000643, 1008550341) + + W(11, 1017927697, 1019426133, -1129712212, -1127748642); + WS(1064075168, -1085260633); + sum1 = + W(0, 1020097830, -1104011171, 1041894009, 1034297780) + W(1, -1113914242, -1138151419, 1004279127, -1113516590) + + W(2, 1036855277, -1106861493, 1031661466, -1151789197) + + W(3, 1028112237, -1103036207, 1045116105, -1112331770) + + W(4, 1017072798, -1129921062, 1026532451, -1102348791) + W(5, 1027716980, 1058153819, -1100287056, 1026370483) + + W(6, 961537748, -1120389361, -1096376936, 1043898973) + W(7, -1119328440, 1025128111, -1130706656, 1038928889) + + W(8, -1096709211, 1048347472, 1032287909, -1140511211) + + W(9, -1135275149, -1129088474, 996846509, -1164071066) + W(10, 1019059671, -1167790389, 994604265, 1031068695) + + W(11, -1102472071, -1118081687, 1040008649, -1140610427); + sum2 = + W(0, 1014633478, 1033869831, -1114935893, 
-1111281510) + W(1, 1016908923, 990649086, -1138053524, -1133342724) + + W(2, -1126366588, -1128243618, -1145483991, 996067150) + + W(3, -1140130516, 1003291347, 1043822142, -1107071420) + W(4, 1006526455, 1012165262, -1120104172, 1033872317) + + W(5, -1086850225, -1084734921, 1045857736, -1119271334) + W(6, 1012698026, 1006625651, 1030145358, 1067004758) + + W(7, 1020093203, -1129772456, -1123853902, -1121932314) + + W(8, 1040015383, 1046081705, -1118176982, -1144422823) + W(9, 1013120742, 1004319943, -1150276254, 1002809783) + + W(10, -1161820989, -1141799647, -1133638700, 975983930) + + W(11, 1028724356, 1031930717, -1104669476, -1143148999); + WS(-1102756414, -1120030182); + sum1 = W(0, -1114857721, -1102767115, 1043961809, -1124210446) + + W(1, -1117667780, -1120413918, 1007304745, -1115971707) + + W(2, -1101676399, 1028311347, -1113057893, -1125847632) + + W(3, -1113282502, -1095987354, 1053600405, 1046261656) + + W(4, -1103902228, -1114932345, -1135708169, -1101194087) + + W(5, 1065521342, 1068133511, -1095174540, 1027169042) + + W(6, -1130226572, -1099422340, 1046696200, 1042816511) + + W(7, -1101926052, -1125196828, -1115825727, -1109693396) + + W(8, 997946179, -1098872988, 1020822452, -1114420141) + + W(9, -1142173365, -1122657804, -1112668237, -1133742993) + + W(10, -1113901869, -1121096548, -1136250680, -1100198174) + + W(11, 1025742970, -1113205915, -1140085511, -1110338975); + sum2 = + W(0, 1022430051, -1119384349, -1127804673, 1011315171) + W(1, -1121146859, 1009328259, -1126993785, -1117046041) + + W(2, 1014745875, -1106680506, 1009189211, -1125156477) + + W(3, 1027244407, -1113440146, -1129639049, 1020590601) + + W(4, -1115604020, 1000906166, -1136699515, -1099757367) + + W(5, 1065124465, 1034863647, -1111752578, 1017063165) + W(6, -1115987625, 1038214736, -1113943514, 1035633573) + + W(7, -1124061319, -1117062841, 1024283610, -1152620459) + + W(8, -1130768777, -1116743921, -1120130033, 1024425511) + + W(9, -1120680273, 1013074835, -1113854522, 
-1127682237) + + W(10, -1148961286, -1119191933, -1137060371, 1015902709) + + W(11, -1112590762, 1012706011, -1113078716, 1024105421); + WS(-1077988040, -1098609713); + sum1 = W(0, 1020152245, 1040202093, -1114910389, -1104283732) + + W(1, 1039298438, -1125387021, 1002836371, 1027276515) + W(2, 1037438449, -1105856183, 1028081393, 1013826369) + + W(3, -1132015209, 1049452385, -1100627367, -1098108347) + + W(4, 1024125142, 1018674229, 1010566313, 1032875267) + W(5, 1030654167, -1102427678, -1131705423, 1017725301) + + W(6, 1010053945, 1036663212, -1113362709, -1104090280) + + W(7, 1042947115, -1141124531, 1020486319, 1034908465) + W(8, -1095852652, 1048934265, 1019783717, 1015682181) + + W(9, 1030571159, -1114877050, 1025947458, -1126406169) + + W(10, 1021770278, 1006527587, -1132252076, 1038229775) + + W(11, -1105166249, -1138504825, 1035155325, 1016700365); + sum2 = W(0, -1142267837, -1132626695, 1023852808, 1039691642) + + W(1, -1114839167, -1147709133, -1139598567, -1138132903) + + W(2, 1033876712, -1110747637, -1127924867, -1155304986) + + W(3, 967895252, -1131100139, 1012220303, 1061640184) + W(4, -1122160974, -1121532798, 1008554815, 1032537913) + + W(5, -1095058264, 1037101696, 1049832068, 992067354) + + W(6, -1145406733, -1112410791, -1110252153, -1091888991) + + W(7, 1027903240, -1128735955, 1024473654, 1023361695) + + W(8, -1104029741, -1161200181, -1126050715, 1025251228) + + W(9, -1120792990, -1130141475, 1031843465, -1119701362) + + W(10, -1132864167, -1157362170, 1020245499, 1015868043) + + W(11, -1196949327, 977066218, -1118663498, 1009680879); + WS(1050097375, -1144620389); + sum1 = W(0, -1128715377, 1044211901, -1107032850, -1104047251) + + W(1, 1044522647, -1128884681, 1011057731, 1033637193) + W(2, 1022227585, 1028225817, 1008625791, 1011579579) + + W(3, -1134438475, 1048874276, -1096643663, -1092741994) + + W(4, 1044273989, 1018667761, 1019193873, 1034983634) + W(5, -1114392598, 1036584266, -1113959235, 1025301657) + + W(6, 1022275225, 
1042543166, -1092702793, 1025153498) + W(7, 1042105869, 1024565397, 1009602711, 1017343103) + + W(8, -1097617495, 1028276325, 1041046056, -1123355105) + + W(9, 1032927683, -1149239594, -1139445891, -1142331733) + + W(10, 1025445761, 1027058603, -1136253330, 1037918654) + + W(11, -1097624368, -1118553533, 1045159742, 1015982985); + sum2 = + W(0, -1125113091, 1033296420, 1004274749, -1102915066) + W(1, 1028147108, 1017918867, -1172679141, -1128383915) + + W(2, 1016688407, 1037338868, -1144981581, 991135609) + W(3, -1152889753, 1019343075, -1103752812, -1089888071) + + W(4, 1030353331, 1022111055, -1126905475, 1031861037) + + W(5, -1097163724, -1090194983, -1121675098, -1117974458) + + W(6, 1006649294, 1038268779, 1062746712, 1041005261) + W(7, 1019962267, 1026630548, -1117488938, -1113645001) + + W(8, 1051715314, -1118523722, 1006661278, -1115156997) + W(9, 1019337735, 1028739248, -1124587947, 1027501384) + + W(10, 1023891692, 1018829507, -1172252645, -1120621430) + + W(11, 1035778038, -1154227385, 1013440998, -1136345798); + WS(-1098112447, -1125216946); + sum1 = + W(0, 1010417568, -1112042689, 1004161984, 1041397655) + W(1, -1114418485, 1021110568, 1006322176, -1119953952) + + W(2, -1112614584, 1018267345, 1024614191, -1137912440) + + W(3, -1140685064, -1116450322, -1090400895, 1058507679) + + W(4, -1104618697, -1127130192, 1017991280, -1112784281) + + W(5, -1113885864, 1050855349, 1027345269, 1019594896) + + W(6, -1126853432, -1105219289, 1051228911, -1097135921) + + W(7, 1037685358, -1116355204, -1146614696, 1023451023) + W(8, -1126136276, -1122997076, 1039454784, 987030015) + + W(9, 1004323364, -1153582112, 1016683168, -1149231824) + + W(10, -1140802306, 988441407, 1019294567, -1110071894) + + W(11, 983864831, 1012208560, 1030420791, -1149650752); + sum2 = + W(0, 1035141604, -1152950404, -1100547860, -1104792229) + W(1, 1037875812, -1138078871, -1134943159, 1010887850) + + W(2, 1025641276, -1119115361, 1021611837, 1016878804) + + W(3, 1031353634, -1122370025, 
-1101375945, -1100366049) + + W(4, 1015511489, 1015482099, -1120750640, -1101926585) + + W(5, 1057546844, 1042205116, -1109199537, -1140091328) + W(6, 1025254666, -1104605825, 1048526282, 1042952063) + + W(7, -1105787312, 1032201266, -1114509439, 1035852377) + + W(8, -1108641700, 1039483674, -1109299079, -1132053601) + + W(9, 1028756554, -1114959163, 1033576762, 1030776026) + + W(10, -1143994351, 1007578680, -1113538005, -1148680422) + + W(11, 1037895648, 1041006229, -1107406256, -1124574444); + WS(1027823864, 1049081066); + sum1 = W(0, 1012131718, 1045092167, -1108779609, 1031292119) + + W(1, -1168425160, -1123848398, -1130128499, 1032126573) + + W(2, -1127539283, 1050389010, -1113259988, 1017365039) + W(3, 1020918007, 1043895467, -1101697729, 998802811) + + W(4, 1036534559, 1015583683, -1147020173, 1034108246) + + W(5, -1094495268, -1089568357, 1026355631, -1120941186) + + W(6, 1027080278, 1035993776, 1047905888, -1096020979) + W(7, 1044370080, 1024649726, 1020448605, -1104998342) + + W(8, 1057437086, -1097851474, -1112197645, -1123264842) + + W(9, 1012408472, -1129990263, 1041161122, -1105719893) + + W(10, 1035394325, 1027345300, 1024273997, -1123698278) + + W(11, 1052750589, -1101687913, -1103284196, 966350543); + sum2 = + W(0, 1023788251, -1115639872, 1042970848, 1041100183) + W(1, -1120506242, -1214633659, 982227227, -1150596845) + + W(2, 1046665761, 1053945290, -1121383639, -1143798279) + W(3, 1006064903, 1015277390, 1037025188, -1128807740) + + W(4, 1017634434, 1023282498, -1119196045, 1006169559) + W(5, -1080187678, -1078236510, 998104205, -1114831851) + + W(6, 1022374646, 1034991246, 1047303977, 1053440142) + W(7, 1037533712, 1023435593, -1139981943, -1110295858) + + W(8, 1052132899, 1058969577, -1112892146, -1124500500) + W(9, 1006364327, -1148612007, 1033799186, 1029728171) + + W(10, 1033828190, 1015376522, -1130167410, -1131776900) + + W(11, 1035867784, 1005561207, -1110685126, 1014287051); + WS(-1088189216, 1043089281); + sum1 = + W(0, -1140748743, 
1049770738, -1088213532, 1042644994) + W(1, 1041481493, 1008330103, -1121398410, 1041680407) + + W(2, -1096476823, 1047024381, -1107153702, 1031833588) + W(3, 1026065228, 1048806759, -1084369379, 1050638745) + + W(4, 1035104215, 1020682979, -1149048846, 1043836791) + W(5, -1108723571, -1101872740, 1041455384, 1026205458) + + W(6, 1020127051, 1034371336, 1036745881, -1094226627) + W(7, 1040593116, 1006701623, 1036138263, 1024770968) + + W(8, 1041309981, -1096464492, 1045395982, -1120876186) + + W(9, -1128555260, 1037337862, 1011670631, -1106071481) + + W(10, 1040128941, 1024336228, 1038047047, -1137213911) + + W(11, 1028379642, -1102959693, 1016152290, 1028740542); + sum2 = + W(0, -1129618765, 1028575755, 1018469269, -1109549862) + W(1, 1022763469, -1123897489, 1004169862, -1114659435) + + W(2, 1020493837, 1025826707, -1116607191, -1153284395) + + W(3, -1163586391, 1025093243, -1120959857, 1018231561) + + W(4, 1017295613, -1131222849, 1012667011, -1110197183) + W(5, 1045602812, 1050121196, -1110473213, 1019030581) + + W(6, 1004789414, -1140439179, -1119013866, 1009644755) + + W(7, -1143499366, -1125130849, 1006867267, -1110600679) + + W(8, 1007539419, 1038682513, -1107944702, 1009132883) + W(9, -1148564598, 1019765437, -1126956769, 1017107081) + + W(10, -1123266847, 1006886939, 1023888443, -1106146141) + + W(11, 1005311510, 1040899502, -1114206671, -1127991173); + WS(-1082596160, 1043517696); + sum1 = + W(0, 1028909798, -1154689545, -1104169149, 1025985531) + W(1, 1029418231, 1024711058, -1153492953, 1032486047) + + W(2, -1117361518, -1118729126, 1034798166, -1125485751) + W(3, 967933898, -1128767583, 1049441299, 1031994846) + + W(4, 1006099469, 1008177302, 1019116907, 1037703496) + W(5, -1087176649, -1089391227, 1046438773, 986175666) + + W(6, -1132218099, -1115978788, -1115448907, 1056185125) + + W(7, -1106352341, -1125946755, -1129844133, 1027446332) + W(8, 1018413135, 1032249225, 991686873, 1030151438) + + W(9, 1011144392, -1146849277, -1154137305, 1036574828) + 
+ W(10, -1131200252, 1006963806, 999709323, -1114471573) + + W(11, -1130624603, 1042483386, 1022706506, 1019997155); + sum2 = W(0, 987806561, -1103978637, 1039699877, 1032745921) + W(1, -1106709297, 1017765222, 1024001581, -1138508200) + + W(2, -1109586411, 1036006171, 984628641, -1151113585) + + W(3, -1132313544, -1113576691, -1109697688, 1041044376) + + W(4, -1135869680, -1128136972, -1138298076, 1037869389) + + W(5, 1004783368, 1057490671, -1123523352, -1126668402) + + W(6, 1003342504, 1041829747, -1101655248, -1120283284) + + W(7, 1037905909, -1142518888, -1131321664, -1102361734) + + W(8, 1023850463, -1118937466, -1133053924, -1119975873) + + W(9, 1015369606, 995389041, -1123567230, -1137071480) + + W(10, 1008345900, 1015435786, 1016585830, -1103350916) + + W(11, 1033013495, 1023653113, -1123775069, -1153724113); + WS(1066487464, -1081148228); + sum1 = + W(0, -1156758047, 1045622655, -1121579186, 1016497454) + W(1, 1026804003, 1025331662, -1115667101, 1039828621) + + W(2, 1016276052, 1034243041, -1129971954, 1022570928) + W(3, 1032245054, 1043227835, -1107379682, -1125065087) + + W(4, 1036683903, 1033790001, -1120457266, 1051174634) + W(5, -1081882686, -1081200961, 1050524446, 950141917) + + W(6, 1025970038, 1038187760, -1114502289, -1121918554) + W(7, 1033938044, 1035886143, 1028999903, 1035936180) + + W(8, -1110627634, -1121754318, 1045144826, -1123227402) + + W(9, -1129094085, 1028125692, 1032926021, -1138807120) + W(10, 1028558202, 1023360168, 1031685286, 1038703027) + + W(11, -1130240468, -1128847308, 1036935449, 1033245399); + sum2 = W(0, 990406129, 1013162021, -1111903383, 1031481662) + W(1, -1112035485, 1026146121, -1109318105, 1040565489) + + W(2, -1098989385, 1036428343, -1111376696, 1020848344) + + W(3, 1032976305, -1125670617, -1114873385, 1043786440) + W(4, 1007686716, 993596601, -1108583903, 1041812300) + + W(5, 1042303391, 1039454897, 1044627344, -1114987237) + W(6, 1009651516, -1123723867, 1001920896, 1032603577) + + W(7, -1107723593, 
1030752081, 1027268958, 1031967357) + + W(8, -1113158574, -1097549253, 1043104932, -1112152994) + + W(9, -1109295843, 1033460548, 1015358203, -1117993943) + + W(10, -1142055788, -1128311189, 1034160517, -1113698315) + + W(11, 1032025507, -1119346601, -1111706149, 1024170945); + WS(-1085700080, -1080808385); + sum1 = + W(0, 1024296914, -1130852634, -1113008209, 1037784580) + W(1, -1128041639, 1011155704, -1122227946, 1020782261) + + W(2, 1011762792, -1106441949, 1023563097, 1007950112) + W(3, -1137292464, 1018157624, -1132968658, 1034492712) + + W(4, 1002605936, -1130475172, 1023737634, 1000516328) + W(5, -1126768286, -1096045179, 1048917408, 1027876850) + + W(6, -1114666035, 1007198528, -1104353680, 1044579508) + + W(7, -1126595297, -1148910448, 1017074878, 1034334952) + + W(8, -1108931914, 1035021791, -1127030516, 1030237498) + + W(9, -1116342907, 1028012332, -1128335092, 1016641544) + + W(10, -1124799637, -1178616327, 998628462, 1030076118) + + W(11, -1106440695, 1037317931, -1122557970, 1026212766); + sum2 = + W(0, -1127545645, 1009004191, 1015771798, -1131692274) + W(1, 1014637794, 996126556, 1018138398, -1136885460) + + W(2, -1151865619, -1125888003, 1023581282, 1018938725) + + W(3, -1144042592, -1141392422, -1107745249, -1091675886) + + W(4, 1057331451, -1136255574, 999025130, 1032831862) + W(5, -1097510643, -1072938398, 1076041086, -1112160457) + + W(6, -1139402988, 1017820345, -1117369881, -1091916978) + + W(7, 1056314077, -1150817344, -1150259805, -1138954980) + + W(8, 1017078443, -1117354278, 1032772879, -1138128772) + W(9, 985356315, 1013413304, -1132039248, -1124163047) + + W(10, 1020965023, 1018397555, -1133933532, -1164078621) + + W(11, -1132951491, -1138824868, 1026258229, 985080995); + WS(1065410184, 989310727); + sum1 = W(0, -1129262375, -1109321255, 1036945010, 1026154888) + + W(1, -1134197844, -1125355007, -1140125102, -1114916286) + + W(2, 1039766830, -1115626794, -1121887434, 1012320294) + + W(3, -1116437485, -1113105487, 1017194412, 
1060001058) + + W(4, -1094555822, -1137406030, -1133156750, -1103994248) + + W(5, 1046741097, 1017259310, -1114897776, -1133486286) + + W(6, 1013595422, -1113291025, 1059311888, -1103200016) + + W(7, 1023750749, -1124720479, -1140436482, -1135877737) + + W(8, -1110004779, 1018378495, -1113774366, -1115411390) + + W(9, 1001232640, 1027066925, -1131666807, -1131905035) + + W(10, -1124072268, 1004266540, -1120957855, 1020121135) + + W(11, -1115618446, -1127100975, -1116648163, -1123146591); + sum2 = W(0, -1134742142, 1015903261, 1020154303, -1137672551) + W(1, 1024853570, 1005131169, 994239369, 1014594226) + + W(2, -1150882457, 1023023759, 1017377705, -1125421078) + + W(3, -1134258278, -1129761964, 1022217883, 1027518631) + + W(4, -1148789809, 1001175413, -1126643555, 1031995313) + + W(5, 1040302074, -1097618001, 1026258033, -1138047636) + + W(6, -1124787402, 1009465920, 1049285742, -1098762324) + + W(7, -1132512970, -1148246653, 1015413679, 1021296491) + + W(8, 1029784509, -1122992412, 1015729459, 1016986651) + + W(9, 991423945, -1131866907, -1135790753, -1126884057) + + W(10, -1130065044, -1142203369, 1010959842, 1022764367) + + W(11, -1122024941, -1118740892, 980872325, 992745521); + WS(1051937311, 1019742014); + sum1 = W(0, -1128733562, -1110586220, 1044317379, 1034449865) + + W(1, -1129053661, -1126315858, 997229903, 1007594774) + + W(2, -1115449474, 1040493967, -1122019556, -1118602611) + + W(3, 1023734487, -1114376035, 1058430122, -1106892452) + + W(4, -1137597924, 1007433460, -1116159949, -1126695312) + + W(5, -1131615423, -1096043495, -1114653189, -1115826781) + + W(6, -1132293538, 1032525373, 1028795565, 1048571437) + + W(7, -1104742917, 1027349977, -1139661928, -1154941499) + + W(8, 1043036192, -1110832404, -1106286569, 1018237258) + + W(9, -1123545457, 1019369518, -1111719346, 1031893623) + + W(10, -1118024837, -1123671919, 1016780849, 1026460481) + + W(11, -1122201669, 1016066306, -1121789868, 1018711058); + sum2 = W(0, 1008400273, -1127535498, 
-1110432262, -1120718262) + + W(1, -1113670226, 1025653041, 1016157529, -1127740478) + + W(2, -1101104373, -1108362087, -1146032407, -1134925453) + + W(3, -1144490357, 1041994790, -1088501039, 1042410198) + + W(4, -1132649478, 1011207517, -1138058389, 1049438349) + + W(5, 1056816205, -1105949033, 1045816390, -1153231998) + + W(6, -1125392389, -1105215135, 1056199589, -1108549667) + + W(7, -1122823248, -1127906971, -1130565626, -1113276412) + + W(8, 1025802623, 1029274599, -1139374331, 1000306913) + W(9, 965128306, -1135958083, 1003347845, -1152613734) + + W(10, -1123972337, -1157554354, -1135290574, -1114929610) + + W(11, 1003318865, 1031964529, 1019133207, -1130500213); + WS(-1117640696, 1034706864); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[507]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { temp[pos] = (value); } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 507; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 13, y = (uint)id % 13; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (3)) + 0.5, float(group_base.y + y - (2)) + 0.5)).x; + } + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[12]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 
2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 4]; + samples[1][1] = inp[local_pos + 5]; + samples[1][2] = inp[local_pos + 13]; + samples[1][3] = inp[local_pos + 14]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; + samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 26]; + samples[3][1] = inp[local_pos + 27]; + samples[3][2] = inp[local_pos + 28]; + samples[3][3] = inp[local_pos + 29]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; + samples[4][2] = inp[local_pos + 39]; + samples[4][3] = inp[local_pos + 40]; + samples[5][0] = inp[local_pos + 41]; + samples[5][1] = inp[local_pos + 42]; + samples[5][2] = inp[local_pos + 43]; + samples[5][3] = inp[local_pos + 44]; + samples[6][0] = inp[local_pos + 52]; + samples[6][1] = inp[local_pos + 53]; + samples[6][2] = inp[local_pos + 54]; + samples[6][3] = inp[local_pos + 55]; + samples[7][0] = inp[local_pos + 56]; + samples[7][1] = inp[local_pos + 57]; + samples[7][2] = inp[local_pos + 65]; + samples[7][3] = inp[local_pos + 66]; + samples[8][0] = inp[local_pos + 67]; + samples[8][1] = inp[local_pos + 68]; + samples[8][2] = inp[local_pos + 69]; + samples[8][3] = inp[local_pos + 70]; + samples[9][0] = inp[local_pos + 78]; + samples[9][1] = inp[local_pos + 79]; + samples[9][2] = inp[local_pos + 80]; + samples[9][3] = inp[local_pos + 81]; + samples[10][0] = inp[local_pos + 82]; + samples[10][1] = inp[local_pos + 83]; + samples[10][2] = inp[local_pos + 91]; + samples[10][3] = inp[local_pos + 92]; + samples[11][0] = inp[local_pos + 93]; + samples[11][1] = inp[local_pos + 94]; + samples[11][2] = inp[local_pos + 95]; + samples[11][3] = inp[local_pos + 96]; + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 41]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= 
outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret); +} +//!PASS 2 +//!DESC NNEDI3 (double_x, nns32, win8x6) +//!IN INPUT, temp +//!OUT OUTPUT +//!BLOCK_SIZE 64, 8 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[12]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 12; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 48.0; + float mstd1 = sumsq / 48.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, -1120567290, -1132374181, -1132591953, -1125130389) + + W(1, -1117911182, -1119806663, -1121091407, -1126690796) + + W(2, 994153301, 1016607238, -1122316900, -1146864427) + W(3, 1031756856, 1024055745, 1003125923, 1029429758) + + W(4, -1139074345, 992947237, 1053527378, 1046512564) + + W(5, -1093298645, -1112059294, -1120338466, -1110173889) + + W(6, 1021572063, -1137843051, -1096186409, 1056444330) + + W(7, 1051252577, -1109040755, -1148456803, -1113641375) + + W(8, -1117468940, -1125619459, -1144213939, -1138699933) + + W(9, 1023905988, 1033651817, -1133630411, 1032546877) + + W(10, -1119615974, 1000210819, -1121684346, -1129869365) + + W(11, 979248533, -1119537890, -1141776739, -1127458445); + sum2 = W(0, -1126549409, -1119263575, 1019494412, -1124834267) + + W(1, 1007192676, -1122284082, -1127647730, 1010488320) + + W(2, 1022573468, -1114028928, -1116443736, -1099458226) + + W(3, 1017309436, -1118444706, -1121042004, 
-1109973330) + + W(4, -1140490164, -1119708173, 1032492771, 1055169273) + + W(5, 1044169825, 1039787167, -1145097543, 1044065299) + W(6, -1114833146, 1024108770, 1037187519, 1055452063) + + W(7, -1106256028, -1112035826, -1125070906, -1121131548) + + W(8, 1021805248, -1146392599, 1026983682, -1095922228) + + W(9, -1122158651, -1118290710, -1129575364, -1121253833) + + W(10, 1003196031, -1113990054, 1006796332, -1126677517) + + W(11, 1022777176, -1130573089, -1123455912, 1019346120); + WS(1060265584, 1063714812); + sum1 = W(0, 1021450242, -1123065753, -1137050988, 1021294882) + + W(1, -1118036885, 1009531192, -1118665549, 992320459) + + W(2, -1128788280, 1022311347, 1028928107, -1140362288) + W(3, 1024570351, 1035626828, 1025121435, 1029327365) + + W(4, -1111111528, 999728711, -1119265827, -1137639751) + + W(5, -1101623708, -1106406289, -1128138578, -1106167682) + + W(6, 1039397963, -1108349889, 1026074151, -1102959280) + W(7, 1041760911, 1036057270, 1017790822, 1037788386) + + W(8, -1131322533, 1022720848, 1019796282, 1043907075) + + W(9, -1151514983, -1132552740, -1124857459, -1134343810) + + W(10, 1011253684, 974355898, -1136338948, 1025863841) + + W(11, -1136816500, 1027651305, -1139070668, 1024478285); + sum2 = W(0, 1023758242, -1124053764, 1000956747, -1122894214) + W(1, 999883691, 1004424067, -1150041590, 1009546537) + + W(2, -1130878336, 1017938673, 1028583936, 1031015106) + W(3, 994031846, 999096115, -1134395125, 1005722235) + + W(4, -1120671625, -1145138723, 1005174931, -1104963408) + + W(5, 1001047899, -1126082311, 1002728051, -1135618057) + W(6, 1027498838, 1027873020, 1053869854, 1073183850) + + W(7, 1053107786, 1035733703, 1022225293, 1023923178) + + W(8, -1144315403, -1122179044, -1093535895, -1074622268) + + W(9, -1094361386, -1114468554, -1123867271, -1126165581) + + W(10, -1131041230, -1127336921, 1009416925, 1033668418) + + W(11, 990806038, 992895974, -1131337021, -1138321549); + WS(1067199072, 981674447); + sum1 = W(0, -1113726457, 1018794771, 
-1115871564, 1008735750) + + W(1, -1125817867, -1125149117, 1001830321, 1016463803) + + W(2, -1105288827, -1113372949, -1095136821, -1102105679) + + W(3, -1103744656, -1108798620, -1137689902, -1099284685) + + W(4, 1040785760, -1099180638, 1050678235, 1052734745) + W(5, 1044281884, 1033282170, -1121192922, 1032013917) + + W(6, 1032552483, 1040483381, 1054762461, 1065065389) + W(7, 1044820043, -1097094388, 1010644366, -1124668651) + + W(8, -1124907206, -1114167068, -1107257499, -1097429016) + + W(9, -1104994292, 1027211226, -1114303237, 1015961570) + + W(10, -1139898486, -1127069135, -1118671794, 1035770473) + + W(11, -1122788302, -1125646899, -1123221796, -1114483455); + sum2 = W(0, -1124538541, -1139313841, -1119374292, 1010830983) + + W(1, 1020827626, -1125616841, 1008596323, -1136528497) + W(2, 1021627528, 989560618, 1014774531, 1026431506) + + W(3, -1118520012, 1018739126, 1014458179, -1129452021) + + W(4, -1127440395, 1028695072, -1102978428, -1087762234) + + W(5, 1027251929, -1109480314, 1023848922, -1132845253) + W(6, 1027300307, 1011050585, 1052523334, 1053318756) + + W(7, -1113197028, 1034265115, -1134832277, 1022117249) + W(8, 1020622605, 999023778, -1119874204, 1038687085) + + W(9, 1029469191, -1125820221, -1159659466, -1150305989) + + W(10, -1130074937, 1015498074, -1126569197, -1128231773) + + W(11, 1018969905, -1123965604, 1024095771, -1130502215); + WS(-1082327904, -1101742629); + sum1 = W(0, -1128942355, 1007179909, -1172666584, -1115398989) + + W(1, -1145393643, -1126910957, -1153678686, -1127439210) + + W(2, 994965062, -1124051394, 1029361427, 1047686461) + W(3, 1028067431, 1025118144, -1121058723, 1018201755) + + W(4, -1116531225, 1022855203, 1044526075, -1095192109) + + W(5, -1123739865, -1116394763, -1123838961, -1120454737) + + W(6, 1027052286, -1119991818, -1110346311, -1106285700) + + W(7, 1043850022, 1030498357, 1032721886, 1034890279) + W(8, -1127603958, 1015495233, 1026992677, 1037065627) + + W(9, -1119388383, 1013662917, -1119902562, 
1021249778) + + W(10, 1021969515, -1129777663, 1017142675, -1113404469) + + W(11, -1130059467, 1016457699, -1131255799, 1017348651); + sum2 = W(0, 1014279427, -1131491186, 1026571161, 1042930837) + W(1, 1025279833, 993445038, -1133457551, 1014707171) + + W(2, -1130890464, -1137235679, -1097554196, -1073378768) + + W(3, -1092458255, -1130245298, -1120075373, -1135266971) + + W(4, -1129394348, 1024729347, 1048952496, 1073588633) + W(5, 1053816664, 1029166823, 1030737081, -1151431678) + + W(6, 1014973315, -1130557096, 1014221291, -1105375875) + + W(7, 1016092170, -1119905200, -1147013927, -1161525308) + + W(8, -1152876382, -1150339726, 1016692910, 1035354953) + + W(9, -1129170378, 1027382919, -1124802440, 979085751) + + W(10, 1004833335, -1141673719, -1159625756, -1124590261) + + W(11, 989537980, -1138326931, 1017200998, 1009604643); + WS(1067891072, -1132872541); + sum1 = W(0, 1025237873, 1014334757, 1030238795, 1029025473) + W(1, 1007260181, -1121222990, 988616634, -1123834925) + + W(2, -1122834636, 1020114356, -1105197551, -1121917372) + + W(3, 1038150728, 1024596920, 1024170379, 1015271403) + + W(4, -1117007553, -1118569017, 1049984500, -1114461425) + + W(5, -1084792932, 1039495098, -1124531763, 1027254009) + + W(6, -1114151507, 976917621, -1088419628, 1051488823) + W(7, 1053317187, 987986858, 1026119523, 993078101) + + W(8, 1031366707, 1039451348, 1049092386, 1008484089) + W(9, -1122288663, -1131993235, 1022258228, 999667975) + + W(10, 984935338, -1152616053, -1123762161, -1145922699) + + W(11, 1012164149, 1000937675, 1011615629, 1004174827); + sum2 = W(0, -1144599930, -1136232721, -1137221455, -1128645029) + + W(1, -1151231836, -1156748756, -1160295481, 1020235585) + + W(2, -1116760996, 1019759527, -1111064134, 1041961890) + + W(3, 1036336113, 1033905549, 1022489781, -1122711401) + W(4, 1032730282, -1125055029, 1047170030, 1050767412) + + W(5, -1093680614, -1120368706, -1115425401, -1132924343) + + W(6, 1032901632, 1027655943, -1115058011, -1130649513) + + W(7, 
-1116185015, -1105887613, 1012694293, -1122854992) + + W(8, 985378521, 1005941018, -1129460922, 1019394239) + W(9, -1115704458, 1024734935, -1147415538, 1018590227) + + W(10, -1129170670, -1131155477, -1129924333, 1022276751) + + W(11, -1148456898, 1015791177, -1140100033, 1012472897); + WS(1059709408, 1024973287); + sum1 = + W(0, -1126012593, 1004611460, -1134152138, -1114032340) + W(1, -1131432537, -1126017195, 991561039, -1124178504) + + W(2, 996494647, -1122880609, 1020165829, 1049657976) + W(3, 1014735450, 1032225804, -1115613273, 1022764985) + + W(4, -1119189544, 1030391200, 1046947367, -1094041084) + + W(5, 1018380112, -1112475533, -1123292160, -1122300512) + + W(6, 1028092109, -1114579692, -1111968183, -1106474499) + W(7, 1044458845, 1030371012, 1033263557, 1034006830) + + W(8, -1124577204, 1016065247, 1002304420, 1040268033) + W(9, -1114168306, 1023640576, -1119951545, 1017080432) + + W(10, 1023428420, -1127475101, 1021721777, -1114564220) + + W(11, -1128430825, 1019190017, -1127717781, 1017178441); + sum2 = W(0, -1125800395, 1011909323, -1116511680, -1103527255) + + W(1, -1120293374, -1148349175, 1015270352, -1125483347) + + W(2, 1024962708, -1143341847, 1052819438, 1076690943) + W(3, 1057154431, 990717342, 1025045202, 1023577182) + + W(4, 1023886172, -1144213999, -1097320166, -1072687587) + + W(5, -1091630497, 1012280135, -1118805022, 1016249338) + + W(6, -1122338700, 1018807292, -1115186882, -1098249874) + + W(7, -1117298851, 1010582795, 990164302, -1125194137) + + W(8, -1171853367, 1000330119, -1140479659, 1023038428) + + W(9, 1029135274, -1122270538, 1020995472, 1003973863) + W(10, 973576887, 984068764, 1001426319, 1007326739) + + W(11, -1135413483, 1008233815, -1129534965, -1140102471); + WS(1066253200, 1009489111); + sum1 = W(0, -1121073890, 1024085178, -1130104696, 1023275236) + + W(1, -1128214764, -1135799996, -1141685621, -1121776782) + + W(2, -1102592607, -1107707469, -1106598971, -1116780301) + + W(3, -1111680448, 1032050668, -1131519352, 
1028096678) + + W(4, 1041243549, 1026388586, -1121496677, 1049328440) + + W(5, 1018012708, -1096674093, -1122519370, -1101864235) + + W(6, 1033075644, -1107162241, 1050587318, 1057262167) + W(7, 1027096831, 1040843700, 1028580820, -1117086438) + + W(8, -1123366116, 1004872905, -1113976447, -1116780079) + + W(9, -1113850464, 1015552212, -1123000331, 1040749648) + + W(10, -1130535196, -1145989825, -1120961954, 1023066500) + + W(11, -1121521086, 1008378152, -1124335656, -1135644600); + sum2 = W(0, 1013104936, -1117172679, -1127525498, -1135146700) + + W(1, -1122718205, 1027187072, -1119344027, -1128814570) + + W(2, -1106333857, -1121647283, 1031691189, 1030458550) + + W(3, -1111791784, 1009508096, -1127767754, 1037784205) + W(4, 1041445982, 1046293466, 1054977467, 1051999059) + + W(5, 1044523088, -1099681845, -1185033988, -1107269301) + + W(6, -1143812784, -1098323478, 1032669672, 1016608418) + + W(7, -1093831562, 1044831040, -1117227205, -1118967703) + + W(8, -1111423698, 1025469214, -1110723238, 1041509878) + + W(9, 1024902469, -1110045970, -1129401382, -1121987113) + + W(10, -1153458272, -1123006845, 1018478818, -1111394764) + + W(11, -1127833490, -1140396324, -1129041118, 1021194206); + WS(1055988095, 1068562120); + sum1 = W(0, 1025054956, -1130996743, 1024941510, 1026193148) + W(1, 1035977616, 1028839961, 1016648336, 1025665319) + + W(2, 1041692938, 1036197410, 1042434277, 1039706891) + W(3, 1036807045, 1029783554, 1019008523, 1026886128) + + W(4, 1040251631, 1032351834, -1100356972, -1093055992) + W(5, 1047058383, 1043491547, 1044781893, 1048991774) + + W(6, -1141880069, 1043619883, 1026325313, -1087193164) + + W(7, -1087410536, -1097265155, -1104034167, -1099060796) + + W(8, -1113980711, 1002392085, 1040781794, 1032223740) + W(9, 1047196369, 1027612260, 1036828166, -1113322870) + + W(10, 1011769695, 1020511427, -1124414423, -1163338101) + + W(11, 1015241743, -1129538215, 1018634427, -1180507092); + sum2 = W(0, -1109623534, -1119308996, -1108667787, -1107757858) 
+ + W(1, -1126585969, -1144042093, 1004201965, 1024608852) + + W(2, -1098330057, -1121926087, 1031129408, -1111457821) + + W(3, -1091241820, -1092220222, -1112242725, -1104308532) + + W(4, 1051762285, 1045894429, 1029096148, 1060518836) + W(5, 1065480819, -1120268316, 1035423856, -1105721536) + + W(6, -1105318233, -1111281216, -1116694436, -1103134861) + + W(7, 1017373783, 1034184394, -1129342463, -1134873838) + + W(8, -1120494292, 1002875661, 1042501069, -1104320409) + + W(9, 1041202422, -1131983911, 1023586732, 1017105251) + + W(10, 1036873688, -1152598618, -1117793653, 1023528108) + + W(11, -1117516356, -1123122827, -1131565835, 1015347387); + WS(-1073915832, -1098883962); + sum1 = W(0, 1030932360, -1131602112, -1125999684, 1018154928) + + W(1, -1122265980, -1122315861, 1018577993, -1120663220) + + W(2, 1024626915, 1032898976, -1122370130, 1011274315) + + W(3, -1113904235, 1029902191, -1115348785, -1111387910) + + W(4, -1098288699, -1117814224, 1056014501, -1106175819) + + W(5, -1097956311, -1115232133, -1126206816, 1017122848) + + W(6, -1130380922, -1103847418, -1096397881, -1105327485) + + W(7, 1059753579, 1040451325, 1039068637, 1042824131) + W(8, 1033018229, 1040490540, 1031863906, 1044317365) + + W(9, -1098424101, -1122968848, -1136273729, 1021433455) + + W(10, 1021355752, -1120290302, -1131922448, -1139369119) + + W(11, -1121102372, 1033247800, -1121007738, -1143567007); + sum2 = W(0, -1118315643, -1126395193, 1014636591, -1138413531) + + W(1, 1013622247, -1129252552, -1122713894, -1131632742) + + W(2, 1046909717, 1025514844, 1026986728, -1112879553) + + W(3, -1122377333, -1114813410, -1144089687, -1140190295) + + W(4, -1100980148, -1112473945, 1040985677, 1027746798) + + W(5, 1051150943, 1057006779, -1138866101, 1035155819) + + W(6, -1129801877, 1030594060, -1101816290, -1094641835) + + W(7, 1041324898, -1106485598, -1115757412, 1016695560) + + W(8, -1122172234, -1111527676, 1023795758, 1027030278) + + W(9, -1113671456, -1133454616, 1001347783, 
-1139893847) + + W(10, 1010610423, -1135186433, 1030475382, -1121095046) + + W(11, 1015864126, 999966259, -1127643580, -1124292419); + WS(1058139200, 1067005003); + sum1 = W(0, -1131277347, -1128484819, 1014594990, -1112913457) + + W(1, 1028891109, -1118434446, -1125459884, -1151139539) + + W(2, -1103206155, -1112832909, -1108301118, -1111811582) + + W(3, -1113179152, -1100757638, -1116265771, -1119227733) + + W(4, 1015263811, -1111332237, 1004409248, 1031432468) + W(5, 1057408386, 1053177162, 1035803645, 1052437131) + + W(6, 1048238354, 1044526364, 1056580635, -1114379264) + + W(7, 1025802539, -1104236495, -1124751623, -1121289813) + + W(8, -1106607621, -1104751670, -1107635783, -1107787999) + + W(9, 1020725062, -1114959053, -1129296244, -1120069105) + + W(10, -1122297405, -1124214159, 1017238947, -1110162105) + + W(11, 1002560364, -1113724157, -1123244163, -1115210511); + sum2 = + W(0, -1121057595, 1016432775, -1112629457, 1031151061) + W(1, -1126819952, 1021228732, 1009585899, -1127402608) + + W(2, 1023292260, -1124268922, -1116199054, -1110795130) + + W(3, -1103006120, -1134369449, -1120667087, 1016333862) + + W(4, -1111891368, 1029388268, -1123843875, 1054055516) + W(5, 1020478730, -1135412321, 993043492, 996548072) + + W(6, 1035766063, -1118389960, 1033023394, 1054507013) + W(7, -1104664187, -1121918429, 982538191, -1108029597) + + W(8, 1032208606, -1122510207, -1113138713, 1019595258) + W(9, -1114598707, 1023542227, 993709924, -1129671373) + + W(10, -1122161823, 1028529468, -1118001871, 1026811027) + + W(11, -1116707921, 1018790016, -1146185904, -1152045912); + WS(-1091835967, 1038509097); + sum1 = W(0, 1002958335, 1031236696, -1122124314, 1034973544) + + W(1, -1120202292, 1015820538, 1033636218, -1121967836) + W(2, 1041562959, 1038000620, 1042029266, 1034473413) + + W(3, 1040514695, 1042048878, 1029242778, 1042898689) + W(4, -1124369824, 1029288008, -1104824881, 1049571184) + + W(5, -1089024373, -1089426268, -1107643796, -1091407169) + + W(6, -1093861855, 
-1100002304, -1087002164, 1052741396) + + W(7, 1044570703, 1042398171, 1023923250, 1030369604) + W(8, 1039801017, 1038306915, 1037725118, 1041619283) + + W(9, 1031782758, 1038671876, 1025678032, 1043274836) + + W(10, -1132073944, 1032364667, -1122032128, 1036587856) + + W(11, -1138629744, 1010685600, 1027566834, -1153060031); + sum2 = W(0, 1016791821, 1013192458, -1123883287, 1015528749) + + W(1, -1131825297, -1132736674, 1019880949, -1117749640) + + W(2, -1110538641, 1023566599, 1000152052, -1112636904) + + W(3, -1116367667, -1130121037, -1114305335, 1033496981) + + W(4, -1114409767, -1113846927, -1109610416, 1054155729) + + W(5, -1109117303, 1016394661, 1030290683, -1157815185) + W(6, 1045544748, 1029704719, 1034122997, 1054056541) + + W(7, 1030522539, -1116026974, 992430312, -1107824982) + + W(8, -1111278622, -1125617069, -1110039103, -1108588248) + + W(9, -1132906850, -1135451026, -1134728538, 1016739853) + + W(10, -1127655665, 1012727082, -1131212885, 1000573476) + + W(11, -1123699129, -1126704253, 988228497, -1123545619); + WS(-1089794384, 1053743764); + sum1 = W(0, 977181200, 1022034304, 999195154, -1173982736) + W(1, -1131079992, -1117753925, 1011212627, -1131210280) + + W(2, -1124062435, -1123589329, 1008939465, 967031199) + W(3, 1031201254, 1038421055, 1023184516, 1042888267) + + W(4, 1045792382, 1045687772, 1043214811, 1051096635) + W(5, -1106608558, -1104815387, 975115280, -1113606040) + + W(6, -1096122984, -1100185218, -1089534650, -1128424615) + + W(7, 1022835035, 1033659882, 1025453978, -1114474998) + W(8, 1035356461, 1029423919, 1037071922, -1130937578) + + W(9, 1033635179, -1112564552, -1148354182, -1115626236) + + W(10, -1146772642, 1022712508, 986801416, 1009586465) + + W(11, 1012941585, 1008485953, 1023442138, 1020831720); + sum2 = + W(0, 1025756371, -1152866061, 1022411083, 1021941035) + W(1, 1018807869, -1162131898, 1030478695, -1117519259) + + W(2, 1038582018, -1136865390, 1028991375, -1102180290) + + W(3, -1127151005, -1103213990, 
-1148467374, -1101731450) + + W(4, -1095235720, 1027392563, -1094985700, 1062263751) + W(5, 1023141397, 1032099771, -1139645537, 1050734432) + + W(6, -1097242729, -1108446917, -1092586393, 1055770556) + + W(7, 1024429444, 1045256871, -1115314196, 1044538365) + W(8, 1048188946, 1009460859, 1013992713, -1098658877) + + W(9, -1162587434, -1104388978, 1033707193, -1097902791) + + W(10, -1145714840, 1022755591, 1033471029, 1018658879) + + W(11, 1032105067, -1135748387, 1008760659, 1019361371); + WS(-1092258911, -1089815334); + sum1 = W(0, -1139634407, -1147044493, 1017866535, -1130890003) + + W(1, 1027130446, 1016637069, 1016500860, 1026913637) + W(2, 1045821685, 1034203057, 1044613801, 1035729090) + + W(3, 1029847032, -1102281888, -1131657511, -1121093118) + + W(4, -1119357002, 1015718019, -1104936487, -1090722234) + + W(5, 1052628837, 1057948020, 1044884520, 1053191568) + W(6, 1034361159, 1051668019, 1032636456, -1086460492) + + W(7, -1094763749, -1095562645, -1106362539, -1098187032) + + W(8, -1137425069, -1115826689, 1035725147, 1010439579) + + W(9, 1043916211, -1114240161, 1035145793, -1102258214) + + W(10, -1130196683, 1023465264, 1015877619, -1117806874) + + W(11, 1025832214, -1123952546, 1029577644, 1011300119); + sum2 = + W(0, -1128703433, 1012757410, 1007787634, 1023396449) + W(1, 1017560605, 1012244530, -1143125556, 1024938353) + + W(2, -1113670137, 1024378351, -1111552612, -1131966925) + + W(3, -1114649634, 1040107467, -1122758108, 1032092801) + W(4, 1038503213, -1102109659, 1041322248, 1061982310) + + W(5, -1104769108, -1098175974, 1026757617, -1105089779) + + W(6, 1040684472, -1100043210, 1041989415, 1066879681) + + W(7, -1101238360, -1089047102, 1023979343, -1100231891) + + W(8, -1099552212, 1026887817, -1104965070, -1107386978) + + W(9, -1104448348, 1016563929, -1107880053, 1036400101) + W(10, 993905065, 1003273444, -1144755940, 1019447753) + + W(11, 1019550873, 1024790159, -1138003290, 1014825186); + WS(-1086002512, -1084928660); + sum1 = W(0, 
-1131929465, 1017039401, 1020173741, -1134499377) + + W(1, -1129468257, -1138890277, -1132813939, -1143488833) + + W(2, -1115340892, -1113717074, -1102662114, -1097984585) + + W(3, 999020755, -1114393442, 1028151894, -1132982017) + W(4, 1050776738, 1042912513, 1053707114, 1055825787) + + W(5, -1108349590, 1033511359, -1108075378, -1138015505) + + W(6, -1103148388, 1010821235, -1104889757, 1049797700) + W(7, 1042555580, 1041826234, 1032548919, 1041074464) + + W(8, -1143173743, -1128281895, -1111952748, -1098746549) + + W(9, -1106170922, -1103706929, -1127274842, -1110410462) + + W(10, -1130358353, -1122997746, -1139775729, -1117617116) + + W(11, 1014563169, 1010736529, -1137959865, -1123442568); + sum2 = + W(0, 1000102260, -1126633986, -1144291760, -1119827470) + W(1, 1016232493, 1023762781, -1141218312, 1025293286) + + W(2, -1118205558, 1029900308, -1140439712, 1041420529) + W(3, 979974721, 1006017080, -1137204884, 1002898232) + + W(4, 1051204856, 1063980475, 1066576503, -1075398805) + + W(5, -1098954833, -1114660078, 1021034547, -1117648930) + + W(6, 1041999613, 1037162912, -1167793729, -1085093765) + W(7, 1017117707, 1033481679, -1137323924, 1002077480) + + W(8, -1135930364, -1134402620, 1020225891, -1133429672) + + W(9, 1028764817, 1018587503, -1132685152, 1026769102) + + W(10, 1019347314, -1137379540, 1012525172, -1123883092) + + W(11, -1139078108, 990744512, -1137103964, 1022792204); + WS(-1084377664, 1027078466); + sum1 = W(0, 1028609881, -1129675619, 1022038247, 1010908070) + W(1, 1026611885, 1030113110, -1132403564, 1031201461) + + W(2, 1023875764, 1032239673, 1043647401, 1024809124) + + W(3, -1131519215, -1128260416, 1026951667, -1115829469) + + W(4, -1113303893, -1114746833, -1084547118, -1156932707) + + W(5, 1050946557, 1047159719, 1020650755, 1037768177) + W(6, 1031700430, 1026119482, 1051850662, -1104988345) + + W(7, -1083734174, 1027361605, -1111325382, 1042797655) + + W(8, -1138486380, -1122216064, -1132199979, 1033025459) + + W(9, 1043104191, 
1018468419, 1014159432, -1108759836) + W(10, 1025866053, 1016759071, 1032834541, 1029398137) + + W(11, 1007914454, 1021351811, 1014779182, 1018395723); + sum2 = W(0, -1123315648, -1139192721, -1123875414, 1030816333) + + W(1, -1123326315, 1016532474, -1130303816, -1131093304) + + W(2, 1034590473, -1139997869, -1115134908, 1008365317) + + W(3, -1110803030, 1028786615, 1004595129, 1015409534) + W(4, 1026150549, 969235347, 1042477897, 1046059603) + + W(5, -1155992274, -1122978967, -1119026545, -1111529432) + + W(6, 1030314237, -1112534256, 1037120039, 1027803789) + + W(7, -1102080708, -1135183209, -1120008811, -1109468455) + + W(8, -1113579452, -1158492453, -1098184684, 1032895251) + + W(9, -1126452160, 1047700729, 999072441, 1047312851) + W(10, 1014056813, -1132289868, 1018097190, 1032146261) + + W(11, -1114996104, 1011346781, -1115756663, -1122411639); + WS(1043014014, -1087744322); + sum1 = W(0, 1027952711, 1020523582, 1002118409, 1021435438) + W(1, -1138414444, 1003104545, 1021955143, 1013303243) + + W(2, -1123526530, -1138321214, -1110182129, 1032339433) + + W(3, 1035386835, -1137297015, -1119534945, -1108462406) + + W(4, 1008337244, 1022379422, 1057818171, -1110004232) + + W(5, -1083267015, 1007303460, -1115695919, 1036022088) + + W(6, 1035059438, -1120526792, -1086364127, -1118604170) + + W(7, 1057062103, 1036507098, 1038869533, 1036993862) + W(8, -1116610857, 1037330895, 1040407761, 1040846837) + + W(9, -1107059287, -1128420830, 1017909759, -1142792885) + + W(10, 1026995059, -1156487121, 1009757724, -1153254001) + + W(11, 1021783366, 1034169880, -1138765796, 1027514579); + sum2 = + W(0, -1129567658, -1156092221, -1123579911, 1008715269) + W(1, -1125160256, 1026524337, -1137738801, 1017463077) + + W(2, -1117738234, -1131091573, -1115037144, -1115010115) + + W(3, -1101087826, 1015586863, -1113332277, 1019720419) + W(4, 1034622765, -1128107850, 1017778197, 1050631618) + + W(5, 1045062017, -1095615964, 1020345131, -1101223712) + W(6, 1030957051, -1118479197, 
1041246295, 1054490255) + + W(7, -1117275788, 1025836507, 1027688491, -1120912313) + + W(8, 1029443337, 1005860107, -1110926783, -1157251077) + W(9, -1108959270, 1020928095, -1122972204, 983922363) + + W(10, -1132249217, 969559787, -1132354551, 1024782459) + + W(11, 998415619, 1012990901, -1161032699, 1011300789); + WS(1054959103, -1083836992); + sum1 = + W(0, -1144415487, -1132171248, -1133053416, 980216825) + W(1, 1018609320, 1017404666, -1127349353, -1154920122) + + W(2, 1023853723, -1118146184, 1039313221, -1107434917) + + W(3, 1027331294, -1111685883, -1129432820, -1126004808) + + W(4, -1123173616, 1018672592, -1090265426, 1057912142) + W(5, 1044087220, 1046306952, -1133031712, 1039769364) + + W(6, -1121737045, -1123041056, -1161038260, 1053211380) + + W(7, -1089253699, 1036559366, -1110305609, 1035028498) + + W(8, -1124541667, -1120856711, 1034290206, -1109871174) + + W(9, 1040602782, -1113560292, -1148202051, -1109624712) + + W(10, 1017937944, -1131928876, 1027902624, -1129566080) + + W(11, -1121689404, 1010241568, -1124796708, -1127304152); + sum2 = + W(0, 1018338338, -1139755941, 1025801614, -1122001203) + W(1, 1010171613, -1132321439, 1016232007, 1013259501) + + W(2, -1110884586, -1129406227, -1108237236, 1037408540) + + W(3, 1040327756, -1123391437, -1129086463, -1132299663) + + W(4, -1136808221, -1118232763, -1115176435, -1113269344) + + W(5, -1105808410, -1110685464, -1168436052, 1021765462) + + W(6, 1026570078, 1021052542, -1114736489, 1041519115) + + W(7, 1047951095, -1115014721, -1113536502, -1123780081) + + W(8, -1136194357, 1020321772, 1048907220, 1046652366) + W(9, 1035402835, -1111631696, 1026410761, -1102273277) + + W(10, -1120452457, 1004896115, -1119060361, -1107310106) + + W(11, 1033649944, -1126977687, 1026056519, 1027695361); + WS(1057903600, 1067139767); + sum1 = W(0, -1130009103, 1012511358, -1137761926, -1124300975) + + W(1, 1008012526, -1117250069, -1162310274, -1115259538) + + W(2, -1107061793, -1111029392, -1104154592, -1103367501) + 
+ W(3, 1009980758, -1113394087, 1026647906, -1114684710) + W(4, 1043136838, 1028125200, 1051222763, 1060113955) + + W(5, 1024983063, 1016245011, -1117690736, 1025498192) + W(6, -1102099951, 992918145, -1106189879, 1050079197) + + W(7, 1048697267, 1034440698, 1041096574, 1041574755) + + W(8, -1115498357, -1117969679, -1104467697, -1108729546) + + W(9, -1111781863, -1104780916, -1117169584, -1120787335) + + W(10, -1123992244, -1145160172, -1125356943, -1117739880) + + W(11, -1165450354, -1114615608, -1139440358, -1118506516); + sum2 = W(0, 1011962608, -1120699406, 1007214128, 1026799984) + + W(1, -1145867047, 1029224075, -1116434980, 1022659583) + + W(2, 1038855085, -1131692032, 1038467393, -1112343444) + + W(3, -1113849522, -1138735380, -1120001434, -1118784264) + + W(4, -1122245118, -1112627842, -1122221782, -1106597578) + + W(5, 1026356645, -1114988845, 1020413259, -1123194990) + + W(6, -1126451804, -1110442893, 1042136464, 1054959792) + W(7, 1031831390, 1046305460, 1008343524, 1047204017) + + W(8, -1106947014, 1008646548, -1113116412, -1109079281) + + W(9, -1109881757, -1109725169, -1123390380, -1114571313) + + W(10, 1026864717, -1123527836, 999106843, 1011333214) + + W(11, -1117815384, 1032922020, -1119479700, 1000599735); + WS(-1101058110, 1058505899); + sum1 = + W(0, 1008909245, 1015324303, -1151491221, -1139701245) + W(1, 1009250957, 1031005836, -1131857944, 1009310828) + + W(2, -1123852394, -1112299044, -1128516275, -1102217815) + + W(3, -1104833467, -1113648843, -1119585353, -1121962739) + + W(4, 1037579744, 992056181, -1098678002, 1045087960) + W(5, 1058072040, 1036986521, 1024521495, 1032839164) + + W(6, 1024016996, -1124723744, 1056678303, 1021619781) + W(7, -1095085849, 1041104615, -1129179587, 1025292595) + + W(8, -1121096497, -1131134189, -1109419958, -1098726544) + + W(9, -1109643932, -1121889031, -1127292144, 1017031326) + + W(10, 1022516663, -1130344395, 1021039583, -1132041247) + + W(11, -1140161581, 1026848535, -1125244995, 1005622747); + sum2 = 
+ W(0, 992821683, 1017011277, 1011814063, 999390943) + W(1, -1121326865, -1128713872, -1144442308, -1138094133) + + W(2, 1023078017, -1140746852, 959773689, -1112338116) + W(3, 1035095219, 1024063493, 1023948090, 1032024680) + + W(4, 1023704638, -1139829185, -1107874815, -1113377165) + + W(5, -1108229047, 1033653422, -1133039950, -1140161389) + + W(6, -1112719649, 1034266306, 1032357888, 1057654961) + W(7, 1012152366, -1122811174, 1020168485, -1113617432) + + W(8, 1027637582, -1119668356, -1113625790, -1086342498) + + W(9, -1101720659, -1136544465, -1121720102, 1019370668) + + W(10, 1021903376, 1016759855, 1034398452, 1051594315) + W(11, 1035110699, 1031049449, 1018271190, 1021410443); + WS(-1098338111, 1023498750); + sum1 = W(0, -1138701849, 1022617405, -1168316434, 989875557) + W(1, 1014043241, 1028694643, 1012653195, -1123740578) + + W(2, -1121510465, -1112416999, -1180399652, 1037491593) + + W(3, -1102495433, -1121884733, -1122132512, 1036415429) + + W(4, 1040383254, 1041395124, -1097293461, -1094988553) + + W(5, 1059563452, -1118903112, 1027507886, -1126796061) + + W(6, 1010706669, -1110248019, 1058209828, -1095385553) + + W(7, -1097437690, 1046085501, -1130301425, -1128488725) + + W(8, -1149170878, 1020223099, -1106291545, 984962665) + + W(9, -1119747464, -1122110350, -1123237039, 989067449) + + W(10, 1021863397, 1016227961, 1018489101, 1021609037) + + W(11, -1145092370, 1026243438, 992349061, -1164474953); + sum2 = W(0, -1164068108, -1120898035, 1013829433, 972583599) + + W(1, 1016837545, -1128855117, -1120505887, -1130000643) + + W(2, 1029420636, 1036981221, 1035658823, 1039879187) + W(3, -1102894380, -1128781606, 1013125305, 1008550341) + + W(4, -1112077059, -1106651210, -1106432341, -1108370344) + + W(5, 1027609202, 1035930993, 1026372950, 1017927697) + W(6, -1110964317, -1141413971, 1035230257, 1044568046) + + W(7, 1041920264, -1110600858, -1125837008, 1019426133) + W(8, 1028276360, 1028557670, 1017251513, 1026800348) + + W(9, -1124056839, 1023713154, 
-1132458450, -1129712212) + + W(10, -1148255859, -1129246253, -1131530425, -1126980980) + + W(11, -1121785123, -1124361549, 1009116645, -1127748642); + WS(1064075168, -1085260633); + sum1 = W(0, 1020097830, 1004279127, 1028112237, 1026532451) + W(1, 961537748, -1130706656, -1135275149, 994604265) + + W(2, -1104011171, -1113516590, -1103036207, -1102348791) + + W(3, -1120389361, 1038928889, -1129088474, 1031068695) + W(4, 1041894009, 1036855277, 1045116105, 1027716980) + + W(5, -1096376936, -1096709211, 996846509, -1102472071) + + W(6, 1034297780, -1106861493, -1112331770, 1058153819) + + W(7, 1043898973, 1048347472, -1164071066, -1118081687) + + W(8, -1113914242, 1031661466, 1017072798, -1100287056) + + W(9, -1119328440, 1032287909, 1019059671, 1040008649) + + W(10, -1138151419, -1151789197, -1129921062, 1026370483) + + W(11, 1025128111, -1140511211, -1167790389, -1140610427); + sum2 = + W(0, 1014633478, -1138053524, -1140130516, -1120104172) + W(1, 1012698026, -1123853902, 1013120742, -1133638700) + + W(2, 1033869831, -1133342724, 1003291347, 1033872317) + W(3, 1006625651, -1121932314, 1004319943, 975983930) + + W(4, -1114935893, -1126366588, 1043822142, -1086850225) + + W(5, 1030145358, 1040015383, -1150276254, 1028724356) + + W(6, -1111281510, -1128243618, -1107071420, -1084734921) + + W(7, 1067004758, 1046081705, 1002809783, 1031930717) + W(8, 1016908923, -1145483991, 1006526455, 1045857736) + + W(9, 1020093203, -1118176982, -1161820989, -1104669476) + W(10, 990649086, 996067150, 1012165262, -1119271334) + + W(11, -1129772456, -1144422823, -1141799647, -1143148999); + WS(-1102756414, -1120030182); + sum1 = W(0, -1114857721, 1007304745, -1113282502, -1135708169) + + W(1, -1130226572, -1115825727, -1142173365, -1136250680) + + W(2, -1102767115, -1115971707, -1095987354, -1101194087) + + W(3, -1099422340, -1109693396, -1122657804, -1100198174) + + W(4, 1043961809, -1101676399, 1053600405, 1065521342) + W(5, 1046696200, 997946179, -1112668237, 1025742970) + + W(6, 
-1124210446, 1028311347, 1046261656, 1068133511) + + W(7, 1042816511, -1098872988, -1133742993, -1113205915) + + W(8, -1117667780, -1113057893, -1103902228, -1095174540) + + W(9, -1101926052, 1020822452, -1113901869, -1140085511) + + W(10, -1120413918, -1125847632, -1114932345, 1027169042) + + W(11, -1125196828, -1114420141, -1121096548, -1110338975); + sum2 = + W(0, 1022430051, -1126993785, 1027244407, -1136699515) + W(1, -1115987625, 1024283610, -1120680273, -1137060371) + + W(2, -1119384349, -1117046041, -1113440146, -1099757367) + + W(3, 1038214736, -1152620459, 1013074835, 1015902709) + W(4, -1127804673, 1014745875, -1129639049, 1065124465) + + W(5, -1113943514, -1130768777, -1113854522, -1112590762) + + W(6, 1011315171, -1106680506, 1020590601, 1034863647) + W(7, 1035633573, -1116743921, -1127682237, 1012706011) + + W(8, -1121146859, 1009189211, -1115604020, -1111752578) + + W(9, -1124061319, -1120130033, -1148961286, -1113078716) + + W(10, 1009328259, -1125156477, 1000906166, 1017063165) + + W(11, -1117062841, 1024425511, -1119191933, 1024105421); + WS(-1077988040, -1098609713); + sum1 = W(0, 1020152245, 1002836371, -1132015209, 1010566313) + W(1, 1010053945, 1020486319, 1030571159, -1132252076) + + W(2, 1040202093, 1027276515, 1049452385, 1032875267) + W(3, 1036663212, 1034908465, -1114877050, 1038229775) + + W(4, -1114910389, 1037438449, -1100627367, 1030654167) + + W(5, -1113362709, -1095852652, 1025947458, -1105166249) + + W(6, -1104283732, -1105856183, -1098108347, -1102427678) + + W(7, -1104090280, 1048934265, -1126406169, -1138504825) + + W(8, 1039298438, 1028081393, 1024125142, -1131705423) + W(9, 1042947115, 1019783717, 1021770278, 1035155325) + + W(10, -1125387021, 1013826369, 1018674229, 1017725301) + + W(11, -1141124531, 1015682181, 1006527587, 1016700365); + sum2 = + W(0, -1142267837, -1139598567, 967895252, 1008554815) + W(1, -1145406733, 1024473654, -1120792990, 1020245499) + + W(2, -1132626695, -1138132903, -1131100139, 1032537913) + + W(3, 
-1112410791, 1023361695, -1130141475, 1015868043) + W(4, 1023852808, 1033876712, 1012220303, -1095058264) + + W(5, -1110252153, -1104029741, 1031843465, -1196949327) + + W(6, 1039691642, -1110747637, 1061640184, 1037101696) + W(7, -1091888991, -1161200181, -1119701362, 977066218) + + W(8, -1114839167, -1127924867, -1122160974, 1049832068) + + W(9, 1027903240, -1126050715, -1132864167, -1118663498) + + W(10, -1147709133, -1155304986, -1121532798, 992067354) + + W(11, -1128735955, 1025251228, -1157362170, 1009680879); + WS(1050097375, -1144620389); + sum1 = + W(0, -1128715377, 1011057731, -1134438475, 1019193873) + W(1, 1022275225, 1009602711, 1032927683, -1136253330) + + W(2, 1044211901, 1033637193, 1048874276, 1034983634) + W(3, 1042543166, 1017343103, -1149239594, 1037918654) + + W(4, -1107032850, 1022227585, -1096643663, -1114392598) + + W(5, -1092702793, -1097617495, -1139445891, -1097624368) + + W(6, -1104047251, 1028225817, -1092741994, 1036584266) + + W(7, 1025153498, 1028276325, -1142331733, -1118553533) + W(8, 1044522647, 1008625791, 1044273989, -1113959235) + + W(9, 1042105869, 1041046056, 1025445761, 1045159742) + W(10, -1128884681, 1011579579, 1018667761, 1025301657) + + W(11, 1024565397, -1123355105, 1027058603, 1015982985); + sum2 = + W(0, -1125113091, -1172679141, -1152889753, -1126905475) + + W(1, 1006649294, -1117488938, 1019337735, -1172252645) + W(2, 1033296420, -1128383915, 1019343075, 1031861037) + + W(3, 1038268779, -1113645001, 1028739248, -1120621430) + + W(4, 1004274749, 1016688407, -1103752812, -1097163724) + W(5, 1062746712, 1051715314, -1124587947, 1035778038) + + W(6, -1102915066, 1037338868, -1089888071, -1090194983) + + W(7, 1041005261, -1118523722, 1027501384, -1154227385) + + W(8, 1028147108, -1144981581, 1030353331, -1121675098) + W(9, 1019962267, 1006661278, 1023891692, 1013440998) + + W(10, 1017918867, 991135609, 1022111055, -1117974458) + + W(11, 1026630548, -1115156997, 1018829507, -1136345798); + WS(-1098112447, -1125216946); + 
sum1 = + W(0, 1010417568, 1006322176, -1140685064, 1017991280) + W(1, -1126853432, -1146614696, 1004323364, 1019294567) + + W(2, -1112042689, -1119953952, -1116450322, -1112784281) + + W(3, -1105219289, 1023451023, -1153582112, -1110071894) + + W(4, 1004161984, -1112614584, -1090400895, -1113885864) + W(5, 1051228911, -1126136276, 1016683168, 983864831) + + W(6, 1041397655, 1018267345, 1058507679, 1050855349) + W(7, -1097135921, -1122997076, -1149231824, 1012208560) + + W(8, -1114418485, 1024614191, -1104618697, 1027345269) + W(9, 1037685358, 1039454784, -1140802306, 1030420791) + + W(10, 1021110568, -1137912440, -1127130192, 1019594896) + + W(11, -1116355204, 987030015, 988441407, -1149650752); + sum2 = + W(0, 1035141604, -1134943159, 1031353634, -1120750640) + W(1, 1025254666, -1114509439, 1028756554, -1113538005) + + W(2, -1152950404, 1010887850, -1122370025, -1101926585) + + W(3, -1104605825, 1035852377, -1114959163, -1148680422) + + W(4, -1100547860, 1025641276, -1101375945, 1057546844) + W(5, 1048526282, -1108641700, 1033576762, 1037895648) + + W(6, -1104792229, -1119115361, -1100366049, 1042205116) + W(7, 1042952063, 1039483674, 1030776026, 1041006229) + + W(8, 1037875812, 1021611837, 1015511489, -1109199537) + + W(9, -1105787312, -1109299079, -1143994351, -1107406256) + + W(10, -1138078871, 1016878804, 1015482099, -1140091328) + + W(11, 1032201266, -1132053601, 1007578680, -1124574444); + WS(1027823864, 1049081066); + sum1 = W(0, 1012131718, -1130128499, 1020918007, -1147020173) + W(1, 1027080278, 1020448605, 1012408472, 1024273997) + + W(2, 1045092167, 1032126573, 1043895467, 1034108246) + + W(3, 1035993776, -1104998342, -1129990263, -1123698278) + + W(4, -1108779609, -1127539283, -1101697729, -1094495268) + + W(5, 1047905888, 1057437086, 1041161122, 1052750589) + W(6, 1031292119, 1050389010, 998802811, -1089568357) + + W(7, -1096020979, -1097851474, -1105719893, -1101687913) + + W(8, -1168425160, -1113259988, 1036534559, 1026355631) + + W(9, 1044370080, 
-1112197645, 1035394325, -1103284196) + + W(10, -1123848398, 1017365039, 1015583683, -1120941186) + + W(11, 1024649726, -1123264842, 1027345300, 966350543); + sum2 = W(0, 1023788251, 982227227, 1006064903, -1119196045) + W(1, 1022374646, -1139981943, 1006364327, -1130167410) + + W(2, -1115639872, -1150596845, 1015277390, 1006169559) + + W(3, 1034991246, -1110295858, -1148612007, -1131776900) + + W(4, 1042970848, 1046665761, 1037025188, -1080187678) + W(5, 1047303977, 1052132899, 1033799186, 1035867784) + + W(6, 1041100183, 1053945290, -1128807740, -1078236510) + W(7, 1053440142, 1058969577, 1029728171, 1005561207) + + W(8, -1120506242, -1121383639, 1017634434, 998104205) + + W(9, 1037533712, -1112892146, 1033828190, -1110685126) + + W(10, -1214633659, -1143798279, 1023282498, -1114831851) + + W(11, 1023435593, -1124500500, 1015376522, 1014287051); + WS(-1088189216, 1043089281); + sum1 = W(0, -1140748743, -1121398410, 1026065228, -1149048846) + + W(1, 1020127051, 1036138263, -1128555260, 1038047047) + W(2, 1049770738, 1041680407, 1048806759, 1043836791) + + W(3, 1034371336, 1024770968, 1037337862, -1137213911) + + W(4, -1088213532, -1096476823, -1084369379, -1108723571) + + W(5, 1036745881, 1041309981, 1011670631, 1028379642) + W(6, 1042644994, 1047024381, 1050638745, -1101872740) + + W(7, -1094226627, -1096464492, -1106071481, -1102959693) + + W(8, 1041481493, -1107153702, 1035104215, 1041455384) + W(9, 1040593116, 1045395982, 1040128941, 1016152290) + + W(10, 1008330103, 1031833588, 1020682979, 1026205458) + + W(11, 1006701623, -1120876186, 1024336228, 1028740542); + sum2 = + W(0, -1129618765, 1004169862, -1163586391, 1012667011) + W(1, 1004789414, 1006867267, -1148564598, 1023888443) + + W(2, 1028575755, -1114659435, 1025093243, -1110197183) + + W(3, -1140439179, -1110600679, 1019765437, -1106146141) + + W(4, 1018469269, 1020493837, -1120959857, 1045602812) + W(5, -1119013866, 1007539419, -1126956769, 1005311510) + + W(6, -1109549862, 1025826707, 1018231561, 
1050121196) + W(7, 1009644755, 1038682513, 1017107081, 1040899502) + + W(8, 1022763469, -1116607191, 1017295613, -1110473213) + + W(9, -1143499366, -1107944702, -1123266847, -1114206671) + + W(10, -1123897489, -1153284395, -1131222849, 1019030581) + + W(11, -1125130849, 1009132883, 1006886939, -1127991173); + WS(-1082596160, 1043517696); + sum1 = W(0, 1028909798, -1153492953, 967933898, 1019116907) + W(1, -1132218099, -1129844133, 1011144392, 999709323) + + W(2, -1154689545, 1032486047, -1128767583, 1037703496) + + W(3, -1115978788, 1027446332, -1146849277, -1114471573) + + W(4, -1104169149, -1117361518, 1049441299, -1087176649) + + W(5, -1115448907, 1018413135, -1154137305, -1130624603) + + W(6, 1025985531, -1118729126, 1031994846, -1089391227) + W(7, 1056185125, 1032249225, 1036574828, 1042483386) + + W(8, 1029418231, 1034798166, 1006099469, 1046438773) + W(9, -1106352341, 991686873, -1131200252, 1022706506) + + W(10, 1024711058, -1125485751, 1008177302, 986175666) + + W(11, -1125946755, 1030151438, 1006963806, 1019997155); + sum2 = W(0, 987806561, 1024001581, -1132313544, -1138298076) + W(1, 1003342504, -1131321664, 1015369606, 1016585830) + + W(2, -1103978637, -1138508200, -1113576691, 1037869389) + + W(3, 1041829747, -1102361734, 995389041, -1103350916) + + W(4, 1039699877, -1109586411, -1109697688, 1004783368) + + W(5, -1101655248, 1023850463, -1123567230, 1033013495) + W(6, 1032745921, 1036006171, 1041044376, 1057490671) + + W(7, -1120283284, -1118937466, -1137071480, 1023653113) + + W(8, -1106709297, 984628641, -1135869680, -1123523352) + + W(9, 1037905909, -1133053924, 1008345900, -1123775069) + + W(10, 1017765222, -1151113585, -1128136972, -1126668402) + + W(11, -1142518888, -1119975873, 1015435786, -1153724113); + WS(1066487464, -1081148228); + sum1 = + W(0, -1156758047, -1115667101, 1032245054, -1120457266) + W(1, 1025970038, 1028999903, -1129094085, 1031685286) + + W(2, 1045622655, 1039828621, 1043227835, 1051174634) + W(3, 1038187760, 1035936180, 
1028125692, 1038703027) + + W(4, -1121579186, 1016276052, -1107379682, -1081882686) + + W(5, -1114502289, -1110627634, 1032926021, -1130240468) + + W(6, 1016497454, 1034243041, -1125065087, -1081200961) + + W(7, -1121918554, -1121754318, -1138807120, -1128847308) + + W(8, 1026804003, -1129971954, 1036683903, 1050524446) + W(9, 1033938044, 1045144826, 1028558202, 1036935449) + + W(10, 1025331662, 1022570928, 1033790001, 950141917) + W(11, 1035886143, -1123227402, 1023360168, 1033245399); + sum2 = W(0, 990406129, -1109318105, 1032976305, -1108583903) + W(1, 1009651516, 1027268958, -1109295843, 1034160517) + + W(2, 1013162021, 1040565489, -1125670617, 1041812300) + + W(3, -1123723867, 1031967357, 1033460548, -1113698315) + + W(4, -1111903383, -1098989385, -1114873385, 1042303391) + + W(5, 1001920896, -1113158574, 1015358203, 1032025507) + W(6, 1031481662, 1036428343, 1043786440, 1039454897) + + W(7, 1032603577, -1097549253, -1117993943, -1119346601) + + W(8, -1112035485, -1111376696, 1007686716, 1044627344) + + W(9, -1107723593, 1043104932, -1142055788, -1111706149) + + W(10, 1026146121, 1020848344, 993596601, -1114987237) + + W(11, 1030752081, -1112152994, -1128311189, 1024170945); + WS(-1085700080, -1080808385); + sum1 = W(0, 1024296914, -1122227946, -1137292464, 1023737634) + + W(1, -1114666035, 1017074878, -1116342907, 998628462) + W(2, -1130852634, 1020782261, 1018157624, 1000516328) + + W(3, 1007198528, 1034334952, 1028012332, 1030076118) + + W(4, -1113008209, 1011762792, -1132968658, -1126768286) + + W(5, -1104353680, -1108931914, -1128335092, -1106440695) + + W(6, 1037784580, -1106441949, 1034492712, -1096045179) + W(7, 1044579508, 1035021791, 1016641544, 1037317931) + + W(8, -1128041639, 1023563097, 1002605936, 1048917408) + + W(9, -1126595297, -1127030516, -1124799637, -1122557970) + + W(10, 1011155704, 1007950112, -1130475172, 1027876850) + + W(11, -1148910448, 1030237498, -1178616327, 1026212766); + sum2 = W(0, -1127545645, 1018138398, -1144042592, 
999025130) + + W(1, -1139402988, -1150259805, 985356315, -1133933532) + + W(2, 1009004191, -1136885460, -1141392422, 1032831862) + + W(3, 1017820345, -1138954980, 1013413304, -1164078621) + + W(4, 1015771798, -1151865619, -1107745249, -1097510643) + + W(5, -1117369881, 1017078443, -1132039248, -1132951491) + + W(6, -1131692274, -1125888003, -1091675886, -1072938398) + + W(7, -1091916978, -1117354278, -1124163047, -1138824868) + + W(8, 1014637794, 1023581282, 1057331451, 1076041086) + W(9, 1056314077, 1032772879, 1020965023, 1026258229) + + W(10, 996126556, 1018938725, -1136255574, -1112160457) + + W(11, -1150817344, -1138128772, 1018397555, 985080995); + WS(1065410184, 989310727); + sum1 = W(0, -1129262375, -1140125102, -1116437485, -1133156750) + + W(1, 1013595422, -1140436482, 1001232640, -1120957855) + + W(2, -1109321255, -1114916286, -1113105487, -1103994248) + + W(3, -1113291025, -1135877737, 1027066925, 1020121135) + W(4, 1036945010, 1039766830, 1017194412, 1046741097) + + W(5, 1059311888, -1110004779, -1131666807, -1115618446) + + W(6, 1026154888, -1115626794, 1060001058, 1017259310) + + W(7, -1103200016, 1018378495, -1131905035, -1127100975) + + W(8, -1134197844, -1121887434, -1094555822, -1114897776) + + W(9, 1023750749, -1113774366, -1124072268, -1116648163) + + W(10, -1125355007, 1012320294, -1137406030, -1133486286) + + W(11, -1124720479, -1115411390, 1004266540, -1123146591); + sum2 = W(0, -1134742142, 994239369, -1134258278, -1126643555) + W(1, -1124787402, 1015413679, 991423945, 1010959842) + + W(2, 1015903261, 1014594226, -1129761964, 1031995313) + W(3, 1009465920, 1021296491, -1131866907, 1022764367) + + W(4, 1020154303, -1150882457, 1022217883, 1040302074) + + W(5, 1049285742, 1029784509, -1135790753, -1122024941) + + W(6, -1137672551, 1023023759, 1027518631, -1097618001) + + W(7, -1098762324, -1122992412, -1126884057, -1118740892) + + W(8, 1024853570, 1017377705, -1148789809, 1026258033) + W(9, -1132512970, 1015729459, -1130065044, 980872325) + + 
W(10, 1005131169, -1125421078, 1001175413, -1138047636) + + W(11, -1148246653, 1016986651, -1142203369, 992745521); + WS(1051937311, 1019742014); + sum1 = + W(0, -1128733562, 997229903, 1023734487, -1116159949) + W(1, -1132293538, -1139661928, -1123545457, 1016780849) + + W(2, -1110586220, 1007594774, -1114376035, -1126695312) + + W(3, 1032525373, -1154941499, 1019369518, 1026460481) + W(4, 1044317379, -1115449474, 1058430122, -1131615423) + + W(5, 1028795565, 1043036192, -1111719346, -1122201669) + + W(6, 1034449865, 1040493967, -1106892452, -1096043495) + W(7, 1048571437, -1110832404, 1031893623, 1016066306) + + W(8, -1129053661, -1122019556, -1137597924, -1114653189) + + W(9, -1104742917, -1106286569, -1118024837, -1121789868) + + W(10, -1126315858, -1118602611, 1007433460, -1115826781) + + W(11, 1027349977, 1018237258, -1123671919, 1018711058); + sum2 = + W(0, 1008400273, 1016157529, -1144490357, -1138058389) + W(1, -1125392389, -1130565626, 965128306, -1135290574) + + W(2, -1127535498, -1127740478, 1041994790, 1049438349) + + W(3, -1105215135, -1113276412, -1135958083, -1114929610) + + W(4, -1110432262, -1101104373, -1088501039, 1056816205) + W(5, 1056199589, 1025802623, 1003347845, 1003318865) + + W(6, -1120718262, -1108362087, 1042410198, -1105949033) + + W(7, -1108549667, 1029274599, -1152613734, 1031964529) + + W(8, -1113670226, -1146032407, -1132649478, 1045816390) + + W(9, -1122823248, -1139374331, -1123972337, 1019133207) + + W(10, 1025653041, -1134925453, 1011207517, -1153231998) + + W(11, -1127906971, 1000306913, -1157554354, -1130500213); + WS(-1117640696, 1034706864); + + return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); +} + +shared float inp[555]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = 
float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} +/* Pass 2 input accessors: INPUT_tex passes a fetched INPUT texel through GET_SAMPLE; INPUT_size / INPUT_pt hold GetInputSize() / GetInputPt() as float2. */ +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); +/* temp accessors: temp_tex reads channel x of temp; temp_size is (input width * 1, input height * 2) and temp_pt is its per-axis reciprocal. */ +#define temp_tex(pos) (float(texture(temp, pos).x)) +static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2); +static const float2 temp_pt = float2(1.0 / (temp_size.x), 1.0 / (temp_size.y)); +/* In this pass the HOOKED_* names alias the temp accessors, so HOOKED_tex samples temp. */ +#define HOOKED_tex(pos) temp_tex(pos) +#define HOOKED_size temp_size +#define HOOKED_pt temp_pt +/* Pass2: entry point of pass 2. The work group first fills the shared array inp with a 37 x 15 tile (555 values, 15 per x column) of HOOKED_tex samples whose origin is offset by (-2, -3) texels from group_base. Each invocation then gathers a 6 (x) by 8 (y) window from that tile, evaluates nnedi3() on it and stores two horizontally adjacent values: the untouched centre sample at x * 2 and the nnedi3 result at x * 2 + 1. blockStart and threadId are only read by the LAST_PASS bounds check. NOTE: this file is generator output; the same change belongs in the generator script. */ +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + /* index of this invocation's window origin inside the tile: 15 entries per x column */ int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); + /* cooperative fill: every invocation loads the entries id, id + group size, ... */ for (int id = int(gl_LocalInvocationIndex); id < 555; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + /* x / y are cast back to int so the -2 / -3 border offsets produce negative texel coordinates; mixed int/uint arithmetic is evaluated unsigned and would wrap to a huge positive value for the first tile columns / rows of the first work group. */ inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + int(x) - (2)) + 0.5, float(group_base.y + int(y) - (3)) + 0.5)).x; + } + /* the whole tile must be written before any invocation reads inp (barrier() is defined outside this chunk) */ barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + /* samples[2k] and samples[2k + 1] hold the 8 consecutive y entries of tile column (local x + k), k = 0..5 */ vec4 samples[12]; + samples[0][0] = inp[local_pos + 0]; + samples[0][1] = inp[local_pos + 1]; + samples[0][2] = inp[local_pos + 2]; + samples[0][3] = inp[local_pos + 3]; + samples[1][0] = inp[local_pos + 4]; + samples[1][1] = inp[local_pos + 5]; + samples[1][2] = inp[local_pos + 6]; + samples[1][3] = inp[local_pos + 7]; + samples[2][0] = inp[local_pos + 15]; + samples[2][1] = inp[local_pos + 16]; + samples[2][2] = inp[local_pos + 17]; + samples[2][3] = inp[local_pos + 18]; + samples[3][0] = inp[local_pos + 19]; + samples[3][1] = inp[local_pos + 20]; + samples[3][2] = inp[local_pos + 21]; + samples[3][3] = inp[local_pos + 22]; + samples[4][0] = inp[local_pos + 30]; + samples[4][1] = inp[local_pos + 31]; + samples[4][2] = inp[local_pos + 32]; + samples[4][3] = inp[local_pos + 33]; + samples[5][0] = inp[local_pos + 34]; + samples[5][1] = inp[local_pos + 35]; +
samples[5][2] = inp[local_pos + 36]; + samples[5][3] = inp[local_pos + 37]; + samples[6][0] = inp[local_pos + 45]; + samples[6][1] = inp[local_pos + 46]; + samples[6][2] = inp[local_pos + 47]; + samples[6][3] = inp[local_pos + 48]; + samples[7][0] = inp[local_pos + 49]; + samples[7][1] = inp[local_pos + 50]; + samples[7][2] = inp[local_pos + 51]; + samples[7][3] = inp[local_pos + 52]; + samples[8][0] = inp[local_pos + 60]; + samples[8][1] = inp[local_pos + 61]; + samples[8][2] = inp[local_pos + 62]; + samples[8][3] = inp[local_pos + 63]; + samples[9][0] = inp[local_pos + 64]; + samples[9][1] = inp[local_pos + 65]; + samples[9][2] = inp[local_pos + 66]; + samples[9][3] = inp[local_pos + 67]; + samples[10][0] = inp[local_pos + 75]; + samples[10][1] = inp[local_pos + 76]; + samples[10][2] = inp[local_pos + 77]; + samples[10][3] = inp[local_pos + 78]; + samples[11][0] = inp[local_pos + 79]; + samples[11][1] = inp[local_pos + 80]; + samples[11][2] = inp[local_pos + 81]; + samples[11][3] = inp[local_pos + 82]; + ret[0] = nnedi3(samples); + /* entry 33 = 2 * 15 + 3 undoes the (-2, -3) tile offset: it is the sample at this invocation's own position */ ret0[0] = inp[local_pos + 33]; +#if CURRENT_PASS == LAST_PASS + /* NOTE(review): destPos scales threadId by 2 on both axes, while the stores below scale gl_GlobalInvocationID by (2, 1); confirm against this pass's BLOCK_SIZE / NUM_THREADS that the y test cannot skip valid rows. */ uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + /* imageStore is #defined earlier in this pass to imageStoreOverride(pos, val.x), so only channel x of ret0 / ret is used */ imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret); +} diff --git a/src/Effects/NNEDI3/NNEDI3_nns64_win8x4.hlsl b/src/Effects/NNEDI3/NNEDI3_nns64_win8x4.hlsl new file mode 100644 index 000000000..677401e65 --- /dev/null +++ b/src/Effects/NNEDI3/NNEDI3_nns64_win8x4.hlsl @@ -0,0 +1,2111 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly.
+// Generated by: nnedi3.py --nns 64 --win 8x4 --use-compute-shader --use-magpie +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 +//!SORT_NAME NNEDI3_064_4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 1 * 2 +//!HEIGHT INPUT_HEIGHT * 2 * 1 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!FORMAT R16_FLOAT +//!WIDTH INPUT_WIDTH * 1 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D temp; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_temp; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC NNEDI3 (double_y, nns64, win8x4) +//!IN INPUT +//!OUT temp +//!BLOCK_SIZE 32, 16 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[8]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 8; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 32.0; + float mstd1 = sumsq / 32.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * 
mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = + W(0, 1040079017, -1115658697, -1107150591, -1119789534) + W(1, -1129953383, 1052077971, -1102425550, 1027055297) + + W(2, -1112255447, 1056937139, -1089460941, 1033358144) + + W(3, -1100320116, 1052646725, 1029401490, -1121552872) + + W(4, 1039284509, -1091236407, 1057992884, -1097729145) + W(5, 991372703, -1106109018, 1043570389, -1111268755) + + W(6, 1013856427, -1112818013, 1044897031, -1110301789) + + W(7, -1108390802, 1030287040, 1028923089, -1132036495); + sum2 = W(0, -1108031914, -1118430023, 1012288055, 1013768747) + + W(1, 1026461175, 1041449883, 1027150813, -1122068949) + W(2, 1032549274, 1072973866, 1049697467, 1026059807) + + W(3, 1041814131, -1076980873, -1082819831, 1036691428) + + W(4, 1017353224, -1111091488, 1041587393, -1152272302) + + W(5, -1121140359, 1028730196, 1027133694, -1116481805) + + W(6, -1132764107, -1121430441, -1122016279, 1025296987) + + W(7, -1138465879, -1127880506, -1141547063, 1012449399); + WS(-1098744132, 1034278418); + sum1 = W(0, -1103397220, -1145635621, -1105248316, 1050544938) + + W(1, -1124165708, 1041815523, -1120033612, 1021903208) + + W(2, -1110800938, -1099348442, 1050332487, 1035939706) + W(3, 1026719460, -1092523700, 1047722705, 994253234) + + W(4, 1022646298, -1100009602, 1037024150, 1017012340) + W(5, 1032193166, -1088889154, 1057456646, 992386946) + + W(6, 1022064347, -1104673163, 1045722836, -1126668889) + + W(7, -1136800346, -1098235958, 1051861685, 1007925993); + sum2 = W(0, -1097631923, 1051137052, 1049915324, -1103655169) + + W(1, 1038271179, -1138123466, -1101382095, 1030554459) + + W(2, 1016052501, 1047950529, 1053016941, -1103587135) + W(3, 1038061501, 1066995446, 1044310058, 1039796029) + + W(4, -1098093629, 1052164534, -1101624661, 1022295275) + + W(5, 1036023945, -1086456688, -1097740488, 1002073396) + + W(6, -1103222197, -1092315790, 1025704121, 1009745942) + + W(7, 1049634734, -1088937960, -1095498431, 
1037237655); + WS(-1082862242, -1106529553); + sum1 = W(0, -1130854575, 1035507577, -1111381151, -1121770975) + + W(1, -1136558799, -1134799464, -1109420173, 1040932752) + + W(2, -1106220300, 1050921813, -1090777253, 1042678624) + W(3, 1011297096, 1017516694, 1058584025, 1023914089) + + W(4, 1032803517, -1083278305, 1053793791, -1114891124) + + W(5, 1003802452, -1113237716, 1038764911, 1024022374) + + W(6, -1130818358, -1166618304, 1011564912, 1003125716) + + W(7, -1115833284, 1036440480, -1125528356, -1148371520); + sum2 = W(0, -1113253112, -1114468103, 1046106511, 1029911106) + + W(1, 1017383795, -1105748811, 1042779663, -1111927878) + + W(2, -1098397046, 1042070822, 1049941293, -1106009556) + + W(3, -1105653936, 1062131274, 1049624420, -1113543954) + + W(4, 1043748521, -1088478625, -1093962905, -1110290200) + + W(5, -1130700635, 1039489157, -1102421728, 1032413051) + + W(6, 1040646747, -1112481937, -1120850913, 1032000907) + + W(7, -1134989990, -1118262998, -1124158867, 1027657598); + WS(1055738180, 1034492849); + sum1 = W(0, -1130881323, -1111251428, 1042092597, 1006685651) + + W(1, -1114926341, -1128721888, 1039888312, -1103919286) + + W(2, 1041145701, -1099536939, 1055510938, -1104400342) + + W(3, -1109424308, 1051914904, -1131357522, -1113433279) + + W(4, -1112247108, 1058202118, -1089549409, 1022435899) + + W(5, -1112831856, 1051860704, -1104754439, -1139575663) + + W(6, -1114665081, 1026763985, -1109075065, -1129253153) + + W(7, -1121239258, -1130346670, -1117425768, -1142807791); + sum2 = + W(0, -1110077216, -1118181126, 1049383148, -1110853226) + W(1, 1036549925, -1116107500, 1034915268, -1119533468) + + W(2, -1103858204, 1042025735, 1044994486, -1101175453) + W(3, 1011211476, 1056055130, 1058209450, -1113779586) + + W(4, 1008268344, -1090440399, -1098004228, -1109857174) + + W(5, 1023981444, -1117941654, -1099569204, 1034145803) + W(6, 1010863636, 1001166765, -1121873216, 1026441284) + + W(7, 1034090633, -1130298175, -1107211558, 1033292040); + 
WS(1056018244, -1109435672); + sum1 = W(0, 1028147653, -1121172806, -1156629272, 1029524274) + W(1, 1025593127, -1108190871, 1019648283, 998861994) + + W(2, 1019179235, 1044333967, -1105021998, -1126875818) + + W(3, 1042157337, -1098176216, -1084323638, 1048944704) + + W(4, -1110312463, 1052400296, 1052575762, -1112766254) + + W(5, 1019247612, 1041545807, -1144495302, 1027979993) + + W(6, -1120006735, -1152440548, 960835398, -1115664295) + + W(7, -1144225212, 1040728133, -1117757541, -1127435447); + sum2 = W(0, -1133465842, 1020564842, -1109452473, 1020759546) + + W(1, 1037565600, -1103457632, 1042355402, 1025892619) + + W(2, -1114814180, 1043192653, -1103393926, 1033812672) + W(3, 1023511793, 1057142397, 1042803000, 1040091206) + + W(4, 1028077461, -1101196001, -1093624788, -1104532905) + + W(5, 1005396583, 1038394412, -1094252679, -1153222990) + + W(6, 1022326514, -1113456189, 1041416796, -1130818626) + + W(7, -1149118303, -1137100532, 1021621854, 1028700597); + WS(1058353218, 1042996167); + sum1 = + W(0, 1024954343, -1094124879, 1052314513, 1009270680) + W(1, 1015847030, -1107919793, -1138612357, -1116609353) + + W(2, 1036476495, -1097421664, 1044445371, -1202266712) + + W(3, -1110665693, 1048862286, 1050687635, -1105910956) + + W(4, -1121159378, 1033331871, -1102818752, 1018868510) + W(5, -1108641009, 1049066134, 998963493, -1115498359) + + W(6, -1137738249, 1023600450, -1113493234, 1016588291) + + W(7, -1102251787, 1041347474, 1023627001, -1119202191); + sum2 = W(0, -1126808420, -1120540574, -1098896413, 1039187917) + + W(1, 980939457, -1110048722, -1075343834, -1110843188) + + W(2, -1123796075, 1033167871, -1086151008, 1032482295) + W(3, 1037410136, -1106277561, 1075199384, 991431992) + + W(4, 1002142140, -1166268225, 1058982643, -1123555130) + + W(5, -1139817292, -1116671070, -1127002394, -1163241473) + + W(6, -1134378480, -1143978200, 1028067074, 1021947375) + + W(7, -1142652688, 1011961658, 1027681870, -1117918702); + WS(1065448321, -1123448424); + sum1 = 
W(0, 1033675765, 1042933049, -1113419593, -1107290229) + + W(1, 1010310940, 1048779866, -1098481070, 1033876859) + + W(2, -1135157253, 1053164479, -1085464413, 1028430645) + + W(3, -1130617674, 1054681092, 1025088582, -1126554424) + + W(4, 1045868403, -1088933089, 1052703622, -1113028565) + + W(5, 1037821135, -1096402228, 1043193000, -1114258373) + + W(6, 1029877230, -1112063319, 1039463880, -1131689691) + + W(7, -1107423852, -1132569296, 1039088003, 992526257); + sum2 = W(0, 992398551, -1118632904, -1109433050, -1143518596) + + W(1, 1027463162, -1115592424, 998577524, -1122494561) + + W(2, -1110914988, -1094541616, -1095126430, -1116426235) + + W(3, -1119896935, 1058308065, 1059518579, -1103007923) + + W(4, -1100289244, 1052134722, 1038527426, -1121464784) + + W(5, 1024218964, -1139018826, -1109739820, -1115201103) + + W(6, -1111923136, 1043392525, 1004586244, -1120049915) + + W(7, 1036361104, -1114310803, 1020338285, -1128089075); + WS(1050696068, -1101686596); + sum1 = W(0, 1030829490, 1032239887, 1015204626, -1113406309) + W(1, 1025629886, 1039198082, 1033819234, 1030627629) + + W(2, 1028506281, 1035777849, 1052890832, -1111973334) + + W(3, 1044306231, -1094476563, -1083567844, 1049080890) + + W(4, 1022456523, 1053256444, -1095460989, 1026744658) + W(5, 1033300499, -1108777674, 1028011096, 1009469122) + + W(6, 1025382374, -1125496907, -1117978119, 1014700692) + + W(7, -1126351518, 1013615278, 1022077689, -1117484787); + sum2 = + W(0, -1123705959, -1116235595, 1034058997, -1139597844) + W(1, 1033160533, 1028975273, -1094422871, -1122943830) + + W(2, 1021465594, -1105037500, -1106553103, -1103106284) + W(3, 1019539230, 1055127070, 1036912081, 1037858431) + + W(4, -1115261066, 1039390029, -1113630050, 1034945731) + W(5, 1030761443, -1104569658, 1034816057, 997585040) + + W(6, 1021971134, 1024962787, -1131788338, -1137214108) + W(7, -1122567677, 999405400, 1014360364, 1023175710); + WS(1058623938, 1044469574); + sum1 = W(0, 1045615020, -1108426137, -1135193336, 
1044911680) + + W(1, 1035147318, -1141312974, 1044306859, 1030580258) + + W(2, 1047639223, -1105886038, -1122107414, 1043270872) + + W(3, 1050348390, -1079944741, -1085342603, 1051720601) + + W(4, 1030161626, 1039515483, -1100470855, 1041363874) + W(5, 1044899106, 1021302290, -1110191310, 1041545713) + + W(6, -1132009366, -1167333042, -1180200419, 1040848727) + + W(7, 1044251461, 991624716, -1100960996, 1042676794); + sum2 = W(0, -1126998679, -1097249916, 1048536708, -1118913729) + + W(1, -1112584470, -1101780852, 1037003529, -1122142287) + + W(2, -1125720699, -1124055793, 1040132893, 1008038822) + + W(3, 1026650453, 1050619160, 1058508674, -1117660887) + + W(4, -1098785634, 1044368398, 1037637019, -1121056345) + + W(5, 1035354845, -1115949975, -1106591466, 1006096779) + + W(6, -1103280207, 1036370417, -1109803757, -1116815483) + + W(7, -1117822839, 1013784870, -1104576316, -1119752481); + WS(-1092650820, -1093598320); + sum1 = W(0, -1126483198, -1111185837, 1032567139, 1028684103) + + W(1, -1136033646, -1107026560, -1113511425, 1034982933) + + W(2, 1019177644, -1096033968, 1059010394, 1025024233) + + W(3, -1119240794, -1101350837, 1042563567, -1118967697) + + W(4, -1095523617, 1057301262, -1131092420, -1118984463) + + W(5, -1108863241, 1042409572, -1108714423, 1023629806) + + W(6, -1113123137, 1040959081, -1114101506, -1128061252) + + W(7, -1126435736, 1035081684, -1106962654, -1142697342); + sum2 = + W(0, -1118904411, -1134435633, -1102280300, -1150017862) + + W(1, -1158272012, -1107189637, 992574886, -1122133752) + + W(2, 1013234849, -1120249847, -1083559741, -1146867651) + + W(3, 1030464194, 1057018316, 1062197029, -1097179586) + W(4, -1106482941, 1048792270, 1040811198, -1115216614) + + W(5, 1025489390, 1024832618, -1114392001, 1021081809) + W(6, -1129648553, 1031754422, 1031998303, -1112304821) + + W(7, 1028128070, -1125347555, 1029475610, 993658022); + WS(1058173058, 1066808129); + sum1 = + W(0, -1107348220, 1035236882, 1041707116, -1121094224) + W(1, 
-1099127505, 1036577094, 1043659283, -1105652573) + + W(2, -1103913530, 1044208414, 1046852514, -1107481241) + + W(3, -1113478482, 1029276252, 1039741986, -1121409754) + W(4, 1040539139, -1097605967, 1044710619, 1032545388) + + W(5, -1113016318, -1100723463, 1043551606, -1115290924) + + W(6, -1120477164, 1029619500, -1133641268, 1013015214) + W(7, 1000206250, -1122925746, 994694705, 950366748); + sum2 = + W(0, -1094008764, 1034130054, 1031813868, 1011760553) + W(1, -1105723160, -1120175931, 1042930394, -1124103013) + + W(2, -1087139249, 1064201383, -1110465854, -1131636845) + + W(3, 1052425931, 1002356625, -1098172076, 1006833373) + W(4, 1056772124, -1089869944, 1043061462, 1007319139) + + W(5, 1034445536, -1114835724, -1102352886, 1014923704) + W(6, 1012087213, -1123264660, 1040830367, 1016549099) + + W(7, 1037095176, -1129286431, -1111397998, 993051546); + WS(1061928770, 1027279769); + sum1 = + W(0, 1034406912, -1100123699, 1029564087, 1033006716) + W(1, -1113862823, -1162686055, -1109708970, -1128579510) + + W(2, 1039933724, -1096018034, 1050604450, 1007368299) + W(3, -1111710246, 1056786929, 1037263862, -1109765633) + + W(4, -1111349982, 1056534441, -1098017894, -1113416432) + + W(5, -1109997378, 1044907121, 1038664521, -1101798872) + + W(6, -1113344954, 1022437288, -1116167797, -1105438789) + + W(7, -1114279935, 1031029722, 1027389387, -1113862289); + sum2 = + W(0, 1026006751, -1115270559, 1024783342, -1130316822) + W(1, -1108403600, 1031363022, 1033798448, -1139293944) + + W(2, 1035073774, -1104904057, 1037041630, 1025961213) + W(3, -1122004937, 1031769509, 1039898935, 1021529214) + + W(4, 1035952663, -1106336513, 1057393141, -1112547455) + W(5, 1015050843, 1027760004, 1027825620, -1096601581) + + W(6, -1124438030, 1019789656, -1121305243, -1105185563) + + W(7, -1133483016, -1118390529, -1128829234, -1106336479); + WS(1053752708, 1043937257); + sum1 = W(0, -1114168512, -1105170221, 1031197758, -1109039567) + + W(1, -1112521242, -1098204331, -1104934543, 
-1113606305) + + W(2, -1097902456, 1046102006, 1042705158, -1098767200) + + W(3, -1095288434, 1066642253, 1070561411, -1089903115) + + W(4, -1094410051, 1054367941, 1038022813, -1100846418) + + W(5, -1114520925, -1100810554, 1032266099, -1108028099) + + W(6, -1115668990, -1121904763, -1114135854, -1115062077) + + W(7, -1105295129, 1031167873, -1123380722, -1118408200); + sum2 = W(0, -1113064749, -1090786559, 1038982572, -1136678198) + + W(1, -1144787532, -1094418907, -1102980167, -1129681067) + + W(2, -1115681675, 1009808406, 1041806798, 1023762114) + W(3, 1041111385, 1053253351, 1054413523, 1035936399) + + W(4, -1130408019, 1015714915, 1024043560, -1114747797) + + W(5, -1136936214, 1020895003, -1114777947, 1001107148) + + W(6, 1022660811, -1117683238, 1021536435, -1129865467) + + W(7, -1121651306, 1028089270, -1123371786, 1019986163); + WS(-1077899937, -1095640595); + sum1 = + W(0, 1038010313, 1032811586, -1097475626, 1033727032) + W(1, 1026389718, 1036208246, -1123908524, -1117534996) + + W(2, 1040451742, -1100196466, 1051006893, -1116405267) + + W(3, 1038149954, -1112596654, -1096170677, 1043001855) + W(4, 1012098034, 1057575303, -1089360567, 1035677341) + + W(5, 1041468735, 1025151035, -1102384270, -1122502968) + W(6, 1036152598, -1108998502, -1117118702, 959089718) + + W(7, 1035936827, -1117388883, 1019229383, -1122221082); + sum2 = W(0, 1014408115, 1037203464, -1112987386, 1006518986) + W(1, 1031626038, -1111295182, 1033188726, 998532726) + + W(2, 1042237958, -1111204470, 1033012571, 1002908510) + W(3, 1024306585, 1046645930, 1040504174, -1142339005) + + W(4, 1005347958, 1036175171, 1022488131, 1028667874) + + W(5, -1108949069, -1102667672, -1113110436, 1028194944) + + W(6, -1129272561, -1105300490, 1012028387, 991631364) + + W(7, -1155665460, -1098212068, 1026136762, -1118275484); + WS(1052188868, -1105025774); + sum1 = W(0, 1026006805, 1053332527, -1093124457, 1016895952) + + W(1, -1127133881, 1046329385, -1102124985, 1015047702) + + W(2, -1111238899, 
1054121990, -1093232644, 1031180874) + + W(3, 1039605133, 1041936861, -1108714798, -1123947762) + + W(4, 1036489378, -1098161716, 1047964177, -1116894641) + + W(5, 1027868467, -1100202284, 1042303686, -1100188956) + + W(6, 1028341284, -1114580002, 1042593657, -1114267170) + + W(7, 1040022103, -1112308133, 1033044781, -1120167377); + sum2 = W(0, -1098242390, 1038124206, 1035251585, -1146833441) + + W(1, -1090155459, -1111850856, -1110800852, 1015159009) + + W(2, -1080159437, -1111691725, 1033462452, -1114532383) + + W(3, -1083172772, 1045262621, -1116129960, 1037947101) + + W(4, 1043663143, -1107342132, -1117401930, -1115149205) + + W(5, 1066420271, -1115094311, 1036826747, -1118954060) + + W(6, 1065821018, 1045364599, -1121811075, -1159263732) + + W(7, 1052081121, 1039539139, 1031476522, -1144829650); + WS(-1102870152, 1022622513); + sum1 = + W(0, 1055644087, -1105950504, -1114669622, -1099940547) + W(1, 1035394969, -1108831748, 1043185370, 1029691760) + + W(2, 1045649819, 1041190721, -1098060649, -1105006556) + W(3, 1045987408, 1039545500, -1093030704, 1034170571) + + W(4, 1030573816, 1049616144, -1093912525, 1028754581) + W(5, 1031111560, 1056957379, -1084377448, 1032315050) + + W(6, 1035095156, 1047436506, -1093512673, 1008186016) + W(7, 1041054212, 1055423038, -1094643096, 1002093997); + sum2 = W(0, -1084145430, 1070336926, 1055168354, -1086613448) + + W(1, 1037387540, -1098862516, -1095516697, 1056626754) + + W(2, -1091365810, 1050914492, 1050568943, -1097405209) + + W(3, -1096604175, 1068317532, 1069441477, -1099937369) + + W(4, -1095647742, 1054262144, -1090345251, -1106692290) + + W(5, 1043123500, -1083131387, -1085743049, 1045945631) + + W(6, -1106696806, 1046449081, 1048225937, -1106594556) + + W(7, 1053814194, -1081281727, -1086819148, 1058187197); + WS(-1069426976, -1120848841); + sum1 = W(0, 1036938172, 1037072255, -1103659451, 1032159990) + W(1, 1035259664, 997868135, 1049019033, 1025792718) + + W(2, 1047028417, 1031706371, -1095105906, 1020529436) + 
+ W(3, 1052190352, -1092031243, -1081236347, 1049696791) + + W(4, 1041456894, 1045414333, -1090672009, 1038916826) + W(5, 1039915051, 1026900661, 1032303833, 1035011641) + + W(6, 1015525967, -1119324359, 1008361890, 1039253091) + + W(7, 1036563112, 1015048091, -1146656500, -1145253028); + sum2 = W(0, -1131675185, 1029191442, -1114450600, 1024984252) + + W(1, -1130807793, -1132163681, 1018117017, -1115031522) + + W(2, -1137547874, -1115933340, 1033829647, 1037592454) + + W(3, -1119117212, -1086087178, 1063181450, 1006729954) + + W(4, -1138868050, -1112002886, 1026390676, 1017409241) + + W(5, -1139313154, 1014175682, -1143924164, -1134939410) + + W(6, -1127085641, 1023290113, -1138352802, -1125048401) + + W(7, -1144490820, -1135148482, 1007342770, 974288160); + WS(-1092511940, 1056045775); + sum1 = W(0, 1034351639, -1122413709, 1034842253, -1109413949) + + W(1, 1036571263, -1107220477, 1031880235, -1128037178) + + W(2, 1026078569, -1123595634, 1048156900, -1106487287) + + W(3, 1044122211, -1099810889, -1106066347, 1038991202) + + W(4, -1145831724, 1052039823, -1096815763, 1016305349) + + W(5, -1113387827, 1050308552, -1095462256, 1039930976) + + W(6, 1018446892, 1032192235, -1104039774, -1145490603) + + W(7, -1119385695, 1047960215, -1105858435, -1115254417); + sum2 = W(0, -1129634341, 1017418680, 1033383321, -1086264457) + + W(1, -1125984965, -1145267460, -1108053053, -1078809375) + + W(2, -1131211232, 1019434782, 1035592777, -1073693037) + + W(3, 1032304695, -1112808134, 1017461836, -1098879629) + + W(4, -1126835561, 1027589163, -1125819067, 1071963841) + + W(5, -1135310730, -1164412329, 1048432434, 1073307463) + + W(6, 1030376830, 1012340975, -1132396034, 1053688201) + + W(7, -1115706548, 1024240430, 1037084632, -1110470100); + WS(1042257800, -1115776325); + sum1 = + W(0, -1146926115, -1107281138, 1038400362, 1010013391) + W(1, 1032352980, -1098526814, 1045994468, -1117084567) + + W(2, -1109330569, 1043068219, -1111135982, 1043908518) + + W(3, -1114314051, 
-1109412128, -1144690495, -1120663934) + + W(4, 1039411448, -1097266597, 1057956169, -1098521248) + + W(5, -1111850704, 1039093055, -1128243973, -1122589083) + + W(6, 1040829028, -1100632132, 1048043508, 1029880783) + W(7, -1123393177, -1104810789, 1025450855, 997333453); + sum2 = W(0, 1029402915, -1139977433, -1102627522, 1041328458) + + W(1, -1131137316, -1118253197, 1040550677, -1112092070) + + W(2, 1031911869, -1122896144, -1088615745, 1058291503) + + W(3, 1023173531, -1122404168, 1061434285, -1087193418) + + W(4, 1023792141, -1098422336, 1075510495, -1072676007) + + W(5, -1123803784, 1037623928, 1067804318, -1079538943) + + W(6, 1014267385, -1106514445, 1045458757, -1106625048) + + W(7, -1128776324, 1036548666, 1035151624, -1107997040); + WS(1062572194, -1113128476); + sum1 = W(0, 1025054504, -1105909778, 1035303099, 1030405814) + + W(1, 1032745069, -1098454710, 1048922046, -1107189334) + + W(2, -1111717923, 1040855323, -1111940687, 1047137430) + + W(3, -1115121012, -1102647392, 1030883818, -1120733568) + + W(4, 1039928305, -1094658254, 1056642050, -1102007432) + + W(5, -1108655721, 1048800872, -1114397119, -1121522301) + + W(6, 1042636501, -1098634802, 1045405821, 1030940241) + + W(7, -1121873307, -1107065062, 1028123977, -1129454014); + sum2 = W(0, -1114559261, -1138228901, 1049277284, -1102431407) + + W(1, 1015935669, 1029828517, -1107208114, 1036930870) + + W(2, -1119968612, -1110320033, 1062476231, -1086736101) + + W(3, 1034980574, -1095660939, -1090290563, 1061616421) + + W(4, -1134322843, 1043622789, -1071408041, 1075586093) + + W(5, 1031387619, -1103434641, -1079628774, 1068626110) + + W(6, -1130912561, 1043842027, -1100585862, 1036267766) + + W(7, 1028413753, -1104010102, -1110973692, 1043722127); + WS(1060774594, 1031131703); + sum1 = W(0, -1107942447, 1045023814, 1022102280, -1109726834) + + W(1, -1105426838, 1048858052, 1019498368, -1115599332) + + W(2, -1102134060, 1057375412, -1089780107, 1024119623) + + W(3, -1108928460, 1053310638, 1054202179, 
-1117349999) + + W(4, 1030069408, -1086363546, 1056436400, -1105657043) + + W(5, -1109912496, -1121814959, 1045750878, -1112636534) + + W(6, -1111247296, -1121700033, 1017717699, 1020787328) + + W(7, 1016389396, -1112979299, -1115080444, 1034439495); + sum2 = + W(0, 1036474850, -1115841609, -1115206474, 1027456791) + W(1, -1117613837, 1015990489, -1121605429, -1155010952) + + W(2, 993333704, -1114813616, 1041891165, -1115442992) + W(3, -1124280889, 1046600726, 1051997378, -1145891812) + + W(4, -1120272689, -1127188969, -1122067781, -1135549634) + + W(5, -1147158916, -1101686418, 1035140408, 1009536026) + + W(6, -1136721538, -1106242516, 1020051521, 1019245009) + + W(7, 1006917274, -1111325506, -1113122752, 1017045509); + WS(1060194466, -1100017114); + sum1 = W(0, -1107280066, -1105747811, 1059466063, -1103466926) + + W(1, -1110455654, -1101985628, 1043733237, -1106475814) + + W(2, -1108454907, 1018718942, 1057500851, -1106517942) + + W(3, -1102634810, -1106901713, 1036011765, -1094721336) + + W(4, -1103570769, 1046100821, 1016328357, -1110005824) + + W(5, -1103041901, 1055521256, -1113318903, -1108487663) + + W(6, -1108837738, 1047438565, -1139455183, -1116101330) + + W(7, -1103753711, 1059153514, -1111475141, -1118902112); + sum2 = W(0, -1134258178, -1116481221, 1032034504, -1123729210) + + W(1, 1016306377, -1123533287, -1130975497, -1124034546) + + W(2, -1119701101, -1150894505, -1129842733, -1119840142) + + W(3, -1121134035, 1050682292, 1050657130, -1118895679) + + W(4, -1110905440, 1003934853, 1032136922, -1115758498) + + W(5, -1123898542, 1016401385, -1114630639, -1135589646) + + W(6, -1120388479, 1010383122, -1135861370, -1140270714) + + W(7, -1131815221, 1025651043, -1115813895, -1126135829); + WS(-1094780356, -1096037829); + sum1 = W(0, -1094564683, 1051641704, 1042321778, -1109647669) + W(1, 998269945, 1048993133, -1097112145, 1034946343) + + W(2, -1106439374, -1105882161, 1049078036, -1120792141) + + W(3, -1099790159, 1021385644, 1039341071, -1101585581) 
+ + W(4, 1039955171, -1110801281, 1035258371, 1020526630) + + W(5, -1116696534, -1093602407, 1056382370, -1124429730) + + W(6, 1026955814, 1040928722, 1035779188, -1106396689) + + W(7, -1132274079, -1094848202, 1053064740, 1027970323); + sum2 = + W(0, -1137596463, 1029577680, 1038439017, -1113448033) + W(1, -1129205035, -1107142480, 1015352075, 1012914617) + + W(2, -1126343315, 1047351132, 1070713407, -1114069586) + W(3, 1039861741, -1098569120, 1073186805, 1046610276) + + W(4, -1126312459, 1038608299, 1038004689, -1144677526) + + W(5, -1123052878, -1116615219, -1073950732, 1007385420) + + W(6, 996868828, -1112533224, -1077903771, -1108941862) + + W(7, -1125851727, -1107291575, -1107100148, 1037961064); + WS(1053603780, -1115461008); + sum1 = + W(0, -1137418943, -1097293942, 1049585228, 1014991997) + W(1, 1030817204, -1095449155, 1046317951, -1134044169) + + W(2, 1016256157, -1092155167, 1056038503, -1118818050) + W(3, 1021933093, 1049349309, 1058232189, -1129232467) + + W(4, -1107174285, -1125654285, -1109114845, 1020919504) + + W(5, 1024187323, -1107838901, -1104780729, 1042320778) + + W(6, -1121722108, -1112035518, -1104946775, 1033110781) + + W(7, 1027865701, -1115980033, -1096840466, 1040979266); + sum2 = W(0, 1033231462, -1115237141, -1111323807, 1022510111) + + W(1, -1122607696, 1031925962, -1115623012, 1016053183) + + W(2, -1126070520, 1042960949, -1119706606, 999155791) + W(3, 1021774826, 1053325526, 1049744257, -1130112624) + + W(4, -1120788802, -1107200469, 1005054791, -1116359926) + + W(5, 1023918789, -1112013861, 1026671690, -1115047971) + + W(6, -1127228708, -1114604471, -1107279270, -1133439199) + + W(7, -1155086158, -1120863174, -1119270266, -1114777589); + WS(1049944452, -1118177299); + sum1 = + W(0, 1004954534, -1113835168, -1131743895, 1036431958) + W(1, 1034605113, -1117712807, 1044152226, -1113909672) + + W(2, 1042179871, 1049772083, -1105992217, 1034469319) + W(3, 1046072096, -1089565448, -1102704504, 1047091406) + + W(4, -1119832136, 
-1087875066, 1051885673, -1117867957) + + W(5, -1115172412, 1028249398, 1023284205, 1037431354) + W(6, 1021914321, -1115173764, 1032417701, -1128742159) + + W(7, -1125265763, 1016032028, 1023720959, 1027934165); + sum2 = + W(0, -1122048832, 1053033259, -1121809206, -1100778293) + W(1, -1102028824, 1033866732, 1014156522, 1033801710) + + W(2, -1098388207, -1083784972, 1055740053, -1112352078) + + W(3, 1042109429, 1035235216, -1104400409, 1046653262) + W(4, 1037898289, 1056376564, -1126110057, -1111071353) + + W(5, 1035691269, -1103367787, -1120368299, 1025902829) + W(6, -1120157845, 1034674212, 1015248281, 997672170) + + W(7, 1026959129, -1128193065, 984939091, -1153716682); + WS(-1165444096, -1119710264); + sum1 = W(0, 1042994118, 1051115175, -1098028621, -1108071955) + W(1, 1034244741, 1042802936, 1040688824, 1032455940) + + W(2, 1037778378, 1051394930, -1088773549, -1120702728) + + W(3, 1041081798, -1114386041, -1087521946, 1029133552) + + W(4, 1016348739, 1044466585, -1089153334, 1045508004) + + W(5, 1044760836, 1039392600, -1102706330, -1117660781) + W(6, -1126699940, 1042064672, 987979863, 1043978432) + + W(7, 1049969601, -1111502020, -1112154588, -1112358151); + sum2 = + W(0, -1113048377, 1060341068, 1067872749, -1107478445) + W(1, -1169925094, 1041247352, 1057359743, -1121919154) + + W(2, 1033233769, 999982909, -1089189226, -1112420946) + W(3, 1022227231, -1081289783, -1075624869, 1030919570) + + W(4, 973479782, -1090689511, -1088323315, 1049895882) + W(5, 1025958326, 1036288861, 1048635178, -1108655271) + + W(6, -1175748301, 1044034366, 1052769876, 1042449401) + + W(7, -1108998850, 1055733822, 1060416891, -1098757157); + WS(-1077066897, -1138646112); + sum1 = + W(0, 1024267775, -1092381223, 1053014632, 1025453154) + W(1, 1009759453, -1106693383, 1027242622, -1118474373) + + W(2, 1034803609, -1096180043, 1051220983, -1149202983) + + W(3, -1105412061, 1042083852, 1053447283, -1101608091) + + W(4, -1118414542, 1031147625, -1105039519, 1010847661) + + W(5, 
-1104989395, 1050244398, 1023828386, -1113932053) + W(6, 1000523125, 1024065726, -1110843104, 1023831870) + + W(7, -1099033634, 1045342865, 1028746421, -1113130950); + sum2 = W(0, 1013870164, 1032969077, 1050672871, -1105627059) + W(1, -1133465912, 1032956180, 1076869477, 1037977967) + + W(2, 1030888661, -1118666398, 1063553933, 1017901782) + + W(3, -1118337904, -1095979039, -1069322680, 1026329625) + + W(4, 1030672570, -1102611716, -1088200357, -1169512700) + + W(5, -1156287199, 1041525268, 1026168517, -1153801247) + + W(6, 1014316520, -1121602320, -1129519600, -1126632288) + + W(7, -1145191680, -1129518532, -1120095414, 1031405165); + WS(1059038658, 1028594353); + sum1 = + W(0, 1030613999, 1027001704, -1089493909, 1046010101) + W(1, -1132336275, 1035531624, -1094328658, 1026431736) + + W(2, -1117493948, 1032402678, -1106243079, 1043051950) + W(3, 1038489844, -1152158733, 1052296065, 1044337864) + + W(4, 1028206993, -1098315241, 1042911580, 1028671678) + W(5, 1044643916, -1088517514, 1049587060, 1032918142) + + W(6, 1007880004, -1103996748, 1035409673, -1137633217) + + W(7, 1043840791, -1091506711, 1044000434, 1036291743); + sum2 = W(0, -1118922878, -1105269646, 1050974192, -1110275215) + + W(1, 998383694, -1109062791, 1002191246, -1111322692) + + W(2, -1117830208, 1013977255, 1046707356, -1114811922) + + W(3, -1123342062, 1052555170, 1051646415, -1114439732) + + W(4, -1109116635, 1041098100, 1036678597, -1108145020) + + W(5, -1124252768, 1040874722, -1098775079, -1121186646) + + W(6, -1104593856, 1030669290, -1122565606, -1115485971) + + W(7, 1026377322, 1040986964, -1098750401, -1115223671); + WS(-1089038754, 1070610314); + sum1 = + W(0, 1006818038, -1112569734, -1110914124, 1038135388) + W(1, -1126323612, -1099849389, 1036085850, 1043645143) + + W(2, 1015379312, -1090696922, 1053776546, 1040122382) + + W(3, -1107760774, -1101892281, 1045400523, -1118717763) + + W(4, -1110158738, 1028620400, 1040930437, 1002760462) + W(5, -1104391397, 1047988033, -1132043583, 
1016954112) + + W(6, -1114553627, 1043536901, -1119469430, 974497054) + + W(7, -1113068735, 1039930893, -1114659286, -1139206280); + sum2 = W(0, -1120363159, 1029972206, -1135126990, -1111466112) + + W(1, -1119340063, 1020623843, 1026762268, -1099605665) + + W(2, -1119078724, -1137841818, 1064611900, -1102195223) + + W(3, 1035843986, -1094446255, 1017486399, 1042926586) + W(4, 1035175754, 1049035968, -1094749382, 1014516782) + + W(5, -1106992039, -1128810267, 1039608352, -1112006203) + + W(6, -1128507307, -1123633215, -1194223384, -1136077530) + + W(7, -1129270833, -1123131987, -1197948696, -1129209333); + WS(-1120775200, -1094115106); + sum1 = W(0, -1117113151, -1112921839, 1046145098, -1107062579) + + W(1, -1133090109, -1099947345, -1097096777, -1151313622) + + W(2, -1096583070, 1050818928, 1048882552, -1100319675) + + W(3, -1098081804, 1057531117, 1067142180, -1094480203) + + W(4, -1092969217, 1051254629, 1054137781, -1100195749) + + W(5, -1119648950, -1098660371, 1035250696, -1111825641) + + W(6, -1113650835, 926574177, -1115652196, -1118408614) + + W(7, -1105675228, 1035449878, 1024424221, -1114839546); + sum2 = W(0, -1122846232, 1031116535, -1118531646, 1030028661) + + W(1, 1002183233, -1113707943, 1034244245, -1116699784) + W(2, -1129631292, 988543074, 1027624889, 1039671963) + + W(3, -1112395279, -1080457005, 1064104967, 1037992478) + + W(4, 1009610216, -1097715519, 1052372051, -1115290878) + W(5, 1001287537, 1037716560, 1011412016, 1016046512) + + W(6, -1122731504, -1112112395, 1031591085, -1122589730) + + W(7, -1139829356, 1031806339, -1128697528, 1022404309); + WS(-1083041826, -1088446577); + sum1 = + W(0, -1112868382, -1138041180, 1041478665, -1123809603) + W(1, -1139686881, 998832115, 1026886463, 1007627162) + + W(2, 1037998481, -1108164766, 1018740405, -1137074328) + + W(3, 1035817611, -1106005004, -1092605991, 1036557383) + W(4, 1028314191, 1054394898, -1096954228, 1043199331) + + W(5, -1112414119, 1042585939, -1105339656, 1016960213) + W(6, 
-1123391616, 1037009680, -1122534560, 949272012) + + W(7, -1152205450, 1027830053, -1128384493, -1161448845); + sum2 = W(0, 1036010005, -1080772886, -1110295637, -1113551367) + + W(1, -1112615912, -1080108640, 1033469827, -1118689741) + + W(2, -1108103320, 1042883784, 991295446, -1123719569) + W(3, 1026422893, 1067023817, -1106557392, 1039881895) + + W(4, 1043952518, 1065911618, 1015020021, 1028046949) + W(5, -1112407780, 1043418298, -1113416335, 986544491) + + W(6, 1025610945, -1121396391, -1118133995, 1028536389) + + W(7, -1142574731, 1019038275, 1027619053, -1114551687); + WS(1059656866, 1032254503); + sum1 = W(0, 1051132033, -1087438145, 1040121644, 1047684766) + W(1, 1032901415, -1096624519, 1045530925, 1031967406) + + W(2, 1050001035, -1093591378, 1040228252, 1044069767) + + W(3, 1051046262, -1092342973, -1096329486, 1050005760) + + W(4, 1024623477, 1024747921, -1100479077, 1045316401) + W(5, 1041030197, 1042870865, -1092498854, 1048633756) + + W(6, -1129401004, -1112612011, -1100524478, 1041789317) + + W(7, 1040932616, 1037509038, -1089145703, 1050363772); + sum2 = W(0, 1033723840, 1054927280, -1094930774, -1110893464) + + W(1, -1120765513, 1004192518, 1009606707, -1117945313) + + W(2, -1151198348, 1032813206, 1018976553, -1111742424) + + W(3, -1121736497, -1097617496, -1096835022, -1121652889) + + W(4, 1020727265, 994767756, -1114598300, -1144829382) + W(5, -1111733852, 1024454465, 1049726638, 1030389401) + + W(6, 1010605379, -1124374145, 1040837102, -1120942585) + + W(7, -1112033268, -1107458988, 1057385902, 1035752486); + WS(-1076352721, 1061176787); + sum1 = W(0, -1115567878, -1095051668, 1047858891, 1038434246) + + W(1, 1039493528, -1094852257, 1040618482, -1116442287) + + W(2, 1037423542, -1086623624, 1057001770, -1114755255) + W(3, 1022350014, 1044286862, 1036068672, 1022609452) + + W(4, -1114677168, 1057477914, -1086915318, 1037805114) + + W(5, -1123591576, 1049595083, -1095930750, 1044588197) + + W(6, -1143365955, 1036655988, -1108386213, 
1031697064) + + W(7, -1114839417, 1044280124, 1029598047, -1119274549); + sum2 = W(0, 1018488840, -1104206436, -1111729053, 1004158510) + + W(1, -1109735653, 1012246375, -1117505825, -1122544930) + + W(2, -1113462230, -1114523900, 1042992721, -1111468709) + + W(3, -1112517262, 1056346033, 1051780099, -1133433375) + + W(4, -1108529674, 1029007760, 1042618199, -1114816198) + + W(5, -1112282852, -1106712811, 1035563022, -1136000711) + + W(6, 1002563934, -1113168793, 1024458488, -1122022613) + + W(7, 1011039855, -1115338003, -1112797570, 1037211560); + WS(1037340944, -1121222187); + sum1 = W(0, -1116495838, 1040689218, 1036502651, -1106495021) + + W(1, -1118176747, -1110298712, -1106426924, -1119752153) + + W(2, -1096308023, 1052341602, 1044624667, -1097659059) + + W(3, -1094205341, 1064260668, 1068943083, -1089978464) + + W(4, -1095502171, 1052009702, 1045175252, -1099820424) + + W(5, -1115640417, -1100646016, 1004526500, -1108261518) + + W(6, -1112496425, -1117627026, -1108867801, -1110958217) + + W(7, -1105702479, 1026863896, -1112879076, -1111070831); + sum2 = W(0, 1002312827, 1043920878, -1113959010, -1124888335) + + W(1, -1134403694, 1045056865, -1120905299, 1019347263) + + W(2, 1025097519, -1107896626, 1031903433, -1120916959) + + W(3, -1105297347, 1060319926, -1100397803, 1028583233) + + W(4, 1007461646, -1100756735, 1042137547, -1119634731) + + W(5, -1117960999, 1018292031, -1106899263, -1134844462) + + W(6, -1123847191, -1136355134, -1119650359, 998264859) + + W(7, -1129847887, -1108036266, 1033071013, -1109200858); + WS(-1087648930, 1056984912); + sum1 = W(0, 1027950097, 1048727276, -1101873716, -1111974664) + W(1, 1027762677, 1043573475, 1015678999, 1024202483) + + W(2, 1025406415, 1051177331, -1091649404, 1016025318) + + W(3, 1043195617, -1092810048, -1082302860, 1050173331) + + W(4, 1035903278, 1038252876, -1097139061, -1156961168) + W(5, 1031391740, 1044809191, 1038200652, 998750650) + + W(6, 1023913741, 1035706934, 1042041459, 1023377112) + W(7, 
1033673899, 1020597221, 1041744649, -1114494621); + sum2 = + W(0, 1035604443, -1109665959, 1027481873, -1121529027) + W(1, -1113891962, 1017090586, -1115818981, 1006860245) + + W(2, -1155240851, 1033553543, 1023852497, -1124657042) + W(3, -1130359386, 1061263788, 1027230949, 1039189933) + + W(4, 1025386829, -1109330568, 1041592808, -1123139111) + W(5, 1028100153, -1106256683, -1113120233, 999905578) + + W(6, 1016774458, -1097657183, -1101411950, -1132638677) + + W(7, -1126789482, -1118236009, -1121071325, -1109638591); + WS(-1090940868, 1045539156); + sum1 = W(0, -1125495305, -1111842948, 1024315123, 1033322010) + + W(1, -1122547977, 1036262295, 1017057054, 1021788462) + W(2, 1049435900, -1083395945, 1046781659, 1041875702) + + W(3, 1022396558, 1050733040, 1016653136, 1032259903) + W(4, 1034779456, 1050803832, -1084181811, 1045300311) + + W(5, -1113477720, 1023164412, 1044787970, -1103054087) + + W(6, 1025152496, -1141132416, 1025666193, -1119238479) + + W(7, 1017461170, -1119150102, 1039405571, -1121753565); + sum2 = + W(0, -1111850352, -1154701652, 1008422401, -1128983709) + W(1, -1115334877, 1045060155, 1033609440, -1109145509) + + W(2, -1101312255, 1053683311, -1090718168, 1041457249) + W(3, -1131783421, 1041887968, 1047082317, 1009316297) + + W(4, 1037660946, -1090960245, 1034433310, 1037394704) + W(5, -1121261620, 1034919558, 1036748070, 1030040522) + + W(6, 1008969141, -1118627401, -1109720341, 1034723828) + + W(7, -1118719808, -1117531173, 998924330, -1122112519); + WS(1058382658, -1101786424); + sum1 = + W(0, -1110270554, 1020557616, 1042446649, 1010032145) + W(1, -1139041509, 1040419632, -1110579152, 1021044303) + + W(2, -1106470550, 1039037886, 1052788411, -1107378131) + + W(3, -1114587164, -1089217895, -1102603214, -1101971332) + + W(4, 1023988544, -1116816379, 1052497916, -1110520967) + + W(5, 1031022672, -1098005290, 1048850952, -1136091053) + W(6, 1028924449, 1030444000, 1041387232, -1142630143) + + W(7, 1026679274, -1105928314, 1042985742, 
1036784631); + sum2 = W(0, -1122331173, 1021048143, -1121426033, -1141905806) + + W(1, -1116694246, -1117334358, 1034622720, -1109719947) + + W(2, -1133055086, 985385246, -1111822939, -1111398337) + + W(3, -1106162040, 1058597177, 1062397265, -1092732322) + + W(4, 1033705060, -1096472079, 1007125953, -1104483430) + + W(5, -1119866494, 1040447045, -1124693001, -1148551778) + + W(6, 1019582103, -1107345628, 1025700470, -1142669219) + + W(7, -1119480360, 1011504527, 1026572561, -1124513225); + WS(1062970978, -1081266569); + sum1 = W(0, -1102316771, 1049067654, 1040583110, 986432279) + W(1, 1026249566, 1049251698, 1039432874, 1031488060) + + W(2, -1104248914, -1100292756, 1040278772, 1034368766) + + W(3, 1037424006, -1086541140, -1096033842, 1046796153) + + W(4, 1035596338, -1092317470, 1045287213, 1039493185) + + W(5, -1122189220, -1101422105, 1049598341, 1029733120) + + W(6, 1016727466, -1130063849, 1043296141, -1132489681) + + W(7, 1006845944, -1103019359, 1042202275, 1032483937); + sum2 = + W(0, -1108138297, -1092863213, -1134447463, 1020893591) + W(1, 1024987284, -1095439435, -1107350452, 1030342296) + + W(2, -1107013954, 1054820060, -1104627754, 1023656240) + W(3, 1038311792, 991870971, 1058790229, -1145193326) + + W(4, -1140508767, 1019051815, 1049499199, -1111721419) + W(5, 999045582, 1003248078, -1113643053, -1124168067) + + W(6, 1010876103, -1122507350, 1040683258, -1111078930) + + W(7, -1139420079, -1120196608, -1107048418, 1036851190); + WS(-1090709444, 1049440503); + sum1 = W(0, -1112814366, 1025170444, 1021129071, -1116166456) + + W(1, 1009000949, -1102968365, -1107658202, 1013557870) + + W(2, -1102384190, 1054801672, -1108358704, -1112664461) + + W(3, -1098104133, 1055216649, 1055155742, -1102466049) + + W(4, -1100420271, 1007027006, 1062491877, -1093799886) + + W(5, -1117987732, 994631719, -1115514619, -1139587454) + + W(6, -1108681738, -1122456724, 1032182427, -1112800010) + + W(7, -1118237822, 1037835469, -1137063726, -1119596065); + sum2 = W(0, 
1010269627, -1125700925, -1129465449, -1124401971) + + W(1, 1011116525, 1031004177, -1111184590, -1139731999) + + W(2, 1031226256, 991734042, -1090395945, -1123603568) + W(3, 1023655690, 1056735713, 1007034923, -1106780773) + + W(4, -1117593255, 1023598587, -1114530604, 1032362667) + + W(5, -1139919503, -1116020697, 1044588818, 1019563835) + + W(6, -1131886173, 1030692092, -1118660887, 1016903156) + + W(7, -1132337311, 1014084462, 1027760305, 1003383541); + WS(1052605444, -1096548047); + sum1 = W(0, -1135269079, -1099989408, 1035141158, -1157809579) + + W(1, 1014261129, -1107118442, 1022832721, 1034237928) + W(2, 1035728362, -1095069609, 1036930738, 1014945855) + + W(3, 1017149121, 1004173259, 1042230040, 1022065263) + W(4, 1022065234, -1102684265, 1051854441, -1149699425) + + W(5, -1106055602, 1026600311, -1123731628, 1033708682) + + W(6, -1122459306, 1029280393, 1039321653, -1120230985) + + W(7, -1120275059, -1114974728, -1108029264, 1040542518); + sum2 = W(0, -1139099893, -1078862437, -1097588090, -1129576496) + + W(1, -1102121527, -1076350607, 998819801, 1024729710) + + W(2, -1108414487, 1043522425, -1111980043, -1134118593) + + W(3, 1039987934, 1067956735, -1109442889, 1032654822) + W(4, 1045186917, 1072447081, 1044036972, -1118463242) + + W(5, -1096991255, 1053122067, -1115256553, -1124332658) + + W(6, 1033745090, -1111637970, 1027886256, -1123595298) + + W(7, -1102163660, 1040872955, 1044251533, -1115579557); + WS(1054231108, 1025279114); + sum1 = W(0, 1020527399, -1123453065, -1096862627, 1049493415) + + W(1, 1020186806, 1039341944, -1119337841, -1160786718) + + W(2, 1026191493, -1104689967, 1034733383, 1041541558) + W(3, 1046988234, -1092316960, 1008801448, 1041661776) + + W(4, -1147857037, -1122753069, -1108979241, 1034215919) + + W(5, 1041704764, -1093206832, 1050167121, 1032223662) + + W(6, -1120557618, -1107112804, 1027712531, -1156220183) + + W(7, 1039867950, -1094240131, 1043962418, 1036558383); + sum2 = + W(0, -1156731980, -1121143571, 1027150242, 
1019039796) + W(1, -1125155383, 1021067288, 1024911074, -1128517034) + + W(2, -1153680156, -1101109237, -1096825521, 1036906894) + + W(3, -1140765671, -1079877582, -1088644111, -1110310177) + + W(4, 1018992688, -1094350155, -1113080073, 1019841700) + W(5, 1030176462, 1058234221, 1052444813, 1000349534) + + W(6, 1027890172, 1064456596, 1024094624, -1128055622) + W(7, 1018002108, 1058386161, 1045751986, -1134555143); + WS(-1097731588, 1025129315); + sum1 = + W(0, 1033609822, 1037092684, -1099469177, -1125480775) + W(1, 972033083, -1118357663, -1139849211, -1113410371) + + W(2, 1036940225, 1046113067, -1089626010, 1035911587) + W(3, 1025915127, 1050865438, -1143345853, 1047135499) + + W(4, 1037164536, -1109542297, -1104510832, 1038594118) + W(5, 1018117854, 1047810219, -1097851227, 1031924197) + + W(6, 1009743167, -1111680192, -1113708160, -1146462713) + + W(7, 1021572178, 1036701507, -1110047355, -1120871309); + sum2 = W(0, -1124965724, 1029789829, -1119672690, -1132623148) + + W(1, -1134630348, -1105084635, 1036116775, -1123511646) + + W(2, -1138485084, 1035852445, 1002738585, -1106888257) + + W(3, -1143859545, 1050249105, 1057234226, -1097411434) + + W(4, -1133066340, -1114135764, -1115375290, -1109817844) + + W(5, 1030422253, 1024721985, -1117221657, 1017260494) + + W(6, -1121255807, -1120848177, 1007172972, -1118819718) + + W(7, 1003464777, 1033535897, -1110249095, 1021514610); + WS(1064258306, 1057427735); + sum1 = + W(0, 1046669668, 1041288794, -1094345727, 1024557366) + W(1, 1048078371, 1002554246, -1107049340, 1026345305) + + W(2, 1049007041, 1039217007, -1092155570, -1126800432) + + W(3, 1043467602, -1117262306, -1100296810, 1017273727) + W(4, 1032131185, 1041191743, -1101509636, 1027549470) + + W(5, 1040278032, 1037075392, -1100720075, -1136038899) + W(6, 995640096, -1121877906, 1015117221, -1134767685) + + W(7, -1135631084, 1026049048, 1034147956, -1111532367); + sum2 = + W(0, -1083389635, 1062132206, -1122840851, -1127222139) + W(1, -1081609637, 
1066099523, 1053156114, 1022631188) + + W(2, -1079976349, 1066066300, -1105997303, 1027246780) + + W(3, -1090363224, 1067427062, -1091867230, -1128121946) + + W(4, -1100056051, 1049064133, -1103783968, 1036294091) + W(5, -1097029007, 996873905, 1048340523, -1115106526) + + W(6, 1028030408, -1137063332, -1134953960, 1035025213) + + W(7, -1118886369, 1035057689, -1104392578, -1117772337); + WS(-1082692450, -1121765493); + sum1 = + W(0, -1112587635, 992292947, 1015927970, -1121302066) + W(1, -1118487719, -1104534315, 1038416886, -1121296341) + + W(2, -1105756568, 1049230729, -1104867982, 1043167221) + W(3, -1111631170, 1048175951, 1051898365, 1037146590) + + W(4, 1036199927, -1089217933, 1057590961, -1113442509) + + W(5, -1106833087, 1028715132, -1104675396, 1030891673) + + W(6, -1115752014, -1121927694, 1012516667, -1121397517) + + W(7, -1109370235, 1018481703, 1022215305, -1113425325); + sum2 = W(0, -1102561169, 1040868564, -1126923948, -1123351116) + + W(1, 1026098771, 1021710972, 1009959824, -1125337962) + W(2, -1149524831, 1048116590, 1037970343, 1024724387) + + W(3, -1113975834, 1041914200, 1042300553, -1123062396) + + W(4, -1157274159, 1051191259, 1032086087, 1021621986) + + W(5, -1105234356, 1039729263, -1097615037, -1105344931) + + W(6, 1008573990, 1018638481, -1115639834, -1109718449) + + W(7, -1102778536, 1035375403, -1115667744, -1105611058); + WS(1063145570, 1065948321); + sum1 = + W(0, -1113780043, 1042647928, 1023153433, -1113344071) + W(1, -1107415145, 1041877314, -1105577826, 1034763992) + + W(2, -1103697695, 1043379605, 1035095074, -1113485016) + + W(3, -1125795137, -1105676766, 1058030732, -1107021830) + + W(4, -1119226457, -1093770877, 1049757493, -1116457366) + + W(5, 1032469064, -1098882227, 1047432094, 1028002580) + W(6, -1118570362, 1029896809, -1106671851, 1033670420) + + W(7, -1148759702, 1007275454, -1110160160, 1021171309); + sum2 = + W(0, 1033443671, -1147652316, -1122182070, -1114870166) + + W(1, -1119163512, 1034562054, -1110746439, 
-1126213983) + + W(2, 1032648925, 1044843888, -1104224164, 1033622730) + W(3, 1022262943, -1082830049, -1080473697, 1036499220) + + W(4, -1121656898, 1049609982, 1070472721, 1042757767) + W(5, -1125619769, 1025258841, 1045148854, -1125695843) + + W(6, -1116250872, 1030706970, -1138594966, -1109235057) + + W(7, 983397903, 1030898199, 1015661307, -1105016797); + WS(1047808392, -1105857653); + sum1 = + W(0, 1036414223, 1057255335, -1089047968, 1019549874) + W(1, 1038664639, 1050583338, -1095111703, 1027946812) + + W(2, 1033646416, 1052085724, -1090242223, 1014185453) + W(3, 1045309491, -1112577010, -1088425805, 1038762993) + + W(4, 1028560677, 1049269098, -1097592171, 1038356881) + W(5, 1047007737, -1101673225, 1024927375, -1120878336) + + W(6, 1035626955, 1020504170, 988289723, 1031636916) + W(7, 1052689664, -1098667351, -1153489085, -1106190208); + sum2 = + W(0, 1050181400, -1084160527, -1088778570, 1059321043) + W(1, -1114267734, 1035785784, -1097051496, -1113170400) + + W(2, 1015251419, -1094891239, -1096397593, 1035392846) + + W(3, -1097894374, 1060354212, 1063793540, -1098458292) + W(4, 1014394222, 1045924604, -1132415773, 1030978836) + + W(5, -1098468098, 1054874631, -1115217527, -1096505073) + + W(6, 1043075035, 1042828743, -1098450150, 1042772598) + + W(7, -1092845444, 1063378281, 1051615858, -1088082125); + WS(-1073783985, -1080098540); + sum1 = + W(0, 1012137476, 1004414705, 994170013, -1135747626) + W(1, -1111733448, -1125334283, 1040796537, -1106294475) + + W(2, 1027546650, -1098951217, 1045494048, -1105932043) + + W(3, -1095391448, 1058781709, 1038631039, -1097893467) + + W(4, -1107725963, 1056582836, -1099332954, 1027155091) + W(5, 1030910366, 1054585131, -1102018612, 1001385357) + + W(6, 1024902311, -1118558951, -1106829042, -1141795121) + + W(7, -1136391487, 1012935090, -1113109295, -1118975220); + sum2 = W(0, -1136577003, -1161174488, -1123274403, 1021768566) + + W(1, -1123969951, -1115268047, 1023359390, -1128609502) + + W(2, -1126970134, 
-1111829747, 1034323377, 1026568479) + + W(3, 1039844510, -1078035036, -1085317526, 1027511085) + W(4, 1037108170, 1072008544, 1044555209, 986504280) + + W(5, -1182117728, 1038025274, 1032404118, -1139830523) + + W(6, 1024145219, 1006927579, 1024479257, -1127097094) + W(7, -1107924935, 1013110307, 983390936, 1022685546); + WS(-1104974728, 1024380720); + sum1 = W(0, -1117848102, -1117883163, 1036325571, -1131530510) + + W(1, 1021171896, -1127535108, 1018298053, 1031544042) + W(2, 1039153371, -1099610928, 1041104624, 982295861) + + W(3, 1036503965, -1100771573, -1101972463, 1036843369) + W(4, 988770395, 1043814114, -1133844875, 1029874576) + + W(5, -1109841905, 1034691017, -1107251369, 1031396654) + + W(6, -1121562883, 1034709362, 1025626276, -1125713989) + + W(7, -1126175513, 1004793673, -1111468989, 1034135320); + sum2 = W(0, -1125393898, 1071281072, 1044770418, 1011595325) + W(1, 1044163006, 1072425553, -1125402454, 1021558362) + + W(2, 1045704700, -1104699996, 1018560910, 1010524637) + + W(3, -1110276988, -1078765241, -1098423872, -1113439186) + + W(4, -1099412627, -1076152216, -1103281302, -1178088596) + + W(5, 1044569612, -1100004941, 1028856833, 1019143058) + + W(6, -1115657155, 1036033265, -1123488110, -1126446200) + + W(7, 1037394103, -1111185479, -1117821699, 1026998165); + WS(1066498257, 1026989550); + sum1 = W(0, -1129927868, 1023571545, 1016605513, -1133253141) + + W(1, -1123659738, 1024686464, -1143882757, 1025371048) + + W(2, 1045121333, -1084187390, 1042249043, 1033458708) + W(3, 1002634947, 1040895252, 1054068239, 1032849928) + + W(4, 1036944705, 1040663099, -1089079196, 1049903454) + W(5, 970850411, -1115004776, -1120675276, 1024612748) + + W(6, 1022709187, 1024409057, -1112524513, 1017685954) + + W(7, 1004856149, 1001199419, -1120648472, -1117202350); + sum2 = W(0, 1038138620, -1106434423, 1014840059, -1117610677) + + W(1, -1155040203, 1038340354, -1105689565, 1038431916) + + W(2, 1049419982, -1087069176, -1102609737, -1115449783) + + W(3, 
-1094597030, 1064410985, 1043321526, -1117967352) + + W(4, 1022703337, -1110711879, 1043390477, -1107988055) + + W(5, -1117198782, -1112639315, 1042536365, -1112813855) + + W(6, -1132959015, 1026273617, 1036470844, -1122022929) + + W(7, -1121486321, -1123727669, 1009362023, 1031915394); + WS(1059170114, -1115069681); + sum1 = + W(0, 1041126528, 1039057944, -1114418030, -1125005432) + W(1, 1031327889, 1043197758, 1035998588, -1184440086) + + W(2, -1137442701, 1057495242, -1086197480, -1107377807) + + W(3, 1027929903, 1035662548, -1092661254, -1131407372) + W(4, 1016336432, 1036387882, -1098594899, 1031309989) + + W(5, 1045677858, 1043295874, -1097533773, 1017102598) + W(6, 1027533272, 1020376880, -1116322521, 1036182828) + + W(7, 1040648584, 1035357519, -1106979748, -1128058314); + sum2 = + W(0, 1029223181, -1099457044, -1094833228, 1022472216) + W(1, 1035999136, -1123134927, -1107153986, -1129014496) + + W(2, 1032743888, -1106245137, 1052844759, 1034397600) + W(3, 1050206678, -1115721168, 1029163621, 1043654862) + + W(4, 1030968394, 1035536761, 1046847142, -1114537747) + W(5, 969652021, -1109098872, -1111905268, -1149928759) + + W(6, 1018474295, -1136790748, -1116305850, 968266517) + + W(7, -1138117380, -1105152866, -1102858563, 1033858063); + WS(-1091215044, -1097536449); + sum1 = W(0, 1036334229, -1088688205, 1028067640, 1049627927) + W(1, 1027816414, -1110004986, 1025245044, 1039037582) + + W(2, 1046343318, -1088611549, -1104070412, 1048707955) + + W(3, 1054850406, -1085760696, -1089807987, 1052669472) + W(4, 1041878433, -1113122464, 990395721, 1043177720) + + W(5, 1040414769, -1115511970, 1024229024, 1039929856) + W(6, 1017820267, -1113420363, 1028468150, 1044644789) + + W(7, 1047750528, 1034437006, -1097347774, 1048578060); + sum2 = W(0, -1114440831, 1036847317, 1041890716, 1027069126) + W(1, 1030624098, 1042295753, -1115362087, 1021176355) + + W(2, 988636086, 1050034374, -1113867927, 1029167466) + W(3, -1132718391, 1031424194, 1016671019, 1028661170) + + W(4, 
1030310950, -1119733782, -1130826523, -1183083952) + + W(5, 1016098323, -1120554286, -1097953797, -1108500823) + + W(6, 1024855622, -1114648667, -1146049358, 1027651154) + + W(7, 1018716851, 1035432255, -1096082463, -1106095247); + WS(-1074893425, -1117971628); + sum1 = W(0, -1094367290, 1052023128, 1042452051, -1107223211) + + W(1, -1152038187, 1049322869, -1097322793, 1034136520) + + W(2, -1103630563, -1111356283, 1049675109, -1112379016) + + W(3, -1098786216, 999054287, 1040732760, -1102728254) + W(4, 1037656965, -1106664801, 1040645714, 1016544493) + + W(5, -1114777697, -1093651375, 1057236855, -1125564586) + + W(6, 1025099466, 1040611874, 1040783587, -1105965314) + + W(7, -1124130662, -1094570594, 1055134184, 1008351835); + sum2 = W(0, 1004001863, -1116735450, -1111481570, 1033584975) + + W(1, 1021526755, 1032892007, 1024548251, -1130001464) + + W(2, 1018968138, -1096890588, -1073615974, 1026017190) + + W(3, -1116175346, -1099974349, -1071438395, -1121576034) + + W(4, -1139174407, -1103015201, -1146527383, -1164624762) + + W(5, 1031843333, 1040986630, 1076155388, 1024580999) + W(6, -1131104448, 1035109709, 1074182350, 1043256283) + + W(7, 1009509771, 1041412407, 1041860718, -1106441216); + WS(1040475912, 1032695578); + sum1 = W(0, -1124525022, -1112011377, 1035534023, -1136466746) + + W(1, -1155851319, -1123298122, -1102944834, 1035920825) + + W(2, 1035372099, -1091278169, 1055301749, -1130626274) + + W(3, -1122500130, 1056532699, 1041895809, -1104812886) + + W(4, -1172701842, 1054708857, -1087252087, 1028924087) + + W(5, 1033773669, -1105035590, 1039100863, -1111589035) + + W(6, 1014757675, 1007501482, -1123874252, -1142366545) + + W(7, -1116930196, 1026134833, 1036316385, -1110262076); + sum2 = + W(0, -1109091355, 1041703749, -1120034208, 1010537508) + W(1, -1119792870, -1123886114, -1123320808, 1026411366) + + W(2, -1111337413, 1026232126, 1045621744, 1015836410) + W(3, -1102054262, 1049335147, 1063803841, -1098413574) + + W(4, -1118550690, -1114718085, 
-1088756378, 1041947548) + + W(5, -1113960445, -1111978783, 1016064880, -1123461018) + + W(6, -1142500624, -1124340572, 1026268254, 999681904) + + W(7, -1109065069, 1020384670, 1030310973, -1122952578); + WS(1060902754, 1050355625); + sum1 = + W(0, -1156790206, -1099711167, 1048936605, 975051222) + W(1, -1151083459, -1104333225, 1047935457, -1120760458) + + W(2, 1011568442, -1101534272, 1041411896, 1040804594) + W(3, 1023922682, -1082482317, -1103038759, 1043235660) + + W(4, 1024500211, -1107281952, 1049946264, 998453633) + W(5, -1112644265, 1053813598, 1036831245, 1030084558) + + W(6, 1033389138, 1025001795, 1037700978, 1007099659) + W(7, -1104847739, -1116841801, 1030797274, 1035779080); + sum2 = + W(0, 1036801640, -1106066969, -1098529900, 1051617655) + W(1, -1120633215, 1029646415, -1094622159, -1128436081) + + W(2, 1043653368, -1102845552, 1045091011, -1108268977) + W(3, 1045612039, 1020964885, 1062435502, -1120809462) + + W(4, -1100846658, 1041841354, -1104869009, -1127923721) + + W(5, 976462574, -1088569123, 1054359824, -1115259155) + + W(6, -1115109418, -1101549413, -1101036113, 1041018201) + + W(7, -1097687290, 1054633350, 1044975639, -1102739129); + WS(-1090140642, -1081743990); + sum1 = + W(0, 1014408182, 1037549524, -1119977588, -1148996265) + W(1, -1130293844, 1009886528, 1031805064, -1107483388) + + W(2, 1031847606, 1044394635, -1097431626, 1015837167) + W(3, 1042594418, 1037202321, -1094879521, 1035016080) + + W(4, 1041618980, -1117833212, -1119842097, 1032499967) + W(5, 1035968392, 1037634256, -1101547735, 1034663662) + + W(6, 1009499077, -1122700465, -1107123493, 1017023585) + + W(7, 1033368909, 1015596142, -1105308242, 1030806526); + sum2 = W(0, -1123785792, 1013646496, -1105735433, 1032589029) + + W(1, 1006912162, -1120892227, 1034054467, -1113324539) + + W(2, -1091554339, 1057715626, -1103626228, 1043157583) + + W(3, -1071648513, 1076150687, -1110563164, 1029343874) + + W(4, -1078529007, 1069469884, 1025732222, -1120727508) + + W(5, -1106043455, 
1049558148, -1102425071, 1029688242) + + W(6, -1108743589, 1035924910, 1028146935, -1120545616) + + W(7, -1113325201, 1038760139, 1001130061, -1123660578); + WS(1055430148, -1108560690); + sum1 = + W(0, 1035961997, -1122962962, 1001804274, -1109508687) + W(1, 1035029338, -1107101862, 1037435821, -1114596033) + + W(2, 1036894711, -1105458486, 1049324163, -1105968336) + + W(3, 1041930516, -1102453987, -1115462345, 1030068713) + W(4, 1018389269, 1050880022, -1096234295, 1023650152) + + W(5, -1113968413, 1050461237, -1096034793, 1041307521) + W(6, 1026969083, 1017425654, -1105484423, 1023939770) + + W(7, -1119268932, 1049028992, -1109268985, -1110794404); + sum2 = W(0, 1020921672, -1133145023, 967639540, 1057583079) + W(1, 1015626891, 1000680715, 1038163303, 1066461314) + + W(2, -1131154511, 1034643892, 1029820070, 1070593780) + + W(3, -1149322267, -1106033700, 1027175814, 1042500058) + + W(4, 1005570725, -1155618307, 1050725869, -1076733999) + + W(5, -1122484266, 1030350968, -1107328459, -1075365186) + + W(6, -1125609227, -1127714103, 984351189, -1095092572) + + W(7, 1019773418, 1027949553, -1113454879, 1026351361); + WS(1048611396, -1120253991); + sum1 = W(0, -1109906484, -1104843595, 1042068462, 1029221016) + + W(1, -1134442421, 1020368935, -1116357012, 1014414830) + + W(2, -1157090745, -1102655324, -1127594975, 1044033359) + + W(3, 1038629782, -1096452233, 1048790986, 1052393436) + W(4, 1019276152, -1094801150, 1026936628, 1029702657) + + W(5, 1008425488, -1098953977, 1044903358, -1137141374) + + W(6, -1121251029, -1125379561, 1031973931, -1117911220) + + W(7, -1115994974, -1120481658, 1042932491, 1022285443); + sum2 = W(0, -1129566974, -1111784437, 1050813877, -1093517653) + + W(1, -1143746400, 1036277891, 1047551505, -1098527209) + + W(2, 1036033029, 1028988774, 1061486774, -1081514759) + + W(3, -1117052801, -1100779042, 1077969880, -1071648362) + + W(4, -1123748463, 1044450885, 1055586292, -1089190754) + + W(5, 1038502321, -1129810696, -1131992338, -1107152864) 
+ + W(6, -1138810328, 1034348699, 1040643987, -1111885211) + + W(7, 1043607047, -1108082071, -1108034712, 1025200270); + WS(-1095808772, 1065647947); + sum1 = W(0, -1096536264, 1055051378, -1152511995, -1101629224) + + W(1, -1104862109, 1050045579, -1106144343, -1139560206) + + W(2, -1101976087, 1051209263, -1114400236, -1118447811) + + W(3, -1101730853, 1045989771, 1051130659, -1101507019) + + W(4, -1126516760, -1096666661, 1054814183, -1103658466) + + W(5, -1106707963, -1098537885, 1057272194, -1098960762) + + W(6, -1123882077, 1021791993, 1047598263, -1106849375) + + W(7, -1108125731, -1104774017, 1057816237, -1104521538); + sum2 = W(0, -1110633145, 1046325680, -1106205960, -1121981161) + + W(1, -1114352725, -1134514653, -1145236426, -1115188107) + + W(2, -1120367533, 1030462840, -1121802017, 1009337269) + + W(3, -1113576661, 1052343557, 1052169724, -1122467029) + + W(4, -1147392026, 1028577898, -1126550987, -1112707841) + + W(5, -1111220567, -1127265311, 967975074, -1123885069) + + W(6, -1123261925, 1023140267, -1125641651, -1109514951) + + W(7, -1131596567, -1109705515, 1019189507, 1036097210); + WS(-1088656098, -1093593990); + sum1 = W(0, 1009988669, 1053407451, -1093246956, 1009810317) + W(1, 1006243392, 1046462824, -1100605512, 1021367180) + + W(2, -1118924498, 1054487499, -1092703459, 1031892963) + + W(3, 1040203342, 1043824044, -1110184012, -1115326423) + + W(4, 1030956655, -1107509853, 1041099456, 1009525296) + + W(5, 1015251645, -1099069067, 1036164518, -1102894371) + + W(6, 1032654584, -1114843936, 1040232112, -1121673919) + + W(7, 1034196708, -1106663601, 1035276411, -1120762380); + sum2 = W(0, 1045046842, -1111686548, -1120613757, -1123157156) + + W(1, 1057379920, 1047338924, 1041926325, -1122209414) + W(2, 1066661430, 1044625920, 1010608356, 1026048962) + + W(3, 1065556520, -1108706635, -1107659276, -1118064370) + + W(4, -1100359290, 1040575208, 1029226052, 1036917588) + W(5, -1079104557, 1048831397, 990146073, -1118722317) + + W(6, -1080513286, 
-1106604517, -1122878530, 999828841) + + W(7, -1090732450, -1118381857, -1148427089, -1128484108); + WS(-1103417864, 1032685901); + sum1 = W(0, -1113840082, 999140540, 1026362243, 1025795300) + W(1, -1101895278, 1021813182, 1049323684, -1104186210) + + W(2, -1106883347, 1050838637, -1123649147, 1026463622) + + W(3, -1112947643, 1035808888, -1115046028, 1015970396) + + W(4, 1045150312, -1089518824, 1051398592, 1028121997) + + W(5, -1106874992, -1112862903, 1043843226, -1114287989) + + W(6, -1122615424, -1123587264, 1032255888, -1148228092) + + W(7, -1140285221, 991482976, 1025625239, -1125179546); + sum2 = W(0, 1052414494, -1107019340, 1021676257, -1118630717) + + W(1, 1041601968, 1032004449, -1101889010, 1028135044) + W(2, 1061231009, -1083281727, 1040520305, 994543087) + + W(3, -1094738093, 1048799024, -1113266306, 1023423300) + + W(4, -1090047391, 1060484354, -1097572834, 1023744190) + + W(5, -1112254382, 1026094686, 1047868927, -1123297050) + + W(6, -1124055261, 1028491114, -1106439707, -1134044699) + + W(7, -1106989530, 1016363351, 1039531031, -1135541243); + WS(1062251010, -1128406975); + sum1 = + W(0, -1122559590, -1114430084, 1041522773, -1200477464) + W(1, -1117783082, 998905218, -1122146129, 1030594936) + + W(2, -1111223497, -1117430765, 1049934150, -1117296696) + + W(3, -1097132645, 1043264160, 1038499861, -1108127936) + + W(4, -1118294493, -1137238321, 1040349396, -1117164622) + + W(5, 1027318465, -1109365760, 1038350478, 1008104625) + W(6, -1122628178, 1012147781, -1161426505, 1016497826) + + W(7, -1142140116, -1112437644, 1022231761, 1030977566); + sum2 = + W(0, -1117898288, 1035048481, 1015521050, -1121110162) + W(1, -1125963325, 1029071818, -1131842712, -1139107970) + + W(2, -1090182563, 1058090849, -1155119281, -1131029094) + + W(3, -1072766634, 1075259304, -1099596444, 1037152077) + W(4, -1084699977, 1062748800, 1015646848, 1018972128) + + W(5, 1030742992, -1113008632, -1124623837, 1025197316) + + W(6, -1114032290, 1039805619, -1116189732, 
-1127481518) + + W(7, 1021721886, -1113709765, -1146515621, 1027813594); + WS(1065708113, -1121751611); + sum1 = + W(0, -1151170552, 1004700116, 1029908854, -1203182552) + W(1, -1113367966, 1033929377, -1124121476, -1152107167) + + W(2, -1132161740, 1028879143, 1021289212, 985230639) + W(3, -1102777150, 1048992314, -1106962671, -1118004497) + + W(4, 1008580363, -1130586604, -1145749780, 1024087440) + W(5, 1034637855, -1123817297, 1010684790, 1024857134) + + W(6, -1122909571, 997138991, -1110778290, 1018524741) + + W(7, 1029317996, -1119780649, -1119558682, 1030859440); + sum2 = W(0, 1022085973, -1128031215, 1012976820, -1176062153) + W(1, 1007192988, 982756722, -1124806095, 1022708360) + + W(2, 1059325611, -1087213694, 1030136738, -1120463845) + + W(3, 1078660314, -1070139299, -1095156098, 1033425170) + + W(4, 1067996709, -1079390657, -1131018297, -1138793506) + + W(5, -1179370697, -1159362498, 1019962283, -1124014927) + + W(6, 1033764027, -1110724756, 1025926144, 1016277465) + + W(7, 1000911377, 1028041806, -1121497459, -1138055830); + WS(1067144001, 1026092834); + sum1 = + W(0, 1005864375, 1049810486, -1103125415, -1120236446) + W(1, 1014388109, 1020396467, 1040462414, -1132016387) + + W(2, -1110877439, 1055042333, -1103579165, -1107048544) + + W(3, -1100539124, 1042867979, -1096204250, -1116193256) + + W(4, -1110133357, 1031992147, 1048911295, 1010104860) + W(5, -1126225268, 1044290490, -1091768605, 1031444773) + + W(6, 1033891677, 1034565181, -1114007278, 1030176378) + W(7, 1016160519, 1034066560, -1112003369, 1031923300); + sum2 = W(0, -1130823435, -1123834023, 1036568073, -1114604843) + + W(1, -1108591076, 1045671991, -1106172311, 1024976861) + + W(2, -1112679806, -1133291790, -1115701009, 1033105292) + + W(3, -1095898825, 1058163296, 1057311204, -1122476787) + + W(4, -1107310218, -1100325346, -1097367248, 1035411458) + + W(5, 1030441654, -1113640367, 1044352434, -1111920852) + + W(6, -1128226189, 1016405480, -1106561364, 1031834977) + + W(7, 1031713190, 
-1116067001, 1014762708, -1121017007);
+    WS(1057420226, -1092362255);
+    sum1 = W(0, 1032475105, 1044920519, -1103664666, -1114304581) +
+           W(1, 1020346174, -1144052776, 1027257637, -1116794957) +
+           W(2, -1121587297, 1052978802, -1096869991, -1103967019) +
+           W(3, 1019256762, 1055322687, -1116373310, -1100439285) +
+           W(4, -1123769862, -1104246416, 1028318277, 1018564517) +
+           W(5, 1033058515, 1040777765, -1096789598, 1036089864) + W(6, 1024808486, 1016898380, -1118572551, 1021198731) +
+           W(7, 1030782351, 1041983019, -1106323631, -1130998630);
+    sum2 =
+        W(0, -1134184569, 1016741405, 1020516693, -1102863001) + W(1, 1006994613, -1130764191, 1043863088, -1103907912) +
+        W(2, 1034914936, 1033077740, 1060988002, -1084447988) +
+        W(3, -1134366153, -1112456708, 1071901111, -1076633620) +
+        W(4, 1033598449, 1043198162, 1043355315, -1098165823) + W(5, 1009154259, -1132669129, 1018979141, -1118646231) +
+        W(6, 989883430, 1016038391, 1034096544, -1113048166) + W(7, 1021229932, -1126067903, 1022348007, -1118544043);
+    WS(-1105493768, -1082709441);
+
+    return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);
+}
+
+shared float inp[429];  // sample tile written and read by Pass1; 429 = 39 * 11 entries (id / 11 in 0..38, id % 11 in 0..10)
+
+#define CURRENT_PASS 1
+
+#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
+#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
+void imageStoreOverride(uint2 pos, float value) { temp[pos] = (value); }  // target of the imageStore macro: out_image is ignored and only val.x is written to temp
+
+#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
+static const float2 INPUT_size = float2(GetInputSize());
+static const float2 INPUT_pt = float2(GetInputPt());
+
+#define HOOKED_tex(pos) INPUT_tex(pos)
+#define HOOKED_size INPUT_size
+#define HOOKED_pt INPUT_pt
+
+void Pass1(uint2 blockStart, uint3 threadId) {  // Pass 1: per thread, copies one existing sample and computes one new sample with nnedi3(), writing both to temp
+    ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);  // coordinate origin of this work group
+    int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y);  // this thread's base index into inp (11 entries per x step)
+    for (int id = int(gl_LocalInvocationIndex); id < 429; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {  // threads share the work of filling all 429 entries
+        uint x = (uint)id / 11, y = (uint)id % 11;  // NOTE(review): x and y are uint while group_base is int; confirm "group_base.x + x - (3)" below cannot wrap as unsigned at the image border
+        inp[id] =
+            HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (3)) + 0.5, float(group_base.y + y - (1)) + 0.5)).x;  // tile is offset by (-3, -1) from group_base; the +0.5 presumably targets texel centers
+    }
+    barrier();  // presumably a work-group sync so inp is fully written before the reads below; definition not visible here
+    vec4 ret = vec4(0.0, 0.0, 0.0, 0.0);
+    vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0);
+    vec4 samples[8];  // filled so that samples[i][j] = inp[local_pos + 11 * i + j]: an 8 (x) by 4 (y) window
+    samples[0][0] = inp[local_pos + 0];
+    samples[0][1] = inp[local_pos + 1];
+    samples[0][2] = inp[local_pos + 2];
+    samples[0][3] = inp[local_pos + 3];
+    samples[1][0] = inp[local_pos + 11];
+    samples[1][1] = inp[local_pos + 12];
+    samples[1][2] = inp[local_pos + 13];
+    samples[1][3] = inp[local_pos + 14];
+    samples[2][0] = inp[local_pos + 22];
+    samples[2][1] = inp[local_pos + 23];
+    samples[2][2] = inp[local_pos + 24];
+    samples[2][3] = inp[local_pos + 25];
+    samples[3][0] = inp[local_pos + 33];
+    samples[3][1] = inp[local_pos + 34];
+    samples[3][2] = inp[local_pos + 35];
+    samples[3][3] = inp[local_pos + 36];
+    samples[4][0] = inp[local_pos + 44];
+    samples[4][1] = inp[local_pos + 45];
+    samples[4][2] = inp[local_pos + 46];
+    samples[4][3] = inp[local_pos + 47];
+    samples[5][0] = inp[local_pos + 55];
+    samples[5][1] = inp[local_pos + 56];
+    samples[5][2] = inp[local_pos + 57];
+    samples[5][3] = inp[local_pos + 58];
+    samples[6][0] = inp[local_pos + 66];
+    samples[6][1] = inp[local_pos + 67];
+    samples[6][2] = inp[local_pos + 68];
+    samples[6][3] = inp[local_pos + 69];
+    samples[7][0] = inp[local_pos + 77];
+    samples[7][1] = inp[local_pos + 78];
+    samples[7][2] = inp[local_pos + 79];
+    samples[7][3] = inp[local_pos + 80];
+    ret[0] = nnedi3(samples);  // newly computed sample
+    ret0[0] = inp[local_pos + 34];  // 34 = 3 * 11 + 1, i.e. window entry samples[3][1], passed through unchanged
+#if CURRENT_PASS == LAST_PASS
+    uint2 destPos = blockStart + threadId.xy * 2;  // this bounds check is compiled only when CURRENT_PASS equals LAST_PASS
+    uint2 outputSize = GetOutputSize();
+    if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
+        return;
+    }
+#endif
+    imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0);  // existing sample at (gid.x, 2 * gid.y)
+    imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret);  // computed sample at (gid.x, 2 * gid.y + 1)
+}
+//!PASS 2
+//!DESC NNEDI3 (double_x, nns64, win8x4)
+//!IN INPUT, temp
+//!OUT OUTPUT
+//!BLOCK_SIZE 64, 8 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[8]) { + float sum = 0.0, sumsq = 0.0; + [unroll] for (int i = 0; i < 8; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); + sumsq += dot(samples[i], samples[i]); + } + float mstd0 = sum / 32.0; + float mstd1 = sumsq / 32.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); + mstd1 *= mstd2; + float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = W(0, 1040079017, -1129953383, -1112255447, -1100320116) + + W(1, 1039284509, 991372703, 1013856427, -1108390802) + W(2, -1115658697, 1052077971, 1056937139, 1052646725) + + W(3, -1091236407, -1106109018, -1112818013, 1030287040) + + W(4, -1107150591, -1102425550, -1089460941, 1029401490) + + W(5, 1057992884, 1043570389, 1044897031, 1028923089) + W(6, -1119789534, 1027055297, 1033358144, -1121552872) + + W(7, -1097729145, -1111268755, -1110301789, -1132036495); + sum2 = + W(0, -1108031914, 1026461175, 1032549274, 1041814131) + W(1, 1017353224, -1121140359, -1132764107, -1138465879) + + W(2, -1118430023, 1041449883, 1072973866, -1076980873) + + W(3, -1111091488, 1028730196, -1121430441, -1127880506) + + W(4, 1012288055, 1027150813, 1049697467, -1082819831) + W(5, 1041587393, 1027133694, -1122016279, -1141547063) + + W(6, 1013768747, -1122068949, 1026059807, 1036691428) + + W(7, -1152272302, -1116481805, 1025296987, 1012449399); + WS(-1098744132, 1034278418); + sum1 = W(0, -1103397220, -1124165708, -1110800938, 1026719460) + + W(1, 1022646298, 1032193166, 1022064347, -1136800346) + + W(2, -1145635621, 1041815523, -1099348442, -1092523700) + + W(3, -1100009602, -1088889154, -1104673163, -1098235958) + + W(4, -1105248316, 
-1120033612, 1050332487, 1047722705) + W(5, 1037024150, 1057456646, 1045722836, 1051861685) + + W(6, 1050544938, 1021903208, 1035939706, 994253234) + W(7, 1017012340, 992386946, -1126668889, 1007925993); + sum2 = + W(0, -1097631923, 1038271179, 1016052501, 1038061501) + W(1, -1098093629, 1036023945, -1103222197, 1049634734) + + W(2, 1051137052, -1138123466, 1047950529, 1066995446) + + W(3, 1052164534, -1086456688, -1092315790, -1088937960) + + W(4, 1049915324, -1101382095, 1053016941, 1044310058) + + W(5, -1101624661, -1097740488, 1025704121, -1095498431) + + W(6, -1103655169, 1030554459, -1103587135, 1039796029) + W(7, 1022295275, 1002073396, 1009745942, 1037237655); + WS(-1082862242, -1106529553); + sum1 = W(0, -1130854575, -1136558799, -1106220300, 1011297096) + + W(1, 1032803517, 1003802452, -1130818358, -1115833284) + + W(2, 1035507577, -1134799464, 1050921813, 1017516694) + + W(3, -1083278305, -1113237716, -1166618304, 1036440480) + + W(4, -1111381151, -1109420173, -1090777253, 1058584025) + + W(5, 1053793791, 1038764911, 1011564912, -1125528356) + W(6, -1121770975, 1040932752, 1042678624, 1023914089) + + W(7, -1114891124, 1024022374, 1003125716, -1148371520); + sum2 = + W(0, -1113253112, 1017383795, -1098397046, -1105653936) + W(1, 1043748521, -1130700635, 1040646747, -1134989990) + + W(2, -1114468103, -1105748811, 1042070822, 1062131274) + + W(3, -1088478625, 1039489157, -1112481937, -1118262998) + W(4, 1046106511, 1042779663, 1049941293, 1049624420) + + W(5, -1093962905, -1102421728, -1120850913, -1124158867) + + W(6, 1029911106, -1111927878, -1106009556, -1113543954) + + W(7, -1110290200, 1032413051, 1032000907, 1027657598); + WS(1055738180, 1034492849); + sum1 = W(0, -1130881323, -1114926341, 1041145701, -1109424308) + + W(1, -1112247108, -1112831856, -1114665081, -1121239258) + + W(2, -1111251428, -1128721888, -1099536939, 1051914904) + + W(3, 1058202118, 1051860704, 1026763985, -1130346670) + W(4, 1042092597, 1039888312, 1055510938, -1131357522) + + 
W(5, -1089549409, -1104754439, -1109075065, -1117425768) + + W(6, 1006685651, -1103919286, -1104400342, -1113433279) + + W(7, 1022435899, -1139575663, -1129253153, -1142807791); + sum2 = W(0, -1110077216, 1036549925, -1103858204, 1011211476) + W(1, 1008268344, 1023981444, 1010863636, 1034090633) + + W(2, -1118181126, -1116107500, 1042025735, 1056055130) + + W(3, -1090440399, -1117941654, 1001166765, -1130298175) + + W(4, 1049383148, 1034915268, 1044994486, 1058209450) + + W(5, -1098004228, -1099569204, -1121873216, -1107211558) + + W(6, -1110853226, -1119533468, -1101175453, -1113779586) + + W(7, -1109857174, 1034145803, 1026441284, 1033292040); + WS(1056018244, -1109435672); + sum1 = W(0, 1028147653, 1025593127, 1019179235, 1042157337) + + W(1, -1110312463, 1019247612, -1120006735, -1144225212) + + W(2, -1121172806, -1108190871, 1044333967, -1098176216) + + W(3, 1052400296, 1041545807, -1152440548, 1040728133) + + W(4, -1156629272, 1019648283, -1105021998, -1084323638) + + W(5, 1052575762, -1144495302, 960835398, -1117757541) + W(6, 1029524274, 998861994, -1126875818, 1048944704) + + W(7, -1112766254, 1027979993, -1115664295, -1127435447); + sum2 = W(0, -1133465842, 1037565600, -1114814180, 1023511793) + + W(1, 1028077461, 1005396583, 1022326514, -1149118303) + W(2, 1020564842, -1103457632, 1043192653, 1057142397) + + W(3, -1101196001, 1038394412, -1113456189, -1137100532) + + W(4, -1109452473, 1042355402, -1103393926, 1042803000) + + W(5, -1093624788, -1094252679, 1041416796, 1021621854) + W(6, 1020759546, 1025892619, 1033812672, 1040091206) + + W(7, -1104532905, -1153222990, -1130818626, 1028700597); + WS(1058353218, 1042996167); + sum1 = + W(0, 1024954343, 1015847030, 1036476495, -1110665693) + W(1, -1121159378, -1108641009, -1137738249, -1102251787) + + W(2, -1094124879, -1107919793, -1097421664, 1048862286) + W(3, 1033331871, 1049066134, 1023600450, 1041347474) + + W(4, 1052314513, -1138612357, 1044445371, 1050687635) + W(5, -1102818752, 998963493, 
-1113493234, 1023627001) + + W(6, 1009270680, -1116609353, -1202266712, -1105910956) + + W(7, 1018868510, -1115498359, 1016588291, -1119202191); + sum2 = W(0, -1126808420, 980939457, -1123796075, 1037410136) + + W(1, 1002142140, -1139817292, -1134378480, -1142652688) + + W(2, -1120540574, -1110048722, 1033167871, -1106277561) + + W(3, -1166268225, -1116671070, -1143978200, 1011961658) + + W(4, -1098896413, -1075343834, -1086151008, 1075199384) + + W(5, 1058982643, -1127002394, 1028067074, 1027681870) + W(6, 1039187917, -1110843188, 1032482295, 991431992) + + W(7, -1123555130, -1163241473, 1021947375, -1117918702); + WS(1065448321, -1123448424); + sum1 = W(0, 1033675765, 1010310940, -1135157253, -1130617674) + + W(1, 1045868403, 1037821135, 1029877230, -1107423852) + W(2, 1042933049, 1048779866, 1053164479, 1054681092) + + W(3, -1088933089, -1096402228, -1112063319, -1132569296) + + W(4, -1113419593, -1098481070, -1085464413, 1025088582) + + W(5, 1052703622, 1043193000, 1039463880, 1039088003) + W(6, -1107290229, 1033876859, 1028430645, -1126554424) + + W(7, -1113028565, -1114258373, -1131689691, 992526257); + sum2 = W(0, 992398551, 1027463162, -1110914988, -1119896935) + + W(1, -1100289244, 1024218964, -1111923136, 1036361104) + + W(2, -1118632904, -1115592424, -1094541616, 1058308065) + + W(3, 1052134722, -1139018826, 1043392525, -1114310803) + + W(4, -1109433050, 998577524, -1095126430, 1059518579) + W(5, 1038527426, -1109739820, 1004586244, 1020338285) + + W(6, -1143518596, -1122494561, -1116426235, -1103007923) + + W(7, -1121464784, -1115201103, -1120049915, -1128089075); + WS(1050696068, -1101686596); + sum1 = + W(0, 1030829490, 1025629886, 1028506281, 1044306231) + W(1, 1022456523, 1033300499, 1025382374, -1126351518) + + W(2, 1032239887, 1039198082, 1035777849, -1094476563) + W(3, 1053256444, -1108777674, -1125496907, 1013615278) + + W(4, 1015204626, 1033819234, 1052890832, -1083567844) + W(5, -1095460989, 1028011096, -1117978119, 1022077689) + + W(6, 
-1113406309, 1030627629, -1111973334, 1049080890) + + W(7, 1026744658, 1009469122, 1014700692, -1117484787); + sum2 = W(0, -1123705959, 1033160533, 1021465594, 1019539230) + + W(1, -1115261066, 1030761443, 1021971134, -1122567677) + + W(2, -1116235595, 1028975273, -1105037500, 1055127070) + W(3, 1039390029, -1104569658, 1024962787, 999405400) + + W(4, 1034058997, -1094422871, -1106553103, 1036912081) + + W(5, -1113630050, 1034816057, -1131788338, 1014360364) + + W(6, -1139597844, -1122943830, -1103106284, 1037858431) + + W(7, 1034945731, 997585040, -1137214108, 1023175710); + WS(1058623938, 1044469574); + sum1 = W(0, 1045615020, 1035147318, 1047639223, 1050348390) + W(1, 1030161626, 1044899106, -1132009366, 1044251461) + + W(2, -1108426137, -1141312974, -1105886038, -1079944741) + + W(3, 1039515483, 1021302290, -1167333042, 991624716) + + W(4, -1135193336, 1044306859, -1122107414, -1085342603) + + W(5, -1100470855, -1110191310, -1180200419, -1100960996) + + W(6, 1044911680, 1030580258, 1043270872, 1051720601) + W(7, 1041363874, 1041545713, 1040848727, 1042676794); + sum2 = W(0, -1126998679, -1112584470, -1125720699, 1026650453) + + W(1, -1098785634, 1035354845, -1103280207, -1117822839) + + W(2, -1097249916, -1101780852, -1124055793, 1050619160) + + W(3, 1044368398, -1115949975, 1036370417, 1013784870) + W(4, 1048536708, 1037003529, 1040132893, 1058508674) + + W(5, 1037637019, -1106591466, -1109803757, -1104576316) + + W(6, -1118913729, -1122142287, 1008038822, -1117660887) + + W(7, -1121056345, 1006096779, -1116815483, -1119752481); + WS(-1092650820, -1093598320); + sum1 = W(0, -1126483198, -1136033646, 1019177644, -1119240794) + + W(1, -1095523617, -1108863241, -1113123137, -1126435736) + + W(2, -1111185837, -1107026560, -1096033968, -1101350837) + + W(3, 1057301262, 1042409572, 1040959081, 1035081684) + W(4, 1032567139, -1113511425, 1059010394, 1042563567) + + W(5, -1131092420, -1108714423, -1114101506, -1106962654) + + W(6, 1028684103, 1034982933, 1025024233, 
-1118967697) + + W(7, -1118984463, 1023629806, -1128061252, -1142697342); + sum2 = W(0, -1118904411, -1158272012, 1013234849, 1030464194) + + W(1, -1106482941, 1025489390, -1129648553, 1028128070) + + W(2, -1134435633, -1107189637, -1120249847, 1057018316) + + W(3, 1048792270, 1024832618, 1031754422, -1125347555) + W(4, -1102280300, 992574886, -1083559741, 1062197029) + + W(5, 1040811198, -1114392001, 1031998303, 1029475610) + + W(6, -1150017862, -1122133752, -1146867651, -1097179586) + + W(7, -1115216614, 1021081809, -1112304821, 993658022); + WS(1058173058, 1066808129); + sum1 = W(0, -1107348220, -1099127505, -1103913530, -1113478482) + + W(1, 1040539139, -1113016318, -1120477164, 1000206250) + W(2, 1035236882, 1036577094, 1044208414, 1029276252) + + W(3, -1097605967, -1100723463, 1029619500, -1122925746) + + W(4, 1041707116, 1043659283, 1046852514, 1039741986) + W(5, 1044710619, 1043551606, -1133641268, 994694705) + + W(6, -1121094224, -1105652573, -1107481241, -1121409754) + + W(7, 1032545388, -1115290924, 1013015214, 950366748); + sum2 = W(0, -1094008764, -1105723160, -1087139249, 1052425931) + + W(1, 1056772124, 1034445536, 1012087213, 1037095176) + W(2, 1034130054, -1120175931, 1064201383, 1002356625) + + W(3, -1089869944, -1114835724, -1123264660, -1129286431) + + W(4, 1031813868, 1042930394, -1110465854, -1098172076) + + W(5, 1043061462, -1102352886, 1040830367, -1111397998) + + W(6, 1011760553, -1124103013, -1131636845, 1006833373) + W(7, 1007319139, 1014923704, 1016549099, 993051546); + WS(1061928770, 1027279769); + sum1 = W(0, 1034406912, -1113862823, 1039933724, -1111710246) + + W(1, -1111349982, -1109997378, -1113344954, -1114279935) + + W(2, -1100123699, -1162686055, -1096018034, 1056786929) + + W(3, 1056534441, 1044907121, 1022437288, 1031029722) + W(4, 1029564087, -1109708970, 1050604450, 1037263862) + + W(5, -1098017894, 1038664521, -1116167797, 1027389387) + + W(6, 1033006716, -1128579510, 1007368299, -1109765633) + + W(7, -1113416432, 
-1101798872, -1105438789, -1113862289); + sum2 = W(0, 1026006751, -1108403600, 1035073774, -1122004937) + + W(1, 1035952663, 1015050843, -1124438030, -1133483016) + + W(2, -1115270559, 1031363022, -1104904057, 1031769509) + + W(3, -1106336513, 1027760004, 1019789656, -1118390529) + W(4, 1024783342, 1033798448, 1037041630, 1039898935) + + W(5, 1057393141, 1027825620, -1121305243, -1128829234) + + W(6, -1130316822, -1139293944, 1025961213, 1021529214) + + W(7, -1112547455, -1096601581, -1105185563, -1106336479); + WS(1053752708, 1043937257); + sum1 = W(0, -1114168512, -1112521242, -1097902456, -1095288434) + + W(1, -1094410051, -1114520925, -1115668990, -1105295129) + + W(2, -1105170221, -1098204331, 1046102006, 1066642253) + + W(3, 1054367941, -1100810554, -1121904763, 1031167873) + + W(4, 1031197758, -1104934543, 1042705158, 1070561411) + + W(5, 1038022813, 1032266099, -1114135854, -1123380722) + + W(6, -1109039567, -1113606305, -1098767200, -1089903115) + + W(7, -1100846418, -1108028099, -1115062077, -1118408200); + sum2 = W(0, -1113064749, -1144787532, -1115681675, 1041111385) + + W(1, -1130408019, -1136936214, 1022660811, -1121651306) + + W(2, -1090786559, -1094418907, 1009808406, 1053253351) + + W(3, 1015714915, 1020895003, -1117683238, 1028089270) + W(4, 1038982572, -1102980167, 1041806798, 1054413523) + + W(5, 1024043560, -1114777947, 1021536435, -1123371786) + + W(6, -1136678198, -1129681067, 1023762114, 1035936399) + + W(7, -1114747797, 1001107148, -1129865467, 1019986163); + WS(-1077899937, -1095640595); + sum1 = W(0, 1038010313, 1026389718, 1040451742, 1038149954) + W(1, 1012098034, 1041468735, 1036152598, 1035936827) + + W(2, 1032811586, 1036208246, -1100196466, -1112596654) + + W(3, 1057575303, 1025151035, -1108998502, -1117388883) + + W(4, -1097475626, -1123908524, 1051006893, -1096170677) + + W(5, -1089360567, -1102384270, -1117118702, 1019229383) + + W(6, 1033727032, -1117534996, -1116405267, 1043001855) + + W(7, 1035677341, -1122502968, 959089718, 
-1122221082); + sum2 = W(0, 1014408115, 1031626038, 1042237958, 1024306585) + + W(1, 1005347958, -1108949069, -1129272561, -1155665460) + + W(2, 1037203464, -1111295182, -1111204470, 1046645930) + + W(3, 1036175171, -1102667672, -1105300490, -1098212068) + + W(4, -1112987386, 1033188726, 1033012571, 1040504174) + W(5, 1022488131, -1113110436, 1012028387, 1026136762) + + W(6, 1006518986, 998532726, 1002908510, -1142339005) + W(7, 1028667874, 1028194944, 991631364, -1118275484); + WS(1052188868, -1105025774); + sum1 = W(0, 1026006805, -1127133881, -1111238899, 1039605133) + W(1, 1036489378, 1027868467, 1028341284, 1040022103) + + W(2, 1053332527, 1046329385, 1054121990, 1041936861) + + W(3, -1098161716, -1100202284, -1114580002, -1112308133) + + W(4, -1093124457, -1102124985, -1093232644, -1108714798) + + W(5, 1047964177, 1042303686, 1042593657, 1033044781) + W(6, 1016895952, 1015047702, 1031180874, -1123947762) + + W(7, -1116894641, -1100188956, -1114267170, -1120167377); + sum2 = W(0, -1098242390, -1090155459, -1080159437, -1083172772) + + W(1, 1043663143, 1066420271, 1065821018, 1052081121) + W(2, 1038124206, -1111850856, -1111691725, 1045262621) + + W(3, -1107342132, -1115094311, 1045364599, 1039539139) + + W(4, 1035251585, -1110800852, 1033462452, -1116129960) + + W(5, -1117401930, 1036826747, -1121811075, 1031476522) + + W(6, -1146833441, 1015159009, -1114532383, 1037947101) + + W(7, -1115149205, -1118954060, -1159263732, -1144829650); + WS(-1102870152, 1022622513); + sum1 = W(0, 1055644087, 1035394969, 1045649819, 1045987408) + W(1, 1030573816, 1031111560, 1035095156, 1041054212) + + W(2, -1105950504, -1108831748, 1041190721, 1039545500) + W(3, 1049616144, 1056957379, 1047436506, 1055423038) + + W(4, -1114669622, 1043185370, -1098060649, -1093030704) + + W(5, -1093912525, -1084377448, -1093512673, -1094643096) + + W(6, -1099940547, 1029691760, -1105006556, 1034170571) + + W(7, 1028754581, 1032315050, 1008186016, 1002093997); + sum2 = + W(0, -1084145430, 
1037387540, -1091365810, -1096604175) + W(1, -1095647742, 1043123500, -1106696806, 1053814194) + + W(2, 1070336926, -1098862516, 1050914492, 1068317532) + W(3, 1054262144, -1083131387, 1046449081, -1081281727) + + W(4, 1055168354, -1095516697, 1050568943, 1069441477) + + W(5, -1090345251, -1085743049, 1048225937, -1086819148) + + W(6, -1086613448, 1056626754, -1097405209, -1099937369) + + W(7, -1106692290, 1045945631, -1106594556, 1058187197); + WS(-1069426976, -1120848841); + sum1 = W(0, 1036938172, 1035259664, 1047028417, 1052190352) + W(1, 1041456894, 1039915051, 1015525967, 1036563112) + + W(2, 1037072255, 997868135, 1031706371, -1092031243) + W(3, 1045414333, 1026900661, -1119324359, 1015048091) + + W(4, -1103659451, 1049019033, -1095105906, -1081236347) + + W(5, -1090672009, 1032303833, 1008361890, -1146656500) + W(6, 1032159990, 1025792718, 1020529436, 1049696791) + + W(7, 1038916826, 1035011641, 1039253091, -1145253028); + sum2 = + W(0, -1131675185, -1130807793, -1137547874, -1119117212) + + W(1, -1138868050, -1139313154, -1127085641, -1144490820) + + W(2, 1029191442, -1132163681, -1115933340, -1086087178) + + W(3, -1112002886, 1014175682, 1023290113, -1135148482) + W(4, -1114450600, 1018117017, 1033829647, 1063181450) + + W(5, 1026390676, -1143924164, -1138352802, 1007342770) + W(6, 1024984252, -1115031522, 1037592454, 1006729954) + + W(7, 1017409241, -1134939410, -1125048401, 974288160); + WS(-1092511940, 1056045775); + sum1 = W(0, 1034351639, 1036571263, 1026078569, 1044122211) + + W(1, -1145831724, -1113387827, 1018446892, -1119385695) + + W(2, -1122413709, -1107220477, -1123595634, -1099810889) + + W(3, 1052039823, 1050308552, 1032192235, 1047960215) + W(4, 1034842253, 1031880235, 1048156900, -1106066347) + + W(5, -1096815763, -1095462256, -1104039774, -1105858435) + + W(6, -1109413949, -1128037178, -1106487287, 1038991202) + + W(7, 1016305349, 1039930976, -1145490603, -1115254417); + sum2 = W(0, -1129634341, -1125984965, -1131211232, 1032304695) + + 
W(1, -1126835561, -1135310730, 1030376830, -1115706548) + + W(2, 1017418680, -1145267460, 1019434782, -1112808134) + + W(3, 1027589163, -1164412329, 1012340975, 1024240430) + W(4, 1033383321, -1108053053, 1035592777, 1017461836) + + W(5, -1125819067, 1048432434, -1132396034, 1037084632) + + W(6, -1086264457, -1078809375, -1073693037, -1098879629) + + W(7, 1071963841, 1073307463, 1053688201, -1110470100); + WS(1042257800, -1115776325); + sum1 = + W(0, -1146926115, 1032352980, -1109330569, -1114314051) + W(1, 1039411448, -1111850704, 1040829028, -1123393177) + + W(2, -1107281138, -1098526814, 1043068219, -1109412128) + + W(3, -1097266597, 1039093055, -1100632132, -1104810789) + + W(4, 1038400362, 1045994468, -1111135982, -1144690495) + W(5, 1057956169, -1128243973, 1048043508, 1025450855) + + W(6, 1010013391, -1117084567, 1043908518, -1120663934) + + W(7, -1098521248, -1122589083, 1029880783, 997333453); + sum2 = W(0, 1029402915, -1131137316, 1031911869, 1023173531) + + W(1, 1023792141, -1123803784, 1014267385, -1128776324) + + W(2, -1139977433, -1118253197, -1122896144, -1122404168) + + W(3, -1098422336, 1037623928, -1106514445, 1036548666) + + W(4, -1102627522, 1040550677, -1088615745, 1061434285) + W(5, 1075510495, 1067804318, 1045458757, 1035151624) + + W(6, 1041328458, -1112092070, 1058291503, -1087193418) + + W(7, -1072676007, -1079538943, -1106625048, -1107997040); + WS(1062572194, -1113128476); + sum1 = W(0, 1025054504, 1032745069, -1111717923, -1115121012) + + W(1, 1039928305, -1108655721, 1042636501, -1121873307) + + W(2, -1105909778, -1098454710, 1040855323, -1102647392) + + W(3, -1094658254, 1048800872, -1098634802, -1107065062) + + W(4, 1035303099, 1048922046, -1111940687, 1030883818) + W(5, 1056642050, -1114397119, 1045405821, 1028123977) + + W(6, 1030405814, -1107189334, 1047137430, -1120733568) + + W(7, -1102007432, -1121522301, 1030940241, -1129454014); + sum2 = + W(0, -1114559261, 1015935669, -1119968612, 1034980574) + W(1, -1134322843, 1031387619, 
-1130912561, 1028413753) + + W(2, -1138228901, 1029828517, -1110320033, -1095660939) + + W(3, 1043622789, -1103434641, 1043842027, -1104010102) + + W(4, 1049277284, -1107208114, 1062476231, -1090290563) + + W(5, -1071408041, -1079628774, -1100585862, -1110973692) + + W(6, -1102431407, 1036930870, -1086736101, 1061616421) + W(7, 1075586093, 1068626110, 1036267766, 1043722127); + WS(1060774594, 1031131703); + sum1 = W(0, -1107942447, -1105426838, -1102134060, -1108928460) + + W(1, 1030069408, -1109912496, -1111247296, 1016389396) + W(2, 1045023814, 1048858052, 1057375412, 1053310638) + + W(3, -1086363546, -1121814959, -1121700033, -1112979299) + + W(4, 1022102280, 1019498368, -1089780107, 1054202179) + W(5, 1056436400, 1045750878, 1017717699, -1115080444) + + W(6, -1109726834, -1115599332, 1024119623, -1117349999) + + W(7, -1105657043, -1112636534, 1020787328, 1034439495); + sum2 = W(0, 1036474850, -1117613837, 993333704, -1124280889) + + W(1, -1120272689, -1147158916, -1136721538, 1006917274) + + W(2, -1115841609, 1015990489, -1114813616, 1046600726) + + W(3, -1127188969, -1101686418, -1106242516, -1111325506) + + W(4, -1115206474, -1121605429, 1041891165, 1051997378) + + W(5, -1122067781, 1035140408, 1020051521, -1113122752) + + W(6, 1027456791, -1155010952, -1115442992, -1145891812) + + W(7, -1135549634, 1009536026, 1019245009, 1017045509); + WS(1060194466, -1100017114); + sum1 = W(0, -1107280066, -1110455654, -1108454907, -1102634810) + + W(1, -1103570769, -1103041901, -1108837738, -1103753711) + + W(2, -1105747811, -1101985628, 1018718942, -1106901713) + + W(3, 1046100821, 1055521256, 1047438565, 1059153514) + W(4, 1059466063, 1043733237, 1057500851, 1036011765) + + W(5, 1016328357, -1113318903, -1139455183, -1111475141) + + W(6, -1103466926, -1106475814, -1106517942, -1094721336) + + W(7, -1110005824, -1108487663, -1116101330, -1118902112); + sum2 = W(0, -1134258178, 1016306377, -1119701101, -1121134035) + + W(1, -1110905440, -1123898542, -1120388479, 
-1131815221) + + W(2, -1116481221, -1123533287, -1150894505, 1050682292) + + W(3, 1003934853, 1016401385, 1010383122, 1025651043) + W(4, 1032034504, -1130975497, -1129842733, 1050657130) + + W(5, 1032136922, -1114630639, -1135861370, -1115813895) + + W(6, -1123729210, -1124034546, -1119840142, -1118895679) + + W(7, -1115758498, -1135589646, -1140270714, -1126135829); + WS(-1094780356, -1096037829); + sum1 = W(0, -1094564683, 998269945, -1106439374, -1099790159) + + W(1, 1039955171, -1116696534, 1026955814, -1132274079) + + W(2, 1051641704, 1048993133, -1105882161, 1021385644) + + W(3, -1110801281, -1093602407, 1040928722, -1094848202) + + W(4, 1042321778, -1097112145, 1049078036, 1039341071) + W(5, 1035258371, 1056382370, 1035779188, 1053064740) + + W(6, -1109647669, 1034946343, -1120792141, -1101585581) + + W(7, 1020526630, -1124429730, -1106396689, 1027970323); + sum2 = + W(0, -1137596463, -1129205035, -1126343315, 1039861741) + W(1, -1126312459, -1123052878, 996868828, -1125851727) + + W(2, 1029577680, -1107142480, 1047351132, -1098569120) + + W(3, 1038608299, -1116615219, -1112533224, -1107291575) + W(4, 1038439017, 1015352075, 1070713407, 1073186805) + + W(5, 1038004689, -1073950732, -1077903771, -1107100148) + + W(6, -1113448033, 1012914617, -1114069586, 1046610276) + + W(7, -1144677526, 1007385420, -1108941862, 1037961064); + WS(1053603780, -1115461008); + sum1 = W(0, -1137418943, 1030817204, 1016256157, 1021933093) + + W(1, -1107174285, 1024187323, -1121722108, 1027865701) + + W(2, -1097293942, -1095449155, -1092155167, 1049349309) + + W(3, -1125654285, -1107838901, -1112035518, -1115980033) + + W(4, 1049585228, 1046317951, 1056038503, 1058232189) + + W(5, -1109114845, -1104780729, -1104946775, -1096840466) + + W(6, 1014991997, -1134044169, -1118818050, -1129232467) + + W(7, 1020919504, 1042320778, 1033110781, 1040979266); + sum2 = W(0, 1033231462, -1122607696, -1126070520, 1021774826) + + W(1, -1120788802, 1023918789, -1127228708, -1155086158) + + W(2, 
-1115237141, 1031925962, 1042960949, 1053325526) + + W(3, -1107200469, -1112013861, -1114604471, -1120863174) + + W(4, -1111323807, -1115623012, -1119706606, 1049744257) + + W(5, 1005054791, 1026671690, -1107279270, -1119270266) + W(6, 1022510111, 1016053183, 999155791, -1130112624) + + W(7, -1116359926, -1115047971, -1133439199, -1114777589); + WS(1049944452, -1118177299); + sum1 = W(0, 1004954534, 1034605113, 1042179871, 1046072096) + + W(1, -1119832136, -1115172412, 1021914321, -1125265763) + + W(2, -1113835168, -1117712807, 1049772083, -1089565448) + + W(3, -1087875066, 1028249398, -1115173764, 1016032028) + + W(4, -1131743895, 1044152226, -1105992217, -1102704504) + + W(5, 1051885673, 1023284205, 1032417701, 1023720959) + W(6, 1036431958, -1113909672, 1034469319, 1047091406) + + W(7, -1117867957, 1037431354, -1128742159, 1027934165); + sum2 = + W(0, -1122048832, -1102028824, -1098388207, 1042109429) + W(1, 1037898289, 1035691269, -1120157845, 1026959129) + + W(2, 1053033259, 1033866732, -1083784972, 1035235216) + W(3, 1056376564, -1103367787, 1034674212, -1128193065) + + W(4, -1121809206, 1014156522, 1055740053, -1104400409) + W(5, -1126110057, -1120368299, 1015248281, 984939091) + + W(6, -1100778293, 1033801710, -1112352078, 1046653262) + + W(7, -1111071353, 1025902829, 997672170, -1153716682); + WS(-1165444096, -1119710264); + sum1 = W(0, 1042994118, 1034244741, 1037778378, 1041081798) + W(1, 1016348739, 1044760836, -1126699940, 1049969601) + + W(2, 1051115175, 1042802936, 1051394930, -1114386041) + W(3, 1044466585, 1039392600, 1042064672, -1111502020) + + W(4, -1098028621, 1040688824, -1088773549, -1087521946) + + W(5, -1089153334, -1102706330, 987979863, -1112154588) + + W(6, -1108071955, 1032455940, -1120702728, 1029133552) + + W(7, 1045508004, -1117660781, 1043978432, -1112358151); + sum2 = + W(0, -1113048377, -1169925094, 1033233769, 1022227231) + W(1, 973479782, 1025958326, -1175748301, -1108998850) + + W(2, 1060341068, 1041247352, 999982909, 
-1081289783) + W(3, -1090689511, 1036288861, 1044034366, 1055733822) + + W(4, 1067872749, 1057359743, -1089189226, -1075624869) + W(5, -1088323315, 1048635178, 1052769876, 1060416891) + + W(6, -1107478445, -1121919154, -1112420946, 1030919570) + + W(7, 1049895882, -1108655271, 1042449401, -1098757157); + WS(-1077066897, -1138646112); + sum1 = + W(0, 1024267775, 1009759453, 1034803609, -1105412061) + W(1, -1118414542, -1104989395, 1000523125, -1099033634) + + W(2, -1092381223, -1106693383, -1096180043, 1042083852) + W(3, 1031147625, 1050244398, 1024065726, 1045342865) + + W(4, 1053014632, 1027242622, 1051220983, 1053447283) + W(5, -1105039519, 1023828386, -1110843104, 1028746421) + + W(6, 1025453154, -1118474373, -1149202983, -1101608091) + + W(7, 1010847661, -1113932053, 1023831870, -1113130950); + sum2 = W(0, 1013870164, -1133465912, 1030888661, -1118337904) + + W(1, 1030672570, -1156287199, 1014316520, -1145191680) + + W(2, 1032969077, 1032956180, -1118666398, -1095979039) + + W(3, -1102611716, 1041525268, -1121602320, -1129518532) + + W(4, 1050672871, 1076869477, 1063553933, -1069322680) + + W(5, -1088200357, 1026168517, -1129519600, -1120095414) + + W(6, -1105627059, 1037977967, 1017901782, 1026329625) + + W(7, -1169512700, -1153801247, -1126632288, 1031405165); + WS(1059038658, 1028594353); + sum1 = W(0, 1030613999, -1132336275, -1117493948, 1038489844) + W(1, 1028206993, 1044643916, 1007880004, 1043840791) + + W(2, 1027001704, 1035531624, 1032402678, -1152158733) + + W(3, -1098315241, -1088517514, -1103996748, -1091506711) + + W(4, -1089493909, -1094328658, -1106243079, 1052296065) + + W(5, 1042911580, 1049587060, 1035409673, 1044000434) + W(6, 1046010101, 1026431736, 1043051950, 1044337864) + + W(7, 1028671678, 1032918142, -1137633217, 1036291743); + sum2 = + W(0, -1118922878, 998383694, -1117830208, -1123342062) + W(1, -1109116635, -1124252768, -1104593856, 1026377322) + + W(2, -1105269646, -1109062791, 1013977255, 1052555170) + W(3, 1041098100, 1040874722, 
1030669290, 1040986964) + + W(4, 1050974192, 1002191246, 1046707356, 1051646415) + W(5, 1036678597, -1098775079, -1122565606, -1098750401) + + W(6, -1110275215, -1111322692, -1114811922, -1114439732) + + W(7, -1108145020, -1121186646, -1115485971, -1115223671); + WS(-1089038754, 1070610314); + sum1 = W(0, 1006818038, -1126323612, 1015379312, -1107760774) + + W(1, -1110158738, -1104391397, -1114553627, -1113068735) + + W(2, -1112569734, -1099849389, -1090696922, -1101892281) + + W(3, 1028620400, 1047988033, 1043536901, 1039930893) + W(4, -1110914124, 1036085850, 1053776546, 1045400523) + + W(5, 1040930437, -1132043583, -1119469430, -1114659286) + + W(6, 1038135388, 1043645143, 1040122382, -1118717763) + W(7, 1002760462, 1016954112, 974497054, -1139206280); + sum2 = W(0, -1120363159, -1119340063, -1119078724, 1035843986) + + W(1, 1035175754, -1106992039, -1128507307, -1129270833) + + W(2, 1029972206, 1020623843, -1137841818, -1094446255) + + W(3, 1049035968, -1128810267, -1123633215, -1123131987) + + W(4, -1135126990, 1026762268, 1064611900, 1017486399) + + W(5, -1094749382, 1039608352, -1194223384, -1197948696) + + W(6, -1111466112, -1099605665, -1102195223, 1042926586) + + W(7, 1014516782, -1112006203, -1136077530, -1129209333); + WS(-1120775200, -1094115106); + sum1 = W(0, -1117113151, -1133090109, -1096583070, -1098081804) + + W(1, -1092969217, -1119648950, -1113650835, -1105675228) + + W(2, -1112921839, -1099947345, 1050818928, 1057531117) + W(3, 1051254629, -1098660371, 926574177, 1035449878) + + W(4, 1046145098, -1097096777, 1048882552, 1067142180) + W(5, 1054137781, 1035250696, -1115652196, 1024424221) + + W(6, -1107062579, -1151313622, -1100319675, -1094480203) + + W(7, -1100195749, -1111825641, -1118408614, -1114839546); + sum2 = + W(0, -1122846232, 1002183233, -1129631292, -1112395279) + W(1, 1009610216, 1001287537, -1122731504, -1139829356) + + W(2, 1031116535, -1113707943, 988543074, -1080457005) + W(3, -1097715519, 1037716560, -1112112395, 1031806339) + 
+ W(4, -1118531646, 1034244245, 1027624889, 1064104967) + W(5, 1052372051, 1011412016, 1031591085, -1128697528) + + W(6, 1030028661, -1116699784, 1039671963, 1037992478) + + W(7, -1115290878, 1016046512, -1122589730, 1022404309); + WS(-1083041826, -1088446577); + sum1 = + W(0, -1112868382, -1139686881, 1037998481, 1035817611) + W(1, 1028314191, -1112414119, -1123391616, -1152205450) + + W(2, -1138041180, 998832115, -1108164766, -1106005004) + W(3, 1054394898, 1042585939, 1037009680, 1027830053) + + W(4, 1041478665, 1026886463, 1018740405, -1092605991) + + W(5, -1096954228, -1105339656, -1122534560, -1128384493) + + W(6, -1123809603, 1007627162, -1137074328, 1036557383) + W(7, 1043199331, 1016960213, 949272012, -1161448845); + sum2 = + W(0, 1036010005, -1112615912, -1108103320, 1026422893) + W(1, 1043952518, -1112407780, 1025610945, -1142574731) + + W(2, -1080772886, -1080108640, 1042883784, 1067023817) + W(3, 1065911618, 1043418298, -1121396391, 1019038275) + + W(4, -1110295637, 1033469827, 991295446, -1106557392) + W(5, 1015020021, -1113416335, -1118133995, 1027619053) + + W(6, -1113551367, -1118689741, -1123719569, 1039881895) + + W(7, 1028046949, 986544491, 1028536389, -1114551687); + WS(1059656866, 1032254503); + sum1 = W(0, 1051132033, 1032901415, 1050001035, 1051046262) + W(1, 1024623477, 1041030197, -1129401004, 1040932616) + + W(2, -1087438145, -1096624519, -1093591378, -1092342973) + + W(3, 1024747921, 1042870865, -1112612011, 1037509038) + W(4, 1040121644, 1045530925, 1040228252, -1096329486) + + W(5, -1100479077, -1092498854, -1100524478, -1089145703) + + W(6, 1047684766, 1031967406, 1044069767, 1050005760) + W(7, 1045316401, 1048633756, 1041789317, 1050363772); + sum2 = + W(0, 1033723840, -1120765513, -1151198348, -1121736497) + W(1, 1020727265, -1111733852, 1010605379, -1112033268) + + W(2, 1054927280, 1004192518, 1032813206, -1097617496) + W(3, 994767756, 1024454465, -1124374145, -1107458988) + + W(4, -1094930774, 1009606707, 1018976553, -1096835022) 
+ W(5, -1114598300, 1049726638, 1040837102, 1057385902) + + W(6, -1110893464, -1117945313, -1111742424, -1121652889) + + W(7, -1144829382, 1030389401, -1120942585, 1035752486); + WS(-1076352721, 1061176787); + sum1 = + W(0, -1115567878, 1039493528, 1037423542, 1022350014) + W(1, -1114677168, -1123591576, -1143365955, -1114839417) + + W(2, -1095051668, -1094852257, -1086623624, 1044286862) + W(3, 1057477914, 1049595083, 1036655988, 1044280124) + + W(4, 1047858891, 1040618482, 1057001770, 1036068672) + W(5, -1086915318, -1095930750, -1108386213, 1029598047) + + W(6, 1038434246, -1116442287, -1114755255, 1022609452) + + W(7, 1037805114, 1044588197, 1031697064, -1119274549); + sum2 = + W(0, 1018488840, -1109735653, -1113462230, -1112517262) + W(1, -1108529674, -1112282852, 1002563934, 1011039855) + + W(2, -1104206436, 1012246375, -1114523900, 1056346033) + + W(3, 1029007760, -1106712811, -1113168793, -1115338003) + + W(4, -1111729053, -1117505825, 1042992721, 1051780099) + W(5, 1042618199, 1035563022, 1024458488, -1112797570) + + W(6, 1004158510, -1122544930, -1111468709, -1133433375) + + W(7, -1114816198, -1136000711, -1122022613, 1037211560); + WS(1037340944, -1121222187); + sum1 = + W(0, -1116495838, -1118176747, -1096308023, -1094205341) + + W(1, -1095502171, -1115640417, -1112496425, -1105702479) + + W(2, 1040689218, -1110298712, 1052341602, 1064260668) + W(3, 1052009702, -1100646016, -1117627026, 1026863896) + + W(4, 1036502651, -1106426924, 1044624667, 1068943083) + W(5, 1045175252, 1004526500, -1108867801, -1112879076) + + W(6, -1106495021, -1119752153, -1097659059, -1089978464) + + W(7, -1099820424, -1108261518, -1110958217, -1111070831); + sum2 = W(0, 1002312827, -1134403694, 1025097519, -1105297347) + + W(1, 1007461646, -1117960999, -1123847191, -1129847887) + + W(2, 1043920878, 1045056865, -1107896626, 1060319926) + + W(3, -1100756735, 1018292031, -1136355134, -1108036266) + + W(4, -1113959010, -1120905299, 1031903433, -1100397803) + + W(5, 1042137547, 
-1106899263, -1119650359, 1033071013) + + W(6, -1124888335, 1019347263, -1120916959, 1028583233) + + W(7, -1119634731, -1134844462, 998264859, -1109200858); + WS(-1087648930, 1056984912); + sum1 = W(0, 1027950097, 1027762677, 1025406415, 1043195617) + W(1, 1035903278, 1031391740, 1023913741, 1033673899) + + W(2, 1048727276, 1043573475, 1051177331, -1092810048) + W(3, 1038252876, 1044809191, 1035706934, 1020597221) + + W(4, -1101873716, 1015678999, -1091649404, -1082302860) + + W(5, -1097139061, 1038200652, 1042041459, 1041744649) + W(6, -1111974664, 1024202483, 1016025318, 1050173331) + + W(7, -1156961168, 998750650, 1023377112, -1114494621); + sum2 = W(0, 1035604443, -1113891962, -1155240851, -1130359386) + + W(1, 1025386829, 1028100153, 1016774458, -1126789482) + W(2, -1109665959, 1017090586, 1033553543, 1061263788) + + W(3, -1109330568, -1106256683, -1097657183, -1118236009) + + W(4, 1027481873, -1115818981, 1023852497, 1027230949) + + W(5, 1041592808, -1113120233, -1101411950, -1121071325) + + W(6, -1121529027, 1006860245, -1124657042, 1039189933) + + W(7, -1123139111, 999905578, -1132638677, -1109638591); + WS(-1090940868, 1045539156); + sum1 = W(0, -1125495305, -1122547977, 1049435900, 1022396558) + + W(1, 1034779456, -1113477720, 1025152496, 1017461170) + + W(2, -1111842948, 1036262295, -1083395945, 1050733040) + + W(3, 1050803832, 1023164412, -1141132416, -1119150102) + W(4, 1024315123, 1017057054, 1046781659, 1016653136) + + W(5, -1084181811, 1044787970, 1025666193, 1039405571) + W(6, 1033322010, 1021788462, 1041875702, 1032259903) + + W(7, 1045300311, -1103054087, -1119238479, -1121753565); + sum2 = W(0, -1111850352, -1115334877, -1101312255, -1131783421) + + W(1, 1037660946, -1121261620, 1008969141, -1118719808) + + W(2, -1154701652, 1045060155, 1053683311, 1041887968) + + W(3, -1090960245, 1034919558, -1118627401, -1117531173) + + W(4, 1008422401, 1033609440, -1090718168, 1047082317) + W(5, 1034433310, 1036748070, -1109720341, 998924330) + + W(6, 
-1128983709, -1109145509, 1041457249, 1009316297) + + W(7, 1037394704, 1030040522, 1034723828, -1122112519); + WS(1058382658, -1101786424); + sum1 = W(0, -1110270554, -1139041509, -1106470550, -1114587164) + + W(1, 1023988544, 1031022672, 1028924449, 1026679274) + W(2, 1020557616, 1040419632, 1039037886, -1089217895) + + W(3, -1116816379, -1098005290, 1030444000, -1105928314) + + W(4, 1042446649, -1110579152, 1052788411, -1102603214) + W(5, 1052497916, 1048850952, 1041387232, 1042985742) + + W(6, 1010032145, 1021044303, -1107378131, -1101971332) + + W(7, -1110520967, -1136091053, -1142630143, 1036784631); + sum2 = W(0, -1122331173, -1116694246, -1133055086, -1106162040) + + W(1, 1033705060, -1119866494, 1019582103, -1119480360) + W(2, 1021048143, -1117334358, 985385246, 1058597177) + + W(3, -1096472079, 1040447045, -1107345628, 1011504527) + + W(4, -1121426033, 1034622720, -1111822939, 1062397265) + + W(5, 1007125953, -1124693001, 1025700470, 1026572561) + + W(6, -1141905806, -1109719947, -1111398337, -1092732322) + + W(7, -1104483430, -1148551778, -1142669219, -1124513225); + WS(1062970978, -1081266569); + sum1 = W(0, -1102316771, 1026249566, -1104248914, 1037424006) + + W(1, 1035596338, -1122189220, 1016727466, 1006845944) + + W(2, 1049067654, 1049251698, -1100292756, -1086541140) + + W(3, -1092317470, -1101422105, -1130063849, -1103019359) + + W(4, 1040583110, 1039432874, 1040278772, -1096033842) + W(5, 1045287213, 1049598341, 1043296141, 1042202275) + + W(6, 986432279, 1031488060, 1034368766, 1046796153) + W(7, 1039493185, 1029733120, -1132489681, 1032483937); + sum2 = + W(0, -1108138297, 1024987284, -1107013954, 1038311792) + W(1, -1140508767, 999045582, 1010876103, -1139420079) + + W(2, -1092863213, -1095439435, 1054820060, 991870971) + W(3, 1019051815, 1003248078, -1122507350, -1120196608) + + W(4, -1134447463, -1107350452, -1104627754, 1058790229) + + W(5, 1049499199, -1113643053, 1040683258, -1107048418) + W(6, 1020893591, 1030342296, 1023656240, 
-1145193326) + + W(7, -1111721419, -1124168067, -1111078930, 1036851190); + WS(-1090709444, 1049440503); + sum1 = W(0, -1112814366, 1009000949, -1102384190, -1098104133) + + W(1, -1100420271, -1117987732, -1108681738, -1118237822) + + W(2, 1025170444, -1102968365, 1054801672, 1055216649) + W(3, 1007027006, 994631719, -1122456724, 1037835469) + + W(4, 1021129071, -1107658202, -1108358704, 1055155742) + + W(5, 1062491877, -1115514619, 1032182427, -1137063726) + + W(6, -1116166456, 1013557870, -1112664461, -1102466049) + + W(7, -1093799886, -1139587454, -1112800010, -1119596065); + sum2 = W(0, 1010269627, 1011116525, 1031226256, 1023655690) + + W(1, -1117593255, -1139919503, -1131886173, -1132337311) + + W(2, -1125700925, 1031004177, 991734042, 1056735713) + W(3, 1023598587, -1116020697, 1030692092, 1014084462) + + W(4, -1129465449, -1111184590, -1090395945, 1007034923) + + W(5, -1114530604, 1044588818, -1118660887, 1027760305) + + W(6, -1124401971, -1139731999, -1123603568, -1106780773) + + W(7, 1032362667, 1019563835, 1016903156, 1003383541); + WS(1052605444, -1096548047); + sum1 = + W(0, -1135269079, 1014261129, 1035728362, 1017149121) + W(1, 1022065234, -1106055602, -1122459306, -1120275059) + + W(2, -1099989408, -1107118442, -1095069609, 1004173259) + + W(3, -1102684265, 1026600311, 1029280393, -1114974728) + W(4, 1035141158, 1022832721, 1036930738, 1042230040) + + W(5, 1051854441, -1123731628, 1039321653, -1108029264) + W(6, -1157809579, 1034237928, 1014945855, 1022065263) + + W(7, -1149699425, 1033708682, -1120230985, 1040542518); + sum2 = + W(0, -1139099893, -1102121527, -1108414487, 1039987934) + W(1, 1045186917, -1096991255, 1033745090, -1102163660) + + W(2, -1078862437, -1076350607, 1043522425, 1067956735) + W(3, 1072447081, 1053122067, -1111637970, 1040872955) + + W(4, -1097588090, 998819801, -1111980043, -1109442889) + W(5, 1044036972, -1115256553, 1027886256, 1044251533) + + W(6, -1129576496, 1024729710, -1134118593, 1032654822) + + W(7, -1118463242, 
-1124332658, -1123595298, -1115579557); + WS(1054231108, 1025279114); + sum1 = W(0, 1020527399, 1020186806, 1026191493, 1046988234) + W(1, -1147857037, 1041704764, -1120557618, 1039867950) + + W(2, -1123453065, 1039341944, -1104689967, -1092316960) + + W(3, -1122753069, -1093206832, -1107112804, -1094240131) + + W(4, -1096862627, -1119337841, 1034733383, 1008801448) + + W(5, -1108979241, 1050167121, 1027712531, 1043962418) + W(6, 1049493415, -1160786718, 1041541558, 1041661776) + + W(7, 1034215919, 1032223662, -1156220183, 1036558383); + sum2 = + W(0, -1156731980, -1125155383, -1153680156, -1140765671) + W(1, 1018992688, 1030176462, 1027890172, 1018002108) + + W(2, -1121143571, 1021067288, -1101109237, -1079877582) + + W(3, -1094350155, 1058234221, 1064456596, 1058386161) + W(4, 1027150242, 1024911074, -1096825521, -1088644111) + + W(5, -1113080073, 1052444813, 1024094624, 1045751986) + W(6, 1019039796, -1128517034, 1036906894, -1110310177) + + W(7, 1019841700, 1000349534, -1128055622, -1134555143); + WS(-1097731588, 1025129315); + sum1 = W(0, 1033609822, 972033083, 1036940225, 1025915127) + W(1, 1037164536, 1018117854, 1009743167, 1021572178) + + W(2, 1037092684, -1118357663, 1046113067, 1050865438) + + W(3, -1109542297, 1047810219, -1111680192, 1036701507) + + W(4, -1099469177, -1139849211, -1089626010, -1143345853) + + W(5, -1104510832, -1097851227, -1113708160, -1110047355) + + W(6, -1125480775, -1113410371, 1035911587, 1047135499) + + W(7, 1038594118, 1031924197, -1146462713, -1120871309); + sum2 = + W(0, -1124965724, -1134630348, -1138485084, -1143859545) + + W(1, -1133066340, 1030422253, -1121255807, 1003464777) + W(2, 1029789829, -1105084635, 1035852445, 1050249105) + + W(3, -1114135764, 1024721985, -1120848177, 1033535897) + W(4, -1119672690, 1036116775, 1002738585, 1057234226) + + W(5, -1115375290, -1117221657, 1007172972, -1110249095) + + W(6, -1132623148, -1123511646, -1106888257, -1097411434) + + W(7, -1109817844, 1017260494, -1118819718, 1021514610); 
+ WS(1064258306, 1057427735); + sum1 = W(0, 1046669668, 1048078371, 1049007041, 1043467602) + W(1, 1032131185, 1040278032, 995640096, -1135631084) + + W(2, 1041288794, 1002554246, 1039217007, -1117262306) + W(3, 1041191743, 1037075392, -1121877906, 1026049048) + + W(4, -1094345727, -1107049340, -1092155570, -1100296810) + + W(5, -1101509636, -1100720075, 1015117221, 1034147956) + + W(6, 1024557366, 1026345305, -1126800432, 1017273727) + + W(7, 1027549470, -1136038899, -1134767685, -1111532367); + sum2 = W(0, -1083389635, -1081609637, -1079976349, -1090363224) + + W(1, -1100056051, -1097029007, 1028030408, -1118886369) + + W(2, 1062132206, 1066099523, 1066066300, 1067427062) + W(3, 1049064133, 996873905, -1137063332, 1035057689) + + W(4, -1122840851, 1053156114, -1105997303, -1091867230) + + W(5, -1103783968, 1048340523, -1134953960, -1104392578) + + W(6, -1127222139, 1022631188, 1027246780, -1128121946) + + W(7, 1036294091, -1115106526, 1035025213, -1117772337); + WS(-1082692450, -1121765493); + sum1 = W(0, -1112587635, -1118487719, -1105756568, -1111631170) + + W(1, 1036199927, -1106833087, -1115752014, -1109370235) + + W(2, 992292947, -1104534315, 1049230729, 1048175951) + W(3, -1089217933, 1028715132, -1121927694, 1018481703) + + W(4, 1015927970, 1038416886, -1104867982, 1051898365) + W(5, 1057590961, -1104675396, 1012516667, 1022215305) + + W(6, -1121302066, -1121296341, 1043167221, 1037146590) + + W(7, -1113442509, 1030891673, -1121397517, -1113425325); + sum2 = W(0, -1102561169, 1026098771, -1149524831, -1113975834) + + W(1, -1157274159, -1105234356, 1008573990, -1102778536) + + W(2, 1040868564, 1021710972, 1048116590, 1041914200) + W(3, 1051191259, 1039729263, 1018638481, 1035375403) + + W(4, -1126923948, 1009959824, 1037970343, 1042300553) + + W(5, 1032086087, -1097615037, -1115639834, -1115667744) + + W(6, -1123351116, -1125337962, 1024724387, -1123062396) + + W(7, 1021621986, -1105344931, -1109718449, -1105611058); + WS(1063145570, 1065948321); + sum1 = + 
W(0, -1113780043, -1107415145, -1103697695, -1125795137) + + W(1, -1119226457, 1032469064, -1118570362, -1148759702) + + W(2, 1042647928, 1041877314, 1043379605, -1105676766) + W(3, -1093770877, -1098882227, 1029896809, 1007275454) + + W(4, 1023153433, -1105577826, 1035095074, 1058030732) + W(5, 1049757493, 1047432094, -1106671851, -1110160160) + + W(6, -1113344071, 1034763992, -1113485016, -1107021830) + + W(7, -1116457366, 1028002580, 1033670420, 1021171309); + sum2 = + W(0, 1033443671, -1119163512, 1032648925, 1022262943) + W(1, -1121656898, -1125619769, -1116250872, 983397903) + + W(2, -1147652316, 1034562054, 1044843888, -1082830049) + W(3, 1049609982, 1025258841, 1030706970, 1030898199) + + W(4, -1122182070, -1110746439, -1104224164, -1080473697) + + W(5, 1070472721, 1045148854, -1138594966, 1015661307) + W(6, -1114870166, -1126213983, 1033622730, 1036499220) + + W(7, 1042757767, -1125695843, -1109235057, -1105016797); + WS(1047808392, -1105857653); + sum1 = W(0, 1036414223, 1038664639, 1033646416, 1045309491) + W(1, 1028560677, 1047007737, 1035626955, 1052689664) + + W(2, 1057255335, 1050583338, 1052085724, -1112577010) + + W(3, 1049269098, -1101673225, 1020504170, -1098667351) + + W(4, -1089047968, -1095111703, -1090242223, -1088425805) + + W(5, -1097592171, 1024927375, 988289723, -1153489085) + W(6, 1019549874, 1027946812, 1014185453, 1038762993) + + W(7, 1038356881, -1120878336, 1031636916, -1106190208); + sum2 = W(0, 1050181400, -1114267734, 1015251419, -1097894374) + + W(1, 1014394222, -1098468098, 1043075035, -1092845444) + + W(2, -1084160527, 1035785784, -1094891239, 1060354212) + W(3, 1045924604, 1054874631, 1042828743, 1063378281) + + W(4, -1088778570, -1097051496, -1096397593, 1063793540) + + W(5, -1132415773, -1115217527, -1098450150, 1051615858) + + W(6, 1059321043, -1113170400, 1035392846, -1098458292) + + W(7, 1030978836, -1096505073, 1042772598, -1088082125); + WS(-1073783985, -1080098540); + sum1 = + W(0, 1012137476, -1111733448, 1027546650, 
-1095391448) + W(1, -1107725963, 1030910366, 1024902311, -1136391487) + + W(2, 1004414705, -1125334283, -1098951217, 1058781709) + W(3, 1056582836, 1054585131, -1118558951, 1012935090) + + W(4, 994170013, 1040796537, 1045494048, 1038631039) + W(5, -1099332954, -1102018612, -1106829042, -1113109295) + + W(6, -1135747626, -1106294475, -1105932043, -1097893467) + + W(7, 1027155091, 1001385357, -1141795121, -1118975220); + sum2 = W(0, -1136577003, -1123969951, -1126970134, 1039844510) + + W(1, 1037108170, -1182117728, 1024145219, -1107924935) + + W(2, -1161174488, -1115268047, -1111829747, -1078035036) + + W(3, 1072008544, 1038025274, 1006927579, 1013110307) + W(4, -1123274403, 1023359390, 1034323377, -1085317526) + + W(5, 1044555209, 1032404118, 1024479257, 983390936) + W(6, 1021768566, -1128609502, 1026568479, 1027511085) + + W(7, 986504280, -1139830523, -1127097094, 1022685546); + WS(-1104974728, 1024380720); + sum1 = W(0, -1117848102, 1021171896, 1039153371, 1036503965) + + W(1, 988770395, -1109841905, -1121562883, -1126175513) + + W(2, -1117883163, -1127535108, -1099610928, -1100771573) + + W(3, 1043814114, 1034691017, 1034709362, 1004793673) + W(4, 1036325571, 1018298053, 1041104624, -1101972463) + + W(5, -1133844875, -1107251369, 1025626276, -1111468989) + + W(6, -1131530510, 1031544042, 982295861, 1036843369) + W(7, 1029874576, 1031396654, -1125713989, 1034135320); + sum2 = W(0, -1125393898, 1044163006, 1045704700, -1110276988) + + W(1, -1099412627, 1044569612, -1115657155, 1037394103) + + W(2, 1071281072, 1072425553, -1104699996, -1078765241) + + W(3, -1076152216, -1100004941, 1036033265, -1111185479) + + W(4, 1044770418, -1125402454, 1018560910, -1098423872) + + W(5, -1103281302, 1028856833, -1123488110, -1117821699) + + W(6, 1011595325, 1021558362, 1010524637, -1113439186) + + W(7, -1178088596, 1019143058, -1126446200, 1026998165); + WS(1066498257, 1026989550); + sum1 = W(0, -1129927868, -1123659738, 1045121333, 1002634947) + W(1, 1036944705, 970850411, 
1022709187, 1004856149) + + W(2, 1023571545, 1024686464, -1084187390, 1040895252) + W(3, 1040663099, -1115004776, 1024409057, 1001199419) + + W(4, 1016605513, -1143882757, 1042249043, 1054068239) + + W(5, -1089079196, -1120675276, -1112524513, -1120648472) + + W(6, -1133253141, 1025371048, 1033458708, 1032849928) + + W(7, 1049903454, 1024612748, 1017685954, -1117202350); + sum2 = W(0, 1038138620, -1155040203, 1049419982, -1094597030) + + W(1, 1022703337, -1117198782, -1132959015, -1121486321) + + W(2, -1106434423, 1038340354, -1087069176, 1064410985) + + W(3, -1110711879, -1112639315, 1026273617, -1123727669) + + W(4, 1014840059, -1105689565, -1102609737, 1043321526) + W(5, 1043390477, 1042536365, 1036470844, 1009362023) + + W(6, -1117610677, 1038431916, -1115449783, -1117967352) + + W(7, -1107988055, -1112813855, -1122022929, 1031915394); + WS(1059170114, -1115069681); + sum1 = W(0, 1041126528, 1031327889, -1137442701, 1027929903) + W(1, 1016336432, 1045677858, 1027533272, 1040648584) + + W(2, 1039057944, 1043197758, 1057495242, 1035662548) + W(3, 1036387882, 1043295874, 1020376880, 1035357519) + + W(4, -1114418030, 1035998588, -1086197480, -1092661254) + + W(5, -1098594899, -1097533773, -1116322521, -1106979748) + + W(6, -1125005432, -1184440086, -1107377807, -1131407372) + + W(7, 1031309989, 1017102598, 1036182828, -1128058314); + sum2 = W(0, 1029223181, 1035999136, 1032743888, 1050206678) + W(1, 1030968394, 969652021, 1018474295, -1138117380) + + W(2, -1099457044, -1123134927, -1106245137, -1115721168) + + W(3, 1035536761, -1109098872, -1136790748, -1105152866) + + W(4, -1094833228, -1107153986, 1052844759, 1029163621) + + W(5, 1046847142, -1111905268, -1116305850, -1102858563) + + W(6, 1022472216, -1129014496, 1034397600, 1043654862) + + W(7, -1114537747, -1149928759, 968266517, 1033858063); + WS(-1091215044, -1097536449); + sum1 = W(0, 1036334229, 1027816414, 1046343318, 1054850406) + W(1, 1041878433, 1040414769, 1017820267, 1047750528) + + W(2, -1088688205, 
-1110004986, -1088611549, -1085760696) + + W(3, -1113122464, -1115511970, -1113420363, 1034437006) + + W(4, 1028067640, 1025245044, -1104070412, -1089807987) + W(5, 990395721, 1024229024, 1028468150, -1097347774) + + W(6, 1049627927, 1039037582, 1048707955, 1052669472) + W(7, 1043177720, 1039929856, 1044644789, 1048578060); + sum2 = W(0, -1114440831, 1030624098, 988636086, -1132718391) + W(1, 1030310950, 1016098323, 1024855622, 1018716851) + + W(2, 1036847317, 1042295753, 1050034374, 1031424194) + + W(3, -1119733782, -1120554286, -1114648667, 1035432255) + + W(4, 1041890716, -1115362087, -1113867927, 1016671019) + + W(5, -1130826523, -1097953797, -1146049358, -1096082463) + + W(6, 1027069126, 1021176355, 1029167466, 1028661170) + + W(7, -1183083952, -1108500823, 1027651154, -1106095247); + WS(-1074893425, -1117971628); + sum1 = W(0, -1094367290, -1152038187, -1103630563, -1098786216) + + W(1, 1037656965, -1114777697, 1025099466, -1124130662) + W(2, 1052023128, 1049322869, -1111356283, 999054287) + + W(3, -1106664801, -1093651375, 1040611874, -1094570594) + + W(4, 1042452051, -1097322793, 1049675109, 1040732760) + W(5, 1040645714, 1057236855, 1040783587, 1055134184) + + W(6, -1107223211, 1034136520, -1112379016, -1102728254) + + W(7, 1016544493, -1125564586, -1105965314, 1008351835); + sum2 = + W(0, 1004001863, 1021526755, 1018968138, -1116175346) + W(1, -1139174407, 1031843333, -1131104448, 1009509771) + + W(2, -1116735450, 1032892007, -1096890588, -1099974349) + + W(3, -1103015201, 1040986630, 1035109709, 1041412407) + + W(4, -1111481570, 1024548251, -1073615974, -1071438395) + + W(5, -1146527383, 1076155388, 1074182350, 1041860718) + W(6, 1033584975, -1130001464, 1026017190, -1121576034) + + W(7, -1164624762, 1024580999, 1043256283, -1106441216); + WS(1040475912, 1032695578); + sum1 = W(0, -1124525022, -1155851319, 1035372099, -1122500130) + + W(1, -1172701842, 1033773669, 1014757675, -1116930196) + + W(2, -1112011377, -1123298122, -1091278169, 1056532699) + + 
W(3, 1054708857, -1105035590, 1007501482, 1026134833) + W(4, 1035534023, -1102944834, 1055301749, 1041895809) + + W(5, -1087252087, 1039100863, -1123874252, 1036316385) + + W(6, -1136466746, 1035920825, -1130626274, -1104812886) + + W(7, 1028924087, -1111589035, -1142366545, -1110262076); + sum2 = W(0, -1109091355, -1119792870, -1111337413, -1102054262) + + W(1, -1118550690, -1113960445, -1142500624, -1109065069) + + W(2, 1041703749, -1123886114, 1026232126, 1049335147) + + W(3, -1114718085, -1111978783, -1124340572, 1020384670) + + W(4, -1120034208, -1123320808, 1045621744, 1063803841) + + W(5, -1088756378, 1016064880, 1026268254, 1030310973) + W(6, 1010537508, 1026411366, 1015836410, -1098413574) + + W(7, 1041947548, -1123461018, 999681904, -1122952578); + WS(1060902754, 1050355625); + sum1 = W(0, -1156790206, -1151083459, 1011568442, 1023922682) + + W(1, 1024500211, -1112644265, 1033389138, -1104847739) + + W(2, -1099711167, -1104333225, -1101534272, -1082482317) + + W(3, -1107281952, 1053813598, 1025001795, -1116841801) + + W(4, 1048936605, 1047935457, 1041411896, -1103038759) + W(5, 1049946264, 1036831245, 1037700978, 1030797274) + + W(6, 975051222, -1120760458, 1040804594, 1043235660) + W(7, 998453633, 1030084558, 1007099659, 1035779080); + sum2 = W(0, 1036801640, -1120633215, 1043653368, 1045612039) + + W(1, -1100846658, 976462574, -1115109418, -1097687290) + + W(2, -1106066969, 1029646415, -1102845552, 1020964885) + + W(3, 1041841354, -1088569123, -1101549413, 1054633350) + + W(4, -1098529900, -1094622159, 1045091011, 1062435502) + + W(5, -1104869009, 1054359824, -1101036113, 1044975639) + + W(6, 1051617655, -1128436081, -1108268977, -1120809462) + + W(7, -1127923721, -1115259155, 1041018201, -1102739129); + WS(-1090140642, -1081743990); + sum1 = W(0, 1014408182, -1130293844, 1031847606, 1042594418) + W(1, 1041618980, 1035968392, 1009499077, 1033368909) + + W(2, 1037549524, 1009886528, 1044394635, 1037202321) + W(3, -1117833212, 1037634256, -1122700465, 
1015596142) + + W(4, -1119977588, 1031805064, -1097431626, -1094879521) + + W(5, -1119842097, -1101547735, -1107123493, -1105308242) + + W(6, -1148996265, -1107483388, 1015837167, 1035016080) + + W(7, 1032499967, 1034663662, 1017023585, 1030806526); + sum2 = W(0, -1123785792, 1006912162, -1091554339, -1071648513) + + W(1, -1078529007, -1106043455, -1108743589, -1113325201) + + W(2, 1013646496, -1120892227, 1057715626, 1076150687) + W(3, 1069469884, 1049558148, 1035924910, 1038760139) + + W(4, -1105735433, 1034054467, -1103626228, -1110563164) + + W(5, 1025732222, -1102425071, 1028146935, 1001130061) + W(6, 1032589029, -1113324539, 1043157583, 1029343874) + + W(7, -1120727508, 1029688242, -1120545616, -1123660578); + WS(1055430148, -1108560690); + sum1 = W(0, 1035961997, 1035029338, 1036894711, 1041930516) + W(1, 1018389269, -1113968413, 1026969083, -1119268932) + + W(2, -1122962962, -1107101862, -1105458486, -1102453987) + + W(3, 1050880022, 1050461237, 1017425654, 1049028992) + W(4, 1001804274, 1037435821, 1049324163, -1115462345) + + W(5, -1096234295, -1096034793, -1105484423, -1109268985) + + W(6, -1109508687, -1114596033, -1105968336, 1030068713) + + W(7, 1023650152, 1041307521, 1023939770, -1110794404); + sum2 = W(0, 1020921672, 1015626891, -1131154511, -1149322267) + + W(1, 1005570725, -1122484266, -1125609227, 1019773418) + + W(2, -1133145023, 1000680715, 1034643892, -1106033700) + + W(3, -1155618307, 1030350968, -1127714103, 1027949553) + W(4, 967639540, 1038163303, 1029820070, 1027175814) + + W(5, 1050725869, -1107328459, 984351189, -1113454879) + W(6, 1057583079, 1066461314, 1070593780, 1042500058) + + W(7, -1076733999, -1075365186, -1095092572, 1026351361); + WS(1048611396, -1120253991); + sum1 = + W(0, -1109906484, -1134442421, -1157090745, 1038629782) + W(1, 1019276152, 1008425488, -1121251029, -1115994974) + + W(2, -1104843595, 1020368935, -1102655324, -1096452233) + + W(3, -1094801150, -1098953977, -1125379561, -1120481658) + + W(4, 1042068462, 
-1116357012, -1127594975, 1048790986) + W(5, 1026936628, 1044903358, 1031973931, 1042932491) + + W(6, 1029221016, 1014414830, 1044033359, 1052393436) + W(7, 1029702657, -1137141374, -1117911220, 1022285443); + sum2 = W(0, -1129566974, -1143746400, 1036033029, -1117052801) + + W(1, -1123748463, 1038502321, -1138810328, 1043607047) + + W(2, -1111784437, 1036277891, 1028988774, -1100779042) + + W(3, 1044450885, -1129810696, 1034348699, -1108082071) + W(4, 1050813877, 1047551505, 1061486774, 1077969880) + + W(5, 1055586292, -1131992338, 1040643987, -1108034712) + + W(6, -1093517653, -1098527209, -1081514759, -1071648362) + + W(7, -1089190754, -1107152864, -1111885211, 1025200270); + WS(-1095808772, 1065647947); + sum1 = + W(0, -1096536264, -1104862109, -1101976087, -1101730853) + + W(1, -1126516760, -1106707963, -1123882077, -1108125731) + + W(2, 1055051378, 1050045579, 1051209263, 1045989771) + W(3, -1096666661, -1098537885, 1021791993, -1104774017) + + W(4, -1152511995, -1106144343, -1114400236, 1051130659) + W(5, 1054814183, 1057272194, 1047598263, 1057816237) + + W(6, -1101629224, -1139560206, -1118447811, -1101507019) + + W(7, -1103658466, -1098960762, -1106849375, -1104521538); + sum2 = W(0, -1110633145, -1114352725, -1120367533, -1113576661) + + W(1, -1147392026, -1111220567, -1123261925, -1131596567) + + W(2, 1046325680, -1134514653, 1030462840, 1052343557) + + W(3, 1028577898, -1127265311, 1023140267, -1109705515) + + W(4, -1106205960, -1145236426, -1121802017, 1052169724) + + W(5, -1126550987, 967975074, -1125641651, 1019189507) + + W(6, -1121981161, -1115188107, 1009337269, -1122467029) + + W(7, -1112707841, -1123885069, -1109514951, 1036097210); + WS(-1088656098, -1093593990); + sum1 = W(0, 1009988669, 1006243392, -1118924498, 1040203342) + W(1, 1030956655, 1015251645, 1032654584, 1034196708) + + W(2, 1053407451, 1046462824, 1054487499, 1043824044) + + W(3, -1107509853, -1099069067, -1114843936, -1106663601) + + W(4, -1093246956, -1100605512, -1092703459, 
-1110184012) + + W(5, 1041099456, 1036164518, 1040232112, 1035276411) + W(6, 1009810317, 1021367180, 1031892963, -1115326423) + + W(7, 1009525296, -1102894371, -1121673919, -1120762380); + sum2 = + W(0, 1045046842, 1057379920, 1066661430, 1065556520) + W(1, -1100359290, -1079104557, -1080513286, -1090732450) + + W(2, -1111686548, 1047338924, 1044625920, -1108706635) + + W(3, 1040575208, 1048831397, -1106604517, -1118381857) + + W(4, -1120613757, 1041926325, 1010608356, -1107659276) + W(5, 1029226052, 990146073, -1122878530, -1148427089) + + W(6, -1123157156, -1122209414, 1026048962, -1118064370) + + W(7, 1036917588, -1118722317, 999828841, -1128484108); + WS(-1103417864, 1032685901); + sum1 = W(0, -1113840082, -1101895278, -1106883347, -1112947643) + + W(1, 1045150312, -1106874992, -1122615424, -1140285221) + W(2, 999140540, 1021813182, 1050838637, 1035808888) + + W(3, -1089518824, -1112862903, -1123587264, 991482976) + + W(4, 1026362243, 1049323684, -1123649147, -1115046028) + W(5, 1051398592, 1043843226, 1032255888, 1025625239) + + W(6, 1025795300, -1104186210, 1026463622, 1015970396) + + W(7, 1028121997, -1114287989, -1148228092, -1125179546); + sum2 = W(0, 1052414494, 1041601968, 1061231009, -1094738093) + + W(1, -1090047391, -1112254382, -1124055261, -1106989530) + + W(2, -1107019340, 1032004449, -1083281727, 1048799024) + W(3, 1060484354, 1026094686, 1028491114, 1016363351) + + W(4, 1021676257, -1101889010, 1040520305, -1113266306) + + W(5, -1097572834, 1047868927, -1106439707, 1039531031) + W(6, -1118630717, 1028135044, 994543087, 1023423300) + + W(7, 1023744190, -1123297050, -1134044699, -1135541243); + WS(1062251010, -1128406975); + sum1 = W(0, -1122559590, -1117783082, -1111223497, -1097132645) + + W(1, -1118294493, 1027318465, -1122628178, -1142140116) + + W(2, -1114430084, 998905218, -1117430765, 1043264160) + + W(3, -1137238321, -1109365760, 1012147781, -1112437644) + + W(4, 1041522773, -1122146129, 1049934150, 1038499861) + W(5, 1040349396, 
1038350478, -1161426505, 1022231761) + + W(6, -1200477464, 1030594936, -1117296696, -1108127936) + + W(7, -1117164622, 1008104625, 1016497826, 1030977566); + sum2 = W(0, -1117898288, -1125963325, -1090182563, -1072766634) + + W(1, -1084699977, 1030742992, -1114032290, 1021721886) + W(2, 1035048481, 1029071818, 1058090849, 1075259304) + + W(3, 1062748800, -1113008632, 1039805619, -1113709765) + + W(4, 1015521050, -1131842712, -1155119281, -1099596444) + + W(5, 1015646848, -1124623837, -1116189732, -1146515621) + + W(6, -1121110162, -1139107970, -1131029094, 1037152077) + + W(7, 1018972128, 1025197316, -1127481518, 1027813594); + WS(1065708113, -1121751611); + sum1 = + W(0, -1151170552, -1113367966, -1132161740, -1102777150) + W(1, 1008580363, 1034637855, -1122909571, 1029317996) + + W(2, 1004700116, 1033929377, 1028879143, 1048992314) + W(3, -1130586604, -1123817297, 997138991, -1119780649) + + W(4, 1029908854, -1124121476, 1021289212, -1106962671) + + W(5, -1145749780, 1010684790, -1110778290, -1119558682) + + W(6, -1203182552, -1152107167, 985230639, -1118004497) + W(7, 1024087440, 1024857134, 1018524741, 1030859440); + sum2 = W(0, 1022085973, 1007192988, 1059325611, 1078660314) + W(1, 1067996709, -1179370697, 1033764027, 1000911377) + + W(2, -1128031215, 982756722, -1087213694, -1070139299) + + W(3, -1079390657, -1159362498, -1110724756, 1028041806) + + W(4, 1012976820, -1124806095, 1030136738, -1095156098) + + W(5, -1131018297, 1019962283, 1025926144, -1121497459) + + W(6, -1176062153, 1022708360, -1120463845, 1033425170) + + W(7, -1138793506, -1124014927, 1016277465, -1138055830); + WS(1067144001, 1026092834); + sum1 = W(0, 1005864375, 1014388109, -1110877439, -1100539124) + + W(1, -1110133357, -1126225268, 1033891677, 1016160519) + W(2, 1049810486, 1020396467, 1055042333, 1042867979) + + W(3, 1031992147, 1044290490, 1034565181, 1034066560) + + W(4, -1103125415, 1040462414, -1103579165, -1096204250) + + W(5, 1048911295, -1091768605, -1114007278, -1112003369) + 
// Group-shared tile of luma samples read by Pass2.
// Indexed as x * 15 + y (see the load loop in Pass2), i.e. 35 columns of 15 rows.
shared float inp[525];

#define CURRENT_PASS 2

// Luma of an RGB(A) sample.
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Redirect the GLSL-style imageStore to the helper below; only the first
// component of `val` (the luma) is forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)

// Writes one output pixel: `value` supplies the luma, while the chroma is
// taken from INPUT, sampled with the linear sampler at HOOKED_map(pos).
//   pos   - destination texel in OUTPUT.
//   value - luma for that texel.
// NOTE(review): HOOKED_map is not defined in this section; presumably it maps
// an output texel to an INPUT texture coordinate - confirm in prescalers.hlsli.
void imageStoreOverride(uint2 pos, float value) {
	float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb);
	// Recombine luma and chroma into RGB; alpha is forced to 1.
	OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0);
}

#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());

// `temp` has the input width and twice the input height.
#define temp_tex(pos) (float(texture(temp, pos).x))
static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2);
static const float2 temp_pt = float2(1.0 / (temp_size.x), 1.0 / (temp_size.y));

// This pass reads from `temp`.
#define HOOKED_tex(pos) temp_tex(pos)
#define HOOKED_size temp_size
#define HOOKED_pt temp_pt

// Second NNEDI3 pass: for every texel of `temp` it writes two horizontally
// adjacent output pixels - the existing sample at column 2*x and the value
// predicted by nnedi3() at column 2*x + 1.
//   blockStart, threadId - only used by the last-pass bounds guard below; the
//   remaining indexing goes through the gl_* names, which are not defined in
//   this section (presumably supplied by prescalers.hlsli).
void Pass2(uint2 blockStart, uint3 threadId) {
	ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
	// Index of this thread's window origin inside `inp` (x-major, 15 rows per column).
	int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y);

	// Cooperative load of the 35x15 tile (525 entries), strided by the workgroup size.
	for (int id = int(gl_LocalInvocationIndex); id < 525; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
		// Keep the cheaper unsigned divide/modulo, but convert the results back
		// to int: adding a uint to the int group_base makes the whole sum
		// unsigned, so the -1 / -3 halo offsets would wrap to ~4.29e9 for the
		// first workgroup instead of yielding a small negative coordinate.
		// (Assumes ivec2 aliases int2 - confirm in prescalers.hlsli.)
		int x = int((uint)id / 15), y = int((uint)id % 15);
		inp[id] =
			HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (1)) + 0.5, float(group_base.y + y - (3)) + 0.5)).x;
	}
	// Every thread reads entries written by other threads, so wait for the tile.
	barrier();

	vec4 ret = vec4(0.0, 0.0, 0.0, 0.0);
	vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0);

	// Gather the window: 4 tile columns (stride 15) of 8 consecutive rows each,
	// packed as two vec4 per column.
	vec4 samples[8];
	samples[0][0] = inp[local_pos + 0];
	samples[0][1] = inp[local_pos + 1];
	samples[0][2] = inp[local_pos + 2];
	samples[0][3] = inp[local_pos + 3];
	samples[1][0] = inp[local_pos + 4];
	samples[1][1] = inp[local_pos + 5];
	samples[1][2] = inp[local_pos + 6];
	samples[1][3] = inp[local_pos + 7];
	samples[2][0] = inp[local_pos + 15];
	samples[2][1] = inp[local_pos + 16];
	samples[2][2] = inp[local_pos + 17];
	samples[2][3] = inp[local_pos + 18];
	samples[3][0] = inp[local_pos + 19];
	samples[3][1] = inp[local_pos + 20];
	samples[3][2] = inp[local_pos + 21];
	samples[3][3] = inp[local_pos + 22];
	samples[4][0] = inp[local_pos + 30];
	samples[4][1] = inp[local_pos + 31];
	samples[4][2] = inp[local_pos + 32];
	samples[4][3] = inp[local_pos + 33];
	samples[5][0] = inp[local_pos + 34];
	samples[5][1] = inp[local_pos + 35];
	samples[5][2] = inp[local_pos + 36];
	samples[5][3] = inp[local_pos + 37];
	samples[6][0] = inp[local_pos + 45];
	samples[6][1] = inp[local_pos + 46];
	samples[6][2] = inp[local_pos + 47];
	samples[6][3] = inp[local_pos + 48];
	samples[7][0] = inp[local_pos + 49];
	samples[7][1] = inp[local_pos + 50];
	samples[7][2] = inp[local_pos + 51];
	samples[7][3] = inp[local_pos + 52];

	// Predicted luma for the new (odd) column.
	ret[0] = nnedi3(samples);
	// Window entry at column 1, row 3 (1 * 15 + 3 = 18): this cancels the
	// -1 / -3 offset applied while loading, so it is passed through unchanged.
	ret0[0] = inp[local_pos + 18];

#if CURRENT_PASS == LAST_PASS
	// Skip threads whose destination lies outside the output.
	// NOTE(review): both threadId axes are scaled by 2 here, whereas the stores
	// below scale only x - confirm against this pass's BLOCK_SIZE / NUM_THREADS.
	uint2 destPos = blockStart + threadId.xy * 2;
	uint2 outputSize = GetOutputSize();
	if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
		return;
	}
#endif

	// Even column: existing sample. Odd column: interpolated sample.
	imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0);
	imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret);
}
+// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 - +//!VERSION 4 +//!SORT_NAME NNEDI3_064_6 //!TEXTURE Texture2D INPUT; //!SAMPLER //!FILTER POINT -SamplerState sam; +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 1 * 2 +//!HEIGHT INPUT_HEIGHT * 2 * 1 +Texture2D OUTPUT; //!SAMPLER //!FILTER LINEAR -SamplerState sam1; +SamplerState sam_INPUT_LINEAR; //!TEXTURE -//!WIDTH INPUT_WIDTH -//!HEIGHT INPUT_HEIGHT * 2 //!FORMAT R16_FLOAT -Texture2D tex1; +//!WIDTH INPUT_WIDTH * 1 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D temp; -//!COMMON +//!SAMPLER +//!FILTER POINT +SamplerState sam_temp; -#define T(x) asfloat(x) -#define W(i,w0,w1,w2,w3) dot(samples[i],float4(T(w0),T(w1),T(w2),T(w3))) -#define WS(w0,w1) sum1 = exp(sum1 * mstd2 + T(w0)); sum2 = sum2 * mstd2 + T(w1); wsum += sum1; vsum += sum1*(sum2/(1.0+abs(sum2))) +//!COMMON +#include "prescalers.hlsli" +#define LAST_PASS 2 //!PASS 1 -//!DESC double_y +//!DESC NNEDI3 (double_y, nns64, win8x6) //!IN INPUT -//!OUT tex1 -//!BLOCK_SIZE 32,16 -//!NUM_THREADS 32,8 - - -float nnedi3(float4 samples[12]) { +//!OUT temp +//!BLOCK_SIZE 32, 16 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[12]) { float sum = 0.0, sumsq = 0.0; - [unroll] - for (int i = 0; i < 12; i++) { - sum += dot(samples[i], 1.0f); + [unroll] for (int i = 0; i < 12; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); sumsq += dot(samples[i], samples[i]); } - float mstd0 = sum / 48.0; float mstd1 = sumsq / 48.0 - mstd0 * mstd0; - // 不能使用 lerp,否则结果可能为 nan - float mstd2 = mstd1 >= 1.192092896e-7 ? 
rsqrt(mstd1) : 0.0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); mstd1 *= mstd2; - float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = + W(0, -1120714617, 1035162146, -1110752415, -1121818163) + W(1, 1035961863, -1120878363, 1007614003, -1123901891) + + W(2, 1046176310, -1105876799, 1034124059, -1120533037) + + W(3, -1123305414, 1027364554, -1114390527, -1096860741) + + W(4, 1022953826, -1155422496, 1009908268, 1030225939) + W(5, -1110762423, 1046191054, 1023450788, -1124524780) + + W(6, -1129324172, -1116882705, 1032526991, 1043289735) + + W(7, -1114841418, -1129613106, -1121901526, 1030599214) + + W(8, -1096831458, 1052908756, -1114991488, 1013731840) + + W(9, 1022915304, -1135846624, -1117077507, 1034530771) + + W(10, -1116257712, 981289536, -1142748844, -1113117446) + + W(11, -1127767760, 1039462233, 1025047056, -1143328976); + sum2 = + W(0, -1131108965, 1043465638, 988544780, -1145266179) + W(1, 1018501507, 1021344415, 1000650931, -1113612493) + + W(2, -1124179817, -1111438835, 1003301507, -1131299129) + + W(3, 1025688791, -1106748353, -1070139293, -1110118136) + + W(4, 1026178295, -1123696315, -1116852534, 1029599173) + + W(5, -1102659167, -1097100047, 1031198431, 1012522521) + W(6, -1145056307, 1045881517, 1078104617, 1027449515) + + W(7, -1123515159, 1024975831, -1123043594, -1118230687) + + W(8, 1048052425, 1027739737, -1134523125, 1020199127) + W(9, 1018436747, 1029259867, -1126127905, -1136112041) + + W(10, 995063942, -1162666156, -1139914565, -1107504809) + + W(11, 1025133515, 1036504803, -1120319069, -1131042995); + WS(1061710334, -1113637247); + sum1 = W(0, -1121814583, 1036106897, -1109309294, -1143950978) + + W(1, 1033251382, -1118153848, -1138603692, 
-1122595552) + + W(2, 1048694647, -1103580007, 1032154570, -1122508490) + + W(3, -1138264845, 1018454510, 1030681928, -1097890158) + + W(4, 1019050396, 1012933309, -1140194465, 1033737112) + + W(5, -1115559686, -1120177173, 1021617155, -1120902675) + + W(6, -1131439046, -1122735662, 1044326142, 1042373458) + + W(7, -1113884080, -1152998244, -1125621862, 1024541667) + + W(8, -1097437158, 1052689231, -1107957310, 1016843898) + + W(9, 1023073058, -1122620661, -1132933761, 1034486114) + + W(10, -1116304653, -1144273714, -1129805541, -1116003130) + + W(11, -1128614666, 1037961704, 1019245786, -1155701620); + sum2 = W(0, -1162960213, -1107084454, -1126626424, 1025411499) + + W(1, -1126548940, -1127269937, 1013908203, 1023868855) + W(2, 1032930462, 1029914487, 997665611, 1018748433) + + W(3, -1113327157, 1032525526, 1076098019, 1035900190) + + W(4, -1121182801, -1158385717, 1023571595, 1048796624) + + W(5, -1132699465, -1093496445, 1038561302, 1000017309) + + W(6, -1133790043, -1104878976, -1072747140, 1026052839) + + W(7, -1146184997, -1123007653, 1024139725, 1003078933) + + W(8, -1104764998, -1109617402, -1142783113, -1154085627) + + W(9, -1130037920, -1121062518, 1019199125, 1025857863) + + W(10, -1147685381, 1000677165, 1007034787, 1034207166) + + W(11, -1123497947, -1118139493, 1017680405, 1016687665); + WS(1064800702, 1030635520); + sum1 = + W(0, 1004285524, -1128056492, 1035663048, -1110660706) + W(1, -1128734654, 1028175148, -1122860750, 1024005652) + + W(2, 1031878036, -1116831438, -1204558974, -1143857649) + W(3, -1122286551, 1035530413, 984521208, 1023270757) + + W(4, -1136886024, 1005008153, 1021316024, 1053335517) + W(5, -1100061872, -1093549817, 1042579293, 1016449422) + + W(6, -1116388951, -1139373960, 1048911707, -1104621327) + + W(7, -1148999845, -1135015336, 1022533838, -1130762670) + + W(8, 1035194194, -1149876978, -1142259845, 1010510404) + + W(9, -1125657162, -1121564617, -1131931048, -1142066105) + + W(10, -1120920449, 1018152308, 1001429301, 
-1129213371) + + W(11, -1141986761, 1031904066, -1111776529, 1020350678); + sum2 = + W(0, 1015139874, -1120731382, 1017539382, 1020112270) + W(1, -1126248934, 1013963196, -1150118671, -1116737087) + + W(2, 1031888972, -1112637962, 1010207212, -1138130380) + W(3, 1032711786, -1087320828, 1059502015, 1026899401) + + W(4, -1119882792, 1019091922, 1028577839, -1069693959) + + W(5, 1078824499, -1094071548, 1040940327, -1123861886) + + W(6, -1116703472, -1103666133, 1045823576, 1036639812) + + W(7, -1131804892, 1004924552, -1123435694, -1138801588) + + W(8, 1009232228, -1114441289, 1029410629, -1130443436) + W(9, -1140098908, 999422392, 1011539428, 1023067274) + + W(10, -1130083750, -1137456436, -1140019692, 1029210329) + + W(11, -1118193829, -1131442506, 1007549476, 1026106777); + WS(1058942782, 1023618692); + sum1 = W(0, -1126072821, 1041771492, -1110777188, -1110871838) + + W(1, 1036426764, -1142526755, 1011693912, 1025548876) + W(2, 1035801072, 1043278082, -1146527995, 1015953709) + + W(3, 1017195423, 1036961985, -1094081945, -1103634941) + + W(4, 1046715887, -1148709083, -1122268692, 1037536142) + + W(5, -1091452759, -1105679152, 1040286827, -1119884351) + + W(6, 1022607903, 1038780013, -1095962376, -1103558179) + W(7, 1049101574, -1122534754, 982545387, 1023558139) + + W(8, 1049096396, -1130619215, 1031851588, 1007174309) + + W(9, -1126061755, 1037537425, -1113879430, -1136949949) + + W(10, 1002869195, 1017517093, 1003763447, 1035261291) + W(11, 955835050, 1026350907, 1006549707, 986004587); + sum2 = + W(0, -1128639222, 1001638233, -1119841411, 1014385869) + W(1, 986023269, -1135260845, -1150616818, 1013509397) + + W(2, 1029376723, 1037911199, 970306708, 1013815957) + W(3, -1118932607, 1035428655, 1019940778, -1102461584) + + W(4, -1141011097, 959170344, -1125568566, 1024255335) + W(5, 1063260126, -1085129330, -1110526300, 1017551658) + + W(6, 991022546, 1031481278, 1032753418, -1110003004) + W(7, 1033244979, 1016351942, -1129687950, -1111645244) + + W(8, 
1026999545, 1019019978, -1121764803, -1127392454) + W(9, 1018120650, 1028380841, -1121385219, 1021463302) + + W(10, 1025283273, 1009569613, 1006676397, -1115817479) + + W(11, -1134958989, 1017625850, -1128128990, -1138867245); + WS(1037837808, 1056377428); + sum1 = W(0, -1135573495, -1105969090, 1027945538, 1043223694) + + W(1, -1100584037, 1025656415, 1023727061, -1138454045) + + W(2, -1116105371, 1039709953, -1111027805, 1021082723) + + W(3, -1118803082, -1114548551, -1126955510, 1051232418) + + W(4, -1097887999, 1018764077, -1123500111, 1046351686) + + W(5, -1122693874, -1100833583, 1040497066, -1115071549) + + W(6, 1020600341, 1036383371, 1053007142, -1103740252) + W(7, 1024768136, 1025234525, -1111066541, 1034647805) + + W(8, 1035274060, -1101618713, -1138012047, -1116333124) + + W(9, 1008936161, 1026657308, -1156882549, 1033814037) + + W(10, -1123286772, 1016891995, -1115286797, 1033474888) + + W(11, -1122516460, -1129756781, 1010716753, -1114238799); + sum2 = W(0, 1020978875, -1100967324, 1043179705, 1026290891) + + W(1, 1013196277, -1138979633, -1121670462, -1130583533) + + W(2, -1122981488, 1032797386, -1120286641, 966672716) + + W(3, -1103298336, -1096736455, 1062358565, -1104401130) + + W(4, -1114052290, -1152672163, -1110731474, 1053186254) + + W(5, -1112891412, -1091109944, 1048200412, -1122242747) + + W(6, -1105136733, 1045958215, 1031005462, -1133705481) + + W(7, -1113847816, 1023845499, -1119790394, 1043729632) + + W(8, 1033523405, -1102472853, 1033631266, -1118586102) + + W(9, -1127100056, 1033816035, -1109547546, 1031959215) + + W(10, -1126209517, -1147194793, 999102475, 1020355333) + + W(11, -1122874410, -1122532081, 1027184816, -1127245018); + WS(1050865148, 1032626572); + sum1 = W(0, -1110436993, 1043627312, -1118552440, 1045700875) + + W(1, -1115060612, 1040430485, 1040042556, -1101476879) + + W(2, -1108049256, 1033317691, 1031191472, -1096410465) + + W(3, -1107890608, 1049906138, -1107535228, -1115480133) + + W(4, 1025393771, 1042925123, 
-1113356418, -1109025830) + + W(5, -1162779891, 1049713062, -1106727656, -1114330616) + + W(6, 1034032544, -1120749707, -1095271743, 1042738346) + + W(7, -1127443770, -1106585845, -1105712244, 1051482110) + + W(8, 1035225158, -1122810712, 1039553933, 1042770518) + W(9, 999321197, -1103830356, -1104074479, 1050825756) + + W(10, -1104755066, -1103219825, -1110201208, 1048543054) + + W(11, -1106449960, 1031488492, 1040395170, 1009713998); + sum2 = + W(0, 1024944996, -1111924540, -1107012648, 1057346437) + W(1, -1105023593, -1097010076, 1020193961, 1007347355) + + W(2, 1049541488, 1054940943, -1097277462, -1107952444) + + W(3, 1014966838, -1116539066, -1107447750, 1059914271) + + W(4, -1142156696, -1085761230, 1015828472, 1012341552) + + W(5, -1109770505, 1066972918, 1043445868, -1078926120) + + W(6, -1113612636, 1034050704, -1111197863, 1063973493) + + W(7, 1017043379, -1084679416, 1032934706, -1113193567) + + W(8, 1014122612, 1056333532, -1118781870, -1092348708) + W(9, -1116105943, 1042584092, 1016467631, 1045837843) + + W(10, -1113967259, -1128370702, 1021395599, -1116104243) + + W(11, -1135977693, 1043326701, -1117573943, -1108363561); + WS(1054811644, 1027249161); + sum1 = + W(0, 1040940111, -1101358582, -1113576073, 1049141881) + W(1, -1132483205, -1116754862, -1100243090, 1036221579) + + W(2, 1038054901, -1105999252, -1104538698, 1037145120) + W(3, 1043481462, 1014943347, -1097482896, 1051960356) + + W(4, 1036681666, -1118995685, -1119365790, -1109150355) + + W(5, -1106696288, 1048275427, -1123112824, -1126689554) + + W(6, -1111878381, 1036687602, 1022736646, -1118345360) + W(7, 1019000333, 1038634998, 1044780378, -1109759237) + + W(8, -1097940765, 1049145430, 1044474573, -1101292557) + + W(9, -1101638738, 1028424107, 1038478827, -1113101949) + + W(10, -1103799553, 1034438878, 1035929492, -1122643648) + + W(11, -1103674991, 1037909315, 1037239602, -1126283866); + sum2 = W(0, 1023766557, 1016554458, 1021815988, -1137435361) + W(1, -1114856427, 1032032312, 
1015306072, 1020144724) + + W(2, -1103467660, 1016553974, 1044555388, -1105991725) + + W(3, -1136376403, -1116988904, 1034521268, 1060795535) + + W(4, -1088370625, -1127777881, -1113107482, 1050162106) + + W(5, -1102546407, 1075503478, -1071164689, 1035804901) + + W(6, 1038494252, -1104347057, -1103257907, 1072821349) + + W(7, -1077590614, -1120944929, -1113447013, 1003678517) + + W(8, 1025406308, -1113972415, -1112563255, 1036418439) + + W(9, 1023629411, 1033762395, -1113239746, 1031283975) + + W(10, 995356642, -1123929939, 1024784188, -1109441777) + + W(11, 1026085595, 1037416902, -1112537357, -1140753514); + WS(1055940220, -1124188157); + sum1 = + W(0, -1113999823, -1119569210, 1015781094, -1147977024) + W(1, -1117684995, -1108336232, 1015763751, 1043278990) + + W(2, -1106256755, 1046188556, 1036478965, 1006648968) + W(3, -1115168029, -1094609330, 1052603679, 1041916962) + + W(4, 1033522483, -1101485874, 1022861696, 1044340802) + + W(5, -1095339972, -1108128054, 1047885622, -1110457109) + + W(6, -1114461725, 1033154083, -1108564239, 1057380776) + + W(7, 1036635468, -1114614567, -1113117029, -1106205123) + + W(8, 1042723938, -1095928258, 1035899866, -1108232826) + W(9, 1032164308, 1021089498, -1125646960, 1041299799) + + W(10, -1139371156, 1034599135, -1121800074, -1107756257) + + W(11, -1171638077, 1027517543, -1131185034, -1115299047); + sum2 = + W(0, -1120118804, -1136369034, -1127959037, 1015341014) + W(1, 1033327798, -1113602386, 1013270421, 1027353114) + + W(2, -1129939889, 1034464346, 1041751325, -1114857235) + + W(3, -1120552641, -1125884733, -1117793827, -1117887441) + + W(4, 1049619624, -1103868602, -1117027367, 1035518448) + + W(5, -1095769137, 1043338219, 1049093640, -1097589990) + W(6, 1016254148, 1043285424, -1095274849, 1049861339) + + W(7, 1048627700, -1105817123, -1110780393, -1129166063) + + W(8, -1117623897, -1128474295, 1035712363, -1106207094) + + W(9, 1030787932, -1140216380, -1128598589, 1032330618) + + W(10, 1035384499, 1004274322, 
-1133117880, -1120569751) + + W(11, -1129927717, 1018695247, 1027229852, -1120123842); + WS(1057163582, 1025817537); + sum1 = + W(0, 1037167835, 1031655825, -1096271953, 1045084224) + W(1, 1033520525, 1016078798, -1121852313, 1046829204) + + W(2, -1093143228, 1046279122, -1103697416, 1030441081) + W(3, 1034559945, 1038865075, -1085198281, 1057074420) + + W(4, -1107583934, 1036548879, 1027044951, 987855837) + W(5, 1050118415, 1040167170, 1024989936, 974934139) + + W(6, 1039868489, -1105739319, 1057217669, -1086525384) + W(7, 1034776664, 1034922130, 1037598609, -1112419405) + + W(8, 1047779928, -1087515051, 1044254259, -1113468278) + W(9, 1007581568, 1015381448, 1035805671, -1098926044) + + W(10, -1134624240, 1036139674, 1031626785, 1037495244) + + W(11, 1015872616, -1124461564, -1113253902, 1017385676); + sum2 = W(0, -1136384493, 1036526786, 1028719631, -1123632092) + + W(1, -1145098603, -1122280548, 1023814631, -1099879539) + + W(2, 1030342243, 1043747658, -1102737141, -1130733945) + W(3, 1033659202, 1034332004, 1046758826, 1050427835) + + W(4, -1112406001, 1014543213, 1017810163, 1013925981) + + W(5, -1093436173, -1100447595, -1119158797, -1123106681) + + W(6, 1009967381, -1106516740, 1051870663, 1041231786) + W(7, 1034292458, 1019870319, 1017781727, -1101585218) + + W(8, 1042788701, 1045006578, -1107586767, 1013053757) + + W(9, -1118180802, -1131784625, -1114786901, 1010389989) + + W(10, -1143106923, -1119739637, -1141961139, -1112082470) + + W(11, -1113417628, -1117114477, 1032164569, 1012643781); + WS(-1081763615, -1092598780); + sum1 = W(0, -1130922677, -1114318275, -1127997567, 1028467828) + + W(1, 1023598927, -1139280241, -1122514966, 1026414532) + + W(2, -1121576984, -1127358845, 1017010415, 1002425203) + + W(3, -1136114845, 1017564438, -1088500870, 1061837768) + + W(4, -1116904946, -1123987795, 1015393433, 1021344041) + + W(5, -1098266094, 1035926493, 1037585875, -1136160989) + + W(6, -1137786397, -1095352826, 1061435536, -1095349722) + + W(7, 1040151266, 
-1131570741, 1023447063, -1116164091) + + W(8, -1148866211, -1114585584, 1031742785, -1116123969) + + W(9, -1142668459, -1123782681, 1039772482, -1114677716) + + W(10, 1007413957, 1020007637, -1140705115, 1012849463) + + W(11, 1024002537, -1116920883, -1128808099, -1126849079); + sum2 = + W(0, 1021958137, 1034408685, -1106091708, -1098486326) + W(1, 1028745769, 1024682325, -1117837187, 1036094407) + + W(2, -1147105428, -1110159069, 1031154711, -1116910593) + + W(3, 1026312941, 1033683959, -1094093059, -1088187103) + + W(4, -1105512708, 1039406737, -1115513274, -1124949158) + + W(5, 1061646324, 1055280585, -1100598283, -1117359799) + W(6, 1037370871, -1091894288, 1058519893, 1045036050) + + W(7, -1109806227, 1026080301, -1112242113, 1027011647) + + W(8, -1106569597, 1045721104, -1117345257, -1117139296) + + W(9, 1033686533, -1107943508, 1037199097, 1034325435) + W(10, -1144974476, 991455177, -1107052036, 1010459130) + + W(11, 1030945863, 1039418143, -1124920152, -1112397476); + WS(1049187708, 1061143407); + sum1 = W(0, 1032206028, 1046469409, 1044620591, -1086760535) + W(1, 1049164066, 1022165171, -1121811767, 1033070593) + + W(2, 1020031147, -1113096094, -1132628435, 1009114287) + + W(3, 1032297330, 1049873346, -1111788011, -1092765627) + + W(4, 1051671486, 1034535880, -1136528167, 1050639650) + + W(5, -1089881571, -1083700943, 1044844041, 1024964917) + + W(6, 1005295654, 1041319460, -1127219501, -1092202425) + W(7, 1037004161, 1011091807, 1031846890, 1041542033) + + W(8, 1033106537, 1011207027, 1026161438, 1038185039) + W(9, -1136496587, 1036329419, 991465499, 1015330121) + + W(10, -1134373563, 1018308239, 1032778794, 1043869975) + + W(11, -1112918515, 1026637361, 1019220893, 1032129132); + sum2 = + W(0, -1123557888, 1024038368, 964997605, -1123190992) + W(1, -1170950771, 1009594487, -1139662759, 1022314468) + + W(2, 1007232143, 1040909663, -1119781676, 1011035391) + W(3, -1120649660, 1016353740, -1112234110, 1035500288) + + W(4, -1122589660, -1122952928, 
1021700488, -1149024430) + W(5, 1052797849, 1059925772, 1021322900, 1015425876) + + W(6, -1119215196, -1127462832, 1032636001, -1123381020) + + W(7, -1124742672, 1024335912, 1025758640, -1109226016) + + W(8, -1096670151, -1110751472, 1037872214, -1115520552) + + W(9, -1134635263, 1022909500, -1103246759, -1114698588) + + W(10, -1123038904, 1026100870, 1025396502, -1106595065) + + W(11, -1095307242, 1016687528, 1025386190, -1128653768); + WS(-1080960863, 1058419411); + sum1 = W(0, -1118700722, -1111066847, 984198859, 1043965403) + + W(1, -1108783427, 1000248987, 1032983255, -1112208894) + + W(2, -1099715572, -1113907195, -1113914551, -1124179019) + + W(3, -1122641758, -1099579692, 1049479517, 1050672334) + + W(4, -1105019770, -1128328213, 1033094082, -1090459763) + + W(5, 1062349342, 1061713555, -1095692932, 1023738862) + + W(6, -1121000958, -1107128402, -1099059374, 1059583022) + + W(7, -1099879371, -1131329315, -1133773881, -1107276892) + + W(8, 1033076198, -1121054998, -1108028092, 1027231576) + + W(9, -1141737059, -1116550064, -1125040759, 1024684126) + + W(10, 1024533736, -1113801521, -1120123854, -1103159313) + + W(11, 1037355536, 1026513898, -1110661700, 1007580489); + sum2 = + W(0, 995719700, -1129327601, -1125739202, 1029045123) + W(1, 1025141617, -1154923092, -1114687459, 1021214658) + + W(2, 1027238209, 1037994429, -1113434302, 1007115993) + W(3, 1024240180, 997717028, -1120915257, 1036441804) + + W(4, 1032546636, 1003062850, -1115935924, 978476974) + W(5, -1078687396, 1066236156, 1035362808, -1118441375) + + W(6, 1025427056, -1140454571, -1111104335, 1022342240) + W(7, 1033358736, 1017727844, -1123062148, 1010080137) + + W(8, -1128218828, 1028953671, -1166615662, -1129048983) + W(9, 999440794, 1002330866, -1147995690, 1024857232) + + W(10, -1165027863, 1019778022, -1131128122, -1152746908) + + W(11, -1128526870, -1126806088, 1024563904, 1009762473); + WS(-1082880574, -1095080656); + sum1 = + W(0, -1120399523, -1111736159, -1111698940, 1037879414) + + 
W(1, 1037066756, -1118524245, -1123397517, -1128373242) + + W(2, 1019423266, 1028697399, -1119567216, 1027581671) + W(3, -1135116544, 1025039907, -1092929515, 1062200098) + + W(4, -1131199446, -1139610160, -1168421181, -1127950106) + + W(5, -1098702381, 1023355236, 1027882718, -1139561152) + + W(6, -1131259136, -1098593297, 1060991762, -1094756999) + + W(7, 1038218895, 1005358744, 1037719400, -1106461210) + W(8, 1018082074, -1115312044, 1022591155, -1113962893) + + W(9, -1113348279, 1024172498, 1036669022, -1106391640) + + W(10, -1118950442, 1026868195, 1030740495, 1028249110) + + W(11, -1114016273, -1109491766, -1141634128, -1126548648); + sum2 = W(0, 1007561151, -1106890729, -1144059530, 1010626011) + + W(1, -1140206187, -1152147163, -1118820752, 1031296198) + + W(2, -1112391017, -1114880200, -1136937433, -1120281594) + + W(3, -1128452386, -1110907715, 1056169840, 1052511775) + + W(4, 1034255225, -1114711449, -1121745985, 1032462388) + + W(5, -1096446085, -1094697844, 1033067226, 993102387) + W(6, -1114861795, 1035962501, 1031556034, 1056886944) + + W(7, -1104164393, -1120009849, -1121584151, 1035033495) + + W(8, 987032615, -1129701328, 1013679559, -1122813977) + W(9, -1120079456, 982365671, -1138039959, 980377326) + + W(10, -1115367679, -1118918686, -1129117272, 1035951051) + + W(11, -1119286197, -1126702674, -1137374445, 1006646323); + WS(1058795070, 1058351276); + sum1 = + W(0, -1129226172, -1106246658, 1041614193, 1041522846) + W(1, -1106977045, -1140271486, 1032650784, -1132244111) + + W(2, 1048401911, 1041471472, 1031926461, -1130658915) + W(3, -1134152362, -1102574120, 1050141831, 1033352747) + + W(4, -1121658742, -1115542891, -1122312016, 1028257624) + + W(5, -1088102699, 1030285885, -1124591186, -1119417531) + + W(6, -1145265749, 1004724909, -1093353638, 1056599450) + + W(7, -1107883356, -1155196377, -1118796187, -1128587973) + + W(8, -1112285036, 1049138668, -1114299650, 1003429157) + + W(9, 1017891569, -1150296521, -1122892680, 1043369227) + + W(10, 
-1130730345, -1125742491, -1131365684, -1107598873) + + W(11, -1120116589, 1040658126, 1034850634, -1137642362); + sum2 = + W(0, 1018264792, -1109681111, 1028734812, -1104210606) + W(1, 1025579416, 1029305888, 1019878456, -1134735936) + + W(2, -1093714299, -1099909667, 1032419228, -1113499692) + + W(3, 1020074080, -1106212594, 1034660210, -1098896203) + W(4, 1010388000, 1025031188, -1143212320, 1037009054) + + W(5, 1047288883, 1060586916, 1030286292, -1130541520) + W(6, -1148803168, -1132238328, 1031452084, 1042866381) + + W(7, -1107165918, -1127563664, -1127882992, 1032289620) + + W(8, -1112028563, -1109339206, 1019446368, 1026005920) + W(9, 1017409120, 990246720, -1115169564, 1023925320) + + W(10, 989303425, -1123344268, -1131165168, 1004132864) + + W(11, 1020410832, -1112228144, 1029468412, 1009015280); + WS(1043816952, 1056206353); + sum1 = + W(0, -1132948972, -1115461859, 1034684352, 1037119642) + W(1, -1107088436, -1144087781, 1024900084, -1107172113) + + W(2, 1038250028, 1039626815, -1110088668, -1116127087) + + W(3, -1150680698, -1097009915, 1061195075, -1095598055) + + W(4, -1113187289, -1117712852, 1016358425, -1102438229) + + W(5, 1051576078, 1056604231, -1102307882, -1114968828) + + W(6, -1160444148, 1024199571, -1088975285, 1057172865) + + W(7, -1103288997, -1134624362, -1115326124, -1148961467) + + W(8, 1010078034, 1029312608, -1122873761, 1012385158) + W(9, 1019123435, -1130723763, -1107729186, 1039764276) + + W(10, 1032878393, -1114736693, -1120343575, -1144998513) + + W(11, 1022450003, -1112740858, 1034042985, 1002559709); + sum2 = + W(0, -1135809827, 1031002711, -1136033931, -1114169661) + W(1, -1107967067, 1027957130, -1140675011, 1009667595) + + W(2, 1025543601, -1124911966, -1108557845, 1004385158) + + W(3, -1137384851, -1156373420, -1123691685, 1056449903) + + W(4, -1096033675, 1015282250, -1147988326, 1010789683) + + W(5, -1105219090, 1044469394, -1118550723, -1115586301) + + W(6, 1019754904, -1122328477, -1118075063, 1046754031) + + W(7, 
1020360378, -1120013003, -1172160176, -1135703979) + + W(8, -1111692677, 1034039333, 1005574774, -1130287690) + W(9, 1006265798, 1016962928, -1131889022, 1024901767) + + W(10, 1025982043, -1127946498, 990940844, -1135289651) + + W(11, 1010975355, -1111731157, 1036125487, -1169619760); + WS(1049886076, 1034318367); + sum1 = W(0, -1140544720, 1022042959, 1021784769, -1131075883) + + W(1, -1142660485, -1160888244, 1006465467, -1106141033) + + W(2, 966352080, 1032360624, 1029412697, -1121174096) + W(3, 1025588553, -1104462159, 1059646174, -1086897182) + + W(4, 1042100949, 1017231209, -1118154094, -1100197533) + + W(5, 1051735858, 1059041662, -1106256562, -1150715274) + W(6, 996302474, 1029682164, -1085821075, 1057694799) + + W(7, -1106530518, 1024907260, -1111905052, -1112348667) + + W(8, 1029991670, 1036690656, -1121375714, 1015672965) + + W(9, -1125961261, -1117824763, -1111853843, 1030592201) + + W(10, 1017125029, -1120397516, -1122979160, -1123002959) + + W(11, 1031029131, -1155480906, 1014936522, -1124207481); + sum2 = W(0, -1110846605, 1032320648, 1027180085, 1029603693) + + W(1, 1033577170, -1112673640, -1118194097, 1027617785) + + W(2, -1144235925, 1030911393, -1134544211, -1123614185) + + W(3, -1143898437, 1000132981, 1031570329, -1119293171) + + W(4, -1126616369, -1129974305, 1019764549, -1102116254) + + W(5, 1058710858, 1058397441, -1096477332, 1018853209) + W(6, 1008325379, 1026769453, 1031557517, -1104852463) + + W(7, -1103611475, 1036639048, 999139301, -1131321177) + + W(8, -1129394727, -1097515304, -1115701158, 1018401637) + + W(9, 997400075, 1008054267, -1131248001, -1123234663) + + W(10, -1119810656, 1024391909, 1024689095, -1118482095) + + W(11, -1120094485, -1106708620, -1134741251, 1017993989); + WS(1058429118, 1064863249); + sum1 = + W(0, 1021156518, 983001563, 1054264097, -1097350840) + W(1, -1175841770, 964768362, 1024394983, -1115666854) + + W(2, 1049468028, -1099725362, 1024539904, -1149024695) + W(3, 1020219616, 1039490386, 1055450440, 
-1115114453) + + W(4, 997016493, 1012766063, -1131178666, 1046648007) + W(5, -1104196510, -1085827188, 1043203980, 1022718592) + + W(6, 1016100640, 1017911734, 992909240, -1100930382) + W(7, 1033767351, -1136266891, 1007341951, 1043757188) + + W(8, -1104186631, -1128095056, -1119513801, 1035657141) + + W(9, 994206685, -1128564932, -1122765517, -1118903024) + W(10, -1131557236, 1016423590, 964693930, 1045299318) + + W(11, -1105268364, 1023907580, -1106015350, 1029236260); + sum2 = W(0, 1005391535, -1106696811, 1050575955, -1100156814) + + W(1, -1098256564, 1028021126, 1018096716, -1121907467) + + W(2, 1051313221, 1060662591, 1029467086, -1129063320) + W(3, -1135288360, 1026226746, 1047952814, 1068258385) + + W(4, -1142136447, -1131820748, -1121648533, 1031305614) + + W(5, -1081952512, -1074657656, 1051661425, 1008147176) + W(6, 996459166, 1032267559, 1040750694, -1095314974) + + W(7, 1035381379, -1168737402, -1130245220, -1111725659) + + W(8, 1031579134, 1036231973, -1107714686, 1020424408) + W(9, -1123582348, 1028271694, 1007524520, 1044691450) + + W(10, 1022647148, -1129848660, 1033610651, -1108648643) + + W(11, 1034169827, 1044987108, -1100160947, 1024695710); + WS(-1083443454, 998713176); + sum1 = W(0, 1032696047, 1040709994, 1040929033, -1087559501) + W(1, 1049786774, -1146095614, 1026355790, 1040668906) + + W(2, 1032459486, -1103862520, 1037090637, 1033091241) + W(3, 1024354795, 1036398699, 1012490030, -1102928053) + + W(4, 1047072725, 1024780023, 1034252472, 1051748575) + W(5, -1097753826, -1093112676, 1050721065, 1037512365) + + W(6, 1012872363, 1035129883, -1114537329, -1092883854) + W(7, 1039546228, 1024615038, 1032281789, 1044965075) + + W(8, -1100635790, -1099081393, 1011437457, 1037286498) + + W(9, 1027888345, 1031400701, -1103916602, -1104850339) + + W(10, 1009955155, 1036010231, 1016830581, 1048807512) + + W(11, -1098186970, -1112352715, -1114553850, 1033653631); + sum2 = + W(0, 1035861958, -1098289107, 1026686124, -1096172460) + W(1, -1113513064, 
1019723703, -1161872500, -1119785106) + + W(2, 1049978705, -1107697817, 1027596044, -1121217732) + + W(3, -1113290777, 1033623622, -1098402027, -1091833397) + + W(4, -1098063812, 1033198374, -1137177342, 1045692473) + W(5, 1024040864, 1013412750, 1031079096, -1107709748) + + W(6, 1034613598, -1106425659, 1053770527, 1046082211) + W(7, 1012376430, 1038124498, -1103597964, -1117971136) + + W(8, -1127968019, 1057075430, -1095279992, -1112843267) + W(9, 1038756242, 1019694879, 1049090628, 1047895771) + + W(10, 1045163433, -1120386622, -1111249467, -1114046628) + + W(11, 1052585151, -1102204197, -1100444173, 1034273086); + WS(-1086369662, -1078015058); + sum1 = + W(0, 1024279387, 1042615374, 1054091094, -1095346029) + W(1, 1050416872, -1113840294, -1100066053, -1106499229) + + W(2, -1113085188, -1103658220, -1111759460, -1141254386) + + W(3, 1039589628, 1049518735, 1050614757, -1097514477) + W(4, 1048966812, 1032485602, -1104719791, -1115604002) + + W(5, -1103236040, -1100854410, -1122281110, -1113439156) + + W(6, -1113174076, -1127523972, 1047781504, -1097175852) + + W(7, 1043919349, -1119980106, -1125885710, 1049389829) + W(8, -1108066658, 1026529231, 1032565272, 1039770777) + + W(9, -1111149795, -1102519338, 1041501469, -1104719494) + + W(10, 1027138593, -1104628175, 1024511623, 1042212335) + + W(11, 1048227542, -1099302313, 1036646913, 1032389542); + sum2 = + W(0, -1092512531, -1098738343, 1063615535, 1009177475) + W(1, -1119750726, 1030476954, -1093473489, -1088925799) + + W(2, 1062426368, 1026425874, 1012050343, -1120184004) + W(3, -1081280020, 1042571962, 1067242406, 998962046) + + W(4, 1025403720, 1031881676, -1076988140, 1043337888) + + W(5, 1066811408, -1096426756, -1129159374, -1116680638) + + W(6, -1081342477, 1033193400, 1066297017, 1022780060) + W(7, 1023044380, -1135563955, -1086556968, 1010020371) + + W(8, 1060290312, 1026166068, -1115355073, 1004442726) + W(9, -1107875638, -1102154130, 1052028857, 1017583668) + + W(10, 1026819560, 946141982, 
-1121362077, -1126174974) + + W(11, 1037059936, -1119102823, -1127777686, 1034449190); + WS(-1129707456, 1007685382); + sum1 = W(0, -1134894751, -1098519836, 1040671079, 1044073412) + + W(1, -1109530006, -1134057701, 1044154267, 1030394337) + W(2, 1013414869, 1041040503, 1035376874, 1028314315) + + W(3, -1111052786, -1107874844, -1103276809, 1038778991) + + W(4, -1105794628, -1112561176, 1038046773, -1118678129) + + W(5, -1098881194, -1107344299, -1139987723, 992777541) + + W(6, -1192544411, -1111301144, -1110522396, 1048980909) + + W(7, -1108336721, -1112828028, -1128991721, -1115520098) + + W(8, -1107120076, 1052479600, -1112546431, -1132207293) + + W(9, 1035051017, 1015910765, 1033063921, 1049200001) + W(10, 1023468750, 999515194, -1122391156, -1103669035) + + W(11, -1111557603, 1051478546, 1018554845, -1119107128); + sum2 = + W(0, -1137824107, -1110122518, -1137081539, 1039063461) + + W(1, -1158403413, -1124656554, -1126465818, 1033612180) + + W(2, -1106894813, 1047362418, -1117814890, 1023374342) + W(3, 1005362547, -1116868027, 1045165527, 1067526191) + + W(4, -1136640937, -1115663214, -1158497445, 1039739597) + + W(5, -1106604299, 1071589227, 1044031039, -1148955153) + + W(6, -1119770196, 1037998233, -1138568822, 1061860820) + + W(7, -1133800170, -1123862185, 1021074352, -1112420682) + W(8, 1039276731, -1077533661, 1031122886, 999718675) + + W(9, -1138544392, 1025116420, -1104439342, -1073373771) + + W(10, -1106196407, -1139157324, 1034621027, -1120069279) + + W(11, -1113398206, -1094637864, 1031315442, 1025985948); + WS(1033791472, -1138498893); + sum1 = W(0, -1143657507, -1102835756, 1024249065, 1044190455) + + W(1, -1107762844, 1008291608, 1014781903, -1108646231) + + W(2, -1130492804, 1021681616, -1112711792, -1128240816) + + W(3, 1029133201, -1108649138, 1008218246, -1115040338) + + W(4, 1019895480, 1025256893, 1018917100, -1106020223) + W(5, 1054388603, 1058377694, -1123372431, 1014932868) + + W(6, 998381448, 1003669472, -1092207694, 1046387590) + W(7, 
-1111491159, 1007448080, 1016812674, -1113672010) + + W(8, -1132293500, 1049136345, -1123523050, 1008489644) + + W(9, -1118545247, -1114481905, -1112726231, 1017676194) + + W(10, -1123035739, -1112884042, -1137667330, -1127743919) + + W(11, -1119594899, -1119541597, 1028355707, 1001709096); + sum2 = W(0, 1014470821, -1106657768, 1034825132, 1006830039) + + W(1, -1104684796, -1124459301, -1120344268, -1106292024) + + W(2, 1032219219, -1101139832, -1114556582, 1008940380) + + W(3, -1114330800, 1049926230, 1057813788, -1102461554) + + W(4, -1111886229, 1029827978, -1106002658, 1054743689) + + W(5, 1049962194, -1103958261, 1050373872, -1133852786) + + W(6, 1024999308, -1112305858, 1051810194, -1098680718) + + W(7, 1035894797, -1109427720, -1126273593, -1122403994) + + W(8, 996027236, 1040205712, -1100520246, 1024816377) + W(9, -1127411221, -1116955850, 984340447, -1109277736) + + W(10, 1014102650, -1117160816, -1123975632, 1030827431) + + W(11, -1114551452, -1111240037, -1106040331, 1003623824); + WS(1060089726, 1074996161); + sum1 = + W(0, -1117558175, 1046443391, -1083103171, 1040714346) + W(1, 1044503809, 1023987857, 1029119241, 1039893948) + + W(2, -1093429938, 1021452133, 1030628716, 1027140771) + W(3, -1141503452, 1041448800, -1088988175, 1048946821) + + W(4, 1038933875, 1026874120, 1010789890, 1035829124) + W(5, 1032282738, 1021975771, 1038764813, 1026271340) + + W(6, 1037081758, 1041862066, 1035486030, 1040332065) + W(7, 1032949373, 1034225415, -1120312844, 1028398131) + + W(8, -1142682180, -1090323173, 1041097413, -1113760891) + + W(9, 1027150698, 1038548631, -1111879740, -1112512014) + W(10, 1029165798, 1028642719, 1021223318, 1047369209) + + W(11, -1105296983, -1095359061, 1046714577, -1134282558); + sum2 = W(0, -1130708327, -1131662151, 1027052068, -1104634511) + + W(1, 1037500532, -1117309872, -1156760442, -1160974837) + + W(2, -1119116140, -1148640061, 1028521640, -1132259207) + + W(3, 1011385503, -1122121264, 1006726095, -1101053828) + + W(4, 1041142688, 
-1129219511, 1005458237, -1104012424) + + W(5, 1053197196, 1050251696, -1121651972, -1148190653) + + W(6, 1015786967, 1025508020, -1150821434, 1043632028) + + W(7, -1105814552, 1005943453, -1121500080, -1106371091) + + W(8, 1032166230, -1115585403, -1117076340, -1120252400) + + W(9, 1023017975, 1032453110, -1116451706, 1032061998) + + W(10, -1120517680, 1024831312, 1008927007, -1107217673) + + W(11, -1143126685, 1045664978, -1103126409, -1126349911); + WS(-1081408895, 1057237802); + sum1 = + W(0, 1016592219, 1050393725, -1096948620, -1099121222) + W(1, 1042338077, 1026859007, -1135569700, 1025037351) + + W(2, 1021729271, -1114451665, 1033766733, 1023688209) + W(3, 1019913045, 1042967287, -1123436299, -1104931659) + + W(4, 1036822635, 994107237, -1134722237, 1042881918) + W(5, -1107250293, -1092679092, 1042265522, 1017621813) + + W(6, 1020337237, 1033060699, 1047123275, -1098053310) + W(7, 1043883210, 1033354815, 1019971573, 1038993285) + + W(8, 1037602588, -1091584672, 1042862441, -1147819555) + W(9, 1015924209, 1028206140, 1034435092, -1092984777) + + W(10, 1029491668, 1035642823, 1027236154, 1041649576) + W(11, 1044331459, -1089381821, 1042007901, 963919445); + sum2 = W(0, -1127968320, -1092240358, 1057542400, 1016463192) + + W(1, -1094079665, 1037438732, 1036537032, 1030963640) + + W(2, -1105653780, -1097383073, 1040975890, 1019305024) + + W(3, -1112917417, -1097473768, 1053316325, 1038201480) + + W(4, -1101066411, 1028884816, 1020801080, -1109029546) + W(5, 1058712288, 1052867895, 1013027424, 1010261952) + + W(6, 1019615648, -1116583352, 1043894648, -1123129572) + + W(7, -1118015492, 1027371024, -1127208552, 1028578068) + + W(8, -1093453186, -1141347136, -1117505000, -1113360113) + + W(9, 1023589392, -1113404853, 1034405836, 1006791808) + + W(10, -1111557726, 1034904608, -1121416948, 1041090246) + + W(11, -1094394128, 1037486484, 1044943944, -1104647878); + WS(-1076745215, 1064070508); + sum1 = + W(0, 1019383636, 1043437193, -1100863080, 1019299111) + W(1, 
1041928012, 1018313975, -1138649581, 1030286820) + + W(2, 1033675388, 1027453406, 1031909515, -1136380470) + W(3, 1029667879, 1040316179, -1102225484, -1110289513) + + W(4, 1043406276, 1026991539, -1127653591, 1048543741) + + W(5, -1084687593, -1082029409, 1044625233, -1136351578) + W(6, 1027426151, 1029763954, -1111626052, 983973284) + + W(7, 1044339921, 1027257018, 1022926189, 1039947283) + W(8, 1026499316, 1031954367, 1039879984, 1022902273) + + W(9, 1004012708, 1025479577, -1122995300, -1118301831) + W(10, 1022733729, 1026798858, 1016119660, 1041368223) + + W(11, 1026134601, 1016553765, 1012331970, 1027440187); + sum2 = + W(0, -1131696089, 1012459087, -1112347233, 1039451269) + W(1, -1158657302, -1129595249, 1008718823, -1135045027) + + W(2, -1125420309, -1110618065, 1007247903, 999949222) + W(3, -1149188438, -1118135053, 1041969824, 1024383435) + + W(4, 1015365577, -1130814181, -1140198251, 1033739972) + W(5, 1052325325, 1052741682, 1034080133, -1140370763) + + W(6, -1129603953, -1142742214, 1029891118, -1121721953) + + W(7, -1110453303, 1020979581, 1007479839, 1015462815) + + W(8, -1120072309, -1095903036, -1162307222, -1132990667) + + W(9, -1141563590, -1126906553, 1017890385, -1105015368) + + W(10, -1122346803, 1008938691, 1018622169, -1135479163) + + W(11, 1016803437, -1098230278, 1021342643, -1137891195); + WS(-1094563452, 1051169575); + sum1 = + W(0, -1154622990, 1018130713, 1040428881, -1120902330) + W(1, -1113835211, 1019319877, 1029393378, 1035203109) + + W(2, 1032896399, 1039535746, 1021002107, 1036762426) + W(3, -1117775501, -1113234793, 1049040808, -1102863965) + + W(4, -1107942972, -1135300966, 1033996553, -1106127031) + + W(5, -1112183035, -1090463816, -1105904891, 1040237135) + + W(6, -1136342582, -1114335628, 1048012112, 961094679) + W(7, -1148767576, 1032716400, 1018491963, -1112874623) + + W(8, 1044702279, -1098092733, -1124337998, -1137189022) + W(9, 1032030382, 1023872671, 1032915973, 1033184663) + + W(10, 1006151100, 1037223880, 
1015760954, -1110067034) + + W(11, 1035998904, 1029109433, -1110631596, 1013167238); + sum2 = + W(0, 1028413027, -1121092135, 1017337743, -1137632446) + W(1, 999930971, 1027578971, -1120070369, -1130520759) + + W(2, 1037177446, -1104029390, 1033133092, -1114678191) + + W(3, 1012563150, -1120753460, -1106734706, 1048794348) + + W(4, -1111694827, 1030530567, -1111535757, -1097640928) + + W(5, 1051800696, 1050145982, 1041394994, -1119592910) + W(6, 1011962278, -1098008778, 1035782812, -1114477541) + + W(7, -1118971109, 1001961835, 1009477102, 999766235) + W(8, -1106510696, 1047938812, -1127041263, 1024176823) + + W(9, -1153318262, -1122757360, 1010623246, -1111017918) + + W(10, 1020162151, -1115486877, 974823129, -1129371381) + + W(11, 1028214977, -1123769252, -1124747017, 1007999422); + WS(1057759166, -1088449289); + sum1 = + W(0, -1129635066, 1044873994, -1087389198, -1116302487) + W(1, 1048786726, 1017011581, 1025446574, 1037627635) + + W(2, -1097118359, 1022204225, 1039292653, 1021371937) + W(3, -1144519245, 1041685209, -1089578865, 1022125102) + + W(4, 1045860709, 1025212662, 1017539549, 1033910011) + W(5, -1129568803, -1105345911, 1045748274, 1031878029) + + W(6, 1026395242, 1039651930, 1010566389, -1122366800) + W(7, 1027479143, 1013486066, -1122678010, 1035038013) + + W(8, 1030050103, -1097803608, 1033498658, 1007580942) + W(9, 1025431320, 1039320958, -1110122707, -1119554634) + + W(10, -1161253492, 1028360045, 1024557433, 1038868536) + + W(11, -1112614790, -1126568107, 1010965710, 1015223035); + sum2 = + W(0, -1162281894, 1033992046, 1042856153, 1038465062) + W(1, -1093956780, 1024284423, -1124547002, -1149048922) + + W(2, 1036788056, 1046094595, -1100316514, 1035682376) + W(3, -1150111187, 988741862, -1112557119, 1060179844) + + W(4, -1090501223, -1122577303, -1135956229, -1132541637) + + W(5, -1097406101, 1049669935, 1022378118, -1113139255) + + W(6, -1145634890, -1132011310, -1121853795, -1101657128) + + W(7, 1045140163, -1130855582, 965409433, 1024739191) 
+ W(8, -1104384368, 1042681611, -1121116299, -1118824057) + + W(9, -1123274137, 1008671821, 1032013500, -1111116605) + + W(10, 1035479824, 1006161754, 1024679583, -1129294934) + + W(11, -1147059498, 1027020519, -1131272734, -1124922046); + WS(-1086987838, -1100233980); + sum1 = + W(0, 1015308459, 1041765192, -1105415847, 1042444900) + W(1, -1111075669, 1003799483, -1122325985, 1028094431) + + W(2, -1114108890, 1041449686, -1106140165, 1034767973) + + W(3, -1116672610, 1042485054, -1092596043, 1049087917) + + W(4, -1110170951, -1144225139, 1032904116, -1118546351) + + W(5, 1046690691, -1128556841, 1034284768, 1033288361) + + W(6, -1141168915, -1118052589, 1054244345, -1089307961) + + W(7, 1026589088, 1010481109, 1033123257, -1116818498) + W(8, 1049735534, -1094503991, 1030527932, -1121578684) + + W(9, 1001375875, 992241989, 1040724504, -1104029377) + W(10, 1008796465, 1030765989, 1015833828, 1015146386) + + W(11, 1041637233, -1100869156, -1123717612, -1124977133); + sum2 = W(0, -1128145157, -1100557855, 1057648426, 1048654145) + + W(1, -1122215769, -1133425768, 1026284201, -1106810438) + + W(2, 1054522141, 1057115188, -1103409060, 1017140792) + W(3, 1012811840, 1042787919, -1098531848, 1046636853) + + W(4, 1046426252, -1118227807, -1136599052, 1044596046) + + W(5, -1073275189, -1074262745, 1035358469, -1143532160) + W(6, 992541697, 1038174685, 1061008073, 1044984510) + + W(7, 1036107748, 1016191398, 1011005728, -1104618112) + W(8, 1059653331, 1060082709, -1104639697, 981079778) + + W(9, -1127430370, -1166127490, 1035655860, 1034192337) + + W(10, -1130597644, 1006837720, 995789721, -1116633305) + + W(11, 1017274260, 1033697834, -1112828961, 1021901152); + WS(-1092446204, 989212831); + sum1 = + W(0, 1007124942, 1048125124, -1138101793, -1141178857) + W(1, -1102978326, 1034583667, -1116778591, -1131432602) + + W(2, -1106585464, 1019782926, -1115311760, 1015089582) + W(3, 1027403088, 1042237585, 1016240146, 1036610823) + + W(4, -1110971902, 1027127900, -1121715541, 
-1138060917) + + W(5, 1037962972, -1101006449, -1114322839, 1008360193) + W(6, 1019921952, 1027977243, 1053191424, -1094792341) + + W(7, 1031189102, 1028625507, 1020332096, -1126238792) + W(8, 1053622732, -1089064867, -1137835047, 1017386452) + + W(9, -1139218425, -1131884420, 1043276755, -1101892348) + + W(10, -1132849129, 1009465805, 1020381759, 1024321015) + + W(11, 1049124886, -1107274751, -1110951117, 1008208785); + sum2 = + W(0, -1120159352, -1088595541, 1060560783, 1061831343) + W(1, -1089452071, -1101640393, -1125980662, 1050146519) + + W(2, -1093128727, -1098317152, 1042347964, 1038452583) + + W(3, -1131578305, -1090859444, 1057304640, 1057012981) + + W(4, -1095995319, -1105624394, 1028063854, -1107336769) + + W(5, 1071289406, 1069689231, -1096815763, 1041229675) + + W(6, 1021975879, -1105868965, -1087527230, -1097221007) + + W(7, -1104502319, 1037791301, 1033612769, 1041431951) + + W(8, -1086794372, -1086042421, 1049524493, -1143180172) + + W(9, -1119579693, -1112749441, 982969010, 1033122226) + W(10, -1104150062, 1029453052, 1040487308, 1042279365) + + W(11, -1089785065, -1088997285, -1135764460, 1049121852); + WS(-1080642303, 1037515653); + sum1 = + W(0, 1014399585, -1123461796, -1115821917, 1037702505) + W(1, -1105260875, -1134503912, 1040389217, 1009459592) + + W(2, -1138501592, 1046330151, -1113097082, 1032999462) + + W(3, 1025436037, -1118449912, 1046808963, -1088063230) + + W(4, -1166545913, 1011659972, 1035628117, -1111999897) + W(5, 1046961203, 1048171302, -1112931778, 1033647146) + + W(6, 1032176658, 1042449564, -1085801589, 1046621083) + W(7, -1116953058, 1020514160, 1019714410, -1098862637) + + W(8, 1042823052, 1033619294, -1108126015, 1023868127) + W(9, 1031481267, 1015874414, 1032561937, 1017602698) + + W(10, 1033606503, 1030447848, -1129425897, -1108656118) + + W(11, 1022709518, -1172911161, -1161936349, 1015791986); + sum2 = + W(0, -1139497271, -1104932637, 1000924601, 1034980171) + W(1, -1102656208, -1154378706, 1025910769, 1043978669) + 
+ W(2, -1116100213, 1042493673, 1038409695, 1025837919) + + W(3, -1105297083, -1105480129, -1102785010, 1044002411) + + W(4, -1098474508, -1109802618, 1037339247, 1037617885) + W(5, 1049988969, 1050036780, -1122964745, 1036616991) + + W(6, -1111547600, -1107402541, 1045831134, 1039942071) + + W(7, -1106907325, -1132149525, -1123186639, -1105522985) + + W(8, 1040977948, -1097804729, 1028039731, -1111839940) + W(9, 1029488041, 1011577705, 1036894407, 1042714488) + + W(10, -1112976714, 1040500443, -1137152287, -1102258444) + + W(11, 1025599417, -1139888185, -1112698813, -1114191023); + WS(1060124606, -1084472548); + sum1 = + W(0, 1012638700, -1112711971, 1024524352, 1032623962) + W(1, -1115629480, 1027484509, -1131934189, 1021193865) + + W(2, 1017460401, -1117271615, 1023262959, -1157409515) + + W(3, -1129347279, -1125331574, -1113956442, 1049120342) + + W(4, -1108495481, 1010910287, -1138043955, 1048955067) + W(5, -1094951982, 1015143283, 1031640934, 1021584239) + + W(6, 1007380511, -1106488069, 1048203795, -1110142007) + + W(7, -1129214986, -1144904917, 1022374575, -1115361884) + + W(8, 1005249701, 1024310560, -1136673413, 1017598403) + W(9, 996895579, -1120058426, 1026650437, -1121731240) + + W(10, 1017029987, -1145343061, 992489843, -1118477310) + + W(11, -1129456851, 1036683482, -1115998674, 1014521967); + sum2 = + W(0, -1113301049, 1041723134, -1119425470, 1012563935) + W(1, 1013366251, -1135264199, -1129519836, 1018887182) + + W(2, -1123219625, 1021107126, -1131491053, 1017265018) + + W(3, -1113144608, 1065721494, -1082046556, 1032971141) + + W(4, -1132479565, -1135106943, -1114997262, 1081282167) + + W(5, -1068115046, -1092304788, 1028331651, 927030725) + W(6, 1028189537, 1053752802, -1091841582, -1120681827) + + W(7, 1021570574, 983594844, -1144798799, 1004635839) + W(8, -1126716683, 1026059029, -1132445755, -1137356791) + + W(9, -1135227415, 1024321495, -1149813598, 992723582) + W(10, 1010781703, 959859164, -1123606042, 1032429021) + + W(11, -1137534547, 
-1121765421, 1024141029, -1123588459); + WS(1064307390, 997943845); + sum1 = + W(0, 1016504192, -1110031838, -1108555962, 1047561626) + W(1, -1114174882, 1004953741, 1004931163, -1124632947) + + W(2, -1129767067, 1003984285, 1017992857, 1018940039) + W(3, 1001468389, -1106796660, -1106350773, 1053522405) + + W(4, -1106382906, 1000370245, -1125536771, -1132488742) + + W(5, -1097456765, 1050670238, -1136221548, 1015934681) + + W(6, 1024701821, -1111862756, 1040312105, -1128004006) + + W(7, -1123896954, 1011062590, -1157300426, -1111316170) + + W(8, -1110123941, 1039422954, 1020844396, -1128259885) + W(9, 1008115882, -1153319146, 1030600762, 1005773541) + + W(10, 1015300557, -1153739770, 1014991280, -1116314499) + + W(11, -1109063375, 1042190650, 1024320647, -1130969505); + sum2 = W(0, 1036505224, -1100095598, 1039944480, -1110028565) + + W(1, 1016119595, 1017378303, 1018720963, -1119632397) + W(2, 1033659023, 1025734363, 1015545311, -1126149289) + + W(3, 1037554666, -1082101932, 1064933062, -1103479495) + + W(4, 1036578506, 1022196513, 1035136259, -1072130218) + W(5, 1074634461, 1012777017, 1036162128, -1118704038) + + W(6, -1131720942, -1090292986, 1057038142, -1132873326) + + W(7, -1120419990, 1020047721, 1012123245, -1114842502) + + W(8, 1046242606, -1117820012, -1115011900, 1033886326) + + W(9, 1027168757, -1108983905, 1018437263, -1117294599) + + W(10, 1004277346, -1136709697, 1024471767, -1105383800) + + W(11, 1041395450, 1033191694, -1113427773, 1024978781); + WS(1053812476, -1113586226); + sum1 = W(0, -1130399840, -1111011390, 1007197394, 1037708373) + + W(1, -1122978308, -1139049030, 1028349447, -1120416825) + + W(2, -1105796643, 1044272829, -1110423782, -1125249991) + + W(3, 1021963321, -1107941440, 1056809437, -1093131829) + + W(4, -1122097703, 1011721762, -1121520129, -1106086226) + + W(5, 1046348585, 1057081835, -1099751719, -1114497366) + + W(6, 1020769785, -1129958415, -1090756230, 1059679544) + + W(7, -1102044582, 1018253471, -1115853964, -1107644952) 
+ + W(8, 1050300257, -1119895572, -1111488955, 1022041005) + + W(9, 1021860117, -1122155502, -1116443513, 1040260199) + + W(10, 1019054925, -1121197023, -1121122124, -1114352785) + + W(11, 1041113720, -1110926804, -1156322407, -1135672898); + sum2 = + W(0, 1015672618, 1010541588, -1115998566, 1020421847) + W(1, -1129704409, 1015113530, -1121994623, 1032866568) + + W(2, 1011616452, -1095022641, 1020628925, 1017019595) + + W(3, -1149412980, 1044676778, -1107438884, -1097106911) + + W(4, -1112769863, 1022483811, -1127744422, -1124040172) + + W(5, 1058620442, 1015017230, -1113422252, 1015061518) + W(6, 990134373, -1102290659, 1049322309, -1104275347) + + W(7, 1039268635, -1114030797, 1024671460, 1029499881) + W(8, -1104592911, 1045525693, 1026741584, -1136765715) + + W(9, -1132259460, -1120084193, 1035840939, -1119993661) + + W(10, 999745387, -1131682952, -1125579600, 1001699435) + + W(11, -1123886068, 1037165892, -1126762930, -1134246014); + WS(1059863230, -1098226968); + sum1 = + W(0, -1111154474, -1130042625, -1124092147, -1126659421) + + W(1, -1116454602, -1109234699, 1033627108, 1044463840) + W(2, -1125693093, 1045974478, 1040744732, 1026252457) + + W(3, -1111220881, -1098768819, 1043860042, 1040108899) + + W(4, -1149847241, -1103478147, 1018934285, 1036585799) + + W(5, -1094381437, -1098844542, 1043775752, -1107008620) + + W(6, -1156119577, 1041677635, 1034781361, 1051479065) + + W(7, 1046003792, -1127254569, -1108034825, -1100472134) + + W(8, 1038465479, -1096809363, -1121805723, -1105262391) + W(9, 1035708952, 1038444026, 1027036095, 1040658808) + + W(10, 1037992790, 1038964162, -1116166225, -1111999396) + + W(11, 1019243847, 1003830125, -1131289879, -1116821910); + sum2 = + W(0, 1031860762, 1008804487, 1036033743, -1130814575) + W(1, -1114717025, 1042429978, -1114215534, -1119320351) + + W(2, -1105317049, -1126605754, -1103884858, 1019583714) + W(3, 1037917300, 1028769566, 1033869945, 1029943513) + + W(4, -1097519272, 1050527129, -1123072391, -1145928741) + + 
W(5, -1104666827, 1058632863, -1084821476, 1057878257) + + W(6, -1121765219, -1118791492, -1103277106, 1010033985) + + W(7, -1091781337, 1047090522, 1036233577, 1035112755) + W(8, 1043081028, 1033184724, -1108906997, 1050006554) + + W(9, -1113082189, -1119166399, -1110309063, -1124066564) + + W(10, -1105454190, -1116436299, 1007761049, 1034891236) + + W(11, 1003334201, -1146667244, -1124982586, 1037673833); + WS(1053829756, -1108691549); + sum1 = + W(0, 1014621415, 1034972923, -1105089900, -1121242915) + W(1, 1024842558, -1125107735, -1117035633, 1031518451) + + W(2, -1146432572, 1011744694, -1195363583, 1024600093) + + W(3, 1017668333, 1028891034, -1111069516, -1099260407) + + W(4, 1035963185, -1111257697, -1121088251, 1043029617) + W(5, 1052288956, -1096216885, 1043236853, 1022444749) + + W(6, -1113943163, 1037608353, 1043183409, -1109446679) + + W(7, -1104376301, 1008767270, 1027915142, -1118505878) + W(8, 1040529537, 1033001703, -1110826184, 983774736) + + W(9, -1117882340, 1008280866, -1127832039, 1023717270) + + W(10, -1108863440, 1021787123, 1018110476, -1114328254) + + W(11, 1025673432, 1038283409, -1136644866, -1122279626); + sum2 = W(0, 1011466728, -1153535168, -1121363658, -1113493290) + + W(1, 1016979229, -1121975877, -1128158188, -1133074596) + + W(2, -1108077390, 1031842286, -1114077565, 1032726750) + + W(3, 993685376, -1111924694, 1048370655, -1096696142) + W(4, 1032702600, -1126878203, 1021341187, 1041916532) + + W(5, -1088803436, -1097359431, 1051606360, 1020447303) + + W(6, -1125064327, -1115375045, 1050909934, 1066069354) + + W(7, -1108957150, -1115472084, 1016637363, -1111577830) + + W(8, 1048268187, -1102058354, -1106393780, 1011253904) + + W(9, -1135616946, -1121861231, 1028734454, -1112810542) + + W(10, -1104652034, 1032484333, -1142265296, -1139232914) + + W(11, 1033829714, -1108918948, -1124153420, -1135690267); + WS(1043379192, -1131322837); + sum1 = + W(0, -1129021027, -1109595831, 1033800339, 1042153991) + W(1, -1099711038, 1026187331, 
-1148980502, -1141769560) + + W(2, 1040943040, 1016895818, 1029537886, 1023917269) + W(3, -1117253390, -1131540259, -1099641090, 1040857522) + + W(4, -1104535414, 999787984, 1015261926, 1038059198) + W(5, 1019360940, -1103092216, 1032384358, -1137467256) + + W(6, 1024719214, 1043151230, 1047704456, -1091529984) + W(7, 1043292971, 1021851650, -1130947836, 1040366611) + + W(8, -1120007803, -1100986498, 1031568104, -1117455240) + W(9, 974083843, 1036650848, 1032688054, -1116706574) + + W(10, 1019020464, 1021119266, 1010796022, 1032003186) + + W(11, -1135300928, -1114490317, 1026900536, -1124200964); + sum2 = + W(0, -1127801857, 1028175283, -1129802997, -1133354170) + W(1, -1129332011, 1029358411, -1143073539, 1010285366) + + W(2, 1022418743, -1111757330, 1032847739, -1119713292) + + W(3, -1106914355, 1045024365, -1109183829, 1035388549) + W(4, 1028119663, 984677997, -1114698807, -1107796176) + + W(5, 1055188047, -1099199714, 1027769667, -1126683209) + + W(6, -1129241721, -1093181393, 1060464193, -1108056884) + + W(7, 1015260775, -1117802232, 1034854605, -1094857339) + W(8, 1038631095, 1042537531, -1123722407, 1025618013) + + W(9, 1015357631, -1104001534, 1035894131, 1019867695) + + W(10, 1021694327, -1116557581, 1026781991, -1106206681) + + W(11, -1121422795, 1010545294, -1120402439, 1021478623); + WS(1049844732, -1121310639); + sum1 = + W(0, 1023733410, 1044402267, 1053233309, -1096212219) + W(1, 1051767783, -1114613442, -1099209689, -1106734859) + + W(2, -1108848000, -1103164857, -1109302031, -1129199659) + + W(3, 1040893204, 1050332331, 1049912979, -1099336615) + W(4, 1050917291, 1033787229, -1102957277, -1111686974) + + W(5, -1098618899, -1097562740, -1121562509, -1111886543) + + W(6, -1114244983, -1122456058, 1048539349, -1096335583) + + W(7, 1042792075, -1121890370, -1136674298, 1049981942) + W(8, -1111429758, 1033031043, 1039550692, 1041151474) + + W(9, -1108461358, -1102278760, 1039273110, -1102589972) + + W(10, 1008842338, -1104020956, 1021220348, 1043895987) 
+ + W(11, 1048170180, -1101337719, 1041281742, 1031699907); + sum2 = W(0, 1051830333, 1040398919, -1088812275, -1112895824) + + W(1, 1030823820, -1120174945, 1052363768, 1056364330) + + W(2, -1088095023, -1128031249, -1126124865, 1022839272) + + W(3, 1062286287, -1105094784, -1083945825, -1110939052) + + W(4, -1131257287, -1113476028, 1068323273, -1117763636) + + W(5, -1086712736, -1122350162, 1034881723, 1031923437) + + W(6, 1062941652, -1117097644, -1085128967, -1107088013) + + W(7, 993374262, -1149114843, 1058036233, -1135317262) + + W(8, -1090471655, -1117471490, 1022005418, -1131972943) + + W(9, 1038399576, 1045469136, -1096613184, -1128774604) + + W(10, -1124473506, 1015689530, 1029358050, -1127713443) + + W(11, -1118175248, 1023889083, 1006015479, -1114795832); + WS(997080576, -1130763300); + sum1 = W(0, -1113905570, -1121320815, 1038521748, 1025325853) + + W(1, -1108459484, -1123369669, 1030153879, -1108108727) + + W(2, 1032364321, -1131460303, -1152289285, -1111143084) + + W(3, -1117230256, -1100762033, 1051702873, 1040312882) + + W(4, -1099800679, 1018134009, -1152616277, -1090117932) + + W(5, 1069923213, 1063008042, -1092045374, -1116551136) + + W(6, -1146808706, -1109500574, -1104049339, 1056019095) + + W(7, -1100849447, 1015899783, -1112646432, -1102311764) + + W(8, 1028696893, -1115439812, -1105934578, -1120248394) + + W(9, -1186209359, -1119610882, -1120830317, -1125581089) + + W(10, 1028735898, -1113190874, -1120216576, -1099135446) + + W(11, 1030578386, -1116447004, -1108771165, -1120342084); + sum2 = + W(0, -1130955153, 1025142055, 1025405091, 1041203911) + W(1, 1034798579, -1107186683, -1115062100, 1033634795) + + W(2, -1124058081, -1151569099, -1098708869, 1041850582) + + W(3, -1114768924, -1114031272, 1047530952, -1160505239) + + W(4, 1048498835, -1097808347, 1040485485, -1115074190) + + W(5, -1104495565, 1065129660, -1095909934, 1045395705) + + W(6, -1102247365, -1110799922, 1038913214, -1115442088) + + W(7, -1105078519, -1106337339, 
1040365390, 1039570131) + W(8, -1102056033, 1037120419, 1024518769, 1007217475) + + W(9, -1105380703, -1112489130, -1123724877, -1145632358) + + W(10, -1109649658, -1152795787, -1134789779, 1035629875) + + W(11, -1104428707, 1035843769, -1117935125, -1124566009); + WS(-1078383103, 1059446981); + sum1 = + W(0, -1139864362, -1142609202, -1105667544, 1048881003) + W(1, -1110989208, 1019807480, 1022304576, 1008862865) + + W(2, -1100327790, 1047898440, 1029966423, -1128249884) + + W(3, -1117916615, -1109139429, -1098424968, 1053051896) + + W(4, 1023833874, -1114518074, 1007335089, 1032501274) + W(5, -1089810345, -1104044598, 1040575921, 1013296733) + + W(6, -1119884647, 982044231, 1033240873, 1051763270) + W(7, -1114053146, -1110353213, 1022280942, -1115332063) + + W(8, 1049118183, -1113330784, 1041606896, -1126749570) + W(9, 1023668405, 991223187, -1140016273, 1027424455) + + W(10, -1122699729, 1021514772, 1034312731, -1102309771) + + W(11, -1110413061, 1032557738, 1041157596, -1121516731); + sum2 = + W(0, -1143648694, 1050457027, -1090901428, -1095258955) + W(1, 1047388223, 1034828442, 1022767513, -1104882348) + + W(2, 1017677825, -1116653154, -1125154789, -1149114774) + W(3, 963325627, 1052893834, -1090554669, 1030383529) + + W(4, -1114292033, 1040557770, -1138915747, -1122824043) + + W(5, 1048412513, 1063122641, -1118207917, 1005930742) + W(6, 1040530304, 1019582897, -1101863176, -1089352859) + + W(7, 1048190601, 980397999, 1031585165, -1094902239) + W(8, -1104950416, 1057380402, -1102824630, 1033506230) + + W(9, 1008260859, -1113748748, -1120605391, -1106433792) + + W(10, 1038107916, -1115355457, -1120690652, -1090635807) + + W(11, 1056780417, 1049388154, -1104842210, -1136517107); + WS(-1104952056, -1073278929); + sum1 = W(0, -1144211169, -1099507218, 1047627264, 1043300637) + + W(1, -1098196346, 1030331294, 1031125659, -1116651974) + + W(2, -1113308557, 1037274304, -1126577137, -1122667056) + + W(3, -1121100585, -1101060075, 1054491321, -1102367022) + + W(4, 
-1102852173, 1026346581, -1142367854, -1105066053) + + W(5, 1039149624, 1051689271, -1106731162, -1120693369) + + W(6, 1025675017, -1131576359, -1106626380, 1044811828) + + W(7, -1111689619, 1018492983, -1114298540, -1114707952) + + W(8, 1042584197, -1128472513, -1126957444, -1140414731) + + W(9, 1036563163, -1120605819, -1128818827, 1045578618) + + W(10, 992521004, -1128018583, -1122834968, -1111900952) + + W(11, 1039018645, -1115669626, 1025441875, 997033420); + sum2 = + W(0, -1117577133, 1036324905, 1012676607, -1164734594) + W(1, 1039853902, -1110394349, -1139616023, -1124977156) + + W(2, -1115101040, 1032695755, -1116113854, 1006188424) + W(3, 997820975, -1120808104, 1041273688, 1041759226) + + W(4, 1041255405, -1120661488, 1028627780, -1109376562) + + W(5, 1049990275, -1096754642, -1106762918, 1033535767) + W(6, -1114909048, 1029259135, 1043064430, 1041324783) + + W(7, -1111929038, 1021759445, 1032747040, -1145526876) + W(8, 1039154681, -1094055114, 1025837595, 1023476809) + + W(9, -1116006677, 1022442689, 1027909669, -1098711698) + + W(10, -1130147608, -1133986843, 1030832637, 983753970) + + W(11, -1115224952, 1023440264, -1112181665, 1029342490); + WS(1057403966, -1096678293); + sum1 = W(0, 1015545167, -1133819725, 1037006758, -1110196161) + + W(1, -1111752718, 1028708905, 1013920108, -1127594402) + + W(2, -1136130453, -1123399563, 1041684833, -1119121787) + + W(3, 1030182836, 1031500952, 1042283546, -1086200264) + W(4, 1047364560, -1120221088, 991347091, -1107540753) + + W(5, 1040431359, 1060132444, -1104881213, 1006406689) + W(6, 1031028372, 1042502792, -1084295706, 1049379651) + + W(7, -1113607001, 1002061113, -1130110920, -1120427420) + + W(8, 1042737160, 1001749345, -1117219726, 1026883950) + W(9, -1137745417, 1023814022, 1019878434, 1023472436) + + W(10, 1017968636, -1124476270, -1127755335, -1113612941) + + W(11, 1042334622, -1123659840, 1008021369, 1011156129); + sum2 = + W(0, -1178786588, -1129322914, -1116170217, 1036181740) + W(1, 1016863918, 
-1132924585, 1027650789, -1117835702) + + W(2, -1106357388, 1050236879, -1107702296, -1127926494) + + W(3, 1004569898, -1120140062, 1012300621, 1029537087) + W(4, -1115347042, 1007741857, 1010647389, -1110565129) + + W(5, 1051050908, -1121260831, 1019718506, -1147888890) + + W(6, -1130907066, 1046200355, -1094299123, -1105419536) + + W(7, 1018471790, -1122466004, -1128412421, 1035331810) + + W(8, 1038925264, -1104188032, 1035623426, -1132432621) + + W(9, 1000455394, 1040609034, -1110352470, -1148832482) + + W(10, 1031812039, -1132484471, 1019136642, -1114775527) + + W(11, 1028674911, 1009994365, -1129304944, -1146317634); + WS(1056335484, -1129697442); + sum1 = W(0, -1118615510, 1026825265, -1102686619, 1045269336) + + W(1, 1031523962, -1140818205, -1123816285, -1162479083) + + W(2, -1175208362, -1122870505, 1025178018, -1112741005) + + W(3, 1016357093, -1123245334, -1112742573, 1056143939) + + W(4, -1115481871, 1004860403, -1126548591, -1115166368) + + W(5, 1012633705, 1022501065, -1098786498, -1121817826) + W(6, 1017679973, 993334229, 1048503365, 1044007618) + + W(7, -1107348056, -1134864917, -1125007109, 1009460392) + + W(8, -1110640192, 1043433395, -1104721331, -1142571619) + + W(9, -1130509569, -1117686884, 1010107581, 1037226473) + + W(10, -1124234681, -1120701013, -1127256452, -1110259820) + + W(11, 1008538193, 1040188728, -1112073470, 1004599859); + sum2 = W(0, -1140731697, -1126783010, 1035005186, -1110815467) + + W(1, -1144657885, -1127206230, 1018164054, -1144859189) + + W(2, 1011714049, 1039236492, -1130754383, -1146205013) + + W(3, -1116322329, -1148034885, -1112384840, -1109778149) + + W(4, -1113329105, -1113225660, 1033668632, -1111621890) + + W(5, 1057652336, 1053123628, -1107649638, 1012470213) + + W(6, -1110091010, 1034430751, -1103655808, -1113563636) + + W(7, -1112793157, -1113811608, 1006827755, -1133278289) + + W(8, 1035575897, 1009902097, -1136830370, -1126431906) + + W(9, 1024608202, -1134565598, 1007536733, 1031837967) + + W(10, 
-1131576514, 1007646275, 978878091, -1106732021) + + W(11, 1017810394, 1032913014, -1118754368, -1123083034); + WS(1064654654, 1035088379); + sum1 = W(0, -1123150274, -1101065371, 1052021669, 1034671630) + + W(1, -1103122974, -1124628232, 1013730923, -1104968795) + + W(2, 1043838624, -1116995686, 1025043295, -1123853974) + + W(3, -1124984926, -1104298005, 1050953841, -1125221929) + + W(4, -1102292982, -1122875259, -1143080352, -1097917367) + + W(5, 1053857683, 1053311748, -1101557747, -1129945790) + + W(6, -1115541655, -1115560637, -1098028717, 1054064059) + + W(7, -1102206963, -1115475969, -1120446591, -1112072243) + + W(8, -1118042236, 1056948345, -1099829586, 1010096420) + + W(9, 1019709882, -1121553409, -1117850604, 1047317764) + + W(10, -1123986585, -1120157778, -1123168830, -1101025669) + + W(11, 1024509393, 1051915779, -1105952782, 1010117900); + sum2 = + W(0, -1115290116, -1109338596, 1052546267, -1101682429) + W(1, -1106866549, 1027635895, 1036300391, -1120873277) + + W(2, 1031940424, 1045633640, -1121551223, 1021591180) + + W(3, -1109664288, -1133763869, 1041363997, -1094977574) + + W(4, 1008614181, -1119215823, 1023887773, -1110763968) + W(5, 1046589307, 1050150667, -1110488084, 1009099129) + + W(6, 1031338337, -1104363651, 1038758869, -1104681827) + + W(7, -1120223295, 1016354897, -1112746552, -1111086518) + + W(8, 1026186944, -1121584221, 1035823152, -1115537942) + W(9, 1029647363, 1014654409, 1011886363, 1042076972) + + W(10, -1111033402, 1012736237, -1123357025, -1113327782) + + W(11, -1114411312, 1033926660, 1046305164, -1109553196); + WS(-1088190206, -1108558078); + sum1 = W(0, -1109712467, 1048826552, -1117596347, 1045129740) + + W(1, -1145103958, 1040739958, 1034096473, -1100191472) + + W(2, -1104233509, 1021528052, 1015098342, -1095362037) + + W(3, -1111610193, 1051587686, -1112703594, -1123092480) + + W(4, 1033306871, 1043699492, -1111503908, -1114238856) + + W(5, -1118548075, 1045282957, -1105600408, -1112734841) + + W(6, 1029528067, 
-1124911644, -1094857227, 1041941490) + + W(7, -1116887040, -1106244021, -1109918121, 1053388852) + + W(8, 1040678692, -1121835291, 1043572363, 1044475255) + + W(9, -1130336610, -1101598429, -1102065648, 1050563972) + + W(10, -1101594283, -1100499697, -1110290409, 1049826386) + + W(11, -1108498290, 1032628503, 1041251987, 1015959598); + sum2 = W(0, -1140036524, 1033121292, 1020537037, -1094559771) + + W(1, 1042598592, 1048560917, -1122134368, -1135675176) + + W(2, -1102563608, -1090683436, 1049536131, 1043095342) + + W(3, -1124742331, 1022385651, 1017232470, -1089829937) + W(4, 1015761873, 1060516603, 1023368729, 1037868584) + + W(5, -1097085277, -1081905013, -1122385293, 1068776853) + + W(6, 1025643512, -1113410705, 1026038978, -1085580324) + + W(7, -1119606047, 1062905414, -1117935205, 1034815986) + + W(8, -1107749524, -1093433405, 1034046493, 1053234619) + + W(9, 1029315286, -1106876578, -1116418603, -1100509010) + + W(10, 1037794408, 1023262329, -1121900993, 1029051910) + + W(11, 1013163432, -1107168757, 1020109569, 1038509970); + WS(1048802172, -1118644607); + sum1 = + W(0, 1022629891, -1112271863, -1115934246, -1112205646) + W(1, 1050188943, -1125264263, 1001330921, 1008285357) + + W(2, 1007889405, -1137992085, 1025826270, -1127716075) + + W(3, -1147098739, -1106982824, -1110669107, 1044651974) + + W(4, 1026008921, -1113499645, 1023807923, 1020475895) + W(5, -1095657203, 1050363724, 1011316531, 1026946398) + + W(6, -1129374813, 958183765, -1113474437, 1050921091) + W(7, -1106529387, -1121270209, 1028532286, 1025640422) + + W(8, -1090481180, 1050665633, -1131537104, 1021652689) + + W(9, 1031375324, -1133565569, -1104303995, 1037811271) + + W(10, -1125009457, 1014192325, -1151549405, -1106868493) + + W(11, -1104624276, 1049383075, 1022847857, -1142930963); + sum2 = + W(0, 1016559128, -1089262209, 1053795811, 1058569170) + W(1, -1094475155, 1008650912, -1119933527, 1032931419) + + W(2, -1108360154, -1097574423, 1036603460, -1121755244) + + W(3, 1008526536, 
-1094914643, 1052999976, 1052760357) + W(4, -1106271635, 1039081818, 1034816070, -1096197918) + + W(5, 1069558608, 1058007152, 1022028102, 1004102711) + W(6, 1018959568, -1101210129, -1103281588, -1106340652) + + W(7, -1121182797, 1033515588, 1026554777, 1049415798) + W(8, -1085501184, -1101474305, 1027756295, 1032461240) + + W(9, -1112891495, -1128790619, -1098019814, -1114646508) + + W(10, -1124734105, -1118822413, 1046177388, 1043157162) + + W(11, -1088254262, -1096632714, 1015426864, 1043397723); + WS(-1085648446, -1079079370); + sum1 = W(0, 1031831473, 1036117159, 1049939273, -1094329359) + W(1, 1040633606, 1023486628, -1114079994, 1003975776) + + W(2, 1042007752, -1097850009, -1134555900, 1019647980) + + W(3, 1031367567, 1044164867, 1043312672, -1089391360) + W(4, 1041283678, 1023951387, -1118896788, 1045647319) + + W(5, -1095211959, -1087218668, 1044809099, 1014099708) + + W(6, -1123680847, 1033826253, -1122554975, -1100203021) + + W(7, 1046447947, 1017251780, 1033718927, 1041970952) + W(8, -1116359196, 1033960979, 1042238738, 1031788713) + + W(9, -1123944253, 1024952417, -1176880640, -1108529239) + + W(10, 1024390649, -1146375056, 1033541103, 1040029267) + + W(11, 1026974961, -1111794397, 996515216, 1033511473); + sum2 = W(0, 1019169584, 1042983019, -1098354632, -1099987280) + + W(1, 1048978343, -1118546716, -1120973142, 1009997472) + + W(2, -1106283039, 1032814660, -1103033329, 1022934360) + + W(3, 1033125970, -1103915398, 1033722376, -1095179878) + + W(4, 1055664562, -1110281795, 1025364316, 1036688212) + W(5, 1062322516, 1032770062, -1097173506, 1045599869) + + W(6, -1109812797, -1125420780, -1098634798, -1109050948) + + W(7, -1103688168, -1119010038, 1040314133, -1119986230) + + W(8, 1057333054, -1097734639, -1173745415, 1038965178) + + W(9, -1108282977, 1034277286, -1102308141, 1034195486) + + W(10, -1107997912, 1007528912, 1018923536, -1117630132) + + W(11, -1125368820, 1049154263, -1098308929, 1035253180); + WS(-1089355774, -1078290086); + sum1 = 
W(0, -1122950775, 1037182841, -1118638572, -1121667553) + + W(1, 1009124079, -1115334660, -1112691207, -1124344033) + + W(2, 1041047393, -1104403323, -1138062223, -1126199861) + + W(3, 1023290087, 1046400294, -1098349922, -1099164822) + + W(4, 1044398104, -1136525567, -1107733500, -1118583361) + + W(5, 1057481334, 1054069421, -1149431076, -1110719762) + + W(6, -1146188190, 1042531744, -1139289650, -1097191323) + + W(7, 1044515332, 1015659149, -1132821263, -1121639475) + + W(8, -1109625472, 1041114291, -1120573261, -1121643278) + + W(9, -1112282369, -1137408791, 1026873009, -1120861012) + + W(10, -1120947806, -1122889719, -1116356099, 1018516820) + + W(11, -1139389015, -1139200383, 1017011067, -1120569352); + sum2 = W(0, -1120919296, -1124567280, 1037030993, -1120629799) + + W(1, -1106639281, 1020211370, 1009843654, -1107129213) + + W(2, 1043380414, -1102119519, 1030878171, -1126825280) + + W(3, -1132368064, 1020831927, -1109288357, -1112537154) + + W(4, -1118412551, -1144175329, 1020138111, -1098918731) + + W(5, 1059483198, 1064361176, -1093377806, -1146281491) + + W(6, 1029564211, -1132836513, -1119273466, -1104145236) + + W(7, -1123848756, -1135251423, -1115205032, 1010227332) + + W(8, -1106893419, 1043150197, -1103899854, 1028327527) + + W(9, 1016741875, -1118685376, 1028452918, -1117770026) + + W(10, 1024207514, -1128215590, -1119663171, 1026001154) + + W(11, -1135273053, 1019055438, -1109882780, 1024565629); + WS(1064975294, 1066308158); + sum1 = + W(0, 1031747776, -1119071204, 984462229, -1132055971) + W(1, -1148253029, -1126913907, -1106142801, 1032019633) + + W(2, 1045090381, -1108909198, -1110036442, 1036419718) + W(3, 1028818885, 1041375482, -1093262116, 1038243096) + + W(4, 1036945928, -1121925745, 1001228109, -1112793497) + W(5, 1049965274, 1021279149, -1115536386, 1019907753) + + W(6, -1115396498, 1013499583, 1052662103, -1090377239) + W(7, 1034061985, 1032108124, 1041342669, -1123968938) + + W(8, -1113450905, 1040402543, 993049059, -1109636927) + W(9, 
-1108139113, 1024220311, 1035427697, -1107965954) + + W(10, -1113141447, 1032825796, 1027821620, 1020544662) + + W(11, -1138282267, -1116422289, 1016907357, -1128061041); + sum2 = + W(0, 1016610899, -1118189976, 1027283971, 1028895363) + W(1, -1113530321, 1007846553, -1135553471, 1018031354) + + W(2, -1118352328, 1032145382, -1123867563, 1011272254) + + W(3, -1163068737, -1120752887, 1042584076, 1042427003) + + W(4, -1099207121, 1010785270, -1147979120, 1035872696) + + W(5, -1095799786, 1073605475, -1075418961, 1026092591) + W(6, -1121296916, 1034250650, 1009508653, 1056095764) + + W(7, -1091570337, 1030054693, -1139692219, 1022157658) + + W(8, -1121907329, -1130051225, -1113117501, 1019567305) + + W(9, 1007282246, -1131969269, -1148404200, 1025403981) + + W(10, -1124635978, 1012575724, 1019770181, -1131641536) + + W(11, 1013257077, -1121748387, -1123610989, 1018865930); + WS(1062423998, 1020226002); + sum1 = W(0, -1122384152, -1116470612, -1095861522, 1055717600) + + W(1, -1128557498, -1133541800, 1000976853, 1015809956) + + W(2, -1125526476, 1034182342, -1136390676, 1025071548) + + W(3, -1122957973, -1115150860, -1101122465, 1052715645) + + W(4, -1101688288, -1121585121, -1140430696, -1100102424) + + W(5, 1051208664, 1053221217, -1097231798, 1026360261) + W(6, 1018079658, -1099378776, 1045734276, 1041738487) + + W(7, -1114764537, -1127628368, -1116871589, -1102397253) + + W(8, 1040938184, 1038184662, -1107259340, -1121765359) + + W(9, 1021042950, -1121061927, 1045437908, -1114085178) + + W(10, 1034631967, 1018430000, -1130305609, -1097083551) + + W(11, 1049869175, 1019939330, -1105843464, -1122412137); + sum2 = W(0, -1125184611, -1124729770, 1045115043, 1058112728) + W(1, 1023628890, 1007171579, 985313435, -1132095255) + + W(2, -1117207517, 1067077185, -1158219675, -1118062236) + + W(3, -1181860650, 1028760415, 1036693207, -1107623537) + + W(4, 1043689375, 1000645119, -1124740239, 1011065519) + + W(5, -1088457353, -1080660794, 1020427720, -1160295467) + + W(6, 
-1132645547, 1033849803, -1096437481, -1109418981) + + W(7, -1110185484, 1015083304, 999484231, 1015359010) + W(8, 1048821220, -1117188353, 1032363474, 1023561702) + + W(9, -1125466771, -1140486285, -1107942084, 1027196953) + + W(10, -1121893121, -1134577643, 1024683619, -1125848669) + + W(11, 1027562883, -1127975224, 1023023798, -1158549787); + WS(-1085592318, -1113086899); + sum1 = + W(0, -1127342656, -1105245475, -1104578161, 1053241687) + W(1, -1103827813, -1138537863, 1024334944, 1029597578) + + W(2, -1117644382, 1041591201, 1020278010, 1015573096) + + W(3, -1107952998, -1104926592, -1101901257, 1051692021) + + W(4, -1097952575, -1110174988, 1034498345, -1110412429) + + W(5, 1053247055, 1044816720, -1110749814, 1033881662) + W(6, -1131633306, -1103724998, 1051300307, 1042909421) + + W(7, -1106968399, -1116328992, -1110168462, -1098176238) + + W(8, 1051451939, -1124765258, -1105195378, -1113141267) + + W(9, 1033648125, -1115323711, 1049307889, -1143558503) + + W(10, 1027981559, 1028312016, -1119681052, -1098367950) + + W(11, 1053283054, -1113485404, -1102600950, -1113790704); + sum2 = + W(0, 1019423143, -1118706158, -1110500850, 982325064) + W(1, -1117883328, -1145733922, -1134200265, -1122995556) + + W(2, 1021681136, -1106847870, -1115095323, 1001371122) + + W(3, 1022335641, -1113839515, -1121978030, 1042414810) + + W(4, -1112736275, 1019850892, -1126211289, -1123774588) + + W(5, 1048787768, 1052221246, -1118959328, -1113736050) + W(6, -1160177640, 1002859666, 1035034344, 1009324191) + + W(7, 1026260852, 1023767274, 995157684, -1114460336) + W(8, 1025695056, -1129766425, -1114723897, -1130130145) + + W(9, -1138832033, -1131104601, -1126984825, -1136621129) + + W(10, 1016807320, -1131688905, -1130236057, 1027754115) + + W(11, 1027074464, -1120353368, -1121253912, 992217060); + WS(1025516512, -1100199588); + sum1 = + W(0, 1010096560, 1021891087, 1041308560, -1106994989) + W(1, -1107391304, 1032706511, 998291066, -1128181387) + + W(2, 1015332531, -1097993736, 
1040752366, -1115681272) + + W(3, 1015136529, -1152435393, 1054580194, -1087955312) + W(4, 1021987303, 1013936722, 1016981531, 1035336779) + + W(5, 1043985948, 1049373383, 1016493518, -1153713033) + W(6, -1134306338, 1040424469, -1088067164, 1048585956) + + W(7, -1111549299, -1122756060, 1015217585, 1041134374) + W(8, -1097437107, 1044933000, 1015283332, 1028860515) + + W(9, 1025573847, -1147235028, -1108043523, 1037061588) + + W(10, -1125444197, -1124931799, 1027586712, -1105873140) + + W(11, 1024655979, 1041451309, 1025261651, 1010373746); + sum2 = + W(0, 1024904631, -1115115972, -1128899549, -1100416868) + W(1, 1014205090, -1138213458, -1136864762, 1023564875) + + W(2, -1103710322, -1105139728, -1112771593, 1024304323) + + W(3, 1005033941, -1114446231, -1140124434, -1107001478) + + W(4, -1105823902, 1028369943, 1030076655, -1115188268) + W(5, 1057566767, 1061216146, -1101449249, 1031401203) + + W(6, 1022202973, -1099782234, 1051976820, -1114917196) + W(7, -1115621063, -1127316949, 995105737, 1025338591) + + W(8, 1009750634, -1107279078, -1123827499, 1002907445) + + W(9, 1009257370, -1107689693, 1040613751, -1123762171) + + W(10, -1124460977, 1021828037, -1113660547, 1049272645) + + W(11, -1105716747, -1112133417, 1019919657, -1121285688); + WS(1050734204, -1108852232); + sum1 = W(0, -1111669430, -1116292712, -1122281377, 1027495958) + + W(1, -1113568120, -1123139966, 1026854009, -1106093993) + + W(2, -1103616254, -1113635568, -1125634353, -1117637392) + + W(3, -1115457556, -1104273631, 1027625969, 1042550097) + + W(4, -1100906821, -1147736996, 1022285659, -1090233269) + + W(5, 1069754213, 1065691213, -1090598793, 999398084) + + W(6, -1130768721, -1103820941, -1114752252, 1054098174) + + W(7, -1103145534, -1140005358, -1124925681, -1107176543) + + W(8, -1130883179, 1026991873, -1107767585, -1123096067) + + W(9, 988348114, -1114665965, -1144129324, 1018693905) + + W(10, 1007587914, -1120005196, -1117352760, -1103610460) + + W(11, 1032882469, 1034282506, 
-1112131865, -1122062653); + sum2 = + W(0, 1024214881, -1105712489, -1096328526, 1036545949) + W(1, -1114697429, 1029115040, -1122830075, 1026001227) + + W(2, -1089670730, -1104892025, 1027384734, -1127506094) + + W(3, 1024710111, -1105988647, -1109313571, 1042923133) + + W(4, -1108836817, 1018384918, -1118490155, 1044311614) + W(5, 1057951288, 1050757116, 1046225965, -1118982995) + + W(6, -1134765475, -1128591966, 1031038781, 1041007149) + W(7, -1111832237, 1000957030, 1007204875, 1016894538) + + W(8, -1135710147, -1111844715, 1020988490, 1018633070) + W(9, 955722144, -1132281374, -1121635711, 982097434) + + W(10, -1141601766, -1131312630, -1132418382, -1121103003) + + W(11, 1040031313, -1131469166, -1137288635, 1012074251); + WS(-1077332287, -1089760701); + sum1 = + W(0, 998476811, -1126986618, 1035645275, 1035996661) + W(1, -1111559939, 1002151576, -1146931846, -1106320367) + + W(2, 1033143495, 1030948981, -1116749294, -1132098378) + + W(3, 1020090684, -1103512376, 1057100532, -1098691810) + + W(4, -1114602228, 1020764052, -1115596769, -1106044643) + + W(5, 1033806067, 1054020686, -1102410839, -1131285468) + W(6, 1018970620, 1033789092, -1089559024, 1053006209) + + W(7, -1114317262, -1143682184, -1117338894, -1121544222) + + W(8, -1119052427, 1053247323, -1148875196, -1140686688) + + W(9, -1131301080, 1025908912, -1109205213, -1117960094) + + W(10, 1040862618, -1112291776, -1134004534, -1120853460) + + W(11, 1021655744, -1112997269, 1024305160, -1137942600); + sum2 = + W(0, 1006079429, 1015506585, -1129173219, -1126961905) + W(1, 1025558752, 1004493969, -1142220161, 1012377004) + + W(2, 1019086641, 1027847692, -1121396179, -1129089615) + + W(3, -1129651941, 1027444401, 1029291472, -1099113060) + W(4, 1031608500, 1032469225, -1120056293, 1027255740) + + W(5, -1087748986, -1071778912, -1132223695, -1117547929) + + W(6, 1021210606, -1154390002, 1030702430, 1077548482) + W(7, 1041292060, 1017793536, -1138049810, -1121552483) + + W(8, 1046290929, 1017608723, 
1030045463, -1115202758) + W(9, 1020679942, 988683875, -1149010889, 1009874569) + + W(10, -1116313503, 1024909384, -1144760977, -1149209645) + + W(11, 958643736, -1165018915, -1108302337, -1153372770); + WS(1054407548, 983325672); + sum1 = + W(0, 1028412425, -1105600625, -1148390382, 1033571545) + W(1, 1032293767, -1124427701, -1115456470, 1033490817) + + W(2, 1044756246, -1114006910, -1119790070, 1037509878) + + W(3, 1017604747, -1115519181, -1103922376, 1059608097) + W(4, -1107101148, 1007976775, 1031921957, 1031550611) + + W(5, -1090010995, -1087252051, 1044072953, 1022727275) + + W(6, -1121692358, -1103698804, 1062123705, -1094628214) + + W(7, 1027680451, 1033962892, 1041128181, -1109550327) + W(8, -1103937189, 1042983319, 1021059262, -1108328080) + + W(9, -1131299175, 1030793272, 1039103265, -1106569209) + W(10, 1007158719, 1032055396, 1034097067, 1014014769) + + W(11, -1135652511, -1116995186, -1141553278, -1143059662); + sum2 = W(0, 1007145536, 1011835040, -1115291423, 1029973058) + + W(1, 1036345379, -1106716830, -1108956115, 1040320325) + + W(2, -1145921569, -1113530029, -1116109872, 1031627486) + + W(3, -1123468231, 1009493536, -1120694127, 1025444390) + + W(4, -1106839609, -1106619549, 1003522017, 1036429861) + W(5, 1057547857, 1039825371, 1028735446, 1032586179) + + W(6, -1106382527, 1045134298, -1120933925, -1093621604) + + W(7, 1043632212, -1135351552, 1023566518, -1132203156) + + W(8, -1105348488, 1030401418, 1032927131, -1107014793) + + W(9, -1116797301, 1044032552, -1105744806, -1111109931) + + W(10, 1031187850, 1030984886, 1016028592, -1103689691) + + W(11, 1019566780, 1039637835, -1119043970, -1117261713); + WS(1062610366, -1081620328); + sum1 = W(0, 1034909184, 1041975919, 1038564634, -1086061975) + W(1, 1054071881, -1118145809, 1022225638, 1036878493) + + W(2, 1032326503, -1097049310, 1038246806, 1033815666) + W(3, 1022593144, 1038275488, 1032775158, -1094128798) + + W(4, 1049378869, 984752392, 1034659649, 1049110721) + W(5, -1104434937, 
-1097332230, 1048167927, 1040915883) + + W(6, -1145063906, 1039909252, -1098154666, -1096830529) + + W(7, 1037005773, -1127387550, 1029979468, 1050285945) + + W(8, -1091716385, 1031802154, -1122539766, 1042582214) + + W(9, 1024460554, 1032760119, -1098908862, -1125106744) + + W(10, 1023442918, 1025327999, -1129493655, 1052309552) + + W(11, -1093908076, 1033460244, -1111713187, 1038395208); + sum2 = W(0, 1025388154, -1106689977, -1110560421, 1049316874) + W(1, 983067209, -1121273022, 1001261778, 1031762430) + + W(2, -1111104301, -1112011481, 1033280635, 1026153330) + + W(3, 1007664153, -1105078255, -1122922762, 1044332351) + + W(4, -1133572905, -1116643818, 1018216589, -1117638934) + + W(5, 1029938402, -1117360942, 1022258405, 1033323756) + + W(6, 1011431705, -1119019386, -1107704269, -1116357646) + + W(7, 1016143957, -1123412994, -1124915037, 1039919645) + + W(8, 1043034893, -1107523849, -1109485745, 1027380094) + + W(9, 1007167865, -1115769810, 1041034358, -1112781805) + + W(10, 1010088409, 1016238045, -1118462618, 1041969311) + + W(11, 1050092429, -1104673921, -1106654827, 1006440178); + WS(-1079099231, 1058224693); + sum1 = + W(0, -1136025729, -1098612147, 1040574357, 1044962866) + W(1, -1107948018, -1152707357, 1041807413, 1030021338) + + W(2, -1113591959, 1041390773, 1030293261, 1019631368) + W(3, -1113852830, -1101745716, 1035906153, 1038641429) + + W(4, -1102747762, -1119110697, 1038775953, -1114342965) + + W(5, -1100940817, -1112997607, -1131180663, 1008365619) + + W(6, -1130024634, -1109189268, -1110548904, 1053521164) + + W(7, -1102886695, -1120409456, -1139925939, -1116898669) + + W(8, -1107090858, 1048687519, -1127712365, -1156710253) + W(9, 1032899333, -1169623989, 983487291, 1048553583) + + W(10, 1020276366, -1137865855, -1135488689, -1105924753) + + W(11, -1105279552, 1049805017, 1033907987, -1120648241); + sum2 = + W(0, 1026221982, 1031288593, 1025270629, -1105433524) + W(1, 1025604422, 1025361731, -1150712731, -1121646784) + + W(2, 1040465550, 
-1099611845, 1015298833, -1118571342) + + W(3, -1135858910, 1020650492, -1102699692, -1077885918) + W(4, 1013887757, 1035533544, 998750602, -1118866549) + + W(5, -1090773917, -1073585027, -1111404471, -1115122758) + + W(6, 1026066546, -1126679589, -1103726643, -1083499628) + + W(7, 1029004921, 1025173545, -1122173835, 1033676882) + W(8, 1038653616, 1072488285, -1128843744, -1132364945) + + W(9, 1019885572, -1119823506, 1044175124, 1076879885) + W(10, 1042609155, 982102231, -1114797832, 1024418530) + + W(11, 1035909226, 1053754278, -1128976380, -1120827581); + WS(1039418864, -1140458522); + sum1 = W(0, 983096624, -1114535995, 1044450572, 1027001827) + W(1, -1117675070, 1035770150, 1029517505, -1145085849) + + W(2, -1097839409, 1032347927, 1031259878, -1104931415) + + W(3, -1121873782, -1102746926, 1062380387, -1106235793) + + W(4, -1105839880, 1033087169, 1019803894, 1028523779) + + W(5, -1088253674, -1090653261, 1036696746, -1121428646) + + W(6, 1027911414, -1100878768, -1109645966, 1061486174) + + W(7, -1097301074, -1120235273, -1108005699, 1038991951) + + W(8, -1154916402, 1029669390, 1024178544, 1035324408) + + W(9, 1033576274, -1142936385, -1107146685, 1047810758) + + W(10, 1038477872, -1108040973, -1135118358, -1125909817) + + W(11, -1122115416, 1038152785, 1041849616, 1017003668); + sum2 = W(0, -1121567066, 1033267920, -1104421963, -1130139832) + + W(1, -1132243276, 1024120715, 1034344084, 1026284945) + + W(2, -1103410132, 1037507308, 1034735332, -1109350039) + + W(3, -1105011035, 1033899226, 1022298858, -1102818563) + + W(4, 1044067085, -1127155070, -1142865888, -1120108491) + W(5, 1057117238, 1003910328, 1041074904, 975508032) + + W(6, -1136511728, -1122542627, 1038703002, 1043197066) + + W(7, -1101099519, -1117378085, -1106811819, -1120621959) + + W(8, 1033583066, -1124006960, -1109459597, 1001772648) + + W(9, 1032101677, -1134482876, -1122973141, -1124335993) + + W(10, 1041237660, -1113899392, -1113394506, -1114137407) + + W(11, 1030826553, -1125132432, 
-1128301632, -1126301549); + WS(1053462780, -1083681865); + sum1 = W(0, 1023511963, 1041747598, 1041497612, -1138339988) + W(1, 1035615616, 1020629658, -1126058411, 1015154794) + + W(2, 1018224530, 996055791, 1027711272, -1123213919) + W(3, 1027394040, 1045749945, 1041814811, 1014351349) + + W(4, 1040756601, 1032965474, -1118697149, 1048726745) + + W(5, -1086401302, -1082085657, 1048515830, -1129047376) + + W(6, 993307519, 1043007793, -1115623160, -1096882594) + W(7, 1042139693, -1132054806, 1025756296, 1038169844) + + W(8, 1032344226, -1105145572, 1038148425, 1033289129) + + W(9, -1131368524, 1024626666, -1127070442, -1110953740) + + W(10, 1009997960, -1128695798, 1029589575, 1040782360) + + W(11, -1131398580, -1110890258, 1029718722, 1029252236); + sum2 = W(0, 1008350928, -1118714967, 1030231193, -1091889801) + + W(1, -1108897465, 1032606711, 1011413600, 1024023545) + + W(2, -1124778398, -1098641764, 1033453825, -1125648711) + + W(3, -1120704007, -1126562655, -1110351493, -1104262464) + + W(4, -1106889474, 1025836907, 1027382950, 1034951721) + W(5, 1052754126, 1057762368, 1035809187, -1131934955) + + W(6, -1127381517, -1128407708, 1033272406, -1161878816) + + W(7, -1132306565, -1134699254, 981904616, -1126139913) + + W(8, -1151802072, 1012148800, -1153180738, -1131180247) + W(9, 1015535284, 999360844, 1013432992, 1024540201) + + W(10, 1014625240, -1171757551, -1135583391, -1138986423) + + W(11, -1123376913, 1031971990, -1118108772, -1140791326); + WS(-1099299320, 1056598066); + sum1 = + W(0, 1006197652, -1127547996, -1106221946, 1045972807) + W(1, 1004824505, -1119658116, -1128153660, 1041714651) + + W(2, -1098261544, 1038366195, -1113181968, 1030219574) + + W(3, -1136164796, 1029712499, -1085896508, 1057638589) + + W(4, -1113770157, -1129447670, 1035903322, -1128532088) + W(5, 1057218165, 1054413180, 1038963911, 1033715440) + + W(6, -1130396894, -1106761055, 1054525467, -1086573266) + + W(7, 1033084478, -1128924420, 1036246880, -1121593759) + + W(8, 1028130044, 
-1090476168, 1042818602, -1116035017) + + W(9, -1124832314, 1017604226, 1036683414, -1101308500) + + W(10, -1130963634, 1030841394, 1029570879, 1017990507) + + W(11, 1016287010, -1110319406, -1114724733, 998207538); + sum2 = + W(0, -1131923124, 1046772351, -1110971235, -1105011941) + W(1, 1028001687, -1121147281, 993280665, 1028557303) + + W(2, -1134148898, -1104446514, -1147560296, 1024426003) + + W(3, -1127811288, -1107794670, 1043137579, -1096504482) + + W(4, -1101129935, 1034898623, 1032538133, -1098564467) + + W(5, 1067961229, 1066594258, -1104613803, -1136832302) + + W(6, 992174233, -1121892222, -1095277951, -1107079502) + W(7, -1097683180, 1036381319, 1002762140, 1035775413) + + W(8, -1099141736, -1090299346, -1129031039, 1009264906) + + W(9, 1015088121, -1158807761, 1011333246, -1132433157) + W(10, -1108888578, 1032287279, 992586073, 1019898989) + + W(11, -1118039147, -1109952821, 1040568125, -1111357043); + WS(-1105186296, -1108555742); + sum1 = + W(0, 1040483623, -1105046378, -1118442722, 1040666374) + W(1, 986662468, -1118942975, -1101386974, 1038429012) + + W(2, 1042743759, -1109750289, -1103538071, 1040611956) + W(3, 1040816739, 1031828819, -1093958439, 1050019912) + + W(4, 1033262923, -1118385334, -1134311356, -1107675004) + + W(5, 1030110546, 1040526743, -1121451017, 1012951144) + W(6, -1108915605, 1034430779, 1044698927, -1097473172) + + W(7, 1032745156, 1035127903, 1046011615, -1109709717) + W(8, -1103097566, 1045974257, 1041021277, -1100748433) + + W(9, -1102638726, 1030173660, 1041159370, -1110364069) + + W(10, -1106175813, 1037154391, 1034864637, -1130419297) + + W(11, -1111922822, 1017242540, 1032931450, -1121991998); + sum2 = W(0, -1127162070, 1014979733, -1114518101, 1016203776) + + W(1, 1035296562, -1117887634, -1135271108, -1136185376) + + W(2, 1038675289, -1140478504, -1111853852, 1030827904) + + W(3, 1003505825, 1028363168, -1105093650, -1089585970) + + W(4, 1058969759, -1123277259, 1012864633, -1131143608) + + W(5, -1095849351, 
-1070239185, 1079096535, -1116960146) + + W(6, -1123384038, 1033256022, 1028346583, -1078361549) + + W(7, 1068193425, -1125445846, 1026898060, -1133517476) + + W(8, -1113528611, 1038794260, 1032636395, -1113311282) + + W(9, -1134649836, -1120532892, 1028725832, -1125525718) + + W(10, 1017911666, 991223090, -1125848258, 1028008335) + + W(11, -1117113572, 1005296645, 1032298564, -1134431064); + WS(1065442623, 1015025160); + sum1 = + W(0, -1115365041, -1113215535, 1018550702, 983382403) + W(1, -1103693946, -1124004468, 1040496438, 1031517084) + + W(2, 1044423084, 1017412396, 1032083208, 1040563090) + + W(3, -1114325264, -1109245393, -1131936399, -1109909072) + + W(4, -1106995843, -1120263275, 1036061554, 1002812769) + W(5, 1034975748, -1099246196, 1024898238, 1012000060) + + W(6, 1038370800, 1036804053, 1048106991, -1114225707) + W(7, 1044531927, 1036150809, -1112496600, -1099928488) + + W(8, -1107673238, -1129165678, -1096349707, -1113013165) + + W(9, 1039123015, 1036473513, 1041344487, 1011581692) + W(10, 1043676418, 1037117105, -1120668655, -1116761110) + + W(11, 1019812354, -1138585900, -1110354101, -1128538214); + sum2 = + W(0, 1010698941, 1023126843, -1106210958, -1122624743) + W(1, -1122034237, -1117968485, 1042699596, -1140656688) + + W(2, 1029081919, 1040813712, -1174961495, 1039098482) + + W(3, 1048659418, -1095653758, -1118272649, -1097694847) + + W(4, 1033474214, -1118912655, 1058578114, -1081184698) + W(5, 1060468587, 996865603, -1102355565, 1020119257) + + W(6, 1052277576, -1097586600, 1043044817, -1103459725) + W(7, 1049205466, 1031051049, 1019470633, -1098953045) + + W(8, -1108654451, -1128239326, -1112376452, -1108144089) + + W(9, 1041036392, 1018118885, 1029721267, 1034766857) + W(10, 1033989555, 1038110136, -1119471849, -1122826054) + + W(11, -1130901998, 1000734433, 1017824567, -1114988670); + WS(-1115864032, 1039792746); + sum1 = + W(0, -1140468214, -1121375417, 1034497081, 1039608980) + W(1, -1113171393, -1132620605, 996096400, -1108218204) + + 
W(2, 1033625894, 1040179481, -1108192053, 1018030914) + W(3, 1010495721, -1109130991, 1055392502, -1093805194) + + W(4, -1127063626, 1012649753, -1114956320, -1115276388) + + W(5, 1036829073, 1050653566, -1109643980, -1127455572) + W(6, 1013460969, 1040789857, -1088431348, 1049263376) + + W(7, -1124542831, -1135717173, -1122072970, -1114444905) + + W(8, 1030275655, 1051648994, -1129660583, -1147064482) + + W(9, -1127833232, 1026520268, -1111886108, -1121903450) + + W(10, 1034831154, -1114505023, -1140762151, -1127271361) + + W(11, 1031218972, -1112576115, 1019975176, -1135014713); + sum2 = + W(0, 1015385693, -1115435752, 1035175215, 1017441239) + W(1, -1112773411, 998363696, 1020691269, -1141202058) + + W(2, -1117300235, -1122675471, 1031000417, 1016207247) + W(3, 1015565489, -1111436746, 1033401267, 1055273933) + + W(4, -1101083444, -1113092041, 1019337403, 1040267853) + W(5, -1100581516, 1073306798, 1046685605, 1013448250) + + W(6, -1132207064, -1114623303, 1041259906, -1073173797) + + W(7, -1115311051, -1130370856, -1131086480, 1025609602) + + W(8, -1101351817, 1032997632, -1113329498, 1026870685) + + W(9, -1163542113, -1140470334, 1020278145, -1122237085) + + W(10, 1031244051, -1124843109, 1010427910, -1128429973) + + W(11, 1027629029, -1119509251, 1033297372, 1014260054); + WS(1060418110, -1122066101); + sum1 = + W(0, 1031696780, -1115430516, -1104465800, 1041576533) + W(1, -1122042801, 1028261758, -1124029560, -1129767372) + + W(2, -1124342772, 1035204176, -1109435772, 1025573168) + + W(3, -1147879495, 1033383950, -1084846132, 1057835449) + + W(4, -1106791333, 1024098613, -1123330286, 1032201307) + W(5, 1040589380, -1121351835, 1025265863, 1000233671) + + W(6, -1137787811, -1124043956, 1059143665, -1085338500) + + W(7, 1037079395, 1020968696, 1025904121, -1135655378) + W(8, 1047669789, 1026196737, 1031201378, -1122702819) + + W(9, -1122010711, 1016436406, 1032251443, -1113520196) + + W(10, -1120875027, 1016849816, -1131825009, 1011740349) + + W(11, 
1017074582, 1023559701, 1024540211, -1123483709); + sum2 = W(0, -1127836624, 1033364881, -1101679270, 1023580345) + + W(1, -1149619356, -1136662275, -1173496113, -1127629564) + + W(2, -1105780675, -1119640853, 1033890881, -1122666983) + + W(3, 1027284157, -1104916712, -1113070309, -1132739235) + + W(4, -1113766541, 1023707939, 1022886010, -1097350377) + + W(5, 1061873799, 1062616603, -1112358042, 1028952789) + + W(6, 1036751625, -1105423640, -1096904559, 1009507187) + + W(7, -1119315178, -1117384012, -1124242822, 1034826035) + + W(8, -1105550966, -1098447862, 1045500652, -1137245711) + + W(9, 1017945890, -1122986652, -1127473876, -1116534057) + + W(10, 1015952658, -1136860123, -1140181115, 1033442485) + + W(11, -1108824717, -1115327874, 1033458469, 1001899534); + WS(1052277756, 1024619064); + sum1 = W(0, 1029425189, -1120340480, 1055903799, -1093981294) + + W(1, -1125235986, 1026200342, -1155801216, -1107014752) + + W(2, 1049346130, -1099678446, 1003141514, -1131584400) + + W(3, 1030252033, 1038469578, 1057427995, -1098793998) + W(4, 1025012844, 1031806855, -1112675811, 1032327591) + + W(5, -1103272826, -1090697510, -1118748640, -1126277858) + + W(6, 1025211073, 1034534022, 1040656272, -1103403378) + W(7, 1025335343, 1010568017, -1122395345, 1039704040) + + W(8, -1111436999, 1041606598, -1106833215, 1032770798) + W(9, 994289396, -1137717581, 1016775164, 1005246730) + + W(10, 1029370155, -1124454216, -1124711713, 1045346698) + + W(11, -1134653325, 1013960373, -1099652851, 1034190187); + sum2 = W(0, 1023289146, 1045862572, -1095566322, -1087461476) + W(1, 1016132472, 1044890152, 1020437582, 1028786959) + + W(2, -1132354231, -1101737384, -1127607289, -1124440021) + + W(3, 1009748873, -1120652794, -1090793815, -1083542229) + + W(4, -1107157844, 1037463572, 1030199907, 1015917080) + W(5, 1063879108, 1069217208, -1097091848, 1032502373) + + W(6, 1007345789, -1117660774, -1107315270, 1056862945) + + W(7, -1098187278, 1028169917, 1010063617, -1106003069) + + W(8, 
1045942315, -1153692567, -1116485608, -1108513707) + + W(9, 1018930146, 1039304637, -1106431667, -1111757292) + + W(10, 1036040005, 1034190881, -1127799210, -1098471935) + + W(11, 1048572904, 1056217861, -1094693884, -1108016950); + WS(-1086218302, -1084258561); + sum1 = W(0, 1027347742, 1041008695, 1040638149, -1088163117) + W(1, 1045697689, -1131589088, 1021408948, 1036310380) + + W(2, 1041666443, -1103354245, 1033383494, 1033326017) + + W(3, -1127633782, 1035524367, 1033011818, -1094269524) + + W(4, 1040895122, -1132088142, 1039366096, 1048593117) + W(5, 1050103608, -1122175629, 1042904478, 1042877939) + + W(6, -1116749595, -1138037400, -1111907839, -1096387988) + + W(7, -1136050786, -1131538396, 1033396118, 1045683171) + + W(8, -1097626581, -1103712029, 1018682217, 1039834587) + + W(9, -1139872292, 1032120647, -1099416246, -1111740158) + + W(10, -1126758410, 1030901630, 1025066091, 1046369740) + + W(11, -1095221944, -1117250695, -1121047845, 1034112696); + sum2 = W(0, -1116225927, 1020583173, -1120289762, -1116630862) + + W(1, 1037488437, -1120774706, 1030773056, 1018516207) + + W(2, -1142227828, -1157244503, -1120616119, 1021325617) + + W(3, -1122365237, -1121497633, 1024002380, 1045806125) + + W(4, -1122845130, -1124264052, 1032148531, 1032012136) + W(5, 1050603076, 1061490298, 1029135782, 1020114521) + + W(6, -1127594672, -1111134479, 1040110681, -1104925089) + + W(7, -1122119114, -1129316292, 1026422036, -1112679629) + + W(8, -1109379336, -1096282325, 1035818151, -1129796491) + + W(9, 1023835916, -1123177144, -1102178993, -1103628682) + + W(10, -1118358279, 1024324894, -1131330830, -1112996078) + + W(11, -1102999759, -1116593585, 1024073644, -1129630471); + WS(-1092406524, -1089571522); - sum1 = W(0, -1120714617, 1035162146, -1110752415, -1121818163) + W(1, 1035961863, -1120878363, 1007614003, -1123901891) + W(2, 1046176310, -1105876799, 1034124059, -1120533037) + W(3, -1123305414, 1027364554, -1114390527, -1096860741) + W(4, 1022953826, -1155422496, 
1009908268, 1030225939) + W(5, -1110762423, 1046191054, 1023450788, -1124524780) + W(6, -1129324172, -1116882705, 1032526991, 1043289735) + W(7, -1114841418, -1129613106, -1121901526, 1030599214) + W(8, -1096831458, 1052908756, -1114991488, 1013731840) + W(9, 1022915304, -1135846624, -1117077507, 1034530771) + W(10, -1116257712, 981289536, -1142748844, -1113117446) + W(11, -1127767760, 1039462233, 1025047056, -1143328976); sum2 = W(0, -1131108965, 1043465638, 988544780, -1145266179) + W(1, 1018501507, 1021344415, 1000650931, -1113612493) + W(2, -1124179817, -1111438835, 1003301507, -1131299129) + W(3, 1025688791, -1106748353, -1070139293, -1110118136) + W(4, 1026178295, -1123696315, -1116852534, 1029599173) + W(5, -1102659167, -1097100047, 1031198431, 1012522521) + W(6, -1145056307, 1045881517, 1078104617, 1027449515) + W(7, -1123515159, 1024975831, -1123043594, -1118230687) + W(8, 1048052425, 1027739737, -1134523125, 1020199127) + W(9, 1018436747, 1029259867, -1126127905, -1136112041) + W(10, 995063942, -1162666156, -1139914565, -1107504809) + W(11, 1025133515, 1036504803, -1120319069, -1131042995); WS(1061710334, -1113637247); - sum1 = W(0, -1121814583, 1036106897, -1109309294, -1143950978) + W(1, 1033251382, -1118153848, -1138603692, -1122595552) + W(2, 1048694647, -1103580007, 1032154570, -1122508490) + W(3, -1138264845, 1018454510, 1030681928, -1097890158) + W(4, 1019050396, 1012933309, -1140194465, 1033737112) + W(5, -1115559686, -1120177173, 1021617155, -1120902675) + W(6, -1131439046, -1122735662, 1044326142, 1042373458) + W(7, -1113884080, -1152998244, -1125621862, 1024541667) + W(8, -1097437158, 1052689231, -1107957310, 1016843898) + W(9, 1023073058, -1122620661, -1132933761, 1034486114) + W(10, -1116304653, -1144273714, -1129805541, -1116003130) + W(11, -1128614666, 1037961704, 1019245786, -1155701620); sum2 = W(0, -1162960213, -1107084454, -1126626424, 1025411499) + W(1, -1126548940, -1127269937, 1013908203, 1023868855) + W(2, 1032930462, 1029914487, 
997665611, 1018748433) + W(3, -1113327157, 1032525526, 1076098019, 1035900190) + W(4, -1121182801, -1158385717, 1023571595, 1048796624) + W(5, -1132699465, -1093496445, 1038561302, 1000017309) + W(6, -1133790043, -1104878976, -1072747140, 1026052839) + W(7, -1146184997, -1123007653, 1024139725, 1003078933) + W(8, -1104764998, -1109617402, -1142783113, -1154085627) + W(9, -1130037920, -1121062518, 1019199125, 1025857863) + W(10, -1147685381, 1000677165, 1007034787, 1034207166) + W(11, -1123497947, -1118139493, 1017680405, 1016687665); WS(1064800702, 1030635520); - sum1 = W(0, 1004285524, -1128056492, 1035663048, -1110660706) + W(1, -1128734654, 1028175148, -1122860750, 1024005652) + W(2, 1031878036, -1116831438, -1204558974, -1143857649) + W(3, -1122286551, 1035530413, 984521208, 1023270757) + W(4, -1136886024, 1005008153, 1021316024, 1053335517) + W(5, -1100061872, -1093549817, 1042579293, 1016449422) + W(6, -1116388951, -1139373960, 1048911707, -1104621327) + W(7, -1148999845, -1135015336, 1022533838, -1130762670) + W(8, 1035194194, -1149876978, -1142259845, 1010510404) + W(9, -1125657162, -1121564617, -1131931048, -1142066105) + W(10, -1120920449, 1018152308, 1001429301, -1129213371) + W(11, -1141986761, 1031904066, -1111776529, 1020350678); sum2 = W(0, 1015139874, -1120731382, 1017539382, 1020112270) + W(1, -1126248934, 1013963196, -1150118671, -1116737087) + W(2, 1031888972, -1112637962, 1010207212, -1138130380) + W(3, 1032711786, -1087320828, 1059502015, 1026899401) + W(4, -1119882792, 1019091922, 1028577839, -1069693959) + W(5, 1078824499, -1094071548, 1040940327, -1123861886) + W(6, -1116703472, -1103666133, 1045823576, 1036639812) + W(7, -1131804892, 1004924552, -1123435694, -1138801588) + W(8, 1009232228, -1114441289, 1029410629, -1130443436) + W(9, -1140098908, 999422392, 1011539428, 1023067274) + W(10, -1130083750, -1137456436, -1140019692, 1029210329) + W(11, -1118193829, -1131442506, 1007549476, 1026106777); WS(1058942782, 1023618692); - sum1 = W(0, 
-1126072821, 1041771492, -1110777188, -1110871838) + W(1, 1036426764, -1142526755, 1011693912, 1025548876) + W(2, 1035801072, 1043278082, -1146527995, 1015953709) + W(3, 1017195423, 1036961985, -1094081945, -1103634941) + W(4, 1046715887, -1148709083, -1122268692, 1037536142) + W(5, -1091452759, -1105679152, 1040286827, -1119884351) + W(6, 1022607903, 1038780013, -1095962376, -1103558179) + W(7, 1049101574, -1122534754, 982545387, 1023558139) + W(8, 1049096396, -1130619215, 1031851588, 1007174309) + W(9, -1126061755, 1037537425, -1113879430, -1136949949) + W(10, 1002869195, 1017517093, 1003763447, 1035261291) + W(11, 955835050, 1026350907, 1006549707, 986004587); sum2 = W(0, -1128639222, 1001638233, -1119841411, 1014385869) + W(1, 986023269, -1135260845, -1150616818, 1013509397) + W(2, 1029376723, 1037911199, 970306708, 1013815957) + W(3, -1118932607, 1035428655, 1019940778, -1102461584) + W(4, -1141011097, 959170344, -1125568566, 1024255335) + W(5, 1063260126, -1085129330, -1110526300, 1017551658) + W(6, 991022546, 1031481278, 1032753418, -1110003004) + W(7, 1033244979, 1016351942, -1129687950, -1111645244) + W(8, 1026999545, 1019019978, -1121764803, -1127392454) + W(9, 1018120650, 1028380841, -1121385219, 1021463302) + W(10, 1025283273, 1009569613, 1006676397, -1115817479) + W(11, -1134958989, 1017625850, -1128128990, -1138867245); WS(1037837808, 1056377428); - sum1 = W(0, -1135573495, -1105969090, 1027945538, 1043223694) + W(1, -1100584037, 1025656415, 1023727061, -1138454045) + W(2, -1116105371, 1039709953, -1111027805, 1021082723) + W(3, -1118803082, -1114548551, -1126955510, 1051232418) + W(4, -1097887999, 1018764077, -1123500111, 1046351686) + W(5, -1122693874, -1100833583, 1040497066, -1115071549) + W(6, 1020600341, 1036383371, 1053007142, -1103740252) + W(7, 1024768136, 1025234525, -1111066541, 1034647805) + W(8, 1035274060, -1101618713, -1138012047, -1116333124) + W(9, 1008936161, 1026657308, -1156882549, 1033814037) + W(10, -1123286772, 1016891995, 
-1115286797, 1033474888) + W(11, -1122516460, -1129756781, 1010716753, -1114238799); sum2 = W(0, 1020978875, -1100967324, 1043179705, 1026290891) + W(1, 1013196277, -1138979633, -1121670462, -1130583533) + W(2, -1122981488, 1032797386, -1120286641, 966672716) + W(3, -1103298336, -1096736455, 1062358565, -1104401130) + W(4, -1114052290, -1152672163, -1110731474, 1053186254) + W(5, -1112891412, -1091109944, 1048200412, -1122242747) + W(6, -1105136733, 1045958215, 1031005462, -1133705481) + W(7, -1113847816, 1023845499, -1119790394, 1043729632) + W(8, 1033523405, -1102472853, 1033631266, -1118586102) + W(9, -1127100056, 1033816035, -1109547546, 1031959215) + W(10, -1126209517, -1147194793, 999102475, 1020355333) + W(11, -1122874410, -1122532081, 1027184816, -1127245018); WS(1050865148, 1032626572); - sum1 = W(0, -1110436993, 1043627312, -1118552440, 1045700875) + W(1, -1115060612, 1040430485, 1040042556, -1101476879) + W(2, -1108049256, 1033317691, 1031191472, -1096410465) + W(3, -1107890608, 1049906138, -1107535228, -1115480133) + W(4, 1025393771, 1042925123, -1113356418, -1109025830) + W(5, -1162779891, 1049713062, -1106727656, -1114330616) + W(6, 1034032544, -1120749707, -1095271743, 1042738346) + W(7, -1127443770, -1106585845, -1105712244, 1051482110) + W(8, 1035225158, -1122810712, 1039553933, 1042770518) + W(9, 999321197, -1103830356, -1104074479, 1050825756) + W(10, -1104755066, -1103219825, -1110201208, 1048543054) + W(11, -1106449960, 1031488492, 1040395170, 1009713998); sum2 = W(0, 1024944996, -1111924540, -1107012648, 1057346437) + W(1, -1105023593, -1097010076, 1020193961, 1007347355) + W(2, 1049541488, 1054940943, -1097277462, -1107952444) + W(3, 1014966838, -1116539066, -1107447750, 1059914271) + W(4, -1142156696, -1085761230, 1015828472, 1012341552) + W(5, -1109770505, 1066972918, 1043445868, -1078926120) + W(6, -1113612636, 1034050704, -1111197863, 1063973493) + W(7, 1017043379, -1084679416, 1032934706, -1113193567) + W(8, 1014122612, 1056333532, 
-1118781870, -1092348708) + W(9, -1116105943, 1042584092, 1016467631, 1045837843) + W(10, -1113967259, -1128370702, 1021395599, -1116104243) + W(11, -1135977693, 1043326701, -1117573943, -1108363561); WS(1054811644, 1027249161); - sum1 = W(0, 1040940111, -1101358582, -1113576073, 1049141881) + W(1, -1132483205, -1116754862, -1100243090, 1036221579) + W(2, 1038054901, -1105999252, -1104538698, 1037145120) + W(3, 1043481462, 1014943347, -1097482896, 1051960356) + W(4, 1036681666, -1118995685, -1119365790, -1109150355) + W(5, -1106696288, 1048275427, -1123112824, -1126689554) + W(6, -1111878381, 1036687602, 1022736646, -1118345360) + W(7, 1019000333, 1038634998, 1044780378, -1109759237) + W(8, -1097940765, 1049145430, 1044474573, -1101292557) + W(9, -1101638738, 1028424107, 1038478827, -1113101949) + W(10, -1103799553, 1034438878, 1035929492, -1122643648) + W(11, -1103674991, 1037909315, 1037239602, -1126283866); sum2 = W(0, 1023766557, 1016554458, 1021815988, -1137435361) + W(1, -1114856427, 1032032312, 1015306072, 1020144724) + W(2, -1103467660, 1016553974, 1044555388, -1105991725) + W(3, -1136376403, -1116988904, 1034521268, 1060795535) + W(4, -1088370625, -1127777881, -1113107482, 1050162106) + W(5, -1102546407, 1075503478, -1071164689, 1035804901) + W(6, 1038494252, -1104347057, -1103257907, 1072821349) + W(7, -1077590614, -1120944929, -1113447013, 1003678517) + W(8, 1025406308, -1113972415, -1112563255, 1036418439) + W(9, 1023629411, 1033762395, -1113239746, 1031283975) + W(10, 995356642, -1123929939, 1024784188, -1109441777) + W(11, 1026085595, 1037416902, -1112537357, -1140753514); WS(1055940220, -1124188157); - sum1 = W(0, -1113999823, -1119569210, 1015781094, -1147977024) + W(1, -1117684995, -1108336232, 1015763751, 1043278990) + W(2, -1106256755, 1046188556, 1036478965, 1006648968) + W(3, -1115168029, -1094609330, 1052603679, 1041916962) + W(4, 1033522483, -1101485874, 1022861696, 1044340802) + W(5, -1095339972, -1108128054, 1047885622, -1110457109) + W(6, 
-1114461725, 1033154083, -1108564239, 1057380776) + W(7, 1036635468, -1114614567, -1113117029, -1106205123) + W(8, 1042723938, -1095928258, 1035899866, -1108232826) + W(9, 1032164308, 1021089498, -1125646960, 1041299799) + W(10, -1139371156, 1034599135, -1121800074, -1107756257) + W(11, -1171638077, 1027517543, -1131185034, -1115299047); sum2 = W(0, -1120118804, -1136369034, -1127959037, 1015341014) + W(1, 1033327798, -1113602386, 1013270421, 1027353114) + W(2, -1129939889, 1034464346, 1041751325, -1114857235) + W(3, -1120552641, -1125884733, -1117793827, -1117887441) + W(4, 1049619624, -1103868602, -1117027367, 1035518448) + W(5, -1095769137, 1043338219, 1049093640, -1097589990) + W(6, 1016254148, 1043285424, -1095274849, 1049861339) + W(7, 1048627700, -1105817123, -1110780393, -1129166063) + W(8, -1117623897, -1128474295, 1035712363, -1106207094) + W(9, 1030787932, -1140216380, -1128598589, 1032330618) + W(10, 1035384499, 1004274322, -1133117880, -1120569751) + W(11, -1129927717, 1018695247, 1027229852, -1120123842); WS(1057163582, 1025817537); - sum1 = W(0, 1037167835, 1031655825, -1096271953, 1045084224) + W(1, 1033520525, 1016078798, -1121852313, 1046829204) + W(2, -1093143228, 1046279122, -1103697416, 1030441081) + W(3, 1034559945, 1038865075, -1085198281, 1057074420) + W(4, -1107583934, 1036548879, 1027044951, 987855837) + W(5, 1050118415, 1040167170, 1024989936, 974934139) + W(6, 1039868489, -1105739319, 1057217669, -1086525384) + W(7, 1034776664, 1034922130, 1037598609, -1112419405) + W(8, 1047779928, -1087515051, 1044254259, -1113468278) + W(9, 1007581568, 1015381448, 1035805671, -1098926044) + W(10, -1134624240, 1036139674, 1031626785, 1037495244) + W(11, 1015872616, -1124461564, -1113253902, 1017385676); sum2 = W(0, -1136384493, 1036526786, 1028719631, -1123632092) + W(1, -1145098603, -1122280548, 1023814631, -1099879539) + W(2, 1030342243, 1043747658, -1102737141, -1130733945) + W(3, 1033659202, 1034332004, 1046758826, 1050427835) + W(4, -1112406001, 
1014543213, 1017810163, 1013925981) + W(5, -1093436173, -1100447595, -1119158797, -1123106681) + W(6, 1009967381, -1106516740, 1051870663, 1041231786) + W(7, 1034292458, 1019870319, 1017781727, -1101585218) + W(8, 1042788701, 1045006578, -1107586767, 1013053757) + W(9, -1118180802, -1131784625, -1114786901, 1010389989) + W(10, -1143106923, -1119739637, -1141961139, -1112082470) + W(11, -1113417628, -1117114477, 1032164569, 1012643781); WS(-1081763615, -1092598780); - sum1 = W(0, -1130922677, -1114318275, -1127997567, 1028467828) + W(1, 1023598927, -1139280241, -1122514966, 1026414532) + W(2, -1121576984, -1127358845, 1017010415, 1002425203) + W(3, -1136114845, 1017564438, -1088500870, 1061837768) + W(4, -1116904946, -1123987795, 1015393433, 1021344041) + W(5, -1098266094, 1035926493, 1037585875, -1136160989) + W(6, -1137786397, -1095352826, 1061435536, -1095349722) + W(7, 1040151266, -1131570741, 1023447063, -1116164091) + W(8, -1148866211, -1114585584, 1031742785, -1116123969) + W(9, -1142668459, -1123782681, 1039772482, -1114677716) + W(10, 1007413957, 1020007637, -1140705115, 1012849463) + W(11, 1024002537, -1116920883, -1128808099, -1126849079); sum2 = W(0, 1021958137, 1034408685, -1106091708, -1098486326) + W(1, 1028745769, 1024682325, -1117837187, 1036094407) + W(2, -1147105428, -1110159069, 1031154711, -1116910593) + W(3, 1026312941, 1033683959, -1094093059, -1088187103) + W(4, -1105512708, 1039406737, -1115513274, -1124949158) + W(5, 1061646324, 1055280585, -1100598283, -1117359799) + W(6, 1037370871, -1091894288, 1058519893, 1045036050) + W(7, -1109806227, 1026080301, -1112242113, 1027011647) + W(8, -1106569597, 1045721104, -1117345257, -1117139296) + W(9, 1033686533, -1107943508, 1037199097, 1034325435) + W(10, -1144974476, 991455177, -1107052036, 1010459130) + W(11, 1030945863, 1039418143, -1124920152, -1112397476); WS(1049187708, 1061143407); - sum1 = W(0, 1032206028, 1046469409, 1044620591, -1086760535) + W(1, 1049164066, 1022165171, -1121811767, 
1033070593) + W(2, 1020031147, -1113096094, -1132628435, 1009114287) + W(3, 1032297330, 1049873346, -1111788011, -1092765627) + W(4, 1051671486, 1034535880, -1136528167, 1050639650) + W(5, -1089881571, -1083700943, 1044844041, 1024964917) + W(6, 1005295654, 1041319460, -1127219501, -1092202425) + W(7, 1037004161, 1011091807, 1031846890, 1041542033) + W(8, 1033106537, 1011207027, 1026161438, 1038185039) + W(9, -1136496587, 1036329419, 991465499, 1015330121) + W(10, -1134373563, 1018308239, 1032778794, 1043869975) + W(11, -1112918515, 1026637361, 1019220893, 1032129132); sum2 = W(0, -1123557888, 1024038368, 964997605, -1123190992) + W(1, -1170950771, 1009594487, -1139662759, 1022314468) + W(2, 1007232143, 1040909663, -1119781676, 1011035391) + W(3, -1120649660, 1016353740, -1112234110, 1035500288) + W(4, -1122589660, -1122952928, 1021700488, -1149024430) + W(5, 1052797849, 1059925772, 1021322900, 1015425876) + W(6, -1119215196, -1127462832, 1032636001, -1123381020) + W(7, -1124742672, 1024335912, 1025758640, -1109226016) + W(8, -1096670151, -1110751472, 1037872214, -1115520552) + W(9, -1134635263, 1022909500, -1103246759, -1114698588) + W(10, -1123038904, 1026100870, 1025396502, -1106595065) + W(11, -1095307242, 1016687528, 1025386190, -1128653768); WS(-1080960863, 1058419411); - sum1 = W(0, -1118700722, -1111066847, 984198859, 1043965403) + W(1, -1108783427, 1000248987, 1032983255, -1112208894) + W(2, -1099715572, -1113907195, -1113914551, -1124179019) + W(3, -1122641758, -1099579692, 1049479517, 1050672334) + W(4, -1105019770, -1128328213, 1033094082, -1090459763) + W(5, 1062349342, 1061713555, -1095692932, 1023738862) + W(6, -1121000958, -1107128402, -1099059374, 1059583022) + W(7, -1099879371, -1131329315, -1133773881, -1107276892) + W(8, 1033076198, -1121054998, -1108028092, 1027231576) + W(9, -1141737059, -1116550064, -1125040759, 1024684126) + W(10, 1024533736, -1113801521, -1120123854, -1103159313) + W(11, 1037355536, 1026513898, -1110661700, 1007580489); 
sum2 = W(0, 995719700, -1129327601, -1125739202, 1029045123) + W(1, 1025141617, -1154923092, -1114687459, 1021214658) + W(2, 1027238209, 1037994429, -1113434302, 1007115993) + W(3, 1024240180, 997717028, -1120915257, 1036441804) + W(4, 1032546636, 1003062850, -1115935924, 978476974) + W(5, -1078687396, 1066236156, 1035362808, -1118441375) + W(6, 1025427056, -1140454571, -1111104335, 1022342240) + W(7, 1033358736, 1017727844, -1123062148, 1010080137) + W(8, -1128218828, 1028953671, -1166615662, -1129048983) + W(9, 999440794, 1002330866, -1147995690, 1024857232) + W(10, -1165027863, 1019778022, -1131128122, -1152746908) + W(11, -1128526870, -1126806088, 1024563904, 1009762473); WS(-1082880574, -1095080656); - sum1 = W(0, -1120399523, -1111736159, -1111698940, 1037879414) + W(1, 1037066756, -1118524245, -1123397517, -1128373242) + W(2, 1019423266, 1028697399, -1119567216, 1027581671) + W(3, -1135116544, 1025039907, -1092929515, 1062200098) + W(4, -1131199446, -1139610160, -1168421181, -1127950106) + W(5, -1098702381, 1023355236, 1027882718, -1139561152) + W(6, -1131259136, -1098593297, 1060991762, -1094756999) + W(7, 1038218895, 1005358744, 1037719400, -1106461210) + W(8, 1018082074, -1115312044, 1022591155, -1113962893) + W(9, -1113348279, 1024172498, 1036669022, -1106391640) + W(10, -1118950442, 1026868195, 1030740495, 1028249110) + W(11, -1114016273, -1109491766, -1141634128, -1126548648); sum2 = W(0, 1007561151, -1106890729, -1144059530, 1010626011) + W(1, -1140206187, -1152147163, -1118820752, 1031296198) + W(2, -1112391017, -1114880200, -1136937433, -1120281594) + W(3, -1128452386, -1110907715, 1056169840, 1052511775) + W(4, 1034255225, -1114711449, -1121745985, 1032462388) + W(5, -1096446085, -1094697844, 1033067226, 993102387) + W(6, -1114861795, 1035962501, 1031556034, 1056886944) + W(7, -1104164393, -1120009849, -1121584151, 1035033495) + W(8, 987032615, -1129701328, 1013679559, -1122813977) + W(9, -1120079456, 982365671, -1138039959, 980377326) + W(10, 
-1115367679, -1118918686, -1129117272, 1035951051) + W(11, -1119286197, -1126702674, -1137374445, 1006646323); WS(1058795070, 1058351276); - sum1 = W(0, -1129226172, -1106246658, 1041614193, 1041522846) + W(1, -1106977045, -1140271486, 1032650784, -1132244111) + W(2, 1048401911, 1041471472, 1031926461, -1130658915) + W(3, -1134152362, -1102574120, 1050141831, 1033352747) + W(4, -1121658742, -1115542891, -1122312016, 1028257624) + W(5, -1088102699, 1030285885, -1124591186, -1119417531) + W(6, -1145265749, 1004724909, -1093353638, 1056599450) + W(7, -1107883356, -1155196377, -1118796187, -1128587973) + W(8, -1112285036, 1049138668, -1114299650, 1003429157) + W(9, 1017891569, -1150296521, -1122892680, 1043369227) + W(10, -1130730345, -1125742491, -1131365684, -1107598873) + W(11, -1120116589, 1040658126, 1034850634, -1137642362); sum2 = W(0, 1018264792, -1109681111, 1028734812, -1104210606) + W(1, 1025579416, 1029305888, 1019878456, -1134735936) + W(2, -1093714299, -1099909667, 1032419228, -1113499692) + W(3, 1020074080, -1106212594, 1034660210, -1098896203) + W(4, 1010388000, 1025031188, -1143212320, 1037009054) + W(5, 1047288883, 1060586916, 1030286292, -1130541520) + W(6, -1148803168, -1132238328, 1031452084, 1042866381) + W(7, -1107165918, -1127563664, -1127882992, 1032289620) + W(8, -1112028563, -1109339206, 1019446368, 1026005920) + W(9, 1017409120, 990246720, -1115169564, 1023925320) + W(10, 989303425, -1123344268, -1131165168, 1004132864) + W(11, 1020410832, -1112228144, 1029468412, 1009015280); WS(1043816952, 1056206353); - sum1 = W(0, -1132948972, -1115461859, 1034684352, 1037119642) + W(1, -1107088436, -1144087781, 1024900084, -1107172113) + W(2, 1038250028, 1039626815, -1110088668, -1116127087) + W(3, -1150680698, -1097009915, 1061195075, -1095598055) + W(4, -1113187289, -1117712852, 1016358425, -1102438229) + W(5, 1051576078, 1056604231, -1102307882, -1114968828) + W(6, -1160444148, 1024199571, -1088975285, 1057172865) + W(7, -1103288997, -1134624362, 
-1115326124, -1148961467) + W(8, 1010078034, 1029312608, -1122873761, 1012385158) + W(9, 1019123435, -1130723763, -1107729186, 1039764276) + W(10, 1032878393, -1114736693, -1120343575, -1144998513) + W(11, 1022450003, -1112740858, 1034042985, 1002559709); sum2 = W(0, -1135809827, 1031002711, -1136033931, -1114169661) + W(1, -1107967067, 1027957130, -1140675011, 1009667595) + W(2, 1025543601, -1124911966, -1108557845, 1004385158) + W(3, -1137384851, -1156373420, -1123691685, 1056449903) + W(4, -1096033675, 1015282250, -1147988326, 1010789683) + W(5, -1105219090, 1044469394, -1118550723, -1115586301) + W(6, 1019754904, -1122328477, -1118075063, 1046754031) + W(7, 1020360378, -1120013003, -1172160176, -1135703979) + W(8, -1111692677, 1034039333, 1005574774, -1130287690) + W(9, 1006265798, 1016962928, -1131889022, 1024901767) + W(10, 1025982043, -1127946498, 990940844, -1135289651) + W(11, 1010975355, -1111731157, 1036125487, -1169619760); WS(1049886076, 1034318367); - sum1 = W(0, -1140544720, 1022042959, 1021784769, -1131075883) + W(1, -1142660485, -1160888244, 1006465467, -1106141033) + W(2, 966352080, 1032360624, 1029412697, -1121174096) + W(3, 1025588553, -1104462159, 1059646174, -1086897182) + W(4, 1042100949, 1017231209, -1118154094, -1100197533) + W(5, 1051735858, 1059041662, -1106256562, -1150715274) + W(6, 996302474, 1029682164, -1085821075, 1057694799) + W(7, -1106530518, 1024907260, -1111905052, -1112348667) + W(8, 1029991670, 1036690656, -1121375714, 1015672965) + W(9, -1125961261, -1117824763, -1111853843, 1030592201) + W(10, 1017125029, -1120397516, -1122979160, -1123002959) + W(11, 1031029131, -1155480906, 1014936522, -1124207481); sum2 = W(0, -1110846605, 1032320648, 1027180085, 1029603693) + W(1, 1033577170, -1112673640, -1118194097, 1027617785) + W(2, -1144235925, 1030911393, -1134544211, -1123614185) + W(3, -1143898437, 1000132981, 1031570329, -1119293171) + W(4, -1126616369, -1129974305, 1019764549, -1102116254) + W(5, 1058710858, 1058397441, 
-1096477332, 1018853209) + W(6, 1008325379, 1026769453, 1031557517, -1104852463) + W(7, -1103611475, 1036639048, 999139301, -1131321177) + W(8, -1129394727, -1097515304, -1115701158, 1018401637) + W(9, 997400075, 1008054267, -1131248001, -1123234663) + W(10, -1119810656, 1024391909, 1024689095, -1118482095) + W(11, -1120094485, -1106708620, -1134741251, 1017993989); WS(1058429118, 1064863249); - sum1 = W(0, 1021156518, 983001563, 1054264097, -1097350840) + W(1, -1175841770, 964768362, 1024394983, -1115666854) + W(2, 1049468028, -1099725362, 1024539904, -1149024695) + W(3, 1020219616, 1039490386, 1055450440, -1115114453) + W(4, 997016493, 1012766063, -1131178666, 1046648007) + W(5, -1104196510, -1085827188, 1043203980, 1022718592) + W(6, 1016100640, 1017911734, 992909240, -1100930382) + W(7, 1033767351, -1136266891, 1007341951, 1043757188) + W(8, -1104186631, -1128095056, -1119513801, 1035657141) + W(9, 994206685, -1128564932, -1122765517, -1118903024) + W(10, -1131557236, 1016423590, 964693930, 1045299318) + W(11, -1105268364, 1023907580, -1106015350, 1029236260); sum2 = W(0, 1005391535, -1106696811, 1050575955, -1100156814) + W(1, -1098256564, 1028021126, 1018096716, -1121907467) + W(2, 1051313221, 1060662591, 1029467086, -1129063320) + W(3, -1135288360, 1026226746, 1047952814, 1068258385) + W(4, -1142136447, -1131820748, -1121648533, 1031305614) + W(5, -1081952512, -1074657656, 1051661425, 1008147176) + W(6, 996459166, 1032267559, 1040750694, -1095314974) + W(7, 1035381379, -1168737402, -1130245220, -1111725659) + W(8, 1031579134, 1036231973, -1107714686, 1020424408) + W(9, -1123582348, 1028271694, 1007524520, 1044691450) + W(10, 1022647148, -1129848660, 1033610651, -1108648643) + W(11, 1034169827, 1044987108, -1100160947, 1024695710); WS(-1083443454, 998713176); - sum1 = W(0, 1032696047, 1040709994, 1040929033, -1087559501) + W(1, 1049786774, -1146095614, 1026355790, 1040668906) + W(2, 1032459486, -1103862520, 1037090637, 1033091241) + W(3, 1024354795, 
1036398699, 1012490030, -1102928053) + W(4, 1047072725, 1024780023, 1034252472, 1051748575) + W(5, -1097753826, -1093112676, 1050721065, 1037512365) + W(6, 1012872363, 1035129883, -1114537329, -1092883854) + W(7, 1039546228, 1024615038, 1032281789, 1044965075) + W(8, -1100635790, -1099081393, 1011437457, 1037286498) + W(9, 1027888345, 1031400701, -1103916602, -1104850339) + W(10, 1009955155, 1036010231, 1016830581, 1048807512) + W(11, -1098186970, -1112352715, -1114553850, 1033653631); sum2 = W(0, 1035861958, -1098289107, 1026686124, -1096172460) + W(1, -1113513064, 1019723703, -1161872500, -1119785106) + W(2, 1049978705, -1107697817, 1027596044, -1121217732) + W(3, -1113290777, 1033623622, -1098402027, -1091833397) + W(4, -1098063812, 1033198374, -1137177342, 1045692473) + W(5, 1024040864, 1013412750, 1031079096, -1107709748) + W(6, 1034613598, -1106425659, 1053770527, 1046082211) + W(7, 1012376430, 1038124498, -1103597964, -1117971136) + W(8, -1127968019, 1057075430, -1095279992, -1112843267) + W(9, 1038756242, 1019694879, 1049090628, 1047895771) + W(10, 1045163433, -1120386622, -1111249467, -1114046628) + W(11, 1052585151, -1102204197, -1100444173, 1034273086); WS(-1086369662, -1078015058); - sum1 = W(0, 1024279387, 1042615374, 1054091094, -1095346029) + W(1, 1050416872, -1113840294, -1100066053, -1106499229) + W(2, -1113085188, -1103658220, -1111759460, -1141254386) + W(3, 1039589628, 1049518735, 1050614757, -1097514477) + W(4, 1048966812, 1032485602, -1104719791, -1115604002) + W(5, -1103236040, -1100854410, -1122281110, -1113439156) + W(6, -1113174076, -1127523972, 1047781504, -1097175852) + W(7, 1043919349, -1119980106, -1125885710, 1049389829) + W(8, -1108066658, 1026529231, 1032565272, 1039770777) + W(9, -1111149795, -1102519338, 1041501469, -1104719494) + W(10, 1027138593, -1104628175, 1024511623, 1042212335) + W(11, 1048227542, -1099302313, 1036646913, 1032389542); sum2 = W(0, -1092512531, -1098738343, 1063615535, 1009177475) + W(1, -1119750726, 
1030476954, -1093473489, -1088925799) + W(2, 1062426368, 1026425874, 1012050343, -1120184004) + W(3, -1081280020, 1042571962, 1067242406, 998962046) + W(4, 1025403720, 1031881676, -1076988140, 1043337888) + W(5, 1066811408, -1096426756, -1129159374, -1116680638) + W(6, -1081342477, 1033193400, 1066297017, 1022780060) + W(7, 1023044380, -1135563955, -1086556968, 1010020371) + W(8, 1060290312, 1026166068, -1115355073, 1004442726) + W(9, -1107875638, -1102154130, 1052028857, 1017583668) + W(10, 1026819560, 946141982, -1121362077, -1126174974) + W(11, 1037059936, -1119102823, -1127777686, 1034449190); WS(-1129707456, 1007685382); - sum1 = W(0, -1134894751, -1098519836, 1040671079, 1044073412) + W(1, -1109530006, -1134057701, 1044154267, 1030394337) + W(2, 1013414869, 1041040503, 1035376874, 1028314315) + W(3, -1111052786, -1107874844, -1103276809, 1038778991) + W(4, -1105794628, -1112561176, 1038046773, -1118678129) + W(5, -1098881194, -1107344299, -1139987723, 992777541) + W(6, -1192544411, -1111301144, -1110522396, 1048980909) + W(7, -1108336721, -1112828028, -1128991721, -1115520098) + W(8, -1107120076, 1052479600, -1112546431, -1132207293) + W(9, 1035051017, 1015910765, 1033063921, 1049200001) + W(10, 1023468750, 999515194, -1122391156, -1103669035) + W(11, -1111557603, 1051478546, 1018554845, -1119107128); sum2 = W(0, -1137824107, -1110122518, -1137081539, 1039063461) + W(1, -1158403413, -1124656554, -1126465818, 1033612180) + W(2, -1106894813, 1047362418, -1117814890, 1023374342) + W(3, 1005362547, -1116868027, 1045165527, 1067526191) + W(4, -1136640937, -1115663214, -1158497445, 1039739597) + W(5, -1106604299, 1071589227, 1044031039, -1148955153) + W(6, -1119770196, 1037998233, -1138568822, 1061860820) + W(7, -1133800170, -1123862185, 1021074352, -1112420682) + W(8, 1039276731, -1077533661, 1031122886, 999718675) + W(9, -1138544392, 1025116420, -1104439342, -1073373771) + W(10, -1106196407, -1139157324, 1034621027, -1120069279) + W(11, -1113398206, -1094637864, 
1031315442, 1025985948); WS(1033791472, -1138498893); - sum1 = W(0, -1143657507, -1102835756, 1024249065, 1044190455) + W(1, -1107762844, 1008291608, 1014781903, -1108646231) + W(2, -1130492804, 1021681616, -1112711792, -1128240816) + W(3, 1029133201, -1108649138, 1008218246, -1115040338) + W(4, 1019895480, 1025256893, 1018917100, -1106020223) + W(5, 1054388603, 1058377694, -1123372431, 1014932868) + W(6, 998381448, 1003669472, -1092207694, 1046387590) + W(7, -1111491159, 1007448080, 1016812674, -1113672010) + W(8, -1132293500, 1049136345, -1123523050, 1008489644) + W(9, -1118545247, -1114481905, -1112726231, 1017676194) + W(10, -1123035739, -1112884042, -1137667330, -1127743919) + W(11, -1119594899, -1119541597, 1028355707, 1001709096); sum2 = W(0, 1014470821, -1106657768, 1034825132, 1006830039) + W(1, -1104684796, -1124459301, -1120344268, -1106292024) + W(2, 1032219219, -1101139832, -1114556582, 1008940380) + W(3, -1114330800, 1049926230, 1057813788, -1102461554) + W(4, -1111886229, 1029827978, -1106002658, 1054743689) + W(5, 1049962194, -1103958261, 1050373872, -1133852786) + W(6, 1024999308, -1112305858, 1051810194, -1098680718) + W(7, 1035894797, -1109427720, -1126273593, -1122403994) + W(8, 996027236, 1040205712, -1100520246, 1024816377) + W(9, -1127411221, -1116955850, 984340447, -1109277736) + W(10, 1014102650, -1117160816, -1123975632, 1030827431) + W(11, -1114551452, -1111240037, -1106040331, 1003623824); WS(1060089726, 1074996161); - sum1 = W(0, -1117558175, 1046443391, -1083103171, 1040714346) + W(1, 1044503809, 1023987857, 1029119241, 1039893948) + W(2, -1093429938, 1021452133, 1030628716, 1027140771) + W(3, -1141503452, 1041448800, -1088988175, 1048946821) + W(4, 1038933875, 1026874120, 1010789890, 1035829124) + W(5, 1032282738, 1021975771, 1038764813, 1026271340) + W(6, 1037081758, 1041862066, 1035486030, 1040332065) + W(7, 1032949373, 1034225415, -1120312844, 1028398131) + W(8, -1142682180, -1090323173, 1041097413, -1113760891) + W(9, 1027150698, 
1038548631, -1111879740, -1112512014) + W(10, 1029165798, 1028642719, 1021223318, 1047369209) + W(11, -1105296983, -1095359061, 1046714577, -1134282558); sum2 = W(0, -1130708327, -1131662151, 1027052068, -1104634511) + W(1, 1037500532, -1117309872, -1156760442, -1160974837) + W(2, -1119116140, -1148640061, 1028521640, -1132259207) + W(3, 1011385503, -1122121264, 1006726095, -1101053828) + W(4, 1041142688, -1129219511, 1005458237, -1104012424) + W(5, 1053197196, 1050251696, -1121651972, -1148190653) + W(6, 1015786967, 1025508020, -1150821434, 1043632028) + W(7, -1105814552, 1005943453, -1121500080, -1106371091) + W(8, 1032166230, -1115585403, -1117076340, -1120252400) + W(9, 1023017975, 1032453110, -1116451706, 1032061998) + W(10, -1120517680, 1024831312, 1008927007, -1107217673) + W(11, -1143126685, 1045664978, -1103126409, -1126349911); WS(-1081408895, 1057237802); - sum1 = W(0, 1016592219, 1050393725, -1096948620, -1099121222) + W(1, 1042338077, 1026859007, -1135569700, 1025037351) + W(2, 1021729271, -1114451665, 1033766733, 1023688209) + W(3, 1019913045, 1042967287, -1123436299, -1104931659) + W(4, 1036822635, 994107237, -1134722237, 1042881918) + W(5, -1107250293, -1092679092, 1042265522, 1017621813) + W(6, 1020337237, 1033060699, 1047123275, -1098053310) + W(7, 1043883210, 1033354815, 1019971573, 1038993285) + W(8, 1037602588, -1091584672, 1042862441, -1147819555) + W(9, 1015924209, 1028206140, 1034435092, -1092984777) + W(10, 1029491668, 1035642823, 1027236154, 1041649576) + W(11, 1044331459, -1089381821, 1042007901, 963919445); sum2 = W(0, -1127968320, -1092240358, 1057542400, 1016463192) + W(1, -1094079665, 1037438732, 1036537032, 1030963640) + W(2, -1105653780, -1097383073, 1040975890, 1019305024) + W(3, -1112917417, -1097473768, 1053316325, 1038201480) + W(4, -1101066411, 1028884816, 1020801080, -1109029546) + W(5, 1058712288, 1052867895, 1013027424, 1010261952) + W(6, 1019615648, -1116583352, 1043894648, -1123129572) + W(7, -1118015492, 1027371024, 
-1127208552, 1028578068) + W(8, -1093453186, -1141347136, -1117505000, -1113360113) + W(9, 1023589392, -1113404853, 1034405836, 1006791808) + W(10, -1111557726, 1034904608, -1121416948, 1041090246) + W(11, -1094394128, 1037486484, 1044943944, -1104647878); WS(-1076745215, 1064070508); - sum1 = W(0, 1019383636, 1043437193, -1100863080, 1019299111) + W(1, 1041928012, 1018313975, -1138649581, 1030286820) + W(2, 1033675388, 1027453406, 1031909515, -1136380470) + W(3, 1029667879, 1040316179, -1102225484, -1110289513) + W(4, 1043406276, 1026991539, -1127653591, 1048543741) + W(5, -1084687593, -1082029409, 1044625233, -1136351578) + W(6, 1027426151, 1029763954, -1111626052, 983973284) + W(7, 1044339921, 1027257018, 1022926189, 1039947283) + W(8, 1026499316, 1031954367, 1039879984, 1022902273) + W(9, 1004012708, 1025479577, -1122995300, -1118301831) + W(10, 1022733729, 1026798858, 1016119660, 1041368223) + W(11, 1026134601, 1016553765, 1012331970, 1027440187); sum2 = W(0, -1131696089, 1012459087, -1112347233, 1039451269) + W(1, -1158657302, -1129595249, 1008718823, -1135045027) + W(2, -1125420309, -1110618065, 1007247903, 999949222) + W(3, -1149188438, -1118135053, 1041969824, 1024383435) + W(4, 1015365577, -1130814181, -1140198251, 1033739972) + W(5, 1052325325, 1052741682, 1034080133, -1140370763) + W(6, -1129603953, -1142742214, 1029891118, -1121721953) + W(7, -1110453303, 1020979581, 1007479839, 1015462815) + W(8, -1120072309, -1095903036, -1162307222, -1132990667) + W(9, -1141563590, -1126906553, 1017890385, -1105015368) + W(10, -1122346803, 1008938691, 1018622169, -1135479163) + W(11, 1016803437, -1098230278, 1021342643, -1137891195); WS(-1094563452, 1051169575); - sum1 = W(0, -1154622990, 1018130713, 1040428881, -1120902330) + W(1, -1113835211, 1019319877, 1029393378, 1035203109) + W(2, 1032896399, 1039535746, 1021002107, 1036762426) + W(3, -1117775501, -1113234793, 1049040808, -1102863965) + W(4, -1107942972, -1135300966, 1033996553, -1106127031) + W(5, 
-1112183035, -1090463816, -1105904891, 1040237135) + W(6, -1136342582, -1114335628, 1048012112, 961094679) + W(7, -1148767576, 1032716400, 1018491963, -1112874623) + W(8, 1044702279, -1098092733, -1124337998, -1137189022) + W(9, 1032030382, 1023872671, 1032915973, 1033184663) + W(10, 1006151100, 1037223880, 1015760954, -1110067034) + W(11, 1035998904, 1029109433, -1110631596, 1013167238); sum2 = W(0, 1028413027, -1121092135, 1017337743, -1137632446) + W(1, 999930971, 1027578971, -1120070369, -1130520759) + W(2, 1037177446, -1104029390, 1033133092, -1114678191) + W(3, 1012563150, -1120753460, -1106734706, 1048794348) + W(4, -1111694827, 1030530567, -1111535757, -1097640928) + W(5, 1051800696, 1050145982, 1041394994, -1119592910) + W(6, 1011962278, -1098008778, 1035782812, -1114477541) + W(7, -1118971109, 1001961835, 1009477102, 999766235) + W(8, -1106510696, 1047938812, -1127041263, 1024176823) + W(9, -1153318262, -1122757360, 1010623246, -1111017918) + W(10, 1020162151, -1115486877, 974823129, -1129371381) + W(11, 1028214977, -1123769252, -1124747017, 1007999422); WS(1057759166, -1088449289); - sum1 = W(0, -1129635066, 1044873994, -1087389198, -1116302487) + W(1, 1048786726, 1017011581, 1025446574, 1037627635) + W(2, -1097118359, 1022204225, 1039292653, 1021371937) + W(3, -1144519245, 1041685209, -1089578865, 1022125102) + W(4, 1045860709, 1025212662, 1017539549, 1033910011) + W(5, -1129568803, -1105345911, 1045748274, 1031878029) + W(6, 1026395242, 1039651930, 1010566389, -1122366800) + W(7, 1027479143, 1013486066, -1122678010, 1035038013) + W(8, 1030050103, -1097803608, 1033498658, 1007580942) + W(9, 1025431320, 1039320958, -1110122707, -1119554634) + W(10, -1161253492, 1028360045, 1024557433, 1038868536) + W(11, -1112614790, -1126568107, 1010965710, 1015223035); sum2 = W(0, -1162281894, 1033992046, 1042856153, 1038465062) + W(1, -1093956780, 1024284423, -1124547002, -1149048922) + W(2, 1036788056, 1046094595, -1100316514, 1035682376) + W(3, -1150111187, 
988741862, -1112557119, 1060179844) + W(4, -1090501223, -1122577303, -1135956229, -1132541637) + W(5, -1097406101, 1049669935, 1022378118, -1113139255) + W(6, -1145634890, -1132011310, -1121853795, -1101657128) + W(7, 1045140163, -1130855582, 965409433, 1024739191) + W(8, -1104384368, 1042681611, -1121116299, -1118824057) + W(9, -1123274137, 1008671821, 1032013500, -1111116605) + W(10, 1035479824, 1006161754, 1024679583, -1129294934) + W(11, -1147059498, 1027020519, -1131272734, -1124922046); WS(-1086987838, -1100233980); - sum1 = W(0, 1015308459, 1041765192, -1105415847, 1042444900) + W(1, -1111075669, 1003799483, -1122325985, 1028094431) + W(2, -1114108890, 1041449686, -1106140165, 1034767973) + W(3, -1116672610, 1042485054, -1092596043, 1049087917) + W(4, -1110170951, -1144225139, 1032904116, -1118546351) + W(5, 1046690691, -1128556841, 1034284768, 1033288361) + W(6, -1141168915, -1118052589, 1054244345, -1089307961) + W(7, 1026589088, 1010481109, 1033123257, -1116818498) + W(8, 1049735534, -1094503991, 1030527932, -1121578684) + W(9, 1001375875, 992241989, 1040724504, -1104029377) + W(10, 1008796465, 1030765989, 1015833828, 1015146386) + W(11, 1041637233, -1100869156, -1123717612, -1124977133); sum2 = W(0, -1128145157, -1100557855, 1057648426, 1048654145) + W(1, -1122215769, -1133425768, 1026284201, -1106810438) + W(2, 1054522141, 1057115188, -1103409060, 1017140792) + W(3, 1012811840, 1042787919, -1098531848, 1046636853) + W(4, 1046426252, -1118227807, -1136599052, 1044596046) + W(5, -1073275189, -1074262745, 1035358469, -1143532160) + W(6, 992541697, 1038174685, 1061008073, 1044984510) + W(7, 1036107748, 1016191398, 1011005728, -1104618112) + W(8, 1059653331, 1060082709, -1104639697, 981079778) + W(9, -1127430370, -1166127490, 1035655860, 1034192337) + W(10, -1130597644, 1006837720, 995789721, -1116633305) + W(11, 1017274260, 1033697834, -1112828961, 1021901152); WS(-1092446204, 989212831); - sum1 = W(0, 1007124942, 1048125124, -1138101793, -1141178857) + 
W(1, -1102978326, 1034583667, -1116778591, -1131432602) + W(2, -1106585464, 1019782926, -1115311760, 1015089582) + W(3, 1027403088, 1042237585, 1016240146, 1036610823) + W(4, -1110971902, 1027127900, -1121715541, -1138060917) + W(5, 1037962972, -1101006449, -1114322839, 1008360193) + W(6, 1019921952, 1027977243, 1053191424, -1094792341) + W(7, 1031189102, 1028625507, 1020332096, -1126238792) + W(8, 1053622732, -1089064867, -1137835047, 1017386452) + W(9, -1139218425, -1131884420, 1043276755, -1101892348) + W(10, -1132849129, 1009465805, 1020381759, 1024321015) + W(11, 1049124886, -1107274751, -1110951117, 1008208785); sum2 = W(0, -1120159352, -1088595541, 1060560783, 1061831343) + W(1, -1089452071, -1101640393, -1125980662, 1050146519) + W(2, -1093128727, -1098317152, 1042347964, 1038452583) + W(3, -1131578305, -1090859444, 1057304640, 1057012981) + W(4, -1095995319, -1105624394, 1028063854, -1107336769) + W(5, 1071289406, 1069689231, -1096815763, 1041229675) + W(6, 1021975879, -1105868965, -1087527230, -1097221007) + W(7, -1104502319, 1037791301, 1033612769, 1041431951) + W(8, -1086794372, -1086042421, 1049524493, -1143180172) + W(9, -1119579693, -1112749441, 982969010, 1033122226) + W(10, -1104150062, 1029453052, 1040487308, 1042279365) + W(11, -1089785065, -1088997285, -1135764460, 1049121852); WS(-1080642303, 1037515653); - sum1 = W(0, 1014399585, -1123461796, -1115821917, 1037702505) + W(1, -1105260875, -1134503912, 1040389217, 1009459592) + W(2, -1138501592, 1046330151, -1113097082, 1032999462) + W(3, 1025436037, -1118449912, 1046808963, -1088063230) + W(4, -1166545913, 1011659972, 1035628117, -1111999897) + W(5, 1046961203, 1048171302, -1112931778, 1033647146) + W(6, 1032176658, 1042449564, -1085801589, 1046621083) + W(7, -1116953058, 1020514160, 1019714410, -1098862637) + W(8, 1042823052, 1033619294, -1108126015, 1023868127) + W(9, 1031481267, 1015874414, 1032561937, 1017602698) + W(10, 1033606503, 1030447848, -1129425897, -1108656118) + W(11, 1022709518, 
-1172911161, -1161936349, 1015791986); sum2 = W(0, -1139497271, -1104932637, 1000924601, 1034980171) + W(1, -1102656208, -1154378706, 1025910769, 1043978669) + W(2, -1116100213, 1042493673, 1038409695, 1025837919) + W(3, -1105297083, -1105480129, -1102785010, 1044002411) + W(4, -1098474508, -1109802618, 1037339247, 1037617885) + W(5, 1049988969, 1050036780, -1122964745, 1036616991) + W(6, -1111547600, -1107402541, 1045831134, 1039942071) + W(7, -1106907325, -1132149525, -1123186639, -1105522985) + W(8, 1040977948, -1097804729, 1028039731, -1111839940) + W(9, 1029488041, 1011577705, 1036894407, 1042714488) + W(10, -1112976714, 1040500443, -1137152287, -1102258444) + W(11, 1025599417, -1139888185, -1112698813, -1114191023); WS(1060124606, -1084472548); - sum1 = W(0, 1012638700, -1112711971, 1024524352, 1032623962) + W(1, -1115629480, 1027484509, -1131934189, 1021193865) + W(2, 1017460401, -1117271615, 1023262959, -1157409515) + W(3, -1129347279, -1125331574, -1113956442, 1049120342) + W(4, -1108495481, 1010910287, -1138043955, 1048955067) + W(5, -1094951982, 1015143283, 1031640934, 1021584239) + W(6, 1007380511, -1106488069, 1048203795, -1110142007) + W(7, -1129214986, -1144904917, 1022374575, -1115361884) + W(8, 1005249701, 1024310560, -1136673413, 1017598403) + W(9, 996895579, -1120058426, 1026650437, -1121731240) + W(10, 1017029987, -1145343061, 992489843, -1118477310) + W(11, -1129456851, 1036683482, -1115998674, 1014521967); sum2 = W(0, -1113301049, 1041723134, -1119425470, 1012563935) + W(1, 1013366251, -1135264199, -1129519836, 1018887182) + W(2, -1123219625, 1021107126, -1131491053, 1017265018) + W(3, -1113144608, 1065721494, -1082046556, 1032971141) + W(4, -1132479565, -1135106943, -1114997262, 1081282167) + W(5, -1068115046, -1092304788, 1028331651, 927030725) + W(6, 1028189537, 1053752802, -1091841582, -1120681827) + W(7, 1021570574, 983594844, -1144798799, 1004635839) + W(8, -1126716683, 1026059029, -1132445755, -1137356791) + W(9, -1135227415, 
1024321495, -1149813598, 992723582) + W(10, 1010781703, 959859164, -1123606042, 1032429021) + W(11, -1137534547, -1121765421, 1024141029, -1123588459); WS(1064307390, 997943845); - sum1 = W(0, 1016504192, -1110031838, -1108555962, 1047561626) + W(1, -1114174882, 1004953741, 1004931163, -1124632947) + W(2, -1129767067, 1003984285, 1017992857, 1018940039) + W(3, 1001468389, -1106796660, -1106350773, 1053522405) + W(4, -1106382906, 1000370245, -1125536771, -1132488742) + W(5, -1097456765, 1050670238, -1136221548, 1015934681) + W(6, 1024701821, -1111862756, 1040312105, -1128004006) + W(7, -1123896954, 1011062590, -1157300426, -1111316170) + W(8, -1110123941, 1039422954, 1020844396, -1128259885) + W(9, 1008115882, -1153319146, 1030600762, 1005773541) + W(10, 1015300557, -1153739770, 1014991280, -1116314499) + W(11, -1109063375, 1042190650, 1024320647, -1130969505); sum2 = W(0, 1036505224, -1100095598, 1039944480, -1110028565) + W(1, 1016119595, 1017378303, 1018720963, -1119632397) + W(2, 1033659023, 1025734363, 1015545311, -1126149289) + W(3, 1037554666, -1082101932, 1064933062, -1103479495) + W(4, 1036578506, 1022196513, 1035136259, -1072130218) + W(5, 1074634461, 1012777017, 1036162128, -1118704038) + W(6, -1131720942, -1090292986, 1057038142, -1132873326) + W(7, -1120419990, 1020047721, 1012123245, -1114842502) + W(8, 1046242606, -1117820012, -1115011900, 1033886326) + W(9, 1027168757, -1108983905, 1018437263, -1117294599) + W(10, 1004277346, -1136709697, 1024471767, -1105383800) + W(11, 1041395450, 1033191694, -1113427773, 1024978781); WS(1053812476, -1113586226); - sum1 = W(0, -1130399840, -1111011390, 1007197394, 1037708373) + W(1, -1122978308, -1139049030, 1028349447, -1120416825) + W(2, -1105796643, 1044272829, -1110423782, -1125249991) + W(3, 1021963321, -1107941440, 1056809437, -1093131829) + W(4, -1122097703, 1011721762, -1121520129, -1106086226) + W(5, 1046348585, 1057081835, -1099751719, -1114497366) + W(6, 1020769785, -1129958415, -1090756230, 1059679544) 
+ W(7, -1102044582, 1018253471, -1115853964, -1107644952) + W(8, 1050300257, -1119895572, -1111488955, 1022041005) + W(9, 1021860117, -1122155502, -1116443513, 1040260199) + W(10, 1019054925, -1121197023, -1121122124, -1114352785) + W(11, 1041113720, -1110926804, -1156322407, -1135672898); sum2 = W(0, 1015672618, 1010541588, -1115998566, 1020421847) + W(1, -1129704409, 1015113530, -1121994623, 1032866568) + W(2, 1011616452, -1095022641, 1020628925, 1017019595) + W(3, -1149412980, 1044676778, -1107438884, -1097106911) + W(4, -1112769863, 1022483811, -1127744422, -1124040172) + W(5, 1058620442, 1015017230, -1113422252, 1015061518) + W(6, 990134373, -1102290659, 1049322309, -1104275347) + W(7, 1039268635, -1114030797, 1024671460, 1029499881) + W(8, -1104592911, 1045525693, 1026741584, -1136765715) + W(9, -1132259460, -1120084193, 1035840939, -1119993661) + W(10, 999745387, -1131682952, -1125579600, 1001699435) + W(11, -1123886068, 1037165892, -1126762930, -1134246014); WS(1059863230, -1098226968); - sum1 = W(0, -1111154474, -1130042625, -1124092147, -1126659421) + W(1, -1116454602, -1109234699, 1033627108, 1044463840) + W(2, -1125693093, 1045974478, 1040744732, 1026252457) + W(3, -1111220881, -1098768819, 1043860042, 1040108899) + W(4, -1149847241, -1103478147, 1018934285, 1036585799) + W(5, -1094381437, -1098844542, 1043775752, -1107008620) + W(6, -1156119577, 1041677635, 1034781361, 1051479065) + W(7, 1046003792, -1127254569, -1108034825, -1100472134) + W(8, 1038465479, -1096809363, -1121805723, -1105262391) + W(9, 1035708952, 1038444026, 1027036095, 1040658808) + W(10, 1037992790, 1038964162, -1116166225, -1111999396) + W(11, 1019243847, 1003830125, -1131289879, -1116821910); sum2 = W(0, 1031860762, 1008804487, 1036033743, -1130814575) + W(1, -1114717025, 1042429978, -1114215534, -1119320351) + W(2, -1105317049, -1126605754, -1103884858, 1019583714) + W(3, 1037917300, 1028769566, 1033869945, 1029943513) + W(4, -1097519272, 1050527129, -1123072391, -1145928741) + 
W(5, -1104666827, 1058632863, -1084821476, 1057878257) + W(6, -1121765219, -1118791492, -1103277106, 1010033985) + W(7, -1091781337, 1047090522, 1036233577, 1035112755) + W(8, 1043081028, 1033184724, -1108906997, 1050006554) + W(9, -1113082189, -1119166399, -1110309063, -1124066564) + W(10, -1105454190, -1116436299, 1007761049, 1034891236) + W(11, 1003334201, -1146667244, -1124982586, 1037673833); WS(1053829756, -1108691549); - sum1 = W(0, 1014621415, 1034972923, -1105089900, -1121242915) + W(1, 1024842558, -1125107735, -1117035633, 1031518451) + W(2, -1146432572, 1011744694, -1195363583, 1024600093) + W(3, 1017668333, 1028891034, -1111069516, -1099260407) + W(4, 1035963185, -1111257697, -1121088251, 1043029617) + W(5, 1052288956, -1096216885, 1043236853, 1022444749) + W(6, -1113943163, 1037608353, 1043183409, -1109446679) + W(7, -1104376301, 1008767270, 1027915142, -1118505878) + W(8, 1040529537, 1033001703, -1110826184, 983774736) + W(9, -1117882340, 1008280866, -1127832039, 1023717270) + W(10, -1108863440, 1021787123, 1018110476, -1114328254) + W(11, 1025673432, 1038283409, -1136644866, -1122279626); sum2 = W(0, 1011466728, -1153535168, -1121363658, -1113493290) + W(1, 1016979229, -1121975877, -1128158188, -1133074596) + W(2, -1108077390, 1031842286, -1114077565, 1032726750) + W(3, 993685376, -1111924694, 1048370655, -1096696142) + W(4, 1032702600, -1126878203, 1021341187, 1041916532) + W(5, -1088803436, -1097359431, 1051606360, 1020447303) + W(6, -1125064327, -1115375045, 1050909934, 1066069354) + W(7, -1108957150, -1115472084, 1016637363, -1111577830) + W(8, 1048268187, -1102058354, -1106393780, 1011253904) + W(9, -1135616946, -1121861231, 1028734454, -1112810542) + W(10, -1104652034, 1032484333, -1142265296, -1139232914) + W(11, 1033829714, -1108918948, -1124153420, -1135690267); WS(1043379192, -1131322837); - sum1 = W(0, -1129021027, -1109595831, 1033800339, 1042153991) + W(1, -1099711038, 1026187331, -1148980502, -1141769560) + W(2, 1040943040, 1016895818, 
1029537886, 1023917269) + W(3, -1117253390, -1131540259, -1099641090, 1040857522) + W(4, -1104535414, 999787984, 1015261926, 1038059198) + W(5, 1019360940, -1103092216, 1032384358, -1137467256) + W(6, 1024719214, 1043151230, 1047704456, -1091529984) + W(7, 1043292971, 1021851650, -1130947836, 1040366611) + W(8, -1120007803, -1100986498, 1031568104, -1117455240) + W(9, 974083843, 1036650848, 1032688054, -1116706574) + W(10, 1019020464, 1021119266, 1010796022, 1032003186) + W(11, -1135300928, -1114490317, 1026900536, -1124200964); sum2 = W(0, -1127801857, 1028175283, -1129802997, -1133354170) + W(1, -1129332011, 1029358411, -1143073539, 1010285366) + W(2, 1022418743, -1111757330, 1032847739, -1119713292) + W(3, -1106914355, 1045024365, -1109183829, 1035388549) + W(4, 1028119663, 984677997, -1114698807, -1107796176) + W(5, 1055188047, -1099199714, 1027769667, -1126683209) + W(6, -1129241721, -1093181393, 1060464193, -1108056884) + W(7, 1015260775, -1117802232, 1034854605, -1094857339) + W(8, 1038631095, 1042537531, -1123722407, 1025618013) + W(9, 1015357631, -1104001534, 1035894131, 1019867695) + W(10, 1021694327, -1116557581, 1026781991, -1106206681) + W(11, -1121422795, 1010545294, -1120402439, 1021478623); WS(1049844732, -1121310639); - sum1 = W(0, 1023733410, 1044402267, 1053233309, -1096212219) + W(1, 1051767783, -1114613442, -1099209689, -1106734859) + W(2, -1108848000, -1103164857, -1109302031, -1129199659) + W(3, 1040893204, 1050332331, 1049912979, -1099336615) + W(4, 1050917291, 1033787229, -1102957277, -1111686974) + W(5, -1098618899, -1097562740, -1121562509, -1111886543) + W(6, -1114244983, -1122456058, 1048539349, -1096335583) + W(7, 1042792075, -1121890370, -1136674298, 1049981942) + W(8, -1111429758, 1033031043, 1039550692, 1041151474) + W(9, -1108461358, -1102278760, 1039273110, -1102589972) + W(10, 1008842338, -1104020956, 1021220348, 1043895987) + W(11, 1048170180, -1101337719, 1041281742, 1031699907); sum2 = W(0, 1051830333, 1040398919, -1088812275, 
-1112895824) + W(1, 1030823820, -1120174945, 1052363768, 1056364330) + W(2, -1088095023, -1128031249, -1126124865, 1022839272) + W(3, 1062286287, -1105094784, -1083945825, -1110939052) + W(4, -1131257287, -1113476028, 1068323273, -1117763636) + W(5, -1086712736, -1122350162, 1034881723, 1031923437) + W(6, 1062941652, -1117097644, -1085128967, -1107088013) + W(7, 993374262, -1149114843, 1058036233, -1135317262) + W(8, -1090471655, -1117471490, 1022005418, -1131972943) + W(9, 1038399576, 1045469136, -1096613184, -1128774604) + W(10, -1124473506, 1015689530, 1029358050, -1127713443) + W(11, -1118175248, 1023889083, 1006015479, -1114795832); WS(997080576, -1130763300); - sum1 = W(0, -1113905570, -1121320815, 1038521748, 1025325853) + W(1, -1108459484, -1123369669, 1030153879, -1108108727) + W(2, 1032364321, -1131460303, -1152289285, -1111143084) + W(3, -1117230256, -1100762033, 1051702873, 1040312882) + W(4, -1099800679, 1018134009, -1152616277, -1090117932) + W(5, 1069923213, 1063008042, -1092045374, -1116551136) + W(6, -1146808706, -1109500574, -1104049339, 1056019095) + W(7, -1100849447, 1015899783, -1112646432, -1102311764) + W(8, 1028696893, -1115439812, -1105934578, -1120248394) + W(9, -1186209359, -1119610882, -1120830317, -1125581089) + W(10, 1028735898, -1113190874, -1120216576, -1099135446) + W(11, 1030578386, -1116447004, -1108771165, -1120342084); sum2 = W(0, -1130955153, 1025142055, 1025405091, 1041203911) + W(1, 1034798579, -1107186683, -1115062100, 1033634795) + W(2, -1124058081, -1151569099, -1098708869, 1041850582) + W(3, -1114768924, -1114031272, 1047530952, -1160505239) + W(4, 1048498835, -1097808347, 1040485485, -1115074190) + W(5, -1104495565, 1065129660, -1095909934, 1045395705) + W(6, -1102247365, -1110799922, 1038913214, -1115442088) + W(7, -1105078519, -1106337339, 1040365390, 1039570131) + W(8, -1102056033, 1037120419, 1024518769, 1007217475) + W(9, -1105380703, -1112489130, -1123724877, -1145632358) + W(10, -1109649658, -1152795787, 
-1134789779, 1035629875) + W(11, -1104428707, 1035843769, -1117935125, -1124566009); WS(-1078383103, 1059446981); - sum1 = W(0, -1139864362, -1142609202, -1105667544, 1048881003) + W(1, -1110989208, 1019807480, 1022304576, 1008862865) + W(2, -1100327790, 1047898440, 1029966423, -1128249884) + W(3, -1117916615, -1109139429, -1098424968, 1053051896) + W(4, 1023833874, -1114518074, 1007335089, 1032501274) + W(5, -1089810345, -1104044598, 1040575921, 1013296733) + W(6, -1119884647, 982044231, 1033240873, 1051763270) + W(7, -1114053146, -1110353213, 1022280942, -1115332063) + W(8, 1049118183, -1113330784, 1041606896, -1126749570) + W(9, 1023668405, 991223187, -1140016273, 1027424455) + W(10, -1122699729, 1021514772, 1034312731, -1102309771) + W(11, -1110413061, 1032557738, 1041157596, -1121516731); sum2 = W(0, -1143648694, 1050457027, -1090901428, -1095258955) + W(1, 1047388223, 1034828442, 1022767513, -1104882348) + W(2, 1017677825, -1116653154, -1125154789, -1149114774) + W(3, 963325627, 1052893834, -1090554669, 1030383529) + W(4, -1114292033, 1040557770, -1138915747, -1122824043) + W(5, 1048412513, 1063122641, -1118207917, 1005930742) + W(6, 1040530304, 1019582897, -1101863176, -1089352859) + W(7, 1048190601, 980397999, 1031585165, -1094902239) + W(8, -1104950416, 1057380402, -1102824630, 1033506230) + W(9, 1008260859, -1113748748, -1120605391, -1106433792) + W(10, 1038107916, -1115355457, -1120690652, -1090635807) + W(11, 1056780417, 1049388154, -1104842210, -1136517107); WS(-1104952056, -1073278929); - sum1 = W(0, -1144211169, -1099507218, 1047627264, 1043300637) + W(1, -1098196346, 1030331294, 1031125659, -1116651974) + W(2, -1113308557, 1037274304, -1126577137, -1122667056) + W(3, -1121100585, -1101060075, 1054491321, -1102367022) + W(4, -1102852173, 1026346581, -1142367854, -1105066053) + W(5, 1039149624, 1051689271, -1106731162, -1120693369) + W(6, 1025675017, -1131576359, -1106626380, 1044811828) + W(7, -1111689619, 1018492983, -1114298540, -1114707952) + W(8, 
1042584197, -1128472513, -1126957444, -1140414731) + W(9, 1036563163, -1120605819, -1128818827, 1045578618) + W(10, 992521004, -1128018583, -1122834968, -1111900952) + W(11, 1039018645, -1115669626, 1025441875, 997033420); sum2 = W(0, -1117577133, 1036324905, 1012676607, -1164734594) + W(1, 1039853902, -1110394349, -1139616023, -1124977156) + W(2, -1115101040, 1032695755, -1116113854, 1006188424) + W(3, 997820975, -1120808104, 1041273688, 1041759226) + W(4, 1041255405, -1120661488, 1028627780, -1109376562) + W(5, 1049990275, -1096754642, -1106762918, 1033535767) + W(6, -1114909048, 1029259135, 1043064430, 1041324783) + W(7, -1111929038, 1021759445, 1032747040, -1145526876) + W(8, 1039154681, -1094055114, 1025837595, 1023476809) + W(9, -1116006677, 1022442689, 1027909669, -1098711698) + W(10, -1130147608, -1133986843, 1030832637, 983753970) + W(11, -1115224952, 1023440264, -1112181665, 1029342490); WS(1057403966, -1096678293); - sum1 = W(0, 1015545167, -1133819725, 1037006758, -1110196161) + W(1, -1111752718, 1028708905, 1013920108, -1127594402) + W(2, -1136130453, -1123399563, 1041684833, -1119121787) + W(3, 1030182836, 1031500952, 1042283546, -1086200264) + W(4, 1047364560, -1120221088, 991347091, -1107540753) + W(5, 1040431359, 1060132444, -1104881213, 1006406689) + W(6, 1031028372, 1042502792, -1084295706, 1049379651) + W(7, -1113607001, 1002061113, -1130110920, -1120427420) + W(8, 1042737160, 1001749345, -1117219726, 1026883950) + W(9, -1137745417, 1023814022, 1019878434, 1023472436) + W(10, 1017968636, -1124476270, -1127755335, -1113612941) + W(11, 1042334622, -1123659840, 1008021369, 1011156129); sum2 = W(0, -1178786588, -1129322914, -1116170217, 1036181740) + W(1, 1016863918, -1132924585, 1027650789, -1117835702) + W(2, -1106357388, 1050236879, -1107702296, -1127926494) + W(3, 1004569898, -1120140062, 1012300621, 1029537087) + W(4, -1115347042, 1007741857, 1010647389, -1110565129) + W(5, 1051050908, -1121260831, 1019718506, -1147888890) + W(6, -1130907066, 
1046200355, -1094299123, -1105419536) + W(7, 1018471790, -1122466004, -1128412421, 1035331810) + W(8, 1038925264, -1104188032, 1035623426, -1132432621) + W(9, 1000455394, 1040609034, -1110352470, -1148832482) + W(10, 1031812039, -1132484471, 1019136642, -1114775527) + W(11, 1028674911, 1009994365, -1129304944, -1146317634); WS(1056335484, -1129697442); - sum1 = W(0, -1118615510, 1026825265, -1102686619, 1045269336) + W(1, 1031523962, -1140818205, -1123816285, -1162479083) + W(2, -1175208362, -1122870505, 1025178018, -1112741005) + W(3, 1016357093, -1123245334, -1112742573, 1056143939) + W(4, -1115481871, 1004860403, -1126548591, -1115166368) + W(5, 1012633705, 1022501065, -1098786498, -1121817826) + W(6, 1017679973, 993334229, 1048503365, 1044007618) + W(7, -1107348056, -1134864917, -1125007109, 1009460392) + W(8, -1110640192, 1043433395, -1104721331, -1142571619) + W(9, -1130509569, -1117686884, 1010107581, 1037226473) + W(10, -1124234681, -1120701013, -1127256452, -1110259820) + W(11, 1008538193, 1040188728, -1112073470, 1004599859); sum2 = W(0, -1140731697, -1126783010, 1035005186, -1110815467) + W(1, -1144657885, -1127206230, 1018164054, -1144859189) + W(2, 1011714049, 1039236492, -1130754383, -1146205013) + W(3, -1116322329, -1148034885, -1112384840, -1109778149) + W(4, -1113329105, -1113225660, 1033668632, -1111621890) + W(5, 1057652336, 1053123628, -1107649638, 1012470213) + W(6, -1110091010, 1034430751, -1103655808, -1113563636) + W(7, -1112793157, -1113811608, 1006827755, -1133278289) + W(8, 1035575897, 1009902097, -1136830370, -1126431906) + W(9, 1024608202, -1134565598, 1007536733, 1031837967) + W(10, -1131576514, 1007646275, 978878091, -1106732021) + W(11, 1017810394, 1032913014, -1118754368, -1123083034); WS(1064654654, 1035088379); - sum1 = W(0, -1123150274, -1101065371, 1052021669, 1034671630) + W(1, -1103122974, -1124628232, 1013730923, -1104968795) + W(2, 1043838624, -1116995686, 1025043295, -1123853974) + W(3, -1124984926, -1104298005, 1050953841, 
-1125221929) + W(4, -1102292982, -1122875259, -1143080352, -1097917367) + W(5, 1053857683, 1053311748, -1101557747, -1129945790) + W(6, -1115541655, -1115560637, -1098028717, 1054064059) + W(7, -1102206963, -1115475969, -1120446591, -1112072243) + W(8, -1118042236, 1056948345, -1099829586, 1010096420) + W(9, 1019709882, -1121553409, -1117850604, 1047317764) + W(10, -1123986585, -1120157778, -1123168830, -1101025669) + W(11, 1024509393, 1051915779, -1105952782, 1010117900); sum2 = W(0, -1115290116, -1109338596, 1052546267, -1101682429) + W(1, -1106866549, 1027635895, 1036300391, -1120873277) + W(2, 1031940424, 1045633640, -1121551223, 1021591180) + W(3, -1109664288, -1133763869, 1041363997, -1094977574) + W(4, 1008614181, -1119215823, 1023887773, -1110763968) + W(5, 1046589307, 1050150667, -1110488084, 1009099129) + W(6, 1031338337, -1104363651, 1038758869, -1104681827) + W(7, -1120223295, 1016354897, -1112746552, -1111086518) + W(8, 1026186944, -1121584221, 1035823152, -1115537942) + W(9, 1029647363, 1014654409, 1011886363, 1042076972) + W(10, -1111033402, 1012736237, -1123357025, -1113327782) + W(11, -1114411312, 1033926660, 1046305164, -1109553196); WS(-1088190206, -1108558078); - sum1 = W(0, -1109712467, 1048826552, -1117596347, 1045129740) + W(1, -1145103958, 1040739958, 1034096473, -1100191472) + W(2, -1104233509, 1021528052, 1015098342, -1095362037) + W(3, -1111610193, 1051587686, -1112703594, -1123092480) + W(4, 1033306871, 1043699492, -1111503908, -1114238856) + W(5, -1118548075, 1045282957, -1105600408, -1112734841) + W(6, 1029528067, -1124911644, -1094857227, 1041941490) + W(7, -1116887040, -1106244021, -1109918121, 1053388852) + W(8, 1040678692, -1121835291, 1043572363, 1044475255) + W(9, -1130336610, -1101598429, -1102065648, 1050563972) + W(10, -1101594283, -1100499697, -1110290409, 1049826386) + W(11, -1108498290, 1032628503, 1041251987, 1015959598); sum2 = W(0, -1140036524, 1033121292, 1020537037, -1094559771) + W(1, 1042598592, 1048560917, 
-1122134368, -1135675176) + W(2, -1102563608, -1090683436, 1049536131, 1043095342) + W(3, -1124742331, 1022385651, 1017232470, -1089829937) + W(4, 1015761873, 1060516603, 1023368729, 1037868584) + W(5, -1097085277, -1081905013, -1122385293, 1068776853) + W(6, 1025643512, -1113410705, 1026038978, -1085580324) + W(7, -1119606047, 1062905414, -1117935205, 1034815986) + W(8, -1107749524, -1093433405, 1034046493, 1053234619) + W(9, 1029315286, -1106876578, -1116418603, -1100509010) + W(10, 1037794408, 1023262329, -1121900993, 1029051910) + W(11, 1013163432, -1107168757, 1020109569, 1038509970); WS(1048802172, -1118644607); - sum1 = W(0, 1022629891, -1112271863, -1115934246, -1112205646) + W(1, 1050188943, -1125264263, 1001330921, 1008285357) + W(2, 1007889405, -1137992085, 1025826270, -1127716075) + W(3, -1147098739, -1106982824, -1110669107, 1044651974) + W(4, 1026008921, -1113499645, 1023807923, 1020475895) + W(5, -1095657203, 1050363724, 1011316531, 1026946398) + W(6, -1129374813, 958183765, -1113474437, 1050921091) + W(7, -1106529387, -1121270209, 1028532286, 1025640422) + W(8, -1090481180, 1050665633, -1131537104, 1021652689) + W(9, 1031375324, -1133565569, -1104303995, 1037811271) + W(10, -1125009457, 1014192325, -1151549405, -1106868493) + W(11, -1104624276, 1049383075, 1022847857, -1142930963); sum2 = W(0, 1016559128, -1089262209, 1053795811, 1058569170) + W(1, -1094475155, 1008650912, -1119933527, 1032931419) + W(2, -1108360154, -1097574423, 1036603460, -1121755244) + W(3, 1008526536, -1094914643, 1052999976, 1052760357) + W(4, -1106271635, 1039081818, 1034816070, -1096197918) + W(5, 1069558608, 1058007152, 1022028102, 1004102711) + W(6, 1018959568, -1101210129, -1103281588, -1106340652) + W(7, -1121182797, 1033515588, 1026554777, 1049415798) + W(8, -1085501184, -1101474305, 1027756295, 1032461240) + W(9, -1112891495, -1128790619, -1098019814, -1114646508) + W(10, -1124734105, -1118822413, 1046177388, 1043157162) + W(11, -1088254262, -1096632714, 1015426864, 
1043397723); WS(-1085648446, -1079079370); - sum1 = W(0, 1031831473, 1036117159, 1049939273, -1094329359) + W(1, 1040633606, 1023486628, -1114079994, 1003975776) + W(2, 1042007752, -1097850009, -1134555900, 1019647980) + W(3, 1031367567, 1044164867, 1043312672, -1089391360) + W(4, 1041283678, 1023951387, -1118896788, 1045647319) + W(5, -1095211959, -1087218668, 1044809099, 1014099708) + W(6, -1123680847, 1033826253, -1122554975, -1100203021) + W(7, 1046447947, 1017251780, 1033718927, 1041970952) + W(8, -1116359196, 1033960979, 1042238738, 1031788713) + W(9, -1123944253, 1024952417, -1176880640, -1108529239) + W(10, 1024390649, -1146375056, 1033541103, 1040029267) + W(11, 1026974961, -1111794397, 996515216, 1033511473); sum2 = W(0, 1019169584, 1042983019, -1098354632, -1099987280) + W(1, 1048978343, -1118546716, -1120973142, 1009997472) + W(2, -1106283039, 1032814660, -1103033329, 1022934360) + W(3, 1033125970, -1103915398, 1033722376, -1095179878) + W(4, 1055664562, -1110281795, 1025364316, 1036688212) + W(5, 1062322516, 1032770062, -1097173506, 1045599869) + W(6, -1109812797, -1125420780, -1098634798, -1109050948) + W(7, -1103688168, -1119010038, 1040314133, -1119986230) + W(8, 1057333054, -1097734639, -1173745415, 1038965178) + W(9, -1108282977, 1034277286, -1102308141, 1034195486) + W(10, -1107997912, 1007528912, 1018923536, -1117630132) + W(11, -1125368820, 1049154263, -1098308929, 1035253180); WS(-1089355774, -1078290086); - sum1 = W(0, -1122950775, 1037182841, -1118638572, -1121667553) + W(1, 1009124079, -1115334660, -1112691207, -1124344033) + W(2, 1041047393, -1104403323, -1138062223, -1126199861) + W(3, 1023290087, 1046400294, -1098349922, -1099164822) + W(4, 1044398104, -1136525567, -1107733500, -1118583361) + W(5, 1057481334, 1054069421, -1149431076, -1110719762) + W(6, -1146188190, 1042531744, -1139289650, -1097191323) + W(7, 1044515332, 1015659149, -1132821263, -1121639475) + W(8, -1109625472, 1041114291, -1120573261, -1121643278) + W(9, -1112282369, 
-1137408791, 1026873009, -1120861012) + W(10, -1120947806, -1122889719, -1116356099, 1018516820) + W(11, -1139389015, -1139200383, 1017011067, -1120569352); sum2 = W(0, -1120919296, -1124567280, 1037030993, -1120629799) + W(1, -1106639281, 1020211370, 1009843654, -1107129213) + W(2, 1043380414, -1102119519, 1030878171, -1126825280) + W(3, -1132368064, 1020831927, -1109288357, -1112537154) + W(4, -1118412551, -1144175329, 1020138111, -1098918731) + W(5, 1059483198, 1064361176, -1093377806, -1146281491) + W(6, 1029564211, -1132836513, -1119273466, -1104145236) + W(7, -1123848756, -1135251423, -1115205032, 1010227332) + W(8, -1106893419, 1043150197, -1103899854, 1028327527) + W(9, 1016741875, -1118685376, 1028452918, -1117770026) + W(10, 1024207514, -1128215590, -1119663171, 1026001154) + W(11, -1135273053, 1019055438, -1109882780, 1024565629); WS(1064975294, 1066308158); - sum1 = W(0, 1031747776, -1119071204, 984462229, -1132055971) + W(1, -1148253029, -1126913907, -1106142801, 1032019633) + W(2, 1045090381, -1108909198, -1110036442, 1036419718) + W(3, 1028818885, 1041375482, -1093262116, 1038243096) + W(4, 1036945928, -1121925745, 1001228109, -1112793497) + W(5, 1049965274, 1021279149, -1115536386, 1019907753) + W(6, -1115396498, 1013499583, 1052662103, -1090377239) + W(7, 1034061985, 1032108124, 1041342669, -1123968938) + W(8, -1113450905, 1040402543, 993049059, -1109636927) + W(9, -1108139113, 1024220311, 1035427697, -1107965954) + W(10, -1113141447, 1032825796, 1027821620, 1020544662) + W(11, -1138282267, -1116422289, 1016907357, -1128061041); sum2 = W(0, 1016610899, -1118189976, 1027283971, 1028895363) + W(1, -1113530321, 1007846553, -1135553471, 1018031354) + W(2, -1118352328, 1032145382, -1123867563, 1011272254) + W(3, -1163068737, -1120752887, 1042584076, 1042427003) + W(4, -1099207121, 1010785270, -1147979120, 1035872696) + W(5, -1095799786, 1073605475, -1075418961, 1026092591) + W(6, -1121296916, 1034250650, 1009508653, 1056095764) + W(7, -1091570337, 
1030054693, -1139692219, 1022157658) + W(8, -1121907329, -1130051225, -1113117501, 1019567305) + W(9, 1007282246, -1131969269, -1148404200, 1025403981) + W(10, -1124635978, 1012575724, 1019770181, -1131641536) + W(11, 1013257077, -1121748387, -1123610989, 1018865930); WS(1062423998, 1020226002); - sum1 = W(0, -1122384152, -1116470612, -1095861522, 1055717600) + W(1, -1128557498, -1133541800, 1000976853, 1015809956) + W(2, -1125526476, 1034182342, -1136390676, 1025071548) + W(3, -1122957973, -1115150860, -1101122465, 1052715645) + W(4, -1101688288, -1121585121, -1140430696, -1100102424) + W(5, 1051208664, 1053221217, -1097231798, 1026360261) + W(6, 1018079658, -1099378776, 1045734276, 1041738487) + W(7, -1114764537, -1127628368, -1116871589, -1102397253) + W(8, 1040938184, 1038184662, -1107259340, -1121765359) + W(9, 1021042950, -1121061927, 1045437908, -1114085178) + W(10, 1034631967, 1018430000, -1130305609, -1097083551) + W(11, 1049869175, 1019939330, -1105843464, -1122412137); sum2 = W(0, -1125184611, -1124729770, 1045115043, 1058112728) + W(1, 1023628890, 1007171579, 985313435, -1132095255) + W(2, -1117207517, 1067077185, -1158219675, -1118062236) + W(3, -1181860650, 1028760415, 1036693207, -1107623537) + W(4, 1043689375, 1000645119, -1124740239, 1011065519) + W(5, -1088457353, -1080660794, 1020427720, -1160295467) + W(6, -1132645547, 1033849803, -1096437481, -1109418981) + W(7, -1110185484, 1015083304, 999484231, 1015359010) + W(8, 1048821220, -1117188353, 1032363474, 1023561702) + W(9, -1125466771, -1140486285, -1107942084, 1027196953) + W(10, -1121893121, -1134577643, 1024683619, -1125848669) + W(11, 1027562883, -1127975224, 1023023798, -1158549787); WS(-1085592318, -1113086899); - sum1 = W(0, -1127342656, -1105245475, -1104578161, 1053241687) + W(1, -1103827813, -1138537863, 1024334944, 1029597578) + W(2, -1117644382, 1041591201, 1020278010, 1015573096) + W(3, -1107952998, -1104926592, -1101901257, 1051692021) + W(4, -1097952575, -1110174988, 1034498345, 
-1110412429) + W(5, 1053247055, 1044816720, -1110749814, 1033881662) + W(6, -1131633306, -1103724998, 1051300307, 1042909421) + W(7, -1106968399, -1116328992, -1110168462, -1098176238) + W(8, 1051451939, -1124765258, -1105195378, -1113141267) + W(9, 1033648125, -1115323711, 1049307889, -1143558503) + W(10, 1027981559, 1028312016, -1119681052, -1098367950) + W(11, 1053283054, -1113485404, -1102600950, -1113790704); sum2 = W(0, 1019423143, -1118706158, -1110500850, 982325064) + W(1, -1117883328, -1145733922, -1134200265, -1122995556) + W(2, 1021681136, -1106847870, -1115095323, 1001371122) + W(3, 1022335641, -1113839515, -1121978030, 1042414810) + W(4, -1112736275, 1019850892, -1126211289, -1123774588) + W(5, 1048787768, 1052221246, -1118959328, -1113736050) + W(6, -1160177640, 1002859666, 1035034344, 1009324191) + W(7, 1026260852, 1023767274, 995157684, -1114460336) + W(8, 1025695056, -1129766425, -1114723897, -1130130145) + W(9, -1138832033, -1131104601, -1126984825, -1136621129) + W(10, 1016807320, -1131688905, -1130236057, 1027754115) + W(11, 1027074464, -1120353368, -1121253912, 992217060); WS(1025516512, -1100199588); - sum1 = W(0, 1010096560, 1021891087, 1041308560, -1106994989) + W(1, -1107391304, 1032706511, 998291066, -1128181387) + W(2, 1015332531, -1097993736, 1040752366, -1115681272) + W(3, 1015136529, -1152435393, 1054580194, -1087955312) + W(4, 1021987303, 1013936722, 1016981531, 1035336779) + W(5, 1043985948, 1049373383, 1016493518, -1153713033) + W(6, -1134306338, 1040424469, -1088067164, 1048585956) + W(7, -1111549299, -1122756060, 1015217585, 1041134374) + W(8, -1097437107, 1044933000, 1015283332, 1028860515) + W(9, 1025573847, -1147235028, -1108043523, 1037061588) + W(10, -1125444197, -1124931799, 1027586712, -1105873140) + W(11, 1024655979, 1041451309, 1025261651, 1010373746); sum2 = W(0, 1024904631, -1115115972, -1128899549, -1100416868) + W(1, 1014205090, -1138213458, -1136864762, 1023564875) + W(2, -1103710322, -1105139728, -1112771593, 
1024304323) + W(3, 1005033941, -1114446231, -1140124434, -1107001478) + W(4, -1105823902, 1028369943, 1030076655, -1115188268) + W(5, 1057566767, 1061216146, -1101449249, 1031401203) + W(6, 1022202973, -1099782234, 1051976820, -1114917196) + W(7, -1115621063, -1127316949, 995105737, 1025338591) + W(8, 1009750634, -1107279078, -1123827499, 1002907445) + W(9, 1009257370, -1107689693, 1040613751, -1123762171) + W(10, -1124460977, 1021828037, -1113660547, 1049272645) + W(11, -1105716747, -1112133417, 1019919657, -1121285688); WS(1050734204, -1108852232); - sum1 = W(0, -1111669430, -1116292712, -1122281377, 1027495958) + W(1, -1113568120, -1123139966, 1026854009, -1106093993) + W(2, -1103616254, -1113635568, -1125634353, -1117637392) + W(3, -1115457556, -1104273631, 1027625969, 1042550097) + W(4, -1100906821, -1147736996, 1022285659, -1090233269) + W(5, 1069754213, 1065691213, -1090598793, 999398084) + W(6, -1130768721, -1103820941, -1114752252, 1054098174) + W(7, -1103145534, -1140005358, -1124925681, -1107176543) + W(8, -1130883179, 1026991873, -1107767585, -1123096067) + W(9, 988348114, -1114665965, -1144129324, 1018693905) + W(10, 1007587914, -1120005196, -1117352760, -1103610460) + W(11, 1032882469, 1034282506, -1112131865, -1122062653); sum2 = W(0, 1024214881, -1105712489, -1096328526, 1036545949) + W(1, -1114697429, 1029115040, -1122830075, 1026001227) + W(2, -1089670730, -1104892025, 1027384734, -1127506094) + W(3, 1024710111, -1105988647, -1109313571, 1042923133) + W(4, -1108836817, 1018384918, -1118490155, 1044311614) + W(5, 1057951288, 1050757116, 1046225965, -1118982995) + W(6, -1134765475, -1128591966, 1031038781, 1041007149) + W(7, -1111832237, 1000957030, 1007204875, 1016894538) + W(8, -1135710147, -1111844715, 1020988490, 1018633070) + W(9, 955722144, -1132281374, -1121635711, 982097434) + W(10, -1141601766, -1131312630, -1132418382, -1121103003) + W(11, 1040031313, -1131469166, -1137288635, 1012074251); WS(-1077332287, -1089760701); - sum1 = W(0, 
998476811, -1126986618, 1035645275, 1035996661) + W(1, -1111559939, 1002151576, -1146931846, -1106320367) + W(2, 1033143495, 1030948981, -1116749294, -1132098378) + W(3, 1020090684, -1103512376, 1057100532, -1098691810) + W(4, -1114602228, 1020764052, -1115596769, -1106044643) + W(5, 1033806067, 1054020686, -1102410839, -1131285468) + W(6, 1018970620, 1033789092, -1089559024, 1053006209) + W(7, -1114317262, -1143682184, -1117338894, -1121544222) + W(8, -1119052427, 1053247323, -1148875196, -1140686688) + W(9, -1131301080, 1025908912, -1109205213, -1117960094) + W(10, 1040862618, -1112291776, -1134004534, -1120853460) + W(11, 1021655744, -1112997269, 1024305160, -1137942600); sum2 = W(0, 1006079429, 1015506585, -1129173219, -1126961905) + W(1, 1025558752, 1004493969, -1142220161, 1012377004) + W(2, 1019086641, 1027847692, -1121396179, -1129089615) + W(3, -1129651941, 1027444401, 1029291472, -1099113060) + W(4, 1031608500, 1032469225, -1120056293, 1027255740) + W(5, -1087748986, -1071778912, -1132223695, -1117547929) + W(6, 1021210606, -1154390002, 1030702430, 1077548482) + W(7, 1041292060, 1017793536, -1138049810, -1121552483) + W(8, 1046290929, 1017608723, 1030045463, -1115202758) + W(9, 1020679942, 988683875, -1149010889, 1009874569) + W(10, -1116313503, 1024909384, -1144760977, -1149209645) + W(11, 958643736, -1165018915, -1108302337, -1153372770); WS(1054407548, 983325672); - sum1 = W(0, 1028412425, -1105600625, -1148390382, 1033571545) + W(1, 1032293767, -1124427701, -1115456470, 1033490817) + W(2, 1044756246, -1114006910, -1119790070, 1037509878) + W(3, 1017604747, -1115519181, -1103922376, 1059608097) + W(4, -1107101148, 1007976775, 1031921957, 1031550611) + W(5, -1090010995, -1087252051, 1044072953, 1022727275) + W(6, -1121692358, -1103698804, 1062123705, -1094628214) + W(7, 1027680451, 1033962892, 1041128181, -1109550327) + W(8, -1103937189, 1042983319, 1021059262, -1108328080) + W(9, -1131299175, 1030793272, 1039103265, -1106569209) + W(10, 1007158719, 
1032055396, 1034097067, 1014014769) + W(11, -1135652511, -1116995186, -1141553278, -1143059662); sum2 = W(0, 1007145536, 1011835040, -1115291423, 1029973058) + W(1, 1036345379, -1106716830, -1108956115, 1040320325) + W(2, -1145921569, -1113530029, -1116109872, 1031627486) + W(3, -1123468231, 1009493536, -1120694127, 1025444390) + W(4, -1106839609, -1106619549, 1003522017, 1036429861) + W(5, 1057547857, 1039825371, 1028735446, 1032586179) + W(6, -1106382527, 1045134298, -1120933925, -1093621604) + W(7, 1043632212, -1135351552, 1023566518, -1132203156) + W(8, -1105348488, 1030401418, 1032927131, -1107014793) + W(9, -1116797301, 1044032552, -1105744806, -1111109931) + W(10, 1031187850, 1030984886, 1016028592, -1103689691) + W(11, 1019566780, 1039637835, -1119043970, -1117261713); WS(1062610366, -1081620328); - sum1 = W(0, 1034909184, 1041975919, 1038564634, -1086061975) + W(1, 1054071881, -1118145809, 1022225638, 1036878493) + W(2, 1032326503, -1097049310, 1038246806, 1033815666) + W(3, 1022593144, 1038275488, 1032775158, -1094128798) + W(4, 1049378869, 984752392, 1034659649, 1049110721) + W(5, -1104434937, -1097332230, 1048167927, 1040915883) + W(6, -1145063906, 1039909252, -1098154666, -1096830529) + W(7, 1037005773, -1127387550, 1029979468, 1050285945) + W(8, -1091716385, 1031802154, -1122539766, 1042582214) + W(9, 1024460554, 1032760119, -1098908862, -1125106744) + W(10, 1023442918, 1025327999, -1129493655, 1052309552) + W(11, -1093908076, 1033460244, -1111713187, 1038395208); sum2 = W(0, 1025388154, -1106689977, -1110560421, 1049316874) + W(1, 983067209, -1121273022, 1001261778, 1031762430) + W(2, -1111104301, -1112011481, 1033280635, 1026153330) + W(3, 1007664153, -1105078255, -1122922762, 1044332351) + W(4, -1133572905, -1116643818, 1018216589, -1117638934) + W(5, 1029938402, -1117360942, 1022258405, 1033323756) + W(6, 1011431705, -1119019386, -1107704269, -1116357646) + W(7, 1016143957, -1123412994, -1124915037, 1039919645) + W(8, 1043034893, -1107523849, 
-1109485745, 1027380094) + W(9, 1007167865, -1115769810, 1041034358, -1112781805) + W(10, 1010088409, 1016238045, -1118462618, 1041969311) + W(11, 1050092429, -1104673921, -1106654827, 1006440178); WS(-1079099231, 1058224693); - sum1 = W(0, -1136025729, -1098612147, 1040574357, 1044962866) + W(1, -1107948018, -1152707357, 1041807413, 1030021338) + W(2, -1113591959, 1041390773, 1030293261, 1019631368) + W(3, -1113852830, -1101745716, 1035906153, 1038641429) + W(4, -1102747762, -1119110697, 1038775953, -1114342965) + W(5, -1100940817, -1112997607, -1131180663, 1008365619) + W(6, -1130024634, -1109189268, -1110548904, 1053521164) + W(7, -1102886695, -1120409456, -1139925939, -1116898669) + W(8, -1107090858, 1048687519, -1127712365, -1156710253) + W(9, 1032899333, -1169623989, 983487291, 1048553583) + W(10, 1020276366, -1137865855, -1135488689, -1105924753) + W(11, -1105279552, 1049805017, 1033907987, -1120648241); sum2 = W(0, 1026221982, 1031288593, 1025270629, -1105433524) + W(1, 1025604422, 1025361731, -1150712731, -1121646784) + W(2, 1040465550, -1099611845, 1015298833, -1118571342) + W(3, -1135858910, 1020650492, -1102699692, -1077885918) + W(4, 1013887757, 1035533544, 998750602, -1118866549) + W(5, -1090773917, -1073585027, -1111404471, -1115122758) + W(6, 1026066546, -1126679589, -1103726643, -1083499628) + W(7, 1029004921, 1025173545, -1122173835, 1033676882) + W(8, 1038653616, 1072488285, -1128843744, -1132364945) + W(9, 1019885572, -1119823506, 1044175124, 1076879885) + W(10, 1042609155, 982102231, -1114797832, 1024418530) + W(11, 1035909226, 1053754278, -1128976380, -1120827581); WS(1039418864, -1140458522); - sum1 = W(0, 983096624, -1114535995, 1044450572, 1027001827) + W(1, -1117675070, 1035770150, 1029517505, -1145085849) + W(2, -1097839409, 1032347927, 1031259878, -1104931415) + W(3, -1121873782, -1102746926, 1062380387, -1106235793) + W(4, -1105839880, 1033087169, 1019803894, 1028523779) + W(5, -1088253674, -1090653261, 1036696746, -1121428646) + W(6, 
1027911414, -1100878768, -1109645966, 1061486174) + W(7, -1097301074, -1120235273, -1108005699, 1038991951) + W(8, -1154916402, 1029669390, 1024178544, 1035324408) + W(9, 1033576274, -1142936385, -1107146685, 1047810758) + W(10, 1038477872, -1108040973, -1135118358, -1125909817) + W(11, -1122115416, 1038152785, 1041849616, 1017003668); sum2 = W(0, -1121567066, 1033267920, -1104421963, -1130139832) + W(1, -1132243276, 1024120715, 1034344084, 1026284945) + W(2, -1103410132, 1037507308, 1034735332, -1109350039) + W(3, -1105011035, 1033899226, 1022298858, -1102818563) + W(4, 1044067085, -1127155070, -1142865888, -1120108491) + W(5, 1057117238, 1003910328, 1041074904, 975508032) + W(6, -1136511728, -1122542627, 1038703002, 1043197066) + W(7, -1101099519, -1117378085, -1106811819, -1120621959) + W(8, 1033583066, -1124006960, -1109459597, 1001772648) + W(9, 1032101677, -1134482876, -1122973141, -1124335993) + W(10, 1041237660, -1113899392, -1113394506, -1114137407) + W(11, 1030826553, -1125132432, -1128301632, -1126301549); WS(1053462780, -1083681865); - sum1 = W(0, 1023511963, 1041747598, 1041497612, -1138339988) + W(1, 1035615616, 1020629658, -1126058411, 1015154794) + W(2, 1018224530, 996055791, 1027711272, -1123213919) + W(3, 1027394040, 1045749945, 1041814811, 1014351349) + W(4, 1040756601, 1032965474, -1118697149, 1048726745) + W(5, -1086401302, -1082085657, 1048515830, -1129047376) + W(6, 993307519, 1043007793, -1115623160, -1096882594) + W(7, 1042139693, -1132054806, 1025756296, 1038169844) + W(8, 1032344226, -1105145572, 1038148425, 1033289129) + W(9, -1131368524, 1024626666, -1127070442, -1110953740) + W(10, 1009997960, -1128695798, 1029589575, 1040782360) + W(11, -1131398580, -1110890258, 1029718722, 1029252236); sum2 = W(0, 1008350928, -1118714967, 1030231193, -1091889801) + W(1, -1108897465, 1032606711, 1011413600, 1024023545) + W(2, -1124778398, -1098641764, 1033453825, -1125648711) + W(3, -1120704007, -1126562655, -1110351493, -1104262464) + W(4, 
-1106889474, 1025836907, 1027382950, 1034951721) + W(5, 1052754126, 1057762368, 1035809187, -1131934955) + W(6, -1127381517, -1128407708, 1033272406, -1161878816) + W(7, -1132306565, -1134699254, 981904616, -1126139913) + W(8, -1151802072, 1012148800, -1153180738, -1131180247) + W(9, 1015535284, 999360844, 1013432992, 1024540201) + W(10, 1014625240, -1171757551, -1135583391, -1138986423) + W(11, -1123376913, 1031971990, -1118108772, -1140791326); WS(-1099299320, 1056598066); - sum1 = W(0, 1006197652, -1127547996, -1106221946, 1045972807) + W(1, 1004824505, -1119658116, -1128153660, 1041714651) + W(2, -1098261544, 1038366195, -1113181968, 1030219574) + W(3, -1136164796, 1029712499, -1085896508, 1057638589) + W(4, -1113770157, -1129447670, 1035903322, -1128532088) + W(5, 1057218165, 1054413180, 1038963911, 1033715440) + W(6, -1130396894, -1106761055, 1054525467, -1086573266) + W(7, 1033084478, -1128924420, 1036246880, -1121593759) + W(8, 1028130044, -1090476168, 1042818602, -1116035017) + W(9, -1124832314, 1017604226, 1036683414, -1101308500) + W(10, -1130963634, 1030841394, 1029570879, 1017990507) + W(11, 1016287010, -1110319406, -1114724733, 998207538); sum2 = W(0, -1131923124, 1046772351, -1110971235, -1105011941) + W(1, 1028001687, -1121147281, 993280665, 1028557303) + W(2, -1134148898, -1104446514, -1147560296, 1024426003) + W(3, -1127811288, -1107794670, 1043137579, -1096504482) + W(4, -1101129935, 1034898623, 1032538133, -1098564467) + W(5, 1067961229, 1066594258, -1104613803, -1136832302) + W(6, 992174233, -1121892222, -1095277951, -1107079502) + W(7, -1097683180, 1036381319, 1002762140, 1035775413) + W(8, -1099141736, -1090299346, -1129031039, 1009264906) + W(9, 1015088121, -1158807761, 1011333246, -1132433157) + W(10, -1108888578, 1032287279, 992586073, 1019898989) + W(11, -1118039147, -1109952821, 1040568125, -1111357043); WS(-1105186296, -1108555742); - sum1 = W(0, 1040483623, -1105046378, -1118442722, 1040666374) + W(1, 986662468, -1118942975, 
-1101386974, 1038429012) + W(2, 1042743759, -1109750289, -1103538071, 1040611956) + W(3, 1040816739, 1031828819, -1093958439, 1050019912) + W(4, 1033262923, -1118385334, -1134311356, -1107675004) + W(5, 1030110546, 1040526743, -1121451017, 1012951144) + W(6, -1108915605, 1034430779, 1044698927, -1097473172) + W(7, 1032745156, 1035127903, 1046011615, -1109709717) + W(8, -1103097566, 1045974257, 1041021277, -1100748433) + W(9, -1102638726, 1030173660, 1041159370, -1110364069) + W(10, -1106175813, 1037154391, 1034864637, -1130419297) + W(11, -1111922822, 1017242540, 1032931450, -1121991998); sum2 = W(0, -1127162070, 1014979733, -1114518101, 1016203776) + W(1, 1035296562, -1117887634, -1135271108, -1136185376) + W(2, 1038675289, -1140478504, -1111853852, 1030827904) + W(3, 1003505825, 1028363168, -1105093650, -1089585970) + W(4, 1058969759, -1123277259, 1012864633, -1131143608) + W(5, -1095849351, -1070239185, 1079096535, -1116960146) + W(6, -1123384038, 1033256022, 1028346583, -1078361549) + W(7, 1068193425, -1125445846, 1026898060, -1133517476) + W(8, -1113528611, 1038794260, 1032636395, -1113311282) + W(9, -1134649836, -1120532892, 1028725832, -1125525718) + W(10, 1017911666, 991223090, -1125848258, 1028008335) + W(11, -1117113572, 1005296645, 1032298564, -1134431064); WS(1065442623, 1015025160); - sum1 = W(0, -1115365041, -1113215535, 1018550702, 983382403) + W(1, -1103693946, -1124004468, 1040496438, 1031517084) + W(2, 1044423084, 1017412396, 1032083208, 1040563090) + W(3, -1114325264, -1109245393, -1131936399, -1109909072) + W(4, -1106995843, -1120263275, 1036061554, 1002812769) + W(5, 1034975748, -1099246196, 1024898238, 1012000060) + W(6, 1038370800, 1036804053, 1048106991, -1114225707) + W(7, 1044531927, 1036150809, -1112496600, -1099928488) + W(8, -1107673238, -1129165678, -1096349707, -1113013165) + W(9, 1039123015, 1036473513, 1041344487, 1011581692) + W(10, 1043676418, 1037117105, -1120668655, -1116761110) + W(11, 1019812354, -1138585900, -1110354101, 
-1128538214); sum2 = W(0, 1010698941, 1023126843, -1106210958, -1122624743) + W(1, -1122034237, -1117968485, 1042699596, -1140656688) + W(2, 1029081919, 1040813712, -1174961495, 1039098482) + W(3, 1048659418, -1095653758, -1118272649, -1097694847) + W(4, 1033474214, -1118912655, 1058578114, -1081184698) + W(5, 1060468587, 996865603, -1102355565, 1020119257) + W(6, 1052277576, -1097586600, 1043044817, -1103459725) + W(7, 1049205466, 1031051049, 1019470633, -1098953045) + W(8, -1108654451, -1128239326, -1112376452, -1108144089) + W(9, 1041036392, 1018118885, 1029721267, 1034766857) + W(10, 1033989555, 1038110136, -1119471849, -1122826054) + W(11, -1130901998, 1000734433, 1017824567, -1114988670); WS(-1115864032, 1039792746); - sum1 = W(0, -1140468214, -1121375417, 1034497081, 1039608980) + W(1, -1113171393, -1132620605, 996096400, -1108218204) + W(2, 1033625894, 1040179481, -1108192053, 1018030914) + W(3, 1010495721, -1109130991, 1055392502, -1093805194) + W(4, -1127063626, 1012649753, -1114956320, -1115276388) + W(5, 1036829073, 1050653566, -1109643980, -1127455572) + W(6, 1013460969, 1040789857, -1088431348, 1049263376) + W(7, -1124542831, -1135717173, -1122072970, -1114444905) + W(8, 1030275655, 1051648994, -1129660583, -1147064482) + W(9, -1127833232, 1026520268, -1111886108, -1121903450) + W(10, 1034831154, -1114505023, -1140762151, -1127271361) + W(11, 1031218972, -1112576115, 1019975176, -1135014713); sum2 = W(0, 1015385693, -1115435752, 1035175215, 1017441239) + W(1, -1112773411, 998363696, 1020691269, -1141202058) + W(2, -1117300235, -1122675471, 1031000417, 1016207247) + W(3, 1015565489, -1111436746, 1033401267, 1055273933) + W(4, -1101083444, -1113092041, 1019337403, 1040267853) + W(5, -1100581516, 1073306798, 1046685605, 1013448250) + W(6, -1132207064, -1114623303, 1041259906, -1073173797) + W(7, -1115311051, -1130370856, -1131086480, 1025609602) + W(8, -1101351817, 1032997632, -1113329498, 1026870685) + W(9, -1163542113, -1140470334, 1020278145, 
-1122237085) + W(10, 1031244051, -1124843109, 1010427910, -1128429973) + W(11, 1027629029, -1119509251, 1033297372, 1014260054); WS(1060418110, -1122066101); - sum1 = W(0, 1031696780, -1115430516, -1104465800, 1041576533) + W(1, -1122042801, 1028261758, -1124029560, -1129767372) + W(2, -1124342772, 1035204176, -1109435772, 1025573168) + W(3, -1147879495, 1033383950, -1084846132, 1057835449) + W(4, -1106791333, 1024098613, -1123330286, 1032201307) + W(5, 1040589380, -1121351835, 1025265863, 1000233671) + W(6, -1137787811, -1124043956, 1059143665, -1085338500) + W(7, 1037079395, 1020968696, 1025904121, -1135655378) + W(8, 1047669789, 1026196737, 1031201378, -1122702819) + W(9, -1122010711, 1016436406, 1032251443, -1113520196) + W(10, -1120875027, 1016849816, -1131825009, 1011740349) + W(11, 1017074582, 1023559701, 1024540211, -1123483709); sum2 = W(0, -1127836624, 1033364881, -1101679270, 1023580345) + W(1, -1149619356, -1136662275, -1173496113, -1127629564) + W(2, -1105780675, -1119640853, 1033890881, -1122666983) + W(3, 1027284157, -1104916712, -1113070309, -1132739235) + W(4, -1113766541, 1023707939, 1022886010, -1097350377) + W(5, 1061873799, 1062616603, -1112358042, 1028952789) + W(6, 1036751625, -1105423640, -1096904559, 1009507187) + W(7, -1119315178, -1117384012, -1124242822, 1034826035) + W(8, -1105550966, -1098447862, 1045500652, -1137245711) + W(9, 1017945890, -1122986652, -1127473876, -1116534057) + W(10, 1015952658, -1136860123, -1140181115, 1033442485) + W(11, -1108824717, -1115327874, 1033458469, 1001899534); WS(1052277756, 1024619064); - sum1 = W(0, 1029425189, -1120340480, 1055903799, -1093981294) + W(1, -1125235986, 1026200342, -1155801216, -1107014752) + W(2, 1049346130, -1099678446, 1003141514, -1131584400) + W(3, 1030252033, 1038469578, 1057427995, -1098793998) + W(4, 1025012844, 1031806855, -1112675811, 1032327591) + W(5, -1103272826, -1090697510, -1118748640, -1126277858) + W(6, 1025211073, 1034534022, 1040656272, -1103403378) + W(7, 
1025335343, 1010568017, -1122395345, 1039704040) + W(8, -1111436999, 1041606598, -1106833215, 1032770798) + W(9, 994289396, -1137717581, 1016775164, 1005246730) + W(10, 1029370155, -1124454216, -1124711713, 1045346698) + W(11, -1134653325, 1013960373, -1099652851, 1034190187); sum2 = W(0, 1023289146, 1045862572, -1095566322, -1087461476) + W(1, 1016132472, 1044890152, 1020437582, 1028786959) + W(2, -1132354231, -1101737384, -1127607289, -1124440021) + W(3, 1009748873, -1120652794, -1090793815, -1083542229) + W(4, -1107157844, 1037463572, 1030199907, 1015917080) + W(5, 1063879108, 1069217208, -1097091848, 1032502373) + W(6, 1007345789, -1117660774, -1107315270, 1056862945) + W(7, -1098187278, 1028169917, 1010063617, -1106003069) + W(8, 1045942315, -1153692567, -1116485608, -1108513707) + W(9, 1018930146, 1039304637, -1106431667, -1111757292) + W(10, 1036040005, 1034190881, -1127799210, -1098471935) + W(11, 1048572904, 1056217861, -1094693884, -1108016950); WS(-1086218302, -1084258561); - sum1 = W(0, 1027347742, 1041008695, 1040638149, -1088163117) + W(1, 1045697689, -1131589088, 1021408948, 1036310380) + W(2, 1041666443, -1103354245, 1033383494, 1033326017) + W(3, -1127633782, 1035524367, 1033011818, -1094269524) + W(4, 1040895122, -1132088142, 1039366096, 1048593117) + W(5, 1050103608, -1122175629, 1042904478, 1042877939) + W(6, -1116749595, -1138037400, -1111907839, -1096387988) + W(7, -1136050786, -1131538396, 1033396118, 1045683171) + W(8, -1097626581, -1103712029, 1018682217, 1039834587) + W(9, -1139872292, 1032120647, -1099416246, -1111740158) + W(10, -1126758410, 1030901630, 1025066091, 1046369740) + W(11, -1095221944, -1117250695, -1121047845, 1034112696); sum2 = W(0, -1116225927, 1020583173, -1120289762, -1116630862) + W(1, 1037488437, -1120774706, 1030773056, 1018516207) + W(2, -1142227828, -1157244503, -1120616119, 1021325617) + W(3, -1122365237, -1121497633, 1024002380, 1045806125) + W(4, -1122845130, -1124264052, 1032148531, 1032012136) + W(5, 
1050603076, 1061490298, 1029135782, 1020114521) + W(6, -1127594672, -1111134479, 1040110681, -1104925089) + W(7, -1122119114, -1129316292, 1026422036, -1112679629) + W(8, -1109379336, -1096282325, 1035818151, -1129796491) + W(9, 1023835916, -1123177144, -1102178993, -1103628682) + W(10, -1118358279, 1024324894, -1131330830, -1112996078) + W(11, -1102999759, -1116593585, 1024073644, -1129630471); WS(-1092406524, -1089571522); return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); } -float GetLuma(float3 color) { - return dot(float3(0.299f, 0.587f, 0.114f), color); -} +shared float inp[507]; -groupshared float inp[507]; +#define CURRENT_PASS 1 -void Pass1(uint2 blockStart, uint3 threadId) { - const float2 inputPt = GetInputPt(); +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { temp[pos] = (value); } - const uint2 group_base = uint2(blockStart.x, blockStart.y >> 1); - for (int id = threadId.x * MP_NUM_THREADS_Y + threadId.y; id < 507; id += MP_NUM_THREADS_X * MP_NUM_THREADS_Y) { - uint x = (uint)id / 13, y = (uint)id % 13; - inp[id] = GetLuma(INPUT.SampleLevel(sam, inputPt * float2(group_base.x + x - 3 + 0.5, group_base.y + y - 2 + 0.5), 0).rgb); - } +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); - GroupMemoryBarrierWithGroupSync(); +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt - float4 ret = 0.0; - float4 ret0 = 0.0; - float4 samples[12]; - const uint local_pos = threadId.x * 13 + threadId.y; +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 507; id += int(gl_WorkGroupSize.x 
* gl_WorkGroupSize.y)) { + uint x = (uint)id / 13, y = (uint)id % 13; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (3)) + 0.5, float(group_base.y + y - (2)) + 0.5)).x; + } + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[12]; samples[0][0] = inp[local_pos + 0]; samples[0][1] = inp[local_pos + 1]; samples[0][2] = inp[local_pos + 2]; @@ -192,136 +1515,1433 @@ void Pass1(uint2 blockStart, uint3 threadId) { samples[11][1] = inp[local_pos + 94]; samples[11][2] = inp[local_pos + 95]; samples[11][3] = inp[local_pos + 96]; - - const uint2 destPos = blockStart + uint2(threadId.x, threadId.y * 2); - tex1[destPos] = samples[5][0]; - tex1[destPos + uint2(0, 1)] = nnedi3(samples); + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 41]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret); } - - //!PASS 2 -//!DESC double_x -//!IN tex1, INPUT -//!BLOCK_SIZE 64,8 -//!NUM_THREADS 32,8 - -float nnedi3(float4 samples[12]) { +//!DESC NNEDI3 (double_x, nns64, win8x6) +//!IN INPUT, temp +//!OUT OUTPUT +//!BLOCK_SIZE 64, 8 +//!NUM_THREADS 32, 8 +#pragma optionNV(inline none) +float nnedi3(vec4 samples[12]) { float sum = 0.0, sumsq = 0.0; - [unroll] - for (int i = 0; i < 12; i++) { - sum += dot(samples[i], 1.0f); + [unroll] for (int i = 0; i < 12; i++) { + sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0)); sumsq += dot(samples[i], samples[i]); } - - float mstd0 = sum / 48; - float mstd1 = sumsq / 48 - mstd0 * mstd0; - // 不能使用 lerp,否则结果可能为 nan - float mstd2 = mstd1 >= 1.192092896e-7 ? 
rsqrt(mstd1) : 0.0; + float mstd0 = sum / 48.0; + float mstd1 = sumsq / 48.0 - mstd0 * mstd0; + float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7); mstd1 *= mstd2; - float vsum = 0.0, wsum = 0.0, sum1, sum2; +#define T(x) intBitsToFloat(x) +#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3))) +#define WS(w0, w1) \ + sum1 = exp(sum1 * mstd2 + T(w0)); \ + sum2 = sum2 * mstd2 + T(w1); \ + wsum += sum1; \ + vsum += sum1 * (sum2 / (1.0 + abs(sum2))); + sum1 = + W(0, -1120714617, 1007614003, -1123305414, 1009908268) + W(1, -1129324172, -1121901526, 1022915304, -1142748844) + + W(2, 1035162146, -1123901891, 1027364554, 1030225939) + + W(3, -1116882705, 1030599214, -1135846624, -1113117446) + + W(4, -1110752415, 1046176310, -1114390527, -1110762423) + + W(5, 1032526991, -1096831458, -1117077507, -1127767760) + + W(6, -1121818163, -1105876799, -1096860741, 1046191054) + W(7, 1043289735, 1052908756, 1034530771, 1039462233) + + W(8, 1035961863, 1034124059, 1022953826, 1023450788) + W(9, -1114841418, -1114991488, -1116257712, 1025047056) + + W(10, -1120878363, -1120533037, -1155422496, -1124524780) + + W(11, -1129613106, 1013731840, 981289536, -1143328976); + sum2 = + W(0, -1131108965, 1000650931, 1025688791, -1116852534) + W(1, -1145056307, -1123043594, 1018436747, -1139914565) + + W(2, 1043465638, -1113612493, -1106748353, 1029599173) + + W(3, 1045881517, -1118230687, 1029259867, -1107504809) + + W(4, 988544780, -1124179817, -1070139293, -1102659167) + W(5, 1078104617, 1048052425, -1126127905, 1025133515) + + W(6, -1145266179, -1111438835, -1110118136, -1097100047) + + W(7, 1027449515, 1027739737, -1136112041, 1036504803) + W(8, 1018501507, 1003301507, 1026178295, 1031198431) + + W(9, -1123515159, -1134523125, 995063942, -1120319069) + + W(10, 1021344415, -1131299129, -1123696315, 1012522521) + + W(11, 1024975831, 1020199127, -1162666156, -1131042995); + WS(1061710334, -1113637247); + sum1 = W(0, -1121814583, -1138603692, 
-1138264845, -1140194465) + + W(1, -1131439046, -1125621862, 1023073058, -1129805541) + + W(2, 1036106897, -1122595552, 1018454510, 1033737112) + + W(3, -1122735662, 1024541667, -1122620661, -1116003130) + + W(4, -1109309294, 1048694647, 1030681928, -1115559686) + + W(5, 1044326142, -1097437158, -1132933761, -1128614666) + + W(6, -1143950978, -1103580007, -1097890158, -1120177173) + + W(7, 1042373458, 1052689231, 1034486114, 1037961704) + W(8, 1033251382, 1032154570, 1019050396, 1021617155) + + W(9, -1113884080, -1107957310, -1116304653, 1019245786) + + W(10, -1118153848, -1122508490, 1012933309, -1120902675) + + W(11, -1152998244, 1016843898, -1144273714, -1155701620); + sum2 = + W(0, -1162960213, 1013908203, -1113327157, 1023571595) + W(1, -1133790043, 1024139725, -1130037920, 1007034787) + + W(2, -1107084454, 1023868855, 1032525526, 1048796624) + W(3, -1104878976, 1003078933, -1121062518, 1034207166) + + W(4, -1126626424, 1032930462, 1076098019, -1132699465) + + W(5, -1072747140, -1104764998, 1019199125, -1123497947) + + W(6, 1025411499, 1029914487, 1035900190, -1093496445) + W(7, 1026052839, -1109617402, 1025857863, -1118139493) + + W(8, -1126548940, 997665611, -1121182801, 1038561302) + + W(9, -1146184997, -1142783113, -1147685381, 1017680405) + + W(10, -1127269937, 1018748433, -1158385717, 1000017309) + + W(11, -1123007653, -1154085627, 1000677165, 1016687665); + WS(1064800702, 1030635520); + sum1 = W(0, 1004285524, -1122860750, -1122286551, 1021316024) + + W(1, -1116388951, 1022533838, -1125657162, 1001429301) + + W(2, -1128056492, 1024005652, 1035530413, 1053335517) + + W(3, -1139373960, -1130762670, -1121564617, -1129213371) + + W(4, 1035663048, 1031878036, 984521208, -1100061872) + W(5, 1048911707, 1035194194, -1131931048, -1141986761) + + W(6, -1110660706, -1116831438, 1023270757, -1093549817) + + W(7, -1104621327, -1149876978, -1142066105, 1031904066) + + W(8, -1128734654, -1204558974, -1136886024, 1042579293) + + W(9, -1148999845, -1142259845, 
-1120920449, -1111776529) + + W(10, 1028175148, -1143857649, 1005008153, 1016449422) + + W(11, -1135015336, 1010510404, 1018152308, 1020350678); + sum2 = + W(0, 1015139874, -1150118671, 1032711786, 1028577839) + W(1, -1116703472, -1123435694, -1140098908, -1140019692) + + W(2, -1120731382, -1116737087, -1087320828, -1069693959) + + W(3, -1103666133, -1138801588, 999422392, 1029210329) + W(4, 1017539382, 1031888972, 1059502015, 1078824499) + + W(5, 1045823576, 1009232228, 1011539428, -1118193829) + W(6, 1020112270, -1112637962, 1026899401, -1094071548) + + W(7, 1036639812, -1114441289, 1023067274, -1131442506) + + W(8, -1126248934, 1010207212, -1119882792, 1040940327) + + W(9, -1131804892, 1029410629, -1130083750, 1007549476) + + W(10, 1013963196, -1138130380, 1019091922, -1123861886) + + W(11, 1004924552, -1130443436, -1137456436, 1026106777); + WS(1058942782, 1023618692); + sum1 = W(0, -1126072821, 1011693912, 1017195423, -1122268692) + W(1, 1022607903, 982545387, -1126061755, 1003763447) + + W(2, 1041771492, 1025548876, 1036961985, 1037536142) + W(3, 1038780013, 1023558139, 1037537425, 1035261291) + + W(4, -1110777188, 1035801072, -1094081945, -1091452759) + + W(5, -1095962376, 1049096396, -1113879430, 955835050) + + W(6, -1110871838, 1043278082, -1103634941, -1105679152) + + W(7, -1103558179, -1130619215, -1136949949, 1026350907) + + W(8, 1036426764, -1146527995, 1046715887, 1040286827) + W(9, 1049101574, 1031851588, 1002869195, 1006549707) + + W(10, -1142526755, 1015953709, -1148709083, -1119884351) + + W(11, -1122534754, 1007174309, 1017517093, 986004587); + sum2 = + W(0, -1128639222, -1150616818, -1118932607, -1125568566) + W(1, 991022546, -1129687950, 1018120650, 1006676397) + + W(2, 1001638233, 1013509397, 1035428655, 1024255335) + W(3, 1031481278, -1111645244, 1028380841, -1115817479) + + W(4, -1119841411, 1029376723, 1019940778, 1063260126) + W(5, 1032753418, 1026999545, -1121385219, -1134958989) + + W(6, 1014385869, 1037911199, -1102461584, -1085129330) 
+ W(7, -1110003004, 1019019978, 1021463302, 1017625850) + + W(8, 986023269, 970306708, -1141011097, -1110526300) + W(9, 1033244979, -1121764803, 1025283273, -1128128990) + + W(10, -1135260845, 1013815957, 959170344, 1017551658) + + W(11, 1016351942, -1127392454, 1009569613, -1138867245); + WS(1037837808, 1056377428); + sum1 = + W(0, -1135573495, 1023727061, -1118803082, -1123500111) + W(1, 1020600341, -1111066541, 1008936161, -1115286797) + + W(2, -1105969090, -1138454045, -1114548551, 1046351686) + W(3, 1036383371, 1034647805, 1026657308, 1033474888) + + W(4, 1027945538, -1116105371, -1126955510, -1122693874) + + W(5, 1053007142, 1035274060, -1156882549, -1122516460) + W(6, 1043223694, 1039709953, 1051232418, -1100833583) + + W(7, -1103740252, -1101618713, 1033814037, -1129756781) + + W(8, -1100584037, -1111027805, -1097887999, 1040497066) + + W(9, 1024768136, -1138012047, -1123286772, 1010716753) + + W(10, 1025656415, 1021082723, 1018764077, -1115071549) + + W(11, 1025234525, -1116333124, 1016891995, -1114238799); + sum2 = W(0, 1020978875, -1121670462, -1103298336, -1110731474) + + W(1, -1105136733, -1119790394, -1127100056, 999102475) + + W(2, -1100967324, -1130583533, -1096736455, 1053186254) + + W(3, 1045958215, 1043729632, 1033816035, 1020355333) + W(4, 1043179705, -1122981488, 1062358565, -1112891412) + + W(5, 1031005462, 1033523405, -1109547546, -1122874410) + + W(6, 1026290891, 1032797386, -1104401130, -1091109944) + + W(7, -1133705481, -1102472853, 1031959215, -1122532081) + + W(8, 1013196277, -1120286641, -1114052290, 1048200412) + + W(9, -1113847816, 1033631266, -1126209517, 1027184816) + + W(10, -1138979633, 966672716, -1152672163, -1122242747) + + W(11, 1023845499, -1118586102, -1147194793, -1127245018); + WS(1050865148, 1032626572); + sum1 = W(0, -1110436993, 1040042556, -1107890608, -1113356418) + + W(1, 1034032544, -1105712244, 999321197, -1110201208) + + W(2, 1043627312, -1101476879, 1049906138, -1109025830) + + W(3, -1120749707, 1051482110, 
-1103830356, 1048543054) + + W(4, -1118552440, -1108049256, -1107535228, -1162779891) + + W(5, -1095271743, 1035225158, -1104074479, -1106449960) + + W(6, 1045700875, 1033317691, -1115480133, 1049713062) + W(7, 1042738346, -1122810712, 1050825756, 1031488492) + + W(8, -1115060612, 1031191472, 1025393771, -1106727656) + + W(9, -1127443770, 1039553933, -1104755066, 1040395170) + + W(10, 1040430485, -1096410465, 1042925123, -1114330616) + + W(11, -1106585845, 1042770518, -1103219825, 1009713998); + sum2 = W(0, 1024944996, 1020193961, 1014966838, 1015828472) + W(1, -1113612636, 1032934706, -1116105943, 1021395599) + + W(2, -1111924540, 1007347355, -1116539066, 1012341552) + + W(3, 1034050704, -1113193567, 1042584092, -1116104243) + + W(4, -1107012648, 1049541488, -1107447750, -1109770505) + + W(5, -1111197863, 1014122612, 1016467631, -1135977693) + W(6, 1057346437, 1054940943, 1059914271, 1066972918) + + W(7, 1063973493, 1056333532, 1045837843, 1043326701) + + W(8, -1105023593, -1097277462, -1142156696, 1043445868) + + W(9, 1017043379, -1118781870, -1113967259, -1117573943) + + W(10, -1097010076, -1107952444, -1085761230, -1078926120) + + W(11, -1084679416, -1092348708, -1128370702, -1108363561); + WS(1054811644, 1027249161); + sum1 = + W(0, 1040940111, -1100243090, 1043481462, -1119365790) + W(1, -1111878381, 1044780378, -1101638738, 1035929492) + + W(2, -1101358582, 1036221579, 1014943347, -1109150355) + + W(3, 1036687602, -1109759237, 1028424107, -1122643648) + + W(4, -1113576073, 1038054901, -1097482896, -1106696288) + + W(5, 1022736646, -1097940765, 1038478827, -1103674991) + W(6, 1049141881, -1105999252, 1051960356, 1048275427) + + W(7, -1118345360, 1049145430, -1113101949, 1037909315) + + W(8, -1132483205, -1104538698, 1036681666, -1123112824) + + W(9, 1019000333, 1044474573, -1103799553, 1037239602) + + W(10, -1116754862, 1037145120, -1118995685, -1126689554) + + W(11, 1038634998, -1101292557, 1034438878, -1126283866); + sum2 = W(0, 1023766557, 1015306072, 
-1136376403, -1113107482) + + W(1, 1038494252, -1113447013, 1023629411, 1024784188) + W(2, 1016554458, 1020144724, -1116988904, 1050162106) + + W(3, -1104347057, 1003678517, 1033762395, -1109441777) + + W(4, 1021815988, -1103467660, 1034521268, -1102546407) + + W(5, -1103257907, 1025406308, -1113239746, 1026085595) + + W(6, -1137435361, 1016553974, 1060795535, 1075503478) + W(7, 1072821349, -1113972415, 1031283975, 1037416902) + + W(8, -1114856427, 1044555388, -1088370625, -1071164689) + + W(9, -1077590614, -1112563255, 995356642, -1112537357) + + W(10, 1032032312, -1105991725, -1127777881, 1035804901) + + W(11, -1120944929, 1036418439, -1123929939, -1140753514); + WS(1055940220, -1124188157); + sum1 = + W(0, -1113999823, 1015763751, -1115168029, 1022861696) + W(1, -1114461725, -1113117029, 1032164308, -1121800074) + + W(2, -1119569210, 1043278990, -1094609330, 1044340802) + + W(3, 1033154083, -1106205123, 1021089498, -1107756257) + + W(4, 1015781094, -1106256755, 1052603679, -1095339972) + + W(5, -1108564239, 1042723938, -1125646960, -1171638077) + + W(6, -1147977024, 1046188556, 1041916962, -1108128054) + W(7, 1057380776, -1095928258, 1041299799, 1027517543) + + W(8, -1117684995, 1036478965, 1033522483, 1047885622) + W(9, 1036635468, 1035899866, -1139371156, -1131185034) + + W(10, -1108336232, 1006648968, -1101485874, -1110457109) + + W(11, -1114614567, -1108232826, 1034599135, -1115299047); + sum2 = W(0, -1120118804, 1013270421, -1120552641, -1117027367) + + W(1, 1016254148, -1110780393, 1030787932, -1133117880) + + W(2, -1136369034, 1027353114, -1125884733, 1035518448) + + W(3, 1043285424, -1129166063, -1140216380, -1120569751) + + W(4, -1127959037, -1129939889, -1117793827, -1095769137) + + W(5, -1095274849, -1117623897, -1128598589, -1129927717) + + W(6, 1015341014, 1034464346, -1117887441, 1043338219) + W(7, 1049861339, -1128474295, 1032330618, 1018695247) + + W(8, 1033327798, 1041751325, 1049619624, 1049093640) + W(9, 1048627700, 1035712363, 1035384499, 
1027229852) + + W(10, -1113602386, -1114857235, -1103868602, -1097589990) + + W(11, -1105817123, -1106207094, 1004274322, -1120123842); + WS(1057163582, 1025817537); + sum1 = W(0, 1037167835, -1121852313, 1034559945, 1027044951) + W(1, 1039868489, 1037598609, 1007581568, 1031626785) + + W(2, 1031655825, 1046829204, 1038865075, 987855837) + W(3, -1105739319, -1112419405, 1015381448, 1037495244) + + W(4, -1096271953, -1093143228, -1085198281, 1050118415) + + W(5, 1057217669, 1047779928, 1035805671, 1015872616) + W(6, 1045084224, 1046279122, 1057074420, 1040167170) + + W(7, -1086525384, -1087515051, -1098926044, -1124461564) + + W(8, 1033520525, -1103697416, -1107583934, 1024989936) + + W(9, 1034776664, 1044254259, -1134624240, -1113253902) + W(10, 1016078798, 1030441081, 1036548879, 974934139) + + W(11, 1034922130, -1113468278, 1036139674, 1017385676); + sum2 = + W(0, -1136384493, 1023814631, 1033659202, 1017810163) + W(1, 1009967381, 1017781727, -1118180802, -1141961139) + + W(2, 1036526786, -1099879539, 1034332004, 1013925981) + + W(3, -1106516740, -1101585218, -1131784625, -1112082470) + + W(4, 1028719631, 1030342243, 1046758826, -1093436173) + W(5, 1051870663, 1042788701, -1114786901, -1113417628) + + W(6, -1123632092, 1043747658, 1050427835, -1100447595) + W(7, 1041231786, 1045006578, 1010389989, -1117114477) + + W(8, -1145098603, -1102737141, -1112406001, -1119158797) + + W(9, 1034292458, -1107586767, -1143106923, 1032164569) + + W(10, -1122280548, -1130733945, 1014543213, -1123106681) + + W(11, 1019870319, 1013053757, -1119739637, 1012643781); + WS(-1081763615, -1092598780); + sum1 = W(0, -1130922677, -1122514966, -1136114845, 1015393433) + + W(1, -1137786397, 1023447063, -1142668459, -1140705115) + + W(2, -1114318275, 1026414532, 1017564438, 1021344041) + + W(3, -1095352826, -1116164091, -1123782681, 1012849463) + + W(4, -1127997567, -1121576984, -1088500870, -1098266094) + + W(5, 1061435536, -1148866211, 1039772482, 1024002537) + W(6, 1028467828, 
-1127358845, 1061837768, 1035926493) + + W(7, -1095349722, -1114585584, -1114677716, -1116920883) + + W(8, 1023598927, 1017010415, -1116904946, 1037585875) + W(9, 1040151266, 1031742785, 1007413957, -1128808099) + + W(10, -1139280241, 1002425203, -1123987795, -1136160989) + + W(11, -1131570741, -1116123969, 1020007637, -1126849079); + sum2 = + W(0, 1021958137, -1117837187, 1026312941, -1115513274) + W(1, 1037370871, -1112242113, 1033686533, -1107052036) + + W(2, 1034408685, 1036094407, 1033683959, -1124949158) + W(3, -1091894288, 1027011647, -1107943508, 1010459130) + + W(4, -1106091708, -1147105428, -1094093059, 1061646324) + + W(5, 1058519893, -1106569597, 1037199097, 1030945863) + + W(6, -1098486326, -1110159069, -1088187103, 1055280585) + W(7, 1045036050, 1045721104, 1034325435, 1039418143) + + W(8, 1028745769, 1031154711, -1105512708, -1100598283) + + W(9, -1109806227, -1117345257, -1144974476, -1124920152) + + W(10, 1024682325, -1116910593, 1039406737, -1117359799) + + W(11, 1026080301, -1117139296, 991455177, -1112397476); + WS(1049187708, 1061143407); + sum1 = W(0, 1032206028, -1121811767, 1032297330, -1136528167) + + W(1, 1005295654, 1031846890, -1136496587, 1032778794) + W(2, 1046469409, 1033070593, 1049873346, 1050639650) + + W(3, 1041319460, 1041542033, 1036329419, 1043869975) + W(4, 1044620591, 1020031147, -1111788011, -1089881571) + + W(5, -1127219501, 1033106537, 991465499, -1112918515) + + W(6, -1086760535, -1113096094, -1092765627, -1083700943) + + W(7, -1092202425, 1011207027, 1015330121, 1026637361) + W(8, 1049164066, -1132628435, 1051671486, 1044844041) + + W(9, 1037004161, 1026161438, -1134373563, 1019220893) + W(10, 1022165171, 1009114287, 1034535880, 1024964917) + + W(11, 1011091807, 1038185039, 1018308239, 1032129132); + sum2 = + W(0, -1123557888, -1139662759, -1120649660, 1021700488) + W(1, -1119215196, 1025758640, -1134635263, 1025396502) + + W(2, 1024038368, 1022314468, 1016353740, -1149024430) + + W(3, -1127462832, -1109226016, 
1022909500, -1106595065) + W(4, 964997605, 1007232143, -1112234110, 1052797849) + + W(5, 1032636001, -1096670151, -1103246759, -1095307242) + + W(6, -1123190992, 1040909663, 1035500288, 1059925772) + + W(7, -1123381020, -1110751472, -1114698588, 1016687528) + + W(8, -1170950771, -1119781676, -1122589660, 1021322900) + + W(9, -1124742672, 1037872214, -1123038904, 1025386190) + + W(10, 1009594487, 1011035391, -1122952928, 1015425876) + + W(11, 1024335912, -1115520552, 1026100870, -1128653768); + WS(-1080960863, 1058419411); + sum1 = W(0, -1118700722, 1032983255, -1122641758, 1033094082) + + W(1, -1121000958, -1133773881, -1141737059, -1120123854) + + W(2, -1111066847, -1112208894, -1099579692, -1090459763) + + W(3, -1107128402, -1107276892, -1116550064, -1103159313) + + W(4, 984198859, -1099715572, 1049479517, 1062349342) + W(5, -1099059374, 1033076198, -1125040759, 1037355536) + + W(6, 1043965403, -1113907195, 1050672334, 1061713555) + W(7, 1059583022, -1121054998, 1024684126, 1026513898) + + W(8, -1108783427, -1113914551, -1105019770, -1095692932) + + W(9, -1099879371, -1108028092, 1024533736, -1110661700) + + W(10, 1000248987, -1124179019, -1128328213, 1023738862) + + W(11, -1131329315, 1027231576, -1113801521, 1007580489); + sum2 = W(0, 995719700, -1114687459, 1024240180, -1115935924) + W(1, 1025427056, -1123062148, 999440794, -1131128122) + + W(2, -1129327601, 1021214658, 997717028, 978476974) + W(3, -1140454571, 1010080137, 1002330866, -1152746908) + + W(4, -1125739202, 1027238209, -1120915257, -1078687396) + + W(5, -1111104335, -1128218828, -1147995690, -1128526870) + + W(6, 1029045123, 1037994429, 1036441804, 1066236156) + W(7, 1022342240, 1028953671, 1024857232, -1126806088) + + W(8, 1025141617, -1113434302, 1032546636, 1035362808) + + W(9, 1033358736, -1166615662, -1165027863, 1024563904) + + W(10, -1154923092, 1007115993, 1003062850, -1118441375) + + W(11, 1017727844, -1129048983, 1019778022, 1009762473); + WS(-1082880574, -1095080656); + sum1 = W(0, 
-1120399523, -1123397517, -1135116544, -1168421181) + + W(1, -1131259136, 1037719400, -1113348279, 1030740495) + + W(2, -1111736159, -1128373242, 1025039907, -1127950106) + + W(3, -1098593297, -1106461210, 1024172498, 1028249110) + + W(4, -1111698940, 1019423266, -1092929515, -1098702381) + + W(5, 1060991762, 1018082074, 1036669022, -1114016273) + W(6, 1037879414, 1028697399, 1062200098, 1023355236) + + W(7, -1094756999, -1115312044, -1106391640, -1109491766) + + W(8, 1037066756, -1119567216, -1131199446, 1027882718) + + W(9, 1038218895, 1022591155, -1118950442, -1141634128) + + W(10, -1118524245, 1027581671, -1139610160, -1139561152) + + W(11, 1005358744, -1113962893, 1026868195, -1126548648); + sum2 = + W(0, 1007561151, -1118820752, -1128452386, -1121745985) + + W(1, -1114861795, -1121584151, -1120079456, -1129117272) + + W(2, -1106890729, 1031296198, -1110907715, 1032462388) + W(3, 1035962501, 1035033495, 982365671, 1035951051) + + W(4, -1144059530, -1112391017, 1056169840, -1096446085) + + W(5, 1031556034, 987032615, -1138039959, -1119286197) + W(6, 1010626011, -1114880200, 1052511775, -1094697844) + + W(7, 1056886944, -1129701328, 980377326, -1126702674) + W(8, -1140206187, -1136937433, 1034255225, 1033067226) + + W(9, -1104164393, 1013679559, -1115367679, -1137374445) + + W(10, -1152147163, -1120281594, -1114711449, 993102387) + + W(11, -1120009849, -1122813977, -1118918686, 1006646323); + WS(1058795070, 1058351276); + sum1 = W(0, -1129226172, 1032650784, -1134152362, -1122312016) + + W(1, -1145265749, -1118796187, 1017891569, -1131365684) + + W(2, -1106246658, -1132244111, -1102574120, 1028257624) + + W(3, 1004724909, -1128587973, -1150296521, -1107598873) + + W(4, 1041614193, 1048401911, 1050141831, -1088102699) + + W(5, -1093353638, -1112285036, -1122892680, -1120116589) + + W(6, 1041522846, 1041471472, 1033352747, 1030285885) + W(7, 1056599450, 1049138668, 1043369227, 1040658126) + + W(8, -1106977045, 1031926461, -1121658742, -1124591186) + + W(9, 
-1107883356, -1114299650, -1130730345, 1034850634) + + W(10, -1140271486, -1130658915, -1115542891, -1119417531) + + W(11, -1155196377, 1003429157, -1125742491, -1137642362); + sum2 = + W(0, 1018264792, 1019878456, 1020074080, -1143212320) + W(1, -1148803168, -1127882992, 1017409120, -1131165168) + + W(2, -1109681111, -1134735936, -1106212594, 1037009054) + W(3, -1132238328, 1032289620, 990246720, 1004132864) + + W(4, 1028734812, -1093714299, 1034660210, 1047288883) + W(5, 1031452084, -1112028563, -1115169564, 1020410832) + + W(6, -1104210606, -1099909667, -1098896203, 1060586916) + + W(7, 1042866381, -1109339206, 1023925320, -1112228144) + W(8, 1025579416, 1032419228, 1010388000, 1030286292) + + W(9, -1107165918, 1019446368, 989303425, 1029468412) + W(10, 1029305888, -1113499692, 1025031188, -1130541520) + + W(11, -1127563664, 1026005920, -1123344268, 1009015280); + WS(1043816952, 1056206353); + sum1 = W(0, -1132948972, 1024900084, -1150680698, 1016358425) + + W(1, -1160444148, -1115326124, 1019123435, -1120343575) + + W(2, -1115461859, -1107172113, -1097009915, -1102438229) + + W(3, 1024199571, -1148961467, -1130723763, -1144998513) + + W(4, 1034684352, 1038250028, 1061195075, 1051576078) + W(5, -1088975285, 1010078034, -1107729186, 1022450003) + + W(6, 1037119642, 1039626815, -1095598055, 1056604231) + W(7, 1057172865, 1029312608, 1039764276, -1112740858) + + W(8, -1107088436, -1110088668, -1113187289, -1102307882) + + W(9, -1103288997, -1122873761, 1032878393, 1034042985) + + W(10, -1144087781, -1116127087, -1117712852, -1114968828) + + W(11, -1134624362, 1012385158, -1114736693, 1002559709); + sum2 = W(0, -1135809827, -1140675011, -1137384851, -1147988326) + + W(1, 1019754904, -1172160176, 1006265798, 990940844) + W(2, 1031002711, 1009667595, -1156373420, 1010789683) + + W(3, -1122328477, -1135703979, 1016962928, -1135289651) + + W(4, -1136033931, 1025543601, -1123691685, -1105219090) + + W(5, -1118075063, -1111692677, -1131889022, 1010975355) + + W(6, 
-1114169661, -1124911966, 1056449903, 1044469394) + + W(7, 1046754031, 1034039333, 1024901767, -1111731157) + + W(8, -1107967067, -1108557845, -1096033675, -1118550723) + + W(9, 1020360378, 1005574774, 1025982043, 1036125487) + W(10, 1027957130, 1004385158, 1015282250, -1115586301) + + W(11, -1120013003, -1130287690, -1127946498, -1169619760); + WS(1049886076, 1034318367); + sum1 = + W(0, -1140544720, 1006465467, 1025588553, -1118154094) + W(1, 996302474, -1111905052, -1125961261, -1122979160) + + W(2, 1022042959, -1106141033, -1104462159, -1100197533) + + W(3, 1029682164, -1112348667, -1117824763, -1123002959) + W(4, 1021784769, 966352080, 1059646174, 1051735858) + + W(5, -1085821075, 1029991670, -1111853843, 1031029131) + + W(6, -1131075883, 1032360624, -1086897182, 1059041662) + W(7, 1057694799, 1036690656, 1030592201, -1155480906) + + W(8, -1142660485, 1029412697, 1042100949, -1106256562) + + W(9, -1106530518, -1121375714, 1017125029, 1014936522) + + W(10, -1160888244, -1121174096, 1017231209, -1150715274) + + W(11, 1024907260, 1015672965, -1120397516, -1124207481); + sum2 = W(0, -1110846605, -1118194097, -1143898437, 1019764549) + W(1, 1008325379, 999139301, 997400075, 1024689095) + + W(2, 1032320648, 1027617785, 1000132981, -1102116254) + + W(3, 1026769453, -1131321177, 1008054267, -1118482095) + + W(4, 1027180085, -1144235925, 1031570329, 1058710858) + + W(5, 1031557517, -1129394727, -1131248001, -1120094485) + + W(6, 1029603693, 1030911393, -1119293171, 1058397441) + + W(7, -1104852463, -1097515304, -1123234663, -1106708620) + + W(8, 1033577170, -1134544211, -1126616369, -1096477332) + + W(9, -1103611475, -1115701158, -1119810656, -1134741251) + + W(10, -1112673640, -1123614185, -1129974305, 1018853209) + + W(11, 1036639048, 1018401637, 1024391909, 1017993989); + WS(1058429118, 1064863249); + sum1 = + W(0, 1021156518, 1024394983, 1020219616, -1131178666) + W(1, 1016100640, 1007341951, 994206685, 964693930) + + W(2, 983001563, -1115666854, 1039490386, 
1046648007) + W(3, 1017911734, 1043757188, -1128564932, 1045299318) + + W(4, 1054264097, 1049468028, 1055450440, -1104196510) + W(5, 992909240, -1104186631, -1122765517, -1105268364) + + W(6, -1097350840, -1099725362, -1115114453, -1085827188) + + W(7, -1100930382, -1128095056, -1118903024, 1023907580) + W(8, -1175841770, 1024539904, 997016493, 1043203980) + + W(9, 1033767351, -1119513801, -1131557236, -1106015350) + + W(10, 964768362, -1149024695, 1012766063, 1022718592) + + W(11, -1136266891, 1035657141, 1016423590, 1029236260); + sum2 = + W(0, 1005391535, 1018096716, -1135288360, -1121648533) + W(1, 996459166, -1130245220, -1123582348, 1033610651) + + W(2, -1106696811, -1121907467, 1026226746, 1031305614) + + W(3, 1032267559, -1111725659, 1028271694, -1108648643) + W(4, 1050575955, 1051313221, 1047952814, -1081952512) + + W(5, 1040750694, 1031579134, 1007524520, 1034169827) + W(6, -1100156814, 1060662591, 1068258385, -1074657656) + + W(7, -1095314974, 1036231973, 1044691450, 1044987108) + W(8, -1098256564, 1029467086, -1142136447, 1051661425) + + W(9, 1035381379, -1107714686, 1022647148, -1100160947) + + W(10, 1028021126, -1129063320, -1131820748, 1008147176) + + W(11, -1168737402, 1020424408, -1129848660, 1024695710); + WS(-1083443454, 998713176); + sum1 = W(0, 1032696047, 1026355790, 1024354795, 1034252472) + W(1, 1012872363, 1032281789, 1027888345, 1016830581) + + W(2, 1040709994, 1040668906, 1036398699, 1051748575) + W(3, 1035129883, 1044965075, 1031400701, 1048807512) + + W(4, 1040929033, 1032459486, 1012490030, -1097753826) + + W(5, -1114537329, -1100635790, -1103916602, -1098186970) + + W(6, -1087559501, -1103862520, -1102928053, -1093112676) + + W(7, -1092883854, -1099081393, -1104850339, -1112352715) + + W(8, 1049786774, 1037090637, 1047072725, 1050721065) + W(9, 1039546228, 1011437457, 1009955155, -1114553850) + + W(10, -1146095614, 1033091241, 1024780023, 1037512365) + + W(11, 1024615038, 1037286498, 1036010231, 1033653631); + sum2 = + W(0, 
1035861958, -1161872500, -1113290777, -1137177342) + W(1, 1034613598, -1103597964, 1038756242, -1111249467) + + W(2, -1098289107, -1119785106, 1033623622, 1045692473) + + W(3, -1106425659, -1117971136, 1019694879, -1114046628) + + W(4, 1026686124, 1049978705, -1098402027, 1024040864) + W(5, 1053770527, -1127968019, 1049090628, 1052585151) + + W(6, -1096172460, -1107697817, -1091833397, 1013412750) + + W(7, 1046082211, 1057075430, 1047895771, -1102204197) + W(8, -1113513064, 1027596044, -1098063812, 1031079096) + + W(9, 1012376430, -1095279992, 1045163433, -1100444173) + + W(10, 1019723703, -1121217732, 1033198374, -1107709748) + + W(11, 1038124498, -1112843267, -1120386622, 1034273086); + WS(-1086369662, -1078015058); + sum1 = + W(0, 1024279387, -1100066053, 1039589628, -1104719791) + W(1, -1113174076, -1125885710, -1111149795, 1024511623) + + W(2, 1042615374, -1106499229, 1049518735, -1115604002) + + W(3, -1127523972, 1049389829, -1102519338, 1042212335) + + W(4, 1054091094, -1113085188, 1050614757, -1103236040) + W(5, 1047781504, -1108066658, 1041501469, 1048227542) + + W(6, -1095346029, -1103658220, -1097514477, -1100854410) + + W(7, -1097175852, 1026529231, -1104719494, -1099302313) + + W(8, 1050416872, -1111759460, 1048966812, -1122281110) + W(9, 1043919349, 1032565272, 1027138593, 1036646913) + + W(10, -1113840294, -1141254386, 1032485602, -1113439156) + + W(11, -1119980106, 1039770777, -1104628175, 1032389542); + sum2 = W(0, -1092512531, -1093473489, -1081280020, -1076988140) + + W(1, -1081342477, -1086556968, -1107875638, -1121362077) + + W(2, -1098738343, -1088925799, 1042571962, 1043337888) + + W(3, 1033193400, 1010020371, -1102154130, -1126174974) + W(4, 1063615535, 1062426368, 1067242406, 1066811408) + + W(5, 1066297017, 1060290312, 1052028857, 1037059936) + W(6, 1009177475, 1026425874, 998962046, -1096426756) + + W(7, 1022780060, 1026166068, 1017583668, -1119102823) + + W(8, -1119750726, 1012050343, 1025403720, -1129159374) + + W(9, 1023044380, 
-1115355073, 1026819560, -1127777686) + + W(10, 1030476954, -1120184004, 1031881676, -1116680638) + + W(11, -1135563955, 1004442726, 946141982, 1034449190); + WS(-1129707456, 1007685382); + sum1 = W(0, -1134894751, 1044154267, -1111052786, 1038046773) + + W(1, -1192544411, -1128991721, 1035051017, -1122391156) + + W(2, -1098519836, 1030394337, -1107874844, -1118678129) + + W(3, -1111301144, -1115520098, 1015910765, -1103669035) + + W(4, 1040671079, 1013414869, -1103276809, -1098881194) + + W(5, -1110522396, -1107120076, 1033063921, -1111557603) + + W(6, 1044073412, 1041040503, 1038778991, -1107344299) + W(7, 1048980909, 1052479600, 1049200001, 1051478546) + + W(8, -1109530006, 1035376874, -1105794628, -1139987723) + + W(9, -1108336721, -1112546431, 1023468750, 1018554845) + + W(10, -1134057701, 1028314315, -1112561176, 992777541) + + W(11, -1112828028, -1132207293, 999515194, -1119107128); + sum2 = + W(0, -1137824107, -1126465818, 1005362547, -1158497445) + W(1, -1119770196, 1021074352, -1138544392, 1034621027) + + W(2, -1110122518, 1033612180, -1116868027, 1039739597) + + W(3, 1037998233, -1112420682, 1025116420, -1120069279) + + W(4, -1137081539, -1106894813, 1045165527, -1106604299) + + W(5, -1138568822, 1039276731, -1104439342, -1113398206) + W(6, 1039063461, 1047362418, 1067526191, 1071589227) + + W(7, 1061860820, -1077533661, -1073373771, -1094637864) + + W(8, -1158403413, -1117814890, -1136640937, 1044031039) + + W(9, -1133800170, 1031122886, -1106196407, 1031315442) + + W(10, -1124656554, 1023374342, -1115663214, -1148955153) + + W(11, -1123862185, 999718675, -1139157324, 1025985948); + WS(1033791472, -1138498893); + sum1 = W(0, -1143657507, 1014781903, 1029133201, 1018917100) + W(1, 998381448, 1016812674, -1118545247, -1137667330) + + W(2, -1102835756, -1108646231, -1108649138, -1106020223) + + W(3, 1003669472, -1113672010, -1114481905, -1127743919) + + W(4, 1024249065, -1130492804, 1008218246, 1054388603) + + W(5, -1092207694, -1132293500, -1112726231, 
-1119594899) + + W(6, 1044190455, 1021681616, -1115040338, 1058377694) + W(7, 1046387590, 1049136345, 1017676194, -1119541597) + + W(8, -1107762844, -1112711792, 1019895480, -1123372431) + + W(9, -1111491159, -1123523050, -1123035739, 1028355707) + + W(10, 1008291608, -1128240816, 1025256893, 1014932868) + + W(11, 1007448080, 1008489644, -1112884042, 1001709096); + sum2 = W(0, 1014470821, -1120344268, -1114330800, -1106002658) + + W(1, 1024999308, -1126273593, -1127411221, -1123975632) + + W(2, -1106657768, -1106292024, 1049926230, 1054743689) + + W(3, -1112305858, -1122403994, -1116955850, 1030827431) + + W(4, 1034825132, 1032219219, 1057813788, 1049962194) + W(5, 1051810194, 996027236, 984340447, -1114551452) + + W(6, 1006830039, -1101139832, -1102461554, -1103958261) + + W(7, -1098680718, 1040205712, -1109277736, -1111240037) + + W(8, -1104684796, -1114556582, -1111886229, 1050373872) + + W(9, 1035894797, -1100520246, 1014102650, -1106040331) + + W(10, -1124459301, 1008940380, 1029827978, -1133852786) + + W(11, -1109427720, 1024816377, -1117160816, 1003623824); + WS(1060089726, 1074996161); + sum1 = + W(0, -1117558175, 1029119241, -1141503452, 1010789890) + W(1, 1037081758, -1120312844, 1027150698, 1021223318) + + W(2, 1046443391, 1039893948, 1041448800, 1035829124) + W(3, 1041862066, 1028398131, 1038548631, 1047369209) + + W(4, -1083103171, -1093429938, -1088988175, 1032282738) + + W(5, 1035486030, -1142682180, -1111879740, -1105296983) + W(6, 1040714346, 1021452133, 1048946821, 1021975771) + + W(7, 1040332065, -1090323173, -1112512014, -1095359061) + W(8, 1044503809, 1030628716, 1038933875, 1038764813) + + W(9, 1032949373, 1041097413, 1029165798, 1046714577) + W(10, 1023987857, 1027140771, 1026874120, 1026271340) + + W(11, 1034225415, -1113760891, 1028642719, -1134282558); + sum2 = + W(0, -1130708327, -1156760442, 1011385503, 1005458237) + W(1, 1015786967, -1121500080, 1023017975, 1008927007) + + W(2, -1131662151, -1160974837, -1122121264, -1104012424) + + 
W(3, 1025508020, -1106371091, 1032453110, -1107217673) + W(4, 1027052068, -1119116140, 1006726095, 1053197196) + + W(5, -1150821434, 1032166230, -1116451706, -1143126685) + + W(6, -1104634511, -1148640061, -1101053828, 1050251696) + + W(7, 1043632028, -1115585403, 1032061998, 1045664978) + W(8, 1037500532, 1028521640, 1041142688, -1121651972) + + W(9, -1105814552, -1117076340, -1120517680, -1103126409) + + W(10, -1117309872, -1132259207, -1129219511, -1148190653) + + W(11, 1005943453, -1120252400, 1024831312, -1126349911); + WS(-1081408895, 1057237802); + sum1 = W(0, 1016592219, -1135569700, 1019913045, -1134722237) + W(1, 1020337237, 1019971573, 1015924209, 1027236154) + + W(2, 1050393725, 1025037351, 1042967287, 1042881918) + W(3, 1033060699, 1038993285, 1028206140, 1041649576) + + W(4, -1096948620, 1021729271, -1123436299, -1107250293) + + W(5, 1047123275, 1037602588, 1034435092, 1044331459) + + W(6, -1099121222, -1114451665, -1104931659, -1092679092) + + W(7, -1098053310, -1091584672, -1092984777, -1089381821) + + W(8, 1042338077, 1033766733, 1036822635, 1042265522) + W(9, 1043883210, 1042862441, 1029491668, 1042007901) + + W(10, 1026859007, 1023688209, 994107237, 1017621813) + W(11, 1033354815, -1147819555, 1035642823, 963919445); + sum2 = + W(0, -1127968320, 1036537032, -1112917417, 1020801080) + W(1, 1019615648, -1127208552, 1023589392, -1121416948) + + W(2, -1092240358, 1030963640, -1097473768, -1109029546) + + W(3, -1116583352, 1028578068, -1113404853, 1041090246) + W(4, 1057542400, -1105653780, 1053316325, 1058712288) + + W(5, 1043894648, -1093453186, 1034405836, -1094394128) + W(6, 1016463192, -1097383073, 1038201480, 1052867895) + + W(7, -1123129572, -1141347136, 1006791808, 1037486484) + + W(8, -1094079665, 1040975890, -1101066411, 1013027424) + + W(9, -1118015492, -1117505000, -1111557726, 1044943944) + + W(10, 1037438732, 1019305024, 1028884816, 1010261952) + + W(11, 1027371024, -1113360113, 1034904608, -1104647878); + WS(-1076745215, 1064070508); + 
sum1 = W(0, 1019383636, -1138649581, 1029667879, -1127653591) + W(1, 1027426151, 1022926189, 1004012708, 1016119660) + + W(2, 1043437193, 1030286820, 1040316179, 1048543741) + W(3, 1029763954, 1039947283, 1025479577, 1041368223) + + W(4, -1100863080, 1033675388, -1102225484, -1084687593) + + W(5, -1111626052, 1026499316, -1122995300, 1026134601) + + W(6, 1019299111, 1027453406, -1110289513, -1082029409) + W(7, 983973284, 1031954367, -1118301831, 1016553765) + + W(8, 1041928012, 1031909515, 1043406276, 1044625233) + W(9, 1044339921, 1039879984, 1022733729, 1012331970) + + W(10, 1018313975, -1136380470, 1026991539, -1136351578) + + W(11, 1027257018, 1022902273, 1026798858, 1027440187); + sum2 = W(0, -1131696089, 1008718823, -1149188438, -1140198251) + + W(1, -1129603953, 1007479839, -1141563590, 1018622169) + + W(2, 1012459087, -1135045027, -1118135053, 1033739972) + + W(3, -1142742214, 1015462815, -1126906553, -1135479163) + + W(4, -1112347233, -1125420309, 1041969824, 1052325325) + + W(5, 1029891118, -1120072309, 1017890385, 1016803437) + W(6, 1039451269, -1110618065, 1024383435, 1052741682) + + W(7, -1121721953, -1095903036, -1105015368, -1098230278) + + W(8, -1158657302, 1007247903, 1015365577, 1034080133) + + W(9, -1110453303, -1162307222, -1122346803, 1021342643) + + W(10, -1129595249, 999949222, -1130814181, -1140370763) + + W(11, 1020979581, -1132990667, 1008938691, -1137891195); + WS(-1094563452, 1051169575); + sum1 = + W(0, -1154622990, 1029393378, -1117775501, 1033996553) + W(1, -1136342582, 1018491963, 1032030382, 1015760954) + + W(2, 1018130713, 1035203109, -1113234793, -1106127031) + + W(3, -1114335628, -1112874623, 1023872671, -1110067034) + + W(4, 1040428881, 1032896399, 1049040808, -1112183035) + W(5, 1048012112, 1044702279, 1032915973, 1035998904) + + W(6, -1120902330, 1039535746, -1102863965, -1090463816) + W(7, 961094679, -1098092733, 1033184663, 1029109433) + + W(8, -1113835211, 1021002107, -1107942972, -1105904891) + + W(9, -1148767576, 
-1124337998, 1006151100, -1110631596) + + W(10, 1019319877, 1036762426, -1135300966, 1040237135) + + W(11, 1032716400, -1137189022, 1037223880, 1013167238); + sum2 = + W(0, 1028413027, -1120070369, 1012563150, -1111535757) + W(1, 1011962278, 1009477102, -1153318262, 974823129) + + W(2, -1121092135, -1130520759, -1120753460, -1097640928) + + W(3, -1098008778, 999766235, -1122757360, -1129371381) + W(4, 1017337743, 1037177446, -1106734706, 1051800696) + + W(5, 1035782812, -1106510696, 1010623246, 1028214977) + W(6, -1137632446, -1104029390, 1048794348, 1050145982) + + W(7, -1114477541, 1047938812, -1111017918, -1123769252) + W(8, 999930971, 1033133092, -1111694827, 1041394994) + + W(9, -1118971109, -1127041263, 1020162151, -1124747017) + + W(10, 1027578971, -1114678191, 1030530567, -1119592910) + + W(11, 1001961835, 1024176823, -1115486877, 1007999422); + WS(1057759166, -1088449289); + sum1 = + W(0, -1129635066, 1025446574, -1144519245, 1017539549) + W(1, 1026395242, -1122678010, 1025431320, 1024557433) + + W(2, 1044873994, 1037627635, 1041685209, 1033910011) + W(3, 1039651930, 1035038013, 1039320958, 1038868536) + + W(4, -1087389198, -1097118359, -1089578865, -1129568803) + + W(5, 1010566389, 1030050103, -1110122707, -1112614790) + + W(6, -1116302487, 1022204225, 1022125102, -1105345911) + + W(7, -1122366800, -1097803608, -1119554634, -1126568107) + + W(8, 1048786726, 1039292653, 1045860709, 1045748274) + W(9, 1027479143, 1033498658, -1161253492, 1010965710) + + W(10, 1017011581, 1021371937, 1025212662, 1031878029) + W(11, 1013486066, 1007580942, 1028360045, 1015223035); + sum2 = W(0, -1162281894, -1124547002, -1150111187, -1135956229) + + W(1, -1145634890, 965409433, -1123274137, 1024679583) + W(2, 1033992046, -1149048922, 988741862, -1132541637) + + W(3, -1132011310, 1024739191, 1008671821, -1129294934) + + W(4, 1042856153, 1036788056, -1112557119, -1097406101) + + W(5, -1121853795, -1104384368, 1032013500, -1147059498) + + W(6, 1038465062, 1046094595, 1060179844, 
1049669935) + W(7, -1101657128, 1042681611, -1111116605, 1027020519) + + W(8, -1093956780, -1100316514, -1090501223, 1022378118) + + W(9, 1045140163, -1121116299, 1035479824, -1131272734) + + W(10, 1024284423, 1035682376, -1122577303, -1113139255) + + W(11, -1130855582, -1118824057, 1006161754, -1124922046); + WS(-1086987838, -1100233980); + sum1 = + W(0, 1015308459, -1122325985, -1116672610, 1032904116) + W(1, -1141168915, 1033123257, 1001375875, 1015833828) + + W(2, 1041765192, 1028094431, 1042485054, -1118546351) + W(3, -1118052589, -1116818498, 992241989, 1015146386) + + W(4, -1105415847, -1114108890, -1092596043, 1046690691) + W(5, 1054244345, 1049735534, 1040724504, 1041637233) + + W(6, 1042444900, 1041449686, 1049087917, -1128556841) + + W(7, -1089307961, -1094503991, -1104029377, -1100869156) + + W(8, -1111075669, -1106140165, -1110170951, 1034284768) + + W(9, 1026589088, 1030527932, 1008796465, -1123717612) + W(10, 1003799483, 1034767973, -1144225139, 1033288361) + + W(11, 1010481109, -1121578684, 1030765989, -1124977133); + sum2 = W(0, -1128145157, 1026284201, 1012811840, -1136599052) + W(1, 992541697, 1011005728, -1127430370, 995789721) + + W(2, -1100557855, -1106810438, 1042787919, 1044596046) + + W(3, 1038174685, -1104618112, -1166127490, -1116633305) + + W(4, 1057648426, 1054522141, -1098531848, -1073275189) + W(5, 1061008073, 1059653331, 1035655860, 1017274260) + + W(6, 1048654145, 1057115188, 1046636853, -1074262745) + W(7, 1044984510, 1060082709, 1034192337, 1033697834) + + W(8, -1122215769, -1103409060, 1046426252, 1035358469) + + W(9, 1036107748, -1104639697, -1130597644, -1112828961) + + W(10, -1133425768, 1017140792, -1118227807, -1143532160) + + W(11, 1016191398, 981079778, 1006837720, 1021901152); + WS(-1092446204, 989212831); + sum1 = + W(0, 1007124942, -1116778591, 1027403088, -1121715541) + W(1, 1019921952, 1020332096, -1139218425, 1020381759) + + W(2, 1048125124, -1131432602, 1042237585, -1138060917) + + W(3, 1027977243, -1126238792, 
-1131884420, 1024321015) + + W(4, -1138101793, -1106585464, 1016240146, 1037962972) + W(5, 1053191424, 1053622732, 1043276755, 1049124886) + + W(6, -1141178857, 1019782926, 1036610823, -1101006449) + + W(7, -1094792341, -1089064867, -1101892348, -1107274751) + + W(8, -1102978326, -1115311760, -1110971902, -1114322839) + + W(9, 1031189102, -1137835047, -1132849129, -1110951117) + + W(10, 1034583667, 1015089582, 1027127900, 1008360193) + W(11, 1028625507, 1017386452, 1009465805, 1008208785); + sum2 = + W(0, -1120159352, -1125980662, -1131578305, 1028063854) + W(1, 1021975879, 1033612769, -1119579693, 1040487308) + + W(2, -1088595541, 1050146519, -1090859444, -1107336769) + + W(3, -1105868965, 1041431951, -1112749441, 1042279365) + W(4, 1060560783, -1093128727, 1057304640, 1071289406) + + W(5, -1087527230, -1086794372, 982969010, -1089785065) + W(6, 1061831343, -1098317152, 1057012981, 1069689231) + + W(7, -1097221007, -1086042421, 1033122226, -1088997285) + + W(8, -1089452071, 1042347964, -1095995319, -1096815763) + + W(9, -1104502319, 1049524493, -1104150062, -1135764460) + + W(10, -1101640393, 1038452583, -1105624394, 1041229675) + + W(11, 1037791301, -1143180172, 1029453052, 1049121852); + WS(-1080642303, 1037515653); + sum1 = W(0, 1014399585, 1040389217, 1025436037, 1035628117) + W(1, 1032176658, 1019714410, 1031481267, -1129425897) + + W(2, -1123461796, 1009459592, -1118449912, -1111999897) + + W(3, 1042449564, -1098862637, 1015874414, -1108656118) + + W(4, -1115821917, -1138501592, 1046808963, 1046961203) + + W(5, -1085801589, 1042823052, 1032561937, 1022709518) + W(6, 1037702505, 1046330151, -1088063230, 1048171302) + + W(7, 1046621083, 1033619294, 1017602698, -1172911161) + + W(8, -1105260875, -1113097082, -1166545913, -1112931778) + + W(9, -1116953058, -1108126015, 1033606503, -1161936349) + + W(10, -1134503912, 1032999462, 1011659972, 1033647146) + + W(11, 1020514160, 1023868127, 1030447848, 1015791986); + sum2 = W(0, -1139497271, 1025910769, -1105297083, 
1037339247) + + W(1, -1111547600, -1123186639, 1029488041, -1137152287) + + W(2, -1104932637, 1043978669, -1105480129, 1037617885) + + W(3, -1107402541, -1105522985, 1011577705, -1102258444) + + W(4, 1000924601, -1116100213, -1102785010, 1049988969) + W(5, 1045831134, 1040977948, 1036894407, 1025599417) + + W(6, 1034980171, 1042493673, 1044002411, 1050036780) + W(7, 1039942071, -1097804729, 1042714488, -1139888185) + + W(8, -1102656208, 1038409695, -1098474508, -1122964745) + + W(9, -1106907325, 1028039731, -1112976714, -1112698813) + + W(10, -1154378706, 1025837919, -1109802618, 1036616991) + + W(11, -1132149525, -1111839940, 1040500443, -1114191023); + WS(1060124606, -1084472548); + sum1 = W(0, 1012638700, -1131934189, -1129347279, -1138043955) + W(1, 1007380511, 1022374575, 996895579, 992489843) + + W(2, -1112711971, 1021193865, -1125331574, 1048955067) + + W(3, -1106488069, -1115361884, -1120058426, -1118477310) + + W(4, 1024524352, 1017460401, -1113956442, -1094951982) + + W(5, 1048203795, 1005249701, 1026650437, -1129456851) + W(6, 1032623962, -1117271615, 1049120342, 1015143283) + + W(7, -1110142007, 1024310560, -1121731240, 1036683482) + + W(8, -1115629480, 1023262959, -1108495481, 1031640934) + + W(9, -1129214986, -1136673413, 1017029987, -1115998674) + + W(10, 1027484509, -1157409515, 1010910287, 1021584239) + + W(11, -1144904917, 1017598403, -1145343061, 1014521967); + sum2 = W(0, -1113301049, -1129519836, -1113144608, -1114997262) + + W(1, 1028189537, -1144798799, -1135227415, -1123606042) + + W(2, 1041723134, 1018887182, 1065721494, 1081282167) + W(3, 1053752802, 1004635839, 1024321495, 1032429021) + + W(4, -1119425470, -1123219625, -1082046556, -1068115046) + + W(5, -1091841582, -1126716683, -1149813598, -1137534547) + + W(6, 1012563935, 1021107126, 1032971141, -1092304788) + W(7, -1120681827, 1026059029, 992723582, -1121765421) + + W(8, 1013366251, -1131491053, -1132479565, 1028331651) + + W(9, 1021570574, -1132445755, 1010781703, 1024141029) + + 
W(10, -1135264199, 1017265018, -1135106943, 927030725) + + W(11, 983594844, -1137356791, 959859164, -1123588459); + WS(1064307390, 997943845); + sum1 = W(0, 1016504192, 1004931163, 1001468389, -1125536771) + W(1, 1024701821, -1157300426, 1008115882, 1014991280) + + W(2, -1110031838, -1124632947, -1106796660, -1132488742) + + W(3, -1111862756, -1111316170, -1153319146, -1116314499) + + W(4, -1108555962, -1129767067, -1106350773, -1097456765) + + W(5, 1040312105, -1110123941, 1030600762, -1109063375) + W(6, 1047561626, 1003984285, 1053522405, 1050670238) + + W(7, -1128004006, 1039422954, 1005773541, 1042190650) + + W(8, -1114174882, 1017992857, -1106382906, -1136221548) + + W(9, -1123896954, 1020844396, 1015300557, 1024320647) + W(10, 1004953741, 1018940039, 1000370245, 1015934681) + + W(11, 1011062590, -1128259885, -1153739770, -1130969505); + sum2 = W(0, 1036505224, 1018720963, 1037554666, 1035136259) + W(1, -1131720942, 1012123245, 1027168757, 1024471767) + + W(2, -1100095598, -1119632397, -1082101932, -1072130218) + + W(3, -1090292986, -1114842502, -1108983905, -1105383800) + + W(4, 1039944480, 1033659023, 1064933062, 1074634461) + W(5, 1057038142, 1046242606, 1018437263, 1041395450) + + W(6, -1110028565, 1025734363, -1103479495, 1012777017) + + W(7, -1132873326, -1117820012, -1117294599, 1033191694) + + W(8, 1016119595, 1015545311, 1036578506, 1036162128) + + W(9, -1120419990, -1115011900, 1004277346, -1113427773) + + W(10, 1017378303, -1126149289, 1022196513, -1118704038) + + W(11, 1020047721, 1033886326, -1136709697, 1024978781); + WS(1053812476, -1113586226); + sum1 = + W(0, -1130399840, 1028349447, 1021963321, -1121520129) + W(1, 1020769785, -1115853964, 1021860117, -1121122124) + + W(2, -1111011390, -1120416825, -1107941440, -1106086226) + + W(3, -1129958415, -1107644952, -1122155502, -1114352785) + + W(4, 1007197394, -1105796643, 1056809437, 1046348585) + W(5, -1090756230, 1050300257, -1116443513, 1041113720) + + W(6, 1037708373, 1044272829, -1093131829, 
1057081835) + W(7, 1059679544, -1119895572, 1040260199, -1110926804) + + W(8, -1122978308, -1110423782, -1122097703, -1099751719) + + W(9, -1102044582, -1111488955, 1019054925, -1156322407) + + W(10, -1139049030, -1125249991, 1011721762, -1114497366) + + W(11, 1018253471, 1022041005, -1121197023, -1135672898); + sum2 = W(0, 1015672618, -1121994623, -1149412980, -1127744422) + + W(1, 990134373, 1024671460, -1132259460, -1125579600) + W(2, 1010541588, 1032866568, 1044676778, -1124040172) + + W(3, -1102290659, 1029499881, -1120084193, 1001699435) + + W(4, -1115998566, 1011616452, -1107438884, 1058620442) + + W(5, 1049322309, -1104592911, 1035840939, -1123886068) + + W(6, 1020421847, -1095022641, -1097106911, 1015017230) + + W(7, -1104275347, 1045525693, -1119993661, 1037165892) + + W(8, -1129704409, 1020628925, -1112769863, -1113422252) + + W(9, 1039268635, 1026741584, 999745387, -1126762930) + W(10, 1015113530, 1017019595, 1022483811, 1015061518) + + W(11, -1114030797, -1136765715, -1131682952, -1134246014); + WS(1059863230, -1098226968); + sum1 = + W(0, -1111154474, 1033627108, -1111220881, 1018934285) + W(1, -1156119577, -1108034825, 1035708952, -1116166225) + + W(2, -1130042625, 1044463840, -1098768819, 1036585799) + + W(3, 1041677635, -1100472134, 1038444026, -1111999396) + + W(4, -1124092147, -1125693093, 1043860042, -1094381437) + W(5, 1034781361, 1038465479, 1027036095, 1019243847) + + W(6, -1126659421, 1045974478, 1040108899, -1098844542) + W(7, 1051479065, -1096809363, 1040658808, 1003830125) + + W(8, -1116454602, 1040744732, -1149847241, 1043775752) + + W(9, 1046003792, -1121805723, 1037992790, -1131289879) + + W(10, -1109234699, 1026252457, -1103478147, -1107008620) + + W(11, -1127254569, -1105262391, 1038964162, -1116821910); + sum2 = W(0, 1031860762, -1114215534, 1037917300, -1123072391) + + W(1, -1121765219, 1036233577, -1113082189, 1007761049) + + W(2, 1008804487, -1119320351, 1028769566, -1145928741) + + W(3, -1118791492, 1035112755, -1119166399, 
1034891236) + + W(4, 1036033743, -1105317049, 1033869945, -1104666827) + + W(5, -1103277106, 1043081028, -1110309063, 1003334201) + + W(6, -1130814575, -1126605754, 1029943513, 1058632863) + + W(7, 1010033985, 1033184724, -1124066564, -1146667244) + + W(8, -1114717025, -1103884858, -1097519272, -1084821476) + + W(9, -1091781337, -1108906997, -1105454190, -1124982586) + + W(10, 1042429978, 1019583714, 1050527129, 1057878257) + + W(11, 1047090522, 1050006554, -1116436299, 1037673833); + WS(1053829756, -1108691549); + sum1 = W(0, 1014621415, -1117035633, 1017668333, -1121088251) + + W(1, -1113943163, 1027915142, -1117882340, 1018110476) + W(2, 1034972923, 1031518451, 1028891034, 1043029617) + + W(3, 1037608353, -1118505878, 1008280866, -1114328254) + + W(4, -1105089900, -1146432572, -1111069516, 1052288956) + + W(5, 1043183409, 1040529537, -1127832039, 1025673432) + + W(6, -1121242915, 1011744694, -1099260407, -1096216885) + + W(7, -1109446679, 1033001703, 1023717270, 1038283409) + W(8, 1024842558, -1195363583, 1035963185, 1043236853) + + W(9, -1104376301, -1110826184, -1108863440, -1136644866) + + W(10, -1125107735, 1024600093, -1111257697, 1022444749) + + W(11, 1008767270, 983774736, 1021787123, -1122279626); + sum2 = + W(0, 1011466728, -1128158188, 993685376, 1021341187) + W(1, -1125064327, 1016637363, -1135616946, -1142265296) + + W(2, -1153535168, -1133074596, -1111924694, 1041916532) + + W(3, -1115375045, -1111577830, -1121861231, -1139232914) + + W(4, -1121363658, -1108077390, 1048370655, -1088803436) + W(5, 1050909934, 1048268187, 1028734454, 1033829714) + + W(6, -1113493290, 1031842286, -1096696142, -1097359431) + + W(7, 1066069354, -1102058354, -1112810542, -1108918948) + + W(8, 1016979229, -1114077565, 1032702600, 1051606360) + + W(9, -1108957150, -1106393780, -1104652034, -1124153420) + + W(10, -1121975877, 1032726750, -1126878203, 1020447303) + + W(11, -1115472084, 1011253904, 1032484333, -1135690267); + WS(1043379192, -1131322837); + sum1 = + W(0, 
-1129021027, -1148980502, -1117253390, 1015261926) + W(1, 1024719214, -1130947836, 974083843, 1010796022) + + W(2, -1109595831, -1141769560, -1131540259, 1038059198) + W(3, 1043151230, 1040366611, 1036650848, 1032003186) + + W(4, 1033800339, 1040943040, -1099641090, 1019360940) + W(5, 1047704456, -1120007803, 1032688054, -1135300928) + + W(6, 1042153991, 1016895818, 1040857522, -1103092216) + + W(7, -1091529984, -1100986498, -1116706574, -1114490317) + + W(8, -1099711038, 1029537886, -1104535414, 1032384358) + W(9, 1043292971, 1031568104, 1019020464, 1026900536) + + W(10, 1026187331, 1023917269, 999787984, -1137467256) + + W(11, 1021851650, -1117455240, 1021119266, -1124200964); + sum2 = W(0, -1127801857, -1143073539, -1106914355, -1114698807) + + W(1, -1129241721, 1034854605, 1015357631, 1026781991) + W(2, 1028175283, 1010285366, 1045024365, -1107796176) + + W(3, -1093181393, -1094857339, -1104001534, -1106206681) + + W(4, -1129802997, 1022418743, -1109183829, 1055188047) + + W(5, 1060464193, 1038631095, 1035894131, -1121422795) + + W(6, -1133354170, -1111757330, 1035388549, -1099199714) + + W(7, -1108056884, 1042537531, 1019867695, 1010545294) + W(8, -1129332011, 1032847739, 1028119663, 1027769667) + + W(9, 1015260775, -1123722407, 1021694327, -1120402439) + + W(10, 1029358411, -1119713292, 984677997, -1126683209) + + W(11, -1117802232, 1025618013, -1116557581, 1021478623); + WS(1049844732, -1121310639); + sum1 = + W(0, 1023733410, -1099209689, 1040893204, -1102957277) + W(1, -1114244983, -1136674298, -1108461358, 1021220348) + + W(2, 1044402267, -1106734859, 1050332331, -1111686974) + + W(3, -1122456058, 1049981942, -1102278760, 1043895987) + + W(4, 1053233309, -1108848000, 1049912979, -1098618899) + W(5, 1048539349, -1111429758, 1039273110, 1048170180) + + W(6, -1096212219, -1103164857, -1099336615, -1097562740) + + W(7, -1096335583, 1033031043, -1102589972, -1101337719) + + W(8, 1051767783, -1109302031, 1050917291, -1121562509) + W(9, 1042792075, 1039550692, 
1008842338, 1041281742) + + W(10, -1114613442, -1129199659, 1033787229, -1111886543) + + W(11, -1121890370, 1041151474, -1104020956, 1031699907); + sum2 = W(0, 1051830333, 1052363768, 1062286287, 1068323273) + W(1, 1062941652, 1058036233, 1038399576, 1029358050) + + W(2, 1040398919, 1056364330, -1105094784, -1117763636) + + W(3, -1117097644, -1135317262, 1045469136, -1127713443) + + W(4, -1088812275, -1088095023, -1083945825, -1086712736) + + W(5, -1085128967, -1090471655, -1096613184, -1118175248) + + W(6, -1112895824, -1128031249, -1110939052, -1122350162) + + W(7, -1107088013, -1117471490, -1128774604, 1023889083) + + W(8, 1030823820, -1126124865, -1131257287, 1034881723) + W(9, 993374262, 1022005418, -1124473506, 1006015479) + + W(10, -1120174945, 1022839272, -1113476028, 1031923437) + + W(11, -1149114843, -1131972943, 1015689530, -1114795832); + WS(997080576, -1130763300); + sum1 = W(0, -1113905570, 1030153879, -1117230256, -1152616277) + + W(1, -1146808706, -1112646432, -1186209359, -1120216576) + + W(2, -1121320815, -1108108727, -1100762033, -1090117932) + + W(3, -1109500574, -1102311764, -1119610882, -1099135446) + + W(4, 1038521748, 1032364321, 1051702873, 1069923213) + W(5, -1104049339, 1028696893, -1120830317, 1030578386) + + W(6, 1025325853, -1131460303, 1040312882, 1063008042) + + W(7, 1056019095, -1115439812, -1125581089, -1116447004) + + W(8, -1108459484, -1152289285, -1099800679, -1092045374) + + W(9, -1100849447, -1105934578, 1028735898, -1108771165) + + W(10, -1123369669, -1111143084, 1018134009, -1116551136) + + W(11, 1015899783, -1120248394, -1113190874, -1120342084); + sum2 = W(0, -1130955153, -1115062100, -1114768924, 1040485485) + + W(1, -1102247365, 1040365390, -1105380703, -1134789779) + + W(2, 1025142055, 1033634795, -1114031272, -1115074190) + + W(3, -1110799922, 1039570131, -1112489130, 1035629875) + + W(4, 1025405091, -1124058081, 1047530952, -1104495565) + + W(5, 1038913214, -1102056033, -1123724877, -1104428707) + + W(6, 1041203911, 
-1151569099, -1160505239, 1065129660) + + W(7, -1115442088, 1037120419, -1145632358, 1035843769) + + W(8, 1034798579, -1098708869, 1048498835, -1095909934) + + W(9, -1105078519, 1024518769, -1109649658, -1117935125) + + W(10, -1107186683, 1041850582, -1097808347, 1045395705) + + W(11, -1106337339, 1007217475, -1152795787, -1124566009); + WS(-1078383103, 1059446981); + sum1 = + W(0, -1139864362, 1022304576, -1117916615, 1007335089) + W(1, -1119884647, 1022280942, 1023668405, 1034312731) + + W(2, -1142609202, 1008862865, -1109139429, 1032501274) + W(3, 982044231, -1115332063, 991223187, -1102309771) + + W(4, -1105667544, -1100327790, -1098424968, -1089810345) + + W(5, 1033240873, 1049118183, -1140016273, -1110413061) + W(6, 1048881003, 1047898440, 1053051896, -1104044598) + + W(7, 1051763270, -1113330784, 1027424455, 1032557738) + W(8, -1110989208, 1029966423, 1023833874, 1040575921) + + W(9, -1114053146, 1041606896, -1122699729, 1041157596) + + W(10, 1019807480, -1128249884, -1114518074, 1013296733) + + W(11, -1110353213, -1126749570, 1021514772, -1121516731); + sum2 = W(0, -1143648694, 1022767513, 963325627, -1138915747) + W(1, 1040530304, 1031585165, 1008260859, -1120690652) + + W(2, 1050457027, -1104882348, 1052893834, -1122824043) + + W(3, 1019582897, -1094902239, -1113748748, -1090635807) + + W(4, -1090901428, 1017677825, -1090554669, 1048412513) + + W(5, -1101863176, -1104950416, -1120605391, 1056780417) + + W(6, -1095258955, -1116653154, 1030383529, 1063122641) + + W(7, -1089352859, 1057380402, -1106433792, 1049388154) + + W(8, 1047388223, -1125154789, -1114292033, -1118207917) + + W(9, 1048190601, -1102824630, 1038107916, -1104842210) + + W(10, 1034828442, -1149114774, 1040557770, 1005930742) + + W(11, 980397999, 1033506230, -1115355457, -1136517107); + WS(-1104952056, -1073278929); + sum1 = + W(0, -1144211169, 1031125659, -1121100585, -1142367854) + W(1, 1025675017, -1114298540, 1036563163, -1122834968) + + W(2, -1099507218, -1116651974, -1101060075, 
-1105066053) + + W(3, -1131576359, -1114707952, -1120605819, -1111900952) + + W(4, 1047627264, -1113308557, 1054491321, 1039149624) + W(5, -1106626380, 1042584197, -1128818827, 1039018645) + + W(6, 1043300637, 1037274304, -1102367022, 1051689271) + W(7, 1044811828, -1128472513, 1045578618, -1115669626) + + W(8, -1098196346, -1126577137, -1102852173, -1106731162) + + W(9, -1111689619, -1126957444, 992521004, 1025441875) + + W(10, 1030331294, -1122667056, 1026346581, -1120693369) + + W(11, 1018492983, -1140414731, -1128018583, 997033420); + sum2 = + W(0, -1117577133, -1139616023, 997820975, 1028627780) + W(1, -1114909048, 1032747040, -1116006677, 1030832637) + + W(2, 1036324905, -1124977156, -1120808104, -1109376562) + W(3, 1029259135, -1145526876, 1022442689, 983753970) + + W(4, 1012676607, -1115101040, 1041273688, 1049990275) + W(5, 1043064430, 1039154681, 1027909669, -1115224952) + + W(6, -1164734594, 1032695755, 1041759226, -1096754642) + + W(7, 1041324783, -1094055114, -1098711698, 1023440264) + + W(8, 1039853902, -1116113854, 1041255405, -1106762918) + + W(9, -1111929038, 1025837595, -1130147608, -1112181665) + + W(10, -1110394349, 1006188424, -1120661488, 1033535767) + + W(11, 1021759445, 1023476809, -1133986843, 1029342490); + WS(1057403966, -1096678293); + sum1 = W(0, 1015545167, 1013920108, 1030182836, 991347091) + W(1, 1031028372, -1130110920, -1137745417, -1127755335) + + W(2, -1133819725, -1127594402, 1031500952, -1107540753) + + W(3, 1042502792, -1120427420, 1023814022, -1113612941) + + W(4, 1037006758, -1136130453, 1042283546, 1040431359) + W(5, -1084295706, 1042737160, 1019878434, 1042334622) + + W(6, -1110196161, -1123399563, -1086200264, 1060132444) + + W(7, 1049379651, 1001749345, 1023472436, -1123659840) + + W(8, -1111752718, 1041684833, 1047364560, -1104881213) + + W(9, -1113607001, -1117219726, 1017968636, 1008021369) + + W(10, 1028708905, -1119121787, -1120221088, 1006406689) + + W(11, 1002061113, 1026883950, -1124476270, 1011156129); + sum2 = 
+ W(0, -1178786588, 1027650789, 1004569898, 1010647389) + W(1, -1130907066, -1128412421, 1000455394, 1019136642) + + W(2, -1129322914, -1117835702, -1120140062, -1110565129) + + W(3, 1046200355, 1035331810, 1040609034, -1114775527) + W(4, -1116170217, -1106357388, 1012300621, 1051050908) + + W(5, -1094299123, 1038925264, -1110352470, 1028674911) + W(6, 1036181740, 1050236879, 1029537087, -1121260831) + + W(7, -1105419536, -1104188032, -1148832482, 1009994365) + + W(8, 1016863918, -1107702296, -1115347042, 1019718506) + W(9, 1018471790, 1035623426, 1031812039, -1129304944) + + W(10, -1132924585, -1127926494, 1007741857, -1147888890) + + W(11, -1122466004, -1132432621, -1132484471, -1146317634); + WS(1056335484, -1129697442); + sum1 = W(0, -1118615510, -1123816285, 1016357093, -1126548591) + + W(1, 1017679973, -1125007109, -1130509569, -1127256452) + + W(2, 1026825265, -1162479083, -1123245334, -1115166368) + + W(3, 993334229, 1009460392, -1117686884, -1110259820) + + W(4, -1102686619, -1175208362, -1112742573, 1012633705) + + W(5, 1048503365, -1110640192, 1010107581, 1008538193) + W(6, 1045269336, -1122870505, 1056143939, 1022501065) + + W(7, 1044007618, 1043433395, 1037226473, 1040188728) + W(8, 1031523962, 1025178018, -1115481871, -1098786498) + + W(9, -1107348056, -1104721331, -1124234681, -1112073470) + + W(10, -1140818205, -1112741005, 1004860403, -1121817826) + + W(11, -1134864917, -1142571619, -1120701013, 1004599859); + sum2 = W(0, -1140731697, 1018164054, -1116322329, 1033668632) + W(1, -1110091010, 1006827755, 1024608202, 978878091) + + W(2, -1126783010, -1144859189, -1148034885, -1111621890) + + W(3, 1034430751, -1133278289, -1134565598, -1106732021) + + W(4, 1035005186, 1011714049, -1112384840, 1057652336) + W(5, -1103655808, 1035575897, 1007536733, 1017810394) + + W(6, -1110815467, 1039236492, -1109778149, 1053123628) + + W(7, -1113563636, 1009902097, 1031837967, 1032913014) + + W(8, -1144657885, -1130754383, -1113329105, -1107649638) + + W(9, 
-1112793157, -1136830370, -1131576514, -1118754368) + + W(10, -1127206230, -1146205013, -1113225660, 1012470213) + + W(11, -1113811608, -1126431906, 1007646275, -1123083034); + WS(1064654654, 1035088379); + sum1 = W(0, -1123150274, 1013730923, -1124984926, -1143080352) + + W(1, -1115541655, -1120446591, 1019709882, -1123168830) + + W(2, -1101065371, -1104968795, -1104298005, -1097917367) + + W(3, -1115560637, -1112072243, -1121553409, -1101025669) + + W(4, 1052021669, 1043838624, 1050953841, 1053857683) + + W(5, -1098028717, -1118042236, -1117850604, 1024509393) + + W(6, 1034671630, -1116995686, -1125221929, 1053311748) + W(7, 1054064059, 1056948345, 1047317764, 1051915779) + + W(8, -1103122974, 1025043295, -1102292982, -1101557747) + + W(9, -1102206963, -1099829586, -1123986585, -1105952782) + + W(10, -1124628232, -1123853974, -1122875259, -1129945790) + + W(11, -1115475969, 1010096420, -1120157778, 1010117900); + sum2 = + W(0, -1115290116, 1036300391, -1109664288, 1023887773) + W(1, 1031338337, -1112746552, 1029647363, -1123357025) + + W(2, -1109338596, -1120873277, -1133763869, -1110763968) + + W(3, -1104363651, -1111086518, 1014654409, -1113327782) + W(4, 1052546267, 1031940424, 1041363997, 1046589307) + + W(5, 1038758869, 1026186944, 1011886363, -1114411312) + W(6, -1101682429, 1045633640, -1094977574, 1050150667) + + W(7, -1104681827, -1121584221, 1042076972, 1033926660) + + W(8, -1106866549, -1121551223, 1008614181, -1110488084) + + W(9, -1120223295, 1035823152, -1111033402, 1046305164) + + W(10, 1027635895, 1021591180, -1119215823, 1009099129) + + W(11, 1016354897, -1115537942, 1012736237, -1109553196); + WS(-1088190206, -1108558078); + sum1 = W(0, -1109712467, 1034096473, -1111610193, -1111503908) + + W(1, 1029528067, -1109918121, -1130336610, -1110290409) + + W(2, 1048826552, -1100191472, 1051587686, -1114238856) + + W(3, -1124911644, 1053388852, -1101598429, 1049826386) + + W(4, -1117596347, -1104233509, -1112703594, -1118548075) + + W(5, -1094857227, 
1040678692, -1102065648, -1108498290) + + W(6, 1045129740, 1021528052, -1123092480, 1045282957) + W(7, 1041941490, -1121835291, 1050563972, 1032628503) + + W(8, -1145103958, 1015098342, 1033306871, -1105600408) + + W(9, -1116887040, 1043572363, -1101594283, 1041251987) + + W(10, 1040739958, -1095362037, 1043699492, -1112734841) + + W(11, -1106244021, 1044475255, -1100499697, 1015959598); + sum2 = + W(0, -1140036524, -1122134368, -1124742331, 1023368729) + W(1, 1025643512, -1117935205, 1029315286, -1121900993) + + W(2, 1033121292, -1135675176, 1022385651, 1037868584) + W(3, -1113410705, 1034815986, -1106876578, 1029051910) + + W(4, 1020537037, -1102563608, 1017232470, -1097085277) + + W(5, 1026038978, -1107749524, -1116418603, 1013163432) + + W(6, -1094559771, -1090683436, -1089829937, -1081905013) + + W(7, -1085580324, -1093433405, -1100509010, -1107168757) + + W(8, 1042598592, 1049536131, 1015761873, -1122385293) + W(9, -1119606047, 1034046493, 1037794408, 1020109569) + + W(10, 1048560917, 1043095342, 1060516603, 1068776853) + W(11, 1062905414, 1053234619, 1023262329, 1038509970); + WS(1048802172, -1118644607); + sum1 = + W(0, 1022629891, 1001330921, -1147098739, 1023807923) + W(1, -1129374813, 1028532286, 1031375324, -1151549405) + + W(2, -1112271863, 1008285357, -1106982824, 1020475895) + W(3, 958183765, 1025640422, -1133565569, -1106868493) + + W(4, -1115934246, 1007889405, -1110669107, -1095657203) + + W(5, -1113474437, -1090481180, -1104303995, -1104624276) + + W(6, -1112205646, -1137992085, 1044651974, 1050363724) + W(7, 1050921091, 1050665633, 1037811271, 1049383075) + + W(8, 1050188943, 1025826270, 1026008921, 1011316531) + W(9, -1106529387, -1131537104, -1125009457, 1022847857) + + W(10, -1125264263, -1127716075, -1113499645, 1026946398) + + W(11, -1121270209, 1021652689, 1014192325, -1142930963); + sum2 = W(0, 1016559128, -1119933527, 1008526536, 1034816070) + W(1, 1018959568, 1026554777, -1112891495, 1046177388) + + W(2, -1089262209, 1032931419, 
-1094914643, -1096197918) + + W(3, -1101210129, 1049415798, -1128790619, 1043157162) + + W(4, 1053795811, -1108360154, 1052999976, 1069558608) + + W(5, -1103281588, -1085501184, -1098019814, -1088254262) + + W(6, 1058569170, -1097574423, 1052760357, 1058007152) + + W(7, -1106340652, -1101474305, -1114646508, -1096632714) + + W(8, -1094475155, 1036603460, -1106271635, 1022028102) + + W(9, -1121182797, 1027756295, -1124734105, 1015426864) + + W(10, 1008650912, -1121755244, 1039081818, 1004102711) + + W(11, 1033515588, 1032461240, -1118822413, 1043397723); + WS(-1085648446, -1079079370); + sum1 = W(0, 1031831473, -1114079994, 1031367567, -1118896788) + + W(1, -1123680847, 1033718927, -1123944253, 1033541103) + W(2, 1036117159, 1003975776, 1044164867, 1045647319) + + W(3, 1033826253, 1041970952, 1024952417, 1040029267) + W(4, 1049939273, 1042007752, 1043312672, -1095211959) + + W(5, -1122554975, -1116359196, -1176880640, 1026974961) + + W(6, -1094329359, -1097850009, -1089391360, -1087218668) + + W(7, -1100203021, 1033960979, -1108529239, -1111794397) + + W(8, 1040633606, -1134555900, 1041283678, 1044809099) + W(9, 1046447947, 1042238738, 1024390649, 996515216) + + W(10, 1023486628, 1019647980, 1023951387, 1014099708) + + W(11, 1017251780, 1031788713, -1146375056, 1033511473); + sum2 = W(0, 1019169584, -1120973142, 1033125970, 1025364316) + + W(1, -1109812797, 1040314133, -1108282977, 1018923536) + + W(2, 1042983019, 1009997472, -1103915398, 1036688212) + + W(3, -1125420780, -1119986230, 1034277286, -1117630132) + + W(4, -1098354632, -1106283039, 1033722376, 1062322516) + + W(5, -1098634798, 1057333054, -1102308141, -1125368820) + + W(6, -1099987280, 1032814660, -1095179878, 1032770062) + + W(7, -1109050948, -1097734639, 1034195486, 1049154263) + + W(8, 1048978343, -1103033329, 1055664562, -1097173506) + + W(9, -1103688168, -1173745415, -1107997912, -1098308929) + + W(10, -1118546716, 1022934360, -1110281795, 1045599869) + + W(11, -1119010038, 1038965178, 1007528912, 
1035253180); + WS(-1089355774, -1078290086); + sum1 = W(0, -1122950775, -1112691207, 1023290087, -1107733500) + + W(1, -1146188190, -1132821263, -1112282369, -1116356099) + + W(2, 1037182841, -1124344033, 1046400294, -1118583361) + + W(3, 1042531744, -1121639475, -1137408791, 1018516820) + + W(4, -1118638572, 1041047393, -1098349922, 1057481334) + + W(5, -1139289650, -1109625472, 1026873009, -1139389015) + + W(6, -1121667553, -1104403323, -1099164822, 1054069421) + + W(7, -1097191323, 1041114291, -1120861012, -1139200383) + + W(8, 1009124079, -1138062223, 1044398104, -1149431076) + + W(9, 1044515332, -1120573261, -1120947806, 1017011067) + + W(10, -1115334660, -1126199861, -1136525567, -1110719762) + + W(11, 1015659149, -1121643278, -1122889719, -1120569352); + sum2 = + W(0, -1120919296, 1009843654, -1132368064, 1020138111) + W(1, 1029564211, -1115205032, 1016741875, -1119663171) + + W(2, -1124567280, -1107129213, 1020831927, -1098918731) + + W(3, -1132836513, 1010227332, -1118685376, 1026001154) + W(4, 1037030993, 1043380414, -1109288357, 1059483198) + + W(5, -1119273466, -1106893419, 1028452918, -1135273053) + + W(6, -1120629799, -1102119519, -1112537154, 1064361176) + + W(7, -1104145236, 1043150197, -1117770026, 1019055438) + + W(8, -1106639281, 1030878171, -1118412551, -1093377806) + + W(9, -1123848756, -1103899854, 1024207514, -1109882780) + + W(10, 1020211370, -1126825280, -1144175329, -1146281491) + + W(11, -1135251423, 1028327527, -1128215590, 1024565629); + WS(1064975294, 1066308158); + sum1 = + W(0, 1031747776, -1106142801, 1028818885, 1001228109) + W(1, -1115396498, 1041342669, -1108139113, 1027821620) + + W(2, -1119071204, 1032019633, 1041375482, -1112793497) + W(3, 1013499583, -1123968938, 1024220311, 1020544662) + + W(4, 984462229, 1045090381, -1093262116, 1049965274) + W(5, 1052662103, -1113450905, 1035427697, -1138282267) + + W(6, -1132055971, -1108909198, 1038243096, 1021279149) + + W(7, -1090377239, 1040402543, -1107965954, -1116422289) + + W(8, 
-1148253029, -1110036442, 1036945928, -1115536386) + W(9, 1034061985, 993049059, -1113141447, 1016907357) + + W(10, -1126913907, 1036419718, -1121925745, 1019907753) + + W(11, 1032108124, -1109636927, 1032825796, -1128061041); + sum2 = + W(0, 1016610899, -1135553471, -1163068737, -1147979120) + W(1, -1121296916, -1139692219, 1007282246, 1019770181) + + W(2, -1118189976, 1018031354, -1120752887, 1035872696) + + W(3, 1034250650, 1022157658, -1131969269, -1131641536) + + W(4, 1027283971, -1118352328, 1042584076, -1095799786) + + W(5, 1009508653, -1121907329, -1148404200, 1013257077) + W(6, 1028895363, 1032145382, 1042427003, 1073605475) + + W(7, 1056095764, -1130051225, 1025403981, -1121748387) + + W(8, -1113530321, -1123867563, -1099207121, -1075418961) + + W(9, -1091570337, -1113117501, -1124635978, -1123610989) + + W(10, 1007846553, 1011272254, 1010785270, 1026092591) + W(11, 1030054693, 1019567305, 1012575724, 1018865930); + WS(1062423998, 1020226002); + sum1 = + W(0, -1122384152, 1000976853, -1122957973, -1140430696) + W(1, 1018079658, -1116871589, 1021042950, -1130305609) + + W(2, -1116470612, 1015809956, -1115150860, -1100102424) + + W(3, -1099378776, -1102397253, -1121061927, -1097083551) + + W(4, -1095861522, -1125526476, -1101122465, 1051208664) + W(5, 1045734276, 1040938184, 1045437908, 1049869175) + + W(6, 1055717600, 1034182342, 1052715645, 1053221217) + W(7, 1041738487, 1038184662, -1114085178, 1019939330) + + W(8, -1128557498, -1136390676, -1101688288, -1097231798) + + W(9, -1114764537, -1107259340, 1034631967, -1105843464) + + W(10, -1133541800, 1025071548, -1121585121, 1026360261) + + W(11, -1127628368, -1121765359, 1018430000, -1122412137); + sum2 = + W(0, -1125184611, 985313435, -1181860650, -1124740239) + W(1, -1132645547, 999484231, -1125466771, 1024683619) + + W(2, -1124729770, -1132095255, 1028760415, 1011065519) + + W(3, 1033849803, 1015359010, -1140486285, -1125848669) + + W(4, 1045115043, -1117207517, 1036693207, -1088457353) + + W(5, 
-1096437481, 1048821220, -1107942084, 1027562883) + + W(6, 1058112728, 1067077185, -1107623537, -1080660794) + + W(7, -1109418981, -1117188353, 1027196953, -1127975224) + + W(8, 1023628890, -1158219675, 1043689375, 1020427720) + W(9, -1110185484, 1032363474, -1121893121, 1023023798) + + W(10, 1007171579, -1118062236, 1000645119, -1160295467) + + W(11, 1015083304, 1023561702, -1134577643, -1158549787); + WS(-1085592318, -1113086899); + sum1 = + W(0, -1127342656, 1024334944, -1107952998, 1034498345) + W(1, -1131633306, -1110168462, 1033648125, -1119681052) + + W(2, -1105245475, 1029597578, -1104926592, -1110412429) + + W(3, -1103724998, -1098176238, -1115323711, -1098367950) + + W(4, -1104578161, -1117644382, -1101901257, 1053247055) + W(5, 1051300307, 1051451939, 1049307889, 1053283054) + + W(6, 1053241687, 1041591201, 1051692021, 1044816720) + W(7, 1042909421, -1124765258, -1143558503, -1113485404) + + W(8, -1103827813, 1020278010, -1097952575, -1110749814) + + W(9, -1106968399, -1105195378, 1027981559, -1102600950) + + W(10, -1138537863, 1015573096, -1110174988, 1033881662) + + W(11, -1116328992, -1113141267, 1028312016, -1113790704); + sum2 = + W(0, 1019423143, -1134200265, 1022335641, -1126211289) + W(1, -1160177640, 995157684, -1138832033, -1130236057) + + W(2, -1118706158, -1122995556, -1113839515, -1123774588) + + W(3, 1002859666, -1114460336, -1131104601, 1027754115) + + W(4, -1110500850, 1021681136, -1121978030, 1048787768) + W(5, 1035034344, 1025695056, -1126984825, 1027074464) + + W(6, 982325064, -1106847870, 1042414810, 1052221246) + W(7, 1009324191, -1129766425, -1136621129, -1120353368) + + W(8, -1117883328, -1115095323, -1112736275, -1118959328) + + W(9, 1026260852, -1114723897, 1016807320, -1121253912) + + W(10, -1145733922, 1001371122, 1019850892, -1113736050) + + W(11, 1023767274, -1130130145, -1131688905, 992217060); + WS(1025516512, -1100199588); + sum1 = W(0, 1010096560, 998291066, 1015136529, 1016981531) + W(1, -1134306338, 1015217585, 
1025573847, 1027586712) + + W(2, 1021891087, -1128181387, -1152435393, 1035336779) + + W(3, 1040424469, 1041134374, -1147235028, -1105873140) + W(4, 1041308560, 1015332531, 1054580194, 1043985948) + + W(5, -1088067164, -1097437107, -1108043523, 1024655979) + + W(6, -1106994989, -1097993736, -1087955312, 1049373383) + + W(7, 1048585956, 1044933000, 1037061588, 1041451309) + W(8, -1107391304, 1040752366, 1021987303, 1016493518) + + W(9, -1111549299, 1015283332, -1125444197, 1025261651) + + W(10, 1032706511, -1115681272, 1013936722, -1153713033) + + W(11, -1122756060, 1028860515, -1124931799, 1010373746); + sum2 = W(0, 1024904631, -1136864762, 1005033941, 1030076655) + W(1, 1022202973, 995105737, 1009257370, -1113660547) + + W(2, -1115115972, 1023564875, -1114446231, -1115188268) + + W(3, -1099782234, 1025338591, -1107689693, 1049272645) + + W(4, -1128899549, -1103710322, -1140124434, 1057566767) + + W(5, 1051976820, 1009750634, 1040613751, -1105716747) + + W(6, -1100416868, -1105139728, -1107001478, 1061216146) + + W(7, -1114917196, -1107279078, -1123762171, -1112133417) + + W(8, 1014205090, -1112771593, -1105823902, -1101449249) + + W(9, -1115621063, -1123827499, -1124460977, 1019919657) + + W(10, -1138213458, 1024304323, 1028369943, 1031401203) + + W(11, -1127316949, 1002907445, 1021828037, -1121285688); + WS(1050734204, -1108852232); + sum1 = W(0, -1111669430, 1026854009, -1115457556, 1022285659) + + W(1, -1130768721, -1124925681, 988348114, -1117352760) + + W(2, -1116292712, -1106093993, -1104273631, -1090233269) + + W(3, -1103820941, -1107176543, -1114665965, -1103610460) + + W(4, -1122281377, -1103616254, 1027625969, 1069754213) + + W(5, -1114752252, -1130883179, -1144129324, 1032882469) + + W(6, 1027495958, -1113635568, 1042550097, 1065691213) + W(7, 1054098174, 1026991873, 1018693905, 1034282506) + + W(8, -1113568120, -1125634353, -1100906821, -1090598793) + + W(9, -1103145534, -1107767585, 1007587914, -1112131865) + + W(10, -1123139966, -1117637392, 
-1147736996, 999398084) + + W(11, -1140005358, -1123096067, -1120005196, -1122062653); + sum2 = + W(0, 1024214881, -1122830075, 1024710111, -1118490155) + W(1, -1134765475, 1007204875, 955722144, -1132418382) + + W(2, -1105712489, 1026001227, -1105988647, 1044311614) + + W(3, -1128591966, 1016894538, -1132281374, -1121103003) + + W(4, -1096328526, -1089670730, -1109313571, 1057951288) + + W(5, 1031038781, -1135710147, -1121635711, 1040031313) + W(6, 1036545949, -1104892025, 1042923133, 1050757116) + + W(7, 1041007149, -1111844715, 982097434, -1131469166) + W(8, -1114697429, 1027384734, -1108836817, 1046225965) + + W(9, -1111832237, 1020988490, -1141601766, -1137288635) + + W(10, 1029115040, -1127506094, 1018384918, -1118982995) + + W(11, 1000957030, 1018633070, -1131312630, 1012074251); + WS(-1077332287, -1089760701); + sum1 = + W(0, 998476811, -1146931846, 1020090684, -1115596769) + W(1, 1018970620, -1117338894, -1131301080, -1134004534) + + W(2, -1126986618, -1106320367, -1103512376, -1106044643) + + W(3, 1033789092, -1121544222, 1025908912, -1120853460) + W(4, 1035645275, 1033143495, 1057100532, 1033806067) + + W(5, -1089559024, -1119052427, -1109205213, 1021655744) + + W(6, 1035996661, 1030948981, -1098691810, 1054020686) + W(7, 1053006209, 1053247323, -1117960094, -1112997269) + + W(8, -1111559939, -1116749294, -1114602228, -1102410839) + + W(9, -1114317262, -1148875196, 1040862618, 1024305160) + + W(10, 1002151576, -1132098378, 1020764052, -1131285468) + + W(11, -1143682184, -1140686688, -1112291776, -1137942600); + sum2 = + W(0, 1006079429, -1142220161, -1129651941, -1120056293) + W(1, 1021210606, -1138049810, 1020679942, -1144760977) + + W(2, 1015506585, 1012377004, 1027444401, 1027255740) + W(3, -1154390002, -1121552483, 988683875, -1149209645) + + W(4, -1129173219, 1019086641, 1029291472, -1087748986) + W(5, 1030702430, 1046290929, -1149010889, 958643736) + + W(6, -1126961905, 1027847692, -1099113060, -1071778912) + + W(7, 1077548482, 1017608723, 
1009874569, -1165018915) + W(8, 1025558752, -1121396179, 1031608500, -1132223695) + + W(9, 1041292060, 1030045463, -1116313503, -1108302337) + + W(10, 1004493969, -1129089615, 1032469225, -1117547929) + + W(11, 1017793536, -1115202758, 1024909384, -1153372770); + WS(1054407548, 983325672); + sum1 = + W(0, 1028412425, -1115456470, 1017604747, 1031921957) + W(1, -1121692358, 1041128181, -1131299175, 1034097067) + + W(2, -1105600625, 1033490817, -1115519181, 1031550611) + + W(3, -1103698804, -1109550327, 1030793272, 1014014769) + + W(4, -1148390382, 1044756246, -1103922376, -1090010995) + + W(5, 1062123705, -1103937189, 1039103265, -1135652511) + + W(6, 1033571545, -1114006910, 1059608097, -1087252051) + + W(7, -1094628214, 1042983319, -1106569209, -1116995186) + + W(8, 1032293767, -1119790070, -1107101148, 1044072953) + W(9, 1027680451, 1021059262, 1007158719, -1141553278) + + W(10, -1124427701, 1037509878, 1007976775, 1022727275) + + W(11, 1033962892, -1108328080, 1032055396, -1143059662); + sum2 = + W(0, 1007145536, -1108956115, -1123468231, 1003522017) + W(1, -1106382527, 1023566518, -1116797301, 1016028592) + + W(2, 1011835040, 1040320325, 1009493536, 1036429861) + W(3, 1045134298, -1132203156, 1044032552, -1103689691) + + W(4, -1115291423, -1145921569, -1120694127, 1057547857) + + W(5, -1120933925, -1105348488, -1105744806, 1019566780) + + W(6, 1029973058, -1113530029, 1025444390, 1039825371) + W(7, -1093621604, 1030401418, -1111109931, 1039637835) + + W(8, 1036345379, -1116109872, -1106839609, 1028735446) + W(9, 1043632212, 1032927131, 1031187850, -1119043970) + + W(10, -1106716830, 1031627486, -1106619549, 1032586179) + + W(11, -1135351552, -1107014793, 1030984886, -1117261713); + WS(1062610366, -1081620328); + sum1 = W(0, 1034909184, 1022225638, 1022593144, 1034659649) + W(1, -1145063906, 1029979468, 1024460554, -1129493655) + + W(2, 1041975919, 1036878493, 1038275488, 1049110721) + W(3, 1039909252, 1050285945, 1032760119, 1052309552) + + W(4, 1038564634, 
1032326503, 1032775158, -1104434937) + + W(5, -1098154666, -1091716385, -1098908862, -1093908076) + + W(6, -1086061975, -1097049310, -1094128798, -1097332230) + + W(7, -1096830529, 1031802154, -1125106744, 1033460244) + W(8, 1054071881, 1038246806, 1049378869, 1048167927) + + W(9, 1037005773, -1122539766, 1023442918, -1111713187) + + W(10, -1118145809, 1033815666, 984752392, 1040915883) + + W(11, -1127387550, 1042582214, 1025327999, 1038395208); + sum2 = W(0, 1025388154, 1001261778, 1007664153, 1018216589) + W(1, 1011431705, -1124915037, 1007167865, -1118462618) + + W(2, -1106689977, 1031762430, -1105078255, -1117638934) + + W(3, -1119019386, 1039919645, -1115769810, 1041969311) + + W(4, -1110560421, -1111104301, -1122922762, 1029938402) + + W(5, -1107704269, 1043034893, 1041034358, 1050092429) + + W(6, 1049316874, -1112011481, 1044332351, -1117360942) + + W(7, -1116357646, -1107523849, -1112781805, -1104673921) + + W(8, 983067209, 1033280635, -1133572905, 1022258405) + W(9, 1016143957, -1109485745, 1010088409, -1106654827) + + W(10, -1121273022, 1026153330, -1116643818, 1033323756) + + W(11, -1123412994, 1027380094, 1016238045, 1006440178); + WS(-1079099231, 1058224693); + sum1 = + W(0, -1136025729, 1041807413, -1113852830, 1038775953) + W(1, -1130024634, -1139925939, 1032899333, -1135488689) + + W(2, -1098612147, 1030021338, -1101745716, -1114342965) + + W(3, -1109189268, -1116898669, -1169623989, -1105924753) + + W(4, 1040574357, -1113591959, 1035906153, -1100940817) + + W(5, -1110548904, -1107090858, 983487291, -1105279552) + W(6, 1044962866, 1041390773, 1038641429, -1112997607) + + W(7, 1053521164, 1048687519, 1048553583, 1049805017) + W(8, -1107948018, 1030293261, -1102747762, -1131180663) + + W(9, -1102886695, -1127712365, 1020276366, 1033907987) + + W(10, -1152707357, 1019631368, -1119110697, 1008365619) + + W(11, -1120409456, -1156710253, -1137865855, -1120648241); + sum2 = + W(0, 1026221982, -1150712731, -1135858910, 998750602) + W(1, 1026066546, 
-1122173835, 1019885572, -1114797832) + + W(2, 1031288593, -1121646784, 1020650492, -1118866549) + + W(3, -1126679589, 1033676882, -1119823506, 1024418530) + + W(4, 1025270629, 1040465550, -1102699692, -1090773917) + W(5, -1103726643, 1038653616, 1044175124, 1035909226) + + W(6, -1105433524, -1099611845, -1077885918, -1073585027) + + W(7, -1083499628, 1072488285, 1076879885, 1053754278) + W(8, 1025604422, 1015298833, 1013887757, -1111404471) + + W(9, 1029004921, -1128843744, 1042609155, -1128976380) + + W(10, 1025361731, -1118571342, 1035533544, -1115122758) + + W(11, 1025173545, -1132364945, 982102231, -1120827581); + WS(1039418864, -1140458522); + sum1 = W(0, 983096624, 1029517505, -1121873782, 1019803894) + W(1, 1027911414, -1108005699, 1033576274, -1135118358) + + W(2, -1114535995, -1145085849, -1102746926, 1028523779) + + W(3, -1100878768, 1038991951, -1142936385, -1125909817) + + W(4, 1044450572, -1097839409, 1062380387, -1088253674) + + W(5, -1109645966, -1154916402, -1107146685, -1122115416) + + W(6, 1027001827, 1032347927, -1106235793, -1090653261) + W(7, 1061486174, 1029669390, 1047810758, 1038152785) + + W(8, -1117675070, 1031259878, -1105839880, 1036696746) + + W(9, -1097301074, 1024178544, 1038477872, 1041849616) + + W(10, 1035770150, -1104931415, 1033087169, -1121428646) + + W(11, -1120235273, 1035324408, -1108040973, 1017003668); + sum2 = W(0, -1121567066, 1034344084, -1105011035, -1142865888) + + W(1, -1136511728, -1106811819, 1032101677, -1113394506) + + W(2, 1033267920, 1026284945, 1033899226, -1120108491) + + W(3, -1122542627, -1120621959, -1134482876, -1114137407) + + W(4, -1104421963, -1103410132, 1022298858, 1057117238) + + W(5, 1038703002, 1033583066, -1122973141, 1030826553) + + W(6, -1130139832, 1037507308, -1102818563, 1003910328) + + W(7, 1043197066, -1124006960, -1124335993, -1125132432) + + W(8, -1132243276, 1034735332, 1044067085, 1041074904) + + W(9, -1101099519, -1109459597, 1041237660, -1128301632) + + W(10, 1024120715, -1109350039, 
-1127155070, 975508032) + + W(11, -1117378085, 1001772648, -1113899392, -1126301549); + WS(1053462780, -1083681865); + sum1 = W(0, 1023511963, -1126058411, 1027394040, -1118697149) + W(1, 993307519, 1025756296, -1131368524, 1029589575) + + W(2, 1041747598, 1015154794, 1045749945, 1048726745) + W(3, 1043007793, 1038169844, 1024626666, 1040782360) + + W(4, 1041497612, 1018224530, 1041814811, -1086401302) + + W(5, -1115623160, 1032344226, -1127070442, -1131398580) + + W(6, -1138339988, 996055791, 1014351349, -1082085657) + + W(7, -1096882594, -1105145572, -1110953740, -1110890258) + + W(8, 1035615616, 1027711272, 1040756601, 1048515830) + W(9, 1042139693, 1038148425, 1009997960, 1029718722) + + W(10, 1020629658, -1123213919, 1032965474, -1129047376) + + W(11, -1132054806, 1033289129, -1128695798, 1029252236); + sum2 = W(0, 1008350928, 1011413600, -1120704007, 1027382950) + W(1, -1127381517, 981904616, 1015535284, -1135583391) + + W(2, -1118714967, 1024023545, -1126562655, 1034951721) + + W(3, -1128407708, -1126139913, 999360844, -1138986423) + + W(4, 1030231193, -1124778398, -1110351493, 1052754126) + + W(5, 1033272406, -1151802072, 1013432992, -1123376913) + + W(6, -1091889801, -1098641764, -1104262464, 1057762368) + + W(7, -1161878816, 1012148800, 1024540201, 1031971990) + + W(8, -1108897465, 1033453825, -1106889474, 1035809187) + + W(9, -1132306565, -1153180738, 1014625240, -1118108772) + + W(10, 1032606711, -1125648711, 1025836907, -1131934955) + + W(11, -1134699254, -1131180247, -1171757551, -1140791326); + WS(-1099299320, 1056598066); + sum1 = W(0, 1006197652, -1128153660, -1136164796, 1035903322) + + W(1, -1130396894, 1036246880, -1124832314, 1029570879) + + W(2, -1127547996, 1041714651, 1029712499, -1128532088) + + W(3, -1106761055, -1121593759, 1017604226, 1017990507) + + W(4, -1106221946, -1098261544, -1085896508, 1057218165) + + W(5, 1054525467, 1028130044, 1036683414, 1016287010) + W(6, 1045972807, 1038366195, 1057638589, 1054413180) + + W(7, -1086573266, 
-1090476168, -1101308500, -1110319406) + + W(8, 1004824505, -1113181968, -1113770157, 1038963911) + + W(9, 1033084478, 1042818602, -1130963634, -1114724733) + + W(10, -1119658116, 1030219574, -1129447670, 1033715440) + + W(11, -1128924420, -1116035017, 1030841394, 998207538); + sum2 = W(0, -1131923124, 993280665, -1127811288, 1032538133) + W(1, 992174233, 1002762140, 1015088121, 992586073) + + W(2, 1046772351, 1028557303, -1107794670, -1098564467) + + W(3, -1121892222, 1035775413, -1158807761, 1019898989) + + W(4, -1110971235, -1134148898, 1043137579, 1067961229) + + W(5, -1095277951, -1099141736, 1011333246, -1118039147) + + W(6, -1105011941, -1104446514, -1096504482, 1066594258) + + W(7, -1107079502, -1090299346, -1132433157, -1109952821) + + W(8, 1028001687, -1147560296, -1101129935, -1104613803) + + W(9, -1097683180, -1129031039, -1108888578, 1040568125) + + W(10, -1121147281, 1024426003, 1034898623, -1136832302) + + W(11, 1036381319, 1009264906, 1032287279, -1111357043); + WS(-1105186296, -1108555742); + sum1 = + W(0, 1040483623, -1101386974, 1040816739, -1134311356) + W(1, -1108915605, 1046011615, -1102638726, 1034864637) + + W(2, -1105046378, 1038429012, 1031828819, -1107675004) + + W(3, 1034430779, -1109709717, 1030173660, -1130419297) + + W(4, -1118442722, 1042743759, -1093958439, 1030110546) + + W(5, 1044698927, -1103097566, 1041159370, -1111922822) + W(6, 1040666374, -1109750289, 1050019912, 1040526743) + + W(7, -1097473172, 1045974257, -1110364069, 1017242540) + W(8, 986662468, -1103538071, 1033262923, -1121451017) + + W(9, 1032745156, 1041021277, -1106175813, 1032931450) + + W(10, -1118942975, 1040611956, -1118385334, 1012951144) + + W(11, 1035127903, -1100748433, 1037154391, -1121991998); + sum2 = W(0, -1127162070, -1135271108, 1003505825, 1012864633) + + W(1, -1123384038, 1026898060, -1134649836, -1125848258) + + W(2, 1014979733, -1136185376, 1028363168, -1131143608) + + W(3, 1033256022, -1133517476, -1120532892, 1028008335) + + W(4, -1114518101, 
1038675289, -1105093650, -1095849351) + + W(5, 1028346583, -1113528611, 1028725832, -1117113572) + + W(6, 1016203776, -1140478504, -1089585970, -1070239185) + + W(7, -1078361549, 1038794260, -1125525718, 1005296645) + + W(8, 1035296562, -1111853852, 1058969759, 1079096535) + W(9, 1068193425, 1032636395, 1017911666, 1032298564) + + W(10, -1117887634, 1030827904, -1123277259, -1116960146) + + W(11, -1125445846, -1113311282, 991223090, -1134431064); + WS(1065442623, 1015025160); + sum1 = + W(0, -1115365041, 1040496438, -1114325264, 1036061554) + W(1, 1038370800, -1112496600, 1039123015, -1120668655) + + W(2, -1113215535, 1031517084, -1109245393, 1002812769) + + W(3, 1036804053, -1099928488, 1036473513, -1116761110) + W(4, 1018550702, 1044423084, -1131936399, 1034975748) + + W(5, 1048106991, -1107673238, 1041344487, 1019812354) + W(6, 983382403, 1017412396, -1109909072, -1099246196) + + W(7, -1114225707, -1129165678, 1011581692, -1138585900) + + W(8, -1103693946, 1032083208, -1106995843, 1024898238) + + W(9, 1044531927, -1096349707, 1043676418, -1110354101) + + W(10, -1124004468, 1040563090, -1120263275, 1012000060) + + W(11, 1036150809, -1113013165, 1037117105, -1128538214); + sum2 = W(0, 1010698941, 1042699596, 1048659418, 1058578114) + W(1, 1052277576, 1019470633, 1041036392, -1119471849) + + W(2, 1023126843, -1140656688, -1095653758, -1081184698) + + W(3, -1097586600, -1098953045, 1018118885, -1122826054) + + W(4, -1106210958, 1029081919, -1118272649, 1060468587) + + W(5, 1043044817, -1108654451, 1029721267, -1130901998) + + W(6, -1122624743, 1040813712, -1097694847, 996865603) + + W(7, -1103459725, -1128239326, 1034766857, 1000734433) + + W(8, -1122034237, -1174961495, 1033474214, -1102355565) + + W(9, 1049205466, -1112376452, 1033989555, 1017824567) + + W(10, -1117968485, 1039098482, -1118912655, 1020119257) + + W(11, 1031051049, -1108144089, 1038110136, -1114988670); + WS(-1115864032, 1039792746); + sum1 = + W(0, -1140468214, 996096400, 1010495721, -1114956320) 
+ W(1, 1013460969, -1122072970, -1127833232, -1140762151) + + W(2, -1121375417, -1108218204, -1109130991, -1115276388) + + W(3, 1040789857, -1114444905, 1026520268, -1127271361) + W(4, 1034497081, 1033625894, 1055392502, 1036829073) + + W(5, -1088431348, 1030275655, -1111886108, 1031218972) + W(6, 1039608980, 1040179481, -1093805194, 1050653566) + + W(7, 1049263376, 1051648994, -1121903450, -1112576115) + + W(8, -1113171393, -1108192053, -1127063626, -1109643980) + + W(9, -1124542831, -1129660583, 1034831154, 1019975176) + + W(10, -1132620605, 1018030914, 1012649753, -1127455572) + + W(11, -1135717173, -1147064482, -1114505023, -1135014713); + sum2 = + W(0, 1015385693, 1020691269, 1015565489, 1019337403) + W(1, -1132207064, -1131086480, -1163542113, 1010427910) + + W(2, -1115435752, -1141202058, -1111436746, 1040267853) + + W(3, -1114623303, 1025609602, -1140470334, -1128429973) + + W(4, 1035175215, -1117300235, 1033401267, -1100581516) + W(5, 1041259906, -1101351817, 1020278145, 1027629029) + + W(6, 1017441239, -1122675471, 1055273933, 1073306798) + + W(7, -1073173797, 1032997632, -1122237085, -1119509251) + + W(8, -1112773411, 1031000417, -1101083444, 1046685605) + + W(9, -1115311051, -1113329498, 1031244051, 1033297372) + W(10, 998363696, 1016207247, -1113092041, 1013448250) + + W(11, -1130370856, 1026870685, -1124843109, 1014260054); + WS(1060418110, -1122066101); + sum1 = W(0, 1031696780, -1124029560, -1147879495, -1123330286) + + W(1, -1137787811, 1025904121, -1122010711, -1131825009) + + W(2, -1115430516, -1129767372, 1033383950, 1032201307) + + W(3, -1124043956, -1135655378, 1016436406, 1011740349) + + W(4, -1104465800, -1124342772, -1084846132, 1040589380) + + W(5, 1059143665, 1047669789, 1032251443, 1017074582) + W(6, 1041576533, 1035204176, 1057835449, -1121351835) + + W(7, -1085338500, 1026196737, -1113520196, 1023559701) + + W(8, -1122042801, -1109435772, -1106791333, 1025265863) + + W(9, 1037079395, 1031201378, -1120875027, 1024540211) + W(10, 
1028261758, 1025573168, 1024098613, 1000233671) + + W(11, 1020968696, -1122702819, 1016849816, -1123483709); + sum2 = W(0, -1127836624, -1173496113, 1027284157, 1022886010) + + W(1, 1036751625, -1124242822, 1017945890, -1140181115) + + W(2, 1033364881, -1127629564, -1104916712, -1097350377) + + W(3, -1105423640, 1034826035, -1122986652, 1033442485) + + W(4, -1101679270, -1105780675, -1113070309, 1061873799) + + W(5, -1096904559, -1105550966, -1127473876, -1108824717) + + W(6, 1023580345, -1119640853, -1132739235, 1062616603) + + W(7, 1009507187, -1098447862, -1116534057, -1115327874) + + W(8, -1149619356, 1033890881, -1113766541, -1112358042) + + W(9, -1119315178, 1045500652, 1015952658, 1033458469) + + W(10, -1136662275, -1122666983, 1023707939, 1028952789) + + W(11, -1117384012, -1137245711, -1136860123, 1001899534); + WS(1052277756, 1024619064); + sum1 = + W(0, 1029425189, -1155801216, 1030252033, -1112675811) + W(1, 1025211073, -1122395345, 994289396, -1124711713) + + W(2, -1120340480, -1107014752, 1038469578, 1032327591) + W(3, 1034534022, 1039704040, -1137717581, 1045346698) + + W(4, 1055903799, 1049346130, 1057427995, -1103272826) + W(5, 1040656272, -1111436999, 1016775164, -1134653325) + + W(6, -1093981294, -1099678446, -1098793998, -1090697510) + + W(7, -1103403378, 1041606598, 1005246730, 1013960373) + W(8, -1125235986, 1003141514, 1025012844, -1118748640) + + W(9, 1025335343, -1106833215, 1029370155, -1099652851) + + W(10, 1026200342, -1131584400, 1031806855, -1126277858) + + W(11, 1010568017, 1032770798, -1124454216, 1034190187); + sum2 = W(0, 1023289146, 1020437582, 1009748873, 1030199907) + W(1, 1007345789, 1010063617, 1018930146, -1127799210) + + W(2, 1045862572, 1028786959, -1120652794, 1015917080) + + W(3, -1117660774, -1106003069, 1039304637, -1098471935) + + W(4, -1095566322, -1132354231, -1090793815, 1063879108) + + W(5, -1107315270, 1045942315, -1106431667, 1048572904) + + W(6, -1087461476, -1101737384, -1083542229, 1069217208) + + W(7, 
1056862945, -1153692567, -1111757292, 1056217861) + + W(8, 1016132472, -1127607289, -1107157844, -1097091848) + + W(9, -1098187278, -1116485608, 1036040005, -1094693884) + + W(10, 1044890152, -1124440021, 1037463572, 1032502373) + + W(11, 1028169917, -1108513707, 1034190881, -1108016950); + WS(-1086218302, -1084258561); + sum1 = W(0, 1027347742, 1021408948, -1127633782, 1039366096) + + W(1, -1116749595, 1033396118, -1139872292, 1025066091) + W(2, 1041008695, 1036310380, 1035524367, 1048593117) + + W(3, -1138037400, 1045683171, 1032120647, 1046369740) + W(4, 1040638149, 1041666443, 1033011818, 1050103608) + + W(5, -1111907839, -1097626581, -1099416246, -1095221944) + + W(6, -1088163117, -1103354245, -1094269524, -1122175629) + + W(7, -1096387988, -1103712029, -1111740158, -1117250695) + + W(8, 1045697689, 1033383494, 1040895122, 1042904478) + + W(9, -1136050786, 1018682217, -1126758410, -1121047845) + + W(10, -1131589088, 1033326017, -1132088142, 1042877939) + + W(11, -1131538396, 1039834587, 1030901630, 1034112696); + sum2 = W(0, -1116225927, 1030773056, -1122365237, 1032148531) + + W(1, -1127594672, 1026422036, 1023835916, -1131330830) + + W(2, 1020583173, 1018516207, -1121497633, 1032012136) + + W(3, -1111134479, -1112679629, -1123177144, -1112996078) + + W(4, -1120289762, -1142227828, 1024002380, 1050603076) + + W(5, 1040110681, -1109379336, -1102178993, -1102999759) + + W(6, -1116630862, -1157244503, 1045806125, 1061490298) + + W(7, -1104925089, -1096282325, -1103628682, -1116593585) + + W(8, 1037488437, -1120616119, -1122845130, 1029135782) + + W(9, -1122119114, 1035818151, -1118358279, 1024073644) + + W(10, -1120774706, 1021325617, -1124264052, 1020114521) + + W(11, -1129316292, -1129796491, 1024324894, -1129630471); + WS(-1092406524, -1089571522); - sum1 = W(0, -1120714617, 1007614003, -1123305414, 1009908268) + W(1, -1129324172, -1121901526, 1022915304, -1142748844) + W(2, 1035162146, -1123901891, 1027364554, 1030225939) + W(3, -1116882705, 1030599214, 
-1135846624, -1113117446) + W(4, -1110752415, 1046176310, -1114390527, -1110762423) + W(5, 1032526991, -1096831458, -1117077507, -1127767760) + W(6, -1121818163, -1105876799, -1096860741, 1046191054) + W(7, 1043289735, 1052908756, 1034530771, 1039462233) + W(8, 1035961863, 1034124059, 1022953826, 1023450788) + W(9, -1114841418, -1114991488, -1116257712, 1025047056) + W(10, -1120878363, -1120533037, -1155422496, -1124524780) + W(11, -1129613106, 1013731840, 981289536, -1143328976); sum2 = W(0, -1131108965, 1000650931, 1025688791, -1116852534) + W(1, -1145056307, -1123043594, 1018436747, -1139914565) + W(2, 1043465638, -1113612493, -1106748353, 1029599173) + W(3, 1045881517, -1118230687, 1029259867, -1107504809) + W(4, 988544780, -1124179817, -1070139293, -1102659167) + W(5, 1078104617, 1048052425, -1126127905, 1025133515) + W(6, -1145266179, -1111438835, -1110118136, -1097100047) + W(7, 1027449515, 1027739737, -1136112041, 1036504803) + W(8, 1018501507, 1003301507, 1026178295, 1031198431) + W(9, -1123515159, -1134523125, 995063942, -1120319069) + W(10, 1021344415, -1131299129, -1123696315, 1012522521) + W(11, 1024975831, 1020199127, -1162666156, -1131042995); WS(1061710334, -1113637247); - sum1 = W(0, -1121814583, -1138603692, -1138264845, -1140194465) + W(1, -1131439046, -1125621862, 1023073058, -1129805541) + W(2, 1036106897, -1122595552, 1018454510, 1033737112) + W(3, -1122735662, 1024541667, -1122620661, -1116003130) + W(4, -1109309294, 1048694647, 1030681928, -1115559686) + W(5, 1044326142, -1097437158, -1132933761, -1128614666) + W(6, -1143950978, -1103580007, -1097890158, -1120177173) + W(7, 1042373458, 1052689231, 1034486114, 1037961704) + W(8, 1033251382, 1032154570, 1019050396, 1021617155) + W(9, -1113884080, -1107957310, -1116304653, 1019245786) + W(10, -1118153848, -1122508490, 1012933309, -1120902675) + W(11, -1152998244, 1016843898, -1144273714, -1155701620); sum2 = W(0, -1162960213, 1013908203, -1113327157, 1023571595) + W(1, -1133790043, 1024139725, 
-1130037920, 1007034787) + W(2, -1107084454, 1023868855, 1032525526, 1048796624) + W(3, -1104878976, 1003078933, -1121062518, 1034207166) + W(4, -1126626424, 1032930462, 1076098019, -1132699465) + W(5, -1072747140, -1104764998, 1019199125, -1123497947) + W(6, 1025411499, 1029914487, 1035900190, -1093496445) + W(7, 1026052839, -1109617402, 1025857863, -1118139493) + W(8, -1126548940, 997665611, -1121182801, 1038561302) + W(9, -1146184997, -1142783113, -1147685381, 1017680405) + W(10, -1127269937, 1018748433, -1158385717, 1000017309) + W(11, -1123007653, -1154085627, 1000677165, 1016687665); WS(1064800702, 1030635520); - sum1 = W(0, 1004285524, -1122860750, -1122286551, 1021316024) + W(1, -1116388951, 1022533838, -1125657162, 1001429301) + W(2, -1128056492, 1024005652, 1035530413, 1053335517) + W(3, -1139373960, -1130762670, -1121564617, -1129213371) + W(4, 1035663048, 1031878036, 984521208, -1100061872) + W(5, 1048911707, 1035194194, -1131931048, -1141986761) + W(6, -1110660706, -1116831438, 1023270757, -1093549817) + W(7, -1104621327, -1149876978, -1142066105, 1031904066) + W(8, -1128734654, -1204558974, -1136886024, 1042579293) + W(9, -1148999845, -1142259845, -1120920449, -1111776529) + W(10, 1028175148, -1143857649, 1005008153, 1016449422) + W(11, -1135015336, 1010510404, 1018152308, 1020350678); sum2 = W(0, 1015139874, -1150118671, 1032711786, 1028577839) + W(1, -1116703472, -1123435694, -1140098908, -1140019692) + W(2, -1120731382, -1116737087, -1087320828, -1069693959) + W(3, -1103666133, -1138801588, 999422392, 1029210329) + W(4, 1017539382, 1031888972, 1059502015, 1078824499) + W(5, 1045823576, 1009232228, 1011539428, -1118193829) + W(6, 1020112270, -1112637962, 1026899401, -1094071548) + W(7, 1036639812, -1114441289, 1023067274, -1131442506) + W(8, -1126248934, 1010207212, -1119882792, 1040940327) + W(9, -1131804892, 1029410629, -1130083750, 1007549476) + W(10, 1013963196, -1138130380, 1019091922, -1123861886) + W(11, 1004924552, -1130443436, -1137456436, 
1026106777); WS(1058942782, 1023618692); - sum1 = W(0, -1126072821, 1011693912, 1017195423, -1122268692) + W(1, 1022607903, 982545387, -1126061755, 1003763447) + W(2, 1041771492, 1025548876, 1036961985, 1037536142) + W(3, 1038780013, 1023558139, 1037537425, 1035261291) + W(4, -1110777188, 1035801072, -1094081945, -1091452759) + W(5, -1095962376, 1049096396, -1113879430, 955835050) + W(6, -1110871838, 1043278082, -1103634941, -1105679152) + W(7, -1103558179, -1130619215, -1136949949, 1026350907) + W(8, 1036426764, -1146527995, 1046715887, 1040286827) + W(9, 1049101574, 1031851588, 1002869195, 1006549707) + W(10, -1142526755, 1015953709, -1148709083, -1119884351) + W(11, -1122534754, 1007174309, 1017517093, 986004587); sum2 = W(0, -1128639222, -1150616818, -1118932607, -1125568566) + W(1, 991022546, -1129687950, 1018120650, 1006676397) + W(2, 1001638233, 1013509397, 1035428655, 1024255335) + W(3, 1031481278, -1111645244, 1028380841, -1115817479) + W(4, -1119841411, 1029376723, 1019940778, 1063260126) + W(5, 1032753418, 1026999545, -1121385219, -1134958989) + W(6, 1014385869, 1037911199, -1102461584, -1085129330) + W(7, -1110003004, 1019019978, 1021463302, 1017625850) + W(8, 986023269, 970306708, -1141011097, -1110526300) + W(9, 1033244979, -1121764803, 1025283273, -1128128990) + W(10, -1135260845, 1013815957, 959170344, 1017551658) + W(11, 1016351942, -1127392454, 1009569613, -1138867245); WS(1037837808, 1056377428); - sum1 = W(0, -1135573495, 1023727061, -1118803082, -1123500111) + W(1, 1020600341, -1111066541, 1008936161, -1115286797) + W(2, -1105969090, -1138454045, -1114548551, 1046351686) + W(3, 1036383371, 1034647805, 1026657308, 1033474888) + W(4, 1027945538, -1116105371, -1126955510, -1122693874) + W(5, 1053007142, 1035274060, -1156882549, -1122516460) + W(6, 1043223694, 1039709953, 1051232418, -1100833583) + W(7, -1103740252, -1101618713, 1033814037, -1129756781) + W(8, -1100584037, -1111027805, -1097887999, 1040497066) + W(9, 1024768136, -1138012047, 
-1123286772, 1010716753) + W(10, 1025656415, 1021082723, 1018764077, -1115071549) + W(11, 1025234525, -1116333124, 1016891995, -1114238799); sum2 = W(0, 1020978875, -1121670462, -1103298336, -1110731474) + W(1, -1105136733, -1119790394, -1127100056, 999102475) + W(2, -1100967324, -1130583533, -1096736455, 1053186254) + W(3, 1045958215, 1043729632, 1033816035, 1020355333) + W(4, 1043179705, -1122981488, 1062358565, -1112891412) + W(5, 1031005462, 1033523405, -1109547546, -1122874410) + W(6, 1026290891, 1032797386, -1104401130, -1091109944) + W(7, -1133705481, -1102472853, 1031959215, -1122532081) + W(8, 1013196277, -1120286641, -1114052290, 1048200412) + W(9, -1113847816, 1033631266, -1126209517, 1027184816) + W(10, -1138979633, 966672716, -1152672163, -1122242747) + W(11, 1023845499, -1118586102, -1147194793, -1127245018); WS(1050865148, 1032626572); - sum1 = W(0, -1110436993, 1040042556, -1107890608, -1113356418) + W(1, 1034032544, -1105712244, 999321197, -1110201208) + W(2, 1043627312, -1101476879, 1049906138, -1109025830) + W(3, -1120749707, 1051482110, -1103830356, 1048543054) + W(4, -1118552440, -1108049256, -1107535228, -1162779891) + W(5, -1095271743, 1035225158, -1104074479, -1106449960) + W(6, 1045700875, 1033317691, -1115480133, 1049713062) + W(7, 1042738346, -1122810712, 1050825756, 1031488492) + W(8, -1115060612, 1031191472, 1025393771, -1106727656) + W(9, -1127443770, 1039553933, -1104755066, 1040395170) + W(10, 1040430485, -1096410465, 1042925123, -1114330616) + W(11, -1106585845, 1042770518, -1103219825, 1009713998); sum2 = W(0, 1024944996, 1020193961, 1014966838, 1015828472) + W(1, -1113612636, 1032934706, -1116105943, 1021395599) + W(2, -1111924540, 1007347355, -1116539066, 1012341552) + W(3, 1034050704, -1113193567, 1042584092, -1116104243) + W(4, -1107012648, 1049541488, -1107447750, -1109770505) + W(5, -1111197863, 1014122612, 1016467631, -1135977693) + W(6, 1057346437, 1054940943, 1059914271, 1066972918) + W(7, 1063973493, 1056333532, 
1045837843, 1043326701) + W(8, -1105023593, -1097277462, -1142156696, 1043445868) + W(9, 1017043379, -1118781870, -1113967259, -1117573943) + W(10, -1097010076, -1107952444, -1085761230, -1078926120) + W(11, -1084679416, -1092348708, -1128370702, -1108363561); WS(1054811644, 1027249161); - sum1 = W(0, 1040940111, -1100243090, 1043481462, -1119365790) + W(1, -1111878381, 1044780378, -1101638738, 1035929492) + W(2, -1101358582, 1036221579, 1014943347, -1109150355) + W(3, 1036687602, -1109759237, 1028424107, -1122643648) + W(4, -1113576073, 1038054901, -1097482896, -1106696288) + W(5, 1022736646, -1097940765, 1038478827, -1103674991) + W(6, 1049141881, -1105999252, 1051960356, 1048275427) + W(7, -1118345360, 1049145430, -1113101949, 1037909315) + W(8, -1132483205, -1104538698, 1036681666, -1123112824) + W(9, 1019000333, 1044474573, -1103799553, 1037239602) + W(10, -1116754862, 1037145120, -1118995685, -1126689554) + W(11, 1038634998, -1101292557, 1034438878, -1126283866); sum2 = W(0, 1023766557, 1015306072, -1136376403, -1113107482) + W(1, 1038494252, -1113447013, 1023629411, 1024784188) + W(2, 1016554458, 1020144724, -1116988904, 1050162106) + W(3, -1104347057, 1003678517, 1033762395, -1109441777) + W(4, 1021815988, -1103467660, 1034521268, -1102546407) + W(5, -1103257907, 1025406308, -1113239746, 1026085595) + W(6, -1137435361, 1016553974, 1060795535, 1075503478) + W(7, 1072821349, -1113972415, 1031283975, 1037416902) + W(8, -1114856427, 1044555388, -1088370625, -1071164689) + W(9, -1077590614, -1112563255, 995356642, -1112537357) + W(10, 1032032312, -1105991725, -1127777881, 1035804901) + W(11, -1120944929, 1036418439, -1123929939, -1140753514); WS(1055940220, -1124188157); - sum1 = W(0, -1113999823, 1015763751, -1115168029, 1022861696) + W(1, -1114461725, -1113117029, 1032164308, -1121800074) + W(2, -1119569210, 1043278990, -1094609330, 1044340802) + W(3, 1033154083, -1106205123, 1021089498, -1107756257) + W(4, 1015781094, -1106256755, 1052603679, -1095339972) + 
W(5, -1108564239, 1042723938, -1125646960, -1171638077) + W(6, -1147977024, 1046188556, 1041916962, -1108128054) + W(7, 1057380776, -1095928258, 1041299799, 1027517543) + W(8, -1117684995, 1036478965, 1033522483, 1047885622) + W(9, 1036635468, 1035899866, -1139371156, -1131185034) + W(10, -1108336232, 1006648968, -1101485874, -1110457109) + W(11, -1114614567, -1108232826, 1034599135, -1115299047); sum2 = W(0, -1120118804, 1013270421, -1120552641, -1117027367) + W(1, 1016254148, -1110780393, 1030787932, -1133117880) + W(2, -1136369034, 1027353114, -1125884733, 1035518448) + W(3, 1043285424, -1129166063, -1140216380, -1120569751) + W(4, -1127959037, -1129939889, -1117793827, -1095769137) + W(5, -1095274849, -1117623897, -1128598589, -1129927717) + W(6, 1015341014, 1034464346, -1117887441, 1043338219) + W(7, 1049861339, -1128474295, 1032330618, 1018695247) + W(8, 1033327798, 1041751325, 1049619624, 1049093640) + W(9, 1048627700, 1035712363, 1035384499, 1027229852) + W(10, -1113602386, -1114857235, -1103868602, -1097589990) + W(11, -1105817123, -1106207094, 1004274322, -1120123842); WS(1057163582, 1025817537); - sum1 = W(0, 1037167835, -1121852313, 1034559945, 1027044951) + W(1, 1039868489, 1037598609, 1007581568, 1031626785) + W(2, 1031655825, 1046829204, 1038865075, 987855837) + W(3, -1105739319, -1112419405, 1015381448, 1037495244) + W(4, -1096271953, -1093143228, -1085198281, 1050118415) + W(5, 1057217669, 1047779928, 1035805671, 1015872616) + W(6, 1045084224, 1046279122, 1057074420, 1040167170) + W(7, -1086525384, -1087515051, -1098926044, -1124461564) + W(8, 1033520525, -1103697416, -1107583934, 1024989936) + W(9, 1034776664, 1044254259, -1134624240, -1113253902) + W(10, 1016078798, 1030441081, 1036548879, 974934139) + W(11, 1034922130, -1113468278, 1036139674, 1017385676); sum2 = W(0, -1136384493, 1023814631, 1033659202, 1017810163) + W(1, 1009967381, 1017781727, -1118180802, -1141961139) + W(2, 1036526786, -1099879539, 1034332004, 1013925981) + W(3, 
-1106516740, -1101585218, -1131784625, -1112082470) + W(4, 1028719631, 1030342243, 1046758826, -1093436173) + W(5, 1051870663, 1042788701, -1114786901, -1113417628) + W(6, -1123632092, 1043747658, 1050427835, -1100447595) + W(7, 1041231786, 1045006578, 1010389989, -1117114477) + W(8, -1145098603, -1102737141, -1112406001, -1119158797) + W(9, 1034292458, -1107586767, -1143106923, 1032164569) + W(10, -1122280548, -1130733945, 1014543213, -1123106681) + W(11, 1019870319, 1013053757, -1119739637, 1012643781); WS(-1081763615, -1092598780); - sum1 = W(0, -1130922677, -1122514966, -1136114845, 1015393433) + W(1, -1137786397, 1023447063, -1142668459, -1140705115) + W(2, -1114318275, 1026414532, 1017564438, 1021344041) + W(3, -1095352826, -1116164091, -1123782681, 1012849463) + W(4, -1127997567, -1121576984, -1088500870, -1098266094) + W(5, 1061435536, -1148866211, 1039772482, 1024002537) + W(6, 1028467828, -1127358845, 1061837768, 1035926493) + W(7, -1095349722, -1114585584, -1114677716, -1116920883) + W(8, 1023598927, 1017010415, -1116904946, 1037585875) + W(9, 1040151266, 1031742785, 1007413957, -1128808099) + W(10, -1139280241, 1002425203, -1123987795, -1136160989) + W(11, -1131570741, -1116123969, 1020007637, -1126849079); sum2 = W(0, 1021958137, -1117837187, 1026312941, -1115513274) + W(1, 1037370871, -1112242113, 1033686533, -1107052036) + W(2, 1034408685, 1036094407, 1033683959, -1124949158) + W(3, -1091894288, 1027011647, -1107943508, 1010459130) + W(4, -1106091708, -1147105428, -1094093059, 1061646324) + W(5, 1058519893, -1106569597, 1037199097, 1030945863) + W(6, -1098486326, -1110159069, -1088187103, 1055280585) + W(7, 1045036050, 1045721104, 1034325435, 1039418143) + W(8, 1028745769, 1031154711, -1105512708, -1100598283) + W(9, -1109806227, -1117345257, -1144974476, -1124920152) + W(10, 1024682325, -1116910593, 1039406737, -1117359799) + W(11, 1026080301, -1117139296, 991455177, -1112397476); WS(1049187708, 1061143407); - sum1 = W(0, 1032206028, -1121811767, 
1032297330, -1136528167) + W(1, 1005295654, 1031846890, -1136496587, 1032778794) + W(2, 1046469409, 1033070593, 1049873346, 1050639650) + W(3, 1041319460, 1041542033, 1036329419, 1043869975) + W(4, 1044620591, 1020031147, -1111788011, -1089881571) + W(5, -1127219501, 1033106537, 991465499, -1112918515) + W(6, -1086760535, -1113096094, -1092765627, -1083700943) + W(7, -1092202425, 1011207027, 1015330121, 1026637361) + W(8, 1049164066, -1132628435, 1051671486, 1044844041) + W(9, 1037004161, 1026161438, -1134373563, 1019220893) + W(10, 1022165171, 1009114287, 1034535880, 1024964917) + W(11, 1011091807, 1038185039, 1018308239, 1032129132); sum2 = W(0, -1123557888, -1139662759, -1120649660, 1021700488) + W(1, -1119215196, 1025758640, -1134635263, 1025396502) + W(2, 1024038368, 1022314468, 1016353740, -1149024430) + W(3, -1127462832, -1109226016, 1022909500, -1106595065) + W(4, 964997605, 1007232143, -1112234110, 1052797849) + W(5, 1032636001, -1096670151, -1103246759, -1095307242) + W(6, -1123190992, 1040909663, 1035500288, 1059925772) + W(7, -1123381020, -1110751472, -1114698588, 1016687528) + W(8, -1170950771, -1119781676, -1122589660, 1021322900) + W(9, -1124742672, 1037872214, -1123038904, 1025386190) + W(10, 1009594487, 1011035391, -1122952928, 1015425876) + W(11, 1024335912, -1115520552, 1026100870, -1128653768); WS(-1080960863, 1058419411); - sum1 = W(0, -1118700722, 1032983255, -1122641758, 1033094082) + W(1, -1121000958, -1133773881, -1141737059, -1120123854) + W(2, -1111066847, -1112208894, -1099579692, -1090459763) + W(3, -1107128402, -1107276892, -1116550064, -1103159313) + W(4, 984198859, -1099715572, 1049479517, 1062349342) + W(5, -1099059374, 1033076198, -1125040759, 1037355536) + W(6, 1043965403, -1113907195, 1050672334, 1061713555) + W(7, 1059583022, -1121054998, 1024684126, 1026513898) + W(8, -1108783427, -1113914551, -1105019770, -1095692932) + W(9, -1099879371, -1108028092, 1024533736, -1110661700) + W(10, 1000248987, -1124179019, -1128328213, 
1023738862) + W(11, -1131329315, 1027231576, -1113801521, 1007580489); sum2 = W(0, 995719700, -1114687459, 1024240180, -1115935924) + W(1, 1025427056, -1123062148, 999440794, -1131128122) + W(2, -1129327601, 1021214658, 997717028, 978476974) + W(3, -1140454571, 1010080137, 1002330866, -1152746908) + W(4, -1125739202, 1027238209, -1120915257, -1078687396) + W(5, -1111104335, -1128218828, -1147995690, -1128526870) + W(6, 1029045123, 1037994429, 1036441804, 1066236156) + W(7, 1022342240, 1028953671, 1024857232, -1126806088) + W(8, 1025141617, -1113434302, 1032546636, 1035362808) + W(9, 1033358736, -1166615662, -1165027863, 1024563904) + W(10, -1154923092, 1007115993, 1003062850, -1118441375) + W(11, 1017727844, -1129048983, 1019778022, 1009762473); WS(-1082880574, -1095080656); - sum1 = W(0, -1120399523, -1123397517, -1135116544, -1168421181) + W(1, -1131259136, 1037719400, -1113348279, 1030740495) + W(2, -1111736159, -1128373242, 1025039907, -1127950106) + W(3, -1098593297, -1106461210, 1024172498, 1028249110) + W(4, -1111698940, 1019423266, -1092929515, -1098702381) + W(5, 1060991762, 1018082074, 1036669022, -1114016273) + W(6, 1037879414, 1028697399, 1062200098, 1023355236) + W(7, -1094756999, -1115312044, -1106391640, -1109491766) + W(8, 1037066756, -1119567216, -1131199446, 1027882718) + W(9, 1038218895, 1022591155, -1118950442, -1141634128) + W(10, -1118524245, 1027581671, -1139610160, -1139561152) + W(11, 1005358744, -1113962893, 1026868195, -1126548648); sum2 = W(0, 1007561151, -1118820752, -1128452386, -1121745985) + W(1, -1114861795, -1121584151, -1120079456, -1129117272) + W(2, -1106890729, 1031296198, -1110907715, 1032462388) + W(3, 1035962501, 1035033495, 982365671, 1035951051) + W(4, -1144059530, -1112391017, 1056169840, -1096446085) + W(5, 1031556034, 987032615, -1138039959, -1119286197) + W(6, 1010626011, -1114880200, 1052511775, -1094697844) + W(7, 1056886944, -1129701328, 980377326, -1126702674) + W(8, -1140206187, -1136937433, 1034255225, 
1033067226) + W(9, -1104164393, 1013679559, -1115367679, -1137374445) + W(10, -1152147163, -1120281594, -1114711449, 993102387) + W(11, -1120009849, -1122813977, -1118918686, 1006646323); WS(1058795070, 1058351276); - sum1 = W(0, -1129226172, 1032650784, -1134152362, -1122312016) + W(1, -1145265749, -1118796187, 1017891569, -1131365684) + W(2, -1106246658, -1132244111, -1102574120, 1028257624) + W(3, 1004724909, -1128587973, -1150296521, -1107598873) + W(4, 1041614193, 1048401911, 1050141831, -1088102699) + W(5, -1093353638, -1112285036, -1122892680, -1120116589) + W(6, 1041522846, 1041471472, 1033352747, 1030285885) + W(7, 1056599450, 1049138668, 1043369227, 1040658126) + W(8, -1106977045, 1031926461, -1121658742, -1124591186) + W(9, -1107883356, -1114299650, -1130730345, 1034850634) + W(10, -1140271486, -1130658915, -1115542891, -1119417531) + W(11, -1155196377, 1003429157, -1125742491, -1137642362); sum2 = W(0, 1018264792, 1019878456, 1020074080, -1143212320) + W(1, -1148803168, -1127882992, 1017409120, -1131165168) + W(2, -1109681111, -1134735936, -1106212594, 1037009054) + W(3, -1132238328, 1032289620, 990246720, 1004132864) + W(4, 1028734812, -1093714299, 1034660210, 1047288883) + W(5, 1031452084, -1112028563, -1115169564, 1020410832) + W(6, -1104210606, -1099909667, -1098896203, 1060586916) + W(7, 1042866381, -1109339206, 1023925320, -1112228144) + W(8, 1025579416, 1032419228, 1010388000, 1030286292) + W(9, -1107165918, 1019446368, 989303425, 1029468412) + W(10, 1029305888, -1113499692, 1025031188, -1130541520) + W(11, -1127563664, 1026005920, -1123344268, 1009015280); WS(1043816952, 1056206353); - sum1 = W(0, -1132948972, 1024900084, -1150680698, 1016358425) + W(1, -1160444148, -1115326124, 1019123435, -1120343575) + W(2, -1115461859, -1107172113, -1097009915, -1102438229) + W(3, 1024199571, -1148961467, -1130723763, -1144998513) + W(4, 1034684352, 1038250028, 1061195075, 1051576078) + W(5, -1088975285, 1010078034, -1107729186, 1022450003) + W(6, 
1037119642, 1039626815, -1095598055, 1056604231) + W(7, 1057172865, 1029312608, 1039764276, -1112740858) + W(8, -1107088436, -1110088668, -1113187289, -1102307882) + W(9, -1103288997, -1122873761, 1032878393, 1034042985) + W(10, -1144087781, -1116127087, -1117712852, -1114968828) + W(11, -1134624362, 1012385158, -1114736693, 1002559709); sum2 = W(0, -1135809827, -1140675011, -1137384851, -1147988326) + W(1, 1019754904, -1172160176, 1006265798, 990940844) + W(2, 1031002711, 1009667595, -1156373420, 1010789683) + W(3, -1122328477, -1135703979, 1016962928, -1135289651) + W(4, -1136033931, 1025543601, -1123691685, -1105219090) + W(5, -1118075063, -1111692677, -1131889022, 1010975355) + W(6, -1114169661, -1124911966, 1056449903, 1044469394) + W(7, 1046754031, 1034039333, 1024901767, -1111731157) + W(8, -1107967067, -1108557845, -1096033675, -1118550723) + W(9, 1020360378, 1005574774, 1025982043, 1036125487) + W(10, 1027957130, 1004385158, 1015282250, -1115586301) + W(11, -1120013003, -1130287690, -1127946498, -1169619760); WS(1049886076, 1034318367); - sum1 = W(0, -1140544720, 1006465467, 1025588553, -1118154094) + W(1, 996302474, -1111905052, -1125961261, -1122979160) + W(2, 1022042959, -1106141033, -1104462159, -1100197533) + W(3, 1029682164, -1112348667, -1117824763, -1123002959) + W(4, 1021784769, 966352080, 1059646174, 1051735858) + W(5, -1085821075, 1029991670, -1111853843, 1031029131) + W(6, -1131075883, 1032360624, -1086897182, 1059041662) + W(7, 1057694799, 1036690656, 1030592201, -1155480906) + W(8, -1142660485, 1029412697, 1042100949, -1106256562) + W(9, -1106530518, -1121375714, 1017125029, 1014936522) + W(10, -1160888244, -1121174096, 1017231209, -1150715274) + W(11, 1024907260, 1015672965, -1120397516, -1124207481); sum2 = W(0, -1110846605, -1118194097, -1143898437, 1019764549) + W(1, 1008325379, 999139301, 997400075, 1024689095) + W(2, 1032320648, 1027617785, 1000132981, -1102116254) + W(3, 1026769453, -1131321177, 1008054267, -1118482095) + W(4, 
1027180085, -1144235925, 1031570329, 1058710858) + W(5, 1031557517, -1129394727, -1131248001, -1120094485) + W(6, 1029603693, 1030911393, -1119293171, 1058397441) + W(7, -1104852463, -1097515304, -1123234663, -1106708620) + W(8, 1033577170, -1134544211, -1126616369, -1096477332) + W(9, -1103611475, -1115701158, -1119810656, -1134741251) + W(10, -1112673640, -1123614185, -1129974305, 1018853209) + W(11, 1036639048, 1018401637, 1024391909, 1017993989); WS(1058429118, 1064863249); - sum1 = W(0, 1021156518, 1024394983, 1020219616, -1131178666) + W(1, 1016100640, 1007341951, 994206685, 964693930) + W(2, 983001563, -1115666854, 1039490386, 1046648007) + W(3, 1017911734, 1043757188, -1128564932, 1045299318) + W(4, 1054264097, 1049468028, 1055450440, -1104196510) + W(5, 992909240, -1104186631, -1122765517, -1105268364) + W(6, -1097350840, -1099725362, -1115114453, -1085827188) + W(7, -1100930382, -1128095056, -1118903024, 1023907580) + W(8, -1175841770, 1024539904, 997016493, 1043203980) + W(9, 1033767351, -1119513801, -1131557236, -1106015350) + W(10, 964768362, -1149024695, 1012766063, 1022718592) + W(11, -1136266891, 1035657141, 1016423590, 1029236260); sum2 = W(0, 1005391535, 1018096716, -1135288360, -1121648533) + W(1, 996459166, -1130245220, -1123582348, 1033610651) + W(2, -1106696811, -1121907467, 1026226746, 1031305614) + W(3, 1032267559, -1111725659, 1028271694, -1108648643) + W(4, 1050575955, 1051313221, 1047952814, -1081952512) + W(5, 1040750694, 1031579134, 1007524520, 1034169827) + W(6, -1100156814, 1060662591, 1068258385, -1074657656) + W(7, -1095314974, 1036231973, 1044691450, 1044987108) + W(8, -1098256564, 1029467086, -1142136447, 1051661425) + W(9, 1035381379, -1107714686, 1022647148, -1100160947) + W(10, 1028021126, -1129063320, -1131820748, 1008147176) + W(11, -1168737402, 1020424408, -1129848660, 1024695710); WS(-1083443454, 998713176); - sum1 = W(0, 1032696047, 1026355790, 1024354795, 1034252472) + W(1, 1012872363, 1032281789, 1027888345, 1016830581) 
+ W(2, 1040709994, 1040668906, 1036398699, 1051748575) + W(3, 1035129883, 1044965075, 1031400701, 1048807512) + W(4, 1040929033, 1032459486, 1012490030, -1097753826) + W(5, -1114537329, -1100635790, -1103916602, -1098186970) + W(6, -1087559501, -1103862520, -1102928053, -1093112676) + W(7, -1092883854, -1099081393, -1104850339, -1112352715) + W(8, 1049786774, 1037090637, 1047072725, 1050721065) + W(9, 1039546228, 1011437457, 1009955155, -1114553850) + W(10, -1146095614, 1033091241, 1024780023, 1037512365) + W(11, 1024615038, 1037286498, 1036010231, 1033653631); sum2 = W(0, 1035861958, -1161872500, -1113290777, -1137177342) + W(1, 1034613598, -1103597964, 1038756242, -1111249467) + W(2, -1098289107, -1119785106, 1033623622, 1045692473) + W(3, -1106425659, -1117971136, 1019694879, -1114046628) + W(4, 1026686124, 1049978705, -1098402027, 1024040864) + W(5, 1053770527, -1127968019, 1049090628, 1052585151) + W(6, -1096172460, -1107697817, -1091833397, 1013412750) + W(7, 1046082211, 1057075430, 1047895771, -1102204197) + W(8, -1113513064, 1027596044, -1098063812, 1031079096) + W(9, 1012376430, -1095279992, 1045163433, -1100444173) + W(10, 1019723703, -1121217732, 1033198374, -1107709748) + W(11, 1038124498, -1112843267, -1120386622, 1034273086); WS(-1086369662, -1078015058); - sum1 = W(0, 1024279387, -1100066053, 1039589628, -1104719791) + W(1, -1113174076, -1125885710, -1111149795, 1024511623) + W(2, 1042615374, -1106499229, 1049518735, -1115604002) + W(3, -1127523972, 1049389829, -1102519338, 1042212335) + W(4, 1054091094, -1113085188, 1050614757, -1103236040) + W(5, 1047781504, -1108066658, 1041501469, 1048227542) + W(6, -1095346029, -1103658220, -1097514477, -1100854410) + W(7, -1097175852, 1026529231, -1104719494, -1099302313) + W(8, 1050416872, -1111759460, 1048966812, -1122281110) + W(9, 1043919349, 1032565272, 1027138593, 1036646913) + W(10, -1113840294, -1141254386, 1032485602, -1113439156) + W(11, -1119980106, 1039770777, -1104628175, 1032389542); sum2 = W(0, 
-1092512531, -1093473489, -1081280020, -1076988140) + W(1, -1081342477, -1086556968, -1107875638, -1121362077) + W(2, -1098738343, -1088925799, 1042571962, 1043337888) + W(3, 1033193400, 1010020371, -1102154130, -1126174974) + W(4, 1063615535, 1062426368, 1067242406, 1066811408) + W(5, 1066297017, 1060290312, 1052028857, 1037059936) + W(6, 1009177475, 1026425874, 998962046, -1096426756) + W(7, 1022780060, 1026166068, 1017583668, -1119102823) + W(8, -1119750726, 1012050343, 1025403720, -1129159374) + W(9, 1023044380, -1115355073, 1026819560, -1127777686) + W(10, 1030476954, -1120184004, 1031881676, -1116680638) + W(11, -1135563955, 1004442726, 946141982, 1034449190); WS(-1129707456, 1007685382); - sum1 = W(0, -1134894751, 1044154267, -1111052786, 1038046773) + W(1, -1192544411, -1128991721, 1035051017, -1122391156) + W(2, -1098519836, 1030394337, -1107874844, -1118678129) + W(3, -1111301144, -1115520098, 1015910765, -1103669035) + W(4, 1040671079, 1013414869, -1103276809, -1098881194) + W(5, -1110522396, -1107120076, 1033063921, -1111557603) + W(6, 1044073412, 1041040503, 1038778991, -1107344299) + W(7, 1048980909, 1052479600, 1049200001, 1051478546) + W(8, -1109530006, 1035376874, -1105794628, -1139987723) + W(9, -1108336721, -1112546431, 1023468750, 1018554845) + W(10, -1134057701, 1028314315, -1112561176, 992777541) + W(11, -1112828028, -1132207293, 999515194, -1119107128); sum2 = W(0, -1137824107, -1126465818, 1005362547, -1158497445) + W(1, -1119770196, 1021074352, -1138544392, 1034621027) + W(2, -1110122518, 1033612180, -1116868027, 1039739597) + W(3, 1037998233, -1112420682, 1025116420, -1120069279) + W(4, -1137081539, -1106894813, 1045165527, -1106604299) + W(5, -1138568822, 1039276731, -1104439342, -1113398206) + W(6, 1039063461, 1047362418, 1067526191, 1071589227) + W(7, 1061860820, -1077533661, -1073373771, -1094637864) + W(8, -1158403413, -1117814890, -1136640937, 1044031039) + W(9, -1133800170, 1031122886, -1106196407, 1031315442) + W(10, -1124656554, 
1023374342, -1115663214, -1148955153) + W(11, -1123862185, 999718675, -1139157324, 1025985948); WS(1033791472, -1138498893); - sum1 = W(0, -1143657507, 1014781903, 1029133201, 1018917100) + W(1, 998381448, 1016812674, -1118545247, -1137667330) + W(2, -1102835756, -1108646231, -1108649138, -1106020223) + W(3, 1003669472, -1113672010, -1114481905, -1127743919) + W(4, 1024249065, -1130492804, 1008218246, 1054388603) + W(5, -1092207694, -1132293500, -1112726231, -1119594899) + W(6, 1044190455, 1021681616, -1115040338, 1058377694) + W(7, 1046387590, 1049136345, 1017676194, -1119541597) + W(8, -1107762844, -1112711792, 1019895480, -1123372431) + W(9, -1111491159, -1123523050, -1123035739, 1028355707) + W(10, 1008291608, -1128240816, 1025256893, 1014932868) + W(11, 1007448080, 1008489644, -1112884042, 1001709096); sum2 = W(0, 1014470821, -1120344268, -1114330800, -1106002658) + W(1, 1024999308, -1126273593, -1127411221, -1123975632) + W(2, -1106657768, -1106292024, 1049926230, 1054743689) + W(3, -1112305858, -1122403994, -1116955850, 1030827431) + W(4, 1034825132, 1032219219, 1057813788, 1049962194) + W(5, 1051810194, 996027236, 984340447, -1114551452) + W(6, 1006830039, -1101139832, -1102461554, -1103958261) + W(7, -1098680718, 1040205712, -1109277736, -1111240037) + W(8, -1104684796, -1114556582, -1111886229, 1050373872) + W(9, 1035894797, -1100520246, 1014102650, -1106040331) + W(10, -1124459301, 1008940380, 1029827978, -1133852786) + W(11, -1109427720, 1024816377, -1117160816, 1003623824); WS(1060089726, 1074996161); - sum1 = W(0, -1117558175, 1029119241, -1141503452, 1010789890) + W(1, 1037081758, -1120312844, 1027150698, 1021223318) + W(2, 1046443391, 1039893948, 1041448800, 1035829124) + W(3, 1041862066, 1028398131, 1038548631, 1047369209) + W(4, -1083103171, -1093429938, -1088988175, 1032282738) + W(5, 1035486030, -1142682180, -1111879740, -1105296983) + W(6, 1040714346, 1021452133, 1048946821, 1021975771) + W(7, 1040332065, -1090323173, -1112512014, -1095359061) 
+ W(8, 1044503809, 1030628716, 1038933875, 1038764813) + W(9, 1032949373, 1041097413, 1029165798, 1046714577) + W(10, 1023987857, 1027140771, 1026874120, 1026271340) + W(11, 1034225415, -1113760891, 1028642719, -1134282558); sum2 = W(0, -1130708327, -1156760442, 1011385503, 1005458237) + W(1, 1015786967, -1121500080, 1023017975, 1008927007) + W(2, -1131662151, -1160974837, -1122121264, -1104012424) + W(3, 1025508020, -1106371091, 1032453110, -1107217673) + W(4, 1027052068, -1119116140, 1006726095, 1053197196) + W(5, -1150821434, 1032166230, -1116451706, -1143126685) + W(6, -1104634511, -1148640061, -1101053828, 1050251696) + W(7, 1043632028, -1115585403, 1032061998, 1045664978) + W(8, 1037500532, 1028521640, 1041142688, -1121651972) + W(9, -1105814552, -1117076340, -1120517680, -1103126409) + W(10, -1117309872, -1132259207, -1129219511, -1148190653) + W(11, 1005943453, -1120252400, 1024831312, -1126349911); WS(-1081408895, 1057237802); - sum1 = W(0, 1016592219, -1135569700, 1019913045, -1134722237) + W(1, 1020337237, 1019971573, 1015924209, 1027236154) + W(2, 1050393725, 1025037351, 1042967287, 1042881918) + W(3, 1033060699, 1038993285, 1028206140, 1041649576) + W(4, -1096948620, 1021729271, -1123436299, -1107250293) + W(5, 1047123275, 1037602588, 1034435092, 1044331459) + W(6, -1099121222, -1114451665, -1104931659, -1092679092) + W(7, -1098053310, -1091584672, -1092984777, -1089381821) + W(8, 1042338077, 1033766733, 1036822635, 1042265522) + W(9, 1043883210, 1042862441, 1029491668, 1042007901) + W(10, 1026859007, 1023688209, 994107237, 1017621813) + W(11, 1033354815, -1147819555, 1035642823, 963919445); sum2 = W(0, -1127968320, 1036537032, -1112917417, 1020801080) + W(1, 1019615648, -1127208552, 1023589392, -1121416948) + W(2, -1092240358, 1030963640, -1097473768, -1109029546) + W(3, -1116583352, 1028578068, -1113404853, 1041090246) + W(4, 1057542400, -1105653780, 1053316325, 1058712288) + W(5, 1043894648, -1093453186, 1034405836, -1094394128) + W(6, 1016463192, 
-1097383073, 1038201480, 1052867895) + W(7, -1123129572, -1141347136, 1006791808, 1037486484) + W(8, -1094079665, 1040975890, -1101066411, 1013027424) + W(9, -1118015492, -1117505000, -1111557726, 1044943944) + W(10, 1037438732, 1019305024, 1028884816, 1010261952) + W(11, 1027371024, -1113360113, 1034904608, -1104647878); WS(-1076745215, 1064070508); - sum1 = W(0, 1019383636, -1138649581, 1029667879, -1127653591) + W(1, 1027426151, 1022926189, 1004012708, 1016119660) + W(2, 1043437193, 1030286820, 1040316179, 1048543741) + W(3, 1029763954, 1039947283, 1025479577, 1041368223) + W(4, -1100863080, 1033675388, -1102225484, -1084687593) + W(5, -1111626052, 1026499316, -1122995300, 1026134601) + W(6, 1019299111, 1027453406, -1110289513, -1082029409) + W(7, 983973284, 1031954367, -1118301831, 1016553765) + W(8, 1041928012, 1031909515, 1043406276, 1044625233) + W(9, 1044339921, 1039879984, 1022733729, 1012331970) + W(10, 1018313975, -1136380470, 1026991539, -1136351578) + W(11, 1027257018, 1022902273, 1026798858, 1027440187); sum2 = W(0, -1131696089, 1008718823, -1149188438, -1140198251) + W(1, -1129603953, 1007479839, -1141563590, 1018622169) + W(2, 1012459087, -1135045027, -1118135053, 1033739972) + W(3, -1142742214, 1015462815, -1126906553, -1135479163) + W(4, -1112347233, -1125420309, 1041969824, 1052325325) + W(5, 1029891118, -1120072309, 1017890385, 1016803437) + W(6, 1039451269, -1110618065, 1024383435, 1052741682) + W(7, -1121721953, -1095903036, -1105015368, -1098230278) + W(8, -1158657302, 1007247903, 1015365577, 1034080133) + W(9, -1110453303, -1162307222, -1122346803, 1021342643) + W(10, -1129595249, 999949222, -1130814181, -1140370763) + W(11, 1020979581, -1132990667, 1008938691, -1137891195); WS(-1094563452, 1051169575); - sum1 = W(0, -1154622990, 1029393378, -1117775501, 1033996553) + W(1, -1136342582, 1018491963, 1032030382, 1015760954) + W(2, 1018130713, 1035203109, -1113234793, -1106127031) + W(3, -1114335628, -1112874623, 1023872671, -1110067034) + W(4, 
1040428881, 1032896399, 1049040808, -1112183035) + W(5, 1048012112, 1044702279, 1032915973, 1035998904) + W(6, -1120902330, 1039535746, -1102863965, -1090463816) + W(7, 961094679, -1098092733, 1033184663, 1029109433) + W(8, -1113835211, 1021002107, -1107942972, -1105904891) + W(9, -1148767576, -1124337998, 1006151100, -1110631596) + W(10, 1019319877, 1036762426, -1135300966, 1040237135) + W(11, 1032716400, -1137189022, 1037223880, 1013167238); sum2 = W(0, 1028413027, -1120070369, 1012563150, -1111535757) + W(1, 1011962278, 1009477102, -1153318262, 974823129) + W(2, -1121092135, -1130520759, -1120753460, -1097640928) + W(3, -1098008778, 999766235, -1122757360, -1129371381) + W(4, 1017337743, 1037177446, -1106734706, 1051800696) + W(5, 1035782812, -1106510696, 1010623246, 1028214977) + W(6, -1137632446, -1104029390, 1048794348, 1050145982) + W(7, -1114477541, 1047938812, -1111017918, -1123769252) + W(8, 999930971, 1033133092, -1111694827, 1041394994) + W(9, -1118971109, -1127041263, 1020162151, -1124747017) + W(10, 1027578971, -1114678191, 1030530567, -1119592910) + W(11, 1001961835, 1024176823, -1115486877, 1007999422); WS(1057759166, -1088449289); - sum1 = W(0, -1129635066, 1025446574, -1144519245, 1017539549) + W(1, 1026395242, -1122678010, 1025431320, 1024557433) + W(2, 1044873994, 1037627635, 1041685209, 1033910011) + W(3, 1039651930, 1035038013, 1039320958, 1038868536) + W(4, -1087389198, -1097118359, -1089578865, -1129568803) + W(5, 1010566389, 1030050103, -1110122707, -1112614790) + W(6, -1116302487, 1022204225, 1022125102, -1105345911) + W(7, -1122366800, -1097803608, -1119554634, -1126568107) + W(8, 1048786726, 1039292653, 1045860709, 1045748274) + W(9, 1027479143, 1033498658, -1161253492, 1010965710) + W(10, 1017011581, 1021371937, 1025212662, 1031878029) + W(11, 1013486066, 1007580942, 1028360045, 1015223035); sum2 = W(0, -1162281894, -1124547002, -1150111187, -1135956229) + W(1, -1145634890, 965409433, -1123274137, 1024679583) + W(2, 1033992046, 
-1149048922, 988741862, -1132541637) + W(3, -1132011310, 1024739191, 1008671821, -1129294934) + W(4, 1042856153, 1036788056, -1112557119, -1097406101) + W(5, -1121853795, -1104384368, 1032013500, -1147059498) + W(6, 1038465062, 1046094595, 1060179844, 1049669935) + W(7, -1101657128, 1042681611, -1111116605, 1027020519) + W(8, -1093956780, -1100316514, -1090501223, 1022378118) + W(9, 1045140163, -1121116299, 1035479824, -1131272734) + W(10, 1024284423, 1035682376, -1122577303, -1113139255) + W(11, -1130855582, -1118824057, 1006161754, -1124922046); WS(-1086987838, -1100233980); - sum1 = W(0, 1015308459, -1122325985, -1116672610, 1032904116) + W(1, -1141168915, 1033123257, 1001375875, 1015833828) + W(2, 1041765192, 1028094431, 1042485054, -1118546351) + W(3, -1118052589, -1116818498, 992241989, 1015146386) + W(4, -1105415847, -1114108890, -1092596043, 1046690691) + W(5, 1054244345, 1049735534, 1040724504, 1041637233) + W(6, 1042444900, 1041449686, 1049087917, -1128556841) + W(7, -1089307961, -1094503991, -1104029377, -1100869156) + W(8, -1111075669, -1106140165, -1110170951, 1034284768) + W(9, 1026589088, 1030527932, 1008796465, -1123717612) + W(10, 1003799483, 1034767973, -1144225139, 1033288361) + W(11, 1010481109, -1121578684, 1030765989, -1124977133); sum2 = W(0, -1128145157, 1026284201, 1012811840, -1136599052) + W(1, 992541697, 1011005728, -1127430370, 995789721) + W(2, -1100557855, -1106810438, 1042787919, 1044596046) + W(3, 1038174685, -1104618112, -1166127490, -1116633305) + W(4, 1057648426, 1054522141, -1098531848, -1073275189) + W(5, 1061008073, 1059653331, 1035655860, 1017274260) + W(6, 1048654145, 1057115188, 1046636853, -1074262745) + W(7, 1044984510, 1060082709, 1034192337, 1033697834) + W(8, -1122215769, -1103409060, 1046426252, 1035358469) + W(9, 1036107748, -1104639697, -1130597644, -1112828961) + W(10, -1133425768, 1017140792, -1118227807, -1143532160) + W(11, 1016191398, 981079778, 1006837720, 1021901152); WS(-1092446204, 989212831); - sum1 = W(0, 
1007124942, -1116778591, 1027403088, -1121715541) + W(1, 1019921952, 1020332096, -1139218425, 1020381759) + W(2, 1048125124, -1131432602, 1042237585, -1138060917) + W(3, 1027977243, -1126238792, -1131884420, 1024321015) + W(4, -1138101793, -1106585464, 1016240146, 1037962972) + W(5, 1053191424, 1053622732, 1043276755, 1049124886) + W(6, -1141178857, 1019782926, 1036610823, -1101006449) + W(7, -1094792341, -1089064867, -1101892348, -1107274751) + W(8, -1102978326, -1115311760, -1110971902, -1114322839) + W(9, 1031189102, -1137835047, -1132849129, -1110951117) + W(10, 1034583667, 1015089582, 1027127900, 1008360193) + W(11, 1028625507, 1017386452, 1009465805, 1008208785); sum2 = W(0, -1120159352, -1125980662, -1131578305, 1028063854) + W(1, 1021975879, 1033612769, -1119579693, 1040487308) + W(2, -1088595541, 1050146519, -1090859444, -1107336769) + W(3, -1105868965, 1041431951, -1112749441, 1042279365) + W(4, 1060560783, -1093128727, 1057304640, 1071289406) + W(5, -1087527230, -1086794372, 982969010, -1089785065) + W(6, 1061831343, -1098317152, 1057012981, 1069689231) + W(7, -1097221007, -1086042421, 1033122226, -1088997285) + W(8, -1089452071, 1042347964, -1095995319, -1096815763) + W(9, -1104502319, 1049524493, -1104150062, -1135764460) + W(10, -1101640393, 1038452583, -1105624394, 1041229675) + W(11, 1037791301, -1143180172, 1029453052, 1049121852); WS(-1080642303, 1037515653); - sum1 = W(0, 1014399585, 1040389217, 1025436037, 1035628117) + W(1, 1032176658, 1019714410, 1031481267, -1129425897) + W(2, -1123461796, 1009459592, -1118449912, -1111999897) + W(3, 1042449564, -1098862637, 1015874414, -1108656118) + W(4, -1115821917, -1138501592, 1046808963, 1046961203) + W(5, -1085801589, 1042823052, 1032561937, 1022709518) + W(6, 1037702505, 1046330151, -1088063230, 1048171302) + W(7, 1046621083, 1033619294, 1017602698, -1172911161) + W(8, -1105260875, -1113097082, -1166545913, -1112931778) + W(9, -1116953058, -1108126015, 1033606503, -1161936349) + W(10, -1134503912, 
1032999462, 1011659972, 1033647146) + W(11, 1020514160, 1023868127, 1030447848, 1015791986); sum2 = W(0, -1139497271, 1025910769, -1105297083, 1037339247) + W(1, -1111547600, -1123186639, 1029488041, -1137152287) + W(2, -1104932637, 1043978669, -1105480129, 1037617885) + W(3, -1107402541, -1105522985, 1011577705, -1102258444) + W(4, 1000924601, -1116100213, -1102785010, 1049988969) + W(5, 1045831134, 1040977948, 1036894407, 1025599417) + W(6, 1034980171, 1042493673, 1044002411, 1050036780) + W(7, 1039942071, -1097804729, 1042714488, -1139888185) + W(8, -1102656208, 1038409695, -1098474508, -1122964745) + W(9, -1106907325, 1028039731, -1112976714, -1112698813) + W(10, -1154378706, 1025837919, -1109802618, 1036616991) + W(11, -1132149525, -1111839940, 1040500443, -1114191023); WS(1060124606, -1084472548); - sum1 = W(0, 1012638700, -1131934189, -1129347279, -1138043955) + W(1, 1007380511, 1022374575, 996895579, 992489843) + W(2, -1112711971, 1021193865, -1125331574, 1048955067) + W(3, -1106488069, -1115361884, -1120058426, -1118477310) + W(4, 1024524352, 1017460401, -1113956442, -1094951982) + W(5, 1048203795, 1005249701, 1026650437, -1129456851) + W(6, 1032623962, -1117271615, 1049120342, 1015143283) + W(7, -1110142007, 1024310560, -1121731240, 1036683482) + W(8, -1115629480, 1023262959, -1108495481, 1031640934) + W(9, -1129214986, -1136673413, 1017029987, -1115998674) + W(10, 1027484509, -1157409515, 1010910287, 1021584239) + W(11, -1144904917, 1017598403, -1145343061, 1014521967); sum2 = W(0, -1113301049, -1129519836, -1113144608, -1114997262) + W(1, 1028189537, -1144798799, -1135227415, -1123606042) + W(2, 1041723134, 1018887182, 1065721494, 1081282167) + W(3, 1053752802, 1004635839, 1024321495, 1032429021) + W(4, -1119425470, -1123219625, -1082046556, -1068115046) + W(5, -1091841582, -1126716683, -1149813598, -1137534547) + W(6, 1012563935, 1021107126, 1032971141, -1092304788) + W(7, -1120681827, 1026059029, 992723582, -1121765421) + W(8, 1013366251, -1131491053, 
-1132479565, 1028331651) + W(9, 1021570574, -1132445755, 1010781703, 1024141029) + W(10, -1135264199, 1017265018, -1135106943, 927030725) + W(11, 983594844, -1137356791, 959859164, -1123588459); WS(1064307390, 997943845); - sum1 = W(0, 1016504192, 1004931163, 1001468389, -1125536771) + W(1, 1024701821, -1157300426, 1008115882, 1014991280) + W(2, -1110031838, -1124632947, -1106796660, -1132488742) + W(3, -1111862756, -1111316170, -1153319146, -1116314499) + W(4, -1108555962, -1129767067, -1106350773, -1097456765) + W(5, 1040312105, -1110123941, 1030600762, -1109063375) + W(6, 1047561626, 1003984285, 1053522405, 1050670238) + W(7, -1128004006, 1039422954, 1005773541, 1042190650) + W(8, -1114174882, 1017992857, -1106382906, -1136221548) + W(9, -1123896954, 1020844396, 1015300557, 1024320647) + W(10, 1004953741, 1018940039, 1000370245, 1015934681) + W(11, 1011062590, -1128259885, -1153739770, -1130969505); sum2 = W(0, 1036505224, 1018720963, 1037554666, 1035136259) + W(1, -1131720942, 1012123245, 1027168757, 1024471767) + W(2, -1100095598, -1119632397, -1082101932, -1072130218) + W(3, -1090292986, -1114842502, -1108983905, -1105383800) + W(4, 1039944480, 1033659023, 1064933062, 1074634461) + W(5, 1057038142, 1046242606, 1018437263, 1041395450) + W(6, -1110028565, 1025734363, -1103479495, 1012777017) + W(7, -1132873326, -1117820012, -1117294599, 1033191694) + W(8, 1016119595, 1015545311, 1036578506, 1036162128) + W(9, -1120419990, -1115011900, 1004277346, -1113427773) + W(10, 1017378303, -1126149289, 1022196513, -1118704038) + W(11, 1020047721, 1033886326, -1136709697, 1024978781); WS(1053812476, -1113586226); - sum1 = W(0, -1130399840, 1028349447, 1021963321, -1121520129) + W(1, 1020769785, -1115853964, 1021860117, -1121122124) + W(2, -1111011390, -1120416825, -1107941440, -1106086226) + W(3, -1129958415, -1107644952, -1122155502, -1114352785) + W(4, 1007197394, -1105796643, 1056809437, 1046348585) + W(5, -1090756230, 1050300257, -1116443513, 1041113720) + W(6, 
1037708373, 1044272829, -1093131829, 1057081835) + W(7, 1059679544, -1119895572, 1040260199, -1110926804) + W(8, -1122978308, -1110423782, -1122097703, -1099751719) + W(9, -1102044582, -1111488955, 1019054925, -1156322407) + W(10, -1139049030, -1125249991, 1011721762, -1114497366) + W(11, 1018253471, 1022041005, -1121197023, -1135672898); sum2 = W(0, 1015672618, -1121994623, -1149412980, -1127744422) + W(1, 990134373, 1024671460, -1132259460, -1125579600) + W(2, 1010541588, 1032866568, 1044676778, -1124040172) + W(3, -1102290659, 1029499881, -1120084193, 1001699435) + W(4, -1115998566, 1011616452, -1107438884, 1058620442) + W(5, 1049322309, -1104592911, 1035840939, -1123886068) + W(6, 1020421847, -1095022641, -1097106911, 1015017230) + W(7, -1104275347, 1045525693, -1119993661, 1037165892) + W(8, -1129704409, 1020628925, -1112769863, -1113422252) + W(9, 1039268635, 1026741584, 999745387, -1126762930) + W(10, 1015113530, 1017019595, 1022483811, 1015061518) + W(11, -1114030797, -1136765715, -1131682952, -1134246014); WS(1059863230, -1098226968); - sum1 = W(0, -1111154474, 1033627108, -1111220881, 1018934285) + W(1, -1156119577, -1108034825, 1035708952, -1116166225) + W(2, -1130042625, 1044463840, -1098768819, 1036585799) + W(3, 1041677635, -1100472134, 1038444026, -1111999396) + W(4, -1124092147, -1125693093, 1043860042, -1094381437) + W(5, 1034781361, 1038465479, 1027036095, 1019243847) + W(6, -1126659421, 1045974478, 1040108899, -1098844542) + W(7, 1051479065, -1096809363, 1040658808, 1003830125) + W(8, -1116454602, 1040744732, -1149847241, 1043775752) + W(9, 1046003792, -1121805723, 1037992790, -1131289879) + W(10, -1109234699, 1026252457, -1103478147, -1107008620) + W(11, -1127254569, -1105262391, 1038964162, -1116821910); sum2 = W(0, 1031860762, -1114215534, 1037917300, -1123072391) + W(1, -1121765219, 1036233577, -1113082189, 1007761049) + W(2, 1008804487, -1119320351, 1028769566, -1145928741) + W(3, -1118791492, 1035112755, -1119166399, 1034891236) + W(4, 
1036033743, -1105317049, 1033869945, -1104666827) + W(5, -1103277106, 1043081028, -1110309063, 1003334201) + W(6, -1130814575, -1126605754, 1029943513, 1058632863) + W(7, 1010033985, 1033184724, -1124066564, -1146667244) + W(8, -1114717025, -1103884858, -1097519272, -1084821476) + W(9, -1091781337, -1108906997, -1105454190, -1124982586) + W(10, 1042429978, 1019583714, 1050527129, 1057878257) + W(11, 1047090522, 1050006554, -1116436299, 1037673833); WS(1053829756, -1108691549); - sum1 = W(0, 1014621415, -1117035633, 1017668333, -1121088251) + W(1, -1113943163, 1027915142, -1117882340, 1018110476) + W(2, 1034972923, 1031518451, 1028891034, 1043029617) + W(3, 1037608353, -1118505878, 1008280866, -1114328254) + W(4, -1105089900, -1146432572, -1111069516, 1052288956) + W(5, 1043183409, 1040529537, -1127832039, 1025673432) + W(6, -1121242915, 1011744694, -1099260407, -1096216885) + W(7, -1109446679, 1033001703, 1023717270, 1038283409) + W(8, 1024842558, -1195363583, 1035963185, 1043236853) + W(9, -1104376301, -1110826184, -1108863440, -1136644866) + W(10, -1125107735, 1024600093, -1111257697, 1022444749) + W(11, 1008767270, 983774736, 1021787123, -1122279626); sum2 = W(0, 1011466728, -1128158188, 993685376, 1021341187) + W(1, -1125064327, 1016637363, -1135616946, -1142265296) + W(2, -1153535168, -1133074596, -1111924694, 1041916532) + W(3, -1115375045, -1111577830, -1121861231, -1139232914) + W(4, -1121363658, -1108077390, 1048370655, -1088803436) + W(5, 1050909934, 1048268187, 1028734454, 1033829714) + W(6, -1113493290, 1031842286, -1096696142, -1097359431) + W(7, 1066069354, -1102058354, -1112810542, -1108918948) + W(8, 1016979229, -1114077565, 1032702600, 1051606360) + W(9, -1108957150, -1106393780, -1104652034, -1124153420) + W(10, -1121975877, 1032726750, -1126878203, 1020447303) + W(11, -1115472084, 1011253904, 1032484333, -1135690267); WS(1043379192, -1131322837); - sum1 = W(0, -1129021027, -1148980502, -1117253390, 1015261926) + W(1, 1024719214, -1130947836, 
974083843, 1010796022) + W(2, -1109595831, -1141769560, -1131540259, 1038059198) + W(3, 1043151230, 1040366611, 1036650848, 1032003186) + W(4, 1033800339, 1040943040, -1099641090, 1019360940) + W(5, 1047704456, -1120007803, 1032688054, -1135300928) + W(6, 1042153991, 1016895818, 1040857522, -1103092216) + W(7, -1091529984, -1100986498, -1116706574, -1114490317) + W(8, -1099711038, 1029537886, -1104535414, 1032384358) + W(9, 1043292971, 1031568104, 1019020464, 1026900536) + W(10, 1026187331, 1023917269, 999787984, -1137467256) + W(11, 1021851650, -1117455240, 1021119266, -1124200964); sum2 = W(0, -1127801857, -1143073539, -1106914355, -1114698807) + W(1, -1129241721, 1034854605, 1015357631, 1026781991) + W(2, 1028175283, 1010285366, 1045024365, -1107796176) + W(3, -1093181393, -1094857339, -1104001534, -1106206681) + W(4, -1129802997, 1022418743, -1109183829, 1055188047) + W(5, 1060464193, 1038631095, 1035894131, -1121422795) + W(6, -1133354170, -1111757330, 1035388549, -1099199714) + W(7, -1108056884, 1042537531, 1019867695, 1010545294) + W(8, -1129332011, 1032847739, 1028119663, 1027769667) + W(9, 1015260775, -1123722407, 1021694327, -1120402439) + W(10, 1029358411, -1119713292, 984677997, -1126683209) + W(11, -1117802232, 1025618013, -1116557581, 1021478623); WS(1049844732, -1121310639); - sum1 = W(0, 1023733410, -1099209689, 1040893204, -1102957277) + W(1, -1114244983, -1136674298, -1108461358, 1021220348) + W(2, 1044402267, -1106734859, 1050332331, -1111686974) + W(3, -1122456058, 1049981942, -1102278760, 1043895987) + W(4, 1053233309, -1108848000, 1049912979, -1098618899) + W(5, 1048539349, -1111429758, 1039273110, 1048170180) + W(6, -1096212219, -1103164857, -1099336615, -1097562740) + W(7, -1096335583, 1033031043, -1102589972, -1101337719) + W(8, 1051767783, -1109302031, 1050917291, -1121562509) + W(9, 1042792075, 1039550692, 1008842338, 1041281742) + W(10, -1114613442, -1129199659, 1033787229, -1111886543) + W(11, -1121890370, 1041151474, -1104020956, 
1031699907); sum2 = W(0, 1051830333, 1052363768, 1062286287, 1068323273) + W(1, 1062941652, 1058036233, 1038399576, 1029358050) + W(2, 1040398919, 1056364330, -1105094784, -1117763636) + W(3, -1117097644, -1135317262, 1045469136, -1127713443) + W(4, -1088812275, -1088095023, -1083945825, -1086712736) + W(5, -1085128967, -1090471655, -1096613184, -1118175248) + W(6, -1112895824, -1128031249, -1110939052, -1122350162) + W(7, -1107088013, -1117471490, -1128774604, 1023889083) + W(8, 1030823820, -1126124865, -1131257287, 1034881723) + W(9, 993374262, 1022005418, -1124473506, 1006015479) + W(10, -1120174945, 1022839272, -1113476028, 1031923437) + W(11, -1149114843, -1131972943, 1015689530, -1114795832); WS(997080576, -1130763300); - sum1 = W(0, -1113905570, 1030153879, -1117230256, -1152616277) + W(1, -1146808706, -1112646432, -1186209359, -1120216576) + W(2, -1121320815, -1108108727, -1100762033, -1090117932) + W(3, -1109500574, -1102311764, -1119610882, -1099135446) + W(4, 1038521748, 1032364321, 1051702873, 1069923213) + W(5, -1104049339, 1028696893, -1120830317, 1030578386) + W(6, 1025325853, -1131460303, 1040312882, 1063008042) + W(7, 1056019095, -1115439812, -1125581089, -1116447004) + W(8, -1108459484, -1152289285, -1099800679, -1092045374) + W(9, -1100849447, -1105934578, 1028735898, -1108771165) + W(10, -1123369669, -1111143084, 1018134009, -1116551136) + W(11, 1015899783, -1120248394, -1113190874, -1120342084); sum2 = W(0, -1130955153, -1115062100, -1114768924, 1040485485) + W(1, -1102247365, 1040365390, -1105380703, -1134789779) + W(2, 1025142055, 1033634795, -1114031272, -1115074190) + W(3, -1110799922, 1039570131, -1112489130, 1035629875) + W(4, 1025405091, -1124058081, 1047530952, -1104495565) + W(5, 1038913214, -1102056033, -1123724877, -1104428707) + W(6, 1041203911, -1151569099, -1160505239, 1065129660) + W(7, -1115442088, 1037120419, -1145632358, 1035843769) + W(8, 1034798579, -1098708869, 1048498835, -1095909934) + W(9, -1105078519, 1024518769, 
-1109649658, -1117935125) + W(10, -1107186683, 1041850582, -1097808347, 1045395705) + W(11, -1106337339, 1007217475, -1152795787, -1124566009); WS(-1078383103, 1059446981); - sum1 = W(0, -1139864362, 1022304576, -1117916615, 1007335089) + W(1, -1119884647, 1022280942, 1023668405, 1034312731) + W(2, -1142609202, 1008862865, -1109139429, 1032501274) + W(3, 982044231, -1115332063, 991223187, -1102309771) + W(4, -1105667544, -1100327790, -1098424968, -1089810345) + W(5, 1033240873, 1049118183, -1140016273, -1110413061) + W(6, 1048881003, 1047898440, 1053051896, -1104044598) + W(7, 1051763270, -1113330784, 1027424455, 1032557738) + W(8, -1110989208, 1029966423, 1023833874, 1040575921) + W(9, -1114053146, 1041606896, -1122699729, 1041157596) + W(10, 1019807480, -1128249884, -1114518074, 1013296733) + W(11, -1110353213, -1126749570, 1021514772, -1121516731); sum2 = W(0, -1143648694, 1022767513, 963325627, -1138915747) + W(1, 1040530304, 1031585165, 1008260859, -1120690652) + W(2, 1050457027, -1104882348, 1052893834, -1122824043) + W(3, 1019582897, -1094902239, -1113748748, -1090635807) + W(4, -1090901428, 1017677825, -1090554669, 1048412513) + W(5, -1101863176, -1104950416, -1120605391, 1056780417) + W(6, -1095258955, -1116653154, 1030383529, 1063122641) + W(7, -1089352859, 1057380402, -1106433792, 1049388154) + W(8, 1047388223, -1125154789, -1114292033, -1118207917) + W(9, 1048190601, -1102824630, 1038107916, -1104842210) + W(10, 1034828442, -1149114774, 1040557770, 1005930742) + W(11, 980397999, 1033506230, -1115355457, -1136517107); WS(-1104952056, -1073278929); - sum1 = W(0, -1144211169, 1031125659, -1121100585, -1142367854) + W(1, 1025675017, -1114298540, 1036563163, -1122834968) + W(2, -1099507218, -1116651974, -1101060075, -1105066053) + W(3, -1131576359, -1114707952, -1120605819, -1111900952) + W(4, 1047627264, -1113308557, 1054491321, 1039149624) + W(5, -1106626380, 1042584197, -1128818827, 1039018645) + W(6, 1043300637, 1037274304, -1102367022, 1051689271) + 
W(7, 1044811828, -1128472513, 1045578618, -1115669626) + W(8, -1098196346, -1126577137, -1102852173, -1106731162) + W(9, -1111689619, -1126957444, 992521004, 1025441875) + W(10, 1030331294, -1122667056, 1026346581, -1120693369) + W(11, 1018492983, -1140414731, -1128018583, 997033420); sum2 = W(0, -1117577133, -1139616023, 997820975, 1028627780) + W(1, -1114909048, 1032747040, -1116006677, 1030832637) + W(2, 1036324905, -1124977156, -1120808104, -1109376562) + W(3, 1029259135, -1145526876, 1022442689, 983753970) + W(4, 1012676607, -1115101040, 1041273688, 1049990275) + W(5, 1043064430, 1039154681, 1027909669, -1115224952) + W(6, -1164734594, 1032695755, 1041759226, -1096754642) + W(7, 1041324783, -1094055114, -1098711698, 1023440264) + W(8, 1039853902, -1116113854, 1041255405, -1106762918) + W(9, -1111929038, 1025837595, -1130147608, -1112181665) + W(10, -1110394349, 1006188424, -1120661488, 1033535767) + W(11, 1021759445, 1023476809, -1133986843, 1029342490); WS(1057403966, -1096678293); - sum1 = W(0, 1015545167, 1013920108, 1030182836, 991347091) + W(1, 1031028372, -1130110920, -1137745417, -1127755335) + W(2, -1133819725, -1127594402, 1031500952, -1107540753) + W(3, 1042502792, -1120427420, 1023814022, -1113612941) + W(4, 1037006758, -1136130453, 1042283546, 1040431359) + W(5, -1084295706, 1042737160, 1019878434, 1042334622) + W(6, -1110196161, -1123399563, -1086200264, 1060132444) + W(7, 1049379651, 1001749345, 1023472436, -1123659840) + W(8, -1111752718, 1041684833, 1047364560, -1104881213) + W(9, -1113607001, -1117219726, 1017968636, 1008021369) + W(10, 1028708905, -1119121787, -1120221088, 1006406689) + W(11, 1002061113, 1026883950, -1124476270, 1011156129); sum2 = W(0, -1178786588, 1027650789, 1004569898, 1010647389) + W(1, -1130907066, -1128412421, 1000455394, 1019136642) + W(2, -1129322914, -1117835702, -1120140062, -1110565129) + W(3, 1046200355, 1035331810, 1040609034, -1114775527) + W(4, -1116170217, -1106357388, 1012300621, 1051050908) + W(5, 
-1094299123, 1038925264, -1110352470, 1028674911) + W(6, 1036181740, 1050236879, 1029537087, -1121260831) + W(7, -1105419536, -1104188032, -1148832482, 1009994365) + W(8, 1016863918, -1107702296, -1115347042, 1019718506) + W(9, 1018471790, 1035623426, 1031812039, -1129304944) + W(10, -1132924585, -1127926494, 1007741857, -1147888890) + W(11, -1122466004, -1132432621, -1132484471, -1146317634); WS(1056335484, -1129697442); - sum1 = W(0, -1118615510, -1123816285, 1016357093, -1126548591) + W(1, 1017679973, -1125007109, -1130509569, -1127256452) + W(2, 1026825265, -1162479083, -1123245334, -1115166368) + W(3, 993334229, 1009460392, -1117686884, -1110259820) + W(4, -1102686619, -1175208362, -1112742573, 1012633705) + W(5, 1048503365, -1110640192, 1010107581, 1008538193) + W(6, 1045269336, -1122870505, 1056143939, 1022501065) + W(7, 1044007618, 1043433395, 1037226473, 1040188728) + W(8, 1031523962, 1025178018, -1115481871, -1098786498) + W(9, -1107348056, -1104721331, -1124234681, -1112073470) + W(10, -1140818205, -1112741005, 1004860403, -1121817826) + W(11, -1134864917, -1142571619, -1120701013, 1004599859); sum2 = W(0, -1140731697, 1018164054, -1116322329, 1033668632) + W(1, -1110091010, 1006827755, 1024608202, 978878091) + W(2, -1126783010, -1144859189, -1148034885, -1111621890) + W(3, 1034430751, -1133278289, -1134565598, -1106732021) + W(4, 1035005186, 1011714049, -1112384840, 1057652336) + W(5, -1103655808, 1035575897, 1007536733, 1017810394) + W(6, -1110815467, 1039236492, -1109778149, 1053123628) + W(7, -1113563636, 1009902097, 1031837967, 1032913014) + W(8, -1144657885, -1130754383, -1113329105, -1107649638) + W(9, -1112793157, -1136830370, -1131576514, -1118754368) + W(10, -1127206230, -1146205013, -1113225660, 1012470213) + W(11, -1113811608, -1126431906, 1007646275, -1123083034); WS(1064654654, 1035088379); - sum1 = W(0, -1123150274, 1013730923, -1124984926, -1143080352) + W(1, -1115541655, -1120446591, 1019709882, -1123168830) + W(2, -1101065371, 
-1104968795, -1104298005, -1097917367) + W(3, -1115560637, -1112072243, -1121553409, -1101025669) + W(4, 1052021669, 1043838624, 1050953841, 1053857683) + W(5, -1098028717, -1118042236, -1117850604, 1024509393) + W(6, 1034671630, -1116995686, -1125221929, 1053311748) + W(7, 1054064059, 1056948345, 1047317764, 1051915779) + W(8, -1103122974, 1025043295, -1102292982, -1101557747) + W(9, -1102206963, -1099829586, -1123986585, -1105952782) + W(10, -1124628232, -1123853974, -1122875259, -1129945790) + W(11, -1115475969, 1010096420, -1120157778, 1010117900); sum2 = W(0, -1115290116, 1036300391, -1109664288, 1023887773) + W(1, 1031338337, -1112746552, 1029647363, -1123357025) + W(2, -1109338596, -1120873277, -1133763869, -1110763968) + W(3, -1104363651, -1111086518, 1014654409, -1113327782) + W(4, 1052546267, 1031940424, 1041363997, 1046589307) + W(5, 1038758869, 1026186944, 1011886363, -1114411312) + W(6, -1101682429, 1045633640, -1094977574, 1050150667) + W(7, -1104681827, -1121584221, 1042076972, 1033926660) + W(8, -1106866549, -1121551223, 1008614181, -1110488084) + W(9, -1120223295, 1035823152, -1111033402, 1046305164) + W(10, 1027635895, 1021591180, -1119215823, 1009099129) + W(11, 1016354897, -1115537942, 1012736237, -1109553196); WS(-1088190206, -1108558078); - sum1 = W(0, -1109712467, 1034096473, -1111610193, -1111503908) + W(1, 1029528067, -1109918121, -1130336610, -1110290409) + W(2, 1048826552, -1100191472, 1051587686, -1114238856) + W(3, -1124911644, 1053388852, -1101598429, 1049826386) + W(4, -1117596347, -1104233509, -1112703594, -1118548075) + W(5, -1094857227, 1040678692, -1102065648, -1108498290) + W(6, 1045129740, 1021528052, -1123092480, 1045282957) + W(7, 1041941490, -1121835291, 1050563972, 1032628503) + W(8, -1145103958, 1015098342, 1033306871, -1105600408) + W(9, -1116887040, 1043572363, -1101594283, 1041251987) + W(10, 1040739958, -1095362037, 1043699492, -1112734841) + W(11, -1106244021, 1044475255, -1100499697, 1015959598); sum2 = W(0, 
-1140036524, -1122134368, -1124742331, 1023368729) + W(1, 1025643512, -1117935205, 1029315286, -1121900993) + W(2, 1033121292, -1135675176, 1022385651, 1037868584) + W(3, -1113410705, 1034815986, -1106876578, 1029051910) + W(4, 1020537037, -1102563608, 1017232470, -1097085277) + W(5, 1026038978, -1107749524, -1116418603, 1013163432) + W(6, -1094559771, -1090683436, -1089829937, -1081905013) + W(7, -1085580324, -1093433405, -1100509010, -1107168757) + W(8, 1042598592, 1049536131, 1015761873, -1122385293) + W(9, -1119606047, 1034046493, 1037794408, 1020109569) + W(10, 1048560917, 1043095342, 1060516603, 1068776853) + W(11, 1062905414, 1053234619, 1023262329, 1038509970); WS(1048802172, -1118644607); - sum1 = W(0, 1022629891, 1001330921, -1147098739, 1023807923) + W(1, -1129374813, 1028532286, 1031375324, -1151549405) + W(2, -1112271863, 1008285357, -1106982824, 1020475895) + W(3, 958183765, 1025640422, -1133565569, -1106868493) + W(4, -1115934246, 1007889405, -1110669107, -1095657203) + W(5, -1113474437, -1090481180, -1104303995, -1104624276) + W(6, -1112205646, -1137992085, 1044651974, 1050363724) + W(7, 1050921091, 1050665633, 1037811271, 1049383075) + W(8, 1050188943, 1025826270, 1026008921, 1011316531) + W(9, -1106529387, -1131537104, -1125009457, 1022847857) + W(10, -1125264263, -1127716075, -1113499645, 1026946398) + W(11, -1121270209, 1021652689, 1014192325, -1142930963); sum2 = W(0, 1016559128, -1119933527, 1008526536, 1034816070) + W(1, 1018959568, 1026554777, -1112891495, 1046177388) + W(2, -1089262209, 1032931419, -1094914643, -1096197918) + W(3, -1101210129, 1049415798, -1128790619, 1043157162) + W(4, 1053795811, -1108360154, 1052999976, 1069558608) + W(5, -1103281588, -1085501184, -1098019814, -1088254262) + W(6, 1058569170, -1097574423, 1052760357, 1058007152) + W(7, -1106340652, -1101474305, -1114646508, -1096632714) + W(8, -1094475155, 1036603460, -1106271635, 1022028102) + W(9, -1121182797, 1027756295, -1124734105, 1015426864) + W(10, 1008650912, 
-1121755244, 1039081818, 1004102711) + W(11, 1033515588, 1032461240, -1118822413, 1043397723); WS(-1085648446, -1079079370); - sum1 = W(0, 1031831473, -1114079994, 1031367567, -1118896788) + W(1, -1123680847, 1033718927, -1123944253, 1033541103) + W(2, 1036117159, 1003975776, 1044164867, 1045647319) + W(3, 1033826253, 1041970952, 1024952417, 1040029267) + W(4, 1049939273, 1042007752, 1043312672, -1095211959) + W(5, -1122554975, -1116359196, -1176880640, 1026974961) + W(6, -1094329359, -1097850009, -1089391360, -1087218668) + W(7, -1100203021, 1033960979, -1108529239, -1111794397) + W(8, 1040633606, -1134555900, 1041283678, 1044809099) + W(9, 1046447947, 1042238738, 1024390649, 996515216) + W(10, 1023486628, 1019647980, 1023951387, 1014099708) + W(11, 1017251780, 1031788713, -1146375056, 1033511473); sum2 = W(0, 1019169584, -1120973142, 1033125970, 1025364316) + W(1, -1109812797, 1040314133, -1108282977, 1018923536) + W(2, 1042983019, 1009997472, -1103915398, 1036688212) + W(3, -1125420780, -1119986230, 1034277286, -1117630132) + W(4, -1098354632, -1106283039, 1033722376, 1062322516) + W(5, -1098634798, 1057333054, -1102308141, -1125368820) + W(6, -1099987280, 1032814660, -1095179878, 1032770062) + W(7, -1109050948, -1097734639, 1034195486, 1049154263) + W(8, 1048978343, -1103033329, 1055664562, -1097173506) + W(9, -1103688168, -1173745415, -1107997912, -1098308929) + W(10, -1118546716, 1022934360, -1110281795, 1045599869) + W(11, -1119010038, 1038965178, 1007528912, 1035253180); WS(-1089355774, -1078290086); - sum1 = W(0, -1122950775, -1112691207, 1023290087, -1107733500) + W(1, -1146188190, -1132821263, -1112282369, -1116356099) + W(2, 1037182841, -1124344033, 1046400294, -1118583361) + W(3, 1042531744, -1121639475, -1137408791, 1018516820) + W(4, -1118638572, 1041047393, -1098349922, 1057481334) + W(5, -1139289650, -1109625472, 1026873009, -1139389015) + W(6, -1121667553, -1104403323, -1099164822, 1054069421) + W(7, -1097191323, 1041114291, -1120861012, 
-1139200383) + W(8, 1009124079, -1138062223, 1044398104, -1149431076) + W(9, 1044515332, -1120573261, -1120947806, 1017011067) + W(10, -1115334660, -1126199861, -1136525567, -1110719762) + W(11, 1015659149, -1121643278, -1122889719, -1120569352); sum2 = W(0, -1120919296, 1009843654, -1132368064, 1020138111) + W(1, 1029564211, -1115205032, 1016741875, -1119663171) + W(2, -1124567280, -1107129213, 1020831927, -1098918731) + W(3, -1132836513, 1010227332, -1118685376, 1026001154) + W(4, 1037030993, 1043380414, -1109288357, 1059483198) + W(5, -1119273466, -1106893419, 1028452918, -1135273053) + W(6, -1120629799, -1102119519, -1112537154, 1064361176) + W(7, -1104145236, 1043150197, -1117770026, 1019055438) + W(8, -1106639281, 1030878171, -1118412551, -1093377806) + W(9, -1123848756, -1103899854, 1024207514, -1109882780) + W(10, 1020211370, -1126825280, -1144175329, -1146281491) + W(11, -1135251423, 1028327527, -1128215590, 1024565629); WS(1064975294, 1066308158); - sum1 = W(0, 1031747776, -1106142801, 1028818885, 1001228109) + W(1, -1115396498, 1041342669, -1108139113, 1027821620) + W(2, -1119071204, 1032019633, 1041375482, -1112793497) + W(3, 1013499583, -1123968938, 1024220311, 1020544662) + W(4, 984462229, 1045090381, -1093262116, 1049965274) + W(5, 1052662103, -1113450905, 1035427697, -1138282267) + W(6, -1132055971, -1108909198, 1038243096, 1021279149) + W(7, -1090377239, 1040402543, -1107965954, -1116422289) + W(8, -1148253029, -1110036442, 1036945928, -1115536386) + W(9, 1034061985, 993049059, -1113141447, 1016907357) + W(10, -1126913907, 1036419718, -1121925745, 1019907753) + W(11, 1032108124, -1109636927, 1032825796, -1128061041); sum2 = W(0, 1016610899, -1135553471, -1163068737, -1147979120) + W(1, -1121296916, -1139692219, 1007282246, 1019770181) + W(2, -1118189976, 1018031354, -1120752887, 1035872696) + W(3, 1034250650, 1022157658, -1131969269, -1131641536) + W(4, 1027283971, -1118352328, 1042584076, -1095799786) + W(5, 1009508653, -1121907329, -1148404200, 
1013257077) + W(6, 1028895363, 1032145382, 1042427003, 1073605475) + W(7, 1056095764, -1130051225, 1025403981, -1121748387) + W(8, -1113530321, -1123867563, -1099207121, -1075418961) + W(9, -1091570337, -1113117501, -1124635978, -1123610989) + W(10, 1007846553, 1011272254, 1010785270, 1026092591) + W(11, 1030054693, 1019567305, 1012575724, 1018865930); WS(1062423998, 1020226002); - sum1 = W(0, -1122384152, 1000976853, -1122957973, -1140430696) + W(1, 1018079658, -1116871589, 1021042950, -1130305609) + W(2, -1116470612, 1015809956, -1115150860, -1100102424) + W(3, -1099378776, -1102397253, -1121061927, -1097083551) + W(4, -1095861522, -1125526476, -1101122465, 1051208664) + W(5, 1045734276, 1040938184, 1045437908, 1049869175) + W(6, 1055717600, 1034182342, 1052715645, 1053221217) + W(7, 1041738487, 1038184662, -1114085178, 1019939330) + W(8, -1128557498, -1136390676, -1101688288, -1097231798) + W(9, -1114764537, -1107259340, 1034631967, -1105843464) + W(10, -1133541800, 1025071548, -1121585121, 1026360261) + W(11, -1127628368, -1121765359, 1018430000, -1122412137); sum2 = W(0, -1125184611, 985313435, -1181860650, -1124740239) + W(1, -1132645547, 999484231, -1125466771, 1024683619) + W(2, -1124729770, -1132095255, 1028760415, 1011065519) + W(3, 1033849803, 1015359010, -1140486285, -1125848669) + W(4, 1045115043, -1117207517, 1036693207, -1088457353) + W(5, -1096437481, 1048821220, -1107942084, 1027562883) + W(6, 1058112728, 1067077185, -1107623537, -1080660794) + W(7, -1109418981, -1117188353, 1027196953, -1127975224) + W(8, 1023628890, -1158219675, 1043689375, 1020427720) + W(9, -1110185484, 1032363474, -1121893121, 1023023798) + W(10, 1007171579, -1118062236, 1000645119, -1160295467) + W(11, 1015083304, 1023561702, -1134577643, -1158549787); WS(-1085592318, -1113086899); - sum1 = W(0, -1127342656, 1024334944, -1107952998, 1034498345) + W(1, -1131633306, -1110168462, 1033648125, -1119681052) + W(2, -1105245475, 1029597578, -1104926592, -1110412429) + W(3, 
-1103724998, -1098176238, -1115323711, -1098367950) + W(4, -1104578161, -1117644382, -1101901257, 1053247055) + W(5, 1051300307, 1051451939, 1049307889, 1053283054) + W(6, 1053241687, 1041591201, 1051692021, 1044816720) + W(7, 1042909421, -1124765258, -1143558503, -1113485404) + W(8, -1103827813, 1020278010, -1097952575, -1110749814) + W(9, -1106968399, -1105195378, 1027981559, -1102600950) + W(10, -1138537863, 1015573096, -1110174988, 1033881662) + W(11, -1116328992, -1113141267, 1028312016, -1113790704); sum2 = W(0, 1019423143, -1134200265, 1022335641, -1126211289) + W(1, -1160177640, 995157684, -1138832033, -1130236057) + W(2, -1118706158, -1122995556, -1113839515, -1123774588) + W(3, 1002859666, -1114460336, -1131104601, 1027754115) + W(4, -1110500850, 1021681136, -1121978030, 1048787768) + W(5, 1035034344, 1025695056, -1126984825, 1027074464) + W(6, 982325064, -1106847870, 1042414810, 1052221246) + W(7, 1009324191, -1129766425, -1136621129, -1120353368) + W(8, -1117883328, -1115095323, -1112736275, -1118959328) + W(9, 1026260852, -1114723897, 1016807320, -1121253912) + W(10, -1145733922, 1001371122, 1019850892, -1113736050) + W(11, 1023767274, -1130130145, -1131688905, 992217060); WS(1025516512, -1100199588); - sum1 = W(0, 1010096560, 998291066, 1015136529, 1016981531) + W(1, -1134306338, 1015217585, 1025573847, 1027586712) + W(2, 1021891087, -1128181387, -1152435393, 1035336779) + W(3, 1040424469, 1041134374, -1147235028, -1105873140) + W(4, 1041308560, 1015332531, 1054580194, 1043985948) + W(5, -1088067164, -1097437107, -1108043523, 1024655979) + W(6, -1106994989, -1097993736, -1087955312, 1049373383) + W(7, 1048585956, 1044933000, 1037061588, 1041451309) + W(8, -1107391304, 1040752366, 1021987303, 1016493518) + W(9, -1111549299, 1015283332, -1125444197, 1025261651) + W(10, 1032706511, -1115681272, 1013936722, -1153713033) + W(11, -1122756060, 1028860515, -1124931799, 1010373746); sum2 = W(0, 1024904631, -1136864762, 1005033941, 1030076655) + W(1, 
1022202973, 995105737, 1009257370, -1113660547) + W(2, -1115115972, 1023564875, -1114446231, -1115188268) + W(3, -1099782234, 1025338591, -1107689693, 1049272645) + W(4, -1128899549, -1103710322, -1140124434, 1057566767) + W(5, 1051976820, 1009750634, 1040613751, -1105716747) + W(6, -1100416868, -1105139728, -1107001478, 1061216146) + W(7, -1114917196, -1107279078, -1123762171, -1112133417) + W(8, 1014205090, -1112771593, -1105823902, -1101449249) + W(9, -1115621063, -1123827499, -1124460977, 1019919657) + W(10, -1138213458, 1024304323, 1028369943, 1031401203) + W(11, -1127316949, 1002907445, 1021828037, -1121285688); WS(1050734204, -1108852232); - sum1 = W(0, -1111669430, 1026854009, -1115457556, 1022285659) + W(1, -1130768721, -1124925681, 988348114, -1117352760) + W(2, -1116292712, -1106093993, -1104273631, -1090233269) + W(3, -1103820941, -1107176543, -1114665965, -1103610460) + W(4, -1122281377, -1103616254, 1027625969, 1069754213) + W(5, -1114752252, -1130883179, -1144129324, 1032882469) + W(6, 1027495958, -1113635568, 1042550097, 1065691213) + W(7, 1054098174, 1026991873, 1018693905, 1034282506) + W(8, -1113568120, -1125634353, -1100906821, -1090598793) + W(9, -1103145534, -1107767585, 1007587914, -1112131865) + W(10, -1123139966, -1117637392, -1147736996, 999398084) + W(11, -1140005358, -1123096067, -1120005196, -1122062653); sum2 = W(0, 1024214881, -1122830075, 1024710111, -1118490155) + W(1, -1134765475, 1007204875, 955722144, -1132418382) + W(2, -1105712489, 1026001227, -1105988647, 1044311614) + W(3, -1128591966, 1016894538, -1132281374, -1121103003) + W(4, -1096328526, -1089670730, -1109313571, 1057951288) + W(5, 1031038781, -1135710147, -1121635711, 1040031313) + W(6, 1036545949, -1104892025, 1042923133, 1050757116) + W(7, 1041007149, -1111844715, 982097434, -1131469166) + W(8, -1114697429, 1027384734, -1108836817, 1046225965) + W(9, -1111832237, 1020988490, -1141601766, -1137288635) + W(10, 1029115040, -1127506094, 1018384918, -1118982995) + W(11, 
1000957030, 1018633070, -1131312630, 1012074251); WS(-1077332287, -1089760701); - sum1 = W(0, 998476811, -1146931846, 1020090684, -1115596769) + W(1, 1018970620, -1117338894, -1131301080, -1134004534) + W(2, -1126986618, -1106320367, -1103512376, -1106044643) + W(3, 1033789092, -1121544222, 1025908912, -1120853460) + W(4, 1035645275, 1033143495, 1057100532, 1033806067) + W(5, -1089559024, -1119052427, -1109205213, 1021655744) + W(6, 1035996661, 1030948981, -1098691810, 1054020686) + W(7, 1053006209, 1053247323, -1117960094, -1112997269) + W(8, -1111559939, -1116749294, -1114602228, -1102410839) + W(9, -1114317262, -1148875196, 1040862618, 1024305160) + W(10, 1002151576, -1132098378, 1020764052, -1131285468) + W(11, -1143682184, -1140686688, -1112291776, -1137942600); sum2 = W(0, 1006079429, -1142220161, -1129651941, -1120056293) + W(1, 1021210606, -1138049810, 1020679942, -1144760977) + W(2, 1015506585, 1012377004, 1027444401, 1027255740) + W(3, -1154390002, -1121552483, 988683875, -1149209645) + W(4, -1129173219, 1019086641, 1029291472, -1087748986) + W(5, 1030702430, 1046290929, -1149010889, 958643736) + W(6, -1126961905, 1027847692, -1099113060, -1071778912) + W(7, 1077548482, 1017608723, 1009874569, -1165018915) + W(8, 1025558752, -1121396179, 1031608500, -1132223695) + W(9, 1041292060, 1030045463, -1116313503, -1108302337) + W(10, 1004493969, -1129089615, 1032469225, -1117547929) + W(11, 1017793536, -1115202758, 1024909384, -1153372770); WS(1054407548, 983325672); - sum1 = W(0, 1028412425, -1115456470, 1017604747, 1031921957) + W(1, -1121692358, 1041128181, -1131299175, 1034097067) + W(2, -1105600625, 1033490817, -1115519181, 1031550611) + W(3, -1103698804, -1109550327, 1030793272, 1014014769) + W(4, -1148390382, 1044756246, -1103922376, -1090010995) + W(5, 1062123705, -1103937189, 1039103265, -1135652511) + W(6, 1033571545, -1114006910, 1059608097, -1087252051) + W(7, -1094628214, 1042983319, -1106569209, -1116995186) + W(8, 1032293767, -1119790070, 
-1107101148, 1044072953) + W(9, 1027680451, 1021059262, 1007158719, -1141553278) + W(10, -1124427701, 1037509878, 1007976775, 1022727275) + W(11, 1033962892, -1108328080, 1032055396, -1143059662); sum2 = W(0, 1007145536, -1108956115, -1123468231, 1003522017) + W(1, -1106382527, 1023566518, -1116797301, 1016028592) + W(2, 1011835040, 1040320325, 1009493536, 1036429861) + W(3, 1045134298, -1132203156, 1044032552, -1103689691) + W(4, -1115291423, -1145921569, -1120694127, 1057547857) + W(5, -1120933925, -1105348488, -1105744806, 1019566780) + W(6, 1029973058, -1113530029, 1025444390, 1039825371) + W(7, -1093621604, 1030401418, -1111109931, 1039637835) + W(8, 1036345379, -1116109872, -1106839609, 1028735446) + W(9, 1043632212, 1032927131, 1031187850, -1119043970) + W(10, -1106716830, 1031627486, -1106619549, 1032586179) + W(11, -1135351552, -1107014793, 1030984886, -1117261713); WS(1062610366, -1081620328); - sum1 = W(0, 1034909184, 1022225638, 1022593144, 1034659649) + W(1, -1145063906, 1029979468, 1024460554, -1129493655) + W(2, 1041975919, 1036878493, 1038275488, 1049110721) + W(3, 1039909252, 1050285945, 1032760119, 1052309552) + W(4, 1038564634, 1032326503, 1032775158, -1104434937) + W(5, -1098154666, -1091716385, -1098908862, -1093908076) + W(6, -1086061975, -1097049310, -1094128798, -1097332230) + W(7, -1096830529, 1031802154, -1125106744, 1033460244) + W(8, 1054071881, 1038246806, 1049378869, 1048167927) + W(9, 1037005773, -1122539766, 1023442918, -1111713187) + W(10, -1118145809, 1033815666, 984752392, 1040915883) + W(11, -1127387550, 1042582214, 1025327999, 1038395208); sum2 = W(0, 1025388154, 1001261778, 1007664153, 1018216589) + W(1, 1011431705, -1124915037, 1007167865, -1118462618) + W(2, -1106689977, 1031762430, -1105078255, -1117638934) + W(3, -1119019386, 1039919645, -1115769810, 1041969311) + W(4, -1110560421, -1111104301, -1122922762, 1029938402) + W(5, -1107704269, 1043034893, 1041034358, 1050092429) + W(6, 1049316874, -1112011481, 1044332351, 
-1117360942) + W(7, -1116357646, -1107523849, -1112781805, -1104673921) + W(8, 983067209, 1033280635, -1133572905, 1022258405) + W(9, 1016143957, -1109485745, 1010088409, -1106654827) + W(10, -1121273022, 1026153330, -1116643818, 1033323756) + W(11, -1123412994, 1027380094, 1016238045, 1006440178); WS(-1079099231, 1058224693); - sum1 = W(0, -1136025729, 1041807413, -1113852830, 1038775953) + W(1, -1130024634, -1139925939, 1032899333, -1135488689) + W(2, -1098612147, 1030021338, -1101745716, -1114342965) + W(3, -1109189268, -1116898669, -1169623989, -1105924753) + W(4, 1040574357, -1113591959, 1035906153, -1100940817) + W(5, -1110548904, -1107090858, 983487291, -1105279552) + W(6, 1044962866, 1041390773, 1038641429, -1112997607) + W(7, 1053521164, 1048687519, 1048553583, 1049805017) + W(8, -1107948018, 1030293261, -1102747762, -1131180663) + W(9, -1102886695, -1127712365, 1020276366, 1033907987) + W(10, -1152707357, 1019631368, -1119110697, 1008365619) + W(11, -1120409456, -1156710253, -1137865855, -1120648241); sum2 = W(0, 1026221982, -1150712731, -1135858910, 998750602) + W(1, 1026066546, -1122173835, 1019885572, -1114797832) + W(2, 1031288593, -1121646784, 1020650492, -1118866549) + W(3, -1126679589, 1033676882, -1119823506, 1024418530) + W(4, 1025270629, 1040465550, -1102699692, -1090773917) + W(5, -1103726643, 1038653616, 1044175124, 1035909226) + W(6, -1105433524, -1099611845, -1077885918, -1073585027) + W(7, -1083499628, 1072488285, 1076879885, 1053754278) + W(8, 1025604422, 1015298833, 1013887757, -1111404471) + W(9, 1029004921, -1128843744, 1042609155, -1128976380) + W(10, 1025361731, -1118571342, 1035533544, -1115122758) + W(11, 1025173545, -1132364945, 982102231, -1120827581); WS(1039418864, -1140458522); - sum1 = W(0, 983096624, 1029517505, -1121873782, 1019803894) + W(1, 1027911414, -1108005699, 1033576274, -1135118358) + W(2, -1114535995, -1145085849, -1102746926, 1028523779) + W(3, -1100878768, 1038991951, -1142936385, -1125909817) + W(4, 1044450572, 
-1097839409, 1062380387, -1088253674) + W(5, -1109645966, -1154916402, -1107146685, -1122115416) + W(6, 1027001827, 1032347927, -1106235793, -1090653261) + W(7, 1061486174, 1029669390, 1047810758, 1038152785) + W(8, -1117675070, 1031259878, -1105839880, 1036696746) + W(9, -1097301074, 1024178544, 1038477872, 1041849616) + W(10, 1035770150, -1104931415, 1033087169, -1121428646) + W(11, -1120235273, 1035324408, -1108040973, 1017003668); sum2 = W(0, -1121567066, 1034344084, -1105011035, -1142865888) + W(1, -1136511728, -1106811819, 1032101677, -1113394506) + W(2, 1033267920, 1026284945, 1033899226, -1120108491) + W(3, -1122542627, -1120621959, -1134482876, -1114137407) + W(4, -1104421963, -1103410132, 1022298858, 1057117238) + W(5, 1038703002, 1033583066, -1122973141, 1030826553) + W(6, -1130139832, 1037507308, -1102818563, 1003910328) + W(7, 1043197066, -1124006960, -1124335993, -1125132432) + W(8, -1132243276, 1034735332, 1044067085, 1041074904) + W(9, -1101099519, -1109459597, 1041237660, -1128301632) + W(10, 1024120715, -1109350039, -1127155070, 975508032) + W(11, -1117378085, 1001772648, -1113899392, -1126301549); WS(1053462780, -1083681865); - sum1 = W(0, 1023511963, -1126058411, 1027394040, -1118697149) + W(1, 993307519, 1025756296, -1131368524, 1029589575) + W(2, 1041747598, 1015154794, 1045749945, 1048726745) + W(3, 1043007793, 1038169844, 1024626666, 1040782360) + W(4, 1041497612, 1018224530, 1041814811, -1086401302) + W(5, -1115623160, 1032344226, -1127070442, -1131398580) + W(6, -1138339988, 996055791, 1014351349, -1082085657) + W(7, -1096882594, -1105145572, -1110953740, -1110890258) + W(8, 1035615616, 1027711272, 1040756601, 1048515830) + W(9, 1042139693, 1038148425, 1009997960, 1029718722) + W(10, 1020629658, -1123213919, 1032965474, -1129047376) + W(11, -1132054806, 1033289129, -1128695798, 1029252236); sum2 = W(0, 1008350928, 1011413600, -1120704007, 1027382950) + W(1, -1127381517, 981904616, 1015535284, -1135583391) + W(2, -1118714967, 1024023545, 
-1126562655, 1034951721) + W(3, -1128407708, -1126139913, 999360844, -1138986423) + W(4, 1030231193, -1124778398, -1110351493, 1052754126) + W(5, 1033272406, -1151802072, 1013432992, -1123376913) + W(6, -1091889801, -1098641764, -1104262464, 1057762368) + W(7, -1161878816, 1012148800, 1024540201, 1031971990) + W(8, -1108897465, 1033453825, -1106889474, 1035809187) + W(9, -1132306565, -1153180738, 1014625240, -1118108772) + W(10, 1032606711, -1125648711, 1025836907, -1131934955) + W(11, -1134699254, -1131180247, -1171757551, -1140791326); WS(-1099299320, 1056598066); - sum1 = W(0, 1006197652, -1128153660, -1136164796, 1035903322) + W(1, -1130396894, 1036246880, -1124832314, 1029570879) + W(2, -1127547996, 1041714651, 1029712499, -1128532088) + W(3, -1106761055, -1121593759, 1017604226, 1017990507) + W(4, -1106221946, -1098261544, -1085896508, 1057218165) + W(5, 1054525467, 1028130044, 1036683414, 1016287010) + W(6, 1045972807, 1038366195, 1057638589, 1054413180) + W(7, -1086573266, -1090476168, -1101308500, -1110319406) + W(8, 1004824505, -1113181968, -1113770157, 1038963911) + W(9, 1033084478, 1042818602, -1130963634, -1114724733) + W(10, -1119658116, 1030219574, -1129447670, 1033715440) + W(11, -1128924420, -1116035017, 1030841394, 998207538); sum2 = W(0, -1131923124, 993280665, -1127811288, 1032538133) + W(1, 992174233, 1002762140, 1015088121, 992586073) + W(2, 1046772351, 1028557303, -1107794670, -1098564467) + W(3, -1121892222, 1035775413, -1158807761, 1019898989) + W(4, -1110971235, -1134148898, 1043137579, 1067961229) + W(5, -1095277951, -1099141736, 1011333246, -1118039147) + W(6, -1105011941, -1104446514, -1096504482, 1066594258) + W(7, -1107079502, -1090299346, -1132433157, -1109952821) + W(8, 1028001687, -1147560296, -1101129935, -1104613803) + W(9, -1097683180, -1129031039, -1108888578, 1040568125) + W(10, -1121147281, 1024426003, 1034898623, -1136832302) + W(11, 1036381319, 1009264906, 1032287279, -1111357043); WS(-1105186296, -1108555742); - sum1 = 
W(0, 1040483623, -1101386974, 1040816739, -1134311356) + W(1, -1108915605, 1046011615, -1102638726, 1034864637) + W(2, -1105046378, 1038429012, 1031828819, -1107675004) + W(3, 1034430779, -1109709717, 1030173660, -1130419297) + W(4, -1118442722, 1042743759, -1093958439, 1030110546) + W(5, 1044698927, -1103097566, 1041159370, -1111922822) + W(6, 1040666374, -1109750289, 1050019912, 1040526743) + W(7, -1097473172, 1045974257, -1110364069, 1017242540) + W(8, 986662468, -1103538071, 1033262923, -1121451017) + W(9, 1032745156, 1041021277, -1106175813, 1032931450) + W(10, -1118942975, 1040611956, -1118385334, 1012951144) + W(11, 1035127903, -1100748433, 1037154391, -1121991998); sum2 = W(0, -1127162070, -1135271108, 1003505825, 1012864633) + W(1, -1123384038, 1026898060, -1134649836, -1125848258) + W(2, 1014979733, -1136185376, 1028363168, -1131143608) + W(3, 1033256022, -1133517476, -1120532892, 1028008335) + W(4, -1114518101, 1038675289, -1105093650, -1095849351) + W(5, 1028346583, -1113528611, 1028725832, -1117113572) + W(6, 1016203776, -1140478504, -1089585970, -1070239185) + W(7, -1078361549, 1038794260, -1125525718, 1005296645) + W(8, 1035296562, -1111853852, 1058969759, 1079096535) + W(9, 1068193425, 1032636395, 1017911666, 1032298564) + W(10, -1117887634, 1030827904, -1123277259, -1116960146) + W(11, -1125445846, -1113311282, 991223090, -1134431064); WS(1065442623, 1015025160); - sum1 = W(0, -1115365041, 1040496438, -1114325264, 1036061554) + W(1, 1038370800, -1112496600, 1039123015, -1120668655) + W(2, -1113215535, 1031517084, -1109245393, 1002812769) + W(3, 1036804053, -1099928488, 1036473513, -1116761110) + W(4, 1018550702, 1044423084, -1131936399, 1034975748) + W(5, 1048106991, -1107673238, 1041344487, 1019812354) + W(6, 983382403, 1017412396, -1109909072, -1099246196) + W(7, -1114225707, -1129165678, 1011581692, -1138585900) + W(8, -1103693946, 1032083208, -1106995843, 1024898238) + W(9, 1044531927, -1096349707, 1043676418, -1110354101) + W(10, -1124004468, 
1040563090, -1120263275, 1012000060) + W(11, 1036150809, -1113013165, 1037117105, -1128538214); sum2 = W(0, 1010698941, 1042699596, 1048659418, 1058578114) + W(1, 1052277576, 1019470633, 1041036392, -1119471849) + W(2, 1023126843, -1140656688, -1095653758, -1081184698) + W(3, -1097586600, -1098953045, 1018118885, -1122826054) + W(4, -1106210958, 1029081919, -1118272649, 1060468587) + W(5, 1043044817, -1108654451, 1029721267, -1130901998) + W(6, -1122624743, 1040813712, -1097694847, 996865603) + W(7, -1103459725, -1128239326, 1034766857, 1000734433) + W(8, -1122034237, -1174961495, 1033474214, -1102355565) + W(9, 1049205466, -1112376452, 1033989555, 1017824567) + W(10, -1117968485, 1039098482, -1118912655, 1020119257) + W(11, 1031051049, -1108144089, 1038110136, -1114988670); WS(-1115864032, 1039792746); - sum1 = W(0, -1140468214, 996096400, 1010495721, -1114956320) + W(1, 1013460969, -1122072970, -1127833232, -1140762151) + W(2, -1121375417, -1108218204, -1109130991, -1115276388) + W(3, 1040789857, -1114444905, 1026520268, -1127271361) + W(4, 1034497081, 1033625894, 1055392502, 1036829073) + W(5, -1088431348, 1030275655, -1111886108, 1031218972) + W(6, 1039608980, 1040179481, -1093805194, 1050653566) + W(7, 1049263376, 1051648994, -1121903450, -1112576115) + W(8, -1113171393, -1108192053, -1127063626, -1109643980) + W(9, -1124542831, -1129660583, 1034831154, 1019975176) + W(10, -1132620605, 1018030914, 1012649753, -1127455572) + W(11, -1135717173, -1147064482, -1114505023, -1135014713); sum2 = W(0, 1015385693, 1020691269, 1015565489, 1019337403) + W(1, -1132207064, -1131086480, -1163542113, 1010427910) + W(2, -1115435752, -1141202058, -1111436746, 1040267853) + W(3, -1114623303, 1025609602, -1140470334, -1128429973) + W(4, 1035175215, -1117300235, 1033401267, -1100581516) + W(5, 1041259906, -1101351817, 1020278145, 1027629029) + W(6, 1017441239, -1122675471, 1055273933, 1073306798) + W(7, -1073173797, 1032997632, -1122237085, -1119509251) + W(8, -1112773411, 
1031000417, -1101083444, 1046685605) + W(9, -1115311051, -1113329498, 1031244051, 1033297372) + W(10, 998363696, 1016207247, -1113092041, 1013448250) + W(11, -1130370856, 1026870685, -1124843109, 1014260054); WS(1060418110, -1122066101); - sum1 = W(0, 1031696780, -1124029560, -1147879495, -1123330286) + W(1, -1137787811, 1025904121, -1122010711, -1131825009) + W(2, -1115430516, -1129767372, 1033383950, 1032201307) + W(3, -1124043956, -1135655378, 1016436406, 1011740349) + W(4, -1104465800, -1124342772, -1084846132, 1040589380) + W(5, 1059143665, 1047669789, 1032251443, 1017074582) + W(6, 1041576533, 1035204176, 1057835449, -1121351835) + W(7, -1085338500, 1026196737, -1113520196, 1023559701) + W(8, -1122042801, -1109435772, -1106791333, 1025265863) + W(9, 1037079395, 1031201378, -1120875027, 1024540211) + W(10, 1028261758, 1025573168, 1024098613, 1000233671) + W(11, 1020968696, -1122702819, 1016849816, -1123483709); sum2 = W(0, -1127836624, -1173496113, 1027284157, 1022886010) + W(1, 1036751625, -1124242822, 1017945890, -1140181115) + W(2, 1033364881, -1127629564, -1104916712, -1097350377) + W(3, -1105423640, 1034826035, -1122986652, 1033442485) + W(4, -1101679270, -1105780675, -1113070309, 1061873799) + W(5, -1096904559, -1105550966, -1127473876, -1108824717) + W(6, 1023580345, -1119640853, -1132739235, 1062616603) + W(7, 1009507187, -1098447862, -1116534057, -1115327874) + W(8, -1149619356, 1033890881, -1113766541, -1112358042) + W(9, -1119315178, 1045500652, 1015952658, 1033458469) + W(10, -1136662275, -1122666983, 1023707939, 1028952789) + W(11, -1117384012, -1137245711, -1136860123, 1001899534); WS(1052277756, 1024619064); - sum1 = W(0, 1029425189, -1155801216, 1030252033, -1112675811) + W(1, 1025211073, -1122395345, 994289396, -1124711713) + W(2, -1120340480, -1107014752, 1038469578, 1032327591) + W(3, 1034534022, 1039704040, -1137717581, 1045346698) + W(4, 1055903799, 1049346130, 1057427995, -1103272826) + W(5, 1040656272, -1111436999, 1016775164, 
-1134653325) + W(6, -1093981294, -1099678446, -1098793998, -1090697510) + W(7, -1103403378, 1041606598, 1005246730, 1013960373) + W(8, -1125235986, 1003141514, 1025012844, -1118748640) + W(9, 1025335343, -1106833215, 1029370155, -1099652851) + W(10, 1026200342, -1131584400, 1031806855, -1126277858) + W(11, 1010568017, 1032770798, -1124454216, 1034190187); sum2 = W(0, 1023289146, 1020437582, 1009748873, 1030199907) + W(1, 1007345789, 1010063617, 1018930146, -1127799210) + W(2, 1045862572, 1028786959, -1120652794, 1015917080) + W(3, -1117660774, -1106003069, 1039304637, -1098471935) + W(4, -1095566322, -1132354231, -1090793815, 1063879108) + W(5, -1107315270, 1045942315, -1106431667, 1048572904) + W(6, -1087461476, -1101737384, -1083542229, 1069217208) + W(7, 1056862945, -1153692567, -1111757292, 1056217861) + W(8, 1016132472, -1127607289, -1107157844, -1097091848) + W(9, -1098187278, -1116485608, 1036040005, -1094693884) + W(10, 1044890152, -1124440021, 1037463572, 1032502373) + W(11, 1028169917, -1108513707, 1034190881, -1108016950); WS(-1086218302, -1084258561); - sum1 = W(0, 1027347742, 1021408948, -1127633782, 1039366096) + W(1, -1116749595, 1033396118, -1139872292, 1025066091) + W(2, 1041008695, 1036310380, 1035524367, 1048593117) + W(3, -1138037400, 1045683171, 1032120647, 1046369740) + W(4, 1040638149, 1041666443, 1033011818, 1050103608) + W(5, -1111907839, -1097626581, -1099416246, -1095221944) + W(6, -1088163117, -1103354245, -1094269524, -1122175629) + W(7, -1096387988, -1103712029, -1111740158, -1117250695) + W(8, 1045697689, 1033383494, 1040895122, 1042904478) + W(9, -1136050786, 1018682217, -1126758410, -1121047845) + W(10, -1131589088, 1033326017, -1132088142, 1042877939) + W(11, -1131538396, 1039834587, 1030901630, 1034112696); sum2 = W(0, -1116225927, 1030773056, -1122365237, 1032148531) + W(1, -1127594672, 1026422036, 1023835916, -1131330830) + W(2, 1020583173, 1018516207, -1121497633, 1032012136) + W(3, -1111134479, -1112679629, -1123177144, 
-1112996078) + W(4, -1120289762, -1142227828, 1024002380, 1050603076) + W(5, 1040110681, -1109379336, -1102178993, -1102999759) + W(6, -1116630862, -1157244503, 1045806125, 1061490298) + W(7, -1104925089, -1096282325, -1103628682, -1116593585) + W(8, 1037488437, -1120616119, -1122845130, 1029135782) + W(9, -1122119114, 1035818151, -1118358279, 1024073644) + W(10, -1120774706, 1021325617, -1124264052, 1020114521) + W(11, -1129316292, -1129796491, 1024324894, -1129630471); WS(-1092406524, -1089571522); return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0); } -const static float2x3 rgb2uv = { - -0.169, -0.331, 0.5, - 0.5, -0.419, -0.081 -}; +shared float inp[555]; -const static float3x3 yuv2rgb = { - 1, -0.00093, 1.401687, - 1, -0.3437, -0.71417, - 1, 1.77216, 0.00099 -}; +#define CURRENT_PASS 2 -groupshared float inp[555]; +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} -void Pass2(uint2 blockStart, uint3 threadId) { - const float2 inputPt = GetInputPt(); - const float2 outputPt = GetOutputPt(); +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); - const uint2 group_base = uint2(blockStart.x >> 1, blockStart.y); - for (int id = threadId.x * MP_NUM_THREADS_Y + threadId.y; id < 555; id += MP_NUM_THREADS_X * MP_NUM_THREADS_Y) { - uint x = (uint)id / 15, y = (uint)id % 15; - inp[id] = tex1.SampleLevel(sam, inputPt * float2(group_base.x + x - 2 + 0.5, (group_base.y + y - 3 + 0.5) * 0.5), 0).r; - } +#define temp_tex(pos) (float(texture(temp, pos).x)) +static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2); +static const float2 temp_pt = float2(1.0 / 
(temp_size.x), 1.0 / (temp_size.y)); - GroupMemoryBarrierWithGroupSync(); +#define HOOKED_tex(pos) temp_tex(pos) +#define HOOKED_size temp_size +#define HOOKED_pt temp_pt - uint2 destPos = blockStart + uint2(threadId.x * 2, threadId.y); - if (!CheckViewport(destPos)) { - return; +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 555; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (2)) + 0.5, float(group_base.y + y - (3)) + 0.5)).x; } - - float4 ret = 0.0; - float4 ret0 = 0.0; - float4 samples[12]; - const uint local_pos = threadId.x * 15 + threadId.y; + barrier(); + vec4 ret = vec4(0.0, 0.0, 0.0, 0.0); + vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 samples[12]; samples[0][0] = inp[local_pos + 0]; samples[0][1] = inp[local_pos + 1]; samples[0][2] = inp[local_pos + 2]; @@ -370,15 +2990,15 @@ void Pass2(uint2 blockStart, uint3 threadId) { samples[11][1] = inp[local_pos + 80]; samples[11][2] = inp[local_pos + 81]; samples[11][3] = inp[local_pos + 82]; - - float2 originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, outputPt * (destPos + 0.5f), 0).rgb); - WriteToOutput(destPos, mul(yuv2rgb, float3(samples[4][3], originUV))); - - ++destPos.x; - if (!CheckViewport(destPos)) { + ret[0] = nnedi3(samples); + ret0[0] = inp[local_pos + 33]; +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { return; } - - originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, outputPt * (destPos + 0.5f), 0).rgb); - WriteToOutput(destPos, mul(yuv2rgb, float3(nnedi3(samples), originUV))); +#endif + imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0); 
+ imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret); } diff --git a/src/Effects/NNEDI3/prescalers.hlsli b/src/Effects/NNEDI3/prescalers.hlsli new file mode 100644 index 000000000..e81e3918a --- /dev/null +++ b/src/Effects/NNEDI3/prescalers.hlsli @@ -0,0 +1,73 @@ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +// Conversion from GLSL to HLSL is done through defines as much as possible to ease synchronization and comparison with upstream +#define ivec2 int2 + +#define vec2 float2 +#define vec3 float3 +#define vec4 float4 + +#define mat4x3 float4x3 +#define matrixCompMult(mtx1, mtx2) (mtx1 * mtx2) + +#define shared groupshared + +#define atan atan2 +#define barrier GroupMemoryBarrierWithGroupSync +#define fract frac +#define intBitsToFloat asfloat +#define inversesqrt rsqrt +// mod deals only with positive numbers here and it could be substituted by fmod +#define mod fmod + +// lerp handles bools as the third argument differently from mix +float mix(float a, float b, bool c) { + return c ? 
b : a; +} + +#define MIX_LERP(type1, type3) type1 mix(type1 a, type1 b, type3 c) { return lerp(a, b, c); } +MIX_LERP(float, float) +MIX_LERP(float2, float2) +MIX_LERP(float3, float) +MIX_LERP(float4, float) + +#define texture(tex, pos) tex.SampleLevel(sam_##tex, pos, 0.0) + +#define OUTPUT_pt float2(GetOutputPt()) +#define frag_pos(id) (vec2(id) + vec2(0.5, 0.5)) +#define frag_map(id) (OUTPUT_pt * frag_pos(id)) +#define HOOKED_map(id) frag_map(id) + +#define gl_LocalInvocationIndex (threadId.y*MP_NUM_THREADS_X + threadId.x) +#define gl_LocalInvocationID threadId +#define gl_WorkGroupSize (uint2(MP_NUM_THREADS_X, MP_NUM_THREADS_Y)) +#define gl_WorkGroupID (blockStart / uint2(MP_BLOCK_WIDTH, MP_BLOCK_HEIGHT)) +#define gl_GlobalInvocationID (gl_WorkGroupID*gl_WorkGroupSize + threadId.xy) + +// disable warning about unknown pragma +#pragma warning(disable: 3568) +// disable warning about too many threads (ravu-r4-rgb triggers it) +#pragma warning(disable: 4714) + +// https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.709-6-201506-I!!PDF-E.pdf +static const float3 rgb2y = float3(0.2126, 0.7152, 0.0722); +static const float2x3 rgb2uv = { + -0.2126/1.8556, -0.7152/1.8556, 0.9278/1.8556, + 0.7874/1.5748, -0.7152/1.5748, -0.0722/1.5748 +}; +static const float3x3 yuv2rgb = { + 1, 0, 1.5748, + 1, -0.187324, -0.468124, + 1, 1.8556, 0 +}; diff --git a/src/Effects/Nearest.hlsl b/src/Effects/Nearest.hlsl index 15cc67754..702119f3c 100644 --- a/src/Effects/Nearest.hlsl +++ b/src/Effects/Nearest.hlsl @@ -1,20 +1,20 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!GENERIC_DOWNSCALER - +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; - //!PASS 1 //!STYLE PS //!IN INPUT - +//!OUT OUTPUT float4 Pass1(float2 pos) { return INPUT.SampleLevel(sam, pos, 0); } diff --git a/src/Effects/Pixel Art/MMPX.hlsl b/src/Effects/Pixel Art/MMPX.hlsl index 736045a42..6bce8b6df 100644 --- a/src/Effects/Pixel Art/MMPX.hlsl +++ 
b/src/Effects/Pixel Art/MMPX.hlsl @@ -2,14 +2,17 @@ // 移植自 https://casual-effects.com/research/McGuire2021PixelArt/index.html //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -17,118 +20,113 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 - #define src(x, y) INPUT.SampleLevel(sam, float2(x, y) * GetInputPt(), 0).rgb float luma(float3 C) { - return C.r + C.g + C.b; + return C.r + C.g + C.b; } bool all_eq2(float3 B, float3 A0, float3 A1) { - return all(B == A0) && all(B == A1); + return all(B == A0) && all(B == A1); } bool all_eq3(float3 B, float3 A0, float3 A1, float3 A2) { - return all(B == A0) && all(B == A1) && all(B == A2); + return all(B == A0) && all(B == A1) && all(B == A2); } bool all_eq4(float3 B, float3 A0, float3 A1, float3 A2, float3 A3) { - return all(B == A0) && all(B == A1) && all(B == A2) && all(B == A3); + return all(B == A0) && all(B == A1) && all(B == A2) && all(B == A3); } bool any_eq3(float3 B, float3 A0, float3 A1, float3 A2) { - return all(B == A0) || all(B == A1) || all(B == A2); + return all(B == A0) || all(B == A1) || all(B == A2); } bool none_eq2(float3 B, float3 A0, float3 A1) { - return any(B != A0) && any(B != A1); + return any(B != A0) && any(B != A1); } bool none_eq4(float3 B, float3 A0, float3 A1, float3 A2, float3 A3) { - return any(B != A0) && any(B != A1) && any(B != A2) && any(B != A3); + return any(B != A0) && any(B != A1) && any(B != A2) && any(B != A3); } void Pass1(uint2 blockStart, uint3 threadId) { - uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - - if (!CheckViewport(gxy)) { - return; - } - - float srcX = (gxy.x >> 1) + 0.5f; - float srcY = (gxy.y >> 1) + 0.5f; - - float3 A = src(srcX - 1, srcY - 1), B = src(srcX, srcY - 1), C = src(srcX + 1, srcY - 
1); - float3 D = src(srcX - 1, srcY + 0), E = src(srcX, srcY + 0), F = src(srcX + 1, srcY + 0); - float3 G = src(srcX - 1, srcY + 1), H = src(srcX, srcY + 1), I = src(srcX + 1, srcY + 1); - - float3 J = E, K = E, L = E, M = E; - - if (any(A != E) || any(B != E) || any(C != E) || any(D != E) || any(F != E) || any(G != E) || any(H != E) || any(I != E)) { - float3 P = src(srcX, srcY - 2), S = src(srcX, srcY + 2); - float3 Q = src(srcX - 2, srcY), R = src(srcX + 2, srcY); - float Bl = luma(B), Dl = luma(D), El = luma(E), Fl = luma(F), Hl = luma(H); - - // 1:1 slope rules - if ((all(D == B) && any(D != H) && any(D != F)) && (El >= Dl || all(E == A)) && any_eq3(E, A, C, G) && ((El < Dl) || any(A != D) || any(E != P) || any(E != Q))) J = D; - if ((all(B == F) && any(B != D) && any(B != H)) && (El >= Bl || all(E == C)) && any_eq3(E, A, C, I) && ((El < Bl) || any(C != B) || any(E != P) || any(E != R))) K = B; - if ((all(H == D) && any(H != F) && any(H != B)) && (El >= Hl || all(E == G)) && any_eq3(E, A, G, I) && ((El < Hl) || any(G != H) || any(E != S) || any(E != Q))) L = H; - if ((all(F == H) && any(F != B) && any(F != D)) && (El >= Fl || all(E == I)) && any_eq3(E, C, G, I) && ((El < Fl) || any(I != H) || any(E != R) || any(E != S))) M = F; - - // Intersection rules - if ((any(E != F) && all_eq4(E, C, I, D, Q) && all_eq2(F, B, H)) && (any(F != src(srcX + 3, srcY)))) K = M = F; - if ((any(E != D) && all_eq4(E, A, G, F, R) && all_eq2(D, B, H)) && (any(D != src(srcX - 3, srcY)))) J = L = D; - if ((any(E != H) && all_eq4(E, G, I, B, P) && all_eq2(H, D, F)) && (any(H != src(srcX, srcY + 3)))) L = M = H; - if ((any(E != B) && all_eq4(E, A, C, H, S) && all_eq2(B, D, F)) && (any(B != src(srcX, srcY - 3)))) J = K = B; - if (Bl < El && all_eq4(E, G, H, I, S) && none_eq4(E, A, D, C, F)) J = K = B; - if (Hl < El && all_eq4(E, A, B, C, P) && none_eq4(E, D, G, I, F)) L = M = H; - if (Fl < El && all_eq4(E, A, D, G, Q) && none_eq4(E, B, C, I, H)) K = M = F; - if (Dl < El && all_eq4(E, C, 
F, I, R) && none_eq4(E, B, A, G, H)) J = L = D; - - // 2:1 slope rules - if (any(H != B)) { - if (any(H != A) && any(H != E) && any(H != C)) { - if (all_eq3(H, G, F, R) && none_eq2(H, D, src(srcX + 2, srcY - 1))) L = M; - if (all_eq3(H, I, D, Q) && none_eq2(H, F, src(srcX - 2, srcY - 1))) M = L; - } - - if (any(B != I) && any(B != G) && any(B != E)) { - if (all_eq3(B, A, F, R) && none_eq2(B, D, src(srcX + 2, srcY + 1))) J = K; - if (all_eq3(B, C, D, Q) && none_eq2(B, F, src(srcX - 2, srcY + 1))) K = J; - } - } // H !== B - - if (any(F != D)) { - if (any(D != I) && any(D != E) && any(D != C)) { - if (all_eq3(D, A, H, S) && none_eq2(D, B, src(srcX + 1, srcY + 2))) J = L; - if (all_eq3(D, G, B, P) && none_eq2(D, H, src(srcX + 1, srcY - 2))) L = J; - } - - if (any(F != E) && any(F != A) && any(F != G)) { - if (all_eq3(F, C, H, S) && none_eq2(F, B, src(srcX - 1, srcY + 2))) K = M; - if (all_eq3(F, I, B, P) && none_eq2(F, H, src(srcX - 1, srcY - 2))) M = K; - } - } // F !== D - } // not constant - - // Write four pixels at once - WriteToOutput(gxy, J); - - ++gxy.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, K); - } - - ++gxy.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, M); - } - - --gxy.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, L); - } + uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { + return; + } + + float srcX = (gxy.x >> 1) + 0.5f; + float srcY = (gxy.y >> 1) + 0.5f; + + float3 A = src(srcX - 1, srcY - 1), B = src(srcX, srcY - 1), C = src(srcX + 1, srcY - 1); + float3 D = src(srcX - 1, srcY + 0), E = src(srcX, srcY + 0), F = src(srcX + 1, srcY + 0); + float3 G = src(srcX - 1, srcY + 1), H = src(srcX, srcY + 1), I = src(srcX + 1, srcY + 1); + + float3 J = E, K = E, L = E, M = E; + + if (any(A != E) || any(B != E) || any(C != E) || any(D != E) || any(F != E) || any(G != E) || any(H != E) || any(I != E)) { + float3 P = src(srcX, srcY - 2), S = 
src(srcX, srcY + 2); + float3 Q = src(srcX - 2, srcY), R = src(srcX + 2, srcY); + float Bl = luma(B), Dl = luma(D), El = luma(E), Fl = luma(F), Hl = luma(H); + + // 1:1 slope rules + if ((all(D == B) && any(D != H) && any(D != F)) && (El >= Dl || all(E == A)) && any_eq3(E, A, C, G) && ((El < Dl) || any(A != D) || any(E != P) || any(E != Q))) J = D; + if ((all(B == F) && any(B != D) && any(B != H)) && (El >= Bl || all(E == C)) && any_eq3(E, A, C, I) && ((El < Bl) || any(C != B) || any(E != P) || any(E != R))) K = B; + if ((all(H == D) && any(H != F) && any(H != B)) && (El >= Hl || all(E == G)) && any_eq3(E, A, G, I) && ((El < Hl) || any(G != H) || any(E != S) || any(E != Q))) L = H; + if ((all(F == H) && any(F != B) && any(F != D)) && (El >= Fl || all(E == I)) && any_eq3(E, C, G, I) && ((El < Fl) || any(I != H) || any(E != R) || any(E != S))) M = F; + + // Intersection rules + if ((any(E != F) && all_eq4(E, C, I, D, Q) && all_eq2(F, B, H)) && (any(F != src(srcX + 3, srcY)))) K = M = F; + if ((any(E != D) && all_eq4(E, A, G, F, R) && all_eq2(D, B, H)) && (any(D != src(srcX - 3, srcY)))) J = L = D; + if ((any(E != H) && all_eq4(E, G, I, B, P) && all_eq2(H, D, F)) && (any(H != src(srcX, srcY + 3)))) L = M = H; + if ((any(E != B) && all_eq4(E, A, C, H, S) && all_eq2(B, D, F)) && (any(B != src(srcX, srcY - 3)))) J = K = B; + if (Bl < El && all_eq4(E, G, H, I, S) && none_eq4(E, A, D, C, F)) J = K = B; + if (Hl < El && all_eq4(E, A, B, C, P) && none_eq4(E, D, G, I, F)) L = M = H; + if (Fl < El && all_eq4(E, A, D, G, Q) && none_eq4(E, B, C, I, H)) K = M = F; + if (Dl < El && all_eq4(E, C, F, I, R) && none_eq4(E, B, A, G, H)) J = L = D; + + // 2:1 slope rules + if (any(H != B)) { + if (any(H != A) && any(H != E) && any(H != C)) { + if (all_eq3(H, G, F, R) && none_eq2(H, D, src(srcX + 2, srcY - 1))) L = M; + if (all_eq3(H, I, D, Q) && none_eq2(H, F, src(srcX - 2, srcY - 1))) M = L; + } + + if (any(B != I) && any(B != G) && any(B != E)) { + if (all_eq3(B, A, F, R) && 
none_eq2(B, D, src(srcX + 2, srcY + 1))) J = K; + if (all_eq3(B, C, D, Q) && none_eq2(B, F, src(srcX - 2, srcY + 1))) K = J; + } + } // H !== B + + if (any(F != D)) { + if (any(D != I) && any(D != E) && any(D != C)) { + if (all_eq3(D, A, H, S) && none_eq2(D, B, src(srcX + 1, srcY + 2))) J = L; + if (all_eq3(D, G, B, P) && none_eq2(D, H, src(srcX + 1, srcY - 2))) L = J; + } + + if (any(F != E) && any(F != A) && any(F != G)) { + if (all_eq3(F, C, H, S) && none_eq2(F, B, src(srcX - 1, srcY + 2))) K = M; + if (all_eq3(F, I, B, P) && none_eq2(F, H, src(srcX - 1, srcY - 2))) M = K; + } + } // F !== D + } // not constant + + // Write four pixels at once + OUTPUT[gxy] = float4(J, 1); + + ++gxy.x; + OUTPUT[gxy] = float4(K, 1); + + ++gxy.y; + OUTPUT[gxy] = float4(M, 1); + + --gxy.x; + OUTPUT[gxy] = float4(L, 1); } diff --git a/src/Effects/Pixel Art/Pixellate.hlsl b/src/Effects/Pixel Art/Pixellate.hlsl index fa972743e..ded35889c 100644 --- a/src/Effects/Pixel Art/Pixellate.hlsl +++ b/src/Effects/Pixel Art/Pixellate.hlsl @@ -1,12 +1,15 @@ // 移植自 https://github.com/libretro/common-shaders/blob/master/interpolation/shaders/pixellate.cg //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -15,6 +18,7 @@ SamplerState sam; //!PASS 1 //!STYLE PS //!IN INPUT +//!OUT OUTPUT float4 Pass1(float2 pos) { float2 texelSize = GetInputPt(); diff --git a/src/Effects/Pixel Art/SharpBilinear.hlsl b/src/Effects/Pixel Art/SharpBilinear.hlsl index 8e58060db..de3dd3ed4 100644 --- a/src/Effects/Pixel Art/SharpBilinear.hlsl +++ b/src/Effects/Pixel Art/SharpBilinear.hlsl @@ -1,12 +1,15 @@ // 移植自 https://github.com/libretro/common-shaders/blob/master/interpolation/shaders/sharp-bilinear.cg //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER LINEAR @@ -17,6 +20,7 @@ SamplerState sam; //!PASS 1 //!STYLE PS //!IN INPUT +//!OUT OUTPUT 
float4 Pass1(float2 pos) { float2 inputPt = GetInputPt(); diff --git a/src/Effects/RAVU/RAVU_3x_R2.hlsl b/src/Effects/RAVU/RAVU_3x_R2.hlsl new file mode 100644 index 000000000..c834cc986 --- /dev/null +++ b/src/Effects/RAVU/RAVU_3x_R2.hlsl @@ -0,0 +1,176 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-3x.py --target luma --weights-file weights\ravu-3x_weights-r2.py --float-format float16dx --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 3 +//!HEIGHT INPUT_HEIGHT * 3 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_3x_lut2_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_3x_lut2; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_3x_lut2; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-3x (luma, r2) +//!IN INPUT, ravu_3x_lut2 +//!OUT OUTPUT +//!BLOCK_SIZE 96, 24 +//!NUM_THREADS 32, 8 +shared float inp[340]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_3x_lut2_tex(pos) (vec4(texture(ravu_3x_lut2, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 10 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 340; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 10, y = (uint)id % 10; + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).x; + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 3; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= 
outputSize.y) { + return; + } +#endif + float luma0 = inp[local_pos + 0]; + float luma1 = inp[local_pos + 1]; + float luma2 = inp[local_pos + 2]; + float luma3 = inp[local_pos + 10]; + float luma4 = inp[local_pos + 11]; + float luma5 = inp[local_pos + 12]; + float luma6 = inp[local_pos + 20]; + float luma7 = inp[local_pos + 21]; + float luma8 = inp[local_pos + 22]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma3 - luma0); + gy = (luma1 - luma0); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma4 - luma1); + gy = (luma2 - luma0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma5 - luma2); + gy = (luma2 - luma1); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma6 - luma0) / 2.0; + gy = (luma4 - luma3); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma7 - luma1) / 2.0; + gy = (luma5 - luma3) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833; + gx = (luma8 - luma2) / 2.0; + gy = (luma5 - luma4); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma6 - luma3); + gy = (luma7 - luma6); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma7 - luma4); + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma8 - luma5); + gy = (luma8 - luma7); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = 
mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0; + vec4 res0 = vec4(0.0, 0.0, 0.0, 0.0), res1 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 w0, w1; + w0 = texture(ravu_3x_lut2, vec2(0.05, coord_y)); + w1 = texture(ravu_3x_lut2, vec2(0.15, coord_y)); + res0 += luma0 * w0 + luma8 * w1.wzyx; + res1 += luma0 * w1 + luma8 * w0.wzyx; + w0 = texture(ravu_3x_lut2, vec2(0.25, coord_y)); + w1 = texture(ravu_3x_lut2, vec2(0.35, coord_y)); + res0 += luma1 * w0 + luma7 * w1.wzyx; + res1 += luma1 * w1 + luma7 * w0.wzyx; + w0 = texture(ravu_3x_lut2, vec2(0.45, coord_y)); + w1 = texture(ravu_3x_lut2, vec2(0.55, coord_y)); + res0 += luma2 * w0 + luma6 * w1.wzyx; + res1 += luma2 * w1 + luma6 * w0.wzyx; + w0 = texture(ravu_3x_lut2, vec2(0.65, coord_y)); + w1 = texture(ravu_3x_lut2, vec2(0.75, coord_y)); + res0 += luma3 * w0 + luma5 * w1.wzyx; + res1 += luma3 * w1 + luma5 * w0.wzyx; + w0 = texture(ravu_3x_lut2, vec2(0.85, coord_y)); + w1 = texture(ravu_3x_lut2, vec2(0.95, coord_y)); + res0 += luma4 * w0; + res1 += luma4 * w1; + res0 = clamp(res0, 0.0, 1.0); + res1 = clamp(res1, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), res0[0]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), res0[1]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), res0[2]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), res0[3]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), luma4); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), res1[0]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), res1[1]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), res1[2]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), res1[3]); +} diff --git 
a/src/Effects/RAVU/RAVU_3x_R2_RGB.hlsl b/src/Effects/RAVU/RAVU_3x_R2_RGB.hlsl new file mode 100644 index 000000000..94aed4dbf --- /dev/null +++ b/src/Effects/RAVU/RAVU_3x_R2_RGB.hlsl @@ -0,0 +1,180 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-3x.py --target rgb --weights-file weights\ravu-3x_weights-r2.py --float-format float16dx --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 3 +//!HEIGHT INPUT_HEIGHT * 3 +Texture2D OUTPUT; + +//!TEXTURE +//!SOURCE ravu_3x_lut2_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_3x_lut2; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_3x_lut2; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-3x (rgb, r2) +//!IN INPUT, ravu_3x_lut2 +//!OUT OUTPUT +//!BLOCK_SIZE 96, 24 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +// HLSL doesn't have outerProduct +float4x3 outerProduct(float3 l, float4 r) { return mul(float4x1(r), float1x3(l)); } +shared vec3 inp[340]; +shared float inp_luma[340]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val) +void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_3x_lut2_tex(pos) (vec4(texture(ravu_3x_lut2, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 10 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 340; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 10, y = (uint)id % 10; + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).xyz; + inp_luma[id] = dot(inp[id], color_primary); + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 3; + uint2 outputSize = GetOutputSize(); + 
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + float luma0 = inp_luma[local_pos + 0]; + float luma1 = inp_luma[local_pos + 1]; + float luma2 = inp_luma[local_pos + 2]; + float luma3 = inp_luma[local_pos + 10]; + float luma4 = inp_luma[local_pos + 11]; + float luma5 = inp_luma[local_pos + 12]; + float luma6 = inp_luma[local_pos + 20]; + float luma7 = inp_luma[local_pos + 21]; + float luma8 = inp_luma[local_pos + 22]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma3 - luma0); + gy = (luma1 - luma0); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma4 - luma1); + gy = (luma2 - luma0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma5 - luma2); + gy = (luma2 - luma1); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma6 - luma0) / 2.0; + gy = (luma4 - luma3); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma7 - luma1) / 2.0; + gy = (luma5 - luma3) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833; + gx = (luma8 - luma2) / 2.0; + gy = (luma5 - luma4); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma6 - luma3); + gy = (luma7 - luma6); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma7 - luma4); + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma8 - luma5); + gy = (luma8 - luma7); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 
1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0; + mat4x3 res0 = 0.0, res1 = 0.0; + vec4 w0, w1; + w0 = texture(ravu_3x_lut2, vec2(0.05, coord_y)); + w1 = texture(ravu_3x_lut2, vec2(0.15, coord_y)); + res0 += outerProduct(inp[local_pos + 0], w0) + outerProduct(inp[local_pos + 22], w1.wzyx); + res1 += outerProduct(inp[local_pos + 0], w1) + outerProduct(inp[local_pos + 22], w0.wzyx); + w0 = texture(ravu_3x_lut2, vec2(0.25, coord_y)); + w1 = texture(ravu_3x_lut2, vec2(0.35, coord_y)); + res0 += outerProduct(inp[local_pos + 1], w0) + outerProduct(inp[local_pos + 21], w1.wzyx); + res1 += outerProduct(inp[local_pos + 1], w1) + outerProduct(inp[local_pos + 21], w0.wzyx); + w0 = texture(ravu_3x_lut2, vec2(0.45, coord_y)); + w1 = texture(ravu_3x_lut2, vec2(0.55, coord_y)); + res0 += outerProduct(inp[local_pos + 2], w0) + outerProduct(inp[local_pos + 20], w1.wzyx); + res1 += outerProduct(inp[local_pos + 2], w1) + outerProduct(inp[local_pos + 20], w0.wzyx); + w0 = texture(ravu_3x_lut2, vec2(0.65, coord_y)); + w1 = texture(ravu_3x_lut2, vec2(0.75, coord_y)); + res0 += outerProduct(inp[local_pos + 10], w0) + outerProduct(inp[local_pos + 12], w1.wzyx); + res1 += outerProduct(inp[local_pos + 10], w1) + outerProduct(inp[local_pos + 12], w0.wzyx); + w0 = texture(ravu_3x_lut2, vec2(0.85, coord_y)); + w1 = texture(ravu_3x_lut2, vec2(0.95, coord_y)); + res0 += outerProduct(inp[local_pos + 11], w0); + res1 += outerProduct(inp[local_pos + 11], w1); + res0[0] = clamp(res0[0], 0.0, 1.0); + res0[1] = clamp(res0[1], 0.0, 1.0); + res0[2] = clamp(res0[2], 0.0, 1.0); + res0[3] = clamp(res0[3], 0.0, 1.0); + res1[0] = clamp(res1[0], 0.0, 1.0); + res1[1] = clamp(res1[1], 0.0, 1.0); + res1[2] = clamp(res1[2], 0.0, 1.0); + res1[3] = clamp(res1[3], 0.0, 
1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), vec4(res0[0], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), vec4(res0[1], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), vec4(res0[2], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), vec4(res0[3], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), vec4(inp[local_pos + 11], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), vec4(res1[0], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), vec4(res1[1], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), vec4(res1[2], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), vec4(res1[3], 1.0)); +} diff --git a/src/Effects/RAVU/RAVU_3x_R3.hlsl b/src/Effects/RAVU/RAVU_3x_R3.hlsl new file mode 100644 index 000000000..e78eb9260 --- /dev/null +++ b/src/Effects/RAVU/RAVU_3x_R3.hlsl @@ -0,0 +1,224 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-3x.py --target luma --weights-file weights\ravu-3x_weights-r3.py --float-format float16dx --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 3 +//!HEIGHT INPUT_HEIGHT * 3 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_3x_lut3_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_3x_lut3; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_3x_lut3; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-3x (luma, r3) +//!IN INPUT, ravu_3x_lut3 +//!OUT OUTPUT +//!BLOCK_SIZE 96, 24 +//!NUM_THREADS 32, 8 +shared float inp[432]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_3x_lut3_tex(pos) (vec4(texture(ravu_3x_lut3, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +/* Pass1: RAVU-3x (luma, r3). Each thread reads a 5x5 luma window from the shared tile inp, derives an (angle, strength, coherence) key from weighted gradient products, fetches kernel weights from ravu_3x_lut3 and writes a 3x3 block of output pixels through imageStore. blockStart and threadId are used directly only for the output bounds check; the gl_* identifiers are not defined in this chunk (presumably supplied by prescalers.hlsli - TODO confirm). */void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 12 + int(gl_LocalInvocationID.y); + /* Tile load: threads stride over all 432 (= 36 x 12) entries of inp; entry id holds the luma sampled at HOOKED_pt * (group_base + (id / 12, id % 12) - 1.5). */for (int id = int(gl_LocalInvocationIndex); id < 432; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 12, y = (uint)id % 12; + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).x; + } + /* barrier() is not defined in this chunk; presumably a work-group sync so the tile is complete before neighbours are read - TODO confirm. */barrier(); +#if CURRENT_PASS == LAST_PASS + /* Bounds check on the top-left pixel of this thread's 3x3 output block; it runs after the tile load, so returning threads have already contributed their loads. */uint2 destPos = blockStart + threadId.xy * 3; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >=
outputSize.y) { + return; + } +#endif + /* 5x5 window: lumaN is the tile entry at offset (N / 5, N % 5) from local_pos, with 12 entries per step of the first index. */float luma0 = inp[local_pos + 0]; + float luma1 = inp[local_pos + 1]; + float luma2 = inp[local_pos + 2]; + float luma3 = inp[local_pos + 3]; + float luma4 = inp[local_pos + 4]; + float luma5 = inp[local_pos + 12]; + float luma6 = inp[local_pos + 13]; + float luma7 = inp[local_pos + 14]; + float luma8 = inp[local_pos + 15]; + float luma9 = inp[local_pos + 16]; + float luma10 = inp[local_pos + 24]; + float luma11 = inp[local_pos + 25]; + float luma12 = inp[local_pos + 26]; + float luma13 = inp[local_pos + 27]; + float luma14 = inp[local_pos + 28]; + float luma15 = inp[local_pos + 36]; + float luma16 = inp[local_pos + 37]; + float luma17 = inp[local_pos + 38]; + float luma18 = inp[local_pos + 39]; + float luma19 = inp[local_pos + 40]; + float luma20 = inp[local_pos + 48]; + float luma21 = inp[local_pos + 49]; + float luma22 = inp[local_pos + 50]; + float luma23 = inp[local_pos + 51]; + float luma24 = inp[local_pos + 52]; + /* abd accumulates weighted (gx*gx, gx*gy, gy*gy) over the inner 3x3 window positions; gx and gy are central differences along the first and second tile index. */vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma11 - luma1) / 2.0; + gy = (luma7 - luma5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma12 - luma2) / 2.0; + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma13 - luma3) / 2.0; + gy = (luma9 - luma7) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma16 - luma6) / 2.0; + gy = (luma12 - luma10) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma17 - luma7) / 2.0; + gy = (luma13 - luma11) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833; + gx = (luma18 - luma8) / 2.0; + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma21 - luma11) / 2.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma22 - luma12) / 2.0; + gy = (luma18 - luma16) / 2.0; + abd += vec3(gx * gx, gx * gy,
gy * gy) * 0.11543163961422666; + gx = (luma23 - luma13) / 2.0; + gy = (luma19 - luma17) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + /* L1 >= L2 are the eigenvalues of the symmetric 2x2 matrix [[a, b], [b, d]], computed from its trace T and determinant D. */float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + /* mix() and mod() are not defined in this chunk; the bool third argument of mix is assumed to select the second value when true, as in GLSL - TODO confirm. */float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + /* Quantize into 24 angle x 3 strength x 3 coherence classes; coord_y is (class index + 0.5) / 216. */float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0; + /* res0 and res1 hold the eight non-centre output pixels. Each w0/w1 fetch pair (x = (k + 0.5) / 26) serves sample N and its mirror 24 - N; the mirror uses w0 and w1 exchanged and swizzled .wzyx. */vec4 res0 = vec4(0.0, 0.0, 0.0, 0.0), res1 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 w0, w1; + w0 = texture(ravu_3x_lut3, vec2(0.019230769230769232, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.057692307692307696, coord_y)); + res0 += luma0 * w0 + luma24 * w1.wzyx; + res1 += luma0 * w1 + luma24 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.09615384615384616, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.1346153846153846, coord_y)); + res0 += luma1 * w0 + luma23 * w1.wzyx; + res1 += luma1 * w1 + luma23 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.17307692307692307, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.21153846153846154, coord_y)); + res0 += luma2 * w0 + luma22 * w1.wzyx; + res1 += luma2 * w1 + luma22 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.25, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.28846153846153844, coord_y)); + res0 += luma3 * w0 + luma21 * w1.wzyx; + res1 += luma3 * w1 + luma21 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.3269230769230769, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.36538461538461536, coord_y)); + res0 += luma4 * w0 + luma20 * w1.wzyx;
+ res1 += luma4 * w1 + luma20 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.40384615384615385, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.4423076923076923, coord_y)); + res0 += luma5 * w0 + luma19 * w1.wzyx; + res1 += luma5 * w1 + luma19 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.4807692307692308, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.5192307692307693, coord_y)); + res0 += luma6 * w0 + luma18 * w1.wzyx; + res1 += luma6 * w1 + luma18 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.5576923076923077, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.5961538461538461, coord_y)); + res0 += luma7 * w0 + luma17 * w1.wzyx; + res1 += luma7 * w1 + luma17 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.6346153846153846, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.6730769230769231, coord_y)); + res0 += luma8 * w0 + luma16 * w1.wzyx; + res1 += luma8 * w1 + luma16 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.7115384615384616, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.75, coord_y)); + res0 += luma9 * w0 + luma15 * w1.wzyx; + res1 += luma9 * w1 + luma15 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.7884615384615384, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.8269230769230769, coord_y)); + res0 += luma10 * w0 + luma14 * w1.wzyx; + res1 += luma10 * w1 + luma14 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.8653846153846154, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.9038461538461539, coord_y)); + res0 += luma11 * w0 + luma13 * w1.wzyx; + res1 += luma11 * w1 + luma13 * w0.wzyx; + w0 = texture(ravu_3x_lut3, vec2(0.9423076923076923, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.9807692307692307, coord_y)); + res0 += luma12 * w0; + res1 += luma12 * w1; + res0 = clamp(res0, 0.0, 1.0); + res1 = clamp(res1, 0.0, 1.0); + /* Write the 3x3 output block; the centre pixel (1, 1) receives the unfiltered centre sample luma12. */imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), res0[0]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), res0[1]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2),
res0[2]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), res0[3]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), luma12); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), res1[0]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), res1[1]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), res1[2]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), res1[3]); +} diff --git a/src/Effects/RAVU/RAVU_3x_R3_RGB.hlsl b/src/Effects/RAVU/RAVU_3x_R3_RGB.hlsl new file mode 100644 index 000000000..9b2c05257 --- /dev/null +++ b/src/Effects/RAVU/RAVU_3x_R3_RGB.hlsl @@ -0,0 +1,224 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-3x.py --target rgb --weights-file weights\ravu-3x_weights-r3.py --float-format float16dx --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 3 +//!HEIGHT INPUT_HEIGHT * 3 +Texture2D OUTPUT; + +//!TEXTURE +//!SOURCE ravu_3x_lut3_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_3x_lut3; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_3x_lut3; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-3x (rgb, r3) +//!IN INPUT, ravu_3x_lut3 +//!OUT OUTPUT +//!BLOCK_SIZE 96, 24 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +// HLSL doesn't have outerProduct +float4x3 outerProduct(float3 l, float4 r) { return mul(float4x1(r), float1x3(l)); } +shared vec3 inp[432]; +shared float inp_luma[432]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val) +void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_3x_lut3_tex(pos) (vec4(texture(ravu_3x_lut3, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +/* Pass1: RAVU-3x (rgb, r3). The shared tile keeps the colour samples in inp and their luma (dot with color_primary) in inp_luma. Luma drives the (angle, strength, coherence) key used to fetch kernel weights from ravu_3x_lut3; the weights are applied to the colour samples and a 3x3 block of output pixels is written. blockStart and threadId are used directly only for the output bounds check; the gl_* identifiers are not defined in this chunk (presumably supplied by prescalers.hlsli - TODO confirm). */void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 12 + int(gl_LocalInvocationID.y); + /* Tile load: threads stride over all 432 (= 36 x 12) entries; entry id holds the colour sampled at HOOKED_pt * (group_base + (id / 12, id % 12) - 1.5) and its luma. */for (int id = int(gl_LocalInvocationIndex); id < 432; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 12, y = (uint)id % 12; + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).xyz; + inp_luma[id] = dot(inp[id], color_primary); + } + /* barrier() is not defined in this chunk; presumably a work-group sync so the tile is complete before neighbours are read - TODO confirm. */barrier(); +#if CURRENT_PASS == LAST_PASS + /* Bounds check on the top-left pixel of this thread's 3x3 output block; it runs after the tile load, so returning threads have already contributed their loads. */uint2 destPos = blockStart + threadId.xy * 3; + uint2 outputSize = GetOutputSize(); +
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + /* lumaN is the luma at tile offset (N / 5, N % 5) from local_pos (12 entries per step of the first index); luma0, luma4, luma20 and luma24 are not declared because no gradient term below reads them. */float luma1 = inp_luma[local_pos + 1]; + float luma2 = inp_luma[local_pos + 2]; + float luma3 = inp_luma[local_pos + 3]; + float luma5 = inp_luma[local_pos + 12]; + float luma6 = inp_luma[local_pos + 13]; + float luma7 = inp_luma[local_pos + 14]; + float luma8 = inp_luma[local_pos + 15]; + float luma9 = inp_luma[local_pos + 16]; + float luma10 = inp_luma[local_pos + 24]; + float luma11 = inp_luma[local_pos + 25]; + float luma12 = inp_luma[local_pos + 26]; + float luma13 = inp_luma[local_pos + 27]; + float luma14 = inp_luma[local_pos + 28]; + float luma15 = inp_luma[local_pos + 36]; + float luma16 = inp_luma[local_pos + 37]; + float luma17 = inp_luma[local_pos + 38]; + float luma18 = inp_luma[local_pos + 39]; + float luma19 = inp_luma[local_pos + 40]; + float luma21 = inp_luma[local_pos + 49]; + float luma22 = inp_luma[local_pos + 50]; + float luma23 = inp_luma[local_pos + 51]; + /* abd accumulates weighted (gx*gx, gx*gy, gy*gy) over the inner 3x3 window positions; gx and gy are central differences along the first and second tile index. */vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma11 - luma1) / 2.0; + gy = (luma7 - luma5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma12 - luma2) / 2.0; + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma13 - luma3) / 2.0; + gy = (luma9 - luma7) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma16 - luma6) / 2.0; + gy = (luma12 - luma10) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma17 - luma7) / 2.0; + gy = (luma13 - luma11) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833; + gx = (luma18 - luma8) / 2.0; + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (luma21 - luma11) / 2.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (luma22 - luma12) / 2.0; + gy = (luma18 - luma16) / 2.0; + abd += vec3(gx * gx, gx *
gy, gy * gy) * 0.11543163961422666; + gx = (luma23 - luma13) / 2.0; + gy = (luma19 - luma17) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + /* L1 >= L2 are the eigenvalues of the symmetric 2x2 matrix [[a, b], [b, d]], computed from its trace T and determinant D. */float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + /* mix() and mod() are not defined in this chunk; the bool third argument of mix is assumed to select the second value when true, as in GLSL - TODO confirm. */float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + /* Quantize into 24 angle x 3 strength x 3 coherence classes; coord_y is (class index + 0.5) / 216. */float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0; + /* res0 and res1 hold the eight non-centre output pixels, one float3 row each (outerProduct scales the colour by each of the four weights). Each w0/w1 fetch pair (x = (k + 0.5) / 26) serves tile offset o and its mirror 52 - o; the mirror uses w0 and w1 exchanged and swizzled .wzyx. */mat4x3 res0 = 0.0, res1 = 0.0; + vec4 w0, w1; + w0 = texture(ravu_3x_lut3, vec2(0.019230769230769232, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.057692307692307696, coord_y)); + res0 += outerProduct(inp[local_pos + 0], w0) + outerProduct(inp[local_pos + 52], w1.wzyx); + res1 += outerProduct(inp[local_pos + 0], w1) + outerProduct(inp[local_pos + 52], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.09615384615384616, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.1346153846153846, coord_y)); + res0 += outerProduct(inp[local_pos + 1], w0) + outerProduct(inp[local_pos + 51], w1.wzyx); + res1 += outerProduct(inp[local_pos + 1], w1) + outerProduct(inp[local_pos + 51], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.17307692307692307, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.21153846153846154, coord_y)); + res0 += outerProduct(inp[local_pos + 2], w0) + outerProduct(inp[local_pos + 50], w1.wzyx); + res1 += outerProduct(inp[local_pos + 2], w1) + outerProduct(inp[local_pos + 50], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.25, coord_y)); + w1 = texture(ravu_3x_lut3,
vec2(0.28846153846153844, coord_y)); + res0 += outerProduct(inp[local_pos + 3], w0) + outerProduct(inp[local_pos + 49], w1.wzyx); + res1 += outerProduct(inp[local_pos + 3], w1) + outerProduct(inp[local_pos + 49], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.3269230769230769, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.36538461538461536, coord_y)); + res0 += outerProduct(inp[local_pos + 4], w0) + outerProduct(inp[local_pos + 48], w1.wzyx); + res1 += outerProduct(inp[local_pos + 4], w1) + outerProduct(inp[local_pos + 48], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.40384615384615385, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.4423076923076923, coord_y)); + res0 += outerProduct(inp[local_pos + 12], w0) + outerProduct(inp[local_pos + 40], w1.wzyx); + res1 += outerProduct(inp[local_pos + 12], w1) + outerProduct(inp[local_pos + 40], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.4807692307692308, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.5192307692307693, coord_y)); + res0 += outerProduct(inp[local_pos + 13], w0) + outerProduct(inp[local_pos + 39], w1.wzyx); + res1 += outerProduct(inp[local_pos + 13], w1) + outerProduct(inp[local_pos + 39], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.5576923076923077, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.5961538461538461, coord_y)); + res0 += outerProduct(inp[local_pos + 14], w0) + outerProduct(inp[local_pos + 38], w1.wzyx); + res1 += outerProduct(inp[local_pos + 14], w1) + outerProduct(inp[local_pos + 38], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.6346153846153846, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.6730769230769231, coord_y)); + res0 += outerProduct(inp[local_pos + 15], w0) + outerProduct(inp[local_pos + 37], w1.wzyx); + res1 += outerProduct(inp[local_pos + 15], w1) + outerProduct(inp[local_pos + 37], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.7115384615384616, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.75, coord_y)); + res0 += outerProduct(inp[local_pos + 16], w0) +
outerProduct(inp[local_pos + 36], w1.wzyx); + res1 += outerProduct(inp[local_pos + 16], w1) + outerProduct(inp[local_pos + 36], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.7884615384615384, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.8269230769230769, coord_y)); + res0 += outerProduct(inp[local_pos + 24], w0) + outerProduct(inp[local_pos + 28], w1.wzyx); + res1 += outerProduct(inp[local_pos + 24], w1) + outerProduct(inp[local_pos + 28], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.8653846153846154, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.9038461538461539, coord_y)); + res0 += outerProduct(inp[local_pos + 25], w0) + outerProduct(inp[local_pos + 27], w1.wzyx); + res1 += outerProduct(inp[local_pos + 25], w1) + outerProduct(inp[local_pos + 27], w0.wzyx); + w0 = texture(ravu_3x_lut3, vec2(0.9423076923076923, coord_y)); + w1 = texture(ravu_3x_lut3, vec2(0.9807692307692307, coord_y)); + res0 += outerProduct(inp[local_pos + 26], w0); + res1 += outerProduct(inp[local_pos + 26], w1); + /* Clamp each output colour row to [0, 1] before storing. */res0[0] = clamp(res0[0], 0.0, 1.0); + res0[1] = clamp(res0[1], 0.0, 1.0); + res0[2] = clamp(res0[2], 0.0, 1.0); + res0[3] = clamp(res0[3], 0.0, 1.0); + res1[0] = clamp(res1[0], 0.0, 1.0); + res1[1] = clamp(res1[1], 0.0, 1.0); + res1[2] = clamp(res1[2], 0.0, 1.0); + res1[3] = clamp(res1[3], 0.0, 1.0); + /* Write the 3x3 output block with alpha 1.0; the centre pixel (1, 1) receives the unfiltered centre colour inp[local_pos + 26]. */imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), vec4(res0[0], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), vec4(res0[1], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), vec4(res0[2], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), vec4(res0[3], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), vec4(inp[local_pos + 26], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), vec4(res1[0], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), vec4(res1[1], 1.0)); + imageStore(out_image,
ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), vec4(res1[2], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), vec4(res1[3], 1.0)); +} diff --git a/src/Effects/RAVU/RAVU_3x_R4.hlsl b/src/Effects/RAVU/RAVU_3x_R4.hlsl new file mode 100644 index 000000000..97b1e223b --- /dev/null +++ b/src/Effects/RAVU/RAVU_3x_R4.hlsl @@ -0,0 +1,344 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-3x.py --target luma --weights-file weights\ravu-3x_weights-r4.py --float-format float16dx --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 3 +//!HEIGHT INPUT_HEIGHT * 3 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_3x_lut4_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_3x_lut4; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_3x_lut4; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-3x (luma, r4) +//!IN INPUT, ravu_3x_lut4 +//!OUT OUTPUT +//!BLOCK_SIZE 96, 24 +//!NUM_THREADS 32, 8 +shared float inp[532]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_3x_lut4_tex(pos) (vec4(texture(ravu_3x_lut4, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +/* Pass1: RAVU-3x (luma, r4). Each thread reads a 7x7 luma window from the shared tile inp, derives an (angle, strength, coherence) key from weighted gradient products, fetches kernel weights from ravu_3x_lut4 and writes a 3x3 block of output pixels through imageStore. blockStart and threadId are used directly only for the output bounds check; the gl_* identifiers are not defined in this chunk (presumably supplied by prescalers.hlsli - TODO confirm). */void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 14 + int(gl_LocalInvocationID.y); + /* Tile load: threads stride over all 532 (= 38 x 14) entries of inp; entry id holds the luma sampled at HOOKED_pt * (group_base + (id / 14, id % 14) - 2.5). */for (int id = int(gl_LocalInvocationIndex); id < 532; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 14, y = (uint)id % 14; + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).x; + } + /* barrier() is not defined in this chunk; presumably a work-group sync so the tile is complete before neighbours are read - TODO confirm. */barrier(); +#if CURRENT_PASS == LAST_PASS + /* Bounds check on the top-left pixel of this thread's 3x3 output block; it runs after the tile load, so returning threads have already contributed their loads. */uint2 destPos = blockStart + threadId.xy * 3; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >=
outputSize.y) { + return; + } +#endif + /* 7x7 window: lumaN is the tile entry at offset (N / 7, N % 7) from local_pos, with 14 entries per step of the first index. */float luma0 = inp[local_pos + 0]; + float luma1 = inp[local_pos + 1]; + float luma2 = inp[local_pos + 2]; + float luma3 = inp[local_pos + 3]; + float luma4 = inp[local_pos + 4]; + float luma5 = inp[local_pos + 5]; + float luma6 = inp[local_pos + 6]; + float luma7 = inp[local_pos + 14]; + float luma8 = inp[local_pos + 15]; + float luma9 = inp[local_pos + 16]; + float luma10 = inp[local_pos + 17]; + float luma11 = inp[local_pos + 18]; + float luma12 = inp[local_pos + 19]; + float luma13 = inp[local_pos + 20]; + float luma14 = inp[local_pos + 28]; + float luma15 = inp[local_pos + 29]; + float luma16 = inp[local_pos + 30]; + float luma17 = inp[local_pos + 31]; + float luma18 = inp[local_pos + 32]; + float luma19 = inp[local_pos + 33]; + float luma20 = inp[local_pos + 34]; + float luma21 = inp[local_pos + 42]; + float luma22 = inp[local_pos + 43]; + float luma23 = inp[local_pos + 44]; + float luma24 = inp[local_pos + 45]; + float luma25 = inp[local_pos + 46]; + float luma26 = inp[local_pos + 47]; + float luma27 = inp[local_pos + 48]; + float luma28 = inp[local_pos + 56]; + float luma29 = inp[local_pos + 57]; + float luma30 = inp[local_pos + 58]; + float luma31 = inp[local_pos + 59]; + float luma32 = inp[local_pos + 60]; + float luma33 = inp[local_pos + 61]; + float luma34 = inp[local_pos + 62]; + float luma35 = inp[local_pos + 70]; + float luma36 = inp[local_pos + 71]; + float luma37 = inp[local_pos + 72]; + float luma38 = inp[local_pos + 73]; + float luma39 = inp[local_pos + 74]; + float luma40 = inp[local_pos + 75]; + float luma41 = inp[local_pos + 76]; + float luma42 = inp[local_pos + 84]; + float luma43 = inp[local_pos + 85]; + float luma44 = inp[local_pos + 86]; + float luma45 = inp[local_pos + 87]; + float luma46 = inp[local_pos + 88]; + float luma47 = inp[local_pos + 89]; + float luma48 = inp[local_pos + 90]; + /* abd accumulates weighted (gx*gx, gx*gy, gy*gy) over the inner 5x5 window positions; gx and gy are central differences along the first and second tile index. */vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma15 - luma1) / 2.0; + gy = (luma9 - luma7) / 2.0; + abd
+= vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (luma16 - luma2) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma17 - luma3) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (luma18 - luma4) / 2.0; + gy = (luma12 - luma10) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma19 - luma5) / 2.0; + gy = (luma13 - luma11) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (luma22 - luma8) / 2.0; + gy = (luma16 - luma14) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma23 - luma9) / 2.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (luma24 - luma10) / 2.0; + gy = (luma18 - luma16) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (luma25 - luma11) / 2.0; + gy = (luma19 - luma17) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (luma26 - luma12) / 2.0; + gy = (luma20 - luma18) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma29 - luma15) / 2.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (luma30 - luma16) / 2.0; + gy = (luma24 - luma22) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (luma31 - luma17) / 2.0; + gy = (luma25 - luma23) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06319146241026467; + gx = (luma32 - luma18) / 2.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (luma33 - luma19) / 2.0; + gy = (luma27 - luma25) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (luma36 - luma22) / 2.0; + gy = (luma30 - luma28) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma37 -
luma23) / 2.0; + gy = (luma31 - luma29) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (luma38 - luma24) / 2.0; + gy = (luma32 - luma30) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (luma39 - luma25) / 2.0; + gy = (luma33 - luma31) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (luma40 - luma26) / 2.0; + gy = (luma34 - luma32) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma43 - luma29) / 2.0; + gy = (luma37 - luma35) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (luma44 - luma30) / 2.0; + gy = (luma38 - luma36) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma45 - luma31) / 2.0; + gy = (luma39 - luma37) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (luma46 - luma32) / 2.0; + gy = (luma40 - luma38) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma47 - luma33) / 2.0; + gy = (luma41 - luma39) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + /* L1 >= L2 are the eigenvalues of the symmetric 2x2 matrix [[a, b], [b, d]], computed from its trace T and determinant D. */float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + /* mix() and mod() are not defined in this chunk; the bool third argument of mix is assumed to select the second value when true, as in GLSL - TODO confirm. */float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + /* Quantize into 24 angle x 3 strength x 3 coherence classes; coord_y is (class index + 0.5) / 216. */float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0; + /* res0 and res1 hold the eight non-centre output pixels. Each w0/w1 fetch pair (x = (k + 0.5) / 50) serves sample N and its mirror 48 - N; the mirror uses w0 and w1 exchanged and swizzled .wzyx. */vec4 res0 = vec4(0.0, 0.0, 0.0, 0.0), res1 = vec4(0.0, 0.0, 0.0, 0.0); + vec4 w0, w1; + w0 = texture(ravu_3x_lut4,
vec2(0.01, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.03, coord_y)); + res0 += luma0 * w0 + luma48 * w1.wzyx; + res1 += luma0 * w1 + luma48 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.05, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.07, coord_y)); + res0 += luma1 * w0 + luma47 * w1.wzyx; + res1 += luma1 * w1 + luma47 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.09, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.11, coord_y)); + res0 += luma2 * w0 + luma46 * w1.wzyx; + res1 += luma2 * w1 + luma46 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.13, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.15, coord_y)); + res0 += luma3 * w0 + luma45 * w1.wzyx; + res1 += luma3 * w1 + luma45 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.17, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.19, coord_y)); + res0 += luma4 * w0 + luma44 * w1.wzyx; + res1 += luma4 * w1 + luma44 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.21, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.23, coord_y)); + res0 += luma5 * w0 + luma43 * w1.wzyx; + res1 += luma5 * w1 + luma43 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.25, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.27, coord_y)); + res0 += luma6 * w0 + luma42 * w1.wzyx; + res1 += luma6 * w1 + luma42 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.29, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.31, coord_y)); + res0 += luma7 * w0 + luma41 * w1.wzyx; + res1 += luma7 * w1 + luma41 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.33, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.35, coord_y)); + res0 += luma8 * w0 + luma40 * w1.wzyx; + res1 += luma8 * w1 + luma40 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.37, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.39, coord_y)); + res0 += luma9 * w0 + luma39 * w1.wzyx; + res1 += luma9 * w1 + luma39 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.41, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.43, coord_y)); + res0 += luma10 * w0 + luma38 * w1.wzyx; + res1 += luma10 * w1 + luma38 * w0.wzyx; +
w0 = texture(ravu_3x_lut4, vec2(0.45, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.47, coord_y)); + res0 += luma11 * w0 + luma37 * w1.wzyx; + res1 += luma11 * w1 + luma37 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.49, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.51, coord_y)); + res0 += luma12 * w0 + luma36 * w1.wzyx; + res1 += luma12 * w1 + luma36 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.53, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.55, coord_y)); + res0 += luma13 * w0 + luma35 * w1.wzyx; + res1 += luma13 * w1 + luma35 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.57, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.59, coord_y)); + res0 += luma14 * w0 + luma34 * w1.wzyx; + res1 += luma14 * w1 + luma34 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.61, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.63, coord_y)); + res0 += luma15 * w0 + luma33 * w1.wzyx; + res1 += luma15 * w1 + luma33 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.65, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.67, coord_y)); + res0 += luma16 * w0 + luma32 * w1.wzyx; + res1 += luma16 * w1 + luma32 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.69, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.71, coord_y)); + res0 += luma17 * w0 + luma31 * w1.wzyx; + res1 += luma17 * w1 + luma31 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.73, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.75, coord_y)); + res0 += luma18 * w0 + luma30 * w1.wzyx; + res1 += luma18 * w1 + luma30 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.77, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.79, coord_y)); + res0 += luma19 * w0 + luma29 * w1.wzyx; + res1 += luma19 * w1 + luma29 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.81, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.83, coord_y)); + res0 += luma20 * w0 + luma28 * w1.wzyx; + res1 += luma20 * w1 + luma28 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.85, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.87, coord_y)); + res0 += luma21 * w0 + luma27 *
w1.wzyx; + res1 += luma21 * w1 + luma27 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.89, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.91, coord_y)); + res0 += luma22 * w0 + luma26 * w1.wzyx; + res1 += luma22 * w1 + luma26 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.93, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.95, coord_y)); + res0 += luma23 * w0 + luma25 * w1.wzyx; + res1 += luma23 * w1 + luma25 * w0.wzyx; + w0 = texture(ravu_3x_lut4, vec2(0.97, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.99, coord_y)); + res0 += luma24 * w0; + res1 += luma24 * w1; + res0 = clamp(res0, 0.0, 1.0); + res1 = clamp(res1, 0.0, 1.0); + /* Write the 3x3 output block; the centre pixel (1, 1) receives the unfiltered centre sample luma24. */imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), res0[0]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), res0[1]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), res0[2]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), res0[3]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), luma24); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), res1[0]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), res1[1]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), res1[2]); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), res1[3]); +} diff --git a/src/Effects/RAVU/RAVU_3x_R4_RGB.hlsl b/src/Effects/RAVU/RAVU_3x_R4_RGB.hlsl new file mode 100644 index 000000000..c3883a6d3 --- /dev/null +++ b/src/Effects/RAVU/RAVU_3x_R4_RGB.hlsl @@ -0,0 +1,344 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly.
+// Generated by: ravu-3x.py --target rgb --weights-file weights\ravu-3x_weights-r4.py --float-format float16dx --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 3 +//!HEIGHT INPUT_HEIGHT * 3 +Texture2D OUTPUT; + +//!TEXTURE +//!SOURCE ravu_3x_lut4_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_3x_lut4; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_3x_lut4; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-3x (rgb, r4) +//!IN INPUT, ravu_3x_lut4 +//!OUT OUTPUT +//!BLOCK_SIZE 96, 24 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +// HLSL doesn't have outerProduct +float4x3 outerProduct(float3 l, float4 r) { return mul(float4x1(r), float1x3(l)); } +shared vec3 inp[532]; +shared float inp_luma[532]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val) +void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_3x_lut4_tex(pos) 
(vec4(texture(ravu_3x_lut4, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 14 + int(gl_LocalInvocationID.y); + for (int id = int(gl_LocalInvocationIndex); id < 532; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 14, y = (uint)id % 14; + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).xyz; + inp_luma[id] = dot(inp[id], color_primary); + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 3; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + float luma1 = inp_luma[local_pos + 1]; + float luma2 = inp_luma[local_pos + 2]; + float luma3 = inp_luma[local_pos + 3]; + float luma4 = inp_luma[local_pos + 4]; + float luma5 = inp_luma[local_pos + 5]; + float luma7 = inp_luma[local_pos + 14]; + float luma8 = inp_luma[local_pos + 15]; + float luma9 = inp_luma[local_pos + 16]; + float luma10 = inp_luma[local_pos + 17]; + float luma11 = inp_luma[local_pos + 18]; + float luma12 = inp_luma[local_pos + 19]; + float luma13 = inp_luma[local_pos + 20]; + float luma14 = inp_luma[local_pos + 28]; + float luma15 = inp_luma[local_pos + 29]; + float luma16 = inp_luma[local_pos + 30]; + float luma17 = inp_luma[local_pos + 31]; + float luma18 = inp_luma[local_pos + 32]; + float luma19 = inp_luma[local_pos + 33]; + float luma20 = inp_luma[local_pos + 34]; + float luma21 = inp_luma[local_pos + 42]; + float luma22 = inp_luma[local_pos + 43]; + float luma23 = inp_luma[local_pos + 44]; + float luma24 = inp_luma[local_pos + 45]; + float luma25 = inp_luma[local_pos + 46]; + float luma26 = inp_luma[local_pos + 47]; + float luma27 = inp_luma[local_pos + 48]; + float luma28 = 
inp_luma[local_pos + 56]; + float luma29 = inp_luma[local_pos + 57]; + float luma30 = inp_luma[local_pos + 58]; + float luma31 = inp_luma[local_pos + 59]; + float luma32 = inp_luma[local_pos + 60]; + float luma33 = inp_luma[local_pos + 61]; + float luma34 = inp_luma[local_pos + 62]; + float luma35 = inp_luma[local_pos + 70]; + float luma36 = inp_luma[local_pos + 71]; + float luma37 = inp_luma[local_pos + 72]; + float luma38 = inp_luma[local_pos + 73]; + float luma39 = inp_luma[local_pos + 74]; + float luma40 = inp_luma[local_pos + 75]; + float luma41 = inp_luma[local_pos + 76]; + float luma43 = inp_luma[local_pos + 85]; + float luma44 = inp_luma[local_pos + 86]; + float luma45 = inp_luma[local_pos + 87]; + float luma46 = inp_luma[local_pos + 88]; + float luma47 = inp_luma[local_pos + 89]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma15 - luma1) / 2.0; + gy = (luma9 - luma7) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (luma16 - luma2) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma17 - luma3) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (luma18 - luma4) / 2.0; + gy = (luma12 - luma10) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma19 - luma5) / 2.0; + gy = (luma13 - luma11) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (luma22 - luma8) / 2.0; + gy = (luma16 - luma14) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma23 - luma9) / 2.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (luma24 - luma10) / 2.0; + gy = (luma18 - luma16) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (luma25 - luma11) / 2.0; + gy = (luma19 - luma17) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + 
gx = (luma26 - luma12) / 2.0; + gy = (luma20 - luma18) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma29 - luma15) / 2.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (luma30 - luma16) / 2.0; + gy = (luma24 - luma22) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (luma31 - luma17) / 2.0; + gy = (luma25 - luma23) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06319146241026467; + gx = (luma32 - luma18) / 2.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (luma33 - luma19) / 2.0; + gy = (luma27 - luma25) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (luma36 - luma22) / 2.0; + gy = (luma30 - luma28) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma37 - luma23) / 2.0; + gy = (luma31 - luma29) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (luma38 - luma24) / 2.0; + gy = (luma32 - luma30) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (luma39 - luma25) / 2.0; + gy = (luma33 - luma31) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (luma40 - luma26) / 2.0; + gy = (luma34 - luma32) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma43 - luma29) / 2.0; + gy = (luma37 - luma35) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (luma44 - luma30) / 2.0; + gy = (luma38 - luma36) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma45 - luma31) / 2.0; + gy = (luma39 - luma37) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (luma46 - luma32) / 2.0; + gy = (luma40 - luma38) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (luma47 - luma33) / 2.0; + gy = (luma41 - luma39) / 2.0; + abd 
+= vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0; + mat4x3 res0 = 0.0, res1 = 0.0; + vec4 w0, w1; + w0 = texture(ravu_3x_lut4, vec2(0.01, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.03, coord_y)); + res0 += outerProduct(inp[local_pos + 0], w0) + outerProduct(inp[local_pos + 90], w1.wzyx); + res1 += outerProduct(inp[local_pos + 0], w1) + outerProduct(inp[local_pos + 90], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.05, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.07, coord_y)); + res0 += outerProduct(inp[local_pos + 1], w0) + outerProduct(inp[local_pos + 89], w1.wzyx); + res1 += outerProduct(inp[local_pos + 1], w1) + outerProduct(inp[local_pos + 89], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.09, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.11, coord_y)); + res0 += outerProduct(inp[local_pos + 2], w0) + outerProduct(inp[local_pos + 88], w1.wzyx); + res1 += outerProduct(inp[local_pos + 2], w1) + outerProduct(inp[local_pos + 88], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.13, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.15, coord_y)); + res0 += outerProduct(inp[local_pos + 3], w0) + outerProduct(inp[local_pos + 87], w1.wzyx); + res1 += outerProduct(inp[local_pos + 3], w1) + outerProduct(inp[local_pos + 87], w0.wzyx); + 
w0 = texture(ravu_3x_lut4, vec2(0.17, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.19, coord_y)); + res0 += outerProduct(inp[local_pos + 4], w0) + outerProduct(inp[local_pos + 86], w1.wzyx); + res1 += outerProduct(inp[local_pos + 4], w1) + outerProduct(inp[local_pos + 86], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.21, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.23, coord_y)); + res0 += outerProduct(inp[local_pos + 5], w0) + outerProduct(inp[local_pos + 85], w1.wzyx); + res1 += outerProduct(inp[local_pos + 5], w1) + outerProduct(inp[local_pos + 85], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.25, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.27, coord_y)); + res0 += outerProduct(inp[local_pos + 6], w0) + outerProduct(inp[local_pos + 84], w1.wzyx); + res1 += outerProduct(inp[local_pos + 6], w1) + outerProduct(inp[local_pos + 84], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.29, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.31, coord_y)); + res0 += outerProduct(inp[local_pos + 14], w0) + outerProduct(inp[local_pos + 76], w1.wzyx); + res1 += outerProduct(inp[local_pos + 14], w1) + outerProduct(inp[local_pos + 76], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.33, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.35, coord_y)); + res0 += outerProduct(inp[local_pos + 15], w0) + outerProduct(inp[local_pos + 75], w1.wzyx); + res1 += outerProduct(inp[local_pos + 15], w1) + outerProduct(inp[local_pos + 75], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.37, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.39, coord_y)); + res0 += outerProduct(inp[local_pos + 16], w0) + outerProduct(inp[local_pos + 74], w1.wzyx); + res1 += outerProduct(inp[local_pos + 16], w1) + outerProduct(inp[local_pos + 74], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.41, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.43, coord_y)); + res0 += outerProduct(inp[local_pos + 17], w0) + outerProduct(inp[local_pos + 73], w1.wzyx); + res1 += outerProduct(inp[local_pos + 17], w1) + 
outerProduct(inp[local_pos + 73], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.45, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.47, coord_y)); + res0 += outerProduct(inp[local_pos + 18], w0) + outerProduct(inp[local_pos + 72], w1.wzyx); + res1 += outerProduct(inp[local_pos + 18], w1) + outerProduct(inp[local_pos + 72], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.49, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.51, coord_y)); + res0 += outerProduct(inp[local_pos + 19], w0) + outerProduct(inp[local_pos + 71], w1.wzyx); + res1 += outerProduct(inp[local_pos + 19], w1) + outerProduct(inp[local_pos + 71], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.53, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.55, coord_y)); + res0 += outerProduct(inp[local_pos + 20], w0) + outerProduct(inp[local_pos + 70], w1.wzyx); + res1 += outerProduct(inp[local_pos + 20], w1) + outerProduct(inp[local_pos + 70], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.57, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.59, coord_y)); + res0 += outerProduct(inp[local_pos + 28], w0) + outerProduct(inp[local_pos + 62], w1.wzyx); + res1 += outerProduct(inp[local_pos + 28], w1) + outerProduct(inp[local_pos + 62], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.61, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.63, coord_y)); + res0 += outerProduct(inp[local_pos + 29], w0) + outerProduct(inp[local_pos + 61], w1.wzyx); + res1 += outerProduct(inp[local_pos + 29], w1) + outerProduct(inp[local_pos + 61], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.65, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.67, coord_y)); + res0 += outerProduct(inp[local_pos + 30], w0) + outerProduct(inp[local_pos + 60], w1.wzyx); + res1 += outerProduct(inp[local_pos + 30], w1) + outerProduct(inp[local_pos + 60], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.69, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.71, coord_y)); + res0 += outerProduct(inp[local_pos + 31], w0) + outerProduct(inp[local_pos + 59], w1.wzyx); + res1 += 
outerProduct(inp[local_pos + 31], w1) + outerProduct(inp[local_pos + 59], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.73, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.75, coord_y)); + res0 += outerProduct(inp[local_pos + 32], w0) + outerProduct(inp[local_pos + 58], w1.wzyx); + res1 += outerProduct(inp[local_pos + 32], w1) + outerProduct(inp[local_pos + 58], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.77, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.79, coord_y)); + res0 += outerProduct(inp[local_pos + 33], w0) + outerProduct(inp[local_pos + 57], w1.wzyx); + res1 += outerProduct(inp[local_pos + 33], w1) + outerProduct(inp[local_pos + 57], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.81, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.83, coord_y)); + res0 += outerProduct(inp[local_pos + 34], w0) + outerProduct(inp[local_pos + 56], w1.wzyx); + res1 += outerProduct(inp[local_pos + 34], w1) + outerProduct(inp[local_pos + 56], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.85, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.87, coord_y)); + res0 += outerProduct(inp[local_pos + 42], w0) + outerProduct(inp[local_pos + 48], w1.wzyx); + res1 += outerProduct(inp[local_pos + 42], w1) + outerProduct(inp[local_pos + 48], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.89, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.91, coord_y)); + res0 += outerProduct(inp[local_pos + 43], w0) + outerProduct(inp[local_pos + 47], w1.wzyx); + res1 += outerProduct(inp[local_pos + 43], w1) + outerProduct(inp[local_pos + 47], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.93, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.95, coord_y)); + res0 += outerProduct(inp[local_pos + 44], w0) + outerProduct(inp[local_pos + 46], w1.wzyx); + res1 += outerProduct(inp[local_pos + 44], w1) + outerProduct(inp[local_pos + 46], w0.wzyx); + w0 = texture(ravu_3x_lut4, vec2(0.97, coord_y)); + w1 = texture(ravu_3x_lut4, vec2(0.99, coord_y)); + res0 += outerProduct(inp[local_pos + 45], w0); + res1 += 
outerProduct(inp[local_pos + 45], w1); + res0[0] = clamp(res0[0], 0.0, 1.0); + res0[1] = clamp(res0[1], 0.0, 1.0); + res0[2] = clamp(res0[2], 0.0, 1.0); + res0[3] = clamp(res0[3], 0.0, 1.0); + res1[0] = clamp(res1[0], 0.0, 1.0); + res1[1] = clamp(res1[1], 0.0, 1.0); + res1[2] = clamp(res1[2], 0.0, 1.0); + res1[3] = clamp(res1[3], 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), vec4(res0[0], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), vec4(res0[1], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), vec4(res0[2], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), vec4(res0[3], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), vec4(inp[local_pos + 45], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), vec4(res1[0], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), vec4(res1[1], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), vec4(res1[2], 1.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), vec4(res1[3], 1.0)); +} diff --git a/src/Effects/RAVU/RAVU_Lite_AR_R2.hlsl b/src/Effects/RAVU/RAVU_Lite_AR_R2.hlsl new file mode 100644 index 000000000..473527c6b --- /dev/null +++ b/src/Effects/RAVU/RAVU_Lite_AR_R2.hlsl @@ -0,0 +1,224 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r2.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_lite_lut2_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_lite_lut2; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_lite_lut2; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Lite-AR (r2, compute) +//!IN INPUT, ravu_lite_lut2 +//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +shared float inp[340]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lite_lut2_tex(pos) (vec4(texture(ravu_lite_lut2, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 10 + int(gl_LocalInvocationID.y); +#pragma warning(disable : 3557) + for (int id = 
int(gl_LocalInvocationIndex); id < 340; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 10, y = (uint)id % 10; + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).x; + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (inp[local_pos + 10] - inp[local_pos + 0]); + gy = (inp[local_pos + 1] - inp[local_pos + 0]); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 11] - inp[local_pos + 1]); + gy = (inp[local_pos + 2] - inp[local_pos + 0]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 12] - inp[local_pos + 2]); + gy = (inp[local_pos + 2] - inp[local_pos + 1]); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 20] - inp[local_pos + 0]) / 2.0; + gy = (inp[local_pos + 11] - inp[local_pos + 10]); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 21] - inp[local_pos + 1]) / 2.0; + gy = (inp[local_pos + 12] - inp[local_pos + 10]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833; + gx = (inp[local_pos + 22] - inp[local_pos + 2]) / 2.0; + gy = (inp[local_pos + 12] - inp[local_pos + 11]); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 20] - inp[local_pos + 10]); + gy = (inp[local_pos + 21] - inp[local_pos + 20]); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 21] - inp[local_pos + 11]); + gy = (inp[local_pos + 22] - inp[local_pos + 20]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 22] - inp[local_pos + 12]); + gy = (inp[local_pos + 22] - inp[local_pos + 21]); + abd += 
vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0; + vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w; + vec4 lo = vec4(0.0, 0.0, 0.0, 0.0), hi = vec4(0.0, 0.0, 0.0, 0.0), lo2 = vec4(0.0, 0.0, 0.0, 0.0), + hi2 = vec4(0.0, 0.0, 0.0, 0.0), wg, cg4, cg4_1; + w = texture(ravu_lite_lut2, vec2(0.1, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 0] * w + inp[local_pos + 22] * w.wzyx; + cg4 = + vec4(0.1 + inp[local_pos + 0], 1.1 - inp[local_pos + 0], 0.1 + inp[local_pos + 22], 1.1 - inp[local_pos + 22]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut2, vec2(0.3, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 1] * w + inp[local_pos + 21] * w.wzyx; + cg4 = + vec4(0.1 + inp[local_pos + 1], 1.1 - inp[local_pos + 1], 0.1 + inp[local_pos + 21], 1.1 - inp[local_pos + 21]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= 
cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut2, vec2(0.5, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 2] * w + inp[local_pos + 20] * w.wzyx; + cg4 = + vec4(0.1 + inp[local_pos + 2], 1.1 - inp[local_pos + 2], 0.1 + inp[local_pos + 20], 1.1 - inp[local_pos + 20]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut2, vec2(0.7, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 10] * w + inp[local_pos + 12] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 10], 1.1 - inp[local_pos + 10], 0.1 + inp[local_pos + 12], + 1.1 - inp[local_pos + 12]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut2, vec2(0.9, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 11] * w; + vec2 cg2 = vec2(0.1 + inp[local_pos + 11], 1.1 - inp[local_pos + 11]); + vec2 cg2_1 = cg2; + cg2 *= cg2; + cg2 *= cg2; + cg2 *= cg2; + cg2 *= cg2; + cg2 *= cg2; + hi += cg2.x * wg; + lo += cg2.y * wg; + cg2 *= cg2_1; + hi2 += cg2.x * wg; + lo2 += cg2.y * wg; + lo = 1.1 - lo2 / lo; + hi = hi2 / hi - 0.1; + res = mix(res, clamp(res, lo, hi), 0.800000); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + 
ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0)); +} diff --git a/src/Effects/RAVU/RAVU_Lite_AR_R3.hlsl b/src/Effects/RAVU/RAVU_Lite_AR_R3.hlsl new file mode 100644 index 000000000..0f69785d8 --- /dev/null +++ b/src/Effects/RAVU/RAVU_Lite_AR_R3.hlsl @@ -0,0 +1,268 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r3.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_lite_lut3_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_lite_lut3; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_lite_lut3; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Lite-AR (r3, compute) +//!IN INPUT, ravu_lite_lut3 +//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +shared float inp[432]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lite_lut3_tex(pos) (vec4(texture(ravu_lite_lut3, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 12 + int(gl_LocalInvocationID.y); +#pragma warning(disable : 3557) + for (int id = int(gl_LocalInvocationIndex); id < 432; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 12, y = (uint)id % 12; + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).x; + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = 
GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (inp[local_pos + 25] - inp[local_pos + 1]) / 2.0; + gy = (inp[local_pos + 14] - inp[local_pos + 12]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 26] - inp[local_pos + 2]) / 2.0; + gy = (inp[local_pos + 15] - inp[local_pos + 13]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 27] - inp[local_pos + 3]) / 2.0; + gy = (inp[local_pos + 16] - inp[local_pos + 14]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 37] - inp[local_pos + 13]) / 2.0; + gy = (inp[local_pos + 26] - inp[local_pos + 24]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 38] - inp[local_pos + 14]) / 2.0; + gy = (inp[local_pos + 27] - inp[local_pos + 25]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833; + gx = (inp[local_pos + 39] - inp[local_pos + 15]) / 2.0; + gy = (inp[local_pos + 28] - inp[local_pos + 26]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 49] - inp[local_pos + 25]) / 2.0; + gy = (inp[local_pos + 38] - inp[local_pos + 36]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 50] - inp[local_pos + 26]) / 2.0; + gy = (inp[local_pos + 39] - inp[local_pos + 37]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 51] - inp[local_pos + 27]) / 2.0; + gy = (inp[local_pos + 40] - inp[local_pos + 38]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float 
theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0; + vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w; + vec4 lo = vec4(0.0, 0.0, 0.0, 0.0), hi = vec4(0.0, 0.0, 0.0, 0.0), lo2 = vec4(0.0, 0.0, 0.0, 0.0), + hi2 = vec4(0.0, 0.0, 0.0, 0.0), wg, cg4, cg4_1; + w = texture(ravu_lite_lut3, vec2(0.038461538461538464, coord_y)); + res += inp[local_pos + 0] * w + inp[local_pos + 52] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.11538461538461539, coord_y)); + res += inp[local_pos + 1] * w + inp[local_pos + 51] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.19230769230769232, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 2] * w + inp[local_pos + 50] * w.wzyx; + cg4 = + vec4(0.1 + inp[local_pos + 2], 1.1 - inp[local_pos + 2], 0.1 + inp[local_pos + 50], 1.1 - inp[local_pos + 50]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut3, vec2(0.2692307692307692, coord_y)); + res += inp[local_pos + 3] * w + inp[local_pos + 49] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.34615384615384615, coord_y)); + res += inp[local_pos + 4] * w + inp[local_pos + 48] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.4230769230769231, coord_y)); + res += inp[local_pos + 12] * w + inp[local_pos + 40] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.5, coord_y)); + wg = 
max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 13] * w + inp[local_pos + 39] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 13], 1.1 - inp[local_pos + 13], 0.1 + inp[local_pos + 39], + 1.1 - inp[local_pos + 39]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut3, vec2(0.5769230769230769, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 14] * w + inp[local_pos + 38] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 14], 1.1 - inp[local_pos + 14], 0.1 + inp[local_pos + 38], + 1.1 - inp[local_pos + 38]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut3, vec2(0.6538461538461539, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 15] * w + inp[local_pos + 37] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 15], 1.1 - inp[local_pos + 15], 0.1 + inp[local_pos + 37], + 1.1 - inp[local_pos + 37]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut3, vec2(0.7307692307692307, coord_y)); + res += inp[local_pos + 16] * w + inp[local_pos + 36] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.8076923076923077, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 24] * w + inp[local_pos + 28] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 24], 1.1 - inp[local_pos + 24], 0.1 + inp[local_pos + 28], + 1.1 - inp[local_pos 
+ 28]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut3, vec2(0.8846153846153846, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 25] * w + inp[local_pos + 27] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 25], 1.1 - inp[local_pos + 25], 0.1 + inp[local_pos + 27], + 1.1 - inp[local_pos + 27]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut3, vec2(0.9615384615384616, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 26] * w; + vec2 cg2 = vec2(0.1 + inp[local_pos + 26], 1.1 - inp[local_pos + 26]); + vec2 cg2_1 = cg2; + cg2 *= cg2; + cg2 *= cg2; + cg2 *= cg2; + cg2 *= cg2; + cg2 *= cg2; + hi += cg2.x * wg; + lo += cg2.y * wg; + cg2 *= cg2_1; + hi2 += cg2.x * wg; + lo2 += cg2.y * wg; + lo = 1.1 - lo2 / lo; + hi = hi2 / hi - 0.1; + res = mix(res, clamp(res, lo, hi), 0.800000); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0)); +} diff --git a/src/Effects/RAVU/RAVU_Lite_AR_R4.hlsl b/src/Effects/RAVU/RAVU_Lite_AR_R4.hlsl new file mode 100644 index 000000000..243b46d0d --- /dev/null +++ b/src/Effects/RAVU/RAVU_Lite_AR_R4.hlsl @@ -0,0 +1,340 @@ +// This file is generated by the scripts available at 
https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r4.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_lite_lut4_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_lite_lut4; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_lite_lut4; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Lite-AR (r4, compute) +//!IN INPUT, ravu_lite_lut4 +//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +shared float inp[532]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = 
float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lite_lut4_tex(pos) (vec4(texture(ravu_lite_lut4, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 14 + int(gl_LocalInvocationID.y); +#pragma warning(disable : 3557) + for (int id = int(gl_LocalInvocationIndex); id < 532; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 14, y = (uint)id % 14; + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).x; + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (inp[local_pos + 29] - inp[local_pos + 1]) / 2.0; + gy = (inp[local_pos + 16] - inp[local_pos + 14]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (inp[local_pos + 30] - inp[local_pos + 2]) / 2.0; + gy = (inp[local_pos + 17] - inp[local_pos + 15]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 31] - inp[local_pos + 3]) / 2.0; + gy = (inp[local_pos + 18] - inp[local_pos + 16]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (inp[local_pos + 32] - inp[local_pos + 4]) / 2.0; + gy = (inp[local_pos + 19] - inp[local_pos + 17]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 33] - inp[local_pos + 5]) / 2.0; + gy = (inp[local_pos + 20] - inp[local_pos + 18]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (inp[local_pos + 43] - inp[local_pos + 15]) / 2.0; + gy = 
(inp[local_pos + 30] - inp[local_pos + 28]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 44] - inp[local_pos + 16]) / 2.0; + gy = (inp[local_pos + 31] - inp[local_pos + 29]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (inp[local_pos + 45] - inp[local_pos + 17]) / 2.0; + gy = (inp[local_pos + 32] - inp[local_pos + 30]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (inp[local_pos + 46] - inp[local_pos + 18]) / 2.0; + gy = (inp[local_pos + 33] - inp[local_pos + 31]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (inp[local_pos + 47] - inp[local_pos + 19]) / 2.0; + gy = (inp[local_pos + 34] - inp[local_pos + 32]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 57] - inp[local_pos + 29]) / 2.0; + gy = (inp[local_pos + 44] - inp[local_pos + 42]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (inp[local_pos + 58] - inp[local_pos + 30]) / 2.0; + gy = (inp[local_pos + 45] - inp[local_pos + 43]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (inp[local_pos + 59] - inp[local_pos + 31]) / 2.0; + gy = (inp[local_pos + 46] - inp[local_pos + 44]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06319146241026467; + gx = (inp[local_pos + 60] - inp[local_pos + 32]) / 2.0; + gy = (inp[local_pos + 47] - inp[local_pos + 45]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (inp[local_pos + 61] - inp[local_pos + 33]) / 2.0; + gy = (inp[local_pos + 48] - inp[local_pos + 46]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (inp[local_pos + 71] - inp[local_pos + 43]) / 2.0; + gy = (inp[local_pos + 58] - inp[local_pos + 56]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 72] - inp[local_pos + 44]) / 2.0; + gy = 
(inp[local_pos + 59] - inp[local_pos + 57]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (inp[local_pos + 73] - inp[local_pos + 45]) / 2.0; + gy = (inp[local_pos + 60] - inp[local_pos + 58]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (inp[local_pos + 74] - inp[local_pos + 46]) / 2.0; + gy = (inp[local_pos + 61] - inp[local_pos + 59]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (inp[local_pos + 75] - inp[local_pos + 47]) / 2.0; + gy = (inp[local_pos + 62] - inp[local_pos + 60]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 85] - inp[local_pos + 57]) / 2.0; + gy = (inp[local_pos + 72] - inp[local_pos + 70]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (inp[local_pos + 86] - inp[local_pos + 58]) / 2.0; + gy = (inp[local_pos + 73] - inp[local_pos + 71]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 87] - inp[local_pos + 59]) / 2.0; + gy = (inp[local_pos + 74] - inp[local_pos + 72]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (inp[local_pos + 88] - inp[local_pos + 60]) / 2.0; + gy = (inp[local_pos + 75] - inp[local_pos + 73]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 89] - inp[local_pos + 61]) / 2.0; + gy = (inp[local_pos + 76] - inp[local_pos + 74]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 
< 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0; + vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w; + vec4 lo = vec4(0.0, 0.0, 0.0, 0.0), hi = vec4(0.0, 0.0, 0.0, 0.0), lo2 = vec4(0.0, 0.0, 0.0, 0.0), + hi2 = vec4(0.0, 0.0, 0.0, 0.0), wg, cg4, cg4_1; + w = texture(ravu_lite_lut4, vec2(0.02, coord_y)); + res += inp[local_pos + 0] * w + inp[local_pos + 90] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.06, coord_y)); + res += inp[local_pos + 1] * w + inp[local_pos + 89] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.1, coord_y)); + res += inp[local_pos + 2] * w + inp[local_pos + 88] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.14, coord_y)); + res += inp[local_pos + 3] * w + inp[local_pos + 87] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.18, coord_y)); + res += inp[local_pos + 4] * w + inp[local_pos + 86] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.22, coord_y)); + res += inp[local_pos + 5] * w + inp[local_pos + 85] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.26, coord_y)); + res += inp[local_pos + 6] * w + inp[local_pos + 84] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.3, coord_y)); + res += inp[local_pos + 14] * w + inp[local_pos + 76] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.34, coord_y)); + res += inp[local_pos + 15] * w + inp[local_pos + 75] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.38, coord_y)); + res += inp[local_pos + 16] * w + inp[local_pos + 74] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.42, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 17] * w + inp[local_pos + 73] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 17], 1.1 - inp[local_pos + 17], 0.1 + inp[local_pos + 73], + 1.1 - inp[local_pos + 73]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= 
cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut4, vec2(0.46, coord_y)); + res += inp[local_pos + 18] * w + inp[local_pos + 72] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.5, coord_y)); + res += inp[local_pos + 19] * w + inp[local_pos + 71] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.54, coord_y)); + res += inp[local_pos + 20] * w + inp[local_pos + 70] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.58, coord_y)); + res += inp[local_pos + 28] * w + inp[local_pos + 62] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.62, coord_y)); + res += inp[local_pos + 29] * w + inp[local_pos + 61] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.66, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 30] * w + inp[local_pos + 60] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 30], 1.1 - inp[local_pos + 30], 0.1 + inp[local_pos + 60], + 1.1 - inp[local_pos + 60]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut4, vec2(0.7, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 31] * w + inp[local_pos + 59] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 31], 1.1 - inp[local_pos + 31], 0.1 + inp[local_pos + 59], + 1.1 - inp[local_pos + 59]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut4, vec2(0.74, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 32] * w 
+ inp[local_pos + 58] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 32], 1.1 - inp[local_pos + 32], 0.1 + inp[local_pos + 58], + 1.1 - inp[local_pos + 58]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut4, vec2(0.78, coord_y)); + res += inp[local_pos + 33] * w + inp[local_pos + 57] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.82, coord_y)); + res += inp[local_pos + 34] * w + inp[local_pos + 56] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.86, coord_y)); + res += inp[local_pos + 42] * w + inp[local_pos + 48] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.9, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 43] * w + inp[local_pos + 47] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 43], 1.1 - inp[local_pos + 43], 0.1 + inp[local_pos + 47], + 1.1 - inp[local_pos + 47]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut4, vec2(0.94, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 44] * w + inp[local_pos + 46] * w.wzyx; + cg4 = vec4(0.1 + inp[local_pos + 44], 1.1 - inp[local_pos + 44], 0.1 + inp[local_pos + 46], + 1.1 - inp[local_pos + 46]); + cg4_1 = cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + cg4 *= cg4; + hi += cg4.x * wg + cg4.z * wg.wzyx; + lo += cg4.y * wg + cg4.w * wg.wzyx; + cg4 *= cg4_1; + hi2 += cg4.x * wg + cg4.z * wg.wzyx; + lo2 += cg4.y * wg + cg4.w * wg.wzyx; + w = texture(ravu_lite_lut4, vec2(0.98, coord_y)); + wg = max(vec4(0.0, 0.0, 0.0, 0.0), w); + res += inp[local_pos + 45] * w; + vec2 cg2 = vec2(0.1 + inp[local_pos 
+ 45], 1.1 - inp[local_pos + 45]); + vec2 cg2_1 = cg2; + cg2 *= cg2; + cg2 *= cg2; + cg2 *= cg2; + cg2 *= cg2; + cg2 *= cg2; + hi += cg2.x * wg; + lo += cg2.y * wg; + cg2 *= cg2_1; + hi2 += cg2.x * wg; + lo2 += cg2.y * wg; + lo = 1.1 - lo2 / lo; + hi = hi2 / hi - 0.1; + res = mix(res, clamp(res, lo, hi), 0.800000); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0)); +} diff --git a/src/Effects/RAVU/RAVU_Lite_R2.hlsl b/src/Effects/RAVU/RAVU_Lite_R2.hlsl new file mode 100644 index 000000000..cb62cb20c --- /dev/null +++ b/src/Effects/RAVU/RAVU_Lite_R2.hlsl @@ -0,0 +1,151 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r2.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_lite_lut2_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_lite_lut2; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_lite_lut2; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Lite (r2, compute) +//!IN INPUT, ravu_lite_lut2 +//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +shared float inp[340]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lite_lut2_tex(pos) (vec4(texture(ravu_lite_lut2, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 10 + int(gl_LocalInvocationID.y); +#pragma warning(disable : 3557) + for (int id = int(gl_LocalInvocationIndex); id < 340; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 10, y = (uint)id % 10; + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).x; + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); 
+ if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (inp[local_pos + 10] - inp[local_pos + 0]); + gy = (inp[local_pos + 1] - inp[local_pos + 0]); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 11] - inp[local_pos + 1]); + gy = (inp[local_pos + 2] - inp[local_pos + 0]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 12] - inp[local_pos + 2]); + gy = (inp[local_pos + 2] - inp[local_pos + 1]); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 20] - inp[local_pos + 0]) / 2.0; + gy = (inp[local_pos + 11] - inp[local_pos + 10]); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 21] - inp[local_pos + 1]) / 2.0; + gy = (inp[local_pos + 12] - inp[local_pos + 10]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833; + gx = (inp[local_pos + 22] - inp[local_pos + 2]) / 2.0; + gy = (inp[local_pos + 12] - inp[local_pos + 11]); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 20] - inp[local_pos + 10]); + gy = (inp[local_pos + 21] - inp[local_pos + 20]); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 21] - inp[local_pos + 11]); + gy = (inp[local_pos + 22] - inp[local_pos + 20]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 22] - inp[local_pos + 12]); + gy = (inp[local_pos + 22] - inp[local_pos + 21]); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 
1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0; + vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w; + w = texture(ravu_lite_lut2, vec2(0.1, coord_y)); + res += inp[local_pos + 0] * w + inp[local_pos + 22] * w.wzyx; + w = texture(ravu_lite_lut2, vec2(0.3, coord_y)); + res += inp[local_pos + 1] * w + inp[local_pos + 21] * w.wzyx; + w = texture(ravu_lite_lut2, vec2(0.5, coord_y)); + res += inp[local_pos + 2] * w + inp[local_pos + 20] * w.wzyx; + w = texture(ravu_lite_lut2, vec2(0.7, coord_y)); + res += inp[local_pos + 10] * w + inp[local_pos + 12] * w.wzyx; + w = texture(ravu_lite_lut2, vec2(0.9, coord_y)); + res += inp[local_pos + 11] * w; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0)); +} diff --git a/src/Effects/RAVU/RAVU_Lite_R3.hlsl b/src/Effects/RAVU/RAVU_Lite_R3.hlsl index a9c3564cb..467a5b112 100644 --- a/src/Effects/RAVU/RAVU_Lite_R3.hlsl +++ b/src/Effects/RAVU/RAVU_Lite_R3.hlsl @@ -1,174 +1,167 @@ -// ravu-lite-r3 -// 移植自 https://github.com/bjin/mpv-prescalers/blob/cc02ed95c1fe05b72bc21d41257c4c085e6e409b/ravu-lite-r3.hook +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. 
+// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r3.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 - +//!VERSION 4 //!TEXTURE Texture2D INPUT; -//!TEXTURE -//!SOURCE RAVU_Lite_R3_Weights.dds -//!FORMAT R16G16B16A16_FLOAT -Texture2D ravu_lite_lut3; - //!SAMPLER //!FILTER POINT -SamplerState sam; +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; //!SAMPLER //!FILTER LINEAR -SamplerState sam1; +SamplerState sam_INPUT_LINEAR; -//!PASS 1 -//!IN INPUT, ravu_lite_lut3 -//!BLOCK_SIZE 32, 16 -//!NUM_THREADS 16, 8 - -#pragma warning(disable: 3557) // X3557: loop only executes for 1 iteration(s), forcing loop to unroll +//!TEXTURE +//!SOURCE ravu_lite_lut3_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_lite_lut3; -#define NUM_PIXELS_X (MP_BLOCK_WIDTH + 4) -#define NUM_PIXELS_Y (MP_BLOCK_HEIGHT + 4) +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_lite_lut3; -groupshared float inp[NUM_PIXELS_Y][NUM_PIXELS_X]; +//!COMMON +#include "prescalers.hlsli" -#define PI 3.1415926535897932384626433832795 +#define LAST_PASS 1 -float GetLuma(float3 color) { - return dot(float3(0.299f, 0.587f, 0.114f), color); +//!PASS 1 +//!DESC RAVU-Lite (r3, compute) 
+//!IN INPUT, ravu_lite_lut3 +//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +shared float inp[432]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); } -const static float2x3 rgb2uv = { - -0.169, -0.331, 0.5, - 0.5, -0.419, -0.081 -}; +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); -const static float3x3 yuv2rgb = { - 1, -0.00093, 1.401687, - 1, -0.3437, -0.71417, - 1, 1.77216, 0.00099 -}; +#define ravu_lite_lut3_tex(pos) (vec4(texture(ravu_lite_lut3, pos))) -float mod(float x, float y) { - return x - y * floor(x / y); -} +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt void Pass1(uint2 blockStart, uint3 threadId) { - const float2 inputPt = GetInputPt(); - - for (int id = threadId.y * MP_NUM_THREADS_X + threadId.x; id < NUM_PIXELS_X * NUM_PIXELS_Y; id += MP_NUM_THREADS_X * MP_NUM_THREADS_Y) { - uint2 pos = { (uint)id % NUM_PIXELS_X, (uint)id / NUM_PIXELS_X }; - inp[pos.y][pos.x] = GetLuma(INPUT.SampleLevel(sam, inputPt * ((blockStart / 2) + pos - 1.5f), 0).rgb); + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 12 + int(gl_LocalInvocationID.y); +#pragma warning(disable : 3557) + for (int id = int(gl_LocalInvocationIndex); id < 432; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 12, y = (uint)id % 12; + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).x; } - - GroupMemoryBarrierWithGroupSync(); - + barrier(); +#if CURRENT_PASS == LAST_PASS 
uint2 destPos = blockStart + threadId.xy * 2; - if (!CheckViewport(destPos)) { + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { return; } - - float src[5][5]; - [unroll] - for (uint i = 0; i < 5; ++i) { - [unroll] - for (uint j = 0; j < 5; ++j) { - src[j][i] = inp[threadId.y + i][threadId.x + j]; - } - } - - float3 abd = 0; +#endif + vec3 abd = vec3(0.0, 0.0, 0.0); float gx, gy; - gx = (src[2][1] - src[0][1]) / 2.0; - gy = (src[1][2] - src[1][0]) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; - gx = (src[2][2] - src[0][2]) / 2.0; - gy = (src[1][3] - src[1][1]) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; - gx = (src[2][3] - src[0][3]) / 2.0; - gy = (src[1][4] - src[1][2]) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; - gx = (src[3][1] - src[1][1]) / 2.0; - gy = (src[2][2] - src[2][0]) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; - gx = (src[3][2] - src[1][2]) / 2.0; - gy = (src[2][3] - src[2][1]) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833; - gx = (src[3][3] - src[1][3]) / 2.0; - gy = (src[2][4] - src[2][2]) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; - gx = (src[4][1] - src[2][1]) / 2.0; - gy = (src[3][2] - src[3][0]) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; - gx = (src[4][2] - src[2][2]) / 2.0; - gy = (src[3][3] - src[3][1]) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; - gx = (src[4][3] - src[2][3]) / 2.0; - gy = (src[3][4] - src[3][2]) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 25] - inp[local_pos + 1]) / 2.0; + gy = (inp[local_pos + 14] - inp[local_pos + 12]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 26] - inp[local_pos + 2]) / 2.0; + gy = (inp[local_pos + 15] - inp[local_pos + 13]) / 
2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 27] - inp[local_pos + 3]) / 2.0; + gy = (inp[local_pos + 16] - inp[local_pos + 14]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 37] - inp[local_pos + 13]) / 2.0; + gy = (inp[local_pos + 26] - inp[local_pos + 24]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 38] - inp[local_pos + 14]) / 2.0; + gy = (inp[local_pos + 27] - inp[local_pos + 25]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833; + gx = (inp[local_pos + 39] - inp[local_pos + 15]) / 2.0; + gy = (inp[local_pos + 28] - inp[local_pos + 26]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 49] - inp[local_pos + 25]) / 2.0; + gy = (inp[local_pos + 38] - inp[local_pos + 36]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; + gx = (inp[local_pos + 50] - inp[local_pos + 26]) / 2.0; + gy = (inp[local_pos + 39] - inp[local_pos + 37]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666; + gx = (inp[local_pos + 51] - inp[local_pos + 27]) / 2.0; + gy = (inp[local_pos + 40] - inp[local_pos + 38]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163; float a = abd.x, b = abd.y, d = abd.z; float T = a + d, D = a * d - b * b; float delta = sqrt(max(T * T / 4.0 - D, 0.0)); float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); - float theta = lerp(mod(atan2(b, L1 - a) + PI, PI), 0.0, abs(b) < 1.192092896e-7); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); float lambda = sqrtL1; - float mu = lerp((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); - float angle = floor(theta * 24.0 / PI); - float strength = lerp(lerp(0.0, 1.0, lambda >= 0.004), lerp(2.0, 3.0, lambda >= 0.05), lambda >= 
0.016); - float coherence = lerp(lerp(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); - float coord_y = ((angle * 4.0f + strength) * 3.0f + coherence + 0.5f) / 288.0f; - float4 res = 0, w; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.038461538461538464, coord_y), 0); - res += src[0][0] * w + src[4][4] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.11538461538461539, coord_y), 0); - res += src[0][1] * w + src[4][3] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.19230769230769232, coord_y), 0); - res += src[0][2] * w + src[4][2] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.2692307692307692, coord_y), 0); - res += src[0][3] * w + src[4][1] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.34615384615384615, coord_y), 0); - res += src[0][4] * w + src[4][0] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.4230769230769231, coord_y), 0); - res += src[1][0] * w + src[3][4] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.5, coord_y), 0); - res += src[1][1] * w + src[3][3] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.5769230769230769, coord_y), 0); - res += src[1][2] * w + src[3][2] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.6538461538461539, coord_y), 0); - res += src[1][3] * w + src[3][1] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.7307692307692307, coord_y), 0); - res += src[1][4] * w + src[3][0] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.8076923076923077, coord_y), 0); - res += src[2][0] * w + src[2][4] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.8846153846153846, coord_y), 0); - res += src[2][1] * w + src[2][3] * w.wzyx; - w = ravu_lite_lut3.SampleLevel(sam, float2(0.9615384615384616, coord_y), 0); - res += src[2][2] * w; - res = saturate(res); - - const float2 outputPt = GetOutputPt(); - float2 originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb); - WriteToOutput(destPos, mul(yuv2rgb, float3(res.x, originUV))); - - 
++destPos.y; - if (CheckViewport(destPos)) { - originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb); - WriteToOutput(destPos, mul(yuv2rgb, float3(res.y, originUV))); - } - - ++destPos.x; - if (CheckViewport(destPos)) { - originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb); - WriteToOutput(destPos, mul(yuv2rgb, float3(res.w, originUV))); - } - - --destPos.y; - if (CheckViewport(destPos)) { - originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb); - WriteToOutput(destPos, mul(yuv2rgb, float3(res.z, originUV))); - } + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0; + vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w; + w = texture(ravu_lite_lut3, vec2(0.038461538461538464, coord_y)); + res += inp[local_pos + 0] * w + inp[local_pos + 52] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.11538461538461539, coord_y)); + res += inp[local_pos + 1] * w + inp[local_pos + 51] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.19230769230769232, coord_y)); + res += inp[local_pos + 2] * w + inp[local_pos + 50] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.2692307692307692, coord_y)); + res += inp[local_pos + 3] * w + inp[local_pos + 49] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.34615384615384615, coord_y)); + res += inp[local_pos + 4] * w + inp[local_pos + 48] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.4230769230769231, coord_y)); + res += inp[local_pos + 12] * w + inp[local_pos + 40] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.5, coord_y)); + res += inp[local_pos + 13] * w + inp[local_pos + 39] * w.wzyx; + w = texture(ravu_lite_lut3, 
vec2(0.5769230769230769, coord_y)); + res += inp[local_pos + 14] * w + inp[local_pos + 38] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.6538461538461539, coord_y)); + res += inp[local_pos + 15] * w + inp[local_pos + 37] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.7307692307692307, coord_y)); + res += inp[local_pos + 16] * w + inp[local_pos + 36] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.8076923076923077, coord_y)); + res += inp[local_pos + 24] * w + inp[local_pos + 28] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.8846153846153846, coord_y)); + res += inp[local_pos + 25] * w + inp[local_pos + 27] * w.wzyx; + w = texture(ravu_lite_lut3, vec2(0.9615384615384616, coord_y)); + res += inp[local_pos + 26] * w; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0)); } diff --git a/src/Effects/RAVU/RAVU_Lite_R4.hlsl b/src/Effects/RAVU/RAVU_Lite_R4.hlsl new file mode 100644 index 000000000..ab6db274e --- /dev/null +++ b/src/Effects/RAVU/RAVU_Lite_R4.hlsl @@ -0,0 +1,239 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r4.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_lite_lut4_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_lite_lut4; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_lite_lut4; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Lite (r4, compute) +//!IN INPUT, ravu_lite_lut4 +//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +shared float inp[532]; /* 38 x 14 tile of input luma shared by the thread group; Pass1 fills it with index = x * 14 + y */ + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { /* Writes one OUTPUT pixel: value is used as luma, chroma comes from INPUT sampled with the linear sampler at HOOKED_map(pos), alpha is 1.0. rgb2uv, yuv2rgb and HOOKED_map are not defined in this file (presumably prescalers.hlsli - confirm). */ + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lite_lut4_tex(pos) (vec4(texture(ravu_lite_lut4, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { /* Each thread reads a 7x7 window of the shared luma tile and stores four values (res[0..3]) through imageStore. */ + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 14 + int(gl_LocalInvocationID.y); /* tile index of the window's first tap; every tap below is local_pos plus a non-negative offset */ +#pragma warning(disable : 3557) + for (int id =
int(gl_LocalInvocationIndex); id < 532; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 14, y = (uint)id % 14; /* id == x * 14 + y, so x spans [0, 38) and y spans [0, 14) */ + inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).x; + } + barrier(); /* NOTE(review): barrier is not defined in this file; presumably a thread-group sync so the tile is complete before it is read - confirm in prescalers.hlsli */ +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; /* destPos (blockStart + 2 * threadId.xy) is outside the output, nothing to write */ + } +#endif + vec3 abd = vec3(0.0, 0.0, 0.0); /* weighted sum of (gx*gx, gx*gy, gy*gy) over the inner 5x5 points of the 7x7 window */ + float gx, gy; + gx = (inp[local_pos + 29] - inp[local_pos + 1]) / 2.0; /* central difference across x (tile stride 14) */ + gy = (inp[local_pos + 16] - inp[local_pos + 14]) / 2.0; /* central difference across y (tile stride 1) */ + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (inp[local_pos + 30] - inp[local_pos + 2]) / 2.0; + gy = (inp[local_pos + 17] - inp[local_pos + 15]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 31] - inp[local_pos + 3]) / 2.0; + gy = (inp[local_pos + 18] - inp[local_pos + 16]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (inp[local_pos + 32] - inp[local_pos + 4]) / 2.0; + gy = (inp[local_pos + 19] - inp[local_pos + 17]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 33] - inp[local_pos + 5]) / 2.0; + gy = (inp[local_pos + 20] - inp[local_pos + 18]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (inp[local_pos + 43] - inp[local_pos + 15]) / 2.0; + gy = (inp[local_pos + 30] - inp[local_pos + 28]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 44] - inp[local_pos + 16]) / 2.0; + gy = (inp[local_pos + 31] - inp[local_pos + 29]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (inp[local_pos + 45] - inp[local_pos + 17]) / 2.0; + gy = (inp[local_pos + 32] - inp[local_pos + 30]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (inp[local_pos + 46] -
inp[local_pos + 18]) / 2.0; + gy = (inp[local_pos + 33] - inp[local_pos + 31]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (inp[local_pos + 47] - inp[local_pos + 19]) / 2.0; + gy = (inp[local_pos + 34] - inp[local_pos + 32]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 57] - inp[local_pos + 29]) / 2.0; + gy = (inp[local_pos + 44] - inp[local_pos + 42]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (inp[local_pos + 58] - inp[local_pos + 30]) / 2.0; + gy = (inp[local_pos + 45] - inp[local_pos + 43]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (inp[local_pos + 59] - inp[local_pos + 31]) / 2.0; + gy = (inp[local_pos + 46] - inp[local_pos + 44]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06319146241026467; + gx = (inp[local_pos + 60] - inp[local_pos + 32]) / 2.0; + gy = (inp[local_pos + 47] - inp[local_pos + 45]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (inp[local_pos + 61] - inp[local_pos + 33]) / 2.0; + gy = (inp[local_pos + 48] - inp[local_pos + 46]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (inp[local_pos + 71] - inp[local_pos + 43]) / 2.0; + gy = (inp[local_pos + 58] - inp[local_pos + 56]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 72] - inp[local_pos + 44]) / 2.0; + gy = (inp[local_pos + 59] - inp[local_pos + 57]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (inp[local_pos + 73] - inp[local_pos + 45]) / 2.0; + gy = (inp[local_pos + 60] - inp[local_pos + 58]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466; + gx = (inp[local_pos + 74] - inp[local_pos + 46]) / 2.0; + gy = (inp[local_pos + 61] - inp[local_pos + 59]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137; + gx = (inp[local_pos + 75] -
inp[local_pos + 47]) / 2.0; + gy = (inp[local_pos + 62] - inp[local_pos + 60]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 85] - inp[local_pos + 57]) / 2.0; + gy = (inp[local_pos + 72] - inp[local_pos + 70]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + gx = (inp[local_pos + 86] - inp[local_pos + 58]) / 2.0; + gy = (inp[local_pos + 73] - inp[local_pos + 71]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 87] - inp[local_pos + 59]) / 2.0; + gy = (inp[local_pos + 74] - inp[local_pos + 72]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906; + gx = (inp[local_pos + 88] - inp[local_pos + 60]) / 2.0; + gy = (inp[local_pos + 75] - inp[local_pos + 73]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346; + gx = (inp[local_pos + 89] - inp[local_pos + 61]) / 2.0; + gy = (inp[local_pos + 76] - inp[local_pos + 74]) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; /* eigenvalues of the symmetric 2x2 matrix [[a, b], [b, d]], from its trace T and determinant D */ + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0; /* LUT row = (angle * 4 + strength) * 3 + coherence; 288 = 24 * 4 * 3 rows assuming angle stays below 24, and the + 0.5 targets the row center */ + vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w; + w = texture(ravu_lite_lut4, vec2(0.02, coord_y)); + res += inp[local_pos + 0] * w + inp[local_pos + 90] * w.wzyx; /* each LUT texel weights tap i and its point-mirrored tap 90 - i, the latter with the channels reversed */ + w =
texture(ravu_lite_lut4, vec2(0.06, coord_y)); + res += inp[local_pos + 1] * w + inp[local_pos + 89] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.1, coord_y)); + res += inp[local_pos + 2] * w + inp[local_pos + 88] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.14, coord_y)); + res += inp[local_pos + 3] * w + inp[local_pos + 87] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.18, coord_y)); + res += inp[local_pos + 4] * w + inp[local_pos + 86] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.22, coord_y)); + res += inp[local_pos + 5] * w + inp[local_pos + 85] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.26, coord_y)); + res += inp[local_pos + 6] * w + inp[local_pos + 84] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.3, coord_y)); + res += inp[local_pos + 14] * w + inp[local_pos + 76] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.34, coord_y)); + res += inp[local_pos + 15] * w + inp[local_pos + 75] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.38, coord_y)); + res += inp[local_pos + 16] * w + inp[local_pos + 74] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.42, coord_y)); + res += inp[local_pos + 17] * w + inp[local_pos + 73] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.46, coord_y)); + res += inp[local_pos + 18] * w + inp[local_pos + 72] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.5, coord_y)); + res += inp[local_pos + 19] * w + inp[local_pos + 71] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.54, coord_y)); + res += inp[local_pos + 20] * w + inp[local_pos + 70] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.58, coord_y)); + res += inp[local_pos + 28] * w + inp[local_pos + 62] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.62, coord_y)); + res += inp[local_pos + 29] * w + inp[local_pos + 61] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.66, coord_y)); + res += inp[local_pos + 30] * w + inp[local_pos + 60] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.7, coord_y)); + res += inp[local_pos + 31] * w + inp[local_pos + 59] * w.wzyx; + w =
texture(ravu_lite_lut4, vec2(0.74, coord_y)); + res += inp[local_pos + 32] * w + inp[local_pos + 58] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.78, coord_y)); + res += inp[local_pos + 33] * w + inp[local_pos + 57] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.82, coord_y)); + res += inp[local_pos + 34] * w + inp[local_pos + 56] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.86, coord_y)); + res += inp[local_pos + 42] * w + inp[local_pos + 48] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.9, coord_y)); + res += inp[local_pos + 43] * w + inp[local_pos + 47] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.94, coord_y)); + res += inp[local_pos + 44] * w + inp[local_pos + 46] * w.wzyx; + w = texture(ravu_lite_lut4, vec2(0.98, coord_y)); + res += inp[local_pos + 45] * w; /* window center (3 * 14 + 3) has no mirror partner */ + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0)); /* res[0..3] go to offsets (0,0), (0,1), (1,0), (1,1) from 2 * gl_GlobalInvocationID */ + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0)); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0)); +} diff --git a/src/Effects/RAVU/RAVU_R2.hlsl b/src/Effects/RAVU/RAVU_R2.hlsl new file mode 100644 index 000000000..4ffc8faa5 --- /dev/null +++ b/src/Effects/RAVU/RAVU_R2.hlsl @@ -0,0 +1,449 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu.py --target luma --weights-file weights\ravu_weights-r2.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version.
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_lut2_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_lut2; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_lut2; + +//!TEXTURE +//!FORMAT R16_FLOAT +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D ravu_int11; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_ravu_int11; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC RAVU (step1, luma, r2, compute) +//!IN INPUT, ravu_lut2 +//!OUT ravu_int11 +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +shared float inp0[385]; /* 35 x 11 tile of input luma shared by the thread group; Pass1 fills it with index = x * 11 + y */ + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { ravu_int11[pos] = (value); } /* in this pass a store writes the scalar straight into the intermediate texture ravu_int11 */ + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lut2_tex(pos) (vec4(texture(ravu_lut2, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { /* Each thread reads a 4x4 window of the shared luma tile and stores one filtered value through imageStore. */ + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y); /* tile index of the window's first tap; every tap below is local_pos plus a non-negative offset */ + { + for (int id =
int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 11, y = (uint)id % 11; /* id == x * 11 + y, so x spans [0, 35) and y spans [0, 11) */ + inp0[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).x; + } + } + barrier(); /* NOTE(review): barrier is not defined in this file; presumably a thread-group sync so the tile is complete before it is read - confirm in prescalers.hlsli */ +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + { /* the bounds check above is compiled out in this pass: CURRENT_PASS is 1 and LAST_PASS is 2 */ + float luma0 = inp0[local_pos + 0]; /* lumaK = inp0[local_pos + (K / 4) * 11 + K % 4], i.e. a 4x4 window with K = 4 * dx + dy */ + float luma4 = inp0[local_pos + 11]; + float luma5 = inp0[local_pos + 12]; + float luma6 = inp0[local_pos + 13]; + float luma7 = inp0[local_pos + 14]; + float luma1 = inp0[local_pos + 1]; + float luma8 = inp0[local_pos + 22]; + float luma9 = inp0[local_pos + 23]; + float luma10 = inp0[local_pos + 24]; + float luma11 = inp0[local_pos + 25]; + float luma2 = inp0[local_pos + 2]; + float luma12 = inp0[local_pos + 33]; + float luma13 = inp0[local_pos + 34]; + float luma14 = inp0[local_pos + 35]; + float luma15 = inp0[local_pos + 36]; + float luma3 = inp0[local_pos + 3]; + vec3 abd = vec3(0.0, 0.0, 0.0); /* weighted sum of (gx*gx, gx*gy, gy*gy) over all 16 window points */ + float gx, gy; + gx = (luma4 - luma0); /* one-sided difference on the window border; interior rows and columns use central differences (/ 2.0) */ + gy = (luma1 - luma0); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma5 - luma1); + gy = (luma2 - luma0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma6 - luma2); + gy = (luma3 - luma1) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma7 - luma3); + gy = (luma3 - luma2); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma8 - luma0) / 2.0; + gy = (luma5 - luma4); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma9 - luma1) / 2.0; + gy = (luma6 - luma4) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma10 - luma2) / 2.0; + gy = (luma7 - luma5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma11 - luma3) / 2.0;
+ gy = (luma7 - luma6); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma4) / 2.0; + gy = (luma9 - luma8); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma13 - luma5) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma14 - luma6) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma15 - luma7) / 2.0; + gy = (luma11 - luma10); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma8); + gy = (luma13 - luma12); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma13 - luma9); + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma14 - luma10); + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma11); + gy = (luma15 - luma14); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; /* eigenvalues of the symmetric 2x2 matrix [[a, b], [b, d]], from its trace T and determinant D */ + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); /* clamped to the 9 levels 0..8 */ + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; /* LUT row = (angle * 9 + strength) * 3 + coherence; 648 = 24 * 9 * 3 rows assuming angle stays below 24, and the + 0.5 targets the row center */ + float res = 0.0; + vec4 w; + w = texture(ravu_lut2, vec2(0.25, coord_y)); /* each channel of a LUT texel weights tap i together with its point-mirrored tap 36 - i */ + res += (inp0[local_pos + 0] + inp0[local_pos + 36]) * w[0]; + res += (inp0[local_pos + 1] + inp0[local_pos + 35]) * w[1];
+ res += (inp0[local_pos + 2] + inp0[local_pos + 34]) * w[2]; + res += (inp0[local_pos + 3] + inp0[local_pos + 33]) * w[3]; + w = texture(ravu_lut2, vec2(0.75, coord_y)); + res += (inp0[local_pos + 11] + inp0[local_pos + 25]) * w[0]; + res += (inp0[local_pos + 12] + inp0[local_pos + 24]) * w[1]; + res += (inp0[local_pos + 13] + inp0[local_pos + 23]) * w[2]; + res += (inp0[local_pos + 14] + inp0[local_pos + 22]) * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID), res); /* one value per invocation, stored at the invocation's own coordinate */ + } +} +//!PASS 2 +//!DESC RAVU (step2, luma, r2, compute) +//!IN INPUT, ravu_lut2, ravu_int11 +//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +shared float inp0[385]; +shared float inp1[385]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lut2_tex(pos) (vec4(texture(ravu_lut2, pos))) + +#define ravu_int11_tex(pos) (float(texture(ravu_int11, pos).x)) +static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y); +static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y)); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y); + { + for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 11, y =
(uint)id % 11; + inp0[id] = + ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))) + .x; + } + } + { + for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 11, y = (uint)id % 11; + inp1[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).x; + } + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + { + float luma8 = inp0[local_pos + 12]; + float luma5 = inp0[local_pos + 13]; + float luma2 = inp0[local_pos + 14]; + float luma13 = inp0[local_pos + 23]; + float luma10 = inp0[local_pos + 24]; + float luma7 = inp0[local_pos + 25]; + float luma0 = inp0[local_pos + 2]; + float luma15 = inp0[local_pos + 35]; + float luma12 = inp1[local_pos + 11]; + float luma9 = inp1[local_pos + 12]; + float luma6 = inp1[local_pos + 13]; + float luma3 = inp1[local_pos + 14]; + float luma4 = inp1[local_pos + 1]; + float luma14 = inp1[local_pos + 23]; + float luma11 = inp1[local_pos + 24]; + float luma1 = inp1[local_pos + 2]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma4 - luma0); + gy = (luma1 - luma0); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma5 - luma1); + gy = (luma2 - luma0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma6 - luma2); + gy = (luma3 - luma1) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma7 - luma3); + gy = (luma3 - luma2); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma8 - luma0) / 2.0; + gy = (luma5 - luma4); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma9 - luma1) / 2.0; + gy = (luma6 - luma4) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 
0.07901060453704994; + gx = (luma10 - luma2) / 2.0; + gy = (luma7 - luma5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma11 - luma3) / 2.0; + gy = (luma7 - luma6); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma4) / 2.0; + gy = (luma9 - luma8); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma13 - luma5) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma14 - luma6) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma15 - luma7) / 2.0; + gy = (luma11 - luma10); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma8); + gy = (luma13 - luma12); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma13 - luma9); + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma14 - luma10); + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma11); + gy = (luma15 - luma14); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + float res = 0.0; + 
vec4 w; + w = texture(ravu_lut2, vec2(0.25, coord_y)); + res += (inp0[local_pos + 2] + inp0[local_pos + 35]) * w[0]; + res += (inp1[local_pos + 2] + inp1[local_pos + 23]) * w[1]; + res += (inp0[local_pos + 14] + inp0[local_pos + 23]) * w[2]; + res += (inp1[local_pos + 14] + inp1[local_pos + 11]) * w[3]; + w = texture(ravu_lut2, vec2(0.75, coord_y)); + res += (inp1[local_pos + 1] + inp1[local_pos + 24]) * w[0]; + res += (inp0[local_pos + 13] + inp0[local_pos + 24]) * w[1]; + res += (inp1[local_pos + 13] + inp1[local_pos + 12]) * w[2]; + res += (inp0[local_pos + 25] + inp0[local_pos + 12]) * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), res); + } + { + float luma4 = inp0[local_pos + 12]; + float luma1 = inp0[local_pos + 13]; + float luma12 = inp0[local_pos + 22]; + float luma9 = inp0[local_pos + 23]; + float luma6 = inp0[local_pos + 24]; + float luma3 = inp0[local_pos + 25]; + float luma14 = inp0[local_pos + 34]; + float luma11 = inp0[local_pos + 35]; + float luma8 = inp1[local_pos + 11]; + float luma5 = inp1[local_pos + 12]; + float luma2 = inp1[local_pos + 13]; + float luma0 = inp1[local_pos + 1]; + float luma13 = inp1[local_pos + 22]; + float luma10 = inp1[local_pos + 23]; + float luma7 = inp1[local_pos + 24]; + float luma15 = inp1[local_pos + 34]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma4 - luma0); + gy = (luma1 - luma0); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma5 - luma1); + gy = (luma2 - luma0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma6 - luma2); + gy = (luma3 - luma1) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma7 - luma3); + gy = (luma3 - luma2); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma8 - luma0) / 2.0; + gy = (luma5 - luma4); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma9 - luma1) / 2.0; + gy = 
(luma6 - luma4) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma10 - luma2) / 2.0; + gy = (luma7 - luma5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma11 - luma3) / 2.0; + gy = (luma7 - luma6); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma4) / 2.0; + gy = (luma9 - luma8); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma13 - luma5) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma14 - luma6) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma15 - luma7) / 2.0; + gy = (luma11 - luma10); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma8); + gy = (luma13 - luma12); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma13 - luma9); + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma14 - luma10); + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma11); + gy = (luma15 - luma14); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + 
strength) * 3.0 + coherence + 0.5) / 648.0; + float res = 0.0; + vec4 w; + w = texture(ravu_lut2, vec2(0.25, coord_y)); + res += (inp1[local_pos + 1] + inp1[local_pos + 34]) * w[0]; + res += (inp0[local_pos + 13] + inp0[local_pos + 34]) * w[1]; + res += (inp1[local_pos + 13] + inp1[local_pos + 22]) * w[2]; + res += (inp0[local_pos + 25] + inp0[local_pos + 22]) * w[3]; + w = texture(ravu_lut2, vec2(0.75, coord_y)); + res += (inp0[local_pos + 12] + inp0[local_pos + 35]) * w[0]; + res += (inp1[local_pos + 12] + inp1[local_pos + 23]) * w[1]; + res += (inp0[local_pos + 24] + inp0[local_pos + 23]) * w[2]; + res += (inp1[local_pos + 24] + inp1[local_pos + 11]) * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), res); + } + float res; + res = inp0[local_pos + 24]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), res); + res = inp1[local_pos + 12]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), res); +} diff --git a/src/Effects/RAVU/RAVU_R2_RGB.hlsl b/src/Effects/RAVU/RAVU_R2_RGB.hlsl new file mode 100644 index 000000000..a3001b21e --- /dev/null +++ b/src/Effects/RAVU/RAVU_R2_RGB.hlsl @@ -0,0 +1,450 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu.py --target rgb --weights-file weights\ravu_weights-r2.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!TEXTURE +//!SOURCE ravu_lut2_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_lut2; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_lut2; + +//!TEXTURE +//!FORMAT R16G16B16A16_FLOAT +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D ravu_int11; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_ravu_int11; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC RAVU (step1, rgb, r2, compute) +//!IN INPUT, ravu_lut2 +//!OUT ravu_int11 +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +shared vec3 inp0[385]; +shared float inp_luma0[385]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.xyz) +void imageStoreOverride(uint2 pos, vec3 value) { ravu_int11[pos] = vec4(value, 0.0); } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lut2_tex(pos) (vec4(texture(ravu_lut2, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y); + { + for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 11, y = (uint)id % 11; + inp0[id] = + HOOKED_tex(HOOKED_pt * 
vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).xyz; + inp_luma0[id] = dot(inp0[id], color_primary); + } + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + { + float luma0 = inp_luma0[local_pos + 0]; + float luma4 = inp_luma0[local_pos + 11]; + float luma5 = inp_luma0[local_pos + 12]; + float luma6 = inp_luma0[local_pos + 13]; + float luma7 = inp_luma0[local_pos + 14]; + float luma1 = inp_luma0[local_pos + 1]; + float luma8 = inp_luma0[local_pos + 22]; + float luma9 = inp_luma0[local_pos + 23]; + float luma10 = inp_luma0[local_pos + 24]; + float luma11 = inp_luma0[local_pos + 25]; + float luma2 = inp_luma0[local_pos + 2]; + float luma12 = inp_luma0[local_pos + 33]; + float luma13 = inp_luma0[local_pos + 34]; + float luma14 = inp_luma0[local_pos + 35]; + float luma15 = inp_luma0[local_pos + 36]; + float luma3 = inp_luma0[local_pos + 3]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma4 - luma0); + gy = (luma1 - luma0); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma5 - luma1); + gy = (luma2 - luma0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma6 - luma2); + gy = (luma3 - luma1) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma7 - luma3); + gy = (luma3 - luma2); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma8 - luma0) / 2.0; + gy = (luma5 - luma4); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma9 - luma1) / 2.0; + gy = (luma6 - luma4) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma10 - luma2) / 2.0; + gy = (luma7 - luma5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma11 - luma3) / 2.0; + gy = (luma7 - luma6); + abd += vec3(gx * gx, 
gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma4) / 2.0; + gy = (luma9 - luma8); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma13 - luma5) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma14 - luma6) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma15 - luma7) / 2.0; + gy = (luma11 - luma10); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma8); + gy = (luma13 - luma12); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma13 - luma9); + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma14 - luma10); + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma11); + gy = (luma15 - luma14); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + w = texture(ravu_lut2, vec2(0.25, coord_y)); + res += (inp0[local_pos + 0] + inp0[local_pos + 36]) * w[0]; + res += (inp0[local_pos + 1] + inp0[local_pos + 35]) * w[1]; + res += (inp0[local_pos + 2] + 
inp0[local_pos + 34]) * w[2]; + res += (inp0[local_pos + 3] + inp0[local_pos + 33]) * w[3]; + w = texture(ravu_lut2, vec2(0.75, coord_y)); + res += (inp0[local_pos + 11] + inp0[local_pos + 25]) * w[0]; + res += (inp0[local_pos + 12] + inp0[local_pos + 24]) * w[1]; + res += (inp0[local_pos + 13] + inp0[local_pos + 23]) * w[2]; + res += (inp0[local_pos + 14] + inp0[local_pos + 22]) * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0)); + } +} +//!PASS 2 +//!DESC RAVU (step2, rgb, r2, compute) +//!IN INPUT, ravu_lut2, ravu_int11 +//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +shared vec3 inp0[385]; +shared float inp_luma0[385]; +shared vec3 inp1[385]; +shared float inp_luma1[385]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val) +void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lut2_tex(pos) (vec4(texture(ravu_lut2, pos))) + +#define ravu_int11_tex(pos) (vec3(texture(ravu_int11, pos).xyz)) +static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y); +static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y)); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y); + { + for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 11, y = (uint)id % 11; + inp0[id] = + 
ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))) + .xyz; + inp_luma0[id] = dot(inp0[id], color_primary); + } + } + { + for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 11, y = (uint)id % 11; + inp1[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).xyz; + inp_luma1[id] = dot(inp1[id], color_primary); + } + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + { + float luma8 = inp_luma0[local_pos + 12]; + float luma5 = inp_luma0[local_pos + 13]; + float luma2 = inp_luma0[local_pos + 14]; + float luma13 = inp_luma0[local_pos + 23]; + float luma10 = inp_luma0[local_pos + 24]; + float luma7 = inp_luma0[local_pos + 25]; + float luma0 = inp_luma0[local_pos + 2]; + float luma15 = inp_luma0[local_pos + 35]; + float luma12 = inp_luma1[local_pos + 11]; + float luma9 = inp_luma1[local_pos + 12]; + float luma6 = inp_luma1[local_pos + 13]; + float luma3 = inp_luma1[local_pos + 14]; + float luma4 = inp_luma1[local_pos + 1]; + float luma14 = inp_luma1[local_pos + 23]; + float luma11 = inp_luma1[local_pos + 24]; + float luma1 = inp_luma1[local_pos + 2]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma4 - luma0); + gy = (luma1 - luma0); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma5 - luma1); + gy = (luma2 - luma0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma6 - luma2); + gy = (luma3 - luma1) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma7 - luma3); + gy = (luma3 - luma2); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma8 - luma0) / 2.0; + gy = (luma5 - luma4); + abd += vec3(gx * gx, 
gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma9 - luma1) / 2.0; + gy = (luma6 - luma4) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma10 - luma2) / 2.0; + gy = (luma7 - luma5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma11 - luma3) / 2.0; + gy = (luma7 - luma6); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma4) / 2.0; + gy = (luma9 - luma8); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma13 - luma5) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma14 - luma6) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma15 - luma7) / 2.0; + gy = (luma11 - luma10); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma8); + gy = (luma13 - luma12); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma13 - luma9); + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma14 - luma10); + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma11); + gy = (luma15 - luma14); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = 
mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + w = texture(ravu_lut2, vec2(0.25, coord_y)); + res += (inp0[local_pos + 2] + inp0[local_pos + 35]) * w[0]; + res += (inp1[local_pos + 2] + inp1[local_pos + 23]) * w[1]; + res += (inp0[local_pos + 14] + inp0[local_pos + 23]) * w[2]; + res += (inp1[local_pos + 14] + inp1[local_pos + 11]) * w[3]; + w = texture(ravu_lut2, vec2(0.75, coord_y)); + res += (inp1[local_pos + 1] + inp1[local_pos + 24]) * w[0]; + res += (inp0[local_pos + 13] + inp0[local_pos + 24]) * w[1]; + res += (inp1[local_pos + 13] + inp1[local_pos + 12]) * w[2]; + res += (inp0[local_pos + 25] + inp0[local_pos + 12]) * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res, 1.0)); + } + { + float luma4 = inp_luma0[local_pos + 12]; + float luma1 = inp_luma0[local_pos + 13]; + float luma12 = inp_luma0[local_pos + 22]; + float luma9 = inp_luma0[local_pos + 23]; + float luma6 = inp_luma0[local_pos + 24]; + float luma3 = inp_luma0[local_pos + 25]; + float luma14 = inp_luma0[local_pos + 34]; + float luma11 = inp_luma0[local_pos + 35]; + float luma8 = inp_luma1[local_pos + 11]; + float luma5 = inp_luma1[local_pos + 12]; + float luma2 = inp_luma1[local_pos + 13]; + float luma0 = inp_luma1[local_pos + 1]; + float luma13 = inp_luma1[local_pos + 22]; + float luma10 = inp_luma1[local_pos + 23]; + float luma7 = inp_luma1[local_pos + 24]; + float luma15 = inp_luma1[local_pos + 34]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma4 - luma0); + gy = (luma1 - luma0); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma5 - luma1); + gy = (luma2 - luma0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma6 - luma2); + gy = (luma3 - luma1) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma7 - 
luma3); + gy = (luma3 - luma2); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma8 - luma0) / 2.0; + gy = (luma5 - luma4); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma9 - luma1) / 2.0; + gy = (luma6 - luma4) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma10 - luma2) / 2.0; + gy = (luma7 - luma5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma11 - luma3) / 2.0; + gy = (luma7 - luma6); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma4) / 2.0; + gy = (luma9 - luma8); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma13 - luma5) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma14 - luma6) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma15 - luma7) / 2.0; + gy = (luma11 - luma10); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma8); + gy = (luma13 - luma12); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma13 - luma9); + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma14 - luma10); + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma11); + gy = (luma15 - luma14); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 
1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + w = texture(ravu_lut2, vec2(0.25, coord_y)); + res += (inp1[local_pos + 1] + inp1[local_pos + 34]) * w[0]; + res += (inp0[local_pos + 13] + inp0[local_pos + 34]) * w[1]; + res += (inp1[local_pos + 13] + inp1[local_pos + 22]) * w[2]; + res += (inp0[local_pos + 25] + inp0[local_pos + 22]) * w[3]; + w = texture(ravu_lut2, vec2(0.75, coord_y)); + res += (inp0[local_pos + 12] + inp0[local_pos + 35]) * w[0]; + res += (inp1[local_pos + 12] + inp1[local_pos + 23]) * w[1]; + res += (inp0[local_pos + 24] + inp0[local_pos + 23]) * w[2]; + res += (inp1[local_pos + 24] + inp1[local_pos + 11]) * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res, 1.0)); + } + vec3 res; + res = inp0[local_pos + 24]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res, 1.0)); + res = inp1[local_pos + 12]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res, 1.0)); +} diff --git a/src/Effects/RAVU/RAVU_R3.hlsl b/src/Effects/RAVU/RAVU_R3.hlsl new file mode 100644 index 000000000..e4b847055 --- /dev/null +++ b/src/Effects/RAVU/RAVU_R3.hlsl @@ -0,0 +1,536 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. 
+// Generated by: ravu.py --target luma --weights-file weights\ravu_weights-r3.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_lut3_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_lut3; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_lut3; + +//!TEXTURE +//!FORMAT R16_FLOAT +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D ravu_int11; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_ravu_int11; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC RAVU (step1, luma, r3, compute) +//!IN INPUT, ravu_lut3 +//!OUT ravu_int11 +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +shared float inp0[481]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { ravu_int11[pos] = (value); } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = 
float2(GetInputPt()); + +#define ravu_lut3_tex(pos) (vec4(texture(ravu_lut3, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y); + { + for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 13, y = (uint)id % 13; + inp0[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).x; + } + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + { + float luma6 = inp0[local_pos + 13]; + float luma7 = inp0[local_pos + 14]; + float luma8 = inp0[local_pos + 15]; + float luma9 = inp0[local_pos + 16]; + float luma10 = inp0[local_pos + 17]; + float luma11 = inp0[local_pos + 18]; + float luma1 = inp0[local_pos + 1]; + float luma12 = inp0[local_pos + 26]; + float luma13 = inp0[local_pos + 27]; + float luma14 = inp0[local_pos + 28]; + float luma15 = inp0[local_pos + 29]; + float luma2 = inp0[local_pos + 2]; + float luma16 = inp0[local_pos + 30]; + float luma17 = inp0[local_pos + 31]; + float luma18 = inp0[local_pos + 39]; + float luma3 = inp0[local_pos + 3]; + float luma19 = inp0[local_pos + 40]; + float luma20 = inp0[local_pos + 41]; + float luma21 = inp0[local_pos + 42]; + float luma22 = inp0[local_pos + 43]; + float luma23 = inp0[local_pos + 44]; + float luma4 = inp0[local_pos + 4]; + float luma24 = inp0[local_pos + 52]; + float luma25 = inp0[local_pos + 53]; + float luma26 = inp0[local_pos + 54]; + float luma27 = inp0[local_pos + 55]; + float luma28 = inp0[local_pos + 56]; + float luma29 = inp0[local_pos + 57]; + float luma31 = 
inp0[local_pos + 66]; + float luma32 = inp0[local_pos + 67]; + float luma33 = inp0[local_pos + 68]; + float luma34 = inp0[local_pos + 69]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma13 - luma1) / 2.0; + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma14 - luma2) / 2.0; + gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma3) / 2.0; + gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma16 - luma4) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0; + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0; + gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0; + gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0; + gy = (luma20 - luma18) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = 
(-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma31 - luma19) / 2.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma32 - luma20) / 2.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma33 - luma21) / 2.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma34 - luma22) / 2.0; + gy = (luma29 - luma27) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + float res = 0.0; + vec4 w; + w = texture(ravu_lut3, vec2(0.1, coord_y)); + res += (inp0[local_pos + 0] + inp0[local_pos + 70]) * w[0]; + res += (inp0[local_pos + 1] + inp0[local_pos + 69]) * w[1]; + res += (inp0[local_pos + 2] + inp0[local_pos + 68]) * w[2]; + res += (inp0[local_pos + 3] + inp0[local_pos + 67]) * w[3]; + w = texture(ravu_lut3, vec2(0.3, coord_y)); + res += (inp0[local_pos + 4] + inp0[local_pos + 66]) * w[0]; + res += (inp0[local_pos + 5] + inp0[local_pos + 65]) * w[1]; + res += (inp0[local_pos + 13] + 
inp0[local_pos + 57]) * w[2]; + res += (inp0[local_pos + 14] + inp0[local_pos + 56]) * w[3]; + w = texture(ravu_lut3, vec2(0.5, coord_y)); + res += (inp0[local_pos + 15] + inp0[local_pos + 55]) * w[0]; + res += (inp0[local_pos + 16] + inp0[local_pos + 54]) * w[1]; + res += (inp0[local_pos + 17] + inp0[local_pos + 53]) * w[2]; + res += (inp0[local_pos + 18] + inp0[local_pos + 52]) * w[3]; + w = texture(ravu_lut3, vec2(0.7, coord_y)); + res += (inp0[local_pos + 26] + inp0[local_pos + 44]) * w[0]; + res += (inp0[local_pos + 27] + inp0[local_pos + 43]) * w[1]; + res += (inp0[local_pos + 28] + inp0[local_pos + 42]) * w[2]; + res += (inp0[local_pos + 29] + inp0[local_pos + 41]) * w[3]; + w = texture(ravu_lut3, vec2(0.9, coord_y)); + res += (inp0[local_pos + 30] + inp0[local_pos + 40]) * w[0]; + res += (inp0[local_pos + 31] + inp0[local_pos + 39]) * w[1]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID), res); + } +} +//!PASS 2 +//!DESC RAVU (step2, luma, r3, compute) +//!IN INPUT, ravu_lut3, ravu_int11 +//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +shared float inp0[481]; +shared float inp1[481]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lut3_tex(pos) (vec4(texture(ravu_lut3, pos))) + +#define ravu_int11_tex(pos) (float(texture(ravu_int11, pos).x)) +static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y); +static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y)); + +#define 
HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y); + { + for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 13, y = (uint)id % 13; + inp0[id] = + ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))) + .x; + } + } + { + for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 13, y = (uint)id % 13; + inp1[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).x; + } + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + { + float luma12 = inp0[local_pos + 15]; + float luma7 = inp0[local_pos + 16]; + float luma2 = inp0[local_pos + 17]; + float luma24 = inp0[local_pos + 27]; + float luma19 = inp0[local_pos + 28]; + float luma14 = inp0[local_pos + 29]; + float luma9 = inp0[local_pos + 30]; + float luma4 = inp0[local_pos + 31]; + float luma31 = inp0[local_pos + 40]; + float luma26 = inp0[local_pos + 41]; + float luma21 = inp0[local_pos + 42]; + float luma16 = inp0[local_pos + 43]; + float luma11 = inp0[local_pos + 44]; + float luma33 = inp0[local_pos + 54]; + float luma28 = inp0[local_pos + 55]; + float luma23 = inp0[local_pos + 56]; + float luma18 = inp1[local_pos + 14]; + float luma13 = inp1[local_pos + 15]; + float luma8 = inp1[local_pos + 16]; + float luma3 = inp1[local_pos + 17]; + float luma25 = inp1[local_pos + 27]; + float luma20 = inp1[local_pos + 28]; + float luma15 = inp1[local_pos + 29]; + float luma6 = 
inp1[local_pos + 2]; + float luma10 = inp1[local_pos + 30]; + float luma1 = inp1[local_pos + 3]; + float luma32 = inp1[local_pos + 40]; + float luma27 = inp1[local_pos + 41]; + float luma22 = inp1[local_pos + 42]; + float luma17 = inp1[local_pos + 43]; + float luma34 = inp1[local_pos + 54]; + float luma29 = inp1[local_pos + 55]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma13 - luma1) / 2.0; + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma14 - luma2) / 2.0; + gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma3) / 2.0; + gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma16 - luma4) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0; + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0; + gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0; + gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0; + gy = (luma20 - luma18) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = 
(-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma31 - luma19) / 2.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma32 - luma20) / 2.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma33 - luma21) / 2.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma34 - luma22) / 2.0; + gy = (luma29 - luma27) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + float res = 0.0; + vec4 w; + w = texture(ravu_lut3, vec2(0.1, coord_y)); + res += (inp0[local_pos + 3] + inp0[local_pos + 68]) * w[0]; + res += (inp1[local_pos + 3] + inp1[local_pos + 54]) * w[1]; + res += (inp0[local_pos + 17] + inp0[local_pos + 54]) * w[2]; + res += (inp1[local_pos + 17] + inp1[local_pos + 40]) * w[3]; + w = 
texture(ravu_lut3, vec2(0.3, coord_y)); + res += (inp0[local_pos + 31] + inp0[local_pos + 40]) * w[0]; + res += (inp1[local_pos + 31] + inp1[local_pos + 26]) * w[1]; + res += (inp1[local_pos + 2] + inp1[local_pos + 55]) * w[2]; + res += (inp0[local_pos + 16] + inp0[local_pos + 55]) * w[3]; + w = texture(ravu_lut3, vec2(0.5, coord_y)); + res += (inp1[local_pos + 16] + inp1[local_pos + 41]) * w[0]; + res += (inp0[local_pos + 30] + inp0[local_pos + 41]) * w[1]; + res += (inp1[local_pos + 30] + inp1[local_pos + 27]) * w[2]; + res += (inp0[local_pos + 44] + inp0[local_pos + 27]) * w[3]; + w = texture(ravu_lut3, vec2(0.7, coord_y)); + res += (inp0[local_pos + 15] + inp0[local_pos + 56]) * w[0]; + res += (inp1[local_pos + 15] + inp1[local_pos + 42]) * w[1]; + res += (inp0[local_pos + 29] + inp0[local_pos + 42]) * w[2]; + res += (inp1[local_pos + 29] + inp1[local_pos + 28]) * w[3]; + w = texture(ravu_lut3, vec2(0.9, coord_y)); + res += (inp0[local_pos + 43] + inp0[local_pos + 28]) * w[0]; + res += (inp1[local_pos + 43] + inp1[local_pos + 14]) * w[1]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), res); + } + { + float luma6 = inp0[local_pos + 15]; + float luma1 = inp0[local_pos + 16]; + float luma18 = inp0[local_pos + 27]; + float luma13 = inp0[local_pos + 28]; + float luma8 = inp0[local_pos + 29]; + float luma3 = inp0[local_pos + 30]; + float luma25 = inp0[local_pos + 40]; + float luma20 = inp0[local_pos + 41]; + float luma15 = inp0[local_pos + 42]; + float luma10 = inp0[local_pos + 43]; + float luma32 = inp0[local_pos + 53]; + float luma27 = inp0[local_pos + 54]; + float luma22 = inp0[local_pos + 55]; + float luma17 = inp0[local_pos + 56]; + float luma34 = inp0[local_pos + 67]; + float luma29 = inp0[local_pos + 68]; + float luma12 = inp1[local_pos + 14]; + float luma7 = inp1[local_pos + 15]; + float luma2 = inp1[local_pos + 16]; + float luma24 = inp1[local_pos + 26]; + float luma19 = inp1[local_pos + 27]; + float 
luma14 = inp1[local_pos + 28]; + float luma9 = inp1[local_pos + 29]; + float luma4 = inp1[local_pos + 30]; + float luma31 = inp1[local_pos + 39]; + float luma26 = inp1[local_pos + 40]; + float luma21 = inp1[local_pos + 41]; + float luma16 = inp1[local_pos + 42]; + float luma11 = inp1[local_pos + 43]; + float luma33 = inp1[local_pos + 53]; + float luma28 = inp1[local_pos + 54]; + float luma23 = inp1[local_pos + 55]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma13 - luma1) / 2.0; + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma14 - luma2) / 2.0; + gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma3) / 2.0; + gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma16 - luma4) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0; + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0; + gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0; + gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0; + gy = (luma20 - luma18) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + 
luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma31 - luma19) / 2.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma32 - luma20) / 2.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma33 - luma21) / 2.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma34 - luma22) / 2.0; + gy = (luma29 - luma27) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + float res = 0.0; + vec4 w; + w = texture(ravu_lut3, vec2(0.1, coord_y)); + res += (inp1[local_pos + 2] + inp1[local_pos + 67]) * w[0]; + res += (inp0[local_pos + 16] + inp0[local_pos + 67]) * w[1]; + res += (inp1[local_pos + 16] + inp1[local_pos 
+ 53]) * w[2]; + res += (inp0[local_pos + 30] + inp0[local_pos + 53]) * w[3]; + w = texture(ravu_lut3, vec2(0.3, coord_y)); + res += (inp1[local_pos + 30] + inp1[local_pos + 39]) * w[0]; + res += (inp0[local_pos + 44] + inp0[local_pos + 39]) * w[1]; + res += (inp0[local_pos + 15] + inp0[local_pos + 68]) * w[2]; + res += (inp1[local_pos + 15] + inp1[local_pos + 54]) * w[3]; + w = texture(ravu_lut3, vec2(0.5, coord_y)); + res += (inp0[local_pos + 29] + inp0[local_pos + 54]) * w[0]; + res += (inp1[local_pos + 29] + inp1[local_pos + 40]) * w[1]; + res += (inp0[local_pos + 43] + inp0[local_pos + 40]) * w[2]; + res += (inp1[local_pos + 43] + inp1[local_pos + 26]) * w[3]; + w = texture(ravu_lut3, vec2(0.7, coord_y)); + res += (inp1[local_pos + 14] + inp1[local_pos + 55]) * w[0]; + res += (inp0[local_pos + 28] + inp0[local_pos + 55]) * w[1]; + res += (inp1[local_pos + 28] + inp1[local_pos + 41]) * w[2]; + res += (inp0[local_pos + 42] + inp0[local_pos + 41]) * w[3]; + w = texture(ravu_lut3, vec2(0.9, coord_y)); + res += (inp1[local_pos + 42] + inp1[local_pos + 27]) * w[0]; + res += (inp0[local_pos + 56] + inp0[local_pos + 27]) * w[1]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), res); + } + float res; + res = inp0[local_pos + 42]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), res); + res = inp1[local_pos + 28]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), res); +} diff --git a/src/Effects/RAVU/RAVU_R3_RGB.hlsl b/src/Effects/RAVU/RAVU_R3_RGB.hlsl new file mode 100644 index 000000000..72dd6e50e --- /dev/null +++ b/src/Effects/RAVU/RAVU_R3_RGB.hlsl @@ -0,0 +1,537 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. 
+// Generated by: ravu.py --target rgb --weights-file weights\ravu_weights-r3.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!TEXTURE +//!SOURCE ravu_lut3_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_lut3; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_lut3; + +//!TEXTURE +//!FORMAT R16G16B16A16_FLOAT +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D ravu_int11; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_ravu_int11; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC RAVU (step1, rgb, r3, compute) +//!IN INPUT, ravu_lut3 +//!OUT ravu_int11 +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +shared vec3 inp0[481]; +shared float inp_luma0[481]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.xyz) +void imageStoreOverride(uint2 pos, vec3 value) { ravu_int11[pos] = vec4(value, 0.0); } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const 
float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lut3_tex(pos) (vec4(texture(ravu_lut3, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y); + { + for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 13, y = (uint)id % 13; + inp0[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).xyz; + inp_luma0[id] = dot(inp0[id], color_primary); + } + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + { + float luma6 = inp_luma0[local_pos + 13]; + float luma7 = inp_luma0[local_pos + 14]; + float luma8 = inp_luma0[local_pos + 15]; + float luma9 = inp_luma0[local_pos + 16]; + float luma10 = inp_luma0[local_pos + 17]; + float luma11 = inp_luma0[local_pos + 18]; + float luma1 = inp_luma0[local_pos + 1]; + float luma12 = inp_luma0[local_pos + 26]; + float luma13 = inp_luma0[local_pos + 27]; + float luma14 = inp_luma0[local_pos + 28]; + float luma15 = inp_luma0[local_pos + 29]; + float luma2 = inp_luma0[local_pos + 2]; + float luma16 = inp_luma0[local_pos + 30]; + float luma17 = inp_luma0[local_pos + 31]; + float luma18 = inp_luma0[local_pos + 39]; + float luma3 = inp_luma0[local_pos + 3]; + float luma19 = inp_luma0[local_pos + 40]; + float luma20 = inp_luma0[local_pos + 41]; + float luma21 = inp_luma0[local_pos + 42]; + float luma22 = inp_luma0[local_pos + 43]; + float luma23 = inp_luma0[local_pos + 44]; + float luma4 = inp_luma0[local_pos + 4]; + float luma24 = inp_luma0[local_pos + 52]; + float luma25 = inp_luma0[local_pos + 
53]; + float luma26 = inp_luma0[local_pos + 54]; + float luma27 = inp_luma0[local_pos + 55]; + float luma28 = inp_luma0[local_pos + 56]; + float luma29 = inp_luma0[local_pos + 57]; + float luma31 = inp_luma0[local_pos + 66]; + float luma32 = inp_luma0[local_pos + 67]; + float luma33 = inp_luma0[local_pos + 68]; + float luma34 = inp_luma0[local_pos + 69]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma13 - luma1) / 2.0; + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma14 - luma2) / 2.0; + gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma3) / 2.0; + gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma16 - luma4) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0; + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0; + gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0; + gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0; + gy = (luma20 - luma18) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 
0.07901060453704994; + gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma31 - luma19) / 2.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma32 - luma20) / 2.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma33 - luma21) / 2.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma34 - luma22) / 2.0; + gy = (luma29 - luma27) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + w = texture(ravu_lut3, vec2(0.1, coord_y)); + res += (inp0[local_pos + 0] + inp0[local_pos + 70]) * w[0]; + res += (inp0[local_pos + 1] + inp0[local_pos + 69]) * w[1]; + res += (inp0[local_pos + 2] + inp0[local_pos + 68]) * w[2]; + res += (inp0[local_pos + 3] 
+ inp0[local_pos + 67]) * w[3]; + w = texture(ravu_lut3, vec2(0.3, coord_y)); + res += (inp0[local_pos + 4] + inp0[local_pos + 66]) * w[0]; + res += (inp0[local_pos + 5] + inp0[local_pos + 65]) * w[1]; + res += (inp0[local_pos + 13] + inp0[local_pos + 57]) * w[2]; + res += (inp0[local_pos + 14] + inp0[local_pos + 56]) * w[3]; + w = texture(ravu_lut3, vec2(0.5, coord_y)); + res += (inp0[local_pos + 15] + inp0[local_pos + 55]) * w[0]; + res += (inp0[local_pos + 16] + inp0[local_pos + 54]) * w[1]; + res += (inp0[local_pos + 17] + inp0[local_pos + 53]) * w[2]; + res += (inp0[local_pos + 18] + inp0[local_pos + 52]) * w[3]; + w = texture(ravu_lut3, vec2(0.7, coord_y)); + res += (inp0[local_pos + 26] + inp0[local_pos + 44]) * w[0]; + res += (inp0[local_pos + 27] + inp0[local_pos + 43]) * w[1]; + res += (inp0[local_pos + 28] + inp0[local_pos + 42]) * w[2]; + res += (inp0[local_pos + 29] + inp0[local_pos + 41]) * w[3]; + w = texture(ravu_lut3, vec2(0.9, coord_y)); + res += (inp0[local_pos + 30] + inp0[local_pos + 40]) * w[0]; + res += (inp0[local_pos + 31] + inp0[local_pos + 39]) * w[1]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0)); + } +} +//!PASS 2 +//!DESC RAVU (step2, rgb, r3, compute) +//!IN INPUT, ravu_lut3, ravu_int11 +//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +shared vec3 inp0[481]; +shared float inp_luma0[481]; +shared vec3 inp1[481]; +shared float inp_luma1[481]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val) +void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lut3_tex(pos) (vec4(texture(ravu_lut3, pos))) + +#define ravu_int11_tex(pos) 
(vec3(texture(ravu_int11, pos).xyz)) +static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y); +static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y)); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y); + { + for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 13, y = (uint)id % 13; + inp0[id] = + ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))) + .xyz; + inp_luma0[id] = dot(inp0[id], color_primary); + } + } + { + for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 13, y = (uint)id % 13; + inp1[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).xyz; + inp_luma1[id] = dot(inp1[id], color_primary); + } + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + { + float luma12 = inp_luma0[local_pos + 15]; + float luma7 = inp_luma0[local_pos + 16]; + float luma2 = inp_luma0[local_pos + 17]; + float luma24 = inp_luma0[local_pos + 27]; + float luma19 = inp_luma0[local_pos + 28]; + float luma14 = inp_luma0[local_pos + 29]; + float luma9 = inp_luma0[local_pos + 30]; + float luma4 = inp_luma0[local_pos + 31]; + float luma31 = inp_luma0[local_pos + 40]; + float luma26 = inp_luma0[local_pos + 41]; + float luma21 = inp_luma0[local_pos + 42]; + float luma16 = inp_luma0[local_pos + 43]; + float luma11 = inp_luma0[local_pos + 44]; + float 
luma33 = inp_luma0[local_pos + 54]; + float luma28 = inp_luma0[local_pos + 55]; + float luma23 = inp_luma0[local_pos + 56]; + float luma18 = inp_luma1[local_pos + 14]; + float luma13 = inp_luma1[local_pos + 15]; + float luma8 = inp_luma1[local_pos + 16]; + float luma3 = inp_luma1[local_pos + 17]; + float luma25 = inp_luma1[local_pos + 27]; + float luma20 = inp_luma1[local_pos + 28]; + float luma15 = inp_luma1[local_pos + 29]; + float luma6 = inp_luma1[local_pos + 2]; + float luma10 = inp_luma1[local_pos + 30]; + float luma1 = inp_luma1[local_pos + 3]; + float luma32 = inp_luma1[local_pos + 40]; + float luma27 = inp_luma1[local_pos + 41]; + float luma22 = inp_luma1[local_pos + 42]; + float luma17 = inp_luma1[local_pos + 43]; + float luma34 = inp_luma1[local_pos + 54]; + float luma29 = inp_luma1[local_pos + 55]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma13 - luma1) / 2.0; + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma14 - luma2) / 2.0; + gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma3) / 2.0; + gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma16 - luma4) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0; + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0; + gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0; + gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma28 + 8.0 * 
luma22 - 8.0 * luma10 + luma4) / 12.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0; + gy = (luma20 - luma18) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma31 - luma19) / 2.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma32 - luma20) / 2.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma33 - luma21) / 2.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma34 - luma22) / 2.0; + gy = (luma29 - luma27) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 
0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + w = texture(ravu_lut3, vec2(0.1, coord_y)); + res += (inp0[local_pos + 3] + inp0[local_pos + 68]) * w[0]; + res += (inp1[local_pos + 3] + inp1[local_pos + 54]) * w[1]; + res += (inp0[local_pos + 17] + inp0[local_pos + 54]) * w[2]; + res += (inp1[local_pos + 17] + inp1[local_pos + 40]) * w[3]; + w = texture(ravu_lut3, vec2(0.3, coord_y)); + res += (inp0[local_pos + 31] + inp0[local_pos + 40]) * w[0]; + res += (inp1[local_pos + 31] + inp1[local_pos + 26]) * w[1]; + res += (inp1[local_pos + 2] + inp1[local_pos + 55]) * w[2]; + res += (inp0[local_pos + 16] + inp0[local_pos + 55]) * w[3]; + w = texture(ravu_lut3, vec2(0.5, coord_y)); + res += (inp1[local_pos + 16] + inp1[local_pos + 41]) * w[0]; + res += (inp0[local_pos + 30] + inp0[local_pos + 41]) * w[1]; + res += (inp1[local_pos + 30] + inp1[local_pos + 27]) * w[2]; + res += (inp0[local_pos + 44] + inp0[local_pos + 27]) * w[3]; + w = texture(ravu_lut3, vec2(0.7, coord_y)); + res += (inp0[local_pos + 15] + inp0[local_pos + 56]) * w[0]; + res += (inp1[local_pos + 15] + inp1[local_pos + 42]) * w[1]; + res += (inp0[local_pos + 29] + inp0[local_pos + 42]) * w[2]; + res += (inp1[local_pos + 29] + inp1[local_pos + 28]) * w[3]; + w = texture(ravu_lut3, vec2(0.9, coord_y)); + res += (inp0[local_pos + 43] + inp0[local_pos + 28]) * w[0]; + res += (inp1[local_pos + 43] + inp1[local_pos + 14]) * w[1]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res, 1.0)); + } + { + float luma6 = inp_luma0[local_pos + 15]; + float luma1 = inp_luma0[local_pos + 16]; + float luma18 = inp_luma0[local_pos + 27]; + float luma13 = inp_luma0[local_pos + 28]; + float luma8 = inp_luma0[local_pos + 29]; + float luma3 = inp_luma0[local_pos + 30]; + float luma25 = inp_luma0[local_pos + 40]; + 
float luma20 = inp_luma0[local_pos + 41]; + float luma15 = inp_luma0[local_pos + 42]; + float luma10 = inp_luma0[local_pos + 43]; + float luma32 = inp_luma0[local_pos + 53]; + float luma27 = inp_luma0[local_pos + 54]; + float luma22 = inp_luma0[local_pos + 55]; + float luma17 = inp_luma0[local_pos + 56]; + float luma34 = inp_luma0[local_pos + 67]; + float luma29 = inp_luma0[local_pos + 68]; + float luma12 = inp_luma1[local_pos + 14]; + float luma7 = inp_luma1[local_pos + 15]; + float luma2 = inp_luma1[local_pos + 16]; + float luma24 = inp_luma1[local_pos + 26]; + float luma19 = inp_luma1[local_pos + 27]; + float luma14 = inp_luma1[local_pos + 28]; + float luma9 = inp_luma1[local_pos + 29]; + float luma4 = inp_luma1[local_pos + 30]; + float luma31 = inp_luma1[local_pos + 39]; + float luma26 = inp_luma1[local_pos + 40]; + float luma21 = inp_luma1[local_pos + 41]; + float luma16 = inp_luma1[local_pos + 42]; + float luma11 = inp_luma1[local_pos + 43]; + float luma33 = inp_luma1[local_pos + 53]; + float luma28 = inp_luma1[local_pos + 54]; + float luma23 = inp_luma1[local_pos + 55]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma13 - luma1) / 2.0; + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma14 - luma2) / 2.0; + gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma3) / 2.0; + gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma16 - luma4) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0; + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0; + gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0; + abd 
+= vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0; + gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0; + gy = (luma20 - luma18) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma31 - luma19) / 2.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma32 - luma20) / 2.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma33 - luma21) / 2.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma34 - luma22) / 2.0; + gy = (luma29 - luma27) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, 
abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + w = texture(ravu_lut3, vec2(0.1, coord_y)); + res += (inp1[local_pos + 2] + inp1[local_pos + 67]) * w[0]; + res += (inp0[local_pos + 16] + inp0[local_pos + 67]) * w[1]; + res += (inp1[local_pos + 16] + inp1[local_pos + 53]) * w[2]; + res += (inp0[local_pos + 30] + inp0[local_pos + 53]) * w[3]; + w = texture(ravu_lut3, vec2(0.3, coord_y)); + res += (inp1[local_pos + 30] + inp1[local_pos + 39]) * w[0]; + res += (inp0[local_pos + 44] + inp0[local_pos + 39]) * w[1]; + res += (inp0[local_pos + 15] + inp0[local_pos + 68]) * w[2]; + res += (inp1[local_pos + 15] + inp1[local_pos + 54]) * w[3]; + w = texture(ravu_lut3, vec2(0.5, coord_y)); + res += (inp0[local_pos + 29] + inp0[local_pos + 54]) * w[0]; + res += (inp1[local_pos + 29] + inp1[local_pos + 40]) * w[1]; + res += (inp0[local_pos + 43] + inp0[local_pos + 40]) * w[2]; + res += (inp1[local_pos + 43] + inp1[local_pos + 26]) * w[3]; + w = texture(ravu_lut3, vec2(0.7, coord_y)); + res += (inp1[local_pos + 14] + inp1[local_pos + 55]) * w[0]; + res += (inp0[local_pos + 28] + inp0[local_pos + 55]) * w[1]; + res += (inp1[local_pos + 28] + inp1[local_pos + 41]) * w[2]; + res += (inp0[local_pos + 42] + inp0[local_pos + 41]) * w[3]; + w = texture(ravu_lut3, vec2(0.9, coord_y)); + res += (inp1[local_pos + 42] + inp1[local_pos + 27]) * w[0]; + res += (inp0[local_pos + 56] + inp0[local_pos + 27]) * w[1]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res, 1.0)); + } + vec3 res; + res = inp0[local_pos + 
42]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res, 1.0)); + res = inp1[local_pos + 28]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res, 1.0)); +} diff --git a/src/Effects/RAVU/RAVU_R4.hlsl b/src/Effects/RAVU/RAVU_R4.hlsl new file mode 100644 index 000000000..79104920f --- /dev/null +++ b/src/Effects/RAVU/RAVU_R4.hlsl @@ -0,0 +1,851 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu.py --target luma --weights-file weights\ravu_weights-r4.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see https://www.gnu.org/licenses/.
+
+//!MAGPIE EFFECT
+//!VERSION 4
+
+//!TEXTURE
+Texture2D INPUT;
+
+//!SAMPLER
+//!FILTER POINT
+SamplerState sam_INPUT;
+
+//!TEXTURE
+//!WIDTH INPUT_WIDTH * 2
+//!HEIGHT INPUT_HEIGHT * 2
+Texture2D OUTPUT;
+
+//!SAMPLER
+//!FILTER LINEAR
+SamplerState sam_INPUT_LINEAR;
+
+//!TEXTURE
+//!SOURCE ravu_lut4_f16.dds
+//!FORMAT R16G16B16A16_FLOAT
+Texture2D ravu_lut4;
+
+//!SAMPLER
+//!FILTER LINEAR
+SamplerState sam_ravu_lut4;
+
+//!TEXTURE
+//!FORMAT R16_FLOAT
+//!WIDTH INPUT_WIDTH
+//!HEIGHT INPUT_HEIGHT
+Texture2D ravu_int11;
+
+//!SAMPLER
+//!FILTER POINT
+SamplerState sam_ravu_int11;
+
+//!COMMON
+#include "prescalers.hlsli"
+
+#define LAST_PASS 2
+
+//!PASS 1
+//!DESC RAVU (step1, luma, r4, compute)
+//!IN INPUT, ravu_lut4
+//!OUT ravu_int11
+//!BLOCK_SIZE 32, 8
+//!NUM_THREADS 32, 8
+shared float inp0[585]; // luma tile shared by the work group: 585 = 39 * 15 entries, indexed as x * 15 + y in Pass1
+
+#define CURRENT_PASS 1
+
+#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
+#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
+void imageStoreOverride(uint2 pos, float value) { ravu_int11[pos] = (value); } // pass-1 sink: stores the scalar result in the ravu_int11 intermediate texture
+
+#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
+static const float2 INPUT_size = float2(GetInputSize());
+static const float2 INPUT_pt = float2(GetInputPt());
+
+#define ravu_lut4_tex(pos) (vec4(texture(ravu_lut4, pos)))
+
+#define HOOKED_tex(pos) INPUT_tex(pos)
+#define HOOKED_size INPUT_size
+#define HOOKED_pt INPUT_pt
+
+void Pass1(uint2 blockStart, uint3 threadId) { // Pass 1 entry point: per invocation, classifies the local gradient of an 8x8 luma window, applies the matching LUT kernel and writes one value to ravu_int11
+    ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); // tile origin; NOTE(review): gl_* names, ivec2/vec*, mix, mod, texture are not defined in this file - presumably GLSL-compat macros from prescalers.hlsli
+    int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); // this invocation's base index into inp0: one x step = 15 entries, one y step = 1 entry
+    {
+        for (int id = int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { // cooperative fill: each invocation loads entries id, id + group size, ... until all 585 are written
+            uint x = (uint)id / 15, y = (uint)id % 15; // unflatten id using the same 15-entry stride as local_pos
+            inp0[id] = // luma (GET_SAMPLE = dot with rgb2y) sampled at texel position group_base + (x, y) - 2.5, scaled by HOOKED_pt
+                HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).x;
+        }
+    }
+    barrier(); // tile must be complete before neighbours' entries are read; barrier() is defined outside this file (presumably a group sync - confirm in prescalers.hlsli)
+#if CURRENT_PASS == LAST_PASS
+    uint2 destPos = blockStart + threadId.xy * 2; // this bounds check is compiled only for the last pass; here CURRENT_PASS is 1 and LAST_PASS is 2, so it is skipped
+    uint2 outputSize = GetOutputSize();
+    if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
+        return;
+    }
+#endif
+    {
+        float luma57 = inp0[local_pos + 106]; // lumaN is the window sample at inp0[local_pos + 15 * (N / 8) + N % 8]; corner samples 0, 7, 56 and 63 are not needed by the gradients
+        float luma58 = inp0[local_pos + 107];
+        float luma59 = inp0[local_pos + 108];
+        float luma60 = inp0[local_pos + 109];
+        float luma61 = inp0[local_pos + 110];
+        float luma62 = inp0[local_pos + 111];
+        float luma8 = inp0[local_pos + 15];
+        float luma9 = inp0[local_pos + 16];
+        float luma10 = inp0[local_pos + 17];
+        float luma11 = inp0[local_pos + 18];
+        float luma12 = inp0[local_pos + 19];
+        float luma1 = inp0[local_pos + 1];
+        float luma13 = inp0[local_pos + 20];
+        float luma14 = inp0[local_pos + 21];
+        float luma15 = inp0[local_pos + 22];
+        float luma2 = inp0[local_pos + 2];
+        float luma16 = inp0[local_pos + 30];
+        float luma17 = inp0[local_pos + 31];
+        float luma18 = inp0[local_pos + 32];
+        float luma19 = inp0[local_pos + 33];
+        float luma20 = inp0[local_pos + 34];
+        float luma21 = inp0[local_pos + 35];
+        float luma22 = inp0[local_pos + 36];
+        float luma23 = inp0[local_pos + 37];
+        float luma3 = inp0[local_pos + 3];
+        float luma24 = inp0[local_pos + 45];
+        float luma25 = inp0[local_pos + 46];
+        float luma26 = inp0[local_pos + 47];
+        float luma27 = inp0[local_pos + 48];
+        float luma28 = inp0[local_pos + 49];
+        float luma4 = inp0[local_pos + 4];
+        float luma29 = inp0[local_pos + 50];
+        float luma30 = inp0[local_pos + 51];
+        float luma31 = inp0[local_pos + 52];
+        float luma5 = inp0[local_pos + 5];
+        float luma32 = inp0[local_pos + 60];
+        float luma33 = inp0[local_pos + 61];
+        float luma34 = inp0[local_pos + 62];
+        float luma35 = inp0[local_pos + 63];
+        float luma36 = inp0[local_pos + 64];
+        float luma37 = inp0[local_pos + 65];
+        float luma38 = inp0[local_pos + 66];
+        float luma39 = inp0[local_pos + 67];
+        float luma6 = inp0[local_pos + 6];
+        float luma40 = inp0[local_pos + 75];
+        float luma41 = inp0[local_pos + 76];
+        float luma42 = inp0[local_pos + 77];
+        float luma43 = inp0[local_pos + 78];
+        float luma44 = inp0[local_pos + 79];
+        float luma45 = inp0[local_pos + 80];
+        float luma46 = inp0[local_pos + 81];
+        float luma47 = inp0[local_pos + 82];
+        float luma48 = inp0[local_pos + 90];
+        float luma49 = inp0[local_pos + 91];
+        float luma50 = inp0[local_pos + 92];
+        float luma51 = inp0[local_pos + 93];
+        float luma52 = inp0[local_pos + 94];
+        float luma53 = inp0[local_pos + 95];
+        float luma54 = inp0[local_pos + 96];
+        float luma55 = inp0[local_pos + 97];
+        vec3 abd = vec3(0.0, 0.0, 0.0); // accumulates weighted (gx*gx, gx*gy, gy*gy) over the 6x6 interior positions of the window (36 terms below)
+        float gx, gy; // gx differences samples 15 entries apart (x axis of the tile), gy differences adjacent entries (y axis)
+        gx = (luma17 - luma1) / 2.0; // 2-point central difference, used where the 4-point stencil would reach outside the 8x8 window
+        gy = (luma10 - luma8) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; // per-position weights are symmetric about the window centre (presumably Gaussian; the generator is not shown here)
+        gx = (luma18 - luma2) / 2.0;
+        gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0; // 4-point stencil: (-f[+2] + 8 f[+1] - 8 f[-1] + f[-2]) / 12
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
+        gx = (luma19 - luma3) / 2.0;
+        gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
+        gx = (luma20 - luma4) / 2.0;
+        gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
+        gx = (luma21 - luma5) / 2.0;
+        gy = (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
+        gx = (luma22 - luma6) / 2.0;
+        gy = (luma15 - luma13) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
+        gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0;
+        gy = (luma18 - luma16) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
+        gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0;
+        gy = (-luma20 + 8.0 * luma19 - 8.0 * luma17 + luma16) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
+        gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0;
+        gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
+        gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0;
+        gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
+        gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0;
+        gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
+        gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0;
+        gy = (luma23 - luma21) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
+        gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0;
+        gy = (luma26 - luma24) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
+        gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0;
+        gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
+        gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0;
+        gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
+        gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0;
+        gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
+        gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0;
+        gy = (-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
+        gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0;
+        gy = (luma31 - luma29) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
+        gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0;
+        gy = (luma34 - luma32) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
+        gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0;
+        gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
+        gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0;
+        gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
+        gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0;
+        gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
+        gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0;
+        gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
+        gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0;
+        gy = (luma39 - luma37) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
+        gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0;
+        gy = (luma42 - luma40) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
+        gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0;
+        gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
+        gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0;
+        gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
+        gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0;
+        gy = (-luma46 + 8.0 * luma45 - 8.0 * luma43 + luma42) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
+        gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0;
+        gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
+        gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) / 12.0;
+        gy = (luma47 - luma45) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
+        gx = (luma57 - luma41) / 2.0;
+        gy = (luma50 - luma48) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
+        gx = (luma58 - luma42) / 2.0;
+        gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
+        gx = (luma59 - luma43) / 2.0;
+        gy = (-luma53 + 8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
+        gx = (luma60 - luma44) / 2.0;
+        gy = (-luma54 + 8.0 * luma53 - 8.0 * luma51 + luma50) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
+        gx = (luma61 - luma45) / 2.0;
+        gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
+        gx = (luma62 - luma46) / 2.0;
+        gy = (luma55 - luma53) / 2.0;
+        abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
+        float a = abd.x, b = abd.y, d = abd.z; // entries of the symmetric 2x2 matrix [[a, b], [b, d]]
+        float T = a + d, D = a * d - b * b; // its trace and determinant
+        float delta = sqrt(max(T * T / 4.0 - D, 0.0)); // max() keeps the discriminant non-negative under rounding
+        float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; // eigenvalues of the matrix, L1 >= L2
+        float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
+        float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); // direction wrapped into [0, pi); 0.0 when |b| < 2^-23, assuming mix() picks its 2nd argument when the condition holds (GLSL semantics - confirm)
+        float lambda = sqrtL1;
+        float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); // relative spread of the two roots; the 0.0 branch avoids dividing by ~0 (same mix() assumption)
+        float angle = floor(theta * 24.0 / 3.141592653589793); // quantise direction into bins of width pi / 24
+        float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); // 9 logarithmic strength bins, 0..8
+        float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); // 3 bins split at mu = 0.25 and mu = 0.5 (same mix() assumption)
+        float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; // centre of the selected LUT row; 648 = 24 * 9 * 3 rows
+        float res = 0.0;
+        vec4 w;
+        w = texture(ravu_lut4, vec2(0.0625, coord_y)); // 8 fetches at x = (k + 0.5) / 8, 4 weights each = 32 kernel weights
+        res += (inp0[local_pos + 0] + inp0[local_pos + 112]) * w[0]; // each weight is shared by two samples whose offsets sum to 112, i.e. point-mirrored about the window centre
+        res += (inp0[local_pos + 1] + inp0[local_pos + 111]) * w[1];
+        res += (inp0[local_pos + 2] + inp0[local_pos + 110]) * w[2];
+        res += (inp0[local_pos + 3] + inp0[local_pos + 109]) * w[3];
+        w = texture(ravu_lut4, vec2(0.1875, coord_y));
+        res += (inp0[local_pos + 4] + inp0[local_pos + 108]) * w[0];
+        res += (inp0[local_pos + 5] + inp0[local_pos + 107]) * w[1];
+        res += (inp0[local_pos + 6] + inp0[local_pos + 106]) * w[2];
+        res += (inp0[local_pos + 7] + inp0[local_pos + 105]) * w[3];
+        w = texture(ravu_lut4, vec2(0.3125, coord_y));
+        res += (inp0[local_pos + 15] + inp0[local_pos + 97]) * w[0];
+        res += (inp0[local_pos + 16] + inp0[local_pos + 96]) * w[1];
+        res += (inp0[local_pos + 17] + inp0[local_pos + 95]) * w[2];
+        res += (inp0[local_pos + 18] + inp0[local_pos + 94]) * w[3];
+        w = texture(ravu_lut4, vec2(0.4375, coord_y));
+        res += (inp0[local_pos + 19] + inp0[local_pos + 93]) * w[0];
+        res += (inp0[local_pos + 20] + inp0[local_pos + 92]) * w[1];
+        res += (inp0[local_pos + 21] + inp0[local_pos + 91]) * w[2];
+        res += (inp0[local_pos + 22] + inp0[local_pos + 90]) * w[3];
+        w = texture(ravu_lut4, vec2(0.5625, coord_y));
+        res += (inp0[local_pos + 30] + inp0[local_pos + 82]) * w[0];
+        res += (inp0[local_pos + 31] + inp0[local_pos + 81]) * w[1];
+        res += (inp0[local_pos + 32] + inp0[local_pos + 80]) * w[2];
+        res += (inp0[local_pos + 33] + inp0[local_pos + 79]) * w[3];
+        w = texture(ravu_lut4, vec2(0.6875, coord_y));
+        res += (inp0[local_pos + 34] + inp0[local_pos + 78]) * w[0];
+        res += (inp0[local_pos + 35] + inp0[local_pos + 77]) * w[1];
+        res += (inp0[local_pos + 36] + inp0[local_pos + 76]) * w[2];
+        res += (inp0[local_pos + 37] + inp0[local_pos + 75]) * w[3];
+        w = texture(ravu_lut4, vec2(0.8125, coord_y));
+        res += (inp0[local_pos + 45] + inp0[local_pos + 67]) * w[0];
+        res += (inp0[local_pos + 46] + inp0[local_pos + 66]) * w[1];
+        res += (inp0[local_pos + 47] + inp0[local_pos + 65]) * w[2];
+        res += (inp0[local_pos + 48] + inp0[local_pos + 64]) * w[3];
+        w = texture(ravu_lut4, vec2(0.9375, coord_y));
+        res += (inp0[local_pos + 49] + inp0[local_pos + 63]) * w[0];
+        res += (inp0[local_pos + 50] + inp0[local_pos + 62]) * w[1];
+        res += (inp0[local_pos + 51] + inp0[local_pos + 61]) * w[2];
+        res += (inp0[local_pos + 52] + inp0[local_pos + 60]) * w[3];
+        res = clamp(res, 0.0, 1.0); // keep the interpolated luma inside [0, 1]
+        imageStore(out_image, ivec2(gl_GlobalInvocationID), res); // the imageStore macro above routes this to imageStoreOverride, i.e. ravu_int11[pos] = res
+    }
+}
+//!PASS 2
+//!DESC RAVU (step2, luma, r4, compute)
+//!IN INPUT, ravu_lut4, ravu_int11
+//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +shared float inp0[585]; +shared float inp1[585]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lut4_tex(pos) (vec4(texture(ravu_lut4, pos))) + +#define ravu_int11_tex(pos) (float(texture(ravu_int11, pos).x)) +static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y); +static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y)); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); + { + for (int id = int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp0[id] = + ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-3.5), float(group_base.y + y) + (-3.5))) + .x; + } + } + { + for (int id = int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp1[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).x; + } + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= 
outputSize.y) { + return; + } +#endif + { + float luma16 = inp0[local_pos + 18]; + float luma9 = inp0[local_pos + 19]; + float luma2 = inp0[local_pos + 20]; + float luma32 = inp0[local_pos + 32]; + float luma25 = inp0[local_pos + 33]; + float luma18 = inp0[local_pos + 34]; + float luma11 = inp0[local_pos + 35]; + float luma4 = inp0[local_pos + 36]; + float luma48 = inp0[local_pos + 46]; + float luma41 = inp0[local_pos + 47]; + float luma34 = inp0[local_pos + 48]; + float luma27 = inp0[local_pos + 49]; + float luma20 = inp0[local_pos + 50]; + float luma13 = inp0[local_pos + 51]; + float luma6 = inp0[local_pos + 52]; + float luma57 = inp0[local_pos + 61]; + float luma50 = inp0[local_pos + 62]; + float luma43 = inp0[local_pos + 63]; + float luma36 = inp0[local_pos + 64]; + float luma29 = inp0[local_pos + 65]; + float luma22 = inp0[local_pos + 66]; + float luma15 = inp0[local_pos + 67]; + float luma59 = inp0[local_pos + 77]; + float luma52 = inp0[local_pos + 78]; + float luma45 = inp0[local_pos + 79]; + float luma38 = inp0[local_pos + 80]; + float luma31 = inp0[local_pos + 81]; + float luma61 = inp0[local_pos + 93]; + float luma54 = inp0[local_pos + 94]; + float luma47 = inp0[local_pos + 95]; + float luma24 = inp1[local_pos + 17]; + float luma17 = inp1[local_pos + 18]; + float luma10 = inp1[local_pos + 19]; + float luma3 = inp1[local_pos + 20]; + float luma40 = inp1[local_pos + 31]; + float luma33 = inp1[local_pos + 32]; + float luma26 = inp1[local_pos + 33]; + float luma19 = inp1[local_pos + 34]; + float luma12 = inp1[local_pos + 35]; + float luma5 = inp1[local_pos + 36]; + float luma8 = inp1[local_pos + 3]; + float luma49 = inp1[local_pos + 46]; + float luma42 = inp1[local_pos + 47]; + float luma35 = inp1[local_pos + 48]; + float luma28 = inp1[local_pos + 49]; + float luma1 = inp1[local_pos + 4]; + float luma21 = inp1[local_pos + 50]; + float luma14 = inp1[local_pos + 51]; + float luma58 = inp1[local_pos + 61]; + float luma51 = inp1[local_pos + 62]; + float luma44 = 
inp1[local_pos + 63]; + float luma37 = inp1[local_pos + 64]; + float luma30 = inp1[local_pos + 65]; + float luma23 = inp1[local_pos + 66]; + float luma60 = inp1[local_pos + 77]; + float luma53 = inp1[local_pos + 78]; + float luma46 = inp1[local_pos + 79]; + float luma39 = inp1[local_pos + 80]; + float luma62 = inp1[local_pos + 93]; + float luma55 = inp1[local_pos + 94]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma17 - luma1) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (luma18 - luma2) / 2.0; + gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma19 - luma3) / 2.0; + gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma20 - luma4) / 2.0; + gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma21 - luma5) / 2.0; + gy = (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma22 - luma6) / 2.0; + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0; + gy = (luma18 - luma16) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0; + gy = (-luma20 + 8.0 * luma19 - 8.0 * luma17 + luma16) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0; + gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * 
gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0; + gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0; + gy = (-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0; + gy = (luma31 - luma29) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0; + gy = (luma34 - luma32) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0; + gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0; + gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * 
gy) * 0.04933151482066013; + gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0; + gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0; + gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0; + gy = (luma39 - luma37) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0; + gy = (luma42 - luma40) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0; + gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0; + gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0; + gy = (-luma46 + 8.0 * luma45 - 8.0 * luma43 + luma42) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0; + gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) / 12.0; + gy = (luma47 - luma45) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma57 - luma41) / 2.0; + gy = (luma50 - luma48) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (luma58 - luma42) / 2.0; + gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma59 - luma43) / 2.0; + gy = (-luma53 + 
8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma60 - luma44) / 2.0; + gy = (-luma54 + 8.0 * luma53 - 8.0 * luma51 + luma50) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma61 - luma45) / 2.0; + gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma62 - luma46) / 2.0; + gy = (luma55 - luma53) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + float res = 0.0; + vec4 w; + w = texture(ravu_lut4, vec2(0.0625, coord_y)); + res += (inp0[local_pos + 4] + inp0[local_pos + 109]) * w[0]; + res += (inp1[local_pos + 4] + inp1[local_pos + 93]) * w[1]; + res += (inp0[local_pos + 20] + inp0[local_pos + 93]) * w[2]; + res += (inp1[local_pos + 20] + inp1[local_pos + 77]) * w[3]; + w = texture(ravu_lut4, vec2(0.1875, coord_y)); + res += (inp0[local_pos + 36] + inp0[local_pos + 77]) * w[0]; + res += (inp1[local_pos + 36] + inp1[local_pos + 61]) * w[1]; + res += (inp0[local_pos + 52] + inp0[local_pos + 61]) * w[2]; + res += (inp1[local_pos + 52] + inp1[local_pos + 45]) * w[3]; + w = texture(ravu_lut4, vec2(0.3125, coord_y)); + res += 
(inp1[local_pos + 3] + inp1[local_pos + 94]) * w[0]; + res += (inp0[local_pos + 19] + inp0[local_pos + 94]) * w[1]; + res += (inp1[local_pos + 19] + inp1[local_pos + 78]) * w[2]; + res += (inp0[local_pos + 35] + inp0[local_pos + 78]) * w[3]; + w = texture(ravu_lut4, vec2(0.4375, coord_y)); + res += (inp1[local_pos + 35] + inp1[local_pos + 62]) * w[0]; + res += (inp0[local_pos + 51] + inp0[local_pos + 62]) * w[1]; + res += (inp1[local_pos + 51] + inp1[local_pos + 46]) * w[2]; + res += (inp0[local_pos + 67] + inp0[local_pos + 46]) * w[3]; + w = texture(ravu_lut4, vec2(0.5625, coord_y)); + res += (inp0[local_pos + 18] + inp0[local_pos + 95]) * w[0]; + res += (inp1[local_pos + 18] + inp1[local_pos + 79]) * w[1]; + res += (inp0[local_pos + 34] + inp0[local_pos + 79]) * w[2]; + res += (inp1[local_pos + 34] + inp1[local_pos + 63]) * w[3]; + w = texture(ravu_lut4, vec2(0.6875, coord_y)); + res += (inp0[local_pos + 50] + inp0[local_pos + 63]) * w[0]; + res += (inp1[local_pos + 50] + inp1[local_pos + 47]) * w[1]; + res += (inp0[local_pos + 66] + inp0[local_pos + 47]) * w[2]; + res += (inp1[local_pos + 66] + inp1[local_pos + 31]) * w[3]; + w = texture(ravu_lut4, vec2(0.8125, coord_y)); + res += (inp1[local_pos + 17] + inp1[local_pos + 80]) * w[0]; + res += (inp0[local_pos + 33] + inp0[local_pos + 80]) * w[1]; + res += (inp1[local_pos + 33] + inp1[local_pos + 64]) * w[2]; + res += (inp0[local_pos + 49] + inp0[local_pos + 64]) * w[3]; + w = texture(ravu_lut4, vec2(0.9375, coord_y)); + res += (inp1[local_pos + 49] + inp1[local_pos + 48]) * w[0]; + res += (inp0[local_pos + 65] + inp0[local_pos + 48]) * w[1]; + res += (inp1[local_pos + 65] + inp1[local_pos + 32]) * w[2]; + res += (inp0[local_pos + 81] + inp0[local_pos + 32]) * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), res); + } + { + float luma62 = inp0[local_pos + 108]; + float luma55 = inp0[local_pos + 109]; + float luma8 = inp0[local_pos + 18]; + float luma1 = 
inp0[local_pos + 19]; + float luma24 = inp0[local_pos + 32]; + float luma17 = inp0[local_pos + 33]; + float luma10 = inp0[local_pos + 34]; + float luma3 = inp0[local_pos + 35]; + float luma40 = inp0[local_pos + 46]; + float luma33 = inp0[local_pos + 47]; + float luma26 = inp0[local_pos + 48]; + float luma19 = inp0[local_pos + 49]; + float luma12 = inp0[local_pos + 50]; + float luma5 = inp0[local_pos + 51]; + float luma49 = inp0[local_pos + 61]; + float luma42 = inp0[local_pos + 62]; + float luma35 = inp0[local_pos + 63]; + float luma28 = inp0[local_pos + 64]; + float luma21 = inp0[local_pos + 65]; + float luma14 = inp0[local_pos + 66]; + float luma58 = inp0[local_pos + 76]; + float luma51 = inp0[local_pos + 77]; + float luma44 = inp0[local_pos + 78]; + float luma37 = inp0[local_pos + 79]; + float luma30 = inp0[local_pos + 80]; + float luma23 = inp0[local_pos + 81]; + float luma60 = inp0[local_pos + 92]; + float luma53 = inp0[local_pos + 93]; + float luma46 = inp0[local_pos + 94]; + float luma39 = inp0[local_pos + 95]; + float luma16 = inp1[local_pos + 17]; + float luma9 = inp1[local_pos + 18]; + float luma2 = inp1[local_pos + 19]; + float luma32 = inp1[local_pos + 31]; + float luma25 = inp1[local_pos + 32]; + float luma18 = inp1[local_pos + 33]; + float luma11 = inp1[local_pos + 34]; + float luma4 = inp1[local_pos + 35]; + float luma48 = inp1[local_pos + 45]; + float luma41 = inp1[local_pos + 46]; + float luma34 = inp1[local_pos + 47]; + float luma27 = inp1[local_pos + 48]; + float luma20 = inp1[local_pos + 49]; + float luma13 = inp1[local_pos + 50]; + float luma6 = inp1[local_pos + 51]; + float luma57 = inp1[local_pos + 60]; + float luma50 = inp1[local_pos + 61]; + float luma43 = inp1[local_pos + 62]; + float luma36 = inp1[local_pos + 63]; + float luma29 = inp1[local_pos + 64]; + float luma22 = inp1[local_pos + 65]; + float luma15 = inp1[local_pos + 66]; + float luma59 = inp1[local_pos + 76]; + float luma52 = inp1[local_pos + 77]; + float luma45 = inp1[local_pos + 
78]; + float luma38 = inp1[local_pos + 79]; + float luma31 = inp1[local_pos + 80]; + float luma61 = inp1[local_pos + 92]; + float luma54 = inp1[local_pos + 93]; + float luma47 = inp1[local_pos + 94]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma17 - luma1) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (luma18 - luma2) / 2.0; + gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma19 - luma3) / 2.0; + gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma20 - luma4) / 2.0; + gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma21 - luma5) / 2.0; + gy = (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma22 - luma6) / 2.0; + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0; + gy = (luma18 - luma16) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0; + gy = (-luma20 + 8.0 * luma19 - 8.0 * luma17 + luma16) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0; + gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + 
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0; + gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0; + gy = (-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0; + gy = (luma31 - luma29) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0; + gy = (luma34 - luma32) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0; + gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0; + gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0; + gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0; + abd += vec3(gx * 
gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0; + gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0; + gy = (luma39 - luma37) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0; + gy = (luma42 - luma40) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0; + gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0; + gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0; + gy = (-luma46 + 8.0 * luma45 - 8.0 * luma43 + luma42) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0; + gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) / 12.0; + gy = (luma47 - luma45) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma57 - luma41) / 2.0; + gy = (luma50 - luma48) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (luma58 - luma42) / 2.0; + gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma59 - luma43) / 2.0; + gy = (-luma53 + 8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma60 - luma44) / 2.0; + gy = (-luma54 + 8.0 * luma53 - 
8.0 * luma51 + luma50) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma61 - luma45) / 2.0; + gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma62 - luma46) / 2.0; + gy = (luma55 - luma53) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + float res = 0.0; + vec4 w; + w = texture(ravu_lut4, vec2(0.0625, coord_y)); + res += (inp1[local_pos + 3] + inp1[local_pos + 108]) * w[0]; + res += (inp0[local_pos + 19] + inp0[local_pos + 108]) * w[1]; + res += (inp1[local_pos + 19] + inp1[local_pos + 92]) * w[2]; + res += (inp0[local_pos + 35] + inp0[local_pos + 92]) * w[3]; + w = texture(ravu_lut4, vec2(0.1875, coord_y)); + res += (inp1[local_pos + 35] + inp1[local_pos + 76]) * w[0]; + res += (inp0[local_pos + 51] + inp0[local_pos + 76]) * w[1]; + res += (inp1[local_pos + 51] + inp1[local_pos + 60]) * w[2]; + res += (inp0[local_pos + 67] + inp0[local_pos + 60]) * w[3]; + w = texture(ravu_lut4, vec2(0.3125, coord_y)); + res += (inp0[local_pos + 18] + inp0[local_pos + 109]) * w[0]; + res += (inp1[local_pos + 18] + inp1[local_pos + 93]) * w[1]; + res += (inp0[local_pos + 34] + inp0[local_pos + 93]) * w[2]; 
+ res += (inp1[local_pos + 34] + inp1[local_pos + 77]) * w[3]; + w = texture(ravu_lut4, vec2(0.4375, coord_y)); + res += (inp0[local_pos + 50] + inp0[local_pos + 77]) * w[0]; + res += (inp1[local_pos + 50] + inp1[local_pos + 61]) * w[1]; + res += (inp0[local_pos + 66] + inp0[local_pos + 61]) * w[2]; + res += (inp1[local_pos + 66] + inp1[local_pos + 45]) * w[3]; + w = texture(ravu_lut4, vec2(0.5625, coord_y)); + res += (inp1[local_pos + 17] + inp1[local_pos + 94]) * w[0]; + res += (inp0[local_pos + 33] + inp0[local_pos + 94]) * w[1]; + res += (inp1[local_pos + 33] + inp1[local_pos + 78]) * w[2]; + res += (inp0[local_pos + 49] + inp0[local_pos + 78]) * w[3]; + w = texture(ravu_lut4, vec2(0.6875, coord_y)); + res += (inp1[local_pos + 49] + inp1[local_pos + 62]) * w[0]; + res += (inp0[local_pos + 65] + inp0[local_pos + 62]) * w[1]; + res += (inp1[local_pos + 65] + inp1[local_pos + 46]) * w[2]; + res += (inp0[local_pos + 81] + inp0[local_pos + 46]) * w[3]; + w = texture(ravu_lut4, vec2(0.8125, coord_y)); + res += (inp0[local_pos + 32] + inp0[local_pos + 95]) * w[0]; + res += (inp1[local_pos + 32] + inp1[local_pos + 79]) * w[1]; + res += (inp0[local_pos + 48] + inp0[local_pos + 79]) * w[2]; + res += (inp1[local_pos + 48] + inp1[local_pos + 63]) * w[3]; + w = texture(ravu_lut4, vec2(0.9375, coord_y)); + res += (inp0[local_pos + 64] + inp0[local_pos + 63]) * w[0]; + res += (inp1[local_pos + 64] + inp1[local_pos + 47]) * w[1]; + res += (inp0[local_pos + 80] + inp0[local_pos + 47]) * w[2]; + res += (inp1[local_pos + 80] + inp1[local_pos + 31]) * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), res); + } + float res; + res = inp0[local_pos + 64]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), res); + res = inp1[local_pos + 48]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), res); +} diff --git a/src/Effects/RAVU/RAVU_R4_RGB.hlsl b/src/Effects/RAVU/RAVU_R4_RGB.hlsl new 
file mode 100644 index 000000000..98dcd40be --- /dev/null +++ b/src/Effects/RAVU/RAVU_R4_RGB.hlsl @@ -0,0 +1,852 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu.py --target rgb --weights-file weights\ravu_weights-r4.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + +//!TEXTURE +//!SOURCE ravu_lut4_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_lut4; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_lut4; + +//!TEXTURE +//!FORMAT R16G16B16A16_FLOAT +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D ravu_int11; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_ravu_int11; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 2 + +//!PASS 1 +//!DESC RAVU (step1, rgb, r4, compute) +//!IN INPUT, ravu_lut4 +//!OUT ravu_int11 +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +shared vec3 inp0[585]; +shared float inp_luma0[585]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.xyz) +void imageStoreOverride(uint2 pos, vec3 value) { ravu_int11[pos] = vec4(value, 0.0); } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lut4_tex(pos) (vec4(texture(ravu_lut4, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); + { + for (int id = int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp0[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).xyz; + inp_luma0[id] = dot(inp0[id], color_primary); + } + } + barrier(); +#if CURRENT_PASS == LAST_PASS + 
uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + { + float luma57 = inp_luma0[local_pos + 106]; + float luma58 = inp_luma0[local_pos + 107]; + float luma59 = inp_luma0[local_pos + 108]; + float luma60 = inp_luma0[local_pos + 109]; + float luma61 = inp_luma0[local_pos + 110]; + float luma62 = inp_luma0[local_pos + 111]; + float luma8 = inp_luma0[local_pos + 15]; + float luma9 = inp_luma0[local_pos + 16]; + float luma10 = inp_luma0[local_pos + 17]; + float luma11 = inp_luma0[local_pos + 18]; + float luma12 = inp_luma0[local_pos + 19]; + float luma1 = inp_luma0[local_pos + 1]; + float luma13 = inp_luma0[local_pos + 20]; + float luma14 = inp_luma0[local_pos + 21]; + float luma15 = inp_luma0[local_pos + 22]; + float luma2 = inp_luma0[local_pos + 2]; + float luma16 = inp_luma0[local_pos + 30]; + float luma17 = inp_luma0[local_pos + 31]; + float luma18 = inp_luma0[local_pos + 32]; + float luma19 = inp_luma0[local_pos + 33]; + float luma20 = inp_luma0[local_pos + 34]; + float luma21 = inp_luma0[local_pos + 35]; + float luma22 = inp_luma0[local_pos + 36]; + float luma23 = inp_luma0[local_pos + 37]; + float luma3 = inp_luma0[local_pos + 3]; + float luma24 = inp_luma0[local_pos + 45]; + float luma25 = inp_luma0[local_pos + 46]; + float luma26 = inp_luma0[local_pos + 47]; + float luma27 = inp_luma0[local_pos + 48]; + float luma28 = inp_luma0[local_pos + 49]; + float luma4 = inp_luma0[local_pos + 4]; + float luma29 = inp_luma0[local_pos + 50]; + float luma30 = inp_luma0[local_pos + 51]; + float luma31 = inp_luma0[local_pos + 52]; + float luma5 = inp_luma0[local_pos + 5]; + float luma32 = inp_luma0[local_pos + 60]; + float luma33 = inp_luma0[local_pos + 61]; + float luma34 = inp_luma0[local_pos + 62]; + float luma35 = inp_luma0[local_pos + 63]; + float luma36 = inp_luma0[local_pos + 64]; + float luma37 = inp_luma0[local_pos + 65]; + float luma38 = 
inp_luma0[local_pos + 66]; + float luma39 = inp_luma0[local_pos + 67]; + float luma6 = inp_luma0[local_pos + 6]; + float luma40 = inp_luma0[local_pos + 75]; + float luma41 = inp_luma0[local_pos + 76]; + float luma42 = inp_luma0[local_pos + 77]; + float luma43 = inp_luma0[local_pos + 78]; + float luma44 = inp_luma0[local_pos + 79]; + float luma45 = inp_luma0[local_pos + 80]; + float luma46 = inp_luma0[local_pos + 81]; + float luma47 = inp_luma0[local_pos + 82]; + float luma48 = inp_luma0[local_pos + 90]; + float luma49 = inp_luma0[local_pos + 91]; + float luma50 = inp_luma0[local_pos + 92]; + float luma51 = inp_luma0[local_pos + 93]; + float luma52 = inp_luma0[local_pos + 94]; + float luma53 = inp_luma0[local_pos + 95]; + float luma54 = inp_luma0[local_pos + 96]; + float luma55 = inp_luma0[local_pos + 97]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma17 - luma1) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (luma18 - luma2) / 2.0; + gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma19 - luma3) / 2.0; + gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma20 - luma4) / 2.0; + gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma21 - luma5) / 2.0; + gy = (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma22 - luma6) / 2.0; + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0; + gy = (luma18 - luma16) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0; + gy = (-luma20 + 8.0 
* luma19 - 8.0 * luma17 + luma16) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0; + gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0; + gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0; + gy = (-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0; + gy = (luma31 - luma29) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0; + gy = 
(luma34 - luma32) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0; + gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0; + gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0; + gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0; + gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0; + gy = (luma39 - luma37) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0; + gy = (luma42 - luma40) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0; + gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0; + gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0; + gy = (-luma46 + 8.0 * luma45 - 8.0 * luma43 + luma42) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0; + gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) 
/ 12.0; + gy = (luma47 - luma45) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma57 - luma41) / 2.0; + gy = (luma50 - luma48) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (luma58 - luma42) / 2.0; + gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma59 - luma43) / 2.0; + gy = (-luma53 + 8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma60 - luma44) / 2.0; + gy = (-luma54 + 8.0 * luma53 - 8.0 * luma51 + luma50) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma61 - luma45) / 2.0; + gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma62 - luma46) / 2.0; + gy = (luma55 - luma53) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + w = texture(ravu_lut4, vec2(0.0625, coord_y)); + res += (inp0[local_pos + 0] + inp0[local_pos + 112]) * w[0]; + res += (inp0[local_pos + 1] + inp0[local_pos + 111]) * w[1]; + res += (inp0[local_pos + 2] + 
inp0[local_pos + 110]) * w[2]; + res += (inp0[local_pos + 3] + inp0[local_pos + 109]) * w[3]; + w = texture(ravu_lut4, vec2(0.1875, coord_y)); + res += (inp0[local_pos + 4] + inp0[local_pos + 108]) * w[0]; + res += (inp0[local_pos + 5] + inp0[local_pos + 107]) * w[1]; + res += (inp0[local_pos + 6] + inp0[local_pos + 106]) * w[2]; + res += (inp0[local_pos + 7] + inp0[local_pos + 105]) * w[3]; + w = texture(ravu_lut4, vec2(0.3125, coord_y)); + res += (inp0[local_pos + 15] + inp0[local_pos + 97]) * w[0]; + res += (inp0[local_pos + 16] + inp0[local_pos + 96]) * w[1]; + res += (inp0[local_pos + 17] + inp0[local_pos + 95]) * w[2]; + res += (inp0[local_pos + 18] + inp0[local_pos + 94]) * w[3]; + w = texture(ravu_lut4, vec2(0.4375, coord_y)); + res += (inp0[local_pos + 19] + inp0[local_pos + 93]) * w[0]; + res += (inp0[local_pos + 20] + inp0[local_pos + 92]) * w[1]; + res += (inp0[local_pos + 21] + inp0[local_pos + 91]) * w[2]; + res += (inp0[local_pos + 22] + inp0[local_pos + 90]) * w[3]; + w = texture(ravu_lut4, vec2(0.5625, coord_y)); + res += (inp0[local_pos + 30] + inp0[local_pos + 82]) * w[0]; + res += (inp0[local_pos + 31] + inp0[local_pos + 81]) * w[1]; + res += (inp0[local_pos + 32] + inp0[local_pos + 80]) * w[2]; + res += (inp0[local_pos + 33] + inp0[local_pos + 79]) * w[3]; + w = texture(ravu_lut4, vec2(0.6875, coord_y)); + res += (inp0[local_pos + 34] + inp0[local_pos + 78]) * w[0]; + res += (inp0[local_pos + 35] + inp0[local_pos + 77]) * w[1]; + res += (inp0[local_pos + 36] + inp0[local_pos + 76]) * w[2]; + res += (inp0[local_pos + 37] + inp0[local_pos + 75]) * w[3]; + w = texture(ravu_lut4, vec2(0.8125, coord_y)); + res += (inp0[local_pos + 45] + inp0[local_pos + 67]) * w[0]; + res += (inp0[local_pos + 46] + inp0[local_pos + 66]) * w[1]; + res += (inp0[local_pos + 47] + inp0[local_pos + 65]) * w[2]; + res += (inp0[local_pos + 48] + inp0[local_pos + 64]) * w[3]; + w = texture(ravu_lut4, vec2(0.9375, coord_y)); + res += (inp0[local_pos + 49] + inp0[local_pos + 
63]) * w[0]; + res += (inp0[local_pos + 50] + inp0[local_pos + 62]) * w[1]; + res += (inp0[local_pos + 51] + inp0[local_pos + 61]) * w[2]; + res += (inp0[local_pos + 52] + inp0[local_pos + 60]) * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0)); + } +} +//!PASS 2 +//!DESC RAVU (step2, rgb, r4, compute) +//!IN INPUT, ravu_lut4, ravu_int11 +//!OUT OUTPUT +//!BLOCK_SIZE 64, 16 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +shared vec3 inp0[585]; +shared float inp_luma0[585]; +shared vec3 inp1[585]; +shared float inp_luma1[585]; + +#define CURRENT_PASS 2 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val) +void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_lut4_tex(pos) (vec4(texture(ravu_lut4, pos))) + +#define ravu_int11_tex(pos) (vec3(texture(ravu_int11, pos).xyz)) +static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y); +static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y)); + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass2(uint2 blockStart, uint3 threadId) { + ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y); + { + for (int id = int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp0[id] = + ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-3.5), float(group_base.y + y) + (-3.5))) + .xyz; + inp_luma0[id] = dot(inp0[id], color_primary); + } + } + { + for (int id = 
int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint x = (uint)id / 15, y = (uint)id % 15; + inp1[id] = + HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).xyz; + inp_luma1[id] = dot(inp1[id], color_primary); + } + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy * 2; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + { + float luma16 = inp_luma0[local_pos + 18]; + float luma9 = inp_luma0[local_pos + 19]; + float luma2 = inp_luma0[local_pos + 20]; + float luma32 = inp_luma0[local_pos + 32]; + float luma25 = inp_luma0[local_pos + 33]; + float luma18 = inp_luma0[local_pos + 34]; + float luma11 = inp_luma0[local_pos + 35]; + float luma4 = inp_luma0[local_pos + 36]; + float luma48 = inp_luma0[local_pos + 46]; + float luma41 = inp_luma0[local_pos + 47]; + float luma34 = inp_luma0[local_pos + 48]; + float luma27 = inp_luma0[local_pos + 49]; + float luma20 = inp_luma0[local_pos + 50]; + float luma13 = inp_luma0[local_pos + 51]; + float luma6 = inp_luma0[local_pos + 52]; + float luma57 = inp_luma0[local_pos + 61]; + float luma50 = inp_luma0[local_pos + 62]; + float luma43 = inp_luma0[local_pos + 63]; + float luma36 = inp_luma0[local_pos + 64]; + float luma29 = inp_luma0[local_pos + 65]; + float luma22 = inp_luma0[local_pos + 66]; + float luma15 = inp_luma0[local_pos + 67]; + float luma59 = inp_luma0[local_pos + 77]; + float luma52 = inp_luma0[local_pos + 78]; + float luma45 = inp_luma0[local_pos + 79]; + float luma38 = inp_luma0[local_pos + 80]; + float luma31 = inp_luma0[local_pos + 81]; + float luma61 = inp_luma0[local_pos + 93]; + float luma54 = inp_luma0[local_pos + 94]; + float luma47 = inp_luma0[local_pos + 95]; + float luma24 = inp_luma1[local_pos + 17]; + float luma17 = inp_luma1[local_pos + 18]; + float luma10 = inp_luma1[local_pos + 19]; + float luma3 = 
inp_luma1[local_pos + 20]; + float luma40 = inp_luma1[local_pos + 31]; + float luma33 = inp_luma1[local_pos + 32]; + float luma26 = inp_luma1[local_pos + 33]; + float luma19 = inp_luma1[local_pos + 34]; + float luma12 = inp_luma1[local_pos + 35]; + float luma5 = inp_luma1[local_pos + 36]; + float luma8 = inp_luma1[local_pos + 3]; + float luma49 = inp_luma1[local_pos + 46]; + float luma42 = inp_luma1[local_pos + 47]; + float luma35 = inp_luma1[local_pos + 48]; + float luma28 = inp_luma1[local_pos + 49]; + float luma1 = inp_luma1[local_pos + 4]; + float luma21 = inp_luma1[local_pos + 50]; + float luma14 = inp_luma1[local_pos + 51]; + float luma58 = inp_luma1[local_pos + 61]; + float luma51 = inp_luma1[local_pos + 62]; + float luma44 = inp_luma1[local_pos + 63]; + float luma37 = inp_luma1[local_pos + 64]; + float luma30 = inp_luma1[local_pos + 65]; + float luma23 = inp_luma1[local_pos + 66]; + float luma60 = inp_luma1[local_pos + 77]; + float luma53 = inp_luma1[local_pos + 78]; + float luma46 = inp_luma1[local_pos + 79]; + float luma39 = inp_luma1[local_pos + 80]; + float luma62 = inp_luma1[local_pos + 93]; + float luma55 = inp_luma1[local_pos + 94]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma17 - luma1) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (luma18 - luma2) / 2.0; + gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma19 - luma3) / 2.0; + gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma20 - luma4) / 2.0; + gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma21 - luma5) / 2.0; + gy = (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma22 - luma6) 
/ 2.0; + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0; + gy = (luma18 - luma16) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0; + gy = (-luma20 + 8.0 * luma19 - 8.0 * luma17 + luma16) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0; + gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0; + gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0; + gy = 
(-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0; + gy = (luma31 - luma29) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0; + gy = (luma34 - luma32) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0; + gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0; + gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0; + gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0; + gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0; + gy = (luma39 - luma37) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0; + gy = (luma42 - luma40) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0; + gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0; + gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0; + gy = (-luma46 + 8.0 * 
luma45 - 8.0 * luma43 + luma42) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0; + gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) / 12.0; + gy = (luma47 - luma45) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma57 - luma41) / 2.0; + gy = (luma50 - luma48) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (luma58 - luma42) / 2.0; + gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma59 - luma43) / 2.0; + gy = (-luma53 + 8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma60 - luma44) / 2.0; + gy = (-luma54 + 8.0 * luma53 - 8.0 * luma51 + luma50) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma61 - luma45) / 2.0; + gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma62 - luma46) / 2.0; + gy = (luma55 - luma53) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 
0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + w = texture(ravu_lut4, vec2(0.0625, coord_y)); + res += (inp0[local_pos + 4] + inp0[local_pos + 109]) * w[0]; + res += (inp1[local_pos + 4] + inp1[local_pos + 93]) * w[1]; + res += (inp0[local_pos + 20] + inp0[local_pos + 93]) * w[2]; + res += (inp1[local_pos + 20] + inp1[local_pos + 77]) * w[3]; + w = texture(ravu_lut4, vec2(0.1875, coord_y)); + res += (inp0[local_pos + 36] + inp0[local_pos + 77]) * w[0]; + res += (inp1[local_pos + 36] + inp1[local_pos + 61]) * w[1]; + res += (inp0[local_pos + 52] + inp0[local_pos + 61]) * w[2]; + res += (inp1[local_pos + 52] + inp1[local_pos + 45]) * w[3]; + w = texture(ravu_lut4, vec2(0.3125, coord_y)); + res += (inp1[local_pos + 3] + inp1[local_pos + 94]) * w[0]; + res += (inp0[local_pos + 19] + inp0[local_pos + 94]) * w[1]; + res += (inp1[local_pos + 19] + inp1[local_pos + 78]) * w[2]; + res += (inp0[local_pos + 35] + inp0[local_pos + 78]) * w[3]; + w = texture(ravu_lut4, vec2(0.4375, coord_y)); + res += (inp1[local_pos + 35] + inp1[local_pos + 62]) * w[0]; + res += (inp0[local_pos + 51] + inp0[local_pos + 62]) * w[1]; + res += (inp1[local_pos + 51] + inp1[local_pos + 46]) * w[2]; + res += (inp0[local_pos + 67] + inp0[local_pos + 46]) * w[3]; + w = texture(ravu_lut4, vec2(0.5625, coord_y)); + res += (inp0[local_pos + 18] + inp0[local_pos + 95]) * w[0]; + res += (inp1[local_pos + 18] + inp1[local_pos + 79]) * w[1]; + res += (inp0[local_pos + 34] + inp0[local_pos + 79]) * w[2]; + res += (inp1[local_pos + 34] + inp1[local_pos + 63]) * w[3]; + w = texture(ravu_lut4, vec2(0.6875, coord_y)); + res += (inp0[local_pos + 50] + inp0[local_pos + 63]) * w[0]; + res += (inp1[local_pos + 50] + inp1[local_pos + 47]) * w[1]; + res += (inp0[local_pos + 66] + inp0[local_pos + 47]) * w[2]; + res += (inp1[local_pos + 66] + inp1[local_pos + 31]) * w[3]; + w = texture(ravu_lut4, vec2(0.8125, coord_y)); 
+ res += (inp1[local_pos + 17] + inp1[local_pos + 80]) * w[0]; + res += (inp0[local_pos + 33] + inp0[local_pos + 80]) * w[1]; + res += (inp1[local_pos + 33] + inp1[local_pos + 64]) * w[2]; + res += (inp0[local_pos + 49] + inp0[local_pos + 64]) * w[3]; + w = texture(ravu_lut4, vec2(0.9375, coord_y)); + res += (inp1[local_pos + 49] + inp1[local_pos + 48]) * w[0]; + res += (inp0[local_pos + 65] + inp0[local_pos + 48]) * w[1]; + res += (inp1[local_pos + 65] + inp1[local_pos + 32]) * w[2]; + res += (inp0[local_pos + 81] + inp0[local_pos + 32]) * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res, 1.0)); + } + { + float luma62 = inp_luma0[local_pos + 108]; + float luma55 = inp_luma0[local_pos + 109]; + float luma8 = inp_luma0[local_pos + 18]; + float luma1 = inp_luma0[local_pos + 19]; + float luma24 = inp_luma0[local_pos + 32]; + float luma17 = inp_luma0[local_pos + 33]; + float luma10 = inp_luma0[local_pos + 34]; + float luma3 = inp_luma0[local_pos + 35]; + float luma40 = inp_luma0[local_pos + 46]; + float luma33 = inp_luma0[local_pos + 47]; + float luma26 = inp_luma0[local_pos + 48]; + float luma19 = inp_luma0[local_pos + 49]; + float luma12 = inp_luma0[local_pos + 50]; + float luma5 = inp_luma0[local_pos + 51]; + float luma49 = inp_luma0[local_pos + 61]; + float luma42 = inp_luma0[local_pos + 62]; + float luma35 = inp_luma0[local_pos + 63]; + float luma28 = inp_luma0[local_pos + 64]; + float luma21 = inp_luma0[local_pos + 65]; + float luma14 = inp_luma0[local_pos + 66]; + float luma58 = inp_luma0[local_pos + 76]; + float luma51 = inp_luma0[local_pos + 77]; + float luma44 = inp_luma0[local_pos + 78]; + float luma37 = inp_luma0[local_pos + 79]; + float luma30 = inp_luma0[local_pos + 80]; + float luma23 = inp_luma0[local_pos + 81]; + float luma60 = inp_luma0[local_pos + 92]; + float luma53 = inp_luma0[local_pos + 93]; + float luma46 = inp_luma0[local_pos + 94]; + float luma39 = inp_luma0[local_pos + 95]; + 
float luma16 = inp_luma1[local_pos + 17]; + float luma9 = inp_luma1[local_pos + 18]; + float luma2 = inp_luma1[local_pos + 19]; + float luma32 = inp_luma1[local_pos + 31]; + float luma25 = inp_luma1[local_pos + 32]; + float luma18 = inp_luma1[local_pos + 33]; + float luma11 = inp_luma1[local_pos + 34]; + float luma4 = inp_luma1[local_pos + 35]; + float luma48 = inp_luma1[local_pos + 45]; + float luma41 = inp_luma1[local_pos + 46]; + float luma34 = inp_luma1[local_pos + 47]; + float luma27 = inp_luma1[local_pos + 48]; + float luma20 = inp_luma1[local_pos + 49]; + float luma13 = inp_luma1[local_pos + 50]; + float luma6 = inp_luma1[local_pos + 51]; + float luma57 = inp_luma1[local_pos + 60]; + float luma50 = inp_luma1[local_pos + 61]; + float luma43 = inp_luma1[local_pos + 62]; + float luma36 = inp_luma1[local_pos + 63]; + float luma29 = inp_luma1[local_pos + 64]; + float luma22 = inp_luma1[local_pos + 65]; + float luma15 = inp_luma1[local_pos + 66]; + float luma59 = inp_luma1[local_pos + 76]; + float luma52 = inp_luma1[local_pos + 77]; + float luma45 = inp_luma1[local_pos + 78]; + float luma38 = inp_luma1[local_pos + 79]; + float luma31 = inp_luma1[local_pos + 80]; + float luma61 = inp_luma1[local_pos + 92]; + float luma54 = inp_luma1[local_pos + 93]; + float luma47 = inp_luma1[local_pos + 94]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma17 - luma1) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (luma18 - luma2) / 2.0; + gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma19 - luma3) / 2.0; + gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma20 - luma4) / 2.0; + gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma21 - luma5) / 2.0; + gy 
= (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma22 - luma6) / 2.0; + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0; + gy = (luma18 - luma16) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0; + gy = (-luma20 + 8.0 * luma19 - 8.0 * luma17 + luma16) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0; + gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0; + gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 
12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0; + gy = (-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0; + gy = (luma31 - luma29) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0; + gy = (luma34 - luma32) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0; + gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0; + gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0; + gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013; + gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0; + gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0; + gy = (luma39 - luma37) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0; + gy = (luma42 - luma40) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0; + gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0; + gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0; + abd += 
vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0; + gy = (-luma46 + 8.0 * luma45 - 8.0 * luma43 + luma42) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872; + gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0; + gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854; + gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) / 12.0; + gy = (luma47 - luma45) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma57 - luma41) / 2.0; + gy = (luma50 - luma48) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + gx = (luma58 - luma42) / 2.0; + gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma59 - luma43) / 2.0; + gy = (-luma53 + 8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma60 - luma44) / 2.0; + gy = (-luma54 + 8.0 * luma53 - 8.0 * luma51 + luma50) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275; + gx = (luma61 - luma45) / 2.0; + gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175; + gx = (luma62 - luma46) / 2.0; + gy = (luma55 - luma53) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 
3.141592653589793); + float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + w = texture(ravu_lut4, vec2(0.0625, coord_y)); + res += (inp1[local_pos + 3] + inp1[local_pos + 108]) * w[0]; + res += (inp0[local_pos + 19] + inp0[local_pos + 108]) * w[1]; + res += (inp1[local_pos + 19] + inp1[local_pos + 92]) * w[2]; + res += (inp0[local_pos + 35] + inp0[local_pos + 92]) * w[3]; + w = texture(ravu_lut4, vec2(0.1875, coord_y)); + res += (inp1[local_pos + 35] + inp1[local_pos + 76]) * w[0]; + res += (inp0[local_pos + 51] + inp0[local_pos + 76]) * w[1]; + res += (inp1[local_pos + 51] + inp1[local_pos + 60]) * w[2]; + res += (inp0[local_pos + 67] + inp0[local_pos + 60]) * w[3]; + w = texture(ravu_lut4, vec2(0.3125, coord_y)); + res += (inp0[local_pos + 18] + inp0[local_pos + 109]) * w[0]; + res += (inp1[local_pos + 18] + inp1[local_pos + 93]) * w[1]; + res += (inp0[local_pos + 34] + inp0[local_pos + 93]) * w[2]; + res += (inp1[local_pos + 34] + inp1[local_pos + 77]) * w[3]; + w = texture(ravu_lut4, vec2(0.4375, coord_y)); + res += (inp0[local_pos + 50] + inp0[local_pos + 77]) * w[0]; + res += (inp1[local_pos + 50] + inp1[local_pos + 61]) * w[1]; + res += (inp0[local_pos + 66] + inp0[local_pos + 61]) * w[2]; + res += (inp1[local_pos + 66] + inp1[local_pos + 45]) * w[3]; + w = texture(ravu_lut4, vec2(0.5625, coord_y)); + res += (inp1[local_pos + 17] + inp1[local_pos + 94]) * w[0]; + res += (inp0[local_pos + 33] + inp0[local_pos + 94]) * w[1]; + res += (inp1[local_pos + 33] + inp1[local_pos + 78]) * w[2]; + res += (inp0[local_pos + 49] + inp0[local_pos + 78]) * w[3]; + w = texture(ravu_lut4, vec2(0.6875, coord_y)); + res += (inp1[local_pos + 49] + inp1[local_pos + 62]) * w[0]; + res += (inp0[local_pos + 65] + inp0[local_pos + 62]) * w[1]; + res += (inp1[local_pos 
+ 65] + inp1[local_pos + 46]) * w[2]; + res += (inp0[local_pos + 81] + inp0[local_pos + 46]) * w[3]; + w = texture(ravu_lut4, vec2(0.8125, coord_y)); + res += (inp0[local_pos + 32] + inp0[local_pos + 95]) * w[0]; + res += (inp1[local_pos + 32] + inp1[local_pos + 79]) * w[1]; + res += (inp0[local_pos + 48] + inp0[local_pos + 79]) * w[2]; + res += (inp1[local_pos + 48] + inp1[local_pos + 63]) * w[3]; + w = texture(ravu_lut4, vec2(0.9375, coord_y)); + res += (inp0[local_pos + 64] + inp0[local_pos + 63]) * w[0]; + res += (inp1[local_pos + 64] + inp1[local_pos + 47]) * w[1]; + res += (inp0[local_pos + 80] + inp0[local_pos + 47]) * w[2]; + res += (inp1[local_pos + 80] + inp1[local_pos + 31]) * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res, 1.0)); + } + vec3 res; + res = inp0[local_pos + 64]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res, 1.0)); + res = inp1[local_pos + 48]; + imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res, 1.0)); +} diff --git a/src/Effects/RAVU/RAVU_Zoom_AR_R2.hlsl b/src/Effects/RAVU/RAVU_Zoom_AR_R2.hlsl new file mode 100644 index 000000000..dba8b22ec --- /dev/null +++ b/src/Effects/RAVU/RAVU_Zoom_AR_R2.hlsl @@ -0,0 +1,325 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-zoom.py --target luma --weights-file weights\ravu-zoom_weights-r2.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +// +// +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_zoom_lut2_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut2; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut2; + +//!TEXTURE +//!SOURCE ravu_zoom_lut2_ar_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut2_ar; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut2_ar; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Zoom-AR (luma, r2, compute) +//!IN INPUT, ravu_zoom_lut2, ravu_zoom_lut2_ar +//!OUT OUTPUT +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x)) +shared float samples[432]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_zoom_lut2_tex(pos) (vec4(texture(ravu_zoom_lut2, pos))) + +#define ravu_zoom_lut2_ar_tex(pos) (vec4(texture(ravu_zoom_lut2_ar, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) 
+#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1); + ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 1; + ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 2; + ivec2 rect = rectr - rectl + 1; + for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y; + id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint y = (uint)id / rect.x, x = (uint)id % rect.x; + samples[x + y * 36] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).x; + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID)); + vec2 subpix = fract(pos - 0.5); + pos -= subpix; + subpix = LUTPOS(subpix, vec2(9.0, 9.0)); + vec2 subpix_inv = 1.0 - subpix; + subpix /= vec2(2.0, 288.0); + subpix_inv /= vec2(2.0, 288.0); + ivec2 ipos = ivec2(floor(pos)) - rectl; + int lpos = ipos.x + ipos.y * 36; + float sample0 = samples[-37 + lpos]; + float sample1 = samples[-1 + lpos]; + float sample2 = samples[35 + lpos]; + float sample3 = samples[71 + lpos]; + float sample4 = samples[-36 + lpos]; + float sample5 = samples[0 + lpos]; + float sample6 = samples[36 + lpos]; + float sample7 = samples[72 + lpos]; + float sample8 = samples[-35 + lpos]; + float sample9 = samples[1 + lpos]; + float sample10 = samples[37 + lpos]; + float sample11 = samples[73 + lpos]; + float sample12 = samples[-34 + lpos]; + float sample13 = samples[2 + lpos]; + float sample14 = samples[38 + lpos]; + float sample15 = samples[74 + lpos]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (sample4 - sample0); + gy = (sample1 - sample0); + 
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (sample5 - sample1); + gy = (sample2 - sample0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample6 - sample2); + gy = (sample3 - sample1) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample7 - sample3); + gy = (sample3 - sample2); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (sample8 - sample0) / 2.0; + gy = (sample5 - sample4); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample9 - sample1) / 2.0; + gy = (sample6 - sample4) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (sample10 - sample2) / 2.0; + gy = (sample7 - sample5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (sample11 - sample3) / 2.0; + gy = (sample7 - sample6); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample12 - sample4) / 2.0; + gy = (sample9 - sample8); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample13 - sample5) / 2.0; + gy = (sample10 - sample8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (sample14 - sample6) / 2.0; + gy = (sample11 - sample9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (sample15 - sample7) / 2.0; + gy = (sample11 - sample10); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample12 - sample8); + gy = (sample13 - sample12); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (sample13 - sample9); + gy = (sample14 - sample12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample14 - sample10); + gy = (sample15 - sample13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample15 - sample11); + gy = (sample15 - sample14); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = 
abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0; + float res = 0.0; + vec4 w; + vec4 cg, cg1; + float lo = 0.0, hi = 0.0; + float lo2 = 0.0, hi2 = 0.0; + w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix); + res += sample0 * w[0]; + res += sample1 * w[1]; + res += sample2 * w[2]; + res += sample3 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix); + res += sample4 * w[0]; + res += sample5 * w[1]; + res += sample6 * w[2]; + res += sample7 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix_inv); + res += sample15 * w[0]; + res += sample14 * w[1]; + res += sample13 * w[2]; + res += sample12 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix_inv); + res += sample11 * w[0]; + res += sample10 * w[1]; + res += sample9 * w[2]; + res += sample8 * w[3]; + w = texture(ravu_zoom_lut2_ar, vec2(0.0, coord_y) + subpix); + cg = vec4(0.1 + sample0, 1.1 - sample0, 0.1 + sample1, 1.1 - sample1); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg *= cg1; + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = vec4(0.1 + sample2, 1.1 - sample2, 0.1 + sample3, 1.1 - sample3); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + 
cg *= cg; + cg *= cg; + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg *= cg1; + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut2_ar, vec2(0.5, coord_y) + subpix); + cg = vec4(0.1 + sample4, 1.1 - sample4, 0.1 + sample5, 1.1 - sample5); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg *= cg1; + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = vec4(0.1 + sample6, 1.1 - sample6, 0.1 + sample7, 1.1 - sample7); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg *= cg1; + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut2_ar, vec2(0.0, coord_y) + subpix_inv); + cg = vec4(0.1 + sample15, 1.1 - sample15, 0.1 + sample14, 1.1 - sample14); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg *= cg1; + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = vec4(0.1 + sample13, 1.1 - sample13, 0.1 + sample12, 1.1 - sample12); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg *= cg1; + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut2_ar, vec2(0.5, coord_y) + subpix_inv); + cg = vec4(0.1 + sample11, 1.1 - sample11, 0.1 + sample10, 1.1 - sample10); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg *= cg1; + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = vec4(0.1 + sample9, 1.1 - sample9, 0.1 + sample8, 1.1 - 
sample8); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg *= cg1; + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + hi = hi2 / hi - 0.1; + lo = 1.1 - lo2 / lo; + res = mix(res, clamp(res, lo, hi), 0.800000); + imageStore(out_image, ivec2(gl_GlobalInvocationID), res); +} diff --git a/src/Effects/RAVU/RAVU_Zoom_AR_R2_RGB.hlsl b/src/Effects/RAVU/RAVU_Zoom_AR_R2_RGB.hlsl new file mode 100644 index 000000000..d3d40dff6 --- /dev/null +++ b/src/Effects/RAVU/RAVU_Zoom_AR_R2_RGB.hlsl @@ -0,0 +1,335 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-zoom.py --target rgb --weights-file weights\ravu-zoom_weights-r2.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +// +// +Texture2D OUTPUT; + +//!TEXTURE +//!SOURCE ravu_zoom_lut2_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut2; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut2; + +//!TEXTURE +//!SOURCE ravu_zoom_lut2_ar_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut2_ar; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut2_ar; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Zoom-AR (rgb, r2, compute) +//!IN INPUT, ravu_zoom_lut2, ravu_zoom_lut2_ar +//!OUT OUTPUT +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x)) +shared vec3 samples[432]; /* Cache of input RGB texels filled and read by Pass1; 432 = 36 x 12, where 36 is the row stride Pass1 uses to index it. NOTE(review): the shared qualifier is presumably mapped to groupshared by prescalers.hlsli; confirm there. */ + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val) +void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_zoom_lut2_tex(pos) (vec4(texture(ravu_zoom_lut2, pos))) + +#define ravu_zoom_lut2_ar_tex(pos) (vec4(texture(ravu_zoom_lut2_ar, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { /* RGB RAVU-Zoom pass with anti-ringing. Steps: (1) the group copies the input texels it needs into samples; (2) each thread reads a 4x4 neighbourhood and builds a luma gradient tensor; (3) the tensor's angle, strength and coherence pick a row of the weight LUT; (4) the LUT-weighted sum of the 16 texels is soft-clamped and stored through imageStore. */ + ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1); + ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 1; + ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 2; + ivec2 rect = rectr - rectl + 1; /* rectl and rectr bound the input texels needed by the whole group: the base texels of its first and last output pixel, widened by 1 texel before and 2 after to cover the 4x4 kernel. The loop below strides the flat index id by the group's thread count so the threads share the copy into samples. */ + for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y; + id
+= int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint y = (uint)id / rect.x, x = (uint)id % rect.x; + samples[x + y * 36] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).xyz; + } + barrier(); /* Separates the cooperative fill of samples above from the neighbourhood reads below. NOTE(review): barrier() comes from the GLSL-compat layer and is presumably a group-wide sync; confirm in prescalers.hlsli. The out-of-bounds early return is placed after this call, so every thread reaches it. */ +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID)); + vec2 subpix = fract(pos - 0.5); + pos -= subpix; + subpix = LUTPOS(subpix, vec2(9.0, 9.0)); + vec2 subpix_inv = 1.0 - subpix; + subpix /= vec2(2.0, 288.0); + subpix_inv /= vec2(2.0, 288.0); + ivec2 ipos = ivec2(floor(pos)) - rectl; + int lpos = ipos.x + ipos.y * 36; /* lpos is the index of the base texel inside samples. sample0..sample15 below are its 4x4 neighbourhood in column-major order: dx runs -1..2 across the four groups of four, dy runs -1..2 inside each group (offset = dx plus 36 times dy). */ + vec3 sample0 = samples[-37 + lpos]; + vec3 sample1 = samples[-1 + lpos]; + vec3 sample2 = samples[35 + lpos]; + vec3 sample3 = samples[71 + lpos]; + vec3 sample4 = samples[-36 + lpos]; + vec3 sample5 = samples[0 + lpos]; + vec3 sample6 = samples[36 + lpos]; + vec3 sample7 = samples[72 + lpos]; + vec3 sample8 = samples[-35 + lpos]; + vec3 sample9 = samples[1 + lpos]; + vec3 sample10 = samples[37 + lpos]; + vec3 sample11 = samples[73 + lpos]; + vec3 sample12 = samples[-34 + lpos]; + vec3 sample13 = samples[2 + lpos]; + vec3 sample14 = samples[38 + lpos]; + vec3 sample15 = samples[74 + lpos]; + float luma0 = dot(sample0, color_primary); + float luma1 = dot(sample1, color_primary); + float luma2 = dot(sample2, color_primary); + float luma3 = dot(sample3, color_primary); + float luma4 = dot(sample4, color_primary); + float luma5 = dot(sample5, color_primary); + float luma6 = dot(sample6, color_primary); + float luma7 = dot(sample7, color_primary); + float luma8 = dot(sample8, color_primary); + float luma9 = dot(sample9, color_primary); + float luma10 = dot(sample10, color_primary); + float luma11 = dot(sample11, color_primary); + float luma12 = dot(sample12, color_primary); + float luma13 = dot(sample13, color_primary); +
float luma14 = dot(sample14, color_primary); + float luma15 = dot(sample15, color_primary); + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; /* Gradient tensor: for each of the 16 taps, gx and gy are luma differences (one-sided at the edge of the 4x4 window, halved central differences inside), and abd accumulates the weighted triple (gx gx, gx gy, gy gy). The per-tap weights are constants emitted by the generator. */ + gx = (luma4 - luma0); + gy = (luma1 - luma0); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma5 - luma1); + gy = (luma2 - luma0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma6 - luma2); + gy = (luma3 - luma1) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma7 - luma3); + gy = (luma3 - luma2); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma8 - luma0) / 2.0; + gy = (luma5 - luma4); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma9 - luma1) / 2.0; + gy = (luma6 - luma4) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma10 - luma2) / 2.0; + gy = (luma7 - luma5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma11 - luma3) / 2.0; + gy = (luma7 - luma6); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma4) / 2.0; + gy = (luma9 - luma8); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma13 - luma5) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma14 - luma6) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma15 - luma7) / 2.0; + gy = (luma11 - luma10); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma8); + gy = (luma13 - luma12); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma13 - luma9); + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma14 - luma10); + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma11); + gy = (luma15 - luma14); + abd +=
vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; /* Closed-form eigen-analysis of the symmetric 2x2 matrix with rows (a, b) and (b, d): T is its trace, D its determinant, and L1 >= L2 are its eigenvalues. theta is the direction taken from atan(L1 - a, b), wrapped into the range 0 to pi by the mod and forced to 0 when b is near zero. */ + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0; /* angle (24 buckets), strength (0..3) and coherence (0..2) are packed into one of 24 x 4 x 3 = 288 LUT rows. coord_y is the start of that row in normalised texture space; subpix.y, already divided by 288, selects the position inside it. NOTE(review): the nested mix calls take a boolean selector, presumably GLSL semantics (second value when true) supplied by prescalers.hlsli; confirm there. */ + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + mat4x3 cg, cg1; + vec3 lo = vec3(0.0, 0.0, 0.0), hi = vec3(0.0, 0.0, 0.0); + vec3 lo2 = vec3(0.0, 0.0, 0.0), hi2 = vec3(0.0, 0.0, 0.0); + w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix); + res += sample0 * w[0]; + res += sample1 * w[1]; + res += sample2 * w[2]; + res += sample3 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix); + res += sample4 * w[0]; + res += sample5 * w[1]; + res += sample6 * w[2]; + res += sample7 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix_inv); + res += sample15 * w[0]; + res += sample14 * w[1]; + res += sample13 * w[2]; + res += sample12 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix_inv); + res += sample11 * w[0]; + res += sample10 * w[1]; + res += sample9 * w[2]; + res += sample8 * w[3]; /* res now holds the LUT-weighted sum of all 16 texels; samples 8..15 reuse the same two LUT columns through the mirrored coordinate subpix_inv. Anti-ringing follows: each cg holds (0.1 plus s) and (1.1 minus s) for two texels, five component-wise squarings raise it to the 32nd power and the product with cg1 to the 33rd. hi and lo therefore sum weighted 32nd powers, hi2 and lo2 weighted 33rd powers, using weights from the second LUT. */ + w = texture(ravu_zoom_lut2_ar, vec2(0.0, coord_y) + subpix); + cg = mat4x3(0.1 + sample0, 1.1 - sample0, 0.1 + sample1, 1.1 - sample1); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[0] + cg[2] * w[1]; + lo
+= cg[1] * w[0] + cg[3] * w[1]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = mat4x3(0.1 + sample2, 1.1 - sample2, 0.1 + sample3, 1.1 - sample3); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut2_ar, vec2(0.5, coord_y) + subpix); + cg = mat4x3(0.1 + sample4, 1.1 - sample4, 0.1 + sample5, 1.1 - sample5); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = mat4x3(0.1 + sample6, 1.1 - sample6, 0.1 + sample7, 1.1 - sample7); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut2_ar, vec2(0.0, coord_y) + subpix_inv); + cg = mat4x3(0.1 + sample15, 1.1 - sample15, 0.1 + sample14, 1.1 - sample14); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = mat4x3(0.1 + sample13,
1.1 - sample13, 0.1 + sample12, 1.1 - sample12); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut2_ar, vec2(0.5, coord_y) + subpix_inv); + cg = mat4x3(0.1 + sample11, 1.1 - sample11, 0.1 + sample10, 1.1 - sample10); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = mat4x3(0.1 + sample9, 1.1 - sample9, 0.1 + sample8, 1.1 - sample8); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + hi = hi2 / hi - 0.1; + lo = 1.1 - lo2 / lo; + res = mix(res, clamp(res, lo, hi), 0.800000); /* hi and lo are now per-channel bounds: the ratio of the weighted 33rd-power sum to the 32nd-power sum leans toward the largest weighted term, and the 0.1 and 1.1 offsets are undone. The result moves 80 percent of the way toward its value clamped to those bounds. */ + imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0)); +} diff --git a/src/Effects/RAVU/RAVU_Zoom_AR_R3.hlsl b/src/Effects/RAVU/RAVU_Zoom_AR_R3.hlsl new file mode 100644 index 000000000..99ba60220 --- /dev/null +++ b/src/Effects/RAVU/RAVU_Zoom_AR_R3.hlsl @@ -0,0 +1,373 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly.
+// Generated by: ravu-zoom.py --target luma --weights-file weights\ravu-zoom_weights-r3.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see https://www.gnu.org/licenses/. + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +// +// +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_zoom_lut3_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut3; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut3; + +//!TEXTURE +//!SOURCE ravu_zoom_lut3_ar_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut3_ar; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut3_ar; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Zoom-AR (luma, r3, compute) +//!IN INPUT, ravu_zoom_lut3, ravu_zoom_lut3_ar +//!OUT OUTPUT +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x)) +shared float samples[532]; /* Cache of input luma values filled and read by Pass1; 532 = 38 x 14, where 38 is the row stride Pass1 uses to index it. NOTE(review): the shared qualifier is presumably mapped to groupshared by prescalers.hlsli; confirm there. */ + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { /* Stores one output pixel for the luma-only pass: value is the upscaled luma, and the two chroma components come from a linearly filtered INPUT sample at HOOKED_map(pos) transformed by rgb2uv; the triple goes back to RGB through yuv2rgb with alpha 1. NOTE(review): rgb2uv, yuv2rgb and rgb2y are not defined in this file, presumably colour matrices from prescalers.hlsli; confirm there. */ + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos),
0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_zoom_lut3_tex(pos) (vec4(texture(ravu_zoom_lut3, pos))) + +#define ravu_zoom_lut3_ar_tex(pos) (vec4(texture(ravu_zoom_lut3_ar, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { /* Luma RAVU-Zoom pass (radius 3) with anti-ringing. Steps: (1) the group copies the input luma it needs into samples; (2) each thread reads a 6x6 neighbourhood and builds a gradient tensor from its inner 4x4; (3) the tensor's angle, strength and coherence pick a row of the weight LUT; (4) the LUT-weighted sum of the 36 values is soft-clamped and stored through imageStore. */ + ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1); + ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 2; + ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 3; + ivec2 rect = rectr - rectl + 1; /* rectl and rectr bound the input texels needed by the whole group: the base texels of its first and last output pixel, widened by 2 texels before and 3 after to cover the 6x6 kernel. The loop below strides the flat index id by the group's thread count so the threads share the copy into samples. */ + for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y; + id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint y = (uint)id / rect.x, x = (uint)id % rect.x; + samples[x + y * 38] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).x; + } + barrier(); /* Separates the cooperative fill of samples above from the neighbourhood reads below. NOTE(review): barrier() comes from the GLSL-compat layer and is presumably a group-wide sync; confirm in prescalers.hlsli. The out-of-bounds early return is placed after this call, so every thread reaches it. */ +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID)); + vec2 subpix = fract(pos - 0.5); + pos -= subpix; + subpix = LUTPOS(subpix, vec2(9.0, 9.0)); + vec2 subpix_inv = 1.0 - subpix; + vec2 subpix_ar = subpix / vec2(2.0, 288.0); + vec2 subpix_inv_ar = subpix_inv / vec2(2.0, 288.0); + subpix /= vec2(5.0, 288.0); + subpix_inv /= vec2(5.0, 288.0); + ivec2 ipos = ivec2(floor(pos)) - rectl; + int lpos = ipos.x + ipos.y * 38; /* lpos is the index of the base texel inside samples. sample0..sample35 below are its 6x6 neighbourhood in column-major order: dx runs -2..3 across the six groups of six, dy runs -2..3 inside each group (offset = dx plus 38 times dy). The main LUT is addressed in fifths of its width (subpix divided by 5), the anti-ringing LUT in halves (subpix_ar divided by 2). */ + float sample0 = samples[-78 + lpos]; + float sample1 = samples[-40 + lpos]; + float sample2 = samples[-2 + lpos]; + float sample3 = samples[36 + lpos]; + float sample4 =
samples[74 + lpos]; + float sample5 = samples[112 + lpos]; + float sample6 = samples[-77 + lpos]; + float sample7 = samples[-39 + lpos]; + float sample8 = samples[-1 + lpos]; + float sample9 = samples[37 + lpos]; + float sample10 = samples[75 + lpos]; + float sample11 = samples[113 + lpos]; + float sample12 = samples[-76 + lpos]; + float sample13 = samples[-38 + lpos]; + float sample14 = samples[0 + lpos]; + float sample15 = samples[38 + lpos]; + float sample16 = samples[76 + lpos]; + float sample17 = samples[114 + lpos]; + float sample18 = samples[-75 + lpos]; + float sample19 = samples[-37 + lpos]; + float sample20 = samples[1 + lpos]; + float sample21 = samples[39 + lpos]; + float sample22 = samples[77 + lpos]; + float sample23 = samples[115 + lpos]; + float sample24 = samples[-74 + lpos]; + float sample25 = samples[-36 + lpos]; + float sample26 = samples[2 + lpos]; + float sample27 = samples[40 + lpos]; + float sample28 = samples[78 + lpos]; + float sample29 = samples[116 + lpos]; + float sample30 = samples[-73 + lpos]; + float sample31 = samples[-35 + lpos]; + float sample32 = samples[3 + lpos]; + float sample33 = samples[41 + lpos]; + float sample34 = samples[79 + lpos]; + float sample35 = samples[117 + lpos]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; /* Gradient tensor over the inner 4x4 taps: gx and gy are either halved central differences or the four-point stencil (minus f at 2, 8 f at 1, minus 8 f at -1, f at -2) over 12, depending on how many neighbours the 6x6 window offers in that direction. abd accumulates the weighted triple (gx gx, gx gy, gy gy); the per-tap weights are constants emitted by the generator. */ + gx = (sample13 - sample1) / 2.0; + gy = (sample8 - sample6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (sample14 - sample2) / 2.0; + gy = (-sample10 + 8.0 * sample9 - 8.0 * sample7 + sample6) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample15 - sample3) / 2.0; + gy = (-sample11 + 8.0 * sample10 - 8.0 * sample8 + sample7) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample16 - sample4) / 2.0; + gy = (sample11 - sample9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (-sample25 + 8.0 * sample19 - 8.0 * sample7 + sample1) / 12.0; + gy = (sample14 - sample12) / 2.0; +
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-sample26 + 8.0 * sample20 - 8.0 * sample8 + sample2) / 12.0; + gy = (-sample16 + 8.0 * sample15 - 8.0 * sample13 + sample12) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-sample27 + 8.0 * sample21 - 8.0 * sample9 + sample3) / 12.0; + gy = (-sample17 + 8.0 * sample16 - 8.0 * sample14 + sample13) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-sample28 + 8.0 * sample22 - 8.0 * sample10 + sample4) / 12.0; + gy = (sample17 - sample15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-sample31 + 8.0 * sample25 - 8.0 * sample13 + sample7) / 12.0; + gy = (sample20 - sample18) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-sample32 + 8.0 * sample26 - 8.0 * sample14 + sample8) / 12.0; + gy = (-sample22 + 8.0 * sample21 - 8.0 * sample19 + sample18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-sample33 + 8.0 * sample27 - 8.0 * sample15 + sample9) / 12.0; + gy = (-sample23 + 8.0 * sample22 - 8.0 * sample20 + sample19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-sample34 + 8.0 * sample28 - 8.0 * sample16 + sample10) / 12.0; + gy = (sample23 - sample21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample31 - sample19) / 2.0; + gy = (sample26 - sample24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (sample32 - sample20) / 2.0; + gy = (-sample28 + 8.0 * sample27 - 8.0 * sample25 + sample24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample33 - sample21) / 2.0; + gy = (-sample29 + 8.0 * sample28 - 8.0 * sample26 + sample25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample34 - sample22) / 2.0; + gy = (sample29 - sample27) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) *
0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; /* Closed-form eigen-analysis of the symmetric 2x2 matrix with rows (a, b) and (b, d): T is its trace, D its determinant, and L1 >= L2 are its eigenvalues. theta is the direction taken from atan(L1 - a, b), wrapped into the range 0 to pi by the mod and forced to 0 when b is near zero. */ + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0; /* angle (24 buckets), strength (0..3) and coherence (0..2) are packed into one of 24 x 4 x 3 = 288 LUT rows. coord_y is the start of that row in normalised texture space; subpix.y, already divided by 288, selects the position inside it. NOTE(review): the nested mix calls take a boolean selector, presumably GLSL semantics (second value when true) supplied by prescalers.hlsli; confirm there. */ + float res = 0.0; + vec4 w; + vec4 cg, cg1; + float lo = 0.0, hi = 0.0; + float lo2 = 0.0, hi2 = 0.0; + w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix); + res += sample0 * w[0]; + res += sample1 * w[1]; + res += sample2 * w[2]; + res += sample3 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix); + res += sample4 * w[0]; + res += sample5 * w[1]; + res += sample6 * w[2]; + res += sample7 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix); + res += sample8 * w[0]; + res += sample9 * w[1]; + res += sample10 * w[2]; + res += sample11 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix); + res += sample12 * w[0]; + res += sample13 * w[1]; + res += sample14 * w[2]; + res += sample15 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix); + res += sample16 * w[0]; + res += sample17 * w[1]; + w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix_inv); + res += sample35 * w[0]; + res += sample34 * w[1]; + res += sample33 * w[2]; + res += sample32 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix_inv); + res += sample31 * w[0]; + res += sample30 * w[1]; + res += sample29 * w[2]; + res += sample28 * w[3]; + w =
texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix_inv); + res += sample27 * w[0]; + res += sample26 * w[1]; + res += sample25 * w[2]; + res += sample24 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix_inv); + res += sample23 * w[0]; + res += sample22 * w[1]; + res += sample21 * w[2]; + res += sample20 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix_inv); + res += sample19 * w[0]; + res += sample18 * w[1]; /* res now holds the LUT-weighted sum of all 36 values; samples 18..35 reuse the same five LUT columns through the mirrored coordinate subpix_inv. Anti-ringing follows and uses only the inner 4x4 (samples 7..10, 13..16, 19..22, 25..28): each cg holds (0.1 plus s) and (1.1 minus s) for two values, five squarings raise it to the 32nd power and the product with cg1 to the 33rd. hi and lo therefore sum weighted 32nd powers, hi2 and lo2 weighted 33rd powers, using weights from the second LUT. */ + w = texture(ravu_zoom_lut3_ar, vec2(0.0, coord_y) + subpix_ar); + cg = vec4(0.1 + sample7, 1.1 - sample7, 0.1 + sample8, 1.1 - sample8); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg *= cg1; + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = vec4(0.1 + sample9, 1.1 - sample9, 0.1 + sample10, 1.1 - sample10); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg *= cg1; + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut3_ar, vec2(0.5, coord_y) + subpix_ar); + cg = vec4(0.1 + sample13, 1.1 - sample13, 0.1 + sample14, 1.1 - sample14); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg *= cg1; + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = vec4(0.1 + sample15, 1.1 - sample15, 0.1 + sample16, 1.1 - sample16); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg *= cg1; + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut3_ar, vec2(0.0, coord_y) + subpix_inv_ar); + cg = vec4(0.1 + sample28, 1.1 - sample28, 0.1 + sample27, 1.1 - sample27); + cg1 = cg; + cg *= cg; + cg *=
cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg *= cg1; + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = vec4(0.1 + sample26, 1.1 - sample26, 0.1 + sample25, 1.1 - sample25); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg *= cg1; + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut3_ar, vec2(0.5, coord_y) + subpix_inv_ar); + cg = vec4(0.1 + sample22, 1.1 - sample22, 0.1 + sample21, 1.1 - sample21); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg *= cg1; + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = vec4(0.1 + sample20, 1.1 - sample20, 0.1 + sample19, 1.1 - sample19); + cg1 = cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + cg *= cg; + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg *= cg1; + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + hi = hi2 / hi - 0.1; + lo = 1.1 - lo2 / lo; + res = mix(res, clamp(res, lo, hi), 0.800000); /* hi and lo are now bounds: the ratio of the weighted 33rd-power sum to the 32nd-power sum leans toward the largest weighted term, and the 0.1 and 1.1 offsets are undone. The result moves 80 percent of the way toward its value clamped to those bounds, matching the anti-ringing 0.8 option in the generator command line. */ + imageStore(out_image, ivec2(gl_GlobalInvocationID), res); +} diff --git a/src/Effects/RAVU/RAVU_Zoom_AR_R3_RGB.hlsl b/src/Effects/RAVU/RAVU_Zoom_AR_R3_RGB.hlsl new file mode 100644 index 000000000..6cfe46697 --- /dev/null +++ b/src/Effects/RAVU/RAVU_Zoom_AR_R3_RGB.hlsl @@ -0,0 +1,399 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly.
+// Generated by: ravu-zoom.py --target rgb --weights-file weights\ravu-zoom_weights-r3.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +// +// +Texture2D OUTPUT; + +//!TEXTURE +//!SOURCE ravu_zoom_lut3_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut3; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut3; + +//!TEXTURE +//!SOURCE ravu_zoom_lut3_ar_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut3_ar; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut3_ar; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Zoom-AR (rgb, r3, compute) +//!IN INPUT, ravu_zoom_lut3, ravu_zoom_lut3_ar +//!OUT OUTPUT +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x)) +shared vec3 samples[532]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val) +void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const 
float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_zoom_lut3_tex(pos) (vec4(texture(ravu_zoom_lut3, pos))) + +#define ravu_zoom_lut3_ar_tex(pos) (vec4(texture(ravu_zoom_lut3_ar, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1); + ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 2; + ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 3; + ivec2 rect = rectr - rectl + 1; + for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y; + id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint y = (uint)id / rect.x, x = (uint)id % rect.x; + samples[x + y * 38] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).xyz; + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID)); + vec2 subpix = fract(pos - 0.5); + pos -= subpix; + subpix = LUTPOS(subpix, vec2(9.0, 9.0)); + vec2 subpix_inv = 1.0 - subpix; + vec2 subpix_ar = subpix / vec2(2.0, 288.0); + vec2 subpix_inv_ar = subpix_inv / vec2(2.0, 288.0); + subpix /= vec2(5.0, 288.0); + subpix_inv /= vec2(5.0, 288.0); + ivec2 ipos = ivec2(floor(pos)) - rectl; + int lpos = ipos.x + ipos.y * 38; + vec3 sample0 = samples[-78 + lpos]; + vec3 sample1 = samples[-40 + lpos]; + vec3 sample2 = samples[-2 + lpos]; + vec3 sample3 = samples[36 + lpos]; + vec3 sample4 = samples[74 + lpos]; + vec3 sample5 = samples[112 + lpos]; + vec3 sample6 = samples[-77 + lpos]; + vec3 sample7 = samples[-39 + lpos]; + vec3 sample8 = samples[-1 + 
lpos]; + vec3 sample9 = samples[37 + lpos]; + vec3 sample10 = samples[75 + lpos]; + vec3 sample11 = samples[113 + lpos]; + vec3 sample12 = samples[-76 + lpos]; + vec3 sample13 = samples[-38 + lpos]; + vec3 sample14 = samples[0 + lpos]; + vec3 sample15 = samples[38 + lpos]; + vec3 sample16 = samples[76 + lpos]; + vec3 sample17 = samples[114 + lpos]; + vec3 sample18 = samples[-75 + lpos]; + vec3 sample19 = samples[-37 + lpos]; + vec3 sample20 = samples[1 + lpos]; + vec3 sample21 = samples[39 + lpos]; + vec3 sample22 = samples[77 + lpos]; + vec3 sample23 = samples[115 + lpos]; + vec3 sample24 = samples[-74 + lpos]; + vec3 sample25 = samples[-36 + lpos]; + vec3 sample26 = samples[2 + lpos]; + vec3 sample27 = samples[40 + lpos]; + vec3 sample28 = samples[78 + lpos]; + vec3 sample29 = samples[116 + lpos]; + vec3 sample30 = samples[-73 + lpos]; + vec3 sample31 = samples[-35 + lpos]; + vec3 sample32 = samples[3 + lpos]; + vec3 sample33 = samples[41 + lpos]; + vec3 sample34 = samples[79 + lpos]; + vec3 sample35 = samples[117 + lpos]; + float luma1 = dot(sample1, color_primary); + float luma2 = dot(sample2, color_primary); + float luma3 = dot(sample3, color_primary); + float luma4 = dot(sample4, color_primary); + float luma6 = dot(sample6, color_primary); + float luma7 = dot(sample7, color_primary); + float luma8 = dot(sample8, color_primary); + float luma9 = dot(sample9, color_primary); + float luma10 = dot(sample10, color_primary); + float luma11 = dot(sample11, color_primary); + float luma12 = dot(sample12, color_primary); + float luma13 = dot(sample13, color_primary); + float luma14 = dot(sample14, color_primary); + float luma15 = dot(sample15, color_primary); + float luma16 = dot(sample16, color_primary); + float luma17 = dot(sample17, color_primary); + float luma18 = dot(sample18, color_primary); + float luma19 = dot(sample19, color_primary); + float luma20 = dot(sample20, color_primary); + float luma21 = dot(sample21, color_primary); + float luma22 = dot(sample22, 
color_primary); + float luma23 = dot(sample23, color_primary); + float luma24 = dot(sample24, color_primary); + float luma25 = dot(sample25, color_primary); + float luma26 = dot(sample26, color_primary); + float luma27 = dot(sample27, color_primary); + float luma28 = dot(sample28, color_primary); + float luma29 = dot(sample29, color_primary); + float luma31 = dot(sample31, color_primary); + float luma32 = dot(sample32, color_primary); + float luma33 = dot(sample33, color_primary); + float luma34 = dot(sample34, color_primary); + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma13 - luma1) / 2.0; + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma14 - luma2) / 2.0; + gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma3) / 2.0; + gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma16 - luma4) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0; + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0; + gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0; + gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0; + gy = (luma20 - luma18) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 
0.06153352068439959; + gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma31 - luma19) / 2.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma32 - luma20) / 2.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma33 - luma21) / 2.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma34 - luma22) / 2.0; + gy = (luma29 - luma27) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + mat4x3 cg, cg1; + vec3 lo = vec3(0.0, 0.0, 0.0), hi = 
vec3(0.0, 0.0, 0.0); + vec3 lo2 = vec3(0.0, 0.0, 0.0), hi2 = vec3(0.0, 0.0, 0.0); + w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix); + res += sample0 * w[0]; + res += sample1 * w[1]; + res += sample2 * w[2]; + res += sample3 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix); + res += sample4 * w[0]; + res += sample5 * w[1]; + res += sample6 * w[2]; + res += sample7 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix); + res += sample8 * w[0]; + res += sample9 * w[1]; + res += sample10 * w[2]; + res += sample11 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix); + res += sample12 * w[0]; + res += sample13 * w[1]; + res += sample14 * w[2]; + res += sample15 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix); + res += sample16 * w[0]; + res += sample17 * w[1]; + w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix_inv); + res += sample35 * w[0]; + res += sample34 * w[1]; + res += sample33 * w[2]; + res += sample32 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix_inv); + res += sample31 * w[0]; + res += sample30 * w[1]; + res += sample29 * w[2]; + res += sample28 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix_inv); + res += sample27 * w[0]; + res += sample26 * w[1]; + res += sample25 * w[2]; + res += sample24 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix_inv); + res += sample23 * w[0]; + res += sample22 * w[1]; + res += sample21 * w[2]; + res += sample20 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix_inv); + res += sample19 * w[0]; + res += sample18 * w[1]; + w = texture(ravu_zoom_lut3_ar, vec2(0.0, coord_y) + subpix_ar); + cg = mat4x3(0.1 + sample7, 1.1 - sample7, 0.1 + sample8, 1.1 - sample8); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + 
cg[3] * w[1]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = mat4x3(0.1 + sample9, 1.1 - sample9, 0.1 + sample10, 1.1 - sample10); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut3_ar, vec2(0.5, coord_y) + subpix_ar); + cg = mat4x3(0.1 + sample13, 1.1 - sample13, 0.1 + sample14, 1.1 - sample14); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = mat4x3(0.1 + sample15, 1.1 - sample15, 0.1 + sample16, 1.1 - sample16); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut3_ar, vec2(0.0, coord_y) + subpix_inv_ar); + cg = mat4x3(0.1 + sample28, 1.1 - sample28, 0.1 + sample27, 1.1 - sample27); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = mat4x3(0.1 + sample26, 
1.1 - sample26, 0.1 + sample25, 1.1 - sample25); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + w = texture(ravu_zoom_lut3_ar, vec2(0.5, coord_y) + subpix_inv_ar); + cg = mat4x3(0.1 + sample22, 1.1 - sample22, 0.1 + sample21, 1.1 - sample21); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[0] + cg[2] * w[1]; + lo += cg[1] * w[0] + cg[3] * w[1]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[0] + cg[2] * w[1]; + lo2 += cg[1] * w[0] + cg[3] * w[1]; + cg = mat4x3(0.1 + sample20, 1.1 - sample20, 0.1 + sample19, 1.1 - sample19); + cg1 = cg; + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + cg = matrixCompMult(cg, cg); + hi += cg[0] * w[2] + cg[2] * w[3]; + lo += cg[1] * w[2] + cg[3] * w[3]; + cg = matrixCompMult(cg, cg1); + hi2 += cg[0] * w[2] + cg[2] * w[3]; + lo2 += cg[1] * w[2] + cg[3] * w[3]; + hi = hi2 / hi - 0.1; + lo = 1.1 - lo2 / lo; + res = mix(res, clamp(res, lo, hi), 0.800000); + imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0)); +} diff --git a/src/Effects/RAVU/RAVU_Zoom_R2.hlsl b/src/Effects/RAVU/RAVU_Zoom_R2.hlsl new file mode 100644 index 000000000..408a39da0 --- /dev/null +++ b/src/Effects/RAVU/RAVU_Zoom_R2.hlsl @@ -0,0 +1,209 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. 
+// Generated by: ravu-zoom.py --target luma --weights-file weights\ravu-zoom_weights-r2.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +// +// +Texture2D OUTPUT; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_INPUT_LINEAR; + +//!TEXTURE +//!SOURCE ravu_zoom_lut2_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut2; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut2; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Zoom (luma, r2, compute) +//!IN INPUT, ravu_zoom_lut2 +//!OUT OUTPUT +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x)) +shared float samples[432]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); +} + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 
INPUT_pt = float2(GetInputPt()); + +#define ravu_zoom_lut2_tex(pos) (vec4(texture(ravu_zoom_lut2, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1); + ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 1; + ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 2; + ivec2 rect = rectr - rectl + 1; + for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y; + id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint y = (uint)id / rect.x, x = (uint)id % rect.x; + samples[x + y * 36] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).x; + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID)); + vec2 subpix = fract(pos - 0.5); + pos -= subpix; + subpix = LUTPOS(subpix, vec2(9.0, 9.0)); + vec2 subpix_inv = 1.0 - subpix; + subpix /= vec2(2.0, 288.0); + subpix_inv /= vec2(2.0, 288.0); + ivec2 ipos = ivec2(floor(pos)) - rectl; + int lpos = ipos.x + ipos.y * 36; + float sample0 = samples[-37 + lpos]; + float sample1 = samples[-1 + lpos]; + float sample2 = samples[35 + lpos]; + float sample3 = samples[71 + lpos]; + float sample4 = samples[-36 + lpos]; + float sample5 = samples[0 + lpos]; + float sample6 = samples[36 + lpos]; + float sample7 = samples[72 + lpos]; + float sample8 = samples[-35 + lpos]; + float sample9 = samples[1 + lpos]; + float sample10 = samples[37 + lpos]; + float sample11 = samples[73 + lpos]; + float sample12 = samples[-34 + lpos]; + float sample13 = samples[2 + lpos]; + float sample14 = samples[38 + lpos]; 
+ float sample15 = samples[74 + lpos]; + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (sample4 - sample0); + gy = (sample1 - sample0); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (sample5 - sample1); + gy = (sample2 - sample0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample6 - sample2); + gy = (sample3 - sample1) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample7 - sample3); + gy = (sample3 - sample2); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (sample8 - sample0) / 2.0; + gy = (sample5 - sample4); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample9 - sample1) / 2.0; + gy = (sample6 - sample4) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (sample10 - sample2) / 2.0; + gy = (sample7 - sample5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (sample11 - sample3) / 2.0; + gy = (sample7 - sample6); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample12 - sample4) / 2.0; + gy = (sample9 - sample8); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample13 - sample5) / 2.0; + gy = (sample10 - sample8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (sample14 - sample6) / 2.0; + gy = (sample11 - sample9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (sample15 - sample7) / 2.0; + gy = (sample11 - sample10); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample12 - sample8); + gy = (sample13 - sample12); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (sample13 - sample9); + gy = (sample14 - sample12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (sample14 - sample10); + gy = (sample15 - sample13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 
0.06153352068439959; + gx = (sample15 - sample11); + gy = (sample15 - sample14); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0; + float res = 0.0; + vec4 w; + w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix); + res += sample0 * w[0]; + res += sample1 * w[1]; + res += sample2 * w[2]; + res += sample3 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix); + res += sample4 * w[0]; + res += sample5 * w[1]; + res += sample6 * w[2]; + res += sample7 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix_inv); + res += sample15 * w[0]; + res += sample14 * w[1]; + res += sample13 * w[2]; + res += sample12 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix_inv); + res += sample11 * w[0]; + res += sample10 * w[1]; + res += sample9 * w[2]; + res += sample8 * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID), res); +} diff --git a/src/Effects/RAVU/RAVU_Zoom_R2_RGB.hlsl b/src/Effects/RAVU/RAVU_Zoom_R2_RGB.hlsl new file mode 100644 index 000000000..9f9cea73f --- /dev/null +++ b/src/Effects/RAVU/RAVU_Zoom_R2_RGB.hlsl @@ -0,0 +1,219 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers 
+// Please don't edit this file directly. +// Generated by: ravu-zoom.py --target rgb --weights-file weights\ravu-zoom_weights-r2.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +// +// +Texture2D OUTPUT; + +//!TEXTURE +//!SOURCE ravu_zoom_lut2_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut2; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut2; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Zoom (rgb, r2, compute) +//!IN INPUT, ravu_zoom_lut2 +//!OUT OUTPUT +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x)) +shared vec3 samples[432]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val) +void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_zoom_lut2_tex(pos) (vec4(texture(ravu_zoom_lut2, pos))) + +#define 
HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1); + ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 1; + ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 2; + ivec2 rect = rectr - rectl + 1; + for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y; + id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint y = (uint)id / rect.x, x = (uint)id % rect.x; + samples[x + y * 36] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).xyz; + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID)); + vec2 subpix = fract(pos - 0.5); + pos -= subpix; + subpix = LUTPOS(subpix, vec2(9.0, 9.0)); + vec2 subpix_inv = 1.0 - subpix; + subpix /= vec2(2.0, 288.0); + subpix_inv /= vec2(2.0, 288.0); + ivec2 ipos = ivec2(floor(pos)) - rectl; + int lpos = ipos.x + ipos.y * 36; + vec3 sample0 = samples[-37 + lpos]; + vec3 sample1 = samples[-1 + lpos]; + vec3 sample2 = samples[35 + lpos]; + vec3 sample3 = samples[71 + lpos]; + vec3 sample4 = samples[-36 + lpos]; + vec3 sample5 = samples[0 + lpos]; + vec3 sample6 = samples[36 + lpos]; + vec3 sample7 = samples[72 + lpos]; + vec3 sample8 = samples[-35 + lpos]; + vec3 sample9 = samples[1 + lpos]; + vec3 sample10 = samples[37 + lpos]; + vec3 sample11 = samples[73 + lpos]; + vec3 sample12 = samples[-34 + lpos]; + vec3 sample13 = samples[2 + lpos]; + vec3 sample14 = samples[38 + lpos]; + vec3 sample15 = samples[74 + lpos]; + float luma0 = dot(sample0, color_primary); + float luma1 = dot(sample1, color_primary); 
+ float luma2 = dot(sample2, color_primary); + float luma3 = dot(sample3, color_primary); + float luma4 = dot(sample4, color_primary); + float luma5 = dot(sample5, color_primary); + float luma6 = dot(sample6, color_primary); + float luma7 = dot(sample7, color_primary); + float luma8 = dot(sample8, color_primary); + float luma9 = dot(sample9, color_primary); + float luma10 = dot(sample10, color_primary); + float luma11 = dot(sample11, color_primary); + float luma12 = dot(sample12, color_primary); + float luma13 = dot(sample13, color_primary); + float luma14 = dot(sample14, color_primary); + float luma15 = dot(sample15, color_primary); + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma4 - luma0); + gy = (luma1 - luma0); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma5 - luma1); + gy = (luma2 - luma0) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma6 - luma2); + gy = (luma3 - luma1) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma7 - luma3); + gy = (luma3 - luma2); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma8 - luma0) / 2.0; + gy = (luma5 - luma4); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma9 - luma1) / 2.0; + gy = (luma6 - luma4) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma10 - luma2) / 2.0; + gy = (luma7 - luma5) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma11 - luma3) / 2.0; + gy = (luma7 - luma6); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma4) / 2.0; + gy = (luma9 - luma8); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma13 - luma5) / 2.0; + gy = (luma10 - luma8) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (luma14 - luma6) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 
0.07901060453704994; + gx = (luma15 - luma7) / 2.0; + gy = (luma11 - luma10); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma12 - luma8); + gy = (luma13 - luma12); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma13 - luma9); + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma14 - luma10); + gy = (luma15 - luma13) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma11); + gy = (luma15 - luma14); + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix); + res += sample0 * w[0]; + res += sample1 * w[1]; + res += sample2 * w[2]; + res += sample3 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix); + res += sample4 * w[0]; + res += sample5 * w[1]; + res += sample6 * w[2]; + res += sample7 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix_inv); + res += sample15 * w[0]; + res += sample14 * w[1]; + res += sample13 * w[2]; + res += sample12 * w[3]; + w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix_inv); + res += sample11 * w[0]; + 
res += sample10 * w[1]; + res += sample9 * w[2]; + res += sample8 * w[3]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0)); +} diff --git a/src/Effects/RAVU/RAVU_Zoom_R3.hlsl b/src/Effects/RAVU/RAVU_Zoom_R3.hlsl index ef70f5158..8a9b9b8c0 100644 --- a/src/Effects/RAVU/RAVU_Zoom_R3.hlsl +++ b/src/Effects/RAVU/RAVU_Zoom_R3.hlsl @@ -1,242 +1,255 @@ -// 移植自 https://raw.githubusercontent.com/bjin/mpv-prescalers/master/compute/ravu-zoom-r3.hook +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. +// Generated by: ravu-zoom.py --target luma --weights-file weights\ravu-zoom_weights-r3.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
//!MAGPIE EFFECT -//!VERSION 3 - +//!VERSION 4 //!TEXTURE Texture2D INPUT; -//!TEXTURE -//!SOURCE RAVU_Zoom_R3_Weights.dds -//!FORMAT R16G16B16A16_FLOAT -Texture2D ravu_zoom_lut3; - - //!SAMPLER //!FILTER POINT -SamplerState sam; +SamplerState sam_INPUT; + +//!TEXTURE +// +// +Texture2D OUTPUT; //!SAMPLER //!FILTER LINEAR -SamplerState sam1; +SamplerState sam_INPUT_LINEAR; +//!TEXTURE +//!SOURCE ravu_zoom_lut3_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut3; -//!PASS 1 -//!IN INPUT, ravu_zoom_lut3 -//!BLOCK_SIZE 16, 16 -//!NUM_THREADS 16, 16 +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut3; -#define NUM_PIXELS_X (MP_BLOCK_WIDTH + 5) -#define NUM_PIXELS_Y (MP_BLOCK_HEIGHT + 5) +//!COMMON +#include "prescalers.hlsli" -groupshared float samples[NUM_PIXELS_X * NUM_PIXELS_Y]; +#define LAST_PASS 1 -float GetLuma(float3 color) { - return dot(float3(0.299f, 0.587f, 0.114f), color); +//!PASS 1 +//!DESC RAVU-Zoom (luma, r3, compute) +//!IN INPUT, ravu_zoom_lut3 +//!OUT OUTPUT +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x)) +shared float samples[532]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) dot(x.rgb, rgb2y) +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x) +void imageStoreOverride(uint2 pos, float value) { + float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb); + OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0); } -#define PI 3.1415926535897932384626433832795 - -// https://github.com/mpv-player/mpv/issues/9390#issuecomment-961082863 -#define LUT_POS(x, lut_size) lerp(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x)) +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); -const static float3x3 yuv2rgb = { - 1, -0.00093, 1.401687, - 1, -0.3437, -0.71417, - 1, 1.77216, 0.00099 -}; - 
-const static float2x3 rgb2uv = { - -0.169, -0.331, 0.5, - 0.5, -0.419, -0.081 -}; - -float mod(float x, float y) { - return x - y * floor(x / y); -} +#define ravu_zoom_lut3_tex(pos) (vec4(texture(ravu_zoom_lut3, pos))) +#define HOOKED_tex(pos) INPUT_tex(pos) +#define HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt void Pass1(uint2 blockStart, uint3 threadId) { - const float2 inputPt = GetInputPt(); - const uint2 inputSize = GetInputSize(); - const float2 rcpScale = rcp(GetScale()); - - const int2 rectl = floor(blockStart * rcpScale - 0.5f) - 2; - const int2 rectr = floor((blockStart + uint2(MP_BLOCK_WIDTH, MP_BLOCK_HEIGHT)) * rcpScale - 0.5f) + 3; - const uint2 rect = uint2(rectr - rectl + 1); - - const int maxId = int(rect.x * rect.y); - - for (int id = int(threadId.y * MP_NUM_THREADS_X + threadId.x); id < maxId; id += MP_NUM_THREADS_X * MP_NUM_THREADS_Y) { + ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1); + ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 2; + ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 3; + ivec2 rect = rectr - rectl + 1; + for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y; + id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { uint y = (uint)id / rect.x, x = (uint)id % rect.x; - samples[x + y * NUM_PIXELS_X] = GetLuma(INPUT.SampleLevel(sam, inputPt * (rectl + uint2(x, y) + 0.5f), 0).rgb); + samples[x + y * 38] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).x; } - - GroupMemoryBarrierWithGroupSync(); - + barrier(); +#if CURRENT_PASS == LAST_PASS uint2 destPos = blockStart + threadId.xy; - if (!CheckViewport(destPos)) { + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { return; } - - float2 pos = (destPos + 0.5f) * rcpScale; - float2 subpix = frac(pos - 0.5f); +#endif + vec2 pos = HOOKED_size * 
HOOKED_map(ivec2(gl_GlobalInvocationID)); + vec2 subpix = fract(pos - 0.5); pos -= subpix; - subpix = LUT_POS(subpix, 9); - float2 subpix_inv = 1.0 - subpix; - subpix /= float2(5.0, 288.0); - subpix_inv /= float2(5.0, 288.0); - uint2 ipos = uint2(floor(pos) - rectl); - uint lpos = ipos.x + ipos.y * NUM_PIXELS_X; - float sample0 = samples[lpos - 2 * NUM_PIXELS_X - 2]; - float sample1 = samples[lpos - NUM_PIXELS_X - 2]; - float sample2 = samples[lpos - 2]; - float sample3 = samples[lpos + NUM_PIXELS_X - 2]; - float sample4 = samples[lpos + 2 * NUM_PIXELS_X - 2]; - float sample5 = samples[lpos + 3 * NUM_PIXELS_X - 2]; - float sample6 = samples[lpos - 2 * NUM_PIXELS_X - 1]; - float sample7 = samples[lpos - NUM_PIXELS_X - 1]; - float sample8 = samples[lpos - 1]; - float sample9 = samples[lpos + NUM_PIXELS_X - 1]; - float sample10 = samples[lpos + 2 * NUM_PIXELS_X - 1]; - float sample11 = samples[lpos + 3 * NUM_PIXELS_X - 1]; - float sample12 = samples[lpos - 2 * NUM_PIXELS_X]; - float sample13 = samples[lpos - NUM_PIXELS_X]; - float sample14 = samples[lpos]; - float sample15 = samples[lpos + NUM_PIXELS_X]; - float sample16 = samples[lpos + 2 * NUM_PIXELS_X]; - float sample17 = samples[lpos + 3 * NUM_PIXELS_X]; - float sample18 = samples[lpos - 2 * NUM_PIXELS_X + 1]; - float sample19 = samples[lpos - NUM_PIXELS_X + 1]; - float sample20 = samples[lpos + 1]; - float sample21 = samples[lpos + NUM_PIXELS_X + 1]; - float sample22 = samples[lpos + 2 * NUM_PIXELS_X + 1]; - float sample23 = samples[lpos + 3 * NUM_PIXELS_X + 1]; - float sample24 = samples[lpos - 2 * NUM_PIXELS_X + 2]; - float sample25 = samples[lpos - NUM_PIXELS_X + 2]; - float sample26 = samples[lpos + 2]; - float sample27 = samples[lpos + NUM_PIXELS_X + 2]; - float sample28 = samples[lpos + 2 * NUM_PIXELS_X + 2]; - float sample29 = samples[lpos + 3 * NUM_PIXELS_X + 2]; - float sample30 = samples[lpos - 2 * NUM_PIXELS_X + 3]; - float sample31 = samples[lpos - NUM_PIXELS_X + 3]; - float sample32 = samples[lpos + 
3]; - float sample33 = samples[lpos + NUM_PIXELS_X + 3]; - float sample34 = samples[lpos + 2 * NUM_PIXELS_X + 3]; - float sample35 = samples[lpos + 3 * NUM_PIXELS_X + 3]; - float3 abd = 0; + subpix = LUTPOS(subpix, vec2(9.0, 9.0)); + vec2 subpix_inv = 1.0 - subpix; + subpix /= vec2(5.0, 288.0); + subpix_inv /= vec2(5.0, 288.0); + ivec2 ipos = ivec2(floor(pos)) - rectl; + int lpos = ipos.x + ipos.y * 38; + float sample0 = samples[-78 + lpos]; + float sample1 = samples[-40 + lpos]; + float sample2 = samples[-2 + lpos]; + float sample3 = samples[36 + lpos]; + float sample4 = samples[74 + lpos]; + float sample5 = samples[112 + lpos]; + float sample6 = samples[-77 + lpos]; + float sample7 = samples[-39 + lpos]; + float sample8 = samples[-1 + lpos]; + float sample9 = samples[37 + lpos]; + float sample10 = samples[75 + lpos]; + float sample11 = samples[113 + lpos]; + float sample12 = samples[-76 + lpos]; + float sample13 = samples[-38 + lpos]; + float sample14 = samples[0 + lpos]; + float sample15 = samples[38 + lpos]; + float sample16 = samples[76 + lpos]; + float sample17 = samples[114 + lpos]; + float sample18 = samples[-75 + lpos]; + float sample19 = samples[-37 + lpos]; + float sample20 = samples[1 + lpos]; + float sample21 = samples[39 + lpos]; + float sample22 = samples[77 + lpos]; + float sample23 = samples[115 + lpos]; + float sample24 = samples[-74 + lpos]; + float sample25 = samples[-36 + lpos]; + float sample26 = samples[2 + lpos]; + float sample27 = samples[40 + lpos]; + float sample28 = samples[78 + lpos]; + float sample29 = samples[116 + lpos]; + float sample30 = samples[-73 + lpos]; + float sample31 = samples[-35 + lpos]; + float sample32 = samples[3 + lpos]; + float sample33 = samples[41 + lpos]; + float sample34 = samples[79 + lpos]; + float sample35 = samples[117 + lpos]; + vec3 abd = vec3(0.0, 0.0, 0.0); float gx, gy; gx = (sample13 - sample1) / 2.0; gy = (sample8 - sample6) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + abd 
+= vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; gx = (sample14 - sample2) / 2.0; gy = (-sample10 + 8.0 * sample9 - 8.0 * sample7 + sample6) / 12.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; gx = (sample15 - sample3) / 2.0; gy = (-sample11 + 8.0 * sample10 - 8.0 * sample8 + sample7) / 12.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; gx = (sample16 - sample4) / 2.0; gy = (sample11 - sample9) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; gx = (-sample25 + 8.0 * sample19 - 8.0 * sample7 + sample1) / 12.0; gy = (sample14 - sample12) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; gx = (-sample26 + 8.0 * sample20 - 8.0 * sample8 + sample2) / 12.0; gy = (-sample16 + 8.0 * sample15 - 8.0 * sample13 + sample12) / 12.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; gx = (-sample27 + 8.0 * sample21 - 8.0 * sample9 + sample3) / 12.0; gy = (-sample17 + 8.0 * sample16 - 8.0 * sample14 + sample13) / 12.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; gx = (-sample28 + 8.0 * sample22 - 8.0 * sample10 + sample4) / 12.0; gy = (sample17 - sample15) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; gx = (-sample31 + 8.0 * sample25 - 8.0 * sample13 + sample7) / 12.0; gy = (sample20 - sample18) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; gx = (-sample32 + 8.0 * sample26 - 8.0 * sample14 
+ sample8) / 12.0; gy = (-sample22 + 8.0 * sample21 - 8.0 * sample19 + sample18) / 12.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; gx = (-sample33 + 8.0 * sample27 - 8.0 * sample15 + sample9) / 12.0; gy = (-sample23 + 8.0 * sample22 - 8.0 * sample20 + sample19) / 12.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; gx = (-sample34 + 8.0 * sample28 - 8.0 * sample16 + sample10) / 12.0; gy = (sample23 - sample21) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; gx = (sample31 - sample19) / 2.0; gy = (sample26 - sample24) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; gx = (sample32 - sample20) / 2.0; gy = (-sample28 + 8.0 * sample27 - 8.0 * sample25 + sample24) / 12.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; gx = (sample33 - sample21) / 2.0; gy = (-sample29 + 8.0 * sample28 - 8.0 * sample26 + sample25) / 12.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; gx = (sample34 - sample22) / 2.0; gy = (sample29 - sample27) / 2.0; - abd += float3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; float a = abd.x, b = abd.y, d = abd.z; float T = a + d, D = a * d - b * b; float delta = sqrt(max(T * T / 4.0 - D, 0.0)); float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); - float theta = lerp(mod(atan2(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 
0.0, abs(b) < 1.192092896e-7); float lambda = sqrtL1; - float mu = lerp((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); float angle = floor(theta * 24.0 / 3.141592653589793); - float strength = lerp(lerp(0.0, 1.0, lambda >= 0.004), lerp(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); - float coherence = lerp(lerp(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0; float res = 0.0; - float4 w; - w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.0, coord_y) + subpix, 0); + vec4 w; + w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix); res += sample0 * w[0]; res += sample1 * w[1]; res += sample2 * w[2]; res += sample3 * w[3]; - w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.2, coord_y) + subpix, 0); + w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix); res += sample4 * w[0]; res += sample5 * w[1]; res += sample6 * w[2]; res += sample7 * w[3]; - w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.4, coord_y) + subpix, 0); + w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix); res += sample8 * w[0]; res += sample9 * w[1]; res += sample10 * w[2]; res += sample11 * w[3]; - w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.6, coord_y) + subpix, 0); + w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix); res += sample12 * w[0]; res += sample13 * w[1]; res += sample14 * w[2]; res += sample15 * w[3]; - w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.8, coord_y) + subpix, 0); + w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix); res += sample16 * w[0]; res += sample17 * w[1]; - w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.0, coord_y) + subpix_inv, 0); + w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix_inv); res 
+= sample35 * w[0]; res += sample34 * w[1]; res += sample33 * w[2]; res += sample32 * w[3]; - w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.2, coord_y) + subpix_inv, 0); + w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix_inv); res += sample31 * w[0]; res += sample30 * w[1]; res += sample29 * w[2]; res += sample28 * w[3]; - w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.4, coord_y) + subpix_inv, 0); + w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix_inv); res += sample27 * w[0]; res += sample26 * w[1]; res += sample25 * w[2]; res += sample24 * w[3]; - w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.6, coord_y) + subpix_inv, 0); + w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix_inv); res += sample23 * w[0]; res += sample22 * w[1]; res += sample21 * w[2]; res += sample20 * w[3]; - w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.8, coord_y) + subpix_inv, 0); + w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix_inv); res += sample19 * w[0]; res += sample18 * w[1]; - res = saturate(res); - - float2 originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * GetOutputPt(), 0).rgb); - WriteToOutput(destPos, mul(yuv2rgb, float3(res, originUV))); + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID), res); } diff --git a/src/Effects/RAVU/RAVU_Zoom_R3_RGB.hlsl b/src/Effects/RAVU/RAVU_Zoom_R3_RGB.hlsl new file mode 100644 index 000000000..8c66a1b27 --- /dev/null +++ b/src/Effects/RAVU/RAVU_Zoom_R3_RGB.hlsl @@ -0,0 +1,281 @@ +// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers +// Please don't edit this file directly. 
+// Generated by: ravu-zoom.py --target rgb --weights-file weights\ravu-zoom_weights-r3.py --float-format float16dx --use-compute-shader --use-magpie --overwrite +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//!MAGPIE EFFECT +//!VERSION 4 + +//!TEXTURE +Texture2D INPUT; + +//!SAMPLER +//!FILTER POINT +SamplerState sam_INPUT; + +//!TEXTURE +// +// +Texture2D OUTPUT; + +//!TEXTURE +//!SOURCE ravu_zoom_lut3_f16.dds +//!FORMAT R16G16B16A16_FLOAT +Texture2D ravu_zoom_lut3; + +//!SAMPLER +//!FILTER LINEAR +SamplerState sam_ravu_zoom_lut3; + +//!COMMON +#include "prescalers.hlsli" + +#define LAST_PASS 1 + +//!PASS 1 +//!DESC RAVU-Zoom (rgb, r3, compute) +//!IN INPUT, ravu_zoom_lut3 +//!OUT OUTPUT +//!BLOCK_SIZE 32, 8 +//!NUM_THREADS 32, 8 +static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722); +#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x)) +shared vec3 samples[532]; + +#define CURRENT_PASS 1 + +#define GET_SAMPLE(x) x +#define imageStore(out_image, pos, val) imageStoreOverride(pos, val) +void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; } + +#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos))) +static const float2 INPUT_size = float2(GetInputSize()); +static const float2 INPUT_pt = float2(GetInputPt()); + +#define ravu_zoom_lut3_tex(pos) (vec4(texture(ravu_zoom_lut3, pos))) + +#define HOOKED_tex(pos) INPUT_tex(pos) +#define
HOOKED_size INPUT_size +#define HOOKED_pt INPUT_pt + +void Pass1(uint2 blockStart, uint3 threadId) { + ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); + ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1); + ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 2; + ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 3; + ivec2 rect = rectr - rectl + 1; + for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y; + id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) { + uint y = (uint)id / rect.x, x = (uint)id % rect.x; + samples[x + y * 38] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).xyz; + } + barrier(); +#if CURRENT_PASS == LAST_PASS + uint2 destPos = blockStart + threadId.xy; + uint2 outputSize = GetOutputSize(); + if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) { + return; + } +#endif + vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID)); + vec2 subpix = fract(pos - 0.5); + pos -= subpix; + subpix = LUTPOS(subpix, vec2(9.0, 9.0)); + vec2 subpix_inv = 1.0 - subpix; + subpix /= vec2(5.0, 288.0); + subpix_inv /= vec2(5.0, 288.0); + ivec2 ipos = ivec2(floor(pos)) - rectl; + int lpos = ipos.x + ipos.y * 38; + vec3 sample0 = samples[-78 + lpos]; + vec3 sample1 = samples[-40 + lpos]; + vec3 sample2 = samples[-2 + lpos]; + vec3 sample3 = samples[36 + lpos]; + vec3 sample4 = samples[74 + lpos]; + vec3 sample5 = samples[112 + lpos]; + vec3 sample6 = samples[-77 + lpos]; + vec3 sample7 = samples[-39 + lpos]; + vec3 sample8 = samples[-1 + lpos]; + vec3 sample9 = samples[37 + lpos]; + vec3 sample10 = samples[75 + lpos]; + vec3 sample11 = samples[113 + lpos]; + vec3 sample12 = samples[-76 + lpos]; + vec3 sample13 = samples[-38 + lpos]; + vec3 sample14 = samples[0 + lpos]; + vec3 sample15 = samples[38 + lpos]; + vec3 sample16 = samples[76 + lpos]; + vec3 sample17 = samples[114 + lpos]; + vec3 sample18 = samples[-75 + lpos]; + vec3 
sample19 = samples[-37 + lpos]; + vec3 sample20 = samples[1 + lpos]; + vec3 sample21 = samples[39 + lpos]; + vec3 sample22 = samples[77 + lpos]; + vec3 sample23 = samples[115 + lpos]; + vec3 sample24 = samples[-74 + lpos]; + vec3 sample25 = samples[-36 + lpos]; + vec3 sample26 = samples[2 + lpos]; + vec3 sample27 = samples[40 + lpos]; + vec3 sample28 = samples[78 + lpos]; + vec3 sample29 = samples[116 + lpos]; + vec3 sample30 = samples[-73 + lpos]; + vec3 sample31 = samples[-35 + lpos]; + vec3 sample32 = samples[3 + lpos]; + vec3 sample33 = samples[41 + lpos]; + vec3 sample34 = samples[79 + lpos]; + vec3 sample35 = samples[117 + lpos]; + float luma1 = dot(sample1, color_primary); + float luma2 = dot(sample2, color_primary); + float luma3 = dot(sample3, color_primary); + float luma4 = dot(sample4, color_primary); + float luma6 = dot(sample6, color_primary); + float luma7 = dot(sample7, color_primary); + float luma8 = dot(sample8, color_primary); + float luma9 = dot(sample9, color_primary); + float luma10 = dot(sample10, color_primary); + float luma11 = dot(sample11, color_primary); + float luma12 = dot(sample12, color_primary); + float luma13 = dot(sample13, color_primary); + float luma14 = dot(sample14, color_primary); + float luma15 = dot(sample15, color_primary); + float luma16 = dot(sample16, color_primary); + float luma17 = dot(sample17, color_primary); + float luma18 = dot(sample18, color_primary); + float luma19 = dot(sample19, color_primary); + float luma20 = dot(sample20, color_primary); + float luma21 = dot(sample21, color_primary); + float luma22 = dot(sample22, color_primary); + float luma23 = dot(sample23, color_primary); + float luma24 = dot(sample24, color_primary); + float luma25 = dot(sample25, color_primary); + float luma26 = dot(sample26, color_primary); + float luma27 = dot(sample27, color_primary); + float luma28 = dot(sample28, color_primary); + float luma29 = dot(sample29, color_primary); + float luma31 = dot(sample31, color_primary); + float 
luma32 = dot(sample32, color_primary); + float luma33 = dot(sample33, color_primary); + float luma34 = dot(sample34, color_primary); + vec3 abd = vec3(0.0, 0.0, 0.0); + float gx, gy; + gx = (luma13 - luma1) / 2.0; + gy = (luma8 - luma6) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma14 - luma2) / 2.0; + gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma15 - luma3) / 2.0; + gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma16 - luma4) / 2.0; + gy = (luma11 - luma9) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0; + gy = (luma14 - luma12) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0; + gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0; + gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0; + gy = (luma17 - luma15) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0; + gy = (luma20 - luma18) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0; + gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0; + gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994; + gx = 
(-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0; + gy = (luma23 - luma21) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma31 - luma19) / 2.0; + gy = (luma26 - luma24) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + gx = (luma32 - luma20) / 2.0; + gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma33 - luma21) / 2.0; + gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959; + gx = (luma34 - luma22) / 2.0; + gy = (luma29 - luma27) / 2.0; + abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088; + float a = abd.x, b = abd.y, d = abd.z; + float T = a + d, D = a * d - b * b; + float delta = sqrt(max(T * T / 4.0 - D, 0.0)); + float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta; + float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2); + float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7); + float lambda = sqrtL1; + float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7); + float angle = floor(theta * 24.0 / 3.141592653589793); + float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016); + float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5); + float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0; + vec3 res = vec3(0.0, 0.0, 0.0); + vec4 w; + w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix); + res += sample0 * w[0]; + res += sample1 * w[1]; + res += sample2 * w[2]; + res += sample3 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix); + res += sample4 * w[0]; + res += sample5 * w[1]; + res += sample6 * w[2]; + res += sample7 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix); + res += sample8 * w[0]; + res += sample9 * w[1]; + res += sample10 * w[2]; + res += 
sample11 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix); + res += sample12 * w[0]; + res += sample13 * w[1]; + res += sample14 * w[2]; + res += sample15 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix); + res += sample16 * w[0]; + res += sample17 * w[1]; + w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix_inv); + res += sample35 * w[0]; + res += sample34 * w[1]; + res += sample33 * w[2]; + res += sample32 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix_inv); + res += sample31 * w[0]; + res += sample30 * w[1]; + res += sample29 * w[2]; + res += sample28 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix_inv); + res += sample27 * w[0]; + res += sample26 * w[1]; + res += sample25 * w[2]; + res += sample24 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix_inv); + res += sample23 * w[0]; + res += sample22 * w[1]; + res += sample21 * w[2]; + res += sample20 * w[3]; + w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix_inv); + res += sample19 * w[0]; + res += sample18 * w[1]; + res = clamp(res, 0.0, 1.0); + imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0)); +} diff --git a/src/Effects/RAVU/RAVU_Zoom_R3_Weights.dds b/src/Effects/RAVU/RAVU_Zoom_R3_Weights.dds deleted file mode 100644 index 6777330a7..000000000 Binary files a/src/Effects/RAVU/RAVU_Zoom_R3_Weights.dds and /dev/null differ diff --git a/src/Effects/RAVU/prescalers.hlsli b/src/Effects/RAVU/prescalers.hlsli new file mode 100644 index 000000000..e81e3918a --- /dev/null +++ b/src/Effects/RAVU/prescalers.hlsli @@ -0,0 +1,73 @@ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +// Conversion from GLSL to HLSL is done through defines as much as possible to ease synchronization and comparison with upstream +#define ivec2 int2 + +#define vec2 float2 +#define vec3 float3 +#define vec4 float4 + +#define mat4x3 float4x3 +#define matrixCompMult(mtx1, mtx2) (mtx1 * mtx2) + +#define shared groupshared + +#define atan atan2 +#define barrier GroupMemoryBarrierWithGroupSync +#define fract frac +#define intBitsToFloat asfloat +#define inversesqrt rsqrt +// mod deals only with positive numbers here and it could be substituted by fmod +#define mod fmod + +// lerp handles bools as the third argument differently from mix +float mix(float a, float b, bool c) { + return c ?
b : a; +} + +#define MIX_LERP(type1, type3) type1 mix(type1 a, type1 b, type3 c) { return lerp(a, b, c); } +MIX_LERP(float, float) +MIX_LERP(float2, float2) +MIX_LERP(float3, float) +MIX_LERP(float4, float) + +#define texture(tex, pos) tex.SampleLevel(sam_##tex, pos, 0.0) + +#define OUTPUT_pt float2(GetOutputPt()) +#define frag_pos(id) (vec2(id) + vec2(0.5, 0.5)) +#define frag_map(id) (OUTPUT_pt * frag_pos(id)) +#define HOOKED_map(id) frag_map(id) + +#define gl_LocalInvocationIndex (threadId.y*MP_NUM_THREADS_X + threadId.x) +#define gl_LocalInvocationID threadId +#define gl_WorkGroupSize (uint2(MP_NUM_THREADS_X, MP_NUM_THREADS_Y)) +#define gl_WorkGroupID (blockStart / uint2(MP_BLOCK_WIDTH, MP_BLOCK_HEIGHT)) +#define gl_GlobalInvocationID (gl_WorkGroupID*gl_WorkGroupSize + threadId.xy) + +// disable warning about unknown pragma +#pragma warning(disable: 3568) +// disable warning about too many threads (ravu-r4-rgb triggers it) +#pragma warning(disable: 4714) + +// https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.709-6-201506-I!!PDF-E.pdf +static const float3 rgb2y = float3(0.2126, 0.7152, 0.0722); +static const float2x3 rgb2uv = { + -0.2126/1.8556, -0.7152/1.8556, 0.9278/1.8556, + 0.7874/1.5748, -0.7152/1.5748, -0.0722/1.5748 +}; +static const float3x3 yuv2rgb = { + 1, 0, 1.5748, + 1, -0.187324, -0.468124, + 1, 1.8556, 0 +}; diff --git a/src/Effects/RAVU/ravu_3x_lut2_f16.dds b/src/Effects/RAVU/ravu_3x_lut2_f16.dds new file mode 100644 index 000000000..63f627f29 Binary files /dev/null and b/src/Effects/RAVU/ravu_3x_lut2_f16.dds differ diff --git a/src/Effects/RAVU/ravu_3x_lut3_f16.dds b/src/Effects/RAVU/ravu_3x_lut3_f16.dds new file mode 100644 index 000000000..df17a00dd Binary files /dev/null and b/src/Effects/RAVU/ravu_3x_lut3_f16.dds differ diff --git a/src/Effects/RAVU/ravu_3x_lut4_f16.dds b/src/Effects/RAVU/ravu_3x_lut4_f16.dds new file mode 100644 index 000000000..a8d4e76df Binary files /dev/null and b/src/Effects/RAVU/ravu_3x_lut4_f16.dds differ diff --git 
a/src/Effects/RAVU/ravu_lite_lut2_f16.dds b/src/Effects/RAVU/ravu_lite_lut2_f16.dds new file mode 100644 index 000000000..735ac61d0 Binary files /dev/null and b/src/Effects/RAVU/ravu_lite_lut2_f16.dds differ diff --git a/src/Effects/RAVU/RAVU_Lite_R3_Weights.dds b/src/Effects/RAVU/ravu_lite_lut3_f16.dds similarity index 99% rename from src/Effects/RAVU/RAVU_Lite_R3_Weights.dds rename to src/Effects/RAVU/ravu_lite_lut3_f16.dds index b7afe3756..2caf7b0b0 100644 Binary files a/src/Effects/RAVU/RAVU_Lite_R3_Weights.dds and b/src/Effects/RAVU/ravu_lite_lut3_f16.dds differ diff --git a/src/Effects/RAVU/ravu_lite_lut4_f16.dds b/src/Effects/RAVU/ravu_lite_lut4_f16.dds new file mode 100644 index 000000000..2d10b066c Binary files /dev/null and b/src/Effects/RAVU/ravu_lite_lut4_f16.dds differ diff --git a/src/Effects/RAVU/ravu_lut2_f16.dds b/src/Effects/RAVU/ravu_lut2_f16.dds new file mode 100644 index 000000000..17d756f02 Binary files /dev/null and b/src/Effects/RAVU/ravu_lut2_f16.dds differ diff --git a/src/Effects/RAVU/ravu_lut3_f16.dds b/src/Effects/RAVU/ravu_lut3_f16.dds new file mode 100644 index 000000000..07b7c887a Binary files /dev/null and b/src/Effects/RAVU/ravu_lut3_f16.dds differ diff --git a/src/Effects/RAVU/ravu_lut4_f16.dds b/src/Effects/RAVU/ravu_lut4_f16.dds new file mode 100644 index 000000000..42e19542e Binary files /dev/null and b/src/Effects/RAVU/ravu_lut4_f16.dds differ diff --git a/src/Effects/RAVU/ravu_zoom_lut2_ar_f16.dds b/src/Effects/RAVU/ravu_zoom_lut2_ar_f16.dds new file mode 100644 index 000000000..1f0a4e6b1 Binary files /dev/null and b/src/Effects/RAVU/ravu_zoom_lut2_ar_f16.dds differ diff --git a/src/Effects/RAVU/ravu_zoom_lut2_f16.dds b/src/Effects/RAVU/ravu_zoom_lut2_f16.dds new file mode 100644 index 000000000..5cbb3f4a2 Binary files /dev/null and b/src/Effects/RAVU/ravu_zoom_lut2_f16.dds differ diff --git a/src/Effects/RAVU/ravu_zoom_lut3_ar_f16.dds b/src/Effects/RAVU/ravu_zoom_lut3_ar_f16.dds new file mode 100644 index 
000000000..1f0a4e6b1 Binary files /dev/null and b/src/Effects/RAVU/ravu_zoom_lut3_ar_f16.dds differ diff --git a/src/Effects/RAVU/ravu_zoom_lut3_f16.dds b/src/Effects/RAVU/ravu_zoom_lut3_f16.dds new file mode 100644 index 000000000..0dee57459 Binary files /dev/null and b/src/Effects/RAVU/ravu_zoom_lut3_f16.dds differ diff --git a/src/Effects/SMAA/SMAA_High.hlsl b/src/Effects/SMAA/SMAA_High.hlsl index 66abb0254..b6ea179b0 100644 --- a/src/Effects/SMAA/SMAA_High.hlsl +++ b/src/Effects/SMAA/SMAA_High.hlsl @@ -1,13 +1,16 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME SMAA_2 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -71,6 +74,7 @@ float4 Pass2(float2 pos) { //!DESC Neighborhood Blending //!STYLE PS //!IN INPUT, blendTex +//!OUT OUTPUT float4 Pass3(float2 pos) { return SMAANeighborhoodBlendingPS(pos, INPUT, blendTex); diff --git a/src/Effects/SMAA/SMAA_Low.hlsl b/src/Effects/SMAA/SMAA_Low.hlsl index c83d4851b..f8ded0640 100644 --- a/src/Effects/SMAA/SMAA_Low.hlsl +++ b/src/Effects/SMAA/SMAA_Low.hlsl @@ -1,13 +1,16 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME SMAA_0 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -71,6 +74,7 @@ float4 Pass2(float2 pos) { //!DESC Neighborhood Blending //!STYLE PS //!IN INPUT, blendTex +//!OUT OUTPUT float4 Pass3(float2 pos) { return SMAANeighborhoodBlendingPS(pos, INPUT, blendTex); diff --git a/src/Effects/SMAA/SMAA_Medium.hlsl b/src/Effects/SMAA/SMAA_Medium.hlsl index 1898e152e..e3af0f765 100644 --- a/src/Effects/SMAA/SMAA_Medium.hlsl +++ b/src/Effects/SMAA/SMAA_Medium.hlsl @@ -1,13 +1,16 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH 
-//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME SMAA_1 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -71,6 +74,7 @@ float4 Pass2(float2 pos) { //!DESC Neighborhood Blending //!STYLE PS //!IN INPUT, blendTex +//!OUT OUTPUT float4 Pass3(float2 pos) { return SMAANeighborhoodBlendingPS(pos, INPUT, blendTex); diff --git a/src/Effects/SMAA/SMAA_Ultra.hlsl b/src/Effects/SMAA/SMAA_Ultra.hlsl index df0ab55d8..2d8b018be 100644 --- a/src/Effects/SMAA/SMAA_Ultra.hlsl +++ b/src/Effects/SMAA/SMAA_Ultra.hlsl @@ -1,13 +1,16 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!SORT_NAME SMAA_3 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -71,6 +74,7 @@ float4 Pass2(float2 pos) { //!DESC Neighborhood Blending //!STYLE PS //!IN INPUT, blendTex +//!OUT OUTPUT float4 Pass3(float2 pos) { return SMAANeighborhoodBlendingPS(pos, INPUT, blendTex); diff --git a/src/Effects/SSimDownscaler.hlsl b/src/Effects/SSimDownscaler.hlsl index 16811854e..b40f8d84a 100644 --- a/src/Effects/SSimDownscaler.hlsl +++ b/src/Effects/SSimDownscaler.hlsl @@ -4,8 +4,7 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!GENERIC_DOWNSCALER +//!VERSION 4 //!PARAMETER //!LABEL Oversharp @@ -18,6 +17,9 @@ float oversharp; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT OUTPUT_HEIGHT @@ -295,6 +297,7 @@ void Pass4(uint2 blockStart, uint3 threadId) { //!PASS 5 //!DESC final pass //!IN MR, POSTKERNEL +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -307,7 +310,9 @@ void Pass4(uint2 blockStart, uint3 threadId) { void Pass5(uint2 blockStart, uint3 threadId) { const uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = 
GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -359,12 +364,6 @@ void Pass5(uint2 blockStart, uint3 threadId) { for (j = 0; j <= 1; ++j) { uint2 destPos = gxy + uint2(i, j); - if (i != 0 || j != 0) { - if (!CheckViewport(destPos)) { - continue; - } - } - float W = 0; float3x3 avg = 0; @@ -386,7 +385,7 @@ void Pass5(uint2 blockStart, uint3 threadId) { } avg /= W; - WriteToOutput(destPos, avg[1] + avg[2] * src2[i][j] - avg[0]); + OUTPUT[destPos] = float4(avg[1] + avg[2] * src2[i][j] - avg[0], 1); } } } diff --git a/src/Effects/Sharpen/AdaptiveSharpen.hlsl b/src/Effects/Sharpen/AdaptiveSharpen.hlsl index d61ff07d3..62c1846f9 100644 --- a/src/Effects/Sharpen/AdaptiveSharpen.hlsl +++ b/src/Effects/Sharpen/AdaptiveSharpen.hlsl @@ -6,10 +6,7 @@ //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT - +//!VERSION 4 //!PARAMETER @@ -26,6 +23,11 @@ float curveHeight; //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -33,6 +35,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -51,7 +54,9 @@ float CtG(float3 RGB) { return sqrt((1.0f / 3.0f) * ((RGB * RGB).r + (RGB * RGB void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -92,12 +97,6 @@ void Pass1(uint2 blockStart, uint3 threadId) { for (j = 0; j <= 1; ++j) { const uint2 destPos = gxy + uint2(i, j); - if (i != 0 || j != 0) { - if (!CheckViewport(destPos)) { - continue; - } - } - float2 pos = (destPos + 0.5f) * inputPt; // Get points and saturate out of range values (BTB & WTW) @@ -208,7 +207,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { sharpdiff = lerp((tanh((max(sharpdiff, 0.0)) * nmax_scale) / nmax_scale), 
(max(sharpdiff, 0.0)), L_comp_ratio) + lerp((tanh((min(sharpdiff, 0.0)) * nmin_scale) / nmin_scale), (min(sharpdiff, 0.0)), D_comp_ratio); - WriteToOutput(destPos, src[i + 3][j + 3].rgb + sharpdiff); + OUTPUT[destPos] = float4(src[i + 3][j + 3].rgb + sharpdiff, 1); } } } diff --git a/src/Effects/Sharpen/FineSharp.hlsl b/src/Effects/Sharpen/FineSharp.hlsl index 1bdd4212e..ed8cae4f0 100644 --- a/src/Effects/Sharpen/FineSharp.hlsl +++ b/src/Effects/Sharpen/FineSharp.hlsl @@ -6,9 +6,7 @@ // The sharpener makes no attempt to filter noise or source artefacts and will sharpen those too.So denoise / clean your source first if necessary.Probably won't work very well on a really old GPU, the weakest I have tried is a GTX 560 at 1080p 60fps with no problems. //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!PARAMETER @@ -51,6 +49,11 @@ float xrep; //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!TEXTURE //!WIDTH OUTPUT_WIDTH //!HEIGHT OUTPUT_HEIGHT @@ -381,6 +384,7 @@ void Pass4(uint2 blockStart, uint3 threadId) { //!PASS 5 //!DESC Part C //!IN tex2 +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -391,7 +395,9 @@ static const float3x3 YUVtoRGB = GetInputSize().y <= 576 ? 
YUVtoRGB(0.114, 0.299 void Pass5(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -429,19 +435,13 @@ void Pass5(uint2 blockStart, uint3 threadId) { [unroll] for (j = 1; j <= 2; ++j) { uint2 destPos = gxy + uint2(i - 1, j - 1); - - if (i != 1 || j != 1) { - if (!CheckViewport(destPos)) { - continue; - } - } float4 o = src[i][j]; float edge = abs(src[i][j - 1].x + src[i - 1][j].x + src[i + 1][j].x + src[i][j + 1].x - 4 * o.x); o.x = lerp(o.a, o.x, xstr * (1 - saturate(edge * xrep))); - WriteToOutput(destPos, mul(YUVtoRGB, o.xyz - float3(0.0, 0.5, 0.5))); + OUTPUT[destPos] = float4(mul(YUVtoRGB, o.xyz - float3(0.0, 0.5, 0.5)), 1); } } } diff --git a/src/Effects/Sharpen/LCAS.hlsl b/src/Effects/Sharpen/LCAS.hlsl index 431b69131..65e04782a 100644 --- a/src/Effects/Sharpen/LCAS.hlsl +++ b/src/Effects/Sharpen/LCAS.hlsl @@ -1,7 +1,7 @@ // This is a combination of linear interpolation and light version of cas //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!PARAMETER //!LABEL Sharpness @@ -14,12 +14,16 @@ float sharpness; //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!SAMPLER //!FILTER LINEAR SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -71,26 +75,22 @@ float3 LCAS(uint2 ip, float peak) { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = blockStart + Rmp8x8(threadId.x); - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } const float peak = lerp(0, -0.1111111111111111, sharpness); - WriteToOutput(gxy, LCAS(gxy, peak)); + OUTPUT[gxy] = float4(LCAS(gxy, peak), 1); gxy.x += 8u; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, LCAS(gxy, peak)); - } + OUTPUT[gxy] = float4(LCAS(gxy, peak), 1); gxy.y += 8u; - if (CheckViewport(gxy)) { - 
WriteToOutput(gxy, LCAS(gxy, peak)); - } + OUTPUT[gxy] = float4(LCAS(gxy, peak), 1); gxy.x -= 8u; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, LCAS(gxy, peak)); - } + OUTPUT[gxy] = float4(LCAS(gxy, peak), 1); } diff --git a/src/Effects/Sharpen/LumaSharpen.hlsl b/src/Effects/Sharpen/LumaSharpen.hlsl index 9672ea218..1ffc8d6a9 100644 --- a/src/Effects/Sharpen/LumaSharpen.hlsl +++ b/src/Effects/Sharpen/LumaSharpen.hlsl @@ -13,9 +13,7 @@ */ //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH -//!OUTPUT_HEIGHT INPUT_HEIGHT +//!VERSION 4 //!PARAMETER @@ -64,6 +62,11 @@ float offsetBias; //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH +//!HEIGHT INPUT_HEIGHT +Texture2D OUTPUT; + //!SAMPLER //!FILTER LINEAR SamplerState sam; @@ -72,6 +75,7 @@ SamplerState sam; //!PASS 1 //!STYLE PS //!IN INPUT +//!OUT OUTPUT /*-----------------------------------------------------------. / Developer settings / diff --git a/src/Effects/xBRZ/xBRZ_2x.hlsl b/src/Effects/xBRZ/xBRZ_2x.hlsl index 157b6f70a..b6a066709 100644 --- a/src/Effects/xBRZ/xBRZ_2x.hlsl +++ b/src/Effects/xBRZ/xBRZ_2x.hlsl @@ -1,14 +1,17 @@ // 移植自 https://github.com/libretro/common-shaders/blob/master/xbrz/shaders/2xbrz.cg //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 2 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 2 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 2 +//!HEIGHT INPUT_HEIGHT * 2 +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -16,6 +19,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 16 //!NUM_THREADS 64 @@ -87,7 +91,8 @@ void ScalePixel(const int4 blend, const float3 k[9], inout float3 dst[4]) { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart; - if (!CheckViewport(gxy)) { + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -282,20 +287,14 @@ void Pass1(uint2 blockStart, uint3 threadId) { dst[0] = 
tempDst3; } - WriteToOutput(gxy, dst[0]); + OUTPUT[gxy] = float4(dst[0], 1); ++gxy.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, dst[1]); - } + OUTPUT[gxy] = float4(dst[1], 1); ++gxy.y; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, dst[2]); - } + OUTPUT[gxy] = float4(dst[2], 1); --gxy.x; - if (CheckViewport(gxy)) { - WriteToOutput(gxy, dst[3]); - } + OUTPUT[gxy] = float4(dst[3], 1); } diff --git a/src/Effects/xBRZ/xBRZ_3x.hlsl b/src/Effects/xBRZ/xBRZ_3x.hlsl index 26c184e84..52eb53560 100644 --- a/src/Effects/xBRZ/xBRZ_3x.hlsl +++ b/src/Effects/xBRZ/xBRZ_3x.hlsl @@ -1,14 +1,17 @@ // 移植自 https://github.com/libretro/common-shaders/blob/master/xbrz/shaders/3xbrz.cg //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 3 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 3 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 3 +//!HEIGHT INPUT_HEIGHT * 3 +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -16,6 +19,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 24 //!NUM_THREADS 64 @@ -98,7 +102,8 @@ const static uint destIdx[3][3] = { void Pass1(uint2 blockStart, uint3 threadId) { const uint2 gxy = (Rmp8x8(threadId.x) * 3) + blockStart; - if (!CheckViewport(gxy)) { + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -323,15 +328,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { for (uint i = 0; i < 3; ++i) { [unroll] for (uint j = 0; j < 3; ++j) { - const uint2 destPos = gxy + uint2(i, j); - - if (i != 0 || j != 0) { - if (!CheckViewport(destPos)) { - continue; - } - } - - WriteToOutput(destPos, dst[destIdx[j][i]]); + OUTPUT[gxy + uint2(i, j)] = float4(dst[destIdx[j][i]], 1); } } } diff --git a/src/Effects/xBRZ/xBRZ_4x.hlsl b/src/Effects/xBRZ/xBRZ_4x.hlsl index bca5853f4..79ab5377e 100644 --- a/src/Effects/xBRZ/xBRZ_4x.hlsl +++ b/src/Effects/xBRZ/xBRZ_4x.hlsl @@ -1,14 +1,17 @@ // 移植自 
https://github.com/libretro/common-shaders/blob/master/xbrz/shaders/4xbrz.cg //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 4 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 4 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 4 +//!HEIGHT INPUT_HEIGHT * 4 +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -16,6 +19,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 32 //!NUM_THREADS 64 @@ -72,7 +76,8 @@ const static uint destIdx[4][4] = { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) << 2) + blockStart; - if (!CheckViewport(gxy)) { + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -307,15 +312,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { for (uint i = 0; i < 4; ++i) { [unroll] for (uint j = 0; j < 4; ++j) { - const uint2 destPos = gxy + uint2(i, j); - - if (i != 0 || j != 0) { - if (!CheckViewport(destPos)) { - continue; - } - } - - WriteToOutput(destPos, dst[destIdx[j][i]]); + OUTPUT[gxy + uint2(i, j)] = float4(dst[destIdx[j][i]], 1); } } } diff --git a/src/Effects/xBRZ/xBRZ_5x.hlsl b/src/Effects/xBRZ/xBRZ_5x.hlsl index 9465d91f8..a52a31fca 100644 --- a/src/Effects/xBRZ/xBRZ_5x.hlsl +++ b/src/Effects/xBRZ/xBRZ_5x.hlsl @@ -1,14 +1,17 @@ // 移植自 https://github.com/libretro/common-shaders/blob/master/xbrz/shaders/5xbrz.cg //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 5 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 5 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 5 +//!HEIGHT INPUT_HEIGHT * 5 +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -16,6 +19,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 40 //!NUM_THREADS 64 @@ -74,7 +78,8 @@ const static uint destIdx[5][5] = { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) * 5) + blockStart; - if (!CheckViewport(gxy)) { + const uint2 outputSize = 
GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -334,15 +339,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { for (uint i = 0; i < 5; ++i) { [unroll] for (uint j = 0; j < 5; ++j) { - const uint2 destPos = gxy + uint2(i, j); - - if (i != 0 || j != 0) { - if (!CheckViewport(destPos)) { - continue; - } - } - - WriteToOutput(destPos, dst[destIdx[j][i]]); + OUTPUT[gxy + uint2(i, j)] = float4(dst[destIdx[j][i]], 1); } } } diff --git a/src/Effects/xBRZ/xBRZ_6x.hlsl b/src/Effects/xBRZ/xBRZ_6x.hlsl index 49ab9b80c..03f20dd1e 100644 --- a/src/Effects/xBRZ/xBRZ_6x.hlsl +++ b/src/Effects/xBRZ/xBRZ_6x.hlsl @@ -1,14 +1,17 @@ // 移植自 https://github.com/libretro/common-shaders/blob/master/xbrz/shaders/6xbrz.cg //!MAGPIE EFFECT -//!VERSION 3 -//!OUTPUT_WIDTH INPUT_WIDTH * 6 -//!OUTPUT_HEIGHT INPUT_HEIGHT * 6 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +//!WIDTH INPUT_WIDTH * 6 +//!HEIGHT INPUT_HEIGHT * 6 +Texture2D OUTPUT; + //!SAMPLER //!FILTER POINT SamplerState sam; @@ -16,6 +19,7 @@ SamplerState sam; //!PASS 1 //!IN INPUT +//!OUT OUTPUT //!BLOCK_SIZE 48 //!NUM_THREADS 64 @@ -80,7 +84,8 @@ const static uint destIdx[6][6] = { void Pass1(uint2 blockStart, uint3 threadId) { uint2 gxy = (Rmp8x8(threadId.x) * 6) + blockStart; - if (!CheckViewport(gxy)) { + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -366,15 +371,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { for (uint i = 0; i < 6; ++i) { [unroll] for (uint j = 0; j < 6; ++j) { - const uint2 destPos = gxy + uint2(i, j); - - if (i != 0 || j != 0) { - if (!CheckViewport(destPos)) { - continue; - } - } - - WriteToOutput(destPos, dst[destIdx[j][i]]); + OUTPUT[gxy + uint2(i, j)] = float4(dst[destIdx[j][i]], 1); } } } diff --git a/src/Effects/xBRZ/xBRZ_Freescale.hlsl b/src/Effects/xBRZ/xBRZ_Freescale.hlsl index 27f70dbac..b0afe789c 100644 --- a/src/Effects/xBRZ/xBRZ_Freescale.hlsl +++ 
b/src/Effects/xBRZ/xBRZ_Freescale.hlsl @@ -1,12 +1,15 @@ // 移植自 https://github.com/libretro/common-shaders/tree/master/xbrz/shaders/xbrz-freescale-multipass //!MAGPIE EFFECT -//!VERSION 3 +//!VERSION 4 //!TEXTURE Texture2D INPUT; +//!TEXTURE +Texture2D OUTPUT; + //!TEXTURE //!WIDTH INPUT_WIDTH //!HEIGHT INPUT_HEIGHT @@ -272,6 +275,7 @@ void Pass1(uint2 blockStart, uint3 threadId) { //!PASS 2 //!IN INPUT, tex1 +//!OUT OUTPUT //!BLOCK_SIZE 8 //!NUM_THREADS 64 @@ -279,7 +283,9 @@ void Pass1(uint2 blockStart, uint3 threadId) { void Pass2(uint2 blockStart, uint3 threadId) { uint2 gxy = Rmp8x8(threadId.x) + blockStart; - if (!CheckViewport(gxy)) { + + const uint2 outputSize = GetOutputSize(); + if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) { return; } @@ -379,5 +385,5 @@ void Pass2(uint2 blockStart, uint3 threadId) { res = lerp(res, blendPix, get_left_ratio(f, origin, direction, scale)); } - WriteToOutput(gxy, res); + OUTPUT[gxy] = float4(res, 1); } diff --git a/src/HybridCRT.props b/src/HybridCRT.props index 703e75893..cd3aff071 100644 --- a/src/HybridCRT.props +++ b/src/HybridCRT.props @@ -5,7 +5,7 @@ - + MultiThreadedDebug @@ -16,7 +16,7 @@ - + MultiThreaded diff --git a/src/Magpie.App/AboutPage.cpp b/src/Magpie.App/AboutPage.cpp index e4bbee1c2..f04d91282 100644 --- a/src/Magpie.App/AboutPage.cpp +++ b/src/Magpie.App/AboutPage.cpp @@ -4,6 +4,7 @@ #include "AboutPage.g.cpp" #endif #include "Win32Utils.h" +#include "CommonSharedConstants.h" namespace winrt::Magpie::App::implementation { @@ -12,7 +13,8 @@ void AboutPage::VersionTextBlock_DoubleTapped(IInspectable const&, Input::Double if (!_viewModel.IsDeveloperMode() && (GetAsyncKeyState(VK_MENU) & 0x8000)) { _viewModel.IsDeveloperMode(true); - hstring message = ResourceLoader::GetForCurrentView().GetString(L"About_DeveloperModeEnabled"); + const hstring message = ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID) + .GetString(L"About_DeveloperModeEnabled"); 
Application::Current().as().RootPage().ShowToast(message); } } diff --git a/src/Magpie.App/AboutPage.xaml b/src/Magpie.App/AboutPage.xaml index b0d00cf1f..003b26b96 100644 --- a/src/Magpie.App/AboutPage.xaml +++ b/src/Magpie.App/AboutPage.xaml @@ -7,40 +7,39 @@ xmlns:muxc="using:Microsoft.UI.Xaml.Controls" mc:Ignorable="d"> - - - + + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + - + diff --git a/src/Magpie.App/AboutViewModel.cpp b/src/Magpie.App/AboutViewModel.cpp index 918116181..8a2f53645 100644 --- a/src/Magpie.App/AboutViewModel.cpp +++ b/src/Magpie.App/AboutViewModel.cpp @@ -8,6 +8,7 @@ #include "AppSettings.h" #include "StrUtils.h" #include "IconHelper.h" +#include "CommonSharedConstants.h" using namespace winrt; using namespace Windows::UI::Xaml::Media::Imaging; @@ -56,7 +57,8 @@ AboutViewModel::AboutViewModel() { } hstring AboutViewModel::Version() const noexcept { - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); return hstring(StrUtils::Concat( resourceLoader.GetString(L"About_Version_Version"), #ifdef MAGPIE_VERSION_TAG @@ -97,7 +99,7 @@ bool AboutViewModel::IsCheckForPreviewUpdates() const noexcept { return AppSettings::Get().IsCheckForPreviewUpdates(); } -void AboutViewModel::IsCheckForPreviewUpdates(bool value) noexcept { +void AboutViewModel::IsCheckForPreviewUpdates(bool value) { AppSettings::Get().IsCheckForPreviewUpdates(value); _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsCheckForPreviewUpdates")); } @@ -115,7 +117,7 @@ bool AboutViewModel::IsAutoCheckForUpdates() const noexcept { return AppSettings::Get().IsAutoCheckForUpdates(); } -void AboutViewModel::IsAutoCheckForUpdates(bool value) noexcept { +void AboutViewModel::IsAutoCheckForUpdates(bool value) { AppSettings::Get().IsAutoCheckForUpdates(value); _propertyChangedEvent(*this, 
PropertyChangedEventArgs(L"IsAutoCheckForUpdates")); } @@ -132,7 +134,7 @@ bool AboutViewModel::IsErrorWhileChecking() const noexcept { return UpdateService::Get().Status() == UpdateStatus::ErrorWhileChecking; } -void AboutViewModel::IsErrorWhileChecking(bool value) noexcept { +void AboutViewModel::IsErrorWhileChecking(bool value) { if (!value) { UpdateService& service = UpdateService::Get(); if (service.Status() == UpdateStatus::ErrorWhileChecking) { @@ -147,7 +149,7 @@ bool AboutViewModel::IsNoUpdate() const noexcept { return UpdateService::Get().Status() == UpdateStatus::NoUpdate; } -void AboutViewModel::IsNoUpdate(bool value) noexcept { +void AboutViewModel::IsNoUpdate(bool value) const noexcept { if (!value) { UpdateService& service = UpdateService::Get(); if (service.Status() == UpdateStatus::NoUpdate) { @@ -180,7 +182,7 @@ bool AboutViewModel::IsUpdateCardOpen() const noexcept { return UpdateService::Get().Status() >= UpdateStatus::Available; } -void AboutViewModel::IsUpdateCardOpen(bool value) noexcept { +void AboutViewModel::IsUpdateCardOpen(bool value) { if (!value) { UpdateService& service = UpdateService::Get(); UpdateStatus status = service.Status(); @@ -208,7 +210,8 @@ hstring AboutViewModel::UpdateCardTitle() const noexcept { return {}; } - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); hstring titleFmt = resourceLoader.GetString(L"Home_UpdateCard_Title"); return hstring(fmt::format(fmt::runtime(std::wstring_view(titleFmt)), updateService.Tag())); } diff --git a/src/Magpie.App/AboutViewModel.h b/src/Magpie.App/AboutViewModel.h index f050510d3..21de65c1b 100644 --- a/src/Magpie.App/AboutViewModel.h +++ b/src/Magpie.App/AboutViewModel.h @@ -27,22 +27,22 @@ struct AboutViewModel : AboutViewModelT { fire_and_forget CheckForUpdates(); bool IsCheckForPreviewUpdates() const noexcept; - void IsCheckForPreviewUpdates(bool 
value) noexcept; + void IsCheckForPreviewUpdates(bool value); bool IsCheckForUpdatesButtonEnabled() const noexcept; bool IsAutoCheckForUpdates() const noexcept; - void IsAutoCheckForUpdates(bool value) noexcept; + void IsAutoCheckForUpdates(bool value); bool IsAnyUpdateStatus() const noexcept; bool IsCheckingForUpdates() const noexcept; bool IsErrorWhileChecking() const noexcept; - void IsErrorWhileChecking(bool value) noexcept; + void IsErrorWhileChecking(bool value); bool IsNoUpdate() const noexcept; - void IsNoUpdate(bool value) noexcept; + void IsNoUpdate(bool value) const noexcept; bool IsAvailable() const noexcept; @@ -52,7 +52,7 @@ struct AboutViewModel : AboutViewModelT { bool IsInstalling() const noexcept; bool IsUpdateCardOpen() const noexcept; - void IsUpdateCardOpen(bool value) noexcept; + void IsUpdateCardOpen(bool value); bool IsUpdateCardClosable() const noexcept; bool IsCancelButtonVisible() const noexcept; diff --git a/src/Magpie.App/App.cpp b/src/Magpie.App/App.cpp index b59f8a02c..3c95a78d2 100644 --- a/src/Magpie.App/App.cpp +++ b/src/Magpie.App/App.cpp @@ -24,16 +24,26 @@ #include "ShortcutService.h" #include "AppSettings.h" #include "CommonSharedConstants.h" -#include "MagService.h" +#include "ScalingService.h" #include #include #include "EffectsService.h" #include "UpdateService.h" #include "LocalizationService.h" +#include "Logger.h" namespace winrt::Magpie::App::implementation { App::App() { + UnhandledException([this](IInspectable const&, UnhandledExceptionEventArgs const& e) { + Logger::Get().ComCritical("未处理的异常", e.Exception().value); + + if (IsDebuggerPresent()) { + hstring errorMessage = e.Message(); + __debugbreak(); + } + }); + EffectsService::Get().StartInitialize(); // 初始化 XAML 框架 @@ -89,40 +99,31 @@ StartUpOptions App::Initialize(int) { LocalizationService::Get().Initialize(); ShortcutService::Get().Initialize(); - MagService::Get().Initialize(); + ScalingService::Get().Initialize(); UpdateService::Get().Initialize(); return 
result; } void App::Uninitialize() { - MagService::Get().Uninitialize(); + ScalingService::Get().Uninitialize(); // 不显示托盘图标的情况下关闭主窗口仍会在后台驻留数秒,推测和 XAML Islands 有关 // 这里提前取消热键注册,这样关闭 Magpie 后立即重新打开不会注册热键失败 ShortcutService::Get().Uninitialize(); } -bool App::IsShowTrayIcon() const noexcept { - return AppSettings::Get().IsShowTrayIcon(); +bool App::IsShowNotifyIcon() const noexcept { + return AppSettings::Get().IsShowNotifyIcon(); } -event_token App::IsShowTrayIconChanged(EventHandler const& handler) { - return AppSettings::Get().IsShowTrayIconChanged([handler(handler)](bool value) { +event_token App::IsShowNotifyIconChanged(EventHandler const& handler) { + return AppSettings::Get().IsShowNotifyIconChanged([handler(handler)](bool value) { handler(nullptr, value); }); } -void App::IsShowTrayIconChanged(event_token const& token) { - AppSettings::Get().IsShowTrayIconChanged(token); -} - -void App::HwndMain(uint64_t value) noexcept { - if (_hwndMain == (HWND)value) { - return; - } - - _hwndMain = (HWND)value; - _hwndMainChangedEvent(*this, value); +void App::IsShowNotifyIconChanged(event_token const& token) { + AppSettings::Get().IsShowNotifyIconChanged(token); } void App::RootPage(Magpie::App::RootPage const& rootPage) noexcept { diff --git a/src/Magpie.App/App.h b/src/Magpie.App/App.h index 24e91fe1f..16e744039 100644 --- a/src/Magpie.App/App.h +++ b/src/Magpie.App/App.h @@ -1,11 +1,10 @@ #pragma once #include "App.g.h" -#include "App.base.h" #include namespace winrt::Magpie::App::implementation { -class App : public AppT2 { +class App : public App_base { public: App(); ~App(); @@ -18,24 +17,18 @@ class App : public AppT2 { void Uninitialize(); - bool IsShowTrayIcon() const noexcept; + bool IsShowNotifyIcon() const noexcept; - event_token IsShowTrayIconChanged(EventHandler const& handler); + event_token IsShowNotifyIconChanged(EventHandler const& handler); - void IsShowTrayIconChanged(event_token const& token); + void IsShowNotifyIconChanged(event_token const& token); 
uint64_t HwndMain() const noexcept { return (uint64_t)_hwndMain; } - void HwndMain(uint64_t value) noexcept; - - event_token HwndMainChanged(EventHandler const& handler) { - return _hwndMainChangedEvent.add(handler); - } - - void HwndMainChanged(event_token const& token) noexcept { - _hwndMainChangedEvent.remove(token); + void HwndMain(uint64_t value) noexcept { + _hwndMain = (HWND)value; } // 在由外部源引发的回调中可能返回 nullptr @@ -52,15 +45,37 @@ class App : public AppT2 { private: Hosting::WindowsXamlManager _windowsXamlManager{ nullptr }; + weak_ref _rootPage{ nullptr }; + HWND _hwndMain = NULL; + bool _isClosed = false; - HWND _hwndMain{}; - event> _hwndMainChangedEvent; + //////////////////////////////////////////////////// + // + // IXamlMetadataProvider 相关 + // + ///////////////////////////////////////////////////// +public: + Markup::IXamlType GetXamlType(Interop::TypeName const& type) { + return _AppProvider()->GetXamlType(type); + } - weak_ref _rootPage{ nullptr }; + Markup::IXamlType GetXamlType(hstring const& fullName) { + return _AppProvider()->GetXamlType(fullName); + } - event> _hostWndFocusChangedEvent; - bool _isHostWndFocused = false; - bool _isClosed = false; + com_array GetXmlnsDefinitions() { + return _AppProvider()->GetXmlnsDefinitions(); + } + +private: + com_ptr _AppProvider() { + if (!_appProvider) { + _appProvider = make_self(); + } + return _appProvider; + } + + com_ptr _appProvider; }; } diff --git a/src/Magpie.App/App.idl b/src/Magpie.App/App.idl index df6748bc2..9df1310f0 100644 --- a/src/Magpie.App/App.idl +++ b/src/Magpie.App/App.idl @@ -1,10 +1,21 @@ +namespace Magpie.App { + delegate void SignalDelegate(); +} + #include "BoolNegationConverter.idl" #include "BoolToNegativeVisibilityConverter.idl" +#include "ControlSizeTrigger.idl" +#include "IsEqualStateTrigger.idl" +#include "IsNullStateTrigger.idl" #include "LoggerHelper.idl" #include "TextBlockHelper.idl" +#include "SimpleStackPanel.idl" #include "WrapPanel.idl" +#include 
"CaptionButtonsControl.idl" +#include "TitleBarControl.idl" #include "PageFrame.idl" #include "SettingsCard.idl" +#include "SettingsExpander.idl" #include "SettingsGroup.idl" #include "KeyVisual.idl" #include "KeyVisualState.idl" @@ -26,8 +37,6 @@ #include "ScalingConfigurationPage.idl" #include "ProfilePage.idl" #include "SettingsPage.idl" -#include "CaptionButtonsControl.idl" -#include "TitleBarControl.idl" namespace Magpie.App { enum ShortcutAction { @@ -58,11 +67,10 @@ namespace Magpie.App { StartUpOptions Initialize(Int32 notUsed); void Uninitialize(); - Boolean IsShowTrayIcon { get; }; - event Windows.Foundation.EventHandler IsShowTrayIconChanged; + Boolean IsShowNotifyIcon { get; }; + event Windows.Foundation.EventHandler IsShowNotifyIconChanged; UInt64 HwndMain; - event Windows.Foundation.EventHandler HwndMainChanged; RootPage RootPage; diff --git a/src/Magpie.App/App.xaml b/src/Magpie.App/App.xaml index b8c040b81..4915ea450 100644 --- a/src/Magpie.App/App.xaml +++ b/src/Magpie.App/App.xaml @@ -5,667 +5,25 @@ xmlns:muxc="using:Microsoft.UI.Xaml.Controls"> + + + + + + + + - - - - 12 - - - - 2 - - + 2 + 32 14 @@ -678,57 +36,8 @@ - 6,16,16,16 - 16,0,0,0 - - - - - 0 - 56, 8, 40, 8 - - Transparent - - - - - 0,0,8,0 - - - - - - - - + 0,16,22,16 + 20,0,0,0 Firebrick 1 - - - - - - 2 - - - - - - - diff --git a/src/Magpie.App/AppSettings.cpp b/src/Magpie.App/AppSettings.cpp index 375aba1af..08a01d896 100644 --- a/src/Magpie.App/AppSettings.cpp +++ b/src/Magpie.App/AppSettings.cpp @@ -21,7 +21,7 @@ using namespace ::Magpie::Core; namespace winrt::Magpie::App { -static constexpr uint32_t SETTINGS_VERSION = 1; +static constexpr uint32_t SETTINGS_VERSION = 2; _AppSettingsData::_AppSettingsData() {} @@ -83,19 +83,20 @@ static void WriteProfile(rapidjson::PrettyWriter& write writer.Uint((uint32_t)profile.captureMethod); writer.Key("multiMonitorUsage"); writer.Uint((uint32_t)profile.multiMonitorUsage); + writer.Key("graphicsCard"); writer.Int(profile.graphicsCard); + 
writer.Key("frameRateLimiterEnabled"); + writer.Bool(profile.isFrameRateLimiterEnabled); + writer.Key("maxFrameRate"); + writer.Double(profile.maxFrameRate); writer.Key("disableWindowResizing"); - writer.Bool(profile.IsDisableWindowResizing()); + writer.Bool(profile.IsWindowResizingDisabled()); writer.Key("3DGameMode"); writer.Bool(profile.Is3DGameMode()); writer.Key("showFPS"); writer.Bool(profile.IsShowFPS()); - writer.Key("VSync"); - writer.Bool(profile.IsVSync()); - writer.Key("tripleBuffering"); - writer.Bool(profile.IsTripleBuffering()); writer.Key("captureTitleBar"); writer.Bool(profile.IsCaptureTitleBar()); writer.Key("adjustCursorSpeed"); @@ -103,7 +104,7 @@ static void WriteProfile(rapidjson::PrettyWriter& write writer.Key("drawCursor"); writer.Bool(profile.IsDrawCursor()); writer.Key("disableDirectFlip"); - writer.Bool(profile.IsDisableDirectFlip()); + writer.Bool(profile.IsDirectFlipDisabled()); writer.Key("cursorScaling"); writer.Uint((uint32_t)profile.cursorScaling); @@ -160,8 +161,10 @@ static HRESULT CALLBACK TaskDialogCallback( } static void ShowErrorMessage(const wchar_t* mainInstruction, const wchar_t* content) noexcept { - hstring errorStr = ResourceLoader::GetForCurrentView().GetString(L"AppSettings_Dialog_Error"); - hstring exitStr = ResourceLoader::GetForCurrentView().GetString(L"AppSettings_Dialog_Exit"); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); + const hstring errorStr = resourceLoader.GetString(L"AppSettings_Dialog_Error"); + const hstring exitStr = resourceLoader.GetString(L"AppSettings_Dialog_Exit"); TASKDIALOGCONFIG tdc{ sizeof(TASKDIALOGCONFIG) }; tdc.dwFlags = TDF_SIZE_TO_CONTENT; @@ -185,7 +188,8 @@ static bool ShowOkCancelWarningMessage( ) noexcept { TASKDIALOGCONFIG tdc{ sizeof(TASKDIALOGCONFIG) }; tdc.dwFlags = TDF_SIZE_TO_CONTENT; - hstring warningStr = ResourceLoader::GetForCurrentView().GetString(L"AppSettings_Dialog_Warning"); + const hstring warningStr 
= ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID) + .GetString(L"AppSettings_Dialog_Warning"); tdc.pszWindowTitle = warningStr.c_str(); tdc.pszMainIcon = TD_WARNING_ICON; tdc.pszMainInstruction = mainInstruction; @@ -205,7 +209,7 @@ static bool ShowOkCancelWarningMessage( AppSettings::~AppSettings() {} -bool AppSettings::Initialize() { +bool AppSettings::Initialize() noexcept { Logger& logger = Logger::Get(); // 若程序所在目录存在配置文件则为便携模式 @@ -228,7 +232,8 @@ bool AppSettings::Initialize() { std::string configText; if (!Win32Utils::ReadTextFile(_configPath.c_str(), configText)) { logger.Error("读取配置文件失败"); - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); hstring title = resourceLoader.GetString(L"AppSettings_ErrorDialog_ReadFailed"); hstring content = resourceLoader.GetString(L"AppSettings_ErrorDialog_ConfigLocation"); ShowErrorMessage(title.c_str(), fmt::format(fmt::runtime(std::wstring_view(content)), _configPath).c_str()); @@ -247,7 +252,8 @@ bool AppSettings::Initialize() { doc.ParseInsitu(configText.data()); if (doc.HasParseError()) { Logger::Get().Error(fmt::format("解析配置失败\n\t错误码:{}", (int)doc.GetParseError())); - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); hstring title = resourceLoader.GetString(L"AppSettings_ErrorDialog_NotValidJson"); hstring content = resourceLoader.GetString(L"AppSettings_ErrorDialog_ConfigLocation"); ShowErrorMessage(title.c_str(), fmt::format(fmt::runtime(std::wstring_view(content)), _configPath).c_str()); @@ -256,7 +262,8 @@ bool AppSettings::Initialize() { if (!doc.IsObject()) { Logger::Get().Error("配置文件根元素不是 Object"); - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + 
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); hstring title = resourceLoader.GetString(L"AppSettings_ErrorDialog_ParseFailed"); hstring content = resourceLoader.GetString(L"AppSettings_ErrorDialog_ConfigLocation"); ShowErrorMessage(title.c_str(), fmt::format(fmt::runtime(std::wstring_view(content)), _configPath).c_str()); @@ -272,7 +279,8 @@ bool AppSettings::Initialize() { if (settingsVersion > SETTINGS_VERSION) { Logger::Get().Warn("未知的配置文件版本"); - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); if (_isPortableMode) { hstring contentStr = resourceLoader.GetString( L"AppSettings_PortableModeUnkownConfiguration_Content"); @@ -312,12 +320,12 @@ bool AppSettings::Initialize() { return true; } -bool AppSettings::Save() { +bool AppSettings::Save() noexcept { _UpdateWindowPlacement(); return _Save(*this); } -fire_and_forget AppSettings::SaveAsync() { +fire_and_forget AppSettings::SaveAsync() noexcept { _UpdateWindowPlacement(); // 拷贝当前配置 @@ -327,7 +335,7 @@ fire_and_forget AppSettings::SaveAsync() { _Save(data); } -void AppSettings::IsPortableMode(bool value) { +void AppSettings::IsPortableMode(bool value) noexcept { if (_isPortableMode == value) { return; } @@ -405,10 +413,12 @@ void AppSettings::IsDeveloperMode(bool value) noexcept { if (!value) { // 关闭开发者模式则禁用所有开发者选项 _isDebugMode = false; - _isDisableEffectCache = false; - _isDisableFontCache = false; + _isEffectCacheDisabled = false; + _isFontCacheDisabled = false; _isSaveEffectSources = false; _isWarningsAreErrors = false; + _duplicateFrameDetectionMode = DuplicateFrameDetectionMode::Dynamic; + _isStatisticsForDynamicDetectionEnabled = false; } SaveAsync(); @@ -423,19 +433,19 @@ void AppSettings::IsAlwaysRunAsAdmin(bool value) noexcept { std::wstring arguments; if (AutoStartHelper::IsAutoStartEnabled(arguments)) { // 更新启动任务 - 
AutoStartHelper::EnableAutoStart(value, _isShowTrayIcon ? arguments.c_str() : nullptr); + AutoStartHelper::EnableAutoStart(value, _isShowNotifyIcon ? arguments.c_str() : nullptr); } SaveAsync(); } -void AppSettings::IsShowTrayIcon(bool value) noexcept { - if (_isShowTrayIcon == value) { +void AppSettings::IsShowNotifyIcon(bool value) noexcept { + if (_isShowNotifyIcon == value) { return; } - _isShowTrayIcon = value; - _isShowTrayIconChangedEvent(value); + _isShowNotifyIcon = value; + _isShowNotifyIconChangedEvent(value); SaveAsync(); } @@ -521,9 +531,9 @@ bool AppSettings::_Save(const _AppSettingsData& data) noexcept { writer.Key("debugMode"); writer.Bool(data._isDebugMode); writer.Key("disableEffectCache"); - writer.Bool(data._isDisableEffectCache); + writer.Bool(data._isEffectCacheDisabled); writer.Key("disableFontCache"); - writer.Bool(data._isDisableFontCache); + writer.Bool(data._isFontCacheDisabled); writer.Key("saveEffectSources"); writer.Bool(data._isSaveEffectSources); writer.Key("warningsAreErrors"); @@ -534,8 +544,8 @@ bool AppSettings::_Save(const _AppSettingsData& data) noexcept { writer.Bool(data._isSimulateExclusiveFullscreen); writer.Key("alwaysRunAsAdmin"); writer.Bool(data._isAlwaysRunAsAdmin); - writer.Key("showTrayIcon"); - writer.Bool(data._isShowTrayIcon); + writer.Key("showNotifyIcon"); + writer.Bool(data._isShowNotifyIcon); writer.Key("inlineParams"); writer.Bool(data._isInlineParams); writer.Key("autoCheckForUpdates"); @@ -544,23 +554,10 @@ bool AppSettings::_Save(const _AppSettingsData& data) noexcept { writer.Bool(data._isCheckForPreviewUpdates); writer.Key("updateCheckDate"); writer.Int64(data._updateCheckDate.time_since_epoch().count()); - - if (!data._downscalingEffect.name.empty()) { - writer.Key("downscalingEffect"); - writer.StartObject(); - writer.Key("name"); - writer.String(StrUtils::UTF16ToUTF8(data._downscalingEffect.name).c_str()); - if (!data._downscalingEffect.parameters.empty()) { - writer.Key("parameters"); - 
writer.StartObject(); - for (const auto& [name, value] : data._downscalingEffect.parameters) { - writer.Key(StrUtils::UTF16ToUTF8(name).c_str()); - writer.Double(value); - } - writer.EndObject(); - } - writer.EndObject(); - } + writer.Key("duplicateFrameDetectionMode"); + writer.Uint((uint32_t)data._duplicateFrameDetectionMode); + writer.Key("enableStatisticsForDynamicDetection"); + writer.Bool(data._isStatisticsForDynamicDetectionEnabled); ScalingModesService::Get().Export(writer); @@ -585,7 +582,7 @@ bool AppSettings::_Save(const _AppSettingsData& data) noexcept { } // 永远不会失败,遇到不合法的配置项时静默忽略 -void AppSettings::_LoadSettings(const rapidjson::GenericObject& root, uint32_t /*version*/) { +void AppSettings::_LoadSettings(const rapidjson::GenericObject& root, uint32_t /*version*/) noexcept { { std::wstring language; JsonHelper::ReadString(root, "language", language); @@ -690,8 +687,8 @@ void AppSettings::_LoadSettings(const rapidjson::GenericObjectvalue.IsObject()) { - auto downscalingEffectObj = downscalingEffectNode->value.GetObj(); - - JsonHelper::ReadString(downscalingEffectObj, "name", _downscalingEffect.name); - if (!_downscalingEffect.name.empty()) { - auto parametersNode = downscalingEffectObj.FindMember("parameters"); - if (parametersNode != downscalingEffectObj.MemberEnd() && parametersNode->value.IsObject()) { - auto paramsObj = parametersNode->value.GetObj(); - _downscalingEffect.parameters.reserve(paramsObj.MemberCount()); - for (const auto& param : paramsObj) { - if (!param.value.IsNumber()) { - continue; - } - - std::wstring name = StrUtils::UTF8ToUTF16(param.name.GetString()); - _downscalingEffect.parameters[name] = param.value.GetFloat(); - } - } + { + uint32_t duplicateFrameDetectionMode = (uint32_t)DuplicateFrameDetectionMode::Dynamic; + JsonHelper::ReadUInt(root, "duplicateFrameDetectionMode", duplicateFrameDetectionMode); + if (duplicateFrameDetectionMode > 2) { + duplicateFrameDetectionMode = (uint32_t)DuplicateFrameDetectionMode::Dynamic; } + 
_duplicateFrameDetectionMode = (::Magpie::Core::DuplicateFrameDetectionMode)duplicateFrameDetectionMode; } + JsonHelper::ReadBool(root, "enableStatisticsForDynamicDetection", _isStatisticsForDynamicDetectionEnabled); [[maybe_unused]] bool result = ScalingModesService::Get().Import(root, true); assert(result); @@ -774,7 +761,7 @@ bool AppSettings::_LoadProfile( const rapidjson::GenericObject& profileObj, Profile& profile, bool isDefault -) const { +) const noexcept { if (!isDefault) { if (!JsonHelper::ReadString(profileObj, "name", profile.name, true)) { return false; @@ -846,18 +833,22 @@ bool AppSettings::_LoadProfile( profile.graphicsCard = (int)graphicsAdater - 1; } - JsonHelper::ReadBoolFlag(profileObj, "disableWindowResizing", MagFlags::DisableWindowResizing, profile.flags); - JsonHelper::ReadBoolFlag(profileObj, "3DGameMode", MagFlags::Is3DGameMode, profile.flags); - JsonHelper::ReadBoolFlag(profileObj, "showFPS", MagFlags::ShowFPS, profile.flags); - JsonHelper::ReadBoolFlag(profileObj, "VSync", MagFlags::VSync, profile.flags); - JsonHelper::ReadBoolFlag(profileObj, "tripleBuffering", MagFlags::TripleBuffering, profile.flags); - if (!JsonHelper::ReadBoolFlag(profileObj, "captureTitleBar", MagFlags::CaptureTitleBar, profile.flags, true)) { + JsonHelper::ReadBool(profileObj, "frameRateLimiterEnabled", profile.isFrameRateLimiterEnabled); + JsonHelper::ReadFloat(profileObj, "maxFrameRate", profile.maxFrameRate); + if (profile.maxFrameRate < 10.0f || profile.maxFrameRate > 1000.0f) { + profile.maxFrameRate = 60.0f; + } + + JsonHelper::ReadBoolFlag(profileObj, "disableWindowResizing", ScalingFlags::DisableWindowResizing, profile.flags); + JsonHelper::ReadBoolFlag(profileObj, "3DGameMode", ScalingFlags::Is3DGameMode, profile.flags); + JsonHelper::ReadBoolFlag(profileObj, "showFPS", ScalingFlags::ShowFPS, profile.flags); + if (!JsonHelper::ReadBoolFlag(profileObj, "captureTitleBar", ScalingFlags::CaptureTitleBar, profile.flags, true)) { // v0.10.0-preview1 使用 
reserveTitleBar - JsonHelper::ReadBoolFlag(profileObj, "reserveTitleBar", MagFlags::CaptureTitleBar, profile.flags); + JsonHelper::ReadBoolFlag(profileObj, "reserveTitleBar", ScalingFlags::CaptureTitleBar, profile.flags); } - JsonHelper::ReadBoolFlag(profileObj, "adjustCursorSpeed", MagFlags::AdjustCursorSpeed, profile.flags); - JsonHelper::ReadBoolFlag(profileObj, "drawCursor", MagFlags::DrawCursor, profile.flags); - JsonHelper::ReadBoolFlag(profileObj, "disableDirectFlip", MagFlags::DisableDirectFlip, profile.flags); + JsonHelper::ReadBoolFlag(profileObj, "adjustCursorSpeed", ScalingFlags::AdjustCursorSpeed, profile.flags); + JsonHelper::ReadBoolFlag(profileObj, "drawCursor", ScalingFlags::DrawCursor, profile.flags); + JsonHelper::ReadBoolFlag(profileObj, "disableDirectFlip", ScalingFlags::DisableDirectFlip, profile.flags); { uint32_t cursorScaling = (uint32_t)CursorScaling::NoScaling; @@ -904,7 +895,7 @@ bool AppSettings::_LoadProfile( return true; } -bool AppSettings::_SetDefaultShortcuts() { +bool AppSettings::_SetDefaultShortcuts() noexcept { bool changed = false; Shortcut& scaleShortcut = _shortcuts[(size_t)ShortcutAction::Scale]; @@ -928,7 +919,7 @@ bool AppSettings::_SetDefaultShortcuts() { return changed; } -void AppSettings::_SetDefaultScalingModes() { +void AppSettings::_SetDefaultScalingModes() noexcept { _scalingModes.resize(7); // Lanczos @@ -996,11 +987,6 @@ void AppSettings::_SetDefaultScalingModes() { nearest.scale = { 2.0f,2.0f }; } - // 降采样效果默认为 Bicubic (B=0, C=0.5) - _downscalingEffect.name = L"Bicubic"; - _downscalingEffect.parameters[L"paramB"] = 0.0f; - _downscalingEffect.parameters[L"paramC"] = 0.5f; - // 全局缩放模式默认为 Lanczos _defaultProfile.scalingMode = 0; } diff --git a/src/Magpie.App/AppSettings.h b/src/Magpie.App/AppSettings.h index 9eea0cf53..fb862c135 100644 --- a/src/Magpie.App/AppSettings.h +++ b/src/Magpie.App/AppSettings.h @@ -6,6 +6,7 @@ #include #include #include "Win32Utils.h" +#include namespace winrt::Magpie::App { @@ -23,7 
+24,6 @@ struct _AppSettingsData { std::array _shortcuts; - ::Magpie::Core::DownscalingEffect _downscalingEffect; std::vector _scalingModes; Profile _defaultProfile; @@ -47,23 +47,27 @@ struct _AppSettingsData { // 上一次自动检查更新的日期 std::chrono::system_clock::time_point _updateCheckDate; + + ::Magpie::Core::DuplicateFrameDetectionMode _duplicateFrameDetectionMode = + ::Magpie::Core::DuplicateFrameDetectionMode::Dynamic; bool _isPortableMode = false; bool _isAlwaysRunAsAdmin = false; bool _isDeveloperMode = false; bool _isDebugMode = false; - bool _isDisableEffectCache = false; - bool _isDisableFontCache = false; + bool _isEffectCacheDisabled = false; + bool _isFontCacheDisabled = false; bool _isSaveEffectSources = false; bool _isWarningsAreErrors = false; bool _isAllowScalingMaximized = false; bool _isSimulateExclusiveFullscreen = false; bool _isInlineParams = false; - bool _isShowTrayIcon = true; + bool _isShowNotifyIcon = true; bool _isAutoRestore = false; bool _isMainWindowMaximized = false; bool _isAutoCheckForUpdates = true; bool _isCheckForPreviewUpdates = false; + bool _isStatisticsForDynamicDetectionEnabled = false; }; class AppSettings : private _AppSettingsData { @@ -75,11 +79,11 @@ class AppSettings : private _AppSettingsData { virtual ~AppSettings(); - bool Initialize(); + bool Initialize() noexcept; - bool Save(); + bool Save() noexcept; - fire_and_forget SaveAsync(); + fire_and_forget SaveAsync() noexcept; const std::wstring& ConfigDir() const noexcept { return _configDir; @@ -89,7 +93,7 @@ class AppSettings : private _AppSettingsData { return _isPortableMode; } - void IsPortableMode(bool value); + void IsPortableMode(bool value) noexcept; int Language() const noexcept { return _language; @@ -207,21 +211,21 @@ class AppSettings : private _AppSettingsData { SaveAsync(); } - bool IsDisableEffectCache() const noexcept { - return _isDisableEffectCache; + bool IsEffectCacheDisabled() const noexcept { + return _isEffectCacheDisabled; } - void 
IsDisableEffectCache(bool value) noexcept { - _isDisableEffectCache = value; + void IsEffectCacheDisabled(bool value) noexcept { + _isEffectCacheDisabled = value; SaveAsync(); } - bool IsDisableFontCache() const noexcept { - return _isDisableFontCache; + bool IsFontCacheDisabled() const noexcept { + return _isFontCacheDisabled; } - void IsDisableFontCache(bool value) noexcept { - _isDisableFontCache = value; + void IsFontCacheDisabled(bool value) noexcept { + _isFontCacheDisabled = value; SaveAsync(); } @@ -275,25 +279,25 @@ class AppSettings : private _AppSettingsData { void IsAlwaysRunAsAdmin(bool value) noexcept; - bool IsShowTrayIcon() const noexcept { - return _isShowTrayIcon; + bool IsShowNotifyIcon() const noexcept { + return _isShowNotifyIcon; } - void IsShowTrayIcon(bool value) noexcept; + void IsShowNotifyIcon(bool value) noexcept; - event_token IsShowTrayIconChanged(delegate const& handler) { - return _isShowTrayIconChangedEvent.add(handler); + event_token IsShowNotifyIconChanged(delegate const& handler) { + return _isShowNotifyIconChangedEvent.add(handler); } - WinRTUtils::EventRevoker IsShowTrayIconChanged(auto_revoke_t, delegate const& handler) { - event_token token = IsShowTrayIconChanged(handler); + WinRTUtils::EventRevoker IsShowNotifyIconChanged(auto_revoke_t, delegate const& handler) { + event_token token = IsShowNotifyIconChanged(handler); return WinRTUtils::EventRevoker([this, token]() { - IsShowTrayIconChanged(token); + IsShowNotifyIconChanged(token); }); } - void IsShowTrayIconChanged(event_token const& token) { - _isShowTrayIconChangedEvent.remove(token); + void IsShowNotifyIconChanged(event_token const& token) { + _isShowNotifyIconChangedEvent.remove(token); } bool IsInlineParams() const noexcept { @@ -305,10 +309,6 @@ class AppSettings : private _AppSettingsData { SaveAsync(); } - ::Magpie::Core::DownscalingEffect& DownscalingEffect() noexcept { - return _downscalingEffect; - } - std::vector& ScalingModes() noexcept { return _scalingModes; 
} @@ -355,6 +355,24 @@ class AppSettings : private _AppSettingsData { _updateCheckDate = value; } + ::Magpie::Core::DuplicateFrameDetectionMode DuplicateFrameDetectionMode() const noexcept { + return _duplicateFrameDetectionMode; + } + + void DuplicateFrameDetectionMode(::Magpie::Core::DuplicateFrameDetectionMode value) noexcept { + _duplicateFrameDetectionMode = value; + SaveAsync(); + } + + bool IsStatisticsForDynamicDetectionEnabled() const noexcept { + return _isStatisticsForDynamicDetectionEnabled; + } + + void IsStatisticsForDynamicDetectionEnabled(bool value) noexcept { + _isStatisticsForDynamicDetectionEnabled = value; + SaveAsync(); + } + private: AppSettings() = default; @@ -364,14 +382,14 @@ class AppSettings : private _AppSettingsData { void _UpdateWindowPlacement() noexcept; bool _Save(const _AppSettingsData& data) noexcept; - void _LoadSettings(const rapidjson::GenericObject& root, uint32_t version); + void _LoadSettings(const rapidjson::GenericObject& root, uint32_t version) noexcept; bool _LoadProfile( const rapidjson::GenericObject& profileObj, Profile& profile, bool isDefault = false - ) const; - bool _SetDefaultShortcuts(); - void _SetDefaultScalingModes(); + ) const noexcept; + bool _SetDefaultShortcuts() noexcept; + void _SetDefaultScalingModes() noexcept; void _UpdateConfigPath() noexcept; @@ -382,7 +400,7 @@ class AppSettings : private _AppSettingsData { event> _shortcutChangedEvent; event> _isAutoRestoreChangedEvent; event> _countdownSecondsChangedEvent; - event> _isShowTrayIconChangedEvent; + event> _isShowNotifyIconChangedEvent; event> _isAutoCheckForUpdatesChangedEvent; }; diff --git a/src/Magpie.App/Button.xaml b/src/Magpie.App/Button.xaml deleted file mode 100644 index f27f0b295..000000000 --- a/src/Magpie.App/Button.xaml +++ /dev/null @@ -1,72 +0,0 @@ - - - - - - - diff --git a/src/Magpie.App/CaptionButtonsControl.cpp b/src/Magpie.App/CaptionButtonsControl.cpp index a97a106bd..872f96530 100644 --- 
a/src/Magpie.App/CaptionButtonsControl.cpp +++ b/src/Magpie.App/CaptionButtonsControl.cpp @@ -6,7 +6,7 @@ namespace winrt::Magpie::App::implementation { -Size CaptionButtonsControl::CaptionButtonSize() const noexcept { +Size CaptionButtonsControl::CaptionButtonSize() const { ResourceDictionary resources = Resources(); return { (float)unbox_value(resources.Lookup(box_value(L"CaptionButtonWidth"))), @@ -64,8 +64,10 @@ void CaptionButtonsControl::ReleaseButton(CaptionButton button) { switch (_pressedButton.value()) { case CaptionButton::Minimize: + { PostMessage(hwndMain, WM_SYSCOMMAND, SC_MINIMIZE, 0); break; + } case CaptionButton::Maximize: { POINT cursorPos; @@ -80,9 +82,11 @@ void CaptionButtonsControl::ReleaseButton(CaptionButton button) { break; } case CaptionButton::Close: + { PostMessage(hwndMain, WM_SYSCOMMAND, SC_CLOSE, 0); break; } + } } _pressedButton.reset(); diff --git a/src/Magpie.App/CaptionButtonsControl.h b/src/Magpie.App/CaptionButtonsControl.h index ab7fc7f28..6d2686f8c 100644 --- a/src/Magpie.App/CaptionButtonsControl.h +++ b/src/Magpie.App/CaptionButtonsControl.h @@ -4,9 +4,7 @@ namespace winrt::Magpie::App::implementation { struct CaptionButtonsControl : CaptionButtonsControlT { - CaptionButtonsControl() {} - - Size CaptionButtonSize() const noexcept; + Size CaptionButtonSize() const; void HoverButton(CaptionButton button); diff --git a/src/Magpie.App/CaptionButtonsControl.idl b/src/Magpie.App/CaptionButtonsControl.idl index e3e14de69..036402ea8 100644 --- a/src/Magpie.App/CaptionButtonsControl.idl +++ b/src/Magpie.App/CaptionButtonsControl.idl @@ -5,7 +5,7 @@ namespace Magpie.App { Close = 20 // HTCLOSE }; - runtimeclass CaptionButtonsControl : Windows.UI.Xaml.Controls.StackPanel { + runtimeclass CaptionButtonsControl : Windows.UI.Xaml.Controls.UserControl { CaptionButtonsControl(); Windows.Foundation.Size CaptionButtonSize { get; }; diff --git a/src/Magpie.App/CaptionButtonsControl.xaml b/src/Magpie.App/CaptionButtonsControl.xaml index 
134b2e473..37b0cb6e4 100644 --- a/src/Magpie.App/CaptionButtonsControl.xaml +++ b/src/Magpie.App/CaptionButtonsControl.xaml @@ -1,13 +1,12 @@ - - + + @@ -56,10 +55,7 @@ 46 32 - + - + - - - + + - + #C42B1C + + + + + + + + + diff --git a/src/Magpie.App/ControlSizeTrigger.cpp b/src/Magpie.App/ControlSizeTrigger.cpp new file mode 100644 index 000000000..5fb45afbc --- /dev/null +++ b/src/Magpie.App/ControlSizeTrigger.cpp @@ -0,0 +1,91 @@ +// 移植自 https://github.com/CommunityToolkit/Windows/blob/bef863ca70bb1edf8c940198dd5cc74afa5d2aab/components/Triggers/src/ControlSizeTrigger.cs + +#include "pch.h" +#include "ControlSizeTrigger.h" +#if __has_include("ControlSizeTrigger.g.cpp") +#include "ControlSizeTrigger.g.cpp" +#endif + +namespace winrt::Magpie::App::implementation { + +const DependencyProperty ControlSizeTrigger::_canTriggerProperty = DependencyProperty::Register( + L"CanTrigger", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(true), &ControlSizeTrigger::_OnPropertyChanged) +); + +const DependencyProperty ControlSizeTrigger::_maxWidthProperty = DependencyProperty::Register( + L"MaxWidth", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(std::numeric_limits::infinity()), &ControlSizeTrigger::_OnPropertyChanged) +); + +const DependencyProperty ControlSizeTrigger::_minWidthProperty = DependencyProperty::Register( + L"MinWidth", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(0.0), &ControlSizeTrigger::_OnPropertyChanged) +); + +const DependencyProperty ControlSizeTrigger::_maxHeightProperty = DependencyProperty::Register( + L"MaxHeight", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(std::numeric_limits::infinity()), &ControlSizeTrigger::_OnPropertyChanged) +); + +const DependencyProperty ControlSizeTrigger::_minHeightProperty = DependencyProperty::Register( + L"MinHeight", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(0.0), &ControlSizeTrigger::_OnPropertyChanged) +); + 
+const DependencyProperty ControlSizeTrigger::_targetElementProperty = DependencyProperty::Register( + L"TargetElement", + xaml_typename(), + xaml_typename(), + PropertyMetadata(nullptr, &ControlSizeTrigger::_OnTargetElementChanged) +); + +void ControlSizeTrigger::_OnPropertyChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { + get_self(sender.as())->_UpdateTrigger(); +} + +void ControlSizeTrigger::_OnTargetElementChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const& e) { + ControlSizeTrigger* that = get_self(sender.as()); + + that->_targetElementSizeChangedRevoker.revoke(); + + if (IInspectable newValue = e.NewValue()) { + that->_targetElementSizeChangedRevoker = newValue.as().SizeChanged(auto_revoke, + [that](IInspectable const&, SizeChangedEventArgs const&) { + that->_UpdateTrigger(); + } + ); + } + + that->_UpdateTrigger(); +} + +void ControlSizeTrigger::_UpdateTrigger() { + const FrameworkElement targetElement = TargetElement(); + + if (!targetElement || !CanTrigger()) { + SetActive(false); + return; + } + + const double actualWidth = targetElement.ActualWidth(); + const double actualHeight = targetElement.ActualHeight(); + SetActive( + actualWidth >= MinWidth() && + actualWidth < MaxWidth() && + actualHeight >= MinHeight() && + actualHeight < MaxHeight() + ); +} + +} diff --git a/src/Magpie.App/ControlSizeTrigger.h b/src/Magpie.App/ControlSizeTrigger.h new file mode 100644 index 000000000..4cf0e0943 --- /dev/null +++ b/src/Magpie.App/ControlSizeTrigger.h @@ -0,0 +1,102 @@ +#pragma once +#include "ControlSizeTrigger.g.h" + +namespace winrt::Magpie::App::implementation { + +struct ControlSizeTrigger : ControlSizeTriggerT { + bool CanTrigger() { + return GetValue(_canTriggerProperty).as(); + } + + void CanTrigger(bool value) { + SetValue(_canTriggerProperty, box_value(value)); + } + + double MaxWidth() { + return GetValue(_maxWidthProperty).as(); + } + + void MaxWidth(double value) { + 
SetValue(_maxWidthProperty, box_value(value)); + } + + double MinWidth() { + return GetValue(_minWidthProperty).as(); + } + + void MinWidth(double value) { + SetValue(_minWidthProperty, box_value(value)); + } + + double MaxHeight() { + return GetValue(_maxHeightProperty).as(); + } + + void MaxHeight(double value) { + SetValue(_maxHeightProperty, box_value(value)); + } + + double MinHeight() { + return GetValue(_minHeightProperty).as(); + } + + void MinHeight(double value) { + SetValue(_minHeightProperty, box_value(value)); + } + + FrameworkElement TargetElement() { + return GetValue(_targetElementProperty).as(); + } + + void TargetElement(FrameworkElement const& value) { + SetValue(_targetElementProperty, box_value(value)); + } + + static DependencyProperty CanTriggerProperty() { + return _canTriggerProperty; + } + + static DependencyProperty MaxWidthProperty() { + return _maxWidthProperty; + } + + static DependencyProperty MinWidthProperty() { + return _minWidthProperty; + } + + static DependencyProperty MaxHeightProperty() { + return _maxHeightProperty; + } + + static DependencyProperty MinHeightProperty() { + return _minHeightProperty; + } + + static DependencyProperty TargetElementProperty() { + return _targetElementProperty; + } + +private: + static const DependencyProperty _canTriggerProperty; + static const DependencyProperty _maxWidthProperty; + static const DependencyProperty _minWidthProperty; + static const DependencyProperty _maxHeightProperty; + static const DependencyProperty _minHeightProperty; + static const DependencyProperty _targetElementProperty; + + static void _OnPropertyChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); + static void _OnTargetElementChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const& ); + + void _UpdateTrigger(); + + FrameworkElement::SizeChanged_revoker _targetElementSizeChangedRevoker; +}; + +} + +namespace winrt::Magpie::App::factory_implementation { + +struct 
ControlSizeTrigger : ControlSizeTriggerT { +}; + +} diff --git a/src/Magpie.App/ControlSizeTrigger.idl b/src/Magpie.App/ControlSizeTrigger.idl new file mode 100644 index 000000000..f4d9c0f63 --- /dev/null +++ b/src/Magpie.App/ControlSizeTrigger.idl @@ -0,0 +1,19 @@ +namespace Magpie.App { + runtimeclass ControlSizeTrigger : Windows.UI.Xaml.StateTriggerBase { + ControlSizeTrigger(); + + Boolean CanTrigger; + Double MaxWidth; + Double MinWidth; + Double MaxHeight; + Double MinHeight; + Windows.UI.Xaml.FrameworkElement TargetElement; + + static Windows.UI.Xaml.DependencyProperty CanTriggerProperty { get; }; + static Windows.UI.Xaml.DependencyProperty MaxWidthProperty { get; }; + static Windows.UI.Xaml.DependencyProperty MinWidthProperty { get; }; + static Windows.UI.Xaml.DependencyProperty MaxHeightProperty { get; }; + static Windows.UI.Xaml.DependencyProperty MinHeightProperty { get; }; + static Windows.UI.Xaml.DependencyProperty TargetElementProperty{ get; }; + } +} diff --git a/src/Magpie.App/EffectParametersViewModel.cpp b/src/Magpie.App/EffectParametersViewModel.cpp index 9e953a335..23f7bbc1e 100644 --- a/src/Magpie.App/EffectParametersViewModel.cpp +++ b/src/Magpie.App/EffectParametersViewModel.cpp @@ -59,17 +59,8 @@ static fire_and_forget LazySaveAppSettings() { EffectParametersViewModel::EffectParametersViewModel(uint32_t scalingModeIdx, uint32_t effectIdx) : _scalingModeIdx(scalingModeIdx), _effectIdx(effectIdx) { - std::wstring_view effectName; - if (_IsDefaultDownscalingEffect()) { - effectName = AppSettings::Get().DownscalingEffect().name; - if (effectName.empty()) { - return; - } - } else { - ScalingMode& scalingMode = ScalingModesService::Get().GetScalingMode(_scalingModeIdx); - effectName = scalingMode.effects[_effectIdx].name; - } - _effectInfo = EffectsService::Get().GetEffect(effectName); + ScalingMode& scalingMode = ScalingModesService::Get().GetScalingMode(_scalingModeIdx); + _effectInfo = 
EffectsService::Get().GetEffect(scalingMode.effects[_effectIdx].name); phmap::flat_hash_map& params = _Data(); @@ -139,7 +130,7 @@ void EffectParametersViewModel::_ScalingModeBoolParameter_PropertyChanged( } ScalingModeBoolParameter* boolParamImpl = - get_self(sender.as>()); + get_self(sender.as()); const std::string& effectName = _effectInfo->params[boolParamImpl->Index()].name; _Data()[StrUtils::UTF8ToUTF16(effectName)] = (float)boolParamImpl->Value(); @@ -155,7 +146,7 @@ void EffectParametersViewModel::_ScalingModeFloatParameter_PropertyChanged( } ScalingModeFloatParameter* floatParamImpl = - get_self(sender.as>()); + get_self(sender.as()); const std::string& effectName = _effectInfo->params[floatParamImpl->Index()].name; _Data()[StrUtils::UTF8ToUTF16(effectName)] = (float)floatParamImpl->Value(); @@ -163,10 +154,6 @@ void EffectParametersViewModel::_ScalingModeFloatParameter_PropertyChanged( } phmap::flat_hash_map& EffectParametersViewModel::_Data() { - if (_IsDefaultDownscalingEffect()) { - return AppSettings::Get().DownscalingEffect().parameters; - } - ScalingMode& scalingMode = ScalingModesService::Get().GetScalingMode(_scalingModeIdx); return scalingMode.effects[_effectIdx].parameters; } diff --git a/src/Magpie.App/EffectParametersViewModel.h b/src/Magpie.App/EffectParametersViewModel.h index 4bfcff531..7f69b9d91 100644 --- a/src/Magpie.App/EffectParametersViewModel.h +++ b/src/Magpie.App/EffectParametersViewModel.h @@ -151,10 +151,6 @@ struct EffectParametersViewModel : EffectParametersViewModelT& _Data(); - bool _IsDefaultDownscalingEffect() const noexcept { - return _scalingModeIdx == std::numeric_limits::max(); - } - IVector _boolParams{ nullptr }; IVector _floatParams{ nullptr }; diff --git a/src/Magpie.App/EffectsService.cpp b/src/Magpie.App/EffectsService.cpp index 509946633..92da838df 100644 --- a/src/Magpie.App/EffectsService.cpp +++ b/src/Magpie.App/EffectsService.cpp @@ -53,7 +53,8 @@ fire_and_forget EffectsService::StartInitialize() { 
std::vector effectNames; ListEffects(effectNames); - uint32_t nEffect = (uint32_t)effectNames.size(); + + const uint32_t nEffect = (uint32_t)effectNames.size(); std::vector descs(nEffect); Win32Utils::RunParallel([&](uint32_t id) { @@ -64,6 +65,7 @@ fire_and_forget EffectsService::StartInitialize() { }, nEffect); _effectsMap.reserve(nEffect); + for (uint32_t i = 0; i < nEffect; ++i) { EffectDesc& effectDesc = descs[i]; if (effectDesc.name.empty()) { @@ -89,12 +91,9 @@ fire_and_forget EffectsService::StartInitialize() { } effect.params = std::move(effectDesc.params); - if (effectDesc.outSizeExpr.first.empty()) { + if (effectDesc.GetOutputSizeExpr().first.empty()) { effect.flags |= EffectInfoFlags::CanScale; } - if (effectDesc.flags & EffectFlags::GenericDownscaler) { - effect.flags |= EffectInfoFlags::GenericDownscaler; - } _effectsMap.emplace(effect.name, (uint32_t)_effects.size() - 1); } diff --git a/src/Magpie.App/EffectsService.h b/src/Magpie.App/EffectsService.h index a72030265..720841063 100644 --- a/src/Magpie.App/EffectsService.h +++ b/src/Magpie.App/EffectsService.h @@ -8,8 +8,7 @@ struct EffectParameterDesc; namespace winrt::Magpie::App { struct EffectInfoFlags { - static constexpr const uint32_t CanScale = 0x1; - static constexpr const uint32_t GenericDownscaler = 0x2; + static constexpr const uint32_t CanScale = 1; }; struct EffectInfo { @@ -24,10 +23,6 @@ struct EffectInfo { bool CanScale() const noexcept { return flags & EffectInfoFlags::CanScale; } - - bool IsGenericDownscaler() const noexcept { - return flags & EffectInfoFlags::GenericDownscaler; - } }; class EffectsService { diff --git a/src/Magpie.App/HomePage.xaml b/src/Magpie.App/HomePage.xaml index 1b6d75600..42654cd1a 100644 --- a/src/Magpie.App/HomePage.xaml +++ b/src/Magpie.App/HomePage.xaml @@ -7,17 +7,17 @@ xmlns:muxc="using:Microsoft.UI.Xaml.Controls" mc:Ignorable="d"> - + - + - + - - - + + + - - - - - Scale - - - + + + + Scale + + - - + + - - - - - Overlay - - - + + + + Overlay + + - - - 
- - - - - - - - - - - - + + @@ -119,8 +117,7 @@ - + - + - + - - - - + + + + - - - - + + - - + + - - - - + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - + - - - - + + - + - - - - + + - - - - - - - - - + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - - 0.5x - 0.75x - - 1.25x - 1.5x - 2x - - - - - - - - - - - - - - - - - - - + + 0.5x + 0.75x + + 1.25x + 1.5x + 2x + + + + + + + + + + + + + + + - + - - - - + + - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + - + diff --git a/src/Magpie.App/ProfileViewModel.cpp b/src/Magpie.App/ProfileViewModel.cpp index a9396b322..2e4827647 100644 --- a/src/Magpie.App/ProfileViewModel.cpp +++ b/src/Magpie.App/ProfileViewModel.cpp @@ -14,8 +14,9 @@ #include "Logger.h" #include "ScalingMode.h" #include -#include "MagService.h" +#include "ScalingService.h" #include "FileDialogHelper.h" +#include "CommonSharedConstants.h" using namespace winrt; using namespace Windows::Graphics::Display; @@ -93,7 +94,8 @@ ProfileViewModel::ProfileViewModel(int profileIdx) : _isDefaultProfile(profileId _LoadIcon(rootPage); } - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); { std::vector scalingModes; scalingModes.push_back(box_value(resourceLoader.GetString(L"Profile_General_ScalingMode_None"))); @@ -158,7 +160,7 @@ fire_and_forget ProfileViewModel::OpenProgramLocation() const noexcept { Win32Utils::OpenFolderAndSelectFile(programLocation.c_str()); } -std::wstring GetStartFolderForSettingLauncher(const Profile& profile) noexcept { +static std::wstring 
GetStartFolderForSettingLauncher(const Profile& profile) noexcept { if (profile.launcherPath.empty()) { // 没有指定启动器 size_t delimPos = profile.pathRule.find_last_of(L'\\'); @@ -207,10 +209,12 @@ void ProfileViewModel::ChangeExeForLaunching() const noexcept { return; } - static std::wstring titleStr(ResourceLoader::GetForCurrentView().GetString(L"SelectLauncherDialog_Title")); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); + static std::wstring titleStr(resourceLoader.GetString(L"SelectLauncherDialog_Title")); fileDialog->SetTitle(titleStr.c_str()); - static std::wstring exeFileStr(ResourceLoader::GetForCurrentView().GetString(L"FileDialog_ExeFile")); + static std::wstring exeFileStr(resourceLoader.GetString(L"FileDialog_ExeFile")); const COMDLG_FILTERSPEC fileType{ exeFileStr.c_str(), L"*.exe"}; fileDialog->SetFileTypes(1, &fileType); fileDialog->SetDefaultExtension(L"exe"); @@ -249,7 +253,8 @@ void ProfileViewModel::ChangeExeForLaunching() const noexcept { hstring ProfileViewModel::Name() const noexcept { if (_data->name.empty()) { - return ResourceLoader::GetForCurrentView().GetString(L"Root_Defaults/Content"); + return ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID) + .GetString(L"Root_Defaults/Content"); } else { return hstring(_data->name); } @@ -404,9 +409,9 @@ int ProfileViewModel::ScalingMode() const noexcept { void ProfileViewModel::ScalingMode(int value) { _data->scalingMode = value - 1; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"ScalingMode")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"ScalingMode")); } int ProfileViewModel::CaptureMethod() const noexcept { @@ -448,13 +453,13 @@ void ProfileViewModel::IsAutoScale(bool value) { } _data->isAutoScale = value; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsAutoScale")); - AppSettings::Get().SaveAsync(); + _propertyChangedEvent(*this, 
PropertyChangedEventArgs(L"IsAutoScale")); + if (value) { // 立即检查前台窗口是否应自动缩放 - MagService::Get().CheckForeground(); + ScalingService::Get().CheckForeground(); } } @@ -468,9 +473,9 @@ void ProfileViewModel::Is3DGameMode(bool value) { } _data->Is3DGameMode(value); - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"Is3DGameMode")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"Is3DGameMode")); } bool ProfileViewModel::HasMultipleMonitors() const noexcept { @@ -492,16 +497,17 @@ void ProfileViewModel::MultiMonitorUsage(int value) { } _data->multiMonitorUsage = multiMonitorUsage; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"MultiMonitorUsage")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"MultiMonitorUsage")); } IVector ProfileViewModel::GraphicsCards() const noexcept { std::vector graphicsCards; graphicsCards.reserve(_graphicsCards.size() + 1); - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); hstring defaultStr = resourceLoader.GetString(L"Profile_General_CaptureMethod_Default"); graphicsCards.push_back(box_value(defaultStr)); @@ -527,73 +533,74 @@ void ProfileViewModel::GraphicsCard(int value) { } _data->graphicsCard = value; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"GraphicsCard")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"GraphicsCard")); } bool ProfileViewModel::IsShowGraphicsCardSettingsCard() const noexcept { return _graphicsCards.size() > 1; } -bool ProfileViewModel::IsShowFPS() const noexcept { - return _data->IsShowFPS(); +bool ProfileViewModel::IsFrameRateLimiterEnabled() const noexcept { + return _data->isFrameRateLimiterEnabled; } -void ProfileViewModel::IsShowFPS(bool value) { - if (_data->IsShowFPS() == value) { +void 
ProfileViewModel::IsFrameRateLimiterEnabled(bool value) { + if (_data->isFrameRateLimiterEnabled == value) { return; } - _data->IsShowFPS(value); - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsShowFPS")); - + _data->isFrameRateLimiterEnabled = value; AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsFrameRateLimiterEnabled")); } -bool ProfileViewModel::IsVSync() const noexcept { - return _data->IsVSync(); +double ProfileViewModel::MaxFrameRate() const noexcept { + return _data->maxFrameRate; } -void ProfileViewModel::IsVSync(bool value) { - if (_data->IsVSync() == value) { +void ProfileViewModel::MaxFrameRate(double value) { + if (_data->maxFrameRate == value) { return; } - _data->IsVSync(value); - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsVSync")); - + // 用户已清空数字框则重置为 60 + _data->maxFrameRate = std::isnan(value) ? 60.0f : (float)value; AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"MaxFrameRate")); } -bool ProfileViewModel::IsTripleBuffering() const noexcept { - return _data->IsTripleBuffering(); +bool ProfileViewModel::IsShowFPS() const noexcept { + return _data->IsShowFPS(); } -void ProfileViewModel::IsTripleBuffering(bool value) { - if (_data->IsTripleBuffering() == value) { +void ProfileViewModel::IsShowFPS(bool value) { + if (_data->IsShowFPS() == value) { return; } - _data->IsTripleBuffering(value); - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsTripleBuffering")); - + _data->IsShowFPS(value); AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsShowFPS")); } -bool ProfileViewModel::IsDisableWindowResizing() const noexcept { - return _data->IsDisableWindowResizing(); +bool ProfileViewModel::IsWindowResizingDisabled() const noexcept { + return _data->IsWindowResizingDisabled(); } -void ProfileViewModel::IsDisableWindowResizing(bool value) { - if (_data->IsDisableWindowResizing() == value) { 
+void ProfileViewModel::IsWindowResizingDisabled(bool value) { + if (_data->IsWindowResizingDisabled() == value) { return; } - _data->IsDisableWindowResizing(value); - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsDisableWindowResizing")); - + _data->IsWindowResizingDisabled(value); AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsWindowResizingDisabled")); } bool ProfileViewModel::IsCaptureTitleBar() const noexcept { @@ -606,9 +613,9 @@ void ProfileViewModel::IsCaptureTitleBar(bool value) { } _data->IsCaptureTitleBar(value); - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsCaptureTitleBar")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsCaptureTitleBar")); } bool ProfileViewModel::CanCaptureTitleBar() const noexcept { @@ -626,9 +633,9 @@ void ProfileViewModel::IsCroppingEnabled(bool value) { } _data->isCroppingEnabled = value; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsCroppingEnabled")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsCroppingEnabled")); } double ProfileViewModel::CroppingLeft() const noexcept { @@ -641,9 +648,9 @@ void ProfileViewModel::CroppingLeft(double value) { } _data->cropping.Left = std::isnan(value) ? 0.0f : (float)value; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CroppingLeft")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CroppingLeft")); } double ProfileViewModel::CroppingTop() const noexcept { @@ -657,9 +664,9 @@ void ProfileViewModel::CroppingTop(double value) { // 用户已清空数字框则重置为 0 _data->cropping.Top = std::isnan(value) ? 
0.0f : (float)value; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CroppingTop")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CroppingTop")); } double ProfileViewModel::CroppingRight() const noexcept { @@ -672,9 +679,9 @@ void ProfileViewModel::CroppingRight(double value) { } _data->cropping.Right = std::isnan(value) ? 0.0f : (float)value; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CroppingRight")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CroppingRight")); } double ProfileViewModel::CroppingBottom() const noexcept { @@ -687,9 +694,9 @@ void ProfileViewModel::CroppingBottom(double value) { } _data->cropping.Bottom = std::isnan(value) ? 0.0f : (float)value; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CroppingBottom")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CroppingBottom")); } bool ProfileViewModel::IsAdjustCursorSpeed() const noexcept { @@ -702,9 +709,9 @@ void ProfileViewModel::IsAdjustCursorSpeed(bool value) { } _data->IsAdjustCursorSpeed(value); - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsAdjustCursorSpeed")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsAdjustCursorSpeed")); } bool ProfileViewModel::IsDrawCursor() const noexcept { @@ -717,9 +724,9 @@ void ProfileViewModel::IsDrawCursor(bool value) { } _data->IsDrawCursor(value); - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsDrawCursor")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsDrawCursor")); } int ProfileViewModel::CursorScaling() const noexcept { @@ -737,9 +744,9 @@ void ProfileViewModel::CursorScaling(int value) { } _data->cursorScaling = cursorScaling; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CursorScaling")); - AppSettings::Get().SaveAsync(); + + 
_propertyChangedEvent(*this, PropertyChangedEventArgs(L"CursorScaling")); } double ProfileViewModel::CustomCursorScaling() const noexcept { @@ -752,9 +759,9 @@ void ProfileViewModel::CustomCursorScaling(double value) { } _data->customCursorScaling = std::isnan(value) ? 1.0f : (float)value; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CustomCursorScaling")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CustomCursorScaling")); } int ProfileViewModel::CursorInterpolationMode() const noexcept { @@ -772,9 +779,9 @@ void ProfileViewModel::CursorInterpolationMode(int value) { } _data->cursorInterpolationMode = cursorInterpolationMode; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CursorInterpolationMode")); - AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"CursorInterpolationMode")); } hstring ProfileViewModel::LaunchParameters() const noexcept { @@ -785,33 +792,24 @@ void ProfileViewModel::LaunchParameters(const hstring& value) { std::wstring_view trimmed(value); StrUtils::Trim(trimmed); _data->launchParameters = trimmed; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"LaunchParameters")); - AppSettings::Get().SaveAsync(); -} -void ProfileViewModel::IsEditingLaunchParameters(bool value) { - if (_isEditingLaunchParameters == value) { - return; - } - - _isEditingLaunchParameters = value; - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsEditingLaunchParameters")); + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"LaunchParameters")); } -bool ProfileViewModel::IsDisableDirectFlip() const noexcept { - return _data->IsDisableDirectFlip(); +bool ProfileViewModel::IsDirectFlipDisabled() const noexcept { + return _data->IsDirectFlipDisabled(); } -void ProfileViewModel::IsDisableDirectFlip(bool value) { - if (_data->IsDisableDirectFlip() == value) { +void ProfileViewModel::IsDirectFlipDisabled(bool value) { + if 
(_data->IsDirectFlipDisabled() == value) { return; } - _data->IsDisableDirectFlip(value); - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsDisableDirectFlip")); - + _data->IsDirectFlipDisabled(value); AppSettings::Get().SaveAsync(); + + _propertyChangedEvent(*this, PropertyChangedEventArgs(L"IsDirectFlipDisabled")); } fire_and_forget ProfileViewModel::_LoadIcon(FrameworkElement const& rootPage) { diff --git a/src/Magpie.App/ProfileViewModel.h b/src/Magpie.App/ProfileViewModel.h index 90a2745c1..78ce07879 100644 --- a/src/Magpie.App/ProfileViewModel.h +++ b/src/Magpie.App/ProfileViewModel.h @@ -94,17 +94,17 @@ struct ProfileViewModel : ProfileViewModelT { bool IsShowGraphicsCardSettingsCard() const noexcept; - bool IsShowFPS() const noexcept; - void IsShowFPS(bool value); + bool IsFrameRateLimiterEnabled() const noexcept; + void IsFrameRateLimiterEnabled(bool value); - bool IsVSync() const noexcept; - void IsVSync(bool value); + double MaxFrameRate() const noexcept; + void MaxFrameRate(double value); - bool IsTripleBuffering() const noexcept; - void IsTripleBuffering(bool value); + bool IsShowFPS() const noexcept; + void IsShowFPS(bool value); - bool IsDisableWindowResizing() const noexcept; - void IsDisableWindowResizing(bool value); + bool IsWindowResizingDisabled() const noexcept; + void IsWindowResizingDisabled(bool value); bool IsCaptureTitleBar() const noexcept; void IsCaptureTitleBar(bool value); @@ -144,14 +144,8 @@ struct ProfileViewModel : ProfileViewModelT { hstring LaunchParameters() const noexcept; void LaunchParameters(const hstring& value); - bool IsEditingLaunchParameters() const noexcept { - return _isEditingLaunchParameters; - } - - void IsEditingLaunchParameters(bool value); - - bool IsDisableDirectFlip() const noexcept; - void IsDisableDirectFlip(bool value); + bool IsDirectFlipDisabled() const noexcept; + void IsDirectFlipDisabled(bool value); private: fire_and_forget _LoadIcon(FrameworkElement const& rootPage); @@ -179,7 +173,6 @@ 
struct ProfileViewModel : ProfileViewModelT { const bool _isDefaultProfile = true; bool _isRenameConfirmButtonEnabled = false; - bool _isEditingLaunchParameters = false; }; } diff --git a/src/Magpie.App/ProfileViewModel.idl b/src/Magpie.App/ProfileViewModel.idl index f02dab9ce..5b2cf578b 100644 --- a/src/Magpie.App/ProfileViewModel.idl +++ b/src/Magpie.App/ProfileViewModel.idl @@ -42,9 +42,10 @@ namespace Magpie.App { Boolean IsShowGraphicsCardSettingsCard { get; }; Boolean IsShowFPS; - Boolean IsVSync; - Boolean IsTripleBuffering; - Boolean IsDisableWindowResizing; + Boolean IsFrameRateLimiterEnabled; + Double MaxFrameRate; + + Boolean IsWindowResizingDisabled; Boolean IsCaptureTitleBar; Boolean CanCaptureTitleBar { get; }; @@ -61,7 +62,6 @@ namespace Magpie.App { Int32 CursorInterpolationMode; String LaunchParameters; - Boolean IsEditingLaunchParameters; - Boolean IsDisableDirectFlip; + Boolean IsDirectFlipDisabled; } } diff --git a/src/Magpie.App/Resources.language-de.resw b/src/Magpie.App/Resources.language-de.resw index 213704590..1c296ab7e 100644 --- a/src/Magpie.App/Resources.language-de.resw +++ b/src/Magpie.App/Resources.language-de.resw @@ -58,7 +58,7 @@ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 - + Aktivierung @@ -82,7 +82,7 @@ Automatisch skalieren, sobald das Fenster zum Vordergrund zurück kehrt - + Verzögertes skalieren @@ -142,22 +142,22 @@ Hell - + Automatisch wiederherstellen Skaliert nach {}s - + Verzögerung in Sekunden Skalierungs Konfiguration - + Erweitert - + Allgemein @@ -166,25 +166,19 @@ Dunkel - + Skalierung - - Wird automatisch angewendet, wenn das Ausgabebild zu groß ist, um auf dem Bildschirm anzuzeigen - - - Standard Downscaling Effekt - Weitere Optionen - + Skalierungsmodus Du brauchst Administrationsrechte, um diese Einstellung zu nutzen - + Immer als Administrator ausführen @@ -193,31 +187,31 @@ Kopiere von - + Sprache - + Magpie wird im Hintergrund 
weiterlaufen, nach dem das Fenster geschlossen wurde - + App in der Taskleiste anzeigen - + Design Regelmäßig nach Aktualisierungen suchen - + 3D Spielmodus - + Automatisch skalieren, wenn im Vordergrund - + Aufnahmemethode - + Bevorzugte Bildschirme @@ -229,7 +223,7 @@ Bildschirme, die vom Quellfenster durchschnitten werden - + Skalierungsmodus @@ -256,16 +250,16 @@ Umbenennen - + Grafikkarte Um diese Einstellung zu verwenden, müssen Sie die Option "Anwendung in der Taskleiste anzeigen" aktivieren - + Beim Starten in die Taskleiste minimieren - + FPS Anzeigen @@ -301,7 +295,7 @@ Die globale Konfigurationsdatei stammt von einer unbekannten Version und wird möglicherweise nicht korrekt geparst. - + Skalierungsfaktor @@ -319,7 +313,7 @@ Standard - + Quellfenster @@ -328,24 +322,21 @@ Windows Standard - + Portable mode - + Beim Starten öffnen - + Tastaturkürzel Skalierungs Tastaturkürzel - + Starten - - Allgemein - Exportieren @@ -397,9 +388,6 @@ Sind Sie sicher, dass Sie diesen Skalierungsmodus löschen wollen? - - keine - Aufwärts bewegen @@ -460,7 +448,7 @@ Einstellen des Skalierungsfaktors nach dem proportionalen Ausfüllen des Bildschirms - + Allgemein @@ -487,34 +475,25 @@ Keiner - + Leistung - - Erhöhe Latenzzeiten zur Verbesserung der Leistung - - - VSync - - + Feedback - + Diskussionen - + Einen Fehler melden - + Eine Funktion vorschlagen - - Version - - + Prüfen Sie auf Aktualisierungen der Vorabversion - + Automatisch nach Updates suchen @@ -544,22 +523,22 @@ Installieren - + Erweitert - + DirectFlip deaktivieren - + Mauszeiger - + Mauszeiger anzeigen - + Anpassen der Mauszeigergeschwindigkeit während der Skalierung - + Interpolationsalgorithmus @@ -571,46 +550,46 @@ Beschränkt auf Grafikerfassung und Desktopduplikation - + Titelleiste aufnehmen - + Benutzerdefiniertes Zuschneiden - + Unten - + Links px - + Rechts - + Oben - + Fenstergrößenänderung bei Skalierung deaktivieren Bringt eine kleine Leistungssteigerung. 
Allerdings müssen die Effekte jedes Mal neu kompiliert werden, wenn ihre Parameter geändert werden - + Effektparameter inline machen Benachrichtigungen und Pop-ups von bestimmten Anwendungen werden blockiert - + Bei Skalierung exklusiven Vollbild simulieren Diese Einstellungen sind nur für Entwicklung gedacht - + Entwickleroptionen @@ -637,36 +616,24 @@ Skalierungsmodi importieren - + Einstellungen aktualisieren - + Startparameter - - Bearbeiten - Profiler Aufnahmemethode - - VSync - Begrenzen Deckkraft - - Bildstatistiken - - - Umschalten auf Bildraten - Zu Effekten wechseln @@ -679,7 +646,7 @@ Schrift-Cache deaktivieren - + Skalieren von maximierten oder ausfüllenden Fenstern zulassen @@ -719,13 +686,13 @@ Konfigurationsdatei konnte nicht geparst werden - + Hauptfenster Konfigurationsdatei konnte nicht gelesen werden - + Schließen @@ -734,7 +701,7 @@ Damit die Änderung wirksam wird, ist ein Neustart erforderlich - + Anzeige im Spiel @@ -749,7 +716,7 @@ {} ist verfügbar - + Andere Links @@ -782,9 +749,6 @@ Freischalten - - Umschalten auf frame timings - Timings diff --git a/src/Magpie.App/Resources.language-en-US.resw b/src/Magpie.App/Resources.language-en-US.resw index a00988e5f..85fc3db78 100644 --- a/src/Magpie.App/Resources.language-en-US.resw +++ b/src/Magpie.App/Resources.language-en-US.resw @@ -120,13 +120,13 @@ About - + Activation Scale automatically when the window returns to foreground - + Auto restore @@ -147,7 +147,7 @@ Scale the foreground window when timer ends - + Delayed scaling @@ -156,7 +156,7 @@ Cancel - + Delay in seconds @@ -216,34 +216,34 @@ Scaling configuration - + Advanced - + General - + Language Windows default - + Portable mode Open configuration file location - + Run at startup - + Magpie will continue to run in the background after the main window is closed - + Display the app on the system tray - + Theme @@ -261,33 +261,24 @@ On - + Shortcuts - + In-game overlay In-game overlay shortcut - + Scale Scale shortcut - + Launch - - 
General - - - Automatically applied when output image is too large to fit on the screen - - - Default downscaling effect - Export @@ -300,13 +291,13 @@ More options - + Scaling modes You need to run as administrator to use this setting - + Always run as administrator @@ -324,7 +315,7 @@ {} is available - + Other links @@ -339,9 +330,6 @@ Github repository - - None - Parameters @@ -453,19 +441,19 @@ Set the scaling factor after filling the screen with proportional scaling - + General - + 3D game mode - + Auto scale when in foreground - + Capture method - + Preferred monitors @@ -477,7 +465,7 @@ Monitors intersected by the source window - + Scaling mode @@ -525,40 +513,31 @@ None - + Performance - + Graphics card - + Display FPS counter - - Allow extra latency to improve performance - - - VSync - - + Feedback - + Discussions - + Report a bug - + Request a feature - - Version - - + Check for preview updates - + Check for updates automatically @@ -597,22 +576,22 @@ Installing - + Advanced - + Disable DirectFlip - + Cursor - + Draw cursor - + Adjust cursor speed while scaled - + Interpolation algorithm @@ -621,7 +600,7 @@ Nearest-neighbor - + Scaling factor @@ -636,52 +615,52 @@ Default - + Source window Limited to Graphics Capture and Desktop Duplication - + Capture title bar - + Custom cropping - + Bottom - + Left px - + Right - + Top - + Disable window resizing while scaled Gives a small performance boost. 
However, effects must be recompiled each time their parameters are changed - + Make effect parameters inline Notifications and pop-ups from certain applications will be blocked - + Simulate exclusive fullscreen when scaling You need to turn on "Display the app on the system tray" to use this setting - + Minimize to system tray at startup @@ -711,7 +690,7 @@ These settings are for development use only - + Developer options @@ -742,10 +721,10 @@ Failed to read configuration file - + Exit - + Main window @@ -769,27 +748,21 @@ Import ScaleModels.json - + Update settings Version - + Launch parameters - - Edit - Profiler Capture method - - VSync - Lock @@ -799,15 +772,6 @@ Unlock - - Frame statistics - - - Switch to frame rates - - - Switch to frame timings - Timings @@ -823,7 +787,7 @@ Disable font cache - + Allow scaling maximized or fullscreen windows @@ -844,10 +808,37 @@ Scale the foreground window or stop scaling + + Frame rate limiter + + + Maximum frame rate + Commit Developer mode is enabled. + + Duplicate frame detection + + + Always + + + Dynamic + + + Never + + + Enable statistics for dynamic detection + + + Dynamic detection + + + Frame rate + \ No newline at end of file diff --git a/src/Magpie.App/Resources.language-es.resw b/src/Magpie.App/Resources.language-es.resw index 2c19c871a..26b8746a4 100644 --- a/src/Magpie.App/Resources.language-es.resw +++ b/src/Magpie.App/Resources.language-es.resw @@ -59,7 +59,7 @@ : using a System.ComponentModel.TypeConverter : and then encoded with base64 encoding. 
--> - + @@ -129,10 +129,10 @@ Abre una nueva ventana para crear un perfil - + Activación - + Restauración automática @@ -150,7 +150,7 @@ Escale la ventana de primer plano cuando finalice el temporizador - + Escalado retrasado @@ -159,7 +159,7 @@ Cancelar - + Espera en segundos @@ -201,16 +201,16 @@ Configuración de escalado - + Avanzado - + General - + Idioma - + Modo portable @@ -219,13 +219,13 @@ Abrir la ubicación del archivo de configuración - + Ejecutar en el arranque - + Magpie continuará ejecutándose en segundo plano después de cerrar la ventana principal - + Mostrar la aplicación en la bandeja del sistema @@ -240,24 +240,21 @@ Encendido - + Atajos - + Superposición en el juego - + Escalado Atajo para el escalado - + Lanzamiento - - General - Exportar @@ -267,10 +264,10 @@ Mas opciones - + Modos de escalado - + Ejecutar siempre como administrador @@ -351,13 +348,13 @@ Adaptar - + General - + Modo de juego 3D - + Escalar de manera automática cuando está en primer plano @@ -366,7 +363,7 @@ Monitores intersectados por la ventana de origen - + Modo de escalado @@ -405,28 +402,19 @@ Ninguno - + Rendimiento - + Tarjeta grafica - + Mostrar contador de FPS - - Permitir latencia adicional para mejorar el rendimiento - - - VSync - - + Reportar un error - - Versión - - + Buscar actualizaciones de vista previa @@ -453,22 +441,22 @@ Descargando - + Avanzado - + Deshabilitar DirectFlip - + Ajustar la velocidad del cursor mientras se escala - + Algoritmo de interpolación Bilineal - + Factor de escalado @@ -477,28 +465,28 @@ Por defecto - + Ventana de origen - + Recorte personalizado - + Abajo px - + Izquierda - + Arriba - + Hacer que los parámetros de efecto estén en línea - + Simule pantalla completa exclusiva al escalar @@ -534,10 +522,10 @@ Error al analizar el archivo de configuración - + Cerrar - + Ventana principal @@ -558,7 +546,7 @@ Importar ScaleModels.json - + Ajustes de actualizaciones @@ -582,7 +570,7 @@ Configuración de escalado - + Tema @@ -591,30 +579,21 @@ 
Atajo para la superposición en el juego - - Se aplica automáticamente cuando la imagen de salida es demasiado grande para caber en la pantalla - Mover hacia abajo Renombrar - - Efecto de reducción de escala predeterminado - Importar - + Revisar actualizaciones automaticamente Necesita ejecutar como administrador para usar esta configuración - - Ninguno - Está siendo utilizado por los siguientes perfiles: @@ -636,7 +615,7 @@ Escalado - + Otros enlaces @@ -672,13 +651,13 @@ Establecer el factor de escala después de llenar la pantalla con escala proporcional - + Método de captura Llena la pantalla, la imagen puede estirarse - + Monitores preferidos @@ -687,19 +666,19 @@ Reordenar - + Solicitar una característica - + Cursor Instalando - + Feedback - + Dibujar cursor @@ -711,13 +690,13 @@ Limitado a captura de gráficos y duplicación de escritorio - + Capturar barra de título El archivo de configuración global proviene de una versión desconocida y es posible que no se analice correctamente. - + Discusiones @@ -735,10 +714,10 @@ Debe activar "Mostrar la aplicación en la bandeja del sistema" para usar esta configuración - + Derecha - + Deshabilitar el cambio de tamaño de la ventana mientras se escala @@ -747,7 +726,7 @@ Se bloquearán las notificaciones y ventanas emergentes de ciertas aplicaciones - + Minimizar a la bandeja del sistema al inicio @@ -759,7 +738,7 @@ Continuar - + Opciones de desarrollador @@ -775,21 +754,15 @@ Es necesario reiniciar para que el cambio surta efecto - + Parámetros del lanzador - - Editar - Monitor de rendimiento Método de captura - - VSync - Bloquear @@ -799,15 +772,6 @@ Desbloquear - - Fotogramas - - - Cambiar a tasas de fotogramas - - - Cambiar a tiempos de fotograma - Tiempos @@ -829,7 +793,7 @@ Archivo ejecutable - + Permitir escalar ventanas maximizadas o de pantalla completa diff --git a/src/Magpie.App/Resources.language-fr.resw b/src/Magpie.App/Resources.language-fr.resw index 90c0b419a..87d05d7ad 100644 --- 
a/src/Magpie.App/Resources.language-fr.resw +++ b/src/Magpie.App/Resources.language-fr.resw @@ -1,6 +1,65 @@ - + + @@ -61,7 +120,7 @@ Remise a l'échelle automatique lors du retour de la fenêtre au premier plan - + Restaurer automatiquement @@ -106,18 +165,12 @@ Configuration de mise a l'échelle - + Afficher l'application sur la barre des taches - + Thème - - Général - - - Effet de réduction d'échelle par défaut - Importer ScaleModels.json @@ -130,31 +183,22 @@ Importer ScaleModels.json - + Mettre à jour les paramètres - + Paramètres de lancement Méthode de capture - - VSync - Opacitée Déverrouiller - - Statistiques d'image - - - Basculer vers les fréquences d'images - - + Activation @@ -169,7 +213,7 @@ A propos - + Délai en secondes @@ -178,7 +222,7 @@ Activer - + Mise à l'échelle retardée @@ -208,37 +252,34 @@ Ouvrez une nouvelle fenêtre pour créer un profil - - Passage à la synchronisation des trames - Nouveau profil Configuration de mise a l'échelle - + Avancée - + Général - + Langues Fenêtre par defaut - + Lancer au démarrage - + Magpie continuera à fonctionner en arrière-plan après la fermeture de la fenêtre principale - + Raccourcis - + Mode portable @@ -259,15 +300,12 @@ On - + Superposition en jeu - + Lancez - - Appliquer automatiquement lorsque l'image de sortie est trop grande pour l'écran - Importer @@ -277,7 +315,7 @@ Raccourci de superposition en jeu - + Echelle @@ -317,18 +355,15 @@ Impossible d'analyser le fichier de configuration - + Quitter - + Fenêtre principale Exporter les modes de mise à l'échelle - - Editer - Profiler @@ -353,13 +388,13 @@ Dépôt Github - + Mode de mise à l'échelle - + Le bas - + Mise à l'échelle automatique au premier plan @@ -374,7 +409,7 @@ Fichier exécutable - + Minimiser dans la barre d'état au démarrage @@ -395,7 +430,7 @@ Désactiver le cache d'effet - + Signaler un bug @@ -416,13 +451,13 @@ Ajouter effet - + Modes de mise à l'échelle Plus d'options - + Mode jeux 3D @@ -443,13 +478,13 @@ Vérifier périodiquement les mises 
à jour - + Barre de titre de la capture Monter - + Désactiver le redimensionnement de la fenêtre à la mise en échelle @@ -458,12 +493,9 @@ Identique à la fenêtre source - + Générale - - Version - Personnalisé @@ -473,10 +505,10 @@ Aucun - + Curseur - + Ajuster la vitesse du curseur lors de la mise à l'échelle @@ -485,7 +517,7 @@ Commit - + Demander une fonctionnalité @@ -494,10 +526,10 @@ Aucun - + Autres liens - + La droite @@ -521,10 +553,10 @@ Fermer - + Avancée - + Carte graphique @@ -542,7 +574,7 @@ Supprimer - + Algorithme d'interpolation @@ -554,9 +586,6 @@ Type - - Aucun - px @@ -566,13 +595,10 @@ Renommer - - VSync - - + La gauche - + Recadrage personnalisé @@ -587,7 +613,7 @@ Renommer - + Options développeur @@ -611,7 +637,7 @@ Monter - + Méthode de capture @@ -632,16 +658,13 @@ Licence - + Discussions Remplir - - Autoriser un temps de latence supplémentaire pour améliorer les performances - - + Fenêtre source @@ -653,10 +676,10 @@ Erreur - + Afficher le curseur - + Vérifier automatiquement les mises à jour @@ -677,16 +700,16 @@ Annuler - + Facteur de mise à l'échelle - + Feedback Réorganisation - + Écrans préférés @@ -695,7 +718,7 @@ Supprimer - + Performance @@ -710,13 +733,13 @@ Êtes-vous sûr de vouloir supprimer ce mode de mise à l'échelle ? - + Toujours exécuter en mode administrateur Il est utilisé par les profils suivants : - + Le haut @@ -746,7 +769,7 @@ Le fichier de configuration global provient d'une version inconnue et peut ne pas être analysé correctement. 
- + Désactiver DirectFlip @@ -758,13 +781,13 @@ Descendre - + Permettre la mise à l'échelle des fenêtres maximisées ou en plein écran - + Simuler l'exclusivité du plein écran lors de la mise à l'échelle - + Afficher le compteur de FPS @@ -785,10 +808,10 @@ Directives relatives aux contributions - + Vérifier les mises à jour prévue - + Rendre les paramètres d'effet en ligne \ No newline at end of file diff --git a/src/Magpie.App/Resources.language-hu.resw b/src/Magpie.App/Resources.language-hu.resw index 1389a9e23..646c3e39b 100644 --- a/src/Magpie.App/Resources.language-hu.resw +++ b/src/Magpie.App/Resources.language-hu.resw @@ -1,6 +1,65 @@ - + + @@ -61,7 +120,7 @@ Aktiválás - + Haladó @@ -88,7 +147,7 @@ Kilépés - + Automatikus visszaállítás @@ -97,7 +156,7 @@ Skálázás {}mp után - + Hiba jelentése @@ -113,15 +172,12 @@ Időzítések - + Aktiválás Sötét - - Verzió - Egyéni @@ -134,7 +190,7 @@ Folytatás - + Funkció kérése @@ -146,7 +202,7 @@ Nincs - + Egyéb linkek @@ -155,7 +211,7 @@ Kezdőlap - + Téma @@ -167,7 +223,7 @@ Ablak kiválasztása - + Nyelv @@ -182,19 +238,19 @@ Verzió - + Késleltetett méretezés Rólunk - + Késleltetés másodpercben Mentés - + Bal @@ -245,7 +301,7 @@ Új profil - + Beszélgetések @@ -266,7 +322,7 @@ Másolás innen - + Frissítések automatikus keresése @@ -275,7 +331,7 @@ Mégse - + Visszajelzés @@ -284,7 +340,7 @@ Folytatás - + Kísérleti frissítések keresése @@ -299,10 +355,10 @@ Figyelmeztetés - + Beállítások frissítése - + Méretarány diff --git a/src/Magpie.App/Resources.language-id.resw b/src/Magpie.App/Resources.language-id.resw index b840f9186..6c2c7b08b 100644 --- a/src/Magpie.App/Resources.language-id.resw +++ b/src/Magpie.App/Resources.language-id.resw @@ -1,6 +1,65 @@ - + + @@ -109,34 +168,34 @@ Konfigurasi penskalaan - + Lebih lanjut - + Umum - + Bahasa Bawaan Windows - + Mode portable Buka lokasi file konfigurasi - + Jalankan saat startup - + Magpie akan tetap berjalan di belakang saat window utama ditutup - + Munculkan app di system tray 
- + Tema @@ -154,33 +213,24 @@ Nyala - + Pintasan - + Overlay di game Pintasan overlay di game - + Skala Pintasan skala - + Jalankan - - Umum - - - Otomatis diterapkan ketika gambar ouput terlalu besar untuk muat di layar - - - Efek downscaling bawaan - Ekspor @@ -193,16 +243,16 @@ Opsi lainnya - + Mode penskalaan - + Selalu jalan sebagai administrator Anda harus jalan sebagai administrator untuk menggunakan pengaturan ini - + Aktivasi @@ -214,7 +264,7 @@ Beranda - + Tunda dalam detik @@ -235,7 +285,7 @@ {} tersedia - + Tautan lainnya @@ -250,9 +300,6 @@ Repositori Github - - Tidak ada - Parameter @@ -343,25 +390,25 @@ Atur faktor penskalaan setelah mengisi layar dengan penskalaan proporsional - + Umum - + Mode 3D game - + Skala otomatis ketika berada di latar depan - + Metode penangkapan - + Monitor preferensi Monitor yang berpotongan dengan window sumber - + Mode penskalaan @@ -409,7 +456,7 @@ Skalakan otomatis ketika jendela kembali ke latar depan - + Restore otomatis @@ -418,7 +465,7 @@ Skalakan window latar depan saat timer berakhir - + Tunda penskalaan @@ -466,40 +513,31 @@ Tidak ada - + Perfoma - + Kartu grafis - + Tampilkan penghitung FPS - - Izinkan latensi tambahan untuk meningkatkan performa - - - VSync - - + Umpan balik - + Diskusi - + Laporkan bug - + Minta fitur - - Versi - - + Cek pembaruan preview - + Cek pembaruan otomatis @@ -538,22 +576,22 @@ Menginstal - + Tingkat lanjut - + Matikan DirectFlip - + Kursor - + Gambar kursor - + Sesuaikan kecepatan kursor saat diskalakan - + Algoritma interpolasi @@ -562,7 +600,7 @@ Nearest-neighbor - + Faktor penskalaan @@ -577,46 +615,46 @@ Bawaan - + Window sumber Terbatas untuk Graphics Capture dan Desktop Duplication - + Tangkap title bar - + Cropping kustom - + Bawah - + Kiri px - + Kanan - + Atas - + Nonaktifkan pengubahan ukuran window saat diskalakan Memberikan sedikit peningkatan performa. 
Namun, efek harus dikompilasi ulang setiap kali parameternya diubah - + Membuat parameter efek sejajar Pemberitahuan dan pop-up dari aplikasi tertentu akan diblokir - + Mensimulasikan layar penuh eksklusif saat melakukan penskalaan @@ -625,7 +663,7 @@ Pengaturan ini hanya untuk penggunaan pengembangan - + Meminimalkan ke baki sistem di startup @@ -652,7 +690,7 @@ File konfigurasi lokal berasal dari versi yang tidak dikenal dan mungkin tidak diuraikan dengan benar. - + Pilihan pengembang diff --git a/src/Magpie.App/Resources.language-it.resw b/src/Magpie.App/Resources.language-it.resw index 439e9093d..69c4d4fe4 100644 --- a/src/Magpie.App/Resources.language-it.resw +++ b/src/Magpie.App/Resources.language-it.resw @@ -1,6 +1,65 @@ - + + @@ -70,10 +129,10 @@ Già in uso - + Attivazione - + Ripristino automatico @@ -91,13 +150,13 @@ Ridimensiona la finestra in primo piano allo scadere del timer - + Ridimensionamento ritardato Ridimensiona dopo {}s - + Ritardo in secondi @@ -139,36 +198,30 @@ Configurazione del ridimensionamento - + Avanzate Predefinita di Windows - + Modalità portabile - + Esegui all'avvio - + Mostra l'app nell'area di notifica Scorciatoia di ridimensionamento - + Ridimensionamento - + Avvia - - Generali - - - Applicato automaticamente quando l'immagine di output è troppo grande per entrare nello schermo - Esporta @@ -181,13 +234,13 @@ Altre opzioni - + Modalità di ridimensionamento Devi eseguire l'app come amministratore per usare questa impostazione - + Esegui sempre come amministratore @@ -205,18 +258,12 @@ {} è disponibile - + Altri link Linee guida per contribuire - - Effetto di ridimensionamento predefinito - - - Nessuno - Cancella @@ -304,16 +351,16 @@ Adatta - + Generale - + Modalità gioco 3D Imposta il fattore di ridimensionamento dopo aver riempito lo schermo con il ridimensionamento proporzionale - + Monitor preferito @@ -331,31 +378,22 @@ Elimina - - Consenti ulteriore latenza per migliorare le prestazioni - - - VSync - - + Feedback - + 
Discussioni - + Segnala un bug - + Richiedi una feature - - Versione - - + Controlla gli aggiornamenti in anteprima - + Controlla gli aggiornamenti automaticamente @@ -385,13 +423,13 @@ Installazione - + Mostra cursore - + Regola la velocità del cursore durante il ridimensionamento - + Algoritmo di interpolazione @@ -400,7 +438,7 @@ Vicino più ravvicinato - + Fattore di scala @@ -415,31 +453,31 @@ Predefinito - + Finestra di origine Limitato all'acquisizione grafica e alla duplicazione del desktop - + Cattura barra del titolo - + Ritaglio personalizzato - + Parte inferiore - + Sinistra pixel - + Destra - + Parte superiore @@ -448,7 +486,7 @@ Devi attivare "Visualizza l'app nell'area notifiche" per utilizzare questa impostazione - + Riduci a icona nella barra delle notifiche all'avvio @@ -457,13 +495,13 @@ Continua - + Mette i parametri dell'effetto in linea - + Simula lo schermo intero esclusivo durante il ridimensionamento - + Disabilita il ridimensionamento della finestra durante il ridimensionamento @@ -503,10 +541,10 @@ Impossibile leggere il file di configurazione - + Esci - + Finestra principale @@ -518,15 +556,12 @@ Importa ScaleModels.json - + Impostazioni aggiornamenti Versione - - Modifica - Metodo di cattura @@ -539,21 +574,9 @@ Esporta le modalità di ridimensionamento - - VSync - Blocca - - Statistiche dei frame - - - Passa alla frequenza dei fotogrammi - - - Passa ai tempi dei fotogrammi - Tempi @@ -593,7 +616,7 @@ Ridimensiona automaticamente quando la finestra torna in primo piano - + Generali @@ -605,10 +628,10 @@ Configurazione del ridimensionamento - + Lingua - + Tema @@ -617,10 +640,10 @@ Scuro - + Scorciatoie - + Magpie continuerà a funzionare in background dopo la chiusura della finestra principale @@ -635,7 +658,7 @@ Disattivato - + Sovrapposizione in-gioco @@ -671,16 +694,16 @@ Aggiungi effetto - + Performance - + Mostra il contatore FPS - + Ridimensionamento automatico quando in primo piano - + Metodo di cattura @@ -692,7 +715,7 @@ Apri il 
percorso del programma - + Modalità di ridimensionamento @@ -716,7 +739,7 @@ Sposta in alto - + Scheda grafica @@ -734,13 +757,13 @@ Riprova - + Avanzate - + Disabilita DirectFlip - + Cursore @@ -755,7 +778,7 @@ Attenzione - + Impostazioni sviluppatore @@ -767,7 +790,7 @@ Riavvia Magpie - + Parametri di avvio @@ -779,7 +802,7 @@ Opacità - + Consenti il ridimensionamento di finestre ingrandite o a schermo intero diff --git a/src/Magpie.App/Resources.language-ja.resw b/src/Magpie.App/Resources.language-ja.resw index 4400232cd..7f7d3212a 100644 --- a/src/Magpie.App/Resources.language-ja.resw +++ b/src/Magpie.App/Resources.language-ja.resw @@ -59,7 +59,7 @@ : using a System.ComponentModel.TypeConverter : and then encoded with base64 encoding. --> - + @@ -120,7 +120,7 @@ 現在のウィンドウ - + アクティベーション @@ -135,13 +135,13 @@ ホーム - + タイマー {}秒後にスケーリング - + カウントダウンタイム @@ -180,34 +180,34 @@ スケーリング設定 - + 高度な設定 - + 一般設定 - + 言語 Windowsデフォルト - + ポータブルモード 設定ファイルの場所を開きます - + スタートアップ時に実行 - + カーソル - + スケーリング時カーソル速度を調整する - + 補間アルゴリズム @@ -228,13 +228,13 @@ スケーリング設定 - + DirectFlipを無効にする - + カーソルを描く - + スケーリングウィンドウの復元 @@ -255,10 +255,10 @@ 作成 - + システムトレイにアプリを表示 - + テーマ @@ -276,21 +276,18 @@ オン - + ゲーム内オーバーレイ - + スケーリング スケーリングのショートカット - + 起動 - - デフォルトのダウンサンプリング効果 - エクスポート @@ -303,7 +300,7 @@ その他のオプション - + スケーリングモード @@ -321,7 +318,7 @@ {} が利用できます - + 関連リンク @@ -333,9 +330,6 @@ Githubリポジトリ - - なし - パラメータ @@ -402,7 +396,7 @@ 最適化 - + キャプチャ方式 @@ -414,7 +408,7 @@ ソースウィンドウがまたがっている全てのモニター - + スケーリングモード @@ -444,27 +438,18 @@ なし - + フレームレートを表示する - - 遅延を許容してパフォーマンスを向上させる - - - 垂直同期 - - + フィードバック - + バグ報告 - + 機能のリクエスト - - バージョン - 更新を確認しています @@ -498,43 +483,43 @@ インストール中 - + 高度な設定 デフォルト - + ソースウィンドウ - + タイトルバーをキャプチャ - + - + ピクセル - + - + 内部効果パラメーター - + スケーリング時に排他的な全画面表示をシミュレーションする - + スケーリング時のウィンドウサイズ変更を無効にする 特定アプリからの通知とポップアップをブロックできます - + 起動時、システムトレイに最小化 @@ -574,7 +559,7 @@ 設定ファイルの読み取りに失敗しました - + 終了 @@ -595,7 +580,7 @@ 古いバージョンのScaleModels.jsonをインポートする - + スケーリング倍率 @@ -604,10 +589,10 @@ ソースウィンドウと同じ - + カスタムクロップ 
- + @@ -622,25 +607,19 @@ グローバル設定ファイルは不明なバージョンのものであり、正しく解析されない可能性があります。 - + メインウィンドウを閉じた後も、Magpie は引き続きバックグラウンドで実行されます - + ショートカット ゲーム内オーバーレイのショートカット - - 一般設定 - - - 出力画像が画面に収まりきらない場合、自動的にこの効果が適用されます - この設定を使用するには、管理者として実行する必要があります - + 常に管理者として実行する @@ -691,16 +670,16 @@ モニタいっぱいに表示されます、縦横比は無視 - + 一般設定 - + 3Dゲームモード - + 最前面時自動スケーリング - + 優先モニター @@ -718,16 +697,16 @@ 確定 - + パフォーマンス 上へ移動 - + グラフィックスカード - + フォーラム @@ -742,7 +721,7 @@ これらの設定は、開発用としてのみ使用できます - + 開発者向けオプション @@ -751,7 +730,7 @@ 未知の効果 - + メインウィンドウ @@ -763,42 +742,30 @@ アップデートを確認する - + アップデートを自動的に確認する - + プレビューの更新を確認する - + アップデート設定 バージョン - - 編集 - - + 起動時のパラメータ キャプチャ方式 - - 垂直同期 - 不透明度 アンロック - - フレームレートに切替 - - - フレームタイミングに切替 - エフェクトに切替 @@ -811,9 +778,6 @@ ロック - - フレーム統計 - レンダリング時間 @@ -823,7 +787,7 @@ フォントキャッシュの無効化 - + 最大化またはフルスクリーン画面のスケーリングを許可 @@ -850,4 +814,4 @@ デベロッパーモードが有効。 - \ No newline at end of file + diff --git a/src/Magpie.App/Resources.language-ko.resw b/src/Magpie.App/Resources.language-ko.resw index 3293b81da..7e477ce6d 100644 --- a/src/Magpie.App/Resources.language-ko.resw +++ b/src/Magpie.App/Resources.language-ko.resw @@ -1,6 +1,65 @@ - + + @@ -58,22 +117,19 @@ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 - + 포그라운드일 때 자동으로 스케일 - + 선호 모니터 삭제 - - 수직동기화 - - + 버그 제보 - + 스케일링 모드 @@ -121,13 +177,13 @@ 정보 - + 활성화 창이 포그라운드로 돌아올 때 자동으로 스케일 - + 자동 복원 @@ -148,7 +204,7 @@ 타이머가 끝날 때 포그라운드 창을 스케일 - + 타이머 @@ -157,7 +213,7 @@ 취소 - + 카운트다운(초) @@ -193,22 +249,22 @@ 스케일링 구성 - + 고급 - + 일반 - + 언어 - + 메인 창이 닫힌 후 Magpie가 백그라운드에서 계속 실행됩니다 - + 시스템 트레이에 앱 표시 - + 테마 @@ -226,30 +282,24 @@ 켜짐 - + 바로 가기 - + 인게임 오버레이 인게임 오버레이 바로 가기 - + 스케일 스케일 바로 가기 - + 실행 - - 일반 - - - 기본 다운스케일링 효과 - 내보내기 @@ -298,10 +348,10 @@ 비례 스케일링으로 화면을 채운 후에 스케일링 인수를 설정합니다 - + 일반 - + 3D 게임 모드 @@ -328,16 +378,16 @@ 없음 - + 성능 - + 그래픽 카드 - + FPS 카운터 표시 - + 피드백 @@ -349,10 +399,10 @@ 다운로드 및 설치 - + 스케일 된 동안 커서 속도 조정 - + 보간 알고리즘 @@ -361,7 +411,7 @@ 〈 쌍선형 - + 스케일링 인수 @@ -376,37 +426,37 @@ 
[그래픽 캡처]와 [데스크탑 복제] 한정 - + 제목 표시줄 캡처 - + 자르기 사용자 지정 - + 하단 px - + 오른쪽 - + 상단 - + 스케일 된 동안 창 크기 조절 사용 안 함 약간의 성능 증가를 가져옵니다. 하지만 매개변수가 변경될 때마다 효과를 재컴파일해야 합니다 - + 효과 매개변수를 인라인으로 만듦 특정 응용 프로그램에서의 알림 및 팝업이 차단됩니다 - + 스케일링 할 때 단독 전체 화면 시뮬레이션 @@ -415,7 +465,7 @@ 소스 창과 동일 - + 소스 창 @@ -433,18 +483,12 @@ 끝내기 - - 편집 - 프로파일러 캡처 방식 - - 수직동기화 - 잠금 @@ -466,7 +510,7 @@ 이름 바꾸기 - + 캡처 방식 @@ -502,12 +546,9 @@ 다음에서 복사 - + 포터블 모드 - - 없음 - 매개변수 @@ -517,7 +558,7 @@ 다음에서 복사 - + 부팅 시 실행 @@ -532,19 +573,19 @@ 구성 파일 위치 열기 - + 고급 ScaleModels.json 가져오기 - + 스케일링 모드 깃허브 저장소 - + 항상 관리자 권한으로 실행 @@ -563,7 +604,7 @@ 구성 파일 위치: {} - + 기타 링크 @@ -608,16 +649,13 @@ 이름 - + 개발자 옵션 너비(픽셀) - - 지연 시간이 늘어나지만 성능 향상 - - + 업데이트를 자동으로 확인 @@ -626,16 +664,13 @@ 업데이트 확인 - + 논의 - + 기능 제안 - - 버전 - - + 미리 보기 업데이트 확인 @@ -650,16 +685,16 @@ 릴리스 노트 - + 커서 - + 커서 그리기 다운로드 실패 - + DirectFlip 사용 안 함 @@ -674,13 +709,13 @@ 구성 파일 구문분석에 실패했습니다 - + 왼쪽 이 설정은 개발 전용입니다 - + 부팅 시 시스템 트레이로 최소화 @@ -710,13 +745,13 @@ 구성 파일 읽기에 실패했습니다 - + 끝내기 변경 사항을 적용하려면 다시 시작해야 합니다 - + 메인 창 @@ -731,40 +766,28 @@ 스케일링 모드 내보내기 - - 프레임 레이트로 전환 - 스케일링 모드 가져오기 ScaleModels.json 가져오기 - + 업데이트 설정 버전 - + 실행 매개변수 잠금 해제 - - 프레임 타이밍으로 전환 - - - 프레임 통계 - - - 출력 이미지가 너무 커서 화면에 맞지 않을 때 자동으로 적용됩니다(특징을 한눈에 이해할 수 있도록 원문에 없는 아이콘을 추가했습니다. 해당 번역은 다음 버전에 추가됩니다. 무거운 알고리즘일수록 각진 부분을 뭉개서 부드럽게 합니다. 원문은 알파벳순으로 정렬되어 있습니다.) 
- 폰트 캐시 비활성화 - + 최대화 또는 전체 화면 창을 스케일링 하도록 허용 diff --git a/src/Magpie.App/Resources.language-pt-BR.resw b/src/Magpie.App/Resources.language-pt-BR.resw index 512978da7..af6f4441b 100644 --- a/src/Magpie.App/Resources.language-pt-BR.resw +++ b/src/Magpie.App/Resources.language-pt-BR.resw @@ -1,6 +1,65 @@ - + + @@ -58,10 +117,10 @@ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 - + Ativação - + Restaurar automaticamente @@ -73,10 +132,10 @@ Janela atual: - + Redimensionamento com atraso - + Tempo de atraso em segundos @@ -112,22 +171,22 @@ Configuração de redimensionamento - + Avançado - + Idioma - + Modo "portable" Abrir local do arquivo de configuração - + Executar na inicialização - + Exibir o aplicativo na área de notificação @@ -139,10 +198,10 @@ Desativado - + Sobreposição em jogo - + Redimensionar @@ -151,9 +210,6 @@ Ativado - - Efeito de redimensionamento padrão - Exportar @@ -169,7 +225,7 @@ Registro de alterações - + Outros links @@ -178,9 +234,6 @@ Repositório no GitHub - - Nenhum - Parâmetros @@ -235,19 +288,19 @@ Definir o fator de redimensionamento após preencher a tela com escalonamento proporcional - + Redimensionar automaticamente quando em primeiro plano - + Método de captura - + Monitores preferidos Monitor mais próximo da janela de origem - + Modo de redimensionamento @@ -280,34 +333,28 @@ Nenhum - + Desempenho - + Placa de vídeo - + Exibir contador de FPS - - Permitir latência extra em troca de melhor desempenho - - + Feedback - + Discussões - + Reportar um bug - + Solicitar um novo recurso - - Versão - - + Verificar atualizações de versão de teste @@ -334,13 +381,13 @@ Instalando - + Avançado - + Desativar DirectFlip - + Cursor do mouse @@ -355,7 +402,7 @@ Redimensionar automaticamente quando a janela voltar ao primeiro plano - + O Magpie continuará em execução em segundo plano após o fechamento da janela principal @@ -385,7 +432,7 @@ Atalho inválido - + Tema @@ -394,37 
+441,31 @@ Perfis - + Geral Padrão do Windows - + Atalhos - + Inicialização - - Geral - - - Aplicado automaticamente quando a imagem de saída é muito grande para caber na tela - Importar ScaleModels.json Mais opções - + Modos de redimensionamento É necessário executar como administrador para utilizar essa configuração - + Executar sempre como administrador @@ -478,7 +519,7 @@ Novo modo de redimensionamento - + Geral @@ -496,7 +537,7 @@ Todos os monitores - + Procurar por atualizações automaticamente @@ -517,9 +558,6 @@ Mover para baixo - - VSync - Falha ao procurar por atualizações, tente novamente mais tarde @@ -532,37 +570,31 @@ Falha no download - + Exibir cursor Bilinear - + Modificar a velocidade do cursor ao ser redimensionado - + Algoritmo de interpolação Desfixar - - VSync - Desativar cache de fonte - - Alternar para taxas de quadros - Notificações e pop-ups de determinados aplicativos serão bloqueados - + Minimizar para a área de notificação na inicialização - + Simular tela cheia exclusiva durante o redimensionamento @@ -598,7 +630,7 @@ Importar modos de redimensionamento - + Configurações de Atualização @@ -607,28 +639,19 @@ Versão - + Parâmetros de inicialização - - Editar - Método de captura Fixar - - Estatísticas de quadros - - - Alternar para tempos de quadros - Sem redimensionamento - + Fator de redimensionamento @@ -643,10 +666,10 @@ Desativar cache de efeito - + Capturar barra de título - + Janela de origem @@ -655,22 +678,22 @@ Limitado à Captura Gráfica e Duplicação de Área de Trabalho - + Corte personalizado - + Direita - + Parte superior - + Desativar redimensionamento da janela durante o escalonamento Falha ao analisar o arquivo de configuração - + Janela principal @@ -688,7 +711,7 @@ Absoluto - + Modo de jogo 3D @@ -697,10 +720,10 @@ Personalizado - + Parte inferior - + Esquerda @@ -709,7 +732,7 @@ Oferece um pequeno ganho no desempenho. 
No entanto, os efeitos devem ser recompilados sempre que seus parâmetros são alterados - + Incorporar os parâmetros do efeito @@ -727,7 +750,7 @@ Essas configurações são exclusivas para desenvolvimento - + Opções do desenvolvedor @@ -749,7 +772,7 @@ Falha ao ler o arquivo de configuração - + Sair @@ -764,7 +787,7 @@ Alternar para passos - + Permitir redimensionamento de janelas maximizadas ou em tela cheia diff --git a/src/Magpie.App/Resources.language-ru.resw b/src/Magpie.App/Resources.language-ru.resw index c5873e3e6..9ce31e493 100644 --- a/src/Magpie.App/Resources.language-ru.resw +++ b/src/Magpie.App/Resources.language-ru.resw @@ -59,7 +59,7 @@ : using a System.ComponentModel.TypeConverter : and then encoded with base64 encoding. --> - + @@ -120,15 +120,12 @@ О программе - + Активация Очистить - - Ничего - Параметры @@ -153,13 +150,13 @@ Установка - + Расширенные - + Курсор - + Множитель масштабирования @@ -168,10 +165,10 @@ Не удалось прочитать файл настроек - + Выйти - + Главное окно @@ -183,7 +180,7 @@ Автоматическое масштабирование при возвращении окна на передний план - + Автоматическое восстановление @@ -201,7 +198,7 @@ Масштабирование переднего окна после задержки - + Отложенное масштабирование @@ -210,7 +207,7 @@ Отмена - + Задержка в секундах @@ -270,34 +267,34 @@ Настройки масштабирования - + Расширенные - + Общие - + Язык Windows (по умолчанию) - + Портативный режим Открыть расположение файла настроек - + Запускать при включении системы - + Magpie продолжит работать в фоне после закрытия главного окна - + Отображение приложения на панели задач - + Тема @@ -309,33 +306,24 @@ Светлая - + Горячие клавиши - + Внутриигровой оверлей Клавиша внутриигрового оверлея - + Масштабировать Клавиша масштабирования - + Запуск - - Общие - - - Автоматически применяется, когда выходное изображение слишком велико, чтобы уместиться на экране - - - Эффект уменьшения масштаба по умолчанию - Экспортировать @@ -348,13 +336,13 @@ Больше настроек - + Режимы масштабирования Для 
использования этой настройки нужно запустить приложение от имени администратора - + Всегда запускать приложение от имени администратора @@ -372,7 +360,7 @@ {} доступно - + Другие ссылки @@ -480,31 +468,28 @@ Выставить множитель масштабирования после заполнения экрана пропорциональным масштабированием - + Общие - + Режим 3D игры - + Автоматическое масштабирование когда на переднем плане - + Способ захвата - + Предпочтительные мониторы Ближайший монитор к исходному окну - - Разрешить дополнительную задержку для повышения производительности - Пересекаемые исходным окном мониторы - + Режим масштабирования @@ -552,37 +537,31 @@ Ничего - + Производительность - + Видеокарта - + Отображать счётчик FPS - - Вертикальная синхронизация - - + Обратная связь - + Обсуждения - + Сообщить об ошибке - + Запросить функцию - - Версия - - + Проверить наличие обновлений предварительного просмотра - + Автоматически проверять наличие обновлений @@ -618,16 +597,16 @@ Скачивание - + Отключить DirectFlip - + Отображать курсор - + Подогнать скорость курсора при масштабировании - + Алгоритм интерполяции @@ -645,52 +624,52 @@ По умолчанию - + Исходное окно Ограничено захватом графики и дупликации рабочего стола - + Заголовок захвата - + Пользовательская обрезка - + Снизу - + Слева пкс. - + Справа - + Сверху - + Выключить изменение размера окна при масштабировании Даёт небольшой прирост производительности. 
Однако эффекты должны быть перекомпилированы при каждом изменении их параметров - + Сделать параметры эффекта встроенными Уведомления и всплывающие окна некоторых приложений будут заблокированы - + Симуляция эксклюзивного полного экрана при масштабировании Для использования этой настройки, включите "Отображать приложение в панели задач" - + Сворачивать на панель задач при запуске @@ -720,7 +699,7 @@ Эти настройки предназначены только для разработчиков - + Настройки разработчика @@ -772,13 +751,10 @@ Версия - + Обновить настройки - - Изменить - - + Параметры запуска @@ -790,21 +766,9 @@ Способ захвата - - Вертикальная синхронизация - Заблокировать - - Статистика кадра - - - Переключиться на кадровую частоту - - - Переключиться на тайминги кадра - Тайминги @@ -823,7 +787,7 @@ Выключить кэш шрифтов - + Разрешить масштабирование развёрнутых или полноэкранных окон diff --git a/src/Magpie.App/Resources.language-tr.resw b/src/Magpie.App/Resources.language-tr.resw index aee7b828a..905df3d99 100644 --- a/src/Magpie.App/Resources.language-tr.resw +++ b/src/Magpie.App/Resources.language-tr.resw @@ -59,7 +59,7 @@ : using a System.ComponentModel.TypeConverter : and then encoded with base64 encoding. 
--> - + @@ -126,10 +126,10 @@ Tamam - + Gelişmiş - + Tercih edilen monitörler @@ -144,7 +144,7 @@ Adlandır - + Grafik kartı @@ -165,19 +165,16 @@ Özel - + Sol - + Sağ - - VSync dikey eşitleyici - - + Aktivasyon - + Otomatik geri yükle @@ -195,7 +192,7 @@ Zamanlayıcı sona erdiğinde ön plandaki pencereyi ölçeklendir - + Gecikmeli ölçek @@ -204,7 +201,7 @@ İptal - + Gecikme saniyesi @@ -252,31 +249,31 @@ Ölçek yapılandırması - + Gelişmiş - + Genel - + Dil Windows varsayılanı - + Taşınabilir kip Yapılandırma dosyası konumunu aç - + Başlangıçta çalıştır - + Uygulamayı sistem tepsisinde görüntüle - + Tema @@ -300,7 +297,7 @@ Başlangıç - + Oyun içi arayüz @@ -309,15 +306,9 @@ Ölçek kısayolu - + Başlat - - Genel - - - Varsayılan ölçek küçültme efekti - Dışarı aktar @@ -330,13 +321,13 @@ Diğer seçenekler - + Ölçek kipleri Bu ayarı kullanmak için yönetici olarak çalıştırmak gerekir - + Her zaman yönetici olarak çalıştır @@ -354,7 +345,7 @@ {} mevcut - + Diğer bağlantılar @@ -459,16 +450,16 @@ Sığdır - + Genel - + 3D oyun kipi - + Ön planda otomatik ölçeklendir - + Yakalama yöntemi @@ -477,7 +468,7 @@ Kaynak pencereyle kesişen monitör - + Ölçek kipi @@ -495,7 +486,7 @@ Yok - + Performans @@ -507,27 +498,21 @@ Sırala - + FPS sayacını görüntüle - - Performansı artırmak için fazladan gecikmeye izin ver - - + Geri bildirim - + Tartışmalar - + Bir hata bildir - + Özellik iste - - Sürüm - Güncellemeler kontrol edilemedi, daha sonra tekrar deneyin @@ -558,19 +543,19 @@ Kuruluyor - + DirectFlip devre dışı bırak - + İmleç - + İmleci göster - + Ölçek sonrası imleç hızını ayarla - + İnterpolasyon algoritması @@ -579,7 +564,7 @@ En yakın - + Ölçek etkeni @@ -591,46 +576,46 @@ Varsayılan - + Kaynak pencere Grafik Yakalama ve Masaüstü Çoğaltma ile Sınırlı - + Başlık çubuğunu yakala - + Özel kırpma - + Alt pk - + Üst - + Ölçeklendirme sonrası pencereyi yeniden boyutlandırma Küçük bir performans artışı sağlar. 
Ancak, parametreleri her değiştirildiğinde efektlerin yeniden derlenmesi gerekir - + Efekt parametrelerini satır içi yap Belirli uygulamalardan gelen bildirimler ve açılır pencereler engellenir - + Ölçekleme sırasında orjinal tam ekranı taklit edin Bu ayarı kullanmak için "Uygulamayı sistem tepsisinde görüntüle" özelliğini açmanız gerekir - + Başlangıçta sistem tepsisine küçült @@ -660,7 +645,7 @@ Bu ayarlar yalnızca geliştirme amaçlı kullanım içindir - + Geliştirici seçenekleri @@ -691,10 +676,10 @@ Yapılandırma dosyası okunamadı - + Çıkış - + Ana pencere @@ -733,18 +718,15 @@ Ölçek yapılandırması - + Magpie, ana pencere kapatıldıktan sonra arka planda çalışmaya devam edecek - + Kısayollar - + Ölçek - - Çıktı görüntüsü ekrana sığmayacak kadar büyük olduğunda otomatik uygulanır - Katkı kuralları @@ -754,33 +736,27 @@ Ölçek - - Yok - Yeni ölçek kipi Güncellemeleri kontrol et - + Güncellemeleri otomatik kontrol et - + Önizleme güncellemelerini kontrol et - + Güncelleme ayarları Sürüm - + Başlatma parametreleri - - Düzenle - Profil oluştur @@ -793,21 +769,12 @@ Kilitle - - Kare istatistikleri - Saydamlık Kilidi aç - - Kare hızlarını değiştir - - - Kare gecikmesini değiştir - Zamanlamalar @@ -817,13 +784,10 @@ Efektleri değiştir - - VSync Eşitleyici - Yazı tipi önbelleği etkisizleştir - + Ekranı kaplayan veya tam ekran pencere ölçeğine izin ver diff --git a/src/Magpie.App/Resources.language-uk.resw b/src/Magpie.App/Resources.language-uk.resw index 02223dde3..c2e25b91e 100644 --- a/src/Magpie.App/Resources.language-uk.resw +++ b/src/Magpie.App/Resources.language-uk.resw @@ -59,7 +59,7 @@ : using a System.ComponentModel.TypeConverter : and then encoded with base64 encoding. 
--> - + @@ -120,16 +120,16 @@ Про застосунок - + Мова - + Запуск під час запуску системи - + Активація - + Автоматичне відновлення @@ -147,10 +147,10 @@ Масштабування вікна переднього плану після закінчення таймера - + Відкладене масштабування - + Затримка в секундах @@ -195,10 +195,10 @@ Конфігурація масштабування - + Розширені - + Загальні @@ -216,30 +216,24 @@ Ввімкнено - + Гарячі клавіші - + Внутрішньоігрове накладання Внутрішньоігрова клавіша накладання - + Масштаб Клавіша масштабування - + Запуск - - Загальні - - - Ефект зменшення масштабу за замовчуванням - Експорт @@ -249,13 +243,13 @@ Додаткові параметри - + Режими масштабування Щоб скористатися цим параметром, вам потрібно запустити від імені адміністратора - + Завжди запускати від імені адміністратора @@ -270,15 +264,12 @@ {} доступний - + Інші посилання Рекомендації щодо внеску - - Немає - Параметри @@ -372,19 +363,19 @@ Підігнати - + Загальні - + Режим 3D гри - + Автоматичне масштабування на передньому плані - + Метод захоплення - + Бажані монітори @@ -429,31 +420,22 @@ Немає - + Продуктивність - + Відображення лічильника FPS - - Дозволити додаткову затримку для підвищення продуктивності - - - Вертикальна синхронізація - - + Зворотній зв'язок - + Запросити функцію - - Версія - - + Перевірити наявність оновлень попереднього перегляду - + Автоматична перевірка оновлень @@ -483,19 +465,19 @@ Встановлення - + Розширені - + Вимкнути DirectFlip - + Курсор - + Намалювати курсор - + Алгоритм інтерполяції @@ -504,7 +486,7 @@ Найближчий сусід - + Коефіцієнт масштабування @@ -516,22 +498,22 @@ За замовчуванням - + Захопити рядок заголовка - + Обрізати вручну - + Низ - + Ліворуч пікс - + Праворуч @@ -543,7 +525,7 @@ Для використання цього параметра потрібно ввімкнути "Відображати застосунок в системному треї" - + Згортання в системний трей під час запуску @@ -567,7 +549,7 @@ Ці налаштування призначені лише для використання під час розробки - + Параметри розробника @@ -586,7 +568,7 @@ Файл конфігурації не є дійсним 
JSON - + Вихід @@ -628,27 +610,24 @@ Перемістити вгору - + Портативний режим - + Magpie продовжить працювати у фоновому режимі після закриття головного вікна - + Відображення застосунку в системному треї - + Тема - + Обговорення - + Повідомити про помилку - - Автоматично застосовується, коли вихідне зображення занадто велике, щоб поміститися на екрані - Імпорт @@ -688,10 +667,10 @@ Встановити коефіцієнт масштабування після заповнення екрана за допомогою пропорційного масштабування - + Відеокарта - + Режим масштабування @@ -706,25 +685,25 @@ Без масштабування - + Регулювання швидкості курсору під час масштабування - + Вихідне вікно Обмежено захопленням графіки та дублюванням робочого столу - + Зробити параметри ефекту вбудованими - + Верх - + Вимкнути зміну розміру вікна під час масштабування - + Імітація ексклюзивного повноекранного режиму при масштабуванні @@ -748,7 +727,7 @@ Режим налагодження - + Головне вікно @@ -769,30 +748,21 @@ Імпорт ScaleModels.json - + Оновлення налаштувань Версія - - Редагувати - - + Параметри запуску - - Вертикальна синхронізація - Прозорість Розблокувати - - Перемикання на частоту кадрів - Таймінги @@ -811,19 +781,13 @@ Заблокувати - - Статистика кадрів - - - Перемикання на таймінги кадрів - Переключитися на ефекти Вимкнути кеш шрифтів - + Дозволити масштабування розгорнутих або повноекранних вікон diff --git a/src/Magpie.App/Resources.language-vi.resw b/src/Magpie.App/Resources.language-vi.resw index 79e6868dd..ad90d9e44 100644 --- a/src/Magpie.App/Resources.language-vi.resw +++ b/src/Magpie.App/Resources.language-vi.resw @@ -117,19 +117,19 @@ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 - + Phản hồi - + Diễn đàn - + Báo lỗi - + Yêu cầu tính năng - + Các liên kết khác @@ -147,9 +147,6 @@ Giới thiệu - - Phiên bản - Kiểm tra cập nhật @@ -174,13 +171,13 @@ Đã có bản cập nhật {} - + Cài đặt cập nhật - + Kiểm tra cập nhật tự động - + Kiểm tra các cập nhật thử 
nghiệm @@ -247,13 +244,13 @@ File JSON - + Kích hoạt Tự động scale cửa sổ trên cùng - + Tự động scale @@ -271,16 +268,16 @@ Chính - + Phím tắt - + Overlay trong ứng dụng Phím tắt overlay - + Scale @@ -289,7 +286,7 @@ Scale cửa sổ trên cùng khi hết thời gian - + Thời gian chờ Scale @@ -298,7 +295,7 @@ Hủy - + Thời gian bằng giây @@ -373,15 +370,6 @@ Phương pháp lấy cửa sổ - - Thông tin khung hình - - - Chuyển sang tốc độ khung hình - - - Chuyển sang thời gian khung hình - Timing @@ -394,31 +382,25 @@ Tổng cộng - - VSync - - + Nâng cao - + Tắt DirectFlip - + Thông số khởi chạy - - Sửa - - + Con trỏ chuột - + Vẽ con trỏ chuột - + Thay đổi tốc độ con trỏ khi scale - + Thuật toán @@ -427,7 +409,7 @@ Nearest-neighbor - + Mức độ Scale @@ -439,22 +421,22 @@ Như cửa sổ gốc - + Chung - + Chế độ game 3D - + Tự động scale cửa sổ trên cùng - + Phương pháp lấy cửa sổ Mặc định - + Màn hình ưu tiên @@ -466,7 +448,7 @@ Màn hình hiển thị cửa sổ gốc - + Chế độ Scale @@ -514,63 +496,45 @@ Đổi thứ tự - + Hiệu năng - + Card đồ họa - + Hiển thị bộ đếm FPS - - VSync - - - Thêm độ trễ để cải thiện hiệu năng - - + Cửa sổ nguồn Chỉ sử dụng được với Graphics Capture và Desktop Duplication - + Lấy cả thanh tiêu đề - + Lề cửa sổ tùy chỉnh - + Dưới - + Trái px - + Phải - + Trên - + Vô hiệu hóa thay đổi cỡ cửa sổ - - Chung - - - Được sử dụng khi kích thước hình ảnh lớn hơn màn hình - - - Chế độ downscale mặc định - - - Không - Xuất @@ -592,7 +556,7 @@ Phân giải thất bại - + Các chế độ scale @@ -712,28 +676,28 @@ Chọn file chạy của chương trình - + Nâng cao - + Cho phép scale cửa sổ toàn màn hình Cải thiện hiệu năng. 
Tuy nhiên các hiệu ứng phải được biên dịch lại khi các thông số của chúng đươc chỉnh - + Đặt thông số hiệu ứng trực tiếp Các thông báo và pop-up từ một số ứng dụng sẽ bị chặn - + Giả lập chế độ toàn màn hình độc lập Những cài đặt này chỉ dành cho mục đích phát triển - + Cài đặt nhà phát triển @@ -751,16 +715,16 @@ Đặt cảnh bảo như lỗi khi đọc hiệu ứng - + Chung - + Ngôn ngữ Mặc định Windows - + Chế độ Portable @@ -772,13 +736,13 @@ Khởi động lại Magpie - + Magpie sẽ tiếp tục chạy kể cả khi cửa sổ chính bị đóng - + Hiển thị trên thanh tác vụ - + Màu @@ -790,22 +754,22 @@ Mặc định Windows - + Chạy Bạn cần mở với quyền quản trị viên để thay đổi cài đặt này - + Luôn chạy với quyền quản trị viên - + Chạy khi khởi động Bạn cần bật "Hiển thị trên thanh tác vụ" để sử dụng cài đặt này - + Thu nhỏ về thanh tác vụ khi khởi động @@ -832,10 +796,10 @@ Bật - + Thoát - + Cửa sổ chính diff --git a/src/Magpie.App/Resources.language-zh-Hans.resw b/src/Magpie.App/Resources.language-zh-Hans.resw index baf034e12..1b6186bb6 100644 --- a/src/Magpie.App/Resources.language-zh-Hans.resw +++ b/src/Magpie.App/Resources.language-zh-Hans.resw @@ -120,13 +120,13 @@ 关于 - + 激活 当该窗口回到前台时将自动缩放 - + 记忆缩放窗口 @@ -147,7 +147,7 @@ 计时结束后缩放前台窗口 - + 定时器 @@ -156,7 +156,7 @@ 取消 - + 倒计时时长 @@ -216,34 +216,34 @@ 缩放配置 - + 高级 - + 常规 - + 语言 Windows 默认 - + 便携模式 打开配置文件位置 - + 开机启动 - + 主窗口被关闭后 Magpie 将在后台继续运行 - + 在系统托盘上显示应用程序 - + 主题 @@ -261,33 +261,24 @@ - + 快捷键 - + 游戏内叠加层 游戏内叠加层快捷键 - + 缩放 缩放快捷键 - + 启动 - - 通用 - - - 输出图像太大无法被屏幕容纳时将自动应用此效果 - - - 默认降采样效果 - 导出 @@ -300,13 +291,13 @@ 更多选项 - + 缩放模式 以管理员身份运行时才能使用此选项 - + 始终以管理员身份运行 @@ -324,7 +315,7 @@ {} 可用 - + 其他链接 @@ -339,9 +330,6 @@ Github 仓库 - - - 参数 @@ -453,19 +441,19 @@ 指定等比缩放到充满屏幕后的缩放倍数 - + 常规 - + 3D 游戏模式 - + 位于前台时自动缩放 - + 捕获方式 - + 首选的显示器 @@ -477,7 +465,7 @@ 源窗口跨越的所有显示器 - + 缩放模式 @@ -525,40 +513,31 @@ - + 性能 - + 显示卡 - + 显示帧率 - - 允许额外的延迟以提高性能 - - - 垂直同步 - - + 反馈 - + 讨论区 - + 报告错误 - + 建议功能 - - 程序版本 - - + 检查预览版更新 - + 自动检查更新 @@ -597,22 +576,22 @@ 安装中 - + 高级 - + 禁用 DirectFlip - + 
光标 - + 绘制光标 - + 缩放时调整光标速度 - + 插值算法 @@ -621,7 +600,7 @@ 最近邻 - + 缩放系数 @@ -636,52 +615,52 @@ 默认 - + 源窗口 仅在 Graphics Capture 和 Desktop Duplication 捕获方式下可用 - + 捕获标题栏 - + 自定义裁剪 - + - + 像素 - + - + - + 缩放时禁用窗口大小调整 稍微提高性能,但每次修改效果的参数都需重新编译该效果 - + 内联效果参数 可以阻止某些应用的通知和弹窗 - + 缩放时模拟独占全屏 启用了“在系统托盘上显示应用程序”时才能使用此选项 - + 启动时最小化到系统托盘 @@ -711,7 +690,7 @@ 这些设置仅供开发使用 - + 开发者选项 @@ -742,10 +721,10 @@ 读取配置文件失败 - + 退出 - + 主窗口 @@ -769,39 +748,24 @@ 导入旧版程序的 ScaleModels.json - + 更新设置 版本 - + 启动参数 - - 编辑 - 性能分析器 捕获方式 - - 垂直同步 - 不透明度 - - 帧统计 - - - 切换到帧率 - - - 切换到帧时间 - 渲染用时 @@ -823,7 +787,7 @@ 禁用字体缓存 - + 允许缩放最大化或全屏的窗口 @@ -844,10 +808,37 @@ 缩放前台窗口或停止缩放 + + 帧率限制 + + + 最大帧率 + 提交 开发者模式已启用。 + + 检测重复帧 + + + 总是检测 + + + 动态检测 + + + 从不检测 + + + 启用动态检测统计 + + + 动态检测 + + + 帧率 + \ No newline at end of file diff --git a/src/Magpie.App/Resources.language-zh-Hant.resw b/src/Magpie.App/Resources.language-zh-Hant.resw index 117902c84..5d807096e 100644 --- a/src/Magpie.App/Resources.language-zh-Hant.resw +++ b/src/Magpie.App/Resources.language-zh-Hant.resw @@ -59,7 +59,7 @@ : using a System.ComponentModel.TypeConverter : and then encoded with base64 encoding. 
--> - + @@ -120,13 +120,13 @@ 關於 - + 啟用 當該視窗回到前景時將自動縮放 - + 記住縮放視窗 @@ -144,7 +144,7 @@ 首頁 - + 計時器 @@ -156,7 +156,7 @@ 取消 - + 倒數計時延遲 @@ -207,31 +207,31 @@ 縮放設定 - + 進階設定 - + 語言 Windows 預設 - + 可攜模式 打開組態檔案位置 - + 開機啟動 - + 主視窗被關閉後 Magpie 將在背景繼續執行 - + 一般資訊 - + 主題 @@ -246,33 +246,24 @@ 已停用 - + 鍵盤快速鍵 - + 遊戲內遮罩 遊戲內遮罩的鍵盤快速鍵 - + 縮放 縮放的鍵盤快速鍵 - + 啟動 - - 一般資訊 - - - 輸出影像大於螢幕本身解析度時將自動套用此效果 - - - 預設縮小效果 - 匯出 @@ -282,7 +273,7 @@ 匯入舊版程式的 ScaleModels.json - + 退出 @@ -294,7 +285,7 @@ 開啟新視窗以新增設定檔 - + 工具列圖示 @@ -303,13 +294,13 @@ 更多設定 - + 縮放模式 以管理員身份執行時才能使用此選項 - + 始終以管理員身份執行 @@ -324,7 +315,7 @@ 以後再提醒我 - + 其他連結 @@ -336,9 +327,6 @@ Github 倉庫 - - - 更多設定 @@ -435,16 +423,16 @@ 指定等比縮放到填滿螢幕後的縮放倍數 - + 一般資訊 - + 3D 遊戲模式 - + 前景時自動縮放 - + 偏好顯示器 @@ -471,31 +459,22 @@ 剖析失敗 - + 效能 - + 顯示卡 - + 顯示畫面影格速率 - - 允許額外的延遲以提高效能 - - - 垂直同步 - - + 回饋 - + 建議功能 - - 程式版本 - - + 檢查預覽版更新 @@ -534,28 +513,28 @@ 安裝中 - + 進階設定 - + 停用 DirectFlip - + 游標 - + 繪製游標 - + 縮放時調整游標速度 - + 插值算法 最近相鄰 - + 縮放係數 @@ -570,40 +549,40 @@ 僅在 Graphics Capture 與 Desktop Duplication 擷取方式下可用 - + 自訂裁切 - + - + 像素 - + - + - + 縮放時停用視窗大小調整 稍微提高效能,但每次修改效果的參數都需重新編譯該效果 - + 內聯效果參數 啟用了“工具列圖示”時才能使用此選項 - + 啟動時最小化到工具列圖示 - + 縮放時模擬獨占全螢幕 @@ -630,7 +609,7 @@ 這些設置僅供開發使用 - + 開發者選項 @@ -661,7 +640,7 @@ 讀取組態文件失敗 - + 主視窗 @@ -712,10 +691,10 @@ 重新排列 - + 縮放模式 - + 截取方式 @@ -742,16 +721,16 @@ - + 討論版 - + 報告錯誤 - + 自動檢查更新 - + 擷取標題欄 @@ -763,7 +742,7 @@ 自訂 - + 來源視窗 @@ -772,21 +751,15 @@ 程式版本 - + 更新設定 - + 啟動參數 - - 編輯 - 截取方式 - - 垂直同步 - 透明度 @@ -814,9 +787,6 @@ 處理時間 - - 影格統計資料 - 提交 @@ -826,9 +796,6 @@ 更改啟動時的可執行檔案 - - 切換為影格速率 - 總共 @@ -838,10 +805,7 @@ 縮放前景視窗或停止縮放 - + 允許縮放已最大化或全螢幕的視窗 - - 切換為影格生成時間 - \ No newline at end of file diff --git a/src/Magpie.App/RootPage.xaml b/src/Magpie.App/RootPage.xaml index 0103d806d..16a8e85f2 100644 --- a/src/Magpie.App/RootPage.xaml +++ b/src/Magpie.App/RootPage.xaml @@ -21,7 +21,7 @@ Canvas.ZIndex="0" CompactModeThresholdWidth="0" DisplayModeChanged="NavigationView_DisplayModeChanged" - ExpandedModeThresholdWidth="950" + ExpandedModeThresholdWidth="920" 
IsBackButtonVisible="Collapsed" ItemInvoked="NavigationView_ItemInvoked" PaneClosing="NavigationView_PaneClosing" @@ -67,27 +67,24 @@ HorizontalContentAlignment="Center" VerticalContentAlignment="Center" Visibility="{x:Bind NewProfileViewModel.IsNoCandidateWindow, Mode=OneWay}"> - + - + - + - + - + - - + + - + - + - - - - - - - - - - - - - - - - + + + + + SelectionMode="None" + TabNavigation="Local"> + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + + + + + + + + + + + - - - - + - + \ No newline at end of file diff --git a/src/Magpie.App/ScalingConfigurationViewModel.cpp b/src/Magpie.App/ScalingConfigurationViewModel.cpp index fcb4e962b..eeb89cd66 100644 --- a/src/Magpie.App/ScalingConfigurationViewModel.cpp +++ b/src/Magpie.App/ScalingConfigurationViewModel.cpp @@ -11,6 +11,7 @@ #include "Win32Utils.h" #include "ScalingMode.h" #include "FileDialogHelper.h" +#include "CommonSharedConstants.h" using namespace ::Magpie::Core; @@ -21,48 +22,6 @@ ScalingConfigurationViewModel::ScalingConfigurationViewModel() { Animation::RepositionThemeTransition respositionAnime; respositionAnime.IsStaggeringEnabled(false); _scalingModesListTransitions.Append(std::move(respositionAnime)); - - std::vector downscalingEffects; - downscalingEffects.reserve(7); - - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); - downscalingEffects.push_back(box_value(resourceLoader.GetString( - L"ScalingConfiguration_General_DefaultDownscalingEffect_None"))); - - _downscalingEffectNames.reserve(6); - for (const EffectInfo& effectInfo : EffectsService::Get().Effects()) { - if (effectInfo.IsGenericDownscaler()) { - _downscalingEffectNames.emplace_back(effectInfo.name, - StrUtils::ToLowerCase(EffectHelper::GetDisplayName(effectInfo.name))); - } - } - - // 根据显示名排序,不区分大小写 - std::sort(_downscalingEffectNames.begin(), 
_downscalingEffectNames.end(), - [](const auto& l, const auto& r) { return l.second < r.second; }); - for (const auto& pair : _downscalingEffectNames) { - downscalingEffects.push_back(box_value(EffectHelper::GetDisplayName(pair.first))); - } - _downscalingEffects = single_threaded_vector(std::move(downscalingEffects)); - - DownscalingEffect& downscalingEffect = AppSettings::Get().DownscalingEffect(); - if (!downscalingEffect.name.empty()) { - auto it = std::lower_bound( - _downscalingEffectNames.begin(), - _downscalingEffectNames.end(), - downscalingEffect.name, - [](const auto& l, const std::wstring& r) { return l.first < r; } - ); - - if (it == _downscalingEffectNames.end() || it->first != downscalingEffect.name) { - Logger::Get().Warn(fmt::format("降采样效果 {} 不存在", - StrUtils::UTF16ToUTF8(downscalingEffect.name))); - downscalingEffect.name.clear(); - downscalingEffect.parameters.clear(); - } else { - _downscalingEffectIndex = int(it - _downscalingEffectNames.begin() + 1); - } - } _AddScalingModes(); @@ -75,7 +34,9 @@ ScalingConfigurationViewModel::ScalingConfigurationViewModel() { } static std::optional OpenFileDialogForJson(IFileDialog* fileDialog) noexcept { - static std::wstring jsonFileStr(ResourceLoader::GetForCurrentView().GetString(L"FileDialog_JsonFile")); + static std::wstring jsonFileStr( + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID) + .GetString(L"FileDialog_JsonFile")); const COMDLG_FILTERSPEC fileType{ jsonFileStr.c_str(), L"*.json"}; fileDialog->SetFileTypes(1, &fileType); @@ -92,7 +53,9 @@ void ScalingConfigurationViewModel::Export() const noexcept { } fileDialog->SetFileName(L"ScalingModes"); - static std::wstring title(ResourceLoader::GetForCurrentView().GetString(L"ExportDialog_Title")); + static std::wstring title( + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID) + .GetString(L"ExportDialog_Title")); fileDialog->SetTitle(title.c_str()); std::optional fileName = 
OpenFileDialogForJson(fileDialog.get()); @@ -116,7 +79,8 @@ static bool ImportImpl(bool legacy) noexcept { return false; } - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); hstring title = resourceLoader.GetString(legacy ? L"ImportLegacyDialog_Title" : L"ImportDialog_Title"); fileDialog->SetTitle(title.c_str()); @@ -159,40 +123,11 @@ void ScalingConfigurationViewModel::_Import(bool legacy) { } } -void ScalingConfigurationViewModel::DownscalingEffectIndex(int value) { - if (_downscalingEffectIndex == value) { - return; - } - _downscalingEffectIndex = value; - - DownscalingEffect& downscalingEffect = AppSettings::Get().DownscalingEffect(); - downscalingEffect.parameters.clear(); - if (value <= 0) { - downscalingEffect.name.clear(); - } else { - downscalingEffect.name = _downscalingEffectNames[(size_t)value - 1].first; - } - - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"DownscalingEffectIndex")); - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"DownscalingEffectHasParameters")); - _propertyChangedEvent(*this, PropertyChangedEventArgs(L"DownscalingEffectParameters")); - - AppSettings::Get().SaveAsync(); -} - -bool ScalingConfigurationViewModel::DownscalingEffectHasParameters() noexcept { - if (_downscalingEffectIndex == 0) { - return false; - } - - const std::wstring& effectName = _downscalingEffectNames[(size_t)_downscalingEffectIndex - 1].first; - return !EffectsService::Get().GetEffect(effectName)->params.empty(); -} - void ScalingConfigurationViewModel::PrepareForAdd() { std::vector copyFromList; - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); copyFromList.push_back(box_value(resourceLoader.GetString( L"ScalingConfiguration_ScalingModes_NewScalingModeFlyout_CopyFrom_None"))); 
diff --git a/src/Magpie.App/ScalingConfigurationViewModel.h b/src/Magpie.App/ScalingConfigurationViewModel.h index 4216a9acb..e5535049c 100644 --- a/src/Magpie.App/ScalingConfigurationViewModel.h +++ b/src/Magpie.App/ScalingConfigurationViewModel.h @@ -35,24 +35,6 @@ struct ScalingConfigurationViewModel : ScalingConfigurationViewModelT DownscalingEffects() const noexcept { - return _downscalingEffects; - } - - int DownscalingEffectIndex() const noexcept { - return _downscalingEffectIndex; - } - - void DownscalingEffectIndex(int value); - - bool DownscalingEffectHasParameters() noexcept; - - Magpie::App::EffectParametersViewModel DownscalingEffectParameters() const noexcept { - // 默认构造表示降采样效果参数 - // 每次调用都返回一个新的实例,因为此时降采样效果已更改 - return {}; - } - Animation::TransitionCollection ScalingModesListTransitions() const noexcept { return _scalingModesListTransitions; } @@ -103,9 +85,6 @@ struct ScalingConfigurationViewModel : ScalingConfigurationViewModelT _downscalingEffects{ nullptr }; - // (FullName, 小写 DisplayName) - std::vector> _downscalingEffectNames; IObservableVector _scalingModes = single_threaded_observable_vector(); WinRTUtils::EventRevoker _scalingModeAddedRevoker; @@ -116,7 +95,6 @@ struct ScalingConfigurationViewModel : ScalingConfigurationViewModelT _newScalingModeCopyFromList{ nullptr }; int _newScalingModeCopyFrom = 0; - int _downscalingEffectIndex = 0; bool _showErrorMessage = false; bool _addingScalingModes = false; diff --git a/src/Magpie.App/ScalingConfigurationViewModel.idl b/src/Magpie.App/ScalingConfigurationViewModel.idl index 1964bb9bd..aab597138 100644 --- a/src/Magpie.App/ScalingConfigurationViewModel.idl +++ b/src/Magpie.App/ScalingConfigurationViewModel.idl @@ -7,11 +7,6 @@ namespace Magpie.App { void ImportLegacy(); Boolean ShowErrorMessage; - - IVector DownscalingEffects { get; }; - Int32 DownscalingEffectIndex; - Boolean DownscalingEffectHasParameters { get; }; - EffectParametersViewModel DownscalingEffectParameters { get; }; 
Windows.UI.Xaml.Media.Animation.TransitionCollection ScalingModesListTransitions { get; }; IObservableVector ScalingModes { get; }; diff --git a/src/Magpie.App/ScalingModeEffectItem.cpp b/src/Magpie.App/ScalingModeEffectItem.cpp index 49b275fa9..d5b3ec13e 100644 --- a/src/Magpie.App/ScalingModeEffectItem.cpp +++ b/src/Magpie.App/ScalingModeEffectItem.cpp @@ -11,6 +11,7 @@ #include "Logger.h" #include "ScalingMode.h" #include "StrUtils.h" +#include "CommonSharedConstants.h" using namespace ::Magpie::Core; namespace MagpieCore = ::Magpie::Core; @@ -28,7 +29,8 @@ ScalingModeEffectItem::ScalingModeEffectItem(uint32_t scalingModeIdx, uint32_t e _name = EffectHelper::GetDisplayName(data.name); _parametersViewModel = EffectParametersViewModel(scalingModeIdx, effectIdx); } else { - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); _name = StrUtils::Concat( resourceLoader.GetString(L"ScalingConfiguration_ScalingModes_Description_UnknownEffect"), L" (", @@ -64,7 +66,8 @@ bool ScalingModeEffectItem::HasParameters() const noexcept { IVector ScalingModeEffectItem::ScalingTypes() noexcept { using Windows::ApplicationModel::Resources::ResourceLoader; - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); return single_threaded_vector(std::vector{ Magpie::App::ScalingType( diff --git a/src/Magpie.App/ScalingModeEffectItem.h b/src/Magpie.App/ScalingModeEffectItem.h index da1009869..aa7b8ef8c 100644 --- a/src/Magpie.App/ScalingModeEffectItem.h +++ b/src/Magpie.App/ScalingModeEffectItem.h @@ -106,7 +106,7 @@ struct ScalingModeEffectItem : ScalingModeEffectItemT { event> _removedEvent; event> _movedEvent; - Magpie::App::EffectParametersViewModel _parametersViewModel; + Magpie::App::EffectParametersViewModel 
_parametersViewModel{ nullptr }; }; } diff --git a/src/Magpie.App/ScalingModeItem.cpp b/src/Magpie.App/ScalingModeItem.cpp index a56a975e8..3b8c68cd6 100644 --- a/src/Magpie.App/ScalingModeItem.cpp +++ b/src/Magpie.App/ScalingModeItem.cpp @@ -9,6 +9,7 @@ #include "AppSettings.h" #include "EffectsService.h" #include "EffectHelper.h" +#include "CommonSharedConstants.h" using namespace Magpie::Core; @@ -21,7 +22,8 @@ ScalingModeItem::ScalingModeItem(uint32_t index, bool isInitialExpanded) std::vector linkedProfiles; const Profile& defaultProfile = AppSettings::Get().DefaultProfile(); if (defaultProfile.scalingMode == (int)index) { - hstring defaults = ResourceLoader::GetForCurrentView().GetString(L"Root_Defaults/Content"); + hstring defaults = ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID) + .GetString(L"Root_Defaults/Content"); linkedProfiles.push_back(box_value(defaults)); } for (const Profile& profile : AppSettings::Get().Profiles()) { @@ -229,7 +231,8 @@ hstring ScalingModeItem::Description() const noexcept { if (const EffectInfo* effectInfo = EffectsService::Get().GetEffect(effect.name)) { result += EffectHelper::GetDisplayName(effect.name); } else { - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); result += L'('; result += resourceLoader.GetString(L"ScalingConfiguration_ScalingModes_Description_UnknownEffect"); result += L')'; diff --git a/src/Magpie.App/MagService.cpp b/src/Magpie.App/ScalingService.cpp similarity index 51% rename from src/Magpie.App/MagService.cpp rename to src/Magpie.App/ScalingService.cpp index 51d8caa8f..9be94f3b7 100644 --- a/src/Magpie.App/MagService.cpp +++ b/src/Magpie.App/ScalingService.cpp @@ -1,5 +1,5 @@ #include "pch.h" -#include "MagService.h" +#include "ScalingService.h" #include "ShortcutService.h" #include "Win32Utils.h" #include "AppSettings.h" @@ -8,6 +8,7 @@ 
#include "ScalingMode.h" #include "Logger.h" #include "EffectsService.h" +#include using namespace ::Magpie::Core; using namespace winrt; @@ -15,36 +16,36 @@ using namespace Windows::System::Threading; namespace winrt::Magpie::App { -void MagService::Initialize() { +void ScalingService::Initialize() { _dispatcher = CoreWindow::GetForCurrentThread().Dispatcher(); _countDownTimer.Interval(25ms); - _countDownTimer.Tick({ this, &MagService::_CountDownTimer_Tick }); - - AppSettings::Get().IsAutoRestoreChanged({ this, &MagService::_Settings_IsAutoRestoreChanged }); - _magRuntime.emplace(); - _magRuntime->IsRunningChanged({ this, &MagService::_MagRuntime_IsRunningChanged }); - - ShortcutService::Get().ShortcutActivated( - { this, &MagService::_ShortcutService_ShortcutPressed } - ); + _countDownTimer.Tick({ this, &ScalingService::_CountDownTimer_Tick }); _checkForegroundTimer = ThreadPoolTimer::CreatePeriodicTimer( - { this, &MagService::_CheckForegroundTimer_Tick }, + { this, &ScalingService::_CheckForegroundTimer_Tick }, 50ms ); + + AppSettings::Get().IsAutoRestoreChanged({ this, &ScalingService::_Settings_IsAutoRestoreChanged }); + _scalingRuntime = std::make_unique(); + _scalingRuntime->IsRunningChanged({ this, &ScalingService::_ScalingRuntime_IsRunningChanged }); + + ShortcutService::Get().ShortcutActivated( + { this, &ScalingService::_ShortcutService_ShortcutPressed } + ); // 立即检查前台窗口 _CheckForegroundTimer_Tick(nullptr); } -void MagService::Uninitialize() { +void ScalingService::Uninitialize() { _checkForegroundTimer.Cancel(); _countDownTimer.Stop(); - _magRuntime.reset(); + _scalingRuntime.reset(); } -void MagService::StartTimer() { +void ScalingService::StartTimer() { if (_curCountdownSeconds != 0) { return; } @@ -55,7 +56,7 @@ void MagService::StartTimer() { _isTimerOnChangedEvent(true); } -void MagService::StopTimer() { +void ScalingService::StopTimer() { if (_curCountdownSeconds == 0) { return; } @@ -65,7 +66,7 @@ void MagService::StopTimer() { 
_isTimerOnChangedEvent(false); } -double MagService::SecondsLeft() const noexcept { +double ScalingService::SecondsLeft() const noexcept { using namespace std::chrono; if (!IsTimerOn()) { @@ -78,16 +79,20 @@ double MagService::SecondsLeft() const noexcept { return msLeft / 1000.0; } -void MagService::ClearWndToRestore() { +void ScalingService::ClearWndToRestore() { _WndToRestore(NULL); } -void MagService::CheckForeground() { - _hwndChecked.store(NULL, std::memory_order_relaxed); +bool ScalingService::IsRunning() const noexcept { + return _scalingRuntime && _scalingRuntime->IsRunning(); +} + +void ScalingService::CheckForeground() { + _hwndChecked = NULL; _CheckForegroundTimer_Tick(nullptr); } -void MagService::_WndToRestore(HWND value) { +void ScalingService::_WndToRestore(HWND value) { if (_hwndToRestore == value) { return; } @@ -96,16 +101,16 @@ void MagService::_WndToRestore(HWND value) { _wndToRestoreChangedEvent(_hwndToRestore); } -void MagService::_ShortcutService_ShortcutPressed(ShortcutAction action) { - if (!_magRuntime) { +void ScalingService::_ShortcutService_ShortcutPressed(ShortcutAction action) { + if (!_scalingRuntime) { return; } switch (action) { case ShortcutAction::Scale: { - if (_magRuntime->IsRunning()) { - _magRuntime->Stop(); + if (_scalingRuntime->IsRunning()) { + _scalingRuntime->Stop(); return; } @@ -114,8 +119,8 @@ void MagService::_ShortcutService_ShortcutPressed(ShortcutAction action) { } case ShortcutAction::Overlay: { - if (_magRuntime->IsRunning()) { - _magRuntime->ToggleOverlay(); + if (_scalingRuntime->IsRunning()) { + _scalingRuntime->ToggleOverlay(); return; } break; @@ -125,7 +130,7 @@ void MagService::_ShortcutService_ShortcutPressed(ShortcutAction action) { } } -void MagService::_CountDownTimer_Tick(IInspectable const&, IInspectable const&) { +void ScalingService::_CountDownTimer_Tick(IInspectable const&, IInspectable const&) { double timeLeft = SecondsLeft(); // 剩余时间在 10 ms 以内计时结束 @@ -138,18 +143,16 @@ void 
MagService::_CountDownTimer_Tick(IInspectable const&, IInspectable const&) _timerTickEvent(timeLeft); } -fire_and_forget MagService::_CheckForegroundTimer_Tick(ThreadPoolTimer const& timer) { - // _magRuntime 不会改变,无需同步措施 - if (!_magRuntime || _magRuntime->IsRunning()) { +fire_and_forget ScalingService::_CheckForegroundTimer_Tick(ThreadPoolTimer const& timer) { + if (!_scalingRuntime || _scalingRuntime->IsRunning()) { co_return; } - const HWND hwndFore = GetForegroundWindow(); - // 没有对其他变量的访问,因此不需要限制内存定序 - if (hwndFore == _hwndChecked.load(std::memory_order_relaxed)) { + HWND hwndFore = GetForegroundWindow(); + if (!hwndFore || hwndFore == _hwndChecked) { co_return; } - _hwndChecked.store(NULL, std::memory_order_relaxed); + _hwndChecked = NULL; if (timer) { // ThreadPoolTimer 在后台线程触发 @@ -158,46 +161,30 @@ fire_and_forget MagService::_CheckForegroundTimer_Tick(ThreadPoolTimer const& ti const bool isAutoRestore = AppSettings::Get().IsAutoRestore(); - if (_CheckSrcWnd(hwndFore)) { - const Profile& profile = ProfileService::Get().GetProfileForWindow(hwndFore); - // 先检查自动恢复全屏 - if (profile.isAutoScale) { - if (_StartScale(hwndFore, profile)) { - // 触发自动缩放时清空记忆的窗口 - if (AppSettings::Get().IsAutoRestore()) { - _WndToRestore(NULL); - } - } - - co_return; - } - - // 恢复记忆的窗口 - if (isAutoRestore && _hwndToRestore == hwndFore) { - _StartScale(hwndFore, profile); - co_return; - } + const Profile& profile = ProfileService::Get().GetProfileForWindow(hwndFore); + // 自动恢复全屏或恢复记忆的窗口 + if ((profile.isAutoScale || (isAutoRestore && _hwndToRestore == hwndFore)) && + _CheckSrcWnd(hwndFore, _hwndToRestore != hwndFore) + ) { + _StartScale(hwndFore, profile); + co_return; } - if (isAutoRestore && !_CheckSrcWnd(_hwndToRestore)) { + if (isAutoRestore && !_CheckSrcWnd(_hwndToRestore, false)) { _WndToRestore(NULL); } // 避免重复检查 - _hwndChecked.store(hwndFore, std::memory_order_relaxed); + _hwndChecked = hwndFore; } -void MagService::_Settings_IsAutoRestoreChanged(bool) { - if 
(AppSettings::Get().IsAutoRestore()) { - // 立即生效,即使正处于缩放状态 - _hwndCurSrc = _magRuntime->HwndSrc(); - } else { - _hwndCurSrc = NULL; +void ScalingService::_Settings_IsAutoRestoreChanged(bool value) { + if (!value) { _WndToRestore(NULL); } } -fire_and_forget MagService::_MagRuntime_IsRunningChanged(bool isRunning) { +fire_and_forget ScalingService::_ScalingRuntime_IsRunningChanged(bool isRunning) { co_await _dispatcher; if (isRunning) { @@ -206,34 +193,19 @@ fire_and_forget MagService::_MagRuntime_IsRunningChanged(bool isRunning) { if (AppSettings::Get().IsAutoRestore()) { _WndToRestore(NULL); } - - _hwndCurSrc = _magRuntime->HwndSrc(); } else { - HWND curSrcWnd = _hwndCurSrc; - _hwndCurSrc = NULL; - - HWND hwndMain = (HWND)Application::Current().as().HwndMain(); - if (hwndMain == curSrcWnd) { - // 必须在主线程还原主窗口样式 - // 见 FrameSourceBase::~FrameSourceBase - LONG_PTR style = GetWindowLongPtr(hwndMain, GWL_STYLE); - if (!(style & WS_THICKFRAME)) { - SetWindowLongPtr(hwndMain, GWL_STYLE, style | WS_THICKFRAME); - SetWindowPos(hwndMain, 0, 0, 0, 0, 0, - SWP_NOMOVE | SWP_NOSIZE | SWP_NOZORDER | SWP_FRAMECHANGED); - } - } - - if (GetForegroundWindow() == curSrcWnd) { + if (GetForegroundWindow() == _hwndCurSrc) { // 退出全屏后如果前台窗口不变视为通过热键退出 - _hwndChecked.store(curSrcWnd, std::memory_order_relaxed); + _hwndChecked = _hwndCurSrc; } else if (!_isAutoScaling && AppSettings::Get().IsAutoRestore()) { - // 自动缩放窗口时切换窗口无需记录到自动恢复 - if (_CheckSrcWnd(curSrcWnd)) { - _WndToRestore(curSrcWnd); + // 无需再次检查完整性级别 + if (_CheckSrcWnd(_hwndCurSrc, false)) { + _WndToRestore(_hwndCurSrc); } } + _hwndCurSrc = NULL; + // 立即检查前台窗口 _CheckForegroundTimer_Tick(nullptr); } @@ -241,12 +213,12 @@ fire_and_forget MagService::_MagRuntime_IsRunningChanged(bool isRunning) { _isRunningChangedEvent(isRunning); } -bool MagService::_StartScale(HWND hWnd, const Profile& profile) { +bool ScalingService::_StartScale(HWND hWnd, const Profile& profile) { if (profile.scalingMode < 0) { return false; } - MagOptions options; 
+ ScalingOptions options; options.effects = ScalingModesService::Get().GetScalingMode(profile.scalingMode).effects; if (options.effects.empty()) { return false; @@ -261,6 +233,9 @@ bool MagService::_StartScale(HWND hWnd, const Profile& profile) { options.graphicsCard = profile.graphicsCard; options.captureMethod = profile.captureMethod; + if (profile.isFrameRateLimiterEnabled) { + options.maxFrameRate = profile.maxFrameRate; + } options.multiMonitorUsage = profile.multiMonitorUsage; options.cursorInterpolationMode = profile.cursorInterpolationMode; options.flags = profile.flags; @@ -309,23 +284,25 @@ bool MagService::_StartScale(HWND hWnd, const Profile& profile) { } } - options.downscalingEffect = settings.DownscalingEffect(); options.IsDebugMode(settings.IsDebugMode()); - options.IsDisableEffectCache(settings.IsDisableEffectCache()); - options.IsDisableFontCache(settings.IsDisableFontCache()); + options.IsEffectCacheDisabled(settings.IsEffectCacheDisabled()); + options.IsFontCacheDisabled(settings.IsFontCacheDisabled()); options.IsSaveEffectSources(settings.IsSaveEffectSources()); options.IsWarningsAreErrors(settings.IsWarningsAreErrors()); options.IsAllowScalingMaximized(settings.IsAllowScalingMaximized()); options.IsSimulateExclusiveFullscreen(settings.IsSimulateExclusiveFullscreen()); + options.duplicateFrameDetectionMode = settings.DuplicateFrameDetectionMode(); + options.IsStatisticsForDynamicDetectionEnabled(settings.IsStatisticsForDynamicDetectionEnabled()); _isAutoScaling = profile.isAutoScale; - _magRuntime->Run(hWnd, options); + _scalingRuntime->Start(hWnd, std::move(options)); + _hwndCurSrc = hWnd; return true; } -void MagService::_ScaleForegroundWindow() { +void ScalingService::_ScaleForegroundWindow() { HWND hWnd = GetForegroundWindow(); - if (!_CheckSrcWnd(hWnd)) { + if (!_CheckSrcWnd(hWnd, true)) { return; } @@ -333,17 +310,76 @@ void MagService::_ScaleForegroundWindow() { _StartScale(hWnd, profile); } -bool MagService::_CheckSrcWnd(HWND hWnd) 
noexcept { +static bool GetWindowIntegrityLevel(HWND hWnd, DWORD& integrityLevel) noexcept { + DWORD processId; + if (!GetWindowThreadProcessId(hWnd, &processId)) { + Logger::Get().Win32Error("GetWindowThreadProcessId 失败"); + return false; + } + + Win32Utils::ScopedHandle hProc(Win32Utils::SafeHandle( + OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, FALSE, processId))); + if (!hProc) { + Logger::Get().Win32Error("OpenProcess 失败"); + return false; + } + + Win32Utils::ScopedHandle hQueryToken; + { + HANDLE token; + if (!OpenProcessToken(hProc.get(), TOKEN_QUERY, &token)) { + Logger::Get().Win32Error("OpenProcessToken 失败"); + return false; + } + hQueryToken.reset(token); + } + + return Win32Utils::GetProcessIntegrityLevel(hQueryToken.get(), integrityLevel); +} + +bool ScalingService::_CheckSrcWnd(HWND hWnd, bool checkIL) noexcept { if (!hWnd || !IsWindow(hWnd)) { return false; } - UINT showCmd = Win32Utils::GetWindowShowCmd(hWnd); - if (showCmd == SW_NORMAL) { - return true; + if (!WindowHelper::IsValidSrcWindow(hWnd)) { + return false; } - return showCmd == SW_MAXIMIZE && AppSettings::Get().IsAllowScalingMaximized(); + // 不缩放最小化的窗口,是否缩放最大化的窗口由设置决定 + if (UINT showCmd = Win32Utils::GetWindowShowCmd(hWnd); showCmd != SW_NORMAL) { + if (showCmd != SW_MAXIMIZE || !AppSettings::Get().IsAllowScalingMaximized()) { + return false; + } + } + + // 不缩放过小的窗口 + { + RECT clientRect; + if (!GetClientRect(hWnd, &clientRect)) { + return false; + } + + const SIZE clientSize = Win32Utils::GetSizeOfRect(clientRect); + if (clientSize.cx < 32 && clientSize.cy < 32) { + return false; + } + } + + if (checkIL) { + // 禁止缩放完整性级别 (integrity level) 更高的窗口 + static DWORD thisIL = []() -> DWORD { + DWORD il; + return Win32Utils::GetProcessIntegrityLevel(NULL, il) ? 
il : 0; + }(); + + DWORD windowIL; + if (!GetWindowIntegrityLevel(hWnd, windowIL) || windowIL > thisIL) { + return false; + } + } + + return true; } } diff --git a/src/Magpie.App/MagService.h b/src/Magpie.App/ScalingService.h similarity index 81% rename from src/Magpie.App/MagService.h rename to src/Magpie.App/ScalingService.h index 3d520fba9..5b533c116 100644 --- a/src/Magpie.App/MagService.h +++ b/src/Magpie.App/ScalingService.h @@ -1,21 +1,24 @@ #pragma once #include -#include #include "WinRTUtils.h" +namespace Magpie::Core { +class ScalingRuntime; +} + namespace winrt::Magpie::App { struct Profile; -class MagService { +class ScalingService { public: - static MagService& Get() noexcept { - static MagService instance; + static ScalingService& Get() noexcept { + static ScalingService instance; return instance; } - MagService(const MagService&) = delete; - MagService(MagService&&) = delete; + ScalingService(const ScalingService&) = delete; + ScalingService(ScalingService&&) = delete; void Initialize(); @@ -40,7 +43,7 @@ class MagService { }); } - void IsTimerOnChanged(event_token const& token) noexcept { + void IsTimerOnChanged(event_token const& token) { _isTimerOnChangedEvent.remove(token); } @@ -61,7 +64,7 @@ class MagService { }); } - void TimerTick(event_token const& token) noexcept { + void TimerTick(event_token const& token) { _timerTickEvent.remove(token); } @@ -80,7 +83,7 @@ class MagService { }); } - void WndToRestoreChanged(event_token const& token) noexcept { + void WndToRestoreChanged(event_token const& token) { _wndToRestoreChangedEvent.remove(token); } @@ -97,19 +100,17 @@ class MagService { }); } - void IsRunningChanged(event_token const& token) noexcept { + void IsRunningChanged(event_token const& token) { _isRunningChangedEvent.remove(token); } - bool IsRunning() const noexcept { - return _magRuntime->IsRunning(); - } + bool IsRunning() const noexcept; // 强制重新检查前台窗口 void CheckForeground(); private: - MagService() = default; + ScalingService() = 
default; void _WndToRestore(HWND value); @@ -119,17 +120,17 @@ class MagService { fire_and_forget _CheckForegroundTimer_Tick(Threading::ThreadPoolTimer const& timer); - void _Settings_IsAutoRestoreChanged(bool); + void _Settings_IsAutoRestoreChanged(bool value); - fire_and_forget _MagRuntime_IsRunningChanged(bool isRunning); + fire_and_forget _ScalingRuntime_IsRunningChanged(bool isRunning); bool _StartScale(HWND hWnd, const Profile& profile); void _ScaleForegroundWindow(); - bool _CheckSrcWnd(HWND hWnd) noexcept; + bool _CheckSrcWnd(HWND hWnd, bool checkIL) noexcept; - std::optional<::Magpie::Core::MagRuntime> _magRuntime; + std::unique_ptr<::Magpie::Core::ScalingRuntime> _scalingRuntime; CoreDispatcher _dispatcher{ nullptr }; DispatcherTimer _countDownTimer; diff --git a/src/Magpie.App/SettingsCard.Resource.xaml b/src/Magpie.App/SettingsCard.Resource.xaml new file mode 100644 index 000000000..297b7cdde --- /dev/null +++ b/src/Magpie.App/SettingsCard.Resource.xaml @@ -0,0 +1,1047 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + 16,16,16,16 + 148 + 68 + 20 + 0 + 200 + 2,0,20,0 + 14,0,0,0 + 13 + 8 + 476 + 286 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Left + + + + + + + + + + + + + + + + + + + Vertical + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/Magpie.App/SettingsCard.cpp b/src/Magpie.App/SettingsCard.cpp index 45aca58ad..7982180de 100644 --- a/src/Magpie.App/SettingsCard.cpp +++ b/src/Magpie.App/SettingsCard.cpp @@ -1,98 +1,322 @@ +// 移植自 https://github.com/CommunityToolkit/Windows/tree/bef863ca70bb1edf8c940198dd5cc74afa5d2aab/components/SettingsControls/src/SettingsCard + #include "pch.h" #include "SettingsCard.h" #if 
__has_include("SettingsCard.g.cpp") #include "SettingsCard.g.cpp" #endif +#include using namespace winrt; +using namespace Windows::UI::Xaml; using namespace Windows::UI::Xaml::Controls; -using namespace Windows::UI::Xaml::Data; +using namespace Windows::UI::Xaml::Input; namespace winrt::Magpie::App::implementation { -DependencyProperty SettingsCard::RawTitleProperty = DependencyProperty::Register( - L"RawTitle", - xaml_typename(), - xaml_typename(), - PropertyMetadata(nullptr, _OnTitleChanged) -); +static constexpr const wchar_t* CommonStates = L"CommonStates"; +static constexpr const wchar_t* NormalState = L"Normal"; +static constexpr const wchar_t* PointerOverState = L"PointerOver"; +static constexpr const wchar_t* PressedState = L"Pressed"; +static constexpr const wchar_t* DisabledState = L"Disabled"; -DependencyProperty SettingsCard::TitleProperty = DependencyProperty::Register( - L"Title", - xaml_typename(), - xaml_typename(), - PropertyMetadata(box_value(L""), _OnTitleChanged) +static constexpr const wchar_t* ContentAlignmentStates = L"ContentAlignmentStates"; +static constexpr const wchar_t* RightState = L"Right"; +static constexpr const wchar_t* RightWrappedState = L"RightWrapped"; +static constexpr const wchar_t* RightWrappedNoIconState = L"RightWrappedNoIcon"; +static constexpr const wchar_t* LeftState = L"Left"; +static constexpr const wchar_t* VerticalState = L"Vertical"; + +static constexpr const wchar_t* ContentSpacingStates = L"ContentSpacingStates"; +static constexpr const wchar_t* NoContentSpacingState = L"NoContentSpacing"; +static constexpr const wchar_t* ContentSpacingState = L"ContentSpacing"; + +static constexpr const wchar_t* RootGrid = L"PART_RootGrid"; +static constexpr const wchar_t* ActionIconPresenterHolder = L"PART_ActionIconPresenterHolder"; +static constexpr const wchar_t* HeaderPresenter = L"PART_HeaderPresenter"; +static constexpr const wchar_t* DescriptionPresenter = L"PART_DescriptionPresenter"; +static constexpr const wchar_t* 
HeaderIconPresenterHolder = L"PART_HeaderIconPresenterHolder"; + +static constexpr const wchar_t* RightWrappedTrigger = L"RightWrappedTrigger"; +static constexpr const wchar_t* RightWrappedNoIconTrigger = L"RightWrappedNoIconTrigger"; + +const DependencyProperty SettingsCard::_headerProperty = DependencyProperty::Register( + L"Header", + xaml_typename(), + xaml_typename(), + PropertyMetadata(nullptr, &SettingsCard::_OnHeaderChanged) ); -DependencyProperty SettingsCard::DescriptionProperty = DependencyProperty::Register( +const DependencyProperty SettingsCard::_descriptionProperty = DependencyProperty::Register( L"Description", xaml_typename(), - xaml_typename(), + xaml_typename(), PropertyMetadata(nullptr, &SettingsCard::_OnDescriptionChanged) ); -DependencyProperty SettingsCard::IconProperty = DependencyProperty::Register( - L"Icon", - xaml_typename(), - xaml_typename(), - PropertyMetadata(nullptr, &SettingsCard::_OnIconChanged) +const DependencyProperty SettingsCard::_headerIconProperty = DependencyProperty::Register( + L"HeaderIcon", + xaml_typename(), + xaml_typename(), + PropertyMetadata(nullptr, &SettingsCard::_OnHeaderIconChanged) ); -DependencyProperty SettingsCard::ActionContentProperty = DependencyProperty::Register( - L"ActionContent", - xaml_typename(), - xaml_typename(), - PropertyMetadata(nullptr, &SettingsCard::_OnActionContentChanged) +const DependencyProperty SettingsCard::_actionIconProperty = DependencyProperty::Register( + L"ActionIcon", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(L"\ue974")) +); + +const DependencyProperty SettingsCard::_actionIconToolTipProperty = DependencyProperty::Register( + L"ActionIconToolTip", + xaml_typename(), + xaml_typename(), + nullptr +); + +const DependencyProperty SettingsCard::_isClickEnabledProperty = DependencyProperty::Register( + L"IsClickEnabled", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(false), &SettingsCard::_OnIsClickEnabledChanged) ); -void 
SettingsCard::_OnRawTitleChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { - SettingsCard* that = get_self(sender.as>()); - that->_Update(); - that->_propertyChangedEvent(*that, PropertyChangedEventArgs{ L"RawTitle" }); +const DependencyProperty SettingsCard::_contentAlignmentProperty = DependencyProperty::Register( + L"ContentAlignment", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(ContentAlignment::Right)) +); + +const DependencyProperty SettingsCard::_isActionIconVisibleProperty = DependencyProperty::Register( + L"IsActionIconVisible", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(true), &SettingsCard::_OnIsActionIconVisibleChanged) +); + +const DependencyProperty SettingsCard::_isWrapEnabledProperty = DependencyProperty::Register( + L"IsWrapEnabled", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(false), &SettingsCard::_OnIsWrapEnabledChanged) +); + +SettingsCard::SettingsCard() { + DefaultStyleKey(box_value(GetRuntimeClassName())); +} + +SettingsCard::~SettingsCard() { + // 不知为何必须手动释放 StateTriggers,否则会内存泄露 + if (auto stateGroup = GetTemplateChild(ContentAlignmentStates)) { + for (VisualState state : stateGroup.as().States()) { + state.StateTriggers().Clear(); + } + } +} + +void SettingsCard::OnApplyTemplate() { + base_type::OnApplyTemplate(); + + // https://github.com/microsoft/microsoft-ui-xaml/issues/7792 + // 对于 Content,模板中的样式不起作用 + auto resources = Resources(); + for (const auto& [key, value] : GetTemplateChild(RootGrid).as().Resources()) { + resources.Insert(key, value); + } + + _OnIsWrapEnabledChanged(); + + _contentAlignmentStatesChangedRevoker.revoke(); + _sizeChangedRevoker.revoke(); + _isEnabledChangedRevoker.revoke(); + + _OnActionIconChanged(); + _OnHeaderChanged(); + _OnHeaderIconChanged(); + _OnDescriptionChanged(); + _OnIsClickEnabledChanged(); + + VisualStateGroup contentAlignmentStatesGroup = GetTemplateChild(ContentAlignmentStates).as(); + 
_contentAlignmentStatesChangedRevoker = contentAlignmentStatesGroup.CurrentStateChanged(auto_revoke, [this](IInspectable const&, VisualStateChangedEventArgs const& args) { + _CheckVerticalSpacingState(args.NewState()); + }); + + // 修复启动时的动画错误 + _sizeChangedRevoker = SizeChanged(auto_revoke, [this, contentAlignmentStatesGroup(std::move(contentAlignmentStatesGroup))](IInspectable const&, SizeChangedEventArgs const&) { + _CheckVerticalSpacingState(contentAlignmentStatesGroup.CurrentState()); + }); + + VisualStateManager::GoToState(*this, IsEnabled() ? NormalState : DisabledState, true); + _isEnabledChangedRevoker = IsEnabledChanged(auto_revoke, [this](IInspectable const&, DependencyPropertyChangedEventArgs const&) { + VisualStateManager::GoToState(*this, IsEnabled() ? NormalState : DisabledState, true); + }); } -void SettingsCard::_OnTitleChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { - SettingsCard* that = get_self(sender.as>()); - that->_Update(); - that->_propertyChangedEvent(*that, PropertyChangedEventArgs{ L"Title" }); +void SettingsCard::OnPointerPressed(PointerRoutedEventArgs const& args) { + // 忽略鼠标右键 + if (IsClickEnabled() && !(args.Pointer().PointerDeviceType() == Windows::Devices::Input::PointerDeviceType::Mouse && args.GetCurrentPoint(*this).Properties().PointerUpdateKind() == Windows::UI::Input::PointerUpdateKind::RightButtonPressed)) { + base_type::OnPointerPressed(args); + VisualStateManager::GoToState(*this, PressedState, true); + + _isCursorCaptured = true; + } +} + +void SettingsCard::OnPointerReleased(PointerRoutedEventArgs const& args) { + if (_isCursorCaptured && IsClickEnabled()) { + base_type::OnPointerReleased(args); + VisualStateManager::GoToState(*this, _isCursorOnControl ? 
PointerOverState : NormalState, true); + } + + _isCursorCaptured = false; +} + +void SettingsCard::_OnHeaderChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { + get_self(sender.as())->_OnHeaderChanged(); } void SettingsCard::_OnDescriptionChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { - SettingsCard* that = get_self(sender.as>()); - that->_Update(); - that->_propertyChangedEvent(*that, PropertyChangedEventArgs{ L"Description" }); + get_self(sender.as())->_OnDescriptionChanged(); +} + +void SettingsCard::_OnHeaderIconChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { + get_self(sender.as())->_OnHeaderIconChanged(); } -void SettingsCard::_OnIconChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { - SettingsCard* that = get_self(sender.as>()); - that->_Update(); - that->_propertyChangedEvent(*that, PropertyChangedEventArgs{ L"Icon" }); +void SettingsCard::_OnIsClickEnabledChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { + get_self(sender.as())->_OnIsClickEnabledChanged(); } -void SettingsCard::_OnActionContentChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { - SettingsCard* that = get_self(sender.as>()); - that->_Update(); - that->_propertyChangedEvent(*that, PropertyChangedEventArgs{ L"ActionContent" }); +void SettingsCard::_OnIsActionIconVisibleChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { + get_self(sender.as())->_OnActionIconChanged(); } -void SettingsCard::_Update() { - RawTitlePresenter().Visibility(RawTitle() == nullptr ? Visibility::Collapsed : Visibility::Visible); - TitleTextBlock().Visibility(Title().empty() ? Visibility::Collapsed : Visibility::Visible); - DescriptionPresenter().Visibility(Description() == nullptr ? Visibility::Collapsed : Visibility::Visible); - IconPresenter().Visibility(Icon() == nullptr ? 
Visibility::Collapsed : Visibility::Visible); +void SettingsCard::_OnIsWrapEnabledChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { + get_self(sender.as())->_OnIsWrapEnabledChanged(); } -void SettingsCard::_SetEnabledState() { - VisualStateManager::GoToState(*this, IsEnabled() ? L"Normal" : L"Disabled", true); +static bool IsNotEmpty(IInspectable const& value) noexcept { + if (!value) { + return false; + } + + // 空字符串会使 ContentPresenter 尝试显示 Content 导致崩溃,因此做额外的检查 + std::optional str = value.try_as(); + return !str || !str->empty(); } -void SettingsCard::IsEnabledChanged(IInspectable const&, DependencyPropertyChangedEventArgs const&) { - _SetEnabledState(); +void SettingsCard::_OnHeaderChanged() const { + if (FrameworkElement headerPresenter = GetTemplateChild(HeaderPresenter).try_as()) { + headerPresenter.Visibility(IsNotEmpty(Header()) ? Visibility::Visible : Visibility::Collapsed); + } +} + +void SettingsCard::_OnDescriptionChanged() const { + if (FrameworkElement descriptionPresenter = GetTemplateChild(DescriptionPresenter).try_as()) { + descriptionPresenter.Visibility(IsNotEmpty(Description()) ? Visibility::Visible : Visibility::Collapsed); + } +} + +void SettingsCard::_OnHeaderIconChanged() const { + if (FrameworkElement headerIconPresenter = GetTemplateChild(HeaderIconPresenterHolder).try_as()) { + headerIconPresenter.Visibility(HeaderIcon() ? 
Visibility::Visible : Visibility::Collapsed); + } +} + +void SettingsCard::_OnIsClickEnabledChanged() { + _OnActionIconChanged(); + + if (IsClickEnabled()) { + _EnableButtonInteraction(); + } else { + _DisableButtonInteraction(); + } +} + +void SettingsCard::_OnActionIconChanged() const { + if (FrameworkElement actionIconPresenter = GetTemplateChild(ActionIconPresenterHolder).try_as()) { + if (IsClickEnabled() && IsActionIconVisible()) { + actionIconPresenter.Visibility(Visibility::Visible); + } else { + actionIconPresenter.Visibility(Visibility::Collapsed); + } + } +} + +void SettingsCard::_OnIsWrapEnabledChanged() const { + auto trigger1 = GetTemplateChild(RightWrappedTrigger); + auto trigger2 = GetTemplateChild(RightWrappedNoIconTrigger); + + if (trigger1 && trigger2) { + // CanTrigger 无法使用 TemplateBinding? + const bool isWrapEnabled = IsWrapEnabled(); + trigger1.as().CanTrigger(isWrapEnabled); + trigger2.as().CanTrigger(isWrapEnabled); + } +} + +void SettingsCard::_CheckVerticalSpacingState(VisualState const& s) { + // On state change, checking if the Content should be wrapped (e.g. when the card is made smaller or the ContentAlignment is set to Vertical). If the Content and the Header or Description are not null, we add spacing between the Content and the Header/Description. + + const hstring stateName = s ? s.Name() : hstring(); + if (!stateName.empty() && (stateName == RightWrappedState || stateName == RightWrappedNoIconState || + stateName == VerticalState) && Content() && (Header() || IsNotEmpty(Description()))) { + VisualStateManager::GoToState(*this, ContentSpacingState, true); + } else { + VisualStateManager::GoToState(*this, NoContentSpacingState, true); + } +} + +void SettingsCard::_EnableButtonInteraction() { + _DisableButtonInteraction(); + + IsTabStop(true); + + _pointerEnteredRevoker = PointerEntered(auto_revoke, [this](IInspectable const&, PointerRoutedEventArgs const&) { + VisualStateManager::GoToState(*this, _isCursorCaptured ? 
PressedState : PointerOverState, true); + _isCursorOnControl = true; + }); + + _pointerExitedRevoker = PointerExited(auto_revoke, [this](IInspectable const&, PointerRoutedEventArgs const&) { + VisualStateManager::GoToState(*this, NormalState, true); + _isCursorOnControl = false; + }); + + auto goToNormalState = [this](IInspectable const&, PointerRoutedEventArgs const&) { + VisualStateManager::GoToState(*this, NormalState, true); + }; + + _pointerCaptureLostRevoker = PointerCaptureLost(auto_revoke, goToNormalState); + _pointerCanceledRevoker = PointerCanceled(auto_revoke, goToNormalState); + + _previewKeyDownRevoker = PreviewKeyDown(auto_revoke, [this](IInspectable const&, KeyRoutedEventArgs const& args) { + const VirtualKey key = args.Key(); + if (key == VirtualKey::Enter || key == VirtualKey::Space || key == VirtualKey::GamepadA) { + // Check if the active focus is on the card itself - only then we show the pressed state. + if (FocusManager::GetFocusedElement(XamlRoot()) == *this) { + VisualStateManager::GoToState(*this, PressedState, true); + } + } + }); + + _previewKeyUpRevoker = PreviewKeyUp(auto_revoke, [this](IInspectable const&, KeyRoutedEventArgs const& args) { + const VirtualKey key = args.Key(); + if (key == VirtualKey::Enter || key == VirtualKey::Space || key == VirtualKey::GamepadA) { + VisualStateManager::GoToState(*this, NormalState, true); + } + }); } -void SettingsCard::Loading(FrameworkElement const&, IInspectable const&) { - _SetEnabledState(); - _Update(); +void SettingsCard::_DisableButtonInteraction() { + IsTabStop(false); + _pointerEnteredRevoker.revoke(); + _pointerExitedRevoker.revoke(); + _pointerCaptureLostRevoker.revoke(); + _pointerCanceledRevoker.revoke(); + _previewKeyDownRevoker.revoke(); + _previewKeyUpRevoker.revoke(); } } diff --git a/src/Magpie.App/SettingsCard.h b/src/Magpie.App/SettingsCard.h index 5b1f1685b..f89152646 100644 --- a/src/Magpie.App/SettingsCard.h +++ b/src/Magpie.App/SettingsCard.h @@ -4,75 +4,97 @@ namespace 
winrt::Magpie::App::implementation { struct SettingsCard : SettingsCardT { - void RawTitle(IInspectable const& value) { - SetValue(RawTitleProperty, value); - } + SettingsCard(); - IInspectable RawTitle() const { - return GetValue(RawTitleProperty); - } + ~SettingsCard(); - void Title(const hstring& value) { - SetValue(TitleProperty, box_value(value)); - } + static DependencyProperty HeaderProperty() { return _headerProperty; } + static DependencyProperty DescriptionProperty() { return _descriptionProperty; } + static DependencyProperty HeaderIconProperty() { return _headerIconProperty; } + static DependencyProperty ActionIconProperty() { return _actionIconProperty; } + static DependencyProperty ActionIconToolTipProperty() { return _actionIconToolTipProperty; } + static DependencyProperty IsClickEnabledProperty() { return _isClickEnabledProperty; } + static DependencyProperty ContentAlignmentProperty() { return _contentAlignmentProperty; } + static DependencyProperty IsActionIconVisibleProperty() { return _isActionIconVisibleProperty; } + static DependencyProperty IsWrapEnabledProperty() { return _isWrapEnabledProperty; } - hstring Title() const { - return GetValue(TitleProperty).as(); - } + IInspectable Header() const { return GetValue(_headerProperty); } + void Header(IInspectable const& value) const { SetValue(_headerProperty, value); } - void Description(IInspectable const& value) { - SetValue(DescriptionProperty, value); - } + IInspectable Description() const { return GetValue(_descriptionProperty); } + void Description(IInspectable const& value) const { SetValue(_descriptionProperty, value); } - IInspectable Description() const { - return GetValue(DescriptionProperty); - } + Controls::IconElement HeaderIcon() const { return GetValue(_headerIconProperty).as(); } + void HeaderIcon(Controls::IconElement const& value) const { SetValue(_headerIconProperty, value); } - void Icon(IInspectable const& value) { - SetValue(IconProperty, value); - } + 
Controls::IconElement ActionIcon() const { return GetValue(_actionIconProperty).as(); } + void ActionIcon(Controls::IconElement const& value) const { SetValue(_actionIconProperty, value); } - IInspectable Icon() const { - return GetValue(IconProperty); - } + hstring ActionIconToolTip() const { return GetValue(_actionIconToolTipProperty).as(); } + void ActionIconToolTip(const hstring& value) const { SetValue(_actionIconToolTipProperty, box_value(value)); } - void ActionContent(IInspectable const& value) { - SetValue(ActionContentProperty, value); - } + bool IsClickEnabled() const { return GetValue(_isClickEnabledProperty).as(); } + void IsClickEnabled(bool value) const { SetValue(_isClickEnabledProperty, box_value(value)); } - IInspectable ActionContent() const { - return GetValue(ActionContentProperty); - } + ContentAlignment ContentAlignment() const { return GetValue(_contentAlignmentProperty).as(); } + void ContentAlignment(Magpie::App::ContentAlignment value) const { SetValue(_contentAlignmentProperty, box_value(value)); } - void IsEnabledChanged(IInspectable const&, DependencyPropertyChangedEventArgs const&); - void Loading(FrameworkElement const&, IInspectable const&); + bool IsActionIconVisible() const { return GetValue(_isActionIconVisibleProperty).as(); } + void IsActionIconVisible(bool value) const { SetValue(_isActionIconVisibleProperty, box_value(value)); } - event_token PropertyChanged(PropertyChangedEventHandler const& value) { - return _propertyChangedEvent.add(value); - } + bool IsWrapEnabled() const { return GetValue(_isWrapEnabledProperty).as(); } + void IsWrapEnabled(bool value) const { SetValue(_isWrapEnabledProperty, box_value(value)); } - void PropertyChanged(event_token const& token) { - _propertyChangedEvent.remove(token); - } + void OnApplyTemplate(); - static DependencyProperty RawTitleProperty; - static DependencyProperty TitleProperty; - static DependencyProperty DescriptionProperty; - static DependencyProperty IconProperty; - static 
DependencyProperty ActionContentProperty; + void OnPointerPressed(Input::PointerRoutedEventArgs const& args); + + void OnPointerReleased(Input::PointerRoutedEventArgs const& args); private: - static void _OnRawTitleChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); - static void _OnTitleChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); + static const DependencyProperty _headerProperty; + static const DependencyProperty _descriptionProperty; + static const DependencyProperty _headerIconProperty; + static const DependencyProperty _actionIconProperty; + static const DependencyProperty _actionIconToolTipProperty; + static const DependencyProperty _isClickEnabledProperty; + static const DependencyProperty _contentAlignmentProperty; + static const DependencyProperty _isActionIconVisibleProperty; + static const DependencyProperty _isWrapEnabledProperty; + + static void _OnHeaderChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); static void _OnDescriptionChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); - static void _OnIconChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); - static void _OnActionContentChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); + static void _OnHeaderIconChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); + static void _OnIsClickEnabledChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); + static void _OnIsActionIconVisibleChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); + static void _OnIsWrapEnabledChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); + + void _OnHeaderChanged() const; + void _OnDescriptionChanged() const; + void _OnHeaderIconChanged() const; + void _OnIsClickEnabledChanged(); + void _OnActionIconChanged() 
const; + void _OnIsWrapEnabledChanged() const; + + void _CheckVerticalSpacingState(VisualState const& s); + + void _EnableButtonInteraction(); + + void _DisableButtonInteraction(); - void _Update(); + IsEnabledChanged_revoker _isEnabledChangedRevoker; + VisualStateGroup::CurrentStateChanged_revoker _contentAlignmentStatesChangedRevoker; + SizeChanged_revoker _sizeChangedRevoker; - void _SetEnabledState(); + UIElement::PointerEntered_revoker _pointerEnteredRevoker; + UIElement::PointerExited_revoker _pointerExitedRevoker; + UIElement::PointerCaptureLost_revoker _pointerCaptureLostRevoker; + UIElement::PointerCanceled_revoker _pointerCanceledRevoker; + UIElement::PreviewKeyDown_revoker _previewKeyDownRevoker; + UIElement::PreviewKeyUp_revoker _previewKeyUpRevoker; - event _propertyChangedEvent; + bool _isCursorCaptured = false; + bool _isCursorOnControl = false; }; } diff --git a/src/Magpie.App/SettingsCard.idl b/src/Magpie.App/SettingsCard.idl index 1f2a17056..0e520e7bf 100644 --- a/src/Magpie.App/SettingsCard.idl +++ b/src/Magpie.App/SettingsCard.idl @@ -1,13 +1,47 @@ -namespace Magpie.App { - [Windows.UI.Xaml.Markup.ContentProperty("RawTitle")] - runtimeclass SettingsCard : Windows.UI.Xaml.Controls.UserControl, Windows.UI.Xaml.Data.INotifyPropertyChanged - { +namespace Magpie.App{ + enum ContentAlignment { + /// The Content is aligned to the right. Default state. + Right, + /// The Content is left-aligned while the Header, HeaderIcon and Description are collapsed. This is commonly used for Content types such as CheckBoxes, RadioButtons and custom layouts. + Left, + /// The Content is vertically aligned. 
+ Vertical + }; + + [Windows.UI.Xaml.TemplateVisualState("NormalState", "CommonStates")] + [Windows.UI.Xaml.TemplateVisualState("PointerOverState", "CommonStates")] + [Windows.UI.Xaml.TemplateVisualState("PressedState", "CommonStates")] + [Windows.UI.Xaml.TemplateVisualState("DisabledState", "CommonStates")] + + [Windows.UI.Xaml.TemplateVisualState("RightState", "ContentAlignmentStates")] + [Windows.UI.Xaml.TemplateVisualState("RightWrappedState", "ContentAlignmentStates")] + [Windows.UI.Xaml.TemplateVisualState("RightWrappedNoIconState", "ContentAlignmentStates")] + [Windows.UI.Xaml.TemplateVisualState("LeftState", "ContentAlignmentStates")] + [Windows.UI.Xaml.TemplateVisualState("VerticalState", "ContentAlignmentStates")] + + [Windows.UI.Xaml.TemplateVisualState("NoContentSpacingState", "ContentSpacingStates")] + [Windows.UI.Xaml.TemplateVisualState("ContentSpacingState", "ContentSpacingStates")] + runtimeclass SettingsCard : Windows.UI.Xaml.Controls.Primitives.ButtonBase { SettingsCard(); - - Object RawTitle; - String Title; + + static Windows.UI.Xaml.DependencyProperty HeaderProperty { get; }; + static Windows.UI.Xaml.DependencyProperty DescriptionProperty { get; }; + static Windows.UI.Xaml.DependencyProperty HeaderIconProperty { get; }; + static Windows.UI.Xaml.DependencyProperty ActionIconProperty { get; }; + static Windows.UI.Xaml.DependencyProperty ActionIconToolTipProperty { get; }; + static Windows.UI.Xaml.DependencyProperty IsClickEnabledProperty { get; }; + static Windows.UI.Xaml.DependencyProperty ContentAlignmentProperty { get; }; + static Windows.UI.Xaml.DependencyProperty IsActionIconVisibleProperty { get; }; + static Windows.UI.Xaml.DependencyProperty IsWrapEnabledProperty { get; }; + + Object Header; Object Description; - Object Icon; - Object ActionContent; + Windows.UI.Xaml.Controls.IconElement HeaderIcon; + Windows.UI.Xaml.Controls.IconElement ActionIcon; + String ActionIconToolTip; + Boolean IsClickEnabled; + ContentAlignment ContentAlignment; 
+ Boolean IsActionIconVisible; + Boolean IsWrapEnabled; } } diff --git a/src/Magpie.App/SettingsCard.xaml b/src/Magpie.App/SettingsCard.xaml deleted file mode 100644 index 10ceef8ca..000000000 --- a/src/Magpie.App/SettingsCard.xaml +++ /dev/null @@ -1,113 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/Magpie.App/SettingsExpander.Resource.xaml b/src/Magpie.App/SettingsExpander.Resource.xaml new file mode 100644 index 000000000..c1f5af186 --- /dev/null +++ b/src/Magpie.App/SettingsExpander.Resource.xaml @@ -0,0 +1,510 @@ + + + + + Show all settings + 16,16,4,16 + 58,8,44,8 + 0,1,0,0 + 58,8,16,8 + 16 + 32 + 32 + + + + + + + + + + + + + diff --git a/src/Magpie.App/SettingsExpander.cpp b/src/Magpie.App/SettingsExpander.cpp new file mode 100644 index 000000000..beabf1602 --- /dev/null +++ b/src/Magpie.App/SettingsExpander.cpp @@ -0,0 +1,136 @@ +// 移植自 https://github.com/CommunityToolkit/Windows/tree/bef863ca70bb1edf8c940198dd5cc74afa5d2aab/components/SettingsControls/src/SettingsExpander + +#include "pch.h" +#include "SettingsExpander.h" +#if __has_include("SettingsExpander.g.cpp") +#include "SettingsExpander.g.cpp" +#endif + +using namespace winrt; +using namespace Windows::UI::Xaml::Controls; + +namespace winrt::Magpie::App::implementation { + +static constexpr const wchar_t* PART_ItemsContainer = L"PART_ItemsContainer"; + +const DependencyProperty SettingsExpander::_headerProperty = DependencyProperty::Register( + L"Header", + xaml_typename(), + xaml_typename(), + nullptr +); + +const DependencyProperty SettingsExpander::_descriptionProperty = DependencyProperty::Register( + L"Description", + xaml_typename(), + xaml_typename(), + nullptr +); + +const DependencyProperty SettingsExpander::_headerIconProperty = DependencyProperty::Register( + L"HeaderIcon", + xaml_typename(), + xaml_typename(), + nullptr +); + +const DependencyProperty SettingsExpander::_contentProperty = 
DependencyProperty::Register( + L"Content", + xaml_typename(), + xaml_typename(), + nullptr +); + +const DependencyProperty SettingsExpander::_itemsHeaderProperty = DependencyProperty::Register( + L"ItemsHeader", + xaml_typename(), + xaml_typename(), + nullptr +); + +const DependencyProperty SettingsExpander::_itemsFooterProperty = DependencyProperty::Register( + L"ItemsFooter", + xaml_typename(), + xaml_typename(), + nullptr +); + +const DependencyProperty SettingsExpander::_isExpandedProperty = DependencyProperty::Register( + L"IsExpanded", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(false), &SettingsExpander::_OnIsExpandedChanged) +); + +const DependencyProperty SettingsExpander::_itemsProperty = DependencyProperty::Register( + L"Items", + xaml_typename>(), + xaml_typename(), + PropertyMetadata(nullptr, &SettingsExpander::_OnItemsConnectedPropertyChanged) +); + +const DependencyProperty SettingsExpander::_itemsSourceProperty = DependencyProperty::Register( + L"ItemsSource", + xaml_typename(), + xaml_typename(), + PropertyMetadata(nullptr, &SettingsExpander::_OnItemsConnectedPropertyChanged) +); + +const DependencyProperty SettingsExpander::_itemTemplateProperty = DependencyProperty::Register( + L"ItemTemplate", + xaml_typename(), + xaml_typename(), + nullptr +); + +SettingsExpander::SettingsExpander() { + DefaultStyleKey(box_value(GetRuntimeClassName())); + Items(single_threaded_vector()); +} + +void SettingsExpander::OnApplyTemplate() { + base_type::OnApplyTemplate(); + _OnItemsConnectedPropertyChanged(); +} + +void SettingsExpander::_OnIsExpandedChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const& args) { + SettingsExpander* that = get_self(sender.as()); + + if (args.NewValue().as()) { + that->_expandedEvent(); + } else { + that->_collapsedEvent(); + } +} + +void SettingsExpander::_OnItemsConnectedPropertyChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { + 
get_self(sender.as())->_OnItemsConnectedPropertyChanged(); +} + +void SettingsExpander::_OnItemsConnectedPropertyChanged() { + ItemsControl itemsContainer = GetTemplateChild(PART_ItemsContainer).as(); + if (!itemsContainer) { + return; + } + + IInspectable datasource = ItemsSource(); + itemsContainer.ItemsSource(datasource ? datasource : Items()); + + // 应用样式 + for (IInspectable const& item : itemsContainer.Items()) { + SettingsCard settingsCard = item.try_as(); + if (!settingsCard) { + continue; + } + + if (settingsCard.ReadLocalValue(FrameworkElement::StyleProperty()) == DependencyProperty::UnsetValue()) { + ResourceDictionary resources = Application::Current().Resources(); + const wchar_t* key = settingsCard.IsClickEnabled() + ? L"ClickableSettingsExpanderItemStyle" + : L"DefaultSettingsExpanderItemStyle"; + settingsCard.Style(resources.Lookup(box_value(key)).as()); + } + } +} + +} diff --git a/src/Magpie.App/SettingsExpander.h b/src/Magpie.App/SettingsExpander.h new file mode 100644 index 000000000..1bf255780 --- /dev/null +++ b/src/Magpie.App/SettingsExpander.h @@ -0,0 +1,86 @@ +#pragma once +#include "SettingsExpander.g.h" + +namespace winrt::Magpie::App::implementation { + +struct SettingsExpander : SettingsExpanderT { + SettingsExpander(); + + static DependencyProperty HeaderProperty() { return _headerProperty; } + static DependencyProperty DescriptionProperty() { return _descriptionProperty; } + static DependencyProperty HeaderIconProperty() { return _headerIconProperty; } + static DependencyProperty ContentProperty() { return _contentProperty; } + static DependencyProperty ItemsHeaderProperty() { return _itemsHeaderProperty; } + static DependencyProperty ItemsFooterProperty() { return _itemsFooterProperty; } + static DependencyProperty IsExpandedProperty() { return _isExpandedProperty; } + static DependencyProperty ItemsProperty() { return _itemsProperty; } + static DependencyProperty ItemsSourceProperty() { return _itemsSourceProperty; } + static 
DependencyProperty ItemTemplateProperty() { return _itemTemplateProperty; } + + IInspectable Header() const { return GetValue(_headerProperty); } + void Header(IInspectable const& value) const { SetValue(_headerProperty, value); } + + IInspectable Description() const { return GetValue(_descriptionProperty); } + void Description(IInspectable const& value) const { SetValue(_descriptionProperty, value); } + + Controls::IconElement HeaderIcon() const { return GetValue(_headerIconProperty).as(); } + void HeaderIcon(Controls::IconElement const& value)const { SetValue(_headerIconProperty, value); } + + IInspectable Content() const { return GetValue(_contentProperty); } + void Content(IInspectable const& value) const { SetValue(_contentProperty, value); } + + UIElement ItemsHeader() const { return GetValue(_itemsHeaderProperty).as(); } + void ItemsHeader(UIElement const& value) const { SetValue(_itemsHeaderProperty, value); } + + UIElement ItemsFooter() const { return GetValue(_itemsFooterProperty).as(); } + void ItemsFooter(UIElement const& value) const { SetValue(_itemsFooterProperty, value); } + + bool IsExpanded() const { return GetValue(_isExpandedProperty).as(); } + void IsExpanded(bool value) const { SetValue(_isExpandedProperty, box_value(value)); } + + event_token Expanded(SignalDelegate const& handler) { return _expandedEvent.add(handler); } + void Expanded(winrt::event_token const& token) { _expandedEvent.remove(token); } + + event_token Collapsed(SignalDelegate const& handler) { return _collapsedEvent.add(handler); } + void Collapsed(winrt::event_token const& token) { _collapsedEvent.remove(token); } + + IVector Items() const { return GetValue(_itemsProperty).as>(); } + void Items(IVector const& value) const { SetValue(_itemsProperty, value); } + + IInspectable ItemsSource() const { return GetValue(_itemsSourceProperty); } + void ItemsSource(IInspectable const& value) const { SetValue(_itemsSourceProperty, value); } + + IInspectable ItemTemplate() const { 
return GetValue(_itemTemplateProperty); } + void ItemTemplate(IInspectable const& value) const { SetValue(_itemTemplateProperty, value); } + + void OnApplyTemplate(); + +private: + static const DependencyProperty _headerProperty; + static const DependencyProperty _descriptionProperty; + static const DependencyProperty _headerIconProperty; + static const DependencyProperty _contentProperty; + static const DependencyProperty _itemsHeaderProperty; + static const DependencyProperty _itemsFooterProperty; + static const DependencyProperty _isExpandedProperty; + static const DependencyProperty _itemsProperty; + static const DependencyProperty _itemsSourceProperty; + static const DependencyProperty _itemTemplateProperty; + + static void _OnIsExpandedChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const& args); + static void _OnItemsConnectedPropertyChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); + + void _OnItemsConnectedPropertyChanged(); + + event _expandedEvent; + event _collapsedEvent; +}; + +} + +namespace winrt::Magpie::App::factory_implementation { + +struct SettingsExpander : SettingsExpanderT { +}; + +} diff --git a/src/Magpie.App/SettingsExpander.idl b/src/Magpie.App/SettingsExpander.idl new file mode 100644 index 000000000..14985dbed --- /dev/null +++ b/src/Magpie.App/SettingsExpander.idl @@ -0,0 +1,31 @@ +namespace Magpie.App { + [Windows.UI.Xaml.Markup.ContentProperty("Content")] + runtimeclass SettingsExpander : Windows.UI.Xaml.Controls.Control { + SettingsExpander(); + + static Windows.UI.Xaml.DependencyProperty HeaderProperty { get; }; + static Windows.UI.Xaml.DependencyProperty DescriptionProperty { get; }; + static Windows.UI.Xaml.DependencyProperty HeaderIconProperty { get; }; + static Windows.UI.Xaml.DependencyProperty ContentProperty { get; }; + static Windows.UI.Xaml.DependencyProperty ItemsHeaderProperty { get; }; + static Windows.UI.Xaml.DependencyProperty ItemsFooterProperty { get; }; + 
static Windows.UI.Xaml.DependencyProperty IsExpandedProperty { get; }; + static Windows.UI.Xaml.DependencyProperty ItemsProperty { get; }; + static Windows.UI.Xaml.DependencyProperty ItemsSourceProperty { get; }; + static Windows.UI.Xaml.DependencyProperty ItemTemplateProperty { get; }; + + Object Header; + Object Description; + Windows.UI.Xaml.Controls.IconElement HeaderIcon; + Object Content; + Windows.UI.Xaml.UIElement ItemsHeader; + Windows.UI.Xaml.UIElement ItemsFooter; + Boolean IsExpanded; + IVector Items; + Object ItemsSource; + Object ItemTemplate; + + event SignalDelegate Expanded; + event SignalDelegate Collapsed; + } +} diff --git a/src/Magpie.App/SettingsGroup.Resource.xaml b/src/Magpie.App/SettingsGroup.Resource.xaml new file mode 100644 index 000000000..cfc387f60 --- /dev/null +++ b/src/Magpie.App/SettingsGroup.Resource.xaml @@ -0,0 +1,73 @@ + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/Magpie.App/SettingsGroup.cpp b/src/Magpie.App/SettingsGroup.cpp index c1b3b0891..3018a46c8 100644 --- a/src/Magpie.App/SettingsGroup.cpp +++ b/src/Magpie.App/SettingsGroup.cpp @@ -10,57 +10,36 @@ using namespace Windows::UI::Xaml::Data; namespace winrt::Magpie::App::implementation { -const DependencyProperty SettingsGroup::ChildrenProperty = DependencyProperty::Register( - L"Children", - xaml_typename(), - xaml_typename(), - PropertyMetadata(nullptr) -); - -const DependencyProperty SettingsGroup::TitleProperty = DependencyProperty::Register( - L"Title", - xaml_typename(), - xaml_typename(), - PropertyMetadata(box_value(L""), &SettingsGroup::_OnTitleChanged) +// Header 如果为字符串类型会编译失败,见 https://github.com/microsoft/microsoft-ui-xaml/issues/5395 +const DependencyProperty SettingsGroup::_headerProperty = DependencyProperty::Register( + L"Header", + xaml_typename(), + xaml_typename(), + nullptr ); -const DependencyProperty SettingsGroup::DescriptionProperty = DependencyProperty::Register( +const DependencyProperty 
SettingsGroup::_descriptionProperty = DependencyProperty::Register( L"Description", xaml_typename(), - xaml_typename(), + xaml_typename(), PropertyMetadata(nullptr, &SettingsGroup::_OnDescriptionChanged) ); -void SettingsGroup::InitializeComponent() { - SettingsGroupT::InitializeComponent(); +void SettingsGroup::OnApplyTemplate() { + base_type::OnApplyTemplate(); - Children(ChildrenHost().Children()); -} - -void SettingsGroup::IsEnabledChanged(IInspectable const&, DependencyPropertyChangedEventArgs const&) { + _isEnabledChangedRevoker = IsEnabledChanged(auto_revoke, [this](IInspectable const&, DependencyPropertyChangedEventArgs const&) { + _SetEnabledState(); + }); _SetEnabledState(); } -void SettingsGroup::Loading(FrameworkElement const&, IInspectable const&) { - _SetEnabledState(); - _Update(); -} - -void SettingsGroup::_OnTitleChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { - SettingsGroup* that = get_self(sender.as>()); - that->_Update(); - that->_propertyChangedEvent(*that, PropertyChangedEventArgs{ L"Title" }); -} - -void SettingsGroup::_OnDescriptionChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { - SettingsGroup* that = get_self(sender.as>()); - that->_Update(); - that->_propertyChangedEvent(*that, PropertyChangedEventArgs{ L"Description" }); -} +void SettingsGroup::_OnDescriptionChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const& args) { + SettingsGroup* that = get_self(sender.as()); -void SettingsGroup::_Update() { - TitleTextBlock().Visibility(Title().empty() ? Visibility::Collapsed : Visibility::Visible); - DescriptionPresenter().Visibility(Description() == nullptr ? Visibility::Collapsed : Visibility::Visible); + if (FrameworkElement descriptionPresenter = that->GetTemplateChild(L"DescriptionPresenter").try_as()) { + descriptionPresenter.Visibility(args.NewValue() == nullptr ? 
Visibility::Collapsed : Visibility::Visible); + } } void SettingsGroup::_SetEnabledState() { diff --git a/src/Magpie.App/SettingsGroup.h b/src/Magpie.App/SettingsGroup.h index e4ecd89bf..d018e49c0 100644 --- a/src/Magpie.App/SettingsGroup.h +++ b/src/Magpie.App/SettingsGroup.h @@ -4,56 +4,27 @@ namespace winrt::Magpie::App::implementation { struct SettingsGroup : SettingsGroupT { - void InitializeComponent(); + static DependencyProperty HeaderProperty() { return _headerProperty; } + static DependencyProperty DescriptionProperty() { return _descriptionProperty; } - void Title(const hstring& value) { - SetValue(TitleProperty, box_value(value)); - } + void Header(IInspectable const& value) const { SetValue(_headerProperty, value); } + IInspectable Header() const { return GetValue(_headerProperty); } - hstring Title() const { - return GetValue(TitleProperty).as(); - } + void Description(IInspectable value) const { SetValue(_descriptionProperty, value); } + IInspectable Description() const { return GetValue(_descriptionProperty); } - void Description(IInspectable value) { - SetValue(DescriptionProperty, value); - } - - IInspectable Description() const { - return GetValue(DescriptionProperty); - } - - Controls::UIElementCollection Children() const { - return GetValue(ChildrenProperty).as(); - } - - void Children(Controls::UIElementCollection const& value) { - SetValue(ChildrenProperty, value); - } - - void IsEnabledChanged(IInspectable const&, DependencyPropertyChangedEventArgs const&); - void Loading(FrameworkElement const&, IInspectable const&); - - event_token PropertyChanged(PropertyChangedEventHandler const& value) { - return _propertyChangedEvent.add(value); - } - - void PropertyChanged(event_token const& token) { - _propertyChangedEvent.remove(token); - } - - static const DependencyProperty ChildrenProperty; - static const DependencyProperty TitleProperty; - static const DependencyProperty DescriptionProperty; + void OnApplyTemplate(); private: - static void 
_OnTitleChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); - static void _OnDescriptionChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); + static const DependencyProperty _childrenProperty; + static const DependencyProperty _headerProperty; + static const DependencyProperty _descriptionProperty; - void _Update(); + static void _OnDescriptionChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); void _SetEnabledState(); - event _propertyChangedEvent; + IsEnabledChanged_revoker _isEnabledChangedRevoker; }; } diff --git a/src/Magpie.App/SettingsGroup.idl b/src/Magpie.App/SettingsGroup.idl index dfaeec70a..bbda71cac 100644 --- a/src/Magpie.App/SettingsGroup.idl +++ b/src/Magpie.App/SettingsGroup.idl @@ -1,12 +1,13 @@ namespace Magpie.App { - [Windows.UI.Xaml.Markup.ContentProperty("Children")] [Windows.UI.Xaml.TemplateVisualState("Normal", "CommonStates")] [Windows.UI.Xaml.TemplateVisualState("Disabled", "CommonStates")] - runtimeclass SettingsGroup : Windows.UI.Xaml.Controls.UserControl, Windows.UI.Xaml.Data.INotifyPropertyChanged { + runtimeclass SettingsGroup : Windows.UI.Xaml.Controls.ItemsControl { SettingsGroup(); - Windows.UI.Xaml.Controls.UIElementCollection Children; - String Title; + static Windows.UI.Xaml.DependencyProperty HeaderProperty { get; }; + static Windows.UI.Xaml.DependencyProperty DescriptionProperty { get; }; + + Object Header; Object Description; } } diff --git a/src/Magpie.App/SettingsGroup.xaml b/src/Magpie.App/SettingsGroup.xaml deleted file mode 100644 index af30eab0f..000000000 --- a/src/Magpie.App/SettingsGroup.xaml +++ /dev/null @@ -1,62 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/Magpie.App/SettingsPage.cpp b/src/Magpie.App/SettingsPage.cpp index 431cbc36b..8b8aa9c80 100644 --- a/src/Magpie.App/SettingsPage.cpp +++ b/src/Magpie.App/SettingsPage.cpp @@ -5,6 +5,7 @@ #endif #include "XamlUtils.h" #include 
"ComboBoxHelper.h" +#include "CommonSharedConstants.h" using namespace winrt; using namespace Windows::UI::Xaml::Input; @@ -14,8 +15,10 @@ namespace winrt::Magpie::App::implementation { void SettingsPage::InitializeComponent() { SettingsPageT::InitializeComponent(); - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); - hstring versionStr = resourceLoader.GetString(L"ms-resource://Magpie.App/Microsoft.UI.Xaml/Resources/SettingsButtonName"); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); + hstring versionStr = resourceLoader. + GetString(L"ms-resource://Magpie.App/Microsoft.UI.Xaml/Resources/SettingsButtonName"); SettingsPageFrame().Title(versionStr); } diff --git a/src/Magpie.App/SettingsPage.xaml b/src/Magpie.App/SettingsPage.xaml index 1911eabf3..7f183786c 100644 --- a/src/Magpie.App/SettingsPage.xaml +++ b/src/Magpie.App/SettingsPage.xaml @@ -7,185 +7,162 @@ xmlns:muxc="using:Microsoft.UI.Xaml.Controls" mc:Ignorable="d"> - - - - - - - - - - - - + + + + + + + + + + Severity="Warning" + Visibility="{x:Bind ViewModel.RequireRestart, Mode=OneWay}"> diff --git a/src/Magpie.App/ShortcutDialog.cpp b/src/Magpie.App/ShortcutDialog.cpp index c893d9cd7..755371547 100644 --- a/src/Magpie.App/ShortcutDialog.cpp +++ b/src/Magpie.App/ShortcutDialog.cpp @@ -3,6 +3,7 @@ #if __has_include("ShortcutDialog.g.cpp") #include "ShortcutDialog.g.cpp" #endif +#include "CommonSharedConstants.h" namespace winrt::Magpie::App::implementation { @@ -16,14 +17,16 @@ void ShortcutDialog::Error(ShortcutError value) { case ShortcutError::Invalid: { WarningBanner().Visibility(Visibility::Visible); - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); InvalidShortcutWarningLabel().Text(resourceLoader.GetString(L"ShortcutDialog_InvalidShortcut")); break; } case ShortcutError::Occupied: 
{ WarningBanner().Visibility(Visibility::Visible); - ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView(); + ResourceLoader resourceLoader = + ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); InvalidShortcutWarningLabel().Text(resourceLoader.GetString(L"ShortcutDialog_InUse")); break; } diff --git a/src/Magpie.App/ShortcutDialog.xaml b/src/Magpie.App/ShortcutDialog.xaml index be25efd8b..1d8445578 100644 --- a/src/Magpie.App/ShortcutDialog.xaml +++ b/src/Magpie.App/ShortcutDialog.xaml @@ -24,11 +24,12 @@ Margin="0,64,0,0" HorizontalAlignment="Center" VerticalAlignment="Top" - HorizontalContentAlignment="Center"> + HorizontalContentAlignment="Center" + IsTabStop="False"> - + @@ -42,11 +43,10 @@ - + - + diff --git a/src/Magpie.App/SimpleStackPanel.cpp b/src/Magpie.App/SimpleStackPanel.cpp new file mode 100644 index 000000000..44281175b --- /dev/null +++ b/src/Magpie.App/SimpleStackPanel.cpp @@ -0,0 +1,183 @@ +#include "pch.h" +#include "SimpleStackPanel.h" +#if __has_include("SimpleStackPanel.g.cpp") +#include "SimpleStackPanel.g.cpp" +#endif + +using namespace winrt; +using namespace Windows::UI::Xaml::Controls; + +namespace winrt::Magpie::App::implementation { + +const DependencyProperty SimpleStackPanel::_orientationProperty = DependencyProperty::Register( + L"Orientation", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(Orientation::Vertical), &SimpleStackPanel::_OnLayoutPropertyChanged) +); + +const DependencyProperty SimpleStackPanel::_paddingProperty = DependencyProperty::Register( + L"Padding", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(Thickness{}), &SimpleStackPanel::_OnLayoutPropertyChanged) +); + +const DependencyProperty SimpleStackPanel::_spacingProperty = DependencyProperty::Register( + L"Spacing", + xaml_typename(), + xaml_typename(), + PropertyMetadata(box_value(0.0), &SimpleStackPanel::_OnLayoutPropertyChanged) +); + +Size SimpleStackPanel::MeasureOverride(const 
Size& availableSize) const { + const bool isVertical = Orientation() == Orientation::Vertical; + const float spacing = (float)Spacing(); + const Thickness padding = Padding(); + const Size paddings{ (float)padding.Left + (float)padding.Right,(float)padding.Top + (float)padding.Bottom }; + + const Size childAvailableSize{ + availableSize.Width - paddings.Width, + availableSize.Height - paddings.Height + }; + + bool firstItem = true; + bool anyStretch = false; + Size finalSize{ paddings.Width, paddings.Height }; + + for (UIElement const& item : Children()) { + if (item.Visibility() == Visibility::Collapsed) { + // 不可见的子项不添加间距 + continue; + } + + item.Measure(childAvailableSize); + const Size itemSize = item.DesiredSize(); + + if (isVertical) { + finalSize.Height += itemSize.Height; + if (firstItem) { + firstItem = false; + } else { + finalSize.Height += spacing; + } + + if (anyStretch) { + continue; + } + + if (!std::isinf(availableSize.Width)) { + FrameworkElement elem = item.try_as(); + if (elem && elem.HorizontalAlignment() == HorizontalAlignment::Stretch) { + anyStretch = true; + finalSize.Width = availableSize.Width; + continue; + } + } + + if (itemSize.Height > 0) { + finalSize.Width = std::max(finalSize.Width, itemSize.Width + paddings.Width); + } + } else { + finalSize.Width += itemSize.Width; + if (firstItem) { + firstItem = false; + } else { + finalSize.Width += spacing; + } + + if (anyStretch) { + continue; + } + + if (!std::isinf(availableSize.Height)) { + FrameworkElement elem = item.try_as(); + if (elem && elem.VerticalAlignment() == VerticalAlignment::Stretch) { + anyStretch = true; + finalSize.Height = availableSize.Height; + continue; + } + } + + if (itemSize.Width > 0) { + finalSize.Height = std::max(finalSize.Height, itemSize.Height + paddings.Height); + } + } + } + + return finalSize; +} + +Size SimpleStackPanel::ArrangeOverride(Size finalSize) const { + const bool isVertical = Orientation() == Orientation::Vertical; + const Thickness padding = 
Padding(); + const float spacing = (float)Spacing(); + + Point position{ (float)padding.Left, (float)padding.Top }; + + for (UIElement const& item : Children()) { + if (item.Visibility() == Visibility::Collapsed) { + // 不可见的子项不添加间距 + continue; + } + + const Size itemSize = item.DesiredSize(); + Rect itemRect{ position.X, position.Y, itemSize.Width, itemSize.Height }; + + if (isVertical) { + auto alignment = HorizontalAlignment::Left; + if (FrameworkElement elem = item.try_as()) { + alignment = elem.HorizontalAlignment(); + } + + switch (alignment) { + case HorizontalAlignment::Center: + itemRect.X = position.X + (finalSize.Width - position.X - (float)padding.Right - itemRect.Width) / 2; + break; + case HorizontalAlignment::Right: + itemRect.X = finalSize.Width - (float)padding.Right - itemRect.Width; + break; + case HorizontalAlignment::Stretch: + itemRect.Width = finalSize.Width - position.X - (float)padding.Right; + break; + } + item.Arrange(itemRect); + + if (itemSize.Height > 0) { + position.Y += itemSize.Height + spacing; + } + } else { + auto alignment = VerticalAlignment::Top; + if (FrameworkElement elem = item.try_as()) { + alignment = elem.VerticalAlignment(); + } + + switch (alignment) { + case VerticalAlignment::Center: + itemRect.Y = position.Y + (finalSize.Height - position.Y - (float)padding.Bottom - itemRect.Height) / 2; + break; + case VerticalAlignment::Bottom: + itemRect.Y = finalSize.Height - (float)padding.Bottom - itemRect.Height; + break; + case VerticalAlignment::Stretch: + itemRect.Height = finalSize.Height - position.Y - (float)padding.Bottom; + break; + } + item.Arrange(itemRect); + + if (itemSize.Width > 0) { + position.X += itemSize.Width + spacing; + } + } + } + + return finalSize; +} + +void SimpleStackPanel::_OnLayoutPropertyChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { + SimpleStackPanel* that = get_self(sender.as()); + that->InvalidateMeasure(); + that->InvalidateArrange(); +} + +} diff --git 
a/src/Magpie.App/SimpleStackPanel.h b/src/Magpie.App/SimpleStackPanel.h new file mode 100644 index 000000000..72e5fd8b7 --- /dev/null +++ b/src/Magpie.App/SimpleStackPanel.h @@ -0,0 +1,39 @@ +#pragma once +#include "SimpleStackPanel.g.h" + +namespace winrt::Magpie::App::implementation { + +struct SimpleStackPanel : SimpleStackPanelT { + static DependencyProperty OrientationProperty() { return _orientationProperty; } + static DependencyProperty PaddingProperty() { return _paddingProperty; } + static DependencyProperty SpacingProperty() { return _spacingProperty; } + + Controls::Orientation Orientation() const { return GetValue(_orientationProperty).as(); } + void Orientation(Controls::Orientation value) const { SetValue(_orientationProperty, box_value(value)); } + + Thickness Padding() const { return GetValue(_paddingProperty).as(); } + void Padding(const Thickness& value) const { SetValue(_paddingProperty, box_value(value)); } + + double Spacing() const { return GetValue(_spacingProperty).as(); } + void Spacing(double value) const { SetValue(_spacingProperty, box_value(value)); } + + Size MeasureOverride(const Size& availableSize) const; + + Size ArrangeOverride(Size finalSize) const; + +private: + static const DependencyProperty _orientationProperty; + static const DependencyProperty _paddingProperty; + static const DependencyProperty _spacingProperty; + + static void _OnLayoutPropertyChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); +}; + +} + +namespace winrt::Magpie::App::factory_implementation { + +struct SimpleStackPanel : SimpleStackPanelT { +}; + +} diff --git a/src/Magpie.App/SimpleStackPanel.idl b/src/Magpie.App/SimpleStackPanel.idl new file mode 100644 index 000000000..3c63be6bf --- /dev/null +++ b/src/Magpie.App/SimpleStackPanel.idl @@ -0,0 +1,13 @@ +namespace Magpie.App { + runtimeclass SimpleStackPanel : Windows.UI.Xaml.Controls.Panel { + SimpleStackPanel(); + + static Windows.UI.Xaml.DependencyProperty 
OrientationProperty { get; }; + static Windows.UI.Xaml.DependencyProperty PaddingProperty { get; }; + static Windows.UI.Xaml.DependencyProperty SpacingProperty { get; }; + + Windows.UI.Xaml.Controls.Orientation Orientation; + Windows.UI.Xaml.Thickness Padding; + Double Spacing; + } +} diff --git a/src/Magpie.App/TextBlockHelper.cpp b/src/Magpie.App/TextBlockHelper.cpp index 3e95f4421..24e290690 100644 --- a/src/Magpie.App/TextBlockHelper.cpp +++ b/src/Magpie.App/TextBlockHelper.cpp @@ -15,7 +15,7 @@ namespace winrt::Magpie::App::implementation { DependencyProperty TextBlockHelper::_isAutoTooltipProperty = DependencyProperty::RegisterAttached( L"IsAutoTooltip", xaml_typename(), - xaml_typename(), + xaml_typename(), PropertyMetadata(box_value(false), _OnIsAutoTooltipChanged) ); diff --git a/src/Magpie.App/TitlebarControl.xaml b/src/Magpie.App/TitlebarControl.xaml index 88da5eed2..d433b71ed 100644 --- a/src/Magpie.App/TitlebarControl.xaml +++ b/src/Magpie.App/TitlebarControl.xaml @@ -10,12 +10,12 @@ VerticalAlignment="Top" mc:Ignorable="d"> - - + + - + - + value.GetObj(); - auto x64Node = binaryObj.FindMember("x64"); - if (x64Node == binaryObj.end()) { - Logger::Get().Error("找不到 x64 成员"); - _Status(UpdateStatus::ErrorWhileChecking); + const char* platform = +#ifdef _M_X64 + "x64"; +#elif defined(_M_ARM64) + "ARM64"; +#else + static_assert(false, "不支持的架构") +#endif + auto platformNode = binaryObj.FindMember(platform); + if (platformNode == binaryObj.end()) { + Logger::Get().Error(StrUtils::Concat("找不到 ", platform, "成员")); + // 还不支持此架构 + _Status(UpdateStatus::NoUpdate); co_return; } - if (!x64Node->value.IsObject()) { - Logger::Get().Error("x64 成员不是对象"); + if (!platformNode->value.IsObject()) { + Logger::Get().Error(StrUtils::Concat(platform, " 成员不是对象")); _Status(UpdateStatus::ErrorWhileChecking); co_return; } - auto x64Obj = x64Node->value.GetObj(); + auto platformObj = platformNode->value.GetObj(); - auto urlNode = x64Obj.FindMember("url"); - if (urlNode == 
x64Obj.end()) { + auto urlNode = platformObj.FindMember("url"); + if (urlNode == platformObj.end()) { Logger::Get().Error("找不到 url 成员"); _Status(UpdateStatus::ErrorWhileChecking); co_return; @@ -176,8 +186,8 @@ fire_and_forget UpdateService::CheckForUpdatesAsync(bool isAutoUpdate) { co_return; } - auto hashNode = x64Obj.FindMember("hash"); - if (hashNode == x64Obj.end()) { + auto hashNode = platformObj.FindMember("hash"); + if (hashNode == platformObj.end()) { Logger::Get().Error("找不到 hash 成员"); _Status(UpdateStatus::ErrorWhileChecking); co_return; diff --git a/src/Magpie.App/WrapPanel.cpp b/src/Magpie.App/WrapPanel.cpp index 276bad899..65840b6c4 100644 --- a/src/Magpie.App/WrapPanel.cpp +++ b/src/Magpie.App/WrapPanel.cpp @@ -1,3 +1,5 @@ +// 移植自 https://github.com/CommunityToolkit/WindowsCommunityToolkit/tree/77b009ddf591b78dfc5bad0088c99ce35406170b/Microsoft.Toolkit.Uwp.UI.Controls.Primitives/WrapPanel + #include "pch.h" #include "WrapPanel.h" #if __has_include("WrapPanel.g.cpp") @@ -9,49 +11,49 @@ using namespace Windows::UI::Xaml::Controls; namespace winrt::Magpie::App::implementation { -const DependencyProperty WrapPanel::HorizontalSpacingProperty = DependencyProperty::Register( +const DependencyProperty WrapPanel::_horizontalSpacingProperty = DependencyProperty::Register( L"HorizontalSpacing", xaml_typename(), - xaml_typename(), + xaml_typename(), PropertyMetadata(box_value(0.0), &WrapPanel::_OnLayoutPropertyChanged) ); -const DependencyProperty WrapPanel::VerticalSpacingProperty = DependencyProperty::Register( +const DependencyProperty WrapPanel::_verticalSpacingProperty = DependencyProperty::Register( L"VerticalSpacing", xaml_typename(), - xaml_typename(), + xaml_typename(), PropertyMetadata(box_value(0.0), &WrapPanel::_OnLayoutPropertyChanged) ); -const DependencyProperty WrapPanel::OrientationProperty = DependencyProperty::Register( +const DependencyProperty WrapPanel::_orientationProperty = DependencyProperty::Register( L"Orientation", xaml_typename(), - 
xaml_typename(), + xaml_typename(), PropertyMetadata(box_value(Orientation::Horizontal), &WrapPanel::_OnLayoutPropertyChanged) ); -const DependencyProperty WrapPanel::PaddingProperty = DependencyProperty::Register( +const DependencyProperty WrapPanel::_paddingProperty = DependencyProperty::Register( L"Padding", xaml_typename(), - xaml_typename(), + xaml_typename(), PropertyMetadata(box_value(Thickness{}), &WrapPanel::_OnLayoutPropertyChanged) ); -const DependencyProperty WrapPanel::StretchChildProperty = DependencyProperty::Register( +const DependencyProperty WrapPanel::_stretchChildProperty = DependencyProperty::Register( L"StretchChild", xaml_typename(), - xaml_typename(), + xaml_typename(), PropertyMetadata(box_value(StretchChild::None), &WrapPanel::_OnLayoutPropertyChanged) ); Size WrapPanel::MeasureOverride(const Size& availableSize) { - Thickness padding = Padding(); - Size childAvailableSize{ + const Thickness padding = Padding(); + const Size childAvailableSize{ availableSize.Width - (float)padding.Left - (float)padding.Right, availableSize.Height - (float)padding.Top - (float)padding.Bottom }; - for (const UIElement& child : Children()) { - child.Measure(childAvailableSize); + for (const UIElement& item : Children()) { + item.Measure(childAvailableSize); } return _UpdateRows(availableSize); @@ -63,7 +65,7 @@ Size WrapPanel::ArrangeOverride(Size finalSize) { if ((orientation == Orientation::Horizontal && finalSize.Width < desiredSize.Width) || (orientation == Orientation::Vertical && finalSize.Height < desiredSize.Height)) { - // We haven't received our desired size. We need to refresh the rows. 
+ // 没收到 DesiredSize,重新计算布局 _UpdateRows(finalSize); } @@ -73,14 +75,13 @@ Size WrapPanel::ArrangeOverride(Size finalSize) { UIElementCollection children = Children(); - // Now that we have all the data, we do the actual arrange pass + // 更新布局 uint32_t childIndex = 0; for (const Row& row : _rows) { for (const UvRect& rect : row.childrenRects) { UIElement child = children.GetAt(childIndex++); while (child.Visibility() == Visibility::Collapsed) { - // Collapsed children are not added into the rows, - // we skip them. + // _rows 不包含不可见的子项 child = children.GetAt(childIndex++); } @@ -88,9 +89,7 @@ Size WrapPanel::ArrangeOverride(Size finalSize) { rect.position, UvMeasure(rect.size.u, row.size.v), }; - - Rect finalRect = arrangeRect.ToRect(orientation); - child.Arrange(finalRect); + child.Arrange(arrangeRect.ToRect(orientation)); } } @@ -98,7 +97,7 @@ Size WrapPanel::ArrangeOverride(Size finalSize) { } void WrapPanel::_OnLayoutPropertyChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&) { - WrapPanel* that = get_self(sender.as>()); + WrapPanel* that = get_self(sender.as()); that->InvalidateMeasure(); that->InvalidateArrange(); } @@ -106,72 +105,70 @@ void WrapPanel::_OnLayoutPropertyChanged(DependencyObject const& sender, Depende Size WrapPanel::_UpdateRows(Size availableSize) { _rows.clear(); - Controls::Orientation orientation = Orientation(); - Thickness padding = Padding(); + const Controls::Orientation orientation = Orientation(); + const Thickness padding = Padding(); UIElementCollection children = Children(); UvMeasure paddingStart(orientation, (float)padding.Left, (float)padding.Top); UvMeasure paddingEnd(orientation, (float)padding.Right, (float)padding.Bottom); - if (children.Size() == 0) { - paddingStart.Add(paddingEnd); - return paddingStart.ToSize(orientation); - } + if (children.Size() > 0) { + const UvMeasure parentMeasure(orientation, availableSize.Width, availableSize.Height); + const UvMeasure spacingMeasure(orientation, 
(float)HorizontalSpacing(), (float)VerticalSpacing()); + UvMeasure position(orientation, (float)padding.Left, (float)padding.Top); - UvMeasure parentMeasure(orientation, availableSize.Width, availableSize.Height); - UvMeasure spacingMeasure(orientation, (float)HorizontalSpacing(), (float)VerticalSpacing()); - UvMeasure position(orientation, (float)padding.Left, (float)padding.Top); + Row currentRow; + UvMeasure finalMeasure(orientation, 0.0f, 0.0f); - Row currentRow; - UvMeasure finalMeasure(orientation, 0.0f, 0.0f); + const uint32_t count = children.Size(); + for (uint32_t i = 0; i < count; ++i) { + UIElement const& item = children.GetAt(i); - auto arrange = [&](UIElement const& child, bool isLast = false) { - if (child.Visibility() == Visibility::Collapsed) { - return; // if an item is collapsed, avoid adding the spacing - } + if (item.Visibility() == Visibility::Collapsed) { + // 不可见的子项不添加间距 + continue; + } - UvMeasure desiredMeasure(orientation, child.DesiredSize()); - if ((desiredMeasure.u + position.u + paddingEnd.u) > parentMeasure.u) { - // next row! 
- position.u = paddingStart.u; - position.v += currentRow.size.v + spacingMeasure.v; + UvMeasure desiredMeasure(orientation, item.DesiredSize()); + if (desiredMeasure.u + position.u + paddingEnd.u > parentMeasure.u) { + finalMeasure.u = std::max(finalMeasure.u, position.u - spacingMeasure.u); - _rows.push_back(std::move(currentRow)); - currentRow = {}; - } + // 下一行 + position.u = paddingStart.u; + position.v += currentRow.size.v + spacingMeasure.v; - // Stretch the last item to fill the available space - if (isLast) { - desiredMeasure.u = parentMeasure.u - position.u; - } + _rows.push_back(std::move(currentRow)); + currentRow = {}; + } - currentRow.Add(position, desiredMeasure); + if (i == count - 1 && StretchChild() == StretchChild::Last && !std::isinf(parentMeasure.u)) { + // 让最后一个子项填满剩余空间,剩余空间必须有限才有意义 + desiredMeasure.u = parentMeasure.u - position.u; + } - // adjust the location for the next items - position.u += desiredMeasure.u + spacingMeasure.u; - finalMeasure.u = std::max(finalMeasure.u, position.u); - }; + currentRow.Add(position, desiredMeasure); - uint32_t lastIndex = children.Size() - 1; - for (uint32_t i = 0; i < lastIndex; i++) { - arrange(children.GetAt(i)); - } + // 下一个子项的位置 + position.u += desiredMeasure.u + spacingMeasure.u; + } - arrange(children.GetAt(lastIndex), StretchChild() == StretchChild::Last); - if (!currentRow.childrenRects.empty()) { - _rows.push_back(std::move(currentRow)); - } + // 添加最后一行 + if (!currentRow.childrenRects.empty()) { + finalMeasure.u = std::max(finalMeasure.u, position.u - spacingMeasure.u); + _rows.push_back(std::move(currentRow)); + } - if (_rows.empty()) { - paddingStart.Add(paddingEnd); - return paddingStart.ToSize(orientation); - } + if (!_rows.empty()) { + // 计算 finalMeasure 的 v 分量 + UvRect lastRowRect = _rows.back().Rect(); + finalMeasure.v = lastRowRect.position.v + lastRowRect.size.v; - // Get max V here before computing final rect - UvRect lastRowRect = _rows.back().Rect(); - finalMeasure.v = 
lastRowRect.position.v + lastRowRect.size.v; - finalMeasure.Add(paddingEnd); - return finalMeasure.ToSize(orientation); + finalMeasure.Add(paddingEnd); + return finalMeasure.ToSize(orientation); + } + } + + return UvMeasure(paddingStart.u + paddingEnd.u, paddingStart.v + paddingEnd.v).ToSize(orientation); } } diff --git a/src/Magpie.App/WrapPanel.h b/src/Magpie.App/WrapPanel.h index f74caf886..9e7e3edcb 100644 --- a/src/Magpie.App/WrapPanel.h +++ b/src/Magpie.App/WrapPanel.h @@ -4,8 +4,6 @@ namespace winrt::Magpie::App::implementation { -// 移植自 https://github.com/CommunityToolkit/WindowsCommunityToolkit/tree/77b009ddf591b78dfc5bad0088c99ce35406170b/Microsoft.Toolkit.Uwp.UI.Controls.Primitives/WrapPanel - struct UvMeasure { UvMeasure() : u(0), v(0) {} @@ -29,7 +27,7 @@ struct UvMeasure { v += measure.v; } - Size ToSize(Controls::Orientation orientation) noexcept { + Size ToSize(Controls::Orientation orientation) const noexcept { return orientation == Controls::Orientation::Horizontal ? Size(u, v) : Size(v, u); } @@ -59,69 +57,55 @@ struct Row { } } - void Add(const UvMeasure& position, const UvMeasure& size_) { + void Add(const UvMeasure& position, const UvMeasure& size_) noexcept { childrenRects.emplace_back(position, size_); size.u = position.u + size_.u; size.v = std::max(size.v, size_.v); } + void Clear() noexcept { + childrenRects.clear(); + size = {}; + } + SmallVector childrenRects; UvMeasure size; }; struct WrapPanel : WrapPanelT { - double HorizontalSpacing() const { - return GetValue(HorizontalSpacingProperty).as(); - } - - void HorizontalSpacing(double value) { - SetValue(HorizontalSpacingProperty, box_value(value)); - } - - double VerticalSpacing() const { - return GetValue(VerticalSpacingProperty).as(); - } - - void VerticalSpacing(double value) { - SetValue(VerticalSpacingProperty, box_value(value)); - } + static DependencyProperty HorizontalSpacingProperty() { return _horizontalSpacingProperty; } + static DependencyProperty VerticalSpacingProperty() { 
return _verticalSpacingProperty; } + static DependencyProperty OrientationProperty() { return _orientationProperty; } + static DependencyProperty PaddingProperty() { return _paddingProperty; } + static DependencyProperty StretchChildProperty() { return _stretchChildProperty; } - Controls::Orientation Orientation() const { - return GetValue(OrientationProperty).as(); - } + double HorizontalSpacing() const { return GetValue(_horizontalSpacingProperty).as(); } + void HorizontalSpacing(double value) const { SetValue(_horizontalSpacingProperty, box_value(value)); } - void Orientation(Controls::Orientation value) { - SetValue(OrientationProperty, box_value(value)); - } + double VerticalSpacing() const { return GetValue(_verticalSpacingProperty).as(); } + void VerticalSpacing(double value) const { SetValue(_verticalSpacingProperty, box_value(value)); } - Thickness Padding() const { - return GetValue(PaddingProperty).as(); - } + Controls::Orientation Orientation() const { return GetValue(_orientationProperty).as(); } + void Orientation(Controls::Orientation value) const { SetValue(_orientationProperty, box_value(value)); } - void Padding(const Thickness& value) { - SetValue(PaddingProperty, box_value(value)); - } + Thickness Padding() const { return GetValue(_paddingProperty).as(); } + void Padding(const Thickness& value) const { SetValue(_paddingProperty, box_value(value)); } - StretchChild StretchChild() const { - return GetValue(StretchChildProperty).as(); - } - - void StretchChild(Magpie::App::StretchChild value) { - SetValue(StretchChildProperty, box_value(value)); - } + StretchChild StretchChild() const { return GetValue(_stretchChildProperty).as(); } + void StretchChild(Magpie::App::StretchChild value) const { SetValue(_stretchChildProperty, box_value(value)); } Size MeasureOverride(const Size& availableSize); Size ArrangeOverride(Size finalSize); - - static const DependencyProperty HorizontalSpacingProperty; - static const DependencyProperty 
VerticalSpacingProperty; - static const DependencyProperty OrientationProperty; - static const DependencyProperty PaddingProperty; - static const DependencyProperty StretchChildProperty; private: + static const DependencyProperty _horizontalSpacingProperty; + static const DependencyProperty _verticalSpacingProperty; + static const DependencyProperty _orientationProperty; + static const DependencyProperty _paddingProperty; + static const DependencyProperty _stretchChildProperty; + static void _OnLayoutPropertyChanged(DependencyObject const& sender, DependencyPropertyChangedEventArgs const&); Size _UpdateRows(Size availableSize); diff --git a/src/Magpie.App/WrapPanel.idl b/src/Magpie.App/WrapPanel.idl index c34ba77bd..1ecc3a28f 100644 --- a/src/Magpie.App/WrapPanel.idl +++ b/src/Magpie.App/WrapPanel.idl @@ -1,16 +1,22 @@ namespace Magpie.App { - enum StretchChild { - None, - Last - }; + enum StretchChild { + None, + Last + }; - runtimeclass WrapPanel : Windows.UI.Xaml.Controls.Panel { - WrapPanel(); + runtimeclass WrapPanel : Windows.UI.Xaml.Controls.Panel { + WrapPanel(); - Double HorizontalSpacing; - Double VerticalSpacing; - Windows.UI.Xaml.Controls.Orientation Orientation; - Windows.UI.Xaml.Thickness Padding; - StretchChild StretchChild; - } + static Windows.UI.Xaml.DependencyProperty HorizontalSpacingProperty { get; }; + static Windows.UI.Xaml.DependencyProperty VerticalSpacingProperty { get; }; + static Windows.UI.Xaml.DependencyProperty OrientationProperty { get; }; + static Windows.UI.Xaml.DependencyProperty PaddingProperty { get; }; + static Windows.UI.Xaml.DependencyProperty StretchChildProperty { get; }; + + Double HorizontalSpacing; + Double VerticalSpacing; + Windows.UI.Xaml.Controls.Orientation Orientation; + Windows.UI.Xaml.Thickness Padding; + StretchChild StretchChild; + } } diff --git a/src/Magpie.App/app.base.h b/src/Magpie.App/app.base.h deleted file mode 100644 index ab48f8452..000000000 --- a/src/Magpie.App/app.base.h +++ /dev/null @@ -1,35 +0,0 
@@ -#pragma once - -namespace winrt::Magpie::App::implementation { - -template -struct App_baseWithProvider : public App_base { - using IXamlType = Windows::UI::Xaml::Markup::IXamlType; - - IXamlType GetXamlType(Windows::UI::Xaml::Interop::TypeName const& type) { - return AppProvider()->GetXamlType(type); - } - - IXamlType GetXamlType(hstring const& fullName) { - return AppProvider()->GetXamlType(fullName); - } - - com_array GetXmlnsDefinitions() { - return AppProvider()->GetXmlnsDefinitions(); - } - -private: - bool _contentLoaded{ false }; - com_ptr _appProvider; - com_ptr AppProvider() { - if (!_appProvider) { - _appProvider = make_self(); - } - return _appProvider; - } -}; - -template -using AppT2 = App_baseWithProvider; - -} diff --git a/src/Magpie.App/conanfile.txt b/src/Magpie.App/conanfile.txt index f0e8967ad..b59cb4412 100644 --- a/src/Magpie.App/conanfile.txt +++ b/src/Magpie.App/conanfile.txt @@ -6,7 +6,7 @@ rapidjson/cci.20230929 kuba-zip/0.3.1 muparser/2.3.4 yas/7.1.0 -imgui/1.90 +imgui/1.90.4 [generators] MSBuildDeps diff --git a/src/Magpie.App/dummy.exe b/src/Magpie.App/dummy.exe deleted file mode 100644 index 9c251a1a7..000000000 --- a/src/Magpie.App/dummy.exe +++ /dev/null @@ -1 +0,0 @@ -Visual Studio 需要此占位可执行文件才能正确编译项目 diff --git a/src/Magpie.App/make_resources_pri.py b/src/Magpie.App/make_resources_pri.py new file mode 100644 index 000000000..38177a825 --- /dev/null +++ b/src/Magpie.App/make_resources_pri.py @@ -0,0 +1,48 @@ +import sys +import os +import glob +import subprocess + +if len(sys.argv) != 3: + raise Exception("请勿直接运行此脚本") + +windowsSdkDir = max( + glob.glob(os.environ["ProgramFiles(x86)"] + "\\Windows Kits\\10\\bin\\10.*") +) +makepriPath = windowsSdkDir + "\\x64\\makepri.exe" +if not os.access(makepriPath, os.X_OK): + raise Exception("未找到 makepri") + +os.chdir(sys.argv[1]) + +with open("priconfig.xml", "w") as priConfig: + priConfig.write( + '\n' + ) + for priPath in sys.argv[2].split(";"): + priConfig.write( + f""" + + + + + + + + 
+ + + + + + + + """ + ) + priConfig.write("\n") + +subprocess.run( + f'"{makepriPath}" New /pr . /cf priconfig.xml /of resources.pri /in Magpie.App /o', + capture_output=True, +) +os.remove("priconfig.xml") diff --git a/src/Magpie.App/packages.config b/src/Magpie.App/packages.config index b7b699331..dfebda821 100644 --- a/src/Magpie.App/packages.config +++ b/src/Magpie.App/packages.config @@ -1,6 +1,6 @@  - + - \ No newline at end of file + diff --git a/src/Magpie.App/pch.h b/src/Magpie.App/pch.h index 18f6a0ef9..e6ce05738 100644 --- a/src/Magpie.App/pch.h +++ b/src/Magpie.App/pch.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -37,7 +38,6 @@ namespace winrt { using namespace Windows::UI::Core; using namespace Windows::UI::Xaml::Data; -using namespace Windows::Web::Http; namespace MUXC = Microsoft::UI::Xaml::Controls; } diff --git a/src/Magpie.Core/BackendDescriptorStore.cpp b/src/Magpie.Core/BackendDescriptorStore.cpp new file mode 100644 index 000000000..e0cdea689 --- /dev/null +++ b/src/Magpie.Core/BackendDescriptorStore.cpp @@ -0,0 +1,66 @@ +#include "pch.h" +#include "BackendDescriptorStore.h" +#include "Logger.h" + +namespace Magpie::Core { + +ID3D11ShaderResourceView* BackendDescriptorStore::GetShaderResourceView(ID3D11Texture2D* texture) noexcept { + if (auto it = _srvMap.find(texture); it != _srvMap.end()) { + return it->second.get(); + } + + winrt::com_ptr srv; + HRESULT hr = _d3dDevice->CreateShaderResourceView(texture, nullptr, srv.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateShaderResourceView 失败", hr); + return nullptr; + } + + return _srvMap.emplace(texture, std::move(srv)).first->second.get(); +} + +ID3D11UnorderedAccessView* BackendDescriptorStore::GetUnorderedAccessView(ID3D11Texture2D* texture) noexcept { + if (auto it = _uavMap.find(texture); it != _uavMap.end()) { + return it->second.get(); + } + + winrt::com_ptr uav; + + D3D11_UNORDERED_ACCESS_VIEW_DESC desc{ + .ViewDimension = 
D3D11_UAV_DIMENSION_TEXTURE2D + }; + + HRESULT hr = _d3dDevice->CreateUnorderedAccessView(texture, &desc, uav.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateUnorderedAccessView 失败", hr); + return nullptr; + } + + return _uavMap.emplace(texture, std::move(uav)).first->second.get(); +} + +ID3D11UnorderedAccessView* BackendDescriptorStore::GetUnorderedAccessView(ID3D11Buffer* buffer, uint32_t numElements, DXGI_FORMAT format) noexcept { + if (auto it = _uavMap.find(buffer); it != _uavMap.end()) { + return it->second.get(); + } + + winrt::com_ptr uav; + + D3D11_UNORDERED_ACCESS_VIEW_DESC desc{ + .Format = format, + .ViewDimension = D3D11_UAV_DIMENSION_BUFFER, + .Buffer{ + .NumElements = numElements + } + }; + + HRESULT hr = _d3dDevice->CreateUnorderedAccessView(buffer, &desc, uav.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateUnorderedAccessView 失败", hr); + return nullptr; + } + + return _uavMap.emplace(buffer, std::move(uav)).first->second.get(); +} + +} diff --git a/src/Magpie.Core/BackendDescriptorStore.h b/src/Magpie.Core/BackendDescriptorStore.h new file mode 100644 index 000000000..cf2263da2 --- /dev/null +++ b/src/Magpie.Core/BackendDescriptorStore.h @@ -0,0 +1,33 @@ +#pragma once +#include + +namespace Magpie::Core { + +class BackendDescriptorStore { +public: + BackendDescriptorStore() = default; + BackendDescriptorStore(const BackendDescriptorStore&) = delete; + BackendDescriptorStore(BackendDescriptorStore&&) = default; + + void Initialize(ID3D11Device5* d3dDevice) noexcept { + _d3dDevice = d3dDevice; + } + + ID3D11ShaderResourceView* GetShaderResourceView(ID3D11Texture2D* texture) noexcept; + + ID3D11UnorderedAccessView* GetUnorderedAccessView(ID3D11Texture2D* texture) noexcept; + + ID3D11UnorderedAccessView* GetUnorderedAccessView( + ID3D11Buffer* buffer, + uint32_t numElements, + DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN + ) noexcept; + +private: + ID3D11Device5* _d3dDevice = nullptr; + + phmap::flat_hash_map> _srvMap; + 
phmap::flat_hash_map> _uavMap; +}; + +} diff --git a/src/Magpie.Core/CursorDrawer.cpp b/src/Magpie.Core/CursorDrawer.cpp new file mode 100644 index 000000000..95b2bfab8 --- /dev/null +++ b/src/Magpie.Core/CursorDrawer.cpp @@ -0,0 +1,562 @@ +#include "pch.h" +#include "CursorDrawer.h" +#include "DeviceResources.h" +#include "Logger.h" +#include "Utils.h" +#include "DirectXHelper.h" +#include "ScalingOptions.h" +#include "shaders/SimpleVS.h" +#include "shaders/SimplePS.h" +#include "shaders/MaskedCursorPS.h" +#include "shaders/MonochromeCursorPS.h" +#include +#include "Win32Utils.h" +#include "ScalingWindow.h" +#include "Renderer.h" +#include "CursorManager.h" +#include "StrUtils.h" + +using namespace DirectX; + +namespace Magpie::Core { + +struct VertexPositionTexture { + VertexPositionTexture() = default; + + VertexPositionTexture(const VertexPositionTexture&) = default; + VertexPositionTexture& operator=(const VertexPositionTexture&) = default; + + VertexPositionTexture(VertexPositionTexture&&) = default; + VertexPositionTexture& operator=(VertexPositionTexture&&) = default; + + VertexPositionTexture(XMFLOAT2 const& iposition, XMFLOAT2 const& itextureCoordinate) noexcept + : position(iposition), textureCoordinate(itextureCoordinate) { + } + + VertexPositionTexture(FXMVECTOR iposition, FXMVECTOR itextureCoordinate) noexcept { + XMStoreFloat2(&this->position, iposition); + XMStoreFloat2(&this->textureCoordinate, itextureCoordinate); + } + + XMFLOAT2 position; + XMFLOAT2 textureCoordinate; + + static constexpr D3D11_INPUT_ELEMENT_DESC InputElements[] = + { + { "SV_POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + }; +}; + +bool CursorDrawer::Initialize(DeviceResources& deviceResources, ID3D11Texture2D* backBuffer) noexcept { + _deviceResources = &deviceResources; + _backBuffer = backBuffer; + + 
const RECT& scalingWndRect = ScalingWindow::Get().WndRect(); + const RECT& destRect = ScalingWindow::Get().Renderer().DestRect(); + + _viewportRect = { + destRect.left - scalingWndRect.left, + destRect.top - scalingWndRect.top, + destRect.right - scalingWndRect.left, + destRect.bottom - scalingWndRect.top + }; + + ID3D11Device* d3dDevice = deviceResources.GetD3DDevice(); + + HRESULT hr = d3dDevice->CreateVertexShader( + SimpleVS, std::size(SimpleVS), nullptr, _simpleVS.put()); + if (FAILED(hr)) { + Logger::Get().ComError("创建顶点着色器失败", hr); + return false; + } + + hr = d3dDevice->CreateInputLayout( + VertexPositionTexture::InputElements, + (UINT)std::size(VertexPositionTexture::InputElements), + SimpleVS, + std::size(SimpleVS), + _simpleIL.put() + ); + if (FAILED(hr)) { + Logger::Get().ComError("创建输入布局失败", hr); + return false; + } + + D3D11_BUFFER_DESC bd{}; + bd.Usage = D3D11_USAGE_DYNAMIC; + bd.ByteWidth = sizeof(VertexPositionTexture) * 4; + bd.BindFlags = D3D11_BIND_VERTEX_BUFFER; + bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + + hr = d3dDevice->CreateBuffer(&bd, nullptr, _vtxBuffer.put()); + if (FAILED(hr)) { + Logger::Get().ComError("创建顶点缓冲区失败", hr); + return false; + } + + return true; +} + +void CursorDrawer::Draw() noexcept { + if (!_isCursorVisible) { + // 截屏时暂时不渲染光标 + return; + } + + const CursorManager& cursorManager = ScalingWindow::Get().CursorManager(); + const HCURSOR hCursor = cursorManager.Cursor(); + + if (!hCursor) { + return; + } + + const _CursorInfo* ci = _ResolveCursor(hCursor); + if (!ci) { + return; + } + + const POINT cursorPos = cursorManager.CursorPos(); + + const ScalingOptions& options = ScalingWindow::Get().Options(); + float cursorScaling = options.cursorScaling; + if (cursorScaling < 1e-5) { + // 光标缩放和源窗口相同 + const Renderer& renderer = ScalingWindow::Get().Renderer(); + const SIZE srcSize = Win32Utils::GetSizeOfRect(renderer.SrcRect()); + const SIZE destSize = Win32Utils::GetSizeOfRect(renderer.DestRect()); + cursorScaling = 
(((float)destSize.cx / srcSize.cx) + ((float)destSize.cy / srcSize.cy)) / 2; + } + + const SIZE cursorSize{ + lroundf(ci->size.cx * cursorScaling), + lroundf(ci->size.cy * cursorScaling) + }; + RECT cursorRect{ + .left = lroundf(cursorPos.x - ci->hotSpot.x * cursorScaling), + .top = lroundf(cursorPos.y - ci->hotSpot.y * cursorScaling), + .right = cursorRect.left + cursorSize.cx, + .bottom = cursorRect.top + cursorSize.cy + }; + + if (cursorRect.left >= _viewportRect.right || + cursorRect.top >= _viewportRect.bottom || + cursorRect.right <= _viewportRect.left || + cursorRect.bottom <= _viewportRect.top + ) { + // 光标在窗口外,不应发生这种情况 + return; + } + + const SIZE viewportSize = Win32Utils::GetSizeOfRect(_viewportRect); + float left = (cursorRect.left - _viewportRect.left) / (float)viewportSize.cx * 2 - 1.0f; + float top = 1.0f - (cursorRect.top - _viewportRect.top) / (float)viewportSize.cy * 2; + float right = left + cursorSize.cx / (float)viewportSize.cx * 2; + float bottom = top - cursorSize.cy / (float)viewportSize.cy * 2; + + ID3D11DeviceContext* d3dDC = _deviceResources->GetD3DDC(); + d3dDC->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + d3dDC->IASetInputLayout(_simpleIL.get()); + d3dDC->VSSetShader(_simpleVS.get(), nullptr, 0); + + // 配置顶点缓冲区 + { + const VertexPositionTexture data[] = { + { XMFLOAT2(left, top), XMFLOAT2(0.0f, 0.0f) }, + { XMFLOAT2(right, top), XMFLOAT2(1.0f, 0.0f) }, + { XMFLOAT2(left, bottom), XMFLOAT2(0.0f, 1.0f) }, + { XMFLOAT2(right, bottom), XMFLOAT2(1.0f, 1.0f) } + }; + + D3D11_MAPPED_SUBRESOURCE ms; + HRESULT hr = d3dDC->Map(_vtxBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &ms); + if (FAILED(hr)) { + Logger::Get().ComError("Map 失败", hr); + return; + } + + std::memcpy(ms.pData, data, sizeof(data)); + d3dDC->Unmap(_vtxBuffer.get(), 0); + + ID3D11Buffer* vtxBuffer = _vtxBuffer.get(); + UINT stride = sizeof(VertexPositionTexture); + UINT offset = 0; + d3dDC->IASetVertexBuffers(0, 1, &vtxBuffer, &stride, &offset); + } + + // 
配置渲染视口 + { + D3D11_VIEWPORT vp{ + (float)_viewportRect.left, + (float)_viewportRect.top, + (float)viewportSize.cx, + (float)viewportSize.cy, + 0.0f, + 1.0f + }; + d3dDC->RSSetViewports(1, &vp); + d3dDC->RSSetState(nullptr); + } + + if (ci->type == _CursorType::Color) { + // 配置像素着色器 + if (!_simplePS) { + HRESULT hr = _deviceResources->GetD3DDevice()->CreatePixelShader( + SimplePS, sizeof(SimplePS), nullptr, _simplePS.put()); + if (FAILED(hr)) { + Logger::Get().ComError("创建像素着色器失败", hr); + return; + } + } + + d3dDC->PSSetShader(_simplePS.get(), nullptr, 0); + d3dDC->PSSetConstantBuffers(0, 0, nullptr); + ID3D11ShaderResourceView* cursorSrv = ci->textureSrv.get(); + d3dDC->PSSetShaderResources(0, 1, &cursorSrv); + + const bool useBilinear = options.cursorInterpolationMode == CursorInterpolationMode::Bilinear && + std::abs(options.cursorScaling - 1.0f) > 1e-3; + ID3D11SamplerState* cursorSampler = _deviceResources->GetSampler( + useBilinear ? D3D11_FILTER_MIN_MAG_MIP_LINEAR : D3D11_FILTER_MIN_MAG_MIP_POINT, + D3D11_TEXTURE_ADDRESS_CLAMP + ); + d3dDC->PSSetSamplers(0, 1, &cursorSampler); + + // 预乘 alpha + _SetPremultipliedAlphaBlend(); + } else { + if (_tempCursorTextureSize != cursorSize) { + _tempCursorTexture = nullptr; + _tempCursorTextureRtv = nullptr; + + ID3D11Device* d3dDevice = _deviceResources->GetD3DDevice(); + + // 创建临时纹理,如果光标尺寸变了则重新创建 + _tempCursorTexture = DirectXHelper::CreateTexture2D( + d3dDevice, + DXGI_FORMAT_R8G8B8A8_UNORM, + cursorSize.cx, + cursorSize.cy, + D3D11_BIND_SHADER_RESOURCE + ); + if (!_tempCursorTexture) { + Logger::Get().Error("创建光标纹理失败"); + return; + } + + HRESULT hr = d3dDevice->CreateShaderResourceView( + _tempCursorTexture.get(), nullptr, _tempCursorTextureRtv.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateShaderResourceView 失败", hr); + _tempCursorTexture = nullptr; + return; + } + + _tempCursorTextureSize = cursorSize; + } + + D3D11_BOX srcBox{ + (UINT)std::max(cursorRect.left, _viewportRect.left), + 
(UINT)std::max(cursorRect.top, _viewportRect.top), + 0, + (UINT)std::min(cursorRect.right, _viewportRect.right), + (UINT)std::min(cursorRect.bottom, _viewportRect.bottom), + 1 + }; + d3dDC->CopySubresourceRegion( + _tempCursorTexture.get(), + 0, + srcBox.left - cursorRect.left, + srcBox.top - cursorRect.top, + 0, + _backBuffer, + 0, + &srcBox + ); + + if (ci->type == _CursorType::MaskedColor) { + if (!_maskedCursorPS) { + HRESULT hr = _deviceResources->GetD3DDevice()->CreatePixelShader( + MaskedCursorPS, sizeof(MaskedCursorPS), nullptr, _maskedCursorPS.put()); + if (FAILED(hr)) { + Logger::Get().ComError("创建像素着色器失败", hr); + return; + } + } + d3dDC->PSSetShader(_maskedCursorPS.get(), nullptr, 0); + } else { + if (!_monochromeCursorPS) { + HRESULT hr = _deviceResources->GetD3DDevice()->CreatePixelShader( + MonochromeCursorPS, sizeof(MonochromeCursorPS), nullptr, _monochromeCursorPS.put()); + if (FAILED(hr)) { + Logger::Get().ComError("创建像素着色器失败", hr); + return; + } + } + d3dDC->PSSetShader(_monochromeCursorPS.get(), nullptr, 0); + } + + d3dDC->PSSetConstantBuffers(0, 0, nullptr); + + { + ID3D11ShaderResourceView* srvs[2]{ _tempCursorTextureRtv.get(), ci->textureSrv.get() }; + d3dDC->PSSetShaderResources(0, 2, srvs); + } + + { + // 支持双线性插值的单色光标和彩色掩码光标会转换为彩色光标,这里只需要最近邻插值 + ID3D11SamplerState* t = _deviceResources->GetSampler( + D3D11_FILTER_MIN_MAG_MIP_POINT, D3D11_TEXTURE_ADDRESS_CLAMP); + d3dDC->PSSetSamplers(0, 1, &t); + } + } + + d3dDC->Draw(4, 0); +} + +const CursorDrawer::_CursorInfo* CursorDrawer::_ResolveCursor(HCURSOR hCursor) noexcept { + if (auto it = _cursorInfos.find(hCursor); it != _cursorInfos.end()) { + return &it->second; + } + + ICONINFO iconInfo{}; + if (!GetIconInfo(hCursor, &iconInfo)) { + Logger::Get().Win32Error("GetIconInfo 失败"); + return nullptr; + } + + Utils::ScopeExit se([&iconInfo]() { + if (iconInfo.hbmColor) { + DeleteBitmap(iconInfo.hbmColor); + } + DeleteBitmap(iconInfo.hbmMask); + }); + + BITMAP bmp{}; + if 
(!GetObject(iconInfo.hbmMask, sizeof(bmp), &bmp)) { + Logger::Get().Win32Error("GetObject 失败"); + return nullptr; + } + + // 获取位图数据 + BITMAPINFO bi{ + .bmiHeader{ + .biSize = sizeof(BITMAPINFOHEADER), + .biWidth = bmp.bmWidth, + .biHeight = -bmp.bmHeight, + .biPlanes = 1, + .biBitCount = 32, + .biCompression = BI_RGB, + .biSizeImage = DWORD(bmp.bmWidth * bmp.bmHeight * 4) + } + }; + + std::unique_ptr pixels(std::make_unique(bi.bmiHeader.biSizeImage)); + HDC hdcScreen = GetDC(NULL); + if (GetDIBits(hdcScreen, iconInfo.hbmColor ? iconInfo.hbmColor : iconInfo.hbmMask, + 0, bmp.bmHeight, pixels.get(), &bi, DIB_RGB_COLORS) != bmp.bmHeight + ) { + Logger::Get().Win32Error("GetDIBits 失败"); + ReleaseDC(NULL, hdcScreen); + return nullptr; + } + + _CursorInfo cursorInfo{ + .hotSpot = { (LONG)iconInfo.xHotspot, (LONG)iconInfo.yHotspot }, + // 单色光标的 hbmMask 高度为实际高度的两倍 + .size = { bmp.bmWidth, iconInfo.hbmColor ? bmp.bmHeight : bmp.bmHeight / 2 } + }; + winrt::com_ptr cursorTexture; + + ID3D11Device* d3dDevice = _deviceResources->GetD3DDevice(); + + if (iconInfo.hbmColor) { + // 彩色光标或彩色掩码光标 + + // 若颜色掩码有 A 通道,则是彩色光标,否则是彩色掩码光标 + bool hasAlpha = false; + for (uint32_t i = 3; i < bi.bmiHeader.biSizeImage; i += 4) { + if (pixels[i] != 0) { + hasAlpha = true; + break; + } + } + + if (hasAlpha) { + // 彩色光标 + cursorInfo.type = _CursorType::Color; + + for (uint32_t i = 0; i < bi.bmiHeader.biSizeImage; i += 4) { + // 预乘 Alpha 通道 + double alpha = pixels[size_t(i + 3)] / 255.0f; + + uint8_t b = (uint8_t)std::lround(pixels[i] * alpha); + pixels[i] = (uint8_t)std::lround(pixels[size_t(i + 2)] * alpha); + pixels[size_t(i + 1)] = (uint8_t)std::lround(pixels[size_t(i + 1)] * alpha); + pixels[size_t(i + 2)] = b; + pixels[size_t(i + 3)] = 255 - pixels[size_t(i + 3)]; + } + } else { + // 彩色掩码光标 + std::unique_ptr maskPixels(std::make_unique(bi.bmiHeader.biSizeImage)); + if (GetDIBits(hdcScreen, iconInfo.hbmMask, 0, bmp.bmHeight, + maskPixels.get(), &bi, DIB_RGB_COLORS) != bmp.bmHeight + ) { + 
Logger::Get().Win32Error("GetDIBits 失败"); + ReleaseDC(NULL, hdcScreen); + return nullptr; + } + + // 计算此彩色掩码光标是否可以转换为彩色光标 + bool canConvertToColor = true; + for (uint32_t i = 0; i < bi.bmiHeader.biSizeImage; i += 4) { + if (maskPixels[i] != 0 && + (pixels[i] != 0 || pixels[size_t(i + 1)] != 0 || pixels[size_t(i + 2)] != 0) + ) { + // 掩码不为 0 则不能转换为彩色光标 + canConvertToColor = false; + break; + } + } + + if (canConvertToColor) { + // 转换为彩色光标以获得更好的插值效果和渲染性能 + cursorInfo.type = _CursorType::Color; + + for (uint32_t i = 0; i < bi.bmiHeader.biSizeImage; i += 4) { + if (maskPixels[i] == 0) { + // 保留光标颜色 + // Alpha 通道已经是 0,无需设置 + std::swap(pixels[i], pixels[size_t(i + 2)]); + } else { + // 透明像素 + std::memset(&pixels[i], 0, 3); + pixels[size_t(i + 3)] = 255; + } + } + } else { + cursorInfo.type = _CursorType::MaskedColor; + + // 将 XOR 掩码复制到透明通道中 + for (uint32_t i = 0; i < bi.bmiHeader.biSizeImage; i += 4) { + std::swap(pixels[i], pixels[size_t(i + 2)]); + pixels[size_t(i + 3)] = maskPixels[i]; + } + } + } + } else { + // 单色光标 + const uint32_t halfSize = bi.bmiHeader.biSizeImage / 2; + + // 计算此单色光标是否可以转换为彩色光标 + bool canConvertToColor = true; + for (uint32_t i = 0; i < halfSize; i += 4) { + // 上半部分是 AND 掩码,下半部分是 XOR 掩码 + if (pixels[i] != 0 && pixels[size_t(i + halfSize)] != 0) { + // 存在反色像素则不能转换为彩色光标 + canConvertToColor = false; + break; + } + } + + if (canConvertToColor) { + // 转换为彩色光标以获得更好的插值效果和渲染性能 + cursorInfo.type = _CursorType::Color; + + for (uint32_t i = 0; i < halfSize; i += 4) { + // 上半部分是 AND 掩码,下半部分是 XOR 掩码 + // https://learn.microsoft.com/en-us/windows-hardware/drivers/display/drawing-monochrome-pointers + if (pixels[i] == 0) { + if (pixels[size_t(i + halfSize)] == 0) { + // 黑色 + std::memset(&pixels[i], 0, 4); + } else { + // 白色 + std::memset(&pixels[i], 255, 3); + pixels[size_t(i + 3)] = 0; + } + } else { + // 透明 + std::memset(&pixels[i], 0, 3); + pixels[size_t(i + 3)] = 255; + } + } + } else { + cursorInfo.type = _CursorType::Monochrome; + + // 红色通道是 AND 掩码,绿色通道是 
XOR 掩码 + // 构造 DXGI_FORMAT_R8G8_UNORM 的初始数据 + uint8_t* upPtr = &pixels[0]; + uint8_t* downPtr = &pixels[halfSize]; + uint8_t* targetPtr = &pixels[0]; + for (uint32_t i = 0; i < halfSize; i += 4) { + *targetPtr++ = *upPtr; + *targetPtr++ = *downPtr; + + upPtr += 4; + downPtr += 4; + } + } + } + + ReleaseDC(NULL, hdcScreen); + + { + const bool isMonochrome = cursorInfo.type == _CursorType::Monochrome; + const D3D11_SUBRESOURCE_DATA initData{ + .pSysMem = pixels.get(), + .SysMemPitch = UINT(bmp.bmWidth * (isMonochrome ? 2 : 4)) + }; + cursorTexture = DirectXHelper::CreateTexture2D( + d3dDevice, + isMonochrome ? DXGI_FORMAT_R8G8_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM, + bmp.bmWidth, + iconInfo.hbmColor ? bmp.bmHeight : bmp.bmHeight / 2, + D3D11_BIND_SHADER_RESOURCE, + D3D11_USAGE_IMMUTABLE, + 0, + &initData + ); + if (!cursorTexture) { + Logger::Get().Error("创建光标纹理失败"); + return nullptr; + } + } + + HRESULT hr = d3dDevice->CreateShaderResourceView(cursorTexture.get(), nullptr, cursorInfo.textureSrv.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateShaderResourceView 失败", hr); + return nullptr; + } + + const char* CURSOR_TYPES[] = { "彩色","彩色掩码","单色" }; + Logger::Get().Info(StrUtils::Concat("已解析", CURSOR_TYPES[(int)cursorInfo.type], "光标")); + + return &_cursorInfos.emplace(hCursor, std::move(cursorInfo)).first->second; +} + +bool CursorDrawer::_SetPremultipliedAlphaBlend() noexcept { + if (!premultipliedAlphaBlendBlendState) { + // FinalColor = ScreenColor * CursorColor.a + CursorColor + D3D11_BLEND_DESC desc{}; + desc.RenderTarget[0].BlendEnable = TRUE; + desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; + desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; + desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC_ALPHA; + desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; + desc.RenderTarget[0].BlendOp = desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; + desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + + HRESULT hr = 
_deviceResources->GetD3DDevice()->CreateBlendState( + &desc, premultipliedAlphaBlendBlendState.put()); + if (FAILED(hr)) { + Logger::Get().ComError("创建混合状态失败", hr); + return false; + } + } + + _deviceResources->GetD3DDC()->OMSetBlendState(premultipliedAlphaBlendBlendState.get(), nullptr, 0xffffffff); + return true; +} + +} diff --git a/src/Magpie.Core/CursorDrawer.h b/src/Magpie.Core/CursorDrawer.h new file mode 100644 index 000000000..c3737e762 --- /dev/null +++ b/src/Magpie.Core/CursorDrawer.h @@ -0,0 +1,78 @@ +#pragma once +#include +#include "ScalingOptions.h" + +namespace Magpie::Core { + +class DeviceResources; + +class CursorDrawer { +public: + CursorDrawer() noexcept = default; + CursorDrawer(const CursorDrawer&) = delete; + CursorDrawer(CursorDrawer&&) = delete; + + bool Initialize(DeviceResources& deviceResources, ID3D11Texture2D* backBuffer) noexcept; + + void Draw() noexcept; + + void IsCursorVisible(bool value) noexcept { + _isCursorVisible = value; + } + + bool IsCursorVisible() const noexcept { + return _isCursorVisible; + } + +private: + enum class _CursorType { + // 彩色光标,此时纹理中 RGB 通道已预乘 A 通道(premultiplied alpha),A 通道已预先取反 + // 这是为了减少着色器的计算量以及确保(可能进行的)双线性差值的准确性 + // 计算公式:FinalColor = ScreenColor * CursorColor.a + CursorColor + // 纹理格式:DXGI_FORMAT_R8G8B8A8_UNORM + Color = 0, + // 彩色掩码光标,此时 A 通道可能为 0 或 255 + // 为 0 时表示 RGB 通道取代屏幕颜色,为 255 时表示 RGB 通道和屏幕颜色进行异或操作 + // 纹理格式:DXGI_FORMAT_R8G8B8A8_UNORM + MaskedColor, + // 单色光标,此时 R 通道为 AND 掩码,G 通道为 XOR 掩码,其他通道不使用 + // RG 通道的值只能是 0 或 255 + // 纹理格式:DXGI_FORMAT_R8G8_UNORM + Monochrome + }; + + struct _CursorInfo { + POINT hotSpot{}; + SIZE size{}; + winrt::com_ptr textureSrv = nullptr; + _CursorType type = _CursorType::Color; + }; + + const _CursorInfo* _ResolveCursor(HCURSOR hCursor) noexcept; + + bool _SetPremultipliedAlphaBlend() noexcept; + + DeviceResources* _deviceResources = nullptr; + ID3D11Texture2D* _backBuffer = nullptr; + + RECT _viewportRect{}; + + phmap::flat_hash_map _cursorInfos; + + 
winrt::com_ptr _simpleVS; + winrt::com_ptr _simpleIL; + winrt::com_ptr _vtxBuffer; + winrt::com_ptr _simplePS; + winrt::com_ptr premultipliedAlphaBlendBlendState; + winrt::com_ptr _maskedCursorPS; + winrt::com_ptr _monochromeCursorPS; + + // 用于渲染彩色掩码光标和单色光标的临时纹理 + winrt::com_ptr _tempCursorTexture; + winrt::com_ptr _tempCursorTextureRtv; + SIZE _tempCursorTextureSize{}; + + bool _isCursorVisible = true; +}; + +} diff --git a/src/Magpie.Core/CursorManager.cpp b/src/Magpie.Core/CursorManager.cpp index b9c2deee2..d1a6603ed 100644 --- a/src/Magpie.Core/CursorManager.cpp +++ b/src/Magpie.Core/CursorManager.cpp @@ -1,327 +1,212 @@ #include "pch.h" #include "CursorManager.h" -#include "MagApp.h" -#include "FrameSourceBase.h" -#include "Renderer.h" #include "Logger.h" -#include "Win32Utils.h" -#include "DeviceResources.h" -#include "GraphicsCaptureFrameSource.h" -#include "WindowHelper.h" -#include "Utils.h" #include +#include "Win32Utils.h" +#include "ScalingOptions.h" +#include "ScalingWindow.h" +#include "Renderer.h" #pragma comment(lib, "Magnification.lib") - namespace Magpie::Core { -// 将源窗口的光标位置映射到缩放后的光标位置 -// 当光标位于源窗口之外,与源窗口的距离不会缩放 -static POINT SrcToHost(POINT pt, bool screenCoord) { - const RECT& srcFrameRect = MagApp::Get().GetFrameSource().GetSrcFrameRect(); - const RECT& virtualOutputRect = MagApp::Get().GetRenderer().GetVirtualOutputRect(); - const RECT& hostRect = MagApp::Get().GetHostWndRect(); +// 将源窗口的光标位置映射到缩放后的光标位置。当光标位于源窗口之外,与源窗口的距离不会缩放。 +// 对于光标,第一个像素映射到第一个像素,最后一个像素映射到最后一个像素,因此光标区域的缩放 +// 倍率和窗口缩放倍率不同! 
+static POINT SrcToScaling(POINT pt) noexcept { + const Renderer& renderer = ScalingWindow::Get().Renderer(); + const RECT& srcRect = renderer.SrcRect(); + const RECT& destRect = renderer.DestRect(); + const RECT& scalingRect = ScalingWindow::Get().WndRect(); POINT result; - if (screenCoord) { - result = { hostRect.left, hostRect.top }; - } else { - result = {}; - } - if (pt.x >= srcFrameRect.right) { - result.x += hostRect.right - hostRect.left + pt.x - srcFrameRect.right; - } else if (pt.x < srcFrameRect.left) { - result.x += pt.x - srcFrameRect.left; + if (pt.x >= srcRect.right) { + result.x = scalingRect.right + pt.x - srcRect.right; + } else if (pt.x < srcRect.left) { + result.x = scalingRect.left + pt.x - srcRect.left; } else { - double pos = double(pt.x - srcFrameRect.left) / (srcFrameRect.right - srcFrameRect.left - 1); - result.x += std::lround(pos * (virtualOutputRect.right - virtualOutputRect.left - 1)) + virtualOutputRect.left; + double pos = double(pt.x - srcRect.left) / (srcRect.right - srcRect.left - 1); + result.x = std::lround(pos * (destRect.right - destRect.left - 1)) + destRect.left; } - if (pt.y >= srcFrameRect.bottom) { - result.y += hostRect.bottom - hostRect.top + pt.y - srcFrameRect.bottom; - } else if (pt.y < srcFrameRect.top) { - result.y += pt.y - srcFrameRect.top; + if (pt.y >= srcRect.bottom) { + result.y = scalingRect.bottom + pt.y - srcRect.bottom; + } else if (pt.y < srcRect.top) { + result.y = scalingRect.top + pt.y - srcRect.top; } else { - double pos = double(pt.y - srcFrameRect.top) / (srcFrameRect.bottom - srcFrameRect.top - 1); - result.y += std::lround(pos * (virtualOutputRect.bottom - virtualOutputRect.top - 1)) + virtualOutputRect.top; + double pos = double(pt.y - srcRect.top) / (srcRect.bottom - srcRect.top - 1); + result.y = std::lround(pos * (destRect.bottom - destRect.top - 1)) + destRect.top; } return result; } -// 将缩放后的光标位置映射到源窗口 -static POINT HostToSrc(POINT pt) { - const RECT& srcFrameRect = 
MagApp::Get().GetFrameSource().GetSrcFrameRect(); - const RECT& hostRect = MagApp::Get().GetHostWndRect(); - const RECT& virtualOutputRect = MagApp::Get().GetRenderer().GetVirtualOutputRect(); - RECT outputRect = MagApp::Get().GetRenderer().GetOutputRect(); - - const SIZE srcFrameSize = Win32Utils::GetSizeOfRect(srcFrameRect); - const SIZE virtualOutputSize = Win32Utils::GetSizeOfRect(virtualOutputRect); - const SIZE outputSize = Win32Utils::GetSizeOfRect(outputRect); +static POINT ScalingToSrc(POINT pt) noexcept { + const Renderer& renderer = ScalingWindow::Get().Renderer(); + const RECT& srcRect = renderer.SrcRect(); + const RECT& destRect = renderer.DestRect(); - pt.x -= hostRect.left; - pt.y -= hostRect.top; + const SIZE srcSize = Win32Utils::GetSizeOfRect(srcRect); + const SIZE destSize = Win32Utils::GetSizeOfRect(destRect); - POINT result = { srcFrameRect.left, srcFrameRect.top }; + POINT result = { srcRect.left, srcRect.top }; - if (pt.x >= outputRect.right) { - result.x += srcFrameSize.cx + pt.x - outputRect.right; - } else if (pt.x < outputRect.left) { - result.x += pt.x - outputRect.left; + if (pt.x >= destRect.right) { + result.x += srcSize.cx + pt.x - destRect.right; + } else if (pt.x < destRect.left) { + result.x += pt.x - destRect.left; } else { - double pos = double(pt.x - virtualOutputRect.left) / (virtualOutputSize.cx - 1); - result.x += std::lround(pos * (srcFrameSize.cx - 1)); + double pos = double(pt.x - destRect.left) / (destSize.cx - 1); + result.x += std::lround(pos * (srcSize.cx - 1)); } - if (pt.y >= outputRect.bottom) { - result.y += srcFrameSize.cx + pt.y - outputRect.bottom; - } else if (pt.y < outputRect.top) { - result.y += pt.y - outputRect.top; + if (pt.y >= destRect.bottom) { + result.y += srcSize.cy + pt.y - destRect.bottom; + } else if (pt.y < destRect.top) { + result.y += pt.y - destRect.top; } else { - double pos = double(pt.y - virtualOutputRect.top) / (virtualOutputSize.cy - 1); - result.y += std::lround(pos * (srcFrameSize.cy 
- 1)); + double pos = double(pt.y - destRect.top) / (destSize.cy - 1); + result.y += std::lround(pos * (srcSize.cy - 1)); } return result; } -CursorManager::~CursorManager() { - if (_curClips != RECT{}) { - ClipCursor(nullptr); - } +// SetCursorPos 无法可靠移动光标,虽然调用之后立刻查询光标位置没有问题,但经过一 +// 段时间后再次查询会发现光标位置又回到了设置之前。这可能是因为 OS 异步处理硬件输 +// 入队列,SetCursorPos 时队列中仍有旧事件尚未处理。 +// 这个函数使用 SendInput 将移动光标事件插入输入队列,然后等待系统处理到该事件, +// 避免了并发问题。如果设置不成功则多次尝试。这里旨在尽最大努力,我怀疑是否有完美 +// 的解决方案。 +static void ReliableSetCursorPos(POINT pos) noexcept { + const int screenWidth = GetSystemMetrics(SM_CXVIRTUALSCREEN); + const int screenHeight = GetSystemMetrics(SM_CYVIRTUALSCREEN); + + INPUT input{ + .type = INPUT_MOUSE, + .mi{ + .dx = (pos.x * 65535) / (screenWidth - 1), + .dy = (pos.y * 65535) / (screenHeight - 1), + .dwFlags = MOUSEEVENTF_MOVE | MOUSEEVENTF_ABSOLUTE | MOUSEEVENTF_VIRTUALDESK + } + }; - if (_isUnderCapture) { - POINT pt{}; - if (!::GetCursorPos(&pt)) { + // 如果设置不成功则多次尝试 + for (int i = 0; i < 10; ++i) { + if (!SendInput(1, &input, sizeof(input))) { + Logger::Get().Win32Error("SendInput 失败"); + break; + } + + // 等待系统处理 + Sleep(0); + + POINT curCursorPos; + if (!GetCursorPos(&curCursorPos)) { Logger::Get().Win32Error("GetCursorPos 失败"); + break; + } + + if (curCursorPos == pos) { + // 已成功,但保险起见再设置一次 + SendInput(1, &input, sizeof(input)); + return; } - _StopCapture(pt, true); } - MagApp::Get().UnregisterWndProcHandler(_handlerId); + // 回落到 SetCursorPos + SetCursorPos(pos.x, pos.y); } -static std::optional HostWndProc(HWND /*hWnd*/, UINT message, WPARAM /*wParam*/, LPARAM /*lParam*/) { - if (MagApp::Get().GetOptions().Is3DGameMode() && MagApp::Get().GetRenderer().IsUIVisiable()) { - return std::nullopt; - } - - if (message == WM_LBUTTONDOWN || message == WM_RBUTTONDOWN) { - // 主窗口会在非常特定的情况下收到光标消息: - // 1. 未处于捕获状态 - // 2. 
缩放后的位置未被遮挡而缩放前的位置被遮挡 - // 或用户操作 UI 时 - HWND hwndSrc = MagApp::Get().GetHwndSrc(); - HWND hwndForground = GetForegroundWindow(); - if (hwndForground != hwndSrc) { - if (!Win32Utils::SetForegroundWindow(hwndSrc)) { - // 设置前台窗口失败,可能是因为前台窗口是开始菜单 - if (WindowHelper::IsStartMenu(hwndForground)) { - using namespace std::chrono; - - // 限制触发频率 - static steady_clock::time_point prevTimePoint{}; - auto now = steady_clock::now(); - if (duration_cast(now - prevTimePoint).count() >= 1000) { - prevTimePoint = now; - - // 模拟按键关闭开始菜单 - INPUT inputs[4]{}; - inputs[0].type = INPUT_KEYBOARD; - inputs[0].ki.wVk = VK_LWIN; - inputs[1].type = INPUT_KEYBOARD; - inputs[1].ki.wVk = VK_LWIN; - inputs[1].ki.dwFlags = KEYEVENTF_KEYUP; - SendInput((UINT)std::size(inputs), inputs, sizeof(INPUT)); - - // 等待系统处理 - Sleep(1); - } +CursorManager::~CursorManager() noexcept { + _ShowSystemCursor(true, true); - SetForegroundWindow(hwndSrc); - } - } + ClipCursor(nullptr); - return 0; + if (_isUnderCapture && !ScalingWindow::Get().Options().IsDebugMode()) { + POINT cursorPos; + if (!GetCursorPos(&cursorPos)) { + Logger::Get().Win32Error("GetCursorPos 失败"); } - if (!MagApp::Get().GetOptions().IsDebugMode()) { - SetWindowPos(MagApp::Get().GetHwndHost(), HWND_TOPMOST, 0, 0, 0, 0, SWP_NOMOVE | SWP_NOSIZE | SWP_NOREDRAW); - } + _StopCapture(cursorPos, true); + ReliableSetCursorPos(cursorPos); } - - return std::nullopt; } -bool CursorManager::Initialize() { - _handlerId = MagApp::Get().RegisterWndProcHandler(HostWndProc); - - if (MagApp::Get().GetOptions().Is3DGameMode()) { +bool CursorManager::Initialize() noexcept { + const ScalingOptions& options = ScalingWindow::Get().Options(); + if (options.IsDebugMode()) { + _shouldDrawCursor = true; + _isUnderCapture = true; + } else if (options.Is3DGameMode()) { POINT cursorPos; - ::GetCursorPos(&cursorPos); + GetCursorPos(&cursorPos); _StartCapture(cursorPos); + ReliableSetCursorPos(cursorPos); + + _shouldDrawCursor = true; + _ShowSystemCursor(false); } 
Logger::Get().Info("CursorManager 初始化完成"); return true; } -// 检测光标位于哪个窗口上,是否检测缩放窗口由 clickThroughHost 指定 -static HWND WindowFromPoint(POINT pt, bool clickThroughHost) { - struct EnumData { - HWND result; - POINT pt; - bool clickThroughHost; - } data{ NULL, pt, clickThroughHost }; - - EnumWindows([](HWND hWnd, LPARAM lParam) { - EnumData& data = *(EnumData*)lParam; - if (hWnd == MagApp::Get().GetHwndHost()) { - if (PtInRect(&MagApp::Get().GetHostWndRect(), data.pt) && !data.clickThroughHost) { - data.result = hWnd; - return FALSE; - } else { - return TRUE; - } - } - - // 跳过不可见的窗口 - if (!(GetWindowLongPtr(hWnd, GWL_STYLE) & WS_VISIBLE)) { - return TRUE; - } - - // 跳过透明窗口 - if (GetWindowLongPtr(hWnd, GWL_EXSTYLE) & WS_EX_TRANSPARENT) { - return TRUE; - } - - // 跳过被冻结的窗口 - UINT isCloaked{}; - DwmGetWindowAttribute(hWnd, DWMWA_CLOAKED, &isCloaked, sizeof(isCloaked)); - if (isCloaked != 0) { - return TRUE; - } - - // 对于分层窗口(Layered Window),没有公开的 API 可以检测某个像素是否透明。 - // ChildWindowFromPointEx 是一个替代方案,当命中透明像素时它将返回 NULL。 - // Windows 内部有 LayerHitTest (https://github.com/tongzx/nt5src/blob/daad8a087a4e75422ec96b7911f1df4669989611/Source/XPSP1/NT/windows/core/ntuser/kernel/winwhere.c#L21) 方法用于对分层窗口执行命中测试,虽然它没有被公开,但 ChildWindowFromPointEx 使用了它 - // 在比 Magpie 权限更高的窗口上使用会失败,失败则假设不是分层窗口 - POINT clientPt = data.pt; - ScreenToClient(hWnd, &clientPt); - SetLastError(0); - if (!ChildWindowFromPointEx(hWnd, clientPt, CWP_SKIPDISABLED | CWP_SKIPINVISIBLE | CWP_SKIPTRANSPARENT)) { - if (GetLastError() == 0) { - // 命中了透明像素 - return TRUE; - } - - // 源窗口的权限比 Magpie 更高,回落到 GetWindowRect - RECT windowRect{}; - if (!GetWindowRect(hWnd, &windowRect) || !PtInRect(&windowRect, data.pt)) { - return TRUE; - } - } - - data.result = hWnd; - return FALSE; - }, (LPARAM)&data); - - return data.result; -} - -void CursorManager::OnBeginFrame() { +void CursorManager::Update() noexcept { _UpdateCursorClip(); - if (!MagApp::Get().GetOptions().IsDrawCursor() || !_isShowCursor || !_isUnderCapture) { - // 不绘制光标 
- _curCursor = NULL; - return; - } - - if (MagApp::Get().GetOptions().Is3DGameMode()) { - HWND hwndFore = GetForegroundWindow(); - if (hwndFore != MagApp::Get().GetHwndHost() && hwndFore != MagApp::Get().GetHwndSrc()) { - _curCursor = NULL; - return; - } - } + _hCursor = NULL; + _cursorPos = { std::numeric_limits::max(),std::numeric_limits::max() }; - CURSORINFO ci{}; - ci.cbSize = sizeof(ci); - if (!::GetCursorInfo(&ci)) { - Logger::Get().Win32Error("GetCursorInfo 失败"); + const ScalingOptions& options = ScalingWindow::Get().Options(); + if (!options.IsDrawCursor() || !_shouldDrawCursor) { return; } - if (!ci.hCursor || ci.flags != CURSOR_SHOWING) { - _curCursor = NULL; + CURSORINFO ci{ .cbSize = sizeof(CURSORINFO) }; + if (!GetCursorInfo(&ci)) { + Logger::Get().Win32Error("GetCursorPos 失败"); return; } - if (!_ResolveCursor(ci.hCursor, false)) { - Logger::Get().Error("解析光标失败"); - _curCursor = NULL; + if (!ci.hCursor || ci.flags != CURSOR_SHOWING) { return; } - _curCursorPos = SrcToHost(ci.ptScreenPos, false); - _curCursor = ci.hCursor; + _hCursor = ci.hCursor; + // 不处于捕获状态则位于叠加层上 + _cursorPos = _isUnderCapture ? 
SrcToScaling(ci.ptScreenPos) : ci.ptScreenPos; + const RECT& scalingRect = ScalingWindow::Get().WndRect(); + _cursorPos.x -= scalingRect.left; + _cursorPos.y -= scalingRect.top; } -bool CursorManager::GetCursorTexture(ID3D11Texture2D** texture, CursorManager::CursorType& cursorType) { - if (_curCursorInfo->texture) { - *texture = _curCursorInfo->texture.get(); - cursorType = _curCursorInfo->type; - return true; - } - - if (!_ResolveCursor(_curCursor, true)) { - return false; - } else { - const char* cursorTypes[] = { "Color", "Masked Color", "Monochrome" }; - Logger::Get().Info(fmt::format("已解析光标:{}\n\t类型:{}", - (void*)_curCursor, cursorTypes[(int)_curCursorInfo->type])); +void CursorManager::IsCursorOnOverlay(bool value) noexcept { + if (_isOnOverlay == value) { + return; } - - *texture = _curCursorInfo->texture.get(); - cursorType = _curCursorInfo->type; - return true; -} - -void CursorManager::OnCursorCapturedOnOverlay() { - _isCapturedOnOverlay = true; - - // 用户拖动 UI 时将光标限制在输出区域内 - const RECT& outputRect = MagApp::Get().GetRenderer().GetOutputRect(); - const RECT& hostRect = MagApp::Get().GetHostWndRect(); - _curClips = { - outputRect.left + hostRect.left, - outputRect.top + hostRect.top, - outputRect.right + hostRect.left, - outputRect.bottom + hostRect.top - }; - ClipCursor(&_curClips); -} - -void CursorManager::OnCursorReleasedOnOverlay() { - _isCapturedOnOverlay = false; + _isOnOverlay = value; + _UpdateCursorClip(); } -void CursorManager::OnCursorHoverOverlay() { - _isOnOverlay = true; - _UpdateCursorClip(); -} +void CursorManager::IsCursorCapturedOnOverlay(bool value) noexcept { + if (_isCapturedOnOverlay == value) { + return; + } + _isCapturedOnOverlay = value; -void CursorManager::OnCursorLeaveOverlay() { - _isOnOverlay = false; _UpdateCursorClip(); } -static void ShowSystemCursor(bool show) { +void CursorManager::_ShowSystemCursor(bool show, bool onDestory) { + if (_isSystemCursorShown == show) { + return; + } + static void (WINAPI* const 
showSystemCursor)(BOOL bShow) = []()->void(WINAPI*)(BOOL) { HMODULE lib = LoadLibrary(L"user32.dll"); if (!lib) { @@ -333,6 +218,7 @@ static void ShowSystemCursor(bool show) { if (showSystemCursor) { showSystemCursor((BOOL)show); + _isSystemCursorShown = show; } else { // 获取 ShowSystemCursor 失败则回落到 Magnification API static bool initialized = []() { @@ -346,282 +232,15 @@ static void ShowSystemCursor(bool show) { if (initialized) { MagShowSystemCursor(show); + _isSystemCursorShown = show; } } - if (show) { - MagApp::Get().Dispatcher().TryEnqueue([]() { - if (!MagApp::Get().GetHwndHost()) { - return; - } - - // 修复有时不会立即显示光标的问题 - FrameSourceBase& frameSource = MagApp::Get().GetFrameSource(); - if (frameSource.GetName() == GraphicsCaptureFrameSource::NAME) { - GraphicsCaptureFrameSource& wgc = (GraphicsCaptureFrameSource&)frameSource; - // WGC 需要重启捕获 - // 没有用户报告这个问题,只在我的电脑上出现,可能和驱动有关 - wgc.StopCapture(); - wgc.StartCapture(); - } else { - SystemParametersInfo(SPI_SETCURSORS, 0, 0, 0); - } - }); - } -} - -void CursorManager::_StartCapture(POINT cursorPt) { - if (_isUnderCapture) { - return; - } - - // 在以下情况下进入捕获状态: - // 1. 当前未捕获 - // 2. 光标进入全屏区域 - // - // 进入捕获状态时: - // 1. 调整光标速度,全局隐藏光标 - // 2. 
将光标移到源窗口的对应位置 - // - // 在有黑边的情况下自动将光标调整到画面内 - - // 全局隐藏光标 - ShowSystemCursor(false); - - const RECT& srcFrameRect = MagApp::Get().GetFrameSource().GetSrcFrameRect(); - const RECT& hostRect = MagApp::Get().GetHostWndRect(); - const RECT& outputRect = MagApp::Get().GetRenderer().GetOutputRect(); - - SIZE srcFrameSize = Win32Utils::GetSizeOfRect(srcFrameRect); - SIZE outputSize = Win32Utils::GetSizeOfRect(outputRect); - - if (MagApp::Get().GetOptions().IsAdjustCursorSpeed()) { - _AdjustCursorSpeed(); - } - - // 移动光标位置 - - // 跳过黑边 - cursorPt.x = std::clamp(cursorPt.x, hostRect.left + outputRect.left, hostRect.left + outputRect.right - 1); - cursorPt.y = std::clamp(cursorPt.y, hostRect.top + outputRect.top, hostRect.top + outputRect.bottom - 1); - - POINT newCursorPos = HostToSrc(cursorPt); - SetCursorPos(newCursorPos.x, newCursorPos.y); - - _isUnderCapture = true; + ScalingWindow::Get().Renderer().OnCursorVisibilityChanged(show, onDestory); } -void CursorManager::_StopCapture(POINT cursorPos, bool onDestroy) { - if (!_isUnderCapture) { - return; - } - - if (_curClips != RECT{}) { - _curClips = {}; - ClipCursor(nullptr); - } - - // 在以下情况下离开捕获状态: - // 1. 当前处于捕获状态 - // 2. 光标离开源窗口客户区 - // 3. 目标位置存在屏幕 - // - // 离开捕获状态时 - // 1. 还原光标速度,全局显示光标 - // 2. 
将光标移到全屏窗口外的对应位置 - // - // 在有黑边的情况下自动将光标调整到全屏窗口外 - - POINT newCursorPos = SrcToHost(cursorPos, true); - - if (onDestroy || MonitorFromPoint(newCursorPos, MONITOR_DEFAULTTONULL)) { - SetCursorPos(newCursorPos.x, newCursorPos.y); - - if (MagApp::Get().GetOptions().IsAdjustCursorSpeed()) { - SystemParametersInfo(SPI_SETMOUSESPEED, 0, (PVOID)(intptr_t)_cursorSpeed, 0); - } - - ShowSystemCursor(true); - _isUnderCapture = false; - } else { - // 目标位置不存在屏幕,则将光标限制在源窗口内 - const RECT& srcFrameRect = MagApp::Get().GetFrameSource().GetSrcFrameRect(); - SetCursorPos( - std::clamp(cursorPos.x, srcFrameRect.left, srcFrameRect.right - 1), - std::clamp(cursorPos.y, srcFrameRect.top, srcFrameRect.bottom - 1) - ); - } -} - -bool CursorManager::_ResolveCursor(HCURSOR hCursor, bool resolveTexture) { - auto it = _cursorInfos.find(hCursor); - if (it != _cursorInfos.end() && (!resolveTexture || (resolveTexture && _curCursorInfo->texture))) { - _curCursorInfo = &it->second; - return true; - } - - ICONINFO ii{}; - if (!GetIconInfo(hCursor, &ii)) { - Logger::Get().Win32Error("GetIconInfo 失败"); - return false; - } - - Utils::ScopeExit se([&ii]() { - if (ii.hbmColor) { - DeleteBitmap(ii.hbmColor); - } - DeleteBitmap(ii.hbmMask); - }); - - BITMAP bmp{}; - if (!GetObject(ii.hbmMask, sizeof(bmp), &bmp)) { - Logger::Get().Win32Error("GetObject 失败"); - return false; - } - - _curCursorInfo = it == _cursorInfos.end() ? &_cursorInfos[hCursor] : &it->second; - - _curCursorInfo->hotSpot = { (LONG)ii.xHotspot, (LONG)ii.yHotspot }; - // 单色光标的 hbmMask 高度为实际高度的两倍 - _curCursorInfo->size = { bmp.bmWidth, ii.hbmColor ? 
bmp.bmHeight : bmp.bmHeight / 2 }; - - if (!resolveTexture) { - return true; - } - - auto& dr = MagApp::Get().GetDeviceResources(); - - BITMAPINFO bi{}; - bi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); - bi.bmiHeader.biWidth = bmp.bmWidth; - bi.bmiHeader.biHeight = -bmp.bmHeight; - bi.bmiHeader.biPlanes = 1; - bi.bmiHeader.biCompression = BI_RGB; - bi.bmiHeader.biBitCount = 32; - bi.bmiHeader.biSizeImage = bmp.bmWidth * bmp.bmHeight * 4; - - if (ii.hbmColor == NULL) { - // 单色光标 - _curCursorInfo->type = CursorType::Monochrome; - - std::unique_ptr pixels(new BYTE[bi.bmiHeader.biSizeImage]); - HDC hdc = GetDC(NULL); - if (GetDIBits(hdc, ii.hbmMask, 0, bmp.bmHeight, pixels.get(), &bi, DIB_RGB_COLORS) != bmp.bmHeight) { - Logger::Get().Win32Error("GetDIBits 失败"); - ReleaseDC(NULL, hdc); - return false; - } - ReleaseDC(NULL, hdc); - - // 红色通道是 AND 掩码,绿色通道是 XOR 掩码 - // 这里将下半部分的 XOR 掩码复制到上半部分的绿色通道中 - const int halfSize = bi.bmiHeader.biSizeImage / 8; - BYTE* upPtr = &pixels[1]; - BYTE* downPtr = &pixels[static_cast(halfSize) * 4]; - for (int i = 0; i < halfSize; ++i) { - *upPtr = *downPtr; - - upPtr += 4; - downPtr += 4; - } - - D3D11_SUBRESOURCE_DATA initData{}; - initData.pSysMem = pixels.get(); - initData.SysMemPitch = bmp.bmWidth * 4; - - _curCursorInfo->texture = dr.CreateTexture2D( - DXGI_FORMAT_R8G8B8A8_UNORM, - bmp.bmWidth, - bmp.bmHeight / 2, - D3D11_BIND_SHADER_RESOURCE, - D3D11_USAGE_IMMUTABLE, - 0, - &initData - ); - if (!_curCursorInfo->texture) { - Logger::Get().Error("创建纹理失败"); - return false; - } - - return true; - } - - std::unique_ptr pixels(new BYTE[bi.bmiHeader.biSizeImage]); - HDC hdc = GetDC(NULL); - if (GetDIBits(hdc, ii.hbmColor, 0, bmp.bmHeight, pixels.get(), &bi, DIB_RGB_COLORS) != bmp.bmHeight) { - Logger::Get().Win32Error("GetDIBits 失败"); - ReleaseDC(NULL, hdc); - return false; - } - ReleaseDC(NULL, hdc); - - // 若颜色掩码有 A 通道,则是彩色光标,否则是彩色掩码光标 - bool hasAlpha = false; - for (UINT i = 3; i < bi.bmiHeader.biSizeImage; i += 4) { - if (pixels[i] != 0) 
{ - hasAlpha = true; - break; - } - } - - if (hasAlpha) { - // 彩色光标 - _curCursorInfo->type = CursorType::Color; - - for (size_t i = 0; i < bi.bmiHeader.biSizeImage; i += 4) { - // 预乘 Alpha 通道 - double alpha = pixels[i + 3] / 255.0f; - - BYTE b = (BYTE)std::lround(pixels[i] * alpha); - pixels[i] = (BYTE)std::lround(pixels[i + 2] * alpha); - pixels[i + 1] = (BYTE)std::lround(pixels[i + 1] * alpha); - pixels[i + 2] = b; - - pixels[i + 3] = 255 - pixels[i + 3]; - } - } else { - // 彩色掩码光标 - _curCursorInfo->type = CursorType::MaskedColor; - - std::unique_ptr maskPixels(new BYTE[bi.bmiHeader.biSizeImage]); - hdc = GetDC(NULL); - if (GetDIBits(hdc, ii.hbmMask, 0, bmp.bmHeight, maskPixels.get(), &bi, DIB_RGB_COLORS) != bmp.bmHeight) { - Logger::Get().Win32Error("GetDIBits 失败"); - ReleaseDC(NULL, hdc); - return false; - } - ReleaseDC(NULL, hdc); - - // 将 XOR 掩码复制到透明通道中 - for (size_t i = 0; i < bi.bmiHeader.biSizeImage; i += 4) { - std::swap(pixels[i], pixels[i + 2]); - pixels[i + 3] = maskPixels[i]; - } - } - - D3D11_SUBRESOURCE_DATA initData{}; - initData.pSysMem = &pixels[0]; - initData.SysMemPitch = bmp.bmWidth * 4; - - _curCursorInfo->texture = dr.CreateTexture2D( - DXGI_FORMAT_R8G8B8A8_UNORM, - bmp.bmWidth, - bmp.bmHeight, - D3D11_BIND_SHADER_RESOURCE, - D3D11_USAGE_IMMUTABLE, - 0, - &initData - ); - if (!_curCursorInfo->texture) { - Logger::Get().Error("创建纹理失败"); - return false; - } - - return true; -} - -void CursorManager::_AdjustCursorSpeed() { - if (!SystemParametersInfo(SPI_GETMOUSESPEED, 0, &_cursorSpeed, 0)) { +void CursorManager::_AdjustCursorSpeed() noexcept { + if (!SystemParametersInfo(SPI_GETMOUSESPEED, 0, &_originCursorSpeed, 0)) { Logger::Get().Win32Error("获取光标移速失败"); return; } @@ -637,24 +256,25 @@ void CursorManager::_AdjustCursorSpeed() { } } - SIZE srcFrameSize = Win32Utils::GetSizeOfRect(MagApp::Get().GetFrameSource().GetSrcFrameRect()); - SIZE virtualOutputSize = Win32Utils::GetSizeOfRect(MagApp::Get().GetRenderer().GetVirtualOutputRect()); - double 
scale = ((double)virtualOutputSize.cx / srcFrameSize.cx + (double)virtualOutputSize.cy / srcFrameSize.cy) / 2; + const Renderer& renderer = ScalingWindow::Get().Renderer(); + const SIZE srcSize = Win32Utils::GetSizeOfRect(renderer.SrcRect()); + const SIZE destSize = Win32Utils::GetSizeOfRect(renderer.DestRect()); + const double scale = ((double)destSize.cx / srcSize.cx + (double)destSize.cy / srcSize.cy) / 2; INT newSpeed = 0; // “提高指针精确度”(鼠标加速)打开时光标移速的调整为线性,否则为非线性 // 参见 https://liquipedia.net/counterstrike/Mouse_Settings#Windows_Sensitivity if (isMouseAccelerationOn) { - newSpeed = std::clamp((INT)lround(_cursorSpeed / scale), 1, 20); + newSpeed = std::clamp((INT)lround(_originCursorSpeed / scale), 1, 20); } else { static constexpr std::array SENSITIVITIES = { 0.03125, 0.0625, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0, 1.25, 1.5, 1.75, 2, 2.25, 2.5, 2.75, 3, 3.25, 3.5 }; - _cursorSpeed = std::clamp(_cursorSpeed, 1, 20); - double newSensitivity = SENSITIVITIES[static_cast(_cursorSpeed) - 1] / scale; + _originCursorSpeed = std::clamp(_originCursorSpeed, 1, 20); + double newSensitivity = SENSITIVITIES[static_cast(_originCursorSpeed) - 1] / scale; auto it = std::lower_bound(SENSITIVITIES.begin(), SENSITIVITIES.end(), newSensitivity - 1e-6); newSpeed = INT(it - SENSITIVITIES.begin()) + 1; @@ -672,51 +292,170 @@ void CursorManager::_AdjustCursorSpeed() { } } -void CursorManager::_UpdateCursorClip() { +// 检测光标位于哪个窗口上,是否检测缩放窗口由 clickThroughHost 指定 +static HWND WindowFromPoint(HWND hwndScaling, const RECT& scalingWndRect, POINT pt, bool clickThroughHost) noexcept { + struct EnumData { + HWND result; + HWND hwndScaling; + RECT scalingWndRect; + POINT pt; + bool clickThroughHost; + } data{ NULL, hwndScaling, scalingWndRect, pt, clickThroughHost }; + + EnumWindows([](HWND hWnd, LPARAM lParam) { + EnumData& data = *(EnumData*)lParam; + if (hWnd == data.hwndScaling) { + if (PtInRect(&data.scalingWndRect, data.pt) && !data.clickThroughHost) { + data.result = hWnd; + 
return FALSE; + } else { + return TRUE; + } + } + + // 跳过不可见的窗口 + if (!Win32Utils::IsWindowVisible(hWnd)) { + return TRUE; + } + + // 跳过透明窗口 + const LONG_PTR exStyle = GetWindowLongPtr(hWnd, GWL_EXSTYLE); + if (exStyle & WS_EX_TRANSPARENT) { + return TRUE; + } + + // 检查光标是否在窗口内 + RECT windowRect; + if (!GetWindowRect(hWnd, &windowRect) || !PtInRect(&windowRect, data.pt)) { + return TRUE; + } + + // 跳过被冻结的窗口。这个调用比较耗时,因此稍晚检查 + { + UINT isCloaked = 0; + HRESULT hr = DwmGetWindowAttribute(hWnd, DWMWA_CLOAKED, &isCloaked, sizeof(isCloaked)); + if (SUCCEEDED(hr) && isCloaked) { + return TRUE; + } + } + + // 检查使用 SetWindowRgn 自定义形状的窗口 + { + static HRGN hRgn = CreateRectRgn(0, 0, 0, 0); + int regionType = GetWindowRgn(hWnd, hRgn); + if (regionType == SIMPLEREGION || regionType == COMPLEXREGION) { + if (!PtInRegion(hRgn, data.pt.x - windowRect.left, data.pt.y - windowRect.top)) { + return TRUE; + } + } + } + + // 检查分层窗口 (layered window) 的透明区域 + if (exStyle & WS_EX_LAYERED) { + RECT clientRect; + if (!Win32Utils::GetClientScreenRect(hWnd, clientRect)) { + return TRUE; + } + + // 分层窗口只有客户区允许透明区域 + if (PtInRect(&clientRect, data.pt)) { + // 没有公开的 API 可以检测分层窗口的某个像素是否透明。ChildWindowFromPointEx 是 + // 一个替代方案,当命中透明像素时它将返回 NULL。 + // Windows 内部有 LayerHitTest方法用于对分层窗口执行命中测试,虽然它没有被公开, + // 但 ChildWindowFromPointEx 使用了它。 + // 见 https://github.com/tongzx/nt5src/blob/daad8a087a4e75422ec96b7911f1df4669989611/Source/XPSP1/NT/windows/core/ntuser/kernel/winwhere.c#L21 + POINT clientPt{ data.pt.x - clientRect.left, data.pt.y - clientRect.top }; + if (!ChildWindowFromPointEx(hWnd, clientPt, CWP_SKIPINVISIBLE | CWP_SKIPTRANSPARENT)) { + // 命中了透明像素或失败 + return TRUE; + } + } + } + + data.result = hWnd; + return FALSE; + }, (LPARAM)&data); + + return data.result; +} + +void CursorManager::_UpdateCursorClip() noexcept { + const ScalingOptions& options = ScalingWindow::Get().Options(); + const Renderer& renderer = ScalingWindow::Get().Renderer(); + const RECT& srcRect = renderer.SrcRect(); + const 
RECT& destRect = renderer.DestRect(); + // 优先级: - // 1. 断点模式:不限制,捕获/取消捕获,支持 UI - // 2. 在 3D 游戏中限制光标:每帧都限制一次,不退出捕获,因此无法使用 UI,不支持多屏幕 + // 1. 调试模式:不限制,不捕获 + // 2. 3D 游戏模式:每帧都限制一次,不退出捕获,不支持多屏幕 // 3. 常规:根据多屏幕限制光标,捕获/取消捕获,支持 UI 和多屏幕 - const RECT& srcFrameRect = MagApp::Get().GetFrameSource().GetSrcFrameRect(); + if (options.IsDebugMode()) { + if (_isCapturedOnOverlay) { + // 光标被叠加层捕获时将光标限制在输出区域内 + ClipCursor(&destRect); + } else { + ClipCursor(nullptr); + } - if (!MagApp::Get().GetOptions().IsDebugMode() && MagApp::Get().GetOptions().Is3DGameMode()) { + return; + } + + if (options.Is3DGameMode()) { // 开启“在 3D 游戏中限制光标”则每帧都限制一次光标 - _curClips = srcFrameRect; - ClipCursor(&srcFrameRect); + ClipCursor(&srcRect); return; } if (_isCapturedOnOverlay) { - // 已在 OnCursorCapturedOnOverlay 中限制光标 + // 光标被叠加层捕获时将光标限制在输出区域内 + ClipCursor(&destRect); return; } - const HWND hwndHost = MagApp::Get().GetHwndHost(); - const HWND hwndSrc = MagApp::Get().GetHwndSrc(); - const RECT& hostRect = MagApp::Get().GetHostWndRect(); - - const RECT& outputRect = MagApp::Get().GetRenderer().GetOutputRect(); - const RECT& virtualOutputRect = MagApp::Get().GetRenderer().GetVirtualOutputRect(); + // 如果前台窗口捕获了光标,应避免在光标移入/移出缩放窗口或叠加层时跳跃。为了解决 + // 前一个问题,此时则将光标限制在前台窗口内,因此不会移出缩放窗口。为了解决后一个 + // 问题,叠加层将不会试图捕获光标。 + GUITHREADINFO info{ .cbSize = sizeof(info) }; + if (GetGUIThreadInfo(NULL, &info)) { + if (info.hwndCapture) { + _isCapturedOnForeground = true; - const SIZE outputSize = Win32Utils::GetSizeOfRect(outputRect); - const SIZE srcFrameSize = Win32Utils::GetSizeOfRect(srcFrameRect); - const SIZE virtualOutputSize = Win32Utils::GetSizeOfRect(virtualOutputRect); + // 如果光标不在缩放窗口内不应限制光标 + if (_isUnderCapture) { + ClipCursor(&srcRect); + } + + // 当光标被前台窗口捕获时我们除了限制光标外什么也不做,即光标 + // 可以在缩放窗口上自由移动 + return; + } else { + _isCapturedOnForeground = false; + } + } else { + _isCapturedOnForeground = false; + } - INT_PTR style = GetWindowLongPtr(hwndHost, GWL_EXSTYLE); + const HWND hwndScaling = 
ScalingWindow::Get().Handle(); + const RECT scalingRect = ScalingWindow::Get().WndRect(); + const HWND hwndSrc = ScalingWindow::Get().HwndSrc(); + + INT_PTR style = GetWindowLongPtr(hwndScaling, GWL_EXSTYLE); POINT cursorPos; - if (!::GetCursorPos(&cursorPos)) { + if (!GetCursorPos(&cursorPos)) { Logger::Get().Win32Error("GetCursorPos 失败"); return; } + const POINT originCursorPos = cursorPos; + if (_isUnderCapture) { /////////////////////////////////////////////////////////// // // 处于捕获状态 // -------------------------------------------------------- - // | 虚拟位置被遮挡 | 虚拟位置未被遮挡 + // | 缩放位置被遮挡 | 缩放位置未被遮挡 // -------------------------------------------------------- // 实际位置被遮挡 | 退出捕获 | 退出捕获,主窗口不透明 // -------------------------------------------------------- @@ -725,171 +464,253 @@ void CursorManager::_UpdateCursorClip() { // /////////////////////////////////////////////////////////// - HWND hwndCur = WindowFromPoint(SrcToHost(cursorPos, true), false); - - if (hwndCur != hwndHost) { - // 主窗口被遮挡 - if (style | WS_EX_TRANSPARENT) { - SetWindowLongPtr(hwndHost, GWL_EXSTYLE, style & ~WS_EX_TRANSPARENT); - } + HWND hwndCur = WindowFromPoint(hwndScaling, scalingRect, SrcToScaling(cursorPos), false); + _shouldDrawCursor = hwndCur == hwndScaling; - _StopCapture(cursorPos); - } else { - // 主窗口未被遮挡 + if (_shouldDrawCursor) { + // 缩放窗口未被遮挡 bool stopCapture = _isOnOverlay; if (!stopCapture) { // 判断源窗口是否被遮挡 - hwndCur = WindowFromPoint(cursorPos, true); + hwndCur = WindowFromPoint(hwndScaling, scalingRect, cursorPos, true); stopCapture = hwndCur != hwndSrc && (!IsChild(hwndSrc, hwndCur) || !((GetWindowStyle(hwndCur) & WS_CHILD))); } if (stopCapture) { if (style | WS_EX_TRANSPARENT) { - SetWindowLongPtr(hwndHost, GWL_EXSTYLE, style & ~WS_EX_TRANSPARENT); + SetWindowLongPtr(hwndScaling, GWL_EXSTYLE, style & ~WS_EX_TRANSPARENT); } + // 源窗口被遮挡或者光标位于叠加层上,这时虽然停止捕获光标,但依然将光标隐藏 _StopCapture(cursorPos); } else { - if (!(style & WS_EX_TRANSPARENT)) { - SetWindowLongPtr(hwndHost, GWL_EXSTYLE, style | 
WS_EX_TRANSPARENT); + if (_isOnOverlay) { + if (style | WS_EX_TRANSPARENT) { + SetWindowLongPtr(hwndScaling, GWL_EXSTYLE, style & ~WS_EX_TRANSPARENT); + } + } else { + if (!(style & WS_EX_TRANSPARENT)) { + SetWindowLongPtr(hwndScaling, GWL_EXSTYLE, style | WS_EX_TRANSPARENT); + } } } + } else { + // 缩放窗口被遮挡 + if (style | WS_EX_TRANSPARENT) { + SetWindowLongPtr(hwndScaling, GWL_EXSTYLE, style & ~WS_EX_TRANSPARENT); + } + + if (!_StopCapture(cursorPos)) { + _shouldDrawCursor = true; + } } } else { ///////////////////////////////////////////////////////// // // 未处于捕获状态 // ----------------------------------------------------- - // | 虚拟位置被遮挡 | 虚拟位置未被遮挡 + // | 缩放位置被遮挡 | 缩放位置未被遮挡 // ------------------------------------------------------ - // 实际位置被遮挡 | 无操作 | 主窗口不透明 + // 实际位置被遮挡 | 无操作 | 缩放窗口不透明 // ------------------------------------------------------ // 实际位置未被遮挡 | 无操作 | 开始捕获,主窗口透明 // ------------------------------------------------------ // ///////////////////////////////////////////////////////// - HWND hwndCur = WindowFromPoint(cursorPos, false); + HWND hwndCur = WindowFromPoint(hwndScaling, scalingRect, cursorPos, false); + _shouldDrawCursor = hwndCur == hwndScaling; + + if (_shouldDrawCursor) { + // 缩放窗口未被遮挡 + POINT newCursorPos = ScalingToSrc(cursorPos); + + if (PtInRect(&srcRect, newCursorPos)) { + bool startCapture = !_isOnOverlay; - if (hwndCur == hwndHost) { - // 主窗口未被遮挡 - POINT newCursorPos = HostToSrc(cursorPos); + if (startCapture) { + // 判断源窗口是否被遮挡 + hwndCur = WindowFromPoint(hwndScaling, scalingRect, newCursorPos, true); + startCapture = hwndCur == hwndSrc || ((IsChild(hwndSrc, hwndCur) && (GetWindowStyle(hwndCur) & WS_CHILD))); + } - if (!PtInRect(&srcFrameRect, newCursorPos)) { + if (startCapture) { + if (!(style & WS_EX_TRANSPARENT)) { + SetWindowLongPtr(hwndScaling, GWL_EXSTYLE, style | WS_EX_TRANSPARENT); + } + + _StartCapture(cursorPos); + } else { + if (style | WS_EX_TRANSPARENT) { + SetWindowLongPtr(hwndScaling, GWL_EXSTYLE, style & 
~WS_EX_TRANSPARENT); + } + } + } else { // 跳过黑边 if (_isOnOverlay) { // 从内部移到外部 // 此时有 UI 贴边 - if (newCursorPos.x >= srcFrameRect.right) { - cursorPos.x += hostRect.right - hostRect.left - outputRect.right; - } else if (newCursorPos.x < srcFrameRect.left) { - cursorPos.x -= outputRect.left; + if (newCursorPos.x >= srcRect.right) { + cursorPos.x += scalingRect.right - destRect.right; + } else if (newCursorPos.x < srcRect.left) { + cursorPos.x -= destRect.left - scalingRect.left; } - if (newCursorPos.y >= srcFrameRect.bottom) { - cursorPos.y += hostRect.bottom - hostRect.top - outputRect.bottom; - } else if (newCursorPos.y < srcFrameRect.top) { - cursorPos.y -= outputRect.top; + if (newCursorPos.y >= srcRect.bottom) { + cursorPos.y += scalingRect.bottom - destRect.bottom; + } else if (newCursorPos.y < srcRect.top) { + cursorPos.y -= destRect.top - scalingRect.top; } - if (MonitorFromPoint(cursorPos, MONITOR_DEFAULTTONULL)) { - SetCursorPos(cursorPos.x, cursorPos.y); - } else { + if (!MonitorFromPoint(cursorPos, MONITOR_DEFAULTTONULL)) { // 目标位置不存在屏幕,则将光标限制在输出区域内 - SetCursorPos( - std::clamp(cursorPos.x, hostRect.left + outputRect.left, hostRect.left + outputRect.right - 1), - std::clamp(cursorPos.y, hostRect.top + outputRect.top, hostRect.top + outputRect.bottom - 1) - ); + cursorPos.x = std::clamp(cursorPos.x, destRect.left, destRect.right - 1); + cursorPos.y = std::clamp(cursorPos.y, destRect.top, destRect.bottom - 1); } } else { // 从外部移到内部 - - POINT clampedPos = { - std::clamp(cursorPos.x, hostRect.left + outputRect.left, hostRect.left + outputRect.right - 1), - std::clamp(cursorPos.y, hostRect.top + outputRect.top, hostRect.top + outputRect.bottom - 1) + const POINT clampedPos{ + std::clamp(cursorPos.x, destRect.left, destRect.right - 1), + std::clamp(cursorPos.y, destRect.top, destRect.bottom - 1) }; - if (WindowFromPoint(clampedPos, false) == hwndHost) { + if (WindowFromPoint(hwndScaling, scalingRect, clampedPos, false) == hwndScaling) { if (!(style & 
WS_EX_TRANSPARENT)) { - SetWindowLongPtr(hwndHost, GWL_EXSTYLE, style | WS_EX_TRANSPARENT); + SetWindowLongPtr(hwndScaling, GWL_EXSTYLE, style | WS_EX_TRANSPARENT); } _StartCapture(cursorPos); } else { // 要跳跃的位置被遮挡 if (style | WS_EX_TRANSPARENT) { - SetWindowLongPtr(hwndHost, GWL_EXSTYLE, style & ~WS_EX_TRANSPARENT); + SetWindowLongPtr(hwndScaling, GWL_EXSTYLE, style & ~WS_EX_TRANSPARENT); } } } - } else { - bool startCapture = !_isOnOverlay; + } + } + } - if (startCapture) { - // 判断源窗口是否被遮挡 - hwndCur = WindowFromPoint(newCursorPos, true); - startCapture = hwndCur == hwndSrc || ((IsChild(hwndSrc, hwndCur) && (GetWindowStyle(hwndCur) & WS_CHILD))); - } + // 只要光标缩放后的位置在缩放窗口上,且该位置未被其他窗口遮挡,便可以隐藏光标。 + // 即使当前并未捕获光标也是如此。 + _ShowSystemCursor(!_shouldDrawCursor); - if (startCapture) { - if (!(style & WS_EX_TRANSPARENT)) { - SetWindowLongPtr(hwndHost, GWL_EXSTYLE, style | WS_EX_TRANSPARENT); - } + if (_shouldDrawCursor) { + // 根据当前光标位置的四个方向有无屏幕来确定应该在哪些方向限制光标,但这无法 + // 处理屏幕之间存在间隙的情况。解决办法是 _StopCapture 只在目标位置存在屏幕时才取消捕获, + // 当光标试图移动到间隙中时将被挡住。如果光标的速度足以跨越间隙,则它依然可以在屏幕间移动。 + POINT hostPos = _isUnderCapture ? SrcToScaling(cursorPos) : cursorPos; - _StartCapture(cursorPos); - } else { - if (style | WS_EX_TRANSPARENT) { - SetWindowLongPtr(hwndHost, GWL_EXSTYLE, style & ~WS_EX_TRANSPARENT); - } - } - } + RECT clips{ LONG_MIN, LONG_MIN, LONG_MAX, LONG_MAX }; + + // left + RECT rect{ LONG_MIN, hostPos.y, scalingRect.left, hostPos.y + 1 }; + if (!MonitorFromRect(&rect, MONITOR_DEFAULTTONULL)) { + clips.left = _isUnderCapture ? srcRect.left : destRect.left; + } + + // top + rect = { hostPos.x, LONG_MIN, hostPos.x + 1, scalingRect.top }; + if (!MonitorFromRect(&rect, MONITOR_DEFAULTTONULL)) { + clips.top = _isUnderCapture ? srcRect.top : destRect.top; + } + + // right + rect = { scalingRect.right, hostPos.y, LONG_MAX, hostPos.y + 1 }; + if (!MonitorFromRect(&rect, MONITOR_DEFAULTTONULL)) { + clips.right = _isUnderCapture ? 
srcRect.right : destRect.right; } + + // bottom + rect = { hostPos.x, scalingRect.bottom, hostPos.x + 1, LONG_MAX }; + if (!MonitorFromRect(&rect, MONITOR_DEFAULTTONULL)) { + clips.bottom = _isUnderCapture ? srcRect.bottom : destRect.bottom; + } + + ClipCursor(&clips); + } else { + ClipCursor(nullptr); } - if (MagApp::Get().GetOptions().IsDebugMode()) { - return; + // SetCursorPos 应在 ClipCursor 之后,否则会受到上一次 ClipCursor 的影响 + if (cursorPos != originCursorPos) { + ReliableSetCursorPos(cursorPos); } +} - if (!_isOnOverlay && !_isUnderCapture) { +void CursorManager::_StartCapture(POINT& cursorPos) noexcept { + if (_isUnderCapture) { return; } - // 根据当前光标位置的四个方向有无屏幕来确定应该在哪些方向限制光标,但这无法 - // 处理屏幕之间存在间隙的情况。解决办法是 _StopCapture 只在目标位置存在屏幕时才取消捕获, - // 当光标试图移动到间隙中时将被挡住。如果光标的速度足以跨越间隙,则它依然可以在屏幕间移动。 - ::GetCursorPos(&cursorPos); - POINT hostPos = _isOnOverlay ? cursorPos : SrcToHost(cursorPos, true); + const Renderer& renderer = ScalingWindow::Get().Renderer(); + const RECT& srcRect = renderer.SrcRect(); + const RECT& destRect = renderer.DestRect(); + + // 在以下情况下进入捕获状态: + // 1. 当前未捕获 + // 2. 光标进入全屏区域 + // + // 进入捕获状态时: + // 1. 调整光标速度,全局隐藏光标 + // 2. 将光标移到源窗口的对应位置 + // + // 在有黑边的情况下自动将光标调整到画面内 - RECT clips{ LONG_MIN, LONG_MIN, LONG_MAX, LONG_MAX }; + SIZE srcFrameSize = Win32Utils::GetSizeOfRect(srcRect); + SIZE outputSize = Win32Utils::GetSizeOfRect(destRect); - // left - RECT rect{ LONG_MIN, hostPos.y, hostRect.left, hostPos.y + 1 }; - if (!MonitorFromRect(&rect, MONITOR_DEFAULTTONULL)) { - clips.left = _isOnOverlay ? outputRect.left + hostRect.left : srcFrameRect.left; + if (ScalingWindow::Get().Options().IsAdjustCursorSpeed()) { + _AdjustCursorSpeed(); } - // top - rect = { hostPos.x, LONG_MIN, hostPos.x + 1,hostRect.top }; - if (!MonitorFromRect(&rect, MONITOR_DEFAULTTONULL)) { - clips.top = _isOnOverlay ? 
outputRect.top + hostRect.top : srcFrameRect.top; - } + // 移动光标位置 - // right - rect = { hostRect.right, hostPos.y, LONG_MAX, hostPos.y + 1 }; - if (!MonitorFromRect(&rect, MONITOR_DEFAULTTONULL)) { - clips.right = _isOnOverlay ? outputRect.right + hostRect.left : srcFrameRect.right; - } + // 跳过黑边 + cursorPos.x = std::clamp(cursorPos.x, destRect.left, destRect.right - 1); + cursorPos.y = std::clamp(cursorPos.y, destRect.top, destRect.bottom - 1); - // bottom - rect = { hostPos.x, hostRect.bottom, hostPos.x + 1, LONG_MAX }; - if (!MonitorFromRect(&rect, MONITOR_DEFAULTTONULL)) { - clips.bottom = _isOnOverlay ? outputRect.bottom + hostRect.top : srcFrameRect.bottom; + cursorPos = ScalingToSrc(cursorPos); + + _isUnderCapture = true; +} + +bool CursorManager::_StopCapture(POINT& cursorPos, bool onDestroy) noexcept { + if (!_isUnderCapture) { + return true; } - if (clips != _curClips) { - _curClips = clips; - ClipCursor(&clips); + // 在以下情况下离开捕获状态: + // 1. 当前处于捕获状态 + // 2. 光标离开源窗口客户区 + // 3. 目标位置存在屏幕 + // + // 离开捕获状态时 + // 1. 还原光标速度,全局显示光标 + // 2. 
将光标移到全屏窗口外的对应位置 + // + // 在有黑边的情况下自动将光标调整到全屏窗口外 + + POINT newCursorPos = SrcToScaling(cursorPos); + + if (onDestroy || MonitorFromPoint(newCursorPos, MONITOR_DEFAULTTONULL)) { + cursorPos = newCursorPos; + + if (ScalingWindow::Get().Options().IsAdjustCursorSpeed()) { + SystemParametersInfo(SPI_SETMOUSESPEED, 0, (PVOID)(intptr_t)_originCursorSpeed, 0); + } + + _isUnderCapture = false; + return true; + } else { + // 目标位置不存在屏幕,则将光标限制在源窗口内 + const RECT& srcRect = ScalingWindow::Get().Renderer().SrcRect(); + + cursorPos.x = std::clamp(cursorPos.x, srcRect.left, srcRect.right - 1); + cursorPos.y = std::clamp(cursorPos.y, srcRect.top, srcRect.bottom - 1); + + return false; } } diff --git a/src/Magpie.Core/CursorManager.h b/src/Magpie.Core/CursorManager.h index e83c08001..d5f16fdfe 100644 --- a/src/Magpie.Core/CursorManager.h +++ b/src/Magpie.Core/CursorManager.h @@ -1,5 +1,4 @@ #pragma once -#include namespace Magpie::Core { @@ -9,99 +8,61 @@ class CursorManager { CursorManager(const CursorManager&) = delete; CursorManager(CursorManager&&) = delete; - ~CursorManager(); + ~CursorManager() noexcept; - bool Initialize(); + bool Initialize() noexcept; - void OnBeginFrame(); + void Update() noexcept; - bool HasCursor() const { - return !!_curCursor; + HCURSOR Cursor() const noexcept { + return _hCursor; } - const POINT* GetCursorPos() const { - return _curCursor ? &_curCursorPos : nullptr; + // 缩放窗口局部坐标 + POINT CursorPos() const noexcept { + return _cursorPos; } - struct CursorInfo { - POINT hotSpot{}; - SIZE size{}; - }; - const CursorInfo* GetCursorInfo() const { - return _curCursor ? 
_curCursorInfo : nullptr; - } - - enum class CursorType { - // 彩色光标,此时纹理中 RGB 通道已预乘 A 通道(premultiplied alpha),A 通道已预先取反 - // 这是为了减少着色器的计算量以及确保(可能进行的)双线性差值的准确性 - // 计算公式:FinalColor = ScreenColor * CursorColor.a + CursorColor.rgb - Color = 0, - // 彩色掩码光标,此时 A 通道可能为 0 或 255 - // 为 0 时表示 RGB 通道取代屏幕颜色,为 255 时表示 RGB 通道和屏幕颜色进行异或操作 - MaskedColor, - // 单色光标,此时 R 通道为 AND 掩码,G 通道为 XOR 掩码,其他通道不使用 - // RG 通道的值只能是 0 或 255 - Monochrome - }; - bool GetCursorTexture(ID3D11Texture2D** texture, CursorManager::CursorType& cursorType); - - void OnCursorCapturedOnOverlay(); - - void OnCursorReleasedOnOverlay(); - - void OnCursorHoverOverlay(); - - void OnCursorLeaveOverlay(); - - bool IsCursorCapturedOnOverlay() const noexcept { - return _isCapturedOnOverlay; + bool IsCursorCapturedOnForeground() const noexcept { + return _isCapturedOnForeground; } bool IsCursorOnOverlay() const noexcept { return _isOnOverlay; } + void IsCursorOnOverlay(bool value) noexcept; - void Show() { - _isShowCursor = true; - } - - void Hide() { - _isShowCursor = false; + bool IsCursorCapturedOnOverlay() const noexcept { + return _isCapturedOnOverlay; } + void IsCursorCapturedOnOverlay(bool value) noexcept; private: - void _StartCapture(POINT cursorPos); + void _ShowSystemCursor(bool show, bool onDestory = false); - void _StopCapture(POINT cursorPos, bool onDestroy = false); + void _AdjustCursorSpeed() noexcept; - bool _ResolveCursor(HCURSOR hCursor, bool resolveTexture); + void _UpdateCursorClip() noexcept; - void _AdjustCursorSpeed(); + void _StartCapture(POINT& cursorPos) noexcept; - void _UpdateCursorClip(); + bool _StopCapture(POINT& cursorPos, bool onDestroy = false) noexcept; - uint32_t _handlerId = 0; - bool _isShowCursor = true; - - bool _isUnderCapture = false; - RECT _curClips{}; + HCURSOR _hCursor = NULL; + POINT _cursorPos { std::numeric_limits::max(),std::numeric_limits::max() }; - bool _isCapturedOnOverlay = false; - bool _isOnOverlay = false; + int _originCursorSpeed = 0; - INT _cursorSpeed = 0; + 
bool _isUnderCapture = false; + // 当缩放后的光标位置在缩放窗口上且没有被其他窗口挡住时应绘制光标 + bool _shouldDrawCursor = false; - // 当前帧的光标,光标不可见则为 NULL - HCURSOR _curCursor = NULL; - POINT _curCursorPos{}; + bool _isCapturedOnForeground = false; - struct _CursorInfo : CursorInfo { - winrt::com_ptr texture = nullptr; - CursorType type = CursorType::Color; - }; - _CursorInfo* _curCursorInfo = nullptr; + bool _isOnOverlay = false; + bool _isCapturedOnOverlay = false; - phmap::flat_hash_map _cursorInfos; + bool _isSystemCursorShown = true; }; } diff --git a/src/Magpie.Core/DesktopDuplicationFrameSource.cpp b/src/Magpie.Core/DesktopDuplicationFrameSource.cpp index 888285e45..9dbf00863 100644 --- a/src/Magpie.Core/DesktopDuplicationFrameSource.cpp +++ b/src/Magpie.Core/DesktopDuplicationFrameSource.cpp @@ -1,21 +1,21 @@ #include "pch.h" #include "DesktopDuplicationFrameSource.h" -#include "MagApp.h" -#include "DeviceResources.h" #include "Logger.h" #include "Win32Utils.h" +#include "ScalingWindow.h" +#include "DeviceResources.h" +#include "DirectXHelper.h" #include "SmallVector.h" - namespace Magpie::Core { -static winrt::com_ptr FindMonitor(IDXGIAdapter1* adapter, HMONITOR hMonitor) { +static winrt::com_ptr FindMonitor(IDXGIAdapter1* adapter, HMONITOR hMonitor) noexcept { winrt::com_ptr output; for (UINT adapterIndex = 0; SUCCEEDED(adapter->EnumOutputs(adapterIndex, output.put())); ++adapterIndex - ) { + ) { DXGI_OUTPUT_DESC desc; HRESULT hr = output->GetDesc(&desc); if (FAILED(hr)) { @@ -37,55 +37,14 @@ static winrt::com_ptr FindMonitor(IDXGIAdapter1* adapter, HMONITOR return nullptr; } -// 根据显示器句柄查找 IDXGIOutput1 -static winrt::com_ptr GetDXGIOutput(HMONITOR hMonitor) { - auto& dr = MagApp::Get().GetDeviceResources(); - IDXGIAdapter1* curAdapter = dr.GetGraphicsAdapter(); - - // 首先在当前使用的图形适配器上查询显示器 - winrt::com_ptr output = FindMonitor(curAdapter, hMonitor); - if (output) { - return output; - } - - // 未找到则在所有图形适配器上查找 - winrt::com_ptr adapter; - IDXGIFactory5* dxgiFactory = dr.GetDXGIFactory(); 
- for (UINT adapterIndex = 0; - SUCCEEDED(dxgiFactory->EnumAdapters1(adapterIndex, adapter.put())); - ++adapterIndex - ) { - if (adapter.get() == curAdapter) { - continue; - } - - output = FindMonitor(adapter.get(), hMonitor); - if (output) { - return output; - } - } - - return nullptr; -} - -DesktopDuplicationFrameSource::~DesktopDuplicationFrameSource() { - _exiting.store(true, std::memory_order_release); - WaitForSingleObject(_hDDPThread, 1000); -} - -bool DesktopDuplicationFrameSource::Initialize() { - if (!FrameSourceBase::Initialize()) { - Logger::Get().Error("初始化 FrameSourceBase 失败"); - return false; - } - +bool DesktopDuplicationFrameSource::_Initialize() noexcept { // WDA_EXCLUDEFROMCAPTURE 只在 Win10 20H1 及更新版本中可用 if (!Win32Utils::GetOSVersion().Is20H1OrNewer()) { Logger::Get().Error("当前操作系统无法使用 Desktop Duplication"); return false; } - HWND hwndSrc = MagApp::Get().GetHwndSrc(); + const HWND hwndSrc = ScalingWindow::Get().HwndSrc(); HMONITOR hMonitor = MonitorFromWindow(hwndSrc, MONITOR_DEFAULTTONEAREST); if (!hMonitor) { @@ -93,298 +52,168 @@ bool DesktopDuplicationFrameSource::Initialize() { return false; } - MONITORINFO mi{}; - mi.cbSize = sizeof(mi); - if (!GetMonitorInfo(hMonitor, &mi)) { - Logger::Get().Win32Error("GetMonitorInfo 失败"); - return false; - } + { + MONITORINFO mi{ .cbSize = sizeof(mi) }; + if (!GetMonitorInfo(hMonitor, &mi)) { + Logger::Get().Win32Error("GetMonitorInfo 失败"); + return false; + } - if (!_CenterWindowIfNecessary(hwndSrc, mi.rcWork)) { - Logger::Get().Error("居中源窗口失败"); - return false; - } + // 最大化的窗口无需调整位置 + if (Win32Utils::GetWindowShowCmd(hwndSrc) != SW_SHOWMAXIMIZED) { + if (!_CenterWindowIfNecessary(hwndSrc, mi.rcWork)) { + Logger::Get().Error("居中源窗口失败"); + return false; + } + } - if (!_UpdateSrcFrameRect()) { - Logger::Get().Error("_UpdateSrcFrameRect 失败"); - return false; - } + if (!_CalcSrcRect()) { + Logger::Get().Error("_CalcSrcRect 失败"); + return false; + } - auto& dr = MagApp::Get().GetDeviceResources(); + // 
计算源窗口客户区在该屏幕上的位置,用于计算新帧是否有更新 + _srcClientInMonitor = { + _srcRect.left - mi.rcMonitor.left, + _srcRect.top - mi.rcMonitor.top, + _srcRect.right - mi.rcMonitor.left, + _srcRect.bottom - mi.rcMonitor.top + }; + } - _output = dr.CreateTexture2D( + _frameInMonitor = { + (UINT)_srcClientInMonitor.left, + (UINT)_srcClientInMonitor.top, + 0, + (UINT)_srcClientInMonitor.right, + (UINT)_srcClientInMonitor.bottom, + 1 + }; + + _output = DirectXHelper::CreateTexture2D( + _deviceResources->GetD3DDevice(), DXGI_FORMAT_B8G8R8A8_UNORM, - _srcFrameRect.right - _srcFrameRect.left, - _srcFrameRect.bottom - _srcFrameRect.top, + _srcRect.right - _srcRect.left, + _srcRect.bottom - _srcRect.top, D3D11_BIND_SHADER_RESOURCE ); if (!_output) { - Logger::Get().Error("创建 Texture2D 失败"); - return false; - } - - // 创建共享纹理 - _sharedTex = dr.CreateTexture2D( - DXGI_FORMAT_B8G8R8A8_UNORM, - _srcFrameRect.right - _srcFrameRect.left, - _srcFrameRect.bottom - _srcFrameRect.top, - D3D11_BIND_SHADER_RESOURCE, - D3D11_USAGE_DEFAULT, - D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX - ); - if (!_sharedTex) { - Logger::Get().Error("创建 Texture2D 失败"); - return false; - } - - _sharedTexMutex = _sharedTex.try_as(); - if (!_sharedTexMutex) { - Logger::Get().Error("检索 IDXGIKeyedMutex 失败"); - return false; - } - - winrt::com_ptr sharedDxgiRes = _sharedTex.try_as(); - if (!sharedDxgiRes) { - Logger::Get().Error("检索 IDXGIResource 失败"); + Logger::Get().Error("CreateTexture2D 失败"); return false; } - HANDLE hSharedTex = NULL; - HRESULT hr = sharedDxgiRes->GetSharedHandle(&hSharedTex); - if (FAILED(hr)) { - Logger::Get().Error("GetSharedHandle 失败"); - return false; - } - - if (!_InitializeDdpD3D(hSharedTex)) { - Logger::Get().Error("初始化 D3D 失败"); - return false; - } - - winrt::com_ptr output = GetDXGIOutput(hMonitor); + winrt::com_ptr output = FindMonitor( + _deviceResources->GetGraphicsAdapter(), hMonitor); if (!output) { Logger::Get().Error("无法找到 IDXGIOutput"); return false; } - hr = 
output->DuplicateOutput(_ddpD3dDevice.get(), _outputDup.put()); + HRESULT hr = output->DuplicateOutput(_deviceResources->GetD3DDevice(), _outputDup.put()); if (FAILED(hr)) { Logger::Get().ComError("DuplicateOutput 失败", hr); return false; } - // 计算源窗口客户区在该屏幕上的位置,用于计算新帧是否有更新 - _srcClientInMonitor = { - _srcFrameRect.left - mi.rcMonitor.left, - _srcFrameRect.top - mi.rcMonitor.top, - _srcFrameRect.right - mi.rcMonitor.left, - _srcFrameRect.bottom - mi.rcMonitor.top - }; - - _frameInMonitor = { - (UINT)_srcClientInMonitor.left, - (UINT)_srcClientInMonitor.top, - 0, - (UINT)_srcClientInMonitor.right, - (UINT)_srcClientInMonitor.bottom, - 1 - }; - // 使全屏窗口无法被捕获到 - if (!SetWindowDisplayAffinity(MagApp::Get().GetHwndHost(), WDA_EXCLUDEFROMCAPTURE)) { + if (!SetWindowDisplayAffinity(ScalingWindow::Get().Handle(), WDA_EXCLUDEFROMCAPTURE)) { Logger::Get().Win32Error("SetWindowDisplayAffinity 失败"); return false; } - - _hDDPThread = CreateThread(nullptr, 0, _DDPThreadProc, this, 0, nullptr); - if (!_hDDPThread) { - return false; - } - Logger::Get().Info("DesktopDuplicationFrameSource 初始化完成"); return true; } +FrameSourceBase::UpdateState DesktopDuplicationFrameSource::_Update() noexcept { + ID3D11DeviceContext4* d3dDC = _deviceResources->GetD3DDC(); -FrameSourceBase::UpdateState DesktopDuplicationFrameSource::Update() { - const UINT newFrameState = _newFrameState.load(std::memory_order_acquire); - if (newFrameState == 2) { - // 第一帧之前不渲染 - return UpdateState::Waiting; - } else if (newFrameState == 0) { - return UpdateState::NoUpdate; + if (_isFrameAcquired) { + // 根据文档,释放后立刻获取下一帧可以提高性能 + _outputDup->ReleaseFrame(); + _isFrameAcquired = false; } - // 不必等待,当 newFrameState 变化时捕获线程已将锁释放 - HRESULT hr = _sharedTexMutex->AcquireSync(1, 0); - if (hr == static_cast(WAIT_TIMEOUT)) { + DXGI_OUTDUPL_FRAME_INFO info; + winrt::com_ptr dxgiRes; + // 等待 1ms + HRESULT hr = _outputDup->AcquireNextFrame(1, &info, dxgiRes.put()); + if (hr == DXGI_ERROR_WAIT_TIMEOUT) { return UpdateState::Waiting; } 
if (FAILED(hr)) { - Logger::Get().ComError("AcquireSync 失败", hr); + Logger::Get().ComError("AcquireNextFrame 失败", hr); return UpdateState::Error; } - // 不需要对捕获线程可见 - _newFrameState.store(0, std::memory_order_relaxed); - - MagApp::Get().GetDeviceResources().GetD3DDC()->CopyResource(_output.get(), _sharedTex.get()); - - _sharedTexMutex->ReleaseSync(0); - - return UpdateState::NewFrame; -} - -bool DesktopDuplicationFrameSource::_InitializeDdpD3D(HANDLE hSharedTex) { - UINT createDeviceFlags = D3D11_CREATE_DEVICE_BGRA_SUPPORT; - if (DeviceResources::IsDebugLayersAvailable()) { - // 在 DEBUG 配置启用调试层 - createDeviceFlags |= D3D11_CREATE_DEVICE_DEBUG; - } - - D3D_FEATURE_LEVEL featureLevels[] = { - D3D_FEATURE_LEVEL_11_1, - D3D_FEATURE_LEVEL_11_0 - }; - UINT nFeatureLevels = ARRAYSIZE(featureLevels); - - // 使用和 Renderer 相同的图像适配器以避免 GPU 间的纹理拷贝 - HRESULT hr = D3D11CreateDevice( - MagApp::Get().GetDeviceResources().GetGraphicsAdapter(), - D3D_DRIVER_TYPE_UNKNOWN, - nullptr, - createDeviceFlags, - featureLevels, - nFeatureLevels, - D3D11_SDK_VERSION, - _ddpD3dDevice.put(), - nullptr, - _ddpD3dDC.put() - ); - - if (FAILED(hr)) { - Logger::Get().ComError("D3D11CreateDevice 失败", hr); - return false; - } - - // 获取共享纹理 - hr = _ddpD3dDevice->OpenSharedResource(hSharedTex, IID_PPV_ARGS(_ddpSharedTex.put())); - if (FAILED(hr)) { - Logger::Get().ComError("OpenSharedResource 失败", hr); - return false; - } + _isFrameAcquired = true; - _ddpSharedTexMutex = _ddpSharedTex.try_as(); - if (!_ddpSharedTexMutex) { - Logger::Get().Error("检索 IDXGIKeyedMutex 失败"); - return false; - } - - return true; -} + bool noUpdate = true; -DWORD WINAPI DesktopDuplicationFrameSource::_DDPThreadProc(LPVOID lpThreadParameter) { - DesktopDuplicationFrameSource& that = *(DesktopDuplicationFrameSource*)lpThreadParameter; - - DXGI_OUTDUPL_FRAME_INFO info{}; - winrt::com_ptr dxgiRes; - SmallVector dupMetaData; - - while (!that._exiting.load(std::memory_order_acquire)) { - if (dxgiRes) { - 
that._outputDup->ReleaseFrame(); - } - HRESULT hr = that._outputDup->AcquireNextFrame(500, &info, dxgiRes.put()); - if (hr == DXGI_ERROR_WAIT_TIMEOUT) { - continue; + // 检索 move rects 和 dirty rects + // 这些区域如果和窗口客户区有重叠则表明画面有变化 + if (info.TotalMetadataBufferSize) { + if (info.TotalMetadataBufferSize > _dupMetaData.size()) { + _dupMetaData.resize(info.TotalMetadataBufferSize); } + uint32_t bufSize = info.TotalMetadataBufferSize; + + // Move rects + hr = _outputDup->GetFrameMoveRects( + bufSize, (DXGI_OUTDUPL_MOVE_RECT*)_dupMetaData.data(), &bufSize); if (FAILED(hr)) { - Logger::Get().ComError("AcquireNextFrame 失败", hr); - continue; + Logger::Get().ComError("GetFrameMoveRects 失败", hr); + return UpdateState::Error; } - bool noUpdate = true; - - // 检索 move rects 和 dirty rects - // 这些区域如果和窗口客户区有重叠则表明画面有变化 - if (info.TotalMetadataBufferSize) { - if (info.TotalMetadataBufferSize > dupMetaData.size()) { - dupMetaData.resize(info.TotalMetadataBufferSize); + uint32_t nRect = bufSize / sizeof(DXGI_OUTDUPL_MOVE_RECT); + for (uint32_t i = 0; i < nRect; ++i) { + const DXGI_OUTDUPL_MOVE_RECT& rect = + ((DXGI_OUTDUPL_MOVE_RECT*)_dupMetaData.data())[i]; + if (Win32Utils::CheckOverlap(_srcClientInMonitor, rect.DestinationRect)) { + noUpdate = false; + break; } + } - UINT bufSize = info.TotalMetadataBufferSize; + if (noUpdate) { + bufSize = info.TotalMetadataBufferSize; - // move rects - hr = that._outputDup->GetFrameMoveRects(bufSize, (DXGI_OUTDUPL_MOVE_RECT*)dupMetaData.data(), &bufSize); + // Dirty rects + hr = _outputDup->GetFrameDirtyRects( + bufSize, (RECT*)_dupMetaData.data(), &bufSize); if (FAILED(hr)) { - Logger::Get().ComError("GetFrameMoveRects 失败", hr); - continue; + Logger::Get().ComError("GetFrameDirtyRects 失败", hr); + return UpdateState::Error; } - UINT nRect = bufSize / sizeof(DXGI_OUTDUPL_MOVE_RECT); - for (UINT i = 0; i < nRect; ++i) { - const DXGI_OUTDUPL_MOVE_RECT& rect = ((DXGI_OUTDUPL_MOVE_RECT*)dupMetaData.data())[i]; - if 
(Win32Utils::CheckOverlap(that._srcClientInMonitor, rect.DestinationRect)) { + nRect = bufSize / sizeof(RECT); + for (uint32_t i = 0; i < nRect; ++i) { + const RECT& rect = ((RECT*)_dupMetaData.data())[i]; + if (Win32Utils::CheckOverlap(_srcClientInMonitor, rect)) { noUpdate = false; break; } } - - if (noUpdate) { - bufSize = info.TotalMetadataBufferSize; - - // dirty rects - hr = that._outputDup->GetFrameDirtyRects(bufSize, (RECT*)dupMetaData.data(), &bufSize); - if (FAILED(hr)) { - Logger::Get().ComError("GetFrameDirtyRects 失败", hr); - continue; - } - - nRect = bufSize / sizeof(RECT); - for (UINT i = 0; i < nRect; ++i) { - const RECT& rect = ((RECT*)dupMetaData.data())[i]; - if (Win32Utils::CheckOverlap(that._srcClientInMonitor, rect)) { - noUpdate = false; - break; - } - } - } - } - - if (noUpdate) { - continue; - } - - winrt::com_ptr d3dRes = dxgiRes.try_as(); - if (!d3dRes) { - Logger::Get().Error("从 IDXGIResource 检索 ID3D11Resource 失败"); - continue; - } - - hr = that._ddpSharedTexMutex->AcquireSync(0, 100); - while (hr == static_cast(WAIT_TIMEOUT)) { - if (that._exiting.load(std::memory_order_acquire)) { - return 0; - } - - hr = that._ddpSharedTexMutex->AcquireSync(0, 100); } + } - if (FAILED(hr)) { - Logger::Get().ComError("AcquireSync 失败", hr); - continue; - } - - - that._ddpD3dDC->CopySubresourceRegion(that._ddpSharedTex.get(), 0, 0, 0, 0, d3dRes.get(), 0, &that._frameInMonitor); - that._ddpSharedTexMutex->ReleaseSync(1); - that._newFrameState.store(1, std::memory_order_release); + if (noUpdate) { + return UpdateState::Waiting; + } + + winrt::com_ptr frameTexture = dxgiRes.try_as(); + if (!frameTexture) { + Logger::Get().Error("从 IDXGIResource 检索 ID3D11Resource 失败"); + return UpdateState::Error; } - return 0; + d3dDC->CopySubresourceRegion( + _output.get(), 0, 0, 0, 0, frameTexture.get(), 0, &_frameInMonitor); + + return UpdateState::NewFrame; } } diff --git a/src/Magpie.Core/DesktopDuplicationFrameSource.h b/src/Magpie.Core/DesktopDuplicationFrameSource.h 
index af1d370e9..d230c9128 100644 --- a/src/Magpie.Core/DesktopDuplicationFrameSource.h +++ b/src/Magpie.Core/DesktopDuplicationFrameSource.h @@ -1,63 +1,46 @@ #pragma once #include "FrameSourceBase.h" +#include "Win32Utils.h" +#include "SmallVector.h" namespace Magpie::Core { -// 使用 Desktop Duplication API 捕获窗口 -// 在单独的线程中接收屏幕帧以避免丢帧 -class DesktopDuplicationFrameSource : public FrameSourceBase { +class DesktopDuplicationFrameSource final : public FrameSourceBase { public: - DesktopDuplicationFrameSource() {}; - virtual ~DesktopDuplicationFrameSource(); - - bool Initialize() override; - - UpdateState Update() override; - - bool IsScreenCapture() override { + bool IsScreenCapture() const noexcept override { return true; } - const char* GetName() const noexcept override { + FrameSourceWaitType WaitType() const noexcept override { + return WaitForFrame; + } + + const char* Name() const noexcept override { return "Desktop Duplication"; } protected: - bool _HasRoundCornerInWin11() override { + bool _HasRoundCornerInWin11() noexcept override { return true; } - bool _CanCaptureTitleBar() override { + bool _CanCaptureTitleBar() noexcept override { return true; } -private: - bool _InitializeDdpD3D(HANDLE hSharedTex); + bool _Initialize() noexcept override; - static DWORD WINAPI _DDPThreadProc(LPVOID lpThreadParameter); + UpdateState _Update() noexcept override; +private: winrt::com_ptr _outputDup; - HANDLE _hDDPThread = NULL; - std::atomic _exiting = false; - // 0: 等待新帧 - // 1: 新帧到达 - // 2: 等待第一帧 - std::atomic _newFrameState = 2; - - // DDP 线程使用的 D3D 设备 - winrt::com_ptr _ddpD3dDevice; - winrt::com_ptr _ddpD3dDC; - - // 这些均指向同一个纹理 - // 用于在 D3D Device 间同步对该纹理的访问 - winrt::com_ptr _sharedTex; - winrt::com_ptr _sharedTexMutex; - winrt::com_ptr _ddpSharedTex; - winrt::com_ptr _ddpSharedTexMutex; + SmallVector _dupMetaData; RECT _srcClientInMonitor{}; D3D11_BOX _frameInMonitor{}; + + bool _isFrameAcquired = false; }; } diff --git a/src/Magpie.Core/DeviceResources.cpp 
b/src/Magpie.Core/DeviceResources.cpp index 8a4ee82c6..2a6db8a66 100644 --- a/src/Magpie.Core/DeviceResources.cpp +++ b/src/Magpie.Core/DeviceResources.cpp @@ -1,12 +1,14 @@ #include "pch.h" #include "DeviceResources.h" -#include "MagApp.h" -#include "StrUtils.h" +#include "ScalingOptions.h" #include "Logger.h" +#include "StrUtils.h" +#include "DirectXHelper.h" +#include "ScalingWindow.h" namespace Magpie::Core { -bool DeviceResources::Initialize() { +bool DeviceResources::Initialize() noexcept { #ifdef _DEBUG UINT flag = DXGI_CREATE_FACTORY_DEBUG; #else @@ -21,115 +23,51 @@ bool DeviceResources::Initialize() { // 检查可变帧率支持 BOOL supportTearing = FALSE; - hr = _dxgiFactory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &supportTearing, sizeof(supportTearing)); if (FAILED(hr)) { Logger::Get().ComWarn("CheckFeatureSupport 失败", hr); } - _supportTearing = !!supportTearing; + _isSupportTearing = supportTearing; Logger::Get().Info(fmt::format("可变刷新率支持:{}", supportTearing ? "是" : "否")); - if (!MagApp::Get().GetOptions().IsVSync() && !supportTearing) { - Logger::Get().Error("当前显示器不支持可变刷新率"); - //MagApp::Get().SetErrorMsg(ErrorMessages::VSYNC_OFF_NOT_SUPPORTED); - return false; - } - - if(!_ObtainGraphicsAdapterAndD3DDevice()) { + if (!_ObtainAdapterAndDevice(ScalingWindow::Get().Options().graphicsCard)) { Logger::Get().Error("找不到可用的图形适配器"); return false; } - if (!_CreateSwapChain()) { - Logger::Get().Error("_CreateSwapChain 失败"); - return false; - } - return true; } -winrt::com_ptr DeviceResources::CreateTexture2D( - DXGI_FORMAT format, - UINT width, - UINT height, - UINT bindFlags, - D3D11_USAGE usage, - UINT miscFlags, - const D3D11_SUBRESOURCE_DATA* pInitialData -) { - D3D11_TEXTURE2D_DESC desc{}; - desc.Format = format; - desc.Width = width; - desc.Height = height; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.SampleDesc.Count = 1; - desc.SampleDesc.Quality = 0; - desc.BindFlags = bindFlags; - desc.Usage = usage; - desc.MiscFlags = miscFlags; - - 
winrt::com_ptr result; - HRESULT hr = _d3dDevice->CreateTexture2D(&desc, pInitialData, result.put()); - if (FAILED(hr)) { - Logger::Get().ComError("CreateTexture2D 失败", hr); - return nullptr; - } - - return result; -} - -void DeviceResources::BeginFrame() { - WaitForSingleObjectEx(_frameLatencyWaitableObject.get(), 1000, TRUE); - _d3dDC->ClearState(); -} - -void DeviceResources::EndFrame() { - if (MagApp::Get().GetOptions().IsVSync()) { - _swapChain->Present(1, 0); - } else { - _swapChain->Present(0, DXGI_PRESENT_ALLOW_TEARING); +ID3D11SamplerState* DeviceResources::GetSampler(D3D11_FILTER filterMode, D3D11_TEXTURE_ADDRESS_MODE addressMode) noexcept { + auto key = std::make_pair(filterMode, addressMode); + auto it = _samMap.find(key); + if (it != _samMap.end()) { + return it->second.get(); } -} - -static void LogAdapter(const DXGI_ADAPTER_DESC1& adapterDesc) { - Logger::Get().Info(fmt::format("当前图形适配器:\n\tVendorId:{:#x}\n\tDeviceId:{:#x}\n\t描述:{}", - adapterDesc.VendorId, adapterDesc.DeviceId, StrUtils::UTF16ToUTF8(adapterDesc.Description))); -} - -bool DeviceResources::IsDebugLayersAvailable() noexcept { -#ifdef _DEBUG - static std::optional result = std::nullopt; - if (!result.has_value()) { - HRESULT hr = D3D11CreateDevice( - nullptr, - D3D_DRIVER_TYPE_NULL, // There is no need to create a real hardware device. - nullptr, - D3D11_CREATE_DEVICE_DEBUG, // Check for the SDK layers. - nullptr, // Any feature level will do. - 0, - D3D11_SDK_VERSION, - nullptr, // No need to keep the D3D device reference. - nullptr, // No need to know the feature level. - nullptr // No need to keep the D3D device context reference. 
- ); + winrt::com_ptr sam; - result = SUCCEEDED(hr); + D3D11_SAMPLER_DESC desc{}; + desc.Filter = filterMode; + desc.AddressU = addressMode; + desc.AddressV = addressMode; + desc.AddressW = addressMode; + desc.ComparisonFunc = D3D11_COMPARISON_NEVER; + desc.MinLOD = 0; + desc.MaxLOD = 0; + HRESULT hr = _d3dDevice->CreateSamplerState(&desc, sam.put()); + if (FAILED(hr)) { + Logger::Get().ComError("创建 ID3D11SamplerState 出错", hr); + return nullptr; } - return result.value_or(false); -#else - // Relaese 配置不使用调试层 - return false; -#endif + return _samMap.emplace(key, std::move(sam)).first->second.get(); } -bool DeviceResources::_ObtainGraphicsAdapterAndD3DDevice() noexcept { +bool DeviceResources::_ObtainAdapterAndDevice(int adapterIdx) noexcept { winrt::com_ptr adapter; - int adapterIdx = MagApp::Get().GetOptions().graphicsCard; if (adapterIdx >= 0) { HRESULT hr = _dxgiFactory->EnumAdapters1(adapterIdx, adapter.put()); if (SUCCEEDED(hr)) { @@ -138,8 +76,7 @@ bool DeviceResources::_ObtainGraphicsAdapterAndD3DDevice() noexcept { if (SUCCEEDED(hr)) { if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { Logger::Get().Warn("用户指定的显示卡为 WARP,已忽略"); - } else if (_TryCreateD3DDevice(adapter.get())) { - LogAdapter(desc); + } else if (_TryCreateD3DDevice(adapter)) { return true; } else { Logger::Get().Warn("用户指定的显示卡不支持 FL 11"); @@ -168,8 +105,7 @@ bool DeviceResources::_ObtainGraphicsAdapterAndD3DDevice() noexcept { continue; } - if (_TryCreateD3DDevice(adapter.get())) { - LogAdapter(desc); + if (_TryCreateD3DDevice(adapter)) { return true; } } @@ -182,21 +118,15 @@ bool DeviceResources::_ObtainGraphicsAdapterAndD3DDevice() noexcept { return false; } - if (!_TryCreateD3DDevice(adapter.get())) { + if (!_TryCreateD3DDevice(adapter)) { Logger::Get().ComError("创建 WARP 设备失败", hr); return false; } - DXGI_ADAPTER_DESC1 desc; - hr = adapter->GetDesc1(&desc); - if (SUCCEEDED(hr)) { - LogAdapter(desc); - } - return true; } -bool DeviceResources::_TryCreateD3DDevice(IDXGIAdapter1* adapter) noexcept 
{ +bool DeviceResources::_TryCreateD3DDevice(const winrt::com_ptr& adapter) noexcept { D3D_FEATURE_LEVEL featureLevels[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0 @@ -204,15 +134,16 @@ bool DeviceResources::_TryCreateD3DDevice(IDXGIAdapter1* adapter) noexcept { UINT nFeatureLevels = ARRAYSIZE(featureLevels); UINT createDeviceFlags = D3D11_CREATE_DEVICE_BGRA_SUPPORT; - if (IsDebugLayersAvailable()) { + if (DirectXHelper::IsDebugLayersAvailable()) { // 在 DEBUG 配置启用调试层 createDeviceFlags |= D3D11_CREATE_DEVICE_DEBUG; } winrt::com_ptr d3dDevice; winrt::com_ptr d3dDC; + D3D_FEATURE_LEVEL featureLevel; HRESULT hr = D3D11CreateDevice( - adapter, + adapter.get(), D3D_DRIVER_TYPE_UNKNOWN, nullptr, createDeviceFlags, @@ -220,7 +151,7 @@ bool DeviceResources::_TryCreateD3DDevice(IDXGIAdapter1* adapter) noexcept { nFeatureLevels, D3D11_SDK_VERSION, d3dDevice.put(), - &_featureLevel, + &featureLevel, d3dDC.put() ); @@ -230,7 +161,7 @@ bool DeviceResources::_TryCreateD3DDevice(IDXGIAdapter1* adapter) noexcept { } std::string_view fl; - switch (_featureLevel) { + switch (featureLevel) { case D3D_FEATURE_LEVEL_11_1: fl = "11.1"; break; @@ -241,7 +172,7 @@ bool DeviceResources::_TryCreateD3DDevice(IDXGIAdapter1* adapter) noexcept { fl = "未知"; break; } - Logger::Get().Info(fmt::format("已创建 D3D Device\n\t功能级别:{}", fl)); + Logger::Get().Info(fmt::format("已创建 D3D 设备\n\t功能级别:{}", fl)); _d3dDevice = d3dDevice.try_as(); if (!_d3dDevice) { @@ -251,18 +182,12 @@ bool DeviceResources::_TryCreateD3DDevice(IDXGIAdapter1* adapter) noexcept { _d3dDC = d3dDC.try_as(); if (!_d3dDC) { - Logger::Get().Error("获取 ID3D11DeviceContext1 失败"); + Logger::Get().Error("获取 ID3D11DeviceContext4 失败"); return false; } - - _dxgiDevice = _d3dDevice.try_as(); - if (!_dxgiDevice) { - Logger::Get().Error("获取 IDXGIDevice 失败"); - return false; - } - - hr = adapter->QueryInterface(_graphicsAdapter.put()); - if (FAILED(hr)) { + + _graphicsAdapter = adapter.try_as(); + if (!_graphicsAdapter) { 
Logger::Get().ComError("获取 IDXGIAdapter4 失败", hr); return false; } @@ -270,187 +195,4 @@ bool DeviceResources::_TryCreateD3DDevice(IDXGIAdapter1* adapter) noexcept { return true; } -bool DeviceResources::_CreateSwapChain() { - const RECT& hostWndRect = MagApp::Get().GetHostWndRect(); - const MagOptions& options = MagApp::Get().GetOptions(); - - DXGI_SWAP_CHAIN_DESC1 sd = {}; - sd.Width = hostWndRect.right - hostWndRect.left; - sd.Height = hostWndRect.bottom - hostWndRect.top; - sd.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - sd.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED; - sd.SampleDesc.Count = 1; - sd.SampleDesc.Quality = 0; - sd.Scaling = DXGI_SCALING_NONE; - sd.BufferUsage = DXGI_USAGE_UNORDERED_ACCESS | DXGI_USAGE_RENDER_TARGET_OUTPUT; - sd.BufferCount = (options.IsTripleBuffering() || !options.IsVSync()) ? 3 : 2; - // 渲染每帧之前都会清空后缓冲区,因此无需 DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL - sd.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; - // 只要显卡支持始终启用 DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING - sd.Flags = (_supportTearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0) - | DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; - - winrt::com_ptr dxgiSwapChain = nullptr; - HRESULT hr = _dxgiFactory->CreateSwapChainForHwnd( - _d3dDevice.get(), - MagApp::Get().GetHwndHost(), - &sd, - nullptr, - nullptr, - dxgiSwapChain.put() - ); - if (FAILED(hr)) { - Logger::Get().ComError("创建交换链失败", hr); - return false; - } - - _swapChain = dxgiSwapChain.try_as(); - if (!_swapChain) { - Logger::Get().Error("获取 IDXGISwapChain2 失败"); - return false; - } - - // 关闭低延迟模式或关闭垂直同步时将最大延迟设为 2 以使 CPU 和 GPU 并行执行 - _swapChain->SetMaximumFrameLatency(options.IsTripleBuffering() || !options.IsVSync() ? 
2 : 1); - - _frameLatencyWaitableObject.reset(_swapChain->GetFrameLatencyWaitableObject()); - if (!_frameLatencyWaitableObject) { - Logger::Get().Error("GetFrameLatencyWaitableObject 失败"); - return false; - } - - hr = _dxgiFactory->MakeWindowAssociation(MagApp::Get().GetHwndHost(), DXGI_MWA_NO_ALT_ENTER); - if (FAILED(hr)) { - Logger::Get().ComError("MakeWindowAssociation 失败", hr); - } - - // 检查 Multiplane Overlay 和 Hardware Composition 支持 - BOOL supportMPO = FALSE; - BOOL supportHardwareComposition = FALSE; - winrt::com_ptr output; - hr = _swapChain->GetContainingOutput(output.put()); - if (FAILED(hr)) { - Logger::Get().ComError("获取 IDXGIOutput 失败", hr); - } else { - winrt::com_ptr output2 = output.try_as(); - if (!output2) { - Logger::Get().Info("获取 IDXGIOutput2 失败"); - } else { - supportMPO = output2->SupportsOverlays(); - } - - winrt::com_ptr output6 = output.try_as(); - if (!output6) { - Logger::Get().Info("获取 IDXGIOutput6 失败"); - } else { - UINT flags; - hr = output6->CheckHardwareCompositionSupport(&flags); - if (FAILED(hr)) { - Logger::Get().ComError("CheckHardwareCompositionSupport 失败", hr); - } else { - supportHardwareComposition = flags & DXGI_HARDWARE_COMPOSITION_SUPPORT_FLAG_WINDOWED; - } - } - } - - Logger::Get().Info(StrUtils::Concat("Hardware Composition 支持:", supportHardwareComposition ? "是" : "否")); - Logger::Get().Info(StrUtils::Concat("Multiplane Overlay 支持:", supportMPO ? 
"是" : "否")); - - hr = _swapChain->GetBuffer(0, IID_PPV_ARGS(_backBuffer.put())); - if (FAILED(hr)) { - Logger::Get().ComError("获取后缓冲区失败", hr); - return false; - } - - return true; -} - -bool DeviceResources::GetShaderResourceView(ID3D11Texture2D* texture, ID3D11ShaderResourceView** result) { - auto it = _srvMap.find(texture); - if (it != _srvMap.end()) { - *result = it->second.get(); - return true; - } - - winrt::com_ptr& r = _srvMap[texture]; - HRESULT hr = _d3dDevice->CreateShaderResourceView(texture, nullptr, r.put()); - if (FAILED(hr)) { - Logger::Get().ComError("CreateShaderResourceView 失败", hr); - return false; - } else { - *result = r.get(); - return true; - } -} - -bool DeviceResources::GetUnorderedAccessView(ID3D11Texture2D* texture, ID3D11UnorderedAccessView** result) { - auto it = _uavMap.find(texture); - if (it != _uavMap.end()) { - *result = it->second.get(); - return true; - } - - winrt::com_ptr& r = _uavMap[texture]; - - D3D11_UNORDERED_ACCESS_VIEW_DESC desc{}; - desc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; - desc.Texture2D.MipSlice = 0; - - HRESULT hr = _d3dDevice->CreateUnorderedAccessView(texture, &desc, r.put()); - if (FAILED(hr)) { - Logger::Get().ComError("CreateUnorderedAccessView 失败", hr); - return false; - } else { - *result = r.get(); - return true; - } -} - -bool DeviceResources::GetRenderTargetView(ID3D11Texture2D* texture, ID3D11RenderTargetView** result) { - auto it = _rtvMap.find(texture); - if (it != _rtvMap.end()) { - *result = it->second.get(); - return true; - } - - winrt::com_ptr& r = _rtvMap[texture]; - HRESULT hr = _d3dDevice->CreateRenderTargetView(texture, nullptr, r.put()); - if (FAILED(hr)) { - Logger::Get().ComError("CreateRenderTargetView 失败", hr); - return false; - } else { - *result = r.get(); - return true; - } -} - -bool DeviceResources::GetSampler(D3D11_FILTER filterMode, D3D11_TEXTURE_ADDRESS_MODE addressMode, ID3D11SamplerState** result) { - auto key = std::make_pair(filterMode, addressMode); - auto it = 
_samMap.find(key); - if (it != _samMap.end()) { - *result = it->second.get(); - return true; - } - - winrt::com_ptr sam; - - D3D11_SAMPLER_DESC desc{}; - desc.Filter = filterMode; - desc.AddressU = addressMode; - desc.AddressV = addressMode; - desc.AddressW = addressMode; - desc.ComparisonFunc = D3D11_COMPARISON_NEVER; - desc.MinLOD = 0; - desc.MaxLOD = 0; - HRESULT hr = _d3dDevice->CreateSamplerState(&desc, sam.put()); - if (FAILED(hr)) { - Logger::Get().ComError("创建 ID3D11SamplerState 出错", hr); - return false; - } - - *result = sam.get(); - _samMap.emplace(key, std::move(sam)); - return true; -} - } diff --git a/src/Magpie.Core/DeviceResources.h b/src/Magpie.Core/DeviceResources.h index ecdf25632..1141be333 100644 --- a/src/Magpie.Core/DeviceResources.h +++ b/src/Magpie.Core/DeviceResources.h @@ -1,5 +1,4 @@ #pragma once -#include "Win32Utils.h" #include namespace Magpie::Core { @@ -8,71 +7,36 @@ class DeviceResources { public: DeviceResources() = default; DeviceResources(const DeviceResources&) = delete; - DeviceResources(DeviceResources&&) = delete; + DeviceResources(DeviceResources&&) = default; - bool Initialize(); - - static bool IsDebugLayersAvailable() noexcept; - - winrt::com_ptr CreateTexture2D( - DXGI_FORMAT format, - UINT width, - UINT height, - UINT bindFlags, - D3D11_USAGE usage = D3D11_USAGE_DEFAULT, - UINT miscFlags = 0, - const D3D11_SUBRESOURCE_DATA* pInitialData = nullptr - ); - - bool GetSampler(D3D11_FILTER filterMode, D3D11_TEXTURE_ADDRESS_MODE addressMode, ID3D11SamplerState** result); - - bool GetRenderTargetView(ID3D11Texture2D* texture, ID3D11RenderTargetView** result); - - bool GetShaderResourceView(ID3D11Texture2D* texture, ID3D11ShaderResourceView** result); - - bool GetUnorderedAccessView(ID3D11Texture2D* texture, ID3D11UnorderedAccessView** result); + bool Initialize() noexcept; + IDXGIFactory7* GetDXGIFactory() const noexcept { return _dxgiFactory.get(); } ID3D11Device5* GetD3DDevice() const noexcept { return _d3dDevice.get(); } - 
D3D_FEATURE_LEVEL GetFeatureLevel() const noexcept { return _featureLevel; } ID3D11DeviceContext4* GetD3DDC() const noexcept { return _d3dDC.get(); } - IDXGISwapChain4* GetSwapChain() const noexcept { return _swapChain.get(); }; - ID3D11Texture2D* GetBackBuffer() const noexcept { return _backBuffer.get(); } - IDXGIFactory7* GetDXGIFactory() const noexcept { return _dxgiFactory.get(); } - IDXGIDevice4* GetDXGIDevice() const noexcept { return _dxgiDevice.get(); } IDXGIAdapter4* GetGraphicsAdapter() const noexcept { return _graphicsAdapter.get(); } - void BeginFrame(); + bool IsSupportTearing() const noexcept { + return _isSupportTearing; + } - void EndFrame(); + ID3D11SamplerState* GetSampler(D3D11_FILTER filterMode, D3D11_TEXTURE_ADDRESS_MODE addressMode) noexcept; private: - bool _ObtainGraphicsAdapterAndD3DDevice() noexcept; - - bool _TryCreateD3DDevice(IDXGIAdapter1* adapter) noexcept; - - bool _CreateSwapChain(); + bool _ObtainAdapterAndDevice(int adapterIdx) noexcept; + bool _TryCreateD3DDevice(const winrt::com_ptr& adapter) noexcept; winrt::com_ptr _dxgiFactory; - winrt::com_ptr _dxgiDevice; - winrt::com_ptr _swapChain; winrt::com_ptr _graphicsAdapter; winrt::com_ptr _d3dDevice; winrt::com_ptr _d3dDC; - Win32Utils::ScopedHandle _frameLatencyWaitableObject; - bool _supportTearing = false; - D3D_FEATURE_LEVEL _featureLevel = D3D_FEATURE_LEVEL_10_0; - - winrt::com_ptr _backBuffer; - - phmap::flat_hash_map> _rtvMap; - phmap::flat_hash_map> _srvMap; - phmap::flat_hash_map> _uavMap; - phmap::flat_hash_map< std::pair, winrt::com_ptr > _samMap; + + bool _isSupportTearing = false; }; } diff --git a/src/Magpie.Core/DirectXHelper.cpp b/src/Magpie.Core/DirectXHelper.cpp index 85ce03d5e..d406b97f4 100644 --- a/src/Magpie.Core/DirectXHelper.cpp +++ b/src/Magpie.Core/DirectXHelper.cpp @@ -51,4 +51,60 @@ bool DirectXHelper::CompileComputeShader( return true; } +bool DirectXHelper::IsDebugLayersAvailable() noexcept { +#ifdef _DEBUG + static bool result = 
SUCCEEDED(D3D11CreateDevice( + nullptr, + D3D_DRIVER_TYPE_NULL, // There is no need to create a real hardware device. + nullptr, + D3D11_CREATE_DEVICE_DEBUG, // Check for the SDK layers. + nullptr, // Any feature level will do. + 0, + D3D11_SDK_VERSION, + nullptr, // No need to keep the D3D device reference. + nullptr, // No need to know the feature level. + nullptr // No need to keep the D3D device context reference. + )); + return result; +#else + // Relaese 配置不使用调试层 + return false; +#endif +} + +winrt::com_ptr DirectXHelper::CreateTexture2D( + ID3D11Device* d3dDevice, + DXGI_FORMAT format, + UINT width, + UINT height, + UINT bindFlags, + D3D11_USAGE usage, + UINT miscFlags, + const D3D11_SUBRESOURCE_DATA* pInitialData +) noexcept { + const D3D11_TEXTURE2D_DESC desc{ + .Width = width, + .Height = height, + .MipLevels = 1, + .ArraySize = 1, + .Format = format, + .SampleDesc{ + .Count = 1, + .Quality = 0 + }, + .Usage = usage, + .BindFlags = bindFlags, + .MiscFlags = miscFlags + }; + + winrt::com_ptr result; + HRESULT hr = d3dDevice->CreateTexture2D(&desc, pInitialData, result.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateTexture2D 失败", hr); + return nullptr; + } + + return result; +} + } diff --git a/src/Magpie.Core/DirectXHelper.h b/src/Magpie.Core/DirectXHelper.h index 451ada361..380e1548f 100644 --- a/src/Magpie.Core/DirectXHelper.h +++ b/src/Magpie.Core/DirectXHelper.h @@ -12,6 +12,19 @@ struct DirectXHelper { const std::vector>& macros = {}, bool warningsAreErrors = false ); + + static bool IsDebugLayersAvailable() noexcept; + + static winrt::com_ptr CreateTexture2D( + ID3D11Device* d3dDevice, + DXGI_FORMAT format, + UINT width, + UINT height, + UINT bindFlags, + D3D11_USAGE usage = D3D11_USAGE_DEFAULT, + UINT miscFlags = 0, + const D3D11_SUBRESOURCE_DATA* pInitialData = nullptr + ) noexcept; }; } diff --git a/src/Magpie.Core/DwmSharedSurfaceFrameSource.cpp b/src/Magpie.Core/DwmSharedSurfaceFrameSource.cpp index c74279c58..3982dbd93 100644 --- 
a/src/Magpie.Core/DwmSharedSurfaceFrameSource.cpp +++ b/src/Magpie.Core/DwmSharedSurfaceFrameSource.cpp @@ -1,58 +1,75 @@ #include "pch.h" #include "DwmSharedSurfaceFrameSource.h" -#include "MagApp.h" -#include "DeviceResources.h" #include "Logger.h" - +#include "ScalingWindow.h" +#include "DirectXHelper.h" +#include "DeviceResources.h" namespace Magpie::Core { -bool DwmSharedSurfaceFrameSource::Initialize() { - if (!FrameSourceBase::Initialize()) { - Logger::Get().Error("初始化 FrameSourceBase 失败"); - return false; - } - - HMODULE hUser32 = GetModuleHandle(L"user32.dll"); - if (!hUser32) { - Logger::Get().Win32Error("获取 User32.dll 模块句柄失败"); - return false; +using DwmGetDxSharedSurfaceFunc = BOOL( + HWND hWnd, + HANDLE* phSurface, + LUID* pAdapterLuid, + ULONG* pFmtWindow, + ULONG* pPresentFlags, + ULONGLONG* pWin32KUpdateId +); + +static DwmGetDxSharedSurfaceFunc* dwmGetDxSharedSurface = nullptr; + +bool DwmSharedSurfaceFrameSource::_Initialize() noexcept { + if (!dwmGetDxSharedSurface) { + HMODULE hUser32 = GetModuleHandle(L"user32.dll"); + assert(hUser32); + dwmGetDxSharedSurface = (DwmGetDxSharedSurfaceFunc*)GetProcAddress(hUser32, "DwmGetDxSharedSurface"); + + if (!dwmGetDxSharedSurface) { + Logger::Get().Win32Error("获取函数 DwmGetDxSharedSurface 地址失败"); + return false; + } } - _dwmGetDxSharedSurface = (_DwmGetDxSharedSurfaceFunc*)GetProcAddress(hUser32, "DwmGetDxSharedSurface"); - if (!_dwmGetDxSharedSurface) { - Logger::Get().Win32Error("获取函数 DwmGetDxSharedSurface 地址失败"); + if (!_CalcSrcRect()) { + Logger::Get().Error("_CalcSrcRect 失败"); return false; } - if (!_UpdateSrcFrameRect()) { - Logger::Get().Error("_UpdateSrcFrameRect 失败"); - return false; - } + HWND hwndSrc = ScalingWindow::Get().HwndSrc(); - HWND hwndSrc = MagApp::Get().GetHwndSrc(); + RECT frameRect; + if (double a, bx, by; _GetMapToOriginDPI(hwndSrc, a, bx, by)) { + Logger::Get().Info(fmt::format("源窗口 DPI 缩放为 {}", 1 / a)); - double a, bx, by; - if (!_GetMapToOriginDPI(hwndSrc, a, bx, by)) { + 
frameRect = RECT{ + std::lround(_srcRect.left * a + bx), + std::lround(_srcRect.top * a + by), + std::lround(_srcRect.right * a + bx), + std::lround(_srcRect.bottom * a + by) + }; + } else { Logger::Get().Error("_GetMapToOriginDPI 失败"); - //MagApp::Get().SetErrorMsg(ErrorMessages::FAILED_TO_CAPTURE); - return false; - } - Logger::Get().Info(fmt::format("源窗口 DPI 缩放为 {}", 1 / a)); - - RECT frameRect = { - std::lround(_srcFrameRect.left * a + bx), - std::lround(_srcFrameRect.top * a + by), - std::lround(_srcFrameRect.right * a + bx), - std::lround(_srcFrameRect.bottom * a + by) - }; + // _GetMapToOriginDPI 失败则假设 DPI 缩放为 1 + RECT srcWindowRect; + if (!GetWindowRect(hwndSrc, &srcWindowRect)) { + Logger::Get().Win32Error("GetWindowRect 失败"); + return false; + } + + frameRect = RECT{ + _srcRect.left - srcWindowRect.left, + _srcRect.top - srcWindowRect.top, + _srcRect.right - srcWindowRect.left, + _srcRect.bottom - srcWindowRect.top + }; + } + if (frameRect.left < 0 || frameRect.top < 0 || frameRect.right < 0 || frameRect.bottom < 0 || frameRect.right - frameRect.left <= 0 || frameRect.bottom - frameRect.top <= 0 - ) { + ) { Logger::Get().Error("裁剪失败"); - //MagApp::Get().SetErrorMsg(ErrorMessages::FAILED_TO_CROP); return false; } @@ -65,14 +82,15 @@ bool DwmSharedSurfaceFrameSource::Initialize() { 1 }; - _output = MagApp::Get().GetDeviceResources().CreateTexture2D( + _output = DirectXHelper::CreateTexture2D( + _deviceResources->GetD3DDevice(), DXGI_FORMAT_B8G8R8A8_UNORM, frameRect.right - frameRect.left, frameRect.bottom - frameRect.top, D3D11_BIND_SHADER_RESOURCE ); if (!_output) { - Logger::Get().Error("创建 Texture2D 失败"); + Logger::Get().Error("CreateTexture2D 失败"); return false; } @@ -80,26 +98,26 @@ bool DwmSharedSurfaceFrameSource::Initialize() { return true; } -FrameSourceBase::UpdateState DwmSharedSurfaceFrameSource::Update() { +FrameSourceBase::UpdateState DwmSharedSurfaceFrameSource::_Update() noexcept { HANDLE sharedTextureHandle = NULL; - if 
(!_dwmGetDxSharedSurface(MagApp::Get().GetHwndSrc(), + if (!dwmGetDxSharedSurface(ScalingWindow::Get().HwndSrc(), &sharedTextureHandle, nullptr, nullptr, nullptr, nullptr) || !sharedTextureHandle - ) { + ) { Logger::Get().Win32Error("DwmGetDxSharedSurface 失败"); return UpdateState::Error; } winrt::com_ptr sharedTexture; - HRESULT hr = MagApp::Get().GetDeviceResources().GetD3DDevice() + HRESULT hr = _deviceResources->GetD3DDevice() ->OpenSharedResource(sharedTextureHandle, IID_PPV_ARGS(&sharedTexture)); if (FAILED(hr)) { Logger::Get().ComError("OpenSharedResource 失败", hr); return UpdateState::Error; } - MagApp::Get().GetDeviceResources().GetD3DDC() - ->CopySubresourceRegion(_output.get(), 0, 0, 0, 0, sharedTexture.get(), 0, &_frameInWnd); + _deviceResources->GetD3DDC()->CopySubresourceRegion( + _output.get(), 0, 0, 0, 0, sharedTexture.get(), 0, &_frameInWnd); return UpdateState::NewFrame; } diff --git a/src/Magpie.Core/DwmSharedSurfaceFrameSource.h b/src/Magpie.Core/DwmSharedSurfaceFrameSource.h index 283ac6309..01dae48a3 100644 --- a/src/Magpie.Core/DwmSharedSurfaceFrameSource.h +++ b/src/Magpie.Core/DwmSharedSurfaceFrameSource.h @@ -3,43 +3,36 @@ namespace Magpie::Core { -class DwmSharedSurfaceFrameSource : public FrameSourceBase { +class DwmSharedSurfaceFrameSource final : public FrameSourceBase { public: - DwmSharedSurfaceFrameSource() {} virtual ~DwmSharedSurfaceFrameSource() {} - bool Initialize() override; - - UpdateState Update() override; - - bool IsScreenCapture() override { + bool IsScreenCapture() const noexcept override { return false; } - const char* GetName() const noexcept override { + FrameSourceWaitType WaitType() const noexcept override { + return NoWait; + } + + const char* Name() const noexcept override { return "DwmSharedSurface"; } protected: - bool _HasRoundCornerInWin11() override { + bool _Initialize() noexcept override; + + UpdateState _Update() noexcept override; + + bool _HasRoundCornerInWin11() noexcept override { return false; } - bool 
_CanCaptureTitleBar() override { + bool _CanCaptureTitleBar() noexcept override { return false; } private: - using _DwmGetDxSharedSurfaceFunc = bool( - HWND hWnd, - HANDLE* phSurface, - LUID* pAdapterLuid, - ULONG* pFmtWindow, - ULONG* pPresentFlags, - ULONGLONG* pWin32KUpdateId - ); - _DwmGetDxSharedSurfaceFunc* _dwmGetDxSharedSurface = nullptr; - D3D11_BOX _frameInWnd{}; }; diff --git a/src/Magpie.Core/EffectCacheManager.cpp b/src/Magpie.Core/EffectCacheManager.cpp index f94565002..aef4ea2a8 100644 --- a/src/Magpie.Core/EffectCacheManager.cpp +++ b/src/Magpie.Core/EffectCacheManager.cpp @@ -70,14 +70,14 @@ void serialize(Archive& ar, EffectPassDesc& o) { template void serialize(Archive& ar, EffectDesc& o) { - ar& o.name& o.outSizeExpr& o.params& o.textures& o.samplers& o.passes& o.flags; + ar& o.name& o.params& o.textures& o.samplers& o.passes& o.flags; } static constexpr const uint32_t MAX_CACHE_COUNT = 127; // 缓存版本 // 当缓存文件结构有更改时更新它,使旧缓存失效 -static constexpr const uint32_t EFFECT_CACHE_VERSION = 12; +static constexpr const uint32_t EFFECT_CACHE_VERSION = 13; static std::wstring GetLinearEffectName(std::wstring_view effectName) { diff --git a/src/Magpie.Core/EffectCompiler.cpp b/src/Magpie.Core/EffectCompiler.cpp index cf8f71b25..26f960c91 100644 --- a/src/Magpie.Core/EffectCompiler.cpp +++ b/src/Magpie.Core/EffectCompiler.cpp @@ -15,8 +15,10 @@ namespace Magpie::Core { -static const char* META_INDICATOR = "//!"; +// 当前 MagpieFX 版本 +static constexpr uint32_t MAGPIE_FX_VERSION = 4; +static const char* META_INDICATOR = "//!"; class PassInclude : public ID3DInclude { public: @@ -57,7 +59,7 @@ class PassInclude : public ID3DInclude { std::wstring _localDir; }; -static UINT RemoveComments(std::string& source) { +static uint32_t RemoveComments(std::string& source) { // 确保以换行符结尾 if (source.back() != '\n') { source.push_back('\n'); @@ -149,7 +151,7 @@ static bool CheckNextToken(std::string_view& source, std::string_view token) { } template -static UINT 
GetNextToken(std::string_view& source, std::string_view& value) { +static uint32_t GetNextToken(std::string_view& source, std::string_view& value) { RemoveLeadingBlanks(source); if (source.empty()) { @@ -204,7 +206,7 @@ static bool CheckMagic(std::string_view& source) { return true; } -static UINT GetNextString(std::string_view& source, std::string_view& value) { +static uint32_t GetNextString(std::string_view& source, std::string_view& value) { RemoveLeadingBlanks(source); size_t pos = source.find('\n'); @@ -219,7 +221,7 @@ static UINT GetNextString(std::string_view& source, std::string_view& value) { } template -static UINT GetNextNumber(std::string_view& source, T& value) { +static uint32_t GetNextNumber(std::string_view& source, T& value) { RemoveLeadingBlanks(source); if (source.empty()) { @@ -236,7 +238,7 @@ static UINT GetNextNumber(std::string_view& source, T& value) { return 0; } -static UINT GetNextExpr(std::string_view& source, std::string& expr) { +static uint32_t GetNextExpr(std::string_view& source, std::string& expr) { RemoveLeadingBlanks(source); size_t size = std::min(source.find('\n') + 1, source.size()); @@ -260,11 +262,11 @@ static UINT GetNextExpr(std::string_view& source, std::string& expr) { return 0; } -static UINT ResolveHeader(std::string_view block, EffectDesc& desc, bool noCompile) { +static uint32_t ResolveHeader(std::string_view block, EffectDesc& desc, bool noCompile) { // 必需的选项:VERSION - // 可选的选项:OUTPUT_WIDTH, OUTPUT_HEIGHT, USE_DYNAMIC, GENERIC_DOWNSCALER, BUILT_INT + // 可选的选项:USE_DYNAMIC, SORT_NAME - std::bitset<6> processed; + std::bitset<3> processed; std::string_view token; @@ -284,63 +286,34 @@ static UINT ResolveHeader(std::string_view block, EffectDesc& desc, bool noCompi } processed[0] = true; - UINT version; + uint32_t version; if (GetNextNumber(block, version)) { return 1; } - if (version != EffectCompiler::VERSION) { + if (version != MAGPIE_FX_VERSION) { return 1; } if (GetNextToken(block, token) != 2) { return 1; } - } 
else if (t == "OUTPUT_WIDTH") { + } else if (t == "USE_DYNAMIC") { if (processed[1]) { return 1; } processed[1] = true; - if (GetNextExpr(block, desc.outSizeExpr.first)) { - return 1; - } - } else if (t == "OUTPUT_HEIGHT") { - if (processed[2]) { - return 1; - } - processed[2] = true; - - if (GetNextExpr(block, desc.outSizeExpr.second)) { - return 1; - } - } else if (t == "USE_DYNAMIC") { - if (processed[3]) { - return 1; - } - processed[3] = true; - if (GetNextToken(block, token) != 2) { return 1; } desc.flags |= EffectFlags::UseDynamic; - } else if (t == "GENERIC_DOWNSCALER") { - if (processed[4]) { - return 1; - } - processed[4] = true; - - if (GetNextToken(block, token) != 2) { - return 1; - } - - desc.flags |= EffectFlags::GenericDownscaler; } else if (t == "SORT_NAME") { - if (processed[5]) { + if (processed[2]) { return 1; } - processed[5] = true; + processed[2] = true; std::string_view sortName; if (GetNextString(block, sortName)) { @@ -360,19 +333,14 @@ static UINT ResolveHeader(std::string_view block, EffectDesc& desc, bool noCompi return 1; } - if (!processed[0] || processed[1] != processed[2]) { - return 1; - } - - // GENERIC_DOWNSCALER 和 OUTPUT_WIDTH/OUTPUT_HEIGHT 冲突 - if (processed[4] && processed[1]) { + if (!processed[0]) { return 1; } return 0; } -static UINT ResolveParameter(std::string_view block, EffectDesc& desc) { +static uint32_t ResolveParameter(std::string_view block, EffectDesc& desc) { // 必需的选项:DEFAULT, MIN, MAX, STEP // 可选的选项:LABEL @@ -530,8 +498,9 @@ static UINT ResolveParameter(std::string_view block, EffectDesc& desc) { } -static UINT ResolveTexture(std::string_view block, EffectDesc& desc) { +static uint32_t ResolveTexture(std::string_view block, EffectDesc& desc) { // 如果名称为 INPUT 不能有任何选项,含 SOURCE 时不能有任何其他选项 + // 如果名称为 OUTPUT 只能有 WIDTH 或 HEIGHT // 否则必需的选项:FORMAT // 可选的选项:WIDTH, HEIGHT @@ -641,13 +610,21 @@ static UINT ResolveTexture(std::string_view block, EffectDesc& desc) { return 1; } - if (token == "INPUT") { - if (processed[1] 
|| processed[2]) { + if (token == desc.textures[0].name) { + if (processed.any()) { return 1; } // INPUT 已为第一个元素 desc.textures.pop_back(); + } else if (token == desc.textures[1].name) { + if (processed[0] || processed[1]) { + return 1; + } + + // OUTPUT 已为第二个元素 + desc.textures[1].sizeExpr = std::move(texDesc.sizeExpr); + desc.textures.pop_back(); } else { texDesc.name = token; } @@ -663,7 +640,7 @@ static UINT ResolveTexture(std::string_view block, EffectDesc& desc) { return 0; } -static UINT ResolveSampler(std::string_view block, EffectDesc& desc) { +static uint32_t ResolveSampler(std::string_view block, EffectDesc& desc) { // 必选项:FILTER // 可选项:ADDRESS @@ -764,7 +741,7 @@ static UINT ResolveSampler(std::string_view block, EffectDesc& desc) { return 0; } -static UINT ResolveCommon(std::string_view& block) { +static uint32_t ResolveCommon(std::string_view& block) { // 无选项 if (!CheckNextToken(block, META_INDICATOR)) { @@ -782,12 +759,12 @@ static UINT ResolveCommon(std::string_view& block) { return 0; } -static UINT ResolvePasses( +static uint32_t ResolvePasses( SmallVector& blocks, EffectDesc& desc ) { - // 必选项:IN - // 可选项:OUT, BLOCK_SIZE, NUM_THREADS, STYLE + // 必选项:IN, OUT + // 可选项:BLOCK_SIZE, NUM_THREADS, STYLE // STYLE 为 PS 时不能有 BLOCK_SIZE 或 NUM_THREADS std::string_view token; @@ -795,10 +772,10 @@ static UINT ResolvePasses( // 首先解析通道序号 // first 为 Pass 序号,second 为在 blocks 中的位置 - SmallVector> passNumbers; + SmallVector> passNumbers; passNumbers.reserve(blocks.size()); - for (UINT i = 0; i < blocks.size(); ++i) { + for (uint32_t i = 0; i < blocks.size(); ++i) { std::string_view& block = blocks[i]; if (!CheckNextToken(block, META_INDICATOR)) { @@ -809,7 +786,7 @@ static UINT ResolvePasses( return 1; } - UINT index; + uint32_t index; if (GetNextNumber(block, index)) { return 1; } @@ -820,32 +797,35 @@ static UINT ResolvePasses( passNumbers.emplace_back(index, i); } + // 以通道序号排序 std::sort( passNumbers.begin(), passNumbers.end(), - [](const std::pair& l, const 
std::pair& r) {return l.first < r.first; } + [](const auto& l, const auto& r) { return l.first < r.first; } ); - SmallVector temp = blocks; - for (UINT i = 0; i < blocks.size(); ++i) { - if (passNumbers[i].first != i + 1) { - // PASS 序号不连续 - return 1; - } + { + SmallVector temp = blocks; + for (uint32_t i = 0; i < blocks.size(); ++i) { + if (passNumbers[i].first != i + 1) { + // PASS 序号不连续 + return 1; + } - blocks[i] = temp[passNumbers[i].second]; + blocks[i] = temp[passNumbers[i].second]; + } } desc.passes.resize(blocks.size()); - for (UINT i = 0; i < blocks.size(); ++i) { + for (uint32_t i = 0; i < blocks.size(); ++i) { std::string_view& block = blocks[i]; auto& passDesc = desc.passes[i]; // 用于检查输入和输出中重复的纹理 - phmap::flat_hash_map texNames; + phmap::flat_hash_map texNames; texNames.reserve(desc.textures.size()); - for (UINT j = 0; j < desc.textures.size(); ++j) { + for (uint32_t j = 0; j < desc.textures.size(); ++j) { texNames.emplace(desc.textures[j].name, j); } @@ -868,17 +848,17 @@ static UINT ResolvePasses( } processed[0] = true; - std::string_view binds; - if (GetNextString(block, binds)) { + std::string_view inputsStr; + if (GetNextString(block, inputsStr)) { return 1; } - for (std::string_view& input : StrUtils::Split(binds, ',')) { + for (std::string_view& input : StrUtils::Split(inputsStr, ',')) { StrUtils::Trim(input); auto it = texNames.find(input); - if (it == texNames.end()) { - // 未找到纹理名称 + if (it == texNames.end() || it->second == 1) { + // 不支持 OUTPUT 作为输入 return 1; } @@ -891,33 +871,42 @@ static UINT ResolvePasses( } processed[1] = true; - std::string_view saves; - if (GetNextString(block, saves)) { - return 1; - } - - SmallVector outputs = StrUtils::Split(saves, ','); - if (outputs.size() > 8) { - // 最多 8 个输出 + std::string_view outputsStr; + if (GetNextString(block, outputsStr)) { return 1; } - for (std::string_view& output : outputs) { - StrUtils::Trim(output); - - auto it = texNames.find(output); - if (it == texNames.end()) { - // 未找到纹理名称 + if 
(i == blocks.size() - 1) { + // 最后一个通道的输出只能是 OUTPUT + if (outputsStr != desc.textures[1].name) { return 1; } - if (it->second == 0 || !desc.textures[it->second].source.empty()) { - // INPUT 和从文件读取的纹理不能作为输出 + passDesc.outputs.push_back(1); + } else { + SmallVector outputs = StrUtils::Split(outputsStr, ','); + if (outputs.size() > 8) { + // 最多 8 个输出 return 1; } - passDesc.outputs.push_back(it->second); - texNames.erase(it); + for (std::string_view& output : outputs) { + StrUtils::Trim(output); + + auto it = texNames.find(output); + if (it == texNames.end()) { + // 未找到纹理名称 + return 1; + } + + if (it->second == 0 || !desc.textures[it->second].source.empty()) { + // INPUT 和从文件读取的纹理不能作为输出 + return 1; + } + + passDesc.outputs.push_back(it->second); + texNames.erase(it); + } } } else if (t == "BLOCK_SIZE") { if (processed[2]) { @@ -935,7 +924,7 @@ static UINT ResolvePasses( return 1; } - UINT num; + uint32_t num; if (GetNextNumber(split[0], num) || num == 0) { return 1; } @@ -974,8 +963,8 @@ static UINT ResolvePasses( return 1; } - for (UINT j = 0; j < 3; ++j) { - UINT num = 1; + for (uint32_t j = 0; j < 3; ++j) { + uint32_t num = 1; if (split.size() > j) { if (GetNextNumber(split[j], num)) { return 1; @@ -1025,6 +1014,11 @@ static UINT ResolvePasses( } } + // 必须指定 IN 和 OUT + if (!processed[0] || !processed[1]) { + return 1; + } + if (passDesc.isPSStyle) { if (processed[2] || processed[3]) { return 1; @@ -1043,10 +1037,9 @@ static UINT ResolvePasses( return 0; } - -static UINT GeneratePassSource( +static uint32_t GeneratePassSource( const EffectDesc& desc, - UINT passIdx, + uint32_t passIdx, std::string_view cbHlsl, const SmallVector& commonBlocks, std::string_view passBlock, @@ -1054,8 +1047,6 @@ static UINT GeneratePassSource( std::string& result, std::vector>& macros ) { - bool isLastEffect = desc.flags & EffectFlags::LastEffect; - bool isLastPass = passIdx == desc.passes.size(); bool isInlineParams = desc.flags & EffectFlags::InlineParams; const EffectPassDesc& 
passDesc = desc.passes[(size_t)passIdx - 1]; @@ -1082,31 +1073,16 @@ static UINT GeneratePassSource( // SRV for (int i = 0; i < passDesc.inputs.size(); ++i) { auto& texDesc = desc.textures[passDesc.inputs[i]]; - result.append(fmt::format("Texture2D<{}> {} : register(t{});\n", EffectHelper::FORMAT_DESCS[(UINT)texDesc.format].srvTexelType, texDesc.name, i)); - } - - if (isLastEffect && isLastPass) { - result.append(fmt::format("Texture2D __CURSOR : register(t{});\n", passDesc.inputs.size())); + result.append(fmt::format("Texture2D<{}> {} : register(t{});\n", EffectHelper::FORMAT_DESCS[(uint32_t)texDesc.format].srvTexelType, texDesc.name, i)); } // UAV - if (passDesc.outputs.empty()) { - if (!isLastPass) { - return 1; - } - - result.append("RWTexture2D __OUTPUT : register(u0);\n"); - } else { - if (isLastPass) { - return 1; - } - - for (int i = 0; i < passDesc.outputs.size(); ++i) { - auto& texDesc = desc.textures[passDesc.outputs[i]]; - result.append(fmt::format("RWTexture2D<{}> {} : register(u{});\n", EffectHelper::FORMAT_DESCS[(UINT)texDesc.format].uavTexelType, texDesc.name, i)); - } + for (int i = 0; i < passDesc.outputs.size(); ++i) { + auto& texDesc = desc.textures[passDesc.outputs[i]]; + result.append(fmt::format("RWTexture2D<{}> {} : register(u{});\n", EffectHelper::FORMAT_DESCS[(uint32_t)texDesc.format].uavTexelType, texDesc.name, i)); } + if (!desc.samplers.empty()) { // 采样器 for (int i = 0; i < desc.samplers.size(); ++i) { @@ -1114,11 +1090,6 @@ static UINT GeneratePassSource( } } - if (isLastEffect) { - // 绘制光标使用的采样器 - result.append(fmt::format("SamplerState __CURSOR_SAMPLER : register(s{});\n", desc.samplers.size())); - } - result.push_back('\n'); //////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1141,14 +1112,6 @@ static UINT GeneratePassSource( macros.emplace_back("MP_INLINE_PARAMS", ""); } - if (isLastPass) { - macros.emplace_back("MP_LAST_PASS", ""); - } - - if (isLastEffect) { - 
macros.emplace_back("MP_LAST_EFFECT", ""); - } - #ifdef _DEBUG macros.emplace_back("MP_DEBUG", ""); #endif @@ -1159,20 +1122,20 @@ static UINT GeneratePassSource( macros.emplace_back("MP_FP16", ""); macros.emplace_back("MF", "min16float"); - for (UINT i = 0; i < 4; ++i) { + for (uint32_t i = 0; i < 4; ++i) { macros.emplace_back(StrUtils::Concat("MF", numbers[i]), StrUtils::Concat("min16float", numbers[i])); - for (UINT j = 0; j < 4; ++j) { + for (uint32_t j = 0; j < 4; ++j) { macros.emplace_back(StrUtils::Concat("MF", numbers[i], "x", numbers[j]), StrUtils::Concat("min16float", numbers[i], "x", numbers[j])); } } } else { macros.emplace_back("MF", "float"); - for (UINT i = 0; i < 4; ++i) { + for (uint32_t i = 0; i < 4; ++i) { macros.emplace_back(StrUtils::Concat("MF", numbers[i]), StrUtils::Concat("float", numbers[i])); - for (UINT j = 0; j < 4; ++j) { + for (uint32_t j = 0; j < 4; ++j) { macros.emplace_back(StrUtils::Concat("MF", numbers[i], "x", numbers[j]), StrUtils::Concat("float", numbers[i], "x", numbers[j])); } } @@ -1218,46 +1181,6 @@ static UINT GeneratePassSource( // 内置函数 // //////////////////////////////////////////////////////////////////////////////////////////////////////// - if (isLastPass) { - result.append("bool CheckViewport(int2 pos) { return pos.x < __viewport.x && pos.y < __viewport.y; }\n"); - - if (isLastEffect) { - // 255.001953 的由来见 https://stackoverflow.com/questions/52103720/why-does-d3dcolortoubyte4-multiplies-components-by-255-001953f - result.append(R"(void WriteToOutput(uint2 pos, float3 color) { - color = saturate(color); - pos += __offset.zw; - if ((int)pos.x >= __cursorRect.x && (int)pos.y >= __cursorRect.y && (int)pos.x < __cursorRect.z && (int)pos.y < __cursorRect.w) { - float4 mask = __CURSOR.SampleLevel(__CURSOR_SAMPLER, (pos - __cursorRect.xy + 0.5f) * __cursorPt, 0); - if (__cursorType == 0){ - color = color * mask.a + mask.rgb; - } else if (__cursorType == 1) { - if (mask.a < 0.5f){ - color = mask.rgb; - } else { - color = 
(uint3(round(color * 255.0f)) ^ uint3(mask.rgb * 255.001953f)) / 255.0f; - } - } else { - if( mask.x > 0.5f) { - if (mask.y > 0.5f) { - color = 1 - color; - } - } else { - if (mask.y > 0.5f) { - color = float3(1, 1, 1); - } else { - color = float3(0, 0, 0); - } - } - } - } - __OUTPUT[pos] = float4(color, 1); -} -)"); - } else { - result.append("#define WriteToOutput(pos,color) __OUTPUT[pos] = float4(color, 1)\n"); - } - } - result.append(R"(uint __Bfe(uint src, uint off, uint bits) { uint mask = (1u << bits) - 1; return (src >> off) & mask; } uint __BfiM(uint src, uint ins, uint bits) { uint mask = (1u << bits) - 1; return (ins & mask) | (src & (~mask)); } uint2 Rmp8x8(uint a) { return uint2(__Bfe(a, 1u, 3u), __BfiM(__Bfe(a, 3u, 3u), a, 1u)); } @@ -1270,7 +1193,6 @@ float2 GetScale() { return __scale; } if (desc.flags & EffectFlags::UseDynamic) { result.append(R"(uint GetFrameCount() { return __frameCount; } -uint2 GetCursorPos() { return __cursorPos; } )"); } else { @@ -1297,79 +1219,52 @@ uint2 GetCursorPos() { return __cursorPos; } //////////////////////////////////////////////////////////////////////////////////////////////////////// if (passDesc.isPSStyle) { if (passDesc.outputs.size() <= 1) { - if (isLastPass) { - result.append(fmt::format(R"([numthreads(64, 1, 1)] -void __M(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {{ - uint2 gxy = Rmp8x8(tid.x) + (gid.xy << 4u){0}; - float2 pos = (gxy + 0.5f) * __outputPt; - float2 step = 8 * __outputPt; - - if (!CheckViewport(gxy)) {{ - return; - }}; - - WriteToOutput(gxy, Pass{1}(pos).rgb); - - gxy.x += 8u; - pos.x += step.x; - if (CheckViewport(gxy)) {{ - WriteToOutput(gxy, Pass{1}(pos).rgb); - }}; - - gxy.y += 8u; - pos.y += step.y; - if (CheckViewport(gxy)) {{ - WriteToOutput(gxy, Pass{1}(pos).rgb); - }}; - - gxy.x -= 8u; - pos.x -= step.x; - if (CheckViewport(gxy)) {{ - WriteToOutput(gxy, Pass{1}(pos).rgb); - }}; -}} -)", isLastEffect ? 
" + __offset.xy" : "", passIdx)); + std::string outputSize; + std::string outputPt; + if (passIdx == desc.passes.size()) { + // 最后一个通道 + outputSize = "__outputSize"; + outputPt = "__outputPt"; } else { - result.append(fmt::format(R"([numthreads(64, 1, 1)] + outputSize = fmt::format("__pass{}OutputSize", passIdx); + outputPt = fmt::format("__pass{}OutputPt", passIdx); + } + + result.append(fmt::format(R"([numthreads(64, 1, 1)] void __M(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {{ - uint2 gxy = Rmp8x8(tid.x) + (gid.xy << 4u); - if (gxy.x >= __pass{0}OutputSize.x || gxy.y >= __pass{0}OutputSize.y) {{ + uint2 gxy = (gid.xy << 4u) + Rmp8x8(tid.x); + if (gxy.x >= {1}.x || gxy.y >= {1}.y) {{ return; }} - float2 pos = (gxy + 0.5f) * __pass{0}OutputPt; - float2 step = 8 * __pass{0}OutputPt; + float2 pos = (gxy + 0.5f) * {2}; + float2 step = 8 * {2}; - {1}[gxy] = Pass{0}(pos); + {3}[gxy] = Pass{0}(pos); gxy.x += 8u; pos.x += step.x; - if (gxy.x < __pass{0}OutputSize.x && gxy.y < __pass{0}OutputSize.y) {{ - {1}[gxy] = Pass{0}(pos); + if (gxy.x < {1}.x && gxy.y < {1}.y) {{ + {3}[gxy] = Pass{0}(pos); }} gxy.y += 8u; pos.y += step.y; - if (gxy.x < __pass{0}OutputSize.x && gxy.y < __pass{0}OutputSize.y) {{ - {1}[gxy] = Pass{0}(pos); + if (gxy.x < {1}.x && gxy.y < {1}.y) {{ + {3}[gxy] = Pass{0}(pos); }} gxy.x -= 8u; pos.x -= step.x; - if (gxy.x < __pass{0}OutputSize.x && gxy.y < __pass{0}OutputSize.y) {{ - {1}[gxy] = Pass{0}(pos); + if (gxy.x < {1}.x && gxy.y < {1}.y) {{ + {3}[gxy] = Pass{0}(pos); }} }} -)", passIdx, desc.textures[passDesc.outputs[0]].name)); - } +)", passIdx, outputSize, outputPt, desc.textures[passDesc.outputs[0]].name)); } else { // 多渲染目标 - if (isLastPass) { - return 1; - } - result.append(fmt::format(R"([numthreads(64, 1, 1)] void __M(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {{ - uint2 gxy = Rmp8x8(tid.x) + (gid.xy << 4u); + uint2 gxy = (gid.xy << 4u) + Rmp8x8(tid.x); if (gxy.x >= __pass{0}OutputSize.x || gxy.y >= 
__pass{0}OutputSize.y) {{ return; }} @@ -1379,7 +1274,7 @@ void __M(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {{ for (int i = 0; i < passDesc.outputs.size(); ++i) { auto& texDesc = desc.textures[passDesc.outputs[i]]; result.append(fmt::format("\t{} c{};\n", - EffectHelper::FORMAT_DESCS[(UINT)texDesc.format].srvTexelType, i)); + EffectHelper::FORMAT_DESCS[(uint32_t)texDesc.format].srvTexelType, i)); } std::string callPass = fmt::format("\tPass{}(pos, ", passIdx); @@ -1417,7 +1312,7 @@ void __M(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {{ // 大部分情况下 BLOCK_SIZE 都是 2 的整数次幂,这时将乘法转换为位移 std::string blockStartExpr; if (passDesc.blockSize.first == passDesc.blockSize.second && std::has_single_bit(passDesc.blockSize.first)) { - UINT nShift = std::lroundf(std::log2f((float)passDesc.blockSize.first)); + uint32_t nShift = std::lroundf(std::log2f((float)passDesc.blockSize.first)); blockStartExpr = fmt::format("(gid.xy << {})", nShift); } else { blockStartExpr = fmt::format("gid.xy * uint2({}, {})", passDesc.blockSize.first, passDesc.blockSize.second); @@ -1425,15 +1320,15 @@ void __M(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {{ result.append(fmt::format(R"([numthreads({}, {}, {})] void __M(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {{ - Pass{}({}{}, tid); + Pass{}({}, tid); }} -)", passDesc.numThreads[0], passDesc.numThreads[1], passDesc.numThreads[2], passIdx, blockStartExpr, isLastEffect && isLastPass ? 
" + __offset.xy" : "")); +)", passDesc.numThreads[0], passDesc.numThreads[1], passDesc.numThreads[2], passIdx, blockStartExpr)); } return 0; } -static UINT CompilePasses( +static uint32_t CompilePasses( EffectDesc& desc, uint32_t flags, const SmallVector& commonBlocks, @@ -1447,29 +1342,16 @@ static UINT CompilePasses( //////////////////////////////////////////////////////////////////////////////////////////////////////// std::string cbHlsl = R"(cbuffer __CB1 : register(b0) { - int4 __cursorRect; - float2 __cursorPt; - uint2 __cursorPos; - uint __cursorType; - uint __frameCount; -}; -cbuffer __CB2 : register(b1) { uint2 __inputSize; uint2 __outputSize; float2 __inputPt; float2 __outputPt; float2 __scale; - int2 __viewport; )"; - if (desc.flags & EffectFlags::LastEffect) { - // 指定输出到屏幕的位置 - cbHlsl.append("\tint4 __offset;\n"); - } - // PS 样式需要获知输出纹理的尺寸 // 最后一个通道不需要 - for (UINT i = 0, end = (UINT)desc.passes.size() - 1; i < end; ++i) { + for (uint32_t i = 0, end = (uint32_t)desc.passes.size() - 1; i < end; ++i) { if (desc.passes[i].isPSStyle) { cbHlsl.append(fmt::format("\tuint2 __pass{0}OutputSize;\n\tfloat2 __pass{0}OutputPt;\n", i + 1)); } @@ -1484,10 +1366,17 @@ cbuffer __CB2 : register(b1) { } } - cbHlsl.append("};\n\n"); + cbHlsl.append("};\n"); + + if (desc.flags & EffectFlags::UseDynamic) { + cbHlsl.append("cbuffer __CB2 : register(b1) { uint __frameCount; };\n\n"); + } + + std::wstring sourcesPathName = StrUtils::Concat(CommonSharedConstants::SOURCES_DIR, StrUtils::UTF8ToUTF16(desc.name)); + std::wstring sourcesPath = sourcesPathName.substr(0, sourcesPathName.find_last_of(L'\\')); - if ((flags & EffectCompilerFlags::SaveSources) && !Win32Utils::DirExists(CommonSharedConstants::SOURCES_DIR)) { - if (!CreateDirectory(CommonSharedConstants::SOURCES_DIR, nullptr)) { + if ((flags & EffectCompilerFlags::SaveSources) && !Win32Utils::DirExists(sourcesPath.c_str())) { + if (!Win32Utils::CreateDir(sourcesPath, true)) { Logger::Get().Win32Error("创建 sources 文件夹失败"); } } 
@@ -1498,7 +1387,7 @@ cbuffer __CB2 : register(b1) { : L"effects\\" + StrUtils::UTF8ToUTF16(std::string_view(desc.name.c_str(), delimPos + 1))); // 并行生成代码和编译 - Win32Utils::RunParallel([&](UINT id) { + Win32Utils::RunParallel([&](uint32_t id) { std::string source; std::vector> macros; if (GeneratePassSource(desc, id + 1, cbHlsl, commonBlocks, passBlocks[id], inlineParams, source, macros)) { @@ -1508,8 +1397,8 @@ cbuffer __CB2 : register(b1) { if (flags & EffectCompilerFlags::SaveSources) { std::wstring fileName = desc.passes.size() == 1 - ? fmt::format(L"{}{}.hlsl", CommonSharedConstants::SOURCES_DIR, StrUtils::UTF8ToUTF16(desc.name)) - : fmt::format(L"{}{}_Pass{}.hlsl", CommonSharedConstants::SOURCES_DIR, StrUtils::UTF8ToUTF16(desc.name), id + 1); + ? StrUtils::Concat(sourcesPathName, L".hlsl") + : fmt::format(L"{}_Pass{}.hlsl", sourcesPathName, id + 1); if (!Win32Utils::WriteFile(fileName.c_str(), source.data(), source.size())) { Logger::Get().Error(fmt::format("保存 Pass{} 源码失败", id + 1)); @@ -1521,7 +1410,7 @@ cbuffer __CB2 : register(b1) { ) { Logger::Get().Error(fmt::format("编译 Pass{} 失败", id + 1)); } - }, (UINT)passBlocks.size()); + }, (uint32_t)passBlocks.size()); // 检查编译结果 for (const EffectPassDesc& d : desc.passes) { @@ -1533,23 +1422,27 @@ cbuffer __CB2 : register(b1) { return 0; } +static std::string ReadEffectSource(const std::wstring& effectName) noexcept { + std::wstring fileName = StrUtils::Concat(CommonSharedConstants::EFFECTS_DIR, effectName, L".hlsl"); + + std::string source; + if (!Win32Utils::ReadTextFile(fileName.c_str(), source)) { + Logger::Get().Error("读取源文件失败"); + return {}; + } + return source; +} uint32_t EffectCompiler::Compile( EffectDesc& desc, uint32_t flags, const phmap::flat_hash_map* inlineParams -) { +) noexcept { bool noCompile = flags & EffectCompilerFlags::NoCompile; bool noCache = noCompile || (flags & EffectCompilerFlags::NoCache); std::wstring effectName = StrUtils::UTF8ToUTF16(desc.name); - std::wstring fileName = 
StrUtils::Concat(CommonSharedConstants::EFFECTS_DIR, effectName, L".hlsl"); - - std::string source; - if (!Win32Utils::ReadTextFile(fileName.c_str(), source)) { - Logger::Get().Error("读取源文件失败"); - return 1; - } + std::string source = ReadEffectSource(effectName); if (source.empty()) { Logger::Get().Error("源文件为空"); @@ -1601,28 +1494,28 @@ uint32_t EffectCompiler::Compile( size_t curBlockOff = 0; auto completeCurrentBlock = [&](size_t len, BlockType newBlockType) { - if (curBlockType == BlockType::Header) { + switch (curBlockType) { + case BlockType::Header: headerBlock = sourceView.substr(curBlockOff, len); - } else if (curBlockType == BlockType::Parameter) { + break; + case BlockType::Parameter: paramBlocks.push_back(sourceView.substr(curBlockOff, len)); - } else if (!noCompile) { - switch (curBlockType) { - case BlockType::Texture: - textureBlocks.push_back(sourceView.substr(curBlockOff, len)); - break; - case BlockType::Sampler: - samplerBlocks.push_back(sourceView.substr(curBlockOff, len)); - break; - case BlockType::Common: - commonBlocks.push_back(sourceView.substr(curBlockOff, len)); - break; - case BlockType::Pass: - passBlocks.push_back(sourceView.substr(curBlockOff, len)); - break; - default: - assert(false); - break; - } + break; + case BlockType::Texture: + textureBlocks.push_back(sourceView.substr(curBlockOff, len)); + break; + case BlockType::Sampler: + samplerBlocks.push_back(sourceView.substr(curBlockOff, len)); + break; + case BlockType::Common: + commonBlocks.push_back(sourceView.substr(curBlockOff, len)); + break; + case BlockType::Pass: + passBlocks.push_back(sourceView.substr(curBlockOff, len)); + break; + default: + assert(false); + break; } curBlockType = newBlockType; @@ -1687,24 +1580,30 @@ uint32_t EffectCompiler::Compile( } } - if (!noCompile) { - desc.textures.clear(); - // 纹理第一个元素为 INPUT - { - auto& texDesc = desc.textures.emplace_back(); - texDesc.name = "INPUT"; - texDesc.format = EffectIntermediateTextureFormat::R8G8B8A8_UNORM; - 
texDesc.sizeExpr.first = "INPUT_WIDTH"; - texDesc.sizeExpr.second = "INPUT_HEIGHT"; - } - - for (size_t i = 0; i < textureBlocks.size(); ++i) { - if (ResolveTexture(textureBlocks[i], desc)) { - Logger::Get().Error(fmt::format("解析 Texture#{} 块失败", i + 1)); - return 1; - } + desc.textures.clear(); + // 第一个元素为 INPUT + { + auto& inputDesc = desc.textures.emplace_back(); + inputDesc.name = "INPUT"; + inputDesc.format = EffectIntermediateTextureFormat::R8G8B8A8_UNORM; + inputDesc.sizeExpr.first = "INPUT_WIDTH"; + inputDesc.sizeExpr.second = "INPUT_HEIGHT"; + } + // 第二个元素为 OUTPUT + { + auto& outputDesc = desc.textures.emplace_back(); + outputDesc.name = "OUTPUT"; + outputDesc.format = EffectIntermediateTextureFormat::R8G8B8A8_UNORM; + } + + for (size_t i = 0; i < textureBlocks.size(); ++i) { + if (ResolveTexture(textureBlocks[i], desc)) { + Logger::Get().Error(fmt::format("解析 Texture#{} 块失败", i + 1)); + return 1; } + } + if (!noCompile) { desc.samplers.clear(); for (size_t i = 0; i < samplerBlocks.size(); ++i) { if (ResolveSampler(samplerBlocks[i], desc)) { diff --git a/src/Magpie.Core/EffectCompiler.h b/src/Magpie.Core/EffectCompiler.h index 387111a80..d9490becd 100644 --- a/src/Magpie.Core/EffectCompiler.h +++ b/src/Magpie.Core/EffectCompiler.h @@ -3,26 +3,21 @@ namespace Magpie::Core { -struct EffectDesc; - struct EffectCompilerFlags { - static constexpr const uint32_t NoCache = 0x1; - static constexpr const uint32_t SaveSources = 0x2; - static constexpr const uint32_t WarningsAreErrors = 0x4; + static constexpr const uint32_t NoCache = 1; + static constexpr const uint32_t SaveSources = 1 << 1; + static constexpr const uint32_t WarningsAreErrors = 1 << 2; // 只解析输出尺寸和参数,供用户界面使用 - static constexpr const uint32_t NoCompile = 0x8; + static constexpr const uint32_t NoCompile = 1 << 3; }; struct EffectCompiler { // 调用者需填入 desc 中的 name 和 flags static uint32_t Compile( - EffectDesc& desc, + struct EffectDesc& desc, uint32_t flags, // EffectCompilerFlags const 
phmap::flat_hash_map* inlineParams = nullptr - ); - - // 当前 MagpieFX 版本 - static constexpr UINT VERSION = 3; + ) noexcept; }; } diff --git a/src/Magpie.Core/EffectDesc.h b/src/Magpie.Core/EffectDesc.h index a084f7320..be8d89107 100644 --- a/src/Magpie.Core/EffectDesc.h +++ b/src/Magpie.Core/EffectDesc.h @@ -80,24 +80,25 @@ struct EffectPassDesc { struct EffectFlags { // 输入 - static constexpr const uint32_t LastEffect = 0x1; - static constexpr const uint32_t InlineParams = 0x2; - static constexpr const uint32_t FP16 = 0x4; + static constexpr const uint32_t InlineParams = 1; + static constexpr const uint32_t FP16 = 1 << 1; // 输出 // 此效果需要帧数和鼠标位置 - static constexpr const uint32_t UseDynamic = 0x10; - // 可作为通用的降采样效果 - static constexpr const uint32_t GenericDownscaler = 0x20; + static constexpr const uint32_t UseDynamic = 1 << 4; }; struct EffectDesc { std::string name; std::string sortName; // 仅供 UI 使用 - // 用于计算效果的输出,空值表示支持任意大小的输出 - std::pair outSizeExpr; + const std::pair& GetOutputSizeExpr() const noexcept { + return textures[1].sizeExpr; + } std::vector params; + // 0: INPUT + // 1: OUTPUT + // > 1: 中间纹理 std::vector textures; std::vector samplers; std::vector passes; diff --git a/src/Magpie.Core/EffectDrawer.cpp b/src/Magpie.Core/EffectDrawer.cpp index 156e761c6..d05e8c4bd 100644 --- a/src/Magpie.Core/EffectDrawer.cpp +++ b/src/Magpie.Core/EffectDrawer.cpp @@ -1,15 +1,16 @@ #include "pch.h" #include "EffectDrawer.h" -#include "Logger.h" +#include "ScalingOptions.h" #include "Win32Utils.h" -#include "MagApp.h" +#include "Logger.h" #include "DeviceResources.h" -#include "TextureLoader.h" #include "StrUtils.h" -#include "Renderer.h" -#include "CursorManager.h" -#include "GPUTimer.h" +#include "TextureLoader.h" #include "EffectHelper.h" +#include "DirectXHelper.h" +#include "ScalingWindow.h" +#include "BackendDescriptorStore.h" +#include "EffectsProfiler.h" #pragma push_macro("_UNICODE") // Conan 的 muparser 不含 UNICODE 支持 @@ -20,39 +21,18 @@ #pragma warning(push) #pragma 
pop_macro("_UNICODE") - namespace Magpie::Core { -bool EffectDrawer::Initialize( - const EffectDesc& desc, +static SIZE CalcOutputSize( + const std::pair& outputSizeExpr, const EffectOption& option, - ID3D11Texture2D* inputTex, - RECT* outputRect, - RECT* virtualOutputRect -) { - _desc = desc; - - SIZE inputSize{}; - { - D3D11_TEXTURE2D_DESC inputDesc; - inputTex->GetDesc(&inputDesc); - inputSize = { (LONG)inputDesc.Width, (LONG)inputDesc.Height }; - } - - const SIZE hostSize = Win32Utils::GetSizeOfRect(MagApp::Get().GetHostWndRect());; - bool isLastEffect = desc.flags & EffectFlags::LastEffect; - bool isInlineParams = desc.flags & EffectFlags::InlineParams; - - DeviceResources& dr = MagApp::Get().GetDeviceResources(); - auto d3dDevice = dr.GetD3DDevice(); - - static mu::Parser exprParser; - exprParser.DefineConst("INPUT_WIDTH", inputSize.cx); - exprParser.DefineConst("INPUT_HEIGHT", inputSize.cy); - + SIZE scalingWndSize, + SIZE inputSize, + mu::Parser& exprParser +) noexcept { SIZE outputSize{}; - if (desc.outSizeExpr.first.empty()) { + if (outputSizeExpr.first.empty()) { switch (option.scalingType) { case ScalingType::Normal: { @@ -62,7 +42,10 @@ bool EffectDrawer::Initialize( } case ScalingType::Fit: { - float fillScale = std::min(float(hostSize.cx) / inputSize.cx, float(hostSize.cy) / inputSize.cy); + const float fillScale = std::min( + float(scalingWndSize.cx) / inputSize.cx, + float(scalingWndSize.cy) / inputSize.cy + ); outputSize.cx = std::lroundf(inputSize.cx * fillScale * option.scale.first); outputSize.cy = std::lroundf(inputSize.cy * fillScale * option.scale.second); break; @@ -75,25 +58,53 @@ bool EffectDrawer::Initialize( } case ScalingType::Fill: { - outputSize = hostSize; + outputSize = scalingWndSize; break; } + default: + assert(false); + break; } } else { - assert(!desc.outSizeExpr.second.empty()); + assert(!outputSizeExpr.second.empty()); try { - exprParser.SetExpr(desc.outSizeExpr.first); + exprParser.SetExpr(outputSizeExpr.first); 
outputSize.cx = std::lround(exprParser.Eval()); - exprParser.SetExpr(desc.outSizeExpr.second); + exprParser.SetExpr(outputSizeExpr.second); outputSize.cy = std::lround(exprParser.Eval()); } catch (const mu::ParserError& e) { Logger::Get().Error(fmt::format("计算输出尺寸 {} 失败:{}", e.GetExpr(), e.GetMsg())); - return false; + return {}; } } + return outputSize; +} + +bool EffectDrawer::Initialize( + const EffectDesc& desc, + const EffectOption& option, + DeviceResources& deviceResources, + BackendDescriptorStore& descriptorStore, + ID3D11Texture2D** inOutTexture +) noexcept { + _d3dDC = deviceResources.GetD3DDC(); + + SIZE inputSize{}; + { + D3D11_TEXTURE2D_DESC inputDesc; + (*inOutTexture)->GetDesc(&inputDesc); + inputSize = { (LONG)inputDesc.Width, (LONG)inputDesc.Height }; + } + + static mu::Parser exprParser; + exprParser.DefineConst("INPUT_WIDTH", inputSize.cx); + exprParser.DefineConst("INPUT_HEIGHT", inputSize.cy); + + const SIZE scalingWndSize = Win32Utils::GetSizeOfRect(ScalingWindow::Get().WndRect()); + const SIZE outputSize = CalcOutputSize(desc.GetOutputSizeExpr(), option, scalingWndSize, inputSize, exprParser); if (outputSize.cx <= 0 || outputSize.cy <= 0) { Logger::Get().Error("非法的输出尺寸"); return false; @@ -105,30 +116,48 @@ bool EffectDrawer::Initialize( _samplers.resize(desc.samplers.size()); for (UINT i = 0; i < _samplers.size(); ++i) { const EffectSamplerDesc& samDesc = desc.samplers[i]; - if (!dr.GetSampler( + _samplers[i] = deviceResources.GetSampler( samDesc.filterType == EffectSamplerFilterType::Linear ? D3D11_FILTER_MIN_MAG_MIP_LINEAR : D3D11_FILTER_MIN_MAG_MIP_POINT, - samDesc.addressType == EffectSamplerAddressType::Clamp ? D3D11_TEXTURE_ADDRESS_CLAMP : D3D11_TEXTURE_ADDRESS_WRAP, - &_samplers[i]) - ) { + samDesc.addressType == EffectSamplerAddressType::Clamp ? 
D3D11_TEXTURE_ADDRESS_CLAMP : D3D11_TEXTURE_ADDRESS_WRAP + ); + + if (!_samplers[i]) { Logger::Get().Error(fmt::format("创建采样器 {} 失败", samDesc.name)); return false; } } // 创建中间纹理 - // 第一个为 INPUT,最后一个为 OUTPUT - _textures.resize(desc.textures.size() + 1); - _textures[0].copy_from(inputTex); - for (size_t i = 1; i < desc.textures.size(); ++i) { + // 第一个为 INPUT,第二个为 OUTPUT + _textures.resize(desc.textures.size()); + _textures[0].copy_from(*inOutTexture); + + // 创建输出纹理,格式始终是 DXGI_FORMAT_R8G8B8A8_UNORM + _textures[1] = DirectXHelper::CreateTexture2D( + deviceResources.GetD3DDevice(), + EffectHelper::FORMAT_DESCS[(uint32_t)desc.textures[1].format].dxgiFormat, + outputSize.cx, + outputSize.cy, + D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS + ); + + *inOutTexture = _textures[1].get(); + if (!*inOutTexture) { + Logger::Get().Error("创建输出纹理失败"); + return false; + } + + for (size_t i = 2; i < desc.textures.size(); ++i) { const EffectIntermediateTextureDesc& texDesc = desc.textures[i]; if (!texDesc.source.empty()) { // 从文件加载纹理 size_t delimPos = desc.name.find_last_of('\\'); - std::string texPath = delimPos == std::string::npos + std::string texPath = delimPos == std::string::npos ? 
StrUtils::Concat("effects\\", texDesc.source) : StrUtils::Concat("effects\\", std::string_view(desc.name.c_str(), delimPos + 1), texDesc.source); - _textures[i] = TextureLoader::Load(StrUtils::UTF8ToUTF16(texPath).c_str()); + _textures[i] = TextureLoader::Load( + StrUtils::UTF8ToUTF16(texPath).c_str(), deviceResources.GetD3DDevice()); if (!_textures[i]) { Logger::Get().Error(fmt::format("加载纹理 {} 失败", texDesc.source)); return false; @@ -138,7 +167,7 @@ bool EffectDrawer::Initialize( // 检查纹理格式是否匹配 D3D11_TEXTURE2D_DESC srcDesc{}; _textures[i]->GetDesc(&srcDesc); - if (srcDesc.Format != EffectHelper::FORMAT_DESCS[(UINT)texDesc.format].dxgiFormat) { + if (srcDesc.Format != EffectHelper::FORMAT_DESCS[(uint32_t)texDesc.format].dxgiFormat) { Logger::Get().Error("SOURCE 纹理格式不匹配"); return false; } @@ -161,7 +190,8 @@ bool EffectDrawer::Initialize( return false; } - _textures[i] = dr.CreateTexture2D( + _textures[i] = DirectXHelper::CreateTexture2D( + deviceResources.GetD3DDevice(), EffectHelper::FORMAT_DESCS[(UINT)texDesc.format].dxgiFormat, texSize.cx, texSize.cy, @@ -174,30 +204,13 @@ bool EffectDrawer::Initialize( } } - if (!isLastEffect) { - // 创建输出纹理 - _textures.back() = dr.CreateTexture2D( - DXGI_FORMAT_R8G8B8A8_UNORM, - outputSize.cx, - outputSize.cy, - D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS - ); - - if (!_textures.back()) { - Logger::Get().Error("创建纹理失败"); - return false; - } - } else { - _textures.back().copy_from(dr.GetBackBuffer()); - } - _shaders.resize(desc.passes.size()); _srvs.resize(desc.passes.size()); _uavs.resize(desc.passes.size()); for (UINT i = 0; i < _shaders.size(); ++i) { const EffectPassDesc& passDesc = desc.passes[i]; - HRESULT hr = d3dDevice->CreateComputeShader( + HRESULT hr = deviceResources.GetD3DDevice()->CreateComputeShader( passDesc.cso->GetBufferPointer(), passDesc.cso->GetBufferSize(), nullptr, _shaders[i].put()); if (FAILED(hr)) { Logger::Get().ComError("创建计算着色器失败", hr); @@ -206,61 +219,74 @@ bool 
EffectDrawer::Initialize( _srvs[i].resize(passDesc.inputs.size()); for (UINT j = 0; j < passDesc.inputs.size(); ++j) { - if (!dr.GetShaderResourceView(_textures[passDesc.inputs[j]].get(), &_srvs[i][j])) { + auto srv = _srvs[i][j] = descriptorStore.GetShaderResourceView(_textures[passDesc.inputs[j]].get()); + if (!srv) { Logger::Get().Error("GetShaderResourceView 失败"); return false; } } - if (!passDesc.outputs.empty()) { - _uavs[i].resize(passDesc.outputs.size() * 2); - for (UINT j = 0; j < passDesc.outputs.size(); ++j) { - if (!dr.GetUnorderedAccessView(_textures[passDesc.outputs[j]].get(), &_uavs[i][j])) { - Logger::Get().Error("GetUnorderedAccessView 失败"); - return false; - } - } - - D3D11_TEXTURE2D_DESC outputDesc; - _textures[passDesc.outputs[0]]->GetDesc(&outputDesc); - _dispatches.emplace_back( - (outputDesc.Width + passDesc.blockSize.first - 1) / passDesc.blockSize.first, - (outputDesc.Height + passDesc.blockSize.second - 1) / passDesc.blockSize.second - ); - } else { - // 最后一个 pass 输出到 OUTPUT - _uavs[i].resize(2); - if (!dr.GetUnorderedAccessView(_textures.back().get(), &_uavs[i][0])) { + _uavs[i].resize(passDesc.outputs.size() * 2); + for (UINT j = 0; j < passDesc.outputs.size(); ++j) { + auto uav = _uavs[i][j] = descriptorStore.GetUnorderedAccessView(_textures[passDesc.outputs[j]].get()); + if (!uav) { Logger::Get().Error("GetUnorderedAccessView 失败"); return false; } + } - D3D11_TEXTURE2D_DESC lastDesc; - _textures.back()->GetDesc(&lastDesc); + D3D11_TEXTURE2D_DESC outputDesc; + _textures[passDesc.outputs[0]]->GetDesc(&outputDesc); + _dispatches.emplace_back( + (outputDesc.Width + passDesc.blockSize.first - 1) / passDesc.blockSize.first, + (outputDesc.Height + passDesc.blockSize.second - 1) / passDesc.blockSize.second + ); + } - _dispatches.emplace_back( - (std::min(lastDesc.Width, (UINT)outputSize.cx) + passDesc.blockSize.first - 1) / passDesc.blockSize.first, - (std::min(lastDesc.Height, (UINT)outputSize.cy) + passDesc.blockSize.second - 1) / 
passDesc.blockSize.second - ); - } + if (!_InitializeConstants(desc, option, deviceResources, inputSize, outputSize)) { + Logger::Get().Error("_InitializeConstants 失败"); + return false; } - if (isLastEffect) { - // 为光标渲染预留空间 - _srvs.back().push_back(nullptr); + return true; +} - if (!dr.GetSampler( - MagApp::Get().GetOptions().cursorInterpolationMode == CursorInterpolationMode::NearestNeighbor ? D3D11_FILTER_MIN_MAG_MIP_POINT : D3D11_FILTER_MIN_MAG_MIP_LINEAR, - D3D11_TEXTURE_ADDRESS_CLAMP, - &_samplers.emplace_back(nullptr) - )) { - Logger::Get().Error("GetSampler 失败"); - return false; - } +void EffectDrawer::Draw(EffectsProfiler& profiler) const noexcept { + { + ID3D11Buffer* t = _constantBuffer.get(); + _d3dDC->CSSetConstantBuffers(0, 1, &t); } + _d3dDC->CSSetSamplers(0, (UINT)_samplers.size(), _samplers.data()); + + for (uint32_t i = 0; i < _dispatches.size(); ++i) { + _DrawPass(i); + profiler.OnEndPass(_d3dDC); + } +} + +void EffectDrawer::_DrawPass(uint32_t i) const noexcept { + _d3dDC->CSSetShader(_shaders[i].get(), nullptr, 0); + + _d3dDC->CSSetShaderResources(0, (UINT)_srvs[i].size(), _srvs[i].data()); + UINT uavCount = (UINT)_uavs[i].size() / 2; + _d3dDC->CSSetUnorderedAccessViews(0, uavCount, _uavs[i].data(), nullptr); + + _d3dDC->Dispatch(_dispatches[i].first, _dispatches[i].second, 1); + + _d3dDC->CSSetUnorderedAccessViews(0, uavCount, _uavs[i].data() + uavCount, nullptr); +} + +bool EffectDrawer::_InitializeConstants( + const EffectDesc& desc, + const EffectOption& option, + DeviceResources& deviceResources, + SIZE inputSize, + SIZE outputSize +) noexcept { + const bool isInlineParams = desc.flags & EffectFlags::InlineParams; // 大小必须为 4 的倍数 - size_t builtinConstantCount = isLastEffect ? 
16 : 12; + const size_t builtinConstantCount = 10; size_t psStylePassParams = 0; for (UINT i = 0, end = (UINT)desc.passes.size() - 1; i < end; ++i) { if (desc.passes[i].isPSStyle) { @@ -268,14 +294,12 @@ bool EffectDrawer::Initialize( } } _constants.resize((builtinConstantCount + psStylePassParams + (isInlineParams ? 0 : desc.params.size()) + 3) / 4 * 4); - // cbuffer __CB2 : register(b1) { + // cbuffer __CB1 : register(b0) { // uint2 __inputSize; // uint2 __outputSize; // float2 __inputPt; // float2 __outputPt; // float2 __scale; - // int2 __viewport; - // [uint4 __offset;] // [PARAMETERS...] // ); _constants[0].uintVal = inputSize.cx; @@ -289,44 +313,6 @@ bool EffectDrawer::Initialize( _constants[8].floatVal = outputSize.cx / (FLOAT)inputSize.cx; _constants[9].floatVal = outputSize.cy / (FLOAT)inputSize.cy; - // 输出尺寸可能比主窗口更大 - RECT virtualOutputRect1{}; - RECT outputRect1{}; - - if (isLastEffect) { - virtualOutputRect1.left = (hostSize.cx - outputSize.cx) / 2; - virtualOutputRect1.top = (hostSize.cy - outputSize.cy) / 2; - virtualOutputRect1.right = virtualOutputRect1.left + outputSize.cx; - virtualOutputRect1.bottom = virtualOutputRect1.top + outputSize.cy; - - outputRect1 = RECT{ - std::max(0L, virtualOutputRect1.left), - std::max(0L, virtualOutputRect1.top), - std::min(hostSize.cx, virtualOutputRect1.right), - std::min(hostSize.cy, virtualOutputRect1.bottom) - }; - - _constants[12].intVal = -std::min(0L, virtualOutputRect1.left); - _constants[13].intVal = -std::min(0L, virtualOutputRect1.top); - _constants[10].intVal = outputRect1.right - outputRect1.left + _constants[12].intVal; - _constants[11].intVal = outputRect1.bottom - outputRect1.top + _constants[13].intVal; - _constants[14].intVal = outputRect1.left - _constants[12].intVal; - _constants[15].intVal = outputRect1.top - _constants[13].intVal; - } else { - outputRect1 = RECT{ 0, 0, outputSize.cx, outputSize.cy }; - virtualOutputRect1 = outputRect1; - - _constants[10].intVal = outputSize.cx; - 
_constants[11].intVal = outputSize.cy; - } - - if (outputRect) { - *outputRect = outputRect1; - } - if (virtualOutputRect) { - *virtualOutputRect = virtualOutputRect1; - } - // PS 样式的通道需要的参数 EffectHelper::Constant32* pCurParam = _constants.data() + builtinConstantCount; if (psStylePassParams > 0) { @@ -393,7 +379,7 @@ bool EffectDrawer::Initialize( D3D11_SUBRESOURCE_DATA initData{}; initData.pSysMem = _constants.data(); - HRESULT hr = dr.GetD3DDevice()->CreateBuffer(&bd, &initData, _constantBuffer.put()); + HRESULT hr = deviceResources.GetD3DDevice()->CreateBuffer(&bd, &initData, _constantBuffer.put()); if (FAILED(hr)) { Logger::Get().ComError("CreateBuffer 失败", hr); return false; @@ -402,56 +388,4 @@ bool EffectDrawer::Initialize( return true; } -void EffectDrawer::Draw(UINT& idx, bool noUpdate) { - auto d3dDC = MagApp::Get().GetDeviceResources().GetD3DDC(); - auto& gpuTimer = MagApp::Get().GetRenderer().GetGPUTimer(); - - { - ID3D11Buffer* t = _constantBuffer.get(); - d3dDC->CSSetConstantBuffers(1, 1, &t); - } - d3dDC->CSSetSamplers(0, (UINT)_samplers.size(), _samplers.data()); - - for (UINT i = 0; i < _dispatches.size(); ++i) { - // noUpdate 为真则只渲染最后一个通道 - if (!noUpdate || i == UINT(_dispatches.size() - 1)) { - _DrawPass(i); - } - - // 不渲染的通道也在 GPUTimer 中记录 - gpuTimer.OnEndPass(idx++); - } -} - -void EffectDrawer::_DrawPass(UINT i) { - auto d3dDC = MagApp::Get().GetDeviceResources().GetD3DDC(); - d3dDC->CSSetShader(_shaders[i].get(), nullptr, 0); - - if ((_desc.flags & EffectFlags::LastEffect) && i == _dispatches.size() - 1) { - // 最后一个效果的最后一个通道负责渲染光标 - - // 光标纹理 - CursorManager& cm = MagApp::Get().GetCursorManager(); - if (cm.HasCursor()) { - ID3D11Texture2D* cursorTex; - CursorManager::CursorType ct; - if (cm.GetCursorTexture(&cursorTex, ct)) { - if (!MagApp::Get().GetDeviceResources().GetShaderResourceView(cursorTex, &_srvs[i].back())) { - Logger::Get().Error("GetShaderResourceView 出错"); - } - } else { - Logger::Get().Error("GetCursorTexture 出错"); - } - } - } 
- - d3dDC->CSSetShaderResources(0, (UINT)_srvs[i].size(), _srvs[i].data()); - UINT uavCount = (UINT)_uavs[i].size() / 2; - d3dDC->CSSetUnorderedAccessViews(0, uavCount, _uavs[i].data(), nullptr); - - d3dDC->Dispatch(_dispatches[i].first, _dispatches[i].second, 1); - - d3dDC->CSSetUnorderedAccessViews(0, uavCount, _uavs[i].data() + uavCount, nullptr); -} - } diff --git a/src/Magpie.Core/EffectDrawer.h b/src/Magpie.Core/EffectDrawer.h index 995542da9..b2bec6d64 100644 --- a/src/Magpie.Core/EffectDrawer.h +++ b/src/Magpie.Core/EffectDrawer.h @@ -6,6 +6,9 @@ namespace Magpie::Core { struct EffectOption; +class DeviceResources; +class BackendDescriptorStore; +class EffectsProfiler; class EffectDrawer { public: @@ -16,29 +19,25 @@ class EffectDrawer { bool Initialize( const EffectDesc& desc, const EffectOption& option, - ID3D11Texture2D* inputTex, - RECT* outputRect = nullptr, - RECT* virtualOutputRect = nullptr - ); + DeviceResources& deviceResources, + BackendDescriptorStore& descriptorStore, + ID3D11Texture2D** inOutTexture + ) noexcept; - void Draw(UINT& idx, bool noUpdate = false); - - bool IsUseDynamic() const noexcept { - return _desc.flags & EffectFlags::UseDynamic; - } - - const EffectDesc& GetDesc() const noexcept { - return _desc; - } - - ID3D11Texture2D* GetOutputTexture() const noexcept { - return _textures.empty() ? 
nullptr : _textures.back().get(); - } + void Draw(EffectsProfiler& profiler) const noexcept; private: - void _DrawPass(UINT i); + bool _InitializeConstants( + const EffectDesc& desc, + const EffectOption& option, + DeviceResources& deviceResources, + SIZE inputSize, + SIZE outputSize + ) noexcept; + + void _DrawPass(uint32_t i) const noexcept; - EffectDesc _desc; + ID3D11DeviceContext* _d3dDC = nullptr; SmallVector _samplers; SmallVector> _textures; @@ -51,7 +50,7 @@ class EffectDrawer { SmallVector> _shaders; - SmallVector> _dispatches; + SmallVector> _dispatches; }; } diff --git a/src/Magpie.Core/EffectHelper.h b/src/Magpie.Core/EffectHelper.h index 29d88aea3..a4a3d2a27 100644 --- a/src/Magpie.Core/EffectHelper.h +++ b/src/Magpie.Core/EffectHelper.h @@ -2,6 +2,8 @@ #include #include +namespace Magpie::Core { + struct EffectHelper { struct EffectIntermediateTextureFormatDesc { const char* name; @@ -41,3 +43,5 @@ struct EffectHelper { int intVal; }; }; + +} diff --git a/src/Magpie.Core/EffectsProfiler.cpp b/src/Magpie.Core/EffectsProfiler.cpp new file mode 100644 index 000000000..da1e8f63d --- /dev/null +++ b/src/Magpie.Core/EffectsProfiler.cpp @@ -0,0 +1,99 @@ +#include "pch.h" +#include "EffectsProfiler.h" +#include "DeviceResources.h" +#include + +namespace Magpie::Core { + +void EffectsProfiler::Start(ID3D11Device* d3dDevice, uint32_t passCount) { + assert(_passQueries.empty()); + _passQueries.resize(passCount); + + D3D11_QUERY_DESC desc{ .Query = D3D11_QUERY_TIMESTAMP_DISJOINT }; + d3dDevice->CreateQuery(&desc, _disjointQuery.put()); + + desc.Query = D3D11_QUERY_TIMESTAMP; + d3dDevice->CreateQuery(&desc, _startQuery.put()); + for (winrt::com_ptr& query : _passQueries) { + d3dDevice->CreateQuery(&desc, query.put()); + } +} + +void EffectsProfiler::Stop() { + _disjointQuery = nullptr; + _startQuery = nullptr; + _passQueries.clear(); +} + +void EffectsProfiler::OnBeginEffects(ID3D11DeviceContext* d3dDC) { + if (_passQueries.empty()) { + return; + } + + 
d3dDC->Begin(_disjointQuery.get()); + d3dDC->End(_startQuery.get()); + + _curPass = 0; +} + +void EffectsProfiler::OnEndPass(ID3D11DeviceContext* d3dDC) { + if (_passQueries.empty()) { + return; + } + + d3dDC->End(_passQueries[_curPass++].get()); +} + +void EffectsProfiler::OnEndEffects(ID3D11DeviceContext* d3dDC) { + if (_passQueries.empty()) { + return; + } + + d3dDC->End(_disjointQuery.get()); +} + +template +static T GetQueryData(ID3D11DeviceContext* d3dDC, ID3D11Query* query) noexcept { + T data{}; + while (d3dDC->GetData(query, &data, sizeof(data), 0) != S_OK) { + Sleep(0); + } + return data; +} + +void EffectsProfiler::QueryTimings(ID3D11DeviceContext* d3dDC) noexcept { + if (_passQueries.empty()) { + return; + } + + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData = + GetQueryData(d3dDC, _disjointQuery.get()); + + if (disjointData.Disjoint) { + return; + } + + const float toMS = 1000.0f / disjointData.Frequency; + + uint64_t prevTimestamp = GetQueryData(d3dDC, _startQuery.get()); + + std::scoped_lock lk(_timingsMutex); + _timings.resize(_passQueries.size()); + for (size_t i = 0; i < _passQueries.size(); ++i) { + uint64_t timestamp = GetQueryData(d3dDC, _passQueries[i].get()); + _timings[i] = (timestamp - prevTimestamp) * toMS; + + prevTimestamp = timestamp; + } +} + +SmallVector EffectsProfiler::GetTimings() noexcept { + std::scoped_lock lk(_timingsMutex); + + // 没有渲染新帧时 _timings 为空 + SmallVector result = std::move(_timings); + _timings.clear(); + return result; +} + +} diff --git a/src/Magpie.Core/EffectsProfiler.h b/src/Magpie.Core/EffectsProfiler.h new file mode 100644 index 000000000..b39f0e1dc --- /dev/null +++ b/src/Magpie.Core/EffectsProfiler.h @@ -0,0 +1,42 @@ +#pragma once +#include "SmallVector.h" +#include "Win32Utils.h" + +namespace Magpie::Core { + +class DeviceResources; + +class EffectsProfiler { +public: + EffectsProfiler() = default; + + EffectsProfiler(const EffectsProfiler&) = delete; + EffectsProfiler(EffectsProfiler&&) = delete; + + 
void Start(ID3D11Device* d3dDevice, uint32_t passCount); + + void Stop(); + + void OnBeginEffects(ID3D11DeviceContext* d3dDC); + + void OnEndPass(ID3D11DeviceContext* d3dDC); + + void OnEndEffects(ID3D11DeviceContext* d3dDC); + + void QueryTimings(ID3D11DeviceContext* d3dDC) noexcept; + + // 从前端线程调用 + SmallVector GetTimings() noexcept; + +private: + SmallVector _timings; + Win32Utils::SRWMutex _timingsMutex; + + winrt::com_ptr _disjointQuery; + winrt::com_ptr _startQuery; + std::vector> _passQueries; + + uint32_t _curPass = 0; +}; + +} diff --git a/src/Magpie.Core/ExclModeHack.h b/src/Magpie.Core/ExclModeHack.h deleted file mode 100644 index 9561f64e3..000000000 --- a/src/Magpie.Core/ExclModeHack.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include "Win32Utils.h" - -namespace Magpie::Core { - -class ExclModeHack { -public: - ExclModeHack(); - - ~ExclModeHack(); - -private: - Win32Utils::ScopedHandle _exclModeMutex; -}; - -} diff --git a/src/Magpie.Core/ExclModeHack.cpp b/src/Magpie.Core/ExclModeHelper.cpp similarity index 64% rename from src/Magpie.Core/ExclModeHack.cpp rename to src/Magpie.Core/ExclModeHelper.cpp index dc24145ac..cf7e0f53a 100644 --- a/src/Magpie.Core/ExclModeHack.cpp +++ b/src/Magpie.Core/ExclModeHelper.cpp @@ -1,64 +1,66 @@ #include "pch.h" -#include "ExclModeHack.h" -#include "MagApp.h" +#include "ExclModeHelper.h" #include "Logger.h" - namespace Magpie::Core { // 模拟 D3D 独占全屏模式,以起到免打扰的效果 // SHQueryUserNotificationState 通常被用来检测是否有 D3D 游戏独占全屏,以确定是否应该向用户推送通知/弹窗 // 此函数内部使用名为 __DDrawExclMode__ 的 mutex 检测独占全屏,因此这里直接获取该 mutex 以模拟独占全屏 -// 感谢 @codehz 提供的思路 https://github.com/Blinue/Magpie/issues/245 -ExclModeHack::ExclModeHack() { +// 感谢 @codehz 提供的思路 GH#245 +Win32Utils::ScopedHandle ExclModeHelper::EnterExclMode() noexcept { + Win32Utils::ScopedHandle exclModeMutex; + QUERY_USER_NOTIFICATION_STATE state; HRESULT hr = SHQueryUserNotificationState(&state); if (FAILED(hr)) { Logger::Get().ComError("SHQueryUserNotificationState 失败", hr); - return; + 
return exclModeMutex; } // 操作系统将 Magpie 的缩放窗口视为全屏应用程序,可能已经启用了“请勿打扰”,即 QUNS_BUSY。 // 但我们想要的是 QUNS_RUNNING_D3D_FULL_SCREEN if (state == QUNS_RUNNING_D3D_FULL_SCREEN) { Logger::Get().Info("已处于免打扰状态"); - return; + return exclModeMutex; } - - _exclModeMutex.reset(Win32Utils::SafeHandle( + + exclModeMutex.reset(Win32Utils::SafeHandle( OpenMutex(SYNCHRONIZE, FALSE, L"__DDrawExclMode__"))); - if (!_exclModeMutex) { + if (!exclModeMutex) { Logger::Get().Win32Error("OpenMutex 失败"); - return; + return exclModeMutex; } - DWORD result = WaitForSingleObject(_exclModeMutex.get(), 0); + DWORD result = WaitForSingleObject(exclModeMutex.get(), 0); if (result != WAIT_OBJECT_0) { Logger::Get().Error("获取 __DDrawExclMode__ 失败"); - _exclModeMutex.reset(); - return; + exclModeMutex.reset(); + return exclModeMutex; } hr = SHQueryUserNotificationState(&state); if (FAILED(hr)) { Logger::Get().ComError("SHQueryUserNotificationState 失败", hr); - ReleaseMutex(_exclModeMutex.get()); - _exclModeMutex.reset(); - return; + ReleaseMutex(exclModeMutex.get()); + exclModeMutex.reset(); + return exclModeMutex; } if (state != QUNS_RUNNING_D3D_FULL_SCREEN) { Logger::Get().Error("模拟独占全屏失败"); - ReleaseMutex(_exclModeMutex.get()); - _exclModeMutex.reset(); - return; + ReleaseMutex(exclModeMutex.get()); + exclModeMutex.reset(); + return exclModeMutex; } Logger::Get().Info("模拟独占全屏成功"); + return exclModeMutex; } -ExclModeHack::~ExclModeHack() { - if (_exclModeMutex) { - ReleaseMutex(_exclModeMutex.get()); +void ExclModeHelper::ExitExclMode(Win32Utils::ScopedHandle& mutex) noexcept { + if (mutex) { + ReleaseMutex(mutex.get()); + mutex.reset(); } } diff --git a/src/Magpie.Core/ExclModeHelper.h b/src/Magpie.Core/ExclModeHelper.h new file mode 100644 index 000000000..40d5d1c94 --- /dev/null +++ b/src/Magpie.Core/ExclModeHelper.h @@ -0,0 +1,11 @@ +#pragma once +#include "Win32Utils.h" + +namespace Magpie::Core { + +struct ExclModeHelper { + static Win32Utils::ScopedHandle EnterExclMode() noexcept; + static void 
ExitExclMode(Win32Utils::ScopedHandle& mutex) noexcept; +}; + +} diff --git a/src/Magpie.Core/FrameSourceBase.cpp b/src/Magpie.Core/FrameSourceBase.cpp index fa24945c7..80619344d 100644 --- a/src/Magpie.Core/FrameSourceBase.cpp +++ b/src/Magpie.Core/FrameSourceBase.cpp @@ -1,24 +1,35 @@ #include "pch.h" #include "FrameSourceBase.h" +#include "ScalingOptions.h" +#include "Logger.h" #include "Win32Utils.h" #include "Utils.h" -#include "MagApp.h" -#include "Logger.h" -#include "CommonSharedConstants.h" #include "SmallVector.h" - +#include "DirectXHelper.h" +#include "DeviceResources.h" +#include "shaders/DuplicateFrameCS.h" +#include "ScalingWindow.h" +#include "BackendDescriptorStore.h" namespace Magpie::Core { -FrameSourceBase::~FrameSourceBase() { - HWND hwndSrc = MagApp::Get().GetHwndSrc(); +static constexpr const uint16_t INITIAL_CHECK_COUNT = 16; +static constexpr const uint16_t INITIAL_SKIP_COUNT = 1; +static constexpr const uint16_t MAX_SKIP_COUNT = 16; + +FrameSourceBase::FrameSourceBase() noexcept : + _nextSkipCount(INITIAL_SKIP_COUNT), _framesLeft(INITIAL_CHECK_COUNT) {} + +FrameSourceBase::~FrameSourceBase() noexcept { + const HWND hwndSrc = ScalingWindow::Get().HwndSrc(); // 还原窗口圆角 if (_roundCornerDisabled) { _roundCornerDisabled = false; INT attr = DWMWCP_DEFAULT; - HRESULT hr = DwmSetWindowAttribute(hwndSrc, DWMWA_WINDOW_CORNER_PREFERENCE, &attr, sizeof(attr)); + HRESULT hr = DwmSetWindowAttribute( + hwndSrc, DWMWA_WINDOW_CORNER_PREFERENCE, &attr, sizeof(attr)); if (FAILED(hr)) { Logger::Get().ComError("取消禁用窗口圆角失败", hr); } else { @@ -28,36 +39,35 @@ FrameSourceBase::~FrameSourceBase() { // 还原窗口大小调整 if (_windowResizingDisabled) { - // 缩放 Magpie 主窗口时会在 SetWindowLongPtr 中卡住,似乎是 Win11 的 bug - // 将在 MagService::_MagRuntime_IsRunningChanged 还原主窗口样式 - if (Win32Utils::GetWndClassName(hwndSrc) != CommonSharedConstants::MAIN_WINDOW_CLASS_NAME) { - LONG_PTR style = GetWindowLongPtr(hwndSrc, GWL_STYLE); - if (!(style & WS_THICKFRAME)) { - if 
(SetWindowLongPtr(hwndSrc, GWL_STYLE, style | WS_THICKFRAME)) { - if (!SetWindowPos(hwndSrc, 0, 0, 0, 0, 0, - SWP_NOMOVE | SWP_NOSIZE | SWP_NOZORDER | SWP_FRAMECHANGED)) { - Logger::Get().Win32Error("SetWindowPos 失败"); - } - - Logger::Get().Info("已取消禁用窗口大小调整"); - } else { - Logger::Get().Win32Error("取消禁用窗口大小调整失败"); + LONG_PTR style = GetWindowLongPtr(hwndSrc, GWL_STYLE); + if (!(style & WS_THICKFRAME)) { + if (SetWindowLongPtr(hwndSrc, GWL_STYLE, style | WS_THICKFRAME)) { + if (!SetWindowPos(hwndSrc, 0, 0, 0, 0, 0, + SWP_NOMOVE | SWP_NOSIZE | SWP_NOZORDER | SWP_FRAMECHANGED)) { + Logger::Get().Win32Error("SetWindowPos 失败"); } + + Logger::Get().Info("已取消禁用窗口大小调整"); + } else { + Logger::Get().Win32Error("取消禁用窗口大小调整失败"); } } } } -bool FrameSourceBase::Initialize() { - HWND hwndSrc = MagApp::Get().GetHwndSrc(); +bool FrameSourceBase::Initialize(DeviceResources& deviceResources, BackendDescriptorStore& descriptorStore) noexcept { + _deviceResources = &deviceResources; + _descriptorStore = &descriptorStore; + + const HWND hwndSrc = ScalingWindow::Get().HwndSrc(); // 禁用窗口大小调整 - if (MagApp::Get().GetOptions().IsDisableWindowResizing()) { + if (ScalingWindow::Get().Options().IsWindowResizingDisabled()) { LONG_PTR style = GetWindowLongPtr(hwndSrc, GWL_STYLE); if (style & WS_THICKFRAME) { if (SetWindowLongPtr(hwndSrc, GWL_STYLE, style ^ WS_THICKFRAME)) { // 不重绘边框,以防某些窗口状态不正确 - // if (!SetWindowPos(hwndSrc, 0, 0, 0, 0, 0, + // if (!SetWindowPos(HwndSrc, 0, 0, 0, 0, 0, // SWP_NOMOVE | SWP_NOSIZE | SWP_NOZORDER | SWP_FRAMECHANGED)) { // SPDLOG_LOGGER_ERROR(logger, MakeWin32ErrorMsg("SetWindowPos 失败")); // } @@ -74,7 +84,8 @@ bool FrameSourceBase::Initialize() { if (_HasRoundCornerInWin11()) { if (Win32Utils::GetOSVersion().IsWin11()) { INT attr = DWMWCP_DONOTROUND; - HRESULT hr = DwmSetWindowAttribute(hwndSrc, DWMWA_WINDOW_CORNER_PREFERENCE, &attr, sizeof(attr)); + HRESULT hr = DwmSetWindowAttribute( + hwndSrc, DWMWA_WINDOW_CORNER_PREFERENCE, &attr, sizeof(attr)); if 
(FAILED(hr)) { Logger::Get().ComError("禁用窗口圆角失败", hr); } else { @@ -84,10 +95,307 @@ bool FrameSourceBase::Initialize() { } } + if (!_Initialize()) { + Logger::Get().Error("_Initialize 失败"); + return false; + } + + assert(_output); + _outputSrv = descriptorStore.GetShaderResourceView(_output.get()); + if (!_outputSrv) { + Logger::Get().Error("GetShaderResourceView 失败"); + return false; + } + + return true; +} + +FrameSourceBase::UpdateState FrameSourceBase::Update() noexcept { + const UpdateState state = _Update(); + + const ScalingOptions& options = ScalingWindow::Get().Options(); + const auto duplicateFrameDetectionMode = options.duplicateFrameDetectionMode; + if (state != UpdateState::NewFrame || options.Is3DGameMode() || + duplicateFrameDetectionMode == DuplicateFrameDetectionMode::Never) { + return state; + } + + ID3D11DeviceContext4* d3dDC = _deviceResources->GetD3DDC(); + + if (!_prevFrame) { + if (_InitCheckingForDuplicateFrame()) { + d3dDC->CopyResource(_prevFrame.get(), _output.get()); + } else { + Logger::Get().Error("_InitCheckingForDuplicateFrame 失败"); + _prevFrame = nullptr; + _prevFrameSrv = nullptr; + } + + return UpdateState::NewFrame; + } + + if (duplicateFrameDetectionMode == DuplicateFrameDetectionMode::Always) { + // 总是检查重复帧 + if (_IsDuplicateFrame()) { + return UpdateState::Waiting; + } else { + d3dDC->CopyResource(_prevFrame.get(), _output.get()); + return UpdateState::NewFrame; + } + } + + /////////////////////////////////////////////// + // + // 动态检查重复帧,见 #787 + // + /////////////////////////////////////////////// + + const bool isStatisticsEnabled = options.IsStatisticsForDynamicDetectionEnabled(); + + if (_isCheckingForDuplicateFrame) { + if (--_framesLeft == 0) { + _isCheckingForDuplicateFrame = false; + _framesLeft = _nextSkipCount; + if (_nextSkipCount < MAX_SKIP_COUNT) { + // 增加下一次连续跳过检查的帧数 + ++_nextSkipCount; + } + } + + if (_IsDuplicateFrame()) { + _isCheckingForDuplicateFrame = true; + _framesLeft = INITIAL_CHECK_COUNT; + 
_nextSkipCount = INITIAL_SKIP_COUNT; + return UpdateState::Waiting; + } else { + if (_isCheckingForDuplicateFrame || isStatisticsEnabled) { + d3dDC->CopyResource(_prevFrame.get(), _output.get()); + } + return UpdateState::NewFrame; + } + } else { + if (--_framesLeft == 0) { + _isCheckingForDuplicateFrame = true; + // 第 2 次连续检查 10 帧,之后逐渐减少,从第 16 次开始只连续检查 2 帧 + _framesLeft = uint32_t((-4 * (int)_nextSkipCount + 78) / 7); + + if (!isStatisticsEnabled) { + // 下一帧将检查重复帧,需要复制此帧 + d3dDC->CopyResource(_prevFrame.get(), _output.get()); + } + } + + if (isStatisticsEnabled) { + const bool isDuplicate = _IsDuplicateFrame(); + if (!isDuplicate) { + d3dDC->CopyResource(_prevFrame.get(), _output.get()); + } + + std::pair statistics = _statistics.load(std::memory_order_relaxed); + if (isDuplicate) { + // 预测错误 + ++statistics.first; + } + // 总帧数 + ++statistics.second; + _statistics.store(statistics, std::memory_order_relaxed); + } + + return UpdateState::NewFrame; + } +} + +std::pair FrameSourceBase::GetStatisticsForDynamicDetection() const noexcept { + return _statistics.load(std::memory_order_relaxed); +} + +struct EnumChildWndParam { + const wchar_t* clientWndClassName = nullptr; + SmallVector childWindows; +}; + +static BOOL CALLBACK EnumChildProc( + _In_ HWND hwnd, + _In_ LPARAM lParam +) { + std::wstring className = Win32Utils::GetWndClassName(hwnd); + + EnumChildWndParam* param = (EnumChildWndParam*)lParam; + if (className == param->clientWndClassName) { + param->childWindows.push_back(hwnd); + } + + return TRUE; +} + +static HWND FindClientWindowOfUWP(HWND hwndSrc, const wchar_t* clientWndClassName) noexcept { + // 查找所有窗口类名为 ApplicationFrameInputSinkWindow 的子窗口 + // 该子窗口一般为客户区 + EnumChildWndParam param{}; + param.clientWndClassName = clientWndClassName; + EnumChildWindows(hwndSrc, EnumChildProc, (LPARAM)¶m); + + if (param.childWindows.empty()) { + // 未找到符合条件的子窗口 + return hwndSrc; + } + + if (param.childWindows.size() == 1) { + return param.childWindows[0]; + } + + // 
如果有多个匹配的子窗口,取最大的(一般不会出现) + int maxSize = 0, maxIdx = 0; + for (int i = 0; i < param.childWindows.size(); ++i) { + RECT rect; + if (!GetClientRect(param.childWindows[i], &rect)) { + continue; + } + + int size = rect.right - rect.left + rect.bottom - rect.top; + if (size > maxSize) { + maxSize = size; + maxIdx = i; + } + } + + return param.childWindows[maxIdx]; +} + +static bool GetClientRectOfUWP(HWND hWnd, RECT& rect) noexcept { + std::wstring className = Win32Utils::GetWndClassName(hWnd); + if (className != L"ApplicationFrameWindow" && className != L"Windows.UI.Core.CoreWindow") { + return false; + } + + // 客户区窗口类名为 ApplicationFrameInputSinkWindow + HWND hwndClient = FindClientWindowOfUWP(hWnd, L"ApplicationFrameInputSinkWindow"); + if (!hwndClient) { + return false; + } + + if (!Win32Utils::GetClientScreenRect(hwndClient, rect)) { + Logger::Get().Win32Error("GetClientScreenRect 失败"); + return false; + } + + return true; +} + +// 获取窗口上边框高度,不适用于最大化的窗口 +static uint32_t GetTopBorderHeight(HWND hWnd, const RECT& clientRect, const RECT& windowRect) noexcept { + // 检查该窗口是否禁用了非客户区域的绘制 + BOOL hasBorder = TRUE; + HRESULT hr = DwmGetWindowAttribute(hWnd, DWMWA_NCRENDERING_ENABLED, &hasBorder, sizeof(hasBorder)); + if (FAILED(hr)) { + Logger::Get().ComError("DwmGetWindowAttribute 失败", hr); + return 0; + } + + if (!hasBorder) { + return 0; + } + + // 如果左右下三边均存在边框,那么应视为存在上边框: + // * Win10 中窗口很可能绘制了假的上边框,这是很常见的创建无边框窗口的方法 + // * Win11 中 DWM 会将上边框绘制到客户区 + if (windowRect.top == clientRect.top && (windowRect.left == clientRect.left || + windowRect.right == clientRect.right || windowRect.bottom == clientRect.bottom)) { + return 0; + } + + if (Win32Utils::GetOSVersion().IsWin11()) { + uint32_t borderThickness = 0; + hr = DwmGetWindowAttribute(hWnd, DWMWA_VISIBLE_FRAME_BORDER_THICKNESS, &borderThickness, sizeof(borderThickness)); + if (FAILED(hr)) { + Logger::Get().ComError("DwmGetWindowAttribute 失败", hr); + return 0; + } + + return borderThickness; + } else { + return 1; + } +} + 
+bool FrameSourceBase::_CalcSrcRect() noexcept { + const ScalingOptions& options = ScalingWindow::Get().Options(); + const HWND hwndSrc = ScalingWindow::Get().HwndSrc(); + + if (options.IsCaptureTitleBar() && _CanCaptureTitleBar()) { + if (!Win32Utils::GetWindowFrameRect(hwndSrc, _srcRect)) { + Logger::Get().Error("GetWindowFrameRect 失败"); + return false; + } + + RECT clientRect; + if (!Win32Utils::GetClientScreenRect(hwndSrc, clientRect)) { + Logger::Get().Win32Error("GetClientScreenRect 失败"); + return false; + } + + // 左右下三边裁剪至客户区 + _srcRect.left = std::max(_srcRect.left, clientRect.left); + _srcRect.right = std::min(_srcRect.right, clientRect.right); + _srcRect.bottom = std::min(_srcRect.bottom, clientRect.bottom); + + if (Win32Utils::GetWindowShowCmd(hwndSrc) == SW_SHOWNORMAL) { + // 裁剪上边框 + RECT windowRect; + if (!GetWindowRect(hwndSrc, &windowRect)) { + Logger::Get().Win32Error("GetWindowRect 失败"); + return false; + } + _srcRect.top += GetTopBorderHeight(hwndSrc, clientRect, windowRect); + } + } else { + if (!GetClientRectOfUWP(hwndSrc, _srcRect)) { + if (!Win32Utils::GetClientScreenRect(hwndSrc, _srcRect)) { + Logger::Get().Error("GetClientScreenRect 失败"); + return false; + } + } + + if (Win32Utils::GetWindowShowCmd(hwndSrc) == SW_SHOWMAXIMIZED) { + // 最大化的窗口可能有一部分客户区在屏幕外面 + HMONITOR hMon = MonitorFromWindow(hwndSrc, MONITOR_DEFAULTTONEAREST); + MONITORINFO mi{ .cbSize = sizeof(mi) }; + if (!GetMonitorInfo(hMon, &mi)) { + Logger::Get().Win32Error("GetMonitorInfo 失败"); + return false; + } + + IntersectRect(&_srcRect, &_srcRect, &mi.rcWork); + } else { + RECT windowRect; + if (!GetWindowRect(hwndSrc, &windowRect)) { + Logger::Get().Win32Error("GetWindowRect 失败"); + return false; + } + + // 如果上边框在客户区内,则裁剪上边框 + if (windowRect.top == _srcRect.top) { + _srcRect.top += GetTopBorderHeight(hwndSrc, _srcRect, windowRect); + } + } + } + + _srcRect = { + std::lround(_srcRect.left + options.cropping.Left), + std::lround(_srcRect.top + options.cropping.Top), + 
std::lround(_srcRect.right - options.cropping.Right), + std::lround(_srcRect.bottom - options.cropping.Bottom) + }; + + if (_srcRect.right - _srcRect.left <= 0 || _srcRect.bottom - _srcRect.top <= 0) { + Logger::Get().Error("裁剪窗口失败"); + return false; + } + return true; } -bool FrameSourceBase::_GetMapToOriginDPI(HWND hWnd, double& a, double& bx, double& by) { +bool FrameSourceBase::_GetMapToOriginDPI(HWND hWnd, double& a, double& bx, double& by) noexcept { // HDC 中的 HBITMAP 尺寸为窗口的原始尺寸 // 通过 GetWindowRect 获得的尺寸为窗口的 DPI 缩放后尺寸 // 它们的商即为窗口的 DPI 缩放 @@ -163,10 +471,10 @@ bool FrameSourceBase::_GetMapToOriginDPI(HWND hWnd, double& a, double& bx, doubl return true; } -bool FrameSourceBase::_CenterWindowIfNecessary(HWND hWnd, const RECT& rcWork) { +bool FrameSourceBase::_CenterWindowIfNecessary(HWND hWnd, const RECT& rcWork) noexcept { RECT srcRect; - if (!GetWindowRect(hWnd, &srcRect)) { - Logger::Get().Win32Error("GetWindowRect 失败"); + if (!Win32Utils::GetWindowFrameRect(hWnd, srcRect)) { + Logger::Get().Error("GetWindowFrameRect 失败"); return false; } @@ -177,7 +485,6 @@ bool FrameSourceBase::_CenterWindowIfNecessary(HWND hWnd, const RECT& rcWork) { SIZE rcWorkSize = { rcWork.right - rcWork.left, rcWork.bottom - rcWork.top }; if (srcSize.cx > rcWorkSize.cx || srcSize.cy > rcWorkSize.cy) { // 源窗口无法被当前屏幕容纳,因此无法捕获 - //MagApp::Get().SetErrorMsg(ErrorMessages::SRC_TOO_LARGE); return false; } @@ -197,105 +504,97 @@ bool FrameSourceBase::_CenterWindowIfNecessary(HWND hWnd, const RECT& rcWork) { return true; } -struct EnumChildWndParam { - const wchar_t* clientWndClassName = nullptr; - SmallVector childWindows; -}; +bool FrameSourceBase::_InitCheckingForDuplicateFrame() { + ID3D11Device5* d3dDevice = _deviceResources->GetD3DDevice(); -static BOOL CALLBACK EnumChildProc( - _In_ HWND hwnd, - _In_ LPARAM lParam -) { - std::wstring className = Win32Utils::GetWndClassName(hwnd); + D3D11_TEXTURE2D_DESC td; + _output->GetDesc(&td); - EnumChildWndParam* param = 
(EnumChildWndParam*)lParam; - if (className == param->clientWndClassName) { - param->childWindows.push_back(hwnd); + _prevFrame = DirectXHelper::CreateTexture2D( + d3dDevice, td.Format, td.Width, td.Height, D3D11_BIND_SHADER_RESOURCE); + if (!_prevFrame) { + return false; } - return TRUE; -} - -static HWND FindClientWindow(HWND hwndSrc, const wchar_t* clientWndClassName) { - // 查找所有窗口类名为 ApplicationFrameInputSinkWindow 的子窗口 - // 该子窗口一般为客户区 - EnumChildWndParam param{}; - param.clientWndClassName = clientWndClassName; - EnumChildWindows(hwndSrc, EnumChildProc, (LPARAM)¶m); + HRESULT hr = d3dDevice->CreateShaderResourceView(_prevFrame.get(), nullptr, _prevFrameSrv.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateShaderResourceView 失败", hr); + return false; + } - if (param.childWindows.empty()) { - // 未找到符合条件的子窗口 - return hwndSrc; + D3D11_BUFFER_DESC bd{ + .ByteWidth = 4, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_UNORDERED_ACCESS, + .StructureByteStride = 4 + }; + hr = d3dDevice->CreateBuffer(&bd, nullptr, _resultBuffer.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateBuffer 失败", hr); + return false; } - if (param.childWindows.size() == 1) { - return param.childWindows[0]; + _resultBufferUav = _descriptorStore->GetUnorderedAccessView( + _resultBuffer.get(), 1, DXGI_FORMAT_R32_UINT); + if (!_resultBufferUav) { + Logger::Get().ComError("GetUnorderedAccessView 失败", hr); + return false; } - // 如果有多个匹配的子窗口,取最大的(一般不会出现) - int maxSize = 0, maxIdx = 0; - for (int i = 0; i < param.childWindows.size(); ++i) { - RECT rect; - if (!GetClientRect(param.childWindows[i], &rect)) { - continue; - } + bd.Usage = D3D11_USAGE_STAGING; + bd.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + bd.BindFlags = 0; + hr = d3dDevice->CreateBuffer(&bd, nullptr, _readBackBuffer.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateBuffer 失败", hr); + return false; + } - int size = rect.right - rect.left + rect.bottom - rect.top; - if (size > maxSize) { - maxSize = 
size; - maxIdx = i; - } + hr = d3dDevice->CreateComputeShader( + DuplicateFrameCS, sizeof(DuplicateFrameCS), nullptr, _dupFrameCS.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateComputeShader 失败", hr); + return false; } - return param.childWindows[maxIdx]; + static constexpr std::pair BLOCK_SIZE{ 16, 16 }; + _dispatchCount.first = (td.Width + BLOCK_SIZE.first - 1) / BLOCK_SIZE.first; + _dispatchCount.second = (td.Height + BLOCK_SIZE.second - 1) / BLOCK_SIZE.second; + + return true; } -bool FrameSourceBase::_UpdateSrcFrameRect() { - _srcFrameRect = {}; +bool FrameSourceBase::_IsDuplicateFrame() { + // 检查是否和前一帧相同 + ID3D11DeviceContext4* d3dDC = _deviceResources->GetD3DDC(); - HWND hwndSrc = MagApp::Get().GetHwndSrc(); + ID3D11ShaderResourceView* srvs[]{ _outputSrv, _prevFrameSrv.get() }; + d3dDC->CSSetShaderResources(0, 2, srvs); - if (MagApp::Get().GetOptions().IsCaptureTitleBar() && _CanCaptureTitleBar()) { - if (!Win32Utils::GetWindowFrameRect(hwndSrc, _srcFrameRect)) { - Logger::Get().Win32Error("GetClientScreenRect 失败"); - return false; - } - } else { - std::wstring className = Win32Utils::GetWndClassName(hwndSrc); - if (className == L"ApplicationFrameWindow" || className == L"Windows.UI.Core.CoreWindow") { - // "Modern App" - // 客户区窗口类名为 ApplicationFrameInputSinkWindow - HWND hwndClient = FindClientWindow(hwndSrc, L"ApplicationFrameInputSinkWindow"); - if (hwndClient) { - if (!Win32Utils::GetClientScreenRect(hwndClient, _srcFrameRect)) { - Logger::Get().Win32Error("GetClientScreenRect 失败"); - } - } - } + ID3D11SamplerState* sam = _deviceResources->GetSampler( + D3D11_FILTER_MIN_MAG_MIP_POINT, D3D11_TEXTURE_ADDRESS_CLAMP); + d3dDC->CSSetSamplers(0, 1, &sam); - if (_srcFrameRect == RECT{}) { - if (!Win32Utils::GetClientScreenRect(hwndSrc, _srcFrameRect)) { - Logger::Get().Win32Error("GetClientScreenRect 失败"); - return false; - } - } - } + // 将缓冲区置零 + static constexpr UINT ZERO[4]{}; + d3dDC->ClearUnorderedAccessViewUint(_resultBufferUav, ZERO); + 
d3dDC->CSSetUnorderedAccessViews(0, 1, &_resultBufferUav, nullptr); - const Cropping& cropping = MagApp::Get().GetOptions().cropping; - _srcFrameRect = { - std::lround(_srcFrameRect.left + cropping.Left), - std::lround(_srcFrameRect.top + cropping.Top), - std::lround(_srcFrameRect.right - cropping.Right), - std::lround(_srcFrameRect.bottom - cropping.Bottom) - }; + d3dDC->CSSetShader(_dupFrameCS.get(), nullptr, 0); - if (_srcFrameRect.right - _srcFrameRect.left <= 0 || _srcFrameRect.bottom - _srcFrameRect.top <= 0) { - //App::Get().SetErrorMsg(ErrorMessages::FAILED_TO_CROP); - Logger::Get().Error("裁剪窗口失败"); - return false; - } + d3dDC->Dispatch(_dispatchCount.first, _dispatchCount.second, 1); - return true; + // 取回结果 + d3dDC->CopyResource(_readBackBuffer.get(), _resultBuffer.get()); + + uint32_t result = 1; + D3D11_MAPPED_SUBRESOURCE ms; + HRESULT hr = d3dDC->Map(_readBackBuffer.get(), 0, D3D11_MAP_READ, 0, &ms); + if (SUCCEEDED(hr)) { + result = *(uint32_t*)ms.pData; + d3dDC->Unmap(_readBackBuffer.get(), 0); + } + return result == 0; } } diff --git a/src/Magpie.Core/FrameSourceBase.h b/src/Magpie.Core/FrameSourceBase.h index 6978103af..cc73f522d 100644 --- a/src/Magpie.Core/FrameSourceBase.h +++ b/src/Magpie.Core/FrameSourceBase.h @@ -2,62 +2,104 @@ namespace Magpie::Core { +class DeviceResources; +class BackendDescriptorStore; + class FrameSourceBase { public: - FrameSourceBase() {} + FrameSourceBase() noexcept; - virtual ~FrameSourceBase(); + virtual ~FrameSourceBase() noexcept; // 不可复制,不可移动 FrameSourceBase(const FrameSourceBase&) = delete; FrameSourceBase(FrameSourceBase&&) = delete; - virtual bool Initialize(); + bool Initialize(DeviceResources& deviceResources, BackendDescriptorStore& descriptorStore) noexcept; enum class UpdateState { NewFrame, - NoUpdate, Waiting, Error }; - virtual UpdateState Update() = 0; - - virtual bool IsScreenCapture() = 0; - - // 注意:此函数返回源窗口作为输入部分的位置,但可能和 GetOutput 获取到的纹理尺寸不同 - const RECT& GetSrcFrameRect() const noexcept { return 
_srcFrameRect; } + UpdateState Update() noexcept; - ID3D11Texture2D* GetOutput() { + ID3D11Texture2D* GetOutput() noexcept { return _output.get(); } - virtual const char* GetName() const noexcept = 0; + // 注意:返回源窗口作为输入部分的位置,但可能和 GetOutput 获取到的纹理尺寸不同, + // 因为源窗口可能存在 DPI 缩放,而某些捕获方法无视 DPI 缩放 + const RECT& SrcRect() const noexcept { return _srcRect; } + + std::pair GetStatisticsForDynamicDetection() const noexcept; + + virtual const char* Name() const noexcept = 0; + + virtual bool IsScreenCapture() const noexcept = 0; + + enum FrameSourceWaitType { + NoWait, + WaitForMessage, + WaitForFrame + }; + + virtual FrameSourceWaitType WaitType() const noexcept = 0; + + virtual void OnCursorVisibilityChanged(bool /*isVisible*/, bool /*onDestory*/) noexcept {}; protected: - virtual bool _HasRoundCornerInWin11() = 0; + virtual bool _Initialize() noexcept = 0; - virtual bool _CanCaptureTitleBar() = 0; + virtual UpdateState _Update() noexcept = 0; + + virtual bool _HasRoundCornerInWin11() noexcept = 0; + + virtual bool _CanCaptureTitleBar() noexcept = 0; + + bool _CalcSrcRect() noexcept; // 获取坐标系 1 到坐标系 2 的映射关系 // 坐标系 1:屏幕坐标系,即虚拟化后的坐标系。原点为屏幕左上角 - // 坐标系 2:虚拟化前的坐标系,即窗口所见的坐标系,原点为窗口左上角 + // 坐标系 2:虚拟化前的坐标系,即源窗口所见的坐标系,原点为窗口左上角 // 两坐标系为线性映射,a 和 b 返回该映射的参数 // 如果窗口本身支持高 DPI,则 a 为 1,否则 a 为 DPI 缩放的倒数 // 此函数是为了将屏幕上的点映射到窗口坐标系中,并且无视 DPI 虚拟化 // 坐标系 1 中的 (x1, y1) 映射到 (x1 * a + bx, x2 * a + by) - static bool _GetMapToOriginDPI(HWND hWnd, double& a, double& bx, double& by); - - static bool _CenterWindowIfNecessary(HWND hWnd, const RECT& rcWork); + static bool _GetMapToOriginDPI(HWND hWnd, double& a, double& bx, double& by) noexcept; - bool _UpdateSrcFrameRect(); + static bool _CenterWindowIfNecessary(HWND hWnd, const RECT& rcWork) noexcept; - RECT _srcFrameRect{}; + RECT _srcRect{}; + DeviceResources* _deviceResources = nullptr; + BackendDescriptorStore* _descriptorStore = nullptr; winrt::com_ptr _output; + ID3D11ShaderResourceView* _outputSrv; + + winrt::com_ptr _resultBuffer; + 
ID3D11UnorderedAccessView* _resultBufferUav = nullptr; + winrt::com_ptr _readBackBuffer; + winrt::com_ptr _dupFrameCS; + std::pair _dispatchCount; bool _roundCornerDisabled = false; bool _windowResizingDisabled = false; + +private: + bool _InitCheckingForDuplicateFrame(); + + bool _IsDuplicateFrame(); + + // 用于检查重复帧 + winrt::com_ptr _prevFrame; + winrt::com_ptr _prevFrameSrv; + uint16_t _nextSkipCount; + uint16_t _framesLeft; + // (预测错误帧数, 总计跳过帧数) + std::atomic> _statistics; + bool _isCheckingForDuplicateFrame = true; }; } diff --git a/src/Magpie.Core/GDIFrameSource.cpp b/src/Magpie.Core/GDIFrameSource.cpp index efe5ed7d3..6cf1532d7 100644 --- a/src/Magpie.Core/GDIFrameSource.cpp +++ b/src/Magpie.Core/GDIFrameSource.cpp @@ -1,34 +1,29 @@ #include "pch.h" #include "GDIFrameSource.h" -#include "MagApp.h" -#include "DeviceResources.h" #include "Logger.h" - +#include "ScalingOptions.h" +#include "DirectXHelper.h" +#include "DeviceResources.h" +#include "ScalingWindow.h" namespace Magpie::Core { -bool GDIFrameSource::Initialize() { - if (!FrameSourceBase::Initialize()) { - Logger::Get().Error("初始化 FrameSourceBase 失败"); +bool GDIFrameSource::_Initialize() noexcept { + if (!_CalcSrcRect()) { return false; } - if (!_UpdateSrcFrameRect()) { - Logger::Get().Error("_UpdateSrcFrameRect 失败"); - return false; - } - - HWND hwndSrc = MagApp::Get().GetHwndSrc(); + const HWND hwndSrc = ScalingWindow::Get().HwndSrc(); double a, bx, by; if (_GetMapToOriginDPI(hwndSrc, a, bx, by)) { Logger::Get().Info(fmt::format("源窗口 DPI 缩放为 {}", 1 / a)); _frameRect = { - std::lround(_srcFrameRect.left * a + bx), - std::lround(_srcFrameRect.top * a + by), - std::lround(_srcFrameRect.right * a + bx), - std::lround(_srcFrameRect.bottom * a + by) + std::lround(_srcRect.left * a + bx), + std::lround(_srcRect.top * a + by), + std::lround(_srcRect.right * a + bx), + std::lround(_srcRect.bottom * a + by) }; } else { Logger::Get().Error("_GetMapToOriginDPI 失败"); @@ -41,23 +36,23 @@ bool 
GDIFrameSource::Initialize() { } _frameRect = { - _srcFrameRect.left - srcWindowRect.left, - _srcFrameRect.top - srcWindowRect.top, - _srcFrameRect.right - srcWindowRect.left, - _srcFrameRect.bottom - srcWindowRect.top + _srcRect.left - srcWindowRect.left, + _srcRect.top - srcWindowRect.top, + _srcRect.right - srcWindowRect.left, + _srcRect.bottom - srcWindowRect.top }; } if (_frameRect.left < 0 || _frameRect.top < 0 || _frameRect.right < 0 || _frameRect.bottom < 0 || _frameRect.right - _frameRect.left <= 0 || _frameRect.bottom - _frameRect.top <= 0 - ) { - //App::Get().SetErrorMsg(ErrorMessages::FAILED_TO_CROP); + ) { Logger::Get().Error("裁剪失败"); return false; } - _output = MagApp::Get().GetDeviceResources().CreateTexture2D( + _output = DirectXHelper::CreateTexture2D( + _deviceResources->GetD3DDevice(), DXGI_FORMAT_B8G8R8A8_UNORM, _frameRect.right - _frameRect.left, _frameRect.bottom - _frameRect.top, @@ -80,9 +75,7 @@ bool GDIFrameSource::Initialize() { return true; } -FrameSourceBase::UpdateState GDIFrameSource::Update() { - HWND hwndSrc = MagApp::Get().GetHwndSrc(); - +FrameSourceBase::UpdateState GDIFrameSource::_Update() noexcept { HDC hdcDest; HRESULT hr = _dxgiSurface->GetDC(TRUE, &hdcDest); if (FAILED(hr)) { @@ -90,6 +83,7 @@ FrameSourceBase::UpdateState GDIFrameSource::Update() { return UpdateState::Error; } + const HWND hwndSrc = ScalingWindow::Get().HwndSrc(); HDC hdcSrc = GetDCEx(hwndSrc, NULL, DCX_LOCKWINDOWUPDATE | DCX_WINDOW); if (!hdcSrc) { Logger::Get().Win32Error("GetDC 失败"); @@ -99,7 +93,7 @@ FrameSourceBase::UpdateState GDIFrameSource::Update() { if (!BitBlt(hdcDest, 0, 0, _frameRect.right - _frameRect.left, _frameRect.bottom - _frameRect.top, hdcSrc, _frameRect.left, _frameRect.top, SRCCOPY) - ) { + ) { Logger::Get().Win32Error("BitBlt 失败"); } diff --git a/src/Magpie.Core/GDIFrameSource.h b/src/Magpie.Core/GDIFrameSource.h index 02abc1beb..c01bae8e5 100644 --- a/src/Magpie.Core/GDIFrameSource.h +++ b/src/Magpie.Core/GDIFrameSource.h @@ -3,29 
+3,32 @@ namespace Magpie::Core { -class GDIFrameSource : public FrameSourceBase { +class GDIFrameSource final : public FrameSourceBase { public: - GDIFrameSource() {}; virtual ~GDIFrameSource() {} - bool Initialize() override; - - UpdateState Update() override; - - bool IsScreenCapture() override { + bool IsScreenCapture() const noexcept override { return false; } - const char* GetName() const noexcept override { + FrameSourceWaitType WaitType() const noexcept override { + return NoWait; + } + + const char* Name() const noexcept override { return "GDI"; } protected: - bool _HasRoundCornerInWin11() override { + bool _Initialize() noexcept override; + + UpdateState _Update() noexcept override; + + bool _HasRoundCornerInWin11() noexcept override { return false; } - bool _CanCaptureTitleBar() override { + bool _CanCaptureTitleBar() noexcept override { return false; } diff --git a/src/Magpie.Core/GPUTimer.cpp b/src/Magpie.Core/GPUTimer.cpp deleted file mode 100644 index 15b0094d6..000000000 --- a/src/Magpie.Core/GPUTimer.cpp +++ /dev/null @@ -1,170 +0,0 @@ -#include "pch.h" -#include "GPUTimer.h" -#include "MagApp.h" -#include "DeviceResources.h" - - -namespace Magpie::Core { - -void GPUTimer::OnBeginFrame() { - auto now = std::chrono::high_resolution_clock::now(); - - _elapsedTime = now - _lastTimePoint; - _lastTimePoint = now; - - _totalTime += _elapsedTime; - - // 更新当前帧率 - ++_framesThisSecond; - ++_frameCount; - - _fpsCounter += _elapsedTime; - if (_fpsCounter >= 1s) { - _framesPerSecond = _framesThisSecond; - _framesThisSecond = 0; - _fpsCounter %= 1s; - } -} - -void GPUTimer::StartProfiling(std::chrono::microseconds updateInterval, UINT passCount) { - assert(passCount > 0); - - _curQueryIdx = 0; - _updateProfilingTime = updateInterval; - _profilingCounter = {}; - - _queries[0].passes.resize(passCount); - if (MagApp::Get().GetOptions().IsTripleBuffering()) { - _queries[1].passes.resize(passCount); - } - _passesTimings.resize(passCount); - 
_gpuTimings.passes.resize(passCount); - _firstProfilingFrame = true; -} - -void GPUTimer::StopProfiling() { - _curQueryIdx = -1; - _updateProfilingTime = {}; - _profilingCounter = {}; - - _queries = {}; - _passesTimings = {}; - _gpuTimings = {}; -} - -void GPUTimer::OnBeginEffects() { - if (_curQueryIdx < 0) { - return; - } - - _UpdateGPUTimings(); - - auto d3dDC = MagApp::Get().GetDeviceResources().GetD3DDC(); - d3dDC->Begin(_queries[_curQueryIdx].disjoint.get()); - d3dDC->End(_queries[_curQueryIdx].start.get()); -} - -void GPUTimer::OnEndPass(UINT idx) { - if (_curQueryIdx < 0) { - return; - } - - MagApp::Get().GetDeviceResources().GetD3DDC()->End(_queries[_curQueryIdx].passes[idx].get()); -} - -void GPUTimer::OnEndEffects() { - if (_curQueryIdx < 0) { - return; - } - - MagApp::Get().GetDeviceResources().GetD3DDC()->End(_queries[_curQueryIdx].disjoint.get()); -} - -template -static T GetQueryData(ID3D11DeviceContext3* d3dDC, ID3D11Query* query) { - T data{}; - while (S_OK != d3dDC->GetData(query, &data, sizeof(data), 0)) { - Sleep(0); - } - return data; -} - -void GPUTimer::_UpdateGPUTimings() { - if (_curQueryIdx < 0) { - return; - } - - if (MagApp::Get().GetOptions().IsTripleBuffering()) { - _curQueryIdx = 1 - _curQueryIdx; - } - - auto& curQueryInfo = _queries[_curQueryIdx]; - - if (curQueryInfo.disjoint) { - auto d3dDC = MagApp::Get().GetDeviceResources().GetD3DDC(); - - D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData = - GetQueryData(d3dDC, curQueryInfo.disjoint.get()); - - if (!disjointData.Disjoint) { - const float toMS = 1000.0f / disjointData.Frequency; - - UINT64 startTimestamp = GetQueryData(d3dDC, curQueryInfo.start.get()); - - for (size_t i = 0; i < curQueryInfo.passes.size(); ++i) { - UINT64 timestamp = GetQueryData(d3dDC, curQueryInfo.passes[i].get()); - - float t = (timestamp - startTimestamp) * toMS; - if (t > 0.01) { - _passesTimings[i].first += t; - ++_passesTimings[i].second; - } - startTimestamp = timestamp; - } - } else { - // 查询的值不可靠 - 
-#ifdef _DEBUG - // 依然执行查询,否则调试层将发出警告 - GetQueryData(d3dDC, curQueryInfo.disjoint.get()); - for (auto& query : curQueryInfo.passes) { - GetQueryData(d3dDC, query.get()); - } -#endif // _DEBUG - } - - _profilingCounter += _elapsedTime; - - if (_firstProfilingFrame) { - _firstProfilingFrame = false; - - // 在第一帧更新一次 - for (UINT i = 0; i < _passesTimings.size(); ++i) { - _gpuTimings.passes[i] = _passesTimings[i].first; - } - } else if (_profilingCounter >= _updateProfilingTime) { - // 更新渲染用时 - for (UINT i = 0; i < _passesTimings.size(); ++i) { - _gpuTimings.passes[i] = _passesTimings[i].second == 0 ? - 0.0f : _passesTimings[i].first / _passesTimings[i].second; - } - - std::fill(_passesTimings.begin(), _passesTimings.end(), std::pair()); - - _profilingCounter %= _updateProfilingTime; - } - } else { - auto d3dDevice = MagApp::Get().GetDeviceResources().GetD3DDevice(); - - D3D11_QUERY_DESC desc{ D3D11_QUERY_TIMESTAMP_DISJOINT, 0 }; - d3dDevice->CreateQuery(&desc, curQueryInfo.disjoint.put()); - - desc.Query = D3D11_QUERY_TIMESTAMP; - d3dDevice->CreateQuery(&desc, curQueryInfo.start.put()); - for (UINT j = 0; j < curQueryInfo.passes.size(); ++j) { - d3dDevice->CreateQuery(&desc, curQueryInfo.passes[j].put()); - } - } -} - -} diff --git a/src/Magpie.Core/GPUTimer.h b/src/Magpie.Core/GPUTimer.h deleted file mode 100644 index c24173ceb..000000000 --- a/src/Magpie.Core/GPUTimer.h +++ /dev/null @@ -1,84 +0,0 @@ -#pragma once -#include "SmallVector.h" - -namespace Magpie::Core { - -// 用于记录帧率和 GPU 时间 -class GPUTimer { -public: - // 上一帧的渲染时间 - std::chrono::nanoseconds GetElapsedTime() const noexcept { return _elapsedTime; } - - // 经过的总时间 - std::chrono::nanoseconds GetTotalTime() const noexcept { return _totalTime; } - - // 经过的总帧数 - UINT GetFrameCount() const noexcept { return _frameCount; } - - // 上一秒的帧数 - UINT GetFramesPerSecond() const noexcept { return _framesPerSecond; } - - // 在每帧开始时调用,用于记录帧率和检索渲染用时 - void OnBeginFrame(); - - struct GPUTimings { - SmallVector passes; - // 
float overlay = 0.0f; - }; - - // 所有元素的处理时间,单位为 ms - const GPUTimings& GetGPUTimings() const noexcept { - return _gpuTimings; - } - - // updateInterval 为更新渲染用时的间隔 - // 可为 0,即每帧都更新 - void StartProfiling(std::chrono::microseconds updateInterval, UINT passCount); - - void StopProfiling(); - - void OnBeginEffects(); - - // 每个通道结束后调用 - void OnEndPass(UINT idx); - - void OnEndEffects(); - -private: - void _UpdateGPUTimings(); - - std::chrono::time_point _lastTimePoint; - - std::chrono::nanoseconds _elapsedTime{}; - std::chrono::nanoseconds _totalTime{}; - - UINT _frameCount = 0; - UINT _framesPerSecond = 0; - UINT _framesThisSecond = 0; - std::chrono::nanoseconds _fpsCounter{}; - - GPUTimings _gpuTimings; - // 记录的第一帧首先更新一次,而不是等待更新间隔 - bool _firstProfilingFrame = true; - // 更新渲染用时的间隔 - std::chrono::nanoseconds _updateProfilingTime{}; - std::chrono::nanoseconds _profilingCounter{}; - - struct _QueryInfo { - winrt::com_ptr disjoint; - winrt::com_ptr start; - std::vector> passes; - }; - // [(disjoint, [timestamp])] - // 允许额外的延迟时需保存两帧的数据 - std::array<_QueryInfo, 2> _queries; - // -1:无需统计渲染时间 - // 否则为当前帧在 _queries 中的位置 - int _curQueryIdx = -1; - - // 用于保存渲染时间 - // (总计用时, 已统计帧数) - SmallVector, 0> _passesTimings; -}; - -} diff --git a/src/Magpie.Core/GraphicsCaptureFrameSource.cpp b/src/Magpie.Core/GraphicsCaptureFrameSource.cpp index 31d12490c..a632c4baa 100644 --- a/src/Magpie.Core/GraphicsCaptureFrameSource.cpp +++ b/src/Magpie.Core/GraphicsCaptureFrameSource.cpp @@ -1,12 +1,14 @@ #include "pch.h" #include "GraphicsCaptureFrameSource.h" -#include "MagApp.h" #include "StrUtils.h" #include "Utils.h" #include "DeviceResources.h" #include "Logger.h" #include - +#include "Win32Utils.h" +#include "DirectXHelper.h" +#include "ScalingOptions.h" +#include "ScalingWindow.h" namespace winrt { using namespace Windows::Graphics; @@ -15,16 +17,10 @@ using namespace Windows::Graphics::DirectX; using namespace Windows::Graphics::DirectX::Direct3D11; } - namespace Magpie::Core { -bool 
GraphicsCaptureFrameSource::Initialize() { - if (!FrameSourceBase::Initialize()) { - Logger::Get().Error("初始化 FrameSourceBase 失败"); - return false; - } - - //App::Get().SetErrorMsg(ErrorMessages::FAILED_TO_CAPTURE); +bool GraphicsCaptureFrameSource::_Initialize() noexcept { + ID3D11Device5* d3dDevice = _deviceResources->GetD3DDevice(); HRESULT hr; @@ -35,8 +31,11 @@ bool GraphicsCaptureFrameSource::Initialize() { return false; } + winrt::com_ptr dxgiDevice; + d3dDevice->QueryInterface(dxgiDevice.put()); + hr = CreateDirect3D11DeviceFromDXGIDevice( - MagApp::Get().GetDeviceResources().GetDXGIDevice(), + dxgiDevice.get(), reinterpret_cast<::IInspectable**>(winrt::put_abi(_wrappedD3DDevice)) ); if (FAILED(hr)) { @@ -55,6 +54,11 @@ bool GraphicsCaptureFrameSource::Initialize() { return false; } + if (!_CalcSrcRect()) { + Logger::Get().Error("_CalcSrcRect 失败"); + return false; + } + if (!_CaptureWindow(interop.get())) { Logger::Get().Info("窗口捕获失败,回落到屏幕捕获"); @@ -66,7 +70,8 @@ bool GraphicsCaptureFrameSource::Initialize() { } } - _output = MagApp::Get().GetDeviceResources().CreateTexture2D( + _output = DirectXHelper::CreateTexture2D( + d3dDevice, DXGI_FORMAT_B8G8R8A8_UNORM, _frameBox.right - _frameBox.left, _frameBox.bottom - _frameBox.top, @@ -77,26 +82,23 @@ bool GraphicsCaptureFrameSource::Initialize() { return false; } - if (!StartCapture()) { + if (!_StartCapture()) { Logger::Get().Error("_StartCapture 失败"); return false; } - //App::Get().SetErrorMsg(ErrorMessages::GENERIC); Logger::Get().Info("GraphicsCaptureFrameSource 初始化完成"); return true; } -FrameSourceBase::UpdateState GraphicsCaptureFrameSource::Update() { +FrameSourceBase::UpdateState GraphicsCaptureFrameSource::_Update() noexcept { if (!_captureSession) { return UpdateState::Waiting; } winrt::Direct3D11CaptureFrame frame = _captureFramePool.TryGetNextFrame(); if (!frame) { - // 缓冲池没有帧则等待新的帧 // 因为已通过 FrameArrived 注册回调,所以每当有新帧时会有新消息到达 - WaitMessage(); return UpdateState::Waiting; } @@ -114,41 +116,103 @@ 
FrameSourceBase::UpdateState GraphicsCaptureFrameSource::Update() { return UpdateState::Error; } - MagApp::Get().GetDeviceResources().GetD3DDC() - ->CopySubresourceRegion(_output.get(), 0, 0, 0, 0, withFrame.get(), 0, &_frameBox); + _deviceResources->GetD3DDC()->CopySubresourceRegion(_output.get(), 0, 0, 0, 0, withFrame.get(), 0, &_frameBox); - frame.Close(); return UpdateState::NewFrame; } -bool GraphicsCaptureFrameSource::_CaptureWindow(IGraphicsCaptureItemInterop* interop) { - // DwmGetWindowAttribute 和 Graphics.Capture 无法应用于子窗口 - HWND hwndSrc = MagApp::Get().GetHwndSrc(); +void GraphicsCaptureFrameSource::OnCursorVisibilityChanged(bool isVisible, bool onDestory) noexcept { + // 显示光标时必须重启捕获 + if (isVisible) { + _StopCapture(); + + if (onDestory) { + // FIXME: 这里尝试修复拖动窗口时光标不显示的问题,但有些环境下不起作用 + SystemParametersInfo(SPI_SETCURSORS, 0, nullptr, 0); + } else { + _StartCapture(); + } + } +} + +// Graphics Capture 的捕获区域没有文档记录,这里的计算是我实验了多种窗口后得出的, +// 高度依赖实现细节,未来可能会失效 +static bool CalcWindowCapturedFrameBounds(HWND hWnd, RECT& rect) noexcept { + // Win10 中捕获区域为 extended frame bounds;Win11 中 DwmGetWindowAttribute + // 对最大化的窗口返回值和 Win10 不同,可能是 OS 的 bug,应进一步处理 + HRESULT hr = DwmGetWindowAttribute(hWnd, + DWMWA_EXTENDED_FRAME_BOUNDS, &rect, sizeof(rect)); + if (FAILED(hr)) { + Logger::Get().ComError("DwmGetWindowAttribute 失败", hr); + return false; + } + + if(!Win32Utils::GetOSVersion().IsWin11() || Win32Utils::GetWindowShowCmd(hWnd) != SW_SHOWMAXIMIZED) { + return true; + } - // 包含边框的窗口尺寸 - RECT srcRect{}; - if (!Win32Utils::GetWindowFrameRect(hwndSrc, srcRect)) { - Logger::Get().Error("GetWindowFrameRect 失败"); + // 如果窗口禁用了非客户区域绘制则捕获区域为 extended frame bounds + BOOL hasBorder = TRUE; + hr = DwmGetWindowAttribute(hWnd, DWMWA_NCRENDERING_ENABLED, &hasBorder, sizeof(hasBorder)); + if (FAILED(hr)) { + Logger::Get().ComError("DwmGetWindowAttribute 失败", hr); return false; } - if (!_UpdateSrcFrameRect()) { - Logger::Get().Error("UpdateSrcFrameRect 失败"); + if (!hasBorder) { + return 
true; + } + + RECT clientRect; + if (!Win32Utils::GetClientScreenRect(hWnd, clientRect)) { + Logger::Get().Error("GetClientScreenRect 失败"); + return false; + } + + // 有些窗口最大化后有部分客户区在屏幕外,如 UWP 和资源管理器,它们的捕获区域 + // 是整个客户区。否则捕获区域不会超出屏幕 + HMONITOR hMon = MonitorFromWindow(hWnd, MONITOR_DEFAULTTONEAREST); + MONITORINFO mi{ .cbSize = sizeof(mi) }; + if (!GetMonitorInfo(hMon, &mi)) { + Logger::Get().Win32Error("GetMonitorInfo 失败"); + return false; + } + + if (clientRect.top < mi.rcWork.top) { + rect = clientRect; + } else { + IntersectRect(&rect, &rect, &mi.rcWork); + } + + return true; +} + +bool GraphicsCaptureFrameSource::_CaptureWindow(IGraphicsCaptureItemInterop* interop) noexcept { + const HWND hwndSrc = ScalingWindow::Get().HwndSrc(); + + RECT frameBounds; + if (!CalcWindowCapturedFrameBounds(hwndSrc, frameBounds)) { + Logger::Get().Error("CalcWindowCapturedFrameBounds 失败"); + return false; + } + + if (_srcRect.left < frameBounds.left || _srcRect.top < frameBounds.top) { + Logger::Get().Error("裁剪边框错误"); return false; } // 在源窗口存在 DPI 缩放时有时会有一像素的偏移(取决于窗口在屏幕上的位置) // 可能是 DwmGetWindowAttribute 的 bug _frameBox = { - UINT(_srcFrameRect.left - srcRect.left), - UINT(_srcFrameRect.top - srcRect.top), + UINT(_srcRect.left - frameBounds.left), + UINT(_srcRect.top - frameBounds.top), 0, - UINT(_srcFrameRect.right - srcRect.left), - UINT(_srcFrameRect.bottom - srcRect.top), + UINT(_srcRect.right - frameBounds.left), + UINT(_srcRect.bottom - frameBounds.top), 1 }; - if (_TryCreateGraphicsCaptureItem(interop, hwndSrc)) { + if (_TryCreateGraphicsCaptureItem(interop)) { return true; } @@ -160,7 +224,7 @@ bool GraphicsCaptureFrameSource::_CaptureWindow(IGraphicsCaptureItemInterop* int Logger::Get().Info("已改变源窗口样式"); _originalSrcExStyle = srcExStyle; - if (_TryCreateGraphicsCaptureItem(interop, hwndSrc)) { + if (_TryCreateGraphicsCaptureItem(interop)) { _RemoveOwnerFromAltTabList(hwndSrc); return true; } @@ -176,7 +240,7 @@ bool 
GraphicsCaptureFrameSource::_CaptureWindow(IGraphicsCaptureItemInterop* int if (SUCCEEDED(hr)) { Logger::Get().Info("已添加任务栏图标"); - if (_TryCreateGraphicsCaptureItem(interop, hwndSrc)) { + if (_TryCreateGraphicsCaptureItem(interop)) { _RemoveOwnerFromAltTabList(hwndSrc); return true; } @@ -208,10 +272,10 @@ bool GraphicsCaptureFrameSource::_CaptureWindow(IGraphicsCaptureItemInterop* int return false; } -bool GraphicsCaptureFrameSource::_TryCreateGraphicsCaptureItem(IGraphicsCaptureItemInterop* interop, HWND hwndSrc) noexcept { +bool GraphicsCaptureFrameSource::_TryCreateGraphicsCaptureItem(IGraphicsCaptureItemInterop* interop) noexcept { try { HRESULT hr = interop->CreateForWindow( - hwndSrc, + ScalingWindow::Get().HwndSrc(), winrt::guid_of(), winrt::put_abi(_captureItem) ); @@ -264,7 +328,7 @@ void GraphicsCaptureFrameSource::_RemoveOwnerFromAltTabList(HWND hwndSrc) noexce _originalOwnerExStyle = ownerExStyle; } -bool GraphicsCaptureFrameSource::_CaptureMonitor(IGraphicsCaptureItemInterop* interop) { +bool GraphicsCaptureFrameSource::_CaptureMonitor(IGraphicsCaptureItemInterop* interop) noexcept { // Win10 无法隐藏黄色边框,因此只在 Win11 中回落到屏幕捕获 if (!Win32Utils::GetOSVersion().IsWin11()) { Logger::Get().Error("无法使用屏幕捕获"); @@ -273,12 +337,12 @@ bool GraphicsCaptureFrameSource::_CaptureMonitor(IGraphicsCaptureItemInterop* in // 使全屏窗口无法被捕获到 // WDA_EXCLUDEFROMCAPTURE 只在 Win10 20H1 及更新版本中可用 - if (!SetWindowDisplayAffinity(MagApp::Get().GetHwndHost(), WDA_EXCLUDEFROMCAPTURE)) { + if (!SetWindowDisplayAffinity(ScalingWindow::Get().Handle(), WDA_EXCLUDEFROMCAPTURE)) { Logger::Get().Win32Error("SetWindowDisplayAffinity 失败"); return false; } - HWND hwndSrc = MagApp::Get().GetHwndSrc(); + const HWND hwndSrc = ScalingWindow::Get().HwndSrc(); HMONITOR hMonitor = MonitorFromWindow(hwndSrc, MONITOR_DEFAULTTONEAREST); if (!hMonitor) { Logger::Get().Win32Error("MonitorFromWindow 失败"); @@ -292,23 +356,27 @@ bool GraphicsCaptureFrameSource::_CaptureMonitor(IGraphicsCaptureItemInterop* in return 
false; } - // 放在屏幕左上角而不是中间可以提高帧率,这里是为了和 DesktopDuplication 保持一致 - if (!_CenterWindowIfNecessary(hwndSrc, mi.rcWork)) { - Logger::Get().Error("居中源窗口失败"); - return false; - } + // 最大化的窗口无需调整位置 + if (Win32Utils::GetWindowShowCmd(hwndSrc) != SW_SHOWMAXIMIZED) { + // 放在屏幕左上角而不是中间可以提高帧率,这里是为了和 DesktopDuplication 保持一致 + if (!_CenterWindowIfNecessary(hwndSrc, mi.rcWork)) { + Logger::Get().Error("居中源窗口失败"); + return false; + } - if (!_UpdateSrcFrameRect()) { - Logger::Get().Error("UpdateSrcFrameRect 失败"); - return false; + // 重新计算捕获位置 + if (!_CalcSrcRect()) { + Logger::Get().Error("_CalcSrcRect 失败"); + return false; + } } _frameBox = { - UINT(_srcFrameRect.left - mi.rcMonitor.left), - UINT(_srcFrameRect.top - mi.rcMonitor.top), + UINT(_srcRect.left - mi.rcMonitor.left), + UINT(_srcRect.top - mi.rcMonitor.top), 0, - UINT(_srcFrameRect.right - mi.rcMonitor.left), - UINT(_srcFrameRect.bottom - mi.rcMonitor.top), + UINT(_srcRect.right - mi.rcMonitor.left), + UINT(_srcRect.bottom - mi.rcMonitor.top), 1 }; @@ -330,7 +398,7 @@ bool GraphicsCaptureFrameSource::_CaptureMonitor(IGraphicsCaptureItemInterop* in return true; } -bool GraphicsCaptureFrameSource::StartCapture() { +bool GraphicsCaptureFrameSource::_StartCapture() noexcept { if (_captureSession) { return true; } @@ -355,8 +423,8 @@ bool GraphicsCaptureFrameSource::StartCapture() { if (winrt::ApiInformation::IsPropertyPresent( winrt::name_of(), L"IsCursorCaptureEnabled" - )) { - // 从 v2004 开始提供 + )) { + // 从 v2004 开始提供 _captureSession.IsCursorCaptureEnabled(false); } @@ -364,9 +432,9 @@ bool GraphicsCaptureFrameSource::StartCapture() { if (winrt::ApiInformation::IsPropertyPresent( winrt::name_of(), L"IsBorderRequired" - )) { - // 从 Win10 v2104 开始提供 - // Win32 应用中无需请求权限 + )) { + // 从 Win10 v2104 开始提供 + // Win32 应用中无需请求权限 _captureSession.IsBorderRequired(false); } @@ -379,7 +447,7 @@ bool GraphicsCaptureFrameSource::StartCapture() { return true; } -void GraphicsCaptureFrameSource::StopCapture() { +void 
GraphicsCaptureFrameSource::_StopCapture() noexcept { if (_captureSession) { _captureSession.Close(); _captureSession = nullptr; @@ -391,9 +459,9 @@ void GraphicsCaptureFrameSource::StopCapture() { } GraphicsCaptureFrameSource::~GraphicsCaptureFrameSource() { - StopCapture(); + _StopCapture(); - HWND hwndSrc = MagApp::Get().GetHwndSrc(); + const HWND hwndSrc = ScalingWindow::Get().HwndSrc(); if (_taskbarList) { _taskbarList->DeleteTab(hwndSrc); diff --git a/src/Magpie.Core/GraphicsCaptureFrameSource.h b/src/Magpie.Core/GraphicsCaptureFrameSource.h index bd94e04dc..c8f19e544 100644 --- a/src/Magpie.Core/GraphicsCaptureFrameSource.h +++ b/src/Magpie.Core/GraphicsCaptureFrameSource.h @@ -7,44 +7,47 @@ namespace Magpie::Core { // 使用 Window Runtime 的 Windows.Graphics.Capture API 抓取窗口 // 见 https://docs.microsoft.com/en-us/windows/uwp/audio-video-camera/screen-capture -class GraphicsCaptureFrameSource : public FrameSourceBase { +class GraphicsCaptureFrameSource final : public FrameSourceBase { public: - GraphicsCaptureFrameSource() {}; virtual ~GraphicsCaptureFrameSource(); - bool Initialize() override; - - UpdateState Update() override; - - bool IsScreenCapture() override { + bool IsScreenCapture() const noexcept override { return _isScreenCapture; } - const char* GetName() const noexcept override { - return NAME; + FrameSourceWaitType WaitType() const noexcept override { + return WaitForMessage; } - bool StartCapture(); - - void StopCapture(); + const char* Name() const noexcept override { + return "Graphics Capture"; + } - static constexpr const char* NAME = "Graphics Capture"; + void OnCursorVisibilityChanged(bool isVisible, bool onDestory) noexcept override; protected: - bool _HasRoundCornerInWin11() override { + bool _HasRoundCornerInWin11() noexcept override { return true; } - bool _CanCaptureTitleBar() override { + bool _CanCaptureTitleBar() noexcept override { return true; } + bool _Initialize() noexcept override; + + UpdateState _Update() noexcept override; + 
private: - bool _CaptureWindow(IGraphicsCaptureItemInterop* interop); + bool _StartCapture() noexcept; + + void _StopCapture() noexcept; + + bool _CaptureWindow(IGraphicsCaptureItemInterop* interop) noexcept; - bool _CaptureMonitor(IGraphicsCaptureItemInterop* interop); + bool _CaptureMonitor(IGraphicsCaptureItemInterop* interop) noexcept; - bool _TryCreateGraphicsCaptureItem(IGraphicsCaptureItemInterop* interop, HWND hwndSrc) noexcept; + bool _TryCreateGraphicsCaptureItem(IGraphicsCaptureItemInterop* interop) noexcept; void _RemoveOwnerFromAltTabList(HWND hwndSrc) noexcept; diff --git a/src/Magpie.Core/ImGuiBackend.cpp b/src/Magpie.Core/ImGuiBackend.cpp index 0b26773ae..463d3493b 100644 --- a/src/Magpie.Core/ImGuiBackend.cpp +++ b/src/Magpie.Core/ImGuiBackend.cpp @@ -4,372 +4,267 @@ #include "ImGuiBackend.h" #include #include -#include "MagApp.h" #include "DeviceResources.h" #include "StrUtils.h" #include "Logger.h" +#include "DirectXHelper.h" +#include "shaders/ImGuiImplVS.h" +#include "shaders/ImGuiImplPS.h" namespace Magpie::Core { -static constexpr const char* VERTEX_SHADER = R"( -cbuffer vertexBuffer : register(b0) { - float4x4 ProjectionMatrix; -}; - -struct VS_INPUT { - float2 pos : POSITION; - float4 col : COLOR0; - float2 uv : TEXCOORD0; +struct VERTEX_CONSTANT_BUFFER { + float mvp[4][4]; }; -struct PS_INPUT { - float4 pos : SV_POSITION; - float4 col : COLOR0; - float2 uv : TEXCOORD0; -}; +bool ImGuiBackend::Initialize(DeviceResources* deviceResources) noexcept { + _deviceResources = deviceResources; -PS_INPUT main(VS_INPUT input) { - PS_INPUT output; - output.pos = mul( ProjectionMatrix, float4(input.pos.xy, 0.f, 1.f)); - output.col = input.col; - output.uv = input.uv; - return output; -})"; - -static constexpr const char* PIXEL_SHADER = R"( -struct PS_INPUT { - float4 pos : SV_POSITION; - float4 col : COLOR0; - float2 uv : TEXCOORD0; -}; + ImGuiIO& io = ImGui::GetIO(); + io.BackendRendererName = "Magpie"; + // 支持 ImDrawCmd::VtxOffset + io.BackendFlags 
|= ImGuiBackendFlags_RendererHasVtxOffset; -sampler sampler0; -Texture2D texture0; + if (!_CreateDeviceObjects()) { + Logger::Get().Error("_CreateDeviceObjects 失败"); + return false; + } -float4 main(PS_INPUT input) : SV_Target { - return input.col * float4(1, 1, 1, texture0.Sample(sampler0, input.uv).r); -})"; + return true; +} -struct VERTEX_CONSTANT_BUFFER_DX11 { - float mvp[4][4]; -}; +void ImGuiBackend::_SetupRenderState(const ImDrawData& drawData) noexcept { + ID3D11DeviceContext4* d3dDC = _deviceResources->GetD3DDC(); -void ImGuiBackend::_SetupRenderState(ImDrawData* drawData, ID3D11DeviceContext* ctx) noexcept { - D3D11_VIEWPORT vp{}; - vp.Width = drawData->DisplaySize.x; - vp.Height = drawData->DisplaySize.y; - vp.MinDepth = 0.0f; - vp.MaxDepth = 1.0f; - ctx->RSSetViewports(1, &vp); + D3D11_VIEWPORT vp{ + .Width = drawData.DisplaySize.x, + .Height = drawData.DisplaySize.y, + .MinDepth = 0.0f, + .MaxDepth = 1.0f + }; + d3dDC->RSSetViewports(1, &vp); - ctx->IASetInputLayout(_inputLayout.get()); + d3dDC->IASetInputLayout(_inputLayout.get()); { - unsigned int stride = sizeof(ImDrawVert); - unsigned int offset = 0; + UINT stride = sizeof(ImDrawVert); + UINT offset = 0; ID3D11Buffer* t = _vertexBuffer.get(); - ctx->IASetVertexBuffers(0, 1, &t, &stride, &offset); + d3dDC->IASetVertexBuffers(0, 1, &t, &stride, &offset); } - - ctx->IASetIndexBuffer(_indexBuffer.get(), sizeof(ImDrawIdx) == 2 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT, 0); - ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - ctx->VSSetShader(_vertexShader.get(), nullptr, 0); + + d3dDC->IASetIndexBuffer(_indexBuffer.get(), + sizeof(ImDrawIdx) == 2 ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT, 0); + d3dDC->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + d3dDC->VSSetShader(_vertexShader.get(), nullptr, 0); { ID3D11Buffer* t = _vertexConstantBuffer.get(); - ctx->VSSetConstantBuffers(0, 1, &t); + d3dDC->VSSetConstantBuffers(0, 1, &t); } - ctx->PSSetShader(_pixelShader.get(), nullptr, 0); + d3dDC->PSSetShader(_pixelShader.get(), nullptr, 0); { - ID3D11SamplerState* t = _fontSampler.get(); - ctx->PSSetSamplers(0, 1, &t); + // 默认需要线性采样。设置 "io.Fonts->Flags |= ImFontAtlasFlags_NoBakedLines" 或 + // "style.AntiAliasedLinesUseTex = false" 来允许最近邻采样 + ID3D11SamplerState* t = _deviceResources->GetSampler( + D3D11_FILTER_MIN_MAG_MIP_LINEAR, D3D11_TEXTURE_ADDRESS_WRAP); + d3dDC->PSSetSamplers(0, 1, &t); } - - const float blend_factor[4]{}; - ctx->OMSetBlendState(_blendState.get(), blend_factor, 0xffffffff); - ctx->RSSetState(_rasterizerState.get()); + + static constexpr float blendFactor[4]{}; + d3dDC->OMSetBlendState(_blendState.get(), blendFactor, 0xffffffff); + d3dDC->RSSetState(_rasterizerState.get()); } -void ImGuiBackend::RenderDrawData(ImDrawData* drawData) noexcept { - // Avoid rendering when minimized - if (drawData->DisplaySize.x <= 0.0f || drawData->DisplaySize.y <= 0.0f) { - return; - } +void ImGuiBackend::RenderDrawData(const ImDrawData& drawData) noexcept { + ID3D11DeviceContext4* d3dDC = _deviceResources->GetD3DDC(); + ID3D11Device5* d3dDevice = _deviceResources->GetD3DDevice(); - DeviceResources& dr = MagApp::Get().GetDeviceResources(); - ID3D11DeviceContext4* ctx = dr.GetD3DDC(); - ID3D11Device5* d3dDevice = dr.GetD3DDevice(); - - HRESULT hr; - - // Create and grow vertex/index buffers if needed - if (!_vertexBuffer || _vertexBufferSize < drawData->TotalVtxCount) { - _vertexBufferSize = drawData->TotalVtxCount + 5000; - D3D11_BUFFER_DESC desc{}; - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.ByteWidth = _vertexBufferSize * sizeof(ImDrawVert); - desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; - 
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - hr = d3dDevice->CreateBuffer(&desc, nullptr, _vertexBuffer.put()); + // 按需创建和增长顶点和索引缓冲区 + if (!_vertexBuffer || _vertexBufferSize < drawData.TotalVtxCount) { + _vertexBufferSize = drawData.TotalVtxCount + 5000; + + D3D11_BUFFER_DESC desc{ + .ByteWidth = _vertexBufferSize * sizeof(ImDrawVert), + .Usage = D3D11_USAGE_DYNAMIC, + .BindFlags = D3D11_BIND_VERTEX_BUFFER, + .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE + }; + HRESULT hr = d3dDevice->CreateBuffer(&desc, nullptr, _vertexBuffer.put()); if (FAILED(hr)) { Logger::Get().ComError("CreateBuffer 失败", hr); return; } } - if (!_indexBuffer || _indexBufferSize < drawData->TotalIdxCount) { - _indexBufferSize = drawData->TotalIdxCount + 10000; - D3D11_BUFFER_DESC desc{}; - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.ByteWidth = _indexBufferSize * sizeof(ImDrawIdx); - desc.BindFlags = D3D11_BIND_INDEX_BUFFER; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - hr = d3dDevice->CreateBuffer(&desc, nullptr, _indexBuffer.put()); + if (!_indexBuffer || _indexBufferSize < drawData.TotalIdxCount) { + _indexBufferSize = drawData.TotalIdxCount + 10000; + + D3D11_BUFFER_DESC desc{ + .ByteWidth = _indexBufferSize * sizeof(ImDrawIdx), + .Usage = D3D11_USAGE_DYNAMIC, + .BindFlags = D3D11_BIND_INDEX_BUFFER, + .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE + }; + HRESULT hr = d3dDevice->CreateBuffer(&desc, nullptr, _indexBuffer.put()); if (FAILED(hr)) { Logger::Get().ComError("CreateBuffer 失败", hr); return; } } - // Upload vertex/index data into a single contiguous GPU buffer - D3D11_MAPPED_SUBRESOURCE vtxResource, idxResource; - hr = ctx->Map(_vertexBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &vtxResource); - if (FAILED(hr)) { - Logger::Get().ComError("Map 失败", hr); - return; - } + // 上传顶点数据 + { + D3D11_MAPPED_SUBRESOURCE vtxResource; + HRESULT hr = d3dDC->Map(_vertexBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &vtxResource); + if (FAILED(hr)) { + Logger::Get().ComError("Map 失败", hr); + return; + } - hr 
= ctx->Map(_indexBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &idxResource); - if (FAILED(hr)) { - Logger::Get().ComError("Map 失败", hr); - return; + ImDrawVert* vtxDst = (ImDrawVert*)vtxResource.pData; + for (const ImDrawList* cmdList : drawData.CmdLists) { + std::memcpy(vtxDst, cmdList->VtxBuffer.Data, cmdList->VtxBuffer.Size * sizeof(ImDrawVert)); + vtxDst += cmdList->VtxBuffer.Size; + } + + d3dDC->Unmap(_vertexBuffer.get(), 0); } + // 上传索引数据 + { + D3D11_MAPPED_SUBRESOURCE idxResource; + HRESULT hr = d3dDC->Map(_indexBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &idxResource); + if (FAILED(hr)) { + Logger::Get().ComError("Map 失败", hr); + return; + } + + ImDrawIdx* idxDst = (ImDrawIdx*)idxResource.pData; + for (const ImDrawList* cmdList : drawData.CmdLists) { + std::memcpy(idxDst, cmdList->IdxBuffer.Data, cmdList->IdxBuffer.Size * sizeof(ImDrawIdx)); + idxDst += cmdList->IdxBuffer.Size; + } - ImDrawVert* vtxDst = (ImDrawVert*)vtxResource.pData; - ImDrawIdx* idxDst = (ImDrawIdx*)idxResource.pData; - for (int n = 0; n < drawData->CmdListsCount; ++n) { - const ImDrawList* cmdList = drawData->CmdLists[n]; - std::memcpy(vtxDst, cmdList->VtxBuffer.Data, cmdList->VtxBuffer.Size * sizeof(ImDrawVert)); - std::memcpy(idxDst, cmdList->IdxBuffer.Data, cmdList->IdxBuffer.Size * sizeof(ImDrawIdx)); - vtxDst += cmdList->VtxBuffer.Size; - idxDst += cmdList->IdxBuffer.Size; + d3dDC->Unmap(_indexBuffer.get(), 0); } - ctx->Unmap(_vertexBuffer.get(), 0); - ctx->Unmap(_indexBuffer.get(), 0); // Setup orthographic projection matrix into our constant buffer // Our visible imgui space lies from drawData->DisplayPos (top left) to drawData->DisplayPos+data_data->DisplaySize (bottom right). DisplayPos is (0,0) for single viewport apps. 
{ - D3D11_MAPPED_SUBRESOURCE mappedResource; - hr = ctx->Map(_vertexConstantBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource); + const float left = drawData.DisplayPos.x; + const float right = drawData.DisplayPos.x + drawData.DisplaySize.x; + const float top = drawData.DisplayPos.y; + const float bottom = drawData.DisplayPos.y + drawData.DisplaySize.y; + const VERTEX_CONSTANT_BUFFER data{ + .mvp{ + { 2.0f / (right - left), 0.0f, 0.0f, 0.0f }, + { 0.0f, 2.0f / (top - bottom), 0.0f, 0.0f }, + { 0.0f, 0.0f, 0.5f, 0.0f }, + { (right + left) / (left - right), (top + bottom) / (bottom - top), 0.5f, 1.0f }, + } + }; + + D3D11_MAPPED_SUBRESOURCE ms; + HRESULT hr = d3dDC->Map(_vertexConstantBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &ms); if (FAILED(hr)) { Logger::Get().ComError("Map 失败", hr); return; } - - VERTEX_CONSTANT_BUFFER_DX11* constant_buffer = (VERTEX_CONSTANT_BUFFER_DX11*)mappedResource.pData; - float left = drawData->DisplayPos.x; - float right = drawData->DisplayPos.x + drawData->DisplaySize.x; - float top = drawData->DisplayPos.y; - float bottom = drawData->DisplayPos.y + drawData->DisplaySize.y; - float mvp[4][4] = { - { 2.0f / (right - left), 0.0f, 0.0f, 0.0f }, - { 0.0f, 2.0f / (top - bottom), 0.0f, 0.0f }, - { 0.0f, 0.0f, 0.5f, 0.0f }, - { (right + left) / (left - right), (top + bottom) / (bottom - top), 0.5f, 1.0f }, - }; - std::memcpy(&constant_buffer->mvp, mvp, sizeof(mvp)); - ctx->Unmap(_vertexConstantBuffer.get(), 0); + + std::memcpy(ms.pData, &data, sizeof(data)); + d3dDC->Unmap(_vertexConstantBuffer.get(), 0); } - // Setup desired DX state - _SetupRenderState(drawData, ctx); + _SetupRenderState(drawData); // Render command lists // (Because we merged all buffers into a single one, we maintain our own offset into them) int globalIdxOffset = 0; int globalVtxOffset = 0; - ImVec2 clip_off = drawData->DisplayPos; - for (int n = 0; n < drawData->CmdListsCount; n++) { - const ImDrawList* cmdList = drawData->CmdLists[n]; - for (int cmd_i = 0; 
cmd_i < cmdList->CmdBuffer.Size; cmd_i++) { - const ImDrawCmd* pcmd = &cmdList->CmdBuffer[cmd_i]; - if (pcmd->UserCallback != nullptr) { + const ImVec2& clipOff = drawData.DisplayPos; + for (const ImDrawList* cmdList : drawData.CmdLists) { + for (const ImDrawCmd& drawCmd : cmdList->CmdBuffer) { + if (drawCmd.UserCallback) { // User callback, registered via ImDrawList::AddCallback() // (ImDrawCallback_ResetRenderState is a special callback value used by the user to request the renderer to reset render state.) - if (pcmd->UserCallback == ImDrawCallback_ResetRenderState) - _SetupRenderState(drawData, ctx); - else - pcmd->UserCallback(cmdList, pcmd); + if (drawCmd.UserCallback == ImDrawCallback_ResetRenderState) { + _SetupRenderState(drawData); + } else { + drawCmd.UserCallback(cmdList, &drawCmd); + } } else { // Project scissor/clipping rectangles into framebuffer space - ImVec2 clipMin(pcmd->ClipRect.x - clip_off.x, pcmd->ClipRect.y - clip_off.y); - ImVec2 clipMax(pcmd->ClipRect.z - clip_off.x, pcmd->ClipRect.w - clip_off.y); + ImVec2 clipMin(drawCmd.ClipRect.x - clipOff.x, drawCmd.ClipRect.y - clipOff.y); + ImVec2 clipMax(drawCmd.ClipRect.z - clipOff.x, drawCmd.ClipRect.w - clipOff.y); if (clipMax.x <= clipMin.x || clipMax.y <= clipMin.y) continue; // Apply scissor/clipping rectangle const D3D11_RECT r = { (LONG)clipMin.x, (LONG)clipMin.y, (LONG)clipMax.x, (LONG)clipMax.y }; - ctx->RSSetScissorRects(1, &r); + d3dDC->RSSetScissorRects(1, &r); // Bind texture, Draw - ID3D11ShaderResourceView* textureSrv = (ID3D11ShaderResourceView*)pcmd->GetTexID(); - ctx->PSSetShaderResources(0, 1, &textureSrv); - ctx->DrawIndexed(pcmd->ElemCount, pcmd->IdxOffset + globalIdxOffset, pcmd->VtxOffset + globalVtxOffset); + ID3D11ShaderResourceView* textureSrv = (ID3D11ShaderResourceView*)drawCmd.GetTexID(); + d3dDC->PSSetShaderResources(0, 1, &textureSrv); + d3dDC->DrawIndexed(drawCmd.ElemCount, drawCmd.IdxOffset + globalIdxOffset, drawCmd.VtxOffset + globalVtxOffset); } } + 
globalIdxOffset += cmdList->IdxBuffer.Size; globalVtxOffset += cmdList->VtxBuffer.Size; } } -bool ImGuiBackend::_CreateFontsTexture() noexcept { - ImGuiIO& io = ImGui::GetIO(); - ID3D11Device5* d3dDevice = MagApp::Get().GetDeviceResources().GetD3DDevice(); - - HRESULT hr; - - // 字体纹理使用 R8_UNORM 格式 - unsigned char* pixels; - int width, height; - io.Fonts->GetTexDataAsAlpha8(&pixels, &width, &height); - - // Upload texture to graphics system - { - D3D11_TEXTURE2D_DESC desc{}; - desc.Width = width; - desc.Height = height; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.Format = DXGI_FORMAT_R8_UNORM; - desc.SampleDesc.Count = 1; - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - - winrt::com_ptr texture = nullptr; - D3D11_SUBRESOURCE_DATA subResource{}; - subResource.pSysMem = pixels; - subResource.SysMemPitch = width; - hr = d3dDevice->CreateTexture2D(&desc, &subResource, texture.put()); - if (FAILED(hr)) { - Logger::Get().ComError("CreateTexture2D 失败", hr); - return false; - } - - // Create texture view - D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc{}; - srvDesc.Format = desc.Format; - srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; - srvDesc.Texture2D.MipLevels = desc.MipLevels; - hr = d3dDevice->CreateShaderResourceView(texture.get(), &srvDesc, _fontTextureView.put()); - if (FAILED(hr)) { - Logger::Get().ComError("CreateShaderResourceView 失败", hr); - return false; - } - } - - // Store our identifier - io.Fonts->SetTexID((ImTextureID)_fontTextureView.get()); - - // Create texture sampler - // (Bilinear sampling is required by default. 
Set 'io.Fonts->Flags |= ImFontAtlasFlags_NoBakedLines' or 'style.AntiAliasedLinesUseTex = false' to allow point/nearest sampling) - { - D3D11_SAMPLER_DESC desc{}; - desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; - desc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; - desc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; - desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP; - desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; - hr = d3dDevice->CreateSamplerState(&desc, _fontSampler.put()); - if (FAILED(hr)) { - Logger::Get().ComError("CreateSamplerState 失败", hr); - return false; - } - } - - // 清理不再需要的数据降低内存占用 - io.Fonts->ClearTexData(); - - return true; -} - bool ImGuiBackend::_CreateDeviceObjects() noexcept { - ID3D11Device5* d3dDevice = MagApp::Get().GetDeviceResources().GetD3DDevice(); - - HRESULT hr; + ID3D11Device5* d3dDevice = _deviceResources->GetD3DDevice(); - static winrt::com_ptr vertexShaderBlob; - if (!vertexShaderBlob) { - hr = D3DCompile(VERTEX_SHADER, StrUtils::StrLen(VERTEX_SHADER), - nullptr, nullptr, nullptr, "main", "vs_5_0", 0, 0, vertexShaderBlob.put(), nullptr); - if (FAILED(hr)) { - Logger::Get().ComError("编译顶点着色器失败", hr); - return false; - } - } - - hr = d3dDevice->CreateVertexShader( - vertexShaderBlob->GetBufferPointer(), - vertexShaderBlob->GetBufferSize(), - nullptr, - _vertexShader.put() - ); + HRESULT hr = d3dDevice->CreateVertexShader(ImGuiImplVS, std::size(ImGuiImplVS), nullptr, _vertexShader.put()); if (FAILED(hr)) { Logger::Get().ComError("CreateVertexShader 失败", hr); return false; } static constexpr D3D11_INPUT_ELEMENT_DESC LOCAL_LAYOUT[] = { - { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, pos), D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "SV_POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, pos), D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, uv), D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 
(UINT)IM_OFFSETOF(ImDrawVert, col), D3D11_INPUT_PER_VERTEX_DATA, 0 }, }; - hr = d3dDevice->CreateInputLayout(LOCAL_LAYOUT, 3, - vertexShaderBlob->GetBufferPointer(), vertexShaderBlob->GetBufferSize(), _inputLayout.put()); + hr = d3dDevice->CreateInputLayout(LOCAL_LAYOUT, 3, ImGuiImplVS, std::size(ImGuiImplVS), _inputLayout.put()); if (FAILED(hr)) { Logger::Get().ComError("CreateInputLayout 失败", hr); return false; } { - D3D11_BUFFER_DESC desc{}; - desc.ByteWidth = sizeof(VERTEX_CONSTANT_BUFFER_DX11); - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + D3D11_BUFFER_DESC desc{ + .ByteWidth = sizeof(VERTEX_CONSTANT_BUFFER), + .Usage = D3D11_USAGE_DYNAMIC, + .BindFlags = D3D11_BIND_CONSTANT_BUFFER, + .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE + }; d3dDevice->CreateBuffer(&desc, nullptr, _vertexConstantBuffer.put()); } - static winrt::com_ptr pixelShaderBlob; - if (!pixelShaderBlob) { - hr = D3DCompile(PIXEL_SHADER, StrUtils::StrLen(PIXEL_SHADER), - nullptr, nullptr, nullptr, "main", "ps_5_0", 0, 0, pixelShaderBlob.put(), nullptr); - if (FAILED(hr)) { - Logger::Get().ComError("编译像素着色器失败", hr); - return false; - } - } - - hr = d3dDevice->CreatePixelShader( - pixelShaderBlob->GetBufferPointer(), - pixelShaderBlob->GetBufferSize(), - nullptr, - _pixelShader.put() - ); + hr = d3dDevice->CreatePixelShader(ImGuiImplPS, std::size(ImGuiImplPS), nullptr, _pixelShader.put()); if (FAILED(hr)) { Logger::Get().ComError("CreatePixelShader 失败", hr); return false; } { - D3D11_BLEND_DESC desc{}; - desc.AlphaToCoverageEnable = false; - desc.RenderTarget[0].BlendEnable = true; - desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; - desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; - desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; - desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; - desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA; - desc.RenderTarget[0].BlendOpAlpha = 
D3D11_BLEND_OP_ADD; - desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + D3D11_BLEND_DESC desc{ + .AlphaToCoverageEnable = false, + .RenderTarget{ + D3D11_RENDER_TARGET_BLEND_DESC{ + .BlendEnable = true, + .SrcBlend = D3D11_BLEND_SRC_ALPHA, + .DestBlend = D3D11_BLEND_INV_SRC_ALPHA, + .BlendOp = D3D11_BLEND_OP_ADD, + .SrcBlendAlpha = D3D11_BLEND_ONE, + .DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA, + .BlendOpAlpha = D3D11_BLEND_OP_ADD, + .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL + } + } + }; hr = d3dDevice->CreateBlendState(&desc, _blendState.put()); if (FAILED(hr)) { Logger::Get().ComError("CreateBlendState 失败", hr); @@ -377,39 +272,63 @@ bool ImGuiBackend::_CreateDeviceObjects() noexcept { } } - // Create the rasterizer state - { - D3D11_RASTERIZER_DESC desc{}; - desc.FillMode = D3D11_FILL_SOLID; - desc.CullMode = D3D11_CULL_NONE; - desc.ScissorEnable = true; - hr = d3dDevice->CreateRasterizerState(&desc, _rasterizerState.put()); - if (FAILED(hr)) { - Logger::Get().ComError("CreateRasterizerState 失败", hr); - return false; - } - } - - if (!_CreateFontsTexture()) { - Logger::Get().Error("_CreateFontsTexture 失败"); + // 创建光栅化器状态对象 + D3D11_RASTERIZER_DESC desc{ + .FillMode = D3D11_FILL_SOLID, + .CullMode = D3D11_CULL_NONE, + .ScissorEnable = true + }; + hr = d3dDevice->CreateRasterizerState(&desc, _rasterizerState.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateRasterizerState 失败", hr); return false; } return true; } -void ImGuiBackend::NewFrame() noexcept { - if (!_fontSampler) { - _CreateDeviceObjects(); - } -} +bool ImGuiBackend::BuildFonts() noexcept { + assert(!_fontTextureView); -bool ImGuiBackend::Initialize() noexcept { - // Setup backend capabilities flags + ID3D11Device5* d3dDevice = _deviceResources->GetD3DDevice(); ImGuiIO& io = ImGui::GetIO(); - io.BackendRendererName = "Magpie"; - io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; // We can honor the ImDrawCmd::VtxOffset field, allowing for large 
meshes. + // 字体纹理使用 R8_UNORM 格式 + unsigned char* pixels; + int width, height; + io.Fonts->GetTexDataAsAlpha8(&pixels, &width, &height); + + // 上传纹理数据 + const D3D11_SUBRESOURCE_DATA initData{ + .pSysMem = pixels, + .SysMemPitch = (UINT)width + }; + winrt::com_ptr texture = DirectXHelper::CreateTexture2D( + d3dDevice, + DXGI_FORMAT_R8_UNORM, + width, + height, + D3D11_BIND_SHADER_RESOURCE, + D3D11_USAGE_DEFAULT, + 0, + &initData + ); + if (!texture) { + Logger::Get().Error("创建字体纹理失败"); + return false; + } + + HRESULT hr = d3dDevice->CreateShaderResourceView(texture.get(), nullptr, _fontTextureView.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateShaderResourceView 失败", hr); + return false; + } + + // 设置纹理 ID + io.Fonts->SetTexID((ImTextureID)_fontTextureView.get()); + + // 清理不再需要的数据降低内存占用 + io.Fonts->ClearTexData(); return true; } diff --git a/src/Magpie.Core/ImGuiBackend.h b/src/Magpie.Core/ImGuiBackend.h index f9a722591..5f1afac9f 100644 --- a/src/Magpie.Core/ImGuiBackend.h +++ b/src/Magpie.Core/ImGuiBackend.h @@ -1,25 +1,28 @@ #pragma once - -struct ImDrawData; +#include namespace Magpie::Core { +class DeviceResources; + class ImGuiBackend { public: ImGuiBackend() = default; ImGuiBackend(const ImGuiBackend&) = delete; ImGuiBackend(ImGuiBackend&&) = delete; - bool Initialize() noexcept; + bool Initialize(DeviceResources* deviceResources) noexcept; + + bool BuildFonts() noexcept; + + void RenderDrawData(const ImDrawData& drawData) noexcept; - void NewFrame() noexcept; - void RenderDrawData(ImDrawData* drawData) noexcept; - private: bool _CreateDeviceObjects() noexcept; - void _SetupRenderState(ImDrawData* drawData, ID3D11DeviceContext* ctx) noexcept; - bool _CreateFontsTexture() noexcept; + void _SetupRenderState(const ImDrawData& drawData) noexcept; + + DeviceResources* _deviceResources = nullptr; winrt::com_ptr _vertexBuffer; int _vertexBufferSize = 5000; @@ -31,7 +34,6 @@ class ImGuiBackend { winrt::com_ptr _inputLayout; winrt::com_ptr 
_vertexConstantBuffer; winrt::com_ptr _pixelShader; - winrt::com_ptr _fontSampler; winrt::com_ptr _fontTextureView; winrt::com_ptr _blendState; winrt::com_ptr _rasterizerState; diff --git a/src/Magpie.Core/ImGuiFontsCacheManager.h b/src/Magpie.Core/ImGuiFontsCacheManager.h index 0e8616e47..7b502794c 100644 --- a/src/Magpie.Core/ImGuiFontsCacheManager.h +++ b/src/Magpie.Core/ImGuiFontsCacheManager.h @@ -1,6 +1,5 @@ #pragma once - -struct ImFontAtlas; +#include namespace Magpie::Core { diff --git a/src/Magpie.Core/ImGuiImpl.cpp b/src/Magpie.Core/ImGuiImpl.cpp index cf9c36095..b6bffa8f9 100644 --- a/src/Magpie.Core/ImGuiImpl.cpp +++ b/src/Magpie.Core/ImGuiImpl.cpp @@ -3,142 +3,23 @@ #include #include #include "ImGuiBackend.h" -#include "MagApp.h" #include "CursorManager.h" #include "DeviceResources.h" #include "Renderer.h" #include "Logger.h" #include "Win32Utils.h" +#include "ScalingWindow.h" +#include "CursorManager.h" namespace Magpie::Core { -ImGuiImpl::ImGuiImpl() {} - -ImGuiImpl::~ImGuiImpl() { - MagApp::Get().UnregisterWndProcHandler(_handlerId); - - if (_hHookThread) { - PostThreadMessage(_hookThreadId, WM_QUIT, 0, 0); - WaitForSingleObject(_hHookThread, 1000); +ImGuiImpl::~ImGuiImpl() noexcept { + if (ImGui::GetCurrentContext()) { + ImGui::DestroyContext(); } - - _backend.reset(); - ImGui::DestroyContext(); } -static std::optional WndProcHandler(HWND hwnd, UINT msg, WPARAM wParam, LPARAM /*lParam*/) { - ImGuiIO& io = ImGui::GetIO(); - - if (!io.WantCaptureMouse) { - if (msg == WM_LBUTTONDOWN && MagApp::Get().GetOptions().Is3DGameMode()) { - MagApp::Get().GetRenderer().SetUIVisibility(false); - } - return std::nullopt; - } - - switch (msg) { - case WM_LBUTTONDOWN: case WM_LBUTTONDBLCLK: - case WM_RBUTTONDOWN: case WM_RBUTTONDBLCLK: - case WM_MBUTTONDOWN: case WM_MBUTTONDBLCLK: - case WM_XBUTTONDOWN: case WM_XBUTTONDBLCLK: - { - int button = 0; - if (msg == WM_LBUTTONDOWN || msg == WM_LBUTTONDBLCLK) { button = 0; } - if (msg == WM_RBUTTONDOWN || msg == 
WM_RBUTTONDBLCLK) { button = 1; } - if (msg == WM_MBUTTONDOWN || msg == WM_MBUTTONDBLCLK) { button = 2; } - if (msg == WM_XBUTTONDOWN || msg == WM_XBUTTONDBLCLK) { button = (GET_XBUTTON_WPARAM(wParam) == XBUTTON1) ? 3 : 4; } - - if (!ImGui::IsAnyMouseDown()) { - if (!GetCapture()) { - SetCapture(hwnd); - } - MagApp::Get().GetCursorManager().OnCursorCapturedOnOverlay(); - } - - io.MouseDown[button] = true; - break; - } - case WM_LBUTTONUP: - case WM_RBUTTONUP: - case WM_MBUTTONUP: - case WM_XBUTTONUP: - { - int button = 0; - if (msg == WM_LBUTTONUP) { button = 0; } - if (msg == WM_RBUTTONUP) { button = 1; } - if (msg == WM_MBUTTONUP) { button = 2; } - if (msg == WM_XBUTTONUP) { button = (GET_XBUTTON_WPARAM(wParam) == XBUTTON1) ? 3 : 4; } - - io.MouseDown[button] = false; - - if (!ImGui::IsAnyMouseDown()) { - if (GetCapture() == hwnd) { - ReleaseCapture(); - } - MagApp::Get().GetCursorManager().OnCursorReleasedOnOverlay(); - } - - break; - } - case WM_MOUSEWHEEL: - io.MouseWheel += (float)GET_WHEEL_DELTA_WPARAM(wParam) / (float)WHEEL_DELTA; - break; - case WM_MOUSEHWHEEL: - io.MouseWheelH += (float)GET_WHEEL_DELTA_WPARAM(wParam) / (float)WHEEL_DELTA; - break; - } - - return std::nullopt; -} - -static LRESULT CALLBACK LowLevelMouseProc( - _In_ int nCode, - _In_ WPARAM wParam, - _In_ LPARAM lParam -) { - if (nCode != HC_ACTION || !ImGui::GetIO().WantCaptureMouse) { - return CallNextHookEx(NULL, nCode, wParam, lParam); - } - - if (wParam == WM_MOUSEWHEEL || wParam == WM_MOUSEHWHEEL) { - // 向主线程发送滚动数据 - // 使用 Windows 消息进行线程同步 - PostMessage(MagApp::Get().GetHwndHost(), (UINT)wParam, ((MSLLHOOKSTRUCT*)lParam)->mouseData, 0); - - // 阻断滚轮消息,防止传给源窗口 - return -1; - } else if (wParam >= WM_LBUTTONDOWN && wParam <= WM_RBUTTONUP) { - PostMessage(MagApp::Get().GetHwndHost(), (UINT)wParam, 0, 0); - - // 阻断点击消息,防止传给源窗口 - return -1; - } else { - return CallNextHookEx(NULL, nCode, wParam, lParam); - } -} - -static DWORD WINAPI ThreadProc(LPVOID /*lpThreadParameter*/) { - HHOOK hook = 
SetWindowsHookEx(WH_MOUSE_LL, LowLevelMouseProc, NULL, 0); - if (!hook) { - Logger::Get().Win32Error("注册鼠标钩子失败"); - return 1; - } - - Logger::Get().Info("已注册鼠标钩子"); - - // 鼠标钩子需要消息循环 - MSG msg; - while (GetMessage(&msg, NULL, 0, 0)) { - TranslateMessage(&msg); - DispatchMessage(&msg); - } - - UnhookWindowsHookEx(hook); - Logger::Get().Info("已销毁鼠标钩子"); - return 0; -} - -bool ImGuiImpl::Initialize() { +bool ImGuiImpl::Initialize(DeviceResources* deviceResources) noexcept { #ifdef _DEBUG // 检查 ImGUI 版本是否匹配 if (!IMGUI_CHECKVERSION()) { @@ -153,72 +34,29 @@ bool ImGuiImpl::Initialize() { ImGuiIO& io = ImGui::GetIO(); io.BackendPlatformUserData = nullptr; io.BackendPlatformName = "Magpie"; - io.ImeWindowHandle = MagApp::Get().GetHwndHost(); io.ConfigFlags |= ImGuiConfigFlags_NavNoCaptureKeyboard | ImGuiConfigFlags_NoMouseCursorChange; - _backend = std::make_unique(); - _backend->Initialize(); - - auto& dr = MagApp::Get().GetDeviceResources(); - if (!dr.GetRenderTargetView(dr.GetBackBuffer(), &_rtv)) { - Logger::Get().Error("GetRenderTargetView 失败"); + if (!_backend.Initialize(deviceResources)) { + Logger::Get().Error("初始化 ImGuiBackend 失败"); return false; } - _handlerId = MagApp::Get().RegisterWndProcHandler(WndProcHandler); - - // 断点模式下不注册鼠标钩子,否则调试时鼠标无法使用 - if (!MagApp::Get().GetOptions().IsDebugMode() && !MagApp::Get().GetOptions().Is3DGameMode()) { - _hHookThread = CreateThread(nullptr, 0, ThreadProc, nullptr, 0, &_hookThreadId); - if (!_hHookThread) { - Logger::Get().Win32Error("创建线程失败"); - } - } - return true; } -static void UpdateMousePos() { - ImGuiIO& io = ImGui::GetIO(); - - if (MagApp::Get().GetOptions().Is3DGameMode() && !MagApp::Get().GetRenderer().IsUIVisiable()) { - io.MousePos = ImVec2(-FLT_MAX, -FLT_MAX); - return; - } - - POINT pos; - CursorManager& cm = MagApp::Get().GetCursorManager(); - if (cm.HasCursor()) { - pos = *cm.GetCursorPos(); - } else { - GetCursorPos(&pos); - - if (WindowFromPoint(pos) != MagApp::Get().GetHwndHost()) { - io.MousePos = 
ImVec2(-FLT_MAX, -FLT_MAX); - return; - } - - const RECT& hostRect = MagApp::Get().GetHostWndRect(); - pos.x -= hostRect.left; - pos.y -= hostRect.top; - } - - const RECT& outputRect = MagApp::Get().GetRenderer().GetOutputRect(); - pos.x -= outputRect.left; - pos.y -= outputRect.top; - - io.MousePos = ImVec2((float)pos.x, (float)pos.y); +bool ImGuiImpl::BuildFonts() noexcept { + return _backend.BuildFonts(); } -void ImGuiImpl::NewFrame() { +void ImGuiImpl::NewFrame() noexcept { ImGuiIO& io = ImGui::GetIO(); // Setup display size (every frame to accommodate for window resizing) - const RECT& outputRect = MagApp::Get().GetRenderer().GetOutputRect(); - io.DisplaySize = ImVec2((float)(outputRect.right - outputRect.left), (float)(outputRect.bottom - outputRect.top)); + const SIZE outputSize = Win32Utils::GetSizeOfRect(ScalingWindow::Get().Renderer().DestRect()); + io.DisplaySize = ImVec2((float)outputSize.cx, (float)outputSize.cy); // Update OS mouse position - UpdateMousePos(); + _UpdateMousePos(); // 不接受键盘输入 if (io.WantCaptureKeyboard) { @@ -226,13 +64,9 @@ void ImGuiImpl::NewFrame() { io.AddKeyEvent(ImGuiKey_Enter, false); } - bool originWantCaptureMouse = io.WantCaptureMouse; - - _backend->NewFrame(); ImGui::NewFrame(); // 将所有 ImGUI 窗口限制在视口内 - SIZE outputSize = Win32Utils::GetSizeOfRect(MagApp::Get().GetRenderer().GetOutputRect()); for (ImGuiWindow* window : ImGui::GetCurrentContext()->Windows) { if (window->Flags & ImGuiWindowFlags_Tooltip) { continue; @@ -255,30 +89,28 @@ void ImGuiImpl::NewFrame() { ImGui::SetWindowPos(window, pos); } - CursorManager& cm = MagApp::Get().GetCursorManager(); - - if (io.WantCaptureMouse) { - if (!originWantCaptureMouse) { - cm.OnCursorHoverOverlay(); - } - } else { - if (originWantCaptureMouse) { - cm.OnCursorLeaveOverlay(); - } - } + ScalingWindow::Get().CursorManager().IsCursorOnOverlay(io.WantCaptureMouse); } -void ImGuiImpl::EndFrame() { - const RECT& outputRect = MagApp::Get().GetRenderer().GetOutputRect(); - 
ImGui::GetDrawData()->DisplayPos = ImVec2(float(-outputRect.left), float(-outputRect.top)); - ImGui::GetDrawData()->DisplaySize = ImVec2((float)(outputRect.right), (float)(outputRect.bottom)); - - auto d3dDC = MagApp::Get().GetDeviceResources().GetD3DDC(); - d3dDC->OMSetRenderTargets(1, &_rtv, NULL); - _backend->RenderDrawData(ImGui::GetDrawData()); +void ImGuiImpl::Draw() noexcept { + const RECT& scalingRect = ScalingWindow::Get().WndRect(); + const RECT& destRect = ScalingWindow::Get().Renderer().DestRect(); + + ImGui::Render(); + ImDrawData& drawData = *ImGui::GetDrawData(); + drawData.DisplayPos = ImVec2( + float(scalingRect.left - destRect.left), + float(scalingRect.top - destRect.top) + ); + drawData.DisplaySize = ImVec2( + float(destRect.right - scalingRect.left), + float(destRect.bottom - scalingRect.top) + ); + + _backend.RenderDrawData(drawData); } -void ImGuiImpl::Tooltip(const char* content, float maxWidth) { +void ImGuiImpl::Tooltip(const char* content, float maxWidth) noexcept { ImVec2 padding = ImGui::GetStyle().WindowPadding; ImVec2 contentSize = ImGui::CalcTextSize(content, nullptr, false, maxWidth - 2 * padding.x); ImVec2 windowSize(contentSize.x + 2 * padding.x, contentSize.y + 2 * padding.y); @@ -288,7 +120,7 @@ void ImGuiImpl::Tooltip(const char* content, float maxWidth) { windowPos.x += 16 * ImGui::GetStyle().MouseCursorScale; windowPos.y += 8 * ImGui::GetStyle().MouseCursorScale; - SIZE outputSize = Win32Utils::GetSizeOfRect(MagApp::Get().GetRenderer().GetOutputRect()); + SIZE outputSize = Win32Utils::GetSizeOfRect(ScalingWindow::Get().Renderer().DestRect()); windowPos.x = std::clamp(windowPos.x, 0.0f, outputSize.cx - windowSize.x); windowPos.y = std::clamp(windowPos.y, 0.0f, outputSize.cy - windowSize.y); @@ -305,22 +137,38 @@ void ImGuiImpl::Tooltip(const char* content, float maxWidth) { ImGui::End(); } -void ImGuiImpl::ClearStates() { +void ImGuiImpl::_UpdateMousePos() noexcept { ImGuiIO& io = ImGui::GetIO(); io.MousePos = ImVec2(-FLT_MAX, 
-FLT_MAX); - std::fill(std::begin(io.MouseDown), std::end(io.MouseDown), false); - auto& cm = MagApp::Get().GetCursorManager(); - if (cm.IsCursorCapturedOnOverlay()) { - if (GetCapture() == MagApp::Get().GetHwndHost()) { - ReleaseCapture(); - } - cm.OnCursorReleasedOnOverlay(); + const CursorManager& cursorManager = ScalingWindow::Get().CursorManager(); + + if (cursorManager.IsCursorCapturedOnForeground()) { + // 光标被前台窗口捕获时应避免造成光标跳跃 + return; } - if (cm.IsCursorOnOverlay()) { - cm.OnCursorLeaveOverlay(); + const POINT cursorPos = cursorManager.CursorPos(); + if (cursorPos.x == std::numeric_limits::max()) { + // 无光标 + return; } + + const RECT& scalingRect = ScalingWindow::Get().WndRect(); + const RECT& destRect = ScalingWindow::Get().Renderer().DestRect(); + + io.MousePos.x = float(cursorPos.x + scalingRect.left - destRect.left); + io.MousePos.y = float(cursorPos.y + scalingRect.top - destRect.top); +} + +void ImGuiImpl::ClearStates() noexcept { + ImGuiIO& io = ImGui::GetIO(); + io.MousePos = ImVec2(-FLT_MAX, -FLT_MAX); + std::fill(std::begin(io.MouseDown), std::end(io.MouseDown), false); + + CursorManager& cursorManager = ScalingWindow::Get().CursorManager(); + cursorManager.IsCursorCapturedOnOverlay(false); + cursorManager.IsCursorOnOverlay(false); // 更新状态 ImGui::NewFrame(); @@ -333,4 +181,51 @@ void ImGuiImpl::ClearStates() { } } +void ImGuiImpl::MessageHandler(UINT msg, WPARAM wParam, LPARAM /*lParam*/) noexcept { + ImGuiIO& io = ImGui::GetIO(); + + if (!io.WantCaptureMouse) { + // 3D 游戏模式下显示叠加层会使缩放窗口不透明,这时点击非叠加层区域应关闭叠加层 + if (msg == WM_LBUTTONDOWN && ScalingWindow::Get().Options().Is3DGameMode()) { + ScalingWindow::Get().Renderer().SetOverlayVisibility(false); + } + return; + } + + // 缩放窗口不会收到双击消息 + switch (msg) { + case WM_LBUTTONDOWN: + case WM_RBUTTONDOWN: + { + if (!ImGui::IsAnyMouseDown()) { + ScalingWindow::Get().CursorManager().IsCursorCapturedOnOverlay(true); + } + + io.MouseDown[msg == WM_LBUTTONDOWN ? 
0 : 1] = true; + break; + } + case WM_LBUTTONUP: + case WM_RBUTTONUP: + { + io.MouseDown[msg == WM_LBUTTONUP ? 0 : 1] = false; + + if (!ImGui::IsAnyMouseDown()) { + ScalingWindow::Get().CursorManager().IsCursorCapturedOnOverlay(false); + } + + break; + } + case WM_MOUSEWHEEL: + { + io.MouseWheel += (float)GET_WHEEL_DELTA_WPARAM(wParam) / (float)WHEEL_DELTA; + break; + } + case WM_MOUSEHWHEEL: + { + io.MouseWheelH += (float)GET_WHEEL_DELTA_WPARAM(wParam) / (float)WHEEL_DELTA; + break; + } + } +} + } diff --git a/src/Magpie.Core/ImGuiImpl.h b/src/Magpie.Core/ImGuiImpl.h index d4e55d1af..9f502bd85 100644 --- a/src/Magpie.Core/ImGuiImpl.h +++ b/src/Magpie.Core/ImGuiImpl.h @@ -1,31 +1,37 @@ #pragma once +#include "ImGuiBackend.h" namespace Magpie::Core { -class ImGuiBackend; +class DeviceResources; class ImGuiImpl { public: - ImGuiImpl(); + ImGuiImpl() = default; ImGuiImpl(const ImGuiImpl&) = delete; ImGuiImpl(ImGuiImpl&&) = delete; - ~ImGuiImpl(); + ~ImGuiImpl() noexcept; - bool Initialize(); + bool Initialize(DeviceResources* deviceResource) noexcept; - void NewFrame(); + bool BuildFonts() noexcept; - void EndFrame(); + void NewFrame() noexcept; - void ClearStates(); + void Draw() noexcept; + + void ClearStates() noexcept; + + void MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept; // 将提示窗口限制在屏幕内 - static void Tooltip(const char* content, float maxWidth = -1.0f); + static void Tooltip(const char* content, float maxWidth = -1.0f) noexcept; private: - std::unique_ptr _backend; + void _UpdateMousePos() noexcept; + + ImGuiBackend _backend; - ID3D11RenderTargetView* _rtv = nullptr; uint32_t _handlerId = 0; HANDLE _hHookThread = NULL; diff --git a/src/Magpie.Core/MagApp.cpp b/src/Magpie.Core/MagApp.cpp deleted file mode 100644 index 9fb6227e1..000000000 --- a/src/Magpie.Core/MagApp.cpp +++ /dev/null @@ -1,597 +0,0 @@ -#include "pch.h" -#include "MagApp.h" -#include "Logger.h" -#include "Win32Utils.h" -#include "ExclModeHack.h" -#include "DeviceResources.h" 
-#include "GraphicsCaptureFrameSource.h" -#include "DesktopDuplicationFrameSource.h" -#include "GDIFrameSource.h" -#include "DwmSharedSurfaceFrameSource.h" -#include "StrUtils.h" -#include "CursorManager.h" -#include "Renderer.h" -#include "GPUTimer.h" -#include "WindowHelper.h" - -namespace Magpie::Core { - -static constexpr const wchar_t* HOST_WINDOW_CLASS_NAME = L"Window_Magpie_967EB565-6F73-4E94-AE53-00CC42592A22"; -static constexpr const wchar_t* DDF_WINDOW_CLASS_NAME = L"Window_Magpie_C322D752-C866-4630-91F5-32CB242A8930"; - - -static LRESULT DDFWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) { - if (msg == WM_DESTROY) { - return 0; - } - - return DefWindowProc(hWnd, msg, wParam, lParam); -} - -static LRESULT CALLBACK LowLevelKeyboardProc( - _In_ int nCode, - _In_ WPARAM wParam, - _In_ LPARAM lParam -) { - if (nCode != HC_ACTION || wParam != WM_KEYDOWN) { - return CallNextHookEx(NULL, nCode, wParam, lParam); - } - - KBDLLHOOKSTRUCT* info = (KBDLLHOOKSTRUCT*)lParam; - if (info->vkCode == VK_SNAPSHOT) { - ([]()->winrt::fire_and_forget { - MagApp& app = MagApp::Get(); - - if (!app.GetOptions().IsDrawCursor()) { - co_return; - } - - // 暂时隐藏光标 - app.GetCursorManager().Hide(); - app.GetRenderer().Render(true); - - winrt::DispatcherQueue dispatcher = app.Dispatcher(); - - co_await 400ms; - co_await dispatcher; - - if (app.GetHwndHost()) { - app.GetCursorManager().Show(); - } - })(); - } - - return CallNextHookEx(NULL, nCode, wParam, lParam); -} - -MagApp::MagApp() : - _hInst(GetModuleHandle(nullptr)), - _dispatcher(winrt::DispatcherQueue::GetForCurrentThread()) -{ -} - -MagApp::~MagApp() {} - -static bool CheckSrcWindow(HWND hwndSrc, bool isAllowScalingMaximized) { - if (!WindowHelper::IsValidSrcWindow(hwndSrc)) { - Logger::Get().Info("禁止缩放系统窗口"); - return false; - } - - // 不缩放最大化和最小化的窗口 - if (UINT showCmd = Win32Utils::GetWindowShowCmd(hwndSrc); showCmd != SW_NORMAL) { - if (showCmd != SW_SHOWMAXIMIZED || !isAllowScalingMaximized) { - 
Logger::Get().Info(StrUtils::Concat("源窗口已", - showCmd == SW_SHOWMAXIMIZED ? "最大化" : "最小化")); - return false; - } - } - - // 不缩放过小的窗口 - RECT clientRect{}; - GetClientRect(hwndSrc, &clientRect); - SIZE clientSize = Win32Utils::GetSizeOfRect(clientRect); - if (clientSize.cx < 5 || clientSize.cy < 5) { - Logger::Get().Info("源窗口尺寸过小"); - return false; - } - -#if _DEBUG - OutputDebugString(fmt::format(L"可执行文件路径:{}\n窗口类:{}\n", - Win32Utils::GetPathOfWnd(hwndSrc), Win32Utils::GetWndClassName(hwndSrc)).c_str()); -#endif // _DEBUG - - return true; -} - -bool MagApp::Start(HWND hwndSrc, MagOptions&& options) { - if (_hwndHost) { - return false; - } - - if (!CheckSrcWindow(hwndSrc, options.IsAllowScalingMaximized())) { - return false; - } - - _hwndSrc = hwndSrc; - _options = options; - - _RegisterWndClasses(); - - if (!_CreateHostWnd()) { - _hwndSrc = NULL; - return false; - } - - _deviceResources = std::make_unique(); - if (!_deviceResources->Initialize()) { - Logger::Get().Error("初始化 DeviceResources 失败"); - Stop(); - return false; - } - - if (!_InitFrameSource()) { - Logger::Get().Error("_InitFrameSource 失败"); - Stop(); - return false; - } - - _renderer = std::make_unique(); - if (!_renderer->Initialize()) { - Logger::Get().Error("初始化 Renderer 失败"); - Stop(); - return false; - } - - _cursorManager = std::make_unique(); - if (!_cursorManager->Initialize()) { - Logger::Get().Error("初始化 CursorManager 失败"); - Stop(); - return false; - } - - if (_options.IsDisableDirectFlip() && !_options.IsDebugMode()) { - // 在此处创建的 DDF 窗口不会立刻显示 - if (!_DisableDirectFlip()) { - Logger::Get().Error("_DisableDirectFlip 失败"); - } - } - - _hKeyboardHook = SetWindowsHookEx(WH_KEYBOARD_LL, LowLevelKeyboardProc, NULL, 0); - - assert(_hwndHost); - // 缩放窗口可能有 WS_MAXIMIZE 样式,因此使用 SetWindowsPos 而不是 ShowWindow - // 以避免 OS 更改窗口尺寸和位置。 - SetWindowPos( - _hwndHost, - NULL, - _hostWndRect.left, - _hostWndRect.top, - _hostWndRect.right - _hostWndRect.left, - _hostWndRect.bottom - _hostWndRect.top, - 
SWP_SHOWWINDOW | SWP_NOCOPYBITS | SWP_NOREDRAW - ); - - // 模拟独占全屏 - if (MagApp::Get().GetOptions().IsSimulateExclusiveFullscreen()) { - // 延迟 1s 以避免干扰游戏的初始化,见 #495 - ([](HWND hwndHost)->winrt::fire_and_forget { - co_await 1s; - MagApp::Get()._dispatcher.TryEnqueue([hwndHost]() { - MagApp& app = MagApp::Get(); - // 缩放窗口句柄相同就认为中途没有退出缩放。 - // 实践中很难创建出两个句柄相同的窗口,见 https://stackoverflow.com/a/65617844 - if (app._hwndHost == hwndHost && app._options.IsSimulateExclusiveFullscreen() && !app._exclModeHack) { - app._exclModeHack = std::make_unique(); - } - }); - })(_hwndHost); - }; - - return true; -} - -winrt::fire_and_forget MagApp::_WaitForSrcMovingOrSizing() { - HWND hwndSrc = _hwndSrc; - while (true) { - if (!IsWindow(hwndSrc) || GetForegroundWindow() != hwndSrc) { - break; - } else if (UINT showCmd = Win32Utils::GetWindowShowCmd(hwndSrc); showCmd != SW_NORMAL) { - if (showCmd != SW_SHOWMAXIMIZED || !MagApp::Get().GetOptions().IsAllowScalingMaximized()) { - break; - } - } - - // 检查源窗口是否正在调整大小或移动 - GUITHREADINFO guiThreadInfo{}; - guiThreadInfo.cbSize = sizeof(GUITHREADINFO); - if (!GetGUIThreadInfo(GetWindowThreadProcessId(hwndSrc, nullptr), &guiThreadInfo)) { - Logger::Get().Win32Error("GetGUIThreadInfo 失败"); - break; - } - - if (guiThreadInfo.flags & GUI_INMOVESIZE) { - co_await 10ms; - } else { - _dispatcher.TryEnqueue([this]() { - _isWaitingForSrcMovingOrSizing = false; - Start(_hwndSrc, std::move(_options)); - }); - co_return; - } - } - - _dispatcher.TryEnqueue([this]() { - _isWaitingForSrcMovingOrSizing = false; - }); -} - -void MagApp::Stop(bool isSrcMovingOrSizing) { - if (_hwndHost) { - _dispatcher.TryEnqueue([this, isSrcMovingOrSizing]() { - _isWaitingForSrcMovingOrSizing = isSrcMovingOrSizing; - - DestroyWindow(_hwndHost); - - if(isSrcMovingOrSizing) { - // 源窗口的大小或位置不再改变时重新缩放 - _WaitForSrcMovingOrSizing(); - } - }); - } -} - -void MagApp::ToggleOverlay() { - _renderer->SetUIVisibility(!_renderer->IsUIVisiable()); -} - -uint32_t 
MagApp::RegisterWndProcHandler(std::function(HWND, UINT, WPARAM, LPARAM)> handler) noexcept { - uint32_t id = _nextWndProcHandlerID++; - _wndProcHandlers.emplace_back(std::move(handler), id); - return id; -} - -bool MagApp::UnregisterWndProcHandler(uint32_t id) noexcept { - if (id == 0) { - return false; - } - - // 从后向前查找,因为后注册的回调更可能先取消注册 - for (int i = (int)_wndProcHandlers.size() - 1; i >= 0; --i) { - if (_wndProcHandlers[i].second == id) { - _wndProcHandlers.erase(_wndProcHandlers.begin() + i); - return true; - } - } - - return false; -} - -bool MagApp::MessageLoop() { - if (!_hwndHost) { - return true; - } - - while (true) { - MSG msg; - while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { - if (msg.message == WM_QUIT) { - Stop(); - return false; - } - - TranslateMessage(&msg); - DispatchMessage(&msg); - } - - if (!_hwndHost) { - if (_isWaitingForSrcMovingOrSizing) { - // 防止 CPU 占用过高 - WaitMessage(); - continue; - } else { - return true; - } - } - - _renderer->Render(); - - // 第二帧(等待时或完成后)显示 DDF 窗口 - // 如果在 Run 中创建会有短暂的灰屏 - // 选择第二帧的原因:当 GetFrameCount() 返回 1 时第一帧可能处于等待状态而没有渲染,见 Renderer::Render() - if (_renderer->GetGPUTimer().GetFrameCount() == 2 && _hwndDDF) { - ShowWindow(_hwndDDF, SW_NORMAL); - - if (!SetWindowPos(_hwndDDF, _hwndHost, 0, 0, 0, 0, SWP_NOSIZE | SWP_NOMOVE | SWP_NOREDRAW)) { - Logger::Get().Win32Error("SetWindowPos 失败"); - } - } - } - - return true; -} - -void MagApp::_RegisterWndClasses() const { - static bool registered = false; - if (!registered) { - registered = true; - - WNDCLASSEX wcex = {}; - wcex.cbSize = sizeof(WNDCLASSEX); - wcex.lpfnWndProc = _HostWndProcStatic; - wcex.hInstance = _hInst; - wcex.hCursor = LoadCursor(NULL, IDC_ARROW); - wcex.lpszClassName = HOST_WINDOW_CLASS_NAME; - - if (!RegisterClassEx(&wcex)) { - // 忽略此错误,因为可能是重复注册产生的错误 - Logger::Get().Win32Error("注册缩放窗口类失败"); - } - - wcex.lpfnWndProc = DDFWndProc; - wcex.hbrBackground = (HBRUSH)GetStockObject(GRAY_BRUSH); - wcex.lpszClassName = DDF_WINDOW_CLASS_NAME; - - if 
(!RegisterClassEx(&wcex)) { - Logger::Get().Win32Error("注册 DDF 窗口类失败"); - } - } -} - -// 返回缩放窗口跨越的屏幕数量,失败返回 0 -static uint32_t CalcHostWndRect(HWND hWnd, MultiMonitorUsage multiMonitorUsage, RECT& result) { - switch (multiMonitorUsage) { - case MultiMonitorUsage::Closest: - { - // 使用距离源窗口最近的显示器 - HMONITOR hMonitor = MonitorFromWindow(hWnd, MONITOR_DEFAULTTONEAREST); - if (!hMonitor) { - Logger::Get().Win32Error("MonitorFromWindow 失败"); - return 0; - } - - MONITORINFO mi{}; - mi.cbSize = sizeof(mi); - if (!GetMonitorInfo(hMonitor, &mi)) { - Logger::Get().Win32Error("GetMonitorInfo 失败"); - return 0; - } - result = mi.rcMonitor; - - return 1; - } - case MultiMonitorUsage::Intersected: - { - // 使用源窗口跨越的所有显示器 - - // [0] 存储源窗口坐标,[1] 存储计算结果 - struct MonitorEnumParam { - RECT srcRect; - RECT destRect; - uint32_t monitorCount; - } param{}; - - if (!Win32Utils::GetWindowFrameRect(hWnd, param.srcRect)) { - Logger::Get().Error("GetWindowFrameRect 失败"); - return 0; - } - - MONITORENUMPROC monitorEnumProc = [](HMONITOR, HDC, LPRECT monitorRect, LPARAM data) { - MonitorEnumParam* param = (MonitorEnumParam*)data; - - if (Win32Utils::CheckOverlap(param->srcRect, *monitorRect)) { - UnionRect(¶m->destRect, monitorRect, ¶m->destRect); - ++param->monitorCount; - } - - return TRUE; - }; - - if (!EnumDisplayMonitors(NULL, NULL, monitorEnumProc, (LPARAM)¶m)) { - Logger::Get().Win32Error("EnumDisplayMonitors 失败"); - return 0; - } - - result = param.destRect; - if (result.right - result.left <= 0 || result.bottom - result.top <= 0) { - Logger::Get().Error("计算缩放窗口坐标失败"); - return 0; - } - - return param.monitorCount; - } - case MultiMonitorUsage::All: - { - // 使用所有显示器(Virtual Screen) - int vsWidth = GetSystemMetrics(SM_CXVIRTUALSCREEN); - int vsHeight = GetSystemMetrics(SM_CYVIRTUALSCREEN); - int vsX = GetSystemMetrics(SM_XVIRTUALSCREEN); - int vsY = GetSystemMetrics(SM_YVIRTUALSCREEN); - result = { vsX, vsY, vsX + vsWidth, vsY + vsHeight }; - - return GetSystemMetrics(SM_CMONITORS); - } - 
default: - return 0; - } -} - -// 创建缩放窗口 -bool MagApp::_CreateHostWnd() { - if (FindWindow(HOST_WINDOW_CLASS_NAME, nullptr)) { - Logger::Get().Error("已存在缩放窗口"); - return false; - } - - const uint32_t monitors = CalcHostWndRect(_hwndSrc, _options.multiMonitorUsage, _hostWndRect); - if (monitors == 0) { - Logger::Get().Error("CalcHostWndRect 失败"); - return false; - } - - if (!_options.IsAllowScalingMaximized()) { - // 源窗口和缩放窗口重合则不缩放,此时源窗口可能是无边框全屏窗口 - RECT srcRect; - if (!Win32Utils::GetWindowFrameRect(_hwndSrc, srcRect)) { - Win32Utils::GetClientScreenRect(_hwndSrc, srcRect); - } - - if (srcRect == _hostWndRect) { - Logger::Get().Info("源窗口已全屏"); - return false; - } - } - - // WS_EX_NOREDIRECTIONBITMAP 可以避免 WS_EX_LAYERED 导致的额外内存开销。 - // WS_MAXIMIZE 使 Wallpaper Engine 在缩放时暂停动态壁纸 #502,这个 hack 不支持 - // 跨越多个屏幕的情况。 - _hwndHost = CreateWindowEx( - (_options.IsDebugMode() ? 0 : WS_EX_TOPMOST) | WS_EX_NOACTIVATE - | WS_EX_LAYERED | WS_EX_NOREDIRECTIONBITMAP | WS_EX_TRANSPARENT | WS_EX_TOOLWINDOW, - HOST_WINDOW_CLASS_NAME, - nullptr, // 标题为空,否则会被添加新配置页面列为候选窗口 - WS_POPUP | (monitors == 1 ? WS_MAXIMIZE : 0), - _hostWndRect.left, - _hostWndRect.top, - _hostWndRect.right - _hostWndRect.left, - _hostWndRect.bottom - _hostWndRect.top, - NULL, - NULL, - _hInst, - NULL - ); - if (!_hwndHost) { - Logger::Get().Win32Error("创建缩放窗口失败"); - return false; - } - - Logger::Get().Info(fmt::format("缩放窗口尺寸:{}x{}", - _hostWndRect.right - _hostWndRect.left, _hostWndRect.bottom - _hostWndRect.top)); - - // 设置窗口不透明 - // 不完全透明时可关闭 DirectFlip - if (!SetLayeredWindowAttributes(_hwndHost, 0, _options.IsDisableDirectFlip() ? 
254 : 255, LWA_ALPHA)) { - Logger::Get().Win32Error("SetLayeredWindowAttributes 失败"); - } - - return true; -} - -bool MagApp::_InitFrameSource() { - switch (_options.captureMethod) { - case CaptureMethod::GraphicsCapture: - _frameSource = std::make_unique(); - break; - case CaptureMethod::DesktopDuplication: - _frameSource = std::make_unique(); - break; - case CaptureMethod::GDI: - _frameSource = std::make_unique(); - break; - case CaptureMethod::DwmSharedSurface: - _frameSource = std::make_unique(); - break; - default: - Logger::Get().Critical("未知的捕获模式"); - return false; - } - - Logger::Get().Info(StrUtils::Concat("当前捕获模式:", _frameSource->GetName())); - - if (!_frameSource->Initialize()) { - Logger::Get().Critical("初始化 FrameSource 失败"); - return false; - } - - const RECT& frameRect = _frameSource->GetSrcFrameRect(); - Logger::Get().Info(fmt::format("源窗口尺寸:{}x{}", - frameRect.right - frameRect.left, frameRect.bottom - frameRect.top)); - - return true; -} - -bool MagApp::_DisableDirectFlip() { - // 没有显式关闭 DirectFlip 的方法 - // 将全屏窗口设为稍微透明,以灰色全屏窗口为背景 - _hwndDDF = CreateWindowEx( - WS_EX_NOACTIVATE | WS_EX_LAYERED | WS_EX_TRANSPARENT, - DDF_WINDOW_CLASS_NAME, - NULL, - WS_POPUP, - _hostWndRect.left, - _hostWndRect.top, - _hostWndRect.right - _hostWndRect.left, - _hostWndRect.bottom - _hostWndRect.top, - NULL, - NULL, - _hInst, - NULL - ); - - if (!_hwndDDF) { - Logger::Get().Win32Error("创建 DDF 窗口失败"); - return false; - } - - // 设置窗口不透明 - if (!SetLayeredWindowAttributes(_hwndDDF, 0, 255, LWA_ALPHA)) { - Logger::Get().Win32Error("SetLayeredWindowAttributes 失败"); - } - - if (_frameSource->IsScreenCapture()) { - if (Win32Utils::GetOSVersion().Is20H1OrNewer()) { - // 使 DDF 窗口无法被捕获到 - if (!SetWindowDisplayAffinity(_hwndDDF, WDA_EXCLUDEFROMCAPTURE)) { - Logger::Get().Win32Error("SetWindowDisplayAffinity 失败"); - } - } - } - - return true; -} - -LRESULT MagApp::_HostWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) { - // 以反向调用回调 - for (auto it = 
_wndProcHandlers.rbegin(); it != _wndProcHandlers.rend(); ++it) { - const auto& result = it->first(hWnd, msg, wParam, lParam); - if (result.has_value()) { - return *result; - } - } - - switch (msg) { - case WM_DESTROY: - { - _OnQuit(); - - if (_hwndDDF) { - DestroyWindow(_hwndDDF); - _hwndDDF = NULL; - } - - _hwndHost = NULL; - return 0; - } - } - - return DefWindowProc(hWnd, msg, wParam, lParam); -} - -void MagApp::_OnQuit() { - if (_hKeyboardHook) { - UnhookWindowsHookEx(_hKeyboardHook); - _hKeyboardHook = NULL; - } - - // 释放资源 - _exclModeHack.reset(); - _cursorManager.reset(); - _renderer.reset(); - _frameSource.reset(); - _deviceResources.reset(); - - _nextWndProcHandlerID = 1; - _wndProcHandlers.clear(); -} - -} diff --git a/src/Magpie.Core/MagApp.h b/src/Magpie.Core/MagApp.h deleted file mode 100644 index bf334479f..000000000 --- a/src/Magpie.Core/MagApp.h +++ /dev/null @@ -1,126 +0,0 @@ -#pragma once -#include "MagOptions.h" -#include - -namespace Magpie::Core { - -class DeviceResources; -class Renderer; -class FrameSourceBase; -class CursorManager; -class ExclModeHack; - -class MagApp { -public: - ~MagApp(); - - static MagApp& Get() noexcept { - static MagApp instance; - return instance; - } - - bool Start(HWND hwndSrc, MagOptions&& options); - - void Stop(bool isSrcMovingOrSizing = false); - - void ToggleOverlay(); - - HINSTANCE GetHInstance() const noexcept { - return _hInst; - } - - HWND GetHwndSrc() const noexcept { - return _hwndSrc; - } - - HWND GetHwndHost() const noexcept { - return _hwndHost; - } - - const RECT& GetHostWndRect() const noexcept { - return _hostWndRect; - } - - DeviceResources& GetDeviceResources() noexcept { - return *_deviceResources; - } - - Renderer& GetRenderer() noexcept { - return *_renderer; - } - - FrameSourceBase& GetFrameSource() noexcept { - return *_frameSource; - } - - CursorManager& GetCursorManager() noexcept { - return *_cursorManager; - } - - MagOptions& GetOptions() noexcept { - return _options; - } - - 
winrt::DispatcherQueue Dispatcher() const noexcept { - return _dispatcher; - } - - // 注册消息回调,回调函数如果不阻断消息应返回空 - // 返回 ID,不会为 0 - uint32_t RegisterWndProcHandler(std::function(HWND, UINT, WPARAM, LPARAM)> handler) noexcept; - bool UnregisterWndProcHandler(uint32_t id) noexcept; - - bool MessageLoop(); - -private: - MagApp(); - - void _RegisterWndClasses() const; - - // 创建主窗口 - bool _CreateHostWnd(); - - bool _InitFrameSource(); - - bool _DisableDirectFlip(); - - static LRESULT CALLBACK _HostWndProcStatic(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) { - return Get()._HostWndProc(hWnd, msg, wParam, lParam); - } - - LRESULT _HostWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam); - - void _OnQuit(); - - winrt::fire_and_forget _WaitForSrcMovingOrSizing(); - - const winrt::DispatcherQueue _dispatcher{ nullptr }; - - HINSTANCE _hInst = NULL; - HWND _hwndSrc = NULL; - HWND _hwndHost = NULL; - - // 关闭 DirectFlip 时的背景全屏窗口 - HWND _hwndDDF = NULL; - - RECT _hostWndRect{}; - - std::unique_ptr _deviceResources; - std::unique_ptr _renderer; - std::unique_ptr _frameSource; - std::unique_ptr _cursorManager; - std::unique_ptr _exclModeHack; - MagOptions _options; - - HHOOK _hKeyboardHook = NULL; - - SmallVector(HWND, UINT, WPARAM, LPARAM)>, uint32_t>, 2> _wndProcHandlers; - uint32_t _nextWndProcHandlerID = 1; - - bool _windowResizingDisabled = false; - bool _roundCornerDisabled = false; - - bool _isWaitingForSrcMovingOrSizing = false; -}; - -} diff --git a/src/Magpie.Core/MagOptions.h b/src/Magpie.Core/MagOptions.h deleted file mode 100644 index 70aa79f9b..000000000 --- a/src/Magpie.Core/MagOptions.h +++ /dev/null @@ -1,111 +0,0 @@ -#pragma once -#include - -namespace Magpie::Core { - -enum class CaptureMethod { - GraphicsCapture, - DesktopDuplication, - GDI, - DwmSharedSurface, -}; - -enum class MultiMonitorUsage { - Closest, - Intersected, - All, -}; - -enum class CursorInterpolationMode { - NearestNeighbor, - Bilinear, -}; - -struct Cropping { - float Left; - float 
Top; - float Right; - float Bottom; -}; - -struct MagFlags { - static constexpr const uint32_t DisableWindowResizing = 0x1; - static constexpr const uint32_t BreakpointMode = 0x2; - static constexpr const uint32_t DisableEffectCache = 0x4; - static constexpr const uint32_t SaveEffectSources = 0x8; - static constexpr const uint32_t WarningsAreErrors = 0x10; - static constexpr const uint32_t SimulateExclusiveFullscreen = 0x20; - static constexpr const uint32_t Is3DGameMode = 0x40; - static constexpr const uint32_t ShowFPS = 0x80; - static constexpr const uint32_t VSync = 0x100; - static constexpr const uint32_t TripleBuffering = 0x200; - static constexpr const uint32_t CaptureTitleBar = 0x400; - static constexpr const uint32_t AdjustCursorSpeed = 0x800; - static constexpr const uint32_t DrawCursor = 0x1000; - static constexpr const uint32_t DisableDirectFlip = 0x2000; - static constexpr const uint32_t DisableFontCache = 0x4000; - static constexpr const uint32_t AllowScalingMaximized = 0x8000; -}; - -struct DownscalingEffect { - std::wstring name; - phmap::flat_hash_map parameters; -}; - -enum class ScalingType { - Normal, // Scale 表示缩放倍数 - Fit, // Scale 表示相对于屏幕能容纳的最大等比缩放的比例 - Absolute, // Scale 表示目标大小(单位为像素) - Fill // 充满屏幕,此时不使用 Scale 参数 -}; - -struct EffectOptionFlags { - static constexpr const uint32_t InlineParams = 0x1; - static constexpr const uint32_t FP16 = 0x2; -}; - -struct EffectOption { - std::wstring name; - phmap::flat_hash_map parameters; - ScalingType scalingType = ScalingType::Normal; - std::pair scale = { 1.0f,1.0f }; - uint32_t flags = 0; // EffectOptionFlags - - bool HasScale() const noexcept { - return scalingType != ScalingType::Normal || - std::abs(scale.first - 1.0f) > 1e-5 || std::abs(scale.second - 1.0f) > 1e-5; - } -}; - -struct MagOptions { - DEFINE_FLAG_ACCESSOR(IsDisableWindowResizing, MagFlags::DisableWindowResizing, flags) - DEFINE_FLAG_ACCESSOR(IsDebugMode, MagFlags::BreakpointMode, flags) - DEFINE_FLAG_ACCESSOR(IsDisableEffectCache, 
MagFlags::DisableEffectCache, flags) - DEFINE_FLAG_ACCESSOR(IsDisableFontCache, MagFlags::DisableFontCache, flags) - DEFINE_FLAG_ACCESSOR(IsSaveEffectSources, MagFlags::SaveEffectSources, flags) - DEFINE_FLAG_ACCESSOR(IsWarningsAreErrors, MagFlags::WarningsAreErrors, flags) - DEFINE_FLAG_ACCESSOR(IsAllowScalingMaximized, MagFlags::AllowScalingMaximized, flags) - DEFINE_FLAG_ACCESSOR(IsSimulateExclusiveFullscreen, MagFlags::SimulateExclusiveFullscreen, flags) - DEFINE_FLAG_ACCESSOR(Is3DGameMode, MagFlags::Is3DGameMode, flags) - DEFINE_FLAG_ACCESSOR(IsShowFPS, MagFlags::ShowFPS, flags) - DEFINE_FLAG_ACCESSOR(IsVSync, MagFlags::VSync, flags) - DEFINE_FLAG_ACCESSOR(IsTripleBuffering, MagFlags::TripleBuffering, flags) - DEFINE_FLAG_ACCESSOR(IsCaptureTitleBar, MagFlags::CaptureTitleBar, flags) - DEFINE_FLAG_ACCESSOR(IsAdjustCursorSpeed, MagFlags::AdjustCursorSpeed, flags) - DEFINE_FLAG_ACCESSOR(IsDrawCursor, MagFlags::DrawCursor, flags) - DEFINE_FLAG_ACCESSOR(IsDisableDirectFlip, MagFlags::DisableDirectFlip, flags) - - Cropping cropping{}; - uint32_t flags = MagFlags::VSync | MagFlags::AdjustCursorSpeed | MagFlags::DrawCursor; // MagFlags - int graphicsCard = -1; - float cursorScaling = 1.0f; - CaptureMethod captureMethod = CaptureMethod::GraphicsCapture; - MultiMonitorUsage multiMonitorUsage = MultiMonitorUsage::Closest; - CursorInterpolationMode cursorInterpolationMode = CursorInterpolationMode::NearestNeighbor; - - DownscalingEffect downscalingEffect; - - std::vector effects; -}; - -} diff --git a/src/Magpie.Core/MagRuntime.cpp b/src/Magpie.Core/MagRuntime.cpp deleted file mode 100644 index 926ca7402..000000000 --- a/src/Magpie.Core/MagRuntime.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#include "pch.h" -#include -#include "MagApp.h" -#include "MagRuntime.h" -#include "Logger.h" - - -namespace Magpie::Core { - -MagRuntime::MagRuntime() : _magWindThread(std::bind(&MagRuntime::_MagWindThreadProc, this)) { -} - -MagRuntime::~MagRuntime() { - Stop(); - - if 
(_magWindThread.joinable()) { - const DWORD magWndThreadId = GetThreadId(_magWindThread.native_handle()); - // 持续尝试直到 _magWndThread 创建了消息队列 - while (!PostThreadMessage(magWndThreadId, WM_QUIT, 0, 0)) { - Sleep(0); - } - _magWindThread.join(); - } -} - -void MagRuntime::Run(HWND hwndSrc, const MagOptions& options) { - HWND expected = NULL; - if (!_hwndSrc.compare_exchange_strong(expected, hwndSrc, std::memory_order_relaxed)) { - return; - } - - _isRunningChangedEvent(true); - - _EnsureDispatcherQueue(); - _dqc.DispatcherQueue().TryEnqueue([hwndSrc, options(options)]() mutable { - MagApp::Get().Start(hwndSrc, std::move(options)); - }); -} - -void MagRuntime::ToggleOverlay() { - if (!IsRunning()) { - return; - } - - _EnsureDispatcherQueue(); - _dqc.DispatcherQueue().TryEnqueue([]() { - MagApp::Get().ToggleOverlay(); - }); -} - -void MagRuntime::Stop() { - if (!IsRunning()) { - return; - } - - _EnsureDispatcherQueue(); - _dqc.DispatcherQueue().TryEnqueue([]() { - MagApp::Get().Stop(); - }); -} - -void MagRuntime::_MagWindThreadProc() noexcept { - winrt::init_apartment(winrt::apartment_type::single_threaded); - - DispatcherQueueOptions dqOptions{}; - dqOptions.dwSize = sizeof(DispatcherQueueOptions); - dqOptions.threadType = DQTYPE_THREAD_CURRENT; - - HRESULT hr = CreateDispatcherQueueController( - dqOptions, - (PDISPATCHERQUEUECONTROLLER*)winrt::put_abi(_dqc) - ); - if (FAILED(hr)) { - Logger::Get().ComError("CreateDispatcherQueueController 失败", hr); - return; - } - - MagApp& app = MagApp::Get(); - - while (true) { - if (app.GetHwndHost()) { - // 缩放时使用不同的消息循环 - bool quiting = !app.MessageLoop(); - - _hwndSrc.store(NULL, std::memory_order_relaxed); - _isRunningChangedEvent(false); - - if (quiting) { - return; - } - } else { - if (_hwndSrc.exchange(NULL, std::memory_order_relaxed)) { - // 缩放失败或立即退出缩放 - _isRunningChangedEvent(false); - } - - WaitMessage(); - - MSG msg; - while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { - if (msg.message == WM_QUIT) { - return; - } - - 
TranslateMessage(&msg); - DispatchMessage(&msg); - } - } - } -} - -void MagRuntime::_EnsureDispatcherQueue() const noexcept { - while (!_dqc) { - Sleep(1); - } -} - -} diff --git a/src/Magpie.Core/Magpie.Core.vcxproj b/src/Magpie.Core/Magpie.Core.vcxproj index 0d9ead8d0..fd668ec3e 100644 --- a/src/Magpie.Core/Magpie.Core.vcxproj +++ b/src/Magpie.Core/Magpie.Core.vcxproj @@ -8,7 +8,7 @@ Magpie.Core 10.0.22621.0 Magpie.Core - $(SolutionDir)\bin\$(Platform)\$(Configuration)\ + $(SolutionDir)bin\$(Platform)\$(Configuration)\ @@ -24,16 +24,27 @@ - + Fast + + 5.0 + true + true + %(Filename) + %(RelativeDir)%(Filename).h + + + + + @@ -43,31 +54,35 @@ - - + + + - + - - - - - + + + + + + + + @@ -75,26 +90,51 @@ - + + - + - - Create - + + + + + + + Compute + + + Pixel + + + Vertex + + + Pixel + + + Pixel + + + Pixel + + + Vertex + + diff --git a/src/Magpie.Core/Magpie.Core.vcxproj.filters b/src/Magpie.Core/Magpie.Core.vcxproj.filters index 31fffeb31..93e7efad6 100644 --- a/src/Magpie.Core/Magpie.Core.vcxproj.filters +++ b/src/Magpie.Core/Magpie.Core.vcxproj.filters @@ -16,48 +16,20 @@ {34fd6af4-f461-43ae-81e0-c663a89c2d2a} + + {1956ae10-07ad-4b77-a37f-25f7fe10654b} + Include - - - - + + - - - - - - - Capture - - - Capture - - - Capture - - - Capture - - - Capture - - - Overlay - - - Overlay - - - Overlay - TextureLoader @@ -88,43 +60,47 @@ Helpers - - - - - - - - - - - - - - - Capture - - - Capture - - + + + + + + + + + Capture - - + + Capture - - + + Capture - - + + Overlay - - + + Overlay - - + + Overlay - + + + + + Capture + + + Capture + + + + + + + + TextureLoader @@ -143,6 +119,64 @@ Overlay + + + + + + + + + Capture + + + Capture + + + Capture + + + Overlay + + + Overlay + + + Overlay + + + + + Capture + + + Capture + + + + + + + Shaders + + + Shaders + + + Shaders + + + Shaders + + + Shaders + + + Shaders + + + Shaders + diff --git a/src/Magpie.Core/OverlayDrawer.cpp b/src/Magpie.Core/OverlayDrawer.cpp index a655be552..154184a33 100644 --- 
a/src/Magpie.Core/OverlayDrawer.cpp +++ b/src/Magpie.Core/OverlayDrawer.cpp @@ -1,10 +1,8 @@ #include "pch.h" #include "OverlayDrawer.h" -#include "MagApp.h" #include "DeviceResources.h" -#include "ImGuiImpl.h" #include "Renderer.h" -#include "GPUTimer.h" +#include "StepTimer.h" #include "Logger.h" #include "StrUtils.h" #include "Win32Utils.h" @@ -15,40 +13,20 @@ #include #include "ImGuiHelper.h" #include "ImGuiFontsCacheManager.h" +#include "ScalingWindow.h" -namespace Magpie::Core { - -static const std::wstring& GetSystemFontsFolder() noexcept { - static std::wstring result; - - if (result.empty()) { - wchar_t* fontsFolder = nullptr; - HRESULT hr = SHGetKnownFolderPath(FOLDERID_Fonts, 0, NULL, &fontsFolder); - if (FAILED(hr)) { - CoTaskMemFree(fontsFolder); - Logger::Get().ComError("SHGetKnownFolderPath 失败", hr); - return result; - } +using namespace std::chrono; - result = fontsFolder; - CoTaskMemFree(fontsFolder); - } +namespace Magpie::Core { - return result; -} +static const char* COLOR_INDICATOR = "■"; +static const wchar_t COLOR_INDICATOR_W = L'■'; -OverlayDrawer::OverlayDrawer() noexcept { - HWND hwndSrc = MagApp::Get().GetHwndSrc(); - _isSrcMainWnd = Win32Utils::GetWndClassName(hwndSrc) == CommonSharedConstants::MAIN_WINDOW_CLASS_NAME; -} +OverlayDrawer::OverlayDrawer() : + _resourceLoader(winrt::ResourceLoader::GetForViewIndependentUse(CommonSharedConstants::APP_RESOURCE_MAP_ID)) +{} -OverlayDrawer::~OverlayDrawer() { - if (MagApp::Get().GetOptions().Is3DGameMode() && IsUIVisiable()) { - _EnableSrcWnd(true); - } -} - -static const ImColor TIMELINE_COLORS[] = { +static constexpr const ImColor TIMELINE_COLORS[] = { {229,57,53,255}, {156,39,176,255}, {63,81,181,255}, @@ -58,24 +36,19 @@ static const ImColor TIMELINE_COLORS[] = { {117,117,117,255} }; -static UINT GetSeed() { - Renderer& renderer = MagApp::Get().GetRenderer(); - UINT nEffect = renderer.GetEffectCount(); - - UINT result = 0; - for (UINT i = 0; i < nEffect; ++i) { - result ^= 
(UINT)std::hash()(renderer.GetEffectDesc(i).name); +static uint32_t GetSeed(const std::vector& effectInfos) noexcept { + uint32_t result = 0; + for (const Renderer::EffectInfo& effectInfo : effectInfos) { + result ^= (uint32_t)std::hash()(effectInfo.name); } return result; } -static SmallVector GenerateTimelineColors() { - Renderer& renderer = MagApp::Get().GetRenderer(); - - const UINT nEffect = renderer.GetEffectCount(); - UINT totalColors = nEffect > 1 ? nEffect : 0; - for (UINT i = 0; i < nEffect; ++i) { - UINT nPass = (UINT)renderer.GetEffectDesc(i).passes.size(); +static SmallVector GenerateTimelineColors(const std::vector& effectInfos) noexcept { + const uint32_t nEffect = (uint32_t)effectInfos.size(); + uint32_t totalColors = nEffect > 1 ? nEffect : 0; + for (uint32_t i = 0; i < nEffect; ++i) { + uint32_t nPass = (uint32_t)effectInfos[i].passNames.size(); if (nPass > 1) { totalColors += nPass; } @@ -85,14 +58,14 @@ static SmallVector GenerateTimelineColors() { return {}; } - constexpr UINT nColors = (UINT)std::size(TIMELINE_COLORS); + constexpr uint32_t nColors = (uint32_t)std::size(TIMELINE_COLORS); - std::default_random_engine randomEngine(GetSeed()); - SmallVector result; + std::default_random_engine randomEngine(GetSeed(effectInfos)); + SmallVector result; if (totalColors <= nColors) { result.resize(nColors); - for (UINT i = 0; i < nColors; ++i) { + for (uint32_t i = 0; i < nColors; ++i) { result[i] = i; } std::shuffle(result.begin(), result.end(), randomEngine); @@ -101,23 +74,23 @@ static SmallVector GenerateTimelineColors() { } else { // 相邻通道颜色不同,相邻效果颜色不同 result.resize(totalColors); - std::uniform_int_distribution uniformDst(0, nColors - 1); + std::uniform_int_distribution uniformDst(0, nColors - 1); if (nEffect <= nColors) { if (nEffect > 1) { // 确保效果的颜色不重复 - std::array effectColors{}; - for (UINT i = 0; i < nColors; ++i) { + std::array effectColors{}; + for (uint32_t i = 0; i < nColors; ++i) { effectColors[i] = i; } 
std::shuffle(effectColors.begin(), effectColors.end(), randomEngine); - UINT i = 0; - for (UINT j = 0; j < nEffect; ++j) { + uint32_t i = 0; + for (uint32_t j = 0; j < nEffect; ++j) { result[i] = effectColors[j]; ++i; - UINT nPass = (UINT)renderer.GetEffectDesc(j).passes.size(); + uint32_t nPass = (uint32_t)effectInfos[j].passNames.size(); if (nPass > 1) { i += nPass; } @@ -125,10 +98,10 @@ static SmallVector GenerateTimelineColors() { } } else { // 仅确保与前一个效果颜色不同 - UINT prevColor = UINT_MAX; - UINT i = 0; - for (UINT j = 0; j < nEffect; ++j) { - UINT c = uniformDst(randomEngine); + uint32_t prevColor = std::numeric_limits::max(); + uint32_t i = 0; + for (uint32_t j = 0; j < nEffect; ++j) { + uint32_t c = uniformDst(randomEngine); while (c == prevColor) { c = uniformDst(randomEngine); } @@ -137,7 +110,7 @@ static SmallVector GenerateTimelineColors() { prevColor = c; ++i; - UINT nPass = (UINT)renderer.GetEffectDesc(j).passes.size(); + uint32_t nPass = (uint32_t)effectInfos[j].passNames.size(); if (nPass > 1) { i += nPass; } @@ -146,8 +119,8 @@ static SmallVector GenerateTimelineColors() { // 生成通道的颜色 size_t idx = 0; - for (UINT i = 0; i < nEffect; ++i) { - UINT nPass = (UINT)renderer.GetEffectDesc(i).passes.size(); + for (uint32_t i = 0; i < nEffect; ++i) { + uint32_t nPass = (uint32_t)effectInfos[i].passNames.size(); if (nEffect > 1) { ++idx; @@ -157,16 +130,15 @@ static SmallVector GenerateTimelineColors() { } } - for (UINT j = 0; j < nPass; ++j) { - UINT c = uniformDst(randomEngine); + for (uint32_t j = 0; j < nPass; ++j) { + uint32_t c = uniformDst(randomEngine); if (i > 0 || j > 0) { - UINT prevColor = (i > 0 && j == 0) ? result[idx - 2] : result[idx - 1]; + uint32_t prevColor = (i > 0 && j == 0) ? 
result[idx - 2] : result[idx - 1]; - if (j + 1 == nPass && i + 1 != nEffect && - renderer.GetEffectDesc(i + 1).passes.size() == 1) { - // 当前效果的最后一个通道且下一个效果只有一个通道 - UINT nextColor = result[idx + 1]; + if (j + 1 == nPass && i + 1 != nEffect && effectInfos[(size_t)i + 1].passNames.size() == 1) { + // 当前效果的最后一个通道且下一个效果只有一个通道 + uint32_t nextColor = result[idx + 1]; while (c == prevColor || c == nextColor) { c = uniformDst(randomEngine); } @@ -186,14 +158,22 @@ static SmallVector GenerateTimelineColors() { return result; } -bool OverlayDrawer::Initialize() noexcept { - _imguiImpl.reset(new ImGuiImpl()); - if (!_imguiImpl->Initialize()) { +OverlayDrawer::~OverlayDrawer() { + if (ScalingWindow::Get().Options().Is3DGameMode() && IsUIVisible()) { + HWND hwndSrc = ScalingWindow::Get().HwndSrc(); + EnableWindow(hwndSrc, TRUE); + // 此时用户通过热键退出缩放,应激活源窗口 + Win32Utils::SetForegroundWindow(hwndSrc); + } +} + +bool OverlayDrawer::Initialize(DeviceResources* deviceResources) noexcept { + if (!_imguiImpl.Initialize(deviceResources)) { Logger::Get().Error("初始化 ImGuiImpl 失败"); return false; } - _dpiScale = GetDpiForWindow(MagApp::Get().GetHwndHost()) / 96.0f; + _dpiScale = GetDpiForWindow(ScalingWindow::Get().Handle()) / 96.0f; ImGui::StyleColorsDark(); ImGuiStyle& style = ImGui::GetStyle(); @@ -208,78 +188,118 @@ bool OverlayDrawer::Initialize() noexcept { return false; } - _RetrieveHardwareInfo(); - _timelineColors = GenerateTimelineColors(); - // 将 _fontUI 设为默认字体 ImGui::GetIO().FontDefault = _fontUI; + // 获取硬件信息 + DXGI_ADAPTER_DESC desc{}; + HRESULT hr = deviceResources->GetGraphicsAdapter()->GetDesc(&desc); + _hardwareInfo.gpuName = SUCCEEDED(hr) ? 
StrUtils::UTF16ToUTF8(desc.Description) : "UNAVAILABLE"; + + const std::vector& effectInfos = + ScalingWindow::Get().Renderer().EffectInfos(); + _timelineColors = GenerateTimelineColors(effectInfos); + + uint32_t passCount = 0; + for (const Renderer::EffectInfo& info : effectInfos) { + passCount += (uint32_t)info.passNames.size(); + } + _effectTimingsStatistics.resize(passCount); + _lastestAvgEffectTimings.resize(passCount); + return true; } -void OverlayDrawer::Draw() noexcept { - bool isShowFPS = MagApp::Get().GetOptions().IsShowFPS(); +void OverlayDrawer::Draw( + uint32_t count, + uint32_t fps, + const SmallVector& effectTimings +) noexcept { + bool isShowFPS = ScalingWindow::Get().Options().IsShowFPS(); if (!_isUIVisiable && !isShowFPS) { return; } - _imguiImpl->NewFrame(); - - if (isShowFPS) { - _DrawFPS(); + if (_isFirstFrame) { + // 刚显示时需连续渲染两帧才能显示 + _isFirstFrame = false; + ++count; } - if (_isUIVisiable) { - _DrawUI(); + // 很多时候需要多次渲染避免呈现中间状态,但最多只渲染 10 次 + for (int i = 0; i < 10; ++i) { + _imguiImpl.NewFrame(); + + if (isShowFPS) { + _DrawFPS(fps); + } + + if (_isUIVisiable) { + if (_DrawUI(effectTimings, fps)) { + ++count; + } + } + + // 中间状态不应执行渲染,因此调用 EndFrame 而不是 Render + ImGui::EndFrame(); + + if (--count == 0) { + break; + } } - ImGui::Render(); - _imguiImpl->EndFrame(); + _imguiImpl.Draw(); } -void OverlayDrawer::SetUIVisibility(bool value) noexcept { +// 3D 游戏模式下关闭叠加层将激活源窗口,但有时不希望这么做,比如用户切换 +// 窗口导致停止缩放。通过 noSetForeground 禁止激活源窗口 +void OverlayDrawer::SetUIVisibility(bool value, bool noSetForeground) noexcept { if (_isUIVisiable == value) { return; } _isUIVisiable = value; if (value) { - if (MagApp::Get().GetOptions().Is3DGameMode()) { + if (ScalingWindow::Get().Options().Is3DGameMode()) { // 使全屏窗口不透明且可以接收焦点 - HWND hwndHost = MagApp::Get().GetHwndHost(); + HWND hwndHost = ScalingWindow::Get().Handle(); INT_PTR style = GetWindowLongPtr(hwndHost, GWL_EXSTYLE); SetWindowLongPtr(hwndHost, GWL_EXSTYLE, style & ~(WS_EX_TRANSPARENT | WS_EX_NOACTIVATE)); 
Win32Utils::SetForegroundWindow(hwndHost); // 使源窗口无法接收用户输入 - _EnableSrcWnd(false); - // 由 ImGui 绘制光标 - ImGui::GetIO().MouseDrawCursor = true; + EnableWindow(ScalingWindow::Get().HwndSrc(), FALSE); } - Logger::Get().Info("已开启覆盖层"); + Logger::Get().Info("已开启叠加层"); } else { - _validFrames = 0; - std::fill(_frameTimes.begin(), _frameTimes.end(), 0.0f); - - if (!MagApp::Get().GetOptions().IsShowFPS()) { - _imguiImpl->ClearStates(); + if (!ScalingWindow::Get().Options().IsShowFPS()) { + _imguiImpl.ClearStates(); } - if (MagApp::Get().GetOptions().Is3DGameMode()) { + if (ScalingWindow::Get().Options().Is3DGameMode()) { // 还原全屏窗口样式 - HWND hwndHost = MagApp::Get().GetHwndHost(); + HWND hwndHost = ScalingWindow::Get().Handle(); INT_PTR style = GetWindowLongPtr(hwndHost, GWL_EXSTYLE); SetWindowLongPtr(hwndHost, GWL_EXSTYLE, style | (WS_EX_TRANSPARENT | WS_EX_NOACTIVATE)); // 重新激活源窗口 - _EnableSrcWnd(true); + HWND hwndSrc = ScalingWindow::Get().HwndSrc(); + EnableWindow(hwndSrc, TRUE); - ImGui::GetIO().MouseDrawCursor = false; + if (!noSetForeground) { + Win32Utils::SetForegroundWindow(hwndSrc); + } } - Logger::Get().Info("已关闭覆盖层"); + Logger::Get().Info("已关闭叠加层"); + } +} + +void OverlayDrawer::MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept { + if (_isUIVisiable || ScalingWindow::Get().Options().IsShowFPS()) { + _imguiImpl.MessageHandler(msg, wParam, lParam); } } @@ -293,58 +313,80 @@ static const std::wstring& GetAppLanguage() noexcept { return language; } +static const std::wstring& GetSystemFontsFolder() noexcept { + static std::wstring result; + + if (result.empty()) { + wchar_t* fontsFolder = nullptr; + HRESULT hr = SHGetKnownFolderPath(FOLDERID_Fonts, 0, NULL, &fontsFolder); + if (FAILED(hr)) { + CoTaskMemFree(fontsFolder); + Logger::Get().ComError("SHGetKnownFolderPath 失败", hr); + return result; + } + + result = fontsFolder; + CoTaskMemFree(fontsFolder); + } + + return result; +} + bool OverlayDrawer::_BuildFonts() noexcept { const std::wstring& language 
= GetAppLanguage(); - ImFontAtlas& fontAtlas = *ImGui::GetIO().Fonts; - const MagOptions& options = MagApp::Get().GetOptions(); - // 3D 游戏模式下字体纹理中有光标纹理,不支持缓存 - const bool fontCacheDisabled = options.IsDisableFontCache() || options.Is3DGameMode(); + const bool fontCacheDisabled = ScalingWindow::Get().Options().IsFontCacheDisabled(); if (!fontCacheDisabled && ImGuiFontsCacheManager::Get().Load(language, fontAtlas)) { _fontUI = fontAtlas.Fonts[0]; _fontMonoNumbers = fontAtlas.Fonts[1]; _fontFPS = fontAtlas.Fonts[2]; - return true; - } + } else { + fontAtlas.Flags |= ImFontAtlasFlags_NoPowerOfTwoHeight | ImFontAtlasFlags_NoMouseCursors; - fontAtlas.Flags |= ImFontAtlasFlags_NoPowerOfTwoHeight; - if (!MagApp::Get().GetOptions().Is3DGameMode()) { - // 非 3D 游戏模式无需 ImGui 绘制光标 - fontAtlas.Flags |= ImFontAtlasFlags_NoMouseCursors; - } + std::wstring fontPath = GetSystemFontsFolder(); + if (Win32Utils::GetOSVersion().IsWin11()) { + fontPath += L"\\SegUIVar.ttf"; + } else { + fontPath += L"\\segoeui.ttf"; + } - std::wstring fontPath = GetSystemFontsFolder(); - if (Win32Utils::GetOSVersion().IsWin11()) { - fontPath += L"\\SegUIVar.ttf"; - } else { - fontPath += L"\\segoeui.ttf"; - } + std::vector fontData; + if (!Win32Utils::ReadFile(fontPath.c_str(), fontData)) { + Logger::Get().Error("读取字体文件失败"); + return false; + } - std::vector fontData; - if (!Win32Utils::ReadFile(fontPath.c_str(), fontData)) { - Logger::Get().Error("读取字体文件失败"); - return false; - } + { + // 构建 ImFontAtlas 前 uiRanges 不能析构,因为 ImGui 只保存了指针 + ImVector uiRanges; + _BuildFontUI(language, fontData, uiRanges); + _BuildFontFPS(fontData); + + if (!fontAtlas.Build()) { + Logger::Get().Error("构建 ImFontAtlas 失败"); + return false; + } + } - // 构建字体前 uiRanges 不能析构,因为 ImGui 只保存了指针 - ImVector uiRanges; - _BuildFontUI(language, fontData, uiRanges); - _BuildFontFPS(fontData); + if (!fontCacheDisabled) { + ImGuiFontsCacheManager::Get().Save(language, fontAtlas); + } + } - if (!fontAtlas.Build()) { + if 
(!_imguiImpl.BuildFonts()) { Logger::Get().Error("构建字体失败"); return false; } - if (!fontCacheDisabled) { - ImGuiFontsCacheManager::Get().Save(language, fontAtlas); - } - return true; } -void OverlayDrawer::_BuildFontUI(std::wstring_view language, const std::vector& fontData, ImVector& uiRanges) noexcept { +void OverlayDrawer::_BuildFontUI( + std::wstring_view language, + const std::vector& fontData, + ImVector& uiRanges +) noexcept { ImFontAtlas& fontAtlas = *ImGui::GetIO().Fonts; std::string extraFontPath; @@ -392,7 +434,7 @@ void OverlayDrawer::_BuildFontUI(std::wstring_view language, const std::vector& fontData) noexcept (void*)fontData.data(), (int)fontData.size(), fpsSize, &config, (const ImWchar*)L" FFPPSS"); } -static std::string_view GetEffectDisplayName(const EffectDesc* desc) noexcept { - auto delimPos = desc->name.find_last_of('\\'); +static std::string_view GetEffectDisplayName(const Renderer::EffectInfo* effectInfo) noexcept { + auto delimPos = effectInfo->name.find_last_of('\\'); if (delimPos == std::string::npos) { - return desc->name; + return effectInfo->name; } else { - return std::string_view(desc->name.begin() + delimPos + 1, desc->name.end()); + return std::string_view(effectInfo->name.begin() + delimPos + 1, effectInfo->name.end()); } } -static void DrawTextWithFont(const char* text, ImFont* font) noexcept { - ImGui::PushFont(font); - ImGui::TextUnformatted(text); - ImGui::PopFont(); -} - -// 返回鼠标悬停的项的序号,未悬停于任何项返回 -1 -int OverlayDrawer::_DrawEffectTimings( - const _EffectTimings& et, - bool showPasses, - float maxWindowWidth, - std::span colors, - bool singleEffect -) noexcept { +bool OverlayDrawer::_DrawTimingItem( + const char* text, + const ImColor* color, + float time, + bool isExpanded +) const noexcept { ImGui::TableNextRow(); ImGui::TableNextColumn(); - int result = -1; + const std::string timeStr = fmt::format("{:.3f} ms", time); + const float timeWidth = _fontMonoNumbers->CalcTextSizeA( + ImGui::GetFontSize(), FLT_MAX, 0.0f, 
timeStr.c_str()).x; + + // 计算布局 + static constexpr float spacingBeforeText = 3; + static constexpr float spacingAfterText = 8; + const float descWrapPos = ImGui::GetCursorPosX() + ImGui::GetContentRegionAvail().x - timeWidth - spacingAfterText; + const float descHeight = ImGui::CalcTextSize( + text, nullptr, false, descWrapPos - ImGui::GetCursorPosX() - (color ? ImGui::CalcTextSize(COLOR_INDICATOR).x + spacingBeforeText : 0)).y; - if (!singleEffect && (et.passTimings.size() == 1 || !showPasses)) { - ImGui::Selectable("", false, ImGuiSelectableFlags_SpanAllColumns); + const float fontHeight = ImGui::GetFont()->FontSize; + + bool isHovered = false; + if (color) { + ImGui::Selectable("", false, 0, ImVec2(0, descHeight)); if (ImGui::IsItemHovered()) { - result = 0; + isHovered = true; } ImGui::SameLine(0, 0); - ImGui::PushStyleColor(ImGuiCol_Text, (ImU32)colors[0]); - ImGui::TextUnformatted("■"); + ImGui::PushStyleColor(ImGuiCol_Text, (ImU32)*color); + + if (descHeight >= fontHeight * 2) { + // 不知为何 SetCursorPos 不起作用 + // 所以这里使用占位竖直居中颜色框 + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2()); + ImGui::BeginGroup(); + ImGui::Dummy(ImVec2(0, (descHeight - fontHeight) / 2)); + ImGui::TextUnformatted(COLOR_INDICATOR); + ImGui::EndGroup(); + ImGui::PopStyleVar(); + } else { + ImGui::TextUnformatted(COLOR_INDICATOR); + } ImGui::PopStyleColor(); - ImGui::SameLine(0, 3); - } - - ImGui::TextUnformatted(std::string(GetEffectDisplayName(et.desc)).c_str()); - ImGui::TableNextColumn(); + ImGui::SameLine(0, spacingBeforeText); + } - const float rightAlignSpace = ImGui::CalcTextSize("0").x; + ImGui::PushTextWrapPos(descWrapPos); + ImGui::TextUnformatted(text); + ImGui::PopTextWrapPos(); + ImGui::SameLine(0, 0); - if (et.passTimings.size() > 1) { - if (showPasses) { - ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1, 1, 1, 0.5f)); - } + // 描述过长导致换行时竖直居中时间 + if (color && descHeight >= fontHeight * 2) { + ImGui::SetCursorPosY(ImGui::GetCursorPosY() + (descHeight - fontHeight) / 2); + } 
- if (et.totalTime < 10) { - // 右对齐 - ImGui::Dummy(ImVec2(rightAlignSpace, 0)); - ImGui::SameLine(0, 0); - } - DrawTextWithFont(fmt::format("{:.3f} ms", et.totalTime).c_str(), _fontMonoNumbers); + if (isExpanded) { + ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(1, 1, 1, 0.5f)); + } - if (showPasses) { - ImGui::PopStyleColor(); - } + ImGui::PushFont(_fontMonoNumbers); + ImGui::SetCursorPosX(descWrapPos + spacingAfterText); + ImGui::TextUnformatted(timeStr.c_str()); + ImGui::PopFont(); - if (showPasses) { - for (size_t j = 0; j < et.passTimings.size(); ++j) { - ImGui::TableNextRow(); - ImGui::TableNextColumn(); - - ImGui::Indent(20); - - float fontHeight = ImGui::GetFont()->FontSize; - std::string time = fmt::format("{:.3f} ms", et.passTimings[j]); - // 手动计算布局 - // 运行到此处时还无法确定是否需要滚动条,这里始终减去滚动条的宽度,否则展开时可能会有一帧的跳跃 - float descWrap = maxWindowWidth - ImGui::CalcTextSize(time.c_str()).x - ImGui::GetStyle().WindowPadding.x - ImGui::GetStyle().ScrollbarSize - ImGui::GetStyle().CellPadding.x * 2; - float descHeight = ImGui::CalcTextSize(et.desc->passes[j].desc.c_str(), nullptr, false, descWrap - ImGui::GetCursorPos().x - ImGui::CalcTextSize("■").x - 3).y; - - ImGui::PushStyleColor(ImGuiCol_Text, (ImU32)colors[j]); - if (descHeight >= fontHeight * 2) { - // 不知为何 SetCursorPos 不起作用 - // 所以这里使用占位竖直居中颜色框 - ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2()); - ImGui::BeginGroup(); - ImGui::Dummy(ImVec2(0, (descHeight - fontHeight) / 2)); - ImGui::TextUnformatted("■"); - ImGui::EndGroup(); - ImGui::PopStyleVar(); - } else { - ImGui::TextUnformatted("■"); - } + if (isExpanded) { + ImGui::PopStyleColor(); + } - ImGui::PopStyleColor(); - ImGui::SameLine(0, 3); + return isHovered; +} - ImGui::PushTextWrapPos(descWrap); - ImGui::TextUnformatted(et.desc->passes[j].desc.c_str()); - ImGui::PopTextWrapPos(); - ImGui::Unindent(20); +// 返回鼠标悬停的项的序号,未悬停于任何项返回 -1 +int OverlayDrawer::_DrawEffectTimings( + const _EffectDrawInfo& drawInfo, + bool showPasses, + std::span colors, + bool 
singleEffect +) const noexcept { + int result = -1; - ImGui::SameLine(0, 0); - ImGui::Selectable("", false, ImGuiSelectableFlags_SpanAllColumns, ImVec2(0, descHeight)); - if (ImGui::IsItemHovered()) { - result = (int)j; - } + showPasses &= drawInfo.passTimings.size() > 1; + if (_DrawTimingItem( + std::string(GetEffectDisplayName(drawInfo.info)).c_str(), + (!singleEffect && !showPasses) ? &colors[0] : nullptr, + drawInfo.totalTime, + showPasses + )) { + result = 0; + } - ImGui::TableNextColumn(); - // 描述过长导致换行时竖直居中时间 - if (descHeight >= fontHeight * 2) { - ImGui::SetCursorPosY(ImGui::GetCursorPosY() + (descHeight - fontHeight) / 2); - } + if (showPasses) { + for (size_t j = 0; j < drawInfo.passTimings.size(); ++j) { + ImGui::Indent(16); - if (et.passTimings[j] < 10) { - ImGui::Dummy(ImVec2(rightAlignSpace, 0)); - ImGui::SameLine(0, 0); - } - DrawTextWithFont(time.c_str(), _fontMonoNumbers); + if (_DrawTimingItem( + drawInfo.info->passNames[j].c_str(), + &colors[j], + drawInfo.passTimings[j] + )) { + result = (int)j; } + + ImGui::Unindent(16); } - } else { - if (et.totalTime < 10) { - ImGui::Dummy(ImVec2(rightAlignSpace, 0)); - ImGui::SameLine(0, 0); - } - DrawTextWithFont(fmt::format("{:.3f} ms", et.totalTime).c_str(), _fontMonoNumbers); } return result; } -void OverlayDrawer::_DrawTimelineItem(ImU32 color, float dpiScale, std::string_view name, float time, float effectsTotalTime, bool selected) { +void OverlayDrawer::_DrawTimelineItem( + ImU32 color, + float dpiScale, + std::string_view name, + float time, + float effectsTotalTime, + bool selected +) { ImGui::TableSetBgColor(ImGuiTableBgTarget_CellBg, color); ImGui::PushStyleColor(ImGuiCol_HeaderActive, color); ImGui::PushStyleColor(ImGuiCol_HeaderHovered, color); @@ -641,11 +683,13 @@ void OverlayDrawer::_DrawTimelineItem(ImU32 color, float dpiScale, std::string_v if (itemWidth - (selected ? 
0 : itemSpacing) > textWidth + 4 * _dpiScale) { ImGui::SameLine(0, 0); ImGui::SetCursorPosX(ImGui::GetCursorPosX() + (itemWidth - textWidth - itemSpacing) / 2); + // 竖直方向居中 + ImGui::SetCursorPosY(ImGui::GetCursorPosY() - 0.5f * _dpiScale); ImGui::TextUnformatted(text.c_str()); } } -void OverlayDrawer::_DrawFPS() noexcept { +void OverlayDrawer::_DrawFPS(uint32_t fps) noexcept { static float oldOpacity = 0.0f; static float opacity = 0.0f; static bool isLocked = false; @@ -689,21 +733,21 @@ void OverlayDrawer::_DrawFPS() noexcept { cursorPos.y -= 3; ImGui::SetCursorPosY(cursorPos.y); - std::string fps = fmt::format("{} FPS", MagApp::Get().GetRenderer().GetGPUTimer().GetFramesPerSecond()); + std::string fpsStr = fmt::format("{} FPS", fps); if (drawShadow) { ImGui::SetCursorPos(ImVec2(cursorPos.x + 1.0f, cursorPos.y + 1.0f)); ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.0f, 0.0f, 0.0f, 0.8f)); - ImGui::TextUnformatted(fps.c_str()); + ImGui::TextUnformatted(fpsStr.c_str()); ImGui::PopStyleColor(); ImGui::SetCursorPos(cursorPos); ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.0f, 0.0f, 0.0f, 0.6f)); - ImGui::TextUnformatted(fps.c_str()); + ImGui::TextUnformatted(fpsStr.c_str()); ImGui::PopStyleColor(); ImGui::SetCursorPos(cursorPos); } - ImGui::TextUnformatted(fps.c_str()); + ImGui::TextUnformatted(fpsStr.c_str()); ImGui::PopFont(); @@ -730,170 +774,106 @@ void OverlayDrawer::_DrawFPS() noexcept { ImGui::PopStyleVar(); } -// 自定义提示 -static void MyPlotLines(float(*values_getter)(void* data, int idx), void* data, int values_count, int values_offset, const char* overlay_text, float scale_min, float scale_max, ImVec2 graph_size) { - // 通过改变光标位置避免绘制提示窗口 - const ImVec2 mousePos = ImGui::GetIO().MousePos; - ImGui::GetIO().MousePos = ImVec2(-FLT_MAX, -FLT_MAX); - ImGui::PlotLines("", values_getter, data, values_count, values_offset, overlay_text, scale_min, scale_max, graph_size); - ImGui::GetIO().MousePos = mousePos; - - ImVec2 framePadding = ImGui::GetStyle().FramePadding; - 
ImVec2 graphRectMin = ImGui::GetItemRectMin(); - ImVec2 graphRectMax = ImGui::GetItemRectMax(); - - float innerRectLeft = graphRectMin.x + framePadding.x; - float innerRectTop = graphRectMin.y + framePadding.y; - float innerRectRight = graphRectMax.x - framePadding.x; - float innerRectBottom = graphRectMax.y - framePadding.y; - - // 检查光标是否在图表上 - if (mousePos.x < innerRectLeft || mousePos.y < innerRectTop || - mousePos.x >= innerRectRight || mousePos.y >= innerRectBottom) { - return; - } +// 返回 true 表示应再渲染一次 +bool OverlayDrawer::_DrawUI(const SmallVector& effectTimings, uint32_t fps) noexcept { + const ScalingOptions& options = ScalingWindow::Get().Options(); + const Renderer& renderer = ScalingWindow::Get().Renderer(); - // 获取光标位置对应的值 - float t = std::clamp((mousePos.x - innerRectLeft) / (innerRectRight - innerRectLeft), 0.0f, 0.9999f); - int v_idx = (int)(t * values_count); - float v0 = values_getter(data, (v_idx + values_offset) % values_count); + const uint32_t passCount = (uint32_t)_effectTimingsStatistics.size(); - ImGuiImpl::Tooltip(fmt::format("{:.1f}", v0).c_str()); -} + bool needRedraw = false; + + // effectTimings 为空表示后端没有渲染新的帧 + if (!effectTimings.empty()) { + steady_clock::time_point now = steady_clock::now(); + if (_lastUpdateTime == steady_clock::time_point{}) { + // 后端渲染的第一帧 + _lastUpdateTime = now; + + for (uint32_t i = 0; i < passCount; ++i) { + _lastestAvgEffectTimings[i] = effectTimings[i]; + } + } else { + if (now - _lastUpdateTime > 500ms) { + // 更新间隔不少于 500ms,而不是 500ms 更新一次 + _lastUpdateTime = now; + + for (uint32_t i = 0; i < passCount; ++i) { + auto& [total, count] = _effectTimingsStatistics[i]; + if (count > 0) { + _lastestAvgEffectTimings[i] = total / count; + } + + count = 0; + total = 0; + } + } -void OverlayDrawer::_DrawUI() noexcept { - auto& settings = MagApp::Get().GetOptions(); - auto& renderer = MagApp::Get().GetRenderer(); - auto& gpuTimer = renderer.GetGPUTimer(); + for (uint32_t i = 0; i < passCount; ++i) { + auto& [total, 
count] = _effectTimingsStatistics[i]; + // 有时会跳过某些效果的渲染,即渲染时间为 0,这时不应计入 + if (effectTimings[i] > 1e-3) { + ++count; + total += effectTimings[i]; + } + } + } + } #ifdef _DEBUG ImGui::ShowDemoWindow(); #endif - const float maxWindowWidth = 400 * _dpiScale; - ImGui::SetNextWindowSizeConstraints(ImVec2(), ImVec2(maxWindowWidth, 500 * _dpiScale)); + { + const float windowWidth = 310 * _dpiScale; + ImGui::SetNextWindowSizeConstraints(ImVec2(windowWidth, 0.0f), ImVec2(windowWidth, 500 * _dpiScale)); - static float initPosX = Win32Utils::GetSizeOfRect(MagApp::Get().GetRenderer().GetOutputRect()).cx - maxWindowWidth; - ImGui::SetNextWindowPos(ImVec2(initPosX, 20), ImGuiCond_FirstUseEver); + static float initPosX = Win32Utils::GetSizeOfRect(renderer.DestRect()).cx - windowWidth; + ImGui::SetNextWindowPos(ImVec2(initPosX, 20), ImGuiCond_FirstUseEver); + } std::string profilerStr = _GetResourceString(L"Overlay_Profiler"); if (!ImGui::Begin(profilerStr.c_str(), nullptr, ImGuiWindowFlags_NoNav | ImGuiWindowFlags_AlwaysAutoResize)) { ImGui::End(); - return; + return needRedraw; } - // 始终为滚动条预留空间 - ImGui::PushTextWrapPos(maxWindowWidth - ImGui::GetStyle().WindowPadding.x - ImGui::GetStyle().ScrollbarSize); + ImGui::PushTextWrapPos(); ImGui::TextUnformatted(StrUtils::Concat("GPU: ", _hardwareInfo.gpuName).c_str()); - const std::string& vSyncStr = _GetResourceString(L"Overlay_Profiler_VSync"); - const std::string& stateStr = _GetResourceString(settings.IsVSync() ? 
L"ToggleSwitch/OnContent" : L"ToggleSwitch/OffContent"); - ImGui::TextUnformatted(StrUtils::Concat(vSyncStr, ": ", stateStr).c_str()); const std::string& captureMethodStr = _GetResourceString(L"Overlay_Profiler_CaptureMethod"); - ImGui::TextUnformatted(StrUtils::Concat(captureMethodStr.c_str(), ": ", MagApp::Get().GetFrameSource().GetName()).c_str()); - ImGui::PopTextWrapPos(); - - ImGui::Spacing(); - - static constexpr UINT nSamples = 180; - - if (_frameTimes.size() >= nSamples) { - _frameTimes.erase(_frameTimes.begin(), _frameTimes.begin() + (_frameTimes.size() - nSamples + 1)); - } else if (_frameTimes.size() < nSamples) { - _frameTimes.insert(_frameTimes.begin(), nSamples - _frameTimes.size() - 1, 0); - } - _frameTimes.push_back(std::chrono::duration_cast>(gpuTimer.GetElapsedTime()).count()); - _validFrames = std::min(_validFrames + 1, nSamples); - - // 帧率统计,支持在渲染时间和 FPS 间切换 - const std::string& frameStatisticsStr = _GetResourceString(L"Overlay_Profiler_FrameStatistics"); - if (ImGui::CollapsingHeader(frameStatisticsStr.c_str(), ImGuiTreeNodeFlags_DefaultOpen)) { - static bool showFrameRates = true; - - ImGui::Spacing(); - const std::string& buttonStr = _GetResourceString(showFrameRates - ? 
L"Overlay_Profiler_FrameStatistics_SwitchToFrameTimings" - : L"Overlay_Profiler_FrameStatistics_SwitchToFrameRates"); - if (ImGui::Button(buttonStr.c_str())) { - showFrameRates = !showFrameRates; - } - ImGui::Spacing(); - + ImGui::TextUnformatted(StrUtils::Concat(captureMethodStr.c_str(), ": ", renderer.FrameSource().Name()).c_str()); + if (options.IsStatisticsForDynamicDetectionEnabled() && + options.duplicateFrameDetectionMode == DuplicateFrameDetectionMode::Dynamic) { + const std::pair statistics = + renderer.FrameSource().GetStatisticsForDynamicDetection(); + ImGui::TextUnformatted(StrUtils::Concat(_GetResourceString(L"Overlay_Profiler_DynamicDetection"), ": ").c_str()); + ImGui::SameLine(0, 0); ImGui::PushFont(_fontMonoNumbers); - - if (showFrameRates) { - float totalTime = 0; - float minTime = FLT_MAX; - float minTime2 = FLT_MAX; - for (UINT i = nSamples - _validFrames; i < nSamples; ++i) { - totalTime += _frameTimes[i]; - - if (_frameTimes[i] <= minTime) { - minTime2 = minTime; - minTime = _frameTimes[i]; - } else if (_frameTimes[i] < minTime2) { - minTime2 = _frameTimes[i]; - } - } - - if (minTime2 == FLT_MAX) { - minTime2 = minTime; - } - - // 减少抖动 - // 1. 使用第二小的值以缓解尖峰导致的抖动 - // 2. 以 30 为最小变化单位 - const float maxFPS = std::bit_ceil((UINT)std::ceilf((1000 / minTime2 - 10) / 30)) * 30 * 1.7f; - - MyPlotLines([](void* data, int idx) { - float time = (*(std::deque*)data)[idx]; - return time < 1e-6 ? 
0 : 1000 / time; - }, &_frameTimes, (int)_frameTimes.size(), 0, fmt::format("avg: {:.1f} FPS", _validFrames * 1000 / totalTime).c_str(), 0, maxFPS, ImVec2(250 * _dpiScale, 80 * _dpiScale)); - } else { - float totalTime = 0; - float maxTime = 0; - float maxTime2 = 0; - for (UINT i = nSamples - _validFrames; i < nSamples; ++i) { - totalTime += _frameTimes[i]; - - if (_frameTimes[i] >= maxTime) { - maxTime2 = maxTime; - maxTime = _frameTimes[i]; - } else if (_frameTimes[i] > maxTime2) { - maxTime2 = _frameTimes[i]; - } - } - - if (maxTime2 == 0) { - maxTime2 = maxTime; - } - - // 使用第二大的值以缓解尖峰导致的抖动 - MyPlotLines([](void* data, int idx) { - return (*(std::deque*)data)[idx]; - }, &_frameTimes, (int)_frameTimes.size(), 0, - fmt::format("avg: {:.1f} ms", totalTime / _validFrames).c_str(), - 0, maxTime2 * 1.7f, ImVec2(250 * _dpiScale, 80 * _dpiScale)); - } - + ImGui::TextUnformatted(fmt::format("{}/{} ({:.1f}%)", statistics.first, statistics.second, + statistics.second == 0 ? 0.0f : statistics.first * 100.0f / statistics.second).c_str()); ImGui::PopFont(); } + const std::string& frameRateStr = _GetResourceString(L"Overlay_Profiler_FrameRate"); + ImGui::TextUnformatted(fmt::format("{}: {} FPS", frameRateStr, fps).c_str()); + ImGui::PopTextWrapPos(); ImGui::Spacing(); + // 效果渲染用时 const std::string& timingsStr = _GetResourceString(L"Overlay_Profiler_Timings"); if (ImGui::CollapsingHeader(timingsStr.c_str(), ImGuiTreeNodeFlags_DefaultOpen)) { - const auto& gpuTimings = gpuTimer.GetGPUTimings(); - const UINT nEffect = renderer.GetEffectCount(); - - SmallVector<_EffectTimings, 4> effectTimings(nEffect); + const std::vector& effectInfos = renderer.EffectInfos(); + const uint32_t nEffect = (uint32_t)effectInfos.size(); + + SmallVector<_EffectDrawInfo, 4> effectDrawInfos(effectInfos.size()); { - UINT idx = 0; - for (UINT i = 0; i < nEffect; ++i) { - auto& effectTiming = effectTimings[i]; - effectTiming.desc = &renderer.GetEffectDesc(i); + uint32_t idx = 0; + for (uint32_t i = 0; i < 
nEffect; ++i) { + auto& effectTiming = effectDrawInfos[i]; + effectTiming.info = &effectInfos[i]; - UINT nPass = (UINT)effectTiming.desc->passes.size(); - effectTiming.passTimings = { gpuTimings.passes.begin() + idx, nPass }; + uint32_t nPass = (uint32_t)effectTiming.info->passNames.size(); + effectTiming.passTimings = { _lastestAvgEffectTimings.begin() + idx, nPass }; idx += nPass; for (float t : effectTiming.passTimings) { @@ -903,27 +883,32 @@ void OverlayDrawer::_DrawUI() noexcept { } float effectsTotalTime = 0.0f; - for (const auto& et : effectTimings) { - effectsTotalTime += et.totalTime; + for (const _EffectDrawInfo& drawInfo : effectDrawInfos) { + effectsTotalTime += drawInfo.totalTime; + } + + bool showSwitchButton = false; + for (const _EffectDrawInfo& drawInfo : effectDrawInfos) { + // 某个效果有多个通道,显示切换按钮 + if (drawInfo.passTimings.size() > 1) { + showSwitchButton = true; + break; + } } static bool showPasses = false; - if (nEffect == 1) { - showPasses = effectTimings[0].passTimings.size() > 1; - } else { - for (const auto& et : effectTimings) { - // 某个效果有多个通道,显示切换按钮 - if (et.passTimings.size() > 1) { - ImGui::Spacing(); - const std::string& buttonStr = _GetResourceString(showPasses - ? L"Overlay_Profiler_Timings_SwitchToEffects" - : L"Overlay_Profiler_Timings_SwitchToPasses"); - if (ImGui::Button(buttonStr.c_str())) { - showPasses = !showPasses; - } - break; - } + if (showSwitchButton) { + ImGui::Spacing(); + const std::string& buttonStr = _GetResourceString(showPasses + ? 
L"Overlay_Profiler_Timings_SwitchToEffects" + : L"Overlay_Profiler_Timings_SwitchToPasses"); + if (ImGui::Button(buttonStr.c_str())) { + showPasses = !showPasses; + // 需要再次渲染以处理滚动条导致的布局变化 + needRedraw = true; } + } else { + showPasses = false; } SmallVector colors; @@ -934,28 +919,28 @@ void OverlayDrawer::_DrawUI() noexcept { colors[i] = TIMELINE_COLORS[_timelineColors[i]]; } } else if (showPasses) { - UINT i = 0; - for (const auto& et : effectTimings) { - if (et.passTimings.size() == 1) { + uint32_t i = 0; + for (const _EffectDrawInfo& drawInfo : effectDrawInfos) { + if (drawInfo.passTimings.size() == 1) { colors.push_back(TIMELINE_COLORS[_timelineColors[i]]); ++i; continue; } ++i; - for (UINT j = 0; j < et.passTimings.size(); ++j) { + for (uint32_t j = 0; j < drawInfo.passTimings.size(); ++j) { colors.push_back(TIMELINE_COLORS[_timelineColors[i]]); ++i; } } } else { size_t i = 0; - for (const auto& et : effectTimings) { + for (const _EffectDrawInfo& drawInfo : effectDrawInfos) { colors.push_back(TIMELINE_COLORS[_timelineColors[i]]); ++i; - if (et.passTimings.size() > 1) { - i += et.passTimings.size(); + if (drawInfo.passTimings.size() > 1) { + i += drawInfo.passTimings.size(); } } } @@ -971,40 +956,44 @@ void OverlayDrawer::_DrawUI() noexcept { if (effectsTotalTime > 0) { if (showPasses) { - if (ImGui::BeginTable("timeline", (int)gpuTimings.passes.size())) { - for (UINT i = 0; i < gpuTimings.passes.size(); ++i) { - if (gpuTimings.passes[i] < 1e-5f) { + if (ImGui::BeginTable("timeline", (int)passCount)) { + for (uint32_t i = 0; i < passCount; ++i) { + if (_lastestAvgEffectTimings[i] < 1e-3f) { continue; } ImGui::TableSetupColumn( std::to_string(i).c_str(), ImGuiTableColumnFlags_WidthStretch | ImGuiTableColumnFlags_NoResize | ImGuiTableColumnFlags_NoReorder, - gpuTimings.passes[i] / effectsTotalTime + _lastestAvgEffectTimings[i] / effectsTotalTime ); } ImGui::TableNextRow(); - UINT i = 0; - for (const _EffectTimings& et : effectTimings) { - for (UINT j = 0, end = 
(UINT)et.passTimings.size(); j < end; ++j) { - if (et.passTimings[j] < 1e-5f) { + uint32_t i = 0; + for (const _EffectDrawInfo& drawInfo : effectDrawInfos) { + for (uint32_t j = 0, end = (uint32_t)drawInfo.passTimings.size(); j < end; ++j) { + if (drawInfo.passTimings[j] < 1e-5f) { continue; } ImGui::TableNextColumn(); std::string name; - if (et.passTimings.size() == 1) { - name = std::string(GetEffectDisplayName(et.desc)); + if (drawInfo.passTimings.size() == 1) { + name = std::string(GetEffectDisplayName(drawInfo.info)); } else if (nEffect == 1) { - name = et.desc->passes[j].desc; + name = drawInfo.info->passNames[j]; } else { - name = StrUtils::Concat(GetEffectDisplayName(et.desc), "/", et.desc->passes[j].desc); + name = StrUtils::Concat( + GetEffectDisplayName(drawInfo.info), "/", + drawInfo.info->passNames[j] + ); } - _DrawTimelineItem(colors[i], _dpiScale, name, et.passTimings[j], effectsTotalTime, selectedIdx == (int)i); + _DrawTimelineItem(colors[i], _dpiScale, name, drawInfo.passTimings[j], + effectsTotalTime, selectedIdx == (int)i); ++i; } @@ -1014,28 +1003,35 @@ void OverlayDrawer::_DrawUI() noexcept { } } else { if (ImGui::BeginTable("timeline", nEffect)) { - for (UINT i = 0; i < nEffect; ++i) { - if (effectTimings[i].totalTime < 1e-5f) { + for (uint32_t i = 0; i < nEffect; ++i) { + if (effectDrawInfos[i].totalTime < 1e-5f) { continue; } ImGui::TableSetupColumn( std::to_string(i).c_str(), ImGuiTableColumnFlags_WidthStretch | ImGuiTableColumnFlags_NoResize | ImGuiTableColumnFlags_NoReorder, - effectTimings[i].totalTime / effectsTotalTime + effectDrawInfos[i].totalTime / effectsTotalTime ); } ImGui::TableNextRow(); - for (UINT i = 0; i < nEffect; ++i) { - auto& et = effectTimings[i]; - if (et.totalTime < 1e-5f) { + for (uint32_t i = 0; i < nEffect; ++i) { + auto& drawInfo = effectDrawInfos[i]; + if (drawInfo.totalTime < 1e-5f) { continue; } ImGui::TableNextColumn(); - _DrawTimelineItem(colors[i], _dpiScale, GetEffectDisplayName(et.desc), et.totalTime, 
effectsTotalTime, selectedIdx == (int)i); + _DrawTimelineItem( + colors[i], + _dpiScale, + GetEffectDisplayName(drawInfo.info), + drawInfo.totalTime, + effectsTotalTime, + selectedIdx == (int)i + ); } ImGui::EndTable(); @@ -1062,35 +1058,32 @@ void OverlayDrawer::_DrawUI() noexcept { ImGui::Spacing(); } - + selectedIdx = -1; - ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(ImGui::GetStyle().ItemSpacing.x, ImGui::GetStyle().CellPadding.y * 2)); - if (ImGui::BeginTable("timings", 2, ImGuiTableFlags_PadOuterX)) { - ImGui::TableSetupColumn("name", ImGuiTableColumnFlags_WidthStretch | ImGuiTableColumnFlags_NoResize | ImGuiTableColumnFlags_NoReorder); - ImGui::TableSetupColumn("time", ImGuiTableColumnFlags_WidthFixed | ImGuiTableColumnFlags_NoResize | ImGuiTableColumnFlags_NoReorder); + if (ImGui::BeginTable("timings", 1, ImGuiTableFlags_PadOuterX)) { + ImGui::TableSetupColumn(nullptr, ImGuiTableColumnFlags_WidthStretch | ImGuiTableColumnFlags_NoResize | ImGuiTableColumnFlags_NoReorder); if (nEffect == 1) { - const auto& et = effectTimings[0]; - int hovered = _DrawEffectTimings(et, true, maxWindowWidth, colors, true); + int hovered = _DrawEffectTimings(effectDrawInfos[0], showPasses, colors, true); if (hovered >= 0) { selectedIdx = hovered; } } else { size_t idx = 0; - for (const auto& et : effectTimings) { + for (const _EffectDrawInfo& effectInfo : effectDrawInfos) { int idxBegin = (int)idx; std::span colorSpan; - if (!showPasses || et.passTimings.size() == 1) { + if (!showPasses || effectInfo.passTimings.size() == 1) { colorSpan = std::span(colors.begin() + idx, colors.begin() + idx + 1); ++idx; } else { - colorSpan = std::span(colors.begin() + idx, colors.begin() + idx + et.passTimings.size()); - idx += et.passTimings.size(); + colorSpan = std::span(colors.begin() + idx, colors.begin() + idx + effectInfo.passTimings.size()); + idx += effectInfo.passTimings.size(); } - int hovered = _DrawEffectTimings(et, showPasses, maxWindowWidth, colorSpan, false); + int 
hovered = _DrawEffectTimings(effectInfo, showPasses, colorSpan, false); if (hovered >= 0) { selectedIdx = idxBegin + hovered; } @@ -1103,45 +1096,22 @@ void OverlayDrawer::_DrawUI() noexcept { if (nEffect > 1) { ImGui::Separator(); - if (ImGui::BeginTable("total", 2, ImGuiTableFlags_PadOuterX)) { - ImGui::TableSetupColumn("name", ImGuiTableColumnFlags_WidthStretch | ImGuiTableColumnFlags_NoResize | ImGuiTableColumnFlags_NoReorder); - ImGui::TableSetupColumn("time", ImGuiTableColumnFlags_WidthFixed | ImGuiTableColumnFlags_NoResize | ImGuiTableColumnFlags_NoReorder); + if (ImGui::BeginTable("total", 1, ImGuiTableFlags_PadOuterX)) { + ImGui::TableSetupColumn(nullptr, ImGuiTableColumnFlags_WidthStretch | ImGuiTableColumnFlags_NoResize | ImGuiTableColumnFlags_NoReorder); - ImGui::TableNextRow(); - ImGui::TableNextColumn(); - const std::string& totalStr = _GetResourceString(L"Overlay_Profiler_Timings_Total"); - ImGui::TextUnformatted(totalStr.c_str()); - ImGui::TableNextColumn(); - DrawTextWithFont(fmt::format("{:.3f} ms", effectsTotalTime).c_str(), _fontMonoNumbers); + _DrawTimingItem(_GetResourceString(L"Overlay_Profiler_Timings_Total").c_str(), nullptr, effectsTotalTime); ImGui::EndTable(); } } - ImGui::PopStyleVar(); } - + ImGui::End(); -} - -void OverlayDrawer::_RetrieveHardwareInfo() noexcept { - DXGI_ADAPTER_DESC desc{}; - HRESULT hr = MagApp::Get().GetDeviceResources().GetGraphicsAdapter()->GetDesc(&desc); - _hardwareInfo.gpuName = SUCCEEDED(hr) ? 
StrUtils::UTF16ToUTF8(desc.Description) : "UNAVAILABLE"; -} - -void OverlayDrawer::_EnableSrcWnd(bool enable) noexcept { - HWND hwndSrc = MagApp::Get().GetHwndSrc(); - if (!_isSrcMainWnd) { - // 如果源窗口是 Magpie 主窗口会卡死 - EnableWindow(hwndSrc, TRUE); - } - if (enable) { - SetForegroundWindow(hwndSrc); - } + return needRedraw; } const std::string& OverlayDrawer::_GetResourceString(const std::wstring_view& key) noexcept { - static phmap::flat_hash_map cache; + static phmap::flat_hash_map cache; if (auto it = cache.find(key); it != cache.end()) { return it->second; diff --git a/src/Magpie.Core/OverlayDrawer.h b/src/Magpie.Core/OverlayDrawer.h index fdfc9b859..8a6ae7c58 100644 --- a/src/Magpie.Core/OverlayDrawer.h +++ b/src/Magpie.Core/OverlayDrawer.h @@ -2,52 +2,65 @@ #include #include "SmallVector.h" #include +#include "ImGuiImpl.h" +#include "Renderer.h" namespace Magpie::Core { -struct EffectDesc; -class ImGuiImpl; - class OverlayDrawer { public: - OverlayDrawer() noexcept; + OverlayDrawer(); OverlayDrawer(const OverlayDrawer&) = delete; OverlayDrawer(OverlayDrawer&&) = delete; ~OverlayDrawer(); - bool Initialize() noexcept; - - void Draw() noexcept; + bool Initialize(DeviceResources* deviceResources) noexcept; + + void Draw( + uint32_t count, + uint32_t fps, + const SmallVector& effectTimings + ) noexcept; - bool IsUIVisiable() const noexcept { + bool IsUIVisible() const noexcept { return _isUIVisiable; } - void SetUIVisibility(bool value) noexcept; + void SetUIVisibility(bool value, bool noSetForeground = false) noexcept; + + void MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept; private: bool _BuildFonts() noexcept; void _BuildFontUI(std::wstring_view language, const std::vector& fontData, ImVector& uiRanges) noexcept; void _BuildFontFPS(const std::vector& fontData) noexcept; - struct _EffectTimings { - const EffectDesc* desc = nullptr; + struct _EffectDrawInfo { + const Renderer::EffectInfo* info = nullptr; std::span passTimings; float totalTime = 
0.0f; }; - int _DrawEffectTimings(const _EffectTimings& et, bool showPasses, float maxWindowWidth, std::span colors, bool singleEffect) noexcept; + bool _DrawTimingItem( + const char* text, + const ImColor* color, + float time, + bool isExpanded = false + ) const noexcept; - void _DrawTimelineItem(ImU32 color, float dpiScale, std::string_view name, float time, float effectsTotalTime, bool selected = false); - - void _DrawFPS() noexcept; + int _DrawEffectTimings( + const _EffectDrawInfo& drawInfo, + bool showPasses, + std::span colors, + bool singleEffect + ) const noexcept; - void _DrawUI() noexcept; + void _DrawTimelineItem(ImU32 color, float dpiScale, std::string_view name, float time, float effectsTotalTime, bool selected = false); - void _RetrieveHardwareInfo() noexcept; + void _DrawFPS(uint32_t fps) noexcept; - void _EnableSrcWnd(bool enable) noexcept; + bool _DrawUI(const SmallVector& effectTimings, uint32_t fps) noexcept; const std::string& _GetResourceString(const std::wstring_view& key) noexcept; @@ -57,21 +70,23 @@ class OverlayDrawer { ImFont* _fontMonoNumbers = nullptr; // 普通 UI 文字,但数字部分是等宽的,只支持 ASCII ImFont* _fontFPS = nullptr; // FPS - std::deque _frameTimes; - UINT _validFrames = 0; + std::chrono::steady_clock::time_point _lastUpdateTime{}; + // (总计时间, 帧数) + SmallVector, 0> _effectTimingsStatistics; + SmallVector _lastestAvgEffectTimings; - SmallVector _timelineColors; + SmallVector _timelineColors; struct { std::string gpuName; } _hardwareInfo; - std::unique_ptr _imguiImpl; + ImGuiImpl _imguiImpl; - winrt::ResourceLoader _resourceLoader = winrt::ResourceLoader::GetForViewIndependentUse(); + winrt::ResourceLoader _resourceLoader{ nullptr }; bool _isUIVisiable = false; - bool _isSrcMainWnd = false; + bool _isFirstFrame = true; }; } diff --git a/src/Magpie.Core/Renderer.cpp b/src/Magpie.Core/Renderer.cpp index cd18aa00f..ea8a67fe7 100644 --- a/src/Magpie.Core/Renderer.cpp +++ b/src/Magpie.Core/Renderer.cpp @@ -1,502 +1,877 @@ #include "pch.h" #include 
"Renderer.h" -#include "MagApp.h" +#include "DeviceResources.h" +#include "ScalingOptions.h" +#include "Logger.h" #include "Win32Utils.h" +#include "EffectDrawer.h" #include "StrUtils.h" +#include "Utils.h" #include "EffectCompiler.h" -#include "FrameSourceBase.h" -#include "DeviceResources.h" -#include "GPUTimer.h" -#include "EffectDrawer.h" +#include "GraphicsCaptureFrameSource.h" +#include "DesktopDuplicationFrameSource.h" +#include "GDIFrameSource.h" +#include "DwmSharedSurfaceFrameSource.h" +#include "DirectXHelper.h" +#include +#include "ScalingWindow.h" #include "OverlayDrawer.h" -#include "Logger.h" #include "CursorManager.h" -#include "WindowHelper.h" -#include "Utils.h" +#include "EffectsProfiler.h" namespace Magpie::Core { -Renderer::Renderer() {} +Renderer::Renderer() noexcept {} + +Renderer::~Renderer() noexcept { + if (_hKeyboardHook) { + UnhookWindowsHookEx(_hKeyboardHook); + } + + if (_backendThread.joinable()) { + DWORD backendThreadId = GetThreadId(_backendThread.native_handle()); + // 持续尝试直到 _backendThread 创建了消息队列 + while (!PostThreadMessage(backendThreadId, WM_QUIT, 0, 0)) { + Sleep(1); + } + _backendThread.join(); + } +} + +// 监听 PrintScreen 实现截屏时隐藏光标 +LRESULT CALLBACK Renderer::_LowLevelKeyboardHook(int nCode, WPARAM wParam, LPARAM lParam) { + if (nCode != HC_ACTION || wParam != WM_KEYDOWN) { + return CallNextHookEx(NULL, nCode, wParam, lParam); + } -Renderer::~Renderer() {} + KBDLLHOOKSTRUCT* info = (KBDLLHOOKSTRUCT*)lParam; + if (info->vkCode == VK_SNAPSHOT) { + // 为了缩短钩子处理时间,异步执行所有逻辑 + ScalingWindow::Get().Dispatcher().TryEnqueue([]() -> winrt::fire_and_forget { + // 暂时隐藏光标 + Renderer& renderer = ScalingWindow::Get().Renderer(); + renderer._cursorDrawer.IsCursorVisible(false); + renderer._FrontendRender(); -bool Renderer::Initialize() { - _gpuTimer.reset(new GPUTimer()); + const HWND hwndScaling = ScalingWindow::Get().Handle(); - if (!GetWindowRect(MagApp::Get().GetHwndSrc(), &_srcWndRect)) { - Logger::Get().Win32Error("GetWindowRect 失败"); 
+ winrt::DispatcherQueue dispatcher = ScalingWindow::Get().Dispatcher(); + co_await 200ms; + co_await dispatcher; + + if (ScalingWindow::Get().Handle() == hwndScaling && + !renderer._cursorDrawer.IsCursorVisible() + ) { + renderer._cursorDrawer.IsCursorVisible(true); + renderer._FrontendRender(); + } + }); + } + + return CallNextHookEx(NULL, nCode, wParam, lParam); +} + +static void LogAdapter(IDXGIAdapter4* adapter) noexcept { + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + Logger::Get().Info(fmt::format("当前图形适配器:\n\tVendorId:{:#x}\n\tDeviceId:{:#x}\n\tDescription:{}", + desc.VendorId, desc.DeviceId, StrUtils::UTF16ToUTF8(desc.Description))); +} + +bool Renderer::Initialize() noexcept { + _backendThread = std::thread(std::bind(&Renderer::_BackendThreadProc, this)); + + if (!_frontendResources.Initialize()) { + Logger::Get().Error("初始化前端资源失败"); return false; } - if (!_BuildEffects()) { - Logger::Get().Error("_BuildEffects 失败"); + LogAdapter(_frontendResources.GetGraphicsAdapter()); + + if (!_CreateSwapChain()) { + Logger::Get().Error("_CreateSwapChain 失败"); return false; } - if (MagApp::Get().GetOptions().IsShowFPS()) { - _overlayDrawer.reset(new OverlayDrawer()); - if (!_overlayDrawer->Initialize()) { - _overlayDrawer.reset(); - Logger::Get().Error("初始化 OverlayDrawer 失败"); - } + // 等待后端初始化完成 + _sharedTextureHandle.wait(NULL, std::memory_order_relaxed); + const HANDLE sharedTextureHandle = _sharedTextureHandle.load(std::memory_order_acquire); + if (sharedTextureHandle == INVALID_HANDLE_VALUE) { + Logger::Get().Error("后端初始化失败"); + return false; } - // 初始化所有效果共用的动态常量缓冲区 - D3D11_BUFFER_DESC bd{}; - bd.Usage = D3D11_USAGE_DYNAMIC; - bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - bd.ByteWidth = 4 * (UINT)_dynamicConstants.size(); - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - - HRESULT hr = MagApp::Get().GetDeviceResources().GetD3DDevice() - ->CreateBuffer(&bd, nullptr, _dynamicCB.put()); + // 获取共享纹理 + HRESULT hr = 
_frontendResources.GetD3DDevice()->OpenSharedResource( + sharedTextureHandle, IID_PPV_ARGS(_frontendSharedTexture.put())); if (FAILED(hr)) { - Logger::Get().ComError("CreateBuffer 失败", hr); + Logger::Get().ComError("OpenSharedResource 失败", hr); return false; } - return true; -} + _frontendSharedTextureMutex = _frontendSharedTexture.try_as(); + D3D11_TEXTURE2D_DESC desc; + _frontendSharedTexture->GetDesc(&desc); -void Renderer::Render(bool onPrint) { - int srcState = _CheckSrcState(); - if (srcState != 0) { - Logger::Get().Info("源窗口状态改变,退出全屏"); - MagApp::Get().Stop(srcState == 2); - return; + const RECT& scalingWndRect = ScalingWindow::Get().WndRect(); + _destRect.left = (scalingWndRect.left + scalingWndRect.right - (LONG)desc.Width) / 2; + _destRect.top = (scalingWndRect.top + scalingWndRect.bottom - (LONG)desc.Height) / 2; + _destRect.right = _destRect.left + (LONG)desc.Width; + _destRect.bottom = _destRect.top + (LONG)desc.Height; + + if (!_cursorDrawer.Initialize(_frontendResources, _backBuffer.get())) { + Logger::Get().ComError("初始化 CursorDrawer 失败", hr); + return false; } - DeviceResources& dr = MagApp::Get().GetDeviceResources(); + if (ScalingWindow::Get().Options().IsShowFPS()) { + _overlayDrawer.reset(new OverlayDrawer()); + if (!_overlayDrawer->Initialize(&_frontendResources)) { + Logger::Get().Error("初始化 OverlayDrawer 失败"); + return false; + } + } - if (!_waitingForNextFrame) { - dr.BeginFrame(); - _gpuTimer->OnBeginFrame(); + _hKeyboardHook = SetWindowsHookEx(WH_KEYBOARD_LL, _LowLevelKeyboardHook, NULL, 0); + if (!_hKeyboardHook) { + Logger::Get().Win32Warn("SetWindowsHookEx 失败"); } - // 首先处理配置改变产生的回调 - // MagApp::Get().GetOptions().OnBeginFrame(); + return true; +} - auto state = onPrint ? 
FrameSourceBase::UpdateState::NoUpdate : MagApp::Get().GetFrameSource().Update(); - _waitingForNextFrame = state == FrameSourceBase::UpdateState::Waiting - || state == FrameSourceBase::UpdateState::Error; - if (_waitingForNextFrame) { - return; +static bool CheckMultiplaneOverlaySupport(IDXGISwapChain4* swapChain) noexcept { + winrt::com_ptr output; + HRESULT hr = swapChain->GetContainingOutput(output.put()); + if (FAILED(hr)) { + Logger::Get().ComError("获取 IDXGIOutput 失败", hr); + return false; } - MagApp::Get().GetCursorManager().OnBeginFrame(); - - if (!_UpdateDynamicConstants()) { - Logger::Get().Error("_UpdateDynamicConstants 失败"); + winrt::com_ptr output2 = output.try_as(); + if (!output2) { + Logger::Get().Info("获取 IDXGIOutput2 失败"); + return false; } - auto d3dDC = dr.GetD3DDC(); + return output2->SupportsOverlays(); +} - { - ID3D11Buffer* t = _dynamicCB.get(); - d3dDC->CSSetConstantBuffers(0, 1, &t); - } +void Renderer::OnCursorVisibilityChanged(bool isVisible, bool onDestory) { + _backendThreadDispatcher.TryEnqueue([this, isVisible, onDestory]() { + if (_frameSource) { + _frameSource->OnCursorVisibilityChanged(isVisible, onDestory); + } + }); +} - { - SIZE outputSize = Win32Utils::GetSizeOfRect(_outputRect); - SIZE hostSize = Win32Utils::GetSizeOfRect(MagApp::Get().GetHostWndRect()); - if (outputSize.cx < hostSize.cx || outputSize.cy < hostSize.cy) { - // 存在黑边时渲染每帧前清空后缓冲区 - ID3D11UnorderedAccessView* backBufferUAV = nullptr; - dr.GetUnorderedAccessView(dr.GetBackBuffer(), &backBufferUAV); - static const UINT black[4] = { 0,0,0,255 }; - d3dDC->ClearUnorderedAccessViewUint(backBufferUAV, black); +void Renderer::MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept { + if (_overlayDrawer) { + _overlayDrawer->MessageHandler(msg, wParam, lParam); + + // 有些鼠标操作需要渲染 ImGui 多次,见 https://github.com/ocornut/imgui/issues/2268 + if (msg == WM_LBUTTONDOWN || msg == WM_RBUTTONDOWN || msg == WM_MOUSEWHEEL || + msg == WM_MOUSEHWHEEL || msg == WM_LBUTTONUP || msg 
== WM_RBUTTONUP) { + _FrontendRender(); } } +} + +bool Renderer::_CreateSwapChain() noexcept { + ID3D11Device5* d3dDevice = _frontendResources.GetD3DDevice(); + + // 为了降低延迟,两个垂直同步之间允许渲染 BUFFER_COUNT - 1 帧 + // 如果这个值太小,用户移动光标可能造成画面卡顿 + static constexpr uint32_t BUFFER_COUNT = 4; + + DXGI_SWAP_CHAIN_DESC1 sd{}; + const RECT& scalingWndRect = ScalingWindow::Get().WndRect(); + sd.Width = scalingWndRect.right - scalingWndRect.left; + sd.Height = scalingWndRect.bottom - scalingWndRect.top; + sd.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + sd.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED; + sd.SampleDesc.Count = 1; + sd.Scaling = DXGI_SCALING_NONE; + sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + sd.BufferCount = BUFFER_COUNT; + // 渲染每帧之前都会清空后缓冲区,因此无需 DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL + sd.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + // 只要显卡支持始终启用 DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING 以支持可变刷新率 + sd.Flags = (_frontendResources.IsSupportTearing() ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0) + | DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + + winrt::com_ptr dxgiSwapChain = nullptr; + HRESULT hr = _frontendResources.GetDXGIFactory()->CreateSwapChainForHwnd( + d3dDevice, + ScalingWindow::Get().Handle(), + &sd, + nullptr, + nullptr, + dxgiSwapChain.put() + ); + if (FAILED(hr)) { + Logger::Get().ComError("创建交换链失败", hr); + return false; + } - _gpuTimer->OnBeginEffects(); + _swapChain = dxgiSwapChain.try_as(); + if (!_swapChain) { + Logger::Get().Error("获取 IDXGISwapChain2 失败"); + return false; + } - uint32_t idx = 0; - if (state == FrameSourceBase::UpdateState::NoUpdate) { - // 此帧内容无变化 - // 从第一个使用动态常量的效果开始渲染 - // 如果没有则只渲染最后一个效果的最后一个通道 + // 允许提前渲染 BUFFER_COUNT - 1 帧 + _swapChain->SetMaximumFrameLatency(BUFFER_COUNT - 1); - size_t i = 0; - for (size_t end = _effects.size() - 1; i < end; ++i) { - if (_effects[i].IsUseDynamic()) { - break; - } else { - for (uint32_t j = (uint32_t)_effects[i].GetDesc().passes.size(); j > 0; --j) { - _gpuTimer->OnEndPass(idx++); - } - } - } + 
_frameLatencyWaitableObject.reset(_swapChain->GetFrameLatencyWaitableObject()); + if (!_frameLatencyWaitableObject) { + Logger::Get().Error("GetFrameLatencyWaitableObject 失败"); + return false; + } - if (i == _effects.size()) { - // 只渲染最后一个 Effect 的最后一个 pass - _effects.back().Draw(idx, true); - } else { - for (; i < _effects.size(); ++i) { - _effects[i].Draw(idx); - } - } - } else { - for (auto& effect : _effects) { - effect.Draw(idx); - } + hr = _frontendResources.GetDXGIFactory()->MakeWindowAssociation( + ScalingWindow::Get().Handle(), DXGI_MWA_NO_ALT_ENTER); + if (FAILED(hr)) { + Logger::Get().ComError("MakeWindowAssociation 失败", hr); } - _gpuTimer->OnEndEffects(); + hr = _swapChain->GetBuffer(0, IID_PPV_ARGS(_backBuffer.put())); + if (FAILED(hr)) { + Logger::Get().ComError("获取后缓冲区失败", hr); + return false; + } - if (_overlayDrawer) { - _overlayDrawer->Draw(); + hr = d3dDevice->CreateRenderTargetView(_backBuffer.get(), nullptr, _backBufferRtv.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateRenderTargetView 失败", hr); + return false; } - dr.EndFrame(); -} + // 检查 Multiplane Overlay 支持 + const bool supportMPO = CheckMultiplaneOverlaySupport(_swapChain.get()); + Logger::Get().Info(StrUtils::Concat("Multiplane Overlay 支持:", supportMPO ? "是" : "否")); -bool Renderer::IsUIVisiable() const noexcept { - return _overlayDrawer ? 
_overlayDrawer->IsUIVisiable() : false; + return true; } -void Renderer::SetUIVisibility(bool value) { - if (!value) { - if (_overlayDrawer && _overlayDrawer->IsUIVisiable()) { - _overlayDrawer->SetUIVisibility(false); - _gpuTimer->StopProfiling(); - } +void Renderer::_FrontendRender() noexcept { + WaitForSingleObjectEx(_frameLatencyWaitableObject.get(), 1000, TRUE); + + ID3D11DeviceContext4* d3dDC = _frontendResources.GetD3DDC(); + d3dDC->ClearState(); + + // 所有渲染都使用三角形带拓扑 + d3dDC->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + + // 输出画面是否充满缩放窗口 + const RECT& scalingWndRect = ScalingWindow::Get().WndRect(); + const bool isFill = _destRect == scalingWndRect; + + if (!isFill) { + // 以黑色填充背景,因为我们指定了 DXGI_SWAP_EFFECT_FLIP_DISCARD,同时也是为了和 RTSS 兼容 + static constexpr FLOAT BLACK[4] = { 0.0f,0.0f,0.0f,1.0f }; + d3dDC->ClearRenderTargetView(_backBufferRtv.get(), BLACK); + } + + _lastAccessMutexKey = ++_sharedTextureMutexKey; + HRESULT hr = _frontendSharedTextureMutex->AcquireSync(_lastAccessMutexKey - 1, INFINITE); + if (FAILED(hr)) { + Logger::Get().ComError("AcquireSync 失败", hr); return; } - if (!_overlayDrawer) { - _overlayDrawer.reset(new OverlayDrawer()); - if (!_overlayDrawer->Initialize()) { - _overlayDrawer.reset(); - Logger::Get().Error("初始化 OverlayDrawer 失败"); - return; - } + if (isFill) { + d3dDC->CopyResource(_backBuffer.get(), _frontendSharedTexture.get()); + } else { + d3dDC->CopySubresourceRegion( + _backBuffer.get(), + 0, + _destRect.left - scalingWndRect.left, + _destRect.top - scalingWndRect.top, + 0, + _frontendSharedTexture.get(), + 0, + nullptr + ); } - if (!_overlayDrawer->IsUIVisiable()) { - _overlayDrawer->SetUIVisibility(true); + _frontendSharedTextureMutex->ReleaseSync(_lastAccessMutexKey); - uint32_t passCount = 0; - for (const auto& effect : _effects) { - passCount += (uint32_t)effect.GetDesc().passes.size(); - } + // 叠加层和光标都绘制到 back buffer + { + ID3D11RenderTargetView* t = _backBufferRtv.get(); + d3dDC->OMSetRenderTargets(1, 
&t, nullptr); + } - // StartProfiling 必须在 OnBeginFrame 之前调用 - _gpuTimer->StartProfiling(500ms, passCount); + // 绘制叠加层 + if (_overlayDrawer) { + // ImGui 至少渲染两遍,否则经常有布局错误 + _overlayDrawer->Draw( + 2, + _stepTimer.FPS(), + _overlayDrawer->IsUIVisible() ? _effectsProfiler.GetTimings() : SmallVector() + ); } -} -bool CheckForeground(HWND hwndForeground) { - std::wstring className = Win32Utils::GetWndClassName(hwndForeground); + // 绘制光标 + _cursorDrawer.Draw(); - if (!WindowHelper::IsValidSrcWindow(hwndForeground)) { - return true; - } + // 两个垂直同步之间允许渲染数帧,SyncInterval = 0 只呈现最新的一帧,旧帧被丢弃 + _swapChain->Present(0, 0); - RECT rectForground{}; + // 丢弃渲染目标的内容 + d3dDC->DiscardView(_backBufferRtv.get()); +} - // 如果捕获模式可以捕获到弹窗,则允许小的弹窗 - if (MagApp::Get().GetFrameSource().IsScreenCapture() - && GetWindowStyle(hwndForeground) & (WS_POPUP | WS_CHILD) - ) { - if (!Win32Utils::GetWindowFrameRect(hwndForeground, rectForground)) { - Logger::Get().Error("GetWindowFrameRect 失败"); - return false; - } +bool Renderer::Render() noexcept { + const CursorManager& cursorManager = ScalingWindow::Get().CursorManager(); + const HCURSOR hCursor = cursorManager.Cursor(); + const POINT cursorPos = cursorManager.CursorPos(); + const uint32_t fps = _stepTimer.FPS(); - // 弹窗如果完全在源窗口客户区内则不退出全屏 - const RECT& srcFrameRect = MagApp::Get().GetFrameSource().GetSrcFrameRect(); - if (rectForground.left >= srcFrameRect.left - && rectForground.right <= srcFrameRect.right - && rectForground.top >= srcFrameRect.top - && rectForground.bottom <= srcFrameRect.bottom - ) { - return true; + // 有新帧或光标改变则渲染新的帧 + if (_lastAccessMutexKey == _sharedTextureMutexKey.load(std::memory_order_relaxed)) { + if (_lastAccessMutexKey == 0) { + // 第一帧尚未完成 + return false; } - } - if (rectForground == RECT{}) { - if (!Win32Utils::GetWindowFrameRect(hwndForeground, rectForground)) { - Logger::Get().Error("GetWindowFrameRect 失败"); - return false; + // 检查光标是否移动 + if (hCursor == _lastCursorHandle && cursorPos == _lastCursorPos) { + if 
(IsOverlayVisible() || ScalingWindow::Get().Options().IsShowFPS()) { + // 检查 FPS 是否变化 + if (fps == _lastFPS) { + return false; + } + } else { + return false; + } } } - IntersectRect(&rectForground, &MagApp::Get().GetHostWndRect(), &rectForground); + _lastCursorHandle = hCursor; + _lastCursorPos = cursorPos; + _lastFPS = fps; - // 允许稍微重叠,否则前台窗口最大化时会意外退出 - return rectForground.right - rectForground.left < 10 || rectForground.right - rectForground.top < 10; + _FrontendRender(); + return true; } -uint32_t Renderer::GetEffectCount() const noexcept { - return (uint32_t)_effects.size(); +bool Renderer::IsOverlayVisible() noexcept { + return _overlayDrawer && _overlayDrawer->IsUIVisible(); } -const EffectDesc& Renderer::GetEffectDesc(uint32_t idx) const noexcept { - assert(idx < _effects.size()); - return _effects[idx].GetDesc(); -} +void Renderer::SetOverlayVisibility(bool value, bool noSetForeground) noexcept { + if (value) { + if (!_overlayDrawer) { + _overlayDrawer = std::make_unique(); + if (!_overlayDrawer->Initialize(&_frontendResources)) { + _overlayDrawer.reset(); + Logger::Get().Error("初始化 OverlayDrawer 失败"); + return; + } + } + + if (_overlayDrawer->IsUIVisible()) { + return; + } + _overlayDrawer->SetUIVisibility(true); -// 0 -> 可继续缩放 -// 1 -> 前台窗口改变或源窗口最大化(如果不允许缩放最大化的窗口)/最小化 -// 2 -> 源窗口大小或位置改变或最大化(如果允许缩放最大化的窗口) -int Renderer::_CheckSrcState() { - HWND hwndSrc = MagApp::Get().GetHwndSrc(); - const MagOptions& options = MagApp::Get().GetOptions(); - - if (!options.IsDebugMode()) { - HWND hwndForeground = GetForegroundWindow(); - // 在 3D 游戏模式下打开游戏内叠加层则全屏窗口可以接收焦点 - if (!options.Is3DGameMode() || !IsUIVisiable() || hwndForeground != MagApp::Get().GetHwndHost()) { - if (hwndForeground && hwndForeground != hwndSrc && !CheckForeground(hwndForeground)) { - Logger::Get().Info("前台窗口已改变"); - return 1; + _backendThreadDispatcher.TryEnqueue([this]() { + uint32_t passCount = 0; + for (const EffectInfo& info : _effectInfos) { + passCount += (uint32_t)info.passNames.size(); } 
+ _effectsProfiler.Start(_backendResources.GetD3DDevice(), passCount); + }); + } else { + if (_overlayDrawer) { + if (!_overlayDrawer->IsUIVisible()) { + return; + } + _overlayDrawer->SetUIVisibility(false, noSetForeground); } - } - UINT showCmd = Win32Utils::GetWindowShowCmd(hwndSrc); - if (showCmd != SW_NORMAL && (showCmd != SW_SHOWMAXIMIZED || !options.IsAllowScalingMaximized())) { - Logger::Get().Info("源窗口显示状态改变"); - return 1; + _backendThreadDispatcher.TryEnqueue([this]() { + _effectsProfiler.Stop(); + }); } - RECT rect; - if (!GetWindowRect(hwndSrc, &rect)) { - Logger::Get().Error("GetWindowRect 失败"); - return 1; + // 立即渲染一帧 + _FrontendRender(); +} + +bool Renderer::_InitFrameSource() noexcept { + switch (ScalingWindow::Get().Options().captureMethod) { + case CaptureMethod::GraphicsCapture: + _frameSource = std::make_unique(); + break; + case CaptureMethod::DesktopDuplication: + _frameSource = std::make_unique(); + break; + case CaptureMethod::GDI: + _frameSource = std::make_unique(); + break; + case CaptureMethod::DwmSharedSurface: + _frameSource = std::make_unique(); + break; + default: + Logger::Get().Error("未知的捕获模式"); + return false; } - if (_srcWndRect != rect) { - Logger::Get().Info("源窗口位置或大小改变"); - return 2; + Logger::Get().Info(StrUtils::Concat("当前捕获模式:", _frameSource->Name())); + + if (!_frameSource->Initialize(_backendResources, _backendDescriptorStore)) { + Logger::Get().Error("初始化 FrameSource 失败"); + return false; } - return 0; + const RECT& srcRect = _frameSource->SrcRect(); + Logger::Get().Info(fmt::format("源窗口边界: {},{},{},{}", + srcRect.left, srcRect.top, srcRect.right, srcRect.bottom)); + + // 由于 DPI 缩放,捕获尺寸和边界矩形尺寸不一定相同 + D3D11_TEXTURE2D_DESC desc; + _frameSource->GetOutput()->GetDesc(&desc); + Logger::Get().Info(fmt::format("捕获尺寸: {}x{}", desc.Width, desc.Height)); + + return true; } -static bool CompileEffect(bool isLastEffect, const EffectOption& option, EffectDesc& result) { - result.name = StrUtils::UTF16ToUTF8(option.name); - // 
将文件夹分隔符统一为 '\' - for (char& c : result.name) { - if (c == '/') { - c = '\\'; - } - } +static std::optional CompileEffect(const EffectOption& effectOption) noexcept { + EffectDesc result; - result.flags = isLastEffect ? EffectFlags::LastEffect : 0; + result.name = StrUtils::UTF16ToUTF8(effectOption.name); - if (option.flags & EffectOptionFlags::InlineParams) { + if (effectOption.flags & EffectOptionFlags::InlineParams) { result.flags |= EffectFlags::InlineParams; } - if (option.flags & EffectOptionFlags::FP16) { + if (effectOption.flags & EffectOptionFlags::FP16) { result.flags |= EffectFlags::FP16; } uint32_t compileFlag = 0; - MagOptions& options = MagApp::Get().GetOptions(); - if (options.IsDisableEffectCache()) { + const ScalingOptions& scalingOptions = ScalingWindow::Get().Options(); + if (scalingOptions.IsEffectCacheDisabled()) { compileFlag |= EffectCompilerFlags::NoCache; } - if (options.IsSaveEffectSources()) { + if (scalingOptions.IsSaveEffectSources()) { compileFlag |= EffectCompilerFlags::SaveSources; } - if (options.IsWarningsAreErrors()) { + if (scalingOptions.IsWarningsAreErrors()) { compileFlag |= EffectCompilerFlags::WarningsAreErrors; } bool success = true; int duration = Utils::Measure([&]() { - success = !EffectCompiler::Compile(result, compileFlag, &option.parameters); + success = !EffectCompiler::Compile(result, compileFlag, &effectOption.parameters); }); if (success) { - Logger::Get().Info(fmt::format("编译 {}.hlsl 用时 {} 毫秒", StrUtils::UTF16ToUTF8(option.name), duration / 1000.0f)); + Logger::Get().Info(fmt::format("编译 {}.hlsl 用时 {} 毫秒", + StrUtils::UTF16ToUTF8(effectOption.name), duration / 1000.0f)); + return result; } else { - Logger::Get().Error(StrUtils::Concat("编译 ", StrUtils::UTF16ToUTF8(option.name), ".hlsl 失败")); + Logger::Get().Error(StrUtils::Concat("编译 ", + StrUtils::UTF16ToUTF8(effectOption.name), ".hlsl 失败")); + return std::nullopt; } - return success; } -bool Renderer::_BuildEffects() { - const std::vector& effectsOption = 
MagApp::Get().GetOptions().effects; - uint32_t effectCount = (int)effectsOption.size(); - if (effectCount == 0) { - return false; - } +ID3D11Texture2D* Renderer::_BuildEffects() noexcept { + const std::vector& effects = ScalingWindow::Get().Options().effects; + assert(!effects.empty()); + + const uint32_t effectCount = (uint32_t)effects.size(); // 并行编译所有效果 - std::vector effectDescs(effectsOption.size()); + std::vector effectDescs(effects.size()); std::atomic anyFailure; int duration = Utils::Measure([&]() { Win32Utils::RunParallel([&](uint32_t id) { - if (!CompileEffect(id == effectCount - 1, effectsOption[id], effectDescs[id])) { + std::optional desc = CompileEffect(effects[id]); + if (desc) { + effectDescs[id] = std::move(*desc); + } else { anyFailure.store(true, std::memory_order_relaxed); } }, effectCount); }); if (anyFailure.load(std::memory_order_relaxed)) { - return false; + return nullptr; } if (effectCount > 1) { Logger::Get().Info(fmt::format("编译着色器总计用时 {} 毫秒", duration / 1000.0f)); } - ID3D11Texture2D* effectInput = MagApp::Get().GetFrameSource().GetOutput(); + _effectDrawers.resize(effects.size()); - DownscalingEffect& downscalingEffect = MagApp::Get().GetOptions().downscalingEffect; - if (!downscalingEffect.name.empty()) { - _effects.reserve(effectsOption.size() + 1); + ID3D11Texture2D* inOutTexture = _frameSource->GetOutput(); + for (uint32_t i = 0; i < effectCount; ++i) { + if (!_effectDrawers[i].Initialize( + effectDescs[i], + effects[i], + _backendResources, + _backendDescriptorStore, + &inOutTexture + )) { + Logger::Get().Error(fmt::format("初始化效果#{} ({}) 失败", i, StrUtils::UTF16ToUTF8(effects[i].name))); + return nullptr; + } } - _effects.resize(effectsOption.size()); - for (uint32_t i = 0; i < effectCount; ++i) { - bool isLastEffect = i == effectCount - 1; + // 初始化 _effectInfos + _effectInfos.resize(effectDescs.size()); + for (size_t i = 0; i < effectDescs.size(); ++i) { + EffectInfo& info = _effectInfos[i]; + EffectDesc& desc = effectDescs[i]; + 
info.name = std::move(desc.name); - if (!_effects[i].Initialize( - effectDescs[i], effectsOption[i], effectInput, - isLastEffect ? &_outputRect : nullptr, - isLastEffect ? &_virtualOutputRect : nullptr - )) { - Logger::Get().Error(fmt::format("初始化效果#{} ({}) 失败", i, StrUtils::UTF16ToUTF8(effectsOption[i].name))); - return false; + info.passNames.reserve(desc.passes.size()); + for (EffectPassDesc& passDesc : desc.passes) { + info.passNames.emplace_back(std::move(passDesc.desc)); + } + } + + // 输出尺寸大于缩放窗口尺寸则需要降采样 + { + D3D11_TEXTURE2D_DESC desc; + inOutTexture->GetDesc(&desc); + const SIZE scalingWndSize = Win32Utils::GetSizeOfRect(ScalingWindow::Get().WndRect()); + if ((LONG)desc.Width > scalingWndSize.cx || (LONG)desc.Height > scalingWndSize.cy) { + EffectOption bicubicOption; + bicubicOption.name = L"Bicubic"; + bicubicOption.parameters[L"paramB"] = 0.0f; + bicubicOption.parameters[L"paramC"] = 0.5f; + bicubicOption.scalingType = ScalingType::Fit; + // 参数不会改变,因此可以内联 + bicubicOption.flags = EffectOptionFlags::InlineParams; + + std::optional bicubicDesc = CompileEffect(bicubicOption); + if (!bicubicDesc) { + Logger::Get().Error("编译降采样效果失败"); + return nullptr; + } + + EffectDrawer& bicubicDrawer = _effectDrawers.emplace_back(); + if (!bicubicDrawer.Initialize( + *bicubicDesc, + bicubicOption, + _backendResources, + _backendDescriptorStore, + &inOutTexture + )) { + Logger::Get().Error("初始化降采样效果失败"); + return nullptr; + } + + // 为降采样算法生成 EffectInfo + EffectInfo& bicubicEffectInfo = _effectInfos.emplace_back(); + bicubicEffectInfo.name = std::move(bicubicDesc->name); + bicubicEffectInfo.passNames.reserve(bicubicDesc->passes.size()); + for (EffectPassDesc& passDesc : bicubicDesc->passes) { + bicubicEffectInfo.passNames.emplace_back(std::move(passDesc.desc)); + } } + } - effectInput = _effects[i].GetOutputTexture(); + // 初始化所有效果共用的动态常量缓冲区 + for (uint32_t i = 0; i < effectDescs.size(); ++i) { + if (effectDescs[i].flags & EffectFlags::UseDynamic) { + _firstDynamicEffectIdx = 
i; + break; + } } - if (!downscalingEffect.name.empty()) { - const SIZE hostSize = Win32Utils::GetSizeOfRect(MagApp::Get().GetHostWndRect()); - const SIZE outputSize = Win32Utils::GetSizeOfRect(_virtualOutputRect); - if (outputSize.cx > hostSize.cx || outputSize.cy > hostSize.cy) { - // 需降采样 - EffectOption downscalingEffectOption; - downscalingEffectOption.name = downscalingEffect.name; - downscalingEffectOption.parameters = downscalingEffect.parameters; - downscalingEffectOption.scalingType = ScalingType::Fit; - downscalingEffectOption.flags = EffectOptionFlags::InlineParams; // 内联参数 - - EffectDesc downscalingEffectDesc; - - // 最后一个效果需重新编译 - // 在分离光标渲染逻辑后这里可优化 - duration = Utils::Measure([&]() { - Win32Utils::RunParallel([&](uint32_t id) { - if (!CompileEffect( - id == 1, - id == 0 ? effectsOption.back() : downscalingEffectOption, - id == 0 ? effectDescs.back() : downscalingEffectDesc - )) { - anyFailure.store(true, std::memory_order_relaxed); - } - }, 2); - }); - - if (anyFailure.load(std::memory_order_relaxed)) { - return false; - } - - Logger::Get().Info(fmt::format("编译降采样着色器用时 {} 毫秒", duration / 1000.0f)); + if (_firstDynamicEffectIdx != std::numeric_limits::max()) { + D3D11_BUFFER_DESC bd{}; + bd.Usage = D3D11_USAGE_DYNAMIC; + bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + bd.ByteWidth = 16; // 只用 4 个字节 + bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + + HRESULT hr = _backendResources.GetD3DDevice()->CreateBuffer(&bd, nullptr, _dynamicCB.put()); + if (FAILED(hr)) { + Logger::Get().ComError("CreateBuffer 失败", hr); + return nullptr; + } + } - _effects.pop_back(); - if (_effects.empty()) { - effectInput = MagApp::Get().GetFrameSource().GetOutput(); - } else { - effectInput = _effects.back().GetOutputTexture(); + return inOutTexture; +} + +HANDLE Renderer::_CreateSharedTexture(ID3D11Texture2D* effectsOutput) noexcept { + D3D11_TEXTURE2D_DESC desc; + effectsOutput->GetDesc(&desc); + SIZE textureSize = { (LONG)desc.Width, (LONG)desc.Height }; + + // 创建共享纹理 + 
_backendSharedTexture = DirectXHelper::CreateTexture2D( + _backendResources.GetD3DDevice(), + DXGI_FORMAT_R8G8B8A8_UNORM, + textureSize.cx, + textureSize.cy, + D3D11_BIND_SHADER_RESOURCE, + D3D11_USAGE_DEFAULT, + D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX + ); + if (!_backendSharedTexture) { + Logger::Get().Error("创建 Texture2D 失败"); + return NULL; + } + + _backendSharedTextureMutex = _backendSharedTexture.try_as(); + + winrt::com_ptr sharedDxgiRes = _backendSharedTexture.try_as(); + + HANDLE sharedHandle = NULL; + HRESULT hr = sharedDxgiRes->GetSharedHandle(&sharedHandle); + if (FAILED(hr)) { + Logger::Get().ComError("GetSharedHandle 失败", hr); + return NULL; + } + + return sharedHandle; +} + +void Renderer::_BackendThreadProc() noexcept { +#ifdef _DEBUG + SetThreadDescription(GetCurrentThread(), L"Magpie 缩放后端线程"); +#endif + + winrt::init_apartment(winrt::apartment_type::single_threaded); + + ID3D11Texture2D* outputTexture = _InitBackend(); + if (!outputTexture) { + _frameSource.reset(); + // 通知前端初始化失败 + _sharedTextureHandle.store(INVALID_HANDLE_VALUE, std::memory_order_release); + _sharedTextureHandle.notify_one(); + + // 即使失败也要创建消息循环,否则前端线程将一直等待 + MSG msg; + while (GetMessage(&msg, NULL, 0, 0)) { + DispatchMessage(&msg); + } + return; + } + + bool waitingForStepTimer = true; + + MSG msg; + while (true) { + while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { + if (msg.message == WM_QUIT) { + // 不能在前端线程释放 + _frameSource.reset(); + return; } - _effects.resize(_effects.size() + 2); + DispatchMessage(&msg); + } - // 重新构建最后一个效果 - const size_t originLastEffectIdx = _effects.size() - 2; - if (!_effects[originLastEffectIdx].Initialize(effectDescs.back(), effectsOption.back(), - effectInput, nullptr, nullptr) - ) { - Logger::Get().Error(fmt::format("初始化效果#{} ({}) 失败", - originLastEffectIdx, StrUtils::UTF16ToUTF8(effectsOption.back().name))); - return false; + if (waitingForStepTimer) { + if (!_stepTimer.WaitForNextFrame()) { + _stepTimer.UpdateFPS(false); + continue; } - 
effectInput = _effects[originLastEffectIdx].GetOutputTexture(); + waitingForStepTimer = false; + } - // 构建降采样效果 - if (!_effects.back().Initialize(downscalingEffectDesc, downscalingEffectOption, - effectInput, &_outputRect, &_virtualOutputRect - )) { - Logger::Get().Error(fmt::format("初始化降采样效果 ({}) 失败", - StrUtils::UTF16ToUTF8(downscalingEffect.name))); + const FrameSourceBase::UpdateState state = _frameSource->Update(); + _stepTimer.UpdateFPS(state == FrameSourceBase::UpdateState::NewFrame); + + switch (state) { + case FrameSourceBase::UpdateState::NewFrame: + { + _BackendRender(outputTexture); + waitingForStepTimer = true; + break; + } + case FrameSourceBase::UpdateState::Waiting: + { + if (_frameSource->WaitType() == FrameSourceBase::WaitForMessage) { + // 等待新消息 + WaitMessage(); } + break; + } + default: + { + waitingForStepTimer = true; + break; + } } } - - return true; } -bool Renderer::_UpdateDynamicConstants() { - // cbuffer __CB1 : register(b0) { - // int4 __cursorRect; - // float2 __cursorPt; - // uint2 __cursorPos; - // uint __cursorType; - // uint __frameCount; - // }; - - CursorManager& cursorManager = MagApp::Get().GetCursorManager(); - if (cursorManager.HasCursor() && !(MagApp::Get().GetOptions().Is3DGameMode() && IsUIVisiable())) { - const POINT* pos = cursorManager.GetCursorPos(); - const CursorManager::CursorInfo* ci = cursorManager.GetCursorInfo(); - - ID3D11Texture2D* cursorTex; - CursorManager::CursorType cursorType = CursorManager::CursorType::Color; - if (!cursorManager.GetCursorTexture(&cursorTex, cursorType)) { - Logger::Get().Error("GetCursorTexture 失败"); +ID3D11Texture2D* Renderer::_InitBackend() noexcept { + // 创建 DispatcherQueue + { + DispatcherQueueOptions dqOptions{}; + dqOptions.dwSize = sizeof(DispatcherQueueOptions); + dqOptions.threadType = DQTYPE_THREAD_CURRENT; + + winrt::Windows::System::DispatcherQueueController dqc{ nullptr }; + HRESULT hr = CreateDispatcherQueueController( + dqOptions, + 
(PDISPATCHERQUEUECONTROLLER*)winrt::put_abi(dqc) + ); + if (FAILED(hr)) { + Logger::Get().ComError("CreateDispatcherQueueController 失败", hr); + return nullptr; } - assert(pos && ci); - - float cursorScaling = (float)MagApp::Get().GetOptions().cursorScaling; - if (cursorScaling < 1e-5) { - SIZE srcFrameSize = Win32Utils::GetSizeOfRect(MagApp::Get().GetFrameSource().GetSrcFrameRect()); - SIZE virtualOutputSize = Win32Utils::GetSizeOfRect(_virtualOutputRect); - cursorScaling = (((float)virtualOutputSize.cx / srcFrameSize.cx) - + ((float)virtualOutputSize.cy / srcFrameSize.cy)) / 2; + + _backendThreadDispatcher = dqc.DispatcherQueue(); + } + + if (!_backendResources.Initialize()) { + return nullptr; + } + + ID3D11Device5* d3dDevice = _backendResources.GetD3DDevice(); + _backendDescriptorStore.Initialize(d3dDevice); + + if (!_InitFrameSource()) { + return nullptr; + } + + { + std::optional frameRateLimit; + if (_frameSource->WaitType() == FrameSourceBase::NoWait) { + // 某些捕获方式不会限制捕获帧率,因此将捕获帧率限制为屏幕刷新率 + const HWND hwndSrc = ScalingWindow::Get().HwndSrc(); + if (HMONITOR hMon = MonitorFromWindow(hwndSrc, MONITOR_DEFAULTTONEAREST)) { + MONITORINFOEX mi{ sizeof(MONITORINFOEX) }; + GetMonitorInfo(hMon, &mi); + + DEVMODE dm{}; + dm.dmSize = sizeof(DEVMODE); + EnumDisplaySettings(mi.szDevice, ENUM_CURRENT_SETTINGS, &dm); + + if (dm.dmDisplayFrequency > 0) { + Logger::Get().Info(fmt::format("屏幕刷新率:{}", dm.dmDisplayFrequency)); + frameRateLimit = float(dm.dmDisplayFrequency); + } + } } - SIZE cursorSize = { - std::lroundf(ci->size.cx * cursorScaling), - std::lroundf(ci->size.cy * cursorScaling) - }; + const ScalingOptions& options = ScalingWindow::Get().Options(); + if (options.maxFrameRate) { + if (!frameRateLimit || *options.maxFrameRate < *frameRateLimit) { + frameRateLimit = options.maxFrameRate; + } + } - _dynamicConstants[0].intVal = pos->x - std::lroundf(ci->hotSpot.x * cursorScaling); - _dynamicConstants[1].intVal = pos->y - std::lroundf(ci->hotSpot.y * cursorScaling); - 
_dynamicConstants[2].intVal = _dynamicConstants[0].intVal + cursorSize.cx; - _dynamicConstants[3].intVal = _dynamicConstants[1].intVal + cursorSize.cy; + _stepTimer.Initialize(frameRateLimit); + } - _dynamicConstants[4].floatVal = 1.0f / cursorSize.cx; - _dynamicConstants[5].floatVal = 1.0f / cursorSize.cy; + ID3D11Texture2D* outputTexture = _BuildEffects(); + if (!outputTexture) { + return nullptr; + } - _dynamicConstants[6].uintVal = pos->x; - _dynamicConstants[7].uintVal = pos->y; + HRESULT hr = d3dDevice->CreateFence( + _fenceValue, D3D11_FENCE_FLAG_NONE, IID_PPV_ARGS(&_d3dFence)); + if (FAILED(hr)) { + Logger::Get().ComError("CreateFence 失败", hr); + return nullptr; + } - _dynamicConstants[8].uintVal = (uint32_t)cursorType; - } else { - _dynamicConstants[0].intVal = INT_MAX; - _dynamicConstants[1].intVal = INT_MAX; - _dynamicConstants[2].intVal = INT_MAX; - _dynamicConstants[3].intVal = INT_MAX; - _dynamicConstants[6].uintVal = UINT_MAX; - _dynamicConstants[7].uintVal = UINT_MAX; + _fenceEvent.reset(Win32Utils::SafeHandle(CreateEvent(nullptr, FALSE, FALSE, nullptr))); + if (!_fenceEvent) { + Logger::Get().Win32Error("CreateEvent 失败"); + return nullptr; + } + + HANDLE sharedHandle = _CreateSharedTexture(outputTexture); + if (!sharedHandle) { + Logger::Get().Win32Error("_CreateSharedTexture 失败"); + return nullptr; + } + + _srcRect = _frameSource->SrcRect(); + _sharedTextureHandle.store(sharedHandle, std::memory_order_release); + _sharedTextureHandle.notify_one(); + + return outputTexture; +} + +void Renderer::_BackendRender(ID3D11Texture2D* effectsOutput) noexcept { + ID3D11DeviceContext4* d3dDC = _backendResources.GetD3DDC(); + d3dDC->ClearState(); + + if (ID3D11Buffer* t = _dynamicCB.get()) { + _UpdateDynamicConstants(); + d3dDC->CSSetConstantBuffers(1, 1, &t); + } + + _effectsProfiler.OnBeginEffects(d3dDC); + + for (const EffectDrawer& effectDrawer : _effectDrawers) { + effectDrawer.Draw(_effectsProfiler); + } + + _effectsProfiler.OnEndEffects(d3dDC); + + 
HRESULT hr = d3dDC->Signal(_d3dFence.get(), ++_fenceValue); + if (FAILED(hr)) { + Logger::Get().ComError("Signal 失败", hr); + return; } - _dynamicConstants[9].uintVal = _gpuTimer->GetFrameCount(); + hr = _d3dFence->SetEventOnCompletion(_fenceValue, _fenceEvent.get()); + if (FAILED(hr)) { + Logger::Get().ComError("SetEventOnCompletion 失败", hr); + return; + } + + d3dDC->Flush(); + + // 等待渲染完成 + WaitForSingleObject(_fenceEvent.get(), INFINITE); + + // 查询效果的渲染时间 + _effectsProfiler.QueryTimings(d3dDC); + + // 渲染完成后再更新 _sharedTextureMutexKey,否则前端必须等待,降低光标流畅度 + const uint64_t key = ++_sharedTextureMutexKey; + hr = _backendSharedTextureMutex->AcquireSync(key - 1, INFINITE); + if (FAILED(hr)) { + Logger::Get().ComError("AcquireSync 失败", hr); + return; + } + + d3dDC->CopyResource(_backendSharedTexture.get(), effectsOutput); + + _backendSharedTextureMutex->ReleaseSync(key); + + // 根据 https://learn.microsoft.com/en-us/windows/win32/api/d3d11/nf-d3d11-id3d11device-opensharedresource, + // 更新共享纹理后必须调用 Flush + d3dDC->Flush(); + + // 唤醒前台线程 + PostMessage(ScalingWindow::Get().Handle(), WM_NULL, 0, 0); +} + +bool Renderer::_UpdateDynamicConstants() const noexcept { + // cbuffer __CB2 : register(b1) { uint __frameCount; }; - auto d3dDC = MagApp::Get().GetDeviceResources().GetD3DDC(); + ID3D11DeviceContext4* d3dDC = _backendResources.GetD3DDC(); D3D11_MAPPED_SUBRESOURCE ms; HRESULT hr = d3dDC->Map(_dynamicCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &ms); if (SUCCEEDED(hr)) { - std::memcpy(ms.pData, _dynamicConstants.data(), _dynamicConstants.size() * 4); + // 避免使用 *(uint32_t*)ms.pData,见 + // https://learn.microsoft.com/en-us/windows/win32/api/d3d11/nf-d3d11-id3d11devicecontext-map + const uint32_t frameCount = _stepTimer.FrameCount(); + std::memcpy(ms.pData, &frameCount, 4); d3dDC->Unmap(_dynamicCB.get(), 0); } else { Logger::Get().ComError("Map 失败", hr); diff --git a/src/Magpie.Core/Renderer.h b/src/Magpie.Core/Renderer.h index 2752ef4ce..755b8dafc 100644 --- a/src/Magpie.Core/Renderer.h 
+++ b/src/Magpie.Core/Renderer.h @@ -1,72 +1,133 @@ #pragma once -#include "EffectHelper.h" +#include "DeviceResources.h" +#include "BackendDescriptorStore.h" +#include "EffectDrawer.h" +#include "Win32Utils.h" +#include "CursorDrawer.h" +#include "StepTimer.h" +#include "EffectsProfiler.h" namespace Magpie::Core { -class GPUTimer; -class OverlayDrawer; -class CursorManager; -class EffectDrawer; -struct EffectDesc; +class FrameSourceBase; class Renderer { public: - Renderer(); + Renderer() noexcept; + ~Renderer() noexcept; + Renderer(const Renderer&) = delete; Renderer(Renderer&&) = delete; - ~Renderer(); + bool Initialize() noexcept; + + bool Render() noexcept; - bool Initialize(); + bool IsOverlayVisible() noexcept; - void Render(bool onPrint = false); + void SetOverlayVisibility(bool value, bool noSetForeground = false) noexcept; - GPUTimer& GetGPUTimer() { - return *_gpuTimer; + const RECT& SrcRect() const noexcept { + return _srcRect; } - // 可能为空 - OverlayDrawer* GetOverlayDrawer() { - return _overlayDrawer.get(); + // 屏幕坐标而不是窗口局部坐标 + const RECT& DestRect() const noexcept { + return _destRect; } - bool IsUIVisiable() const noexcept; + const FrameSourceBase& FrameSource() const noexcept { + return *_frameSource; + } - void SetUIVisibility(bool value); + void OnCursorVisibilityChanged(bool isVisible, bool onDestory); - const RECT& GetOutputRect() const noexcept { - return _outputRect; - } + void MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept; - const RECT& GetVirtualOutputRect() const noexcept { - return _virtualOutputRect; + struct EffectInfo { + std::string name; + std::vector passNames; + }; + const std::vector& EffectInfos() const noexcept { + return _effectInfos; } - uint32_t GetEffectCount() const noexcept; +private: + bool _CreateSwapChain() noexcept; + + void _FrontendRender() noexcept; - const EffectDesc& GetEffectDesc(uint32_t idx) const noexcept; + void _BackendThreadProc() noexcept; -private: - int _CheckSrcState(); + 
ID3D11Texture2D* _InitBackend() noexcept; + + bool _InitFrameSource() noexcept; + + ID3D11Texture2D* _BuildEffects() noexcept; + + HANDLE _CreateSharedTexture(ID3D11Texture2D* effectsOutput) noexcept; - bool _BuildEffects(); + void _BackendRender(ID3D11Texture2D* effectsOutput) noexcept; - bool _UpdateDynamicConstants(); + bool _UpdateDynamicConstants() const noexcept; - RECT _srcWndRect{}; - RECT _outputRect{}; - // 尺寸可能大于主窗口 - RECT _virtualOutputRect{}; + static LRESULT CALLBACK _LowLevelKeyboardHook(int nCode, WPARAM wParam, LPARAM lParam); - bool _waitingForNextFrame = false; + // 只能由前台线程访问 + DeviceResources _frontendResources; + winrt::com_ptr _swapChain; + Win32Utils::ScopedHandle _frameLatencyWaitableObject; + winrt::com_ptr _backBuffer; + winrt::com_ptr _backBufferRtv; + uint64_t _lastAccessMutexKey = 0; + + CursorDrawer _cursorDrawer; + std::unique_ptr _overlayDrawer; + + HCURSOR _lastCursorHandle = NULL; + POINT _lastCursorPos{ std::numeric_limits::max(), std::numeric_limits::max() }; + uint32_t _lastFPS = std::numeric_limits::max(); + + winrt::com_ptr _frontendSharedTexture; + winrt::com_ptr _frontendSharedTextureMutex; + RECT _destRect{}; + + std::thread _backendThread; + + HHOOK _hKeyboardHook = NULL; + + // 只能由后台线程访问 + DeviceResources _backendResources; + Magpie::Core::BackendDescriptorStore _backendDescriptorStore; + std::unique_ptr _frameSource; + std::vector _effectDrawers; + + StepTimer _stepTimer; + EffectsProfiler _effectsProfiler; + + winrt::com_ptr _d3dFence; + uint64_t _fenceValue = 0; + Win32Utils::ScopedHandle _fenceEvent; + + winrt::com_ptr _backendSharedTexture; + winrt::com_ptr _backendSharedTextureMutex; - std::vector _effects; - std::array _dynamicConstants; winrt::com_ptr _dynamicCB; + uint32_t _firstDynamicEffectIdx = std::numeric_limits::max(); + + // 可由所有线程访问 + winrt::Windows::System::DispatcherQueue _backendThreadDispatcher{ nullptr }; + + std::atomic _sharedTextureMutexKey = 0; - std::unique_ptr _overlayDrawer; + // 
INVALID_HANDLE_VALUE 表示后端初始化失败 + std::atomic _sharedTextureHandle{ NULL }; + // 初始化时由 _sharedTextureHandle 同步 + RECT _srcRect{}; - std::unique_ptr _gpuTimer; + // 供游戏内叠加层使用 + // 由于要跨线程访问,初始化之后不能更改 + std::vector _effectInfos; }; } diff --git a/src/Magpie.Core/ScalingOptions.cpp b/src/Magpie.Core/ScalingOptions.cpp new file mode 100644 index 000000000..a35c5c0d8 --- /dev/null +++ b/src/Magpie.Core/ScalingOptions.cpp @@ -0,0 +1,92 @@ +#include "pch.h" +#include "ScalingOptions.h" +#include "Logger.h" +#include "StrUtils.h" + +namespace Magpie::Core { + +static std::string LogParameters(const phmap::flat_hash_map& params) noexcept { + std::string result; + + if (params.empty()) { + result = "无"; + } else { + for (const auto& pair : params) { + result.append(fmt::format("\n\t\t\t\t{}: {}", StrUtils::UTF16ToUTF8(pair.first), pair.second)); + } + } + + return result; +} + +static std::string LogEffects(const std::vector& effects) noexcept { + std::string result; + for (const EffectOption& effect : effects) { + result.append(fmt::format(R"( + {} + scalingType: {} + scale: {},{} + parameters: {})", + StrUtils::UTF16ToUTF8(effect.name), + (int)effect.scalingType, + effect.scale.first, effect.scale.second, + LogParameters(effect.parameters) + )); + } + return result; +} + +void ScalingOptions::Log() const noexcept { + Logger::Get().Info(fmt::format(R"(缩放选项 + IsWindowResizingDisabled: {} + IsDebugMode: {} + IsEffectCacheDisabled: {} + IsFontCacheDisabled: {} + IsSaveEffectSources: {} + IsWarningsAreErrors: {} + IsAllowScalingMaximized: {} + IsSimulateExclusiveFullscreen: {} + Is3DGameMode: {} + IsShowFPS: {} + IsCaptureTitleBar: {} + IsAdjustCursorSpeed: {} + IsDrawCursor: {} + IsDirectFlipDisabled: {} + IsStatisticsForDynamicDetectionEnabled: {} + cropping: {},{},{},{} + graphicsCard: {} + maxFrameRate: {} + cursorScaling: {} + captureMethod: {} + multiMonitorUsage: {} + cursorInterpolationMode: {} + duplicateFrameDetectionMode: {} + effects: {})", + 
IsWindowResizingDisabled(), + IsDebugMode(), + IsEffectCacheDisabled(), + IsFontCacheDisabled(), + IsSaveEffectSources(), + IsWarningsAreErrors(), + IsAllowScalingMaximized(), + IsSimulateExclusiveFullscreen(), + Is3DGameMode(), + IsShowFPS(), + IsCaptureTitleBar(), + IsAdjustCursorSpeed(), + IsDrawCursor(), + IsDirectFlipDisabled(), + IsStatisticsForDynamicDetectionEnabled(), + cropping.Left, cropping.Top, cropping.Right, cropping.Bottom, + graphicsCard, + maxFrameRate.has_value() ? *maxFrameRate : 0.0f, + cursorScaling, + (int)captureMethod, + (int)multiMonitorUsage, + (int)cursorInterpolationMode, + (int)duplicateFrameDetectionMode, + LogEffects(effects) + )); +} + +} diff --git a/src/Magpie.Core/ScalingOptions.h b/src/Magpie.Core/ScalingOptions.h new file mode 100644 index 000000000..b2af73e6a --- /dev/null +++ b/src/Magpie.Core/ScalingOptions.h @@ -0,0 +1,113 @@ +#pragma once +#include + +namespace Magpie::Core { + +enum class CaptureMethod { + GraphicsCapture, + DesktopDuplication, + GDI, + DwmSharedSurface, +}; + +enum class MultiMonitorUsage { + Closest, + Intersected, + All, +}; + +enum class CursorInterpolationMode { + NearestNeighbor, + Bilinear, +}; + +struct Cropping { + float Left; + float Top; + float Right; + float Bottom; +}; + +struct ScalingFlags { + static constexpr const uint32_t DisableWindowResizing = 1; + static constexpr const uint32_t BreakpointMode = 1 << 1; + static constexpr const uint32_t DisableEffectCache = 1 << 2; + static constexpr const uint32_t SaveEffectSources = 1 << 3; + static constexpr const uint32_t WarningsAreErrors = 1 << 4; + static constexpr const uint32_t SimulateExclusiveFullscreen = 1 << 5; + static constexpr const uint32_t Is3DGameMode = 1 << 6; + static constexpr const uint32_t ShowFPS = 1 << 7; + static constexpr const uint32_t CaptureTitleBar = 1 << 10; + static constexpr const uint32_t AdjustCursorSpeed = 1 << 11; + static constexpr const uint32_t DrawCursor = 1 << 12; + static constexpr const uint32_t 
DisableDirectFlip = 1 << 13; + static constexpr const uint32_t DisableFontCache = 1 << 14; + static constexpr const uint32_t AllowScalingMaximized = 1 << 15; + static constexpr const uint32_t EnableStatisticsForDynamicDetection = 1 << 16; +}; + +enum class ScalingType { + Normal, // Scale 表示缩放倍数 + Fit, // Scale 表示相对于屏幕能容纳的最大等比缩放的比例 + Absolute, // Scale 表示目标大小(单位为像素) + Fill // 充满屏幕,此时不使用 Scale 参数 +}; + +struct EffectOptionFlags { + static constexpr const uint32_t InlineParams = 1; + static constexpr const uint32_t FP16 = 1 << 1; +}; + +struct EffectOption { + std::wstring name; + phmap::flat_hash_map parameters; + ScalingType scalingType = ScalingType::Normal; + std::pair scale = { 1.0f,1.0f }; + uint32_t flags = 0; // EffectOptionFlags + + bool HasScale() const noexcept { + return scalingType != ScalingType::Normal || + std::abs(scale.first - 1.0f) > 1e-5 || std::abs(scale.second - 1.0f) > 1e-5; + } +}; + +enum class DuplicateFrameDetectionMode { + Always, + Dynamic, + Never +}; + +struct ScalingOptions { + DEFINE_FLAG_ACCESSOR(IsWindowResizingDisabled, ScalingFlags::DisableWindowResizing, flags) + DEFINE_FLAG_ACCESSOR(IsDebugMode, ScalingFlags::BreakpointMode, flags) + DEFINE_FLAG_ACCESSOR(IsEffectCacheDisabled, ScalingFlags::DisableEffectCache, flags) + DEFINE_FLAG_ACCESSOR(IsFontCacheDisabled, ScalingFlags::DisableFontCache, flags) + DEFINE_FLAG_ACCESSOR(IsSaveEffectSources, ScalingFlags::SaveEffectSources, flags) + DEFINE_FLAG_ACCESSOR(IsWarningsAreErrors, ScalingFlags::WarningsAreErrors, flags) + DEFINE_FLAG_ACCESSOR(IsAllowScalingMaximized, ScalingFlags::AllowScalingMaximized, flags) + DEFINE_FLAG_ACCESSOR(IsSimulateExclusiveFullscreen, ScalingFlags::SimulateExclusiveFullscreen, flags) + DEFINE_FLAG_ACCESSOR(Is3DGameMode, ScalingFlags::Is3DGameMode, flags) + DEFINE_FLAG_ACCESSOR(IsShowFPS, ScalingFlags::ShowFPS, flags) + DEFINE_FLAG_ACCESSOR(IsCaptureTitleBar, ScalingFlags::CaptureTitleBar, flags) + DEFINE_FLAG_ACCESSOR(IsAdjustCursorSpeed, 
ScalingFlags::AdjustCursorSpeed, flags) + DEFINE_FLAG_ACCESSOR(IsDrawCursor, ScalingFlags::DrawCursor, flags) + DEFINE_FLAG_ACCESSOR(IsDirectFlipDisabled, ScalingFlags::DisableDirectFlip, flags) + DEFINE_FLAG_ACCESSOR(IsStatisticsForDynamicDetectionEnabled, ScalingFlags::EnableStatisticsForDynamicDetection, flags) + + Cropping cropping{}; + uint32_t flags = ScalingFlags::AdjustCursorSpeed | ScalingFlags::DrawCursor; // ScalingFlags + int graphicsCard = -1; + std::optional maxFrameRate; + float cursorScaling = 1.0f; + CaptureMethod captureMethod = CaptureMethod::GraphicsCapture; + MultiMonitorUsage multiMonitorUsage = MultiMonitorUsage::Closest; + CursorInterpolationMode cursorInterpolationMode = CursorInterpolationMode::NearestNeighbor; + + std::vector effects; + + DuplicateFrameDetectionMode duplicateFrameDetectionMode = DuplicateFrameDetectionMode::Dynamic; + + void Log() const noexcept; +}; + +} diff --git a/src/Magpie.Core/ScalingRuntime.cpp b/src/Magpie.Core/ScalingRuntime.cpp new file mode 100644 index 000000000..0e7f005d7 --- /dev/null +++ b/src/Magpie.Core/ScalingRuntime.cpp @@ -0,0 +1,208 @@ +#include "pch.h" +#include "ScalingRuntime.h" +#include +#include "Logger.h" +#include "ScalingWindow.h" + +namespace Magpie::Core { + +ScalingRuntime::ScalingRuntime() : + _scalingThread(std::bind(&ScalingRuntime::_ScalingThreadProc, this)) { +} + +ScalingRuntime::~ScalingRuntime() { + Stop(); + + if (_scalingThread.joinable()) { + const HANDLE hScalingThread = _scalingThread.native_handle(); + + { + const DWORD magWndThreadId = GetThreadId(hScalingThread); + // 持续尝试直到 _scalingThread 创建了消息队列 + while (!PostThreadMessage(magWndThreadId, WM_QUIT, 0, 0)) { + Sleep(0); + } + } + + // 等待缩放线程退出,在此期间必须处理消息队列,否则缩放线程调用 + // SetWindowLongPtr 会导致死锁 + while (true) { + MSG msg; + while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { + TranslateMessage(&msg); + DispatchMessage(&msg); + } + + if (MsgWaitForMultipleObjectsEx(1, &hScalingThread, + INFINITE, QS_ALLINPUT, 
MWMO_INPUTAVAILABLE) == WAIT_OBJECT_0) { + // WAIT_OBJECT_0 表示缩放线程已退出 + // WAIT_OBJECT_0 + 1 表示有新消息 + break; + } + } + + _scalingThread.join(); + } +} + +void ScalingRuntime::Start(HWND hwndSrc, ScalingOptions&& options) { + _State expected = _State::Idle; + if (!_state.compare_exchange_strong( + expected, _State::Initializing, std::memory_order_relaxed)) { + return; + } + + _isRunningChangedEvent(true); + + _Dispatcher().TryEnqueue([this, dispatcher(_Dispatcher()), hwndSrc, options(std::move(options))]() mutable { + if (ScalingWindow::Get().Create(dispatcher, hwndSrc, std::move(options))) { + _state.store(_State::Scaling, std::memory_order_relaxed); + } else { + // 缩放失败 + _state.store(_State::Idle, std::memory_order_relaxed); + _isRunningChangedEvent(false); + } + }); +} + +void ScalingRuntime::ToggleOverlay() { + if (!IsRunning()) { + return; + } + + _Dispatcher().TryEnqueue([]() { + if (ScalingWindow& scalingWindow = ScalingWindow::Get()) { + scalingWindow.ToggleOverlay(); + }; + }); +} + +void ScalingRuntime::Stop() { + if (!IsRunning()) { + return; + } + + _Dispatcher().TryEnqueue([]() { + // 消息循环会更改 _state + ScalingWindow& scalingWindow = ScalingWindow::Get(); + if (scalingWindow.IsSrcRepositioning()) { + scalingWindow.CleanAfterSrcRepositioned(); + } else { + scalingWindow.Destroy(); + } + }); +} + +// 返回值: +// -1: 应取消缩放 +// 0: 仍在调整中 +// 1: 调整完毕 +static int GetSrcRepositionState(HWND hwndSrc, bool allowScalingMaximized) noexcept { + if (!IsWindow(hwndSrc) || GetForegroundWindow() != hwndSrc) { + return -1; + } + + if (UINT showCmd = Win32Utils::GetWindowShowCmd(hwndSrc); showCmd != SW_NORMAL) { + if (showCmd != SW_SHOWMAXIMIZED || !allowScalingMaximized) { + return -1; + } + } + + // 检查源窗口是否正在调整大小或移动 + GUITHREADINFO guiThreadInfo{ + .cbSize = sizeof(GUITHREADINFO) + }; + if (!GetGUIThreadInfo(GetWindowThreadProcessId(hwndSrc, nullptr), &guiThreadInfo)) { + Logger::Get().Win32Error("GetGUIThreadInfo 失败"); + return -1; + } + + return (guiThreadInfo.flags & 
GUI_INMOVESIZE) ? 0 : 1; +} + +void ScalingRuntime::_ScalingThreadProc() noexcept { +#ifdef _DEBUG + SetThreadDescription(GetCurrentThread(), L"Magpie 缩放线程"); +#endif + + winrt::init_apartment(winrt::apartment_type::single_threaded); + + { + winrt::DispatcherQueueController dqc{ nullptr }; + HRESULT hr = CreateDispatcherQueueController( + DispatcherQueueOptions{ + .dwSize = sizeof(DispatcherQueueOptions), + .threadType = DQTYPE_THREAD_CURRENT + }, + (PDISPATCHERQUEUECONTROLLER*)winrt::put_abi(dqc) + ); + if (FAILED(hr)) { + Logger::Get().ComError("CreateDispatcherQueueController 失败", hr); + return; + } + + _dispatcher = dqc.DispatcherQueue(); + // 如果主线程正在等待则唤醒主线程 + _dispatcherInitialized.store(true, std::memory_order_release); + _dispatcherInitialized.notify_one(); + } + + ScalingWindow& scalingWindow = ScalingWindow::Get(); + + MSG msg; + while (true) { + while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { + if (msg.message == WM_QUIT) { + scalingWindow.Destroy(); + + if (_state.exchange(_State::Idle, std::memory_order_relaxed) != _State::Idle) { + _isRunningChangedEvent(false); + } + + return; + } + + DispatchMessage(&msg); + } + + if (_state.load(std::memory_order_relaxed) != _State::Scaling) { + WaitMessage(); + continue; + } + + if (scalingWindow) { + scalingWindow.Render(); + MsgWaitForMultipleObjectsEx(0, nullptr, 1, QS_ALLINPUT, MWMO_INPUTAVAILABLE); + } else if (scalingWindow.IsSrcRepositioning()) { + const int state = GetSrcRepositionState( + scalingWindow.HwndSrc(), + scalingWindow.Options().IsAllowScalingMaximized() + ); + if (state == 0) { + // 等待调整完成 + MsgWaitForMultipleObjectsEx(0, nullptr, 10, QS_ALLINPUT, MWMO_INPUTAVAILABLE); + } else if (state == 1) { + // 重新缩放 + ScalingWindow::Get().RecreateAfterSrcRepositioned(); + } else { + // 取消缩放 + ScalingWindow::Get().CleanAfterSrcRepositioned(); + } + } else { + // 缩放结束 + _state.store(_State::Idle, std::memory_order_relaxed); + _isRunningChangedEvent(false); + } + } +} + +const winrt::DispatcherQueue& 
ScalingRuntime::_Dispatcher() noexcept { + if (!_dispatcherInitializedCache) { + _dispatcherInitialized.wait(false, std::memory_order_acquire); + _dispatcherInitializedCache = true; + } + + return _dispatcher; +} + +} diff --git a/src/Magpie.Core/MagRuntime.h b/src/Magpie.Core/ScalingRuntime.h similarity index 51% rename from src/Magpie.Core/MagRuntime.h rename to src/Magpie.Core/ScalingRuntime.h index d45c6f7c0..a8fa4a121 100644 --- a/src/Magpie.Core/MagRuntime.h +++ b/src/Magpie.Core/ScalingRuntime.h @@ -6,25 +6,19 @@ namespace Magpie::Core { -struct MagOptions; - -class MagRuntime { +class ScalingRuntime { public: - MagRuntime(); - ~MagRuntime(); - - HWND HwndSrc() const { - return _hwndSrc.load(std::memory_order_relaxed); - } + ScalingRuntime(); + ~ScalingRuntime(); - void Run(HWND hwndSrc, const MagOptions& options); + void Start(HWND hwndSrc, struct ScalingOptions&& options); void ToggleOverlay(); void Stop(); - bool IsRunning() const { - return HwndSrc(); + bool IsRunning() const noexcept { + return _state.load(std::memory_order_relaxed) != _State::Idle; } // 调用者应处理线程同步 @@ -44,18 +38,26 @@ class MagRuntime { } private: - void _MagWindThreadProc() noexcept; + void _ScalingThreadProc() noexcept; + + // 确保 _dispatcher 完成初始化 + const winrt::DispatcherQueue& _Dispatcher() noexcept; - // 确保 _dqc 完成初始化 - void _EnsureDispatcherQueue() const noexcept; + enum class _State { + Idle, + Initializing, + Scaling + }; + std::atomic<_State> _state{ _State::Idle }; - // 主线程使用 DispatcherQueue 和缩放线程沟通,因此无需约束内存定序,只需确保原子性即可 - std::atomic _hwndSrc; winrt::event> _isRunningChangedEvent; - winrt::Windows::System::DispatcherQueueController _dqc{ nullptr }; - // 应在 _dqc 后初始化 - std::thread _magWindThread; + winrt::DispatcherQueue _dispatcher{ nullptr }; + std::atomic _dispatcherInitialized = false; + // 只能在主线程访问,省下检查 _dispatcherInitialized 的开销 + bool _dispatcherInitializedCache = false; + // 应在 _dispatcher 后初始化 + std::thread _scalingThread; }; } diff --git 
a/src/Magpie.Core/ScalingWindow.cpp b/src/Magpie.Core/ScalingWindow.cpp new file mode 100644 index 000000000..ca48003e9 --- /dev/null +++ b/src/Magpie.Core/ScalingWindow.cpp @@ -0,0 +1,528 @@ +#include "pch.h" +#include "ScalingWindow.h" +#include "CommonSharedConstants.h" +#include "Logger.h" +#include "Renderer.h" +#include "Win32Utils.h" +#include "WindowHelper.h" +#include "CursorManager.h" +#include +#include "FrameSourceBase.h" +#include "ExclModeHelper.h" +#include "StrUtils.h" + +namespace Magpie::Core { + +ScalingWindow::ScalingWindow() noexcept {} + +ScalingWindow::~ScalingWindow() noexcept {} + +// 返回缩放窗口跨越的屏幕数量,失败返回 0 +static uint32_t CalcWndRect(HWND hWnd, MultiMonitorUsage multiMonitorUsage, RECT& result) { + switch (multiMonitorUsage) { + case MultiMonitorUsage::Closest: + { + // 使用距离源窗口最近的显示器 + HMONITOR hMonitor = MonitorFromWindow(hWnd, MONITOR_DEFAULTTONEAREST); + if (!hMonitor) { + Logger::Get().Win32Error("MonitorFromWindow 失败"); + return 0; + } + + MONITORINFO mi{}; + mi.cbSize = sizeof(mi); + if (!GetMonitorInfo(hMonitor, &mi)) { + Logger::Get().Win32Error("GetMonitorInfo 失败"); + return 0; + } + result = mi.rcMonitor; + + return 1; + } + case MultiMonitorUsage::Intersected: + { + // 使用源窗口跨越的所有显示器 + + if (Win32Utils::GetWindowShowCmd(hWnd) == SW_SHOWMAXIMIZED) { + // 最大化的窗口不能跨越屏幕 + HMONITOR hMon = MonitorFromWindow(hWnd, MONITOR_DEFAULTTONEAREST); + MONITORINFO mi{ .cbSize = sizeof(mi) }; + if (!GetMonitorInfo(hMon, &mi)) { + Logger::Get().Win32Error("GetMonitorInfo 失败"); + return 0; + } + + result = mi.rcMonitor; + return 1; + } else { + // [0] 存储源窗口坐标,[1] 存储计算结果 + struct MonitorEnumParam { + RECT srcRect; + RECT destRect; + uint32_t monitorCount; + } param{}; + + if (!Win32Utils::GetWindowFrameRect(hWnd, param.srcRect)) { + Logger::Get().Error("GetWindowFrameRect 失败"); + return 0; + } + + MONITORENUMPROC monitorEnumProc = [](HMONITOR, HDC, LPRECT monitorRect, LPARAM data) { + MonitorEnumParam* param = (MonitorEnumParam*)data; + + if 
(Win32Utils::CheckOverlap(param->srcRect, *monitorRect)) { + UnionRect(¶m->destRect, monitorRect, ¶m->destRect); + ++param->monitorCount; + } + + return TRUE; + }; + + if (!EnumDisplayMonitors(NULL, NULL, monitorEnumProc, (LPARAM)¶m)) { + Logger::Get().Win32Error("EnumDisplayMonitors 失败"); + return 0; + } + + result = param.destRect; + if (result.right - result.left <= 0 || result.bottom - result.top <= 0) { + Logger::Get().Error("计算缩放窗口坐标失败"); + return 0; + } + + return param.monitorCount; + } + } + case MultiMonitorUsage::All: + { + // 使用所有显示器(Virtual Screen) + int vsWidth = GetSystemMetrics(SM_CXVIRTUALSCREEN); + int vsHeight = GetSystemMetrics(SM_CYVIRTUALSCREEN); + int vsX = GetSystemMetrics(SM_XVIRTUALSCREEN); + int vsY = GetSystemMetrics(SM_YVIRTUALSCREEN); + result = { vsX, vsY, vsX + vsWidth, vsY + vsHeight }; + + return GetSystemMetrics(SM_CMONITORS); + } + default: + return 0; + } +} + +bool ScalingWindow::Create( + const winrt::DispatcherQueue& dispatcher, + HWND hwndSrc, + ScalingOptions&& options +) noexcept { + if (_hWnd) { + return false; + } + +#if _DEBUG + OutputDebugString(fmt::format(L"可执行文件路径:{}\n窗口类:{}\n", + Win32Utils::GetPathOfWnd(hwndSrc), Win32Utils::GetWndClassName(hwndSrc)).c_str()); +#endif + + _hwndSrc = hwndSrc; + // 缩放结束后才失效 + _options = std::move(options); + _dispatcher = dispatcher; + + _isSrcRepositioning = false; + + if (FindWindow(CommonSharedConstants::SCALING_WINDOW_CLASS_NAME, nullptr)) { + Logger::Get().Error("已存在缩放窗口"); + return false; + } + + // 记录缩放选项 + _options.Log(); + + // 提高时钟精度,默认为 15.6ms + timeBeginPeriod(1); + + const uint32_t monitors = CalcWndRect(_hwndSrc, _options.multiMonitorUsage, _wndRect); + if (monitors == 0) { + Logger::Get().Error("CalcWndRect 失败"); + return false; + } + + Logger::Get().Info(fmt::format("缩放窗口边界: {},{},{},{}", + _wndRect.left, _wndRect.top, _wndRect.right, _wndRect.bottom)); + + if (!_options.IsAllowScalingMaximized()) { + if (Win32Utils::GetWindowShowCmd(_hwndSrc) == SW_SHOWMAXIMIZED) { 
+ Logger::Get().Info("源窗口已最大化"); + return false; + } + + // 源窗口和缩放窗口重合则不缩放,此时源窗口可能是无边框全屏窗口 + RECT srcRect; + if (!Win32Utils::GetWindowFrameRect(_hwndSrc, srcRect)) { + Logger::Get().Error("GetWindowFrameRect 失败"); + return false; + } + + if (srcRect == _wndRect) { + Logger::Get().Info("源窗口已全屏"); + return false; + } + } + + const HINSTANCE hInstance = GetModuleHandle(nullptr); + + static const int _ = [](HINSTANCE hInstance) { + WNDCLASSEXW wcex{ + .cbSize = sizeof(wcex), + .lpfnWndProc = _WndProc, + .hInstance = hInstance, + .hCursor = LoadCursor(nullptr, IDC_ARROW), + .lpszClassName = CommonSharedConstants::SCALING_WINDOW_CLASS_NAME + }; + RegisterClassEx(&wcex); + + return 0; + }(hInstance); + + CreateWindowEx( + (_options.IsDebugMode() ? 0 : WS_EX_TOPMOST | WS_EX_TRANSPARENT) | WS_EX_LAYERED | WS_EX_TOOLWINDOW | WS_EX_NOACTIVATE + | WS_EX_NOREDIRECTIONBITMAP, + CommonSharedConstants::SCALING_WINDOW_CLASS_NAME, + L"Magpie", + WS_POPUP | (monitors == 1 ? WS_MAXIMIZE : 0), + _wndRect.left, + _wndRect.top, + _wndRect.right - _wndRect.left, + _wndRect.bottom - _wndRect.top, + NULL, + NULL, + hInstance, + this + ); + + if (!_hWnd) { + return false; + } + + // 设置窗口不透明 + // 不完全透明时可关闭 DirectFlip + if (!SetLayeredWindowAttributes(_hWnd, 0, _options.IsDirectFlipDisabled() ? 
254 : 255, LWA_ALPHA)) { + Logger::Get().Win32Error("SetLayeredWindowAttributes 失败"); + } + + if (!GetWindowRect(hwndSrc, &_srcWndRect)) { + Logger::Get().Win32Error("GetWindowRect 失败"); + Destroy(); + return false; + } + + _renderer = std::make_unique(); + if (!_renderer->Initialize()) { + Logger::Get().Error("初始化 Renderer 失败"); + Destroy(); + return false; + } + + _cursorManager = std::make_unique(); + if (!_cursorManager->Initialize()) { + Logger::Get().Error("初始化 CursorManager 失败"); + Destroy(); + return false; + } + + if (_options.IsDirectFlipDisabled() && !_options.IsDebugMode()) { + // 在此处创建的 DDF 窗口不会立刻显示 + if (!_DisableDirectFlip(hInstance)) { + Logger::Get().Error("_DisableDirectFlip 失败"); + } + } + + // 缩放窗口可能有 WS_MAXIMIZE 样式,因此使用 SetWindowsPos 而不是 ShowWindow + // 以避免 OS 更改窗口尺寸和位置。 + SetWindowPos( + _hWnd, + NULL, + _wndRect.left, + _wndRect.top, + _wndRect.right - _wndRect.left, + _wndRect.bottom - _wndRect.top, + SWP_SHOWWINDOW | SWP_NOCOPYBITS | SWP_NOREDRAW + ); + + // 为了方便调试,调试模式下使缩放窗口显示在源窗口下面 + if (_options.IsDebugMode()) { + BringWindowToTop(_hwndSrc); + } + + // 模拟独占全屏 + if (_options.IsSimulateExclusiveFullscreen()) { + // 延迟 1s 以避免干扰游戏的初始化,见 #495 + ([]()->winrt::fire_and_forget { + ScalingWindow& that = ScalingWindow::Get(); + const HWND hwndScaling = that.Handle(); + winrt::DispatcherQueue dispatcher = that._dispatcher; + + co_await 1s; + co_await dispatcher; + + if (that.Handle() != hwndScaling) { + co_return; + } + + if (!that._exclModeMutex) { + that._exclModeMutex = ExclModeHelper::EnterExclMode(); + } + })(); + }; + + return true; +} + +void ScalingWindow::Render() noexcept { + int srcState = _CheckSrcState(); + if (srcState != 0) { + Logger::Get().Info("源窗口状态改变,退出全屏"); + // 切换前台窗口导致停止缩放时不应激活源窗口 + _renderer->SetOverlayVisibility(false, true); + + _isSrcRepositioning = srcState == 2; + Destroy(); + return; + } + + _cursorManager->Update(); + if (_renderer->Render()) { + // 为了避免用户看到 DDF 窗口,在渲染第一帧后显示 + if (_hwndDDF && !_isDDFWindowShown) { + 
ShowWindow(_hwndDDF, SW_NORMAL); + SetWindowPos(_hwndDDF, Handle(), 0, 0, 0, 0, SWP_NOSIZE | SWP_NOMOVE | SWP_NOREDRAW); + _isDDFWindowShown = true; + } + } +} + +void ScalingWindow::ToggleOverlay() noexcept { + if (_renderer) { + _renderer->SetOverlayVisibility(!_renderer->IsOverlayVisible()); + } +} + +void ScalingWindow::RecreateAfterSrcRepositioned() noexcept { + Create(_dispatcher, _hwndSrc, std::move(_options)); +} + +void ScalingWindow::CleanAfterSrcRepositioned() noexcept { + _options = {}; + _hwndSrc = NULL; + _dispatcher = nullptr; + _isSrcRepositioning = false; +} + +LRESULT ScalingWindow::_MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept { + if (_renderer) { + _renderer->MessageHandler(msg, wParam, lParam); + } + + switch (msg) { + case WM_LBUTTONDOWN: + case WM_RBUTTONDOWN: + { + if (_options.Is3DGameMode()) { + break; + } + + // 在以下情况下会收到光标消息: + // 1、未捕获光标且缩放后的位置未被遮挡而缩放前的位置被遮挡 + // 2、光标位于叠加层上 + // 这时鼠标点击将激活源窗口 + const HWND hwndForground = GetForegroundWindow(); + if (hwndForground != _hwndSrc) { + if (!Win32Utils::SetForegroundWindow(_hwndSrc)) { + // 设置前台窗口失败,可能是因为前台窗口是开始菜单 + if (WindowHelper::IsStartMenu(hwndForground)) { + using namespace std::chrono; + + // 限制触发频率 + static steady_clock::time_point prevTimePoint{}; + auto now = steady_clock::now(); + if (duration_cast(now - prevTimePoint).count() >= 1000) { + prevTimePoint = now; + + // 模拟按键关闭开始菜单 + INPUT inputs[]{ + INPUT{ + .type = INPUT_KEYBOARD, + .ki = KEYBDINPUT{ + .wVk = VK_LWIN + } + }, + INPUT{ + .type = INPUT_KEYBOARD, + .ki = KEYBDINPUT{ + .wVk = VK_LWIN, + .dwFlags = KEYEVENTF_KEYUP + } + } + }; + SendInput((UINT)std::size(inputs), inputs, sizeof(INPUT)); + + // 等待系统处理 + Sleep(1); + } + + SetForegroundWindow(_hwndSrc); + } + } + } + break; + } + case WM_DESTROY: + { + if (_exclModeMutex) { + ExclModeHelper::ExitExclMode(_exclModeMutex); + } + + if (_hwndDDF) { + DestroyWindow(_hwndDDF); + _hwndDDF = NULL; + _isDDFWindowShown = false; + } + + _cursorManager.reset(); + 
_renderer.reset(); + _srcWndRect = {}; + + // 如果正在源窗口正在调整,暂时不清理这些成员 + if (!_isSrcRepositioning) { + _options = {}; + _hwndSrc = NULL; + _dispatcher = nullptr; + } + + // 还原时钟精度 + timeEndPeriod(1); + break; + } + } + return base_type::_MessageHandler(msg, wParam, lParam); +} + +// 0 -> 可继续缩放 +// 1 -> 前台窗口改变或源窗口最大化(如果不允许缩放最大化的窗口)/最小化 +// 2 -> 源窗口大小或位置改变或最大化(如果允许缩放最大化的窗口) +int ScalingWindow::_CheckSrcState() const noexcept { + if (!_options.IsDebugMode()) { + HWND hwndForeground = GetForegroundWindow(); + + // 3D 游戏模式下打开叠加层后如果源窗口意外回到前台应关闭叠加层 + if (_options.Is3DGameMode() && _renderer->IsOverlayVisible() && hwndForeground == _hwndSrc) { + _renderer->SetOverlayVisibility(false, true); + } + + // 在 3D 游戏模式下打开叠加层则全屏窗口可以接收焦点 + if (!_options.Is3DGameMode() || !_renderer->IsOverlayVisible() || hwndForeground != _hWnd) { + if (hwndForeground && hwndForeground != _hwndSrc && !_CheckForeground(hwndForeground)) { + Logger::Get().Info("前台窗口已改变"); + return 1; + } + } + } + + UINT showCmd = Win32Utils::GetWindowShowCmd(_hwndSrc); + if (showCmd != SW_NORMAL && (showCmd != SW_SHOWMAXIMIZED || !_options.IsAllowScalingMaximized())) { + Logger::Get().Info("源窗口显示状态改变"); + return 1; + } + + RECT rect; + if (!GetWindowRect(_hwndSrc, &rect)) { + Logger::Get().Error("GetWindowRect 失败"); + return 1; + } + + if (_srcWndRect != rect) { + Logger::Get().Info("源窗口位置或大小改变"); + return 2; + } + + return 0; +} + +bool ScalingWindow::_CheckForeground(HWND hwndForeground) const noexcept { + std::wstring className = Win32Utils::GetWndClassName(hwndForeground); + + if (!WindowHelper::IsValidSrcWindow(hwndForeground)) { + return true; + } + + RECT rectForground; + if (!Win32Utils::GetWindowFrameRect(hwndForeground, rectForground)) { + Logger::Get().Error("DwmGetWindowAttribute 失败"); + return false; + } + + if (!IntersectRect(&rectForground, &rectForground, &_wndRect)) { + // 没有重叠 + return true; + } + + // 允许稍微重叠,减少意外停止缩放的机率 + SIZE rectSize = Win32Utils::GetSizeOfRect(rectForground); + return rectSize.cx < 
8 || rectSize.cy < 8; +} + +bool ScalingWindow::_DisableDirectFlip(HINSTANCE hInstance) noexcept { + // 没有显式关闭 DirectFlip 的方法 + // 将全屏窗口设为稍微透明,以灰色全屏窗口为背景 + + static const int _ = [](HINSTANCE hInstance) { + WNDCLASSEXW wcex{ + .cbSize = sizeof(wcex), + .lpfnWndProc = DefWindowProc, + .hInstance = hInstance, + .hCursor = LoadCursor(nullptr, IDC_ARROW), + .hbrBackground = (HBRUSH)GetStockObject(GRAY_BRUSH), + .lpszClassName = CommonSharedConstants::DDF_WINDOW_CLASS_NAME + }; + RegisterClassEx(&wcex); + + return 0; + }(hInstance); + + _hwndDDF = CreateWindowEx( + WS_EX_NOACTIVATE | WS_EX_LAYERED | WS_EX_TRANSPARENT, + CommonSharedConstants::DDF_WINDOW_CLASS_NAME, + NULL, + WS_POPUP, + _wndRect.left, + _wndRect.top, + _wndRect.right - _wndRect.left, + _wndRect.bottom - _wndRect.top, + NULL, + NULL, + hInstance, + NULL + ); + + if (!_hwndDDF) { + Logger::Get().Win32Error("创建 DDF 窗口失败"); + return false; + } + + // 设置窗口不透明 + if (!SetLayeredWindowAttributes(_hwndDDF, 0, 255, LWA_ALPHA)) { + Logger::Get().Win32Error("SetLayeredWindowAttributes 失败"); + } + + if (_renderer->FrameSource().IsScreenCapture()) { + if (Win32Utils::GetOSVersion().Is20H1OrNewer()) { + // 使 DDF 窗口无法被捕获到 + if (!SetWindowDisplayAffinity(_hwndDDF, WDA_EXCLUDEFROMCAPTURE)) { + Logger::Get().Win32Error("SetWindowDisplayAffinity 失败"); + } + } + } + + return true; +} + +} diff --git a/src/Magpie.Core/ScalingWindow.h b/src/Magpie.Core/ScalingWindow.h new file mode 100644 index 000000000..5f0827d78 --- /dev/null +++ b/src/Magpie.Core/ScalingWindow.h @@ -0,0 +1,92 @@ +#pragma once +#include "WindowBase.h" +#include "ScalingOptions.h" +#include "Win32Utils.h" + +namespace Magpie::Core { + +class CursorManager; + +class ScalingWindow : public WindowBase { + friend class base_type; + +public: + static ScalingWindow& Get() noexcept { + static ScalingWindow instance; + return instance; + } + + bool Create( + const winrt::DispatcherQueue& dispatcher, + HWND hwndSrc, + ScalingOptions&& options + ) noexcept; + + void 
Render() noexcept; + + void ToggleOverlay() noexcept; + + const RECT& WndRect() const noexcept { + return _wndRect; + } + + const ScalingOptions& Options() const noexcept { + return _options; + } + + HWND HwndSrc() const noexcept { + return _hwndSrc; + } + + class Renderer& Renderer() noexcept { + return *_renderer; + } + + CursorManager& CursorManager() noexcept { + return *_cursorManager; + } + + const winrt::DispatcherQueue& Dispatcher() const noexcept { + return _dispatcher; + } + + bool IsSrcRepositioning() const noexcept { + return _isSrcRepositioning; + } + + void RecreateAfterSrcRepositioned() noexcept; + + void CleanAfterSrcRepositioned() noexcept; + +protected: + LRESULT _MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept; + +private: + ScalingWindow() noexcept; + ~ScalingWindow() noexcept; + + int _CheckSrcState() const noexcept; + + bool _CheckForeground(HWND hwndForeground) const noexcept; + + bool _DisableDirectFlip(HINSTANCE hInstance) noexcept; + + winrt::DispatcherQueue _dispatcher{ nullptr }; + + RECT _wndRect{}; + + ScalingOptions _options; + std::unique_ptr _renderer; + std::unique_ptr _cursorManager; + + HWND _hwndSrc = NULL; + RECT _srcWndRect{}; + + HWND _hwndDDF = NULL; + Win32Utils::ScopedHandle _exclModeMutex; + + bool _isSrcRepositioning = false; + bool _isDDFWindowShown = false; +}; + +} diff --git a/src/Magpie.Core/StepTimer.cpp b/src/Magpie.Core/StepTimer.cpp new file mode 100644 index 000000000..b135bf36d --- /dev/null +++ b/src/Magpie.Core/StepTimer.cpp @@ -0,0 +1,74 @@ +#include "pch.h" +#include "StepTimer.h" + +using namespace std::chrono; + +namespace Magpie::Core { + +void StepTimer::Initialize(std::optional maxFrameRate) noexcept { + if (maxFrameRate) { + _minInterval = duration_cast(duration(1 / *maxFrameRate)); + _hTimer.reset(CreateWaitableTimerEx(nullptr, nullptr, + CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS)); + } +} + +bool StepTimer::WaitForNextFrame() noexcept { + if (!_minInterval) { + return 
true; + } + + const time_point now = steady_clock::now(); + const nanoseconds delta = now - _lastFrameTime; + if (delta >= *_minInterval) { + _lastFrameTime = now - delta % *_minInterval; + return true; + } + + const nanoseconds rest = *_minInterval - delta; + if (rest > 1ms) { + // Sleep 精度太低,我们使用 WaitableTimer 睡眠。负值表示相对时间 + LARGE_INTEGER liDueTime{ + .QuadPart = (rest - 1ms).count() / -100 + }; + SetWaitableTimerEx(_hTimer.get(), &liDueTime, 0, NULL, NULL, 0, 0); + WaitForSingleObject(_hTimer.get(), INFINITE); + } else { + // 剩余时间在 1ms 以内则“忙等待” + Sleep(0); + } + + return false; +} + +void StepTimer::UpdateFPS(bool newFrame) noexcept { + if (_lastSecondTime == time_point{}) { + // 第一帧 + if (!newFrame) { + // 在第一帧前不更新 FPS + return; + } + + _lastSecondTime = steady_clock::now(); + _framesPerSecond.store(1, std::memory_order_relaxed); + return; + } + + if (newFrame) { + // 更新帧数 + ++_framesThisSecond; + ++_frameCount; + } + + const time_point now = steady_clock::now(); + const nanoseconds delta = now - _lastSecondTime; + if (delta >= 1s) { + _lastSecondTime = now - delta % 1s; + + _framesPerSecond.store(_framesThisSecond, std::memory_order_relaxed); + _framesThisSecond = 0; + } + +} + +} diff --git a/src/Magpie.Core/StepTimer.h b/src/Magpie.Core/StepTimer.h new file mode 100644 index 000000000..a53c52eb3 --- /dev/null +++ b/src/Magpie.Core/StepTimer.h @@ -0,0 +1,40 @@ +#pragma once +#include "Win32Utils.h" + +namespace Magpie::Core { + +class StepTimer { +public: + StepTimer() = default; + + StepTimer(const StepTimer&) = delete; + StepTimer(StepTimer&&) = delete; + + void Initialize(std::optional maxFrameRate) noexcept; + + bool WaitForNextFrame() noexcept; + + void UpdateFPS(bool newFrame) noexcept; + + uint32_t FrameCount() const noexcept { + return _frameCount; + } + + // 从前端线程调用 + uint32_t FPS() const noexcept { + return _framesPerSecond.load(std::memory_order_relaxed); + } + +private: + std::optional _minInterval; + Win32Utils::ScopedHandle _hTimer; + + 
std::chrono::time_point _lastFrameTime; + std::chrono::time_point _lastSecondTime; + + uint32_t _frameCount = 0; + std::atomic _framesPerSecond = 0; + uint32_t _framesThisSecond = 0; +}; + +} diff --git a/src/Magpie.Core/TextureLoader.cpp b/src/Magpie.Core/TextureLoader.cpp index 7b811e152..bc4ec289e 100644 --- a/src/Magpie.Core/TextureLoader.cpp +++ b/src/Magpie.Core/TextureLoader.cpp @@ -1,13 +1,11 @@ #include "pch.h" #include "TextureLoader.h" -#include "DeviceResources.h" -#include "MagApp.h" #include "Logger.h" #include "DDS.h" #include "DDSLoderHelpers.h" #include "Utils.h" #include - +#include "DirectXHelper.h" /////////////////////////////////////////////////////////////////// // 读取 DDS 文件的代码取自 https://github.com/microsoft/DirectXTK // @@ -16,7 +14,7 @@ namespace Magpie::Core { -HRESULT CreateD3DResources( +static HRESULT CreateD3DResources( _In_ ID3D11Device* d3dDevice, _In_ uint32_t resDim, _In_ size_t width, @@ -32,8 +30,7 @@ HRESULT CreateD3DResources( _In_ bool forceSRGB, _In_ bool isCubeMap, _In_reads_opt_(mipCount* arraySize) const D3D11_SUBRESOURCE_DATA* initData, - _Outptr_opt_ ID3D11Resource** texture, - _Outptr_opt_ ID3D11ShaderResourceView** textureView) noexcept { + _Outptr_opt_ ID3D11Resource** texture) noexcept { if (!d3dDevice) return E_POINTER; @@ -62,29 +59,6 @@ HRESULT CreateD3DResources( &tex ); if (SUCCEEDED(hr) && tex) { - if (textureView) { - D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; - SRVDesc.Format = format; - - if (arraySize > 1) { - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; - SRVDesc.Texture1DArray.MipLevels = (!mipCount) ? UINT(-1) : desc.MipLevels; - SRVDesc.Texture1DArray.ArraySize = static_cast(arraySize); - } else { - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; - SRVDesc.Texture1D.MipLevels = (!mipCount) ? 
UINT(-1) : desc.MipLevels; - } - - hr = d3dDevice->CreateShaderResourceView(tex, - &SRVDesc, - textureView - ); - if (FAILED(hr)) { - tex->Release(); - return hr; - } - } - if (texture) { *texture = tex; } else { @@ -119,40 +93,6 @@ HRESULT CreateD3DResources( &tex ); if (SUCCEEDED(hr) && tex) { - if (textureView) { - D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; - SRVDesc.Format = format; - - if (isCubeMap) { - if (arraySize > 6) { - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBEARRAY; - SRVDesc.TextureCubeArray.MipLevels = (!mipCount) ? UINT(-1) : desc.MipLevels; - - // Earlier we set arraySize to (NumCubes * 6) - SRVDesc.TextureCubeArray.NumCubes = static_cast(arraySize / 6); - } else { - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE; - SRVDesc.TextureCube.MipLevels = (!mipCount) ? UINT(-1) : desc.MipLevels; - } - } else if (arraySize > 1) { - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; - SRVDesc.Texture2DArray.MipLevels = (!mipCount) ? UINT(-1) : desc.MipLevels; - SRVDesc.Texture2DArray.ArraySize = static_cast(arraySize); - } else { - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; - SRVDesc.Texture2D.MipLevels = (!mipCount) ? UINT(-1) : desc.MipLevels; - } - - hr = d3dDevice->CreateShaderResourceView(tex, - &SRVDesc, - textureView - ); - if (FAILED(hr)) { - tex->Release(); - return hr; - } - } - if (texture) { *texture = tex; } else { @@ -181,23 +121,6 @@ HRESULT CreateD3DResources( &tex ); if (SUCCEEDED(hr) && tex) { - if (textureView) { - D3D11_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; - SRVDesc.Format = format; - - SRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; - SRVDesc.Texture3D.MipLevels = (!mipCount) ? 
UINT(-1) : desc.MipLevels; - - hr = d3dDevice->CreateShaderResourceView(tex, - &SRVDesc, - textureView - ); - if (FAILED(hr)) { - tex->Release(); - return hr; - } - } - if (texture) { *texture = tex; } else { @@ -211,7 +134,7 @@ HRESULT CreateD3DResources( return hr; } -HRESULT FillInitData( +static HRESULT FillInitData( _In_ size_t width, _In_ size_t height, _In_ size_t depth, @@ -295,9 +218,8 @@ HRESULT FillInitData( return (index > 0) ? S_OK : E_FAIL; } -HRESULT CreateTextureFromDDS( +static HRESULT CreateTextureFromDDS( _In_ ID3D11Device* d3dDevice, - _In_opt_ ID3D11DeviceContext* d3dContext, _In_ const DDS_HEADER* header, _In_reads_bytes_(bitSize) const uint8_t* bitData, _In_ size_t bitSize, @@ -307,8 +229,7 @@ HRESULT CreateTextureFromDDS( _In_ unsigned int cpuAccessFlags, _In_ unsigned int miscFlags, _In_ bool forceSRGB, - _Outptr_opt_ ID3D11Resource** texture, - _Outptr_opt_ ID3D11ShaderResourceView** textureView) noexcept { + _Outptr_opt_ ID3D11Resource** texture) noexcept { HRESULT hr = S_OK; const UINT width = header->width; @@ -479,163 +400,50 @@ HRESULT CreateTextureFromDDS( return HRESULT_FROM_WIN32(ERROR_NOT_SUPPORTED); } - bool autogen = false; - if (mipCount == 1 && d3dContext && textureView) // Must have context and shader-view to auto generate mipmaps - { - // See if format is supported for auto-gen mipmaps (varies by feature level) - UINT fmtSupport = 0; - hr = d3dDevice->CheckFormatSupport(format, &fmtSupport); - if (SUCCEEDED(hr) && (fmtSupport & D3D11_FORMAT_SUPPORT_MIP_AUTOGEN)) { - // 10level9 feature levels do not support auto-gen mipgen for volume textures - if ((resDim != D3D11_RESOURCE_DIMENSION_TEXTURE3D) - || (d3dDevice->GetFeatureLevel() >= D3D_FEATURE_LEVEL_10_0)) { - autogen = true; - } - } + // Create the texture + std::unique_ptr initData(new (std::nothrow) D3D11_SUBRESOURCE_DATA[mipCount * arraySize]); + if (!initData) { + return E_OUTOFMEMORY; } - if (autogen) { - // Create texture with auto-generated mipmaps - ID3D11Resource* 
tex = nullptr; + size_t skipMip = 0; + size_t twidth = 0; + size_t theight = 0; + size_t tdepth = 0; + hr = FillInitData(width, height, depth, mipCount, arraySize, format, + maxsize, bitSize, bitData, + twidth, theight, tdepth, skipMip, initData.get()); + + if (SUCCEEDED(hr)) { hr = CreateD3DResources(d3dDevice, - resDim, width, height, depth, 0, arraySize, + resDim, twidth, theight, tdepth, mipCount - skipMip, arraySize, format, - usage, - bindFlags | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, - cpuAccessFlags, - miscFlags | D3D11_RESOURCE_MISC_GENERATE_MIPS, forceSRGB, + usage, bindFlags, cpuAccessFlags, miscFlags, + forceSRGB, isCubeMap, - nullptr, - &tex, textureView); - if (SUCCEEDED(hr)) { - size_t numBytes = 0; - size_t rowBytes = 0; - hr = GetSurfaceInfo(width, height, format, &numBytes, &rowBytes, nullptr); - if (FAILED(hr)) - return hr; - - if (numBytes > bitSize) { - (*textureView)->Release(); - *textureView = nullptr; - tex->Release(); - return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); - } - - if (numBytes > UINT32_MAX || rowBytes > UINT32_MAX) - return HRESULT_FROM_WIN32(ERROR_ARITHMETIC_OVERFLOW); - - D3D11_SHADER_RESOURCE_VIEW_DESC desc = {}; - (*textureView)->GetDesc(&desc); - - UINT mipLevels = 1; - - switch (desc.ViewDimension) { - case D3D_SRV_DIMENSION_TEXTURE1D: mipLevels = desc.Texture1D.MipLevels; break; - case D3D_SRV_DIMENSION_TEXTURE1DARRAY: mipLevels = desc.Texture1DArray.MipLevels; break; - case D3D_SRV_DIMENSION_TEXTURE2D: mipLevels = desc.Texture2D.MipLevels; break; - case D3D_SRV_DIMENSION_TEXTURE2DARRAY: mipLevels = desc.Texture2DArray.MipLevels; break; - case D3D_SRV_DIMENSION_TEXTURECUBE: mipLevels = desc.TextureCube.MipLevels; break; - case D3D_SRV_DIMENSION_TEXTURECUBEARRAY:mipLevels = desc.TextureCubeArray.MipLevels; break; - case D3D_SRV_DIMENSION_TEXTURE3D: mipLevels = desc.Texture3D.MipLevels; break; - default: - (*textureView)->Release(); - *textureView = nullptr; - tex->Release(); - return E_UNEXPECTED; - } - - if 
(arraySize > 1) { - const uint8_t* pSrcBits = bitData; - const uint8_t* pEndBits = bitData + bitSize; - for (UINT item = 0; item < arraySize; ++item) { - if ((pSrcBits + numBytes) > pEndBits) { - (*textureView)->Release(); - *textureView = nullptr; - tex->Release(); - return HRESULT_FROM_WIN32(ERROR_HANDLE_EOF); - } - - const UINT res = D3D11CalcSubresource(0, item, mipLevels); - d3dContext->UpdateSubresource(tex, res, nullptr, pSrcBits, static_cast(rowBytes), static_cast(numBytes)); - pSrcBits += numBytes; - } - } else { - d3dContext->UpdateSubresource(tex, 0, nullptr, bitData, static_cast(rowBytes), static_cast(numBytes)); - } - - d3dContext->GenerateMips(*textureView); - - if (texture) { - *texture = tex; - } else { - tex->Release(); - } - } - } else { - // Create the texture - std::unique_ptr initData(new (std::nothrow) D3D11_SUBRESOURCE_DATA[mipCount * arraySize]); - if (!initData) { - return E_OUTOFMEMORY; - } - - size_t skipMip = 0; - size_t twidth = 0; - size_t theight = 0; - size_t tdepth = 0; - hr = FillInitData(width, height, depth, mipCount, arraySize, format, - maxsize, bitSize, bitData, - twidth, theight, tdepth, skipMip, initData.get()); - - if (SUCCEEDED(hr)) { - hr = CreateD3DResources(d3dDevice, - resDim, twidth, theight, tdepth, mipCount - skipMip, arraySize, - format, - usage, bindFlags, cpuAccessFlags, miscFlags, - forceSRGB, - isCubeMap, - initData.get(), - texture, textureView); - - if (FAILED(hr) && !maxsize && (mipCount > 1)) { - // Retry with a maxsize determined by feature level - switch (d3dDevice->GetFeatureLevel()) { - case D3D_FEATURE_LEVEL_9_1: - case D3D_FEATURE_LEVEL_9_2: - if (isCubeMap) { - maxsize = 512u /*D3D_FL9_1_REQ_TEXTURECUBE_DIMENSION*/; - } else { - maxsize = (resDim == D3D11_RESOURCE_DIMENSION_TEXTURE3D) - ? 256u /*D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION*/ - : 2048u /*D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION*/; - } - break; - - case D3D_FEATURE_LEVEL_9_3: - maxsize = (resDim == D3D11_RESOURCE_DIMENSION_TEXTURE3D) - ? 
256u /*D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION*/ - : 4096u /*D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION*/; - break; - - default: // D3D_FEATURE_LEVEL_10_0 & D3D_FEATURE_LEVEL_10_1 - maxsize = (resDim == D3D11_RESOURCE_DIMENSION_TEXTURE3D) - ? 2048u /*D3D10_REQ_TEXTURE3D_U_V_OR_W_DIMENSION*/ - : 8192u /*D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION*/; - break; - } + initData.get(), + texture + ); - hr = FillInitData(width, height, depth, mipCount, arraySize, format, - maxsize, bitSize, bitData, - twidth, theight, tdepth, skipMip, initData.get()); - if (SUCCEEDED(hr)) { - hr = CreateD3DResources(d3dDevice, - resDim, twidth, theight, tdepth, mipCount - skipMip, arraySize, - format, - usage, bindFlags, cpuAccessFlags, miscFlags, - forceSRGB, - isCubeMap, - initData.get(), - texture, textureView); - } + if (FAILED(hr) && !maxsize && (mipCount > 1)) { + // Retry with a maxsize determined by feature level + maxsize = (resDim == D3D11_RESOURCE_DIMENSION_TEXTURE3D) + ? D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION + : D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; + + hr = FillInitData(width, height, depth, mipCount, arraySize, format, + maxsize, bitSize, bitData, + twidth, theight, tdepth, skipMip, initData.get()); + if (SUCCEEDED(hr)) { + hr = CreateD3DResources(d3dDevice, + resDim, twidth, theight, tdepth, mipCount - skipMip, arraySize, + format, + usage, bindFlags, cpuAccessFlags, miscFlags, + forceSRGB, + isCubeMap, + initData.get(), + texture + ); } } } @@ -643,7 +451,7 @@ HRESULT CreateTextureFromDDS( return hr; } -HRESULT CreateDDSTextureFromFileEx( +static HRESULT CreateDDSTextureFromFileEx( ID3D11Device* d3dDevice, const wchar_t* fileName, size_t maxsize, @@ -653,23 +461,15 @@ HRESULT CreateDDSTextureFromFileEx( unsigned int miscFlags, bool forceSRGB, ID3D11Resource** texture, - ID3D11ShaderResourceView** textureView, DDS_ALPHA_MODE* alphaMode) noexcept { if (texture) { *texture = nullptr; } - if (textureView) { - *textureView = nullptr; - } if (alphaMode) { *alphaMode = 
DDS_ALPHA_MODE_UNKNOWN; } - if (!d3dDevice || !fileName || (!texture && !textureView)) { - return E_INVALIDARG; - } - - if (textureView && !(bindFlags & D3D11_BIND_SHADER_RESOURCE)) { + if (!d3dDevice || !fileName || !texture) { return E_INVALIDARG; } @@ -688,12 +488,13 @@ HRESULT CreateDDSTextureFromFileEx( return hr; } - hr = CreateTextureFromDDS(d3dDevice, nullptr, + hr = CreateTextureFromDDS(d3dDevice, header, bitData, bitSize, maxsize, usage, bindFlags, cpuAccessFlags, miscFlags, forceSRGB, - texture, textureView); + texture + ); if (SUCCEEDED(hr)) { if (alphaMode) @@ -703,7 +504,7 @@ HRESULT CreateDDSTextureFromFileEx( return hr; } -winrt::com_ptr LoadImg(const wchar_t* fileName) { +static winrt::com_ptr LoadImg(const wchar_t* fileName, ID3D11Device* d3dDevice) noexcept { winrt::com_ptr wicImgFactory = winrt::try_create_instance(CLSID_WICImagingFactory); if (!wicImgFactory) { @@ -804,8 +605,9 @@ winrt::com_ptr LoadImg(const wchar_t* fileName) { D3D11_SUBRESOURCE_DATA initData{}; initData.pSysMem = buf.get(); initData.SysMemPitch = stride; - - winrt::com_ptr result = MagApp::Get().GetDeviceResources().CreateTexture2D( + + winrt::com_ptr result = DirectXHelper::CreateTexture2D( + d3dDevice, useFloatFormat ? 
DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_R8G8B8A8_UNORM, width, height, @@ -822,12 +624,12 @@ winrt::com_ptr LoadImg(const wchar_t* fileName) { return result; } -winrt::com_ptr LoadDDS(const wchar_t* fileName) { +static winrt::com_ptr LoadDDS(const wchar_t* fileName, ID3D11Device* d3dDevice) noexcept { winrt::com_ptr result; - + DDS_ALPHA_MODE alphaMode = DDS_ALPHA_MODE_STRAIGHT; HRESULT hr = CreateDDSTextureFromFileEx( - MagApp::Get().GetDeviceResources().GetD3DDevice(), + d3dDevice, fileName, 0, D3D11_USAGE_IMMUTABLE, @@ -836,7 +638,6 @@ winrt::com_ptr LoadDDS(const wchar_t* fileName) { 0, false, result.put(), - nullptr, &alphaMode ); if (FAILED(hr)) { @@ -853,7 +654,7 @@ winrt::com_ptr LoadDDS(const wchar_t* fileName) { return tex; } -winrt::com_ptr TextureLoader::Load(const wchar_t* fileName) { +winrt::com_ptr TextureLoader::Load(const wchar_t* fileName, ID3D11Device* d3dDevice) noexcept { std::wstring_view sv(fileName); size_t npos = sv.find_last_of(L'.'); if (npos == std::wstring_view::npos) { @@ -864,13 +665,13 @@ winrt::com_ptr TextureLoader::Load(const wchar_t* fileName) { std::wstring_view suffix = sv.substr(npos + 1); if (suffix == L"dds") { - return LoadDDS(fileName); + return LoadDDS(fileName, d3dDevice); } if (suffix == L"bmp" || suffix == L"jpg" || suffix == L"jpeg" || suffix == L"png" || suffix == L"tif" || suffix == L"tiff" ) { - return LoadImg(fileName); + return LoadImg(fileName, d3dDevice); } return nullptr; diff --git a/src/Magpie.Core/TextureLoader.h b/src/Magpie.Core/TextureLoader.h index d3d561ef1..78ec723a2 100644 --- a/src/Magpie.Core/TextureLoader.h +++ b/src/Magpie.Core/TextureLoader.h @@ -4,7 +4,7 @@ namespace Magpie::Core { class TextureLoader { public: - static winrt::com_ptr Load(const wchar_t* fileName); + static winrt::com_ptr Load(const wchar_t* fileName, ID3D11Device* d3dDevice) noexcept; }; } diff --git a/src/Magpie.Core/WindowBase.h b/src/Magpie.Core/WindowBase.h new file mode 100644 index 000000000..1d01a1016 --- /dev/null 
+++ b/src/Magpie.Core/WindowBase.h @@ -0,0 +1,61 @@ +#pragma once + +namespace Magpie::Core { + +template +class WindowBase { +public: + WindowBase() noexcept = default; + WindowBase(const WindowBase&) = delete; + WindowBase(WindowBase&&) noexcept = default; + + virtual ~WindowBase() noexcept { + Destroy(); + } + + HWND Handle() const noexcept { + return _hWnd; + } + + operator bool() const noexcept { + return _hWnd; + } + + void Destroy() const noexcept { + if (_hWnd) { + DestroyWindow(_hWnd); + } + } + +protected: + using base_type = WindowBase; + + static LRESULT CALLBACK _WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) noexcept { + if (msg == WM_NCCREATE) { + WindowBase* that = (WindowBase*)(((CREATESTRUCT*)lParam)->lpCreateParams); + assert(that && !that->_hWnd); + that->_hWnd = hWnd; + SetWindowLongPtr(hWnd, GWLP_USERDATA, (LONG_PTR)that); + } else if (T* that = (T*)GetWindowLongPtr(hWnd, GWLP_USERDATA)) { + return that->_MessageHandler(msg, wParam, lParam); + } + + return DefWindowProc(hWnd, msg, wParam, lParam); + } + + LRESULT _MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept { + switch (msg) { + case WM_DESTROY: + { + _hWnd = NULL; + return 0; + } + } + + return DefWindowProc(_hWnd, msg, wParam, lParam); + } + + HWND _hWnd = NULL; +}; + +} diff --git a/src/Magpie.Core/include/Magpie.Core.h b/src/Magpie.Core/include/Magpie.Core.h index 5b660027d..f5a4d1ef7 100644 --- a/src/Magpie.Core/include/Magpie.Core.h +++ b/src/Magpie.Core/include/Magpie.Core.h @@ -1,6 +1,7 @@ #pragma once -#include "../MagOptions.h" -#include "../MagRuntime.h" +#include "../ScalingOptions.h" +#include "../ScalingRuntime.h" #include "../LoggerHelper.h" #include "../EffectCompiler.h" #include "../EffectDesc.h" +#include "../WindowHelper.h" diff --git a/src/Magpie.Core/shaders/.gitignore b/src/Magpie.Core/shaders/.gitignore new file mode 100644 index 000000000..424c745c1 --- /dev/null +++ b/src/Magpie.Core/shaders/.gitignore @@ -0,0 +1 @@ +*.h diff --git 
a/src/Magpie.Core/shaders/DuplicateFrameCS.hlsl b/src/Magpie.Core/shaders/DuplicateFrameCS.hlsl new file mode 100644 index 000000000..5de33809b --- /dev/null +++ b/src/Magpie.Core/shaders/DuplicateFrameCS.hlsl @@ -0,0 +1,35 @@ +// 无需同步 +RWBuffer result : register(u0); + +Texture2D tex1 : register(t0); +Texture2D tex2 : register(t1); + +SamplerState sam : register(s0); + +[numthreads(8, 8, 1)] +void main(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) { + if (result[0]) { + return; + } + + const int2 gxy = (gid.xy << 4) + (tid.xy << 1); + + // 不知为何这比通过 cbuffer 传入更快 + uint width, height; + tex1.GetDimensions(width, height); + const float2 pos = (gxy + 1) / float2(width, height); + + if (any(tex1.GatherRed(sam, pos) != tex2.GatherRed(sam, pos))) { + result[0] = 1u; + return; + } + + if (any(tex1.GatherGreen(sam, pos) != tex2.GatherGreen(sam, pos))) { + result[0] = 1u; + return; + } + + if (any(tex1.GatherBlue(sam, pos) != tex2.GatherBlue(sam, pos))) { + result[0] = 1u; + } +} diff --git a/src/Magpie.Core/shaders/ImGuiImplPS.hlsl b/src/Magpie.Core/shaders/ImGuiImplPS.hlsl new file mode 100644 index 000000000..6507bfd5e --- /dev/null +++ b/src/Magpie.Core/shaders/ImGuiImplPS.hlsl @@ -0,0 +1,6 @@ +SamplerState sam : register(s0); +Texture2D tex : register(t0); + +float4 main(float2 coord : TEXCOORD, float4 color : COLOR) : SV_Target { + return color * float4(1, 1, 1, tex.Sample(sam, coord).r); +} diff --git a/src/Magpie.Core/shaders/ImGuiImplVS.hlsl b/src/Magpie.Core/shaders/ImGuiImplVS.hlsl new file mode 100644 index 000000000..0cd3f2c2f --- /dev/null +++ b/src/Magpie.Core/shaders/ImGuiImplVS.hlsl @@ -0,0 +1,16 @@ +cbuffer vertexBuffer : register(b0) { + float4x4 projectionMatrix; +}; + +void main( + float4 pos : SV_POSITION, + float2 coord : TEXCOORD, + float4 color : COLOR, + out float2 outCoord : TEXCOORD, + out float4 outColor : COLOR, + out float4 outPos : SV_POSITION +) { + outPos = mul(projectionMatrix, float4(pos.xy, 0.f, 1.f)); + outCoord = coord; + 
outColor = color; +} diff --git a/src/Magpie.Core/shaders/MaskedCursorPS.hlsl b/src/Magpie.Core/shaders/MaskedCursorPS.hlsl new file mode 100644 index 000000000..983cc59eb --- /dev/null +++ b/src/Magpie.Core/shaders/MaskedCursorPS.hlsl @@ -0,0 +1,16 @@ +Texture2D originTex : register(t0); +Texture2D cursorTex : register(t1); + +SamplerState pointSampler : register(s0); + +float4 main(float2 coord : TEXCOORD) : SV_TARGET { + float4 mask = cursorTex.Sample(pointSampler, coord); + + if (mask.a < 0.5f) { + return float4(mask.rgb, 1); + } else { + float3 origin = originTex.Sample(pointSampler, coord).rgb; + // 255.001953 的由来见 https://stackoverflow.com/questions/52103720/why-does-d3dcolortoubyte4-multiplies-components-by-255-001953f + return float4((uint3(origin * 255.001953f) ^ uint3(mask.rgb * 255.001953f)) / 255.0f, 1); + } +} diff --git a/src/Magpie.Core/shaders/MonochromeCursorPS.hlsl b/src/Magpie.Core/shaders/MonochromeCursorPS.hlsl new file mode 100644 index 000000000..6c87dca55 --- /dev/null +++ b/src/Magpie.Core/shaders/MonochromeCursorPS.hlsl @@ -0,0 +1,24 @@ +Texture2D originTex : register(t0); +Texture2D cursorTex : register(t1); + +SamplerState pointSampler : register(s0); + +float4 main(float2 coord : TEXCOORD) : SV_TARGET { + float2 mask = cursorTex.Sample(pointSampler, coord); + + if (mask.x > 0.5f) { + float3 origin = originTex.Sample(pointSampler, coord).rgb; + + if (mask.y > 0.5f) { + return float4(1 - origin, 1); + } else { + return float4(origin, 1); + } + } else { + if (mask.y > 0.5f) { + return float4(1, 1, 1, 1); + } else { + return float4(0, 0, 0, 1); + } + } +} diff --git a/src/Magpie.Core/shaders/SimplePS.hlsl b/src/Magpie.Core/shaders/SimplePS.hlsl new file mode 100644 index 000000000..bd91e2ace --- /dev/null +++ b/src/Magpie.Core/shaders/SimplePS.hlsl @@ -0,0 +1,6 @@ +Texture2D tex : register(t0); +SamplerState sam : register(s0); + +float4 main(float2 coord : TEXCOORD) : SV_Target { + return tex.Sample(sam, coord); +} diff --git 
a/src/Magpie.Core/shaders/SimpleVS.hlsl b/src/Magpie.Core/shaders/SimpleVS.hlsl new file mode 100644 index 000000000..8967a461d --- /dev/null +++ b/src/Magpie.Core/shaders/SimpleVS.hlsl @@ -0,0 +1,9 @@ +void main( + float4 pos : SV_POSITION, + float2 coord : TEXCOORD, + out float2 outCoord : TEXCOORD, + out float4 outPos : SV_POSITION +) { + outPos = pos; + outCoord = coord; +} diff --git a/src/Magpie/Magpie.vcxproj b/src/Magpie/Magpie.vcxproj index 1a00e7895..cd621eb46 100644 --- a/src/Magpie/Magpie.vcxproj +++ b/src/Magpie/Magpie.vcxproj @@ -8,11 +8,9 @@ {1801171b-65b6-400f-92ff-73eaf499cfb3} Magpie 10.0.22621.0 - Magpie.App - $(SolutionDir)\obj\$(Platform)\$(Configuration)\$(AppProjectName)\;$(SolutionDir)\obj\$(Platform)\$(Configuration)\$(AppProjectName)\Generated Files\; - - $(SolutionDir)\bin\$(Platform)\$(Configuration)\ - + + $(SolutionDir)bin\$(Platform)\$(Configuration)\ + true @@ -30,12 +28,9 @@ - + - - - Fast @@ -49,13 +44,12 @@ - + - @@ -67,7 +61,7 @@ - + @@ -81,20 +75,50 @@ false + + + {1239537c-e5b8-427a-9e7f-ea443d1f3529} + + - + + + + + + + <_UnpackagedWin32WinmdManifest Include="@(ReferencePath->'$(IntDir)%(FileName).manifest')" Condition="'%(ReferencePath.IsSystemReference)' != 'true' And '%(ReferencePath.WinMDFile)' == 'true' And '%(ReferencePath.ReferenceSourceTarget)' == 'ResolveAssemblyReference' And '%(ReferencePath.Implementation)' != '' And '%(FileName)' != 'Microsoft.Web.WebView2.Core'"> + %(ReferencePath.FullPath) + $([System.IO.Path]::GetFileName('%(ReferencePath.Implementation)')) + + + <_UnpackagedWin32WinmdProjectReference Condition="'%(_ResolvedNativeProjectReferencePaths.ProjectType)' != 'StaticLibrary' And '%(_ResolvedNativeProjectReferencePaths.DeploymentContent)' != 'false'" Include="@(_ResolvedNativeProjectReferencePaths->WithMetadataValue('FileType','winmd')->'%(RootDir)%(Directory)%(TargetPath)')" /> + <_UnpackagedWin32WinmdManifest Include="@(_UnpackagedWin32WinmdProjectReference->'$(IntDir)%(FileName).manifest')"> + 
%(Identity) + + + + + + + + + + + + - + 这台计算机上缺少此项目引用的 NuGet 程序包。使用“NuGet 程序包还原”可下载这些程序包。有关更多信息,请参见 http://go.microsoft.com/fwlink/?LinkID=322105。缺少的文件是 {0}。 - + diff --git a/src/Magpie/Magpie.vcxproj.filters b/src/Magpie/Magpie.vcxproj.filters index cfe3deace..c5badf4d9 100644 --- a/src/Magpie/Magpie.vcxproj.filters +++ b/src/Magpie/Magpie.vcxproj.filters @@ -19,15 +19,14 @@ - - Services - + + Services + - @@ -39,10 +38,10 @@ - + + Services - diff --git a/src/Magpie/MainWindow.cpp b/src/Magpie/MainWindow.cpp index 329487b1e..c9fc7d455 100644 --- a/src/Magpie/MainWindow.cpp +++ b/src/Magpie/MainWindow.cpp @@ -12,13 +12,14 @@ namespace Magpie { bool MainWindow::Create(HINSTANCE hInstance, winrt::Point windowCenter, winrt::Size windowSizeInDips, bool isMaximized) noexcept { static const int _ = [](HINSTANCE hInstance) { - WNDCLASSEXW wcex{}; - wcex.cbSize = sizeof(wcex); - wcex.lpfnWndProc = _WndProc; - wcex.hInstance = hInstance; - wcex.hIcon = LoadIcon(hInstance, MAKEINTRESOURCE(CommonSharedConstants::IDI_APP)); - wcex.hCursor = LoadCursor(nullptr, IDC_ARROW); - wcex.lpszClassName = CommonSharedConstants::MAIN_WINDOW_CLASS_NAME; + WNDCLASSEXW wcex{ + .cbSize = sizeof(wcex), + .lpfnWndProc = _WndProc, + .hInstance = hInstance, + .hIcon = LoadIcon(hInstance, MAKEINTRESOURCE(CommonSharedConstants::IDI_APP)), + .hCursor = LoadCursor(nullptr, IDC_ARROW), + .lpszClassName = CommonSharedConstants::MAIN_WINDOW_CLASS_NAME + }; RegisterClassEx(&wcex); wcex.style = CS_DBLCLKS; @@ -32,32 +33,32 @@ bool MainWindow::Create(HINSTANCE hInstance, winrt::Point windowCenter, winrt::S const auto& [posToSet, sizeToSet] = _CreateWindow(hInstance, windowCenter, windowSizeInDips); - if (!_hWnd) { + if (!Handle()) { return false; } - _SetContent(winrt::Magpie::App::RootPage()); + _Content(winrt::Magpie::App::RootPage()); - _content.ActualThemeChanged([this](winrt::FrameworkElement const&, winrt::IInspectable const&) { + Content().ActualThemeChanged([this](winrt::FrameworkElement const&, 
winrt::IInspectable const&) { _UpdateTheme(); }); _UpdateTheme(); - - // 窗口尚未显示无法最大化,所以我们设置 _isMaximized 使 XamlWindow 估计 XAML Islands 窗口尺寸。 - // 否则在显示窗口时可能会看到 NavigationView 的导航栏的展开动画。 - _isMaximized = isMaximized; + + if (isMaximized) { + _SetInitialMaximized(); + } // 1. 设置初始 XAML Islands 窗口的尺寸 // 2. 刷新窗口边框 // 3. 无法获知 DPI 的情况下 _CreateWindow 创建的窗口尺寸为零,在这里延后设置窗口位置 // 4. 防止窗口显示时背景闪烁: https://stackoverflow.com/questions/69715610/how-to-initialize-the-background-color-of-win32-app-to-something-other-than-whit - SetWindowPos(_hWnd, NULL, posToSet.x, posToSet.y, sizeToSet.cx, sizeToSet.cy, + SetWindowPos(Handle(), NULL, posToSet.x, posToSet.y, sizeToSet.cx, sizeToSet.cy, (sizeToSet.cx == 0 ? (SWP_NOMOVE | SWP_NOSIZE) : 0) | SWP_FRAMECHANGED | SWP_NOACTIVATE | SWP_NOCOPYBITS); // Xaml 控件加载完成后显示主窗口 - _content.Loaded([this, isMaximized](winrt::IInspectable const&, winrt::RoutedEventArgs const&) { + Content().Loaded([this, isMaximized](winrt::IInspectable const&, winrt::RoutedEventArgs const&) { if (isMaximized) { - // ShowWindow(_hWnd, SW_SHOWMAXIMIZED) 会显示错误的动画。因此我们以窗口化显示, + // ShowWindow(Handle(), SW_SHOWMAXIMIZED) 会显示错误的动画。因此我们以窗口化显示, // 但位置和大小都和最大化相同,显示完毕后将状态设为最大化。 // // 在此过程中,_isMaximized 始终是 true。 @@ -65,17 +66,17 @@ bool MainWindow::Create(HINSTANCE hInstance, winrt::Point windowCenter, winrt::S // 保存原始窗口化位置 WINDOWPLACEMENT wp{}; wp.length = sizeof(wp); - GetWindowPlacement(_hWnd, &wp); + GetWindowPlacement(Handle(), &wp); // 查询最大化窗口位置 - if (HMONITOR hMon = MonitorFromWindow(_hWnd, MONITOR_DEFAULTTONEAREST)) { + if (HMONITOR hMon = MonitorFromWindow(Handle(), MONITOR_DEFAULTTONEAREST)) { MONITORINFO mi{}; mi.cbSize = sizeof(mi); GetMonitorInfo(hMon, &mi); // 播放窗口显示动画 SetWindowPos( - _hWnd, + Handle(), NULL, mi.rcWork.left, mi.rcWork.top, @@ -87,14 +88,12 @@ bool MainWindow::Create(HINSTANCE hInstance, winrt::Point windowCenter, winrt::S // 将状态设为最大化,也还原了原始的窗口化位置 wp.showCmd = SW_SHOWMAXIMIZED; - SetWindowPlacement(_hWnd, &wp); + SetWindowPlacement(Handle(), &wp); } 
else { - ShowWindow(_hWnd, SW_SHOWNORMAL); + ShowWindow(Handle(), SW_SHOWNORMAL); } - Win32Utils::SetForegroundWindow(_hWnd); - - _isWindowShown = true; + Win32Utils::SetForegroundWindow(Handle()); }); // 创建标题栏窗口,它是主窗口的子窗口。我们将它置于 XAML Islands 窗口之上以防止鼠标事件被吞掉 @@ -110,7 +109,7 @@ bool MainWindow::Create(HINSTANCE hInstance, winrt::Point windowCenter, winrt::S L"", WS_CHILD | WS_MINIMIZEBOX | WS_MAXIMIZEBOX, 0, 0, 0, 0, - _hWnd, + Handle(), nullptr, hInstance, this @@ -131,7 +130,7 @@ bool MainWindow::Create(HINSTANCE hInstance, winrt::Point windowCenter, winrt::S ); } - _content.TitleBar().SizeChanged([this](winrt::IInspectable const&, winrt::SizeChangedEventArgs const&) { + Content().TitleBar().SizeChanged([this](winrt::IInspectable const&, winrt::SizeChangedEventArgs const&) { _ResizeTitleBarWindow(); }); @@ -139,11 +138,11 @@ bool MainWindow::Create(HINSTANCE hInstance, winrt::Point windowCenter, winrt::S } void MainWindow::Show() const noexcept { - if (IsIconic(_hWnd)) { - ShowWindow(_hWnd, SW_RESTORE); + if (IsIconic(Handle())) { + ShowWindow(Handle(), SW_RESTORE); } - Win32Utils::SetForegroundWindow(_hWnd); + Win32Utils::SetForegroundWindow(Handle()); } LRESULT MainWindow::_MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noexcept { @@ -152,7 +151,7 @@ LRESULT MainWindow::_MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noex { LRESULT ret = base_type::_MessageHandler(WM_SIZE, wParam, lParam); _ResizeTitleBarWindow(); - _content.TitleBar().CaptionButtons().IsWindowMaximized(_isMaximized); + Content().TitleBar().CaptionButtons().IsWindowMaximized(_IsMaximized()); return ret; } case WM_GETMINMAXINFO: @@ -160,8 +159,8 @@ LRESULT MainWindow::_MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noex // 设置窗口最小尺寸 MINMAXINFO* mmi = (MINMAXINFO*)lParam; mmi->ptMinTrackSize = { - std::lroundf(550 * _currentDpi / float(USER_DEFAULT_SCREEN_DPI)), - std::lroundf(300 * _currentDpi / float(USER_DEFAULT_SCREEN_DPI)) + std::lroundf(500 * _CurrentDpi() / 
float(USER_DEFAULT_SCREEN_DPI)), + std::lroundf(300 * _CurrentDpi() / float(USER_DEFAULT_SCREEN_DPI)) }; return 0; } @@ -169,7 +168,16 @@ LRESULT MainWindow::_MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noex { // 我们自己处理标题栏右键,不知为何 DefWindowProc 没有作用 if (wParam == HTCAPTION) { - HMENU systemMenu = GetSystemMenu(_hWnd, FALSE); + const POINT cursorPt{ GET_X_LPARAM(lParam), GET_Y_LPARAM(lParam) }; + + // 在标题栏上按下右键,在其他地方释放也会收到此消息。确保只有在标题栏上释放时才显示菜单 + RECT titleBarRect; + GetWindowRect(_hwndTitleBar, &titleBarRect); + if (!PtInRect(&titleBarRect, cursorPt)) { + break; + } + + HMENU systemMenu = GetSystemMenu(Handle(), FALSE); // 根据窗口状态更新选项 MENUITEMINFO mii{}; @@ -180,25 +188,25 @@ LRESULT MainWindow::_MessageHandler(UINT msg, WPARAM wParam, LPARAM lParam) noex mii.fState = enabled ? MF_ENABLED : MF_DISABLED; SetMenuItemInfo(systemMenu, item, FALSE, &mii); }; - setState(SC_RESTORE, _isMaximized); - setState(SC_MOVE, !_isMaximized); - setState(SC_SIZE, !_isMaximized); + const bool isMaximized = _IsMaximized(); + setState(SC_RESTORE, isMaximized); + setState(SC_MOVE, !isMaximized); + setState(SC_SIZE, !isMaximized); setState(SC_MINIMIZE, true); - setState(SC_MAXIMIZE, !_isMaximized); + setState(SC_MAXIMIZE, !isMaximized); setState(SC_CLOSE, true); SetMenuDefaultItem(systemMenu, UINT_MAX, FALSE); - BOOL cmd = TrackPopupMenu(systemMenu, TPM_RETURNCMD, - GET_X_LPARAM(lParam), GET_Y_LPARAM(lParam), 0, _hWnd, nullptr); + BOOL cmd = TrackPopupMenu(systemMenu, TPM_RETURNCMD, cursorPt.x, cursorPt.y, 0, Handle(), nullptr); if (cmd != 0) { - PostMessage(_hWnd, WM_SYSCOMMAND, cmd, 0); + PostMessage(Handle(), WM_SYSCOMMAND, cmd, 0); } } break; } case WM_ACTIVATE: { - _content.TitleBar().IsWindowActive(LOWORD(wParam) != WA_INACTIVE); + Content().TitleBar().IsWindowActive(LOWORD(wParam) != WA_INACTIVE); break; } case WM_DESTROY: @@ -251,7 +259,7 @@ std::pair MainWindow::_CreateWindow(HINSTANCE hInstance, winrt::Poi MONITORINFO mi{ sizeof(mi) }; GetMonitorInfo(hMon, &mi); - // 
确保启动位置在屏幕工作区内。不允许启动时跨越多个屏幕。 + // 确保启动位置在屏幕工作区内。不允许启动时跨越多个屏幕 if (windowSize.cx <= mi.rcWork.right - mi.rcWork.left && windowSize.cy <= mi.rcWork.bottom - mi.rcWork.top) { windowPos.x = std::lroundf(windowCenter.X - windowSizeInPixels.Width / 2); windowPos.x = std::clamp(windowPos.x, mi.rcWork.left, mi.rcWork.right - windowSize.cx); @@ -281,14 +289,15 @@ std::pair MainWindow::_CreateWindow(HINSTANCE hInstance, winrt::Poi hInstance, this ); + assert(Handle()); if (windowSize.cx == 0) { - const HMONITOR hMon = MonitorFromWindow(_hWnd, MONITOR_DEFAULTTONEAREST); + const HMONITOR hMon = MonitorFromWindow(Handle(), MONITOR_DEFAULTTONEAREST); MONITORINFO mi{ sizeof(mi) }; GetMonitorInfo(hMon, &mi); - const float dpiFactor = _currentDpi / float(USER_DEFAULT_SCREEN_DPI); + const float dpiFactor = _CurrentDpi() / float(USER_DEFAULT_SCREEN_DPI); const winrt::Size workingAreaSizeInDips = { (mi.rcWork.right - mi.rcWork.left) / dpiFactor, (mi.rcWork.bottom - mi.rcWork.top) / dpiFactor @@ -321,7 +330,7 @@ std::pair MainWindow::_CreateWindow(HINSTANCE hInstance, winrt::Poi // 确保启动位置在屏幕工作区内 RECT targetRect; - GetWindowRect(_hWnd, &targetRect); + GetWindowRect(Handle(), &targetRect); windowPos.x = std::clamp(targetRect.left, mi.rcWork.left, mi.rcWork.right - windowSize.cx); windowPos.y = std::clamp(targetRect.top, mi.rcWork.top, mi.rcWork.bottom - windowSize.cy); @@ -331,8 +340,8 @@ std::pair MainWindow::_CreateWindow(HINSTANCE hInstance, winrt::Poi } } -void MainWindow::_UpdateTheme() { - XamlWindowT::_SetTheme(_content.ActualTheme() == winrt::ElementTheme::Dark); +void MainWindow::_UpdateTheme() noexcept { + XamlWindowT::_SetTheme(Content().ActualTheme() == winrt::ElementTheme::Dark); } LRESULT MainWindow::_TitleBarWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) noexcept { @@ -368,17 +377,17 @@ LRESULT MainWindow::_TitleBarMessageHandler(UINT msg, WPARAM wParam, LPARAM lPar return HTNOWHERE; } - if (!_isMaximized && cursorPos.y + (int)_GetTopBorderHeight() < 
_GetResizeHandleHeight()) { + if (!_IsMaximized() && cursorPos.y + (int)_GetTopBorderHeight() < _GetResizeHandleHeight()) { // 鼠标位于上边框 return HTTOP; } static const winrt::Size buttonSizeInDips = [this]() { - return _content.TitleBar().CaptionButtons().CaptionButtonSize(); + return Content().TitleBar().CaptionButtons().CaptionButtonSize(); }(); - const float buttonWidthInPixels = buttonSizeInDips.Width * _currentDpi / USER_DEFAULT_SCREEN_DPI; - const float buttonHeightInPixels = buttonSizeInDips.Height * _currentDpi / USER_DEFAULT_SCREEN_DPI; + const float buttonWidthInPixels = buttonSizeInDips.Width * _CurrentDpi() / USER_DEFAULT_SCREEN_DPI; + const float buttonHeightInPixels = buttonSizeInDips.Height * _CurrentDpi() / USER_DEFAULT_SCREEN_DPI; if (cursorPos.y >= buttonHeightInPixels) { // 鼠标位于标题按钮下方,如果标题栏很宽,这里也可以拖动 @@ -391,7 +400,6 @@ LRESULT MainWindow::_TitleBarMessageHandler(UINT msg, WPARAM wParam, LPARAM lPar return HTCLOSE; } else if (cursorToRight < buttonWidthInPixels * 2) { // 支持 Win11 的贴靠布局 - // FIXME: 最大化时贴靠布局的位置不对,目前没有找到解决方案。似乎只适配了系统原生框架和 UWP return HTMAXBUTTON; } else if (cursorToRight < buttonWidthInPixels * 3) { return HTMINBUTTON; @@ -410,7 +418,7 @@ LRESULT MainWindow::_TitleBarMessageHandler(UINT msg, WPARAM wParam, LPARAM lPar [[fallthrough]]; case WM_NCMOUSEMOVE: { - auto captionButtons = _content.TitleBar().CaptionButtons(); + auto captionButtons = Content().TitleBar().CaptionButtons(); // 将 hover 状态通知 CaptionButtons。标题栏窗口拦截了 XAML Islands 中的标题栏 // 控件的鼠标消息,标题栏按钮的状态由我们手动控制。 @@ -421,7 +429,7 @@ LRESULT MainWindow::_TitleBarMessageHandler(UINT msg, WPARAM wParam, LPARAM lPar captionButtons.LeaveButtons(); // 将 HTTOP 传给主窗口才能通过上边框调整窗口高度 - return SendMessage(_hWnd, msg, wParam, lParam); + return SendMessage(Handle(), msg, wParam, lParam); } case HTMINBUTTON: case HTMAXBUTTON: @@ -456,13 +464,13 @@ LRESULT MainWindow::_TitleBarMessageHandler(UINT msg, WPARAM wParam, LPARAM lPar GetCursorPos(&cursorPos); // 先检查鼠标是否在主窗口上,如果正在显示文字提示,会返回 _hwndTitleBar HWND 
hwndUnderCursor = WindowFromPoint(cursorPos); - if (hwndUnderCursor != _hWnd && hwndUnderCursor != _hwndTitleBar) { - _content.TitleBar().CaptionButtons().LeaveButtons(); + if (hwndUnderCursor != Handle() && hwndUnderCursor != _hwndTitleBar) { + Content().TitleBar().CaptionButtons().LeaveButtons(); } else { // 然后检查鼠标在标题栏上的位置 LRESULT hit = SendMessage(_hwndTitleBar, WM_NCHITTEST, 0, MAKELPARAM(cursorPos.x, cursorPos.y)); if (hit != HTMINBUTTON && hit != HTMAXBUTTON && hit != HTCLOSE) { - _content.TitleBar().CaptionButtons().LeaveButtons(); + Content().TitleBar().CaptionButtons().LeaveButtons(); } } @@ -479,12 +487,12 @@ LRESULT MainWindow::_TitleBarMessageHandler(UINT msg, WPARAM wParam, LPARAM lPar case HTCAPTION: { // 将 HTTOP 传给主窗口才能通过上边框调整窗口高度 - return SendMessage(_hWnd, msg, wParam, lParam); + return SendMessage(Handle(), msg, wParam, lParam); } case HTMINBUTTON: case HTMAXBUTTON: case HTCLOSE: - _content.TitleBar().CaptionButtons().PressButton((winrt::Magpie::App::CaptionButton)wParam); + Content().TitleBar().CaptionButtons().PressButton((winrt::Magpie::App::CaptionButton)wParam); // 在标题栏按钮上按下左键后我们便捕获光标,这样才能在释放时得到通知。注意捕获光标后 // 便不会再收到 NC 族消息,这就是为什么我们要处理 WM_MOUSEMOVE 和 WM_LBUTTONUP SetCapture(_hwndTitleBar); @@ -510,17 +518,17 @@ LRESULT MainWindow::_TitleBarMessageHandler(UINT msg, WPARAM wParam, LPARAM lPar case HTCAPTION: { // 在可拖拽区域或上边框释放左键,将此消息传递给主窗口 - _content.TitleBar().CaptionButtons().ReleaseButtons(); - return SendMessage(_hWnd, msg, wParam, lParam); + Content().TitleBar().CaptionButtons().ReleaseButtons(); + return SendMessage(Handle(), msg, wParam, lParam); } case HTMINBUTTON: case HTMAXBUTTON: case HTCLOSE: // 在标题栏按钮上释放左键 - _content.TitleBar().CaptionButtons().ReleaseButton((winrt::Magpie::App::CaptionButton)wParam); + Content().TitleBar().CaptionButtons().ReleaseButton((winrt::Magpie::App::CaptionButton)wParam); break; default: - _content.TitleBar().CaptionButtons().ReleaseButtons(); + Content().TitleBar().CaptionButtons().ReleaseButtons(); } return 
0; @@ -529,7 +537,7 @@ LRESULT MainWindow::_TitleBarMessageHandler(UINT msg, WPARAM wParam, LPARAM lPar case WM_NCRBUTTONDBLCLK: case WM_NCRBUTTONUP: // 不关心右键,将它们传递给主窗口 - return SendMessage(_hWnd, msg, wParam, lParam); + return SendMessage(Handle(), msg, wParam, lParam); } return DefWindowProc(_hwndTitleBar, msg, wParam, lParam); @@ -540,13 +548,13 @@ void MainWindow::_ResizeTitleBarWindow() noexcept { return; } - auto titleBar = _content.TitleBar(); + auto titleBar = Content().TitleBar(); // 获取标题栏的边框矩形 winrt::Rect rect{0.0f, 0.0f, (float)titleBar.ActualWidth(), (float)titleBar.ActualHeight()}; - rect = titleBar.TransformToVisual(_content).TransformBounds(rect); + rect = titleBar.TransformToVisual(Content()).TransformBounds(rect); - const float dpiScale = _currentDpi / float(USER_DEFAULT_SCREEN_DPI); + const float dpiScale = _CurrentDpi() / float(USER_DEFAULT_SCREEN_DPI); // 将标题栏窗口置于 XAML Islands 窗口上方 const int titleBarWidth = (int)std::ceilf(rect.Width * dpiScale); @@ -574,7 +582,7 @@ void MainWindow::_ResizeTitleBarWindow() noexcept { // 设置标题栏窗口的最大化样式,这样才能展示正确的文字提示 LONG_PTR style = GetWindowLongPtr(_hwndTitleBar, GWL_STYLE); SetWindowLongPtr(_hwndTitleBar, GWL_STYLE, - _isMaximized ? style | WS_MAXIMIZE : style & ~WS_MAXIMIZE); + _IsMaximized() ? 
style | WS_MAXIMIZE : style & ~WS_MAXIMIZE); } } diff --git a/src/Magpie/MainWindow.h b/src/Magpie/MainWindow.h index cd4dc4010..3fa2f7cfe 100644 --- a/src/Magpie/MainWindow.h +++ b/src/Magpie/MainWindow.h @@ -5,7 +5,7 @@ namespace Magpie { class MainWindow : public XamlWindowT { - friend class base_type; + friend base_type; public: bool Create(HINSTANCE hInstance, winrt::Point windowCenter, winrt::Size windowSizeInDips, bool isMaximized) noexcept; @@ -17,7 +17,7 @@ class MainWindow : public XamlWindowT private: std::pair _CreateWindow(HINSTANCE hInstance, winrt::Point windowCenter, winrt::Size windowSizeInDips) noexcept; - void _UpdateTheme(); + void _UpdateTheme() noexcept; static LRESULT CALLBACK _TitleBarWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) noexcept; diff --git a/src/Magpie/TrayIconService.cpp b/src/Magpie/NotifyIconService.cpp similarity index 82% rename from src/Magpie/TrayIconService.cpp rename to src/Magpie/NotifyIconService.cpp index 6e624f985..d5a6fa674 100644 --- a/src/Magpie/TrayIconService.cpp +++ b/src/Magpie/NotifyIconService.cpp @@ -1,5 +1,5 @@ #include "pch.h" -#include "TrayIconService.h" +#include "NotifyIconService.h" #include "CommonSharedConstants.h" #include "Logger.h" #include "resource.h" @@ -9,9 +9,9 @@ namespace Magpie { // 当任务栏被创建时会广播此消息。用于在资源管理器被重新启动时重新创建托盘图标 // https://learn.microsoft.com/en-us/windows/win32/shell/taskbar#taskbar-creation-notification -const UINT TrayIconService::_WM_TASKBARCREATED = RegisterWindowMessage(L"TaskbarCreated"); +const UINT NotifyIconService::_WM_TASKBARCREATED = RegisterWindowMessage(L"TaskbarCreated"); -void TrayIconService::Initialize() noexcept { +void NotifyIconService::Initialize() noexcept { _nid.cbSize = sizeof(_nid); _nid.uVersion = 0; // 不使用 NOTIFYICON_VERSION_4 _nid.uCallbackMessage = CommonSharedConstants::WM_NOTIFY_ICON; @@ -19,7 +19,7 @@ void TrayIconService::Initialize() noexcept { _nid.uID = 0; } -void TrayIconService::Uninitialize() noexcept { +void 
NotifyIconService::Uninitialize() noexcept { IsShow(false); if (_nid.hWnd) { @@ -30,7 +30,7 @@ void TrayIconService::Uninitialize() noexcept { } } -void TrayIconService::IsShow(bool value) noexcept { +void NotifyIconService::IsShow(bool value) noexcept { _shouldShow = value; if (value) { @@ -41,7 +41,7 @@ void TrayIconService::IsShow(bool value) noexcept { WNDCLASSEXW wcex{}; wcex.cbSize = sizeof(wcex); wcex.hInstance = hInst; - wcex.lpfnWndProc = _TrayIconWndProcStatic; + wcex.lpfnWndProc = _NotifyIconWndProcStatic; wcex.lpszClassName = CommonSharedConstants::NOTIFY_ICON_WINDOW_CLASS_NAME; RegisterClassEx(&wcex); @@ -85,7 +85,7 @@ void TrayIconService::IsShow(bool value) noexcept { } } -LRESULT TrayIconService::_TrayIconWndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) { +LRESULT NotifyIconService::_NotifyIconWndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) { switch (message) { case CommonSharedConstants::WM_NOTIFY_ICON: { @@ -97,9 +97,10 @@ LRESULT TrayIconService::_TrayIconWndProc(HWND hWnd, UINT message, WPARAM wParam } case WM_RBUTTONUP: { - winrt::ResourceLoader resourceLoader = winrt::ResourceLoader::GetForCurrentView(); - winrt::hstring mainWindowText = resourceLoader.GetString(L"TrayIcon_MainWindow"); - winrt::hstring exitText = resourceLoader.GetString(L"TrayIcon_Exit"); + winrt::ResourceLoader resourceLoader = + winrt::ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID); + winrt::hstring mainWindowText = resourceLoader.GetString(L"NotifyIcon_MainWindow"); + winrt::hstring exitText = resourceLoader.GetString(L"NotifyIcon_Exit"); HMENU hMenu = CreatePopupMenu(); AppendMenu(hMenu, MF_STRING, 1, mainWindowText.c_str()); diff --git a/src/Magpie/TrayIconService.h b/src/Magpie/NotifyIconService.h similarity index 55% rename from src/Magpie/TrayIconService.h rename to src/Magpie/NotifyIconService.h index 38bf0f5e2..4b844bf56 100644 --- a/src/Magpie/TrayIconService.h +++ b/src/Magpie/NotifyIconService.h @@ 
-2,10 +2,10 @@ namespace Magpie { -class TrayIconService { +class NotifyIconService { public: - static TrayIconService& Get() noexcept { - static TrayIconService instance; + static NotifyIconService& Get() noexcept { + static NotifyIconService instance; return instance; } @@ -19,10 +19,10 @@ class TrayIconService { } private: - static LRESULT _TrayIconWndProcStatic(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) { - return Get()._TrayIconWndProc(hWnd, msg, wParam, lParam); + static LRESULT _NotifyIconWndProcStatic(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) { + return Get()._NotifyIconWndProc(hWnd, msg, wParam, lParam); } - LRESULT _TrayIconWndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); + LRESULT _NotifyIconWndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); NOTIFYICONDATA _nid{}; bool _isShow = false; diff --git a/src/Magpie/XamlApp.cpp b/src/Magpie/XamlApp.cpp index 238b1b248..f82ecba90 100644 --- a/src/Magpie/XamlApp.cpp +++ b/src/Magpie/XamlApp.cpp @@ -5,7 +5,7 @@ #include "CommonSharedConstants.h" #include #include "ThemeHelper.h" -#include "TrayIconService.h" +#include "NotifyIconService.h" namespace Magpie { @@ -60,17 +60,17 @@ bool XamlApp::Initialize(HINSTANCE hInstance, const wchar_t* arguments) { ThemeHelper::Initialize(); - TrayIconService& trayIconService = TrayIconService::Get(); - trayIconService.Initialize(); - trayIconService.IsShow(_uwpApp.IsShowTrayIcon()); - _uwpApp.IsShowTrayIconChanged([](winrt::IInspectable const&, bool value) { - TrayIconService::Get().IsShow(value); + NotifyIconService& notifyIconService = NotifyIconService::Get(); + notifyIconService.Initialize(); + notifyIconService.IsShow(_uwpApp.IsShowNotifyIcon()); + _uwpApp.IsShowNotifyIconChanged([](winrt::IInspectable const&, bool value) { + NotifyIconService::Get().IsShow(value); }); _mainWindow.Destroyed({ this, &XamlApp::_MainWindow_Destoryed }); // 不显示托盘图标时忽略 -t 参数 - if (!trayIconService.IsShow() || !arguments || arguments != L"-t"sv) { + 
if (!notifyIconService.IsShow() || !arguments || arguments != L"-t"sv) { if (!_CreateMainWindow()) { Quit(); return false; @@ -129,7 +129,7 @@ void XamlApp::Restart(bool asElevated, const wchar_t* arguments) noexcept { } void XamlApp::SaveSettings() { - if (_mainWindow && TrayIconService::Get().IsShow()) { + if (_mainWindow && NotifyIconService::Get().IsShow()) { WINDOWPLACEMENT wp{}; wp.length = sizeof(wp); if (GetWindowPlacement(_mainWindow.Handle(), &wp)) { @@ -157,7 +157,7 @@ XamlApp::XamlApp() {} XamlApp::~XamlApp() {} -bool XamlApp::_CheckSingleInstance() { +bool XamlApp::_CheckSingleInstance() noexcept { static constexpr const wchar_t* SINGLE_INSTANCE_MUTEX_NAME = L"{4C416227-4A30-4A2F-8F23-8701544DD7D6}"; static constexpr const wchar_t* ELEVATED_MUTEX_NAME = L"{E494C456-F587-4DAF-B68F-366278D31C45}"; @@ -200,7 +200,7 @@ bool XamlApp::_CheckSingleInstance() { return true; } -void XamlApp::_InitializeLogger() { +void XamlApp::_InitializeLogger() noexcept { Logger& logger = Logger::Get(); logger.Initialize( spdlog::level::info, @@ -214,7 +214,7 @@ void XamlApp::_InitializeLogger() { winrt::Magpie::App::LoggerHelper::Initialize((uint64_t)&logger); } -bool XamlApp::_CreateMainWindow() { +bool XamlApp::_CreateMainWindow() noexcept { if (!_mainWindow.Create(_hInst, _mainWindowCenter, _mainWindowSizeInDips, _isMainWndMaximized)) { return false; } @@ -234,7 +234,7 @@ void XamlApp::ShowMainWindow() noexcept { } void XamlApp::_QuitWithoutMainWindow() { - TrayIconService::Get().Uninitialize(); + NotifyIconService::Get().Uninitialize(); _uwpApp.Uninitialize(); // 不能调用 Close,否则切换页面时关闭主窗口会导致崩溃 @@ -250,7 +250,7 @@ void XamlApp::_MainWindow_Destoryed() { _uwpApp.HwndMain(0); _uwpApp.RootPage(nullptr); - if (!TrayIconService::Get().IsShow()) { + if (!NotifyIconService::Get().IsShow()) { _QuitWithoutMainWindow(); } } diff --git a/src/Magpie/XamlApp.h b/src/Magpie/XamlApp.h index ce17690d6..696dc94a6 100644 --- a/src/Magpie/XamlApp.h +++ b/src/Magpie/XamlApp.h @@ -31,11 +31,11 
@@ class XamlApp { XamlApp(); ~XamlApp(); - bool _CheckSingleInstance(); + bool _CheckSingleInstance() noexcept; - void _InitializeLogger(); + void _InitializeLogger() noexcept; - bool _CreateMainWindow(); + bool _CreateMainWindow() noexcept; void _QuitWithoutMainWindow(); diff --git a/src/Magpie/XamlIslands.targets b/src/Magpie/XamlIslands.targets deleted file mode 100644 index e44305246..000000000 --- a/src/Magpie/XamlIslands.targets +++ /dev/null @@ -1,153 +0,0 @@ - - - - - - - <_DisableAppxCopy>true - - - - $(MSBuildProgramFiles32)\Windows Kits\10\bin\$(_TargetPlatformVersion)\x64\mt.exe - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"; - var sb = new StringBuilder(); - sb.Append(headerF); - - if (!string.IsNullOrEmpty(AppxManifest)) - { - XmlDocument doc = new XmlDocument(); - doc.Load(AppxManifest); - var nsmgr = new XmlNamespaceManager(doc.NameTable); - nsmgr.AddNamespace("m", "http://schemas.microsoft.com/appx/manifest/foundation/windows10"); - var xQuery = "./m:Package/m:Extensions/m:Extension/m:InProcessServer"; - - foreach (XmlNode winRTFactory in doc.SelectNodes(xQuery, nsmgr)) - { - var dllPathNode = winRTFactory.SelectSingleNode("./m:Path", nsmgr); - var dllPath = dllPathNode.InnerText; - var typesNames = winRTFactory.SelectNodes("./m:ActivatableClass", nsmgr).OfType(); - var xmlHeader = String.Format(@" ", dllPath); - sb.Append(xmlHeader); - foreach (var typeNode in typesNames) - { - var attribs = typeNode.Attributes.OfType().ToArray(); - var typeName = attribs - .OfType() - .SingleOrDefault(x => x.Name == "ActivatableClassId") - .InnerText; - var xmlEntry = String.Format(@" - ", typeName); - sb.Append(xmlEntry); - } - var xmlFooter = @" - -"; - sb.Append(xmlFooter); - } - } - var xmlFooterF = @""; - sb.Append(xmlFooterF); - var manifestContent = sb.ToString(); - var outFileName = Path.Combine(DestinationFolder, "app.manifest"); - File.WriteAllText(outFileName, manifestContent, Encoding.UTF8); 
-]]> - - - - - - - - - - - - - - - - - - - diff --git a/src/Magpie/XamlWindow.h b/src/Magpie/XamlWindow.h index 9d3f7eb1c..d1db58fc9 100644 --- a/src/Magpie/XamlWindow.h +++ b/src/Magpie/XamlWindow.h @@ -5,6 +5,7 @@ #include "Win32Utils.h" #include "ThemeHelper.h" #include "CommonSharedConstants.h" +#include "Logger.h" #pragma comment(lib, "uxtheme.lib") @@ -19,10 +20,6 @@ class XamlWindowT { } } - operator bool() const noexcept { - return _hWnd; - } - void HandleMessage(const MSG& msg) { // XAML Islands 会吞掉 Alt+F4,需要特殊处理 // https://github.com/microsoft/microsoft-ui-xaml/issues/2408 @@ -47,6 +44,10 @@ class XamlWindowT { return _hWnd; } + operator bool() const noexcept { + return _hWnd; + } + const C& Content() const noexcept { return _content; } @@ -75,7 +76,7 @@ class XamlWindowT { return DefWindowProc(hWnd, msg, wParam, lParam); } - void _SetContent(C const& content) { + void _Content(C const& content) { _content = content; // 初始化 XAML Islands @@ -99,6 +100,20 @@ class XamlWindowT { }); } + uint32_t _CurrentDpi() const noexcept { + return _currentDpi; + } + + bool _IsMaximized() const noexcept { + return _isMaximized; + } + + // 窗口尚未显示无法最大化,通过这个方法设置 _isMaximized 使 XamlWindow 估计 XAML Islands 窗口尺寸。 + // 否则在显示窗口时可能会看到 NavigationView 的导航栏的展开动画。 + void _SetInitialMaximized() noexcept { + _isMaximized = true; + } + void _SetTheme(bool isDarkTheme) noexcept { _isDarkTheme = isDarkTheme; @@ -138,9 +153,7 @@ class XamlWindowT { switch (msg) { case WM_CREATE: { - _currentDpi = GetDpiForWindow(_hWnd); - - _UpdateFrameMargins(); + _UpdateDpi(GetDpiForWindow(_hWnd)); if (!Win32Utils::GetOSVersion().IsWin11()) { // 初始化双缓冲绘图 @@ -148,6 +161,8 @@ class XamlWindowT { BufferedPaintInit(); return 0; }(); + + _UpdateFrameMargins(); } break; @@ -161,6 +176,8 @@ class XamlWindowT { return 0; } + _isWindowShown = IsWindowVisible(_hWnd); + NCCALCSIZE_PARAMS* params = (NCCALCSIZE_PARAMS*)lParam; RECT& clientRect = params->rgrc[0]; @@ -221,6 +238,9 @@ class XamlWindowT { } } + // 如果在 
WM_SIZE 中处理会导致窗口闪烁 + _UpdateFrameMargins(); + return 0; } case WM_NCHITTEST: @@ -233,7 +253,6 @@ class XamlWindowT { // XAML Islands 和它上面的标题栏窗口都会吞掉鼠标事件,因此能到达这里的唯一机会 // 是上边框。保险起见做一些额外检查。 - if (!_isMaximized) { RECT rcWindow; GetWindowRect(_hWnd, &rcWindow); @@ -303,15 +322,6 @@ class XamlWindowT { EndPaint(_hWnd, &ps); return 0; } - case WM_SHOWWINDOW: - { - if (wParam == TRUE) { - // 将焦点置于 XAML Islands 窗口可以修复按 Alt 键会导致 UI 无法交互的问题 - SetFocus(_hwndXamlIsland); - } - - break; - } case WM_KEYDOWN: { if (wParam == VK_TAB) { @@ -327,7 +337,7 @@ class XamlWindowT { } case WM_DPICHANGED: { - _currentDpi = HIWORD(wParam); + _UpdateDpi(HIWORD(wParam)); RECT* newRect = (RECT*)lParam; SetWindowPos(_hWnd, @@ -410,8 +420,6 @@ class XamlWindowT { } } - _UpdateFrameMargins(); - return 0; } case WM_DESTROY: @@ -441,26 +449,16 @@ class XamlWindowT { } uint32_t _GetTopBorderHeight() const noexcept { - static constexpr uint32_t TOP_BORDER_HEIGHT = 1; - - // Win11 或最大化时没有上边框 - return (Win32Utils::GetOSVersion().IsWin11() || _isMaximized) ? 0 : TOP_BORDER_HEIGHT; + // 最大化时没有上边框 + return _isMaximized ? 
0 : _nativeTopBorderHeight; } - int _GetResizeHandleHeight() noexcept { + int _GetResizeHandleHeight() const noexcept { // 没有 SM_CYPADDEDBORDER return GetSystemMetricsForDpi(SM_CXPADDEDBORDER, _currentDpi) + GetSystemMetricsForDpi(SM_CYSIZEFRAME, _currentDpi); } - HWND _hWnd = NULL; - C _content{ nullptr }; - - uint32_t _currentDpi = USER_DEFAULT_SCREEN_DPI; - bool _isMaximized = false; - bool _isWindowShown = false; - bool _isDarkTheme = false; - private: void _UpdateIslandPosition(int width, int height) const noexcept { if (!IsWindowVisible(_hWnd) && _isMaximized) { @@ -478,7 +476,9 @@ class XamlWindowT { } } - int topBorderHeight = _GetTopBorderHeight(); + // Win10 中上边框被涂黑来显示系统原始边框,Win11 中 DWM 绘制的上边框也位于客户区内, + // 很可能是为了和 Win10 兼容。XAML Islands 不应该和上边框重叠。 + const int topBorderHeight = (int)_GetTopBorderHeight(); // SWP_NOZORDER 确保 XAML Islands 窗口始终在标题栏窗口下方,否则主窗口在调整大小时会闪烁 SetWindowPos( @@ -525,11 +525,38 @@ class XamlWindowT { DwmExtendFrameIntoClientArea(_hWnd, &margins); } + void _UpdateDpi(uint32_t dpi) noexcept { + _currentDpi = dpi; + + // Win10 中窗口边框始终只有一个像素宽,Win11 中的窗口边框宽度和 DPI 缩放有关 + if (Win32Utils::GetOSVersion().IsWin11()) { + HRESULT hr = DwmGetWindowAttribute( + _hWnd, + DWMWA_VISIBLE_FRAME_BORDER_THICKNESS, + &_nativeTopBorderHeight, + sizeof(_nativeTopBorderHeight) + ); + if (FAILED(hr)) { + Logger::Get().ComError("DwmGetWindowAttribute 失败", hr); + } + } + } + winrt::event> _destroyedEvent; + HWND _hWnd = NULL; HWND _hwndXamlIsland = NULL; winrt::DesktopWindowXamlSource _xamlSource{ nullptr }; winrt::com_ptr _xamlSourceNative2; + + C _content{ nullptr }; + + uint32_t _currentDpi = USER_DEFAULT_SCREEN_DPI; + uint32_t _nativeTopBorderHeight = 1; + + bool _isDarkTheme = false; + bool _isWindowShown = false; + bool _isMaximized = false; }; } diff --git a/src/Magpie/app.manifest b/src/Magpie/app.manifest index c87342ced..c52eb2ce4 100644 --- a/src/Magpie/app.manifest +++ b/src/Magpie/app.manifest @@ -1,16 +1,6 @@ - - - - - - @@ -26,8 +16,8 @@ - - + + true 
@@ -36,7 +26,7 @@ SegmentHeap - + diff --git a/src/Magpie/main.cpp b/src/Magpie/main.cpp index 782445414..3ddcbf85b 100644 --- a/src/Magpie/main.cpp +++ b/src/Magpie/main.cpp @@ -34,6 +34,24 @@ static void SetCurDir() noexcept { SetCurrentDirectory(curDir); } +static void IncreaseTimerResolution() noexcept { + // 我们需要尽可能高的时钟分辨率来提高渲染帧率。 + // 通常 Magpie 被 OS 认为是后台进程,下面的调用避免 OS 自动降低时钟分辨率。 + // 见 https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setprocessinformation + PROCESS_POWER_THROTTLING_STATE powerThrottling{ + .Version = PROCESS_POWER_THROTTLING_CURRENT_VERSION, + .ControlMask = PROCESS_POWER_THROTTLING_EXECUTION_SPEED | + PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION, + .StateMask = 0 + }; + SetProcessInformation( + GetCurrentProcess(), + ProcessPowerThrottling, + &powerThrottling, + sizeof(powerThrottling) + ); +} + int APIENTRY wWinMain( _In_ HINSTANCE hInstance, _In_opt_ HINSTANCE /*hPrevInstance*/, @@ -47,6 +65,9 @@ int APIENTRY wWinMain( // 堆损坏时终止进程 HeapSetInformation(NULL, HeapEnableTerminationOnCorruption, nullptr, 0); + // 提高时钟分辨率 + IncreaseTimerResolution(); + // 程序结束时也不应调用 uninit_apartment // 见 https://kennykerr.ca/2018/03/24/cppwinrt-hosting-the-windows-runtime/ winrt::init_apartment(winrt::apartment_type::single_threaded); diff --git a/src/Magpie/packages.config b/src/Magpie/packages.config index b7b699331..dfebda821 100644 --- a/src/Magpie/packages.config +++ b/src/Magpie/packages.config @@ -1,6 +1,6 @@  - + - \ No newline at end of file + diff --git a/src/Shared/CommonSharedConstants.h b/src/Shared/CommonSharedConstants.h index cc6696d10..24b950cae 100644 --- a/src/Shared/CommonSharedConstants.h +++ b/src/Shared/CommonSharedConstants.h @@ -5,6 +5,8 @@ struct CommonSharedConstants { static constexpr const wchar_t* TITLE_BAR_WINDOW_CLASS_NAME = L"Magpie_TitleBar"; static constexpr const wchar_t* NOTIFY_ICON_WINDOW_CLASS_NAME = L"Magpie_NotifyIcon"; static constexpr const wchar_t* HOTKEY_WINDOW_CLASS_NAME = 
L"Magpie_Hotkey"; + static constexpr const wchar_t* SCALING_WINDOW_CLASS_NAME = L"Window_Magpie_967EB565-6F73-4E94-AE53-00CC42592A22"; + static constexpr const wchar_t* DDF_WINDOW_CLASS_NAME = L"Window_Magpie_C322D752-C866-4630-91F5-32CB242A8930"; static constexpr const COLORREF LIGHT_TINT_COLOR = RGB(243, 243, 243); static constexpr const COLORREF DARK_TINT_COLOR = RGB(32, 32, 32); @@ -18,12 +20,12 @@ struct CommonSharedConstants { static constexpr const wchar_t* CACHE_DIR = L"cache\\"; static constexpr const wchar_t* UPDATE_DIR = L"update\\"; - static constexpr const wchar_t* OPTION_MINIMIZE_TO_TRAY_AT_STARTUP = L"-t"; + static constexpr const wchar_t* OPTION_LAUNCH_WITHOUT_WINDOW = L"-t"; #ifndef IDI_APP // 来自 Magpie\resource.h static constexpr const UINT IDI_APP = 101; -#endif // !IDI_APP +#endif static constexpr const UINT WM_NOTIFY_ICON = WM_USER; static constexpr const UINT WM_QUIT_MAGPIE = WM_USER + 1; @@ -31,4 +33,6 @@ struct CommonSharedConstants { static constexpr const wchar_t* WM_MAGPIE_SHOWME = L"WM_MAGPIE_SHOWME"; static constexpr const wchar_t* WM_MAGPIE_QUIT = L"WM_MAGPIE_QUIT"; + + static constexpr const wchar_t* APP_RESOURCE_MAP_ID = L"Magpie.App/Resources"; }; diff --git a/src/Shared/Logger.cpp b/src/Shared/Logger.cpp index d702850a7..355dc2348 100644 --- a/src/Shared/Logger.cpp +++ b/src/Shared/Logger.cpp @@ -4,12 +4,16 @@ #include #include - -bool Logger::Initialize(spdlog::level::level_enum logLevel, const char* logFileName, int logArchiveAboveSize, int logMaxArchiveFiles) noexcept { +bool Logger::Initialize( + spdlog::level::level_enum logLevel, + const char* logFileName, + int logArchiveAboveSize, + int logMaxArchiveFiles +) noexcept { try { _logger = spdlog::rotating_logger_mt(".", logFileName, logArchiveAboveSize, logMaxArchiveFiles); _logger->set_level(logLevel); - _logger->set_pattern("%Y-%m-%d %H:%M:%S.%e|%l|%s:%!|%v"); + _logger->set_pattern("%Y-%m-%d %H:%M:%S.%e|%l|%s:%#|%!|%v"); _logger->flush_on(spdlog::level::warn); 
spdlog::flush_every(5s); } catch (const spdlog::spdlog_ex&) { @@ -44,7 +48,7 @@ std::string Logger::_MakeComErrorMsg(std::string_view msg, HRESULT hr) { return fmt::sprintf("%s\n\tHRESULT:0x%X", msg, hr); } -void Logger::_Log(spdlog::level::level_enum logLevel, std::string_view msg, const std::source_location& location) { +void Logger::_Log(spdlog::level::level_enum logLevel, std::string_view msg, const SourceLocation& location) { assert(!msg.empty()); if (logLevel >= spdlog::level::warn) { @@ -57,7 +61,7 @@ void Logger::_Log(spdlog::level::level_enum logLevel, std::string_view msg, cons } _logger->log( - spdlog::source_loc{ location.file_name(), (int)location.line(), location.function_name() }, + spdlog::source_loc{ location.FileName(), (int)location.Line(), location.FunctionName() }, logLevel, msg ); diff --git a/src/Shared/Logger.h b/src/Shared/Logger.h index 313654ec6..33d92da1e 100644 --- a/src/Shared/Logger.h +++ b/src/Shared/Logger.h @@ -1,7 +1,44 @@ #pragma once -#include #include +// std::source_location 中的函数名包含整个签名过于冗长,我们只需记录函数名, +// 因此创建自己的 SourceLocation +struct SourceLocation { + [[nodiscard]] static consteval SourceLocation current( + std::uint_least32_t line = __builtin_LINE(), + const char* file = __builtin_FILE(), + const char* function = __builtin_FUNCTION() + ) noexcept { + return SourceLocation{ line, file, function }; + } + + [[nodiscard]] constexpr SourceLocation() noexcept = default; + + [[nodiscard]] constexpr SourceLocation( + const std::uint_least32_t line, + const char* file, + const char* function + ) noexcept : _line(line), _file(file), _function(function) { + } + + [[nodiscard]] constexpr std::uint_least32_t Line() const noexcept { + return _line; + } + + [[nodiscard]] constexpr const char* FileName() const noexcept { + return _file; + } + + constexpr const char* FunctionName() const noexcept { + return _function; + } + +private: + const std::uint_least32_t _line = 0; + const char* _file = nullptr; + const char* _function = nullptr; 
+}; + class Logger { public: static Logger& Get() noexcept { @@ -20,51 +57,51 @@ class Logger { _logger->flush(); } - void Info(std::string_view msg, const std::source_location& location = std::source_location::current()) { + void Info(std::string_view msg, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::info, msg, location); } - void Win32Info(std::string_view msg, const std::source_location& location = std::source_location::current()) { + void Win32Info(std::string_view msg, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::info, _MakeWin32ErrorMsg(msg), location); } - void ComInfo(std::string_view msg, HRESULT hr, const std::source_location& location = std::source_location::current()) { + void ComInfo(std::string_view msg, HRESULT hr, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::info, _MakeComErrorMsg(msg, hr), location); } - void Warn(std::string_view msg, const std::source_location& location = std::source_location::current()) { + void Warn(std::string_view msg, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::warn, msg, location); } - void Win32Warn(std::string_view msg, const std::source_location& location = std::source_location::current()) { + void Win32Warn(std::string_view msg, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::warn, _MakeWin32ErrorMsg(msg), location); } - void ComWarn(std::string_view msg, HRESULT hr, const std::source_location& location = std::source_location::current()) { + void ComWarn(std::string_view msg, HRESULT hr, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::warn, _MakeComErrorMsg(msg, hr), location); } - void Error(std::string_view msg, const std::source_location& location = std::source_location::current()) { + void Error(std::string_view msg, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::err, 
msg, location); } - void Win32Error(std::string_view msg, const std::source_location& location = std::source_location::current()) { + void Win32Error(std::string_view msg, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::err, _MakeWin32ErrorMsg(msg), location); } - void ComError(std::string_view msg, HRESULT hr, const std::source_location& location = std::source_location::current()) { + void ComError(std::string_view msg, HRESULT hr, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::err, _MakeComErrorMsg(msg, hr), location); } - void Critical(std::string_view msg, const std::source_location& location = std::source_location::current()) { + void Critical(std::string_view msg, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::critical, msg, location); } - void Win32Critical(std::string_view msg, const std::source_location& location = std::source_location::current()) { + void Win32Critical(std::string_view msg, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::critical, _MakeWin32ErrorMsg(msg), location); } - void ComCritical(std::string_view msg, HRESULT hr, const std::source_location& location = std::source_location::current()) { + void ComCritical(std::string_view msg, HRESULT hr, const SourceLocation& location = SourceLocation::current()) { _Log(spdlog::level::critical, _MakeComErrorMsg(msg, hr), location); } @@ -73,7 +110,7 @@ class Logger { static std::string _MakeComErrorMsg(std::string_view msg, HRESULT hr); - void _Log(spdlog::level::level_enum logLevel, std::string_view msg, const std::source_location& location); + void _Log(spdlog::level::level_enum logLevel, std::string_view msg, const SourceLocation& location); std::shared_ptr _logger; }; diff --git a/src/Shared/Win32Utils.cpp b/src/Shared/Win32Utils.cpp index 6448c978b..34a8df2e2 100644 --- a/src/Shared/Win32Utils.cpp +++ b/src/Shared/Win32Utils.cpp @@ -8,7 +8,7 @@ #include 
#include -std::wstring Win32Utils::GetWndClassName(HWND hWnd) { +std::wstring Win32Utils::GetWndClassName(HWND hWnd) noexcept { // 窗口类名最多 256 个字符 std::wstring className(256, 0); int num = GetClassName(hWnd, &className[0], (int)className.size() + 1); @@ -21,7 +21,7 @@ std::wstring Win32Utils::GetWndClassName(HWND hWnd) { return className; } -std::wstring Win32Utils::GetWndTitle(HWND hWnd) { +std::wstring Win32Utils::GetWndTitle(HWND hWnd) noexcept { int len = GetWindowTextLength(hWnd); if (len == 0) { return {}; @@ -33,7 +33,7 @@ std::wstring Win32Utils::GetWndTitle(HWND hWnd) { return title; } -std::wstring Win32Utils::GetPathOfWnd(HWND hWnd) { +std::wstring Win32Utils::GetPathOfWnd(HWND hWnd) noexcept { ScopedHandle hProc; DWORD dwProcId = 0; @@ -73,7 +73,7 @@ std::wstring Win32Utils::GetPathOfWnd(HWND hWnd) { return fileName; } -UINT Win32Utils::GetWindowShowCmd(HWND hWnd) { +UINT Win32Utils::GetWindowShowCmd(HWND hWnd) noexcept { assert(hWnd != NULL); WINDOWPLACEMENT wp{}; @@ -85,7 +85,7 @@ UINT Win32Utils::GetWindowShowCmd(HWND hWnd) { return wp.showCmd; } -bool Win32Utils::GetClientScreenRect(HWND hWnd, RECT& rect) { +bool Win32Utils::GetClientScreenRect(HWND hWnd, RECT& rect) noexcept { if (!GetClientRect(hWnd, &rect)) { Logger::Get().Win32Error("GetClientRect 出错"); return false; @@ -105,18 +105,60 @@ bool Win32Utils::GetClientScreenRect(HWND hWnd, RECT& rect) { return true; } -bool Win32Utils::GetWindowFrameRect(HWND hWnd, RECT& result) { +bool Win32Utils::GetWindowFrameRect(HWND hWnd, RECT& rect) noexcept { HRESULT hr = DwmGetWindowAttribute(hWnd, - DWMWA_EXTENDED_FRAME_BOUNDS, &result, sizeof(result)); + DWMWA_EXTENDED_FRAME_BOUNDS, &rect, sizeof(rect)); if (FAILED(hr)) { + Logger::Get().ComError("DwmGetWindowAttribute 失败", hr); return false; } + // Win11 中最大化的窗口的 extended frame bounds 有一部分在屏幕外面, + // 不清楚 Win10 是否有这种情况 + if (GetWindowShowCmd(hWnd) == SW_SHOWMAXIMIZED) { + HMONITOR hMon = MonitorFromWindow(hWnd, MONITOR_DEFAULTTONEAREST); + MONITORINFO mi{ 
.cbSize = sizeof(mi) }; + if (!GetMonitorInfo(hMon, &mi)) { + Logger::Get().Win32Error("GetMonitorInfo 失败"); + return false; + } + + IntersectRect(&rect, &rect, &mi.rcWork); + } + + // 对于使用 SetWindowRgn 自定义形状的窗口,裁剪到最小矩形边框 + RECT rgnRect; + int regionType = GetWindowRgnBox(hWnd, &rgnRect); + if (regionType == SIMPLEREGION || regionType == COMPLEXREGION) { + RECT windowRect; + if (!GetWindowRect(hWnd, &windowRect)) { + Logger::Get().Win32Error("GetWindowRect 失败"); + return false; + } + + // 转换为屏幕坐标 + OffsetRect(&rgnRect, windowRect.left, windowRect.top); + + IntersectRect(&rect, &rect, &rgnRect); + } + return true; } +bool Win32Utils::IsWindowVisible(HWND hWnd) noexcept { + // 检查窗口是否可见应查看整个所有者链 + do { + if (!::IsWindowVisible(hWnd)) { + return false; + } + + hWnd = GetWindowOwner(hWnd); + } while (hWnd); -bool Win32Utils::ReadFile(const wchar_t* fileName, std::vector& result) { + return true; +} + +bool Win32Utils::ReadFile(const wchar_t* fileName, std::vector& result) noexcept { Logger::Get().Info(StrUtils::Concat("读取文件:", StrUtils::UTF16ToUTF8(fileName))); CREATEFILE2_EXTENDED_PARAMETERS extendedParams = {}; @@ -146,7 +188,7 @@ bool Win32Utils::ReadFile(const wchar_t* fileName, std::vector& result) { return true; } -bool Win32Utils::ReadTextFile(const wchar_t* fileName, std::string& result) { +bool Win32Utils::ReadTextFile(const wchar_t* fileName, std::string& result) noexcept { FILE* hFile; if (_wfopen_s(&hFile, fileName, L"rt") || !hFile) { Logger::Get().Error(StrUtils::Concat("打开文件 ", StrUtils::UTF16ToUTF8(fileName), " 失败")); @@ -167,7 +209,7 @@ bool Win32Utils::ReadTextFile(const wchar_t* fileName, std::string& result) { return true; } -bool Win32Utils::WriteFile(const wchar_t* fileName, const void* buffer, size_t bufferSize) { +bool Win32Utils::WriteFile(const wchar_t* fileName, const void* buffer, size_t bufferSize) noexcept { FILE* hFile; if (_wfopen_s(&hFile, fileName, L"wb") || !hFile) { Logger::Get().Error(StrUtils::Concat("打开文件 ", 
StrUtils::UTF16ToUTF8(fileName), " 失败")); @@ -183,7 +225,7 @@ bool Win32Utils::WriteFile(const wchar_t* fileName, const void* buffer, size_t b return true; } -bool Win32Utils::WriteTextFile(const wchar_t* fileName, std::string_view text) { +bool Win32Utils::WriteTextFile(const wchar_t* fileName, std::string_view text) noexcept { FILE* hFile; if (_wfopen_s(&hFile, fileName, L"wt") || !hFile) { Logger::Get().Error(StrUtils::Concat("打开文件 ", StrUtils::UTF16ToUTF8(fileName), " 失败")); @@ -196,7 +238,7 @@ bool Win32Utils::WriteTextFile(const wchar_t* fileName, std::string_view text) { return true; } -bool Win32Utils::CreateDir(const std::wstring& path, bool recursive) { +bool Win32Utils::CreateDir(const std::wstring& path, bool recursive) noexcept { if (DirExists(path.c_str())) { return true; } @@ -271,7 +313,7 @@ static void CALLBACK TPCallback(PTP_CALLBACK_INSTANCE, PVOID context, PTP_WORK) #pragma warning(pop) -void Win32Utils::RunParallel(std::function func, uint32_t times) { +void Win32Utils::RunParallel(std::function func, uint32_t times) noexcept { #ifdef _DEBUG // 为了便于调试,DEBUG 模式下不使用线程池 for (UINT i = 0; i < times; ++i) { @@ -309,7 +351,7 @@ void Win32Utils::RunParallel(std::function func, uint32_t times) #endif // _DEBUG } -bool Win32Utils::SetForegroundWindow(HWND hWnd) { +bool Win32Utils::SetForegroundWindow(HWND hWnd) noexcept { if (::SetForegroundWindow(hWnd)) { return true; } @@ -527,6 +569,31 @@ bool Win32Utils::IsProcessElevated() noexcept { return bool(result == 1); } +// 获取进程的完整性级别 +// https://devblogs.microsoft.com/oldnewthing/20221017-00/?p=107291 +bool Win32Utils::GetProcessIntegrityLevel(HANDLE hQueryToken, DWORD& integrityLevel) noexcept { + if (!hQueryToken) { + hQueryToken = GetCurrentProcessToken(); + } + + DWORD infoSize = 0; + GetTokenInformation(hQueryToken, TokenIntegrityLevel, nullptr, 0, &infoSize); + if (infoSize == 0) { + Logger::Get().Win32Error("GetTokenInformation 失败"); + return false; + } + + std::unique_ptr infoBuffer = 
std::make_unique(infoSize); + if (!GetTokenInformation(hQueryToken, TokenIntegrityLevel, infoBuffer.get(), infoSize, &infoSize)) { + Logger::Get().Win32Error("GetTokenInformation 失败"); + return false; + } + + PSID sid = ((TOKEN_MANDATORY_LABEL*)infoBuffer.get())->Label.Sid; + integrityLevel = *GetSidSubAuthority(sid, *GetSidSubAuthorityCount(sid) - 1); + return true; +} + static winrt::com_ptr FindDesktopFolderView() { winrt::com_ptr shellWindows = winrt::try_create_instance(CLSID_ShellWindows, CLSCTX_LOCAL_SERVER); diff --git a/src/Shared/Win32Utils.h b/src/Shared/Win32Utils.h index e994ea48c..b16637827 100644 --- a/src/Shared/Win32Utils.h +++ b/src/Shared/Win32Utils.h @@ -10,25 +10,27 @@ struct Win32Utils { return r1.right > r2.left && r1.bottom > r2.top && r1.left < r2.right&& r1.top < r2.bottom; } - static std::wstring GetWndClassName(HWND hWnd); + static std::wstring GetWndClassName(HWND hWnd) noexcept; - static std::wstring GetWndTitle(HWND hWnd); + static std::wstring GetWndTitle(HWND hWnd) noexcept; - static std::wstring GetPathOfWnd(HWND hWnd); + static std::wstring GetPathOfWnd(HWND hWnd) noexcept; - static UINT GetWindowShowCmd(HWND hWnd); + static UINT GetWindowShowCmd(HWND hWnd) noexcept; - static bool GetClientScreenRect(HWND hWnd, RECT& rect); + static bool GetClientScreenRect(HWND hWnd, RECT& rect) noexcept; - static bool GetWindowFrameRect(HWND hWnd, RECT& result); + static bool GetWindowFrameRect(HWND hWnd, RECT& rect) noexcept; - static bool ReadFile(const wchar_t* fileName, std::vector& result); + static bool IsWindowVisible(HWND hWnd) noexcept; - static bool ReadTextFile(const wchar_t* fileName, std::string& result); + static bool ReadFile(const wchar_t* fileName, std::vector& result) noexcept; - static bool WriteFile(const wchar_t* fileName, const void* buffer, size_t bufferSize); + static bool ReadTextFile(const wchar_t* fileName, std::string& result) noexcept; - static bool WriteTextFile(const wchar_t* fileName, std::string_view text); + 
static bool WriteFile(const wchar_t* fileName, const void* buffer, size_t bufferSize) noexcept; + + static bool WriteTextFile(const wchar_t* fileName, std::string_view text) noexcept; static bool FileExists(const wchar_t* fileName) noexcept { DWORD attrs = GetFileAttributes(fileName); @@ -41,7 +43,7 @@ struct Win32Utils { return (attrs != INVALID_FILE_ATTRIBUTES) && (attrs & FILE_ATTRIBUTE_DIRECTORY); } - static bool CreateDir(const std::wstring& path, bool recursive = false); + static bool CreateDir(const std::wstring& path, bool recursive = false) noexcept; struct OSVersion : Version { constexpr OSVersion() {} @@ -147,16 +149,18 @@ struct Win32Utils { // 并行执行 times 次 func,并行失败时回退到单线程 // 执行完毕后返回 - static void RunParallel(std::function func, uint32_t times); + static void RunParallel(std::function func, uint32_t times) noexcept; // 强制切换前台窗口 - static bool SetForegroundWindow(HWND hWnd); + static bool SetForegroundWindow(HWND hWnd) noexcept; // 获取 Virtual Key 的名字 static const std::wstring& GetKeyName(uint8_t key); static bool IsProcessElevated() noexcept; + static bool GetProcessIntegrityLevel(HANDLE hQueryToken, DWORD& integrityLevel) noexcept; + // VARIANT 封装,自动管理生命周期 struct Variant : public VARIANT { Variant() noexcept { @@ -258,3 +262,11 @@ struct Win32Utils { // 不应在主线程调用 static bool OpenFolderAndSelectFile(const wchar_t* fileName); }; + +constexpr bool operator==(const SIZE& l, const SIZE& r) noexcept { + return l.cx == r.cx && l.cy == r.cy; +} + +constexpr bool operator==(const POINT& l, const POINT& r) noexcept { + return l.x == r.x && l.y == r.y; +} diff --git a/src/Shared/XamlUtils.cpp b/src/Shared/XamlUtils.cpp index 588669abb..180e4dd9f 100644 --- a/src/Shared/XamlUtils.cpp +++ b/src/Shared/XamlUtils.cpp @@ -13,7 +13,7 @@ using namespace Windows::UI::Xaml::Media; static bool IsComboBoxPopup(const Primitives::Popup& popup) { UIElement child = popup.Child(); - if (get_class_name(child) != name_of()) { + if (!child.try_as()) { return false; } @@ -27,7 +27,7 
@@ static bool IsComboBoxPopup(const Primitives::Popup& popup) { for (int i = 0; i < count; ++i) { DependencyObject current = VisualTreeHelper::GetChild(elem, i); - if (get_class_name(current) == name_of()) { + if (current.try_as()) { return true; } diff --git a/src/WinUI.props b/src/WinUI.props index d801de33e..b5f3dd9d4 100644 --- a/src/WinUI.props +++ b/src/WinUI.props @@ -1,26 +1,37 @@ - - + - - + - - + + + + + + + + + + + $(SolutionDir)obj\$(Platform)\WinUI\Microsoft.UI.Xaml.dll + - - - - false - Microsoft.UI.Xaml.dll - + + + <_ReferenceRelatedPaths Remove="@(WinUIPriReference)" /> + + <_ReferenceRelatedPaths Include="$(SolutionDir)obj\$(Platform)\WinUI\Microsoft.UI.Xaml.pri"> + %(WinUIPriReference.CopyLocal) + %(WinUIPriReference.CopyLocalSatelliteAssemblies) + %(WinUIPriReference.Implicit) + %(WinUIPriReference.OriginalItemSpec) + %(WinUIPriReference.Private) + %(WinUIPriReference.ReferenceOutputAssembly) + %(WinUIPriReference.ResolvedFrom) + %(WinUIPriReference.Version) + - - - - - diff --git a/src/_ConanDeps/build_conan_deps.py b/src/_ConanDeps/build_conan_deps.py index 3c2b324e6..209c1657d 100644 --- a/src/_ConanDeps/build_conan_deps.py +++ b/src/_ConanDeps/build_conan_deps.py @@ -45,7 +45,7 @@ # HybridCRT 要求静态链接 CRT p = subprocess.run( - f"conan install {conanfilePath} -pr:b=conanprofile.txt -pr:h=conanprofile.txt --output-folder ..\\..\\.conan\\{project} --build=missing -s build_type={configuration} -s arch={build_type} --update" + f"conan install {conanfilePath} -pr:a=conanprofile.txt --output-folder ..\\..\\.conan\\{project} --build=missing -s build_type={configuration} -s arch={build_type} --update" ) if p.returncode != 0: raise Exception("conan install 失败") diff --git a/src/extract_winui_runtime.py b/src/extract_winui_runtime.py index 5970d57c4..4edfef2bf 100644 --- a/src/extract_winui_runtime.py +++ b/src/extract_winui_runtime.py @@ -25,16 +25,25 @@ os.makedirs(intDir, exist_ok=True) os.chdir(intDir) - needExtract = True + def needExtract(): + try: 
+ with open("version.txt") as f: + if f.read() != winuiPkg: + return True - try: - with open("version.txt") as f: - if f.read() == winuiPkg: - needExtract = False - except: - pass + for path in [ + "Microsoft.UI.Xaml.dll", + "Microsoft.UI.Xaml.pri", + "Microsoft.UI.Xaml", + ]: + if not os.access(path, os.F_OK): + return True + except: + return True - if needExtract: + return False + + if needExtract(): with zipfile.ZipFile( # 取最新的包 max( @@ -49,8 +58,14 @@ for file in appx.namelist(): if file.startswith("Microsoft.UI.Xaml/Assets"): members.append(file) - appx.extractall(members=members) + # 将 resources.pri 重命名为 Microsoft.UI.Xaml.pri + try: + os.remove("Microsoft.UI.Xaml.pri") + except: + pass + os.rename("resources.pri", "Microsoft.UI.Xaml.pri") + with open("version.txt", mode="w") as f: f.write(winuiPkg) diff --git a/src/fix_resfiles.py b/src/fix_resfiles.py deleted file mode 100644 index 0cc0ae5d2..000000000 --- a/src/fix_resfiles.py +++ /dev/null @@ -1,15 +0,0 @@ -import sys -import os - -if len(sys.argv) != 2: - raise Exception("请勿直接运行此脚本") - -with open(sys.argv[1], "r+") as f: - lines = [] - for line in f.readlines(): - if not "\\packages\\Microsoft.UI.Xaml" in line or "prerelease" in line: - lines.append(line) - - f.seek(os.SEEK_SET) - f.truncate() - f.writelines(lines)